author      Dimitry Andric <dim@FreeBSD.org>    2023-07-26 19:03:47 +0000
committer   Dimitry Andric <dim@FreeBSD.org>    2023-07-26 19:04:23 +0000
commit      7fa27ce4a07f19b07799a767fc29416f3b625afb (patch)
tree        27825c83636c4de341eb09a74f49f5d38a15d165 /llvm
parent      e3b557809604d036af6e00c60f012c2025b59a5e (diff)
download    src-7fa27ce4a07f19b07799a767fc29416f3b625afb.tar.gz
            src-7fa27ce4a07f19b07799a767fc29416f3b625afb.zip
Vendor import of llvm-project main llvmorg-17-init-19304-gd0b54bb50e51,
the last commit before the upstream release/17.x branch was created.
(vendor/llvm-project/llvmorg-17-init-19304-gd0b54bb50e51)
Diffstat (limited to 'llvm')
-rw-r--r--llvm/include/llvm-c/Core.h82
-rw-r--r--llvm/include/llvm-c/DebugInfo.h7
-rw-r--r--llvm/include/llvm-c/Initialization.h50
-rw-r--r--llvm/include/llvm-c/Transforms/IPO.h90
-rw-r--r--llvm/include/llvm-c/Transforms/InstCombine.h40
-rw-r--r--llvm/include/llvm-c/Transforms/PassBuilder.h3
-rw-r--r--llvm/include/llvm-c/Transforms/PassManagerBuilder.h81
-rw-r--r--llvm/include/llvm-c/Transforms/Scalar.h167
-rw-r--r--llvm/include/llvm-c/Transforms/Utils.h50
-rw-r--r--llvm/include/llvm-c/Transforms/Vectorize.h47
-rw-r--r--llvm/include/llvm-c/Types.h3
-rw-r--r--llvm/include/llvm-c/module.modulemap4
-rw-r--r--llvm/include/llvm/ADT/ADL.h103
-rw-r--r--llvm/include/llvm/ADT/APFloat.h95
-rw-r--r--llvm/include/llvm/ADT/APInt.h100
-rw-r--r--llvm/include/llvm/ADT/AddressRanges.h206
-rw-r--r--llvm/include/llvm/ADT/Any.h11
-rw-r--r--llvm/include/llvm/ADT/ArrayRef.h4
-rw-r--r--llvm/include/llvm/ADT/BitVector.h15
-rw-r--r--llvm/include/llvm/ADT/BitmaskEnum.h43
-rw-r--r--llvm/include/llvm/ADT/BreadthFirstIterator.h4
-rw-r--r--llvm/include/llvm/ADT/ConcurrentHashtable.h401
-rw-r--r--llvm/include/llvm/ADT/DenseMap.h33
-rw-r--r--llvm/include/llvm/ADT/DenseMapInfo.h40
-rw-r--r--llvm/include/llvm/ADT/DenseMapInfoVariant.h71
-rw-r--r--llvm/include/llvm/ADT/DepthFirstIterator.h4
-rw-r--r--llvm/include/llvm/ADT/EpochTracker.h6
-rw-r--r--llvm/include/llvm/ADT/FloatingPointMode.h74
-rw-r--r--llvm/include/llvm/ADT/FunctionExtras.h7
-rw-r--r--llvm/include/llvm/ADT/GenericCycleImpl.h9
-rw-r--r--llvm/include/llvm/ADT/GenericCycleInfo.h21
-rw-r--r--llvm/include/llvm/ADT/GenericSSAContext.h46
-rw-r--r--llvm/include/llvm/ADT/GenericUniformityImpl.h213
-rw-r--r--llvm/include/llvm/ADT/GenericUniformityInfo.h24
-rw-r--r--llvm/include/llvm/ADT/Hashing.h41
-rw-r--r--llvm/include/llvm/ADT/IntervalTree.h2
-rw-r--r--llvm/include/llvm/ADT/MapVector.h18
-rw-r--r--llvm/include/llvm/ADT/None.h31
-rw-r--r--llvm/include/llvm/ADT/Optional.h27
-rw-r--r--llvm/include/llvm/ADT/PointerIntPair.h42
-rw-r--r--llvm/include/llvm/ADT/PointerUnion.h6
-rw-r--r--llvm/include/llvm/ADT/PostOrderIterator.h40
-rw-r--r--llvm/include/llvm/ADT/SCCIterator.h41
-rw-r--r--llvm/include/llvm/ADT/STLExtras.h569
-rw-r--r--llvm/include/llvm/ADT/STLFunctionalExtras.h2
-rw-r--r--llvm/include/llvm/ADT/Sequence.h6
-rw-r--r--llvm/include/llvm/ADT/SetOperations.h32
-rw-r--r--llvm/include/llvm/ADT/SetVector.h109
-rw-r--r--llvm/include/llvm/ADT/SmallBitVector.h14
-rw-r--r--llvm/include/llvm/ADT/SmallPtrSet.h5
-rw-r--r--llvm/include/llvm/ADT/SmallSet.h2
-rw-r--r--llvm/include/llvm/ADT/SmallVector.h7
-rw-r--r--llvm/include/llvm/ADT/SmallVectorExtras.h36
-rw-r--r--llvm/include/llvm/ADT/SparseBitVector.h8
-rw-r--r--llvm/include/llvm/ADT/SparseSet.h1
-rw-r--r--llvm/include/llvm/ADT/StringMap.h24
-rw-r--r--llvm/include/llvm/ADT/StringRef.h50
-rw-r--r--llvm/include/llvm/ADT/StringSwitch.h4
-rw-r--r--llvm/include/llvm/ADT/TinyPtrVector.h101
-rw-r--r--llvm/include/llvm/ADT/Uniformity.h3
-rw-r--r--llvm/include/llvm/ADT/bit.h33
-rw-r--r--llvm/include/llvm/ADT/edit_distance.h13
-rw-r--r--llvm/include/llvm/ADT/ilist.h64
-rw-r--r--llvm/include/llvm/ADT/iterator_range.h30
-rw-r--r--llvm/include/llvm/Analysis/AliasAnalysis.h25
-rw-r--r--llvm/include/llvm/Analysis/AssumeBundleQueries.h2
-rw-r--r--llvm/include/llvm/Analysis/AssumptionCache.h8
-rw-r--r--llvm/include/llvm/Analysis/BasicAliasAnalysis.h23
-rw-r--r--llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h7
-rw-r--r--llvm/include/llvm/Analysis/BranchProbabilityInfo.h3
-rw-r--r--llvm/include/llvm/Analysis/CFGPrinter.h4
-rw-r--r--llvm/include/llvm/Analysis/CGSCCPassManager.h22
-rw-r--r--llvm/include/llvm/Analysis/ConstantFolding.h21
-rw-r--r--llvm/include/llvm/Analysis/ConstraintSystem.h115
-rw-r--r--llvm/include/llvm/Analysis/CycleAnalysis.h2
-rw-r--r--llvm/include/llvm/Analysis/DemandedBits.h24
-rw-r--r--llvm/include/llvm/Analysis/DependenceGraphBuilder.h2
-rw-r--r--llvm/include/llvm/Analysis/DivergenceAnalysis.h210
-rw-r--r--llvm/include/llvm/Analysis/EHUtils.h90
-rw-r--r--llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h20
-rw-r--r--llvm/include/llvm/Analysis/IRSimilarityIdentifier.h68
-rw-r--r--llvm/include/llvm/Analysis/IVDescriptors.h33
-rw-r--r--llvm/include/llvm/Analysis/IVUsers.h3
-rw-r--r--llvm/include/llvm/Analysis/InlineAdvisor.h3
-rw-r--r--llvm/include/llvm/Analysis/InlineModelFeatureMaps.h115
-rw-r--r--llvm/include/llvm/Analysis/InlineOrder.h47
-rw-r--r--llvm/include/llvm/Analysis/InstructionSimplify.h31
-rw-r--r--llvm/include/llvm/Analysis/InteractiveModelRunner.h71
-rw-r--r--llvm/include/llvm/Analysis/LazyCallGraph.h2
-rw-r--r--llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h103
-rw-r--r--llvm/include/llvm/Analysis/Lint.h3
-rw-r--r--llvm/include/llvm/Analysis/LoopAccessAnalysis.h69
-rw-r--r--llvm/include/llvm/Analysis/LoopInfo.h711
-rw-r--r--llvm/include/llvm/Analysis/MLInlineAdvisor.h4
-rw-r--r--llvm/include/llvm/Analysis/MLModelRunner.h3
-rw-r--r--llvm/include/llvm/Analysis/MemoryBuiltins.h7
-rw-r--r--llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h10
-rw-r--r--llvm/include/llvm/Analysis/MemoryProfileInfo.h28
-rw-r--r--llvm/include/llvm/Analysis/MemorySSA.h25
-rw-r--r--llvm/include/llvm/Analysis/ModuleSummaryAnalysis.h4
-rw-r--r--llvm/include/llvm/Analysis/MustExecute.h2
-rw-r--r--llvm/include/llvm/Analysis/PHITransAddr.h67
-rw-r--r--llvm/include/llvm/Analysis/Passes.h40
-rw-r--r--llvm/include/llvm/Analysis/ProfileSummaryInfo.h224
-rw-r--r--llvm/include/llvm/Analysis/ReleaseModeModelRunner.h6
-rw-r--r--llvm/include/llvm/Analysis/ScalarEvolution.h153
-rw-r--r--llvm/include/llvm/Analysis/ScalarEvolutionDivision.h2
-rw-r--r--llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h37
-rw-r--r--llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h6
-rw-r--r--llvm/include/llvm/Analysis/SyncDependenceAnalysis.h92
-rw-r--r--llvm/include/llvm/Analysis/TargetFolder.h5
-rw-r--r--llvm/include/llvm/Analysis/TargetLibraryInfo.def45
-rw-r--r--llvm/include/llvm/Analysis/TargetLibraryInfo.h32
-rw-r--r--llvm/include/llvm/Analysis/TargetTransformInfo.h288
-rw-r--r--llvm/include/llvm/Analysis/TargetTransformInfoImpl.h164
-rw-r--r--llvm/include/llvm/Analysis/TensorSpec.h3
-rw-r--r--llvm/include/llvm/Analysis/UniformityAnalysis.h7
-rw-r--r--llvm/include/llvm/Analysis/Utils/TrainingLogger.h14
-rw-r--r--llvm/include/llvm/Analysis/ValueTracking.h369
-rw-r--r--llvm/include/llvm/Analysis/VecFuncs.def316
-rw-r--r--llvm/include/llvm/Analysis/VectorUtils.h65
-rw-r--r--llvm/include/llvm/AsmParser/LLLexer.h8
-rw-r--r--llvm/include/llvm/AsmParser/LLParser.h3
-rw-r--r--llvm/include/llvm/AsmParser/LLToken.h21
-rw-r--r--llvm/include/llvm/BinaryFormat/COFF.h14
-rw-r--r--llvm/include/llvm/BinaryFormat/DXContainer.h234
-rw-r--r--llvm/include/llvm/BinaryFormat/DXContainerConstants.def1
-rw-r--r--llvm/include/llvm/BinaryFormat/Dwarf.def27
-rw-r--r--llvm/include/llvm/BinaryFormat/Dwarf.h14
-rw-r--r--llvm/include/llvm/BinaryFormat/DynamicTags.def7
-rw-r--r--llvm/include/llvm/BinaryFormat/ELF.h14
-rw-r--r--llvm/include/llvm/BinaryFormat/ELFRelocs/ARM.def4
-rw-r--r--llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def15
-rw-r--r--llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def1
-rw-r--r--llvm/include/llvm/BinaryFormat/GOFF.h135
-rw-r--r--llvm/include/llvm/BinaryFormat/MachO.def1
-rw-r--r--llvm/include/llvm/BinaryFormat/MachO.h8
-rw-r--r--llvm/include/llvm/BinaryFormat/MinidumpConstants.def1
-rw-r--r--llvm/include/llvm/BinaryFormat/MsgPackDocument.h17
-rw-r--r--llvm/include/llvm/BinaryFormat/WasmRelocs.def1
-rw-r--r--llvm/include/llvm/BinaryFormat/XCOFF.h1
-rw-r--r--llvm/include/llvm/Bitcode/BitcodeAnalyzer.h22
-rw-r--r--llvm/include/llvm/Bitcode/BitcodeReader.h1
-rw-r--r--llvm/include/llvm/Bitcode/LLVMBitCodes.h1
-rw-r--r--llvm/include/llvm/CodeGen/AccelTable.h23
-rw-r--r--llvm/include/llvm/CodeGen/Analysis.h22
-rw-r--r--llvm/include/llvm/CodeGen/AsmPrinter.h28
-rw-r--r--llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h4
-rw-r--r--llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h15
-rw-r--r--llvm/include/llvm/CodeGen/BasicTTIImpl.h108
-rw-r--r--llvm/include/llvm/CodeGen/ByteProvider.h89
-rw-r--r--llvm/include/llvm/CodeGen/CallingConvLower.h51
-rw-r--r--llvm/include/llvm/CodeGen/CodeGenCommonISel.h15
-rw-r--r--llvm/include/llvm/CodeGen/CodeGenPassBuilder.h1
-rw-r--r--llvm/include/llvm/CodeGen/CommandFlags.h7
-rw-r--r--llvm/include/llvm/CodeGen/ComplexDeinterleavingPass.h8
-rw-r--r--llvm/include/llvm/CodeGen/CostTable.h2
-rw-r--r--llvm/include/llvm/CodeGen/DFAPacketizer.h30
-rw-r--r--llvm/include/llvm/CodeGen/DIE.h82
-rw-r--r--llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h2
-rw-r--r--llvm/include/llvm/CodeGen/DebugHandlerBase.h6
-rw-r--r--llvm/include/llvm/CodeGen/DetectDeadLanes.h119
-rw-r--r--llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h14
-rw-r--r--llvm/include/llvm/CodeGen/EdgeBundles.h2
-rw-r--r--llvm/include/llvm/CodeGen/ExecutionDomainFix.h10
-rw-r--r--llvm/include/llvm/CodeGen/ExpandReductions.h2
-rw-r--r--llvm/include/llvm/CodeGen/FastISel.h2
-rw-r--r--llvm/include/llvm/CodeGen/FunctionLoweringInfo.h13
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h2
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h11
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h46
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h605
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h (renamed from llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h)396
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h8
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h100
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h39
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h547
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h2
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h3
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h11
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h24
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h17
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/Localizer.h4
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h42
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h2
-rw-r--r--llvm/include/llvm/CodeGen/GlobalISel/Utils.h2
-rw-r--r--llvm/include/llvm/CodeGen/HardwareLoops.h76
-rw-r--r--llvm/include/llvm/CodeGen/ISDOpcodes.h80
-rw-r--r--llvm/include/llvm/CodeGen/IndirectThunks.h7
-rw-r--r--llvm/include/llvm/CodeGen/LatencyPriorityQueue.h2
-rw-r--r--llvm/include/llvm/CodeGen/LiveIntervals.h14
-rw-r--r--llvm/include/llvm/CodeGen/LivePhysRegs.h5
-rw-r--r--llvm/include/llvm/CodeGen/LiveRangeEdit.h5
-rw-r--r--llvm/include/llvm/CodeGen/LiveRegMatrix.h6
-rw-r--r--llvm/include/llvm/CodeGen/LiveRegUnits.h12
-rw-r--r--llvm/include/llvm/CodeGen/LiveStacks.h2
-rw-r--r--llvm/include/llvm/CodeGen/LiveVariables.h6
-rw-r--r--llvm/include/llvm/CodeGen/LowLevelType.h417
-rw-r--r--llvm/include/llvm/CodeGen/LowLevelTypeUtils.h45
-rw-r--r--llvm/include/llvm/CodeGen/MIRFSDiscriminator.h5
-rw-r--r--llvm/include/llvm/CodeGen/MIRFormatter.h2
-rw-r--r--llvm/include/llvm/CodeGen/MIRPrinter.h2
-rw-r--r--llvm/include/llvm/CodeGen/MIRSampleProfile.h8
-rw-r--r--llvm/include/llvm/CodeGen/MIRYamlMapping.h30
-rw-r--r--llvm/include/llvm/CodeGen/MachineBasicBlock.h35
-rw-r--r--llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h1
-rw-r--r--llvm/include/llvm/CodeGen/MachineCombinerPattern.h5
-rw-r--r--llvm/include/llvm/CodeGen/MachineFunction.h83
-rw-r--r--llvm/include/llvm/CodeGen/MachineInstr.h184
-rw-r--r--llvm/include/llvm/CodeGen/MachineInstrBuilder.h3
-rw-r--r--llvm/include/llvm/CodeGen/MachineInstrBundle.h7
-rw-r--r--llvm/include/llvm/CodeGen/MachineLoopInfo.h2
-rw-r--r--llvm/include/llvm/CodeGen/MachineMemOperand.h23
-rw-r--r--llvm/include/llvm/CodeGen/MachineModuleInfo.h8
-rw-r--r--llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h4
-rw-r--r--llvm/include/llvm/CodeGen/MachineOperand.h3
-rw-r--r--llvm/include/llvm/CodeGen/MachineOutliner.h4
-rw-r--r--llvm/include/llvm/CodeGen/MachinePassManager.h3
-rw-r--r--llvm/include/llvm/CodeGen/MachinePassRegistry.def1
-rw-r--r--llvm/include/llvm/CodeGen/MachinePipeliner.h11
-rw-r--r--llvm/include/llvm/CodeGen/MachineRegisterInfo.h31
-rw-r--r--llvm/include/llvm/CodeGen/MachineSSAContext.h12
-rw-r--r--llvm/include/llvm/CodeGen/MachineSSAUpdater.h6
-rw-r--r--llvm/include/llvm/CodeGen/MachineScheduler.h258
-rw-r--r--llvm/include/llvm/CodeGen/MachineTraceMetrics.h23
-rw-r--r--llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h14
-rw-r--r--llvm/include/llvm/CodeGen/MachineValueType.h555
-rw-r--r--llvm/include/llvm/CodeGen/ModuloSchedule.h14
-rw-r--r--llvm/include/llvm/CodeGen/PBQP/CostAllocator.h2
-rw-r--r--llvm/include/llvm/CodeGen/Passes.h22
-rw-r--r--llvm/include/llvm/CodeGen/RDFGraph.h1243
-rw-r--r--llvm/include/llvm/CodeGen/RDFLiveness.h225
-rw-r--r--llvm/include/llvm/CodeGen/RDFRegisters.h516
-rw-r--r--llvm/include/llvm/CodeGen/ReachingDefAnalysis.h10
-rw-r--r--llvm/include/llvm/CodeGen/RegAllocRegistry.h2
-rw-r--r--llvm/include/llvm/CodeGen/Register.h72
-rw-r--r--llvm/include/llvm/CodeGen/RegisterBank.h10
-rw-r--r--llvm/include/llvm/CodeGen/RegisterBankInfo.h33
-rw-r--r--llvm/include/llvm/CodeGen/RegisterPressure.h6
-rw-r--r--llvm/include/llvm/CodeGen/RegisterScavenging.h36
-rw-r--r--llvm/include/llvm/CodeGen/RegisterUsageInfo.h2
-rw-r--r--llvm/include/llvm/CodeGen/RuntimeLibcalls.h8
-rw-r--r--llvm/include/llvm/CodeGen/ScheduleDAG.h11
-rw-r--r--llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h6
-rw-r--r--llvm/include/llvm/CodeGen/ScoreboardHazardRecognizer.h3
-rw-r--r--llvm/include/llvm/CodeGen/SelectionDAG.h89
-rw-r--r--llvm/include/llvm/CodeGen/SelectionDAGISel.h3
-rw-r--r--llvm/include/llvm/CodeGen/SelectionDAGNodes.h44
-rw-r--r--llvm/include/llvm/CodeGen/SlotIndexes.h2
-rw-r--r--llvm/include/llvm/CodeGen/StackProtector.h36
-rw-r--r--llvm/include/llvm/CodeGen/SwitchLoweringUtils.h12
-rw-r--r--llvm/include/llvm/CodeGen/TargetCallingConv.h2
-rw-r--r--llvm/include/llvm/CodeGen/TargetFrameLowering.h12
-rw-r--r--llvm/include/llvm/CodeGen/TargetInstrInfo.h73
-rw-r--r--llvm/include/llvm/CodeGen/TargetLowering.h240
-rw-r--r--llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h5
-rw-r--r--llvm/include/llvm/CodeGen/TargetPassConfig.h4
-rw-r--r--llvm/include/llvm/CodeGen/TargetRegisterInfo.h14
-rw-r--r--llvm/include/llvm/CodeGen/TargetSchedule.h2
-rw-r--r--llvm/include/llvm/CodeGen/TargetSubtargetInfo.h5
-rw-r--r--llvm/include/llvm/CodeGen/TileShapeInfo.h4
-rw-r--r--llvm/include/llvm/CodeGen/VLIWMachineScheduler.h7
-rw-r--r--llvm/include/llvm/CodeGen/ValueTypes.h29
-rw-r--r--llvm/include/llvm/CodeGen/ValueTypes.td486
-rw-r--r--llvm/include/llvm/CodeGen/WasmAddressSpaces.h48
-rw-r--r--llvm/include/llvm/CodeGen/WasmEHFuncInfo.h8
-rw-r--r--llvm/include/llvm/CodeGen/WinEHFuncInfo.h9
-rw-r--r--llvm/include/llvm/DWARFLinker/DWARFLinker.h431
-rw-r--r--llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h50
-rw-r--r--llvm/include/llvm/DWARFLinker/DWARFStreamer.h166
-rw-r--r--llvm/include/llvm/DWARFLinkerParallel/AddressesMap.h70
-rw-r--r--llvm/include/llvm/DWARFLinkerParallel/DWARFFile.h73
-rw-r--r--llvm/include/llvm/DWARFLinkerParallel/DWARFLinker.h213
-rw-r--r--llvm/include/llvm/DWARFLinkerParallel/StringPool.h74
-rw-r--r--llvm/include/llvm/DWARFLinkerParallel/StringTable.h88
-rw-r--r--llvm/include/llvm/DWP/DWP.h3
-rw-r--r--llvm/include/llvm/DebugInfo/BTF/BTF.def (renamed from llvm/lib/Target/BPF/BTF.def)0
-rw-r--r--llvm/include/llvm/DebugInfo/BTF/BTF.h (renamed from llvm/lib/Target/BPF/BTF.h)18
-rw-r--r--llvm/include/llvm/DebugInfo/BTF/BTFContext.h58
-rw-r--r--llvm/include/llvm/DebugInfo/BTF/BTFParser.h81
-rw-r--r--llvm/include/llvm/DebugInfo/CodeView/CodeView.h6
-rw-r--r--llvm/include/llvm/DebugInfo/DIContext.h6
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h12
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h215
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h14
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h14
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h2
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h37
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h34
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h17
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFLocationExpression.h2
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h14
-rw-r--r--llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h11
-rw-r--r--llvm/include/llvm/DebugInfo/GSYM/FileWriter.h2
-rw-r--r--llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h16
-rw-r--r--llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h171
-rw-r--r--llvm/include/llvm/DebugInfo/GSYM/LookupResult.h10
-rw-r--r--llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h20
-rw-r--r--llvm/include/llvm/DebugInfo/LogicalView/Core/LVLocation.h21
-rw-r--r--llvm/include/llvm/DebugInfo/LogicalView/Core/LVObject.h15
-rw-r--r--llvm/include/llvm/DebugInfo/LogicalView/Core/LVOptions.h4
-rw-r--r--llvm/include/llvm/DebugInfo/LogicalView/Core/LVReader.h115
-rw-r--r--llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h57
-rw-r--r--llvm/include/llvm/DebugInfo/LogicalView/Core/LVStringPool.h7
-rw-r--r--llvm/include/llvm/DebugInfo/LogicalView/Core/LVSupport.h84
-rw-r--r--llvm/include/llvm/DebugInfo/LogicalView/Core/LVSymbol.h15
-rw-r--r--llvm/include/llvm/DebugInfo/LogicalView/Core/LVType.h8
-rw-r--r--llvm/include/llvm/DebugInfo/LogicalView/LVReaderHandler.h11
-rw-r--r--llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h25
-rw-r--r--llvm/include/llvm/DebugInfo/LogicalView/Readers/LVCodeViewReader.h236
-rw-r--r--llvm/include/llvm/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.h477
-rw-r--r--llvm/include/llvm/DebugInfo/LogicalView/Readers/LVELFReader.h13
-rw-r--r--llvm/include/llvm/DebugInfo/MSF/MSFError.h29
-rw-r--r--llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h28
-rw-r--r--llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h11
-rw-r--r--llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h13
-rw-r--r--llvm/include/llvm/Debuginfod/Debuginfod.h6
-rw-r--r--llvm/include/llvm/Debuginfod/HTTPClient.h2
-rw-r--r--llvm/include/llvm/Debuginfod/HTTPServer.h10
-rw-r--r--llvm/include/llvm/Demangle/Demangle.h27
-rw-r--r--llvm/include/llvm/Demangle/ItaniumDemangle.h279
-rw-r--r--llvm/include/llvm/Demangle/MicrosoftDemangle.h135
-rw-r--r--llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h12
-rw-r--r--llvm/include/llvm/Demangle/StringView.h122
-rw-r--r--llvm/include/llvm/Demangle/StringViewExtras.h38
-rw-r--r--llvm/include/llvm/Demangle/Utility.h22
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h2
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h38
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/ELF_ppc64.h50
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h4
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h215
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h3
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h17
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h297
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h127
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/i386.h174
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/ppc64.h333
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/riscv.h12
-rw-r--r--llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h45
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/COFFPlatform.h24
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Core.h59
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h24
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h9
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h8
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h2
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h56
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h32
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h30
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h139
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h24
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h133
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h33
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h30
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/ObjectFileInterface.h2
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h157
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h21
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h54
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/MemoryFlags.h57
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/ObjectFormats.h69
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h8
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h61
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/Speculation.h8
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h232
-rw-r--r--llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h18
-rw-r--r--llvm/include/llvm/ExecutionEngine/RuntimeDyldChecker.h3
-rw-r--r--llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h3
-rw-r--r--llvm/include/llvm/Frontend/Debug/Options.h62
-rw-r--r--llvm/include/llvm/Frontend/OpenACC/ACC.td49
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMP.td78
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMPConstants.h9
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h36
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h919
-rw-r--r--llvm/include/llvm/Frontend/OpenMP/OMPKinds.def18
-rw-r--r--llvm/include/llvm/FuzzMutate/IRMutator.h29
-rw-r--r--llvm/include/llvm/FuzzMutate/OpDescriptor.h77
-rw-r--r--llvm/include/llvm/FuzzMutate/Operations.h5
-rw-r--r--llvm/include/llvm/FuzzMutate/RandomIRBuilder.h50
-rw-r--r--llvm/include/llvm/IR/Argument.h4
-rw-r--r--llvm/include/llvm/IR/AttributeMask.h86
-rw-r--r--llvm/include/llvm/IR/Attributes.h83
-rw-r--r--llvm/include/llvm/IR/Attributes.td11
-rw-r--r--llvm/include/llvm/IR/BasicBlock.h14
-rw-r--r--llvm/include/llvm/IR/CallingConv.h18
-rw-r--r--llvm/include/llvm/IR/ConstantFolder.h5
-rw-r--r--llvm/include/llvm/IR/ConstantRange.h4
-rw-r--r--llvm/include/llvm/IR/Constants.h36
-rw-r--r--llvm/include/llvm/IR/ConstrainedOps.def1
-rw-r--r--llvm/include/llvm/IR/CycleInfo.h31
-rw-r--r--llvm/include/llvm/IR/DIBuilder.h55
-rw-r--r--llvm/include/llvm/IR/DataLayout.h77
-rw-r--r--llvm/include/llvm/IR/DebugInfo.h25
-rw-r--r--llvm/include/llvm/IR/DebugInfoMetadata.h557
-rw-r--r--llvm/include/llvm/IR/DerivedTypes.h34
-rw-r--r--llvm/include/llvm/IR/Dominators.h2
-rw-r--r--llvm/include/llvm/IR/EHPersonalities.h (renamed from llvm/include/llvm/Analysis/EHPersonalities.h)6
-rw-r--r--llvm/include/llvm/IR/FMF.h3
-rw-r--r--llvm/include/llvm/IR/Function.h18
-rw-r--r--llvm/include/llvm/IR/GCStrategy.h2
-rw-r--r--llvm/include/llvm/IR/GetElementPtrTypeIterator.h12
-rw-r--r--llvm/include/llvm/IR/GlobalObject.h6
-rw-r--r--llvm/include/llvm/IR/IRBuilder.h53
-rw-r--r--llvm/include/llvm/IR/InstrTypes.h53
-rw-r--r--llvm/include/llvm/IR/Instruction.h67
-rw-r--r--llvm/include/llvm/IR/Instructions.h76
-rw-r--r--llvm/include/llvm/IR/IntrinsicInst.h251
-rw-r--r--llvm/include/llvm/IR/Intrinsics.h28
-rw-r--r--llvm/include/llvm/IR/Intrinsics.td535
-rw-r--r--llvm/include/llvm/IR/IntrinsicsAArch64.td582
-rw-r--r--llvm/include/llvm/IR/IntrinsicsAMDGPU.td369
-rw-r--r--llvm/include/llvm/IR/IntrinsicsARM.td6
-rw-r--r--llvm/include/llvm/IR/IntrinsicsHexagon.td23
-rw-r--r--llvm/include/llvm/IR/IntrinsicsNVVM.td326
-rw-r--r--llvm/include/llvm/IR/IntrinsicsPowerPC.td60
-rw-r--r--llvm/include/llvm/IR/IntrinsicsRISCV.td567
-rw-r--r--llvm/include/llvm/IR/IntrinsicsRISCVXTHead.td12
-rw-r--r--llvm/include/llvm/IR/IntrinsicsRISCVXsf.td135
-rw-r--r--llvm/include/llvm/IR/IntrinsicsSPIRV.td1
-rw-r--r--llvm/include/llvm/IR/IntrinsicsSystemZ.td2
-rw-r--r--llvm/include/llvm/IR/IntrinsicsWebAssembly.td18
-rw-r--r--llvm/include/llvm/IR/IntrinsicsX86.td145
-rw-r--r--llvm/include/llvm/IR/LLVMContext.h5
-rw-r--r--llvm/include/llvm/IR/MDBuilder.h2
-rw-r--r--llvm/include/llvm/IR/Metadata.h23
-rw-r--r--llvm/include/llvm/IR/Module.h60
-rw-r--r--llvm/include/llvm/IR/ModuleSummaryIndex.h107
-rw-r--r--llvm/include/llvm/IR/OptBisect.h2
-rw-r--r--llvm/include/llvm/IR/PassManager.h16
-rw-r--r--llvm/include/llvm/IR/PatternMatch.h57
-rw-r--r--llvm/include/llvm/IR/PseudoProbe.h19
-rw-r--r--llvm/include/llvm/IR/ReplaceConstant.h38
-rw-r--r--llvm/include/llvm/IR/RuntimeLibcalls.def14
-rw-r--r--llvm/include/llvm/IR/SSAContext.h6
-rw-r--r--llvm/include/llvm/IR/StructuralHash.h7
-rw-r--r--llvm/include/llvm/IR/Type.h27
-rw-r--r--llvm/include/llvm/IR/VPIntrinsics.def126
-rw-r--r--llvm/include/llvm/IR/Value.h5
-rw-r--r--llvm/include/llvm/InitializePasses.h86
-rw-r--r--llvm/include/llvm/InterfaceStub/IFSStub.h7
-rw-r--r--llvm/include/llvm/LTO/Config.h6
-rw-r--r--llvm/include/llvm/LTO/LTO.h39
-rw-r--r--llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h2
-rw-r--r--llvm/include/llvm/LinkAllPasses.h58
-rw-r--r--llvm/include/llvm/MC/DXContainerPSVInfo.h51
-rw-r--r--llvm/include/llvm/MC/MCAsmBackend.h23
-rw-r--r--llvm/include/llvm/MC/MCAsmInfo.h10
-rw-r--r--llvm/include/llvm/MC/MCAsmMacro.h4
-rw-r--r--llvm/include/llvm/MC/MCAssembler.h3
-rw-r--r--llvm/include/llvm/MC/MCCodeEmitter.h20
-rw-r--r--llvm/include/llvm/MC/MCCodeView.h3
-rw-r--r--llvm/include/llvm/MC/MCContext.h31
-rw-r--r--llvm/include/llvm/MC/MCDXContainerWriter.h2
-rw-r--r--llvm/include/llvm/MC/MCDirectives.h1
-rw-r--r--llvm/include/llvm/MC/MCDwarf.h117
-rw-r--r--llvm/include/llvm/MC/MCELFObjectWriter.h2
-rw-r--r--llvm/include/llvm/MC/MCExpr.h2
-rw-r--r--llvm/include/llvm/MC/MCFragment.h7
-rw-r--r--llvm/include/llvm/MC/MCInstrAnalysis.h2
-rw-r--r--llvm/include/llvm/MC/MCInstrDesc.h16
-rw-r--r--llvm/include/llvm/MC/MCMachObjectWriter.h2
-rw-r--r--llvm/include/llvm/MC/MCObjectFileInfo.h4
-rw-r--r--llvm/include/llvm/MC/MCObjectStreamer.h10
-rw-r--r--llvm/include/llvm/MC/MCObjectWriter.h5
-rw-r--r--llvm/include/llvm/MC/MCParser/AsmLexer.h2
-rw-r--r--llvm/include/llvm/MC/MCParser/MCAsmLexer.h2
-rw-r--r--llvm/include/llvm/MC/MCParser/MCAsmParser.h6
-rw-r--r--llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h2
-rw-r--r--llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h2
-rw-r--r--llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h66
-rw-r--r--llvm/include/llvm/MC/MCPseudoProbe.h23
-rw-r--r--llvm/include/llvm/MC/MCRegister.h53
-rw-r--r--llvm/include/llvm/MC/MCRegisterInfo.h312
-rw-r--r--llvm/include/llvm/MC/MCSchedule.h23
-rw-r--r--llvm/include/llvm/MC/MCSection.h2
-rw-r--r--llvm/include/llvm/MC/MCStreamer.h55
-rw-r--r--llvm/include/llvm/MC/MCSubtargetInfo.h8
-rw-r--r--llvm/include/llvm/MC/MCSymbol.h35
-rw-r--r--llvm/include/llvm/MC/MCSymbolCOFF.h19
-rw-r--r--llvm/include/llvm/MC/MCTargetOptions.h4
-rw-r--r--llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h2
-rw-r--r--llvm/include/llvm/MC/MCWinCOFFObjectWriter.h66
-rw-r--r--llvm/include/llvm/MC/MCWinEH.h2
-rw-r--r--llvm/include/llvm/MC/MCXCOFFStreamer.h6
-rw-r--r--llvm/include/llvm/MC/TargetRegistry.h12
-rw-r--r--llvm/include/llvm/MCA/CodeEmitter.h5
-rw-r--r--llvm/include/llvm/MCA/CustomBehaviour.h18
-rw-r--r--llvm/include/llvm/MCA/HWEventListener.h5
-rw-r--r--llvm/include/llvm/MCA/HardwareUnits/LSUnit.h19
-rw-r--r--llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h6
-rw-r--r--llvm/include/llvm/MCA/IncrementalSourceMgr.h6
-rw-r--r--llvm/include/llvm/MCA/InstrBuilder.h8
-rw-r--r--llvm/include/llvm/MCA/Pipeline.h6
-rw-r--r--llvm/include/llvm/MCA/Stages/InOrderIssueStage.h6
-rw-r--r--llvm/include/llvm/MCA/Stages/Stage.h4
-rw-r--r--llvm/include/llvm/MCA/Support.h2
-rw-r--r--llvm/include/llvm/Object/Archive.h10
-rw-r--r--llvm/include/llvm/Object/ArchiveWriter.h3
-rw-r--r--llvm/include/llvm/Object/Binary.h7
-rw-r--r--llvm/include/llvm/Object/BuildID.h5
-rw-r--r--llvm/include/llvm/Object/COFF.h62
-rw-r--r--llvm/include/llvm/Object/COFFImportFile.h2
-rw-r--r--llvm/include/llvm/Object/COFFModuleDefinition.h5
-rw-r--r--llvm/include/llvm/Object/DXContainer.h119
-rw-r--r--llvm/include/llvm/Object/ELF.h53
-rw-r--r--llvm/include/llvm/Object/ELFObjectFile.h23
-rw-r--r--llvm/include/llvm/Object/ELFTypes.h98
-rw-r--r--llvm/include/llvm/Object/GOFF.h284
-rw-r--r--llvm/include/llvm/Object/GOFFObjectFile.h130
-rw-r--r--llvm/include/llvm/Object/IRObjectFile.h4
-rw-r--r--llvm/include/llvm/Object/MachO.h12
-rw-r--r--llvm/include/llvm/Object/MachOUniversal.h2
-rw-r--r--llvm/include/llvm/Object/ObjectFile.h57
-rw-r--r--llvm/include/llvm/Object/OffloadBinary.h8
-rw-r--r--llvm/include/llvm/Object/SymbolicFile.h2
-rw-r--r--llvm/include/llvm/Object/TapiFile.h13
-rw-r--r--llvm/include/llvm/Object/Wasm.h2
-rw-r--r--llvm/include/llvm/Object/WindowsResource.h2
-rw-r--r--llvm/include/llvm/Object/XCOFFObjectFile.h16
-rw-r--r--llvm/include/llvm/ObjectYAML/COFFYAML.h29
-rw-r--r--llvm/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h2
-rw-r--r--llvm/include/llvm/ObjectYAML/DWARFEmitter.h2
-rw-r--r--llvm/include/llvm/ObjectYAML/DXContainerYAML.h30
-rw-r--r--llvm/include/llvm/Option/Arg.h20
-rw-r--r--llvm/include/llvm/Option/ArgList.h6
-rw-r--r--llvm/include/llvm/Option/OptParser.td5
-rw-r--r--llvm/include/llvm/Option/OptTable.h5
-rw-r--r--llvm/include/llvm/Pass.h3
-rw-r--r--llvm/include/llvm/PassRegistry.h4
-rw-r--r--llvm/include/llvm/Passes/PassBuilder.h79
-rw-r--r--llvm/include/llvm/Passes/StandardInstrumentations.h10
-rw-r--r--llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h22
-rw-r--r--llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h3
-rw-r--r--llvm/include/llvm/ProfileData/GCOV.h4
-rw-r--r--llvm/include/llvm/ProfileData/InstrProf.h110
-rw-r--r--llvm/include/llvm/ProfileData/InstrProfData.inc5
-rw-r--r--llvm/include/llvm/ProfileData/InstrProfReader.h55
-rw-r--r--llvm/include/llvm/ProfileData/InstrProfWriter.h28
-rw-r--r--llvm/include/llvm/ProfileData/ItaniumManglingCanonicalizer.h (renamed from llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h)6
-rw-r--r--llvm/include/llvm/ProfileData/MemProfData.inc26
-rw-r--r--llvm/include/llvm/ProfileData/ProfileCommon.h3
-rw-r--r--llvm/include/llvm/ProfileData/RawMemProfReader.h27
-rw-r--r--llvm/include/llvm/ProfileData/SampleProf.h191
-rw-r--r--llvm/include/llvm/ProfileData/SampleProfReader.h149
-rw-r--r--llvm/include/llvm/ProfileData/SampleProfWriter.h120
-rw-r--r--llvm/include/llvm/ProfileData/SymbolRemappingReader.h (renamed from llvm/include/llvm/Support/SymbolRemappingReader.h)8
-rw-r--r--llvm/include/llvm/Remarks/Remark.h44
-rw-r--r--llvm/include/llvm/Remarks/RemarkLinker.h13
-rw-r--r--llvm/include/llvm/Support/AArch64TargetParser.h15
-rw-r--r--llvm/include/llvm/Support/AMDHSAKernelDescriptor.h7
-rw-r--r--llvm/include/llvm/Support/ARMTargetParser.h15
-rw-r--r--llvm/include/llvm/Support/ARMTargetParserCommon.h15
-rw-r--r--llvm/include/llvm/Support/Alignment.h2
-rw-r--r--llvm/include/llvm/Support/AllocatorBase.h8
-rw-r--r--llvm/include/llvm/Support/AtomicOrdering.h3
-rw-r--r--llvm/include/llvm/Support/BalancedPartitioning.h202
-rw-r--r--llvm/include/llvm/Support/BlockFrequency.h49
-rw-r--r--llvm/include/llvm/Support/CSKYTargetParser.h15
-rw-r--r--llvm/include/llvm/Support/CachePruning.h4
-rw-r--r--llvm/include/llvm/Support/Casting.h13
-rw-r--r--llvm/include/llvm/Support/CheckedArithmetic.h25
-rw-r--r--llvm/include/llvm/Support/CommandLine.h24
-rw-r--r--llvm/include/llvm/Support/Compiler.h20
-rw-r--r--llvm/include/llvm/Support/ConvertEBCDIC.h28
-rw-r--r--llvm/include/llvm/Support/Discriminator.h19
-rw-r--r--llvm/include/llvm/Support/EndianStream.h11
-rw-r--r--llvm/include/llvm/Support/Error.h18
-rw-r--r--llvm/include/llvm/Support/ErrorOr.h13
-rw-r--r--llvm/include/llvm/Support/ExitCodes.h6
-rw-r--r--llvm/include/llvm/Support/FileUtilities.h35
-rw-r--r--llvm/include/llvm/Support/Format.h4
-rw-r--r--llvm/include/llvm/Support/FormatProviders.h11
-rw-r--r--llvm/include/llvm/Support/FormatVariadicDetails.h15
-rw-r--r--llvm/include/llvm/Support/GenericDomTree.h6
-rw-r--r--llvm/include/llvm/Support/GenericLoopInfo.h727
-rw-r--r--llvm/include/llvm/Support/GenericLoopInfoImpl.h (renamed from llvm/include/llvm/Analysis/LoopInfoImpl.h)38
-rw-r--r--llvm/include/llvm/Support/GraphWriter.h2
-rw-r--r--llvm/include/llvm/Support/Host.h4
-rw-r--r--llvm/include/llvm/Support/JSON.h34
-rw-r--r--llvm/include/llvm/Support/KnownBits.h110
-rw-r--r--llvm/include/llvm/Support/LEB128.h2
-rw-r--r--llvm/include/llvm/Support/LLVMDriver.h29
-rw-r--r--llvm/include/llvm/Support/LoongArchTargetParser.h15
-rw-r--r--llvm/include/llvm/Support/LowLevelTypeImpl.h431
-rw-r--r--llvm/include/llvm/Support/MachineValueType.h1576
-rw-r--r--llvm/include/llvm/Support/MathExtras.h146
-rw-r--r--llvm/include/llvm/Support/ModRef.h173
-rw-r--r--llvm/include/llvm/Support/OnDiskHashTable.h2
-rw-r--r--llvm/include/llvm/Support/PGOOptions.h43
-rw-r--r--llvm/include/llvm/Support/Parallel.h38
-rw-r--r--llvm/include/llvm/Support/PerThreadBumpPtrAllocator.h120
-rw-r--r--llvm/include/llvm/Support/RISCVISAInfo.h19
-rw-r--r--llvm/include/llvm/Support/ReverseIteration.h2
-rw-r--r--llvm/include/llvm/Support/SMLoc.h10
-rw-r--r--llvm/include/llvm/Support/ScaledNumber.h11
-rw-r--r--llvm/include/llvm/Support/ScopedPrinter.h88
-rw-r--r--llvm/include/llvm/Support/Signals.h11
-rw-r--r--llvm/include/llvm/Support/SpecialCaseList.h5
-rw-r--r--llvm/include/llvm/Support/StringSaver.h2
-rw-r--r--llvm/include/llvm/Support/SuffixTree.h252
-rw-r--r--llvm/include/llvm/Support/SuffixTreeNode.h171
-rw-r--r--llvm/include/llvm/Support/SwapByteOrder.h12
-rw-r--r--llvm/include/llvm/Support/TargetOpcodes.def14
-rw-r--r--llvm/include/llvm/Support/TaskQueue.h138
-rw-r--r--llvm/include/llvm/Support/Timer.h14
-rw-r--r--llvm/include/llvm/Support/TrailingObjects.h4
-rw-r--r--llvm/include/llvm/Support/TrigramIndex.h67
-rw-r--r--llvm/include/llvm/Support/TypeName.h2
-rw-r--r--llvm/include/llvm/Support/TypeSize.h6
-rw-r--r--llvm/include/llvm/Support/VirtualFileSystem.h14
-rw-r--r--llvm/include/llvm/Support/WithColor.h2
-rw-r--r--llvm/include/llvm/Support/X86FoldTablesUtils.h58
-rw-r--r--llvm/include/llvm/Support/X86TargetParser.def15
-rw-r--r--llvm/include/llvm/Support/X86TargetParser.h15
-rw-r--r--llvm/include/llvm/Support/YAMLTraits.h7
-rw-r--r--llvm/include/llvm/Support/raw_ostream.h4
-rw-r--r--llvm/include/llvm/Support/type_traits.h17
-rw-r--r--llvm/include/llvm/Support/xxhash.h5
-rw-r--r--llvm/include/llvm/TableGen/DirectiveEmitter.h5
-rw-r--r--llvm/include/llvm/TableGen/Error.h2
-rw-r--r--llvm/include/llvm/TableGen/Main.h5
-rw-r--r--llvm/include/llvm/TableGen/Record.h173
-rw-r--r--llvm/include/llvm/TableGen/TableGenBackend.h34
-rw-r--r--llvm/include/llvm/Target/GenericOpcodes.td44
-rw-r--r--llvm/include/llvm/Target/GlobalISel/Combine.td109
-rw-r--r--llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td3
-rw-r--r--llvm/include/llvm/Target/GlobalISel/Target.td2
-rw-r--r--llvm/include/llvm/Target/Target.td64
-rw-r--r--llvm/include/llvm/Target/TargetMachine.h7
-rw-r--r--llvm/include/llvm/Target/TargetOptions.h42
-rw-r--r--llvm/include/llvm/Target/TargetSchedule.td6
-rw-r--r--llvm/include/llvm/Target/TargetSelectionDAG.td28
-rw-r--r--llvm/include/llvm/TargetParser/AArch64TargetParser.h266
-rw-r--r--llvm/include/llvm/TargetParser/ARMTargetParser.h18
-rw-r--r--llvm/include/llvm/TargetParser/LoongArchTargetParser.def8
-rw-r--r--llvm/include/llvm/TargetParser/LoongArchTargetParser.h8
-rw-r--r--llvm/include/llvm/TargetParser/RISCVTargetParser.h18
-rw-r--r--llvm/include/llvm/TargetParser/SubtargetFeature.h (renamed from llvm/include/llvm/MC/SubtargetFeature.h)8
-rw-r--r--llvm/include/llvm/TargetParser/TargetParser.h22
-rw-r--r--llvm/include/llvm/TargetParser/Triple.h60
-rw-r--r--llvm/include/llvm/TargetParser/X86TargetParser.def52
-rw-r--r--llvm/include/llvm/TargetParser/X86TargetParser.h7
-rw-r--r--llvm/include/llvm/TextAPI/InterfaceFile.h193
-rw-r--r--llvm/include/llvm/TextAPI/PackedVersion.h3
-rw-r--r--llvm/include/llvm/TextAPI/Platform.h3
-rw-r--r--llvm/include/llvm/TextAPI/Symbol.h37
-rw-r--r--llvm/include/llvm/TextAPI/SymbolSet.h182
-rw-r--r--llvm/include/llvm/TextAPI/Target.h16
-rw-r--r--llvm/include/llvm/TextAPI/TextAPIWriter.h3
-rw-r--r--llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h2
-rw-r--r--llvm/include/llvm/Transforms/Coroutines/CoroSplit.h8
-rw-r--r--llvm/include/llvm/Transforms/IPO.h154
-rw-r--r--llvm/include/llvm/Transforms/IPO/Attributor.h1246
-rw-r--r--llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h1
-rw-r--r--llvm/include/llvm/Transforms/IPO/EmbedBitcodePass.h58
-rw-r--r--llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h4
-rw-r--r--llvm/include/llvm/Transforms/IPO/FunctionAttrs.h13
-rw-r--r--llvm/include/llvm/Transforms/IPO/FunctionImport.h16
-rw-r--r--llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h130
-rw-r--r--llvm/include/llvm/Transforms/IPO/GlobalDCE.h8
-rw-r--r--llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h5
-rw-r--r--llvm/include/llvm/Transforms/IPO/Inliner.h61
-rw-r--r--llvm/include/llvm/Transforms/IPO/Internalize.h11
-rw-r--r--llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h59
-rw-r--r--llvm/include/llvm/Transforms/IPO/OpenMPOpt.h15
-rw-r--r--llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h135
-rw-r--r--llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h38
-rw-r--r--llvm/include/llvm/Transforms/IPO/SampleProfile.h11
-rw-r--r--llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h25
-rw-r--r--llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h7
-rw-r--r--llvm/include/llvm/Transforms/InstCombine/InstCombine.h30
-rw-r--r--llvm/include/llvm/Transforms/InstCombine/InstCombiner.h9
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h14
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/BlockCoverageInference.h86
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/CFGMST.h (renamed from llvm/lib/Transforms/Instrumentation/CFGMST.h)10
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h4
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h18
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h19
-rw-r--r--llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h13
-rw-r--r--llvm/include/llvm/Transforms/ObjCARC.h18
-rw-r--r--llvm/include/llvm/Transforms/Scalar.h237
-rw-r--r--llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h22
-rw-r--r--llvm/include/llvm/Transforms/Scalar/GVN.h11
-rw-r--r--llvm/include/llvm/Transforms/Scalar/JumpThreading.h71
-rw-r--r--llvm/include/llvm/Transforms/Scalar/LoopPassManager.h4
-rw-r--r--llvm/include/llvm/Transforms/Scalar/LoopRotation.h3
-rw-r--r--llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h5
-rw-r--r--llvm/include/llvm/Transforms/Scalar/PlaceSafepoints.h71
-rw-r--r--llvm/include/llvm/Transforms/Scalar/SCCP.h9
-rw-r--r--llvm/include/llvm/Transforms/Scalar/SROA.h9
-rw-r--r--llvm/include/llvm/Transforms/Scalar/Scalarizer.h14
-rw-r--r--llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h2
-rw-r--r--llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h8
-rw-r--r--llvm/include/llvm/Transforms/Utils.h37
-rw-r--r--llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h3
-rw-r--r--llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h6
-rw-r--r--llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h111
-rw-r--r--llvm/include/llvm/Transforms/Utils/BuildLibCalls.h15
-rw-r--r--llvm/include/llvm/Transforms/Utils/Cloning.h10
-rw-r--r--llvm/include/llvm/Transforms/Utils/CountVisits.h28
-rw-r--r--llvm/include/llvm/Transforms/Utils/Debugify.h4
-rw-r--r--llvm/include/llvm/Transforms/Utils/Evaluator.h8
-rw-r--r--llvm/include/llvm/Transforms/Utils/FunctionComparator.h4
-rw-r--r--llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h13
-rw-r--r--llvm/include/llvm/Transforms/Utils/InstructionWorklist.h11
-rw-r--r--llvm/include/llvm/Transforms/Utils/Local.h6
-rw-r--r--llvm/include/llvm/Transforms/Utils/LoopUtils.h29
-rw-r--r--llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h5
-rw-r--r--llvm/include/llvm/Transforms/Utils/MoveAutoInit.h29
-rw-r--r--llvm/include/llvm/Transforms/Utils/SCCPSolver.h39
-rw-r--r--llvm/include/llvm/Transforms/Utils/SSAUpdater.h11
-rw-r--r--llvm/include/llvm/Transforms/Utils/SampleProfileInference.h75
-rw-r--r--llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h150
-rw-r--r--llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h6
-rw-r--r--llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h6
-rw-r--r--llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h11
-rw-r--r--llvm/include/llvm/Transforms/Utils/SizeOpts.h28
-rw-r--r--llvm/include/llvm/Transforms/Utils/SymbolRewriter.h3
-rw-r--r--llvm/include/llvm/Transforms/Utils/VNCoercion.h26
-rw-r--r--llvm/include/llvm/Transforms/Utils/ValueMapper.h6
-rw-r--r--llvm/include/llvm/Transforms/Vectorize.h120
-rw-r--r--llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h49
-rw-r--r--llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h4
-rw-r--r--llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h27
-rw-r--r--llvm/include/llvm/WindowsDriver/MSVCPaths.h12
-rw-r--r--llvm/include/llvm/XRay/XRayRecord.h6
-rw-r--r--llvm/include/llvm/module.install.modulemap31
-rw-r--r--llvm/include/llvm/module.modulemap462
-rw-r--r--llvm/include/module.extern.modulemap (renamed from llvm/include/llvm/module.extern.modulemap)1
-rw-r--r--llvm/include/module.install.modulemap35
-rw-r--r--llvm/include/module.modulemap428
-rw-r--r--llvm/include/module.modulemap.build (renamed from llvm/include/llvm/module.modulemap.build)6
-rw-r--r--llvm/lib/Analysis/AliasAnalysis.cpp52
-rw-r--r--llvm/lib/Analysis/AliasAnalysisSummary.cpp104
-rw-r--r--llvm/lib/Analysis/AliasAnalysisSummary.h268
-rw-r--r--llvm/lib/Analysis/AliasSetTracker.cpp1
-rw-r--r--llvm/lib/Analysis/Analysis.cpp18
-rw-r--r--llvm/lib/Analysis/AssumeBundleQueries.cpp2
-rw-r--r--llvm/lib/Analysis/AssumptionCache.cpp35
-rw-r--r--llvm/lib/Analysis/BasicAliasAnalysis.cpp69
-rw-r--r--llvm/lib/Analysis/BlockFrequencyInfo.cpp5
-rw-r--r--llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp5
-rw-r--r--llvm/lib/Analysis/BranchProbabilityInfo.cpp19
-rw-r--r--llvm/lib/Analysis/CFGPrinter.cpp3
-rw-r--r--llvm/lib/Analysis/CGSCCPassManager.cpp47
-rw-r--r--llvm/lib/Analysis/CallGraphSCCPass.cpp1
-rw-r--r--llvm/lib/Analysis/CaptureTracking.cpp41
-rw-r--r--llvm/lib/Analysis/CmpInstAnalysis.cpp2
-rw-r--r--llvm/lib/Analysis/ConstantFolding.cpp219
-rw-r--r--llvm/lib/Analysis/ConstraintSystem.cpp170
-rw-r--r--llvm/lib/Analysis/CycleAnalysis.cpp3
-rw-r--r--llvm/lib/Analysis/DDG.cpp5
-rw-r--r--llvm/lib/Analysis/DemandedBits.cpp59
-rw-r--r--llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp8
-rw-r--r--llvm/lib/Analysis/DivergenceAnalysis.cpp409
-rw-r--r--llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp24
-rw-r--r--llvm/lib/Analysis/GuardUtils.cpp17
-rw-r--r--llvm/lib/Analysis/IRSimilarityIdentifier.cpp309
-rw-r--r--llvm/lib/Analysis/IVDescriptors.cpp187
-rw-r--r--llvm/lib/Analysis/IVUsers.cpp9
-rw-r--r--llvm/lib/Analysis/InlineAdvisor.cpp15
-rw-r--r--llvm/lib/Analysis/InlineCost.cpp131
-rw-r--r--llvm/lib/Analysis/InlineOrder.cpp27
-rw-r--r--llvm/lib/Analysis/InstructionPrecedenceTracking.cpp4
-rw-r--r--llvm/lib/Analysis/InstructionSimplify.cpp1095
-rw-r--r--llvm/lib/Analysis/InteractiveModelRunner.cpp82
-rw-r--r--llvm/lib/Analysis/LazyValueInfo.cpp47
-rw-r--r--llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp435
-rw-r--r--llvm/lib/Analysis/Lint.cpp78
-rw-r--r--llvm/lib/Analysis/Loads.cpp51
-rw-r--r--llvm/lib/Analysis/LoopAccessAnalysis.cpp389
-rw-r--r--llvm/lib/Analysis/LoopCacheAnalysis.cpp8
-rw-r--r--llvm/lib/Analysis/LoopInfo.cpp9
-rw-r--r--llvm/lib/Analysis/MLInlineAdvisor.cpp95
-rw-r--r--llvm/lib/Analysis/MemDepPrinter.cpp164
-rw-r--r--llvm/lib/Analysis/MemDerefPrinter.cpp59
-rw-r--r--llvm/lib/Analysis/MemoryBuiltins.cpp37
-rw-r--r--llvm/lib/Analysis/MemoryDependenceAnalysis.cpp8
-rw-r--r--llvm/lib/Analysis/MemoryLocation.cpp2
-rw-r--r--llvm/lib/Analysis/MemoryProfileInfo.cpp69
-rw-r--r--llvm/lib/Analysis/MemorySSA.cpp52
-rw-r--r--llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp38
-rw-r--r--llvm/lib/Analysis/ModuleSummaryAnalysis.cpp102
-rw-r--r--llvm/lib/Analysis/MustExecute.cpp105
-rw-r--r--llvm/lib/Analysis/PHITransAddr.cpp151
-rw-r--r--llvm/lib/Analysis/ProfileSummaryInfo.cpp150
-rw-r--r--llvm/lib/Analysis/ScalarEvolution.cpp1914
-rw-r--r--llvm/lib/Analysis/ScalarEvolutionDivision.cpp4
-rw-r--r--llvm/lib/Analysis/ScalarEvolutionNormalization.cpp15
-rw-r--r--llvm/lib/Analysis/StackLifetime.cpp6
-rw-r--r--llvm/lib/Analysis/StratifiedSets.h595
-rw-r--r--llvm/lib/Analysis/SyncDependenceAnalysis.cpp478
-rw-r--r--llvm/lib/Analysis/TargetLibraryInfo.cpp47
-rw-r--r--llvm/lib/Analysis/TargetTransformInfo.cpp93
-rw-r--r--llvm/lib/Analysis/TensorSpec.cpp21
-rw-r--r--llvm/lib/Analysis/TrainingLogger.cpp12
-rw-r--r--llvm/lib/Analysis/TypeMetadataUtils.cpp6
-rw-r--r--llvm/lib/Analysis/UniformityAnalysis.cpp74
-rw-r--r--llvm/lib/Analysis/ValueTracking.cpp3547
-rw-r--r--llvm/lib/Analysis/VectorUtils.cpp231
-rw-r--r--llvm/lib/AsmParser/LLLexer.cpp37
-rw-r--r--llvm/lib/AsmParser/LLParser.cpp243
-rw-r--r--llvm/lib/AsmParser/Parser.cpp4
-rw-r--r--llvm/lib/BinaryFormat/Dwarf.cpp37
-rw-r--r--llvm/lib/BinaryFormat/MachO.cpp4
-rw-r--r--llvm/lib/BinaryFormat/Magic.cpp12
-rw-r--r--llvm/lib/BinaryFormat/MsgPackDocument.cpp10
-rw-r--r--llvm/lib/BinaryFormat/MsgPackReader.cpp6
-rw-r--r--llvm/lib/Bitcode/Reader/BitcodeReader.cpp137
-rw-r--r--llvm/lib/Bitcode/Reader/MetadataLoader.cpp84
-rw-r--r--llvm/lib/Bitcode/Writer/BitcodeWriter.cpp53
-rw-r--r--llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp18
-rw-r--r--llvm/lib/CodeGen/AggressiveAntiDepBreaker.h3
-rw-r--r--llvm/lib/CodeGen/Analysis.cpp63
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp18
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp5
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp250
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp57
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp1
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp55
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h3
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DIE.cpp7
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp11
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp3
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h2
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h2
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp2
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp251
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h57
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp289
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h26
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp18
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfFile.h7
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp16
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h4
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp2
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp19
-rw-r--r--llvm/lib/CodeGen/AsmPrinter/WinException.cpp22
-rw-r--r--llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp738
-rw-r--r--llvm/lib/CodeGen/AtomicExpandPass.cpp5
-rw-r--r--llvm/lib/CodeGen/BasicBlockSections.cpp18
-rw-r--r--llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp92
-rw-r--r--llvm/lib/CodeGen/BranchFolding.cpp87
-rw-r--r--llvm/lib/CodeGen/BranchFolding.h16
-rw-r--r--llvm/lib/CodeGen/BranchRelaxation.cpp19
-rw-r--r--llvm/lib/CodeGen/BreakFalseDeps.cpp30
-rw-r--r--llvm/lib/CodeGen/CFIInstrInserter.cpp3
-rw-r--r--llvm/lib/CodeGen/CalcSpillWeights.cpp19
-rw-r--r--llvm/lib/CodeGen/CallBrPrepare.cpp231
-rw-r--r--llvm/lib/CodeGen/CallingConvLower.cpp29
-rw-r--r--llvm/lib/CodeGen/CodeGen.cpp9
-rw-r--r--llvm/lib/CodeGen/CodeGenCommonISel.cpp18
-rw-r--r--llvm/lib/CodeGen/CodeGenPrepare.cpp438
-rw-r--r--llvm/lib/CodeGen/CommandFlags.cpp65
-rw-r--r--llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp1670
-rw-r--r--llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp37
-rw-r--r--llvm/lib/CodeGen/DFAPacketizer.cpp36
-rw-r--r--llvm/lib/CodeGen/DeadMachineInstructionElim.cpp40
-rw-r--r--llvm/lib/CodeGen/DetectDeadLanes.cpp215
-rw-r--r--llvm/lib/CodeGen/DwarfEHPrepare.cpp4
-rw-r--r--llvm/lib/CodeGen/EarlyIfConversion.cpp86
-rw-r--r--llvm/lib/CodeGen/ExecutionDomainFix.cpp2
-rw-r--r--llvm/lib/CodeGen/ExpandMemCmp.cpp18
-rw-r--r--llvm/lib/CodeGen/ExpandPostRAPseudos.cpp73
-rw-r--r--llvm/lib/CodeGen/ExpandReductions.cpp34
-rw-r--r--llvm/lib/CodeGen/ExpandVectorPredication.cpp49
-rw-r--r--llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp4
-rw-r--r--llvm/lib/CodeGen/GCRootLowering.cpp4
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp9
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CallLowering.cpp5
-rw-r--r--llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp535
-rw-r--r--llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp68
-rw-r--r--llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp11
-rw-r--r--llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp154
-rw-r--r--llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp6
-rw-r--r--llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp13
-rw-r--r--llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp60
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp9
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Legalizer.cpp28
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp571
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp2
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp322
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Localizer.cpp4
-rw-r--r--llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp45
-rw-r--r--llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp10
-rw-r--r--llvm/lib/CodeGen/GlobalISel/Utils.cpp11
-rw-r--r--llvm/lib/CodeGen/GlobalMerge.cpp12
-rw-r--r--llvm/lib/CodeGen/HardwareLoops.cpp171
-rw-r--r--llvm/lib/CodeGen/IfConversion.cpp50
-rw-r--r--llvm/lib/CodeGen/ImplicitNullChecks.cpp10
-rw-r--r--llvm/lib/CodeGen/InlineSpiller.cpp117
-rw-r--r--llvm/lib/CodeGen/InterferenceCache.cpp15
-rw-r--r--llvm/lib/CodeGen/InterferenceCache.h5
-rw-r--r--llvm/lib/CodeGen/InterleavedAccessPass.cpp152
-rw-r--r--llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp6
-rw-r--r--llvm/lib/CodeGen/KCFI.cpp (renamed from llvm/lib/Target/AArch64/AArch64KCFI.cpp)70
-rw-r--r--llvm/lib/CodeGen/LLVMTargetMachine.cpp11
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp46
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h8
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp4
-rw-r--r--llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp14
-rw-r--r--llvm/lib/CodeGen/LiveInterval.cpp6
-rw-r--r--llvm/lib/CodeGen/LiveIntervals.cpp27
-rw-r--r--llvm/lib/CodeGen/LivePhysRegs.cpp11
-rw-r--r--llvm/lib/CodeGen/LiveRangeEdit.cpp17
-rw-r--r--llvm/lib/CodeGen/LiveRangeShrink.cpp4
-rw-r--r--llvm/lib/CodeGen/LiveRegMatrix.cpp16
-rw-r--r--llvm/lib/CodeGen/LiveVariables.cpp76
-rw-r--r--llvm/lib/CodeGen/LowLevelType.cpp101
-rw-r--r--llvm/lib/CodeGen/LowLevelTypeUtils.cpp85
-rw-r--r--llvm/lib/CodeGen/MIRFSDiscriminator.cpp80
-rw-r--r--llvm/lib/CodeGen/MIRParser/MILexer.cpp1
-rw-r--r--llvm/lib/CodeGen/MIRParser/MILexer.h1
-rw-r--r--llvm/lib/CodeGen/MIRParser/MIParser.cpp21
-rw-r--r--llvm/lib/CodeGen/MIRParser/MIRParser.cpp74
-rw-r--r--llvm/lib/CodeGen/MIRPrinter.cpp34
-rw-r--r--llvm/lib/CodeGen/MIRSampleProfile.cpp95
-rw-r--r--llvm/lib/CodeGen/MIRVRegNamerUtils.cpp2
-rw-r--r--llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp47
-rw-r--r--llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp42
-rw-r--r--llvm/lib/CodeGen/MachineBasicBlock.cpp97
-rw-r--r--llvm/lib/CodeGen/MachineBlockPlacement.cpp31
-rw-r--r--llvm/lib/CodeGen/MachineCSE.cpp24
-rw-r--r--llvm/lib/CodeGen/MachineCheckDebugify.cpp1
-rw-r--r--llvm/lib/CodeGen/MachineCombiner.cpp98
-rw-r--r--llvm/lib/CodeGen/MachineCopyPropagation.cpp494
-rw-r--r--llvm/lib/CodeGen/MachineDebugify.cpp4
-rw-r--r--llvm/lib/CodeGen/MachineFrameInfo.cpp4
-rw-r--r--llvm/lib/CodeGen/MachineFunction.cpp54
-rw-r--r--llvm/lib/CodeGen/MachineFunctionSplitter.cpp126
-rw-r--r--llvm/lib/CodeGen/MachineInstr.cpp100
-rw-r--r--llvm/lib/CodeGen/MachineInstrBundle.cpp37
-rw-r--r--llvm/lib/CodeGen/MachineLICM.cpp55
-rw-r--r--llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp104
-rw-r--r--llvm/lib/CodeGen/MachineLoopInfo.cpp2
-rw-r--r--llvm/lib/CodeGen/MachineModuleInfo.cpp11
-rw-r--r--llvm/lib/CodeGen/MachineOperand.cpp26
-rw-r--r--llvm/lib/CodeGen/MachineOutliner.cpp298
-rw-r--r--llvm/lib/CodeGen/MachinePassManager.cpp2
-rw-r--r--llvm/lib/CodeGen/MachinePipeliner.cpp54
-rw-r--r--llvm/lib/CodeGen/MachineRegisterInfo.cpp22
-rw-r--r--llvm/lib/CodeGen/MachineSSAContext.cpp10
-rw-r--r--llvm/lib/CodeGen/MachineScheduler.cpp412
-rw-r--r--llvm/lib/CodeGen/MachineSink.cpp159
-rw-r--r--llvm/lib/CodeGen/MachineSizeOpts.cpp166
-rw-r--r--llvm/lib/CodeGen/MachineTraceMetrics.cpp88
-rw-r--r--llvm/lib/CodeGen/MachineUniformityAnalysis.cpp95
-rw-r--r--llvm/lib/CodeGen/MachineVerifier.cpp239
-rw-r--r--llvm/lib/CodeGen/ModuloSchedule.cpp9
-rw-r--r--llvm/lib/CodeGen/OptimizePHIs.cpp4
-rw-r--r--llvm/lib/CodeGen/PHIElimination.cpp6
-rw-r--r--llvm/lib/CodeGen/PeepholeOptimizer.cpp10
-rw-r--r--llvm/lib/CodeGen/PostRASchedulerList.cpp6
-rw-r--r--llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp170
-rw-r--r--llvm/lib/CodeGen/ProcessImplicitDefs.cpp10
-rw-r--r--llvm/lib/CodeGen/PrologEpilogInserter.cpp137
-rw-r--r--llvm/lib/CodeGen/PseudoProbeInserter.cpp5
-rw-r--r--llvm/lib/CodeGen/RDFGraph.cpp930
-rw-r--r--llvm/lib/CodeGen/RDFLiveness.cpp328
-rw-r--r--llvm/lib/CodeGen/RDFRegisters.cpp321
-rw-r--r--llvm/lib/CodeGen/ReachingDefAnalysis.cpp25
-rw-r--r--llvm/lib/CodeGen/RegAllocBasic.cpp6
-rw-r--r--llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp8
-rw-r--r--llvm/lib/CodeGen/RegAllocEvictionAdvisor.h2
-rw-r--r--llvm/lib/CodeGen/RegAllocFast.cpp354
-rw-r--r--llvm/lib/CodeGen/RegAllocGreedy.cpp87
-rw-r--r--llvm/lib/CodeGen/RegAllocGreedy.h26
-rw-r--r--llvm/lib/CodeGen/RegAllocPBQP.cpp4
-rw-r--r--llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp2
-rw-r--r--llvm/lib/CodeGen/RegUsageInfoCollector.cpp4
-rw-r--r--llvm/lib/CodeGen/RegisterBank.cpp18
-rw-r--r--llvm/lib/CodeGen/RegisterBankInfo.cpp52
-rw-r--r--llvm/lib/CodeGen/RegisterCoalescer.cpp100
-rw-r--r--llvm/lib/CodeGen/RegisterPressure.cpp10
-rw-r--r--llvm/lib/CodeGen/RegisterScavenging.cpp159
-rw-r--r--llvm/lib/CodeGen/RenameIndependentSubregs.cpp8
-rw-r--r--llvm/lib/CodeGen/ReplaceWithVeclib.cpp2
-rw-r--r--llvm/lib/CodeGen/ResetMachineFunctionPass.cpp5
-rw-r--r--llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp17
-rw-r--r--llvm/lib/CodeGen/ScheduleDAG.cpp16
-rw-r--r--llvm/lib/CodeGen/ScheduleDAGInstrs.cpp24
-rw-r--r--llvm/lib/CodeGen/SelectOptimize.cpp11
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp2633
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/FastISel.cpp98
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp49
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp10
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp461
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp138
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp389
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp11
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h40
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp10
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp93
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp377
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp16
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h2
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp973
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp717
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h62
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp38
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp125
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp9
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp619
-rw-r--r--llvm/lib/CodeGen/ShrinkWrap.cpp536
-rw-r--r--llvm/lib/CodeGen/SjLjEHPrepare.cpp26
-rw-r--r--llvm/lib/CodeGen/SlotIndexes.cpp4
-rw-r--r--llvm/lib/CodeGen/SpillPlacement.h10
-rw-r--r--llvm/lib/CodeGen/SplitKit.cpp19
-rw-r--r--llvm/lib/CodeGen/SplitKit.h11
-rw-r--r--llvm/lib/CodeGen/StackColoring.cpp13
-rw-r--r--llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp5
-rw-r--r--llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp2
-rw-r--r--llvm/lib/CodeGen/StackMaps.cpp13
-rw-r--r--llvm/lib/CodeGen/StackProtector.cpp117
-rw-r--r--llvm/lib/CodeGen/StackSlotColoring.cpp73
-rw-r--r--llvm/lib/CodeGen/TailDuplicator.cpp25
-rw-r--r--llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp11
-rw-r--r--llvm/lib/CodeGen/TargetInstrInfo.cpp186
-rw-r--r--llvm/lib/CodeGen/TargetLoweringBase.cpp69
-rw-r--r--llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp127
-rw-r--r--llvm/lib/CodeGen/TargetPassConfig.cpp57
-rw-r--r--llvm/lib/CodeGen/TargetRegisterInfo.cpp20
-rw-r--r--llvm/lib/CodeGen/TwoAddressInstructionPass.cpp61
-rw-r--r--llvm/lib/CodeGen/TypePromotion.cpp9
-rw-r--r--llvm/lib/CodeGen/UnreachableBlockElim.cpp41
-rw-r--r--llvm/lib/CodeGen/VLIWMachineScheduler.cpp2
-rw-r--r--llvm/lib/CodeGen/ValueTypes.cpp45
-rw-r--r--llvm/lib/CodeGen/VirtRegMap.cpp31
-rw-r--r--llvm/lib/CodeGen/WasmEHPrepare.cpp7
-rw-r--r--llvm/lib/CodeGen/WinEHPrepare.cpp150
-rw-r--r--llvm/lib/CodeGen/XRayInstrumentation.cpp3
-rw-r--r--llvm/lib/DWARFLinker/DWARFLinker.cpp1226
-rw-r--r--llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp53
-rw-r--r--llvm/lib/DWARFLinker/DWARFStreamer.cpp790
-rw-r--r--llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.cpp131
-rw-r--r--llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.h274
-rw-r--r--llvm/lib/DWARFLinkerParallel/DWARFLinker.cpp12
-rw-r--r--llvm/lib/DWARFLinkerParallel/DWARFLinkerCompileUnit.h156
-rw-r--r--llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.cpp46
-rw-r--r--llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.h319
-rw-r--r--llvm/lib/DWARFLinkerParallel/DWARFLinkerUnit.h186
-rw-r--r--llvm/lib/DWARFLinkerParallel/OutputSections.cpp36
-rw-r--r--llvm/lib/DWARFLinkerParallel/OutputSections.h67
-rw-r--r--llvm/lib/DWARFLinkerParallel/StringPool.cpp (renamed from llvm/include/llvm/ADT/Triple.h)10
-rw-r--r--llvm/lib/DWP/DWP.cpp91
-rw-r--r--llvm/lib/DebugInfo/BTF/BTFContext.cpp69
-rw-r--r--llvm/lib/DebugInfo/BTF/BTFParser.cpp283
-rw-r--r--llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp1
-rw-r--r--llvm/lib/DebugInfo/CodeView/EnumTables.cpp1
-rw-r--r--llvm/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp10
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp154
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp214
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFContext.cpp109
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp71
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp179
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFDie.cpp2
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp106
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp124
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp14
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp6
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp16
-rw-r--r--llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp162
-rw-r--r--llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp64
-rw-r--r--llvm/lib/DebugInfo/GSYM/GsymCreator.cpp254
-rw-r--r--llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp37
-rw-r--r--llvm/lib/DebugInfo/LogicalView/Core/LVLocation.cpp19
-rw-r--r--llvm/lib/DebugInfo/LogicalView/Core/LVReader.cpp3
-rw-r--r--llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp69
-rw-r--r--llvm/lib/DebugInfo/LogicalView/Core/LVSupport.cpp109
-rw-r--r--llvm/lib/DebugInfo/LogicalView/Core/LVSymbol.cpp22
-rw-r--r--llvm/lib/DebugInfo/LogicalView/Core/LVType.cpp19
-rw-r--r--llvm/lib/DebugInfo/LogicalView/LVReaderHandler.cpp158
-rw-r--r--llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp166
-rw-r--r--llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp1221
-rw-r--r--llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp3525
-rw-r--r--llvm/lib/DebugInfo/LogicalView/Readers/LVELFReader.cpp166
-rw-r--r--llvm/lib/DebugInfo/MSF/MSFBuilder.cpp12
-rw-r--r--llvm/lib/DebugInfo/MSF/MSFError.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp4
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/InputFile.cpp25
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp1
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp1
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp1
-rw-r--r--llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp4
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBExtras.cpp2
-rw-r--r--llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp2
-rw-r--r--llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp47
-rw-r--r--llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp55
-rw-r--r--llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp2
-rw-r--r--llvm/lib/DebugInfo/Symbolize/Symbolize.cpp54
-rw-r--r--llvm/lib/Debuginfod/Debuginfod.cpp51
-rw-r--r--llvm/lib/Debuginfod/HTTPServer.cpp23
-rw-r--r--llvm/lib/Demangle/DLangDemangle.cpp379
-rw-r--r--llvm/lib/Demangle/Demangle.cpp44
-rw-r--r--llvm/lib/Demangle/ItaniumDemangle.cpp39
-rw-r--r--llvm/lib/Demangle/MicrosoftDemangle.cpp570
-rw-r--r--llvm/lib/Demangle/MicrosoftDemangleNodes.cpp4
-rw-r--r--llvm/lib/Demangle/RustDemangle.cpp59
-rw-r--r--llvm/lib/ExecutionEngine/ExecutionEngine.cpp16
-rw-r--r--llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp9
-rw-r--r--llvm/lib/ExecutionEngine/Interpreter/Interpreter.h2
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.h2
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp25
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h5
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/COFF_x86_64.cpp28
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF.cpp25
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h148
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp311
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp64
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp26
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF_loongarch.cpp16
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp396
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp406
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp139
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/JITLink.cpp16
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp9
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h61
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp45
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MachO.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp28
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h1
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp13
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp13
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/SEHFrameSupport.h2
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/aarch32.cpp519
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/aarch64.cpp6
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/i386.cpp48
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/ppc64.cpp102
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/riscv.cpp4
-rw-r--r--llvm/lib/ExecutionEngine/JITLink/x86_64.cpp8
-rw-r--r--llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp150
-rw-r--r--llvm/lib/ExecutionEngine/Orc/COFFVCRuntimeSupport.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Core.cpp84
-rw-r--r--llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp101
-rw-r--r--llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp11
-rw-r--r--llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp11
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp168
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp11
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.cpp3
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp5
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp77
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp162
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp4
-rw-r--r--llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp73
-rw-r--r--llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp5
-rw-r--r--llvm/lib/ExecutionEngine/Orc/LLJIT.cpp409
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Layer.cpp4
-rw-r--r--llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp29
-rw-r--r--llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp4
-rw-r--r--llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp418
-rw-r--r--llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp3
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp24
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp93
-rw-r--r--llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp212
-rw-r--r--llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp48
-rw-r--r--llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp5
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Shared/ObjectFormats.cpp94
-rw-r--r--llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp36
-rw-r--r--llvm/lib/ExecutionEngine/Orc/Speculation.cpp11
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp11
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp25
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp4
-rw-r--r--llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp22
-rw-r--r--llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp2
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp14
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h4
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFAArch64.h1
-rw-r--r--llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h1
-rw-r--r--llvm/lib/ExecutionEngine/SectionMemoryManager.cpp17
-rw-r--r--llvm/lib/ExecutionEngine/TargetSelect.cpp7
-rw-r--r--llvm/lib/FileCheck/FileCheck.cpp270
-rw-r--r--llvm/lib/FileCheck/FileCheckImpl.h39
-rw-r--r--llvm/lib/Frontend/OpenMP/OMPContext.cpp2
-rw-r--r--llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp1034
-rw-r--r--llvm/lib/FuzzMutate/FuzzerCLI.cpp2
-rw-r--r--llvm/lib/FuzzMutate/IRMutator.cpp186
-rw-r--r--llvm/lib/FuzzMutate/OpDescriptor.cpp19
-rw-r--r--llvm/lib/FuzzMutate/Operations.cpp37
-rw-r--r--llvm/lib/FuzzMutate/RandomIRBuilder.cpp375
-rw-r--r--llvm/lib/IR/AsmWriter.cpp45
-rw-r--r--llvm/lib/IR/AttributeImpl.h1
-rw-r--r--llvm/lib/IR/Attributes.cpp102
-rw-r--r--llvm/lib/IR/AutoUpgrade.cpp424
-rw-r--r--llvm/lib/IR/BasicBlock.cpp14
-rw-r--r--llvm/lib/IR/ConstantFold.cpp108
-rw-r--r--llvm/lib/IR/ConstantRange.cpp70
-rw-r--r--llvm/lib/IR/Constants.cpp124
-rw-r--r--llvm/lib/IR/ConstantsContext.h33
-rw-r--r--llvm/lib/IR/Core.cpp86
-rw-r--r--llvm/lib/IR/CycleInfo.cpp (renamed from llvm/include/llvm/Support/TargetParser.h)16
-rw-r--r--llvm/lib/IR/DIBuilder.cpp159
-rw-r--r--llvm/lib/IR/DataLayout.cpp202
-rw-r--r--llvm/lib/IR/DebugInfo.cpp405
-rw-r--r--llvm/lib/IR/DebugInfoMetadata.cpp188
-rw-r--r--llvm/lib/IR/DiagnosticInfo.cpp3
-rw-r--r--llvm/lib/IR/Dominators.cpp14
-rw-r--r--llvm/lib/IR/EHPersonalities.cpp (renamed from llvm/lib/Analysis/EHPersonalities.cpp)51
-rw-r--r--llvm/lib/IR/Function.cpp231
-rw-r--r--llvm/lib/IR/Globals.cpp66
-rw-r--r--llvm/lib/IR/IRBuilder.cpp150
-rw-r--r--llvm/lib/IR/Instruction.cpp125
-rw-r--r--llvm/lib/IR/Instructions.cpp210
-rw-r--r--llvm/lib/IR/IntrinsicInst.cpp57
-rw-r--r--llvm/lib/IR/LLVMContext.cpp9
-rw-r--r--llvm/lib/IR/LLVMContextImpl.cpp24
-rw-r--r--llvm/lib/IR/LLVMContextImpl.h36
-rw-r--r--llvm/lib/IR/LLVMRemarkStreamer.cpp6
-rw-r--r--llvm/lib/IR/LegacyPassManager.cpp15
-rw-r--r--llvm/lib/IR/MDBuilder.cpp4
-rw-r--r--llvm/lib/IR/Mangler.cpp3
-rw-r--r--llvm/lib/IR/Metadata.cpp134
-rw-r--r--llvm/lib/IR/Module.cpp23
-rw-r--r--llvm/lib/IR/ModuleSummaryIndex.cpp16
-rw-r--r--llvm/lib/IR/PassManager.cpp7
-rw-r--r--llvm/lib/IR/PseudoProbe.cpp22
-rw-r--r--llvm/lib/IR/ReplaceConstant.cpp166
-rw-r--r--llvm/lib/IR/SSAContext.cpp6
-rw-r--r--llvm/lib/IR/SafepointIRVerifier.cpp4
-rw-r--r--llvm/lib/IR/StructuralHash.cpp49
-rw-r--r--llvm/lib/IR/Type.cpp120
-rw-r--r--llvm/lib/IR/Value.cpp103
-rw-r--r--llvm/lib/IR/ValueSymbolTable.cpp2
-rw-r--r--llvm/lib/IR/VectorBuilder.cpp4
-rw-r--r--llvm/lib/IR/Verifier.cpp514
-rw-r--r--llvm/lib/InterfaceStub/IFSHandler.cpp12
-rw-r--r--llvm/lib/LTO/LTO.cpp315
-rw-r--r--llvm/lib/LTO/LTOBackend.cpp32
-rw-r--r--llvm/lib/LTO/LTOCodeGenerator.cpp9
-rw-r--r--llvm/lib/LTO/LTOModule.cpp16
-rw-r--r--llvm/lib/LTO/ThinLTOCodeGenerator.cpp109
-rw-r--r--llvm/lib/LTO/UpdateCompilerUsed.cpp1
-rw-r--r--llvm/lib/Linker/IRMover.cpp63
-rw-r--r--llvm/lib/MC/DXContainerPSVInfo.cpp54
-rw-r--r--llvm/lib/MC/ELFObjectWriter.cpp9
-rw-r--r--llvm/lib/MC/MCAsmBackend.cpp24
-rw-r--r--llvm/lib/MC/MCAsmInfo.cpp1
-rw-r--r--llvm/lib/MC/MCAsmInfoXCOFF.cpp1
-rw-r--r--llvm/lib/MC/MCAsmStreamer.cpp208
-rw-r--r--llvm/lib/MC/MCAssembler.cpp59
-rw-r--r--llvm/lib/MC/MCCodeEmitter.cpp9
-rw-r--r--llvm/lib/MC/MCCodeView.cpp2
-rw-r--r--llvm/lib/MC/MCContext.cpp54
-rw-r--r--llvm/lib/MC/MCDisassembler/Disassembler.cpp2
-rw-r--r--llvm/lib/MC/MCDwarf.cpp102
-rw-r--r--llvm/lib/MC/MCELFStreamer.cpp7
-rw-r--r--llvm/lib/MC/MCExpr.cpp109
-rw-r--r--llvm/lib/MC/MCInstrDesc.cpp2
-rw-r--r--llvm/lib/MC/MCMachOStreamer.cpp9
-rw-r--r--llvm/lib/MC/MCObjectFileInfo.cpp43
-rw-r--r--llvm/lib/MC/MCObjectStreamer.cpp55
-rw-r--r--llvm/lib/MC/MCObjectWriter.cpp4
-rw-r--r--llvm/lib/MC/MCParser/AsmLexer.cpp28
-rw-r--r--llvm/lib/MC/MCParser/AsmParser.cpp125
-rw-r--r--llvm/lib/MC/MCParser/COFFAsmParser.cpp35
-rw-r--r--llvm/lib/MC/MCParser/DarwinAsmParser.cpp6
-rw-r--r--llvm/lib/MC/MCParser/ELFAsmParser.cpp16
-rw-r--r--llvm/lib/MC/MCParser/MCAsmLexer.cpp1
-rw-r--r--llvm/lib/MC/MCParser/MCTargetAsmParser.cpp21
-rw-r--r--llvm/lib/MC/MCParser/MasmParser.cpp144
-rw-r--r--llvm/lib/MC/MCParser/WasmAsmParser.cpp3
-rw-r--r--llvm/lib/MC/MCPseudoProbe.cpp50
-rw-r--r--llvm/lib/MC/MCRegisterInfo.cpp37
-rw-r--r--llvm/lib/MC/MCSPIRVStreamer.cpp3
-rw-r--r--llvm/lib/MC/MCSchedule.cpp1
-rw-r--r--llvm/lib/MC/MCSection.cpp7
-rw-r--r--llvm/lib/MC/MCSectionELF.cpp4
-rw-r--r--llvm/lib/MC/MCStreamer.cpp99
-rw-r--r--llvm/lib/MC/MCSubtargetInfo.cpp4
-rw-r--r--llvm/lib/MC/MCTargetOptions.cpp3
-rw-r--r--llvm/lib/MC/MCTargetOptionsCommandFlags.cpp10
-rw-r--r--llvm/lib/MC/MCWasmStreamer.cpp3
-rw-r--r--llvm/lib/MC/MCWin64EH.cpp6
-rw-r--r--llvm/lib/MC/MCWinCOFFStreamer.cpp16
-rw-r--r--llvm/lib/MC/MCXCOFFStreamer.cpp22
-rw-r--r--llvm/lib/MC/MachObjectWriter.cpp4
-rw-r--r--llvm/lib/MC/TargetRegistry.cpp10
-rw-r--r--llvm/lib/MC/WasmObjectWriter.cpp4
-rw-r--r--llvm/lib/MC/WinCOFFObjectWriter.cpp674
-rw-r--r--llvm/lib/MC/XCOFFObjectWriter.cpp235
-rw-r--r--llvm/lib/MCA/CodeEmitter.cpp2
-rw-r--r--llvm/lib/MCA/CustomBehaviour.cpp11
-rw-r--r--llvm/lib/MCA/HardwareUnits/RegisterFile.cpp48
-rw-r--r--llvm/lib/MCA/HardwareUnits/ResourceManager.cpp2
-rw-r--r--llvm/lib/MCA/InstrBuilder.cpp14
-rw-r--r--llvm/lib/MCA/Stages/EntryStage.cpp3
-rw-r--r--llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp30
-rw-r--r--llvm/lib/ObjCopy/ConfigManager.cpp16
-rw-r--r--llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp19
-rw-r--r--llvm/lib/ObjCopy/ELF/ELFObject.cpp38
-rw-r--r--llvm/lib/ObjCopy/ELF/ELFObject.h6
-rw-r--r--llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp3
-rw-r--r--llvm/lib/Object/Archive.cpp298
-rw-r--r--llvm/lib/Object/ArchiveWriter.cpp482
-rw-r--r--llvm/lib/Object/BuildID.cpp31
-rw-r--r--llvm/lib/Object/COFFImportFile.cpp5
-rw-r--r--llvm/lib/Object/COFFModuleDefinition.cpp17
-rw-r--r--llvm/lib/Object/COFFObjectFile.cpp55
-rw-r--r--llvm/lib/Object/DXContainer.cpp89
-rw-r--r--llvm/lib/Object/Decompressor.cpp1
-rw-r--r--llvm/lib/Object/ELF.cpp111
-rw-r--r--llvm/lib/Object/ELFObjectFile.cpp162
-rw-r--r--llvm/lib/Object/GOFFObjectFile.cpp483
-rw-r--r--llvm/lib/Object/IRSymtab.cpp8
-rw-r--r--llvm/lib/Object/MachOObjectFile.cpp4
-rw-r--r--llvm/lib/Object/MachOUniversalWriter.cpp6
-rw-r--r--llvm/lib/Object/ModuleSymbolTable.cpp14
-rw-r--r--llvm/lib/Object/ObjectFile.cpp6
-rw-r--r--llvm/lib/Object/OffloadBinary.cpp8
-rw-r--r--llvm/lib/Object/RelocationResolver.cpp21
-rw-r--r--llvm/lib/Object/TapiFile.cpp36
-rw-r--r--llvm/lib/Object/WasmObjectFile.cpp19
-rw-r--r--llvm/lib/Object/WindowsMachineFlag.cpp3
-rw-r--r--llvm/lib/Object/WindowsResource.cpp1
-rw-r--r--llvm/lib/Object/XCOFFObjectFile.cpp26
-rw-r--r--llvm/lib/ObjectYAML/COFFEmitter.cpp47
-rw-r--r--llvm/lib/ObjectYAML/COFFYAML.cpp109
-rw-r--r--llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp1
-rw-r--r--llvm/lib/ObjectYAML/DWARFEmitter.cpp2
-rw-r--r--llvm/lib/ObjectYAML/DWARFYAML.cpp12
-rw-r--r--llvm/lib/ObjectYAML/DXContainerEmitter.cpp14
-rw-r--r--llvm/lib/ObjectYAML/DXContainerYAML.cpp157
-rw-r--r--llvm/lib/ObjectYAML/ELFYAML.cpp12
-rw-r--r--llvm/lib/ObjectYAML/MachOEmitter.cpp4
-rw-r--r--llvm/lib/ObjectYAML/MachOYAML.cpp2
-rw-r--r--llvm/lib/ObjectYAML/MinidumpEmitter.cpp4
-rw-r--r--llvm/lib/ObjectYAML/OffloadEmitter.cpp9
-rw-r--r--llvm/lib/ObjectYAML/XCOFFEmitter.cpp2
-rw-r--r--llvm/lib/Option/Arg.cpp6
-rw-r--r--llvm/lib/Option/OptTable.cpp12
-rw-r--r--llvm/lib/Option/Option.cpp29
-rw-r--r--llvm/lib/Passes/PassBuilder.cpp265
-rw-r--r--llvm/lib/Passes/PassBuilderBindings.cpp7
-rw-r--r--llvm/lib/Passes/PassBuilderPipelines.cpp492
-rw-r--r--llvm/lib/Passes/PassRegistry.def90
-rw-r--r--llvm/lib/Passes/StandardInstrumentations.cpp310
-rw-r--r--llvm/lib/ProfileData/Coverage/CoverageMapping.cpp128
-rw-r--r--llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp21
-rw-r--r--llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp3
-rw-r--r--llvm/lib/ProfileData/GCOV.cpp22
-rw-r--r--llvm/lib/ProfileData/InstrProf.cpp97
-rw-r--r--llvm/lib/ProfileData/InstrProfReader.cpp159
-rw-r--r--llvm/lib/ProfileData/InstrProfWriter.cpp132
-rw-r--r--llvm/lib/ProfileData/ItaniumManglingCanonicalizer.cpp (renamed from llvm/lib/Support/ItaniumManglingCanonicalizer.cpp)12
-rw-r--r--llvm/lib/ProfileData/RawMemProfReader.cpp183
-rw-r--r--llvm/lib/ProfileData/SampleProf.cpp22
-rw-r--r--llvm/lib/ProfileData/SampleProfReader.cpp476
-rw-r--r--llvm/lib/ProfileData/SampleProfWriter.cpp217
-rw-r--r--llvm/lib/ProfileData/SymbolRemappingReader.cpp (renamed from llvm/lib/Support/SymbolRemappingReader.cpp)2
-rw-r--r--llvm/lib/Remarks/Remark.cpp28
-rw-r--r--llvm/lib/Remarks/RemarkLinker.cpp3
-rw-r--r--llvm/lib/Remarks/YAMLRemarkParser.cpp25
-rw-r--r--llvm/lib/Support/APFloat.cpp1142
-rw-r--r--llvm/lib/Support/APInt.cpp99
-rw-r--r--llvm/lib/Support/APSInt.cpp2
-rw-r--r--llvm/lib/Support/AddressRanges.cpp70
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S2
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S2
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm12
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S2
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S2
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm36
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_impl.h10
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S2
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S2
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm36
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S2
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S2
-rw-r--r--llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm36
-rw-r--r--llvm/lib/Support/BLAKE3/llvm_blake3_prefix.h41
-rw-r--r--llvm/lib/Support/BalancedPartitioning.cpp337
-rw-r--r--llvm/lib/Support/BinaryStreamWriter.cpp1
-rw-r--r--llvm/lib/Support/BlockFrequency.cpp45
-rw-r--r--llvm/lib/Support/Chrono.cpp2
-rw-r--r--llvm/lib/Support/CommandLine.cpp5
-rw-r--r--llvm/lib/Support/ConvertEBCDIC.cpp123
-rw-r--r--llvm/lib/Support/ConvertUTFWrapper.cpp4
-rw-r--r--llvm/lib/Support/CrashRecoveryContext.cpp13
-rw-r--r--llvm/lib/Support/DataExtractor.cpp1
-rw-r--r--llvm/lib/Support/DebugOptions.h5
-rw-r--r--llvm/lib/Support/DivisionByConstantInfo.cpp2
-rw-r--r--llvm/lib/Support/ELFAttributeParser.cpp12
-rw-r--r--llvm/lib/Support/Errno.cpp8
-rw-r--r--llvm/lib/Support/Error.cpp13
-rw-r--r--llvm/lib/Support/FileUtilities.cpp62
-rw-r--r--llvm/lib/Support/FloatingPointMode.cpp95
-rw-r--r--llvm/lib/Support/FoldingSet.cpp2
-rw-r--r--llvm/lib/Support/JSON.cpp4
-rw-r--r--llvm/lib/Support/KnownBits.cpp697
-rw-r--r--llvm/lib/Support/LowLevelType.cpp59
-rw-r--r--llvm/lib/Support/MemoryBuffer.cpp9
-rw-r--r--llvm/lib/Support/NativeFormatting.cpp5
-rw-r--r--llvm/lib/Support/PGOOptions.cpp58
-rw-r--r--llvm/lib/Support/Parallel.cpp106
-rw-r--r--llvm/lib/Support/Path.cpp10
-rw-r--r--llvm/lib/Support/PrettyStackTrace.cpp3
-rw-r--r--llvm/lib/Support/RISCVISAInfo.cpp667
-rw-r--r--llvm/lib/Support/Regex.cpp7
-rw-r--r--llvm/lib/Support/ScaledNumber.cpp8
-rw-r--r--llvm/lib/Support/SpecialCaseList.cpp5
-rw-r--r--llvm/lib/Support/StringMap.cpp11
-rw-r--r--llvm/lib/Support/StringRef.cpp25
-rw-r--r--llvm/lib/Support/SuffixTree.cpp156
-rw-r--r--llvm/lib/Support/SuffixTreeNode.cpp40
-rw-r--r--llvm/lib/Support/ThreadPool.cpp2
-rw-r--r--llvm/lib/Support/Threading.cpp5
-rw-r--r--llvm/lib/Support/TrigramIndex.cpp107
-rw-r--r--llvm/lib/Support/Unix/Path.inc2
-rw-r--r--llvm/lib/Support/Unix/Signals.inc72
-rw-r--r--llvm/lib/Support/VirtualFileSystem.cpp62
-rw-r--r--llvm/lib/Support/Windows/Path.inc4
-rw-r--r--llvm/lib/Support/Windows/Signals.inc23
-rw-r--r--llvm/lib/Support/Windows/Threading.inc2
-rw-r--r--llvm/lib/Support/YAMLParser.cpp7
-rw-r--r--llvm/lib/Support/YAMLTraits.cpp26
-rw-r--r--llvm/lib/Support/Z3Solver.cpp2
-rw-r--r--llvm/lib/Support/raw_ostream.cpp21
-rw-r--r--llvm/lib/Support/regcomp.c8
-rw-r--r--llvm/lib/Support/regex_impl.h6
-rw-r--r--llvm/lib/Support/xxhash.cpp294
-rw-r--r--llvm/lib/TableGen/Main.cpp25
-rw-r--r--llvm/lib/TableGen/Record.cpp633
-rw-r--r--llvm/lib/TableGen/TGLexer.cpp95
-rw-r--r--llvm/lib/TableGen/TGLexer.h174
-rw-r--r--llvm/lib/TableGen/TGParser.cpp849
-rw-r--r--llvm/lib/TableGen/TGParser.h223
-rw-r--r--llvm/lib/TableGen/TableGenBackend.cpp9
-rw-r--r--llvm/lib/TableGen/TableGenBackendSkeleton.cpp16
-rw-r--r--llvm/lib/Target/AArch64/AArch64.h4
-rw-r--r--llvm/lib/Target/AArch64/AArch64.td114
-rw-r--r--llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp113
-rw-r--r--llvm/lib/Target/AArch64/AArch64CallingConvention.td20
-rw-r--r--llvm/lib/Target/AArch64/AArch64CollectLOH.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64Combine.td40
-rw-r--r--llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp17
-rw-r--r--llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64ExpandImm.cpp135
-rw-r--r--llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp61
-rw-r--r--llvm/lib/Target/AArch64/AArch64FastISel.cpp108
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.cpp152
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.h2
-rw-r--r--llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp142
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp1347
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp3309
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.h67
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrAtomics.td31
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrFormats.td139
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrGISel.td105
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.cpp413
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.h13
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.td470
-rw-r--r--llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp74
-rw-r--r--llvm/lib/Target/AArch64/AArch64MCInstLower.h2
-rw-r--r--llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp212
-rw-r--r--llvm/lib/Target/AArch64/AArch64MacroFusion.cpp61
-rw-r--r--llvm/lib/Target/AArch64/AArch64PerfectShuffle.h4
-rw-r--r--llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp35
-rw-r--r--llvm/lib/Target/AArch64/AArch64RegisterInfo.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SLSHardening.cpp12
-rw-r--r--llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td530
-rw-r--r--llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td400
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedA510.td1386
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedAmpere1.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedExynosM3.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedExynosM4.td4
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedExynosM5.td8
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td1060
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td29
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td1861
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td2805
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedPredAmpere.td25
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedPredExynos.td12
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td56
-rw-r--r--llvm/lib/Target/AArch64/AArch64SchedPredicates.td137
-rw-r--r--llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp12
-rw-r--r--llvm/lib/Target/AArch64/AArch64StackTagging.cpp4
-rw-r--r--llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp4
-rw-r--r--llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.cpp111
-rw-r--r--llvm/lib/Target/AArch64/AArch64Subtarget.h45
-rw-r--r--llvm/lib/Target/AArch64/AArch64SystemOperands.td18
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetMachine.cpp61
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp981
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h45
-rw-r--r--llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp1095
-rw-r--r--llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp21
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp187
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h4
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp277
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp203
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h1
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp67
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp101
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp260
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp84
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp111
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp72
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h12
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp17
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp2
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp51
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h4
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp8
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h2
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp12
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h1
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp8
-rw-r--r--llvm/lib/Target/AArch64/SMEInstrFormats.td364
-rw-r--r--llvm/lib/Target/AArch64/SVEInstrFormats.td1047
-rw-r--r--llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h58
-rw-r--r--llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h7
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.h216
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.td489
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp31
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp14
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp231
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp349
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp343
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp34
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp1110
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCombine.td108
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp53
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h12
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp94
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUGISel.td14
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp28
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp767
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp195
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h17
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp1107
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h60
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp28
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp369
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td43
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp359
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h25
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructions.td77
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp28
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp1413
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h32
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp177
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp9
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp993
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp23
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp213
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h18
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp38
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp216
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp120
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp71
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp1109
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp426
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp222
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp77
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.h29
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp129
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h5
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp156
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp186
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp648
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp32
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp10
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp21
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td54
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp39
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h15
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp173
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp215
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h51
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp85
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.h36
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp1399
-rw-r--r--llvm/lib/Target/AMDGPU/BUFInstructions.td212
-rw-r--r--llvm/lib/Target/AMDGPU/DSInstructions.td49
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp875
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h91
-rw-r--r--llvm/lib/Target/AMDGPU/FLATInstructions.td151
-rw-r--r--llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp33
-rw-r--r--llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp48
-rw-r--r--llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.cpp139
-rw-r--r--llvm/lib/Target/AMDGPU/GCNProcessors.td16
-rw-r--r--llvm/lib/Target/AMDGPU/GCNRegPressure.cpp43
-rw-r--r--llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp502
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp50
-rw-r--r--llvm/lib/Target/AMDGPU/GCNSubtarget.h61
-rw-r--r--llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/LDSDIRInstructions.td2
-rw-r--r--llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h2
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp180
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h48
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp10
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp582
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h68
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp11
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h4
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp41
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h18
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp38
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp594
-rw-r--r--llvm/lib/Target/AMDGPU/MIMGInstructions.td169
-rw-r--r--llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/R600ISelLowering.cpp25
-rw-r--r--llvm/lib/Target/AMDGPU/R600ISelLowering.h3
-rw-r--r--llvm/lib/Target/AMDGPU/R600InstrInfo.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/R600Instructions.td4
-rw-r--r--llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h2
-rw-r--r--llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp12
-rw-r--r--llvm/lib/Target/AMDGPU/SIDefines.h69
-rw-r--r--llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp17
-rw-r--r--llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/SIFoldOperands.cpp306
-rw-r--r--llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/SIFrameLowering.cpp113
-rw-r--r--llvm/lib/Target/AMDGPU/SIFrameLowering.h4
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp2214
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.h51
-rw-r--r--llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp133
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrFormats.td5
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp1419
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.h132
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td347
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td239
-rw-r--r--llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp57
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp23
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp141
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp141
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp99
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h133
-rw-r--r--llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp34
-rw-r--r--llvm/lib/Target/AMDGPU/SIModeRegister.cpp16
-rw-r--r--llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp38
-rw-r--r--llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h90
-rw-r--r--llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp57
-rw-r--r--llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp15
-rw-r--r--llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/SIPostRABundler.cpp4
-rw-r--r--llvm/lib/Target/AMDGPU/SIProgramInfo.cpp20
-rw-r--r--llvm/lib/Target/AMDGPU/SIProgramInfo.h20
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp416
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.h49
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.td361
-rw-r--r--llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp21
-rw-r--r--llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp60
-rw-r--r--llvm/lib/Target/AMDGPU/SMInstructions.td757
-rw-r--r--llvm/lib/Target/AMDGPU/SOPInstructions.td57
-rw-r--r--llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp10
-rw-r--r--llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.h7
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp363
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h198
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp82
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h3
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp123
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h44
-rw-r--r--llvm/lib/Target/AMDGPU/VINTERPInstructions.td1
-rw-r--r--llvm/lib/Target/AMDGPU/VOP1Instructions.td67
-rw-r--r--llvm/lib/Target/AMDGPU/VOP2Instructions.td165
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3Instructions.td71
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3PInstructions.td90
-rw-r--r--llvm/lib/Target/AMDGPU/VOPCInstructions.td76
-rw-r--r--llvm/lib/Target/AMDGPU/VOPInstructions.td12
-rw-r--r--llvm/lib/Target/ARC/ARCISelLowering.cpp28
-rw-r--r--llvm/lib/Target/ARC/ARCOptAddrMode.cpp7
-rw-r--r--llvm/lib/Target/ARC/ARCRegisterInfo.cpp3
-rw-r--r--llvm/lib/Target/ARC/ARCRegisterInfo.h2
-rw-r--r--llvm/lib/Target/ARM/ARM.td11
-rw-r--r--llvm/lib/Target/ARM/ARMAsmPrinter.cpp65
-rw-r--r--llvm/lib/Target/ARM/ARMAsmPrinter.h3
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp73
-rw-r--r--llvm/lib/Target/ARM/ARMBaseInstrInfo.h4
-rw-r--r--llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp14
-rw-r--r--llvm/lib/Target/ARM/ARMBaseRegisterInfo.h3
-rw-r--r--llvm/lib/Target/ARM/ARMBasicBlockInfo.h2
-rw-r--r--llvm/lib/Target/ARM/ARMBranchTargets.cpp20
-rw-r--r--llvm/lib/Target/ARM/ARMCallLowering.cpp10
-rw-r--r--llvm/lib/Target/ARM/ARMCallingConv.cpp2
-rw-r--r--llvm/lib/Target/ARM/ARMConstantIslandPass.cpp73
-rw-r--r--llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp295
-rw-r--r--llvm/lib/Target/ARM/ARMFastISel.cpp6
-rw-r--r--llvm/lib/Target/ARM/ARMFrameLowering.cpp45
-rw-r--r--llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp53
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp689
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.h32
-rw-r--r--llvm/lib/Target/ARM/ARMInstrInfo.td56
-rw-r--r--llvm/lib/Target/ARM/ARMInstrMVE.td33
-rw-r--r--llvm/lib/Target/ARM/ARMInstrNEON.td21
-rw-r--r--llvm/lib/Target/ARM/ARMInstrThumb.td40
-rw-r--r--llvm/lib/Target/ARM/ARMInstrThumb2.td116
-rw-r--r--llvm/lib/Target/ARM/ARMInstrVFP.td2
-rw-r--r--llvm/lib/Target/ARM/ARMInstructionSelector.cpp2
-rw-r--r--llvm/lib/Target/ARM/ARMLegalizerInfo.cpp2
-rw-r--r--llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp322
-rw-r--r--llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp9
-rw-r--r--llvm/lib/Target/ARM/ARMMCInstLower.cpp16
-rw-r--r--llvm/lib/Target/ARM/ARMMachineFunctionInfo.h10
-rw-r--r--llvm/lib/Target/ARM/ARMParallelDSP.cpp15
-rw-r--r--llvm/lib/Target/ARM/ARMPredicates.td10
-rw-r--r--llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp3
-rw-r--r--llvm/lib/Target/ARM/ARMSLSHardening.cpp3
-rw-r--r--llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp5
-rw-r--r--llvm/lib/Target/ARM/ARMSubtarget.cpp17
-rw-r--r--llvm/lib/Target/ARM/ARMSubtarget.h18
-rw-r--r--llvm/lib/Target/ARM/ARMTargetMachine.cpp11
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp53
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.h22
-rw-r--r--llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp1252
-rw-r--r--llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp18
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h56
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp61
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h2
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h5
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h28
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp21
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp13
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h6
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp12
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp84
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp20
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h29
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp15
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp11
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp3
-rw-r--r--llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp4
-rw-r--r--llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp52
-rw-r--r--llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp3
-rw-r--r--llvm/lib/Target/ARM/MVETailPredication.cpp163
-rw-r--r--llvm/lib/Target/ARM/Thumb1FrameLowering.cpp5
-rw-r--r--llvm/lib/Target/ARM/Thumb1InstrInfo.cpp11
-rw-r--r--llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp5
-rw-r--r--llvm/lib/Target/ARM/Thumb2InstrInfo.cpp8
-rw-r--r--llvm/lib/Target/ARM/ThumbRegisterInfo.cpp10
-rw-r--r--llvm/lib/Target/ARM/Utils/ARMBaseInfo.cpp3
-rw-r--r--llvm/lib/Target/ARM/Utils/ARMBaseInfo.h2
-rw-r--r--llvm/lib/Target/AVR/AVRAsmPrinter.cpp78
-rw-r--r--llvm/lib/Target/AVR/AVRDevices.td22
-rw-r--r--llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp322
-rw-r--r--llvm/lib/Target/AVR/AVRFrameLowering.cpp10
-rw-r--r--llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp12
-rw-r--r--llvm/lib/Target/AVR/AVRISelLowering.cpp129
-rw-r--r--llvm/lib/Target/AVR/AVRISelLowering.h9
-rw-r--r--llvm/lib/Target/AVR/AVRInstrFormats.td4
-rw-r--r--llvm/lib/Target/AVR/AVRInstrInfo.cpp23
-rw-r--r--llvm/lib/Target/AVR/AVRInstrInfo.h7
-rw-r--r--llvm/lib/Target/AVR/AVRInstrInfo.td59
-rw-r--r--llvm/lib/Target/AVR/AVRShiftExpand.cpp24
-rw-r--r--llvm/lib/Target/AVR/AVRSubtarget.cpp2
-rw-r--r--llvm/lib/Target/AVR/AVRSubtarget.h6
-rw-r--r--llvm/lib/Target/AVR/AVRTargetMachine.cpp1
-rw-r--r--llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp30
-rw-r--r--llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp43
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp6
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h2
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp3
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp25
-rw-r--r--llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h5
-rw-r--r--llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp17
-rw-r--r--llvm/lib/Target/BPF/BPF.h8
-rw-r--r--llvm/lib/Target/BPF/BPF.td12
-rw-r--r--llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp27
-rw-r--r--llvm/lib/Target/BPF/BPFAdjustOpt.cpp17
-rw-r--r--llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp201
-rw-r--r--llvm/lib/Target/BPF/BPFIRPeephole.cpp15
-rw-r--r--llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp3
-rw-r--r--llvm/lib/Target/BPF/BPFISelLowering.cpp52
-rw-r--r--llvm/lib/Target/BPF/BPFISelLowering.h4
-rw-r--r--llvm/lib/Target/BPF/BPFInstrInfo.td4
-rw-r--r--llvm/lib/Target/BPF/BPFMIChecking.cpp4
-rw-r--r--llvm/lib/Target/BPF/BPFPreserveDIType.cpp20
-rw-r--r--llvm/lib/Target/BPF/BPFRegisterInfo.cpp23
-rw-r--r--llvm/lib/Target/BPF/BPFSubtarget.cpp2
-rw-r--r--llvm/lib/Target/BPF/BPFSubtarget.h1
-rw-r--r--llvm/lib/Target/BPF/BPFTargetMachine.cpp14
-rw-r--r--llvm/lib/Target/BPF/BPFTargetTransformInfo.h4
-rw-r--r--llvm/lib/Target/BPF/BTFDebug.cpp50
-rw-r--r--llvm/lib/Target/BPF/BTFDebug.h5
-rw-r--r--llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp4
-rw-r--r--llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h2
-rw-r--r--llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp18
-rw-r--r--llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp354
-rw-r--r--llvm/lib/Target/CSKY/CSKY.td8
-rw-r--r--llvm/lib/Target/CSKY/CSKYAsmPrinter.h2
-rw-r--r--llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp4
-rw-r--r--llvm/lib/Target/CSKY/CSKYISelLowering.cpp45
-rw-r--r--llvm/lib/Target/CSKY/CSKYISelLowering.h3
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo.td256
-rw-r--r--llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td54
-rw-r--r--llvm/lib/Target/CSKY/CSKYSubtarget.h2
-rw-r--r--llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp6
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp4
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp6
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp2
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp2
-rw-r--r--llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp2
-rw-r--r--llvm/lib/Target/DirectX/DXContainerGlobals.cpp1
-rw-r--r--llvm/lib/Target/DirectX/DXILMetadata.cpp2
-rw-r--r--llvm/lib/Target/DirectX/DXILPrepare.cpp4
-rw-r--r--llvm/lib/Target/DirectX/DXILResource.cpp2
-rw-r--r--llvm/lib/Target/DirectX/DXILResourceAnalysis.h5
-rw-r--r--llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp2
-rw-r--r--llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp28
-rw-r--r--llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h5
-rw-r--r--llvm/lib/Target/DirectX/DirectXIRPasses/PointerTypeAnalysis.cpp18
-rw-r--r--llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp8
-rw-r--r--llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/Hexagon.td4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp3
-rw-r--r--llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp3
-rw-r--r--llvm/lib/Target/Hexagon/HexagonBitTracker.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp15
-rw-r--r--llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp9
-rw-r--r--llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp49
-rw-r--r--llvm/lib/Target/Hexagon/HexagonGenExtract.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonGenMux.cpp10
-rw-r--r--llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp9
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp8
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp80
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelLowering.cpp82
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelLowering.h14
-rw-r--r--llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp7
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp41
-rw-r--r--llvm/lib/Target/Hexagon/HexagonInstrInfo.h2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonIntrinsics.td14
-rw-r--r--llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp8
-rw-r--r--llvm/lib/Target/Hexagon/HexagonPatterns.td89
-rw-r--r--llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp20
-rw-r--r--llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp5
-rw-r--r--llvm/lib/Target/Hexagon/HexagonRegisterInfo.h2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonRegisterInfo.td10
-rw-r--r--llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp3
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp1
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetMachine.h2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h6
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp2
-rw-r--r--llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp887
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp3
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp22
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp15
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h6
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h1
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp18
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp3
-rw-r--r--llvm/lib/Target/Hexagon/RDFCopy.cpp90
-rw-r--r--llvm/lib/Target/Hexagon/RDFCopy.h9
-rw-r--r--llvm/lib/Target/Hexagon/RDFDeadCode.cpp7
-rw-r--r--llvm/lib/Target/Hexagon/RDFDeadCode.h2
-rw-r--r--llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp4
-rw-r--r--llvm/lib/Target/Lanai/LanaiISelLowering.cpp70
-rw-r--r--llvm/lib/Target/Lanai/LanaiISelLowering.h6
-rw-r--r--llvm/lib/Target/Lanai/LanaiInstrInfo.td4
-rw-r--r--llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp9
-rw-r--r--llvm/lib/Target/Lanai/LanaiRegisterInfo.h2
-rw-r--r--llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp10
-rw-r--r--llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp184
-rw-r--r--llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp27
-rw-r--r--llvm/lib/Target/LoongArch/LoongArch.td44
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp79
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h5
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp230
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td170
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td202
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td175
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp9
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchFrameLowering.h2
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp19
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h1
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp783
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchISelLowering.h39
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchInstrFormats.td156
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp26
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchInstrInfo.h2
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchInstrInfo.td1009
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchLASXInstrFormats.td459
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td1032
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchLBTInstrFormats.td256
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchLBTInstrInfo.td241
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchLSXInstrFormats.td486
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td1007
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchLVZInstrInfo.td33
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp24
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td54
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp32
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchSubtarget.h13
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp42
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchTargetMachine.h1
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp22
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h47
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp2
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h9
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp66
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h11
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp2
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp8
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp87
-rw-r--r--llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp228
-rw-r--r--llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp23
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp4
-rw-r--r--llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp2
-rw-r--r--llvm/lib/Target/M68k/M68k.h10
-rw-r--r--llvm/lib/Target/M68k/M68k.td11
-rw-r--r--llvm/lib/Target/M68k/M68kAsmPrinter.cpp84
-rw-r--r--llvm/lib/Target/M68k/M68kAsmPrinter.h12
-rw-r--r--llvm/lib/Target/M68k/M68kCallingConv.td9
-rw-r--r--llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp9
-rw-r--r--llvm/lib/Target/M68k/M68kExpandPseudo.cpp21
-rw-r--r--llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp93
-rw-r--r--llvm/lib/Target/M68k/M68kISelLowering.cpp223
-rw-r--r--llvm/lib/Target/M68k/M68kISelLowering.h36
-rw-r--r--llvm/lib/Target/M68k/M68kInstrArithmetic.td165
-rw-r--r--llvm/lib/Target/M68k/M68kInstrControl.td70
-rw-r--r--llvm/lib/Target/M68k/M68kInstrData.td58
-rw-r--r--llvm/lib/Target/M68k/M68kInstrFormats.td272
-rw-r--r--llvm/lib/Target/M68k/M68kInstrInfo.cpp34
-rw-r--r--llvm/lib/Target/M68k/M68kInstrInfo.td30
-rw-r--r--llvm/lib/Target/M68k/M68kInstrShiftRotate.td6
-rw-r--r--llvm/lib/Target/M68k/M68kMCInstLower.cpp15
-rw-r--r--llvm/lib/Target/M68k/M68kMachineFunction.h2
-rw-r--r--llvm/lib/Target/M68k/M68kRegisterInfo.cpp12
-rw-r--r--llvm/lib/Target/M68k/M68kRegisterInfo.td20
-rw-r--r--llvm/lib/Target/M68k/M68kSubtarget.cpp2
-rw-r--r--llvm/lib/Target/M68k/M68kSubtarget.h10
-rw-r--r--llvm/lib/Target/M68k/M68kTargetMachine.cpp4
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h57
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp51
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp57
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h15
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp7
-rw-r--r--llvm/lib/Target/M68k/MCTargetDesc/M68kMemOperandPrinter.h80
-rw-r--r--llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp58
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp4
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h2
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp12
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h2
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp7
-rw-r--r--llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp24
-rw-r--r--llvm/lib/Target/MSP430/MSP430FrameLowering.cpp183
-rw-r--r--llvm/lib/Target/MSP430/MSP430FrameLowering.h22
-rw-r--r--llvm/lib/Target/MSP430/MSP430ISelLowering.cpp61
-rw-r--r--llvm/lib/Target/MSP430/MSP430ISelLowering.h11
-rw-r--r--llvm/lib/Target/MSP430/MSP430InstrInfo.h2
-rw-r--r--llvm/lib/Target/MSP430/MSP430InstrInfo.td8
-rw-r--r--llvm/lib/Target/MSP430/MSP430RegisterInfo.h2
-rw-r--r--llvm/lib/Target/MSP430/MSP430RegisterInfo.td64
-rw-r--r--llvm/lib/Target/MSP430/MSP430Subtarget.cpp3
-rw-r--r--llvm/lib/Target/MSP430/MSP430Subtarget.h7
-rw-r--r--llvm/lib/Target/MSP430/MSP430TargetMachine.cpp1
-rw-r--r--llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp329
-rw-r--r--llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp20
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp2
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h2
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h2
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp4
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp2
-rw-r--r--llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp4
-rw-r--r--llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td1
-rw-r--r--llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp8
-rw-r--r--llvm/lib/Target/Mips/Mips16InstrInfo.cpp10
-rw-r--r--llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp2
-rw-r--r--llvm/lib/Target/Mips/MipsAsmPrinter.cpp12
-rw-r--r--llvm/lib/Target/Mips/MipsCallLowering.cpp10
-rw-r--r--llvm/lib/Target/Mips/MipsCombine.td6
-rw-r--r--llvm/lib/Target/Mips/MipsDSPInstrInfo.td2
-rw-r--r--llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp3
-rw-r--r--llvm/lib/Target/Mips/MipsFastISel.cpp6
-rw-r--r--llvm/lib/Target/Mips/MipsISelLowering.cpp112
-rw-r--r--llvm/lib/Target/Mips/MipsISelLowering.h4
-rw-r--r--llvm/lib/Target/Mips/MipsInstructionSelector.cpp2
-rw-r--r--llvm/lib/Target/Mips/MipsLegalizerInfo.cpp2
-rw-r--r--llvm/lib/Target/Mips/MipsOptimizePICCall.cpp2
-rw-r--r--llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp71
-rw-r--r--llvm/lib/Target/Mips/MipsRegisterInfo.cpp5
-rw-r--r--llvm/lib/Target/Mips/MipsRegisterInfo.h4
-rw-r--r--llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp14
-rw-r--r--llvm/lib/Target/Mips/MipsSEISelLowering.cpp17
-rw-r--r--llvm/lib/Target/Mips/MipsSEISelLowering.h2
-rw-r--r--llvm/lib/Target/Mips/MipsSEInstrInfo.cpp34
-rw-r--r--llvm/lib/Target/Mips/MipsSubtarget.cpp3
-rw-r--r--llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp6
-rw-r--r--llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTX.h10
-rw-r--r--llvm/lib/Target/NVPTX/NVPTX.td100
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp98
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h101
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp154
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h4
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp117
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h30
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp52
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp418
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h4
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp283
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelLowering.h7
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp5
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXInstrInfo.td789
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXIntrinsics.td586
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp86
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXLowerUnreachable.cpp126
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp5
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXMCExpr.h6
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp4
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp12
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td8
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp1
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXSubtarget.h1
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp77
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetMachine.h2
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp8
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h6
-rw-r--r--llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp58
-rw-r--r--llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp171
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp89
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp40
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp57
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h3
-rw-r--r--llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td2
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp15
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h3
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp4
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h2
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp151
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h48
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp11
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h85
-rw-r--r--llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp8
-rw-r--r--llvm/lib/Target/PowerPC/P10InstrResources.td675
-rw-r--r--llvm/lib/Target/PowerPC/P9InstrResources.td13
-rw-r--r--llvm/lib/Target/PowerPC/PPC.h5
-rw-r--r--llvm/lib/Target/PowerPC/PPC.td6
-rw-r--r--llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp222
-rw-r--r--llvm/lib/Target/PowerPC/PPCBack2BackFusion.def8
-rw-r--r--llvm/lib/Target/PowerPC/PPCCallingConv.cpp38
-rw-r--r--llvm/lib/Target/PowerPC/PPCCallingConv.h3
-rw-r--r--llvm/lib/Target/PowerPC/PPCCallingConv.td44
-rw-r--r--llvm/lib/Target/PowerPC/PPCFastISel.cpp8
-rw-r--r--llvm/lib/Target/PowerPC/PPCFrameLowering.cpp70
-rw-r--r--llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def34
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp306
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp819
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h75
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstr64Bit.td1208
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrAltivec.td943
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrDFP.td193
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrFormats.td489
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrFuture.td32
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrHTM.td28
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.cpp6
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.h7
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrInfo.td1755
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrMMA.td10
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrP10.td1119
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrSPE.td112
-rw-r--r--llvm/lib/Target/PowerPC/PPCInstrVSX.td570
-rw-r--r--llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp6
-rw-r--r--llvm/lib/Target/PowerPC/PPCMCInstLower.cpp4
-rw-r--r--llvm/lib/Target/PowerPC/PPCMIPeephole.cpp52
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.h7
-rw-r--r--llvm/lib/Target/PowerPC/PPCRegisterInfo.td109
-rw-r--r--llvm/lib/Target/PowerPC/PPCScheduleP10.td6
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.cpp4
-rw-r--r--llvm/lib/Target/PowerPC/PPCSubtarget.h10
-rw-r--r--llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp29
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetMachine.cpp20
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp16
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h4
-rw-r--r--llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp5
-rw-r--r--llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp2
-rw-r--r--llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp1907
-rw-r--r--llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp352
-rw-r--r--llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp290
-rw-r--r--llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h7
-rw-r--r--llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp4
-rw-r--r--llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp12
-rw-r--r--llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h2
-rw-r--r--llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp5
-rw-r--r--llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h4
-rw-r--r--llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td2
-rw-r--r--llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp202
-rw-r--r--llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.h22
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp122
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h10
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp116
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h245
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp11
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp167
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h72
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h24
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp90
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h10
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp4
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h2
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp157
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp4
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h6
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp15
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.h4
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp94
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h4
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp101
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h2
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp36
-rw-r--r--llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h20
-rw-r--r--llvm/lib/Target/RISCV/RISCV.h27
-rw-r--r--llvm/lib/Target/RISCV/RISCV.td2
-rw-r--r--llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp427
-rw-r--r--llvm/lib/Target/RISCV/RISCVCallingConv.td39
-rw-r--r--llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp4
-rw-r--r--llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp27
-rw-r--r--llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp165
-rw-r--r--llvm/lib/Target/RISCV/RISCVFeatures.td422
-rw-r--r--llvm/lib/Target/RISCV/RISCVFrameLowering.cpp301
-rw-r--r--llvm/lib/Target/RISCV/RISCVFrameLowering.h6
-rw-r--r--llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp149
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp1275
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h69
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp5563
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.h250
-rw-r--r--llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp135
-rw-r--r--llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp373
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrFormats.td113
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrFormatsC.td255
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrFormatsV.td30
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.cpp630
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.h48
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.td370
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoA.td123
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoC.td188
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoD.td569
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoF.td641
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoM.td8
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoV.td1020
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td3965
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td1306
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td3084
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td205
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td530
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td737
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td78
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZb.td196
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZc.td293
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td270
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td63
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td706
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td16
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td43
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZk.td41
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td31
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td357
-rw-r--r--llvm/lib/Target/RISCV/RISCVMCInstLower.cpp257
-rw-r--r--llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp2
-rw-r--r--llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h22
-rw-r--r--llvm/lib/Target/RISCV/RISCVMacroFusion.cpp4
-rw-r--r--llvm/lib/Target/RISCV/RISCVMacroFusion.h6
-rw-r--r--llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp16
-rw-r--r--llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp11
-rw-r--r--llvm/lib/Target/RISCV/RISCVMoveMerger.cpp238
-rw-r--r--llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp (renamed from llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp)346
-rw-r--r--llvm/lib/Target/RISCV/RISCVProcessors.td52
-rw-r--r--llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp145
-rw-r--r--llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp274
-rw-r--r--llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp6
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp12
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.h4
-rw-r--r--llvm/lib/Target/RISCV/RISCVRegisterInfo.td241
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedRocket.td8
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedSiFive7.td966
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td5
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedule.td36
-rw-r--r--llvm/lib/Target/RISCV/RISCVScheduleV.td436
-rw-r--r--llvm/lib/Target/RISCV/RISCVScheduleZb.td10
-rw-r--r--llvm/lib/Target/RISCV/RISCVStripWSuffix.cpp87
-rw-r--r--llvm/lib/Target/RISCV/RISCVSubtarget.cpp20
-rw-r--r--llvm/lib/Target/RISCV/RISCVSubtarget.h47
-rw-r--r--llvm/lib/Target/RISCV/RISCVSystemOperands.td133
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetMachine.cpp70
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetMachine.h4
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetObjectFile.cpp10
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetObjectFile.h6
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp450
-rw-r--r--llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h80
-rw-r--r--llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp2
-rw-r--r--llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h2
-rw-r--r--llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.h2
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp4
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp621
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVBuiltins.h2
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVBuiltins.td166
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp6
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp28
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp22
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp4
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVInstrInfo.td2
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp38
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp43
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h3
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp70
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp200
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVRegularizer.cpp6
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp2
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp1
-rw-r--r--llvm/lib/Target/SPIRV/SPIRVUtils.cpp54
-rw-r--r--llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp14
-rw-r--r--llvm/lib/Target/Sparc/DelaySlotFiller.cpp3
-rw-r--r--llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp2
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp19
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp2
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h3
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp8
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp15
-rw-r--r--llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h2
-rw-r--r--llvm/lib/Target/Sparc/Sparc.td2
-rw-r--r--llvm/lib/Target/Sparc/SparcISelLowering.cpp119
-rw-r--r--llvm/lib/Target/Sparc/SparcISelLowering.h3
-rw-r--r--llvm/lib/Target/Sparc/SparcInstr64Bit.td33
-rw-r--r--llvm/lib/Target/Sparc/SparcInstrAliases.td30
-rw-r--r--llvm/lib/Target/Sparc/SparcInstrFormats.td5
-rw-r--r--llvm/lib/Target/Sparc/SparcInstrInfo.cpp158
-rw-r--r--llvm/lib/Target/Sparc/SparcInstrInfo.h9
-rw-r--r--llvm/lib/Target/Sparc/SparcInstrInfo.td18
-rw-r--r--llvm/lib/Target/Sparc/SparcSubtarget.cpp22
-rw-r--r--llvm/lib/Target/Sparc/SparcSubtarget.h50
-rw-r--r--llvm/lib/Target/Sparc/SparcTargetMachine.cpp8
-rw-r--r--llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp27
-rw-r--r--llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp195
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp17
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h1
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp58
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp175
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.cpp49
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.h66
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h38
-rw-r--r--llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp108
-rw-r--r--llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp160
-rw-r--r--llvm/lib/Target/SystemZ/SystemZAsmPrinter.h46
-rw-r--r--llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp46
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp2
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.cpp326
-rw-r--r--llvm/lib/Target/SystemZ/SystemZISelLowering.h24
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrFP.td2
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrFormats.td672
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp7
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrInfo.h11
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrInfo.td66
-rw-r--r--llvm/lib/Target/SystemZ/SystemZInstrVector.td2
-rw-r--r--llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h20
-rw-r--r--llvm/lib/Target/SystemZ/SystemZMachineScheduler.h6
-rw-r--r--llvm/lib/Target/SystemZ/SystemZOperands.td39
-rw-r--r--llvm/lib/Target/SystemZ/SystemZOperators.td10
-rw-r--r--llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp15
-rw-r--r--llvm/lib/Target/SystemZ/SystemZRegisterInfo.h6
-rw-r--r--llvm/lib/Target/SystemZ/SystemZRegisterInfo.td16
-rw-r--r--llvm/lib/Target/SystemZ/SystemZSubtarget.cpp35
-rw-r--r--llvm/lib/Target/SystemZ/SystemZSubtarget.h4
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h2
-rw-r--r--llvm/lib/Target/Target.cpp5
-rw-r--r--llvm/lib/Target/TargetMachine.cpp19
-rw-r--r--llvm/lib/Target/TargetMachineC.cpp4
-rw-r--r--llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp6
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp7
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h2
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp7
-rw-r--r--llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.h4
-rw-r--r--llvm/lib/Target/VE/VE.h4
-rw-r--r--llvm/lib/Target/VE/VE.td4
-rw-r--r--llvm/lib/Target/VE/VEISelLowering.cpp18
-rw-r--r--llvm/lib/Target/VE/VEISelLowering.h2
-rw-r--r--llvm/lib/Target/VE/VEInstrFormats.td4
-rw-r--r--llvm/lib/Target/VE/VEInstrInfo.td126
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp105
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp151
-rw-r--r--llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h10
-rw-r--r--llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp3
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h2
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp4
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp24
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp22
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h55
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp124
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.h73
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h2
-rw-r--r--llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp2
-rw-r--r--llvm/lib/Target/WebAssembly/README.txt2
-rw-r--r--llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp116
-rw-r--r--llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h93
-rw-r--r--llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp24
-rw-r--r--llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h6
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp49
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h1
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp1
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp10
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp399
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h32
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp1
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp39
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp18
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp202
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h6
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td4
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td48
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp10
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp5
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp4
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h4
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyNullifyDebugValueLists.cpp29
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp155
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp65
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp287
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp17
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp244
-rw-r--r--llvm/lib/Target/X86/AsmParser/X86Operand.h34
-rw-r--r--llvm/lib/Target/X86/MCA/X86CustomBehaviour.h2
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp24
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp99
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h11
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp38
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.cpp479
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.h30
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimizationForImmediate.def72
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp165
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.h54
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp24
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp5
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp1173
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86MCExpr.h2
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp63
-rw-r--r--llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h17
-rw-r--r--llvm/lib/Target/X86/X86.h15
-rw-r--r--llvm/lib/Target/X86/X86.td289
-rw-r--r--llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp198
-rw-r--r--llvm/lib/Target/X86/X86AsmPrinter.cpp8
-rw-r--r--llvm/lib/Target/X86/X86CallFrameOptimization.cpp35
-rw-r--r--llvm/lib/Target/X86/X86CallLowering.cpp6
-rw-r--r--llvm/lib/Target/X86/X86CallingConv.td35
-rw-r--r--llvm/lib/Target/X86/X86CmovConversion.cpp23
-rw-r--r--llvm/lib/Target/X86/X86DynAllocaExpander.cpp13
-rw-r--r--llvm/lib/Target/X86/X86ExpandPseudo.cpp10
-rw-r--r--llvm/lib/Target/X86/X86FastISel.cpp63
-rw-r--r--llvm/lib/Target/X86/X86FixupBWInsts.cpp16
-rw-r--r--llvm/lib/Target/X86/X86FixupInstTuning.cpp517
-rw-r--r--llvm/lib/Target/X86/X86FixupLEAs.cpp48
-rw-r--r--llvm/lib/Target/X86/X86FixupVectorConstants.cpp398
-rw-r--r--llvm/lib/Target/X86/X86FloatingPoint.cpp18
-rw-r--r--llvm/lib/Target/X86/X86FrameLowering.cpp344
-rw-r--r--llvm/lib/Target/X86/X86FrameLowering.h2
-rw-r--r--llvm/lib/Target/X86/X86ISelDAGToDAG.cpp312
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp6233
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h55
-rw-r--r--llvm/lib/Target/X86/X86InsertPrefetch.cpp5
-rw-r--r--llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp1078
-rw-r--r--llvm/lib/Target/X86/X86InstrAMX.td42
-rw-r--r--llvm/lib/Target/X86/X86InstrAVX512.td635
-rw-r--r--llvm/lib/Target/X86/X86InstrArithmetic.td1455
-rw-r--r--llvm/lib/Target/X86/X86InstrAsmAlias.td688
-rw-r--r--llvm/lib/Target/X86/X86InstrCMovSetCC.td35
-rw-r--r--llvm/lib/Target/X86/X86InstrCompiler.td113
-rw-r--r--llvm/lib/Target/X86/X86InstrControl.td31
-rw-r--r--llvm/lib/Target/X86/X86InstrExtension.td8
-rw-r--r--llvm/lib/Target/X86/X86InstrFMA.td38
-rw-r--r--llvm/lib/Target/X86/X86InstrFPStack.td30
-rw-r--r--llvm/lib/Target/X86/X86InstrFoldTables.cpp5922
-rw-r--r--llvm/lib/Target/X86/X86InstrFoldTables.h56
-rw-r--r--llvm/lib/Target/X86/X86InstrFormats.td49
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp467
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.h34
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.td2637
-rw-r--r--llvm/lib/Target/X86/X86InstrKL.td33
-rw-r--r--llvm/lib/Target/X86/X86InstrMMX.td10
-rw-r--r--llvm/lib/Target/X86/X86InstrMisc.td1670
-rw-r--r--llvm/lib/Target/X86/X86InstrSNP.td24
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td917
-rw-r--r--llvm/lib/Target/X86/X86InstrSVM.td10
-rw-r--r--llvm/lib/Target/X86/X86InstrShiftRotate.td164
-rw-r--r--llvm/lib/Target/X86/X86InstrSystem.td47
-rw-r--r--llvm/lib/Target/X86/X86InstrTBM.td194
-rw-r--r--llvm/lib/Target/X86/X86InstrVMX.td24
-rw-r--r--llvm/lib/Target/X86/X86InstrXOP.td16
-rw-r--r--llvm/lib/Target/X86/X86InstructionSelector.cpp231
-rw-r--r--llvm/lib/Target/X86/X86InterleavedAccess.cpp10
-rw-r--r--llvm/lib/Target/X86/X86KCFI.cpp150
-rw-r--r--llvm/lib/Target/X86/X86LegalizerInfo.cpp962
-rw-r--r--llvm/lib/Target/X86/X86LegalizerInfo.h13
-rw-r--r--llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp8
-rw-r--r--llvm/lib/Target/X86/X86LowerAMXType.cpp38
-rw-r--r--llvm/lib/Target/X86/X86MCInstLower.cpp639
-rw-r--r--llvm/lib/Target/X86/X86MachineFunctionInfo.h16
-rw-r--r--llvm/lib/Target/X86/X86OptimizeLEAs.cpp3
-rw-r--r--llvm/lib/Target/X86/X86PartialReduction.cpp4
-rw-r--r--llvm/lib/Target/X86/X86PfmCounters.td15
-rw-r--r--llvm/lib/Target/X86/X86PreAMXConfig.cpp1
-rw-r--r--llvm/lib/Target/X86/X86PreTileConfig.cpp12
-rw-r--r--llvm/lib/Target/X86/X86RegisterBankInfo.cpp3
-rw-r--r--llvm/lib/Target/X86/X86RegisterInfo.cpp60
-rw-r--r--llvm/lib/Target/X86/X86RegisterInfo.h6
-rw-r--r--llvm/lib/Target/X86/X86RegisterInfo.td24
-rw-r--r--llvm/lib/Target/X86/X86ReturnThunks.cpp2
-rw-r--r--llvm/lib/Target/X86/X86SchedAlderlakeP.td7
-rw-r--r--llvm/lib/Target/X86/X86SchedIceLake.td155
-rw-r--r--llvm/lib/Target/X86/X86SchedSapphireRapids.td5202
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeClient.td16
-rw-r--r--llvm/lib/Target/X86/X86SchedSkylakeServer.td16
-rw-r--r--llvm/lib/Target/X86/X86ScheduleAtom.td2
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver1.td8
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver2.td3
-rw-r--r--llvm/lib/Target/X86/X86ScheduleZnver4.td1957
-rw-r--r--llvm/lib/Target/X86/X86SelectionDAGInfo.cpp55
-rw-r--r--llvm/lib/Target/X86/X86Subtarget.cpp2
-rw-r--r--llvm/lib/Target/X86/X86Subtarget.h13
-rw-r--r--llvm/lib/Target/X86/X86TargetMachine.cpp10
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp561
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.h26
-rw-r--r--llvm/lib/Target/X86/X86WinEHState.cpp9
-rw-r--r--llvm/lib/Target/XCore/XCoreISelLowering.cpp54
-rw-r--r--llvm/lib/Target/XCore/XCoreRegisterInfo.cpp9
-rw-r--r--llvm/lib/Target/XCore/XCoreRegisterInfo.h2
-rw-r--r--llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp15
-rw-r--r--llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp2
-rw-r--r--llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCAsmInfo.cpp2
-rw-r--r--llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCCodeEmitter.cpp7
-rw-r--r--llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp2
-rw-r--r--llvm/lib/TargetParser/AArch64TargetParser.cpp57
-rw-r--r--llvm/lib/TargetParser/ARMTargetParser.cpp29
-rw-r--r--llvm/lib/TargetParser/Host.cpp78
-rw-r--r--llvm/lib/TargetParser/LoongArchTargetParser.cpp11
-rw-r--r--llvm/lib/TargetParser/RISCVTargetParser.cpp83
-rw-r--r--llvm/lib/TargetParser/SubtargetFeature.cpp (renamed from llvm/lib/MC/SubtargetFeature.cpp)4
-rw-r--r--llvm/lib/TargetParser/TargetParser.cpp284
-rw-r--r--llvm/lib/TargetParser/Triple.cpp45
-rw-r--r--llvm/lib/TargetParser/X86TargetParser.cpp276
-rw-r--r--llvm/lib/TextAPI/Architecture.cpp4
-rw-r--r--llvm/lib/TextAPI/InterfaceFile.cpp74
-rw-r--r--llvm/lib/TextAPI/PackedVersion.cpp7
-rw-r--r--llvm/lib/TextAPI/Platform.cpp10
-rw-r--r--llvm/lib/TextAPI/Symbol.cpp18
-rw-r--r--llvm/lib/TextAPI/SymbolSet.cpp36
-rw-r--r--llvm/lib/TextAPI/Target.cpp17
-rw-r--r--llvm/lib/TextAPI/TextStub.cpp113
-rw-r--r--llvm/lib/TextAPI/TextStubCommon.cpp18
-rw-r--r--llvm/lib/TextAPI/TextStubCommon.h25
-rw-r--r--llvm/lib/TextAPI/TextStubV5.cpp1021
-rw-r--r--llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp13
-rw-r--r--llvm/lib/ToolDrivers/llvm-dlltool/Options.td6
-rw-r--r--llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp154
-rw-r--r--llvm/lib/ToolDrivers/llvm-lib/Options.td1
-rw-r--r--llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp362
-rw-r--r--llvm/lib/Transforms/CFGuard/CFGuard.cpp2
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroCleanup.cpp10
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroConditionalWrapper.cpp4
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroElide.cpp44
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroFrame.cpp608
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroInternal.h15
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroSplit.cpp188
-rw-r--r--llvm/lib/Transforms/Coroutines/Coroutines.cpp26
-rw-r--r--llvm/lib/Transforms/IPO/AlwaysInliner.cpp136
-rw-r--r--llvm/lib/Transforms/IPO/Annotation2Metadata.cpp34
-rw-r--r--llvm/lib/Transforms/IPO/ArgumentPromotion.cpp72
-rw-r--r--llvm/lib/Transforms/IPO/Attributor.cpp1093
-rw-r--r--llvm/lib/Transforms/IPO/AttributorAttributes.cpp3316
-rw-r--r--llvm/lib/Transforms/IPO/BlockExtractor.cpp2
-rw-r--r--llvm/lib/Transforms/IPO/CalledValuePropagation.cpp32
-rw-r--r--llvm/lib/Transforms/IPO/ConstantMerge.cpp31
-rw-r--r--llvm/lib/Transforms/IPO/CrossDSOCFI.cpp20
-rw-r--r--llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp40
-rw-r--r--llvm/lib/Transforms/IPO/ElimAvailExtern.cpp110
-rw-r--r--llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp52
-rw-r--r--llvm/lib/Transforms/IPO/ExtractGV.cpp26
-rw-r--r--llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp31
-rw-r--r--llvm/lib/Transforms/IPO/FunctionAttrs.cpp308
-rw-r--r--llvm/lib/Transforms/IPO/FunctionImport.cpp362
-rw-r--r--llvm/lib/Transforms/IPO/FunctionSpecialization.cpp707
-rw-r--r--llvm/lib/Transforms/IPO/GlobalDCE.cpp110
-rw-r--r--llvm/lib/Transforms/IPO/GlobalOpt.cpp368
-rw-r--r--llvm/lib/Transforms/IPO/GlobalSplit.cpp35
-rw-r--r--llvm/lib/Transforms/IPO/HotColdSplitting.cpp57
-rw-r--r--llvm/lib/Transforms/IPO/IPO.cpp97
-rw-r--r--llvm/lib/Transforms/IPO/IROutliner.cpp75
-rw-r--r--llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp36
-rw-r--r--llvm/lib/Transforms/IPO/InlineSimple.cpp118
-rw-r--r--llvm/lib/Transforms/IPO/Inliner.cpp558
-rw-r--r--llvm/lib/Transforms/IPO/Internalize.cpp63
-rw-r--r--llvm/lib/Transforms/IPO/LoopExtractor.cpp4
-rw-r--r--llvm/lib/Transforms/IPO/LowerTypeTests.cpp197
-rw-r--r--llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp3277
-rw-r--r--llvm/lib/Transforms/IPO/MergeFunctions.cpp28
-rw-r--r--llvm/lib/Transforms/IPO/ModuleInliner.cpp9
-rw-r--r--llvm/lib/Transforms/IPO/OpenMPOpt.cpp699
-rw-r--r--llvm/lib/Transforms/IPO/PartialInlining.cpp76
-rw-r--r--llvm/lib/Transforms/IPO/PassManagerBuilder.cpp517
-rw-r--r--llvm/lib/Transforms/IPO/SCCP.cpp132
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfile.cpp569
-rw-r--r--llvm/lib/Transforms/IPO/SampleProfileProbe.cpp110
-rw-r--r--llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp29
-rw-r--r--llvm/lib/Transforms/IPO/StripSymbols.cpp209
-rw-r--r--llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp55
-rw-r--r--llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp162
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp102
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp487
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp18
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp596
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp314
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp1160
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h49
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp139
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp244
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp12
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp448
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp67
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp192
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp258
-rw-r--r--llvm/lib/Transforms/InstCombine/InstructionCombining.cpp1311
-rw-r--r--llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp323
-rw-r--r--llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp368
-rw-r--r--llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp10
-rw-r--r--llvm/lib/Transforms/Instrumentation/CGProfile.cpp23
-rw-r--r--llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp22
-rw-r--r--llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp31
-rw-r--r--llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp20
-rw-r--r--llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp319
-rw-r--r--llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp65
-rw-r--r--llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp3
-rw-r--r--llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp106
-rw-r--r--llvm/lib/Transforms/Instrumentation/Instrumentation.cpp5
-rw-r--r--llvm/lib/Transforms/Instrumentation/KCFI.cpp20
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemProfiler.cpp315
-rw-r--r--llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp335
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp813
-rw-r--r--llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp2
-rw-r--r--llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp187
-rw-r--r--llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp36
-rw-r--r--llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp6
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARC.h2
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp2
-rw-r--r--llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp10
-rw-r--r--llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp71
-rw-r--r--llvm/lib/Transforms/Scalar/ADCE.cpp94
-rw-r--r--llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp56
-rw-r--r--llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp8
-rw-r--r--llvm/lib/Transforms/Scalar/BDCE.cpp36
-rw-r--r--llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp39
-rw-r--r--llvm/lib/Transforms/Scalar/ConstantHoisting.cpp130
-rw-r--r--llvm/lib/Transforms/Scalar/ConstraintElimination.cpp702
-rw-r--r--llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp105
-rw-r--r--llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp50
-rw-r--r--llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp169
-rw-r--r--llvm/lib/Transforms/Scalar/DivRemPairs.cpp45
-rw-r--r--llvm/lib/Transforms/Scalar/EarlyCSE.cpp74
-rw-r--r--llvm/lib/Transforms/Scalar/Float2Int.cpp47
-rw-r--r--llvm/lib/Transforms/Scalar/GVN.cpp296
-rw-r--r--llvm/lib/Transforms/Scalar/GVNHoist.cpp57
-rw-r--r--llvm/lib/Transforms/Scalar/GVNSink.cpp41
-rw-r--r--llvm/lib/Transforms/Scalar/GuardWidening.cpp222
-rw-r--r--llvm/lib/Transforms/Scalar/IndVarSimplify.cpp261
-rw-r--r--llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp503
-rw-r--r--llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp157
-rw-r--r--llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp15
-rw-r--r--llvm/lib/Transforms/Scalar/JumpThreading.cpp404
-rw-r--r--llvm/lib/Transforms/Scalar/LICM.cpp517
-rw-r--r--llvm/lib/Transforms/Scalar/LoopDeletion.cpp71
-rw-r--r--llvm/lib/Transforms/Scalar/LoopDistribute.cpp57
-rw-r--r--llvm/lib/Transforms/Scalar/LoopFlatten.cpp97
-rw-r--r--llvm/lib/Transforms/Scalar/LoopFuse.cpp64
-rw-r--r--llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp159
-rw-r--r--llvm/lib/Transforms/Scalar/LoopInterchange.cpp60
-rw-r--r--llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp94
-rw-r--r--llvm/lib/Transforms/Scalar/LoopPassManager.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/LoopPredication.cpp39
-rw-r--r--llvm/lib/Transforms/Scalar/LoopRerollPass.cpp45
-rw-r--r--llvm/lib/Transforms/Scalar/LoopRotation.cpp15
-rw-r--r--llvm/lib/Transforms/Scalar/LoopSink.cpp40
-rw-r--r--llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp390
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp74
-rw-r--r--llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp41
-rw-r--r--llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp65
-rw-r--r--llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp3
-rw-r--r--llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp295
-rw-r--r--llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp471
-rw-r--r--llvm/lib/Transforms/Scalar/MergeICmps.cpp13
-rw-r--r--llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp23
-rw-r--r--llvm/lib/Transforms/Scalar/NaryReassociate.cpp14
-rw-r--r--llvm/lib/Transforms/Scalar/NewGVN.cpp94
-rw-r--r--llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp476
-rw-r--r--llvm/lib/Transforms/Scalar/Reassociate.cpp160
-rw-r--r--llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp245
-rw-r--r--llvm/lib/Transforms/Scalar/SCCP.cpp52
-rw-r--r--llvm/lib/Transforms/Scalar/SROA.cpp627
-rw-r--r--llvm/lib/Transforms/Scalar/Scalar.cpp232
-rw-r--r--llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp28
-rw-r--r--llvm/lib/Transforms/Scalar/Scalarizer.cpp774
-rw-r--r--llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp129
-rw-r--r--llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp695
-rw-r--r--llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp28
-rw-r--r--llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp50
-rw-r--r--llvm/lib/Transforms/Scalar/StructurizeCFG.cpp41
-rw-r--r--llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp45
-rw-r--r--llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp322
-rw-r--r--llvm/lib/Transforms/Utils/AddDiscriminators.cpp31
-rw-r--r--llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp64
-rw-r--r--llvm/lib/Transforms/Utils/BasicBlockUtils.cpp282
-rw-r--r--llvm/lib/Transforms/Utils/BuildLibCalls.cpp88
-rw-r--r--llvm/lib/Transforms/Utils/BypassSlowDivision.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/CallGraphUpdater.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/CallPromotionUtils.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/CloneFunction.cpp17
-rw-r--r--llvm/lib/Transforms/Utils/CodeExtractor.cpp43
-rw-r--r--llvm/lib/Transforms/Utils/CodeLayout.cpp770
-rw-r--r--llvm/lib/Transforms/Utils/CountVisits.cpp25
-rw-r--r--llvm/lib/Transforms/Utils/CtorUtils.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/Debugify.cpp83
-rw-r--r--llvm/lib/Transforms/Utils/DemoteRegToStack.cpp1
-rw-r--r--llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp16
-rw-r--r--llvm/lib/Transforms/Utils/EscapeEnumerator.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/Evaluator.cpp40
-rw-r--r--llvm/lib/Transforms/Utils/FlattenCFG.cpp13
-rw-r--r--llvm/lib/Transforms/Utils/FunctionComparator.cpp63
-rw-r--r--llvm/lib/Transforms/Utils/InjectTLIMappings.cpp65
-rw-r--r--llvm/lib/Transforms/Utils/InlineFunction.cpp123
-rw-r--r--llvm/lib/Transforms/Utils/InstructionNamer.cpp33
-rw-r--r--llvm/lib/Transforms/Utils/LCSSA.cpp46
-rw-r--r--llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp107
-rw-r--r--llvm/lib/Transforms/Utils/Local.cpp293
-rw-r--r--llvm/lib/Transforms/Utils/LoopPeel.cpp50
-rw-r--r--llvm/lib/Transforms/Utils/LoopRotationUtils.cpp7
-rw-r--r--llvm/lib/Transforms/Utils/LoopSimplify.cpp20
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnroll.cpp42
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp6
-rw-r--r--llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/LoopUtils.cpp78
-rw-r--r--llvm/lib/Transforms/Utils/LoopVersioning.cpp56
-rw-r--r--llvm/lib/Transforms/Utils/LowerAtomic.cpp6
-rw-r--r--llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp75
-rw-r--r--llvm/lib/Transforms/Utils/Mem2Reg.cpp12
-rw-r--r--llvm/lib/Transforms/Utils/MemoryOpRemark.cpp6
-rw-r--r--llvm/lib/Transforms/Utils/MetaRenamer.cpp85
-rw-r--r--llvm/lib/Transforms/Utils/ModuleUtils.cpp20
-rw-r--r--llvm/lib/Transforms/Utils/MoveAutoInit.cpp231
-rw-r--r--llvm/lib/Transforms/Utils/NameAnonGlobals.cpp2
-rw-r--r--llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp81
-rw-r--r--llvm/lib/Transforms/Utils/SCCPSolver.cpp511
-rw-r--r--llvm/lib/Transforms/Utils/SSAUpdater.cpp28
-rw-r--r--llvm/lib/Transforms/Utils/SampleProfileInference.cpp36
-rw-r--r--llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp442
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyCFG.cpp261
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyIndVar.cpp26
-rw-r--r--llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp218
-rw-r--r--llvm/lib/Transforms/Utils/SizeOpts.cpp6
-rw-r--r--llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp39
-rw-r--r--llvm/lib/Transforms/Utils/SymbolRewriter.cpp43
-rw-r--r--llvm/lib/Transforms/Utils/UnifyLoopExits.cpp4
-rw-r--r--llvm/lib/Transforms/Utils/Utils.cpp27
-rw-r--r--llvm/lib/Transforms/Utils/VNCoercion.cpp192
-rw-r--r--llvm/lib/Transforms/Utils/ValueMapper.cpp10
-rw-r--r--llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp2049
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp188
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h64
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp2346
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp4239
-rw-r--r--llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h24
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.cpp116
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlan.h680
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanCFG.h1
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp25
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h5
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp385
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp327
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.h29
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanValue.h18
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp45
-rw-r--r--llvm/lib/Transforms/Vectorize/VectorCombine.cpp85
-rw-r--r--llvm/lib/Transforms/Vectorize/Vectorize.cpp19
-rw-r--r--llvm/lib/WindowsDriver/MSVCPaths.cpp35
-rw-r--r--llvm/lib/XRay/InstrumentationMap.cpp3
-rw-r--r--llvm/tools/bugpoint/BugDriver.cpp2
-rw-r--r--llvm/tools/bugpoint/BugDriver.h9
-rw-r--r--llvm/tools/bugpoint/CrashDebugger.cpp59
-rw-r--r--llvm/tools/bugpoint/ExtractFunction.cpp1
-rw-r--r--llvm/tools/bugpoint/OptimizerDriver.cpp2
-rw-r--r--llvm/tools/bugpoint/ToolRunner.cpp2
-rw-r--r--llvm/tools/bugpoint/ToolRunner.h2
-rw-r--r--llvm/tools/bugpoint/bugpoint.cpp50
-rw-r--r--llvm/tools/llc/llc.cpp38
-rw-r--r--llvm/tools/lli/ExecutionUtils.h4
-rw-r--r--llvm/tools/lli/lli.cpp189
-rw-r--r--llvm/tools/llvm-ar/llvm-ar.cpp36
-rw-r--r--llvm/tools/llvm-cov/CodeCoverage.cpp46
-rw-r--r--llvm/tools/llvm-cov/CoverageReport.cpp1
-rw-r--r--llvm/tools/llvm-cov/llvm-cov.cpp2
-rw-r--r--llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp19
-rw-r--r--llvm/tools/llvm-cxxmap/llvm-cxxmap.cpp2
-rw-r--r--llvm/tools/llvm-debuginfo-analyzer/README.txt224
-rw-r--r--llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp48
-rw-r--r--llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp297
-rw-r--r--llvm/tools/llvm-dwarfutil/Error.h2
-rw-r--r--llvm/tools/llvm-dwarfutil/Options.h1
-rw-r--r--llvm/tools/llvm-dwarfutil/Options.td5
-rw-r--r--llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp12
-rw-r--r--llvm/tools/llvm-dwp/Opts.td13
-rw-r--r--llvm/tools/llvm-dwp/llvm-dwp.cpp86
-rw-r--r--llvm/tools/llvm-lto/llvm-lto.cpp7
-rw-r--r--llvm/tools/llvm-lto2/llvm-lto2.cpp35
-rw-r--r--llvm/tools/llvm-mc/Disassembler.cpp2
-rw-r--r--llvm/tools/llvm-mc/llvm-mc.cpp2
-rw-r--r--llvm/tools/llvm-mca/CodeRegion.cpp11
-rw-r--r--llvm/tools/llvm-mca/CodeRegion.h33
-rw-r--r--llvm/tools/llvm-mca/CodeRegionGenerator.cpp45
-rw-r--r--llvm/tools/llvm-mca/CodeRegionGenerator.h81
-rw-r--r--llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp4
-rw-r--r--llvm/tools/llvm-mca/Views/BottleneckAnalysis.h6
-rw-r--r--llvm/tools/llvm-mca/Views/InstructionInfoView.cpp18
-rw-r--r--llvm/tools/llvm-mca/Views/InstructionInfoView.h11
-rw-r--r--llvm/tools/llvm-mca/Views/ResourcePressureView.cpp4
-rw-r--r--llvm/tools/llvm-mca/Views/TimelineView.cpp4
-rw-r--r--llvm/tools/llvm-mca/llvm-mca.cpp30
-rw-r--r--llvm/tools/llvm-nm/llvm-nm.cpp96
-rw-r--r--llvm/tools/llvm-objcopy/ObjcopyOptions.cpp5
-rw-r--r--llvm/tools/llvm-objcopy/ObjcopyOpts.td36
-rw-r--r--llvm/tools/llvm-objcopy/llvm-objcopy.cpp5
-rw-r--r--llvm/tools/llvm-objdump/COFFDump.cpp35
-rw-r--r--llvm/tools/llvm-objdump/ELFDump.cpp130
-rw-r--r--llvm/tools/llvm-objdump/ELFDump.h2
-rw-r--r--llvm/tools/llvm-objdump/MachODump.cpp38
-rw-r--r--llvm/tools/llvm-objdump/ObjdumpOpts.td10
-rw-r--r--llvm/tools/llvm-objdump/SourcePrinter.cpp50
-rw-r--r--llvm/tools/llvm-objdump/SourcePrinter.h4
-rw-r--r--llvm/tools/llvm-objdump/WasmDump.cpp19
-rw-r--r--llvm/tools/llvm-objdump/XCOFFDump.cpp326
-rw-r--r--llvm/tools/llvm-objdump/XCOFFDump.h7
-rw-r--r--llvm/tools/llvm-objdump/llvm-objdump.cpp322
-rw-r--r--llvm/tools/llvm-objdump/llvm-objdump.h100
-rw-r--r--llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp1
-rw-r--r--llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp3
-rw-r--r--llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp1
-rw-r--r--llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp1
-rw-r--r--llvm/tools/llvm-profdata/llvm-profdata.cpp301
-rw-r--r--llvm/tools/llvm-readobj/COFFDumper.cpp98
-rw-r--r--llvm/tools/llvm-readobj/ELFDumper.cpp896
-rw-r--r--llvm/tools/llvm-readobj/ObjDumper.h1
-rw-r--r--llvm/tools/llvm-readobj/Opts.td1
-rw-r--r--llvm/tools/llvm-readobj/XCOFFDumper.cpp11
-rw-r--r--llvm/tools/llvm-readobj/llvm-readobj.cpp8
-rw-r--r--llvm/tools/llvm-remarkutil/RemarkUtil.cpp109
-rw-r--r--llvm/tools/llvm-size/llvm-size.cpp3
-rw-r--r--llvm/tools/llvm-stress/llvm-stress.cpp14
-rw-r--r--llvm/tools/llvm-strings/llvm-strings.cpp2
-rw-r--r--llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp63
-rw-r--r--llvm/tools/llvm-tapi-diff/DiffEngine.cpp58
-rw-r--r--llvm/tools/llvm-tapi-diff/DiffEngine.h7
-rw-r--r--llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp4
-rw-r--r--llvm/tools/llvm-xray/xray-account.cpp2
-rw-r--r--llvm/tools/llvm-xray/xray-graph.cpp2
-rw-r--r--llvm/tools/opt/AnalysisWrappers.cpp71
-rw-r--r--llvm/tools/opt/BreakpointPrinter.cpp71
-rw-r--r--llvm/tools/opt/BreakpointPrinter.h24
-rw-r--r--llvm/tools/opt/NewPMDriver.cpp51
-rw-r--r--llvm/tools/opt/NewPMDriver.h8
-rw-r--r--llvm/tools/opt/opt.cpp109
-rw-r--r--llvm/utils/TableGen/AsmMatcherEmitter.cpp78
-rw-r--r--llvm/utils/TableGen/AsmWriterEmitter.cpp19
-rw-r--r--llvm/utils/TableGen/AsmWriterInst.cpp1
-rw-r--r--llvm/utils/TableGen/AsmWriterInst.h1
-rw-r--r--llvm/utils/TableGen/Attributes.cpp15
-rw-r--r--llvm/utils/TableGen/CTagsEmitter.cpp11
-rw-r--r--llvm/utils/TableGen/CallingConvEmitter.cpp24
-rw-r--r--llvm/utils/TableGen/CodeEmitterGen.cpp242
-rw-r--r--llvm/utils/TableGen/CodeGenDAGPatterns.cpp339
-rw-r--r--llvm/utils/TableGen/CodeGenDAGPatterns.h127
-rw-r--r--llvm/utils/TableGen/CodeGenHwModes.cpp43
-rw-r--r--llvm/utils/TableGen/CodeGenHwModes.h9
-rw-r--r--llvm/utils/TableGen/CodeGenInstAlias.cpp283
-rw-r--r--llvm/utils/TableGen/CodeGenInstAlias.h105
-rw-r--r--llvm/utils/TableGen/CodeGenInstruction.cpp266
-rw-r--r--llvm/utils/TableGen/CodeGenInstruction.h71
-rw-r--r--llvm/utils/TableGen/CodeGenIntrinsics.cpp270
-rw-r--r--llvm/utils/TableGen/CodeGenIntrinsics.h20
-rw-r--r--llvm/utils/TableGen/CodeGenMapTable.cpp1
-rw-r--r--llvm/utils/TableGen/CodeGenRegisters.cpp13
-rw-r--r--llvm/utils/TableGen/CodeGenRegisters.h16
-rw-r--r--llvm/utils/TableGen/CodeGenSchedule.cpp10
-rw-r--r--llvm/utils/TableGen/CodeGenSchedule.h7
-rw-r--r--llvm/utils/TableGen/CodeGenTarget.cpp326
-rw-r--r--llvm/utils/TableGen/CodeGenTarget.h21
-rw-r--r--llvm/utils/TableGen/CompressInstEmitter.cpp10
-rw-r--r--llvm/utils/TableGen/DAGISelEmitter.cpp16
-rw-r--r--llvm/utils/TableGen/DAGISelMatcher.cpp11
-rw-r--r--llvm/utils/TableGen/DAGISelMatcher.h83
-rw-r--r--llvm/utils/TableGen/DAGISelMatcherEmitter.cpp17
-rw-r--r--llvm/utils/TableGen/DAGISelMatcherGen.cpp104
-rw-r--r--llvm/utils/TableGen/DAGISelMatcherOpt.cpp284
-rw-r--r--llvm/utils/TableGen/DFAEmitter.cpp11
-rw-r--r--llvm/utils/TableGen/DFAEmitter.h2
-rw-r--r--llvm/utils/TableGen/DFAPacketizerEmitter.cpp12
-rw-r--r--llvm/utils/TableGen/DXILEmitter.cpp28
-rw-r--r--llvm/utils/TableGen/DecoderEmitter.cpp186
-rw-r--r--llvm/utils/TableGen/DirectiveEmitter.cpp127
-rw-r--r--llvm/utils/TableGen/DisassemblerEmitter.cpp11
-rw-r--r--llvm/utils/TableGen/ExegesisEmitter.cpp9
-rw-r--r--llvm/utils/TableGen/FastISelEmitter.cpp10
-rw-r--r--llvm/utils/TableGen/GICombinerEmitter.cpp71
-rw-r--r--llvm/utils/TableGen/GlobalISel/CodeExpander.h2
-rw-r--r--llvm/utils/TableGen/GlobalISel/CombinerUtils.h72
-rw-r--r--llvm/utils/TableGen/GlobalISel/GIMatchDag.h2
-rw-r--r--llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.h2
-rw-r--r--llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h2
-rw-r--r--llvm/utils/TableGen/GlobalISel/GIMatchDagOperands.h2
-rw-r--r--llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h2
-rw-r--r--llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp109
-rw-r--r--llvm/utils/TableGen/GlobalISel/GIMatchTree.h2
-rw-r--r--llvm/utils/TableGen/GlobalISelCombinerMatchTableEmitter.cpp1575
-rw-r--r--llvm/utils/TableGen/GlobalISelEmitter.cpp4431
-rw-r--r--llvm/utils/TableGen/GlobalISelMatchTable.cpp2019
-rw-r--r--llvm/utils/TableGen/GlobalISelMatchTable.h2162
-rw-r--r--llvm/utils/TableGen/GlobalISelMatchTableExecutorEmitter.cpp267
-rw-r--r--llvm/utils/TableGen/GlobalISelMatchTableExecutorEmitter.h228
-rw-r--r--llvm/utils/TableGen/InfoByHwMode.cpp6
-rw-r--r--llvm/utils/TableGen/InfoByHwMode.h85
-rw-r--r--llvm/utils/TableGen/InstrDocsEmitter.cpp18
-rw-r--r--llvm/utils/TableGen/InstrInfoEmitter.cpp293
-rw-r--r--llvm/utils/TableGen/IntrinsicEmitter.cpp406
-rw-r--r--llvm/utils/TableGen/OptParserEmitter.cpp9
-rw-r--r--llvm/utils/TableGen/OptRSTEmitter.cpp8
-rw-r--r--llvm/utils/TableGen/PredicateExpander.cpp1
-rw-r--r--llvm/utils/TableGen/PseudoLoweringEmitter.cpp9
-rw-r--r--llvm/utils/TableGen/RISCVTargetDefEmitter.cpp14
-rw-r--r--llvm/utils/TableGen/RegisterBankEmitter.cpp79
-rw-r--r--llvm/utils/TableGen/RegisterInfoEmitter.cpp82
-rw-r--r--llvm/utils/TableGen/SearchableTableEmitter.cpp12
-rw-r--r--llvm/utils/TableGen/SubtargetEmitter.cpp131
-rw-r--r--llvm/utils/TableGen/SubtargetFeatureInfo.cpp14
-rw-r--r--llvm/utils/TableGen/SubtargetFeatureInfo.h6
-rw-r--r--llvm/utils/TableGen/TableGen.cpp294
-rw-r--r--llvm/utils/TableGen/TableGenBackends.h41
-rw-r--r--llvm/utils/TableGen/Types.cpp8
-rw-r--r--llvm/utils/TableGen/Types.h3
-rw-r--r--llvm/utils/TableGen/VTEmitter.cpp130
-rw-r--r--llvm/utils/TableGen/VarLenCodeEmitterGen.cpp1
-rw-r--r--llvm/utils/TableGen/X86DisassemblerTables.cpp270
-rw-r--r--llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp18
-rw-r--r--llvm/utils/TableGen/X86FoldTablesEmitter.cpp546
-rw-r--r--llvm/utils/TableGen/X86ManualFoldTables.def288
-rw-r--r--llvm/utils/TableGen/X86MnemonicTables.cpp9
-rw-r--r--llvm/utils/TableGen/X86ModRMFilters.h2
-rw-r--r--llvm/utils/TableGen/X86RecognizableInstr.cpp31
-rw-r--r--llvm/utils/TableGen/X86RecognizableInstr.h10
2889 files changed, 225826 insertions, 121403 deletions
diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index 7ce91bf25193..fbba8ca42a8c 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -474,8 +474,6 @@ typedef unsigned LLVMAttributeIndex;
* @}
*/
-void LLVMInitializeCore(LLVMPassRegistryRef R);
-
/** Deallocate and destroy all ManagedStatic variables.
@see llvm::llvm_shutdown
@see ManagedStatic */
@@ -567,13 +565,6 @@ LLVMBool LLVMContextShouldDiscardValueNames(LLVMContextRef C);
void LLVMContextSetDiscardValueNames(LLVMContextRef C, LLVMBool Discard);
/**
- * Set whether the given context is in opaque pointer mode.
- *
- * @see LLVMContext::setOpaquePointers()
- */
-void LLVMContextSetOpaquePointers(LLVMContextRef C, LLVMBool OpaquePointers);
-
-/**
* Destroy a context instance.
*
* This should be called for every call to LLVMContextCreate() or memory
@@ -1418,8 +1409,6 @@ LLVMBool LLVMIsLiteralStruct(LLVMTypeRef StructTy);
/**
* Obtain the element type of an array or vector type.
*
- * This currently also works for pointer types, but this usage is deprecated.
- *
* @see llvm::SequentialType::getElementType()
*/
LLVMTypeRef LLVMGetElementType(LLVMTypeRef Ty);
@@ -1444,20 +1433,43 @@ unsigned LLVMGetNumContainedTypes(LLVMTypeRef Tp);
* The created type will exist in the context that its element type
* exists in.
*
+ * @deprecated LLVMArrayType is deprecated in favor of the API accurate
+ * LLVMArrayType2
* @see llvm::ArrayType::get()
*/
LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount);
/**
+ * Create a fixed size array type that refers to a specific type.
+ *
+ * The created type will exist in the context that its element type
+ * exists in.
+ *
+ * @see llvm::ArrayType::get()
+ */
+LLVMTypeRef LLVMArrayType2(LLVMTypeRef ElementType, uint64_t ElementCount);
+
+/**
* Obtain the length of an array type.
*
* This only works on types that represent arrays.
*
+ * @deprecated LLVMGetArrayLength is deprecated in favor of the API accurate
+ * LLVMGetArrayLength2
* @see llvm::ArrayType::getNumElements()
*/
unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy);
/**
+ * Obtain the length of an array type.
+ *
+ * This only works on types that represent arrays.
+ *
+ * @see llvm::ArrayType::getNumElements()
+ */
+uint64_t LLVMGetArrayLength2(LLVMTypeRef ArrayTy);
+
+/**
* Create a pointer type that points to a defined type.
*
* The created type will exist in the context that its pointee type
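The hunk above adds LLVMArrayType2 and LLVMGetArrayLength2, which widen the element count from unsigned to uint64_t. A minimal usage sketch (illustrative only, not part of the patch; assumes the LLVM 17 C headers are installed):

#include <llvm-c/Core.h>
#include <stdio.h>

int main(void) {
  LLVMContextRef Ctx = LLVMContextCreate();

  /* The element count is now a uint64_t, so lengths that do not fit in
     'unsigned' are representable. */
  LLVMTypeRef I8 = LLVMInt8TypeInContext(Ctx);
  LLVMTypeRef Arr = LLVMArrayType2(I8, 5000000000ULL);

  /* Read the length back through the matching 64-bit getter. */
  printf("array length = %llu\n",
         (unsigned long long)LLVMGetArrayLength2(Arr));

  LLVMContextDispose(Ctx);
  return 0;
}

The older unsigned-based LLVMArrayType and LLVMGetArrayLength remain available, but are marked deprecated in the hunk above.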
@@ -1792,6 +1804,7 @@ LLVMBool LLVMIsPoison(LLVMValueRef Val);
LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DECLARE_VALUE_CAST)
LLVMValueRef LLVMIsAMDNode(LLVMValueRef Val);
+LLVMValueRef LLVMIsAValueAsMetadata(LLVMValueRef Val);
LLVMValueRef LLVMIsAMDString(LLVMValueRef Val);
/** Deprecated: Use LLVMGetValueName2 instead. */
@@ -2124,12 +2137,22 @@ LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
/**
* Create a ConstantArray from values.
*
+ * @deprecated LLVMConstArray is deprecated in favor of the API accurate
+ * LLVMConstArray2
* @see llvm::ConstantArray::get()
*/
LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
LLVMValueRef *ConstantVals, unsigned Length);
/**
+ * Create a ConstantArray from values.
+ *
+ * @see llvm::ConstantArray::get()
+ */
+LLVMValueRef LLVMConstArray2(LLVMTypeRef ElementTy, LLVMValueRef *ConstantVals,
+ uint64_t Length);
+
+/**
* Create a non-anonymous ConstantStruct from values.
*
* @see llvm::ConstantStruct::get()
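LLVMConstArray2, added above, applies the same uint64_t widening to constant arrays. A short sketch (illustrative, not part of the patch) of building a small i8 constant array through the new entry point:

#include <llvm-c/Core.h>

/* Returns a constant [3 x i8] array; Ctx is assumed to be a live context. */
LLVMValueRef make_byte_array(LLVMContextRef Ctx) {
  LLVMTypeRef I8 = LLVMInt8TypeInContext(Ctx);
  LLVMValueRef Vals[3] = {
      LLVMConstInt(I8, 1, /*SignExtend=*/0),
      LLVMConstInt(I8, 2, 0),
      LLVMConstInt(I8, 3, 0),
  };
  /* The length parameter is now uint64_t rather than unsigned. */
  return LLVMConstArray2(I8, Vals, 3);
}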
@@ -2232,9 +2255,6 @@ LLVMValueRef LLVMConstPointerCast(LLVMValueRef ConstantVal,
LLVMValueRef LLVMConstIntCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType,
LLVMBool isSigned);
LLVMValueRef LLVMConstFPCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType);
-LLVMValueRef LLVMConstSelect(LLVMValueRef ConstantCondition,
- LLVMValueRef ConstantIfTrue,
- LLVMValueRef ConstantIfFalse);
LLVMValueRef LLVMConstExtractElement(LLVMValueRef VectorConstant,
LLVMValueRef IndexConstant);
LLVMValueRef LLVMConstInsertElement(LLVMValueRef VectorConstant,
@@ -2921,6 +2941,14 @@ unsigned LLVMGetMDNodeNumOperands(LLVMValueRef V);
*/
void LLVMGetMDNodeOperands(LLVMValueRef V, LLVMValueRef *Dest);
+/**
+ * Replace an operand at a specific index in a llvm::MDNode value.
+ *
+ * @see llvm::MDNode::replaceOperandWith()
+ */
+void LLVMReplaceMDNodeOperandWith(LLVMValueRef V, unsigned Index,
+ LLVMMetadataRef Replacement);
+
/** Deprecated: Use LLVMMDStringInContext2 instead. */
LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str,
unsigned SLen);
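The new LLVMReplaceMDNodeOperandWith wrapper above corresponds to llvm::MDNode::replaceOperandWith(). A hedged sketch of in-place operand replacement; the node, index, and string below are made up for illustration:

#include <llvm-c/Core.h>

/* MDNodeVal is assumed to be an LLVMValueRef wrapping an MDNode that has
   at least one operand. */
void retag_node(LLVMContextRef Ctx, LLVMValueRef MDNodeVal) {
  LLVMMetadataRef NewOp = LLVMMDStringInContext2(Ctx, "new-tag", 7);
  /* Swap out operand 0 in place; the remaining operands are untouched. */
  LLVMReplaceMDNodeOperandWith(MDNodeVal, 0, NewOp);
}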
@@ -3885,6 +3913,13 @@ LLVMValueRef LLVMBuildNUWNeg(LLVMBuilderRef B, LLVMValueRef V,
LLVMValueRef LLVMBuildFNeg(LLVMBuilderRef, LLVMValueRef V, const char *Name);
LLVMValueRef LLVMBuildNot(LLVMBuilderRef, LLVMValueRef V, const char *Name);
+LLVMBool LLVMGetNUW(LLVMValueRef ArithInst);
+void LLVMSetNUW(LLVMValueRef ArithInst, LLVMBool HasNUW);
+LLVMBool LLVMGetNSW(LLVMValueRef ArithInst);
+void LLVMSetNSW(LLVMValueRef ArithInst, LLVMBool HasNSW);
+LLVMBool LLVMGetExact(LLVMValueRef DivOrShrInst);
+void LLVMSetExact(LLVMValueRef DivOrShrInst, LLVMBool IsExact);
+
/* Memory */
LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef, LLVMTypeRef Ty, const char *Name);
LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef, LLVMTypeRef Ty,
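The accessors added above expose the nuw/nsw and exact instruction flags to C clients. A small sketch, assuming Add and Div are previously built add and udiv instructions:

#include <llvm-c/Core.h>

void tighten_flags(LLVMValueRef Add, LLVMValueRef Div) {
  if (!LLVMGetNUW(Add))
    LLVMSetNUW(Add, 1);   /* mark the add no-unsigned-wrap */
  if (!LLVMGetNSW(Add))
    LLVMSetNSW(Add, 1);   /* ...and no-signed-wrap */
  LLVMSetExact(Div, 1);   /* exact flag on udiv/sdiv/lshr/ashr */
  (void)LLVMGetExact(Div);
}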
@@ -4065,8 +4100,8 @@ int LLVMGetUndefMaskElem(void);
* Get the mask value at position Elt in the mask of a ShuffleVector
* instruction.
*
- * \Returns the result of \c LLVMGetUndefMaskElem() if the mask value is undef
- * at that position.
+ * \Returns the result of \c LLVMGetUndefMaskElem() if the mask value is
+ * poison at that position.
*/
int LLVMGetMaskValue(LLVMValueRef ShuffleVectorInst, unsigned Elt);
@@ -4133,21 +4168,6 @@ void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf);
*/
/**
- * @defgroup LLVMCCorePassRegistry Pass Registry
- * @ingroup LLVMCCore
- *
- * @{
- */
-
-/** Return the global pass registry, for use with initialization functions.
- @see llvm::PassRegistry::getPassRegistry */
-LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void);
-
-/**
- * @}
- */
-
-/**
* @defgroup LLVMCCorePassManagers Pass Managers
* @ingroup LLVMCCore
*
diff --git a/llvm/include/llvm-c/DebugInfo.h b/llvm/include/llvm-c/DebugInfo.h
index 122bfccedf5c..5924294708cc 100644
--- a/llvm/include/llvm-c/DebugInfo.h
+++ b/llvm/include/llvm-c/DebugInfo.h
@@ -125,6 +125,7 @@ typedef enum {
LLVMDWARFSourceLanguageFortran18,
LLVMDWARFSourceLanguageAda2005,
LLVMDWARFSourceLanguageAda2012,
+ LLVMDWARFSourceLanguageMojo,
// Vendor extensions:
LLVMDWARFSourceLanguageMips_Assembler,
LLVMDWARFSourceLanguageGOOGLE_RenderScript,
@@ -1149,6 +1150,12 @@ LLVMMetadataRef LLVMDIBuilderCreateGlobalVariableExpression(
unsigned LineNo, LLVMMetadataRef Ty, LLVMBool LocalToUnit,
LLVMMetadataRef Expr, LLVMMetadataRef Decl, uint32_t AlignInBits);
+
+/**
+ * Get the dwarf::Tag of a DINode
+ */
+uint16_t LLVMGetDINodeTag(LLVMMetadataRef MD);
+
/**
* Retrieves the \c DIVariable associated with this global variable expression.
* \param GVE The global variable expression.
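A small sketch of the new LLVMGetDINodeTag helper above, assuming a valid LLVMDIBuilderRef. The encoding constant 0x05 stands for DW_ATE_signed, and the tag returned for a basic type node should be DW_TAG_base_type (0x24).

#include "llvm-c/DebugInfo.h"

// Creates a signed 32-bit basic type and returns its raw dwarf::Tag value.
static uint16_t tagOfBasicIntType(LLVMDIBuilderRef DIB) {
  LLVMMetadataRef IntTy = LLVMDIBuilderCreateBasicType(
      DIB, "int", 3, 32, /*Encoding=*/0x05, LLVMDIFlagZero);
  return LLVMGetDINodeTag(IntTy);
}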
diff --git a/llvm/include/llvm-c/Initialization.h b/llvm/include/llvm-c/Initialization.h
deleted file mode 100644
index a24fc9598967..000000000000
--- a/llvm/include/llvm-c/Initialization.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*===-- llvm-c/Initialization.h - Initialization C Interface ------*- C -*-===*\
-|* *|
-|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
-|* Exceptions. *|
-|* See https://llvm.org/LICENSE.txt for license information. *|
-|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
-|* *|
-|*===----------------------------------------------------------------------===*|
-|* *|
-|* This header declares the C interface to LLVM initialization routines, *|
-|* which must be called before you can use the functionality provided by *|
-|* the corresponding LLVM library. *|
-|* *|
-\*===----------------------------------------------------------------------===*/
-
-#ifndef LLVM_C_INITIALIZATION_H
-#define LLVM_C_INITIALIZATION_H
-
-#include "llvm-c/ExternC.h"
-#include "llvm-c/Types.h"
-
-LLVM_C_EXTERN_C_BEGIN
-
-/**
- * @defgroup LLVMCInitialization Initialization Routines
- * @ingroup LLVMC
- *
- * This module contains routines used to initialize the LLVM system.
- *
- * @{
- */
-
-void LLVMInitializeCore(LLVMPassRegistryRef R);
-void LLVMInitializeTransformUtils(LLVMPassRegistryRef R);
-void LLVMInitializeScalarOpts(LLVMPassRegistryRef R);
-void LLVMInitializeVectorization(LLVMPassRegistryRef R);
-void LLVMInitializeInstCombine(LLVMPassRegistryRef R);
-void LLVMInitializeIPO(LLVMPassRegistryRef R);
-void LLVMInitializeAnalysis(LLVMPassRegistryRef R);
-void LLVMInitializeIPA(LLVMPassRegistryRef R);
-void LLVMInitializeCodeGen(LLVMPassRegistryRef R);
-void LLVMInitializeTarget(LLVMPassRegistryRef R);
-
-/**
- * @}
- */
-
-LLVM_C_EXTERN_C_END
-
-#endif
diff --git a/llvm/include/llvm-c/Transforms/IPO.h b/llvm/include/llvm-c/Transforms/IPO.h
deleted file mode 100644
index b049e9e67f34..000000000000
--- a/llvm/include/llvm-c/Transforms/IPO.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*===-- IPO.h - Interprocedural Transformations C Interface -----*- C++ -*-===*\
-|* *|
-|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
-|* Exceptions. *|
-|* See https://llvm.org/LICENSE.txt for license information. *|
-|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
-|* *|
-|*===----------------------------------------------------------------------===*|
-|* *|
-|* This header declares the C interface to libLLVMIPO.a, which implements *|
-|* various interprocedural transformations of the LLVM IR. *|
-|* *|
-\*===----------------------------------------------------------------------===*/
-
-#ifndef LLVM_C_TRANSFORMS_IPO_H
-#define LLVM_C_TRANSFORMS_IPO_H
-
-#include "llvm-c/ExternC.h"
-#include "llvm-c/Types.h"
-
-LLVM_C_EXTERN_C_BEGIN
-
-/**
- * @defgroup LLVMCTransformsIPO Interprocedural transformations
- * @ingroup LLVMCTransforms
- *
- * @{
- */
-
-/** See llvm::createConstantMergePass function. */
-void LLVMAddConstantMergePass(LLVMPassManagerRef PM);
-
-/** See llvm::createMergeFunctionsPass function. */
-void LLVMAddMergeFunctionsPass(LLVMPassManagerRef PM);
-
-/** See llvm::createCalledValuePropagationPass function. */
-void LLVMAddCalledValuePropagationPass(LLVMPassManagerRef PM);
-
-/** See llvm::createDeadArgEliminationPass function. */
-void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM);
-
-/** See llvm::createFunctionAttrsPass function. */
-void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM);
-
-/** See llvm::createFunctionInliningPass function. */
-void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM);
-
-/** See llvm::createAlwaysInlinerPass function. */
-void LLVMAddAlwaysInlinerPass(LLVMPassManagerRef PM);
-
-/** See llvm::createGlobalDCEPass function. */
-void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM);
-
-/** See llvm::createGlobalOptimizerPass function. */
-void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM);
-
-/** See llvm::createIPSCCPPass function. */
-void LLVMAddIPSCCPPass(LLVMPassManagerRef PM);
-
-/** See llvm::createInternalizePass function. */
-void LLVMAddInternalizePass(LLVMPassManagerRef, unsigned AllButMain);
-
-/**
- * Create and add the internalize pass to the given pass manager with the
- * provided preservation callback.
- *
- * The context parameter is forwarded to the callback on each invocation.
- * As such, it is the responsibility of the caller to extend its lifetime
- * until execution of this pass has finished.
- *
- * @see llvm::createInternalizePass function.
- */
-void LLVMAddInternalizePassWithMustPreservePredicate(
- LLVMPassManagerRef PM,
- void *Context,
- LLVMBool (*MustPreserve)(LLVMValueRef, void *));
-
-/** See llvm::createStripDeadPrototypesPass function. */
-void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM);
-
-/** See llvm::createStripSymbolsPass function. */
-void LLVMAddStripSymbolsPass(LLVMPassManagerRef PM);
-
-/**
- * @}
- */
-
-LLVM_C_EXTERN_C_END
-
-#endif
diff --git a/llvm/include/llvm-c/Transforms/InstCombine.h b/llvm/include/llvm-c/Transforms/InstCombine.h
deleted file mode 100644
index ebe17d667061..000000000000
--- a/llvm/include/llvm-c/Transforms/InstCombine.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*===-- Scalar.h - Scalar Transformation Library C Interface ----*- C++ -*-===*\
-|* *|
-|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
-|* Exceptions. *|
-|* See https://llvm.org/LICENSE.txt for license information. *|
-|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
-|* *|
-|*===----------------------------------------------------------------------===*|
-|* *|
-|* This header declares the C interface to libLLVMInstCombine.a, which *|
-|* combines instructions to form fewer, simple IR instructions. *|
-|* *|
-\*===----------------------------------------------------------------------===*/
-
-#ifndef LLVM_C_TRANSFORMS_INSTCOMBINE_H
-#define LLVM_C_TRANSFORMS_INSTCOMBINE_H
-
-#include "llvm-c/ExternC.h"
-#include "llvm-c/Types.h"
-
-LLVM_C_EXTERN_C_BEGIN
-
-/**
- * @defgroup LLVMCTransformsInstCombine Instruction Combining transformations
- * @ingroup LLVMCTransforms
- *
- * @{
- */
-
-/** See llvm::createInstructionCombiningPass function. */
-void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM);
-
-/**
- * @}
- */
-
-LLVM_C_EXTERN_C_END
-
-#endif
-
diff --git a/llvm/include/llvm-c/Transforms/PassBuilder.h b/llvm/include/llvm-c/Transforms/PassBuilder.h
index 6d9f1b45c707..d0466dd7fc0a 100644
--- a/llvm/include/llvm-c/Transforms/PassBuilder.h
+++ b/llvm/include/llvm-c/Transforms/PassBuilder.h
@@ -99,6 +99,9 @@ void LLVMPassBuilderOptionsSetCallGraphProfile(
void LLVMPassBuilderOptionsSetMergeFunctions(LLVMPassBuilderOptionsRef Options,
LLVMBool MergeFunctions);
+void LLVMPassBuilderOptionsSetInlinerThreshold(
+ LLVMPassBuilderOptionsRef Options, int Threshold);
+
/**
* Dispose of a heap-allocated PassBuilderOptions instance
*/
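A hedged sketch of how the new inliner-threshold option above might be used when driving the new pass manager through the C API; M is an assumed module, TM may be null, and 275 is an arbitrary threshold chosen only for illustration.

#include "llvm-c/Transforms/PassBuilder.h"

// Runs the default O2 pipeline with a custom inliner threshold.
static void runO2WithInlinerThreshold(LLVMModuleRef M,
                                      LLVMTargetMachineRef TM) {
  LLVMPassBuilderOptionsRef Opts = LLVMCreatePassBuilderOptions();
  LLVMPassBuilderOptionsSetInlinerThreshold(Opts, 275);
  LLVMErrorRef Err = LLVMRunPasses(M, "default<O2>", TM, Opts);
  if (Err)
    LLVMConsumeError(Err); // real code would report the error message instead
  LLVMDisposePassBuilderOptions(Opts);
}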
diff --git a/llvm/include/llvm-c/Transforms/PassManagerBuilder.h b/llvm/include/llvm-c/Transforms/PassManagerBuilder.h
deleted file mode 100644
index 3ba75440129a..000000000000
--- a/llvm/include/llvm-c/Transforms/PassManagerBuilder.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*===-- llvm-c/Transform/PassManagerBuilder.h - PMB C Interface ---*- C -*-===*\
-|* *|
-|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
-|* Exceptions. *|
-|* See https://llvm.org/LICENSE.txt for license information. *|
-|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
-|* *|
-|*===----------------------------------------------------------------------===*|
-|* *|
-|* This header declares the C interface to the PassManagerBuilder class. *|
-|* *|
-\*===----------------------------------------------------------------------===*/
-
-#ifndef LLVM_C_TRANSFORMS_PASSMANAGERBUILDER_H
-#define LLVM_C_TRANSFORMS_PASSMANAGERBUILDER_H
-
-#include "llvm-c/ExternC.h"
-#include "llvm-c/Types.h"
-
-typedef struct LLVMOpaquePassManagerBuilder *LLVMPassManagerBuilderRef;
-
-LLVM_C_EXTERN_C_BEGIN
-
-/**
- * @defgroup LLVMCTransformsPassManagerBuilder Pass manager builder
- * @ingroup LLVMCTransforms
- *
- * @{
- */
-
-/** See llvm::PassManagerBuilder. */
-LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate(void);
-void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB);
-
-/** See llvm::PassManagerBuilder::OptLevel. */
-void
-LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB,
- unsigned OptLevel);
-
-/** See llvm::PassManagerBuilder::SizeLevel. */
-void
-LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB,
- unsigned SizeLevel);
-
-/** See llvm::PassManagerBuilder::DisableUnitAtATime. */
-void
-LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB,
- LLVMBool Value);
-
-/** See llvm::PassManagerBuilder::DisableUnrollLoops. */
-void
-LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB,
- LLVMBool Value);
-
-/** See llvm::PassManagerBuilder::DisableSimplifyLibCalls */
-void
-LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB,
- LLVMBool Value);
-
-/** See llvm::PassManagerBuilder::Inliner. */
-void
-LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB,
- unsigned Threshold);
-
-/** See llvm::PassManagerBuilder::populateFunctionPassManager. */
-void
-LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
- LLVMPassManagerRef PM);
-
-/** See llvm::PassManagerBuilder::populateModulePassManager. */
-void
-LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
- LLVMPassManagerRef PM);
-
-/**
- * @}
- */
-
-LLVM_C_EXTERN_C_END
-
-#endif
diff --git a/llvm/include/llvm-c/Transforms/Scalar.h b/llvm/include/llvm-c/Transforms/Scalar.h
deleted file mode 100644
index 1d0944799710..000000000000
--- a/llvm/include/llvm-c/Transforms/Scalar.h
+++ /dev/null
@@ -1,167 +0,0 @@
-/*===-- Scalar.h - Scalar Transformation Library C Interface ----*- C++ -*-===*\
-|* *|
-|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
-|* Exceptions. *|
-|* See https://llvm.org/LICENSE.txt for license information. *|
-|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
-|* *|
-|*===----------------------------------------------------------------------===*|
-|* *|
-|* This header declares the C interface to libLLVMScalarOpts.a, which *|
-|* implements various scalar transformations of the LLVM IR. *|
-|* *|
-|* Many exotic languages can interoperate with C code but have a harder time *|
-|* with C++ due to name mangling. So in addition to C, this interface enables *|
-|* tools written in such languages. *|
-|* *|
-\*===----------------------------------------------------------------------===*/
-
-#ifndef LLVM_C_TRANSFORMS_SCALAR_H
-#define LLVM_C_TRANSFORMS_SCALAR_H
-
-#include "llvm-c/ExternC.h"
-#include "llvm-c/Types.h"
-
-LLVM_C_EXTERN_C_BEGIN
-
-/**
- * @defgroup LLVMCTransformsScalar Scalar transformations
- * @ingroup LLVMCTransforms
- *
- * @{
- */
-
-/** See llvm::createAggressiveDCEPass function. */
-void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM);
-
-/** See llvm::createDeadCodeEliminationPass function. */
-void LLVMAddDCEPass(LLVMPassManagerRef PM);
-
-/** See llvm::createBitTrackingDCEPass function. */
-void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM);
-
-/** See llvm::createAlignmentFromAssumptionsPass function. */
-void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM);
-
-/** See llvm::createCFGSimplificationPass function. */
-void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM);
-
-/** See llvm::createDeadStoreEliminationPass function. */
-void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM);
-
-/** See llvm::createScalarizerPass function. */
-void LLVMAddScalarizerPass(LLVMPassManagerRef PM);
-
-/** See llvm::createMergedLoadStoreMotionPass function. */
-void LLVMAddMergedLoadStoreMotionPass(LLVMPassManagerRef PM);
-
-/** See llvm::createGVNPass function. */
-void LLVMAddGVNPass(LLVMPassManagerRef PM);
-
-/** See llvm::createGVNPass function. */
-void LLVMAddNewGVNPass(LLVMPassManagerRef PM);
-
-/** See llvm::createIndVarSimplifyPass function. */
-void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM);
-
-/** See llvm::createInstructionCombiningPass function. */
-void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM);
-
-/** See llvm::createInstSimplifyLegacyPass function. */
-void LLVMAddInstructionSimplifyPass(LLVMPassManagerRef PM);
-
-/** See llvm::createJumpThreadingPass function. */
-void LLVMAddJumpThreadingPass(LLVMPassManagerRef PM);
-
-/** See llvm::createLICMPass function. */
-void LLVMAddLICMPass(LLVMPassManagerRef PM);
-
-/** See llvm::createLoopDeletionPass function. */
-void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM);
-
-/** See llvm::createLoopIdiomPass function */
-void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM);
-
-/** See llvm::createLoopRotatePass function. */
-void LLVMAddLoopRotatePass(LLVMPassManagerRef PM);
-
-/** See llvm::createLoopRerollPass function. */
-void LLVMAddLoopRerollPass(LLVMPassManagerRef PM);
-
-/** See llvm::createLoopUnrollPass function. */
-void LLVMAddLoopUnrollPass(LLVMPassManagerRef PM);
-
-/** See llvm::createLoopUnrollAndJamPass function. */
-void LLVMAddLoopUnrollAndJamPass(LLVMPassManagerRef PM);
-
-/** See llvm::createLowerAtomicPass function. */
-void LLVMAddLowerAtomicPass(LLVMPassManagerRef PM);
-
-/** See llvm::createMemCpyOptPass function. */
-void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM);
-
-/** See llvm::createPartiallyInlineLibCallsPass function. */
-void LLVMAddPartiallyInlineLibCallsPass(LLVMPassManagerRef PM);
-
-/** See llvm::createReassociatePass function. */
-void LLVMAddReassociatePass(LLVMPassManagerRef PM);
-
-/** See llvm::createSCCPPass function. */
-void LLVMAddSCCPPass(LLVMPassManagerRef PM);
-
-/** See llvm::createSROAPass function. */
-void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM);
-
-/** See llvm::createSROAPass function. */
-void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM);
-
-/** See llvm::createSROAPass function. */
-void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM,
- int Threshold);
-
-/** See llvm::createSimplifyLibCallsPass function. */
-void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM);
-
-/** See llvm::createTailCallEliminationPass function. */
-void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM);
-
-/** See llvm::demotePromoteMemoryToRegisterPass function. */
-void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM);
-
-/** See llvm::createVerifierPass function. */
-void LLVMAddVerifierPass(LLVMPassManagerRef PM);
-
-/** See llvm::createCorrelatedValuePropagationPass function */
-void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM);
-
-/** See llvm::createEarlyCSEPass function */
-void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM);
-
-/** See llvm::createEarlyCSEPass function */
-void LLVMAddEarlyCSEMemSSAPass(LLVMPassManagerRef PM);
-
-/** See llvm::createLowerExpectIntrinsicPass function */
-void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM);
-
-/** See llvm::createLowerConstantIntrinsicsPass function */
-void LLVMAddLowerConstantIntrinsicsPass(LLVMPassManagerRef PM);
-
-/** See llvm::createTypeBasedAliasAnalysisPass function */
-void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM);
-
-/** See llvm::createScopedNoAliasAAPass function */
-void LLVMAddScopedNoAliasAAPass(LLVMPassManagerRef PM);
-
-/** See llvm::createBasicAliasAnalysisPass function */
-void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM);
-
-/** See llvm::createUnifyFunctionExitNodesPass function */
-void LLVMAddUnifyFunctionExitNodesPass(LLVMPassManagerRef PM);
-
-/**
- * @}
- */
-
-LLVM_C_EXTERN_C_END
-
-#endif
diff --git a/llvm/include/llvm-c/Transforms/Utils.h b/llvm/include/llvm-c/Transforms/Utils.h
deleted file mode 100644
index 30d1ae63de1d..000000000000
--- a/llvm/include/llvm-c/Transforms/Utils.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*===-- Utils.h - Transformation Utils Library C Interface ------*- C++ -*-===*\
-|* *|
-|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
-|* Exceptions. *|
-|* See https://llvm.org/LICENSE.txt for license information. *|
-|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
-|* *|
-|*===----------------------------------------------------------------------===*|
-|* *|
-|* This header declares the C interface to libLLVMTransformUtils.a, which *|
-|* implements various transformation utilities of the LLVM IR. *|
-|* *|
-|* Many exotic languages can interoperate with C code but have a harder time *|
-|* with C++ due to name mangling. So in addition to C, this interface enables *|
-|* tools written in such languages. *|
-|* *|
-\*===----------------------------------------------------------------------===*/
-
-#ifndef LLVM_C_TRANSFORMS_UTILS_H
-#define LLVM_C_TRANSFORMS_UTILS_H
-
-#include "llvm-c/ExternC.h"
-#include "llvm-c/Types.h"
-
-LLVM_C_EXTERN_C_BEGIN
-
-/**
- * @defgroup LLVMCTransformsUtils Transformation Utilities
- * @ingroup LLVMCTransforms
- *
- * @{
- */
-
-/** See llvm::createLowerSwitchPass function. */
-void LLVMAddLowerSwitchPass(LLVMPassManagerRef PM);
-
-/** See llvm::createPromoteMemoryToRegisterPass function. */
-void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM);
-
-/** See llvm::createAddDiscriminatorsPass function. */
-void LLVMAddAddDiscriminatorsPass(LLVMPassManagerRef PM);
-
-/**
- * @}
- */
-
-LLVM_C_EXTERN_C_END
-
-#endif
-
diff --git a/llvm/include/llvm-c/Transforms/Vectorize.h b/llvm/include/llvm-c/Transforms/Vectorize.h
deleted file mode 100644
index 0de458381399..000000000000
--- a/llvm/include/llvm-c/Transforms/Vectorize.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*===---------------------------Vectorize.h --------------------- -*- C -*-===*\
-|*===----------- Vectorization Transformation Library C Interface ---------===*|
-|* *|
-|* Part of the LLVM Project, under the Apache License v2.0 with LLVM *|
-|* Exceptions. *|
-|* See https://llvm.org/LICENSE.txt for license information. *|
-|* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception *|
-|* *|
-|*===----------------------------------------------------------------------===*|
-|* *|
-|* This header declares the C interface to libLLVMVectorize.a, which *|
-|* implements various vectorization transformations of the LLVM IR. *|
-|* *|
-|* Many exotic languages can interoperate with C code but have a harder time *|
-|* with C++ due to name mangling. So in addition to C, this interface enables *|
-|* tools written in such languages. *|
-|* *|
-\*===----------------------------------------------------------------------===*/
-
-#ifndef LLVM_C_TRANSFORMS_VECTORIZE_H
-#define LLVM_C_TRANSFORMS_VECTORIZE_H
-
-#include "llvm-c/ExternC.h"
-#include "llvm-c/Types.h"
-
-LLVM_C_EXTERN_C_BEGIN
-
-/**
- * @defgroup LLVMCTransformsVectorize Vectorization transformations
- * @ingroup LLVMCTransforms
- *
- * @{
- */
-
-/** See llvm::createLoopVectorizePass function. */
-void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM);
-
-/** See llvm::createSLPVectorizerPass function. */
-void LLVMAddSLPVectorizePass(LLVMPassManagerRef PM);
-
-/**
- * @}
- */
-
-LLVM_C_EXTERN_C_END
-
-#endif
diff --git a/llvm/include/llvm-c/Types.h b/llvm/include/llvm-c/Types.h
index 4e02498a2348..4e9967372d79 100644
--- a/llvm/include/llvm-c/Types.h
+++ b/llvm/include/llvm-c/Types.h
@@ -126,9 +126,6 @@ typedef struct LLVMOpaqueModuleProvider *LLVMModuleProviderRef;
/** @see llvm::PassManagerBase */
typedef struct LLVMOpaquePassManager *LLVMPassManagerRef;
-/** @see llvm::PassRegistry */
-typedef struct LLVMOpaquePassRegistry *LLVMPassRegistryRef;
-
/**
* Used to get the users and usees of a Value.
*
diff --git a/llvm/include/llvm-c/module.modulemap b/llvm/include/llvm-c/module.modulemap
deleted file mode 100644
index a456119595c9..000000000000
--- a/llvm/include/llvm-c/module.modulemap
+++ /dev/null
@@ -1,4 +0,0 @@
-module LLVM_C {
- umbrella "."
- module * { export * }
-}
diff --git a/llvm/include/llvm/ADT/ADL.h b/llvm/include/llvm/ADT/ADL.h
new file mode 100644
index 000000000000..ab1f28ff6b9c
--- /dev/null
+++ b/llvm/include/llvm/ADT/ADL.h
@@ -0,0 +1,103 @@
+//===- llvm/ADT/ADL.h - Argument dependent lookup utilities -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_ADL_H
+#define LLVM_ADT_ADL_H
+
+#include <type_traits>
+#include <iterator>
+#include <utility>
+
+namespace llvm {
+
+// Only used by compiler if both template types are the same. Useful when
+// using SFINAE to test for the existence of member functions.
+template <typename T, T> struct SameType;
+
+namespace adl_detail {
+
+using std::begin;
+
+template <typename RangeT>
+constexpr auto begin_impl(RangeT &&range)
+ -> decltype(begin(std::forward<RangeT>(range))) {
+ return begin(std::forward<RangeT>(range));
+}
+
+using std::end;
+
+template <typename RangeT>
+constexpr auto end_impl(RangeT &&range)
+ -> decltype(end(std::forward<RangeT>(range))) {
+ return end(std::forward<RangeT>(range));
+}
+
+using std::swap;
+
+template <typename T>
+constexpr void swap_impl(T &&lhs,
+ T &&rhs) noexcept(noexcept(swap(std::declval<T>(),
+ std::declval<T>()))) {
+ swap(std::forward<T>(lhs), std::forward<T>(rhs));
+}
+
+using std::size;
+
+template <typename RangeT>
+constexpr auto size_impl(RangeT &&range)
+ -> decltype(size(std::forward<RangeT>(range))) {
+ return size(std::forward<RangeT>(range));
+}
+
+} // end namespace adl_detail
+
+/// Returns the begin iterator to \p range using `std::begin` and
+/// functions found through Argument-Dependent Lookup (ADL).
+template <typename RangeT>
+constexpr auto adl_begin(RangeT &&range)
+ -> decltype(adl_detail::begin_impl(std::forward<RangeT>(range))) {
+ return adl_detail::begin_impl(std::forward<RangeT>(range));
+}
+
+/// Returns the end iterator to \p range using `std::end` and
+/// functions found through Argument-Dependent Lookup (ADL).
+template <typename RangeT>
+constexpr auto adl_end(RangeT &&range)
+ -> decltype(adl_detail::end_impl(std::forward<RangeT>(range))) {
+ return adl_detail::end_impl(std::forward<RangeT>(range));
+}
+
+/// Swaps \p lhs with \p rhs using `std::swap` and functions found through
+/// Argument-Dependent Lookup (ADL).
+template <typename T>
+constexpr void adl_swap(T &&lhs, T &&rhs) noexcept(
+ noexcept(adl_detail::swap_impl(std::declval<T>(), std::declval<T>()))) {
+ adl_detail::swap_impl(std::forward<T>(lhs), std::forward<T>(rhs));
+}
+
+/// Returns the size of \p range using `std::size` and functions found through
+/// Argument-Dependent Lookup (ADL).
+template <typename RangeT>
+constexpr auto adl_size(RangeT &&range)
+ -> decltype(adl_detail::size_impl(std::forward<RangeT>(range))) {
+ return adl_detail::size_impl(std::forward<RangeT>(range));
+}
+
+namespace detail {
+
+template <typename RangeT>
+using IterOfRange = decltype(adl_begin(std::declval<RangeT &>()));
+
+template <typename RangeT>
+using ValueOfRange =
+ std::remove_reference_t<decltype(*adl_begin(std::declval<RangeT &>()))>;
+
+} // namespace detail
+} // namespace llvm
+
+#endif // LLVM_ADT_ADL_H
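A brief sketch of what the new ADL helpers provide; mylib::Ring is a made-up type, but because its begin/end free functions live in the same namespace, adl_begin and adl_end locate them without any specialization or traits.

#include "llvm/ADT/ADL.h"
#include <vector>

namespace mylib {
struct Ring {
  std::vector<int> Data;
};
inline auto begin(Ring &R) { return R.Data.begin(); }
inline auto end(Ring &R) { return R.Data.end(); }
inline auto size(const Ring &R) { return R.Data.size(); }
} // namespace mylib

// Sums the elements; iteration goes through the ADL helpers.
// llvm::adl_size(R) would similarly resolve to mylib::size.
inline int sumRing(mylib::Ring &R) {
  int S = 0;
  for (auto It = llvm::adl_begin(R), E = llvm::adl_end(R); It != E; ++It)
    S += *It;
  return S;
}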
diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h
index c0e2d13c2939..64caa5a76545 100644
--- a/llvm/include/llvm/ADT/APFloat.h
+++ b/llvm/include/llvm/ADT/APFloat.h
@@ -158,11 +158,37 @@ struct APFloatBase {
// 8-bit floating point number following IEEE-754 conventions with bit
// layout S1E5M2 as described in https://arxiv.org/abs/2209.05433.
S_Float8E5M2,
+ // 8-bit floating point number mostly following IEEE-754 conventions
+ // and bit layout S1E5M2 described in https://arxiv.org/abs/2206.02915,
+ // with expanded range and with no infinity or signed zero.
+ // NaN is represented as negative zero. (FN -> Finite, UZ -> unsigned zero).
+ // This format's exponent bias is 16, instead of the 15 (2 ** (5 - 1) - 1)
+ // that IEEE precedent would imply.
+ S_Float8E5M2FNUZ,
// 8-bit floating point number mostly following IEEE-754 conventions with
// bit layout S1E4M3 as described in https://arxiv.org/abs/2209.05433.
// Unlike IEEE-754 types, there are no infinity values, and NaN is
// represented with the exponent and mantissa bits set to all 1s.
S_Float8E4M3FN,
+ // 8-bit floating point number mostly following IEEE-754 conventions
+ // and bit layout S1E4M3 described in https://arxiv.org/abs/2206.02915,
+ // with expanded range and with no infinity or signed zero.
+ // NaN is represented as negative zero. (FN -> Finite, UZ -> unsigned zero).
+ // This format's exponent bias is 8, instead of the 7 (2 ** (4 - 1) - 1)
+ // that IEEE precedent would imply.
+ S_Float8E4M3FNUZ,
+ // 8-bit floating point number mostly following IEEE-754 conventions
+ // and bit layout S1E4M3 with expanded range and with no infinity or signed
+ // zero.
+ // NaN is represented as negative zero. (FN -> Finite, UZ -> unsigned zero).
+ // This format's exponent bias is 11, instead of the 7 (2 ** (4 - 1) - 1)
+ // that IEEE precedent would imply.
+ S_Float8E4M3B11FNUZ,
+ // Floating point number that occupies 32 bits or less of storage, providing
+ // improved range compared to half (16-bit) formats, at (potentially)
+ // greater throughput than single precision (32-bit) formats.
+ S_FloatTF32,
+
S_x87DoubleExtended,
S_MaxSemantics = S_x87DoubleExtended,
};
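A minimal sketch that exercises one of the new 8-bit semantics listed above; APFloat::convert is the pre-existing conversion entry point, only the Float8E4M3FNUZ semantics handle is new here.

#include "llvm/ADT/APFloat.h"

// Returns true if D converts to Float8E4M3FNUZ exactly (no rounding, no range loss).
inline bool fitsInFloat8E4M3FNUZ(double D) {
  bool LosesInfo = false;
  llvm::APFloat F(D);
  llvm::APFloat::opStatus S =
      F.convert(llvm::APFloat::Float8E4M3FNUZ(),
                llvm::APFloat::rmNearestTiesToEven, &LosesInfo);
  return S == llvm::APFloat::opOK && !LosesInfo;
}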
@@ -177,7 +203,11 @@ struct APFloatBase {
static const fltSemantics &IEEEquad() LLVM_READNONE;
static const fltSemantics &PPCDoubleDouble() LLVM_READNONE;
static const fltSemantics &Float8E5M2() LLVM_READNONE;
+ static const fltSemantics &Float8E5M2FNUZ() LLVM_READNONE;
static const fltSemantics &Float8E4M3FN() LLVM_READNONE;
+ static const fltSemantics &Float8E4M3FNUZ() LLVM_READNONE;
+ static const fltSemantics &Float8E4M3B11FNUZ() LLVM_READNONE;
+ static const fltSemantics &FloatTF32() LLVM_READNONE;
static const fltSemantics &x87DoubleExtended() LLVM_READNONE;
/// A Pseudo fltsemantic used to construct APFloats that cannot conflict with
@@ -246,6 +276,12 @@ struct APFloatBase {
static ExponentType semanticsMinExponent(const fltSemantics &);
static ExponentType semanticsMaxExponent(const fltSemantics &);
static unsigned int semanticsSizeInBits(const fltSemantics &);
+ static unsigned int semanticsIntSizeInBits(const fltSemantics&, bool);
+
+ // Returns true if any number described by \p Src can be precisely represented
+ // by a normal (not subnormal) value in \p Dst.
+ static bool isRepresentableAsNormalIn(const fltSemantics &Src,
+ const fltSemantics &Dst);
/// Returns the size of the floating point number (in bits) in the given
/// semantics.
@@ -561,6 +597,7 @@ private:
/// @}
+ template <const fltSemantics &S> APInt convertIEEEFloatToAPInt() const;
APInt convertHalfAPFloatToAPInt() const;
APInt convertBFloatAPFloatToAPInt() const;
APInt convertFloatAPFloatToAPInt() const;
@@ -569,8 +606,13 @@ private:
APInt convertF80LongDoubleAPFloatToAPInt() const;
APInt convertPPCDoubleDoubleAPFloatToAPInt() const;
APInt convertFloat8E5M2APFloatToAPInt() const;
+ APInt convertFloat8E5M2FNUZAPFloatToAPInt() const;
APInt convertFloat8E4M3FNAPFloatToAPInt() const;
+ APInt convertFloat8E4M3FNUZAPFloatToAPInt() const;
+ APInt convertFloat8E4M3B11FNUZAPFloatToAPInt() const;
+ APInt convertFloatTF32APFloatToAPInt() const;
void initFromAPInt(const fltSemantics *Sem, const APInt &api);
+ template <const fltSemantics &S> void initFromIEEEAPInt(const APInt &api);
void initFromHalfAPInt(const APInt &api);
void initFromBFloatAPInt(const APInt &api);
void initFromFloatAPInt(const APInt &api);
@@ -579,7 +621,11 @@ private:
void initFromF80LongDoubleAPInt(const APInt &api);
void initFromPPCDoubleDoubleAPInt(const APInt &api);
void initFromFloat8E5M2APInt(const APInt &api);
+ void initFromFloat8E5M2FNUZAPInt(const APInt &api);
void initFromFloat8E4M3FNAPInt(const APInt &api);
+ void initFromFloat8E4M3FNUZAPInt(const APInt &api);
+ void initFromFloat8E4M3B11FNUZAPInt(const APInt &api);
+ void initFromFloatTF32APInt(const APInt &api);
void assign(const IEEEFloat &);
void copySignificand(const IEEEFloat &);
@@ -640,21 +686,14 @@ public:
DoubleAPFloat(DoubleAPFloat &&RHS);
DoubleAPFloat &operator=(const DoubleAPFloat &RHS);
-
- DoubleAPFloat &operator=(DoubleAPFloat &&RHS) {
- if (this != &RHS) {
- this->~DoubleAPFloat();
- new (this) DoubleAPFloat(std::move(RHS));
- }
- return *this;
- }
+ inline DoubleAPFloat &operator=(DoubleAPFloat &&RHS);
bool needsCleanup() const { return Floats != nullptr; }
- APFloat &getFirst() { return Floats[0]; }
- const APFloat &getFirst() const { return Floats[0]; }
- APFloat &getSecond() { return Floats[1]; }
- const APFloat &getSecond() const { return Floats[1]; }
+ inline APFloat &getFirst();
+ inline const APFloat &getFirst() const;
+ inline APFloat &getSecond();
+ inline const APFloat &getSecond() const;
opStatus add(const DoubleAPFloat &RHS, roundingMode RM);
opStatus subtract(const DoubleAPFloat &RHS, roundingMode RM);
@@ -1114,6 +1153,14 @@ public:
return Value;
}
+ /// Assuming this is an IEEE-754 NaN value, quiet its signaling bit.
+ /// This preserves the sign and payload bits.
+ APFloat makeQuiet() const {
+ APFloat Result(*this);
+ Result.getIEEE().makeQuiet();
+ return Result;
+ }
+
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM,
bool *losesInfo);
opStatus convertToInteger(MutableArrayRef<integerPart> Input,
@@ -1250,6 +1297,9 @@ public:
APFLOAT_DISPATCH_ON_SEMANTICS(isSmallestNormalized());
}
+ /// Return the FPClassTest which will return true for the value.
+ FPClassTest classify() const;
+
APFloat &operator=(const APFloat &RHS) = default;
APFloat &operator=(APFloat &&RHS) = default;
@@ -1358,6 +1408,27 @@ inline APFloat maximum(const APFloat &A, const APFloat &B) {
return A < B ? B : A;
}
+// We want the following functions to be available in the header for inlining.
+// We cannot define them inline in the class definition of `DoubleAPFloat`
+// because doing so would instantiate `std::unique_ptr<APFloat[]>` before
+// `APFloat` is defined, and that would be undefined behavior.
+namespace detail {
+
+DoubleAPFloat &DoubleAPFloat::operator=(DoubleAPFloat &&RHS) {
+ if (this != &RHS) {
+ this->~DoubleAPFloat();
+ new (this) DoubleAPFloat(std::move(RHS));
+ }
+ return *this;
+}
+
+APFloat &DoubleAPFloat::getFirst() { return Floats[0]; }
+const APFloat &DoubleAPFloat::getFirst() const { return Floats[0]; }
+APFloat &DoubleAPFloat::getSecond() { return Floats[1]; }
+const APFloat &DoubleAPFloat::getSecond() const { return Floats[1]; }
+
+} // namespace detail
+
} // namespace llvm
#undef APFLOAT_DISPATCH_ON_SEMANTICS
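A short sketch of the two value-inspection additions above (makeQuiet and classify), using only public APFloat entry points; getSNaN already existed, and fcPosNormal is one of the FPClassTest bits.

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/FloatingPointMode.h"

// Quiets a signaling NaN and classifies an ordinary value.
inline bool demoClassify() {
  llvm::APFloat SNaN = llvm::APFloat::getSNaN(llvm::APFloat::IEEEsingle());
  llvm::APFloat QNaN = SNaN.makeQuiet(); // same sign/payload, quiet bit set
  return QNaN.isNaN() &&
         llvm::APFloat(1.0f).classify() == llvm::fcPosNormal;
}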
diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index 2374cfa2dcdc..6f2f25548cc8 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -28,6 +28,7 @@ class FoldingSetNodeID;
class StringRef;
class hash_code;
class raw_ostream;
+struct Align;
template <typename T> class SmallVectorImpl;
template <typename T> class ArrayRef;
@@ -176,9 +177,6 @@ public:
/// Get the '0' value for the specified bit-width.
static APInt getZero(unsigned numBits) { return APInt(numBits, 0); }
- /// NOTE: This is soft-deprecated. Please use `getZero()` instead.
- static APInt getNullValue(unsigned numBits) { return getZero(numBits); }
-
/// Return an APInt zero bits wide.
static APInt getZeroWidth() { return getZero(0); }
@@ -215,9 +213,6 @@ public:
return APInt(numBits, WORDTYPE_MAX, true);
}
- /// NOTE: This is soft-deprecated. Please use `getAllOnes()` instead.
- static APInt getAllOnesValue(unsigned numBits) { return getAllOnes(numBits); }
-
/// Return an APInt with exactly one bit set in the result.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
APInt Res(numBits, 0);
@@ -347,7 +342,7 @@ public:
///
/// \returns true if this APInt only has the specified bit set.
bool isOneBitSet(unsigned BitNo) const {
- return (*this)[BitNo] && countPopulation() == 1;
+ return (*this)[BitNo] && popcount() == 1;
}
/// Determine if all bits are set. This is true for zero-width values.
@@ -359,9 +354,6 @@ public:
return countTrailingOnesSlowCase() == BitWidth;
}
- /// NOTE: This is soft-deprecated. Please use `isAllOnes()` instead.
- bool isAllOnesValue() const { return isAllOnes(); }
-
/// Determine if this value is zero, i.e. all bits are clear.
bool isZero() const {
if (isSingleWord())
@@ -369,9 +361,6 @@ public:
return countLeadingZerosSlowCase() == BitWidth;
}
- /// NOTE: This is soft-deprecated. Please use `isZero()` instead.
- bool isNullValue() const { return isZero(); }
-
/// Determine if this is a value of 1.
///
/// This checks to see if the value of this APInt is one.
@@ -381,9 +370,6 @@ public:
return countLeadingZerosSlowCase() == BitWidth - 1;
}
- /// NOTE: This is soft-deprecated. Please use `isOne()` instead.
- bool isOneValue() const { return isOne(); }
-
/// Determine if this is the largest unsigned value.
///
/// This checks to see if the value of this APInt is the maximum unsigned
@@ -443,11 +429,15 @@ public:
if (isNonNegative())
return false;
// NegatedPowerOf2 - shifted mask in the top bits.
- unsigned LO = countLeadingOnes();
- unsigned TZ = countTrailingZeros();
+ unsigned LO = countl_one();
+ unsigned TZ = countr_zero();
return (LO + TZ) == BitWidth;
}
+ /// Checks if this APInt, interpreted as an address, is aligned to the
+ /// provided value.
+ bool isAligned(Align A) const;
+
/// Check if the APInt's value is returned by getSignMask.
///
/// \returns true if this is the value returned by getSignMask.
@@ -500,7 +490,7 @@ public:
return isShiftedMask_64(U.VAL);
unsigned Ones = countPopulationSlowCase();
unsigned LeadZ = countLeadingZerosSlowCase();
- return (Ones + LeadZ + countTrailingZeros()) == BitWidth;
+ return (Ones + LeadZ + countr_zero()) == BitWidth;
}
/// Return true if this APInt value contains a non-empty sequence of ones with
@@ -1003,7 +993,9 @@ public:
APInt smul_ov(const APInt &RHS, bool &Overflow) const;
APInt umul_ov(const APInt &RHS, bool &Overflow) const;
APInt sshl_ov(const APInt &Amt, bool &Overflow) const;
+ APInt sshl_ov(unsigned Amt, bool &Overflow) const;
APInt ushl_ov(const APInt &Amt, bool &Overflow) const;
+ APInt ushl_ov(unsigned Amt, bool &Overflow) const;
// Operations that saturate
APInt sadd_sat(const APInt &RHS) const;
@@ -1013,7 +1005,9 @@ public:
APInt smul_sat(const APInt &RHS) const;
APInt umul_sat(const APInt &RHS) const;
APInt sshl_sat(const APInt &RHS) const;
+ APInt sshl_sat(unsigned RHS) const;
APInt ushl_sat(const APInt &RHS) const;
+ APInt ushl_sat(unsigned RHS) const;
/// Array-indexing support.
///
@@ -1460,7 +1454,7 @@ public:
/// This function returns the number of active bits which is defined as the
/// bit width minus the number of leading zeros. This is used in several
/// computations to see how "wide" the value is.
- unsigned getActiveBits() const { return BitWidth - countLeadingZeros(); }
+ unsigned getActiveBits() const { return BitWidth - countl_zero(); }
/// Compute the number of active words in the value of this APInt.
///
@@ -1483,9 +1477,6 @@ public:
return BitWidth - getNumSignBits() + 1;
}
- /// NOTE: This is soft-deprecated. Please use `getSignificantBits()` instead.
- unsigned getMinSignedBits() const { return getSignificantBits(); }
-
/// Get zero extended value
///
/// This method attempts to return the value of this APInt as a zero extended
@@ -1541,82 +1532,86 @@ public:
/// parsing the value in the string.
static unsigned getSufficientBitsNeeded(StringRef Str, uint8_t Radix);
- /// The APInt version of the countLeadingZeros functions in
- /// MathExtras.h.
+ /// The APInt version of std::countl_zero.
///
/// It counts the number of zeros from the most significant bit to the first
/// one bit.
///
/// \returns BitWidth if the value is zero, otherwise returns the number of
/// zeros from the most significant bit to the first one bits.
- unsigned countLeadingZeros() const {
+ unsigned countl_zero() const {
if (isSingleWord()) {
unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth;
- return llvm::countLeadingZeros(U.VAL) - unusedBits;
+ return llvm::countl_zero(U.VAL) - unusedBits;
}
return countLeadingZerosSlowCase();
}
+ unsigned countLeadingZeros() const { return countl_zero(); }
+
/// Count the number of leading one bits.
///
- /// This function is an APInt version of the countLeadingOnes
- /// functions in MathExtras.h. It counts the number of ones from the most
- /// significant bit to the first zero bit.
+ /// This function is an APInt version of std::countl_one. It counts the number
+ /// of ones from the most significant bit to the first zero bit.
///
/// \returns 0 if the high order bit is not set, otherwise returns the number
/// of 1 bits from the most significant to the least
- unsigned countLeadingOnes() const {
+ unsigned countl_one() const {
if (isSingleWord()) {
if (LLVM_UNLIKELY(BitWidth == 0))
return 0;
- return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
+ return llvm::countl_one(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
}
return countLeadingOnesSlowCase();
}
+ unsigned countLeadingOnes() const { return countl_one(); }
+
/// Computes the number of leading bits of this APInt that are equal to its
/// sign bit.
unsigned getNumSignBits() const {
- return isNegative() ? countLeadingOnes() : countLeadingZeros();
+ return isNegative() ? countl_one() : countl_zero();
}
/// Count the number of trailing zero bits.
///
- /// This function is an APInt version of the countTrailingZeros
- /// functions in MathExtras.h. It counts the number of zeros from the least
- /// significant bit to the first set bit.
+ /// This function is an APInt version of std::countr_zero. It counts the
+ /// number of zeros from the least significant bit to the first set bit.
///
/// \returns BitWidth if the value is zero, otherwise returns the number of
/// zeros from the least significant bit to the first one bit.
- unsigned countTrailingZeros() const {
+ unsigned countr_zero() const {
if (isSingleWord()) {
- unsigned TrailingZeros = llvm::countTrailingZeros(U.VAL);
+ unsigned TrailingZeros = llvm::countr_zero(U.VAL);
return (TrailingZeros > BitWidth ? BitWidth : TrailingZeros);
}
return countTrailingZerosSlowCase();
}
+ unsigned countTrailingZeros() const { return countr_zero(); }
+
/// Count the number of trailing one bits.
///
- /// This function is an APInt version of the countTrailingOnes
- /// functions in MathExtras.h. It counts the number of ones from the least
- /// significant bit to the first zero bit.
+ /// This function is an APInt version of std::countr_one. It counts the number
+ /// of ones from the least significant bit to the first zero bit.
///
/// \returns BitWidth if the value is all ones, otherwise returns the number
/// of ones from the least significant bit to the first zero bit.
- unsigned countTrailingOnes() const {
+ unsigned countr_one() const {
if (isSingleWord())
- return llvm::countTrailingOnes(U.VAL);
+ return llvm::countr_one(U.VAL);
return countTrailingOnesSlowCase();
}
+ unsigned countTrailingOnes() const { return countr_one(); }
+
/// Count the number of bits set.
///
- /// This function is an APInt version of the countPopulation functions
- /// in MathExtras.h. It counts the number of 1 bits in the APInt value.
+ /// This function is an APInt version of std::popcount. It counts the number
+ /// of 1 bits in the APInt value.
///
/// \returns 0 if the value is zero, otherwise returns the number of set bits.
- unsigned countPopulation() const {
+ unsigned popcount() const {
if (isSingleWord())
return llvm::popcount(U.VAL);
return countPopulationSlowCase();
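A tiny sketch of the renamed bit-counting interface above; the old camelCase spellings are kept as thin wrappers, so both forms return identical results.

#include "llvm/ADT/APInt.h"

// 0x00F0 has bits 4..7 set in a 32-bit value.
inline void countBits() {
  llvm::APInt V(32, 0x00F0u);
  unsigned LZ = V.countl_zero(); // 24, same as countLeadingZeros()
  unsigned TZ = V.countr_zero(); // 4,  same as countTrailingZeros()
  unsigned P = V.popcount();     // 4,  same as the old countPopulation()
  (void)LZ; (void)TZ; (void)P;
}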
@@ -1628,9 +1623,10 @@ public:
void print(raw_ostream &OS, bool isSigned) const;
/// Converts an APInt to a string and append it to Str. Str is commonly a
- /// SmallString.
+ /// SmallString. If Radix > 10, UpperCase determines the case of letter
+ /// digits.
void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
- bool formatAsCLiteral = false) const;
+ bool formatAsCLiteral = false, bool UpperCase = true) const;
/// Considers the APInt to be unsigned and converts it into a string in the
  /// radix given. The radix can be 2, 8, 10, 16, or 36.
@@ -1665,7 +1661,7 @@ public:
/// The conversion does not do a translation from integer to double, it just
/// re-interprets the bits as a double. Note that it is valid to do this on
/// any bit width. Exactly 64 bits will be translated.
- double bitsToDouble() const { return BitsToDouble(getWord(0)); }
+ double bitsToDouble() const { return llvm::bit_cast<double>(getWord(0)); }
/// Converts APInt bits to a float
///
@@ -1673,7 +1669,7 @@ public:
/// re-interprets the bits as a float. Note that it is valid to do this on
/// any bit width. Exactly 32 bits will be translated.
float bitsToFloat() const {
- return BitsToFloat(static_cast<uint32_t>(getWord(0)));
+ return llvm::bit_cast<float>(static_cast<uint32_t>(getWord(0)));
}
/// Converts a double to APInt bits.
@@ -1681,7 +1677,7 @@ public:
/// The conversion does not do a translation from double to integer, it just
/// re-interprets the bits of the double.
static APInt doubleToBits(double V) {
- return APInt(sizeof(double) * CHAR_BIT, DoubleToBits(V));
+ return APInt(sizeof(double) * CHAR_BIT, llvm::bit_cast<uint64_t>(V));
}
/// Converts a float to APInt bits.
@@ -1689,7 +1685,7 @@ public:
/// The conversion does not do a translation from float to integer, it just
/// re-interprets the bits of the float.
static APInt floatToBits(float V) {
- return APInt(sizeof(float) * CHAR_BIT, FloatToBits(V));
+ return APInt(sizeof(float) * CHAR_BIT, llvm::bit_cast<uint32_t>(V));
}
/// @}
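A small sketch showing that the bit-reinterpreting conversions above (now implemented with llvm::bit_cast) round-trip exactly.

#include "llvm/ADT/APInt.h"

// The float's bit pattern survives an APInt round trip unchanged.
inline bool roundTripsExactly(float F) {
  llvm::APInt Bits = llvm::APInt::floatToBits(F); // 32-bit APInt holding F's bits
  float Back = Bits.bitsToFloat();                // reinterpret the same 32 bits
  return llvm::APInt::floatToBits(Back) == Bits;
}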
diff --git a/llvm/include/llvm/ADT/AddressRanges.h b/llvm/include/llvm/ADT/AddressRanges.h
index f2052d82e7c1..415d30bbb5cf 100644
--- a/llvm/include/llvm/ADT/AddressRanges.h
+++ b/llvm/include/llvm/ADT/AddressRanges.h
@@ -28,7 +28,11 @@ public:
uint64_t start() const { return Start; }
uint64_t end() const { return End; }
uint64_t size() const { return End - Start; }
+ uint64_t empty() const { return size() == 0; }
bool contains(uint64_t Addr) const { return Start <= Addr && Addr < End; }
+ bool contains(const AddressRange &R) const {
+ return Start <= R.Start && R.End <= End;
+ }
bool intersects(const AddressRange &R) const {
return Start < R.End && R.Start < End;
}
@@ -45,101 +49,163 @@ private:
uint64_t End = 0;
};
-/// The AddressRanges class helps normalize address range collections.
-/// This class keeps a sorted vector of AddressRange objects and can perform
-/// insertions and searches efficiently. The address ranges are always sorted
-/// and never contain any invalid or empty address ranges.
-/// Intersecting([100,200), [150,300)) and adjacent([100,200), [200,300))
-/// address ranges are combined during insertion.
-class AddressRanges {
+/// The AddressRangesBase class provides the base functionality for
+/// normalized address range collections. This class keeps a sorted vector
+/// of AddressRange-like objects and can perform searches efficiently.
+/// The address ranges are always sorted and never contain any invalid,
+/// empty, or overlapping address ranges.
+
+template <typename T> class AddressRangesBase {
protected:
- using Collection = SmallVector<AddressRange>;
+ using Collection = SmallVector<T>;
Collection Ranges;
public:
void clear() { Ranges.clear(); }
bool empty() const { return Ranges.empty(); }
- bool contains(uint64_t Addr) const { return find(Addr) != Ranges.end(); }
+ bool contains(uint64_t Addr) const {
+ return find(Addr, Addr + 1) != Ranges.end();
+ }
bool contains(AddressRange Range) const {
- return find(Range) != Ranges.end();
+ return find(Range.start(), Range.end()) != Ranges.end();
}
- std::optional<AddressRange> getRangeThatContains(uint64_t Addr) const {
- Collection::const_iterator It = find(Addr);
+ void reserve(size_t Capacity) { Ranges.reserve(Capacity); }
+ size_t size() const { return Ranges.size(); }
+
+ std::optional<T> getRangeThatContains(uint64_t Addr) const {
+ typename Collection::const_iterator It = find(Addr, Addr + 1);
if (It == Ranges.end())
return std::nullopt;
return *It;
}
- Collection::const_iterator insert(AddressRange Range);
- void reserve(size_t Capacity) { Ranges.reserve(Capacity); }
- size_t size() const { return Ranges.size(); }
- bool operator==(const AddressRanges &RHS) const {
- return Ranges == RHS.Ranges;
- }
- const AddressRange &operator[](size_t i) const {
+
+ typename Collection::const_iterator begin() const { return Ranges.begin(); }
+ typename Collection::const_iterator end() const { return Ranges.end(); }
+
+ const T &operator[](size_t i) const {
assert(i < Ranges.size());
return Ranges[i];
}
- Collection::const_iterator begin() const { return Ranges.begin(); }
- Collection::const_iterator end() const { return Ranges.end(); }
+
+ bool operator==(const AddressRangesBase<T> &RHS) const {
+ return Ranges == RHS.Ranges;
+ }
protected:
- Collection::const_iterator find(uint64_t Addr) const;
- Collection::const_iterator find(AddressRange Range) const;
+ typename Collection::const_iterator find(uint64_t Start, uint64_t End) const {
+ if (Start >= End)
+ return Ranges.end();
+
+ auto It =
+ std::partition_point(Ranges.begin(), Ranges.end(), [=](const T &R) {
+ return AddressRange(R).start() <= Start;
+ });
+
+ if (It == Ranges.begin())
+ return Ranges.end();
+
+ --It;
+ if (End > AddressRange(*It).end())
+ return Ranges.end();
+
+ return It;
+ }
};
-/// AddressRangesMap class maps values to the address ranges.
-/// It keeps address ranges and corresponding values. If ranges
-/// are combined during insertion, then combined range keeps
-/// newly inserted value.
-template <typename T> class AddressRangesMap : protected AddressRanges {
+/// The AddressRanges class helps normalize address range collections.
+/// This class keeps a sorted vector of AddressRange objects and can perform
+/// insertions and searches efficiently. Intersecting([100,200), [150,300))
+/// and adjacent([100,200), [200,300)) address ranges are combined during
+/// insertion.
+class AddressRanges : public AddressRangesBase<AddressRange> {
public:
- void clear() {
- Ranges.clear();
- Values.clear();
+ Collection::const_iterator insert(AddressRange Range) {
+ if (Range.empty())
+ return Ranges.end();
+
+ auto It = llvm::upper_bound(Ranges, Range);
+ auto It2 = It;
+ while (It2 != Ranges.end() && It2->start() <= Range.end())
+ ++It2;
+ if (It != It2) {
+ Range = {Range.start(), std::max(Range.end(), std::prev(It2)->end())};
+ It = Ranges.erase(It, It2);
+ }
+ if (It != Ranges.begin() && Range.start() <= std::prev(It)->end()) {
+ --It;
+ *It = {It->start(), std::max(It->end(), Range.end())};
+ return It;
+ }
+
+ return Ranges.insert(It, Range);
}
- bool empty() const { return AddressRanges::empty(); }
- bool contains(uint64_t Addr) const { return AddressRanges::contains(Addr); }
- bool contains(AddressRange Range) const {
- return AddressRanges::contains(Range);
- }
- void insert(AddressRange Range, T Value) {
- size_t InputSize = Ranges.size();
- Collection::const_iterator RangesIt = AddressRanges::insert(Range);
- if (RangesIt == Ranges.end())
- return;
+};
- // make Values match to Ranges.
- size_t Idx = RangesIt - Ranges.begin();
- typename ValuesCollection::iterator ValuesIt = Values.begin() + Idx;
- if (InputSize < Ranges.size())
- Values.insert(ValuesIt, T());
- else if (InputSize > Ranges.size())
- Values.erase(ValuesIt, ValuesIt + InputSize - Ranges.size());
- assert(Ranges.size() == Values.size());
-
- // set value to the inserted or combined range.
- Values[Idx] = Value;
- }
- size_t size() const {
- assert(Ranges.size() == Values.size());
- return AddressRanges::size();
- }
- std::optional<std::pair<AddressRange, T>>
- getRangeValueThatContains(uint64_t Addr) const {
- Collection::const_iterator It = find(Addr);
- if (It == Ranges.end())
- return std::nullopt;
+class AddressRangeValuePair {
+public:
+ operator AddressRange() const { return Range; }
- return std::make_pair(*It, Values[It - Ranges.begin()]);
- }
- std::pair<AddressRange, T> operator[](size_t Idx) const {
- return std::make_pair(Ranges[Idx], Values[Idx]);
- }
+ AddressRange Range;
+ int64_t Value = 0;
+};
-protected:
- using ValuesCollection = SmallVector<T>;
- ValuesCollection Values;
+inline bool operator==(const AddressRangeValuePair &LHS,
+ const AddressRangeValuePair &RHS) {
+ return LHS.Range == RHS.Range && LHS.Value == RHS.Value;
+}
+
+/// The AddressRangesMap class maps values to address ranges.
+/// It keeps normalized address ranges and corresponding values.
+/// This class keeps a sorted vector of AddressRangeValuePair objects
+/// and can perform insertions and searches efficiently.
+/// Intersecting ([100,200), [150,300)) ranges are split into non-conflicting
+/// parts ([100,200), [200,300)). Adjacent ([100,200), [200,300)) address
+/// ranges are not combined during insertion.
+class AddressRangesMap : public AddressRangesBase<AddressRangeValuePair> {
+public:
+ void insert(AddressRange Range, int64_t Value) {
+ if (Range.empty())
+ return;
+
+    // Find the last range whose start is less than or equal to the start of
+ auto It = std::partition_point(Ranges.begin(), Ranges.end(),
+ [=](const AddressRangeValuePair &R) {
+ return R.Range.start() <= Range.start();
+ });
+
+ if (It != Ranges.begin())
+ It--;
+
+ while (!Range.empty()) {
+ // Inserted range does not overlap with any range.
+ // Store it into the Ranges collection.
+ if (It == Ranges.end() || Range.end() <= It->Range.start()) {
+ Ranges.insert(It, {Range, Value});
+ return;
+ }
+
+ // Inserted range partially overlaps with current range.
+ // Store not overlapped part of inserted range.
+ if (Range.start() < It->Range.start()) {
+ It = Ranges.insert(It, {{Range.start(), It->Range.start()}, Value});
+ It++;
+ Range = {It->Range.start(), Range.end()};
+ continue;
+ }
+
+ // Inserted range fully overlaps with current range.
+ if (Range.end() <= It->Range.end())
+ return;
+
+ // Inserted range partially overlaps with current range.
+ // Remove overlapped part from the inserted range.
+ if (Range.start() < It->Range.end())
+ Range = {It->Range.end(), Range.end()};
+
+ It++;
+ }
+ }
};
} // namespace llvm
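A hedged sketch contrasting the two refactored collections: AddressRanges still coalesces intersecting and adjacent ranges on insert, while the new AddressRangesMap keeps overlapping insertions as split, non-overlapping pieces.

#include "llvm/ADT/AddressRanges.h"

inline void demoAddressRanges() {
  llvm::AddressRanges R;
  R.insert({100, 200});
  R.insert({150, 300}); // merged: R now holds the single range [100, 300)

  llvm::AddressRangesMap M;
  M.insert({100, 200}, 1);
  M.insert({150, 300}, 2); // only the non-overlapping tail [200, 300) is added
  // M now maps [100, 200) -> 1 and [200, 300) -> 2.
}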
diff --git a/llvm/include/llvm/ADT/Any.h b/llvm/include/llvm/ADT/Any.h
index acb7101a5145..7486491914ef 100644
--- a/llvm/include/llvm/ADT/Any.h
+++ b/llvm/include/llvm/ADT/Any.h
@@ -124,7 +124,16 @@ private:
std::unique_ptr<StorageBase> Storage;
};
-template <typename T> char Any::TypeId<T>::Id = 0;
+// Define the type id and initialize with a non-zero value.
+// Initializing with a zero value means the variable can end up in either the
+// .data or the .bss section. This can lead to multiple definition linker errors
+// when some object files are compiled with a compiler that puts the variable
+// into .data but they are linked to object files from a different compiler that
+// put the variable into .bss. To prevent this issue from happening, initialize
+// the variable with a non-zero value, which forces it to land in .data (because
+// .bss is zero-initialized).
+// See also https://github.com/llvm/llvm-project/issues/62270
+template <typename T> char Any::TypeId<T>::Id = 1;
template <typename T>
LLVM_DEPRECATED("Use any_cast(Any*) != nullptr instead", "any_cast")
diff --git a/llvm/include/llvm/ADT/ArrayRef.h b/llvm/include/llvm/ADT/ArrayRef.h
index a25cf1cf817e..713f463f65ed 100644
--- a/llvm/include/llvm/ADT/ArrayRef.h
+++ b/llvm/include/llvm/ADT/ArrayRef.h
@@ -79,7 +79,9 @@ namespace llvm {
/// Construct an ArrayRef from a range.
constexpr ArrayRef(const T *begin, const T *end)
- : Data(begin), Length(end - begin) {}
+ : Data(begin), Length(end - begin) {
+ assert(begin <= end);
+ }
/// Construct an ArrayRef from a SmallVector. This is templated in order to
/// avoid instantiating SmallVectorTemplateCommon<T> whenever we
diff --git a/llvm/include/llvm/ADT/BitVector.h b/llvm/include/llvm/ADT/BitVector.h
index da7f9d2f3ae8..a8847a326753 100644
--- a/llvm/include/llvm/ADT/BitVector.h
+++ b/llvm/include/llvm/ADT/BitVector.h
@@ -24,6 +24,7 @@
#include <cstdint>
#include <cstdlib>
#include <cstring>
+#include <iterator>
#include <utility>
namespace llvm {
@@ -40,6 +41,12 @@ template <typename BitVectorT> class const_set_bits_iterator_impl {
}
public:
+ using iterator_category = std::forward_iterator_tag;
+ using difference_type = void;
+ using value_type = int;
+ using pointer = value_type*;
+ using reference = value_type&;
+
const_set_bits_iterator_impl(const BitVectorT &Parent, int Current)
: Parent(Parent), Current(Current) {}
explicit const_set_bits_iterator_impl(const BitVectorT &Parent)
@@ -213,7 +220,7 @@ public:
Copy &= maskTrailingOnes<BitWord>(LastBit + 1);
}
if (Copy != 0)
- return i * BITWORD_SIZE + countTrailingZeros(Copy);
+ return i * BITWORD_SIZE + llvm::countr_zero(Copy);
}
return -1;
}
@@ -243,7 +250,7 @@ public:
}
if (Copy != 0)
- return (CurrentWord + 1) * BITWORD_SIZE - countLeadingZeros(Copy) - 1;
+ return (CurrentWord + 1) * BITWORD_SIZE - llvm::countl_zero(Copy) - 1;
}
return -1;
@@ -281,7 +288,7 @@ public:
if (Copy != ~BitWord(0)) {
unsigned Result =
- (CurrentWord + 1) * BITWORD_SIZE - countLeadingOnes(Copy) - 1;
+ (CurrentWord + 1) * BITWORD_SIZE - llvm::countl_one(Copy) - 1;
return Result < Size ? Result : -1;
}
}
@@ -763,7 +770,7 @@ private:
}
int next_unset_in_word(int WordIndex, BitWord Word) const {
- unsigned Result = WordIndex * BITWORD_SIZE + countTrailingOnes(Word);
+ unsigned Result = WordIndex * BITWORD_SIZE + llvm::countr_one(Word);
return Result < size() ? Result : -1;
}
diff --git a/llvm/include/llvm/ADT/BitmaskEnum.h b/llvm/include/llvm/ADT/BitmaskEnum.h
index 205da1240d44..976fddde725f 100644
--- a/llvm/include/llvm/ADT/BitmaskEnum.h
+++ b/llvm/include/llvm/ADT/BitmaskEnum.h
@@ -41,6 +41,33 @@
#define LLVM_MARK_AS_BITMASK_ENUM(LargestValue) \
LLVM_BITMASK_LARGEST_ENUMERATOR = LargestValue
+/// LLVM_DECLARE_ENUM_AS_BITMASK can be used to declare an enum type as a bit
+/// set, so that bitwise operations on such an enum do not require static_cast.
+///
+/// \code
+/// enum MyEnum { E1 = 1, E2 = 2, E3 = 4, E4 = 8 };
+/// LLVM_DECLARE_ENUM_AS_BITMASK(MyEnum, E4);
+///
+/// void Foo() {
+/// MyEnum A = (E1 | E2) & E3 ^ ~E4; // No static_cast
+/// }
+/// \endcode
+///
+/// The second parameter to LLVM_DECLARE_ENUM_AS_BITMASK specifies the largest
+/// bit value of the enum type.
+///
+/// LLVM_DECLARE_ENUM_AS_BITMASK should be used in the llvm namespace.
+///
+/// This is a non-intrusive alternative to LLVM_MARK_AS_BITMASK_ENUM. It allows
+/// declaring more than one non-scoped enumeration as a bitmask type in the
+/// same scope. Otherwise it provides the same functionality as
+/// LLVM_MARK_AS_BITMASK_ENUM.
+#define LLVM_DECLARE_ENUM_AS_BITMASK(Enum, LargestValue) \
+ template <> struct is_bitmask_enum<Enum> : std::true_type {}; \
+ template <> struct largest_bitmask_enum_bit<Enum> { \
+ static constexpr std::underlying_type_t<Enum> value = LargestValue; \
+ }
+
/// LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE() pulls the operator overloads used
/// by LLVM_MARK_AS_BITMASK_ENUM into the current namespace.
///
@@ -73,6 +100,18 @@ template <typename E>
struct is_bitmask_enum<
E, std::enable_if_t<sizeof(E::LLVM_BITMASK_LARGEST_ENUMERATOR) >= 0>>
: std::true_type {};
+
+/// Trait class to determine the largest bit of a bitmask enumeration.
+template <typename E, typename Enable = void> struct largest_bitmask_enum_bit;
+
+template <typename E>
+struct largest_bitmask_enum_bit<
+ E, std::enable_if_t<sizeof(E::LLVM_BITMASK_LARGEST_ENUMERATOR) >= 0>> {
+ using UnderlyingTy = std::underlying_type_t<E>;
+ static constexpr UnderlyingTy value =
+ static_cast<UnderlyingTy>(E::LLVM_BITMASK_LARGEST_ENUMERATOR);
+};
+
namespace BitmaskEnumDetail {
/// Get a bitmask with 1s in all places up to the high-order bit of E's largest
@@ -80,9 +119,7 @@ namespace BitmaskEnumDetail {
template <typename E> constexpr std::underlying_type_t<E> Mask() {
// On overflow, NextPowerOf2 returns zero with the type uint64_t, so
// subtracting 1 gives us the mask with all bits set, like we want.
- return NextPowerOf2(static_cast<std::underlying_type_t<E>>(
- E::LLVM_BITMASK_LARGEST_ENUMERATOR)) -
- 1;
+ return NextPowerOf2(largest_bitmask_enum_bit<E>::value) - 1;
}
/// Check that Val is in range for E, and return Val cast to E's underlying
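A small illustration of the new non-intrusive macro; DemoFlags is a made-up enum placed in the llvm namespace (as the documentation above requires) so that the bitmask operators are found by argument-dependent lookup.

#include "llvm/ADT/BitmaskEnum.h"

namespace llvm {
enum DemoFlags { DF_None = 0, DF_Read = 1, DF_Write = 2, DF_Exec = 4 };
LLVM_DECLARE_ENUM_AS_BITMASK(DemoFlags, /*LargestValue=*/DF_Exec);
} // namespace llvm

// Bitwise combination now yields DemoFlags directly, no static_cast needed.
inline bool isReadWrite(llvm::DemoFlags F) {
  return (F & (llvm::DF_Read | llvm::DF_Write)) ==
         (llvm::DF_Read | llvm::DF_Write);
}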
diff --git a/llvm/include/llvm/ADT/BreadthFirstIterator.h b/llvm/include/llvm/ADT/BreadthFirstIterator.h
index c1a236bf5692..29e96693c4d1 100644
--- a/llvm/include/llvm/ADT/BreadthFirstIterator.h
+++ b/llvm/include/llvm/ADT/BreadthFirstIterator.h
@@ -50,7 +50,7 @@ public:
using value_type = typename GT::NodeRef;
using difference_type = std::ptrdiff_t;
using pointer = value_type *;
- using reference = value_type &;
+ using reference = const value_type &;
private:
using NodeRef = typename GT::NodeRef;
@@ -123,7 +123,7 @@ public:
bool operator!=(const bf_iterator &RHS) const { return !(*this == RHS); }
- const NodeRef &operator*() const { return VisitQueue.front()->first; }
+ reference operator*() const { return VisitQueue.front()->first; }
// This is a nonstandard operator-> that dereferences the pointer an extra
// time so that you can actually call methods on the node, because the
diff --git a/llvm/include/llvm/ADT/ConcurrentHashtable.h b/llvm/include/llvm/ADT/ConcurrentHashtable.h
new file mode 100644
index 000000000000..37c8af36bc73
--- /dev/null
+++ b/llvm/include/llvm/ADT/ConcurrentHashtable.h
@@ -0,0 +1,401 @@
+//===- ConcurrentHashtable.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_CONCURRENTHASHTABLE_H
+#define LLVM_ADT_CONCURRENTHASHTABLE_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Parallel.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/xxhash.h"
+#include <atomic>
+#include <cstddef>
+#include <iomanip>
+#include <mutex>
+#include <sstream>
+#include <type_traits>
+
+namespace llvm {
+
+/// ConcurrentHashTable - a resizeable concurrent hashtable.
+/// The number of resizings is limited to 2^31. This hashtable is
+/// useful for efficient access to aggregate data (like strings,
+/// type descriptors...) while keeping only a single copy of such
+/// an aggregate. The hashtable allows only concurrent insertions:
+///
+/// KeyDataTy* = insert ( const KeyTy& );
+///
+/// Data structure:
+///
+/// Inserted value KeyTy is mapped to 64-bit hash value ->
+///
+/// [------- 64-bit Hash value --------]
+/// [ StartEntryIndex ][ Bucket Index ]
+/// | |
+/// points to the points to
+/// first probe the bucket.
+/// position inside
+/// bucket entries
+///
+/// After initialization, all buckets have an initial size. During insertions,
+/// buckets might be extended to contain more entries. Each bucket can be
+/// independently resized and rehashed (no need to lock the whole table).
+/// Different buckets may have different sizes. If a single bucket is full,
+/// then that bucket is resized.
+///
+/// BucketsArray keeps all buckets. Each bucket keeps an array of Entries
+/// (pointers to KeyDataTy) and another array of entries hashes:
+///
+/// BucketsArray[BucketIdx].Hashes[EntryIdx]:
+/// BucketsArray[BucketIdx].Entries[EntryIdx]:
+///
+/// [Bucket 0].Hashes -> [uint32_t][uint32_t]
+/// [Bucket 0].Entries -> [KeyDataTy*][KeyDataTy*]
+///
+/// [Bucket 1].Hashes -> [uint32_t][uint32_t][uint32_t][uint32_t]
+/// [Bucket 1].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*][KeyDataTy*]
+/// .........................
+/// [Bucket N].Hashes -> [uint32_t][uint32_t][uint32_t]
+/// [Bucket N].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*]
+///
+/// ConcurrentHashTableByPtr uses an external thread-safe allocator to allocate
+/// KeyDataTy items.
+
+template <typename KeyTy, typename KeyDataTy, typename AllocatorTy>
+class ConcurrentHashTableInfoByPtr {
+public:
+ /// \returns Hash value for the specified \p Key.
+ static inline uint64_t getHashValue(const KeyTy &Key) {
+ return xxh3_64bits(Key);
+ }
+
+ /// \returns true if both \p LHS and \p RHS are equal.
+ static inline bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
+ return LHS == RHS;
+ }
+
+  /// \returns the key for the specified \p KeyData.
+ static inline const KeyTy &getKey(const KeyDataTy &KeyData) {
+ return KeyData.getKey();
+ }
+
+  /// \returns a newly created object of KeyDataTy type.
+ static inline KeyDataTy *create(const KeyTy &Key, AllocatorTy &Allocator) {
+ return KeyDataTy::create(Key, Allocator);
+ }
+};
+
+template <typename KeyTy, typename KeyDataTy, typename AllocatorTy,
+ typename Info =
+ ConcurrentHashTableInfoByPtr<KeyTy, KeyDataTy, AllocatorTy>>
+class ConcurrentHashTableByPtr {
+public:
+ ConcurrentHashTableByPtr(
+ AllocatorTy &Allocator, uint64_t EstimatedSize = 100000,
+ size_t ThreadsNum = parallel::strategy.compute_thread_count(),
+ size_t InitialNumberOfBuckets = 128)
+ : MultiThreadAllocator(Allocator) {
+ assert((ThreadsNum > 0) && "ThreadsNum must be greater than 0");
+ assert((InitialNumberOfBuckets > 0) &&
+ "InitialNumberOfBuckets must be greater than 0");
+
+ // Calculate number of buckets.
+ uint64_t EstimatedNumberOfBuckets = ThreadsNum;
+ if (ThreadsNum > 1) {
+ EstimatedNumberOfBuckets *= InitialNumberOfBuckets;
+ EstimatedNumberOfBuckets *= std::max(
+ 1,
+ countr_zero(PowerOf2Ceil(EstimatedSize / InitialNumberOfBuckets)) >>
+ 2);
+ }
+ EstimatedNumberOfBuckets = PowerOf2Ceil(EstimatedNumberOfBuckets);
+ NumberOfBuckets =
+ std::min(EstimatedNumberOfBuckets, (uint64_t)(1Ull << 31));
+
+ // Allocate buckets.
+ BucketsArray = std::make_unique<Bucket[]>(NumberOfBuckets);
+
+ InitialBucketSize = EstimatedSize / NumberOfBuckets;
+ InitialBucketSize = std::max((uint32_t)1, InitialBucketSize);
+ InitialBucketSize = PowerOf2Ceil(InitialBucketSize);
+
+ // Initialize each bucket.
+ for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
+ HashesPtr Hashes = new ExtHashBitsTy[InitialBucketSize];
+ memset(Hashes, 0, sizeof(ExtHashBitsTy) * InitialBucketSize);
+
+ DataPtr Entries = new EntryDataTy[InitialBucketSize];
+ memset(Entries, 0, sizeof(EntryDataTy) * InitialBucketSize);
+
+ BucketsArray[Idx].Size = InitialBucketSize;
+ BucketsArray[Idx].Hashes = Hashes;
+ BucketsArray[Idx].Entries = Entries;
+ }
+
+ // Calculate masks.
+ HashMask = NumberOfBuckets - 1;
+
+ size_t LeadingZerosNumber = countl_zero(HashMask);
+ HashBitsNum = 64 - LeadingZerosNumber;
+
+    // We keep only the high 32 bits of the hash value, so the bucket size
+    // cannot exceed 2^31. The bucket size is always a power of two.
+ MaxBucketSize = 1Ull << (std::min((size_t)31, LeadingZerosNumber));
+
+ // Calculate mask for extended hash bits.
+ ExtHashMask = (NumberOfBuckets * MaxBucketSize) - 1;
+ }
+
+ virtual ~ConcurrentHashTableByPtr() {
+ // Deallocate buckets.
+ for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
+ delete[] BucketsArray[Idx].Hashes;
+ delete[] BucketsArray[Idx].Entries;
+ }
+ }
+
+  /// Insert the new value \p NewValue or return the already existing entry.
+  ///
+  /// \returns the entry and "true" if a new entry was just inserted, or
+  /// the existing entry and "false" if it already exists.
+ std::pair<KeyDataTy *, bool> insert(const KeyTy &NewValue) {
+ // Calculate bucket index.
+ uint64_t Hash = Info::getHashValue(NewValue);
+ Bucket &CurBucket = BucketsArray[getBucketIdx(Hash)];
+ uint32_t ExtHashBits = getExtHashBits(Hash);
+
+#if LLVM_ENABLE_THREADS
+ // Lock bucket.
+ CurBucket.Guard.lock();
+#endif
+
+ HashesPtr BucketHashes = CurBucket.Hashes;
+ DataPtr BucketEntries = CurBucket.Entries;
+ uint32_t CurEntryIdx = getStartIdx(ExtHashBits, CurBucket.Size);
+
+ while (true) {
+ uint32_t CurEntryHashBits = BucketHashes[CurEntryIdx];
+
+ if (CurEntryHashBits == 0 && BucketEntries[CurEntryIdx] == nullptr) {
+ // Found empty slot. Insert data.
+ KeyDataTy *NewData = Info::create(NewValue, MultiThreadAllocator);
+ BucketEntries[CurEntryIdx] = NewData;
+ BucketHashes[CurEntryIdx] = ExtHashBits;
+
+ CurBucket.NumberOfEntries++;
+ RehashBucket(CurBucket);
+
+#if LLVM_ENABLE_THREADS
+ CurBucket.Guard.unlock();
+#endif
+
+ return {NewData, true};
+ }
+
+ if (CurEntryHashBits == ExtHashBits) {
+ // Hash matched. Check value for equality.
+ KeyDataTy *EntryData = BucketEntries[CurEntryIdx];
+ if (Info::isEqual(Info::getKey(*EntryData), NewValue)) {
+          // An existing entry matching the inserted data was found.
+#if LLVM_ENABLE_THREADS
+ CurBucket.Guard.unlock();
+#endif
+
+ return {EntryData, false};
+ }
+ }
+
+ CurEntryIdx++;
+ CurEntryIdx &= (CurBucket.Size - 1);
+ }
+
+ llvm_unreachable("Insertion error.");
+ return {};
+ }
+
+  /// Print information about the current state of the hash table structures.
+ void printStatistic(raw_ostream &OS) {
+ OS << "\n--- HashTable statistic:\n";
+ OS << "\nNumber of buckets = " << NumberOfBuckets;
+ OS << "\nInitial bucket size = " << InitialBucketSize;
+
+ uint64_t NumberOfNonEmptyBuckets = 0;
+ uint64_t NumberOfEntriesPlusEmpty = 0;
+ uint64_t OverallNumberOfEntries = 0;
+ uint64_t OverallSize = sizeof(*this) + NumberOfBuckets * sizeof(Bucket);
+
+ DenseMap<uint32_t, uint32_t> BucketSizesMap;
+
+ // For each bucket...
+ for (uint32_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
+ Bucket &CurBucket = BucketsArray[Idx];
+
+ BucketSizesMap[CurBucket.Size]++;
+
+ if (CurBucket.NumberOfEntries != 0)
+ NumberOfNonEmptyBuckets++;
+ NumberOfEntriesPlusEmpty += CurBucket.Size;
+ OverallNumberOfEntries += CurBucket.NumberOfEntries;
+ OverallSize +=
+ (sizeof(ExtHashBitsTy) + sizeof(EntryDataTy)) * CurBucket.Size;
+ }
+
+ OS << "\nOverall number of entries = " << OverallNumberOfEntries;
+ OS << "\nOverall number of non empty buckets = " << NumberOfNonEmptyBuckets;
+ for (auto &BucketSize : BucketSizesMap)
+ OS << "\n Number of buckets with size " << BucketSize.first << ": "
+ << BucketSize.second;
+
+ std::stringstream stream;
+ stream << std::fixed << std::setprecision(2)
+ << ((float)OverallNumberOfEntries / (float)NumberOfEntriesPlusEmpty);
+ std::string str = stream.str();
+
+ OS << "\nLoad factor = " << str;
+ OS << "\nOverall allocated size = " << OverallSize;
+ }
+
+protected:
+ using ExtHashBitsTy = uint32_t;
+ using EntryDataTy = KeyDataTy *;
+
+ using HashesPtr = ExtHashBitsTy *;
+ using DataPtr = EntryDataTy *;
+
+ // Bucket structure. Keeps bucket data.
+ struct Bucket {
+ Bucket() = default;
+
+ // Size of bucket.
+ uint32_t Size = 0;
+
+ // Number of non-null entries.
+ uint32_t NumberOfEntries = 0;
+
+ // Hashes for [Size] entries.
+ HashesPtr Hashes = nullptr;
+
+ // [Size] entries.
+ DataPtr Entries = nullptr;
+
+#if LLVM_ENABLE_THREADS
+ // Mutex for this bucket.
+ std::mutex Guard;
+#endif
+ };
+
+  // Reallocate and rehash the bucket if it is full enough.
+ void RehashBucket(Bucket &CurBucket) {
+ assert((CurBucket.Size > 0) && "Uninitialised bucket");
+ if (CurBucket.NumberOfEntries < CurBucket.Size * 0.9)
+ return;
+
+ if (CurBucket.Size >= MaxBucketSize)
+ report_fatal_error("ConcurrentHashTable is full");
+
+ uint32_t NewBucketSize = CurBucket.Size << 1;
+ assert((NewBucketSize <= MaxBucketSize) && "New bucket size is too big");
+ assert((CurBucket.Size < NewBucketSize) &&
+ "New bucket size less than size of current bucket");
+
+ // Store old entries & hashes arrays.
+ HashesPtr SrcHashes = CurBucket.Hashes;
+ DataPtr SrcEntries = CurBucket.Entries;
+
+    // Allocate new entries and hashes arrays.
+ HashesPtr DestHashes = new ExtHashBitsTy[NewBucketSize];
+ memset(DestHashes, 0, sizeof(ExtHashBitsTy) * NewBucketSize);
+
+ DataPtr DestEntries = new EntryDataTy[NewBucketSize];
+ memset(DestEntries, 0, sizeof(EntryDataTy) * NewBucketSize);
+
+ // For each entry in source arrays...
+ for (uint32_t CurSrcEntryIdx = 0; CurSrcEntryIdx < CurBucket.Size;
+ CurSrcEntryIdx++) {
+ uint32_t CurSrcEntryHashBits = SrcHashes[CurSrcEntryIdx];
+
+ // Check for null entry.
+ if (CurSrcEntryHashBits == 0 && SrcEntries[CurSrcEntryIdx] == nullptr)
+ continue;
+
+ uint32_t StartDestIdx = getStartIdx(CurSrcEntryHashBits, NewBucketSize);
+
+ // Insert non-null entry into the new arrays.
+ while (true) {
+ uint32_t CurDestEntryHashBits = DestHashes[StartDestIdx];
+
+ if (CurDestEntryHashBits == 0 && DestEntries[StartDestIdx] == nullptr) {
+ // Found empty slot. Insert data.
+ DestHashes[StartDestIdx] = CurSrcEntryHashBits;
+ DestEntries[StartDestIdx] = SrcEntries[CurSrcEntryIdx];
+ break;
+ }
+
+ StartDestIdx++;
+ StartDestIdx = StartDestIdx & (NewBucketSize - 1);
+ }
+ }
+
+ // Update bucket fields.
+ CurBucket.Hashes = DestHashes;
+ CurBucket.Entries = DestEntries;
+ CurBucket.Size = NewBucketSize;
+
+ // Delete old bucket entries.
+ if (SrcHashes != nullptr)
+ delete[] SrcHashes;
+ if (SrcEntries != nullptr)
+ delete[] SrcEntries;
+ }
+
+ uint32_t getBucketIdx(hash_code Hash) { return Hash & HashMask; }
+
+ uint32_t getExtHashBits(uint64_t Hash) {
+ return (Hash & ExtHashMask) >> HashBitsNum;
+ }
+
+ uint32_t getStartIdx(uint32_t ExtHashBits, uint32_t BucketSize) {
+ assert((BucketSize > 0) && "Empty bucket");
+
+ return ExtHashBits & (BucketSize - 1);
+ }
+
+ // Number of bits in hash mask.
+ uint64_t HashBitsNum = 0;
+
+ // Hash mask.
+ uint64_t HashMask = 0;
+
+ // Hash mask for the extended hash bits.
+ uint64_t ExtHashMask = 0;
+
+ // The maximal bucket size.
+ uint32_t MaxBucketSize = 0;
+
+ // Initial size of bucket.
+ uint32_t InitialBucketSize = 0;
+
+ // The number of buckets.
+ uint32_t NumberOfBuckets = 0;
+
+ // Array of buckets.
+ std::unique_ptr<Bucket[]> BucketsArray;
+
+ // Used for allocating KeyDataTy values.
+ AllocatorTy &MultiThreadAllocator;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_ADT_CONCURRENTHASHTABLE_H
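A rough sketch of how the new table might be driven follows; the StringEntry value type and the single-threaded BumpPtrAllocator are assumptions for illustration (concurrent insertions would require a thread-safe allocator), not part of the patch:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/ConcurrentHashtable.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include <cstring>
#include <new>

using namespace llvm;

// Hypothetical value type: interns StringRef keys into allocator-owned storage.
struct StringEntry {
  StringRef Key;

  const StringRef &getKey() const { return Key; }

  static StringEntry *create(StringRef Key, BumpPtrAllocator &Allocator) {
    char *Storage = Allocator.Allocate<char>(Key.size());
    if (!Key.empty())
      std::memcpy(Storage, Key.data(), Key.size());
    return new (Allocator.Allocate<StringEntry>())
        StringEntry{StringRef(Storage, Key.size())};
  }
};

void internStrings(ArrayRef<StringRef> Strings) {
  BumpPtrAllocator Allocator; // Single-threaded here; use a thread-safe
                              // allocator when inserting from many threads.
  ConcurrentHashTableByPtr<StringRef, StringEntry, BumpPtrAllocator> Table(
      Allocator, /*EstimatedSize=*/Strings.size() + 1);
  for (StringRef S : Strings) {
    // Returns the canonical entry and whether this call created it.
    auto [Entry, Inserted] = Table.insert(S);
    (void)Entry;
    (void)Inserted;
  }
}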
diff --git a/llvm/include/llvm/ADT/DenseMap.h b/llvm/include/llvm/ADT/DenseMap.h
index 7adc6710cfa8..3ef6a7cd1b4b 100644
--- a/llvm/include/llvm/ADT/DenseMap.h
+++ b/llvm/include/llvm/ADT/DenseMap.h
@@ -141,10 +141,15 @@ public:
setNumTombstones(0);
}
+ /// Return true if the specified key is in the map, false otherwise.
+ bool contains(const_arg_type_t<KeyT> Val) const {
+ const BucketT *TheBucket;
+ return LookupBucketFor(Val, TheBucket);
+ }
+
/// Return 1 if the specified key is in the map, 0 otherwise.
size_type count(const_arg_type_t<KeyT> Val) const {
- const BucketT *TheBucket;
- return LookupBucketFor(Val, TheBucket) ? 1 : 0;
+ return contains(Val) ? 1 : 0;
}
iterator find(const_arg_type_t<KeyT> Val) {
@@ -201,6 +206,14 @@ public:
return ValueT();
}
+ /// at - Return the entry for the specified key, or abort if no such
+ /// entry exists.
+ const ValueT &at(const_arg_type_t<KeyT> Val) const {
+ auto Iter = this->find(std::move(Val));
+ assert(Iter != this->end() && "DenseMap::at failed due to a missing key");
+ return Iter->second;
+ }
+
// Inserts key,value pair into the map if the key isn't already in the map.
// If the key is already in the map, it returns false and doesn't update the
// value.
@@ -299,6 +312,20 @@ public:
insert(*I);
}
+  /// Returns the value associated with the key in the map if it exists. If it
+  /// does not exist, emplaces a default value for the key and returns a
+  /// reference to the newly created value.
+ ValueT &getOrInsertDefault(KeyT &&Key) {
+ return try_emplace(Key).first->second;
+ }
+
+  /// Returns the value associated with the key in the map if it exists. If it
+  /// does not exist, emplaces a default value for the key and returns a
+  /// reference to the newly created value.
+ ValueT &getOrInsertDefault(const KeyT &Key) {
+ return try_emplace(Key).first->second;
+ }
+
bool erase(const KeyT &Val) {
BucketT *TheBucket;
if (!LookupBucketFor(Val, TheBucket))
@@ -906,7 +933,7 @@ class SmallDenseMap
public:
explicit SmallDenseMap(unsigned NumInitBuckets = 0) {
if (NumInitBuckets > InlineBuckets)
- NumInitBuckets = NextPowerOf2(NumInitBuckets - 1);
+ NumInitBuckets = llvm::bit_ceil(NumInitBuckets);
init(NumInitBuckets);
}
diff --git a/llvm/include/llvm/ADT/DenseMapInfo.h b/llvm/include/llvm/ADT/DenseMapInfo.h
index 1c00fb699cc2..5b7dce7b53c6 100644
--- a/llvm/include/llvm/ADT/DenseMapInfo.h
+++ b/llvm/include/llvm/ADT/DenseMapInfo.h
@@ -20,7 +20,6 @@
#include <tuple>
#include <type_traits>
#include <utility>
-#include <variant>
namespace llvm {
@@ -234,6 +233,14 @@ struct DenseMapInfo<std::pair<T, U>> {
SecondInfo::getHashValue(PairVal.second));
}
+ // Expose an additional function intended to be used by other
+ // specializations of DenseMapInfo without needing to know how
+ // to combine hash values manually
+ static unsigned getHashValuePiecewise(const T &First, const U &Second) {
+ return detail::combineHashValue(FirstInfo::getHashValue(First),
+ SecondInfo::getHashValue(Second));
+ }
+
static bool isEqual(const Pair &LHS, const Pair &RHS) {
return FirstInfo::isEqual(LHS.first, RHS.first) &&
SecondInfo::isEqual(LHS.second, RHS.second);
@@ -290,37 +297,6 @@ template <typename... Ts> struct DenseMapInfo<std::tuple<Ts...>> {
}
};
-// Provide DenseMapInfo for variants whose all alternatives have DenseMapInfo.
-template <typename... Ts> struct DenseMapInfo<std::variant<Ts...>> {
- using Variant = std::variant<Ts...>;
- using FirstT = std::variant_alternative_t<0, Variant>;
-
- static inline Variant getEmptyKey() {
- return Variant(std::in_place_index<0>, DenseMapInfo<FirstT>::getEmptyKey());
- }
-
- static inline Variant getTombstoneKey() {
- return Variant(std::in_place_index<0>,
- DenseMapInfo<FirstT>::getTombstoneKey());
- }
-
- static unsigned getHashValue(const Variant &Val) {
- return std::visit(
- [&Val](auto &&Alternative) {
- using T = std::decay_t<decltype(Alternative)>;
- // Include index in hash to make sure same value as different
- // alternatives don't collide.
- return detail::combineHashValue(
- DenseMapInfo<size_t>::getHashValue(Val.index()),
- DenseMapInfo<T>::getHashValue(Alternative));
- },
- Val);
- }
-
- static bool isEqual(const Variant &LHS, const Variant &RHS) {
- return LHS == RHS;
- }
-};
} // end namespace llvm
#endif // LLVM_ADT_DENSEMAPINFO_H
diff --git a/llvm/include/llvm/ADT/DenseMapInfoVariant.h b/llvm/include/llvm/ADT/DenseMapInfoVariant.h
new file mode 100644
index 000000000000..a97f9b9566c8
--- /dev/null
+++ b/llvm/include/llvm/ADT/DenseMapInfoVariant.h
@@ -0,0 +1,71 @@
+//===- DenseMapInfoVariant.h - Type traits for DenseMap<variant> *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines DenseMapInfo traits for DenseMap<std::variant<Ts...>>.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_DENSEMAPINFOVARIANT_H
+#define LLVM_ADT_DENSEMAPINFOVARIANT_H
+
+#include "llvm/ADT/DenseMapInfo.h"
+#include <utility>
+#include <variant>
+
+namespace llvm {
+
+// Provide DenseMapInfo for variants whose all alternatives have DenseMapInfo.
+template <typename... Ts> struct DenseMapInfo<std::variant<Ts...>> {
+ using Variant = std::variant<Ts...>;
+ using FirstT = std::variant_alternative_t<0, Variant>;
+
+ static inline Variant getEmptyKey() {
+ return Variant(std::in_place_index<0>, DenseMapInfo<FirstT>::getEmptyKey());
+ }
+
+ static inline Variant getTombstoneKey() {
+ return Variant(std::in_place_index<0>,
+ DenseMapInfo<FirstT>::getTombstoneKey());
+ }
+
+ static unsigned getHashValue(const Variant &Val) {
+ return std::visit(
+ [&Val](auto &&Alternative) {
+ using T = std::decay_t<decltype(Alternative)>;
+ // Include index in hash to make sure same value as different
+ // alternatives don't collide.
+ return DenseMapInfo<std::pair<size_t, T>>::getHashValuePiecewise(
+ Val.index(), Alternative);
+ },
+ Val);
+ }
+
+ static bool isEqual(const Variant &LHS, const Variant &RHS) {
+ if (LHS.index() != RHS.index())
+ return false;
+ if (LHS.valueless_by_exception())
+ return true;
+ // We want to dispatch to DenseMapInfo<T>::isEqual(LHS.get(I), RHS.get(I))
+ // We know the types are the same, but std::visit(V, LHS, RHS) doesn't.
+ // We erase the type held in LHS to void*, and dispatch over RHS.
+ const void *ErasedLHS =
+ std::visit([](const auto &LHS) -> const void * { return &LHS; }, LHS);
+ return std::visit(
+ [&](const auto &RHS) -> bool {
+ using T = std::remove_cv_t<std::remove_reference_t<decltype(RHS)>>;
+ return DenseMapInfo<T>::isEqual(*static_cast<const T *>(ErasedLHS),
+ RHS);
+ },
+ RHS);
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_ADT_DENSEMAPINFOVARIANT_H
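Since the std::variant support now lives in its own header, clients keying a DenseMap by a variant include it explicitly; a minimal sketch, with alternatives chosen only as an example:

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfoVariant.h"
#include "llvm/ADT/StringRef.h"
#include <variant>

// Both alternatives (unsigned and StringRef) already have DenseMapInfo.
using Key = std::variant<unsigned, llvm::StringRef>;

void variantKeySketch() {
  llvm::DenseMap<Key, int> Counts;
  Counts[Key{1u}] += 1;
  Counts[Key{llvm::StringRef("foo")}] += 1;
  // The variant index participates in the hash, so the same payload held by
  // different alternatives maps to different keys.
}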
diff --git a/llvm/include/llvm/ADT/DepthFirstIterator.h b/llvm/include/llvm/ADT/DepthFirstIterator.h
index 29ea2d541d9f..71053c2d0d8a 100644
--- a/llvm/include/llvm/ADT/DepthFirstIterator.h
+++ b/llvm/include/llvm/ADT/DepthFirstIterator.h
@@ -88,7 +88,7 @@ public:
using value_type = typename GT::NodeRef;
using difference_type = std::ptrdiff_t;
using pointer = value_type *;
- using reference = value_type &;
+ using reference = const value_type &;
private:
using NodeRef = typename GT::NodeRef;
@@ -165,7 +165,7 @@ public:
}
bool operator!=(const df_iterator &x) const { return !(*this == x); }
- const NodeRef &operator*() const { return VisitStack.back().first; }
+ reference operator*() const { return VisitStack.back().first; }
// This is a nonstandard operator-> that dereferences the pointer an extra
// time... so that you can actually call methods ON the Node, because
diff --git a/llvm/include/llvm/ADT/EpochTracker.h b/llvm/include/llvm/ADT/EpochTracker.h
index a639d1b5b3ec..fc41d6f2c92d 100644
--- a/llvm/include/llvm/ADT/EpochTracker.h
+++ b/llvm/include/llvm/ADT/EpochTracker.h
@@ -23,6 +23,7 @@
namespace llvm {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+#define LLVM_DEBUGEPOCHBASE_HANDLEBASE_EMPTYBASE
/// A base class for data structure classes wishing to make iterators
/// ("handles") pointing into themselves fail-fast. When building without
@@ -78,6 +79,11 @@ public:
};
#else
+#ifdef _MSC_VER
+#define LLVM_DEBUGEPOCHBASE_HANDLEBASE_EMPTYBASE __declspec(empty_bases)
+#else
+#define LLVM_DEBUGEPOCHBASE_HANDLEBASE_EMPTYBASE
+#endif // _MSC_VER
class DebugEpochBase {
public:
diff --git a/llvm/include/llvm/ADT/FloatingPointMode.h b/llvm/include/llvm/ADT/FloatingPointMode.h
index 59ccea1f9d44..61e57094fdbb 100644
--- a/llvm/include/llvm/ADT/FloatingPointMode.h
+++ b/llvm/include/llvm/ADT/FloatingPointMode.h
@@ -15,6 +15,7 @@
#ifndef LLVM_ADT_FLOATINGPOINTMODE_H
#define LLVM_ADT_FLOATINGPOINTMODE_H
+#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/raw_ostream.h"
@@ -79,7 +80,10 @@ struct DenormalMode {
PreserveSign,
/// Denormals are flushed to positive zero.
- PositiveZero
+ PositiveZero,
+
+ /// Denormals have unknown treatment.
+ Dynamic
};
/// Denormal flushing mode for floating point instruction results in the
@@ -100,6 +104,11 @@ struct DenormalMode {
return DenormalMode(DenormalModeKind::Invalid, DenormalModeKind::Invalid);
}
+ /// Return the assumed default mode for a function without denormal-fp-math.
+ static constexpr DenormalMode getDefault() {
+ return getIEEE();
+ }
+
static constexpr DenormalMode getIEEE() {
return DenormalMode(DenormalModeKind::IEEE, DenormalModeKind::IEEE);
}
@@ -114,6 +123,10 @@ struct DenormalMode {
DenormalModeKind::PositiveZero);
}
+ static constexpr DenormalMode getDynamic() {
+ return DenormalMode(DenormalModeKind::Dynamic, DenormalModeKind::Dynamic);
+ }
+
bool operator==(DenormalMode Other) const {
return Output == Other.Output && Input == Other.Input;
}
@@ -131,6 +144,30 @@ struct DenormalMode {
Input != DenormalModeKind::Invalid;
}
+ /// Return true if input denormals must be implicitly treated as 0.
+ constexpr bool inputsAreZero() const {
+ return Input == DenormalModeKind::PreserveSign ||
+ Input == DenormalModeKind::PositiveZero;
+ }
+
+ /// Return true if output denormals should be flushed to 0.
+ constexpr bool outputsAreZero() const {
+ return Output == DenormalModeKind::PreserveSign ||
+ Output == DenormalModeKind::PositiveZero;
+ }
+
+  /// Get the effective denormal mode if this caller calls into a function
+  /// with \p Callee. This promotes dynamic modes to the mode of the
+  /// caller.
+ DenormalMode mergeCalleeMode(DenormalMode Callee) const {
+ DenormalMode MergedMode = Callee;
+ if (Callee.Input == DenormalMode::Dynamic)
+ MergedMode.Input = Input;
+ if (Callee.Output == DenormalMode::Dynamic)
+ MergedMode.Output = Output;
+ return MergedMode;
+ }
+
inline void print(raw_ostream &OS) const;
inline std::string str() const {
@@ -151,10 +188,11 @@ inline DenormalMode::DenormalModeKind
parseDenormalFPAttributeComponent(StringRef Str) {
// Assume ieee on unspecified attribute.
return StringSwitch<DenormalMode::DenormalModeKind>(Str)
- .Cases("", "ieee", DenormalMode::IEEE)
- .Case("preserve-sign", DenormalMode::PreserveSign)
- .Case("positive-zero", DenormalMode::PositiveZero)
- .Default(DenormalMode::Invalid);
+ .Cases("", "ieee", DenormalMode::IEEE)
+ .Case("preserve-sign", DenormalMode::PreserveSign)
+ .Case("positive-zero", DenormalMode::PositiveZero)
+ .Case("dynamic", DenormalMode::Dynamic)
+ .Default(DenormalMode::Invalid);
}
/// Return the name used for the denormal handling mode used by the
@@ -167,6 +205,8 @@ inline StringRef denormalModeKindName(DenormalMode::DenormalModeKind Mode) {
return "preserve-sign";
case DenormalMode::PositiveZero:
return "positive-zero";
+ case DenormalMode::Dynamic:
+ return "dynamic";
default:
return "";
}
@@ -192,11 +232,11 @@ void DenormalMode::print(raw_ostream &OS) const {
OS << denormalModeKindName(Output) << ',' << denormalModeKindName(Input);
}
-}
-
/// Floating-point class tests, supported by 'is_fpclass' intrinsic. Actual
/// test may be an OR combination of basic tests.
-enum FPClassTest {
+enum FPClassTest : unsigned {
+ fcNone = 0,
+
fcSNan = 0x0001,
fcQNan = 0x0002,
fcNegInf = 0x0004,
@@ -216,7 +256,23 @@ enum FPClassTest {
fcPosFinite = fcPosNormal | fcPosSubnormal | fcPosZero,
fcNegFinite = fcNegNormal | fcNegSubnormal | fcNegZero,
fcFinite = fcPosFinite | fcNegFinite,
- fcAllFlags = fcNan | fcInf | fcFinite
+ fcPositive = fcPosFinite | fcPosInf,
+ fcNegative = fcNegFinite | fcNegInf,
+
+ fcAllFlags = fcNan | fcInf | fcFinite,
};
+LLVM_DECLARE_ENUM_AS_BITMASK(FPClassTest, /* LargestValue */ fcPosInf);
+
+/// Return the test mask which returns true if the value's sign bit is flipped.
+FPClassTest fneg(FPClassTest Mask);
+
+/// Return the test mask which returns true if the value's sign bit is cleared.
+FPClassTest fabs(FPClassTest Mask);
+
+/// Write a human readable form of \p Mask to \p OS
+raw_ostream &operator<<(raw_ostream &OS, FPClassTest Mask);
+
+} // namespace llvm
+
#endif // LLVM_ADT_FLOATINGPOINTMODE_H
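A small sketch of how the new "dynamic" denormal kind is parsed and merged with a caller's mode via mergeCalleeMode (the values are chosen only for illustration):

#include "llvm/ADT/FloatingPointMode.h"

void denormalMergeSketch() {
  using llvm::DenormalMode;

  // Caller flushes denormals; callee was compiled with the dynamic mode.
  DenormalMode Caller = DenormalMode::getPreserveSign();
  DenormalMode Callee = DenormalMode::getDynamic();

  // Dynamic components inherit the caller's setting.
  DenormalMode Effective = Caller.mergeCalleeMode(Callee);
  // Effective == DenormalMode::getPreserveSign()

  // Parsing now accepts the new spelling as well.
  auto Kind = llvm::parseDenormalFPAttributeComponent("dynamic");
  (void)Kind; // DenormalMode::Dynamic
  (void)Effective;
}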
diff --git a/llvm/include/llvm/ADT/FunctionExtras.h b/llvm/include/llvm/ADT/FunctionExtras.h
index 8f04277cdf0e..53de2cb74253 100644
--- a/llvm/include/llvm/ADT/FunctionExtras.h
+++ b/llvm/include/llvm/ADT/FunctionExtras.h
@@ -172,16 +172,15 @@ protected:
bool isInlineStorage() const { return CallbackAndInlineFlag.getInt(); }
bool isTrivialCallback() const {
- return CallbackAndInlineFlag.getPointer().template is<TrivialCallback *>();
+ return isa<TrivialCallback *>(CallbackAndInlineFlag.getPointer());
}
CallPtrT getTrivialCallback() const {
- return CallbackAndInlineFlag.getPointer().template get<TrivialCallback *>()->CallPtr;
+ return cast<TrivialCallback *>(CallbackAndInlineFlag.getPointer())->CallPtr;
}
NonTrivialCallbacks *getNonTrivialCallbacks() const {
- return CallbackAndInlineFlag.getPointer()
- .template get<NonTrivialCallbacks *>();
+ return cast<NonTrivialCallbacks *>(CallbackAndInlineFlag.getPointer());
}
CallPtrT getCallPtr() const {
diff --git a/llvm/include/llvm/ADT/GenericCycleImpl.h b/llvm/include/llvm/ADT/GenericCycleImpl.h
index 07ac1768ea27..c9e0772c2464 100644
--- a/llvm/include/llvm/ADT/GenericCycleImpl.h
+++ b/llvm/include/llvm/ADT/GenericCycleImpl.h
@@ -15,8 +15,8 @@
///
/// This file should only be included by files that implement a
/// specialization of the relevant templates. Currently these are:
-/// - CycleAnalysis.cpp
-/// - MachineCycleAnalysis.cpp
+/// - llvm/lib/IR/CycleInfo.cpp
+/// - llvm/lib/CodeGen/MachineCycleAnalysis.cpp
///
//===----------------------------------------------------------------------===//
@@ -177,8 +177,7 @@ void GenericCycleInfo<ContextT>::moveTopLevelCycleToNewParent(CycleT *NewParent,
CurrentContainer.pop_back();
Child->ParentCycle = NewParent;
- NewParent->Blocks.insert(NewParent->Blocks.end(), Child->block_begin(),
- Child->block_end());
+ NewParent->Blocks.insert(Child->block_begin(), Child->block_end());
for (auto &It : BlockMapTopLevel)
if (It.second == Child)
@@ -266,7 +265,7 @@ void GenericCycleInfoCompute<ContextT>::run(BlockT *EntryBlock) {
} else {
Info.BlockMap.try_emplace(Block, NewCycle.get());
assert(!is_contained(NewCycle->Blocks, Block));
- NewCycle->Blocks.push_back(Block);
+ NewCycle->Blocks.insert(Block);
ProcessPredecessors(Block);
Info.BlockMapTopLevel.try_emplace(Block, NewCycle.get());
}
diff --git a/llvm/include/llvm/ADT/GenericCycleInfo.h b/llvm/include/llvm/ADT/GenericCycleInfo.h
index 63db9eb9a601..51ea7ed9a498 100644
--- a/llvm/include/llvm/ADT/GenericCycleInfo.h
+++ b/llvm/include/llvm/ADT/GenericCycleInfo.h
@@ -28,16 +28,12 @@
#ifndef LLVM_ADT_GENERICCYCLEINFO_H
#define LLVM_ADT_GENERICCYCLEINFO_H
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/GenericSSAContext.h"
#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
-#include <vector>
namespace llvm {
@@ -67,7 +63,9 @@ private:
/// Basic blocks that are contained in the cycle, including entry blocks,
/// and including blocks that are part of a child cycle.
- std::vector<BlockT *> Blocks;
+ using BlockSetVectorT = SetVector<BlockT *, SmallVector<BlockT *, 8>,
+ DenseSet<const BlockT *>, 8>;
+ BlockSetVectorT Blocks;
/// Depth of the cycle in the tree. The root "cycle" is at depth 0.
///
@@ -85,7 +83,7 @@ private:
}
void appendEntry(BlockT *Block) { Entries.push_back(Block); }
- void appendBlock(BlockT *Block) { Blocks.push_back(Block); }
+ void appendBlock(BlockT *Block) { Blocks.insert(Block); }
GenericCycle(const GenericCycle &) = delete;
GenericCycle &operator=(const GenericCycle &) = delete;
@@ -110,9 +108,7 @@ public:
}
/// \brief Return whether \p Block is contained in the cycle.
- bool contains(const BlockT *Block) const {
- return is_contained(Blocks, Block);
- }
+ bool contains(const BlockT *Block) const { return Blocks.contains(Block); }
/// \brief Returns true iff this cycle contains \p C.
///
@@ -171,7 +167,7 @@ public:
/// Iteration over blocks in the cycle (including entry blocks).
//@{
- using const_block_iterator = typename std::vector<BlockT *>::const_iterator;
+ using const_block_iterator = typename BlockSetVectorT::const_iterator;
const_block_iterator block_begin() const {
return const_block_iterator{Blocks.begin()};
@@ -274,6 +270,7 @@ public:
#endif
void print(raw_ostream &Out) const;
void dump() const { print(dbgs()); }
+ Printable print(const CycleT *Cycle) { return Cycle->print(Context); }
//@}
/// Iteration over top-level cycles.
diff --git a/llvm/include/llvm/ADT/GenericSSAContext.h b/llvm/include/llvm/ADT/GenericSSAContext.h
index 409222547d5c..929fd1442750 100644
--- a/llvm/include/llvm/ADT/GenericSSAContext.h
+++ b/llvm/include/llvm/ADT/GenericSSAContext.h
@@ -31,13 +31,26 @@ public:
// equivalent of a ValueT.
//
// using ValueRefT = ...
+ //
+ // The ConstValueRefT is needed to work with "const Value *", where const
+ // needs to bind to the pointee and not the pointer.
+ //
+ // using ConstValueRefT = ...
+ //
+ // The null value for ValueRefT.
+ //
+ // static constexpr ValueRefT ValueRefNull;
- // An InstT is a subclass of ValueT that itself defines one or more ValueT
- // objects.
+ // An InstructionT usually defines one or more ValueT objects.
//
- // using InstT = ... must be a subclass of Value
+ // using InstructionT = ... must be a subclass of Value
- // A BlockT is a sequence of InstT, and forms a node of the CFG. It
+ // A UseT represents a data-edge from the defining instruction to the using
+ // instruction.
+ //
+ // using UseT = ...
+
+ // A BlockT is a sequence of InstructionT, and forms a node of the CFG. It
// has global methods predecessors() and successors() that return
// the list of incoming CFG edges and outgoing CFG edges
// respectively.
@@ -53,9 +66,10 @@ public:
// indicated by the compiler.
using FunctionT = typename _FunctionT::invalidTemplateInstanceError;
- // Every FunctionT has a unique BlockT marked as its entry.
+ // A dominator tree provides the dominance relation between basic blocks in
+  // a given function.
//
- // static BlockT* getEntryBlock(FunctionT &F);
+ // using DominatorTreeT = ...
// Initialize the SSA context with information about the FunctionT being
// processed.
@@ -63,6 +77,26 @@ public:
// void setFunction(FunctionT &function);
// FunctionT* getFunction() const;
+ // Every FunctionT has a unique BlockT marked as its entry.
+ //
+ // static BlockT* getEntryBlock(FunctionT &F);
+
+ // Methods to examine basic blocks and values
+ //
+ // static void appendBlockDefs(SmallVectorImpl<ValueRefT> &defs,
+ // BlockT &block);
+ // static void appendBlockDefs(SmallVectorImpl<const ValueRefT> &defs,
+ // const BlockT &block);
+
+ // static void appendBlockTerms(SmallVectorImpl<InstructionT *> &terms,
+ // BlockT &block);
+ // static void appendBlockTerms(SmallVectorImpl<const InstructionT *> &terms,
+ // const BlockT &block);
+ //
+ // static bool comesBefore(const InstructionT *lhs, const InstructionT *rhs);
+ // static bool isConstantOrUndefValuePhi(const InstructionT &Instr);
+ // const BlockT *getDefBlock(const ValueRefT value) const;
+
// Methods to print various objects.
//
// Printable print(BlockT *block) const;
diff --git a/llvm/include/llvm/ADT/GenericUniformityImpl.h b/llvm/include/llvm/ADT/GenericUniformityImpl.h
index 06d9b417ebde..4df04accc683 100644
--- a/llvm/include/llvm/ADT/GenericUniformityImpl.h
+++ b/llvm/include/llvm/ADT/GenericUniformityImpl.h
@@ -1,4 +1,4 @@
-//===- GenericUniformAnalysis.cpp --------------------*- C++ -*------------===//
+//===- GenericUniformityImpl.h -----------------------*- C++ -*------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -330,6 +330,7 @@ public:
using FunctionT = typename ContextT::FunctionT;
using ValueRefT = typename ContextT::ValueRefT;
using ConstValueRefT = typename ContextT::ConstValueRefT;
+ using UseT = typename ContextT::UseT;
using InstructionT = typename ContextT::InstructionT;
using DominatorTreeT = typename ContextT::DominatorTreeT;
@@ -354,12 +355,16 @@ public:
/// \brief Mark \p UniVal as a value that is always uniform.
void addUniformOverride(const InstructionT &Instr);
- /// \brief Mark \p DivVal as a value that is always divergent.
+ /// \brief Examine \p I for divergent outputs and add to the worklist.
+ void markDivergent(const InstructionT &I);
+
+ /// \brief Mark \p DivVal as a divergent value.
/// \returns Whether the tracked divergence state of \p DivVal changed.
- bool markDivergent(const InstructionT &I);
bool markDivergent(ConstValueRefT DivVal);
- bool markDefsDivergent(const InstructionT &Instr,
- bool AllDefsDivergent = true);
+
+ /// \brief Mark outputs of \p Instr as divergent.
+ /// \returns Whether the tracked divergence state of any output has changed.
+ bool markDefsDivergent(const InstructionT &Instr);
/// \brief Propagate divergence to all instructions in the region.
/// Divergence is seeded by calls to \p markDivergent.
@@ -384,6 +389,8 @@ public:
/// \brief Whether \p Val is divergent at its definition.
bool isDivergent(ConstValueRefT V) const { return DivergentValues.count(V); }
+ bool isDivergentUse(const UseT &U) const;
+
bool hasDivergentTerminator(const BlockT &B) const {
return DivergentTermBlocks.contains(&B);
}
@@ -448,13 +455,12 @@ private:
void propagateCycleExitDivergence(const BlockT &DivExit,
const CycleT &DivCycle);
- /// \brief Internal implementation function for propagateCycleExitDivergence.
- void analyzeCycleExitDivergence(const CycleT &OuterDivCycle);
+ /// Mark as divergent all external uses of values defined in \p DefCycle.
+ void analyzeCycleExitDivergence(const CycleT &DefCycle);
- /// \brief Mark all instruction as divergent that use a value defined in \p
- /// OuterDivCycle. Push their users on the worklist.
- void analyzeTemporalDivergence(const InstructionT &I,
- const CycleT &OuterDivCycle);
+ /// \brief Mark as divergent all uses of \p I that are outside \p DefCycle.
+ void propagateTemporalDivergence(const InstructionT &I,
+ const CycleT &DefCycle);
/// \brief Push all users of \p Val (in the region) to the worklist.
void pushUsers(const InstructionT &I);
@@ -462,9 +468,9 @@ private:
bool usesValueFromCycle(const InstructionT &I, const CycleT &DefCycle) const;
- /// \brief Whether \p Val is divergent when read in \p ObservingBlock.
+ /// \brief Whether \p Def is divergent when read in \p ObservingBlock.
bool isTemporalDivergent(const BlockT &ObservingBlock,
- ConstValueRefT Val) const;
+ const InstructionT &Def) const;
};
template <typename ImplT>
@@ -773,18 +779,23 @@ auto llvm::GenericSyncDependenceAnalysis<ContextT>::getJoinBlocks(
}
template <typename ContextT>
-bool GenericUniformityAnalysisImpl<ContextT>::markDivergent(
+void GenericUniformityAnalysisImpl<ContextT>::markDivergent(
const InstructionT &I) {
+ if (isAlwaysUniform(I))
+ return;
+ bool Marked = false;
if (I.isTerminator()) {
- if (DivergentTermBlocks.insert(I.getParent()).second) {
+ Marked = DivergentTermBlocks.insert(I.getParent()).second;
+ if (Marked) {
LLVM_DEBUG(dbgs() << "marked divergent term block: "
<< Context.print(I.getParent()) << "\n");
- return true;
}
- return false;
+ } else {
+ Marked = markDefsDivergent(I);
}
- return markDefsDivergent(I);
+ if (Marked)
+ Worklist.push_back(&I);
}
template <typename ContextT>
@@ -803,101 +814,38 @@ void GenericUniformityAnalysisImpl<ContextT>::addUniformOverride(
UniformOverrides.insert(&Instr);
}
-template <typename ContextT>
-void GenericUniformityAnalysisImpl<ContextT>::analyzeTemporalDivergence(
- const InstructionT &I, const CycleT &OuterDivCycle) {
- if (isDivergent(I))
- return;
-
- LLVM_DEBUG(dbgs() << "Analyze temporal divergence: " << Context.print(&I)
- << "\n");
- if (!usesValueFromCycle(I, OuterDivCycle))
- return;
-
- if (isAlwaysUniform(I))
- return;
-
- if (markDivergent(I))
- Worklist.push_back(&I);
-}
-
-// Mark all external users of values defined inside \param
-// OuterDivCycle as divergent.
+// Mark as divergent all external uses of values defined in \p DefCycle.
+//
+// A value V defined by a block B inside \p DefCycle may be used outside the
+// cycle only if the use is a PHI in some exit block, or B dominates some exit
+// block. Thus, we check uses as follows:
+//
+// - Check all PHIs in all exit blocks for inputs defined inside \p DefCycle.
+// - For every block B inside \p DefCycle that dominates at least one exit
+// block, check all uses outside \p DefCycle.
//
-// This follows all live out edges wherever they may lead. Potential
-// users of values defined inside DivCycle could be anywhere in the
-// dominance region of DivCycle (including its fringes for phi nodes).
-// A cycle C dominates a block B iff every path from the entry block
-// to B must pass through a block contained in C. If C is a reducible
-// cycle (or natural loop), C dominates B iff the header of C
-// dominates B. But in general, we iteratively examine cycle cycle
-// exits and their successors.
+// FIXME: This function does not distinguish between divergent and uniform
+// exits. For each divergent exit, only the values that are live at that exit
+// need to be propagated as divergent at their use outside the cycle.
template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::analyzeCycleExitDivergence(
- const CycleT &OuterDivCycle) {
- // Set of blocks that are dominated by the cycle, i.e., each is only
- // reachable from paths that pass through the cycle.
- SmallPtrSet<BlockT *, 16> DomRegion;
-
- // The boundary of DomRegion, formed by blocks that are not
- // dominated by the cycle.
- SmallVector<BlockT *> DomFrontier;
- OuterDivCycle.getExitBlocks(DomFrontier);
-
- // Returns true if BB is dominated by the cycle.
- auto isInDomRegion = [&](BlockT *BB) {
- for (auto *P : predecessors(BB)) {
- if (OuterDivCycle.contains(P))
- continue;
- if (DomRegion.count(P))
- continue;
- return false;
- }
- return true;
- };
-
- // Keep advancing the frontier along successor edges, while
- // promoting blocks to DomRegion.
- while (true) {
- bool Promoted = false;
- SmallVector<BlockT *> Temp;
- for (auto *W : DomFrontier) {
- if (!isInDomRegion(W)) {
- Temp.push_back(W);
- continue;
- }
- DomRegion.insert(W);
- Promoted = true;
- for (auto *Succ : successors(W)) {
- if (DomRegion.contains(Succ))
- continue;
- Temp.push_back(Succ);
+ const CycleT &DefCycle) {
+ SmallVector<BlockT *> Exits;
+ DefCycle.getExitBlocks(Exits);
+ for (auto *Exit : Exits) {
+ for (auto &Phi : Exit->phis()) {
+ if (usesValueFromCycle(Phi, DefCycle)) {
+ markDivergent(Phi);
}
}
- if (!Promoted)
- break;
- DomFrontier = Temp;
- }
-
- // At DomFrontier, only the PHI nodes are affected by temporal
- // divergence.
- for (const auto *UserBlock : DomFrontier) {
- LLVM_DEBUG(dbgs() << "Analyze phis after cycle exit: "
- << Context.print(UserBlock) << "\n");
- for (const auto &Phi : UserBlock->phis()) {
- LLVM_DEBUG(dbgs() << " " << Context.print(&Phi) << "\n");
- analyzeTemporalDivergence(Phi, OuterDivCycle);
- }
}
- // All instructions inside the dominance region are affected by
- // temporal divergence.
- for (const auto *UserBlock : DomRegion) {
- LLVM_DEBUG(dbgs() << "Analyze non-phi users after cycle exit: "
- << Context.print(UserBlock) << "\n");
- for (const auto &I : *UserBlock) {
- LLVM_DEBUG(dbgs() << " " << Context.print(&I) << "\n");
- analyzeTemporalDivergence(I, OuterDivCycle);
+ for (auto *BB : DefCycle.blocks()) {
+ if (!llvm::any_of(Exits,
+ [&](BlockT *Exit) { return DT.dominates(BB, Exit); }))
+ continue;
+ for (auto &II : *BB) {
+ propagateTemporalDivergence(II, DefCycle);
}
}
}
@@ -947,12 +895,7 @@ void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs(
if (I.isTerminator())
break;
- // Mark this as divergent. We don't check if the instruction is
- // always uniform. In a cycle where the thread convergence is not
- // statically known, the instruction is not statically converged,
- // and its outputs cannot be statically uniform.
- if (markDivergent(I))
- Worklist.push_back(&I);
+ markDivergent(I);
}
}
@@ -963,10 +906,16 @@ void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << Context.print(&JoinBlock)
<< "\n");
for (const auto &Phi : JoinBlock.phis()) {
- if (ContextT::isConstantValuePhi(Phi))
+ // FIXME: The non-undef value is not constant per se; it just happens to be
+ // uniform and may not dominate this PHI. So assuming that the same value
+ // reaches along all incoming edges may itself be undefined behaviour. This
+ // particular interpretation of the undef value was added to
+ // DivergenceAnalysis in the following review:
+ //
+ // https://reviews.llvm.org/D19013
+ if (ContextT::isConstantOrUndefValuePhi(Phi))
continue;
- if (markDivergent(Phi))
- Worklist.push_back(&Phi);
+ markDivergent(Phi);
}
}
@@ -1081,6 +1030,20 @@ getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
}
template <typename ContextT>
+bool GenericUniformityAnalysisImpl<ContextT>::isTemporalDivergent(
+ const BlockT &ObservingBlock, const InstructionT &Def) const {
+ const BlockT *DefBlock = Def.getParent();
+ for (const CycleT *Cycle = CI.getCycle(DefBlock);
+ Cycle && !Cycle->contains(&ObservingBlock);
+ Cycle = Cycle->getParentCycle()) {
+ if (DivergentExitCycles.contains(Cycle)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::analyzeControlDivergence(
const InstructionT &Term) {
const auto *DivTermBlock = Term.getParent();
@@ -1176,17 +1139,17 @@ GenericUniformityInfo<ContextT>::GenericUniformityInfo(
const TargetTransformInfo *TTI)
: F(&Func) {
DA.reset(new ImplT{Func, DT, CI, TTI});
- DA->initialize();
- DA->compute();
}
template <typename ContextT>
void GenericUniformityAnalysisImpl<ContextT>::print(raw_ostream &OS) const {
bool haveDivergentArgs = false;
- if (DivergentValues.empty()) {
- assert(DivergentTermBlocks.empty());
- assert(DivergentExitCycles.empty());
+ // Control flow instructions may be divergent even if their inputs are
+ // uniform. Thus, although exceedingly rare, it is possible to have a program
+ // with no divergent values but with divergent control structures.
+ if (DivergentValues.empty() && DivergentTermBlocks.empty() &&
+ DivergentExitCycles.empty()) {
OS << "ALL VALUES UNIFORM\n";
return;
}
@@ -1258,6 +1221,16 @@ bool GenericUniformityInfo<ContextT>::isDivergent(ConstValueRefT V) const {
}
template <typename ContextT>
+bool GenericUniformityInfo<ContextT>::isDivergent(const InstructionT *I) const {
+ return DA->isDivergent(*I);
+}
+
+template <typename ContextT>
+bool GenericUniformityInfo<ContextT>::isDivergentUse(const UseT &U) const {
+ return DA->isDivergentUse(U);
+}
+
+template <typename ContextT>
bool GenericUniformityInfo<ContextT>::hasDivergentTerminator(const BlockT &B) {
return DA->hasDivergentTerminator(B);
}
diff --git a/llvm/include/llvm/ADT/GenericUniformityInfo.h b/llvm/include/llvm/ADT/GenericUniformityInfo.h
index 24807bdc1c35..114fdfed765c 100644
--- a/llvm/include/llvm/ADT/GenericUniformityInfo.h
+++ b/llvm/include/llvm/ADT/GenericUniformityInfo.h
@@ -5,18 +5,11 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_GENERICUNIFORMITYINFO_H
#define LLVM_ADT_GENERICUNIFORMITYINFO_H
-// #include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/GenericCycleInfo.h"
-// #include "llvm/ADT/SmallPtrSet.h"
-// #include "llvm/ADT/Uniformity.h"
-// #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
@@ -39,6 +32,7 @@ public:
using FunctionT = typename ContextT::FunctionT;
using ValueRefT = typename ContextT::ValueRefT;
using ConstValueRefT = typename ContextT::ConstValueRefT;
+ using UseT = typename ContextT::UseT;
using InstructionT = typename ContextT::InstructionT;
using DominatorTreeT = typename ContextT::DominatorTreeT;
using ThisT = GenericUniformityInfo<ContextT>;
@@ -53,6 +47,11 @@ public:
GenericUniformityInfo(GenericUniformityInfo &&) = default;
GenericUniformityInfo &operator=(GenericUniformityInfo &&) = default;
+ void compute() {
+ DA->initialize();
+ DA->compute();
+ }
+
/// Whether any divergence was detected.
bool hasDivergence() const;
@@ -65,6 +64,17 @@ public:
/// Whether \p V is uniform/non-divergent.
bool isUniform(ConstValueRefT V) const { return !isDivergent(V); }
+ // Similar queries for InstructionT. These accept a pointer argument so that
+ // in LLVM IR, they overload the equivalent queries for Value*. For example,
+ // if querying whether a BranchInst is divergent, it should not be treated as
+ // a Value in LLVM IR.
+ bool isUniform(const InstructionT *I) const { return !isDivergent(I); };
+ bool isDivergent(const InstructionT *I) const;
+
+ /// \brief Whether \p U is divergent. Uses of a uniform value can be
+ /// divergent.
+ bool isDivergentUse(const UseT &U) const;
+
bool hasDivergentTerminator(const BlockT &B);
void print(raw_ostream &Out) const;
diff --git a/llvm/include/llvm/ADT/Hashing.h b/llvm/include/llvm/ADT/Hashing.h
index 463a8d572aa1..ef983105c7ba 100644
--- a/llvm/include/llvm/ADT/Hashing.h
+++ b/llvm/include/llvm/ADT/Hashing.h
@@ -219,29 +219,30 @@ inline uint64_t hash_17to32_bytes(const char *s, size_t len, uint64_t seed) {
uint64_t b = fetch64(s + 8);
uint64_t c = fetch64(s + len - 8) * k2;
uint64_t d = fetch64(s + len - 16) * k0;
- return hash_16_bytes(rotate(a - b, 43) + rotate(c ^ seed, 30) + d,
- a + rotate(b ^ k3, 20) - c + len + seed);
+ return hash_16_bytes(llvm::rotr<uint64_t>(a - b, 43) +
+ llvm::rotr<uint64_t>(c ^ seed, 30) + d,
+ a + llvm::rotr<uint64_t>(b ^ k3, 20) - c + len + seed);
}
inline uint64_t hash_33to64_bytes(const char *s, size_t len, uint64_t seed) {
uint64_t z = fetch64(s + 24);
uint64_t a = fetch64(s) + (len + fetch64(s + len - 16)) * k0;
- uint64_t b = rotate(a + z, 52);
- uint64_t c = rotate(a, 37);
+ uint64_t b = llvm::rotr<uint64_t>(a + z, 52);
+ uint64_t c = llvm::rotr<uint64_t>(a, 37);
a += fetch64(s + 8);
- c += rotate(a, 7);
+ c += llvm::rotr<uint64_t>(a, 7);
a += fetch64(s + 16);
uint64_t vf = a + z;
- uint64_t vs = b + rotate(a, 31) + c;
+ uint64_t vs = b + llvm::rotr<uint64_t>(a, 31) + c;
a = fetch64(s + 16) + fetch64(s + len - 32);
z = fetch64(s + len - 8);
- b = rotate(a + z, 52);
- c = rotate(a, 37);
+ b = llvm::rotr<uint64_t>(a + z, 52);
+ c = llvm::rotr<uint64_t>(a, 37);
a += fetch64(s + len - 24);
- c += rotate(a, 7);
+ c += llvm::rotr<uint64_t>(a, 7);
a += fetch64(s + len - 16);
uint64_t wf = a + z;
- uint64_t ws = b + rotate(a, 31) + c;
+ uint64_t ws = b + llvm::rotr<uint64_t>(a, 31) + c;
uint64_t r = shift_mix((vf + ws) * k2 + (wf + vs) * k0);
return shift_mix((seed ^ (r * k0)) + vs) * k2;
}
@@ -271,9 +272,13 @@ struct hash_state {
/// seed and the first 64-byte chunk.
/// This effectively performs the initial mix.
static hash_state create(const char *s, uint64_t seed) {
- hash_state state = {
- 0, seed, hash_16_bytes(seed, k1), rotate(seed ^ k1, 49),
- seed * k1, shift_mix(seed), 0 };
+ hash_state state = {0,
+ seed,
+ hash_16_bytes(seed, k1),
+ llvm::rotr<uint64_t>(seed ^ k1, 49),
+ seed * k1,
+ shift_mix(seed),
+ 0};
state.h6 = hash_16_bytes(state.h4, state.h5);
state.mix(s);
return state;
@@ -284,10 +289,10 @@ struct hash_state {
static void mix_32_bytes(const char *s, uint64_t &a, uint64_t &b) {
a += fetch64(s);
uint64_t c = fetch64(s + 24);
- b = rotate(b + a + c, 21);
+ b = llvm::rotr<uint64_t>(b + a + c, 21);
uint64_t d = a;
a += fetch64(s + 8) + fetch64(s + 16);
- b += rotate(a, 44) + d;
+ b += llvm::rotr<uint64_t>(a, 44) + d;
a += c;
}
@@ -295,11 +300,11 @@ struct hash_state {
/// We mix all 64 bytes even when the chunk length is smaller, but we
/// record the actual length.
void mix(const char *s) {
- h0 = rotate(h0 + h1 + h3 + fetch64(s + 8), 37) * k1;
- h1 = rotate(h1 + h4 + fetch64(s + 48), 42) * k1;
+ h0 = llvm::rotr<uint64_t>(h0 + h1 + h3 + fetch64(s + 8), 37) * k1;
+ h1 = llvm::rotr<uint64_t>(h1 + h4 + fetch64(s + 48), 42) * k1;
h0 ^= h6;
h1 += h3 + fetch64(s + 40);
- h2 = rotate(h2 + h5, 33) * k1;
+ h2 = llvm::rotr<uint64_t>(h2 + h5, 33) * k1;
h3 = h4 * k1;
h4 = h0 + h5;
mix_32_bytes(s, h3, h4);
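The hashing internals above now use the shared rotate helper instead of a local one; a tiny sketch of its behaviour, assuming the llvm/ADT/bit.h rotr signature:

#include "llvm/ADT/bit.h"
#include <cassert>
#include <cstdint>

void rotrSketch() {
  // Rotating right by 1 moves the low bit into the top position.
  uint64_t R = llvm::rotr<uint64_t>(uint64_t(1), 1);
  assert(R == (uint64_t(1) << 63));
  (void)R;
}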
diff --git a/llvm/include/llvm/ADT/IntervalTree.h b/llvm/include/llvm/ADT/IntervalTree.h
index f6bff70cdbdc..cb707bd77a64 100644
--- a/llvm/include/llvm/ADT/IntervalTree.h
+++ b/llvm/include/llvm/ADT/IntervalTree.h
@@ -463,7 +463,7 @@ public:
// Current node and index while traversing the intervals that contain
// the reference point.
IntervalNode *Node = nullptr;
- PointType Point;
+ PointType Point = {};
unsigned Index = 0;
// For the current node, check if we have intervals that contain the
diff --git a/llvm/include/llvm/ADT/MapVector.h b/llvm/include/llvm/ADT/MapVector.h
index 9d908f3af4ed..c45779c0ce8e 100644
--- a/llvm/include/llvm/ADT/MapVector.h
+++ b/llvm/include/llvm/ADT/MapVector.h
@@ -10,7 +10,7 @@
/// This file implements a map that provides insertion order iteration. The
/// interface is purposefully minimal. The key is assumed to be cheap to copy
/// and 2 copies are kept, one for indexing in a DenseMap, one for iteration in
-/// a std::vector.
+/// a SmallVector.
///
//===----------------------------------------------------------------------===//
@@ -24,16 +24,15 @@
#include <iterator>
#include <type_traits>
#include <utility>
-#include <vector>
namespace llvm {
/// This class implements a map that also provides access to all stored values
-/// in a deterministic order. The values are kept in a std::vector and the
+/// in a deterministic order. The values are kept in a SmallVector<*, 0> and the
/// mapping is done with DenseMap from Keys to indexes in that vector.
-template<typename KeyT, typename ValueT,
- typename MapType = DenseMap<KeyT, unsigned>,
- typename VectorType = std::vector<std::pair<KeyT, ValueT>>>
+template <typename KeyT, typename ValueT,
+ typename MapType = DenseMap<KeyT, unsigned>,
+ typename VectorType = SmallVector<std::pair<KeyT, ValueT>, 0>>
class MapVector {
MapType Map;
VectorType Vector;
@@ -140,10 +139,9 @@ public:
return std::make_pair(begin() + I, false);
}
- size_type count(const KeyT &Key) const {
- typename MapType::const_iterator Pos = Map.find(Key);
- return Pos == Map.end()? 0 : 1;
- }
+ bool contains(const KeyT &Key) const { return Map.find(Key) != Map.end(); }
+
+ size_type count(const KeyT &Key) const { return contains(Key) ? 1 : 0; }
iterator find(const KeyT &Key) {
typename MapType::const_iterator Pos = Map.find(Key);
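MapVector gains the same contains() spelling as DenseMap; a minimal sketch with arbitrary keys and values:

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"

void mapVectorContainsSketch() {
  llvm::MapVector<llvm::StringRef, int> MV;
  MV.insert({"a", 1});
  bool HasA = MV.contains("a"); // true
  bool HasB = MV.contains("b"); // false
  (void)HasA; (void)HasB;
}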
diff --git a/llvm/include/llvm/ADT/None.h b/llvm/include/llvm/ADT/None.h
deleted file mode 100644
index c497821a696e..000000000000
--- a/llvm/include/llvm/ADT/None.h
+++ /dev/null
@@ -1,31 +0,0 @@
-//===-- None.h - Simple null value for implicit construction ------*- C++ -*-=//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file provides None, an enumerator for use in implicit constructors
-/// of various (usually templated) types to make such construction more
-/// terse.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ADT_NONE_H
-#define LLVM_ADT_NONE_H
-
-#include "llvm/Support/Compiler.h"
-#include <optional>
-
-namespace llvm {
-/// A simple null object to allow implicit construction of std::optional<T>
-/// and similar types without having to spell out the specialization's name.
-LLVM_DEPRECATED("Use std::nullopt_t instead", "std::nullopt_t")
-typedef std::nullopt_t NoneType;
-LLVM_DEPRECATED("Use std::nullopt instead.", "std::nullopt")
-inline constexpr std::nullopt_t None = std::nullopt;
-}
-
-#endif
diff --git a/llvm/include/llvm/ADT/Optional.h b/llvm/include/llvm/ADT/Optional.h
deleted file mode 100644
index c3382837c0ae..000000000000
--- a/llvm/include/llvm/ADT/Optional.h
+++ /dev/null
@@ -1,27 +0,0 @@
-//===- Optional.h - Simple variant for passing optional values --*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This file provides Optional, a template class modeled in the spirit of
-/// OCaml's 'opt' variant. The idea is to strongly type whether or not
-/// a value can be optional.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ADT_OPTIONAL_H
-#define LLVM_ADT_OPTIONAL_H
-
-#include <optional>
-
-namespace llvm {
-// Legacy alias of llvm::Optional to std::optional.
-// FIXME: Remove this after LLVM 16.
-template <class T> using Optional = std::optional<T>;
-} // namespace llvm
-
-#endif // LLVM_ADT_OPTIONAL_H
diff --git a/llvm/include/llvm/ADT/PointerIntPair.h b/llvm/include/llvm/ADT/PointerIntPair.h
index 9278ccdb4788..f73f5bcd6ce0 100644
--- a/llvm/include/llvm/ADT/PointerIntPair.h
+++ b/llvm/include/llvm/ADT/PointerIntPair.h
@@ -19,10 +19,44 @@
#include "llvm/Support/type_traits.h"
#include <cassert>
#include <cstdint>
+#include <cstring>
#include <limits>
namespace llvm {
+namespace detail {
+template <typename Ptr> struct PunnedPointer {
+ static_assert(sizeof(Ptr) == sizeof(intptr_t), "");
+
+ // Asserts that allow us to let the compiler implement the destructor and
+ // copy/move constructors
+ static_assert(std::is_trivially_destructible<Ptr>::value, "");
+ static_assert(std::is_trivially_copy_constructible<Ptr>::value, "");
+ static_assert(std::is_trivially_move_constructible<Ptr>::value, "");
+
+ explicit constexpr PunnedPointer(intptr_t i = 0) { *this = i; }
+
+ constexpr intptr_t asInt() const {
+ intptr_t R = 0;
+ std::memcpy(&R, Data, sizeof(R));
+ return R;
+ }
+
+ constexpr operator intptr_t() const { return asInt(); }
+
+ constexpr PunnedPointer &operator=(intptr_t V) {
+ std::memcpy(Data, &V, sizeof(Data));
+ return *this;
+ }
+
+ Ptr *getPointerAddress() { return reinterpret_cast<Ptr *>(Data); }
+ const Ptr *getPointerAddress() const { return reinterpret_cast<const Ptr *>(Data); }
+
+private:
+ alignas(Ptr) unsigned char Data[sizeof(Ptr)];
+};
+} // namespace detail
+
template <typename T, typename Enable> struct DenseMapInfo;
template <typename PointerT, unsigned IntBits, typename PtrTraits>
struct PointerIntPairInfo;
@@ -46,7 +80,7 @@ template <typename PointerTy, unsigned IntBits, typename IntType = unsigned,
class PointerIntPair {
// Used by MSVC visualizer and generally helpful for debugging/visualizing.
using InfoTy = Info;
- intptr_t Value = 0;
+ detail::PunnedPointer<PointerTy> Value;
public:
constexpr PointerIntPair() = default;
@@ -86,10 +120,12 @@ public:
assert(Value == reinterpret_cast<intptr_t>(getPointer()) &&
"Can only return the address if IntBits is cleared and "
"PtrTraits doesn't change the pointer");
- return reinterpret_cast<PointerTy *>(&Value);
+ return Value.getPointerAddress();
}
- void *getOpaqueValue() const { return reinterpret_cast<void *>(Value); }
+ void *getOpaqueValue() const {
+ return reinterpret_cast<void *>(Value.asInt());
+ }
void setFromOpaqueValue(void *Val) & {
Value = reinterpret_cast<intptr_t>(Val);
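The hunk above swaps the raw `intptr_t` member for a byte-punned `detail::PunnedPointer`, while the public `PointerIntPair` interface is unchanged. As a rough illustration (not part of the patch), typical usage still round-trips through the same accessors:

```c++
#include "llvm/ADT/PointerIntPair.h"

// Pack a pointer and a 1-bit flag into one pointer-sized word.
bool roundTrip(int *Payload) {
  llvm::PointerIntPair<int *, 1, bool> P(Payload, /*IntVal=*/true);
  void *Opaque = P.getOpaqueValue(); // now reads the punned storage via asInt()
  auto Q = llvm::PointerIntPair<int *, 1, bool>::getFromOpaqueValue(Opaque);
  return Q.getPointer() == Payload && Q.getInt();
}
```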
diff --git a/llvm/include/llvm/ADT/PointerUnion.h b/llvm/include/llvm/ADT/PointerUnion.h
index 061c4000fcb3..7d4ed02b6226 100644
--- a/llvm/include/llvm/ADT/PointerUnion.h
+++ b/llvm/include/llvm/ADT/PointerUnion.h
@@ -172,9 +172,9 @@ public:
/// If the union is set to the first pointer type get an address pointing to
/// it.
First *getAddrOfPtr1() {
- assert(is<First>() && "Val is not the first pointer");
+ assert(isa<First>(*this) && "Val is not the first pointer");
assert(
- PointerLikeTypeTraits<First>::getAsVoidPointer(get<First>()) ==
+ PointerLikeTypeTraits<First>::getAsVoidPointer(cast<First>(*this)) ==
this->Val.getPointer() &&
"Can't get the address because PointerLikeTypeTraits changes the ptr");
return const_cast<First *>(
@@ -231,7 +231,7 @@ template <typename... PTs> struct CastInfoPointerUnionImpl {
}
template <typename To> static To doCast(From &F) {
- assert(isPossible<To>(F) && "cast to an incompatible type !");
+ assert(isPossible<To>(F) && "cast to an incompatible type!");
return PointerLikeTypeTraits<To>::getFromVoidPointer(F.Val.getPointer());
}
};
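The assertions above now go through the free `isa<>`/`cast<>` functions rather than the older member `is<>()`/`get<>()` spelling. A minimal sketch of that access style, assuming a non-null union value (the type alias and function are made up for illustration):

```c++
#include "llvm/ADT/PointerUnion.h"

// Either an int* or a double*, stored in one pointer-sized word.
using IntOrDouble = llvm::PointerUnion<int *, double *>;

double valueOf(IntOrDouble V) { // assumes V is non-null
  if (auto *IP = llvm::dyn_cast<int *>(V))
    return *IP;
  return *llvm::cast<double *>(V);
}
```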
diff --git a/llvm/include/llvm/ADT/PostOrderIterator.h b/llvm/include/llvm/ADT/PostOrderIterator.h
index a80eed78c94d..33d3330a40bd 100644
--- a/llvm/include/llvm/ADT/PostOrderIterator.h
+++ b/llvm/include/llvm/ADT/PostOrderIterator.h
@@ -100,19 +100,20 @@ public:
using value_type = typename GT::NodeRef;
using difference_type = std::ptrdiff_t;
using pointer = value_type *;
- using reference = value_type &;
+ using reference = const value_type &;
private:
using NodeRef = typename GT::NodeRef;
using ChildItTy = typename GT::ChildIteratorType;
- // VisitStack - Used to maintain the ordering. Top = current block
- // First element is basic block pointer, second is the 'next child' to visit
- SmallVector<std::pair<NodeRef, ChildItTy>, 8> VisitStack;
+ /// Used to maintain the ordering.
+ /// First element is basic block pointer, second is iterator for the next
+ /// child to visit, third is the end iterator.
+ SmallVector<std::tuple<NodeRef, ChildItTy, ChildItTy>, 8> VisitStack;
po_iterator(NodeRef BB) {
this->insertEdge(std::optional<NodeRef>(), BB);
- VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB)));
+ VisitStack.emplace_back(BB, GT::child_begin(BB), GT::child_end(BB));
traverseChild();
}
@@ -121,7 +122,7 @@ private:
po_iterator(NodeRef BB, SetType &S)
: po_iterator_storage<SetType, ExtStorage>(S) {
if (this->insertEdge(std::optional<NodeRef>(), BB)) {
- VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB)));
+ VisitStack.emplace_back(BB, GT::child_begin(BB), GT::child_end(BB));
traverseChild();
}
}
@@ -131,12 +132,14 @@ private:
} // End is when stack is empty.
void traverseChild() {
- while (VisitStack.back().second != GT::child_end(VisitStack.back().first)) {
- NodeRef BB = *VisitStack.back().second++;
- if (this->insertEdge(std::optional<NodeRef>(VisitStack.back().first),
- BB)) {
+ while (true) {
+ auto &Entry = VisitStack.back();
+ if (std::get<1>(Entry) == std::get<2>(Entry))
+ break;
+ NodeRef BB = *std::get<1>(Entry)++;
+ if (this->insertEdge(std::optional<NodeRef>(std::get<0>(Entry)), BB)) {
// If the block is not visited...
- VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB)));
+ VisitStack.emplace_back(BB, GT::child_begin(BB), GT::child_end(BB));
}
}
}
@@ -158,7 +161,7 @@ public:
}
bool operator!=(const po_iterator &x) const { return !(*this == x); }
- const NodeRef &operator*() const { return VisitStack.back().first; }
+ reference operator*() const { return std::get<0>(VisitStack.back()); }
// This is a nonstandard operator-> that dereferences the pointer an extra
// time... so that you can actually call methods ON the BasicBlock, because
@@ -167,7 +170,7 @@ public:
NodeRef operator->() const { return **this; }
po_iterator &operator++() { // Preincrement
- this->finishPostorder(VisitStack.back().first);
+ this->finishPostorder(std::get<0>(VisitStack.back()));
VisitStack.pop_back();
if (!VisitStack.empty())
traverseChild();
@@ -293,23 +296,24 @@ template<class GraphT, class GT = GraphTraits<GraphT>>
class ReversePostOrderTraversal {
using NodeRef = typename GT::NodeRef;
- std::vector<NodeRef> Blocks; // Block list in normal PO order
+ using VecTy = SmallVector<NodeRef, 8>;
+ VecTy Blocks; // Block list in normal PO order
void Initialize(const GraphT &G) {
std::copy(po_begin(G), po_end(G), std::back_inserter(Blocks));
}
public:
- using rpo_iterator = typename std::vector<NodeRef>::reverse_iterator;
- using const_rpo_iterator = typename std::vector<NodeRef>::const_reverse_iterator;
+ using rpo_iterator = typename VecTy::reverse_iterator;
+ using const_rpo_iterator = typename VecTy::const_reverse_iterator;
ReversePostOrderTraversal(const GraphT &G) { Initialize(G); }
// Because we want a reverse post order, use reverse iterators from the vector
rpo_iterator begin() { return Blocks.rbegin(); }
- const_rpo_iterator begin() const { return Blocks.crbegin(); }
+ const_rpo_iterator begin() const { return Blocks.rbegin(); }
rpo_iterator end() { return Blocks.rend(); }
- const_rpo_iterator end() const { return Blocks.crend(); }
+ const_rpo_iterator end() const { return Blocks.rend(); }
};
} // end namespace llvm
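These changes only touch the iterator's internal bookkeeping (the stack now carries the child end iterator, `operator*` returns a const reference, and the RPO traversal stores its blocks in a `SmallVector`); consumers are unaffected. A typical traversal, shown here only for context and assuming the usual `GraphTraits` for `Function` from `llvm/IR/CFG.h`:

```c++
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"

// Post order and reverse post order over a function's CFG.
void walk(llvm::Function &F) {
  for (llvm::BasicBlock *BB : llvm::post_order(&F))
    (void)BB; // children are visited before their parents

  llvm::ReversePostOrderTraversal<llvm::Function *> RPOT(&F);
  for (llvm::BasicBlock *BB : RPOT)
    (void)BB; // entry block comes first
}
```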
diff --git a/llvm/include/llvm/ADT/SCCIterator.h b/llvm/include/llvm/ADT/SCCIterator.h
index e4035a02b5f5..e743ae7c11ed 100644
--- a/llvm/include/llvm/ADT/SCCIterator.h
+++ b/llvm/include/llvm/ADT/SCCIterator.h
@@ -23,6 +23,7 @@
#define LLVM_ADT_SCCITERATOR_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/iterator.h"
#include <cassert>
@@ -243,11 +244,11 @@ template <class T> scc_iterator<T> scc_end(const T &G) {
/// declared in its graph traits in order to use this iterator.
///
/// This is implemented using Kruskal's minimal spanning tree algorithm followed
-/// by a BFS walk. First a maximum spanning tree (forest) is built based on all
-/// edges within the SCC collection. Then a BFS walk is initiated on tree nodes
-/// that do not have a predecessor. Finally, the BFS order computed is the
-/// traversal order of the nodes of the SCC. Such order ensures that
-/// high-weighted edges are visited first during the tranversal.
+/// by Kahn's algorithm to compute a topological order on the MST. First a
+/// maximum spanning tree (forest) is built based on all edges within the SCC
+/// collection. Then a topological walk is initiated on tree nodes that do not
+/// have a predecessor and then applied to all nodes of the SCC. Such order
+/// ensures that high-weighted edges are visited first during the traversal.
template <class GraphT, class GT = GraphTraits<GraphT>>
class scc_member_iterator {
using NodeType = typename GT::NodeType;
@@ -258,7 +259,8 @@ class scc_member_iterator {
struct NodeInfo {
NodeInfo *Group = this;
uint32_t Rank = 0;
- bool Visited = true;
+ bool Visited = false;
+ DenseSet<const EdgeType *> IncomingMSTEdges;
};
// Find the root group of the node and compress the path from node to the
@@ -340,20 +342,22 @@ scc_member_iterator<GraphT, GT>::scc_member_iterator(
MSTEdges.insert(Edge);
}
- // Do BFS on MST, starting from nodes that have no incoming edge. These nodes
- // are "roots" of the MST forest. This ensures that nodes are visited before
- // their decsendents are, thus ensures hot edges are processed before cold
- // edges, based on how MST is computed.
+ // Run Kahn's algorithm on MST to compute a topological traversal order.
+ // The algorithm starts from nodes that have no incoming edge. These nodes are
+ // "roots" of the MST forest. This ensures that nodes are visited before their
+ // descendants are, thus ensures hot edges are processed before cold edges,
+ // based on how MST is computed.
+ std::queue<NodeType *> Queue;
for (const auto *Edge : MSTEdges)
- NodeInfoMap[Edge->Target].Visited = false;
+ NodeInfoMap[Edge->Target].IncomingMSTEdges.insert(Edge);
- std::queue<NodeType *> Queue;
- // Initialze the queue with MST roots. Note that walking through SortedEdges
- // instead of NodeInfoMap ensures an ordered deterministic push.
+ // Initialize the queue by walking through SortedEdges rather than NodeInfoMap;
+ // this ensures an ordered, deterministic push.
for (auto *Edge : SortedEdges) {
- if (NodeInfoMap[Edge->Source].Visited) {
+ if (!NodeInfoMap[Edge->Source].Visited &&
+ NodeInfoMap[Edge->Source].IncomingMSTEdges.empty()) {
Queue.push(Edge->Source);
- NodeInfoMap[Edge->Source].Visited = false;
+ NodeInfoMap[Edge->Source].Visited = true;
}
}
@@ -362,8 +366,9 @@ scc_member_iterator<GraphT, GT>::scc_member_iterator(
Queue.pop();
Nodes.push_back(Node);
for (auto &Edge : Node->Edges) {
- if (MSTEdges.count(&Edge) && !NodeInfoMap[Edge.Target].Visited) {
- NodeInfoMap[Edge.Target].Visited = true;
+ NodeInfoMap[Edge.Target].IncomingMSTEdges.erase(&Edge);
+ if (MSTEdges.count(&Edge) &&
+ NodeInfoMap[Edge.Target].IncomingMSTEdges.empty()) {
Queue.push(Edge.Target);
}
}
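For readers unfamiliar with Kahn's algorithm referenced above, here is a generic, self-contained sketch over a plain adjacency list. It is independent of the `scc_member_iterator` internals: it uses an in-degree counter where the patch tracks per-node `IncomingMSTEdges`, but the idea is the same, and the node ids and container choices are illustrative only.

```c++
#include <queue>
#include <vector>

// Kahn's algorithm: a node becomes ready only once all of its incoming
// edges have been consumed.
std::vector<int> topoOrder(const std::vector<std::vector<int>> &Succs) {
  std::vector<int> InDegree(Succs.size(), 0);
  for (const auto &Out : Succs)
    for (int T : Out)
      ++InDegree[T];

  std::queue<int> Ready;
  for (int N = 0; N != (int)Succs.size(); ++N)
    if (InDegree[N] == 0)
      Ready.push(N); // roots: no incoming edges

  std::vector<int> Order;
  while (!Ready.empty()) {
    int N = Ready.front();
    Ready.pop();
    Order.push_back(N);
    for (int T : Succs[N])
      if (--InDegree[T] == 0) // last incoming edge processed
        Ready.push(T);
  }
  return Order; // Order.size() < Succs.size() iff the graph has a cycle
}
```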
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index 79b145632d5a..7edc582636c7 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -17,6 +17,7 @@
#ifndef LLVM_ADT_STLEXTRAS_H
#define LLVM_ADT_STLEXTRAS_H
+#include "llvm/ADT/ADL.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/STLForwardCompat.h"
#include "llvm/ADT/STLFunctionalExtras.h"
@@ -46,21 +47,6 @@
namespace llvm {
-// Only used by compiler if both template types are the same. Useful when
-// using SFINAE to test for the existence of member functions.
-template <typename T, T> struct SameType;
-
-namespace detail {
-
-template <typename RangeT>
-using IterOfRange = decltype(std::begin(std::declval<RangeT &>()));
-
-template <typename RangeT>
-using ValueOfRange =
- std::remove_reference_t<decltype(*std::begin(std::declval<RangeT &>()))>;
-
-} // end namespace detail
-
//===----------------------------------------------------------------------===//
// Extra additions to <type_traits>
//===----------------------------------------------------------------------===//
@@ -333,48 +319,6 @@ public:
} // namespace callable_detail
-namespace adl_detail {
-
-using std::begin;
-
-template <typename ContainerTy>
-decltype(auto) adl_begin(ContainerTy &&container) {
- return begin(std::forward<ContainerTy>(container));
-}
-
-using std::end;
-
-template <typename ContainerTy>
-decltype(auto) adl_end(ContainerTy &&container) {
- return end(std::forward<ContainerTy>(container));
-}
-
-using std::swap;
-
-template <typename T>
-void adl_swap(T &&lhs, T &&rhs) noexcept(noexcept(swap(std::declval<T>(),
- std::declval<T>()))) {
- swap(std::forward<T>(lhs), std::forward<T>(rhs));
-}
-
-} // end namespace adl_detail
-
-template <typename ContainerTy>
-decltype(auto) adl_begin(ContainerTy &&container) {
- return adl_detail::adl_begin(std::forward<ContainerTy>(container));
-}
-
-template <typename ContainerTy>
-decltype(auto) adl_end(ContainerTy &&container) {
- return adl_detail::adl_end(std::forward<ContainerTy>(container));
-}
-
-template <typename T>
-void adl_swap(T &&lhs, T &&rhs) noexcept(
- noexcept(adl_detail::adl_swap(std::declval<T>(), std::declval<T>()))) {
- adl_detail::adl_swap(std::forward<T>(lhs), std::forward<T>(rhs));
-}
-
/// Returns true if the given container only contains a single element.
template <typename ContainerTy> bool hasSingleElement(ContainerTy &&C) {
auto B = std::begin(C), E = std::end(C);
@@ -432,7 +376,8 @@ inline mapped_iterator<ItTy, FuncTy> map_iterator(ItTy I, FuncTy F) {
template <class ContainerTy, class FuncTy>
auto map_range(ContainerTy &&C, FuncTy F) {
- return make_range(map_iterator(C.begin(), F), map_iterator(C.end(), F));
+ return make_range(map_iterator(std::begin(C), F),
+ map_iterator(std::end(C), F));
}
/// A base type of mapped iterator, that is useful for building derived
@@ -734,6 +679,8 @@ bool any_of(R &&range, UnaryPredicate P);
template <typename T> bool all_equal(std::initializer_list<T> Values);
+template <typename R> constexpr size_t range_size(R &&Range);
+
namespace detail {
using std::declval;
@@ -744,26 +691,26 @@ template<typename... Iters> struct ZipTupleType {
using type = std::tuple<decltype(*declval<Iters>())...>;
};
-template <typename ZipType, typename... Iters>
+template <typename ZipType, typename ReferenceTupleType, typename... Iters>
using zip_traits = iterator_facade_base<
ZipType,
std::common_type_t<
std::bidirectional_iterator_tag,
typename std::iterator_traits<Iters>::iterator_category...>,
// ^ TODO: Implement random access methods.
- typename ZipTupleType<Iters...>::type,
+ ReferenceTupleType,
typename std::iterator_traits<
std::tuple_element_t<0, std::tuple<Iters...>>>::difference_type,
// ^ FIXME: This follows boost::make_zip_iterator's assumption that all
// inner iterators have the same difference_type. It would fail if, for
// instance, the second field's difference_type were non-numeric while the
// first is.
- typename ZipTupleType<Iters...>::type *,
- typename ZipTupleType<Iters...>::type>;
+ ReferenceTupleType *, ReferenceTupleType>;
-template <typename ZipType, typename... Iters>
-struct zip_common : public zip_traits<ZipType, Iters...> {
- using Base = zip_traits<ZipType, Iters...>;
+template <typename ZipType, typename ReferenceTupleType, typename... Iters>
+struct zip_common : public zip_traits<ZipType, ReferenceTupleType, Iters...> {
+ using Base = zip_traits<ZipType, ReferenceTupleType, Iters...>;
+ using IndexSequence = std::index_sequence_for<Iters...>;
using value_type = typename Base::value_type;
std::tuple<Iters...> iterators;
@@ -773,19 +720,17 @@ protected:
return value_type(*std::get<Ns>(iterators)...);
}
- template <size_t... Ns>
- decltype(iterators) tup_inc(std::index_sequence<Ns...>) const {
- return std::tuple<Iters...>(std::next(std::get<Ns>(iterators))...);
+ template <size_t... Ns> void tup_inc(std::index_sequence<Ns...>) {
+ (++std::get<Ns>(iterators), ...);
}
- template <size_t... Ns>
- decltype(iterators) tup_dec(std::index_sequence<Ns...>) const {
- return std::tuple<Iters...>(std::prev(std::get<Ns>(iterators))...);
+ template <size_t... Ns> void tup_dec(std::index_sequence<Ns...>) {
+ (--std::get<Ns>(iterators), ...);
}
template <size_t... Ns>
bool test_all_equals(const zip_common &other,
- std::index_sequence<Ns...>) const {
+ std::index_sequence<Ns...>) const {
return ((std::get<Ns>(this->iterators) == std::get<Ns>(other.iterators)) &&
...);
}
@@ -793,85 +738,121 @@ protected:
public:
zip_common(Iters &&... ts) : iterators(std::forward<Iters>(ts)...) {}
- value_type operator*() const {
- return deref(std::index_sequence_for<Iters...>{});
- }
+ value_type operator*() const { return deref(IndexSequence{}); }
ZipType &operator++() {
- iterators = tup_inc(std::index_sequence_for<Iters...>{});
- return *reinterpret_cast<ZipType *>(this);
+ tup_inc(IndexSequence{});
+ return static_cast<ZipType &>(*this);
}
ZipType &operator--() {
static_assert(Base::IsBidirectional,
"All inner iterators must be at least bidirectional.");
- iterators = tup_dec(std::index_sequence_for<Iters...>{});
- return *reinterpret_cast<ZipType *>(this);
+ tup_dec(IndexSequence{});
+ return static_cast<ZipType &>(*this);
}
/// Return true if all the iterator are matching `other`'s iterators.
bool all_equals(zip_common &other) {
- return test_all_equals(other, std::index_sequence_for<Iters...>{});
+ return test_all_equals(other, IndexSequence{});
}
};
template <typename... Iters>
-struct zip_first : public zip_common<zip_first<Iters...>, Iters...> {
- using Base = zip_common<zip_first<Iters...>, Iters...>;
+struct zip_first : zip_common<zip_first<Iters...>,
+ typename ZipTupleType<Iters...>::type, Iters...> {
+ using zip_common<zip_first, typename ZipTupleType<Iters...>::type,
+ Iters...>::zip_common;
- bool operator==(const zip_first<Iters...> &other) const {
+ bool operator==(const zip_first &other) const {
return std::get<0>(this->iterators) == std::get<0>(other.iterators);
}
-
- zip_first(Iters &&... ts) : Base(std::forward<Iters>(ts)...) {}
};
template <typename... Iters>
-class zip_shortest : public zip_common<zip_shortest<Iters...>, Iters...> {
+struct zip_shortest
+ : zip_common<zip_shortest<Iters...>, typename ZipTupleType<Iters...>::type,
+ Iters...> {
+ using zip_common<zip_shortest, typename ZipTupleType<Iters...>::type,
+ Iters...>::zip_common;
+
+ bool operator==(const zip_shortest &other) const {
+ return any_iterator_equals(other, std::index_sequence_for<Iters...>{});
+ }
+
+private:
template <size_t... Ns>
- bool test(const zip_shortest<Iters...> &other,
- std::index_sequence<Ns...>) const {
- return ((std::get<Ns>(this->iterators) != std::get<Ns>(other.iterators)) &&
+ bool any_iterator_equals(const zip_shortest &other,
+ std::index_sequence<Ns...>) const {
+ return ((std::get<Ns>(this->iterators) == std::get<Ns>(other.iterators)) ||
...);
}
+};
-public:
- using Base = zip_common<zip_shortest<Iters...>, Iters...>;
-
- zip_shortest(Iters &&... ts) : Base(std::forward<Iters>(ts)...) {}
+/// Helper to obtain the iterator types for the tuple storage within `zippy`.
+template <template <typename...> class ItType, typename TupleStorageType,
+ typename IndexSequence>
+struct ZippyIteratorTuple;
+
+/// Partial specialization for non-const tuple storage.
+template <template <typename...> class ItType, typename... Args,
+ std::size_t... Ns>
+struct ZippyIteratorTuple<ItType, std::tuple<Args...>,
+ std::index_sequence<Ns...>> {
+ using type = ItType<decltype(adl_begin(
+ std::get<Ns>(declval<std::tuple<Args...> &>())))...>;
+};
- bool operator==(const zip_shortest<Iters...> &other) const {
- return !test(other, std::index_sequence_for<Iters...>{});
- }
+/// Partial specialization for const tuple storage.
+template <template <typename...> class ItType, typename... Args,
+ std::size_t... Ns>
+struct ZippyIteratorTuple<ItType, const std::tuple<Args...>,
+ std::index_sequence<Ns...>> {
+ using type = ItType<decltype(adl_begin(
+ std::get<Ns>(declval<const std::tuple<Args...> &>())))...>;
};
template <template <typename...> class ItType, typename... Args> class zippy {
+private:
+ std::tuple<Args...> storage;
+ using IndexSequence = std::index_sequence_for<Args...>;
+
public:
- using iterator = ItType<decltype(std::begin(std::declval<Args>()))...>;
+ using iterator = typename ZippyIteratorTuple<ItType, decltype(storage),
+ IndexSequence>::type;
+ using const_iterator =
+ typename ZippyIteratorTuple<ItType, const decltype(storage),
+ IndexSequence>::type;
using iterator_category = typename iterator::iterator_category;
using value_type = typename iterator::value_type;
using difference_type = typename iterator::difference_type;
using pointer = typename iterator::pointer;
using reference = typename iterator::reference;
+ using const_reference = typename const_iterator::reference;
-private:
- std::tuple<Args...> ts;
+ zippy(Args &&...args) : storage(std::forward<Args>(args)...) {}
+ const_iterator begin() const { return begin_impl(IndexSequence{}); }
+ iterator begin() { return begin_impl(IndexSequence{}); }
+ const_iterator end() const { return end_impl(IndexSequence{}); }
+ iterator end() { return end_impl(IndexSequence{}); }
+
+private:
template <size_t... Ns>
- iterator begin_impl(std::index_sequence<Ns...>) const {
- return iterator(std::begin(std::get<Ns>(ts))...);
+ const_iterator begin_impl(std::index_sequence<Ns...>) const {
+ return const_iterator(adl_begin(std::get<Ns>(storage))...);
}
- template <size_t... Ns> iterator end_impl(std::index_sequence<Ns...>) const {
- return iterator(std::end(std::get<Ns>(ts))...);
+ template <size_t... Ns> iterator begin_impl(std::index_sequence<Ns...>) {
+ return iterator(adl_begin(std::get<Ns>(storage))...);
}
-public:
- zippy(Args &&... ts_) : ts(std::forward<Args>(ts_)...) {}
-
- iterator begin() const {
- return begin_impl(std::index_sequence_for<Args...>{});
+ template <size_t... Ns>
+ const_iterator end_impl(std::index_sequence<Ns...>) const {
+ return const_iterator(adl_end(std::get<Ns>(storage))...);
+ }
+ template <size_t... Ns> iterator end_impl(std::index_sequence<Ns...>) {
+ return iterator(adl_end(std::get<Ns>(storage))...);
}
- iterator end() const { return end_impl(std::index_sequence_for<Args...>{}); }
};
} // end namespace detail
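The `zippy` rework above adds a `const_iterator` and switches to ADL-based `adl_begin`/`adl_end`; call sites stay the same. A hedged sketch of the common pattern, using `zip_equal` (declared further down in this header) over two const ranges:

```c++
#include "llvm/ADT/STLExtras.h"
#include <vector>

// Zipped iteration over two const ranges; zip_equal asserts equal lengths
// in builds with assertions enabled.
int pairSum(const std::vector<int> &A, const std::vector<int> &B) {
  int Sum = 0;
  for (auto [X, Y] : llvm::zip_equal(A, B))
    Sum += X + Y;
  return Sum;
}
```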
@@ -891,9 +872,7 @@ detail::zippy<detail::zip_shortest, T, U, Args...> zip(T &&t, U &&u,
template <typename T, typename U, typename... Args>
detail::zippy<detail::zip_first, T, U, Args...> zip_equal(T &&t, U &&u,
Args &&...args) {
- assert(all_equal({std::distance(adl_begin(t), adl_end(t)),
- std::distance(adl_begin(u), adl_end(u)),
- std::distance(adl_begin(args), adl_end(args))...}) &&
+ assert(all_equal({range_size(t), range_size(u), range_size(args)...}) &&
"Iteratees do not have equal length");
return detail::zippy<detail::zip_first, T, U, Args...>(
std::forward<T>(t), std::forward<U>(u), std::forward<Args>(args)...);
@@ -906,9 +885,7 @@ detail::zippy<detail::zip_first, T, U, Args...> zip_equal(T &&t, U &&u,
template <typename T, typename U, typename... Args>
detail::zippy<detail::zip_first, T, U, Args...> zip_first(T &&t, U &&u,
Args &&...args) {
- assert(std::distance(adl_begin(t), adl_end(t)) <=
- std::min({std::distance(adl_begin(u), adl_end(u)),
- std::distance(adl_begin(args), adl_end(args))...}) &&
+ assert(range_size(t) <= std::min({range_size(u), range_size(args)...}) &&
"First iteratee is not the shortest");
return detail::zippy<detail::zip_first, T, U, Args...>(
@@ -1472,19 +1449,21 @@ template <typename ContainerTy> auto make_second_range(ContainerTy &&c) {
// Extra additions to <utility>
//===----------------------------------------------------------------------===//
-/// Function object to check whether the first component of a std::pair
-/// compares less than the first component of another std::pair.
+/// Function object to check whether the first component of a container
+/// supported by std::get (like std::pair and std::tuple) compares less than the
+/// first component of another container.
struct less_first {
template <typename T> bool operator()(const T &lhs, const T &rhs) const {
- return std::less<>()(lhs.first, rhs.first);
+ return std::less<>()(std::get<0>(lhs), std::get<0>(rhs));
}
};
-/// Function object to check whether the second component of a std::pair
-/// compares less than the second component of another std::pair.
+/// Function object to check whether the second component of a container
+/// supported by std::get (like std::pair and std::tuple) compares less than the
+/// second component of another container.
struct less_second {
template <typename T> bool operator()(const T &lhs, const T &rhs) const {
- return std::less<>()(lhs.second, rhs.second);
+ return std::less<>()(std::get<1>(lhs), std::get<1>(rhs));
}
};
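With `std::get` in place of `.first`/`.second`, `less_first` and `less_second` now also accept tuples. A small sketch, assuming `llvm::sort` from this same header; the row type is made up for illustration:

```c++
#include "llvm/ADT/STLExtras.h"
#include <tuple>
#include <vector>

// Sort rows of a tuple-typed table by their first element.
void sortByKey(std::vector<std::tuple<int, const char *>> &Rows) {
  llvm::sort(Rows, llvm::less_first());
}
```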
@@ -1722,6 +1701,29 @@ auto size(R &&Range,
return std::distance(Range.begin(), Range.end());
}
+namespace detail {
+template <typename Range>
+using check_has_free_function_size =
+ decltype(adl_size(std::declval<Range &>()));
+
+template <typename Range>
+static constexpr bool HasFreeFunctionSize =
+ is_detected<check_has_free_function_size, Range>::value;
+} // namespace detail
+
+/// Returns the size of the \p Range, i.e., the number of elements. This
+/// implementation takes inspiration from `std::ranges::size` from C++20 and
+/// delegates the size check to `adl_size` or `std::distance`, in this order of
+/// preference. Unlike `llvm::size`, this function does *not* guarantee O(1)
+/// running time, and is intended to be used in generic code that does not know
+/// the exact range type.
+template <typename R> constexpr size_t range_size(R &&Range) {
+ if constexpr (detail::HasFreeFunctionSize<R>)
+ return adl_size(Range);
+ else
+ return static_cast<size_t>(std::distance(adl_begin(Range), adl_end(Range)));
+}
+
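A sketch of how the new `range_size` helper behaves for two container types; `std::forward_list` is used here only as an example of a range without a `size()` member:

```c++
#include "llvm/ADT/STLExtras.h"
#include <cstddef>
#include <forward_list>
#include <vector>

// range_size picks whichever size query the range supports.
void sizes() {
  std::vector<int> V = {1, 2, 3};
  std::forward_list<int> L = {4, 5};
  size_t A = llvm::range_size(V); // 3, via adl_size (constant time)
  size_t B = llvm::range_size(L); // 2, via std::distance (linear time)
  (void)A;
  (void)B;
}
```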
/// Provide wrappers to std::for_each which take ranges instead of having to
/// pass begin/end explicitly.
template <typename R, typename UnaryFunction>
@@ -1863,18 +1865,49 @@ OutputIt move(R &&Range, OutputIt Out) {
return std::move(adl_begin(Range), adl_end(Range), Out);
}
-/// Wrapper function around std::find to detect if an element exists
-/// in a container.
+namespace detail {
+template <typename Range, typename Element>
+using check_has_member_contains_t =
+ decltype(std::declval<Range &>().contains(std::declval<const Element &>()));
+
+template <typename Range, typename Element>
+static constexpr bool HasMemberContains =
+ is_detected<check_has_member_contains_t, Range, Element>::value;
+
+template <typename Range, typename Element>
+using check_has_member_find_t =
+ decltype(std::declval<Range &>().find(std::declval<const Element &>()) !=
+ std::declval<Range &>().end());
+
+template <typename Range, typename Element>
+static constexpr bool HasMemberFind =
+ is_detected<check_has_member_find_t, Range, Element>::value;
+
+} // namespace detail
+
+/// Returns true if \p Element is found in \p Range. Delegates the check to
+/// either `.contains(Element)`, `.find(Element)`, or `std::find`, in this
+/// order of preference. This is intended as the canonical way to check if an
+/// element exists in a range in generic code or for range types that do not
+/// expose a `.contains(Element)` member.
template <typename R, typename E>
bool is_contained(R &&Range, const E &Element) {
- return std::find(adl_begin(Range), adl_end(Range), Element) != adl_end(Range);
+ if constexpr (detail::HasMemberContains<R, E>)
+ return Range.contains(Element);
+ else if constexpr (detail::HasMemberFind<R, E>)
+ return Range.find(Element) != Range.end();
+ else
+ return std::find(adl_begin(Range), adl_end(Range), Element) !=
+ adl_end(Range);
}
-template <typename T>
-constexpr bool is_contained(std::initializer_list<T> Set, T Value) {
+/// Returns true iff \p Element exists in \p Set. This overload takes \p Set as
+/// an initializer list and is `constexpr`-friendly.
+template <typename T, typename E>
+constexpr bool is_contained(std::initializer_list<T> Set, const E &Element) {
// TODO: Use std::find when we switch to C++20.
- for (T V : Set)
- if (V == Value)
+ for (const T &V : Set)
+ if (V == Element)
return true;
return false;
}
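A sketch of the three dispatch paths of the reworked `is_contained`, using `DenseSet` for the member-`contains` case; the exact containers and values are illustrative only:

```c++
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include <vector>

// One spelling, three dispatch paths.
bool hasThree(const llvm::DenseSet<int> &S, const std::vector<int> &V) {
  bool InSet = llvm::is_contained(S, 3);          // uses S.contains(3)
  bool InVec = llvm::is_contained(V, 3);          // std::find fallback
  bool InList = llvm::is_contained({1, 2, 3}, 3); // constexpr-friendly overload
  return InSet && InVec && InList;
}
```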
@@ -2012,7 +2045,7 @@ void erase_value(Container &C, ValueType V) {
/// C.insert(C.end(), R.begin(), R.end());
template <typename Container, typename Range>
inline void append_range(Container &C, Range &&R) {
- C.insert(C.end(), R.begin(), R.end());
+ C.insert(C.end(), adl_begin(R), adl_end(R));
}
/// Given a sequence container Cont, replace the range [ContIt, ContEnd) with
@@ -2139,130 +2172,192 @@ template <typename T> struct deref {
namespace detail {
-template <typename R> class enumerator_iter;
+/// Tuple-like type for `zip_enumerator` dereference.
+template <typename... Refs> struct enumerator_result;
-template <typename R> struct result_pair {
- using value_reference =
- typename std::iterator_traits<IterOfRange<R>>::reference;
-
- friend class enumerator_iter<R>;
-
- result_pair() = default;
- result_pair(std::size_t Index, IterOfRange<R> Iter)
- : Index(Index), Iter(Iter) {}
+template <typename... Iters>
+using EnumeratorTupleType = enumerator_result<decltype(*declval<Iters>())...>;
+
+/// Zippy iterator that uses the second iterator for comparisons. For the
+/// increment to be safe, the second range has to be the shortest.
+/// Returns `enumerator_result` on dereference to provide `.index()` and
+/// `.value()` member functions.
+/// Note: The dereference operator returns `enumerator_result` by value instead
+/// of by reference, so this iterator does not strictly conform to C++17's
+/// definition of forward iterator. However, it satisfies all the
+/// forward_iterator requirements that `zip_common` and `zippy` depend on and
+/// fully conforms to the C++20 definition of forward iterator.
+/// This is similar to `std::vector<bool>::iterator` that returns bit reference
+/// wrappers on dereference.
+template <typename... Iters>
+struct zip_enumerator : zip_common<zip_enumerator<Iters...>,
+ EnumeratorTupleType<Iters...>, Iters...> {
+ static_assert(sizeof...(Iters) >= 2, "Expected at least two iteratees");
+ using zip_common<zip_enumerator<Iters...>, EnumeratorTupleType<Iters...>,
+ Iters...>::zip_common;
- result_pair(const result_pair<R> &Other)
- : Index(Other.Index), Iter(Other.Iter) {}
- result_pair &operator=(const result_pair &Other) {
- Index = Other.Index;
- Iter = Other.Iter;
- return *this;
+ bool operator==(const zip_enumerator &Other) const {
+ return std::get<1>(this->iterators) == std::get<1>(Other.iterators);
}
-
- std::size_t index() const { return Index; }
- value_reference value() const { return *Iter; }
-
-private:
- std::size_t Index = std::numeric_limits<std::size_t>::max();
- IterOfRange<R> Iter;
};
-template <std::size_t i, typename R>
-decltype(auto) get(const result_pair<R> &Pair) {
- static_assert(i < 2);
- if constexpr (i == 0) {
- return Pair.index();
- } else {
- return Pair.value();
- }
-}
-
-template <typename R>
-class enumerator_iter
- : public iterator_facade_base<enumerator_iter<R>, std::forward_iterator_tag,
- const result_pair<R>> {
- using result_type = result_pair<R>;
-
-public:
- explicit enumerator_iter(IterOfRange<R> EndIter)
- : Result(std::numeric_limits<size_t>::max(), EndIter) {}
-
- enumerator_iter(std::size_t Index, IterOfRange<R> Iter)
- : Result(Index, Iter) {}
-
- const result_type &operator*() const { return Result; }
-
- enumerator_iter &operator++() {
- assert(Result.Index != std::numeric_limits<size_t>::max());
- ++Result.Iter;
- ++Result.Index;
- return *this;
- }
-
- bool operator==(const enumerator_iter &RHS) const {
- // Don't compare indices here, only iterators. It's possible for an end
- // iterator to have different indices depending on whether it was created
- // by calling std::end() versus incrementing a valid iterator.
- return Result.Iter == RHS.Result.Iter;
- }
-
- enumerator_iter(const enumerator_iter &Other) : Result(Other.Result) {}
- enumerator_iter &operator=(const enumerator_iter &Other) {
- Result = Other.Result;
- return *this;
+template <typename... Refs> struct enumerator_result<std::size_t, Refs...> {
+ static constexpr std::size_t NumRefs = sizeof...(Refs);
+ static_assert(NumRefs != 0);
+ // `NumValues` includes the index.
+ static constexpr std::size_t NumValues = NumRefs + 1;
+
+ // Tuple type whose element types are references for each `Ref`.
+ using range_reference_tuple = std::tuple<Refs...>;
+ // Tuple type whose elements are references to all values, including both
+ // the index and `Refs` reference types.
+ using value_reference_tuple = std::tuple<std::size_t, Refs...>;
+
+ enumerator_result(std::size_t Index, Refs &&...Rs)
+ : Idx(Index), Storage(std::forward<Refs>(Rs)...) {}
+
+ /// Returns the 0-based index of the current position within the original
+ /// input range(s).
+ std::size_t index() const { return Idx; }
+
+ /// Returns the value(s) for the current iterator. This does not include the
+ /// index.
+ decltype(auto) value() const {
+ if constexpr (NumRefs == 1)
+ return std::get<0>(Storage);
+ else
+ return Storage;
+ }
+
+ /// Returns the value at index `I`. This case covers the index.
+ template <std::size_t I, typename = std::enable_if_t<I == 0>>
+ friend std::size_t get(const enumerator_result &Result) {
+ return Result.Idx;
+ }
+
+ /// Returns the value at index `I`. This case covers references to the
+ /// iteratees.
+ template <std::size_t I, typename = std::enable_if_t<I != 0>>
+ friend decltype(auto) get(const enumerator_result &Result) {
+ // Note: This is a separate function from the other `get`, instead of an
+ // `if constexpr` case, to work around an MSVC 19.31.31XXX compiler
+ // (Visual Studio 2022 17.1) return type deduction bug.
+ return std::get<I - 1>(Result.Storage);
+ }
+
+ template <typename... Ts>
+ friend bool operator==(const enumerator_result &Result,
+ const std::tuple<std::size_t, Ts...> &Other) {
+ static_assert(NumRefs == sizeof...(Ts), "Size mismatch");
+ if (Result.Idx != std::get<0>(Other))
+ return false;
+ return Result.is_value_equal(Other, std::make_index_sequence<NumRefs>{});
}
private:
- result_type Result;
+ template <typename Tuple, std::size_t... Idx>
+ bool is_value_equal(const Tuple &Other, std::index_sequence<Idx...>) const {
+ return ((std::get<Idx>(Storage) == std::get<Idx + 1>(Other)) && ...);
+ }
+
+ std::size_t Idx;
+ // Make this tuple mutable to avoid casts that obfuscate const-correctness
+ // issues. Const-correctness of references is taken care of by `zippy` that
+ // defines non-const and const iterator types that will propagate down to
+ // `enumerator_result`'s `Refs`.
+ // Note that unlike the results of `zip*` functions, `enumerate`'s results are
+ // supposed to be modifiable even when defined as `const`.
+ mutable range_reference_tuple Storage;
};
-template <typename R> class enumerator {
-public:
- explicit enumerator(R &&Range) : TheRange(std::forward<R>(Range)) {}
+/// Infinite stream of increasing 0-based `size_t` indices.
+struct index_stream {
+ struct iterator : iterator_facade_base<iterator, std::forward_iterator_tag,
+ const iterator> {
+ iterator &operator++() {
+ assert(Index != std::numeric_limits<std::size_t>::max() &&
+ "Attempting to increment end iterator");
+ ++Index;
+ return *this;
+ }
- enumerator_iter<R> begin() {
- return enumerator_iter<R>(0, std::begin(TheRange));
- }
- enumerator_iter<R> begin() const {
- return enumerator_iter<R>(0, std::begin(TheRange));
- }
+ // Note: This dereference operator returns a value instead of a reference
+ // and does not strictly conform to the C++17's definition of forward
+ // iterator. However, it satisfies all the forward_iterator requirements
+ // that the `zip_common` depends on and fully conforms to the C++20
+ // definition of forward iterator.
+ std::size_t operator*() const { return Index; }
- enumerator_iter<R> end() {
- return enumerator_iter<R>(std::end(TheRange));
- }
- enumerator_iter<R> end() const {
- return enumerator_iter<R>(std::end(TheRange));
- }
+ friend bool operator==(const iterator &Lhs, const iterator &Rhs) {
+ return Lhs.Index == Rhs.Index;
+ }
-private:
- R TheRange;
+ std::size_t Index = 0;
+ };
+
+ iterator begin() const { return {}; }
+ iterator end() const {
+ // We approximate 'infinity' with the max size_t value, which should be good
+ // enough to index over any container.
+ iterator It;
+ It.Index = std::numeric_limits<std::size_t>::max();
+ return It;
+ }
};
} // end namespace detail
-/// Given an input range, returns a new range whose values are are pair (A,B)
-/// such that A is the 0-based index of the item in the sequence, and B is
-/// the value from the original sequence. Example:
+/// Given two or more input ranges, returns a new range whose values are
+/// tuples (A, B, C, ...), such that A is the 0-based index of the item in the
+/// sequence, and B, C, ..., are the values from the original input ranges. All
+/// input ranges are required to have equal lengths. Note that the returned
+/// iterator allows for the values (B, C, ...) to be modified. Example:
+///
+/// ```c++
+/// std::vector<char> Letters = {'A', 'B', 'C', 'D'};
+/// std::vector<int> Vals = {10, 11, 12, 13};
///
-/// std::vector<char> Items = {'A', 'B', 'C', 'D'};
-/// for (auto X : enumerate(Items)) {
-/// printf("Item %d - %c\n", X.index(), X.value());
+/// for (auto [Index, Letter, Value] : enumerate(Letters, Vals)) {
+/// printf("Item %zu - %c: %d\n", Index, Letter, Value);
+/// Value -= 10;
/// }
+/// ```
///
-/// or using structured bindings:
+/// Output:
+/// Item 0 - A: 10
+/// Item 1 - B: 11
+/// Item 2 - C: 12
+/// Item 3 - D: 13
///
-/// for (auto [Index, Value] : enumerate(Items)) {
-/// printf("Item %d - %c\n", Index, Value);
+/// or using an iterator:
+/// ```c++
+/// for (auto it : enumerate(Vals)) {
+/// it.value() += 10;
+/// printf("Item %zu: %d\n", it.index(), it.value());
/// }
+/// ```
///
/// Output:
-/// Item 0 - A
-/// Item 1 - B
-/// Item 2 - C
-/// Item 3 - D
+/// Item 0: 20
+/// Item 1: 21
+/// Item 2: 22
+/// Item 3: 23
///
-template <typename R> detail::enumerator<R> enumerate(R &&TheRange) {
- return detail::enumerator<R>(std::forward<R>(TheRange));
+template <typename FirstRange, typename... RestRanges>
+auto enumerate(FirstRange &&First, RestRanges &&...Rest) {
+ if constexpr (sizeof...(Rest) != 0) {
+#ifndef NDEBUG
+ // Note: Create an array instead of an initializer list to work around an
+ // Apple clang 14 compiler bug.
+ size_t sizes[] = {range_size(First), range_size(Rest)...};
+ assert(all_equal(sizes) && "Ranges have different length");
+#endif
+ }
+ using enumerator = detail::zippy<detail::zip_enumerator, detail::index_stream,
+ FirstRange, RestRanges...>;
+ return enumerator(detail::index_stream{}, std::forward<FirstRange>(First),
+ std::forward<RestRanges>(Rest)...);
}
namespace detail {
@@ -2394,15 +2489,17 @@ template <class T> constexpr T *to_address(T *P) { return P; }
} // end namespace llvm
namespace std {
-template <typename R>
-struct tuple_size<llvm::detail::result_pair<R>>
- : std::integral_constant<std::size_t, 2> {};
+template <typename... Refs>
+struct tuple_size<llvm::detail::enumerator_result<Refs...>>
+ : std::integral_constant<std::size_t, sizeof...(Refs)> {};
-template <std::size_t i, typename R>
-struct tuple_element<i, llvm::detail::result_pair<R>>
- : std::conditional<i == 0, std::size_t,
- typename llvm::detail::result_pair<R>::value_reference> {
-};
+template <std::size_t I, typename... Refs>
+struct tuple_element<I, llvm::detail::enumerator_result<Refs...>>
+ : std::tuple_element<I, std::tuple<Refs...>> {};
+
+template <std::size_t I, typename... Refs>
+struct tuple_element<I, const llvm::detail::enumerator_result<Refs...>>
+ : std::tuple_element<I, std::tuple<Refs...>> {};
} // namespace std
diff --git a/llvm/include/llvm/ADT/STLFunctionalExtras.h b/llvm/include/llvm/ADT/STLFunctionalExtras.h
index 6d7e5b53f2a2..dd7fc6dc7486 100644
--- a/llvm/include/llvm/ADT/STLFunctionalExtras.h
+++ b/llvm/include/llvm/ADT/STLFunctionalExtras.h
@@ -17,9 +17,9 @@
#include "llvm/ADT/STLForwardCompat.h"
+#include <cstdint>
#include <type_traits>
#include <utility>
-#include <cstdint>
namespace llvm {
diff --git a/llvm/include/llvm/ADT/Sequence.h b/llvm/include/llvm/ADT/Sequence.h
index 1153352d8b24..ddda9a95a7bc 100644
--- a/llvm/include/llvm/ADT/Sequence.h
+++ b/llvm/include/llvm/ADT/Sequence.h
@@ -190,7 +190,7 @@ template <typename T, bool IsReverse> struct SafeIntIterator {
using value_type = T;
using difference_type = intmax_t;
using pointer = T *;
- using reference = T &;
+ using reference = value_type; // The iterator does not reference memory.
// Construct from T.
explicit SafeIntIterator(T Value) : SI(CheckedInt::from<T>(Value)) {}
@@ -198,9 +198,9 @@ template <typename T, bool IsReverse> struct SafeIntIterator {
SafeIntIterator(const SafeIntIterator<T, !IsReverse> &O) : SI(O.SI) {}
// Dereference
- value_type operator*() const { return SI.to<T>(); }
+ reference operator*() const { return SI.to<T>(); }
// Indexing
- value_type operator[](intmax_t Offset) const { return *(*this + Offset); }
+ reference operator[](intmax_t Offset) const { return *(*this + Offset); }
// Can be compared for equivalence using the equality/inequality operators.
bool operator==(const SafeIntIterator &O) const { return SI == O.SI; }
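This hunk only corrects the iterator's `reference` typedef to reflect that `operator*` already returns by value, so the element seen in a range-for is a copy. A typical `llvm::seq` loop, shown purely for context:

```c++
#include "llvm/ADT/Sequence.h"

// SafeIntIterator dereferences to a value, so I below is a plain int copy.
int sumBelow(int N) {
  int Sum = 0;
  for (int I : llvm::seq<int>(0, N)) // yields 0 .. N-1
    Sum += I;
  return Sum;
}
```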
diff --git a/llvm/include/llvm/ADT/SetOperations.h b/llvm/include/llvm/ADT/SetOperations.h
index c9462f077dc8..52aced706893 100644
--- a/llvm/include/llvm/ADT/SetOperations.h
+++ b/llvm/include/llvm/ADT/SetOperations.h
@@ -45,6 +45,25 @@ void set_intersect(S1Ty &S1, const S2Ty &S2) {
}
}
+template <class S1Ty, class S2Ty>
+S1Ty set_intersection_impl(const S1Ty &S1, const S2Ty &S2) {
+ S1Ty Result;
+ for (typename S1Ty::const_iterator SI = S1.begin(), SE = S1.end(); SI != SE;
+ ++SI)
+ if (S2.count(*SI))
+ Result.insert(*SI);
+ return Result;
+}
+
+/// set_intersection(A, B) - Return A ^ B
+template <class S1Ty, class S2Ty>
+S1Ty set_intersection(const S1Ty &S1, const S2Ty &S2) {
+ if (S1.size() < S2.size())
+ return set_intersection_impl(S1, S2);
+ else
+ return set_intersection_impl(S2, S1);
+}
+
/// set_difference(A, B) - Return A - B
///
template <class S1Ty, class S2Ty>
@@ -66,6 +85,19 @@ void set_subtract(S1Ty &S1, const S2Ty &S2) {
S1.erase(*SI);
}
+/// set_subtract(A, B, C, D) - Compute A := A - B, set C to the elements of B
+/// that were removed from A (A ^ B), and D to the elements of B that were not
+/// found in A (B - A).
+template <class S1Ty, class S2Ty>
+void set_subtract(S1Ty &S1, const S2Ty &S2, S1Ty &Removed, S1Ty &Remaining) {
+ for (typename S2Ty::const_iterator SI = S2.begin(), SE = S2.end(); SI != SE;
+ ++SI)
+ if (S1.erase(*SI))
+ Removed.insert(*SI);
+ else
+ Remaining.insert(*SI);
+}
+
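A short sketch of the two new helpers using `std::set`; any set-like type with `count`, `insert`, and `erase` should behave the same way, and the values are made up for illustration:

```c++
#include "llvm/ADT/SetOperations.h"
#include <set>

void setOps() {
  std::set<int> A = {1, 2, 3};
  std::set<int> B = {2, 3, 4};

  // A ^ B without modifying either input.
  std::set<int> Common = llvm::set_intersection(A, B); // {2, 3}

  // Subtract B from A and record what happened to each element of B.
  std::set<int> Removed, Remaining;
  llvm::set_subtract(A, B, Removed, Remaining);
  // A == {1}, Removed == {2, 3} (were in A), Remaining == {4} (were not)
  (void)Common;
}
```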
/// set_is_subset(A, B) - Return true iff A in B
///
template <class S1Ty, class S2Ty>
diff --git a/llvm/include/llvm/ADT/SetVector.h b/llvm/include/llvm/ADT/SetVector.h
index 37509e28f891..781ca367b97e 100644
--- a/llvm/include/llvm/ADT/SetVector.h
+++ b/llvm/include/llvm/ADT/SetVector.h
@@ -35,14 +35,35 @@ namespace llvm {
/// This adapter class provides a way to keep a set of things that also has the
/// property of a deterministic iteration order. The order of iteration is the
/// order of insertion.
+///
+/// The key and value types are derived from the Set and Vector types
+/// respectively. This allows the vector-type operations and set-type operations
+/// to have different types. In particular, this is useful when storing pointers
+/// as "Foo *" values but looking them up as "const Foo *" keys.
+///
+/// No constraint is placed on the key and value types, although it is assumed
+/// that value_type can be converted into key_type for insertion. Users must be
+/// aware of any loss of information in this conversion. For example, setting
+/// value_type to float and key_type to int can produce very surprising results,
+/// but it is not explicitly disallowed.
+///
+/// The parameter N specifies the "small" size of the container, which is the
+/// number of elements up to which a linear scan over the Vector will be used
+/// when searching for elements instead of checking Set, since this is faster
+/// for small sizes. A value of 0, the default, means this mode of operation
+/// is not used.
template <typename T, typename Vector = std::vector<T>,
- typename Set = DenseSet<T>>
+ typename Set = DenseSet<T>, unsigned N = 0>
class SetVector {
+ // Much like in SmallPtrSet, this value should not be too high to prevent
+ // excessively long linear scans from occurring.
+ static_assert(N <= 32, "Small size should be less than or equal to 32!");
+
public:
- using value_type = T;
- using key_type = T;
- using reference = T&;
- using const_reference = const T&;
+ using value_type = typename Vector::value_type;
+ using key_type = typename Set::key_type;
+ using reference = value_type &;
+ using const_reference = const value_type &;
using set_type = Set;
using vector_type = Vector;
using iterator = typename vector_type::const_iterator;
@@ -60,7 +81,7 @@ public:
insert(Start, End);
}
- ArrayRef<T> getArrayRef() const { return vector_; }
+ ArrayRef<value_type> getArrayRef() const { return vector_; }
/// Clear the SetVector and return the underlying vector.
Vector takeVector() {
@@ -119,13 +140,13 @@ public:
}
/// Return the first element of the SetVector.
- const T &front() const {
+ const value_type &front() const {
assert(!empty() && "Cannot call front() on empty SetVector!");
return vector_.front();
}
/// Return the last element of the SetVector.
- const T &back() const {
+ const value_type &back() const {
assert(!empty() && "Cannot call back() on empty SetVector!");
return vector_.back();
}
@@ -139,6 +160,17 @@ public:
/// Insert a new element into the SetVector.
/// \returns true if the element was inserted into the SetVector.
bool insert(const value_type &X) {
+ if constexpr (canBeSmall())
+ if (isSmall()) {
+ if (llvm::find(vector_, X) == vector_.end()) {
+ vector_.push_back(X);
+ if (vector_.size() > N)
+ makeBig();
+ return true;
+ }
+ return false;
+ }
+
bool result = set_.insert(X).second;
if (result)
vector_.push_back(X);
@@ -149,12 +181,21 @@ public:
template<typename It>
void insert(It Start, It End) {
for (; Start != End; ++Start)
- if (set_.insert(*Start).second)
- vector_.push_back(*Start);
+ insert(*Start);
}
/// Remove an item from the set vector.
bool remove(const value_type& X) {
+ if constexpr (canBeSmall())
+ if (isSmall()) {
+ typename vector_type::iterator I = find(vector_, X);
+ if (I != vector_.end()) {
+ vector_.erase(I);
+ return true;
+ }
+ return false;
+ }
+
if (set_.erase(X)) {
typename vector_type::iterator I = find(vector_, X);
assert(I != vector_.end() && "Corrupted SetVector instances!");
@@ -169,6 +210,10 @@ public:
/// element erased. This is the end of the SetVector if the last element is
/// erased.
iterator erase(const_iterator I) {
+ if constexpr (canBeSmall())
+ if (isSmall())
+ return vector_.erase(I);
+
const key_type &V = *I;
assert(set_.count(V) && "Corrupted SetVector instances!");
set_.erase(V);
@@ -190,8 +235,15 @@ public:
/// \returns true if any element is removed.
template <typename UnaryPredicate>
bool remove_if(UnaryPredicate P) {
- typename vector_type::iterator I =
- llvm::remove_if(vector_, TestAndEraseFromSet<UnaryPredicate>(P, set_));
+ typename vector_type::iterator I = [this, P] {
+ if constexpr (canBeSmall())
+ if (isSmall())
+ return llvm::remove_if(vector_, P);
+
+ return llvm::remove_if(vector_,
+ TestAndEraseFromSet<UnaryPredicate>(P, set_));
+ }();
+
if (I == vector_.end())
return false;
vector_.erase(I, vector_.end());
@@ -200,12 +252,20 @@ public:
/// Check if the SetVector contains the given key.
bool contains(const key_type &key) const {
+ if constexpr (canBeSmall())
+ if (isSmall())
+ return is_contained(vector_, key);
+
return set_.find(key) != set_.end();
}
/// Count the number of elements of a given key in the SetVector.
/// \returns 0 if the element is not in the SetVector, 1 if it is.
size_type count(const key_type &key) const {
+ if constexpr (canBeSmall())
+ if (isSmall())
+ return is_contained(vector_, key);
+
return set_.count(key);
}
@@ -222,8 +282,8 @@ public:
vector_.pop_back();
}
- [[nodiscard]] T pop_back_val() {
- T Ret = back();
+ [[nodiscard]] value_type pop_back_val() {
+ value_type Ret = back();
pop_back();
return Ret;
}
@@ -261,7 +321,7 @@ public:
remove(*SI);
}
- void swap(SetVector<T, Vector, Set> &RHS) {
+ void swap(SetVector<T, Vector, Set, N> &RHS) {
set_.swap(RHS.set_);
vector_.swap(RHS.vector_);
}
@@ -290,6 +350,16 @@ private:
}
};
+ [[nodiscard]] static constexpr bool canBeSmall() { return N != 0; }
+
+ [[nodiscard]] bool isSmall() const { return set_.empty(); }
+
+ void makeBig() {
+ if constexpr (canBeSmall())
+ for (const auto &entry : vector_)
+ set_.insert(entry);
+ }
+
set_type set_; ///< The set.
vector_type vector_; ///< The vector.
};
@@ -297,8 +367,7 @@ private:
/// A SetVector that performs no allocations if smaller than
/// a certain size.
template <typename T, unsigned N>
-class SmallSetVector
- : public SetVector<T, SmallVector<T, N>, SmallDenseSet<T, N>> {
+class SmallSetVector : public SetVector<T, SmallVector<T, N>, DenseSet<T>, N> {
public:
SmallSetVector() = default;
@@ -314,9 +383,9 @@ public:
namespace std {
/// Implement std::swap in terms of SetVector swap.
-template<typename T, typename V, typename S>
-inline void
-swap(llvm::SetVector<T, V, S> &LHS, llvm::SetVector<T, V, S> &RHS) {
+template <typename T, typename V, typename S, unsigned N>
+inline void swap(llvm::SetVector<T, V, S, N> &LHS,
+ llvm::SetVector<T, V, S, N> &RHS) {
LHS.swap(RHS);
}
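With the new small-size parameter, `SmallSetVector` keeps elements only in its vector (doing linear lookups) until it outgrows `N`; usage is unchanged. A minimal sketch, with made-up values:

```c++
#include "llvm/ADT/SetVector.h"

// Deduplicate while preserving insertion order; stays in vector-only
// "small" mode until it grows past 4 elements.
void dedupe() {
  llvm::SmallSetVector<int, 4> SV;
  for (int X : {3, 1, 3, 2, 1})
    SV.insert(X); // duplicates are ignored
  // SV now holds 3, 1, 2 in that order; SV.contains(3) == true
}
```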
diff --git a/llvm/include/llvm/ADT/SmallBitVector.h b/llvm/include/llvm/ADT/SmallBitVector.h
index f73db0ef53f6..c538a4d4023b 100644
--- a/llvm/include/llvm/ADT/SmallBitVector.h
+++ b/llvm/include/llvm/ADT/SmallBitVector.h
@@ -232,7 +232,7 @@ public:
uintptr_t Bits = getSmallBits();
if (Bits == 0)
return -1;
- return countTrailingZeros(Bits);
+ return llvm::countr_zero(Bits);
}
return getPointer()->find_first();
}
@@ -242,7 +242,7 @@ public:
uintptr_t Bits = getSmallBits();
if (Bits == 0)
return -1;
- return NumBaseBits - countLeadingZeros(Bits) - 1;
+ return NumBaseBits - llvm::countl_zero(Bits) - 1;
}
return getPointer()->find_last();
}
@@ -254,7 +254,7 @@ public:
return -1;
uintptr_t Bits = getSmallBits();
- return countTrailingOnes(Bits);
+ return llvm::countr_one(Bits);
}
return getPointer()->find_first_unset();
}
@@ -267,7 +267,7 @@ public:
uintptr_t Bits = getSmallBits();
// Set unused bits.
Bits |= ~uintptr_t(0) << getSmallSize();
- return NumBaseBits - countLeadingOnes(Bits) - 1;
+ return NumBaseBits - llvm::countl_one(Bits) - 1;
}
return getPointer()->find_last_unset();
}
@@ -281,7 +281,7 @@ public:
Bits &= ~uintptr_t(0) << (Prev + 1);
if (Bits == 0 || Prev + 1 >= getSmallSize())
return -1;
- return countTrailingZeros(Bits);
+ return llvm::countr_zero(Bits);
}
return getPointer()->find_next(Prev);
}
@@ -298,7 +298,7 @@ public:
if (Bits == ~uintptr_t(0) || Prev + 1 >= getSmallSize())
return -1;
- return countTrailingOnes(Bits);
+ return llvm::countr_one(Bits);
}
return getPointer()->find_next_unset(Prev);
}
@@ -316,7 +316,7 @@ public:
if (Bits == 0)
return -1;
- return NumBaseBits - countLeadingZeros(Bits) - 1;
+ return NumBaseBits - llvm::countl_zero(Bits) - 1;
}
return getPointer()->find_prev(PriorTo);
}
diff --git a/llvm/include/llvm/ADT/SmallPtrSet.h b/llvm/include/llvm/ADT/SmallPtrSet.h
index 3d8191b3d162..4c064397057d 100644
--- a/llvm/include/llvm/ADT/SmallPtrSet.h
+++ b/llvm/include/llvm/ADT/SmallPtrSet.h
@@ -264,8 +264,9 @@ protected:
/// SmallPtrSetIterator - This implements a const_iterator for SmallPtrSet.
template <typename PtrTy>
-class SmallPtrSetIterator : public SmallPtrSetIteratorImpl,
- DebugEpochBase::HandleBase {
+class LLVM_DEBUGEPOCHBASE_HANDLEBASE_EMPTYBASE SmallPtrSetIterator
+ : public SmallPtrSetIteratorImpl,
+ DebugEpochBase::HandleBase {
using PtrTraits = PointerLikeTypeTraits<PtrTy>;
public:
diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h
index 5ac868d58314..a16e8ac6f075 100644
--- a/llvm/include/llvm/ADT/SmallSet.h
+++ b/llvm/include/llvm/ADT/SmallSet.h
@@ -149,7 +149,9 @@ class SmallSet {
static_assert(N <= 32, "N should be small");
public:
+ using key_type = T;
using size_type = size_t;
+ using value_type = T;
using const_iterator = SmallSetIterator<T, N, C>;
SmallSet() = default;
diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h
index 98dce891688d..93d94916745d 100644
--- a/llvm/include/llvm/ADT/SmallVector.h
+++ b/llvm/include/llvm/ADT/SmallVector.h
@@ -1206,7 +1206,12 @@ public:
this->destroy_range(this->begin(), this->end());
}
- explicit SmallVector(size_t Size, const T &Value = T())
+ explicit SmallVector(size_t Size)
+ : SmallVectorImpl<T>(N) {
+ this->resize(Size);
+ }
+
+ SmallVector(size_t Size, const T &Value)
: SmallVectorImpl<T>(N) {
this->assign(Size, Value);
}
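Splitting the constructor means `SmallVector<T>(Size)` now value-initializes its elements via `resize` instead of copying a default-constructed `T`; for simple element types the observable result is the same. A trivial sketch:

```c++
#include "llvm/ADT/SmallVector.h"

void build() {
  llvm::SmallVector<int, 8> A(5);     // five value-initialized ints (all 0)
  llvm::SmallVector<int, 8> B(5, 42); // five copies of 42
  (void)A;
  (void)B;
}
```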
diff --git a/llvm/include/llvm/ADT/SmallVectorExtras.h b/llvm/include/llvm/ADT/SmallVectorExtras.h
new file mode 100644
index 000000000000..d5159aa0e62f
--- /dev/null
+++ b/llvm/include/llvm/ADT/SmallVectorExtras.h
@@ -0,0 +1,36 @@
+//===- llvm/ADT/SmallVectorExtras.h -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines less commonly used SmallVector utilities.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_SMALLVECTOREXTRAS_H
+#define LLVM_ADT_SMALLVECTOREXTRAS_H
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+/// Map a range to a SmallVector with element types deduced from the mapping.
+template <unsigned Size, class ContainerTy, class FuncTy>
+auto map_to_vector(ContainerTy &&C, FuncTy &&F) {
+ return to_vector<Size>(
+ map_range(std::forward<ContainerTy>(C), std::forward<FuncTy>(F)));
+}
+template <class ContainerTy, class FuncTy>
+auto map_to_vector(ContainerTy &&C, FuncTy &&F) {
+ return to_vector(
+ map_range(std::forward<ContainerTy>(C), std::forward<FuncTy>(F)));
+}
+
+} // namespace llvm
+
+#endif // LLVM_ADT_SMALLVECTOREXTRAS_H
\ No newline at end of file
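The new header is small; a hedged sketch of the deduced-element-type mapping it provides (the input container and lambda are made up for illustration):

```c++
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SmallVectorExtras.h"
#include <vector>

// The element type (int) is deduced from the lambda's return type.
llvm::SmallVector<int, 4> squares(const std::vector<int> &In) {
  return llvm::map_to_vector<4>(In, [](int X) { return X * X; });
}
```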
diff --git a/llvm/include/llvm/ADT/SparseBitVector.h b/llvm/include/llvm/ADT/SparseBitVector.h
index f398f3bb19ac..1e00c1386187 100644
--- a/llvm/include/llvm/ADT/SparseBitVector.h
+++ b/llvm/include/llvm/ADT/SparseBitVector.h
@@ -128,7 +128,7 @@ public:
int find_first() const {
for (unsigned i = 0; i < BITWORDS_PER_ELEMENT; ++i)
if (Bits[i] != 0)
- return i * BITWORD_SIZE + countTrailingZeros(Bits[i]);
+ return i * BITWORD_SIZE + llvm::countr_zero(Bits[i]);
llvm_unreachable("Illegal empty element");
}
@@ -138,7 +138,7 @@ public:
unsigned Idx = BITWORDS_PER_ELEMENT - I - 1;
if (Bits[Idx] != 0)
return Idx * BITWORD_SIZE + BITWORD_SIZE -
- countLeadingZeros(Bits[Idx]) - 1;
+ llvm::countl_zero(Bits[Idx]) - 1;
}
llvm_unreachable("Illegal empty element");
}
@@ -159,12 +159,12 @@ public:
Copy &= ~0UL << BitPos;
if (Copy != 0)
- return WordPos * BITWORD_SIZE + countTrailingZeros(Copy);
+ return WordPos * BITWORD_SIZE + llvm::countr_zero(Copy);
// Check subsequent words.
for (unsigned i = WordPos+1; i < BITWORDS_PER_ELEMENT; ++i)
if (Bits[i] != 0)
- return i * BITWORD_SIZE + countTrailingZeros(Bits[i]);
+ return i * BITWORD_SIZE + llvm::countr_zero(Bits[i]);
return -1;
}
diff --git a/llvm/include/llvm/ADT/SparseSet.h b/llvm/include/llvm/ADT/SparseSet.h
index c9895d747540..4a999e6b4c69 100644
--- a/llvm/include/llvm/ADT/SparseSet.h
+++ b/llvm/include/llvm/ADT/SparseSet.h
@@ -203,6 +203,7 @@ public:
///
iterator findIndex(unsigned Idx) {
assert(Idx < Universe && "Key out of range");
+ assert(Sparse != nullptr && "Invalid sparse type");
const unsigned Stride = std::numeric_limits<SparseT>::max() + 1u;
for (unsigned i = Sparse[Idx], e = size(); i < e; i += Stride) {
const unsigned FoundIdx = ValIndexOf(Dense[i]);
diff --git a/llvm/include/llvm/ADT/StringMap.h b/llvm/include/llvm/ADT/StringMap.h
index 0849bef53ba1..466f95254d10 100644
--- a/llvm/include/llvm/ADT/StringMap.h
+++ b/llvm/include/llvm/ADT/StringMap.h
@@ -107,8 +107,9 @@ public:
/// funky memory allocation and hashing things to make it extremely efficient,
/// storing the string data *after* the value in the map.
template <typename ValueTy, typename AllocatorTy = MallocAllocator>
-class StringMap : public StringMapImpl,
- private detail::AllocatorHolder<AllocatorTy> {
+class LLVM_ALLOCATORHOLDER_EMPTYBASE StringMap
+ : public StringMapImpl,
+ private detail::AllocatorHolder<AllocatorTy> {
using AllocTy = detail::AllocatorHolder<AllocatorTy>;
public:
@@ -231,18 +232,29 @@ public:
/// lookup - Return the entry for the specified key, or a default
/// constructed value if no such entry exists.
ValueTy lookup(StringRef Key) const {
- const_iterator it = find(Key);
- if (it != end())
- return it->second;
+ const_iterator Iter = find(Key);
+ if (Iter != end())
+ return Iter->second;
return ValueTy();
}
+ /// at - Return the entry for the specified key, or abort if no such
+ /// entry exists.
+ const ValueTy &at(StringRef Val) const {
+ auto Iter = this->find(std::move(Val));
+ assert(Iter != this->end() && "StringMap::at failed due to a missing key");
+ return Iter->second;
+ }
+
/// Lookup the ValueTy for the \p Key, or create a default constructed value
/// if the key is not in the map.
ValueTy &operator[](StringRef Key) { return try_emplace(Key).first->second; }
+ /// contains - Return true if the element is in the map, false otherwise.
+ bool contains(StringRef Key) const { return find(Key) != end(); }
+
/// count - Return 1 if the element is in the map, 0 otherwise.
- size_type count(StringRef Key) const { return find(Key) == end() ? 0 : 1; }
+ size_type count(StringRef Key) const { return contains(Key) ? 1 : 0; }
template <typename InputTy>
size_type count(const StringMapEntry<InputTy> &MapEntry) const {
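A minimal sketch of the new StringMap helpers introduced above (the map contents and function name are made up for illustration):

    #include "llvm/ADT/StringMap.h"
    #include <cassert>

    void stringMapSketch() {
      llvm::StringMap<int> Widths;
      Widths["i32"] = 32;
      assert(Widths.contains("i32"));    // new membership query
      assert(Widths.at("i32") == 32);    // asserts if the key is missing
      assert(Widths.count("f128") == 0); // count() now forwards to contains()
    }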
diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h
index 2d2f0bedfe1f..235a7b27c384 100644
--- a/llvm/include/llvm/ADT/StringRef.h
+++ b/llvm/include/llvm/ADT/StringRef.h
@@ -264,7 +264,10 @@ namespace llvm {
/// Check if this string starts with the given \p Prefix, ignoring case.
[[nodiscard]] bool starts_with_insensitive(StringRef Prefix) const;
- [[nodiscard]] bool startswith_insensitive(StringRef Prefix) const {
+ [[nodiscard]] LLVM_DEPRECATED(
+ "Use starts_with_insensitive instead",
+ "starts_with_insensitive") bool startswith_insensitive(StringRef Prefix)
+ const {
return starts_with_insensitive(Prefix);
}
@@ -280,7 +283,10 @@ namespace llvm {
/// Check if this string ends with the given \p Suffix, ignoring case.
[[nodiscard]] bool ends_with_insensitive(StringRef Suffix) const;
- [[nodiscard]] bool endswith_insensitive(StringRef Suffix) const {
+ [[nodiscard]] LLVM_DEPRECATED(
+ "Use ends_with_insensitive instead",
+ "ends_with_insensitive") bool endswith_insensitive(StringRef Suffix)
+ const {
return ends_with_insensitive(Suffix);
}
@@ -343,12 +349,11 @@ namespace llvm {
/// \returns The index of the last occurrence of \p C, or npos if not
/// found.
[[nodiscard]] size_t rfind(char C, size_t From = npos) const {
- From = std::min(From, Length);
- size_t i = From;
- while (i != 0) {
- --i;
- if (Data[i] == C)
- return i;
+ size_t I = std::min(From, Length);
+ while (I) {
+ --I;
+ if (Data[I] == C)
+ return I;
}
return npos;
}
@@ -449,8 +454,8 @@ namespace llvm {
/// Return the number of occurrences of \p C in the string.
[[nodiscard]] size_t count(char C) const {
size_t Count = 0;
- for (size_t i = 0, e = Length; i != e; ++i)
- if (Data[i] == C)
+ for (size_t I = 0; I != Length; ++I)
+ if (Data[I] == C)
++Count;
return Count;
}
@@ -524,6 +529,17 @@ namespace llvm {
/// string is well-formed in the given radix.
bool getAsInteger(unsigned Radix, APInt &Result) const;
+ /// Parse the current string as an integer of the specified \p Radix. If
+ /// \p Radix is specified as zero, this does radix autosensing using
+ /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
+ ///
+ /// If the string does not begin with a number of the specified radix,
+ /// this returns true to signify the error. The string is considered
+ /// erroneous if empty.
+ /// The portion of the string representing the discovered numeric value
+ /// is removed from the beginning of the string.
+ bool consumeInteger(unsigned Radix, APInt &Result);
+
/// Parse the current string as an IEEE double-precision floating
/// point value. The string must be a well-formed double.
///
@@ -624,17 +640,17 @@ namespace llvm {
if (!starts_with(Prefix))
return false;
- *this = drop_front(Prefix.size());
+ *this = substr(Prefix.size());
return true;
}
/// Returns true if this StringRef has the given prefix, ignoring case,
/// and removes that prefix.
bool consume_front_insensitive(StringRef Prefix) {
- if (!startswith_insensitive(Prefix))
+ if (!starts_with_insensitive(Prefix))
return false;
- *this = drop_front(Prefix.size());
+ *this = substr(Prefix.size());
return true;
}
@@ -644,17 +660,17 @@ namespace llvm {
if (!ends_with(Suffix))
return false;
- *this = drop_back(Suffix.size());
+ *this = substr(0, size() - Suffix.size());
return true;
}
/// Returns true if this StringRef has the given suffix, ignoring case,
/// and removes that suffix.
bool consume_back_insensitive(StringRef Suffix) {
- if (!endswith_insensitive(Suffix))
+ if (!ends_with_insensitive(Suffix))
return false;
- *this = drop_back(Suffix.size());
+ *this = substr(0, size() - Suffix.size());
return true;
}
@@ -671,7 +687,7 @@ namespace llvm {
/// be returned.
[[nodiscard]] StringRef slice(size_t Start, size_t End) const {
Start = std::min(Start, Length);
- End = std::min(std::max(Start, End), Length);
+ End = std::clamp(End, Start, Length);
return StringRef(Data + Start, End - Start);
}
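A sketch of the newly declared consumeInteger(APInt) overload and of consume_front, whose observable behavior is unchanged by the substr-based rewrite (the strings and function name are illustrative):

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/StringRef.h"
    #include <cassert>

    void stringRefSketch() {
      llvm::StringRef S = "0x2a rest";
      llvm::APInt V;
      // Radix 0 autosenses the 0x prefix; the parsed digits are consumed.
      bool Failed = S.consumeInteger(0, V);
      assert(!Failed && V == 42 && S == " rest");

      llvm::StringRef T = "prefix.value";
      assert(T.consume_front("prefix.") && T == "value");
    }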
diff --git a/llvm/include/llvm/ADT/StringSwitch.h b/llvm/include/llvm/ADT/StringSwitch.h
index 1ba7c8dfd80e..519f7c4f2125 100644
--- a/llvm/include/llvm/ADT/StringSwitch.h
+++ b/llvm/include/llvm/ADT/StringSwitch.h
@@ -147,14 +147,14 @@ public:
}
StringSwitch &EndsWithLower(StringLiteral S, T Value) {
- if (!Result && Str.endswith_insensitive(S))
+ if (!Result && Str.ends_with_insensitive(S))
Result = Value;
return *this;
}
StringSwitch &StartsWithLower(StringLiteral S, T Value) {
- if (!Result && Str.startswith_insensitive(S))
+ if (!Result && Str.starts_with_insensitive(S))
Result = std::move(Value);
return *this;
diff --git a/llvm/include/llvm/ADT/TinyPtrVector.h b/llvm/include/llvm/ADT/TinyPtrVector.h
index aa87fd66ac20..fa2bcd8933a0 100644
--- a/llvm/include/llvm/ADT/TinyPtrVector.h
+++ b/llvm/include/llvm/ADT/TinyPtrVector.h
@@ -43,12 +43,12 @@ public:
TinyPtrVector() = default;
~TinyPtrVector() {
- if (VecTy *V = Val.template dyn_cast<VecTy*>())
+ if (VecTy *V = dyn_cast_if_present<VecTy *>(Val))
delete V;
}
TinyPtrVector(const TinyPtrVector &RHS) : Val(RHS.Val) {
- if (VecTy *V = Val.template dyn_cast<VecTy*>())
+ if (VecTy *V = dyn_cast_if_present<VecTy *>(Val))
Val = new VecTy(*V);
}
@@ -62,20 +62,20 @@ public:
// Try to squeeze into the single slot. If it won't fit, allocate a copied
// vector.
- if (Val.template is<EltTy>()) {
+ if (isa<EltTy>(Val)) {
if (RHS.size() == 1)
Val = RHS.front();
else
- Val = new VecTy(*RHS.Val.template get<VecTy*>());
+ Val = new VecTy(*cast<VecTy *>(RHS.Val));
return *this;
}
// If we have a full vector allocated, try to re-use it.
- if (RHS.Val.template is<EltTy>()) {
- Val.template get<VecTy*>()->clear();
- Val.template get<VecTy*>()->push_back(RHS.front());
+ if (isa<EltTy>(RHS.Val)) {
+ cast<VecTy *>(Val)->clear();
+ cast<VecTy *>(Val)->push_back(RHS.front());
} else {
- *Val.template get<VecTy*>() = *RHS.Val.template get<VecTy*>();
+ *cast<VecTy *>(Val) = *cast<VecTy *>(RHS.Val);
}
return *this;
}
@@ -95,8 +95,8 @@ public:
// If this vector has been allocated on the heap, re-use it if cheap. If it
// would require more copying, just delete it and we'll steal the other
// side.
- if (VecTy *V = Val.template dyn_cast<VecTy*>()) {
- if (RHS.Val.template is<EltTy>()) {
+ if (VecTy *V = dyn_cast_if_present<VecTy *>(Val)) {
+ if (isa<EltTy>(RHS.Val)) {
V->clear();
V->push_back(RHS.front());
RHS.Val = EltTy();
@@ -136,18 +136,18 @@ public:
operator ArrayRef<EltTy>() const {
if (Val.isNull())
return std::nullopt;
- if (Val.template is<EltTy>())
+ if (isa<EltTy>(Val))
return *Val.getAddrOfPtr1();
- return *Val.template get<VecTy*>();
+ return *cast<VecTy *>(Val);
}
// implicit conversion operator to MutableArrayRef.
operator MutableArrayRef<EltTy>() {
if (Val.isNull())
return std::nullopt;
- if (Val.template is<EltTy>())
+ if (isa<EltTy>(Val))
return *Val.getAddrOfPtr1();
- return *Val.template get<VecTy*>();
+ return *cast<VecTy *>(Val);
}
// Implicit conversion to ArrayRef<U> if EltTy* implicitly converts to U*.
@@ -163,7 +163,7 @@ public:
// This vector can be empty if it contains no element, or if it
// contains a pointer to an empty vector.
if (Val.isNull()) return true;
- if (VecTy *Vec = Val.template dyn_cast<VecTy*>())
+ if (VecTy *Vec = dyn_cast_if_present<VecTy *>(Val))
return Vec->empty();
return false;
}
@@ -171,9 +171,9 @@ public:
unsigned size() const {
if (empty())
return 0;
- if (Val.template is<EltTy>())
+ if (isa<EltTy>(Val))
return 1;
- return Val.template get<VecTy*>()->size();
+ return cast<VecTy *>(Val)->size();
}
using iterator = EltTy *;
@@ -182,17 +182,17 @@ public:
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
iterator begin() {
- if (Val.template is<EltTy>())
+ if (isa<EltTy>(Val))
return Val.getAddrOfPtr1();
- return Val.template get<VecTy *>()->begin();
+ return cast<VecTy *>(Val)->begin();
}
iterator end() {
- if (Val.template is<EltTy>())
+ if (isa<EltTy>(Val))
return begin() + (Val.isNull() ? 0 : 1);
- return Val.template get<VecTy *>()->end();
+ return cast<VecTy *>(Val)->end();
}
const_iterator begin() const {
@@ -216,28 +216,27 @@ public:
EltTy operator[](unsigned i) const {
assert(!Val.isNull() && "can't index into an empty vector");
- if (Val.template is<EltTy>()) {
+ if (isa<EltTy>(Val)) {
assert(i == 0 && "tinyvector index out of range");
- return Val.template get<EltTy>();
+ return cast<EltTy>(Val);
}
- assert(i < Val.template get<VecTy*>()->size() &&
- "tinyvector index out of range");
- return (*Val.template get<VecTy*>())[i];
+ assert(i < cast<VecTy *>(Val)->size() && "tinyvector index out of range");
+ return (*cast<VecTy *>(Val))[i];
}
EltTy front() const {
assert(!empty() && "vector empty");
- if (Val.template is<EltTy>())
- return Val.template get<EltTy>();
- return Val.template get<VecTy*>()->front();
+ if (isa<EltTy>(Val))
+ return cast<EltTy>(Val);
+ return cast<VecTy *>(Val)->front();
}
EltTy back() const {
assert(!empty() && "vector empty");
- if (Val.template is<EltTy>())
- return Val.template get<EltTy>();
- return Val.template get<VecTy*>()->back();
+ if (isa<EltTy>(Val))
+ return cast<EltTy>(Val);
+ return cast<VecTy *>(Val)->back();
}
void push_back(EltTy NewVal) {
@@ -249,29 +248,29 @@ public:
}
// If we have a single value, convert to a vector.
- if (Val.template is<EltTy>()) {
- EltTy V = Val.template get<EltTy>();
+ if (isa<EltTy>(Val)) {
+ EltTy V = cast<EltTy>(Val);
Val = new VecTy();
- Val.template get<VecTy*>()->push_back(V);
+ cast<VecTy *>(Val)->push_back(V);
}
// Add the new value, we know we have a vector.
- Val.template get<VecTy*>()->push_back(NewVal);
+ cast<VecTy *>(Val)->push_back(NewVal);
}
void pop_back() {
// If we have a single value, convert to empty.
- if (Val.template is<EltTy>())
+ if (isa<EltTy>(Val))
Val = (EltTy)nullptr;
- else if (VecTy *Vec = Val.template get<VecTy*>())
+ else if (VecTy *Vec = cast<VecTy *>(Val))
Vec->pop_back();
}
void clear() {
// If we have a single value, convert to empty.
- if (Val.template is<EltTy>()) {
+ if (isa<EltTy>(Val)) {
Val = EltTy();
- } else if (VecTy *Vec = Val.template dyn_cast<VecTy*>()) {
+ } else if (VecTy *Vec = dyn_cast_if_present<VecTy *>(Val)) {
// If we have a vector form, just clear it.
Vec->clear();
}
@@ -283,10 +282,10 @@ public:
assert(I < end() && "Erasing at past-the-end iterator.");
// If we have a single value, convert to empty.
- if (Val.template is<EltTy>()) {
+ if (isa<EltTy>(Val)) {
if (I == begin())
Val = EltTy();
- } else if (VecTy *Vec = Val.template dyn_cast<VecTy*>()) {
+ } else if (VecTy *Vec = dyn_cast_if_present<VecTy *>(Val)) {
// multiple items in a vector; just do the erase, there is no
// benefit to collapsing back to a pointer
return Vec->erase(I);
@@ -299,10 +298,10 @@ public:
assert(S <= E && "Trying to erase invalid range.");
assert(E <= end() && "Trying to erase past the end.");
- if (Val.template is<EltTy>()) {
+ if (isa<EltTy>(Val)) {
if (S == begin() && S != E)
Val = EltTy();
- } else if (VecTy *Vec = Val.template dyn_cast<VecTy*>()) {
+ } else if (VecTy *Vec = dyn_cast_if_present<VecTy *>(Val)) {
return Vec->erase(S, E);
}
return end();
@@ -316,15 +315,15 @@ public:
return std::prev(end());
}
assert(!Val.isNull() && "Null value with non-end insert iterator.");
- if (Val.template is<EltTy>()) {
- EltTy V = Val.template get<EltTy>();
+ if (isa<EltTy>(Val)) {
+ EltTy V = cast<EltTy>(Val);
assert(I == begin());
Val = Elt;
push_back(V);
return begin();
}
- return Val.template get<VecTy*>()->insert(I, Elt);
+ return cast<VecTy *>(Val)->insert(I, Elt);
}
template<typename ItTy>
@@ -343,12 +342,12 @@ public:
}
Val = new VecTy();
- } else if (Val.template is<EltTy>()) {
- EltTy V = Val.template get<EltTy>();
+ } else if (isa<EltTy>(Val)) {
+ EltTy V = cast<EltTy>(Val);
Val = new VecTy();
- Val.template get<VecTy*>()->push_back(V);
+ cast<VecTy *>(Val)->push_back(V);
}
- return Val.template get<VecTy*>()->insert(begin() + Offset, From, To);
+ return cast<VecTy *>(Val)->insert(begin() + Offset, From, To);
}
};
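The TinyPtrVector changes track the PointerUnion migration from the member is/get/dyn_cast API to the free isa/cast/dyn_cast_if_present functions; a small illustrative sketch (the union element types and function name are chosen arbitrarily):

    #include "llvm/ADT/PointerUnion.h"

    int valueOf(llvm::PointerUnion<int *, float *> PU) {
      if (auto *I = llvm::dyn_cast_if_present<int *>(PU))   // was PU.dyn_cast<int *>()
        return *I;
      if (auto *F = llvm::dyn_cast_if_present<float *>(PU)) // was PU.dyn_cast<float *>()
        return static_cast<int>(*F);
      return 0; // null union
    }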
diff --git a/llvm/include/llvm/ADT/Uniformity.h b/llvm/include/llvm/ADT/Uniformity.h
index adba1cd61832..21ca106b80be 100644
--- a/llvm/include/llvm/ADT/Uniformity.h
+++ b/llvm/include/llvm/ADT/Uniformity.h
@@ -5,9 +5,6 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_UNIFORMITY_H
#define LLVM_ADT_UNIFORMITY_H
diff --git a/llvm/include/llvm/ADT/bit.h b/llvm/include/llvm/ADT/bit.h
index d93023d88b4e..2840c5f608d3 100644
--- a/llvm/include/llvm/ADT/bit.h
+++ b/llvm/include/llvm/ADT/bit.h
@@ -297,7 +297,7 @@ template <typename T> [[nodiscard]] T bit_floor(T Value) {
}
/// Returns the smallest integral power of two no smaller than Value if Value is
-/// nonzero. Returns 0 otherwise.
+/// nonzero. Returns 1 otherwise.
///
/// Ex. bit_ceil(5) == 8.
///
@@ -350,6 +350,37 @@ template <typename T, typename = std::enable_if_t<std::is_unsigned_v<T>>>
return detail::PopulationCounter<T, sizeof(T)>::count(Value);
}
+// Forward-declare rotr so that rotl can use it.
+template <typename T, typename = std::enable_if_t<std::is_unsigned_v<T>>>
+[[nodiscard]] constexpr T rotr(T V, int R);
+
+template <typename T, typename = std::enable_if_t<std::is_unsigned_v<T>>>
+[[nodiscard]] constexpr T rotl(T V, int R) {
+ unsigned N = std::numeric_limits<T>::digits;
+
+ R = R % N;
+ if (!R)
+ return V;
+
+ if (R < 0)
+ return llvm::rotr(V, -R);
+
+ return (V << R) | (V >> (N - R));
+}
+
+template <typename T, typename> [[nodiscard]] constexpr T rotr(T V, int R) {
+ unsigned N = std::numeric_limits<T>::digits;
+
+ R = R % N;
+ if (!R)
+ return V;
+
+ if (R < 0)
+ return llvm::rotl(V, -R);
+
+ return (V >> R) | (V << (N - R));
+}
+
} // namespace llvm
#endif
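A quick illustrative check of the new rotate helpers (not part of the patch; the value is arbitrary):

    #include "llvm/ADT/bit.h"
    #include <cassert>
    #include <cstdint>

    void rotateSketch() {
      uint8_t V = 0b00010011;
      assert(llvm::rotl(V, 2) == 0b01001100);
      // Negative rotate amounts delegate to the opposite direction.
      assert(llvm::rotr(V, -2) == llvm::rotl(V, 2));
    }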
diff --git a/llvm/include/llvm/ADT/edit_distance.h b/llvm/include/llvm/ADT/edit_distance.h
index 6df3db6125d4..00387836c97e 100644
--- a/llvm/include/llvm/ADT/edit_distance.h
+++ b/llvm/include/llvm/ADT/edit_distance.h
@@ -18,7 +18,6 @@
#include "llvm/ADT/ArrayRef.h"
#include <algorithm>
-#include <memory>
namespace llvm {
@@ -70,16 +69,8 @@ unsigned ComputeMappedEditDistance(ArrayRef<T> FromArray, ArrayRef<T> ToArray,
return MaxEditDistance + 1;
}
- const unsigned SmallBufferSize = 64;
- unsigned SmallBuffer[SmallBufferSize];
- std::unique_ptr<unsigned[]> Allocated;
- unsigned *Row = SmallBuffer;
- if (n + 1 > SmallBufferSize) {
- Row = new unsigned[n + 1];
- Allocated.reset(Row);
- }
-
- for (unsigned i = 1; i <= n; ++i)
+ SmallVector<unsigned, 64> Row(n + 1);
+ for (unsigned i = 1; i < Row.size(); ++i)
Row[i] = i;
for (typename ArrayRef<T>::size_type y = 1; y <= m; ++y) {
diff --git a/llvm/include/llvm/ADT/ilist.h b/llvm/include/llvm/ADT/ilist.h
index 9913b7cccbdd..aed19ccbff7f 100644
--- a/llvm/include/llvm/ADT/ilist.h
+++ b/llvm/include/llvm/ADT/ilist.h
@@ -92,63 +92,6 @@ struct ilist_traits : public ilist_node_traits<NodeTy> {};
/// Const traits should never be instantiated.
template <typename Ty> struct ilist_traits<const Ty> {};
-namespace ilist_detail {
-
-template <class T> T &make();
-
-/// Type trait to check for a traits class that has a getNext member (as a
-/// canary for any of the ilist_nextprev_traits API).
-template <class TraitsT, class NodeT> struct HasGetNext {
- typedef char Yes[1];
- typedef char No[2];
- template <size_t N> struct SFINAE {};
-
- template <class U>
- static Yes &test(U *I, decltype(I->getNext(&make<NodeT>())) * = nullptr);
- template <class> static No &test(...);
-
-public:
- static const bool value = sizeof(test<TraitsT>(nullptr)) == sizeof(Yes);
-};
-
-/// Type trait to check for a traits class that has a createSentinel member (as
-/// a canary for any of the ilist_sentinel_traits API).
-template <class TraitsT> struct HasCreateSentinel {
- typedef char Yes[1];
- typedef char No[2];
-
- template <class U>
- static Yes &test(U *I, decltype(I->createSentinel()) * = nullptr);
- template <class> static No &test(...);
-
-public:
- static const bool value = sizeof(test<TraitsT>(nullptr)) == sizeof(Yes);
-};
-
-/// Type trait to check for a traits class that has a createNode member.
-/// Allocation should be managed in a wrapper class, instead of in
-/// ilist_traits.
-template <class TraitsT, class NodeT> struct HasCreateNode {
- typedef char Yes[1];
- typedef char No[2];
- template <size_t N> struct SFINAE {};
-
- template <class U>
- static Yes &test(U *I, decltype(I->createNode(make<NodeT>())) * = 0);
- template <class> static No &test(...);
-
-public:
- static const bool value = sizeof(test<TraitsT>(nullptr)) == sizeof(Yes);
-};
-
-template <class TraitsT, class NodeT> struct HasObsoleteCustomization {
- static const bool value = HasGetNext<TraitsT, NodeT>::value ||
- HasCreateSentinel<TraitsT>::value ||
- HasCreateNode<TraitsT, NodeT>::value;
-};
-
-} // end namespace ilist_detail
-
//===----------------------------------------------------------------------===//
//
/// A wrapper around an intrusive list with callbacks and non-intrusive
@@ -182,13 +125,6 @@ public:
typename base_list_type::const_reverse_iterator const_reverse_iterator;
private:
- // TODO: Drop this assertion and the transitive type traits anytime after
- // v4.0 is branched (i.e,. keep them for one release to help out-of-tree code
- // update).
- static_assert(
- !ilist_detail::HasObsoleteCustomization<TraitsT, value_type>::value,
- "ilist customization points have changed!");
-
static bool op_less(const_reference L, const_reference R) { return L < R; }
static bool op_equal(const_reference L, const_reference R) { return L == R; }
diff --git a/llvm/include/llvm/ADT/iterator_range.h b/llvm/include/llvm/ADT/iterator_range.h
index a9b46a3aa45b..8c37455dc219 100644
--- a/llvm/include/llvm/ADT/iterator_range.h
+++ b/llvm/include/llvm/ADT/iterator_range.h
@@ -18,10 +18,22 @@
#ifndef LLVM_ADT_ITERATOR_RANGE_H
#define LLVM_ADT_ITERATOR_RANGE_H
+#include "llvm/ADT/ADL.h"
+#include <type_traits>
#include <utility>
namespace llvm {
+template <typename From, typename To, typename = void>
+struct explicitly_convertible : std::false_type {};
+
+template <typename From, typename To>
+struct explicitly_convertible<
+ From, To,
+ std::void_t<decltype(static_cast<To>(
+ std::declval<std::add_rvalue_reference_t<From>>()))>> : std::true_type {
+};
+
/// A range adaptor for a pair of iterators.
///
/// This just wraps two iterators into a range-compatible interface. Nothing
@@ -31,12 +43,19 @@ class iterator_range {
IteratorT begin_iterator, end_iterator;
public:
- //TODO: Add SFINAE to test that the Container's iterators match the range's
- // iterators.
+#if __GNUC__ == 7
+  // Be careful not to break gcc-7 on the mlir target.
+ // See https://github.com/llvm/llvm-project/issues/63843
template <typename Container>
+#else
+ template <typename Container,
+ std::enable_if_t<explicitly_convertible<
+ detail::IterOfRange<Container>, IteratorT>::value> * = nullptr>
+#endif
iterator_range(Container &&c)
- //TODO: Consider ADL/non-member begin/end calls.
- : begin_iterator(c.begin()), end_iterator(c.end()) {}
+ : begin_iterator(adl_begin(std::forward<Container>(c))),
+ end_iterator(adl_end(std::forward<Container>(c))) {
+ }
iterator_range(IteratorT begin_iterator, IteratorT end_iterator)
: begin_iterator(std::move(begin_iterator)),
end_iterator(std::move(end_iterator)) {}
@@ -46,6 +65,9 @@ public:
bool empty() const { return begin_iterator == end_iterator; }
};
+template <typename Container>
+iterator_range(Container &&) -> iterator_range<detail::IterOfRange<Container>>;
+
/// Convenience function for iterating over sub-ranges.
///
/// This provides a bit of syntactic sugar to make using sub-ranges
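A sketch of what the new constructor constraint and deduction guide enable (container choice and function name are illustrative): iterator_range can now be deduced directly from any range whose begin/end are found via ADL:

    #include "llvm/ADT/iterator_range.h"
    #include <vector>

    int sumSketch(const std::vector<int> &V) {
      // Deduces iterator_range<std::vector<int>::const_iterator>.
      llvm::iterator_range R(V);
      int Sum = 0;
      for (int X : R)
        Sum += X;
      return Sum;
    }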
diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
index 953e15e358f1..8da8d516499a 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -54,7 +54,6 @@ namespace llvm {
class AnalysisUsage;
class AtomicCmpXchgInst;
-class BasicAAResult;
class BasicBlock;
class CatchPadInst;
class CatchReturnInst;
@@ -116,6 +115,15 @@ public:
operator Kind() const { return static_cast<Kind>(Alias); }
+ bool operator==(const AliasResult &Other) const {
+ return Alias == Other.Alias && HasOffset == Other.HasOffset &&
+ Offset == Other.Offset;
+ }
+ bool operator!=(const AliasResult &Other) const { return !(*this == Other); }
+
+ bool operator==(Kind K) const { return Alias == K; }
+ bool operator!=(Kind K) const { return !(*this == K); }
+
constexpr bool hasOffset() const { return HasOffset; }
constexpr int32_t getOffset() const {
assert(HasOffset && "No offset!");
@@ -964,8 +972,6 @@ struct ExternalAAWrapperPass : ImmutablePass {
}
};
-FunctionPass *createAAResultsWrapperPass();
-
/// A wrapper pass around a callback which can be used to populate the
/// AAResults in the AAResultsWrapperPass from an external AA.
///
@@ -976,19 +982,6 @@ FunctionPass *createAAResultsWrapperPass();
ImmutablePass *createExternalAAWrapperPass(
std::function<void(Pass &, Function &, AAResults &)> Callback);
-/// A helper for the legacy pass manager to create a \c AAResults
-/// object populated to the best of our ability for a particular function when
-/// inside of a \c ModulePass or a \c CallGraphSCCPass.
-///
-/// If a \c ModulePass or a \c CallGraphSCCPass calls \p
-/// createLegacyPMAAResults, it also needs to call \p addUsedAAAnalyses in \p
-/// getAnalysisUsage.
-AAResults createLegacyPMAAResults(Pass &P, Function &F, BasicAAResult &BAR);
-
-/// A helper for the legacy pass manager to populate \p AU to add uses to make
-/// sure the analyses required by \p createLegacyPMAAResults are available.
-void getAAResultsAnalysisUsage(AnalysisUsage &AU);
-
} // end namespace llvm
#endif // LLVM_ANALYSIS_ALIASANALYSIS_H
diff --git a/llvm/include/llvm/Analysis/AssumeBundleQueries.h b/llvm/include/llvm/Analysis/AssumeBundleQueries.h
index 785980130386..b3f499faa14e 100644
--- a/llvm/include/llvm/Analysis/AssumeBundleQueries.h
+++ b/llvm/include/llvm/Analysis/AssumeBundleQueries.h
@@ -1,4 +1,4 @@
-//===- AssumeBundleQueries.h - utilities to query assume bundles *- C++ -*-===//
+//===- AssumeBundleQueries.h - utils to query assume bundles ----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/include/llvm/Analysis/AssumptionCache.h b/llvm/include/llvm/Analysis/AssumptionCache.h
index 838426d71288..12dd9b04c932 100644
--- a/llvm/include/llvm/Analysis/AssumptionCache.h
+++ b/llvm/include/llvm/Analysis/AssumptionCache.h
@@ -26,7 +26,7 @@
namespace llvm {
-class CondGuardInst;
+class AssumeInst;
class Function;
class raw_ostream;
class TargetTransformInfo;
@@ -120,15 +120,15 @@ public:
///
/// The call passed in must be an instruction within this function and must
/// not already be in the cache.
- void registerAssumption(CondGuardInst *CI);
+ void registerAssumption(AssumeInst *CI);
/// Remove an \@llvm.assume intrinsic from this function's cache if it has
/// been added to the cache earlier.
- void unregisterAssumption(CondGuardInst *CI);
+ void unregisterAssumption(AssumeInst *CI);
/// Update the cache of values being affected by this assumption (i.e.
/// the values about which this assumption provides information).
- void updateAffectedValues(CondGuardInst *CI);
+ void updateAffectedValues(AssumeInst *CI);
/// Clear the cache of \@llvm.assume intrinsics for a function.
///
diff --git a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
index a2735f039a01..ca67e0905c5f 100644
--- a/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -31,7 +31,6 @@ class GEPOperator;
class PHINode;
class SelectInst;
class TargetLibraryInfo;
-class PhiValues;
class Value;
/// This is the AA result object for the basic, local, and stateless alias
@@ -176,28 +175,6 @@ public:
FunctionPass *createBasicAAWrapperPass();
-/// A helper for the legacy pass manager to create a \c BasicAAResult object
-/// populated to the best of our ability for a particular function when inside
-/// of a \c ModulePass or a \c CallGraphSCCPass.
-BasicAAResult createLegacyPMBasicAAResult(Pass &P, Function &F);
-
-/// This class is a functor to be used in legacy module or SCC passes for
-/// computing AA results for a function. We store the results in fields so that
-/// they live long enough to be queried, but we re-use them each time.
-class LegacyAARGetter {
- Pass &P;
- std::optional<BasicAAResult> BAR;
- std::optional<AAResults> AAR;
-
-public:
- LegacyAARGetter(Pass &P) : P(P) {}
- AAResults &operator()(Function &F) {
- BAR.emplace(createLegacyPMBasicAAResult(P, F));
- AAR.emplace(createLegacyPMAAResults(P, F, *BAR));
- return *AAR;
- }
-};
-
} // end namespace llvm
#endif // LLVM_ANALYSIS_BASICALIASANALYSIS_H
diff --git a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index 2186ace5a942..9d96748874a0 100644
--- a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -25,6 +25,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
@@ -311,14 +312,14 @@ public:
/// the context of distributing mass, L will be the number of loop headers
/// an early exit edge jumps out of.
BlockNode getResolvedNode() const {
- auto L = getPackagedLoop();
+ auto *L = getPackagedLoop();
return L ? L->getHeader() : Node;
}
LoopData *getPackagedLoop() const {
if (!Loop || !Loop->IsPackaged)
return nullptr;
- auto L = Loop;
+ auto *L = Loop;
while (L->Parent && L->Parent->IsPackaged)
L = L->Parent;
return L;
@@ -1655,7 +1656,7 @@ template <class BT> struct BlockEdgesAdder {
void operator()(IrreducibleGraph &G, IrreducibleGraph::IrrNode &Irr,
const LoopData *OuterLoop) {
const BlockT *BB = BFI.RPOT[Irr.Node.Index];
- for (const auto Succ : children<const BlockT *>(BB))
+ for (const auto *Succ : children<const BlockT *>(BB))
G.addEdge(Irr, BFI.getNode(Succ), OuterLoop);
}
};
diff --git a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
index 14d3080b5053..fb02997371bf 100644
--- a/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -189,6 +189,9 @@ public:
/// unset for source.
void copyEdgeProbabilities(BasicBlock *Src, BasicBlock *Dst);
+ /// Swap outgoing edges probabilities for \p Src with branch terminator
+ void swapSuccEdgesProbabilities(const BasicBlock *Src);
+
static BranchProbability getBranchProbStackProtector(bool IsLikely) {
static const BranchProbability LikelyProb((1u << 20) - 1, 1u << 20);
return IsLikely ? LikelyProb : LikelyProb.getCompl();
diff --git a/llvm/include/llvm/Analysis/CFGPrinter.h b/llvm/include/llvm/Analysis/CFGPrinter.h
index eeac11bc7af1..8c4c0c3f182f 100644
--- a/llvm/include/llvm/Analysis/CFGPrinter.h
+++ b/llvm/include/llvm/Analysis/CFGPrinter.h
@@ -35,21 +35,25 @@ template <class GraphType> struct GraphTraits;
class CFGViewerPass : public PassInfoMixin<CFGViewerPass> {
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ static bool isRequired() { return true; }
};
class CFGOnlyViewerPass : public PassInfoMixin<CFGOnlyViewerPass> {
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ static bool isRequired() { return true; }
};
class CFGPrinterPass : public PassInfoMixin<CFGPrinterPass> {
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ static bool isRequired() { return true; }
};
class CFGOnlyPrinterPass : public PassInfoMixin<CFGOnlyPrinterPass> {
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ static bool isRequired() { return true; }
};
class DOTFuncInfo {
diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h
index 9d1b331346b6..0264b36a1d12 100644
--- a/llvm/include/llvm/Analysis/CGSCCPassManager.h
+++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h
@@ -159,7 +159,7 @@ struct RequireAnalysisPass<AnalysisT, LazyCallGraph::SCC, CGSCCAnalysisManager,
function_ref<StringRef(StringRef)> MapClassName2PassName) {
auto ClassName = AnalysisT::name();
auto PassName = MapClassName2PassName(ClassName);
- OS << "require<" << PassName << ">";
+ OS << "require<" << PassName << '>';
}
};
@@ -357,7 +357,7 @@ public:
function_ref<StringRef(StringRef)> MapClassName2PassName) {
OS << "cgscc(";
Pass->printPipeline(OS, MapClassName2PassName);
- OS << ")";
+ OS << ')';
}
static bool isRequired() { return true; }
@@ -487,11 +487,19 @@ public:
void printPipeline(raw_ostream &OS,
function_ref<StringRef(StringRef)> MapClassName2PassName) {
OS << "function";
- if (EagerlyInvalidate)
- OS << "<eager-inv>";
- OS << "(";
+ if (EagerlyInvalidate || NoRerun) {
+ OS << "<";
+ if (EagerlyInvalidate)
+ OS << "eager-inv";
+ if (EagerlyInvalidate && NoRerun)
+ OS << ";";
+ if (NoRerun)
+ OS << "no-rerun";
+ OS << ">";
+ }
+ OS << '(';
Pass->printPipeline(OS, MapClassName2PassName);
- OS << ")";
+ OS << ')';
}
static bool isRequired() { return true; }
@@ -567,7 +575,7 @@ public:
function_ref<StringRef(StringRef)> MapClassName2PassName) {
OS << "devirt<" << MaxIterations << ">(";
Pass->printPipeline(OS, MapClassName2PassName);
- OS << ")";
+ OS << ')';
}
private:
diff --git a/llvm/include/llvm/Analysis/ConstantFolding.h b/llvm/include/llvm/Analysis/ConstantFolding.h
index 23ec7d6b70ec..169a1d0c48e6 100644
--- a/llvm/include/llvm/Analysis/ConstantFolding.h
+++ b/llvm/include/llvm/Analysis/ConstantFolding.h
@@ -68,28 +68,26 @@ Constant *ConstantFoldInstOperands(Instruction *I, ArrayRef<Constant *> Ops,
const TargetLibraryInfo *TLI = nullptr);
/// Attempt to constant fold a compare instruction (icmp/fcmp) with the
-/// specified operands. If it fails, it returns a constant expression of the
-/// specified operands.
+/// specified operands. Returns null or a constant expression of the specified
+/// operands on failure.
/// Denormal inputs may be flushed based on the denormal handling mode.
Constant *ConstantFoldCompareInstOperands(
unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL,
const TargetLibraryInfo *TLI = nullptr, const Instruction *I = nullptr);
-/// Attempt to constant fold a unary operation with the specified
-/// operand. If it fails, it returns a constant expression of the specified
-/// operands.
+/// Attempt to constant fold a unary operation with the specified operand.
+/// Returns null on failure.
Constant *ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
const DataLayout &DL);
-/// Attempt to constant fold a binary operation with the specified
-/// operands. If it fails, it returns a constant expression of the specified
-/// operands.
+/// Attempt to constant fold a binary operation with the specified operands.
+/// Returns null or a constant expression of the specified operands on failure.
Constant *ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
Constant *RHS, const DataLayout &DL);
/// Attempt to constant fold a floating point binary operation with the
-/// specified operands, applying the denormal handling mod to the operands. If
-/// it fails, it returns a constant expression of the specified operands.
+/// specified operands, applying the denormal handling mode to the operands.
+/// Returns null or a constant expression of the specified operands on failure.
Constant *ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
Constant *RHS, const DataLayout &DL,
const Instruction *I);
@@ -99,6 +97,9 @@ Constant *ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
/// correct sign, otherwise return the original constant. Inputs and outputs to
/// floating point instructions can have their mode set separately, so the
/// direction is also needed.
+///
+/// If the calling function's "denormal-fp-math" input mode is "dynamic" for the
+/// floating-point type, returns nullptr for denormal inputs.
Constant *FlushFPConstant(Constant *Operand, const Instruction *I,
bool IsOutput);
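An illustrative sketch of the documented contract (nullptr now signals "could not fold"); the constants, opcode, and function name here are arbitrary:

    #include "llvm/Analysis/ConstantFolding.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Type.h"

    llvm::Constant *foldAddSketch(llvm::LLVMContext &Ctx, const llvm::DataLayout &DL) {
      auto *Ty = llvm::Type::getInt32Ty(Ctx);
      auto *LHS = llvm::ConstantInt::get(Ty, 2);
      auto *RHS = llvm::ConstantInt::get(Ty, 3);
      // Folds to i32 5; a nullptr result would mean the fold is not possible.
      return llvm::ConstantFoldBinaryOpOperands(llvm::Instruction::Add, LHS, RHS, DL);
    }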
diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h
index 719fe339cf78..5d3bc64bf8b4 100644
--- a/llvm/include/llvm/Analysis/ConstraintSystem.h
+++ b/llvm/include/llvm/Analysis/ConstraintSystem.h
@@ -11,17 +11,48 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/MathExtras.h"
#include <string>
namespace llvm {
+class Value;
class ConstraintSystem {
+ struct Entry {
+ int64_t Coefficient;
+ uint16_t Id;
+
+ Entry(int64_t Coefficient, uint16_t Id)
+ : Coefficient(Coefficient), Id(Id) {}
+ };
+
+ static int64_t getConstPart(const Entry &E) {
+ if (E.Id == 0)
+ return E.Coefficient;
+ return 0;
+ }
+
+ static int64_t getLastCoefficient(ArrayRef<Entry> Row, uint16_t Id) {
+ if (Row.empty())
+ return 0;
+ if (Row.back().Id == Id)
+ return Row.back().Coefficient;
+ return 0;
+ }
+
+ size_t NumVariables = 0;
+
/// Current linear constraints in the system.
/// An entry of the form c0, c1, ... cn represents the following constraint:
/// c0 >= v0 * c1 + .... + v{n-1} * cn
- SmallVector<SmallVector<int64_t, 8>, 4> Constraints;
+ SmallVector<SmallVector<Entry, 8>, 4> Constraints;
+
+ /// A map of variables (IR values) to their corresponding index in the
+ /// constraint system.
+ DenseMap<Value *, unsigned> Value2Index;
/// Current greatest common divisor for all coefficients in the system.
uint32_t GCD = 1;
@@ -29,39 +60,58 @@ class ConstraintSystem {
// Eliminate constraints from the system using Fourier–Motzkin elimination.
bool eliminateUsingFM();
- /// Print the constraints in the system, using x0...xn as variable names.
- void dump() const;
-
/// Returns true if there may be a solution for the constraints in the system.
bool mayHaveSolutionImpl();
+ /// Get list of variable names from the Value2Index map.
+ SmallVector<std::string> getVarNamesList() const;
+
public:
+ ConstraintSystem() {}
+ ConstraintSystem(ArrayRef<Value *> FunctionArgs) {
+ NumVariables += FunctionArgs.size();
+ for (auto *Arg : FunctionArgs) {
+ Value2Index.insert({Arg, Value2Index.size() + 1});
+ }
+ }
+ ConstraintSystem(const DenseMap<Value *, unsigned> &Value2Index)
+ : NumVariables(Value2Index.size()), Value2Index(Value2Index) {}
+
bool addVariableRow(ArrayRef<int64_t> R) {
- assert(Constraints.empty() || R.size() == Constraints.back().size());
+ assert(Constraints.empty() || R.size() == NumVariables);
// If all variable coefficients are 0, the constraint does not provide any
// usable information.
if (all_of(ArrayRef(R).drop_front(1), [](int64_t C) { return C == 0; }))
return false;
- for (const auto &C : R) {
+ SmallVector<Entry, 4> NewRow;
+ for (const auto &[Idx, C] : enumerate(R)) {
+ if (C == 0)
+ continue;
auto A = std::abs(C);
GCD = APIntOps::GreatestCommonDivisor({32, (uint32_t)A}, {32, GCD})
.getZExtValue();
+
+ NewRow.emplace_back(C, Idx);
}
- Constraints.emplace_back(R.begin(), R.end());
+ if (Constraints.empty())
+ NumVariables = R.size();
+ Constraints.push_back(std::move(NewRow));
return true;
}
+ DenseMap<Value *, unsigned> &getValue2Index() { return Value2Index; }
+ const DenseMap<Value *, unsigned> &getValue2Index() const {
+ return Value2Index;
+ }
+
bool addVariableRowFill(ArrayRef<int64_t> R) {
// If all variable coefficients are 0, the constraint does not provide any
// usable information.
if (all_of(ArrayRef(R).drop_front(1), [](int64_t C) { return C == 0; }))
return false;
- for (auto &CR : Constraints) {
- while (CR.size() != R.size())
- CR.push_back(0);
- }
+ NumVariables = std::max(R.size(), NumVariables);
return addVariableRow(R);
}
@@ -72,27 +122,54 @@ public:
// The negated constraint R is obtained by multiplying by -1 and adding 1 to
// the constant.
R[0] += 1;
+ return negateOrEqual(R);
+ }
+
+ /// Multiplies each coefficient in the given vector by -1. Does not modify the
+ /// original vector.
+ ///
+ /// \param R The vector of coefficients to be negated.
+ static SmallVector<int64_t, 8> negateOrEqual(SmallVector<int64_t, 8> R) {
+ // The negated constraint R is obtained by multiplying by -1.
for (auto &C : R)
- C *= -1;
+ if (MulOverflow(C, int64_t(-1), C))
+ return {};
+ return R;
+ }
+
+ /// Converts the given vector to form a strict less than inequality. Does not
+ /// modify the original vector.
+ ///
+ /// \param R The vector of coefficients to be converted.
+ static SmallVector<int64_t, 8> toStrictLessThan(SmallVector<int64_t, 8> R) {
+ // The strict less than is obtained by subtracting 1 from the constant.
+ if (SubOverflow(R[0], int64_t(1), R[0])) {
+ return {};
+ }
return R;
}
bool isConditionImplied(SmallVector<int64_t, 8> R) const;
- ArrayRef<int64_t> getLastConstraint() { return Constraints[0]; }
+ SmallVector<int64_t> getLastConstraint() const {
+ assert(!Constraints.empty() && "Constraint system is empty");
+ SmallVector<int64_t> Result(NumVariables, 0);
+ for (auto &Entry : Constraints.back())
+ Result[Entry.Id] = Entry.Coefficient;
+ return Result;
+ }
+
void popLastConstraint() { Constraints.pop_back(); }
void popLastNVariables(unsigned N) {
- for (auto &C : Constraints) {
- for (unsigned i = 0; i < N; i++)
- C.pop_back();
- }
+ assert(NumVariables > N);
+ NumVariables -= N;
}
/// Returns the number of rows in the constraint system.
unsigned size() const { return Constraints.size(); }
- /// Print the constraints in the system, using \p Names as variable names.
- void dump(ArrayRef<std::string> Names) const;
+ /// Print the constraints in the system.
+ void dump() const;
};
} // namespace llvm
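A rough sketch of the row encoding described in the class comment (c0 >= v0*c1 + ... + v{n-1}*cn); the concrete coefficients are made up and the expected result is inferred from the documented interface only:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/ConstraintSystem.h"
    #include <cstdint>

    bool constraintSketch() {
      llvm::ConstraintSystem CS;
      // Row {c0, c1} encodes c0 >= v0 * c1, so {5, 1} states v0 <= 5.
      CS.addVariableRowFill({5, 1});
      // Ask whether v0 <= 10 follows; same encoding: 10 >= v0.
      llvm::SmallVector<int64_t, 8> Query = {10, 1};
      return CS.isConditionImplied(Query); // expected: true
    }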
diff --git a/llvm/include/llvm/Analysis/CycleAnalysis.h b/llvm/include/llvm/Analysis/CycleAnalysis.h
index 30bf6856b69b..f9f5e5b95b1d 100644
--- a/llvm/include/llvm/Analysis/CycleAnalysis.h
+++ b/llvm/include/llvm/Analysis/CycleAnalysis.h
@@ -15,7 +15,7 @@
#ifndef LLVM_ANALYSIS_CYCLEANALYSIS_H
#define LLVM_ANALYSIS_CYCLEANALYSIS_H
-#include "llvm/ADT/GenericCycleInfo.h"
+#include "llvm/IR/CycleInfo.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/SSAContext.h"
#include "llvm/Pass.h"
diff --git a/llvm/include/llvm/Analysis/DemandedBits.h b/llvm/include/llvm/Analysis/DemandedBits.h
index 1a2e7238f86c..d7709b742378 100644
--- a/llvm/include/llvm/Analysis/DemandedBits.h
+++ b/llvm/include/llvm/Analysis/DemandedBits.h
@@ -25,7 +25,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
#include <optional>
namespace llvm {
@@ -99,26 +98,6 @@ private:
SmallPtrSet<Use *, 16> DeadUses;
};
-class DemandedBitsWrapperPass : public FunctionPass {
-private:
- mutable std::optional<DemandedBits> DB;
-
-public:
- static char ID; // Pass identification, replacement for typeid
-
- DemandedBitsWrapperPass();
-
- bool runOnFunction(Function &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- /// Clean up memory in between runs
- void releaseMemory() override;
-
- DemandedBits &getDemandedBits() { return *DB; }
-
- void print(raw_ostream &OS, const Module *M) const override;
-};
-
/// An analysis that produces \c DemandedBits for a function.
class DemandedBitsAnalysis : public AnalysisInfoMixin<DemandedBitsAnalysis> {
friend AnalysisInfoMixin<DemandedBitsAnalysis>;
@@ -144,9 +123,6 @@ public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
-/// Create a demanded bits analysis pass.
-FunctionPass *createDemandedBitsWrapperPass();
-
} // end namespace llvm
#endif // LLVM_ANALYSIS_DEMANDEDBITS_H
diff --git a/llvm/include/llvm/Analysis/DependenceGraphBuilder.h b/llvm/include/llvm/Analysis/DependenceGraphBuilder.h
index e0dbdcdaa749..f490f20e7c19 100644
--- a/llvm/include/llvm/Analysis/DependenceGraphBuilder.h
+++ b/llvm/include/llvm/Analysis/DependenceGraphBuilder.h
@@ -156,7 +156,7 @@ protected:
/// Given an instruction \p I return its associated ordinal number.
size_t getOrdinal(Instruction &I) {
- assert(InstOrdinalMap.find(&I) != InstOrdinalMap.end() &&
+ assert(InstOrdinalMap.contains(&I) &&
"No ordinal computed for this instruction.");
return InstOrdinalMap[&I];
}
diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h
deleted file mode 100644
index 4c2a5399ea54..000000000000
--- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h
+++ /dev/null
@@ -1,210 +0,0 @@
-//===- llvm/Analysis/DivergenceAnalysis.h - Divergence Analysis -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// \file
-// The divergence analysis determines which instructions and branches are
-// divergent given a set of divergent source instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ANALYSIS_DIVERGENCEANALYSIS_H
-#define LLVM_ANALYSIS_DIVERGENCEANALYSIS_H
-
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/Analysis/SyncDependenceAnalysis.h"
-#include "llvm/IR/PassManager.h"
-#include <vector>
-
-namespace llvm {
-class Function;
-class Instruction;
-class Loop;
-class raw_ostream;
-class TargetTransformInfo;
-class Value;
-
-/// \brief Generic divergence analysis for reducible CFGs.
-///
-/// This analysis propagates divergence in a data-parallel context from sources
-/// of divergence to all users. It requires reducible CFGs. All assignments
-/// should be in SSA form.
-class DivergenceAnalysisImpl {
-public:
- /// \brief This instance will analyze the whole function \p F or the loop \p
- /// RegionLoop.
- ///
- /// \param RegionLoop if non-null the analysis is restricted to \p RegionLoop.
- /// Otherwise the whole function is analyzed.
- /// \param IsLCSSAForm whether the analysis may assume that the IR in the
- /// region in LCSSA form.
- DivergenceAnalysisImpl(const Function &F, const Loop *RegionLoop,
- const DominatorTree &DT, const LoopInfo &LI,
- SyncDependenceAnalysis &SDA, bool IsLCSSAForm);
-
- /// \brief The loop that defines the analyzed region (if any).
- const Loop *getRegionLoop() const { return RegionLoop; }
- const Function &getFunction() const { return F; }
-
- /// \brief Whether \p BB is part of the region.
- bool inRegion(const BasicBlock &BB) const;
- /// \brief Whether \p I is part of the region.
- bool inRegion(const Instruction &I) const;
-
- /// \brief Mark \p UniVal as a value that is always uniform.
- void addUniformOverride(const Value &UniVal);
-
- /// \brief Mark \p DivVal as a value that is always divergent. Will not do so
- /// if `isAlwaysUniform(DivVal)`.
- /// \returns Whether the tracked divergence state of \p DivVal changed.
- bool markDivergent(const Value &DivVal);
-
- /// \brief Propagate divergence to all instructions in the region.
- /// Divergence is seeded by calls to \p markDivergent.
- void compute();
-
- /// \brief Whether any value was marked or analyzed to be divergent.
- bool hasDetectedDivergence() const { return !DivergentValues.empty(); }
-
- /// \brief Whether \p Val will always return a uniform value regardless of its
- /// operands
- bool isAlwaysUniform(const Value &Val) const;
-
- /// \brief Whether \p Val is divergent at its definition.
- bool isDivergent(const Value &Val) const;
-
- /// \brief Whether \p U is divergent. Uses of a uniform value can be
- /// divergent.
- bool isDivergentUse(const Use &U) const;
-
-private:
- /// \brief Mark \p Term as divergent and push all Instructions that become
- /// divergent as a result on the worklist.
- void analyzeControlDivergence(const Instruction &Term);
- /// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on
- /// the worklist.
- void taintAndPushPhiNodes(const BasicBlock &JoinBlock);
-
- /// \brief Identify all Instructions that become divergent because \p DivExit
- /// is a divergent loop exit of \p DivLoop. Mark those instructions as
- /// divergent and push them on the worklist.
- void propagateLoopExitDivergence(const BasicBlock &DivExit,
- const Loop &DivLoop);
-
- /// \brief Internal implementation function for propagateLoopExitDivergence.
- void analyzeLoopExitDivergence(const BasicBlock &DivExit,
- const Loop &OuterDivLoop);
-
- /// \brief Mark all instruction as divergent that use a value defined in \p
- /// OuterDivLoop. Push their users on the worklist.
- void analyzeTemporalDivergence(const Instruction &I,
- const Loop &OuterDivLoop);
-
- /// \brief Push all users of \p Val (in the region) to the worklist.
- void pushUsers(const Value &I);
-
- /// \brief Whether \p Val is divergent when read in \p ObservingBlock.
- bool isTemporalDivergent(const BasicBlock &ObservingBlock,
- const Value &Val) const;
-
-private:
- const Function &F;
- // If regionLoop != nullptr, analysis is only performed within \p RegionLoop.
- // Otherwise, analyze the whole function
- const Loop *RegionLoop;
-
- const DominatorTree &DT;
- const LoopInfo &LI;
-
- // Recognized divergent loops
- DenseSet<const Loop *> DivergentLoops;
-
- // The SDA links divergent branches to divergent control-flow joins.
- SyncDependenceAnalysis &SDA;
-
- // Use simplified code path for LCSSA form.
- bool IsLCSSAForm;
-
- // Set of known-uniform values.
- DenseSet<const Value *> UniformOverrides;
-
- // Detected/marked divergent values.
- DenseSet<const Value *> DivergentValues;
-
- // Internal worklist for divergence propagation.
- std::vector<const Instruction *> Worklist;
-};
-
-class DivergenceInfo {
- Function &F;
-
- // If the function contains an irreducible region the divergence
- // analysis can run indefinitely. We set ContainsIrreducible and no
- // analysis is actually performed on the function. All values in
- // this function are conservatively reported as divergent instead.
- bool ContainsIrreducible = false;
- std::unique_ptr<SyncDependenceAnalysis> SDA;
- std::unique_ptr<DivergenceAnalysisImpl> DA;
-
-public:
- DivergenceInfo(Function &F, const DominatorTree &DT,
- const PostDominatorTree &PDT, const LoopInfo &LI,
- const TargetTransformInfo &TTI, bool KnownReducible);
-
- /// Whether any divergence was detected.
- bool hasDivergence() const {
- return ContainsIrreducible || DA->hasDetectedDivergence();
- }
-
- /// The GPU kernel this analysis result is for
- const Function &getFunction() const { return F; }
-
- /// Whether \p V is divergent at its definition.
- bool isDivergent(const Value &V) const {
- return ContainsIrreducible || DA->isDivergent(V);
- }
-
- /// Whether \p U is divergent. Uses of a uniform value can be divergent.
- bool isDivergentUse(const Use &U) const {
- return ContainsIrreducible || DA->isDivergentUse(U);
- }
-
- /// Whether \p V is uniform/non-divergent.
- bool isUniform(const Value &V) const { return !isDivergent(V); }
-
- /// Whether \p U is uniform/non-divergent. Uses of a uniform value can be
- /// divergent.
- bool isUniformUse(const Use &U) const { return !isDivergentUse(U); }
-};
-
-/// \brief Divergence analysis frontend for GPU kernels.
-class DivergenceAnalysis : public AnalysisInfoMixin<DivergenceAnalysis> {
- friend AnalysisInfoMixin<DivergenceAnalysis>;
-
- static AnalysisKey Key;
-
-public:
- using Result = DivergenceInfo;
-
- /// Runs the divergence analysis on @F, a GPU kernel
- Result run(Function &F, FunctionAnalysisManager &AM);
-};
-
-/// Printer pass to dump divergence analysis results.
-struct DivergenceAnalysisPrinterPass
- : public PassInfoMixin<DivergenceAnalysisPrinterPass> {
- DivergenceAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
-
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
-
-private:
- raw_ostream &OS;
-}; // class DivergenceAnalysisPrinterPass
-
-} // namespace llvm
-
-#endif // LLVM_ANALYSIS_DIVERGENCEANALYSIS_H
diff --git a/llvm/include/llvm/Analysis/EHUtils.h b/llvm/include/llvm/Analysis/EHUtils.h
new file mode 100644
index 000000000000..728ab53c89bc
--- /dev/null
+++ b/llvm/include/llvm/Analysis/EHUtils.h
@@ -0,0 +1,90 @@
+//===-- Analysis/EHUtils.h - Exception handling related utils --*-//C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+
+#ifndef LLVM_ANALYSIS_EHUTILS_H
+#define LLVM_ANALYSIS_EHUTILS_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+
+namespace llvm {
+
+/// Compute a list of blocks that are only reachable via EH paths.
+template <typename FunctionT, typename BlockT>
+static void computeEHOnlyBlocks(FunctionT &F, DenseSet<BlockT *> &EHBlocks) {
+  // A block is Unknown if it is not reachable from anywhere,
+  // EH if it is only reachable from the start block via some path through EH
+  // pads, and NonEH if it is reachable from non-EH blocks as well.
+ enum Status { Unknown = 0, EH = 1, NonEH = 2 };
+ DenseSet<BlockT *> WorkList;
+ DenseMap<BlockT *, Status> Statuses;
+
+ auto GetStatus = [&](BlockT *BB) {
+ if (Statuses.find(BB) != Statuses.end())
+ return Statuses[BB];
+ else
+ return Unknown;
+ };
+
+ auto CheckPredecessors = [&](BlockT *BB, Status Stat) {
+ for (auto *PredBB : predecessors(BB)) {
+ Status PredStatus = GetStatus(PredBB);
+      // If the status of a predecessor block is higher than the current
+      // block's, update the current block's status.
+ if (PredStatus > Stat)
+ Stat = PredStatus;
+ }
+ return Stat;
+ };
+
+ auto AddSuccesors = [&](BlockT *BB) {
+ for (auto *SuccBB : successors(BB)) {
+ if (!SuccBB->isEHPad())
+ WorkList.insert(SuccBB);
+ }
+ };
+
+  // Insert the successors of the start block and of the landing pads.
+ BlockT *StartBlock = &F.front();
+ Statuses[StartBlock] = NonEH;
+ AddSuccesors(StartBlock);
+
+ for (auto &BB : F) {
+ if (BB.isEHPad()) {
+ AddSuccesors(&BB);
+ Statuses[&BB] = EH;
+ }
+ }
+
+ // Worklist iterative algorithm.
+ while (!WorkList.empty()) {
+ auto *BB = *WorkList.begin();
+ WorkList.erase(BB);
+
+ Status OldStatus = GetStatus(BB);
+
+ // Check on predecessors and check for
+ // Status update.
+ Status NewStatus = CheckPredecessors(BB, OldStatus);
+
+ // Did the block status change?
+ bool Changed = OldStatus != NewStatus;
+ if (Changed) {
+ AddSuccesors(BB);
+ Statuses[BB] = NewStatus;
+ }
+ }
+
+ EHBlocks.clear();
+ for (auto Entry : Statuses) {
+ if (Entry.second == EH)
+ EHBlocks.insert(Entry.first);
+ }
+}
+} // namespace llvm
+
+#endif
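An illustrative IR-level use of the new helper (the wrapper function is hypothetical, not part of the patch):

    #include "llvm/ADT/DenseSet.h"
    #include "llvm/Analysis/EHUtils.h"
    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Function.h"

    // Hypothetical helper: is BB reachable only through exception-handling paths?
    static bool isEHOnlyBlock(llvm::Function &F, llvm::BasicBlock &BB) {
      llvm::DenseSet<llvm::BasicBlock *> EHBlocks;
      llvm::computeEHOnlyBlocks(F, EHBlocks);
      return EHBlocks.contains(&BB);
    }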
diff --git a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
index cd32979b9ea5..85d98a05bbd7 100644
--- a/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
+++ b/llvm/include/llvm/Analysis/FunctionPropertiesAnalysis.h
@@ -20,6 +20,7 @@
#include "llvm/IR/PassManager.h"
namespace llvm {
+class DominatorTree;
class Function;
class LoopInfo;
@@ -31,7 +32,11 @@ class FunctionPropertiesInfo {
public:
static FunctionPropertiesInfo
- getFunctionPropertiesInfo(const Function &F, FunctionAnalysisManager &FAM);
+ getFunctionPropertiesInfo(const Function &F, const DominatorTree &DT,
+ const LoopInfo &LI);
+
+ static FunctionPropertiesInfo
+ getFunctionPropertiesInfo(Function &F, FunctionAnalysisManager &FAM);
bool operator==(const FunctionPropertiesInfo &FPI) const {
return std::memcmp(this, &FPI, sizeof(FunctionPropertiesInfo)) == 0;
@@ -109,14 +114,21 @@ public:
/// inlining.
class FunctionPropertiesUpdater {
public:
- FunctionPropertiesUpdater(FunctionPropertiesInfo &FPI, const CallBase &CB);
+ FunctionPropertiesUpdater(FunctionPropertiesInfo &FPI, CallBase &CB);
void finish(FunctionAnalysisManager &FAM) const;
+ bool finishAndTest(FunctionAnalysisManager &FAM) const {
+ finish(FAM);
+ return isUpdateValid(Caller, FPI, FAM);
+ }
private:
FunctionPropertiesInfo &FPI;
- const BasicBlock &CallSiteBB;
- const Function &Caller;
+ BasicBlock &CallSiteBB;
+ Function &Caller;
+
+ static bool isUpdateValid(Function &F, const FunctionPropertiesInfo &FPI,
+ FunctionAnalysisManager &FAM);
DenseSet<const BasicBlock *> Successors;
};
diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
index 91af95b3f002..ad137baff5d4 100644
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -127,7 +127,7 @@ struct IRInstructionData
/// This is only relevant if we are wrapping a CmpInst where we needed to
/// change the predicate of a compare instruction from a greater than form
- /// to a less than form. It is None otherwise.
+ /// to a less than form. It is std::nullopt otherwise.
std::optional<CmpInst::Predicate> RevisedPredicate;
/// This is only relevant if we are wrapping a CallInst. If we are requiring
@@ -226,6 +226,11 @@ struct IRInstructionData
void
setPHIPredecessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger);
+ /// Get the BasicBlock based operands for PHINodes and BranchInsts.
+ ///
+ /// \returns A list of relevant BasicBlocks.
+ ArrayRef<Value *> getBlockOperVals();
+
/// Hashes \p Value based on its opcode, types, and operand types.
/// Two IRInstructionData instances produce the same hash when they perform
/// the same operation.
@@ -763,6 +768,24 @@ public:
static bool compareCommutativeOperandMapping(OperandMapping A,
OperandMapping B);
+ /// Compare the GVN of the assignment value in corresponding instructions in
+ /// IRSimilarityCandidates \p A and \p B and check that there exists a mapping
+ /// between the values and replaces the mapping with a one-to-one value if
+ /// needed.
+ ///
+ /// \param InstValA - The assignment GVN from the first IRSimilarityCandidate.
+ /// \param InstValB - The assignment GVN from the second
+ /// IRSimilarityCandidate.
+ /// \param [in,out] ValueNumberMappingA - A mapping of value numbers from
+  /// candidate \p A to candidate \p B.
+  /// \param [in,out] ValueNumberMappingB - A mapping of value numbers from
+  /// candidate \p B to candidate \p A.
+ /// \returns true if the IRSimilarityCandidates assignments are compatible.
+ static bool compareAssignmentMapping(
+ const unsigned InstValA, const unsigned &InstValB,
+ DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
+ DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB);
+
/// Compare the relative locations in \p A and \p B and check that the
/// distances match if both locations are contained in the region, and that
/// the branches both point outside the region if they do not.
@@ -827,6 +850,49 @@ public:
IRSimilarityCandidate &SourceCand,
DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping);
+
+ /// Create a mapping for the value numbering of the calling
+ /// IRSimilarityCandidate, to a different separate set of numbers, based on
+ /// the canonical ordering in \p SourceCand. These are defined based on the
+ /// found mappings in \p ToSourceMapping and \p FromSourceMapping. Both of
+ /// these relationships should have the same information, just in opposite
+ /// directions. Uses the \p OneToOne mapping from target candidate to \p
+ /// SourceCand GVNs to determine the mapping first for values with multiple
+ /// mappings. This mapping is created by the ordering of operands in the
+ /// instruction they are first seen in the candidates.
+ ///
+ /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
+ /// canonical numbering from.
+ /// \param [in,out] OneToOne - A mapping of value numbers from candidate
+  /// \p A to candidate \p B using the structure of the original instructions.
+ /// \param ToSourceMapping - The mapping of value numbers from this candidate
+ /// to \p SourceCand.
+  /// \param FromSourceMapping - The mapping of value numbers from \p SourceCand
+ /// to this candidate.
+ void createCanonicalRelationFrom(
+ IRSimilarityCandidate &SourceCand,
+ DenseMap<unsigned, unsigned> &OneToOne,
+ DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
+ DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping);
+
+ /// Create a mapping for the value numbering of the calling
+ /// IRSimilarityCandidate, to a different separate set of numbers, based on
+ /// the canonical ordering in \p SourceCand. These are defined based on the
+ /// canonical mapping defined between \p SourceCandLarge and
+ /// \p TargetCandLarge. These IRSimilarityCandidates are already structurally
+ /// similar, and fully encapsulate the IRSimilarityCandidates in question.
+ /// These are used as a "bridge" from the \p SourceCand to the target.
+ ///
+ /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
+ /// canonical numbering from.
+ /// \param SourceCandLarge - The IRSimilarityCandidate fully containing
+ /// \p SourceCand.
+ /// \param TargetCandLarge - The IRSimilarityCandidate fully containing
+ /// this Candidate.
+ void createCanonicalRelationFrom(
+ IRSimilarityCandidate &SourceCand,
+ IRSimilarityCandidate &SourceCandLarge,
+ IRSimilarityCandidate &TargetCandLarge);
/// \param [in,out] BBSet - The set to track the basic blocks.
void getBasicBlocks(DenseSet<BasicBlock *> &BBSet) const {
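The compareAssignmentMapping and createCanonicalRelationFrom documentation above describes collapsing many-to-many GVN mappings (DenseMap<unsigned, DenseSet<unsigned>>) into a one-to-one canonical numbering. The following is a minimal, hypothetical sketch of that collapsing step only; the helper name and the tie-breaking rule are assumptions, and this is not the actual IRSimilarityIdentifier implementation.

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
using namespace llvm;

// Sketch: collapse a many-to-many GVN relation into a one-to-one mapping.
// Entries already pinned in OneToOne (e.g. by instruction/operand order) win;
// otherwise this sketch takes whichever candidate the set iteration yields.
static void collapseToOneToOne(
    const DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
    DenseMap<unsigned, unsigned> &OneToOne) {
  for (const auto &Entry : ToSourceMapping) {
    if (OneToOne.count(Entry.first))
      continue; // Already fixed by a more constrained match.
    OneToOne[Entry.first] = *Entry.second.begin();
  }
}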
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 696d5e290c16..dfa1a119c079 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -47,6 +47,8 @@ enum class RecurKind {
FMul, ///< Product of floats.
FMin, ///< FP min implemented in terms of select(cmp()).
FMax, ///< FP max implemented in terms of select(cmp()).
+ FMinimum, ///< FP min with llvm.minimum semantics.
+ FMaximum, ///< FP max with llvm.maximum semantics.
FMulAdd, ///< Fused multiply-add of floats (a * b + c).
SelectICmp, ///< Integer select(icmp(),x,y) where one of (x,y) is loop
///< invariant
@@ -186,14 +188,11 @@ public:
/// previous iteration (e.g. if the value is defined in the previous
/// iteration, we refer to it as first-order recurrence, if it is defined in
/// the iteration before the previous, we refer to it as second-order
- /// recurrence and so on). \p SinkAfter includes pairs of instructions where
- /// the first will be rescheduled to appear after the second if/when the loop
- /// is vectorized. It may be augmented with additional pairs if needed in
- /// order to handle Phi as a first-order recurrence.
- static bool
- isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
- MapVector<Instruction *, Instruction *> &SinkAfter,
- DominatorTree *DT);
+ /// recurrence and so on). Note that this function optimistically assumes that
+ /// uses of the recurrence can be re-ordered if necessary and users need to
+ /// check and perform the re-ordering.
+ static bool isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
+ DominatorTree *DT);
RecurKind getRecurrenceKind() const { return Kind; }
@@ -226,7 +225,8 @@ public:
/// Returns true if the recurrence kind is a floating-point min/max kind.
static bool isFPMinMaxRecurrenceKind(RecurKind Kind) {
- return Kind == RecurKind::FMin || Kind == RecurKind::FMax;
+ return Kind == RecurKind::FMin || Kind == RecurKind::FMax ||
+ Kind == RecurKind::FMinimum || Kind == RecurKind::FMaximum;
}
/// Returns true if the recurrence kind is any min/max kind.
@@ -309,7 +309,7 @@ public:
enum InductionKind {
IK_NoInduction, ///< Not an induction variable.
IK_IntInduction, ///< Integer induction variable. Step = C.
- IK_PtrInduction, ///< Pointer induction var. Step = C / sizeof(elem).
+ IK_PtrInduction, ///< Pointer induction var. Step = C.
IK_FpInduction ///< Floating point induction variable.
};
@@ -325,7 +325,9 @@ public:
/// Returns true if \p Phi is an induction in the loop \p L. If \p Phi is an
/// induction, the induction descriptor \p D will contain the data describing
- /// this induction. If by some other means the caller has a better SCEV
+ /// this induction. Since Induction Phis can only be present inside loop
+ /// headers, the function will assert if it is passed a Phi whose parent is
+ /// not the loop header. If by some other means the caller has a better SCEV
/// expression for \p Phi than the one returned by the ScalarEvolution
/// analysis, it can be passed through \p Expr. If the def-use chain
/// associated with the phi includes casts (that we know we can ignore
@@ -367,11 +369,6 @@ public:
: Instruction::BinaryOpsEnd;
}
- Type *getElementType() const {
- assert(IK == IK_PtrInduction && "Only pointer induction has element type");
- return ElementType;
- }
-
/// Returns a reference to the type cast instructions in the induction
/// update chain, that are redundant when guarded with a runtime
/// SCEV overflow check.
@@ -383,7 +380,6 @@ private:
/// Private constructor - used by \c isInductionPHI.
InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step,
BinaryOperator *InductionBinOp = nullptr,
- Type *ElementType = nullptr,
SmallVectorImpl<Instruction *> *Casts = nullptr);
/// Start value.
@@ -394,9 +390,6 @@ private:
const SCEV *Step = nullptr;
// Instruction that advances induction variable.
BinaryOperator *InductionBinOp = nullptr;
- // Element type for pointer induction variables.
- // TODO: This can be dropped once support for typed pointers is removed.
- Type *ElementType = nullptr;
// Instructions used for type-casts of the induction variable,
// that are redundant when guarded with a runtime SCEV overflow check.
SmallVector<Instruction *, 2> RedundantCasts;
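The new FMinimum/FMaximum recurrence kinds follow llvm.minimum/llvm.maximum semantics, which differ from the select(cmp())-based FMin/FMax (llvm.minnum/llvm.maxnum style) in how NaN and signed zero are handled. A small standalone illustration of the scalar difference, written from the IEEE-754 definitions rather than taken from LLVM:

#include <cmath>
#include <limits>

// minnum-like (RecurKind::FMin): a NaN operand is ignored if the other
// operand is a number.
static double minnumLike(double A, double B) {
  if (std::isnan(A)) return B;
  if (std::isnan(B)) return A;
  return A < B ? A : B;
}

// minimum-like (RecurKind::FMinimum): NaN propagates, and -0.0 is treated as
// less than +0.0.
static double minimumLike(double A, double B) {
  if (std::isnan(A) || std::isnan(B))
    return std::numeric_limits<double>::quiet_NaN();
  if (A == 0.0 && B == 0.0)
    return std::signbit(A) ? A : B;
  return A < B ? A : B;
}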
diff --git a/llvm/include/llvm/Analysis/IVUsers.h b/llvm/include/llvm/Analysis/IVUsers.h
index e5a496037691..6b9b6bf190f1 100644
--- a/llvm/include/llvm/Analysis/IVUsers.h
+++ b/llvm/include/llvm/Analysis/IVUsers.h
@@ -131,7 +131,8 @@ public:
/// value of the OperandValToReplace of the given IVStrideUse.
const SCEV *getReplacementExpr(const IVStrideUse &IU) const;
- /// getExpr - Return the expression for the use.
+ /// getExpr - Return the expression for the use. Returns nullptr if the result
+ /// is not invertible.
const SCEV *getExpr(const IVStrideUse &IU) const;
const SCEV *getStride(const IVStrideUse &IU, const Loop *L) const;
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
index c67698777775..53c018d15cd7 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -357,7 +357,8 @@ public:
};
std::unique_ptr<InlineAdvisor>
-getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM);
+getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM,
+ std::function<bool(CallBase &)> GetDefaultAdvice);
std::unique_ptr<InlineAdvisor>
getDevelopmentModeAdvisor(Module &M, ModuleAnalysisManager &MAM,
diff --git a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h
index fb8236c28b25..77ae60059ce9 100644
--- a/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h
+++ b/llvm/include/llvm/Analysis/InlineModelFeatureMaps.h
@@ -22,34 +22,51 @@ namespace llvm {
// inline cost, and we define them separately to preserve the original heuristic
// behavior.
#define INLINE_COST_FEATURE_ITERATOR(M) \
- M(SROASavings, "sroa_savings") \
- M(SROALosses, "sroa_losses") \
- M(LoadElimination, "load_elimination") \
- M(CallPenalty, "call_penalty") \
- M(CallArgumentSetup, "call_argument_setup") \
- M(LoadRelativeIntrinsic, "load_relative_intrinsic") \
- M(LoweredCallArgSetup, "lowered_call_arg_setup") \
- M(IndirectCallPenalty, "indirect_call_penalty") \
- M(JumpTablePenalty, "jump_table_penalty") \
- M(CaseClusterPenalty, "case_cluster_penalty") \
- M(SwitchPenalty, "switch_penalty") \
- M(UnsimplifiedCommonInstructions, "unsimplified_common_instructions") \
- M(NumLoops, "num_loops") \
- M(DeadBlocks, "dead_blocks") \
- M(SimplifiedInstructions, "simplified_instructions") \
- M(ConstantArgs, "constant_args") \
- M(ConstantOffsetPtrArgs, "constant_offset_ptr_args") \
- M(CallSiteCost, "callsite_cost") \
- M(ColdCcPenalty, "cold_cc_penalty") \
- M(LastCallToStaticBonus, "last_call_to_static_bonus") \
- M(IsMultipleBlocks, "is_multiple_blocks") \
- M(NestedInlines, "nested_inlines") \
- M(NestedInlineCostEstimate, "nested_inline_cost_estimate") \
- M(Threshold, "threshold")
+ M(int64_t, {1}, sroa_savings, \
+ "Savings from SROA (scalar replacement of aggregates)") \
+ M(int64_t, {1}, sroa_losses, \
+ "Losses from SROA (scalar replacement of aggregates)") \
+ M(int64_t, {1}, load_elimination, "Cost of load elimination in the call") \
+ M(int64_t, {1}, call_penalty, \
+ "Accumulation of penalty applied to call sites when inlining") \
+ M(int64_t, {1}, call_argument_setup, \
+ "Accumulation of call argument setup costs") \
+ M(int64_t, {1}, load_relative_intrinsic, \
+ "Accumulation of costs of loading relative intrinsics") \
+ M(int64_t, {1}, lowered_call_arg_setup, \
+ "Accumulation of cost of lowered call argument setups") \
+ M(int64_t, {1}, indirect_call_penalty, \
+ "Accumulation of costs for indirect calls") \
+ M(int64_t, {1}, jump_table_penalty, "Accumulation of costs for jump tables") \
+ M(int64_t, {1}, case_cluster_penalty, \
+ "Accumulation of costs for case clusters") \
+ M(int64_t, {1}, switch_penalty, \
+ "Accumulation of costs for switch statements") \
+ M(int64_t, {1}, unsimplified_common_instructions, \
+ "Costs from unsimplified common instructions") \
+ M(int64_t, {1}, num_loops, "Number of loops in the caller") \
+ M(int64_t, {1}, dead_blocks, "Number of dead blocks in the caller") \
+ M(int64_t, {1}, simplified_instructions, \
+ "Number of simplified instructions") \
+ M(int64_t, {1}, constant_args, \
+ "Number of constant arguments in the call site") \
+ M(int64_t, {1}, constant_offset_ptr_args, \
+ "Number of constant offset pointer args in the call site") \
+ M(int64_t, {1}, callsite_cost, "Estimated cost of the call site") \
+ M(int64_t, {1}, cold_cc_penalty, "Penalty for a cold calling convention") \
+ M(int64_t, {1}, last_call_to_static_bonus, \
+ "Bonus for being the last call to static") \
+ M(int64_t, {1}, is_multiple_blocks, \
+ "Boolean; is the Callee multiple blocks") \
+ M(int64_t, {1}, nested_inlines, \
+ "Would the default inliner perfom nested inlining") \
+ M(int64_t, {1}, nested_inline_cost_estimate, \
+ "Estimate of the accumulated cost of nested inlines") \
+ M(int64_t, {1}, threshold, "Threshold for the heuristic inliner")
// clang-format off
enum class InlineCostFeatureIndex : size_t {
-#define POPULATE_INDICES(INDEX_NAME, NAME) INDEX_NAME,
+#define POPULATE_INDICES(DTYPE, SHAPE, NAME, DOC) NAME,
INLINE_COST_FEATURE_ITERATOR(POPULATE_INDICES)
#undef POPULATE_INDICES
@@ -62,15 +79,15 @@ using InlineCostFeatures =
static_cast<size_t>(InlineCostFeatureIndex::NumberOfFeatures)>;
constexpr bool isHeuristicInlineCostFeature(InlineCostFeatureIndex Feature) {
- return Feature != InlineCostFeatureIndex::SROASavings &&
- Feature != InlineCostFeatureIndex::IsMultipleBlocks &&
- Feature != InlineCostFeatureIndex::DeadBlocks &&
- Feature != InlineCostFeatureIndex::SimplifiedInstructions &&
- Feature != InlineCostFeatureIndex::ConstantArgs &&
- Feature != InlineCostFeatureIndex::ConstantOffsetPtrArgs &&
- Feature != InlineCostFeatureIndex::NestedInlines &&
- Feature != InlineCostFeatureIndex::NestedInlineCostEstimate &&
- Feature != InlineCostFeatureIndex::Threshold;
+ return Feature != InlineCostFeatureIndex::sroa_savings &&
+ Feature != InlineCostFeatureIndex::is_multiple_blocks &&
+ Feature != InlineCostFeatureIndex::dead_blocks &&
+ Feature != InlineCostFeatureIndex::simplified_instructions &&
+ Feature != InlineCostFeatureIndex::constant_args &&
+ Feature != InlineCostFeatureIndex::constant_offset_ptr_args &&
+ Feature != InlineCostFeatureIndex::nested_inlines &&
+ Feature != InlineCostFeatureIndex::nested_inline_cost_estimate &&
+ Feature != InlineCostFeatureIndex::threshold;
}
// List of features. Each feature is defined through a triple:
@@ -81,39 +98,37 @@ constexpr bool isHeuristicInlineCostFeature(InlineCostFeatureIndex Feature) {
// programmatically, and serves as a workaround for the inability to insert
// comments in macros.
#define INLINE_FEATURE_ITERATOR(M) \
- M(CalleeBasicBlockCount, "callee_basic_block_count", \
+ M(int64_t, {1}, callee_basic_block_count, \
"number of basic blocks of the callee") \
- M(CallSiteHeight, "callsite_height", \
+ M(int64_t, {1}, callsite_height, \
"position of the call site in the original call graph - measured from " \
"the farthest SCC") \
- M(NodeCount, "node_count", \
+ M(int64_t, {1}, node_count, \
"total current number of defined functions in the module") \
- M(NrCtantParams, "nr_ctant_params", \
+ M(int64_t, {1}, nr_ctant_params, \
"number of parameters in the call site that are constants") \
- M(CostEstimate, "cost_estimate", "total cost estimate (threshold - free)") \
- M(EdgeCount, "edge_count", "total number of calls in the module") \
- M(CallerUsers, "caller_users", \
+ M(int64_t, {1}, cost_estimate, "total cost estimate (threshold - free)") \
+ M(int64_t, {1}, edge_count, "total number of calls in the module") \
+ M(int64_t, {1}, caller_users, \
"number of module-internal users of the caller, +1 if the caller is " \
"exposed externally") \
- M(CallerConditionallyExecutedBlocks, "caller_conditionally_executed_blocks", \
+ M(int64_t, {1}, caller_conditionally_executed_blocks, \
"number of blocks reached from a conditional instruction, in the caller") \
- M(CallerBasicBlockCount, "caller_basic_block_count", \
+ M(int64_t, {1}, caller_basic_block_count, \
"number of basic blocks in the caller") \
- M(CalleeConditionallyExecutedBlocks, "callee_conditionally_executed_blocks", \
+ M(int64_t, {1}, callee_conditionally_executed_blocks, \
"number of blocks reached from a conditional instruction, in the callee") \
- M(CalleeUsers, "callee_users", \
+ M(int64_t, {1}, callee_users, \
"number of module-internal users of the callee, +1 if the callee is " \
"exposed externally")
// clang-format off
enum class FeatureIndex : size_t {
+#define POPULATE_INDICES(DTYPE, SHAPE, NAME, COMMENT) NAME,
// InlineCost features - these must come first
-#define POPULATE_INDICES(INDEX_NAME, NAME) INDEX_NAME,
INLINE_COST_FEATURE_ITERATOR(POPULATE_INDICES)
-#undef POPULATE_INDICES
// Non-cost features
-#define POPULATE_INDICES(INDEX_NAME, NAME, COMMENT) INDEX_NAME,
INLINE_FEATURE_ITERATOR(POPULATE_INDICES)
#undef POPULATE_INDICES
@@ -129,10 +144,12 @@ inlineCostFeatureToMlFeature(InlineCostFeatureIndex Feature) {
constexpr size_t NumberOfFeatures =
static_cast<size_t>(FeatureIndex::NumberOfFeatures);
-extern const std::array<TensorSpec, NumberOfFeatures> FeatureMap;
+extern const std::vector<TensorSpec> FeatureMap;
extern const char *const DecisionName;
+extern const TensorSpec InlineDecisionSpec;
extern const char *const DefaultDecisionName;
+extern const TensorSpec DefaultDecisionSpec;
extern const char *const RewardName;
using InlineFeatures = std::vector<int64_t>;
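The reworked feature iterators above use a single X-macro entry of the form M(DTYPE, SHAPE, NAME, DOC), expanded with POPULATE_INDICES to build the index enums. As a hedged, self-contained illustration of the pattern (the feature list and names below are made up, and the real FeatureMap/TensorSpec construction is not shown here), the same list can also be expanded a second time to build a parallel table:

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

// A made-up two-entry list in the same (DTYPE, SHAPE, NAME, DOC) shape.
#define MY_FEATURES(M)                                                         \
  M(int64_t, {1}, callee_basic_block_count, "number of basic blocks")          \
  M(int64_t, {1}, edge_count, "total number of calls in the module")

// Expansion #1: an index enum, like POPULATE_INDICES in the header above.
enum class MyFeatureIndex : size_t {
#define POPULATE_INDICES(DTYPE, SHAPE, NAME, DOC) NAME,
  MY_FEATURES(POPULATE_INDICES)
#undef POPULATE_INDICES
  NumberOfFeatures
};

// Expansion #2: a parallel table of stringified feature names.
static const std::vector<std::string> MyFeatureNames = {
#define POPULATE_NAMES(DTYPE, SHAPE, NAME, DOC) #NAME,
    MY_FEATURES(POPULATE_NAMES)
#undef POPULATE_NAMES
};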
diff --git a/llvm/include/llvm/Analysis/InlineOrder.h b/llvm/include/llvm/Analysis/InlineOrder.h
index 4c82d73df8a5..a1d25a25b856 100644
--- a/llvm/include/llvm/Analysis/InlineOrder.h
+++ b/llvm/include/llvm/Analysis/InlineOrder.h
@@ -32,7 +32,52 @@ public:
};
std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>>
-getInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params);
+getDefaultInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params,
+ ModuleAnalysisManager &MAM, Module &M);
+
+std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>>
+getInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params,
+ ModuleAnalysisManager &MAM, Module &M);
+
+/// Used for dynamically loading instances of InlineOrder as plugins
+///
+/// Plugins must implement an InlineOrderFactory; for an example, refer to:
+/// llvm/unittests/Analysis/InlineOrderPlugin/InlineOrderPlugin.cpp
+///
+/// If a PluginInlineOrderAnalysis has been registered with the
+/// current ModuleAnalysisManager, llvm::getInlineOrder returns an
+/// InlineOrder created by the PluginInlineOrderAnalysis' Factory.
+///
+class PluginInlineOrderAnalysis
+ : public AnalysisInfoMixin<PluginInlineOrderAnalysis> {
+public:
+ static AnalysisKey Key;
+
+ typedef std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>> (
+ *InlineOrderFactory)(FunctionAnalysisManager &FAM,
+ const InlineParams &Params,
+ ModuleAnalysisManager &MAM, Module &M);
+
+ PluginInlineOrderAnalysis(InlineOrderFactory Factory) : Factory(Factory) {
+ HasBeenRegistered = true;
+ assert(Factory != nullptr &&
+ "The plugin inline order factory should not be a null pointer.");
+ }
+
+ struct Result {
+ InlineOrderFactory Factory;
+ };
+
+ Result run(Module &, ModuleAnalysisManager &) { return {Factory}; }
+ Result getResult() { return {Factory}; }
+
+ static bool isRegistered() { return HasBeenRegistered; }
+ static void unregister() { HasBeenRegistered = false; }
+
+private:
+ static bool HasBeenRegistered;
+ InlineOrderFactory Factory;
+};
} // namespace llvm
#endif // LLVM_ANALYSIS_INLINEORDER_H
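A hypothetical plugin-side sketch of the interface declared above: a factory with the InlineOrderFactory signature is wrapped in a PluginInlineOrderAnalysis and registered with the ModuleAnalysisManager so that llvm::getInlineOrder can pick it up. The function names here are invented, and the unit test referenced in the comment (InlineOrderPlugin.cpp) remains the authoritative example.

#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InlineOrder.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

// Factory matching PluginInlineOrderAnalysis::InlineOrderFactory; this sketch
// simply defers to the default priority order.
static std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>>
myInlineOrderFactory(FunctionAnalysisManager &FAM, const InlineParams &Params,
                     ModuleAnalysisManager &MAM, Module &M) {
  return getDefaultInlineOrder(FAM, Params, MAM, M);
}

// Register the analysis so a later getInlineOrder call uses the factory.
static void registerMyInlineOrder(ModuleAnalysisManager &MAM) {
  MAM.registerPass(
      [] { return PluginInlineOrderAnalysis(myInlineOrderFactory); });
}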
diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h
index d75e04156794..df0784664ead 100644
--- a/llvm/include/llvm/Analysis/InstructionSimplify.h
+++ b/llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -19,12 +19,8 @@
// values. This will prevent other code from seeing the same undef uses and
// resolving them to different values.
//
-// These routines are designed to tolerate moderately incomplete IR, such as
-// instructions that are not connected to basic blocks yet. However, they do
-// require that all the IR that they encounter be valid. In particular, they
-// require that all non-constant values be defined in the same function, and the
-// same call context of that function (and not split between caller and callee
-// contexts of a directly recursive call, for example).
+// They require that all the IR that they encounter be valid and inserted into a
+// parent function.
//
// Additionally, these routines can't simplify to the instructions that are not
// def-reachable, meaning we can't just scan the basic block for instructions
@@ -50,7 +46,6 @@ class Function;
class Instruction;
struct LoopStandardAnalysisResults;
class MDNode;
-class OptimizationRemarkEmitter;
class Pass;
template <class T, unsigned n> class SmallSetVector;
class TargetLibraryInfo;
@@ -88,6 +83,12 @@ struct InstrInfoQuery {
return cast<PossiblyExactOperator>(Op)->isExact();
return false;
}
+
+ template <class InstT> bool hasNoSignedZeros(const InstT *Op) const {
+ if (UseInstrInfo)
+ return Op->hasNoSignedZeros();
+ return false;
+ }
};
struct SimplifyQuery {
@@ -302,8 +303,9 @@ Value *simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
Value *simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, FastMathFlags FMF,
const SimplifyQuery &Q);
-/// Given a callsite, fold the result or return null.
-Value *simplifyCall(CallBase *Call, const SimplifyQuery &Q);
+/// Given a callsite, callee, and arguments, fold the result or return null.
+Value *simplifyCall(CallBase *Call, Value *Callee, ArrayRef<Value *> Args,
+ const SimplifyQuery &Q);
/// Given a constrained FP intrinsic call, tries to compute its simplified
/// version. Returns a simplified result or null.
@@ -318,22 +320,25 @@ Value *simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q);
/// If not, this returns null.
Value *simplifyFreezeInst(Value *Op, const SimplifyQuery &Q);
+/// Given a load instruction and its pointer operand, fold the result or return
+/// null.
+Value *simplifyLoadInst(LoadInst *LI, Value *PtrOp, const SimplifyQuery &Q);
+
/// See if we can compute a simplified version of this instruction. If not,
/// return null.
-Value *simplifyInstruction(Instruction *I, const SimplifyQuery &Q,
- OptimizationRemarkEmitter *ORE = nullptr);
+Value *simplifyInstruction(Instruction *I, const SimplifyQuery &Q);
/// Like \p simplifyInstruction but the operands of \p I are replaced with
/// \p NewOps. Returns a simplified value, or null if none was found.
Value *
simplifyInstructionWithOperands(Instruction *I, ArrayRef<Value *> NewOps,
- const SimplifyQuery &Q,
- OptimizationRemarkEmitter *ORE = nullptr);
+ const SimplifyQuery &Q);
/// See if V simplifies when its operand Op is replaced with RepOp. If not,
/// return null.
/// AllowRefinement specifies whether the simplification can be a refinement
/// (e.g. 0 instead of poison), or whether it needs to be strictly identical.
+/// Op and RepOp can be assumed to not be poison when determining refinement.
Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
const SimplifyQuery &Q, bool AllowRefinement);
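The simplifyCall change above now requires the callee and argument list to be passed explicitly. A hedged usage sketch (trySimplifyCallSite is a hypothetical helper, not part of the header) that simply forwards the call site's own callee and arguments:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// Forward the call's own callee and argument list into the new signature.
static Value *trySimplifyCallSite(CallBase &CB, const SimplifyQuery &Q) {
  SmallVector<Value *, 8> Args(CB.arg_begin(), CB.arg_end());
  return simplifyCall(&CB, CB.getCalledOperand(), Args, Q);
}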
diff --git a/llvm/include/llvm/Analysis/InteractiveModelRunner.h b/llvm/include/llvm/Analysis/InteractiveModelRunner.h
new file mode 100644
index 000000000000..680dc4249d80
--- /dev/null
+++ b/llvm/include/llvm/Analysis/InteractiveModelRunner.h
@@ -0,0 +1,71 @@
+//===- InteractiveModelRunner.h ---- "gym" ML model runner -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_ANALYSIS_INTERACTIVEMODELRUNNER_H
+#define LLVM_ANALYSIS_INTERACTIVEMODELRUNNER_H
+
+#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/Analysis/TensorSpec.h"
+#include "llvm/Analysis/Utils/TrainingLogger.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+#include <system_error>
+
+namespace llvm {
+
+/// A MLModelRunner that asks for advice from an external agent, or host. It
+/// uses 2 files - ideally named pipes - one to send data to that agent, and
+/// one to receive advice.
+/// The data exchange uses the training logger (Utils/TrainingLogger.h) format.
+/// Specifically, the compiler will send the log header, set the context, and
+/// send observations; the host is expected to reply with a tensor value after
+/// each observation as a binary buffer that's conforming to the shape of the
+/// advice. Interleaved, the data closely resembles the training log for a
+/// log where we don't capture the reward signal.
+///
+/// Note that the correctness of the received data is the responsibility of the
+/// host. In particular, if insufficient data were sent, the compiler will block
+/// when waiting for advice.
+///
+/// Note that the host can either open the pipes RW, or open first the pipe to
+/// the compiler - i.e. the "Inbound" - and then the "Outbound", to avoid
+/// deadlock. This is because the compiler first tries to open the inbound
+/// (which will hang until there's a writer on the other end).
+class InteractiveModelRunner : public MLModelRunner {
+public:
+ InteractiveModelRunner(LLVMContext &Ctx,
+ const std::vector<TensorSpec> &Inputs,
+ const TensorSpec &Advice, StringRef OutboundName,
+ StringRef InboundName);
+
+ static bool classof(const MLModelRunner *R) {
+ return R->getKind() == MLModelRunner::Kind::Interactive;
+ }
+ void switchContext(StringRef Name) override {
+ Log->switchContext(Name);
+ Log->flush();
+ }
+
+ virtual ~InteractiveModelRunner();
+
+private:
+ void *evaluateUntyped() override;
+ // This must be declared before InEC if we want to initialize it in the
+ // ctor initializer list.
+ int Inbound = -1;
+ const std::vector<TensorSpec> InputSpecs;
+ const TensorSpec OutputSpec;
+ std::error_code OutEC;
+ std::error_code InEC;
+ std::vector<char> OutputBuffer;
+ std::unique_ptr<Logger> Log;
+};
+} // namespace llvm
+#endif // LLVM_ANALYSIS_INTERACTIVEMODELRUNNER_H
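The class comment above prescribes the order in which the host should open the two pipes to avoid deadlocking against the compiler. A minimal host-side POSIX sketch under assumed names ("compiler_inbound"/"compiler_outbound" are placeholders, and the actual wire format is the TrainingLogger format mentioned above):

#include <fcntl.h>
#include <unistd.h>

int main() {
  // The compiler opens its inbound pipe first and blocks until a writer
  // appears, so the host opens that side (for writing advice) before the
  // outbound side (for reading observations).
  int ToCompiler = open("compiler_inbound", O_WRONLY);
  int FromCompiler = open("compiler_outbound", O_RDONLY);
  // ... read the log header and observations from FromCompiler; after each
  // observation, write a raw binary buffer matching the advice shape to
  // ToCompiler ...
  close(FromCompiler);
  close(ToCompiler);
  return 0;
}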
diff --git a/llvm/include/llvm/Analysis/LazyCallGraph.h b/llvm/include/llvm/Analysis/LazyCallGraph.h
index d438cea9bf85..211a058aa017 100644
--- a/llvm/include/llvm/Analysis/LazyCallGraph.h
+++ b/llvm/include/llvm/Analysis/LazyCallGraph.h
@@ -255,7 +255,7 @@ public:
iterator end() { return iterator(Edges.end(), Edges.end()); }
Edge &operator[](Node &N) {
- assert(EdgeIndexMap.find(&N) != EdgeIndexMap.end() && "No such edge!");
+ assert(EdgeIndexMap.contains(&N) && "No such edge!");
auto &E = Edges[EdgeIndexMap.find(&N)->second];
assert(E && "Dead or null edge!");
return E;
diff --git a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h b/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
deleted file mode 100644
index 261935a37819..000000000000
--- a/llvm/include/llvm/Analysis/LegacyDivergenceAnalysis.h
+++ /dev/null
@@ -1,103 +0,0 @@
-//===- llvm/Analysis/LegacyDivergenceAnalysis.h - KernelDivergence Analysis -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// The kernel divergence analysis is an LLVM pass which can be used to find out
-// if a branch instruction in a GPU program (kernel) is divergent or not. It can help
-// branch optimizations such as jump threading and loop unswitching to make
-// better decisions.
-//
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H
-#define LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H
-
-#include "llvm/ADT/DenseSet.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
-#include <memory>
-
-namespace llvm {
-class DivergenceInfo;
-class Function;
-class Module;
-class raw_ostream;
-class TargetTransformInfo;
-class Use;
-class Value;
-
-class LegacyDivergenceAnalysisImpl {
-public:
- // Returns true if V is divergent at its definition.
- bool isDivergent(const Value *V) const;
-
- // Returns true if U is divergent. Uses of a uniform value can be divergent.
- bool isDivergentUse(const Use *U) const;
-
- // Returns true if V is uniform/non-divergent.
- bool isUniform(const Value *V) const { return !isDivergent(V); }
-
- // Returns true if U is uniform/non-divergent. Uses of a uniform value can be
- // divergent.
- bool isUniformUse(const Use *U) const { return !isDivergentUse(U); }
-
- // Keep the analysis results uptodate by removing an erased value.
- void removeValue(const Value *V) { DivergentValues.erase(V); }
-
- // Print all divergent branches in the function.
- void print(raw_ostream &OS, const Module *) const;
-
- // Whether analysis should be performed by GPUDivergenceAnalysis.
- bool shouldUseGPUDivergenceAnalysis(const Function &F,
- const TargetTransformInfo &TTI,
- const LoopInfo &LI);
-
- void run(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
- PostDominatorTree &PDT, const LoopInfo &LI);
-
-protected:
- // (optional) handle to new DivergenceAnalysis
- std::unique_ptr<DivergenceInfo> gpuDA;
-
- // Stores all divergent values.
- DenseSet<const Value *> DivergentValues;
-
- // Stores divergent uses of possibly uniform values.
- DenseSet<const Use *> DivergentUses;
-};
-
-class LegacyDivergenceAnalysis : public FunctionPass,
- public LegacyDivergenceAnalysisImpl {
-public:
- static char ID;
-
- LegacyDivergenceAnalysis();
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnFunction(Function &F) override;
-};
-
-class LegacyDivergenceAnalysisPass
- : public PassInfoMixin<LegacyDivergenceAnalysisPass>,
- public LegacyDivergenceAnalysisImpl {
-public:
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-
-private:
- // (optional) handle to new DivergenceAnalysis
- std::unique_ptr<DivergenceInfo> gpuDA;
-
- // Stores all divergent values.
- DenseSet<const Value *> DivergentValues;
-
- // Stores divergent uses of possibly uniform values.
- DenseSet<const Use *> DivergentUses;
-};
-
-} // end namespace llvm
-
-#endif // LLVM_ANALYSIS_LEGACYDIVERGENCEANALYSIS_H
diff --git a/llvm/include/llvm/Analysis/Lint.h b/llvm/include/llvm/Analysis/Lint.h
index 4ceae2d29f16..8dffa1ecb5f3 100644
--- a/llvm/include/llvm/Analysis/Lint.h
+++ b/llvm/include/llvm/Analysis/Lint.h
@@ -22,12 +22,9 @@
namespace llvm {
-class FunctionPass;
class Module;
class Function;
-FunctionPass *createLintLegacyPassPass();
-
/// Lint a module.
///
/// This should only be used for debugging, because it plays games with
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index a49e24ada440..eb35ef515a1f 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -184,7 +184,7 @@ public:
///
/// Only checks sets with elements in \p CheckDeps.
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
- const ValueToValueMap &Strides);
+ const DenseMap<Value *, const SCEV *> &Strides);
/// No memory dependence was encountered that would inhibit
/// vectorization.
@@ -316,7 +316,7 @@ private:
/// Otherwise, this function returns true signaling a possible dependence.
Dependence::DepType isDependent(const MemAccessInfo &A, unsigned AIdx,
const MemAccessInfo &B, unsigned BIdx,
- const ValueToValueMap &Strides);
+ const DenseMap<Value *, const SCEV *> &Strides);
/// Check whether the data dependence could prevent store-load
/// forwarding.
@@ -588,10 +588,9 @@ public:
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
DominatorTree *DT);
- /// Returns true if the value V is uniform within the loop.
- bool isUniform(Value *V) const;
+ /// Returns true if value \p V is loop invariant.
+ bool isInvariant(Value *V) const;
- uint64_t getMaxSafeDepDistBytes() const { return MaxSafeDepDistBytes; }
unsigned getNumStores() const { return NumStores; }
unsigned getNumLoads() const { return NumLoads;}
@@ -612,10 +611,9 @@ public:
/// If an access has a symbolic stride, this maps the pointer value to
/// the stride symbol.
- const ValueToValueMap &getSymbolicStrides() const { return SymbolicStrides; }
-
- /// Pointer has a symbolic stride.
- bool hasStride(Value *V) const { return StrideSet.count(V); }
+ const DenseMap<Value *, const SCEV *> &getSymbolicStrides() const {
+ return SymbolicStrides;
+ }
/// Print the information about the memory accesses in the loop.
void print(raw_ostream &OS, unsigned Depth = 0) const;
@@ -699,14 +697,9 @@ private:
/// If an access has a symbolic stride, this maps the pointer value to
/// the stride symbol.
- ValueToValueMap SymbolicStrides;
-
- /// Set of symbolic strides values.
- SmallPtrSet<Value *, 8> StrideSet;
+ DenseMap<Value *, const SCEV *> SymbolicStrides;
};
-Value *stripIntegerCast(Value *V);
-
/// Return the SCEV corresponding to a pointer with the symbolic stride
/// replaced with constant one, assuming the SCEV predicate associated with
/// \p PSE is true.
@@ -716,9 +709,10 @@ Value *stripIntegerCast(Value *V);
///
/// \p PtrToStride provides the mapping between the pointer value and its
/// stride as collected by LoopVectorizationLegality::collectStridedAccess.
-const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
- const ValueToValueMap &PtrToStride,
- Value *Ptr);
+const SCEV *
+replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
+ const DenseMap<Value *, const SCEV *> &PtrToStride,
+ Value *Ptr);
/// If the pointer has a constant stride return it in units of the access type
/// size. Otherwise return std::nullopt.
@@ -730,10 +724,14 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
/// to \p PtrToStride and therefore add further predicates to \p PSE.
/// The \p Assume parameter indicates if we are allowed to make additional
/// run-time assumptions.
+///
+/// Note that the analysis results are defined if-and-only-if the original
+/// memory access was defined. If that access was dead, or UB, then the
+/// result of this function is undefined.
std::optional<int64_t>
getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
const Loop *Lp,
- const ValueToValueMap &StridesMap = ValueToValueMap(),
+ const DenseMap<Value *, const SCEV *> &StridesMap = DenseMap<Value *, const SCEV *>(),
bool Assume = false, bool ShouldCheckWrap = true);
/// Returns the distance between the pointers \p PtrA and \p PtrB iff they are
@@ -785,38 +783,9 @@ public:
const LoopAccessInfo &getInfo(Loop &L);
void clear() { LoopAccessInfoMap.clear(); }
-};
-
-/// This analysis provides dependence information for the memory accesses
-/// of a loop.
-///
-/// It runs the analysis for a loop on demand. This can be initiated by
-/// querying the loop access info via LAA::getInfo. getInfo return a
-/// LoopAccessInfo object. See this class for the specifics of what information
-/// is provided.
-class LoopAccessLegacyAnalysis : public FunctionPass {
-public:
- static char ID;
-
- LoopAccessLegacyAnalysis();
-
- bool runOnFunction(Function &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- /// Return the proxy object for retrieving LoopAccessInfo for individual
- /// loops.
- ///
- /// If there is no cached result available run the analysis.
- LoopAccessInfoManager &getLAIs() { return *LAIs; }
-
- void releaseMemory() override {
- // Invalidate the cache when the pass is freed.
- LAIs->clear();
- }
-
-private:
- std::unique_ptr<LoopAccessInfoManager> LAIs;
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv);
};
/// This analysis provides dependence information for the memory
diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index b77a335d1ee6..3434630c27cf 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -6,32 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the LoopInfo class that is used to identify natural loops
-// and determine the loop depth of various nodes of the CFG. A natural loop
-// has exactly one entry-point, which is called the header. Note that natural
-// loops may actually be several loops that share the same header node.
-//
-// This analysis calculates the nesting structure of loops in a function. For
-// each natural loop identified, this analysis identifies natural loops
-// contained entirely within the loop and the basic blocks the make up the loop.
-//
-// It can calculate on the fly various bits of information, for example:
-//
-// * whether there is a preheader for the loop
-// * the number of back edges to the header
-// * whether or not a particular block branches out of the loop
-// * the successor blocks of the loop
-// * the loop depth
-// * etc...
-//
-// Note that this analysis specifically identifies *Loops* not cycles or SCCs
-// in the CFG. There can be strongly connected components in the CFG which
-// this analysis will not recognize and that will not be represented by a Loop
-// instance. In particular, a Loop might be inside such a non-loop SCC, or a
-// non-loop SCC might contain a sub-SCC which is a Loop.
-//
-// For an overview of terminology used in this API (and thus all of our loop
-// analyses or transforms), see docs/LoopTerminology.rst.
+// This file declares a GenericLoopInfo instantiation for LLVM IR.
//
//===----------------------------------------------------------------------===//
@@ -47,7 +22,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
-#include "llvm/Support/Allocator.h"
+#include "llvm/Support/GenericLoopInfo.h"
#include <algorithm>
#include <optional>
#include <utility>
@@ -63,483 +38,8 @@ class MDNode;
class MemorySSAUpdater;
class ScalarEvolution;
class raw_ostream;
-template <class N, bool IsPostDom> class DominatorTreeBase;
-template <class N, class M> class LoopInfoBase;
-template <class N, class M> class LoopBase;
-
-//===----------------------------------------------------------------------===//
-/// Instances of this class are used to represent loops that are detected in the
-/// flow graph.
-///
-template <class BlockT, class LoopT> class LoopBase {
- LoopT *ParentLoop;
- // Loops contained entirely within this one.
- std::vector<LoopT *> SubLoops;
-
- // The list of blocks in this loop. First entry is the header node.
- std::vector<BlockT *> Blocks;
-
- SmallPtrSet<const BlockT *, 8> DenseBlockSet;
-
-#if LLVM_ENABLE_ABI_BREAKING_CHECKS
- /// Indicator that this loop is no longer a valid loop.
- bool IsInvalid = false;
-#endif
-
- LoopBase(const LoopBase<BlockT, LoopT> &) = delete;
- const LoopBase<BlockT, LoopT> &
- operator=(const LoopBase<BlockT, LoopT> &) = delete;
-
-public:
- /// Return the nesting level of this loop. An outer-most loop has depth 1,
- /// for consistency with loop depth values used for basic blocks, where depth
- /// 0 is used for blocks not inside any loops.
- unsigned getLoopDepth() const {
- assert(!isInvalid() && "Loop not in a valid state!");
- unsigned D = 1;
- for (const LoopT *CurLoop = ParentLoop; CurLoop;
- CurLoop = CurLoop->ParentLoop)
- ++D;
- return D;
- }
- BlockT *getHeader() const { return getBlocks().front(); }
- /// Return the parent loop if it exists or nullptr for top
- /// level loops.
-
- /// A loop is either top-level in a function (that is, it is not
- /// contained in any other loop) or it is entirely enclosed in
- /// some other loop.
- /// If a loop is top-level, it has no parent, otherwise its
- /// parent is the innermost loop in which it is enclosed.
- LoopT *getParentLoop() const { return ParentLoop; }
-
- /// Get the outermost loop in which this loop is contained.
- /// This may be the loop itself, if it already is the outermost loop.
- const LoopT *getOutermostLoop() const {
- const LoopT *L = static_cast<const LoopT *>(this);
- while (L->ParentLoop)
- L = L->ParentLoop;
- return L;
- }
-
- LoopT *getOutermostLoop() {
- LoopT *L = static_cast<LoopT *>(this);
- while (L->ParentLoop)
- L = L->ParentLoop;
- return L;
- }
-
- /// This is a raw interface for bypassing addChildLoop.
- void setParentLoop(LoopT *L) {
- assert(!isInvalid() && "Loop not in a valid state!");
- ParentLoop = L;
- }
-
- /// Return true if the specified loop is contained within in this loop.
- bool contains(const LoopT *L) const {
- assert(!isInvalid() && "Loop not in a valid state!");
- if (L == this)
- return true;
- if (!L)
- return false;
- return contains(L->getParentLoop());
- }
-
- /// Return true if the specified basic block is in this loop.
- bool contains(const BlockT *BB) const {
- assert(!isInvalid() && "Loop not in a valid state!");
- return DenseBlockSet.count(BB);
- }
-
- /// Return true if the specified instruction is in this loop.
- template <class InstT> bool contains(const InstT *Inst) const {
- return contains(Inst->getParent());
- }
-
- /// Return the loops contained entirely within this loop.
- const std::vector<LoopT *> &getSubLoops() const {
- assert(!isInvalid() && "Loop not in a valid state!");
- return SubLoops;
- }
- std::vector<LoopT *> &getSubLoopsVector() {
- assert(!isInvalid() && "Loop not in a valid state!");
- return SubLoops;
- }
- typedef typename std::vector<LoopT *>::const_iterator iterator;
- typedef
- typename std::vector<LoopT *>::const_reverse_iterator reverse_iterator;
- iterator begin() const { return getSubLoops().begin(); }
- iterator end() const { return getSubLoops().end(); }
- reverse_iterator rbegin() const { return getSubLoops().rbegin(); }
- reverse_iterator rend() const { return getSubLoops().rend(); }
-
- // LoopInfo does not detect irreducible control flow, just natural
- // loops. That is, it is possible that there is cyclic control
- // flow within the "innermost loop" or around the "outermost
- // loop".
-
- /// Return true if the loop does not contain any (natural) loops.
- bool isInnermost() const { return getSubLoops().empty(); }
- /// Return true if the loop does not have a parent (natural) loop
- // (i.e. it is outermost, which is the same as top-level).
- bool isOutermost() const { return getParentLoop() == nullptr; }
-
- /// Get a list of the basic blocks which make up this loop.
- ArrayRef<BlockT *> getBlocks() const {
- assert(!isInvalid() && "Loop not in a valid state!");
- return Blocks;
- }
- typedef typename ArrayRef<BlockT *>::const_iterator block_iterator;
- block_iterator block_begin() const { return getBlocks().begin(); }
- block_iterator block_end() const { return getBlocks().end(); }
- inline iterator_range<block_iterator> blocks() const {
- assert(!isInvalid() && "Loop not in a valid state!");
- return make_range(block_begin(), block_end());
- }
-
- /// Get the number of blocks in this loop in constant time.
- /// Invalidate the loop, indicating that it is no longer a loop.
- unsigned getNumBlocks() const {
- assert(!isInvalid() && "Loop not in a valid state!");
- return Blocks.size();
- }
-
- /// Return a direct, mutable handle to the blocks vector so that we can
- /// mutate it efficiently with techniques like `std::remove`.
- std::vector<BlockT *> &getBlocksVector() {
- assert(!isInvalid() && "Loop not in a valid state!");
- return Blocks;
- }
- /// Return a direct, mutable handle to the blocks set so that we can
- /// mutate it efficiently.
- SmallPtrSetImpl<const BlockT *> &getBlocksSet() {
- assert(!isInvalid() && "Loop not in a valid state!");
- return DenseBlockSet;
- }
-
- /// Return a direct, immutable handle to the blocks set.
- const SmallPtrSetImpl<const BlockT *> &getBlocksSet() const {
- assert(!isInvalid() && "Loop not in a valid state!");
- return DenseBlockSet;
- }
-
- /// Return true if this loop is no longer valid. The only valid use of this
- /// helper is "assert(L.isInvalid())" or equivalent, since IsInvalid is set to
- /// true by the destructor. In other words, if this accessor returns true,
- /// the caller has already triggered UB by calling this accessor; and so it
- /// can only be called in a context where a return value of true indicates a
- /// programmer error.
- bool isInvalid() const {
-#if LLVM_ENABLE_ABI_BREAKING_CHECKS
- return IsInvalid;
-#else
- return false;
-#endif
- }
-
- /// True if terminator in the block can branch to another block that is
- /// outside of the current loop. \p BB must be inside the loop.
- bool isLoopExiting(const BlockT *BB) const {
- assert(!isInvalid() && "Loop not in a valid state!");
- assert(contains(BB) && "Exiting block must be part of the loop");
- for (const auto *Succ : children<const BlockT *>(BB)) {
- if (!contains(Succ))
- return true;
- }
- return false;
- }
-
- /// Returns true if \p BB is a loop-latch.
- /// A latch block is a block that contains a branch back to the header.
- /// This function is useful when there are multiple latches in a loop
- /// because \fn getLoopLatch will return nullptr in that case.
- bool isLoopLatch(const BlockT *BB) const {
- assert(!isInvalid() && "Loop not in a valid state!");
- assert(contains(BB) && "block does not belong to the loop");
-
- BlockT *Header = getHeader();
- auto PredBegin = GraphTraits<Inverse<BlockT *>>::child_begin(Header);
- auto PredEnd = GraphTraits<Inverse<BlockT *>>::child_end(Header);
- return std::find(PredBegin, PredEnd, BB) != PredEnd;
- }
-
- /// Calculate the number of back edges to the loop header.
- unsigned getNumBackEdges() const {
- assert(!isInvalid() && "Loop not in a valid state!");
- unsigned NumBackEdges = 0;
- BlockT *H = getHeader();
-
- for (const auto Pred : children<Inverse<BlockT *>>(H))
- if (contains(Pred))
- ++NumBackEdges;
-
- return NumBackEdges;
- }
-
- //===--------------------------------------------------------------------===//
- // APIs for simple analysis of the loop.
- //
- // Note that all of these methods can fail on general loops (ie, there may not
- // be a preheader, etc). For best success, the loop simplification and
- // induction variable canonicalization pass should be used to normalize loops
- // for easy analysis. These methods assume canonical loops.
-
- /// Return all blocks inside the loop that have successors outside of the
- /// loop. These are the blocks _inside of the current loop_ which branch out.
- /// The returned list is always unique.
- void getExitingBlocks(SmallVectorImpl<BlockT *> &ExitingBlocks) const;
-
- /// If getExitingBlocks would return exactly one block, return that block.
- /// Otherwise return null.
- BlockT *getExitingBlock() const;
-
- /// Return all of the successor blocks of this loop. These are the blocks
- /// _outside of the current loop_ which are branched to.
- void getExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
-
- /// If getExitBlocks would return exactly one block, return that block.
- /// Otherwise return null.
- BlockT *getExitBlock() const;
-
- /// Return true if no exit block for the loop has a predecessor that is
- /// outside the loop.
- bool hasDedicatedExits() const;
-
- /// Return all unique successor blocks of this loop.
- /// These are the blocks _outside of the current loop_ which are branched to.
- void getUniqueExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
-
- /// Return all unique successor blocks of this loop except successors from
- /// Latch block are not considered. If the exit comes from Latch has also
- /// non Latch predecessor in a loop it will be added to ExitBlocks.
- /// These are the blocks _outside of the current loop_ which are branched to.
- void getUniqueNonLatchExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
-
- /// If getUniqueExitBlocks would return exactly one block, return that block.
- /// Otherwise return null.
- BlockT *getUniqueExitBlock() const;
-
- /// Return true if this loop does not have any exit blocks.
- bool hasNoExitBlocks() const;
-
- /// Edge type.
- typedef std::pair<BlockT *, BlockT *> Edge;
-
- /// Return all pairs of (_inside_block_,_outside_block_).
- void getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const;
-
- /// If there is a preheader for this loop, return it. A loop has a preheader
- /// if there is only one edge to the header of the loop from outside of the
- /// loop. If this is the case, the block branching to the header of the loop
- /// is the preheader node.
- ///
- /// This method returns null if there is no preheader for the loop.
- BlockT *getLoopPreheader() const;
-
- /// If the given loop's header has exactly one unique predecessor outside the
- /// loop, return it. Otherwise return null.
- /// This is less strict that the loop "preheader" concept, which requires
- /// the predecessor to have exactly one successor.
- BlockT *getLoopPredecessor() const;
-
- /// If there is a single latch block for this loop, return it.
- /// A latch block is a block that contains a branch back to the header.
- BlockT *getLoopLatch() const;
-
- /// Return all loop latch blocks of this loop. A latch block is a block that
- /// contains a branch back to the header.
- void getLoopLatches(SmallVectorImpl<BlockT *> &LoopLatches) const {
- assert(!isInvalid() && "Loop not in a valid state!");
- BlockT *H = getHeader();
- for (const auto Pred : children<Inverse<BlockT *>>(H))
- if (contains(Pred))
- LoopLatches.push_back(Pred);
- }
-
- /// Return all inner loops in the loop nest rooted by the loop in preorder,
- /// with siblings in forward program order.
- template <class Type>
- static void getInnerLoopsInPreorder(const LoopT &L,
- SmallVectorImpl<Type> &PreOrderLoops) {
- SmallVector<LoopT *, 4> PreOrderWorklist;
- PreOrderWorklist.append(L.rbegin(), L.rend());
-
- while (!PreOrderWorklist.empty()) {
- LoopT *L = PreOrderWorklist.pop_back_val();
- // Sub-loops are stored in forward program order, but will process the
- // worklist backwards so append them in reverse order.
- PreOrderWorklist.append(L->rbegin(), L->rend());
- PreOrderLoops.push_back(L);
- }
- }
-
- /// Return all loops in the loop nest rooted by the loop in preorder, with
- /// siblings in forward program order.
- SmallVector<const LoopT *, 4> getLoopsInPreorder() const {
- SmallVector<const LoopT *, 4> PreOrderLoops;
- const LoopT *CurLoop = static_cast<const LoopT *>(this);
- PreOrderLoops.push_back(CurLoop);
- getInnerLoopsInPreorder(*CurLoop, PreOrderLoops);
- return PreOrderLoops;
- }
- SmallVector<LoopT *, 4> getLoopsInPreorder() {
- SmallVector<LoopT *, 4> PreOrderLoops;
- LoopT *CurLoop = static_cast<LoopT *>(this);
- PreOrderLoops.push_back(CurLoop);
- getInnerLoopsInPreorder(*CurLoop, PreOrderLoops);
- return PreOrderLoops;
- }
-
- //===--------------------------------------------------------------------===//
- // APIs for updating loop information after changing the CFG
- //
-
- /// This method is used by other analyses to update loop information.
- /// NewBB is set to be a new member of the current loop.
- /// Because of this, it is added as a member of all parent loops, and is added
- /// to the specified LoopInfo object as being in the current basic block. It
- /// is not valid to replace the loop header with this method.
- void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LI);
-
- /// This is used when splitting loops up. It replaces the OldChild entry in
- /// our children list with NewChild, and updates the parent pointer of
- /// OldChild to be null and the NewChild to be this loop.
- /// This updates the loop depth of the new child.
- void replaceChildLoopWith(LoopT *OldChild, LoopT *NewChild);
-
- /// Add the specified loop to be a child of this loop.
- /// This updates the loop depth of the new child.
- void addChildLoop(LoopT *NewChild) {
- assert(!isInvalid() && "Loop not in a valid state!");
- assert(!NewChild->ParentLoop && "NewChild already has a parent!");
- NewChild->ParentLoop = static_cast<LoopT *>(this);
- SubLoops.push_back(NewChild);
- }
-
- /// This removes the specified child from being a subloop of this loop. The
- /// loop is not deleted, as it will presumably be inserted into another loop.
- LoopT *removeChildLoop(iterator I) {
- assert(!isInvalid() && "Loop not in a valid state!");
- assert(I != SubLoops.end() && "Cannot remove end iterator!");
- LoopT *Child = *I;
- assert(Child->ParentLoop == this && "Child is not a child of this loop!");
- SubLoops.erase(SubLoops.begin() + (I - begin()));
- Child->ParentLoop = nullptr;
- return Child;
- }
-
- /// This removes the specified child from being a subloop of this loop. The
- /// loop is not deleted, as it will presumably be inserted into another loop.
- LoopT *removeChildLoop(LoopT *Child) {
- return removeChildLoop(llvm::find(*this, Child));
- }
- /// This adds a basic block directly to the basic block list.
- /// This should only be used by transformations that create new loops. Other
- /// transformations should use addBasicBlockToLoop.
- void addBlockEntry(BlockT *BB) {
- assert(!isInvalid() && "Loop not in a valid state!");
- Blocks.push_back(BB);
- DenseBlockSet.insert(BB);
- }
-
- /// interface to reverse Blocks[from, end of loop] in this loop
- void reverseBlock(unsigned from) {
- assert(!isInvalid() && "Loop not in a valid state!");
- std::reverse(Blocks.begin() + from, Blocks.end());
- }
-
- /// interface to do reserve() for Blocks
- void reserveBlocks(unsigned size) {
- assert(!isInvalid() && "Loop not in a valid state!");
- Blocks.reserve(size);
- }
-
- /// This method is used to move BB (which must be part of this loop) to be the
- /// loop header of the loop (the block that dominates all others).
- void moveToHeader(BlockT *BB) {
- assert(!isInvalid() && "Loop not in a valid state!");
- if (Blocks[0] == BB)
- return;
- for (unsigned i = 0;; ++i) {
- assert(i != Blocks.size() && "Loop does not contain BB!");
- if (Blocks[i] == BB) {
- Blocks[i] = Blocks[0];
- Blocks[0] = BB;
- return;
- }
- }
- }
-
- /// This removes the specified basic block from the current loop, updating the
- /// Blocks as appropriate. This does not update the mapping in the LoopInfo
- /// class.
- void removeBlockFromLoop(BlockT *BB) {
- assert(!isInvalid() && "Loop not in a valid state!");
- auto I = find(Blocks, BB);
- assert(I != Blocks.end() && "N is not in this list!");
- Blocks.erase(I);
-
- DenseBlockSet.erase(BB);
- }
-
- /// Verify loop structure
- void verifyLoop() const;
-
- /// Verify loop structure of this loop and all nested loops.
- void verifyLoopNest(DenseSet<const LoopT *> *Loops) const;
-
- /// Returns true if the loop is annotated parallel.
- ///
- /// Derived classes can override this method using static template
- /// polymorphism.
- bool isAnnotatedParallel() const { return false; }
-
- /// Print loop with all the BBs inside it.
- void print(raw_ostream &OS, bool Verbose = false, bool PrintNested = true,
- unsigned Depth = 0) const;
-
-protected:
- friend class LoopInfoBase<BlockT, LoopT>;
-
- /// This creates an empty loop.
- LoopBase() : ParentLoop(nullptr) {}
-
- explicit LoopBase(BlockT *BB) : ParentLoop(nullptr) {
- Blocks.push_back(BB);
- DenseBlockSet.insert(BB);
- }
-
- // Since loop passes like SCEV are allowed to key analysis results off of
- // `Loop` pointers, we cannot re-use pointers within a loop pass manager.
- // This means loop passes should not be `delete` ing `Loop` objects directly
- // (and risk a later `Loop` allocation re-using the address of a previous one)
- // but should be using LoopInfo::markAsRemoved, which keeps around the `Loop`
- // pointer till the end of the lifetime of the `LoopInfo` object.
- //
- // To make it easier to follow this rule, we mark the destructor as
- // non-public.
- ~LoopBase() {
- for (auto *SubLoop : SubLoops)
- SubLoop->~LoopT();
-
-#if LLVM_ENABLE_ABI_BREAKING_CHECKS
- IsInvalid = true;
-#endif
- SubLoops.clear();
- Blocks.clear();
- DenseBlockSet.clear();
- ParentLoop = nullptr;
- }
-};
-
-template <class BlockT, class LoopT>
-raw_ostream &operator<<(raw_ostream &OS, const LoopBase<BlockT, LoopT> &Loop) {
- Loop.print(OS);
- return OS;
-}
-
-// Implementation in LoopInfoImpl.h
+// Implementation in Support/GenericLoopInfoImpl.h
extern template class LoopBase<BasicBlock, Loop>;
/// Represents a single loop in the control flow graph. Note that not all SCCs
@@ -664,7 +164,7 @@ public:
/// - the step instruction of the induction variable can be found
/// - the final value of the induction variable can be found
///
- /// Else None.
+ /// Else std::nullopt.
static std::optional<Loop::LoopBounds>
getBounds(const Loop &L, PHINode &IndVar, ScalarEvolution &SE);
@@ -904,205 +404,7 @@ private:
~Loop() = default;
};
-//===----------------------------------------------------------------------===//
-/// This class builds and contains all of the top-level loop
-/// structures in the specified function.
-///
-
-template <class BlockT, class LoopT> class LoopInfoBase {
- // BBMap - Mapping of basic blocks to the inner most loop they occur in
- DenseMap<const BlockT *, LoopT *> BBMap;
- std::vector<LoopT *> TopLevelLoops;
- BumpPtrAllocator LoopAllocator;
-
- friend class LoopBase<BlockT, LoopT>;
- friend class LoopInfo;
-
- void operator=(const LoopInfoBase &) = delete;
- LoopInfoBase(const LoopInfoBase &) = delete;
-
-public:
- LoopInfoBase() = default;
- ~LoopInfoBase() { releaseMemory(); }
-
- LoopInfoBase(LoopInfoBase &&Arg)
- : BBMap(std::move(Arg.BBMap)),
- TopLevelLoops(std::move(Arg.TopLevelLoops)),
- LoopAllocator(std::move(Arg.LoopAllocator)) {
- // We have to clear the arguments top level loops as we've taken ownership.
- Arg.TopLevelLoops.clear();
- }
- LoopInfoBase &operator=(LoopInfoBase &&RHS) {
- BBMap = std::move(RHS.BBMap);
-
- for (auto *L : TopLevelLoops)
- L->~LoopT();
-
- TopLevelLoops = std::move(RHS.TopLevelLoops);
- LoopAllocator = std::move(RHS.LoopAllocator);
- RHS.TopLevelLoops.clear();
- return *this;
- }
-
- void releaseMemory() {
- BBMap.clear();
-
- for (auto *L : TopLevelLoops)
- L->~LoopT();
- TopLevelLoops.clear();
- LoopAllocator.Reset();
- }
-
- template <typename... ArgsTy> LoopT *AllocateLoop(ArgsTy &&... Args) {
- LoopT *Storage = LoopAllocator.Allocate<LoopT>();
- return new (Storage) LoopT(std::forward<ArgsTy>(Args)...);
- }
-
- /// iterator/begin/end - The interface to the top-level loops in the current
- /// function.
- ///
- typedef typename std::vector<LoopT *>::const_iterator iterator;
- typedef
- typename std::vector<LoopT *>::const_reverse_iterator reverse_iterator;
- iterator begin() const { return TopLevelLoops.begin(); }
- iterator end() const { return TopLevelLoops.end(); }
- reverse_iterator rbegin() const { return TopLevelLoops.rbegin(); }
- reverse_iterator rend() const { return TopLevelLoops.rend(); }
- bool empty() const { return TopLevelLoops.empty(); }
-
- /// Return all of the loops in the function in preorder across the loop
- /// nests, with siblings in forward program order.
- ///
- /// Note that because loops form a forest of trees, preorder is equivalent to
- /// reverse postorder.
- SmallVector<LoopT *, 4> getLoopsInPreorder() const;
-
- /// Return all of the loops in the function in preorder across the loop
- /// nests, with siblings in *reverse* program order.
- ///
- /// Note that because loops form a forest of trees, preorder is equivalent to
- /// reverse postorder.
- ///
- /// Also note that this is *not* a reverse preorder. Only the siblings are in
- /// reverse program order.
- SmallVector<LoopT *, 4> getLoopsInReverseSiblingPreorder() const;
-
- /// Return the inner most loop that BB lives in. If a basic block is in no
- /// loop (for example the entry node), null is returned.
- LoopT *getLoopFor(const BlockT *BB) const { return BBMap.lookup(BB); }
-
- /// Same as getLoopFor.
- const LoopT *operator[](const BlockT *BB) const { return getLoopFor(BB); }
-
- /// Return the loop nesting level of the specified block. A depth of 0 means
- /// the block is not inside any loop.
- unsigned getLoopDepth(const BlockT *BB) const {
- const LoopT *L = getLoopFor(BB);
- return L ? L->getLoopDepth() : 0;
- }
-
- // True if the block is a loop header node
- bool isLoopHeader(const BlockT *BB) const {
- const LoopT *L = getLoopFor(BB);
- return L && L->getHeader() == BB;
- }
-
- /// Return the top-level loops.
- const std::vector<LoopT *> &getTopLevelLoops() const { return TopLevelLoops; }
-
- /// Return the top-level loops.
- std::vector<LoopT *> &getTopLevelLoopsVector() { return TopLevelLoops; }
-
- /// This removes the specified top-level loop from this loop info object.
- /// The loop is not deleted, as it will presumably be inserted into
- /// another loop.
- LoopT *removeLoop(iterator I) {
- assert(I != end() && "Cannot remove end iterator!");
- LoopT *L = *I;
- assert(L->isOutermost() && "Not a top-level loop!");
- TopLevelLoops.erase(TopLevelLoops.begin() + (I - begin()));
- return L;
- }
-
- /// Change the top-level loop that contains BB to the specified loop.
- /// This should be used by transformations that restructure the loop hierarchy
- /// tree.
- void changeLoopFor(BlockT *BB, LoopT *L) {
- if (!L) {
- BBMap.erase(BB);
- return;
- }
- BBMap[BB] = L;
- }
-
- /// Replace the specified loop in the top-level loops list with the indicated
- /// loop.
- void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop) {
- auto I = find(TopLevelLoops, OldLoop);
- assert(I != TopLevelLoops.end() && "Old loop not at top level!");
- *I = NewLoop;
- assert(!NewLoop->ParentLoop && !OldLoop->ParentLoop &&
- "Loops already embedded into a subloop!");
- }
-
- /// This adds the specified loop to the collection of top-level loops.
- void addTopLevelLoop(LoopT *New) {
- assert(New->isOutermost() && "Loop already in subloop!");
- TopLevelLoops.push_back(New);
- }
-
- /// This method completely removes BB from all data structures,
- /// including all of the Loop objects it is nested in and our mapping from
- /// BasicBlocks to loops.
- void removeBlock(BlockT *BB) {
- auto I = BBMap.find(BB);
- if (I != BBMap.end()) {
- for (LoopT *L = I->second; L; L = L->getParentLoop())
- L->removeBlockFromLoop(BB);
-
- BBMap.erase(I);
- }
- }
-
- // Internals
-
- static bool isNotAlreadyContainedIn(const LoopT *SubLoop,
- const LoopT *ParentLoop) {
- if (!SubLoop)
- return true;
- if (SubLoop == ParentLoop)
- return false;
- return isNotAlreadyContainedIn(SubLoop->getParentLoop(), ParentLoop);
- }
-
- /// Create the loop forest using a stable algorithm.
- void analyze(const DominatorTreeBase<BlockT, false> &DomTree);
-
- // Debugging
- void print(raw_ostream &OS) const;
-
- void verify(const DominatorTreeBase<BlockT, false> &DomTree) const;
-
- /// Destroy a loop that has been removed from the `LoopInfo` nest.
- ///
- /// This runs the destructor of the loop object making it invalid to
- /// reference afterward. The memory is retained so that the *pointer* to the
- /// loop remains valid.
- ///
- /// The caller is responsible for removing this loop from the loop nest and
- /// otherwise disconnecting it from the broader `LoopInfo` data structures.
- /// Callers that don't naturally handle this themselves should probably call
- /// `erase' instead.
- void destroy(LoopT *L) {
- L->~LoopT();
-
- // Since LoopAllocator is a BumpPtrAllocator, this Deallocate only poisons
- // \c L, but the pointer remains valid for non-dereferencing uses.
- LoopAllocator.Deallocate(L);
- }
-};
-
-// Implementation in LoopInfoImpl.h
+// Implementation in Support/GenericLoopInfoImpl.h
extern template class LoopInfoBase<BasicBlock, Loop>;
class LoopInfo : public LoopInfoBase<BasicBlock, Loop> {
@@ -1235,7 +537,6 @@ public:
// IR is assumed to be in LCSSA form before the planned insertion.
bool wouldBeOutOfLoopUseRequiringLCSSA(const Value *V,
const BasicBlock *ExitBB) const;
-
};
/// Enable verification of loop info.
@@ -1391,6 +692,6 @@ makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID,
llvm::ArrayRef<llvm::StringRef> RemovePrefixes,
llvm::ArrayRef<llvm::MDNode *> AddAttrs);
-} // End llvm namespace
+} // namespace llvm
#endif
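The LoopInfo interface itself is unchanged by the move of the implementation into Support/GenericLoopInfoImpl.h. For orientation, a small sketch of typical use from a new-pass-manager function pass follows; LoopDepthSketchPass is a hypothetical name and pass-registration boilerplate is omitted.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

struct LoopDepthSketchPass : PassInfoMixin<LoopDepthSketchPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
    // Preorder across the loop forest: outer loops first, siblings in
    // program order.
    for (Loop *L : LI.getLoopsInPreorder())
      (void)L->getLoopDepth(); // 1 for top-level loops, 2 for their subloops, ...
    // Map a block back to the innermost loop containing it (null if none).
    for (BasicBlock &BB : F)
      (void)LI.getLoopFor(&BB);
    return PreservedAnalyses::all();
  }
};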
diff --git a/llvm/include/llvm/Analysis/MLInlineAdvisor.h b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
index 23ada7fe6a25..f58862e53352 100644
--- a/llvm/include/llvm/Analysis/MLInlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/MLInlineAdvisor.h
@@ -28,7 +28,8 @@ class MLInlineAdvice;
class MLInlineAdvisor : public InlineAdvisor {
public:
MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM,
- std::unique_ptr<MLModelRunner> ModelRunner);
+ std::unique_ptr<MLModelRunner> ModelRunner,
+ std::function<bool(CallBase &)> GetDefaultAdvice);
virtual ~MLInlineAdvisor() = default;
@@ -63,6 +64,7 @@ protected:
unsigned getInitialFunctionLevel(const Function &F) const;
std::unique_ptr<MLModelRunner> ModelRunner;
+ std::function<bool(CallBase &)> GetDefaultAdvice;
private:
int64_t getModuleIRSize() const;
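The added GetDefaultAdvice callback gives the advisor a fallback decision when the model declines to advise. A hedged construction sketch follows; makeMLAdvisorSketch is a hypothetical helper, and the lambda merely stands in for a real default-advisor query.

#include "llvm/Analysis/MLInlineAdvisor.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/IR/InstrTypes.h"
#include <memory>
using namespace llvm;

std::unique_ptr<InlineAdvisor>
makeMLAdvisorSketch(Module &M, ModuleAnalysisManager &MAM,
                    std::unique_ptr<MLModelRunner> Runner) {
  // Stand-in policy: never inline when falling back. Real callers typically
  // consult the default (cost-model based) advisor here instead.
  auto GetDefaultAdvice = [](CallBase &CB) { return false; };
  return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(Runner),
                                           GetDefaultAdvice);
}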
diff --git a/llvm/include/llvm/Analysis/MLModelRunner.h b/llvm/include/llvm/Analysis/MLModelRunner.h
index 872c0e37f00e..903411fbdf7e 100644
--- a/llvm/include/llvm/Analysis/MLModelRunner.h
+++ b/llvm/include/llvm/Analysis/MLModelRunner.h
@@ -47,8 +47,9 @@ public:
return (const_cast<MLModelRunner *>(this))->getTensorUntyped(Index);
}
- enum class Kind : int { Unknown, Release, Development, NoOp };
+ enum class Kind : int { Unknown, Release, Development, NoOp, Interactive };
Kind getKind() const { return Type; }
+ virtual void switchContext(StringRef Name) {}
protected:
MLModelRunner(LLVMContext &Ctx, Kind Type, size_t NrInputs)
diff --git a/llvm/include/llvm/Analysis/MemoryBuiltins.h b/llvm/include/llvm/Analysis/MemoryBuiltins.h
index 146781515aba..711bbf6a0afe 100644
--- a/llvm/include/llvm/Analysis/MemoryBuiltins.h
+++ b/llvm/include/llvm/Analysis/MemoryBuiltins.h
@@ -182,9 +182,10 @@ bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
/// argument of the call to objectsize.
Value *lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL,
const TargetLibraryInfo *TLI, bool MustSucceed);
-Value *lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL,
- const TargetLibraryInfo *TLI, AAResults *AA,
- bool MustSucceed);
+Value *lowerObjectSizeCall(
+ IntrinsicInst *ObjectSize, const DataLayout &DL,
+ const TargetLibraryInfo *TLI, AAResults *AA, bool MustSucceed,
+ SmallVectorImpl<Instruction *> *InsertedInstructions = nullptr);
using SizeOffsetType = std::pair<APInt, APInt>;
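The new out-parameter lets a caller see any helper instructions the lowering creates. A short sketch, assuming ObjSizeCall, DL, TLI and AA come from the caller; lowerWithTracking is a hypothetical wrapper name.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryBuiltins.h"
using namespace llvm;

Value *lowerWithTracking(IntrinsicInst *ObjSizeCall, const DataLayout &DL,
                         const TargetLibraryInfo *TLI, AAResults *AA) {
  SmallVector<Instruction *> Inserted;
  // MustSucceed=true forces a conservative constant when the size is unknown;
  // instructions materialized during lowering are appended to Inserted.
  Value *Size = lowerObjectSizeCall(ObjSizeCall, DL, TLI, AA,
                                    /*MustSucceed=*/true, &Inserted);
  for (Instruction *I : Inserted)
    (void)I; // e.g. queue them for further simplification
  return Size;
}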
diff --git a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
index f66b42929429..27185aa9942e 100644
--- a/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -208,11 +208,11 @@ class NonLocalDepEntry {
MemDepResult Result;
public:
- NonLocalDepEntry(BasicBlock *bb, MemDepResult result)
- : BB(bb), Result(result) {}
+ NonLocalDepEntry(BasicBlock *BB, MemDepResult Result)
+ : BB(BB), Result(Result) {}
// This is used for searches.
- NonLocalDepEntry(BasicBlock *bb) : BB(bb) {}
+ NonLocalDepEntry(BasicBlock *BB) : BB(BB) {}
// BB is the sort key, it can't be changed.
BasicBlock *getBB() const { return BB; }
@@ -233,8 +233,8 @@ class NonLocalDepResult {
Value *Address;
public:
- NonLocalDepResult(BasicBlock *bb, MemDepResult result, Value *address)
- : Entry(bb, result), Address(address) {}
+ NonLocalDepResult(BasicBlock *BB, MemDepResult Result, Value *Address)
+ : Entry(BB, Result), Address(Address) {}
// BB is the sort key, it can't be changed.
BasicBlock *getBB() const { return Entry.getBB(); }
diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
index 24956e781572..355bff46f627 100644
--- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h
+++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
@@ -24,8 +24,8 @@ namespace llvm {
namespace memprof {
/// Return the allocation type for a given set of memory profile values.
-AllocationType getAllocType(uint64_t MaxAccessCount, uint64_t MinSize,
- uint64_t MinLifetime);
+AllocationType getAllocType(uint64_t TotalLifetimeAccessDensity,
+ uint64_t AllocCount, uint64_t TotalLifetime);
/// Build callstack metadata from the provided list of call stack ids. Returns
/// the resulting metadata node.
@@ -37,6 +37,12 @@ MDNode *getMIBStackNode(const MDNode *MIB);
/// Returns the allocation type from an MIB metadata node.
AllocationType getMIBAllocType(const MDNode *MIB);
+/// Returns the string to use in attributes with the given type.
+std::string getAllocTypeAttributeString(AllocationType Type);
+
+/// True if the AllocTypes bitmask contains just a single type.
+bool hasSingleAllocType(uint8_t AllocTypes);
+
/// Class to build a trie of call stack contexts for a particular profiled
/// allocation call, along with their associated allocation types.
/// The allocation will be at the root of the trie, which is then used to
@@ -55,9 +61,9 @@ private:
};
// The node for the allocation at the root.
- CallStackTrieNode *Alloc;
+ CallStackTrieNode *Alloc = nullptr;
// The allocation's leaf stack id.
- uint64_t AllocStackId;
+ uint64_t AllocStackId = 0;
void deleteTrieNode(CallStackTrieNode *Node) {
if (!Node)
@@ -74,7 +80,7 @@ private:
bool CalleeHasAmbiguousCallerContext);
public:
- CallStackTrie() : Alloc(nullptr), AllocStackId(0) {}
+ CallStackTrie() = default;
~CallStackTrie() { deleteTrieNode(Alloc); }
bool empty() const { return Alloc == nullptr; }
@@ -128,6 +134,7 @@ public:
CallStackIterator begin() const;
CallStackIterator end() const { return CallStackIterator(N, /*End*/ true); }
CallStackIterator beginAfterSharedPrefix(CallStack &Other);
+ uint64_t back() const;
private:
const NodeT *N = nullptr;
@@ -137,8 +144,10 @@ template <class NodeT, class IteratorT>
CallStack<NodeT, IteratorT>::CallStackIterator::CallStackIterator(
const NodeT *N, bool End)
: N(N) {
- if (!N)
+ if (!N) {
+ Iter = nullptr;
return;
+ }
Iter = End ? N->StackIdIndices.end() : N->StackIdIndices.begin();
}
@@ -149,6 +158,12 @@ uint64_t CallStack<NodeT, IteratorT>::CallStackIterator::operator*() {
}
template <class NodeT, class IteratorT>
+uint64_t CallStack<NodeT, IteratorT>::back() const {
+ assert(N);
+ return N->StackIdIndices.back();
+}
+
+template <class NodeT, class IteratorT>
typename CallStack<NodeT, IteratorT>::CallStackIterator
CallStack<NodeT, IteratorT>::begin() const {
return CallStackIterator(N, /*End*/ false);
@@ -170,6 +185,7 @@ CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::CallStackIterator(
const MDNode *N, bool End);
template <>
uint64_t CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::operator*();
+template <> uint64_t CallStack<MDNode, MDNode::op_iterator>::back() const;
} // end namespace memprof
} // end namespace llvm
diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h
index d861057cb8cb..94d7f1a78b84 100644
--- a/llvm/include/llvm/Analysis/MemorySSA.h
+++ b/llvm/include/llvm/Analysis/MemorySSA.h
@@ -798,7 +798,6 @@ public:
protected:
// Used by Memory SSA dumpers and wrapper pass
- friend class MemorySSAPrinterLegacyPass;
friend class MemorySSAUpdater;
void verifyOrderingDominationAndDefUses(
@@ -919,18 +918,6 @@ protected:
AliasAnalysis &AA);
};
-// This pass does eager building and then printing of MemorySSA. It is used by
-// the tests to be able to build, dump, and verify Memory SSA.
-class MemorySSAPrinterLegacyPass : public FunctionPass {
-public:
- MemorySSAPrinterLegacyPass();
-
- bool runOnFunction(Function &) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
- static char ID;
-};
-
/// An analysis that produces \c MemorySSA for a function.
///
class MemorySSAAnalysis : public AnalysisInfoMixin<MemorySSAAnalysis> {
@@ -959,9 +946,11 @@ public:
/// Printer pass for \c MemorySSA.
class MemorySSAPrinterPass : public PassInfoMixin<MemorySSAPrinterPass> {
raw_ostream &OS;
+ bool EnsureOptimizedUses;
public:
- explicit MemorySSAPrinterPass(raw_ostream &OS) : OS(OS) {}
+ explicit MemorySSAPrinterPass(raw_ostream &OS, bool EnsureOptimizedUses)
+ : OS(OS), EnsureOptimizedUses(EnsureOptimizedUses) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
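A sketch of scheduling the printer with the new flag; addMSSAPrinter is a hypothetical helper, pipeline setup is elided, and errs() is just one possible stream.

#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

void addMSSAPrinter(FunctionPassManager &FPM) {
  // EnsureOptimizedUses=true asks MemorySSA to optimize all uses before
  // producing the dump.
  FPM.addPass(MemorySSAPrinterPass(errs(), /*EnsureOptimizedUses=*/true));
}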
@@ -1272,11 +1261,11 @@ private:
const_cast<Value *>(Location.Ptr),
OriginalAccess->getBlock()->getModule()->getDataLayout(), nullptr);
- if (!Translator.PHITranslateValue(OriginalAccess->getBlock(),
+ if (Value *Addr =
+ Translator.translateValue(OriginalAccess->getBlock(),
DefIterator.getPhiArgBlock(), DT, true))
- if (Translator.getAddr() != CurrentPair.second.Ptr)
- CurrentPair.second =
- CurrentPair.second.getWithNewPtr(Translator.getAddr());
+ if (Addr != CurrentPair.second.Ptr)
+ CurrentPair.second = CurrentPair.second.getWithNewPtr(Addr);
// Mark size as unknown, if the location is not guaranteed to be
// loop-invariant for any possible loop in the function. Setting the size
diff --git a/llvm/include/llvm/Analysis/ModuleSummaryAnalysis.h b/llvm/include/llvm/Analysis/ModuleSummaryAnalysis.h
index 9a809171e870..e36dea58cec4 100644
--- a/llvm/include/llvm/Analysis/ModuleSummaryAnalysis.h
+++ b/llvm/include/llvm/Analysis/ModuleSummaryAnalysis.h
@@ -99,6 +99,10 @@ public:
ImmutablePass *
createImmutableModuleSummaryIndexWrapperPass(const ModuleSummaryIndex *Index);
+/// Returns true if the instruction could have memprof metadata, used to ensure
+/// consistency between summary analysis and the ThinLTO backend processing.
+bool mayHaveMemprofSummary(const CallBase *CB);
+
} // end namespace llvm
#endif // LLVM_ANALYSIS_MODULESUMMARYANALYSIS_H
diff --git a/llvm/include/llvm/Analysis/MustExecute.h b/llvm/include/llvm/Analysis/MustExecute.h
index b83705cb6111..9c97bd1725ac 100644
--- a/llvm/include/llvm/Analysis/MustExecute.h
+++ b/llvm/include/llvm/Analysis/MustExecute.h
@@ -25,8 +25,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionPrecedenceTracking.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
diff --git a/llvm/include/llvm/Analysis/PHITransAddr.h b/llvm/include/llvm/Analysis/PHITransAddr.h
index a23f8e61c303..de9c3c4fd292 100644
--- a/llvm/include/llvm/Analysis/PHITransAddr.h
+++ b/llvm/include/llvm/Analysis/PHITransAddr.h
@@ -17,10 +17,10 @@
#include "llvm/IR/Instruction.h"
namespace llvm {
- class AssumptionCache;
- class DominatorTree;
- class DataLayout;
- class TargetLibraryInfo;
+class AssumptionCache;
+class DominatorTree;
+class DataLayout;
+class TargetLibraryInfo;
/// PHITransAddr - An address value which tracks and handles phi translation.
/// As we walk "up" the CFG through predecessors, we need to ensure that the
@@ -49,71 +49,68 @@ class PHITransAddr {
SmallVector<Instruction*, 4> InstInputs;
public:
- PHITransAddr(Value *addr, const DataLayout &DL, AssumptionCache *AC)
- : Addr(addr), DL(DL), AC(AC) {
+ PHITransAddr(Value *Addr, const DataLayout &DL, AssumptionCache *AC)
+ : Addr(Addr), DL(DL), AC(AC) {
// If the address is an instruction, the whole thing is considered an input.
- if (Instruction *I = dyn_cast<Instruction>(Addr))
- InstInputs.push_back(I);
+ addAsInput(Addr);
}
Value *getAddr() const { return Addr; }
- /// NeedsPHITranslationFromBlock - Return true if moving from the specified
+ /// needsPHITranslationFromBlock - Return true if moving from the specified
/// BasicBlock to its predecessors requires PHI translation.
- bool NeedsPHITranslationFromBlock(BasicBlock *BB) const {
+ bool needsPHITranslationFromBlock(BasicBlock *BB) const {
// We do need translation if one of our input instructions is defined in
// this block.
- for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
- if (InstInputs[i]->getParent() == BB)
- return true;
- return false;
+ return any_of(InstInputs, [BB](const auto &InstInput) {
+ return InstInput->getParent() == BB;
+ });
}
- /// IsPotentiallyPHITranslatable - If this needs PHI translation, return true
+ /// isPotentiallyPHITranslatable - If this needs PHI translation, return true
/// if we have some hope of doing it. This should be used as a filter to
/// avoid calling PHITranslateValue in hopeless situations.
- bool IsPotentiallyPHITranslatable() const;
+ bool isPotentiallyPHITranslatable() const;
- /// PHITranslateValue - PHI translate the current address up the CFG from
+ /// translateValue - PHI translate the current address up the CFG from
/// CurBB to Pred, updating our state to reflect any needed changes. If
- /// 'MustDominate' is true, the translated value must dominate
- /// PredBB. This returns true on failure and sets Addr to null.
- bool PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB,
- const DominatorTree *DT, bool MustDominate);
+ /// 'MustDominate' is true, the translated value must dominate PredBB.
+ Value *translateValue(BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree *DT, bool MustDominate);
- /// PHITranslateWithInsertion - PHI translate this value into the specified
+ /// translateWithInsertion - PHI translate this value into the specified
/// predecessor block, inserting a computation of the value if it is
/// unavailable.
///
/// All newly created instructions are added to the NewInsts list. This
/// returns null on failure.
///
- Value *PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB,
- const DominatorTree &DT,
- SmallVectorImpl<Instruction *> &NewInsts);
+ Value *translateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree &DT,
+ SmallVectorImpl<Instruction *> &NewInsts);
void dump() const;
- /// Verify - Check internal consistency of this data structure. If the
+ /// verify - Check internal consistency of this data structure. If the
/// structure is valid, it returns true. If invalid, it prints errors and
/// returns false.
- bool Verify() const;
+ bool verify() const;
private:
- Value *PHITranslateSubExpr(Value *V, BasicBlock *CurBB, BasicBlock *PredBB,
- const DominatorTree *DT);
+ Value *translateSubExpr(Value *V, BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree *DT);
- /// InsertPHITranslatedSubExpr - Insert a computation of the PHI translated
+ /// insertTranslatedSubExpr - Insert a computation of the PHI translated
/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
/// block. All newly created instructions are added to the NewInsts list.
/// This returns null on failure.
///
- Value *InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
- BasicBlock *PredBB, const DominatorTree &DT,
- SmallVectorImpl<Instruction *> &NewInsts);
+ Value *insertTranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
+ BasicBlock *PredBB, const DominatorTree &DT,
+ SmallVectorImpl<Instruction *> &NewInsts);
- /// AddAsInput - If the specified value is an instruction, add it as an input.
- Value *AddAsInput(Value *V) {
+ /// addAsInput - If the specified value is an instruction, add it as an input.
+ Value *addAsInput(Value *V) {
// If V is an instruction, it is now an input.
if (Instruction *VI = dyn_cast<Instruction>(V))
InstInputs.push_back(VI);
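The renamed entry points report failure through a null return instead of a bool, as the MemorySSA change earlier in this diff already shows. A standalone sketch follows; translateAcrossEdge is a hypothetical helper, and Addr/CurBB/PredBB/DT/DL/AC are assumed to come from the caller.

#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/IR/Dominators.h"
using namespace llvm;

Value *translateAcrossEdge(Value *Addr, BasicBlock *CurBB, BasicBlock *PredBB,
                           const DominatorTree &DT, const DataLayout &DL,
                           AssumptionCache *AC) {
  PHITransAddr Translator(Addr, DL, AC);
  if (!Translator.needsPHITranslationFromBlock(CurBB))
    return Addr; // nothing feeding the address is defined in CurBB
  // Returns the translated value, or nullptr if no dominating value exists.
  return Translator.translateValue(CurBB, PredBB, &DT, /*MustDominate=*/true);
}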
diff --git a/llvm/include/llvm/Analysis/Passes.h b/llvm/include/llvm/Analysis/Passes.h
index 343c239dede6..ac1bc3549910 100644
--- a/llvm/include/llvm/Analysis/Passes.h
+++ b/llvm/include/llvm/Analysis/Passes.h
@@ -48,13 +48,6 @@ namespace llvm {
//===--------------------------------------------------------------------===//
//
- // createLegacyDivergenceAnalysisPass - This pass determines which branches in a GPU
- // program are divergent.
- //
- FunctionPass *createLegacyDivergenceAnalysisPass();
-
- //===--------------------------------------------------------------------===//
- //
// Minor pass prototypes, allowing us to expose them through bugpoint and
// analyze.
FunctionPass *createInstCountPass();
@@ -65,39 +58,6 @@ namespace llvm {
// in a function and builds the region hierarchy.
//
FunctionPass *createRegionInfoPass();
-
- // Print module-level debug info metadata in human-readable form.
- ModulePass *createModuleDebugInfoPrinterPass();
-
- //===--------------------------------------------------------------------===//
- //
- // createMemDepPrinter - This pass exhaustively collects all memdep
- // information and prints it with -analyze.
- //
- FunctionPass *createMemDepPrinter();
-
- //===--------------------------------------------------------------------===//
- //
- // createMemDerefPrinter - This pass collects memory dereferenceability
- // information and prints it with -analyze.
- //
- FunctionPass *createMemDerefPrinter();
-
- //===--------------------------------------------------------------------===//
- //
- // createMustExecutePrinter - This pass collects information about which
- // instructions within a loop are guaranteed to execute if the loop header is
- // entered and prints it with -analyze.
- //
- FunctionPass *createMustExecutePrinter();
-
- //===--------------------------------------------------------------------===//
- //
- // createMustBeExecutedContextPrinter - This pass prints information about which
- // instructions are guaranteed to execute together (run with -analyze).
- //
- ModulePass *createMustBeExecutedContextPrinter();
-
}
#endif
diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
index 292c713f07ca..38eb71ba271d 100644
--- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -15,6 +15,9 @@
#define LLVM_ANALYSIS_PROFILESUMMARYINFO_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/Pass.h"
@@ -23,9 +26,8 @@
namespace llvm {
class BasicBlock;
-class BlockFrequencyInfo;
class CallBase;
-class Function;
+class MachineFunction;
/// Analysis providing profile information.
///
@@ -107,28 +109,75 @@ public:
bool hasHugeWorkingSetSize() const;
/// Returns true if the working set size of the code is considered large.
bool hasLargeWorkingSetSize() const;
- /// Returns true if \p F has hot function entry.
- bool isFunctionEntryHot(const Function *F) const;
+ /// Returns true if \p F has hot function entry. If it returns false, it
+ /// either means it is not hot or it is unknown whether it is hot or not (for
+ /// example, no profile data is available).
+ template <typename FuncT> bool isFunctionEntryHot(const FuncT *F) const {
+ if (!F || !hasProfileSummary())
+ return false;
+ std::optional<Function::ProfileCount> FunctionCount = getEntryCount(F);
+ // FIXME: The heuristic used below for determining hotness is based on
+ // preliminary SPEC tuning for inliner. This will eventually be a
+ // convenience method that calls isHotCount.
+ return FunctionCount && isHotCount(FunctionCount->getCount());
+ }
+
/// Returns true if \p F contains hot code.
- bool isFunctionHotInCallGraph(const Function *F,
- BlockFrequencyInfo &BFI) const;
+ template <typename FuncT, typename BFIT>
+ bool isFunctionHotInCallGraph(const FuncT *F, BFIT &BFI) const {
+ if (!F || !hasProfileSummary())
+ return false;
+ if (auto FunctionCount = getEntryCount(F))
+ if (isHotCount(FunctionCount->getCount()))
+ return true;
+
+ if (auto TotalCallCount = getTotalCallCount(F))
+ if (isHotCount(*TotalCallCount))
+ return true;
+
+ for (const auto &BB : *F)
+ if (isHotBlock(&BB, &BFI))
+ return true;
+ return false;
+ }
/// Returns true if \p F has cold function entry.
bool isFunctionEntryCold(const Function *F) const;
/// Returns true if \p F contains only cold code.
- bool isFunctionColdInCallGraph(const Function *F,
- BlockFrequencyInfo &BFI) const;
+ template <typename FuncT, typename BFIT>
+ bool isFunctionColdInCallGraph(const FuncT *F, BFIT &BFI) const {
+ if (!F || !hasProfileSummary())
+ return false;
+ if (auto FunctionCount = getEntryCount(F))
+ if (!isColdCount(FunctionCount->getCount()))
+ return false;
+
+ if (auto TotalCallCount = getTotalCallCount(F))
+ if (!isColdCount(*TotalCallCount))
+ return false;
+
+ for (const auto &BB : *F)
+ if (!isColdBlock(&BB, &BFI))
+ return false;
+ return true;
+ }
/// Returns true if the hotness of \p F is unknown.
bool isFunctionHotnessUnknown(const Function &F) const;
/// Returns true if \p F contains hot code with regard to a given hot
/// percentile cutoff value.
+ template <typename FuncT, typename BFIT>
bool isFunctionHotInCallGraphNthPercentile(int PercentileCutoff,
- const Function *F,
- BlockFrequencyInfo &BFI) const;
+ const FuncT *F, BFIT &BFI) const {
+ return isFunctionHotOrColdInCallGraphNthPercentile<true, FuncT, BFIT>(
+ PercentileCutoff, F, BFI);
+ }
/// Returns true if \p F contains cold code with regard to a given cold
/// percentile cutoff value.
+ template <typename FuncT, typename BFIT>
bool isFunctionColdInCallGraphNthPercentile(int PercentileCutoff,
- const Function *F,
- BlockFrequencyInfo &BFI) const;
+ const FuncT *F, BFIT &BFI) const {
+ return isFunctionHotOrColdInCallGraphNthPercentile<false, FuncT, BFIT>(
+ PercentileCutoff, F, BFI);
+ }
/// Returns true if count \p C is considered hot.
bool isHotCount(uint64_t C) const;
/// Returns true if count \p C is considered cold.
@@ -143,22 +192,57 @@ public:
/// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where
/// the first two digits are the whole part. E.g. 995000 for 99.5 percentile.
bool isColdCountNthPercentile(int PercentileCutoff, uint64_t C) const;
+
/// Returns true if BasicBlock \p BB is considered hot.
- bool isHotBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const;
+ template <typename BBType, typename BFIT>
+ bool isHotBlock(const BBType *BB, BFIT *BFI) const {
+ auto Count = BFI->getBlockProfileCount(BB);
+ return Count && isHotCount(*Count);
+ }
+
/// Returns true if BasicBlock \p BB is considered cold.
- bool isColdBlock(const BasicBlock *BB, BlockFrequencyInfo *BFI) const;
- /// Returns true if BasicBlock \p BB is considered hot with regard to a given
- /// hot percentile cutoff value.
- /// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where
- /// the first two digits are the whole part. E.g. 995000 for 99.5 percentile.
- bool isHotBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB,
- BlockFrequencyInfo *BFI) const;
+ template <typename BBType, typename BFIT>
+ bool isColdBlock(const BBType *BB, BFIT *BFI) const {
+ auto Count = BFI->getBlockProfileCount(BB);
+ return Count && isColdCount(*Count);
+ }
+
+ template <typename BFIT>
+ bool isColdBlock(BlockFrequency BlockFreq, const BFIT *BFI) const {
+ auto Count = BFI->getProfileCountFromFreq(BlockFreq.getFrequency());
+ return Count && isColdCount(*Count);
+ }
+
+ template <typename BBType, typename BFIT>
+ bool isHotBlockNthPercentile(int PercentileCutoff, const BBType *BB,
+ BFIT *BFI) const {
+ return isHotOrColdBlockNthPercentile<true, BBType, BFIT>(PercentileCutoff,
+ BB, BFI);
+ }
+
+ template <typename BFIT>
+ bool isHotBlockNthPercentile(int PercentileCutoff, BlockFrequency BlockFreq,
+ BFIT *BFI) const {
+ return isHotOrColdBlockNthPercentile<true, BFIT>(PercentileCutoff,
+ BlockFreq, BFI);
+ }
+
/// Returns true if BasicBlock \p BB is considered cold with regard to a given
/// cold percentile cutoff value.
/// PercentileCutoff is encoded as a 6 digit decimal fixed point number, where
/// the first two digits are the whole part. E.g. 995000 for 99.5 percentile.
- bool isColdBlockNthPercentile(int PercentileCutoff, const BasicBlock *BB,
- BlockFrequencyInfo *BFI) const;
+ template <typename BBType, typename BFIT>
+ bool isColdBlockNthPercentile(int PercentileCutoff, const BBType *BB,
+ BFIT *BFI) const {
+ return isHotOrColdBlockNthPercentile<false, BBType, BFIT>(PercentileCutoff,
+ BB, BFI);
+ }
+ template <typename BFIT>
+ bool isColdBlockNthPercentile(int PercentileCutoff, BlockFrequency BlockFreq,
+ BFIT *BFI) const {
+ return isHotOrColdBlockNthPercentile<false, BFIT>(PercentileCutoff,
+ BlockFreq, BFI);
+ }
/// Returns true if the call site \p CB is considered hot.
bool isHotCallSite(const CallBase &CB, BlockFrequencyInfo *BFI) const;
/// Returns true if call site \p CB is considered cold.
@@ -178,18 +262,94 @@ public:
return ColdCountThreshold.value_or(0);
}
- private:
- template <bool isHot>
- bool isFunctionHotOrColdInCallGraphNthPercentile(
- int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const;
- template <bool isHot>
- bool isHotOrColdCountNthPercentile(int PercentileCutoff, uint64_t C) const;
- template <bool isHot>
- bool isHotOrColdBlockNthPercentile(int PercentileCutoff,
- const BasicBlock *BB,
- BlockFrequencyInfo *BFI) const;
+private:
+ template <typename FuncT>
+ std::optional<uint64_t> getTotalCallCount(const FuncT *F) const {
+ return std::nullopt;
+ }
+
+ template <bool isHot, typename FuncT, typename BFIT>
+ bool isFunctionHotOrColdInCallGraphNthPercentile(int PercentileCutoff,
+ const FuncT *F,
+ BFIT &FI) const {
+ if (!F || !hasProfileSummary())
+ return false;
+ if (auto FunctionCount = getEntryCount(F)) {
+ if (isHot &&
+ isHotCountNthPercentile(PercentileCutoff, FunctionCount->getCount()))
+ return true;
+ if (!isHot && !isColdCountNthPercentile(PercentileCutoff,
+ FunctionCount->getCount()))
+ return false;
+ }
+ if (auto TotalCallCount = getTotalCallCount(F)) {
+ if (isHot && isHotCountNthPercentile(PercentileCutoff, *TotalCallCount))
+ return true;
+ if (!isHot &&
+ !isColdCountNthPercentile(PercentileCutoff, *TotalCallCount))
+ return false;
+ }
+ for (const auto &BB : *F) {
+ if (isHot && isHotBlockNthPercentile(PercentileCutoff, &BB, &FI))
+ return true;
+ if (!isHot && !isColdBlockNthPercentile(PercentileCutoff, &BB, &FI))
+ return false;
+ }
+ return !isHot;
+ }
+
+ template <bool isHot>
+ bool isHotOrColdCountNthPercentile(int PercentileCutoff, uint64_t C) const;
+
+ template <bool isHot, typename BBType, typename BFIT>
+ bool isHotOrColdBlockNthPercentile(int PercentileCutoff, const BBType *BB,
+ BFIT *BFI) const {
+ auto Count = BFI->getBlockProfileCount(BB);
+ if (isHot)
+ return Count && isHotCountNthPercentile(PercentileCutoff, *Count);
+ else
+ return Count && isColdCountNthPercentile(PercentileCutoff, *Count);
+ }
+
+ template <bool isHot, typename BFIT>
+ bool isHotOrColdBlockNthPercentile(int PercentileCutoff,
+ BlockFrequency BlockFreq,
+ BFIT *BFI) const {
+ auto Count = BFI->getProfileCountFromFreq(BlockFreq.getFrequency());
+ if (isHot)
+ return Count && isHotCountNthPercentile(PercentileCutoff, *Count);
+ else
+ return Count && isColdCountNthPercentile(PercentileCutoff, *Count);
+ }
+
+ template <typename FuncT>
+ std::optional<Function::ProfileCount> getEntryCount(const FuncT *F) const {
+ return F->getEntryCount();
+ }
};
+template <>
+inline std::optional<uint64_t>
+ProfileSummaryInfo::getTotalCallCount<Function>(const Function *F) const {
+ if (!hasSampleProfile())
+ return std::nullopt;
+ uint64_t TotalCallCount = 0;
+ for (const auto &BB : *F)
+ for (const auto &I : BB)
+ if (isa<CallInst>(I) || isa<InvokeInst>(I))
+ if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr))
+ TotalCallCount += *CallCount;
+ return TotalCallCount;
+}
+
+// Declare the template specialization for llvm::MachineFunction. It is not
+// implemented here because including the MachineFunction header here would
+// break dependency rules.
+template <>
+std::optional<Function::ProfileCount>
+ProfileSummaryInfo::getEntryCount<MachineFunction>(
+ const MachineFunction *F) const;
+
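Because the hotness queries are now templates over the function and BFI types, the same code serves IR and MachineIR clients. A minimal IR-level sketch; preferSizeSketch is a hypothetical helper, and PSI/BFI are assumed to come from the analysis managers.

#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
using namespace llvm;

bool preferSizeSketch(const Function &F, ProfileSummaryInfo &PSI,
                      BlockFrequencyInfo &BFI) {
  // Both calls instantiate the templates as <Function, BlockFrequencyInfo>.
  if (PSI.isFunctionColdInCallGraph(&F, BFI))
    return true;  // everything in F is cold: favour size
  if (PSI.isFunctionHotInCallGraph(&F, BFI))
    return false; // F contains hot code: favour speed
  return false;   // hotness unknown: defer to other heuristics
}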
/// An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
class ProfileSummaryInfoWrapperPass : public ImmutablePass {
std::unique_ptr<ProfileSummaryInfo> PSI;
diff --git a/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h b/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
index bf1aaca2adbb..91855138fe18 100644
--- a/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
+++ b/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
@@ -85,6 +85,12 @@ public:
void *arg_data(int) { llvm_unreachable(NOOP_MODEL_ERRMSG); }
#undef NOOP_MODEL_ERRMSG
};
+
+template <class T> bool isEmbeddedModelEvaluatorValid() { return true; }
+
+template <> inline bool isEmbeddedModelEvaluatorValid<NoopSavedModelImpl>() {
+ return false;
+}
} // namespace llvm
#endif // LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H
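The new trait gives release-mode clients a compile-time way to detect whether a real embedded model (as opposed to the NoopSavedModelImpl placeholder) was linked in. A hypothetical use, where CompiledModelType stands for whichever generated saved-model class the build embeds:

#include "llvm/Analysis/ReleaseModeModelRunner.h"
using namespace llvm;

// With the no-op placeholder this returns false, and a caller can skip
// constructing a ReleaseModeModelRunner and fall back to default heuristics.
template <typename CompiledModelType> bool haveEmbeddedModel() {
  return isEmbeddedModelEvaluatorValid<CompiledModelType>();
}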
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index c4bd0fa3e07c..c108a7ae9c9b 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -560,12 +560,16 @@ public:
/// expression.
const SCEV *getSCEV(Value *V);
+ /// Return an existing SCEV for V if there is one, otherwise return nullptr.
+ const SCEV *getExistingSCEV(Value *V);
+
const SCEV *getConstant(ConstantInt *V);
const SCEV *getConstant(const APInt &Val);
const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false);
const SCEV *getLosslessPtrToIntExpr(const SCEV *Op, unsigned Depth = 0);
const SCEV *getPtrToIntExpr(const SCEV *Op, Type *Ty);
const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
+ const SCEV *getVScale(Type *Ty);
const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
const SCEV *getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
unsigned Depth = 0);
@@ -655,15 +659,19 @@ public:
/// Return a SCEV for the constant 1 of a specific type.
const SCEV *getOne(Type *Ty) { return getConstant(Ty, 1); }
+ /// Return a SCEV for the constant \p Power of two.
+ const SCEV *getPowerOfTwo(Type *Ty, unsigned Power) {
+ assert(Power < getTypeSizeInBits(Ty) && "Power out of range");
+ return getConstant(APInt::getOneBitSet(getTypeSizeInBits(Ty), Power));
+ }
+
/// Return a SCEV for the constant -1 of a specific type.
const SCEV *getMinusOne(Type *Ty) {
return getConstant(Ty, -1, /*isSigned=*/true);
}
- /// Return an expression for sizeof ScalableTy that is type IntTy, where
- /// ScalableTy is a scalable vector type.
- const SCEV *getSizeOfScalableVectorExpr(Type *IntTy,
- ScalableVectorType *ScalableTy);
+ /// Return an expression for a TypeSize.
+ const SCEV *getSizeOfExpr(Type *IntTy, TypeSize Size);
/// Return an expression for the alloc size of AllocTy that is type IntTy
const SCEV *getSizeOfExpr(Type *IntTy, Type *AllocTy);
@@ -786,16 +794,19 @@ public:
bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS);
+ /// A version of getTripCountFromExitCount below which always picks an
+ /// evaluation type which can not result in overflow.
+ const SCEV *getTripCountFromExitCount(const SCEV *ExitCount);
+
/// Convert from an "exit count" (i.e. "backedge taken count") to a "trip
/// count". A "trip count" is the number of times the header of the loop
/// will execute if an exit is taken after the specified number of backedges
/// have been taken. (e.g. TripCount = ExitCount + 1). Note that the
- /// expression can overflow if ExitCount = UINT_MAX. \p Extend controls
- /// how potential overflow is handled. If true, a wider result type is
- /// returned. ex: EC = 255 (i8), TC = 256 (i9). If false, result unsigned
- /// wraps with 2s-complement semantics. ex: EC = 255 (i8), TC = 0 (i8)
- const SCEV *getTripCountFromExitCount(const SCEV *ExitCount,
- bool Extend = true);
+ /// expression can overflow if ExitCount = UINT_MAX. If EvalTy is not wide
+ /// enough to hold the result without overflow, the result wraps with unsigned
+ /// 2s-complement semantics. ex: EC = 255 (i8), TC = 0 (i8)
+ const SCEV *getTripCountFromExitCount(const SCEV *ExitCount, Type *EvalTy,
+ const Loop *L);
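A sketch contrasting the two flavours; tripCountSketch is a hypothetical helper, SE and L come from the caller, and the i64 evaluation type is an arbitrary choice.

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Type.h"
using namespace llvm;

void tripCountSketch(ScalarEvolution &SE, const Loop *L) {
  const SCEV *ExitCount = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExitCount))
    return; // no known exit count for this loop
  // Overflow-safe form: an evaluation type wide enough for ExitCount + 1 is
  // chosen automatically.
  const SCEV *TCWide = SE.getTripCountFromExitCount(ExitCount);
  // Fixed-type form: wraps to 0 if i64 cannot represent ExitCount + 1.
  Type *I64 = Type::getInt64Ty(SE.getContext());
  const SCEV *TCFixed = SE.getTripCountFromExitCount(ExitCount, I64, L);
  (void)TCWide;
  (void)TCFixed;
}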
/// Returns the exact trip count of the loop if we can compute it, and
/// the result is a small constant. '0' is used to represent an unknown
@@ -820,13 +831,6 @@ public:
/// Returns 0 if the trip count is unknown or not constant.
unsigned getSmallConstantMaxTripCount(const Loop *L);
- /// Returns the upper bound of the loop trip count infered from array size.
- /// Can not access bytes starting outside the statically allocated size
- /// without being immediate UB.
- /// Returns SCEVCouldNotCompute if the trip count could not inferred
- /// from array accesses.
- const SCEV *getConstantMaxTripCountFromArray(const Loop *L);
-
/// Returns the largest constant divisor of the trip count as a normal
/// unsigned value, if possible. This means that the actual trip count is
/// always a multiple of the returned value. Returns 1 if the trip count is
@@ -957,7 +961,13 @@ public:
/// (at every loop iteration). It is, at the same time, the minimum number
/// of times S is divisible by 2. For example, given {4,+,8} it returns 2.
/// If S is guaranteed to be 0, it returns the bitwidth of S.
- uint32_t GetMinTrailingZeros(const SCEV *S);
+ uint32_t getMinTrailingZeros(const SCEV *S);
+
+ /// Returns the max constant multiple of S.
+ APInt getConstantMultiple(const SCEV *S);
+
+ /// Returns the max constant multiple of S. If S is exactly 0, return 1.
+ APInt getNonZeroConstantMultiple(const SCEV *S);
/// Determine the unsigned range for a particular SCEV.
/// NOTE: This returns a copy of the reference returned by getRangeRef.
@@ -1128,7 +1138,7 @@ public:
/// Compute the number of times the backedge of the specified loop will
/// execute if its exit condition were a conditional branch of ExitCond.
///
- /// \p ControlsExit is true if ExitCond directly controls the exit
+ /// \p ControlsOnlyExit is true if ExitCond directly controls the only exit
/// branch. In this case, we can assume that the loop exits only if the
/// condition is true and can infer that failing to meet the condition prior
/// to integer wraparound results in undefined behavior.
@@ -1136,7 +1146,7 @@ public:
/// If \p AllowPredicates is set, this call will try to use a minimal set of
/// SCEV predicates in order to return an exact answer.
ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond,
- bool ExitIfTrue, bool ControlsExit,
+ bool ExitIfTrue, bool ControlsOnlyExit,
bool AllowPredicates = false);
/// A predicate is said to be monotonically increasing if may go from being
@@ -1195,11 +1205,9 @@ public:
/// Simplify LHS and RHS in a comparison with predicate Pred. Return true
/// iff any changes were made. If the operands are provably equal or
/// unequal, LHS and RHS are set to the same value and Pred is set to either
- /// ICMP_EQ or ICMP_NE. ControllingFiniteLoop is set if this comparison
- /// controls the exit of a loop known to have a finite number of iterations.
+ /// ICMP_EQ or ICMP_NE.
bool SimplifyICmpOperands(ICmpInst::Predicate &Pred, const SCEV *&LHS,
- const SCEV *&RHS, unsigned Depth = 0,
- bool ControllingFiniteLoop = false);
+ const SCEV *&RHS, unsigned Depth = 0);
/// Return the "disposition" of the given SCEV with respect to the given
/// loop.
@@ -1297,41 +1305,26 @@ public:
bool loopIsFiniteByAssumption(const Loop *L);
class FoldID {
- SmallVector<unsigned, 4> Bits;
+ const SCEV *Op = nullptr;
+ const Type *Ty = nullptr;
+ unsigned short C;
public:
- void addInteger(unsigned long I) { Bits.push_back(I); }
- void addInteger(unsigned I) { Bits.push_back(I); }
- void addInteger(int I) { Bits.push_back(I); }
-
- void addInteger(unsigned long long I) {
- addInteger(unsigned(I));
- addInteger(unsigned(I >> 32));
+ FoldID(SCEVTypes C, const SCEV *Op, const Type *Ty) : Op(Op), Ty(Ty), C(C) {
+ assert(Op);
+ assert(Ty);
}
- void addPointer(const void *Ptr) {
- // Note: this adds pointers to the hash using sizes and endianness that
- // depend on the host. It doesn't matter, however, because hashing on
- // pointer values is inherently unstable. Nothing should depend on the
- // ordering of nodes in the folding set.
- static_assert(sizeof(uintptr_t) <= sizeof(unsigned long long),
- "unexpected pointer size");
- addInteger(reinterpret_cast<uintptr_t>(Ptr));
- }
+ FoldID(unsigned short C) : C(C) {}
unsigned computeHash() const {
- unsigned Hash = Bits.size();
- for (unsigned I = 0; I != Bits.size(); ++I)
- Hash = detail::combineHashValue(Hash, Bits[I]);
- return Hash;
+ return detail::combineHashValue(
+ C, detail::combineHashValue(reinterpret_cast<uintptr_t>(Op),
+ reinterpret_cast<uintptr_t>(Ty)));
}
+
bool operator==(const FoldID &RHS) const {
- if (Bits.size() != RHS.Bits.size())
- return false;
- for (unsigned I = 0; I != Bits.size(); ++I)
- if (Bits[I] != RHS.Bits[I])
- return false;
- return true;
+ return std::tie(Op, Ty, C) == std::tie(RHS.Op, RHS.Ty, RHS.C);
}
};
@@ -1421,14 +1414,14 @@ private:
/// predicate by splitting it into a set of independent predicates.
bool ProvingSplitPredicate = false;
- /// Memoized values for the GetMinTrailingZeros
- DenseMap<const SCEV *, uint32_t> MinTrailingZerosCache;
+ /// Memoized values for the getConstantMultiple
+ DenseMap<const SCEV *, APInt> ConstantMultipleCache;
/// Return the Value set from which the SCEV expr is generated.
ArrayRef<Value *> getSCEVValues(const SCEV *S);
- /// Private helper method for the GetMinTrailingZeros method
- uint32_t GetMinTrailingZerosImpl(const SCEV *S);
+ /// Private helper method for the getConstantMultiple method.
+ APInt getConstantMultipleImpl(const SCEV *S);
/// Information about the number of times a particular loop exit may be
/// reached before exiting the loop.
@@ -1655,7 +1648,7 @@ private:
/// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Step}.
/// Helper for \c getRange.
ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Step,
- const SCEV *MaxBECount, unsigned BitWidth);
+ const APInt &MaxBECount);
/// Determines the range for the affine non-self-wrapping SCEVAddRecExpr {\p
/// Start,+,\p Step}<nw>.
@@ -1668,7 +1661,7 @@ private:
/// Step} by "factoring out" a ternary expression from the add recurrence.
/// Helper called by \c getRange.
ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Step,
- const SCEV *MaxBECount, unsigned BitWidth);
+ const APInt &MaxBECount);
/// If the unknown expression U corresponds to a simple recurrence, return
/// a constant range which represents the entire recurrence. Note that
@@ -1759,11 +1752,11 @@ private:
// complexity.
class ExitLimitCache {
- // It may look like we need key on the whole (L, ExitIfTrue, ControlsExit,
- // AllowPredicates) tuple, but recursive calls to
+ // It may look like we need to key on the whole (L, ExitIfTrue,
+ // ControlsOnlyExit, AllowPredicates) tuple, but recursive calls to
// computeExitLimitFromCondCached from computeExitLimitFromCondImpl only
- // vary the in \c ExitCond and \c ControlsExit parameters. We remember the
- // initial values of the other values to assert our assumption.
+ // vary in the \c ExitCond and \c ControlsOnlyExit parameters. We remember
+ // the initial values of the other values to assert our assumption.
SmallDenseMap<PointerIntPair<Value *, 1>, ExitLimit> TripCountMap;
const Loop *L;
@@ -1775,11 +1768,12 @@ private:
: L(L), ExitIfTrue(ExitIfTrue), AllowPredicates(AllowPredicates) {}
std::optional<ExitLimit> find(const Loop *L, Value *ExitCond,
- bool ExitIfTrue, bool ControlsExit,
+ bool ExitIfTrue, bool ControlsOnlyExit,
bool AllowPredicates);
void insert(const Loop *L, Value *ExitCond, bool ExitIfTrue,
- bool ControlsExit, bool AllowPredicates, const ExitLimit &EL);
+ bool ControlsOnlyExit, bool AllowPredicates,
+ const ExitLimit &EL);
};
using ExitLimitCacheTy = ExitLimitCache;
@@ -1787,16 +1781,15 @@ private:
ExitLimit computeExitLimitFromCondCached(ExitLimitCacheTy &Cache,
const Loop *L, Value *ExitCond,
bool ExitIfTrue,
- bool ControlsExit,
+ bool ControlsOnlyExit,
bool AllowPredicates);
ExitLimit computeExitLimitFromCondImpl(ExitLimitCacheTy &Cache, const Loop *L,
Value *ExitCond, bool ExitIfTrue,
- bool ControlsExit,
+ bool ControlsOnlyExit,
bool AllowPredicates);
- std::optional<ScalarEvolution::ExitLimit>
- computeExitLimitFromCondFromBinOp(ExitLimitCacheTy &Cache, const Loop *L,
- Value *ExitCond, bool ExitIfTrue,
- bool ControlsExit, bool AllowPredicates);
+ std::optional<ScalarEvolution::ExitLimit> computeExitLimitFromCondFromBinOp(
+ ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
+ bool ControlsOnlyExit, bool AllowPredicates);
/// Compute the number of times the backedge of the specified loop will
/// execute if its exit condition were a conditional branch of the ICmpInst
@@ -1861,14 +1854,14 @@ private:
///
/// \p isSigned specifies whether the less-than is signed.
///
- /// \p ControlsExit is true when the LHS < RHS condition directly controls
+ /// \p ControlsOnlyExit is true when the LHS < RHS condition directly controls
/// the branch (loops exits only if condition is true). In this case, we can
/// use NoWrapFlags to skip overflow checks.
///
/// If \p AllowPredicates is set, this call will try to use a minimal set of
/// SCEV predicates in order to return an exact answer.
ExitLimit howManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L,
- bool isSigned, bool ControlsExit,
+ bool isSigned, bool ControlsOnlyExit,
bool AllowPredicates = false);
ExitLimit howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const Loop *L,
@@ -2032,8 +2025,11 @@ private:
/// Helper for forgetMemoizedResults.
void forgetMemoizedResultsImpl(const SCEV *S);
- /// Return an existing SCEV for V if there is one, otherwise return nullptr.
- const SCEV *getExistingSCEV(Value *V);
+ /// Iterate over instructions in \p Worklist and their users. Erase entries
+ /// from ValueExprMap and collect SCEV expressions in \p ToForget
+ void visitAndClearUsers(SmallVectorImpl<Instruction *> &Worklist,
+ SmallPtrSetImpl<Instruction *> &Visited,
+ SmallVectorImpl<const SCEV *> &ToForget);
/// Erase Value from ValueExprMap and ExprValueMap.
void eraseValueFromMap(Value *V);
@@ -2188,6 +2184,11 @@ private:
void getReachableBlocks(SmallPtrSetImpl<BasicBlock *> &Reachable,
Function &F);
+ /// Return the given SCEV expression with a new set of operands.
+ /// This preserves the original nowrap flags.
+ const SCEV *getWithOperands(const SCEV *S,
+ SmallVectorImpl<const SCEV *> &NewOps);
+
FoldingSet<SCEV> UniqueSCEVs;
FoldingSet<SCEVPredicate> UniquePreds;
BumpPtrAllocator SCEVAllocator;
@@ -2364,13 +2365,11 @@ private:
template <> struct DenseMapInfo<ScalarEvolution::FoldID> {
static inline ScalarEvolution::FoldID getEmptyKey() {
- ScalarEvolution::FoldID ID;
- ID.addInteger(~0ULL);
+ ScalarEvolution::FoldID ID(0);
return ID;
}
static inline ScalarEvolution::FoldID getTombstoneKey() {
- ScalarEvolution::FoldID ID;
- ID.addInteger(~0ULL - 1ULL);
+ ScalarEvolution::FoldID ID(1);
return ID;
}
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionDivision.h b/llvm/include/llvm/Analysis/ScalarEvolutionDivision.h
index 7d5902d31795..3283d438ccb5 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionDivision.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionDivision.h
@@ -48,6 +48,8 @@ public:
void visitConstant(const SCEVConstant *Numerator);
+ void visitVScale(const SCEVVScale *Numerator);
+
void visitAddRecExpr(const SCEVAddRecExpr *Numerator);
void visitAddExpr(const SCEVAddExpr *Numerator);
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
index 80443510d449..91848a91c17e 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -39,6 +39,7 @@ enum SCEVTypes : unsigned short {
// These should be ordered in terms of increasing complexity to make the
// folders simpler.
scConstant,
+ scVScale,
scTruncate,
scZeroExtend,
scSignExtend,
@@ -75,6 +76,23 @@ public:
static bool classof(const SCEV *S) { return S->getSCEVType() == scConstant; }
};
+/// This class represents the value of vscale, as used when defining the length
+/// of a scalable vector or returned by the llvm.vscale() intrinsic.
+class SCEVVScale : public SCEV {
+ friend class ScalarEvolution;
+
+ SCEVVScale(const FoldingSetNodeIDRef ID, Type *ty)
+ : SCEV(ID, scVScale, 0), Ty(ty) {}
+
+ Type *Ty;
+
+public:
+ Type *getType() const { return Ty; }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const SCEV *S) { return S->getSCEVType() == scVScale; }
+};
+
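SCEVVScale pairs with the new ScalarEvolution::getVScale entry point added earlier in this import. A small recognition sketch; vscaleSketch is a hypothetical helper and i64 is an arbitrary type choice.

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/IR/Type.h"
using namespace llvm;

void vscaleSketch(ScalarEvolution &SE) {
  // vscale as an i64-typed SCEV, e.g. one factor of a scalable vector's size.
  const SCEV *VS = SE.getVScale(Type::getInt64Ty(SE.getContext()));
  if (const auto *V = dyn_cast<SCEVVScale>(VS))
    (void)V->getType(); // i64 here
}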
inline unsigned short computeExpressionSize(ArrayRef<const SCEV *> Args) {
APInt Size(16, 1);
for (const auto *Arg : Args)
@@ -579,18 +597,6 @@ class SCEVUnknown final : public SCEV, private CallbackVH {
public:
Value *getValue() const { return getValPtr(); }
- /// @{
- /// Test whether this is a special constant representing a type
- /// size, alignment, or field offset in a target-independent
- /// manner, and hasn't happened to have been folded with other
- /// operations into something unrecognizable. This is mainly only
- /// useful for pretty-printing and other situations where it isn't
- /// absolutely required for these to succeed.
- bool isSizeOf(Type *&AllocTy) const;
- bool isAlignOf(Type *&AllocTy) const;
- bool isOffsetOf(Type *&STy, Constant *&FieldNo) const;
- /// @}
-
Type *getType() const { return getValPtr()->getType(); }
/// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -604,6 +610,8 @@ template <typename SC, typename RetVal = void> struct SCEVVisitor {
switch (S->getSCEVType()) {
case scConstant:
return ((SC *)this)->visitConstant((const SCEVConstant *)S);
+ case scVScale:
+ return ((SC *)this)->visitVScale((const SCEVVScale *)S);
case scPtrToInt:
return ((SC *)this)->visitPtrToIntExpr((const SCEVPtrToIntExpr *)S);
case scTruncate:
@@ -671,6 +679,7 @@ public:
switch (S->getSCEVType()) {
case scConstant:
+ case scVScale:
case scUnknown:
continue;
case scPtrToInt:
@@ -743,7 +752,7 @@ protected:
// a SCEV is referenced by multiple SCEVs. Without memoization, this
// visit algorithm would have exponential time complexity in the worst
// case, causing the compiler to hang on certain tests.
- DenseMap<const SCEV *, const SCEV *> RewriteResults;
+ SmallDenseMap<const SCEV *, const SCEV *> RewriteResults;
public:
SCEVRewriteVisitor(ScalarEvolution &SE) : SE(SE) {}
@@ -760,6 +769,8 @@ public:
const SCEV *visitConstant(const SCEVConstant *Constant) { return Constant; }
+ const SCEV *visitVScale(const SCEVVScale *VScale) { return VScale; }
+
const SCEV *visitPtrToIntExpr(const SCEVPtrToIntExpr *Expr) {
const SCEV *Operand = ((SC *)this)->visit(Expr->getOperand());
return Operand == Expr->getOperand()
diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h b/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h
index da420ff1e6d2..b34db8f5a03a 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h
@@ -50,9 +50,11 @@ typedef SmallPtrSet<const Loop *, 2> PostIncLoopSet;
typedef function_ref<bool(const SCEVAddRecExpr *)> NormalizePredTy;
/// Normalize \p S to be post-increment for all loops present in \p
-/// Loops.
+/// Loops. Returns nullptr if the result is not invertible and \p
+/// CheckInvertible is true.
const SCEV *normalizeForPostIncUse(const SCEV *S, const PostIncLoopSet &Loops,
- ScalarEvolution &SE);
+ ScalarEvolution &SE,
+ bool CheckInvertible = true);
/// Normalize \p S for all add recurrence sub-expressions for which \p
/// Pred returns true.
diff --git a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h b/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
deleted file mode 100644
index e6e3efbe0fcb..000000000000
--- a/llvm/include/llvm/Analysis/SyncDependenceAnalysis.h
+++ /dev/null
@@ -1,92 +0,0 @@
-//===- SyncDependenceAnalysis.h - Divergent Branch Dependence -*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// \file
-// This file defines the SyncDependenceAnalysis class, which computes for
-// every divergent branch the set of phi nodes that the branch will make
-// divergent.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H
-#define LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include <map>
-#include <memory>
-#include <unordered_map>
-#include <vector>
-
-namespace llvm {
-
-class BasicBlock;
-class DominatorTree;
-class Instruction;
-class LoopInfo;
-class PostDominatorTree;
-
-using ConstBlockSet = SmallPtrSet<const BasicBlock *, 4>;
-struct ControlDivergenceDesc {
- // Join points of divergent disjoint paths.
- ConstBlockSet JoinDivBlocks;
- // Divergent loop exits
- ConstBlockSet LoopDivBlocks;
-};
-
-struct ModifiedPO {
- std::vector<const BasicBlock *> LoopPO;
- std::unordered_map<const BasicBlock *, unsigned> POIndex;
- void appendBlock(const BasicBlock &BB) {
- POIndex[&BB] = LoopPO.size();
- LoopPO.push_back(&BB);
- }
- unsigned getIndexOf(const BasicBlock &BB) const {
- return POIndex.find(&BB)->second;
- }
- unsigned size() const { return LoopPO.size(); }
- const BasicBlock *getBlockAt(unsigned Idx) const { return LoopPO[Idx]; }
-};
-
-/// \brief Relates points of divergent control to join points in
-/// reducible CFGs.
-///
-/// This analysis relates points of divergent control to points of converging
-/// divergent control. The analysis requires all loops to be reducible.
-class SyncDependenceAnalysis {
-public:
- ~SyncDependenceAnalysis();
- SyncDependenceAnalysis(const DominatorTree &DT, const PostDominatorTree &PDT,
- const LoopInfo &LI);
-
- /// \brief Computes divergent join points and loop exits caused by branch
- /// divergence in \p Term.
- ///
- /// The set of blocks which are reachable by disjoint paths from \p Term.
- /// The set also contains loop exits if there two disjoint paths:
- /// one from \p Term to the loop exit and another from \p Term to the loop
- /// header. Those exit blocks are added to the returned set.
- /// If L is the parent loop of \p Term and an exit of L is in the returned
- /// set then L is a divergent loop.
- const ControlDivergenceDesc &getJoinBlocks(const Instruction &Term);
-
-private:
- static ControlDivergenceDesc EmptyDivergenceDesc;
-
- ModifiedPO LoopPO;
-
- const DominatorTree &DT;
- const PostDominatorTree &PDT;
- const LoopInfo &LI;
-
- std::map<const Instruction *, std::unique_ptr<ControlDivergenceDesc>>
- CachedControlDivDescs;
-};
-
-} // namespace llvm
-
-#endif // LLVM_ANALYSIS_SYNCDEPENDENCEANALYSIS_H
diff --git a/llvm/include/llvm/Analysis/TargetFolder.h b/llvm/include/llvm/Analysis/TargetFolder.h
index db7eda54b5c4..3d9edf132dc1 100644
--- a/llvm/include/llvm/Analysis/TargetFolder.h
+++ b/llvm/include/llvm/Analysis/TargetFolder.h
@@ -116,6 +116,9 @@ public:
Value *FoldGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
bool IsInBounds = false) const override {
+ if (!ConstantExpr::isSupportedGetElementPtr(Ty))
+ return nullptr;
+
if (auto *PC = dyn_cast<Constant>(Ptr)) {
// Every index must be constant.
if (any_of(IdxList, [](Value *V) { return !isa<Constant>(V); }))
@@ -133,7 +136,7 @@ public:
auto *TC = dyn_cast<Constant>(True);
auto *FC = dyn_cast<Constant>(False);
if (CC && TC && FC)
- return Fold(ConstantExpr::getSelect(CC, TC, FC));
+ return ConstantFoldSelectInstruction(CC, TC, FC);
return nullptr;
}
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index 5f6af3514fc2..03ac422d3e6b 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -256,21 +256,46 @@ TLI_DEFINE_ENUM_INTERNAL(Znam)
TLI_DEFINE_STRING_INTERNAL("_Znam")
TLI_DEFINE_SIG_INTERNAL(Ptr, Long)
+/// void *operator new[](unsigned long, __hot_cold_t)
+/// Currently this and other operator new interfaces that take a __hot_cold_t
+/// hint are supported by the open source version of tcmalloc, see:
+/// https://github.com/google/tcmalloc/blob/master/tcmalloc/new_extension.h
+/// and for the definition of the __hot_cold_t parameter see:
+/// https://github.com/google/tcmalloc/blob/master/tcmalloc/malloc_extension.h
+TLI_DEFINE_ENUM_INTERNAL(Znam12__hot_cold_t)
+TLI_DEFINE_STRING_INTERNAL("_Znam12__hot_cold_t")
+TLI_DEFINE_SIG_INTERNAL(Ptr, Long, Bool)
+
/// void *operator new[](unsigned long, const std::nothrow_t&);
TLI_DEFINE_ENUM_INTERNAL(ZnamRKSt9nothrow_t)
TLI_DEFINE_STRING_INTERNAL("_ZnamRKSt9nothrow_t")
TLI_DEFINE_SIG_INTERNAL(Ptr, Long, Ptr)
+/// void *operator new[](unsigned long, const std::nothrow_t&, __hot_cold_t)
+TLI_DEFINE_ENUM_INTERNAL(ZnamRKSt9nothrow_t12__hot_cold_t)
+TLI_DEFINE_STRING_INTERNAL("_ZnamRKSt9nothrow_t12__hot_cold_t")
+TLI_DEFINE_SIG_INTERNAL(Ptr, Long, Ptr, Bool)
+
/// void *operator new[](unsigned long, std::align_val_t)
TLI_DEFINE_ENUM_INTERNAL(ZnamSt11align_val_t)
TLI_DEFINE_STRING_INTERNAL("_ZnamSt11align_val_t")
TLI_DEFINE_SIG_INTERNAL(Ptr, Long, Long)
+/// void *operator new[](unsigned long, std::align_val_t, __hot_cold_t)
+TLI_DEFINE_ENUM_INTERNAL(ZnamSt11align_val_t12__hot_cold_t)
+TLI_DEFINE_STRING_INTERNAL("_ZnamSt11align_val_t12__hot_cold_t")
+TLI_DEFINE_SIG_INTERNAL(Ptr, Long, Long, Bool)
+
/// void *operator new[](unsigned long, std::align_val_t, const std::nothrow_t&)
TLI_DEFINE_ENUM_INTERNAL(ZnamSt11align_val_tRKSt9nothrow_t)
TLI_DEFINE_STRING_INTERNAL("_ZnamSt11align_val_tRKSt9nothrow_t")
TLI_DEFINE_SIG_INTERNAL(Ptr, Long, Long, Ptr)
+/// void *operator new[](unsigned long, std::align_val_t, const std::nothrow_t&, __hot_cold_t)
+TLI_DEFINE_ENUM_INTERNAL(ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t)
+TLI_DEFINE_STRING_INTERNAL("_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t")
+TLI_DEFINE_SIG_INTERNAL(Ptr, Long, Long, Ptr, Bool)
+
/// void *operator new(unsigned int);
TLI_DEFINE_ENUM_INTERNAL(Znwj)
TLI_DEFINE_STRING_INTERNAL("_Znwj")
@@ -296,21 +321,41 @@ TLI_DEFINE_ENUM_INTERNAL(Znwm)
TLI_DEFINE_STRING_INTERNAL("_Znwm")
TLI_DEFINE_SIG_INTERNAL(Ptr, Long)
+/// void *operator new(unsigned long, __hot_cold_t)
+TLI_DEFINE_ENUM_INTERNAL(Znwm12__hot_cold_t)
+TLI_DEFINE_STRING_INTERNAL("_Znwm12__hot_cold_t")
+TLI_DEFINE_SIG_INTERNAL(Ptr, Long, Bool)
+
/// void *operator new(unsigned long, const std::nothrow_t&);
TLI_DEFINE_ENUM_INTERNAL(ZnwmRKSt9nothrow_t)
TLI_DEFINE_STRING_INTERNAL("_ZnwmRKSt9nothrow_t")
TLI_DEFINE_SIG_INTERNAL(Ptr, Long, Ptr)
+/// void *operator new(unsigned long, const std::nothrow_t&, __hot_cold_t)
+TLI_DEFINE_ENUM_INTERNAL(ZnwmRKSt9nothrow_t12__hot_cold_t)
+TLI_DEFINE_STRING_INTERNAL("_ZnwmRKSt9nothrow_t12__hot_cold_t")
+TLI_DEFINE_SIG_INTERNAL(Ptr, Long, Ptr, Bool)
+
/// void *operator new(unsigned long, std::align_val_t)
TLI_DEFINE_ENUM_INTERNAL(ZnwmSt11align_val_t)
TLI_DEFINE_STRING_INTERNAL("_ZnwmSt11align_val_t")
TLI_DEFINE_SIG_INTERNAL(Ptr, Long, Long)
+/// void *operator new(unsigned long, std::align_val_t, __hot_cold_t)
+TLI_DEFINE_ENUM_INTERNAL(ZnwmSt11align_val_t12__hot_cold_t)
+TLI_DEFINE_STRING_INTERNAL("_ZnwmSt11align_val_t12__hot_cold_t")
+TLI_DEFINE_SIG_INTERNAL(Ptr, Long, Long, Bool)
+
/// void *operator new(unsigned long, std::align_val_t, const std::nothrow_t&)
TLI_DEFINE_ENUM_INTERNAL(ZnwmSt11align_val_tRKSt9nothrow_t)
TLI_DEFINE_STRING_INTERNAL("_ZnwmSt11align_val_tRKSt9nothrow_t")
TLI_DEFINE_SIG_INTERNAL(Ptr, Long, Long, Ptr)
+/// void *operator new(unsigned long, std::align_val_t, const std::nothrow_t&, __hot_cold_t)
+TLI_DEFINE_ENUM_INTERNAL(ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t)
+TLI_DEFINE_STRING_INTERNAL("_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t")
+TLI_DEFINE_SIG_INTERNAL(Ptr, Long, Long, Ptr, Bool)
+
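For reference, a hedged sketch of the C++ declarations these mangled names correspond to, assuming tcmalloc's __hot_cold_t is a uint8_t-based enum declared at global scope (see the headers linked above); this is illustrative and not part of the patch:

#include <cstddef>
#include <cstdint>
#include <new>

// Assumed shape of the tcmalloc hint type; only the name and underlying type
// matter for the Itanium mangling (_Znwm12__hot_cold_t and friends).
enum class __hot_cold_t : std::uint8_t;

void *operator new(std::size_t Size, __hot_cold_t Hint);
void *operator new[](std::size_t Size, __hot_cold_t Hint);
void *operator new(std::size_t Size, std::align_val_t Al, __hot_cold_t Hint);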
/// double __acos_finite(double x);
TLI_DEFINE_ENUM_INTERNAL(acos_finite)
TLI_DEFINE_STRING_INTERNAL("__acos_finite")
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index 8fcfbdbd6665..5d62e837c1f3 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -11,10 +11,10 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
+#include "llvm/TargetParser/Triple.h"
#include <optional>
namespace llvm {
@@ -31,6 +31,7 @@ struct VecDesc {
StringRef ScalarFnName;
StringRef VectorFnName;
ElementCount VectorizationFactor;
+ bool Masked;
};
enum LibFunc : unsigned {
@@ -95,7 +96,8 @@ public:
LIBMVEC_X86, // GLIBC Vector Math library.
MASSV, // IBM MASS vector library.
SVML, // Intel short vector math library.
- SLEEFGNUABI // SLEEF - SIMD Library for Evaluating Elementary Functions.
+ SLEEFGNUABI, // SLEEF - SIMD Library for Evaluating Elementary Functions.
+ ArmPL // Arm Performance Libraries.
};
TargetLibraryInfoImpl();
@@ -138,7 +140,7 @@ public:
if (StandardNames[F] != Name) {
setState(F, CustomName);
CustomNames[F] = std::string(Name);
- assert(CustomNames.find(F) != CustomNames.end());
+ assert(CustomNames.contains(F));
} else {
setState(F, StandardName);
}
@@ -161,7 +163,8 @@ public:
/// Return true if the function F has a vector equivalent with vectorization
/// factor VF.
bool isFunctionVectorizable(StringRef F, const ElementCount &VF) const {
- return !getVectorizedFunction(F, VF).empty();
+ return !(getVectorizedFunction(F, VF, false).empty() &&
+ getVectorizedFunction(F, VF, true).empty());
}
/// Return true if the function F has a vector equivalent with any
@@ -170,7 +173,8 @@ public:
/// Return the name of the equivalent of F, vectorized with factor VF. If no
/// such mapping exists, return the empty string.
- StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const;
+ StringRef getVectorizedFunction(StringRef F, const ElementCount &VF,
+ bool Masked) const;
/// Set to true iff i32 parameters to library functions should have signext
/// or zeroext attributes if they correspond to C-level int or unsigned int,
@@ -346,8 +350,9 @@ public:
bool isFunctionVectorizable(StringRef F) const {
return Impl->isFunctionVectorizable(F);
}
- StringRef getVectorizedFunction(StringRef F, const ElementCount &VF) const {
- return Impl->getVectorizedFunction(F, VF);
+ StringRef getVectorizedFunction(StringRef F, const ElementCount &VF,
+ bool Masked = false) const {
+ return Impl->getVectorizedFunction(F, VF, Masked);
}
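A minimal usage sketch for the new Masked parameter, assuming a TargetLibraryInfo instance TLI is in scope; the scalar name and vectorization factor are illustrative only:

// Prefer a masked vector variant of "sinf" at a scalable VF of 4, falling
// back to the unmasked mapping when no masked variant is registered.
ElementCount VF = ElementCount::getScalable(4);
StringRef VecName = TLI.getVectorizedFunction("sinf", VF, /*Masked=*/true);
if (VecName.empty())
  VecName = TLI.getVectorizedFunction("sinf", VF, /*Masked=*/false);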
/// Tests if the function is both available and a candidate for optimized code
@@ -374,6 +379,7 @@ public:
case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl:
case LibFunc_log2: case LibFunc_log2f: case LibFunc_log2l:
case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2l:
+ case LibFunc_ldexp: case LibFunc_ldexpf: case LibFunc_ldexpl:
case LibFunc_memcpy: case LibFunc_memset: case LibFunc_memmove:
case LibFunc_memcmp: case LibFunc_bcmp: case LibFunc_strcmp:
case LibFunc_strcpy: case LibFunc_stpcpy: case LibFunc_strlen:
@@ -408,14 +414,14 @@ public:
ShouldExtI32Param = true;
ShouldExtI32Return = true;
}
- // Mips and riscv64, on the other hand, needs signext on i32 parameters
- // corresponding to both signed and unsigned ints.
- if (T.isMIPS() || T.isRISCV64()) {
+ // LoongArch, Mips, and riscv64, on the other hand, need signext on i32
+ // parameters corresponding to both signed and unsigned ints.
+ if (T.isLoongArch() || T.isMIPS() || T.isRISCV64()) {
ShouldSignExtI32Param = true;
}
- // riscv64 needs signext on i32 returns corresponding to both signed and
- // unsigned ints.
- if (T.isRISCV64()) {
+ // LoongArch and riscv64 need signext on i32 returns corresponding to both
+ // signed and unsigned ints.
+ if (T.isLoongArch() || T.isRISCV64()) {
ShouldSignExtI32Return = true;
}
}
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 0c81f0bfd3a0..1ae595d21104 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -39,6 +39,7 @@ namespace Intrinsic {
typedef unsigned ID;
}
+class AllocaInst;
class AssumptionCache;
class BlockFrequencyInfo;
class DominatorTree;
@@ -95,7 +96,7 @@ struct MemIntrinsicInfo {
/// Attributes of a target dependent hardware loop.
struct HardwareLoopInfo {
HardwareLoopInfo() = delete;
- HardwareLoopInfo(Loop *L) : L(L) {}
+ HardwareLoopInfo(Loop *L);
Loop *L = nullptr;
BasicBlock *ExitBlock = nullptr;
BranchInst *ExitBranch = nullptr;
@@ -162,7 +163,44 @@ public:
bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
};
-enum class PredicationStyle { None, Data, DataAndControlFlow };
+enum class TailFoldingStyle {
+ /// Don't use tail folding
+ None,
+ /// Use predicate only to mask operations on data in the loop.
+  /// When the VL is not known to be a power of two, this method requires a
+  /// runtime overflow check for the 'i + VL' increment in the loop, because it
+  /// compares the scalar induction variable against the trip count rounded up
+  /// to a multiple of VL, and that rounded-up value may overflow. When the VL
+  /// is a power of two, both the increment and the rounded-up trip count wrap
+  /// to 0, so no runtime check is needed: the loop exits when the induction
+  /// variable equals the rounded-up trip count, i.e. when both are 0.
+ Data,
+ /// Same as Data, but avoids using the get.active.lane.mask intrinsic to
+ /// calculate the mask and instead implements this with a
+ /// splat/stepvector/cmp.
+ /// FIXME: Can this kind be removed now that SelectionDAGBuilder expands the
+ /// active.lane.mask intrinsic when it is not natively supported?
+ DataWithoutLaneMask,
+ /// Use predicate to control both data and control flow.
+ /// This method always requires a runtime overflow check for the i + VL
+  /// increment inside the loop, because it uses the result directly in the
+ /// active.lane.mask to calculate the mask for the next iteration. If the
+ /// increment overflows, the mask is no longer correct.
+ DataAndControlFlow,
+ /// Use predicate to control both data and control flow, but modify
+ /// the trip count so that a runtime overflow check can be avoided
+ /// and such that the scalar epilogue loop can always be removed.
+ DataAndControlFlowWithoutRuntimeCheck
+};
+
+struct TailFoldingInfo {
+ TargetLibraryInfo *TLI;
+ LoopVectorizationLegality *LVL;
+ InterleavedAccessInfo *IAI;
+ TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL,
+ InterleavedAccessInfo *IAI)
+ : TLI(TLI), LVL(LVL), IAI(IAI) {}
+};
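A minimal sketch of how a target override of getPreferredTailFoldingStyle (declared further down) might map the overflow hint onto these styles; this mirrors the documented semantics and is not any particular in-tree target's policy:

TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const {
  // If the IV update may overflow, choose the variant that rounds the trip
  // count so no runtime overflow check is needed; otherwise the plain
  // data-and-control-flow style is already safe.
  return IVUpdateMayOverflow
             ? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck
             : TailFoldingStyle::DataAndControlFlow;
}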
class TargetTransformInfo;
typedef TargetTransformInfo TTI;
@@ -246,11 +284,62 @@ public:
};
/// Estimate the cost of a GEP operation when lowered.
+ ///
+ /// \p PointeeType is the source element type of the GEP.
+ /// \p Ptr is the base pointer operand.
+ /// \p Operands is the list of indices following the base pointer.
+ ///
+ /// \p AccessType is a hint as to what type of memory might be accessed by
+ /// users of the GEP. getGEPCost will use it to determine if the GEP can be
+ /// folded into the addressing mode of a load/store. If AccessType is null,
+  /// then the target type derived from PointeeType will be used as an
+ /// approximation.
InstructionCost
getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands,
+ ArrayRef<const Value *> Operands, Type *AccessType = nullptr,
TargetCostKind CostKind = TCK_SizeAndLatency) const;
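A hedged call-site sketch for the new AccessType hint; TTI, BasePtr, Idx and Int32Ty are assumed to exist at the call site and are illustrative only:

// Cost a GEP whose only user is an i32 load, passing the load's type so the
// target can check whether the address folds into the load's addressing mode.
SmallVector<const Value *, 1> Indices = {Idx};
InstructionCost GEPCost =
    TTI.getGEPCost(Int32Ty, BasePtr, Indices, /*AccessType=*/Int32Ty,
                   TargetTransformInfo::TCK_SizeAndLatency);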
+ /// Describe known properties for a set of pointers.
+ struct PointersChainInfo {
+ /// All the GEPs in a set have same base address.
+ unsigned IsSameBaseAddress : 1;
+    /// These properties are only valid if IsSameBaseAddress is set.
+ /// True if all pointers are separated by a unit stride.
+ unsigned IsUnitStride : 1;
+    /// True if the distance between any two neighbouring pointers is a known value.
+ unsigned IsKnownStride : 1;
+ unsigned Reserved : 29;
+
+ bool isSameBase() const { return IsSameBaseAddress; }
+ bool isUnitStride() const { return IsSameBaseAddress && IsUnitStride; }
+ bool isKnownStride() const { return IsSameBaseAddress && IsKnownStride; }
+
+ static PointersChainInfo getUnitStride() {
+ return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/1,
+ /*IsKnownStride=*/1, 0};
+ }
+ static PointersChainInfo getKnownStride() {
+ return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
+ /*IsKnownStride=*/1, 0};
+ }
+ static PointersChainInfo getUnknownStride() {
+ return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
+ /*IsKnownStride=*/0, 0};
+ }
+ };
+ static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
+
+  /// Estimate the cost, when lowered, of a chain of pointer operations
+  /// (typically the pointer operands of a chain of loads or stores within the
+  /// same block).
+ /// \p AccessTy is the type of the loads/stores that will ultimately use the
+ /// \p Ptrs.
+ InstructionCost
+ getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
+ const PointersChainInfo &Info, Type *AccessTy,
+                        TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
+
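A hedged sketch of querying the pointers-chain hook for the pointer operands of a run of consecutive loads; Ptrs, Base and AccessTy are assumed to have been collected by the caller:

// All pointers share Base and are one element apart, so describe the chain as
// unit-strided and let the target price it as a whole.
TargetTransformInfo::PointersChainInfo Info =
    TargetTransformInfo::PointersChainInfo::getUnitStride();
InstructionCost ChainCost = TTI.getPointersChainCost(
    Ptrs, Base, Info, AccessTy, TargetTransformInfo::TCK_RecipThroughput);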
/// \returns A value by which our inlining threshold should be multiplied.
/// This is primarily used to bump up the inlining threshold wholesale on
/// targets where calls are unusually expensive.
@@ -262,6 +351,10 @@ public:
/// \returns A value to be added to the inlining threshold.
unsigned adjustInliningThreshold(const CallBase *CB) const;
+ /// \returns The cost of having an Alloca in the caller if not inlined, to be
+  /// added to the threshold.
+ unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;
+
/// \returns Vector bonus in percent.
///
/// Vector bonuses: We want to more aggressively inline vector-dense kernels
@@ -278,6 +371,10 @@ public:
/// source/destination type and alignment and the number of bytes copied.
InstructionCost getMemcpyCost(const Instruction *I) const;
+ /// Returns the maximum memset / memcpy size in bytes that still makes it
+ /// profitable to inline the call.
+ uint64_t getMaxMemIntrinsicInlineSizeThreshold() const;
+
/// \return The estimated number of case clusters when lowering \p 'SI'.
/// \p JTSize Set a jump table size only when \p SI is suitable for a jump
/// table.
@@ -320,18 +417,16 @@ public:
/// Branch divergence has a significantly negative impact on GPU performance
/// when threads in the same wavefront take different paths due to conditional
/// branches.
- bool hasBranchDivergence() const;
-
- /// Return true if the target prefers to use GPU divergence analysis to
- /// replace the legacy version.
- bool useGPUDivergenceAnalysis() const;
+ ///
+ /// If \p F is passed, provides a context function. If \p F is known to only
+ /// execute in a single threaded environment, the target may choose to skip
+ /// uniformity analysis and assume all values are uniform.
+ bool hasBranchDivergence(const Function *F = nullptr) const;
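A hedged sketch of the intended use of the new context-function parameter; TTI and F stand for the caller's analysis handle and the function under analysis:

// If the target reports no branch divergence for this function (for example
// because it is known to run single-threaded), every value can be treated as
// uniform and the divergence-aware path can be skipped.
if (!TTI.hasBranchDivergence(&F)) {
  // ... fall back to the cheap, uniform-only handling ...
}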
/// Returns whether V is a source of divergence.
///
/// This function provides the target-dependent information for
- /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
- /// first builds the dependency graph, and then runs the reachability
- /// algorithm starting with the sources of divergence.
+ /// the target-independent UniformityAnalysis.
bool isSourceOfDivergence(const Value *V) const;
   // Returns true for the target specific
   // set of operations which produce uniform result
   // even taking non-uniform arguments
// even taking non-uniform arguments
bool isAlwaysUniform(const Value *V) const;
+ /// Query the target whether the specified address space cast from FromAS to
+ /// ToAS is valid.
+ bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
+
+ /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address.
+ bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const;
+
/// Returns the address space ID for a target's 'flat' address space. Note
/// this is not necessarily the same as addrspace(0), which LLVM sometimes
/// refers to as the generic address space. The flat address space is a
@@ -493,6 +595,8 @@ public:
/// Don't allow loop unrolling to simulate more than this number of
/// iterations when checking full unroll profitability
unsigned MaxIterationsCountToAnalyze;
+ /// Don't disable runtime unroll for the loops which were vectorized.
+ bool UnrollVectorizedLoop = false;
};
/// Get target-customized preferences for the generic loop unrolling
@@ -510,19 +614,16 @@ public:
/// Query the target whether it would be prefered to create a predicated
/// vector loop, which can avoid the need to emit a scalar epilogue loop.
- bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
- AssumptionCache &AC, TargetLibraryInfo *TLI,
- DominatorTree *DT,
- LoopVectorizationLegality *LVL,
- InterleavedAccessInfo *IAI) const;
-
- /// Query the target whether lowering of the llvm.get.active.lane.mask
- /// intrinsic is supported and how the mask should be used. A return value
- /// of PredicationStyle::Data indicates the mask is used as data only,
- /// whereas PredicationStyle::DataAndControlFlow indicates we should also use
- /// the mask for control flow in the loop. If unsupported the return value is
- /// PredicationStyle::None.
- PredicationStyle emitGetActiveLaneMask() const;
+ bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const;
+
+  /// Query the target for its preferred style of tail folding.
+  /// \param IVUpdateMayOverflow Indicates whether it is known that the IV
+  /// update may overflow (or is known never to) for the suggested VF/UF in
+  /// the given loop.
+ /// Targets can use this information to select a more optimal tail folding
+ /// style. The value conservatively defaults to true, such that no assumptions
+ /// are made on overflow.
+ TailFoldingStyle
+ getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;
// Parameters that control the loop peeling transformation
struct PeelingPreferences {
@@ -981,6 +1082,9 @@ public:
/// \return the value of vscale to tune the cost model for.
std::optional<unsigned> getVScaleForTuning() const;
+ /// \return true if vscale is known to be a power of 2
+ bool isVScaleKnownToBeAPowerOfTwo() const;
+
/// \return True if the vectorization factor should be chosen to
/// make the vector of the smallest element type match the size of a
/// vector register. For wider element types, this could result in
@@ -1077,7 +1181,7 @@ public:
/// \return The maximum interleave factor that any transform should try to
/// perform for this target. This number depends on the level of parallelism
/// and the number of execution units in the CPU.
- unsigned getMaxInterleaveFactor(unsigned VF) const;
+ unsigned getMaxInterleaveFactor(ElementCount VF) const;
/// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
static OperandValueInfo getOperandInfo(const Value *V);
@@ -1305,7 +1409,7 @@ public:
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
InstructionCost getMinMaxReductionCost(
- VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
+ Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF = FastMathFlags(),
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
/// Calculate the cost of an extended reduction pattern, similar to
@@ -1323,7 +1427,7 @@ public:
/// ResTy vecreduce.opcode(ext(Ty A)).
InstructionCost getExtendedReductionCost(
unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
- std::optional<FastMathFlags> FMF,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
/// \returns The cost of Intrinsic instructions. Analyses the real arguments.
@@ -1551,6 +1655,20 @@ public:
VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
/// @}
+ /// \returns Whether a 32-bit branch instruction is available in Arm or Thumb
+ /// state.
+ ///
+ /// Used by the LowerTypeTests pass, which constructs an IR inline assembler
+ /// node containing a jump table in a format suitable for the target, so it
+ /// needs to know what format of jump table it can legally use.
+ ///
+ /// For non-Arm targets, this function isn't used. It defaults to returning
+ /// false, but it shouldn't matter what it returns anyway.
+ bool hasArmWideBranch(bool Thumb) const;
+
+ /// \return The maximum number of function arguments the target supports.
+ unsigned getMaxNumArgs() const;
+
/// @}
private:
@@ -1571,11 +1689,19 @@ public:
virtual const DataLayout &getDataLayout() const = 0;
virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands,
+ Type *AccessType,
TTI::TargetCostKind CostKind) = 0;
- virtual unsigned getInliningThresholdMultiplier() = 0;
+ virtual InstructionCost
+ getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
+ const TTI::PointersChainInfo &Info, Type *AccessTy,
+ TTI::TargetCostKind CostKind) = 0;
+ virtual unsigned getInliningThresholdMultiplier() const = 0;
virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
- virtual int getInlinerVectorBonusPercent() = 0;
+ virtual int getInlinerVectorBonusPercent() const = 0;
+ virtual unsigned getCallerAllocaCost(const CallBase *CB,
+ const AllocaInst *AI) const = 0;
virtual InstructionCost getMemcpyCost(const Instruction *I) = 0;
+ virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const = 0;
virtual unsigned
getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
ProfileSummaryInfo *PSI,
@@ -1584,10 +1710,11 @@ public:
ArrayRef<const Value *> Operands,
TargetCostKind CostKind) = 0;
virtual BranchProbability getPredictableBranchThreshold() = 0;
- virtual bool hasBranchDivergence() = 0;
- virtual bool useGPUDivergenceAnalysis() = 0;
+ virtual bool hasBranchDivergence(const Function *F = nullptr) = 0;
virtual bool isSourceOfDivergence(const Value *V) = 0;
virtual bool isAlwaysUniform(const Value *V) = 0;
+ virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
+ virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const = 0;
virtual unsigned getFlatAddressSpace() = 0;
virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const = 0;
@@ -1611,12 +1738,9 @@ public:
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) = 0;
- virtual bool
- preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
- AssumptionCache &AC, TargetLibraryInfo *TLI,
- DominatorTree *DT, LoopVectorizationLegality *LVL,
- InterleavedAccessInfo *IAI) = 0;
- virtual PredicationStyle emitGetActiveLaneMask() = 0;
+ virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) = 0;
+ virtual TailFoldingStyle
+ getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0;
virtual std::optional<Instruction *> instCombineIntrinsic(
InstCombiner &IC, IntrinsicInst &II) = 0;
virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
@@ -1725,6 +1849,7 @@ public:
virtual unsigned getMinVectorRegisterBitWidth() const = 0;
virtual std::optional<unsigned> getMaxVScale() const = 0;
virtual std::optional<unsigned> getVScaleForTuning() const = 0;
+ virtual bool isVScaleKnownToBeAPowerOfTwo() const = 0;
virtual bool
shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const = 0;
virtual ElementCount getMinimumVF(unsigned ElemWidth,
@@ -1766,7 +1891,7 @@ public:
/// \return if target want to issue a prefetch in address space \p AS.
virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0;
- virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
+ virtual unsigned getMaxInterleaveFactor(ElementCount VF) = 0;
virtual InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
@@ -1833,11 +1958,11 @@ public:
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) = 0;
virtual InstructionCost
- getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
+ getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
TTI::TargetCostKind CostKind) = 0;
virtual InstructionCost getExtendedReductionCost(
unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
- std::optional<FastMathFlags> FMF,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0;
virtual InstructionCost getMulAccReductionCost(
bool IsUnsigned, Type *ResTy, VectorType *Ty,
@@ -1908,6 +2033,8 @@ public:
Align Alignment) const = 0;
virtual VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
+ virtual bool hasArmWideBranch(bool Thumb) const = 0;
+ virtual unsigned getMaxNumArgs() const = 0;
};
template <typename T>
@@ -1924,22 +2051,38 @@ public:
InstructionCost
getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands,
+ ArrayRef<const Value *> Operands, Type *AccessType,
TargetTransformInfo::TargetCostKind CostKind) override {
- return Impl.getGEPCost(PointeeType, Ptr, Operands, CostKind);
+ return Impl.getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
}
- unsigned getInliningThresholdMultiplier() override {
+ InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
+ const Value *Base,
+ const PointersChainInfo &Info,
+ Type *AccessTy,
+ TargetCostKind CostKind) override {
+ return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
+ }
+ unsigned getInliningThresholdMultiplier() const override {
return Impl.getInliningThresholdMultiplier();
}
unsigned adjustInliningThreshold(const CallBase *CB) override {
return Impl.adjustInliningThreshold(CB);
}
- int getInlinerVectorBonusPercent() override {
+ int getInlinerVectorBonusPercent() const override {
return Impl.getInlinerVectorBonusPercent();
}
+ unsigned getCallerAllocaCost(const CallBase *CB,
+ const AllocaInst *AI) const override {
+ return Impl.getCallerAllocaCost(CB, AI);
+ }
InstructionCost getMemcpyCost(const Instruction *I) override {
return Impl.getMemcpyCost(I);
}
+
+ uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
+ return Impl.getMaxMemIntrinsicInlineSizeThreshold();
+ }
+
InstructionCost getInstructionCost(const User *U,
ArrayRef<const Value *> Operands,
TargetCostKind CostKind) override {
@@ -1948,9 +2091,8 @@ public:
BranchProbability getPredictableBranchThreshold() override {
return Impl.getPredictableBranchThreshold();
}
- bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
- bool useGPUDivergenceAnalysis() override {
- return Impl.useGPUDivergenceAnalysis();
+ bool hasBranchDivergence(const Function *F = nullptr) override {
+ return Impl.hasBranchDivergence(F);
}
bool isSourceOfDivergence(const Value *V) override {
return Impl.isSourceOfDivergence(V);
@@ -1960,6 +2102,14 @@ public:
return Impl.isAlwaysUniform(V);
}
+ bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
+ return Impl.isValidAddrSpaceCast(FromAS, ToAS);
+ }
+
+ bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
+ return Impl.addrspacesMayAlias(AS0, AS1);
+ }
+
unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
@@ -2009,15 +2159,12 @@ public:
HardwareLoopInfo &HWLoopInfo) override {
return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}
- bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
- AssumptionCache &AC, TargetLibraryInfo *TLI,
- DominatorTree *DT,
- LoopVectorizationLegality *LVL,
- InterleavedAccessInfo *IAI) override {
- return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI);
+ bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
+ return Impl.preferPredicateOverEpilogue(TFI);
}
- PredicationStyle emitGetActiveLaneMask() override {
- return Impl.emitGetActiveLaneMask();
+ TailFoldingStyle
+ getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
+ return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
}
std::optional<Instruction *>
instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
@@ -2262,6 +2409,9 @@ public:
std::optional<unsigned> getVScaleForTuning() const override {
return Impl.getVScaleForTuning();
}
+ bool isVScaleKnownToBeAPowerOfTwo() const override {
+ return Impl.isVScaleKnownToBeAPowerOfTwo();
+ }
bool shouldMaximizeVectorBandwidth(
TargetTransformInfo::RegisterKind K) const override {
return Impl.shouldMaximizeVectorBandwidth(K);
@@ -2325,7 +2475,7 @@ public:
return Impl.shouldPrefetchAddressSpace(AS);
}
- unsigned getMaxInterleaveFactor(unsigned VF) override {
+ unsigned getMaxInterleaveFactor(ElementCount VF) override {
return Impl.getMaxInterleaveFactor(VF);
}
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
@@ -2433,20 +2583,20 @@ public:
return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
}
InstructionCost
- getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
+ getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
TTI::TargetCostKind CostKind) override {
- return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
+ return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
}
- InstructionCost getExtendedReductionCost(
- unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
- std::optional<FastMathFlags> FMF,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override {
+ InstructionCost
+ getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
+ VectorType *Ty, FastMathFlags FMF,
+ TTI::TargetCostKind CostKind) override {
return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
CostKind);
}
- InstructionCost getMulAccReductionCost(
- bool IsUnsigned, Type *ResTy, VectorType *Ty,
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override {
+ InstructionCost
+ getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
+ TTI::TargetCostKind CostKind) override {
return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
}
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
@@ -2587,6 +2737,14 @@ public:
getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
return Impl.getVPLegalizationStrategy(PI);
}
+
+ bool hasArmWideBranch(bool Thumb) const override {
+ return Impl.hasArmWideBranch(Thumb);
+ }
+
+ unsigned getMaxNumArgs() const override {
+ return Impl.getMaxNumArgs();
+ }
};
template <typename T>
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 21d10482cf36..4ab339956182 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -47,7 +47,7 @@ public:
const DataLayout &getDataLayout() const { return DL; }
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands,
+ ArrayRef<const Value *> Operands, Type *AccessType,
TTI::TargetCostKind CostKind) const {
// In the basic model, we just assume that all-constant GEPs will be folded
// into their uses via addressing modes.
@@ -70,6 +70,9 @@ public:
unsigned getInliningThresholdMultiplier() const { return 1; }
unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
+ unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
+ return 0;
+ };
int getInlinerVectorBonusPercent() const { return 150; }
@@ -77,6 +80,10 @@ public:
return TTI::TCC_Expensive;
}
+ uint64_t getMaxMemIntrinsicInlineSizeThreshold() const {
+ return 64;
+ }
+
// Although this default value is arbitrary, it is not random. It is assumed
// that a condition that evaluates the same way by a higher percentage than
// this is best represented as control flow. Therefore, the default value N
@@ -87,14 +94,20 @@ public:
return BranchProbability(99, 100);
}
- bool hasBranchDivergence() const { return false; }
-
- bool useGPUDivergenceAnalysis() const { return false; }
+ bool hasBranchDivergence(const Function *F = nullptr) const { return false; }
bool isSourceOfDivergence(const Value *V) const { return false; }
bool isAlwaysUniform(const Value *V) const { return false; }
+ bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
+ return false;
+ }
+
+ bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
+ return true;
+ }
+
unsigned getFlatAddressSpace() const { return -1; }
bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
@@ -163,16 +176,11 @@ public:
return false;
}
- bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
- AssumptionCache &AC, TargetLibraryInfo *TLI,
- DominatorTree *DT,
- LoopVectorizationLegality *LVL,
- InterleavedAccessInfo *IAI) const {
- return false;
- }
+ bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const { return false; }
- PredicationStyle emitGetActiveLaneMask() const {
- return PredicationStyle::None;
+ TailFoldingStyle
+ getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
+ return TailFoldingStyle::DataWithoutLaneMask;
}
std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
@@ -438,6 +446,7 @@ public:
std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
+ bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
bool
shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
@@ -491,13 +500,21 @@ public:
bool enableWritePrefetching() const { return false; }
bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }
- unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }
+ unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info,
ArrayRef<const Value *> Args,
const Instruction *CxtI = nullptr) const {
+ // Widenable conditions will eventually lower into constants, so some
+ // operations with them will be trivially optimized away.
+ auto IsWidenableCondition = [](const Value *V) {
+ if (auto *II = dyn_cast<IntrinsicInst>(V))
+ if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
+ return true;
+ return false;
+ };
// FIXME: A number of transformation tests seem to require these values
    // which seems a little odd for how arbitrary they are.
switch (Opcode) {
@@ -511,6 +528,11 @@ public:
case Instruction::URem:
// FIXME: Unlikely to be true for CodeSize.
return TTI::TCC_Expensive;
+ case Instruction::And:
+ case Instruction::Or:
+ if (any_of(Args, IsWidenableCondition))
+ return TTI::TCC_Free;
+ break;
}
// Assume a 3cy latency for fp arithmetic ops.
@@ -653,6 +675,7 @@ public:
case Intrinsic::sideeffect:
case Intrinsic::pseudoprobe:
case Intrinsic::arithmetic_fence:
+ case Intrinsic::dbg_assign:
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
case Intrinsic::dbg_label:
@@ -679,6 +702,7 @@ public:
case Intrinsic::coro_suspend:
case Intrinsic::coro_subfn_addr:
case Intrinsic::threadlocal_address:
+ case Intrinsic::experimental_widenable_condition:
// These intrinsics don't actually represent code after lowering.
return 0;
}
@@ -705,14 +729,15 @@ public:
return 1;
}
- InstructionCost getMinMaxReductionCost(VectorType *, VectorType *, bool,
+ InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *,
+ FastMathFlags,
TTI::TargetCostKind) const {
return 1;
}
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
Type *ResTy, VectorType *Ty,
- std::optional<FastMathFlags> FMF,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind) const {
return 1;
}
@@ -862,6 +887,10 @@ public:
/* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
}
+ bool hasArmWideBranch(bool) const { return false; }
+
+ unsigned getMaxNumArgs() const { return UINT_MAX; }
+
protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
@@ -887,7 +916,7 @@ protected:
bool signedElement = IntElement->getValue().isNegative();
// Get the element min required size.
unsigned ElementMinRequiredSize =
- IntElement->getValue().getMinSignedBits() - 1;
+ IntElement->getValue().getSignificantBits() - 1;
// In case one element is signed then all the vector is signed.
isSigned |= signedElement;
// Save the max required bit size between all the elements.
@@ -902,7 +931,7 @@ protected:
if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
isSigned = CI->getValue().isNegative();
- return CI->getValue().getMinSignedBits() - 1;
+ return CI->getValue().getSignificantBits() - 1;
}
if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
@@ -958,12 +987,9 @@ public:
using BaseT::getGEPCost;
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands,
+ ArrayRef<const Value *> Operands, Type *AccessType,
TTI::TargetCostKind CostKind) {
assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
- assert(cast<PointerType>(Ptr->getType()->getScalarType())
- ->isOpaqueOrPointeeTypeMatches(PointeeType) &&
- "explicit pointee type doesn't match operand's pointee type");
auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
bool HasBaseReg = (BaseGV == nullptr);
@@ -1012,14 +1038,69 @@ public:
}
}
+ // If we haven't been provided a hint, use the target type for now.
+ //
+ // TODO: Take a look at potentially removing this: This is *slightly* wrong
+ // as it's possible to have a GEP with a foldable target type but a memory
+ // access that isn't foldable. For example, this load isn't foldable on
+ // RISC-V:
+ //
+ // %p = getelementptr i32, ptr %base, i32 42
+ // %x = load <2 x i32>, ptr %p
+ if (!AccessType)
+ AccessType = TargetType;
+
+ // If the final address of the GEP is a legal addressing mode for the given
+ // access type, then we can fold it into its users.
if (static_cast<T *>(this)->isLegalAddressingMode(
- TargetType, const_cast<GlobalValue *>(BaseGV),
+ AccessType, const_cast<GlobalValue *>(BaseGV),
BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
Ptr->getType()->getPointerAddressSpace()))
return TTI::TCC_Free;
+
+ // TODO: Instead of returning TCC_Basic here, we should use
+ // getArithmeticInstrCost. Or better yet, provide a hook to let the target
+ // model it.
return TTI::TCC_Basic;
}
+ InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
+ const Value *Base,
+ const TTI::PointersChainInfo &Info,
+ Type *AccessTy,
+ TTI::TargetCostKind CostKind) {
+ InstructionCost Cost = TTI::TCC_Free;
+    // In the basic model we only account for GEP instructions (although the
+    // chain may also contain allocas, plain values, constants and/or constant
+    // expressions, PHIs, bitcasts, or anything else that can be used as a
+    // pointer). Typically, if Base is not a GEP instruction and all the
+    // pointers are relative to the same base address, the rest are either GEP
+    // instructions, PHIs, bitcasts or constants. With a common base, each
+    // non-Base GEP is costed as an ADD operation if any of its indices is
+    // non-constant. If there are no known dependencies between the pointers,
+    // the cost is the sum of the costs of the individual GEP instructions.
+ for (const Value *V : Ptrs) {
+ const auto *GEP = dyn_cast<GetElementPtrInst>(V);
+ if (!GEP)
+ continue;
+ if (Info.isSameBase() && V != Base) {
+ if (GEP->hasAllConstantIndices())
+ continue;
+ Cost += static_cast<T *>(this)->getArithmeticInstrCost(
+ Instruction::Add, GEP->getType(), CostKind,
+ {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
+ std::nullopt);
+ } else {
+ SmallVector<const Value *> Indices(GEP->indices());
+ Cost += static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
+ GEP->getPointerOperand(),
+ Indices, AccessTy, CostKind);
+ }
+ }
+ return Cost;
+ }
+
InstructionCost getInstructionCost(const User *U,
ArrayRef<const Value *> Operands,
TTI::TargetCostKind CostKind) {
@@ -1066,9 +1147,15 @@ public:
break;
case Instruction::GetElementPtr: {
const auto *GEP = cast<GEPOperator>(U);
+ Type *AccessType = nullptr;
+ // For now, only provide the AccessType in the simple case where the GEP
+ // only has one user.
+ if (GEP->hasOneUser() && I)
+ AccessType = I->user_back()->getAccessType();
+
return TargetTTI->getGEPCost(GEP->getSourceElementType(),
- GEP->getPointerOperand(),
- Operands.drop_front(), CostKind);
+ Operands.front(), Operands.drop_front(),
+ AccessType, CostKind);
}
case Instruction::Add:
case Instruction::FAdd:
@@ -1089,11 +1176,10 @@ public:
case Instruction::Or:
case Instruction::Xor:
case Instruction::FNeg: {
- const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(U->getOperand(0));
+ const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Operands[0]);
TTI::OperandValueInfo Op2Info;
if (Opcode != Instruction::FNeg)
- Op2Info = TTI::getOperandInfo(U->getOperand(1));
- SmallVector<const Value *, 2> Operands(U->operand_values());
+ Op2Info = TTI::getOperandInfo(Operands[1]);
return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
Op2Info, Operands, I);
}
@@ -1110,14 +1196,14 @@ public:
case Instruction::SExt:
case Instruction::ZExt:
case Instruction::AddrSpaceCast: {
- Type *OpTy = U->getOperand(0)->getType();
+ Type *OpTy = Operands[0]->getType();
return TargetTTI->getCastInstrCost(
Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
}
case Instruction::Store: {
auto *SI = cast<StoreInst>(U);
- Type *ValTy = U->getOperand(0)->getType();
- TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(U->getOperand(0));
+ Type *ValTy = Operands[0]->getType();
+ TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(Operands[0]);
return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
SI->getPointerAddressSpace(), CostKind,
OpInfo, I);
@@ -1160,14 +1246,14 @@ public:
match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
CostKind, Op1Info, Op2Info, Operands, I);
}
- Type *CondTy = U->getOperand(0)->getType();
+ Type *CondTy = Operands[0]->getType();
return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
CmpInst::BAD_ICMP_PREDICATE,
CostKind, I);
}
case Instruction::ICmp:
case Instruction::FCmp: {
- Type *ValTy = U->getOperand(0)->getType();
+ Type *ValTy = Operands[0]->getType();
// TODO: Also handle ICmp/FCmp constant expressions.
return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
I ? cast<CmpInst>(I)->getPredicate()
@@ -1179,7 +1265,7 @@ public:
if (!IE)
return TTI::TCC_Basic; // FIXME
unsigned Idx = -1;
- if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
+ if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
if (CI->getValue().getActiveBits() <= 32)
Idx = CI->getZExtValue();
return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
@@ -1190,7 +1276,7 @@ public:
return TTI::TCC_Basic; // FIXME
auto *VecTy = cast<VectorType>(U->getType());
- auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());
+ auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
int NumSubElts, SubIndex;
if (Shuffle->changesLength()) {
@@ -1213,9 +1299,9 @@ public:
int ReplicationFactor, VF;
if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
APInt DemandedDstElts =
- APInt::getNullValue(Shuffle->getShuffleMask().size());
+ APInt::getZero(Shuffle->getShuffleMask().size());
for (auto I : enumerate(Shuffle->getShuffleMask())) {
- if (I.value() != UndefMaskElem)
+ if (I.value() != PoisonMaskElem)
DemandedDstElts.setBit(I.index());
}
return TargetTTI->getReplicationShuffleCost(
@@ -1274,10 +1360,10 @@ public:
if (!EEI)
return TTI::TCC_Basic; // FIXME
unsigned Idx = -1;
- if (auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1)))
+ if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
if (CI->getValue().getActiveBits() <= 32)
Idx = CI->getZExtValue();
- Type *DstTy = U->getOperand(0)->getType();
+ Type *DstTy = Operands[0]->getType();
return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
}
}
diff --git a/llvm/include/llvm/Analysis/TensorSpec.h b/llvm/include/llvm/Analysis/TensorSpec.h
index 3e0db32a2204..c50507b7a6b1 100644
--- a/llvm/include/llvm/Analysis/TensorSpec.h
+++ b/llvm/include/llvm/Analysis/TensorSpec.h
@@ -103,6 +103,9 @@ private:
size_t ElementSize = 0;
};
+/// For debugging.
+std::string tensorValueToString(const char *Buffer, const TensorSpec &Spec);
+
/// Construct a TensorSpec from a JSON dictionary of the form:
/// { "name": <string>,
/// "port": <int>,
diff --git a/llvm/include/llvm/Analysis/UniformityAnalysis.h b/llvm/include/llvm/Analysis/UniformityAnalysis.h
index 66ce480d4b76..f42c4950ed64 100644
--- a/llvm/include/llvm/Analysis/UniformityAnalysis.h
+++ b/llvm/include/llvm/Analysis/UniformityAnalysis.h
@@ -1,4 +1,4 @@
-//===- ConvergenceUtils.h -----------------------*- C++ -*-----------------===//
+//===- UniformityAnalysis.h ---------------------*- C++ -*-----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -7,10 +7,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// \brief Convergence info and convergence-aware uniform info for LLVM IR
-///
-/// This differs from traditional divergence analysis by taking convergence
-/// intrinsics into account.
+/// \brief LLVM IR instance of the generic uniformity analysis
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Analysis/Utils/TrainingLogger.h b/llvm/include/llvm/Analysis/Utils/TrainingLogger.h
index b7db58f67436..8f46779a732d 100644
--- a/llvm/include/llvm/Analysis/Utils/TrainingLogger.h
+++ b/llvm/include/llvm/Analysis/Utils/TrainingLogger.h
@@ -96,7 +96,7 @@ class Logger final {
StringMap<size_t> ObservationIDs;
std::string CurrentContext;
- void writeHeader();
+ void writeHeader(std::optional<TensorSpec> AdviceSpec);
void writeTensor(const TensorSpec &Spec, const char *RawData) {
OS->write(RawData, Spec.getTotalTensorBufferSize());
}
@@ -111,16 +111,24 @@ public:
/// corresponding to the model being trained/logged.
Logger(std::unique_ptr<raw_ostream> OS,
const std::vector<TensorSpec> &FeatureSpecs,
- const TensorSpec &RewardSpec, bool IncludeReward);
+ const TensorSpec &RewardSpec, bool IncludeReward,
+ std::optional<TensorSpec> AdviceSpec = std::nullopt);
void switchContext(StringRef Name);
void startObservation();
void endObservation();
+ void flush() { OS->flush(); }
const std::string &currentContext() const { return CurrentContext; }
+  /// Check if there is at least one observation for `currentContext()`.
bool hasObservationInProgress() const {
- return ObservationIDs.find(CurrentContext) != ObservationIDs.end();
+ return hasAnyObservationForContext(CurrentContext);
+ }
+
+  /// Check if there is at least one observation for the context `Ctx`.
+ bool hasAnyObservationForContext(StringRef Ctx) const {
+ return ObservationIDs.contains(Ctx);
}
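A hedged sketch of constructing the extended Logger with an advice spec; OS is assumed to be a std::unique_ptr<raw_ostream> owned by the caller, and the tensor names and shapes are illustrative rather than taken from any in-tree user:

std::vector<TensorSpec> Features = {
    TensorSpec::createSpec<int64_t>("callee_size", {1})};
TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
TensorSpec Advice = TensorSpec::createSpec<int64_t>("decision", {1});

Logger Log(std::move(OS), Features, Reward, /*IncludeReward=*/true,
           /*AdviceSpec=*/Advice);
Log.switchContext("some_function");
Log.startObservation();
// ... log the feature tensors and the reward for this observation ...
Log.endObservation();
Log.flush();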
template <typename T> void logReward(T Value) {
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index d1d31dc795b2..8cab01c2f11d 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -38,7 +38,7 @@ struct KnownBits;
class Loop;
class LoopInfo;
class MDNode;
-class OptimizationRemarkEmitter;
+struct SimplifyQuery;
class StringRef;
class TargetLibraryInfo;
class Value;
@@ -57,7 +57,6 @@ void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL,
unsigned Depth = 0, AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr,
- OptimizationRemarkEmitter *ORE = nullptr,
bool UseInstrInfo = true);
/// Determine which bits of V are known to be either zero or one and return
@@ -73,7 +72,6 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
unsigned Depth = 0, AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr,
- OptimizationRemarkEmitter *ORE = nullptr,
bool UseInstrInfo = true);
/// Returns the known bits rather than passing by reference.
@@ -81,7 +79,6 @@ KnownBits computeKnownBits(const Value *V, const DataLayout &DL,
unsigned Depth = 0, AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr,
- OptimizationRemarkEmitter *ORE = nullptr,
bool UseInstrInfo = true);
/// Returns the known bits rather than passing by reference.
@@ -90,7 +87,6 @@ KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr,
- OptimizationRemarkEmitter *ORE = nullptr,
bool UseInstrInfo = true);
/// Compute known bits from the range metadata.
@@ -98,6 +94,17 @@ KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts,
/// \p KnownOne the set of bits that are known to be one
void computeKnownBitsFromRangeMetadata(const MDNode &Ranges, KnownBits &Known);
+/// Merge bits known from assumes into Known.
+void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
+ unsigned Depth, const SimplifyQuery &Q);
+
+/// Compute the known bits of a logic op (and/xor/or) from the KnownBits of
+/// its LHS and RHS operands.
+KnownBits analyzeKnownBitsFromAndXorOr(
+ const Operator *I, const KnownBits &KnownLHS, const KnownBits &KnownRHS,
+ unsigned Depth, const DataLayout &DL, AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
+
/// Return true if LHS and RHS have no common bits set.
bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
const DataLayout &DL, AssumptionCache *AC = nullptr,
@@ -117,6 +124,10 @@ bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
const DominatorTree *DT = nullptr,
bool UseInstrInfo = true);
+/// Return true if the given instruction is only used in zero comparisons.
+bool isOnlyUsedInZeroComparison(const Instruction *CxtI);
+
+/// Return true if the given instruction is only used in zero equality comparisons.
bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI);
/// Return true if the given value is known to be non-zero when defined. For
@@ -210,10 +221,277 @@ unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL,
Intrinsic::ID getIntrinsicForCallSite(const CallBase &CB,
const TargetLibraryInfo *TLI);
+/// Returns a pair of values, which if passed to llvm.is.fpclass, returns the
+/// same result as an fcmp with the given operands.
+///
+/// If \p LookThroughSrc is true, consider the input value when computing the
+/// mask.
+///
+/// If \p LookThroughSrc is false, ignore the source value (i.e. the first pair
+/// element will always be LHS).
+std::pair<Value *, FPClassTest> fcmpToClassTest(CmpInst::Predicate Pred,
+ const Function &F, Value *LHS,
+ Value *RHS,
+ bool LookThroughSrc = true);
+
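A hedged usage sketch; Pred, F, LHS and RHS are assumed to describe an existing fcmp such as 'fcmp olt %x, 0.0', and the null check guards the cases the helper does not handle:

auto [Src, Mask] = fcmpToClassTest(CmpInst::FCMP_OLT, F, LHS, RHS);
if (Src) {
  // Mask now names exactly the classes of Src for which the compare is true;
  // for 'olt 0.0' that is fcNegNormal | fcNegSubnormal | fcNegInf.
}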
+struct KnownFPClass {
+ /// Floating-point classes the value could be one of.
+ FPClassTest KnownFPClasses = fcAllFlags;
+
+ /// std::nullopt if the sign bit is unknown, true if the sign bit is
+ /// definitely set or false if the sign bit is definitely unset.
+ std::optional<bool> SignBit;
+
+ /// Return true if it's known this can never be one of the mask entries.
+ bool isKnownNever(FPClassTest Mask) const {
+ return (KnownFPClasses & Mask) == fcNone;
+ }
+
+ bool isUnknown() const {
+ return KnownFPClasses == fcAllFlags && !SignBit;
+ }
+
+ /// Return true if it's known this can never be a nan.
+ bool isKnownNeverNaN() const {
+ return isKnownNever(fcNan);
+ }
+
+ /// Return true if it's known this can never be an infinity.
+ bool isKnownNeverInfinity() const {
+ return isKnownNever(fcInf);
+ }
+
+ /// Return true if it's known this can never be +infinity.
+ bool isKnownNeverPosInfinity() const {
+ return isKnownNever(fcPosInf);
+ }
+
+ /// Return true if it's known this can never be -infinity.
+ bool isKnownNeverNegInfinity() const {
+ return isKnownNever(fcNegInf);
+ }
+
+ /// Return true if it's known this can never be a subnormal
+ bool isKnownNeverSubnormal() const {
+ return isKnownNever(fcSubnormal);
+ }
+
+ /// Return true if it's known this can never be a positive subnormal
+ bool isKnownNeverPosSubnormal() const {
+ return isKnownNever(fcPosSubnormal);
+ }
+
+ /// Return true if it's known this can never be a negative subnormal
+ bool isKnownNeverNegSubnormal() const {
+ return isKnownNever(fcNegSubnormal);
+ }
+
+ /// Return true if it's known this can never be a zero. This means a literal
+ /// [+-]0, and does not include denormal inputs implicitly treated as [+-]0.
+ bool isKnownNeverZero() const {
+ return isKnownNever(fcZero);
+ }
+
+ /// Return true if it's known this can never be a literal positive zero.
+ bool isKnownNeverPosZero() const {
+ return isKnownNever(fcPosZero);
+ }
+
+ /// Return true if it's known this can never be a negative zero. This means a
+ /// literal -0 and does not include denormal inputs implicitly treated as -0.
+ bool isKnownNeverNegZero() const {
+ return isKnownNever(fcNegZero);
+ }
+
+  /// Return true if it's known this can never be interpreted as a zero. This
+ /// extends isKnownNeverZero to cover the case where the assumed
+ /// floating-point mode for the function interprets denormals as zero.
+ bool isKnownNeverLogicalZero(const Function &F, Type *Ty) const;
+
+  /// Return true if it's known this can never be interpreted as a negative zero.
+ bool isKnownNeverLogicalNegZero(const Function &F, Type *Ty) const;
+
+  /// Return true if it's known this can never be interpreted as a positive zero.
+ bool isKnownNeverLogicalPosZero(const Function &F, Type *Ty) const;
+
+ static constexpr FPClassTest OrderedLessThanZeroMask =
+ fcNegSubnormal | fcNegNormal | fcNegInf;
+ static constexpr FPClassTest OrderedGreaterThanZeroMask =
+ fcPosSubnormal | fcPosNormal | fcPosInf;
+
+ /// Return true if we can prove that the analyzed floating-point value is
+ /// either NaN or never less than -0.0.
+ ///
+ /// NaN --> true
+ /// +0 --> true
+ /// -0 --> true
+ /// x > +0 --> true
+ /// x < -0 --> false
+ bool cannotBeOrderedLessThanZero() const {
+ return isKnownNever(OrderedLessThanZeroMask);
+ }
+
+ /// Return true if we can prove that the analyzed floating-point value is
+ /// either NaN or never greater than -0.0.
+ /// NaN --> true
+ /// +0 --> true
+ /// -0 --> true
+ /// x > +0 --> false
+ /// x < -0 --> true
+ bool cannotBeOrderedGreaterThanZero() const {
+ return isKnownNever(OrderedGreaterThanZeroMask);
+ }
+
+ KnownFPClass &operator|=(const KnownFPClass &RHS) {
+ KnownFPClasses = KnownFPClasses | RHS.KnownFPClasses;
+
+ if (SignBit != RHS.SignBit)
+ SignBit = std::nullopt;
+ return *this;
+ }
+
+ void knownNot(FPClassTest RuleOut) {
+ KnownFPClasses = KnownFPClasses & ~RuleOut;
+ }
+
+ void fneg() {
+ KnownFPClasses = llvm::fneg(KnownFPClasses);
+ if (SignBit)
+ SignBit = !*SignBit;
+ }
+
+ void fabs() {
+ if (KnownFPClasses & fcNegZero)
+ KnownFPClasses |= fcPosZero;
+
+ if (KnownFPClasses & fcNegInf)
+ KnownFPClasses |= fcPosInf;
+
+ if (KnownFPClasses & fcNegSubnormal)
+ KnownFPClasses |= fcPosSubnormal;
+
+ if (KnownFPClasses & fcNegNormal)
+ KnownFPClasses |= fcPosNormal;
+
+ signBitMustBeZero();
+ }
+
+ /// Return true if the sign bit must be 0, ignoring the sign of nans.
+ bool signBitIsZeroOrNaN() const {
+ return isKnownNever(fcNegative);
+ }
+
+ /// Assume the sign bit is zero.
+ void signBitMustBeZero() {
+ KnownFPClasses &= (fcPositive | fcNan);
+ SignBit = false;
+ }
+
+ void copysign(const KnownFPClass &Sign) {
+ // Don't know anything about the sign of the source. Expand the possible set
+ // to its opposite sign pair.
+ if (KnownFPClasses & fcZero)
+ KnownFPClasses |= fcZero;
+ if (KnownFPClasses & fcSubnormal)
+ KnownFPClasses |= fcSubnormal;
+ if (KnownFPClasses & fcNormal)
+ KnownFPClasses |= fcNormal;
+ if (KnownFPClasses & fcInf)
+ KnownFPClasses |= fcInf;
+
+ // Sign bit is exactly preserved even for nans.
+ SignBit = Sign.SignBit;
+
+ // Clear sign bits based on the input sign mask.
+ if (Sign.isKnownNever(fcPositive | fcNan) || (SignBit && *SignBit))
+ KnownFPClasses &= (fcNegative | fcNan);
+ if (Sign.isKnownNever(fcNegative | fcNan) || (SignBit && !*SignBit))
+ KnownFPClasses &= (fcPositive | fcNan);
+ }
+
+ // Propagate knowledge that a non-NaN source implies the result can also not
+ // be a NaN. For unconstrained operations, signaling nans are not guaranteed
+ // to be quieted but cannot be introduced.
+ void propagateNaN(const KnownFPClass &Src, bool PreserveSign = false) {
+ if (Src.isKnownNever(fcNan)) {
+ knownNot(fcNan);
+ if (PreserveSign)
+ SignBit = Src.SignBit;
+ } else if (Src.isKnownNever(fcSNan))
+ knownNot(fcSNan);
+ }
+
+ /// Propagate knowledge from a source value that could be a denormal or
+ /// zero. We have to be conservative since output flushing is not guaranteed,
+ /// so known-never-zero may not hold.
+ ///
+ /// This assumes a copy-like operation and will replace any currently known
+ /// information.
+ void propagateDenormal(const KnownFPClass &Src, const Function &F, Type *Ty);
+
+ /// Report known classes if \p Src is evaluated through a potentially
+ /// canonicalizing operation. We can assume signaling nans will not be
+ /// introduced, but cannot assume a denormal will be flushed under FTZ/DAZ.
+ ///
+ /// This assumes a copy-like operation and will replace any currently known
+ /// information.
+ void propagateCanonicalizingSrc(const KnownFPClass &Src, const Function &F,
+ Type *Ty);
+
+ void resetAll() { *this = KnownFPClass(); }
+};
+
+inline KnownFPClass operator|(KnownFPClass LHS, const KnownFPClass &RHS) {
+ LHS |= RHS;
+ return LHS;
+}
+
+inline KnownFPClass operator|(const KnownFPClass &LHS, KnownFPClass &&RHS) {
+ RHS |= LHS;
+ return std::move(RHS);
+}
+
+/// Determine which floating-point classes are valid for \p V, and return them
+/// in KnownFPClass bit sets.
+///
+/// This function is defined on values with floating-point type, vectors of
+/// floating-point type, and arrays of floating-point type.
+///
+/// \p InterestedClasses is a compile-time optimization hint indicating which
+/// floating-point classes should be queried. Results for classes not listed in
+/// \p InterestedClasses are still reliable if the query happens to determine
+/// them, but they are not guaranteed to be computed.
+KnownFPClass computeKnownFPClass(
+ const Value *V, const APInt &DemandedElts, const DataLayout &DL,
+ FPClassTest InterestedClasses = fcAllFlags, unsigned Depth = 0,
+ const TargetLibraryInfo *TLI = nullptr, AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
+
+KnownFPClass computeKnownFPClass(
+ const Value *V, const DataLayout &DL,
+ FPClassTest InterestedClasses = fcAllFlags, unsigned Depth = 0,
+ const TargetLibraryInfo *TLI = nullptr, AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
+
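A minimal usage sketch of the new query (illustrative only; the variable I is assumed to be an Instruction pointer of floating-point type and is not part of this change):

  // Only ask about the classes we care about; restricting InterestedClasses
  // keeps the recursive analysis cheap.
  KnownFPClass Known = computeKnownFPClass(
      I, I->getModule()->getDataLayout(), /*InterestedClasses=*/fcNan | fcInf);
  if (Known.isKnownNeverNaN() && Known.isKnownNeverInfinity()) {
    // The value is provably finite here.
  }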
/// Return true if we can prove that the specified FP value is never equal to
-/// -0.0.
-bool CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
- unsigned Depth = 0);
+/// -0.0. Callers should be cautious when the function uses PreserveSign
+/// denormal-fp-math, since denormal operands may be flushed to -0.0.
+inline bool cannotBeNegativeZero(const Value *V, const DataLayout &DL,
+ const TargetLibraryInfo *TLI = nullptr,
+ unsigned Depth = 0,
+ AssumptionCache *AC = nullptr,
+ const Instruction *CtxI = nullptr,
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true) {
+ KnownFPClass Known = computeKnownFPClass(V, DL, fcNegZero, Depth, TLI, AC,
+ CtxI, DT, UseInstrInfo);
+ return Known.isKnownNeverNegZero();
+}
+
+bool CannotBeOrderedLessThanZero(const Value *V, const DataLayout &DL,
+ const TargetLibraryInfo *TLI);
/// Return true if we can prove that the specified FP value is either NaN or
/// never less than -0.0.
@@ -223,19 +501,58 @@ bool CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
/// -0 --> true
/// x > +0 --> true
/// x < -0 --> false
-bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI);
+inline bool cannotBeOrderedLessThanZero(const Value *V, const DataLayout &DL,
+ const TargetLibraryInfo *TLI = nullptr,
+ unsigned Depth = 0,
+ AssumptionCache *AC = nullptr,
+ const Instruction *CtxI = nullptr,
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true) {
+ KnownFPClass Known =
+ computeKnownFPClass(V, DL, KnownFPClass::OrderedLessThanZeroMask, Depth,
+ TLI, AC, CtxI, DT, UseInstrInfo);
+ return Known.cannotBeOrderedLessThanZero();
+}
/// Return true if the floating-point scalar value is not an infinity or if
/// the floating-point vector value has no infinities. Return false if a value
/// could ever be infinity.
-bool isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI,
- unsigned Depth = 0);
+inline bool isKnownNeverInfinity(const Value *V, const DataLayout &DL,
+ const TargetLibraryInfo *TLI = nullptr,
+ unsigned Depth = 0,
+ AssumptionCache *AC = nullptr,
+ const Instruction *CtxI = nullptr,
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true) {
+ KnownFPClass Known = computeKnownFPClass(V, DL, fcInf, Depth, TLI, AC, CtxI,
+ DT, UseInstrInfo);
+ return Known.isKnownNeverInfinity();
+}
+
+/// Return true if the floating-point value can never contain a NaN or infinity.
+inline bool isKnownNeverInfOrNaN(
+ const Value *V, const DataLayout &DL, const TargetLibraryInfo *TLI,
+ unsigned Depth = 0, AssumptionCache *AC = nullptr,
+ const Instruction *CtxI = nullptr, const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true) {
+ KnownFPClass Known = computeKnownFPClass(V, DL, fcInf | fcNan, Depth, TLI, AC,
+ CtxI, DT, UseInstrInfo);
+ return Known.isKnownNeverNaN() && Known.isKnownNeverInfinity();
+}
/// Return true if the floating-point scalar value is not a NaN or if the
/// floating-point vector value has no NaN elements. Return false if a value
/// could ever be NaN.
-bool isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
- unsigned Depth = 0);
+inline bool isKnownNeverNaN(const Value *V, const DataLayout &DL,
+ const TargetLibraryInfo *TLI, unsigned Depth = 0,
+ AssumptionCache *AC = nullptr,
+ const Instruction *CtxI = nullptr,
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true) {
+ KnownFPClass Known = computeKnownFPClass(V, DL, fcNan, Depth, TLI, AC, CtxI,
+ DT, UseInstrInfo);
+ return Known.isKnownNeverNaN();
+}
/// Return true if we can prove that the specified FP value's sign bit is 0.
///
@@ -244,7 +561,8 @@ bool isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
/// -0 --> false
/// x > +0 --> true
/// x < -0 --> false
-bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI);
+bool SignBitMustBeZero(const Value *V, const DataLayout &DL,
+ const TargetLibraryInfo *TLI);
/// If the specified value can be set by repeating the same byte in memory,
/// return the i8 value that it is represented with. This is true for all i8
@@ -554,6 +872,10 @@ OverflowResult computeOverflowForSignedSub(const Value *LHS, const Value *RHS,
bool isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
const DominatorTree &DT);
+/// Determine the possible constant range of vscale with the given bit width,
+/// based on the vscale_range function attribute.
+ConstantRange getVScaleRange(const Function *F, unsigned BitWidth);
+
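A brief sketch of how the new helper might be consumed (F and the 64-bit width are placeholders, not part of this change):

  // Possible values of vscale implied by F's vscale_range attribute,
  // expressed as an unsigned 64-bit ConstantRange.
  ConstantRange VScaleRange = getVScaleRange(F, /*BitWidth=*/64);
  APInt MaxVScale = VScaleRange.getUnsignedMax();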
/// Determine the possible constant range of an integer or vector of integer
/// value. This is intended as a cheap, non-recursive check.
ConstantRange computeConstantRange(const Value *V, bool ForSigned,
@@ -629,7 +951,7 @@ void getGuaranteedWellDefinedOps(const Instruction *I,
/// when I is executed with any operands which appear in KnownPoison holding
/// a poison value at the point of execution.
bool mustTriggerUB(const Instruction *I,
- const SmallSet<const Value *, 16> &KnownPoison);
+ const SmallPtrSetImpl<const Value *> &KnownPoison);
/// Return true if this function can prove that if Inst is executed
/// and yields a poison value or undef bits, then that will trigger
@@ -686,6 +1008,17 @@ bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC = nullptr,
const DominatorTree *DT = nullptr,
unsigned Depth = 0);
+/// Return true if undefined behavior would provably be executed on the path to
+/// OnPathTo if Root produced a poison result. Note that this doesn't say
+/// anything about whether OnPathTo is actually executed or whether Root is
+/// actually poison. This can be used to assess whether a new use of Root can
+/// be added at a location which is control equivalent with OnPathTo (such as
+/// immediately before it) without introducing UB which didn't previously
+/// exist. Note that a false result conveys no information.
+bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
+ Instruction *OnPathTo,
+ DominatorTree *DT);
+
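An illustrative sketch of the use case described above (Root, InsertPt, and DT are hypothetical names):

  // Adding a use of Root immediately before InsertPt is safe with respect to
  // poison only if a poison Root already triggers UB on the path to InsertPt.
  if (mustExecuteUBIfPoisonOnPathTo(Root, InsertPt, &DT)) {
    // The new use cannot introduce UB that did not previously exist.
  }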
/// Specific patterns of select instructions we can match.
enum SelectPatternFlavor {
SPF_UNKNOWN = 0,
@@ -845,12 +1178,6 @@ std::optional<bool> isImpliedByDomCondition(CmpInst::Predicate Pred,
const Value *LHS, const Value *RHS,
const Instruction *ContextI,
const DataLayout &DL);
-
-/// If Ptr1 is provably equal to Ptr2 plus a constant offset, return that
-/// offset. For example, Ptr1 might be &A[42], and Ptr2 might be &A[40]. In
-/// this case offset would be -8.
-std::optional<int64_t> isPointerOffset(const Value *Ptr1, const Value *Ptr2,
- const DataLayout &DL);
} // end namespace llvm
#endif // LLVM_ANALYSIS_VALUETRACKING_H
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index 85d208b94625..b884c1e3911e 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -19,9 +19,11 @@
#define FIXED(NL) ElementCount::getFixed(NL)
#define SCALABLE(NL) ElementCount::getScalable(NL)
+#define NOMASK false
+#define MASKED true
#if !(defined(TLI_DEFINE_VECFUNC))
-#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF},
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF, NOMASK},
#endif
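For illustration, with the default definition above an entry expands into an initializer carrying the new mask field (a sketch; the exact aggregate type this table feeds, VecDesc in TargetLibraryInfo, is assumed):

// TLI_DEFINE_VECFUNC("sin", "_ZGVnN2v_sin", FIXED(2))
//   ==> {"sin", "_ZGVnN2v_sin", ElementCount::getFixed(2), /*NOMASK*/ false},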
#if defined(TLI_DEFINE_ACCELERATE_VECFUNCS)
@@ -525,7 +527,6 @@ TLI_DEFINE_VECFUNC( "sinh", "_ZGVnN2v_sinh", FIXED(2))
TLI_DEFINE_VECFUNC( "llvm.sinh.f64", "_ZGVnN2v_sinh", FIXED(2))
TLI_DEFINE_VECFUNC( "sqrt", "_ZGVnN2v_sqrt", FIXED(2))
-TLI_DEFINE_VECFUNC( "llvm.sqrt.f64", "_ZGVnN2v_sqrt", FIXED(2))
TLI_DEFINE_VECFUNC( "tan", "_ZGVnN2v_tan", FIXED(2))
TLI_DEFINE_VECFUNC( "llvm.tan.f64", "_ZGVnN2v_tan", FIXED(2))
@@ -595,7 +596,6 @@ TLI_DEFINE_VECFUNC( "sinhf", "_ZGVnN4v_sinhf", FIXED(4))
TLI_DEFINE_VECFUNC( "llvm.sinh.f32", "_ZGVnN4v_sinhf", FIXED(4))
TLI_DEFINE_VECFUNC( "sqrtf", "_ZGVnN4v_sqrtf", FIXED(4))
-TLI_DEFINE_VECFUNC( "llvm.sqrt.f32", "_ZGVnN4v_sqrtf", FIXED(4))
TLI_DEFINE_VECFUNC( "tanf", "_ZGVnN4v_tanf", FIXED(4))
TLI_DEFINE_VECFUNC( "llvm.tan.f32", "_ZGVnN4v_tanf", FIXED(4))
@@ -606,10 +606,318 @@ TLI_DEFINE_VECFUNC( "llvm.tanh.f32", "_ZGVnN4v_tanhf", FIXED(4))
TLI_DEFINE_VECFUNC( "tgammaf", "_ZGVnN4v_tgammaf", FIXED(4))
TLI_DEFINE_VECFUNC( "llvm.tgamma.f32", "_ZGVnN4v_tgammaf", FIXED(4))
+#elif defined(TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS)
+
+TLI_DEFINE_VECFUNC("acos", "_ZGVsMxv_acos", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("acosf", "_ZGVsMxv_acosf", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("asin", "_ZGVsMxv_asin", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("asinf", "_ZGVsMxv_asinf", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("atan", "_ZGVsMxv_atan", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("atanf", "_ZGVsMxv_atanf", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("atan2", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("atan2f", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("atanh", "_ZGVsMxv_atanh", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("atanhf", "_ZGVsMxv_atanhf", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("cos", "_ZGVsMxv_cos", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("cosf", "_ZGVsMxv_cosf", SCALABLE(4), MASKED)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVsMxv_cos", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVsMxv_cosf", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("cosh", "_ZGVsMxv_cosh", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("coshf", "_ZGVsMxv_coshf", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("exp", "_ZGVsMxv_exp", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("expf", "_ZGVsMxv_expf", SCALABLE(4), MASKED)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVsMxv_exp", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVsMxv_expf", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("exp2", "_ZGVsMxv_exp2", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("exp2f", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED)
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVsMxv_exp2", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("exp10", "_ZGVsMxv_exp10", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("exp10f", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("lgamma", "_ZGVsMxv_lgamma", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("lgammaf", "_ZGVsMxv_lgammaf", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("log", "_ZGVsMxv_log", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("logf", "_ZGVsMxv_logf", SCALABLE(4), MASKED)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVsMxv_log", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVsMxv_logf", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC( "log2", "_ZGVsMxv_log2", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC( "log2f", "_ZGVsMxv_log2f", SCALABLE(4), MASKED)
+TLI_DEFINE_VECFUNC( "llvm.log2.f64", "_ZGVsMxv_log2", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC( "llvm.log2.f32", "_ZGVsMxv_log2f", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("log10", "_ZGVsMxv_log10", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("log10f", "_ZGVsMxv_log10f", SCALABLE(4), MASKED)
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVsMxv_log10", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVsMxv_log10f", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("pow", "_ZGVsMxvv_pow", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("powf", "_ZGVsMxvv_powf", SCALABLE(4), MASKED)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVsMxvv_pow", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVsMxvv_powf", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("sin", "_ZGVsMxv_sin", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("sinf", "_ZGVsMxv_sinf", SCALABLE(4), MASKED)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVsMxv_sin", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVsMxv_sinf", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("sqrt", "_ZGVsMxv_sqrt", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("sqrtf", "_ZGVsMxv_sqrtf", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("tan", "_ZGVsMxv_tan", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("tanf", "_ZGVsMxv_tanf", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("tanh", "_ZGVsMxv_tanh", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("tanhf", "_ZGVsMxv_tanhf", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("tgamma", "_ZGVsMxv_tgamma", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("tgammaf", "_ZGVsMxv_tgammaf", SCALABLE(4), MASKED)
+
+#elif defined(TLI_DEFINE_ARMPL_VECFUNCS)
+
+TLI_DEFINE_VECFUNC("acos", "armpl_vacosq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("acosf", "armpl_vacosq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("acos", "armpl_svacos_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("acosf", "armpl_svacos_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("acosh", "armpl_vacoshq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("acoshf", "armpl_vacoshq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("acosh", "armpl_svacosh_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("acoshf", "armpl_svacosh_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("asin", "armpl_vasinq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("asinf", "armpl_vasinq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("asin", "armpl_svasin_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("asinf", "armpl_svasin_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("asinh", "armpl_vasinhq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("asinhf", "armpl_vasinhq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("asinh", "armpl_svasinh_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("asinhf", "armpl_svasinh_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("atan", "armpl_vatanq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("atanf", "armpl_vatanq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("atan", "armpl_svatan_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("atanf", "armpl_svatan_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("atan2", "armpl_vatan2q_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("atan2f", "armpl_vatan2q_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("atan2", "armpl_svatan2_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("atan2f", "armpl_svatan2_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("atanh", "armpl_vatanhq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("atanhf", "armpl_vatanhq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("atanh", "armpl_svatanh_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("atanhf", "armpl_svatanh_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("cbrt", "armpl_vcbrtq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("cbrtf", "armpl_vcbrtq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("cbrt", "armpl_svcbrt_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("cbrtf", "armpl_svcbrt_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("copysign", "armpl_vcopysignq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("copysignf", "armpl_vcopysignq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("copysign", "armpl_svcopysign_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("copysignf", "armpl_svcopysign_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("cos", "armpl_vcosq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("cosf", "armpl_vcosq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("cos", "armpl_svcos_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("cosf", "armpl_svcos_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "armpl_vcosq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "armpl_vcosq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "armpl_svcos_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "armpl_svcos_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("cosh", "armpl_vcoshq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("coshf", "armpl_vcoshq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("cosh", "armpl_svcosh_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("coshf", "armpl_svcosh_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("erf", "armpl_verfq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("erff", "armpl_verfq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("erf", "armpl_sverf_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("erff", "armpl_sverf_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("erfc", "armpl_verfcq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("erfcf", "armpl_verfcq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("erfc", "armpl_sverfc_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("erfcf", "armpl_sverfc_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("exp", "armpl_vexpq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("expf", "armpl_vexpq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("exp", "armpl_svexp_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("expf", "armpl_svexp_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "armpl_vexpq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "armpl_vexpq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "armpl_svexp_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "armpl_svexp_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("exp2", "armpl_vexp2q_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("exp2f", "armpl_vexp2q_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("exp2", "armpl_svexp2_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("exp2f", "armpl_svexp2_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "armpl_vexp2q_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "armpl_vexp2q_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "armpl_svexp2_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "armpl_svexp2_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("exp10", "armpl_vexp10q_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("exp10f", "armpl_vexp10q_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("exp10", "armpl_svexp10_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("exp10f", "armpl_svexp10_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("expm1", "armpl_vexpm1q_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("expm1f", "armpl_vexpm1q_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("expm1", "armpl_svexpm1_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("expm1f", "armpl_svexpm1_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("fdim", "armpl_vfdimq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("fdimf", "armpl_vfdimq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("fdim", "armpl_svfdim_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("fdimf", "armpl_svfdim_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("fma", "armpl_vfmaq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("fmaf", "armpl_vfmaq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("fma", "armpl_svfma_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("fmaf", "armpl_svfma_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("fmin", "armpl_vfminq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("fminf", "armpl_vfminq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("fmin", "armpl_svfmin_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("fminf", "armpl_svfmin_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("fmod", "armpl_vfmodq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("fmodf", "armpl_vfmodq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("fmod", "armpl_svfmod_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("fmodf", "armpl_svfmod_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("hypot", "armpl_vhypotq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("hypotf", "armpl_vhypotq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("hypot", "armpl_svhypot_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("hypotf", "armpl_svhypot_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("lgamma", "armpl_vlgammaq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("lgammaf", "armpl_vlgammaq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("lgamma", "armpl_svlgamma_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("lgammaf", "armpl_svlgamma_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("log", "armpl_vlogq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("logf", "armpl_vlogq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("log", "armpl_svlog_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("logf", "armpl_svlog_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("llvm.log.f64", "armpl_vlogq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "armpl_vlogq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "armpl_svlog_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "armpl_svlog_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("log1p", "armpl_vlog1pq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("log1pf", "armpl_vlog1pq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("log1p", "armpl_svlog1p_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("log1pf", "armpl_svlog1p_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("log2", "armpl_vlog2q_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("log2f", "armpl_vlog2q_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("log2", "armpl_svlog2_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("log2f", "armpl_svlog2_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "armpl_vlog2q_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "armpl_vlog2q_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "armpl_svlog2_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "armpl_svlog2_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("log10", "armpl_vlog10q_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("log10f", "armpl_vlog10q_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("log10", "armpl_svlog10_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("log10f", "armpl_svlog10_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_vlog10q_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_vlog10q_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_svlog10_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_svlog10_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("nextafter", "armpl_vnextafterq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("nextafterf", "armpl_vnextafterq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("nextafter", "armpl_svnextafter_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("nextafterf", "armpl_svnextafter_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("pow", "armpl_vpowq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("powf", "armpl_vpowq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("pow", "armpl_svpow_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("powf", "armpl_svpow_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "armpl_vpowq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "armpl_vpowq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "armpl_svpow_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "armpl_svpow_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("sin", "armpl_vsinq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("sinf", "armpl_vsinq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("sin", "armpl_svsin_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("sinf", "armpl_svsin_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "armpl_vsinq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_vsinq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "armpl_svsin_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_svsin_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("sinh", "armpl_vsinhq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("sinhf", "armpl_vsinhq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("sinh", "armpl_svsinh_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("sinhf", "armpl_svsinh_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("sinpi", "armpl_vsinpiq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("sinpif", "armpl_vsinpiq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("sinpi", "armpl_svsinpi_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("sinpif", "armpl_svsinpi_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("sqrt", "armpl_vsqrtq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("sqrtf", "armpl_vsqrtq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("sqrt", "armpl_svsqrt_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("sqrtf", "armpl_svsqrt_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("tan", "armpl_vtanq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("tanf", "armpl_vtanq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("tan", "armpl_svtan_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("tanf", "armpl_svtan_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("tanh", "armpl_vtanhq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("tanhf", "armpl_vtanhq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("tanh", "armpl_svtanh_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("tanhf", "armpl_svtanh_f32_x", SCALABLE(4), MASKED)
+
+TLI_DEFINE_VECFUNC("tgamma", "armpl_vtgammaq_f64", FIXED(2), NOMASK)
+TLI_DEFINE_VECFUNC("tgammaf", "armpl_vtgammaq_f32", FIXED(4), NOMASK)
+TLI_DEFINE_VECFUNC("tgamma", "armpl_svtgamma_f64_x", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("tgammaf", "armpl_svtgamma_f32_x", SCALABLE(4), MASKED)
+
#else
#error "Must choose which vector library functions are to be defined."
#endif
+#undef MASKED
+#undef NOMASK
+#undef SCALABLE
+#undef FIXED
+
#undef TLI_DEFINE_VECFUNC
#undef TLI_DEFINE_ACCELERATE_VECFUNCS
#undef TLI_DEFINE_DARWIN_LIBSYSTEM_M_VECFUNCS
@@ -618,4 +926,6 @@ TLI_DEFINE_VECFUNC( "llvm.tgamma.f32", "_ZGVnN4v_tgammaf", FIXED(4))
#undef TLI_DEFINE_SVML_VECFUNCS
#undef TLI_DEFINE_SLEEFGNUABI_VF2_VECFUNCS
#undef TLI_DEFINE_SLEEFGNUABI_VF4_VECFUNCS
+#undef TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS
#undef TLI_DEFINE_MASSV_VECFUNCS_NAMES
+#undef TLI_DEFINE_ARMPL_VECFUNCS
diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index 181c5f78c47b..ad69f5711abd 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -125,6 +125,21 @@ struct VFInfo {
std::string ScalarName; /// Scalar Function Name.
std::string VectorName; /// Vector Function Name associated to this VFInfo.
VFISAKind ISA; /// Instruction Set Architecture.
+
+ /// Returns the index of the first parameter with the kind 'GlobalPredicate',
+ /// if any exist.
+ std::optional<unsigned> getParamIndexForOptionalMask() const {
+ unsigned ParamCount = Shape.Parameters.size();
+ for (unsigned i = 0; i < ParamCount; ++i)
+ if (Shape.Parameters[i].ParamKind == VFParamKind::GlobalPredicate)
+ return i;
+
+ return std::nullopt;
+ }
+
+ /// Returns true if at least one of the operands to the vectorized function
+ /// has the kind 'GlobalPredicate'.
+ bool isMasked() const { return getParamIndexForOptionalMask().has_value(); }
};
namespace VFABI {
@@ -177,7 +192,7 @@ std::optional<VFInfo> tryDemangleForVFABI(StringRef MangledName,
/// where:
///
/// <isa> = "_LLVM_"
-/// <mask> = "N". Note: TLI does not support masked interfaces.
+/// <mask> = "M" if masked, "N" if no mask.
/// <vlen> = Number of concurrent lanes, stored in the `VectorizationFactor`
/// field of the `VecDesc` struct. If the number of lanes is scalable
/// then 'x' is printed instead.
@@ -185,7 +200,8 @@ std::optional<VFInfo> tryDemangleForVFABI(StringRef MangledName,
/// <scalarname> = the name of the scalar function.
/// <vectorname> = the name of the vector function.
std::string mangleTLIVectorName(StringRef VectorName, StringRef ScalarName,
- unsigned numArgs, ElementCount VF);
+ unsigned numArgs, ElementCount VF,
+ bool Masked = false);
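For illustration, a call that follows the scheme documented above (the argument values mirror one of the ArmPL mappings added elsewhere in this patch; the mangled output shown in the comment is a best guess, not authoritative):

  // Expected to yield a name along the lines of
  //   _ZGV_LLVM_Mxvv_powf(armpl_svpow_f32_x)
  // i.e. masked ("M"), scalable ("x"), two vector parameters ("vv").
  std::string VFName = VFABI::mangleTLIVectorName(
      "armpl_svpow_f32_x", "powf", /*numArgs=*/2,
      ElementCount::getScalable(4), /*Masked=*/true);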
/// Retrieve the `VFParamKind` from a string token.
VFParamKind getVFParamKindFromString(const StringRef Token);
@@ -258,6 +274,20 @@ public:
return Ret;
}
+ static bool hasMaskedVariant(const CallInst &CI,
+ std::optional<ElementCount> VF = std::nullopt) {
+ // Check whether we have at least one masked vector version of a scalar
+ // function. If no VF is specified then we check for any masked variant,
+ // otherwise we look for one that matches the supplied VF.
+ auto Mappings = VFDatabase::getMappings(CI);
+ for (VFInfo Info : Mappings)
+ if (!VF || Info.Shape.VF == *VF)
+ if (Info.isMasked())
+ return true;
+
+ return false;
+ }
+
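A short usage sketch of the helper above (CI and the fixed VF of 4 are placeholders):

  // Does any masked vector mapping exist for this call at VF = 4?
  if (VFDatabase::hasMaskedVariant(*CI, ElementCount::getFixed(4))) {
    // A masked variant is available, e.g. for tail-folded vectorization.
  }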
/// Constructor, requires a CallInst instance.
VFDatabase(CallInst &CI)
: M(CI.getModule()), CI(CI),
@@ -281,7 +311,6 @@ public:
template <typename T> class ArrayRef;
class DemandedBits;
-class GetElementPtrInst;
template <typename InstTy> class InterleaveGroup;
class IRBuilderBase;
class Loop;
@@ -317,9 +346,9 @@ bool isTriviallyVectorizable(Intrinsic::ID ID);
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
unsigned ScalarOpdIdx);
-/// Identifies if the vector form of the intrinsic has a operand that has
-/// an overloaded type.
-bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx);
+/// Identifies if the vector form of the intrinsic is overloaded on the type of
+/// the operand at index \p OpdIdx, or on the return type if \p OpdIdx is -1.
+bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx);
/// Returns intrinsic ID for call.
/// For the input call instruction it finds mapping intrinsic and returns
@@ -327,23 +356,6 @@ bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, unsigned OpdIdx);
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI,
const TargetLibraryInfo *TLI);
-/// Find the operand of the GEP that should be checked for consecutive
-/// stores. This ignores trailing indices that have no effect on the final
-/// pointer.
-unsigned getGEPInductionOperand(const GetElementPtrInst *Gep);
-
-/// If the argument is a GEP, then returns the operand identified by
-/// getGEPInductionOperand. However, if there is some other non-loop-invariant
-/// operand, it returns that instead.
-Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp);
-
-/// If a value has only one user that is a CastInst, return it.
-Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty);
-
-/// Get the stride of a pointer access in a loop. Looks for symbolic
-/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
-Value *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp);
-
/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
@@ -800,7 +812,7 @@ public:
/// Check if \p Instr belongs to any interleave group.
bool isInterleaved(Instruction *Instr) const {
- return InterleaveGroupMap.find(Instr) != InterleaveGroupMap.end();
+ return InterleaveGroupMap.contains(Instr);
}
/// Get the interleave group that \p Instr belongs to.
@@ -904,7 +916,7 @@ private:
/// Collect all the accesses with a constant stride in program order.
void collectConstStrideAccesses(
MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
- const ValueToValueMap &Strides);
+ const DenseMap<Value *, const SCEV *> &Strides);
/// Returns true if \p Stride is allowed in an interleaved group.
static bool isStrided(int Stride);
@@ -964,8 +976,7 @@ private:
// If we know there is a dependence from source to sink, assume the
// instructions can't be reordered. Otherwise, reordering is legal.
- return Dependences.find(Src) == Dependences.end() ||
- !Dependences.lookup(Src).count(Sink);
+ return !Dependences.contains(Src) || !Dependences.lookup(Src).count(Sink);
}
/// Collect the dependences from LoopAccessInfo.
diff --git a/llvm/include/llvm/AsmParser/LLLexer.h b/llvm/include/llvm/AsmParser/LLLexer.h
index 7bcb33f18768..bd929db33c4a 100644
--- a/llvm/include/llvm/AsmParser/LLLexer.h
+++ b/llvm/include/llvm/AsmParser/LLLexer.h
@@ -36,14 +36,14 @@ namespace llvm {
const char *TokStart;
lltok::Kind CurKind;
std::string StrVal;
- unsigned UIntVal;
+ unsigned UIntVal = 0;
Type *TyVal = nullptr;
- APFloat APFloatVal;
- APSInt APSIntVal;
+ APFloat APFloatVal{0.0};
+ APSInt APSIntVal{0};
// When false (default), an identifier ending in ':' is a label token.
// When true, the ':' is treated as a separate token.
- bool IgnoreColonInIdentifiers;
+ bool IgnoreColonInIdentifiers = false;
public:
explicit LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &,
diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
index b07e9fc9cc75..eca908a24aac 100644
--- a/llvm/include/llvm/AsmParser/LLParser.h
+++ b/llvm/include/llvm/AsmParser/LLParser.h
@@ -20,6 +20,7 @@
#include "llvm/IR/FMF.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/Support/ModRef.h"
#include <map>
#include <optional>
@@ -42,7 +43,6 @@ namespace llvm {
class Comdat;
class MDString;
class MDNode;
- class MemoryEffects;
struct SlotMapping;
/// ValID - Represents a reference of a definition of some sort with no type.
@@ -294,6 +294,7 @@ namespace llvm {
bool parseOptionalUWTableKind(UWTableKind &Kind);
bool parseAllocKind(AllocFnKind &Kind);
std::optional<MemoryEffects> parseMemoryAttr();
+ unsigned parseNoFPClassAttr();
bool parseScopeAndOrdering(bool IsAtomic, SyncScope::ID &SSID,
AtomicOrdering &Ordering);
bool parseScope(SyncScope::ID &SSID);
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index 60e25cefbd24..673dc58ce645 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -170,6 +170,8 @@ enum Kind {
kw_amdgpu_gs,
kw_amdgpu_ps,
kw_amdgpu_cs,
+ kw_amdgpu_cs_chain,
+ kw_amdgpu_cs_chain_preserve,
kw_amdgpu_kernel,
kw_amdgpu_gfx,
kw_tailcc,
@@ -195,6 +197,24 @@ enum Kind {
kw_inaccessiblememonly,
kw_inaccessiblemem_or_argmemonly,
+ // nofpclass attribute:
+ kw_all,
+ kw_nan,
+ kw_snan,
+ kw_qnan,
+ kw_inf,
+ // kw_ninf, - already an fmf
+ kw_pinf,
+ kw_norm,
+ kw_nnorm,
+ kw_pnorm,
+ // kw_sub, - already an instruction
+ kw_nsub,
+ kw_psub,
+ kw_zero,
+ kw_nzero,
+ kw_pzero,
+
kw_type,
kw_opaque,
@@ -416,7 +436,6 @@ enum Kind {
kw_versions,
kw_memProf,
kw_notcold,
- kw_notcoldandcold,
// GV's with __attribute__((no_sanitize("address"))), or things in
// -fsanitize-ignorelist when built with ASan.
diff --git a/llvm/include/llvm/BinaryFormat/COFF.h b/llvm/include/llvm/BinaryFormat/COFF.h
index c9b1174c9eaa..522ee37da6e8 100644
--- a/llvm/include/llvm/BinaryFormat/COFF.h
+++ b/llvm/include/llvm/BinaryFormat/COFF.h
@@ -99,6 +99,7 @@ enum MachineTypes : unsigned {
IMAGE_FILE_MACHINE_ARMNT = 0x1C4,
IMAGE_FILE_MACHINE_ARM64 = 0xAA64,
IMAGE_FILE_MACHINE_ARM64EC = 0xA641,
+ IMAGE_FILE_MACHINE_ARM64X = 0xA64E,
IMAGE_FILE_MACHINE_EBC = 0xEBC,
IMAGE_FILE_MACHINE_I386 = 0x14C,
IMAGE_FILE_MACHINE_IA64 = 0x200,
@@ -120,6 +121,19 @@ enum MachineTypes : unsigned {
IMAGE_FILE_MACHINE_WCEMIPSV2 = 0x169
};
+template <typename T> bool isArm64EC(T Machine) {
+ return Machine == IMAGE_FILE_MACHINE_ARM64EC ||
+ Machine == IMAGE_FILE_MACHINE_ARM64X;
+}
+
+template <typename T> bool isAnyArm64(T Machine) {
+ return Machine == IMAGE_FILE_MACHINE_ARM64 || isArm64EC(Machine);
+}
+
+template <typename T> bool is64Bit(T Machine) {
+ return Machine == IMAGE_FILE_MACHINE_AMD64 || isAnyArm64(Machine);
+}
+
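Illustrative checks using the new predicates (the constants are the machine types defined in the enum above):

  bool IsEC = COFF::isArm64EC(COFF::IMAGE_FILE_MACHINE_ARM64X); // true
  bool Is64 = COFF::is64Bit(COFF::IMAGE_FILE_MACHINE_AMD64);    // true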
enum Characteristics : unsigned {
C_Invalid = 0,
diff --git a/llvm/include/llvm/BinaryFormat/DXContainer.h b/llvm/include/llvm/BinaryFormat/DXContainer.h
index 44b77b11fdd3..f202f1bf6dff 100644
--- a/llvm/include/llvm/BinaryFormat/DXContainer.h
+++ b/llvm/include/llvm/BinaryFormat/DXContainer.h
@@ -15,6 +15,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/TargetParser/Triple.h"
#include <stdint.h>
@@ -36,6 +37,12 @@ namespace llvm {
namespace dxbc {
+inline Triple::EnvironmentType getShaderStage(uint32_t Kind) {
+ assert(Kind <= Triple::Amplification - Triple::Pixel &&
+ "Shader kind out of expected range.");
+ return static_cast<Triple::EnvironmentType>(Triple::Pixel + Kind);
+}
+
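A small illustration of the mapping performed above (the raw value 0 corresponds to the pixel stage, per the offset from Triple::Pixel):

  // Shader kind values are stored as offsets from Triple::Pixel.
  Triple::EnvironmentType Env = dxbc::getShaderStage(0); // Triple::Pixel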
struct Hash {
uint8_t Digest[16];
};
@@ -142,6 +149,233 @@ static_assert((uint64_t)FeatureFlags::NextUnusedBit <= 1ull << 63,
PartType parsePartType(StringRef S);
+struct VertexPSVInfo {
+ uint8_t OutputPositionPresent;
+ uint8_t Unused[3];
+
+ void swapBytes() {
+ // nothing to swap
+ }
+};
+
+struct HullPSVInfo {
+ uint32_t InputControlPointCount;
+ uint32_t OutputControlPointCount;
+ uint32_t TessellatorDomain;
+ uint32_t TessellatorOutputPrimitive;
+
+ void swapBytes() {
+ sys::swapByteOrder(InputControlPointCount);
+ sys::swapByteOrder(OutputControlPointCount);
+ sys::swapByteOrder(TessellatorDomain);
+ sys::swapByteOrder(TessellatorOutputPrimitive);
+ }
+};
+
+struct DomainPSVInfo {
+ uint32_t InputControlPointCount;
+ uint8_t OutputPositionPresent;
+ uint8_t Unused[3];
+ uint32_t TessellatorDomain;
+
+ void swapBytes() {
+ sys::swapByteOrder(InputControlPointCount);
+ sys::swapByteOrder(TessellatorDomain);
+ }
+};
+
+struct GeometryPSVInfo {
+ uint32_t InputPrimitive;
+ uint32_t OutputTopology;
+ uint32_t OutputStreamMask;
+ uint8_t OutputPositionPresent;
+ uint8_t Unused[3];
+
+ void swapBytes() {
+ sys::swapByteOrder(InputPrimitive);
+ sys::swapByteOrder(OutputTopology);
+ sys::swapByteOrder(OutputStreamMask);
+ }
+};
+
+struct PixelPSVInfo {
+ uint8_t DepthOutput;
+ uint8_t SampleFrequency;
+ uint8_t Unused[2];
+
+ void swapBytes() {
+ // nothing to swap
+ }
+};
+
+struct MeshPSVInfo {
+ uint32_t GroupSharedBytesUsed;
+ uint32_t GroupSharedBytesDependentOnViewID;
+ uint32_t PayloadSizeInBytes;
+ uint16_t MaxOutputVertices;
+ uint16_t MaxOutputPrimitives;
+
+ void swapBytes() {
+ sys::swapByteOrder(GroupSharedBytesUsed);
+ sys::swapByteOrder(GroupSharedBytesDependentOnViewID);
+ sys::swapByteOrder(PayloadSizeInBytes);
+ sys::swapByteOrder(MaxOutputVertices);
+ sys::swapByteOrder(MaxOutputPrimitives);
+ }
+};
+
+struct AmplificationPSVInfo {
+ uint32_t PayloadSizeInBytes;
+
+ void swapBytes() { sys::swapByteOrder(PayloadSizeInBytes); }
+};
+
+union PipelinePSVInfo {
+ VertexPSVInfo VS;
+ HullPSVInfo HS;
+ DomainPSVInfo DS;
+ GeometryPSVInfo GS;
+ PixelPSVInfo PS;
+ MeshPSVInfo MS;
+ AmplificationPSVInfo AS;
+
+ void swapBytes(Triple::EnvironmentType Stage) {
+ switch (Stage) {
+ case Triple::EnvironmentType::Pixel:
+ PS.swapBytes();
+ break;
+ case Triple::EnvironmentType::Vertex:
+ VS.swapBytes();
+ break;
+ case Triple::EnvironmentType::Geometry:
+ GS.swapBytes();
+ break;
+ case Triple::EnvironmentType::Hull:
+ HS.swapBytes();
+ break;
+ case Triple::EnvironmentType::Domain:
+ DS.swapBytes();
+ break;
+ case Triple::EnvironmentType::Mesh:
+ MS.swapBytes();
+ break;
+ case Triple::EnvironmentType::Amplification:
+ AS.swapBytes();
+ break;
+ default:
+ break;
+ }
+ }
+};
+
+static_assert(sizeof(PipelinePSVInfo) == 4 * sizeof(uint32_t),
+ "Pipeline-specific PSV info must fit in 16 bytes.");
+
+namespace PSV {
+
+namespace v0 {
+struct RuntimeInfo {
+ PipelinePSVInfo StageInfo;
+ uint32_t MinimumWaveLaneCount; // minimum lane count required, 0 if unused
+ uint32_t MaximumWaveLaneCount; // maximum lane count required,
+ // 0xffffffff if unused
+ void swapBytes() {
+ // Skip the union because we don't know which field it has
+ sys::swapByteOrder(MinimumWaveLaneCount);
+ sys::swapByteOrder(MaximumWaveLaneCount);
+ }
+
+ void swapBytes(Triple::EnvironmentType Stage) { StageInfo.swapBytes(Stage); }
+};
+
+struct ResourceBindInfo {
+ uint32_t Type;
+ uint32_t Space;
+ uint32_t LowerBound;
+ uint32_t UpperBound;
+
+ void swapBytes() {
+ sys::swapByteOrder(Type);
+ sys::swapByteOrder(Space);
+ sys::swapByteOrder(LowerBound);
+ sys::swapByteOrder(UpperBound);
+ }
+};
+
+} // namespace v0
+
+namespace v1 {
+
+struct MeshRuntimeInfo {
+ uint8_t SigPrimVectors; // Primitive output for MS
+ uint8_t MeshOutputTopology;
+};
+
+union GeometryExtraInfo {
+ uint16_t MaxVertexCount; // MaxVertexCount for GS only (max 1024)
+ uint8_t SigPatchConstOrPrimVectors; // Output for HS; Input for DS;
+ // Primitive output for MS (overlaps
+ // MeshInfo::SigPrimVectors)
+ MeshRuntimeInfo MeshInfo;
+};
+struct RuntimeInfo : public v0::RuntimeInfo {
+ uint8_t ShaderStage; // PSVShaderKind
+ uint8_t UsesViewID;
+ GeometryExtraInfo GeomData;
+
+ // PSVSignatureElement counts
+ uint8_t SigInputElements;
+ uint8_t SigOutputElements;
+ uint8_t SigPatchConstOrPrimElements;
+
+ // Number of packed vectors per signature
+ uint8_t SigInputVectors;
+ uint8_t SigOutputVectors[4];
+
+ void swapBytes() {
+ // nothing to swap since everything is single-byte or a union field
+ }
+
+ void swapBytes(Triple::EnvironmentType Stage) {
+ v0::RuntimeInfo::swapBytes(Stage);
+ if (Stage == Triple::EnvironmentType::Geometry)
+ sys::swapByteOrder(GeomData.MaxVertexCount);
+ }
+};
+
+} // namespace v1
+
+namespace v2 {
+struct RuntimeInfo : public v1::RuntimeInfo {
+ uint32_t NumThreadsX;
+ uint32_t NumThreadsY;
+ uint32_t NumThreadsZ;
+
+ void swapBytes() {
+ sys::swapByteOrder(NumThreadsX);
+ sys::swapByteOrder(NumThreadsY);
+ sys::swapByteOrder(NumThreadsZ);
+ }
+
+ void swapBytes(Triple::EnvironmentType Stage) {
+ v1::RuntimeInfo::swapBytes(Stage);
+ }
+};
+
+struct ResourceBindInfo : public v0::ResourceBindInfo {
+ uint32_t Kind;
+ uint32_t Flags;
+
+ void swapBytes() {
+ v0::ResourceBindInfo::swapBytes();
+ sys::swapByteOrder(Kind);
+ sys::swapByteOrder(Flags);
+ }
+};
+
+} // namespace v2
+} // namespace PSV
+
} // namespace dxbc
} // namespace llvm
diff --git a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def
index 7907bfcc31ea..0073abcd0703 100644
--- a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def
+++ b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def
@@ -3,6 +3,7 @@
CONTAINER_PART(DXIL)
CONTAINER_PART(SFI0)
CONTAINER_PART(HASH)
+CONTAINER_PART(PSV0)
#undef CONTAINER_PART
#endif
diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def
index 1409568b8664..40d958c867de 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.def
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.def
@@ -13,7 +13,8 @@
// TODO: Add other DW-based macros.
#if !( \
defined HANDLE_DW_TAG || defined HANDLE_DW_AT || defined HANDLE_DW_FORM || \
- defined HANDLE_DW_OP || defined HANDLE_DW_LANG || defined HANDLE_DW_ATE || \
+ defined HANDLE_DW_OP || defined HANDLE_DW_OP_LLVM_USEROP || \
+ defined HANDLE_DW_LANG || defined HANDLE_DW_ATE || \
defined HANDLE_DW_VIRTUALITY || defined HANDLE_DW_DEFAULTED || \
defined HANDLE_DW_CC || defined HANDLE_DW_LNS || defined HANDLE_DW_LNE || \
defined HANDLE_DW_LNCT || defined HANDLE_DW_MACRO || \
@@ -52,6 +53,10 @@
#define HANDLE_DW_OP(ID, NAME, VERSION, VENDOR)
#endif
+#ifndef HANDLE_DW_OP_LLVM_USEROP
+#define HANDLE_DW_OP_LLVM_USEROP(ID, NAME)
+#endif
+
#ifndef HANDLE_DW_LANG
#define HANDLE_DW_LANG(ID, NAME, LOWER_BOUND, VERSION, VENDOR)
#endif
@@ -874,6 +879,24 @@ HANDLE_DW_OP(0xf8, PGI_omp_thread_num, 0, PGI)
HANDLE_DW_OP(0xfb, GNU_addr_index, 0, GNU)
HANDLE_DW_OP(0xfc, GNU_const_index, 0, GNU)
+// DW_OP_LLVM_user has two operands:
+// (1) An unsigned LEB128 "LLVM Vendor Extension Opcode".
+// (2) Zero or more literal operands, the number and type of which are
+// implied by the opcode (1).
+// DW_OP_LLVM_user acts as an extension multiplexer, opening up the encoding
+// space to accommodate an infinite number of extensions. This better reflects
+// the de-facto permanent allocation of extensions.
+HANDLE_DW_OP(0xe9, LLVM_user, 0, LLVM)
+// "LLVM Vendor Extension" operations under the DW_OP_LLVM_user encoding
+// scheme. This list is authoritative and exhaustive. Once an operation is
+// registered here it cannot be removed nor have its encoding changed. The
+// encoding space must skip zero (which is reserved) and have no gaps.
+//
+// The DW_OP_LLVM_nop operation (encoded under DW_OP_LLVM_user) has no effect
+// on the location stack or any of its values. It is defined as a placeholder
+// for testing purposes.
+HANDLE_DW_OP_LLVM_USEROP(0x0001, nop)
+
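As a concrete illustration of the encoding scheme described above (the byte values follow directly from the opcode assignments in this file):

// A DW_OP_LLVM_user DW_OP_LLVM_nop expression encodes as two bytes:
//   0xe9   DW_OP_LLVM_user
//   0x01   ULEB128 sub-opcode for DW_OP_LLVM_nop (0x0001)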
// DWARF languages.
HANDLE_DW_LANG(0x0001, C89, 0, 2, DWARF)
HANDLE_DW_LANG(0x0002, C, 0, 2, DWARF)
@@ -925,6 +948,7 @@ HANDLE_DW_LANG(0x002c, C17, 0, 0, DWARF)
HANDLE_DW_LANG(0x002d, Fortran18, 0, 0, DWARF)
HANDLE_DW_LANG(0x002e, Ada2005, 0, 0, DWARF)
HANDLE_DW_LANG(0x002f, Ada2012, 0, 0, DWARF)
+HANDLE_DW_LANG(0x0033, Mojo, 0, 0, DWARF)
// Vendor extensions:
HANDLE_DW_LANG(0x8001, Mips_Assembler, std::nullopt, 0, MIPS)
HANDLE_DW_LANG(0x8e57, GOOGLE_RenderScript, 0, 0, GOOGLE)
@@ -1235,6 +1259,7 @@ HANDLE_DW_SECT(8, RNGLISTS)
#undef HANDLE_DW_AT
#undef HANDLE_DW_FORM
#undef HANDLE_DW_OP
+#undef HANDLE_DW_OP_LLVM_USEROP
#undef HANDLE_DW_LANG
#undef HANDLE_DW_ATE
#undef HANDLE_DW_VIRTUALITY
diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h
index 60b2c77d1632..869352b35e32 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.h
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.h
@@ -24,7 +24,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadicDetails.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/TargetParser/Triple.h"
#include <limits>
@@ -146,6 +146,11 @@ enum LocationAtom {
DW_OP_LLVM_arg = 0x1005, ///< Only used in LLVM metadata.
};
+enum LlvmUserLocationAtom {
+#define HANDLE_DW_OP_LLVM_USEROP(ID, NAME) DW_OP_LLVM_##NAME = ID,
+#include "llvm/BinaryFormat/Dwarf.def"
+};
+
enum TypeKind : uint8_t {
#define HANDLE_DW_ATE(ID, NAME, VERSION, VENDOR) DW_ATE_##NAME = ID,
#include "llvm/BinaryFormat/Dwarf.def"
@@ -263,6 +268,7 @@ inline bool isCPlusPlus(SourceLanguage S) {
case DW_LANG_Fortran18:
case DW_LANG_Ada2005:
case DW_LANG_Ada2012:
+ case DW_LANG_Mojo:
result = false;
break;
}
@@ -329,6 +335,7 @@ inline bool isFortran(SourceLanguage S) {
case DW_LANG_C17:
case DW_LANG_Ada2005:
case DW_LANG_Ada2012:
+ case DW_LANG_Mojo:
result = false;
break;
}
@@ -393,6 +400,7 @@ inline bool isC(SourceLanguage S) {
case DW_LANG_Fortran18:
case DW_LANG_Ada2005:
case DW_LANG_Ada2012:
+ case DW_LANG_Mojo:
return false;
}
llvm_unreachable("Unknown language kind.");
@@ -631,6 +639,8 @@ StringRef ChildrenString(unsigned Children);
StringRef AttributeString(unsigned Attribute);
StringRef FormEncodingString(unsigned Encoding);
StringRef OperationEncodingString(unsigned Encoding);
+StringRef SubOperationEncodingString(unsigned OpEncoding,
+ unsigned SubOpEncoding);
StringRef AttributeEncodingString(unsigned Encoding);
StringRef DecimalSignString(unsigned Sign);
StringRef EndianityString(unsigned Endian);
@@ -674,6 +684,8 @@ StringRef RLEString(unsigned RLE);
/// @{
unsigned getTag(StringRef TagString);
unsigned getOperationEncoding(StringRef OperationEncodingString);
+unsigned getSubOperationEncoding(unsigned OpEncoding,
+ StringRef SubOperationEncodingString);
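A hypothetical round-trip through the two new helpers (the exact string returned for the sub-operation is an assumption):

  StringRef Name = dwarf::SubOperationEncodingString(dwarf::DW_OP_LLVM_user,
                                                     dwarf::DW_OP_LLVM_nop);
  unsigned SubOp =
      dwarf::getSubOperationEncoding(dwarf::DW_OP_LLVM_user, Name);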
unsigned getVirtuality(StringRef VirtualityString);
unsigned getLanguage(StringRef LanguageString);
unsigned getCallingConvention(StringRef LanguageString);
diff --git a/llvm/include/llvm/BinaryFormat/DynamicTags.def b/llvm/include/llvm/BinaryFormat/DynamicTags.def
index ae25ec53813c..f393b82406b4 100644
--- a/llvm/include/llvm/BinaryFormat/DynamicTags.def
+++ b/llvm/include/llvm/BinaryFormat/DynamicTags.def
@@ -126,6 +126,11 @@ DYNAMIC_TAG(VERNEEDNUM, 0X6FFFFFFF) // The number of entries in DT_VERNEED.
AARCH64_DYNAMIC_TAG(AARCH64_BTI_PLT, 0x70000001)
AARCH64_DYNAMIC_TAG(AARCH64_PAC_PLT, 0x70000003)
AARCH64_DYNAMIC_TAG(AARCH64_VARIANT_PCS, 0x70000005)
+AARCH64_DYNAMIC_TAG(AARCH64_MEMTAG_MODE, 0x70000009)
+AARCH64_DYNAMIC_TAG(AARCH64_MEMTAG_HEAP, 0x7000000b)
+AARCH64_DYNAMIC_TAG(AARCH64_MEMTAG_STACK, 0x7000000c)
+AARCH64_DYNAMIC_TAG(AARCH64_MEMTAG_GLOBALS, 0x7000000d)
+AARCH64_DYNAMIC_TAG(AARCH64_MEMTAG_GLOBALSSZ, 0x7000000f)
// Hexagon specific dynamic table entries
HEXAGON_DYNAMIC_TAG(HEXAGON_SYMSZ, 0x70000000)
@@ -218,6 +223,8 @@ PPC_DYNAMIC_TAG(PPC_OPT, 0x70000001) // Has TLS optimization.
// PPC64 specific dynamic table entries.
PPC64_DYNAMIC_TAG(PPC64_GLINK, 0x70000000) // Address of 32 bytes before the
// first glink lazy resolver stub.
+PPC64_DYNAMIC_TAG(PPC64_OPT, 0x70000003) // Flags to control optimizations
+ // for TLS and multiple TOCs.
// RISC-V specific dynamic array tags.
RISCV_DYNAMIC_TAG(RISCV_VARIANT_CC, 0x70000001)
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 75f0c960beea..f5a7cdb387a6 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -773,15 +773,20 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX940 = 0x040,
EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041,
EF_AMDGPU_MACH_AMDGCN_GFX1013 = 0x042,
- EF_AMDGPU_MACH_AMDGCN_RESERVED_0X43 = 0x043,
+ EF_AMDGPU_MACH_AMDGCN_GFX1150 = 0x043,
EF_AMDGPU_MACH_AMDGCN_GFX1103 = 0x044,
EF_AMDGPU_MACH_AMDGCN_GFX1036 = 0x045,
EF_AMDGPU_MACH_AMDGCN_GFX1101 = 0x046,
EF_AMDGPU_MACH_AMDGCN_GFX1102 = 0x047,
+ EF_AMDGPU_MACH_AMDGCN_RESERVED_0X48 = 0x048,
+ EF_AMDGPU_MACH_AMDGCN_RESERVED_0X49 = 0x049,
+ EF_AMDGPU_MACH_AMDGCN_GFX1151 = 0x04a,
+ EF_AMDGPU_MACH_AMDGCN_GFX941 = 0x04b,
+ EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
- EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1102,
+ EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX942,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
@@ -1031,6 +1036,7 @@ enum : unsigned {
SHT_LLVM_CALL_GRAPH_PROFILE = 0x6fff4c09, // LLVM Call Graph Profile.
SHT_LLVM_BB_ADDR_MAP = 0x6fff4c0a, // LLVM Basic Block Address Map.
SHT_LLVM_OFFLOADING = 0x6fff4c0b, // LLVM device offloading data.
+ SHT_LLVM_LTO = 0x6fff4c0c, // .llvm.lto for fat LTO.
// Android's experimental support for SHT_RELR sections.
// https://android.googlesource.com/platform/bionic/+/b7feec74547f84559a1467aca02708ff61346d2a/libc/include/elf.h#512
SHT_ANDROID_RELR = 0x6fffff00, // Relocation entries; only offsets.
@@ -1404,6 +1410,7 @@ enum {
PT_OPENBSD_MUTABLE = 0x65a3dbe5, // Like bss, but not immutable.
PT_OPENBSD_RANDOMIZE = 0x65a3dbe6, // Fill with random data.
PT_OPENBSD_WXNEEDED = 0x65a3dbe7, // Program does W^X violations.
+ PT_OPENBSD_NOBTCFI = 0x65a3dbe8, // Do not enforce branch target CFI.
PT_OPENBSD_BOOTDATA = 0x65a41be6, // Section for boot arguments.
// ARM program header types.
@@ -1604,6 +1611,9 @@ enum : unsigned {
NT_ARM_HW_WATCH = 0x403,
NT_ARM_SVE = 0x405,
NT_ARM_PAC_MASK = 0x406,
+ NT_ARM_SSVE = 0x40b,
+ NT_ARM_ZA = 0x40c,
+ NT_ARM_ZT = 0x40d,
NT_FILE = 0x46494c45,
NT_PRXFPREG = 0x46e62b7f,
diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/ARM.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/ARM.def
index e0709fb81813..47084d1eb0aa 100644
--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/ARM.def
+++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/ARM.def
@@ -135,6 +135,10 @@ ELF_RELOC(R_ARM_PRIVATE_15, 0x7f)
ELF_RELOC(R_ARM_ME_TOO, 0x80)
ELF_RELOC(R_ARM_THM_TLS_DESCSEQ16, 0x81)
ELF_RELOC(R_ARM_THM_TLS_DESCSEQ32, 0x82)
+ELF_RELOC(R_ARM_THM_ALU_ABS_G0_NC, 0x84)
+ELF_RELOC(R_ARM_THM_ALU_ABS_G1_NC, 0x85)
+ELF_RELOC(R_ARM_THM_ALU_ABS_G2_NC, 0x86)
+ELF_RELOC(R_ARM_THM_ALU_ABS_G3, 0x87)
ELF_RELOC(R_ARM_THM_BF16, 0x88)
ELF_RELOC(R_ARM_THM_BF12, 0x89)
ELF_RELOC(R_ARM_THM_BF18, 0x8a)
diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def
index 67dbd020140b..02bce3c71712 100644
--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def
+++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def
@@ -103,3 +103,18 @@ ELF_RELOC(R_LARCH_TLS_GD_PC_HI20, 97)
ELF_RELOC(R_LARCH_TLS_GD_HI20, 98)
ELF_RELOC(R_LARCH_32_PCREL, 99)
ELF_RELOC(R_LARCH_RELAX, 100)
+
+// Relocs added in ELF for the LoongArch™ Architecture v20230519, part of the
+// v2.10 LoongArch ABI specs.
+//
+// Spec addition: https://github.com/loongson/la-abi-specs/pull/1
+// Binutils commit 57a930e3bfe4b2c7fd6463ed39311e1938513138
+ELF_RELOC(R_LARCH_DELETE, 101)
+ELF_RELOC(R_LARCH_ALIGN, 102)
+ELF_RELOC(R_LARCH_PCREL20_S2, 103)
+ELF_RELOC(R_LARCH_CFA, 104)
+ELF_RELOC(R_LARCH_ADD6, 105)
+ELF_RELOC(R_LARCH_SUB6, 106)
+ELF_RELOC(R_LARCH_ADD_ULEB128, 107)
+ELF_RELOC(R_LARCH_SUB_ULEB128, 108)
+ELF_RELOC(R_LARCH_64_PCREL, 109)
diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
index 454450950444..9a126df01531 100644
--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
+++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
@@ -54,3 +54,4 @@ ELF_RELOC(R_RISCV_SET16, 55)
ELF_RELOC(R_RISCV_SET32, 56)
ELF_RELOC(R_RISCV_32_PCREL, 57)
ELF_RELOC(R_RISCV_IRELATIVE, 58)
+ELF_RELOC(R_RISCV_PLT32, 59)
diff --git a/llvm/include/llvm/BinaryFormat/GOFF.h b/llvm/include/llvm/BinaryFormat/GOFF.h
index 96992414c6cc..b4ddbabdf1e4 100644
--- a/llvm/include/llvm/BinaryFormat/GOFF.h
+++ b/llvm/include/llvm/BinaryFormat/GOFF.h
@@ -9,7 +9,8 @@
// This header contains common, non-processor-specific data structures and
// constants for the GOFF file format.
//
-// GOFF specifics can be found in MVS Program Management: Advanced Facilities
+// GOFF specifics can be found in MVS Program Management: Advanced Facilities.
+//
//===----------------------------------------------------------------------===//
#ifndef LLVM_BINARYFORMAT_GOFF_H
@@ -18,14 +19,142 @@
#include "llvm/Support/DataTypes.h"
namespace llvm {
-
namespace GOFF {
+constexpr uint8_t RecordLength = 80;
+constexpr uint8_t RecordPrefixLength = 3;
+constexpr uint8_t PayloadLength = 77;
+
+// Prefix byte on every record. This indicates GOFF format.
+constexpr uint8_t PTVPrefix = 0x03;
+
+enum RecordType : uint8_t {
+ RT_ESD = 0,
+ RT_TXT = 1,
+ RT_RLD = 2,
+ RT_LEN = 3,
+ RT_END = 4,
+ RT_HDR = 15,
+};
+
+enum ESDSymbolType : uint8_t {
+ ESD_ST_SectionDefinition = 0,
+ ESD_ST_ElementDefinition = 1,
+ ESD_ST_LabelDefinition = 2,
+ ESD_ST_PartReference = 3,
+ ESD_ST_ExternalReference = 4,
+};
+
+enum ESDNameSpaceId : uint8_t {
+ ESD_NS_ProgramManagementBinder = 0,
+ ESD_NS_NormalName = 1,
+ ESD_NS_PseudoRegister = 2,
+ ESD_NS_Parts = 3
+};
+
+enum ESDReserveQwords : uint8_t {
+ ESD_RQ_0 = 0,
+ ESD_RQ_1 = 1,
+ ESD_RQ_2 = 2,
+ ESD_RQ_3 = 3
+};
+
+enum ESDAmode : uint8_t {
+ ESD_AMODE_None = 0,
+ ESD_AMODE_24 = 1,
+ ESD_AMODE_31 = 2,
+ ESD_AMODE_ANY = 3,
+ ESD_AMODE_64 = 4,
+ ESD_AMODE_MIN = 16,
+};
+
+enum ESDRmode : uint8_t {
+ ESD_RMODE_None = 0,
+ ESD_RMODE_24 = 1,
+ ESD_RMODE_31 = 3,
+ ESD_RMODE_64 = 4,
+};
+
+enum ESDTextStyle : uint8_t {
+ ESD_TS_ByteOriented = 0,
+ ESD_TS_Structured = 1,
+ ESD_TS_Unstructured = 2,
+};
+
+enum ESDBindingAlgorithm : uint8_t {
+ ESD_BA_Concatenate = 0,
+ ESD_BA_Merge = 1,
+};
+
+enum ESDTaskingBehavior : uint8_t {
+ ESD_TA_Unspecified = 0,
+ ESD_TA_NonReus = 1,
+ ESD_TA_Reus = 2,
+ ESD_TA_Rent = 3,
+};
+
+enum ESDExecutable : uint8_t {
+ ESD_EXE_Unspecified = 0,
+ ESD_EXE_DATA = 1,
+ ESD_EXE_CODE = 2,
+};
+
+enum ESDDuplicateSymbolSeverity : uint8_t {
+ ESD_DSS_NoWarning = 0,
+ ESD_DSS_Warning = 1,
+ ESD_DSS_Error = 2,
+ ESD_DSS_Reserved = 3,
+};
+
+enum ESDBindingStrength : uint8_t {
+ ESD_BST_Strong = 0,
+ ESD_BST_Weak = 1,
+};
+
+enum ESDLoadingBehavior : uint8_t {
+ ESD_LB_Initial = 0,
+ ESD_LB_Deferred = 1,
+ ESD_LB_NoLoad = 2,
+ ESD_LB_Reserved = 3,
+};
+
+enum ESDBindingScope : uint8_t {
+ ESD_BSC_Unspecified = 0,
+ ESD_BSC_Section = 1,
+ ESD_BSC_Module = 2,
+ ESD_BSC_Library = 3,
+ ESD_BSC_ImportExport = 4,
+};
+
+enum ESDLinkageType : uint8_t { ESD_LT_OS = 0, ESD_LT_XPLink = 1 };
+
+enum ESDAlignment : uint8_t {
+ ESD_ALIGN_Byte = 0,
+ ESD_ALIGN_Halfword = 1,
+ ESD_ALIGN_Fullword = 2,
+ ESD_ALIGN_Doubleword = 3,
+ ESD_ALIGN_Quadword = 4,
+ ESD_ALIGN_32byte = 5,
+ ESD_ALIGN_64byte = 6,
+ ESD_ALIGN_128byte = 7,
+ ESD_ALIGN_256byte = 8,
+ ESD_ALIGN_512byte = 9,
+ ESD_ALIGN_1024byte = 10,
+ ESD_ALIGN_2Kpage = 11,
+ ESD_ALIGN_4Kpage = 12,
+};
+
+enum ENDEntryPointRequest : uint8_t {
+ END_EPR_None = 0,
+ END_EPR_EsdidOffset = 1,
+ END_EPR_ExternalName = 2,
+ END_EPR_Reserved = 3,
+};
+
// \brief Subsections of the primary C_CODE section in the object file.
enum SubsectionKind : uint8_t {
SK_PPA1 = 2,
};
-
} // end namespace GOFF
} // end namespace llvm
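
The constants added to GOFF.h above pin down the fixed 80-byte record layout: a 3-byte prefix (starting with the 0x03 PTV marker) followed by 77 payload bytes. As a hedged illustration only, a buffer can be walked record by record like this; countGOFFRecords is a hypothetical helper, not part of LLVM:

#include "llvm/BinaryFormat/GOFF.h"
#include <cstddef>
#include <cstdint>

// Count consecutive well-formed GOFF records in a raw object buffer, using
// only the constants introduced in the hunk above.
static size_t countGOFFRecords(const uint8_t *Buf, size_t Size) {
  using namespace llvm::GOFF;
  size_t NumRecords = 0;
  for (size_t Off = 0; Off + RecordLength <= Size; Off += RecordLength) {
    if (Buf[Off] != PTVPrefix)
      break; // every record starts with the PTV prefix byte (0x03)
    ++NumRecords; // RecordPrefixLength header bytes + PayloadLength data bytes
  }
  return NumRecords;
}
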
diff --git a/llvm/include/llvm/BinaryFormat/MachO.def b/llvm/include/llvm/BinaryFormat/MachO.def
index 6d1ddda3bfa9..d841b42ee808 100644
--- a/llvm/include/llvm/BinaryFormat/MachO.def
+++ b/llvm/include/llvm/BinaryFormat/MachO.def
@@ -77,6 +77,7 @@ HANDLE_LOAD_COMMAND(LC_BUILD_VERSION, 0x00000032u, build_version_command)
HANDLE_LOAD_COMMAND(LC_DYLD_EXPORTS_TRIE, 0x80000033u, linkedit_data_command)
HANDLE_LOAD_COMMAND(LC_DYLD_CHAINED_FIXUPS, 0x80000034u, linkedit_data_command)
HANDLE_LOAD_COMMAND(LC_FILESET_ENTRY, 0x80000035u, fileset_entry_command)
+HANDLE_LOAD_COMMAND(LC_ATOM_INFO, 0x00000036u, linkedit_data_command)
#endif
diff --git a/llvm/include/llvm/BinaryFormat/MachO.h b/llvm/include/llvm/BinaryFormat/MachO.h
index d51af31fb14f..a6d64b4c04ee 100644
--- a/llvm/include/llvm/BinaryFormat/MachO.h
+++ b/llvm/include/llvm/BinaryFormat/MachO.h
@@ -473,6 +473,8 @@ enum RelocationInfoType {
ARM64_RELOC_TLVP_LOAD_PAGEOFF12 = 9,
// Must be followed by ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12.
ARM64_RELOC_ADDEND = 10,
+ // An authenticated pointer.
+ ARM64_RELOC_AUTHENTICATED_POINTER = 11,
// Constant values for the r_type field in an x86_64 architecture
// llvm::MachO::relocation_info or llvm::MachO::scattered_relocation_info
@@ -509,7 +511,7 @@ enum PlatformType {
};
// Values for tools enum in build_tool_version.
-enum { TOOL_CLANG = 1, TOOL_SWIFT = 2, TOOL_LD = 3 };
+enum { TOOL_CLANG = 1, TOOL_SWIFT = 2, TOOL_LD = 3, TOOL_LLD = 4 };
// Structs from <mach-o/loader.h>
@@ -1037,8 +1039,8 @@ enum {
// Values for dyld_chained_starts_in_segment::page_start.
enum {
DYLD_CHAINED_PTR_START_NONE = 0xFFFF,
- DYLD_CHAINED_PTR_START_MULTI = 0x8000,
- DYLD_CHAINED_PTR_START_LAST = 0x8000,
+ DYLD_CHAINED_PTR_START_MULTI = 0x8000, // page which has multiple starts
+ DYLD_CHAINED_PTR_START_LAST = 0x8000, // last chain_start for a given page
};
// Values for dyld_chained_starts_in_segment::pointer_format.
diff --git a/llvm/include/llvm/BinaryFormat/MinidumpConstants.def b/llvm/include/llvm/BinaryFormat/MinidumpConstants.def
index 543305feea77..5226da3e8412 100644
--- a/llvm/include/llvm/BinaryFormat/MinidumpConstants.def
+++ b/llvm/include/llvm/BinaryFormat/MinidumpConstants.def
@@ -115,6 +115,7 @@ HANDLE_MDMP_PLATFORM(0x8202, Solaris) // Solaris
HANDLE_MDMP_PLATFORM(0x8203, Android) // Android
HANDLE_MDMP_PLATFORM(0x8204, PS3) // PS3
HANDLE_MDMP_PLATFORM(0x8205, NaCl) // Native Client (NaCl)
+HANDLE_MDMP_PLATFORM(0x8206, OpenHOS) // OpenHarmony OS
HANDLE_MDMP_PROTECT(0x01, NoAccess, PAGE_NO_ACCESS)
HANDLE_MDMP_PROTECT(0x02, ReadOnly, PAGE_READ_ONLY)
diff --git a/llvm/include/llvm/BinaryFormat/MsgPackDocument.h b/llvm/include/llvm/BinaryFormat/MsgPackDocument.h
index 448c7a4e0034..7a181bd9bf84 100644
--- a/llvm/include/llvm/BinaryFormat/MsgPackDocument.h
+++ b/llvm/include/llvm/BinaryFormat/MsgPackDocument.h
@@ -124,6 +124,11 @@ public:
return Raw;
}
+ MemoryBufferRef getBinary() const {
+ assert(getKind() == Type::Binary);
+ return MemoryBufferRef(Raw, "");
+ }
+
/// Get an ArrayDocNode for an array node. If Convert, convert the node to an
/// array node if necessary.
ArrayDocNode &getArray(bool Convert = false) {
@@ -201,6 +206,7 @@ public:
/// that restriction.
DocNode &operator=(const char *Val) { return *this = StringRef(Val); }
DocNode &operator=(StringRef Val);
+ DocNode &operator=(MemoryBufferRef Val);
DocNode &operator=(bool Val);
DocNode &operator=(int Val);
DocNode &operator=(unsigned Val);
@@ -368,6 +374,17 @@ public:
return getNode(StringRef(V), Copy);
}
+ /// Create a Binary node associated with this Document. If !Copy, the passed
+ /// buffer must remain valid for the lifetime of the Document.
+ DocNode getNode(MemoryBufferRef V, bool Copy = false) {
+ auto Raw = V.getBuffer();
+ if (Copy)
+ Raw = addString(Raw);
+ auto N = DocNode(&KindAndDocs[size_t(Type::Binary)]);
+ N.Raw = Raw;
+ return N;
+ }
+
/// Create an empty Map node associated with this Document.
MapDocNode getMapNode() {
auto N = DocNode(&KindAndDocs[size_t(Type::Map)]);
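
The MemoryBufferRef overloads added above make it possible to store raw bytes in a document as a MsgPack Binary node. A minimal sketch, assuming the existing Document::getRoot()/writeToBlob() API; encodeBinaryBlob is illustrative only:

#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/Support/MemoryBufferRef.h"
#include <string>

// Wrap an arbitrary byte string in a Binary node and serialize the document.
static std::string encodeBinaryBlob(llvm::StringRef Bytes) {
  llvm::msgpack::Document Doc;
  llvm::MemoryBufferRef Ref(Bytes, /*Identifier=*/"blob");
  // Copy = true: the document takes its own copy, so Ref need not outlive it.
  Doc.getRoot() = Doc.getNode(Ref, /*Copy=*/true);
  std::string Blob;
  Doc.writeToBlob(Blob);
  return Blob;
}
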
diff --git a/llvm/include/llvm/BinaryFormat/WasmRelocs.def b/llvm/include/llvm/BinaryFormat/WasmRelocs.def
index 2913f20dfd26..6f5a946b1c63 100644
--- a/llvm/include/llvm/BinaryFormat/WasmRelocs.def
+++ b/llvm/include/llvm/BinaryFormat/WasmRelocs.def
@@ -28,3 +28,4 @@ WASM_RELOC(R_WASM_FUNCTION_OFFSET_I64, 22)
WASM_RELOC(R_WASM_MEMORY_ADDR_LOCREL_I32, 23)
WASM_RELOC(R_WASM_TABLE_INDEX_REL_SLEB64, 24)
WASM_RELOC(R_WASM_MEMORY_ADDR_TLS_SLEB64, 25)
+WASM_RELOC(R_WASM_FUNCTION_INDEX_I32, 26)
diff --git a/llvm/include/llvm/BinaryFormat/XCOFF.h b/llvm/include/llvm/BinaryFormat/XCOFF.h
index 5774e01429d3..19d44a5ac57f 100644
--- a/llvm/include/llvm/BinaryFormat/XCOFF.h
+++ b/llvm/include/llvm/BinaryFormat/XCOFF.h
@@ -328,6 +328,7 @@ enum CFileStringType : uint8_t {
enum CFileLangId : uint8_t {
TB_C = 0, ///< C language.
+ TB_Fortran = 1, ///< Fortran language.
TB_CPLUSPLUS = 9 ///< C++ language.
};
diff --git a/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h b/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h
index 63ecf8513360..c7219a52f976 100644
--- a/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h
+++ b/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h
@@ -57,29 +57,27 @@ class BitcodeAnalyzer {
unsigned NumTopBlocks = 0;
struct PerRecordStats {
- unsigned NumInstances;
- unsigned NumAbbrev;
- uint64_t TotalBits;
- PerRecordStats() : NumInstances(0), NumAbbrev(0), TotalBits(0) {}
+ unsigned NumInstances = 0;
+ unsigned NumAbbrev = 0;
+ uint64_t TotalBits = 0;
+ PerRecordStats() = default;
};
struct PerBlockIDStats {
/// NumInstances - This the number of times this block ID has been seen.
- unsigned NumInstances;
+ unsigned NumInstances = 0;
/// NumBits - The total size in bits of all of these blocks.
- uint64_t NumBits;
+ uint64_t NumBits = 0;
/// NumSubBlocks - The total number of blocks these blocks contain.
- unsigned NumSubBlocks;
+ unsigned NumSubBlocks = 0;
/// NumAbbrevs - The total number of abbreviations.
- unsigned NumAbbrevs;
+ unsigned NumAbbrevs = 0;
/// NumRecords - The total number of records these blocks contain, and the
/// number that are abbreviated.
- unsigned NumRecords, NumAbbreviatedRecords;
+ unsigned NumRecords = 0, NumAbbreviatedRecords = 0;
/// CodeFreq - Keep track of the number of times we see each code.
std::vector<PerRecordStats> CodeFreq;
- PerBlockIDStats()
- : NumInstances(0), NumBits(0), NumSubBlocks(0), NumAbbrevs(0),
- NumRecords(0), NumAbbreviatedRecords(0) {}
+ PerBlockIDStats() = default;
};
std::map<unsigned, PerBlockIDStats> BlockIDStats;
diff --git a/llvm/include/llvm/Bitcode/BitcodeReader.h b/llvm/include/llvm/Bitcode/BitcodeReader.h
index 5f87445eff1d..8fc1dbda11ed 100644
--- a/llvm/include/llvm/Bitcode/BitcodeReader.h
+++ b/llvm/include/llvm/Bitcode/BitcodeReader.h
@@ -94,6 +94,7 @@ struct ParserCallbacks {
bool IsThinLTO;
bool HasSummary;
bool EnableSplitLTOUnit;
+ bool UnifiedLTO;
};
/// Represents a module in a bitcode file.
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 62dc01877b27..52e76356a892 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -712,6 +712,7 @@ enum AttributeKindCodes {
ATTR_KIND_FNRETTHUNK_EXTERN = 84,
ATTR_KIND_SKIP_PROFILE = 85,
ATTR_KIND_MEMORY = 86,
+ ATTR_KIND_NOFPCLASS = 87,
};
enum ComdatSelectionKindCodes {
diff --git a/llvm/include/llvm/CodeGen/AccelTable.h b/llvm/include/llvm/CodeGen/AccelTable.h
index be7ed03deb27..ec977b546f46 100644
--- a/llvm/include/llvm/CodeGen/AccelTable.h
+++ b/llvm/include/llvm/CodeGen/AccelTable.h
@@ -14,6 +14,7 @@
#define LLVM_CODEGEN_ACCELTABLE_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
@@ -142,9 +143,6 @@ public:
std::vector<AccelTableData *> Values;
MCSymbol *Sym;
- HashData(DwarfStringPoolEntryRef Name, HashFn *Hash)
- : Name(Name), HashValue(Hash(Name.getString())) {}
-
#ifndef NDEBUG
void print(raw_ostream &OS) const;
void dump() const { print(dbgs()); }
@@ -157,19 +155,19 @@ protected:
/// Allocator for HashData and Values.
BumpPtrAllocator Allocator;
- using StringEntries = StringMap<HashData, BumpPtrAllocator &>;
+ using StringEntries = MapVector<StringRef, HashData>;
StringEntries Entries;
HashFn *Hash;
- uint32_t BucketCount;
- uint32_t UniqueHashCount;
+ uint32_t BucketCount = 0;
+ uint32_t UniqueHashCount = 0;
HashList Hashes;
BucketList Buckets;
void computeBucketCount();
- AccelTableBase(HashFn *Hash) : Entries(Allocator), Hash(Hash) {}
+ AccelTableBase(HashFn *Hash) : Hash(Hash) {}
public:
void finalize(AsmPrinter *Asm, StringRef Prefix);
@@ -207,10 +205,13 @@ void AccelTable<AccelTableDataT>::addName(DwarfStringPoolEntryRef Name,
assert(Buckets.empty() && "Already finalized!");
// If the string is in the list already then add this die to the list
// otherwise add a new one.
- auto Iter = Entries.try_emplace(Name.getString(), Name, Hash).first;
- assert(Iter->second.Name == Name);
- Iter->second.Values.push_back(
- new (Allocator) AccelTableDataT(std::forward<Types>(Args)...));
+ auto &It = Entries[Name.getString()];
+ if (It.Values.empty()) {
+ It.Name = Name;
+ It.HashValue = Hash(Name.getString());
+ }
+ It.Values.push_back(new (Allocator)
+ AccelTableDataT(std::forward<Types>(Args)...));
}
/// A base class for different implementations of Data classes for Apple
diff --git a/llvm/include/llvm/CodeGen/Analysis.h b/llvm/include/llvm/CodeGen/Analysis.h
index 1a09820f80ef..1c67fe2d003d 100644
--- a/llvm/include/llvm/CodeGen/Analysis.h
+++ b/llvm/include/llvm/CodeGen/Analysis.h
@@ -64,15 +64,33 @@ inline unsigned ComputeLinearIndex(Type *Ty,
///
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
SmallVectorImpl<EVT> &ValueVTs,
- SmallVectorImpl<uint64_t> *Offsets = nullptr,
+ SmallVectorImpl<TypeSize> *Offsets,
+ TypeSize StartingOffset);
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+ SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<TypeSize> *Offsets = nullptr,
uint64_t StartingOffset = 0);
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+ SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *FixedOffsets,
+ uint64_t StartingOffset);
/// Variant of ComputeValueVTs that also produces the memory VTs.
void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
SmallVectorImpl<EVT> &ValueVTs,
SmallVectorImpl<EVT> *MemVTs,
- SmallVectorImpl<uint64_t> *Offsets = nullptr,
+ SmallVectorImpl<TypeSize> *Offsets,
+ TypeSize StartingOffset);
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+ SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<EVT> *MemVTs,
+ SmallVectorImpl<TypeSize> *Offsets = nullptr,
uint64_t StartingOffset = 0);
+void ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, Type *Ty,
+ SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<EVT> *MemVTs,
+ SmallVectorImpl<uint64_t> *FixedOffsets,
+ uint64_t StartingOffset);
/// computeValueLLTs - Given an LLVM IR type, compute a sequence of
/// LLTs that represent all the individual underlying
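
The new overloads above keep per-element offsets as TypeSize rather than uint64_t, so scalable components can be described. A sketch of calling the TypeSize-based variant (splitAggregate is illustrative, not an LLVM API):

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/TypeSize.h"

// Decompose an aggregate IR type into legal value types plus their offsets.
static void splitAggregate(const llvm::TargetLowering &TLI,
                           const llvm::DataLayout &DL, llvm::Type *AggTy) {
  llvm::SmallVector<llvm::EVT, 4> ValueVTs;
  llvm::SmallVector<llvm::TypeSize, 4> Offsets;
  llvm::ComputeValueVTs(TLI, DL, AggTy, ValueVTs, &Offsets,
                        llvm::TypeSize::getFixed(0));
  // ValueVTs[i] is the i-th leaf value; Offsets[i] is its offset within AggTy.
}
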
diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 33fda248120b..0ac497c5f8ef 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -87,7 +87,7 @@ public:
TargetMachine &TM;
/// Target Asm Printer information.
- const MCAsmInfo *MAI;
+ const MCAsmInfo *MAI = nullptr;
/// This is the context for the output file that we are streaming. This owns
/// all of the global MC-related objects for the generated translation unit.
@@ -111,7 +111,7 @@ public:
MachineLoopInfo *MLI = nullptr;
/// Optimization remark emitter.
- MachineOptimizationRemarkEmitter *ORE;
+ MachineOptimizationRemarkEmitter *ORE = nullptr;
/// The symbol for the entry in __patchable_function_entires.
MCSymbol *CurrentPatchableFunctionEntrySym = nullptr;
@@ -236,6 +236,10 @@ private:
/// split stack prologue.
bool HasNoSplitStack = false;
+  /// raw_fd_ostream for outputting machine basic block frequencies if the
+  /// --mbb-profile-dump flag is set for downstream cost modelling applications.

+ std::unique_ptr<raw_fd_ostream> MBBProfileDumpFileOutput;
+
protected:
explicit AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer);
@@ -445,9 +449,9 @@ public:
/// Since emitting CFI unwind information is entangled with supporting the
/// exceptions, this returns true for platforms which use CFI unwind
- /// information for debugging purpose when
+ /// information for other purposes (debugging, sanitizers, ...) when
/// `MCAsmInfo::ExceptionsType == ExceptionHandling::None`.
- bool needsCFIForDebug() const;
+ bool usesCFIWithoutEH() const;
/// Print to the current output stream assembly representations of the
/// constants in the constant pool MCP. This is used to print out constants
@@ -643,6 +647,13 @@ public:
/// Emit a long long directive and value.
void emitInt64(uint64_t Value) const;
+ /// Emit the specified signed leb128 value.
+ void emitSLEB128(int64_t Value, const char *Desc = nullptr) const;
+
+ /// Emit the specified unsigned leb128 value.
+ void emitULEB128(uint64_t Value, const char *Desc = nullptr,
+ unsigned PadTo = 0) const;
+
/// Emit something like ".long Hi-Lo" where the size in bytes of the directive
/// is specified by Size and Hi/Lo specify the labels. This implicitly uses
/// .set if it is available.
@@ -670,13 +681,6 @@ public:
// Dwarf Emission Helper Routines
//===------------------------------------------------------------------===//
- /// Emit the specified signed leb128 value.
- void emitSLEB128(int64_t Value, const char *Desc = nullptr) const;
-
- /// Emit the specified unsigned leb128 value.
- void emitULEB128(uint64_t Value, const char *Desc = nullptr,
- unsigned PadTo = 0) const;
-
/// Emit a .byte 42 directive that corresponds to an encoding. If verbose
/// assembly output is enabled, we output comments describing the encoding.
/// Desc is a string saying what the encoding is specifying (e.g. "LSDA").
@@ -862,7 +866,7 @@ private:
/// Emit llvm.ident metadata in an '.ident' directive.
void emitModuleIdents(Module &M);
/// Emit bytes for llvm.commandline metadata.
- void emitModuleCommandLines(Module &M);
+ virtual void emitModuleCommandLines(Module &M);
GCMetadataPrinter *getOrCreateGCPrinter(GCStrategy &S);
void emitGlobalAlias(Module &M, const GlobalAlias &GA);
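
Moving emitSLEB128/emitULEB128 out of the DWARF-specific block groups them with the generic data-emission helpers, so non-DWARF emitters can use them too. A hedged sketch of such a use (emitCountedList is hypothetical):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include <cstdint>

// Emit a ULEB128-prefixed list of 32-bit values through an AsmPrinter.
static void emitCountedList(llvm::AsmPrinter &AP,
                            llvm::ArrayRef<uint32_t> Values) {
  AP.emitULEB128(Values.size(), "number of entries");
  for (uint32_t V : Values)
    AP.emitInt32(V);
}
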
diff --git a/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h b/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h
index 6e82b2b1c158..b740ab567b12 100644
--- a/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h
+++ b/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h
@@ -3,12 +3,12 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
namespace llvm {
class Function;
class Instruction;
-class Value;
class raw_ostream;
} // namespace llvm
class FunctionVarLocsBuilder;
@@ -21,7 +21,7 @@ struct VarLocInfo {
llvm::VariableID VariableID;
DIExpression *Expr = nullptr;
DebugLoc DL;
- Value *V = nullptr; // TODO: Needs to be value_s_ for variadic expressions.
+ RawLocationWrapper Values = RawLocationWrapper();
};
/// Data structure describing the variable locations in a function. Used as the
diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
index b944c6edde55..cd27fea771ba 100644
--- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
+++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h
@@ -16,15 +16,16 @@
#define LLVM_CODEGEN_BASICBLOCKSECTIONSPROFILEREADER_H
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Module.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
-
using namespace llvm;
namespace llvm {
@@ -73,8 +74,9 @@ public:
std::pair<bool, SmallVector<BBClusterInfo>>
getBBClusterInfoForFunction(StringRef FuncName) const;
- /// Read profiles of basic blocks if available here.
- void initializePass() override;
+ // Initializes the FunctionNameToDIFilename map for the current module and
+ // then reads the profile for matching functions.
+ bool doInitialization(Module &M) override;
private:
StringRef getAliasName(StringRef FuncName) const {
@@ -82,9 +84,16 @@ private:
return R == FuncAliasMap.end() ? FuncName : R->second;
}
+ // Reads the basic block sections profile for functions in this module.
+ Error ReadProfile();
+
// This contains the basic-block-sections profile.
const MemoryBuffer *MBuf = nullptr;
+ // Map from every function name in the module to its debug info filename or
+ // empty string if no debug info is available.
+ StringMap<SmallString<128>> FunctionNameToDIFilename;
+
// This encapsulates the BB cluster information for the whole program.
//
// For every function name, it contains the cluster information for (all or
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 77dd3157d070..383fdd1f4d79 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -26,6 +26,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -44,7 +45,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -276,14 +276,20 @@ public:
E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
}
- bool hasBranchDivergence() { return false; }
-
- bool useGPUDivergenceAnalysis() { return false; }
+ bool hasBranchDivergence(const Function *F = nullptr) { return false; }
bool isSourceOfDivergence(const Value *V) { return false; }
bool isAlwaysUniform(const Value *V) { return false; }
+ bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
+ return false;
+ }
+
+ bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
+ return true;
+ }
+
unsigned getFlatAddressSpace() {
// Return an invalid address space.
return -1;
@@ -414,9 +420,9 @@ public:
}
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands,
+ ArrayRef<const Value *> Operands, Type *AccessType,
TTI::TargetCostKind CostKind) {
- return BaseT::getGEPCost(PointeeType, Ptr, Operands, CostKind);
+ return BaseT::getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
}
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
@@ -530,10 +536,13 @@ public:
return TargetTransformInfo::TCC_Expensive;
}
- unsigned getInliningThresholdMultiplier() { return 1; }
+ unsigned getInliningThresholdMultiplier() const { return 1; }
unsigned adjustInliningThreshold(const CallBase *CB) { return 0; }
+ unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
+ return 0;
+ }
- int getInlinerVectorBonusPercent() { return 150; }
+ int getInlinerVectorBonusPercent() const { return 150; }
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
@@ -622,16 +631,13 @@ public:
return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}
- bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
- AssumptionCache &AC, TargetLibraryInfo *TLI,
- DominatorTree *DT,
- LoopVectorizationLegality *LVL,
- InterleavedAccessInfo *IAI) {
- return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI);
+ bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) {
+ return BaseT::preferPredicateOverEpilogue(TFI);
}
- PredicationStyle emitGetActiveLaneMask() {
- return BaseT::emitGetActiveLaneMask();
+ TailFoldingStyle
+ getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) {
+ return BaseT::getPreferredTailFoldingStyle(IVUpdateMayOverflow);
}
std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
@@ -713,6 +719,7 @@ public:
std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
+ bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
/// are set if the demanded result elements need to be inserted and/or
@@ -844,7 +851,7 @@ public:
}
}
- unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
+ unsigned getMaxInterleaveFactor(ElementCount VF) { return 1; }
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
@@ -1508,11 +1515,11 @@ public:
// SelectionDAGBuilder.
APInt Exponent = RHSC->getValue().abs();
unsigned ActiveBits = Exponent.getActiveBits();
- unsigned PopCount = Exponent.countPopulation();
+ unsigned PopCount = Exponent.popcount();
InstructionCost Cost = (ActiveBits + PopCount - 2) *
thisT()->getArithmeticInstrCost(
Instruction::FMul, RetTy, CostKind);
- if (RHSC->getSExtValue() < 0)
+ if (RHSC->isNegative())
Cost += thisT()->getArithmeticInstrCost(Instruction::FDiv, RetTy,
CostKind);
return Cost;
@@ -1595,6 +1602,8 @@ public:
case Intrinsic::vector_reduce_smin:
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
+ case Intrinsic::vector_reduce_fmaximum:
+ case Intrinsic::vector_reduce_fminimum:
case Intrinsic::vector_reduce_umax:
case Intrinsic::vector_reduce_umin: {
IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, I, 1);
@@ -1883,17 +1892,29 @@ public:
return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy,
FMF, CostKind);
case Intrinsic::vector_reduce_smax:
+ return thisT()->getMinMaxReductionCost(Intrinsic::smax, VecOpTy,
+ ICA.getFlags(), CostKind);
case Intrinsic::vector_reduce_smin:
- case Intrinsic::vector_reduce_fmax:
- case Intrinsic::vector_reduce_fmin:
- return thisT()->getMinMaxReductionCost(
- VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
- /*IsUnsigned=*/false, CostKind);
+ return thisT()->getMinMaxReductionCost(Intrinsic::smin, VecOpTy,
+ ICA.getFlags(), CostKind);
case Intrinsic::vector_reduce_umax:
+ return thisT()->getMinMaxReductionCost(Intrinsic::umax, VecOpTy,
+ ICA.getFlags(), CostKind);
case Intrinsic::vector_reduce_umin:
- return thisT()->getMinMaxReductionCost(
- VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
- /*IsUnsigned=*/true, CostKind);
+ return thisT()->getMinMaxReductionCost(Intrinsic::umin, VecOpTy,
+ ICA.getFlags(), CostKind);
+ case Intrinsic::vector_reduce_fmax:
+ return thisT()->getMinMaxReductionCost(Intrinsic::maxnum, VecOpTy,
+ ICA.getFlags(), CostKind);
+ case Intrinsic::vector_reduce_fmin:
+ return thisT()->getMinMaxReductionCost(Intrinsic::minnum, VecOpTy,
+ ICA.getFlags(), CostKind);
+ case Intrinsic::vector_reduce_fmaximum:
+ return thisT()->getMinMaxReductionCost(Intrinsic::maximum, VecOpTy,
+ ICA.getFlags(), CostKind);
+ case Intrinsic::vector_reduce_fminimum:
+ return thisT()->getMinMaxReductionCost(Intrinsic::minimum, VecOpTy,
+ ICA.getFlags(), CostKind);
case Intrinsic::abs: {
// abs(X) = select(icmp(X,0),X,sub(0,X))
Type *CondTy = RetTy->getWithNewBitWidth(1);
@@ -2333,6 +2354,7 @@ public:
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) {
+ assert(Ty && "Unknown reduction vector type");
if (TTI::requiresOrderedReduction(FMF))
return getOrderedReductionCost(Opcode, Ty, CostKind);
return getTreeReductionCost(Opcode, Ty, CostKind);
@@ -2340,8 +2362,8 @@ public:
/// Try to calculate op costs for min/max reduction operations.
/// \param CondTy Conditional type for the Select instruction.
- InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsUnsigned,
+ InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind) {
// Targets must implement a default value for the scalable case, since
// we don't know how many lanes the vector has.
@@ -2349,17 +2371,8 @@ public:
return InstructionCost::getInvalid();
Type *ScalarTy = Ty->getElementType();
- Type *ScalarCondTy = CondTy->getElementType();
unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
unsigned NumReduxLevels = Log2_32(NumVecElts);
- unsigned CmpOpcode;
- if (Ty->isFPOrFPVectorTy()) {
- CmpOpcode = Instruction::FCmp;
- } else {
- assert(Ty->isIntOrIntVectorTy() &&
- "expecting floating point or integer type for min/max reduction");
- CmpOpcode = Instruction::ICmp;
- }
InstructionCost MinMaxCost = 0;
InstructionCost ShuffleCost = 0;
std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty);
@@ -2369,16 +2382,13 @@ public:
while (NumVecElts > MVTLen) {
NumVecElts /= 2;
auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
- CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts);
ShuffleCost +=
thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt,
CostKind, NumVecElts, SubTy);
- MinMaxCost +=
- thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind) +
- thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
+
+ IntrinsicCostAttributes Attrs(IID, SubTy, {SubTy, SubTy}, FMF);
+ MinMaxCost += getIntrinsicInstrCost(Attrs, CostKind);
Ty = SubTy;
++LongVectorCount;
}
@@ -2392,12 +2402,8 @@ public:
ShuffleCost +=
NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
std::nullopt, CostKind, 0, Ty);
- MinMaxCost +=
- NumReduxLevels *
- (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind) +
- thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind));
+ IntrinsicCostAttributes Attrs(IID, Ty, {Ty, Ty}, FMF);
+ MinMaxCost += NumReduxLevels * getIntrinsicInstrCost(Attrs, CostKind);
// The last min/max should be in vector registers and we counted it above.
// So just need a single extractelement.
return ShuffleCost + MinMaxCost +
@@ -2407,7 +2413,7 @@ public:
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
Type *ResTy, VectorType *Ty,
- std::optional<FastMathFlags> FMF,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind) {
// Without any native support, this is equivalent to the cost of
// vecreduce.opcode(ext(Ty A)).
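
getMinMaxReductionCost now takes the min/max intrinsic ID and fast-math flags instead of a condition type plus an IsUnsigned flag, and each reduction level is costed as an intrinsic call. A hedged sketch of a query in the new shape, assuming the public TargetTransformInfo wrapper mirrors this hook; uminReductionCost is illustrative only:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

// Cost of reducing <8 x i32> with umin.
static llvm::InstructionCost
uminReductionCost(const llvm::TargetTransformInfo &TTI,
                  llvm::LLVMContext &Ctx) {
  auto *VecTy =
      llvm::FixedVectorType::get(llvm::Type::getInt32Ty(Ctx), /*NumElts=*/8);
  return TTI.getMinMaxReductionCost(
      llvm::Intrinsic::umin, VecTy, llvm::FastMathFlags(),
      llvm::TargetTransformInfo::TCK_RecipThroughput);
}
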
diff --git a/llvm/include/llvm/CodeGen/ByteProvider.h b/llvm/include/llvm/CodeGen/ByteProvider.h
new file mode 100644
index 000000000000..3187b4e68c56
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/ByteProvider.h
@@ -0,0 +1,89 @@
+//===-- include/llvm/CodeGen/ByteProvider.h - Map bytes ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// This file implements ByteProvider. The purpose of ByteProvider is to provide
+// a map between a target node's byte (byte position is DestOffset) and the
+// source (and byte position) that provides it (in Src and SrcOffset
+// respectively) See CodeGen/SelectionDAG/DAGCombiner.cpp MatchLoadCombine
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_BYTEPROVIDER_H
+#define LLVM_CODEGEN_BYTEPROVIDER_H
+
+#include <optional>
+#include <type_traits>
+
+namespace llvm {
+
+/// Represents known origin of an individual byte in combine pattern. The
+/// value of the byte is either constant zero, or comes from memory /
+/// some other productive instruction (e.g. arithmetic instructions).
+/// Bit manipulation instructions like shifts are not ByteProviders, rather
+/// are used to extract Bytes.
+template <typename ISelOp> class ByteProvider {
+private:
+ ByteProvider(std::optional<ISelOp> Src, int64_t DestOffset, int64_t SrcOffset)
+ : Src(Src), DestOffset(DestOffset), SrcOffset(SrcOffset) {}
+
+ // TODO -- use constraint in c++20
+ // Does this type correspond with an operation in selection DAG
+ template <typename T> class is_op {
+ private:
+ using yes = std::true_type;
+ using no = std::false_type;
+
+ // Only allow classes with member function getOpcode
+ template <typename U>
+ static auto test(int) -> decltype(std::declval<U>().getOpcode(), yes());
+
+ template <typename> static no test(...);
+
+ public:
+ using remove_pointer_t = typename std::remove_pointer<T>::type;
+ static constexpr bool value =
+ std::is_same<decltype(test<remove_pointer_t>(0)), yes>::value;
+ };
+
+public:
+ // For constant zero providers Src is set to nullopt. For actual providers
+ // Src represents the node which originally produced the relevant bits.
+ std::optional<ISelOp> Src = std::nullopt;
+ // DestOffset is the offset of the byte in the dest we are trying to map for.
+ int64_t DestOffset = 0;
+ // SrcOffset is the offset in the ultimate source node that maps to the
+ // DestOffset
+ int64_t SrcOffset = 0;
+
+ ByteProvider() = default;
+
+ static ByteProvider getSrc(std::optional<ISelOp> Val, int64_t ByteOffset,
+ int64_t VectorOffset) {
+ static_assert(is_op<ISelOp>().value,
+ "ByteProviders must contain an operation in selection DAG.");
+ return ByteProvider(Val, ByteOffset, VectorOffset);
+ }
+
+ static ByteProvider getConstantZero() {
+ return ByteProvider<ISelOp>(std::nullopt, 0, 0);
+ }
+ bool isConstantZero() const { return !Src; }
+
+ bool hasSrc() const { return Src.has_value(); }
+
+ bool hasSameSrc(const ByteProvider &Other) const { return Other.Src == Src; }
+
+ bool operator==(const ByteProvider &Other) const {
+ return Other.Src == Src && Other.DestOffset == DestOffset &&
+ Other.SrcOffset == SrcOffset;
+ }
+};
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_BYTEPROVIDER_H
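
Since ByteProvider only requires its template parameter to expose getOpcode(), its behaviour can be illustrated without pulling in SelectionDAG. A self-contained sketch; FakeNode is a stand-in type, not part of LLVM:

#include "llvm/CodeGen/ByteProvider.h"
#include <cassert>

namespace {
// Minimal stand-in for an ISel node; the is_op check only needs getOpcode().
struct FakeNode {
  unsigned Opcode = 0;
  unsigned getOpcode() const { return Opcode; }
};
} // namespace

int main() {
  FakeNode N;
  // Destination byte 0 is produced by byte 2 of N; destination byte 1 is a
  // known constant zero.
  auto B0 = llvm::ByteProvider<FakeNode *>::getSrc(&N, /*ByteOffset=*/0,
                                                   /*VectorOffset=*/2);
  auto B1 = llvm::ByteProvider<FakeNode *>::getConstantZero();
  assert(B0.hasSrc() && B0.SrcOffset == 2);
  assert(B1.isConstantZero() && !B0.hasSameSrc(B1));
  return 0;
}
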
diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h
index 005cfd269e3c..cb88482b9415 100644
--- a/llvm/include/llvm/CodeGen/CallingConvLower.h
+++ b/llvm/include/llvm/CodeGen/CallingConvLower.h
@@ -19,6 +19,8 @@
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Support/Alignment.h"
+#include <variant>
+#include <vector>
namespace llvm {
@@ -91,14 +93,14 @@ public:
return getReg(ValNo, ValVT, RegNo, LocVT, HTP, /*IsCustom=*/true);
}
- static CCValAssign getMem(unsigned ValNo, MVT ValVT, unsigned Offset,
+ static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset,
MVT LocVT, LocInfo HTP, bool IsCustom = false) {
CCValAssign Ret(HTP, ValNo, ValVT, LocVT, IsCustom);
- Ret.Data = int64_t(Offset);
+ Ret.Data = Offset;
return Ret;
}
- static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, unsigned Offset,
+ static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset,
MVT LocVT, LocInfo HTP) {
return getMem(ValNo, ValVT, Offset, LocVT, HTP, /*IsCustom=*/true);
}
@@ -112,7 +114,7 @@ public:
void convertToReg(unsigned RegNo) { Data = Register(RegNo); }
- void convertToMem(unsigned Offset) { Data = int64_t(Offset); }
+ void convertToMem(int64_t Offset) { Data = Offset; }
unsigned getValNo() const { return ValNo; }
MVT getValVT() const { return ValVT; }
@@ -124,7 +126,7 @@ public:
bool needsCustom() const { return isCustom; }
Register getLocReg() const { return std::get<Register>(Data); }
- unsigned getLocMemOffset() const { return std::get<int64_t>(Data); }
+ int64_t getLocMemOffset() const { return std::get<int64_t>(Data); }
unsigned getExtraInfo() const { return std::get<unsigned>(Data); }
MVT getLocVT() const { return LocVT; }
@@ -174,8 +176,10 @@ private:
const TargetRegisterInfo &TRI;
SmallVectorImpl<CCValAssign> &Locs;
LLVMContext &Context;
+ // True if arguments should be allocated at negative offsets.
+ bool NegativeOffsets;
- unsigned StackOffset;
+ uint64_t StackSize;
Align MaxStackArgAlign;
SmallVector<uint32_t, 16> UsedRegs;
SmallVector<CCValAssign, 4> PendingLocs;
@@ -224,8 +228,9 @@ private:
unsigned InRegsParamsProcessed;
public:
- CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
- SmallVectorImpl<CCValAssign> &locs, LLVMContext &C);
+ CCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
+ SmallVectorImpl<CCValAssign> &Locs, LLVMContext &Context,
+ bool NegativeOffsets = false);
void addLoc(const CCValAssign &V) {
Locs.push_back(V);
@@ -236,17 +241,14 @@ public:
CallingConv::ID getCallingConv() const { return CallingConv; }
bool isVarArg() const { return IsVarArg; }
- /// getNextStackOffset - Return the next stack offset such that all stack
- /// slots satisfy their alignment requirements.
- unsigned getNextStackOffset() const {
- return StackOffset;
- }
+ /// Returns the size of the currently allocated portion of the stack.
+ uint64_t getStackSize() const { return StackSize; }
/// getAlignedCallFrameSize - Return the size of the call frame needed to
/// be able to store all arguments and such that the alignment requirement
/// of each of the arguments is satisfied.
- unsigned getAlignedCallFrameSize() const {
- return alignTo(StackOffset, MaxStackArgAlign);
+ uint64_t getAlignedCallFrameSize() const {
+ return alignTo(StackSize, MaxStackArgAlign);
}
/// isAllocated - Return true if the specified register (or an alias) is
@@ -399,21 +401,26 @@ public:
/// AllocateStack - Allocate a chunk of stack space with the specified size
/// and alignment.
- unsigned AllocateStack(unsigned Size, Align Alignment) {
- StackOffset = alignTo(StackOffset, Alignment);
- unsigned Result = StackOffset;
- StackOffset += Size;
+ int64_t AllocateStack(unsigned Size, Align Alignment) {
+ int64_t Offset;
+ if (NegativeOffsets) {
+ StackSize = alignTo(StackSize + Size, Alignment);
+ Offset = -StackSize;
+ } else {
+ Offset = alignTo(StackSize, Alignment);
+ StackSize = Offset + Size;
+ }
MaxStackArgAlign = std::max(Alignment, MaxStackArgAlign);
ensureMaxAlignment(Alignment);
- return Result;
+ return Offset;
}
void ensureMaxAlignment(Align Alignment);
/// Version of AllocateStack with list of extra registers to be shadowed.
/// Note that, unlike AllocateReg, this shadows ALL of the shadow registers.
- unsigned AllocateStack(unsigned Size, Align Alignment,
- ArrayRef<MCPhysReg> ShadowRegs) {
+ int64_t AllocateStack(unsigned Size, Align Alignment,
+ ArrayRef<MCPhysReg> ShadowRegs) {
for (MCPhysReg Reg : ShadowRegs)
MarkAllocated(Reg);
return AllocateStack(Size, Alignment);
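
AllocateStack now tracks a running stack size and can hand out negative offsets for targets whose argument area grows downwards. A standalone restatement of just that arithmetic (not the real CCState), showing what the two modes return for an 8-byte slot followed by a 16-byte, 16-aligned slot:

#include "llvm/Support/MathExtras.h"
#include <cstdint>
#include <cstdio>

// Mirror of the allocation logic above, kept free of MachineFunction state.
static int64_t allocate(uint64_t &StackSize, bool NegativeOffsets,
                        unsigned Size, uint64_t Alignment) {
  int64_t Offset;
  if (NegativeOffsets) {
    StackSize = llvm::alignTo(StackSize + Size, Alignment);
    Offset = -static_cast<int64_t>(StackSize);
  } else {
    Offset = llvm::alignTo(StackSize, Alignment);
    StackSize = Offset + Size;
  }
  return Offset;
}

int main() {
  uint64_t Up = 0, Down = 0;
  long long P0 = allocate(Up, false, 8, 8);    // 0
  long long P1 = allocate(Up, false, 16, 16);  // 16
  long long N0 = allocate(Down, true, 8, 8);   // -8
  long long N1 = allocate(Down, true, 16, 16); // -32
  std::printf("positive offsets: %lld %lld\n", P0, P1);
  std::printf("negative offsets: %lld %lld\n", N0, N1);
  return 0;
}
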
diff --git a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
index 3b11c840256d..90ef890f22d1 100644
--- a/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
+++ b/llvm/include/llvm/CodeGen/CodeGenCommonISel.h
@@ -19,6 +19,8 @@
namespace llvm {
class BasicBlock;
+enum FPClassTest : unsigned;
+
/// Encapsulates all of the information needed to generate a stack protector
/// check, and signals to isel when initialized that one needs to be generated.
///
@@ -212,13 +214,14 @@ private:
MachineBasicBlock::iterator
findSplitPointForStackProtector(MachineBasicBlock *BB,
const TargetInstrInfo &TII);
-/// Evaluates if the specified FP class test is an inversion of a simpler test.
-/// An example is the test "inf|normal|subnormal|zero", which is an inversion
-/// of "nan".
+
+/// Evaluates if the specified FP class test is better performed as the inverse
+/// (i.e. fewer instructions should be required to lower it). An example is the
+/// test "inf|normal|subnormal|zero", which is an inversion of "nan".
/// \param Test The test as specified in 'is_fpclass' intrinsic invocation.
-/// \returns The inverted test, or zero, if inversion does not produce simpler
-/// test.
-unsigned getInvertedFPClassTest(unsigned Test);
+/// \returns The inverted test, or fcNone, if inversion does not produce a
+/// simpler test.
+FPClassTest invertFPClassTestIfSimpler(FPClassTest Test);
/// Assuming the instruction \p MI is going to be deleted, attempt to salvage
/// debug users of \p MI by writing the effect of \p MI in a DIExpression.
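
The renamed helper now returns a typed FPClassTest and is explicitly about picking the cheaper polarity. An illustrative use matching the example in its doc comment; invertWideMask is not an LLVM function:

#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"

// "Everything except NaN" is cheaper to lower as "is NaN" plus an inversion.
static llvm::FPClassTest invertWideMask() {
  using namespace llvm;
  FPClassTest Wide = FPClassTest(fcInf | fcNormal | fcSubnormal | fcZero);
  // Per the doc comment above, this should come back as fcNan; fcNone would
  // mean no simpler inverted form exists.
  return invertFPClassTestIfSimpler(Wide);
}
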
diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
index 9ac51ed9f6fa..ab1219328a5d 100644
--- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
@@ -714,6 +714,7 @@ template <typename Derived>
void CodeGenPassBuilder<Derived>::addISelPrepare(AddIRPass &addPass) const {
derived().addPreISel(addPass);
+ addPass(CallBrPrepare());
// Add both the safe stack and the stack protection passes: each of them will
// only protect functions that have corresponding attributes.
addPass(SafeStackPass());
diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h
index 7b1ef60912f1..fa10ddd4447d 100644
--- a/llvm/include/llvm/CodeGen/CommandFlags.h
+++ b/llvm/include/llvm/CodeGen/CommandFlags.h
@@ -94,7 +94,7 @@ std::string getTrapFuncName();
bool getUseCtors();
-bool getLowerGlobalDtorsViaCxaAtExit();
+bool getDisableIntegratedAS();
bool getRelaxELFRelocations();
@@ -113,6 +113,7 @@ std::string getBBSections();
unsigned getTLSSize();
bool getEmulatedTLS();
+std::optional<bool> getExplicitEmulatedTLS();
bool getUniqueSectionNames();
@@ -137,7 +138,7 @@ std::optional<bool> getExplicitValueTrackingVariableLocations();
bool getForceDwarfFrameSection();
-bool getXRayOmitFunctionIndex();
+bool getXRayFunctionIndex();
bool getDebugStrictDwarf();
@@ -145,6 +146,8 @@ unsigned getAlignLoops();
bool getJMCInstrument();
+bool getXCOFFReadOnlyPointers();
+
/// Create this object with static storage to register codegen-related command
/// line options.
struct RegisterCodeGenFlags {
diff --git a/llvm/include/llvm/CodeGen/ComplexDeinterleavingPass.h b/llvm/include/llvm/CodeGen/ComplexDeinterleavingPass.h
index 99df6e5ad1d7..84a2673fecb5 100644
--- a/llvm/include/llvm/CodeGen/ComplexDeinterleavingPass.h
+++ b/llvm/include/llvm/CodeGen/ComplexDeinterleavingPass.h
@@ -15,7 +15,6 @@
#define LLVM_CODEGEN_COMPLEXDEINTERLEAVING_H
#include "llvm/IR/PassManager.h"
-#include "llvm/IR/PatternMatch.h"
namespace llvm {
@@ -38,7 +37,12 @@ enum class ComplexDeinterleavingOperation {
CMulPartial,
// The following 'operations' are used to represent internal states. Backends
// are not expected to try and support these in any capacity.
- Shuffle
+ Deinterleave,
+ Splat,
+ Symmetric,
+ ReductionPHI,
+ ReductionOperation,
+ ReductionSelect,
};
enum class ComplexDeinterleavingRotation {
diff --git a/llvm/include/llvm/CodeGen/CostTable.h b/llvm/include/llvm/CodeGen/CostTable.h
index d23f79d94a7a..ca0cbdda3b11 100644
--- a/llvm/include/llvm/CodeGen/CostTable.h
+++ b/llvm/include/llvm/CodeGen/CostTable.h
@@ -16,7 +16,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/MachineValueType.h"
+#include "llvm/CodeGen/MachineValueType.h"
namespace llvm {
diff --git a/llvm/include/llvm/CodeGen/DFAPacketizer.h b/llvm/include/llvm/CodeGen/DFAPacketizer.h
index aba6503a6a1f..a7a2dfdf0950 100644
--- a/llvm/include/llvm/CodeGen/DFAPacketizer.h
+++ b/llvm/include/llvm/CodeGen/DFAPacketizer.h
@@ -26,6 +26,8 @@
#define LLVM_CODEGEN_DFAPACKETIZER_H
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/Support/Automaton.h"
#include <cstdint>
#include <map>
@@ -35,7 +37,6 @@
namespace llvm {
-class DefaultVLIWScheduler;
class ScheduleDAGMutation;
class InstrItineraryData;
class MachineFunction;
@@ -45,6 +46,30 @@ class MCInstrDesc;
class SUnit;
class TargetInstrInfo;
+// This class extends ScheduleDAGInstrs and overrides the schedule method
+// to build the dependence graph.
+class DefaultVLIWScheduler : public ScheduleDAGInstrs {
+private:
+ AAResults *AA;
+ /// Ordered list of DAG postprocessing steps.
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
+
+public:
+ DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
+ AAResults *AA);
+
+ // Actual scheduling work.
+ void schedule() override;
+
+ /// DefaultVLIWScheduler takes ownership of the Mutation object.
+ void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
+ Mutations.push_back(std::move(Mutation));
+ }
+
+protected:
+ void postProcessDAG();
+};
+
class DFAPacketizer {
private:
const InstrItineraryData *InstrItins;
@@ -127,7 +152,8 @@ public:
// The AAResults parameter can be nullptr.
VLIWPacketizerList(MachineFunction &MF, MachineLoopInfo &MLI,
AAResults *AA);
-
+ VLIWPacketizerList &operator=(const VLIWPacketizerList &other) = delete;
+ VLIWPacketizerList(const VLIWPacketizerList &other) = delete;
virtual ~VLIWPacketizerList();
// Implement this API in the backend to bundle instructions.
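
With DefaultVLIWScheduler now declared in the header and carrying its own mutation list, targets can register DAG mutations before packetization builds the dependence graph. A minimal sketch; NoopMutation and installMutations are illustrative only:

#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include <memory>

namespace {
// Does nothing; a real target would add or reorder dependence edges here.
struct NoopMutation : public llvm::ScheduleDAGMutation {
  void apply(llvm::ScheduleDAGInstrs *) override {}
};
} // namespace

// Hook a mutation into the scheduler; it runs as a DAG postprocessing step.
static void installMutations(llvm::DefaultVLIWScheduler &VLIWScheduler) {
  VLIWScheduler.addMutation(std::make_unique<NoopMutation>());
}
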
diff --git a/llvm/include/llvm/CodeGen/DIE.h b/llvm/include/llvm/CodeGen/DIE.h
index 7f7372630dbe..952c38cd22db 100644
--- a/llvm/include/llvm/CodeGen/DIE.h
+++ b/llvm/include/llvm/CodeGen/DIE.h
@@ -115,6 +115,11 @@ public:
Data.push_back(DIEAbbrevData(Attribute, Value));
}
+ /// Adds another set of attribute information to the abbreviation.
+ void AddAttribute(const DIEAbbrevData &AbbrevData) {
+ Data.push_back(AbbrevData);
+ }
+
/// Used to gather unique data for the abbreviation folding set.
void Profile(FoldingSetNodeID &ID) const;
@@ -459,6 +464,8 @@ public:
}
DIEValue &operator=(const DIEValue &X) {
+ if (this == &X)
+ return *this;
destroyVal();
Ty = X.Ty;
Attribute = X.Attribute;
@@ -559,6 +566,7 @@ public:
void push_back(T &N) { IntrusiveBackListBase::push_back(N); }
void push_front(T &N) { IntrusiveBackListBase::push_front(N); }
+
T &back() { return *static_cast<T *>(Last); }
const T &back() const { return *static_cast<T *>(Last); }
T &front() {
@@ -587,6 +595,25 @@ public:
Other.Last = nullptr;
}
+ bool deleteNode(T &N) {
+ if (Last == &N) {
+ Last = Last->Next.getPointer();
+ Last->Next.setInt(true);
+ return true;
+ }
+
+ Node *cur = Last;
+ while (cur && cur->Next.getPointer()) {
+ if (cur->Next.getPointer() == &N) {
+ cur->Next.setPointer(cur->Next.getPointer()->Next.getPointer());
+ return true;
+ }
+ cur = cur->Next.getPointer();
+ }
+
+ return false;
+ }
+
class const_iterator;
class iterator
: public iterator_facade_base<iterator, std::forward_iterator_tag, T> {
@@ -716,10 +743,63 @@ public:
}
template <class T>
value_iterator addValue(BumpPtrAllocator &Alloc, dwarf::Attribute Attribute,
- dwarf::Form Form, T &&Value) {
+ dwarf::Form Form, T &&Value) {
return addValue(Alloc, DIEValue(Attribute, Form, std::forward<T>(Value)));
}
+ /* zr33: add method here */
+ template <class T>
+ bool replaceValue(BumpPtrAllocator &Alloc, dwarf::Attribute Attribute,
+ dwarf::Attribute NewAttribute, dwarf::Form Form,
+ T &&NewValue) {
+ for (llvm::DIEValue &val : values()) {
+ if (val.getAttribute() == Attribute) {
+ val = *new (Alloc)
+ DIEValue(NewAttribute, Form, std::forward<T>(NewValue));
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ template <class T>
+ bool replaceValue(BumpPtrAllocator &Alloc, dwarf::Attribute Attribute,
+ dwarf::Form Form, T &&NewValue) {
+ for (llvm::DIEValue &val : values()) {
+ if (val.getAttribute() == Attribute) {
+ val = *new (Alloc) DIEValue(Attribute, Form, std::forward<T>(NewValue));
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ bool replaceValue(BumpPtrAllocator &Alloc, dwarf::Attribute Attribute,
+ dwarf::Form Form, DIEValue &NewValue) {
+ for (llvm::DIEValue &val : values()) {
+ if (val.getAttribute() == Attribute) {
+ val = NewValue;
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ bool deleteValue(dwarf::Attribute Attribute) {
+
+ for (auto &node : List) {
+ if (node.V.getAttribute() == Attribute) {
+ return List.deleteNode(node);
+ }
+ }
+
+ return false;
+ }
+ /* end */
+
/// Take ownership of the nodes in \p Other, and append them to the back of
/// the list.
void takeValues(DIEValueList &Other) { List.takeNodes(Other.List); }
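
The replaceValue/deleteValue helpers added above (the locally marked additions) let a consumer patch attributes on an existing DIE in place. A hedged sketch; patchLowPC is illustrative and assumes the DIE and its values use the given allocator:

#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/Support/Allocator.h"
#include <cstdint>

// Rewrite DW_AT_low_pc if present, add it otherwise, then drop another
// attribute entirely.
static void patchLowPC(llvm::BumpPtrAllocator &Alloc, llvm::DIE &D,
                       uint64_t NewAddr) {
  using namespace llvm;
  if (!D.replaceValue(Alloc, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
                      DIEInteger(NewAddr)))
    D.addValue(Alloc, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
               DIEInteger(NewAddr));
  // deleteValue returns false when the attribute was never attached.
  D.deleteValue(dwarf::DW_AT_prototyped);
}
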
diff --git a/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h b/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
index 0cfe04af6f9e..7708df725180 100644
--- a/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
+++ b/llvm/include/llvm/CodeGen/DbgEntityHistoryCalculator.h
@@ -122,7 +122,7 @@ public:
EntriesMap::const_iterator end() const { return VarEntries.end(); }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- LLVM_DUMP_METHOD void dump() const;
+ LLVM_DUMP_METHOD void dump(StringRef FuncName) const;
#endif
};
diff --git a/llvm/include/llvm/CodeGen/DebugHandlerBase.h b/llvm/include/llvm/CodeGen/DebugHandlerBase.h
index 1b7355f5d9fb..af25f2544da7 100644
--- a/llvm/include/llvm/CodeGen/DebugHandlerBase.h
+++ b/llvm/include/llvm/CodeGen/DebugHandlerBase.h
@@ -55,10 +55,10 @@ protected:
DebugHandlerBase(AsmPrinter *A);
/// Target of debug info emission.
- AsmPrinter *Asm;
+ AsmPrinter *Asm = nullptr;
/// Collected machine module information.
- MachineModuleInfo *MMI;
+ MachineModuleInfo *MMI = nullptr;
/// Previous instruction's location information. This is used to
/// determine label location to indicate scope boundaries in debug info.
@@ -73,7 +73,7 @@ protected:
DebugLoc PrologEndLoc;
/// This block includes epilogue instructions.
- const MachineBasicBlock *EpilogBeginBlock;
+ const MachineBasicBlock *EpilogBeginBlock = nullptr;
/// If nonnull, stores the current machine instruction we're processing.
const MachineInstr *CurMI = nullptr;
diff --git a/llvm/include/llvm/CodeGen/DetectDeadLanes.h b/llvm/include/llvm/CodeGen/DetectDeadLanes.h
new file mode 100644
index 000000000000..93c7582dce09
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/DetectDeadLanes.h
@@ -0,0 +1,119 @@
+//===- DetectDeadLanes.h - SubRegister Lane Usage Analysis --*- C++ -*-----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Analysis that tracks defined/used subregister lanes across COPY instructions
+/// and instructions that get lowered to a COPY (PHI, REG_SEQUENCE,
+/// INSERT_SUBREG, EXTRACT_SUBREG).
+/// The information is used to detect dead definitions and the usage of
+/// (completely) undefined values and mark the operands as such.
+/// This pass is necessary because the dead/undef status is not obvious anymore
+/// when subregisters are involved.
+///
+/// Example:
+/// %0 = some definition
+/// %1 = IMPLICIT_DEF
+/// %2 = REG_SEQUENCE %0, sub0, %1, sub1
+/// %3 = EXTRACT_SUBREG %2, sub1
+/// = use %3
+/// The %0 definition is dead and %3 contains an undefined value.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_DETECTDEADLANES_H
+#define LLVM_CODEGEN_DETECTDEADLANES_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/MC/LaneBitmask.h"
+#include <deque>
+
+namespace llvm {
+
+class MachineInstr;
+class MachineOperand;
+class MachineRegisterInfo;
+class TargetRegisterInfo;
+
+class DeadLaneDetector {
+public:
+ /// Contains a bitmask of which lanes of a given virtual register are
+ /// defined and which ones are actually used.
+ struct VRegInfo {
+ LaneBitmask UsedLanes;
+ LaneBitmask DefinedLanes;
+ };
+
+ DeadLaneDetector(const MachineRegisterInfo *MRI,
+ const TargetRegisterInfo *TRI);
+
+ /// Update the \p DefinedLanes and the \p UsedLanes for all virtual registers.
+ void computeSubRegisterLaneBitInfo();
+
+ const VRegInfo &getVRegInfo(unsigned RegIdx) const {
+ return VRegInfos[RegIdx];
+ }
+
+ bool isDefinedByCopy(unsigned RegIdx) const {
+ return DefinedByCopy.test(RegIdx);
+ }
+
+private:
+ /// Add used lane bits on the register used by operand \p MO. This translates
+ /// the bitmask based on the operands subregister, and puts the register into
+ /// the worklist if any new bits were added.
+ void addUsedLanesOnOperand(const MachineOperand &MO, LaneBitmask UsedLanes);
+
+ /// Given a bitmask \p UsedLanes for the used lanes on a def output of a
+ /// COPY-like instruction determine the lanes used on the use operands
+ /// and call addUsedLanesOnOperand() for them.
+ void transferUsedLanesStep(const MachineInstr &MI, LaneBitmask UsedLanes);
+
+  /// Given a use register operand \p Use and a mask of defined lanes, check

+ /// if the operand belongs to a lowersToCopies() instruction, transfer the
+ /// mask to the def and put the instruction into the worklist.
+ void transferDefinedLanesStep(const MachineOperand &Use,
+ LaneBitmask DefinedLanes);
+
+public:
+ /// Given a mask \p DefinedLanes of lanes defined at operand \p OpNum
+ /// of COPY-like instruction, determine which lanes are defined at the output
+ /// operand \p Def.
+ LaneBitmask transferDefinedLanes(const MachineOperand &Def, unsigned OpNum,
+ LaneBitmask DefinedLanes) const;
+
+ /// Given a mask \p UsedLanes used from the output of instruction \p MI
+ /// determine which lanes are used from operand \p MO of this instruction.
+ LaneBitmask transferUsedLanes(const MachineInstr &MI, LaneBitmask UsedLanes,
+ const MachineOperand &MO) const;
+
+private:
+ LaneBitmask determineInitialDefinedLanes(unsigned Reg);
+ LaneBitmask determineInitialUsedLanes(unsigned Reg);
+
+ const MachineRegisterInfo *MRI;
+ const TargetRegisterInfo *TRI;
+
+ void PutInWorklist(unsigned RegIdx) {
+ if (WorklistMembers.test(RegIdx))
+ return;
+ WorklistMembers.set(RegIdx);
+ Worklist.push_back(RegIdx);
+ }
+
+ std::unique_ptr<VRegInfo[]> VRegInfos;
+ /// Worklist containing virtreg indexes.
+ std::deque<unsigned> Worklist;
+ BitVector WorklistMembers;
+ /// This bitvector is set for each vreg index where the vreg is defined
+ /// by an instruction where lowersToCopies()==true.
+ BitVector DefinedByCopy;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_DETECTDEADLANES_H
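
Exposing DeadLaneDetector as a reusable analysis means other passes can ask the same questions the DetectDeadLanes pass does. A hedged sketch of a query; hasUnusedDefinedLanes is hypothetical and RegIdx is a virtual-register index:

#include "llvm/CodeGen/DetectDeadLanes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"

// True if some lane of the given vreg is defined but never used.
static bool hasUnusedDefinedLanes(const llvm::MachineFunction &MF,
                                  unsigned RegIdx) {
  const llvm::MachineRegisterInfo &MRI = MF.getRegInfo();
  const llvm::TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  llvm::DeadLaneDetector DLD(&MRI, TRI);
  DLD.computeSubRegisterLaneBitInfo();
  const llvm::DeadLaneDetector::VRegInfo &Info = DLD.getVRegInfo(RegIdx);
  return (Info.DefinedLanes & ~Info.UsedLanes).any();
}
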
diff --git a/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h b/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h
index f19d321793e9..7822ebf2eb09 100644
--- a/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h
+++ b/llvm/include/llvm/CodeGen/DwarfStringPoolEntry.h
@@ -63,7 +63,7 @@ public:
/// thus specified entry mustn`t be reallocated.
DwarfStringPoolEntryRef(const StringMapEntry<DwarfStringPoolEntry *> &Entry)
: MapEntry(&Entry) {
- assert(MapEntry.get<ByPtrStringEntryPtr>()->second != nullptr);
+ assert(cast<ByPtrStringEntryPtr>(MapEntry)->second != nullptr);
}
explicit operator bool() const { return !MapEntry.isNull(); }
@@ -85,18 +85,18 @@ public:
/// \returns string.
StringRef getString() const {
- if (MapEntry.is<ByValStringEntryPtr>())
- return MapEntry.get<ByValStringEntryPtr>()->first();
+ if (isa<ByValStringEntryPtr>(MapEntry))
+ return cast<ByValStringEntryPtr>(MapEntry)->first();
- return MapEntry.get<ByPtrStringEntryPtr>()->first();
+ return cast<ByPtrStringEntryPtr>(MapEntry)->first();
}
/// \returns the entire string pool entry for convenience.
const DwarfStringPoolEntry &getEntry() const {
- if (MapEntry.is<ByValStringEntryPtr>())
- return MapEntry.get<ByValStringEntryPtr>()->second;
+ if (isa<ByValStringEntryPtr>(MapEntry))
+ return cast<ByValStringEntryPtr>(MapEntry)->second;
- return *MapEntry.get<ByPtrStringEntryPtr>()->second;
+ return *cast<ByPtrStringEntryPtr>(MapEntry)->second;
}
bool operator==(const DwarfStringPoolEntryRef &X) const {
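
The hunks above migrate from PointerUnion::is<T>()/get<T>() to the generic isa<>/cast<> casting machinery. The same pattern applies to any PointerUnion; a small self-contained illustration (valueOf is not an LLVM API):

#include "llvm/ADT/PointerUnion.h"
#include "llvm/Support/Casting.h"

// Read the pointee through whichever alternative the union currently holds.
// PU must be non-null, as with any isa<> query.
static long valueOf(llvm::PointerUnion<int *, long *> PU) {
  if (llvm::isa<int *>(PU))
    return *llvm::cast<int *>(PU);
  return *llvm::cast<long *>(PU);
}
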
diff --git a/llvm/include/llvm/CodeGen/EdgeBundles.h b/llvm/include/llvm/CodeGen/EdgeBundles.h
index b6187fc6dcef..b844bd307c19 100644
--- a/llvm/include/llvm/CodeGen/EdgeBundles.h
+++ b/llvm/include/llvm/CodeGen/EdgeBundles.h
@@ -22,7 +22,7 @@
namespace llvm {
class EdgeBundles : public MachineFunctionPass {
- const MachineFunction *MF;
+ const MachineFunction *MF = nullptr;
/// EC - Each edge bundle is an equivalence class. The keys are:
/// 2*BB->getNumber() -> Ingoing bundle.
diff --git a/llvm/include/llvm/CodeGen/ExecutionDomainFix.h b/llvm/include/llvm/CodeGen/ExecutionDomainFix.h
index c87d4f993e77..4e2b171c73cc 100644
--- a/llvm/include/llvm/CodeGen/ExecutionDomainFix.h
+++ b/llvm/include/llvm/CodeGen/ExecutionDomainFix.h
@@ -102,7 +102,7 @@ struct DomainValue {
/// First domain available.
unsigned getFirstDomain() const {
- return countTrailingZeros(AvailableDomains);
+ return llvm::countr_zero(AvailableDomains);
}
/// Clear this DomainValue and point to next which has all its data.
@@ -118,9 +118,9 @@ class ExecutionDomainFix : public MachineFunctionPass {
SmallVector<DomainValue *, 16> Avail;
const TargetRegisterClass *const RC;
- MachineFunction *MF;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
+ MachineFunction *MF = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
std::vector<SmallVector<int, 1>> AliasMap;
const unsigned NumRegs;
/// Value currently in each register, or NULL when no value is being tracked.
@@ -133,7 +133,7 @@ class ExecutionDomainFix : public MachineFunctionPass {
using OutRegsInfoMap = SmallVector<LiveRegsDVInfo, 4>;
OutRegsInfoMap MBBOutRegsInfos;
- ReachingDefAnalysis *RDA;
+ ReachingDefAnalysis *RDA = nullptr;
public:
ExecutionDomainFix(char &PassID, const TargetRegisterClass &RC)
diff --git a/llvm/include/llvm/CodeGen/ExpandReductions.h b/llvm/include/llvm/CodeGen/ExpandReductions.h
index 5dbed07873c1..91c2507a9e39 100644
--- a/llvm/include/llvm/CodeGen/ExpandReductions.h
+++ b/llvm/include/llvm/CodeGen/ExpandReductions.h
@@ -1,4 +1,4 @@
-//===----- ExpandReductions.h - Expand experimental reduction intrinsics --===//
+//===- ExpandReductions.h - Expand reduction intrinsics ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h
index d25776e5eff1..dc2931b40d35 100644
--- a/llvm/include/llvm/CodeGen/FastISel.h
+++ b/llvm/include/llvm/CodeGen/FastISel.h
@@ -19,13 +19,13 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
-#include "llvm/Support/MachineValueType.h"
#include <cstdint>
#include <utility>
diff --git a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index f8156ce73196..4c17e8dcc41a 100644
--- a/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -35,7 +35,7 @@ namespace llvm {
class Argument;
class BasicBlock;
class BranchProbabilityInfo;
-class LegacyDivergenceAnalysis;
+class DbgDeclareInst;
class Function;
class Instruction;
class MachineFunction;
@@ -45,6 +45,11 @@ class MVT;
class SelectionDAG;
class TargetLowering;
+template <typename T> class GenericSSAContext;
+using SSAContext = GenericSSAContext<Function>;
+template <typename T> class GenericUniformityInfo;
+using UniformityInfo = GenericUniformityInfo<SSAContext>;
+
//===--------------------------------------------------------------------===//
/// FunctionLoweringInfo - This contains information that is global to a
/// function that is used when lowering a region of the function.
@@ -56,7 +61,7 @@ public:
const TargetLowering *TLI;
MachineRegisterInfo *RegInfo;
BranchProbabilityInfo *BPI;
- const LegacyDivergenceAnalysis *DA;
+ const UniformityInfo *UA;
/// CanLowerReturn - true iff the function's return value can be lowered to
/// registers.
bool CanLowerReturn;
@@ -183,6 +188,10 @@ public:
/// SelectionDAGISel::PrepareEHLandingPad().
unsigned ExceptionPointerVirtReg, ExceptionSelectorVirtReg;
+ /// Collection of dbg.declare instructions handled after argument
+ /// lowering and before ISel proper.
+ SmallPtrSet<const DbgDeclareInst *, 8> PreprocessedDbgDeclares;
+
/// set - Initialize this FunctionLoweringInfo with the given Function
/// and its associated MachineFunction.
///
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h
index 87dae64c5f90..09d6192a2bd5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h
@@ -111,6 +111,8 @@ class GISelCSEInfo : public GISelChangeObserver {
/// into the CSEMap. MI should return true for shouldCSE(MI->getOpcode())
void insertInstr(MachineInstr *MI, void *InsertPos = nullptr);
+ bool HandlingRecordedInstrs = false;
+
public:
GISelCSEInfo() = default;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index de2d6876e6b7..c8e198157b08 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -17,14 +17,14 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
-#include "llvm/Support/MachineValueType.h"
#include <cstdint>
#include <functional>
@@ -188,7 +188,7 @@ public:
if (getAssignFn(State.isVarArg())(ValNo, ValVT, LocVT, LocInfo, Flags,
State))
return true;
- StackOffset = State.getNextStackOffset();
+ StackSize = State.getStackSize();
return false;
}
@@ -199,9 +199,8 @@ public:
/// as AssignFn on most targets.
CCAssignFn *AssignFnVarArg;
- /// Stack offset for next argument. At the end of argument evaluation, this
- /// is typically the total stack size.
- uint64_t StackOffset = 0;
+ /// The size of the currently allocated portion of the stack.
+ uint64_t StackSize = 0;
/// Select the appropriate assignment function depending on whether this is
/// a variadic call.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9dc1abbcfa40..0a9f0e931ae0 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -19,8 +19,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/Register.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/IR/InstrTypes.h"
#include <functional>
@@ -29,6 +29,7 @@ namespace llvm {
class GISelChangeObserver;
class APFloat;
class APInt;
+class ConstantFP;
class GPtrAdd;
class GStore;
class GZExtLoad;
@@ -79,14 +80,6 @@ struct ShiftOfShiftedLogic {
using BuildFnTy = std::function<void(MachineIRBuilder &)>;
-struct MergeTruncStoresInfo {
- SmallVector<GStore *> FoundStores;
- GStore *LowestIdxStore = nullptr;
- Register WideSrcVal;
- bool NeedBSwap = false;
- bool NeedRotate = false;
-};
-
using OperandBuildSteps =
SmallVector<std::function<void(MachineInstrBuilder &)>, 4>;
struct InstructionBuildSteps {
@@ -310,6 +303,8 @@ public:
void applyShiftOfShiftedLogic(MachineInstr &MI,
ShiftOfShiftedLogic &MatchInfo);
+ bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo);
+
/// Transform a multiply by a power-of-2 value to a left shift.
bool matchCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
void applyCombineMulToShl(MachineInstr &MI, unsigned &ShiftVal);
@@ -358,10 +353,7 @@ public:
void applyCombineUnmergeZExtToZExt(MachineInstr &MI);
/// Transform fp_instr(cst) to constant result of the fp operation.
- bool matchCombineConstantFoldFpUnary(MachineInstr &MI,
- std::optional<APFloat> &Cst);
- void applyCombineConstantFoldFpUnary(MachineInstr &MI,
- std::optional<APFloat> &Cst);
+ void applyCombineConstantFoldFpUnary(MachineInstr &MI, const ConstantFP *Cst);
/// Transform IntToPtr(PtrToInt(x)) to x if cast is in the same address space.
bool matchCombineI2PToP2I(MachineInstr &MI, Register &Reg);
@@ -446,22 +438,22 @@ public:
bool matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx);
/// Replace an instruction with a G_FCONSTANT with value \p C.
- bool replaceInstWithFConstant(MachineInstr &MI, double C);
+ void replaceInstWithFConstant(MachineInstr &MI, double C);
/// Replace an instruction with a G_CONSTANT with value \p C.
- bool replaceInstWithConstant(MachineInstr &MI, int64_t C);
+ void replaceInstWithConstant(MachineInstr &MI, int64_t C);
/// Replace an instruction with a G_CONSTANT with value \p C.
- bool replaceInstWithConstant(MachineInstr &MI, APInt C);
+ void replaceInstWithConstant(MachineInstr &MI, APInt C);
/// Replace an instruction with a G_IMPLICIT_DEF.
- bool replaceInstWithUndef(MachineInstr &MI);
+ void replaceInstWithUndef(MachineInstr &MI);
/// Delete \p MI and replace all of its uses with its \p OpIdx-th operand.
- bool replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx);
+ void replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx);
/// Delete \p MI and replace all of its uses with \p Replacement.
- bool replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement);
+ void replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement);
/// Return true if \p MOP1 and \p MOP2 are register operands that are defined
/// by equivalent instructions.
@@ -487,7 +479,7 @@ public:
bool matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI, unsigned OpIdx);
/// Erase \p MI
- bool eraseInst(MachineInstr &MI);
+ void eraseInst(MachineInstr &MI);
/// Return true if MI is a G_ADD which can be simplified to a G_SUB.
bool matchSimplifyAddToSub(MachineInstr &MI,
@@ -558,7 +550,7 @@ public:
/// binop (select cond, K0, K1), K2 ->
/// select cond, (binop K0, K2), (binop K1, K2)
bool matchFoldBinOpIntoSelect(MachineInstr &MI, unsigned &SelectOpNo);
- bool applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo);
+ void applyFoldBinOpIntoSelect(MachineInstr &MI, const unsigned &SelectOpNo);
bool matchCombineInsertVecElts(MachineInstr &MI,
SmallVectorImpl<Register> &MatchInfo);
@@ -577,9 +569,6 @@ public:
/// bswap.
bool matchLoadOrCombine(MachineInstr &MI, BuildFnTy &MatchInfo);
- bool matchTruncStoreMerge(MachineInstr &MI, MergeTruncStoresInfo &MatchInfo);
- void applyTruncStoreMerge(MachineInstr &MI, MergeTruncStoresInfo &MatchInfo);
-
bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
void applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
@@ -642,6 +631,12 @@ public:
/// addressing mode usage.
bool matchReassocPtrAdd(MachineInstr &MI, BuildFnTy &MatchInfo);
+ /// Try to reassociate the operands of a commutative binop.
+ bool tryReassocBinOp(unsigned Opc, Register DstReg, Register Op0,
+ Register Op1, BuildFnTy &MatchInfo);
+ /// Reassociate commutative binary operations like G_ADD.
+ bool matchReassocCommBinOp(MachineInstr &MI, BuildFnTy &MatchInfo);
+
/// Do constant folding when opportunities are exposed after MIR building.
bool matchConstantFold(MachineInstr &MI, APInt &MatchInfo);
@@ -789,6 +784,9 @@ public:
/// (X ^ Y) != X -> Y != 0
bool matchRedundantBinOpInEquality(MachineInstr &MI, BuildFnTy &MatchInfo);
+ /// Match shifts greater than or equal to the bitwidth of the operation.
+ bool matchShiftsTooBig(MachineInstr &MI);
+
private:
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a post-indexing operation.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
new file mode 100644
index 000000000000..f7fe1649b4d9
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
@@ -0,0 +1,605 @@
+//===- llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file declares the GIMatchTableExecutor API, the opcodes supported
+/// by the match table, and some associated data structures used by the
+/// executor's implementation (see `GIMatchTableExecutorImpl.h`).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_GLOBALISEL_GIMATCHTABLEEXECUTOR_H
+#define LLVM_CODEGEN_GLOBALISEL_GIMATCHTABLEEXECUTOR_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Function.h"
+#include <bitset>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <initializer_list>
+#include <optional>
+#include <vector>
+
+namespace llvm {
+
+class BlockFrequencyInfo;
+class CodeGenCoverage;
+class MachineBasicBlock;
+class ProfileSummaryInfo;
+class APInt;
+class APFloat;
+class GISelKnownBits;
+class MachineInstr;
+class MachineInstrBuilder;
+class MachineFunction;
+class MachineOperand;
+class MachineRegisterInfo;
+class RegisterBankInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+
+/// Container class for CodeGen predicate results.
+/// This is convenient because std::bitset does not have a constructor
+/// with an initializer list of set bits.
+///
+/// Each GIMatchTableExecutor subclass should define a PredicateBitset class
+/// with:
+/// const unsigned MAX_SUBTARGET_PREDICATES = 192;
+/// using PredicateBitset = PredicateBitsetImpl<MAX_SUBTARGET_PREDICATES>;
+/// and update the constant to suit the target. TableGen provides a suitable
+/// definition for the predicates in use in <Target>GenGlobalISel.inc when
+/// GET_GLOBALISEL_PREDICATE_BITSET is defined.
+template <std::size_t MaxPredicates>
+class PredicateBitsetImpl : public std::bitset<MaxPredicates> {
+public:
+ // Cannot inherit constructors because it's not supported by VC++.
+ PredicateBitsetImpl() = default;
+
+ PredicateBitsetImpl(const std::bitset<MaxPredicates> &B)
+ : std::bitset<MaxPredicates>(B) {}
+
+ PredicateBitsetImpl(std::initializer_list<unsigned> Init) {
+ for (auto I : Init)
+ std::bitset<MaxPredicates>::set(I);
+ }
+};
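
As a rough usage sketch of the convention described in the comment above PredicateBitsetImpl (the 192-bit size and the feature names here are illustrative, not taken from any real target), a generated executor ends up with definitions along these lines:

  // Illustrative sketch only; real definitions are emitted by TableGen into
  // <Target>GenGlobalISel.inc when GET_GLOBALISEL_PREDICATE_BITSET is defined.
  const unsigned MAX_SUBTARGET_PREDICATES = 192;
  using PredicateBitset = PredicateBitsetImpl<MAX_SUBTARGET_PREDICATES>;

  // Hypothetical feature indices that TableGen would assign.
  enum { Feature_HasFP64 = 0, Feature_IsLittleEndian = 1 };

  // The initializer-list constructor sets exactly the listed bits.
  PredicateBitset AvailableFeatures = {Feature_HasFP64, Feature_IsLittleEndian};

The initializer-list constructor is what makes writing such bitsets in generated code convenient, which is the motivation given in the class comment.
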
+
+enum {
+ GICXXPred_Invalid = 0,
+ GICXXCustomAction_Invalid = 0,
+};
+
+enum {
+ /// Begin a try-block to attempt a match and jump to OnFail if it is
+ /// unsuccessful.
+ /// - OnFail - The MatchTable entry at which to resume if the match fails.
+ ///
+ /// FIXME: This ought to take an argument indicating the number of try-blocks
+ /// to exit on failure. It's usually one but the last match attempt of
+ /// a block will need more. The (implemented) alternative is to tack a
+ /// GIM_Reject on the end of each try-block which is simpler but
+ /// requires an extra opcode and iteration in the interpreter on each
+ /// failed match.
+ GIM_Try,
+
+ /// Switch over the opcode on the specified instruction
+ /// - InsnID - Instruction ID
+ /// - LowerBound - numerically minimum opcode supported
+ /// - UpperBound - numerically maximum + 1 opcode supported
+ /// - Default - failure jump target
+ /// - JumpTable... - (UpperBound - LowerBound) (at least 2) jump targets
+ GIM_SwitchOpcode,
+
+ /// Switch over the LLT on the specified instruction operand
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ /// - LowerBound - numerically minimum Type ID supported
+ /// - UpperBound - numerically maximum + 1 Type ID supported
+ /// - Default - failure jump target
+ /// - JumpTable... - (UpperBound - LowerBound) (at least 2) jump targets
+ GIM_SwitchType,
+
+ /// Record the specified instruction.
+ /// The IgnoreCopies variant ignores COPY instructions.
+ /// - NewInsnID - Instruction ID to define
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ GIM_RecordInsn,
+ GIM_RecordInsnIgnoreCopies,
+
+ /// Check the feature bits
+ /// - Expected features
+ GIM_CheckFeatures,
+
+ /// Check the opcode on the specified instruction
+ /// - InsnID - Instruction ID
+ /// - Expected opcode
+ GIM_CheckOpcode,
+
+ /// Check the opcode on the specified instruction, checking 2 acceptable
+ /// alternatives.
+ /// - InsnID - Instruction ID
+ /// - Expected opcode
+ /// - Alternative expected opcode
+ GIM_CheckOpcodeIsEither,
+
+ /// Check the instruction has the right number of operands
+ /// - InsnID - Instruction ID
+ /// - Expected number of operands
+ GIM_CheckNumOperands,
+ /// Check an immediate predicate on the specified instruction
+ /// - InsnID - Instruction ID
+ /// - The predicate to test
+ GIM_CheckI64ImmPredicate,
+ /// Check an immediate predicate on the specified instruction via an APInt.
+ /// - InsnID - Instruction ID
+ /// - The predicate to test
+ GIM_CheckAPIntImmPredicate,
+ /// Check a floating point immediate predicate on the specified instruction.
+ /// - InsnID - Instruction ID
+ /// - The predicate to test
+ GIM_CheckAPFloatImmPredicate,
+ /// Check an immediate predicate on the specified instruction
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ /// - The predicate to test
+ GIM_CheckImmOperandPredicate,
+ /// Check a memory operation has the specified atomic ordering.
+ /// - InsnID - Instruction ID
+ /// - Ordering - The AtomicOrdering value
+ GIM_CheckAtomicOrdering,
+ GIM_CheckAtomicOrderingOrStrongerThan,
+ GIM_CheckAtomicOrderingWeakerThan,
+ /// Check the size of the memory access for the given machine memory operand.
+ /// - InsnID - Instruction ID
+ /// - MMOIdx - MMO index
+ /// - Size - The size in bytes of the memory access
+ GIM_CheckMemorySizeEqualTo,
+
+ /// Check the address space of the memory access for the given machine memory
+ /// operand.
+ /// - InsnID - Instruction ID
+ /// - MMOIdx - MMO index
+ /// - NumAddrSpace - Number of valid address spaces
+ /// - AddrSpaceN - An allowed space of the memory access
+ /// - AddrSpaceN+1 ...
+ GIM_CheckMemoryAddressSpace,
+
+ /// Check the minimum alignment of the memory access for the given machine
+ /// memory operand.
+ /// - InsnID - Instruction ID
+ /// - MMOIdx - MMO index
+ /// - MinAlign - Minimum acceptable alignment
+ GIM_CheckMemoryAlignment,
+
+ /// Check the size of the memory access for the given machine memory operand
+ /// against the size of an operand.
+ /// - InsnID - Instruction ID
+ /// - MMOIdx - MMO index
+ /// - OpIdx - The operand index to compare the MMO against
+ GIM_CheckMemorySizeEqualToLLT,
+ GIM_CheckMemorySizeLessThanLLT,
+ GIM_CheckMemorySizeGreaterThanLLT,
+
+ /// Check if this is a vector that can be treated as a vector splat
+ /// constant. This is valid for both G_BUILD_VECTOR and
+ /// G_BUILD_VECTOR_TRUNC. For AllOnes, this refers to individual bits, so the
+ /// splat element must be -1.
+ /// - InsnID - Instruction ID
+ GIM_CheckIsBuildVectorAllOnes,
+ GIM_CheckIsBuildVectorAllZeros,
+
+ /// Check a trivial predicate which takes no arguments.
+ /// This can be used by executors to implement custom flags that don't fit in
+ /// target features.
+ GIM_CheckSimplePredicate,
+
+ /// Check a generic C++ instruction predicate
+ /// - InsnID - Instruction ID
+ /// - PredicateID - The ID of the predicate function to call
+ GIM_CheckCxxInsnPredicate,
+
+ /// Check if there's no use of the first result.
+ /// - InsnID - Instruction ID
+ GIM_CheckHasNoUse,
+
+ /// Check the type for the specified operand
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ /// - Expected type
+ GIM_CheckType,
+ /// Check the type of a pointer to any address space.
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ /// - SizeInBits - The size of the pointer value in bits.
+ GIM_CheckPointerToAny,
+ /// Check the register bank for the specified operand
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ /// - Expected register bank (specified as a register class)
+ GIM_CheckRegBankForClass,
+
+ /// Check the operand matches a complex predicate
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ /// - RendererID - The renderer to hold the result
+ /// - Complex predicate ID
+ GIM_CheckComplexPattern,
+
+ /// Check the operand is a specific integer
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ /// - Expected integer
+ GIM_CheckConstantInt,
+ /// Check the operand is a specific literal integer (i.e. MO.isImm() or
+ /// MO.isCImm() is true).
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ /// - Expected integer
+ GIM_CheckLiteralInt,
+ /// Check the operand is a specific intrinsic ID
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ /// - Expected Intrinsic ID
+ GIM_CheckIntrinsicID,
+
+ /// Check the operand is a specific predicate
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ /// - Expected predicate
+ GIM_CheckCmpPredicate,
+
+ /// Check the specified operand is an MBB
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ GIM_CheckIsMBB,
+
+ /// Check the specified operand is an Imm
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ GIM_CheckIsImm,
+
+ /// Check if the specified operand is safe to fold into the current
+ /// instruction.
+ /// - InsnID - Instruction ID
+ GIM_CheckIsSafeToFold,
+
+ /// Check the specified operands are identical.
+ /// The IgnoreCopies variant looks through COPY instructions before
+ /// comparing the operands.
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ /// - OtherInsnID - Other instruction ID
+ /// - OtherOpIdx - Other operand index
+ GIM_CheckIsSameOperand,
+ GIM_CheckIsSameOperandIgnoreCopies,
+
+ /// Predicates with 'let PredicateCodeUsesOperands = 1' need to examine some
+ /// named operands that will be recorded in RecordedOperands. Names of these
+ /// operands are referenced in the predicate argument list. The emitter
+ /// determines StoreIdx (it corresponds to the order in which names appear in
+ /// the argument list).
+ /// - InsnID - Instruction ID
+ /// - OpIdx - Operand index
+ /// - StoreIdx - Store location in RecordedOperands.
+ GIM_RecordNamedOperand,
+
+ /// Fail the current try-block, or completely fail to match if there is no
+ /// current try-block.
+ GIM_Reject,
+
+ //=== Renderers ===
+
+ /// Mutate an instruction
+ /// - NewInsnID - Instruction ID to define
+ /// - OldInsnID - Instruction ID to mutate
+ /// - NewOpcode - The new opcode to use
+ GIR_MutateOpcode,
+
+ /// Build a new instruction
+ /// - InsnID - Instruction ID to define
+ /// - Opcode - The new opcode to use
+ GIR_BuildMI,
+
+ /// Copy an operand to the specified instruction
+ /// - NewInsnID - Instruction ID to modify
+ /// - OldInsnID - Instruction ID to copy from
+ /// - OpIdx - The operand to copy
+ GIR_Copy,
+
+ /// Copy an operand to the specified instruction or add a zero register if the
+ /// operand is a zero immediate.
+ /// - NewInsnID - Instruction ID to modify
+ /// - OldInsnID - Instruction ID to copy from
+ /// - OpIdx - The operand to copy
+ /// - ZeroReg - The zero register to use
+ GIR_CopyOrAddZeroReg,
+ /// Copy an operand to the specified instruction
+ /// - NewInsnID - Instruction ID to modify
+ /// - OldInsnID - Instruction ID to copy from
+ /// - OpIdx - The operand to copy
+ /// - SubRegIdx - The subregister to copy
+ GIR_CopySubReg,
+
+ /// Add an implicit register def to the specified instruction
+ /// - InsnID - Instruction ID to modify
+ /// - RegNum - The register to add
+ GIR_AddImplicitDef,
+ /// Add an implicit register use to the specified instruction
+ /// - InsnID - Instruction ID to modify
+ /// - RegNum - The register to add
+ GIR_AddImplicitUse,
+ /// Add a register to the specified instruction
+ /// - InsnID - Instruction ID to modify
+ /// - RegNum - The register to add
+ GIR_AddRegister,
+
+ /// Add a temporary register to the specified instruction
+ /// - InsnID - Instruction ID to modify
+ /// - TempRegID - The temporary register ID to add
+ /// - TempRegFlags - The register flags to set
+ GIR_AddTempRegister,
+
+ /// Add a temporary register to the specified instruction
+ /// - InsnID - Instruction ID to modify
+ /// - TempRegID - The temporary register ID to add
+ /// - TempRegFlags - The register flags to set
+ /// - SubRegIndex - The subregister index to set
+ GIR_AddTempSubRegister,
+
+ /// Add an immediate to the specified instruction
+ /// - InsnID - Instruction ID to modify
+ /// - Imm - The immediate to add
+ GIR_AddImm,
+
+ /// Render complex operands to the specified instruction
+ /// - InsnID - Instruction ID to modify
+ /// - RendererID - The renderer to call
+ GIR_ComplexRenderer,
+ /// Render sub-operands of complex operands to the specified instruction
+ /// - InsnID - Instruction ID to modify
+ /// - RendererID - The renderer to call
+ /// - RenderOpID - The suboperand to render.
+ GIR_ComplexSubOperandRenderer,
+ /// Render subregisters of suboperands of complex operands to the
+ /// specified instruction
+ /// - InsnID - Instruction ID to modify
+ /// - RendererID - The renderer to call
+ /// - RenderOpID - The suboperand to render
+ /// - SubRegIdx - The subregister to extract
+ GIR_ComplexSubOperandSubRegRenderer,
+
+ /// Render operands to the specified instruction using a custom function
+ /// - InsnID - Instruction ID to modify
+ /// - OldInsnID - Instruction ID to get the matched operand from
+ /// - RendererFnID - Custom renderer function to call
+ GIR_CustomRenderer,
+
+ /// Calls a C++ function to perform an action when a match is complete.
+ /// The MatcherState is passed to the function to allow it to modify
+ /// instructions.
+ /// This is less constrained than a custom renderer and can update instructions
+ /// in the state.
+ /// - FnID - The function to call.
+ /// TODO: Remove this at some point when combiners aren't reliant on it. It's
+ /// a bit of a hack.
+ GIR_CustomAction,
+
+ /// Render operands to the specified instruction using a custom function,
+ /// reading from a specific operand.
+ /// - InsnID - Instruction ID to modify
+ /// - OldInsnID - Instruction ID to get the matched operand from
+ /// - OpIdx - Operand index in OldInsnID the render function should read
+ ///   from.
+ /// - RendererFnID - Custom renderer function to call
+ GIR_CustomOperandRenderer,
+
+ /// Render a G_CONSTANT operator as a sign-extended immediate.
+ /// - NewInsnID - Instruction ID to modify
+ /// - OldInsnID - Instruction ID to copy from
+ /// The operand index is implicitly 1.
+ GIR_CopyConstantAsSImm,
+
+ /// Render a G_FCONSTANT operator as a sign-extended immediate.
+ /// - NewInsnID - Instruction ID to modify
+ /// - OldInsnID - Instruction ID to copy from
+ /// The operand index is implicitly 1.
+ GIR_CopyFConstantAsFPImm,
+
+ /// Constrain an instruction operand to a register class.
+ /// - InsnID - Instruction ID to modify
+ /// - OpIdx - Operand index
+ /// - RCEnum - Register class enumeration value
+ GIR_ConstrainOperandRC,
+
+ /// Constrain an instruction's operands according to the instruction
+ /// description.
+ /// - InsnID - Instruction ID to modify
+ GIR_ConstrainSelectedInstOperands,
+
+ /// Merge all memory operands into instruction.
+ /// - InsnID - Instruction ID to modify
+ /// - MergeInsnID... - One or more instruction IDs to merge into the result.
+ /// - GIU_MergeMemOperands_EndOfList - Terminates the list of instructions to
+ /// merge.
+ GIR_MergeMemOperands,
+
+ /// Erase from parent.
+ /// - InsnID - Instruction ID to erase
+ GIR_EraseFromParent,
+
+ /// Create a new temporary register that's not constrained.
+ /// - TempRegID - The temporary register ID to initialize.
+ /// - Expected type
+ GIR_MakeTempReg,
+
+ /// A successful emission
+ GIR_Done,
+
+ /// Increment the rule coverage counter.
+ /// - RuleID - The ID of the rule that was covered.
+ GIR_Coverage,
+
+ /// Keeps track of the number of GI opcodes. Must be the last entry.
+ GIU_NumOpcodes,
+};
+
+enum {
+ /// Indicates the end of the variable-length MergeInsnID list in a
+ /// GIR_MergeMemOperands opcode.
+ GIU_MergeMemOperands_EndOfList = -1,
+};
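
Taken together, these opcodes form a compact bytecode that executeMatchTable (declared below and implemented in GIMatchTableExecutorImpl.h) interprets. Purely to illustrate the encoding (real tables are emitted by TableGen and reference target-specific rule, opcode, and type IDs), a tiny hand-written fragment that matches a G_BRCOND and then reports success could look like the sketch below; it only checks, whereas a real rule would also build or mutate instructions before GIR_Done:

  // Illustrative fragment only, not TableGen output. Operand counts follow the
  // opcode documentation above; table indices are shown in the comments.
  static const int64_t ExampleTable[] = {
      /*0*/ GIM_Try, /*OnFail*/ 9,
      /*2*/ GIM_CheckOpcode, /*InsnID*/ 0, /*Expected*/ TargetOpcode::G_BRCOND,
      /*5*/ GIM_CheckIsMBB,  /*InsnID*/ 0, /*OpIdx*/ 1,
      /*8*/ GIR_Done,
      /*9*/ GIM_Reject, // resumed here if any check above fails
  };

If any check inside the try-block fails, execution resumes at index 9, where GIM_Reject, finding no enclosing try-block left, abandons the match.
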
+
+/// Provides the logic to execute GlobalISel match tables, which are used by the
+/// instruction selector and instruction combiners as their engine to match and
+/// apply MIR patterns.
+class GIMatchTableExecutor {
+public:
+ virtual ~GIMatchTableExecutor() = default;
+
+ CodeGenCoverage *CoverageInfo = nullptr;
+ GISelKnownBits *KB = nullptr;
+ MachineFunction *MF = nullptr;
+ ProfileSummaryInfo *PSI = nullptr;
+ BlockFrequencyInfo *BFI = nullptr;
+ // For some predicates, we need to track the current MBB.
+ MachineBasicBlock *CurMBB = nullptr;
+
+ virtual void setupGeneratedPerFunctionState(MachineFunction &MF) {
+ llvm_unreachable("TableGen should have emitted implementation");
+ }
+
+ /// Setup per-MF executor state.
+ virtual void setupMF(MachineFunction &mf, GISelKnownBits *kb,
+ CodeGenCoverage *covinfo = nullptr,
+ ProfileSummaryInfo *psi = nullptr,
+ BlockFrequencyInfo *bfi = nullptr) {
+ CoverageInfo = covinfo;
+ KB = kb;
+ MF = &mf;
+ PSI = psi;
+ BFI = bfi;
+ CurMBB = nullptr;
+ setupGeneratedPerFunctionState(mf);
+ }
+
+protected:
+ using ComplexRendererFns =
+ std::optional<SmallVector<std::function<void(MachineInstrBuilder &)>, 4>>;
+ using RecordedMIVector = SmallVector<MachineInstr *, 4>;
+ using NewMIVector = SmallVector<MachineInstrBuilder, 4>;
+
+ struct MatcherState {
+ std::vector<ComplexRendererFns::value_type> Renderers;
+ RecordedMIVector MIs;
+ DenseMap<unsigned, unsigned> TempRegisters;
+ /// Named operands that a predicate with 'let PredicateCodeUsesOperands = 1'
+ /// references in its argument list. Operands are inserted at the index set by
+ /// the emitter, which corresponds to the order in which names appear in the
+ /// argument list. Currently such predicates don't have more than 3 arguments.
+ std::array<const MachineOperand *, 3> RecordedOperands;
+
+ MatcherState(unsigned MaxRenderers);
+ };
+
+ bool shouldOptForSize(const MachineFunction *MF) const {
+ const auto &F = MF->getFunction();
+ return F.hasOptSize() || F.hasMinSize() ||
+ (PSI && BFI && CurMBB && llvm::shouldOptForSize(*CurMBB, PSI, BFI));
+ }
+
+public:
+ template <class PredicateBitset, class ComplexMatcherMemFn,
+ class CustomRendererFn>
+ struct ExecInfoTy {
+ ExecInfoTy(const LLT *TypeObjects, size_t NumTypeObjects,
+ const PredicateBitset *FeatureBitsets,
+ const ComplexMatcherMemFn *ComplexPredicates,
+ const CustomRendererFn *CustomRenderers)
+ : TypeObjects(TypeObjects), FeatureBitsets(FeatureBitsets),
+ ComplexPredicates(ComplexPredicates),
+ CustomRenderers(CustomRenderers) {
+
+ for (size_t I = 0; I < NumTypeObjects; ++I)
+ TypeIDMap[TypeObjects[I]] = I;
+ }
+ const LLT *TypeObjects;
+ const PredicateBitset *FeatureBitsets;
+ const ComplexMatcherMemFn *ComplexPredicates;
+ const CustomRendererFn *CustomRenderers;
+
+ SmallDenseMap<LLT, unsigned, 64> TypeIDMap;
+ };
+
+protected:
+ GIMatchTableExecutor();
+
+ /// Execute a given matcher table and return true if the match was successful
+ /// and false otherwise.
+ template <class TgtExecutor, class PredicateBitset, class ComplexMatcherMemFn,
+ class CustomRendererFn>
+ bool executeMatchTable(
+ TgtExecutor &Exec, NewMIVector &OutMIs, MatcherState &State,
+ const ExecInfoTy<PredicateBitset, ComplexMatcherMemFn, CustomRendererFn>
+ &ISelInfo,
+ const int64_t *MatchTable, const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI, const PredicateBitset &AvailableFeatures,
+ CodeGenCoverage *CoverageInfo) const;
+
+ virtual const int64_t *getMatchTable() const {
+ llvm_unreachable("Should have been overridden by tablegen if used");
+ }
+
+ virtual bool testImmPredicate_I64(unsigned, int64_t) const {
+ llvm_unreachable(
+ "Subclasses must override this with a tablegen-erated function");
+ }
+ virtual bool testImmPredicate_APInt(unsigned, const APInt &) const {
+ llvm_unreachable(
+ "Subclasses must override this with a tablegen-erated function");
+ }
+ virtual bool testImmPredicate_APFloat(unsigned, const APFloat &) const {
+ llvm_unreachable(
+ "Subclasses must override this with a tablegen-erated function");
+ }
+ virtual bool testMIPredicate_MI(unsigned, const MachineInstr &,
+ const MatcherState &State) const {
+ llvm_unreachable(
+ "Subclasses must override this with a tablegen-erated function");
+ }
+
+ virtual bool testSimplePredicate(unsigned) const {
+ llvm_unreachable("Subclass does not implement testSimplePredicate!");
+ }
+
+ virtual void runCustomAction(unsigned, const MatcherState &State) const {
+ llvm_unreachable("Subclass does not implement runCustomAction!");
+ }
+
+ bool isOperandImmEqual(const MachineOperand &MO, int64_t Value,
+ const MachineRegisterInfo &MRI) const;
+
+ /// Return true if the specified operand is a G_PTR_ADD with a G_CONSTANT on
+ /// the right-hand side. GlobalISel's separation of pointer and integer types
+ /// means that we don't need to worry about G_OR with equivalent semantics.
+ bool isBaseWithConstantOffset(const MachineOperand &Root,
+ const MachineRegisterInfo &MRI) const;
+
+ /// Return true if MI can obviously be folded into IntoMI.
+ /// MI and IntoMI do not need to be in the same basic block, but MI must
+ /// precede IntoMI.
+ bool isObviouslySafeToFold(MachineInstr &MI, MachineInstr &IntoMI) const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_GLOBALISEL_GIMATCHTABLEEXECUTOR_H
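
For orientation, the sketch below shows the shape of a concrete executor. It is hypothetical (the class name, predicate ID, and hook bodies are invented for illustration); in-tree executors get these overrides TableGen-erated into <Target>GenGlobalISel.inc and typically call executeMatchTable from their select path.

  // Hypothetical sketch, not an in-tree executor; shown only to illustrate
  // which hooks a concrete subclass provides.
  #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
  using namespace llvm;

  class MyExecutor : public GIMatchTableExecutor {
  public:
    // executeMatchTable's debug output uses TgtExecutor::getName().
    static const char *getName() { return "my-executor"; }

    void setupGeneratedPerFunctionState(MachineFunction &) override {}

    const int64_t *getMatchTable() const override {
      static const int64_t Table[] = {GIM_Reject}; // trivial table: match nothing
      return Table;
    }

    bool testImmPredicate_I64(unsigned PredicateID, int64_t Imm) const override {
      // Illustrative predicate ID; real IDs are assigned by TableGen.
      return PredicateID == 1 && Imm >= 0 && Imm < 4096;
    }

    bool testSimplePredicate(unsigned) const override {
      // Combiners use this hook to test whether a rule is currently enabled.
      return true;
    }
  };

The subclass mostly supplies tables and predicate callbacks; the matching and rendering work all happens inside executeMatchTable, whose interpreter follows in the next file.
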
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
index fc4e94929d41..1d24563071cb 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
@@ -1,4 +1,4 @@
-//===- llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h --------*- C++ -*-===//
+//===- llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,17 +6,17 @@
//
//===----------------------------------------------------------------------===//
//
-/// \file This file declares the API for the instruction selector.
-/// This class is responsible for selecting machine instructions.
-/// It's implemented by the target. It's used by the InstructionSelect pass.
+/// \file This file implements GIMatchTableExecutor's `executeMatchTable`
+/// function. This is implemented in a separate file because the function is
+/// quite large.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTORIMPL_H
-#define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTORIMPL_H
+#ifndef LLVM_CODEGEN_GLOBALISEL_GIMATCHTABLEEXECUTORIMPL_H
+#define LLVM_CODEGEN_GLOBALISEL_GIMATCHTABLEEXECUTORIMPL_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -37,24 +37,16 @@
namespace llvm {
-/// GlobalISel PatFrag Predicates
-enum {
- GIPFP_I64_Invalid = 0,
- GIPFP_APInt_Invalid = 0,
- GIPFP_APFloat_Invalid = 0,
- GIPFP_MI_Invalid = 0,
-};
-
-template <class TgtInstructionSelector, class PredicateBitset,
- class ComplexMatcherMemFn, class CustomRendererFn>
-bool InstructionSelector::executeMatchTable(
- TgtInstructionSelector &ISel, NewMIVector &OutMIs, MatcherState &State,
- const ISelInfoTy<PredicateBitset, ComplexMatcherMemFn, CustomRendererFn>
- &ISelInfo,
+template <class TgtExecutor, class PredicateBitset, class ComplexMatcherMemFn,
+ class CustomRendererFn>
+bool GIMatchTableExecutor::executeMatchTable(
+ TgtExecutor &Exec, NewMIVector &OutMIs, MatcherState &State,
+ const ExecInfoTy<PredicateBitset, ComplexMatcherMemFn, CustomRendererFn>
+ &ExecInfo,
const int64_t *MatchTable, const TargetInstrInfo &TII,
MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
const RegisterBankInfo &RBI, const PredicateBitset &AvailableFeatures,
- CodeGenCoverage &CoverageInfo) const {
+ CodeGenCoverage *CoverageInfo) const {
uint64_t CurrentIdx = 0;
SmallVector<uint64_t, 4> OnFailResumeAt;
@@ -66,12 +58,12 @@ bool InstructionSelector::executeMatchTable(
enum RejectAction { RejectAndGiveUp, RejectAndResume };
auto handleReject = [&]() -> RejectAction {
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": Rejected\n");
if (OnFailResumeAt.empty())
return RejectAndGiveUp;
CurrentIdx = OnFailResumeAt.pop_back_val();
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": Resume at " << CurrentIdx << " ("
<< OnFailResumeAt.size() << " try-blocks remain)\n");
return RejectAndResume;
@@ -95,13 +87,14 @@ bool InstructionSelector::executeMatchTable(
int64_t MatcherOpcode = MatchTable[CurrentIdx++];
switch (MatcherOpcode) {
case GIM_Try: {
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": Begin try-block\n");
OnFailResumeAt.push_back(MatchTable[CurrentIdx++]);
break;
}
- case GIM_RecordInsn: {
+ case GIM_RecordInsn:
+ case GIM_RecordInsnIgnoreCopies: {
int64_t NewInsnID = MatchTable[CurrentIdx++];
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t OpIdx = MatchTable[CurrentIdx++];
@@ -112,21 +105,26 @@ bool InstructionSelector::executeMatchTable(
MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx);
if (!MO.isReg()) {
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": Not a register\n");
if (handleReject() == RejectAndGiveUp)
return false;
break;
}
if (MO.getReg().isPhysical()) {
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": Is a physical register\n");
if (handleReject() == RejectAndGiveUp)
return false;
break;
}
- MachineInstr *NewMI = MRI.getVRegDef(MO.getReg());
+ MachineInstr *NewMI;
+ if (MatcherOpcode == GIM_RecordInsnIgnoreCopies)
+ NewMI = getDefIgnoringCopies(MO.getReg(), MRI);
+ else
+ NewMI = MRI.getVRegDef(MO.getReg());
+
if ((size_t)NewInsnID < State.MIs.size())
State.MIs[NewInsnID] = NewMI;
else {
@@ -134,7 +132,7 @@ bool InstructionSelector::executeMatchTable(
"Expected to store MIs in order");
State.MIs.push_back(NewMI);
}
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": MIs[" << NewInsnID
<< "] = GIM_RecordInsn(" << InsnID << ", " << OpIdx
<< ")\n");
@@ -143,18 +141,17 @@ bool InstructionSelector::executeMatchTable(
case GIM_CheckFeatures: {
int64_t ExpectedBitsetID = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx
<< ": GIM_CheckFeatures(ExpectedBitsetID="
<< ExpectedBitsetID << ")\n");
- if ((AvailableFeatures & ISelInfo.FeatureBitsets[ExpectedBitsetID]) !=
- ISelInfo.FeatureBitsets[ExpectedBitsetID]) {
+ if ((AvailableFeatures & ExecInfo.FeatureBitsets[ExpectedBitsetID]) !=
+ ExecInfo.FeatureBitsets[ExpectedBitsetID]) {
if (handleReject() == RejectAndGiveUp)
return false;
}
break;
}
-
case GIM_CheckOpcode:
case GIM_CheckOpcodeIsEither: {
int64_t InsnID = MatchTable[CurrentIdx++];
@@ -166,13 +163,12 @@ bool InstructionSelector::executeMatchTable(
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
unsigned Opcode = State.MIs[InsnID]->getOpcode();
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
- dbgs() << CurrentIdx << ": GIM_CheckOpcode(MIs[" << InsnID
- << "], ExpectedOpcode=" << Expected0;
- if (MatcherOpcode == GIM_CheckOpcodeIsEither)
- dbgs() << " || " << Expected1;
- dbgs() << ") // Got=" << Opcode << "\n";
- );
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
+ dbgs() << CurrentIdx << ": GIM_CheckOpcode(MIs[" << InsnID
+ << "], ExpectedOpcode=" << Expected0;
+ if (MatcherOpcode == GIM_CheckOpcodeIsEither) dbgs()
+ << " || " << Expected1;
+ dbgs() << ") // Got=" << Opcode << "\n";);
if (Opcode != Expected0 && Opcode != Expected1) {
if (handleReject() == RejectAndGiveUp)
@@ -189,7 +185,7 @@ bool InstructionSelector::executeMatchTable(
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
const int64_t Opcode = State.MIs[InsnID]->getOpcode();
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), {
+ DEBUG_WITH_TYPE(TgtExecutor::getName(), {
dbgs() << CurrentIdx << ": GIM_SwitchOpcode(MIs[" << InsnID << "], ["
<< LowerBound << ", " << UpperBound << "), Default=" << Default
<< ", JumpTable...) // Got=" << Opcode << "\n";
@@ -217,7 +213,7 @@ bool InstructionSelector::executeMatchTable(
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx);
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), {
+ DEBUG_WITH_TYPE(TgtExecutor::getName(), {
dbgs() << CurrentIdx << ": GIM_SwitchType(MIs[" << InsnID
<< "]->getOperand(" << OpIdx << "), [" << LowerBound << ", "
<< UpperBound << "), Default=" << Default
@@ -232,8 +228,8 @@ bool InstructionSelector::executeMatchTable(
break;
}
const LLT Ty = MRI.getType(MO.getReg());
- const auto TyI = ISelInfo.TypeIDMap.find(Ty);
- if (TyI == ISelInfo.TypeIDMap.end()) {
+ const auto TyI = ExecInfo.TypeIDMap.find(Ty);
+ if (TyI == ExecInfo.TypeIDMap.end()) {
CurrentIdx = Default;
break;
}
@@ -254,7 +250,7 @@ bool InstructionSelector::executeMatchTable(
case GIM_CheckNumOperands: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t Expected = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_CheckNumOperands(MIs["
<< InsnID << "], Expected=" << Expected << ")\n");
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
@@ -271,7 +267,7 @@ bool InstructionSelector::executeMatchTable(
? MatchTable[CurrentIdx++]
: 1;
int64_t Predicate = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_CheckImmPredicate(MIs["
<< InsnID << "]->getOperand(" << OpIdx
<< "), Predicate=" << Predicate << ")\n");
@@ -279,7 +275,7 @@ bool InstructionSelector::executeMatchTable(
assert((State.MIs[InsnID]->getOperand(OpIdx).isImm() ||
State.MIs[InsnID]->getOperand(OpIdx).isCImm()) &&
"Expected immediate operand");
- assert(Predicate > GIPFP_I64_Invalid && "Expected a valid predicate");
+ assert(Predicate > GICXXPred_Invalid && "Expected a valid predicate");
int64_t Value = 0;
if (State.MIs[InsnID]->getOperand(OpIdx).isCImm())
Value = State.MIs[InsnID]->getOperand(OpIdx).getCImm()->getSExtValue();
@@ -296,20 +292,20 @@ bool InstructionSelector::executeMatchTable(
case GIM_CheckAPIntImmPredicate: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t Predicate = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs()
<< CurrentIdx << ": GIM_CheckAPIntImmPredicate(MIs["
<< InsnID << "], Predicate=" << Predicate << ")\n");
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
assert(State.MIs[InsnID]->getOpcode() == TargetOpcode::G_CONSTANT &&
"Expected G_CONSTANT");
- assert(Predicate > GIPFP_APInt_Invalid && "Expected a valid predicate");
- APInt Value;
- if (State.MIs[InsnID]->getOperand(1).isCImm())
- Value = State.MIs[InsnID]->getOperand(1).getCImm()->getValue();
- else
+ assert(Predicate > GICXXPred_Invalid &&
+ "Expected a valid predicate");
+ if (!State.MIs[InsnID]->getOperand(1).isCImm())
llvm_unreachable("Expected Imm or CImm operand");
+ const APInt &Value =
+ State.MIs[InsnID]->getOperand(1).getCImm()->getValue();
if (!testImmPredicate_APInt(Predicate, Value))
if (handleReject() == RejectAndGiveUp)
return false;
@@ -318,16 +314,19 @@ bool InstructionSelector::executeMatchTable(
case GIM_CheckAPFloatImmPredicate: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t Predicate = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs()
<< CurrentIdx << ": GIM_CheckAPFloatImmPredicate(MIs["
<< InsnID << "], Predicate=" << Predicate << ")\n");
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
assert(State.MIs[InsnID]->getOpcode() == TargetOpcode::G_FCONSTANT &&
"Expected G_FCONSTANT");
- assert(State.MIs[InsnID]->getOperand(1).isFPImm() && "Expected FPImm operand");
- assert(Predicate > GIPFP_APFloat_Invalid && "Expected a valid predicate");
- APFloat Value = State.MIs[InsnID]->getOperand(1).getFPImm()->getValueAPF();
+ assert(State.MIs[InsnID]->getOperand(1).isFPImm() &&
+ "Expected FPImm operand");
+ assert(Predicate > GICXXPred_Invalid &&
+ "Expected a valid predicate");
+ const APFloat &Value =
+ State.MIs[InsnID]->getOperand(1).getFPImm()->getValueAPF();
if (!testImmPredicate_APFloat(Predicate, Value))
if (handleReject() == RejectAndGiveUp)
@@ -338,7 +337,7 @@ bool InstructionSelector::executeMatchTable(
case GIM_CheckIsBuildVectorAllZeros: {
int64_t InsnID = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx
<< ": GIM_CheckBuildVectorAll{Zeros|Ones}(MIs["
<< InsnID << "])\n");
@@ -363,18 +362,33 @@ bool InstructionSelector::executeMatchTable(
break;
}
+ case GIM_CheckSimplePredicate: {
+ // Note: we don't check for invalid here because this is purely a hook to
+ // allow some executors (such as the combiner) to check arbitrary,
+ // contextless predicates, such as whether a rule is enabled or not.
+ int64_t Predicate = MatchTable[CurrentIdx++];
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
+ dbgs() << CurrentIdx
+ << ": GIM_CheckSimplePredicate(Predicate="
+ << Predicate << ")\n");
+ assert(Predicate > GICXXPred_Invalid && "Expected a valid predicate");
+ if (!testSimplePredicate(Predicate)) {
+ if (handleReject() == RejectAndGiveUp)
+ return false;
+ }
+ break;
+ }
case GIM_CheckCxxInsnPredicate: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t Predicate = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs()
<< CurrentIdx << ": GIM_CheckCxxPredicate(MIs["
<< InsnID << "], Predicate=" << Predicate << ")\n");
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
- assert(Predicate > GIPFP_MI_Invalid && "Expected a valid predicate");
+ assert(Predicate > GICXXPred_Invalid && "Expected a valid predicate");
- if (!testMIPredicate_MI(Predicate, *State.MIs[InsnID],
- State.RecordedOperands))
+ if (!testMIPredicate_MI(Predicate, *State.MIs[InsnID], State))
if (handleReject() == RejectAndGiveUp)
return false;
break;
@@ -382,7 +396,7 @@ bool InstructionSelector::executeMatchTable(
case GIM_CheckHasNoUse: {
int64_t InsnID = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_CheckHasNoUse(MIs["
<< InsnID << "]\n");
@@ -401,7 +415,7 @@ bool InstructionSelector::executeMatchTable(
case GIM_CheckAtomicOrdering: {
int64_t InsnID = MatchTable[CurrentIdx++];
AtomicOrdering Ordering = (AtomicOrdering)MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_CheckAtomicOrdering(MIs["
<< InsnID << "], " << (uint64_t)Ordering << ")\n");
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
@@ -418,7 +432,7 @@ bool InstructionSelector::executeMatchTable(
case GIM_CheckAtomicOrderingOrStrongerThan: {
int64_t InsnID = MatchTable[CurrentIdx++];
AtomicOrdering Ordering = (AtomicOrdering)MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx
<< ": GIM_CheckAtomicOrderingOrStrongerThan(MIs["
<< InsnID << "], " << (uint64_t)Ordering << ")\n");
@@ -436,7 +450,7 @@ bool InstructionSelector::executeMatchTable(
case GIM_CheckAtomicOrderingWeakerThan: {
int64_t InsnID = MatchTable[CurrentIdx++];
AtomicOrdering Ordering = (AtomicOrdering)MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx
<< ": GIM_CheckAtomicOrderingWeakerThan(MIs["
<< InsnID << "], " << (uint64_t)Ordering << ")\n");
@@ -467,17 +481,16 @@ bool InstructionSelector::executeMatchTable(
// a match earlier.
const uint64_t LastIdx = CurrentIdx + NumAddrSpace;
- const MachineMemOperand *MMO
- = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx);
+ const MachineMemOperand *MMO =
+ *(State.MIs[InsnID]->memoperands_begin() + MMOIdx);
const unsigned MMOAddrSpace = MMO->getAddrSpace();
bool Success = false;
for (int I = 0; I != NumAddrSpace; ++I) {
unsigned AddrSpace = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(
- TgtInstructionSelector::getName(),
- dbgs() << "addrspace(" << MMOAddrSpace << ") vs "
- << AddrSpace << '\n');
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
+ dbgs() << "addrspace(" << MMOAddrSpace << ") vs "
+ << AddrSpace << '\n');
if (AddrSpace == MMOAddrSpace) {
Success = true;
@@ -503,12 +516,13 @@ bool InstructionSelector::executeMatchTable(
break;
}
- MachineMemOperand *MMO
- = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx);
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ MachineMemOperand *MMO =
+ *(State.MIs[InsnID]->memoperands_begin() + MMOIdx);
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_CheckMemoryAlignment"
- << "(MIs[" << InsnID << "]->memoperands() + " << MMOIdx
- << ")->getAlignment() >= " << MinAlign << ")\n");
+ << "(MIs[" << InsnID << "]->memoperands() + "
+ << MMOIdx << ")->getAlignment() >= " << MinAlign
+ << ")\n");
if (MMO->getAlign() < MinAlign && handleReject() == RejectAndGiveUp)
return false;
@@ -519,10 +533,9 @@ bool InstructionSelector::executeMatchTable(
int64_t MMOIdx = MatchTable[CurrentIdx++];
uint64_t Size = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
- dbgs() << CurrentIdx
- << ": GIM_CheckMemorySizeEqual(MIs[" << InsnID
- << "]->memoperands() + " << MMOIdx
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
+ dbgs() << CurrentIdx << ": GIM_CheckMemorySizeEqual(MIs["
+ << InsnID << "]->memoperands() + " << MMOIdx
<< ", Size=" << Size << ")\n");
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
@@ -532,11 +545,12 @@ bool InstructionSelector::executeMatchTable(
break;
}
- MachineMemOperand *MMO = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx);
+ MachineMemOperand *MMO =
+ *(State.MIs[InsnID]->memoperands_begin() + MMOIdx);
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
- dbgs() << MMO->getSize() << " bytes vs " << Size
- << " bytes\n");
+ DEBUG_WITH_TYPE(TgtExecutor::getName(), dbgs() << MMO->getSize()
+ << " bytes vs " << Size
+ << " bytes\n");
if (MMO->getSize() != Size)
if (handleReject() == RejectAndGiveUp)
return false;
@@ -551,20 +565,19 @@ bool InstructionSelector::executeMatchTable(
int64_t OpIdx = MatchTable[CurrentIdx++];
DEBUG_WITH_TYPE(
- TgtInstructionSelector::getName(),
+ TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_CheckMemorySize"
- << (MatcherOpcode == GIM_CheckMemorySizeEqualToLLT
- ? "EqualTo"
- : MatcherOpcode == GIM_CheckMemorySizeGreaterThanLLT
- ? "GreaterThan"
- : "LessThan")
+ << (MatcherOpcode == GIM_CheckMemorySizeEqualToLLT ? "EqualTo"
+ : MatcherOpcode == GIM_CheckMemorySizeGreaterThanLLT
+ ? "GreaterThan"
+ : "LessThan")
<< "LLT(MIs[" << InsnID << "]->memoperands() + " << MMOIdx
<< ", OpIdx=" << OpIdx << ")\n");
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx);
if (!MO.isReg()) {
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": Not a register\n");
if (handleReject() == RejectAndGiveUp)
return false;
@@ -577,7 +590,8 @@ bool InstructionSelector::executeMatchTable(
break;
}
- MachineMemOperand *MMO = *(State.MIs[InsnID]->memoperands_begin() + MMOIdx);
+ MachineMemOperand *MMO =
+ *(State.MIs[InsnID]->memoperands_begin() + MMOIdx);
unsigned Size = MRI.getType(MO.getReg()).getSizeInBits();
if (MatcherOpcode == GIM_CheckMemorySizeEqualToLLT &&
@@ -599,14 +613,14 @@ bool InstructionSelector::executeMatchTable(
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t OpIdx = MatchTable[CurrentIdx++];
int64_t TypeID = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_CheckType(MIs[" << InsnID
<< "]->getOperand(" << OpIdx
<< "), TypeID=" << TypeID << ")\n");
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
MachineOperand &MO = State.MIs[InsnID]->getOperand(OpIdx);
if (!MO.isReg() ||
- MRI.getType(MO.getReg()) != ISelInfo.TypeObjects[TypeID]) {
+ MRI.getType(MO.getReg()) != ExecInfo.TypeObjects[TypeID]) {
if (handleReject() == RejectAndGiveUp)
return false;
}
@@ -617,7 +631,7 @@ bool InstructionSelector::executeMatchTable(
int64_t OpIdx = MatchTable[CurrentIdx++];
uint64_t SizeInBits = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_CheckPointerToAny(MIs["
<< InsnID << "]->getOperand(" << OpIdx
<< "), SizeInBits=" << SizeInBits << ")\n");
@@ -648,7 +662,7 @@ bool InstructionSelector::executeMatchTable(
int64_t OpIdx = MatchTable[CurrentIdx++];
uint64_t StoreIdx = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_RecordNamedOperand(MIs["
<< InsnID << "]->getOperand(" << OpIdx
<< "), StoreIdx=" << StoreIdx << ")\n");
@@ -661,7 +675,7 @@ bool InstructionSelector::executeMatchTable(
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t OpIdx = MatchTable[CurrentIdx++];
int64_t RCEnum = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_CheckRegBankForClass(MIs["
<< InsnID << "]->getOperand(" << OpIdx
<< "), RCEnum=" << RCEnum << ")\n");
@@ -682,7 +696,7 @@ bool InstructionSelector::executeMatchTable(
int64_t OpIdx = MatchTable[CurrentIdx++];
int64_t RendererID = MatchTable[CurrentIdx++];
int64_t ComplexPredicateID = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": State.Renderers[" << RendererID
<< "] = GIM_CheckComplexPattern(MIs[" << InsnID
<< "]->getOperand(" << OpIdx
@@ -691,13 +705,12 @@ bool InstructionSelector::executeMatchTable(
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
// FIXME: Use std::invoke() when it's available.
ComplexRendererFns Renderer =
- (ISel.*ISelInfo.ComplexPredicates[ComplexPredicateID])(
+ (Exec.*ExecInfo.ComplexPredicates[ComplexPredicateID])(
State.MIs[InsnID]->getOperand(OpIdx));
if (Renderer)
State.Renderers[RendererID] = *Renderer;
- else
- if (handleReject() == RejectAndGiveUp)
- return false;
+ else if (handleReject() == RejectAndGiveUp)
+ return false;
break;
}
@@ -705,7 +718,7 @@ bool InstructionSelector::executeMatchTable(
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t OpIdx = MatchTable[CurrentIdx++];
int64_t Value = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_CheckConstantInt(MIs["
<< InsnID << "]->getOperand(" << OpIdx
<< "), Value=" << Value << ")\n");
@@ -730,7 +743,7 @@ bool InstructionSelector::executeMatchTable(
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t OpIdx = MatchTable[CurrentIdx++];
int64_t Value = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_CheckLiteralInt(MIs["
<< InsnID << "]->getOperand(" << OpIdx
<< "), Value=" << Value << ")\n");
@@ -752,7 +765,7 @@ bool InstructionSelector::executeMatchTable(
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t OpIdx = MatchTable[CurrentIdx++];
int64_t Value = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_CheckIntrinsicID(MIs["
<< InsnID << "]->getOperand(" << OpIdx
<< "), Value=" << Value << ")\n");
@@ -767,7 +780,7 @@ bool InstructionSelector::executeMatchTable(
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t OpIdx = MatchTable[CurrentIdx++];
int64_t Value = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_CheckCmpPredicate(MIs["
<< InsnID << "]->getOperand(" << OpIdx
<< "), Value=" << Value << ")\n");
@@ -781,7 +794,7 @@ bool InstructionSelector::executeMatchTable(
case GIM_CheckIsMBB: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t OpIdx = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_CheckIsMBB(MIs[" << InsnID
<< "]->getOperand(" << OpIdx << "))\n");
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
@@ -794,7 +807,7 @@ bool InstructionSelector::executeMatchTable(
case GIM_CheckIsImm: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t OpIdx = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_CheckIsImm(MIs[" << InsnID
<< "]->getOperand(" << OpIdx << "))\n");
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
@@ -806,7 +819,7 @@ bool InstructionSelector::executeMatchTable(
}
case GIM_CheckIsSafeToFold: {
int64_t InsnID = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_CheckIsSafeToFold(MIs["
<< InsnID << "])\n");
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
@@ -816,31 +829,42 @@ bool InstructionSelector::executeMatchTable(
}
break;
}
- case GIM_CheckIsSameOperand: {
+ case GIM_CheckIsSameOperand:
+ case GIM_CheckIsSameOperandIgnoreCopies: {
int64_t InsnID = MatchTable[CurrentIdx++];
int64_t OpIdx = MatchTable[CurrentIdx++];
int64_t OtherInsnID = MatchTable[CurrentIdx++];
int64_t OtherOpIdx = MatchTable[CurrentIdx++];
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_CheckIsSameOperand(MIs["
<< InsnID << "][" << OpIdx << "], MIs["
<< OtherInsnID << "][" << OtherOpIdx << "])\n");
assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
assert(State.MIs[OtherInsnID] != nullptr && "Used insn before defined");
- if (!State.MIs[InsnID]->getOperand(OpIdx).isIdenticalTo(
- State.MIs[OtherInsnID]->getOperand(OtherOpIdx))) {
+
+ MachineOperand &Op = State.MIs[InsnID]->getOperand(OpIdx);
+ MachineOperand &OtherOp = State.MIs[OtherInsnID]->getOperand(OtherOpIdx);
+
+ if (MatcherOpcode == GIM_CheckIsSameOperandIgnoreCopies) {
+ if (Op.isReg() && OtherOp.isReg()) {
+ if (getSrcRegIgnoringCopies(Op.getReg(), MRI) ==
+ getSrcRegIgnoringCopies(OtherOp.getReg(), MRI))
+ break;
+ }
+ }
+
+ if (!Op.isIdenticalTo(OtherOp)) {
if (handleReject() == RejectAndGiveUp)
return false;
}
break;
}
case GIM_Reject:
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIM_Reject\n");
if (handleReject() == RejectAndGiveUp)
return false;
break;
-
case GIR_MutateOpcode: {
int64_t OldInsnID = MatchTable[CurrentIdx++];
uint64_t NewInsnID = MatchTable[CurrentIdx++];
@@ -851,7 +875,7 @@ bool InstructionSelector::executeMatchTable(
OutMIs[NewInsnID] = MachineInstrBuilder(*State.MIs[OldInsnID]->getMF(),
State.MIs[OldInsnID]);
OutMIs[NewInsnID]->setDesc(TII.get(NewOpcode));
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIR_MutateOpcode(OutMIs["
<< NewInsnID << "], MIs[" << OldInsnID << "], "
<< NewOpcode << ")\n");
@@ -866,7 +890,7 @@ bool InstructionSelector::executeMatchTable(
OutMIs[NewInsnID] = BuildMI(*State.MIs[0]->getParent(), State.MIs[0],
MIMetadata(*State.MIs[0]), TII.get(Opcode));
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIR_BuildMI(OutMIs["
<< NewInsnID << "], " << Opcode << ")\n");
break;
@@ -878,7 +902,7 @@ bool InstructionSelector::executeMatchTable(
int64_t OpIdx = MatchTable[CurrentIdx++];
assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction");
OutMIs[NewInsnID].add(State.MIs[OldInsnID]->getOperand(OpIdx));
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs()
<< CurrentIdx << ": GIR_Copy(OutMIs[" << NewInsnID
<< "], MIs[" << OldInsnID << "], " << OpIdx << ")\n");
@@ -896,7 +920,7 @@ bool InstructionSelector::executeMatchTable(
OutMIs[NewInsnID].addReg(ZeroReg);
else
OutMIs[NewInsnID].add(MO);
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIR_CopyOrAddZeroReg(OutMIs["
<< NewInsnID << "], MIs[" << OldInsnID << "], "
<< OpIdx << ", " << ZeroReg << ")\n");
@@ -911,7 +935,7 @@ bool InstructionSelector::executeMatchTable(
assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction");
OutMIs[NewInsnID].addReg(State.MIs[OldInsnID]->getOperand(OpIdx).getReg(),
0, SubRegIdx);
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIR_CopySubReg(OutMIs["
<< NewInsnID << "], MIs[" << OldInsnID << "], "
<< OpIdx << ", " << SubRegIdx << ")\n");
@@ -923,7 +947,7 @@ bool InstructionSelector::executeMatchTable(
int64_t RegNum = MatchTable[CurrentIdx++];
assert(OutMIs[InsnID] && "Attempted to add to undefined instruction");
OutMIs[InsnID].addDef(RegNum, RegState::Implicit);
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIR_AddImplicitDef(OutMIs["
<< InsnID << "], " << RegNum << ")\n");
break;
@@ -934,7 +958,7 @@ bool InstructionSelector::executeMatchTable(
int64_t RegNum = MatchTable[CurrentIdx++];
assert(OutMIs[InsnID] && "Attempted to add to undefined instruction");
OutMIs[InsnID].addUse(RegNum, RegState::Implicit);
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIR_AddImplicitUse(OutMIs["
<< InsnID << "], " << RegNum << ")\n");
break;
@@ -946,10 +970,10 @@ bool InstructionSelector::executeMatchTable(
uint64_t RegFlags = MatchTable[CurrentIdx++];
assert(OutMIs[InsnID] && "Attempted to add to undefined instruction");
OutMIs[InsnID].addReg(RegNum, RegFlags);
- DEBUG_WITH_TYPE(
- TgtInstructionSelector::getName(),
- dbgs() << CurrentIdx << ": GIR_AddRegister(OutMIs["
- << InsnID << "], " << RegNum << ", " << RegFlags << ")\n");
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
+ dbgs()
+ << CurrentIdx << ": GIR_AddRegister(OutMIs[" << InsnID
+ << "], " << RegNum << ", " << RegFlags << ")\n");
break;
}
@@ -964,14 +988,14 @@ bool InstructionSelector::executeMatchTable(
assert(OutMIs[InsnID] && "Attempted to add to undefined instruction");
- OutMIs[InsnID].addReg(State.TempRegisters[TempRegID], TempRegFlags, SubReg);
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
- dbgs() << CurrentIdx << ": GIR_AddTempRegister(OutMIs["
- << InsnID << "], TempRegisters[" << TempRegID
- << "]";
- if (SubReg)
- dbgs() << '.' << TRI.getSubRegIndexName(SubReg);
- dbgs() << ", " << TempRegFlags << ")\n");
+ OutMIs[InsnID].addReg(State.TempRegisters[TempRegID], TempRegFlags,
+ SubReg);
+ DEBUG_WITH_TYPE(
+ TgtExecutor::getName(),
+ dbgs() << CurrentIdx << ": GIR_AddTempRegister(OutMIs[" << InsnID
+ << "], TempRegisters[" << TempRegID << "]";
+ if (SubReg) dbgs() << '.' << TRI.getSubRegIndexName(SubReg);
+ dbgs() << ", " << TempRegFlags << ")\n");
break;
}
@@ -980,7 +1004,7 @@ bool InstructionSelector::executeMatchTable(
int64_t Imm = MatchTable[CurrentIdx++];
assert(OutMIs[InsnID] && "Attempted to add to undefined instruction");
OutMIs[InsnID].addImm(Imm);
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIR_AddImm(OutMIs[" << InsnID
<< "], " << Imm << ")\n");
break;
@@ -992,7 +1016,7 @@ bool InstructionSelector::executeMatchTable(
assert(OutMIs[InsnID] && "Attempted to add to undefined instruction");
for (const auto &RenderOpFn : State.Renderers[RendererID])
RenderOpFn(OutMIs[InsnID]);
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIR_ComplexRenderer(OutMIs["
<< InsnID << "], " << RendererID << ")\n");
break;
@@ -1003,19 +1027,36 @@ bool InstructionSelector::executeMatchTable(
int64_t RenderOpID = MatchTable[CurrentIdx++];
assert(OutMIs[InsnID] && "Attempted to add to undefined instruction");
State.Renderers[RendererID][RenderOpID](OutMIs[InsnID]);
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx
<< ": GIR_ComplexSubOperandRenderer(OutMIs["
<< InsnID << "], " << RendererID << ", "
<< RenderOpID << ")\n");
break;
}
+ case GIR_ComplexSubOperandSubRegRenderer: {
+ int64_t InsnID = MatchTable[CurrentIdx++];
+ int64_t RendererID = MatchTable[CurrentIdx++];
+ int64_t RenderOpID = MatchTable[CurrentIdx++];
+ int64_t SubRegIdx = MatchTable[CurrentIdx++];
+ MachineInstrBuilder &MI = OutMIs[InsnID];
+ assert(MI && "Attempted to add to undefined instruction");
+ State.Renderers[RendererID][RenderOpID](MI);
+ MI->getOperand(MI->getNumOperands() - 1).setSubReg(SubRegIdx);
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
+ dbgs() << CurrentIdx
+ << ": GIR_ComplexSubOperandSubRegRenderer(OutMIs["
+ << InsnID << "], " << RendererID << ", "
+ << RenderOpID << ", " << SubRegIdx << ")\n");
+ break;
+ }
case GIR_CopyConstantAsSImm: {
int64_t NewInsnID = MatchTable[CurrentIdx++];
int64_t OldInsnID = MatchTable[CurrentIdx++];
assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction");
- assert(State.MIs[OldInsnID]->getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+ assert(State.MIs[OldInsnID]->getOpcode() == TargetOpcode::G_CONSTANT &&
+ "Expected G_CONSTANT");
if (State.MIs[OldInsnID]->getOperand(1).isCImm()) {
OutMIs[NewInsnID].addImm(
State.MIs[OldInsnID]->getOperand(1).getCImm()->getSExtValue());
@@ -1023,7 +1064,7 @@ bool InstructionSelector::executeMatchTable(
OutMIs[NewInsnID].add(State.MIs[OldInsnID]->getOperand(1));
else
llvm_unreachable("Expected Imm or CImm operand");
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIR_CopyConstantAsSImm(OutMIs["
<< NewInsnID << "], MIs[" << OldInsnID << "])\n");
break;
@@ -1034,15 +1075,17 @@ bool InstructionSelector::executeMatchTable(
int64_t NewInsnID = MatchTable[CurrentIdx++];
int64_t OldInsnID = MatchTable[CurrentIdx++];
assert(OutMIs[NewInsnID] && "Attempted to add to undefined instruction");
- assert(State.MIs[OldInsnID]->getOpcode() == TargetOpcode::G_FCONSTANT && "Expected G_FCONSTANT");
+ assert(State.MIs[OldInsnID]->getOpcode() == TargetOpcode::G_FCONSTANT &&
+ "Expected G_FCONSTANT");
if (State.MIs[OldInsnID]->getOperand(1).isFPImm())
OutMIs[NewInsnID].addFPImm(
State.MIs[OldInsnID]->getOperand(1).getFPImm());
else
llvm_unreachable("Expected FPImm operand");
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
- dbgs() << CurrentIdx << ": GIR_CopyFPConstantAsFPImm(OutMIs["
- << NewInsnID << "], MIs[" << OldInsnID << "])\n");
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
+ dbgs()
+ << CurrentIdx << ": GIR_CopyFPConstantAsFPImm(OutMIs["
+ << NewInsnID << "], MIs[" << OldInsnID << "])\n");
break;
}
@@ -1051,13 +1094,22 @@ bool InstructionSelector::executeMatchTable(
int64_t OldInsnID = MatchTable[CurrentIdx++];
int64_t RendererFnID = MatchTable[CurrentIdx++];
assert(OutMIs[InsnID] && "Attempted to add to undefined instruction");
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIR_CustomRenderer(OutMIs["
<< InsnID << "], MIs[" << OldInsnID << "], "
<< RendererFnID << ")\n");
- (ISel.*ISelInfo.CustomRenderers[RendererFnID])(
- OutMIs[InsnID], *State.MIs[OldInsnID],
- -1); // Not a source operand of the old instruction.
+ (Exec.*ExecInfo.CustomRenderers[RendererFnID])(
+ OutMIs[InsnID], *State.MIs[OldInsnID],
+ -1); // Not a source operand of the old instruction.
+ break;
+ }
+ case GIR_CustomAction: {
+ int64_t FnID = MatchTable[CurrentIdx++];
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
+ dbgs() << CurrentIdx << ": GIR_CustomAction(FnID=" << FnID
+ << ")\n");
+ assert(FnID > GICXXCustomAction_Invalid && "Expected a valid FnID");
+ runCustomAction(FnID, State);
break;
}
case GIR_CustomOperandRenderer: {
@@ -1067,15 +1119,13 @@ bool InstructionSelector::executeMatchTable(
int64_t RendererFnID = MatchTable[CurrentIdx++];
assert(OutMIs[InsnID] && "Attempted to add to undefined instruction");
- DEBUG_WITH_TYPE(
- TgtInstructionSelector::getName(),
- dbgs() << CurrentIdx << ": GIR_CustomOperandRenderer(OutMIs["
- << InsnID << "], MIs[" << OldInsnID << "]->getOperand("
- << OpIdx << "), "
- << RendererFnID << ")\n");
- (ISel.*ISelInfo.CustomRenderers[RendererFnID])(OutMIs[InsnID],
- *State.MIs[OldInsnID],
- OpIdx);
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
+ dbgs() << CurrentIdx
+ << ": GIR_CustomOperandRenderer(OutMIs[" << InsnID
+ << "], MIs[" << OldInsnID << "]->getOperand("
+ << OpIdx << "), " << RendererFnID << ")\n");
+ (Exec.*ExecInfo.CustomRenderers[RendererFnID])(
+ OutMIs[InsnID], *State.MIs[OldInsnID], OpIdx);
break;
}
case GIR_ConstrainOperandRC: {
@@ -1089,7 +1139,7 @@ bool InstructionSelector::executeMatchTable(
const TargetRegisterClass &RC = *TRI.getRegClass(RCEnum);
MachineOperand &MO = I.getOperand(OpIdx);
constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, RC, MO);
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIR_ConstrainOperandRC(OutMIs["
<< InsnID << "], " << OpIdx << ", " << RCEnum
<< ")\n");
@@ -1101,7 +1151,7 @@ bool InstructionSelector::executeMatchTable(
assert(OutMIs[InsnID] && "Attempted to add to undefined instruction");
constrainSelectedInstRegOperands(*OutMIs[InsnID].getInstr(), TII, TRI,
RBI);
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx
<< ": GIR_ConstrainSelectedInstOperands(OutMIs["
<< InsnID << "])\n");
@@ -1112,18 +1162,18 @@ bool InstructionSelector::executeMatchTable(
int64_t InsnID = MatchTable[CurrentIdx++];
assert(OutMIs[InsnID] && "Attempted to add to undefined instruction");
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIR_MergeMemOperands(OutMIs["
<< InsnID << "]");
int64_t MergeInsnID = GIU_MergeMemOperands_EndOfList;
while ((MergeInsnID = MatchTable[CurrentIdx++]) !=
GIU_MergeMemOperands_EndOfList) {
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << ", MIs[" << MergeInsnID << "]");
for (const auto &MMO : State.MIs[MergeInsnID]->memoperands())
OutMIs[InsnID].addMemOperand(MMO);
}
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(), dbgs() << ")\n");
+ DEBUG_WITH_TYPE(TgtExecutor::getName(), dbgs() << ")\n");
break;
}
@@ -1132,7 +1182,7 @@ bool InstructionSelector::executeMatchTable(
assert(State.MIs[InsnID] &&
"Attempted to erase an undefined instruction");
State.MIs[InsnID]->eraseFromParent();
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIR_EraseFromParent(MIs["
<< InsnID << "])\n");
break;
@@ -1143,8 +1193,8 @@ bool InstructionSelector::executeMatchTable(
int64_t TypeID = MatchTable[CurrentIdx++];
State.TempRegisters[TempRegID] =
- MRI.createGenericVirtualRegister(ISelInfo.TypeObjects[TypeID]);
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ MRI.createGenericVirtualRegister(ExecInfo.TypeObjects[TypeID]);
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": TempRegs[" << TempRegID
<< "] = GIR_MakeTempReg(" << TypeID << ")\n");
break;
@@ -1152,20 +1202,20 @@ bool InstructionSelector::executeMatchTable(
case GIR_Coverage: {
int64_t RuleID = MatchTable[CurrentIdx++];
- CoverageInfo.setCovered(RuleID);
+ assert(CoverageInfo);
+ CoverageInfo->setCovered(RuleID);
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
- dbgs()
- << CurrentIdx << ": GIR_Coverage(" << RuleID << ")");
+ DEBUG_WITH_TYPE(TgtExecutor::getName(), dbgs() << CurrentIdx
+ << ": GIR_Coverage("
+ << RuleID << ")");
break;
}
case GIR_Done:
- DEBUG_WITH_TYPE(TgtInstructionSelector::getName(),
+ DEBUG_WITH_TYPE(TgtExecutor::getName(),
dbgs() << CurrentIdx << ": GIR_Done\n");
propagateFlags(OutMIs);
return true;
-
default:
llvm_unreachable("Unexpected command");
}
@@ -1174,4 +1224,4 @@ bool InstructionSelector::executeMatchTable(
} // end namespace llvm
-#endif // LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTORIMPL_H
+#endif // LLVM_CODEGEN_GLOBALISEL_GIMATCHTABLEEXECUTORIMPL_H
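A standalone sketch of the register case handled by the new GIM_CheckIsSameOperandIgnoreCopies opcode above; MachineOperand Op/OtherOp and a MachineRegisterInfo MRI are assumed to be in scope, and getSrcRegIgnoringCopies comes from llvm/CodeGen/GlobalISel/Utils.h.

  bool SameIgnoringCopies =
      Op.isReg() && OtherOp.isReg() &&
      getSrcRegIgnoringCopies(Op.getReg(), MRI) ==
          getSrcRegIgnoringCopies(OtherOp.getReg(), MRI);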
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h b/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
index 035c5a08feef..eff87c5617d9 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GISelKnownBits.h
@@ -93,10 +93,10 @@ public:
Align computeKnownAlignment(Register R, unsigned Depth = 0);
// Observer API. No-op for non-caching implementation.
- void erasingInstr(MachineInstr &MI) override{};
- void createdInstr(MachineInstr &MI) override{};
- void changingInstr(MachineInstr &MI) override{};
- void changedInstr(MachineInstr &MI) override{};
+ void erasingInstr(MachineInstr &MI) override {}
+ void createdInstr(MachineInstr &MI) override {}
+ void changingInstr(MachineInstr &MI) override {}
+ void changedInstr(MachineInstr &MI) override {}
protected:
unsigned getMaxDepth() const { return MaxDepth; }
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 049efa672f5b..8484d970aff0 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -258,6 +258,106 @@ public:
}
};
+/// Represents overflowing binary operations.
+/// Only carry-out:
+/// G_UADDO, G_SADDO, G_USUBO, G_SSUBO, G_UMULO, G_SMULO
+/// Carry-in and carry-out:
+/// G_UADDE, G_SADDE, G_USUBE, G_SSUBE
+class GBinOpCarryOut : public GenericMachineInstr {
+public:
+ Register getDstReg() const { return getReg(0); }
+ Register getCarryOutReg() const { return getReg(1); }
+ MachineOperand &getLHS() { return getOperand(2); }
+ MachineOperand &getRHS() { return getOperand(3); }
+
+ static bool classof(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_USUBO:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_USUBE:
+ case TargetOpcode::G_SSUBE:
+ case TargetOpcode::G_UMULO:
+ case TargetOpcode::G_SMULO:
+ return true;
+ default:
+ return false;
+ }
+ }
+};
+
+/// Represents overflowing add/sub operations.
+/// Only carry-out:
+/// G_UADDO, G_SADDO, G_USUBO, G_SSUBO
+/// Carry-in and carry-out:
+/// G_UADDE, G_SADDE, G_USUBE, G_SSUBE
+class GAddSubCarryOut : public GBinOpCarryOut {
+public:
+ bool isAdd() const {
+ switch (getOpcode()) {
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_SADDE:
+ return true;
+ default:
+ return false;
+ }
+ }
+ bool isSub() const { return !isAdd(); }
+
+ bool isSigned() const {
+ switch (getOpcode()) {
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_SSUBE:
+ return true;
+ default:
+ return false;
+ }
+ }
+ bool isUnsigned() const { return !isSigned(); }
+
+ static bool classof(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_USUBO:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_USUBE:
+ case TargetOpcode::G_SSUBE:
+ return true;
+ default:
+ return false;
+ }
+ }
+};
+
+/// Represents overflowing add/sub operations that also consume a carry-in.
+/// G_UADDE, G_SADDE, G_USUBE, G_SSUBE
+class GAddSubCarryInOut : public GAddSubCarryOut {
+public:
+ Register getCarryInReg() const { return getReg(4); }
+
+ static bool classof(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_USUBE:
+ case TargetOpcode::G_SSUBE:
+ return true;
+ default:
+ return false;
+ }
+ }
+};
+
} // namespace llvm
#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
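A minimal sketch of how the new wrapper classes are meant to be matched from a combine or selector, assuming a MachineInstr &MI is in scope; the surrounding logic is hypothetical.

  if (auto *Carry = dyn_cast<GAddSubCarryInOut>(&MI)) {
    // Only the carry-in forms (G_UADDE/G_SADDE/G_USUBE/G_SSUBE) reach here.
    Register CarryIn = Carry->getCarryInReg();
    Register CarryOut = Carry->getCarryOutReg();
    bool SignedSub = Carry->isSigned() && Carry->isSub();
    // A combine could, e.g., rewrite to the carry-out-only form when CarryIn is known zero.
  }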
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 67e884038b47..4d26af3e3e6d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -41,6 +41,8 @@ class CallLowering;
class Constant;
class ConstrainedFPIntrinsic;
class DataLayout;
+class DbgDeclareInst;
+class DbgValueInst;
class Instruction;
class MachineBasicBlock;
class MachineFunction;
@@ -67,7 +69,7 @@ public:
private:
/// Interface used to lower the everything related to calls.
- const CallLowering *CLI;
+ const CallLowering *CLI = nullptr;
/// This class contains the mapping between the Values to vreg related data.
class ValueToVRegInfo {
@@ -104,9 +106,7 @@ private:
return ValToVRegs.find(&V);
}
- bool contains(const Value &V) const {
- return ValToVRegs.find(&V) != ValToVRegs.end();
- }
+ bool contains(const Value &V) const { return ValToVRegs.contains(&V); }
void reset() {
ValToVRegs.clear();
@@ -117,7 +117,7 @@ private:
private:
VRegListT *insertVRegs(const Value &V) {
- assert(ValToVRegs.find(&V) == ValToVRegs.end() && "Value already exists");
+ assert(!ValToVRegs.contains(&V) && "Value already exists");
// We placement new using our fast allocator since we never try to free
// the vectors until translation is finished.
@@ -127,8 +127,7 @@ private:
}
OffsetListT *insertOffsets(const Value &V) {
- assert(TypeToOffsets.find(V.getType()) == TypeToOffsets.end() &&
- "Type already exists");
+ assert(!TypeToOffsets.contains(V.getType()) && "Type already exists");
auto *OffsetList = new (OffsetAlloc.Allocate()) OffsetListT();
TypeToOffsets[V.getType()] = OffsetList;
@@ -247,6 +246,20 @@ private:
bool translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MachineIRBuilder &MIRBuilder);
+ /// Returns the single livein physical register Arg was lowered to, if
+ /// possible.
+ std::optional<MCRegister> getArgPhysReg(Argument &Arg);
+
+ /// If DebugInst targets an Argument and its expression is an EntryValue,
+ /// lower it as an entry in the MF debug table.
+ bool translateIfEntryValueArgument(const DbgDeclareInst &DebugInst);
+
+ /// If DebugInst targets an Argument and its expression is an EntryValue,
+ /// lower it as a DBG_VALUE targeting the corresponding livein register for that
+ /// Argument.
+ bool translateIfEntryValueArgument(const DbgValueInst &DebugInst,
+ MachineIRBuilder &MIRBuilder);
+
bool translateInlineAsm(const CallBase &CB, MachineIRBuilder &MIRBuilder);
/// Common code for translating normal calls or invokes.
@@ -556,24 +569,24 @@ private:
std::unique_ptr<MachineIRBuilder> EntryBuilder;
// The MachineFunction currently being translated.
- MachineFunction *MF;
+ MachineFunction *MF = nullptr;
/// MachineRegisterInfo used to create virtual registers.
MachineRegisterInfo *MRI = nullptr;
- const DataLayout *DL;
+ const DataLayout *DL = nullptr;
/// Current target configuration. Controls how the pass handles errors.
- const TargetPassConfig *TPC;
+ const TargetPassConfig *TPC = nullptr;
CodeGenOpt::Level OptLevel;
/// Current optimization remark emitter. Used to report failures.
std::unique_ptr<OptimizationRemarkEmitter> ORE;
- AAResults *AA;
- AssumptionCache *AC;
- const TargetLibraryInfo *LibInfo;
+ AAResults *AA = nullptr;
+ AssumptionCache *AC = nullptr;
+ const TargetLibraryInfo *LibInfo = nullptr;
FunctionLoweringInfo FuncInfo;
// True when either the Target Machine specifies no optimizations or the
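The membership checks rewritten above rely on DenseMap::contains; a self-contained sketch with arbitrary key/value types:

  llvm::DenseMap<int, llvm::StringRef> Map;
  Map[1] = "one";
  bool HasOne = Map.contains(1); // replaces Map.find(1) != Map.end()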
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
index db1a5473e45a..1662136cfa94 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -7,427 +7,18 @@
//===----------------------------------------------------------------------===//
//
/// \file This file declares the API for the instruction selector.
-/// This class is responsible for selecting machine instructions.
-/// It's implemented by the target. It's used by the InstructionSelect pass.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H
#define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/IR/Function.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
-#include <bitset>
-#include <cstddef>
-#include <cstdint>
-#include <functional>
-#include <initializer_list>
-#include <optional>
-#include <vector>
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
namespace llvm {
-
-class BlockFrequencyInfo;
-class CodeGenCoverage;
-class MachineBasicBlock;
-class ProfileSummaryInfo;
-class APInt;
-class APFloat;
-class GISelKnownBits;
-class MachineInstr;
-class MachineInstrBuilder;
-class MachineFunction;
-class MachineOperand;
-class MachineRegisterInfo;
-class RegisterBankInfo;
-class TargetInstrInfo;
-class TargetRegisterInfo;
-
-/// Container class for CodeGen predicate results.
-/// This is convenient because std::bitset does not have a constructor
-/// with an initializer list of set bits.
-///
-/// Each InstructionSelector subclass should define a PredicateBitset class
-/// with:
-/// const unsigned MAX_SUBTARGET_PREDICATES = 192;
-/// using PredicateBitset = PredicateBitsetImpl<MAX_SUBTARGET_PREDICATES>;
-/// and updating the constant to suit the target. Tablegen provides a suitable
-/// definition for the predicates in use in <Target>GenGlobalISel.inc when
-/// GET_GLOBALISEL_PREDICATE_BITSET is defined.
-template <std::size_t MaxPredicates>
-class PredicateBitsetImpl : public std::bitset<MaxPredicates> {
+class InstructionSelector : public GIMatchTableExecutor {
public:
- // Cannot inherit constructors because it's not supported by VC++..
- PredicateBitsetImpl() = default;
-
- PredicateBitsetImpl(const std::bitset<MaxPredicates> &B)
- : std::bitset<MaxPredicates>(B) {}
-
- PredicateBitsetImpl(std::initializer_list<unsigned> Init) {
- for (auto I : Init)
- std::bitset<MaxPredicates>::set(I);
- }
-};
-
-enum {
- /// Begin a try-block to attempt a match and jump to OnFail if it is
- /// unsuccessful.
- /// - OnFail - The MatchTable entry at which to resume if the match fails.
- ///
- /// FIXME: This ought to take an argument indicating the number of try-blocks
- /// to exit on failure. It's usually one but the last match attempt of
- /// a block will need more. The (implemented) alternative is to tack a
- /// GIM_Reject on the end of each try-block which is simpler but
- /// requires an extra opcode and iteration in the interpreter on each
- /// failed match.
- GIM_Try,
-
- /// Switch over the opcode on the specified instruction
- /// - InsnID - Instruction ID
- /// - LowerBound - numerically minimum opcode supported
- /// - UpperBound - numerically maximum + 1 opcode supported
- /// - Default - failure jump target
- /// - JumpTable... - (UpperBound - LowerBound) (at least 2) jump targets
- GIM_SwitchOpcode,
-
- /// Switch over the LLT on the specified instruction operand
- /// - InsnID - Instruction ID
- /// - OpIdx - Operand index
- /// - LowerBound - numerically minimum Type ID supported
- /// - UpperBound - numerically maximum + 1 Type ID supported
- /// - Default - failure jump target
- /// - JumpTable... - (UpperBound - LowerBound) (at least 2) jump targets
- GIM_SwitchType,
-
- /// Record the specified instruction
- /// - NewInsnID - Instruction ID to define
- /// - InsnID - Instruction ID
- /// - OpIdx - Operand index
- GIM_RecordInsn,
-
- /// Check the feature bits
- /// - Expected features
- GIM_CheckFeatures,
-
- /// Check the opcode on the specified instruction
- /// - InsnID - Instruction ID
- /// - Expected opcode
- GIM_CheckOpcode,
-
- /// Check the opcode on the specified instruction, checking 2 acceptable
- /// alternatives.
- /// - InsnID - Instruction ID
- /// - Expected opcode
- /// - Alternative expected opcode
- GIM_CheckOpcodeIsEither,
-
- /// Check the instruction has the right number of operands
- /// - InsnID - Instruction ID
- /// - Expected number of operands
- GIM_CheckNumOperands,
- /// Check an immediate predicate on the specified instruction
- /// - InsnID - Instruction ID
- /// - The predicate to test
- GIM_CheckI64ImmPredicate,
- /// Check an immediate predicate on the specified instruction via an APInt.
- /// - InsnID - Instruction ID
- /// - The predicate to test
- GIM_CheckAPIntImmPredicate,
- /// Check a floating point immediate predicate on the specified instruction.
- /// - InsnID - Instruction ID
- /// - The predicate to test
- GIM_CheckAPFloatImmPredicate,
- /// Check an immediate predicate on the specified instruction
- /// - InsnID - Instruction ID
- /// - OpIdx - Operand index
- /// - The predicate to test
- GIM_CheckImmOperandPredicate,
- /// Check a memory operation has the specified atomic ordering.
- /// - InsnID - Instruction ID
- /// - Ordering - The AtomicOrdering value
- GIM_CheckAtomicOrdering,
- GIM_CheckAtomicOrderingOrStrongerThan,
- GIM_CheckAtomicOrderingWeakerThan,
- /// Check the size of the memory access for the given machine memory operand.
- /// - InsnID - Instruction ID
- /// - MMOIdx - MMO index
- /// - Size - The size in bytes of the memory access
- GIM_CheckMemorySizeEqualTo,
-
- /// Check the address space of the memory access for the given machine memory
- /// operand.
- /// - InsnID - Instruction ID
- /// - MMOIdx - MMO index
- /// - NumAddrSpace - Number of valid address spaces
- /// - AddrSpaceN - An allowed space of the memory access
- /// - AddrSpaceN+1 ...
- GIM_CheckMemoryAddressSpace,
-
- /// Check the minimum alignment of the memory access for the given machine
- /// memory operand.
- /// - InsnID - Instruction ID
- /// - MMOIdx - MMO index
- /// - MinAlign - Minimum acceptable alignment
- GIM_CheckMemoryAlignment,
-
- /// Check the size of the memory access for the given machine memory operand
- /// against the size of an operand.
- /// - InsnID - Instruction ID
- /// - MMOIdx - MMO index
- /// - OpIdx - The operand index to compare the MMO against
- GIM_CheckMemorySizeEqualToLLT,
- GIM_CheckMemorySizeLessThanLLT,
- GIM_CheckMemorySizeGreaterThanLLT,
-
- /// Check if this is a vector that can be treated as a vector splat
- /// constant. This is valid for both G_BUILD_VECTOR as well as
- /// G_BUILD_VECTOR_TRUNC. For AllOnes refers to individual bits, so a -1
- /// element.
- /// - InsnID - Instruction ID
- GIM_CheckIsBuildVectorAllOnes,
- GIM_CheckIsBuildVectorAllZeros,
-
- /// Check a generic C++ instruction predicate
- /// - InsnID - Instruction ID
- /// - PredicateID - The ID of the predicate function to call
- GIM_CheckCxxInsnPredicate,
-
- /// Check if there's no use of the first result.
- /// - InsnID - Instruction ID
- GIM_CheckHasNoUse,
-
- /// Check the type for the specified operand
- /// - InsnID - Instruction ID
- /// - OpIdx - Operand index
- /// - Expected type
- GIM_CheckType,
- /// Check the type of a pointer to any address space.
- /// - InsnID - Instruction ID
- /// - OpIdx - Operand index
- /// - SizeInBits - The size of the pointer value in bits.
- GIM_CheckPointerToAny,
- /// Check the register bank for the specified operand
- /// - InsnID - Instruction ID
- /// - OpIdx - Operand index
- /// - Expected register bank (specified as a register class)
- GIM_CheckRegBankForClass,
-
- /// Check the operand matches a complex predicate
- /// - InsnID - Instruction ID
- /// - OpIdx - Operand index
- /// - RendererID - The renderer to hold the result
- /// - Complex predicate ID
- GIM_CheckComplexPattern,
-
- /// Check the operand is a specific integer
- /// - InsnID - Instruction ID
- /// - OpIdx - Operand index
- /// - Expected integer
- GIM_CheckConstantInt,
- /// Check the operand is a specific literal integer (i.e. MO.isImm() or
- /// MO.isCImm() is true).
- /// - InsnID - Instruction ID
- /// - OpIdx - Operand index
- /// - Expected integer
- GIM_CheckLiteralInt,
- /// Check the operand is a specific intrinsic ID
- /// - InsnID - Instruction ID
- /// - OpIdx - Operand index
- /// - Expected Intrinsic ID
- GIM_CheckIntrinsicID,
-
- /// Check the operand is a specific predicate
- /// - InsnID - Instruction ID
- /// - OpIdx - Operand index
- /// - Expected predicate
- GIM_CheckCmpPredicate,
-
- /// Check the specified operand is an MBB
- /// - InsnID - Instruction ID
- /// - OpIdx - Operand index
- GIM_CheckIsMBB,
-
- /// Check the specified operand is an Imm
- /// - InsnID - Instruction ID
- /// - OpIdx - Operand index
- GIM_CheckIsImm,
-
- /// Check if the specified operand is safe to fold into the current
- /// instruction.
- /// - InsnID - Instruction ID
- GIM_CheckIsSafeToFold,
-
- /// Check the specified operands are identical.
- /// - InsnID - Instruction ID
- /// - OpIdx - Operand index
- /// - OtherInsnID - Other instruction ID
- /// - OtherOpIdx - Other operand index
- GIM_CheckIsSameOperand,
-
- /// Predicates with 'let PredicateCodeUsesOperands = 1' need to examine some
- /// named operands that will be recorded in RecordedOperands. Names of these
- /// operands are referenced in predicate argument list. Emitter determines
- /// StoreIdx(corresponds to the order in which names appear in argument list).
- /// - InsnID - Instruction ID
- /// - OpIdx - Operand index
- /// - StoreIdx - Store location in RecordedOperands.
- GIM_RecordNamedOperand,
-
- /// Fail the current try-block, or completely fail to match if there is no
- /// current try-block.
- GIM_Reject,
-
- //=== Renderers ===
-
- /// Mutate an instruction
- /// - NewInsnID - Instruction ID to define
- /// - OldInsnID - Instruction ID to mutate
- /// - NewOpcode - The new opcode to use
- GIR_MutateOpcode,
-
- /// Build a new instruction
- /// - InsnID - Instruction ID to define
- /// - Opcode - The new opcode to use
- GIR_BuildMI,
-
- /// Copy an operand to the specified instruction
- /// - NewInsnID - Instruction ID to modify
- /// - OldInsnID - Instruction ID to copy from
- /// - OpIdx - The operand to copy
- GIR_Copy,
-
- /// Copy an operand to the specified instruction or add a zero register if the
- /// operand is a zero immediate.
- /// - NewInsnID - Instruction ID to modify
- /// - OldInsnID - Instruction ID to copy from
- /// - OpIdx - The operand to copy
- /// - ZeroReg - The zero register to use
- GIR_CopyOrAddZeroReg,
- /// Copy an operand to the specified instruction
- /// - NewInsnID - Instruction ID to modify
- /// - OldInsnID - Instruction ID to copy from
- /// - OpIdx - The operand to copy
- /// - SubRegIdx - The subregister to copy
- GIR_CopySubReg,
-
- /// Add an implicit register def to the specified instruction
- /// - InsnID - Instruction ID to modify
- /// - RegNum - The register to add
- GIR_AddImplicitDef,
- /// Add an implicit register use to the specified instruction
- /// - InsnID - Instruction ID to modify
- /// - RegNum - The register to add
- GIR_AddImplicitUse,
- /// Add an register to the specified instruction
- /// - InsnID - Instruction ID to modify
- /// - RegNum - The register to add
- GIR_AddRegister,
-
- /// Add a temporary register to the specified instruction
- /// - InsnID - Instruction ID to modify
- /// - TempRegID - The temporary register ID to add
- /// - TempRegFlags - The register flags to set
- GIR_AddTempRegister,
-
- /// Add a temporary register to the specified instruction
- /// - InsnID - Instruction ID to modify
- /// - TempRegID - The temporary register ID to add
- /// - TempRegFlags - The register flags to set
- /// - SubRegIndex - The subregister index to set
- GIR_AddTempSubRegister,
-
- /// Add an immediate to the specified instruction
- /// - InsnID - Instruction ID to modify
- /// - Imm - The immediate to add
- GIR_AddImm,
- /// Render complex operands to the specified instruction
- /// - InsnID - Instruction ID to modify
- /// - RendererID - The renderer to call
- GIR_ComplexRenderer,
-
- /// Render sub-operands of complex operands to the specified instruction
- /// - InsnID - Instruction ID to modify
- /// - RendererID - The renderer to call
- /// - RenderOpID - The suboperand to render.
- GIR_ComplexSubOperandRenderer,
- /// Render operands to the specified instruction using a custom function
- /// - InsnID - Instruction ID to modify
- /// - OldInsnID - Instruction ID to get the matched operand from
- /// - RendererFnID - Custom renderer function to call
- GIR_CustomRenderer,
-
- /// Render operands to the specified instruction using a custom function,
- /// reading from a specific operand.
- /// - InsnID - Instruction ID to modify
- /// - OldInsnID - Instruction ID to get the matched operand from
- /// - OpIdx - Operand index in OldInsnID the render function should read from..
- /// - RendererFnID - Custom renderer function to call
- GIR_CustomOperandRenderer,
-
- /// Render a G_CONSTANT operator as a sign-extended immediate.
- /// - NewInsnID - Instruction ID to modify
- /// - OldInsnID - Instruction ID to copy from
- /// The operand index is implicitly 1.
- GIR_CopyConstantAsSImm,
-
- /// Render a G_FCONSTANT operator as a sign-extended immediate.
- /// - NewInsnID - Instruction ID to modify
- /// - OldInsnID - Instruction ID to copy from
- /// The operand index is implicitly 1.
- GIR_CopyFConstantAsFPImm,
-
- /// Constrain an instruction operand to a register class.
- /// - InsnID - Instruction ID to modify
- /// - OpIdx - Operand index
- /// - RCEnum - Register class enumeration value
- GIR_ConstrainOperandRC,
-
- /// Constrain an instructions operands according to the instruction
- /// description.
- /// - InsnID - Instruction ID to modify
- GIR_ConstrainSelectedInstOperands,
-
- /// Merge all memory operands into instruction.
- /// - InsnID - Instruction ID to modify
- /// - MergeInsnID... - One or more Instruction ID to merge into the result.
- /// - GIU_MergeMemOperands_EndOfList - Terminates the list of instructions to
- /// merge.
- GIR_MergeMemOperands,
-
- /// Erase from parent.
- /// - InsnID - Instruction ID to erase
- GIR_EraseFromParent,
-
- /// Create a new temporary register that's not constrained.
- /// - TempRegID - The temporary register ID to initialize.
- /// - Expected type
- GIR_MakeTempReg,
-
- /// A successful emission
- GIR_Done,
-
- /// Increment the rule coverage counter.
- /// - RuleID - The ID of the rule that was covered.
- GIR_Coverage,
-
- /// Keeping track of the number of the GI opcodes. Must be the last entry.
- GIU_NumOpcodes,
-};
-
-enum {
- /// Indicates the end of the variable-length MergeInsnID list in a
- /// GIR_MergeMemOperands opcode.
- GIU_MergeMemOperands_EndOfList = -1,
-};
-
-/// Provides the logic to select generic machine instructions.
-class InstructionSelector {
-public:
- virtual ~InstructionSelector() = default;
+ virtual ~InstructionSelector();
/// Select the (possibly generic) instruction \p I to only use target-specific
/// opcodes. It is OK to insert multiple instructions, but they cannot be
@@ -440,135 +31,7 @@ public:
/// for I in all mutated/inserted instructions:
/// !isPreISelGenericOpcode(I.getOpcode())
virtual bool select(MachineInstr &I) = 0;
-
- CodeGenCoverage *CoverageInfo = nullptr;
- GISelKnownBits *KnownBits = nullptr;
- MachineFunction *MF = nullptr;
- ProfileSummaryInfo *PSI = nullptr;
- BlockFrequencyInfo *BFI = nullptr;
- // For some predicates, we need to track the current MBB.
- MachineBasicBlock *CurMBB = nullptr;
-
- virtual void setupGeneratedPerFunctionState(MachineFunction &MF) {
- llvm_unreachable("TableGen should have emitted implementation");
- }
-
- /// Setup per-MF selector state.
- virtual void setupMF(MachineFunction &mf, GISelKnownBits *KB,
- CodeGenCoverage &covinfo, ProfileSummaryInfo *psi,
- BlockFrequencyInfo *bfi) {
- CoverageInfo = &covinfo;
- KnownBits = KB;
- MF = &mf;
- PSI = psi;
- BFI = bfi;
- CurMBB = nullptr;
- setupGeneratedPerFunctionState(mf);
- }
-
-protected:
- using ComplexRendererFns =
- std::optional<SmallVector<std::function<void(MachineInstrBuilder &)>, 4>>;
- using RecordedMIVector = SmallVector<MachineInstr *, 4>;
- using NewMIVector = SmallVector<MachineInstrBuilder, 4>;
-
- struct MatcherState {
- std::vector<ComplexRendererFns::value_type> Renderers;
- RecordedMIVector MIs;
- DenseMap<unsigned, unsigned> TempRegisters;
- /// Named operands that predicate with 'let PredicateCodeUsesOperands = 1'
- /// referenced in its argument list. Operands are inserted at index set by
- /// emitter, it corresponds to the order in which names appear in argument
- /// list. Currently such predicates don't have more then 3 arguments.
- std::array<const MachineOperand *, 3> RecordedOperands;
-
- MatcherState(unsigned MaxRenderers);
- };
-
- bool shouldOptForSize(const MachineFunction *MF) const {
- const auto &F = MF->getFunction();
- return F.hasOptSize() || F.hasMinSize() ||
- (PSI && BFI && CurMBB && llvm::shouldOptForSize(*CurMBB, PSI, BFI));
- }
-
-public:
- template <class PredicateBitset, class ComplexMatcherMemFn,
- class CustomRendererFn>
- struct ISelInfoTy {
- ISelInfoTy(const LLT *TypeObjects, size_t NumTypeObjects,
- const PredicateBitset *FeatureBitsets,
- const ComplexMatcherMemFn *ComplexPredicates,
- const CustomRendererFn *CustomRenderers)
- : TypeObjects(TypeObjects),
- FeatureBitsets(FeatureBitsets),
- ComplexPredicates(ComplexPredicates),
- CustomRenderers(CustomRenderers) {
-
- for (size_t I = 0; I < NumTypeObjects; ++I)
- TypeIDMap[TypeObjects[I]] = I;
- }
- const LLT *TypeObjects;
- const PredicateBitset *FeatureBitsets;
- const ComplexMatcherMemFn *ComplexPredicates;
- const CustomRendererFn *CustomRenderers;
-
- SmallDenseMap<LLT, unsigned, 64> TypeIDMap;
- };
-
-protected:
- InstructionSelector();
-
- /// Execute a given matcher table and return true if the match was successful
- /// and false otherwise.
- template <class TgtInstructionSelector, class PredicateBitset,
- class ComplexMatcherMemFn, class CustomRendererFn>
- bool executeMatchTable(
- TgtInstructionSelector &ISel, NewMIVector &OutMIs, MatcherState &State,
- const ISelInfoTy<PredicateBitset, ComplexMatcherMemFn, CustomRendererFn>
- &ISelInfo,
- const int64_t *MatchTable, const TargetInstrInfo &TII,
- MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
- const RegisterBankInfo &RBI, const PredicateBitset &AvailableFeatures,
- CodeGenCoverage &CoverageInfo) const;
-
- virtual const int64_t *getMatchTable() const {
- llvm_unreachable("Should have been overridden by tablegen if used");
- }
-
- virtual bool testImmPredicate_I64(unsigned, int64_t) const {
- llvm_unreachable(
- "Subclasses must override this with a tablegen-erated function");
- }
- virtual bool testImmPredicate_APInt(unsigned, const APInt &) const {
- llvm_unreachable(
- "Subclasses must override this with a tablegen-erated function");
- }
- virtual bool testImmPredicate_APFloat(unsigned, const APFloat &) const {
- llvm_unreachable(
- "Subclasses must override this with a tablegen-erated function");
- }
- virtual bool testMIPredicate_MI(
- unsigned, const MachineInstr &,
- const std::array<const MachineOperand *, 3> &Operands) const {
- llvm_unreachable(
- "Subclasses must override this with a tablegen-erated function");
- }
-
- bool isOperandImmEqual(const MachineOperand &MO, int64_t Value,
- const MachineRegisterInfo &MRI) const;
-
- /// Return true if the specified operand is a G_PTR_ADD with a G_CONSTANT on the
- /// right-hand side. GlobalISel's separation of pointer and integer types
- /// means that we don't need to worry about G_OR with equivalent semantics.
- bool isBaseWithConstantOffset(const MachineOperand &Root,
- const MachineRegisterInfo &MRI) const;
-
- /// Return true if MI can obviously be folded into IntoMI.
- /// MI and IntoMI do not need to be in the same basic blocks, but MI must
- /// preceed IntoMI.
- bool isObviouslySafeToFold(MachineInstr &MI, MachineInstr &IntoMI) const;
};
+} // namespace llvm
-} // end namespace llvm
-
-#endif // LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H
+#endif
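A minimal sketch of what a target selector looks like against the slimmed-down header; the class name and stub body are hypothetical, and the TableGen-erated pieces are assumed to live elsewhere.

  class MyTargetInstructionSelector : public InstructionSelector {
  public:
    bool select(MachineInstr &I) override {
      // A real target dispatches into its TableGen-erated match table here,
      // now inherited from GIMatchTableExecutor.
      return false;
    }
  };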
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h
index 3cacdc99dbf8..08233dba2041 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h
@@ -16,8 +16,8 @@
#define LLVM_CODEGEN_GLOBALISEL_LEGACYLEGALIZERINFO_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/TargetOpcodes.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include <unordered_map>
namespace llvm {
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h
index 7884b3f2ea6e..9f9e435b9ce2 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -75,7 +76,7 @@ public:
legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
ArrayRef<GISelChangeObserver *> AuxObservers,
LostDebugLocObserver &LocObserver,
- MachineIRBuilder &MIRBuilder);
+ MachineIRBuilder &MIRBuilder, GISelKnownBits *KB);
};
} // End namespace llvm.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index a019bc9876bd..a568edd0e640 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -21,6 +21,7 @@
#define LLVM_CODEGEN_GLOBALISEL_LEGALIZERHELPER_H
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetOpcodes.h"
@@ -56,6 +57,7 @@ private:
MachineRegisterInfo &MRI;
const LegalizerInfo &LI;
const TargetLowering &TLI;
+ GISelKnownBits *KB;
public:
enum LegalizeResult {
@@ -74,11 +76,13 @@ public:
/// Expose LegalizerInfo so the clients can re-use.
const LegalizerInfo &getLegalizerInfo() const { return LI; }
const TargetLowering &getTargetLowering() const { return TLI; }
+ GISelKnownBits *getKnownBits() const { return KB; }
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer,
MachineIRBuilder &B);
LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
- GISelChangeObserver &Observer, MachineIRBuilder &B);
+ GISelChangeObserver &Observer, MachineIRBuilder &B,
+ GISelKnownBits *KB = nullptr);
/// Replace \p MI by a sequence of legal instructions that can implement the
/// same operation. Note that this means \p MI may be deleted, so any iterator
@@ -324,6 +328,9 @@ public:
unsigned TypeIdx,
LLT NarrowTy);
+ /// Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
+ LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI);
+
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx,
LLT NarrowTy);
@@ -350,6 +357,7 @@ public:
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+ LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
/// Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
@@ -359,6 +367,7 @@ public:
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
LLT CastTy);
+ LegalizeResult lowerFConstant(MachineInstr &MI);
LegalizeResult lowerBitcast(MachineInstr &MI);
LegalizeResult lowerLoad(GAnyLoad &MI);
LegalizeResult lowerStore(GStore &MI);
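Sketch of constructing the helper with the new optional known-bits analysis; MF, LI, Observer, MIRBuilder and KB are assumed to be provided by the caller, and KB may be null.

  LegalizerHelper Helper(MF, LI, Observer, MIRBuilder, KB);
  GISelKnownBits *Known = Helper.getKnownBits(); // null when no analysis was passed in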
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
index 50b11a4920bb..d38ff71b1589 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -17,12 +17,12 @@
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/LegacyLegalizerInfo.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include <cassert>
#include <cstdint>
#include <tuple>
@@ -941,6 +941,28 @@ public:
changeElementTo(typeIdx(TypeIdx), Ty));
}
+ /// Ensure the vector is at least as wide as VectorSize bits by promoting the
+ /// element type.
+ LegalizeRuleSet &widenVectorEltsToVectorMinSize(unsigned TypeIdx,
+ unsigned VectorSize) {
+ using namespace LegalityPredicates;
+ using namespace LegalizeMutations;
+ return actionIf(
+ LegalizeAction::WidenScalar,
+ [=](const LegalityQuery &Query) {
+ const LLT VecTy = Query.Types[TypeIdx];
+ return VecTy.isVector() && !VecTy.isScalable() &&
+ VecTy.getSizeInBits() < VectorSize;
+ },
+ [=](const LegalityQuery &Query) {
+ const LLT VecTy = Query.Types[TypeIdx];
+ unsigned NumElts = VecTy.getNumElements();
+ unsigned MinSize = VectorSize / NumElts;
+ LLT NewTy = LLT::fixed_vector(NumElts, LLT::scalar(MinSize));
+ return std::make_pair(TypeIdx, NewTy);
+ });
+ }
+
/// Ensure the scalar is at least as wide as Ty.
LegalizeRuleSet &minScalar(unsigned TypeIdx, const LLT Ty) {
using namespace LegalityPredicates;
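Hypothetical use of the new rule in a target's action definitions: promote the element type of small fixed vectors for G_ADD until the whole vector is at least 64 bits wide.

  getActionDefinitionsBuilder(TargetOpcode::G_ADD)
      .widenVectorEltsToVectorMinSize(0, 64)
      .legalFor({LLT::fixed_vector(2, 32), LLT::fixed_vector(4, 16)});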
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
index 6efe7c7c9bbd..5562e76b67f6 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LoadStoreOpt.h
@@ -15,6 +15,7 @@
#define LLVM_CODEGEN_GLOBALISEL_LOADSTOREOPT_H
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -68,11 +69,11 @@ private:
/// on the given MachineFunction.
std::function<bool(const MachineFunction &)> DoNotRunPass;
- MachineRegisterInfo *MRI;
- const TargetLowering *TLI;
- MachineFunction *MF;
- AliasAnalysis *AA;
- const LegalizerInfo *LI;
+ MachineRegisterInfo *MRI = nullptr;
+ const TargetLowering *TLI = nullptr;
+ MachineFunction *MF = nullptr;
+ AliasAnalysis *AA = nullptr;
+ const LegalizerInfo *LI = nullptr;
MachineIRBuilder Builder;
@@ -131,6 +132,10 @@ private:
bool mergeBlockStores(MachineBasicBlock &MBB);
bool mergeFunctionStores(MachineFunction &MF);
+ bool mergeTruncStore(GStore &StoreMI,
+ SmallPtrSetImpl<GStore *> &DeletedStores);
+ bool mergeTruncStoresBlock(MachineBasicBlock &MBB);
+
/// Initialize some target-specific data structures for the store merging
/// optimization. \p AddrSpace indicates which address space to use when
/// probing the legalizer info for legal stores.
@@ -140,7 +145,7 @@ private:
/// that bit's value is legal. E.g. if bit 64 is set, then 64 bit scalar
/// stores are legal.
DenseMap<unsigned, BitVector> LegalStoreSizes;
- bool IsPreLegalizer;
+ bool IsPreLegalizer = false;
/// Contains instructions to be erased at the end of a block scan.
SmallSet<MachineInstr *, 16> InstsToErase;
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
index 9ea0d095eeb1..b1fcdd207a60 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
@@ -51,9 +51,9 @@ private:
/// MRI contains all the register class/bank information that this
/// pass uses and updates.
- MachineRegisterInfo *MRI;
+ MachineRegisterInfo *MRI = nullptr;
/// TTI used for getting remat costs for instructions.
- TargetTransformInfo *TTI;
+ TargetTransformInfo *TTI = nullptr;
/// Check if \p MOUse is used in the same basic block as \p Def.
/// If the use is in the same block, we say it is local.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index e5b48d9d52c0..5341b57477ce 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -284,6 +284,10 @@ public:
return getMF().getFunction().getParent()->getDataLayout();
}
+ LLVMContext &getContext() const {
+ return getMF().getFunction().getContext();
+ }
+
/// Getter for DebugLoc
const DebugLoc &getDL() { return State.DL; }
@@ -458,6 +462,17 @@ public:
/// \return a MachineInstrBuilder for the newly created instruction.
MachineInstrBuilder buildGlobalValue(const DstOp &Res, const GlobalValue *GV);
+ /// Build and insert \p Res = G_CONSTANT_POOL \p Idx
+ ///
+ /// G_CONSTANT_POOL materializes the address of an object in the constant
+ /// pool.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res must be a generic virtual register with pointer type.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx);
+
/// Build and insert \p Res = G_PTR_ADD \p Op0, \p Op1
///
/// G_PTR_ADD adds \p Op1 addressible units to the pointer specified by \p Op0,
@@ -1166,6 +1181,13 @@ public:
const SrcOp &Op0, const SrcOp &Op1,
std::optional<unsigned> Flags = std::nullopt);
+ /// Build and insert a \p Res = G_IS_FPCLASS \p Src, \p Mask
+ MachineInstrBuilder buildIsFPClass(const DstOp &Res, const SrcOp &Src,
+ unsigned Mask) {
+ return buildInstr(TargetOpcode::G_IS_FPCLASS, {Res},
+ {Src, SrcOp(static_cast<int64_t>(Mask))});
+ }
+
/// Build and insert a \p Res = G_SELECT \p Tst, \p Op0, \p Op1
///
/// \pre setBasicBlock or setMI must have been called.
@@ -1810,6 +1832,20 @@ public:
return buildInstr(TargetOpcode::G_FPOW, {Dst}, {Src0, Src1}, Flags);
}
+ /// Build and insert \p Dst = G_FLDEXP \p Src0, \p Src1
+ MachineInstrBuilder
+ buildFLdexp(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1,
+ std::optional<unsigned> Flags = std::nullopt) {
+ return buildInstr(TargetOpcode::G_FLDEXP, {Dst}, {Src0, Src1}, Flags);
+ }
+
+ /// Build and insert \p Fract, \p Exp = G_FFREXP \p Src
+ MachineInstrBuilder
+ buildFFrexp(const DstOp &Fract, const DstOp &Exp, const SrcOp &Src,
+ std::optional<unsigned> Flags = std::nullopt) {
+ return buildInstr(TargetOpcode::G_FFREXP, {Fract, Exp}, {Src}, Flags);
+ }
+
/// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0,
const SrcOp &Src1) {
@@ -1836,6 +1872,12 @@ public:
return buildInstr(TargetOpcode::G_FPTOSI, {Dst}, {Src0});
}
+ /// Build and insert \p Dst = G_FRINT \p Src0
+ MachineInstrBuilder buildFRint(const DstOp &Dst, const SrcOp &Src0,
+ std::optional<unsigned> Flags = std::nullopt) {
+ return buildInstr(TargetOpcode::G_FRINT, {Dst}, {Src0}, Flags);
+ }
+
/// Build and insert \p Res = G_SMIN \p Op0, \p Op1
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0,
const SrcOp &Src1) {
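Minimal sketches of the new builders, assuming a configured MachineIRBuilder B, LLTs S32 = LLT::scalar(32) and S1 = LLT::scalar(1), and source registers Val and Exp of matching types.

  auto Ldexp = B.buildFLdexp(S32, Val, Exp);     // Dst = Val * 2**Exp
  auto Frexp = B.buildFFrexp(S32, S32, Val);     // two defs: fraction and exponent
  auto Class = B.buildIsFPClass(S1, Val, fcNan); // G_IS_FPCLASS class-mask test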
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h b/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
index 8ca15bdae1de..609326e28e30 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
@@ -617,7 +617,7 @@ protected:
public:
/// Create a RegBankSelect pass with the specified \p RunningMode.
- RegBankSelect(Mode RunningMode = Fast);
+ RegBankSelect(char &PassID = ID, Mode RunningMode = Fast);
StringRef getPassName() const override { return "RegBankSelect"; }
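With the constructor now taking a pass ID, a target can register a derived register-bank selector under its own identity; a hypothetical sketch:

  class MyRegBankSelect : public RegBankSelect {
  public:
    static char ID;
    MyRegBankSelect() : RegBankSelect(ID, Mode::Fast) {}
  };
  char MyRegBankSelect::ID = 0;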
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index c9941afc8013..d5c1fd8d0d51 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -17,11 +17,11 @@
#include "GISelWorkList.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include <cstdint>
namespace llvm {
diff --git a/llvm/include/llvm/CodeGen/HardwareLoops.h b/llvm/include/llvm/CodeGen/HardwareLoops.h
new file mode 100644
index 000000000000..c7b6e0f5ae56
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/HardwareLoops.h
@@ -0,0 +1,76 @@
+//===- HardwareLoops.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Defines an IR pass for the creation of hardware loops.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_HARDWARELOOPS_H
+#define LLVM_CODEGEN_HARDWARELOOPS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct HardwareLoopOptions {
+ std::optional<unsigned> Decrement;
+ std::optional<unsigned> Bitwidth;
+ std::optional<bool> Force;
+ std::optional<bool> ForcePhi;
+ std::optional<bool> ForceNested;
+ std::optional<bool> ForceGuard;
+
+ HardwareLoopOptions &setDecrement(unsigned Count) {
+ Decrement = Count;
+ return *this;
+ }
+ HardwareLoopOptions &setCounterBitwidth(unsigned Width) {
+ Bitwidth = Width;
+ return *this;
+ }
+ HardwareLoopOptions &setForce(bool Force) {
+ this->Force = Force;
+ return *this;
+ }
+ HardwareLoopOptions &setForcePhi(bool Force) {
+ ForcePhi = Force;
+ return *this;
+ }
+ HardwareLoopOptions &setForceNested(bool Force) {
+ ForceNested = Force;
+ return *this;
+ }
+ HardwareLoopOptions &setForceGuard(bool Force) {
+ ForceGuard = Force;
+ return *this;
+ }
+ bool getForcePhi() const {
+ return ForcePhi.has_value() && ForcePhi.value();
+ }
+ bool getForceNested() const {
+ return ForceNested.has_value() && ForceNested.value();
+ }
+ bool getForceGuard() const {
+ return ForceGuard.has_value() && ForceGuard.value();
+ }
+};
+
+class HardwareLoopsPass : public PassInfoMixin<HardwareLoopsPass> {
+ HardwareLoopOptions Opts;
+
+public:
+ explicit HardwareLoopsPass(HardwareLoopOptions Opts = {})
+ : Opts(Opts) { }
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_HARDWARELOOPS_H
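Hypothetical new-pass-manager registration of the pass, assuming a FunctionPassManager FPM is in scope; the option values are illustrative.

  HardwareLoopOptions Opts;
  Opts.setDecrement(1).setCounterBitwidth(32).setForceGuard(true);
  FPM.addPass(HardwareLoopsPass(Opts));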
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index 157247dfba98..45ff99b9c973 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -263,9 +263,9 @@ enum NodeType {
/// These nodes take two operands of the same value type, and produce two
/// results. The first result is the normal add or sub result, the second
/// result is the carry flag result.
- /// FIXME: These nodes are deprecated in favor of ADDCARRY and SUBCARRY.
+ /// FIXME: These nodes are deprecated in favor of UADDO_CARRY and USUBO_CARRY.
/// They are kept around for now to provide a smooth transition path
- /// toward the use of ADDCARRY/SUBCARRY and will eventually be removed.
+ /// toward the use of UADDO_CARRY/USUBO_CARRY and will eventually be removed.
ADDC,
SUBC,
@@ -297,11 +297,11 @@ enum NodeType {
/// it, as the carry is a regular value rather than a glue, which allows
/// further optimisation.
///
- /// These opcodes are different from [US]{ADD,SUB}O in that ADDCARRY/SUBCARRY
- /// consume and produce a carry/borrow, whereas [US]{ADD,SUB}O produce an
- /// overflow.
- ADDCARRY,
- SUBCARRY,
+ /// These opcodes are different from [US]{ADD,SUB}O in that
+ /// U{ADD,SUB}O_CARRY consume and produce a carry/borrow, whereas
+ /// [US]{ADD,SUB}O produce an overflow.
+ UADDO_CARRY,
+ USUBO_CARRY,
/// Carry-using overflow-aware nodes for multiple precision addition and
/// subtraction. These nodes take three operands: The first two are normal lhs
@@ -411,6 +411,7 @@ enum NodeType {
STRICT_FSQRT,
STRICT_FPOW,
STRICT_FPOWI,
+ STRICT_FLDEXP,
STRICT_FSIN,
STRICT_FCOS,
STRICT_FEXP,
@@ -571,6 +572,19 @@ enum NodeType {
/// vector, but not the other way around.
EXTRACT_SUBVECTOR,
+ /// VECTOR_DEINTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and
+ /// output vectors having the same type. The first output contains the even
+ /// indices from CONCAT_VECTORS(VEC1, VEC2), with the second output
+ /// containing the odd indices. The relative order of elements within an
+ /// output matches that of the concatenated input.
+ VECTOR_DEINTERLEAVE,
+
+ /// VECTOR_INTERLEAVE(VEC1, VEC2) - Returns two vectors with all input and
+ /// output vectors having the same type. The first output contains the
+ /// result of interleaving the low half of CONCAT_VECTORS(VEC1, VEC2), with
+ /// the second output containing the result of interleaving the high half.
+ VECTOR_INTERLEAVE,
+
/// VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR,
/// whose elements are shuffled using the following algorithm:
/// RESULT[i] = VECTOR[VECTOR.ElementCount - 1 - i]
@@ -739,7 +753,7 @@ enum NodeType {
/// op #2 is a boolean indicating if there is an incoming carry. This
/// operator checks the result of "LHS - RHS - Carry", and can be used to
/// compare two wide integers:
- /// (setcccarry lhshi rhshi (subcarry lhslo rhslo) cc).
+ /// (setcccarry lhshi rhshi (usubo_carry lhslo rhslo) cc).
/// Only valid for integers.
SETCCCARRY,
@@ -913,8 +927,16 @@ enum NodeType {
FCBRT,
FSIN,
FCOS,
- FPOWI,
FPOW,
+ FPOWI,
+ /// FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
+ FLDEXP,
+
+ /// FFREXP - frexp, extract fractional and exponent component of a
+ /// floating-point value. Returns the two components as separate return
+ /// values.
+ FFREXP,
+
FLOG,
FLOG2,
FLOG10,
@@ -958,6 +980,30 @@ enum NodeType {
/// FSINCOS - Compute both fsin and fcos as a single operation.
FSINCOS,
+ /// Gets the current floating-point environment. The first operand is a token
+ /// chain. The results are FP environment, represented by an integer value,
+ /// and a token chain.
+ GET_FPENV,
+
+ /// Sets the current floating-point environment. The first operand is a token
+ /// chain, the second is FP environment, represented by an integer value. The
+ /// result is a token chain.
+ SET_FPENV,
+
+ /// Set floating-point environment to default state. The first operand and the
+ /// result are token chains.
+ RESET_FPENV,
+
+ /// Gets the current floating-point environment. The first operand is a token
+ /// chain, the second is a pointer to memory, where FP environment is stored
+ /// to. The result is a token chain.
+ GET_FPENV_MEM,
+
+ /// Sets the current floating point environment. The first operand is a token
+ /// chain, the second is a pointer to memory, where FP environment is loaded
+ /// from. The result is a token chain.
+ SET_FPENV_MEM,
+
/// LOAD and STORE have token chains as their first operand, then the same
/// operands as an LLVM load/store instruction, then an offset node that
/// is added / subtracted from the base pointer to form the address (for
@@ -1278,6 +1324,10 @@ enum NodeType {
/// FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
VECREDUCE_FMAX,
VECREDUCE_FMIN,
+ /// FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the
+ /// llvm.minimum and llvm.maximum semantics.
+ VECREDUCE_FMAXIMUM,
+ VECREDUCE_FMINIMUM,
/// Integer reductions may have a result type larger than the vector element
/// type. However, the reduction is performed using the vector element type
/// and the value in the top bits is unspecified.
@@ -1346,6 +1396,12 @@ std::optional<unsigned> getVPMaskIdx(unsigned Opcode);
/// The operand position of the explicit vector length parameter.
std::optional<unsigned> getVPExplicitVectorLengthIdx(unsigned Opcode);
+/// Translate this VP Opcode to its corresponding non-VP Opcode.
+std::optional<unsigned> getBaseOpcodeForVP(unsigned Opcode, bool hasFPExcept);
+
+/// Translate this non-VP Opcode to its corresponding VP Opcode.
+unsigned getVPForBaseOpcode(unsigned Opcode);
+
//===--------------------------------------------------------------------===//
/// MemIndexedMode enum - This enum defines the load / store indexed
/// addressing modes.
@@ -1495,6 +1551,12 @@ inline bool isExtOpcode(unsigned Opcode) {
Opcode == ISD::SIGN_EXTEND;
}
+inline bool isExtVecInRegOpcode(unsigned Opcode) {
+ return Opcode == ISD::ANY_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ Opcode == ISD::SIGN_EXTEND_VECTOR_INREG;
+}
+
namespace GlobalISel {
/// Return the operation corresponding to !(X op Y), where 'op' is a valid
/// SetCC operation. The U bit of the condition code has different meanings
diff --git a/llvm/include/llvm/CodeGen/IndirectThunks.h b/llvm/include/llvm/CodeGen/IndirectThunks.h
index 6da60fb658ae..b0a8e3043be5 100644
--- a/llvm/include/llvm/CodeGen/IndirectThunks.h
+++ b/llvm/include/llvm/CodeGen/IndirectThunks.h
@@ -33,7 +33,7 @@ protected:
InsertedThunksTy InsertedThunks;
void doInitialization(Module &M) {}
void createThunkFunction(MachineModuleInfo &MMI, StringRef Name,
- bool Comdat = true);
+ bool Comdat = true, StringRef TargetAttrs = "");
public:
void init(Module &M) {
@@ -46,7 +46,8 @@ public:
template <typename Derived, typename InsertedThunksTy>
void ThunkInserter<Derived, InsertedThunksTy>::createThunkFunction(
- MachineModuleInfo &MMI, StringRef Name, bool Comdat) {
+ MachineModuleInfo &MMI, StringRef Name, bool Comdat,
+ StringRef TargetAttrs) {
assert(Name.startswith(getDerived().getThunkPrefix()) &&
"Created a thunk with an unexpected prefix!");
@@ -67,6 +68,8 @@ void ThunkInserter<Derived, InsertedThunksTy>::createThunkFunction(
AttrBuilder B(Ctx);
B.addAttribute(llvm::Attribute::NoUnwind);
B.addAttribute(llvm::Attribute::Naked);
+ if (TargetAttrs != "")
+ B.addAttribute("target-features", TargetAttrs);
F->addFnAttrs(B);
// Populate our function a bit so that we can verify.
diff --git a/llvm/include/llvm/CodeGen/LatencyPriorityQueue.h b/llvm/include/llvm/CodeGen/LatencyPriorityQueue.h
index 95f4c6473542..556ef3f8cc78 100644
--- a/llvm/include/llvm/CodeGen/LatencyPriorityQueue.h
+++ b/llvm/include/llvm/CodeGen/LatencyPriorityQueue.h
@@ -31,7 +31,7 @@ namespace llvm {
class LatencyPriorityQueue : public SchedulingPriorityQueue {
// SUnits - The SUnits for the current graph.
- std::vector<SUnit> *SUnits;
+ std::vector<SUnit> *SUnits = nullptr;
/// NumNodesSolelyBlocking - This vector contains, for every node in the
/// Queue, the number of nodes that the node is the sole unscheduled
diff --git a/llvm/include/llvm/CodeGen/LiveIntervals.h b/llvm/include/llvm/CodeGen/LiveIntervals.h
index 0c846c6671a7..3b3a4e12f794 100644
--- a/llvm/include/llvm/CodeGen/LiveIntervals.h
+++ b/llvm/include/llvm/CodeGen/LiveIntervals.h
@@ -51,11 +51,11 @@ class TargetInstrInfo;
class VirtRegMap;
class LiveIntervals : public MachineFunctionPass {
- MachineFunction* MF;
- MachineRegisterInfo* MRI;
- const TargetRegisterInfo* TRI;
- const TargetInstrInfo *TII;
- SlotIndexes* Indexes;
+ MachineFunction *MF = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ SlotIndexes *Indexes = nullptr;
MachineDominatorTree *DomTree = nullptr;
LiveIntervalCalc *LICalc = nullptr;
@@ -417,8 +417,8 @@ class VirtRegMap;
/// method can result in inconsistent liveness tracking if multiple physical
/// registers share a regunit, and should be used cautiously.
void removeAllRegUnitsForPhysReg(MCRegister Reg) {
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
- removeRegUnit(*Units);
+ for (MCRegUnit Unit : TRI->regunits(Reg))
+ removeRegUnit(Unit);
}
/// Remove value numbers and related live segments starting at position
diff --git a/llvm/include/llvm/CodeGen/LivePhysRegs.h b/llvm/include/llvm/CodeGen/LivePhysRegs.h
index 27285d63aa83..76bb34d270a2 100644
--- a/llvm/include/llvm/CodeGen/LivePhysRegs.h
+++ b/llvm/include/llvm/CodeGen/LivePhysRegs.h
@@ -81,9 +81,8 @@ public:
void addReg(MCPhysReg Reg) {
assert(TRI && "LivePhysRegs is not initialized.");
assert(Reg <= TRI->getNumRegs() && "Expected a physical register.");
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- LiveRegs.insert(*SubRegs);
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ LiveRegs.insert(SubReg);
}
/// Removes a physical register, all its sub-registers, and all its
diff --git a/llvm/include/llvm/CodeGen/LiveRangeEdit.h b/llvm/include/llvm/CodeGen/LiveRangeEdit.h
index 507258fe665a..0950c20325fb 100644
--- a/llvm/include/llvm/CodeGen/LiveRangeEdit.h
+++ b/llvm/include/llvm/CodeGen/LiveRangeEdit.h
@@ -97,8 +97,7 @@ private:
/// a load, eliminate the register by folding the def into the use.
bool foldAsLoad(LiveInterval *LI, SmallVectorImpl<MachineInstr *> &Dead);
- using ToShrinkSet = SetVector<LiveInterval *, SmallVector<LiveInterval *, 8>,
- SmallPtrSet<LiveInterval *, 8>>;
+ using ToShrinkSet = SmallSetVector<LiveInterval *, 8>;
/// Helper for eliminateDeadDefs.
void eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink);
@@ -211,7 +210,7 @@ public:
/// by new MI in the index map.
/// Return the SlotIndex of the new instruction.
SlotIndex rematerializeAt(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI, unsigned DestReg,
+ MachineBasicBlock::iterator MI, Register DestReg,
const Remat &RM, const TargetRegisterInfo &,
bool Late = false, unsigned SubIdx = 0,
MachineInstr *ReplaceIndexMI = nullptr);
diff --git a/llvm/include/llvm/CodeGen/LiveRegMatrix.h b/llvm/include/llvm/CodeGen/LiveRegMatrix.h
index 9e28e4d243c2..2b32308c7c07 100644
--- a/llvm/include/llvm/CodeGen/LiveRegMatrix.h
+++ b/llvm/include/llvm/CodeGen/LiveRegMatrix.h
@@ -38,9 +38,9 @@ class TargetRegisterInfo;
class VirtRegMap;
class LiveRegMatrix : public MachineFunctionPass {
- const TargetRegisterInfo *TRI;
- LiveIntervals *LIS;
- VirtRegMap *VRM;
+ const TargetRegisterInfo *TRI = nullptr;
+ LiveIntervals *LIS = nullptr;
+ VirtRegMap *VRM = nullptr;
// UserTag changes whenever virtual registers have been modified.
unsigned UserTag = 0;
diff --git a/llvm/include/llvm/CodeGen/LiveRegUnits.h b/llvm/include/llvm/CodeGen/LiveRegUnits.h
index a5a8fc6d92a3..a750d5dec546 100644
--- a/llvm/include/llvm/CodeGen/LiveRegUnits.h
+++ b/llvm/include/llvm/CodeGen/LiveRegUnits.h
@@ -84,8 +84,8 @@ public:
/// Adds register units covered by physical register \p Reg.
void addReg(MCPhysReg Reg) {
- for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit)
- Units.set(*Unit);
+ for (MCRegUnit Unit : TRI->regunits(Reg))
+ Units.set(Unit);
}
/// Adds register units covered by physical register \p Reg that are
@@ -100,8 +100,8 @@ public:
/// Removes all register units covered by physical register \p Reg.
void removeReg(MCPhysReg Reg) {
- for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit)
- Units.reset(*Unit);
+ for (MCRegUnit Unit : TRI->regunits(Reg))
+ Units.reset(Unit);
}
/// Removes register units not preserved by the regmask \p RegMask.
@@ -114,8 +114,8 @@ public:
/// Returns true if no part of physical register \p Reg is live.
bool available(MCPhysReg Reg) const {
- for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) {
- if (Units.test(*Unit))
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ if (Units.test(Unit))
return false;
}
return true;
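
The hunks above all migrate from MCRegUnitIterator/MCSubRegIterator to the range-based accessors on TargetRegisterInfo. A sketch of the resulting idiom; the helper name and output format are illustrative:

#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Print the register units and the inclusive sub-registers of a physreg.
static void printRegCoverage(const TargetRegisterInfo &TRI, MCPhysReg Reg,
                             raw_ostream &OS) {
  OS << "units:";
  for (MCRegUnit Unit : TRI.regunits(Reg))
    OS << ' ' << unsigned(Unit);
  OS << "\nsubregs:";
  for (MCPhysReg SubReg : TRI.subregs_inclusive(Reg))
    OS << ' ' << TRI.getName(SubReg);
  OS << '\n';
}
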
diff --git a/llvm/include/llvm/CodeGen/LiveStacks.h b/llvm/include/llvm/CodeGen/LiveStacks.h
index 26f30fb4d088..2edc2985f0ee 100644
--- a/llvm/include/llvm/CodeGen/LiveStacks.h
+++ b/llvm/include/llvm/CodeGen/LiveStacks.h
@@ -33,7 +33,7 @@ class TargetRegisterClass;
class TargetRegisterInfo;
class LiveStacks : public MachineFunctionPass {
- const TargetRegisterInfo *TRI;
+ const TargetRegisterInfo *TRI = nullptr;
/// Special pool allocator for VNInfo's (LiveInterval val#).
///
diff --git a/llvm/include/llvm/CodeGen/LiveVariables.h b/llvm/include/llvm/CodeGen/LiveVariables.h
index 03a0517d2642..a1ed3c073251 100644
--- a/llvm/include/llvm/CodeGen/LiveVariables.h
+++ b/llvm/include/llvm/CodeGen/LiveVariables.h
@@ -124,11 +124,11 @@ private:
SparseBitVector<> PHIJoins;
private: // Intermediate data structures
- MachineFunction *MF;
+ MachineFunction *MF = nullptr;
- MachineRegisterInfo* MRI;
+ MachineRegisterInfo *MRI = nullptr;
- const TargetRegisterInfo *TRI;
+ const TargetRegisterInfo *TRI = nullptr;
// PhysRegInfo - Keep track of which instruction was the last def of a
// physical register. This is a purely local property, because all physical
diff --git a/llvm/include/llvm/CodeGen/LowLevelType.h b/llvm/include/llvm/CodeGen/LowLevelType.h
index 922f93d2e598..2924f475ac85 100644
--- a/llvm/include/llvm/CodeGen/LowLevelType.h
+++ b/llvm/include/llvm/CodeGen/LowLevelType.h
@@ -9,37 +9,420 @@
/// Implement a low-level type suitable for MachineInstr level instruction
/// selection.
///
-/// This provides the CodeGen aspects of LowLevelType, such as Type conversion.
+/// For a type attached to a MachineInstr, we only care about 2 details: total
+/// size and the number of vector lanes (if any). Accordingly, there are 4
+/// possible valid type-kinds:
+///
+/// * `sN` for scalars and aggregates
+/// * `<N x sM>` for vectors, which must have at least 2 elements.
+/// * `pN` for pointers
+/// * `<N x pM>` for vectors of pointers, which must also have at least 2
+///   elements.
+///
+/// Other information required for correct selection is expected to be carried
+/// by the opcode, or non-type flags. For example the distinction between G_ADD
+/// and G_FADD for int/float or fast-math flags.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_CODEGEN_LOWLEVELTYPE_H
#define LLVM_CODEGEN_LOWLEVELTYPE_H
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/Support/Debug.h"
+#include <cassert>
namespace llvm {
-class DataLayout;
class Type;
-struct fltSemantics;
+class raw_ostream;
+
+class LLT {
+public:
+ /// Get a low-level scalar or aggregate "bag of bits".
+ static constexpr LLT scalar(unsigned SizeInBits) {
+ return LLT{/*isPointer=*/false, /*isVector=*/false, /*isScalar=*/true,
+ ElementCount::getFixed(0), SizeInBits,
+ /*AddressSpace=*/0};
+ }
+
+ /// Get a low-level pointer in the given address space.
+ static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits) {
+ assert(SizeInBits > 0 && "invalid pointer size");
+ return LLT{/*isPointer=*/true, /*isVector=*/false, /*isScalar=*/false,
+ ElementCount::getFixed(0), SizeInBits, AddressSpace};
+ }
+
+ /// Get a low-level vector of some number of elements and element width.
+ static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits) {
+ assert(!EC.isScalar() && "invalid number of vector elements");
+ return LLT{/*isPointer=*/false, /*isVector=*/true, /*isScalar=*/false,
+ EC, ScalarSizeInBits, /*AddressSpace=*/0};
+ }
+
+ /// Get a low-level vector of some number of elements and element type.
+ static constexpr LLT vector(ElementCount EC, LLT ScalarTy) {
+ assert(!EC.isScalar() && "invalid number of vector elements");
+ assert(!ScalarTy.isVector() && "invalid vector element type");
+ return LLT{ScalarTy.isPointer(),
+ /*isVector=*/true,
+ /*isScalar=*/false,
+ EC,
+ ScalarTy.getSizeInBits().getFixedValue(),
+ ScalarTy.isPointer() ? ScalarTy.getAddressSpace() : 0};
+ }
+
+ /// Get a low-level fixed-width vector of some number of elements and element
+ /// width.
+ static constexpr LLT fixed_vector(unsigned NumElements,
+ unsigned ScalarSizeInBits) {
+ return vector(ElementCount::getFixed(NumElements), ScalarSizeInBits);
+ }
+
+ /// Get a low-level fixed-width vector of some number of elements and element
+ /// type.
+ static constexpr LLT fixed_vector(unsigned NumElements, LLT ScalarTy) {
+ return vector(ElementCount::getFixed(NumElements), ScalarTy);
+ }
+
+ /// Get a low-level scalable vector of some number of elements and element
+ /// width.
+ static constexpr LLT scalable_vector(unsigned MinNumElements,
+ unsigned ScalarSizeInBits) {
+ return vector(ElementCount::getScalable(MinNumElements), ScalarSizeInBits);
+ }
+
+ /// Get a low-level scalable vector of some number of elements and element
+ /// type.
+ static constexpr LLT scalable_vector(unsigned MinNumElements, LLT ScalarTy) {
+ return vector(ElementCount::getScalable(MinNumElements), ScalarTy);
+ }
+
+ static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy) {
+ return EC.isScalar() ? ScalarTy : LLT::vector(EC, ScalarTy);
+ }
+
+ static constexpr LLT scalarOrVector(ElementCount EC, uint64_t ScalarSize) {
+ assert(ScalarSize <= std::numeric_limits<unsigned>::max() &&
+ "Not enough bits in LLT to represent size");
+ return scalarOrVector(EC, LLT::scalar(static_cast<unsigned>(ScalarSize)));
+ }
+
+ explicit constexpr LLT(bool isPointer, bool isVector, bool isScalar,
+ ElementCount EC, uint64_t SizeInBits,
+ unsigned AddressSpace)
+ : LLT() {
+ init(isPointer, isVector, isScalar, EC, SizeInBits, AddressSpace);
+ }
+ explicit constexpr LLT()
+ : IsScalar(false), IsPointer(false), IsVector(false), RawData(0) {}
+
+ explicit LLT(MVT VT);
+
+ constexpr bool isValid() const { return IsScalar || RawData != 0; }
+
+ constexpr bool isScalar() const { return IsScalar; }
+
+ constexpr bool isPointer() const {
+ return isValid() && IsPointer && !IsVector;
+ }
+
+ constexpr bool isVector() const { return isValid() && IsVector; }
+
+ /// Returns the number of elements in a vector LLT. Must only be called on
+ /// vector types.
+ constexpr uint16_t getNumElements() const {
+ if (isScalable())
+ llvm::reportInvalidSizeRequest(
+ "Possible incorrect use of LLT::getNumElements() for "
+ "scalable vector. Scalable flag may be dropped, use "
+ "LLT::getElementCount() instead");
+ return getElementCount().getKnownMinValue();
+ }
+
+ /// Returns true if the LLT is a scalable vector. Must only be called on
+ /// vector types.
+ constexpr bool isScalable() const {
+ assert(isVector() && "Expected a vector type");
+ return IsPointer ? getFieldValue(PointerVectorScalableFieldInfo)
+ : getFieldValue(VectorScalableFieldInfo);
+ }
+
+ constexpr ElementCount getElementCount() const {
+ assert(IsVector && "cannot get number of elements on scalar/aggregate");
+ return ElementCount::get(IsPointer
+ ? getFieldValue(PointerVectorElementsFieldInfo)
+ : getFieldValue(VectorElementsFieldInfo),
+ isScalable());
+ }
+
+ /// Returns the total size of the type. Must only be called on sized types.
+ constexpr TypeSize getSizeInBits() const {
+ if (isPointer() || isScalar())
+ return TypeSize::Fixed(getScalarSizeInBits());
+ auto EC = getElementCount();
+ return TypeSize(getScalarSizeInBits() * EC.getKnownMinValue(),
+ EC.isScalable());
+ }
+
+ /// Returns the total size of the type in bytes, i.e. number of whole bytes
+ /// needed to represent the size in bits. Must only be called on sized types.
+ constexpr TypeSize getSizeInBytes() const {
+ TypeSize BaseSize = getSizeInBits();
+ return {(BaseSize.getKnownMinValue() + 7) / 8, BaseSize.isScalable()};
+ }
-/// Construct a low-level type based on an LLVM type.
-LLT getLLTForType(Type &Ty, const DataLayout &DL);
+ constexpr LLT getScalarType() const {
+ return isVector() ? getElementType() : *this;
+ }
-/// Get a rough equivalent of an MVT for a given LLT. MVT can't distinguish
-/// pointers, so these will convert to a plain integer.
-MVT getMVTForLLT(LLT Ty);
-EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx);
+ /// If this type is a vector, return a vector with the same number of elements
+ /// but the new element type. Otherwise, return the new element type.
+ constexpr LLT changeElementType(LLT NewEltTy) const {
+ return isVector() ? LLT::vector(getElementCount(), NewEltTy) : NewEltTy;
+ }
+
+ /// If this type is a vector, return a vector with the same number of elements
+ /// but the new element size. Otherwise, return the new element type. Invalid
+ /// for pointer types. For pointer types, use changeElementType.
+ constexpr LLT changeElementSize(unsigned NewEltSize) const {
+ assert(!getScalarType().isPointer() &&
+ "invalid to directly change element size for pointers");
+ return isVector() ? LLT::vector(getElementCount(), NewEltSize)
+ : LLT::scalar(NewEltSize);
+ }
+
+ /// Return a vector or scalar with the same element type and the new element
+ /// count.
+ constexpr LLT changeElementCount(ElementCount EC) const {
+ return LLT::scalarOrVector(EC, getScalarType());
+ }
+
+ /// Return a type that is \p Factor times smaller. Reduces the number of
+ /// elements if this is a vector, or the bitwidth for scalar/pointers. Does
+ /// not attempt to handle cases that aren't evenly divisible.
+ constexpr LLT divide(int Factor) const {
+ assert(Factor != 1);
+ assert((!isScalar() || getScalarSizeInBits() != 0) &&
+ "cannot divide scalar of size zero");
+ if (isVector()) {
+ assert(getElementCount().isKnownMultipleOf(Factor));
+ return scalarOrVector(getElementCount().divideCoefficientBy(Factor),
+ getElementType());
+ }
+
+ assert(getScalarSizeInBits() % Factor == 0);
+ return scalar(getScalarSizeInBits() / Factor);
+ }
+
+ /// Produce a vector type that is \p Factor times bigger, preserving the
+ /// element type. For a scalar or pointer, this will produce a new vector with
+ /// \p Factor elements.
+ constexpr LLT multiplyElements(int Factor) const {
+ if (isVector()) {
+ return scalarOrVector(getElementCount().multiplyCoefficientBy(Factor),
+ getElementType());
+ }
+
+ return fixed_vector(Factor, *this);
+ }
+
+ constexpr bool isByteSized() const {
+ return getSizeInBits().isKnownMultipleOf(8);
+ }
+
+ constexpr unsigned getScalarSizeInBits() const {
+ if (IsScalar)
+ return getFieldValue(ScalarSizeFieldInfo);
+ if (IsVector) {
+ if (!IsPointer)
+ return getFieldValue(VectorSizeFieldInfo);
+ else
+ return getFieldValue(PointerVectorSizeFieldInfo);
+ } else if (IsPointer)
+ return getFieldValue(PointerSizeFieldInfo);
+ else
+ llvm_unreachable("unexpected LLT");
+ }
+
+ constexpr unsigned getAddressSpace() const {
+ assert(RawData != 0 && "Invalid Type");
+ assert(IsPointer && "cannot get address space of non-pointer type");
+ if (!IsVector)
+ return getFieldValue(PointerAddressSpaceFieldInfo);
+ else
+ return getFieldValue(PointerVectorAddressSpaceFieldInfo);
+ }
+
+ /// Returns the vector's element type. Only valid for vector types.
+ constexpr LLT getElementType() const {
+ assert(isVector() && "cannot get element type of scalar/aggregate");
+ if (IsPointer)
+ return pointer(getAddressSpace(), getScalarSizeInBits());
+ else
+ return scalar(getScalarSizeInBits());
+ }
+
+ void print(raw_ostream &OS) const;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const;
+#endif
+
+ constexpr bool operator==(const LLT &RHS) const {
+ return IsPointer == RHS.IsPointer && IsVector == RHS.IsVector &&
+ IsScalar == RHS.IsScalar && RHS.RawData == RawData;
+ }
+
+ constexpr bool operator!=(const LLT &RHS) const { return !(*this == RHS); }
+
+ friend struct DenseMapInfo<LLT>;
+ friend class GISelInstProfileBuilder;
+
+private:
+ /// LLT is packed into 64 bits as follows:
+ /// isScalar : 1
+ /// isPointer : 1
+ /// isVector : 1
+ /// with 61 bits remaining for Kind-specific data, packed in bitfields
+ /// as described below. As there isn't a simple portable way to pack bits
+ /// into bitfields, here the different fields in the packed structure are
+ /// described in static const *Field variables. Each of these variables
+ /// is a 2-element array, with the first element describing the bitfield size
+ /// and the second element describing the bitfield offset.
+ typedef int BitFieldInfo[2];
+ ///
+ /// This is how the bitfields are packed per Kind:
+ /// * Invalid:
+ /// gets encoded as RawData == 0, as that is an invalid encoding, since for
+ /// valid encodings, SizeInBits/SizeOfElement must be larger than 0.
+ /// * Non-pointer scalar (isPointer == 0 && isVector == 0):
+ /// SizeInBits: 32;
+ static const constexpr BitFieldInfo ScalarSizeFieldInfo{32, 0};
+ /// * Pointer (isPointer == 1 && isVector == 0):
+ /// SizeInBits: 16;
+ /// AddressSpace: 24;
+ static const constexpr BitFieldInfo PointerSizeFieldInfo{16, 0};
+ static const constexpr BitFieldInfo PointerAddressSpaceFieldInfo{
+ 24, PointerSizeFieldInfo[0] + PointerSizeFieldInfo[1]};
+ static_assert((PointerAddressSpaceFieldInfo[0] +
+ PointerAddressSpaceFieldInfo[1]) <= 61,
+ "Insufficient bits to encode all data");
+ /// * Vector-of-non-pointer (isPointer == 0 && isVector == 1):
+ /// NumElements: 16;
+ /// SizeOfElement: 32;
+ /// Scalable: 1;
+ static const constexpr BitFieldInfo VectorElementsFieldInfo{16, 0};
+ static const constexpr BitFieldInfo VectorSizeFieldInfo{
+ 32, VectorElementsFieldInfo[0] + VectorElementsFieldInfo[1]};
+ static const constexpr BitFieldInfo VectorScalableFieldInfo{
+ 1, VectorSizeFieldInfo[0] + VectorSizeFieldInfo[1]};
+ static_assert((VectorSizeFieldInfo[0] + VectorSizeFieldInfo[1]) <= 61,
+ "Insufficient bits to encode all data");
+ /// * Vector-of-pointer (isPointer == 1 && isVector == 1):
+ /// NumElements: 16;
+ /// SizeOfElement: 16;
+ /// AddressSpace: 24;
+ /// Scalable: 1;
+ static const constexpr BitFieldInfo PointerVectorElementsFieldInfo{16, 0};
+ static const constexpr BitFieldInfo PointerVectorSizeFieldInfo{
+ 16,
+ PointerVectorElementsFieldInfo[1] + PointerVectorElementsFieldInfo[0]};
+ static const constexpr BitFieldInfo PointerVectorAddressSpaceFieldInfo{
+ 24, PointerVectorSizeFieldInfo[1] + PointerVectorSizeFieldInfo[0]};
+ static const constexpr BitFieldInfo PointerVectorScalableFieldInfo{
+ 1, PointerVectorAddressSpaceFieldInfo[0] +
+ PointerVectorAddressSpaceFieldInfo[1]};
+ static_assert((PointerVectorAddressSpaceFieldInfo[0] +
+ PointerVectorAddressSpaceFieldInfo[1]) <= 61,
+ "Insufficient bits to encode all data");
+
+ uint64_t IsScalar : 1;
+ uint64_t IsPointer : 1;
+ uint64_t IsVector : 1;
+ uint64_t RawData : 61;
+
+ static constexpr uint64_t getMask(const BitFieldInfo FieldInfo) {
+ const int FieldSizeInBits = FieldInfo[0];
+ return (((uint64_t)1) << FieldSizeInBits) - 1;
+ }
+ static constexpr uint64_t maskAndShift(uint64_t Val, uint64_t Mask,
+ uint8_t Shift) {
+ assert(Val <= Mask && "Value too large for field");
+ return (Val & Mask) << Shift;
+ }
+ static constexpr uint64_t maskAndShift(uint64_t Val,
+ const BitFieldInfo FieldInfo) {
+ return maskAndShift(Val, getMask(FieldInfo), FieldInfo[1]);
+ }
+
+ constexpr uint64_t getFieldValue(const BitFieldInfo FieldInfo) const {
+ return getMask(FieldInfo) & (RawData >> FieldInfo[1]);
+ }
+
+ constexpr void init(bool IsPointer, bool IsVector, bool IsScalar,
+ ElementCount EC, uint64_t SizeInBits,
+ unsigned AddressSpace) {
+ assert(SizeInBits <= std::numeric_limits<unsigned>::max() &&
+ "Not enough bits in LLT to represent size");
+ this->IsPointer = IsPointer;
+ this->IsVector = IsVector;
+ this->IsScalar = IsScalar;
+ if (IsScalar)
+ RawData = maskAndShift(SizeInBits, ScalarSizeFieldInfo);
+ else if (IsVector) {
+ assert(EC.isVector() && "invalid number of vector elements");
+ if (!IsPointer)
+ RawData =
+ maskAndShift(EC.getKnownMinValue(), VectorElementsFieldInfo) |
+ maskAndShift(SizeInBits, VectorSizeFieldInfo) |
+ maskAndShift(EC.isScalable() ? 1 : 0, VectorScalableFieldInfo);
+ else
+ RawData =
+ maskAndShift(EC.getKnownMinValue(),
+ PointerVectorElementsFieldInfo) |
+ maskAndShift(SizeInBits, PointerVectorSizeFieldInfo) |
+ maskAndShift(AddressSpace, PointerVectorAddressSpaceFieldInfo) |
+ maskAndShift(EC.isScalable() ? 1 : 0,
+ PointerVectorScalableFieldInfo);
+ } else if (IsPointer)
+ RawData = maskAndShift(SizeInBits, PointerSizeFieldInfo) |
+ maskAndShift(AddressSpace, PointerAddressSpaceFieldInfo);
+ else
+ llvm_unreachable("unexpected LLT configuration");
+ }
+
+public:
+ constexpr uint64_t getUniqueRAWLLTData() const {
+ return ((uint64_t)RawData) << 3 | ((uint64_t)IsScalar) << 2 |
+ ((uint64_t)IsPointer) << 1 | ((uint64_t)IsVector);
+ }
+};
+
+inline raw_ostream& operator<<(raw_ostream &OS, const LLT &Ty) {
+ Ty.print(OS);
+ return OS;
+}
-/// Get a rough equivalent of an LLT for a given MVT. LLT does not yet support
-/// scalarable vector types, and will assert if used.
-LLT getLLTForMVT(MVT Ty);
+template<> struct DenseMapInfo<LLT> {
+ static inline LLT getEmptyKey() {
+ LLT Invalid;
+ Invalid.IsPointer = true;
+ return Invalid;
+ }
+ static inline LLT getTombstoneKey() {
+ LLT Invalid;
+ Invalid.IsVector = true;
+ return Invalid;
+ }
+ static inline unsigned getHashValue(const LLT &Ty) {
+ uint64_t Val = Ty.getUniqueRAWLLTData();
+ return DenseMapInfo<uint64_t>::getHashValue(Val);
+ }
+ static bool isEqual(const LLT &LHS, const LLT &RHS) {
+ return LHS == RHS;
+ }
+};
-/// Get the appropriate floating point arithmetic semantic based on the bit size
-/// of the given scalar LLT.
-const llvm::fltSemantics &getFltSemanticForLLT(LLT Ty);
}
#endif // LLVM_CODEGEN_LOWLEVELTYPE_H
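
To make the packed encoding above concrete, here are a few LLT values built with the factory functions from this header. A sketch only; the assertions restate behavior documented in the class:

#include "llvm/CodeGen/LowLevelType.h"
#include <cassert>

using namespace llvm;

static void lltExamples() {
  LLT S32 = LLT::scalar(32);                // s32
  LLT P0 = LLT::pointer(0, 64);             // p0, 64-bit, address space 0
  LLT V4S32 = LLT::fixed_vector(4, S32);    // <4 x s32>
  LLT NxV2P0 = LLT::scalable_vector(2, P0); // <vscale x 2 x p0>

  assert(V4S32.getSizeInBits().getFixedValue() == 128);
  assert(V4S32.getElementType() == S32);
  assert(NxV2P0.isScalable() && NxV2P0.getAddressSpace() == 0);
  assert(S32.changeElementCount(ElementCount::getFixed(2)) ==
         LLT::fixed_vector(2, 32));
}
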
diff --git a/llvm/include/llvm/CodeGen/LowLevelTypeUtils.h b/llvm/include/llvm/CodeGen/LowLevelTypeUtils.h
new file mode 100644
index 000000000000..5cd8e5412df2
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/LowLevelTypeUtils.h
@@ -0,0 +1,45 @@
+//== llvm/CodeGen/LowLevelTypeUtils.h -------------------------- -*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Implement a low-level type suitable for MachineInstr level instruction
+/// selection.
+///
+/// This provides the CodeGen aspects of LowLevelType, such as Type conversion.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LOWLEVELTYPEUTILS_H
+#define LLVM_CODEGEN_LOWLEVELTYPEUTILS_H
+
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/ValueTypes.h"
+
+namespace llvm {
+
+class DataLayout;
+class Type;
+struct fltSemantics;
+
+/// Construct a low-level type based on an LLVM type.
+LLT getLLTForType(Type &Ty, const DataLayout &DL);
+
+/// Get a rough equivalent of an MVT for a given LLT. MVT can't distinguish
+/// pointers, so these will convert to a plain integer.
+MVT getMVTForLLT(LLT Ty);
+EVT getApproximateEVTForLLT(LLT Ty, const DataLayout &DL, LLVMContext &Ctx);
+
+/// Get a rough equivalent of an LLT for a given MVT. LLT does not yet support
+/// scalable vector types, and will assert if used.
+LLT getLLTForMVT(MVT Ty);
+
+/// Get the appropriate floating point arithmetic semantic based on the bit size
+/// of the given scalar LLT.
+const llvm::fltSemantics &getFltSemanticForLLT(LLT Ty);
+}
+
+#endif // LLVM_CODEGEN_LOWLEVELTYPEUTILS_H
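
A sketch of the conversion helpers this new header keeps after the split; the wrapper function is illustrative and merely round-trips an IR integer type:

#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

using namespace llvm;

static void convertTypes(LLVMContext &Ctx, const DataLayout &DL) {
  Type *I64 = Type::getInt64Ty(Ctx);
  LLT Ty = getLLTForType(*I64, DL); // s64
  MVT VT = getMVTForLLT(Ty);        // MVT::i64 (pointers would decay to ints)
  LLT Back = getLLTForMVT(VT);      // s64 again
  (void)Back;
}
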
diff --git a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h
index 3bbcfd63e3aa..da943268dac8 100644
--- a/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h
+++ b/llvm/include/llvm/CodeGen/MIRFSDiscriminator.h
@@ -30,7 +30,8 @@ class MachineFunction;
using namespace sampleprof;
class MIRAddFSDiscriminators : public MachineFunctionPass {
- MachineFunction *MF;
+ MachineFunction *MF = nullptr;
+ FSDiscriminatorPass Pass;
unsigned LowBit;
unsigned HighBit;
@@ -38,7 +39,7 @@ public:
static char ID;
/// PassNum is the sequence number this pass is called, start from 1.
MIRAddFSDiscriminators(FSDiscriminatorPass P = FSDiscriminatorPass::Pass1)
- : MachineFunctionPass(ID) {
+ : MachineFunctionPass(ID), Pass(P) {
LowBit = getFSPassBitBegin(P);
HighBit = getFSPassBitEnd(P);
assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
diff --git a/llvm/include/llvm/CodeGen/MIRFormatter.h b/llvm/include/llvm/CodeGen/MIRFormatter.h
index a039da1bd5b3..203d965836f6 100644
--- a/llvm/include/llvm/CodeGen/MIRFormatter.h
+++ b/llvm/include/llvm/CodeGen/MIRFormatter.h
@@ -35,7 +35,7 @@ public:
/// Implement target specific printing for machine operand immediate value, so
/// that we can have more meaningful mnemonic than a 64-bit integer. Passing
- /// None to OpIdx means the index is unknown.
+ /// std::nullopt to OpIdx means the index is unknown.
virtual void printImm(raw_ostream &OS, const MachineInstr &MI,
std::optional<unsigned> OpIdx, int64_t Imm) const {
OS << Imm;
diff --git a/llvm/include/llvm/CodeGen/MIRPrinter.h b/llvm/include/llvm/CodeGen/MIRPrinter.h
index 45e30686b642..5e94418d5fe0 100644
--- a/llvm/include/llvm/CodeGen/MIRPrinter.h
+++ b/llvm/include/llvm/CodeGen/MIRPrinter.h
@@ -34,7 +34,7 @@ void printMIR(raw_ostream &OS, const MachineFunction &MF);
/// you the correct list of successor blocks in most cases except for things
/// like jump tables where the basic block references can't easily be found.
/// The MIRPRinter will skip printing successors if they match the result of
-/// this funciton and the parser will use this function to construct a list if
+/// this function and the parser will use this function to construct a list if
/// it is missing.
void guessSuccessors(const MachineBasicBlock &MBB,
SmallVectorImpl<MachineBasicBlock*> &Result,
diff --git a/llvm/include/llvm/CodeGen/MIRSampleProfile.h b/llvm/include/llvm/CodeGen/MIRSampleProfile.h
index f54c4b5891be..221e966e2b9e 100644
--- a/llvm/include/llvm/CodeGen/MIRSampleProfile.h
+++ b/llvm/include/llvm/CodeGen/MIRSampleProfile.h
@@ -14,6 +14,7 @@
#ifndef LLVM_CODEGEN_MIRSAMPLEPROFILE_H
#define LLVM_CODEGEN_MIRSAMPLEPROFILE_H
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/Discriminator.h"
@@ -26,6 +27,10 @@ class MachineBlockFrequencyInfo;
class MachineFunction;
class Module;
+namespace vfs {
+class FileSystem;
+} // namespace vfs
+
using namespace sampleprof;
class MIRProfileLoader;
@@ -41,7 +46,8 @@ public:
/// FS bits will only use the '1' bits in the Mask.
MIRProfileLoaderPass(std::string FileName = "",
std::string RemappingFileName = "",
- FSDiscriminatorPass P = FSDiscriminatorPass::Pass1);
+ FSDiscriminatorPass P = FSDiscriminatorPass::Pass1,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS = nullptr);
/// getMachineFunction - Return the last machine function computed.
const MachineFunction *getMachineFunction() const { return MF; }
diff --git a/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
index 62911c2bd741..16e773c18641 100644
--- a/llvm/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/llvm/include/llvm/CodeGen/MIRYamlMapping.h
@@ -304,6 +304,30 @@ template <> struct MappingTraits<MachineStackObject> {
static const bool flow = true;
};
+/// Serializable representation of the MCRegister variant of
+/// MachineFunction::VariableDbgInfo.
+struct EntryValueObject {
+ StringValue EntryValueRegister;
+ StringValue DebugVar;
+ StringValue DebugExpr;
+ StringValue DebugLoc;
+ bool operator==(const EntryValueObject &Other) const {
+ return EntryValueRegister == Other.EntryValueRegister &&
+ DebugVar == Other.DebugVar && DebugExpr == Other.DebugExpr &&
+ DebugLoc == Other.DebugLoc;
+ }
+};
+
+template <> struct MappingTraits<EntryValueObject> {
+ static void mapping(yaml::IO &YamlIO, EntryValueObject &Object) {
+ YamlIO.mapRequired("entry-value-register", Object.EntryValueRegister);
+ YamlIO.mapRequired("debug-info-variable", Object.DebugVar);
+ YamlIO.mapRequired("debug-info-expression", Object.DebugExpr);
+ YamlIO.mapRequired("debug-info-location", Object.DebugLoc);
+ }
+ static const bool flow = true;
+};
+
/// Serializable representation of the fixed stack object from the
/// MachineFrameInfo class.
struct FixedMachineStackObject {
@@ -572,6 +596,7 @@ template <> struct MappingTraits<MachineJumpTable::Entry> {
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineFunctionLiveIn)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::VirtualRegisterDefinition)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineStackObject)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::EntryValueObject)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::FixedMachineStackObject)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::CallSiteInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::MachineConstantPoolValue)
@@ -704,6 +729,7 @@ struct MachineFunction {
bool HasEHCatchret = false;
bool HasEHScopes = false;
bool HasEHFunclets = false;
+ bool IsOutlined = false;
bool FailsVerification = false;
bool TracksDebugUserValues = false;
@@ -715,6 +741,7 @@ struct MachineFunction {
// Frame information
MachineFrameInfo FrameInfo;
std::vector<FixedMachineStackObject> FixedStackObjects;
+ std::vector<EntryValueObject> EntryValueObjects;
std::vector<MachineStackObject> StackObjects;
std::vector<MachineConstantPoolValue> Constants; /// Constant pool.
std::unique_ptr<MachineFunctionInfo> MachineFuncInfo;
@@ -742,6 +769,7 @@ template <> struct MappingTraits<MachineFunction> {
YamlIO.mapOptional("hasEHCatchret", MF.HasEHCatchret, false);
YamlIO.mapOptional("hasEHScopes", MF.HasEHScopes, false);
YamlIO.mapOptional("hasEHFunclets", MF.HasEHFunclets, false);
+ YamlIO.mapOptional("isOutlined", MF.IsOutlined, false);
YamlIO.mapOptional("debugInstrRef", MF.UseDebugInstrRef, false);
YamlIO.mapOptional("failsVerification", MF.FailsVerification, false);
@@ -758,6 +786,8 @@ template <> struct MappingTraits<MachineFunction> {
std::vector<FixedMachineStackObject>());
YamlIO.mapOptional("stack", MF.StackObjects,
std::vector<MachineStackObject>());
+ YamlIO.mapOptional("entry_values", MF.EntryValueObjects,
+ std::vector<EntryValueObject>());
YamlIO.mapOptional("callSites", MF.CallSitesInfo,
std::vector<CallSiteInfo>());
YamlIO.mapOptional("debugValueSubstitutions", MF.DebugValueSubstitutions,
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 1ab24b554f5b..52388692c196 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -796,12 +796,12 @@ public:
/// it. If an explicit branch to the fallthrough block is not allowed,
/// set JumpToFallThrough to be false. Non-null return is a conservative
/// answer.
- MachineBasicBlock *getFallThrough(bool JumpToFallThrough = false);
+ MachineBasicBlock *getFallThrough(bool JumpToFallThrough = true);
/// Return the fallthrough block if the block can implicitly
/// transfer control to it's successor, whether by a branch or
/// a fallthrough. Non-null return is a conservative answer.
- MachineBasicBlock *getLogicalFallThrough() { return getFallThrough(true); }
+ MachineBasicBlock *getLogicalFallThrough() { return getFallThrough(false); }
/// Return true if the block can implicitly transfer control to the
/// block after it by falling off the end of it. This should return
@@ -816,6 +816,9 @@ public:
/// the first instruction, which might be PHI.
/// Returns end() if there's no non-PHI instruction.
iterator getFirstNonPHI();
+ const_iterator getFirstNonPHI() const {
+ return const_cast<MachineBasicBlock *>(this)->getFirstNonPHI();
+ }
/// Return the first instruction in MBB after I that is not a PHI or a label.
/// This is the correct point to insert lowered copies at the beginning of a
@@ -1066,31 +1069,33 @@ public:
/// instead of basic block \p Old.
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New);
- /// Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE
- /// and DBG_LABEL instructions. Return UnknownLoc if there is none.
+ /// Find the next valid DebugLoc starting at MBBI, skipping any debug
+ /// instructions. Return UnknownLoc if there is none.
DebugLoc findDebugLoc(instr_iterator MBBI);
DebugLoc findDebugLoc(iterator MBBI) {
return findDebugLoc(MBBI.getInstrIterator());
}
- /// Has exact same behavior as @ref findDebugLoc (it also
- /// searches from the first to the last MI of this MBB) except
- /// that this takes reverse iterator.
+ /// Has the exact same behavior as @ref findDebugLoc (it also searches
+ /// towards the end of this MBB) except that this function takes a reverse
+ /// iterator to identify the starting MI.
DebugLoc rfindDebugLoc(reverse_instr_iterator MBBI);
DebugLoc rfindDebugLoc(reverse_iterator MBBI) {
return rfindDebugLoc(MBBI.getInstrIterator());
}
- /// Find the previous valid DebugLoc preceding MBBI, skipping and DBG_VALUE
- /// instructions. Return UnknownLoc if there is none.
+ /// Find the previous valid DebugLoc preceding MBBI, skipping any debug
+ /// instructions. It is possible to find the last DebugLoc in the MBB using
+ /// findPrevDebugLoc(instr_end()). Return UnknownLoc if there is none.
DebugLoc findPrevDebugLoc(instr_iterator MBBI);
DebugLoc findPrevDebugLoc(iterator MBBI) {
return findPrevDebugLoc(MBBI.getInstrIterator());
}
- /// Has exact same behavior as @ref findPrevDebugLoc (it also
- /// searches from the last to the first MI of this MBB) except
- /// that this takes reverse iterator.
+ /// Has the exact same behavior as @ref findPrevDebugLoc (it also searches
+ /// towards the beginning of this MBB) except that this function takes a
+ /// reverse iterator to identify the starting MI. A minor difference compared
+ /// to findPrevDebugLoc is that we can't start scanning at "instr_end".
DebugLoc rfindPrevDebugLoc(reverse_instr_iterator MBBI);
DebugLoc rfindPrevDebugLoc(reverse_iterator MBBI) {
return rfindPrevDebugLoc(MBBI.getInstrIterator());
@@ -1261,6 +1266,12 @@ template <> struct GraphTraits<Inverse<const MachineBasicBlock*>> {
static ChildIteratorType child_end(NodeRef N) { return N->pred_end(); }
};
+// These accessors are handy for sharing templated code between IR and MIR.
+inline auto successors(const MachineBasicBlock *BB) { return BB->successors(); }
+inline auto predecessors(const MachineBasicBlock *BB) {
+ return BB->predecessors();
+}
+
/// MachineInstrSpan provides an interface to get an iteration range
/// containing the instruction it was initialized with, along with all
/// those instructions inserted prior to or following that instruction
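
The free successors()/predecessors() overloads added above exist so that generic graph code can be written once against both IR and MIR blocks; unqualified calls resolve via ADL. A sketch of that kind of sharing, with an illustrative helper:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/IR/CFG.h"

// Works for both const BasicBlock* and const MachineBasicBlock*: the
// unqualified successors() call picks the right overload in namespace llvm.
template <typename BlockT> unsigned countSuccessors(const BlockT *BB) {
  unsigned N = 0;
  for (const auto *Succ : successors(BB)) {
    (void)Succ;
    ++N;
  }
  return N;
}
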
diff --git a/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
index 2290d26c0827..6d58c7a14fb9 100644
--- a/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
@@ -66,6 +66,7 @@ public:
/// Compute the frequency of the block, relative to the entry block.
/// This API assumes getEntryFreq() is non-zero.
float getBlockFreqRelativeToEntryBlock(const MachineBasicBlock *MBB) const {
+ assert(getEntryFreq() != 0 && "getEntryFreq() should not return 0 here!");
return getBlockFreq(MBB).getFrequency() * (1.0f / getEntryFreq());
}
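
Given the assertion added above, callers that cannot guarantee a non-zero entry frequency should check it before using the relative API. A small defensive wrapper as a sketch; the name and fallback value are illustrative:

#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"

using namespace llvm;

static float relativeFreqOrZero(const MachineBlockFrequencyInfo &MBFI,
                                const MachineBasicBlock *MBB) {
  if (MBFI.getEntryFreq() == 0)
    return 0.0f; // avoid the assert and the division by zero
  return MBFI.getBlockFreqRelativeToEntryBlock(MBB);
}
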
diff --git a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
index 39e70d583710..89eed7463bd7 100644
--- a/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
+++ b/llvm/include/llvm/CodeGen/MachineCombinerPattern.h
@@ -175,6 +175,11 @@ enum class MachineCombinerPattern {
FMADD_XA,
FMSUB,
FNMSUB,
+
+ // X86 VNNI
+ DPWSSD,
+
+ FNMADD,
};
} // end namespace llvm
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index 220d18d15821..09f9ff60f955 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -24,10 +24,10 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/iterator.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/AtomicOrdering.h"
@@ -38,6 +38,7 @@
#include <cstdint>
#include <memory>
#include <utility>
+#include <variant>
#include <vector>
namespace llvm {
@@ -374,6 +375,7 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {
bool HasEHCatchret = false;
bool HasEHScopes = false;
bool HasEHFunclets = false;
+ bool IsOutlined = false;
/// BBID to assign to the next basic block of this function.
unsigned NextBBID = 0;
@@ -406,16 +408,50 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {
void init();
public:
- struct VariableDbgInfo {
+ /// Description of the location of a variable whose Address is valid and
+ /// unchanging during function execution. The Address may be:
+ /// * A stack index, which can be negative for fixed stack objects.
+ /// * A MCRegister, whose entry value contains the address of the variable.
+ class VariableDbgInfo {
+ std::variant<int, MCRegister> Address;
+
+ public:
const DILocalVariable *Var;
const DIExpression *Expr;
- // The Slot can be negative for fixed stack objects.
- int Slot;
const DILocation *Loc;
VariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr,
int Slot, const DILocation *Loc)
- : Var(Var), Expr(Expr), Slot(Slot), Loc(Loc) {}
+ : Address(Slot), Var(Var), Expr(Expr), Loc(Loc) {}
+
+ VariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr,
+ MCRegister EntryValReg, const DILocation *Loc)
+ : Address(EntryValReg), Var(Var), Expr(Expr), Loc(Loc) {}
+
+ /// Return true if this variable is in a stack slot.
+ bool inStackSlot() const { return std::holds_alternative<int>(Address); }
+
+ /// Return true if this variable is in the entry value of a register.
+ bool inEntryValueRegister() const {
+ return std::holds_alternative<MCRegister>(Address);
+ }
+
+ /// Returns the stack slot of this variable, assuming `inStackSlot()` is
+ /// true.
+ int getStackSlot() const { return std::get<int>(Address); }
+
+ /// Returns the MCRegister of this variable, assuming
+ /// `inEntryValueRegister()` is true.
+ MCRegister getEntryValueRegister() const {
+ return std::get<MCRegister>(Address);
+ }
+
+ /// Updates the stack slot of this variable, assuming `inStackSlot()` is
+ /// true.
+ void updateStackSlot(int NewSlot) {
+ assert(inStackSlot());
+ Address = NewSlot;
+ }
};
class Delegate {
@@ -1116,6 +1152,9 @@ public:
bool hasEHFunclets() const { return HasEHFunclets; }
void setHasEHFunclets(bool V) { HasEHFunclets = V; }
+ bool isOutlined() const { return IsOutlined; }
+ void setIsOutlined(bool V) { IsOutlined = V; }
+
/// Find or create an LandingPadInfo for the specified MachineBasicBlock.
LandingPadInfo &getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad);
@@ -1223,17 +1262,49 @@ public:
/// \}
- /// Collect information used to emit debugging information of a variable.
+ /// Collect information used to emit debugging information of a variable in a
+ /// stack slot.
void setVariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr,
int Slot, const DILocation *Loc) {
VariableDbgInfos.emplace_back(Var, Expr, Slot, Loc);
}
+ /// Collect information used to emit debugging information of a variable in
+ /// the entry value of a register.
+ void setVariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr,
+ MCRegister Reg, const DILocation *Loc) {
+ VariableDbgInfos.emplace_back(Var, Expr, Reg, Loc);
+ }
+
VariableDbgInfoMapTy &getVariableDbgInfo() { return VariableDbgInfos; }
const VariableDbgInfoMapTy &getVariableDbgInfo() const {
return VariableDbgInfos;
}
+ /// Returns the collection of variables for which we have debug info and that
+ /// have been assigned a stack slot.
+ auto getInStackSlotVariableDbgInfo() {
+ return make_filter_range(getVariableDbgInfo(), [](auto &VarInfo) {
+ return VarInfo.inStackSlot();
+ });
+ }
+
+ /// Returns the collection of variables for which we have debug info and that
+ /// have been assigned a stack slot.
+ auto getInStackSlotVariableDbgInfo() const {
+ return make_filter_range(getVariableDbgInfo(), [](const auto &VarInfo) {
+ return VarInfo.inStackSlot();
+ });
+ }
+
+ /// Returns the collection of variables for which we have debug info and that
+ /// have been assigned an entry value register.
+ auto getEntryValueVariableDbgInfo() const {
+ return make_filter_range(getVariableDbgInfo(), [](const auto &VarInfo) {
+ return VarInfo.inEntryValueRegister();
+ });
+ }
+
/// Start tracking the arguments passed to the call \p CallI.
void addCallArgsForwardingRegs(const MachineInstr *CallI,
CallSiteInfoImpl &&CallInfo) {
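
The filtered views added above split the VariableDbgInfo entries by where the variable lives. A consumer sketch; the printing helper and its output format are illustrative:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void printVariableLocations(const MachineFunction &MF,
                                   raw_ostream &OS) {
  for (const auto &Info : MF.getInStackSlotVariableDbgInfo())
    OS << Info.Var->getName() << " -> stack slot " << Info.getStackSlot()
       << '\n';
  for (const auto &Info : MF.getEntryValueVariableDbgInfo())
    OS << Info.Var->getName() << " -> entry value of register "
       << Info.getEntryValueRegister().id() << '\n';
}
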
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h
index 272360e12372..2928ccfbcef7 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -29,6 +29,7 @@
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ArrayRecycler.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TrailingObjects.h"
#include <algorithm>
#include <cassert>
@@ -80,38 +81,39 @@ public:
};
enum MIFlag {
- NoFlags = 0,
- FrameSetup = 1 << 0, // Instruction is used as a part of
- // function frame setup code.
- FrameDestroy = 1 << 1, // Instruction is used as a part of
- // function frame destruction code.
- BundledPred = 1 << 2, // Instruction has bundled predecessors.
- BundledSucc = 1 << 3, // Instruction has bundled successors.
- FmNoNans = 1 << 4, // Instruction does not support Fast
- // math nan values.
- FmNoInfs = 1 << 5, // Instruction does not support Fast
- // math infinity values.
- FmNsz = 1 << 6, // Instruction is not required to retain
- // signed zero values.
- FmArcp = 1 << 7, // Instruction supports Fast math
- // reciprocal approximations.
- FmContract = 1 << 8, // Instruction supports Fast math
- // contraction operations like fma.
- FmAfn = 1 << 9, // Instruction may map to Fast math
- // intrinsic approximation.
- FmReassoc = 1 << 10, // Instruction supports Fast math
- // reassociation of operand order.
- NoUWrap = 1 << 11, // Instruction supports binary operator
- // no unsigned wrap.
- NoSWrap = 1 << 12, // Instruction supports binary operator
- // no signed wrap.
- IsExact = 1 << 13, // Instruction supports division is
- // known to be exact.
- NoFPExcept = 1 << 14, // Instruction does not raise
- // floatint-point exceptions.
- NoMerge = 1 << 15, // Passes that drop source location info
- // (e.g. branch folding) should skip
- // this instruction.
+ NoFlags = 0,
+ FrameSetup = 1 << 0, // Instruction is used as a part of
+ // function frame setup code.
+ FrameDestroy = 1 << 1, // Instruction is used as a part of
+ // function frame destruction code.
+ BundledPred = 1 << 2, // Instruction has bundled predecessors.
+ BundledSucc = 1 << 3, // Instruction has bundled successors.
+ FmNoNans = 1 << 4, // Instruction does not support Fast
+ // math nan values.
+ FmNoInfs = 1 << 5, // Instruction does not support Fast
+ // math infinity values.
+ FmNsz = 1 << 6, // Instruction is not required to retain
+ // signed zero values.
+ FmArcp = 1 << 7, // Instruction supports Fast math
+ // reciprocal approximations.
+ FmContract = 1 << 8, // Instruction supports Fast math
+ // contraction operations like fma.
+ FmAfn = 1 << 9, // Instruction may map to Fast math
+ // intrinsic approximation.
+ FmReassoc = 1 << 10, // Instruction supports Fast math
+ // reassociation of operand order.
+ NoUWrap = 1 << 11, // Instruction supports binary operator
+ // no unsigned wrap.
+ NoSWrap = 1 << 12, // Instruction supports binary operator
+ // no signed wrap.
+ IsExact = 1 << 13, // Instruction supports division is
+ // known to be exact.
+ NoFPExcept = 1 << 14, // Instruction does not raise
+ // floating-point exceptions.
+ NoMerge = 1 << 15, // Passes that drop source location info
+ // (e.g. branch folding) should skip
+ // this instruction.
+ Unpredictable = 1 << 16, // Instruction with unpredictable condition.
};
private:
@@ -120,24 +122,27 @@ private:
// Operands are allocated by an ArrayRecycler.
MachineOperand *Operands = nullptr; // Pointer to the first operand.
- unsigned NumOperands = 0; // Number of operands on instruction.
- uint16_t Flags = 0; // Various bits of additional
- // information about machine
- // instruction.
+#define LLVM_MI_NUMOPERANDS_BITS 24
+#define LLVM_MI_FLAGS_BITS 24
+#define LLVM_MI_ASMPRINTERFLAGS_BITS 8
- uint8_t AsmPrinterFlags = 0; // Various bits of information used by
- // the AsmPrinter to emit helpful
- // comments. This is *not* semantic
- // information. Do not use this for
- // anything other than to convey comment
- // information to AsmPrinter.
+ /// Number of operands on instruction.
+ uint32_t NumOperands : LLVM_MI_NUMOPERANDS_BITS;
- // OperandCapacity has uint8_t size, so it should be next to AsmPrinterFlags
+ // OperandCapacity has uint8_t size, so it should be next to NumOperands
// to properly pack.
using OperandCapacity = ArrayRecycler<MachineOperand>::Capacity;
OperandCapacity CapOperands; // Capacity of the Operands array.
+ /// Various bits of additional information about the machine instruction.
+ uint32_t Flags : LLVM_MI_FLAGS_BITS;
+
+ /// Various bits of information used by the AsmPrinter to emit helpful
+ /// comments. This is *not* semantic information. Do not use this for
+ /// anything other than to convey comment information to AsmPrinter.
+ uint8_t AsmPrinterFlags : LLVM_MI_ASMPRINTERFLAGS_BITS;
+
/// Internal implementation detail class that provides out-of-line storage for
/// extra info used by the machine instruction when this info cannot be stored
/// in-line within the instruction itself.
@@ -304,6 +309,14 @@ private:
dumprImpl(const MachineRegisterInfo &MRI, unsigned Depth, unsigned MaxDepth,
SmallPtrSetImpl<const MachineInstr *> &AlreadySeenInstrs) const;
+ static bool opIsRegDef(const MachineOperand &Op) {
+ return Op.isReg() && Op.isDef();
+ }
+
+ static bool opIsRegUse(const MachineOperand &Op) {
+ return Op.isReg() && Op.isUse();
+ }
+
public:
MachineInstr(const MachineInstr &) = delete;
MachineInstr &operator=(const MachineInstr &) = delete;
@@ -335,35 +348,47 @@ public:
/// Return whether an AsmPrinter flag is set.
bool getAsmPrinterFlag(CommentFlag Flag) const {
+ assert(isUInt<LLVM_MI_ASMPRINTERFLAGS_BITS>(unsigned(Flag)) &&
+ "Flag is out of range for the AsmPrinterFlags field");
return AsmPrinterFlags & Flag;
}
/// Set a flag for the AsmPrinter.
void setAsmPrinterFlag(uint8_t Flag) {
+ assert(isUInt<LLVM_MI_ASMPRINTERFLAGS_BITS>(unsigned(Flag)) &&
+ "Flag is out of range for the AsmPrinterFlags field");
AsmPrinterFlags |= Flag;
}
/// Clear specific AsmPrinter flags.
void clearAsmPrinterFlag(CommentFlag Flag) {
+ assert(isUInt<LLVM_MI_ASMPRINTERFLAGS_BITS>(unsigned(Flag)) &&
+ "Flag is out of range for the AsmPrinterFlags field");
AsmPrinterFlags &= ~Flag;
}
/// Return the MI flags bitvector.
- uint16_t getFlags() const {
+ uint32_t getFlags() const {
return Flags;
}
/// Return whether an MI flag is set.
bool getFlag(MIFlag Flag) const {
+ assert(isUInt<LLVM_MI_FLAGS_BITS>(unsigned(Flag)) &&
+ "Flag is out of range for the Flags field");
return Flags & Flag;
}
/// Set a MI flag.
void setFlag(MIFlag Flag) {
- Flags |= (uint16_t)Flag;
+ assert(isUInt<LLVM_MI_FLAGS_BITS>(unsigned(Flag)) &&
+ "Flag is out of range for the Flags field");
+ Flags |= (uint32_t)Flag;
}
void setFlags(unsigned flags) {
+ assert(isUInt<LLVM_MI_FLAGS_BITS>(flags) &&
+ "flags to be set are out of range for the Flags field");
// Filter out the automatically maintained flags.
unsigned Mask = BundledPred | BundledSucc;
Flags = (Flags & Mask) | (flags & ~Mask);
@@ -371,7 +396,9 @@ public:
/// clearFlag - Clear a MI flag.
void clearFlag(MIFlag Flag) {
- Flags &= ~((uint16_t)Flag);
+ assert(isUInt<LLVM_MI_FLAGS_BITS>(unsigned(Flag)) &&
+ "Flag to clear is out of range for the Flags field");
+ Flags &= ~((uint32_t)Flag);
}
/// Return true if MI is in a bundle (but not the first MI in a bundle).
@@ -702,6 +729,31 @@ public:
operands_begin() + getNumExplicitOperands());
}
+ using filtered_mop_iterator =
+ filter_iterator<mop_iterator, bool (*)(const MachineOperand &)>;
+ using filtered_const_mop_iterator =
+ filter_iterator<const_mop_iterator, bool (*)(const MachineOperand &)>;
+
+ /// Returns an iterator range over all operands that are (explicit or
+ /// implicit) register defs.
+ iterator_range<filtered_mop_iterator> all_defs() {
+ return make_filter_range(operands(), opIsRegDef);
+ }
+ /// \copydoc all_defs()
+ iterator_range<filtered_const_mop_iterator> all_defs() const {
+ return make_filter_range(operands(), opIsRegDef);
+ }
+
+ /// Returns an iterator range over all operands that are (explicit or
+ /// implicit) register uses.
+ iterator_range<filtered_mop_iterator> all_uses() {
+ return make_filter_range(uses(), opIsRegUse);
+ }
+ /// \copydoc all_uses()
+ iterator_range<filtered_const_mop_iterator> all_uses() const {
+ return make_filter_range(uses(), opIsRegUse);
+ }
+
/// Returns the number of the operand iterator \p I points to.
unsigned getOperandNo(const_mop_iterator I) const {
return I - operands_begin();
@@ -1851,9 +1903,9 @@ public:
/// Return the MIFlags which represent both MachineInstrs. This
/// should be used when merging two MachineInstrs into one. This routine does
/// not modify the MIFlags of this MachineInstr.
- uint16_t mergeFlagsWith(const MachineInstr& Other) const;
+ uint32_t mergeFlagsWith(const MachineInstr& Other) const;
- static uint16_t copyFlagsFromInstruction(const Instruction &I);
+ static uint32_t copyFlagsFromInstruction(const Instruction &I);
/// Copy all flags to MachineInst MIFlags
void copyIRFlags(const Instruction &I);
@@ -1896,11 +1948,47 @@ public:
}
}
+ std::tuple<Register, Register> getFirst2Regs() const {
+ return std::tuple(getOperand(0).getReg(), getOperand(1).getReg());
+ }
+
+ std::tuple<Register, Register, Register> getFirst3Regs() const {
+ return std::tuple(getOperand(0).getReg(), getOperand(1).getReg(),
+ getOperand(2).getReg());
+ }
+
+ std::tuple<Register, Register, Register, Register> getFirst4Regs() const {
+ return std::tuple(getOperand(0).getReg(), getOperand(1).getReg(),
+ getOperand(2).getReg(), getOperand(3).getReg());
+ }
+
+ std::tuple<Register, Register, Register, Register, Register>
+ getFirst5Regs() const {
+ return std::tuple(getOperand(0).getReg(), getOperand(1).getReg(),
+ getOperand(2).getReg(), getOperand(3).getReg(),
+ getOperand(4).getReg());
+ }
+
+ std::tuple<LLT, LLT> getFirst2LLTs() const;
+ std::tuple<LLT, LLT, LLT> getFirst3LLTs() const;
+ std::tuple<LLT, LLT, LLT, LLT> getFirst4LLTs() const;
+ std::tuple<LLT, LLT, LLT, LLT, LLT> getFirst5LLTs() const;
+
+ std::tuple<Register, LLT, Register, LLT> getFirst2RegLLTs() const;
+ std::tuple<Register, LLT, Register, LLT, Register, LLT>
+ getFirst3RegLLTs() const;
+ std::tuple<Register, LLT, Register, LLT, Register, LLT, Register, LLT>
+ getFirst4RegLLTs() const;
+ std::tuple<Register, LLT, Register, LLT, Register, LLT, Register, LLT,
+ Register, LLT>
+ getFirst5RegLLTs() const;
+
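The getFirstN{Regs,LLTs,RegLLTs} helpers pair naturally with structured bindings; a hedged sketch for a two-operand generic instruction (MI assumed):

    auto [Dst, Src] = MI.getFirst2Regs();      // Register, Register
    auto [DstTy, SrcTy] = MI.getFirst2LLTs();  // LLT, LLT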
private:
/// If this instruction is embedded into a MachineFunction, return the
/// MachineRegisterInfo object for the current function, otherwise
/// return null.
MachineRegisterInfo *getRegInfo();
+ const MachineRegisterInfo *getRegInfo() const;
/// Unlink all of the register operands in this instruction from their
/// respective use lists. This requires that the operands already be on their
diff --git a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
index c35bdc0c2b44..c6f3a84809fa 100644
--- a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -9,7 +9,8 @@
// This file exposes a function named BuildMI, which is useful for dramatically
// simplifying how MachineInstr's are created. It allows use of code like this:
//
-// M = BuildMI(MBB, MI, DL, TII.get(X86::ADD8rr), Dst)
+// MIMetadata MIMD(MI); // Propagates DebugLoc and other metadata
+// M = BuildMI(MBB, MI, MIMD, TII.get(X86::ADD8rr), Dst)
// .addReg(argVal1)
// .addReg(argVal2);
//
diff --git a/llvm/include/llvm/CodeGen/MachineInstrBundle.h b/llvm/include/llvm/CodeGen/MachineInstrBundle.h
index 8a73f9a18f47..9685d1fd8a3e 100644
--- a/llvm/include/llvm/CodeGen/MachineInstrBundle.h
+++ b/llvm/include/llvm/CodeGen/MachineInstrBundle.h
@@ -241,6 +241,13 @@ VirtRegInfo AnalyzeVirtRegInBundle(
MachineInstr &MI, Register Reg,
SmallVectorImpl<std::pair<MachineInstr *, unsigned>> *Ops = nullptr);
+/// Return a pair of lane masks (reads, writes) indicating which lanes this
+/// instruction uses with Reg.
+std::pair<LaneBitmask, LaneBitmask>
+AnalyzeVirtRegLanesInBundle(const MachineInstr &MI, Register Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI);
+
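A small call sketch for the new lane-mask query (illustrative; MI, Reg, MRI and TRI are assumed to be in scope):

    auto [Reads, Writes] = AnalyzeVirtRegLanesInBundle(MI, Reg, MRI, TRI);
    bool ReadsWhatItWrites = (Reads & Writes).any();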
/// Information about how a physical register Reg is used by a set of
/// operands.
struct PhysRegInfo {
diff --git a/llvm/include/llvm/CodeGen/MachineLoopInfo.h b/llvm/include/llvm/CodeGen/MachineLoopInfo.h
index daf0f18a7518..cf8d1f17bde7 100644
--- a/llvm/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineLoopInfo.h
@@ -29,10 +29,10 @@
#ifndef LLVM_CODEGEN_MACHINELOOPINFO_H
#define LLVM_CODEGEN_MACHINELOOPINFO_H
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/GenericLoopInfo.h"
namespace llvm {
diff --git a/llvm/include/llvm/CodeGen/MachineMemOperand.h b/llvm/include/llvm/CodeGen/MachineMemOperand.h
index 41574d8d556a..da7fd7cdf029 100644
--- a/llvm/include/llvm/CodeGen/MachineMemOperand.h
+++ b/llvm/include/llvm/CodeGen/MachineMemOperand.h
@@ -17,16 +17,15 @@
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/PointerUnion.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Value.h" // PointerLikeTypeTraits<Value*>
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
namespace llvm {
-class FoldingSetNodeID;
class MDNode;
class raw_ostream;
class MachineFunction;
@@ -69,19 +68,19 @@ struct MachinePointerInfo {
uint8_t ID = 0)
: V(v), Offset(offset), StackID(ID) {
if (V) {
- if (const auto *ValPtr = V.dyn_cast<const Value*>())
+ if (const auto *ValPtr = dyn_cast_if_present<const Value *>(V))
AddrSpace = ValPtr->getType()->getPointerAddressSpace();
else
- AddrSpace = V.get<const PseudoSourceValue*>()->getAddressSpace();
+ AddrSpace = cast<const PseudoSourceValue *>(V)->getAddressSpace();
}
}
MachinePointerInfo getWithOffset(int64_t O) const {
if (V.isNull())
return MachinePointerInfo(AddrSpace, Offset + O);
- if (V.is<const Value*>())
- return MachinePointerInfo(V.get<const Value*>(), Offset + O, StackID);
- return MachinePointerInfo(V.get<const PseudoSourceValue*>(), Offset + O,
+ if (isa<const Value *>(V))
+ return MachinePointerInfo(cast<const Value *>(V), Offset + O, StackID);
+ return MachinePointerInfo(cast<const PseudoSourceValue *>(V), Offset + O,
StackID);
}
@@ -207,10 +206,12 @@ public:
/// other PseudoSourceValue member functions which return objects which stand
/// for frame/stack pointer relative references and other special references
/// which are not representable in the high-level IR.
- const Value *getValue() const { return PtrInfo.V.dyn_cast<const Value*>(); }
+ const Value *getValue() const {
+ return dyn_cast_if_present<const Value *>(PtrInfo.V);
+ }
const PseudoSourceValue *getPseudoValue() const {
- return PtrInfo.V.dyn_cast<const PseudoSourceValue*>();
+ return dyn_cast_if_present<const PseudoSourceValue *>(PtrInfo.V);
}
const void *getOpaqueValue() const { return PtrInfo.V.getOpaqueValue(); }
@@ -323,10 +324,6 @@ public:
MemoryType = NewTy;
}
- /// Profile - Gather unique data for the object.
- ///
- void Profile(FoldingSetNodeID &ID) const;
-
/// Support for operator<<.
/// @{
void print(raw_ostream &OS, ModuleSlotTracker &MST,
diff --git a/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/llvm/include/llvm/CodeGen/MachineModuleInfo.h
index ea07e365d465..4f0ada3d7e17 100644
--- a/llvm/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineModuleInfo.h
@@ -84,7 +84,7 @@ class MachineModuleInfo {
MCContext *ExternalContext = nullptr;
/// This is the LLVM Module being worked on.
- const Module *TheModule;
+ const Module *TheModule = nullptr;
/// This is the object-file-format-specific implementation of
/// MachineModuleInfoImpl, which lets targets accumulate whatever info they
@@ -95,7 +95,7 @@ class MachineModuleInfo {
/// \{
/// The current call site index being processed, if any. 0 if none.
- unsigned CurCallSite;
+ unsigned CurCallSite = 0;
/// \}
@@ -106,11 +106,11 @@ class MachineModuleInfo {
// go into .eh_frame only, while others go into .debug_frame only.
/// True if debugging information is available in this module.
- bool DbgInfoAvailable;
+ bool DbgInfoAvailable = false;
/// True if this module is being built for windows/msvc, and uses floating
/// point. This is used to emit an undefined reference to _fltused.
- bool UsesMSVCFloatingPoint;
+ bool UsesMSVCFloatingPoint = false;
/// Maps IR Functions to their corresponding MachineFunctions.
DenseMap<const Function*, std::unique_ptr<MachineFunction>> MachineFunctions;
diff --git a/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h b/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h
index 58f7163d7b81..f8a328f13ede 100644
--- a/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h
+++ b/llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h
@@ -15,7 +15,7 @@
#define LLVM_CODEGEN_MACHINEMODULEINFOIMPLS_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include <cassert>
@@ -110,7 +110,7 @@ class MachineModuleInfoWasm : public MachineModuleInfoImpl {
public:
MachineModuleInfoWasm(const MachineModuleInfo &) {}
- StringSet<> MachineSymbolsUsed;
+ SetVector<StringRef> MachineSymbolsUsed;
};
} // end namespace llvm
diff --git a/llvm/include/llvm/CodeGen/MachineOperand.h b/llvm/include/llvm/CodeGen/MachineOperand.h
index 75710a4542e4..1f3b7feedd18 100644
--- a/llvm/include/llvm/CodeGen/MachineOperand.h
+++ b/llvm/include/llvm/CodeGen/MachineOperand.h
@@ -253,6 +253,9 @@ public:
///
void clearParent() { ParentMI = nullptr; }
+ /// Returns the index of this operand in the instruction that it belongs to.
+ unsigned getOperandNo() const;
+
/// Print a subreg index operand.
/// MO_Immediate operands can also be subreg indices. If that is the case, the
/// subreg index name will be printed. MachineInstr::isOperandSubregIdx can be
diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h
index f968089e0de0..d0ff02fea4ff 100644
--- a/llvm/include/llvm/CodeGen/MachineOutliner.h
+++ b/llvm/include/llvm/CodeGen/MachineOutliner.h
@@ -199,7 +199,7 @@ public:
unsigned FunctionIdx, unsigned Flags)
: StartIdx(StartIdx), Len(Len), FirstInst(FirstInst), LastInst(LastInst),
MBB(MBB), FunctionIdx(FunctionIdx), Flags(Flags) {}
- Candidate() = default;
+ Candidate() = delete;
/// Used to ensure that \p Candidates are outlined in an order that
/// preserves the start and end indices of other \p Candidates.
@@ -268,7 +268,7 @@ public:
C.Benefit = B;
}
- OutlinedFunction() = default;
+ OutlinedFunction() = delete;
};
} // namespace outliner
} // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/MachinePassManager.h b/llvm/include/llvm/CodeGen/MachinePassManager.h
index 6089339c7f5a..5dc0e2918d46 100644
--- a/llvm/include/llvm/CodeGen/MachinePassManager.h
+++ b/llvm/include/llvm/CodeGen/MachinePassManager.h
@@ -134,8 +134,7 @@ class MachineFunctionPassManager
using Base = PassManager<MachineFunction, MachineFunctionAnalysisManager>;
public:
- MachineFunctionPassManager(bool DebugLogging = false,
- bool RequireCodeGenSCCOrder = false,
+ MachineFunctionPassManager(bool RequireCodeGenSCCOrder = false,
bool VerifyMachineFunction = false)
: RequireCodeGenSCCOrder(RequireCodeGenSCCOrder),
VerifyMachineFunction(VerifyMachineFunction) {}
diff --git a/llvm/include/llvm/CodeGen/MachinePassRegistry.def b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
index 8efd1d2e95e9..a29269644ea1 100644
--- a/llvm/include/llvm/CodeGen/MachinePassRegistry.def
+++ b/llvm/include/llvm/CodeGen/MachinePassRegistry.def
@@ -123,6 +123,7 @@ DUMMY_FUNCTION_PASS("cfguard-dispatch", CFGuardDispatchPass, ())
DUMMY_FUNCTION_PASS("cfguard-check", CFGuardCheckPass, ())
DUMMY_FUNCTION_PASS("gc-info-printer", GCInfoPrinterPass, ())
DUMMY_FUNCTION_PASS("select-optimize", SelectOptimizePass, ())
+DUMMY_FUNCTION_PASS("callbrprepare", CallBrPrepare, ())
#undef DUMMY_FUNCTION_PASS
#ifndef DUMMY_MODULE_PASS
diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h
index c0d05ae73c5a..04055ba9732d 100644
--- a/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -69,7 +69,7 @@ public:
MachineOptimizationRemarkEmitter *ORE = nullptr;
const MachineLoopInfo *MLI = nullptr;
const MachineDominatorTree *MDT = nullptr;
- const InstrItineraryData *InstrItins;
+ const InstrItineraryData *InstrItins = nullptr;
const TargetInstrInfo *TII = nullptr;
RegisterClassInfo RegClassInfo;
bool disabledByPragma = false;
@@ -168,7 +168,7 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
SmallVector<SmallVector<int, 4>, 16> AdjK;
// Node to Index from ScheduleDAGTopologicalSort
std::vector<int> *Node2Idx;
- unsigned NumPaths;
+ unsigned NumPaths = 0u;
static unsigned MaxPaths;
public:
@@ -179,7 +179,8 @@ class SwingSchedulerDAG : public ScheduleDAGInstrs {
for (const auto &NodeNum : Topo)
Node2Idx->at(NodeNum) = Idx++;
}
-
+ Circuits &operator=(const Circuits &other) = delete;
+ Circuits(const Circuits &other) = delete;
~Circuits() { delete Node2Idx; }
/// Reset the data structures used in the circuit algorithm.
@@ -310,7 +311,7 @@ private:
bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
unsigned &OffsetPos, unsigned &NewBase,
int64_t &NewOffset);
- void postprocessDAG();
+ void postProcessDAG();
/// Set the Minimum Initiation Interval for this schedule attempt.
void setMII(unsigned ResMII, unsigned RecMII);
/// Set the Maximum Initiation Interval for this schedule attempt.
@@ -464,7 +465,7 @@ private:
/// processor resource masks. There is exactly one element per each processor
/// resource declared by the scheduling model.
llvm::SmallVector<uint64_t, DefaultProcResSize> ProcResourceMasks;
- int InitiationInterval;
+ int InitiationInterval = 0;
/// The number of micro operations that can be scheduled at a cycle.
int IssueWidth;
diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index 7f0c24e4e115..496224a85c52 100644
--- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -57,7 +57,7 @@ public:
virtual ~Delegate() = default;
virtual void MRI_NoteNewVirtualRegister(Register Reg) = 0;
- virtual void MRI_NotecloneVirtualRegister(Register NewReg,
+ virtual void MRI_NoteCloneVirtualRegister(Register NewReg,
Register SrcReg) {
MRI_NoteNewVirtualRegister(NewReg);
}
@@ -101,8 +101,9 @@ private:
/// first member of the pair being non-zero. If the hinted register is
/// virtual, it means the allocator should prefer the physical register
/// allocated to it if any.
- IndexedMap<std::pair<Register, SmallVector<Register, 4>>,
- VirtReg2IndexFunctor> RegAllocHints;
+ IndexedMap<std::pair<unsigned, SmallVector<Register, 4>>,
+ VirtReg2IndexFunctor>
+ RegAllocHints;
/// PhysRegUseDefLists - This is an array of the head of the use/def list for
/// physical registers.
@@ -180,7 +181,7 @@ public:
void noteCloneVirtualRegister(Register NewReg, Register SrcReg) {
for (auto *TheDelegate : TheDelegates)
- TheDelegate->MRI_NotecloneVirtualRegister(NewReg, SrcReg);
+ TheDelegate->MRI_NoteCloneVirtualRegister(NewReg, SrcReg);
}
//===--------------------------------------------------------------------===//
@@ -452,7 +453,7 @@ public:
}
void insertVRegByName(StringRef Name, Register Reg) {
- assert((Name.empty() || VRegNames.find(Name) == VRegNames.end()) &&
+ assert((Name.empty() || !VRegNames.contains(Name)) &&
"Named VRegs Must be Unique.");
if (!Name.empty()) {
VRegNames.insert(Name);
@@ -659,9 +660,9 @@ public:
/// This shouldn't be used directly unless \p Reg has a register class.
/// \see getRegClassOrNull when this might happen.
const TargetRegisterClass *getRegClass(Register Reg) const {
- assert(VRegInfo[Reg.id()].first.is<const TargetRegisterClass *>() &&
+ assert(isa<const TargetRegisterClass *>(VRegInfo[Reg.id()].first) &&
"Register class not set, wrong accessor");
- return VRegInfo[Reg.id()].first.get<const TargetRegisterClass *>();
+ return cast<const TargetRegisterClass *>(VRegInfo[Reg.id()].first);
}
/// Return the register class of \p Reg, or null if Reg has not been assigned
@@ -677,7 +678,7 @@ public:
/// the select pass, using getRegClass is safe.
const TargetRegisterClass *getRegClassOrNull(Register Reg) const {
const RegClassOrRegBank &Val = VRegInfo[Reg].first;
- return Val.dyn_cast<const TargetRegisterClass *>();
+ return dyn_cast_if_present<const TargetRegisterClass *>(Val);
}
/// Return the register bank of \p Reg, or null if Reg has not been assigned
@@ -686,7 +687,7 @@ public:
/// RegisterBankInfo::getRegBankFromRegClass.
const RegisterBank *getRegBankOrNull(Register Reg) const {
const RegClassOrRegBank &Val = VRegInfo[Reg].first;
- return Val.dyn_cast<const RegisterBank *>();
+ return dyn_cast_if_present<const RegisterBank *>(Val);
}
/// Return the register bank or register class of \p Reg.
@@ -818,27 +819,25 @@ public:
/// getRegAllocationHint - Return the register allocation hint for the
/// specified virtual register. If there are many hints, this returns the
/// one with the greatest weight.
- std::pair<Register, Register>
- getRegAllocationHint(Register VReg) const {
+ std::pair<unsigned, Register> getRegAllocationHint(Register VReg) const {
assert(VReg.isVirtual());
Register BestHint = (RegAllocHints[VReg.id()].second.size() ?
RegAllocHints[VReg.id()].second[0] : Register());
- return std::pair<Register, Register>(RegAllocHints[VReg.id()].first,
- BestHint);
+ return {RegAllocHints[VReg.id()].first, BestHint};
}
/// getSimpleHint - same as getRegAllocationHint except it will only return
/// a target independent hint.
Register getSimpleHint(Register VReg) const {
assert(VReg.isVirtual());
- std::pair<Register, Register> Hint = getRegAllocationHint(VReg);
+ std::pair<unsigned, Register> Hint = getRegAllocationHint(VReg);
return Hint.first ? Register() : Hint.second;
}
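With the hint kind now carried as an unsigned, callers typically destructure the pair; a sketch under the assumption that VReg is virtual:

    auto [HintKind, HintReg] = MRI.getRegAllocationHint(VReg);
    // HintKind == 0 denotes a target-independent hint, which is what
    // getSimpleHint() reports.
    bool HasSimpleHint = HintKind == 0 && HintReg.isValid();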
/// getRegAllocationHints - Return a reference to the vector of all
/// register allocation hints for VReg.
- const std::pair<Register, SmallVector<Register, 4>>
- &getRegAllocationHints(Register VReg) const {
+ const std::pair<unsigned, SmallVector<Register, 4>> &
+ getRegAllocationHints(Register VReg) const {
assert(VReg.isVirtual());
return RegAllocHints[VReg];
}
diff --git a/llvm/include/llvm/CodeGen/MachineSSAContext.h b/llvm/include/llvm/CodeGen/MachineSSAContext.h
index e3b2dc459881..2409c83071e1 100644
--- a/llvm/include/llvm/CodeGen/MachineSSAContext.h
+++ b/llvm/include/llvm/CodeGen/MachineSSAContext.h
@@ -26,10 +26,6 @@ class Register;
template <typename _FunctionT> class GenericSSAContext;
template <typename, bool> class DominatorTreeBase;
-inline auto successors(const MachineBasicBlock *BB) { return BB->successors(); }
-inline auto predecessors(const MachineBasicBlock *BB) {
- return BB->predecessors();
-}
inline unsigned succ_size(const MachineBasicBlock *BB) {
return BB->succ_size();
}
@@ -40,7 +36,7 @@ inline auto instrs(const MachineBasicBlock &BB) { return BB.instrs(); }
template <> class GenericSSAContext<MachineFunction> {
const MachineRegisterInfo *RegInfo = nullptr;
- MachineFunction *MF;
+ MachineFunction *MF = nullptr;
public:
using BlockT = MachineBasicBlock;
@@ -48,9 +44,11 @@ public:
using InstructionT = MachineInstr;
using ValueRefT = Register;
using ConstValueRefT = Register;
- static const Register ValueRefNull;
+ using UseT = MachineOperand;
using DominatorTreeT = DominatorTreeBase<BlockT, false>;
+ static constexpr Register ValueRefNull = 0;
+
void setFunction(MachineFunction &Fn);
MachineFunction *getFunction() const { return MF; }
@@ -62,7 +60,7 @@ public:
static void appendBlockTerms(SmallVectorImpl<const MachineInstr *> &terms,
const MachineBasicBlock &block);
MachineBasicBlock *getDefBlock(Register) const;
- static bool isConstantValuePhi(const MachineInstr &Phi);
+ static bool isConstantOrUndefValuePhi(const MachineInstr &Phi);
Printable print(const MachineBasicBlock *Block) const;
Printable print(const MachineInstr *Inst) const;
diff --git a/llvm/include/llvm/CodeGen/MachineSSAUpdater.h b/llvm/include/llvm/CodeGen/MachineSSAUpdater.h
index 3f0b55e0abb8..bbd09d7d151b 100644
--- a/llvm/include/llvm/CodeGen/MachineSSAUpdater.h
+++ b/llvm/include/llvm/CodeGen/MachineSSAUpdater.h
@@ -41,14 +41,14 @@ private:
void *AV = nullptr;
/// VRC - Register class of the current virtual register.
- const TargetRegisterClass *VRC;
+ const TargetRegisterClass *VRC = nullptr;
/// InsertedPHIs - If this is non-null, the MachineSSAUpdater adds all PHI
/// nodes that it creates to the vector.
SmallVectorImpl<MachineInstr*> *InsertedPHIs;
- const TargetInstrInfo *TII;
- MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
public:
/// MachineSSAUpdater constructor. If InsertedPHIs is specified, it will be
diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
index 997c3a4f74a1..c950a9ac5c38 100644
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -92,6 +92,7 @@
#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <cassert>
+#include <llvm/Support/raw_ostream.h>
#include <memory>
#include <string>
#include <vector>
@@ -135,6 +136,8 @@ struct MachineSchedContext {
RegisterClassInfo *RegClassInfo;
MachineSchedContext();
+ MachineSchedContext &operator=(const MachineSchedContext &other) = delete;
+ MachineSchedContext(const MachineSchedContext &other) = delete;
virtual ~MachineSchedContext();
};
@@ -361,7 +364,7 @@ protected:
/// Apply each ScheduleDAGMutation step in order. This allows different
/// instances of ScheduleDAGMI to perform custom DAG postprocessing.
- void postprocessDAG();
+ void postProcessDAG();
/// Release ExitSU predecessors and setup scheduler queues.
void initQueues(ArrayRef<SUnit*> TopRoots, ArrayRef<SUnit*> BotRoots);
@@ -374,6 +377,9 @@ protected:
/// dump the scheduled Sequence.
void dumpSchedule() const;
+ /// Print execution trace of the schedule top-down or bottom-up.
+ void dumpScheduleTraceTopDown() const;
+ void dumpScheduleTraceBottomUp() const;
// Lesser helpers...
bool checkSchedLimit();
@@ -605,6 +611,220 @@ struct SchedRemainder {
void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
};
+/// ResourceSegments are a collection of intervals closed on the
+/// left and opened on the right:
+///
+/// list{ [a1, b1), [a2, b2), ..., [a_N, b_N) }
+///
+/// The collection has the following properties:
+///
+/// 1. The list is ordered: a_i < b_i and b_i < a_(i+1)
+///
+/// 2. The intervals in the collection do not intersect each other.
+///
+/// A \ref ResourceSegments instance represents the cycle
+/// reservation history of the instance of an individual resource.
+class ResourceSegments {
+public:
+ /// Represents an interval of discrete integer values closed on
+ /// the left and open on the right: [a, b).
+ typedef std::pair<int64_t, int64_t> IntervalTy;
+
+ /// Adds an interval [a, b) to the collection of the instance.
+ ///
+ /// When adding [a, b) to the collection, the operation merges the
+ /// adjacent intervals. For example
+ ///
+ /// 0 1 2 3 4 5 6 7 8 9 10
+ /// [-----) [--) [--)
+ /// + [--)
+ /// = [-----------) [--)
+ ///
+ /// To be able to debug duplicate resource usage, the function has an
+ /// assertion that checks that no interval is added if it overlaps
+ /// any of the intervals in the collection. We can
+ /// require this because by definition a \ref ResourceSegments is
+ /// attached only to an individual resource instance.
+ void add(IntervalTy A, const unsigned CutOff = 10);
+
+public:
+ /// Checks whether intervals intersect.
+ static bool intersects(IntervalTy A, IntervalTy B);
+
+ /// These functions return the interval used by a resource in bottom-up and
+ /// top-down scheduling.
+ ///
+ /// Consider an instruction that uses resources X0, X1 and X2 as follows:
+ ///
+ /// X0 X1 X1 X2 +--------+------------+------+
+ /// |Resource|StartAtCycle|Cycles|
+ /// +--------+------------+------+
+ /// | X0 | 0 | 1 |
+ /// +--------+------------+------+
+ /// | X1 | 1 | 3 |
+ /// +--------+------------+------+
+ /// | X2 | 3 | 4 |
+ /// +--------+------------+------+
+ ///
+ /// If we can schedule the instruction at cycle C, we need to
+ /// compute the interval of the resource as follows:
+ ///
+ /// # TOP DOWN SCHEDULING
+ ///
+ /// Cycles scheduling flows to the _right_, in the same direction
+ /// of time.
+ ///
+ /// C 1 2 3 4 5 ...
+ /// ------|------|------|------|------|------|----->
+ /// X0 X1 X1 X2 ---> direction of time
+ /// X0 [C, C+1)
+ /// X1 [C+1, C+3)
+ /// X2 [C+3, C+4)
+ ///
+ /// Therefore, the formula to compute the interval for a resource
+ /// of an instruction that can be scheduled at cycle C in top-down
+ /// scheduling is:
+ ///
+ /// [C+StartAtCycle, C+Cycles)
+ ///
+ ///
+ /// # BOTTOM UP SCHEDULING
+ ///
+ /// Cycles scheduling flows to the _left_, in opposite direction
+ /// of time.
+ ///
+ /// In bottom up scheduling, the scheduling happens in opposite
+ /// direction to the execution of the cycles of the
+ /// instruction. When the instruction is scheduled at cycle `C`,
+ /// the resources are allocated in the past relative to `C`:
+ ///
+ /// 2 1 C -1 -2 -3 -4 -5 ...
+ /// <-----|------|------|------|------|------|------|------|---
+ /// X0 X1 X1 X2 ---> direction of time
+ /// X0 (C+1, C]
+ /// X1 (C, C-2]
+ /// X2 (C-2, C-3]
+ ///
+ /// Therefore, the formula to compute the interval for a resource
+ /// of an instruction that can be scheduled at cycle C in bottom-up
+ /// scheduling is:
+ ///
+ /// [C-Cycle+1, C-StartAtCycle+1)
+ ///
+ ///
+ /// NOTE: In both cases, the number of cycles booked by a
+ /// resource is the value (Cycle - StartAtCycle).
+ static IntervalTy getResourceIntervalBottom(unsigned C, unsigned StartAtCycle,
+ unsigned Cycle) {
+ return std::make_pair<long, long>((long)C - (long)Cycle + 1L,
+ (long)C - (long)StartAtCycle + 1L);
+ }
+ static IntervalTy getResourceIntervalTop(unsigned C, unsigned StartAtCycle,
+ unsigned Cycle) {
+ return std::make_pair<long, long>((long)C + (long)StartAtCycle,
+ (long)C + (long)Cycle);
+ }
+
+private:
+ /// Finds the first cycle in which a resource can be allocated.
+ ///
+ /// The function uses the \param IntervalBuilder [*] to build a
+ /// resource interval [a, b) out of the input parameters \param
+ /// CurrCycle, \param StartAtCycle and \param Cycle.
+ ///
+ /// The function then loops through the intervals in the ResourceSegments
+ /// and shifts the interval [a, b) and the ReturnCycle to the
+ /// right until there is no intersection between the intervals of
+ /// the \ref ResourceSegments instance and the new shifted [a, b). When
+ /// this condition is met, the ReturnCycle (which
+ /// corresponds to the cycle in which the resource can be
+ /// allocated) is returned.
+ ///
+ /// c = CurrCycle in input
+ /// c 1 2 3 4 5 6 7 8 9 10 ... ---> (time
+ /// flow)
+ /// ResourceSegments... [---) [-------) [-----------)
+ /// c [1 3) -> StartAtCycle=1, Cycles=3
+ /// ++c [1 3)
+ /// ++c [1 3)
+ /// ++c [1 3)
+ /// ++c [1 3)
+ /// ++c [1 3) ---> returns c
+ /// incremented by 5 (c+5)
+ ///
+ ///
+ /// Notice that for bottom-up scheduling the diagram is slightly
+ /// different because the current cycle c is always on the right
+ /// of the interval [a, b) (see \ref
+ /// `getResourceIntervalBottom`). This is because the cycle
+ /// increments for bottom-up scheduling move in the direction
+ /// opposite to the direction of time:
+ ///
+ /// --------> direction of time.
+ /// XXYZZZ (resource usage)
+ /// --------> direction of top-down execution cycles.
+ /// <-------- direction of bottom-up execution cycles.
+ ///
+ /// Even though bottom-up scheduling moves against the flow of
+ /// time, the algorithm used to find the first free slot in between
+ /// intervals is the same as for top-down scheduling.
+ ///
+ /// [*] See \ref `getResourceIntervalTop` and
+ /// \ref `getResourceIntervalBottom` to see how such resource intervals
+ /// are built.
+ unsigned
+ getFirstAvailableAt(unsigned CurrCycle, unsigned StartAtCycle, unsigned Cycle,
+ std::function<IntervalTy(unsigned, unsigned, unsigned)>
+ IntervalBuilder) const;
+
+public:
+ /// getFirstAvailableAtFromBottom and getFirstAvailableAtFromTop
+ /// should be merged into a single function in which a function that
+ /// creates the `NewInterval` is passed as a parameter.
+ unsigned getFirstAvailableAtFromBottom(unsigned CurrCycle,
+ unsigned StartAtCycle,
+ unsigned Cycle) const {
+ return getFirstAvailableAt(CurrCycle, StartAtCycle, Cycle,
+ getResourceIntervalBottom);
+ }
+ unsigned getFirstAvailableAtFromTop(unsigned CurrCycle, unsigned StartAtCycle,
+ unsigned Cycle) const {
+ return getFirstAvailableAt(CurrCycle, StartAtCycle, Cycle,
+ getResourceIntervalTop);
+ }
+
+private:
+ std::list<IntervalTy> _Intervals;
+ /// Merge all adjacent intervals in the collection. For all pairs
+ /// of adjacent intervals, it performs [a, b) + [b, c) -> [a, c).
+ ///
+ /// Before performing the merge operation, the intervals are
+ /// sorted with \ref sort_predicate.
+ void sortAndMerge();
+
+public:
+ // constructor for empty set
+ explicit ResourceSegments(){};
+ bool empty() const { return _Intervals.empty(); }
+ explicit ResourceSegments(std::list<IntervalTy> Intervals)
+ : _Intervals(Intervals) {
+ sortAndMerge();
+ }
+
+ friend bool operator==(const ResourceSegments &c1,
+ const ResourceSegments &c2) {
+ return c1._Intervals == c2._Intervals;
+ }
+ friend llvm::raw_ostream &operator<<(llvm::raw_ostream &os,
+ const ResourceSegments &Segments) {
+ os << "{ ";
+ for (auto p : Segments._Intervals)
+ os << "[" << p.first << ", " << p.second << "), ";
+ os << "}\n";
+ return os;
+ }
+};
+
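To make the interval formulas concrete, a worked sketch using the X1 row of the table above (StartAtCycle=1, Cycles=3) and the static helpers of ResourceSegments; the cycle value is illustrative:

    // Scheduling at cycle C = 10.
    // Top-down:  [C+StartAtCycle, C+Cycle)     = [11, 13).
    auto X1Top = ResourceSegments::getResourceIntervalTop(10, 1, 3);
    // Bottom-up: [C-Cycle+1, C-StartAtCycle+1) = [8, 10).
    auto X1Bot = ResourceSegments::getResourceIntervalBottom(10, 1, 3);
    ResourceSegments Segs;
    Segs.add(X1Top); // asserts if the interval overlaps an existing one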
/// Each Scheduling boundary is associated with ready queues. It tracks the
/// current cycle in the direction of movement, and maintains the state
/// of "hazards" and other interlocks at the current cycle.
@@ -670,12 +890,14 @@ private:
// Is the scheduled region resource limited vs. latency limited.
bool IsResourceLimited;
- // Record the highest cycle at which each resource has been reserved by a
- // scheduled instruction.
- SmallVector<unsigned, 16> ReservedCycles;
-
- /// For each PIdx, stores first index into ReservedCycles that corresponds to
- /// it.
+public:
+private:
+ /// Record how resources have been allocated across the cycles of
+ /// the execution.
+ std::map<unsigned, ResourceSegments> ReservedResourceSegments;
+ std::vector<unsigned> ReservedCycles;
+ /// For each PIdx, stores first index into ReservedResourceSegments that
+ /// corresponds to it.
///
/// For example, consider the following 3 resources (ResourceCount =
/// 3):
@@ -691,12 +913,14 @@ private:
/// +------------+--------+
///
/// In this case, the total number of resource instances is 6. The
- /// vector \ref ReservedCycles will have a slot for each instance. The
- /// vector \ref ReservedCyclesIndex will track at what index the first
+ /// vector \ref ReservedResourceSegments will have a slot for each instance.
+ /// The vector \ref ReservedCyclesIndex will track at what index the first
/// instance of the resource is found in the vector of \ref
- /// ReservedCycles:
+ /// ReservedResourceSegments:
+ ///
+ /// Indexes of instances in
+ /// ReservedResourceSegments
///
- /// Indexes of instances in ReservedCycles
/// 0 1 2 3 4 5
/// ReservedCyclesIndex[0] = 0; [X0, X1,
/// ReservedCyclesIndex[1] = 2; Y0, Y1, Y2
@@ -719,7 +943,8 @@ public:
Available(ID, Name+".A"), Pending(ID << LogMaxQID, Name+".P") {
reset();
}
-
+ SchedBoundary &operator=(const SchedBoundary &other) = delete;
+ SchedBoundary(const SchedBoundary &other) = delete;
~SchedBoundary();
void reset();
@@ -781,11 +1006,13 @@ public:
unsigned getLatencyStallCycles(SUnit *SU);
unsigned getNextResourceCycleByInstance(unsigned InstanceIndex,
- unsigned Cycles);
+ unsigned Cycles,
+ unsigned StartAtCycle);
std::pair<unsigned, unsigned> getNextResourceCycle(const MCSchedClassDesc *SC,
unsigned PIdx,
- unsigned Cycles);
+ unsigned Cycles,
+ unsigned StartAtCycle);
bool isUnbufferedGroup(unsigned PIdx) const {
return SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin &&
@@ -814,7 +1041,8 @@ public:
void incExecutedResources(unsigned PIdx, unsigned Count);
unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx,
- unsigned Cycles, unsigned ReadyCycle);
+ unsigned Cycles, unsigned ReadyCycle,
+ unsigned StartAtCycle);
void bumpNode(SUnit *SU);
diff --git a/llvm/include/llvm/CodeGen/MachineTraceMetrics.h b/llvm/include/llvm/CodeGen/MachineTraceMetrics.h
index 89c9c94455d9..63e4210b2a86 100644
--- a/llvm/include/llvm/CodeGen/MachineTraceMetrics.h
+++ b/llvm/include/llvm/CodeGen/MachineTraceMetrics.h
@@ -82,6 +82,16 @@ struct LiveRegUnit {
LiveRegUnit(unsigned RU) : RegUnit(RU) {}
};
+/// Strategies for selecting traces.
+enum class MachineTraceStrategy {
+ /// Select the trace through a block that has the fewest instructions.
+ TS_MinInstrCount,
+ /// Select the trace that contains only the current basic block. For instance,
+ /// this strategy can be used by MachineCombiner to make better decisions when
+ /// we estimate the critical path for in-order cores.
+ TS_Local,
+ TS_NumStrategies
+};
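A trace ensemble is now requested with the scoped enum; an illustrative call (Traces assumed to be a MachineTraceMetrics analysis result):

    MachineTraceMetrics::Ensemble *E =
        Traces->getEnsemble(MachineTraceStrategy::TS_Local);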
class MachineTraceMetrics : public MachineFunctionPass {
const MachineFunction *MF = nullptr;
@@ -372,18 +382,10 @@ public:
};
- /// Strategies for selecting traces.
- enum Strategy {
- /// Select the trace through a block that has the fewest instructions.
- TS_MinInstrCount,
-
- TS_NumStrategies
- };
-
/// Get the trace ensemble representing the given trace selection strategy.
/// The returned Ensemble object is owned by the MachineTraceMetrics analysis,
/// and valid for the lifetime of the analysis pass.
- Ensemble *getEnsemble(Strategy);
+ Ensemble *getEnsemble(MachineTraceStrategy);
/// Invalidate cached information about MBB. This must be called *before* MBB
/// is erased, or the CFG is otherwise changed.
@@ -407,7 +409,8 @@ private:
SmallVector<unsigned, 0> ProcResourceCycles;
// One ensemble per strategy.
- Ensemble* Ensembles[TS_NumStrategies];
+ Ensemble
+ *Ensembles[static_cast<size_t>(MachineTraceStrategy::TS_NumStrategies)];
// Convert scaled resource usage to a cycle count that can be compared with
// latencies.
diff --git a/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h b/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h
index 614f09caa3c7..e6da099751e7 100644
--- a/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h
+++ b/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// \brief Uniformity info and uniformity-aware uniform info for Machine IR
+/// \brief Machine IR instance of the generic uniformity analysis
//
//===----------------------------------------------------------------------===//
@@ -24,11 +24,13 @@ namespace llvm {
extern template class GenericUniformityInfo<MachineSSAContext>;
using MachineUniformityInfo = GenericUniformityInfo<MachineSSAContext>;
-/// \brief Compute the uniform information of a Machine IR function.
-MachineUniformityInfo
-computeMachineUniformityInfo(MachineFunction &F,
- const MachineCycleInfo &cycleInfo,
- const MachineDomTree &domTree);
+/// \brief Compute uniformity information for a Machine IR function.
+///
+/// If \p HasBranchDivergence is false, produces a dummy result which assumes
+/// everything is uniform.
+MachineUniformityInfo computeMachineUniformityInfo(
+ MachineFunction &F, const MachineCycleInfo &cycleInfo,
+ const MachineDomTree &domTree, bool HasBranchDivergence);
} // namespace llvm
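Callers now state explicitly whether the target has divergent control flow; a hedged call sketch (MF, CI and DT are assumed to be the machine function, cycle info and dominator tree):

    MachineUniformityInfo MUI = computeMachineUniformityInfo(
        MF, CI, DT, /*HasBranchDivergence=*/true);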
diff --git a/llvm/include/llvm/CodeGen/MachineValueType.h b/llvm/include/llvm/CodeGen/MachineValueType.h
new file mode 100644
index 000000000000..d7dc38c7bd98
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/MachineValueType.h
@@ -0,0 +1,555 @@
+//===- CodeGen/MachineValueType.h - Machine-Level types ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the set of machine-level target independent types which
+// legal values in the code generator use.
+//
+// Constants and properties are defined in ValueTypes.td.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEVALUETYPE_H
+#define LLVM_CODEGEN_MACHINEVALUETYPE_H
+
+#include "llvm/ADT/Sequence.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TypeSize.h"
+#include <cassert>
+#include <cstdint>
+
+namespace llvm {
+
+ class Type;
+ class raw_ostream;
+
+ /// Machine Value Type. Every type that is supported natively by some
+ /// processor targeted by LLVM occurs here. This means that any legal value
+ /// type can be represented by an MVT.
+ class MVT {
+ public:
+ enum SimpleValueType : uint8_t {
+ // Simple value types that aren't explicitly part of this enumeration
+ // are considered extended value types.
+ INVALID_SIMPLE_VALUE_TYPE = 0,
+
+#define GET_VT_ATTR(Ty, n, sz, Any, Int, FP, Vec, Sc) Ty = n,
+#define GET_VT_RANGES
+#include "llvm/CodeGen/GenVT.inc"
+#undef GET_VT_ATTR
+#undef GET_VT_RANGES
+
+ VALUETYPE_SIZE = LAST_VALUETYPE + 1,
+
+ // This is the current maximum for LAST_VALUETYPE.
+ // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
+ // This value must be a multiple of 32.
+ MAX_ALLOWED_VALUETYPE = 224,
+ };
+
+ static_assert(FIRST_VALUETYPE > 0);
+ static_assert(LAST_VALUETYPE < MAX_ALLOWED_VALUETYPE);
+
+ SimpleValueType SimpleTy = INVALID_SIMPLE_VALUE_TYPE;
+
+ constexpr MVT() = default;
+ constexpr MVT(SimpleValueType SVT) : SimpleTy(SVT) {}
+
+ bool operator>(const MVT& S) const { return SimpleTy > S.SimpleTy; }
+ bool operator<(const MVT& S) const { return SimpleTy < S.SimpleTy; }
+ bool operator==(const MVT& S) const { return SimpleTy == S.SimpleTy; }
+ bool operator!=(const MVT& S) const { return SimpleTy != S.SimpleTy; }
+ bool operator>=(const MVT& S) const { return SimpleTy >= S.SimpleTy; }
+ bool operator<=(const MVT& S) const { return SimpleTy <= S.SimpleTy; }
+
+ /// Support for debugging, callable in GDB: VT.dump()
+ void dump() const;
+
+ /// Implement operator<<.
+ void print(raw_ostream &OS) const;
+
+ /// Return true if this is a valid simple valuetype.
+ bool isValid() const {
+ return (SimpleTy >= MVT::FIRST_VALUETYPE &&
+ SimpleTy <= MVT::LAST_VALUETYPE);
+ }
+
+ /// Return true if this is a FP or a vector FP type.
+ bool isFloatingPoint() const {
+ return ((SimpleTy >= MVT::FIRST_FP_VALUETYPE &&
+ SimpleTy <= MVT::LAST_FP_VALUETYPE) ||
+ (SimpleTy >= MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE) ||
+ (SimpleTy >= MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE));
+ }
+
+ /// Return true if this is an integer or a vector integer type.
+ bool isInteger() const {
+ return ((SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE &&
+ SimpleTy <= MVT::LAST_INTEGER_VALUETYPE) ||
+ (SimpleTy >= MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE) ||
+ (SimpleTy >= MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE));
+ }
+
+ /// Return true if this is an integer, not including vectors.
+ bool isScalarInteger() const {
+ return (SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE &&
+ SimpleTy <= MVT::LAST_INTEGER_VALUETYPE);
+ }
+
+ /// Return true if this is a vector value type.
+ bool isVector() const {
+ return (SimpleTy >= MVT::FIRST_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_VECTOR_VALUETYPE);
+ }
+
+ /// Return true if this is a vector value type where the
+ /// runtime length is machine dependent
+ bool isScalableVector() const {
+ return (SimpleTy >= MVT::FIRST_SCALABLE_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_SCALABLE_VECTOR_VALUETYPE);
+ }
+
+ /// Return true if this is a custom target type that has a scalable size.
+ bool isScalableTargetExtVT() const {
+ return SimpleTy == MVT::aarch64svcount;
+ }
+
+ /// Return true if the type is a scalable type.
+ bool isScalableVT() const {
+ return isScalableVector() || isScalableTargetExtVT();
+ }
+
+ bool isFixedLengthVector() const {
+ return (SimpleTy >= MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE &&
+ SimpleTy <= MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE);
+ }
+
+ /// Return true if this is a 16-bit vector type.
+ bool is16BitVector() const {
+ return (isFixedLengthVector() && getFixedSizeInBits() == 16);
+ }
+
+ /// Return true if this is a 32-bit vector type.
+ bool is32BitVector() const {
+ return (isFixedLengthVector() && getFixedSizeInBits() == 32);
+ }
+
+ /// Return true if this is a 64-bit vector type.
+ bool is64BitVector() const {
+ return (isFixedLengthVector() && getFixedSizeInBits() == 64);
+ }
+
+ /// Return true if this is a 128-bit vector type.
+ bool is128BitVector() const {
+ return (isFixedLengthVector() && getFixedSizeInBits() == 128);
+ }
+
+ /// Return true if this is a 256-bit vector type.
+ bool is256BitVector() const {
+ return (isFixedLengthVector() && getFixedSizeInBits() == 256);
+ }
+
+ /// Return true if this is a 512-bit vector type.
+ bool is512BitVector() const {
+ return (isFixedLengthVector() && getFixedSizeInBits() == 512);
+ }
+
+ /// Return true if this is a 1024-bit vector type.
+ bool is1024BitVector() const {
+ return (isFixedLengthVector() && getFixedSizeInBits() == 1024);
+ }
+
+ /// Return true if this is a 2048-bit vector type.
+ bool is2048BitVector() const {
+ return (isFixedLengthVector() && getFixedSizeInBits() == 2048);
+ }
+
+ /// Return true if this is an overloaded type for TableGen.
+ bool isOverloaded() const {
+ switch (SimpleTy) {
+#define GET_VT_ATTR(Ty, n, sz, Any, Int, FP, Vec, Sc) \
+ case Ty: \
+ return Any;
+#include "llvm/CodeGen/GenVT.inc"
+#undef GET_VT_ATTR
+ default:
+ return false;
+ }
+ }
+
+ /// Return a vector with the same number of elements as this vector, but
+ /// with the element type converted to an integer type with the same
+ /// bitwidth.
+ MVT changeVectorElementTypeToInteger() const {
+ MVT EltTy = getVectorElementType();
+ MVT IntTy = MVT::getIntegerVT(EltTy.getSizeInBits());
+ MVT VecTy = MVT::getVectorVT(IntTy, getVectorElementCount());
+ assert(VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE &&
+ "Simple vector VT not representable by simple integer vector VT!");
+ return VecTy;
+ }
+
+ /// Return a VT for a vector type whose attributes match ourselves
+ /// with the exception of the element type that is chosen by the caller.
+ MVT changeVectorElementType(MVT EltVT) const {
+ MVT VecTy = MVT::getVectorVT(EltVT, getVectorElementCount());
+ assert(VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE &&
+ "Simple vector VT not representable by simple integer vector VT!");
+ return VecTy;
+ }
+
+ /// Return the type converted to an equivalently sized integer or vector
+ /// with integer element type. Similar to changeVectorElementTypeToInteger,
+ /// but also handles scalars.
+ MVT changeTypeToInteger() {
+ if (isVector())
+ return changeVectorElementTypeToInteger();
+ return MVT::getIntegerVT(getSizeInBits());
+ }
+
+ /// Return a VT for a vector type with the same element type but
+ /// half the number of elements.
+ MVT getHalfNumVectorElementsVT() const {
+ MVT EltVT = getVectorElementType();
+ auto EltCnt = getVectorElementCount();
+ assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
+ return getVectorVT(EltVT, EltCnt.divideCoefficientBy(2));
+ }
+
+ // Return a VT for a vector type with the same element type but
+ // double the number of elements.
+ MVT getDoubleNumVectorElementsVT() const {
+ MVT EltVT = getVectorElementType();
+ auto EltCnt = getVectorElementCount();
+ return MVT::getVectorVT(EltVT, EltCnt * 2);
+ }
+
+ /// Returns true if the given vector's minimum number of elements is a power of 2.
+ bool isPow2VectorType() const {
+ unsigned NElts = getVectorMinNumElements();
+ return !(NElts & (NElts - 1));
+ }
+
+ /// Widens the length of the given vector MVT up to the nearest power of 2
+ /// and returns that type.
+ MVT getPow2VectorType() const {
+ if (isPow2VectorType())
+ return *this;
+
+ ElementCount NElts = getVectorElementCount();
+ unsigned NewMinCount = 1 << Log2_32_Ceil(NElts.getKnownMinValue());
+ NElts = ElementCount::get(NewMinCount, NElts.isScalable());
+ return MVT::getVectorVT(getVectorElementType(), NElts);
+ }
+
+ /// If this is a vector, return the element type, otherwise return this.
+ MVT getScalarType() const {
+ return isVector() ? getVectorElementType() : *this;
+ }
+
+ MVT getVectorElementType() const {
+ switch (SimpleTy) {
+ default:
+ llvm_unreachable("Not a vector MVT!");
+
+#define GET_VT_VECATTR(Ty, Sc, nElem, ElTy, ElSz) \
+ case Ty: \
+ return ElTy;
+#include "llvm/CodeGen/GenVT.inc"
+#undef GET_VT_VECATTR
+ }
+ }
+
+ /// Given a vector type, return the minimum number of elements it contains.
+ unsigned getVectorMinNumElements() const {
+ switch (SimpleTy) {
+ default:
+ llvm_unreachable("Not a vector MVT!");
+
+#define GET_VT_VECATTR(Ty, Sc, nElem, ElTy, ElSz) \
+ case Ty: \
+ return nElem;
+#include "llvm/CodeGen/GenVT.inc"
+#undef GET_VT_VECATTR
+ }
+ }
+
+ ElementCount getVectorElementCount() const {
+ return ElementCount::get(getVectorMinNumElements(), isScalableVector());
+ }
+
+ unsigned getVectorNumElements() const {
+ if (isScalableVector())
+ llvm::reportInvalidSizeRequest(
+ "Possible incorrect use of MVT::getVectorNumElements() for "
+ "scalable vector. Scalable flag may be dropped, use "
+ "MVT::getVectorElementCount() instead");
+ return getVectorMinNumElements();
+ }
+
+ /// Returns the size of the specified MVT in bits.
+ ///
+ /// If the value type is a scalable vector type, the scalable property will
+ /// be set and the runtime size will be a positive integer multiple of the
+ /// base size.
+ TypeSize getSizeInBits() const {
+ switch (SimpleTy) {
+ default:
+ switch (SimpleTy) {
+ default:
+ llvm_unreachable("getSizeInBits called on extended MVT.");
+
+#define GET_VT_ATTR(Ty, N, Sz, Any, Int, FP, Vec, Sc) \
+ case Ty: \
+ return (Sc ? TypeSize::Scalable(Sz) : TypeSize::Fixed(Sz));
+#include "llvm/CodeGen/GenVT.inc"
+#undef GET_VT_ATTR
+ }
+ case Other:
+ llvm_unreachable("Value type is non-standard value, Other.");
+ case iPTR:
+ llvm_unreachable("Value type size is target-dependent. Ask TLI.");
+ case iPTRAny:
+ case iAny:
+ case fAny:
+ case vAny:
+ case Any:
+ llvm_unreachable("Value type is overloaded.");
+ case token:
+ llvm_unreachable("Token type is a sentinel that cannot be used "
+ "in codegen and has no size");
+ case Metadata:
+ llvm_unreachable("Value type is metadata.");
+ case aarch64svcount: // FIXME: Not in the td.
+ return TypeSize::Scalable(16);
+ }
+ }
+
+ /// Return the size of the specified fixed width value type in bits. The
+ /// function will assert if the type is scalable.
+ uint64_t getFixedSizeInBits() const {
+ return getSizeInBits().getFixedValue();
+ }
+
+ uint64_t getScalarSizeInBits() const {
+ return getScalarType().getSizeInBits().getFixedValue();
+ }
+
+ /// Return the number of bytes overwritten by a store of the specified value
+ /// type.
+ ///
+ /// If the value type is a scalable vector type, the scalable property will
+ /// be set and the runtime size will be a positive integer multiple of the
+ /// base size.
+ TypeSize getStoreSize() const {
+ TypeSize BaseSize = getSizeInBits();
+ return {(BaseSize.getKnownMinValue() + 7) / 8, BaseSize.isScalable()};
+ }
+
+ // Return the number of bytes overwritten by a store of this value type or
+ // this value type's element type in the case of a vector.
+ uint64_t getScalarStoreSize() const {
+ return getScalarType().getStoreSize().getFixedValue();
+ }
+
+ /// Return the number of bits overwritten by a store of the specified value
+ /// type.
+ ///
+ /// If the value type is a scalable vector type, the scalable property will
+ /// be set and the runtime size will be a positive integer multiple of the
+ /// base size.
+ TypeSize getStoreSizeInBits() const {
+ return getStoreSize() * 8;
+ }
+
+ /// Returns true if the number of bits for the type is a multiple of an
+ /// 8-bit byte.
+ bool isByteSized() const { return getSizeInBits().isKnownMultipleOf(8); }
+
+ /// Return true if we know at compile time this has more bits than VT.
+ bool knownBitsGT(MVT VT) const {
+ return TypeSize::isKnownGT(getSizeInBits(), VT.getSizeInBits());
+ }
+
+ /// Return true if we know at compile time this has more than or the same
+ /// bits as VT.
+ bool knownBitsGE(MVT VT) const {
+ return TypeSize::isKnownGE(getSizeInBits(), VT.getSizeInBits());
+ }
+
+ /// Return true if we know at compile time this has fewer bits than VT.
+ bool knownBitsLT(MVT VT) const {
+ return TypeSize::isKnownLT(getSizeInBits(), VT.getSizeInBits());
+ }
+
+ /// Return true if we know at compile time this has fewer than or the same
+ /// bits as VT.
+ bool knownBitsLE(MVT VT) const {
+ return TypeSize::isKnownLE(getSizeInBits(), VT.getSizeInBits());
+ }
+
+ /// Return true if this has more bits than VT.
+ bool bitsGT(MVT VT) const {
+ assert(isScalableVector() == VT.isScalableVector() &&
+ "Comparison between scalable and fixed types");
+ return knownBitsGT(VT);
+ }
+
+ /// Return true if this has no less bits than VT.
+ bool bitsGE(MVT VT) const {
+ assert(isScalableVector() == VT.isScalableVector() &&
+ "Comparison between scalable and fixed types");
+ return knownBitsGE(VT);
+ }
+
+ /// Return true if this has less bits than VT.
+ bool bitsLT(MVT VT) const {
+ assert(isScalableVector() == VT.isScalableVector() &&
+ "Comparison between scalable and fixed types");
+ return knownBitsLT(VT);
+ }
+
+ /// Return true if this has no more bits than VT.
+ bool bitsLE(MVT VT) const {
+ assert(isScalableVector() == VT.isScalableVector() &&
+ "Comparison between scalable and fixed types");
+ return knownBitsLE(VT);
+ }
+
+ static MVT getFloatingPointVT(unsigned BitWidth) {
+#define GET_VT_ATTR(Ty, n, sz, Any, Int, FP, Vec, Sc) \
+ if (FP == 3 && sz == BitWidth) \
+ return Ty;
+#include "llvm/CodeGen/GenVT.inc"
+#undef GET_VT_ATTR
+
+ llvm_unreachable("Bad bit width!");
+ }
+
+ static MVT getIntegerVT(unsigned BitWidth) {
+#define GET_VT_ATTR(Ty, n, sz, Any, Int, FP, Vec, Sc) \
+ if (Int == 3 && sz == BitWidth) \
+ return Ty;
+#include "llvm/CodeGen/GenVT.inc"
+#undef GET_VT_ATTR
+
+ return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
+ }
+
+ static MVT getVectorVT(MVT VT, unsigned NumElements) {
+#define GET_VT_VECATTR(Ty, Sc, nElem, ElTy, ElSz) \
+ if (!Sc && VT.SimpleTy == ElTy && NumElements == nElem) \
+ return Ty;
+#include "llvm/CodeGen/GenVT.inc"
+#undef GET_VT_VECATTR
+
+ return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
+ }
+
+ static MVT getScalableVectorVT(MVT VT, unsigned NumElements) {
+#define GET_VT_VECATTR(Ty, Sc, nElem, ElTy, ElSz) \
+ if (Sc && VT.SimpleTy == ElTy && NumElements == nElem) \
+ return Ty;
+#include "llvm/CodeGen/GenVT.inc"
+#undef GET_VT_VECATTR
+
+ return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
+ }
+
+ static MVT getVectorVT(MVT VT, unsigned NumElements, bool IsScalable) {
+ if (IsScalable)
+ return getScalableVectorVT(VT, NumElements);
+ return getVectorVT(VT, NumElements);
+ }
+
+ static MVT getVectorVT(MVT VT, ElementCount EC) {
+ if (EC.isScalable())
+ return getScalableVectorVT(VT, EC.getKnownMinValue());
+ return getVectorVT(VT, EC.getKnownMinValue());
+ }
+
+ /// Return the value type corresponding to the specified type. This returns
+ /// all pointers as iPTR. If HandleUnknown is true, unknown types are
+ /// returned as Other, otherwise they are invalid.
+ static MVT getVT(Type *Ty, bool HandleUnknown = false);
+
+ public:
+ /// SimpleValueType Iteration
+ /// @{
+ static auto all_valuetypes() {
+ return enum_seq_inclusive(MVT::FIRST_VALUETYPE, MVT::LAST_VALUETYPE,
+ force_iteration_on_noniterable_enum);
+ }
+
+ static auto integer_valuetypes() {
+ return enum_seq_inclusive(MVT::FIRST_INTEGER_VALUETYPE,
+ MVT::LAST_INTEGER_VALUETYPE,
+ force_iteration_on_noniterable_enum);
+ }
+
+ static auto fp_valuetypes() {
+ return enum_seq_inclusive(MVT::FIRST_FP_VALUETYPE, MVT::LAST_FP_VALUETYPE,
+ force_iteration_on_noniterable_enum);
+ }
+
+ static auto vector_valuetypes() {
+ return enum_seq_inclusive(MVT::FIRST_VECTOR_VALUETYPE,
+ MVT::LAST_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
+ }
+
+ static auto fixedlen_vector_valuetypes() {
+ return enum_seq_inclusive(MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE,
+ MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
+ }
+
+ static auto scalable_vector_valuetypes() {
+ return enum_seq_inclusive(MVT::FIRST_SCALABLE_VECTOR_VALUETYPE,
+ MVT::LAST_SCALABLE_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
+ }
+
+ static auto integer_fixedlen_vector_valuetypes() {
+ return enum_seq_inclusive(MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE,
+ MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
+ }
+
+ static auto fp_fixedlen_vector_valuetypes() {
+ return enum_seq_inclusive(MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE,
+ MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
+ }
+
+ static auto integer_scalable_vector_valuetypes() {
+ return enum_seq_inclusive(MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE,
+ MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
+ }
+
+ static auto fp_scalable_vector_valuetypes() {
+ return enum_seq_inclusive(MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE,
+ MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE,
+ force_iteration_on_noniterable_enum);
+ }
+ /// @}
+ };
+
+ inline raw_ostream &operator<<(raw_ostream &OS, const MVT &VT) {
+ VT.print(OS);
+ return OS;
+ }
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINEVALUETYPE_H
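A few hedged MVT queries as a usage sketch of the API above (the chosen types are illustrative):

    MVT V4i32 = MVT::getVectorVT(MVT::i32, 4);           // fixed-length <4 x i32>
    assert(V4i32.isVector() && V4i32.isInteger() && V4i32.is128BitVector());
    MVT NxV4i32 = MVT::getScalableVectorVT(MVT::i32, 4); // <vscale x 4 x i32>
    assert(NxV4i32.isScalableVector());
    TypeSize Sz = NxV4i32.getSizeInBits();               // scalable, minimum 128 bits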
diff --git a/llvm/include/llvm/CodeGen/ModuloSchedule.h b/llvm/include/llvm/CodeGen/ModuloSchedule.h
index b9213ab58aa4..d03f7b495915 100644
--- a/llvm/include/llvm/CodeGen/ModuloSchedule.h
+++ b/llvm/include/llvm/CodeGen/ModuloSchedule.h
@@ -170,11 +170,11 @@ private:
MachineFunction &MF;
const TargetSubtargetInfo &ST;
MachineRegisterInfo &MRI;
- const TargetInstrInfo *TII;
+ const TargetInstrInfo *TII = nullptr;
LiveIntervals &LIS;
- MachineBasicBlock *BB;
- MachineBasicBlock *Preheader;
+ MachineBasicBlock *BB = nullptr;
+ MachineBasicBlock *Preheader = nullptr;
MachineBasicBlock *NewKernel = nullptr;
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
@@ -297,13 +297,13 @@ protected:
MachineFunction &MF;
const TargetSubtargetInfo &ST;
MachineRegisterInfo &MRI;
- const TargetInstrInfo *TII;
- LiveIntervals *LIS;
+ const TargetInstrInfo *TII = nullptr;
+ LiveIntervals *LIS = nullptr;
/// The original loop block that gets rewritten in-place.
- MachineBasicBlock *BB;
+ MachineBasicBlock *BB = nullptr;
/// The original loop preheader.
- MachineBasicBlock *Preheader;
+ MachineBasicBlock *Preheader = nullptr;
/// All prolog and epilog blocks.
SmallVector<MachineBasicBlock *, 4> Prologs, Epilogs;
/// For every block, the stages that are produced.
diff --git a/llvm/include/llvm/CodeGen/PBQP/CostAllocator.h b/llvm/include/llvm/CodeGen/PBQP/CostAllocator.h
index 0d6d8a31317b..7a8ee691c034 100644
--- a/llvm/include/llvm/CodeGen/PBQP/CostAllocator.h
+++ b/llvm/include/llvm/CodeGen/PBQP/CostAllocator.h
@@ -100,7 +100,7 @@ public:
auto P = std::make_shared<PoolEntry>(*this, std::move(ValueKey));
EntrySet.insert(P.get());
- return PoolRef(std::move(P), &P->getValue());
+ return PoolRef(P, &P->getValue());
}
};
diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index b331c9a19fd1..11bc1d48a93d 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -31,6 +31,11 @@ class Pass;
class TargetMachine;
class raw_ostream;
+template <typename T> class IntrusiveRefCntPtr;
+namespace vfs {
+class FileSystem;
+} // namespace vfs
+
} // End llvm namespace
// List of target independent CodeGen pass IDs.
@@ -494,8 +499,7 @@ namespace llvm {
/// printing assembly.
ModulePass *createMachineOutlinerPass(bool RunOnAllFunctions = true);
- /// This pass expands the experimental reduction intrinsics into sequences of
- /// shuffles.
+ /// This pass expands the reduction intrinsics into sequences of shuffles.
FunctionPass *createExpandReductionsPass();
// This pass replaces intrinsics operating on vector operands with calls to
@@ -537,7 +541,7 @@ namespace llvm {
FunctionPass *createEHContGuardCatchretPass();
/// Create Hardware Loop pass. \see HardwareLoops.cpp
- FunctionPass *createHardwareLoopsPass();
+ FunctionPass *createHardwareLoopsLegacyPass();
/// This pass inserts pseudo probe annotation for callsite profiling.
FunctionPass *createPseudoProbeInserter();
@@ -551,9 +555,10 @@ namespace llvm {
createMIRAddFSDiscriminatorsPass(sampleprof::FSDiscriminatorPass P);
/// Read Flow Sensitive Profile.
- FunctionPass *createMIRProfileLoaderPass(std::string File,
- std::string RemappingFile,
- sampleprof::FSDiscriminatorPass P);
+ FunctionPass *
+ createMIRProfileLoaderPass(std::string File, std::string RemappingFile,
+ sampleprof::FSDiscriminatorPass P,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS);
/// Creates MIR Debugify pass. \see MachineDebugify.cpp
ModulePass *createDebugifyMachineModulePass();
@@ -591,6 +596,11 @@ namespace llvm {
/// This pass converts conditional moves to conditional jumps when profitable.
FunctionPass *createSelectOptimizePass();
+
+ FunctionPass *createCallBrPass();
+
+ /// Lowers KCFI operand bundles for indirect calls.
+ FunctionPass *createKCFIPass();
} // End llvm namespace
#endif
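
A sketch of how a caller might satisfy the new vfs::FileSystem parameter of createMIRProfileLoaderPass, assuming the real file system is acceptable (buildProfileLoader is a hypothetical helper used only for illustration):

    #include "llvm/CodeGen/Passes.h"
    #include "llvm/Support/VirtualFileSystem.h"
    #include <string>
    #include <utility>

    // Wires the flow-sensitive profile loader to the real file system via the
    // updated factory signature above.
    llvm::FunctionPass *
    buildProfileLoader(std::string ProfileFile, std::string RemapFile,
                       llvm::sampleprof::FSDiscriminatorPass P) {
      return llvm::createMIRProfileLoaderPass(std::move(ProfileFile),
                                              std::move(RemapFile), P,
                                              llvm::vfs::getRealFileSystem());
    }
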
diff --git a/llvm/include/llvm/CodeGen/RDFGraph.h b/llvm/include/llvm/CodeGen/RDFGraph.h
index 43eb051c136b..cf7344e8c3e7 100644
--- a/llvm/include/llvm/CodeGen/RDFGraph.h
+++ b/llvm/include/llvm/CodeGen/RDFGraph.h
@@ -225,6 +225,7 @@
#define LLVM_CODEGEN_RDFGRAPH_H
#include "RDFRegisters.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/Allocator.h"
@@ -258,715 +259,737 @@ class TargetRegisterInfo;
namespace rdf {
- using NodeId = uint32_t;
-
- struct DataFlowGraph;
-
- struct NodeAttrs {
- enum : uint16_t {
- None = 0x0000, // Nothing
-
- // Types: 2 bits
- TypeMask = 0x0003,
- Code = 0x0001, // 01, Container
- Ref = 0x0002, // 10, Reference
-
- // Kind: 3 bits
- KindMask = 0x0007 << 2,
- Def = 0x0001 << 2, // 001
- Use = 0x0002 << 2, // 010
- Phi = 0x0003 << 2, // 011
- Stmt = 0x0004 << 2, // 100
- Block = 0x0005 << 2, // 101
- Func = 0x0006 << 2, // 110
-
- // Flags: 7 bits for now
- FlagMask = 0x007F << 5,
- Shadow = 0x0001 << 5, // 0000001, Has extra reaching defs.
- Clobbering = 0x0002 << 5, // 0000010, Produces unspecified values.
- PhiRef = 0x0004 << 5, // 0000100, Member of PhiNode.
- Preserving = 0x0008 << 5, // 0001000, Def can keep original bits.
- Fixed = 0x0010 << 5, // 0010000, Fixed register.
- Undef = 0x0020 << 5, // 0100000, Has no pre-existing value.
- Dead = 0x0040 << 5, // 1000000, Does not define a value.
- };
-
- static uint16_t type(uint16_t T) { return T & TypeMask; }
- static uint16_t kind(uint16_t T) { return T & KindMask; }
- static uint16_t flags(uint16_t T) { return T & FlagMask; }
+using NodeId = uint32_t;
+
+struct DataFlowGraph;
+
+struct NodeAttrs {
+ // clang-format off
+ enum : uint16_t {
+ None = 0x0000, // Nothing
+
+ // Types: 2 bits
+ TypeMask = 0x0003,
+ Code = 0x0001, // 01, Container
+ Ref = 0x0002, // 10, Reference
+
+ // Kind: 3 bits
+ KindMask = 0x0007 << 2,
+ Def = 0x0001 << 2, // 001
+ Use = 0x0002 << 2, // 010
+ Phi = 0x0003 << 2, // 011
+ Stmt = 0x0004 << 2, // 100
+ Block = 0x0005 << 2, // 101
+ Func = 0x0006 << 2, // 110
+
+ // Flags: 7 bits for now
+ FlagMask = 0x007F << 5,
+ Shadow = 0x0001 << 5, // 0000001, Has extra reaching defs.
+ Clobbering = 0x0002 << 5, // 0000010, Produces unspecified values.
+ PhiRef = 0x0004 << 5, // 0000100, Member of PhiNode.
+ Preserving = 0x0008 << 5, // 0001000, Def can keep original bits.
+ Fixed = 0x0010 << 5, // 0010000, Fixed register.
+ Undef = 0x0020 << 5, // 0100000, Has no pre-existing value.
+ Dead = 0x0040 << 5, // 1000000, Does not define a value.
+ };
+ // clang-format on
- static uint16_t set_type(uint16_t A, uint16_t T) {
- return (A & ~TypeMask) | T;
- }
+ static uint16_t type(uint16_t T) { //
+ return T & TypeMask;
+ }
+ static uint16_t kind(uint16_t T) { //
+ return T & KindMask;
+ }
+ static uint16_t flags(uint16_t T) { //
+ return T & FlagMask;
+ }
+ static uint16_t set_type(uint16_t A, uint16_t T) {
+ return (A & ~TypeMask) | T;
+ }
- static uint16_t set_kind(uint16_t A, uint16_t K) {
- return (A & ~KindMask) | K;
- }
+ static uint16_t set_kind(uint16_t A, uint16_t K) {
+ return (A & ~KindMask) | K;
+ }
- static uint16_t set_flags(uint16_t A, uint16_t F) {
- return (A & ~FlagMask) | F;
- }
+ static uint16_t set_flags(uint16_t A, uint16_t F) {
+ return (A & ~FlagMask) | F;
+ }
- // Test if A contains B.
- static bool contains(uint16_t A, uint16_t B) {
- if (type(A) != Code)
- return false;
- uint16_t KB = kind(B);
- switch (kind(A)) {
- case Func:
- return KB == Block;
- case Block:
- return KB == Phi || KB == Stmt;
- case Phi:
- case Stmt:
- return type(B) == Ref;
- }
+ // Test if A contains B.
+ static bool contains(uint16_t A, uint16_t B) {
+ if (type(A) != Code)
return false;
- }
- };
+ uint16_t KB = kind(B);
+ switch (kind(A)) {
+ case Func:
+ return KB == Block;
+ case Block:
+ return KB == Phi || KB == Stmt;
+ case Phi:
+ case Stmt:
+ return type(B) == Ref;
+ }
+ return false;
+ }
+};
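
The layout above packs a node's type (2 bits), kind (3 bits), and flags (7 bits) into a single uint16_t. A small sketch of composing and querying such an attribute word with the accessors just defined:

    #include "llvm/CodeGen/RDFGraph.h"
    #include <cassert>
    #include <cstdint>

    // Builds the attribute word for a def reference that belongs to a phi.
    static void nodeAttrsExample() {
      using llvm::rdf::NodeAttrs;
      uint16_t A = NodeAttrs::None;
      A = NodeAttrs::set_type(A, NodeAttrs::Ref);
      A = NodeAttrs::set_kind(A, NodeAttrs::Def);
      A = NodeAttrs::set_flags(A, NodeAttrs::PhiRef);
      assert(NodeAttrs::type(A) == NodeAttrs::Ref);
      assert(NodeAttrs::kind(A) == NodeAttrs::Def);
      assert(NodeAttrs::flags(A) & NodeAttrs::PhiRef);
    }
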
- struct BuildOptions {
- enum : unsigned {
- None = 0x00,
- KeepDeadPhis = 0x01, // Do not remove dead phis during build.
- };
+struct BuildOptions {
+ enum : unsigned {
+ None = 0x00,
+ KeepDeadPhis = 0x01, // Do not remove dead phis during build.
+ OmitReserved = 0x02, // Do not track reserved registers.
};
+};
- template <typename T> struct NodeAddr {
- NodeAddr() = default;
- NodeAddr(T A, NodeId I) : Addr(A), Id(I) {}
-
- // Type cast (casting constructor). The reason for having this class
- // instead of std::pair.
- template <typename S> NodeAddr(const NodeAddr<S> &NA)
- : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
+template <typename T> struct NodeAddr {
+ NodeAddr() = default;
+ NodeAddr(T A, NodeId I) : Addr(A), Id(I) {}
- bool operator== (const NodeAddr<T> &NA) const {
- assert((Addr == NA.Addr) == (Id == NA.Id));
- return Addr == NA.Addr;
- }
- bool operator!= (const NodeAddr<T> &NA) const {
- return !operator==(NA);
- }
+  // Type cast (casting constructor). This conversion is the reason for
+  // having this class instead of std::pair.
+  // Type cast (casting constructor). This conversion is the reason for
+  // having this class instead of std::pair.
+ template <typename S>
+ NodeAddr(const NodeAddr<S> &NA) : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
- T Addr = nullptr;
- NodeId Id = 0;
- };
+ bool operator==(const NodeAddr<T> &NA) const {
+ assert((Addr == NA.Addr) == (Id == NA.Id));
+ return Addr == NA.Addr;
+ }
+ bool operator!=(const NodeAddr<T> &NA) const { //
+ return !operator==(NA);
+ }
- struct NodeBase;
-
- // Fast memory allocation and translation between node id and node address.
- // This is really the same idea as the one underlying the "bump pointer
- // allocator", the difference being in the translation. A node id is
- // composed of two components: the index of the block in which it was
- // allocated, and the index within the block. With the default settings,
- // where the number of nodes per block is 4096, the node id (minus 1) is:
- //
- // bit position: 11 0
- // +----------------------------+--------------+
- // | Index of the block |Index in block|
- // +----------------------------+--------------+
- //
- // The actual node id is the above plus 1, to avoid creating a node id of 0.
- //
- // This method significantly improved the build time, compared to using maps
- // (std::unordered_map or DenseMap) to translate between pointers and ids.
- struct NodeAllocator {
- // Amount of storage for a single node.
- enum { NodeMemSize = 32 };
-
- NodeAllocator(uint32_t NPB = 4096)
- : NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)),
- IndexMask((1 << BitsPerIndex)-1) {
- assert(isPowerOf2_32(NPB));
- }
+ T Addr = nullptr;
+ NodeId Id = 0;
+};
+
+struct NodeBase;
+
+struct RefNode;
+struct DefNode;
+struct UseNode;
+struct PhiUseNode;
+
+struct CodeNode;
+struct InstrNode;
+struct PhiNode;
+struct StmtNode;
+struct BlockNode;
+struct FuncNode;
+
+// Use these short names with rdf:: qualification to avoid conflicts with
+// preexisting names. Do not use 'using namespace rdf'.
+using Node = NodeAddr<NodeBase *>;
+
+using Ref = NodeAddr<RefNode *>;
+using Def = NodeAddr<DefNode *>;
+using Use = NodeAddr<UseNode *>; // This may conflict with llvm::Use.
+using PhiUse = NodeAddr<PhiUseNode *>;
+
+using Code = NodeAddr<CodeNode *>;
+using Instr = NodeAddr<InstrNode *>;
+using Phi = NodeAddr<PhiNode *>;
+using Stmt = NodeAddr<StmtNode *>;
+using Block = NodeAddr<BlockNode *>;
+using Func = NodeAddr<FuncNode *>;
+
+// Fast memory allocation and translation between node id and node address.
+// This is really the same idea as the one underlying the "bump pointer
+// allocator", the difference being in the translation. A node id is
+// composed of two components: the index of the block in which it was
+// allocated, and the index within the block. With the default settings,
+// where the number of nodes per block is 4096, the node id (minus 1) is:
+//
+// bit position: 11 0
+// +----------------------------+--------------+
+// | Index of the block |Index in block|
+// +----------------------------+--------------+
+//
+// The actual node id is the above plus 1, to avoid creating a node id of 0.
+//
+// This method significantly improved the build time, compared to using maps
+// (std::unordered_map or DenseMap) to translate between pointers and ids.
+struct NodeAllocator {
+ // Amount of storage for a single node.
+ enum { NodeMemSize = 32 };
+
+ NodeAllocator(uint32_t NPB = 4096)
+ : NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)),
+ IndexMask((1 << BitsPerIndex) - 1) {
+ assert(isPowerOf2_32(NPB));
+ }
- NodeBase *ptr(NodeId N) const {
- uint32_t N1 = N-1;
- uint32_t BlockN = N1 >> BitsPerIndex;
- uint32_t Offset = (N1 & IndexMask) * NodeMemSize;
- return reinterpret_cast<NodeBase*>(Blocks[BlockN]+Offset);
- }
+ NodeBase *ptr(NodeId N) const {
+ uint32_t N1 = N - 1;
+ uint32_t BlockN = N1 >> BitsPerIndex;
+ uint32_t Offset = (N1 & IndexMask) * NodeMemSize;
+ return reinterpret_cast<NodeBase *>(Blocks[BlockN] + Offset);
+ }
- NodeId id(const NodeBase *P) const;
- NodeAddr<NodeBase*> New();
- void clear();
+ NodeId id(const NodeBase *P) const;
+ Node New();
+ void clear();
- private:
- void startNewBlock();
- bool needNewBlock();
+private:
+ void startNewBlock();
+ bool needNewBlock();
- uint32_t makeId(uint32_t Block, uint32_t Index) const {
- // Add 1 to the id, to avoid the id of 0, which is treated as "null".
- return ((Block << BitsPerIndex) | Index) + 1;
- }
+ uint32_t makeId(uint32_t Block, uint32_t Index) const {
+ // Add 1 to the id, to avoid the id of 0, which is treated as "null".
+ return ((Block << BitsPerIndex) | Index) + 1;
+ }
- const uint32_t NodesPerBlock;
- const uint32_t BitsPerIndex;
- const uint32_t IndexMask;
- char *ActiveEnd = nullptr;
- std::vector<char*> Blocks;
- using AllocatorTy = BumpPtrAllocatorImpl<MallocAllocator, 65536>;
- AllocatorTy MemPool;
- };
+ const uint32_t NodesPerBlock;
+ const uint32_t BitsPerIndex;
+ const uint32_t IndexMask;
+ char *ActiveEnd = nullptr;
+ std::vector<char *> Blocks;
+ using AllocatorTy = BumpPtrAllocatorImpl<MallocAllocator, 65536>;
+ AllocatorTy MemPool;
+};
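
To make the id layout described above concrete: with the default 4096 nodes per block, the low 12 bits of (id - 1) select the slot within a block and the remaining bits select the block, with 1 added so that id 0 stays reserved as "null". A standalone sketch of the same arithmetic:

    #include <cstdint>

    constexpr uint32_t BitsPerIndex = 12;               // log2(4096)
    constexpr uint32_t IndexMask = (1u << BitsPerIndex) - 1;

    constexpr uint32_t makeId(uint32_t Block, uint32_t Index) {
      return ((Block << BitsPerIndex) | Index) + 1;     // +1 keeps 0 as "null"
    }
    constexpr uint32_t blockOf(uint32_t Id) { return (Id - 1) >> BitsPerIndex; }
    constexpr uint32_t indexOf(uint32_t Id) { return (Id - 1) & IndexMask; }

    // The node at index 5 of block 3 gets id ((3 << 12) | 5) + 1 == 12294,
    // and the translation back recovers the same (block, index) pair.
    static_assert(makeId(3, 5) == 12294, "id encoding");
    static_assert(blockOf(12294) == 3 && indexOf(12294) == 5, "id decoding");
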
- using RegisterSet = std::set<RegisterRef>;
+using RegisterSet = std::set<RegisterRef>;
- struct TargetOperandInfo {
- TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {}
- virtual ~TargetOperandInfo() = default;
+struct TargetOperandInfo {
+ TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {}
+ virtual ~TargetOperandInfo() = default;
- virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const;
- virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const;
- virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const;
+ virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const;
+ virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const;
+ virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const;
- const TargetInstrInfo &TII;
- };
+ const TargetInstrInfo &TII;
+};
- // Packed register reference. Only used for storage.
- struct PackedRegisterRef {
- RegisterId Reg;
- uint32_t MaskId;
- };
+// Packed register reference. Only used for storage.
+struct PackedRegisterRef {
+ RegisterId Reg;
+ uint32_t MaskId;
+};
- struct LaneMaskIndex : private IndexedSet<LaneBitmask> {
- LaneMaskIndex() = default;
+struct LaneMaskIndex : private IndexedSet<LaneBitmask> {
+ LaneMaskIndex() = default;
- LaneBitmask getLaneMaskForIndex(uint32_t K) const {
- return K == 0 ? LaneBitmask::getAll() : get(K);
- }
+ LaneBitmask getLaneMaskForIndex(uint32_t K) const {
+ return K == 0 ? LaneBitmask::getAll() : get(K);
+ }
- uint32_t getIndexForLaneMask(LaneBitmask LM) {
- assert(LM.any());
- return LM.all() ? 0 : insert(LM);
- }
+ uint32_t getIndexForLaneMask(LaneBitmask LM) {
+ assert(LM.any());
+ return LM.all() ? 0 : insert(LM);
+ }
- uint32_t getIndexForLaneMask(LaneBitmask LM) const {
- assert(LM.any());
- return LM.all() ? 0 : find(LM);
- }
+ uint32_t getIndexForLaneMask(LaneBitmask LM) const {
+ assert(LM.any());
+ return LM.all() ? 0 : find(LM);
+ }
+};
+
+struct NodeBase {
+public:
+ // Make sure this is a POD.
+ NodeBase() = default;
+
+ uint16_t getType() const { return NodeAttrs::type(Attrs); }
+ uint16_t getKind() const { return NodeAttrs::kind(Attrs); }
+ uint16_t getFlags() const { return NodeAttrs::flags(Attrs); }
+ NodeId getNext() const { return Next; }
+
+ uint16_t getAttrs() const { return Attrs; }
+ void setAttrs(uint16_t A) { Attrs = A; }
+ void setFlags(uint16_t F) { setAttrs(NodeAttrs::set_flags(getAttrs(), F)); }
+
+ // Insert node NA after "this" in the circular chain.
+ void append(Node NA);
+
+ // Initialize all members to 0.
+ void init() { memset(this, 0, sizeof *this); }
+
+ void setNext(NodeId N) { Next = N; }
+
+protected:
+ uint16_t Attrs;
+ uint16_t Reserved;
+ NodeId Next; // Id of the next node in the circular chain.
+ // Definitions of nested types. Using anonymous nested structs would make
+ // this class definition clearer, but unnamed structs are not a part of
+ // the standard.
+ struct Def_struct {
+ NodeId DD, DU; // Ids of the first reached def and use.
};
-
- struct NodeBase {
- public:
- // Make sure this is a POD.
- NodeBase() = default;
-
- uint16_t getType() const { return NodeAttrs::type(Attrs); }
- uint16_t getKind() const { return NodeAttrs::kind(Attrs); }
- uint16_t getFlags() const { return NodeAttrs::flags(Attrs); }
- NodeId getNext() const { return Next; }
-
- uint16_t getAttrs() const { return Attrs; }
- void setAttrs(uint16_t A) { Attrs = A; }
- void setFlags(uint16_t F) { setAttrs(NodeAttrs::set_flags(getAttrs(), F)); }
-
- // Insert node NA after "this" in the circular chain.
- void append(NodeAddr<NodeBase*> NA);
-
- // Initialize all members to 0.
- void init() { memset(this, 0, sizeof *this); }
-
- void setNext(NodeId N) { Next = N; }
-
- protected:
- uint16_t Attrs;
- uint16_t Reserved;
- NodeId Next; // Id of the next node in the circular chain.
- // Definitions of nested types. Using anonymous nested structs would make
- // this class definition clearer, but unnamed structs are not a part of
- // the standard.
- struct Def_struct {
- NodeId DD, DU; // Ids of the first reached def and use.
- };
- struct PhiU_struct {
- NodeId PredB; // Id of the predecessor block for a phi use.
- };
- struct Code_struct {
- void *CP; // Pointer to the actual code.
- NodeId FirstM, LastM; // Id of the first member and last.
- };
- struct Ref_struct {
- NodeId RD, Sib; // Ids of the reaching def and the sibling.
- union {
- Def_struct Def;
- PhiU_struct PhiU;
- };
- union {
- MachineOperand *Op; // Non-phi refs point to a machine operand.
- PackedRegisterRef PR; // Phi refs store register info directly.
- };
+ struct PhiU_struct {
+ NodeId PredB; // Id of the predecessor block for a phi use.
+ };
+ struct Code_struct {
+ void *CP; // Pointer to the actual code.
+ NodeId FirstM, LastM; // Id of the first member and last.
+ };
+ struct Ref_struct {
+ NodeId RD, Sib; // Ids of the reaching def and the sibling.
+ union {
+ Def_struct Def;
+ PhiU_struct PhiU;
};
-
- // The actual payload.
union {
- Ref_struct Ref;
- Code_struct Code;
+ MachineOperand *Op; // Non-phi refs point to a machine operand.
+ PackedRegisterRef PR; // Phi refs store register info directly.
};
};
- // The allocator allocates chunks of 32 bytes for each node. The fact that
- // each node takes 32 bytes in memory is used for fast translation between
- // the node id and the node address.
- static_assert(sizeof(NodeBase) <= NodeAllocator::NodeMemSize,
- "NodeBase must be at most NodeAllocator::NodeMemSize bytes");
- using NodeList = SmallVector<NodeAddr<NodeBase *>, 4>;
- using NodeSet = std::set<NodeId>;
+ // The actual payload.
+ union {
+ Ref_struct RefData;
+ Code_struct CodeData;
+ };
+};
+// The allocator allocates chunks of 32 bytes for each node. The fact that
+// each node takes 32 bytes in memory is used for fast translation between
+// the node id and the node address.
+static_assert(sizeof(NodeBase) <= NodeAllocator::NodeMemSize,
+ "NodeBase must be at most NodeAllocator::NodeMemSize bytes");
- struct RefNode : public NodeBase {
- RefNode() = default;
+using NodeList = SmallVector<Node, 4>;
+using NodeSet = std::set<NodeId>;
- RegisterRef getRegRef(const DataFlowGraph &G) const;
+struct RefNode : public NodeBase {
+ RefNode() = default;
- MachineOperand &getOp() {
- assert(!(getFlags() & NodeAttrs::PhiRef));
- return *Ref.Op;
- }
+ RegisterRef getRegRef(const DataFlowGraph &G) const;
- void setRegRef(RegisterRef RR, DataFlowGraph &G);
- void setRegRef(MachineOperand *Op, DataFlowGraph &G);
+ MachineOperand &getOp() {
+ assert(!(getFlags() & NodeAttrs::PhiRef));
+ return *RefData.Op;
+ }
- NodeId getReachingDef() const {
- return Ref.RD;
- }
- void setReachingDef(NodeId RD) {
- Ref.RD = RD;
- }
+ void setRegRef(RegisterRef RR, DataFlowGraph &G);
+ void setRegRef(MachineOperand *Op, DataFlowGraph &G);
- NodeId getSibling() const {
- return Ref.Sib;
- }
- void setSibling(NodeId Sib) {
- Ref.Sib = Sib;
- }
+ NodeId getReachingDef() const { return RefData.RD; }
+ void setReachingDef(NodeId RD) { RefData.RD = RD; }
- bool isUse() const {
- assert(getType() == NodeAttrs::Ref);
- return getKind() == NodeAttrs::Use;
- }
+ NodeId getSibling() const { return RefData.Sib; }
+ void setSibling(NodeId Sib) { RefData.Sib = Sib; }
- bool isDef() const {
- assert(getType() == NodeAttrs::Ref);
- return getKind() == NodeAttrs::Def;
- }
-
- template <typename Predicate>
- NodeAddr<RefNode*> getNextRef(RegisterRef RR, Predicate P, bool NextOnly,
- const DataFlowGraph &G);
- NodeAddr<NodeBase*> getOwner(const DataFlowGraph &G);
- };
+ bool isUse() const {
+ assert(getType() == NodeAttrs::Ref);
+ return getKind() == NodeAttrs::Use;
+ }
- struct DefNode : public RefNode {
- NodeId getReachedDef() const {
- return Ref.Def.DD;
- }
- void setReachedDef(NodeId D) {
- Ref.Def.DD = D;
- }
- NodeId getReachedUse() const {
- return Ref.Def.DU;
- }
- void setReachedUse(NodeId U) {
- Ref.Def.DU = U;
- }
+ bool isDef() const {
+ assert(getType() == NodeAttrs::Ref);
+ return getKind() == NodeAttrs::Def;
+ }
- void linkToDef(NodeId Self, NodeAddr<DefNode*> DA);
- };
+ template <typename Predicate>
+ Ref getNextRef(RegisterRef RR, Predicate P, bool NextOnly,
+ const DataFlowGraph &G);
+ Node getOwner(const DataFlowGraph &G);
+};
+
+struct DefNode : public RefNode {
+ NodeId getReachedDef() const { return RefData.Def.DD; }
+ void setReachedDef(NodeId D) { RefData.Def.DD = D; }
+ NodeId getReachedUse() const { return RefData.Def.DU; }
+ void setReachedUse(NodeId U) { RefData.Def.DU = U; }
+
+ void linkToDef(NodeId Self, Def DA);
+};
+
+struct UseNode : public RefNode {
+ void linkToDef(NodeId Self, Def DA);
+};
+
+struct PhiUseNode : public UseNode {
+ NodeId getPredecessor() const {
+ assert(getFlags() & NodeAttrs::PhiRef);
+ return RefData.PhiU.PredB;
+ }
+ void setPredecessor(NodeId B) {
+ assert(getFlags() & NodeAttrs::PhiRef);
+ RefData.PhiU.PredB = B;
+ }
+};
- struct UseNode : public RefNode {
- void linkToDef(NodeId Self, NodeAddr<DefNode*> DA);
- };
+struct CodeNode : public NodeBase {
+ template <typename T> T getCode() const { //
+ return static_cast<T>(CodeData.CP);
+ }
+ void setCode(void *C) { CodeData.CP = C; }
- struct PhiUseNode : public UseNode {
- NodeId getPredecessor() const {
- assert(getFlags() & NodeAttrs::PhiRef);
- return Ref.PhiU.PredB;
- }
- void setPredecessor(NodeId B) {
- assert(getFlags() & NodeAttrs::PhiRef);
- Ref.PhiU.PredB = B;
- }
- };
+ Node getFirstMember(const DataFlowGraph &G) const;
+ Node getLastMember(const DataFlowGraph &G) const;
+ void addMember(Node NA, const DataFlowGraph &G);
+ void addMemberAfter(Node MA, Node NA, const DataFlowGraph &G);
+ void removeMember(Node NA, const DataFlowGraph &G);
- struct CodeNode : public NodeBase {
- template <typename T> T getCode() const {
- return static_cast<T>(Code.CP);
- }
- void setCode(void *C) {
- Code.CP = C;
- }
+ NodeList members(const DataFlowGraph &G) const;
+ template <typename Predicate>
+ NodeList members_if(Predicate P, const DataFlowGraph &G) const;
+};
- NodeAddr<NodeBase*> getFirstMember(const DataFlowGraph &G) const;
- NodeAddr<NodeBase*> getLastMember(const DataFlowGraph &G) const;
- void addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G);
- void addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA,
- const DataFlowGraph &G);
- void removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G);
+struct InstrNode : public CodeNode {
+ Node getOwner(const DataFlowGraph &G);
+};
- NodeList members(const DataFlowGraph &G) const;
- template <typename Predicate>
- NodeList members_if(Predicate P, const DataFlowGraph &G) const;
- };
+struct PhiNode : public InstrNode {
+ MachineInstr *getCode() const { return nullptr; }
+};
- struct InstrNode : public CodeNode {
- NodeAddr<NodeBase*> getOwner(const DataFlowGraph &G);
- };
+struct StmtNode : public InstrNode {
+ MachineInstr *getCode() const { //
+ return CodeNode::getCode<MachineInstr *>();
+ }
+};
- struct PhiNode : public InstrNode {
- MachineInstr *getCode() const {
- return nullptr;
- }
- };
+struct BlockNode : public CodeNode {
+ MachineBasicBlock *getCode() const {
+ return CodeNode::getCode<MachineBasicBlock *>();
+ }
- struct StmtNode : public InstrNode {
- MachineInstr *getCode() const {
- return CodeNode::getCode<MachineInstr*>();
- }
- };
+ void addPhi(Phi PA, const DataFlowGraph &G);
+};
- struct BlockNode : public CodeNode {
- MachineBasicBlock *getCode() const {
- return CodeNode::getCode<MachineBasicBlock*>();
- }
+struct FuncNode : public CodeNode {
+ MachineFunction *getCode() const {
+ return CodeNode::getCode<MachineFunction *>();
+ }
- void addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G);
+ Block findBlock(const MachineBasicBlock *BB, const DataFlowGraph &G) const;
+ Block getEntryBlock(const DataFlowGraph &G);
+};
+
+struct DataFlowGraph {
+ DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
+ const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
+ const MachineDominanceFrontier &mdf);
+ DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
+ const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
+ const MachineDominanceFrontier &mdf,
+ const TargetOperandInfo &toi);
+
+ struct Config {
+ Config() = default;
+ Config(unsigned Opts) : Options(Opts) {}
+ Config(ArrayRef<const TargetRegisterClass *> RCs) : Classes(RCs) {}
+ Config(ArrayRef<MCPhysReg> Track) : TrackRegs(Track.begin(), Track.end()) {}
+ Config(ArrayRef<RegisterId> Track)
+ : TrackRegs(Track.begin(), Track.end()) {}
+
+ unsigned Options = BuildOptions::None;
+ SmallVector<const TargetRegisterClass *> Classes;
+ std::set<RegisterId> TrackRegs;
};
- struct FuncNode : public CodeNode {
- MachineFunction *getCode() const {
- return CodeNode::getCode<MachineFunction*>();
- }
+ NodeBase *ptr(NodeId N) const;
+ template <typename T> T ptr(NodeId N) const { //
+ return static_cast<T>(ptr(N));
+ }
- NodeAddr<BlockNode*> findBlock(const MachineBasicBlock *BB,
- const DataFlowGraph &G) const;
- NodeAddr<BlockNode*> getEntryBlock(const DataFlowGraph &G);
- };
+ NodeId id(const NodeBase *P) const;
- struct DataFlowGraph {
- DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
- const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
- const MachineDominanceFrontier &mdf);
- DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
- const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
- const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi);
-
- NodeBase *ptr(NodeId N) const;
- template <typename T> T ptr(NodeId N) const {
- return static_cast<T>(ptr(N));
- }
+ template <typename T> NodeAddr<T> addr(NodeId N) const {
+ return {ptr<T>(N), N};
+ }
- NodeId id(const NodeBase *P) const;
+ Func getFunc() const { return TheFunc; }
+ MachineFunction &getMF() const { return MF; }
+ const TargetInstrInfo &getTII() const { return TII; }
+ const TargetRegisterInfo &getTRI() const { return TRI; }
+ const PhysicalRegisterInfo &getPRI() const { return PRI; }
+ const MachineDominatorTree &getDT() const { return MDT; }
+ const MachineDominanceFrontier &getDF() const { return MDF; }
+ const RegisterAggr &getLiveIns() const { return LiveIns; }
- template <typename T> NodeAddr<T> addr(NodeId N) const {
- return { ptr<T>(N), N };
- }
+ struct DefStack {
+ DefStack() = default;
- NodeAddr<FuncNode*> getFunc() const { return Func; }
- MachineFunction &getMF() const { return MF; }
- const TargetInstrInfo &getTII() const { return TII; }
- const TargetRegisterInfo &getTRI() const { return TRI; }
- const PhysicalRegisterInfo &getPRI() const { return PRI; }
- const MachineDominatorTree &getDT() const { return MDT; }
- const MachineDominanceFrontier &getDF() const { return MDF; }
- const RegisterAggr &getLiveIns() const { return LiveIns; }
+ bool empty() const { return Stack.empty() || top() == bottom(); }
- struct DefStack {
- DefStack() = default;
+ private:
+ using value_type = Def;
+ struct Iterator {
+ using value_type = DefStack::value_type;
- bool empty() const { return Stack.empty() || top() == bottom(); }
+ Iterator &up() {
+ Pos = DS.nextUp(Pos);
+ return *this;
+ }
+ Iterator &down() {
+ Pos = DS.nextDown(Pos);
+ return *this;
+ }
- private:
- using value_type = NodeAddr<DefNode *>;
- struct Iterator {
- using value_type = DefStack::value_type;
-
- Iterator &up() { Pos = DS.nextUp(Pos); return *this; }
- Iterator &down() { Pos = DS.nextDown(Pos); return *this; }
-
- value_type operator*() const {
- assert(Pos >= 1);
- return DS.Stack[Pos-1];
- }
- const value_type *operator->() const {
- assert(Pos >= 1);
- return &DS.Stack[Pos-1];
- }
- bool operator==(const Iterator &It) const { return Pos == It.Pos; }
- bool operator!=(const Iterator &It) const { return Pos != It.Pos; }
-
- private:
- friend struct DefStack;
-
- Iterator(const DefStack &S, bool Top);
-
- // Pos-1 is the index in the StorageType object that corresponds to
- // the top of the DefStack.
- const DefStack &DS;
- unsigned Pos;
- };
-
- public:
- using iterator = Iterator;
-
- iterator top() const { return Iterator(*this, true); }
- iterator bottom() const { return Iterator(*this, false); }
- unsigned size() const;
-
- void push(NodeAddr<DefNode*> DA) { Stack.push_back(DA); }
- void pop();
- void start_block(NodeId N);
- void clear_block(NodeId N);
+ value_type operator*() const {
+ assert(Pos >= 1);
+ return DS.Stack[Pos - 1];
+ }
+ const value_type *operator->() const {
+ assert(Pos >= 1);
+ return &DS.Stack[Pos - 1];
+ }
+ bool operator==(const Iterator &It) const { return Pos == It.Pos; }
+ bool operator!=(const Iterator &It) const { return Pos != It.Pos; }
private:
- friend struct Iterator;
+ friend struct DefStack;
- using StorageType = std::vector<value_type>;
+ Iterator(const DefStack &S, bool Top);
- bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const {
- return (P.Addr == nullptr) && (N == 0 || P.Id == N);
- }
+ // Pos-1 is the index in the StorageType object that corresponds to
+ // the top of the DefStack.
+ const DefStack &DS;
+ unsigned Pos;
+ };
- unsigned nextUp(unsigned P) const;
- unsigned nextDown(unsigned P) const;
+ public:
+ using iterator = Iterator;
- StorageType Stack;
- };
+ iterator top() const { return Iterator(*this, true); }
+ iterator bottom() const { return Iterator(*this, false); }
+ unsigned size() const;
- // Make this std::unordered_map for speed of accessing elements.
- // Map: Register (physical or virtual) -> DefStack
- using DefStackMap = std::unordered_map<RegisterId, DefStack>;
+ void push(Def DA) { Stack.push_back(DA); }
+ void pop();
+ void start_block(NodeId N);
+ void clear_block(NodeId N);
- void build(unsigned Options = BuildOptions::None);
- void pushAllDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM);
- void markBlock(NodeId B, DefStackMap &DefM);
- void releaseBlock(NodeId B, DefStackMap &DefM);
+ private:
+ friend struct Iterator;
- PackedRegisterRef pack(RegisterRef RR) {
- return { RR.Reg, LMI.getIndexForLaneMask(RR.Mask) };
- }
- PackedRegisterRef pack(RegisterRef RR) const {
- return { RR.Reg, LMI.getIndexForLaneMask(RR.Mask) };
- }
- RegisterRef unpack(PackedRegisterRef PR) const {
- return RegisterRef(PR.Reg, LMI.getLaneMaskForIndex(PR.MaskId));
+ using StorageType = std::vector<value_type>;
+
+ bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const {
+ return (P.Addr == nullptr) && (N == 0 || P.Id == N);
}
- RegisterRef makeRegRef(unsigned Reg, unsigned Sub) const;
- RegisterRef makeRegRef(const MachineOperand &Op) const;
+ unsigned nextUp(unsigned P) const;
+ unsigned nextDown(unsigned P) const;
- NodeAddr<RefNode*> getNextRelated(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const;
- NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA, bool Create);
- NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const;
+ StorageType Stack;
+ };
- NodeList getRelatedRefs(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const;
+ // Make this std::unordered_map for speed of accessing elements.
+ // Map: Register (physical or virtual) -> DefStack
+ using DefStackMap = std::unordered_map<RegisterId, DefStack>;
- NodeAddr<BlockNode*> findBlock(MachineBasicBlock *BB) const {
- return BlockNodes.at(BB);
- }
+ void build(const Config &config);
+ void build() { build(Config()); }
- void unlinkUse(NodeAddr<UseNode*> UA, bool RemoveFromOwner) {
- unlinkUseDF(UA);
- if (RemoveFromOwner)
- removeFromOwner(UA);
- }
+ void pushAllDefs(Instr IA, DefStackMap &DM);
+ void markBlock(NodeId B, DefStackMap &DefM);
+ void releaseBlock(NodeId B, DefStackMap &DefM);
- void unlinkDef(NodeAddr<DefNode*> DA, bool RemoveFromOwner) {
- unlinkDefDF(DA);
- if (RemoveFromOwner)
- removeFromOwner(DA);
- }
+ PackedRegisterRef pack(RegisterRef RR) {
+ return {RR.Reg, LMI.getIndexForLaneMask(RR.Mask)};
+ }
+ PackedRegisterRef pack(RegisterRef RR) const {
+ return {RR.Reg, LMI.getIndexForLaneMask(RR.Mask)};
+ }
+ RegisterRef unpack(PackedRegisterRef PR) const {
+ return RegisterRef(PR.Reg, LMI.getLaneMaskForIndex(PR.MaskId));
+ }
- // Some useful filters.
- template <uint16_t Kind>
- static bool IsRef(const NodeAddr<NodeBase*> BA) {
- return BA.Addr->getType() == NodeAttrs::Ref &&
- BA.Addr->getKind() == Kind;
- }
+ RegisterRef makeRegRef(unsigned Reg, unsigned Sub) const;
+ RegisterRef makeRegRef(const MachineOperand &Op) const;
- template <uint16_t Kind>
- static bool IsCode(const NodeAddr<NodeBase*> BA) {
- return BA.Addr->getType() == NodeAttrs::Code &&
- BA.Addr->getKind() == Kind;
- }
+ Ref getNextRelated(Instr IA, Ref RA) const;
+ Ref getNextShadow(Instr IA, Ref RA, bool Create);
- static bool IsDef(const NodeAddr<NodeBase*> BA) {
- return BA.Addr->getType() == NodeAttrs::Ref &&
- BA.Addr->getKind() == NodeAttrs::Def;
- }
+ NodeList getRelatedRefs(Instr IA, Ref RA) const;
- static bool IsUse(const NodeAddr<NodeBase*> BA) {
- return BA.Addr->getType() == NodeAttrs::Ref &&
- BA.Addr->getKind() == NodeAttrs::Use;
- }
+ Block findBlock(MachineBasicBlock *BB) const { return BlockNodes.at(BB); }
- static bool IsPhi(const NodeAddr<NodeBase*> BA) {
- return BA.Addr->getType() == NodeAttrs::Code &&
- BA.Addr->getKind() == NodeAttrs::Phi;
- }
+ void unlinkUse(Use UA, bool RemoveFromOwner) {
+ unlinkUseDF(UA);
+ if (RemoveFromOwner)
+ removeFromOwner(UA);
+ }
- static bool IsPreservingDef(const NodeAddr<DefNode*> DA) {
- uint16_t Flags = DA.Addr->getFlags();
- return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef);
- }
+ void unlinkDef(Def DA, bool RemoveFromOwner) {
+ unlinkDefDF(DA);
+ if (RemoveFromOwner)
+ removeFromOwner(DA);
+ }
- private:
- void reset();
-
- RegisterSet getLandingPadLiveIns() const;
-
- NodeAddr<NodeBase*> newNode(uint16_t Attrs);
- NodeAddr<NodeBase*> cloneNode(const NodeAddr<NodeBase*> B);
- NodeAddr<UseNode*> newUse(NodeAddr<InstrNode*> Owner,
- MachineOperand &Op, uint16_t Flags = NodeAttrs::None);
- NodeAddr<PhiUseNode*> newPhiUse(NodeAddr<PhiNode*> Owner,
- RegisterRef RR, NodeAddr<BlockNode*> PredB,
- uint16_t Flags = NodeAttrs::PhiRef);
- NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner,
- MachineOperand &Op, uint16_t Flags = NodeAttrs::None);
- NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner,
- RegisterRef RR, uint16_t Flags = NodeAttrs::PhiRef);
- NodeAddr<PhiNode*> newPhi(NodeAddr<BlockNode*> Owner);
- NodeAddr<StmtNode*> newStmt(NodeAddr<BlockNode*> Owner,
- MachineInstr *MI);
- NodeAddr<BlockNode*> newBlock(NodeAddr<FuncNode*> Owner,
- MachineBasicBlock *BB);
- NodeAddr<FuncNode*> newFunc(MachineFunction *MF);
-
- template <typename Predicate>
- std::pair<NodeAddr<RefNode*>,NodeAddr<RefNode*>>
- locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
- Predicate P) const;
-
- using BlockRefsMap = std::map<NodeId, RegisterSet>;
-
- void buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In);
- void recordDefsForDF(BlockRefsMap &PhiM, NodeAddr<BlockNode*> BA);
- void buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs,
- NodeAddr<BlockNode*> BA);
- void removeUnusedPhis();
-
- void pushClobbers(NodeAddr<InstrNode*> IA, DefStackMap &DM);
- void pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM);
- template <typename T> void linkRefUp(NodeAddr<InstrNode*> IA,
- NodeAddr<T> TA, DefStack &DS);
- template <typename Predicate> void linkStmtRefs(DefStackMap &DefM,
- NodeAddr<StmtNode*> SA, Predicate P);
- void linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA);
-
- void unlinkUseDF(NodeAddr<UseNode*> UA);
- void unlinkDefDF(NodeAddr<DefNode*> DA);
-
- void removeFromOwner(NodeAddr<RefNode*> RA) {
- NodeAddr<InstrNode*> IA = RA.Addr->getOwner(*this);
- IA.Addr->removeMember(RA, *this);
- }
+ bool isTracked(RegisterRef RR) const;
+ bool hasUntrackedRef(Stmt S, bool IgnoreReserved = true) const;
- // Default TOI object, if not given in the constructor.
- std::unique_ptr<TargetOperandInfo> DefaultTOI;
-
- MachineFunction &MF;
- const TargetInstrInfo &TII;
- const TargetRegisterInfo &TRI;
- const PhysicalRegisterInfo PRI;
- const MachineDominatorTree &MDT;
- const MachineDominanceFrontier &MDF;
- const TargetOperandInfo &TOI;
-
- RegisterAggr LiveIns;
- NodeAddr<FuncNode*> Func;
- NodeAllocator Memory;
- // Local map: MachineBasicBlock -> NodeAddr<BlockNode*>
- std::map<MachineBasicBlock*,NodeAddr<BlockNode*>> BlockNodes;
- // Lane mask map.
- LaneMaskIndex LMI;
- }; // struct DataFlowGraph
+ // Some useful filters.
+ template <uint16_t Kind> static bool IsRef(const Node BA) {
+ return BA.Addr->getType() == NodeAttrs::Ref && BA.Addr->getKind() == Kind;
+ }
- template <typename Predicate>
- NodeAddr<RefNode*> RefNode::getNextRef(RegisterRef RR, Predicate P,
- bool NextOnly, const DataFlowGraph &G) {
- // Get the "Next" reference in the circular list that references RR and
- // satisfies predicate "Pred".
- auto NA = G.addr<NodeBase*>(getNext());
-
- while (NA.Addr != this) {
- if (NA.Addr->getType() == NodeAttrs::Ref) {
- NodeAddr<RefNode*> RA = NA;
- if (RA.Addr->getRegRef(G) == RR && P(NA))
- return NA;
- if (NextOnly)
- break;
- NA = G.addr<NodeBase*>(NA.Addr->getNext());
- } else {
- // We've hit the beginning of the chain.
- assert(NA.Addr->getType() == NodeAttrs::Code);
- NodeAddr<CodeNode*> CA = NA;
- NA = CA.Addr->getFirstMember(G);
- }
- }
- // Return the equivalent of "nullptr" if such a node was not found.
- return NodeAddr<RefNode*>();
+ template <uint16_t Kind> static bool IsCode(const Node BA) {
+ return BA.Addr->getType() == NodeAttrs::Code && BA.Addr->getKind() == Kind;
+ }
+
+ static bool IsDef(const Node BA) {
+ return BA.Addr->getType() == NodeAttrs::Ref &&
+ BA.Addr->getKind() == NodeAttrs::Def;
+ }
+
+ static bool IsUse(const Node BA) {
+ return BA.Addr->getType() == NodeAttrs::Ref &&
+ BA.Addr->getKind() == NodeAttrs::Use;
+ }
+
+ static bool IsPhi(const Node BA) {
+ return BA.Addr->getType() == NodeAttrs::Code &&
+ BA.Addr->getKind() == NodeAttrs::Phi;
}
+ static bool IsPreservingDef(const Def DA) {
+ uint16_t Flags = DA.Addr->getFlags();
+ return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef);
+ }
+
+private:
+ void reset();
+
+ RegisterAggr getLandingPadLiveIns() const;
+
+ Node newNode(uint16_t Attrs);
+ Node cloneNode(const Node B);
+ Use newUse(Instr Owner, MachineOperand &Op, uint16_t Flags = NodeAttrs::None);
+ PhiUse newPhiUse(Phi Owner, RegisterRef RR, Block PredB,
+ uint16_t Flags = NodeAttrs::PhiRef);
+ Def newDef(Instr Owner, MachineOperand &Op, uint16_t Flags = NodeAttrs::None);
+ Def newDef(Instr Owner, RegisterRef RR, uint16_t Flags = NodeAttrs::PhiRef);
+ Phi newPhi(Block Owner);
+ Stmt newStmt(Block Owner, MachineInstr *MI);
+ Block newBlock(Func Owner, MachineBasicBlock *BB);
+ Func newFunc(MachineFunction *MF);
+
template <typename Predicate>
- NodeList CodeNode::members_if(Predicate P, const DataFlowGraph &G) const {
- NodeList MM;
- auto M = getFirstMember(G);
- if (M.Id == 0)
- return MM;
-
- while (M.Addr != this) {
- if (P(M))
- MM.push_back(M);
- M = G.addr<NodeBase*>(M.Addr->getNext());
+ std::pair<Ref, Ref> locateNextRef(Instr IA, Ref RA, Predicate P) const;
+
+ using BlockRefsMap = RegisterAggrMap<NodeId>;
+
+ void buildStmt(Block BA, MachineInstr &In);
+ void recordDefsForDF(BlockRefsMap &PhiM, Block BA);
+ void buildPhis(BlockRefsMap &PhiM, Block BA);
+ void removeUnusedPhis();
+
+ void pushClobbers(Instr IA, DefStackMap &DM);
+ void pushDefs(Instr IA, DefStackMap &DM);
+ template <typename T> void linkRefUp(Instr IA, NodeAddr<T> TA, DefStack &DS);
+ template <typename Predicate>
+ void linkStmtRefs(DefStackMap &DefM, Stmt SA, Predicate P);
+ void linkBlockRefs(DefStackMap &DefM, Block BA);
+
+ void unlinkUseDF(Use UA);
+ void unlinkDefDF(Def DA);
+
+ void removeFromOwner(Ref RA) {
+ Instr IA = RA.Addr->getOwner(*this);
+ IA.Addr->removeMember(RA, *this);
+ }
+
+ // Default TOI object, if not given in the constructor.
+ std::unique_ptr<TargetOperandInfo> DefaultTOI;
+
+ MachineFunction &MF;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const PhysicalRegisterInfo PRI;
+ const MachineDominatorTree &MDT;
+ const MachineDominanceFrontier &MDF;
+ const TargetOperandInfo &TOI;
+
+ RegisterAggr LiveIns;
+ Func TheFunc;
+ NodeAllocator Memory;
+ // Local map: MachineBasicBlock -> NodeAddr<BlockNode*>
+ std::map<MachineBasicBlock *, Block> BlockNodes;
+ // Lane mask map.
+ LaneMaskIndex LMI;
+
+ Config BuildCfg;
+ std::set<unsigned> TrackedUnits;
+ BitVector ReservedRegs;
+}; // struct DataFlowGraph
+
+template <typename Predicate>
+Ref RefNode::getNextRef(RegisterRef RR, Predicate P, bool NextOnly,
+ const DataFlowGraph &G) {
+ // Get the "Next" reference in the circular list that references RR and
+  // satisfies predicate "P".
+ auto NA = G.addr<NodeBase *>(getNext());
+
+ while (NA.Addr != this) {
+ if (NA.Addr->getType() == NodeAttrs::Ref) {
+ Ref RA = NA;
+ if (G.getPRI().equal_to(RA.Addr->getRegRef(G), RR) && P(NA))
+ return NA;
+ if (NextOnly)
+ break;
+ NA = G.addr<NodeBase *>(NA.Addr->getNext());
+ } else {
+ // We've hit the beginning of the chain.
+ assert(NA.Addr->getType() == NodeAttrs::Code);
+ // Make sure we stop here with NextOnly. Otherwise we can return the
+ // wrong ref. Consider the following while creating/linking shadow uses:
+ // -> code -> sr1 -> sr2 -> [back to code]
+ // Say that shadow refs sr1, and sr2 have been linked, but we need to
+ // create and link another one. Starting from sr2, we'd hit the code
+ // node and return sr1 if the iteration didn't stop here.
+ if (NextOnly)
+ break;
+ Code CA = NA;
+ NA = CA.Addr->getFirstMember(G);
}
+ }
+ // Return the equivalent of "nullptr" if such a node was not found.
+ return Ref();
+}
+
+template <typename Predicate>
+NodeList CodeNode::members_if(Predicate P, const DataFlowGraph &G) const {
+ NodeList MM;
+ auto M = getFirstMember(G);
+ if (M.Id == 0)
return MM;
+
+ while (M.Addr != this) {
+ if (P(M))
+ MM.push_back(M);
+ M = G.addr<NodeBase *>(M.Addr->getNext());
}
+ return MM;
+}
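
As a usage sketch of members_if together with the static filters on DataFlowGraph, collecting every phi node of a block could look like the following hypothetical helper, which relies only on the declarations above:

    #include "llvm/CodeGen/RDFGraph.h"

    // Returns the phi instructions owned by block BA, using the IsPhi filter.
    static llvm::rdf::NodeList getPhis(llvm::rdf::Block BA,
                                       const llvm::rdf::DataFlowGraph &G) {
      return BA.Addr->members_if(llvm::rdf::DataFlowGraph::IsPhi, G);
    }
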
- template <typename T>
- struct Print {
- Print(const T &x, const DataFlowGraph &g) : Obj(x), G(g) {}
+template <typename T> struct Print {
+ Print(const T &x, const DataFlowGraph &g) : Obj(x), G(g) {}
- const T &Obj;
- const DataFlowGraph &G;
- };
+ const T &Obj;
+ const DataFlowGraph &G;
+};
- template <typename T> Print(const T &, const DataFlowGraph &) -> Print<T>;
+template <typename T> Print(const T &, const DataFlowGraph &) -> Print<T>;
- template <typename T>
- struct PrintNode : Print<NodeAddr<T>> {
- PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g)
+template <typename T> struct PrintNode : Print<NodeAddr<T>> {
+ PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g)
: Print<NodeAddr<T>>(x, g) {}
- };
-
- raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterRef> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeId> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<DefNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<UseNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS,
- const Print<NodeAddr<PhiUseNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<RefNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeList> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeSet> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<PhiNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS,
- const Print<NodeAddr<StmtNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS,
- const Print<NodeAddr<InstrNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS,
- const Print<NodeAddr<BlockNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS,
- const Print<NodeAddr<FuncNode *>> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterSet> &P);
- raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterAggr> &P);
- raw_ostream &operator<<(raw_ostream &OS,
- const Print<DataFlowGraph::DefStack> &P);
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterRef> &P);
+raw_ostream &operator<<(raw_ostream &OS, const Print<NodeId> &P);
+raw_ostream &operator<<(raw_ostream &OS, const Print<Def> &P);
+raw_ostream &operator<<(raw_ostream &OS, const Print<Use> &P);
+raw_ostream &operator<<(raw_ostream &OS, const Print<PhiUse> &P);
+raw_ostream &operator<<(raw_ostream &OS, const Print<Ref> &P);
+raw_ostream &operator<<(raw_ostream &OS, const Print<NodeList> &P);
+raw_ostream &operator<<(raw_ostream &OS, const Print<NodeSet> &P);
+raw_ostream &operator<<(raw_ostream &OS, const Print<Phi> &P);
+raw_ostream &operator<<(raw_ostream &OS, const Print<Stmt> &P);
+raw_ostream &operator<<(raw_ostream &OS, const Print<Instr> &P);
+raw_ostream &operator<<(raw_ostream &OS, const Print<Block> &P);
+raw_ostream &operator<<(raw_ostream &OS, const Print<Func> &P);
+raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterSet> &P);
+raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterAggr> &P);
+raw_ostream &operator<<(raw_ostream &OS,
+ const Print<DataFlowGraph::DefStack> &P);
} // end namespace rdf
-
} // end namespace llvm
#endif // LLVM_CODEGEN_RDFGRAPH_H
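
With the change above, build() now takes a Config, so options such as OmitReserved are passed as a structured argument rather than a raw bit mask. A minimal sketch of driving a build this way, assuming the caller has already constructed the graph from its analyses:

    #include "llvm/CodeGen/RDFGraph.h"

    // Builds the data-flow graph while skipping reserved registers.
    static void buildGraphIgnoringReserved(llvm::rdf::DataFlowGraph &G) {
      llvm::rdf::DataFlowGraph::Config Cfg(llvm::rdf::BuildOptions::OmitReserved);
      G.build(Cfg);
    }
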
diff --git a/llvm/include/llvm/CodeGen/RDFLiveness.h b/llvm/include/llvm/CodeGen/RDFLiveness.h
index 45cd84a13007..fe1034f9b6f8 100644
--- a/llvm/include/llvm/CodeGen/RDFLiveness.h
+++ b/llvm/include/llvm/CodeGen/RDFLiveness.h
@@ -30,9 +30,6 @@ class MachineDominatorTree;
class MachineRegisterInfo;
class TargetRegisterInfo;
-} // namespace llvm
-
-namespace llvm {
namespace rdf {
namespace detail {
@@ -53,123 +50,111 @@ template <> struct hash<llvm::rdf::detail::NodeRef> {
} // namespace std
-namespace llvm {
-namespace rdf {
+namespace llvm::rdf {
+
+struct Liveness {
+public:
+ using LiveMapType = RegisterAggrMap<MachineBasicBlock *>;
+ using NodeRef = detail::NodeRef;
+ using NodeRefSet = std::unordered_set<NodeRef>;
+ using RefMap = std::unordered_map<RegisterId, NodeRefSet>;
+
+ Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g)
+ : DFG(g), TRI(g.getTRI()), PRI(g.getPRI()), MDT(g.getDT()),
+ MDF(g.getDF()), LiveMap(g.getPRI()), Empty(), NoRegs(g.getPRI()) {}
+
+ NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode *> RefA,
+ bool TopShadows, bool FullChain,
+ const RegisterAggr &DefRRs);
+
+ NodeList getAllReachingDefs(NodeAddr<RefNode *> RefA) {
+ return getAllReachingDefs(RefA.Addr->getRegRef(DFG), RefA, false, false,
+ NoRegs);
+ }
+
+ NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode *> RefA) {
+ return getAllReachingDefs(RefRR, RefA, false, false, NoRegs);
+ }
+
+ NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode *> DefA,
+ const RegisterAggr &DefRRs);
+
+ NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode *> DefA) {
+ return getAllReachedUses(RefRR, DefA, NoRegs);
+ }
+
+ std::pair<NodeSet, bool> getAllReachingDefsRec(RegisterRef RefRR,
+ NodeAddr<RefNode *> RefA,
+ NodeSet &Visited,
+ const NodeSet &Defs);
+
+ NodeAddr<RefNode *> getNearestAliasedRef(RegisterRef RefRR,
+ NodeAddr<InstrNode *> IA);
+
+ LiveMapType &getLiveMap() { return LiveMap; }
+ const LiveMapType &getLiveMap() const { return LiveMap; }
+
+ const RefMap &getRealUses(NodeId P) const {
+ auto F = RealUseMap.find(P);
+ return F == RealUseMap.end() ? Empty : F->second;
+ }
+
+ void computePhiInfo();
+ void computeLiveIns();
+ void resetLiveIns();
+ void resetKills();
+ void resetKills(MachineBasicBlock *B);
+
+ void trace(bool T) { Trace = T; }
+
+private:
+ const DataFlowGraph &DFG;
+ const TargetRegisterInfo &TRI;
+ const PhysicalRegisterInfo &PRI;
+ const MachineDominatorTree &MDT;
+ const MachineDominanceFrontier &MDF;
+ LiveMapType LiveMap;
+ const RefMap Empty;
+ const RegisterAggr NoRegs;
+ bool Trace = false;
+
+ // Cache of mapping from node ids (for RefNodes) to the containing
+ // basic blocks. Not computing it each time for each node reduces
+ // the liveness calculation time by a large fraction.
+ DenseMap<NodeId, MachineBasicBlock *> NBMap;
+
+ // Phi information:
+ //
+ // RealUseMap
+ // map: NodeId -> (map: RegisterId -> NodeRefSet)
+ // phi id -> (map: register -> set of reached non-phi uses)
+ DenseMap<NodeId, RefMap> RealUseMap;
+
+ // Inverse iterated dominance frontier.
+ std::map<MachineBasicBlock *, std::set<MachineBasicBlock *>> IIDF;
+
+ // Live on entry.
+ std::map<MachineBasicBlock *, RefMap> PhiLON;
+
+ // Phi uses are considered to be located at the end of the block that
+ // they are associated with. The reaching def of a phi use dominates the
+ // block that the use corresponds to, but not the block that contains
+ // the phi itself. To include these uses in the liveness propagation (up
+ // the dominator tree), create a map: block -> set of uses live on exit.
+ std::map<MachineBasicBlock *, RefMap> PhiLOX;
+
+ MachineBasicBlock *getBlockWithRef(NodeId RN) const;
+ void traverse(MachineBasicBlock *B, RefMap &LiveIn);
+ void emptify(RefMap &M);
+
+ std::pair<NodeSet, bool>
+ getAllReachingDefsRecImpl(RegisterRef RefRR, NodeAddr<RefNode *> RefA,
+ NodeSet &Visited, const NodeSet &Defs,
+ unsigned Nest, unsigned MaxNest);
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<Liveness::RefMap> &P);
- struct Liveness {
- public:
- // This is really a std::map, except that it provides a non-trivial
- // default constructor to the element accessed via [].
- struct LiveMapType {
- LiveMapType(const PhysicalRegisterInfo &pri) : Empty(pri) {}
-
- RegisterAggr &operator[] (MachineBasicBlock *B) {
- return Map.emplace(B, Empty).first->second;
- }
-
- private:
- RegisterAggr Empty;
- std::map<MachineBasicBlock*,RegisterAggr> Map;
- };
-
- using NodeRef = detail::NodeRef;
- using NodeRefSet = std::unordered_set<NodeRef>;
- using RefMap = std::unordered_map<RegisterId, NodeRefSet>;
-
- Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g)
- : DFG(g), TRI(g.getTRI()), PRI(g.getPRI()), MDT(g.getDT()),
- MDF(g.getDF()), LiveMap(g.getPRI()), Empty(), NoRegs(g.getPRI()) {}
-
- NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
- bool TopShadows, bool FullChain, const RegisterAggr &DefRRs);
-
- NodeList getAllReachingDefs(NodeAddr<RefNode*> RefA) {
- return getAllReachingDefs(RefA.Addr->getRegRef(DFG), RefA, false,
- false, NoRegs);
- }
-
- NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA) {
- return getAllReachingDefs(RefRR, RefA, false, false, NoRegs);
- }
-
- NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA,
- const RegisterAggr &DefRRs);
-
- NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA) {
- return getAllReachedUses(RefRR, DefA, NoRegs);
- }
-
- std::pair<NodeSet,bool> getAllReachingDefsRec(RegisterRef RefRR,
- NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs);
-
- NodeAddr<RefNode*> getNearestAliasedRef(RegisterRef RefRR,
- NodeAddr<InstrNode*> IA);
-
- LiveMapType &getLiveMap() { return LiveMap; }
- const LiveMapType &getLiveMap() const { return LiveMap; }
-
- const RefMap &getRealUses(NodeId P) const {
- auto F = RealUseMap.find(P);
- return F == RealUseMap.end() ? Empty : F->second;
- }
-
- void computePhiInfo();
- void computeLiveIns();
- void resetLiveIns();
- void resetKills();
- void resetKills(MachineBasicBlock *B);
-
- void trace(bool T) { Trace = T; }
-
- private:
- const DataFlowGraph &DFG;
- const TargetRegisterInfo &TRI;
- const PhysicalRegisterInfo &PRI;
- const MachineDominatorTree &MDT;
- const MachineDominanceFrontier &MDF;
- LiveMapType LiveMap;
- const RefMap Empty;
- const RegisterAggr NoRegs;
- bool Trace = false;
-
- // Cache of mapping from node ids (for RefNodes) to the containing
- // basic blocks. Not computing it each time for each node reduces
- // the liveness calculation time by a large fraction.
- DenseMap<NodeId, MachineBasicBlock *> NBMap;
-
- // Phi information:
- //
- // RealUseMap
- // map: NodeId -> (map: RegisterId -> NodeRefSet)
- // phi id -> (map: register -> set of reached non-phi uses)
- DenseMap<NodeId, RefMap> RealUseMap;
-
- // Inverse iterated dominance frontier.
- std::map<MachineBasicBlock*,std::set<MachineBasicBlock*>> IIDF;
-
- // Live on entry.
- std::map<MachineBasicBlock*,RefMap> PhiLON;
-
- // Phi uses are considered to be located at the end of the block that
- // they are associated with. The reaching def of a phi use dominates the
- // block that the use corresponds to, but not the block that contains
- // the phi itself. To include these uses in the liveness propagation (up
- // the dominator tree), create a map: block -> set of uses live on exit.
- std::map<MachineBasicBlock*,RefMap> PhiLOX;
-
- MachineBasicBlock *getBlockWithRef(NodeId RN) const;
- void traverse(MachineBasicBlock *B, RefMap &LiveIn);
- void emptify(RefMap &M);
-
- std::pair<NodeSet,bool> getAllReachingDefsRecImpl(RegisterRef RefRR,
- NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs,
- unsigned Nest, unsigned MaxNest);
- };
-
- raw_ostream &operator<<(raw_ostream &OS, const Print<Liveness::RefMap> &P);
-
-} // end namespace rdf
-
-} // end namespace llvm
+} // end namespace llvm::rdf
#endif // LLVM_CODEGEN_RDFLIVENESS_H
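
A sketch of a typical driver sequence for the Liveness interface above, assuming a DataFlowGraph has already been built for the function; this mirrors only the declared API, and pass wiring is omitted:

    #include "llvm/CodeGen/RDFLiveness.h"

    static void computeLiveness(llvm::MachineRegisterInfo &MRI,
                                const llvm::rdf::DataFlowGraph &G) {
      llvm::rdf::Liveness LV(MRI, G);
      LV.computePhiInfo();  // map each phi to the non-phi uses it reaches
      LV.computeLiveIns();  // compute live-in registers for each block
    }
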
diff --git a/llvm/include/llvm/CodeGen/RDFRegisters.h b/llvm/include/llvm/CodeGen/RDFRegisters.h
index b18cbba2275c..7eed0b4e1e7b 100644
--- a/llvm/include/llvm/CodeGen/RDFRegisters.h
+++ b/llvm/include/llvm/CodeGen/RDFRegisters.h
@@ -11,8 +11,10 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegister.h"
#include <cassert>
#include <cstdint>
#include <map>
@@ -25,255 +27,351 @@ class MachineFunction;
class raw_ostream;
namespace rdf {
+struct RegisterAggr;
+
+using RegisterId = uint32_t;
+
+template <typename T>
+bool disjoint(const std::set<T> &A, const std::set<T> &B) {
+ auto ItA = A.begin(), EndA = A.end();
+ auto ItB = B.begin(), EndB = B.end();
+ while (ItA != EndA && ItB != EndB) {
+ if (*ItA < *ItB)
+ ++ItA;
+ else if (*ItB < *ItA)
+ ++ItB;
+ else
+ return false;
+ }
+ return true;
+}
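
disjoint() above walks both ordered sets in lockstep, advancing whichever iterator holds the smaller element and failing as soon as the two match, so it runs in O(|A| + |B|). A small self-contained example:

    #include "llvm/CodeGen/RDFRegisters.h"
    #include <cassert>
    #include <set>

    static void disjointExample() {
      std::set<int> A{1, 3, 5}, B{2, 4, 6}, C{5, 7};
      assert(llvm::rdf::disjoint(A, B));   // no common element
      assert(!llvm::rdf::disjoint(A, C));  // both contain 5
    }
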
- using RegisterId = uint32_t;
-
- // Template class for a map translating uint32_t into arbitrary types.
- // The map will act like an indexed set: upon insertion of a new object,
- // it will automatically assign a new index to it. Index of 0 is treated
- // as invalid and is never allocated.
- template <typename T, unsigned N = 32>
- struct IndexedSet {
- IndexedSet() { Map.reserve(N); }
-
- T get(uint32_t Idx) const {
- // Index Idx corresponds to Map[Idx-1].
- assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size());
- return Map[Idx-1];
- }
-
- uint32_t insert(T Val) {
- // Linear search.
- auto F = llvm::find(Map, Val);
- if (F != Map.end())
- return F - Map.begin() + 1;
- Map.push_back(Val);
- return Map.size(); // Return actual_index + 1.
- }
-
- uint32_t find(T Val) const {
- auto F = llvm::find(Map, Val);
- assert(F != Map.end());
+// Template class for a map translating uint32_t into arbitrary types.
+// The map will act like an indexed set: upon insertion of a new object,
+// it will automatically assign a new index to it. Index of 0 is treated
+// as invalid and is never allocated.
+template <typename T, unsigned N = 32> struct IndexedSet {
+ IndexedSet() { Map.reserve(N); }
+
+ T get(uint32_t Idx) const {
+ // Index Idx corresponds to Map[Idx-1].
+ assert(Idx != 0 && !Map.empty() && Idx - 1 < Map.size());
+ return Map[Idx - 1];
+ }
+
+ uint32_t insert(T Val) {
+ // Linear search.
+ auto F = llvm::find(Map, Val);
+ if (F != Map.end())
return F - Map.begin() + 1;
- }
+ Map.push_back(Val);
+ return Map.size(); // Return actual_index + 1.
+ }
+
+ uint32_t find(T Val) const {
+ auto F = llvm::find(Map, Val);
+ assert(F != Map.end());
+ return F - Map.begin() + 1;
+ }
+
+ uint32_t size() const { return Map.size(); }
+
+ using const_iterator = typename std::vector<T>::const_iterator;
- uint32_t size() const { return Map.size(); }
+ const_iterator begin() const { return Map.begin(); }
+ const_iterator end() const { return Map.end(); }
+
+private:
+ std::vector<T> Map;
+};
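
IndexedSet hands out 1-based indices, reserving 0 as the invalid index, and re-inserting an existing value returns its original index via a linear search. A short sketch of those semantics:

    #include "llvm/CodeGen/RDFRegisters.h"
    #include <cassert>
    #include <cstdint>

    static void indexedSetExample() {
      llvm::rdf::IndexedSet<int> S;
      uint32_t I1 = S.insert(10);   // first value gets index 1 (0 means invalid)
      uint32_t I2 = S.insert(20);   // 2
      uint32_t IA = S.insert(10);   // duplicate: returns the existing index 1
      assert(I1 == 1 && I2 == 2 && IA == 1);
      assert(S.get(I2) == 20);
      assert(S.find(10) == 1);
    }
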
+
+struct RegisterRef {
+ RegisterId Reg = 0;
+ LaneBitmask Mask = LaneBitmask::getNone(); // Only for registers.
+
+ constexpr RegisterRef() = default;
+ constexpr explicit RegisterRef(RegisterId R,
+ LaneBitmask M = LaneBitmask::getAll())
+ : Reg(R), Mask(isRegId(R) && R != 0 ? M : LaneBitmask::getNone()) {}
+
+ // Classify null register as a "register".
+ constexpr bool isReg() const { return Reg == 0 || isRegId(Reg); }
+ constexpr bool isUnit() const { return isUnitId(Reg); }
+ constexpr bool isMask() const { return isMaskId(Reg); }
+
+ constexpr unsigned idx() const { return toIdx(Reg); }
+
+ constexpr operator bool() const {
+ return !isReg() || (Reg != 0 && Mask.any());
+ }
+
+ size_t hash() const {
+ return std::hash<RegisterId>{}(Reg) ^
+ std::hash<LaneBitmask::Type>{}(Mask.getAsInteger());
+ }
+
+ static constexpr bool isRegId(unsigned Id) {
+ return Register::isPhysicalRegister(Id);
+ }
+ static constexpr bool isUnitId(unsigned Id) {
+ return Register::isVirtualRegister(Id);
+ }
+ static constexpr bool isMaskId(unsigned Id) {
+ return Register::isStackSlot(Id);
+ }
+
+ static constexpr RegisterId toUnitId(unsigned Idx) {
+ return Idx | MCRegister::VirtualRegFlag;
+ }
+
+ static constexpr unsigned toIdx(RegisterId Id) {
+ // Not using virtReg2Index or stackSlot2Index, because they are
+ // not constexpr.
+ if (isUnitId(Id))
+ return Id & ~MCRegister::VirtualRegFlag;
+ // RegId and MaskId are unchanged.
+ return Id;
+ }
+
+ bool operator<(RegisterRef) const = delete;
+ bool operator==(RegisterRef) const = delete;
+ bool operator!=(RegisterRef) const = delete;
+};
+
+struct PhysicalRegisterInfo {
+ PhysicalRegisterInfo(const TargetRegisterInfo &tri,
+ const MachineFunction &mf);
+
+ RegisterId getRegMaskId(const uint32_t *RM) const {
+ return Register::index2StackSlot(RegMasks.find(RM));
+ }
+
+ const uint32_t *getRegMaskBits(RegisterId R) const {
+ return RegMasks.get(Register::stackSlot2Index(R));
+ }
+
+ bool alias(RegisterRef RA, RegisterRef RB) const;
+
+ // Returns the set of aliased physical registers.
+ std::set<RegisterId> getAliasSet(RegisterId Reg) const;
+
+ RegisterRef getRefForUnit(uint32_t U) const {
+ return RegisterRef(UnitInfos[U].Reg, UnitInfos[U].Mask);
+ }
+
+ const BitVector &getMaskUnits(RegisterId MaskId) const {
+ return MaskInfos[Register::stackSlot2Index(MaskId)].Units;
+ }
+
+ std::set<RegisterId> getUnits(RegisterRef RR) const;
+
+ const BitVector &getUnitAliases(uint32_t U) const {
+ return AliasInfos[U].Regs;
+ }
+
+ RegisterRef mapTo(RegisterRef RR, unsigned R) const;
+ const TargetRegisterInfo &getTRI() const { return TRI; }
- using const_iterator = typename std::vector<T>::const_iterator;
+ bool equal_to(RegisterRef A, RegisterRef B) const;
+ bool less(RegisterRef A, RegisterRef B) const;
- const_iterator begin() const { return Map.begin(); }
- const_iterator end() const { return Map.end(); }
-
- private:
- std::vector<T> Map;
+ void print(raw_ostream &OS, RegisterRef A) const;
+ void print(raw_ostream &OS, const RegisterAggr &A) const;
+
+private:
+ struct RegInfo {
+ const TargetRegisterClass *RegClass = nullptr;
};
-
- struct RegisterRef {
+ struct UnitInfo {
RegisterId Reg = 0;
- LaneBitmask Mask = LaneBitmask::getNone();
-
- RegisterRef() = default;
- explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll())
- : Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {}
-
- operator bool() const {
- return Reg != 0 && Mask.any();
- }
-
- bool operator== (const RegisterRef &RR) const {
- return Reg == RR.Reg && Mask == RR.Mask;
- }
-
- bool operator!= (const RegisterRef &RR) const {
- return !operator==(RR);
- }
-
- bool operator< (const RegisterRef &RR) const {
- return Reg < RR.Reg || (Reg == RR.Reg && Mask < RR.Mask);
- }
-
- size_t hash() const {
- return std::hash<RegisterId>{}(Reg) ^
- std::hash<LaneBitmask::Type>{}(Mask.getAsInteger());
- }
+ LaneBitmask Mask;
+ };
+ struct MaskInfo {
+ BitVector Units;
+ };
+ struct AliasInfo {
+ BitVector Regs;
};
+ const TargetRegisterInfo &TRI;
+ IndexedSet<const uint32_t *> RegMasks;
+ std::vector<RegInfo> RegInfos;
+ std::vector<UnitInfo> UnitInfos;
+ std::vector<MaskInfo> MaskInfos;
+ std::vector<AliasInfo> AliasInfos;
+};
- struct PhysicalRegisterInfo {
- PhysicalRegisterInfo(const TargetRegisterInfo &tri,
- const MachineFunction &mf);
+struct RegisterAggr {
+ RegisterAggr(const PhysicalRegisterInfo &pri)
+ : Units(pri.getTRI().getNumRegUnits()), PRI(pri) {}
+ RegisterAggr(const RegisterAggr &RG) = default;
- static bool isRegMaskId(RegisterId R) {
- return Register::isStackSlot(R);
- }
+ unsigned size() const { return Units.count(); }
+ bool empty() const { return Units.none(); }
+ bool hasAliasOf(RegisterRef RR) const;
+ bool hasCoverOf(RegisterRef RR) const;
- RegisterId getRegMaskId(const uint32_t *RM) const {
- return Register::index2StackSlot(RegMasks.find(RM));
- }
+ const PhysicalRegisterInfo &getPRI() const { return PRI; }
- const uint32_t *getRegMaskBits(RegisterId R) const {
- return RegMasks.get(Register::stackSlot2Index(R));
- }
+ bool operator==(const RegisterAggr &A) const {
+ return DenseMapInfo<BitVector>::isEqual(Units, A.Units);
+ }
- bool alias(RegisterRef RA, RegisterRef RB) const {
- if (!isRegMaskId(RA.Reg))
- return !isRegMaskId(RB.Reg) ? aliasRR(RA, RB) : aliasRM(RA, RB);
- return !isRegMaskId(RB.Reg) ? aliasRM(RB, RA) : aliasMM(RA, RB);
- }
+ static bool isCoverOf(RegisterRef RA, RegisterRef RB,
+ const PhysicalRegisterInfo &PRI) {
+ return RegisterAggr(PRI).insert(RA).hasCoverOf(RB);
+ }
- std::set<RegisterId> getAliasSet(RegisterId Reg) const;
+ RegisterAggr &insert(RegisterRef RR);
+ RegisterAggr &insert(const RegisterAggr &RG);
+ RegisterAggr &intersect(RegisterRef RR);
+ RegisterAggr &intersect(const RegisterAggr &RG);
+ RegisterAggr &clear(RegisterRef RR);
+ RegisterAggr &clear(const RegisterAggr &RG);
- RegisterRef getRefForUnit(uint32_t U) const {
- return RegisterRef(UnitInfos[U].Reg, UnitInfos[U].Mask);
- }
-
- const BitVector &getMaskUnits(RegisterId MaskId) const {
- return MaskInfos[Register::stackSlot2Index(MaskId)].Units;
- }
+ RegisterRef intersectWith(RegisterRef RR) const;
+ RegisterRef clearIn(RegisterRef RR) const;
+ RegisterRef makeRegRef() const;
- const BitVector &getUnitAliases(uint32_t U) const {
- return AliasInfos[U].Regs;
- }
+ size_t hash() const { return DenseMapInfo<BitVector>::getHashValue(Units); }
- RegisterRef mapTo(RegisterRef RR, unsigned R) const;
- const TargetRegisterInfo &getTRI() const { return TRI; }
+ struct ref_iterator {
+ using MapType = std::map<RegisterId, LaneBitmask>;
private:
- struct RegInfo {
- const TargetRegisterClass *RegClass = nullptr;
- };
- struct UnitInfo {
- RegisterId Reg = 0;
- LaneBitmask Mask;
- };
- struct MaskInfo {
- BitVector Units;
- };
- struct AliasInfo {
- BitVector Regs;
- };
-
- const TargetRegisterInfo &TRI;
- IndexedSet<const uint32_t*> RegMasks;
- std::vector<RegInfo> RegInfos;
- std::vector<UnitInfo> UnitInfos;
- std::vector<MaskInfo> MaskInfos;
- std::vector<AliasInfo> AliasInfos;
-
- bool aliasRR(RegisterRef RA, RegisterRef RB) const;
- bool aliasRM(RegisterRef RR, RegisterRef RM) const;
- bool aliasMM(RegisterRef RM, RegisterRef RN) const;
- };
+ MapType Masks;
+ MapType::iterator Pos;
+ unsigned Index;
+ const RegisterAggr *Owner;
- struct RegisterAggr {
- RegisterAggr(const PhysicalRegisterInfo &pri)
- : Units(pri.getTRI().getNumRegUnits()), PRI(pri) {}
- RegisterAggr(const RegisterAggr &RG) = default;
+ public:
+ ref_iterator(const RegisterAggr &RG, bool End);
- unsigned count() const { return Units.count(); }
- bool empty() const { return Units.none(); }
- bool hasAliasOf(RegisterRef RR) const;
- bool hasCoverOf(RegisterRef RR) const;
+ RegisterRef operator*() const {
+ return RegisterRef(Pos->first, Pos->second);
+ }
- bool operator==(const RegisterAggr &A) const {
- return DenseMapInfo<BitVector>::isEqual(Units, A.Units);
+ ref_iterator &operator++() {
+ ++Pos;
+ ++Index;
+ return *this;
}
- static bool isCoverOf(RegisterRef RA, RegisterRef RB,
- const PhysicalRegisterInfo &PRI) {
- return RegisterAggr(PRI).insert(RA).hasCoverOf(RB);
+ bool operator==(const ref_iterator &I) const {
+ assert(Owner == I.Owner);
+ (void)Owner;
+ return Index == I.Index;
}
- RegisterAggr &insert(RegisterRef RR);
- RegisterAggr &insert(const RegisterAggr &RG);
- RegisterAggr &intersect(RegisterRef RR);
- RegisterAggr &intersect(const RegisterAggr &RG);
- RegisterAggr &clear(RegisterRef RR);
- RegisterAggr &clear(const RegisterAggr &RG);
+ bool operator!=(const ref_iterator &I) const { return !(*this == I); }
+ };
- RegisterRef intersectWith(RegisterRef RR) const;
- RegisterRef clearIn(RegisterRef RR) const;
- RegisterRef makeRegRef() const;
+ ref_iterator ref_begin() const { return ref_iterator(*this, false); }
+ ref_iterator ref_end() const { return ref_iterator(*this, true); }
+
+ using unit_iterator = typename BitVector::const_set_bits_iterator;
+ unit_iterator unit_begin() const { return Units.set_bits_begin(); }
+ unit_iterator unit_end() const { return Units.set_bits_end(); }
+
+ iterator_range<ref_iterator> refs() const {
+ return make_range(ref_begin(), ref_end());
+ }
+ iterator_range<unit_iterator> units() const {
+ return make_range(unit_begin(), unit_end());
+ }
+
+private:
+ BitVector Units;
+ const PhysicalRegisterInfo &PRI;
+};
+
+// This is really a std::map, except that it provides a non-trivial
+// default constructor to the element accessed via [].
+template <typename KeyType> struct RegisterAggrMap {
+ RegisterAggrMap(const PhysicalRegisterInfo &pri) : Empty(pri) {}
+
+ RegisterAggr &operator[](KeyType Key) {
+ return Map.emplace(Key, Empty).first->second;
+ }
+
+ auto begin() { return Map.begin(); }
+ auto end() { return Map.end(); }
+ auto begin() const { return Map.begin(); }
+ auto end() const { return Map.end(); }
+ auto find(const KeyType &Key) const { return Map.find(Key); }
+
+private:
+ RegisterAggr Empty;
+ std::map<KeyType, RegisterAggr> Map;
+
+public:
+ using key_type = typename decltype(Map)::key_type;
+ using mapped_type = typename decltype(Map)::mapped_type;
+ using value_type = typename decltype(Map)::value_type;
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const RegisterAggr &A);
+
+// Print the lane mask in a short form (or not at all if all bits are set).
+struct PrintLaneMaskShort {
+ PrintLaneMaskShort(LaneBitmask M) : Mask(M) {}
+ LaneBitmask Mask;
+};
+raw_ostream &operator<<(raw_ostream &OS, const PrintLaneMaskShort &P);
- size_t hash() const {
- return DenseMapInfo<BitVector>::getHashValue(Units);
- }
+} // end namespace rdf
+} // end namespace llvm
- void print(raw_ostream &OS) const;
+namespace std {
- struct rr_iterator {
- using MapType = std::map<RegisterId, LaneBitmask>;
+template <> struct hash<llvm::rdf::RegisterRef> {
+ size_t operator()(llvm::rdf::RegisterRef A) const { //
+ return A.hash();
+ }
+};
- private:
- MapType Masks;
- MapType::iterator Pos;
- unsigned Index;
- const RegisterAggr *Owner;
+template <> struct hash<llvm::rdf::RegisterAggr> {
+ size_t operator()(const llvm::rdf::RegisterAggr &A) const { //
+ return A.hash();
+ }
+};
- public:
- rr_iterator(const RegisterAggr &RG, bool End);
+template <> struct equal_to<llvm::rdf::RegisterRef> {
+ constexpr equal_to(const llvm::rdf::PhysicalRegisterInfo &pri) : PRI(&pri) {}
- RegisterRef operator*() const {
- return RegisterRef(Pos->first, Pos->second);
- }
+ bool operator()(llvm::rdf::RegisterRef A, llvm::rdf::RegisterRef B) const {
+ return PRI->equal_to(A, B);
+ }
- rr_iterator &operator++() {
- ++Pos;
- ++Index;
- return *this;
- }
+private:
+ // Make it a pointer just in case. See comment in `less` below.
+ const llvm::rdf::PhysicalRegisterInfo *PRI;
+};
- bool operator==(const rr_iterator &I) const {
- assert(Owner == I.Owner);
- (void)Owner;
- return Index == I.Index;
- }
+template <> struct equal_to<llvm::rdf::RegisterAggr> {
+ bool operator()(const llvm::rdf::RegisterAggr &A,
+ const llvm::rdf::RegisterAggr &B) const {
+ return A == B;
+ }
+};
- bool operator!=(const rr_iterator &I) const {
- return !(*this == I);
- }
- };
+template <> struct less<llvm::rdf::RegisterRef> {
+ constexpr less(const llvm::rdf::PhysicalRegisterInfo &pri) : PRI(&pri) {}
- rr_iterator rr_begin() const {
- return rr_iterator(*this, false);
- }
- rr_iterator rr_end() const {
- return rr_iterator(*this, true);
- }
+ bool operator()(llvm::rdf::RegisterRef A, llvm::rdf::RegisterRef B) const {
+ return PRI->less(A, B);
+ }
- private:
- BitVector Units;
- const PhysicalRegisterInfo &PRI;
- };
+private:
+ // Make it a pointer because apparently some versions of MSVC use std::swap
+ // on the std::less specialization.
+ const llvm::rdf::PhysicalRegisterInfo *PRI;
+};
- // Optionally print the lane mask, if it is not ~0.
- struct PrintLaneMaskOpt {
- PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {}
- LaneBitmask Mask;
- };
- raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P);
+} // namespace std
- raw_ostream &operator<< (raw_ostream &OS, const RegisterAggr &A);
-} // end namespace rdf
+namespace llvm::rdf {
+using RegisterSet = std::set<RegisterRef, std::less<RegisterRef>>;
+} // namespace llvm::rdf
-} // end namespace llvm
-
-namespace std {
- template <> struct hash<llvm::rdf::RegisterRef> {
- size_t operator()(llvm::rdf::RegisterRef A) const {
- return A.hash();
- }
- };
- template <> struct hash<llvm::rdf::RegisterAggr> {
- size_t operator()(const llvm::rdf::RegisterAggr &A) const {
- return A.hash();
- }
- };
- template <> struct equal_to<llvm::rdf::RegisterAggr> {
- bool operator()(const llvm::rdf::RegisterAggr &A,
- const llvm::rdf::RegisterAggr &B) const {
- return A == B;
- }
- };
-}
#endif // LLVM_CODEGEN_RDFREGISTERS_H
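
For illustration only, and not part of the imported diff: RegisterRef's comparison operators are now deleted, so ordering and equality are delegated to PhysicalRegisterInfo through the std::less and std::equal_to specializations above. The helper names below are hypothetical.

#include "llvm/CodeGen/RDFRegisters.h"

using namespace llvm::rdf;

// Equality of RegisterRefs is PRI-dependent; RegisterRef::operator== is deleted.
static bool sameRef(const PhysicalRegisterInfo &PRI, RegisterRef A,
                    RegisterRef B) {
  return std::equal_to<RegisterRef>(PRI)(A, B);
}

// RegisterSet's comparator is stateful, so the set cannot be
// default-constructed; pass a PRI-seeded std::less<RegisterRef> explicitly.
static RegisterSet makeRefSet(const PhysicalRegisterInfo &PRI) {
  return RegisterSet(std::less<RegisterRef>(PRI));
}
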
diff --git a/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h b/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
index 5144548a2792..ec652f448f0f 100644
--- a/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
+++ b/llvm/include/llvm/CodeGen/ReachingDefAnalysis.h
@@ -68,10 +68,10 @@ struct PointerLikeTypeTraits<ReachingDef> {
/// This class provides the reaching def analysis.
class ReachingDefAnalysis : public MachineFunctionPass {
private:
- MachineFunction *MF;
- const TargetRegisterInfo *TRI;
+ MachineFunction *MF = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
LoopTraversal::TraversalOrder TraversedMBBOrder;
- unsigned NumRegUnits;
+ unsigned NumRegUnits = 0;
/// Instruction that defined each register, relative to the beginning of the
/// current basic block. When a LiveRegsDefInfo is used to represent a
/// live-out register, this value is relative to the end of the basic block,
@@ -87,7 +87,7 @@ private:
/// Current instruction number.
/// The first instruction in each basic block is 0.
- int CurInstr;
+ int CurInstr = -1;
/// Maps instructions to their instruction Ids, relative to the beginning of
/// their basic blocks.
@@ -102,7 +102,7 @@ private:
MBBReachingDefsInfo MBBReachingDefs;
/// Default values are 'nothing happened a long time ago'.
- const int ReachingDefDefaultVal = -(1 << 20);
+ const int ReachingDefDefaultVal = -(1 << 21);
using InstSet = SmallPtrSetImpl<MachineInstr*>;
using BlockSet = SmallPtrSetImpl<MachineBasicBlock*>;
diff --git a/llvm/include/llvm/CodeGen/RegAllocRegistry.h b/llvm/include/llvm/CodeGen/RegAllocRegistry.h
index e33d16c63627..cd81e084a859 100644
--- a/llvm/include/llvm/CodeGen/RegAllocRegistry.h
+++ b/llvm/include/llvm/CodeGen/RegAllocRegistry.h
@@ -66,7 +66,7 @@ public:
/// RegisterRegAlloc's global Registry tracks allocator registration.
template <class T>
-MachinePassRegistry<RegisterRegAlloc::FunctionPassCtor>
+MachinePassRegistry<typename RegisterRegAllocBase<T>::FunctionPassCtor>
RegisterRegAllocBase<T>::Registry;
} // end namespace llvm
diff --git a/llvm/include/llvm/CodeGen/Register.h b/llvm/include/llvm/CodeGen/Register.h
index 2f2d58f5185b..e1456f81d467 100644
--- a/llvm/include/llvm/CodeGen/Register.h
+++ b/llvm/include/llvm/CodeGen/Register.h
@@ -20,8 +20,8 @@ class Register {
unsigned Reg;
public:
- constexpr Register(unsigned Val = 0): Reg(Val) {}
- constexpr Register(MCRegister Val): Reg(Val) {}
+ constexpr Register(unsigned Val = 0) : Reg(Val) {}
+ constexpr Register(MCRegister Val) : Reg(Val) {}
// Register numbers can represent physical registers, virtual registers, and
// sometimes stack slots. The unsigned values are divided into these ranges:
@@ -41,12 +41,12 @@ public:
/// returns true if Reg is in the range used for stack slots.
///
/// FIXME: remove in favor of member.
- static bool isStackSlot(unsigned Reg) {
+ static constexpr bool isStackSlot(unsigned Reg) {
return MCRegister::isStackSlot(Reg);
}
/// Return true if this is a stack slot.
- bool isStack() const { return MCRegister::isStackSlot(Reg); }
+ constexpr bool isStack() const { return MCRegister::isStackSlot(Reg); }
/// Compute the frame index from a register value representing a stack slot.
static int stackSlot2Index(Register Reg) {
@@ -62,13 +62,13 @@ public:
/// Return true if the specified register number is in
/// the physical register namespace.
- static bool isPhysicalRegister(unsigned Reg) {
+ static constexpr bool isPhysicalRegister(unsigned Reg) {
return MCRegister::isPhysicalRegister(Reg);
}
/// Return true if the specified register number is in
/// the virtual register namespace.
- static bool isVirtualRegister(unsigned Reg) {
+ static constexpr bool isVirtualRegister(unsigned Reg) {
return Reg & MCRegister::VirtualRegFlag;
}
@@ -88,31 +88,21 @@ public:
/// Return true if the specified register number is in the virtual register
/// namespace.
- bool isVirtual() const {
- return isVirtualRegister(Reg);
- }
+ constexpr bool isVirtual() const { return isVirtualRegister(Reg); }
/// Return true if the specified register number is in the physical register
/// namespace.
- bool isPhysical() const {
- return isPhysicalRegister(Reg);
- }
+ constexpr bool isPhysical() const { return isPhysicalRegister(Reg); }
/// Convert a virtual register number to a 0-based index. The first virtual
/// register in a function will get the index 0.
- unsigned virtRegIndex() const {
- return virtReg2Index(Reg);
- }
+ unsigned virtRegIndex() const { return virtReg2Index(Reg); }
- constexpr operator unsigned() const {
- return Reg;
- }
+ constexpr operator unsigned() const { return Reg; }
- unsigned id() const { return Reg; }
+ constexpr unsigned id() const { return Reg; }
- operator MCRegister() const {
- return MCRegister(Reg);
- }
+ constexpr operator MCRegister() const { return MCRegister(Reg); }
/// Utility to check-convert this value to a MCRegister. The caller is
/// expected to have already validated that this Register is, indeed,
@@ -123,29 +113,41 @@ public:
return MCRegister(Reg);
}
- bool isValid() const { return Reg != MCRegister::NoRegister; }
+ constexpr bool isValid() const { return Reg != MCRegister::NoRegister; }
/// Comparisons between register objects
- bool operator==(const Register &Other) const { return Reg == Other.Reg; }
- bool operator!=(const Register &Other) const { return Reg != Other.Reg; }
- bool operator==(const MCRegister &Other) const { return Reg == Other.id(); }
- bool operator!=(const MCRegister &Other) const { return Reg != Other.id(); }
+ constexpr bool operator==(const Register &Other) const {
+ return Reg == Other.Reg;
+ }
+ constexpr bool operator!=(const Register &Other) const {
+ return Reg != Other.Reg;
+ }
+ constexpr bool operator==(const MCRegister &Other) const {
+ return Reg == Other.id();
+ }
+ constexpr bool operator!=(const MCRegister &Other) const {
+ return Reg != Other.id();
+ }
/// Comparisons against register constants. E.g.
/// * R == AArch64::WZR
/// * R == 0
/// * R == VirtRegMap::NO_PHYS_REG
- bool operator==(unsigned Other) const { return Reg == Other; }
- bool operator!=(unsigned Other) const { return Reg != Other; }
- bool operator==(int Other) const { return Reg == unsigned(Other); }
- bool operator!=(int Other) const { return Reg != unsigned(Other); }
+ constexpr bool operator==(unsigned Other) const { return Reg == Other; }
+ constexpr bool operator!=(unsigned Other) const { return Reg != Other; }
+ constexpr bool operator==(int Other) const { return Reg == unsigned(Other); }
+ constexpr bool operator!=(int Other) const { return Reg != unsigned(Other); }
// MSVC requires that we explicitly declare these two as well.
- bool operator==(MCPhysReg Other) const { return Reg == unsigned(Other); }
- bool operator!=(MCPhysReg Other) const { return Reg != unsigned(Other); }
+ constexpr bool operator==(MCPhysReg Other) const {
+ return Reg == unsigned(Other);
+ }
+ constexpr bool operator!=(MCPhysReg Other) const {
+ return Reg != unsigned(Other);
+ }
};
// Provide DenseMapInfo for Register
-template<> struct DenseMapInfo<Register> {
+template <> struct DenseMapInfo<Register> {
static inline unsigned getEmptyKey() {
return DenseMapInfo<unsigned>::getEmptyKey();
}
@@ -160,6 +162,6 @@ template<> struct DenseMapInfo<Register> {
}
};
-}
+} // namespace llvm
#endif // LLVM_CODEGEN_REGISTER_H
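
A compile-time sketch (not from the diff): with the accessors above now constexpr, register classification can be checked in constant expressions.

#include "llvm/CodeGen/Register.h"

using llvm::Register;

static_assert(!Register().isValid(), "default-constructed register is NoRegister");
static_assert(Register(1u).isPhysical(), "low nonzero IDs are physical registers");
static_assert(Register(1u) == 1u, "comparisons are usable in constant expressions");
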
diff --git a/llvm/include/llvm/CodeGen/RegisterBank.h b/llvm/include/llvm/CodeGen/RegisterBank.h
index 66885f113e8e..ee295c7cdde0 100644
--- a/llvm/include/llvm/CodeGen/RegisterBank.h
+++ b/llvm/include/llvm/CodeGen/RegisterBank.h
@@ -29,7 +29,6 @@ class RegisterBank {
private:
unsigned ID;
const char *Name;
- unsigned Size;
BitVector ContainedRegClasses;
/// Sentinel value used to recognize register bank not properly
@@ -40,8 +39,8 @@ private:
friend RegisterBankInfo;
public:
- RegisterBank(unsigned ID, const char *Name, unsigned Size,
- const uint32_t *CoveredClasses, unsigned NumRegClasses);
+ RegisterBank(unsigned ID, const char *Name, const uint32_t *CoveredClasses,
+ unsigned NumRegClasses);
/// Get the identifier of this register bank.
unsigned getID() const { return ID; }
@@ -50,9 +49,6 @@ public:
/// Should be used only for debugging purposes.
const char *getName() const { return Name; }
- /// Get the maximal size in bits that fits in this register bank.
- unsigned getSize() const { return Size; }
-
/// Check whether this instance is ready to be used.
bool isValid() const;
@@ -62,7 +58,7 @@ public:
/// \note This method does not check anything when assertions are disabled.
///
/// \return True if the check was successful.
- bool verify(const TargetRegisterInfo &TRI) const;
+ bool verify(const RegisterBankInfo &RBI, const TargetRegisterInfo &TRI) const;
/// Check whether this register bank covers \p RC.
/// In other words, check if this register bank fully covers
diff --git a/llvm/include/llvm/CodeGen/RegisterBankInfo.h b/llvm/include/llvm/CodeGen/RegisterBankInfo.h
index bba4f1f025a0..60f03756e1b5 100644
--- a/llvm/include/llvm/CodeGen/RegisterBankInfo.h
+++ b/llvm/include/llvm/CodeGen/RegisterBankInfo.h
@@ -18,9 +18,10 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include <cassert>
#include <initializer_list>
#include <memory>
@@ -30,7 +31,6 @@ namespace llvm {
class MachineInstr;
class MachineRegisterInfo;
class raw_ostream;
-class RegisterBank;
class TargetInstrInfo;
class TargetRegisterClass;
class TargetRegisterInfo;
@@ -83,7 +83,7 @@ public:
/// \note This method does not check anything when assertions are disabled.
///
/// \return True if the check was successful.
- bool verify() const;
+ bool verify(const RegisterBankInfo &RBI) const;
};
/// Helper struct that represents how a value is mapped through
@@ -175,7 +175,7 @@ public:
/// \note This method does not check anything when assertions are disabled.
///
/// \return True if the check was successful.
- bool verify(unsigned MeaningfulBitWidth) const;
+ bool verify(const RegisterBankInfo &RBI, unsigned MeaningfulBitWidth) const;
/// Print this on dbgs() stream.
void dump() const;
@@ -384,11 +384,17 @@ public:
protected:
/// Hold the set of supported register banks.
- RegisterBank **RegBanks;
+ const RegisterBank **RegBanks;
/// Total number of register banks.
unsigned NumRegBanks;
+ /// Hold the sizes of the register banks for all HwModes.
+ const unsigned *Sizes;
+
+ /// Current HwMode for the target.
+ unsigned HwMode;
+
/// Keep dynamically allocated PartialMapping in a separate map.
/// This shouldn't be needed when everything gets TableGen'ed.
mutable DenseMap<unsigned, std::unique_ptr<const PartialMapping>>
@@ -415,7 +421,8 @@ protected:
/// Create a RegisterBankInfo that can accommodate up to \p NumRegBanks
/// RegisterBank instances.
- RegisterBankInfo(RegisterBank **RegBanks, unsigned NumRegBanks);
+ RegisterBankInfo(const RegisterBank **RegBanks, unsigned NumRegBanks,
+ const unsigned *Sizes, unsigned HwMode);
/// This constructor is meaningless.
/// It just provides a default constructor that can be used at link time
@@ -428,14 +435,14 @@ protected:
}
/// Get the register bank identified by \p ID.
- RegisterBank &getRegBank(unsigned ID) {
+ const RegisterBank &getRegBank(unsigned ID) {
assert(ID < getNumRegBanks() && "Accessing an unknown register bank");
return *RegBanks[ID];
}
/// Get the MinimalPhysRegClass for Reg.
/// \pre Reg is a physical register.
- const TargetRegisterClass &
+ const TargetRegisterClass *
getMinimalPhysRegClass(Register Reg, const TargetRegisterInfo &TRI) const;
/// Try to get the mapping of \p MI.
@@ -576,6 +583,11 @@ public:
return const_cast<RegisterBankInfo *>(this)->getRegBank(ID);
}
+ /// Get the maximum size in bits that fits in the given register bank.
+ unsigned getMaximumSize(unsigned RegBankID) const {
+ return Sizes[RegBankID + HwMode * NumRegBanks];
+ }
+
/// Get the register bank of \p Reg.
/// If Reg has not been assigned a register, a register class,
/// or a register bank, then this returns nullptr.
@@ -587,6 +599,11 @@ public:
/// Get the total number of register banks.
unsigned getNumRegBanks() const { return NumRegBanks; }
+ /// Returns true if the register bank is considered divergent.
+ virtual bool isDivergentRegBank(const RegisterBank *RB) const {
+ return false;
+ }
+
/// Get a register bank that covers \p RC.
///
/// \pre \p RC is a user-defined register class (as opposed as one
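
Hypothetical usage sketch: since RegisterBank no longer stores its size, size queries go through the owning RegisterBankInfo and its per-HwMode Sizes table via getMaximumSize().

#include "llvm/CodeGen/RegisterBankInfo.h"

static bool fitsInBank(const llvm::RegisterBankInfo &RBI,
                       const llvm::RegisterBank &RB, unsigned SizeInBits) {
  // Replaces the removed RegisterBank::getSize() query.
  return SizeInBits <= RBI.getMaximumSize(RB.getID());
}
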
diff --git a/llvm/include/llvm/CodeGen/RegisterPressure.h b/llvm/include/llvm/CodeGen/RegisterPressure.h
index 1164b60a11eb..8a46e505affd 100644
--- a/llvm/include/llvm/CodeGen/RegisterPressure.h
+++ b/llvm/include/llvm/CodeGen/RegisterPressure.h
@@ -201,6 +201,8 @@ class PressureDiffs {
public:
PressureDiffs() = default;
+ PressureDiffs &operator=(const PressureDiffs &other) = delete;
+ PressureDiffs(const PressureDiffs &other) = delete;
~PressureDiffs() { free(PDiffArray); }
void clear() { Size = 0; }
@@ -272,7 +274,7 @@ private:
using RegSet = SparseSet<IndexMaskPair>;
RegSet Regs;
- unsigned NumRegUnits;
+ unsigned NumRegUnits = 0u;
unsigned getSparseIndexFromReg(Register Reg) const {
if (Reg.isVirtual())
@@ -358,7 +360,7 @@ class RegPressureTracker {
const MachineFunction *MF = nullptr;
const TargetRegisterInfo *TRI = nullptr;
const RegisterClassInfo *RCI = nullptr;
- const MachineRegisterInfo *MRI;
+ const MachineRegisterInfo *MRI = nullptr;
const LiveIntervals *LIS = nullptr;
/// We currently only allow pressure tracking within a block.
diff --git a/llvm/include/llvm/CodeGen/RegisterScavenging.h b/llvm/include/llvm/CodeGen/RegisterScavenging.h
index 52797afbd848..21f2d355f237 100644
--- a/llvm/include/llvm/CodeGen/RegisterScavenging.h
+++ b/llvm/include/llvm/CodeGen/RegisterScavenging.h
@@ -32,9 +32,9 @@ class TargetRegisterClass;
class TargetRegisterInfo;
class RegScavenger {
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
- MachineRegisterInfo* MRI;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
MachineBasicBlock *MBB = nullptr;
MachineBasicBlock::iterator MBBI;
unsigned NumRegUnits = 0;
@@ -105,8 +105,8 @@ public:
/// Move the internal MBB iterator and update register states until
/// it has processed the specific iterator.
void forward(MachineBasicBlock::iterator I) {
- if (!Tracking && MBB->begin() != I) forward();
- while (MBBI != I) forward();
+ while (!Tracking || MBBI != I)
+ forward();
}
/// Update internal register state and move MBB iterator backwards.
@@ -160,23 +160,6 @@ public:
A.push_back(I.FrameIndex);
}
- /// Make a register of the specific register class
- /// available and do the appropriate bookkeeping. SPAdj is the stack
- /// adjustment due to call frame, it's passed along to eliminateFrameIndex().
- /// Returns the scavenged register.
- /// This is deprecated as it depends on the quality of the kill flags being
- /// present; Use scavengeRegisterBackwards() instead!
- ///
- /// If \p AllowSpill is false, fail if a spill is required to make the
- /// register available, and return NoRegister.
- Register scavengeRegister(const TargetRegisterClass *RC,
- MachineBasicBlock::iterator I, int SPAdj,
- bool AllowSpill = true);
- Register scavengeRegister(const TargetRegisterClass *RegClass, int SPAdj,
- bool AllowSpill = true) {
- return scavengeRegister(RegClass, MBBI, SPAdj, AllowSpill);
- }
-
/// Make a register of the specific register class available from the current
/// position backwards to the place before \p To. If \p RestoreAfter is true
/// this includes the instruction following the current position.
@@ -217,15 +200,6 @@ private:
/// Remove all Reg Units that \p Reg contains from \p BV.
void removeRegUnits(BitVector &BV, MCRegister Reg);
- /// Return the candidate register that is unused for the longest after
- /// StartMI. UseMI is set to the instruction where the search stopped.
- ///
- /// No more than InstrLimit instructions are inspected.
- Register findSurvivorReg(MachineBasicBlock::iterator StartMI,
- BitVector &Candidates,
- unsigned InstrLimit,
- MachineBasicBlock::iterator &UseMI);
-
/// Initialize RegisterScavenger.
void init(MachineBasicBlock &MBB);
diff --git a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h
index 8b406a275025..aa1f5ef8110b 100644
--- a/llvm/include/llvm/CodeGen/RegisterUsageInfo.h
+++ b/llvm/include/llvm/CodeGen/RegisterUsageInfo.h
@@ -63,7 +63,7 @@ private:
/// and 1 means content of register will be preserved around function call.
DenseMap<const Function *, std::vector<uint32_t>> RegMasks;
- const LLVMTargetMachine *TM;
+ const LLVMTargetMachine *TM = nullptr;
};
} // end namespace llvm
diff --git a/llvm/include/llvm/CodeGen/RuntimeLibcalls.h b/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
index d8c631060b7e..666420681510 100644
--- a/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -70,6 +70,14 @@ namespace RTLIB {
/// UNKNOWN_LIBCALL if there is none.
Libcall getPOWI(EVT RetVT);
+ /// getLDEXP - Return the LDEXP_* value for the given types, or
+ /// UNKNOWN_LIBCALL if there is none.
+ Libcall getLDEXP(EVT RetVT);
+
+ /// getFREXP - Return the FREXP_* value for the given types, or
+ /// UNKNOWN_LIBCALL if there is none.
+ Libcall getFREXP(EVT RetVT);
+
/// Return the SYNC_FETCH_AND_* value for the given opcode and type, or
/// UNKNOWN_LIBCALL if there is none.
Libcall getSYNC(unsigned Opc, MVT VT);
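
A minimal sketch, assuming only the declarations above: the new getLDEXP/getFREXP queries follow the getPOWI pattern and return UNKNOWN_LIBCALL when no libcall exists for the type.

#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/ValueTypes.h"

// True when a runtime ldexp libcall is available for the given return type.
static bool hasLdexpLibcall(llvm::EVT RetVT) {
  return llvm::RTLIB::getLDEXP(RetVT) != llvm::RTLIB::UNKNOWN_LIBCALL;
}
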
diff --git a/llvm/include/llvm/CodeGen/ScheduleDAG.h b/llvm/include/llvm/CodeGen/ScheduleDAG.h
index 2fe2aabe833e..89b71167a43a 100644
--- a/llvm/include/llvm/CodeGen/ScheduleDAG.h
+++ b/llvm/include/llvm/CodeGen/ScheduleDAG.h
@@ -93,7 +93,7 @@ class TargetRegisterInfo;
/// The time associated with this edge. Often this is just the value of the
/// Latency field of the predecessor, however advanced models may provide
/// additional information about specific edges.
- unsigned Latency;
+ unsigned Latency = 0u;
public:
/// Constructs a null SDep. This is only for use by container classes which
@@ -568,6 +568,15 @@ class TargetRegisterInfo;
bool StressSched;
#endif
+ // This class is designed to be passed by reference only. Copy constructor
+ // is declared as deleted here to make the derived classes have deleted
+ // implicit-declared copy constructor, which suppresses the warnings from
+ // static analyzer when the derived classes own resources that are freed in
+ // their destructors, but don't have user-written copy constructors (rule
+ // of three).
+ ScheduleDAG(const ScheduleDAG &) = delete;
+ ScheduleDAG &operator=(const ScheduleDAG &) = delete;
+
explicit ScheduleDAG(MachineFunction &mf);
virtual ~ScheduleDAG();
diff --git a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
index dc8f02e28adf..5ea68e0a64af 100644
--- a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -119,7 +119,7 @@ namespace llvm {
/// A ScheduleDAG for scheduling lists of MachineInstr.
class ScheduleDAGInstrs : public ScheduleDAG {
protected:
- const MachineLoopInfo *MLI;
+ const MachineLoopInfo *MLI = nullptr;
const MachineFrameInfo &MFI;
/// TargetSchedModel provides an interface to the machine model.
@@ -143,7 +143,7 @@ namespace llvm {
// ------------------------------------------------
/// The block in which to insert instructions
- MachineBasicBlock *BB;
+ MachineBasicBlock *BB = nullptr;
/// The beginning of the range to be scheduled.
MachineBasicBlock::iterator RegionBegin;
@@ -152,7 +152,7 @@ namespace llvm {
MachineBasicBlock::iterator RegionEnd;
/// Instructions in this region (distance(RegionBegin, RegionEnd)).
- unsigned NumRegionInstrs;
+ unsigned NumRegionInstrs = 0;
/// After calling BuildSchedGraph, each machine instruction in the current
/// scheduling region is mapped to an SUnit.
diff --git a/llvm/include/llvm/CodeGen/ScoreboardHazardRecognizer.h b/llvm/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
index cefafe87a17d..a70acca98a5c 100644
--- a/llvm/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
+++ b/llvm/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
@@ -49,7 +49,8 @@ class ScoreboardHazardRecognizer : public ScheduleHazardRecognizer {
public:
Scoreboard() = default;
-
+ Scoreboard &operator=(const Scoreboard &other) = delete;
+ Scoreboard(const Scoreboard &other) = delete;
~Scoreboard() {
delete[] Data;
}
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index aa1936c2757e..55c6354f03c8 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DebugLoc.h"
@@ -37,7 +38,6 @@
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/RecyclingAllocator.h"
#include <cassert>
#include <cstdint>
@@ -71,7 +71,6 @@ class FunctionLoweringInfo;
class FunctionVarLocs;
class GlobalValue;
struct KnownBits;
-class LegacyDivergenceAnalysis;
class LLVMContext;
class MachineBasicBlock;
class MachineConstantPoolValue;
@@ -89,6 +88,11 @@ class TargetMachine;
class TargetSubtargetInfo;
class Value;
+template <typename T> class GenericSSAContext;
+using SSAContext = GenericSSAContext<Function>;
+template <typename T> class GenericUniformityInfo;
+using UniformityInfo = GenericUniformityInfo<SSAContext>;
+
class SDVTListNode : public FoldingSetNode {
friend struct FoldingSetTrait<SDVTListNode>;
@@ -229,7 +233,7 @@ class SelectionDAG {
LLVMContext *Context;
CodeGenOpt::Level OptLevel;
- LegacyDivergenceAnalysis * DA = nullptr;
+ UniformityInfo *UA = nullptr;
FunctionLoweringInfo * FLI = nullptr;
/// The function-level optimization remark emitter. Used to emit remarks
@@ -451,7 +455,7 @@ public:
/// Prepare this SelectionDAG to process code in the given MachineFunction.
void init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE,
Pass *PassPtr, const TargetLibraryInfo *LibraryInfo,
- LegacyDivergenceAnalysis *Divergence, ProfileSummaryInfo *PSIin,
+ UniformityInfo *UA, ProfileSummaryInfo *PSIin,
BlockFrequencyInfo *BFIin, FunctionVarLocs const *FnVarLocs);
void setFunctionLoweringInfo(FunctionLoweringInfo * FuncInfo) {
@@ -474,7 +478,7 @@ public:
const TargetLowering &getTargetLoweringInfo() const { return *TLI; }
const TargetLibraryInfo &getLibInfo() const { return *LibInfo; }
const SelectionDAGTargetInfo &getSelectionDAGInfo() const { return *TSI; }
- const LegacyDivergenceAnalysis *getDivergenceAnalysis() const { return DA; }
+ const UniformityInfo *getUniformityInfo() const { return UA; }
/// Returns the result of the AssignmentTrackingAnalysis pass if it's
/// available, otherwise return nullptr.
const FunctionVarLocs *getFunctionVarLocs() const { return FnVarLocs; }
@@ -944,6 +948,13 @@ public:
/// integer type VT, by either zero-extending or truncating it.
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
+ /// Convert Op, which must be of integer type, to the
+ /// integer type VT, by either sign/zero-extending (depending on IsSigned) or
+ /// truncating it.
+ SDValue getExtOrTrunc(bool IsSigned, SDValue Op, const SDLoc &DL, EVT VT) {
+ return IsSigned ? getSExtOrTrunc(Op, DL, VT) : getZExtOrTrunc(Op, DL, VT);
+ }
+
/// Return the expression required to zero extend the Op
/// value assuming it was the smaller SrcTy value.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT);
@@ -1059,12 +1070,11 @@ public:
}
/// Return a node that represents the runtime scaling 'MulImm * RuntimeVL'.
- SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm) {
- assert(MulImm.getMinSignedBits() <= VT.getSizeInBits() &&
- "Immediate does not fit VT");
- return getNode(ISD::VSCALE, DL, VT,
- getConstant(MulImm.sextOrTrunc(VT.getSizeInBits()), DL, VT));
- }
+ SDValue getVScale(const SDLoc &DL, EVT VT, APInt MulImm,
+ bool ConstantFold = true);
+
+ SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC,
+ bool ConstantFold = true);
/// Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getGLOBAL_OFFSET_TABLE(EVT VT) {
@@ -1579,6 +1589,11 @@ public:
ISD::MemIndexType IndexType,
bool IsTruncating = false);
+ SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT,
+ MachineMemOperand *MMO);
+ SDValue getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT,
+ MachineMemOperand *MMO);
+
/// Construct a node to track a Value* through the backend.
SDValue getSrcValue(const Value *v);
@@ -1984,13 +1999,36 @@ public:
OFK_Always,
};
- /// Determine if the result of the addition of 2 node can overflow.
- OverflowKind computeOverflowKind(SDValue N0, SDValue N1) const;
+ /// Determine if the result of the signed addition of 2 nodes can overflow.
+ OverflowKind computeOverflowForSignedAdd(SDValue N0, SDValue N1) const;
+
+ /// Determine if the result of the unsigned addition of 2 nodes can overflow.
+ OverflowKind computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const;
+
+ /// Determine if the result of the addition of 2 nodes can overflow.
+ OverflowKind computeOverflowForAdd(bool IsSigned, SDValue N0,
+ SDValue N1) const {
+ return IsSigned ? computeOverflowForSignedAdd(N0, N1)
+ : computeOverflowForUnsignedAdd(N0, N1);
+ }
+
+ /// Determine if the result of the signed sub of 2 nodes can overflow.
+ OverflowKind computeOverflowForSignedSub(SDValue N0, SDValue N1) const;
+
+ /// Determine if the result of the unsigned sub of 2 nodes can overflow.
+ OverflowKind computeOverflowForUnsignedSub(SDValue N0, SDValue N1) const;
+
+ /// Determine if the result of the sub of 2 nodes can overflow.
+ OverflowKind computeOverflowForSub(bool IsSigned, SDValue N0,
+ SDValue N1) const {
+ return IsSigned ? computeOverflowForSignedSub(N0, N1)
+ : computeOverflowForUnsignedSub(N0, N1);
+ }
/// Test if the given value is known to have exactly one bit set. This differs
/// from computeKnownBits in that it doesn't necessarily determine which bit
/// is set.
- bool isKnownToBeAPowerOfTwo(SDValue Val) const;
+ bool isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth = 0) const;
/// Return the number of times the sign bit of the register is replicated into
/// the other bits. We know that at least 1 bit is always equal to the sign
@@ -2098,7 +2136,7 @@ public:
bool isKnownNeverZeroFloat(SDValue Op) const;
/// Test whether the given SDValue is known to contain non-zero value(s).
- bool isKnownNeverZero(SDValue Op) const;
+ bool isKnownNeverZero(SDValue Op, unsigned Depth = 0) const;
/// Test whether two SDValues are known to compare equal. This
/// is true if they are the same value, or if one is negative zero and the
@@ -2184,6 +2222,11 @@ public:
/// cannot be inferred.
MaybeAlign InferPtrAlign(SDValue Ptr) const;
+ /// Split the scalar node with EXTRACT_ELEMENT using the provided VTs and
+ /// return the low/high part.
+ std::pair<SDValue, SDValue> SplitScalar(const SDValue &N, const SDLoc &DL,
+ const EVT &LoVT, const EVT &HiVT);
+
/// Compute the VTs needed for the low/hi parts of a type
/// which is split (or expanded) into two not necessarily identical pieces.
std::pair<EVT, EVT> GetSplitDestVTs(const EVT &VT) const;
@@ -2313,6 +2356,22 @@ public:
}
}
+ /// Check if the provided node is safe to speculatively execute given its
+ /// current arguments. So, while the `udiv` opcode is not safe to
+ /// speculatively execute, a given `udiv` node may be if the denominator is
+ /// known nonzero.
+ bool isSafeToSpeculativelyExecuteNode(const SDNode *N) const {
+ switch (N->getOpcode()) {
+ case ISD::UDIV:
+ return isKnownNeverZero(N->getOperand(1));
+ default:
+ return isSafeToSpeculativelyExecute(N->getOpcode());
+ }
+ }
+
+ SDValue makeStateFunctionCall(unsigned LibFunc, SDValue Ptr, SDValue InChain,
+ const SDLoc &DLoc);
+
private:
void InsertNode(SDNode *N);
bool RemoveNodeFromCSEMaps(SDNode *N);
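
Sketch of a possible caller (names hypothetical): the signed/unsigned overflow queries come with bool-dispatching wrappers such as computeOverflowForAdd, mirroring getExtOrTrunc.

#include "llvm/CodeGen/SelectionDAG.h"

using llvm::SDValue;
using llvm::SelectionDAG;

// True when DAG can prove the (signed or unsigned) addition never overflows.
static bool addNeverOverflows(const SelectionDAG &DAG, SDValue N0, SDValue N1,
                              bool IsSigned) {
  return DAG.computeOverflowForAdd(IsSigned, N0, N1) == SelectionDAG::OFK_Never;
}
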
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index b7c5bec91051..6c0b2cfe8ef3 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -337,6 +337,9 @@ private:
/// instruction selected, false if no code should be emitted for it.
bool PrepareEHLandingPad();
+ // Mark and Report IPToState for each Block under AsynchEH
+ void reportIPToStateForBlocks(MachineFunction *Fn);
+
/// Perform instruction selection on all basic blocks in the function.
void SelectAllBasicBlocks(const Function &Fn);
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 187d179e3403..ba2222390a76 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -30,6 +30,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Constants.h"
@@ -42,7 +43,6 @@
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TypeSize.h"
#include <algorithm>
#include <cassert>
@@ -392,9 +392,11 @@ private:
// We assume instructions do not raise floating-point exceptions by default,
// and only those marked explicitly may do so. We could choose to represent
// this via a positive "FPExcept" flags like on the MI level, but having a
- // negative "NoFPExcept" flag here (that defaults to true) makes the flag
- // intersection logic more straightforward.
+ // negative "NoFPExcept" flag here makes the flag intersection logic more
+ // straightforward.
bool NoFPExcept : 1;
+ // Instructions with attached 'unpredictable' metadata on IR level.
+ bool Unpredictable : 1;
public:
/// Default constructor turns off all optimization flags.
@@ -402,7 +404,7 @@ public:
: NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false),
NoInfs(false), NoSignedZeros(false), AllowReciprocal(false),
AllowContract(false), ApproximateFuncs(false),
- AllowReassociation(false), NoFPExcept(false) {}
+ AllowReassociation(false), NoFPExcept(false), Unpredictable(false) {}
/// Propagate the fast-math-flags from an IR FPMathOperator.
void copyFMF(const FPMathOperator &FPMO) {
@@ -427,6 +429,7 @@ public:
void setApproximateFuncs(bool b) { ApproximateFuncs = b; }
void setAllowReassociation(bool b) { AllowReassociation = b; }
void setNoFPExcept(bool b) { NoFPExcept = b; }
+ void setUnpredictable(bool b) { Unpredictable = b; }
// These are accessors for each flag.
bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
@@ -440,6 +443,7 @@ public:
bool hasApproximateFuncs() const { return ApproximateFuncs; }
bool hasAllowReassociation() const { return AllowReassociation; }
bool hasNoFPExcept() const { return NoFPExcept; }
+ bool hasUnpredictable() const { return Unpredictable; }
/// Clear any flags in this flag set that aren't also set in Flags. All
/// flags will be cleared if Flags are undefined.
@@ -455,6 +459,7 @@ public:
ApproximateFuncs &= Flags.ApproximateFuncs;
AllowReassociation &= Flags.AllowReassociation;
NoFPExcept &= Flags.NoFPExcept;
+ Unpredictable &= Flags.Unpredictable;
}
};
@@ -470,7 +475,7 @@ public:
/// We do not place that under `#if LLVM_ENABLE_ABI_BREAKING_CHECKS`
/// intentionally because it adds unneeded complexity without noticeable
/// benefits (see discussion with @thakis in D120714).
- uint16_t PersistentId;
+ uint16_t PersistentId = 0xffff;
protected:
// We define a set of mini-helper classes to help us interpret the bits in our
@@ -1433,6 +1438,8 @@ public:
case ISD::VP_SCATTER:
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
+ case ISD::GET_FPENV_MEM:
+ case ISD::SET_FPENV_MEM:
return true;
default:
return N->isMemIntrinsic() || N->isTargetMemoryOpcode();
@@ -1608,11 +1615,7 @@ public:
bool isOne() const { return Value->isOne(); }
bool isZero() const { return Value->isZero(); }
- // NOTE: This is soft-deprecated. Please use `isZero()` instead.
- bool isNullValue() const { return isZero(); }
bool isAllOnes() const { return Value->isMinusOne(); }
- // NOTE: This is soft-deprecated. Please use `isAllOnes()` instead.
- bool isAllOnesValue() const { return isAllOnes(); }
bool isMaxSignedValue() const { return Value->isMaxValue(true); }
bool isMinSignedValue() const { return Value->isMinValue(true); }
@@ -1712,6 +1715,10 @@ SDValue peekThroughOneUseBitcasts(SDValue V);
/// If \p V is not an extracted subvector, it is returned as-is.
SDValue peekThroughExtractSubvectors(SDValue V);
+/// Return the non-truncated source operand of \p V if it exists.
+/// If \p V is not a truncation, it is returned as-is.
+SDValue peekThroughTruncates(SDValue V);
+
/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
/// constant is canonicalized to be operand 1.
bool isBitwiseNot(SDValue V, bool AllowUndefs = false);
@@ -2891,6 +2898,23 @@ public:
}
};
+class FPStateAccessSDNode : public MemSDNode {
+public:
+ friend class SelectionDAG;
+
+ FPStateAccessSDNode(unsigned NodeTy, unsigned Order, const DebugLoc &dl,
+ SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+ : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
+ assert((NodeTy == ISD::GET_FPENV_MEM || NodeTy == ISD::SET_FPENV_MEM) &&
+ "Expected FP state access node");
+ }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == ISD::GET_FPENV_MEM ||
+ N->getOpcode() == ISD::SET_FPENV_MEM;
+ }
+};
+
/// An SDNode that represents everything that will be needed
/// to construct a MachineInstr. These nodes are created during the
/// instruction selection proper phase.
@@ -2938,7 +2962,7 @@ public:
return ArrayRef(MemRefs.getAddrOfPtr1(), 1);
// Otherwise we have an actual array.
- return ArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs);
+ return ArrayRef(cast<MachineMemOperand **>(MemRefs), NumMemRefs);
}
mmo_iterator memoperands_begin() const { return memoperands().begin(); }
mmo_iterator memoperands_end() const { return memoperands().end(); }
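
Illustrative only: the new Unpredictable bit in SDNodeFlags carries IR-level 'unpredictable' metadata into the DAG; a lowering step might populate it roughly as follows (helper name hypothetical).

#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"

static llvm::SDNodeFlags flagsFor(const llvm::Instruction &I) {
  llvm::SDNodeFlags Flags; // all flags default to off
  if (I.hasMetadata(llvm::LLVMContext::MD_unpredictable))
    Flags.setUnpredictable(true);
  return Flags;
}
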
diff --git a/llvm/include/llvm/CodeGen/SlotIndexes.h b/llvm/include/llvm/CodeGen/SlotIndexes.h
index 403a94c53dc4..7e013dbf2ab3 100644
--- a/llvm/include/llvm/CodeGen/SlotIndexes.h
+++ b/llvm/include/llvm/CodeGen/SlotIndexes.h
@@ -540,7 +540,7 @@ class raw_ostream;
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late = false) {
assert(!MI.isInsideBundle() &&
"Instructions inside bundles should use bundle start's slot.");
- assert(mi2iMap.find(&MI) == mi2iMap.end() && "Instr already indexed.");
+ assert(!mi2iMap.contains(&MI) && "Instr already indexed.");
// Numbering debug instructions could cause code generation to be
// affected by debug information.
assert(!MI.isDebugInstr() && "Cannot number debug instructions.");
diff --git a/llvm/include/llvm/CodeGen/StackProtector.h b/llvm/include/llvm/CodeGen/StackProtector.h
index 6150684236c8..70a3abff83f6 100644
--- a/llvm/include/llvm/CodeGen/StackProtector.h
+++ b/llvm/include/llvm/CodeGen/StackProtector.h
@@ -17,22 +17,19 @@
#define LLVM_CODEGEN_STACKPROTECTOR_H
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Pass.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
class BasicBlock;
-class DominatorTree;
class Function;
-class Instruction;
class Module;
class TargetLoweringBase;
class TargetMachine;
-class Type;
class StackProtector : public FunctionPass {
private:
@@ -49,8 +46,8 @@ private:
const TargetLoweringBase *TLI = nullptr;
Triple Trip;
- Function *F;
- Module *M;
+ Function *F = nullptr;
+ Module *M = nullptr;
std::optional<DomTreeUpdater> DTU;
@@ -63,12 +60,6 @@ private:
/// protection when -fstack-protection is used.
unsigned SSPBufferSize = DefaultSSPBufferSize;
- /// VisitedPHIs - The set of PHI nodes visited when determining
- /// if a variable's reference has been taken. This set
- /// is maintained to ensure we don't visit the same PHI node multiple
- /// times.
- SmallPtrSet<const PHINode *, 16> VisitedPHIs;
-
// A prologue is generated.
bool HasPrologue = false;
@@ -87,22 +78,6 @@ private:
/// check fails.
BasicBlock *CreateFailBB();
- /// ContainsProtectableArray - Check whether the type either is an array or
- /// contains an array of sufficient size so that we need stack protectors
- /// for it.
- /// \param [out] IsLarge is set to true if a protectable array is found and
- /// it is "large" ( >= ssp-buffer-size). In the case of a structure with
- /// multiple arrays, this gets set if any of them is large.
- bool ContainsProtectableArray(Type *Ty, bool &IsLarge, bool Strong = false,
- bool InStruct = false) const;
-
- /// Check whether a stack allocation has its address taken.
- bool HasAddressTaken(const Instruction *AI, TypeSize AllocSize);
-
- /// RequiresStackProtector - Check whether or not this function needs a
- /// stack protector based upon the stack protector level.
- bool RequiresStackProtector();
-
public:
static char ID; // Pass identification, replacement for typeid.
@@ -116,6 +91,11 @@ public:
bool runOnFunction(Function &Fn) override;
void copyToMachineFrameInfo(MachineFrameInfo &MFI) const;
+
+ /// Check whether or not \p F needs a stack protector based upon the stack
+ /// protector level.
+ static bool requiresStackProtector(Function *F, SSPLayoutMap *Layout = nullptr);
+
};
} // end namespace llvm
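
Usage sketch (hypothetical helper): requiresStackProtector is now a public static member, so the check can be made without instantiating the pass.

#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Function.h"

static bool needsStackProtector(llvm::Function &F) {
  // The layout map is optional; pass nullptr when only a yes/no answer is needed.
  return llvm::StackProtector::requiresStackProtector(&F);
}
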
diff --git a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
index 47bedd9befc8..189dfef590b0 100644
--- a/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
+++ b/llvm/include/llvm/CodeGen/SwitchLoweringUtils.h
@@ -237,11 +237,11 @@ uint64_t getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases,
unsigned First, unsigned Last);
struct SwitchWorkListItem {
- MachineBasicBlock *MBB;
+ MachineBasicBlock *MBB = nullptr;
CaseClusterIt FirstCluster;
CaseClusterIt LastCluster;
- const ConstantInt *GE;
- const ConstantInt *LT;
+ const ConstantInt *GE = nullptr;
+ const ConstantInt *LT = nullptr;
BranchProbability DefaultProb;
};
using SwitchWorkList = SmallVector<SwitchWorkListItem, 4>;
@@ -292,9 +292,9 @@ public:
virtual ~SwitchLowering() = default;
private:
- const TargetLowering *TLI;
- const TargetMachine *TM;
- const DataLayout *DL;
+ const TargetLowering *TLI = nullptr;
+ const TargetMachine *TM = nullptr;
+ const DataLayout *DL = nullptr;
FunctionLoweringInfo &FuncInfo;
};
diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h
index 1333f2d98973..89ea9bcb2a40 100644
--- a/llvm/include/llvm/CodeGen/TargetCallingConv.h
+++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h
@@ -13,9 +13,9 @@
#ifndef LLVM_CODEGEN_TARGETCALLINGCONV_H
#define LLVM_CODEGEN_TARGETCALLINGCONV_H
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/Alignment.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <climits>
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index fbce5d7a9102..94de30461547 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -54,15 +54,18 @@ public:
};
struct DwarfFrameBase {
- // The frame base may be either a register (the default), the CFA,
- // or a WebAssembly-specific location description.
+ // The frame base may be either a register (the default), the CFA with an
+ // offset, or a WebAssembly-specific location description.
enum FrameBaseKind { Register, CFA, WasmFrameBase } Kind;
struct WasmFrameBase {
unsigned Kind; // Wasm local, global, or value stack
unsigned Index;
};
union {
+ // Used with FrameBaseKind::Register.
unsigned Reg;
+ // Used with FrameBaseKind::CFA.
+ int Offset;
struct WasmFrameBase WasmLoc;
} Location;
};
@@ -123,11 +126,6 @@ public:
return StackRealignable;
}
- /// Return the skew that has to be applied to stack alignment under
- /// certain conditions (e.g. stack was adjusted before function \p MF
- /// was called).
- virtual unsigned getStackAlignmentSkew(const MachineFunction &MF) const;
-
/// This method returns whether or not it is safe for an object with the
/// given stack id to be bundled into the local area.
virtual bool isStackIdSafeForLocalArea(unsigned StackId) const {
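
Sketch under the assumption that DwarfFrameBase stays an aggregate: the new Offset union member describes a CFA-relative frame base.

#include "llvm/CodeGen/TargetFrameLowering.h"

static llvm::TargetFrameLowering::DwarfFrameBase cfaWithOffset(int Offset) {
  llvm::TargetFrameLowering::DwarfFrameBase FB;
  FB.Kind = llvm::TargetFrameLowering::DwarfFrameBase::CFA;
  FB.Location.Offset = Offset; // new member, used with FrameBaseKind::CFA
  return FB;
}
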
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index ee5d87e0ce2e..817d32ea0ef6 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -61,6 +61,7 @@ class TargetRegisterInfo;
class TargetSchedModel;
class TargetSubtargetInfo;
enum class MachineCombinerPattern;
+enum class MachineTraceStrategy;
template <class T> class SmallVectorImpl;
@@ -1004,6 +1005,10 @@ public:
return false;
}
+ /// Return an index for MachineJumpTableInfo if \p MI is an indirect jump
+ /// using a jump table, otherwise -1.
+ virtual int getJumpTableIndex(const MachineInstr &MI) const { return -1; }
+
protected:
/// Target-dependent implementation for IsCopyInstr.
/// If the specific machine instruction is an instruction that moves/copies
@@ -1039,6 +1044,16 @@ public:
return isCopyInstrImpl(MI);
}
+ bool isFullCopyInstr(const MachineInstr &MI) const {
+ auto DestSrc = isCopyInstr(MI);
+ if (!DestSrc)
+ return false;
+
+ const MachineOperand *DestRegOp = DestSrc->Destination;
+ const MachineOperand *SrcRegOp = DestSrc->Source;
+ return !DestRegOp->getSubReg() && !SrcRegOp->getSubReg();
+ }
+
/// If the specific machine instruction is an instruction that adds an
/// immediate value and a physical register, and stores the result in
/// the given physical register \c Reg, return a pair of the source
@@ -1145,6 +1160,10 @@ public:
MachineInstr &LoadMI,
LiveIntervals *LIS = nullptr) const;
+ /// This function defines the logic to lower COPY instruction to
+ /// target specific instruction(s).
+ void lowerCopy(MachineInstr *MI, const TargetRegisterInfo *TRI) const;
+
/// Return true when there is potentially a faster code sequence
/// for an instruction chain ending in \p Root. All potential patterns are
/// returned in the \p Pattern vector. Pattern should be sorted in priority
@@ -1222,6 +1241,13 @@ public:
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const;
+ /// When calculating the latency of the root instruction, accumulate the
+ /// latency of the sequence to the root latency.
+ /// \param Root - Instruction that could be combined with one of its operands
+ virtual bool accumulateInstrSeqToRootLatency(MachineInstr &Root) const {
+ return true;
+ }
+
/// Attempt to reassociate \P Root and \P Prev according to \P Pattern to
/// reduce critical path length.
void reassociateOps(MachineInstr &Root, MachineInstr &Prev,
@@ -1251,6 +1277,9 @@ public:
/// Return true when a target supports MachineCombiner.
virtual bool useMachineCombiner() const { return false; }
+ /// Return a strategy that MachineCombiner must use when creating traces.
+ virtual MachineTraceStrategy getMachineCombinerTraceStrategy() const;
+
/// Return true if the given SDNode can be copied during scheduling
/// even if it has glue.
virtual bool canCopyGluedNodeDuringSchedule(SDNode *N) const { return false; }
@@ -1939,6 +1968,13 @@ public:
return false;
}
+ /// Allows targets to use an appropriate copy instruction while splitting the live
+ /// range of a register in register allocation.
+ virtual unsigned getLiveRangeSplitOpcode(Register Reg,
+ const MachineFunction &MF) const {
+ return TargetOpcode::COPY;
+ }
+
/// During PHI elimination lets the target make necessary checks and
/// insert the copy to the PHI destination register in a target specific
/// manner.
@@ -1962,8 +1998,9 @@ public:
}
/// Returns a \p outliner::OutlinedFunction struct containing target-specific
- /// information for a set of outlining candidates.
- virtual outliner::OutlinedFunction getOutliningCandidateInfo(
+ /// information for a set of outlining candidates. Returns std::nullopt if the
+ /// candidates are not suitable for outlining.
+ virtual std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
llvm_unreachable(
"Target didn't implement TargetInstrInfo::getOutliningCandidateInfo!");
@@ -1975,18 +2012,44 @@ public:
virtual void mergeOutliningCandidateAttributes(
Function &F, std::vector<outliner::Candidate> &Candidates) const;
- /// Returns how or if \p MI should be outlined.
+protected:
+ /// Target-dependent implementation for getOutliningTypeImpl.
virtual outliner::InstrType
- getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const {
+ getOutliningTypeImpl(MachineBasicBlock::iterator &MIT, unsigned Flags) const {
llvm_unreachable(
- "Target didn't implement TargetInstrInfo::getOutliningType!");
+ "Target didn't implement TargetInstrInfo::getOutliningTypeImpl!");
}
+public:
+ /// Returns how or if \p MIT should be outlined. \p Flags is the
+ /// target-specific information returned by isMBBSafeToOutlineFrom.
+ outliner::InstrType
+ getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const;
+
/// Optional target hook that returns true if \p MBB is safe to outline from,
/// and returns any target-specific information in \p Flags.
virtual bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
unsigned &Flags) const;
+ /// Optional target hook which partitions \p MBB into outlinable ranges for
+ /// instruction mapping purposes. Each range is defined by two iterators:
+ /// [start, end).
+ ///
+ /// Ranges are expected to be ordered top-down. That is, ranges closer to the
+ /// top of the block should come before ranges closer to the end of the block.
+ ///
+ /// Ranges cannot overlap.
+ ///
+ /// If an entire block is mappable, then its range is [MBB.begin(), MBB.end())
+ ///
+ /// All instructions not present in an outlinable range are considered
+ /// illegal.
+ virtual SmallVector<
+ std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
+ getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const {
+ return {std::make_pair(MBB.begin(), MBB.end())};
+ }
+
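
The default above maps the whole block as a single [begin, end) range. A target that needs to exclude a region could override the hook roughly as follows; MyTargetInstrInfo and isUnmappable are hypothetical names used only for illustration.

    SmallVector<std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
    MyTargetInstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
                                           unsigned &Flags) const {
      // Stop the (single) range at the first instruction the target cannot map;
      // everything past it is then treated as illegal for outlining.
      for (auto It = MBB.begin(), End = MBB.end(); It != End; ++It)
        if (isUnmappable(*It)) // hypothetical target-specific predicate
          return {std::make_pair(MBB.begin(), It)};
      return {std::make_pair(MBB.begin(), MBB.end())};
    }
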
/// Insert a custom frame for outlined functions.
virtual void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
const outliner::OutlinedFunction &OF) const {
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 639d48e342ef..6daf623665da 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -30,7 +30,9 @@
#include "llvm/CodeGen/ComplexDeinterleavingPass.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -49,7 +51,6 @@
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <climits>
@@ -74,7 +75,6 @@ class GISelKnownBits;
class IntrinsicInst;
class IRBuilderBase;
struct KnownBits;
-class LegacyDivergenceAnalysis;
class LLVMContext;
class MachineBasicBlock;
class MachineFunction;
@@ -282,6 +282,15 @@ public:
Expensive = 2 // Negated expression is more expensive.
};
+ /// Enum of different potentially desirable ways to fold (and/or (setcc ...),
+ /// (setcc ...)).
+ enum AndOrSETCCFoldKind : uint8_t {
+ None = 0, // No fold is preferable.
+ AddAnd = 1, // Fold with `Add` op and `And` op is preferable.
+ NotAnd = 2, // Fold with `Not` op and `And` op is preferable.
+ ABS = 4, // Fold with `llvm.abs` op is preferable.
+ };
+
class ArgListEntry {
public:
Value *Val = nullptr;
@@ -425,6 +434,13 @@ public:
return MachineMemOperand::MONone;
}
+ /// This callback is used to inspect load/store SDNode.
+ /// The default implementation does nothing.
+ virtual MachineMemOperand::Flags
+ getTargetMMOFlags(const MemSDNode &Node) const {
+ return MachineMemOperand::MONone;
+ }
+
MachineMemOperand::Flags
getLoadMemOperandFlags(const LoadInst &LI, const DataLayout &DL,
AssumptionCache *AC = nullptr,
@@ -444,6 +460,17 @@ public:
return true;
}
+ virtual bool shouldExpandGetVectorLength(EVT CountVT, unsigned VF,
+ bool IsScalable) const {
+ return true;
+ }
+
+ // Return true if op(vecreduce(x), vecreduce(y)) should be reassociated to
+ // vecreduce(op(x, y)) for the reduction opcode RedOpc.
+ virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const {
+ return true;
+ }
+
/// Return true if it is profitable to convert a select of FP constants into
/// a constant pool load whose address depends on the select condition. The
/// parameter may be used to differentiate a select with FP compare from
@@ -587,13 +614,13 @@ public:
return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO);
}
- /// Return true if it is expected to be cheaper to do a store of a non-zero
- /// vector constant with the given size and type for the address space than to
+  /// Return true if it is expected to be cheaper to do a store of a vector
+  /// constant with the given size and type for the address space than to
/// store the individual scalar element constants.
- virtual bool storeOfVectorConstantIsCheap(EVT MemVT,
+ virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT,
unsigned NumElem,
unsigned AddrSpace) const {
- return false;
+ return IsZero;
}
/// Allow store merging for the specified type after legalization in addition
@@ -638,14 +665,6 @@ public:
/// gen prepare.
virtual bool preferZeroCompareBranch() const { return false; }
- /// Return true if it is safe to transform an integer-domain bitwise operation
- /// into the equivalent floating-point operation. This should be set to true
- /// if the target has IEEE-754-compliant fabs/fneg operations for the input
- /// type.
- virtual bool hasBitPreservingFPLogic(EVT VT) const {
- return false;
- }
-
/// Return true if it is cheaper to split the store of a merged int val
/// from a pair of smaller values into multiple stores.
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
@@ -666,6 +685,13 @@ public:
return false;
}
+ /// Return true if it is valid to merge the TargetMMOFlags in two SDNodes.
+ virtual bool
+ areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX,
+ const MemSDNode &NodeY) const {
+ return true;
+ }
+
/// Use bitwise logic to make pairs of compares more efficient. For example:
/// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
/// This should be true when it takes more than one instruction to lower
@@ -789,8 +815,14 @@ public:
return true;
}
+ // By default prefer folding (abs (sub nsw x, y)) -> abds(x, y). Some targets
+ // may want to avoid this to prevent loss of sub_nsw pattern.
+ virtual bool preferABDSToABSWithNSW(EVT VT) const {
+ return true;
+ }
+
// Return true if the target wants to transform Op(Splat(X)) -> Splat(Op(X))
- virtual bool preferScalarizeSplat(unsigned Opc) const { return true; }
+ virtual bool preferScalarizeSplat(SDNode *N) const { return true; }
/// Return true if the target wants to use the optimization that
/// turns ext(promotableInst1(...(promotableInstN(load)))) into
@@ -1514,15 +1546,16 @@ public:
EVT getMemValueType(const DataLayout &DL, Type *Ty,
bool AllowUnknown = false) const {
// Lower scalar pointers to native pointer types.
- if (PointerType *PTy = dyn_cast<PointerType>(Ty))
+ if (auto *PTy = dyn_cast<PointerType>(Ty))
return getPointerMemTy(DL, PTy->getAddressSpace());
- else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
- Type *Elm = VTy->getElementType();
- if (PointerType *PT = dyn_cast<PointerType>(Elm)) {
- EVT PointerTy(getPointerMemTy(DL, PT->getAddressSpace()));
- Elm = PointerTy.getTypeForEVT(Ty->getContext());
+
+ if (auto *VTy = dyn_cast<VectorType>(Ty)) {
+ Type *EltTy = VTy->getElementType();
+ if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
+ EVT PointerTy(getPointerMemTy(DL, PTy->getAddressSpace()));
+ EltTy = PointerTy.getTypeForEVT(Ty->getContext());
}
- return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),
+ return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
VTy->getElementCount());
}
@@ -1549,11 +1582,8 @@ public:
/// Return the type of registers that this ValueType will eventually require.
MVT getRegisterType(LLVMContext &Context, EVT VT) const {
- if (VT.isSimple()) {
- assert((unsigned)VT.getSimpleVT().SimpleTy <
- std::size(RegisterTypeForVT));
- return RegisterTypeForVT[VT.getSimpleVT().SimpleTy];
- }
+ if (VT.isSimple())
+ return getRegisterType(VT.getSimpleVT());
if (VT.isVector()) {
EVT VT1;
MVT RegisterVT;
@@ -1642,6 +1672,10 @@ public:
return true;
}
+ /// Return true (the default) if it is profitable to remove a sext_inreg(x)
+ /// where the sext is redundant, and use x directly.
+ virtual bool shouldRemoveRedundantExtend(SDValue Op) const { return true; }
+
/// When splitting a value of the specified type into parts, does the Lo
/// or Hi part come first? This usually follows the endianness, except
/// for ppcf128, where the Hi part always comes first.
@@ -2052,6 +2086,18 @@ public:
llvm_unreachable("Masked cmpxchg expansion unimplemented on this target");
}
+ //===--------------------------------------------------------------------===//
+ /// \name KCFI check lowering.
+ /// @{
+
+ virtual MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator &MBBI,
+ const TargetInstrInfo *TII) const {
+ llvm_unreachable("KCFI is not supported on this target");
+ }
+
+ /// @}
+
/// Inserts in the IR a target-specific intrinsic specifying a fence.
/// It is called by AtomicExpandPass before expanding an
/// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
@@ -2270,11 +2316,11 @@ public:
/// considered beneficial.
   /// If optimizing for size, expansion is only considered beneficial for up to
/// 5 multiplies and a divide (if the exponent is negative).
- bool isBeneficialToExpandPowI(int Exponent, bool OptForSize) const {
+ bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const {
if (Exponent < 0)
Exponent = -Exponent;
- return !OptForSize ||
- (llvm::popcount((unsigned int)Exponent) + Log2_32(Exponent) < 7);
+ uint64_t E = static_cast<uint64_t>(Exponent);
+ return !OptForSize || (llvm::popcount(E) + Log2_64(E) < 7);
}
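
The rewritten check estimates the multiplies that exponentiation by squaring needs: roughly one squaring per bit position of the exponent (Log2_64) plus one multiply per set bit (popcount), and allows the expansion under -Os only while that estimate stays below 7. A standalone restatement of the same arithmetic with the C++20 <bit> helpers instead of LLVM's:

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    static bool beneficialToExpandPowI(int64_t Exponent, bool OptForSize) {
      if (Exponent < 0)
        Exponent = -Exponent;
      uint64_t E = static_cast<uint64_t>(Exponent);
      // floor(log2(E)) for E > 0, mirroring Log2_64.
      unsigned Log2 = E ? 63u - std::countl_zero(E) : 0u;
      return !OptForSize || (std::popcount(E) + Log2 < 7);
    }

    int main() {
      std::printf("x^5  at -Os: %d\n", beneficialToExpandPowI(5, true));  // 1
      std::printf("x^19 at -Os: %d\n", beneficialToExpandPowI(19, true)); // 0
    }
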
//===--------------------------------------------------------------------===//
@@ -2694,6 +2740,8 @@ public:
case ISD::AVGFLOORU:
case ISD::AVGCEILS:
case ISD::AVGCEILU:
+ case ISD::ABDS:
+ case ISD::ABDU:
return true;
default: return false;
}
@@ -2831,6 +2879,13 @@ public:
getApproximateEVTForLLT(ToTy, DL, Ctx));
}
+ /// Return true if zero-extending the specific node Val to type VT2 is free
+ /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or
+ /// because it's folded such as X86 zero-extending loads).
+ virtual bool isZExtFree(SDValue Val, EVT VT2) const {
+ return isZExtFree(Val.getValueType(), VT2);
+ }
+
/// Return true if sign-extension from FromTy to ToTy is cheaper than
/// zero-extension.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const {
@@ -2853,8 +2908,9 @@ public:
/// Try to optimize extending or truncating conversion instructions (like
/// zext, trunc, fptoui, uitofp) for the target.
- virtual bool optimizeExtendOrTruncateConversion(Instruction *I,
- Loop *L) const {
+ virtual bool
+ optimizeExtendOrTruncateConversion(Instruction *I, Loop *L,
+ const TargetTransformInfo &TTI) const {
return false;
}
@@ -2916,11 +2972,26 @@ public:
return false;
}
- /// Return true if zero-extending the specific node Val to type VT2 is free
- /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or
- /// because it's folded such as X86 zero-extending loads).
- virtual bool isZExtFree(SDValue Val, EVT VT2) const {
- return isZExtFree(Val.getValueType(), VT2);
+ /// Lower a deinterleave intrinsic to a target specific load intrinsic.
+ /// Return true on success. Currently only supports
+ /// llvm.experimental.vector.deinterleave2
+ ///
+ /// \p DI is the deinterleave intrinsic.
+ /// \p LI is the accompanying load instruction
+ virtual bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
+ LoadInst *LI) const {
+ return false;
+ }
+
+ /// Lower an interleave intrinsic to a target specific store intrinsic.
+ /// Return true on success. Currently only supports
+ /// llvm.experimental.vector.interleave2
+ ///
+ /// \p II is the interleave intrinsic.
+ /// \p SI is the accompanying store instruction
+ virtual bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
+ StoreInst *SI) const {
+ return false;
}
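
These two hooks let a target claim an llvm.experimental.vector.deinterleave2 / interleave2 intrinsic together with its adjacent load or store and emit a structured memory operation instead. A deliberately conservative sketch of an override follows; MyTargetLowering is a placeholder and the bail-out conditions are purely illustrative.

    bool MyTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
                                                            LoadInst *LI) const {
      // Illustrative restrictions: only simple (non-volatile, non-atomic) loads,
      // and only when the deinterleave sits in the same block as the load.
      if (!LI->isSimple() || DI->getParent() != LI->getParent())
        return false;
      // ... emit the target's structured-load intrinsic and replace DI here ...
      return false; // sketch: nothing is actually rewritten
    }
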
/// Return true if an fpext operation is free (for instance, because
@@ -3039,10 +3110,10 @@ public:
return false;
}
- /// Return true if it's profitable to narrow operations of type VT1 to
- /// VT2. e.g. on x86, it's profitable to narrow from i32 to i8 but not from
+ /// Return true if it's profitable to narrow operations of type SrcVT to
+ /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not from
/// i32 to i16.
- virtual bool isNarrowingProfitable(EVT /*VT1*/, EVT /*VT2*/) const {
+ virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
return false;
}
@@ -3149,7 +3220,7 @@ public:
/// If one cannot be created using all the given inputs, nullptr should be
/// returned.
virtual Value *createComplexDeinterleavingIR(
- Instruction *I, ComplexDeinterleavingOperation OperationType,
+ IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
Value *Accumulator = nullptr) const {
return nullptr;
@@ -3302,7 +3373,7 @@ private:
/// register class is the largest legal super-reg register class of the
/// register class of the specified type. e.g. On x86, i8, i16, and i32's
/// representative class would be GR32.
- const TargetRegisterClass *RepRegClassForVT[MVT::VALUETYPE_SIZE];
+ const TargetRegisterClass *RepRegClassForVT[MVT::VALUETYPE_SIZE] = {0};
/// This indicates the "cost" of the "representative" register class for each
/// ValueType. The cost is used by the scheduler to approximate register
@@ -3521,7 +3592,7 @@ public:
virtual bool isSDNodeSourceOfDivergence(const SDNode *N,
FunctionLoweringInfo *FLI,
- LegacyDivergenceAnalysis *DA) const {
+ UniformityInfo *UA) const {
return false;
}
@@ -3536,6 +3607,17 @@ public:
return N0.hasOneUse();
}
+  // Lets the target control the following reassociation of operands:
+  // (op (op x, c1), y) -> (op (op x, y), c1), where N0 is (op x, c1) and N1 is
+  // y. By default, any case where N0 has a single use is considered profitable.
+  // This reflects the condition this target hook call replaced in the combiner.
+  // A target can implement its own heuristic to restrict the common combiner.
+ virtual bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
+ Register N1) const {
+ return MRI.hasOneNonDBGUse(N0);
+ }
+
virtual bool isSDNodeAlwaysUniform(const SDNode * N) const {
return false;
}
@@ -3594,15 +3676,13 @@ public:
/// legal. It is frequently not legal in PIC relocation models.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
- /// Return true if the operand with index OpNo corresponding to a target
- /// branch, for example, in following case
- ///
- /// call void asm "lea r8, $0\0A\09call qword ptr ${1:P}\0A\09ret",
- /// "*m,*m,~{r8},~{dirflag},~{fpsr},~{flags}"
- /// ([9 x i32]* @Arr), void (...)* @sincos_asm)
+ /// On x86, return true if the operand with index OpNo is a CALL or JUMP
+ /// instruction, which can use either a memory constraint or an address
+ /// constraint. -fasm-blocks "__asm call foo" lowers to
+ /// call void asm sideeffect inteldialect "call ${0:P}", "*m..."
///
- /// the operand $1 (sincos_asm) is target branch in inline asm, but the
- /// operand $0 (Arr) is not.
+ /// This function is used by a hack to choose the address constraint,
+ /// lowering to a direct call.
virtual bool
isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
unsigned OpNo) const {
@@ -3702,7 +3782,8 @@ public:
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This
/// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
- bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded,
+ bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
+ const APInt &DemandedBits,
TargetLoweringOpt &TLO) const;
/// Look at Op. At this point, we know that only the DemandedBits bits of the
@@ -4005,6 +4086,42 @@ public:
return true;
}
+ /// GlobalISel - return true if it is profitable to move this shift by a
+ /// constant amount through its operand, adjusting any immediate operands as
+ /// necessary to preserve semantics. This transformation may not be desirable
+ /// if it disrupts a particularly auspicious target-specific tree (e.g.
+ /// bitfield extraction in AArch64). By default, it returns true.
+ ///
+ /// @param MI the shift instruction
+ /// @param IsAfterLegal true if running after legalization.
+ virtual bool isDesirableToCommuteWithShift(const MachineInstr &MI,
+ bool IsAfterLegal) const {
+ return true;
+ }
+
+  // Return AndOrSETCCFoldKind::{AddAnd, ABS} if it's desirable to try and
+  // optimize LogicOp(SETCC0, SETCC1). An example (what is implemented as of
+  // writing this) is:
+  //    With C as a power of 2 and C != 0 and C != INT_MIN:
+  //       AddAnd:
+  //          (icmp eq A, C) | (icmp eq A, -C)
+  //                -> (icmp eq and(add(A, C), ~(C + C)), 0)
+  //          (icmp ne A, C) & (icmp ne A, -C)
+  //                -> (icmp ne and(add(A, C), ~(C + C)), 0)
+  //       ABS:
+  //          (icmp eq A, C) | (icmp eq A, -C)
+  //                -> (icmp eq Abs(A), C)
+  //          (icmp ne A, C) & (icmp ne A, -C)
+  //                -> (icmp ne Abs(A), C)
+  //
+  // @param LogicOp the logic op
+  // @param SETCC0 the first of the SETCC nodes
+  // @param SETCC1 the second of the SETCC nodes
+ virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(
+ const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const {
+ return AndOrSETCCFoldKind::None;
+ }
+
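
Both the AddAnd and ABS forms rest on the same identity: for a power-of-two constant C (C != 0, C != INT_MIN), A == C || A == -C holds exactly when ((A + C) & ~(C + C)) == 0, and also exactly when |A| == C. A small standalone check of that equivalence, written from the comment above rather than taken from the DAG combiner:

    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    int main() {
      const int32_t C = 64; // any power of two other than 0 and INT_MIN
      for (int32_t A = -1000; A <= 1000; ++A) {
        bool Orig = (A == C) || (A == -C);
        // Do the adds in wrapping (unsigned) arithmetic, as the DAG would.
        uint32_t UA = static_cast<uint32_t>(A), UC = static_cast<uint32_t>(C);
        bool AddAnd = ((UA + UC) & ~(UC + UC)) == 0;
        bool Abs = (std::abs(A) == C);
        if (Orig != AddAnd || Orig != Abs) {
          std::printf("mismatch at A=%d\n", A);
          return 1;
        }
      }
      std::puts("AddAnd and ABS rewrites agree with the original compares");
    }
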
/// Return true if it is profitable to combine an XOR of a logical shift
/// to create a logical shift of NOT. This transformation may not be desirable
/// if it disrupts a particularly auspicious target-specific tree (e.g.
@@ -4459,7 +4576,7 @@ public:
/// necessary information.
virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
ISD::NodeType /*ExtendKind*/) const {
- EVT MinVT = getRegisterType(Context, MVT::i32);
+ EVT MinVT = getRegisterType(MVT::i32);
return VT.bitsLT(MinVT) ? MinVT : VT;
}
@@ -4488,6 +4605,12 @@ public:
return nullptr;
}
+  /// Returns a 0-terminated array of rounding control registers that can be
+  /// attached to a strict FP call.
+ virtual ArrayRef<MCPhysReg> getRoundingControlRegisters() const {
+ return ArrayRef<MCPhysReg>();
+ }
+
/// This callback is used to prepare for a volatile or atomic load.
/// It takes a chain node as input and returns the chain for the load itself.
///
@@ -4709,7 +4832,7 @@ public:
SelectionDAG &DAG) const;
// Lower custom output constraints. If invalid, return SDValue().
- virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
+ virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue,
const SDLoc &DL,
const AsmOperandInfo &OpInfo,
SelectionDAG &DAG) const;
@@ -4922,7 +5045,7 @@ public:
/// \param Test The test to perform.
/// \param Flags The optimization flags.
/// \returns The expansion result or SDValue() if it fails.
- SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, unsigned Test,
+ SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test,
SDNodeFlags Flags, const SDLoc &DL,
SelectionDAG &DAG) const;
@@ -4973,6 +5096,11 @@ public:
SDValue expandABS(SDNode *N, SelectionDAG &DAG,
bool IsNegative = false) const;
+ /// Expand ABDS/ABDU nodes. Expands vector/scalar ABDS/ABDU nodes.
+ /// \param N Node to expand
+ /// \returns The expansion result or SDValue() if it fails.
+ SDValue expandABD(SDNode *N, SelectionDAG &DAG) const;
+
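
Absolute-difference nodes have a simple branch-free form; one common identity is abd(x, y) == max(x, y) - min(x, y), signed or unsigned as appropriate. The snippet below only illustrates that identity and is not a claim about which sequence expandABD actually emits.

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Signed absolute difference via smax/smin. (The extreme INT_MIN/INT_MAX
    // pair would overflow this plain C++ form; the DAG works in wrapping math.)
    static int32_t abds(int32_t X, int32_t Y) {
      return std::max(X, Y) - std::min(X, Y);
    }

    // Unsigned absolute difference via umax/umin.
    static uint32_t abdu(uint32_t X, uint32_t Y) {
      return std::max(X, Y) - std::min(X, Y);
    }

    int main() {
      assert(abds(-3, 5) == 8);
      assert(abds(7, 2) == 5);
      assert(abdu(1u, 0xFFFFFFFFu) == 0xFFFFFFFEu);
    }
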
/// Expand BSWAP nodes. Expands scalar/vector BSWAP nodes with i16/i32/i64
/// scalar types. Returns SDValue() if expand fails.
/// \param N Node to expand
diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 08267d70906a..9f92b919824d 100644
--- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -156,6 +156,8 @@ public:
void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV,
const TargetMachine &TM) const override;
+
+ MCSection *getSectionForCommandLines() const override;
};
class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile {
@@ -178,6 +180,9 @@ public:
MCSection *getSectionForJumpTable(const Function &F,
const TargetMachine &TM) const override;
+ bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference,
+ const Function &F) const override;
+
/// Emit Obj-C garbage collection and linker options.
void emitModuleMetadata(MCStreamer &Streamer, Module &M) const override;
diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h
index 8d7086d02c8a..9cdd9e30a361 100644
--- a/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -438,6 +438,10 @@ protected:
/// immediately before machine code is emitted.
virtual void addPreEmitPass() { }
+ /// This pass may be implemented by targets that want to run passes
+ /// immediately after basic block sections are assigned.
+ virtual void addPostBBSections() {}
+
/// Targets may add passes immediately before machine code is emitted in this
/// callback. This is called even later than `addPreEmitPass`.
// FIXME: Rename `addPreEmitPass` to something more sensible given its actual
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index 5eb1a644ffba..62a955f6b7d4 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -20,11 +20,11 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Printable.h"
#include <cassert>
@@ -428,8 +428,8 @@ public:
/// Returns true if Reg contains RegUnit.
bool hasRegUnit(MCRegister Reg, Register RegUnit) const {
- for (MCRegUnitIterator Units(Reg, this); Units.isValid(); ++Units)
- if (Register(*Units) == RegUnit)
+ for (MCRegUnit Unit : regunits(Reg))
+ if (Register(Unit) == RegUnit)
return true;
return false;
}
@@ -557,6 +557,12 @@ public:
return false;
}
+ /// Returns true if the register is considered uniform.
+ virtual bool isUniformReg(const MachineRegisterInfo &MRI,
+ const RegisterBankInfo &RBI, Register Reg) const {
+ return false;
+ }
+
/// Physical registers that may be modified within a function but are
/// guaranteed to be restored before any uses. This is useful for targets that
/// have call sequences where a GOT register may be updated by the caller
@@ -1255,7 +1261,7 @@ class BitMaskClassIterator {
// Otherwise look for the first bit set from the right
// (representation of the class ID is big endian).
// See getSubClassMask for more details on the representation.
- unsigned Offset = countTrailingZeros(CurrentChunk);
+ unsigned Offset = llvm::countr_zero(CurrentChunk);
// Add the Offset to the adjusted base number of this chunk: Idx.
// This is the ID of the register class.
ID = Idx + Offset;
diff --git a/llvm/include/llvm/CodeGen/TargetSchedule.h b/llvm/include/llvm/CodeGen/TargetSchedule.h
index 049ede89ab46..bfab9cb92a38 100644
--- a/llvm/include/llvm/CodeGen/TargetSchedule.h
+++ b/llvm/include/llvm/CodeGen/TargetSchedule.h
@@ -90,7 +90,7 @@ public:
bool hasInstrSchedModelOrItineraries() const {
return hasInstrSchedModel() || hasInstrItineraries();
}
-
+ bool enableIntervals() const { return SchedModel.EnableIntervals; }
/// Identify the processor corresponding to the current subtarget.
unsigned getProcessorID() const { return SchedModel.getProcessorID(); }
diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
index 1312ae602162..9b98ef353d73 100644
--- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
@@ -318,6 +318,11 @@ public:
classifyGlobalFunctionReference(const GlobalValue *GV) const {
return 0;
}
+
+  /// Enable spillage copy elimination in the MachineCopyPropagation pass. This
+  /// helps remove redundant copies generated by the register allocator when
+  /// handling complex eviction chains.
+ virtual bool enableSpillageCopyElimination() const { return false; }
};
} // end namespace llvm
diff --git a/llvm/include/llvm/CodeGen/TileShapeInfo.h b/llvm/include/llvm/CodeGen/TileShapeInfo.h
index 1b5f902139fb..48c2d9ae70df 100644
--- a/llvm/include/llvm/CodeGen/TileShapeInfo.h
+++ b/llvm/include/llvm/CodeGen/TileShapeInfo.h
@@ -87,8 +87,8 @@ private:
static constexpr int64_t InvalidImmShape = -1;
MachineOperand *Row;
MachineOperand *Col;
- int64_t RowImm;
- int64_t ColImm;
+ int64_t RowImm = -1;
+ int64_t ColImm = -1;
};
} // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/VLIWMachineScheduler.h b/llvm/include/llvm/CodeGen/VLIWMachineScheduler.h
index a39f04f6db6c..bd12baa6afab 100644
--- a/llvm/include/llvm/CodeGen/VLIWMachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/VLIWMachineScheduler.h
@@ -48,7 +48,8 @@ protected:
public:
VLIWResourceModel(const TargetSubtargetInfo &STI, const TargetSchedModel *SM);
-
+ VLIWResourceModel &operator=(const VLIWResourceModel &other) = delete;
+ VLIWResourceModel(const VLIWResourceModel &other) = delete;
virtual ~VLIWResourceModel();
virtual void reset();
@@ -151,6 +152,8 @@ protected:
Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name + ".P") {}
~VLIWSchedBoundary();
+ VLIWSchedBoundary &operator=(const VLIWSchedBoundary &other) = delete;
+ VLIWSchedBoundary(const VLIWSchedBoundary &other) = delete;
void init(VLIWMachineScheduler *dag, const TargetSchedModel *smodel) {
DAG = dag;
@@ -261,8 +264,6 @@ protected:
#endif
};
-ScheduleDAGMILive *createVLIWSched(MachineSchedContext *C);
-
} // end namespace llvm
#endif // LLVM_CODEGEN_VLIWMACHINESCHEDULER_H
diff --git a/llvm/include/llvm/CodeGen/ValueTypes.h b/llvm/include/llvm/CodeGen/ValueTypes.h
index af4c8ab40e82..c4cd332d101c 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.h
+++ b/llvm/include/llvm/CodeGen/ValueTypes.h
@@ -15,8 +15,8 @@
#ifndef LLVM_CODEGEN_VALUETYPES_H
#define LLVM_CODEGEN_VALUETYPES_H
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TypeSize.h"
#include <cassert>
@@ -110,7 +110,7 @@ namespace llvm {
/// Return the type converted to an equivalently sized integer or vector
/// with integer element type. Similar to changeVectorElementTypeToInteger,
/// but also handles scalars.
- EVT changeTypeToInteger() {
+ EVT changeTypeToInteger() const {
if (isVector())
return changeVectorElementTypeToInteger();
@@ -122,7 +122,7 @@ namespace llvm {
/// Test if the given EVT has zero size, this will fail if called on a
/// scalable type
bool isZeroSized() const {
- return !isScalableVector() && getSizeInBits() == 0;
+ return getSizeInBits().isZero();
}
/// Test if the given EVT is simple (as opposed to being extended).
@@ -150,6 +150,12 @@ namespace llvm {
return isSimple() ? V.isScalarInteger() : isExtendedScalarInteger();
}
+ /// Return true if this is a vector type where the runtime
+ /// length is machine dependent
+ bool isScalableTargetExtVT() const {
+ return isSimple() && V.isScalableTargetExtVT();
+ }
+
/// Return true if this is a vector value type.
bool isVector() const {
return isSimple() ? V.isVector() : isExtendedVector();
@@ -166,6 +172,11 @@ namespace llvm {
: isExtendedFixedLengthVector();
}
+ /// Return true if the type is a scalable type.
+ bool isScalableVT() const {
+ return isScalableVector() || isScalableTargetExtVT();
+ }
+
/// Return true if this is a 16-bit vector type.
bool is16BitVector() const {
return isSimple() ? V.is16BitVector() : isExtended16BitVector();
@@ -456,6 +467,14 @@ namespace llvm {
/// This function returns value type as a string, e.g. "i32".
std::string getEVTString() const;
+ /// Support for debugging, callable in GDB: VT.dump()
+ void dump() const;
+
+ /// Implement operator<<.
+ void print(raw_ostream &OS) const {
+ OS << getEVTString();
+ }
+
/// This method returns an LLVM type corresponding to the specified EVT.
/// For integer types, this returns an unsigned type. Note that this will
/// abort for types that cannot be represented.
@@ -516,6 +535,10 @@ namespace llvm {
TypeSize getExtendedSizeInBits() const LLVM_READONLY;
};
+ inline raw_ostream &operator<<(raw_ostream &OS, const EVT &V) {
+ V.print(OS);
+ return OS;
+ }
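
With print and the operator<< above, an EVT can be streamed directly instead of going through getEVTString(). A small usage sketch, assuming the usual raw_ostream include; the helper name is illustrative:

    #include "llvm/CodeGen/ValueTypes.h"
    #include "llvm/Support/raw_ostream.h"

    // Prints e.g. "widening v3i32 -> v4i32" to stderr.
    static void reportWidening(llvm::EVT From, llvm::EVT To) {
      llvm::errs() << "widening " << From << " -> " << To << "\n";
    }
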
} // end namespace llvm
#endif // LLVM_CODEGEN_VALUETYPES_H
diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td
index c22553855c55..25f0d385259d 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -14,250 +14,302 @@
class ValueType<int size, int value> {
string Namespace = "MVT";
+ string LLVMName = NAME;
int Size = size;
int Value = value;
+ int nElem = 1;
+ ValueType ElementType = ?;
+ int isOverloaded = false;
+ int isInteger = false;
+ int isFP = false;
+ int isVector = false;
+ int isScalable = false;
}
-def OtherVT : ValueType<0, 1>; // "Other" value
-def i1 : ValueType<1, 2>; // One bit boolean value
-def i2 : ValueType<2, 3>; // 2-bit integer value
-def i4 : ValueType<4, 4>; // 4-bit integer value
-def i8 : ValueType<8, 5>; // 8-bit integer value
-def i16 : ValueType<16, 6>; // 16-bit integer value
-def i32 : ValueType<32, 7>; // 32-bit integer value
-def i64 : ValueType<64, 8>; // 64-bit integer value
-def i128 : ValueType<128, 9>; // 128-bit integer value
-
-def bf16 : ValueType<16, 10>; // 16-bit brain floating point value
-def f16 : ValueType<16, 11>; // 16-bit floating point value
-def f32 : ValueType<32, 12>; // 32-bit floating point value
-def f64 : ValueType<64, 13>; // 64-bit floating point value
-def f80 : ValueType<80, 14>; // 80-bit floating point value
-def f128 : ValueType<128, 15>; // 128-bit floating point value
-def ppcf128 : ValueType<128, 16>; // PPC 128-bit floating point value
-
-def v1i1 : ValueType<1, 17>; // 1 x i1 vector value
-def v2i1 : ValueType<2, 18>; // 2 x i1 vector value
-def v4i1 : ValueType<4, 19>; // 4 x i1 vector value
-def v8i1 : ValueType<8, 20>; // 8 x i1 vector value
-def v16i1 : ValueType<16, 21>; // 16 x i1 vector value
-def v32i1 : ValueType<32, 22>; // 32 x i1 vector value
-def v64i1 : ValueType<64, 23>; // 64 x i1 vector value
-def v128i1 : ValueType<128, 24>; // 128 x i1 vector value
-def v256i1 : ValueType<256, 25>; // 256 x i1 vector value
-def v512i1 : ValueType<512, 26>; // 512 x i1 vector value
-def v1024i1 : ValueType<1024, 27>; // 1024 x i1 vector value
-def v2048i1 : ValueType<2048, 28>; // 2048 x i1 vector value
-
-def v128i2 : ValueType<256, 29>; // 128 x i2 vector value
-def v256i2 : ValueType<512, 30>; // 256 x i2 vector value
-
-def v64i4 : ValueType<256, 31>; // 64 x i4 vector value
-def v128i4 : ValueType<512, 32>; // 128 x i4 vector value
-
-def v1i8 : ValueType<8, 33>; // 1 x i8 vector value
-def v2i8 : ValueType<16, 34>; // 2 x i8 vector value
-def v4i8 : ValueType<32, 35>; // 4 x i8 vector value
-def v8i8 : ValueType<64, 36>; // 8 x i8 vector value
-def v16i8 : ValueType<128, 37>; // 16 x i8 vector value
-def v32i8 : ValueType<256, 38>; // 32 x i8 vector value
-def v64i8 : ValueType<512, 39>; // 64 x i8 vector value
-def v128i8 : ValueType<1024, 40>; // 128 x i8 vector value
-def v256i8 : ValueType<2048, 41>; // 256 x i8 vector value
-def v512i8 : ValueType<4096, 42>; // 512 x i8 vector value
-def v1024i8 : ValueType<8192, 43>; // 1024 x i8 vector value
-
-def v1i16 : ValueType<16, 44>; // 1 x i16 vector value
-def v2i16 : ValueType<32, 45>; // 2 x i16 vector value
-def v3i16 : ValueType<48, 46>; // 3 x i16 vector value
-def v4i16 : ValueType<64, 47>; // 4 x i16 vector value
-def v8i16 : ValueType<128, 48>; // 8 x i16 vector value
-def v16i16 : ValueType<256, 49>; // 16 x i16 vector value
-def v32i16 : ValueType<512, 50>; // 32 x i16 vector value
-def v64i16 : ValueType<1024, 51>; // 64 x i16 vector value
-def v128i16 : ValueType<2048, 52>; // 128 x i16 vector value
-def v256i16 : ValueType<4096, 53>; // 256 x i16 vector value
-def v512i16 : ValueType<8192, 54>; // 512 x i16 vector value
-
-def v1i32 : ValueType<32, 55>; // 1 x i32 vector value
-def v2i32 : ValueType<64, 56>; // 2 x i32 vector value
-def v3i32 : ValueType<96, 57>; // 3 x i32 vector value
-def v4i32 : ValueType<128, 58>; // 4 x i32 vector value
-def v5i32 : ValueType<160, 59>; // 5 x i32 vector value
-def v6i32 : ValueType<192, 60>; // 6 x f32 vector value
-def v7i32 : ValueType<224, 61>; // 7 x f32 vector value
-def v8i32 : ValueType<256, 62>; // 8 x i32 vector value
-def v9i32 : ValueType<288, 63>; // 9 x i32 vector value
-def v10i32 : ValueType<320, 64>; // 10 x i32 vector value
-def v11i32 : ValueType<352, 65>; // 11 x i32 vector value
-def v12i32 : ValueType<384, 66>; // 12 x i32 vector value
-def v16i32 : ValueType<512, 67>; // 16 x i32 vector value
-def v32i32 : ValueType<1024, 68>; // 32 x i32 vector value
-def v64i32 : ValueType<2048, 69>; // 64 x i32 vector value
-def v128i32 : ValueType<4096, 70>; // 128 x i32 vector value
-def v256i32 : ValueType<8192, 71>; // 256 x i32 vector value
-def v512i32 : ValueType<16384, 72>; // 512 x i32 vector value
-def v1024i32 : ValueType<32768, 73>; // 1024 x i32 vector value
-def v2048i32 : ValueType<65536, 74>; // 2048 x i32 vector value
-
-def v1i64 : ValueType<64, 75>; // 1 x i64 vector value
-def v2i64 : ValueType<128, 76>; // 2 x i64 vector value
-def v3i64 : ValueType<192, 77>; // 3 x i64 vector value
-def v4i64 : ValueType<256, 78>; // 4 x i64 vector value
-def v8i64 : ValueType<512, 79>; // 8 x i64 vector value
-def v16i64 : ValueType<1024, 80>; // 16 x i64 vector value
-def v32i64 : ValueType<2048, 81>; // 32 x i64 vector value
-def v64i64 : ValueType<4096, 82>; // 64 x i64 vector value
-def v128i64 : ValueType<8192, 83>; // 128 x i64 vector value
-def v256i64 : ValueType<16384, 84>; // 256 x i64 vector value
-
-def v1i128 : ValueType<128, 85>; // 1 x i128 vector value
-
-def v1f16 : ValueType<16, 86>; // 1 x f16 vector value
-def v2f16 : ValueType<32, 87>; // 2 x f16 vector value
-def v3f16 : ValueType<48, 88>; // 3 x f16 vector value
-def v4f16 : ValueType<64, 89>; // 4 x f16 vector value
-def v8f16 : ValueType<128, 90>; // 8 x f16 vector value
-def v16f16 : ValueType<256, 91>; // 16 x f16 vector value
-def v32f16 : ValueType<512, 92>; // 32 x f16 vector value
-def v64f16 : ValueType<1024, 93>; // 64 x f16 vector value
-def v128f16 : ValueType<2048, 94>; // 128 x f16 vector value
-def v256f16 : ValueType<4096, 95>; // 256 x f16 vector value
-def v512f16 : ValueType<8192, 96>; // 512 x f16 vector value
-
-def v2bf16 : ValueType<32, 97>; // 2 x bf16 vector value
-def v3bf16 : ValueType<48, 98>; // 3 x bf16 vector value
-def v4bf16 : ValueType<64, 99>; // 4 x bf16 vector value
-def v8bf16 : ValueType<128, 100>; // 8 x bf16 vector value
-def v16bf16 : ValueType<256, 101>; // 16 x bf16 vector value
-def v32bf16 : ValueType<512, 102>; // 32 x bf16 vector value
-def v64bf16 : ValueType<1024, 103>; // 64 x bf16 vector value
-def v128bf16 : ValueType<2048, 104>; // 128 x bf16 vector value
-
-def v1f32 : ValueType<32, 105>; // 1 x f32 vector value
-def v2f32 : ValueType<64, 106>; // 2 x f32 vector value
-def v3f32 : ValueType<96, 107>; // 3 x f32 vector value
-def v4f32 : ValueType<128, 108>; // 4 x f32 vector value
-def v5f32 : ValueType<160, 109>; // 5 x f32 vector value
-def v6f32 : ValueType<192, 110>; // 6 x f32 vector value
-def v7f32 : ValueType<224, 111>; // 7 x f32 vector value
-def v8f32 : ValueType<256, 112>; // 8 x f32 vector value
-def v9f32 : ValueType<288, 113>; // 9 x f32 vector value
-def v10f32 : ValueType<320, 114>; // 10 x f32 vector value
-def v11f32 : ValueType<352, 115>; // 11 x f32 vector value
-def v12f32 : ValueType<384, 116>; // 12 x f32 vector value
-def v16f32 : ValueType<512, 117>; // 16 x f32 vector value
-def v32f32 : ValueType<1024, 118>; // 32 x f32 vector value
-def v64f32 : ValueType<2048, 119>; // 64 x f32 vector value
-def v128f32 : ValueType<4096, 120>; // 128 x f32 vector value
-def v256f32 : ValueType<8192, 121>; // 256 x f32 vector value
-def v512f32 : ValueType<16384, 122>; // 512 x f32 vector value
-def v1024f32 : ValueType<32768, 123>; // 1024 x f32 vector value
-def v2048f32 : ValueType<65536, 124>; // 2048 x f32 vector value
-
-def v1f64 : ValueType<64, 125>; // 1 x f64 vector value
-def v2f64 : ValueType<128, 126>; // 2 x f64 vector value
-def v3f64 : ValueType<192, 127>; // 3 x f64 vector value
-def v4f64 : ValueType<256, 128>; // 4 x f64 vector value
-def v8f64 : ValueType<512, 129>; // 8 x f64 vector value
-def v16f64 : ValueType<1024, 130>; // 16 x f64 vector value
-def v32f64 : ValueType<2048, 131>; // 32 x f64 vector value
-def v64f64 : ValueType<4096, 132>; // 64 x f64 vector value
-def v128f64 : ValueType<8192, 133>; // 128 x f64 vector value
-def v256f64 : ValueType<16384, 134>; // 256 x f64 vector value
-
-def nxv1i1 : ValueType<1, 135>; // n x 1 x i1 vector value
-def nxv2i1 : ValueType<2, 136>; // n x 2 x i1 vector value
-def nxv4i1 : ValueType<4, 137>; // n x 4 x i1 vector value
-def nxv8i1 : ValueType<8, 138>; // n x 8 x i1 vector value
-def nxv16i1 : ValueType<16, 139>; // n x 16 x i1 vector value
-def nxv32i1 : ValueType<32, 140>; // n x 32 x i1 vector value
-def nxv64i1 : ValueType<64, 141>; // n x 64 x i1 vector value
-
-def nxv1i8 : ValueType<8, 142>; // n x 1 x i8 vector value
-def nxv2i8 : ValueType<16, 143>; // n x 2 x i8 vector value
-def nxv4i8 : ValueType<32, 144>; // n x 4 x i8 vector value
-def nxv8i8 : ValueType<64, 145>; // n x 8 x i8 vector value
-def nxv16i8 : ValueType<128, 146>; // n x 16 x i8 vector value
-def nxv32i8 : ValueType<256, 147>; // n x 32 x i8 vector value
-def nxv64i8 : ValueType<512, 148>; // n x 64 x i8 vector value
-
-def nxv1i16 : ValueType<16, 149>; // n x 1 x i16 vector value
-def nxv2i16 : ValueType<32, 150>; // n x 2 x i16 vector value
-def nxv4i16 : ValueType<64, 151>; // n x 4 x i16 vector value
-def nxv8i16 : ValueType<128, 152>; // n x 8 x i16 vector value
-def nxv16i16 : ValueType<256, 153>; // n x 16 x i16 vector value
-def nxv32i16 : ValueType<512, 154>; // n x 32 x i16 vector value
-
-def nxv1i32 : ValueType<32, 155>; // n x 1 x i32 vector value
-def nxv2i32 : ValueType<64, 156>; // n x 2 x i32 vector value
-def nxv4i32 : ValueType<128, 157>; // n x 4 x i32 vector value
-def nxv8i32 : ValueType<256, 158>; // n x 8 x i32 vector value
-def nxv16i32 : ValueType<512, 159>; // n x 16 x i32 vector value
-def nxv32i32 : ValueType<1024, 160>; // n x 32 x i32 vector value
-
-def nxv1i64 : ValueType<64, 161>; // n x 1 x i64 vector value
-def nxv2i64 : ValueType<128, 162>; // n x 2 x i64 vector value
-def nxv4i64 : ValueType<256, 163>; // n x 4 x i64 vector value
-def nxv8i64 : ValueType<512, 164>; // n x 8 x i64 vector value
-def nxv16i64 : ValueType<1024, 165>; // n x 16 x i64 vector value
-def nxv32i64 : ValueType<2048, 166>; // n x 32 x i64 vector value
-
-def nxv1f16 : ValueType<16, 167>; // n x 1 x f16 vector value
-def nxv2f16 : ValueType<32, 168>; // n x 2 x f16 vector value
-def nxv4f16 : ValueType<64, 169>; // n x 4 x f16 vector value
-def nxv8f16 : ValueType<128, 170>; // n x 8 x f16 vector value
-def nxv16f16 : ValueType<256, 171>; // n x 16 x f16 vector value
-def nxv32f16 : ValueType<512, 172>; // n x 32 x f16 vector value
-
-def nxv1bf16 : ValueType<16, 173>; // n x 1 x bf16 vector value
-def nxv2bf16 : ValueType<32, 174>; // n x 2 x bf16 vector value
-def nxv4bf16 : ValueType<64, 175>; // n x 4 x bf16 vector value
-def nxv8bf16 : ValueType<128, 176>; // n x 8 x bf16 vector value
-def nxv16bf16 : ValueType<256, 177>; // n x 16 x bf16 vector value
-def nxv32bf16 : ValueType<512, 178>; // n x 32 x bf16 vector value
-
-def nxv1f32 : ValueType<32, 179>; // n x 1 x f32 vector value
-def nxv2f32 : ValueType<64, 180>; // n x 2 x f32 vector value
-def nxv4f32 : ValueType<128, 181>; // n x 4 x f32 vector value
-def nxv8f32 : ValueType<256, 182>; // n x 8 x f32 vector value
-def nxv16f32 : ValueType<512, 183>; // n x 16 x f32 vector value
-
-def nxv1f64 : ValueType<64, 184>; // n x 1 x f64 vector value
-def nxv2f64 : ValueType<128, 185>; // n x 2 x f64 vector value
-def nxv4f64 : ValueType<256, 186>; // n x 4 x f64 vector value
-def nxv8f64 : ValueType<512, 187>; // n x 8 x f64 vector value
+class VTAny<int value> : ValueType<0, value> {
+ let isOverloaded = true;
+}
+
+class VTInt<int size, int value>
+ : ValueType<size, value> {
+ let isInteger = true;
+}
+
+class VTFP<int size, int value>
+ : ValueType<size, value> {
+ let isFP = true;
+}
+
+class VTVec<int nelem, ValueType elt, int value>
+ : ValueType<!mul(nelem, elt.Size), value> {
+ let nElem = nelem;
+ let ElementType = elt;
+ let isInteger = elt.isInteger;
+ let isFP = elt.isFP;
+ let isVector = true;
+}
+
+class VTScalableVec<int nelem, ValueType elt, int value>
+ : VTVec<nelem, elt, value> {
+ let isScalable = true;
+}
+
+defset list<ValueType> ValueTypes = {
+
+def OtherVT : ValueType<0, 1> { // "Other" value
+ let LLVMName = "Other";
+}
+
+def i1 : VTInt<1, 2>; // One bit boolean value
+def i2 : VTInt<2, 3>; // 2-bit integer value
+def i4 : VTInt<4, 4>; // 4-bit integer value
+def i8 : VTInt<8, 5>; // 8-bit integer value
+def i16 : VTInt<16, 6>; // 16-bit integer value
+def i32 : VTInt<32, 7>; // 32-bit integer value
+def i64 : VTInt<64, 8>; // 64-bit integer value
+def i128 : VTInt<128, 9>; // 128-bit integer value
+
+def bf16 : VTFP<16, 10>; // 16-bit brain floating point value
+def f16 : VTFP<16, 11>; // 16-bit floating point value
+def f32 : VTFP<32, 12>; // 32-bit floating point value
+def f64 : VTFP<64, 13>; // 64-bit floating point value
+def f80 : VTFP<80, 14>; // 80-bit floating point value
+def f128 : VTFP<128, 15>; // 128-bit floating point value
+def ppcf128 : VTFP<128, 16>; // PPC 128-bit floating point value
+
+def v1i1 : VTVec<1, i1, 17>; // 1 x i1 vector value
+def v2i1 : VTVec<2, i1, 18>; // 2 x i1 vector value
+def v4i1 : VTVec<4, i1, 19>; // 4 x i1 vector value
+def v8i1 : VTVec<8, i1, 20>; // 8 x i1 vector value
+def v16i1 : VTVec<16, i1, 21>; // 16 x i1 vector value
+def v32i1 : VTVec<32, i1, 22>; // 32 x i1 vector value
+def v64i1 : VTVec<64, i1, 23>; // 64 x i1 vector value
+def v128i1 : VTVec<128, i1, 24>; // 128 x i1 vector value
+def v256i1 : VTVec<256, i1, 25>; // 256 x i1 vector value
+def v512i1 : VTVec<512, i1, 26>; // 512 x i1 vector value
+def v1024i1 : VTVec<1024, i1, 27>; // 1024 x i1 vector value
+def v2048i1 : VTVec<2048, i1, 28>; // 2048 x i1 vector value
+
+def v128i2 : VTVec<128, i2, 29>; // 128 x i2 vector value
+def v256i2 : VTVec<256, i2, 30>; // 256 x i2 vector value
+
+def v64i4 : VTVec<64, i4, 31>; // 64 x i4 vector value
+def v128i4 : VTVec<128, i4, 32>; // 128 x i4 vector value
+
+def v1i8 : VTVec<1, i8, 33>; // 1 x i8 vector value
+def v2i8 : VTVec<2, i8, 34>; // 2 x i8 vector value
+def v4i8 : VTVec<4, i8, 35>; // 4 x i8 vector value
+def v8i8 : VTVec<8, i8, 36>; // 8 x i8 vector value
+def v16i8 : VTVec<16, i8, 37>; // 16 x i8 vector value
+def v32i8 : VTVec<32, i8, 38>; // 32 x i8 vector value
+def v64i8 : VTVec<64, i8, 39>; // 64 x i8 vector value
+def v128i8 : VTVec<128, i8, 40>; // 128 x i8 vector value
+def v256i8 : VTVec<256, i8, 41>; // 256 x i8 vector value
+def v512i8 : VTVec<512, i8, 42>; // 512 x i8 vector value
+def v1024i8 : VTVec<1024, i8, 43>; // 1024 x i8 vector value
+
+def v1i16 : VTVec<1, i16, 44>; // 1 x i16 vector value
+def v2i16 : VTVec<2, i16, 45>; // 2 x i16 vector value
+def v3i16 : VTVec<3, i16, 46>; // 3 x i16 vector value
+def v4i16 : VTVec<4, i16, 47>; // 4 x i16 vector value
+def v8i16 : VTVec<8, i16, 48>; // 8 x i16 vector value
+def v16i16 : VTVec<16, i16, 49>; // 16 x i16 vector value
+def v32i16 : VTVec<32, i16, 50>; // 32 x i16 vector value
+def v64i16 : VTVec<64, i16, 51>; // 64 x i16 vector value
+def v128i16 : VTVec<128, i16, 52>; // 128 x i16 vector value
+def v256i16 : VTVec<256, i16, 53>; // 256 x i16 vector value
+def v512i16 : VTVec<512, i16, 54>; // 512 x i16 vector value
+
+def v1i32 : VTVec<1, i32, 55>; // 1 x i32 vector value
+def v2i32 : VTVec<2, i32, 56>; // 2 x i32 vector value
+def v3i32 : VTVec<3, i32, 57>; // 3 x i32 vector value
+def v4i32 : VTVec<4, i32, 58>; // 4 x i32 vector value
+def v5i32 : VTVec<5, i32, 59>; // 5 x i32 vector value
+def v6i32   : VTVec<6,    i32, 60>;  //    6 x i32 vector value
+def v7i32   : VTVec<7,    i32, 61>;  //    7 x i32 vector value
+def v8i32 : VTVec<8, i32, 62>; // 8 x i32 vector value
+def v9i32 : VTVec<9, i32, 63>; // 9 x i32 vector value
+def v10i32 : VTVec<10, i32, 64>; // 10 x i32 vector value
+def v11i32 : VTVec<11, i32, 65>; // 11 x i32 vector value
+def v12i32 : VTVec<12, i32, 66>; // 12 x i32 vector value
+def v16i32 : VTVec<16, i32, 67>; // 16 x i32 vector value
+def v32i32 : VTVec<32, i32, 68>; // 32 x i32 vector value
+def v64i32 : VTVec<64, i32, 69>; // 64 x i32 vector value
+def v128i32 : VTVec<128, i32, 70>; // 128 x i32 vector value
+def v256i32 : VTVec<256, i32, 71>; // 256 x i32 vector value
+def v512i32 : VTVec<512, i32, 72>; // 512 x i32 vector value
+def v1024i32 : VTVec<1024, i32, 73>; // 1024 x i32 vector value
+def v2048i32 : VTVec<2048, i32, 74>; // 2048 x i32 vector value
+
+def v1i64 : VTVec<1, i64, 75>; // 1 x i64 vector value
+def v2i64 : VTVec<2, i64, 76>; // 2 x i64 vector value
+def v3i64 : VTVec<3, i64, 77>; // 3 x i64 vector value
+def v4i64 : VTVec<4, i64, 78>; // 4 x i64 vector value
+def v8i64 : VTVec<8, i64, 79>; // 8 x i64 vector value
+def v16i64 : VTVec<16, i64, 80>; // 16 x i64 vector value
+def v32i64 : VTVec<32, i64, 81>; // 32 x i64 vector value
+def v64i64 : VTVec<64, i64, 82>; // 64 x i64 vector value
+def v128i64 : VTVec<128, i64, 83>; // 128 x i64 vector value
+def v256i64 : VTVec<256, i64, 84>; // 256 x i64 vector value
+
+def v1i128 : VTVec<1, i128, 85>; // 1 x i128 vector value
+
+def v1f16 : VTVec<1, f16, 86>; // 1 x f16 vector value
+def v2f16 : VTVec<2, f16, 87>; // 2 x f16 vector value
+def v3f16 : VTVec<3, f16, 88>; // 3 x f16 vector value
+def v4f16 : VTVec<4, f16, 89>; // 4 x f16 vector value
+def v8f16 : VTVec<8, f16, 90>; // 8 x f16 vector value
+def v16f16 : VTVec<16, f16, 91>; // 16 x f16 vector value
+def v32f16 : VTVec<32, f16, 92>; // 32 x f16 vector value
+def v64f16 : VTVec<64, f16, 93>; // 64 x f16 vector value
+def v128f16 : VTVec<128, f16, 94>; // 128 x f16 vector value
+def v256f16 : VTVec<256, f16, 95>; // 256 x f16 vector value
+def v512f16 : VTVec<512, f16, 96>; // 512 x f16 vector value
+
+def v2bf16 : VTVec<2, bf16, 97>; // 2 x bf16 vector value
+def v3bf16 : VTVec<3, bf16, 98>; // 3 x bf16 vector value
+def v4bf16 : VTVec<4, bf16, 99>; // 4 x bf16 vector value
+def v8bf16 : VTVec<8, bf16, 100>; // 8 x bf16 vector value
+def v16bf16 : VTVec<16, bf16, 101>; // 16 x bf16 vector value
+def v32bf16 : VTVec<32, bf16, 102>; // 32 x bf16 vector value
+def v64bf16 : VTVec<64, bf16, 103>; // 64 x bf16 vector value
+def v128bf16 : VTVec<128, bf16, 104>; // 128 x bf16 vector value
+
+def v1f32 : VTVec<1, f32, 105>; // 1 x f32 vector value
+def v2f32 : VTVec<2, f32, 106>; // 2 x f32 vector value
+def v3f32 : VTVec<3, f32, 107>; // 3 x f32 vector value
+def v4f32 : VTVec<4, f32, 108>; // 4 x f32 vector value
+def v5f32 : VTVec<5, f32, 109>; // 5 x f32 vector value
+def v6f32 : VTVec<6, f32, 110>; // 6 x f32 vector value
+def v7f32 : VTVec<7, f32, 111>; // 7 x f32 vector value
+def v8f32 : VTVec<8, f32, 112>; // 8 x f32 vector value
+def v9f32 : VTVec<9, f32, 113>; // 9 x f32 vector value
+def v10f32 : VTVec<10, f32, 114>; // 10 x f32 vector value
+def v11f32 : VTVec<11, f32, 115>; // 11 x f32 vector value
+def v12f32 : VTVec<12, f32, 116>; // 12 x f32 vector value
+def v16f32 : VTVec<16, f32, 117>; // 16 x f32 vector value
+def v32f32 : VTVec<32, f32, 118>; // 32 x f32 vector value
+def v64f32 : VTVec<64, f32, 119>; // 64 x f32 vector value
+def v128f32 : VTVec<128, f32, 120>; // 128 x f32 vector value
+def v256f32 : VTVec<256, f32, 121>; // 256 x f32 vector value
+def v512f32 : VTVec<512, f32, 122>; // 512 x f32 vector value
+def v1024f32 : VTVec<1024, f32, 123>; // 1024 x f32 vector value
+def v2048f32 : VTVec<2048, f32, 124>; // 2048 x f32 vector value
+
+def v1f64 : VTVec<1, f64, 125>; // 1 x f64 vector value
+def v2f64 : VTVec<2, f64, 126>; // 2 x f64 vector value
+def v3f64 : VTVec<3, f64, 127>; // 3 x f64 vector value
+def v4f64 : VTVec<4, f64, 128>; // 4 x f64 vector value
+def v8f64 : VTVec<8, f64, 129>; // 8 x f64 vector value
+def v16f64 : VTVec<16, f64, 130>; // 16 x f64 vector value
+def v32f64 : VTVec<32, f64, 131>; // 32 x f64 vector value
+def v64f64 : VTVec<64, f64, 132>; // 64 x f64 vector value
+def v128f64 : VTVec<128, f64, 133>; // 128 x f64 vector value
+def v256f64 : VTVec<256, f64, 134>; // 256 x f64 vector value
+
+def nxv1i1 : VTScalableVec<1, i1, 135>; // n x 1 x i1 vector value
+def nxv2i1 : VTScalableVec<2, i1, 136>; // n x 2 x i1 vector value
+def nxv4i1 : VTScalableVec<4, i1, 137>; // n x 4 x i1 vector value
+def nxv8i1 : VTScalableVec<8, i1, 138>; // n x 8 x i1 vector value
+def nxv16i1 : VTScalableVec<16, i1, 139>; // n x 16 x i1 vector value
+def nxv32i1 : VTScalableVec<32, i1, 140>; // n x 32 x i1 vector value
+def nxv64i1 : VTScalableVec<64, i1, 141>; // n x 64 x i1 vector value
+
+def nxv1i8 : VTScalableVec<1, i8, 142>; // n x 1 x i8 vector value
+def nxv2i8 : VTScalableVec<2, i8, 143>; // n x 2 x i8 vector value
+def nxv4i8 : VTScalableVec<4, i8, 144>; // n x 4 x i8 vector value
+def nxv8i8 : VTScalableVec<8, i8, 145>; // n x 8 x i8 vector value
+def nxv16i8 : VTScalableVec<16, i8, 146>; // n x 16 x i8 vector value
+def nxv32i8 : VTScalableVec<32, i8, 147>; // n x 32 x i8 vector value
+def nxv64i8 : VTScalableVec<64, i8, 148>; // n x 64 x i8 vector value
+
+def nxv1i16 : VTScalableVec<1, i16, 149>; // n x 1 x i16 vector value
+def nxv2i16 : VTScalableVec<2, i16, 150>; // n x 2 x i16 vector value
+def nxv4i16 : VTScalableVec<4, i16, 151>; // n x 4 x i16 vector value
+def nxv8i16 : VTScalableVec<8, i16, 152>; // n x 8 x i16 vector value
+def nxv16i16 : VTScalableVec<16, i16, 153>; // n x 16 x i16 vector value
+def nxv32i16 : VTScalableVec<32, i16, 154>; // n x 32 x i16 vector value
+
+def nxv1i32 : VTScalableVec<1, i32, 155>; // n x 1 x i32 vector value
+def nxv2i32 : VTScalableVec<2, i32, 156>; // n x 2 x i32 vector value
+def nxv4i32 : VTScalableVec<4, i32, 157>; // n x 4 x i32 vector value
+def nxv8i32 : VTScalableVec<8, i32, 158>; // n x 8 x i32 vector value
+def nxv16i32 : VTScalableVec<16, i32, 159>; // n x 16 x i32 vector value
+def nxv32i32 : VTScalableVec<32, i32, 160>; // n x 32 x i32 vector value
+
+def nxv1i64 : VTScalableVec<1, i64, 161>; // n x 1 x i64 vector value
+def nxv2i64 : VTScalableVec<2, i64, 162>; // n x 2 x i64 vector value
+def nxv4i64 : VTScalableVec<4, i64, 163>; // n x 4 x i64 vector value
+def nxv8i64 : VTScalableVec<8, i64, 164>; // n x 8 x i64 vector value
+def nxv16i64 : VTScalableVec<16, i64, 165>; // n x 16 x i64 vector value
+def nxv32i64 : VTScalableVec<32, i64, 166>; // n x 32 x i64 vector value
+
+def nxv1f16 : VTScalableVec<1, f16, 167>; // n x 1 x f16 vector value
+def nxv2f16 : VTScalableVec<2, f16, 168>; // n x 2 x f16 vector value
+def nxv4f16 : VTScalableVec<4, f16, 169>; // n x 4 x f16 vector value
+def nxv8f16 : VTScalableVec<8, f16, 170>; // n x 8 x f16 vector value
+def nxv16f16 : VTScalableVec<16, f16, 171>; // n x 16 x f16 vector value
+def nxv32f16 : VTScalableVec<32, f16, 172>; // n x 32 x f16 vector value
+
+def nxv1bf16 : VTScalableVec<1, bf16, 173>; // n x 1 x bf16 vector value
+def nxv2bf16 : VTScalableVec<2, bf16, 174>; // n x 2 x bf16 vector value
+def nxv4bf16 : VTScalableVec<4, bf16, 175>; // n x 4 x bf16 vector value
+def nxv8bf16 : VTScalableVec<8, bf16, 176>; // n x 8 x bf16 vector value
+def nxv16bf16 : VTScalableVec<16, bf16, 177>; // n x 16 x bf16 vector value
+def nxv32bf16 : VTScalableVec<32, bf16, 178>; // n x 32 x bf16 vector value
+
+def nxv1f32 : VTScalableVec<1, f32, 179>; // n x 1 x f32 vector value
+def nxv2f32 : VTScalableVec<2, f32, 180>; // n x 2 x f32 vector value
+def nxv4f32 : VTScalableVec<4, f32, 181>; // n x 4 x f32 vector value
+def nxv8f32 : VTScalableVec<8, f32, 182>; // n x 8 x f32 vector value
+def nxv16f32 : VTScalableVec<16, f32, 183>; // n x 16 x f32 vector value
+
+def nxv1f64 : VTScalableVec<1, f64, 184>; // n x 1 x f64 vector value
+def nxv2f64 : VTScalableVec<2, f64, 185>; // n x 2 x f64 vector value
+def nxv4f64 : VTScalableVec<4, f64, 186>; // n x 4 x f64 vector value
+def nxv8f64 : VTScalableVec<8, f64, 187>; // n x 8 x f64 vector value
def x86mmx : ValueType<64, 188>; // X86 MMX value
-def FlagVT : ValueType<0, 189>; // Pre-RA sched glue
+def FlagVT : ValueType<0, 189> { // Pre-RA sched glue
+ let LLVMName = "Glue";
+}
def isVoid : ValueType<0, 190>; // Produces no value
-def untyped : ValueType<8, 191>; // Produces an untyped value
+def untyped : ValueType<8, 191> { // Produces an untyped value
+ let LLVMName = "Untyped";
+}
def funcref : ValueType<0, 192>; // WebAssembly's funcref type
def externref : ValueType<0, 193>; // WebAssembly's externref type
def x86amx : ValueType<8192, 194>; // X86 AMX value
def i64x8 : ValueType<512, 195>; // 8 Consecutive GPRs (AArch64)
+def aarch64svcount
+ : ValueType<16, 196>; // AArch64 predicate-as-counter
+def spirvbuiltin : ValueType<0, 197>; // SPIR-V's builtin type
def token : ValueType<0, 248>; // TokenTy
-def MetadataVT : ValueType<0, 249>; // Metadata
+def MetadataVT : ValueType<0, 249> { // Metadata
+ let LLVMName = "Metadata";
+}
// Pseudo valuetype mapped to the current pointer size to any address space.
// Should only be used in TableGen.
-def iPTRAny : ValueType<0, 250>;
+def iPTRAny : VTAny<250>;
// Pseudo valuetype to represent "vector of any size"
-def vAny : ValueType<0, 251>;
+def vAny : VTAny<251>;
// Pseudo valuetype to represent "float of any format"
-def fAny : ValueType<0, 252>;
+def fAny : VTAny<252>;
// Pseudo valuetype to represent "integer of any bit width"
-def iAny : ValueType<0, 253>;
+def iAny : VTAny<253>;
// Pseudo valuetype mapped to the current pointer size.
def iPTR : ValueType<0, 254>;
// Pseudo valuetype to represent "any type of any size".
-def Any : ValueType<0, 255>;
+def Any : VTAny<255>;
+
+} // end defset ValueTypes
/// This class is for targets that want to use pointer types in patterns
/// with the GlobalISelEmitter. Targets must define their own pointer
diff --git a/llvm/include/llvm/CodeGen/WasmAddressSpaces.h b/llvm/include/llvm/CodeGen/WasmAddressSpaces.h
new file mode 100644
index 000000000000..c47b05f88a0a
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/WasmAddressSpaces.h
@@ -0,0 +1,48 @@
+//===--- llvm/CodeGen/WasmAddressSpaces.h -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Address Spaces for WebAssembly Type Handling
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_WASM_ADDRESS_SPACES_H
+#define LLVM_CODEGEN_WASM_ADDRESS_SPACES_H
+
+namespace llvm {
+
+namespace WebAssembly {
+
+enum WasmAddressSpace : unsigned {
+ // Default address space, for pointers to linear memory (stack, heap, data).
+ WASM_ADDRESS_SPACE_DEFAULT = 0,
+ // A non-integral address space for pointers to named objects outside of
+ // linear memory: WebAssembly globals or WebAssembly locals. Loads and stores
+ // to these pointers are lowered to global.get / global.set or local.get /
+ // local.set, as appropriate.
+ WASM_ADDRESS_SPACE_VAR = 1,
+ // A non-integral address space for externref values
+ WASM_ADDRESS_SPACE_EXTERNREF = 10,
+ // A non-integral address space for funcref values
+ WASM_ADDRESS_SPACE_FUNCREF = 20,
+};
+
+inline bool isDefaultAddressSpace(unsigned AS) {
+ return AS == WASM_ADDRESS_SPACE_DEFAULT;
+}
+inline bool isWasmVarAddressSpace(unsigned AS) {
+ return AS == WASM_ADDRESS_SPACE_VAR;
+}
+inline bool isValidAddressSpace(unsigned AS) {
+ return isDefaultAddressSpace(AS) || isWasmVarAddressSpace(AS);
+}
+
+} // namespace WebAssembly
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_WASM_ADDRESS_SPACES_H
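
A usage sketch for the new header: the predicates simply classify a numeric address space, so a pass can ask whether a pointer refers to linear memory, a Wasm global/local, or a reference type. The classify helper and its labels are illustrative, not part of the header.

    #include "llvm/CodeGen/WasmAddressSpaces.h"
    #include <cstdio>

    static const char *classifyWasmAddressSpace(unsigned AS) {
      using namespace llvm::WebAssembly;
      if (isDefaultAddressSpace(AS))
        return "linear memory";
      if (isWasmVarAddressSpace(AS))
        return "wasm global/local";
      if (AS == WASM_ADDRESS_SPACE_EXTERNREF)
        return "externref";
      if (AS == WASM_ADDRESS_SPACE_FUNCREF)
        return "funcref";
      return "unknown";
    }

    int main() { std::printf("%s\n", classifyWasmAddressSpace(1)); }
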
diff --git a/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h b/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h
index 60ee6493b1a1..ab6b897e9f99 100644
--- a/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h
+++ b/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h
@@ -38,14 +38,14 @@ struct WasmEHFuncInfo {
// Helper functions
const BasicBlock *getUnwindDest(const BasicBlock *BB) const {
assert(hasUnwindDest(BB));
- return SrcToUnwindDest.lookup(BB).get<const BasicBlock *>();
+ return cast<const BasicBlock *>(SrcToUnwindDest.lookup(BB));
}
SmallPtrSet<const BasicBlock *, 4> getUnwindSrcs(const BasicBlock *BB) const {
assert(hasUnwindSrcs(BB));
const auto &Set = UnwindDestToSrcs.lookup(BB);
SmallPtrSet<const BasicBlock *, 4> Ret;
for (const auto P : Set)
- Ret.insert(P.get<const BasicBlock *>());
+ Ret.insert(cast<const BasicBlock *>(P));
return Ret;
}
void setUnwindDest(const BasicBlock *BB, const BasicBlock *Dest) {
@@ -61,7 +61,7 @@ struct WasmEHFuncInfo {
MachineBasicBlock *getUnwindDest(MachineBasicBlock *MBB) const {
assert(hasUnwindDest(MBB));
- return SrcToUnwindDest.lookup(MBB).get<MachineBasicBlock *>();
+ return cast<MachineBasicBlock *>(SrcToUnwindDest.lookup(MBB));
}
SmallPtrSet<MachineBasicBlock *, 4>
getUnwindSrcs(MachineBasicBlock *MBB) const {
@@ -69,7 +69,7 @@ struct WasmEHFuncInfo {
const auto &Set = UnwindDestToSrcs.lookup(MBB);
SmallPtrSet<MachineBasicBlock *, 4> Ret;
for (const auto P : Set)
- Ret.insert(P.get<MachineBasicBlock *>());
+ Ret.insert(cast<MachineBasicBlock *>(P));
return Ret;
}
void setUnwindDest(MachineBasicBlock *MBB, MachineBasicBlock *Dest) {
diff --git a/llvm/include/llvm/CodeGen/WinEHFuncInfo.h b/llvm/include/llvm/CodeGen/WinEHFuncInfo.h
index f098316de793..c007d462e070 100644
--- a/llvm/include/llvm/CodeGen/WinEHFuncInfo.h
+++ b/llvm/include/llvm/CodeGen/WinEHFuncInfo.h
@@ -92,6 +92,7 @@ struct WinEHFuncInfo {
DenseMap<const FuncletPadInst *, int> FuncletBaseStateMap;
DenseMap<const InvokeInst *, int> InvokeStateMap;
DenseMap<MCSymbol *, std::pair<int, MCSymbol *>> LabelToStateMap;
+ DenseMap<const BasicBlock *, int> BlockToStateMap; // for AsynchEH
SmallVector<CxxUnwindMapEntry, 4> CxxUnwindMap;
SmallVector<WinEHTryBlockMapEntry, 4> TryBlockMap;
SmallVector<SEHUnwindMapEntry, 4> SEHUnwindMap;
@@ -104,6 +105,8 @@ struct WinEHFuncInfo {
void addIPToStateRange(const InvokeInst *II, MCSymbol *InvokeBegin,
MCSymbol *InvokeEnd);
+ void addIPToStateRange(int State, MCSymbol *InvokeBegin, MCSymbol *InvokeEnd);
+
int EHRegNodeFrameIndex = std::numeric_limits<int>::max();
int EHRegNodeEndOffset = std::numeric_limits<int>::max();
int EHGuardFrameIndex = std::numeric_limits<int>::max();
@@ -123,6 +126,12 @@ void calculateSEHStateNumbers(const Function *ParentFn,
void calculateClrEHStateNumbers(const Function *Fn, WinEHFuncInfo &FuncInfo);
+// For AsynchEH (VC++ option -EHa)
+void calculateCXXStateForAsynchEH(const BasicBlock *BB, int State,
+ WinEHFuncInfo &FuncInfo);
+void calculateSEHStateForAsynchEH(const BasicBlock *BB, int State,
+ WinEHFuncInfo &FuncInfo);
+
} // end namespace llvm
#endif // LLVM_CODEGEN_WINEHFUNCINFO_H
diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/DWARFLinker.h
index 5b2f798b784a..5dce990adf02 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFLinker.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFLinker.h
@@ -14,29 +14,22 @@
#include "llvm/CodeGen/AccelTable.h"
#include "llvm/CodeGen/NonRelocatableStringpool.h"
#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include <map>
namespace llvm {
-class DWARFContext;
class DWARFExpression;
class DWARFUnit;
class DataExtractor;
class DeclContextTree;
-struct MCDwarfLineTableParams;
template <typename T> class SmallVectorImpl;
enum class DwarfLinkerClient { Dsymutil, LLD, General };
-/// The kind of accelerator tables we should emit.
-enum class DwarfLinkerAccelTableKind : uint8_t {
- Apple, ///< .apple_names, .apple_namespaces, .apple_types, .apple_objc.
- Pub, ///< .debug_pubnames, .debug_pubtypes
- DebugNames ///< .debug_names.
-};
-
/// AddressesMap represents information about valid addresses used
/// by debug information. Valid addresses are those which points to
/// live code sections. i.e. relocations for these addresses point
@@ -49,17 +42,25 @@ public:
/// section.
virtual bool hasValidRelocs() = 0;
- /// Checks that the specified variable \p DIE references live code section.
- /// Allowed kind of input die: DW_TAG_variable, DW_TAG_constant.
- /// \returns true and sets Info.InDebugMap if it is the case.
- virtual bool isLiveVariable(const DWARFDie &DIE,
- CompileUnit::DIEInfo &Info) = 0;
-
- /// Checks that the specified subprogram \p DIE references live code section.
- /// Allowed kind of input die: DW_TAG_subprogram, DW_TAG_label.
- /// \returns true and sets Info.InDebugMap if it is the case.
- virtual bool isLiveSubprogram(const DWARFDie &DIE,
- CompileUnit::DIEInfo &Info) = 0;
+ /// Checks that the specified DWARF expression operand \p Op references a live
+ /// code section and returns the relocation adjustment value (to get the
+ /// linked address, this value might be added to the source expression operand
+ /// address).
+ /// \returns relocation adjustment value or std::nullopt if there is no
+ /// corresponding live address.
+ virtual std::optional<int64_t>
+ getExprOpAddressRelocAdjustment(DWARFUnit &U,
+ const DWARFExpression::Operation &Op,
+ uint64_t StartOffset, uint64_t EndOffset) = 0;
+
+ /// Checks that the specified subprogram \p DIE references the live code
+ /// section and returns the relocation adjustment value (to get the linked
+ /// address this value might be added to the source subprogram address).
+ /// Allowed kinds of input DIE: DW_TAG_subprogram, DW_TAG_label.
+ /// \returns relocation adjustment value or std::nullopt if there is no
+ /// corresponding live address.
+ virtual std::optional<int64_t>
+ getSubprogramRelocAdjustment(const DWARFDie &DIE) = 0;
/// Apply the valid relocations to the buffer \p Data, taking into
/// account that Data is at \p BaseOffset in the .debug_info section.
@@ -68,20 +69,31 @@ public:
virtual bool applyValidRelocs(MutableArrayRef<char> Data, uint64_t BaseOffset,
bool IsLittleEndian) = 0;
- /// Relocate the given address offset if a valid relocation exists.
- virtual llvm::Expected<uint64_t> relocateIndexedAddr(uint64_t StartOffset,
- uint64_t EndOffset) = 0;
-
- /// Returns all valid functions address ranges(i.e., those ranges
- /// which points to sections with code).
- virtual RangesTy &getValidAddressRanges() = 0;
-
/// Erases all data.
virtual void clear() = 0;
};
using Offset2UnitMap = DenseMap<uint64_t, CompileUnit *>;
+struct DebugAddrPool {
+ DenseMap<uint64_t, uint64_t> AddrIndexMap;
+ SmallVector<uint64_t> Addrs;
+
+ uint64_t getAddrIndex(uint64_t Addr) {
+ DenseMap<uint64_t, uint64_t>::iterator It = AddrIndexMap.find(Addr);
+ if (It == AddrIndexMap.end()) {
+ It = AddrIndexMap.insert(std::make_pair(Addr, Addrs.size())).first;
+ Addrs.push_back(Addr);
+ }
+ return It->second;
+ }
+
+ void clear() {
+ AddrIndexMap.clear();
+ Addrs.clear();
+ }
+};
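
As a quick illustration (this snippet is not part of the patch), getAddrIndex hands out dense first-come indices and deduplicates repeated addresses, which is the shape .debug_addr emission needs:

// Sketch: repeated addresses map to the same index, new addresses take the
// next free slot.
DebugAddrPool Pool;
uint64_t I0 = Pool.getAddrIndex(0x1000); // 0
uint64_t I1 = Pool.getAddrIndex(0x2000); // 1
uint64_t I2 = Pool.getAddrIndex(0x1000); // 0 again, no new entry
// Pool.Addrs now holds {0x1000, 0x2000} in insertion order.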
+
/// DwarfEmitter presents interface to generate all debug info tables.
class DwarfEmitter {
public:
@@ -98,9 +110,12 @@ public:
emitAbbrevs(const std::vector<std::unique_ptr<DIEAbbrev>> &Abbrevs,
unsigned DwarfVersion) = 0;
- /// Emit the string table described by \p Pool.
+ /// Emit the string table described by \p Pool into .debug_str table.
virtual void emitStrings(const NonRelocatableStringpool &Pool) = 0;
+ /// Emit the string table described by \p Pool into .debug_line_str table.
+ virtual void emitLineStrings(const NonRelocatableStringpool &Pool) = 0;
+
/// Emit DWARF debug names.
virtual void
emitDebugNames(AccelTable<DWARF5AccelTableStaticData> &Table) = 0;
@@ -121,27 +136,53 @@ public:
virtual void
emitAppleTypes(AccelTable<AppleAccelTableStaticTypeData> &Table) = 0;
- /// Emit piece of .debug_ranges for \p Ranges.
+ /// Emit debug ranges (.debug_ranges, .debug_rnglists) header.
+ virtual MCSymbol *emitDwarfDebugRangeListHeader(const CompileUnit &Unit) = 0;
+
+ /// Emit debug ranges (.debug_ranges, .debug_rnglists) fragment.
+ virtual void
+ emitDwarfDebugRangeListFragment(const CompileUnit &Unit,
+ const AddressRanges &LinkedRanges,
+ PatchLocation Patch) = 0;
+
+ /// Emit debug ranges (.debug_ranges, .debug_rnglists) footer.
+ virtual void emitDwarfDebugRangeListFooter(const CompileUnit &Unit,
+ MCSymbol *EndLabel) = 0;
+
+ /// Emit debug locations (.debug_loc, .debug_loclists) header.
+ virtual MCSymbol *emitDwarfDebugLocListHeader(const CompileUnit &Unit) = 0;
+
+ /// Emit debug locations (.debug_loc, .debug_loclists) fragment.
+ virtual void emitDwarfDebugLocListFragment(
+ const CompileUnit &Unit,
+ const DWARFLocationExpressionsVector &LinkedLocationExpression,
+ PatchLocation Patch, DebugAddrPool &AddrPool) = 0;
+
+ /// Emit debug locations (.debug_loc, .debug_loclists) footer.
+ virtual void emitDwarfDebugLocListFooter(const CompileUnit &Unit,
+ MCSymbol *EndLabel) = 0;
+
+ /// Emit .debug_addr header.
+ virtual MCSymbol *emitDwarfDebugAddrsHeader(const CompileUnit &Unit) = 0;
+
+ /// Emit the addresses described by \p Addrs into the .debug_addr section.
+ virtual void emitDwarfDebugAddrs(const SmallVector<uint64_t> &Addrs,
+ uint8_t AddrSize) = 0;
+
+ /// Emit .debug_addr footer.
+ virtual void emitDwarfDebugAddrsFooter(const CompileUnit &Unit,
+ MCSymbol *EndLabel) = 0;
+
+ /// Emit .debug_aranges entries for \p Unit
virtual void
- emitDwarfDebugRangesTableFragment(const CompileUnit &Unit,
- const AddressRanges &LinkedRanges) = 0;
-
- /// Emit .debug_aranges entries for \p Unit and if \p DoRangesSection is true,
- /// also emit the .debug_ranges entries for the DW_TAG_compile_unit's
- /// DW_AT_ranges attribute.
- virtual void emitUnitRangesEntries(CompileUnit &Unit,
- bool DoRangesSection) = 0;
-
- /// Copy the .debug_line over to the updated binary while unobfuscating the
- /// file names and directories.
- virtual void translateLineTable(DataExtractor LineData, uint64_t Offset) = 0;
-
- /// Emit the line table described in \p Rows into the .debug_line section.
- virtual void emitLineTableForUnit(MCDwarfLineTableParams Params,
- StringRef PrologueBytes,
- unsigned MinInstLength,
- std::vector<DWARFDebugLine::Row> &Rows,
- unsigned AdddressSize) = 0;
+ emitDwarfDebugArangesTable(const CompileUnit &Unit,
+ const AddressRanges &LinkedRanges) = 0;
+
+ /// Emit specified \p LineTable into .debug_line table.
+ virtual void emitLineTableForUnit(const DWARFDebugLine::LineTable &LineTable,
+ const CompileUnit &Unit,
+ OffsetsStringPool &DebugStrPool,
+ OffsetsStringPool &DebugLineStrPool) = 0;
/// Emit the .debug_pubnames contribution for \p Unit.
virtual void emitPubNamesForUnit(const CompileUnit &Unit) = 0;
@@ -156,14 +197,6 @@ public:
virtual void emitFDE(uint32_t CIEOffset, uint32_t AddreSize, uint64_t Address,
StringRef Bytes) = 0;
- /// Emit the .debug_loc contribution for \p Unit by copying the entries from
- /// \p Dwarf and offsetting them. Update the location attributes to point to
- /// the new entries.
- virtual void emitLocationsForUnit(
- const CompileUnit &Unit, DWARFContext &Dwarf,
- std::function<void(StringRef, SmallVectorImpl<uint8_t> &)>
- ProcessExpr) = 0;
-
/// Emit the compilation unit header for \p Unit in the
/// .debug_info section.
///
@@ -193,6 +226,9 @@ public:
/// Returns size of generated .debug_ranges section.
virtual uint64_t getRangesSectionSize() const = 0;
+ /// Returns size of generated .debug_rnglists section.
+ virtual uint64_t getRngListsSectionSize() const = 0;
+
/// Returns size of generated .debug_info section.
virtual uint64_t getDebugInfoSectionSize() const = 0;
@@ -201,35 +237,54 @@ public:
/// Returns size of generated .debug_macro section.
virtual uint64_t getDebugMacroSectionSize() const = 0;
+
+ /// Returns size of generated .debug_loclists section.
+ virtual uint64_t getLocListsSectionSize() const = 0;
+
+ /// Returns size of generated .debug_addr section.
+ virtual uint64_t getDebugAddrSectionSize() const = 0;
+
+ /// Dump the file to the disk.
+ virtual void finish() = 0;
+
+ /// Emit the swift_ast section stored in \p Buffer.
+ virtual void emitSwiftAST(StringRef Buffer) = 0;
+
+ /// Emit the swift reflection section stored in \p Buffer.
+ virtual void emitSwiftReflectionSection(
+ llvm::binaryformat::Swift5ReflectionSectionKind ReflSectionKind,
+ StringRef Buffer, uint32_t Alignment, uint32_t Size) = 0;
+
+ /// Returns underlying AsmPrinter.
+ virtual AsmPrinter &getAsmPrinter() const = 0;
};
+class DwarfStreamer;
using UnitListTy = std::vector<std::unique_ptr<CompileUnit>>;
-/// this class represents DWARF information for source file
-/// and it`s address map.
+/// This class represents DWARF information for source file
+/// and its address map.
class DWARFFile {
public:
- DWARFFile(StringRef Name, DWARFContext *Dwarf, AddressesMap *Addresses,
+ DWARFFile(StringRef Name, std::unique_ptr<DWARFContext> Dwarf,
+ std::unique_ptr<AddressesMap> Addresses,
const std::vector<std::string> &Warnings)
- : FileName(Name), Dwarf(Dwarf), Addresses(Addresses), Warnings(Warnings) {
- }
+ : FileName(Name), Dwarf(std::move(Dwarf)),
+ Addresses(std::move(Addresses)), Warnings(Warnings) {}
- /// object file name.
+ /// The object file name.
StringRef FileName;
- /// source DWARF information.
- DWARFContext *Dwarf = nullptr;
- /// helpful address information(list of valid address ranges, relocations).
- AddressesMap *Addresses = nullptr;
- /// warnings for object file.
+
+ /// The source DWARF information.
+ std::unique_ptr<DWARFContext> Dwarf;
+
+ /// Helpful address information(list of valid address ranges, relocations).
+ std::unique_ptr<AddressesMap> Addresses;
+
+ /// Warnings for this object file.
const std::vector<std::string> &Warnings;
};
-typedef std::function<void(const Twine &Warning, StringRef Context,
- const DWARFDie *DIE)>
- messageHandler;
-typedef std::function<ErrorOr<DWARFFile &>(StringRef ContainerName,
- StringRef Path)>
- objFileLoader;
typedef std::map<std::string, std::string> swiftInterfacesMap;
typedef std::map<std::string, std::string> objectPrefixMap;
@@ -251,9 +306,43 @@ typedef function_ref<void(const DWARFUnit &Unit)> CompileUnitHandler;
/// processing a object file.
class DWARFLinker {
public:
- DWARFLinker(DwarfEmitter *Emitter,
- DwarfLinkerClient ClientID = DwarfLinkerClient::General)
- : TheDwarfEmitter(Emitter), DwarfLinkerClientID(ClientID) {}
+ typedef std::function<void(const Twine &Warning, StringRef Context,
+ const DWARFDie *DIE)>
+ messageHandler;
+ DWARFLinker(messageHandler ErrorHandler, messageHandler WarningHandler,
+ std::function<StringRef(StringRef)> StringsTranslator)
+ : DwarfLinkerClientID(DwarfLinkerClient::Dsymutil),
+ StringsTranslator(StringsTranslator), ErrorHandler(ErrorHandler),
+ WarningHandler(WarningHandler) {}
+
+ static std::unique_ptr<DWARFLinker> createLinker(
+ messageHandler ErrorHandler, messageHandler WarningHandler,
+ std::function<StringRef(StringRef)> StringsTranslator = nullptr) {
+ return std::make_unique<DWARFLinker>(ErrorHandler, WarningHandler,
+ StringsTranslator);
+ }
+
+ /// Type of output file.
+ enum class OutputFileType {
+ Object,
+ Assembly,
+ };
+
+ /// The kind of accelerator tables we should emit.
+ enum class AccelTableKind : uint8_t {
+ Apple, ///< .apple_names, .apple_namespaces, .apple_types, .apple_objc.
+ Pub, ///< .debug_pubnames, .debug_pubtypes
+ DebugNames ///< .debug_names.
+ };
+ typedef std::function<void(const DWARFFile &File)> inputVerificationHandler;
+ typedef std::function<ErrorOr<DWARFFile &>(StringRef ContainerName,
+ StringRef Path)>
+ objFileLoader;
+
+ Error createEmitter(const Triple &TheTriple, OutputFileType FileType,
+ raw_pwrite_stream &OutFile);
+
+ DwarfEmitter *getEmitter();
/// Add object file to be linked. Pre-load compile unit die. Call
/// \p OnCUDieLoaded for each compile unit die. If specified \p File
@@ -265,8 +354,7 @@ public:
DWARFFile &File, objFileLoader Loader = nullptr,
CompileUnitHandler OnCUDieLoaded = [](const DWARFUnit &) {});
- /// Link debug info for added objFiles. Object
- /// files are linked all together.
+ /// Link debug info for added objFiles. Object files are linked all together.
Error link();
/// A number of methods setting various linking options:
@@ -280,14 +368,15 @@ public:
/// Verify the input DWARF.
void setVerifyInputDWARF(bool Verify) { Options.VerifyInputDWARF = Verify; }
- /// Do not emit linked dwarf info.
- void setNoOutput(bool NoOut) { Options.NoOutput = NoOut; }
-
/// Do not unique types according to ODR.
void setNoODR(bool NoODR) { Options.NoODR = NoODR; }
- /// update existing DWARF info(for the linked binary).
- void setUpdate(bool Update) { Options.Update = Update; }
+ /// Update index tables only (do not modify the rest of the DWARF).
+ void setUpdateIndexTablesOnly(bool Update) { Options.Update = Update; }
+
+ /// Allow generating valid, but non-deterministic output.
+ void setAllowNonDeterministicOutput(bool) { /* Nothing to do. */
+ }
/// Set whether to keep the enclosing function for a static variable.
void setKeepFunctionForStatic(bool KeepFunctionForStatic) {
@@ -298,34 +387,23 @@ public:
void setNumThreads(unsigned NumThreads) { Options.Threads = NumThreads; }
/// Add kind of accelerator tables to be generated.
- void addAccelTableKind(DwarfLinkerAccelTableKind Kind) {
- assert(std::find(Options.AccelTables.begin(), Options.AccelTables.end(),
- Kind) == Options.AccelTables.end());
+ void addAccelTableKind(AccelTableKind Kind) {
+ assert(!llvm::is_contained(Options.AccelTables, Kind));
Options.AccelTables.emplace_back(Kind);
}
/// Set prepend path for clang modules.
void setPrependPath(const std::string &Ppath) { Options.PrependPath = Ppath; }
- /// Set translator which would be used for strings.
- void
- setStringsTranslator(std::function<StringRef(StringRef)> StringsTranslator) {
- this->StringsTranslator = StringsTranslator;
- }
-
/// Set estimated objects files amount, for preliminary data allocation.
void setEstimatedObjfilesAmount(unsigned ObjFilesNum) {
ObjectContexts.reserve(ObjFilesNum);
}
- /// Set warning handler which would be used to report warnings.
- void setWarningHandler(messageHandler Handler) {
- Options.WarningHandler = Handler;
- }
-
- /// Set error handler which would be used to report errors.
- void setErrorHandler(messageHandler Handler) {
- Options.ErrorHandler = Handler;
+ /// Set verification handler which would be used to report verification
+ /// errors.
+ void setInputVerificationHandler(inputVerificationHandler Handler) {
+ Options.InputVerificationHandler = Handler;
}
/// Set map for Swift interfaces.
@@ -340,7 +418,7 @@ public:
/// Set target DWARF version.
Error setTargetDWARFVersion(uint16_t TargetDWARFVersion) {
- if (TargetDWARFVersion < 1 || TargetDWARFVersion > 5)
+ if ((TargetDWARFVersion < 1) || (TargetDWARFVersion > 5))
return createStringError(std::errc::invalid_argument,
"unsupported DWARF version: %d",
TargetDWARFVersion);
@@ -407,21 +485,21 @@ private:
};
/// Verify the given DWARF file.
- bool verify(const DWARFFile &File);
+ void verifyInput(const DWARFFile &File);
/// returns true if we need to translate strings.
bool needToTranslateStrings() { return StringsTranslator != nullptr; }
void reportWarning(const Twine &Warning, const DWARFFile &File,
const DWARFDie *DIE = nullptr) const {
- if (Options.WarningHandler != nullptr)
- Options.WarningHandler(Warning, File.FileName, DIE);
+ if (WarningHandler != nullptr)
+ WarningHandler(Warning, File.FileName, DIE);
}
void reportError(const Twine &Warning, const DWARFFile &File,
const DWARFDie *DIE = nullptr) const {
- if (Options.ErrorHandler != nullptr)
- Options.ErrorHandler(Warning, File.FileName, DIE);
+ if (ErrorHandler != nullptr)
+ ErrorHandler(Warning, File.FileName, DIE);
}
/// Emit warnings as Dwarf compile units to leave a trail after linking.
@@ -494,10 +572,9 @@ private:
/// keep. Store that information in \p CU's DIEInfo.
///
/// The return value indicates whether the DIE is incomplete.
- void lookForDIEsToKeep(AddressesMap &RelocMgr, RangesTy &Ranges,
- const UnitListTy &Units, const DWARFDie &DIE,
- const DWARFFile &File, CompileUnit &CU,
- unsigned Flags);
+ void lookForDIEsToKeep(AddressesMap &RelocMgr, const UnitListTy &Units,
+ const DWARFDie &DIE, const DWARFFile &File,
+ CompileUnit &CU, unsigned Flags);
/// Check whether specified \p CUDie is a Clang module reference.
/// if \p Quiet is false then display error messages.
@@ -529,29 +606,30 @@ private:
/// Clone specified Clang module unit \p Unit.
Error cloneModuleUnit(LinkContext &Context, RefModuleUnit &Unit,
DeclContextTree &ODRContexts,
- OffsetsStringPool &OffsetsStringPool,
+ OffsetsStringPool &DebugStrPool,
+ OffsetsStringPool &DebugLineStrPool,
unsigned Indent = 0);
- /// Mark the passed DIE as well as all the ones it depends on as kept.
- void keepDIEAndDependencies(AddressesMap &RelocMgr, RangesTy &Ranges,
- const UnitListTy &Units, const DWARFDie &DIE,
- CompileUnit::DIEInfo &MyInfo,
- const DWARFFile &File, CompileUnit &CU,
- bool UseODR);
+ unsigned shouldKeepDIE(AddressesMap &RelocMgr, const DWARFDie &DIE,
+ const DWARFFile &File, CompileUnit &Unit,
+ CompileUnit::DIEInfo &MyInfo, unsigned Flags);
- unsigned shouldKeepDIE(AddressesMap &RelocMgr, RangesTy &Ranges,
- const DWARFDie &DIE, const DWARFFile &File,
- CompileUnit &Unit, CompileUnit::DIEInfo &MyInfo,
- unsigned Flags);
+ /// This function checks whether the variable has a DWARF expression containing
+ /// an operation referencing a live address (e.g. DW_OP_addr, DW_OP_addrx...).
+ /// \returns a pair whose first member is true if the expression has an
+ /// operation referencing an address, and whose second member is the
+ /// relocation adjustment value if the live address is referenced.
+ std::pair<bool, std::optional<int64_t>>
+ getVariableRelocAdjustment(AddressesMap &RelocMgr, const DWARFDie &DIE);
/// Check if a variable describing DIE should be kept.
/// \returns updated TraversalFlags.
unsigned shouldKeepVariableDIE(AddressesMap &RelocMgr, const DWARFDie &DIE,
CompileUnit::DIEInfo &MyInfo, unsigned Flags);
- unsigned shouldKeepSubprogramDIE(AddressesMap &RelocMgr, RangesTy &Ranges,
- const DWARFDie &DIE, const DWARFFile &File,
- CompileUnit &Unit,
+ unsigned shouldKeepSubprogramDIE(AddressesMap &RelocMgr, const DWARFDie &DIE,
+ const DWARFFile &File, CompileUnit &Unit,
CompileUnit::DIEInfo &MyInfo,
unsigned Flags);
@@ -575,6 +653,9 @@ private:
DWARFLinker &Linker;
DwarfEmitter *Emitter;
DWARFFile &ObjFile;
+ OffsetsStringPool &DebugStrPool;
+ OffsetsStringPool &DebugLineStrPool;
+ DebugAddrPool AddrPool;
/// Allocator used for all the DIEValue objects.
BumpPtrAllocator &DIEAlloc;
@@ -591,8 +672,10 @@ private:
DIECloner(DWARFLinker &Linker, DwarfEmitter *Emitter, DWARFFile &ObjFile,
BumpPtrAllocator &DIEAlloc,
std::vector<std::unique_ptr<CompileUnit>> &CompileUnits,
- bool Update)
+ bool Update, OffsetsStringPool &DebugStrPool,
+ OffsetsStringPool &DebugLineStrPool)
: Linker(Linker), Emitter(Emitter), ObjFile(ObjFile),
+ DebugStrPool(DebugStrPool), DebugLineStrPool(DebugLineStrPool),
DIEAlloc(DIEAlloc), CompileUnits(CompileUnits), Update(Update) {}
/// Recursively clone \p InputDIE into an tree of DIE objects
@@ -607,17 +690,27 @@ private:
/// \param Die the output DIE to use, pass NULL to create one.
/// \returns the root of the cloned tree or null if nothing was selected.
DIE *cloneDIE(const DWARFDie &InputDIE, const DWARFFile &File,
- CompileUnit &U, OffsetsStringPool &StringPool,
- int64_t PCOffset, uint32_t OutOffset, unsigned Flags,
- bool IsLittleEndian, DIE *Die = nullptr);
+ CompileUnit &U, int64_t PCOffset, uint32_t OutOffset,
+ unsigned Flags, bool IsLittleEndian, DIE *Die = nullptr);
/// Construct the output DIE tree by cloning the DIEs we
/// chose to keep above. If there are no valid relocs, then there's
/// nothing to clone/emit.
uint64_t cloneAllCompileUnits(DWARFContext &DwarfContext,
- const DWARFFile &File,
- OffsetsStringPool &StringPool,
- bool IsLittleEndian);
+ const DWARFFile &File, bool IsLittleEndian);
+
+ /// Emit the .debug_addr section for the \p Unit.
+ void emitDebugAddrSection(CompileUnit &Unit,
+ const uint16_t DwarfVersion) const;
+
+ using ExpressionHandlerRef = function_ref<void(
+ SmallVectorImpl<uint8_t> &, SmallVectorImpl<uint8_t> &,
+ int64_t AddrRelocAdjustment)>;
+
+ /// Compute and emit debug locations (.debug_loc, .debug_loclists)
+ /// for \p Unit, patch the attributes referencing it.
+ void generateUnitLocations(CompileUnit &Unit, const DWARFFile &File,
+ ExpressionHandlerRef ExprHandler);
private:
using AttributeSpec = DWARFAbbreviationDeclaration::AttributeSpec;
@@ -632,18 +725,6 @@ private:
uint32_t NameOffset = 0;
uint32_t MangledNameOffset = 0;
- /// Value of AT_low_pc in the input DIE
- uint64_t OrigLowPc = std::numeric_limits<uint64_t>::max();
-
- /// Value of AT_high_pc in the input DIE
- uint64_t OrigHighPc = 0;
-
- /// Value of DW_AT_call_return_pc in the input DIE
- uint64_t OrigCallReturnPc = 0;
-
- /// Value of DW_AT_call_pc in the input DIE
- uint64_t OrigCallPc = 0;
-
/// Offset to apply to PC addresses inside a function.
int64_t PCOffset = 0;
@@ -662,7 +743,6 @@ private:
/// Helper for cloneDIE.
unsigned cloneAttribute(DIE &Die, const DWARFDie &InputDIE,
const DWARFFile &File, CompileUnit &U,
- OffsetsStringPool &StringPool,
const DWARFFormValue &Val,
const AttributeSpec AttrSpec, unsigned AttrSize,
AttributesInfo &AttrInfo, bool IsLittleEndian);
@@ -672,7 +752,6 @@ private:
/// \returns the size of the new attribute.
unsigned cloneStringAttribute(DIE &Die, AttributeSpec AttrSpec,
const DWARFFormValue &Val, const DWARFUnit &U,
- OffsetsStringPool &StringPool,
AttributesInfo &Info);
/// Clone an attribute referencing another DIE and add
@@ -688,21 +767,24 @@ private:
/// Clone a DWARF expression that may be referencing another DIE.
void cloneExpression(DataExtractor &Data, DWARFExpression Expression,
const DWARFFile &File, CompileUnit &Unit,
- SmallVectorImpl<uint8_t> &OutputBuffer);
+ SmallVectorImpl<uint8_t> &OutputBuffer,
+ int64_t AddrRelocAdjustment, bool IsLittleEndian);
/// Clone an attribute referencing another DIE and add
/// it to \p Die.
/// \returns the size of the new attribute.
- unsigned cloneBlockAttribute(DIE &Die, const DWARFFile &File,
- CompileUnit &Unit, AttributeSpec AttrSpec,
- const DWARFFormValue &Val, unsigned AttrSize,
+ unsigned cloneBlockAttribute(DIE &Die, const DWARFDie &InputDIE,
+ const DWARFFile &File, CompileUnit &Unit,
+ AttributeSpec AttrSpec,
+ const DWARFFormValue &Val,
bool IsLittleEndian);
/// Clone an attribute referencing another DIE and add
/// it to \p Die.
/// \returns the size of the new attribute.
- unsigned cloneAddressAttribute(DIE &Die, AttributeSpec AttrSpec,
- unsigned AttrSize, const DWARFFormValue &Val,
+ unsigned cloneAddressAttribute(DIE &Die, const DWARFDie &InputDIE,
+ AttributeSpec AttrSpec, unsigned AttrSize,
+ const DWARFFormValue &Val,
const CompileUnit &Unit,
AttributesInfo &Info);
@@ -731,32 +813,25 @@ private:
OffsetsStringPool &StringPool, bool SkipPubSection);
void rememberUnitForMacroOffset(CompileUnit &Unit);
+
+ /// Clone and emit the line table for the specified \p Unit.
+ /// Translate directories and file names if necessary.
+ /// Relocate address ranges.
+ void generateLineTableForUnit(CompileUnit &Unit);
};
/// Assign an abbreviation number to \p Abbrev
void assignAbbrev(DIEAbbrev &Abbrev);
- /// Compute and emit .debug_ranges section for \p Unit, and
- /// patch the attributes referencing it.
- void patchRangesForUnit(const CompileUnit &Unit, DWARFContext &Dwarf,
- const DWARFFile &File) const;
-
- /// Generate and emit the DW_AT_ranges attribute for a compile_unit if it had
- /// one.
- void generateUnitRanges(CompileUnit &Unit) const;
-
- /// Extract the line tables from the original dwarf, extract the relevant
- /// parts according to the linked function ranges and emit the result in the
- /// .debug_line section.
- void patchLineTableForUnit(CompileUnit &Unit, DWARFContext &OrigDwarf,
- const DWARFFile &File);
+ /// Compute and emit debug ranges(.debug_aranges, .debug_ranges,
+ /// .debug_rnglists) for \p Unit, patch the attributes referencing it.
+ void generateUnitRanges(CompileUnit &Unit, const DWARFFile &File) const;
/// Emit the accelerator entries for \p Unit.
void emitAcceleratorEntriesForUnit(CompileUnit &Unit);
/// Patch the frame info for an object file and emit it.
- void patchFrameInfoForObject(const DWARFFile &, RangesTy &Ranges,
- DWARFContext &, unsigned AddressSize);
+ void patchFrameInfoForObject(LinkContext &Context);
/// FoldingSet that uniques the abbreviations.
FoldingSet<DIEAbbrev> AbbreviationsSet;
@@ -776,7 +851,7 @@ private:
BumpPtrAllocator DIEAlloc;
/// @}
- DwarfEmitter *TheDwarfEmitter;
+ std::unique_ptr<DwarfStreamer> TheDwarfEmitter;
std::vector<LinkContext> ObjectContexts;
/// The CIEs that have been emitted in the output section. The actual CIE
@@ -805,6 +880,12 @@ private:
/// A unique ID that identifies each compile unit.
unsigned UniqueUnitID = 0;
+ // error handler
+ messageHandler ErrorHandler = nullptr;
+
+ // warning handler
+ messageHandler WarningHandler = nullptr;
+
/// linking options
struct DWARFLinkerOptions {
/// DWARF version for the output.
@@ -819,9 +900,6 @@ private:
/// Verify the input DWARF.
bool VerifyInputDWARF = false;
- /// Skip emitting output
- bool NoOutput = false;
-
/// Do not unique types according to ODR
bool NoODR = false;
@@ -836,16 +914,13 @@ private:
unsigned Threads = 1;
/// The accelerator table kinds
- SmallVector<DwarfLinkerAccelTableKind, 1> AccelTables;
+ SmallVector<AccelTableKind, 1> AccelTables;
/// Prepend path for the clang modules.
std::string PrependPath;
- // warning handler
- messageHandler WarningHandler = nullptr;
-
- // error handler
- messageHandler ErrorHandler = nullptr;
+ // input verification handler
+ inputVerificationHandler InputVerificationHandler = nullptr;
/// A list of all .swiftinterface files referenced by the debug
/// info, mapping Module name to path on disk. The entries need to
diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h
index 5b0ea339c4d6..08ebd4bc70bc 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFLinkerCompileUnit.h
@@ -21,14 +21,24 @@ class DeclContext;
/// Mapped value in the address map is the offset to apply to the
/// linked address.
-using RangesTy = AddressRangesMap<int64_t>;
-
-// FIXME: Delete this structure.
+using RangesTy = AddressRangesMap;
+
+// This structure keeps a patch for the attribute and, optionally,
+// the relocation value which should be applied. Currently, only the
+// location attribute needs to carry a relocation: either to the
+// function ranges if the location attribute is of type 'loclist',
+// or to the operand of DW_OP_addr/DW_OP_addrx if the location
+// attribute is of type 'exprloc'.
+// ASSUMPTION: Location attributes of 'loclist' type containing 'exprloc'
+// with address expression operands are not supported yet.
struct PatchLocation {
DIE::value_iterator I;
+ int64_t RelocAdjustment = 0;
PatchLocation() = default;
PatchLocation(DIE::value_iterator I) : I(I) {}
+ PatchLocation(DIE::value_iterator I, int64_t Reloc)
+ : I(I), RelocAdjustment(Reloc) {}
void set(uint64_t New) const {
assert(I);
@@ -43,6 +53,9 @@ struct PatchLocation {
}
};
+using RngListAttributesTy = SmallVector<PatchLocation>;
+using LocListAttributesTy = SmallVector<PatchLocation>;
+
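
A hedged sketch of how such a patch might be recorded and later resolved follows; It, FuncRelocAdjustment, Unit and LinkedLocListOffset are placeholder names, and only the PatchLocation shape comes from the declarations above:

// 'It' is assumed to be a DIE::value_iterator for a DW_AT_location attribute
// of 'loclist' form; 'FuncRelocAdjustment' would come from the AddressesMap.
PatchLocation LocPatch(It, FuncRelocAdjustment);
Unit.noteLocationAttribute(LocPatch);

// Once the linked .debug_loclists offset is known, the patch is resolved;
// RelocAdjustment travels with it so the emitter can relocate the entries.
LocPatch.set(LinkedLocListOffset);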
/// Stores all information relating to a compile unit, be it in its original
/// instance in the object file to its brand new cloned and generated DIE tree.
class CompileUnit {
@@ -82,6 +95,9 @@ public:
/// Is this a reference to a DIE that hasn't been cloned yet?
bool UnclonedReference : 1;
+ /// Is this a variable with a location attribute referencing address?
+ bool HasLocationExpressionAddr : 1;
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void dump();
#endif // if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -143,18 +159,15 @@ public:
uint64_t getHighPc() const { return HighPc; }
bool hasLabelAt(uint64_t Addr) const { return Labels.count(Addr); }
- std::optional<PatchLocation> getUnitRangesAttribute() const {
- return UnitRangeAttribute;
- }
-
const RangesTy &getFunctionRanges() const { return Ranges; }
- const std::vector<PatchLocation> &getRangesAttributes() const {
- return RangeAttributes;
+ const RngListAttributesTy &getRangesAttributes() { return RangeAttributes; }
+
+ std::optional<PatchLocation> getUnitRangesAttribute() const {
+ return UnitRangeAttribute;
}
- const std::vector<std::pair<PatchLocation, int64_t>> &
- getLocationAttributes() const {
+ const LocListAttributesTy &getLocationAttributes() const {
return LocationAttributes;
}
@@ -191,7 +204,7 @@ public:
/// Keep track of a location attribute pointing to a location list in the
/// debug_loc section.
- void noteLocationAttribute(PatchLocation Attr, int64_t PcOffset);
+ void noteLocationAttribute(PatchLocation Attr);
/// Add a name accelerator entry for \a Die with \a Name.
void addNamespaceAccelerator(const DIE *Die, DwarfStringPoolEntryRef Name);
@@ -278,18 +291,19 @@ private:
/// The DW_AT_low_pc of each DW_TAG_label.
SmallDenseMap<uint64_t, uint64_t, 1> Labels;
- /// DW_AT_ranges attributes to patch after we have gathered
- /// all the unit's function addresses.
+ /// 'rnglist'(DW_AT_ranges, DW_AT_start_scope) attributes to patch after
+ /// we have gathered all the unit's function addresses.
/// @{
- std::vector<PatchLocation> RangeAttributes;
+ RngListAttributesTy RangeAttributes;
std::optional<PatchLocation> UnitRangeAttribute;
/// @}
/// Location attributes that need to be transferred from the
- /// original debug_loc section to the liked one. They are stored
+ /// original debug_loc section to the linked one. They are stored
/// along with the PC offset that is to be applied to their
- /// function's address.
- std::vector<std::pair<PatchLocation, int64_t>> LocationAttributes;
+ /// function's address or to be applied to address operands of
+ /// location expression.
+ LocListAttributesTy LocationAttributes;
/// Accelerator entries for the unit, both for the pub*
/// sections and the apple* ones.
diff --git a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
index 24248c8dffd0..ec2281d462b6 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
@@ -23,11 +23,6 @@
namespace llvm {
template <typename DataT> class AccelTable;
-enum class OutputFileType {
- Object,
- Assembly,
-};
-
/// User of DwarfStreamer should call initialization code
/// for AsmPrinter:
///
@@ -45,18 +40,19 @@ class DWARFDebugMacro;
/// information binary representation are handled in this class.
class DwarfStreamer : public DwarfEmitter {
public:
- DwarfStreamer(OutputFileType OutFileType, raw_pwrite_stream &OutFile,
+ DwarfStreamer(DWARFLinker::OutputFileType OutFileType,
+ raw_pwrite_stream &OutFile,
std::function<StringRef(StringRef Input)> Translator,
- messageHandler Error, messageHandler Warning)
+ DWARFLinker::messageHandler Warning)
: OutFile(OutFile), OutFileType(OutFileType), Translator(Translator),
- ErrorHandler(Error), WarningHandler(Warning) {}
+ WarningHandler(Warning) {}
- bool init(Triple TheTriple, StringRef Swift5ReflectionSegmentName);
+ Error init(Triple TheTriple, StringRef Swift5ReflectionSegmentName);
/// Dump the file to the disk.
- void finish();
+ void finish() override;
- AsmPrinter &getAsmPrinter() const { return *Asm; }
+ AsmPrinter &getAsmPrinter() const override { return *Asm; }
/// Set the current output section to debug_info and change
/// the MC Dwarf version to \p DwarfVersion.
@@ -82,46 +78,71 @@ public:
/// Emit contents of section SecName From Obj.
void emitSectionContents(StringRef SecData, StringRef SecName) override;
- /// Emit the string table described by \p Pool.
+ /// Emit the string table described by \p Pool into .debug_str table.
void emitStrings(const NonRelocatableStringpool &Pool) override;
+ /// Emit the string table described by \p Pool into .debug_line_str table.
+ void emitLineStrings(const NonRelocatableStringpool &Pool) override;
+
/// Emit the swift_ast section stored in \p Buffer.
- void emitSwiftAST(StringRef Buffer);
+ void emitSwiftAST(StringRef Buffer) override;
/// Emit the swift reflection section stored in \p Buffer.
void emitSwiftReflectionSection(
llvm::binaryformat::Swift5ReflectionSectionKind ReflSectionKind,
- StringRef Buffer, uint32_t Alignment, uint32_t Size);
+ StringRef Buffer, uint32_t Alignment, uint32_t Size) override;
- /// Emit piece of .debug_ranges for \p Ranges.
- virtual void
- emitDwarfDebugRangesTableFragment(const CompileUnit &Unit,
- const AddressRanges &LinkedRanges) override;
+ /// Emit debug ranges(.debug_ranges, .debug_rnglists) header.
+ MCSymbol *emitDwarfDebugRangeListHeader(const CompileUnit &Unit) override;
- /// Emit debug_aranges entries for \p Unit and if \p DoRangesSection is true,
- /// also emit the debug_ranges entries for the DW_TAG_compile_unit's
- /// DW_AT_ranges attribute.
- void emitUnitRangesEntries(CompileUnit &Unit, bool DoRangesSection) override;
+ /// Emit debug ranges(.debug_ranges, .debug_rnglists) fragment.
+ void emitDwarfDebugRangeListFragment(const CompileUnit &Unit,
+ const AddressRanges &LinkedRanges,
+ PatchLocation Patch) override;
- uint64_t getRangesSectionSize() const override { return RangesSectionSize; }
+ /// Emit debug ranges(.debug_ranges, .debug_rnglists) footer.
+ void emitDwarfDebugRangeListFooter(const CompileUnit &Unit,
+ MCSymbol *EndLabel) override;
+
+ /// Emit debug locations(.debug_loc, .debug_loclists) header.
+ MCSymbol *emitDwarfDebugLocListHeader(const CompileUnit &Unit) override;
+
+ /// Emit .debug_addr header.
+ MCSymbol *emitDwarfDebugAddrsHeader(const CompileUnit &Unit) override;
- /// Emit the debug_loc contribution for \p Unit by copying the entries from
- /// \p Dwarf and offsetting them. Update the location attributes to point to
- /// the new entries.
- void emitLocationsForUnit(
- const CompileUnit &Unit, DWARFContext &Dwarf,
- std::function<void(StringRef, SmallVectorImpl<uint8_t> &)> ProcessExpr)
- override;
+ /// Emit the addresses described by \p Addrs into .debug_addr table.
+ void emitDwarfDebugAddrs(const SmallVector<uint64_t> &Addrs,
+ uint8_t AddrSize) override;
- /// Emit the line table described in \p Rows into the debug_line section.
- void emitLineTableForUnit(MCDwarfLineTableParams Params,
- StringRef PrologueBytes, unsigned MinInstLength,
- std::vector<DWARFDebugLine::Row> &Rows,
- unsigned AdddressSize) override;
+ /// Emit .debug_addr footer.
+ void emitDwarfDebugAddrsFooter(const CompileUnit &Unit,
+ MCSymbol *EndLabel) override;
- /// Copy the debug_line over to the updated binary while unobfuscating the
- /// file names and directories.
- void translateLineTable(DataExtractor LineData, uint64_t Offset) override;
+ /// Emit debug locations (.debug_loc, .debug_loclists) fragment.
+ void emitDwarfDebugLocListFragment(
+ const CompileUnit &Unit,
+ const DWARFLocationExpressionsVector &LinkedLocationExpression,
+ PatchLocation Patch, DebugAddrPool &AddrPool) override;
+
+ /// Emit debug locations (.debug_loc, .debug_loclists) footer.
+ void emitDwarfDebugLocListFooter(const CompileUnit &Unit,
+ MCSymbol *EndLabel) override;
+
+ /// Emit .debug_aranges entries for \p Unit
+ void emitDwarfDebugArangesTable(const CompileUnit &Unit,
+ const AddressRanges &LinkedRanges) override;
+
+ uint64_t getRangesSectionSize() const override { return RangesSectionSize; }
+
+ uint64_t getRngListsSectionSize() const override {
+ return RngListsSectionSize;
+ }
+
+ /// Emit .debug_line table entry for specified \p LineTable
+ void emitLineTableForUnit(const DWARFDebugLine::LineTable &LineTable,
+ const CompileUnit &Unit,
+ OffsetsStringPool &DebugStrPool,
+ OffsetsStringPool &DebugLineStrPool) override;
uint64_t getLineSectionSize() const override { return LineSectionSize; }
@@ -171,16 +192,17 @@ public:
return MacroSectionSize;
}
+ uint64_t getLocListsSectionSize() const override {
+ return LocListsSectionSize;
+ }
+
+ uint64_t getDebugAddrSectionSize() const override { return AddrSectionSize; }
+
void emitMacroTables(DWARFContext *Context,
const Offset2UnitMap &UnitMacroMap,
OffsetsStringPool &StringPool) override;
private:
- inline void error(const Twine &Error, StringRef Context = "") {
- if (ErrorHandler)
- ErrorHandler(Error, Context, nullptr);
- }
-
inline void warn(const Twine &Warning, StringRef Context = "") {
if (WarningHandler)
WarningHandler(Warning, Context, nullptr);
@@ -189,8 +211,54 @@ private:
void emitMacroTableImpl(const DWARFDebugMacro *MacroTable,
const Offset2UnitMap &UnitMacroMap,
OffsetsStringPool &StringPool, uint64_t &OutOffset);
- void emitDwarfDebugArangesTable(const CompileUnit &Unit,
- const AddressRanges &LinkedRanges);
+
+ /// Emit piece of .debug_ranges for \p LinkedRanges.
+ void emitDwarfDebugRangesTableFragment(const CompileUnit &Unit,
+ const AddressRanges &LinkedRanges,
+ PatchLocation Patch);
+
+ /// Emit piece of .debug_rnglists for \p LinkedRanges.
+ void emitDwarfDebugRngListsTableFragment(const CompileUnit &Unit,
+ const AddressRanges &LinkedRanges,
+ PatchLocation Patch);
+
+ /// Emit piece of .debug_loc for \p LinkedRanges.
+ void emitDwarfDebugLocTableFragment(
+ const CompileUnit &Unit,
+ const DWARFLocationExpressionsVector &LinkedLocationExpression,
+ PatchLocation Patch);
+
+ /// Emit piece of .debug_loclists for \p LinkedRanges.
+ void emitDwarfDebugLocListsTableFragment(
+ const CompileUnit &Unit,
+ const DWARFLocationExpressionsVector &LinkedLocationExpression,
+ PatchLocation Patch, DebugAddrPool &AddrPool);
+
+ /// \defgroup Line table emission
+ /// @{
+ void emitLineTablePrologue(const DWARFDebugLine::Prologue &P,
+ OffsetsStringPool &DebugStrPool,
+ OffsetsStringPool &DebugLineStrPool);
+ void emitLineTableString(const DWARFDebugLine::Prologue &P,
+ const DWARFFormValue &String,
+ OffsetsStringPool &DebugStrPool,
+ OffsetsStringPool &DebugLineStrPool);
+ void emitLineTableProloguePayload(const DWARFDebugLine::Prologue &P,
+ OffsetsStringPool &DebugStrPool,
+ OffsetsStringPool &DebugLineStrPool);
+ void emitLineTablePrologueV2IncludeAndFileTable(
+ const DWARFDebugLine::Prologue &P, OffsetsStringPool &DebugStrPool,
+ OffsetsStringPool &DebugLineStrPool);
+ void emitLineTablePrologueV5IncludeAndFileTable(
+ const DWARFDebugLine::Prologue &P, OffsetsStringPool &DebugStrPool,
+ OffsetsStringPool &DebugLineStrPool);
+ void emitLineTableRows(const DWARFDebugLine::LineTable &LineTable,
+ MCSymbol *LineEndSym, unsigned AddressByteSize);
+ void emitIntOffset(uint64_t Offset, dwarf::DwarfFormat Format,
+ uint64_t &SectionSize);
+ void emitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
+ dwarf::DwarfFormat Format, uint64_t &SectionSize);
+ /// @}
/// \defgroup MCObjects MC layer objects constructed by the streamer
/// @{
@@ -210,16 +278,19 @@ private:
/// The output file we stream the linked Dwarf to.
raw_pwrite_stream &OutFile;
- OutputFileType OutFileType = OutputFileType::Object;
+ DWARFLinker::OutputFileType OutFileType = DWARFLinker::OutputFileType::Object;
std::function<StringRef(StringRef Input)> Translator;
uint64_t RangesSectionSize = 0;
+ uint64_t RngListsSectionSize = 0;
uint64_t LocSectionSize = 0;
+ uint64_t LocListsSectionSize = 0;
uint64_t LineSectionSize = 0;
uint64_t FrameSectionSize = 0;
uint64_t DebugInfoSectionSize = 0;
uint64_t MacInfoSectionSize = 0;
uint64_t MacroSectionSize = 0;
+ uint64_t AddrSectionSize = 0;
/// Keep track of emitted CUs and their Unique ID.
struct EmittedUnit {
@@ -234,8 +305,7 @@ private:
const CompileUnit &Unit,
const std::vector<CompileUnit::AccelInfo> &Names);
- messageHandler ErrorHandler = nullptr;
- messageHandler WarningHandler = nullptr;
+ DWARFLinker::messageHandler WarningHandler = nullptr;
};
} // end namespace llvm
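
For context, the sketch below constructs the streamer directly; in this revision the classic DWARFLinker normally creates its own DwarfStreamer via createEmitter(), so treat this as a shape-of-the-API example with OutFile and TheTriple supplied by the caller:

#include "llvm/DWARFLinker/DWARFStreamer.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Sketch: stream linked DWARF for 'TheTriple' into 'OutFile'.
static void emitWithStreamer(const Triple &TheTriple,
                             raw_pwrite_stream &OutFile) {
  auto ReportWarn = [](const Twine &Msg, StringRef Ctx, const DWARFDie *) {
    errs() << "warning: " << Msg << " (" << Ctx << ")\n";
  };

  DwarfStreamer Streamer(DWARFLinker::OutputFileType::Object, OutFile,
                         /*Translator=*/nullptr, ReportWarn);

  if (Error E = Streamer.init(TheTriple, /*Swift5ReflectionSegmentName=*/"")) {
    logAllUnhandledErrors(std::move(E), errs(), "streamer init: ");
    return;
  }

  // ... the linker drives the emit* callbacks here ...
  Streamer.finish(); // flush the linked DWARF to OutFile
}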
diff --git a/llvm/include/llvm/DWARFLinkerParallel/AddressesMap.h b/llvm/include/llvm/DWARFLinkerParallel/AddressesMap.h
new file mode 100644
index 000000000000..5d735abab419
--- /dev/null
+++ b/llvm/include/llvm/DWARFLinkerParallel/AddressesMap.h
@@ -0,0 +1,70 @@
+//===- AddressesMap.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DWARFLINKERPARALLEL_ADDRESSESMAP_H
+#define LLVM_DWARFLINKERPARALLEL_ADDRESSESMAP_H
+
+#include "llvm/ADT/AddressRanges.h"
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
+#include <cstdint>
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+/// Mapped value in the address map is the offset to apply to the
+/// linked address.
+using RangesTy = AddressRangesMap;
+
+/// AddressesMap represents information about valid addresses used
+/// by debug information. Valid addresses are those which point to
+/// live code sections, i.e. relocations for these addresses point
+/// into sections which would be/are placed into the resulting binary.
+class AddressesMap {
+public:
+ virtual ~AddressesMap() = default;
+
+ /// Checks that there are valid relocations in the .debug_info
+ /// section.
+ virtual bool hasValidRelocs() = 0;
+
+ /// Checks that the specified DWARF expression operand \p Op references a live
+ /// code section and returns the relocation adjustment value (to get the
+ /// linked address, this value might be added to the source expression operand
+ /// address).
+ /// \returns relocation adjustment value or std::nullopt if there is no
+ /// corresponding live address.
+ virtual std::optional<int64_t>
+ getExprOpAddressRelocAdjustment(DWARFUnit &U,
+ const DWARFExpression::Operation &Op,
+ uint64_t StartOffset, uint64_t EndOffset) = 0;
+
+ /// Checks that the specified subprogram \p DIE references the live code
+ /// section and returns the relocation adjustment value (to get the linked
+ /// address this value might be added to the source subprogram address).
+ /// Allowed kinds of input DIE: DW_TAG_subprogram, DW_TAG_label.
+ /// \returns relocation adjustment value or std::nullopt if there is no
+ /// corresponding live address.
+ virtual std::optional<int64_t>
+ getSubprogramRelocAdjustment(const DWARFDie &DIE) = 0;
+
+ /// Apply the valid relocations to the buffer \p Data, taking into
+ /// account that Data is at \p BaseOffset in the .debug_info section.
+ ///
+ /// \returns true if any reloc has been applied.
+ virtual bool applyValidRelocs(MutableArrayRef<char> Data, uint64_t BaseOffset,
+ bool IsLittleEndian) = 0;
+
+ /// Erases all data.
+ virtual void clear() = 0;
+};
+
+} // end of namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_DWARFLINKERPARALLEL_ADDRESSESMAP_H
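
For orientation, a minimal and deliberately naive AddressesMap might look like the sketch below: it reports every address as live with a zero adjustment, which is only useful for experiments and is not how real relocation maps behave (IdentityAddressesMap is an invented name):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/DWARFLinkerParallel/AddressesMap.h"
#include <optional>

namespace {
using namespace llvm;
using namespace llvm::dwarflinker_parallel;

// Toy AddressesMap: treats every address as live, with no adjustment.
class IdentityAddressesMap : public AddressesMap {
public:
  bool hasValidRelocs() override { return true; }

  std::optional<int64_t>
  getExprOpAddressRelocAdjustment(DWARFUnit &,
                                  const DWARFExpression::Operation &, uint64_t,
                                  uint64_t) override {
    return 0; // keep everything, do not shift addresses
  }

  std::optional<int64_t>
  getSubprogramRelocAdjustment(const DWARFDie &) override {
    return 0;
  }

  bool applyValidRelocs(MutableArrayRef<char>, uint64_t, bool) override {
    return false; // nothing was patched in this toy setup
  }

  void clear() override {}
};
} // namespace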
diff --git a/llvm/include/llvm/DWARFLinkerParallel/DWARFFile.h b/llvm/include/llvm/DWARFLinkerParallel/DWARFFile.h
new file mode 100644
index 000000000000..c20d59f9771d
--- /dev/null
+++ b/llvm/include/llvm/DWARFLinkerParallel/DWARFFile.h
@@ -0,0 +1,73 @@
+//===- DWARFFile.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DWARFLINKERPARALLEL_DWARFFILE_H
+#define LLVM_DWARFLINKERPARALLEL_DWARFFILE_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DWARFLinkerParallel/AddressesMap.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/Support/Endian.h"
+#include <functional>
+#include <memory>
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+/// This class represents DWARF information for a source file
+/// and its address map.
+///
+/// May be used asynchronously for reading.
+class DWARFFile {
+public:
+ using UnloadCallbackTy = std::function<void(StringRef FileName)>;
+
+ DWARFFile(StringRef Name, std::unique_ptr<DWARFContext> Dwarf,
+ std::unique_ptr<AddressesMap> Addresses,
+ const std::vector<std::string> &Warnings,
+ UnloadCallbackTy UnloadFunc = nullptr)
+ : FileName(Name), Dwarf(std::move(Dwarf)),
+ Addresses(std::move(Addresses)), Warnings(Warnings),
+ UnloadFunc(UnloadFunc) {
+ if (this->Dwarf)
+ Endianess = this->Dwarf->isLittleEndian() ? support::endianness::little
+ : support::endianness::big;
+ }
+
+ /// Object file name.
+ StringRef FileName;
+
+ /// Source DWARF information.
+ std::unique_ptr<DWARFContext> Dwarf;
+
+ /// Helpful address information(list of valid address ranges, relocations).
+ std::unique_ptr<AddressesMap> Addresses;
+
+ /// Warnings for object file.
+ const std::vector<std::string> &Warnings;
+
+ /// Endianness of the source DWARF information.
+ support::endianness Endianess = support::endianness::little;
+
+ /// Callback to the module keeping object file to unload.
+ UnloadCallbackTy UnloadFunc;
+
+ /// Unloads the object file and the corresponding AddressesMap and DWARFContext.
+ void unload() {
+ Addresses.reset();
+ Dwarf.reset();
+
+ if (UnloadFunc)
+ UnloadFunc(FileName);
+ }
+};
+
+} // end namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_DWARFLINKERPARALLEL_DWARFFILE_H
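
A small sketch of wiring a DWARFFile together; the object-file handling and the IdentityAddressesMap from the earlier sketch are assumptions, only the constructor shape comes from the header above:

#include "llvm/DWARFLinkerParallel/DWARFFile.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Object/ObjectFile.h"

using namespace llvm;
using namespace llvm::dwarflinker_parallel;

// Sketch: wrap an already-loaded object file. 'Warnings' must outlive the
// returned DWARFFile because the class stores a reference to it.
static DWARFFile makeDWARFFile(const object::ObjectFile &Obj,
                               const std::vector<std::string> &Warnings) {
  std::unique_ptr<DWARFContext> Ctx = DWARFContext::create(Obj);
  auto Addresses = std::make_unique<IdentityAddressesMap>();

  return DWARFFile(Obj.getFileName(), std::move(Ctx), std::move(Addresses),
                   Warnings,
                   [](StringRef Name) { /* release backing storage here */ });
}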
diff --git a/llvm/include/llvm/DWARFLinkerParallel/DWARFLinker.h b/llvm/include/llvm/DWARFLinkerParallel/DWARFLinker.h
index b1169fdd8541..3c725fc4f53a 100644
--- a/llvm/include/llvm/DWARFLinkerParallel/DWARFLinker.h
+++ b/llvm/include/llvm/DWARFLinkerParallel/DWARFLinker.h
@@ -9,8 +9,219 @@
#ifndef LLVM_DWARFLINKERPARALLEL_DWARFLINKER_H
#define LLVM_DWARFLINKERPARALLEL_DWARFLINKER_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/DWARFLinkerParallel/DWARFFile.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/TargetParser/Triple.h"
+
+/// ------------------------------------------------------------------
+/// The core of the Dwarf linking logic.
+///
+/// The generation of the DWARF information from the object files will be
+/// driven by the selection of 'root DIEs', which are DIEs that
+/// describe variables or functions that resolve to the corresponding
+/// code section (and thus have entries in the Addresses map). All the debug
+/// information that will be generated (the DIEs, but also the line
+/// tables, ranges, ...) is derived from that set of root DIEs.
+///
+/// The root DIEs are identified because they contain relocations that
+/// point to a code section (the low_pc for a function, the location for
+/// a variable). These relocations are gathered as a very first step,
+/// by the AddressesMap, when we start processing an object file.
+///
+/// The overall linking process looks like this:
+///
+/// parallel_for_each(ObjectFile) {
+/// for_each (Compile Unit) {
+/// 1. Load Clang modules.
+/// }
+///
+/// parallel_for_each(Compile Unit) {
+/// 1. Load input DWARF for Compile Unit.
+/// 2. Report warnings for Clang modules.
+/// 3. Analyze live DIEs and type names (if ODR deduplication is requested).
+/// 4. Clone DIEs (generate output DIEs and resulting DWARF tables).
+/// The result is in an OutDebugInfoBytes, which is an ELF file
+/// containing DWARF tables corresponding to the current compile unit.
+/// 5. Cleanup Input and Output DIEs.
+/// }
+///
+/// Deallocate loaded Object file.
+/// }
+///
+/// if (ODR deduplication is requested)
+/// Generate an artificial compilation unit ("Type Table": used to partially
+/// generate DIEs at the clone stage).
+///
+/// for_each (ObjectFile) {
+/// for_each (Compile Unit) {
+/// 1. Set offsets to Compile Units DWARF tables.
+/// 2. Sort offsets/attributes/patches to have a predictable result.
+/// 3. Patch size/offsets fields.
+/// 4. Generate index tables.
+/// 5. Move DWARF tables of compile units into the resulting file.
+/// }
+/// }
+///
+/// Every compile unit is processed separately, visited only once
+/// (except when inter-CU references exist), and used data is freed
+/// after the compile unit is processed. The resulting file is glued together
+/// from the generated debug tables which correspond to separate compile units.
+///
+/// Handling inter-CU references: inter-CU references are hard to process
+/// using only one pass. E.g., if CU1 references CU100 and CU100 references
+/// CU1, we cannot finish handling CU1 until we have finished CU100.
+/// Thus we need to either load all CUs into memory, or load CUs several
+/// times. This implementation loads inter-connected CUs into memory on the
+/// first pass and processes them on the second pass.
+///
+/// ODR deduplication: an artificial compilation unit will be constructed to
+/// keep type DIEs. All types are moved into that compilation unit. Type
+/// references are patched so that they point to the corresponding types from
+/// the artificial compilation unit. All partial type definitions will be
+/// merged into a single type definition.
+///
+
namespace llvm {
-namespace dwarflinker_parallel {} // end namespace dwarflinker_parallel
+namespace dwarflinker_parallel {
+
+/// ExtraDwarfEmitter allows adding extra data to the DWARFLinker output.
+/// The finish() method should be called after all extra data are emitted.
+class ExtraDwarfEmitter {
+public:
+ virtual ~ExtraDwarfEmitter() = default;
+
+ /// Dump the file to the disk.
+ virtual void finish() = 0;
+
+ /// Emit section named SecName with data SecData.
+ virtual void emitSectionContents(StringRef SecData, StringRef SecName) = 0;
+
+ /// Emit a temporary symbol named \p SymName inside section \p SecName.
+ virtual MCSymbol *emitTempSym(StringRef SecName, StringRef SymName) = 0;
+
+ /// Emit the swift_ast section stored in \p Buffer.
+ virtual void emitSwiftAST(StringRef Buffer) = 0;
+
+ /// Emit the swift reflection section stored in \p Buffer.
+ virtual void emitSwiftReflectionSection(
+ llvm::binaryformat::Swift5ReflectionSectionKind ReflSectionKind,
+ StringRef Buffer, uint32_t Alignment, uint32_t Size) = 0;
+
+ /// Returns underlying AsmPrinter.
+ virtual AsmPrinter &getAsmPrinter() const = 0;
+};
+
+class DWARFLinker {
+public:
+ /// Type of output file.
+ enum class OutputFileType {
+ Object,
+ Assembly,
+ };
+
+ /// The kind of accelerator tables we should emit.
+ enum class AccelTableKind : uint8_t {
+ Apple, ///< .apple_names, .apple_namespaces, .apple_types, .apple_objc.
+ Pub, ///< .debug_pubnames, .debug_pubtypes
+ DebugNames ///< .debug_names.
+ };
+
+ using MessageHandlerTy = std::function<void(
+ const Twine &Warning, StringRef Context, const DWARFDie *DIE)>;
+ using ObjFileLoaderTy = std::function<ErrorOr<DWARFFile &>(
+ StringRef ContainerName, StringRef Path)>;
+ using InputVerificationHandlerTy = std::function<void(const DWARFFile &File)>;
+ using ObjectPrefixMapTy = std::map<std::string, std::string>;
+ using CompileUnitHandlerTy = function_ref<void(const DWARFUnit &Unit)>;
+ using TranslatorFuncTy = std::function<StringRef(StringRef)>;
+ using SwiftInterfacesMapTy = std::map<std::string, std::string>;
+
+ virtual ~DWARFLinker() = default;
+
+ /// Creates dwarf linker instance.
+ static std::unique_ptr<DWARFLinker>
+ createLinker(MessageHandlerTy ErrorHandler, MessageHandlerTy WarningHandler,
+ TranslatorFuncTy StringsTranslator = nullptr);
+
+ /// Creates an emitter for the output DWARF.
+ virtual Error createEmitter(const Triple &TheTriple, OutputFileType FileType,
+ raw_pwrite_stream &OutFile) = 0;
+
+ /// Returns the previously created DWARF emitter. May be nullptr.
+ virtual ExtraDwarfEmitter *getEmitter() = 0;
+
+ /// Add an object file to be linked. Pre-load the compile unit DIE. Call
+ /// \p OnCUDieLoaded for each compile unit DIE. If the specified \p File
+ /// has a reference to a Clang module then such a module would be
+ /// pre-loaded by \p Loader for the !Update case.
+ ///
+ /// \pre The NoODR and Update options should be set before calling addObjectFile.
+ virtual void addObjectFile(
+ DWARFFile &File, ObjFileLoaderTy Loader = nullptr,
+ CompileUnitHandlerTy OnCUDieLoaded = [](const DWARFUnit &) {}) = 0;
+
+ /// Link debug info for added files.
+ virtual Error link() = 0;
+
+ /// \defgroup Methods setting various linking options:
+ ///
+ /// @{
+
+ /// Allows generating a log of the linking process to the standard output.
+ virtual void setVerbosity(bool Verbose) = 0;
+
+ /// Print statistics to standard output.
+ virtual void setStatistics(bool Statistics) = 0;
+
+ /// Verify the input DWARF.
+ virtual void setVerifyInputDWARF(bool Verify) = 0;
+
+ /// Do not unique types according to ODR.
+ virtual void setNoODR(bool NoODR) = 0;
+
+ /// Update index tables only (do not modify the rest of the DWARF).
+ virtual void setUpdateIndexTablesOnly(bool UpdateIndexTablesOnly) = 0;
+
+ /// Allow generating valid, but non-deterministic output.
+ virtual void
+ setAllowNonDeterministicOutput(bool AllowNonDeterministicOutput) = 0;
+
+ /// Set to keep the enclosing function for a static variable.
+ virtual void setKeepFunctionForStatic(bool KeepFunctionForStatic) = 0;
+
+ /// Use the specified number of threads for linking files in parallel.
+ virtual void setNumThreads(unsigned NumThreads) = 0;
+
+ /// Add a kind of accelerator table to be generated.
+ virtual void addAccelTableKind(AccelTableKind Kind) = 0;
+
+ /// Set prepend path for clang modules.
+ virtual void setPrependPath(const std::string &Ppath) = 0;
+
+ /// Set the estimated number of object files, for preliminary data allocation.
+ virtual void setEstimatedObjfilesAmount(unsigned ObjFilesNum) = 0;
+
+ /// Set the verification handler used to report verification errors.
+ virtual void
+ setInputVerificationHandler(InputVerificationHandlerTy Handler) = 0;
+
+ /// Set map for Swift interfaces.
+ virtual void setSwiftInterfacesMap(SwiftInterfacesMapTy *Map) = 0;
+
+ /// Set prefix map for objects.
+ virtual void setObjectPrefixMap(ObjectPrefixMapTy *Map) = 0;
+
+ /// Set target DWARF version.
+ virtual Error setTargetDWARFVersion(uint16_t TargetDWARFVersion) = 0;
+ /// @}
+};
+
+} // end namespace dwarflinker_parallel
} // end namespace llvm
#endif // LLVM_DWARFLINKERPARALLEL_DWARFLINKER_H
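A minimal usage sketch for the interface added above, not part of the diff: the helper name linkAllFiles, the input collection, and the chosen options are illustrative assumptions; only the DWARFLinker methods declared in this header are used, and constructing the DWARFFile objects is out of scope.

#include "llvm/DWARFLinkerParallel/DWARFLinker.h"
#include "llvm/Support/WithColor.h"

using namespace llvm;
using namespace llvm::dwarflinker_parallel;

// Link the DWARF from a set of pre-constructed DWARFFile objects into OutFile.
Error linkAllFiles(const Triple &TheTriple, ArrayRef<DWARFFile *> Files,
                   raw_pwrite_stream &OutFile) {
  auto ReportError = [](const Twine &Msg, StringRef Context, const DWARFDie *) {
    WithColor::error() << Context << ": " << Msg << '\n';
  };
  auto ReportWarning = [](const Twine &Msg, StringRef Context, const DWARFDie *) {
    WithColor::warning() << Context << ": " << Msg << '\n';
  };

  std::unique_ptr<DWARFLinker> Linker =
      DWARFLinker::createLinker(ReportError, ReportWarning);
  if (Error Err = Linker->createEmitter(
          TheTriple, DWARFLinker::OutputFileType::Object, OutFile))
    return Err;

  // Per the precondition above, linking options are configured before
  // any object file is added.
  Linker->setNumThreads(4);
  Linker->addAccelTableKind(DWARFLinker::AccelTableKind::DebugNames);
  Linker->setEstimatedObjfilesAmount(Files.size());

  for (DWARFFile *File : Files)
    Linker->addObjectFile(*File);

  return Linker->link();
}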
diff --git a/llvm/include/llvm/DWARFLinkerParallel/StringPool.h b/llvm/include/llvm/DWARFLinkerParallel/StringPool.h
new file mode 100644
index 000000000000..44383ed6c7dd
--- /dev/null
+++ b/llvm/include/llvm/DWARFLinkerParallel/StringPool.h
@@ -0,0 +1,74 @@
+//===- StringPool.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DWARFLINKERPARALLEL_STRINGPOOL_H
+#define LLVM_DWARFLINKERPARALLEL_STRINGPOOL_H
+
+#include "llvm/ADT/ConcurrentHashtable.h"
+#include "llvm/CodeGen/DwarfStringPoolEntry.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/PerThreadBumpPtrAllocator.h"
+#include <string>
+#include <string_view>
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+/// StringEntry keeps the string's data: the length, the external offset,
+/// and the string body, which is placed right after the StringEntry.
+using StringEntry = StringMapEntry<DwarfStringPoolEntry *>;
+
+class StringPoolEntryInfo {
+public:
+ /// \returns Hash value for the specified \p Key.
+ static inline uint64_t getHashValue(const StringRef &Key) {
+ return xxh3_64bits(Key);
+ }
+
+ /// \returns true if both \p LHS and \p RHS are equal.
+ static inline bool isEqual(const StringRef &LHS, const StringRef &RHS) {
+ return LHS == RHS;
+ }
+
+ /// \returns key for the specified \p KeyData.
+ static inline StringRef getKey(const StringEntry &KeyData) {
+ return KeyData.getKey();
+ }
+
+ /// \returns newly created object of KeyDataTy type.
+ static inline StringEntry *
+ create(const StringRef &Key, parallel::PerThreadBumpPtrAllocator &Allocator) {
+ return StringEntry::create(Key, Allocator);
+ }
+};
+
+class StringPool
+ : public ConcurrentHashTableByPtr<StringRef, StringEntry,
+ parallel::PerThreadBumpPtrAllocator,
+ StringPoolEntryInfo> {
+public:
+ StringPool()
+ : ConcurrentHashTableByPtr<StringRef, StringEntry,
+ parallel::PerThreadBumpPtrAllocator,
+ StringPoolEntryInfo>(Allocator) {}
+
+ StringPool(size_t InitialSize)
+ : ConcurrentHashTableByPtr<StringRef, StringEntry,
+ parallel::PerThreadBumpPtrAllocator,
+ StringPoolEntryInfo>(Allocator, InitialSize) {}
+
+ parallel::PerThreadBumpPtrAllocator &getAllocatorRef() { return Allocator; }
+
+private:
+ parallel::PerThreadBumpPtrAllocator Allocator;
+};
+
+} // end of namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_DWARFLINKERPARALLEL_STRINGPOOL_H
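The pool above is a concurrent hash table keyed by string contents; a small sketch of how it de-duplicates entries. The shape of the insert() return value (a pair whose first member is the StringEntry pointer) is inferred from its use in StringTable::add in the next header.

#include "llvm/DWARFLinkerParallel/StringPool.h"
#include <cassert>

void stringPoolExample() {
  llvm::dwarflinker_parallel::StringPool Pool;

  // Inserting the same contents twice yields the same StringEntry; its
  // DwarfStringPoolEntry value stays null until a StringTable assigns an
  // index and an offset to it.
  llvm::dwarflinker_parallel::StringEntry *A = Pool.insert("hello").first;
  llvm::dwarflinker_parallel::StringEntry *B = Pool.insert("hello").first;
  assert(A == B && A->getValue() == nullptr);
  (void)A; (void)B;
}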
diff --git a/llvm/include/llvm/DWARFLinkerParallel/StringTable.h b/llvm/include/llvm/DWARFLinkerParallel/StringTable.h
new file mode 100644
index 000000000000..4f8aece521d8
--- /dev/null
+++ b/llvm/include/llvm/DWARFLinkerParallel/StringTable.h
@@ -0,0 +1,88 @@
+//===- StringTable.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DWARFLINKERPARALLEL_STRINGTABLE_H
+#define LLVM_DWARFLINKERPARALLEL_STRINGTABLE_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/DWARFLinkerParallel/StringPool.h"
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+using StringsVector = SmallVector<StringEntry *>;
+
+/// This class prepares strings for emission into the .debug_str table:
+/// it translates strings if necessary, assigns indices and offsets, and
+/// keeps them in order.
+class StringTable {
+public:
+ StringTable(StringPool &Strings,
+ std::function<StringRef(StringRef)> StringsTranslator)
+ : Strings(Strings), StringsTranslator(StringsTranslator) {}
+ ~StringTable() {}
+
+ /// Add a string to the vector of strings which should be emitted.
+ /// Translate the input string if necessary, assign an index and offset.
+ /// \returns the updated string entry.
+ StringEntry *add(StringEntry *String) {
+ // Translate string if necessary.
+ if (StringsTranslator)
+ String = Strings.insert(StringsTranslator(String->first())).first;
+
+ // Store String for emission and assign index and offset.
+ if (String->getValue() == nullptr) {
+ DwarfStringPoolEntry *NewEntry =
+ Strings.getAllocatorRef().Allocate<DwarfStringPoolEntry>();
+
+ NewEntry->Symbol = nullptr;
+ NewEntry->Index = StringEntriesForEmission.size();
+
+ if (StringEntriesForEmission.empty())
+ NewEntry->Offset = 0;
+ else {
+ StringEntry *PrevString = StringEntriesForEmission.back();
+ NewEntry->Offset =
+ PrevString->getValue()->Offset + PrevString->getKeyLength() + 1;
+ }
+
+ String->getValue() = NewEntry;
+ StringEntriesForEmission.push_back(String);
+ }
+
+ return String;
+ }
+
+ /// Erase the contents of StringEntriesForEmission.
+ void clear() { StringEntriesForEmission.clear(); }
+
+ /// Enumerate all strings in sequential order and call \p Handler for each
+ /// string.
+ void forEach(function_ref<void(DwarfStringPoolEntryRef)> Handler) const {
+ for (const StringEntry *Entry : StringEntriesForEmission)
+ Handler(*Entry);
+ }
+
+ std::function<StringRef(StringRef)> getTranslator() {
+ return StringsTranslator;
+ }
+
+protected:
+ /// List of strings for emission.
+ StringsVector StringEntriesForEmission;
+
+ /// String pool for the translated strings.
+ StringPool &Strings;
+
+ /// Translator for the strings.
+ std::function<StringRef(StringRef)> StringsTranslator;
+};
+
+} // end of namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_DWARFLINKERPARALLEL_STRINGTABLE_H
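The offset bookkeeping in add() above is easiest to see with concrete values; a sketch without a translator. Each offset is the previous offset plus the previous key length plus one for the NUL terminator.

#include "llvm/DWARFLinkerParallel/StringTable.h"

void stringTableExample() {
  using namespace llvm::dwarflinker_parallel;
  StringPool Pool;
  StringTable Table(Pool, /*StringsTranslator=*/nullptr);

  StringEntry *Foo = Table.add(Pool.insert("foo").first);  // Index 0, Offset 0
  StringEntry *Bar = Table.add(Pool.insert("bar2").first); // Index 1, Offset 4 (= 0 + 3 + 1)
  StringEntry *X   = Table.add(Pool.insert("x").first);    // Index 2, Offset 9 (= 4 + 4 + 1)
  (void)Foo; (void)Bar; (void)X;
}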
diff --git a/llvm/include/llvm/DWP/DWP.h b/llvm/include/llvm/DWP/DWP.h
index 543354d86bbb..e33133d38ae0 100644
--- a/llvm/include/llvm/DWP/DWP.h
+++ b/llvm/include/llvm/DWP/DWP.h
@@ -60,7 +60,8 @@ struct CompileUnitIdentifiers {
const char *DWOName = "";
};
-Error write(MCStreamer &Out, ArrayRef<std::string> Inputs);
+Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
+ bool ContinueOnCuIndexOverflow);
unsigned getContributionIndex(DWARFSectionKind Kind, uint32_t IndexVersion);
diff --git a/llvm/lib/Target/BPF/BTF.def b/llvm/include/llvm/DebugInfo/BTF/BTF.def
index 1de0e51b4757..1de0e51b4757 100644
--- a/llvm/lib/Target/BPF/BTF.def
+++ b/llvm/include/llvm/DebugInfo/BTF/BTF.def
diff --git a/llvm/lib/Target/BPF/BTF.h b/llvm/include/llvm/DebugInfo/BTF/BTF.h
index 89852be4a8c8..c1c77cd447d6 100644
--- a/llvm/lib/Target/BPF/BTF.h
+++ b/llvm/include/llvm/DebugInfo/BTF/BTF.h
@@ -152,9 +152,9 @@ struct BTFEnum {
/// The exact number of BTFEnum64 is stored in the vlen (of the
/// info in "struct CommonType").
struct BTFEnum64 {
- uint32_t NameOff; ///< Enum name offset in the string table
- uint32_t Val_Lo32; ///< Enum member lo32 value
- uint32_t Val_Hi32; ///< Enum member hi32 value
+ uint32_t NameOff; ///< Enum name offset in the string table
+ uint32_t Val_Lo32; ///< Enum member lo32 value
+ uint32_t Val_Hi32; ///< Enum member hi32 value
};
/// BTF_KIND_ARRAY is followed by one "struct BTFArray".
@@ -218,10 +218,10 @@ struct ExtHeader {
uint8_t Flags;
uint32_t HdrLen;
- uint32_t FuncInfoOff; ///< Offset of func info section
- uint32_t FuncInfoLen; ///< Length of func info section
- uint32_t LineInfoOff; ///< Offset of line info section
- uint32_t LineInfoLen; ///< Length of line info section
+ uint32_t FuncInfoOff; ///< Offset of func info section
+ uint32_t FuncInfoLen; ///< Length of func info section
+ uint32_t LineInfoOff; ///< Offset of line info section
+ uint32_t LineInfoLen; ///< Length of line info section
uint32_t FieldRelocOff; ///< Offset of offset reloc section
uint32_t FieldRelocLen; ///< Length of offset reloc section
};
@@ -245,6 +245,8 @@ struct BPFLineInfo {
uint32_t LineOff; ///< Line index in the .BTF string table
uint32_t LineCol; ///< Line num: line_col >> 10,
/// col num: line_col & 0x3ff
+ uint32_t getLine() const { return LineCol >> 10; }
+ uint32_t getCol() const { return LineCol & 0x3ff; }
};
/// Specifying line info's in one section.
@@ -263,7 +265,7 @@ struct BPFFieldReloc {
/// Specifying offset relocation's in one section.
struct SecFieldReloc {
- uint32_t SecNameOff; ///< Section name index in the .BTF string table
+ uint32_t SecNameOff; ///< Section name index in the .BTF string table
uint32_t NumFieldReloc; ///< Number of offset reloc's in this section
};
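The getLine()/getCol() accessors added to BPFLineInfo above decode the 10-bit column packing of LineCol; a standalone illustration:

#include <cassert>
#include <cstdint>

// A location at line 42, column 7 is packed as (42 << 10) | 7 == 43015.
void lineColExample() {
  uint32_t LineCol = (42u << 10) | 7u;
  assert((LineCol >> 10) == 42 && (LineCol & 0x3ff) == 7);
}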
diff --git a/llvm/include/llvm/DebugInfo/BTF/BTFContext.h b/llvm/include/llvm/DebugInfo/BTF/BTFContext.h
new file mode 100644
index 000000000000..c16bee613322
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/BTF/BTFContext.h
@@ -0,0 +1,58 @@
+//===- BTFContext.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The BTFContext interface is used by the llvm-objdump tool to print source
+// code alongside disassembly.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_BTF_BTFCONTEXT_H
+#define LLVM_DEBUGINFO_BTF_BTFCONTEXT_H
+
+#include "llvm/DebugInfo/BTF/BTFParser.h"
+#include "llvm/DebugInfo/DIContext.h"
+
+namespace llvm {
+
+class BTFContext final : public DIContext {
+ BTFParser BTF;
+
+public:
+ BTFContext() : DIContext(CK_BTF) {}
+
+ void dump(raw_ostream &OS, DIDumpOptions DumpOpts) override {
+ // This function is called from objdump when --dwarf=? option is set.
+ // BTF is not DWARF, so ignore this operation for now.
+ }
+
+ DILineInfo getLineInfoForAddress(
+ object::SectionedAddress Address,
+ DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
+
+ DILineInfo
+ getLineInfoForDataAddress(object::SectionedAddress Address) override;
+
+ DILineInfoTable getLineInfoForAddressRange(
+ object::SectionedAddress Address, uint64_t Size,
+ DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
+
+ DIInliningInfo getInliningInfoForAddress(
+ object::SectionedAddress Address,
+ DILineInfoSpecifier Specifier = DILineInfoSpecifier()) override;
+
+ std::vector<DILocal>
+ getLocalsForAddress(object::SectionedAddress Address) override;
+
+ static std::unique_ptr<BTFContext> create(
+ const object::ObjectFile &Obj,
+ std::function<void(Error)> ErrorHandler = WithColor::defaultErrorHandler);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_BTF_BTFCONTEXT_H
diff --git a/llvm/include/llvm/DebugInfo/BTF/BTFParser.h b/llvm/include/llvm/DebugInfo/BTF/BTFParser.h
new file mode 100644
index 000000000000..33d0f32c7c55
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/BTF/BTFParser.h
@@ -0,0 +1,81 @@
+//===- BTFParser.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// BTFParser reads the .BTF and .BTF.ext ELF sections generated by the LLVM
+// BPF backend and provides introspection for the stored information.
+// Currently the following information is accessible:
+// - string table;
+// - instruction offset to line information mapping.
+//
+// See llvm/DebugInfo/BTF/BTF.h for some details about binary format
+// and links to Linux Kernel documentation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_BTF_BTFPARSER_H
+#define LLVM_DEBUGINFO_BTF_BTFPARSER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/DebugInfo/BTF/BTF.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/DataExtractor.h"
+
+namespace llvm {
+using object::ObjectFile;
+using object::SectionedAddress;
+using object::SectionRef;
+
+class BTFParser {
+ using BTFLinesVector = SmallVector<BTF::BPFLineInfo, 0>;
+
+ // In BTF, strings are stored as a contiguous memory region with
+ // individual strings separated by 0 bytes. Strings are identified
+ // by an offset in such region.
+ // The `StringsTable` points to this region in the parsed ObjectFile.
+ StringRef StringsTable;
+
+ // Maps ELF section number to instruction line number information.
+ // Each BTFLinesVector is sorted by `InsnOffset` to allow fast lookups.
+ DenseMap<uint64_t, BTFLinesVector> SectionLines;
+
+ struct ParseContext;
+ Error parseBTF(ParseContext &Ctx, SectionRef BTF);
+ Error parseBTFExt(ParseContext &Ctx, SectionRef BTFExt);
+ Error parseLineInfo(ParseContext &Ctx, DataExtractor &Extractor,
+ uint64_t LineInfoStart, uint64_t LineInfoEnd);
+
+public:
+ // Looks up a string in the .BTF section's string table.
+ // Offset is relative to string table start.
+ StringRef findString(uint32_t Offset) const;
+
+ // Search for line information for a specific address; the address
+ // match is exact (in contrast to DWARFContext).
+ // Returns nullptr if no information is found.
+ // If information is present, returns a pointer to an object
+ // owned by this class.
+ const BTF::BPFLineInfo *findLineInfo(SectionedAddress Address) const;
+
+ // Fills this instance of BTFParser with information stored in the .BTF
+ // and .BTF.ext sections of `Obj`. If this instance was already
+ // filled, old data is discarded.
+ //
+ // If the information cannot be parsed:
+ // - an error describing the failure is returned;
+ // - the state of the BTFParser might be incomplete but is not invalid;
+ // queries may be run against it, but some (or all) information
+ // might be unavailable.
+ Error parse(const ObjectFile &Obj);
+
+ // Return true if `Obj` has .BTF and .BTF.ext sections.
+ static bool hasBTFSections(const ObjectFile &Obj);
+};
+
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_BTF_BTFPARSER_H
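A sketch of the intended query flow for the parser above; the object file Obj and address Addr are assumed to come from the caller, and the error handling is deliberately non-fatal to match the parse() contract described in the header.

#include "llvm/DebugInfo/BTF/BTFParser.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

void printLineForAddress(const llvm::object::ObjectFile &Obj,
                         llvm::object::SectionedAddress Addr) {
  if (!llvm::BTFParser::hasBTFSections(Obj))
    return;

  llvm::BTFParser BTF;
  if (llvm::Error Err = BTF.parse(Obj))
    llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(), "BTF: ");

  // Queries stay valid after a parse error; they may simply return nothing.
  if (const llvm::BTF::BPFLineInfo *Info = BTF.findLineInfo(Addr))
    llvm::errs() << BTF.findString(Info->LineOff) << " (line "
                 << Info->getLine() << ", col " << Info->getCol() << ")\n";
}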
diff --git a/llvm/include/llvm/DebugInfo/CodeView/CodeView.h b/llvm/include/llvm/DebugInfo/CodeView/CodeView.h
index 010a82dd0e23..a9ad99a1d0a8 100644
--- a/llvm/include/llvm/DebugInfo/CodeView/CodeView.h
+++ b/llvm/include/llvm/DebugInfo/CodeView/CodeView.h
@@ -138,8 +138,8 @@ enum class CPUType : uint16_t {
D3D11_Shader = 0x100,
};
-/// These values correspond to the CV_CFL_LANG enumeration, and are documented
-/// here: https://msdn.microsoft.com/en-us/library/bw3aekw6.aspx
+/// These values correspond to the CV_CFL_LANG enumeration in the Microsoft
+/// Debug Interface Access SDK.
enum SourceLanguage : uint8_t {
C = 0x00,
Cpp = 0x01,
@@ -158,6 +158,8 @@ enum SourceLanguage : uint8_t {
JScript = 0x0e,
MSIL = 0x0f,
HLSL = 0x10,
+ ObjC = 0x11,
+ ObjCpp = 0x12,
Rust = 0x15,
diff --git a/llvm/include/llvm/DebugInfo/DIContext.h b/llvm/include/llvm/DebugInfo/DIContext.h
index 6866a6614b56..9ad27033ec11 100644
--- a/llvm/include/llvm/DebugInfo/DIContext.h
+++ b/llvm/include/llvm/DebugInfo/DIContext.h
@@ -37,7 +37,11 @@ struct DILineInfo {
std::string FileName;
std::string FunctionName;
std::string StartFileName;
+ // Full source corresponding to `FileName`
std::optional<StringRef> Source;
+ // Source code for this particular line
+ // (in case if `Source` is not available)
+ std::optional<StringRef> LineSource;
uint32_t Line = 0;
uint32_t Column = 0;
uint32_t StartLine = 0;
@@ -228,7 +232,7 @@ struct DIDumpOptions {
class DIContext {
public:
- enum DIContextKind { CK_DWARF, CK_PDB };
+ enum DIContextKind { CK_DWARF, CK_PDB, CK_BTF };
DIContext(DIContextKind K) : Kind(K) {}
virtual ~DIContext() = default;
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
index cff13dcf2955..02b402e86d23 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
@@ -12,6 +12,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
@@ -19,12 +20,12 @@
namespace llvm {
class DataExtractor;
-class DWARFFormValue;
class DWARFUnit;
class raw_ostream;
class DWARFAbbreviationDeclaration {
public:
+ enum class ExtractState { Complete, MoreItems };
struct AttributeSpec {
AttributeSpec(dwarf::Attribute A, dwarf::Form F, int64_t Value)
: Attr(A), Form(F), Value(Value) {
@@ -39,6 +40,13 @@ public:
this->ByteSize.ByteSize = *ByteSize;
}
+ DWARFFormValue getFormValue() const {
+ if (Form == dwarf::DW_FORM_implicit_const)
+ return DWARFFormValue::createFromSValue(Form, getImplicitConstValue());
+
+ return DWARFFormValue(Form);
+ }
+
dwarf::Attribute Attr;
dwarf::Form Form;
@@ -165,7 +173,7 @@ public:
getAttributeValueFromOffset(uint32_t AttrIndex, uint64_t Offset,
const DWARFUnit &U) const;
- bool extract(DataExtractor Data, uint64_t* OffsetPtr);
+ llvm::Expected<ExtractState> extract(DataExtractor Data, uint64_t *OffsetPtr);
void dump(raw_ostream &OS) const;
// Return an optional byte size of all attribute data in this abbreviation
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
index 278a1b871a97..ce5d2f6c1457 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
@@ -10,6 +10,7 @@
#define LLVM_DEBUGINFO_DWARF_DWARFACCELERATORTABLE_H
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
@@ -103,8 +104,10 @@ class AppleAcceleratorTable : public DWARFAcceleratorTable {
extractOffset(std::optional<DWARFFormValue> Value) const;
};
- struct Header Hdr;
- struct HeaderData HdrData;
+ Header Hdr;
+ HeaderData HdrData;
+ dwarf::FormParams FormParams;
+ uint32_t HashDataEntryLength;
bool IsValid = false;
/// Returns true if we should continue scanning for entries or false if we've
@@ -112,15 +115,90 @@ class AppleAcceleratorTable : public DWARFAcceleratorTable {
bool dumpName(ScopedPrinter &W, SmallVectorImpl<DWARFFormValue> &AtomForms,
uint64_t *DataOffset) const;
+ /// Reads a uint32_t from the accelerator table at Offset, which is
+ /// incremented by the number of bytes read.
+ std::optional<uint32_t> readU32FromAccel(uint64_t &Offset,
+ bool UseRelocation = false) const;
+
+ /// Reads a StringRef from the string table at Offset.
+ std::optional<StringRef>
+ readStringFromStrSection(uint64_t StringSectionOffset) const;
+
+ /// Return the offset into the section where the Buckets begin.
+ uint64_t getBucketBase() const { return sizeof(Hdr) + Hdr.HeaderDataLength; }
+
+ /// Return the offset into the section where the I-th bucket is.
+ uint64_t getIthBucketBase(uint32_t I) const {
+ return getBucketBase() + I * 4;
+ }
+
+ /// Return the offset into the section where the hash list begins.
+ uint64_t getHashBase() const { return getBucketBase() + getNumBuckets() * 4; }
+
+ /// Return the offset into the section where the I-th hash is.
+ uint64_t getIthHashBase(uint32_t I) const { return getHashBase() + I * 4; }
+
+ /// Return the offset into the section where the offset list begins.
+ uint64_t getOffsetBase() const { return getHashBase() + getNumHashes() * 4; }
+
+ /// Return the offset into the section where the table entries begin.
+ uint64_t getEntriesBase() const {
+ return getOffsetBase() + getNumHashes() * 4;
+ }
+
+ /// Return the offset into the section where the I-th offset is.
+ uint64_t getIthOffsetBase(uint32_t I) const {
+ return getOffsetBase() + I * 4;
+ }
+
+ /// Returns the index of the bucket where a hypothetical Hash would be.
+ uint32_t hashToBucketIdx(uint32_t Hash) const {
+ return Hash % getNumBuckets();
+ }
+
+ /// Returns true iff a hypothetical Hash would be assigned to the BucketIdx-th
+ /// bucket.
+ bool wouldHashBeInBucket(uint32_t Hash, uint32_t BucketIdx) const {
+ return hashToBucketIdx(Hash) == BucketIdx;
+ }
+
+ /// Reads the contents of the I-th bucket, that is, the index in the hash list
+ /// where the hashes corresponding to this bucket begin.
+ std::optional<uint32_t> readIthBucket(uint32_t I) const {
+ uint64_t Offset = getIthBucketBase(I);
+ return readU32FromAccel(Offset);
+ }
+
+ /// Reads the I-th hash in the hash list.
+ std::optional<uint32_t> readIthHash(uint32_t I) const {
+ uint64_t Offset = getIthHashBase(I);
+ return readU32FromAccel(Offset);
+ }
+
+ /// Reads the I-th offset in the offset list.
+ std::optional<uint32_t> readIthOffset(uint32_t I) const {
+ uint64_t Offset = getIthOffsetBase(I);
+ return readU32FromAccel(Offset);
+ }
+
+ /// Reads a string offset from the accelerator table at Offset, which is
+ /// incremented by the number of bytes read.
+ std::optional<uint32_t> readStringOffsetAt(uint64_t &Offset) const {
+ return readU32FromAccel(Offset, /*UseRelocation*/ true);
+ }
+
+ /// Scans through all Hashes in the BucketIdx-th bucket, attempting to find
+ /// HashToFind. If it is found, its index in the list of hashes is returned.
+ std::optional<uint32_t> idxOfHashInBucket(uint32_t HashToFind,
+ uint32_t BucketIdx) const;
+
public:
/// Apple-specific implementation of an Accelerator Entry.
class Entry final : public DWARFAcceleratorTable::Entry {
- const HeaderData *HdrData = nullptr;
-
- Entry(const HeaderData &Data);
- Entry() = default;
+ const AppleAcceleratorTable &Table;
- void extract(const AppleAcceleratorTable &AccelTable, uint64_t *Offset);
+ Entry(const AppleAcceleratorTable &Table);
+ void extract(uint64_t *Offset);
public:
std::optional<uint64_t> getCUOffset() const override;
@@ -141,40 +219,82 @@ public:
friend class ValueIterator;
};
- class ValueIterator {
- const AppleAcceleratorTable *AccelTable = nullptr;
- Entry Current; ///< The current entry.
- uint64_t DataOffset = 0; ///< Offset into the section.
- unsigned Data = 0; ///< Current data entry.
- unsigned NumData = 0; ///< Number of data entries.
-
- /// Advance the iterator.
- void Next();
+ /// An iterator for Entries all having the same string as key.
+ class SameNameIterator
+ : public iterator_facade_base<SameNameIterator, std::forward_iterator_tag,
+ Entry> {
+ Entry Current;
+ uint64_t Offset = 0;
public:
- using iterator_category = std::input_iterator_tag;
- using value_type = Entry;
- using difference_type = std::ptrdiff_t;
- using pointer = value_type *;
- using reference = value_type &;
-
/// Construct a new iterator for the entries at \p DataOffset.
- ValueIterator(const AppleAcceleratorTable &AccelTable, uint64_t DataOffset);
- /// End marker.
- ValueIterator() = default;
+ SameNameIterator(const AppleAcceleratorTable &AccelTable,
+ uint64_t DataOffset);
- const Entry &operator*() const { return Current; }
- ValueIterator &operator++() { Next(); return *this; }
- ValueIterator operator++(int) {
- ValueIterator I = *this;
- Next();
- return I;
+ const Entry &operator*() {
+ uint64_t OffsetCopy = Offset;
+ Current.extract(&OffsetCopy);
+ return Current;
}
- friend bool operator==(const ValueIterator &A, const ValueIterator &B) {
- return A.NumData == B.NumData && A.DataOffset == B.DataOffset;
+ SameNameIterator &operator++() {
+ Offset += Current.Table.getHashDataEntryLength();
+ return *this;
}
- friend bool operator!=(const ValueIterator &A, const ValueIterator &B) {
- return !(A == B);
+ friend bool operator==(const SameNameIterator &A,
+ const SameNameIterator &B) {
+ return A.Offset == B.Offset;
+ }
+ };
+
+ struct EntryWithName {
+ EntryWithName(const AppleAcceleratorTable &Table)
+ : BaseEntry(Table), StrOffset(0) {}
+
+ std::optional<StringRef> readName() const {
+ return BaseEntry.Table.readStringFromStrSection(StrOffset);
+ }
+
+ Entry BaseEntry;
+ uint32_t StrOffset;
+ };
+
+ /// An iterator for all entries in the table.
+ class Iterator
+ : public iterator_facade_base<Iterator, std::forward_iterator_tag,
+ EntryWithName> {
+ constexpr static auto EndMarker = std::numeric_limits<uint64_t>::max();
+
+ EntryWithName Current;
+ uint64_t Offset = EndMarker;
+ uint32_t NumEntriesToCome = 0;
+
+ void setToEnd() { Offset = EndMarker; }
+ bool isEnd() const { return Offset == EndMarker; }
+ const AppleAcceleratorTable &getTable() const {
+ return Current.BaseEntry.Table;
+ }
+
+ /// Reads the next Entry in the table, populating `Current`.
+ /// If not possible (e.g. end of the section), becomes the end iterator.
+ void prepareNextEntryOrEnd();
+
+ /// Reads the next string pointer and the entry count for that string,
+ /// populating `NumEntriesToCome`.
+ /// If not possible (e.g. end of the section), becomes the end iterator.
+ /// Assumes `Offset` points to a string reference.
+ void prepareNextStringOrEnd();
+
+ public:
+ Iterator(const AppleAcceleratorTable &Table, bool SetEnd = false);
+
+ Iterator &operator++() {
+ prepareNextEntryOrEnd();
+ return *this;
+ }
+ bool operator==(const Iterator &It) const { return Offset == It.Offset; }
+ const EntryWithName &operator*() const {
+ assert(!isEnd() && "dereferencing end iterator");
+ return Current;
}
};
@@ -183,14 +303,24 @@ public:
: DWARFAcceleratorTable(AccelSection, StringSection) {}
Error extract() override;
- uint32_t getNumBuckets();
- uint32_t getNumHashes();
- uint32_t getSizeHdr();
- uint32_t getHeaderDataLength();
+ uint32_t getNumBuckets() const;
+ uint32_t getNumHashes() const;
+ uint32_t getSizeHdr() const;
+ uint32_t getHeaderDataLength() const;
+
+ /// Returns the size of one HashData entry.
+ uint32_t getHashDataEntryLength() const { return HashDataEntryLength; }
/// Return the Atom description, which can be used to interpret the raw values
/// of the Accelerator Entries in this table.
ArrayRef<std::pair<HeaderData::AtomType, HeaderData::Form>> getAtomsDesc();
+
+ /// Returns true iff `AtomTy` is one of the atoms available in Entries of this
+ /// table.
+ bool containsAtomType(HeaderData::AtomType AtomTy) const {
+ return is_contained(make_first_range(HdrData.Atoms), AtomTy);
+ }
+
bool validateForms();
/// Return information related to the DWARF DIE we're looking for when
@@ -205,7 +335,12 @@ public:
void dump(raw_ostream &OS) const override;
/// Look up all entries in the accelerator table matching \c Key.
- iterator_range<ValueIterator> equal_range(StringRef Key) const;
+ iterator_range<SameNameIterator> equal_range(StringRef Key) const;
+
+ /// Look up all entries in the accelerator table.
+ auto entries() const {
+ return make_range(Iterator(*this), Iterator(*this, /*SetEnd*/ true));
+ }
};
/// .debug_names section consists of one or more units. Each unit starts with a
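The two lookup styles exposed by the reworked AppleAcceleratorTable read naturally as ranges; a sketch assuming Table has already been extracted successfully and that "main" is an illustrative key.

#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/Support/raw_ostream.h"

void dumpAppleTable(const llvm::AppleAcceleratorTable &Table) {
  // Whole-table iteration: every entry paired with its name string.
  for (const auto &E : Table.entries())
    if (std::optional<llvm::StringRef> Name = E.readName())
      llvm::outs() << *Name << '\n';

  // Per-name lookup: every HashData entry recorded under "main".
  for (const auto &Entry : Table.equal_range("main"))
    if (std::optional<uint64_t> CUOff = Entry.getCUOffset())
      llvm::outs() << "CU offset: " << *CUOff << '\n';
}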
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 4c464418a3e0..27720ac746ee 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -10,6 +10,7 @@
#define LLVM_DEBUGINFO_DWARF_DWARFCONTEXT_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DIContext.h"
@@ -21,7 +22,7 @@
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/Host.h"
+#include "llvm/TargetParser/Host.h"
#include <cstdint>
#include <memory>
@@ -445,7 +446,16 @@ public:
/// address.
/// TODO: change input parameter from "uint64_t Address"
/// into "SectionedAddress Address"
- DWARFCompileUnit *getCompileUnitForAddress(uint64_t Address);
+ DWARFCompileUnit *getCompileUnitForCodeAddress(uint64_t Address);
+
+ /// Return the compile unit which contains data with the provided address.
+ /// Note: This is more expensive than `getCompileUnitForCodeAddress`: if
+ /// `Address` isn't found in the CU ranges (which is cheap), it falls
+ /// back to an expensive O(n) walk of all CUs looking for data that spans the
+ /// address.
+ /// TODO: change input parameter from "uint64_t Address" into
+ /// "SectionedAddress Address"
+ DWARFCompileUnit *getCompileUnitForDataAddress(uint64_t Address);
/// Returns whether CU/TU should be populated manually. TU Index populated
/// manually only for DWARF5.
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
index 52a88f2c390a..8e4aa3aa61e9 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
@@ -34,7 +34,7 @@ public:
uint64_t getOffset() const { return Offset; }
void dump(raw_ostream &OS) const;
- bool extract(DataExtractor Data, uint64_t *OffsetPtr);
+ Error extract(DataExtractor Data, uint64_t *OffsetPtr);
const DWARFAbbreviationDeclaration *
getAbbreviationDeclaration(uint32_t AbbrCode) const;
@@ -49,6 +49,8 @@ public:
std::string getCodeRange() const;
+ uint32_t getFirstAbbrCode() const { return FirstAbbrCode; }
+
private:
void clear();
};
@@ -62,26 +64,22 @@ class DWARFDebugAbbrev {
mutable std::optional<DataExtractor> Data;
public:
- DWARFDebugAbbrev();
+ DWARFDebugAbbrev(DataExtractor Data);
- const DWARFAbbreviationDeclarationSet *
+ Expected<const DWARFAbbreviationDeclarationSet *>
getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const;
void dump(raw_ostream &OS) const;
void parse() const;
- void extract(DataExtractor Data);
DWARFAbbreviationDeclarationSetMap::const_iterator begin() const {
- parse();
+ assert(!Data && "Must call parse before iterating over DWARFDebugAbbrev");
return AbbrDeclSets.begin();
}
DWARFAbbreviationDeclarationSetMap::const_iterator end() const {
return AbbrDeclSets.end();
}
-
-private:
- void clear();
};
} // end namespace llvm
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
index 93c2c9110d39..bc35f2ab988e 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugFrame.h
@@ -11,10 +11,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator.h"
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/Support/Error.h"
+#include "llvm/TargetParser/Triple.h"
#include <map>
#include <memory>
#include <vector>
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index de9902ae2ebc..ce3bae6a1760 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -167,6 +167,10 @@ public:
/// An unsigned integer whose value encodes the applicable instruction set
/// architecture for the current instruction.
uint8_t Isa;
+ /// An unsigned integer representing the index of an operation within a
+ /// VLIW instruction. The index of the first operation is 0.
+ /// For non-VLIW architectures, this register will always be 0.
+ uint8_t OpIndex;
/// A boolean indicating that the current instruction is the beginning of a
/// statement.
uint8_t IsStmt : 1,
@@ -355,6 +359,7 @@ public:
private:
DWARFUnit *prepareToParse(uint64_t Offset);
void moveToNextTable(uint64_t OldOffset, const Prologue &P);
+ bool hasValidVersion(uint64_t Offset);
LineToUnitMap LineToUnit;
@@ -372,29 +377,37 @@ private:
void resetRowAndSequence();
void appendRowToMatrix();
- /// Advance the address by the \p OperationAdvance value. \returns the
- /// amount advanced by.
- uint64_t advanceAddr(uint64_t OperationAdvance, uint8_t Opcode,
- uint64_t OpcodeOffset);
+ struct AddrOpIndexDelta {
+ uint64_t AddrOffset;
+ int16_t OpIndexDelta;
+ };
+
+ /// Advance the address and op-index by the \p OperationAdvance value.
+ /// \returns the amount advanced by.
+ AddrOpIndexDelta advanceAddrOpIndex(uint64_t OperationAdvance,
+ uint8_t Opcode, uint64_t OpcodeOffset);
- struct AddrAndAdjustedOpcode {
+ struct OpcodeAdvanceResults {
uint64_t AddrDelta;
+ int16_t OpIndexDelta;
uint8_t AdjustedOpcode;
};
- /// Advance the address as required by the specified \p Opcode.
+ /// Advance the address and op-index as required by the specified \p Opcode.
/// \returns the amount advanced by and the calculated adjusted opcode.
- AddrAndAdjustedOpcode advanceAddrForOpcode(uint8_t Opcode,
- uint64_t OpcodeOffset);
+ OpcodeAdvanceResults advanceForOpcode(uint8_t Opcode,
+ uint64_t OpcodeOffset);
- struct AddrAndLineDelta {
+ struct SpecialOpcodeDelta {
uint64_t Address;
int32_t Line;
+ int16_t OpIndex;
};
- /// Advance the line and address as required by the specified special \p
- /// Opcode. \returns the address and line delta.
- AddrAndLineDelta handleSpecialOpcode(uint8_t Opcode, uint64_t OpcodeOffset);
+ /// Advance the line, address and op-index as required by the specified
+ /// special \p Opcode. \returns the address, op-index and line delta.
+ SpecialOpcodeDelta handleSpecialOpcode(uint8_t Opcode,
+ uint64_t OpcodeOffset);
/// Line table we're currently parsing.
struct LineTable *LineTable;
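The new OpIndex register and advanceAddrOpIndex() above model the DWARF v5 operation-advance rule; the snippet below is a standalone illustration of that arithmetic under the v5 spec, not the parser's actual code.

#include <cstdint>

struct LineRegs {
  uint64_t Address;
  uint8_t OpIndex;
};

// Advance by OperationAdvance operations, carrying whole instructions into
// the address and keeping the remainder in OpIndex.
LineRegs advance(LineRegs R, uint64_t OperationAdvance,
                 uint8_t MinInstLength, uint8_t MaxOpsPerInst) {
  uint64_t Sum = R.OpIndex + OperationAdvance;
  R.Address += MinInstLength * (Sum / MaxOpsPerInst);
  R.OpIndex = Sum % MaxOpsPerInst;
  return R;
}
// E.g. with MinInstLength=4, MaxOpsPerInst=3, OpIndex=2, an operation advance
// of 2 grows the address by 4 bytes and leaves OpIndex at 1.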
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
index 7fde32711745..00228a32173f 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h
@@ -24,8 +24,7 @@ class DWARFExpression {
public:
class iterator;
- /// This class represents an Operation in the Expression. Each operation can
- /// have up to 2 oprerands.
+ /// This class represents an Operation in the Expression.
///
/// An Operation can be in Error state (check with isError()). This
/// means that it couldn't be decoded successfully and if it is the
@@ -43,6 +42,9 @@ public:
SizeRefAddr = 6,
SizeBlock = 7, ///< Preceding operand contains block size
BaseTypeRef = 8,
+ /// The operand is a ULEB128 encoded SubOpcode. This is only valid
+ /// for the first operand of an operation.
+ SizeSubOpLEB = 9,
WasmLocationArg = 30,
SignBit = 0x80,
SignedSize1 = SignBit | Size1,
@@ -50,7 +52,6 @@ public:
SignedSize4 = SignBit | Size4,
SignedSize8 = SignBit | Size8,
SignedSizeLEB = SignBit | SizeLEB,
- SizeNA = 0xFF ///< Unused operands get this encoding.
};
enum DwarfVersion : uint8_t {
@@ -64,14 +65,13 @@ public:
/// Description of the encoding of one expression Op.
struct Description {
DwarfVersion Version; ///< Dwarf version where the Op was introduced.
- Encoding Op[2]; ///< Encoding for Op operands, or SizeNA.
-
- Description(DwarfVersion Version = DwarfNA, Encoding Op1 = SizeNA,
- Encoding Op2 = SizeNA)
- : Version(Version) {
- Op[0] = Op1;
- Op[1] = Op2;
- }
+ SmallVector<Encoding> Op; ///< Encoding for Op operands.
+
+ template <typename... Ts>
+ Description(DwarfVersion Version, Ts... Op)
+ : Version(Version), Op{Op...} {}
+ Description() : Description(DwarfNA) {}
+ ~Description() = default;
};
private:
@@ -80,13 +80,19 @@ public:
Description Desc;
bool Error = false;
uint64_t EndOffset;
- uint64_t Operands[2];
- uint64_t OperandEndOffsets[2];
+ SmallVector<uint64_t> Operands;
+ SmallVector<uint64_t> OperandEndOffsets;
public:
const Description &getDescription() const { return Desc; }
uint8_t getCode() const { return Opcode; }
+ std::optional<unsigned> getSubCode() const;
+ uint64_t getNumOperands() const { return Operands.size(); }
+ ArrayRef<uint64_t> getRawOperands() const { return Operands; };
uint64_t getRawOperand(unsigned Idx) const { return Operands[Idx]; }
+ ArrayRef<uint64_t> getOperandEndOffsets() const {
+ return OperandEndOffsets;
+ }
uint64_t getOperandEndOffset(unsigned Idx) const {
return OperandEndOffsets[Idx];
}
@@ -165,7 +171,7 @@ public:
static bool prettyPrintRegisterOp(DWARFUnit *U, raw_ostream &OS,
DIDumpOptions DumpOpts, uint8_t Opcode,
- const uint64_t Operands[2]);
+ const ArrayRef<uint64_t> Operands);
private:
DataExtractor Data;
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
index 1951d085e5dc..2dcd7805b6c9 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
@@ -38,7 +38,6 @@ public:
FC_Exprloc
};
-private:
struct ValueType {
ValueType() { uval = 0; }
ValueType(int64_t V) : sval(V) {}
@@ -51,10 +50,11 @@ private:
const char *cstr;
};
const uint8_t *data = nullptr;
- uint64_t SectionIndex; /// Section index for reference forms.
+ uint64_t SectionIndex; /// Section index for reference forms.
};
- dwarf::Form Form; /// Form for this value.
+private:
+ dwarf::Form Form; /// Form for this value.
dwarf::DwarfFormat Format =
dwarf::DWARF32; /// Remember the DWARF format at extract time.
ValueType Value; /// Contains all data for the form.
@@ -73,6 +73,9 @@ public:
ArrayRef<uint8_t> D);
static DWARFFormValue createFromUnit(dwarf::Form F, const DWARFUnit *Unit,
uint64_t *OffsetPtr);
+ static std::optional<object::SectionedAddress>
+ getAsSectionedAddress(const ValueType &Val, const dwarf::Form Form,
+ const DWARFUnit *U);
dwarf::Form getForm() const { return Form; }
uint64_t getRawUValue() const { return Value.uval; }
@@ -349,6 +352,14 @@ toBlock(const std::optional<DWARFFormValue> &V) {
return std::nullopt;
}
+/// Check whether specified \p Form belongs to the \p FC class.
+/// \param Form an attribute form.
+/// \param FC an attribute form class to check.
+/// \param DwarfVersion the version of DWARF debug info keeping the attribute.
+/// \returns true if specified \p Form belongs to the \p FC class.
+bool doesFormBelongToClass(dwarf::Form Form, DWARFFormValue::FormClass FC,
+ uint16_t DwarfVersion);
+
} // end namespace dwarf
} // end namespace llvm
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFLocationExpression.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFLocationExpression.h
index b221f9cc9279..8b5497f4eeb9 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFLocationExpression.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFLocationExpression.h
@@ -19,7 +19,7 @@ class raw_ostream;
/// Typically used in DW_AT_location attributes to describe the location of
/// objects.
struct DWARFLocationExpression {
- /// The address range in which this expression is valid. None denotes a
+ /// The address range in which this expression is valid. std::nullopt denotes a
/// default entry which is valid in addresses not covered by other location
/// expressions, or everywhere if there are no other expressions.
std::optional<DWARFAddressRange> Range;
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index b4978cc80d1b..b42c951598d6 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -43,6 +43,9 @@ class DWARFObject;
class raw_ostream;
struct DIDumpOptions;
struct DWARFSection;
+namespace dwarflinker_parallel {
+class CompileUnit;
+}
/// Base class describing the header of any kind of "unit." Some information
/// is specific to certain unit types. We separate this class out so we can
@@ -253,6 +256,8 @@ class DWARFUnit {
std::shared_ptr<DWARFUnit> DWO;
protected:
+ friend dwarflinker_parallel::CompileUnit;
+
/// Return the index of a \p Die entry inside the unit's DIE vector.
///
/// It is illegal to call this method with a DIE that hasn't be
@@ -350,6 +355,15 @@ public:
return AddrOffsetSectionBase;
}
+ /// Returns offset to the indexed address value inside .debug_addr section.
+ std::optional<uint64_t> getIndexedAddressOffset(uint64_t Index) {
+ if (std::optional<uint64_t> AddrOffsetSectionBase =
+ getAddrOffsetSectionBase())
+ return *AddrOffsetSectionBase + Index * getAddressByteSize();
+
+ return std::nullopt;
+ }
+
/// Recursively update address to Die map.
void updateAddressDieMap(DWARFDie Die);
diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
index b9ead366cb23..ac890cdf065f 100644
--- a/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
+++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFVerifier.h
@@ -338,6 +338,17 @@ public:
/// \returns true if the existing Apple-style accelerator tables verify
/// successfully, false otherwise.
bool handleAccelTables();
+
+ /// Verify the information in the .debug_str_offsets[.dwo].
+ ///
+ /// Any errors are reported to the stream that this object was
+ /// constructed with.
+ ///
+ /// \returns true if the .debug_str_offsets section verifies successfully,
+ /// false otherwise.
+ bool handleDebugStrOffsets();
+ bool verifyDebugStrOffsets(
+ StringRef SectionName, const DWARFSection &Section, StringRef StrData,
+ void (DWARFObject::*)(function_ref<void(const DWARFSection &)>) const);
};
static inline bool operator<(const DWARFVerifier::DieRangeInfo &LHS,
diff --git a/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h b/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h
index 84b568759722..74811240e0b5 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/FileWriter.h
@@ -113,6 +113,8 @@ public:
return OS;
}
+ llvm::support::endianness getByteOrder() const { return ByteOrder; }
+
private:
FileWriter(const FileWriter &rhs) = delete;
void operator=(const FileWriter &rhs) = delete;
diff --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
index 713e3c239e95..cf917bf294cf 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -9,6 +9,7 @@
#ifndef LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
#define LLVM_DEBUGINFO_GSYM_FUNCTIONINFO_H
+#include "llvm/ADT/SmallString.h"
#include "llvm/DebugInfo/GSYM/ExtractRanges.h"
#include "llvm/DebugInfo/GSYM/InlineInfo.h"
#include "llvm/DebugInfo/GSYM/LineTable.h"
@@ -90,6 +91,10 @@ struct FunctionInfo {
uint32_t Name; ///< String table offset in the string table.
std::optional<LineTable> OptLineTable;
std::optional<InlineInfo> Inline;
+ /// If we encode a FunctionInfo during segmenting so we know its size, we can
+ /// cache that encoding here so we don't need to re-encode it when saving the
+ /// GSYM file.
+ SmallString<32> EncodingCache;
FunctionInfo(uint64_t Addr = 0, uint64_t Size = 0, uint32_t N = 0)
: Range(Addr, Addr + Size), Name(N) {}
@@ -140,6 +145,17 @@ struct FunctionInfo {
/// function info that was successfully written into the stream.
llvm::Expected<uint64_t> encode(FileWriter &O) const;
+ /// Encode this function info into the internal byte cache and return the size
+ /// in bytes.
+ ///
+ /// When segmenting GSYM files we need to know how big each FunctionInfo will
+ /// encode into so we can generate segments of the right size. We don't want
+ /// to have to encode a FunctionInfo twice, so we can cache the encoded bytes
+ /// and re-use them when calling FunctionInfo::encode(...).
+ ///
+ /// \returns The size in bytes of the FunctionInfo if it were to be encoded
+ /// into a byte stream.
+ uint64_t cacheEncoding();
/// Lookup an address within a FunctionInfo object's data stream.
///
diff --git a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
index 2eac8b43f006..bca3a83cc685 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymCreator.h
@@ -137,6 +137,8 @@ class GsymCreator {
StringTableBuilder StrTab;
StringSet<> StringStorage;
DenseMap<llvm::gsym::FileEntry, uint32_t> FileEntryToIndex;
+ // Needed for mapping string offsets back to the string stored in \a StrTab.
+ DenseMap<uint64_t, CachedHashStringRef> StringOffsetMap;
std::vector<llvm::gsym::FileEntry> Files;
std::vector<uint8_t> UUID;
std::optional<AddressRanges> ValidTextRanges;
@@ -145,6 +147,141 @@ class GsymCreator {
bool Finalized = false;
bool Quiet;
+
+ /// Get the first function start address.
+ ///
+ /// \returns The start address of the first FunctionInfo or std::nullopt if
+ /// there are no function infos.
+ std::optional<uint64_t> getFirstFunctionAddress() const;
+
+ /// Get the last function address.
+ ///
+ /// \returns The start address of the last FunctionInfo or std::nullopt if
+ /// there are no function infos.
+ std::optional<uint64_t> getLastFunctionAddress() const;
+
+ /// Get the base address to use for this GSYM file.
+ ///
+ /// \returns The base address to put into the header and to use when creating
+ /// the address offset table, or std::nullopt if there are no valid
+ /// function infos or if the base address wasn't specified.
+ std::optional<uint64_t> getBaseAddress() const;
+
+ /// Get the size of an address offset in the address offset table.
+ ///
+ /// GSYM files store offsets from the base address in the address offset table
+ /// and we store the size of the address offsets in the GSYM header. This
+ /// function will calculate the size in bytes of these address offsets based
+ /// on the current contents of the GSYM file.
+ ///
+ /// \returns The size in bytes of the address offsets.
+ uint8_t getAddressOffsetSize() const;
+
+ /// Get the maximum address offset for the current address offset size.
+ ///
+ /// This is used when creating the address offset table to ensure we have
+ /// values that are in range so we don't end up truncating address offsets
+ /// when creating GSYM files as the code evolves.
+ ///
+ /// \returns The maximum address offset value that will be encoded into a GSYM
+ /// file.
+ uint64_t getMaxAddressOffset() const;
+
+ /// Calculate the byte size of the GSYM header and tables sizes.
+ ///
+ /// This function will calculate the exact size in bytes of the encoded GSYM
+ /// for the following items:
+ /// - The GSYM header
+ /// - The Address offset table
+ /// - The Address info offset table
+ /// - The file table
+ /// - The string table
+ ///
+ /// This is used to help split GSYM files into segments.
+ ///
+ /// \returns Size in bytes the GSYM header and tables.
+ uint64_t calculateHeaderAndTableSize() const;
+
+ /// Copy a FunctionInfo from the \a SrcGC GSYM creator into this creator.
+ ///
+ /// Copy the function info and only the needed files and strings and add a
+ /// converted FunctionInfo into this object. This is used to segment GSYM
+ /// files into separate files while only transferring the files and strings
+ /// that are needed from \a SrcGC.
+ ///
+ /// \param SrcGC The source gsym creator to copy from.
+ /// \param FuncInfoIdx The function info index within \a SrcGC to copy.
+ /// \returns The number of bytes it will take to encode the function info in
+ /// this GsymCreator. This helps calculate the size of the current GSYM
+ /// segment file.
+ uint64_t copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncInfoIdx);
+
+ /// Copy a string from \a SrcGC into this object.
+ ///
+ /// Copy a string from \a SrcGC by string table offset into this GSYM creator.
+ /// If a string has already been copied, the uniqued string table offset will
+ /// be returned, otherwise the string will be copied and a unique offset will
+ /// be returned.
+ ///
+ /// \param SrcGC The source gsym creator to copy from.
+ /// \param StrOff The string table offset from \a SrcGC to copy.
+ /// \returns The new string table offset of the string within this object.
+ uint32_t copyString(const GsymCreator &SrcGC, uint32_t StrOff);
+
+ /// Copy a file from \a SrcGC into this object.
+ ///
+ /// Copy a file from \a SrcGC by file index into this GSYM creator. Files
+ /// consist of two string table entries, one for the directory and one for the
+ /// filename. This function will copy any needed strings to ensure the file is
+ /// uniqued within this object. If a file already exists in this GSYM creator,
+ /// the uniqued index will be returned; otherwise the strings will be copied and
+ /// the new file index will be returned.
+ ///
+ /// \param SrcGC The source gsym creator to copy from.
+ /// \param FileIdx The 1-based file table index within \a SrcGC to copy. A
+ /// file index of zero will always return zero, as zero is a reserved file
+ /// index that means no file.
+ /// \returns The new file index of the file within this object.
+ uint32_t copyFile(const GsymCreator &SrcGC, uint32_t FileIdx);
+
+ /// Inserts a FileEntry into the file table.
+ ///
+ /// This is used to insert a file entry in a thread safe way into this object.
+ ///
+ /// \param FE A file entry object that contains valid string table offsets
+ /// from this object already.
+ uint32_t insertFileEntry(FileEntry FE);
+
+ /// Fixup any string and file references by updating any file indexes and
+ /// strings offsets in the InlineInfo parameter.
+ ///
+ /// When copying InlineInfo entries, we can simply make a copy of the object
+ /// and then fixup the files and strings for efficiency.
+ ///
+ /// \param SrcGC The source gsym creator to copy from.
+ /// \param II The inline info that contains file indexes and string offsets
+ /// that come from \a SrcGC. The entries will be updated by copying any files
+ /// and strings over into this object.
+ void fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II);
+
+ /// Save this GSYM file into segments that are roughly \a SegmentSize in size.
+ ///
+ /// When segmented GSYM files are saved to disk, they will use \a Path as a
+ /// prefix and then have the first function info address appended to the path
+ /// when each segment is saved. Each segmented GSYM file has only the
+ /// strings and files that are needed to save the function infos that are in
+ /// each segment. These smaller files are easy to compress and download
+ /// separately, and they allow for efficient lookups with very large GSYM
+ /// files; segmenting them allows servers to download only the segments that
+ /// are needed.
+ ///
+ /// \param Path The path prefix to use when saving the GSYM files.
+ /// \param ByteOrder The endianness to use when saving the file.
+ /// \param SegmentSize The size in bytes to segment the GSYM file into.
+ llvm::Error saveSegments(StringRef Path,
+ llvm::support::endianness ByteOrder,
+ uint64_t SegmentSize) const;
+
public:
GsymCreator(bool Quiet = false);
@@ -152,8 +289,18 @@ public:
///
/// \param Path The file path to save the GSYM file to.
/// \param ByteOrder The endianness to use when saving the file.
+ /// \param SegmentSize The size in bytes to segment the GSYM file into. If
+ /// this option is set, this function will create N segments
+ /// that are all around \a SegmentSize bytes in size. This
+ /// allows a very large GSYM file to be broken up into
+ /// shards. Each GSYM file will have its own file table
+ /// and string table that only contain the files and
+ /// strings needed for the shard. If this argument has no value,
+ /// a single GSYM file that contains all function
+ /// information will be created.
/// \returns An error object that indicates success or failure of the save.
- llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder) const;
+ llvm::Error save(StringRef Path, llvm::support::endianness ByteOrder,
+ std::optional<uint64_t> SegmentSize = std::nullopt) const;
/// Encode a GSYM into the file writer stream at the current position.
///
@@ -291,6 +438,28 @@ public:
/// Whether the transformation should be quiet, i.e. not output warnings.
bool isQuiet() const { return Quiet; }
+
+
+ /// Create a segmented GSYM creator starting with function info index
+ /// \a FuncIdx.
+ ///
+ /// This function will create a GsymCreator object that will encode into
+ /// roughly \a SegmentSize bytes and return it. It is used by the private
+ /// saveSegments(...) function and also is used by the GSYM unit tests to test
+ /// segmenting of GSYM files. The returned GsymCreator can be finalized and
+ /// encoded.
+ ///
+ /// \param [in] SegmentSize The size in bytes to roughly segment the GSYM file
+ /// into.
+ /// \param [in,out] FuncIdx The index of the first function info to encode
+ /// into the returned GsymCreator. This index will be updated so it can be
+ /// used in subsequent calls to this function to allow more segments to be
+ /// created.
+ /// \returns An expected unique pointer to a GsymCreator or an error. The
+ /// returned unique pointer can be NULL if there are no more functions to
+ /// encode.
+ llvm::Expected<std::unique_ptr<GsymCreator>>
+ createSegment(uint64_t SegmentSize, size_t &FuncIdx) const;
};
} // namespace gsym
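A sketch of driving createSegment() directly, mirroring what the private saveSegments() is described to do; normally callers just pass SegmentSize to save(). Finalizing and writing each returned segment to its address-suffixed path is elided here, and the helper name is illustrative.

#include "llvm/DebugInfo/GSYM/GsymCreator.h"

llvm::Expected<size_t> countSegments(const llvm::gsym::GsymCreator &GC,
                                     uint64_t SegmentSize) {
  size_t FuncIdx = 0, NumSegments = 0;
  while (true) {
    llvm::Expected<std::unique_ptr<llvm::gsym::GsymCreator>> Seg =
        GC.createSegment(SegmentSize, FuncIdx);   // advances FuncIdx
    if (!Seg)
      return Seg.takeError();
    if (!*Seg)                 // null creator: no more function infos to encode
      return NumSegments;
    ++NumSegments;             // finalize/encode/save the segment here
  }
}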
diff --git a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
index 44e58f522002..9ccc96fbb4d5 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
@@ -52,6 +52,16 @@ struct LookupResult {
std::string getSourceFile(uint32_t Index) const;
};
+inline bool operator==(const LookupResult &LHS, const LookupResult &RHS) {
+ if (LHS.LookupAddr != RHS.LookupAddr)
+ return false;
+ if (LHS.FuncRange != RHS.FuncRange)
+ return false;
+ if (LHS.FuncName != RHS.FuncName)
+ return false;
+ return LHS.Locations == RHS.Locations;
+}
+
raw_ostream &operator<<(raw_ostream &OS, const LookupResult &R);
} // namespace gsym
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h
index 2603c4542e8d..17fa04040ad7 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVElement.h
@@ -15,7 +15,6 @@
#define LLVM_DEBUGINFO_LOGICALVIEW_CORE_LVELEMENT_H
#include "llvm/DebugInfo/LogicalView/Core/LVObject.h"
-#include "llvm/DebugInfo/LogicalView/Core/LVStringPool.h"
#include "llvm/Support/Casting.h"
#include <map>
#include <set>
@@ -206,6 +205,9 @@ public:
size_t getNameIndex() const { return NameIndex; }
size_t getQualifiedNameIndex() const { return QualifiedNameIndex; }
+ void setInnerComponent() { setInnerComponent(getName()); }
+ void setInnerComponent(StringRef Name);
+
// Element type name.
StringRef getTypeName() const;
@@ -254,7 +256,7 @@ public:
virtual void setDiscriminator(uint32_t Value) {}
// Process the values for a DW_TAG_enumerator.
- virtual std::string getValue() const { return {}; }
+ virtual StringRef getValue() const { return {}; }
virtual void setValue(StringRef Value) {}
virtual size_t getValueIndex() const { return 0; }
@@ -264,6 +266,13 @@ public:
StringRef
accessibilityString(uint32_t Access = dwarf::DW_ACCESS_private) const;
+ // CodeView Accessibility Codes.
+ std::optional<uint32_t> getAccessibilityCode(codeview::MemberAccess Access);
+ void setAccessibilityCode(codeview::MemberAccess Access) {
+ if (std::optional<uint32_t> Code = getAccessibilityCode(Access))
+ AccessibilityCode = Code.value();
+ }
+
// DWARF Inline Codes.
uint32_t getInlineCode() const { return InlineCode; }
void setInlineCode(uint32_t Code) { InlineCode = Code; }
@@ -275,6 +284,13 @@ public:
StringRef
virtualityString(uint32_t Virtuality = dwarf::DW_VIRTUALITY_none) const;
+ // CodeView Virtuality Codes.
+ std::optional<uint32_t> getVirtualityCode(codeview::MethodKind Virtuality);
+ void setVirtualityCode(codeview::MethodKind Virtuality) {
+ if (std::optional<uint32_t> Code = getVirtualityCode(Virtuality))
+ VirtualityCode = Code.value();
+ }
+
// DWARF Extern Codes.
StringRef externalString() const;
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVLocation.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVLocation.h
index 94edd83be336..3b556f992783 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVLocation.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVLocation.h
@@ -33,22 +33,17 @@ class LVOperation final {
// OP_[GNU_]deref_type, OP_[GNU_]entry_value, OP_implicit_value,
// OP_[GNU_]implicit_pointer, OP_[GNU_]regval_type, OP_xderef_type.
LVSmall Opcode = 0;
- uint64_t Operands[2];
+ SmallVector<uint64_t> Operands;
public:
LVOperation() = delete;
- LVOperation(LVSmall Opcode, LVUnsigned Operand1, LVUnsigned Operand2)
- : Opcode(Opcode) {
- Operands[0] = Operand1;
- Operands[1] = Operand2;
- }
+ LVOperation(LVSmall Opcode, ArrayRef<LVUnsigned> Operands)
+ : Opcode(Opcode), Operands(Operands) {}
LVOperation(const LVOperation &) = delete;
LVOperation &operator=(const LVOperation &) = delete;
~LVOperation() = default;
LVSmall getOpcode() const { return Opcode; }
- uint64_t getOperand1() const { return Operands[0]; }
- uint64_t getOperand2() const { return Operands[1]; }
std::string getOperandsDWARFInfo();
std::string getOperandsCodeViewInfo();
@@ -154,8 +149,7 @@ public:
virtual void addObject(LVAddress LowPC, LVAddress HighPC,
LVUnsigned SectionOffset, uint64_t LocDescOffset) {}
- virtual void addObject(LVSmall Opcode, LVUnsigned Operand1,
- LVUnsigned Operand2) {}
+ virtual void addObject(LVSmall Opcode, ArrayRef<LVUnsigned> Operands) {}
static void print(LVLocations *Locations, raw_ostream &OS, bool Full = true);
void printInterval(raw_ostream &OS, bool Full = true) const;
@@ -172,7 +166,7 @@ public:
class LVLocationSymbol final : public LVLocation {
// Location descriptors for the active range.
- LVAutoOperations *Entries = nullptr;
+ std::unique_ptr<LVOperations> Entries;
void updateKind() override;
@@ -180,12 +174,11 @@ public:
LVLocationSymbol() : LVLocation() {}
LVLocationSymbol(const LVLocationSymbol &) = delete;
LVLocationSymbol &operator=(const LVLocationSymbol &) = delete;
- ~LVLocationSymbol() { delete Entries; };
+ ~LVLocationSymbol() = default;
void addObject(LVAddress LowPC, LVAddress HighPC, LVUnsigned SectionOffset,
uint64_t LocDescOffset) override;
- void addObject(LVSmall Opcode, LVUnsigned Operand1,
- LVUnsigned Operand2) override;
+ void addObject(LVSmall Opcode, ArrayRef<LVUnsigned> Operands) override;
void printRawExtra(raw_ostream &OS, bool Full = true) const override;
void printExtra(raw_ostream &OS, bool Full = true) const override;
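
The two fixed operands become a single ArrayRef, so callers can pass zero, one, or many operands without dummy values. A hedged migration sketch follows; the function name and the operand names are placeholders.

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/DebugInfo/LogicalView/Core/LVLocation.h"

  using namespace llvm;
  using namespace llvm::logicalview;

  // Before this patch: Location->addObject(Opcode, Operand1, Operand2);
  // After: build exactly as many operands as the opcode actually has.
  static void recordOperation(LVLocation *Location, LVSmall Opcode,
                              LVUnsigned Reg, LVUnsigned Offset) {
    SmallVector<LVUnsigned, 2> Operands = {Reg, Offset};
    Location->addObject(Opcode, Operands);
  }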
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVObject.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVObject.h
index a097372bccfd..ca429d2c2893 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVObject.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVObject.h
@@ -74,21 +74,6 @@ using LVSymbolGetFunction = bool (LVSymbol::*)() const;
using LVTypeSetFunction = void (LVType::*)();
using LVTypeGetFunction = bool (LVType::*)() const;
-// The LVScope class represents a logical scope and uses vectors to store its
-// children, which are pointers to other allocated logical elements (types,
-// symbols, lines, scopes, ranges). On destruction, we have to traverse each
-// vector and destroy its elements. The other case is LVSymbol.
-// These definitions are intended to be used by the LVScope and LVSymbol
-// to support automatic vector cleanup.
-using LVAutoLines = LVAutoSmallVector<LVLine *>;
-using LVAutoLocations = LVAutoSmallVector<LVLocation *>;
-using LVAutoOperations = LVAutoSmallVector<LVOperation *, 8>;
-using LVAutoScopes = LVAutoSmallVector<LVScope *>;
-using LVAutoSymbols = LVAutoSmallVector<LVSymbol *>;
-using LVAutoTypes = LVAutoSmallVector<LVType *>;
-
-// These definitions are intended to be used when the vector will be used
-// just a container, with no automatic destruction.
using LVElements = SmallVector<LVElement *, 8>;
using LVLines = SmallVector<LVLine *, 8>;
using LVLocations = SmallVector<LVLocation *, 8>;
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVOptions.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVOptions.h
index e154243f4470..a0a360c0a434 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVOptions.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVOptions.h
@@ -511,7 +511,9 @@ class LVPatterns final {
void resolveGenericPatternMatch(T *Element, const U &Requests) {
assert(Element && "Element must not be nullptr");
auto CheckPattern = [=]() -> bool {
- return (Element->isNamed() && matchGenericPattern(Element->getName())) ||
+ return (Element->isNamed() &&
+ (matchGenericPattern(Element->getName()) ||
+ matchGenericPattern(Element->getLinkageName()))) ||
(Element->isTyped() &&
matchGenericPattern(Element->getTypeName()));
};
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVReader.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVReader.h
index ed1807ce8bf8..9ce26398e48d 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVReader.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVReader.h
@@ -54,6 +54,9 @@ public:
raw_fd_ostream &os() { return OutputFile->os(); }
};
+/// The logical reader owns all the logical elements created during
+/// the debug information parsing. To create them it uses a specific
+/// bump allocator for each type of logical element.
class LVReader {
LVBinaryType BinaryType;
@@ -74,6 +77,50 @@ class LVReader {
Error createSplitFolder();
bool OutputSplit = false;
+// Define a specific bump allocator for the given KIND.
+#define LV_OBJECT_ALLOCATOR(KIND) \
+ llvm::SpecificBumpPtrAllocator<LV##KIND> Allocated##KIND;
+
+ // Lines allocator.
+ LV_OBJECT_ALLOCATOR(Line)
+ LV_OBJECT_ALLOCATOR(LineDebug)
+ LV_OBJECT_ALLOCATOR(LineAssembler)
+
+ // Locations allocator.
+ LV_OBJECT_ALLOCATOR(Location)
+ LV_OBJECT_ALLOCATOR(LocationSymbol)
+
+ // Operations allocator.
+ LV_OBJECT_ALLOCATOR(Operation)
+
+ // Scopes allocator.
+ LV_OBJECT_ALLOCATOR(Scope)
+ LV_OBJECT_ALLOCATOR(ScopeAggregate)
+ LV_OBJECT_ALLOCATOR(ScopeAlias)
+ LV_OBJECT_ALLOCATOR(ScopeArray)
+ LV_OBJECT_ALLOCATOR(ScopeCompileUnit)
+ LV_OBJECT_ALLOCATOR(ScopeEnumeration)
+ LV_OBJECT_ALLOCATOR(ScopeFormalPack)
+ LV_OBJECT_ALLOCATOR(ScopeFunction)
+ LV_OBJECT_ALLOCATOR(ScopeFunctionInlined)
+ LV_OBJECT_ALLOCATOR(ScopeFunctionType)
+ LV_OBJECT_ALLOCATOR(ScopeNamespace)
+ LV_OBJECT_ALLOCATOR(ScopeRoot)
+ LV_OBJECT_ALLOCATOR(ScopeTemplatePack)
+
+ // Symbols allocator.
+ LV_OBJECT_ALLOCATOR(Symbol)
+
+ // Types allocator.
+ LV_OBJECT_ALLOCATOR(Type)
+ LV_OBJECT_ALLOCATOR(TypeDefinition)
+ LV_OBJECT_ALLOCATOR(TypeEnumerator)
+ LV_OBJECT_ALLOCATOR(TypeImport)
+ LV_OBJECT_ALLOCATOR(TypeParam)
+ LV_OBJECT_ALLOCATOR(TypeSubrange)
+
+#undef LV_OBJECT_ALLOCATOR
+
protected:
LVScopeRoot *Root = nullptr;
std::string InputFilename;
@@ -92,7 +139,7 @@ protected:
// Create the Scope Root.
virtual Error createScopes() {
- Root = new LVScopeRoot();
+ Root = createScopeRoot();
Root->setName(getFilename());
if (options().getAttributeFormat())
Root->setFileFormatName(FileFormatName);
@@ -129,9 +176,60 @@ public:
OS(W.getOStream()) {}
LVReader(const LVReader &) = delete;
LVReader &operator=(const LVReader &) = delete;
- virtual ~LVReader() {
- if (Root)
- delete Root;
+ virtual ~LVReader() = default;
+
+// Creates a logical object of the given KIND. The signatures of the created
+// functions look like:
+// ...
+// LVScope *createScope()
+// LVScopeRoot *createScopeRoot()
+// LVType *createType()
+// ...
+#define LV_CREATE_OBJECT(KIND) \
+ LV##KIND *create##KIND() { \
+ return new (Allocated##KIND.Allocate()) LV##KIND(); \
+ }
+
+ // Lines creation.
+ LV_CREATE_OBJECT(Line)
+ LV_CREATE_OBJECT(LineDebug)
+ LV_CREATE_OBJECT(LineAssembler)
+
+ // Locations creation.
+ LV_CREATE_OBJECT(Location)
+ LV_CREATE_OBJECT(LocationSymbol)
+
+ // Scopes creation.
+ LV_CREATE_OBJECT(Scope)
+ LV_CREATE_OBJECT(ScopeAggregate)
+ LV_CREATE_OBJECT(ScopeAlias)
+ LV_CREATE_OBJECT(ScopeArray)
+ LV_CREATE_OBJECT(ScopeCompileUnit)
+ LV_CREATE_OBJECT(ScopeEnumeration)
+ LV_CREATE_OBJECT(ScopeFormalPack)
+ LV_CREATE_OBJECT(ScopeFunction)
+ LV_CREATE_OBJECT(ScopeFunctionInlined)
+ LV_CREATE_OBJECT(ScopeFunctionType)
+ LV_CREATE_OBJECT(ScopeNamespace)
+ LV_CREATE_OBJECT(ScopeRoot)
+ LV_CREATE_OBJECT(ScopeTemplatePack)
+
+ // Symbols creation.
+ LV_CREATE_OBJECT(Symbol)
+
+ // Types creation.
+ LV_CREATE_OBJECT(Type)
+ LV_CREATE_OBJECT(TypeDefinition)
+ LV_CREATE_OBJECT(TypeEnumerator)
+ LV_CREATE_OBJECT(TypeImport)
+ LV_CREATE_OBJECT(TypeParam)
+ LV_CREATE_OBJECT(TypeSubrange)
+
+#undef LV_CREATE_OBJECT
+
+ // Operations creation.
+ LVOperation *createOperation(LVSmall OpCode, ArrayRef<LVUnsigned> Operands) {
+ return new (AllocatedOperation.Allocate()) LVOperation(OpCode, Operands);
}
StringRef getFilename(LVObject *Object, size_t Index) const;
@@ -150,6 +248,12 @@ public:
assert(Scope && Scope->isCompileUnit() && "Scope is not a compile unit");
CompileUnit = static_cast<LVScopeCompileUnit *>(Scope);
}
+ void setCompileUnitCPUType(codeview::CPUType Type) {
+ CompileUnit->setCPUType(Type);
+ }
+ codeview::CPUType getCompileUnitCPUType() {
+ return CompileUnit->getCPUType();
+ }
// Access to the scopes root.
LVScopeRoot *getScopesRoot() const { return Root; }
@@ -157,7 +261,8 @@ public:
Error doPrint();
Error doLoad();
- virtual std::string getRegisterName(LVSmall Opcode, uint64_t Operands[2]) {
+ virtual std::string getRegisterName(LVSmall Opcode,
+ ArrayRef<uint64_t> Operands) {
llvm_unreachable("Invalid instance reader.");
return {};
}
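
The allocator and factory macros above boil down to one pattern: placement-new into a per-type llvm::SpecificBumpPtrAllocator, which destroys every object it handed out when the reader itself is destroyed. A standalone sketch of that pattern, with Widget standing in for the LV* element classes:

  #include "llvm/Support/Allocator.h"
  #include <new>

  struct Widget {
    int Id = 0;
  };

  class Owner {
    llvm::SpecificBumpPtrAllocator<Widget> AllocatedWidget;

  public:
    Widget *createWidget() {
      // Allocate() returns uninitialized storage for one Widget; the allocator
      // runs ~Widget() for every allocated object when Owner is destroyed.
      return new (AllocatedWidget.Allocate()) Widget();
    }
  };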
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h
index 8204163f34de..1b3c377cd7db 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVScope.h
@@ -63,12 +63,11 @@ using LVScopeKindSet = std::set<LVScopeKind>;
using LVScopeDispatch = std::map<LVScopeKind, LVScopeGetFunction>;
using LVScopeRequest = std::vector<LVScopeGetFunction>;
-using LVOffsetList = std::list<LVOffset>;
using LVOffsetElementMap = std::map<LVOffset, LVElement *>;
-using LVOffsetLinesMap = std::map<LVOffset, LVLines *>;
-using LVOffsetLocationsMap = std::map<LVOffset, LVLocations *>;
+using LVOffsetLinesMap = std::map<LVOffset, LVLines>;
+using LVOffsetLocationsMap = std::map<LVOffset, LVLocations>;
using LVOffsetSymbolMap = std::map<LVOffset, LVSymbol *>;
-using LVTagOffsetsMap = std::map<dwarf::Tag, LVOffsetList *>;
+using LVTagOffsetsMap = std::map<dwarf::Tag, LVOffsets>;
// Class to represent a DWARF Scope.
class LVScope : public LVElement {
@@ -100,7 +99,8 @@ class LVScope : public LVElement {
// Calculate coverage factor.
void calculateCoverage() {
float CoveragePercentage = 0;
- LVLocation::calculateCoverage(Ranges, CoverageFactor, CoveragePercentage);
+ LVLocation::calculateCoverage(Ranges.get(), CoverageFactor,
+ CoveragePercentage);
}
// Decide if the scope will be printed, using some conditions given by:
@@ -117,11 +117,11 @@ class LVScope : public LVElement {
protected:
// Types, Symbols, Scopes, Lines, Locations in this scope.
- LVAutoTypes *Types = nullptr;
- LVAutoSymbols *Symbols = nullptr;
- LVAutoScopes *Scopes = nullptr;
- LVAutoLines *Lines = nullptr;
- LVAutoLocations *Ranges = nullptr;
+ std::unique_ptr<LVTypes> Types;
+ std::unique_ptr<LVSymbols> Symbols;
+ std::unique_ptr<LVScopes> Scopes;
+ std::unique_ptr<LVLines> Lines;
+ std::unique_ptr<LVLocations> Ranges;
// Vector of elements (types, scopes and symbols).
// It is the union of (*Types, *Symbols and *Scopes) to be used for
@@ -129,7 +129,7 @@ protected:
// - Preserve the order the logical elements are read in.
// - To have a single container with all the logical elements, when
// the traversal does not require any specific element kind.
- LVElements *Children = nullptr;
+ std::unique_ptr<LVElements> Children;
// Resolve the template parameters/arguments relationship.
void resolveTemplate();
@@ -150,7 +150,7 @@ public:
}
LVScope(const LVScope &) = delete;
LVScope &operator=(const LVScope &) = delete;
- virtual ~LVScope();
+ virtual ~LVScope() = default;
static bool classof(const LVElement *Element) {
return Element->getSubclassID() == LVSubclassID::LV_SCOPE;
@@ -202,12 +202,12 @@ public:
const char *kind() const override;
// Get the specific children.
- const LVLines *getLines() const { return Lines; }
- const LVLocations *getRanges() const { return Ranges; }
- const LVScopes *getScopes() const { return Scopes; }
- const LVSymbols *getSymbols() const { return Symbols; }
- const LVTypes *getTypes() const { return Types; }
- const LVElements *getChildren() const { return Children; }
+ const LVLines *getLines() const { return Lines.get(); }
+ const LVLocations *getRanges() const { return Ranges.get(); }
+ const LVScopes *getScopes() const { return Scopes.get(); }
+ const LVSymbols *getSymbols() const { return Symbols.get(); }
+ const LVTypes *getTypes() const { return Types.get(); }
+ const LVElements *getChildren() const { return Children.get(); }
void addElement(LVElement *Element);
void addElement(LVLine *Line);
@@ -410,6 +410,9 @@ class LVScopeCompileUnit final : public LVScope {
// Compilation directory name.
size_t CompilationDirectoryIndex = 0;
+ // Used by the CodeView Reader.
+ codeview::CPUType CompilationCPUType = codeview::CPUType::X64;
+
// Keep record of elements. They are needed at the compilation unit level
// to print the summary at the end of the printing.
LVCounter Allocated;
@@ -456,8 +459,8 @@ class LVScopeCompileUnit final : public LVScope {
LVOffsetLocationsMap *Map) {
LVOffset Offset = Element->getOffset();
addInvalidOffset(Offset, Element);
- addItem<LVOffsetLocationsMap, LVLocations, LVOffset, LVLocation *>(
- Map, Offset, Location);
+ addItem<LVOffsetLocationsMap, LVOffset, LVLocation *>(Map, Offset,
+ Location);
}
// Record scope sizes indexed by lexical level.
@@ -489,12 +492,7 @@ public:
}
LVScopeCompileUnit(const LVScopeCompileUnit &) = delete;
LVScopeCompileUnit &operator=(const LVScopeCompileUnit &) = delete;
- ~LVScopeCompileUnit() {
- deleteList<LVTagOffsetsMap>(DebugTags);
- deleteList<LVOffsetLocationsMap>(InvalidLocations);
- deleteList<LVOffsetLocationsMap>(InvalidRanges);
- deleteList<LVOffsetLinesMap>(LinesZero);
- }
+ ~LVScopeCompileUnit() = default;
LVScope *getCompileUnitParent() const override {
return static_cast<LVScope *>(const_cast<LVScopeCompileUnit *>(this));
@@ -542,6 +540,9 @@ public:
ProducerIndex = getStringPool().getIndex(ProducerName);
}
+ void setCPUType(codeview::CPUType Type) { CompilationCPUType = Type; }
+ codeview::CPUType getCPUType() { return CompilationCPUType; }
+
// Record DWARF tags.
void addDebugTag(dwarf::Tag Target, LVOffset Offset);
// Record elements with invalid offsets.
@@ -794,6 +795,10 @@ public:
FileFormatNameIndex = getStringPool().getIndex(FileFormatName);
}
+ // The CodeView Reader uses scoped names. Recursively transform the
+ // element name to use just the innermost component.
+ void transformScopedName();
+
// Process the collected location, ranges and calculate coverage.
void processRangeInformation();
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVStringPool.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVStringPool.h
index 671ccf5d0e15..4c596b5b1dde 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVStringPool.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVStringPool.h
@@ -71,11 +71,6 @@ public:
return (Index >= Entries.size()) ? StringRef() : Entries[Index]->getKey();
}
- static LVStringPool &getInstance() {
- static LVStringPool Instance;
- return Instance;
- }
-
void print(raw_ostream &OS) const {
if (!Entries.empty()) {
OS << "\nString Pool:\n";
@@ -90,8 +85,6 @@ public:
#endif
};
-inline LVStringPool &getStringPool() { return LVStringPool::getInstance(); }
-
} // namespace logicalview
} // end namespace llvm
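
With the singleton accessor removed from this header, getStringPool() is now declared in LVSupport.h (next hunk). Usage is unchanged: a string is interned once and elements keep only its index. A small hedged sketch, assuming the real getIndex()/getString() signatures shown elsewhere in this patch:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/DebugInfo/LogicalView/Core/LVSupport.h"

  using namespace llvm;
  using namespace llvm::logicalview;

  // Mirrors the getValue()/setValue() overrides later in this patch, which
  // store only the size_t index returned by the pool.
  static size_t internName(StringRef Name) {
    return getStringPool().getIndex(Name);
  }
  static StringRef lookupName(size_t Index) {
    return getStringPool().getString(Index);
  }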
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVSupport.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVSupport.h
index bff1499c1a60..50f2c9a09ff5 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVSupport.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVSupport.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/DebugInfo/LogicalView/Core/LVStringPool.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
@@ -27,24 +28,13 @@
namespace llvm {
namespace logicalview {
-template <typename T>
-using TypeIsValid = std::bool_constant<std::is_pointer<T>::value>;
+// Returns the unique string pool instance.
+LVStringPool &getStringPool();
-// Utility class to help memory management and perform an automatic cleaning.
-template <typename T, unsigned N = 8>
-class LVAutoSmallVector : public SmallVector<T, N> {
- static_assert(TypeIsValid<T>::value, "T must be a pointer type");
-
-public:
- using iterator = typename SmallVector<T, N>::iterator;
- LVAutoSmallVector() : SmallVector<T, N>::SmallVector() {}
-
- ~LVAutoSmallVector() {
- // Destroy the constructed elements in the vector.
- for (auto *Item : *this)
- delete Item;
- }
-};
+using LVStringRefs = std::vector<StringRef>;
+using LVLexicalComponent = std::tuple<StringRef, StringRef>;
+using LVLexicalIndex =
+ std::tuple<LVStringRefs::size_type, LVStringRefs::size_type>;
// Used to record specific characteristics about the objects.
template <typename T> class LVProperties {
@@ -143,25 +133,10 @@ std::string formatAttributes(const StringRef First, Args... Others) {
return Stream.str();
}
-// Add an item to a map with second being a list.
-template <typename MapType, typename ListType, typename KeyType,
- typename ValueType>
+// Add an item to a map whose mapped value is a small vector.
+template <typename MapType, typename KeyType, typename ValueType>
void addItem(MapType *Map, KeyType Key, ValueType Value) {
- ListType *List = nullptr;
- typename MapType::const_iterator Iter = Map->find(Key);
- if (Iter != Map->end())
- List = Iter->second;
- else {
- List = new ListType();
- Map->emplace(Key, List);
- }
- List->push_back(Value);
-}
-
-// Delete the map contained list.
-template <typename MapType> void deleteList(MapType &Map) {
- for (typename MapType::const_reference Entry : Map)
- delete Entry.second;
+ (*Map)[Key].push_back(Value);
}
// Double map data structure.
@@ -170,33 +145,26 @@ class LVDoubleMap {
static_assert(std::is_pointer<ValueType>::value,
"ValueType must be a pointer.");
using LVSecondMapType = std::map<SecondKeyType, ValueType>;
- using LVFirstMapType = std::map<FirstKeyType, LVSecondMapType *>;
+ using LVFirstMapType =
+ std::map<FirstKeyType, std::unique_ptr<LVSecondMapType>>;
using LVAuxMapType = std::map<SecondKeyType, FirstKeyType>;
using LVValueTypes = std::vector<ValueType>;
LVFirstMapType FirstMap;
LVAuxMapType AuxMap;
public:
- LVDoubleMap() = default;
- ~LVDoubleMap() {
- for (auto &Entry : FirstMap)
- delete Entry.second;
- }
-
void add(FirstKeyType FirstKey, SecondKeyType SecondKey, ValueType Value) {
- LVSecondMapType *SecondMap = nullptr;
typename LVFirstMapType::iterator FirstIter = FirstMap.find(FirstKey);
if (FirstIter == FirstMap.end()) {
- SecondMap = new LVSecondMapType();
- FirstMap.emplace(FirstKey, SecondMap);
+ auto SecondMapSP = std::make_unique<LVSecondMapType>();
+ SecondMapSP->emplace(SecondKey, Value);
+ FirstMap.emplace(FirstKey, std::move(SecondMapSP));
} else {
- SecondMap = FirstIter->second;
+ LVSecondMapType *SecondMap = FirstIter->second.get();
+ if (SecondMap->find(SecondKey) == SecondMap->end())
+ SecondMap->emplace(SecondKey, Value);
}
- assert(SecondMap && "SecondMap is null.");
- if (SecondMap && SecondMap->find(SecondKey) == SecondMap->end())
- SecondMap->emplace(SecondKey, Value);
-
typename LVAuxMapType::iterator AuxIter = AuxMap.find(SecondKey);
if (AuxIter == AuxMap.end()) {
AuxMap.emplace(SecondKey, FirstKey);
@@ -208,8 +176,7 @@ public:
if (FirstIter == FirstMap.end())
return nullptr;
- LVSecondMapType *SecondMap = FirstIter->second;
- return SecondMap;
+ return FirstIter->second.get();
}
ValueType find(FirstKeyType FirstKey, SecondKeyType SecondKey) const {
@@ -235,8 +202,8 @@ public:
if (FirstMap.empty())
return Values;
for (typename LVFirstMapType::const_reference FirstEntry : FirstMap) {
- LVSecondMapType *SecondMap = FirstEntry.second;
- for (typename LVSecondMapType::const_reference SecondEntry : *SecondMap)
+ LVSecondMapType &SecondMap = *FirstEntry.second;
+ for (typename LVSecondMapType::const_reference SecondEntry : SecondMap)
Values.push_back(SecondEntry.second);
}
return Values;
@@ -259,6 +226,15 @@ inline std::string formattedNames(StringRef Name1, StringRef Name2) {
return (Twine("'") + Twine(Name1) + Twine(Name2) + Twine("'")).str();
}
+// The given string represents a symbol or type name with optional enclosing
+// scopes, such as: name, name<..>, scope::name, scope::..::name, etc.
+// The string can have multiple references to template instantiations.
+// getInnerComponent returns the innermost component.
+LVLexicalComponent getInnerComponent(StringRef Name);
+LVStringRefs getAllLexicalComponents(StringRef Name);
+std::string getScopedName(const LVStringRefs &Components,
+ StringRef BaseName = {});
+
// These are the values assigned to the debug location record IDs.
// See DebugInfo/CodeView/CodeViewSymbols.def.
// S_DEFRANGE 0x113f
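
The rewritten addItem above relies on the map now holding its vectors by value: operator[] default-constructs an empty vector on first use, so the old list allocation and deleteList() pass disappear. A standalone usage sketch with stand-in types:

  #include "llvm/ADT/SmallVector.h"
  #include <cstdint>
  #include <map>

  using LVOffset = uint64_t; // Stand-in for the real alias.
  using LVOffsetValuesMap = std::map<LVOffset, llvm::SmallVector<int, 8>>;

  template <typename MapType, typename KeyType, typename ValueType>
  static void addItem(MapType *Map, KeyType Key, ValueType Value) {
    (*Map)[Key].push_back(Value); // Creates the vector on first insertion.
  }

  static void example() {
    LVOffsetValuesMap Map;
    addItem<LVOffsetValuesMap, LVOffset, int>(&Map, /*Key=*/0x10, 1);
    addItem<LVOffsetValuesMap, LVOffset, int>(&Map, /*Key=*/0x10, 2);
    // Map[0x10] now holds {1, 2}; the map's destructor cleans everything up.
  }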
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVSymbol.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVSymbol.h
index b9628e312784..25bfa9eb77d8 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVSymbol.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVSymbol.h
@@ -46,7 +46,7 @@ class LVSymbol final : public LVElement {
// Reference to DW_AT_specification, DW_AT_abstract_origin attribute.
LVSymbol *Reference = nullptr;
- LVAutoLocations *Locations = nullptr;
+ std::unique_ptr<LVLocations> Locations;
LVLocation *CurrentLocation = nullptr;
// Bitfields length.
@@ -60,8 +60,8 @@ class LVSymbol final : public LVElement {
float CoveragePercentage = 0;
// Add a location gap into the location list.
- LVAutoLocations::iterator addLocationGap(LVAutoLocations::iterator Pos,
- LVAddress LowPC, LVAddress HighPC);
+ LVLocations::iterator addLocationGap(LVLocations::iterator Pos,
+ LVAddress LowPC, LVAddress HighPC);
// Find the current symbol in the given 'Targets'.
LVSymbol *findIn(const LVSymbols *Targets) const;
@@ -73,7 +73,7 @@ public:
}
LVSymbol(const LVSymbol &) = delete;
LVSymbol &operator=(const LVSymbol &) = delete;
- ~LVSymbol() { delete Locations; }
+ ~LVSymbol() = default;
static bool classof(const LVElement *Element) {
return Element->getSubclassID() == LVSubclassID::LV_SYMBOL;
@@ -115,8 +115,8 @@ public:
void setBitSize(uint32_t Size) override { BitSize = Size; }
// Process the values for a DW_AT_const_value.
- std::string getValue() const override {
- return std::string(getStringPool().getString(ValueIndex));
+ StringRef getValue() const override {
+ return getStringPool().getString(ValueIndex);
}
void setValue(StringRef Value) override {
ValueIndex = getStringPool().getIndex(Value);
@@ -126,8 +126,7 @@ public:
// Add a Location Entry.
void addLocationConstant(dwarf::Attribute Attr, LVUnsigned Constant,
uint64_t LocDescOffset);
- void addLocationOperands(LVSmall Opcode, uint64_t Operand1,
- uint64_t Operand2);
+ void addLocationOperands(LVSmall Opcode, ArrayRef<uint64_t> Operands);
void addLocation(dwarf::Attribute Attr, LVAddress LowPC, LVAddress HighPC,
LVUnsigned SectionOffset, uint64_t LocDescOffset,
bool CallSiteLocation = false);
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVType.h b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVType.h
index 4d377ce2ff87..28881b3c95b1 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Core/LVType.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Core/LVType.h
@@ -182,8 +182,8 @@ public:
~LVTypeEnumerator() = default;
// Process the values for a DW_TAG_enumerator.
- std::string getValue() const override {
- return std::string(getStringPool().getString(ValueIndex));
+ StringRef getValue() const override {
+ return getStringPool().getString(ValueIndex);
}
void setValue(StringRef Value) override {
ValueIndex = getStringPool().getIndex(Value);
@@ -222,8 +222,8 @@ public:
~LVTypeParam() = default;
// Template parameter value.
- std::string getValue() const override {
- return std::string(getStringPool().getString(ValueIndex));
+ StringRef getValue() const override {
+ return getStringPool().getString(ValueIndex);
}
void setValue(StringRef Value) override {
ValueIndex = getStringPool().getIndex(Value);
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/LVReaderHandler.h b/llvm/include/llvm/DebugInfo/LogicalView/LVReaderHandler.h
index 3030e9f24ed8..bf30501d00c1 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/LVReaderHandler.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/LVReaderHandler.h
@@ -27,7 +27,7 @@
namespace llvm {
namespace logicalview {
-using LVReaders = std::vector<LVReader *>;
+using LVReaders = std::vector<std::unique_ptr<LVReader>>;
using ArgVector = std::vector<std::string>;
using PdbOrObj = PointerUnion<object::ObjectFile *, pdb::PDBFile *>;
@@ -45,7 +45,6 @@ class LVReaderHandler {
LVReaders TheReaders;
Error createReaders();
- void destroyReaders();
Error printReaders();
Error compareReaders();
@@ -59,6 +58,8 @@ class LVReaderHandler {
object::MachOUniversalBinary &Mach);
Error handleObject(LVReaders &Readers, StringRef Filename,
object::Binary &Binary);
+ Error handleObject(LVReaders &Readers, StringRef Filename, StringRef Buffer,
+ StringRef ExePath);
Error createReader(StringRef Filename, LVReaders &Readers, PdbOrObj &Input,
StringRef FileFormatName, StringRef ExePath = {});
@@ -72,20 +73,18 @@ public:
}
LVReaderHandler(const LVReaderHandler &) = delete;
LVReaderHandler &operator=(const LVReaderHandler &) = delete;
- ~LVReaderHandler() { destroyReaders(); }
Error createReader(StringRef Filename, LVReaders &Readers) {
return handleFile(Readers, Filename);
}
Error process();
- Expected<LVReader *> createReader(StringRef Pathname) {
+ Expected<std::unique_ptr<LVReader>> createReader(StringRef Pathname) {
LVReaders Readers;
if (Error Err = createReader(Pathname, Readers))
return std::move(Err);
- return Readers[0];
+ return std::move(Readers[0]);
}
- void deleteReader(LVReader *Reader) { delete Reader; }
void print(raw_ostream &OS) const;
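
Callers of createReader() now receive an owning pointer, so the old deleteReader() call goes away. A hedged sketch of the updated calling convention; the handler setup and what is done with the reader are assumed, not shown by this patch.

  #include "llvm/DebugInfo/LogicalView/LVReaderHandler.h"
  #include "llvm/Support/Error.h"
  #include <memory>

  using namespace llvm;
  using namespace llvm::logicalview;

  static Error processOne(LVReaderHandler &Handler, StringRef Path) {
    Expected<std::unique_ptr<LVReader>> ReaderOrErr = Handler.createReader(Path);
    if (!ReaderOrErr)
      return ReaderOrErr.takeError();
    std::unique_ptr<LVReader> Reader = std::move(*ReaderOrErr);
    // Use the reader; it is destroyed automatically when it goes out of
    // scope, which replaces the old explicit deleteReader() call.
    return Error::success();
  }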
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h
index 8c3cce9286a3..a66cf4608823 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h
@@ -24,6 +24,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Object/COFF.h"
#include "llvm/Object/ObjectFile.h"
namespace llvm {
@@ -69,6 +70,12 @@ class LVBinaryReader : public LVReader {
// Function names extracted from the object symbol table.
LVSymbolTable SymbolTable;
+ // It contains the LVLineDebug elements representing the inlined logical
+ // lines for the current compile unit, created by parsing the CodeView
+ // S_INLINESITE symbol annotation data.
+ using LVInlineeLine = std::map<LVScope *, std::unique_ptr<LVLines>>;
+ LVInlineeLine CUInlineeLines;
+
// Instruction lines for a logical scope. These instructions are fetched
// during its merge with the debug lines.
LVDoubleMap<LVSectionIndex, LVScope *, LVLines *> ScopeInstructions;
@@ -89,7 +96,7 @@ class LVBinaryReader : public LVReader {
// Scopes with ranges for current compile unit. It is used to find a line
// giving its exact or closest address. To support comdat functions, all
// addresses for the same section are recorded in the same map.
- using LVSectionRanges = std::map<LVSectionIndex, LVRange *>;
+ using LVSectionRanges = std::map<LVSectionIndex, std::unique_ptr<LVRange>>;
LVSectionRanges SectionRanges;
// Image base and virtual address for Executable file.
@@ -100,6 +107,8 @@ class LVBinaryReader : public LVReader {
using LVSections = std::map<LVSectionIndex, object::SectionRef>;
LVSections Sections;
+ std::vector<std::unique_ptr<LVLines>> DiscoveredLines;
+
protected:
// It contains the LVLineDebug elements representing the logical lines for
// the current compile unit, created by parsing the debug line section.
@@ -133,6 +142,8 @@ protected:
LVAddress LowerAddress, LVAddress UpperAddress);
LVRange *getSectionRanges(LVSectionIndex SectionIndex);
+ void includeInlineeLines(LVSectionIndex SectionIndex, LVScope *Function);
+
Error createInstructions();
Error createInstructions(LVScope *Function, LVSectionIndex SectionIndex);
Error createInstructions(LVScope *Function, LVSectionIndex SectionIndex,
@@ -149,7 +160,17 @@ public:
: LVReader(Filename, FileFormatName, W, BinaryType) {}
LVBinaryReader(const LVBinaryReader &) = delete;
LVBinaryReader &operator=(const LVBinaryReader &) = delete;
- virtual ~LVBinaryReader();
+ virtual ~LVBinaryReader() = default;
+
+ void addInlineeLines(LVScope *Scope, LVLines &Lines) {
+ CUInlineeLines.emplace(Scope, std::make_unique<LVLines>(std::move(Lines)));
+ }
+
+ // Convert a Segment:Offset pair to an absolute address.
+ LVAddress linearAddress(uint16_t Segment, uint32_t Offset,
+ LVAddress Addendum = 0) {
+ return ImageBaseAddress + (Segment * VirtualAddress) + Offset + Addendum;
+ }
void addToSymbolTable(StringRef Name, LVScope *Function,
LVSectionIndex SectionIndex = 0);
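
linearAddress() above folds a CodeView segment:offset pair into a flat address using the image base and a per-segment virtual size. A standalone sketch of the same arithmetic; the base and segment size below are made-up illustration values, not values the reader actually uses.

  #include <cassert>
  #include <cstdint>

  static uint64_t linearAddress(uint64_t ImageBase, uint64_t SegmentSize,
                                uint16_t Segment, uint32_t Offset,
                                uint64_t Addendum = 0) {
    return ImageBase + (Segment * SegmentSize) + Offset + Addendum;
  }

  static void check() {
    // With a hypothetical image base of 0x140000000 and 0x1000 bytes per
    // segment, segment 2 at offset 0x24 maps to 0x140002024.
    assert(linearAddress(0x140000000, 0x1000, 2, 0x24) == 0x140002024);
  }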
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVCodeViewReader.h b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVCodeViewReader.h
new file mode 100644
index 000000000000..8a32210bac3c
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVCodeViewReader.h
@@ -0,0 +1,236 @@
+//===-- LVCodeViewReader.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LVCodeViewReader class, which is used to describe a
+// debug information (COFF) reader.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_LOGICALVIEW_READERS_CODEVIEWREADER_H
+#define LLVM_DEBUGINFO_LOGICALVIEW_READERS_CODEVIEWREADER_H
+
+#include "llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h"
+#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h"
+#include "llvm/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDB.h"
+#include "llvm/Support/BinaryByteStream.h"
+#include "llvm/Support/BinaryItemStream.h"
+#include "llvm/Support/BinaryStreamArray.h"
+
+namespace llvm {
+template <> struct BinaryItemTraits<codeview::CVType> {
+ static size_t length(const codeview::CVType &Item) { return Item.length(); }
+ static ArrayRef<uint8_t> bytes(const codeview::CVType &Item) {
+ return Item.data();
+ }
+};
+
+namespace codeview {
+class LazyRandomTypeCollection;
+}
+namespace object {
+struct coff_section;
+}
+namespace pdb {
+class SymbolGroup;
+}
+namespace logicalview {
+
+class LVElement;
+class LVLine;
+class LVScope;
+class LVScopeCompileUnit;
+class LVSymbol;
+class LVType;
+class LVTypeVisitor;
+class LVSymbolVisitor;
+class LVSymbolVisitorDelegate;
+
+using LVNames = SmallVector<StringRef, 16>;
+
+// The ELF reader uses the DWARF constants to create the logical elements.
+// The DW_TAG_* and DW_AT_* are used to select the logical object and to
+// set specific attributes, such as name, type, etc.
+// As the CodeView constants are different from the DWARF constants, the
+// CodeView reader will map them to the DWARF ones.
+
+class LVCodeViewReader final : public LVBinaryReader {
+ friend class LVTypeVisitor;
+ friend class LVSymbolVisitor;
+ friend class LVSymbolVisitorDelegate;
+
+ using LVModules = std::vector<LVScope *>;
+ LVModules Modules;
+
+ // Encapsulates access to the input file and any dependent type server,
+ // including any precompiled header object.
+ llvm::pdb::InputFile Input;
+ std::shared_ptr<llvm::pdb::InputFile> TypeServer;
+ std::shared_ptr<LazyRandomTypeCollection> PrecompHeader;
+
+ // Persistence data when loading a type server.
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr = nullptr;
+ std::unique_ptr<MemoryBuffer> MemBuffer;
+ std::unique_ptr<llvm::pdb::IPDBSession> Session;
+ std::unique_ptr<llvm::pdb::NativeSession> PdbSession;
+
+ // Persistence data when loading a precompiled header.
+ BumpPtrAllocator BuilderAllocator;
+ std::unique_ptr<AppendingTypeTableBuilder> Builder;
+ std::unique_ptr<BinaryItemStream<CVType>> ItemStream;
+ std::unique_ptr<BinaryStreamReader> ReaderPrecomp;
+ std::vector<CVType> TypeArray;
+ CVTypeArray TypeStream;
+ CVTypeArray CVTypesPrecomp;
+
+ // Persistence data when loading an executable file.
+ std::unique_ptr<MemoryBuffer> BinaryBuffer;
+ std::unique_ptr<llvm::object::Binary> BinaryExecutable;
+
+ Error loadTargetInfo(const object::ObjectFile &Obj);
+ Error loadTargetInfo(const llvm::pdb::PDBFile &Pdb);
+
+ void mapRangeAddress(const object::ObjectFile &Obj,
+ const object::SectionRef &Section,
+ bool IsComdat) override;
+
+ llvm::object::COFFObjectFile &getObj() { return Input.obj(); }
+ llvm::pdb::PDBFile &getPdb() { return Input.pdb(); }
+ bool isObj() const { return Input.isObj(); }
+ bool isPdb() const { return Input.isPdb(); }
+ StringRef getFileName() { return Input.getFilePath(); }
+
+ // Pathname to executable image.
+ std::string ExePath;
+
+ LVOffset CurrentOffset = 0;
+ int32_t CurrentModule = -1;
+
+ using RelocMapTy = DenseMap<const llvm::object::coff_section *,
+ std::vector<llvm::object::RelocationRef>>;
+ RelocMapTy RelocMap;
+
+ // Object files have only one type stream that contains both types and ids.
+ // Precompiled header objects don't contain an IPI stream. Use the TPI.
+ LazyRandomTypeCollection &types() {
+ return TypeServer ? TypeServer->types()
+ : (PrecompHeader ? *PrecompHeader : Input.types());
+ }
+ LazyRandomTypeCollection &ids() {
+ return TypeServer ? TypeServer->ids()
+ : (PrecompHeader ? *PrecompHeader : Input.ids());
+ }
+
+ LVLogicalVisitor LogicalVisitor;
+
+ Expected<StringRef>
+ getFileNameForFileOffset(uint32_t FileOffset,
+ const llvm::pdb::SymbolGroup *SG = nullptr);
+ void printRelocatedField(StringRef Label,
+ const llvm::object::coff_section *CoffSection,
+ uint32_t RelocOffset, uint32_t Offset,
+ StringRef *RelocSym);
+
+ Error printFileNameForOffset(StringRef Label, uint32_t FileOffset,
+ const llvm::pdb::SymbolGroup *SG = nullptr);
+
+ Error loadPrecompiledObject(PrecompRecord &Precomp, CVTypeArray &CVTypesObj);
+ Error loadTypeServer(TypeServer2Record &TS);
+ Error traverseTypes(llvm::pdb::PDBFile &Pdb, LazyRandomTypeCollection &Types,
+ LazyRandomTypeCollection &Ids);
+
+ Error collectInlineeInfo(DebugInlineeLinesSubsectionRef &Lines,
+ const llvm::pdb::SymbolGroup *SG = nullptr);
+
+ void cacheRelocations();
+ Error resolveSymbol(const llvm::object::coff_section *CoffSection,
+ uint64_t Offset, llvm::object::SymbolRef &Sym);
+ Error resolveSymbolName(const llvm::object::coff_section *CoffSection,
+ uint64_t Offset, StringRef &Name);
+ Error traverseTypeSection(StringRef SectionName,
+ const llvm::object::SectionRef &Section);
+ Error traverseSymbolSection(StringRef SectionName,
+ const llvm::object::SectionRef &Section);
+ Error traverseInlineeLines(StringRef Subsection);
+
+ DebugChecksumsSubsectionRef CVFileChecksumTable;
+ DebugStringTableSubsectionRef CVStringTable;
+
+ Error traverseSymbolsSubsection(StringRef Subsection,
+ const llvm::object::SectionRef &Section,
+ StringRef SectionContents);
+
+ /// Given a .debug$S section, find the string table and file checksum table.
+ /// This function is taken from COFFDumper.cpp.
+ /// TODO: It can be moved to the COFF library.
+ Error initializeFileAndStringTables(BinaryStreamReader &Reader);
+
+ Error createLines(const FixedStreamArray<LineNumberEntry> &LineNumbers,
+ LVAddress Addendum, uint32_t Segment, uint32_t Begin,
+ uint32_t Size, uint32_t NameIndex,
+ const llvm::pdb::SymbolGroup *SG = nullptr);
+ Error createScopes(llvm::object::COFFObjectFile &Obj);
+ Error createScopes(llvm::pdb::PDBFile &Pdb);
+ Error processModule();
+
+protected:
+ Error createScopes() override;
+ void sortScopes() override;
+
+public:
+ LVCodeViewReader() = delete;
+ LVCodeViewReader(StringRef Filename, StringRef FileFormatName,
+ llvm::object::COFFObjectFile &Obj, ScopedPrinter &W,
+ StringRef ExePath)
+ : LVBinaryReader(Filename, FileFormatName, W, LVBinaryType::COFF),
+ Input(&Obj), ExePath(ExePath), LogicalVisitor(this, W, Input) {}
+ LVCodeViewReader(StringRef Filename, StringRef FileFormatName,
+ llvm::pdb::PDBFile &Pdb, ScopedPrinter &W, StringRef ExePath)
+ : LVBinaryReader(Filename, FileFormatName, W, LVBinaryType::COFF),
+ Input(&Pdb), ExePath(ExePath), LogicalVisitor(this, W, Input) {}
+ LVCodeViewReader(const LVCodeViewReader &) = delete;
+ LVCodeViewReader &operator=(const LVCodeViewReader &) = delete;
+ ~LVCodeViewReader() = default;
+
+ void getLinkageName(const llvm::object::coff_section *CoffSection,
+ uint32_t RelocOffset, uint32_t Offset,
+ StringRef *RelocSym);
+
+ void addModule(LVScope *Scope) { Modules.push_back(Scope); }
+ LVScope *getScopeForModule(uint32_t Modi) {
+ return Modi >= Modules.size() ? nullptr : Modules[Modi];
+ }
+
+ // Get the string representation for the CodeView symbols.
+ static StringRef getSymbolKindName(SymbolKind Kind);
+ static std::string formatRegisterId(RegisterId Register, CPUType CPU);
+
+ std::string getRegisterName(LVSmall Opcode,
+ ArrayRef<uint64_t> Operands) override;
+
+ bool isSystemEntry(LVElement *Element, StringRef Name) const override;
+
+ void print(raw_ostream &OS) const;
+ void printRecords(raw_ostream &OS) const override {
+ LogicalVisitor.printRecords(OS);
+ };
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dump() const { print(dbgs()); }
+#endif
+};
+
+} // end namespace logicalview
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_LOGICALVIEW_READERS_CODEVIEWREADER_H
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.h b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.h
new file mode 100644
index 000000000000..3c461fd9e1e9
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.h
@@ -0,0 +1,477 @@
+//===-- LVCodeViewVisitor.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LVCodeViewVisitor class, which is used to describe a
+// debug information (CodeView) visitor.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_LOGICALVIEW_READERS_CODEVIEWVISITOR_H
+#define LLVM_DEBUGINFO_LOGICALVIEW_READERS_CODEVIEWVISITOR_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/DebugInfo/CodeView/SymbolDumpDelegate.h"
+#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/DebugInfo/LogicalView/Readers/LVBinaryReader.h"
+#include "llvm/DebugInfo/PDB/Native/InputFile.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Error.h"
+#include <stack>
+#include <utility>
+
+namespace llvm {
+namespace logicalview {
+
+using namespace llvm::codeview;
+
+class LVCodeViewReader;
+class LVLogicalVisitor;
+struct LVShared;
+
+class LVTypeVisitor final : public TypeVisitorCallbacks {
+ ScopedPrinter &W;
+ LVLogicalVisitor *LogicalVisitor;
+ LazyRandomTypeCollection &Types;
+ LazyRandomTypeCollection &Ids;
+ uint32_t StreamIdx;
+ LVShared *Shared = nullptr;
+
+ // In a PDB, a type index may refer to a type (TPI) or an item ID (IPI).
+ // In a COFF or PDB (/Z7), the type index always refers to a type (TPI).
+ // When creating logical elements, we must access the correct element
+ // table when searching for a type index.
+ bool HasIds = false;
+
+ // Current type index during the types traversal.
+ TypeIndex CurrentTypeIndex = TypeIndex::None();
+
+ void printTypeIndex(StringRef FieldName, TypeIndex TI,
+ uint32_t StreamIdx) const;
+
+public:
+ LVTypeVisitor(ScopedPrinter &W, LVLogicalVisitor *LogicalVisitor,
+ LazyRandomTypeCollection &Types, LazyRandomTypeCollection &Ids,
+ uint32_t StreamIdx, LVShared *Shared)
+ : TypeVisitorCallbacks(), W(W), LogicalVisitor(LogicalVisitor),
+ Types(Types), Ids(Ids), StreamIdx(StreamIdx), Shared(Shared) {
+ HasIds = &Types != &Ids;
+ }
+
+ Error visitTypeBegin(CVType &Record) override;
+ Error visitTypeBegin(CVType &Record, TypeIndex TI) override;
+ Error visitMemberBegin(CVMemberRecord &Record) override;
+ Error visitMemberEnd(CVMemberRecord &Record) override;
+ Error visitUnknownMember(CVMemberRecord &Record) override;
+
+ Error visitKnownRecord(CVType &Record, BuildInfoRecord &Args) override;
+ Error visitKnownRecord(CVType &Record, ClassRecord &Class) override;
+ Error visitKnownRecord(CVType &Record, EnumRecord &Enum) override;
+ Error visitKnownRecord(CVType &Record, FuncIdRecord &Func) override;
+ Error visitKnownRecord(CVType &Record, ProcedureRecord &Proc) override;
+ Error visitKnownRecord(CVType &Record, StringIdRecord &String) override;
+ Error visitKnownRecord(CVType &Record, UdtSourceLineRecord &Line) override;
+ Error visitKnownRecord(CVType &Record, UnionRecord &Union) override;
+ Error visitUnknownType(CVType &Record) override;
+};
+
+class LVSymbolVisitorDelegate final : public SymbolVisitorDelegate {
+ LVCodeViewReader *Reader;
+ const llvm::object::coff_section *CoffSection;
+ StringRef SectionContents;
+
+public:
+ LVSymbolVisitorDelegate(LVCodeViewReader *Reader,
+ const llvm::object::SectionRef &Section,
+ const llvm::object::COFFObjectFile *Obj,
+ StringRef SectionContents)
+ : Reader(Reader), SectionContents(SectionContents) {
+ CoffSection = Obj->getCOFFSection(Section);
+ }
+
+ uint32_t getRecordOffset(BinaryStreamReader Reader) override {
+ ArrayRef<uint8_t> Data;
+ if (Error Err = Reader.readLongestContiguousChunk(Data)) {
+ llvm::consumeError(std::move(Err));
+ return 0;
+ }
+ return Data.data() - SectionContents.bytes_begin();
+ }
+
+ void printRelocatedField(StringRef Label, uint32_t RelocOffset,
+ uint32_t Offset, StringRef *RelocSym = nullptr);
+
+ void getLinkageName(uint32_t RelocOffset, uint32_t Offset,
+ StringRef *RelocSym = nullptr);
+
+ StringRef getFileNameForFileOffset(uint32_t FileOffset) override;
+ DebugStringTableSubsectionRef getStringTable() override;
+};
+
+class LVElement;
+class LVScope;
+class LVSymbol;
+class LVType;
+
+// Visitor for CodeView symbol streams found in COFF object files and PDB files.
+class LVSymbolVisitor final : public SymbolVisitorCallbacks {
+ LVCodeViewReader *Reader;
+ ScopedPrinter &W;
+ LVLogicalVisitor *LogicalVisitor;
+ LazyRandomTypeCollection &Types;
+ LazyRandomTypeCollection &Ids;
+ LVSymbolVisitorDelegate *ObjDelegate;
+ LVShared *Shared;
+
+ // Symbol offset when processing PDB streams.
+ uint32_t CurrentOffset = 0;
+ // Current object name collected from S_OBJNAME.
+ StringRef CurrentObjectName;
+ // Last symbol processed by S_LOCAL.
+ LVSymbol *LocalSymbol = nullptr;
+
+ bool HasIds;
+ bool InFunctionScope = false;
+ bool IsCompileUnit = false;
+
+ // Register for the locals and parameters symbols in the current frame.
+ RegisterId LocalFrameRegister = RegisterId::NONE;
+ RegisterId ParamFrameRegister = RegisterId::NONE;
+
+ void printLocalVariableAddrRange(const LocalVariableAddrRange &Range,
+ uint32_t RelocationOffset);
+ void printLocalVariableAddrGap(ArrayRef<LocalVariableAddrGap> Gaps);
+ void printTypeIndex(StringRef FieldName, TypeIndex TI) const;
+
+ // Return true if this symbol is a Compile Unit.
+ bool symbolIsCompileUnit(SymbolKind Kind) {
+ switch (Kind) {
+ case SymbolKind::S_COMPILE2:
+ case SymbolKind::S_COMPILE3:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ // Determine symbol kind (local or parameter).
+ void determineSymbolKind(LVSymbol *Symbol, RegisterId Register) {
+ if (Register == LocalFrameRegister) {
+ Symbol->setIsVariable();
+ return;
+ }
+ if (Register == ParamFrameRegister) {
+ Symbol->setIsParameter();
+ return;
+ }
+ // Assume it is a variable.
+ Symbol->setIsVariable();
+ }
+
+public:
+ LVSymbolVisitor(LVCodeViewReader *Reader, ScopedPrinter &W,
+ LVLogicalVisitor *LogicalVisitor,
+ LazyRandomTypeCollection &Types,
+ LazyRandomTypeCollection &Ids,
+ LVSymbolVisitorDelegate *ObjDelegate, LVShared *Shared)
+ : Reader(Reader), W(W), LogicalVisitor(LogicalVisitor), Types(Types),
+ Ids(Ids), ObjDelegate(ObjDelegate), Shared(Shared) {
+ HasIds = &Types != &Ids;
+ }
+
+ Error visitSymbolBegin(CVSymbol &Record) override;
+ Error visitSymbolBegin(CVSymbol &Record, uint32_t Offset) override;
+ Error visitSymbolEnd(CVSymbol &Record) override;
+ Error visitUnknownSymbol(CVSymbol &Record) override;
+
+ Error visitKnownRecord(CVSymbol &Record, BlockSym &Block) override;
+ Error visitKnownRecord(CVSymbol &Record, BPRelativeSym &Local) override;
+ Error visitKnownRecord(CVSymbol &Record, BuildInfoSym &BuildInfo) override;
+ Error visitKnownRecord(CVSymbol &Record, Compile2Sym &Compile2) override;
+ Error visitKnownRecord(CVSymbol &Record, Compile3Sym &Compile3) override;
+ Error visitKnownRecord(CVSymbol &Record, ConstantSym &Constant) override;
+ Error visitKnownRecord(CVSymbol &Record, DataSym &Data) override;
+ Error visitKnownRecord(CVSymbol &Record,
+ DefRangeFramePointerRelFullScopeSym
+ &DefRangeFramePointerRelFullScope) override;
+ Error visitKnownRecord(
+ CVSymbol &Record,
+ DefRangeFramePointerRelSym &DefRangeFramePointerRel) override;
+ Error visitKnownRecord(CVSymbol &Record,
+ DefRangeRegisterRelSym &DefRangeRegisterRel) override;
+ Error visitKnownRecord(CVSymbol &Record,
+ DefRangeRegisterSym &DefRangeRegister) override;
+ Error visitKnownRecord(
+ CVSymbol &Record,
+ DefRangeSubfieldRegisterSym &DefRangeSubfieldRegister) override;
+ Error visitKnownRecord(CVSymbol &Record,
+ DefRangeSubfieldSym &DefRangeSubfield) override;
+ Error visitKnownRecord(CVSymbol &Record, DefRangeSym &DefRange) override;
+ Error visitKnownRecord(CVSymbol &Record, FrameProcSym &FrameProc) override;
+ Error visitKnownRecord(CVSymbol &Record, InlineSiteSym &InlineSite) override;
+ Error visitKnownRecord(CVSymbol &Record, LocalSym &Local) override;
+ Error visitKnownRecord(CVSymbol &Record, ObjNameSym &ObjName) override;
+ Error visitKnownRecord(CVSymbol &Record, ProcSym &Proc) override;
+ Error visitKnownRecord(CVSymbol &Record, RegRelativeSym &Local) override;
+ Error visitKnownRecord(CVSymbol &Record, ScopeEndSym &ScopeEnd) override;
+ Error visitKnownRecord(CVSymbol &Record, Thunk32Sym &Thunk) override;
+ Error visitKnownRecord(CVSymbol &Record, UDTSym &UDT) override;
+ Error visitKnownRecord(CVSymbol &Record, UsingNamespaceSym &UN) override;
+};
+
+// Visitor for CodeView types and symbols to populate elements.
+class LVLogicalVisitor final {
+ LVCodeViewReader *Reader;
+ ScopedPrinter &W;
+
+ // Encapsulates access to the input file and any dependent type server,
+ // including any precompiled header object.
+ llvm::pdb::InputFile &Input;
+ std::shared_ptr<llvm::pdb::InputFile> TypeServer = nullptr;
+ std::shared_ptr<LazyRandomTypeCollection> PrecompHeader = nullptr;
+
+ std::shared_ptr<LVShared> Shared;
+
+ // Object files have only one type stream that contains both types and ids.
+ // Precompiled header objects don't contain an IPI stream. Use the TPI.
+ LazyRandomTypeCollection &types() {
+ return TypeServer ? TypeServer->types()
+ : (PrecompHeader ? *PrecompHeader : Input.types());
+ }
+ LazyRandomTypeCollection &ids() {
+ return TypeServer ? TypeServer->ids()
+ : (PrecompHeader ? *PrecompHeader : Input.ids());
+ }
+
+ using LVScopeStack = std::stack<LVScope *>;
+ LVScopeStack ScopeStack;
+ LVScope *ReaderParent = nullptr;
+ LVScope *ReaderScope = nullptr;
+ bool InCompileUnitScope = false;
+
+ // Allow processing of argument list.
+ bool ProcessArgumentList = false;
+ StringRef OverloadedMethodName;
+ std::string CompileUnitName;
+
+ // Inlined functions source information.
+ using LVInlineeEntry = std::pair<uint32_t, StringRef>;
+ using LVInlineeInfo = std::map<TypeIndex, LVInlineeEntry>;
+ LVInlineeInfo InlineeInfo;
+
+ Error visitFieldListMemberStream(TypeIndex TI, LVElement *Element,
+ ArrayRef<uint8_t> FieldList);
+
+ LVType *createBaseType(TypeIndex TI, StringRef TypeName);
+ LVType *createPointerType(TypeIndex TI, StringRef TypeName);
+ LVSymbol *createParameter(TypeIndex TI, StringRef Name, LVScope *Parent);
+ LVSymbol *createParameter(LVElement *Element, StringRef Name,
+ LVScope *Parent);
+ void createDataMember(CVMemberRecord &Record, LVScope *Parent, StringRef Name,
+ TypeIndex Type, MemberAccess Access);
+ void createParents(StringRef ScopedName, LVElement *Element);
+
+public:
+ LVLogicalVisitor(LVCodeViewReader *Reader, ScopedPrinter &W,
+ llvm::pdb::InputFile &Input);
+
+ // Current elements during the processing of a RecordType or RecordSymbol.
+ // They are shared with the SymbolVisitor.
+ LVElement *CurrentElement = nullptr;
+ LVScope *CurrentScope = nullptr;
+ LVSymbol *CurrentSymbol = nullptr;
+ LVType *CurrentType = nullptr;
+
+ // Input source in the case of type server or precompiled header.
+ void setInput(std::shared_ptr<llvm::pdb::InputFile> TypeServer) {
+ this->TypeServer = TypeServer;
+ }
+ void setInput(std::shared_ptr<LazyRandomTypeCollection> PrecompHeader) {
+ this->PrecompHeader = PrecompHeader;
+ }
+
+ void addInlineeInfo(TypeIndex TI, uint32_t LineNumber, StringRef Filename) {
+ InlineeInfo.emplace(std::piecewise_construct, std::forward_as_tuple(TI),
+ std::forward_as_tuple(LineNumber, Filename));
+ }
+
+ void printTypeIndex(StringRef FieldName, TypeIndex TI, uint32_t StreamIdx);
+ void printMemberAttributes(MemberAttributes Attrs);
+ void printMemberAttributes(MemberAccess Access, MethodKind Kind,
+ MethodOptions Options);
+
+ LVElement *createElement(TypeLeafKind Kind);
+ LVElement *createElement(SymbolKind Kind);
+ LVElement *createElement(TypeIndex TI, TypeLeafKind Kind);
+
+ // Break down the annotation byte code and calculate code and line offsets.
+ Error inlineSiteAnnotation(LVScope *AbstractFunction,
+ LVScope *InlinedFunction,
+ InlineSiteSym &InlineSite);
+
+ void pushScope(LVScope *Scope) {
+ ScopeStack.push(ReaderParent);
+ ReaderParent = ReaderScope;
+ ReaderScope = Scope;
+ }
+ void popScope() {
+ ReaderScope = ReaderParent;
+ ReaderParent = ScopeStack.top();
+ ScopeStack.pop();
+ }
+ void closeScope() {
+ if (InCompileUnitScope) {
+ InCompileUnitScope = false;
+ popScope();
+ }
+ }
+ void setRoot(LVScope *Root) { ReaderScope = Root; }
+
+ void addElement(LVScope *Scope, bool IsCompileUnit);
+ void addElement(LVSymbol *Symbol);
+ void addElement(LVType *Type);
+
+ std::string getCompileUnitName() { return CompileUnitName; }
+ void setCompileUnitName(std::string Name) {
+ CompileUnitName = std::move(Name);
+ }
+
+ LVElement *getElement(uint32_t StreamIdx, TypeIndex TI,
+ LVScope *Parent = nullptr);
+ LVShared *getShared() { return Shared.get(); }
+
+ LVScope *getReaderScope() const { return ReaderScope; }
+
+ void printTypeBegin(CVType &Record, TypeIndex TI, LVElement *Element,
+ uint32_t StreamIdx);
+ void printTypeEnd(CVType &Record);
+ void printMemberBegin(CVMemberRecord &Record, TypeIndex TI,
+ LVElement *Element, uint32_t StreamIdx);
+ void printMemberEnd(CVMemberRecord &Record);
+
+ void startProcessArgumentList() { ProcessArgumentList = true; }
+ void stopProcessArgumentList() { ProcessArgumentList = false; }
+
+ void processFiles();
+ void processLines();
+ void processNamespaces();
+
+ void printRecords(raw_ostream &OS) const;
+
+ Error visitUnknownType(CVType &Record, TypeIndex TI);
+ Error visitKnownRecord(CVType &Record, ArgListRecord &Args, TypeIndex TI,
+ LVElement *Element);
+ Error visitKnownRecord(CVType &Record, ArrayRecord &AT, TypeIndex TI,
+ LVElement *Element);
+ Error visitKnownRecord(CVType &Record, BitFieldRecord &BF, TypeIndex TI,
+ LVElement *Element);
+ Error visitKnownRecord(CVType &Record, BuildInfoRecord &BI, TypeIndex TI,
+ LVElement *Element);
+ Error visitKnownRecord(CVType &Record, ClassRecord &Class, TypeIndex TI,
+ LVElement *Element);
+ Error visitKnownRecord(CVType &Record, EnumRecord &Enum, TypeIndex TI,
+ LVElement *Element);
+ Error visitKnownRecord(CVType &Record, FieldListRecord &FieldList,
+ TypeIndex TI, LVElement *Element);
+ Error visitKnownRecord(CVType &Record, FuncIdRecord &Func, TypeIndex TI,
+ LVElement *Element);
+ Error visitKnownRecord(CVType &Record, LabelRecord &LR, TypeIndex TI,
+ LVElement *Element);
+ Error visitKnownRecord(CVType &Record, ModifierRecord &Mod, TypeIndex TI,
+ LVElement *Element);
+ Error visitKnownRecord(CVType &Record, MemberFuncIdRecord &Id, TypeIndex TI,
+ LVElement *Element);
+ Error visitKnownRecord(CVType &Record, MemberFunctionRecord &MF, TypeIndex TI,
+ LVElement *Element);
+ Error visitKnownRecord(CVType &Record, MethodOverloadListRecord &Overloads,
+ TypeIndex TI, LVElement *Element);
+ Error visitKnownRecord(CVType &Record, PointerRecord &Ptr, TypeIndex TI,
+ LVElement *Element);
+ Error visitKnownRecord(CVType &Record, ProcedureRecord &Proc, TypeIndex TI,
+ LVElement *Element);
+ Error visitKnownRecord(CVType &Record, UnionRecord &Union, TypeIndex TI,
+ LVElement *Element);
+ Error visitKnownRecord(CVType &Record, TypeServer2Record &TS, TypeIndex TI,
+ LVElement *Element);
+ Error visitKnownRecord(CVType &Record, VFTableRecord &VFT, TypeIndex TI,
+ LVElement *Element);
+ Error visitKnownRecord(CVType &Record, VFTableShapeRecord &Shape,
+ TypeIndex TI, LVElement *Element);
+ Error visitKnownRecord(CVType &Record, StringListRecord &Strings,
+ TypeIndex TI, LVElement *Element);
+ Error visitKnownRecord(CVType &Record, StringIdRecord &String, TypeIndex TI,
+ LVElement *Element);
+ Error visitKnownRecord(CVType &Record, UdtSourceLineRecord &SourceLine,
+ TypeIndex TI, LVElement *Element);
+ Error visitKnownRecord(CVType &Record, UdtModSourceLineRecord &ModSourceLine,
+ TypeIndex TI, LVElement *Element);
+ Error visitKnownRecord(CVType &Record, PrecompRecord &Precomp, TypeIndex TI,
+ LVElement *Element);
+ Error visitKnownRecord(CVType &Record, EndPrecompRecord &EndPrecomp,
+ TypeIndex TI, LVElement *Element);
+
+ Error visitUnknownMember(CVMemberRecord &Record, TypeIndex TI);
+ Error visitKnownMember(CVMemberRecord &Record, BaseClassRecord &Base,
+ TypeIndex TI, LVElement *Element);
+ Error visitKnownMember(CVMemberRecord &Record, DataMemberRecord &Field,
+ TypeIndex TI, LVElement *Element);
+ Error visitKnownMember(CVMemberRecord &Record, EnumeratorRecord &Enum,
+ TypeIndex TI, LVElement *Element);
+ Error visitKnownMember(CVMemberRecord &Record, ListContinuationRecord &Cont,
+ TypeIndex TI, LVElement *Element);
+ Error visitKnownMember(CVMemberRecord &Record, NestedTypeRecord &Nested,
+ TypeIndex TI, LVElement *Element);
+ Error visitKnownMember(CVMemberRecord &Record, OneMethodRecord &Method,
+ TypeIndex TI, LVElement *Element);
+ Error visitKnownMember(CVMemberRecord &Record, OverloadedMethodRecord &Method,
+ TypeIndex TI, LVElement *Element);
+ Error visitKnownMember(CVMemberRecord &Record, StaticDataMemberRecord &Field,
+ TypeIndex TI, LVElement *Element);
+ Error visitKnownMember(CVMemberRecord &Record, VFPtrRecord &VFTable,
+ TypeIndex TI, LVElement *Element);
+ Error visitKnownMember(CVMemberRecord &Record, VirtualBaseClassRecord &Base,
+ TypeIndex TI, LVElement *Element);
+
+ template <typename T>
+ Error visitKnownMember(CVMemberRecord &Record,
+ TypeVisitorCallbacks &Callbacks, TypeIndex TI,
+ LVElement *Element) {
+ TypeRecordKind RK = static_cast<TypeRecordKind>(Record.Kind);
+ T KnownRecord(RK);
+ if (Error Err = Callbacks.visitKnownMember(Record, KnownRecord))
+ return Err;
+ if (Error Err = visitKnownMember(Record, KnownRecord, TI, Element))
+ return Err;
+ return Error::success();
+ }
+
+ template <typename T>
+ Error visitKnownRecord(CVType &Record, TypeIndex TI, LVElement *Element) {
+ TypeRecordKind RK = static_cast<TypeRecordKind>(Record.kind());
+ T KnownRecord(RK);
+ if (Error Err = TypeDeserializer::deserializeAs(
+ const_cast<CVType &>(Record), KnownRecord))
+ return Err;
+ if (Error Err = visitKnownRecord(Record, KnownRecord, TI, Element))
+ return Err;
+ return Error::success();
+ }
+
+ Error visitMemberRecord(CVMemberRecord &Record,
+ TypeVisitorCallbacks &Callbacks, TypeIndex TI,
+ LVElement *Element);
+ Error finishVisitation(CVType &Record, TypeIndex TI, LVElement *Element);
+};
+
+} // namespace logicalview
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_LOGICALVIEW_READERS_CODEVIEWVISITOR_H
diff --git a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVELFReader.h b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVELFReader.h
index 4ab17eca5f92..0837b886a273 100644
--- a/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVELFReader.h
+++ b/llvm/include/llvm/DebugInfo/LogicalView/Readers/LVELFReader.h
@@ -69,7 +69,12 @@ class LVELFReader final : public LVBinaryReader {
// Cross references (Elements).
using LVElementSet = std::unordered_set<LVElement *>;
- using LVElementEntry = std::pair<LVElement *, LVElementSet>;
+ struct LVElementEntry {
+ LVElement *Element;
+ LVElementSet References;
+ LVElementSet Types;
+ LVElementEntry(LVElement *Element = nullptr) : Element(Element) {}
+ };
using LVElementReference = std::unordered_map<LVOffset, LVElementEntry>;
LVElementReference ElementTable;
@@ -114,7 +119,8 @@ class LVELFReader final : public LVBinaryReader {
void updateReference(dwarf::Attribute Attr, const DWARFFormValue &FormValue);
// Get an element given the DIE offset.
- LVElement *getElementForOffset(LVOffset offset, LVElement *Element);
+ LVElement *getElementForOffset(LVOffset offset, LVElement *Element,
+ bool IsType);
protected:
Error createScopes() override;
@@ -139,7 +145,8 @@ public:
return SymbolsWithLocations;
}
- std::string getRegisterName(LVSmall Opcode, uint64_t Operands[2]) override;
+ std::string getRegisterName(LVSmall Opcode,
+ ArrayRef<uint64_t> Operands) override;
void print(raw_ostream &OS) const;
diff --git a/llvm/include/llvm/DebugInfo/MSF/MSFError.h b/llvm/include/llvm/DebugInfo/MSF/MSFError.h
index b84f9d7c4fee..0d0a43102a9b 100644
--- a/llvm/include/llvm/DebugInfo/MSF/MSFError.h
+++ b/llvm/include/llvm/DebugInfo/MSF/MSFError.h
@@ -16,14 +16,15 @@ namespace msf {
enum class msf_error_code {
unspecified = 1,
insufficient_buffer,
+ not_writable,
+ no_stream,
+ invalid_format,
+ block_in_use,
size_overflow_4096,
size_overflow_8192,
size_overflow_16384,
size_overflow_32768,
- not_writable,
- no_stream,
- invalid_format,
- block_in_use
+ stream_directory_overflow,
};
} // namespace msf
} // namespace llvm
@@ -46,6 +47,26 @@ class MSFError : public ErrorInfo<MSFError, StringError> {
public:
using ErrorInfo<MSFError, StringError>::ErrorInfo; // inherit constructors
MSFError(const Twine &S) : ErrorInfo(S, msf_error_code::unspecified) {}
+
+ bool isPageOverflow() const {
+ switch (static_cast<msf_error_code>(convertToErrorCode().value())) {
+ case msf_error_code::unspecified:
+ case msf_error_code::insufficient_buffer:
+ case msf_error_code::not_writable:
+ case msf_error_code::no_stream:
+ case msf_error_code::invalid_format:
+ case msf_error_code::block_in_use:
+ return false;
+ case msf_error_code::size_overflow_4096:
+ case msf_error_code::size_overflow_8192:
+ case msf_error_code::size_overflow_16384:
+ case msf_error_code::size_overflow_32768:
+ case msf_error_code::stream_directory_overflow:
+ return true;
+ }
+ llvm_unreachable("msf error code not implemented");
+ }
+
static char ID;
};
} // namespace msf
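Editor's note: the new isPageOverflow() predicate lets callers separate page-size exhaustion from other MSF failures. A minimal sketch, assuming the Error only wraps msf::MSFError payloads produced by an MSF write:

#include "llvm/DebugInfo/MSF/MSFError.h"
#include "llvm/Support/Error.h"

// Returns true if the failed write should be retried with a larger page size.
// Assumes Err carries only msf::MSFError; other error types would need their
// own handler.
static bool shouldRetryWithLargerPages(llvm::Error Err) {
  bool Retry = false;
  llvm::handleAllErrors(std::move(Err), [&](const llvm::msf::MSFError &ME) {
    Retry = ME.isPageOverflow();
  });
  return Retry;
}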
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
index f799b0a4cde0..b89f1da5857a 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h
@@ -51,8 +51,7 @@ public:
StringRef Command) = 0;
virtual bool printError(const Request &Request,
- const ErrorInfoBase &ErrorInfo,
- StringRef ErrorBanner) = 0;
+ const ErrorInfoBase &ErrorInfo) = 0;
virtual void listBegin() = 0;
virtual void listEnd() = 0;
@@ -66,10 +65,12 @@ struct PrinterConfig {
int SourceContextLines;
};
+using ErrorHandler = function_ref<void(const ErrorInfoBase &, StringRef)>;
+
class PlainPrinterBase : public DIPrinter {
protected:
raw_ostream &OS;
- raw_ostream &ES;
+ ErrorHandler ErrHandler;
PrinterConfig Config;
void print(const DILineInfo &Info, bool Inlined);
@@ -85,8 +86,8 @@ private:
void printHeader(uint64_t Address);
public:
- PlainPrinterBase(raw_ostream &OS, raw_ostream &ES, PrinterConfig &Config)
- : OS(OS), ES(ES), Config(Config) {}
+ PlainPrinterBase(raw_ostream &OS, ErrorHandler EH, PrinterConfig &Config)
+ : OS(OS), ErrHandler(EH), Config(Config) {}
void print(const Request &Request, const DILineInfo &Info) override;
void print(const Request &Request, const DIInliningInfo &Info) override;
@@ -96,8 +97,8 @@ public:
void printInvalidCommand(const Request &Request, StringRef Command) override;
- bool printError(const Request &Request, const ErrorInfoBase &ErrorInfo,
- StringRef ErrorBanner) override;
+ bool printError(const Request &Request,
+ const ErrorInfoBase &ErrorInfo) override;
void listBegin() override {}
void listEnd() override {}
@@ -110,8 +111,8 @@ private:
void printFooter() override;
public:
- LLVMPrinter(raw_ostream &OS, raw_ostream &ES, PrinterConfig &Config)
- : PlainPrinterBase(OS, ES, Config) {}
+ LLVMPrinter(raw_ostream &OS, ErrorHandler EH, PrinterConfig &Config)
+ : PlainPrinterBase(OS, EH, Config) {}
};
class GNUPrinter : public PlainPrinterBase {
@@ -119,8 +120,9 @@ private:
void printSimpleLocation(StringRef Filename, const DILineInfo &Info) override;
public:
- GNUPrinter(raw_ostream &OS, raw_ostream &ES, PrinterConfig &Config)
- : PlainPrinterBase(OS, ES, Config) {}
+ GNUPrinter(raw_ostream &OS, ErrorHandler EH, PrinterConfig &Config)
+ : PlainPrinterBase(OS, EH, Config) {}
+
};
class JSONPrinter : public DIPrinter {
@@ -147,8 +149,8 @@ public:
void printInvalidCommand(const Request &Request, StringRef Command) override;
- bool printError(const Request &Request, const ErrorInfoBase &ErrorInfo,
- StringRef ErrorBanner) override;
+ bool printError(const Request &Request,
+ const ErrorInfoBase &ErrorInfo) override;
void listBegin() override;
void listEnd() override;
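Editor's note: the printers now report errors through a callback instead of writing to a second stream. A minimal sketch of wiring that up, assuming default-initialized PrinterConfig fields are acceptable:

#include "llvm/DebugInfo/Symbolize/DIPrinter.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

void buildPrinter() {
  using namespace llvm;
  using namespace llvm::symbolize;
  PrinterConfig Config = {};
  // The old ErrorBanner string now travels through the handler instead of
  // being a printError() parameter.
  auto EH = [](const ErrorInfoBase &EI, StringRef Banner) {
    errs() << Banner << ": " << EI.message() << '\n';
  };
  LLVMPrinter Printer(outs(), EH, Config);
  (void)Printer;
}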
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
index 534255640075..a1514d91702b 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
@@ -15,13 +15,12 @@
#ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUPFILTER_H
#define LLVM_DEBUGINFO_SYMBOLIZE_MARKUPFILTER_H
-#include "Markup.h"
-
-#include <map>
-
#include "llvm/ADT/DenseMap.h"
+#include "llvm/DebugInfo/Symbolize/Markup.h"
+#include "llvm/Object/BuildID.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
+#include <map>
namespace llvm {
namespace symbolize {
@@ -116,7 +115,7 @@ private:
std::optional<uint64_t> parseAddr(StringRef Str) const;
std::optional<uint64_t> parseModuleID(StringRef Str) const;
std::optional<uint64_t> parseSize(StringRef Str) const;
- std::optional<SmallVector<uint8_t>> parseBuildID(StringRef Str) const;
+ object::BuildID parseBuildID(StringRef Str) const;
std::optional<std::string> parseMode(StringRef Str) const;
std::optional<PCType> parsePCType(StringRef Str) const;
std::optional<uint64_t> parseFrameNumber(StringRef Str) const;
@@ -124,7 +123,7 @@ private:
bool checkTag(const MarkupNode &Node) const;
bool checkNumFields(const MarkupNode &Element, size_t Size) const;
bool checkNumFieldsAtLeast(const MarkupNode &Element, size_t Size) const;
- bool checkNumFieldsAtMost(const MarkupNode &Element, size_t Size) const;
+ void warnNumFieldsAtMost(const MarkupNode &Element, size_t Size) const;
void reportTypeError(StringRef Str, StringRef TypeName) const;
void reportLocation(StringRef::iterator Loc) const;
diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
index c633c894a44e..99a7f219baaa 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -119,6 +119,13 @@ public:
BIDFetcher = std::move(Fetcher);
}
+ /// Returns a SymbolizableModule or an error if loading debug info failed.
+ /// Only one attempt is made to load a module, and errors during loading are
+ /// only reported once. Subsequent calls to get module info for a module that
+ /// failed to load will return nullptr.
+ Expected<SymbolizableModule *>
+ getOrCreateModuleInfo(const std::string &ModuleName);
+
private:
// Bundles together object file with code/data and object file with
// corresponding debug info. These objects can be the same.
@@ -140,12 +147,6 @@ private:
symbolizeFrameCommon(const T &ModuleSpecifier,
object::SectionedAddress ModuleOffset);
- /// Returns a SymbolizableModule or an error if loading debug info failed.
- /// Only one attempt is made to load a module, and errors during loading are
- /// only reported once. Subsequent calls to get module info for a module that
- /// failed to load will return nullptr.
- Expected<SymbolizableModule *>
- getOrCreateModuleInfo(const std::string &ModuleName);
Expected<SymbolizableModule *> getOrCreateModuleInfo(const ObjectFile &Obj);
/// Returns a SymbolizableModule or an error if loading debug info failed.
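Editor's note: getOrCreateModuleInfo() is now part of the public LLVMSymbolizer interface, so callers can warm the module cache up front. A minimal sketch, with error handling kept deliberately simple:

#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/Support/Error.h"
#include <string>

// Pre-load one module; a nullptr result means an earlier attempt already
// failed and was reported, per the documentation above.
bool preloadModule(llvm::symbolize::LLVMSymbolizer &Symbolizer,
                   const std::string &Path) {
  auto InfoOrErr = Symbolizer.getOrCreateModuleInfo(Path);
  if (!InfoOrErr) {
    llvm::consumeError(InfoOrErr.takeError());
    return false;
  }
  return *InfoOrErr != nullptr;
}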
diff --git a/llvm/include/llvm/Debuginfod/Debuginfod.h b/llvm/include/llvm/Debuginfod/Debuginfod.h
index caece0e6fc19..ec7f5691dda4 100644
--- a/llvm/include/llvm/Debuginfod/Debuginfod.h
+++ b/llvm/include/llvm/Debuginfod/Debuginfod.h
@@ -38,9 +38,13 @@
namespace llvm {
+/// Returns false if a debuginfod lookup can be determined to have no chance of
+/// succeeding.
+bool canUseDebuginfod();
+
/// Finds default array of Debuginfod server URLs by checking DEBUGINFOD_URLS
/// environment variable.
-Expected<SmallVector<StringRef>> getDefaultDebuginfodUrls();
+SmallVector<StringRef> getDefaultDebuginfodUrls();
/// Finds a default local file caching directory for the debuginfod client,
/// first checking DEBUGINFOD_CACHE_PATH.
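Editor's note: since getDefaultDebuginfodUrls() no longer returns an Expected, the common client pattern reduces to a feature check plus a plain loop. A minimal sketch:

#include "llvm/ADT/StringRef.h"
#include "llvm/Debuginfod/Debuginfod.h"
#include "llvm/Support/raw_ostream.h"

void printDebuginfodServers() {
  if (!llvm::canUseDebuginfod())
    return; // lookup has no chance of succeeding
  for (llvm::StringRef Url : llvm::getDefaultDebuginfodUrls())
    llvm::outs() << Url << '\n';
}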
diff --git a/llvm/include/llvm/Debuginfod/HTTPClient.h b/llvm/include/llvm/Debuginfod/HTTPClient.h
index 1c9f719051ec..6ded55502f05 100644
--- a/llvm/include/llvm/Debuginfod/HTTPClient.h
+++ b/llvm/include/llvm/Debuginfod/HTTPClient.h
@@ -15,6 +15,8 @@
#ifndef LLVM_DEBUGINFOD_HTTPCLIENT_H
#define LLVM_DEBUGINFOD_HTTPCLIENT_H
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
diff --git a/llvm/include/llvm/Debuginfod/HTTPServer.h b/llvm/include/llvm/Debuginfod/HTTPServer.h
index 15e611ec546f..c200089200ab 100644
--- a/llvm/include/llvm/Debuginfod/HTTPServer.h
+++ b/llvm/include/llvm/Debuginfod/HTTPServer.h
@@ -34,6 +34,16 @@ struct HTTPResponse;
struct StreamingHTTPResponse;
class HTTPServer;
+class HTTPServerError : public ErrorInfo<HTTPServerError, ECError> {
+public:
+ static char ID;
+ HTTPServerError(const Twine &Msg);
+ void log(raw_ostream &OS) const override;
+
+private:
+ std::string Msg;
+};
+
class HTTPServerRequest {
friend HTTPServer;
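Editor's note: HTTPServerError gives the server a structured error type to return instead of ad-hoc strings. A minimal sketch of producing one; the failing condition is illustrative only:

#include "llvm/Debuginfod/HTTPServer.h"
#include "llvm/Support/Error.h"

llvm::Error checkListenPort(unsigned Port) {
  if (Port == 0)
    return llvm::make_error<llvm::HTTPServerError>(
        "cannot listen: no port specified");
  return llvm::Error::success();
}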
diff --git a/llvm/include/llvm/Demangle/Demangle.h b/llvm/include/llvm/Demangle/Demangle.h
index 6133d0b95bbf..e1f73c422db8 100644
--- a/llvm/include/llvm/Demangle/Demangle.h
+++ b/llvm/include/llvm/Demangle/Demangle.h
@@ -11,6 +11,7 @@
#include <cstddef>
#include <string>
+#include <string_view>
namespace llvm {
/// This is a llvm local version of __cxa_demangle. Other than the name and
@@ -28,8 +29,10 @@ enum : int {
demangle_success = 0,
};
-char *itaniumDemangle(const char *mangled_name, char *buf, size_t *n,
- int *status);
+/// Returns a non-NULL pointer to a NUL-terminated C style string
+/// that should be explicitly freed, if successful. Otherwise, may return
+/// nullptr if mangled_name is not a valid mangling or is nullptr.
+char *itaniumDemangle(std::string_view mangled_name);
enum MSDemangleFlags {
MSDF_None = 0,
@@ -46,31 +49,25 @@ enum MSDemangleFlags {
/// success, or nullptr on error.
/// If n_read is non-null and demangling was successful, it receives how many
/// bytes of the input string were consumed.
-/// buf can point to a *n_buf bytes large buffer where the demangled name is
-/// stored. If the buffer is too small, it is grown with realloc(). If buf is
-/// nullptr, then this malloc()s memory for the result.
-/// *n_buf stores the size of buf on input if buf is non-nullptr, and it
-/// receives the size of the demangled string on output if n_buf is not nullptr.
/// status receives one of the demangle_ enum entries above if it's not nullptr.
/// Flags controls various details of the demangled representation.
-char *microsoftDemangle(const char *mangled_name, size_t *n_read, char *buf,
- size_t *n_buf, int *status,
- MSDemangleFlags Flags = MSDF_None);
+char *microsoftDemangle(std::string_view mangled_name, size_t *n_read,
+ int *status, MSDemangleFlags Flags = MSDF_None);
// Demangles a Rust v0 mangled symbol.
-char *rustDemangle(const char *MangledName);
+char *rustDemangle(std::string_view MangledName);
// Demangles a D mangled symbol.
-char *dlangDemangle(const char *MangledName);
+char *dlangDemangle(std::string_view MangledName);
/// Attempt to demangle a string using different demangling schemes.
/// The function uses heuristics to determine which demangling scheme to use.
/// \param MangledName - reference to string to demangle.
/// \returns - the demangled string, or a copy of the input string if no
/// demangling occurred.
-std::string demangle(const std::string &MangledName);
+std::string demangle(std::string_view MangledName);
-bool nonMicrosoftDemangle(const char *MangledName, std::string &Result);
+bool nonMicrosoftDemangle(std::string_view MangledName, std::string &Result);
/// "Partial" demangler. This supports demangling a string into an AST
/// (typically an intermediate stage in itaniumDemangle) and querying certain
@@ -87,7 +84,7 @@ struct ItaniumPartialDemangler {
bool partialDemangle(const char *MangledName);
/// Just print the entire mangled name into Buf. Buf and N behave like the
- /// second and third parameters to itaniumDemangle.
+ /// second and third parameters to __cxa_demangle.
char *finishDemangle(char *Buf, size_t *N) const;
/// Get the base name of a function. This doesn't include trailing template
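Editor's note: with the switch to std::string_view parameters, the C-style entry points now allocate their own result buffers and the caller frees the returned string. A minimal sketch, using example manglings:

#include "llvm/Demangle/Demangle.h"
#include <cstdio>
#include <cstdlib>
#include <string>

void demangleExamples() {
  // Itanium: result is heap-allocated on success, nullptr on failure.
  if (char *Name = llvm::itaniumDemangle("_Z3foov")) {
    std::puts(Name);
    std::free(Name);
  }
  // Microsoft: n_read / status are optional out-parameters.
  size_t NRead = 0;
  int Status = 0;
  if (char *Name = llvm::microsoftDemangle("?foo@@YAXXZ", &NRead, &Status)) {
    std::puts(Name);
    std::free(Name);
  }
  // The high-level helper returns a copy of the input if nothing matched.
  std::string S = llvm::demangle("_Z3barv");
}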
diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 0dd49ea91455..550e1699b228 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -17,7 +17,7 @@
#define DEMANGLE_ITANIUMDEMANGLE_H
#include "DemangleConfig.h"
-#include "StringView.h"
+#include "StringViewExtras.h"
#include "Utility.h"
#include <algorithm>
#include <cassert>
@@ -27,6 +27,8 @@
#include <cstring>
#include <limits>
#include <new>
+#include <string_view>
+#include <type_traits>
#include <utility>
DEMANGLE_NAMESPACE_BEGIN
@@ -286,7 +288,7 @@ public:
// implementation.
virtual void printRight(OutputBuffer &) const {}
- virtual StringView getBaseName() const { return StringView(); }
+ virtual std::string_view getBaseName() const { return {}; }
// Silence compiler warnings, this dtor will never be called.
virtual ~Node() = default;
@@ -345,10 +347,10 @@ struct NodeArrayNode : Node {
class DotSuffix final : public Node {
const Node *Prefix;
- const StringView Suffix;
+ const std::string_view Suffix;
public:
- DotSuffix(const Node *Prefix_, StringView Suffix_)
+ DotSuffix(const Node *Prefix_, std::string_view Suffix_)
: Node(KDotSuffix), Prefix(Prefix_), Suffix(Suffix_) {}
template<typename Fn> void match(Fn F) const { F(Prefix, Suffix); }
@@ -363,15 +365,15 @@ public:
class VendorExtQualType final : public Node {
const Node *Ty;
- StringView Ext;
+ std::string_view Ext;
const Node *TA;
public:
- VendorExtQualType(const Node *Ty_, StringView Ext_, const Node *TA_)
+ VendorExtQualType(const Node *Ty_, std::string_view Ext_, const Node *TA_)
: Node(KVendorExtQualType), Ty(Ty_), Ext(Ext_), TA(TA_) {}
const Node *getTy() const { return Ty; }
- StringView getExt() const { return Ext; }
+ std::string_view getExt() const { return Ext; }
const Node *getTA() const { return TA; }
template <typename Fn> void match(Fn F) const { F(Ty, Ext, TA); }
@@ -462,10 +464,10 @@ public:
class PostfixQualifiedType final : public Node {
const Node *Ty;
- const StringView Postfix;
+ const std::string_view Postfix;
public:
- PostfixQualifiedType(const Node *Ty_, StringView Postfix_)
+ PostfixQualifiedType(const Node *Ty_, std::string_view Postfix_)
: Node(KPostfixQualifiedType), Ty(Ty_), Postfix(Postfix_) {}
template<typename Fn> void match(Fn F) const { F(Ty, Postfix); }
@@ -477,15 +479,15 @@ public:
};
class NameType final : public Node {
- const StringView Name;
+ const std::string_view Name;
public:
- NameType(StringView Name_) : Node(KNameType), Name(Name_) {}
+ NameType(std::string_view Name_) : Node(KNameType), Name(Name_) {}
template<typename Fn> void match(Fn F) const { F(Name); }
- StringView getName() const { return Name; }
- StringView getBaseName() const override { return Name; }
+ std::string_view getName() const { return Name; }
+ std::string_view getBaseName() const override { return Name; }
void printLeft(OutputBuffer &OB) const override { OB += Name; }
};
@@ -511,10 +513,10 @@ public:
};
class ElaboratedTypeSpefType : public Node {
- StringView Kind;
+ std::string_view Kind;
Node *Child;
public:
- ElaboratedTypeSpefType(StringView Kind_, Node *Child_)
+ ElaboratedTypeSpefType(std::string_view Kind_, Node *Child_)
: Node(KElaboratedTypeSpefType), Kind(Kind_), Child(Child_) {}
template<typename Fn> void match(Fn F) const { F(Kind, Child); }
@@ -528,15 +530,17 @@ public:
struct AbiTagAttr : Node {
Node *Base;
- StringView Tag;
+ std::string_view Tag;
- AbiTagAttr(Node* Base_, StringView Tag_)
- : Node(KAbiTagAttr, Base_->RHSComponentCache,
- Base_->ArrayCache, Base_->FunctionCache),
+ AbiTagAttr(Node *Base_, std::string_view Tag_)
+ : Node(KAbiTagAttr, Base_->RHSComponentCache, Base_->ArrayCache,
+ Base_->FunctionCache),
Base(Base_), Tag(Tag_) {}
template<typename Fn> void match(Fn F) const { F(Base, Tag); }
+ std::string_view getBaseName() const override { return Base->getBaseName(); }
+
void printLeft(OutputBuffer &OB) const override {
Base->printLeft(OB);
OB += "[abi:";
@@ -562,12 +566,12 @@ public:
class ObjCProtoName : public Node {
const Node *Ty;
- StringView Protocol;
+ std::string_view Protocol;
friend class PointerType;
public:
- ObjCProtoName(const Node *Ty_, StringView Protocol_)
+ ObjCProtoName(const Node *Ty_, std::string_view Protocol_)
: Node(KObjCProtoName), Ty(Ty_), Protocol(Protocol_) {}
template<typename Fn> void match(Fn F) const { F(Ty, Protocol); }
@@ -944,11 +948,11 @@ public:
};
class SpecialName final : public Node {
- const StringView Special;
+ const std::string_view Special;
const Node *Child;
public:
- SpecialName(StringView Special_, const Node *Child_)
+ SpecialName(std::string_view Special_, const Node *Child_)
: Node(KSpecialName), Special(Special_), Child(Child_) {}
template<typename Fn> void match(Fn F) const { F(Special, Child); }
@@ -987,7 +991,7 @@ struct NestedName : Node {
template<typename Fn> void match(Fn F) const { F(Qual, Name); }
- StringView getBaseName() const override { return Name->getBaseName(); }
+ std::string_view getBaseName() const override { return Name->getBaseName(); }
void printLeft(OutputBuffer &OB) const override {
Qual->print(OB);
@@ -1027,7 +1031,7 @@ struct ModuleEntity : Node {
template <typename Fn> void match(Fn F) const { F(Module, Name); }
- StringView getBaseName() const override { return Name->getBaseName(); }
+ std::string_view getBaseName() const override { return Name->getBaseName(); }
void printLeft(OutputBuffer &OB) const override {
Name->print(OB);
@@ -1063,7 +1067,7 @@ public:
template<typename Fn> void match(Fn F) const { F(Qualifier, Name); }
- StringView getBaseName() const override { return Name->getBaseName(); }
+ std::string_view getBaseName() const override { return Name->getBaseName(); }
void printLeft(OutputBuffer &OB) const override {
Qualifier->print(OB);
@@ -1485,7 +1489,7 @@ struct NameWithTemplateArgs : Node {
template<typename Fn> void match(Fn F) const { F(Name, TemplateArgs); }
- StringView getBaseName() const override { return Name->getBaseName(); }
+ std::string_view getBaseName() const override { return Name->getBaseName(); }
void printLeft(OutputBuffer &OB) const override {
Name->print(OB);
@@ -1502,7 +1506,7 @@ public:
template<typename Fn> void match(Fn F) const { F(Child); }
- StringView getBaseName() const override { return Child->getBaseName(); }
+ std::string_view getBaseName() const override { return Child->getBaseName(); }
void printLeft(OutputBuffer &OB) const override {
OB += "::";
@@ -1538,20 +1542,20 @@ protected:
return unsigned(SSK) >= unsigned(SpecialSubKind::string);
}
- StringView getBaseName() const override {
+ std::string_view getBaseName() const override {
switch (SSK) {
case SpecialSubKind::allocator:
- return StringView("allocator");
+ return {"allocator"};
case SpecialSubKind::basic_string:
- return StringView("basic_string");
+ return {"basic_string"};
case SpecialSubKind::string:
- return StringView("basic_string");
+ return {"basic_string"};
case SpecialSubKind::istream:
- return StringView("basic_istream");
+ return {"basic_istream"};
case SpecialSubKind::ostream:
- return StringView("basic_ostream");
+ return {"basic_ostream"};
case SpecialSubKind::iostream:
- return StringView("basic_iostream");
+ return {"basic_iostream"};
}
DEMANGLE_UNREACHABLE;
}
@@ -1575,12 +1579,12 @@ public:
template<typename Fn> void match(Fn F) const { F(SSK); }
- StringView getBaseName() const override {
- auto SV = ExpandedSpecialSubstitution::getBaseName ();
+ std::string_view getBaseName() const override {
+ std::string_view SV = ExpandedSpecialSubstitution::getBaseName();
if (isInstantiation()) {
// The instantiations are typedefs that drop the "basic_" prefix.
- assert(SV.startsWith("basic_"));
- SV = SV.dropFront(sizeof("basic_") - 1);
+ assert(llvm::itanium_demangle::starts_with(SV, "basic_"));
+ SV.remove_prefix(sizeof("basic_") - 1);
}
return SV;
}
@@ -1628,10 +1632,11 @@ public:
};
class UnnamedTypeName : public Node {
- const StringView Count;
+ const std::string_view Count;
public:
- UnnamedTypeName(StringView Count_) : Node(KUnnamedTypeName), Count(Count_) {}
+ UnnamedTypeName(std::string_view Count_)
+ : Node(KUnnamedTypeName), Count(Count_) {}
template<typename Fn> void match(Fn F) const { F(Count); }
@@ -1645,11 +1650,11 @@ public:
class ClosureTypeName : public Node {
NodeArray TemplateParams;
NodeArray Params;
- StringView Count;
+ std::string_view Count;
public:
ClosureTypeName(NodeArray TemplateParams_, NodeArray Params_,
- StringView Count_)
+ std::string_view Count_)
: Node(KClosureTypeName), TemplateParams(TemplateParams_),
Params(Params_), Count(Count_) {}
@@ -1696,12 +1701,12 @@ public:
class BinaryExpr : public Node {
const Node *LHS;
- const StringView InfixOperator;
+ const std::string_view InfixOperator;
const Node *RHS;
public:
- BinaryExpr(const Node *LHS_, StringView InfixOperator_, const Node *RHS_,
- Prec Prec_)
+ BinaryExpr(const Node *LHS_, std::string_view InfixOperator_,
+ const Node *RHS_, Prec Prec_)
: Node(KBinaryExpr, Prec_), LHS(LHS_), InfixOperator(InfixOperator_),
RHS(RHS_) {}
@@ -1750,10 +1755,10 @@ public:
class PostfixExpr : public Node {
const Node *Child;
- const StringView Operator;
+ const std::string_view Operator;
public:
- PostfixExpr(const Node *Child_, StringView Operator_, Prec Prec_)
+ PostfixExpr(const Node *Child_, std::string_view Operator_, Prec Prec_)
: Node(KPostfixExpr, Prec_), Child(Child_), Operator(Operator_) {}
template <typename Fn> void match(Fn F) const {
@@ -1791,11 +1796,12 @@ public:
class MemberExpr : public Node {
const Node *LHS;
- const StringView Kind;
+ const std::string_view Kind;
const Node *RHS;
public:
- MemberExpr(const Node *LHS_, StringView Kind_, const Node *RHS_, Prec Prec_)
+ MemberExpr(const Node *LHS_, std::string_view Kind_, const Node *RHS_,
+ Prec Prec_)
: Node(KMemberExpr, Prec_), LHS(LHS_), Kind(Kind_), RHS(RHS_) {}
template <typename Fn> void match(Fn F) const {
@@ -1812,13 +1818,14 @@ public:
class SubobjectExpr : public Node {
const Node *Type;
const Node *SubExpr;
- StringView Offset;
+ std::string_view Offset;
NodeArray UnionSelectors;
bool OnePastTheEnd;
public:
- SubobjectExpr(const Node *Type_, const Node *SubExpr_, StringView Offset_,
- NodeArray UnionSelectors_, bool OnePastTheEnd_)
+ SubobjectExpr(const Node *Type_, const Node *SubExpr_,
+ std::string_view Offset_, NodeArray UnionSelectors_,
+ bool OnePastTheEnd_)
: Node(KSubobjectExpr), Type(Type_), SubExpr(SubExpr_), Offset(Offset_),
UnionSelectors(UnionSelectors_), OnePastTheEnd(OnePastTheEnd_) {}
@@ -1835,7 +1842,7 @@ public:
OB += "0";
} else if (Offset[0] == 'n') {
OB += "-";
- OB += Offset.dropFront();
+ OB += std::string_view(Offset.data() + 1, Offset.size() - 1);
} else {
OB += Offset;
}
@@ -1844,12 +1851,12 @@ public:
};
class EnclosingExpr : public Node {
- const StringView Prefix;
+ const std::string_view Prefix;
const Node *Infix;
- const StringView Postfix;
+ const std::string_view Postfix;
public:
- EnclosingExpr(StringView Prefix_, const Node *Infix_,
+ EnclosingExpr(std::string_view Prefix_, const Node *Infix_,
Prec Prec_ = Prec::Primary)
: Node(KEnclosingExpr, Prec_), Prefix(Prefix_), Infix(Infix_) {}
@@ -1868,12 +1875,13 @@ public:
class CastExpr : public Node {
// cast_kind<to>(from)
- const StringView CastKind;
+ const std::string_view CastKind;
const Node *To;
const Node *From;
public:
- CastExpr(StringView CastKind_, const Node *To_, const Node *From_, Prec Prec_)
+ CastExpr(std::string_view CastKind_, const Node *To_, const Node *From_,
+ Prec Prec_)
: Node(KCastExpr, Prec_), CastKind(CastKind_), To(To_), From(From_) {}
template <typename Fn> void match(Fn F) const {
@@ -1996,11 +2004,11 @@ public:
};
class PrefixExpr : public Node {
- StringView Prefix;
+ std::string_view Prefix;
Node *Child;
public:
- PrefixExpr(StringView Prefix_, Node *Child_, Prec Prec_)
+ PrefixExpr(std::string_view Prefix_, Node *Child_, Prec Prec_)
: Node(KPrefixExpr, Prec_), Prefix(Prefix_), Child(Child_) {}
template <typename Fn> void match(Fn F) const {
@@ -2014,10 +2022,11 @@ public:
};
class FunctionParam : public Node {
- StringView Number;
+ std::string_view Number;
public:
- FunctionParam(StringView Number_) : Node(KFunctionParam), Number(Number_) {}
+ FunctionParam(std::string_view Number_)
+ : Node(KFunctionParam), Number(Number_) {}
template<typename Fn> void match(Fn F) const { F(Number); }
@@ -2052,11 +2061,11 @@ public:
class PointerToMemberConversionExpr : public Node {
const Node *Type;
const Node *SubExpr;
- StringView Offset;
+ std::string_view Offset;
public:
PointerToMemberConversionExpr(const Node *Type_, const Node *SubExpr_,
- StringView Offset_, Prec Prec_)
+ std::string_view Offset_, Prec Prec_)
: Node(KPointerToMemberConversionExpr, Prec_), Type(Type_),
SubExpr(SubExpr_), Offset(Offset_) {}
@@ -2141,11 +2150,11 @@ public:
class FoldExpr : public Node {
const Node *Pack, *Init;
- StringView OperatorName;
+ std::string_view OperatorName;
bool IsLeftFold;
public:
- FoldExpr(bool IsLeftFold_, StringView OperatorName_, const Node *Pack_,
+ FoldExpr(bool IsLeftFold_, std::string_view OperatorName_, const Node *Pack_,
const Node *Init_)
: Node(KFoldExpr), Pack(Pack_), Init(Init_), OperatorName(OperatorName_),
IsLeftFold(IsLeftFold_) {}
@@ -2209,7 +2218,7 @@ public:
template<typename Fn> void match(Fn F) const { F(Value); }
void printLeft(OutputBuffer &OB) const override {
- OB += Value ? StringView("true") : StringView("false");
+ OB += Value ? std::string_view("true") : std::string_view("false");
}
};
@@ -2247,10 +2256,10 @@ public:
class EnumLiteral : public Node {
// ty(integer)
const Node *Ty;
- StringView Integer;
+ std::string_view Integer;
public:
- EnumLiteral(const Node *Ty_, StringView Integer_)
+ EnumLiteral(const Node *Ty_, std::string_view Integer_)
: Node(KEnumLiteral), Ty(Ty_), Integer(Integer_) {}
template<typename Fn> void match(Fn F) const { F(Ty, Integer); }
@@ -2261,18 +2270,18 @@ public:
OB.printClose();
if (Integer[0] == 'n')
- OB << "-" << Integer.dropFront(1);
+ OB << '-' << std::string_view(Integer.data() + 1, Integer.size() - 1);
else
OB << Integer;
}
};
class IntegerLiteral : public Node {
- StringView Type;
- StringView Value;
+ std::string_view Type;
+ std::string_view Value;
public:
- IntegerLiteral(StringView Type_, StringView Value_)
+ IntegerLiteral(std::string_view Type_, std::string_view Value_)
: Node(KIntegerLiteral), Type(Type_), Value(Value_) {}
template<typename Fn> void match(Fn F) const { F(Type, Value); }
@@ -2284,10 +2293,9 @@ public:
OB.printClose();
}
- if (Value[0] == 'n') {
- OB += '-';
- OB += Value.dropFront(1);
- } else
+ if (Value[0] == 'n')
+ OB << '-' << std::string_view(Value.data() + 1, Value.size() - 1);
+ else
OB += Value;
if (Type.size() <= 3)
@@ -2310,29 +2318,26 @@ constexpr Node::Kind getFloatLiteralKind(long double *) {
}
template <class Float> class FloatLiteralImpl : public Node {
- const StringView Contents;
+ const std::string_view Contents;
static constexpr Kind KindForClass =
float_literal_impl::getFloatLiteralKind((Float *)nullptr);
public:
- FloatLiteralImpl(StringView Contents_)
+ FloatLiteralImpl(std::string_view Contents_)
: Node(KindForClass), Contents(Contents_) {}
template<typename Fn> void match(Fn F) const { F(Contents); }
void printLeft(OutputBuffer &OB) const override {
- const char *first = Contents.begin();
- const char *last = Contents.end() + 1;
-
const size_t N = FloatData<Float>::mangled_size;
- if (static_cast<std::size_t>(last - first) > N) {
- last = first + N;
+ if (Contents.size() >= N) {
union {
Float value;
char buf[sizeof(Float)];
};
- const char *t = first;
+ const char *t = Contents.data();
+ const char *last = t + N;
char *e = buf;
for (; t != last; ++t, ++e) {
unsigned d1 = isdigit(*t) ? static_cast<unsigned>(*t - '0')
@@ -2347,7 +2352,7 @@ public:
#endif
char num[FloatData<Float>::max_demangled_size] = {0};
int n = snprintf(num, sizeof(num), FloatData<Float>::spec, value);
- OB += StringView(num, num + n);
+ OB += std::string_view(num, n);
}
}
};
@@ -2474,8 +2479,9 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser {
return res;
}
- bool consumeIf(StringView S) {
- if (StringView(First, Last).startsWith(S)) {
+ bool consumeIf(std::string_view S) {
+ if (llvm::itanium_demangle::starts_with(
+ std::string_view(First, Last - First), S)) {
First += S.size();
return true;
}
@@ -2500,10 +2506,10 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser {
size_t numLeft() const { return static_cast<size_t>(Last - First); }
- StringView parseNumber(bool AllowNegative = false);
+ std::string_view parseNumber(bool AllowNegative = false);
Qualifiers parseCVQualifiers();
bool parsePositiveInteger(size_t *Out);
- StringView parseBareSourceName();
+ std::string_view parseBareSourceName();
bool parseSeqId(size_t *Out);
Node *parseSubstitution();
@@ -2514,9 +2520,9 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser {
/// Parse the <expr> production.
Node *parseExpr();
- Node *parsePrefixExpr(StringView Kind, Node::Prec Prec);
- Node *parseBinaryExpr(StringView Kind, Node::Prec Prec);
- Node *parseIntegerLiteral(StringView Lit);
+ Node *parsePrefixExpr(std::string_view Kind, Node::Prec Prec);
+ Node *parseBinaryExpr(std::string_view Kind, Node::Prec Prec);
+ Node *parseIntegerLiteral(std::string_view Lit);
Node *parseExprPrimary();
template <class Float> Node *parseFloatingLiteral();
Node *parseFunctionParam();
@@ -2624,17 +2630,18 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser {
bool operator!=(const char *Peek) const { return !this->operator==(Peek); }
public:
- StringView getSymbol() const {
- StringView Res = Name;
+ std::string_view getSymbol() const {
+ std::string_view Res = Name;
if (Kind < Unnameable) {
- assert(Res.startsWith("operator") &&
+ assert(llvm::itanium_demangle::starts_with(Res, "operator") &&
"operator name does not start with 'operator'");
- Res = Res.dropFront(sizeof("operator") - 1);
- Res.consumeFront(' ');
+ Res.remove_prefix(sizeof("operator") - 1);
+ if (llvm::itanium_demangle::starts_with(Res, ' '))
+ Res.remove_prefix(1);
}
return Res;
}
- StringView getName() const { return Name; }
+ std::string_view getName() const { return Name; }
OIKind getKind() const { return Kind; }
bool getFlag() const { return Flag; }
Node::Prec getPrecedence() const { return Prec; }
@@ -2854,7 +2861,7 @@ AbstractManglingParser<Derived, Alloc>::parseUnnamedTypeName(NameState *State) {
TemplateParams.clear();
if (consumeIf("Ut")) {
- StringView Count = parseNumber();
+ std::string_view Count = parseNumber();
if (!consumeIf('_'))
return nullptr;
return make<UnnamedTypeName>(Count);
@@ -2866,7 +2873,7 @@ AbstractManglingParser<Derived, Alloc>::parseUnnamedTypeName(NameState *State) {
size_t ParamsBegin = Names.size();
while (look() == 'T' &&
- StringView("yptn").find(look(1)) != StringView::npos) {
+ std::string_view("yptn").find(look(1)) != std::string_view::npos) {
Node *T = parseTemplateParamDecl();
if (!T)
return nullptr;
@@ -2909,7 +2916,7 @@ AbstractManglingParser<Derived, Alloc>::parseUnnamedTypeName(NameState *State) {
}
NodeArray Params = popTrailingNodeArray(ParamsBegin);
- StringView Count = parseNumber();
+ std::string_view Count = parseNumber();
if (!consumeIf('_'))
return nullptr;
return make<ClosureTypeName>(TempParams, Params, Count);
@@ -2931,9 +2938,9 @@ Node *AbstractManglingParser<Derived, Alloc>::parseSourceName(NameState *) {
return nullptr;
if (numLeft() < Length || Length == 0)
return nullptr;
- StringView Name(First, First + Length);
+ std::string_view Name(First, Length);
First += Length;
- if (Name.startsWith("_GLOBAL__N"))
+ if (llvm::itanium_demangle::starts_with(Name, "_GLOBAL__N"))
return make<NameType>("(anonymous namespace)");
return make<NameType>(Name);
}
@@ -3447,7 +3454,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseUnresolvedName(bool Global) {
template <typename Derived, typename Alloc>
Node *AbstractManglingParser<Derived, Alloc>::parseAbiTags(Node *N) {
while (consumeIf('B')) {
- StringView SN = parseBareSourceName();
+ std::string_view SN = parseBareSourceName();
if (SN.empty())
return nullptr;
N = make<AbiTagAttr>(N, SN);
@@ -3459,16 +3466,16 @@ Node *AbstractManglingParser<Derived, Alloc>::parseAbiTags(Node *N) {
// <number> ::= [n] <non-negative decimal integer>
template <typename Alloc, typename Derived>
-StringView
+std::string_view
AbstractManglingParser<Alloc, Derived>::parseNumber(bool AllowNegative) {
const char *Tmp = First;
if (AllowNegative)
consumeIf('n');
if (numLeft() == 0 || !std::isdigit(*First))
- return StringView();
+ return std::string_view();
while (numLeft() != 0 && std::isdigit(*First))
++First;
- return StringView(Tmp, First);
+ return std::string_view(Tmp, First - Tmp);
}
// <positive length number> ::= [0-9]*
@@ -3485,11 +3492,11 @@ bool AbstractManglingParser<Alloc, Derived>::parsePositiveInteger(size_t *Out) {
}
template <typename Alloc, typename Derived>
-StringView AbstractManglingParser<Alloc, Derived>::parseBareSourceName() {
+std::string_view AbstractManglingParser<Alloc, Derived>::parseBareSourceName() {
size_t Int = 0;
if (parsePositiveInteger(&Int) || numLeft() < Int)
- return StringView();
- StringView R(First, First + Int);
+ return {};
+ std::string_view R(First, Int);
First += Int;
return R;
}
@@ -3673,7 +3680,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parsePointerToMemberType() {
// ::= Te <name> # dependent elaborated type specifier using 'enum'
template <typename Derived, typename Alloc>
Node *AbstractManglingParser<Derived, Alloc>::parseClassEnumType() {
- StringView ElabSpef;
+ std::string_view ElabSpef;
if (consumeIf("Ts"))
ElabSpef = "struct";
else if (consumeIf("Tu"))
@@ -3697,17 +3704,18 @@ Node *AbstractManglingParser<Derived, Alloc>::parseClassEnumType() {
template <typename Derived, typename Alloc>
Node *AbstractManglingParser<Derived, Alloc>::parseQualifiedType() {
if (consumeIf('U')) {
- StringView Qual = parseBareSourceName();
+ std::string_view Qual = parseBareSourceName();
if (Qual.empty())
return nullptr;
// extension ::= U <objc-name> <objc-type> # objc-type<identifier>
- if (Qual.startsWith("objcproto")) {
- StringView ProtoSourceName = Qual.dropFront(std::strlen("objcproto"));
- StringView Proto;
+ if (llvm::itanium_demangle::starts_with(Qual, "objcproto")) {
+ constexpr size_t Len = sizeof("objcproto") - 1;
+ std::string_view ProtoSourceName(Qual.data() + Len, Qual.size() - Len);
+ std::string_view Proto;
{
- ScopedOverride<const char *> SaveFirst(First, ProtoSourceName.begin()),
- SaveLast(Last, ProtoSourceName.end());
+ ScopedOverride<const char *> SaveFirst(First, ProtoSourceName.data()),
+ SaveLast(Last, &*ProtoSourceName.rbegin() + 1);
Proto = parseBareSourceName();
}
if (Proto.empty())
@@ -3875,7 +3883,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
// <builtin-type> ::= u <source-name> # vendor extended type
case 'u': {
++First;
- StringView Res = parseBareSourceName();
+ std::string_view Res = parseBareSourceName();
if (Res.empty())
return nullptr;
// Typically, <builtin-type>s are not considered substitution candidates,
@@ -4123,8 +4131,9 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
}
template <typename Derived, typename Alloc>
-Node *AbstractManglingParser<Derived, Alloc>::parsePrefixExpr(StringView Kind,
- Node::Prec Prec) {
+Node *
+AbstractManglingParser<Derived, Alloc>::parsePrefixExpr(std::string_view Kind,
+ Node::Prec Prec) {
Node *E = getDerived().parseExpr();
if (E == nullptr)
return nullptr;
@@ -4132,8 +4141,9 @@ Node *AbstractManglingParser<Derived, Alloc>::parsePrefixExpr(StringView Kind,
}
template <typename Derived, typename Alloc>
-Node *AbstractManglingParser<Derived, Alloc>::parseBinaryExpr(StringView Kind,
- Node::Prec Prec) {
+Node *
+AbstractManglingParser<Derived, Alloc>::parseBinaryExpr(std::string_view Kind,
+ Node::Prec Prec) {
Node *LHS = getDerived().parseExpr();
if (LHS == nullptr)
return nullptr;
@@ -4144,9 +4154,9 @@ Node *AbstractManglingParser<Derived, Alloc>::parseBinaryExpr(StringView Kind,
}
template <typename Derived, typename Alloc>
-Node *
-AbstractManglingParser<Derived, Alloc>::parseIntegerLiteral(StringView Lit) {
- StringView Tmp = parseNumber(true);
+Node *AbstractManglingParser<Derived, Alloc>::parseIntegerLiteral(
+ std::string_view Lit) {
+ std::string_view Tmp = parseNumber(true);
if (!Tmp.empty() && consumeIf('E'))
return make<IntegerLiteral>(Lit, Tmp);
return nullptr;
@@ -4176,7 +4186,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseFunctionParam() {
return make<NameType>("this");
if (consumeIf("fp")) {
parseCVQualifiers();
- StringView Num = parseNumber();
+ std::string_view Num = parseNumber();
if (!consumeIf('_'))
return nullptr;
return make<FunctionParam>(Num);
@@ -4187,7 +4197,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseFunctionParam() {
if (!consumeIf('p'))
return nullptr;
parseCVQualifiers();
- StringView Num = parseNumber();
+ std::string_view Num = parseNumber();
if (!consumeIf('_'))
return nullptr;
return make<FunctionParam>(Num);
@@ -4341,7 +4351,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseExprPrimary() {
Node *T = getDerived().parseType();
if (T == nullptr)
return nullptr;
- StringView N = parseNumber(/*AllowNegative=*/true);
+ std::string_view N = parseNumber(/*AllowNegative=*/true);
if (N.empty())
return nullptr;
if (!consumeIf('E'))
@@ -4464,7 +4474,7 @@ AbstractManglingParser<Derived, Alloc>::parsePointerToMemberConversionExpr(
Node *Expr = getDerived().parseExpr();
if (!Expr)
return nullptr;
- StringView Offset = getDerived().parseNumber(true);
+ std::string_view Offset = getDerived().parseNumber(true);
if (!consumeIf('E'))
return nullptr;
return make<PointerToMemberConversionExpr>(Ty, Expr, Offset, Prec);
@@ -4482,7 +4492,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseSubobjectExpr() {
Node *Expr = getDerived().parseExpr();
if (!Expr)
return nullptr;
- StringView Offset = getDerived().parseNumber(true);
+ std::string_view Offset = getDerived().parseNumber(true);
size_t SelectorsBegin = Names.size();
while (consumeIf('_')) {
Node *Selector = make<NameType>(parseNumber());
@@ -5141,7 +5151,7 @@ Node *AbstractManglingParser<Alloc, Derived>::parseFloatingLiteral() {
const size_t N = FloatData<Float>::mangled_size;
if (numLeft() <= N)
return nullptr;
- StringView Data(First, First + N);
+ std::string_view Data(First, N);
for (char C : Data)
if (!std::isxdigit(C))
return nullptr;
@@ -5461,7 +5471,8 @@ Node *AbstractManglingParser<Derived, Alloc>::parse() {
if (Encoding == nullptr)
return nullptr;
if (look() == '.') {
- Encoding = make<DotSuffix>(Encoding, StringView(First, Last));
+ Encoding =
+ make<DotSuffix>(Encoding, std::string_view(First, Last - First));
First = Last;
}
if (numLeft() != 0)
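Editor's note: most of the churn above is mechanical — StringView's convenience members map onto plain std::string_view operations, as in this sketch of the dropFront() replacement pattern used throughout the parser:

#include <algorithm>
#include <cstddef>
#include <string_view>

// Equivalent of the old StringView::dropFront(N): clamp, then remove_prefix.
std::string_view dropFront(std::string_view SV, std::size_t N = 1) {
  SV.remove_prefix(std::min(N, SV.size()));
  return SV;
}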
diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangle.h b/llvm/include/llvm/Demangle/MicrosoftDemangle.h
index f1a5e1b64ebb..1529b803debe 100644
--- a/llvm/include/llvm/Demangle/MicrosoftDemangle.h
+++ b/llvm/include/llvm/Demangle/MicrosoftDemangle.h
@@ -10,8 +10,9 @@
#define LLVM_DEMANGLE_MICROSOFTDEMANGLE_H
#include "llvm/Demangle/MicrosoftDemangleNodes.h"
-#include "llvm/Demangle/StringView.h"
+#include <cassert>
+#include <string_view>
#include <utility>
namespace llvm {
@@ -142,9 +143,9 @@ public:
// You are supposed to call parse() first and then check if error is true. If
// it is false, call output() to write the formatted name to the given stream.
- SymbolNode *parse(StringView &MangledName);
+ SymbolNode *parse(std::string_view &MangledName);
- TagTypeNode *parseTagUniqueName(StringView &MangledName);
+ TagTypeNode *parseTagUniqueName(std::string_view &MangledName);
// True if an error occurred.
bool Error = false;
@@ -152,104 +153,112 @@ public:
void dumpBackReferences();
private:
- SymbolNode *demangleEncodedSymbol(StringView &MangledName,
+ SymbolNode *demangleEncodedSymbol(std::string_view &MangledName,
QualifiedNameNode *QN);
- SymbolNode *demangleDeclarator(StringView &MangledName);
- SymbolNode *demangleMD5Name(StringView &MangledName);
- SymbolNode *demangleTypeinfoName(StringView &MangledName);
+ SymbolNode *demangleDeclarator(std::string_view &MangledName);
+ SymbolNode *demangleMD5Name(std::string_view &MangledName);
+ SymbolNode *demangleTypeinfoName(std::string_view &MangledName);
- VariableSymbolNode *demangleVariableEncoding(StringView &MangledName,
+ VariableSymbolNode *demangleVariableEncoding(std::string_view &MangledName,
StorageClass SC);
- FunctionSymbolNode *demangleFunctionEncoding(StringView &MangledName);
+ FunctionSymbolNode *demangleFunctionEncoding(std::string_view &MangledName);
- Qualifiers demanglePointerExtQualifiers(StringView &MangledName);
+ Qualifiers demanglePointerExtQualifiers(std::string_view &MangledName);
// Parser functions. This is a recursive-descent parser.
- TypeNode *demangleType(StringView &MangledName, QualifierMangleMode QMM);
- PrimitiveTypeNode *demanglePrimitiveType(StringView &MangledName);
- CustomTypeNode *demangleCustomType(StringView &MangledName);
- TagTypeNode *demangleClassType(StringView &MangledName);
- PointerTypeNode *demanglePointerType(StringView &MangledName);
- PointerTypeNode *demangleMemberPointerType(StringView &MangledName);
- FunctionSignatureNode *demangleFunctionType(StringView &MangledName,
+ TypeNode *demangleType(std::string_view &MangledName,
+ QualifierMangleMode QMM);
+ PrimitiveTypeNode *demanglePrimitiveType(std::string_view &MangledName);
+ CustomTypeNode *demangleCustomType(std::string_view &MangledName);
+ TagTypeNode *demangleClassType(std::string_view &MangledName);
+ PointerTypeNode *demanglePointerType(std::string_view &MangledName);
+ PointerTypeNode *demangleMemberPointerType(std::string_view &MangledName);
+ FunctionSignatureNode *demangleFunctionType(std::string_view &MangledName,
bool HasThisQuals);
- ArrayTypeNode *demangleArrayType(StringView &MangledName);
+ ArrayTypeNode *demangleArrayType(std::string_view &MangledName);
- NodeArrayNode *demangleFunctionParameterList(StringView &MangledName,
+ NodeArrayNode *demangleFunctionParameterList(std::string_view &MangledName,
bool &IsVariadic);
- NodeArrayNode *demangleTemplateParameterList(StringView &MangledName);
+ NodeArrayNode *demangleTemplateParameterList(std::string_view &MangledName);
- std::pair<uint64_t, bool> demangleNumber(StringView &MangledName);
- uint64_t demangleUnsigned(StringView &MangledName);
- int64_t demangleSigned(StringView &MangledName);
+ std::pair<uint64_t, bool> demangleNumber(std::string_view &MangledName);
+ uint64_t demangleUnsigned(std::string_view &MangledName);
+ int64_t demangleSigned(std::string_view &MangledName);
- void memorizeString(StringView s);
+ void memorizeString(std::string_view s);
void memorizeIdentifier(IdentifierNode *Identifier);
/// Allocate a copy of \p Borrowed into memory that we own.
- StringView copyString(StringView Borrowed);
+ std::string_view copyString(std::string_view Borrowed);
- QualifiedNameNode *demangleFullyQualifiedTypeName(StringView &MangledName);
- QualifiedNameNode *demangleFullyQualifiedSymbolName(StringView &MangledName);
+ QualifiedNameNode *
+ demangleFullyQualifiedTypeName(std::string_view &MangledName);
+ QualifiedNameNode *
+ demangleFullyQualifiedSymbolName(std::string_view &MangledName);
- IdentifierNode *demangleUnqualifiedTypeName(StringView &MangledName,
+ IdentifierNode *demangleUnqualifiedTypeName(std::string_view &MangledName,
bool Memorize);
- IdentifierNode *demangleUnqualifiedSymbolName(StringView &MangledName,
+ IdentifierNode *demangleUnqualifiedSymbolName(std::string_view &MangledName,
NameBackrefBehavior NBB);
- QualifiedNameNode *demangleNameScopeChain(StringView &MangledName,
+ QualifiedNameNode *demangleNameScopeChain(std::string_view &MangledName,
IdentifierNode *UnqualifiedName);
- IdentifierNode *demangleNameScopePiece(StringView &MangledName);
+ IdentifierNode *demangleNameScopePiece(std::string_view &MangledName);
- NamedIdentifierNode *demangleBackRefName(StringView &MangledName);
- IdentifierNode *demangleTemplateInstantiationName(StringView &MangledName,
- NameBackrefBehavior NBB);
+ NamedIdentifierNode *demangleBackRefName(std::string_view &MangledName);
+ IdentifierNode *
+ demangleTemplateInstantiationName(std::string_view &MangledName,
+ NameBackrefBehavior NBB);
IntrinsicFunctionKind
translateIntrinsicFunctionCode(char CH, FunctionIdentifierCodeGroup Group);
- IdentifierNode *demangleFunctionIdentifierCode(StringView &MangledName);
+ IdentifierNode *demangleFunctionIdentifierCode(std::string_view &MangledName);
IdentifierNode *
- demangleFunctionIdentifierCode(StringView &MangledName,
+ demangleFunctionIdentifierCode(std::string_view &MangledName,
FunctionIdentifierCodeGroup Group);
- StructorIdentifierNode *demangleStructorIdentifier(StringView &MangledName,
- bool IsDestructor);
+ StructorIdentifierNode *
+ demangleStructorIdentifier(std::string_view &MangledName, bool IsDestructor);
ConversionOperatorIdentifierNode *
- demangleConversionOperatorIdentifier(StringView &MangledName);
+ demangleConversionOperatorIdentifier(std::string_view &MangledName);
LiteralOperatorIdentifierNode *
- demangleLiteralOperatorIdentifier(StringView &MangledName);
+ demangleLiteralOperatorIdentifier(std::string_view &MangledName);
- SymbolNode *demangleSpecialIntrinsic(StringView &MangledName);
+ SymbolNode *demangleSpecialIntrinsic(std::string_view &MangledName);
SpecialTableSymbolNode *
- demangleSpecialTableSymbolNode(StringView &MangledName,
+ demangleSpecialTableSymbolNode(std::string_view &MangledName,
SpecialIntrinsicKind SIK);
LocalStaticGuardVariableNode *
- demangleLocalStaticGuard(StringView &MangledName, bool IsThread);
+ demangleLocalStaticGuard(std::string_view &MangledName, bool IsThread);
VariableSymbolNode *demangleUntypedVariable(ArenaAllocator &Arena,
- StringView &MangledName,
- StringView VariableName);
+ std::string_view &MangledName,
+ std::string_view VariableName);
VariableSymbolNode *
demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
- StringView &MangledName);
- FunctionSymbolNode *demangleInitFiniStub(StringView &MangledName,
+ std::string_view &MangledName);
+ FunctionSymbolNode *demangleInitFiniStub(std::string_view &MangledName,
bool IsDestructor);
- NamedIdentifierNode *demangleSimpleName(StringView &MangledName,
+ NamedIdentifierNode *demangleSimpleName(std::string_view &MangledName,
bool Memorize);
- NamedIdentifierNode *demangleAnonymousNamespaceName(StringView &MangledName);
- NamedIdentifierNode *demangleLocallyScopedNamePiece(StringView &MangledName);
- EncodedStringLiteralNode *demangleStringLiteral(StringView &MangledName);
- FunctionSymbolNode *demangleVcallThunkNode(StringView &MangledName);
-
- StringView demangleSimpleString(StringView &MangledName, bool Memorize);
-
- FuncClass demangleFunctionClass(StringView &MangledName);
- CallingConv demangleCallingConvention(StringView &MangledName);
- StorageClass demangleVariableStorageClass(StringView &MangledName);
- bool demangleThrowSpecification(StringView &MangledName);
- wchar_t demangleWcharLiteral(StringView &MangledName);
- uint8_t demangleCharLiteral(StringView &MangledName);
-
- std::pair<Qualifiers, bool> demangleQualifiers(StringView &MangledName);
+ NamedIdentifierNode *
+ demangleAnonymousNamespaceName(std::string_view &MangledName);
+ NamedIdentifierNode *
+ demangleLocallyScopedNamePiece(std::string_view &MangledName);
+ EncodedStringLiteralNode *
+ demangleStringLiteral(std::string_view &MangledName);
+ FunctionSymbolNode *demangleVcallThunkNode(std::string_view &MangledName);
+
+ std::string_view demangleSimpleString(std::string_view &MangledName,
+ bool Memorize);
+
+ FuncClass demangleFunctionClass(std::string_view &MangledName);
+ CallingConv demangleCallingConvention(std::string_view &MangledName);
+ StorageClass demangleVariableStorageClass(std::string_view &MangledName);
+ bool demangleThrowSpecification(std::string_view &MangledName);
+ wchar_t demangleWcharLiteral(std::string_view &MangledName);
+ uint8_t demangleCharLiteral(std::string_view &MangledName);
+
+ std::pair<Qualifiers, bool> demangleQualifiers(std::string_view &MangledName);
// Memory allocator.
ArenaAllocator Arena;
diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
index 8ad2472364b4..1913bff0ada7 100644
--- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
+++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
@@ -13,10 +13,10 @@
#ifndef LLVM_DEMANGLE_MICROSOFTDEMANGLENODES_H
#define LLVM_DEMANGLE_MICROSOFTDEMANGLENODES_H
-#include "llvm/Demangle/StringView.h"
#include <array>
#include <cstdint>
#include <string>
+#include <string_view>
namespace llvm {
namespace itanium_demangle {
@@ -25,7 +25,6 @@ class OutputBuffer;
}
using llvm::itanium_demangle::OutputBuffer;
-using llvm::itanium_demangle::StringView;
namespace llvm {
namespace ms_demangle {
@@ -384,7 +383,7 @@ struct NamedIdentifierNode : public IdentifierNode {
void output(OutputBuffer &OB, OutputFlags Flags) const override;
- StringView Name;
+ std::string_view Name;
};
struct IntrinsicFunctionIdentifierNode : public IdentifierNode {
@@ -403,7 +402,7 @@ struct LiteralOperatorIdentifierNode : public IdentifierNode {
void output(OutputBuffer &OB, OutputFlags Flags) const override;
- StringView Name;
+ std::string_view Name;
};
struct LocalStaticGuardIdentifierNode : public IdentifierNode {
@@ -516,7 +515,8 @@ struct NodeArrayNode : public Node {
void output(OutputBuffer &OB, OutputFlags Flags) const override;
- void output(OutputBuffer &OB, OutputFlags Flags, StringView Separator) const;
+ void output(OutputBuffer &OB, OutputFlags Flags,
+ std::string_view Separator) const;
Node **Nodes = nullptr;
size_t Count = 0;
@@ -601,7 +601,7 @@ struct EncodedStringLiteralNode : public SymbolNode {
void output(OutputBuffer &OB, OutputFlags Flags) const override;
- StringView DecodedString;
+ std::string_view DecodedString;
bool IsTruncated = false;
CharKind Char = CharKind::Char;
};
diff --git a/llvm/include/llvm/Demangle/StringView.h b/llvm/include/llvm/Demangle/StringView.h
deleted file mode 100644
index 6bbb8837fed1..000000000000
--- a/llvm/include/llvm/Demangle/StringView.h
+++ /dev/null
@@ -1,122 +0,0 @@
-//===--- StringView.h ----------------*- mode:c++;eval:(read-only-mode) -*-===//
-// Do not edit! See README.txt.
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// FIXME: Use std::string_view instead when we support C++17.
-// There are two copies of this file in the source tree. The one under
-// libcxxabi is the original and the one under llvm is the copy. Use
-// cp-to-llvm.sh to update the copy. See README.txt for more details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef DEMANGLE_STRINGVIEW_H
-#define DEMANGLE_STRINGVIEW_H
-
-#include "DemangleConfig.h"
-#include <cassert>
-#include <cstring>
-
-DEMANGLE_NAMESPACE_BEGIN
-
-class StringView {
- const char *First;
- const char *Last;
-
-public:
- static const size_t npos = ~size_t(0);
-
- template <size_t N>
- StringView(const char (&Str)[N]) : First(Str), Last(Str + N - 1) {}
- StringView(const char *First_, const char *Last_)
- : First(First_), Last(Last_) {}
- StringView(const char *First_, size_t Len)
- : First(First_), Last(First_ + Len) {}
- StringView(const char *Str) : First(Str), Last(Str + std::strlen(Str)) {}
- StringView() : First(nullptr), Last(nullptr) {}
-
- StringView substr(size_t Pos, size_t Len = npos) const {
- assert(Pos <= size());
- if (Len > size() - Pos)
- Len = size() - Pos;
- return StringView(begin() + Pos, Len);
- }
-
- size_t find(char C, size_t From = 0) const {
- // Avoid calling memchr with nullptr.
- if (From < size()) {
- // Just forward to memchr, which is faster than a hand-rolled loop.
- if (const void *P = ::memchr(First + From, C, size() - From))
- return size_t(static_cast<const char *>(P) - First);
- }
- return npos;
- }
-
- StringView dropFront(size_t N = 1) const {
- if (N >= size())
- N = size();
- return StringView(First + N, Last);
- }
-
- StringView dropBack(size_t N = 1) const {
- if (N >= size())
- N = size();
- return StringView(First, Last - N);
- }
-
- char front() const {
- assert(!empty());
- return *begin();
- }
-
- char back() const {
- assert(!empty());
- return *(end() - 1);
- }
-
- char popFront() {
- assert(!empty());
- return *First++;
- }
-
- bool consumeFront(char C) {
- if (!startsWith(C))
- return false;
- *this = dropFront(1);
- return true;
- }
-
- bool consumeFront(StringView S) {
- if (!startsWith(S))
- return false;
- *this = dropFront(S.size());
- return true;
- }
-
- bool startsWith(char C) const { return !empty() && *begin() == C; }
-
- bool startsWith(StringView Str) const {
- if (Str.size() > size())
- return false;
- return std::strncmp(Str.begin(), begin(), Str.size()) == 0;
- }
-
- const char &operator[](size_t Idx) const { return *(begin() + Idx); }
-
- const char *begin() const { return First; }
- const char *end() const { return Last; }
- size_t size() const { return static_cast<size_t>(Last - First); }
- bool empty() const { return First == Last; }
-};
-
-inline bool operator==(const StringView &LHS, const StringView &RHS) {
- return LHS.size() == RHS.size() &&
- std::strncmp(LHS.begin(), RHS.begin(), LHS.size()) == 0;
-}
-
-DEMANGLE_NAMESPACE_END
-
-#endif
diff --git a/llvm/include/llvm/Demangle/StringViewExtras.h b/llvm/include/llvm/Demangle/StringViewExtras.h
new file mode 100644
index 000000000000..93940a545e1f
--- /dev/null
+++ b/llvm/include/llvm/Demangle/StringViewExtras.h
@@ -0,0 +1,38 @@
+//===--- StringViewExtras.h ----------*- mode:c++;eval:(read-only-mode) -*-===//
+// Do not edit! See README.txt.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// There are two copies of this file in the source tree. The one under
+// libcxxabi is the original and the one under llvm is the copy. Use
+// cp-to-llvm.sh to update the copy. See README.txt for more details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef DEMANGLE_STRINGVIEW_H
+#define DEMANGLE_STRINGVIEW_H
+
+#include "DemangleConfig.h"
+
+#include <string_view>
+
+DEMANGLE_NAMESPACE_BEGIN
+
+inline bool starts_with(std::string_view self, char C) noexcept {
+ return !self.empty() && *self.begin() == C;
+}
+
+inline bool starts_with(std::string_view haystack,
+ std::string_view needle) noexcept {
+ if (needle.size() > haystack.size())
+ return false;
+ haystack.remove_suffix(haystack.size() - needle.size());
+ return haystack == needle;
+}
+
+DEMANGLE_NAMESPACE_END
+
+#endif
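
A minimal usage sketch for the two starts_with overloads above. It is illustrative only: looksMangled is a hypothetical helper, and the DEMANGLE_NAMESPACE macros are assumed to expand to llvm::itanium_demangle, as they do in the llvm copy of the demangler.

#include "llvm/Demangle/StringViewExtras.h"

#include <string_view>

// Exercises both overloads shown above: the string_view-prefix form and the
// single-character form.
static bool looksMangled(std::string_view Name) {
  using llvm::itanium_demangle::starts_with;
  return starts_with(Name, "_Z") || starts_with(Name, '_');
}
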
diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h
index 855c56e9df32..a906d238cf44 100644
--- a/llvm/include/llvm/Demangle/Utility.h
+++ b/llvm/include/llvm/Demangle/Utility.h
@@ -16,13 +16,16 @@
#ifndef DEMANGLE_UTILITY_H
#define DEMANGLE_UTILITY_H
-#include "StringView.h"
+#include "DemangleConfig.h"
+
#include <array>
+#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <limits>
+#include <string_view>
DEMANGLE_NAMESPACE_BEGIN
@@ -64,7 +67,8 @@ class OutputBuffer {
if (isNeg)
*--TempPtr = '-';
- return operator+=(StringView(TempPtr, Temp.data() + Temp.size()));
+ return operator+=(
+ std::string_view(TempPtr, Temp.data() + Temp.size() - TempPtr));
}
public:
@@ -77,7 +81,9 @@ public:
OutputBuffer(const OutputBuffer &) = delete;
OutputBuffer &operator=(const OutputBuffer &) = delete;
- operator StringView() const { return StringView(Buffer, CurrentPosition); }
+ operator std::string_view() const {
+ return std::string_view(Buffer, CurrentPosition);
+ }
/// If a ParameterPackExpansion (or similar type) is encountered, the offset
/// into the pack that we're currently printing.
@@ -99,10 +105,10 @@ public:
*this += Close;
}
- OutputBuffer &operator+=(StringView R) {
+ OutputBuffer &operator+=(std::string_view R) {
if (size_t Size = R.size()) {
grow(Size);
- std::memcpy(Buffer + CurrentPosition, R.begin(), Size);
+ std::memcpy(Buffer + CurrentPosition, &*R.begin(), Size);
CurrentPosition += Size;
}
return *this;
@@ -114,18 +120,18 @@ public:
return *this;
}
- OutputBuffer &prepend(StringView R) {
+ OutputBuffer &prepend(std::string_view R) {
size_t Size = R.size();
grow(Size);
std::memmove(Buffer + Size, Buffer, CurrentPosition);
- std::memcpy(Buffer, R.begin(), Size);
+ std::memcpy(Buffer, &*R.begin(), Size);
CurrentPosition += Size;
return *this;
}
- OutputBuffer &operator<<(StringView R) { return (*this += R); }
+ OutputBuffer &operator<<(std::string_view R) { return (*this += R); }
OutputBuffer &operator<<(char C) { return (*this += C); }
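
A short sketch of what call sites look like after this migration. Every member used below (operator<<, prepend, and the std::string_view conversion) appears in the hunks above; demoAppend is a hypothetical helper, and the llvm::itanium_demangle namespace is assumed from the DEMANGLE_NAMESPACE macros.

#include "llvm/Demangle/Utility.h"

#include <string_view>

static std::string_view demoAppend(llvm::itanium_demangle::OutputBuffer &OB) {
  OB << std::string_view("foo") << '(' << "int" << ')'; // operator<< overloads
  OB.prepend("ns::");                        // prepend(std::string_view)
  return OB;                                 // operator std::string_view()
}
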
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h b/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h
index 49e1ce7278ff..7fb61b6a021a 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h
@@ -13,10 +13,10 @@
#ifndef LLVM_EXECUTIONENGINE_JITLINK_EHFRAMESUPPORT_H
#define LLVM_EXECUTIONENGINE_JITLINK_EHFRAMESUPPORT_H
-#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/Support/Error.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
namespace jitlink {
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h
new file mode 100644
index 000000000000..25d1c3aac2c2
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h
@@ -0,0 +1,38 @@
+//===---- ELF_aarch32.h - JIT link functions for arm/thumb -----*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// jit-link functions for ELF/aarch32.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH32
+#define LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH32
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/JITLink/aarch32.h"
+
+namespace llvm {
+namespace jitlink {
+
+/// Create a LinkGraph from an ELF/arm relocatable object
+///
+/// Note: The graph does not take ownership of the underlying buffer, nor copy
+/// its contents. The caller is responsible for ensuring that the object buffer
+/// outlives the graph.
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromELFObject_aarch32(MemoryBufferRef ObjectBuffer);
+
+/// jit-link the given object buffer, which must be an ELF arm/thumb object
+/// file.
+void link_ELF_aarch32(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx);
+
+} // end namespace jitlink
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH32
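
A hedged sketch of driving the two entry points above. linkArmObject is a hypothetical helper; the JITLinkContext setup is left to the caller (in-tree clients normally reach this code through ORC's ObjectLinkingLayer).

#include "llvm/ExecutionEngine/JITLink/ELF_aarch32.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"

#include <memory>
#include <utility>

// ObjBuffer must outlive the graph, as required by the note above.
static llvm::Error
linkArmObject(llvm::MemoryBufferRef ObjBuffer,
              std::unique_ptr<llvm::jitlink::JITLinkContext> Ctx) {
  auto G = llvm::jitlink::createLinkGraphFromELFObject_aarch32(ObjBuffer);
  if (!G)
    return G.takeError();
  // Hands ownership of both the graph and the context to the linker.
  llvm::jitlink::link_ELF_aarch32(std::move(*G), std::move(Ctx));
  return llvm::Error::success();
}
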
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_ppc64.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_ppc64.h
new file mode 100644
index 000000000000..8db986a4a9fa
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_ppc64.h
@@ -0,0 +1,50 @@
+//===------ ELF_ppc64.h - JIT link functions for ELF/ppc64 ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// jit-link functions for ELF/ppc64{le}.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_ELF_PPC64_H
+#define LLVM_EXECUTIONENGINE_JITLINK_ELF_PPC64_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+
+namespace llvm::jitlink {
+
+/// Create a LinkGraph from an ELF/ppc64 relocatable object.
+///
+/// Note: The graph does not take ownership of the underlying buffer, nor copy
+/// its contents. The caller is responsible for ensuring that the object buffer
+/// outlives the graph.
+///
+/// WARNING: The big-endian backend has not been tested yet.
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromELFObject_ppc64(MemoryBufferRef ObjectBuffer);
+
+/// Create a LinkGraph from an ELF/ppc64le relocatable object.
+///
+/// Note: The graph does not take ownership of the underlying buffer, nor copy
+/// its contents. The caller is responsible for ensuring that the object buffer
+/// outlives the graph.
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromELFObject_ppc64le(MemoryBufferRef ObjectBuffer);
+
+/// jit-link the given object buffer, which must be an ELF ppc64 object file.
+///
+/// WARNING: The big-endian backend has not been tested yet.
+void link_ELF_ppc64(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx);
+
+/// jit-link the given object buffer, which must be an ELF ppc64le object file.
+void link_ELF_ppc64le(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx);
+
+} // end namespace llvm::jitlink
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_PPC64_H
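
A small sketch of choosing between the two creation functions above. createPPC64Graph is a hypothetical helper, and IsLittleEndian stands in for whatever the caller derives from the object's ELF header; the generic ELF front end normally performs this dispatch itself.

#include "llvm/ExecutionEngine/JITLink/ELF_ppc64.h"
#include "llvm/Support/MemoryBuffer.h"

#include <memory>

static llvm::Expected<std::unique_ptr<llvm::jitlink::LinkGraph>>
createPPC64Graph(llvm::MemoryBufferRef ObjBuffer, bool IsLittleEndian) {
  if (IsLittleEndian)
    return llvm::jitlink::createLinkGraphFromELFObject_ppc64le(ObjBuffer);
  // Per the warning above, the big-endian path is untested.
  return llvm::jitlink::createLinkGraphFromELFObject_ppc64(ObjBuffer);
}
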
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h
index 5a8b186a2c3e..a0e573baca06 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h
@@ -32,6 +32,10 @@ createLinkGraphFromELFObject_riscv(MemoryBufferRef ObjectBuffer);
void link_ELF_riscv(std::unique_ptr<LinkGraph> G,
std::unique_ptr<JITLinkContext> Ctx);
+/// Returns a pass that performs linker relaxation. Should be added to
+/// PostAllocationPasses.
+LinkGraphPassFunction createRelaxationPass_ELF_riscv();
+
} // end namespace jitlink
} // end namespace llvm
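
A sketch of where the new pass is meant to go. addRiscvRelaxation is a hypothetical helper; PassConfiguration and its PostAllocationPasses list are the existing JITLink pass-pipeline hooks declared in JITLink.h and are unchanged by this hunk.

#include "llvm/ExecutionEngine/JITLink/ELF_riscv.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"

// Appends the RISC-V linker-relaxation pass at the point the comment above
// prescribes: after allocation, before fixups are applied.
static void addRiscvRelaxation(llvm::jitlink::PassConfiguration &Config) {
  Config.PostAllocationPasses.push_back(
      llvm::jitlink::createRelaxationPass_ELF_riscv());
}
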
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
index 0f0fa6cae316..568c9cf87f80 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h
@@ -16,9 +16,10 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h"
#include "llvm/ExecutionEngine/Orc/Shared/MemoryFlags.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/BinaryStreamReader.h"
@@ -28,6 +29,8 @@
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/TargetParser/Triple.h"
#include <optional>
#include <map>
@@ -163,7 +166,7 @@ private:
assert(AlignmentOffset <= MaxAlignmentOffset &&
"Alignment offset exceeds maximum");
ContentMutable = false;
- P2Align = Alignment ? countTrailingZeros(Alignment) : 0;
+ P2Align = Alignment ? llvm::countr_zero(Alignment) : 0;
this->AlignmentOffset = AlignmentOffset;
}
@@ -180,7 +183,7 @@ private:
assert(AlignmentOffset <= MaxAlignmentOffset &&
"Alignment offset exceeds maximum");
ContentMutable = false;
- P2Align = Alignment ? countTrailingZeros(Alignment) : 0;
+ P2Align = Alignment ? llvm::countr_zero(Alignment) : 0;
this->AlignmentOffset = AlignmentOffset;
}
@@ -199,7 +202,7 @@ private:
assert(AlignmentOffset <= MaxAlignmentOffset &&
"Alignment offset exceeds maximum");
ContentMutable = true;
- P2Align = Alignment ? countTrailingZeros(Alignment) : 0;
+ P2Align = Alignment ? llvm::countr_zero(Alignment) : 0;
this->AlignmentOffset = AlignmentOffset;
}
@@ -289,7 +292,7 @@ public:
/// Set the alignment for this content.
void setAlignment(uint64_t Alignment) {
assert(isPowerOf2_64(Alignment) && "Alignment must be a power of two");
- P2Align = Alignment ? countTrailingZeros(Alignment) : 0;
+ P2Align = Alignment ? llvm::countr_zero(Alignment) : 0;
}
/// Get the alignment offset for this content.
@@ -352,23 +355,30 @@ private:
};
// Align an address to conform with block alignment requirements.
-inline uint64_t alignToBlock(uint64_t Addr, Block &B) {
+inline uint64_t alignToBlock(uint64_t Addr, const Block &B) {
uint64_t Delta = (B.getAlignmentOffset() - Addr) % B.getAlignment();
return Addr + Delta;
}
// Align a orc::ExecutorAddr to conform with block alignment requirements.
-inline orc::ExecutorAddr alignToBlock(orc::ExecutorAddr Addr, Block &B) {
+inline orc::ExecutorAddr alignToBlock(orc::ExecutorAddr Addr, const Block &B) {
return orc::ExecutorAddr(alignToBlock(Addr.getValue(), B));
}
-/// Describes symbol linkage. This can be used to make resolve definition
-/// clashes.
+// Returns true if the given block contains exactly one valid c-string.
+// Zero-fill blocks of size 1 count as valid empty strings. Content blocks
+// must end with a zero, and contain no zeros before the end.
+bool isCStringBlock(Block &B);
+
+/// Describes symbol linkage. This can be used to resolve definition clashes.
enum class Linkage : uint8_t {
Strong,
Weak,
};
+/// Holds target-specific properties for a symbol.
+using TargetFlagsType = uint8_t;
+
/// For errors and debugging output.
const char *getLinkageName(Linkage L);
@@ -413,6 +423,7 @@ private:
setScope(S);
setLive(IsLive);
setCallable(IsCallable);
+ setTargetFlags(TargetFlagsType{});
}
static Symbol &constructExternal(BumpPtrAllocator &Allocator,
@@ -553,6 +564,11 @@ public:
/// Returns the offset for this symbol within the underlying addressable.
orc::ExecutorAddrDiff getOffset() const { return Offset; }
+ void setOffset(orc::ExecutorAddrDiff NewOffset) {
+ assert(NewOffset < getBlock().getSize() && "Offset out of range");
+ Offset = NewOffset;
+ }
+
/// Returns the address of this symbol.
orc::ExecutorAddr getAddress() const { return Base->getAddress() + Offset; }
@@ -606,6 +622,17 @@ public:
this->S = static_cast<uint8_t>(S);
}
+  /// Check whether the given target flags are set for this Symbol.
+ bool hasTargetFlags(TargetFlagsType Flags) const {
+ return static_cast<TargetFlagsType>(TargetFlags) & Flags;
+ }
+
+ /// Set the target flags for this Symbol.
+ void setTargetFlags(TargetFlagsType Flags) {
+ assert(Flags <= 1 && "Add more bits to store more than single flag");
+ TargetFlags = Flags;
+ }
+
/// Returns true if this is a weakly referenced external symbol.
/// This method may only be called on external symbols.
bool isWeaklyReferenced() const {
@@ -640,22 +667,18 @@ private:
void setBlock(Block &B) { Base = &B; }
- void setOffset(orc::ExecutorAddrDiff NewOffset) {
- assert(NewOffset <= MaxOffset && "Offset out of range");
- Offset = NewOffset;
- }
-
static constexpr uint64_t MaxOffset = (1ULL << 59) - 1;
// FIXME: A char* or SymbolStringPtr may pack better.
StringRef Name;
Addressable *Base = nullptr;
- uint64_t Offset : 58;
+ uint64_t Offset : 57;
uint64_t L : 1;
uint64_t S : 2;
uint64_t IsLive : 1;
uint64_t IsCallable : 1;
uint64_t WeakRef : 1;
+ uint64_t TargetFlags : 1;
size_t Size = 0;
};
@@ -699,15 +722,18 @@ public:
/// Set the protection flags for this section.
void setMemProt(orc::MemProt Prot) { this->Prot = Prot; }
- /// Get the deallocation policy for this section.
- orc::MemDeallocPolicy getMemDeallocPolicy() const { return MDP; }
+ /// Get the memory lifetime policy for this section.
+ orc::MemLifetimePolicy getMemLifetimePolicy() const { return MLP; }
- /// Set the deallocation policy for this section.
- void setMemDeallocPolicy(orc::MemDeallocPolicy MDP) { this->MDP = MDP; }
+ /// Set the memory lifetime policy for this section.
+ void setMemLifetimePolicy(orc::MemLifetimePolicy MLP) { this->MLP = MLP; }
/// Returns the ordinal for this section.
SectionOrdinal getOrdinal() const { return SecOrdinal; }
+ /// Returns true if this section is empty (contains no blocks or symbols).
+ bool empty() const { return Blocks.empty(); }
+
/// Returns an iterator over the blocks defined in this section.
iterator_range<block_iterator> blocks() {
return make_range(Blocks.begin(), Blocks.end());
@@ -768,7 +794,7 @@ private:
StringRef Name;
orc::MemProt Prot;
- orc::MemDeallocPolicy MDP = orc::MemDeallocPolicy::Standard;
+ orc::MemLifetimePolicy MLP = orc::MemLifetimePolicy::Standard;
SectionOrdinal SecOrdinal = 0;
BlockSet Blocks;
SymbolSet Symbols;
@@ -821,7 +847,7 @@ private:
class LinkGraph {
private:
- using SectionList = std::vector<std::unique_ptr<Section>>;
+ using SectionMap = DenseMap<StringRef, std::unique_ptr<Section>>;
using ExternalSymbolSet = DenseSet<Symbol *>;
using BlockSet = DenseSet<Block *>;
@@ -860,7 +886,7 @@ private:
}
static iterator_range<Section::const_block_iterator>
- getSectionConstBlocks(Section &S) {
+ getSectionConstBlocks(const Section &S) {
return S.blocks();
}
@@ -870,15 +896,27 @@ private:
}
static iterator_range<Section::const_symbol_iterator>
- getSectionConstSymbols(Section &S) {
+ getSectionConstSymbols(const Section &S) {
return S.symbols();
}
+ struct GetSectionMapEntryValue {
+ Section &operator()(SectionMap::value_type &KV) const { return *KV.second; }
+ };
+
+ struct GetSectionMapEntryConstValue {
+ const Section &operator()(const SectionMap::value_type &KV) const {
+ return *KV.second;
+ }
+ };
+
public:
using external_symbol_iterator = ExternalSymbolSet::iterator;
- using section_iterator = pointee_iterator<SectionList::iterator>;
- using const_section_iterator = pointee_iterator<SectionList::const_iterator>;
+ using section_iterator =
+ mapped_iterator<SectionMap::iterator, GetSectionMapEntryValue>;
+ using const_section_iterator =
+ mapped_iterator<SectionMap::const_iterator, GetSectionMapEntryConstValue>;
template <typename OuterItrT, typename InnerItrT, typename T,
iterator_range<InnerItrT> getInnerRange(
@@ -928,18 +966,17 @@ public:
};
using defined_symbol_iterator =
- nested_collection_iterator<const_section_iterator,
- Section::symbol_iterator, Symbol *,
- getSectionSymbols>;
+ nested_collection_iterator<section_iterator, Section::symbol_iterator,
+ Symbol *, getSectionSymbols>;
using const_defined_symbol_iterator =
nested_collection_iterator<const_section_iterator,
Section::const_symbol_iterator, const Symbol *,
getSectionConstSymbols>;
- using block_iterator = nested_collection_iterator<const_section_iterator,
- Section::block_iterator,
- Block *, getSectionBlocks>;
+ using block_iterator =
+ nested_collection_iterator<section_iterator, Section::block_iterator,
+ Block *, getSectionBlocks>;
using const_block_iterator =
nested_collection_iterator<const_section_iterator,
@@ -948,11 +985,18 @@ public:
using GetEdgeKindNameFunction = const char *(*)(Edge::Kind);
+ LinkGraph(std::string Name, const Triple &TT, SubtargetFeatures Features,
+ unsigned PointerSize, support::endianness Endianness,
+ GetEdgeKindNameFunction GetEdgeKindName)
+ : Name(std::move(Name)), TT(TT), Features(std::move(Features)),
+ PointerSize(PointerSize), Endianness(Endianness),
+ GetEdgeKindName(std::move(GetEdgeKindName)) {}
+
LinkGraph(std::string Name, const Triple &TT, unsigned PointerSize,
support::endianness Endianness,
GetEdgeKindNameFunction GetEdgeKindName)
- : Name(std::move(Name)), TT(TT), PointerSize(PointerSize),
- Endianness(Endianness), GetEdgeKindName(std::move(GetEdgeKindName)) {}
+ : LinkGraph(std::move(Name), TT, SubtargetFeatures(), PointerSize,
+ Endianness, GetEdgeKindName) {}
LinkGraph(const LinkGraph &) = delete;
LinkGraph &operator=(const LinkGraph &) = delete;
@@ -966,6 +1010,9 @@ public:
/// Returns the target triple for this Graph.
const Triple &getTargetTriple() const { return TT; }
+ /// Return the subtarget features for this Graph.
+ const SubtargetFeatures &getFeatures() const { return Features; }
+
/// Returns the pointer size for use in this graph.
unsigned getPointerSize() const { return PointerSize; }
@@ -996,7 +1043,7 @@ public:
/// Note: This Twine-based overload requires an extra string copy and an
/// extra heap allocation for large strings. The ArrayRef<char> overload
/// should be preferred where possible.
- MutableArrayRef<char> allocateString(Twine Source) {
+ MutableArrayRef<char> allocateContent(Twine Source) {
SmallString<256> TmpBuffer;
auto SourceStr = Source.toStringRef(TmpBuffer);
auto *AllocatedBuffer = Allocator.Allocate<char>(SourceStr.size());
@@ -1004,16 +1051,41 @@ public:
return MutableArrayRef<char>(AllocatedBuffer, SourceStr.size());
}
+ /// Allocate a copy of the given string using the LinkGraph's allocator.
+ ///
+ /// The allocated string will be terminated with a null character, and the
+ /// returned MutableArrayRef will include this null character in the last
+ /// position.
+ MutableArrayRef<char> allocateCString(StringRef Source) {
+ char *AllocatedBuffer = Allocator.Allocate<char>(Source.size() + 1);
+ llvm::copy(Source, AllocatedBuffer);
+ AllocatedBuffer[Source.size()] = '\0';
+ return MutableArrayRef<char>(AllocatedBuffer, Source.size() + 1);
+ }
+
+ /// Allocate a copy of the given string using the LinkGraph's allocator.
+ ///
+ /// The allocated string will be terminated with a null character, and the
+ /// returned MutableArrayRef will include this null character in the last
+ /// position.
+ ///
+ /// Note: This Twine-based overload requires an extra string copy and an
+ /// extra heap allocation for large strings. The ArrayRef<char> overload
+ /// should be preferred where possible.
+ MutableArrayRef<char> allocateCString(Twine Source) {
+ SmallString<256> TmpBuffer;
+ auto SourceStr = Source.toStringRef(TmpBuffer);
+ auto *AllocatedBuffer = Allocator.Allocate<char>(SourceStr.size() + 1);
+ llvm::copy(SourceStr, AllocatedBuffer);
+ AllocatedBuffer[SourceStr.size()] = '\0';
+ return MutableArrayRef<char>(AllocatedBuffer, SourceStr.size() + 1);
+ }
+
/// Create a section with the given name, protection flags, and alignment.
Section &createSection(StringRef Name, orc::MemProt Prot) {
- assert(llvm::none_of(Sections,
- [&](std::unique_ptr<Section> &Sec) {
- return Sec->getName() == Name;
- }) &&
- "Duplicate section name");
+ assert(!Sections.count(Name) && "Duplicate section name");
std::unique_ptr<Section> Sec(new Section(Name, Prot, Sections.size()));
- Sections.push_back(std::move(Sec));
- return *Sections.back();
+ return *Sections.insert(std::make_pair(Name, std::move(Sec))).first->second;
}
/// Create a content block.
@@ -1041,7 +1113,7 @@ public:
orc::ExecutorAddr Address,
uint64_t Alignment, uint64_t AlignmentOffset,
bool ZeroInitialize = true) {
- auto Content = allocateContent(ContentSize);
+ auto Content = allocateBuffer(ContentSize);
if (ZeroInitialize)
memset(Content.data(), 0, Content.size());
return createBlock(Parent, Content, Address, Alignment, AlignmentOffset);
@@ -1172,29 +1244,39 @@ public:
}
iterator_range<section_iterator> sections() {
- return make_range(section_iterator(Sections.begin()),
- section_iterator(Sections.end()));
+ return make_range(
+ section_iterator(Sections.begin(), GetSectionMapEntryValue()),
+ section_iterator(Sections.end(), GetSectionMapEntryValue()));
}
- SectionList::size_type sections_size() const { return Sections.size(); }
+ iterator_range<const_section_iterator> sections() const {
+ return make_range(
+ const_section_iterator(Sections.begin(),
+ GetSectionMapEntryConstValue()),
+ const_section_iterator(Sections.end(), GetSectionMapEntryConstValue()));
+ }
+
+ size_t sections_size() const { return Sections.size(); }
/// Returns the section with the given name if it exists, otherwise returns
/// null.
Section *findSectionByName(StringRef Name) {
- for (auto &S : sections())
- if (S.getName() == Name)
- return &S;
- return nullptr;
+ auto I = Sections.find(Name);
+ if (I == Sections.end())
+ return nullptr;
+ return I->second.get();
}
iterator_range<block_iterator> blocks() {
- return make_range(block_iterator(Sections.begin(), Sections.end()),
- block_iterator(Sections.end(), Sections.end()));
+ auto Secs = sections();
+ return make_range(block_iterator(Secs.begin(), Secs.end()),
+ block_iterator(Secs.end(), Secs.end()));
}
iterator_range<const_block_iterator> blocks() const {
- return make_range(const_block_iterator(Sections.begin(), Sections.end()),
- const_block_iterator(Sections.end(), Sections.end()));
+ auto Secs = sections();
+ return make_range(const_block_iterator(Secs.begin(), Secs.end()),
+ const_block_iterator(Secs.end(), Secs.end()));
}
iterator_range<external_symbol_iterator> external_symbols() {
@@ -1206,14 +1288,15 @@ public:
}
iterator_range<defined_symbol_iterator> defined_symbols() {
- return make_range(defined_symbol_iterator(Sections.begin(), Sections.end()),
- defined_symbol_iterator(Sections.end(), Sections.end()));
+ auto Secs = sections();
+ return make_range(defined_symbol_iterator(Secs.begin(), Secs.end()),
+ defined_symbol_iterator(Secs.end(), Secs.end()));
}
iterator_range<const_defined_symbol_iterator> defined_symbols() const {
- return make_range(
- const_defined_symbol_iterator(Sections.begin(), Sections.end()),
- const_defined_symbol_iterator(Sections.end(), Sections.end()));
+ auto Secs = sections();
+ return make_range(const_defined_symbol_iterator(Secs.begin(), Secs.end()),
+ const_defined_symbol_iterator(Secs.end(), Secs.end()));
}
/// Make the given symbol external (must not already be external).
@@ -1412,11 +1495,10 @@ public:
/// Remove a section. The section reference is defunct after calling this
/// function and should no longer be used.
void removeSection(Section &Sec) {
- auto I = llvm::find_if(Sections, [&Sec](const std::unique_ptr<Section> &S) {
- return S.get() == &Sec;
- });
- assert(I != Sections.end() && "Section does not appear in this graph");
- Sections.erase(I);
+ assert(Sections.count(Sec.getName()) && "Section not found");
+ assert(Sections.find(Sec.getName())->second.get() == &Sec &&
+ "Section map entry invalid");
+ Sections.erase(Sec.getName());
}
/// Accessor for the AllocActions object for this graph. This can be used to
@@ -1436,10 +1518,11 @@ private:
std::string Name;
Triple TT;
+ SubtargetFeatures Features;
unsigned PointerSize;
support::endianness Endianness;
GetEdgeKindNameFunction GetEdgeKindName = nullptr;
- SectionList Sections;
+ DenseMap<StringRef, std::unique_ptr<Section>> Sections;
ExternalSymbolSet ExternalSymbols;
ExternalSymbolSet AbsoluteSymbols;
orc::shared::AllocActions AAs;
@@ -1592,7 +1675,7 @@ private:
};
/// A function for mutating LinkGraphs.
-using LinkGraphPassFunction = std::function<Error(LinkGraph &)>;
+using LinkGraphPassFunction = unique_function<Error(LinkGraph &)>;
/// A list of LinkGraph passes.
using LinkGraphPassList = std::vector<LinkGraphPassFunction>;
@@ -1662,7 +1745,7 @@ enum class SymbolLookupFlags { RequiredSymbol, WeaklyReferencedSymbol };
raw_ostream &operator<<(raw_ostream &OS, const SymbolLookupFlags &LF);
/// A map of symbol names to resolved addresses.
-using AsyncLookupResult = DenseMap<StringRef, JITEvaluatedSymbol>;
+using AsyncLookupResult = DenseMap<StringRef, orc::ExecutorSymbolDef>;
/// A function object to call with a resolved symbol map (See AsyncLookupResult)
/// or an error if resolution failed.
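
A hedged sketch exercising the LinkGraph changes above: the SubtargetFeatures-aware constructor, allocateCString, and the map-backed findSectionByName. buildDemoGraph is a hypothetical helper, and getGenericEdgeKindName is the generic edge-kind printer declared elsewhere in JITLink.h.

#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/TargetParser/SubtargetFeature.h"
#include "llvm/TargetParser/Triple.h"

#include <cassert>

static void buildDemoGraph() {
  using namespace llvm;
  using namespace llvm::jitlink;

  LinkGraph G("demo", Triple("x86_64-unknown-linux-gnu"), SubtargetFeatures(),
              /*PointerSize=*/8, support::endianness::little,
              getGenericEdgeKindName);

  Section &Strings = G.createSection("__strings", orc::MemProt::Read);

  // allocateCString null-terminates and includes the terminator in the
  // returned buffer, so the block below satisfies isCStringBlock.
  auto Content = G.allocateCString(StringRef("hello"));
  G.createContentBlock(Strings, Content, orc::ExecutorAddr(0x1000),
                       /*Alignment=*/1, /*AlignmentOffset=*/0);

  // Section lookup now goes through the name-keyed section map.
  assert(G.findSectionByName("__strings") == &Strings);
}
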
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
index 6ef4a0bd0c98..09e0d71cf0bd 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h
@@ -291,6 +291,9 @@ private:
/// Segment. Clients can obtain a pointer to the working memory and executor
/// address of that block using the Segment's AllocGroup. Once memory has been
/// populated, clients can call finalize to finalize the memory.
+///
+/// Note: Segments with MemLifetimePolicy::NoAlloc are not permitted, since
+/// they would not be useful, and their presence is likely to indicate a bug.
class SimpleSegmentAlloc {
public:
/// Describes a segment to be allocated.
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h
index 28996cbea35b..7ab8ae3e53ce 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/TableManager.h
@@ -38,7 +38,7 @@ public:
if (EntryI == Entries.end()) {
auto &Entry = impl().createEntry(G, Target);
DEBUG_WITH_TYPE("jitlink", {
- dbgs() << " Created " << impl().getSectionName() << "entry for "
+ dbgs() << " Created " << impl().getSectionName() << " entry for "
<< Target.getName() << ": " << Entry << "\n";
});
EntryI = Entries.insert(std::make_pair(Target.getName(), &Entry)).first;
@@ -52,6 +52,21 @@ public:
return *EntryI->second;
}
+ /// Register a pre-existing entry.
+ ///
+ /// Objects may include pre-existing table entries (e.g. for GOTs).
+ /// This method can be used to register those entries so that they will not
+ /// be duplicated by createEntry the first time that getEntryForTarget is
+ /// called.
+ bool registerPreExistingEntry(Symbol &Target, Symbol &Entry) {
+ assert(Target.hasName() && "Edge cannot point to anonymous target");
+ auto Res = Entries.insert({
+ Target.getName(),
+ &Entry,
+ });
+ return Res.second;
+ }
+
private:
TableManagerImplT &impl() { return static_cast<TableManagerImplT &>(*this); }
DenseMap<StringRef, Symbol *> Entries;
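
A sketch of the intended use of registerPreExistingEntry: a backend that discovers an existing GOT slot for a target registers it up front so that later getEntryForTarget calls reuse it instead of creating a duplicate. reuseExistingGOTSlot is a hypothetical helper, and i386::GOTTableManager (declared later in this diff) stands in for any TableManager subclass.

#include "llvm/ExecutionEngine/JITLink/i386.h"

static void reuseExistingGOTSlot(llvm::jitlink::LinkGraph &G,
                                 llvm::jitlink::i386::GOTTableManager &GOT,
                                 llvm::jitlink::Symbol &Target,
                                 llvm::jitlink::Symbol &ExistingSlot) {
  // Returns false (and keeps the old entry) if Target was already registered.
  bool Registered = GOT.registerPreExistingEntry(Target, ExistingSlot);
  (void)Registered;
  // Subsequent lookups return ExistingSlot instead of a fresh GOT entry.
  llvm::jitlink::Symbol &Entry = GOT.getEntryForTarget(G, Target);
  (void)Entry;
}
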
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h b/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h
new file mode 100644
index 000000000000..c05c7ab2ad83
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h
@@ -0,0 +1,297 @@
+//===------ aarch32.h - Generic JITLink arm/thumb utilities -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic utilities for graphs representing arm/thumb objects.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_AARCH32
+#define LLVM_EXECUTIONENGINE_JITLINK_AARCH32
+
+#include "TableManager.h"
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+#include "llvm/Support/ARMBuildAttributes.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace jitlink {
+namespace aarch32 {
+
+/// JITLink-internal AArch32 fixup kinds
+enum EdgeKind_aarch32 : Edge::Kind {
+
+ ///
+ /// Relocations of class Data respect target endianness (unless otherwise
+ /// specified)
+ ///
+ FirstDataRelocation = Edge::FirstRelocation,
+
+ /// Relative 32-bit value relocation
+ Data_Delta32 = FirstDataRelocation,
+
+ /// Absolute 32-bit value relocation
+ Data_Pointer32,
+
+ LastDataRelocation = Data_Pointer32,
+
+ ///
+ /// Relocations of class Arm (covers fixed-width 4-byte instruction subset)
+ ///
+ FirstArmRelocation,
+
+ /// TODO: Arm_Call is here only as a placeholder for now.
+ Arm_Call = FirstArmRelocation,
+
+ LastArmRelocation = Arm_Call,
+
+ ///
+ /// Relocations of class Thumb16 and Thumb32 (covers Thumb instruction subset)
+ ///
+ FirstThumbRelocation,
+
+ /// Write immediate value for PC-relative branch with link (can bridge between
+ /// Arm and Thumb).
+ Thumb_Call = FirstThumbRelocation,
+
+ /// Write immediate value for (unconditional) PC-relative branch without link.
+ Thumb_Jump24,
+
+ /// Write immediate value to the lower halfword of the destination register
+ Thumb_MovwAbsNC,
+
+ /// Write immediate value to the top halfword of the destination register
+ Thumb_MovtAbs,
+
+ LastThumbRelocation = Thumb_MovtAbs,
+};
+
+/// Flags enum for AArch32-specific symbol properties
+enum TargetFlags_aarch32 : TargetFlagsType {
+ ThumbSymbol = 1 << 0,
+};
+
+/// Human-readable name for a given CPU architecture kind
+const char *getCPUArchName(ARMBuildAttrs::CPUArch K);
+
+/// Get a human-readable name for the given AArch32 edge kind.
+const char *getEdgeKindName(Edge::Kind K);
+
+/// AArch32 uses stubs for a number of purposes, like branch range extension
+/// or interworking between Arm and Thumb instruction subsets.
+///
+/// Stub implementations vary depending on CPU architecture (v4, v6, v7),
+/// instruction subset and branch type (absolute/PC-relative).
+///
+/// For each kind of stub, the StubsFlavor defines one concrete form that is
+/// used throughout the LinkGraph.
+///
+/// Stubs are often called "veneers" in the official docs and online.
+///
+enum StubsFlavor {
+ Unsupported = 0,
+ Thumbv7,
+};
+
+/// JITLink sub-arch configuration for Arm CPU models
+struct ArmConfig {
+ bool J1J2BranchEncoding = false;
+ StubsFlavor Stubs = Unsupported;
+};
+
+/// Obtain the sub-arch configuration for a given Arm CPU model.
+inline ArmConfig getArmConfigForCPUArch(ARMBuildAttrs::CPUArch CPUArch) {
+ ArmConfig ArmCfg;
+ switch (CPUArch) {
+ case ARMBuildAttrs::v7:
+ case ARMBuildAttrs::v8_A:
+ ArmCfg.J1J2BranchEncoding = true;
+ ArmCfg.Stubs = Thumbv7;
+ break;
+ default:
+ DEBUG_WITH_TYPE("jitlink", {
+ dbgs() << " Warning: ARM config not defined for CPU architecture "
+ << getCPUArchName(CPUArch);
+ });
+ break;
+ }
+ return ArmCfg;
+}
+
+/// Immutable pair of halfwords, Hi and Lo, with overflow check
+struct HalfWords {
+ constexpr HalfWords() : Hi(0), Lo(0) {}
+ constexpr HalfWords(uint32_t Hi, uint32_t Lo) : Hi(Hi), Lo(Lo) {
+ assert(isUInt<16>(Hi) && "Overflow in first half-word");
+ assert(isUInt<16>(Lo) && "Overflow in second half-word");
+ }
+ const uint16_t Hi; // First halfword
+ const uint16_t Lo; // Second halfword
+};
+
+/// Collection of named constants per fixup kind. It may contain but is not
+/// limited to the following entries:
+///
+/// Opcode - Values of the op-code bits in the instruction, with
+/// unaffected bits nulled
+/// OpcodeMask - Mask with all bits set that encode the op-code
+/// ImmMask - Mask with all bits set that encode the immediate value
+/// RegMask - Mask with all bits set that encode the register
+///
+template <EdgeKind_aarch32 Kind> struct FixupInfo {};
+
+template <> struct FixupInfo<Thumb_Jump24> {
+ static constexpr HalfWords Opcode{0xf000, 0x8000};
+ static constexpr HalfWords OpcodeMask{0xf800, 0x8000};
+ static constexpr HalfWords ImmMask{0x07ff, 0x2fff};
+ static constexpr uint16_t LoBitConditional = 0x1000;
+};
+
+template <> struct FixupInfo<Thumb_Call> {
+ static constexpr HalfWords Opcode{0xf000, 0xc000};
+ static constexpr HalfWords OpcodeMask{0xf800, 0xc000};
+ static constexpr HalfWords ImmMask{0x07ff, 0x2fff};
+ static constexpr uint16_t LoBitH = 0x0001;
+ static constexpr uint16_t LoBitNoBlx = 0x1000;
+};
+
+template <> struct FixupInfo<Thumb_MovtAbs> {
+ static constexpr HalfWords Opcode{0xf2c0, 0x0000};
+ static constexpr HalfWords OpcodeMask{0xfbf0, 0x8000};
+ static constexpr HalfWords ImmMask{0x040f, 0x70ff};
+ static constexpr HalfWords RegMask{0x0000, 0x0f00};
+};
+
+template <>
+struct FixupInfo<Thumb_MovwAbsNC> : public FixupInfo<Thumb_MovtAbs> {
+ static constexpr HalfWords Opcode{0xf240, 0x0000};
+};
+
+/// Helper function to read the initial addend for Data-class relocations.
+Expected<int64_t> readAddendData(LinkGraph &G, Block &B, const Edge &E);
+
+/// Helper function to read the initial addend for Arm-class relocations.
+Expected<int64_t> readAddendArm(LinkGraph &G, Block &B, const Edge &E);
+
+/// Helper function to read the initial addend for Thumb-class relocations.
+Expected<int64_t> readAddendThumb(LinkGraph &G, Block &B, const Edge &E,
+ const ArmConfig &ArmCfg);
+
+/// Read the initial addend for a REL-type relocation. It's the value encoded
+/// in the immediate field of the fixup location by the compiler.
+inline Expected<int64_t> readAddend(LinkGraph &G, Block &B, const Edge &E,
+ const ArmConfig &ArmCfg) {
+ Edge::Kind Kind = E.getKind();
+ if (Kind <= LastDataRelocation)
+ return readAddendData(G, B, E);
+
+ if (Kind <= LastArmRelocation)
+ return readAddendArm(G, B, E);
+
+ if (Kind <= LastThumbRelocation)
+ return readAddendThumb(G, B, E, ArmCfg);
+
+ llvm_unreachable("Relocation must be of class Data, Arm or Thumb");
+}
+
+/// Helper function to apply the fixup for Data-class relocations.
+Error applyFixupData(LinkGraph &G, Block &B, const Edge &E);
+
+/// Helper function to apply the fixup for Arm-class relocations.
+Error applyFixupArm(LinkGraph &G, Block &B, const Edge &E);
+
+/// Helper function to apply the fixup for Thumb-class relocations.
+Error applyFixupThumb(LinkGraph &G, Block &B, const Edge &E,
+ const ArmConfig &ArmCfg);
+
+/// Apply fixup expression for edge to block content.
+inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
+ const ArmConfig &ArmCfg) {
+ Edge::Kind Kind = E.getKind();
+
+ if (Kind <= LastDataRelocation)
+ return applyFixupData(G, B, E);
+
+ if (Kind <= LastArmRelocation)
+ return applyFixupArm(G, B, E);
+
+ if (Kind <= LastThumbRelocation)
+ return applyFixupThumb(G, B, E, ArmCfg);
+
+ llvm_unreachable("Relocation must be of class Data, Arm or Thumb");
+}
+
+/// Stubs builder for a specific StubsFlavor
+///
+/// Right now we only have one default stub kind, but we want to extend this
+/// and allow creation of specific kinds in the future (e.g. branch range
+/// extension or interworking).
+///
+/// Let's keep it simple for the moment and not wire this through a GOT.
+///
+template <StubsFlavor Flavor>
+class StubsManager : public TableManager<StubsManager<Flavor>> {
+public:
+ StubsManager() = default;
+
+ /// Name of the object file section that will contain all our stubs.
+ static StringRef getSectionName() { return "__llvm_jitlink_STUBS"; }
+
+ /// Implements link-graph traversal via visitExistingEdges().
+ bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
+ if (E.getTarget().isDefined())
+ return false;
+
+ switch (E.getKind()) {
+ case Thumb_Call:
+ case Thumb_Jump24: {
+ DEBUG_WITH_TYPE("jitlink", {
+ dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at "
+ << B->getFixupAddress(E) << " (" << B->getAddress() << " + "
+ << formatv("{0:x}", E.getOffset()) << ")\n";
+ });
+ E.setTarget(this->getEntryForTarget(G, E.getTarget()));
+ return true;
+ }
+ }
+ return false;
+ }
+
+ /// Create a branch range extension stub for the class's flavor.
+ Symbol &createEntry(LinkGraph &G, Symbol &Target);
+
+private:
+ /// Create a new node in the link-graph for the given stub template.
+ template <size_t Size>
+ Block &addStub(LinkGraph &G, const uint8_t (&Code)[Size],
+ uint64_t Alignment) {
+ ArrayRef<char> Template(reinterpret_cast<const char *>(Code), Size);
+ return G.createContentBlock(getStubsSection(G), Template,
+ orc::ExecutorAddr(), Alignment, 0);
+ }
+
+ /// Get or create the object file section that will contain all our stubs.
+ Section &getStubsSection(LinkGraph &G) {
+ if (!StubsSection)
+ StubsSection = &G.createSection(getSectionName(),
+ orc::MemProt::Read | orc::MemProt::Exec);
+ return *StubsSection;
+ }
+
+ Section *StubsSection = nullptr;
+};
+
+/// Create a branch range extension stub with Thumb encoding for v7 CPUs.
+template <>
+Symbol &StubsManager<Thumbv7>::createEntry(LinkGraph &G, Symbol &Target);
+
+} // namespace aarch32
+} // namespace jitlink
+} // namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_AARCH32
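
A sketch tying getArmConfigForCPUArch to the class-based dispatch in readAddend and applyFixup above. fixupOneEdge is a hypothetical helper; in practice the CPUArch value is read from the object's ARM build attributes and the config is computed once per graph.

#include "llvm/ExecutionEngine/JITLink/aarch32.h"
#include "llvm/Support/Error.h"

static llvm::Error fixupOneEdge(llvm::jitlink::LinkGraph &G,
                                llvm::jitlink::Block &B,
                                llvm::jitlink::Edge &E) {
  using namespace llvm::jitlink::aarch32;

  // v7 enables the J1J2 branch encoding and selects the Thumbv7 stub flavor.
  ArmConfig Cfg = getArmConfigForCPUArch(llvm::ARMBuildAttrs::v7);

  // Both helpers dispatch on the edge-kind class (Data, Arm or Thumb).
  auto Addend = readAddend(G, B, E, Cfg);
  if (!Addend)
    return Addend.takeError();
  E.setAddend(*Addend);

  return applyFixup(G, B, E, Cfg);
}
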
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h
index 33c09e1a2a31..50bebf335de1 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h
@@ -107,6 +107,50 @@ enum EdgeKind_aarch64 : Edge::Kind {
/// out-of-range error will be returned.
Branch26PCRel,
+ /// A 14-bit PC-relative test and branch.
+ ///
+ /// Represents a PC-relative test and branch to a target within +/-32Kb. The
+ /// target must be 32-bit aligned.
+ ///
+ /// Fixup expression:
+ /// Fixup <- (Target - Fixup + Addend) >> 2 : int14
+ ///
+ /// Notes:
+  ///   The '14' in the name refers to the number of operand bits and follows
+  ///   the naming convention used by the corresponding ELF relocation.
+ /// Since the low two bits must be zero (because of the 32-bit alignment of
+ /// the target) the operand is effectively a signed 16-bit number.
+ ///
+ ///
+ /// Errors:
+ /// - The result of the unshifted part of the fixup expression must be
+ /// 32-bit aligned otherwise an alignment error will be returned.
+ /// - The result of the fixup expression must fit into an int14 otherwise an
+ /// out-of-range error will be returned.
+ TestAndBranch14PCRel,
+
+ /// A 19-bit PC-relative conditional branch.
+ ///
+ /// Represents a PC-relative conditional branch to a target within +/-1Mb. The
+ /// target must be 32-bit aligned.
+ ///
+ /// Fixup expression:
+ /// Fixup <- (Target - Fixup + Addend) >> 2 : int19
+ ///
+ /// Notes:
+  ///   The '19' in the name refers to the number of operand bits and follows
+  ///   the naming convention used by the corresponding ELF relocation.
+ /// Since the low two bits must be zero (because of the 32-bit alignment of
+ /// the target) the operand is effectively a signed 21-bit number.
+ ///
+ ///
+ /// Errors:
+ /// - The result of the unshifted part of the fixup expression must be
+ /// 32-bit aligned otherwise an alignment error will be returned.
+ /// - The result of the fixup expression must fit into an int19 otherwise an
+ /// out-of-range error will be returned.
+ CondBranch19PCRel,
+
/// A 16-bit slice of the target address (which slice depends on the
/// instruction at the fixup location).
///
@@ -136,6 +180,20 @@ enum EdgeKind_aarch64 : Edge::Kind {
/// out-of-range error will be returned.
LDRLiteral19,
+ /// The signed 21-bit delta from the fixup to the target.
+ ///
+ /// Fixup expression:
+ ///
+ /// Fixup <- Target - Fixup + Addend : int21
+ ///
+ /// Notes:
+ /// For ADR fixups.
+ ///
+ /// Errors:
+ /// - The result of the fixup expression must fit into an int21 otherwise an
+ /// out-of-range error will be returned.
+ ADRLiteral21,
+
/// The signed 21-bit delta from the fixup page to the page containing the
/// target.
///
@@ -299,6 +357,26 @@ inline bool isLoadStoreImm12(uint32_t Instr) {
return (Instr & LoadStoreImm12Mask) == 0x39000000;
}
+inline bool isTestAndBranchImm14(uint32_t Instr) {
+ constexpr uint32_t TestAndBranchImm14Mask = 0x7e000000;
+ return (Instr & TestAndBranchImm14Mask) == 0x36000000;
+}
+
+inline bool isCondBranchImm19(uint32_t Instr) {
+ constexpr uint32_t CondBranchImm19Mask = 0xfe000000;
+ return (Instr & CondBranchImm19Mask) == 0x54000000;
+}
+
+inline bool isCompAndBranchImm19(uint32_t Instr) {
+ constexpr uint32_t CompAndBranchImm19Mask = 0x7e000000;
+ return (Instr & CompAndBranchImm19Mask) == 0x34000000;
+}
+
+inline bool isADR(uint32_t Instr) {
+ constexpr uint32_t ADRMask = 0x9f000000;
+ return (Instr & ADRMask) == 0x10000000;
+}
+
// Returns the amount the address operand of LD/ST (imm12)
// should be shifted right by.
//
@@ -431,6 +509,55 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E) {
*(ulittle32_t *)FixupPtr = FixedInstr;
break;
}
+ case ADRLiteral21: {
+ assert((FixupAddress.getValue() & 0x3) == 0 && "ADR is not 32-bit aligned");
+ uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
+ assert(isADR(RawInstr) && "RawInstr is not an ADR");
+ int64_t Delta = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
+ if (!isInt<21>(Delta))
+ return makeTargetOutOfRangeError(G, B, E);
+ auto UDelta = static_cast<uint32_t>(Delta);
+ uint32_t EncodedImmHi = ((UDelta >> 2) & 0x7ffff) << 5;
+ uint32_t EncodedImmLo = (UDelta & 0x3) << 29;
+ uint32_t FixedInstr = RawInstr | EncodedImmHi | EncodedImmLo;
+ *(ulittle32_t *)FixupPtr = FixedInstr;
+ break;
+ }
+ case TestAndBranch14PCRel: {
+ assert((FixupAddress.getValue() & 0x3) == 0 &&
+ "Test and branch is not 32-bit aligned");
+ uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
+ assert(isTestAndBranchImm14(RawInstr) &&
+ "RawInstr is not a test and branch");
+ int64_t Delta = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
+ if (Delta & 0x3)
+ return make_error<JITLinkError>(
+ "Test and branch literal target is not 32-bit aligned");
+ if (!isInt<16>(Delta))
+ return makeTargetOutOfRangeError(G, B, E);
+ uint32_t EncodedImm = ((static_cast<uint32_t>(Delta) >> 2) & 0x3fff) << 5;
+ uint32_t FixedInstr = RawInstr | EncodedImm;
+ *(ulittle32_t *)FixupPtr = FixedInstr;
+ break;
+ }
+ case CondBranch19PCRel: {
+ assert((FixupAddress.getValue() & 0x3) == 0 &&
+ "Conditional branch is not 32-bit aligned");
+ uint32_t RawInstr = *(ulittle32_t *)FixupPtr;
+ assert((isCondBranchImm19(RawInstr) || isCompAndBranchImm19(RawInstr)) &&
+ "RawInstr is not a conditional branch");
+ int64_t Delta = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
+ if (Delta & 0x3)
+ return make_error<JITLinkError>(
+ "Conditional branch literal target is not 32-bit "
+ "aligned");
+ if (!isInt<21>(Delta))
+ return makeTargetOutOfRangeError(G, B, E);
+ uint32_t EncodedImm = ((static_cast<uint32_t>(Delta) >> 2) & 0x7ffff) << 5;
+ uint32_t FixedInstr = RawInstr | EncodedImm;
+ *(ulittle32_t *)FixupPtr = FixedInstr;
+ break;
+ }
case Page21: {
uint64_t TargetPage =
(E.getTarget().getAddress().getValue() + E.getAddend()) &
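
A worked example of the TestAndBranch14PCRel encoding above, using an assumed delta rather than values from any real object; the shift and mask mirror the applyFixup case exactly.

#include <cstdint>

// For a tbz/tbnz whose Target - Fixup + Addend works out to 0x100:
//   - the 32-bit alignment check passes (0x100 & 0x3 == 0), and
//   - the value fits in an int16 before the >> 2, as required above.
constexpr std::int64_t Delta = 0x100;
constexpr std::uint32_t EncodedImm =
    ((static_cast<std::uint32_t>(Delta) >> 2) & 0x3fff) << 5;
// The immediate occupies bits [18:5] of the instruction word.
static_assert(EncodedImm == 0x800, "matches the shift/mask in applyFixup");
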
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/i386.h b/llvm/include/llvm/ExecutionEngine/JITLink/i386.h
index a590713625d2..f8d24d8bf31c 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/i386.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/i386.h
@@ -124,23 +124,75 @@ enum EdgeKind_i386 : Edge::Kind {
/// - *ASSERTION* Failure to handle edges of this kind prior to the fixup
/// phase will result in an assert/unreachable during the fixup phase
RequestGOTAndTransformToDelta32FromGOT,
+
+ /// A 32-bit PC-relative branch.
+ ///
+ /// Represents a PC-relative call or branch to a target. This can be used to
+ /// identify, record, and/or patch call sites.
+ ///
+ /// The fixup expression for this kind includes an implicit offset to account
+ /// for the PC (unlike the Delta edges) so that a Branch32PCRel with a target
+ /// T and addend zero is a call/branch to the start (offset zero) of T.
+ ///
+ /// Fixup expression:
+ /// Fixup <- Target - (Fixup + 4) + Addend : int32
+ ///
+ /// Errors:
+ /// - The result of the fixup expression must fit into an int32, otherwise
+ /// an out-of-range error will be returned.
+ ///
+ BranchPCRel32,
+
+ /// A 32-bit PC-relative branch to a pointer jump stub.
+ ///
+ /// The target of this relocation should be a pointer jump stub of the form:
+ ///
+ /// \code{.s}
+ /// .text
+ /// jmp *tgtptr
+ /// ; ...
+ ///
+ /// .data
+ /// tgtptr:
+ /// .quad 0
+ /// \endcode
+ ///
+ /// This edge kind has the same fixup expression as BranchPCRel32, but further
+ /// identifies the call/branch as being to a pointer jump stub. For edges of
+ /// this kind the jump stub should not be bypassed (use
+ /// BranchPCRel32ToPtrJumpStubBypassable for that), but the pointer location
+ /// target may be recorded to allow manipulation at runtime.
+ ///
+ /// Fixup expression:
+ /// Fixup <- Target - Fixup + Addend - 4 : int32
+ ///
+ /// Errors:
+ /// - The result of the fixup expression must fit into an int32, otherwise
+ /// an out-of-range error will be returned.
+ ///
+ BranchPCRel32ToPtrJumpStub,
+
+ /// A relaxable version of BranchPCRel32ToPtrJumpStub.
+ ///
+ /// The edge kind has the same fixup expression as BranchPCRel32ToPtrJumpStub,
+ /// but identifies the call/branch as being to a pointer jump stub that may be
+ /// bypassed with a direct jump to the ultimate target if the ultimate target
+ /// is within range of the fixup location.
+ ///
+ /// Fixup expression:
+ /// Fixup <- Target - Fixup + Addend - 4: int32
+ ///
+ /// Errors:
+ /// - The result of the fixup expression must fit into an int32, otherwise
+ /// an out-of-range error will be returned.
+ ///
+ BranchPCRel32ToPtrJumpStubBypassable,
};
/// Returns a string name for the given i386 edge. For debugging purposes
/// only
const char *getEdgeKindName(Edge::Kind K);
-/// Returns true if the given uint32_t value is in range for a uint16_t.
-inline bool isInRangeForImmU16(uint32_t Value) {
- return Value <= std::numeric_limits<uint16_t>::max();
-}
-
-/// Returns true if the given int32_t value is in range for an int16_t.
-inline bool isInRangeForImmS16(int32_t Value) {
- return (Value >= std::numeric_limits<int16_t>::min() &&
- Value <= std::numeric_limits<int16_t>::max());
-}
-
/// Apply fixup expression for edge to block content.
inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
const Symbol *GOTSymbol) {
@@ -171,7 +223,7 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
case i386::Pointer16: {
uint32_t Value = E.getTarget().getAddress().getValue() + E.getAddend();
- if (LLVM_LIKELY(isInRangeForImmU16(Value)))
+ if (LLVM_LIKELY(isUInt<16>(Value)))
*(ulittle16_t *)FixupPtr = Value;
else
return makeTargetOutOfRangeError(G, B, E);
@@ -181,7 +233,7 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
case i386::PCRel16: {
int32_t Value =
E.getTarget().getAddress() - (FixupAddress + 4) + E.getAddend();
- if (LLVM_LIKELY(isInRangeForImmS16(Value)))
+ if (LLVM_LIKELY(isInt<16>(Value)))
*(little16_t *)FixupPtr = Value;
else
return makeTargetOutOfRangeError(G, B, E);
@@ -202,10 +254,19 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
break;
}
+ case i386::BranchPCRel32:
+ case i386::BranchPCRel32ToPtrJumpStub:
+ case i386::BranchPCRel32ToPtrJumpStubBypassable: {
+ int32_t Value =
+ E.getTarget().getAddress() - (FixupAddress + 4) + E.getAddend();
+ *(little32_t *)FixupPtr = Value;
+ break;
+ }
+
default:
return make_error<JITLinkError>(
"In graph " + G.getName() + ", section " + B.getSection().getName() +
- "unsupported edge kind" + getEdgeKindName(E.getKind()));
+ " unsupported edge kind " + getEdgeKindName(E.getKind()));
}
return Error::success();
@@ -217,6 +278,13 @@ constexpr uint32_t PointerSize = 4;
/// i386 null pointer content.
extern const char NullPointerContent[PointerSize];
+/// i386 pointer jump stub content.
+///
+/// Contains the instruction sequence for an indirect jump via an in-memory
+/// pointer:
+///   jmp *ptr
+extern const char PointerJumpStubContent[6];
+
/// Creates a new pointer block in the given section and returns an anonymous
/// symbol pointing to it.
///
@@ -237,6 +305,36 @@ inline Symbol &createAnonymousPointer(LinkGraph &G, Section &PointerSection,
return G.addAnonymousSymbol(B, 0, PointerSize, false, false);
}
+/// Create a jump stub block that jumps via the pointer at the given symbol.
+///
+/// The stub block will have the following default values:
+/// alignment: 8-bit
+/// alignment-offset: 0
+/// address: highest allowable: (~5U)
+inline Block &createPointerJumpStubBlock(LinkGraph &G, Section &StubSection,
+ Symbol &PointerSymbol) {
+ auto &B = G.createContentBlock(StubSection, PointerJumpStubContent,
+ orc::ExecutorAddr(), 8, 0);
+ B.addEdge(Pointer32,
+ // Offset is 2 because the the first 2 bytes of the
+ // jump stub block are {0xff, 0x25} -- an indirect absolute
+ // jump.
+ 2, PointerSymbol, 0);
+ return B;
+}
+
+/// Create a jump stub that jumps via the pointer at the given symbol and
+/// an anonymous symbol pointing to it. Return the anonymous symbol.
+///
+/// The stub block will be created by createPointerJumpStubBlock.
+inline Symbol &createAnonymousPointerJumpStub(LinkGraph &G,
+ Section &StubSection,
+ Symbol &PointerSymbol) {
+ return G.addAnonymousSymbol(
+ createPointerJumpStubBlock(G, StubSection, PointerSymbol), 0, 6, true,
+ false);
+}
+
/// Global Offset Table Builder.
class GOTTableManager : public TableManager<GOTTableManager> {
public:
@@ -283,6 +381,54 @@ private:
Section *GOTSection = nullptr;
};
+/// Procedure Linkage Table Builder.
+class PLTTableManager : public TableManager<PLTTableManager> {
+public:
+ PLTTableManager(GOTTableManager &GOT) : GOT(GOT) {}
+
+ static StringRef getSectionName() { return "$__STUBS"; }
+
+ bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
+ if (E.getKind() == i386::BranchPCRel32 && !E.getTarget().isDefined()) {
+ DEBUG_WITH_TYPE("jitlink", {
+ dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at "
+ << B->getFixupAddress(E) << " (" << B->getAddress() << " + "
+ << formatv("{0:x}", E.getOffset()) << ")\n";
+ });
+ // Set the edge kind to Branch32ToPtrJumpStubBypassable to enable it to
+ // be optimized when the target is in-range.
+ E.setKind(i386::BranchPCRel32ToPtrJumpStubBypassable);
+ E.setTarget(getEntryForTarget(G, E.getTarget()));
+ return true;
+ }
+ return false;
+ }
+
+ Symbol &createEntry(LinkGraph &G, Symbol &Target) {
+ return createAnonymousPointerJumpStub(G, getStubsSection(G),
+ GOT.getEntryForTarget(G, Target));
+ }
+
+public:
+ Section &getStubsSection(LinkGraph &G) {
+ if (!PLTSection)
+ PLTSection = &G.createSection(getSectionName(),
+ orc::MemProt::Read | orc::MemProt::Exec);
+ return *PLTSection;
+ }
+
+ GOTTableManager &GOT;
+ Section *PLTSection = nullptr;
+};
+
+/// Optimize the GOT and Stub relocations if the edge target address is in range
+/// 1. PCRel32GOTLoadRelaxable. For this edge kind, if the target is in range,
+/// then replace GOT load with lea. (THIS IS UNIMPLEMENTED RIGHT NOW!)
+/// 2. BranchPCRel32ToPtrJumpStubRelaxable. For this edge kind, if the target is
+/// in range, replace a indirect jump by plt stub with a direct jump to the
+/// target
+Error optimizeGOTAndStubAccesses(LinkGraph &G);
+
} // namespace llvm::jitlink::i386
#endif // LLVM_EXECUTIONENGINE_JITLINK_I386_H
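
A hedged sketch of wiring the two table managers above into a single link-graph pass via visitExistingEdges (declared in JITLink.h). buildGOTAndStubs_i386 is a hypothetical helper; in-tree, optimizeGOTAndStubAccesses typically runs later as its own pre-fixup pass rather than immediately after table construction.

#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/ExecutionEngine/JITLink/i386.h"
#include "llvm/Support/Error.h"

static llvm::Error buildGOTAndStubs_i386(llvm::jitlink::LinkGraph &G) {
  llvm::jitlink::i386::GOTTableManager GOT;
  llvm::jitlink::i386::PLTTableManager PLT(GOT);
  // One walk over all existing edges; each manager rewrites the edges it
  // recognizes (GOT-relative loads, branches to external targets).
  llvm::jitlink::visitExistingEdges(G, GOT, PLT);
  // Bypass stubs / relax GOT loads where the target turns out to be in range.
  return llvm::jitlink::i386::optimizeGOTAndStubAccesses(G);
}
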
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ppc64.h b/llvm/include/llvm/ExecutionEngine/JITLink/ppc64.h
new file mode 100644
index 000000000000..0b2d562b71c4
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/ppc64.h
@@ -0,0 +1,333 @@
+//===--- ppc64.h - Generic JITLink ppc64 edge kinds, utilities --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic utilities for graphs representing 64-bit PowerPC objects.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITLINK_PPC64_H
+#define LLVM_EXECUTIONENGINE_JITLINK_PPC64_H
+
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/JITLink/TableManager.h"
+#include "llvm/Support/Endian.h"
+
+namespace llvm::jitlink::ppc64 {
+
+/// Represents ppc64 fixups and other ppc64-specific edge kinds.
+enum EdgeKind_ppc64 : Edge::Kind {
+ Pointer64 = Edge::FirstRelocation,
+ Pointer32,
+ Delta64,
+ Delta32,
+ NegDelta32,
+ Delta16,
+ Delta16HA,
+ Delta16LO,
+ TOCDelta16HA,
+ TOCDelta16LO,
+ TOCDelta16DS,
+ TOCDelta16LODS,
+ CallBranchDelta,
+  // Need to restore r2 after the bl, which implies that the bl is followed
+  // by a nop.
+ CallBranchDeltaRestoreTOC,
+ // Need PLT call stub using TOC, TOC pointer is not saved before branching.
+ RequestPLTCallStub,
+ // Need PLT call stub using TOC, TOC pointer is saved before branching.
+ RequestPLTCallStubSaveTOC,
+ // Need PLT call stub without using TOC.
+ RequestPLTCallStubNoTOC,
+};
+
+enum PLTCallStubKind {
+ LongBranch,
+ LongBranchSaveR2,
+ LongBranchNoTOC,
+};
+
+extern const char NullPointerContent[8];
+extern const char PointerJumpStubContent_big[20];
+extern const char PointerJumpStubContent_little[20];
+extern const char PointerJumpStubNoTOCContent_big[32];
+extern const char PointerJumpStubNoTOCContent_little[32];
+
+struct PLTCallStubReloc {
+ Edge::Kind K;
+ size_t Offset;
+ Edge::AddendT A;
+};
+
+struct PLTCallStubInfo {
+ ArrayRef<char> Content;
+ SmallVector<PLTCallStubReloc, 2> Relocs;
+};
+
+template <support::endianness Endianness>
+inline PLTCallStubInfo pickStub(PLTCallStubKind StubKind) {
+ constexpr bool isLE = Endianness == support::endianness::little;
+ switch (StubKind) {
+ case LongBranch: {
+ ArrayRef<char> Content =
+ isLE ? PointerJumpStubContent_little : PointerJumpStubContent_big;
+ // Skip save r2.
+ Content = Content.slice(4);
+ return PLTCallStubInfo{
+ Content,
+ {{TOCDelta16HA, 0, 0}, {TOCDelta16LO, 4, 0}},
+ };
+ }
+ case LongBranchSaveR2: {
+ ArrayRef<char> Content =
+ isLE ? PointerJumpStubContent_little : PointerJumpStubContent_big;
+ return PLTCallStubInfo{
+ Content,
+ {{TOCDelta16HA, 4, 0}, {TOCDelta16LO, 8, 0}},
+ };
+ }
+ case LongBranchNoTOC: {
+ ArrayRef<char> Content = isLE ? PointerJumpStubNoTOCContent_little
+ : PointerJumpStubNoTOCContent_big;
+ return PLTCallStubInfo{
+ Content,
+ {{Delta16HA, 16, 8}, {Delta16LO, 20, 12}},
+ };
+ }
+ }
+ llvm_unreachable("Unknown PLTCallStubKind enum");
+}
+
+inline Symbol &createAnonymousPointer(LinkGraph &G, Section &PointerSection,
+ Symbol *InitialTarget = nullptr,
+ uint64_t InitialAddend = 0) {
+ assert(G.getPointerSize() == sizeof(NullPointerContent) &&
+ "LinkGraph's pointer size should be consistent with size of "
+ "NullPointerContent");
+ Block &B = G.createContentBlock(PointerSection, NullPointerContent,
+ orc::ExecutorAddr(), G.getPointerSize(), 0);
+ if (InitialTarget)
+ B.addEdge(Pointer64, 0, *InitialTarget, InitialAddend);
+ return G.addAnonymousSymbol(B, 0, G.getPointerSize(), false, false);
+}
+
+template <support::endianness Endianness>
+inline Symbol &createAnonymousPointerJumpStub(LinkGraph &G,
+ Section &StubSection,
+ Symbol &PointerSymbol,
+ PLTCallStubKind StubKind) {
+ PLTCallStubInfo StubInfo = pickStub<Endianness>(StubKind);
+ Block &B = G.createContentBlock(StubSection, StubInfo.Content,
+ orc::ExecutorAddr(), 4, 0);
+ for (auto const &Reloc : StubInfo.Relocs)
+ B.addEdge(Reloc.K, Reloc.Offset, PointerSymbol, Reloc.A);
+ return G.addAnonymousSymbol(B, 0, StubInfo.Content.size(), true, false);
+}
+
+template <support::endianness Endianness>
+class TOCTableManager : public TableManager<TOCTableManager<Endianness>> {
+public:
+  // FIXME: `llvm-jitlink -check` relies on this name being $__GOT.
+ static StringRef getSectionName() { return "$__GOT"; }
+
+ bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
+ Edge::Kind K = E.getKind();
+ switch (K) {
+ case TOCDelta16HA:
+ case TOCDelta16LO:
+ case TOCDelta16DS:
+ case TOCDelta16LODS:
+ case CallBranchDeltaRestoreTOC:
+ case RequestPLTCallStub:
+ case RequestPLTCallStubSaveTOC:
+ // Create TOC section if TOC relocation, PLT or GOT is used.
+ getOrCreateTOCSection(G);
+ return false;
+ default:
+ return false;
+ }
+ }
+
+ Symbol &createEntry(LinkGraph &G, Symbol &Target) {
+ return createAnonymousPointer(G, getOrCreateTOCSection(G), &Target);
+ }
+
+private:
+ Section &getOrCreateTOCSection(LinkGraph &G) {
+ TOCSection = G.findSectionByName(getSectionName());
+ if (!TOCSection)
+ TOCSection = &G.createSection(getSectionName(), orc::MemProt::Read);
+ return *TOCSection;
+ }
+
+ Section *TOCSection = nullptr;
+};
+
+template <support::endianness Endianness>
+class PLTTableManager : public TableManager<PLTTableManager<Endianness>> {
+public:
+ PLTTableManager(TOCTableManager<Endianness> &TOC) : TOC(TOC) {}
+
+ static StringRef getSectionName() { return "$__STUBS"; }
+
+ bool visitEdge(LinkGraph &G, Block *B, Edge &E) {
+ Edge::Kind K = E.getKind();
+ if (K == ppc64::RequestPLTCallStubSaveTOC && E.getTarget().isExternal()) {
+ E.setKind(ppc64::CallBranchDeltaRestoreTOC);
+ this->StubKind = LongBranchSaveR2;
+ E.setTarget(this->getEntryForTarget(G, E.getTarget()));
+ return true;
+ }
+ if (K == ppc64::RequestPLTCallStubNoTOC && E.getTarget().isExternal()) {
+ E.setKind(ppc64::CallBranchDelta);
+ this->StubKind = LongBranchNoTOC;
+ E.setTarget(this->getEntryForTarget(G, E.getTarget()));
+ return true;
+ }
+ return false;
+ }
+
+ Symbol &createEntry(LinkGraph &G, Symbol &Target) {
+ return createAnonymousPointerJumpStub<Endianness>(
+ G, getOrCreateStubsSection(G), TOC.getEntryForTarget(G, Target),
+ this->StubKind);
+ }
+
+private:
+ Section &getOrCreateStubsSection(LinkGraph &G) {
+ PLTSection = G.findSectionByName(getSectionName());
+ if (!PLTSection)
+ PLTSection = &G.createSection(getSectionName(),
+ orc::MemProt::Read | orc::MemProt::Exec);
+ return *PLTSection;
+ }
+
+ TOCTableManager<Endianness> &TOC;
+ Section *PLTSection = nullptr;
+ PLTCallStubKind StubKind;
+};
+
+/// Returns a string name for the given ppc64 edge. For debugging purposes
+/// only.
+const char *getEdgeKindName(Edge::Kind K);
+
+inline static uint16_t ha16(uint64_t x) { return (x + 0x8000) >> 16; }
+
+inline static uint16_t lo16(uint64_t x) { return x & 0xffff; }
+
+/// Apply fixup expression for edge to block content.
+template <support::endianness Endianness>
+inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
+ const Symbol *TOCSymbol) {
+ char *BlockWorkingMem = B.getAlreadyMutableContent().data();
+ char *FixupPtr = BlockWorkingMem + E.getOffset();
+ orc::ExecutorAddr FixupAddress = B.getAddress() + E.getOffset();
+ int64_t S = E.getTarget().getAddress().getValue();
+ int64_t A = E.getAddend();
+ int64_t P = FixupAddress.getValue();
+ int64_t TOCBase = TOCSymbol ? TOCSymbol->getAddress().getValue() : 0;
+ Edge::Kind K = E.getKind();
+
+ DEBUG_WITH_TYPE("jitlink", {
+ dbgs() << " Applying fixup on " << G.getEdgeKindName(K)
+ << " edge, (S, A, P, .TOC.) = (" << formatv("{0:x}", S) << ", "
+ << formatv("{0:x}", A) << ", " << formatv("{0:x}", P) << ", "
+ << formatv("{0:x}", TOCBase) << ")\n";
+ });
+
+ switch (K) {
+ case Pointer64: {
+ uint64_t Value = S + A;
+ support::endian::write64<Endianness>(FixupPtr, Value);
+ break;
+ }
+ case Delta16HA:
+ case Delta16LO: {
+ int64_t Value = S + A - P;
+ if (LLVM_UNLIKELY(!isInt<32>(Value))) {
+ return makeTargetOutOfRangeError(G, B, E);
+ }
+ if (K == Delta16LO)
+ support::endian::write16<Endianness>(FixupPtr, lo16(Value));
+ else
+ support::endian::write16<Endianness>(FixupPtr, ha16(Value));
+ break;
+ }
+ case TOCDelta16HA:
+ case TOCDelta16LO: {
+ int64_t Value = S + A - TOCBase;
+ if (LLVM_UNLIKELY(!isInt<32>(Value))) {
+ return makeTargetOutOfRangeError(G, B, E);
+ }
+ if (K == TOCDelta16LO)
+ support::endian::write16<Endianness>(FixupPtr, lo16(Value));
+ else
+ support::endian::write16<Endianness>(FixupPtr, ha16(Value));
+ break;
+ }
+ case TOCDelta16DS:
+ case TOCDelta16LODS: {
+ int64_t Value = S + A - TOCBase;
+ if (LLVM_UNLIKELY(!isInt<32>(Value))) {
+ return makeTargetOutOfRangeError(G, B, E);
+ }
+ if (K == TOCDelta16LODS)
+ support::endian::write16<Endianness>(FixupPtr, lo16(Value) & ~3);
+ else
+ support::endian::write16<Endianness>(FixupPtr, Value & ~3);
+ break;
+ }
+ case CallBranchDeltaRestoreTOC:
+ case CallBranchDelta: {
+ int64_t Value = S + A - P;
+ if (LLVM_UNLIKELY(!isInt<26>(Value))) {
+ return makeTargetOutOfRangeError(G, B, E);
+ }
+ uint32_t Inst = support::endian::read32<Endianness>(FixupPtr);
+ support::endian::write32<Endianness>(FixupPtr, (Inst & 0xfc000003) |
+ (Value & 0x03fffffc));
+ if (K == CallBranchDeltaRestoreTOC) {
+ uint32_t NopInst = support::endian::read32<Endianness>(FixupPtr + 4);
+ assert(NopInst == 0x60000000 &&
+ "NOP should be placed here for restoring r2");
+ (void)NopInst;
+ // Restore r2 with the instruction 0xe8410018, i.e. `ld r2, 24(r1)`.
+ support::endian::write32<Endianness>(FixupPtr + 4, 0xe8410018);
+ }
+ break;
+ }
+ case Delta64: {
+ int64_t Value = S + A - P;
+ support::endian::write64<Endianness>(FixupPtr, Value);
+ break;
+ }
+ case Delta32: {
+ int64_t Value = S + A - P;
+ if (LLVM_UNLIKELY(!isInt<32>(Value))) {
+ return makeTargetOutOfRangeError(G, B, E);
+ }
+ support::endian::write32<Endianness>(FixupPtr, Value);
+ break;
+ }
+ case NegDelta32: {
+ int64_t Value = P - S + A;
+ if (LLVM_UNLIKELY(!isInt<32>(Value))) {
+ return makeTargetOutOfRangeError(G, B, E);
+ }
+ support::endian::write32<Endianness>(FixupPtr, Value);
+ break;
+ }
+ default:
+ return make_error<JITLinkError>(
+ "In graph " + G.getName() + ", section " + B.getSection().getName() +
+ " unsupported edge kind " + getEdgeKindName(E.getKind()));
+ }
+ return Error::success();
+}
+
+} // end namespace llvm::jitlink::ppc64
+
+#endif // LLVM_EXECUTIONENGINE_JITLINK_PPC64_H
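
The ha16/lo16 helpers added above implement the usual PowerPC @ha/@lo split: the +0x8000 bias in ha16 compensates for the sign-extension of the low half when the two parts are recombined by an addis/addi (or ld) pair. A minimal, self-contained sketch of that round-trip (the two helpers are restated locally so the snippet builds without the JITLink headers; test values stay within the positive int32 range):

#include <cassert>
#include <cstdint>

static uint16_t ha16(uint64_t x) { return (x + 0x8000) >> 16; }
static uint16_t lo16(uint64_t x) { return x & 0xffff; }

int main() {
  // Recombine the halves the way the PPC instruction pair does: the high
  // half is shifted up, the low half is sign-extended and added.
  for (uint32_t v : {0x12345678u, 0x12348000u, 0x0000ffffu}) {
    int64_t recombined =
        (int64_t(int16_t(ha16(v))) << 16) + int64_t(int16_t(lo16(v)));
    assert(recombined == int64_t(v) && "ha/lo split must round-trip");
  }
  return 0;
}
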
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
index 2acb03c440a4..c884cc28428b 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
@@ -202,6 +202,18 @@ enum EdgeKind_riscv : Edge::Kind {
/// Fixup expression:
/// Fixup <- (Target - Fixup + Addend)
R_RISCV_32_PCREL,
+
+ /// An auipc/jalr pair eligible for linker relaxation.
+ ///
+ /// Linker relaxation will replace this with R_RISCV_RVC_JUMP or R_RISCV_JAL
+ /// if it succeeds, or with R_RISCV_CALL_PLT if it fails.
+ CallRelaxable,
+
+ /// Alignment requirement used by linker relaxation.
+ ///
+ /// Linker relaxation will use this to ensure all code sequences are properly
+ /// aligned and then remove these edges from the graph.
+ AlignRelaxable,
};
/// Returns a string name for the given riscv edge. For debugging purposes
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h
index 8e1fbba4e173..80af39055f19 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/x86_64.h
@@ -16,8 +16,6 @@
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/ExecutionEngine/JITLink/TableManager.h"
-#include <limits>
-
namespace llvm {
namespace jitlink {
namespace x86_64 {
@@ -64,6 +62,17 @@ enum EdgeKind_x86_64 : Edge::Kind {
///
Pointer16,
+ /// A plain 8-bit pointer value relocation.
+ ///
+ /// Fixup expression:
+ /// Fixup <- Target + Addend : uint8
+ ///
+ /// Errors:
+ /// - The target must reside in the low 8 bits of the address space,
+ /// otherwise an out-of-range error will be returned.
+ ///
+ Pointer8,
+
/// A 64-bit delta.
///
/// Delta from the fixup to the target.
@@ -381,17 +390,6 @@ enum EdgeKind_x86_64 : Edge::Kind {
/// only.
const char *getEdgeKindName(Edge::Kind K);
-/// Returns true if the given uint64_t value is in range for a uint32_t.
-inline bool isInRangeForImmU32(uint64_t Value) {
- return Value <= std::numeric_limits<uint32_t>::max();
-}
-
-/// Returns true if the given int64_t value is in range for an int32_t.
-inline bool isInRangeForImmS32(int64_t Value) {
- return (Value >= std::numeric_limits<int32_t>::min() &&
- Value <= std::numeric_limits<int32_t>::max());
-}
-
/// Apply fixup expression for edge to block content.
inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
const Symbol *GOTSymbol) {
@@ -411,7 +409,7 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
case Pointer32: {
uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend();
- if (LLVM_LIKELY(isInRangeForImmU32(Value)))
+ if (LLVM_LIKELY(isUInt<32>(Value)))
*(ulittle32_t *)FixupPtr = Value;
else
return makeTargetOutOfRangeError(G, B, E);
@@ -419,7 +417,7 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
}
case Pointer32Signed: {
int64_t Value = E.getTarget().getAddress().getValue() + E.getAddend();
- if (LLVM_LIKELY(isInRangeForImmS32(Value)))
+ if (LLVM_LIKELY(isInt<32>(Value)))
*(little32_t *)FixupPtr = Value;
else
return makeTargetOutOfRangeError(G, B, E);
@@ -435,6 +433,15 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
break;
}
+ case Pointer8: {
+ uint64_t Value = E.getTarget().getAddress().getValue() + E.getAddend();
+ if (LLVM_LIKELY(isUInt<8>(Value)))
+ *(uint8_t *)FixupPtr = Value;
+ else
+ return makeTargetOutOfRangeError(G, B, E);
+ break;
+ }
+
case PCRel32:
case BranchPCRel32:
case BranchPCRel32ToPtrJumpStub:
@@ -444,7 +451,7 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
case PCRel32TLVPLoadREXRelaxable: {
int64_t Value =
E.getTarget().getAddress() - (FixupAddress + 4) + E.getAddend();
- if (LLVM_LIKELY(isInRangeForImmS32(Value)))
+ if (LLVM_LIKELY(isInt<32>(Value)))
*(little32_t *)FixupPtr = Value;
else
return makeTargetOutOfRangeError(G, B, E);
@@ -459,7 +466,7 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
case Delta32: {
int64_t Value = E.getTarget().getAddress() - FixupAddress + E.getAddend();
- if (LLVM_LIKELY(isInRangeForImmS32(Value)))
+ if (LLVM_LIKELY(isInt<32>(Value)))
*(little32_t *)FixupPtr = Value;
else
return makeTargetOutOfRangeError(G, B, E);
@@ -474,7 +481,7 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
case NegDelta32: {
int64_t Value = FixupAddress - E.getTarget().getAddress() + E.getAddend();
- if (LLVM_LIKELY(isInRangeForImmS32(Value)))
+ if (LLVM_LIKELY(isInt<32>(Value)))
*(little32_t *)FixupPtr = Value;
else
return makeTargetOutOfRangeError(G, B, E);
@@ -491,7 +498,7 @@ inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E,
default:
return make_error<JITLinkError>(
"In graph " + G.getName() + ", section " + B.getSection().getName() +
- "unsupported edge kind" + getEdgeKindName(E.getKind()));
+ " unsupported edge kind " + getEdgeKindName(E.getKind()));
}
return Error::success();
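
The deleted isInRangeForImmU32/isInRangeForImmS32 helpers are replaced throughout this fixup routine by the generic isUInt<32>/isInt<32> templates; the checks are equivalent. A standalone sketch of that equivalence in plain C++ (no LLVM headers, so both the deleted helpers and local stand-ins for the templates are restated here):

#include <cassert>
#include <cstdint>
#include <limits>

// Restatements of the deleted helpers, for comparison only.
static bool isInRangeForImmU32(uint64_t Value) {
  return Value <= std::numeric_limits<uint32_t>::max();
}
static bool isInRangeForImmS32(int64_t Value) {
  return Value >= std::numeric_limits<int32_t>::min() &&
         Value <= std::numeric_limits<int32_t>::max();
}

// Local equivalents of llvm::isUInt<32> / llvm::isInt<32>.
static bool isUInt32(uint64_t Value) { return Value < (uint64_t(1) << 32); }
static bool isInt32(int64_t Value) {
  return Value >= -(int64_t(1) << 31) && Value < (int64_t(1) << 31);
}

int main() {
  assert(isUInt32(0xffffffffull) == isInRangeForImmU32(0xffffffffull));
  assert(isUInt32(0x100000000ull) == isInRangeForImmU32(0x100000000ull));
  assert(isInt32(-0x80000000ll) == isInRangeForImmS32(-0x80000000ll));
  assert(isInt32(0x80000000ll) == isInRangeForImmS32(0x80000000ll));
  return 0;
}
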
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/COFFPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/COFFPlatform.h
index 0a9e08fdd6d7..4ef208dbbca2 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/COFFPlatform.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/COFFPlatform.h
@@ -41,6 +41,14 @@ public:
/// given JITDylib.
static Expected<std::unique_ptr<COFFPlatform>>
Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
+ JITDylib &PlatformJD,
+ std::unique_ptr<MemoryBuffer> OrcRuntimeArchiveBuffer,
+ LoadDynamicLibrary LoadDynLibrary, bool StaticVCRuntime = false,
+ const char *VCRuntimePath = nullptr,
+ std::optional<SymbolAliasMap> RuntimeAliases = std::nullopt);
+
+ static Expected<std::unique_ptr<COFFPlatform>>
+ Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
JITDylib &PlatformJD, const char *OrcRuntimePath,
LoadDynamicLibrary LoadDynLibrary, bool StaticVCRuntime = false,
const char *VCRuntimePath = nullptr,
@@ -67,10 +75,6 @@ public:
static ArrayRef<std::pair<const char *, const char *>>
standardRuntimeUtilityAliases();
- static bool isInitializerSection(StringRef Name) {
- return Name.startswith(".CRT");
- }
-
static StringRef getSEHFrameSectionName() { return ".pdata"; }
private:
@@ -140,10 +144,14 @@ private:
static bool supportedTarget(const Triple &TT);
- COFFPlatform(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
- JITDylib &PlatformJD, const char *OrcRuntimePath,
- LoadDynamicLibrary LoadDynLibrary, bool StaticVCRuntime,
- const char *VCRuntimePath, Error &Err);
+ COFFPlatform(
+ ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
+ JITDylib &PlatformJD,
+ std::unique_ptr<StaticLibraryDefinitionGenerator> OrcRuntimeGenerator,
+ std::unique_ptr<MemoryBuffer> OrcRuntimeArchiveBuffer,
+ std::unique_ptr<object::Archive> OrcRuntimeArchive,
+ LoadDynamicLibrary LoadDynLibrary, bool StaticVCRuntime,
+ const char *VCRuntimePath, Error &Err);
// Associate COFFPlatform JIT-side runtime support functions with handlers.
Error associateRuntimeSupportFunctions(JITDylib &PlatformJD);
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Core.h b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
index daa0fa275591..9554a2195948 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Core.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Core.h
@@ -20,6 +20,8 @@
#include "llvm/ExecutionEngine/JITLink/JITLinkDylib.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h"
#include "llvm/ExecutionEngine/Orc/Shared/WrapperFunctionUtils.h"
#include "llvm/ExecutionEngine/Orc/TaskDispatch.h"
#include "llvm/Support/Debug.h"
@@ -115,7 +117,7 @@ using SymbolNameVector = std::vector<SymbolStringPtr>;
/// A map from symbol names (as SymbolStringPtrs) to JITSymbols
/// (address/flags pairs).
-using SymbolMap = DenseMap<SymbolStringPtr, JITEvaluatedSymbol>;
+using SymbolMap = DenseMap<SymbolStringPtr, ExecutorSymbolDef>;
/// A map from symbol names (as SymbolStringPtrs) to JITSymbolFlags.
using SymbolFlagsMap = DenseMap<SymbolStringPtr, JITSymbolFlags>;
@@ -605,20 +607,6 @@ public:
/// callbacks, metadata).
Error defineMaterializing(SymbolFlagsMap SymbolFlags);
- /// Define the given symbols as non-existent, removing it from the symbol
- /// table and notifying any pending queries. Queries that lookup up the
- /// symbol using the SymbolLookupFlags::WeaklyReferencedSymbol flag will
- /// behave as if the symbol had not been matched in the first place. Queries
- /// that required this symbol will fail with a missing symbol definition
- /// error.
- ///
- /// This method is intended to support cleanup of special symbols like
- /// initializer symbols: Queries using
- /// SymbolLookupFlags::WeaklyReferencedSymbol can be used to trigger their
- /// emission, and this method can be used to remove them from the JITDylib
- /// once materialization is complete.
- void defineNonExistent(ArrayRef<SymbolStringPtr> Symbols);
-
 /// Notify all not-yet-emitted symbols covered by this
 /// MaterializationResponsibility instance that an error has occurred.
 /// This will remove all symbols covered by this MaterializationResponsibility
@@ -762,7 +750,7 @@ private:
/// \code{.cpp}
/// JITDylib &JD = ...;
/// SymbolStringPtr Foo = ...;
-/// JITEvaluatedSymbol FooSym = ...;
+/// ExecutorSymbolDef FooSym = ...;
/// if (auto Err = JD.define(absoluteSymbols({{Foo, FooSym}})))
/// return Err;
/// \endcode
@@ -866,7 +854,7 @@ public:
/// Notify the query that a requested symbol has reached the required state.
void notifySymbolMetRequiredState(const SymbolStringPtr &Name,
- JITEvaluatedSymbol Sym);
+ ExecutorSymbolDef Sym);
/// Returns true if all symbols covered by this query have been
/// resolved.
@@ -1054,6 +1042,11 @@ public:
void setLinkOrder(JITDylibSearchOrder NewSearchOrder,
bool LinkAgainstThisJITDylibFirst = true);
+ /// Append the given JITDylibSearchOrder to the link order for this
+ /// JITDylib (discarding any elements already present in this JITDylib's
+ /// link order).
+ void addToLinkOrder(const JITDylibSearchOrder &NewLinks);
+
/// Add the given JITDylib to the link order for definitions in this
/// JITDylib.
///
@@ -1211,14 +1204,14 @@ private:
: Flags(Flags), State(static_cast<uint8_t>(SymbolState::NeverSearched)),
MaterializerAttached(false), PendingRemoval(false) {}
- JITTargetAddress getAddress() const { return Addr; }
+ ExecutorAddr getAddress() const { return Addr; }
JITSymbolFlags getFlags() const { return Flags; }
SymbolState getState() const { return static_cast<SymbolState>(State); }
bool hasMaterializerAttached() const { return MaterializerAttached; }
bool isPendingRemoval() const { return PendingRemoval; }
- void setAddress(JITTargetAddress Addr) { this->Addr = Addr; }
+ void setAddress(ExecutorAddr Addr) { this->Addr = Addr; }
void setFlags(JITSymbolFlags Flags) { this->Flags = Flags; }
void setState(SymbolState State) {
assert(static_cast<uint8_t>(State) < (1 << 6) &&
@@ -1234,12 +1227,10 @@ private:
this->PendingRemoval = PendingRemoval;
}
- JITEvaluatedSymbol getSymbol() const {
- return JITEvaluatedSymbol(Addr, Flags);
- }
+ ExecutorSymbolDef getSymbol() const { return {Addr, Flags}; }
private:
- JITTargetAddress Addr = 0;
+ ExecutorAddr Addr;
JITSymbolFlags Flags;
uint8_t State : 6;
uint8_t MaterializerAttached : 1;
@@ -1267,7 +1258,9 @@ private:
const SymbolStringPtr &DependantName,
MaterializingInfo &EmittedMI);
- Expected<SymbolFlagsMap> defineMaterializing(SymbolFlagsMap SymbolFlags);
+ Expected<SymbolFlagsMap>
+ defineMaterializing(MaterializationResponsibility &FromMR,
+ SymbolFlagsMap SymbolFlags);
Error replace(MaterializationResponsibility &FromMR,
std::unique_ptr<MaterializationUnit> MU);
@@ -1415,6 +1408,9 @@ public:
/// ExecutionSession.
ExecutorProcessControl &getExecutorProcessControl() { return *EPC; }
+ /// Return the triple for the executor.
+ const Triple &getTargetTriple() const { return EPC->getTargetTriple(); }
+
/// Get the SymbolStringPool for this instance.
std::shared_ptr<SymbolStringPool> getSymbolStringPool() {
return EPC->getSymbolStringPool();
@@ -1550,21 +1546,21 @@ public:
/// Convenience version of blocking lookup.
/// Searches each of the JITDylibs in the search order in turn for the given
/// symbol.
- Expected<JITEvaluatedSymbol>
+ Expected<ExecutorSymbolDef>
lookup(const JITDylibSearchOrder &SearchOrder, SymbolStringPtr Symbol,
SymbolState RequiredState = SymbolState::Ready);
/// Convenience version of blocking lookup.
/// Searches each of the JITDylibs in the search order in turn for the given
/// symbol. The search will not find non-exported symbols.
- Expected<JITEvaluatedSymbol>
+ Expected<ExecutorSymbolDef>
lookup(ArrayRef<JITDylib *> SearchOrder, SymbolStringPtr Symbol,
SymbolState RequiredState = SymbolState::Ready);
/// Convenience version of blocking lookup.
/// Searches each of the JITDylibs in the search order in turn for the given
/// symbol. The search will not find non-exported symbols.
- Expected<JITEvaluatedSymbol>
+ Expected<ExecutorSymbolDef>
lookup(ArrayRef<JITDylib *> SearchOrder, StringRef Symbol,
SymbolState RequiredState = SymbolState::Ready);
@@ -1669,10 +1665,9 @@ public:
/// Run a registered jit-side wrapper function.
/// This should be called by the ExecutorProcessControl instance in response
/// to incoming jit-dispatch requests from the executor.
- void
- runJITDispatchHandler(SendResultFunction SendResult,
- JITTargetAddress HandlerFnTagAddr,
- ArrayRef<char> ArgBuffer);
+ void runJITDispatchHandler(SendResultFunction SendResult,
+ ExecutorAddr HandlerFnTagAddr,
+ ArrayRef<char> ArgBuffer);
/// Dump the state of all the JITDylibs in this session.
void dump(raw_ostream &OS);
@@ -1774,7 +1769,7 @@ private:
OutstandingMUs;
mutable std::mutex JITDispatchHandlersMutex;
- DenseMap<JITTargetAddress, std::shared_ptr<JITDispatchHandlerFunction>>
+ DenseMap<ExecutorAddr, std::shared_ptr<JITDispatchHandlerFunction>>
JITDispatchHandlers;
};
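
With SymbolMap now mapping names to ExecutorSymbolDef rather than JITEvaluatedSymbol, the absoluteSymbols example in the Core.h doc comment above becomes the following sketch (assumes an existing ExecutionSession and JITDylib; the symbol name and the global being exposed are placeholders):

#include "llvm/ExecutionEngine/Orc/Core.h"

using namespace llvm;
using namespace llvm::orc;

static int MyGlobal = 42; // placeholder definition to expose to JIT'd code

Error defineMyGlobal(ExecutionSession &ES, JITDylib &JD) {
  SymbolStringPtr Name = ES.intern("MyGlobal");
  // ExecutorSymbolDef pairs an ExecutorAddr with JITSymbolFlags, replacing
  // the JITEvaluatedSymbol (JITTargetAddress + flags) used previously.
  ExecutorSymbolDef Def(ExecutorAddr::fromPtr(&MyGlobal),
                        JITSymbolFlags::Exported);
  return JD.define(absoluteSymbols({{Name, Def}}));
}
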
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h b/llvm/include/llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h
index 9f10a7750e12..70f5230c5fce 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h
@@ -13,7 +13,6 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_DEBUGOBJECTMANAGERPLUGIN_H
#define LLVM_EXECUTIONENGINE_ORC_DEBUGOBJECTMANAGERPLUGIN_H
-#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h"
@@ -21,6 +20,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/Memory.h"
#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/TargetParser/Triple.h"
#include <functional>
#include <map>
@@ -47,8 +47,28 @@ class DebugObject;
///
class DebugObjectManagerPlugin : public ObjectLinkingLayer::Plugin {
public:
+ // DEPRECATED - Please specify options explicitly
DebugObjectManagerPlugin(ExecutionSession &ES,
std::unique_ptr<DebugObjectRegistrar> Target);
+
+ /// Create the plugin to submit DebugObjects for JITLink artifacts. For all
+ /// options the recommended setting is true.
+ ///
+ /// RequireDebugSections:
+ /// Submit debug objects to the executor only if they contain actual debug
+ /// info. Turning this off may allow minimal debugging based on raw symbol
+ /// names. Note that this may cause significant memory and transport
+ /// overhead for objects built with a release configuration.
+ ///
+ /// AutoRegisterCode:
+ /// Notify the debugger for each new debug object. This is a good default
+ /// mode, but it may cause significant overhead when adding many modules in
+ /// sequence. When turning this off, the user has to issue the call to
+ /// __jit_debug_register_code() on the executor side manually.
+ ///
+ DebugObjectManagerPlugin(ExecutionSession &ES,
+ std::unique_ptr<DebugObjectRegistrar> Target,
+ bool RequireDebugSections, bool AutoRegisterCode);
~DebugObjectManagerPlugin();
void notifyMaterializing(MaterializationResponsibility &MR,
@@ -77,6 +97,8 @@ private:
std::mutex RegisteredObjsLock;
std::unique_ptr<DebugObjectRegistrar> Target;
+ bool RequireDebugSections;
+ bool AutoRegisterCode;
};
} // namespace orc
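
A minimal sketch of attaching the plugin through its new explicit-options constructor (assumes an existing ExecutionSession, ObjectLinkingLayer, and an already-constructed DebugObjectRegistrar; both flags follow the recommended settings described in the comment above):

#include "llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include <memory>

using namespace llvm::orc;

void attachDebugObjectPlugin(ExecutionSession &ES, ObjectLinkingLayer &OLL,
                             std::unique_ptr<DebugObjectRegistrar> Registrar) {
  // Submit only objects with debug info, and notify the debugger per object.
  OLL.addPlugin(std::make_unique<DebugObjectManagerPlugin>(
      ES, std::move(Registrar),
      /*RequireDebugSections=*/true,
      /*AutoRegisterCode=*/true));
}
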
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h
index 758c0016e685..84977711d63f 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/ELFNixPlatform.h
@@ -94,6 +94,12 @@ public:
/// setting up all aliases (including the required ones).
static Expected<std::unique_ptr<ELFNixPlatform>>
Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
+ JITDylib &PlatformJD, std::unique_ptr<DefinitionGenerator> OrcRuntime,
+ std::optional<SymbolAliasMap> RuntimeAliases = std::nullopt);
+
+ /// Construct using a path to the ORC runtime.
+ static Expected<std::unique_ptr<ELFNixPlatform>>
+ Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
JITDylib &PlatformJD, const char *OrcRuntimePath,
std::optional<SymbolAliasMap> RuntimeAliases = std::nullopt);
@@ -119,9 +125,6 @@ public:
static ArrayRef<std::pair<const char *, const char *>>
standardRuntimeUtilityAliases();
- /// Returns true if the given section name is an initializer section.
- static bool isInitializerSection(StringRef SecName);
-
private:
// The ELFNixPlatformPlugin scans/modifies LinkGraphs to support ELF
// platform features including initializers, exceptions, TLV, and language
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h
index 8bd762460dd2..201d52aa9581 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h
@@ -31,7 +31,8 @@ class ExecutionSession;
/// Abstract interface for registering debug objects in the executor process.
class DebugObjectRegistrar {
public:
- virtual Error registerDebugObject(ExecutorAddrRange TargetMem) = 0;
+ virtual Error registerDebugObject(ExecutorAddrRange TargetMem,
+ bool AutoRegisterCode) = 0;
virtual ~DebugObjectRegistrar() = default;
};
@@ -42,7 +43,8 @@ public:
EPCDebugObjectRegistrar(ExecutionSession &ES, ExecutorAddr RegisterFn)
: ES(ES), RegisterFn(RegisterFn) {}
- Error registerDebugObject(ExecutorAddrRange TargetMem) override;
+ Error registerDebugObject(ExecutorAddrRange TargetMem,
+ bool AutoRegisterCode) override;
private:
ExecutionSession &ES;
@@ -51,7 +53,7 @@ private:
/// Create a ExecutorProcessControl-based DebugObjectRegistrar that emits debug
/// objects to the GDB JIT interface. This will use the EPC's lookupSymbols
-/// method to find the registration/deregistration funciton addresses by name.
+/// method to find the registration/deregistration function addresses by name.
///
/// If RegistrationFunctionsDylib is non-None then it will be searched to find
/// the registration functions. If it is None then the process dylib will be
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h
index 0494705b462d..9772c84b682a 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h
@@ -27,7 +27,7 @@ class EPCEHFrameRegistrar : public jitlink::EHFrameRegistrar {
public:
/// Create from a ExecutorProcessControl instance alone. This will use
/// the EPC's lookupSymbols method to find the registration/deregistration
- /// funciton addresses by name.
+ /// function addresses by name.
///
/// If RegistrationFunctionsDylib is non-None then it will be searched to
/// find the registration functions. If it is None then the process dylib
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h
index 354984b540a9..2834331b21f2 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/EPCIndirectionUtils.h
@@ -54,20 +54,18 @@ public:
unsigned getResolverCodeSize() const { return ResolverCodeSize; }
virtual void writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddr,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr) const = 0;
+ ExecutorAddr ResolverTargetAddr,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr) const = 0;
virtual void writeTrampolines(char *TrampolineBlockWorkingMem,
- JITTargetAddress TrampolineBlockTragetAddr,
- JITTargetAddress ResolverAddr,
+ ExecutorAddr TrampolineBlockTargetAddr,
+ ExecutorAddr ResolverAddr,
unsigned NumTrampolines) const = 0;
- virtual void
- writeIndirectStubsBlock(char *StubsBlockWorkingMem,
- JITTargetAddress StubsBlockTargetAddress,
- JITTargetAddress PointersBlockTargetAddress,
- unsigned NumStubs) const = 0;
+ virtual void writeIndirectStubsBlock(
+ char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
+ ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) const = 0;
private:
unsigned PointerSize = 0;
@@ -86,6 +84,12 @@ public:
static Expected<std::unique_ptr<EPCIndirectionUtils>>
Create(ExecutorProcessControl &EPC);
+ /// Create using the ExecutionSession's ExecutorProcessControl instance.
+ static Expected<std::unique_ptr<EPCIndirectionUtils>>
+ Create(ExecutionSession &ES) {
+ return Create(ES.getExecutorProcessControl());
+ }
+
/// Return a reference to the ExecutorProcessControl object.
ExecutorProcessControl &getExecutorProcessControl() const { return EPC; }
@@ -99,13 +103,12 @@ public:
/// Write resolver code to the executor process and return its address.
/// This must be called before any call to createTrampolinePool or
/// createLazyCallThroughManager.
- Expected<JITTargetAddress>
- writeResolverBlock(JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr);
+ Expected<ExecutorAddr> writeResolverBlock(ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr);
/// Returns the address of the Resolver block. Returns zero if the
/// writeResolverBlock method has not previously been called.
- JITTargetAddress getResolverBlockAddress() const { return ResolverBlockAddr; }
+ ExecutorAddr getResolverBlockAddress() const { return ResolverBlockAddr; }
/// Create an IndirectStubsManager for the executor process.
std::unique_ptr<IndirectStubsManager> createIndirectStubsManager();
@@ -117,7 +120,7 @@ public:
/// This function should only be called once.
LazyCallThroughManager &
createLazyCallThroughManager(ExecutionSession &ES,
- JITTargetAddress ErrorHandlerAddr);
+ ExecutorAddr ErrorHandlerAddr);
/// Create a LazyCallThroughManager for the executor process.
LazyCallThroughManager &getLazyCallThroughManager() {
@@ -130,11 +133,10 @@ private:
struct IndirectStubInfo {
IndirectStubInfo() = default;
- IndirectStubInfo(JITTargetAddress StubAddress,
- JITTargetAddress PointerAddress)
+ IndirectStubInfo(ExecutorAddr StubAddress, ExecutorAddr PointerAddress)
: StubAddress(StubAddress), PointerAddress(PointerAddress) {}
- JITTargetAddress StubAddress = 0;
- JITTargetAddress PointerAddress = 0;
+ ExecutorAddr StubAddress;
+ ExecutorAddr PointerAddress;
};
using IndirectStubInfoVector = std::vector<IndirectStubInfo>;
@@ -148,7 +150,7 @@ private:
std::mutex EPCUIMutex;
ExecutorProcessControl &EPC;
std::unique_ptr<ABISupport> ABI;
- JITTargetAddress ResolverBlockAddr = 0;
+ ExecutorAddr ResolverBlockAddr;
FinalizedAlloc ResolverBlock;
std::unique_ptr<TrampolinePool> TP;
std::unique_ptr<LazyCallThroughManager> LCTM;
@@ -181,16 +183,16 @@ public:
ORCABI::ResolverCodeSize) {}
void writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddr,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr) const override {
+ ExecutorAddr ResolverTargetAddr,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr) const override {
ORCABI::writeResolverCode(ResolverWorkingMem, ResolverTargetAddr,
ReentryFnAddr, ReentryCtxAddr);
}
void writeTrampolines(char *TrampolineBlockWorkingMem,
- JITTargetAddress TrampolineBlockTargetAddr,
- JITTargetAddress ResolverAddr,
+ ExecutorAddr TrampolineBlockTargetAddr,
+ ExecutorAddr ResolverAddr,
unsigned NumTrampolines) const override {
ORCABI::writeTrampolines(TrampolineBlockWorkingMem,
TrampolineBlockTargetAddr, ResolverAddr,
@@ -198,8 +200,8 @@ public:
}
void writeIndirectStubsBlock(char *StubsBlockWorkingMem,
- JITTargetAddress StubsBlockTargetAddress,
- JITTargetAddress PointersBlockTargetAddress,
+ ExecutorAddr StubsBlockTargetAddress,
+ ExecutorAddr PointersBlockTargetAddress,
unsigned NumStubs) const override {
ORCABI::writeIndirectStubsBlock(StubsBlockWorkingMem,
StubsBlockTargetAddress,
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
index 812313b68c7e..2f13560061c4 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
@@ -36,6 +36,10 @@ class Function;
class Module;
class Value;
+namespace object {
+class MachOUniversalBinary;
+}
+
namespace orc {
class ObjectLayer;
@@ -176,10 +180,6 @@ public:
void runDestructors();
protected:
- template <typename PtrTy> JITTargetAddress toTargetAddress(PtrTy *P) {
- return static_cast<JITTargetAddress>(reinterpret_cast<uintptr_t>(P));
- }
-
using DestructorPtr = void (*)(void *);
using CXXDestructorDataPair = std::pair<DestructorPtr, void *>;
using CXXDestructorDataPairList = std::vector<CXXDestructorDataPair>;
@@ -267,23 +267,26 @@ public:
/// Try to create a StaticLibraryDefinitionGenerator from the given path.
///
/// This call will succeed if the file at the given path is a static library
- /// is a valid archive, otherwise it will return an error.
+ /// or a MachO universal binary containing a static library that is compatible
+ /// with the ExecutionSession's triple. Otherwise it will return an error.
static Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>>
Load(ObjectLayer &L, const char *FileName,
GetObjectFileInterface GetObjFileInterface = GetObjectFileInterface());
- /// Try to create a StaticLibraryDefinitionGenerator from the given path.
- ///
- /// This call will succeed if the file at the given path is a static library
- /// or a MachO universal binary containing a static library that is compatible
- /// with the given triple. Otherwise it will return an error.
+ /// Try to create a StaticLibraryDefinitionGenerator from the given memory
+ /// buffer and Archive object.
static Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>>
- Load(ObjectLayer &L, const char *FileName, const Triple &TT,
- GetObjectFileInterface GetObjFileInterface = GetObjectFileInterface());
+ Create(ObjectLayer &L, std::unique_ptr<MemoryBuffer> ArchiveBuffer,
+ std::unique_ptr<object::Archive> Archive,
+ GetObjectFileInterface GetObjFileInterface = GetObjectFileInterface());
 /// Try to create a StaticLibraryDefinitionGenerator from the given memory buffer.
/// This call will succeed if the buffer contains a valid archive, otherwise
/// it will return an error.
+ ///
+ /// This call will succeed if the buffer contains a valid static library or a
+ /// MachO universal binary containing a static library that is compatible
+ /// with the ExecutionSession's triple. Otherwise it will return an error.
static Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>>
Create(ObjectLayer &L, std::unique_ptr<MemoryBuffer> ArchiveBuffer,
GetObjectFileInterface GetObjFileInterface = GetObjectFileInterface());
@@ -302,11 +305,14 @@ public:
private:
StaticLibraryDefinitionGenerator(ObjectLayer &L,
std::unique_ptr<MemoryBuffer> ArchiveBuffer,
+ std::unique_ptr<object::Archive> Archive,
GetObjectFileInterface GetObjFileInterface,
Error &Err);
-
Error buildObjectFilesMap();
+ static Expected<std::pair<size_t, size_t>>
+ getSliceRangeForArch(object::MachOUniversalBinary &UB, const Triple &TT);
+
ObjectLayer &L;
GetObjectFileInterface GetObjFileInterface;
std::set<std::string> ImportedDynamicLibraries;
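
With the triple-taking Load overload removed, archive compatibility is now checked against the ExecutionSession's triple internally. A minimal sketch of adding an archive to a JITDylib via the remaining path-based overload (the archive path is a placeholder supplied by the caller):

#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"

using namespace llvm;
using namespace llvm::orc;

Error addArchive(ObjectLayer &ObjLayer, JITDylib &JD,
                 const char *ArchivePath) {
  auto Gen = StaticLibraryDefinitionGenerator::Load(ObjLayer, ArchivePath);
  if (!Gen)
    return Gen.takeError();
  JD.addGenerator(std::move(*Gen)); // symbols resolve lazily from the archive
  return Error::success();
}
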
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h
index f858788af2ab..a16a3b9f92a1 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/ExecutorProcessControl.h
@@ -14,7 +14,6 @@
#define LLVM_EXECUTIONENGINE_ORC_EXECUTORPROCESSCONTROL_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
@@ -23,6 +22,7 @@
#include "llvm/ExecutionEngine/Orc/TaskDispatch.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/MSVCErrorWorkarounds.h"
+#include "llvm/TargetParser/Triple.h"
#include <future>
#include <mutex>
@@ -218,6 +218,33 @@ public:
return *MemMgr;
}
+ /// Returns the bootstrap map.
+ const StringMap<std::vector<char>> &getBootstrapMap() const {
+ return BootstrapMap;
+ }
+
+ /// Look up and SPS-deserialize a bootstrap map value.
+ ///
+ /// If the key is not present, \p Val is left as std::nullopt and the
+ /// function returns success.
+ template <typename T, typename SPSTagT>
+ Error getBootstrapMapValue(StringRef Key, std::optional<T> &Val) const {
+ Val = std::nullopt;
+
+ auto I = BootstrapMap.find(Key);
+ if (I == BootstrapMap.end())
+ return Error::success();
+
+ T Tmp;
+ shared::SPSInputBuffer IB(I->second.data(), I->second.size());
+ if (!shared::SPSArgList<SPSTagT>::deserialize(IB, Tmp))
+ return make_error<StringError>("Could not deserialize value for key " +
+ Key,
+ inconvertibleErrorCode());
+
+ Val = std::move(Tmp);
+ return Error::success();
+ }
+
/// Returns the bootstrap symbol map.
const StringMap<ExecutorAddr> &getBootstrapSymbolsMap() const {
return BootstrapSymbols;
@@ -372,6 +399,7 @@ protected:
JITDispatchInfo JDI;
MemoryAccess *MemAccess = nullptr;
jitlink::JITLinkMemoryManager *MemMgr = nullptr;
+ StringMap<std::vector<char>> BootstrapMap;
StringMap<ExecutorAddr> BootstrapSymbols;
};
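
A minimal sketch of the new getBootstrapMapValue accessor (the key name is hypothetical; integral values can typically use their own type as the SPS tag, and a missing key leaves the optional empty while still returning success, as noted above):

#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
#include <optional>

using namespace llvm;
using namespace llvm::orc;

Expected<std::optional<uint64_t>>
readExampleValue(ExecutorProcessControl &EPC) {
  std::optional<uint64_t> Value;
  // "example-key" is a placeholder; real keys are whatever the executor-side
  // bootstrap code placed in the map.
  if (auto Err =
          EPC.getBootstrapMapValue<uint64_t, uint64_t>("example-key", Value))
    return std::move(Err);
  return Value; // std::nullopt if the key was not present
}
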
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
index d659d6ae6b49..3b9ba55ef9c4 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
@@ -63,17 +63,17 @@ namespace orc {
class TrampolinePool {
public:
using NotifyLandingResolvedFunction =
- unique_function<void(JITTargetAddress) const>;
+ unique_function<void(ExecutorAddr) const>;
using ResolveLandingFunction = unique_function<void(
- JITTargetAddress TrampolineAddr,
+ ExecutorAddr TrampolineAddr,
NotifyLandingResolvedFunction OnLandingResolved) const>;
virtual ~TrampolinePool();
/// Get an available trampoline address.
/// Returns an error if no trampoline can be created.
- Expected<JITTargetAddress> getTrampoline() {
+ Expected<ExecutorAddr> getTrampoline() {
std::lock_guard<std::mutex> Lock(TPMutex);
if (AvailableTrampolines.empty()) {
if (auto Err = grow())
@@ -86,7 +86,7 @@ public:
}
/// Returns the given trampoline to the pool for re-use.
- void releaseTrampoline(JITTargetAddress TrampolineAddr) {
+ void releaseTrampoline(ExecutorAddr TrampolineAddr) {
std::lock_guard<std::mutex> Lock(TPMutex);
AvailableTrampolines.push_back(TrampolineAddr);
}
@@ -95,7 +95,7 @@ protected:
virtual Error grow() = 0;
std::mutex TPMutex;
- std::vector<JITTargetAddress> AvailableTrampolines;
+ std::vector<ExecutorAddr> AvailableTrampolines;
};
/// A trampoline pool for trampolines within the current process.
@@ -121,14 +121,14 @@ private:
LocalTrampolinePool<ORCABI> *TrampolinePool =
static_cast<LocalTrampolinePool *>(TrampolinePoolPtr);
- std::promise<JITTargetAddress> LandingAddressP;
+ std::promise<ExecutorAddr> LandingAddressP;
auto LandingAddressF = LandingAddressP.get_future();
- TrampolinePool->ResolveLanding(pointerToJITTargetAddress(TrampolineId),
- [&](JITTargetAddress LandingAddress) {
+ TrampolinePool->ResolveLanding(ExecutorAddr::fromPtr(TrampolineId),
+ [&](ExecutorAddr LandingAddress) {
LandingAddressP.set_value(LandingAddress);
});
- return LandingAddressF.get();
+ return LandingAddressF.get().getValue();
}
LocalTrampolinePool(ResolveLandingFunction ResolveLanding, Error &Err)
@@ -147,9 +147,9 @@ private:
}
ORCABI::writeResolverCode(static_cast<char *>(ResolverBlock.base()),
- pointerToJITTargetAddress(ResolverBlock.base()),
- pointerToJITTargetAddress(&reenter),
- pointerToJITTargetAddress(this));
+ ExecutorAddr::fromPtr(ResolverBlock.base()),
+ ExecutorAddr::fromPtr(&reenter),
+ ExecutorAddr::fromPtr(this));
EC = sys::Memory::protectMappedMemory(ResolverBlock.getMemoryBlock(),
sys::Memory::MF_READ |
@@ -177,12 +177,12 @@ private:
char *TrampolineMem = static_cast<char *>(TrampolineBlock.base());
ORCABI::writeTrampolines(
- TrampolineMem, pointerToJITTargetAddress(TrampolineMem),
- pointerToJITTargetAddress(ResolverBlock.base()), NumTrampolines);
+ TrampolineMem, ExecutorAddr::fromPtr(TrampolineMem),
+ ExecutorAddr::fromPtr(ResolverBlock.base()), NumTrampolines);
for (unsigned I = 0; I < NumTrampolines; ++I)
- AvailableTrampolines.push_back(pointerToJITTargetAddress(
- TrampolineMem + (I * ORCABI::TrampolineSize)));
+ AvailableTrampolines.push_back(
+ ExecutorAddr::fromPtr(TrampolineMem + (I * ORCABI::TrampolineSize)));
if (auto EC = sys::Memory::protectMappedMemory(
TrampolineBlock.getMemoryBlock(),
@@ -202,22 +202,22 @@ private:
/// Target-independent base class for compile callback management.
class JITCompileCallbackManager {
public:
- using CompileFunction = std::function<JITTargetAddress()>;
+ using CompileFunction = std::function<ExecutorAddr()>;
virtual ~JITCompileCallbackManager() = default;
/// Reserve a compile callback.
- Expected<JITTargetAddress> getCompileCallback(CompileFunction Compile);
+ Expected<ExecutorAddr> getCompileCallback(CompileFunction Compile);
/// Execute the callback for the given trampoline id. Called by the JIT
/// to compile functions on demand.
- JITTargetAddress executeCompileCallback(JITTargetAddress TrampolineAddr);
+ ExecutorAddr executeCompileCallback(ExecutorAddr TrampolineAddr);
protected:
/// Construct a JITCompileCallbackManager.
JITCompileCallbackManager(std::unique_ptr<TrampolinePool> TP,
ExecutionSession &ES,
- JITTargetAddress ErrorHandlerAddress)
+ ExecutorAddr ErrorHandlerAddress)
: TP(std::move(TP)), ES(ES),
CallbacksJD(ES.createBareJITDylib("<Callbacks>")),
ErrorHandlerAddress(ErrorHandlerAddress) {}
@@ -231,8 +231,8 @@ private:
std::unique_ptr<TrampolinePool> TP;
ExecutionSession &ES;
JITDylib &CallbacksJD;
- JITTargetAddress ErrorHandlerAddress;
- std::map<JITTargetAddress, SymbolStringPtr> AddrToSymbol;
+ ExecutorAddr ErrorHandlerAddress;
+ std::map<ExecutorAddr, SymbolStringPtr> AddrToSymbol;
size_t NextCallbackId = 0;
};
@@ -242,7 +242,7 @@ class LocalJITCompileCallbackManager : public JITCompileCallbackManager {
public:
/// Create a new LocalJITCompileCallbackManager.
static Expected<std::unique_ptr<LocalJITCompileCallbackManager>>
- Create(ExecutionSession &ES, JITTargetAddress ErrorHandlerAddress) {
+ Create(ExecutionSession &ES, ExecutorAddr ErrorHandlerAddress) {
Error Err = Error::success();
auto CCMgr = std::unique_ptr<LocalJITCompileCallbackManager>(
new LocalJITCompileCallbackManager(ES, ErrorHandlerAddress, Err));
@@ -256,15 +256,14 @@ private:
/// @param ErrorHandlerAddress The address of an error handler in the target
/// process to be used if a compile callback fails.
LocalJITCompileCallbackManager(ExecutionSession &ES,
- JITTargetAddress ErrorHandlerAddress,
- Error &Err)
+ ExecutorAddr ErrorHandlerAddress, Error &Err)
: JITCompileCallbackManager(nullptr, ES, ErrorHandlerAddress) {
using NotifyLandingResolvedFunction =
TrampolinePool::NotifyLandingResolvedFunction;
ErrorAsOutParameter _(&Err);
auto TP = LocalTrampolinePool<ORCABI>::Create(
- [this](JITTargetAddress TrampolineAddr,
+ [this](ExecutorAddr TrampolineAddr,
NotifyLandingResolvedFunction NotifyLandingResolved) {
NotifyLandingResolved(executeCompileCallback(TrampolineAddr));
});
@@ -282,12 +281,12 @@ private:
class IndirectStubsManager {
public:
/// Map type for initializing the manager. See init.
- using StubInitsMap = StringMap<std::pair<JITTargetAddress, JITSymbolFlags>>;
+ using StubInitsMap = StringMap<std::pair<ExecutorAddr, JITSymbolFlags>>;
virtual ~IndirectStubsManager() = default;
/// Create a single stub with the given name, target address and flags.
- virtual Error createStub(StringRef StubName, JITTargetAddress StubAddr,
+ virtual Error createStub(StringRef StubName, ExecutorAddr StubAddr,
JITSymbolFlags StubFlags) = 0;
/// Create StubInits.size() stubs with the given names, target
@@ -297,13 +296,14 @@ public:
/// Find the stub with the given name. If ExportedStubsOnly is true,
/// this will only return a result if the stub's flags indicate that it
/// is exported.
- virtual JITEvaluatedSymbol findStub(StringRef Name, bool ExportedStubsOnly) = 0;
+ virtual ExecutorSymbolDef findStub(StringRef Name,
+ bool ExportedStubsOnly) = 0;
/// Find the implementation-pointer for the stub.
- virtual JITEvaluatedSymbol findPointer(StringRef Name) = 0;
+ virtual ExecutorSymbolDef findPointer(StringRef Name) = 0;
/// Change the value of the implementation pointer for the stub.
- virtual Error updatePointer(StringRef Name, JITTargetAddress NewAddr) = 0;
+ virtual Error updatePointer(StringRef Name, ExecutorAddr NewAddr) = 0;
private:
virtual void anchor();
@@ -334,10 +334,10 @@ public:
sys::MemoryBlock StubsBlock(StubsAndPtrsMem.base(), ISAS.StubBytes);
auto StubsBlockMem = static_cast<char *>(StubsAndPtrsMem.base());
auto PtrBlockAddress =
- pointerToJITTargetAddress(StubsBlockMem) + ISAS.StubBytes;
+ ExecutorAddr::fromPtr(StubsBlockMem) + ISAS.StubBytes;
ORCABI::writeIndirectStubsBlock(StubsBlockMem,
- pointerToJITTargetAddress(StubsBlockMem),
+ ExecutorAddr::fromPtr(StubsBlockMem),
PtrBlockAddress, ISAS.NumStubs);
if (auto EC = sys::Memory::protectMappedMemory(
@@ -369,7 +369,7 @@ private:
template <typename TargetT>
class LocalIndirectStubsManager : public IndirectStubsManager {
public:
- Error createStub(StringRef StubName, JITTargetAddress StubAddr,
+ Error createStub(StringRef StubName, ExecutorAddr StubAddr,
JITSymbolFlags StubFlags) override {
std::lock_guard<std::mutex> Lock(StubsMutex);
if (auto Err = reserveStubs(1))
@@ -392,36 +392,34 @@ public:
return Error::success();
}
- JITEvaluatedSymbol findStub(StringRef Name, bool ExportedStubsOnly) override {
+ ExecutorSymbolDef findStub(StringRef Name, bool ExportedStubsOnly) override {
std::lock_guard<std::mutex> Lock(StubsMutex);
auto I = StubIndexes.find(Name);
if (I == StubIndexes.end())
- return nullptr;
+ return ExecutorSymbolDef();
auto Key = I->second.first;
- void *StubAddr = IndirectStubsInfos[Key.first].getStub(Key.second);
- assert(StubAddr && "Missing stub address");
- auto StubTargetAddr =
- static_cast<JITTargetAddress>(reinterpret_cast<uintptr_t>(StubAddr));
- auto StubSymbol = JITEvaluatedSymbol(StubTargetAddr, I->second.second);
+ void *StubPtr = IndirectStubsInfos[Key.first].getStub(Key.second);
+ assert(StubPtr && "Missing stub address");
+ auto StubAddr = ExecutorAddr::fromPtr(StubPtr);
+ auto StubSymbol = ExecutorSymbolDef(StubAddr, I->second.second);
if (ExportedStubsOnly && !StubSymbol.getFlags().isExported())
- return nullptr;
+ return ExecutorSymbolDef();
return StubSymbol;
}
- JITEvaluatedSymbol findPointer(StringRef Name) override {
+ ExecutorSymbolDef findPointer(StringRef Name) override {
std::lock_guard<std::mutex> Lock(StubsMutex);
auto I = StubIndexes.find(Name);
if (I == StubIndexes.end())
- return nullptr;
+ return ExecutorSymbolDef();
auto Key = I->second.first;
- void *PtrAddr = IndirectStubsInfos[Key.first].getPtr(Key.second);
- assert(PtrAddr && "Missing pointer address");
- auto PtrTargetAddr =
- static_cast<JITTargetAddress>(reinterpret_cast<uintptr_t>(PtrAddr));
- return JITEvaluatedSymbol(PtrTargetAddr, I->second.second);
+ void *PtrPtr = IndirectStubsInfos[Key.first].getPtr(Key.second);
+ assert(PtrPtr && "Missing pointer address");
+ auto PtrAddr = ExecutorAddr::fromPtr(PtrPtr);
+ return ExecutorSymbolDef(PtrAddr, I->second.second);
}
- Error updatePointer(StringRef Name, JITTargetAddress NewAddr) override {
+ Error updatePointer(StringRef Name, ExecutorAddr NewAddr) override {
using AtomicIntPtr = std::atomic<uintptr_t>;
std::lock_guard<std::mutex> Lock(StubsMutex);
@@ -430,7 +428,7 @@ public:
auto Key = I->second.first;
AtomicIntPtr *AtomicStubPtr = reinterpret_cast<AtomicIntPtr *>(
IndirectStubsInfos[Key.first].getPtr(Key.second));
- *AtomicStubPtr = static_cast<uintptr_t>(NewAddr);
+ *AtomicStubPtr = static_cast<uintptr_t>(NewAddr.getValue());
return Error::success();
}
@@ -451,12 +449,12 @@ private:
return Error::success();
}
- void createStubInternal(StringRef StubName, JITTargetAddress InitAddr,
+ void createStubInternal(StringRef StubName, ExecutorAddr InitAddr,
JITSymbolFlags StubFlags) {
auto Key = FreeStubs.back();
FreeStubs.pop_back();
*IndirectStubsInfos[Key.first].getPtr(Key.second) =
- jitTargetAddressToPointer<void *>(InitAddr);
+ InitAddr.toPtr<void *>();
StubIndexes[StubName] = std::make_pair(Key, StubFlags);
}
@@ -475,7 +473,7 @@ private:
/// manager if a compile callback fails.
Expected<std::unique_ptr<JITCompileCallbackManager>>
createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES,
- JITTargetAddress ErrorHandlerAddress);
+ ExecutorAddr ErrorHandlerAddress);
 /// Create a local indirect stubs manager builder.
///
@@ -488,7 +486,7 @@ createLocalIndirectStubsManagerBuilder(const Triple &T);
///
/// Usage example: Turn a trampoline address into a function pointer constant
/// for use in a stub.
-Constant *createIRTypedAddress(FunctionType &FT, JITTargetAddress Addr);
+Constant *createIRTypedAddress(FunctionType &FT, ExecutorAddr Addr);
/// Create a function pointer with the given type, name, and initializer
/// in the given Module.
@@ -515,7 +513,7 @@ private:
/// Clone a function declaration into a new module.
///
/// This function can be used as the first step towards creating a callback
-/// stub (see makeStub), or moving a function body (see moveFunctionBody).
+/// stub (see makeStub).
///
/// If the VMap argument is non-null, a mapping will be added between F and
/// the new declaration, and between each of F's arguments and the new
@@ -527,43 +525,14 @@ private:
Function *cloneFunctionDecl(Module &Dst, const Function &F,
ValueToValueMapTy *VMap = nullptr);
-/// Move the body of function 'F' to a cloned function declaration in a
-/// different module (See related cloneFunctionDecl).
-///
-/// If the target function declaration is not supplied via the NewF parameter
-/// then it will be looked up via the VMap.
-///
-/// This will delete the body of function 'F' from its original parent module,
-/// but leave its declaration.
-void moveFunctionBody(Function &OrigF, ValueToValueMapTy &VMap,
- ValueMaterializer *Materializer = nullptr,
- Function *NewF = nullptr);
-
/// Clone a global variable declaration into a new module.
GlobalVariable *cloneGlobalVariableDecl(Module &Dst, const GlobalVariable &GV,
ValueToValueMapTy *VMap = nullptr);
-/// Move global variable GV from its parent module to cloned global
-/// declaration in a different module.
-///
-/// If the target global declaration is not supplied via the NewGV parameter
-/// then it will be looked up via the VMap.
-///
-/// This will delete the initializer of GV from its original parent module,
-/// but leave its declaration.
-void moveGlobalVariableInitializer(GlobalVariable &OrigGV,
- ValueToValueMapTy &VMap,
- ValueMaterializer *Materializer = nullptr,
- GlobalVariable *NewGV = nullptr);
-
/// Clone a global alias declaration into a new module.
GlobalAlias *cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA,
ValueToValueMapTy &VMap);
-/// Clone module flags metadata into the destination module.
-void cloneModuleFlagsMetadata(Module &Dst, const Module &Src,
- ValueToValueMapTy &VMap);
-
/// Introduce relocations to \p Sym in its own definition if there are any
/// pointers formed via PC-relative address that do not already have a
/// relocation.
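
The bulk of the changes above replace JITTargetAddress (a raw uint64_t) with ExecutorAddr and its fromPtr/toPtr conversions. A minimal in-process sketch of those conversions (for out-of-process use only the raw value travels across the connection):

#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
#include <cassert>
#include <cstdint>

int main() {
  int X = 0;
  auto A = llvm::orc::ExecutorAddr::fromPtr(&X); // pointer -> address
  assert(A.toPtr<int *>() == &X);                // address -> pointer
  uint64_t Raw = A.getValue();                   // raw 64-bit representation
  assert(llvm::orc::ExecutorAddr(Raw) == A);     // reconstruct from raw value
  return 0;
}
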
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h b/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
index 8865d301e79e..0e72194aec9b 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h
@@ -13,12 +13,12 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_JITTARGETMACHINEBUILDER_H
#define LLVM_EXECUTIONENGINE_ORC_JITTARGETMACHINEBUILDER_H
-#include "llvm/ADT/Triple.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Error.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/TargetParser/Triple.h"
#include <memory>
#include <optional>
#include <string>
@@ -38,18 +38,16 @@ class JITTargetMachineBuilder {
public:
/// Create a JITTargetMachineBuilder based on the given triple.
///
- /// Note: TargetOptions is default-constructed, then EmulatedTLS and
- /// ExplicitEmulatedTLS are set to true. If EmulatedTLS is not
- /// required, these values should be reset before calling
- /// createTargetMachine.
+ /// Note: TargetOptions is default-constructed, then EmulatedTLS is set to
+ /// true. If EmulatedTLS is not required, these values should be reset before
+ /// calling createTargetMachine.
JITTargetMachineBuilder(Triple TT);
/// Create a JITTargetMachineBuilder for the host system.
///
- /// Note: TargetOptions is default-constructed, then EmulatedTLS and
- /// ExplicitEmulatedTLS are set to true. If EmulatedTLS is not
- /// required, these values should be reset before calling
- /// createTargetMachine.
+ /// Note: TargetOptions is default-constructed, then EmulatedTLS is set to
+ /// true. If EmulatedTLS is not required, these values should be reset before
+ /// calling createTargetMachine.
static Expected<JITTargetMachineBuilder> detectHost();
/// Create a TargetMachine.
@@ -125,9 +123,9 @@ public:
/// Set TargetOptions.
///
/// Note: This operation will overwrite any previously configured options,
- /// including EmulatedTLS, ExplicitEmulatedTLS, and UseInitArray which
- /// the JITTargetMachineBuilder sets by default. Clients are responsible
- /// for re-enabling these overwritten options.
+ /// including EmulatedTLS and UseInitArray which the JITTargetMachineBuilder
+ /// sets by default. Clients are responsible for re-enabling these overwritten
+ /// options.
JITTargetMachineBuilder &setOptions(TargetOptions Options) {
this->Options = std::move(Options);
return *this;
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
index 2982a7af09b0..d1affd9d2eb3 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
@@ -22,6 +22,7 @@
#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ThreadPool.h"
+#include <variant>
namespace llvm {
namespace orc {
@@ -37,7 +38,7 @@ class ExecutorProcessControl;
class LLJIT {
template <typename, typename, typename> friend class LLJITBuilderSetters;
- friend void setUpGenericLLVMIRPlatform(LLJIT &J);
+ friend Expected<JITDylibSP> setUpGenericLLVMIRPlatform(LLJIT &J);
public:
/// Initializer support for LLJIT.
@@ -70,21 +71,64 @@ public:
/// Returns a reference to the JITDylib representing the JIT'd main program.
JITDylib &getMainJITDylib() { return *Main; }
+ /// Returns the ProcessSymbols JITDylib, which by default reflects non-JIT'd
+ /// symbols in the host process.
+ ///
+ /// Note: JIT'd code should not be added to the ProcessSymbols JITDylib. Use
+ /// the main JITDylib or a custom JITDylib instead.
+ JITDylibSP getProcessSymbolsJITDylib();
+
+ /// Returns the Platform JITDylib, which will contain the ORC runtime (if
+ /// given) and any platform symbols.
+ ///
+ /// Note: JIT'd code should not be added to the Platform JITDylib. Use the
+ /// main JITDylib or a custom JITDylib instead.
+ JITDylibSP getPlatformJITDylib();
+
/// Returns the JITDylib with the given name, or nullptr if no JITDylib with
/// that name exists.
JITDylib *getJITDylibByName(StringRef Name) {
return ES->getJITDylibByName(Name);
}
+ /// Load a (real) dynamic library and make its symbols available through a
+ /// new JITDylib with the same name.
+ ///
+ /// If the given *executor* path contains a valid platform dynamic library
+ /// then that library will be loaded, and a new bare JITDylib whose name is
+ /// the given path will be created to make the library's symbols available to
+ /// JIT'd code.
+ Expected<JITDylib &> loadPlatformDynamicLibrary(const char *Path);
+
+ /// Link a static library into the given JITDylib.
+ ///
+ /// If the given MemoryBuffer contains a valid static archive (or a universal
+ /// binary with an archive slice that fits the LLJIT instance's platform /
+ /// architecture) then it will be added to the given JITDylib using a
+ /// StaticLibraryDefinitionGenerator.
+ Error linkStaticLibraryInto(JITDylib &JD,
+ std::unique_ptr<MemoryBuffer> LibBuffer);
+
+ /// Link a static library into the given JITDylib.
+ ///
+ /// If the given *host* path contains a valid static archive (or a universal
+ /// binary with an archive slice that fits the LLJIT instance's platform /
+ /// architecture) then it will be added to the given JITDylib using a
+ /// StaticLibraryDefinitionGenerator.
+ Error linkStaticLibraryInto(JITDylib &JD, const char *Path);
+
/// Create a new JITDylib with the given name and return a reference to it.
///
/// JITDylib names must be unique. If the given name is derived from user
/// input or elsewhere in the environment then the client should check
/// (e.g. by calling getJITDylibByName) that the given name is not already in
/// use.
- Expected<JITDylib &> createJITDylib(std::string Name) {
- return ES->createJITDylib(std::move(Name));
- }
+ Expected<JITDylib &> createJITDylib(std::string Name);
+
+ /// Returns the default link order for this LLJIT instance. This link order
+ /// will be appended to the link order of JITDylibs created by LLJIT's
+ /// createJITDylib method.
+ JITDylibSearchOrder defaultLinkOrder() { return DefaultLinks; }
/// Adds an IR module with the given ResourceTracker.
Error addIRModule(ResourceTrackerSP RT, ThreadSafeModule TSM);
@@ -202,8 +246,12 @@ protected:
std::unique_ptr<ExecutionSession> ES;
std::unique_ptr<PlatformSupport> PS;
+ JITDylib *ProcessSymbols = nullptr;
+ JITDylib *Platform = nullptr;
JITDylib *Main = nullptr;
+ JITDylibSearchOrder DefaultLinks;
+
DataLayout DL;
Triple TT;
std::unique_ptr<ThreadPool> CompileThreads;
@@ -258,16 +306,22 @@ public:
std::function<Expected<std::unique_ptr<IRCompileLayer::IRCompiler>>(
JITTargetMachineBuilder JTMB)>;
- using PlatformSetupFunction = std::function<Error(LLJIT &J)>;
+ using ProcessSymbolsJITDylibSetupFunction =
+ std::function<Error(JITDylib &JD)>;
+
+ using PlatformSetupFunction = unique_function<Expected<JITDylibSP>(LLJIT &J)>;
std::unique_ptr<ExecutorProcessControl> EPC;
std::unique_ptr<ExecutionSession> ES;
std::optional<JITTargetMachineBuilder> JTMB;
std::optional<DataLayout> DL;
+ bool LinkProcessSymbolsByDefault = true;
+ ProcessSymbolsJITDylibSetupFunction SetupProcessSymbolsJITDylib;
ObjectLinkingLayerCreator CreateObjectLinkingLayer;
CompileFunctionCreator CreateCompileFunction;
PlatformSetupFunction SetUpPlatform;
unsigned NumCompileThreads = 0;
+ bool EnableDebuggerSupport = false;
  /// Called prior to JIT class construction to fix up defaults.
Error prepareForConstruction();
@@ -290,6 +344,10 @@ public:
/// Set an ExecutionSession for this instance.
SetterImpl &setExecutionSession(std::unique_ptr<ExecutionSession> ES) {
+ assert(
+ !impl().EPC &&
+ "setExecutionSession should not be called if an ExecutorProcessControl "
+ "object has already been set");
impl().ES = std::move(ES);
return impl();
}
@@ -316,6 +374,28 @@ public:
return impl();
}
+ /// The LinkProcessSymbolsByDefault flag determines whether the "Process"
+ /// JITDylib will be added to the default link order at LLJIT construction
+ /// time. If true, the Process JITDylib will be added as the last item in the
+ /// default link order. If false (or if the Process JITDylib is disabled via
+ /// setProcessSymbolsJITDylibSetup) then the Process JITDylib will not appear
+ /// in the default link order.
+ SetterImpl &setLinkProcessSymbolsByDefault(bool LinkProcessSymbolsByDefault) {
+ impl().LinkProcessSymbolsByDefault = LinkProcessSymbolsByDefault;
+ return impl();
+ }
+
+ /// Set a setup function for the process symbols dylib. If not provided,
+ /// but LinkProcessSymbolsByDefault is true, then the process-symbols
+ /// JITDylib will be configured with a DynamicLibrarySearchGenerator with a
+ /// default symbol filter.
+ SetterImpl &setProcessSymbolsJITDylibSetup(
+ LLJITBuilderState::ProcessSymbolsJITDylibSetupFunction
+ SetupProcessSymbolsJITDylib) {
+ impl().SetupProcessSymbolsJITDylib = std::move(SetupProcessSymbolsJITDylib);
+ return impl();
+ }
+
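A sketch of how the two process-symbols knobs above might be combined; the generator choice inside the lambda is hypothetical:

  LLJITBuilder Builder;
  Builder.setLinkProcessSymbolsByDefault(false)
      .setProcessSymbolsJITDylibSetup([](JITDylib &JD) -> Error {
        // Install a custom DefinitionGenerator on JD here (e.g. a filtered
        // DynamicLibrarySearchGenerator) instead of the default one.
        return Error::success();
      });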
/// Set an ObjectLinkingLayer creation function.
///
/// If this method is not called, a default creation function will be used
@@ -361,6 +441,12 @@ public:
return impl();
}
+ /// Enable / disable debugger support (off by default).
+ SetterImpl &setEnableDebuggerSupport(bool EnableDebuggerSupport) {
+ impl().EnableDebuggerSupport = EnableDebuggerSupport;
+ return impl();
+ }
+
/// Set an ExecutorProcessControl object.
///
/// If the platform uses ObjectLinkingLayer by default and no
@@ -447,20 +533,49 @@ class LLLazyJITBuilder
public LLLazyJITBuilderSetters<LLLazyJIT, LLLazyJITBuilder,
LLLazyJITBuilderState> {};
-/// Configure the LLJIT instance to use orc runtime support.
-Error setUpOrcPlatform(LLJIT& J);
+/// Configure the LLJIT instance to use ORC runtime support. This overload
+/// assumes that the client has manually configured a Platform object.
+Error setUpOrcPlatformManually(LLJIT &J);
+
+/// Configure the LLJIT instance to use the ORC runtime and the detected
+/// native target for the executor.
+class ExecutorNativePlatform {
+public:
+ /// Set up using a path to the ORC runtime.
+ ExecutorNativePlatform(std::string OrcRuntimePath)
+ : OrcRuntime(std::move(OrcRuntimePath)) {}
+
+ /// Set up using the given memory buffer.
+ ExecutorNativePlatform(std::unique_ptr<MemoryBuffer> OrcRuntimeMB)
+ : OrcRuntime(std::move(OrcRuntimeMB)) {}
+
+ // TODO: add compiler-rt.
+
+ /// Add a path to the VC runtime.
+ ExecutorNativePlatform &addVCRuntime(std::string VCRuntimePath,
+ bool StaticVCRuntime) {
+ VCRuntime = {std::move(VCRuntimePath), StaticVCRuntime};
+ return *this;
+ }
+
+ Expected<JITDylibSP> operator()(LLJIT &J);
+
+private:
+ std::variant<std::string, std::unique_ptr<MemoryBuffer>> OrcRuntime;
+ std::optional<std::pair<std::string, bool>> VCRuntime;
+};
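For example (a sketch; the runtime archive path is hypothetical), an ExecutorNativePlatform can be handed straight to the builder's platform setup hook:

  LLJITBuilder Builder;
  Builder.setPlatformSetUp(ExecutorNativePlatform("/path/to/liborc_rt.a"));
  // On Windows, addVCRuntime(...) can be chained in before the object is
  // passed to the builder.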
/// Configure the LLJIT instance to scrape modules for llvm.global_ctors and
/// llvm.global_dtors variables and (if present) build initialization and
/// deinitialization functions. Platform specific initialization configurations
/// should be preferred where available.
-void setUpGenericLLVMIRPlatform(LLJIT &J);
+Expected<JITDylibSP> setUpGenericLLVMIRPlatform(LLJIT &J);
/// Configure the LLJIT instance to disable platform support explicitly. This is
/// useful in two cases: for platforms that don't have such requirements, and for
/// platforms that we have no explicit support for yet and that don't work well
/// with the generic IR platform.
-Error setUpInactivePlatform(LLJIT &J);
+Expected<JITDylibSP> setUpInactivePlatform(LLJIT &J);
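Both functions plug into the same builder hook; a brief sketch:

  LLJITBuilder Builder;
  Builder.setPlatformSetUp(setUpGenericLLVMIRPlatform); // scrape ctors/dtors
  // or, to opt out of platform support entirely:
  // Builder.setPlatformSetUp(setUpInactivePlatform);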
} // End namespace orc
} // End namespace llvm
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h b/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h
index f81cdcef6655..4916460a9b94 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h
@@ -38,19 +38,19 @@ namespace orc {
class LazyCallThroughManager {
public:
using NotifyResolvedFunction =
- unique_function<Error(JITTargetAddress ResolvedAddr)>;
+ unique_function<Error(ExecutorAddr ResolvedAddr)>;
- LazyCallThroughManager(ExecutionSession &ES,
- JITTargetAddress ErrorHandlerAddr, TrampolinePool *TP);
+ LazyCallThroughManager(ExecutionSession &ES, ExecutorAddr ErrorHandlerAddr,
+ TrampolinePool *TP);
// Return a free call-through trampoline and bind it to look up and call
// through to the given symbol.
- Expected<JITTargetAddress>
+ Expected<ExecutorAddr>
getCallThroughTrampoline(JITDylib &SourceJD, SymbolStringPtr SymbolName,
NotifyResolvedFunction NotifyResolved);
void resolveTrampolineLandingAddress(
- JITTargetAddress TrampolineAddr,
+ ExecutorAddr TrampolineAddr,
TrampolinePool::NotifyLandingResolvedFunction NotifyLandingResolved);
virtual ~LazyCallThroughManager() = default;
@@ -64,20 +64,19 @@ protected:
SymbolStringPtr SymbolName;
};
- JITTargetAddress reportCallThroughError(Error Err);
- Expected<ReexportsEntry> findReexport(JITTargetAddress TrampolineAddr);
- Error notifyResolved(JITTargetAddress TrampolineAddr,
- JITTargetAddress ResolvedAddr);
+ ExecutorAddr reportCallThroughError(Error Err);
+ Expected<ReexportsEntry> findReexport(ExecutorAddr TrampolineAddr);
+ Error notifyResolved(ExecutorAddr TrampolineAddr, ExecutorAddr ResolvedAddr);
void setTrampolinePool(TrampolinePool &TP) { this->TP = &TP; }
private:
- using ReexportsMap = std::map<JITTargetAddress, ReexportsEntry>;
+ using ReexportsMap = std::map<ExecutorAddr, ReexportsEntry>;
- using NotifiersMap = std::map<JITTargetAddress, NotifyResolvedFunction>;
+ using NotifiersMap = std::map<ExecutorAddr, NotifyResolvedFunction>;
std::mutex LCTMMutex;
ExecutionSession &ES;
- JITTargetAddress ErrorHandlerAddr;
+ ExecutorAddr ErrorHandlerAddr;
TrampolinePool *TP = nullptr;
ReexportsMap Reexports;
NotifiersMap Notifiers;
@@ -86,15 +85,15 @@ private:
/// A lazy call-through manager that builds trampolines in the current process.
class LocalLazyCallThroughManager : public LazyCallThroughManager {
private:
- using NotifyTargetResolved = unique_function<void(JITTargetAddress)>;
+ using NotifyTargetResolved = unique_function<void(ExecutorAddr)>;
LocalLazyCallThroughManager(ExecutionSession &ES,
- JITTargetAddress ErrorHandlerAddr)
+ ExecutorAddr ErrorHandlerAddr)
: LazyCallThroughManager(ES, ErrorHandlerAddr, nullptr) {}
template <typename ORCABI> Error init() {
auto TP = LocalTrampolinePool<ORCABI>::Create(
- [this](JITTargetAddress TrampolineAddr,
+ [this](ExecutorAddr TrampolineAddr,
TrampolinePool::NotifyLandingResolvedFunction
NotifyLandingResolved) {
resolveTrampolineLandingAddress(TrampolineAddr,
@@ -116,7 +115,7 @@ public:
/// createLocalLazyCallThroughManager.
template <typename ORCABI>
static Expected<std::unique_ptr<LocalLazyCallThroughManager>>
- Create(ExecutionSession &ES, JITTargetAddress ErrorHandlerAddr) {
+ Create(ExecutionSession &ES, ExecutorAddr ErrorHandlerAddr) {
auto LLCTM = std::unique_ptr<LocalLazyCallThroughManager>(
new LocalLazyCallThroughManager(ES, ErrorHandlerAddr));
@@ -131,7 +130,7 @@ public:
/// session.
Expected<std::unique_ptr<LazyCallThroughManager>>
createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES,
- JITTargetAddress ErrorHandlerAddr);
+ ExecutorAddr ErrorHandlerAddr);
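A sketch of creating an in-process call-through manager with the new address type; TT and ES are assumed to be an existing Triple and ExecutionSession, and the null error-handler address is a placeholder:

  auto LCTM = createLocalLazyCallThroughManager(TT, ES, ExecutorAddr());
  if (!LCTM)
    return LCTM.takeError();
  // (*LCTM)->getCallThroughTrampoline(...) now deals in ExecutorAddr values.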
/// A materialization unit that builds lazy re-exports. These are callable
/// entry points that call through to the given symbols.
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
index c51608899e04..15dae6f920d5 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h
@@ -79,6 +79,12 @@ public:
/// setting up all aliases (including the required ones).
static Expected<std::unique_ptr<MachOPlatform>>
Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
+ JITDylib &PlatformJD, std::unique_ptr<DefinitionGenerator> OrcRuntime,
+ std::optional<SymbolAliasMap> RuntimeAliases = std::nullopt);
+
+ /// Construct using a path to the ORC runtime.
+ static Expected<std::unique_ptr<MachOPlatform>>
+ Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
JITDylib &PlatformJD, const char *OrcRuntimePath,
std::optional<SymbolAliasMap> RuntimeAliases = std::nullopt);
@@ -103,9 +109,6 @@ public:
static ArrayRef<std::pair<const char *, const char *>>
standardRuntimeUtilityAliases();
- /// Returns true if the given section name is an initializer section.
- static bool isInitializerSection(StringRef SegName, StringRef SectName);
-
private:
// Data needed for bootstrap only.
struct BootstrapInfo {
@@ -153,17 +156,20 @@ private:
ExecutorAddrRange CompactUnwindSection;
};
+ struct ObjCImageInfo {
+ uint32_t Version = 0;
+ uint32_t Flags = 0;
+ };
+
Error bootstrapPipelineStart(jitlink::LinkGraph &G);
Error bootstrapPipelineRecordRuntimeFunctions(jitlink::LinkGraph &G);
Error bootstrapPipelineEnd(jitlink::LinkGraph &G);
- Error recordRuntimeRegistrationFunctions(jitlink::LinkGraph &G);
-
Error associateJITDylibHeaderSymbol(jitlink::LinkGraph &G,
MaterializationResponsibility &MR);
- Error preserveInitSections(jitlink::LinkGraph &G,
- MaterializationResponsibility &MR);
+ Error preserveImportantSections(jitlink::LinkGraph &G,
+ MaterializationResponsibility &MR);
Error processObjCImageInfo(jitlink::LinkGraph &G,
MaterializationResponsibility &MR);
@@ -175,12 +181,16 @@ private:
Error registerObjectPlatformSections(jitlink::LinkGraph &G, JITDylib &JD,
bool InBootstrapPhase);
+ Error createObjCRuntimeObject(jitlink::LinkGraph &G);
+ Error populateObjCRuntimeObject(jitlink::LinkGraph &G,
+ MaterializationResponsibility &MR);
+
std::mutex PluginMutex;
MachOPlatform &MP;
// FIXME: ObjCImageInfos and HeaderAddrs need to be cleared when
// JITDylibs are removed.
- DenseMap<JITDylib *, std::pair<uint32_t, uint32_t>> ObjCImageInfos;
+ DenseMap<JITDylib *, ObjCImageInfo> ObjCImageInfos;
DenseMap<JITDylib *, ExecutorAddr> HeaderAddrs;
InitSymbolDepMap InitSymbolDeps;
};
@@ -250,6 +260,10 @@ private:
ES.intern("___orc_rt_macho_deregister_object_platform_sections")};
RuntimeFunction CreatePThreadKey{
ES.intern("___orc_rt_macho_create_pthread_key")};
+ RuntimeFunction RegisterObjCRuntimeObject{
+ ES.intern("___orc_rt_macho_register_objc_runtime_object")};
+ RuntimeFunction DeregisterObjCRuntimeObject{
+ ES.intern("___orc_rt_macho_deregister_objc_runtime_object")};
DenseMap<JITDylib *, SymbolLookupSet> RegisteredInitSymbols;
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/ObjectFileInterface.h b/llvm/include/llvm/ExecutionEngine/Orc/ObjectFileInterface.h
index 7d8a3ebbe01b..1bf09069163e 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/ObjectFileInterface.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/ObjectFileInterface.h
@@ -32,8 +32,6 @@ void addInitSymbol(MaterializationUnit::Interface &I, ExecutionSession &ES,
Expected<MaterializationUnit::Interface>
getObjectFileInterface(ExecutionSession &ES, MemoryBufferRef ObjBuffer);
-bool hasInitializerSection(jitlink::LinkGraph &G);
-
} // End namespace orc
} // End namespace llvm
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
index 304854791278..5d25a3e85464 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
@@ -17,7 +17,7 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_ORCABISUPPORT_H
#define LLVM_EXECUTIONENGINE_ORC_ORCABISUPPORT_H
-#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -61,24 +61,25 @@ public:
static constexpr unsigned ResolverCodeSize = 1;
static void writeResolverCode(char *ResolveWorkingMem,
- JITTargetAddress ResolverTargetAddr,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr) {
+ ExecutorAddr ResolverTargetAddr,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr) {
llvm_unreachable("writeResolverCode is not supported by the generic host "
"support class");
}
static void writeTrampolines(char *TrampolineBlockWorkingMem,
- JITTargetAddress TrampolineBlockTargetAddr,
- JITTargetAddress ResolverAddr,
+ ExecutorAddr TrampolineBlockTargetAddr,
+ ExecutorAddr ResolverAddr,
unsigned NumTrampolines) {
llvm_unreachable("writeTrampolines is not supported by the generic host "
"support class");
}
- static void writeIndirectStubsBlock(
- char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
- JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) {
+ static void writeIndirectStubsBlock(char *StubsBlockWorkingMem,
+ ExecutorAddr StubsBlockTargetAddress,
+ ExecutorAddr PointersBlockTargetAddress,
+ unsigned NumStubs) {
llvm_unreachable(
"writeIndirectStubsBlock is not supported by the generic host "
"support class");
@@ -101,25 +102,26 @@ public:
/// argument of writeResolverCode will be passed as the second argument to
/// the function at ReentryFnAddr.
static void writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddress,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress RentryCtxAddr);
+ ExecutorAddr ResolverTargetAddress,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr RentryCtxAddr);
/// Write the requested number of trampolines into the given memory,
/// which must be big enough to hold 1 pointer, plus NumTrampolines
/// trampolines.
static void writeTrampolines(char *TrampolineBlockWorkingMem,
- JITTargetAddress TrampolineBlockTargetAddress,
- JITTargetAddress ResolverAddr,
+ ExecutorAddr TrampolineBlockTargetAddress,
+ ExecutorAddr ResolverAddr,
unsigned NumTrampolines);
/// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem.
/// Stubs will be written as if linked at StubsBlockTargetAddress, with the
/// Nth stub using the Nth pointer in memory starting at
/// PointersBlockTargetAddress.
- static void writeIndirectStubsBlock(
- char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
- JITTargetAddress PointersBlockTargetAddress, unsigned MinStubs);
+ static void writeIndirectStubsBlock(char *StubsBlockWorkingMem,
+ ExecutorAddr StubsBlockTargetAddress,
+ ExecutorAddr PointersBlockTargetAddress,
+ unsigned MinStubs);
};
/// X86_64 code that's common to all ABIs.
@@ -136,17 +138,18 @@ public:
/// which must be big enough to hold 1 pointer, plus NumTrampolines
/// trampolines.
static void writeTrampolines(char *TrampolineBlockWorkingMem,
- JITTargetAddress TrampolineBlockTargetAddress,
- JITTargetAddress ResolverAddr,
+ ExecutorAddr TrampolineBlockTargetAddress,
+ ExecutorAddr ResolverAddr,
unsigned NumTrampolines);
/// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem.
/// Stubs will be written as if linked at StubsBlockTargetAddress, with the
/// Nth stub using the Nth pointer in memory starting at
/// PointersBlockTargetAddress.
- static void writeIndirectStubsBlock(
- char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
- JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs);
+ static void writeIndirectStubsBlock(char *StubsBlockWorkingMem,
+ ExecutorAddr StubsBlockTargetAddress,
+ ExecutorAddr PointersBlockTargetAddress,
+ unsigned NumStubs);
};
/// X86_64 support for SysV ABI (Linux, MacOSX).
@@ -164,9 +167,9 @@ public:
/// argument of writeResolverCode will be passed as the second argument to
/// the function at ReentryFnAddr.
static void writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddress,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr);
+ ExecutorAddr ResolverTargetAddress,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr);
};
/// X86_64 support for Win32.
@@ -184,9 +187,9 @@ public:
/// argument of writeResolverCode will be passed as the second argument to
/// the function at ReentryFnAddr.
static void writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddress,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr);
+ ExecutorAddr ResolverTargetAddress,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr);
};
/// I386 support.
@@ -208,25 +211,26 @@ public:
/// argument of writeResolverCode will be passed as the second argument to
/// the function at ReentryFnAddr.
static void writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddress,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr);
+ ExecutorAddr ResolverTargetAddress,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr);
/// Write the requested number of trampolines into the given memory,
/// which must be big enough to hold 1 pointer, plus NumTrampolines
/// trampolines.
static void writeTrampolines(char *TrampolineBlockWorkingMem,
- JITTargetAddress TrampolineBlockTargetAddress,
- JITTargetAddress ResolverAddr,
+ ExecutorAddr TrampolineBlockTargetAddress,
+ ExecutorAddr ResolverAddr,
unsigned NumTrampolines);
/// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem.
/// Stubs will be written as if linked at StubsBlockTargetAddress, with the
/// Nth stub using the Nth pointer in memory starting at
/// PointersBlockTargetAddress.
- static void writeIndirectStubsBlock(
- char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
- JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs);
+ static void writeIndirectStubsBlock(char *StubsBlockWorkingMem,
+ ExecutorAddr StubsBlockTargetAddress,
+ ExecutorAddr PointersBlockTargetAddress,
+ unsigned NumStubs);
};
// @brief Mips32 support.
@@ -244,8 +248,8 @@ public:
/// which must be big enough to hold 1 pointer, plus NumTrampolines
/// trampolines.
static void writeTrampolines(char *TrampolineBlockWorkingMem,
- JITTargetAddress TrampolineBlockTargetAddress,
- JITTargetAddress ResolverAddr,
+ ExecutorAddr TrampolineBlockTargetAddress,
+ ExecutorAddr ResolverAddr,
unsigned NumTrampolines);
/// Write the resolver code into the given memory. The user is
@@ -256,25 +260,25 @@ public:
/// argument of writeResolverCode will be passed as the second argument to
/// the function at ReentryFnAddr.
static void writeResolverCode(char *ResolverBlockWorkingMem,
- JITTargetAddress ResolverBlockTargetAddress,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr,
- bool isBigEndian);
+ ExecutorAddr ResolverBlockTargetAddress,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr, bool isBigEndian);
/// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem.
/// Stubs will be written as if linked at StubsBlockTargetAddress, with the
/// Nth stub using the Nth pointer in memory starting at
/// PointersBlockTargetAddress.
- static void writeIndirectStubsBlock(
- char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
- JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs);
+ static void writeIndirectStubsBlock(char *StubsBlockWorkingMem,
+ ExecutorAddr StubsBlockTargetAddress,
+ ExecutorAddr PointersBlockTargetAddress,
+ unsigned NumStubs);
};
class OrcMips32Le : public OrcMips32_Base {
public:
static void writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddress,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr) {
+ ExecutorAddr ResolverTargetAddress,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr) {
OrcMips32_Base::writeResolverCode(ResolverWorkingMem, ResolverTargetAddress,
ReentryFnAddr, ReentryCtxAddr, false);
}
@@ -283,9 +287,9 @@ public:
class OrcMips32Be : public OrcMips32_Base {
public:
static void writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddress,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr) {
+ ExecutorAddr ResolverTargetAddress,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr) {
OrcMips32_Base::writeResolverCode(ResolverWorkingMem, ResolverTargetAddress,
ReentryFnAddr, ReentryCtxAddr, true);
}
@@ -310,24 +314,25 @@ public:
/// argument of writeResolverCode will be passed as the second argument to
/// the function at ReentryFnAddr.
static void writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddress,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr);
+ ExecutorAddr ResolverTargetAddress,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr);
/// Write the requested number of trampolines into the given memory,
/// which must be big enough to hold 1 pointer, plus NumTrampolines
/// trampolines.
static void writeTrampolines(char *TrampolineBlockWorkingMem,
- JITTargetAddress TrampolineBlockTargetAddress,
- JITTargetAddress ResolverFnAddr,
+ ExecutorAddr TrampolineBlockTargetAddress,
+ ExecutorAddr ResolverFnAddr,
unsigned NumTrampolines);
/// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem.
/// Stubs will be written as if linked at StubsBlockTargetAddress, with the
/// Nth stub using the Nth pointer in memory starting at
/// PointersBlockTargetAddress.
- static void writeIndirectStubsBlock(
- char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
- JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs);
+ static void writeIndirectStubsBlock(char *StubsBlockWorkingMem,
+ ExecutorAddr StubsBlockTargetAddress,
+ ExecutorAddr PointersBlockTargetAddress,
+ unsigned NumStubs);
};
// @brief riscv64 support.
@@ -349,24 +354,25 @@ public:
/// argument of writeResolverCode will be passed as the second argument to
/// the function at ReentryFnAddr.
static void writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddress,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr);
+ ExecutorAddr ResolverTargetAddress,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr);
/// Write the requested number of trampolines into the given memory,
/// which must be big enough to hold 1 pointer, plus NumTrampolines
/// trampolines.
static void writeTrampolines(char *TrampolineBlockWorkingMem,
- JITTargetAddress TrampolineBlockTargetAddress,
- JITTargetAddress ResolverFnAddr,
+ ExecutorAddr TrampolineBlockTargetAddress,
+ ExecutorAddr ResolverFnAddr,
unsigned NumTrampolines);
/// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem.
/// Stubs will be written as if linked at StubsBlockTargetAddress, with the
/// Nth stub using the Nth pointer in memory starting at
/// PointersBlockTargetAddress.
- static void writeIndirectStubsBlock(
- char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
- JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs);
+ static void writeIndirectStubsBlock(char *StubsBlockWorkingMem,
+ ExecutorAddr StubsBlockTargetAddress,
+ ExecutorAddr PointersBlockTargetAddress,
+ unsigned NumStubs);
};
// @brief loongarch64 support.
@@ -388,25 +394,26 @@ public:
/// argument of writeResolverCode will be passed as the second argument to
/// the function at ReentryFnAddr.
static void writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddress,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr);
+ ExecutorAddr ResolverTargetAddress,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr);
/// Write the requested number of trampolines into the given memory,
/// which must be big enough to hold 1 pointer, plus NumTrampolines
/// trampolines.
static void writeTrampolines(char *TrampolineBlockWorkingMem,
- JITTargetAddress TrampolineBlockTargetAddress,
- JITTargetAddress ResolverFnAddr,
+ ExecutorAddr TrampolineBlockTargetAddress,
+ ExecutorAddr ResolverFnAddr,
unsigned NumTrampolines);
/// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem.
/// Stubs will be written as if linked at StubsBlockTargetAddress, with the
/// Nth stub using the Nth pointer in memory starting at
/// PointersBlockTargetAddress.
- static void writeIndirectStubsBlock(
- char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
- JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs);
+ static void writeIndirectStubsBlock(char *StubsBlockWorkingMem,
+ ExecutorAddr StubsBlockTargetAddress,
+ ExecutorAddr PointersBlockTargetAddress,
+ unsigned NumStubs);
};
} // end namespace orc
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h
index f6673b18cb5a..b7b98d55cc65 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h
@@ -206,6 +206,27 @@ struct ExecutorAddrRange {
const ExecutorAddrRange &RHS) {
return !(LHS == RHS);
}
+ friend bool operator<(const ExecutorAddrRange &LHS,
+ const ExecutorAddrRange &RHS) {
+ return LHS.Start < RHS.Start ||
+ (LHS.Start == RHS.Start && LHS.End < RHS.End);
+ }
+ friend bool operator<=(const ExecutorAddrRange &LHS,
+ const ExecutorAddrRange &RHS) {
+ return LHS.Start < RHS.Start ||
+ (LHS.Start == RHS.Start && LHS.End <= RHS.End);
+ }
+ friend bool operator>(const ExecutorAddrRange &LHS,
+ const ExecutorAddrRange &RHS) {
+ return LHS.Start > RHS.Start ||
+ (LHS.Start == RHS.Start && LHS.End > RHS.End);
+ }
+ friend bool operator>=(const ExecutorAddrRange &LHS,
+ const ExecutorAddrRange &RHS) {
+ return LHS.Start > RHS.Start ||
+ (LHS.Start == RHS.Start && LHS.End >= RHS.End);
+ }
+
bool contains(ExecutorAddr Addr) const { return Start <= Addr && Addr < End; }
bool overlaps(const ExecutorAddrRange &Other) {
return !(Other.End <= Start || End <= Other.Start);
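A small illustration of the range helpers together with the new ordering operators (addresses are arbitrary):

  ExecutorAddrRange R1(ExecutorAddr(0x1000), ExecutorAddr(0x2000));
  ExecutorAddrRange R2(ExecutorAddr(0x1800), ExecutorAddr(0x2800));
  bool InR1 = R1.contains(ExecutorAddr(0x17ff)); // true: 0x1000 <= A < 0x2000
  bool Olap = R1.overlaps(R2);                   // true: [0x1800, 0x2000) shared
  // operator< orders by Start then End, so ranges can now be kept in sorted
  // containers such as std::set<ExecutorAddrRange>.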
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h
new file mode 100644
index 000000000000..5c58a7255ebd
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ExecutorSymbolDef.h
@@ -0,0 +1,54 @@
+//===--------- ExecutorSymbolDef.h - (Addr, Flags) pair ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Represents a defining location for a JIT symbol.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_EXECUTORSYMBOLDEF_H
+#define LLVM_EXECUTIONENGINE_ORC_SHARED_EXECUTORSYMBOLDEF_H
+
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
+
+namespace llvm {
+namespace orc {
+
+/// Represents a defining location for a JIT symbol.
+class ExecutorSymbolDef {
+public:
+ ExecutorSymbolDef() = default;
+ ExecutorSymbolDef(ExecutorAddr Addr, JITSymbolFlags Flags)
+ : Addr(Addr), Flags(Flags) {}
+
+ const ExecutorAddr &getAddress() const { return Addr; }
+
+ const JITSymbolFlags &getFlags() const { return Flags; }
+
+ void setFlags(JITSymbolFlags Flags) { this->Flags = Flags; }
+
+ friend bool operator==(const ExecutorSymbolDef &LHS,
+ const ExecutorSymbolDef &RHS) {
+ return LHS.getAddress() == RHS.getAddress() &&
+ LHS.getFlags() == RHS.getFlags();
+ }
+
+ friend bool operator!=(const ExecutorSymbolDef &LHS,
+ const ExecutorSymbolDef &RHS) {
+ return !(LHS == RHS);
+ }
+
+private:
+ ExecutorAddr Addr;
+ JITSymbolFlags Flags;
+};
+
+} // End namespace orc.
+} // End namespace llvm.
+
+#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_EXECUTORSYMBOLDEF_H
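A minimal sketch of constructing a definition with the class above (the address is arbitrary):

  ExecutorSymbolDef Def(ExecutorAddr(0x100230),
                        JITSymbolFlags::Exported | JITSymbolFlags::Callable);
  assert(Def.getFlags().isCallable() && "flags round-trip as expected");
  ExecutorAddr Target = Def.getAddress(); // hand to a caller or stub writer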
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/MemoryFlags.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/MemoryFlags.h
index 2642e6c241b6..c20366cfbb38 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/MemoryFlags.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/MemoryFlags.h
@@ -65,26 +65,43 @@ inline MemProt fromSysMemoryProtectionFlags(sys::Memory::ProtectionFlags PF) {
return MP;
}
-/// Describes a memory deallocation policy for memory to be allocated by a
+/// Describes a memory lifetime policy for memory to be allocated by a
/// JITLinkMemoryManager.
///
/// All memory allocated by a call to JITLinkMemoryManager::allocate should be
/// deallocated if a call is made to
/// JITLinkMemoryManager::InFlightAllocation::abandon. The policies below apply
/// to finalized allocations.
-enum class MemDeallocPolicy {
- /// Standard memory should be deallocated when the deallocate method is called
- /// for the finalized allocation.
+enum class MemLifetimePolicy {
+ /// Standard memory should be allocated by the allocator and then deallocated
+ /// when the deallocate method is called for the finalized allocation.
Standard,
- /// Finalize memory should be overwritten and then deallocated after all
- /// finalization functions have been run.
- Finalize
+ /// Finalize memory should be allocated by the allocator, and then be
+ /// overwritten and deallocated after all finalization functions have been
+ /// run.
+ Finalize,
+
+ /// NoAlloc memory should not be allocated by the JITLinkMemoryManager at
+ /// all. It is used for sections that don't need to be transferred to the
+ /// executor process, typically metadata sections.
+ NoAlloc
};
/// Print a MemLifetimePolicy.
-inline raw_ostream &operator<<(raw_ostream &OS, MemDeallocPolicy MDP) {
- return OS << (MDP == MemDeallocPolicy::Standard ? "standard" : "finalize");
+inline raw_ostream &operator<<(raw_ostream &OS, MemLifetimePolicy MLP) {
+ switch (MLP) {
+ case MemLifetimePolicy::Standard:
+ OS << "standard";
+ break;
+ case MemLifetimePolicy::Finalize:
+ OS << "finalize";
+ break;
+ case MemLifetimePolicy::NoAlloc:
+ OS << "noalloc";
+ break;
+ }
+ return OS;
}
/// A pair of memory protections and allocation policies.
@@ -95,34 +112,34 @@ class AllocGroup {
using underlying_type = uint8_t;
static constexpr unsigned BitsForProt = 3;
- static constexpr unsigned BitsForDeallocPolicy = 1;
+ static constexpr unsigned BitsForLifetimePolicy = 2;
static constexpr unsigned MaxIdentifiers =
- 1U << (BitsForProt + BitsForDeallocPolicy);
+ 1U << (BitsForProt + BitsForLifetimePolicy);
public:
static constexpr unsigned NumGroups = MaxIdentifiers;
/// Create a default AllocGroup. No memory protections, standard
- /// deallocation policy.
+ /// lifetime policy.
AllocGroup() = default;
/// Create an AllocGroup from a MemProt only -- uses
- /// MemoryDeallocationPolicy::Standard.
+ /// MemLifetimePolicy::Standard.
AllocGroup(MemProt MP) : Id(static_cast<underlying_type>(MP)) {}
- /// Create an AllocGroup from a MemProt and a MemoryDeallocationPolicy.
- AllocGroup(MemProt MP, MemDeallocPolicy MDP)
+ /// Create an AllocGroup from a MemProt and a MemLifetimePolicy.
+ AllocGroup(MemProt MP, MemLifetimePolicy MLP)
: Id(static_cast<underlying_type>(MP) |
- (static_cast<underlying_type>(MDP) << BitsForProt)) {}
+ (static_cast<underlying_type>(MLP) << BitsForProt)) {}
/// Returns the MemProt for this group.
MemProt getMemProt() const {
return static_cast<MemProt>(Id & ((1U << BitsForProt) - 1));
}
- /// Returns the MemoryDeallocationPolicy for this group.
- MemDeallocPolicy getMemDeallocPolicy() const {
- return static_cast<MemDeallocPolicy>(Id >> BitsForProt);
+ /// Returns the MemLifetimePolicy for this group.
+ MemLifetimePolicy getMemLifetimePolicy() const {
+ return static_cast<MemLifetimePolicy>(Id >> BitsForProt);
}
friend bool operator==(const AllocGroup &LHS, const AllocGroup &RHS) {
@@ -186,7 +203,7 @@ private:
/// Print an AllocGroup.
inline raw_ostream &operator<<(raw_ostream &OS, AllocGroup AG) {
- return OS << '(' << AG.getMemProt() << ", " << AG.getMemDeallocPolicy()
+ return OS << '(' << AG.getMemProt() << ", " << AG.getMemLifetimePolicy()
<< ')';
}
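A short sketch of how protections and lifetime combine in an AllocGroup (MemProt is a bitmask enum, so the protections compose with |):

  AllocGroup CodeGroup(MemProt::Read | MemProt::Exec);              // standard lifetime
  AllocGroup InitGroup(MemProt::Read, MemLifetimePolicy::Finalize); // dropped post-finalize
  assert(InitGroup.getMemLifetimePolicy() == MemLifetimePolicy::Finalize);
  // Both groups print via the operator<< overloads defined above.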
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/ObjectFormats.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ObjectFormats.h
new file mode 100644
index 000000000000..b7bc54b465a0
--- /dev/null
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/ObjectFormats.h
@@ -0,0 +1,69 @@
+//===------ ObjectFormats.h - Object format details for ORC -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// ORC-specific object format details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_SHARED_OBJECTFORMATS_H
+#define LLVM_EXECUTIONENGINE_ORC_SHARED_OBJECTFORMATS_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+namespace orc {
+
+// MachO section names.
+
+extern StringRef MachODataCommonSectionName;
+extern StringRef MachODataDataSectionName;
+extern StringRef MachOEHFrameSectionName;
+extern StringRef MachOCompactUnwindInfoSectionName;
+extern StringRef MachOModInitFuncSectionName;
+extern StringRef MachOObjCCatListSectionName;
+extern StringRef MachOObjCCatList2SectionName;
+extern StringRef MachOObjCClassListSectionName;
+extern StringRef MachOObjCClassNameSectionName;
+extern StringRef MachOObjCClassRefsSectionName;
+extern StringRef MachOObjCConstSectionName;
+extern StringRef MachOObjCDataSectionName;
+extern StringRef MachOObjCImageInfoSectionName;
+extern StringRef MachOObjCMethNameSectionName;
+extern StringRef MachOObjCMethTypeSectionName;
+extern StringRef MachOObjCNLCatListSectionName;
+extern StringRef MachOObjCSelRefsSectionName;
+extern StringRef MachOSwift5ProtoSectionName;
+extern StringRef MachOSwift5ProtosSectionName;
+extern StringRef MachOSwift5TypesSectionName;
+extern StringRef MachOSwift5TypeRefSectionName;
+extern StringRef MachOSwift5FieldMetadataSectionName;
+extern StringRef MachOSwift5EntrySectionName;
+extern StringRef MachOThreadBSSSectionName;
+extern StringRef MachOThreadDataSectionName;
+extern StringRef MachOThreadVarsSectionName;
+
+extern StringRef MachOInitSectionNames[19];
+
+// ELF section names.
+extern StringRef ELFEHFrameSectionName;
+extern StringRef ELFInitArrayFuncSectionName;
+
+extern StringRef ELFThreadBSSSectionName;
+extern StringRef ELFThreadDataSectionName;
+
+bool isMachOInitializerSection(StringRef SegName, StringRef SecName);
+bool isMachOInitializerSection(StringRef QualifiedName);
+
+bool isELFInitializerSection(StringRef SecName);
+
+bool isCOFFInitializerSection(StringRef Name);
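A sketch of the name-based classification these helpers provide; the section names come from the constants above, and the expected results are stated as assumptions:

  bool MachOInit = isMachOInitializerSection("__DATA", "__mod_init_func");
  bool ELFInit   = isELFInitializerSection(".init_array");
  // Both are expected to be true; COFF initializer sections (.CRT$XC*) go
  // through isCOFFInitializerSection.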
+
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_SHARED_OBJECTFORMATS_H
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h
index 9e074ed1f931..ee3919c73340 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.h
@@ -45,6 +45,7 @@ enum class SimpleRemoteEPCOpcode : uint8_t {
struct SimpleRemoteEPCExecutorInfo {
std::string TargetTriple;
uint64_t PageSize;
+ StringMap<std::vector<char>> BootstrapMap;
StringMap<ExecutorAddr> BootstrapSymbols;
};
@@ -161,6 +162,7 @@ using SPSRemoteSymbolLookup = SPSTuple<uint64_t, SPSRemoteSymbolLookupSet>;
/// Tuple containing target triple, page size, and bootstrap symbols.
using SPSSimpleRemoteEPCExecutorInfo =
SPSTuple<SPSString, uint64_t,
+ SPSSequence<SPSTuple<SPSString, SPSSequence<char>>>,
SPSSequence<SPSTuple<SPSString, SPSExecutorAddr>>>;
template <>
@@ -206,18 +208,18 @@ class SPSSerializationTraits<SPSSimpleRemoteEPCExecutorInfo,
public:
static size_t size(const SimpleRemoteEPCExecutorInfo &SI) {
return SPSSimpleRemoteEPCExecutorInfo::AsArgList ::size(
- SI.TargetTriple, SI.PageSize, SI.BootstrapSymbols);
+ SI.TargetTriple, SI.PageSize, SI.BootstrapMap, SI.BootstrapSymbols);
}
static bool serialize(SPSOutputBuffer &OB,
const SimpleRemoteEPCExecutorInfo &SI) {
return SPSSimpleRemoteEPCExecutorInfo::AsArgList ::serialize(
- OB, SI.TargetTriple, SI.PageSize, SI.BootstrapSymbols);
+ OB, SI.TargetTriple, SI.PageSize, SI.BootstrapMap, SI.BootstrapSymbols);
}
static bool deserialize(SPSInputBuffer &IB, SimpleRemoteEPCExecutorInfo &SI) {
return SPSSimpleRemoteEPCExecutorInfo::AsArgList ::deserialize(
- IB, SI.TargetTriple, SI.PageSize, SI.BootstrapSymbols);
+ IB, SI.TargetTriple, SI.PageSize, SI.BootstrapMap, SI.BootstrapSymbols);
}
};
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h b/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h
index 565fb5477c4a..09c73db44a94 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h
@@ -30,8 +30,24 @@ namespace llvm {
namespace orc {
namespace tpctypes {
+struct RemoteAllocGroup {
+ RemoteAllocGroup() = default;
+ RemoteAllocGroup(MemProt Prot) : Prot(Prot) {}
+ RemoteAllocGroup(MemProt Prot, bool FinalizeLifetime)
+ : Prot(Prot), FinalizeLifetime(FinalizeLifetime) {}
+ RemoteAllocGroup(const AllocGroup &AG) : Prot(AG.getMemProt()) {
+ assert(AG.getMemLifetimePolicy() != orc::MemLifetimePolicy::NoAlloc &&
+ "Cannot use no-alloc memory in a remote alloc request");
+ FinalizeLifetime =
+ AG.getMemLifetimePolicy() == orc::MemLifetimePolicy::Finalize;
+ }
+
+ MemProt Prot;
+ bool FinalizeLifetime = false;
+};
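A sketch of the conversion performed by the constructor above:

  AllocGroup AG(MemProt::Read | MemProt::Write, MemLifetimePolicy::Finalize);
  tpctypes::RemoteAllocGroup RAG(AG); // RAG.Prot == RW, RAG.FinalizeLifetime == true
  // Passing an AllocGroup with MemLifetimePolicy::NoAlloc here would trip the
  // assert: no-alloc sections never reach the remote allocator.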
+
struct SegFinalizeRequest {
- AllocGroup AG;
+ RemoteAllocGroup RAG;
ExecutorAddr Addr;
uint64_t Size;
ArrayRef<char> Content;
@@ -43,7 +59,7 @@ struct FinalizeRequest {
};
struct SharedMemorySegFinalizeRequest {
- AllocGroup AG;
+ RemoteAllocGroup RAG;
ExecutorAddr Addr;
uint64_t Size;
};
@@ -93,16 +109,16 @@ using LookupResult = std::vector<ExecutorAddr>;
namespace shared {
-class SPSAllocGroup {};
+class SPSRemoteAllocGroup;
using SPSSegFinalizeRequest =
- SPSTuple<SPSAllocGroup, SPSExecutorAddr, uint64_t, SPSSequence<char>>;
+ SPSTuple<SPSRemoteAllocGroup, SPSExecutorAddr, uint64_t, SPSSequence<char>>;
using SPSFinalizeRequest = SPSTuple<SPSSequence<SPSSegFinalizeRequest>,
SPSSequence<SPSAllocActionCallPair>>;
using SPSSharedMemorySegFinalizeRequest =
- SPSTuple<SPSAllocGroup, SPSExecutorAddr, uint64_t>;
+ SPSTuple<SPSRemoteAllocGroup, SPSExecutorAddr, uint64_t>;
using SPSSharedMemoryFinalizeRequest =
SPSTuple<SPSSequence<SPSSharedMemorySegFinalizeRequest>,
@@ -118,7 +134,8 @@ using SPSMemoryAccessUInt64Write = SPSMemoryAccessUIntWrite<uint64_t>;
using SPSMemoryAccessBufferWrite = SPSTuple<SPSExecutorAddr, SPSSequence<char>>;
-template <> class SPSSerializationTraits<SPSAllocGroup, AllocGroup> {
+template <>
+class SPSSerializationTraits<SPSRemoteAllocGroup, tpctypes::RemoteAllocGroup> {
enum WireBits {
ReadBit = 1 << 0,
WriteBit = 1 << 1,
@@ -127,25 +144,26 @@ template <> class SPSSerializationTraits<SPSAllocGroup, AllocGroup> {
};
public:
- static size_t size(const AllocGroup &AG) {
+ static size_t size(const tpctypes::RemoteAllocGroup &RAG) {
    // All RemoteAllocGroup values encode to the same size.
return SPSArgList<uint8_t>::size(uint8_t(0));
}
- static bool serialize(SPSOutputBuffer &OB, const AllocGroup &AG) {
+ static bool serialize(SPSOutputBuffer &OB,
+ const tpctypes::RemoteAllocGroup &RAG) {
uint8_t WireValue = 0;
- if ((AG.getMemProt() & MemProt::Read) != MemProt::None)
+ if ((RAG.Prot & MemProt::Read) != MemProt::None)
WireValue |= ReadBit;
- if ((AG.getMemProt() & MemProt::Write) != MemProt::None)
+ if ((RAG.Prot & MemProt::Write) != MemProt::None)
WireValue |= WriteBit;
- if ((AG.getMemProt() & MemProt::Exec) != MemProt::None)
+ if ((RAG.Prot & MemProt::Exec) != MemProt::None)
WireValue |= ExecBit;
- if (AG.getMemDeallocPolicy() == MemDeallocPolicy::Finalize)
+ if (RAG.FinalizeLifetime)
WireValue |= FinalizeBit;
return SPSArgList<uint8_t>::serialize(OB, WireValue);
}
- static bool deserialize(SPSInputBuffer &IB, AllocGroup &AG) {
+ static bool deserialize(SPSInputBuffer &IB, tpctypes::RemoteAllocGroup &RAG) {
uint8_t Val;
if (!SPSArgList<uint8_t>::deserialize(IB, Val))
return false;
@@ -156,9 +174,8 @@ public:
MP |= MemProt::Write;
if (Val & ExecBit)
MP |= MemProt::Exec;
- MemDeallocPolicy MDP = (Val & FinalizeBit) ? MemDeallocPolicy::Finalize
- : MemDeallocPolicy::Standard;
- AG = AllocGroup(MP, MDP);
+ bool FinalizeLifetime = (Val & FinalizeBit) ? true : false;
+ RAG = {MP, FinalizeLifetime};
return true;
}
};
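For reference, the wire byte produced by the serializer above packs the protection and lifetime bits as follows; a read+write, finalize-lifetime group is used as an example:

  uint8_t Wire = 0;
  Wire |= 1 << 0; // ReadBit
  Wire |= 1 << 1; // WriteBit
  Wire |= 1 << 3; // FinalizeBit
  // Wire == 0b1011 == 11; ExecBit (1 << 2) stays clear for RW memory.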
@@ -170,17 +187,17 @@ class SPSSerializationTraits<SPSSegFinalizeRequest,
public:
static size_t size(const tpctypes::SegFinalizeRequest &SFR) {
- return SFRAL::size(SFR.AG, SFR.Addr, SFR.Size, SFR.Content);
+ return SFRAL::size(SFR.RAG, SFR.Addr, SFR.Size, SFR.Content);
}
static bool serialize(SPSOutputBuffer &OB,
const tpctypes::SegFinalizeRequest &SFR) {
- return SFRAL::serialize(OB, SFR.AG, SFR.Addr, SFR.Size, SFR.Content);
+ return SFRAL::serialize(OB, SFR.RAG, SFR.Addr, SFR.Size, SFR.Content);
}
static bool deserialize(SPSInputBuffer &IB,
tpctypes::SegFinalizeRequest &SFR) {
- return SFRAL::deserialize(IB, SFR.AG, SFR.Addr, SFR.Size, SFR.Content);
+ return SFRAL::deserialize(IB, SFR.RAG, SFR.Addr, SFR.Size, SFR.Content);
}
};
@@ -210,17 +227,17 @@ class SPSSerializationTraits<SPSSharedMemorySegFinalizeRequest,
public:
static size_t size(const tpctypes::SharedMemorySegFinalizeRequest &SFR) {
- return SFRAL::size(SFR.AG, SFR.Addr, SFR.Size);
+ return SFRAL::size(SFR.RAG, SFR.Addr, SFR.Size);
}
static bool serialize(SPSOutputBuffer &OB,
const tpctypes::SharedMemorySegFinalizeRequest &SFR) {
- return SFRAL::serialize(OB, SFR.AG, SFR.Addr, SFR.Size);
+ return SFRAL::serialize(OB, SFR.RAG, SFR.Addr, SFR.Size);
}
static bool deserialize(SPSInputBuffer &IB,
tpctypes::SharedMemorySegFinalizeRequest &SFR) {
- return SFRAL::deserialize(IB, SFR.AG, SFR.Addr, SFR.Size);
+ return SFRAL::deserialize(IB, SFR.RAG, SFR.Addr, SFR.Size);
}
};
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
index 0a309c1fdd5e..88c90f54acbd 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h
@@ -59,7 +59,7 @@ private:
// Defines the Speculator concept.
class Speculator {
public:
- using TargetFAddr = JITTargetAddress;
+ using TargetFAddr = ExecutorAddr;
using FunctionCandidatesMap = DenseMap<SymbolStringPtr, SymbolNameSet>;
using StubAddrLikelies = DenseMap<TargetFAddr, SymbolNameSet>;
@@ -70,7 +70,7 @@ private:
GlobalSpecMap.insert({ImplAddr, std::move(likelySymbols)});
}
- void launchCompile(JITTargetAddress FAddr) {
+ void launchCompile(ExecutorAddr FAddr) {
SymbolNameSet CandidateSet;
    // Copying CandidateSet is necessary to avoid unsynchronized access to
    // the data structure.
@@ -144,8 +144,8 @@ public:
auto OnReadyFixUp = [Likely, Target,
this](Expected<SymbolMap> ReadySymbol) {
if (ReadySymbol) {
- auto RAddr = (*ReadySymbol)[Target].getAddress();
- registerSymbolsWithAddr(RAddr, std::move(Likely));
+ auto RDef = (*ReadySymbol)[Target];
+ registerSymbolsWithAddr(RDef.getAddress(), std::move(Likely));
} else
this->getES().reportError(ReadySymbol.takeError());
};
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h b/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h
index e20f20fb7da2..497e29da98bd 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/SymbolStringPool.h
@@ -24,11 +24,14 @@ class raw_ostream;
namespace orc {
+class SymbolStringPtrBase;
class SymbolStringPtr;
+class NonOwningSymbolStringPtr;
/// String pool for symbol names used by the JIT.
class SymbolStringPool {
- friend class SymbolStringPtr;
+ friend class SymbolStringPoolTest;
+ friend class SymbolStringPtrBase;
// Implemented in DebugUtils.h.
friend raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPool &SSP);
@@ -45,7 +48,10 @@ public:
/// Returns true if the pool is empty.
bool empty() const;
+
private:
+ size_t getRefCount(const SymbolStringPtrBase &S) const;
+
using RefCountType = std::atomic<size_t>;
using PoolMap = StringMap<RefCountType>;
using PoolMapEntry = StringMapEntry<RefCountType>;
@@ -53,8 +59,81 @@ private:
PoolMap Pool;
};
+/// Base class for both owning and non-owning symbol-string ptrs.
+///
+/// All symbol-string ptrs are convertible to bool, dereferenceable and
+/// comparable.
+///
+/// SymbolStringPtrBases are default-constructible and constructible
+/// from nullptr to enable comparison with these values.
+class SymbolStringPtrBase {
+ friend class SymbolStringPool;
+ friend struct DenseMapInfo<SymbolStringPtr>;
+ friend struct DenseMapInfo<NonOwningSymbolStringPtr>;
+
+public:
+ SymbolStringPtrBase() = default;
+ SymbolStringPtrBase(std::nullptr_t) {}
+
+ explicit operator bool() const { return S; }
+
+ StringRef operator*() const { return S->first(); }
+
+ friend bool operator==(SymbolStringPtrBase LHS, SymbolStringPtrBase RHS) {
+ return LHS.S == RHS.S;
+ }
+
+ friend bool operator!=(SymbolStringPtrBase LHS, SymbolStringPtrBase RHS) {
+ return !(LHS == RHS);
+ }
+
+ friend bool operator<(SymbolStringPtrBase LHS, SymbolStringPtrBase RHS) {
+ return LHS.S < RHS.S;
+ }
+
+#ifndef NDEBUG
+ // Returns true if the pool entry's ref count is above zero (or if the entry
+ // is an empty or tombstone value). Useful for debugging and testing -- this
+ // method can be used to identify SymbolStringPtrs and
+ // NonOwningSymbolStringPtrs that are pointing to abandoned pool entries.
+ bool poolEntryIsAlive() const {
+ return isRealPoolEntry(S) ? S->getValue() != 0 : true;
+ }
+#endif
+
+protected:
+ using PoolEntry = SymbolStringPool::PoolMapEntry;
+ using PoolEntryPtr = PoolEntry *;
+
+ SymbolStringPtrBase(PoolEntryPtr S) : S(S) {}
+
+ constexpr static uintptr_t EmptyBitPattern =
+ std::numeric_limits<uintptr_t>::max()
+ << PointerLikeTypeTraits<PoolEntryPtr>::NumLowBitsAvailable;
+
+ constexpr static uintptr_t TombstoneBitPattern =
+ (std::numeric_limits<uintptr_t>::max() - 1)
+ << PointerLikeTypeTraits<PoolEntryPtr>::NumLowBitsAvailable;
+
+ constexpr static uintptr_t InvalidPtrMask =
+ (std::numeric_limits<uintptr_t>::max() - 3)
+ << PointerLikeTypeTraits<PoolEntryPtr>::NumLowBitsAvailable;
+
+ // Returns false for null, empty, and tombstone values, true otherwise.
+ static bool isRealPoolEntry(PoolEntryPtr P) {
+ return ((reinterpret_cast<uintptr_t>(P) - 1) & InvalidPtrMask) !=
+ InvalidPtrMask;
+ }
+
+ size_t getRefCount() const {
+ return isRealPoolEntry(S) ? size_t(S->getValue()) : size_t(0);
+ }
+
+ PoolEntryPtr S = nullptr;
+};
+
/// Pointer to a pooled string representing a symbol name.
-class SymbolStringPtr {
+class SymbolStringPtr : public SymbolStringPtrBase {
friend class OrcV2CAPIHelper;
friend class SymbolStringPool;
friend struct DenseMapInfo<SymbolStringPtr>;
@@ -62,77 +141,43 @@ class SymbolStringPtr {
public:
SymbolStringPtr() = default;
SymbolStringPtr(std::nullptr_t) {}
- SymbolStringPtr(const SymbolStringPtr &Other)
- : S(Other.S) {
- if (isRealPoolEntry(S))
- ++S->getValue();
+ SymbolStringPtr(const SymbolStringPtr &Other) : SymbolStringPtrBase(Other.S) {
+ incRef();
}
+ explicit SymbolStringPtr(NonOwningSymbolStringPtr Other);
+
SymbolStringPtr& operator=(const SymbolStringPtr &Other) {
- if (isRealPoolEntry(S)) {
- assert(S->getValue() && "Releasing SymbolStringPtr with zero ref count");
- --S->getValue();
- }
+ decRef();
S = Other.S;
- if (isRealPoolEntry(S))
- ++S->getValue();
+ incRef();
return *this;
}
- SymbolStringPtr(SymbolStringPtr &&Other) : S(nullptr) {
- std::swap(S, Other.S);
- }
+ SymbolStringPtr(SymbolStringPtr &&Other) { std::swap(S, Other.S); }
SymbolStringPtr& operator=(SymbolStringPtr &&Other) {
- if (isRealPoolEntry(S)) {
- assert(S->getValue() && "Releasing SymbolStringPtr with zero ref count");
- --S->getValue();
- }
+ decRef();
S = nullptr;
std::swap(S, Other.S);
return *this;
}
- ~SymbolStringPtr() {
- if (isRealPoolEntry(S)) {
- assert(S->getValue() && "Releasing SymbolStringPtr with zero ref count");
- --S->getValue();
- }
- }
-
- explicit operator bool() const { return S; }
-
- StringRef operator*() const { return S->first(); }
-
- friend bool operator==(const SymbolStringPtr &LHS,
- const SymbolStringPtr &RHS) {
- return LHS.S == RHS.S;
- }
-
- friend bool operator!=(const SymbolStringPtr &LHS,
- const SymbolStringPtr &RHS) {
- return !(LHS == RHS);
- }
-
- friend bool operator<(const SymbolStringPtr &LHS,
- const SymbolStringPtr &RHS) {
- return LHS.S < RHS.S;
- }
+ ~SymbolStringPtr() { decRef(); }
private:
- using PoolEntry = SymbolStringPool::PoolMapEntry;
- using PoolEntryPtr = PoolEntry *;
+ SymbolStringPtr(PoolEntryPtr S) : SymbolStringPtrBase(S) { incRef(); }
- SymbolStringPtr(SymbolStringPool::PoolMapEntry *S)
- : S(S) {
+ void incRef() {
if (isRealPoolEntry(S))
++S->getValue();
}
- // Returns false for null, empty, and tombstone values, true otherwise.
- bool isRealPoolEntry(PoolEntryPtr P) {
- return ((reinterpret_cast<uintptr_t>(P) - 1) & InvalidPtrMask) !=
- InvalidPtrMask;
+ void decRef() {
+ if (isRealPoolEntry(S)) {
+ assert(S->getValue() && "Releasing SymbolStringPtr with zero ref count");
+ --S->getValue();
+ }
}
static SymbolStringPtr getEmptyVal() {
@@ -142,22 +187,53 @@ private:
static SymbolStringPtr getTombstoneVal() {
return SymbolStringPtr(reinterpret_cast<PoolEntryPtr>(TombstoneBitPattern));
}
+};
- constexpr static uintptr_t EmptyBitPattern =
- std::numeric_limits<uintptr_t>::max()
- << PointerLikeTypeTraits<PoolEntryPtr>::NumLowBitsAvailable;
+/// Non-owning SymbolStringPool entry pointer. Instances are comparable with
+/// SymbolStringPtr instances and guaranteed to have the same hash, but do not
+/// affect the ref-count of the pooled string (and are therefore cheaper to
+/// copy).
+///
+/// NonOwningSymbolStringPtrs are silently invalidated if the pool entry's
+/// ref-count drops to zero, so they should only be used in contexts where a
+/// corresponding SymbolStringPtr is known to exist (which will guarantee that
+/// the ref-count stays above zero). E.g. in a graph where nodes are
+/// represented by SymbolStringPtrs the edges can be represented by pairs of
+/// NonOwningSymbolStringPtrs, and this will make the introduction and deletion
+/// of edges cheaper (see the usage sketch following this class).
+class NonOwningSymbolStringPtr : public SymbolStringPtrBase {
+ friend struct DenseMapInfo<orc::NonOwningSymbolStringPtr>;
- constexpr static uintptr_t TombstoneBitPattern =
- (std::numeric_limits<uintptr_t>::max() - 1)
- << PointerLikeTypeTraits<PoolEntryPtr>::NumLowBitsAvailable;
+public:
+ NonOwningSymbolStringPtr() = default;
+ explicit NonOwningSymbolStringPtr(const SymbolStringPtr &S)
+ : SymbolStringPtrBase(S) {}
- constexpr static uintptr_t InvalidPtrMask =
- (std::numeric_limits<uintptr_t>::max() - 3)
- << PointerLikeTypeTraits<PoolEntryPtr>::NumLowBitsAvailable;
+ using SymbolStringPtrBase::operator=;
- PoolEntryPtr S = nullptr;
+private:
+ NonOwningSymbolStringPtr(PoolEntryPtr S) : SymbolStringPtrBase(S) {}
+
+ static NonOwningSymbolStringPtr getEmptyVal() {
+ return NonOwningSymbolStringPtr(
+ reinterpret_cast<PoolEntryPtr>(EmptyBitPattern));
+ }
+
+ static NonOwningSymbolStringPtr getTombstoneVal() {
+ return NonOwningSymbolStringPtr(
+ reinterpret_cast<PoolEntryPtr>(TombstoneBitPattern));
+ }
};
+inline SymbolStringPtr::SymbolStringPtr(NonOwningSymbolStringPtr Other)
+ : SymbolStringPtrBase(Other) {
+ assert(poolEntryIsAlive() &&
+ "SymbolStringPtr constructed from invalid non-owning pointer.");
+
+ if (isRealPoolEntry(S))
+ ++S->getValue();
+}
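A sketch of the graph-edge pattern described in the class comment above; ES is assumed to be an existing ExecutionSession and the symbol names are arbitrary:

  SymbolStringPtr Foo = ES.intern("foo"), Bar = ES.intern("bar"); // owning nodes
  std::vector<std::pair<NonOwningSymbolStringPtr, NonOwningSymbolStringPtr>> Edges;
  Edges.push_back({NonOwningSymbolStringPtr(Foo), NonOwningSymbolStringPtr(Bar)});
  // Edges are pointer-sized and don't touch the pool's ref-counts. To hold a
  // name beyond the lifetime of Foo/Bar, convert back to an owning pointer:
  SymbolStringPtr Owned(Edges.front().first);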
+
inline SymbolStringPool::~SymbolStringPool() {
#ifndef NDEBUG
clearDeadEntries();
@@ -187,6 +263,11 @@ inline bool SymbolStringPool::empty() const {
return Pool.empty();
}
+inline size_t
+SymbolStringPool::getRefCount(const SymbolStringPtrBase &S) const {
+ return S.getRefCount();
+}
+
} // end namespace orc
template <>
@@ -200,12 +281,33 @@ struct DenseMapInfo<orc::SymbolStringPtr> {
return orc::SymbolStringPtr::getTombstoneVal();
}
- static unsigned getHashValue(const orc::SymbolStringPtr &V) {
+ static unsigned getHashValue(const orc::SymbolStringPtrBase &V) {
return DenseMapInfo<orc::SymbolStringPtr::PoolEntryPtr>::getHashValue(V.S);
}
- static bool isEqual(const orc::SymbolStringPtr &LHS,
- const orc::SymbolStringPtr &RHS) {
+ static bool isEqual(const orc::SymbolStringPtrBase &LHS,
+ const orc::SymbolStringPtrBase &RHS) {
+ return LHS.S == RHS.S;
+ }
+};
+
+template <> struct DenseMapInfo<orc::NonOwningSymbolStringPtr> {
+
+ static orc::NonOwningSymbolStringPtr getEmptyKey() {
+ return orc::NonOwningSymbolStringPtr::getEmptyVal();
+ }
+
+ static orc::NonOwningSymbolStringPtr getTombstoneKey() {
+ return orc::NonOwningSymbolStringPtr::getTombstoneVal();
+ }
+
+ static unsigned getHashValue(const orc::SymbolStringPtrBase &V) {
+ return DenseMapInfo<
+ orc::NonOwningSymbolStringPtr::PoolEntryPtr>::getHashValue(V.S);
+ }
+
+ static bool isEqual(const orc::SymbolStringPtrBase &LHS,
+ const orc::SymbolStringPtrBase &RHS) {
return LHS.S == RHS.S;
}
};
diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h
index afd3d39dbb53..07f01ecb68a4 100644
--- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h
@@ -64,6 +64,17 @@ public:
public:
SimpleRemoteEPCServer &server() { return S; }
+ StringMap<std::vector<char>> &bootstrapMap() { return BootstrapMap; }
+ template <typename T, typename SPSTagT>
+ void setBootstrapMapValue(std::string Key, const T &Value) {
+ std::vector<char> Buffer;
+ Buffer.resize(shared::SPSArgList<SPSTagT>::size(Value));
+ shared::SPSOutputBuffer OB(Buffer.data(), Buffer.size());
+ bool Success = shared::SPSArgList<SPSTagT>::serialize(OB, Value);
+ (void)Success;
+ assert(Success && "Bootstrap map value serialization failed");
+ BootstrapMap[std::move(Key)] = std::move(Buffer);
+ }
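A sketch of stashing an extra value during server setup; S is assumed to be the Setup object handed to the server's setup callback and the key is hypothetical:

  S.setBootstrapMapValue<uint64_t, uint64_t>("example-config", uint64_t(42));
  // The controller can deserialize the bytes out of
  // SimpleRemoteEPCExecutorInfo::BootstrapMap["example-config"] using the same
  // SPS tag (uint64_t here).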
StringMap<ExecutorAddr> &bootstrapSymbols() { return BootstrapSymbols; }
std::vector<std::unique_ptr<ExecutorBootstrapService>> &services() {
return Services;
@@ -76,6 +87,7 @@ public:
private:
Setup(SimpleRemoteEPCServer &S) : S(S) {}
SimpleRemoteEPCServer &S;
+ StringMap<std::vector<char>> BootstrapMap;
StringMap<ExecutorAddr> BootstrapSymbols;
std::vector<std::unique_ptr<ExecutorBootstrapService>> Services;
};
@@ -114,7 +126,8 @@ public:
for (auto &Service : Server->Services)
Service->addBootstrapSymbols(S.bootstrapSymbols());
- if (auto Err = Server->sendSetupMessage(std::move(S.BootstrapSymbols)))
+ if (auto Err = Server->sendSetupMessage(std::move(S.BootstrapMap),
+ std::move(S.BootstrapSymbols)))
return std::move(Err);
return std::move(Server);
}
@@ -141,7 +154,8 @@ private:
Error sendMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
ExecutorAddr TagAddr, ArrayRef<char> ArgBytes);
- Error sendSetupMessage(StringMap<ExecutorAddr> BootstrapSymbols);
+ Error sendSetupMessage(StringMap<std::vector<char>> BootstrapMap,
+ StringMap<ExecutorAddr> BootstrapSymbols);
Error handleResult(uint64_t SeqNo, ExecutorAddr TagAddr,
SimpleRemoteEPCArgBytesVector ArgBytes);
diff --git a/llvm/include/llvm/ExecutionEngine/RuntimeDyldChecker.h b/llvm/include/llvm/ExecutionEngine/RuntimeDyldChecker.h
index f094c02e86f3..5904250c9a72 100644
--- a/llvm/include/llvm/ExecutionEngine/RuntimeDyldChecker.h
+++ b/llvm/include/llvm/ExecutionEngine/RuntimeDyldChecker.h
@@ -62,6 +62,7 @@ class raw_ostream;
/// | 'next_pc' '(' symbol ')'
/// | 'stub_addr' '(' stub-container-name ',' symbol ')'
/// | 'got_addr' '(' stub-container-name ',' symbol ')'
+/// | 'section_addr' '(' stub-container-name ',' symbol ')'
/// | symbol
///
/// binary_expr = expr '+' expr
@@ -172,7 +173,7 @@ public:
bool LocalAddress);
/// If there is a section at the given local address, return its load
- /// address, otherwise return none.
+ /// address, otherwise return std::nullopt.
std::optional<uint64_t> getSectionLoadAddress(void *LocalAddress) const;
private:
diff --git a/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h b/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h
index 455efc9f9001..fa1b2355528d 100644
--- a/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h
+++ b/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h
@@ -185,7 +185,8 @@ private:
MemoryGroup CodeMem;
MemoryGroup RWDataMem;
MemoryGroup RODataMem;
- MemoryMapper &MMapper;
+ MemoryMapper *MMapper;
+ std::unique_ptr<MemoryMapper> OwnedMMapper;
};
} // end namespace llvm
diff --git a/llvm/include/llvm/Frontend/Debug/Options.h b/llvm/include/llvm/Frontend/Debug/Options.h
new file mode 100644
index 000000000000..c490508d3793
--- /dev/null
+++ b/llvm/include/llvm/Frontend/Debug/Options.h
@@ -0,0 +1,62 @@
+//===--- Options.h - Debug Info Emission Types -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_FRONTEND_DEBUG_OPTIONS_H
+#define LLVM_FRONTEND_DEBUG_OPTIONS_H
+
+namespace llvm {
+namespace codegenoptions {
+
+enum DebugInfoFormat {
+ DIF_DWARF,
+ DIF_CodeView,
+};
+
+enum DebugInfoKind {
+ /// Don't generate debug info.
+ NoDebugInfo,
+
+ /// Emit location information but do not generate debug info in the output.
+ /// This is useful in cases where the backend wants to track source
+ /// locations for instructions without actually emitting debug info for them
+ /// (e.g., when -Rpass is used).
+ LocTrackingOnly,
+
+ /// Emit only debug directives with line number data.
+ DebugDirectivesOnly,
+
+ /// Emit only debug info necessary for generating line number tables
+ /// (-gline-tables-only).
+ DebugLineTablesOnly,
+
+ /// Limit generated debug info for classes to reduce size. This emits class
+ /// type info only where the constructor is emitted, if it is a class that
+ /// has a constructor.
+ /// FIXME: Consider combining this with LimitedDebugInfo.
+ DebugInfoConstructor,
+
+ /// Limit generated debug info to reduce size (-fno-standalone-debug). This
+ /// emits forward decls for types that could be replaced with forward decls in
+ /// the source code. For dynamic C++ classes, type info is only emitted into
+ /// the module that contains the class's vtable.
+ LimitedDebugInfo,
+
+ /// Generate complete debug info.
+ FullDebugInfo,
+
+ /// Generate debug info for types that may be unused in the source
+ /// (-fno-eliminate-unused-debug-types).
+ UnusedTypeInfo,
+};
+
+enum class DebugTemplateNamesKind { Full, Simple, Mangled };
+
+} // end namespace codegenoptions
+} // end namespace llvm
+
+#endif
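As an illustrative (not prescriptive) sketch, a frontend might map a numeric debug level onto these enumerators roughly as follows; the level convention is assumed, not defined by this header:

    #include "llvm/Frontend/Debug/Options.h"

    // Assumed convention: 0 = none, 1 = line tables only, 2+ = full debug info.
    static llvm::codegenoptions::DebugInfoKind kindForLevel(unsigned Level) {
      using namespace llvm::codegenoptions;
      switch (Level) {
      case 0:  return NoDebugInfo;
      case 1:  return DebugLineTablesOnly;
      default: return FullDebugInfo;
      }
    }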
diff --git a/llvm/include/llvm/Frontend/OpenACC/ACC.td b/llvm/include/llvm/Frontend/OpenACC/ACC.td
index e5f0632f59f5..62b4113be6e2 100644
--- a/llvm/include/llvm/Frontend/OpenACC/ACC.td
+++ b/llvm/include/llvm/Frontend/OpenACC/ACC.td
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This is the definition file for OpenACC 3.1 directives and clauses.
+// This is the definition file for OpenACC 3.3 directives and clauses.
//
//===----------------------------------------------------------------------===//
@@ -56,7 +56,7 @@ def ACCC_Capture : Clause<"capture"> {
// 2.9.1
def ACCC_Collapse : Clause<"collapse"> {
- let flangClass = "ScalarIntConstantExpr";
+ let flangClass = "AccCollapseArg";
}
// 2.7.6
@@ -64,6 +64,7 @@ def ACCC_Copy : Clause<"copy"> {
let flangClass = "AccObjectList";
let aliases = ["present_or_copy", "pcopy"];
}
+
// 2.7.7
def ACCC_Copyin : Clause<"copyin"> {
let flangClass = "AccObjectListWithModifier";
@@ -82,7 +83,7 @@ def ACCC_Create : Clause<"create"> {
let aliases = ["present_or_create", "pcreate"];
}
-// 2.5.15
+// 2.5.16
def ACC_Default_none : ClauseVal<"none", 1, 1> { let isDefault = 1; }
def ACC_Default_present : ClauseVal<"present", 0, 1> {}
@@ -140,14 +141,14 @@ def ACCC_DeviceType : Clause<"device_type"> {
// 2.6.6
def ACCC_Finalize : Clause<"finalize"> {}
-// 2.5.13
+// 2.5.14
def ACCC_FirstPrivate : Clause<"firstprivate"> {
let flangClass = "AccObjectList";
}
// 2.9.2
def ACCC_Gang : Clause<"gang"> {
- let flangClass = "AccGangArgument";
+ let flangClass = "AccGangArgList";
let isValueOptional = true;
}
@@ -156,7 +157,7 @@ def ACCC_Host : Clause<"host"> {
let flangClass = "AccObjectList";
}
-// 2.5.5
+// 2.5.6
def ACCC_If : Clause <"if"> {
let flangClass = "ScalarLogicalExpr";
}
@@ -180,12 +181,13 @@ def ACCC_NoCreate : Clause<"no_create"> {
// 2.15.1
def ACCC_NoHost : Clause<"nohost"> {}
-// 2.5.9
+// 2.5.10
def ACCC_NumGangs : Clause<"num_gangs"> {
let flangClass = "ScalarIntExpr";
+ let isValueList = 1;
}
-// 2.5.10
+// 2.5.11
def ACCC_NumWorkers : Clause<"num_workers"> {
let flangClass = "ScalarIntExpr";
}
@@ -195,7 +197,7 @@ def ACCC_Present : Clause<"present"> {
let flangClass = "AccObjectList";
}
-// 2.5.12
+// 2.5.13
def ACCC_Private : Clause<"private"> {
let flangClass = "AccObjectList";
}
@@ -213,12 +215,12 @@ def ACCC_UseDevice : Clause <"use_device"> {
// 2.12
def ACCC_Read : Clause<"read"> {}
-// 2.5.14
+// 2.5.15
def ACCC_Reduction : Clause<"reduction"> {
let flangClass = "AccObjectListWithReduction";
}
-// 2.5.6
+// 2.5.7
def ACCC_Self : Clause<"self"> {
let flangClass = "AccSelfClause";
let isValueOptional = true;
@@ -234,7 +236,7 @@ def ACCC_Vector : Clause<"vector"> {
let prefix = "length";
}
-// 2.5.11
+// 2.5.12
def ACCC_VectorLength : Clause<"vector_length"> {
let flangClass = "ScalarIntExpr";
}
@@ -269,9 +271,14 @@ def ACC_Atomic : Directive<"atomic"> {}
// 2.6.5
def ACC_Data : Directive<"data"> {
let allowedOnceClauses = [
+ VersionedClause<ACCC_Async, 32>,
VersionedClause<ACCC_If>,
VersionedClause<ACCC_Default>
];
+ let allowedClauses = [
+ VersionedClause<ACCC_DeviceType, 32>,
+ VersionedClause<ACCC_Wait, 32>
+ ];
let requiredClauses = [
VersionedClause<ACCC_Attach>,
VersionedClause<ACCC_Copy>,
@@ -338,6 +345,7 @@ def ACC_Parallel : Directive<"parallel"> {
VersionedClause<ACCC_Present>,
VersionedClause<ACCC_Private>,
VersionedClause<ACCC_FirstPrivate>,
+ VersionedClause<ACCC_Reduction>,
VersionedClause<ACCC_Wait>
];
let allowedOnceClauses = [
@@ -346,7 +354,6 @@ def ACC_Parallel : Directive<"parallel"> {
VersionedClause<ACCC_If>,
VersionedClause<ACCC_NumGangs>,
VersionedClause<ACCC_NumWorkers>,
- VersionedClause<ACCC_Reduction>,
VersionedClause<ACCC_Self>,
VersionedClause<ACCC_VectorLength>
];
@@ -368,13 +375,13 @@ def ACC_Serial : Directive<"serial"> {
VersionedClause<ACCC_Present>,
VersionedClause<ACCC_Private>,
VersionedClause<ACCC_FirstPrivate>,
+ VersionedClause<ACCC_Reduction>,
VersionedClause<ACCC_Wait>
];
let allowedOnceClauses = [
VersionedClause<ACCC_Async>,
VersionedClause<ACCC_Default>,
VersionedClause<ACCC_If>,
- VersionedClause<ACCC_Reduction>,
VersionedClause<ACCC_Self>
];
}
@@ -383,12 +390,12 @@ def ACC_Serial : Directive<"serial"> {
def ACC_Loop : Directive<"loop"> {
let allowedClauses = [
VersionedClause<ACCC_DeviceType>,
- VersionedClause<ACCC_Private>
+ VersionedClause<ACCC_Private>,
+ VersionedClause<ACCC_Reduction>
];
let allowedOnceClauses = [
VersionedClause<ACCC_Collapse>,
VersionedClause<ACCC_Gang>,
- VersionedClause<ACCC_Reduction>,
VersionedClause<ACCC_Tile>,
VersionedClause<ACCC_Vector>,
VersionedClause<ACCC_Worker>
@@ -417,9 +424,7 @@ def ACC_Routine : Directive<"routine"> {
let allowedOnceClauses = [
VersionedClause<ACCC_Bind>,
VersionedClause<ACCC_DeviceType>,
- VersionedClause<ACCC_NoHost>
- ];
- let requiredClauses = [
+ VersionedClause<ACCC_NoHost>,
VersionedClause<ACCC_Gang>,
VersionedClause<ACCC_Seq>,
VersionedClause<ACCC_Vector>,
@@ -535,6 +540,7 @@ def ACC_KernelsLoop : Directive<"kernels loop"> {
VersionedClause<ACCC_NoCreate>,
VersionedClause<ACCC_Present>,
VersionedClause<ACCC_Private>,
+ VersionedClause<ACCC_Reduction>,
VersionedClause<ACCC_DevicePtr>,
VersionedClause<ACCC_Attach>,
VersionedClause<ACCC_Wait>
@@ -547,7 +553,6 @@ def ACC_KernelsLoop : Directive<"kernels loop"> {
VersionedClause<ACCC_If>,
VersionedClause<ACCC_NumGangs>,
VersionedClause<ACCC_NumWorkers>,
- VersionedClause<ACCC_Reduction>,
VersionedClause<ACCC_Self>,
VersionedClause<ACCC_Tile>,
VersionedClause<ACCC_Vector>,
@@ -575,6 +580,7 @@ def ACC_ParallelLoop : Directive<"parallel loop"> {
VersionedClause<ACCC_NoCreate>,
VersionedClause<ACCC_Present>,
VersionedClause<ACCC_Private>,
+ VersionedClause<ACCC_Reduction>,
VersionedClause<ACCC_Tile>,
VersionedClause<ACCC_Wait>
];
@@ -586,7 +592,6 @@ def ACC_ParallelLoop : Directive<"parallel loop"> {
VersionedClause<ACCC_If>,
VersionedClause<ACCC_NumGangs>,
VersionedClause<ACCC_NumWorkers>,
- VersionedClause<ACCC_Reduction>,
VersionedClause<ACCC_Self>,
VersionedClause<ACCC_Vector>,
VersionedClause<ACCC_VectorLength>,
@@ -613,6 +618,7 @@ def ACC_SerialLoop : Directive<"serial loop"> {
VersionedClause<ACCC_NoCreate>,
VersionedClause<ACCC_Present>,
VersionedClause<ACCC_Private>,
+ VersionedClause<ACCC_Reduction>,
VersionedClause<ACCC_Wait>
];
let allowedOnceClauses = [
@@ -621,7 +627,6 @@ def ACC_SerialLoop : Directive<"serial loop"> {
VersionedClause<ACCC_Default>,
VersionedClause<ACCC_Gang>,
VersionedClause<ACCC_If>,
- VersionedClause<ACCC_Reduction>,
VersionedClause<ACCC_Self>,
VersionedClause<ACCC_Tile>,
VersionedClause<ACCC_Vector>,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td
index 9f732e8c6134..c67b54acc47c 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMP.td
+++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td
@@ -222,6 +222,9 @@ def OMPC_Device : Clause<"device"> {
let clangClass = "OMPDeviceClause";
let flangClass = "OmpDeviceClause";
}
+def OMPC_DeviceType : Clause<"device_type"> {
+ let flangClass = "OmpDeviceTypeClause";
+}
def OMPC_Threads : Clause<"threads"> { let clangClass = "OMPThreadsClause"; }
def OMPC_Simd : Clause<"simd"> { let clangClass = "OMPSIMDClause"; }
def OMPC_Map : Clause<"map"> {
@@ -292,8 +295,7 @@ def OMPC_From : Clause<"from"> {
}
def OMPC_UseDevicePtr : Clause<"use_device_ptr"> {
let clangClass = "OMPUseDevicePtrClause";
- let flangClass = "Name";
- let isValueList = true;
+ let flangClass = "OmpObjectList";
}
def OMPC_IsDevicePtr : Clause<"is_device_ptr"> {
let clangClass = "OMPIsDevicePtrClause";
@@ -352,6 +354,7 @@ def OMP_ORDER_concurrent : ClauseVal<"concurrent",1,1> {}
def OMP_ORDER_unknown : ClauseVal<"unknown",2,0> { let isDefault = 1; }
def OMPC_Order : Clause<"order"> {
let clangClass = "OMPOrderClause";
+ let flangClass = "OmpOrderClause";
let enumClauseValue = "OrderKind";
let allowedClauseValues = [
OMP_ORDER_unknown,
@@ -392,12 +395,12 @@ def OMPC_Affinity : Clause<"affinity"> {
}
def OMPC_UseDeviceAddr : Clause<"use_device_addr"> {
let clangClass = "OMPUseDeviceAddrClause";
+ let flangClass = "OmpObjectList";
}
def OMPC_Uniform : Clause<"uniform"> {
let flangClass = "Name";
let isValueList = true;
}
-def OMPC_DeviceType : Clause<"device_type"> {}
def OMPC_Match : Clause<"match"> {}
def OMPC_AdjustArgs : Clause<"adjust_args"> { }
def OMPC_AppendArgs : Clause<"append_args"> { }
@@ -441,6 +444,10 @@ def OMPC_OMPX_DynCGroupMem : Clause<"ompx_dyn_cgroup_mem"> {
let flangClass = "ScalarIntExpr";
}
+def OMPC_Doacross : Clause<"doacross"> {
+ let clangClass = "OMPDoacrossClause";
+}
+
//===----------------------------------------------------------------------===//
// Definition of OpenMP directives
//===----------------------------------------------------------------------===//
@@ -491,13 +498,13 @@ def OMP_Simd : Directive<"simd"> {
VersionedClause<OMPC_Reduction>,
VersionedClause<OMPC_Allocate>,
VersionedClause<OMPC_NonTemporal, 50>,
- VersionedClause<OMPC_Order, 50>
];
let allowedOnceClauses = [
VersionedClause<OMPC_Collapse>,
VersionedClause<OMPC_SafeLen>,
VersionedClause<OMPC_SimdLen>,
VersionedClause<OMPC_If, 50>,
+ VersionedClause<OMPC_Order, 50>
];
}
def OMP_Tile : Directive<"tile"> {
@@ -538,7 +545,8 @@ def OMP_Do : Directive<"do"> {
VersionedClause<OMPC_Schedule>,
VersionedClause<OMPC_Collapse>,
VersionedClause<OMPC_Ordered>,
- VersionedClause<OMPC_NoWait>
+ VersionedClause<OMPC_NoWait>,
+ VersionedClause<OMPC_Order, 50>
];
}
def OMP_Sections : Directive<"sections"> {
@@ -600,7 +608,8 @@ def OMP_Flush : Directive<"flush"> {
}
def OMP_Ordered : Directive<"ordered"> {
let allowedClauses = [
- VersionedClause<OMPC_Depend>
+ VersionedClause<OMPC_Depend>,
+ VersionedClause<OMPC_Doacross, 52>
];
let allowedOnceClauses = [
VersionedClause<OMPC_Threads>,
@@ -789,7 +798,6 @@ def OMP_TargetParallelDo : Directive<"target parallel do"> {
VersionedClause<OMPC_IsDevicePtr>,
VersionedClause<OMPC_HasDeviceAddr, 51>,
VersionedClause<OMPC_Allocator>,
- VersionedClause<OMPC_Order>,
VersionedClause<OMPC_UsesAllocators>,
VersionedClause<OMPC_Default>,
VersionedClause<OMPC_Copyin>
@@ -803,7 +811,8 @@ def OMP_TargetParallelDo : Directive<"target parallel do"> {
VersionedClause<OMPC_Schedule>,
VersionedClause<OMPC_Collapse>,
VersionedClause<OMPC_Ordered>,
- VersionedClause<OMPC_NoWait>
+ VersionedClause<OMPC_NoWait>,
+ VersionedClause<OMPC_Order, 50>
];
}
def OMP_TargetUpdate : Directive<"target update"> {
@@ -855,7 +864,8 @@ def OMP_ParallelDo : Directive<"parallel do"> {
VersionedClause<OMPC_ProcBind>,
VersionedClause<OMPC_Schedule>,
VersionedClause<OMPC_Ordered>,
- VersionedClause<OMPC_Collapse>
+ VersionedClause<OMPC_Collapse>,
+ VersionedClause<OMPC_Order, 50>
];
}
def OMP_ParallelForSimd : Directive<"parallel for simd"> {
@@ -895,7 +905,6 @@ def OMP_ParallelDoSimd : Directive<"parallel do simd"> {
VersionedClause<OMPC_Aligned>,
VersionedClause<OMPC_Allocate>,
VersionedClause<OMPC_NonTemporal>,
- VersionedClause<OMPC_Order>
];
let allowedOnceClauses = [
VersionedClause<OMPC_If>,
@@ -905,7 +914,8 @@ def OMP_ParallelDoSimd : Directive<"parallel do simd"> {
VersionedClause<OMPC_Ordered>,
VersionedClause<OMPC_Collapse>,
VersionedClause<OMPC_SafeLen>,
- VersionedClause<OMPC_SimdLen>
+ VersionedClause<OMPC_SimdLen>,
+ VersionedClause<OMPC_Order, 50>
];
}
def OMP_ParallelMaster : Directive<"parallel master"> {
@@ -971,7 +981,7 @@ def OMP_ForSimd : Directive<"for simd"> {
VersionedClause<OMPC_Allocate>,
VersionedClause<OMPC_If, 50>,
VersionedClause<OMPC_NonTemporal, 50>,
- VersionedClause<OMPC_Order, 50>,
+ VersionedClause<OMPC_Order, 50>
];
}
def OMP_DoSimd : Directive<"do simd"> {
@@ -989,7 +999,8 @@ def OMP_DoSimd : Directive<"do simd"> {
VersionedClause<OMPC_Ordered>,
VersionedClause<OMPC_SafeLen>,
VersionedClause<OMPC_SimdLen>,
- VersionedClause<OMPC_NoWait>
+ VersionedClause<OMPC_NoWait>,
+ VersionedClause<OMPC_Order, 50>
];
}
def OMP_CancellationPoint : Directive<"cancellation point"> {}
@@ -1050,7 +1061,6 @@ def OMP_TaskLoopSimd : Directive<"taskloop simd"> {
VersionedClause<OMPC_Mergeable>,
VersionedClause<OMPC_NoGroup>,
VersionedClause<OMPC_NonTemporal, 50>,
- VersionedClause<OMPC_Order, 50>,
VersionedClause<OMPC_Private>,
VersionedClause<OMPC_Reduction>,
VersionedClause<OMPC_Shared>,
@@ -1062,7 +1072,8 @@ def OMP_TaskLoopSimd : Directive<"taskloop simd"> {
VersionedClause<OMPC_SafeLen>,
VersionedClause<OMPC_SimdLen>,
VersionedClause<OMPC_Final>,
- VersionedClause<OMPC_Priority>
+ VersionedClause<OMPC_Priority>,
+ VersionedClause<OMPC_Order, 50>
];
let allowedExclusiveClauses = [
VersionedClause<OMPC_GrainSize>,
@@ -1095,6 +1106,9 @@ def OMP_DeclareTarget : Directive<"declare target"> {
VersionedClause<OMPC_Link>,
VersionedClause<OMPC_Indirect>
];
+ let allowedOnceClauses = [
+ VersionedClause<OMPC_DeviceType, 50>
+ ];
}
def OMP_EndDeclareTarget : Directive<"end declare target"> {}
def OMP_DistributeParallelFor : Directive<"distribute parallel for"> {
@@ -1122,7 +1136,6 @@ def OMP_DistributeParallelDo : Directive<"distribute parallel do"> {
VersionedClause<OMPC_FirstPrivate>,
VersionedClause<OMPC_LastPrivate>,
VersionedClause<OMPC_Allocate>,
- VersionedClause<OMPC_Order>,
VersionedClause<OMPC_Default>,
VersionedClause<OMPC_Shared>,
VersionedClause<OMPC_Reduction>,
@@ -1136,7 +1149,8 @@ def OMP_DistributeParallelDo : Directive<"distribute parallel do"> {
VersionedClause<OMPC_NumThreads>,
VersionedClause<OMPC_ProcBind>,
VersionedClause<OMPC_Schedule>,
- VersionedClause<OMPC_Ordered>
+ VersionedClause<OMPC_Ordered>,
+ VersionedClause<OMPC_Order, 50>
];
}
def OMP_DistributeParallelForSimd : Directive<"distribute parallel for simd"> {
@@ -1184,7 +1198,7 @@ def OMP_DistributeParallelDoSimd : Directive<"distribute parallel do simd"> {
VersionedClause<OMPC_SimdLen>,
VersionedClause<OMPC_Allocate>,
VersionedClause<OMPC_NonTemporal>,
- VersionedClause<OMPC_Order>
+ VersionedClause<OMPC_Order, 50>
];
}
def OMP_DistributeSimd : Directive<"distribute simd"> {
@@ -1197,7 +1211,6 @@ def OMP_DistributeSimd : Directive<"distribute simd"> {
VersionedClause<OMPC_FirstPrivate>,
VersionedClause<OMPC_LastPrivate>,
VersionedClause<OMPC_NonTemporal, 50>,
- VersionedClause<OMPC_Order, 50>,
VersionedClause<OMPC_Private>,
VersionedClause<OMPC_Reduction>
];
@@ -1210,7 +1223,8 @@ def OMP_DistributeSimd : Directive<"distribute simd"> {
VersionedClause<OMPC_ProcBind>,
VersionedClause<OMPC_Schedule>,
VersionedClause<OMPC_SafeLen>,
- VersionedClause<OMPC_SimdLen>
+ VersionedClause<OMPC_SimdLen>,
+ VersionedClause<OMPC_Order, 50>
];
}
@@ -1275,7 +1289,7 @@ def OMP_TargetParallelDoSimd : Directive<"target parallel do simd"> {
VersionedClause<OMPC_HasDeviceAddr, 51>,
VersionedClause<OMPC_Allocate>,
VersionedClause<OMPC_NonTemporal>,
- VersionedClause<OMPC_Order>,
+ VersionedClause<OMPC_Order, 50>,
VersionedClause<OMPC_UsesAllocators>
];
}
@@ -1292,7 +1306,6 @@ def OMP_TargetSimd : Directive<"target simd"> {
VersionedClause<OMPC_Map>,
VersionedClause<OMPC_NonTemporal, 50>,
VersionedClause<OMPC_NoWait>,
- VersionedClause<OMPC_Order, 50>,
VersionedClause<OMPC_Private>,
VersionedClause<OMPC_Reduction>,
VersionedClause<OMPC_Shared>,
@@ -1309,6 +1322,7 @@ def OMP_TargetSimd : Directive<"target simd"> {
VersionedClause<OMPC_DefaultMap>,
VersionedClause<OMPC_Schedule>,
VersionedClause<OMPC_OMPX_DynCGroupMem>,
+ VersionedClause<OMPC_Order, 50>
];
}
def OMP_TeamsDistribute : Directive<"teams distribute"> {
@@ -1334,7 +1348,6 @@ def OMP_TeamsDistributeSimd : Directive<"teams distribute simd"> {
VersionedClause<OMPC_LastPrivate>,
VersionedClause<OMPC_Linear>,
VersionedClause<OMPC_NonTemporal, 50>,
- VersionedClause<OMPC_Order, 50>,
VersionedClause<OMPC_Private>,
VersionedClause<OMPC_Reduction>,
VersionedClause<OMPC_Shared>
@@ -1347,7 +1360,8 @@ def OMP_TeamsDistributeSimd : Directive<"teams distribute simd"> {
VersionedClause<OMPC_NumTeams>,
VersionedClause<OMPC_SafeLen>,
VersionedClause<OMPC_SimdLen>,
- VersionedClause<OMPC_ThreadLimit>
+ VersionedClause<OMPC_ThreadLimit>,
+ VersionedClause<OMPC_Order, 50>
];
}
@@ -1387,7 +1401,6 @@ def OMP_TeamsDistributeParallelDoSimd :
VersionedClause<OMPC_Shared>,
VersionedClause<OMPC_Reduction>,
VersionedClause<OMPC_Linear>,
- VersionedClause<OMPC_Order>,
VersionedClause<OMPC_Aligned>,
VersionedClause<OMPC_NonTemporal>
];
@@ -1403,6 +1416,7 @@ def OMP_TeamsDistributeParallelDoSimd :
VersionedClause<OMPC_SafeLen>,
VersionedClause<OMPC_SimdLen>,
VersionedClause<OMPC_If>,
+ VersionedClause<OMPC_Order, 50>
];
}
def OMP_TeamsDistributeParallelFor :
@@ -1446,11 +1460,11 @@ let allowedOnceClauses = [
VersionedClause<OMPC_Collapse>,
VersionedClause<OMPC_DistSchedule>,
VersionedClause<OMPC_Ordered>,
- VersionedClause<OMPC_Order>,
VersionedClause<OMPC_If>,
VersionedClause<OMPC_NumThreads>,
VersionedClause<OMPC_ProcBind>,
- VersionedClause<OMPC_Schedule>
+ VersionedClause<OMPC_Schedule>,
+ VersionedClause<OMPC_Order, 50>
];
}
def OMP_TargetTeams : Directive<"target teams"> {
@@ -1556,7 +1570,6 @@ def OMP_TargetTeamsDistributeParallelDo :
VersionedClause<OMPC_Copyin>,
VersionedClause<OMPC_Linear>,
VersionedClause<OMPC_Ordered>,
- VersionedClause<OMPC_Order>
];
let allowedOnceClauses = [
VersionedClause<OMPC_Device>,
@@ -1570,6 +1583,7 @@ def OMP_TargetTeamsDistributeParallelDo :
VersionedClause<OMPC_NumThreads>,
VersionedClause<OMPC_ProcBind>,
VersionedClause<OMPC_Schedule>,
+ VersionedClause<OMPC_Order, 50>
];
}
def OMP_TargetTeamsDistributeParallelForSimd :
@@ -1626,7 +1640,6 @@ def OMP_TargetTeamsDistributeParallelDoSimd :
VersionedClause<OMPC_Copyin>,
VersionedClause<OMPC_Linear>,
VersionedClause<OMPC_Ordered>,
- VersionedClause<OMPC_Order>,
VersionedClause<OMPC_Aligned>,
VersionedClause<OMPC_NonTemporal>
];
@@ -1644,7 +1657,8 @@ def OMP_TargetTeamsDistributeParallelDoSimd :
VersionedClause<OMPC_ProcBind>,
VersionedClause<OMPC_Schedule>,
VersionedClause<OMPC_SafeLen>,
- VersionedClause<OMPC_SimdLen>
+ VersionedClause<OMPC_SimdLen>,
+ VersionedClause<OMPC_Order, 50>
];
}
def OMP_TargetTeamsDistributeSimd :
@@ -1661,7 +1675,6 @@ def OMP_TargetTeamsDistributeSimd :
VersionedClause<OMPC_Linear>,
VersionedClause<OMPC_Map>,
VersionedClause<OMPC_NonTemporal, 50>,
- VersionedClause<OMPC_Order, 50>,
VersionedClause<OMPC_Private>,
VersionedClause<OMPC_Reduction>,
VersionedClause<OMPC_Shared>,
@@ -1678,6 +1691,7 @@ def OMP_TargetTeamsDistributeSimd :
VersionedClause<OMPC_SafeLen>,
VersionedClause<OMPC_SimdLen>,
VersionedClause<OMPC_OMPX_DynCGroupMem>,
+ VersionedClause<OMPC_Order, 50>
];
}
def OMP_Allocate : Directive<"allocate"> {
@@ -1996,7 +2010,7 @@ def OMP_loop : Directive<"loop"> {
let allowedOnceClauses = [
VersionedClause<OMPC_Bind, 50>,
VersionedClause<OMPC_Collapse>,
- VersionedClause<OMPC_Order>,
+ VersionedClause<OMPC_Order, 50>
];
}
def OMP_teams_loop : Directive<"teams loop"> {
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
index afdbc4d9788d..32dcdd587f3b 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h
@@ -72,6 +72,9 @@ enum class IdentFlag {
#define OMP_IDENT_FLAG(Enum, ...) constexpr auto Enum = omp::IdentFlag::Enum;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
+// Version of the kernel argument format used by the omp runtime.
+#define OMP_KERNEL_ARG_VERSION 2
+
/// \note This needs to be kept in sync with kmp.h enum sched_type.
/// Todo: Update kmp.h to include this file, and remove the enums in kmp.h
enum class OMPScheduleType {
@@ -241,6 +244,12 @@ enum class OpenMPOffloadMappingFlags : uint64_t {
LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF)
};
+enum OpenMPOffloadingReservedDeviceIDs {
+ /// Device ID used when no device was specified; the runtime should then
+ /// obtain it from the environment variables described in the spec.
+ OMP_DEVICEID_UNDEF = -1
+};
+
enum class AddressSpace : unsigned {
Generic = 0,
Global = 1,
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
index 93464063dfaf..bfac2d734b81 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -85,23 +85,23 @@ struct GV {
/// For AMDGPU GPUs
static constexpr GV AMDGPUGridValues64 = {
- 256, // GV_Slot_Size
- 64, // GV_Warp_Size
+ 256, // GV_Slot_Size
+ 64, // GV_Warp_Size
(1 << 16), // GV_Max_Teams
- 440, // GV_Default_Num_Teams
- 896, // GV_SimpleBufferSize
- 1024, // GV_Max_WG_Size,
- 256, // GV_Default_WG_Size
+ 440, // GV_Default_Num_Teams
+ 896, // GV_SimpleBufferSize
+ 1024, // GV_Max_WG_Size,
+ 256, // GV_Default_WG_Size
};
static constexpr GV AMDGPUGridValues32 = {
- 256, // GV_Slot_Size
- 32, // GV_Warp_Size
+ 256, // GV_Slot_Size
+ 32, // GV_Warp_Size
(1 << 16), // GV_Max_Teams
- 440, // GV_Default_Num_Teams
- 896, // GV_SimpleBufferSize
- 1024, // GV_Max_WG_Size,
- 256, // GV_Default_WG_Size
+ 440, // GV_Default_Num_Teams
+ 896, // GV_SimpleBufferSize
+ 1024, // GV_Max_WG_Size,
+ 256, // GV_Default_WG_Size
};
template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() {
@@ -111,13 +111,13 @@ template <unsigned wavesize> constexpr const GV &getAMDGPUGridValues() {
/// For Nvidia GPUs
static constexpr GV NVPTXGridValues = {
- 256, // GV_Slot_Size
- 32, // GV_Warp_Size
+ 256, // GV_Slot_Size
+ 32, // GV_Warp_Size
(1 << 16), // GV_Max_Teams
- 3200, // GV_Default_Num_Teams
- 896, // GV_SimpleBufferSize
- 1024, // GV_Max_WG_Size
- 128, // GV_Default_WG_Size
+ 3200, // GV_Default_Num_Teams
+ 896, // GV_SimpleBufferSize
+ 1024, // GV_Max_WG_Size
+ 128, // GV_Default_WG_Size
};
} // namespace omp
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 84c062978a32..fc838765949c 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -27,6 +27,7 @@ namespace llvm {
class CanonicalLoopInfo;
struct TargetRegionEntryInfo;
class OffloadEntriesInfoManager;
+class OpenMPIRBuilder;
/// Move the instruction after an InsertPoint to the beginning of another
/// BasicBlock.
@@ -83,11 +84,10 @@ class OpenMPIRBuilderConfig {
public:
/// Flag for specifying if the compilation is done for embedded device code
/// or host code.
- std::optional<bool> IsEmbedded;
+ std::optional<bool> IsTargetDevice;
- /// Flag for specifying if the compilation is done for an offloading target,
- /// like GPU.
- std::optional<bool> IsTargetCodegen;
+ /// Flag for specifying if the compilation is done for an accelerator.
+ std::optional<bool> IsGPU;
/// Flag for specifying whether a requires unified_shared_memory
/// directive is present or not.
@@ -102,22 +102,22 @@ public:
std::optional<StringRef> Separator;
OpenMPIRBuilderConfig() {}
- OpenMPIRBuilderConfig(bool IsEmbedded, bool IsTargetCodegen,
+ OpenMPIRBuilderConfig(bool IsTargetDevice, bool IsGPU,
bool HasRequiresUnifiedSharedMemory,
bool OpenMPOffloadMandatory)
- : IsEmbedded(IsEmbedded), IsTargetCodegen(IsTargetCodegen),
+ : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
HasRequiresUnifiedSharedMemory(HasRequiresUnifiedSharedMemory),
OpenMPOffloadMandatory(OpenMPOffloadMandatory) {}
// Getter functions that assert if the required values are not present.
- bool isEmbedded() const {
- assert(IsEmbedded.has_value() && "IsEmbedded is not set");
- return *IsEmbedded;
+ bool isTargetDevice() const {
+ assert(IsTargetDevice.has_value() && "IsTargetDevice is not set");
+ return *IsTargetDevice;
}
- bool isTargetCodegen() const {
- assert(IsTargetCodegen.has_value() && "IsTargetCodegen is not set");
- return *IsTargetCodegen;
+ bool isGPU() const {
+ assert(IsGPU.has_value() && "IsGPU is not set");
+ return *IsGPU;
}
bool hasRequiresUnifiedSharedMemory() const {
@@ -131,28 +131,28 @@ public:
"OpenMPOffloadMandatory is not set");
return *OpenMPOffloadMandatory;
}
- // Returns the FirstSeparator if set, otherwise use the default
- // separator depending on isTargetCodegen
+ // Returns the FirstSeparator if set, otherwise uses the default separator
+ // based on isGPU.
StringRef firstSeparator() const {
if (FirstSeparator.has_value())
return *FirstSeparator;
- if (isTargetCodegen())
+ if (isGPU())
return "_";
return ".";
}
- // Returns the Separator if set, otherwise use the default
- // separator depending on isTargetCodegen
+ // Returns the Separator if set, otherwise uses the default separator
+ // based on isGPU.
StringRef separator() const {
if (Separator.has_value())
return *Separator;
- if (isTargetCodegen())
+ if (isGPU())
return "$";
return ".";
}
- void setIsEmbedded(bool Value) { IsEmbedded = Value; }
- void setIsTargetCodegen(bool Value) { IsTargetCodegen = Value; }
+ void setIsTargetDevice(bool Value) { IsTargetDevice = Value; }
+ void setIsGPU(bool Value) { IsGPU = Value; }
void setHasRequiresUnifiedSharedMemory(bool Value) {
HasRequiresUnifiedSharedMemory = Value;
}
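A minimal configuration sketch using only the renamed flags shown above (the function name and flag values are illustrative):

    #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
    using namespace llvm;

    static void setUpForDeviceCodegen(Module &M) {
      OpenMPIRBuilder OMPBuilder(M);
      OpenMPIRBuilderConfig Config(/*IsTargetDevice=*/true, /*IsGPU=*/true,
                                   /*HasRequiresUnifiedSharedMemory=*/false,
                                   /*OpenMPOffloadMandatory=*/false);
      OMPBuilder.setConfig(Config);
      // Device builds may also pass the host IR path to initialize() so that
      // offload metadata from the host compilation is loaded (see below).
      OMPBuilder.initialize();
    }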
@@ -160,6 +160,270 @@ public:
void setSeparator(StringRef S) { Separator = S; }
};
+/// Data structure to contain the information needed to uniquely identify
+/// a target entry.
+struct TargetRegionEntryInfo {
+ std::string ParentName;
+ unsigned DeviceID;
+ unsigned FileID;
+ unsigned Line;
+ unsigned Count;
+
+ TargetRegionEntryInfo() : DeviceID(0), FileID(0), Line(0), Count(0) {}
+ TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID,
+ unsigned FileID, unsigned Line, unsigned Count = 0)
+ : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line),
+ Count(Count) {}
+
+ static void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
+ StringRef ParentName,
+ unsigned DeviceID, unsigned FileID,
+ unsigned Line, unsigned Count);
+
+ bool operator<(const TargetRegionEntryInfo RHS) const {
+ return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) <
+ std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line,
+ RHS.Count);
+ }
+};
+
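Because the struct defines operator<, it can be used directly as an ordered map key, which is how the per-location entry counts further below are tracked; a small sketch with made-up location values:

    #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
    #include <map>
    using namespace llvm;

    static unsigned nextCount(std::map<TargetRegionEntryInfo, unsigned> &Counts,
                              StringRef ParentName) {
      // DeviceID/FileID/Line are placeholders; real values normally come from
      // OpenMPIRBuilder::getTargetEntryUniqueInfo.
      TargetRegionEntryInfo Key(ParentName, /*DeviceID=*/0, /*FileID=*/1,
                                /*Line=*/42);
      return Counts[Key]++;
    }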
+/// Class that manages information about offload code regions and data
+class OffloadEntriesInfoManager {
+ /// The OpenMPIRBuilder this set of entries belongs to.
+ OpenMPIRBuilder *OMPBuilder;
+ /// Number of entries registered so far.
+ unsigned OffloadingEntriesNum = 0;
+
+public:
+ /// Base class of the entries info.
+ class OffloadEntryInfo {
+ public:
+ /// Kind of a given entry.
+ enum OffloadingEntryInfoKinds : unsigned {
+ /// Entry is a target region.
+ OffloadingEntryInfoTargetRegion = 0,
+ /// Entry is a declare target variable.
+ OffloadingEntryInfoDeviceGlobalVar = 1,
+ /// Invalid entry info.
+ OffloadingEntryInfoInvalid = ~0u
+ };
+
+ protected:
+ OffloadEntryInfo() = delete;
+ explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {}
+ explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
+ uint32_t Flags)
+ : Flags(Flags), Order(Order), Kind(Kind) {}
+ ~OffloadEntryInfo() = default;
+
+ public:
+ bool isValid() const { return Order != ~0u; }
+ unsigned getOrder() const { return Order; }
+ OffloadingEntryInfoKinds getKind() const { return Kind; }
+ uint32_t getFlags() const { return Flags; }
+ void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
+ Constant *getAddress() const { return cast_or_null<Constant>(Addr); }
+ void setAddress(Constant *V) {
+ assert(!Addr.pointsToAliveValue() && "Address has been set before!");
+ Addr = V;
+ }
+ static bool classof(const OffloadEntryInfo *Info) { return true; }
+
+ private:
+ /// Address of the entity that has to be mapped for offloading.
+ WeakTrackingVH Addr;
+
+ /// Flags associated with the device global.
+ uint32_t Flags = 0u;
+
+ /// Order this entry was emitted.
+ unsigned Order = ~0u;
+
+ OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid;
+ };
+
+ /// Return true if there are no entries defined.
+ bool empty() const;
+ /// Return number of entries defined so far.
+ unsigned size() const { return OffloadingEntriesNum; }
+
+ OffloadEntriesInfoManager(OpenMPIRBuilder *builder) : OMPBuilder(builder) {}
+
+ //
+ // Target region entries related.
+ //
+
+ /// Kind of the target registry entry.
+ enum OMPTargetRegionEntryKind : uint32_t {
+ /// Mark the entry as target region.
+ OMPTargetRegionEntryTargetRegion = 0x0,
+ /// Mark the entry as a global constructor.
+ OMPTargetRegionEntryCtor = 0x02,
+ /// Mark the entry as a global destructor.
+ OMPTargetRegionEntryDtor = 0x04,
+ };
+
+ /// Target region entries info.
+ class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo {
+ /// Address that can be used as the ID of the entry.
+ Constant *ID = nullptr;
+
+ public:
+ OffloadEntryInfoTargetRegion()
+ : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {}
+ explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr,
+ Constant *ID,
+ OMPTargetRegionEntryKind Flags)
+ : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags),
+ ID(ID) {
+ setAddress(Addr);
+ }
+
+ Constant *getID() const { return ID; }
+ void setID(Constant *V) {
+ assert(!ID && "ID has been set before!");
+ ID = V;
+ }
+ static bool classof(const OffloadEntryInfo *Info) {
+ return Info->getKind() == OffloadingEntryInfoTargetRegion;
+ }
+ };
+
+ /// Initialize target region entry.
+ /// This is ONLY needed for DEVICE compilation.
+ void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo,
+ unsigned Order);
+ /// Register target region entry.
+ void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
+ Constant *Addr, Constant *ID,
+ OMPTargetRegionEntryKind Flags);
+ /// Return true if a target region entry with the provided information
+ /// exists.
+ bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
+ bool IgnoreAddressId = false) const;
+
+ // Return the Name based on \a EntryInfo using the next available Count.
+ void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
+ const TargetRegionEntryInfo &EntryInfo);
+
+ /// Applies action \a Action on all registered entries.
+ typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo,
+ const OffloadEntryInfoTargetRegion &)>
+ OffloadTargetRegionEntryInfoActTy;
+ void
+ actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action);
+
+ //
+ // Device global variable entries related.
+ //
+
+ /// Kind of the global variable entry.
+ enum OMPTargetGlobalVarEntryKind : uint32_t {
+ /// Mark the entry as a 'declare target to' entry.
+ OMPTargetGlobalVarEntryTo = 0x0,
+ /// Mark the entry as a 'declare target link' entry.
+ OMPTargetGlobalVarEntryLink = 0x1,
+ /// Mark the entry as a declare target enter.
+ OMPTargetGlobalVarEntryEnter = 0x2,
+ /// Mark the entry as having no declare target entry kind.
+ OMPTargetGlobalVarEntryNone = 0x3,
+ };
+
+ /// Kind of device clause for declare target variables and functions.
+ /// NOTE: Currently not used as part of a variable entry; used by Flang and
+ /// Clang to interface with the variable-related registration functions.
+ enum OMPTargetDeviceClauseKind : uint32_t {
+ /// The target is marked for all devices
+ OMPTargetDeviceClauseAny = 0x0,
+ /// The target is marked for non-host devices
+ OMPTargetDeviceClauseNoHost = 0x1,
+ /// The target is marked for host devices
+ OMPTargetDeviceClauseHost = 0x2,
+ /// The target is marked as having no clause
+ OMPTargetDeviceClauseNone = 0x3
+ };
+
+ /// Device global variable entries info.
+ class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo {
+ /// Size of the global variable.
+ int64_t VarSize;
+ /// Linkage of the global variable.
+ GlobalValue::LinkageTypes Linkage;
+
+ public:
+ OffloadEntryInfoDeviceGlobalVar()
+ : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {}
+ explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
+ OMPTargetGlobalVarEntryKind Flags)
+ : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {}
+ explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
+ int64_t VarSize,
+ OMPTargetGlobalVarEntryKind Flags,
+ GlobalValue::LinkageTypes Linkage)
+ : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags),
+ VarSize(VarSize), Linkage(Linkage) {
+ setAddress(Addr);
+ }
+
+ int64_t getVarSize() const { return VarSize; }
+ void setVarSize(int64_t Size) { VarSize = Size; }
+ GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
+ void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
+ static bool classof(const OffloadEntryInfo *Info) {
+ return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
+ }
+ };
+
+ /// Initialize device global variable entry.
+ /// This is ONLY used for DEVICE compilation.
+ void initializeDeviceGlobalVarEntryInfo(StringRef Name,
+ OMPTargetGlobalVarEntryKind Flags,
+ unsigned Order);
+
+ /// Register device global variable entry.
+ void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr,
+ int64_t VarSize,
+ OMPTargetGlobalVarEntryKind Flags,
+ GlobalValue::LinkageTypes Linkage);
+ /// Checks if the variable with the given name has been registered already.
+ bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const {
+ return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
+ }
+ /// Applies action \a Action on all registered entries.
+ typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)>
+ OffloadDeviceGlobalVarEntryInfoActTy;
+ void actOnDeviceGlobalVarEntriesInfo(
+ const OffloadDeviceGlobalVarEntryInfoActTy &Action);
+
+private:
+ /// Return the count of entries at a particular source location.
+ unsigned
+ getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const;
+
+ /// Update the count of entries at a particular source location.
+ void
+ incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo);
+
+ static TargetRegionEntryInfo
+ getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) {
+ return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID,
+ EntryInfo.FileID, EntryInfo.Line, 0);
+ }
+
+ // Count of entries at a location.
+ std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount;
+
+ // Storage for target region entries kind.
+ typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion>
+ OffloadEntriesTargetRegionTy;
+ OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
+ /// Storage for device global variable entries kind. The storage is to be
+ /// indexed by mangled name.
+ typedef StringMap<OffloadEntryInfoDeviceGlobalVar>
+ OffloadEntriesDeviceGlobalVarTy;
+ OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
+};
+
/// An interface to create LLVM-IR for OpenMP directives.
///
/// Each OpenMP directive has a corresponding public generator method.
@@ -167,13 +431,20 @@ class OpenMPIRBuilder {
public:
/// Create a new OpenMPIRBuilder operating on the given module \p M. This will
/// not have an effect on \p M (see initialize)
- OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {}
+ OpenMPIRBuilder(Module &M)
+ : M(M), Builder(M.getContext()), OffloadInfoManager(this) {}
~OpenMPIRBuilder();
/// Initialize the internal state, this will put structures types and
/// potentially other helpers into the underlying module. Must be called
- /// before any other method and only once!
- void initialize();
+ /// before any other method and only once! This internal state includes
+ /// Types used in the OpenMPIRBuilder generated from OMPKinds.def as well
+ /// as loading offload metadata for device from the OpenMP host IR file
+ /// passed in as the HostFilePath argument.
+ /// \param HostFilePath The path to the host IR file, used to load in
+ /// offload metadata for the device, allowing host and device to
+ /// maintain the same metadata mapping.
+ void initialize(StringRef HostFilePath = {});
void setConfig(OpenMPIRBuilderConfig C) { Config = C; }
@@ -502,6 +773,102 @@ public:
ArrayRef<CanonicalLoopInfo *> Loops,
InsertPointTy ComputeIP);
+ /// Get the default alignment value for given target
+ ///
+ /// \param TargetTriple Target triple
+ /// \param Features StringMap which describes extra CPU features
+ static unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple,
+ const StringMap<bool> &Features);
+
+ /// Retrieve (or create if non-existent) the address of a declare
+ /// target variable, used in conjunction with registerTargetGlobalVariable
+ /// to create declare target global variables.
+ ///
+ /// \param CaptureClause - enumerator corresponding to the OpenMP capture
+ /// clause used in conjunction with the variable being registered (link,
+ /// to, enter).
+ /// \param DeviceClause - enumerator corresponding to the OpenMP capture
+ /// clause used in conjunction with the variable being registered (nohost,
+ /// host, any)
+ /// \param IsDeclaration - boolean stating if the variable being registered
+ /// is a declaration-only and not a definition
+ /// \param IsExternallyVisible - boolean stating if the variable is externally
+ /// visible
+ /// \param EntryInfo - Unique entry information for the value generated
+ /// using getTargetEntryUniqueInfo, used to name generated pointer references
+ /// to the declare target variable
+ /// \param MangledName - the mangled name of the variable being registered
+ /// \param GeneratedRefs - references generated by invocations of
+ /// registerTargetGlobalVariable invoked from getAddrOfDeclareTargetVar;
+ /// these are required by Clang for bookkeeping.
+ /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled
+ /// \param TargetTriple - The OpenMP device target triple we are compiling
+ /// for
+ /// \param LlvmPtrTy - The type of the variable we are generating or
+ /// retrieving an address for
+ /// \param GlobalInitializer - a lambda function which creates a constant
+ /// used for initializing a pointer reference to the variable in certain
+ /// cases. If a nullptr is passed, it will default to utilising the original
+ /// variable to initialize the pointer reference.
+ /// \param VariableLinkage - a lambda function which returns the variable's
+ /// linkage type; if unspecified and a nullptr is given, it will instead
+ /// use the linkage stored on the existing global variable in the
+ /// LLVMModule.
+ Constant *getAddrOfDeclareTargetVar(
+ OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
+ OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
+ bool IsDeclaration, bool IsExternallyVisible,
+ TargetRegionEntryInfo EntryInfo, StringRef MangledName,
+ std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
+ std::vector<Triple> TargetTriple, Type *LlvmPtrTy,
+ std::function<Constant *()> GlobalInitializer,
+ std::function<GlobalValue::LinkageTypes()> VariableLinkage);
+
+ /// Registers a target variable for device or host.
+ ///
+ /// \param CaptureClause - enumerator corresponding to the OpenMP capture
+ /// clause used in conjunction with the variable being registered (link,
+ /// to, enter).
+ /// \param DeviceClause - enumerator corresponding to the OpenMP capture
+ /// clause used in conjunction with the variable being registered (nohost,
+ /// host, any)
+ /// \param IsDeclaration - boolean stating if the variable being registered
+ /// is a declaration-only and not a definition
+ /// \param IsExternallyVisible - boolean stating if the variable is externally
+ /// visible
+ /// \param EntryInfo - Unique entry information for the value generated
+ /// using getTargetEntryUniqueInfo, used to name generated pointer references
+ /// to the declare target variable
+ /// \param MangledName - the mangled name of the variable being registered
+ /// \param GeneratedRefs - references generated by invocations of
+ /// registerTargetGlobalVariable; these are required by Clang for
+ /// bookkeeping.
+ /// \param OpenMPSIMD - if OpenMP SIMD mode is currently enabled
+ /// \param TargetTriple - The OpenMP device target triple we are compiling
+ /// for
+ /// \param GlobalInitializer - a lambda function which creates a constant
+ /// used for initializing a pointer reference to the variable in certain
+ /// cases. If a nullptr is passed, it will default to utilising the original
+ /// variable to initialize the pointer reference.
+ /// \param VariableLinkage - a lambda function which returns the variable's
+ /// linkage type; if unspecified and a nullptr is given, it will instead
+ /// use the linkage stored on the existing global variable in the
+ /// LLVMModule.
+ /// \param LlvmPtrTy - The type of the variable we are generating or
+ /// retrieving an address for
+ /// \param Addr - the original llvm value (addr) of the variable to be
+ /// registered
+ void registerTargetGlobalVariable(
+ OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
+ OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
+ bool IsDeclaration, bool IsExternallyVisible,
+ TargetRegionEntryInfo EntryInfo, StringRef MangledName,
+ std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
+ std::vector<Triple> TargetTriple,
+ std::function<Constant *()> GlobalInitializer,
+ std::function<GlobalValue::LinkageTypes()> VariableLinkage,
+ Type *LlvmPtrTy, Constant *Addr);
+
private:
/// Modifies the canonical loop to be a statically-scheduled workshare loop.
///
@@ -787,6 +1154,20 @@ public:
InsertPointTy AllocaIP,
BodyGenCallbackTy BodyGenCB);
+
+ using FileIdentifierInfoCallbackTy =
+     std::function<std::tuple<std::string, uint64_t>()>;
+
+ /// Creates a unique info for a target entry when provided a filename and
+ /// line number.
+ ///
+ /// \param CallBack A callback function which should return the filename the
+ /// entry resides in as well as the line number for the target entry.
+ /// \param ParentName The name of the parent the target entry resides in, if
+ /// any.
+ static TargetRegionEntryInfo
+ getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
+ StringRef ParentName = "");
+
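An illustrative call, with a callback that returns fixed (made-up) filename and line values:

    #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
    #include <cstdint>
    #include <string>
    #include <tuple>

    static llvm::TargetRegionEntryInfo exampleEntryInfo() {
      return llvm::OpenMPIRBuilder::getTargetEntryUniqueInfo(
          [] { return std::make_tuple(std::string("input.f90"), uint64_t(17)); },
          /*ParentName=*/"foo_");
    }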
/// Functions used to generate reductions. Such functions take two Values
/// representing LHS and RHS of the reduction, respectively, and a reference
/// to the value that is updated to refer to the reduction result.
@@ -807,10 +1188,7 @@ public:
AtomicReductionGenTy AtomicReductionGen)
: ElementType(ElementType), Variable(Variable),
PrivateVariable(PrivateVariable), ReductionGen(ReductionGen),
- AtomicReductionGen(AtomicReductionGen) {
- assert(cast<PointerType>(Variable->getType())
- ->isOpaqueOrPointeeTypeMatches(ElementType) && "Invalid elem type");
- }
+ AtomicReductionGen(AtomicReductionGen) {}
/// Reduction element type, must match pointee type of variable.
Type *ElementType;
@@ -981,6 +1359,7 @@ public:
/// Generate a target region entry call.
///
/// \param Loc The location at which the request originated and is fulfilled.
+ /// \param AllocaIP The insertion point to be used for alloca instructions.
/// \param Return Return value of the created function returned by reference.
/// \param DeviceID Identifier for the device via the 'device' clause.
/// \param NumTeams Number of teams for the region via the 'num_teams' clause
@@ -988,7 +1367,8 @@ public:
/// \param NumThreads Number of threads via the 'thread_limit' clause.
/// \param HostPtr Pointer to the host-side pointer of the target kernel.
/// \param KernelArgs Array of arguments to the kernel.
- InsertPointTy emitTargetKernel(const LocationDescription &Loc, Value *&Return,
+ InsertPointTy emitTargetKernel(const LocationDescription &Loc,
+ InsertPointTy AllocaIP, Value *&Return,
Value *Ident, Value *DeviceID, Value *NumTeams,
Value *NumThreads, Value *HostPtr,
ArrayRef<Value *> KernelArgs);
@@ -1054,6 +1434,9 @@ public:
/// Map to remember existing ident_t*.
DenseMap<std::pair<Constant *, uint64_t>, Constant *> IdentMap;
+ /// Info manager to keep track of target regions.
+ OffloadEntriesInfoManager OffloadInfoManager;
+
/// Helper that contains information about regions we need to outline
/// during finalization.
struct OutlineInfo {
@@ -1086,7 +1469,29 @@ public:
/// <critical_section_name> + ".var" for "omp critical" directives; 2)
/// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
/// variables.
- StringMap<Constant*, BumpPtrAllocator> InternalVars;
+ StringMap<GlobalVariable *, BumpPtrAllocator> InternalVars;
+
+ /// Computes the size of type in bytes.
+ Value *getSizeInBytes(Value *BasePtr);
+
+ // Emit a branch from the current block to the Target block only if
+ // the current block does not already have a terminator.
+ void emitBranch(BasicBlock *Target);
+
+ // If BB has no use then delete it and return. Else place BB after the current
+ // block, if possible, or else at the end of the function. Also add a branch
+ // from current block to BB if current block does not have a terminator.
+ void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished = false);
+
+ /// Emits code for OpenMP 'if' clause using specified \a BodyGenCallbackTy
+ /// Here is the logic:
+ /// if (Cond) {
+ /// ThenGen();
+ /// } else {
+ /// ElseGen();
+ /// }
+ void emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
+ BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP = {});
/// Create the global variable holding the offload mappings information.
GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
@@ -1124,7 +1529,6 @@ public:
/// Container for the arguments used to pass data to the runtime library.
struct TargetDataRTArgs {
- explicit TargetDataRTArgs() {}
/// The array of base pointer passed to the runtime library.
Value *BasePointersArray = nullptr;
/// The array of section pointers passed to the runtime library.
@@ -1144,8 +1548,53 @@ public:
/// The array of original declaration names of mapped pointers sent to the
/// runtime library for debugging
Value *MapNamesArray = nullptr;
+
+ explicit TargetDataRTArgs() {}
+ explicit TargetDataRTArgs(Value *BasePointersArray, Value *PointersArray,
+ Value *SizesArray, Value *MapTypesArray,
+ Value *MapTypesArrayEnd, Value *MappersArray,
+ Value *MapNamesArray)
+ : BasePointersArray(BasePointersArray), PointersArray(PointersArray),
+ SizesArray(SizesArray), MapTypesArray(MapTypesArray),
+ MapTypesArrayEnd(MapTypesArrayEnd), MappersArray(MappersArray),
+ MapNamesArray(MapNamesArray) {}
};
+ /// Data structure that contains the needed information to construct the
+ /// kernel args vector.
+ struct TargetKernelArgs {
+ /// Number of arguments passed to the runtime library.
+ unsigned NumTargetItems;
+ /// Arguments passed to the runtime library
+ TargetDataRTArgs RTArgs;
+ /// The number of iterations
+ Value *NumIterations;
+ /// The number of teams.
+ Value *NumTeams;
+ /// The number of threads.
+ Value *NumThreads;
+ /// The size of the dynamic shared memory.
+ Value *DynCGGroupMem;
+ /// True if the kernel has 'no wait' clause.
+ bool HasNoWait;
+
+ /// Constructor for TargetKernelArgs
+ TargetKernelArgs(unsigned NumTargetItems, TargetDataRTArgs RTArgs,
+ Value *NumIterations, Value *NumTeams, Value *NumThreads,
+ Value *DynCGGroupMem, bool HasNoWait)
+ : NumTargetItems(NumTargetItems), RTArgs(RTArgs),
+ NumIterations(NumIterations), NumTeams(NumTeams),
+ NumThreads(NumThreads), DynCGGroupMem(DynCGGroupMem),
+ HasNoWait(HasNoWait) {}
+ };
+
+ /// Create the kernel args vector used by emitTargetKernel. This function
+ /// creates various constant values that are used in the resulting args
+ /// vector.
+ static void getKernelArgsVector(TargetKernelArgs &KernelArgs,
+ IRBuilderBase &Builder,
+ SmallVector<Value *> &ArgsVector);
+
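A rough sketch of how the struct and helper combine before a call to emitTargetKernel; every Value below is a placeholder constant and the function name is illustrative:

    #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
    using namespace llvm;

    static void buildKernelArgs(IRBuilderBase &Builder,
                                OpenMPIRBuilder::TargetDataRTArgs &RTArgs,
                                SmallVector<Value *> &ArgsVector) {
      OpenMPIRBuilder::TargetKernelArgs KArgs(
          /*NumTargetItems=*/0, RTArgs,
          /*NumIterations=*/Builder.getInt64(0),
          /*NumTeams=*/Builder.getInt32(1),
          /*NumThreads=*/Builder.getInt32(1),
          /*DynCGGroupMem=*/Builder.getInt32(0),
          /*HasNoWait=*/false);
      OpenMPIRBuilder::getKernelArgsVector(KArgs, Builder, ArgsVector);
    }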
/// Struct that keeps the information that should be kept throughout
/// a 'target data' region.
class TargetDataInfo {
@@ -1158,6 +1607,9 @@ public:
public:
TargetDataRTArgs RTArgs;
+ SmallMapVector<const Value *, std::pair<Value *, Value *>, 4>
+ DevicePtrInfoMap;
+
/// Indicate whether any user-defined mapper exists.
bool HasMapper = false;
/// The total number of pointers passed to the runtime library.
@@ -1184,6 +1636,76 @@ public:
bool separateBeginEndCalls() { return SeparateBeginEndCalls; }
};
+ enum class DeviceInfoTy { None, Pointer, Address };
+ using MapValuesArrayTy = SmallVector<Value *, 4>;
+ using MapDeviceInfoArrayTy = SmallVector<DeviceInfoTy, 4>;
+ using MapFlagsArrayTy = SmallVector<omp::OpenMPOffloadMappingFlags, 4>;
+ using MapNamesArrayTy = SmallVector<Constant *, 4>;
+ using MapDimArrayTy = SmallVector<uint64_t, 4>;
+ using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
+
+ /// This structure contains combined information generated for mappable
+ /// clauses, including base pointers, pointers, sizes, map types, user-defined
+ /// mappers, and non-contiguous information.
+ struct MapInfosTy {
+ struct StructNonContiguousInfo {
+ bool IsNonContiguous = false;
+ MapDimArrayTy Dims;
+ MapNonContiguousArrayTy Offsets;
+ MapNonContiguousArrayTy Counts;
+ MapNonContiguousArrayTy Strides;
+ };
+ MapValuesArrayTy BasePointers;
+ MapValuesArrayTy Pointers;
+ MapDeviceInfoArrayTy DevicePointers;
+ MapValuesArrayTy Sizes;
+ MapFlagsArrayTy Types;
+ MapNamesArrayTy Names;
+ StructNonContiguousInfo NonContigInfo;
+
+ /// Append arrays in \a CurInfo.
+ void append(MapInfosTy &CurInfo) {
+ BasePointers.append(CurInfo.BasePointers.begin(),
+ CurInfo.BasePointers.end());
+ Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
+ DevicePointers.append(CurInfo.DevicePointers.begin(),
+ CurInfo.DevicePointers.end());
+ Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
+ Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
+ Names.append(CurInfo.Names.begin(), CurInfo.Names.end());
+ NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
+ CurInfo.NonContigInfo.Dims.end());
+ NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
+ CurInfo.NonContigInfo.Offsets.end());
+ NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
+ CurInfo.NonContigInfo.Counts.end());
+ NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
+ CurInfo.NonContigInfo.Strides.end());
+ }
+ };
+
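For example, per-clause information can be accumulated into one combined MapInfosTy (the helper name and populated fields are illustrative):

    #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
    using namespace llvm;

    static void addToMapping(OpenMPIRBuilder::MapInfosTy &Combined, Value *Base,
                             Value *Ptr, Value *Size) {
      OpenMPIRBuilder::MapInfosTy PerClause;
      PerClause.BasePointers.push_back(Base);
      PerClause.Pointers.push_back(Ptr);
      PerClause.Sizes.push_back(Size);
      PerClause.DevicePointers.push_back(OpenMPIRBuilder::DeviceInfoTy::None);
      PerClause.Types.push_back(omp::OpenMPOffloadMappingFlags::OMP_MAP_TO);
      PerClause.Names.push_back(nullptr); // no debug name in this sketch
      Combined.append(PerClause); // concatenates every array, incl. NonContigInfo
    }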
+ /// Callback function type for functions emitting the host fallback code that
+ /// is executed when the kernel launch fails. It takes an insertion point as
+ /// parameter where the code should be emitted. It returns an insertion point
+ /// that points right after the emitted code.
+ using EmitFallbackCallbackTy = function_ref<InsertPointTy(InsertPointTy)>;
+
+ /// Generate a target region entry call and host fallback call.
+ ///
+ /// \param Loc The location at which the request originated and is fulfilled.
+ /// \param OutlinedFn The outlined kernel function.
+ /// \param OutlinedFnID The outlined function ID.
+ /// \param EmitTargetCallFallbackCB Call back function to generate host
+ /// fallback code.
+ /// \param Args Data structure holding information about the kernel arguments.
+ /// \param DeviceID Identifier for the device via the 'device' clause.
+ /// \param RTLoc Source location identifier
+ /// \param AllocaIP The insertion point to be used for alloca instructions.
+ InsertPointTy emitKernelLaunch(
+ const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID,
+ EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
+ Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP);
+
/// Emit the arguments to be passed to the runtime library based on the
/// arrays of base pointers, pointers, sizes, map types, and mappers. If
/// ForEndCall, emit map types to be passed for the end of the region instead
@@ -1194,6 +1716,21 @@ public:
bool EmitDebug = false,
bool ForEndCall = false);
+ /// Emit an array of struct descriptors to be assigned to the offload args.
+ void emitNonContiguousDescriptor(InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP,
+ MapInfosTy &CombinedInfo,
+ TargetDataInfo &Info);
+
+ /// Emit the arrays used to pass the captures and map information to the
+ /// offloading runtime library. If there is no map or capture information,
+ /// return nullptr by reference.
+ void emitOffloadingArrays(
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
+ TargetDataInfo &Info, bool IsNonContiguous = false,
+ function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
+ function_ref<Value *(unsigned int)> CustomMapperCB = nullptr);
+
/// Creates offloading entry for the provided entry ID \a ID, address \a
/// Addr, size \a Size, and flags \a Flags.
void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size,
@@ -1219,7 +1756,6 @@ public:
//
// We only generate metadata for functions that contain target regions.
void createOffloadEntriesAndInfoMetadata(
- OffloadEntriesInfoManager &OffloadEntriesInfoManager,
EmitMetadataErrorReportFunctionTy &ErrorReportFunction);
public:
@@ -1519,8 +2055,7 @@ public:
/// \param NumThreads Default number of threads
/// \param OutlinedFunction Pointer to the outlined function
/// \param EntryFnIDName Name of the ID to be created
- void emitTargetRegionFunction(OffloadEntriesInfoManager &InfoManager,
- TargetRegionEntryInfo &EntryInfo,
+ void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo,
FunctionGenCallback &GenerateFunctionCallback,
int32_t NumTeams, int32_t NumThreads,
bool IsOffloadEntry, Function *&OutlinedFn,
@@ -1536,12 +2071,74 @@ public:
/// \param EntryFnIDName Name of the ID to be created
/// \param NumTeams Default number of teams
/// \param NumThreads Default number of threads
- Constant *registerTargetRegionFunction(OffloadEntriesInfoManager &InfoManager,
- TargetRegionEntryInfo &EntryInfo,
+ Constant *registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo,
Function *OutlinedFunction,
StringRef EntryFnName,
StringRef EntryFnIDName,
int32_t NumTeams, int32_t NumThreads);
+ /// Type of BodyGen to use for region codegen
+ ///
+ /// Priv: If device pointer privatization is required, emit the body of the
+ /// region here. It will have to be duplicated: with and without
+ /// privatization.
+ /// DupNoPriv: If we need device pointer privatization, we need
+ /// to emit the body of the region with no privatization in the 'else' branch
+ /// of the conditional.
+ /// NoPriv: If we don't require privatization of device
+ /// pointers, we emit the body in between the runtime calls. This avoids
+ /// duplicating the body code.
+ enum BodyGenTy { Priv, DupNoPriv, NoPriv };
+
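As a sketch, a body generator passed to createTargetData (described just below) might emit the region body only in the NoPriv case when no device-pointer privatization is needed; the lambda name is illustrative:

    auto BodyCB = [](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
                     llvm::OpenMPIRBuilder::BodyGenTy BodyGenType)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      if (BodyGenType == llvm::OpenMPIRBuilder::BodyGenTy::NoPriv) {
        // Emit the region body between the begin/end mapper calls here.
      }
      return CodeGenIP;
    };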
+ /// Generator for '#omp target data'
+ ///
+ /// \param Loc The location where the target data construct was encountered.
+ /// \param AllocaIP The insertion points to be used for alloca instructions.
+ /// \param CodeGenIP The insertion point at which the target directive code
+ /// should be placed.
+ /// \param IsBegin If true, emits the begin mapper call; otherwise emits the
+ /// end mapper call.
+ /// \param DeviceID Stores the DeviceID from the device clause.
+ /// \param IfCond Value which corresponds to the if clause condition.
+ /// \param Info Stores all information related to the Target Data directive.
+ /// \param GenMapInfoCB Callback that populates and returns the MapInfos.
+ /// \param BodyGenCB Optional Callback to generate the region code.
+ /// \param DeviceAddrCB Optional callback to generate code related to
+ /// use_device_ptr and use_device_addr.
+ /// \param CustomMapperCB Optional callback to generate code related to
+ /// custom mappers.
+ OpenMPIRBuilder::InsertPointTy createTargetData(
+ const LocationDescription &Loc, InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
+ TargetDataInfo &Info,
+ function_ref<MapInfosTy &(InsertPointTy CodeGenIP)> GenMapInfoCB,
+ omp::RuntimeFunction *MapperFunc = nullptr,
+ function_ref<InsertPointTy(InsertPointTy CodeGenIP,
+ BodyGenTy BodyGenType)>
+ BodyGenCB = nullptr,
+ function_ref<void(unsigned int, Value *)> DeviceAddrCB = nullptr,
+ function_ref<Value *(unsigned int)> CustomMapperCB = nullptr,
+ Value *SrcLocInfo = nullptr);
+
+ using TargetBodyGenCallbackTy = function_ref<InsertPointTy(
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP)>;
+
+ /// Generator for '#omp target'
+ ///
+ /// \param Loc The location where the target construct was encountered.
+ /// \param CodeGenIP The insertion point where the call to the outlined
+ /// function should be emitted.
+ /// \param EntryInfo The entry information about the function.
+ /// \param NumTeams Number of teams specified in the num_teams clause.
+ /// \param NumThreads Number of threads specified in the thread_limit clause.
+ /// \param Inputs The input values to the region that will be passed
+ /// as arguments to the outlined function.
+ /// \param BodyGenCB Callback that will generate the region code.
+ InsertPointTy createTarget(const LocationDescription &Loc,
+ OpenMPIRBuilder::InsertPointTy CodeGenIP,
+ TargetRegionEntryInfo &EntryInfo, int32_t NumTeams,
+ int32_t NumThreads,
+ SmallVectorImpl<Value *> &Inputs,
+ TargetBodyGenCallbackTy BodyGenCB);
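A rough usage sketch for the new createTargetData entry point, based only on the signatures added above. OMPBuilder, Builder, Loc, AllocaIP, CodeGenIP, DeviceID, IfCond, Info, and MapInfos are illustrative caller-owned names, not part of this header.

    // Assumed caller state: OpenMPIRBuilder OMPBuilder, IRBuilder<> Builder,
    // OpenMPIRBuilder::MapInfosTy MapInfos, plus Loc/AllocaIP/CodeGenIP/
    // DeviceID/IfCond/Info already set up for the construct.
    auto GenMapInfoCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP)
        -> OpenMPIRBuilder::MapInfosTy & {
      // Fill MapInfos (base pointers, pointers, sizes, map types) here.
      return MapInfos;
    };
    auto BodyGenCB = [&](OpenMPIRBuilder::InsertPointTy CodeGenIP,
                         OpenMPIRBuilder::BodyGenTy BodyGenType)
        -> OpenMPIRBuilder::InsertPointTy {
      // Emit the region body; BodyGenType selects Priv/DupNoPriv/NoPriv.
      return Builder.saveIP();
    };
    Builder.restoreIP(OMPBuilder.createTargetData(
        Loc, AllocaIP, CodeGenIP, DeviceID, IfCond, Info, GenMapInfoCB,
        /*MapperFunc=*/nullptr, BodyGenCB));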
/// Declarations for LLVM-IR types (simple, array, function and structure) are
/// generated below. Their names are defined and used in OpenMPKinds.def. Here
@@ -1883,10 +2480,7 @@ public:
///
/// \param M Module to load Metadata info from. Module passed maybe
/// loaded from bitcode file, i.e, different from OpenMPIRBuilder::M module.
- /// \param OffloadEntriesInfoManager Initialize Offload Entry information.
- void
- loadOffloadInfoMetadata(Module &M,
- OffloadEntriesInfoManager &OffloadEntriesInfoManager);
+ void loadOffloadInfoMetadata(Module &M);
/// Gets (if variable with the given name already exist) or creates
/// internal global variable with the specified Name. The created variable has
@@ -1898,253 +2492,6 @@ public:
unsigned AddressSpace = 0);
};
-/// Data structure to contain the information needed to uniquely identify
-/// a target entry.
-struct TargetRegionEntryInfo {
- std::string ParentName;
- unsigned DeviceID;
- unsigned FileID;
- unsigned Line;
- unsigned Count;
-
- TargetRegionEntryInfo()
- : ParentName(""), DeviceID(0), FileID(0), Line(0), Count(0) {}
- TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID,
- unsigned FileID, unsigned Line, unsigned Count = 0)
- : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line),
- Count(Count) {}
-
- static void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
- StringRef ParentName,
- unsigned DeviceID, unsigned FileID,
- unsigned Line, unsigned Count);
-
- bool operator<(const TargetRegionEntryInfo RHS) const {
- return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) <
- std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line,
- RHS.Count);
- }
-};
-
-/// Class that manages information about offload code regions and data
-class OffloadEntriesInfoManager {
- /// Number of entries registered so far.
- OpenMPIRBuilderConfig Config;
- unsigned OffloadingEntriesNum = 0;
-
-public:
- void setConfig(OpenMPIRBuilderConfig C) { Config = C; }
-
- /// Base class of the entries info.
- class OffloadEntryInfo {
- public:
- /// Kind of a given entry.
- enum OffloadingEntryInfoKinds : unsigned {
- /// Entry is a target region.
- OffloadingEntryInfoTargetRegion = 0,
- /// Entry is a declare target variable.
- OffloadingEntryInfoDeviceGlobalVar = 1,
- /// Invalid entry info.
- OffloadingEntryInfoInvalid = ~0u
- };
-
- protected:
- OffloadEntryInfo() = delete;
- explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {}
- explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order,
- uint32_t Flags)
- : Flags(Flags), Order(Order), Kind(Kind) {}
- ~OffloadEntryInfo() = default;
-
- public:
- bool isValid() const { return Order != ~0u; }
- unsigned getOrder() const { return Order; }
- OffloadingEntryInfoKinds getKind() const { return Kind; }
- uint32_t getFlags() const { return Flags; }
- void setFlags(uint32_t NewFlags) { Flags = NewFlags; }
- Constant *getAddress() const { return cast_or_null<Constant>(Addr); }
- void setAddress(Constant *V) {
- assert(!Addr.pointsToAliveValue() && "Address has been set before!");
- Addr = V;
- }
- static bool classof(const OffloadEntryInfo *Info) { return true; }
-
- private:
- /// Address of the entity that has to be mapped for offloading.
- WeakTrackingVH Addr;
-
- /// Flags associated with the device global.
- uint32_t Flags = 0u;
-
- /// Order this entry was emitted.
- unsigned Order = ~0u;
-
- OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid;
- };
-
- /// Return true if a there are no entries defined.
- bool empty() const;
- /// Return number of entries defined so far.
- unsigned size() const { return OffloadingEntriesNum; }
-
- OffloadEntriesInfoManager() : Config() {}
-
- //
- // Target region entries related.
- //
-
- /// Kind of the target registry entry.
- enum OMPTargetRegionEntryKind : uint32_t {
- /// Mark the entry as target region.
- OMPTargetRegionEntryTargetRegion = 0x0,
- /// Mark the entry as a global constructor.
- OMPTargetRegionEntryCtor = 0x02,
- /// Mark the entry as a global destructor.
- OMPTargetRegionEntryDtor = 0x04,
- };
-
- /// Target region entries info.
- class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo {
- /// Address that can be used as the ID of the entry.
- Constant *ID = nullptr;
-
- public:
- OffloadEntryInfoTargetRegion()
- : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {}
- explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr,
- Constant *ID,
- OMPTargetRegionEntryKind Flags)
- : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags),
- ID(ID) {
- setAddress(Addr);
- }
-
- Constant *getID() const { return ID; }
- void setID(Constant *V) {
- assert(!ID && "ID has been set before!");
- ID = V;
- }
- static bool classof(const OffloadEntryInfo *Info) {
- return Info->getKind() == OffloadingEntryInfoTargetRegion;
- }
- };
-
- /// Initialize target region entry.
- /// This is ONLY needed for DEVICE compilation.
- void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo,
- unsigned Order);
- /// Register target region entry.
- void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
- Constant *Addr, Constant *ID,
- OMPTargetRegionEntryKind Flags);
- /// Return true if a target region entry with the provided information
- /// exists.
- bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo,
- bool IgnoreAddressId = false) const;
-
- // Return the Name based on \a EntryInfo using the next available Count.
- void getTargetRegionEntryFnName(SmallVectorImpl<char> &Name,
- const TargetRegionEntryInfo &EntryInfo);
-
- /// brief Applies action \a Action on all registered entries.
- typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo,
- const OffloadEntryInfoTargetRegion &)>
- OffloadTargetRegionEntryInfoActTy;
- void
- actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action);
-
- //
- // Device global variable entries related.
- //
-
- /// Kind of the global variable entry..
- enum OMPTargetGlobalVarEntryKind : uint32_t {
- /// Mark the entry as a to declare target.
- OMPTargetGlobalVarEntryTo = 0x0,
- /// Mark the entry as a to declare target link.
- OMPTargetGlobalVarEntryLink = 0x1,
- };
-
- /// Device global variable entries info.
- class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo {
- /// Type of the global variable.
- int64_t VarSize;
- GlobalValue::LinkageTypes Linkage;
-
- public:
- OffloadEntryInfoDeviceGlobalVar()
- : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {}
- explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
- OMPTargetGlobalVarEntryKind Flags)
- : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {}
- explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
- int64_t VarSize,
- OMPTargetGlobalVarEntryKind Flags,
- GlobalValue::LinkageTypes Linkage)
- : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags),
- VarSize(VarSize), Linkage(Linkage) {
- setAddress(Addr);
- }
-
- int64_t getVarSize() const { return VarSize; }
- void setVarSize(int64_t Size) { VarSize = Size; }
- GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
- void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
- static bool classof(const OffloadEntryInfo *Info) {
- return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
- }
- };
-
- /// Initialize device global variable entry.
- /// This is ONLY used for DEVICE compilation.
- void initializeDeviceGlobalVarEntryInfo(StringRef Name,
- OMPTargetGlobalVarEntryKind Flags,
- unsigned Order);
-
- /// Register device global variable entry.
- void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr,
- int64_t VarSize,
- OMPTargetGlobalVarEntryKind Flags,
- GlobalValue::LinkageTypes Linkage);
- /// Checks if the variable with the given name has been registered already.
- bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const {
- return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
- }
- /// Applies action \a Action on all registered entries.
- typedef function_ref<void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)>
- OffloadDeviceGlobalVarEntryInfoActTy;
- void actOnDeviceGlobalVarEntriesInfo(
- const OffloadDeviceGlobalVarEntryInfoActTy &Action);
-
-private:
- /// Return the count of entries at a particular source location.
- unsigned
- getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const;
-
- /// Update the count of entries at a particular source location.
- void
- incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo);
-
- static TargetRegionEntryInfo
- getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) {
- return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID,
- EntryInfo.FileID, EntryInfo.Line, 0);
- }
-
- // Count of entries at a location.
- std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount;
-
- // Storage for target region entries kind.
- typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion>
- OffloadEntriesTargetRegionTy;
- OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
- /// Storage for device global variable entries kind. The storage is to be
- /// indexed by mangled name.
- typedef StringMap<OffloadEntryInfoDeviceGlobalVar>
- OffloadEntriesDeviceGlobalVarTy;
- OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
-};
-
/// Class to represent the control flow structure of an OpenMP canonical loop.
///
/// The control-flow structure is standardized for easy consumption by
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 8a09fb7cb7a6..b2dfda490585 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -402,7 +402,7 @@ __OMP_RTL(__kmpc_aligned_alloc, false, VoidPtr, /* Int */ Int32, SizeTy, SizeTy,
__OMP_RTL(__kmpc_free, false, Void, /* Int */ Int32, VoidPtr, VoidPtr)
__OMP_RTL(__tgt_interop_init, false, Void, IdentPtr, Int32, VoidPtrPtr, Int32,
- Int32, Int64, VoidPtr, Int32)
+ Int32, Int32, VoidPtr, Int32)
__OMP_RTL(__tgt_interop_destroy, false, Void, IdentPtr, Int32, VoidPtrPtr,
Int32, Int32, VoidPtr, Int32)
__OMP_RTL(__tgt_interop_use, false, Void, IdentPtr, Int32, VoidPtrPtr, Int32,
@@ -512,11 +512,19 @@ __OMP_ATTRS_SET(
MemoryAttr(MemoryEffects::inaccessibleMemOnly(ModRefInfo::Ref)))
: AttributeSet(EnumAttr(NoUnwind)))
__OMP_ATTRS_SET(
+ GetterArgReadAttrs,
+ OptimisticAttributes
+ ? AttributeSet(
+ EnumAttr(NoUnwind), EnumAttr(NoSync), EnumAttr(NoFree),
+ EnumAttr(WillReturn),
+ MemoryAttr(MemoryEffects::inaccessibleOrArgMemOnly(ModRefInfo::Ref)))
+ : AttributeSet(EnumAttr(NoUnwind)))
+__OMP_ATTRS_SET(
GetterArgWriteAttrs,
OptimisticAttributes
? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), EnumAttr(NoFree),
EnumAttr(WillReturn),
- MemoryAttr(MemoryEffects::inaccessibleOrArgMemOnly()))
+ MemoryAttr(MemoryEffects::argMemOnly() | MemoryEffects::inaccessibleMemOnly(ModRefInfo::Ref)))
: AttributeSet(EnumAttr(NoUnwind)))
__OMP_ATTRS_SET(
SetterAttrs,
@@ -642,7 +650,7 @@ __OMP_RTL_ATTRS(__kmpc_error, AttributeSet(), AttributeSet(),
ParamAttrs(AttributeSet(), SExt))
__OMP_RTL_ATTRS(__kmpc_flush, BarrierAttrs, AttributeSet(),
ParamAttrs(ReadOnlyPtrAttrs))
-__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, SExt,
+__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterArgReadAttrs, SExt,
ParamAttrs(ReadOnlyPtrAttrs))
__OMP_RTL_ATTRS(__kmpc_get_hardware_thread_id_in_block, GetterAttrs, ZExt,
ParamAttrs())
@@ -693,9 +701,9 @@ __OMP_RTL_ATTRS(omp_get_place_proc_ids, GetterArgWriteAttrs, AttributeSet(),
EnumAttr(WriteOnly))))
__OMP_RTL_ATTRS(omp_get_place_num, GetterAttrs, SExt, ParamAttrs())
__OMP_RTL_ATTRS(omp_get_partition_num_places, GetterAttrs, SExt, ParamAttrs())
-__OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterAttrs, AttributeSet(),
+__OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterArgWriteAttrs, AttributeSet(),
ParamAttrs())
-__OMP_RTL_ATTRS(omp_get_wtime, GetterArgWriteAttrs, AttributeSet(), ParamAttrs())
+__OMP_RTL_ATTRS(omp_get_wtime, GetterAttrs, AttributeSet(), ParamAttrs())
__OMP_RTL_ATTRS(omp_set_num_threads, SetterAttrs, AttributeSet(),
ParamAttrs(SExt))
diff --git a/llvm/include/llvm/FuzzMutate/IRMutator.h b/llvm/include/llvm/FuzzMutate/IRMutator.h
index 38f8f7ba1623..dd4534bd9d1a 100644
--- a/llvm/include/llvm/FuzzMutate/IRMutator.h
+++ b/llvm/include/llvm/FuzzMutate/IRMutator.h
@@ -70,7 +70,19 @@ public:
: AllowedTypes(std::move(AllowedTypes)),
Strategies(std::move(Strategies)) {}
- void mutateModule(Module &M, int Seed, size_t CurSize, size_t MaxSize);
+ /// Calculate the size of the module as the number of objects in it, i.e.
+ /// instructions, basic blocks, functions, and aliases.
+ ///
+ /// \param M module
+ /// \return number of objects in the module
+ static size_t getModuleSize(const Module &M);
+
+ /// Mutate the given module. No change will be made if no strategy is selected.
+ ///
+ /// \param M module to mutate
+ /// \param Seed seed for random mutation
+ /// \param MaxSize max module size (see getModuleSize)
+ void mutateModule(Module &M, int Seed, size_t MaxSize);
};
/// Strategy that injects operations into the function.
@@ -81,6 +93,7 @@ class InjectorIRStrategy : public IRMutationStrategy {
RandomIRBuilder &IB);
public:
+ InjectorIRStrategy() : Operations(getDefaultOps()) {}
InjectorIRStrategy(std::vector<fuzzerop::OpDescriptor> &&Operations)
: Operations(std::move(Operations)) {}
static std::vector<fuzzerop::OpDescriptor> getDefaultOps();
@@ -118,6 +131,20 @@ public:
void mutate(Instruction &Inst, RandomIRBuilder &IB) override;
};
+/// Strategy that generates new function calls and inserts function signatures
+/// into the module. If any signatures are already present in the module, they
+/// will be called.
+class InsertFunctionStrategy : public IRMutationStrategy {
+public:
+ uint64_t getWeight(size_t CurrentSize, size_t MaxSize,
+ uint64_t CurrentWeight) override {
+ return 10;
+ }
+
+ using IRMutationStrategy::mutate;
+ void mutate(BasicBlock &BB, RandomIRBuilder &IB) override;
+};
+
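A hedged sketch of wiring the strategies above into an IRMutator. The element types of the two vectors (TypeGetter and unique_ptr<IRMutationStrategy>) follow the existing constructor in this header but are not shown in the hunk, so treat the exact spellings as assumptions; M and Seed are placeholders supplied by the caller.

    // Assumed caller state: Module &M; needs llvm/FuzzMutate/IRMutator.h.
    int Seed = 0;  // illustrative
    std::vector<TypeGetter> Types{Type::getInt32Ty, Type::getInt64Ty,
                                  Type::getFloatTy};
    std::vector<std::unique_ptr<IRMutationStrategy>> Strategies;
    Strategies.push_back(std::make_unique<InjectorIRStrategy>());   // default ops
    Strategies.push_back(std::make_unique<InsertFunctionStrategy>());
    IRMutator Mutator(std::move(Types), std::move(Strategies));
    size_t MaxSize = IRMutator::getModuleSize(M) + 100;
    Mutator.mutateModule(M, Seed, MaxSize);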
/// Strategy to split a random block and insert a random CFG in between.
class InsertCFGStrategy : public IRMutationStrategy {
private:
diff --git a/llvm/include/llvm/FuzzMutate/OpDescriptor.h b/llvm/include/llvm/FuzzMutate/OpDescriptor.h
index 800586da8f2d..00a8ea0e5bab 100644
--- a/llvm/include/llvm/FuzzMutate/OpDescriptor.h
+++ b/llvm/include/llvm/FuzzMutate/OpDescriptor.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include <functional>
@@ -117,6 +118,20 @@ static inline SourcePred anyIntType() {
return {Pred, Make};
}
+static inline SourcePred anyIntOrVecIntType() {
+ auto Pred = [](ArrayRef<Value *>, const Value *V) {
+ return V->getType()->isIntOrIntVectorTy();
+ };
+ return {Pred, std::nullopt};
+}
+
+static inline SourcePred boolOrVecBoolType() {
+ auto Pred = [](ArrayRef<Value *>, const Value *V) {
+ return V->getType()->isIntOrIntVectorTy(1);
+ };
+ return {Pred, std::nullopt};
+}
+
static inline SourcePred anyFloatType() {
auto Pred = [](ArrayRef<Value *>, const Value *V) {
return V->getType()->isFloatingPointTy();
@@ -125,6 +140,13 @@ static inline SourcePred anyFloatType() {
return {Pred, Make};
}
+static inline SourcePred anyFloatOrVecFloatType() {
+ auto Pred = [](ArrayRef<Value *>, const Value *V) {
+ return V->getType()->isFPOrFPVectorTy();
+ };
+ return {Pred, std::nullopt};
+}
+
static inline SourcePred anyPtrType() {
auto Pred = [](ArrayRef<Value *>, const Value *V) {
return V->getType()->isPointerTy() && !V->isSwiftError();
@@ -144,14 +166,13 @@ static inline SourcePred sizedPtrType() {
if (V->isSwiftError())
return false;
- if (const auto *PtrT = dyn_cast<PointerType>(V->getType()))
- return PtrT->isOpaque() ||
- PtrT->getNonOpaquePointerElementType()->isSized();
- return false;
+ return V->getType()->isPointerTy();
};
auto Make = [](ArrayRef<Value *>, ArrayRef<Type *> Ts) {
std::vector<Constant *> Result;
+ // TODO: This doesn't really make sense with opaque pointers,
+ // as the pointer type will always be the same.
for (Type *T : Ts)
if (T->isSized())
Result.push_back(UndefValue::get(PointerType::getUnqual(T)));
@@ -161,6 +182,54 @@ static inline SourcePred sizedPtrType() {
return {Pred, Make};
}
+static inline SourcePred matchFirstLengthWAnyType() {
+ auto Pred = [](ArrayRef<Value *> Cur, const Value *V) {
+ assert(!Cur.empty() && "No first source yet");
+ Type *This = V->getType(), *First = Cur[0]->getType();
+ VectorType *ThisVec = dyn_cast<VectorType>(This);
+ VectorType *FirstVec = dyn_cast<VectorType>(First);
+ if (ThisVec && FirstVec) {
+ return ThisVec->getElementCount() == FirstVec->getElementCount();
+ }
+ return (ThisVec == nullptr) && (FirstVec == nullptr) && (!This->isVoidTy());
+ };
+ auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *> BaseTypes) {
+ assert(!Cur.empty() && "No first source yet");
+ std::vector<Constant *> Result;
+ ElementCount EC;
+ bool isVec = false;
+ if (VectorType *VecTy = dyn_cast<VectorType>(Cur[0]->getType())) {
+ EC = VecTy->getElementCount();
+ isVec = true;
+ }
+ for (Type *T : BaseTypes) {
+ if (VectorType::isValidElementType(T)) {
+ if (isVec)
+ // If the first source is a vector of N elements, make the result <N x T>
+ makeConstantsWithType(VectorType::get(T, EC), Result);
+ else
+ makeConstantsWithType(T, Result);
+ }
+ }
+ assert(!Result.empty() && "No potential constants.");
+ return Result;
+ };
+ return {Pred, Make};
+}
+
+/// Match values that have the same type as the second source.
+static inline SourcePred matchSecondType() {
+ auto Pred = [](ArrayRef<Value *> Cur, const Value *V) {
+ assert((Cur.size() > 1) && "No second source yet");
+ return V->getType() == Cur[1]->getType();
+ };
+ auto Make = [](ArrayRef<Value *> Cur, ArrayRef<Type *>) {
+ assert((Cur.size() > 1) && "No second source yet");
+ return makeConstantsWithType(Cur[1]->getType());
+ };
+ return {Pred, Make};
+}
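For illustration only, a matcher written in the same shape as the helpers above (it would live alongside them in namespace fuzzerop; anyVectorType is a made-up name, not part of the header):

    static inline SourcePred anyVectorType() {
      auto Pred = [](ArrayRef<Value *>, const Value *V) {
        return V->getType()->isVectorTy();
      };
      // No constant generator: callers must already have a suitable source.
      return {Pred, std::nullopt};
    }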
+
static inline SourcePred anyAggregateType() {
auto Pred = [](ArrayRef<Value *>, const Value *V) {
// We can't index zero sized arrays.
diff --git a/llvm/include/llvm/FuzzMutate/Operations.h b/llvm/include/llvm/FuzzMutate/Operations.h
index d2a6180633ea..84155730a93c 100644
--- a/llvm/include/llvm/FuzzMutate/Operations.h
+++ b/llvm/include/llvm/FuzzMutate/Operations.h
@@ -28,12 +28,16 @@ void describeFuzzerControlFlowOps(std::vector<fuzzerop::OpDescriptor> &Ops);
void describeFuzzerPointerOps(std::vector<fuzzerop::OpDescriptor> &Ops);
void describeFuzzerAggregateOps(std::vector<fuzzerop::OpDescriptor> &Ops);
void describeFuzzerVectorOps(std::vector<fuzzerop::OpDescriptor> &Ops);
+void describeFuzzerUnaryOperations(std::vector<fuzzerop::OpDescriptor> &Ops);
+void describeFuzzerOtherOps(std::vector<fuzzerop::OpDescriptor> &Ops);
/// @}
namespace fuzzerop {
/// Descriptors for individual operations.
/// @{
+OpDescriptor selectDescriptor(unsigned Weight);
+OpDescriptor fnegDescriptor(unsigned Weight);
OpDescriptor binOpDescriptor(unsigned Weight, Instruction::BinaryOps Op);
OpDescriptor cmpOpDescriptor(unsigned Weight, Instruction::OtherOps CmpOp,
CmpInst::Predicate Pred);
@@ -44,6 +48,7 @@ OpDescriptor insertValueDescriptor(unsigned Weight);
OpDescriptor extractElementDescriptor(unsigned Weight);
OpDescriptor insertElementDescriptor(unsigned Weight);
OpDescriptor shuffleVectorDescriptor(unsigned Weight);
+
/// @}
} // namespace fuzzerop
diff --git a/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h b/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h
index a7048f6def89..1a422fcc0be6 100644
--- a/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h
+++ b/llvm/include/llvm/FuzzMutate/RandomIRBuilder.h
@@ -18,11 +18,16 @@
#include <random>
namespace llvm {
+class AllocaInst;
class BasicBlock;
+class Function;
+class GlobalVariable;
class Instruction;
class LLVMContext;
+class Module;
class Type;
class Value;
+
namespace fuzzerop {
class SourcePred;
}
@@ -33,11 +38,32 @@ struct RandomIRBuilder {
RandomEngine Rand;
SmallVector<Type *, 16> KnownTypes;
+ uint64_t MinArgNum = 0;
+ uint64_t MaxArgNum = 5;
+ uint64_t MinFunctionNum = 1;
+
RandomIRBuilder(int Seed, ArrayRef<Type *> AllowedTypes)
: Rand(Seed), KnownTypes(AllowedTypes.begin(), AllowedTypes.end()) {}
// TODO: Try to make this a bit less of a random mishmash of functions.
+ /// Create a stack allocation at the head of the function; store \c Init to
+ /// the allocation if provided.
+ AllocaInst *createStackMemory(Function *F, Type *Ty, Value *Init = nullptr);
+ /// Find or create a global variable. It will be initialized by random
+ /// constants that satisfy \c Pred. It also reports whether the global
+ /// variable was found or created.
+ std::pair<GlobalVariable *, bool>
+ findOrCreateGlobalVariable(Module *M, ArrayRef<Value *> Srcs,
+ fuzzerop::SourcePred Pred);
+ enum SourceType {
+ SrcFromInstInCurBlock,
+ FunctionArgument,
+ InstInDominator,
+ SrcFromGlobalVariable,
+ NewConstOrStack,
+ EndOfValueSource,
+ };
/// Find a "source" for some operation, which will be used in one of the
/// operation's operands. This either selects an instruction in \c Insts or
/// returns some new arbitrary Value.
@@ -54,17 +80,29 @@ struct RandomIRBuilder {
Value *newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
ArrayRef<Value *> Srcs, fuzzerop::SourcePred Pred,
bool allowConstant = true);
+
+ enum SinkType {
+ /// TODO: Also consider pointers in function arguments.
+ SinkToInstInCurBlock,
+ PointersInDominator,
+ InstInDominatee,
+ NewStore,
+ SinkToGlobalVariable,
+ EndOfValueSink,
+ };
/// Find a viable user for \c V in \c Insts, which should all be contained in
/// \c BB. This may also create some new instruction in \c BB and use that.
- void connectToSink(BasicBlock &BB, ArrayRef<Instruction *> Insts, Value *V);
+ Instruction *connectToSink(BasicBlock &BB, ArrayRef<Instruction *> Insts,
+ Value *V);
/// Create a user for \c V in \c BB.
- void newSink(BasicBlock &BB, ArrayRef<Instruction *> Insts, Value *V);
- Value *findPointer(BasicBlock &BB, ArrayRef<Instruction *> Insts,
- ArrayRef<Value *> Srcs, fuzzerop::SourcePred Pred);
- Type *chooseType(LLVMContext &Context, ArrayRef<Value *> Srcs,
- fuzzerop::SourcePred Pred);
+ Instruction *newSink(BasicBlock &BB, ArrayRef<Instruction *> Insts, Value *V);
+ Value *findPointer(BasicBlock &BB, ArrayRef<Instruction *> Insts);
/// Return a uniformly chosen type from \c AllowedTypes
Type *randomType();
+ Function *createFunctionDeclaration(Module &M, uint64_t ArgNum);
+ Function *createFunctionDeclaration(Module &M);
+ Function *createFunctionDefinition(Module &M, uint64_t ArgNum);
+ Function *createFunctionDefinition(Module &M);
};
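A small sketch of the builder's expanded surface, assuming an LLVMContext Ctx and a Module M exist; the seed and chosen types are arbitrary examples.

    RandomIRBuilder IB(/*Seed=*/0,
                       {Type::getInt32Ty(Ctx), Type::getFloatTy(Ctx)});
    Type *T = IB.randomType();                       // uniformly chosen type
    Function *Decl = IB.createFunctionDeclaration(M);
    Function *Def = IB.createFunctionDefinition(M, /*ArgNum=*/2);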
} // namespace llvm
diff --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h
index dba1dbc26ae3..f0c0ce75d2b7 100644
--- a/llvm/include/llvm/IR/Argument.h
+++ b/llvm/include/llvm/IR/Argument.h
@@ -63,6 +63,10 @@ public:
/// number of bytes known to be dereferenceable. Otherwise, zero is returned.
uint64_t getDereferenceableOrNullBytes() const;
+ /// If this argument has the nofpclass attribute, return the mask representing
+ /// disallowed floating-point values. Otherwise, fcNone is returned.
+ FPClassTest getNoFPClass() const;
+
/// Return true if this argument has the byval attribute.
bool hasByValAttr() const;
diff --git a/llvm/include/llvm/IR/AttributeMask.h b/llvm/include/llvm/IR/AttributeMask.h
new file mode 100644
index 000000000000..857761149ff4
--- /dev/null
+++ b/llvm/include/llvm/IR/AttributeMask.h
@@ -0,0 +1,86 @@
+//===- llvm/AttributeMask.h - Mask for Attributes ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+// This file declares the AttributeMask class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_ATTRIBUTEMASK_H
+#define LLVM_IR_ATTRIBUTEMASK_H
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/Attributes.h"
+#include <bitset>
+#include <cassert>
+#include <set>
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// This class stores enough information to efficiently remove some attributes
+/// from an existing AttrBuilder, AttributeSet or AttributeList.
+class AttributeMask {
+ std::bitset<Attribute::EndAttrKinds> Attrs;
+ std::set<SmallString<32>, std::less<>> TargetDepAttrs;
+
+public:
+ AttributeMask() = default;
+ AttributeMask(const AttributeMask &) = delete;
+ AttributeMask(AttributeMask &&) = default;
+
+ AttributeMask(AttributeSet AS) {
+ for (Attribute A : AS)
+ addAttribute(A);
+ }
+
+ /// Add an attribute to the mask.
+ AttributeMask &addAttribute(Attribute::AttrKind Val) {
+ assert((unsigned)Val < Attribute::EndAttrKinds &&
+ "Attribute out of range!");
+ Attrs[Val] = true;
+ return *this;
+ }
+
+ /// Add the Attribute object to the builder.
+ AttributeMask &addAttribute(Attribute A) {
+ if (A.isStringAttribute())
+ addAttribute(A.getKindAsString());
+ else
+ addAttribute(A.getKindAsEnum());
+ return *this;
+ }
+
+ /// Add the target-dependent attribute to the builder.
+ AttributeMask &addAttribute(StringRef A) {
+ TargetDepAttrs.insert(A);
+ return *this;
+ }
+
+ /// Return true if the builder has the specified attribute.
+ bool contains(Attribute::AttrKind A) const {
+ assert((unsigned)A < Attribute::EndAttrKinds && "Attribute out of range!");
+ return Attrs[A];
+ }
+
+ /// Return true if the builder has the specified target-dependent
+ /// attribute.
+ bool contains(StringRef A) const { return TargetDepAttrs.count(A); }
+
+ /// Return true if the mask contains the specified attribute.
+ bool contains(Attribute A) const {
+ if (A.isStringAttribute())
+ return contains(A.getKindAsString());
+ return contains(A.getKindAsEnum());
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_IR_ATTRIBUTEMASK_H
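A minimal usage sketch of the relocated class; the specific attributes are arbitrary examples, not mandated by the header.

    AttributeMask Mask;
    Mask.addAttribute(Attribute::NoUnwind)      // enum attribute
        .addAttribute("target-features");       // target-dependent string attribute
    bool StripNoUnwind = Mask.contains(Attribute::NoUnwind);    // true
    bool StripFeatures = Mask.contains("target-features");      // true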
diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
index c4e12a673ed2..db33b5400471 100644
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -18,17 +18,15 @@
#include "llvm-c/Types.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitmaskEnum.h"
-#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/ModRef.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
-#include <bitset>
#include <cassert>
#include <cstdint>
#include <optional>
-#include <set>
#include <string>
#include <utility>
@@ -42,9 +40,9 @@ class AttributeSetNode;
class FoldingSetNodeID;
class Function;
class LLVMContext;
-class MemoryEffects;
class Type;
class raw_ostream;
+enum FPClassTest : unsigned;
enum class AllocFnKind : uint64_t {
Unknown = 0,
@@ -148,6 +146,7 @@ public:
static Attribute getWithInAllocaType(LLVMContext &Context, Type *Ty);
static Attribute getWithUWTableKind(LLVMContext &Context, UWTableKind Kind);
static Attribute getWithMemoryEffects(LLVMContext &Context, MemoryEffects ME);
+ static Attribute getWithNoFPClass(LLVMContext &Context, FPClassTest Mask);
/// For a typed attribute, return the equivalent attribute with the type
/// changed to \p ReplacementTy.
@@ -249,6 +248,9 @@ public:
/// Returns memory effects.
MemoryEffects getMemoryEffects() const;
+ /// Return the FPClassTest for nofpclass
+ FPClassTest getNoFPClass() const;
+
/// The Attribute is converted to a string of equivalent mnemonic. This
/// is, presumably, for writing out the mnemonics for the assembly writer.
std::string getAsString(bool InAttrGrp = false) const;
@@ -383,6 +385,7 @@ public:
UWTableKind getUWTableKind() const;
AllocFnKind getAllocKind() const;
MemoryEffects getMemoryEffects() const;
+ FPClassTest getNoFPClass() const;
std::string getAsString(bool InAttrGrp = false) const;
/// Return true if this attribute set belongs to the LLVMContext.
@@ -877,6 +880,12 @@ public:
/// arg.
uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const;
+ /// Get the disallowed floating-point classes of the return value.
+ FPClassTest getRetNoFPClass() const;
+
+ /// Get the disallowed floating-point classes of the argument value.
+ FPClassTest getParamNoFPClass(unsigned ArgNo) const;
+
/// Get the unwind table kind requested for the function.
UWTableKind getUWTableKind() const;
@@ -974,65 +983,6 @@ template <> struct DenseMapInfo<AttributeList, void> {
//===----------------------------------------------------------------------===//
/// \class
-/// This class stores enough information to efficiently remove some attributes
-/// from an existing AttrBuilder, AttributeSet or AttributeList.
-class AttributeMask {
- std::bitset<Attribute::EndAttrKinds> Attrs;
- std::set<SmallString<32>, std::less<>> TargetDepAttrs;
-
-public:
- AttributeMask() = default;
- AttributeMask(const AttributeMask &) = delete;
- AttributeMask(AttributeMask &&) = default;
-
- AttributeMask(AttributeSet AS) {
- for (Attribute A : AS)
- addAttribute(A);
- }
-
- /// Add an attribute to the mask.
- AttributeMask &addAttribute(Attribute::AttrKind Val) {
- assert((unsigned)Val < Attribute::EndAttrKinds &&
- "Attribute out of range!");
- Attrs[Val] = true;
- return *this;
- }
-
- /// Add the Attribute object to the builder.
- AttributeMask &addAttribute(Attribute A) {
- if (A.isStringAttribute())
- addAttribute(A.getKindAsString());
- else
- addAttribute(A.getKindAsEnum());
- return *this;
- }
-
- /// Add the target-dependent attribute to the builder.
- AttributeMask &addAttribute(StringRef A) {
- TargetDepAttrs.insert(A);
- return *this;
- }
-
- /// Return true if the builder has the specified attribute.
- bool contains(Attribute::AttrKind A) const {
- assert((unsigned)A < Attribute::EndAttrKinds && "Attribute out of range!");
- return Attrs[A];
- }
-
- /// Return true if the builder has the specified target-dependent
- /// attribute.
- bool contains(StringRef A) const { return TargetDepAttrs.count(A); }
-
- /// Return true if the mask contains the specified attribute.
- bool contains(Attribute A) const {
- if (A.isStringAttribute())
- return contains(A.getKindAsString());
- return contains(A.getKindAsEnum());
- }
-};
-
-//===----------------------------------------------------------------------===//
-/// \class
/// This class is used in conjunction with the Attribute::get method to
/// create an Attribute object. The object itself is uniquified. The Builder's
/// value, however, is not. So this can be used as a quick way to test for
@@ -1236,6 +1186,9 @@ public:
/// Add memory effect attribute.
AttrBuilder &addMemoryAttr(MemoryEffects ME);
+ /// Add the nofpclass attribute.
+ AttrBuilder &addNoFPClassAttr(FPClassTest NoFPClassMask);
+
ArrayRef<Attribute> attrs() const { return Attrs; }
bool operator==(const AttrBuilder &B) const;
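As a hedged illustration of the new nofpclass surface (fcNan comes from FPClassTest in FloatingPointMode.h; Ctx and F are an assumed LLVMContext and Function*):

    // Assert that the return value of F is never a NaN.
    F->addRetAttr(Attribute::getWithNoFPClass(Ctx, fcNan));
    FPClassTest Disallowed = F->getAttributes().getRetNoFPClass();  // == fcNan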
@@ -1250,6 +1203,10 @@ enum AttributeSafetyKind : uint8_t {
ASK_ALL = ASK_SAFE_TO_DROP | ASK_UNSAFE_TO_DROP,
};
+/// Returns true if this is a type legal for the 'nofpclass' attribute. This
+/// follows the same type rules as FPMathOperator.
+bool isNoFPClassCompatibleType(Type *Ty);
+
/// Which attributes cannot be applied to a type. The argument \p ASK indicates,
/// if only attributes that are known to be safely droppable are contained in
/// the mask; only attributes that might be unsafe to drop (e.g., ABI-related
diff --git a/llvm/include/llvm/IR/Attributes.td b/llvm/include/llvm/IR/Attributes.td
index 75fe534ac61e..aba1d718f7f7 100644
--- a/llvm/include/llvm/IR/Attributes.td
+++ b/llvm/include/llvm/IR/Attributes.td
@@ -41,6 +41,9 @@ class TypeAttr<string S, list<AttrProperty> P> : Attr<S, P>;
/// StringBool attribute.
class StrBoolAttr<string S> : Attr<S, []>;
+/// Arbitrary string attribute.
+class ComplexStrAttr<string S, list<AttrProperty> P> : Attr<S, P>;
+
/// Target-independent enum attributes.
/// Alignment of parameter (5 bits) stored as log2 of alignment with +1 bias.
@@ -118,6 +121,9 @@ def JumpTable : EnumAttr<"jumptable", [FnAttr]>;
/// Memory effects of the function.
def Memory : IntAttr<"memory", [FnAttr]>;
+/// Forbidden floating-point classes.
+def NoFPClass : IntAttr<"nofpclass", [ParamAttr, RetAttr]>;
+
/// Function must be optimized for size first.
def MinSize : EnumAttr<"minsize", [FnAttr]>;
@@ -318,6 +324,9 @@ def NoInlineLineTables : StrBoolAttr<"no-inline-line-tables">;
def ProfileSampleAccurate : StrBoolAttr<"profile-sample-accurate">;
def UseSampleProfile : StrBoolAttr<"use-sample-profile">;
+def DenormalFPMath : ComplexStrAttr<"denormal-fp-math", [FnAttr]>;
+def DenormalFPMathF32 : ComplexStrAttr<"denormal-fp-math-f32", [FnAttr]>;
+
class CompatRule<string F> {
// The name of the function called to check the attribute of the caller and
// callee and decide whether inlining should be allowed. The function's
@@ -337,6 +346,8 @@ def : CompatRule<"isEqual<SafeStackAttr>">;
def : CompatRule<"isEqual<ShadowCallStackAttr>">;
def : CompatRule<"isEqual<UseSampleProfileAttr>">;
def : CompatRule<"isEqual<NoProfileAttr>">;
+def : CompatRule<"checkDenormMode">;
+
class MergeRule<string F> {
// The name of the function called to merge the attributes of the caller and
diff --git a/llvm/include/llvm/IR/BasicBlock.h b/llvm/include/llvm/IR/BasicBlock.h
index 71d9ada69148..19bf9549a8ca 100644
--- a/llvm/include/llvm/IR/BasicBlock.h
+++ b/llvm/include/llvm/IR/BasicBlock.h
@@ -213,6 +213,15 @@ public:
.getNonConst();
}
+ /// Returns the first instruction that may fault under asynchronous EH;
+ /// currently this checks for loads/stores (which may dereference a null
+ /// pointer) and calls/invokes (which may propagate exceptions).
+ const Instruction* getFirstMayFaultInst() const;
+ Instruction* getFirstMayFaultInst() {
+ return const_cast<Instruction*>(
+ static_cast<const BasicBlock*>(this)->getFirstMayFaultInst());
+ }
+
/// Return a const iterator range over the instructions in the block, skipping
/// any debug instructions. Skip any pseudo operations as well if \c
/// SkipPseudoOp is true.
@@ -242,7 +251,10 @@ public:
/// Unlink this basic block from its current function and insert it into
/// the function that \p MovePos lives in, right before \p MovePos.
- void moveBefore(BasicBlock *MovePos);
+ inline void moveBefore(BasicBlock *MovePos) {
+ moveBefore(MovePos->getIterator());
+ }
+ void moveBefore(SymbolTableList<BasicBlock>::iterator MovePos);
/// Unlink this basic block from its current function and insert it
/// right after \p MovePos in the function \p MovePos lives in.
diff --git a/llvm/include/llvm/IR/CallingConv.h b/llvm/include/llvm/IR/CallingConv.h
index 9fefeef05cb2..e97623b29f52 100644
--- a/llvm/include/llvm/IR/CallingConv.h
+++ b/llvm/include/llvm/IR/CallingConv.h
@@ -159,13 +159,9 @@ namespace CallingConv {
/// registers.
X86_VectorCall = 80,
- /// Used by HipHop Virtual Machine (HHVM) to perform calls to and from
- /// translation cache, and for calling PHP functions. HHVM calling
- /// convention supports tail/sibling call elimination.
- HHVM = 81,
-
- /// HHVM calling convention for invoking C/C++ helpers.
- HHVM_C = 82,
+ /// Placeholders for HHVM calling conventions (deprecated, removed).
+ DUMMY_HHVM = 81,
+ DUMMY_HHVM_C = 82,
/// x86 hardware interrupt context. Callee may take one or two parameters,
/// where the 1st represents a pointer to hardware context frame and the 2nd
@@ -241,6 +237,14 @@ namespace CallingConv {
/// Preserve X2-X15, X19-X29, SP, Z0-Z31, P0-P15.
AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2 = 103,
+ /// Used on AMDGPUs to give the middle-end more control over argument
+ /// placement.
+ AMDGPU_CS_Chain = 104,
+
+ /// Used on AMDGPUs to give the middle-end more control over argument
+ /// placement. Preserves active lane values for input VGPRs.
+ AMDGPU_CS_ChainPreserve = 105,
+
/// The highest possible ID. Must be some 2^k - 1.
MaxID = 1023
};
diff --git a/llvm/include/llvm/IR/ConstantFolder.h b/llvm/include/llvm/IR/ConstantFolder.h
index 82c07d47a193..56da3d205fe4 100644
--- a/llvm/include/llvm/IR/ConstantFolder.h
+++ b/llvm/include/llvm/IR/ConstantFolder.h
@@ -105,6 +105,9 @@ public:
Value *FoldGEP(Type *Ty, Value *Ptr, ArrayRef<Value *> IdxList,
bool IsInBounds = false) const override {
+ if (!ConstantExpr::isSupportedGetElementPtr(Ty))
+ return nullptr;
+
if (auto *PC = dyn_cast<Constant>(Ptr)) {
// Every index must be constant.
if (any_of(IdxList, [](Value *V) { return !isa<Constant>(V); }))
@@ -123,7 +126,7 @@ public:
auto *TC = dyn_cast<Constant>(True);
auto *FC = dyn_cast<Constant>(False);
if (CC && TC && FC)
- return ConstantExpr::getSelect(CC, TC, FC);
+ return ConstantFoldSelectInstruction(CC, TC, FC);
return nullptr;
}
diff --git a/llvm/include/llvm/IR/ConstantRange.h b/llvm/include/llvm/IR/ConstantRange.h
index 0b9ac18d0a92..ca36732e4e2e 100644
--- a/llvm/include/llvm/IR/ConstantRange.h
+++ b/llvm/include/llvm/IR/ConstantRange.h
@@ -526,6 +526,10 @@ public:
/// \p IntMinIsPoison is false.
ConstantRange abs(bool IntMinIsPoison = false) const;
+ /// Calculate ctlz range. If \p ZeroIsPoison is set, the range is computed
+ /// ignoring a possible zero value contained in the input range.
+ ConstantRange ctlz(bool ZeroIsPoison = false) const;
+
/// Represents whether an operation on the given constant range is known to
/// always or never overflow.
enum class OverflowResult {
diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h
index 8aaedc3d082b..94940c816179 100644
--- a/llvm/include/llvm/IR/Constants.h
+++ b/llvm/include/llvm/IR/Constants.h
@@ -111,8 +111,12 @@ public:
/// either getSExtValue() or getZExtValue() will yield a correctly sized and
/// signed value for the type Ty.
/// Get a ConstantInt for a specific signed value.
- static ConstantInt *getSigned(IntegerType *Ty, int64_t V);
- static Constant *getSigned(Type *Ty, int64_t V);
+ static ConstantInt *getSigned(IntegerType *Ty, int64_t V) {
+ return get(Ty, V, true);
+ }
+ static Constant *getSigned(Type *Ty, int64_t V) {
+ return get(Ty, V, true);
+ }
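For example, with an assumed LLVMContext Ctx:

    // -1 sign-extended to the full 32-bit width.
    ConstantInt *MinusOne =
        ConstantInt::getSigned(Type::getInt32Ty(Ctx), -1);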
/// Return a ConstantInt with the specified value and an implied Type. The
/// type is the integer type that corresponds to the bit width of the value.
@@ -265,11 +269,6 @@ class ConstantFP final : public ConstantData {
public:
ConstantFP(const ConstantFP &) = delete;
- /// Floating point negation must be implemented with f(x) = -0.0 - x. This
- /// method returns the negative zero constant for floating point or vector
- /// floating point types; for all other types, it returns the null value.
- static Constant *getZeroValueForNegation(Type *Ty);
-
/// This returns a ConstantFP, or a vector containing a splat of a ConstantFP,
/// for the specified value in the specified type. This should only be used
/// for simple constant values like 2.0/1.0 etc, that are known-valid both as
@@ -1027,16 +1026,6 @@ public:
///
static Constant *getSizeOf(Type *Ty);
- /// getOffsetOf constant expr - computes the offset of a struct field in a
- /// target independent way (Note: the return type is an i64).
- ///
- static Constant *getOffsetOf(StructType *STy, unsigned FieldNo);
-
- /// getOffsetOf constant expr - This is a generalized form of getOffsetOf,
- /// which supports any aggregate type, and any Constant index.
- ///
- static Constant *getOffsetOf(Type *Ty, Constant *FieldNo);
-
static Constant *getNeg(Constant *C, bool HasNUW = false,
bool HasNSW = false);
static Constant *getNot(Constant *C);
@@ -1049,7 +1038,6 @@ public:
static Constant *getAnd(Constant *C1, Constant *C2);
static Constant *getOr(Constant *C1, Constant *C2);
static Constant *getXor(Constant *C1, Constant *C2);
- static Constant *getUMin(Constant *C1, Constant *C2);
static Constant *getShl(Constant *C1, Constant *C2, bool HasNUW = false,
bool HasNSW = false);
static Constant *getLShr(Constant *C1, Constant *C2, bool isExact = false);
@@ -1209,12 +1197,6 @@ public:
/// Return true if this is a compare constant expression
bool isCompare() const;
- /// Select constant expr
- ///
- /// \param OnlyIfReducedTy see \a getWithOperands() docs.
- static Constant *getSelect(Constant *C, Constant *V1, Constant *V2,
- Type *OnlyIfReducedTy = nullptr);
-
/// get - Return a binary or shift operator constant expression,
/// folding if possible.
///
@@ -1350,6 +1332,12 @@ public:
/// supported.
static bool isSupportedBinOp(unsigned Opcode);
+ /// Whether creating a constant expression for this getelementptr type is
+ /// supported.
+ static bool isSupportedGetElementPtr(const Type *SrcElemTy) {
+ return !SrcElemTy->isScalableTy();
+ }
+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Value *V) {
return V->getValueID() == ConstantExprVal;
diff --git a/llvm/include/llvm/IR/ConstrainedOps.def b/llvm/include/llvm/IR/ConstrainedOps.def
index ecba68fe0c0e..41aa44de957f 100644
--- a/llvm/include/llvm/IR/ConstrainedOps.def
+++ b/llvm/include/llvm/IR/ConstrainedOps.def
@@ -89,6 +89,7 @@ DAG_FUNCTION(minimum, 2, 0, experimental_constrained_minimum, FMINIMU
DAG_FUNCTION(nearbyint, 1, 1, experimental_constrained_nearbyint, FNEARBYINT)
DAG_FUNCTION(pow, 2, 1, experimental_constrained_pow, FPOW)
DAG_FUNCTION(powi, 2, 1, experimental_constrained_powi, FPOWI)
+DAG_FUNCTION(ldexp, 2, 1, experimental_constrained_ldexp, FLDEXP)
DAG_FUNCTION(rint, 1, 1, experimental_constrained_rint, FRINT)
DAG_FUNCTION(round, 1, 0, experimental_constrained_round, FROUND)
DAG_FUNCTION(roundeven, 1, 0, experimental_constrained_roundeven, FROUNDEVEN)
diff --git a/llvm/include/llvm/IR/CycleInfo.h b/llvm/include/llvm/IR/CycleInfo.h
new file mode 100644
index 000000000000..02b25e5abe1a
--- /dev/null
+++ b/llvm/include/llvm/IR/CycleInfo.h
@@ -0,0 +1,31 @@
+//===- CycleInfo.h - Cycle Info for LLVM IR -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file declares the LLVM IR specialization of the GenericCycle
+/// templates.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_CYCLEINFO_H
+#define LLVM_IR_CYCLEINFO_H
+
+#include "llvm/ADT/GenericCycleInfo.h"
+#include "llvm/IR/SSAContext.h"
+
+namespace llvm {
+
+extern template class GenericCycleInfo<SSAContext>;
+extern template class GenericCycle<SSAContext>;
+
+using CycleInfo = GenericCycleInfo<SSAContext>;
+using Cycle = CycleInfo::CycleT;
+
+} // namespace llvm
+
+#endif // LLVM_IR_CYCLEINFO_H
diff --git a/llvm/include/llvm/IR/DIBuilder.h b/llvm/include/llvm/IR/DIBuilder.h
index 45b94044bc64..ecd6dd7b0a4f 100644
--- a/llvm/include/llvm/IR/DIBuilder.h
+++ b/llvm/include/llvm/IR/DIBuilder.h
@@ -47,15 +47,14 @@ namespace llvm {
Function *DeclareFn; ///< llvm.dbg.declare
Function *ValueFn; ///< llvm.dbg.value
Function *LabelFn; ///< llvm.dbg.label
- Function *AddrFn; ///< llvm.dbg.addr
Function *AssignFn; ///< llvm.dbg.assign
SmallVector<TrackingMDNodeRef, 4> AllEnumTypes;
/// Track the RetainTypes, since they can be updated later on.
SmallVector<TrackingMDNodeRef, 4> AllRetainTypes;
- SmallVector<Metadata *, 4> AllSubprograms;
+ SmallVector<DISubprogram *, 4> AllSubprograms;
SmallVector<Metadata *, 4> AllGVs;
- SmallVector<TrackingMDNodeRef, 4> AllImportedModules;
+ SmallVector<TrackingMDNodeRef, 4> ImportedModules;
/// Map Macro parent (which can be DIMacroFile or nullptr) to a list of
/// Metadata all of type DIMacroNode.
/// DIMacroNode's with nullptr parent are DICompileUnit direct children.
@@ -65,15 +64,25 @@ namespace llvm {
SmallVector<TrackingMDNodeRef, 4> UnresolvedNodes;
bool AllowUnresolvedNodes;
- /// Each subprogram's preserved local variables.
+ /// Each subprogram's preserved local variables, labels and imported
+ /// entities.
///
/// Do not use a std::vector. Some versions of libc++ apparently copy
/// instead of move on grow operations, and TrackingMDRef is expensive to
/// copy.
- DenseMap<MDNode *, SmallVector<TrackingMDNodeRef, 1>> PreservedVariables;
-
- /// Each subprogram's preserved labels.
- DenseMap<MDNode *, SmallVector<TrackingMDNodeRef, 1>> PreservedLabels;
+ DenseMap<DISubprogram *, SmallVector<TrackingMDNodeRef, 4>>
+ SubprogramTrackedNodes;
+
+ SmallVectorImpl<TrackingMDNodeRef> &
+ getImportTrackingVector(const DIScope *S) {
+ return isa_and_nonnull<DILocalScope>(S)
+ ? getSubprogramNodesTrackingVector(S)
+ : ImportedModules;
+ }
+ SmallVectorImpl<TrackingMDNodeRef> &
+ getSubprogramNodesTrackingVector(const DIScope *S) {
+ return SubprogramTrackedNodes[cast<DILocalScope>(S)->getSubprogram()];
+ }
/// Create a temporary.
///
@@ -102,12 +111,6 @@ namespace llvm {
DIExpression *Expr, const DILocation *DL,
BasicBlock *InsertBB, Instruction *InsertBefore);
- /// Internal helper for insertDbgAddrIntrinsic.
- Instruction *
- insertDbgAddrIntrinsic(llvm::Value *Val, DILocalVariable *VarInfo,
- DIExpression *Expr, const DILocation *DL,
- BasicBlock *InsertBB, Instruction *InsertBefore);
-
public:
/// Construct a builder for a module.
///
@@ -986,30 +989,6 @@ namespace llvm {
const DILocation *DL,
Instruction *InsertBefore);
- /// Insert a new llvm.dbg.addr intrinsic call.
- /// \param Addr llvm::Value of the address
- /// \param VarInfo Variable's debug info descriptor.
- /// \param Expr A complex location expression.
- /// \param DL Debug info location.
- /// \param InsertAtEnd Location for the new intrinsic.
- Instruction *insertDbgAddrIntrinsic(llvm::Value *Addr,
- DILocalVariable *VarInfo,
- DIExpression *Expr,
- const DILocation *DL,
- BasicBlock *InsertAtEnd);
-
- /// Insert a new llvm.dbg.addr intrinsic call.
- /// \param Addr llvm::Value of the address.
- /// \param VarInfo Variable's debug info descriptor.
- /// \param Expr A complex location expression.
- /// \param DL Debug info location.
- /// \param InsertBefore Location for the new intrinsic.
- Instruction *insertDbgAddrIntrinsic(llvm::Value *Addr,
- DILocalVariable *VarInfo,
- DIExpression *Expr,
- const DILocation *DL,
- Instruction *InsertBefore);
-
/// Replace the vtable holder in the given type.
///
/// If this creates a self reference, it may orphan some unresolved cycles
diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h
index fbfbf7732448..7d92ac6c64fb 100644
--- a/llvm/include/llvm/IR/DataLayout.h
+++ b/llvm/include/llvm/IR/DataLayout.h
@@ -52,7 +52,6 @@ class Value;
/// Enum used to categorize the alignment types stored by LayoutAlignElem
enum AlignTypeEnum {
- INVALID_ALIGN = 0,
INTEGER_ALIGN = 'i',
VECTOR_ALIGN = 'v',
FLOAT_ALIGN = 'f',
@@ -66,20 +65,17 @@ enum AlignTypeEnum {
/// Layout alignment element.
///
-/// Stores the alignment data associated with a given alignment type (integer,
-/// vector, float) and type bit width.
+/// Stores the alignment data associated with a given type bit width.
///
/// \note The unusual order of elements in the structure attempts to reduce
/// padding and make the structure slightly more cache friendly.
struct LayoutAlignElem {
- /// Alignment type from \c AlignTypeEnum
- unsigned AlignType : 8;
- unsigned TypeBitWidth : 24;
+ uint32_t TypeBitWidth;
Align ABIAlign;
Align PrefAlign;
- static LayoutAlignElem get(AlignTypeEnum align_type, Align abi_align,
- Align pref_align, uint32_t bit_width);
+ static LayoutAlignElem get(Align ABIAlign, Align PrefAlign,
+ uint32_t BitWidth);
bool operator==(const LayoutAlignElem &rhs) const;
};
@@ -147,17 +143,11 @@ private:
/// Primitive type alignment data. This is sorted by type and bit
/// width during construction.
- using AlignmentsTy = SmallVector<LayoutAlignElem, 16>;
- AlignmentsTy Alignments;
-
- AlignmentsTy::const_iterator
- findAlignmentLowerBound(AlignTypeEnum AlignType, uint32_t BitWidth) const {
- return const_cast<DataLayout *>(this)->findAlignmentLowerBound(AlignType,
- BitWidth);
- }
-
- AlignmentsTy::iterator
- findAlignmentLowerBound(AlignTypeEnum AlignType, uint32_t BitWidth);
+ using AlignmentsTy = SmallVector<LayoutAlignElem, 4>;
+ AlignmentsTy IntAlignments;
+ AlignmentsTy FloatAlignments;
+ AlignmentsTy VectorAlignments;
+ LayoutAlignElem StructAlignment;
/// The string representation used to create this DataLayout
std::string StringRepresentation;
@@ -176,8 +166,8 @@ private:
/// Attempts to set the alignment of the given type. Returns an error
/// description on failure.
- Error setAlignment(AlignTypeEnum align_type, Align abi_align,
- Align pref_align, uint32_t bit_width);
+ Error setAlignment(AlignTypeEnum AlignType, Align ABIAlign, Align PrefAlign,
+ uint32_t BitWidth);
/// Attempts to set the alignment of a pointer in the given address space.
/// Returns an error description on failure.
@@ -223,7 +213,10 @@ public:
DefaultGlobalsAddrSpace = DL.DefaultGlobalsAddrSpace;
ManglingMode = DL.ManglingMode;
LegalIntWidths = DL.LegalIntWidths;
- Alignments = DL.Alignments;
+ IntAlignments = DL.IntAlignments;
+ FloatAlignments = DL.FloatAlignments;
+ VectorAlignments = DL.VectorAlignments;
+ StructAlignment = DL.StructAlignment;
Pointers = DL.Pointers;
NonIntegralAddressSpaces = DL.NonIntegralAddressSpaces;
return *this;
@@ -522,15 +515,10 @@ public:
}
/// Returns the minimum ABI-required alignment for the specified type.
- /// FIXME: Deprecate this function once migration to Align is over.
- LLVM_DEPRECATED("use getABITypeAlign instead", "getABITypeAlign")
- uint64_t getABITypeAlignment(Type *Ty) const;
-
- /// Returns the minimum ABI-required alignment for the specified type.
Align getABITypeAlign(Type *Ty) const;
/// Helper function to return `Alignment` if it's set or the result of
- /// `getABITypeAlignment(Ty)`, in any case the result is a valid alignment.
+ /// `getABITypeAlign(Ty)`, in any case the result is a valid alignment.
inline Align getValueOrABITypeAlignment(MaybeAlign Alignment,
Type *Ty) const {
return Alignment ? *Alignment : getABITypeAlign(Ty);
@@ -578,6 +566,11 @@ public:
/// are set.
unsigned getLargestLegalIntTypeSizeInBits() const;
+ /// Returns the type of a GEP index in AddressSpace.
+ /// If it was not specified explicitly, it will be the integer type of the
+ /// pointer width - IntPtrType.
+ IntegerType *getIndexType(LLVMContext &C, unsigned AddressSpace) const;
+
/// Returns the type of a GEP index.
/// If it was not specified explicitly, it will be the integer type of the
/// pointer width - IntPtrType.
@@ -622,16 +615,16 @@ inline LLVMTargetDataRef wrap(const DataLayout *P) {
/// Used to lazily calculate structure layout information for a target machine,
/// based on the DataLayout structure.
-class StructLayout final : public TrailingObjects<StructLayout, uint64_t> {
- uint64_t StructSize;
+class StructLayout final : public TrailingObjects<StructLayout, TypeSize> {
+ TypeSize StructSize;
Align StructAlignment;
unsigned IsPadded : 1;
unsigned NumElements : 31;
public:
- uint64_t getSizeInBytes() const { return StructSize; }
+ TypeSize getSizeInBytes() const { return StructSize; }
- uint64_t getSizeInBits() const { return 8 * StructSize; }
+ TypeSize getSizeInBits() const { return 8 * StructSize; }
Align getAlignment() const { return StructAlignment; }
@@ -641,23 +634,22 @@ public:
/// Given a valid byte offset into the structure, returns the structure
/// index that contains it.
- unsigned getElementContainingOffset(uint64_t Offset) const;
+ unsigned getElementContainingOffset(uint64_t FixedOffset) const;
- MutableArrayRef<uint64_t> getMemberOffsets() {
- return llvm::MutableArrayRef(getTrailingObjects<uint64_t>(),
- NumElements);
+ MutableArrayRef<TypeSize> getMemberOffsets() {
+ return llvm::MutableArrayRef(getTrailingObjects<TypeSize>(), NumElements);
}
- ArrayRef<uint64_t> getMemberOffsets() const {
- return llvm::ArrayRef(getTrailingObjects<uint64_t>(), NumElements);
+ ArrayRef<TypeSize> getMemberOffsets() const {
+ return llvm::ArrayRef(getTrailingObjects<TypeSize>(), NumElements);
}
- uint64_t getElementOffset(unsigned Idx) const {
+ TypeSize getElementOffset(unsigned Idx) const {
assert(Idx < NumElements && "Invalid element idx!");
return getMemberOffsets()[Idx];
}
- uint64_t getElementOffsetInBits(unsigned Idx) const {
+ TypeSize getElementOffsetInBits(unsigned Idx) const {
return getElementOffset(Idx) * 8;
}
@@ -666,7 +658,7 @@ private:
StructLayout(StructType *ST, const DataLayout &DL);
- size_t numTrailingObjects(OverloadToken<uint64_t>) const {
+ size_t numTrailingObjects(OverloadToken<TypeSize>) const {
return NumElements;
}
};
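A short sketch of the TypeSize-returning accessors, assuming DL is a DataLayout and STy a StructType* (which may now contain scalable members):

    const StructLayout *SL = DL.getStructLayout(STy);
    TypeSize SizeInBytes = SL->getSizeInBytes();     // TypeSize, not uint64_t
    TypeSize FirstOffset = SL->getElementOffset(0);  // may be scalable
    unsigned Idx = SL->getElementContainingOffset(/*FixedOffset=*/8);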
@@ -687,8 +679,7 @@ inline TypeSize DataLayout::getTypeSizeInBits(Type *Ty) const {
}
case Type::StructTyID:
// Get the layout annotation... which is lazily created on demand.
- return TypeSize::Fixed(
- getStructLayout(cast<StructType>(Ty))->getSizeInBits());
+ return getStructLayout(cast<StructType>(Ty))->getSizeInBits();
case Type::IntegerTyID:
return TypeSize::Fixed(Ty->getIntegerBitWidth());
case Type::HalfTyID:
diff --git a/llvm/include/llvm/IR/DebugInfo.h b/llvm/include/llvm/IR/DebugInfo.h
index 5e62d8723a55..26a7cfbbb350 100644
--- a/llvm/include/llvm/IR/DebugInfo.h
+++ b/llvm/include/llvm/IR/DebugInfo.h
@@ -35,13 +35,8 @@ class DbgVariableIntrinsic;
class Instruction;
class Module;
-/// Finds all intrinsics declaring local variables as living in the memory that
-/// 'V' points to. This may include a mix of dbg.declare and
-/// dbg.addr intrinsics.
-TinyPtrVector<DbgVariableIntrinsic *> FindDbgAddrUses(Value *V);
-
-/// Like \c FindDbgAddrUses, but only returns dbg.declare intrinsics, not
-/// dbg.addr.
+/// Finds dbg.declare intrinsics declaring local variables as living in the
+/// memory that 'V' points to.
TinyPtrVector<DbgDeclareInst *> FindDbgDeclareUses(Value *V);
/// Finds the llvm.dbg.value intrinsics describing a value.
@@ -229,6 +224,20 @@ void RAUW(DIAssignID *Old, DIAssignID *New);
/// Remove all Assignment Tracking related intrinsics and metadata from \p F.
void deleteAll(Function *F);
+/// Calculate the fragment of the variable in \p DAI covered
+/// from (Dest + SliceOffsetInBits) to
+/// (Dest + SliceOffsetInBits + SliceSizeInBits).
+///
+/// Return false if it can't be calculated for any reason.
+/// Result is set to nullopt if the intersect equals the variable fragment (or
+/// variable size) in DAI.
+///
+/// Result contains a zero-sized fragment if there's no intersect.
+bool calculateFragmentIntersect(
+ const DataLayout &DL, const Value *Dest, uint64_t SliceOffsetInBits,
+ uint64_t SliceSizeInBits, const DbgAssignIntrinsic *DAI,
+ std::optional<DIExpression::FragmentInfo> &Result);
+
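A hedged call-site sketch (not from the patch); it assumes DL, a store destination Dest, and a DbgAssignIntrinsic *DAI are in scope, and that the function is reached through the same namespace as deleteAll() above:

  std::optional<DIExpression::FragmentInfo> Fragment;
  if (calculateFragmentIntersect(DL, Dest, /*SliceOffsetInBits=*/0,
                                 /*SliceSizeInBits=*/32, DAI, Fragment)) {
    if (!Fragment) {
      // The 32-bit slice covers DAI's whole fragment (or the whole variable).
    } else if (Fragment->SizeInBits == 0) {
      // The slice does not overlap the variable at all.
    } else {
      // Partial overlap, described by *Fragment.
    }
  }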
/// Helper struct for trackAssignments, below. We don't use the similar
/// DebugVariable class because trackAssignments doesn't (yet?) understand
/// partial variables (fragment info) as input and want to make that clear and
@@ -296,7 +305,7 @@ std::optional<AssignmentInfo> getAssignmentInfo(const DataLayout &DL,
class AssignmentTrackingPass : public PassInfoMixin<AssignmentTrackingPass> {
/// Note: this method does not set the debug-info-assignment-tracking module
/// flag.
- void runOnFunction(Function &F);
+ bool runOnFunction(Function &F);
public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
index def3ce2b56ea..656122405209 100644
--- a/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -1377,7 +1377,8 @@ public:
Default = 0,
GNU = 1,
None = 2,
- LastDebugNameTableKind = None
+ Apple = 3,
+ LastDebugNameTableKind = Apple
};
static std::optional<DebugEmissionKind> getEmissionKind(StringRef Str);
@@ -1599,254 +1600,6 @@ public:
}
};
-/// Debug location.
-///
-/// A debug location in source code, used for debug info and otherwise.
-class DILocation : public MDNode {
- friend class LLVMContextImpl;
- friend class MDNode;
-
- DILocation(LLVMContext &C, StorageType Storage, unsigned Line,
- unsigned Column, ArrayRef<Metadata *> MDs, bool ImplicitCode);
- ~DILocation() { dropAllReferences(); }
-
- static DILocation *getImpl(LLVMContext &Context, unsigned Line,
- unsigned Column, Metadata *Scope,
- Metadata *InlinedAt, bool ImplicitCode,
- StorageType Storage, bool ShouldCreate = true);
- static DILocation *getImpl(LLVMContext &Context, unsigned Line,
- unsigned Column, DILocalScope *Scope,
- DILocation *InlinedAt, bool ImplicitCode,
- StorageType Storage, bool ShouldCreate = true) {
- return getImpl(Context, Line, Column, static_cast<Metadata *>(Scope),
- static_cast<Metadata *>(InlinedAt), ImplicitCode, Storage,
- ShouldCreate);
- }
-
- TempDILocation cloneImpl() const {
- // Get the raw scope/inlinedAt since it is possible to invoke this on
- // a DILocation containing temporary metadata.
- return getTemporary(getContext(), getLine(), getColumn(), getRawScope(),
- getRawInlinedAt(), isImplicitCode());
- }
-
-public:
- // Disallow replacing operands.
- void replaceOperandWith(unsigned I, Metadata *New) = delete;
-
- DEFINE_MDNODE_GET(DILocation,
- (unsigned Line, unsigned Column, Metadata *Scope,
- Metadata *InlinedAt = nullptr, bool ImplicitCode = false),
- (Line, Column, Scope, InlinedAt, ImplicitCode))
- DEFINE_MDNODE_GET(DILocation,
- (unsigned Line, unsigned Column, DILocalScope *Scope,
- DILocation *InlinedAt = nullptr,
- bool ImplicitCode = false),
- (Line, Column, Scope, InlinedAt, ImplicitCode))
-
- /// Return a (temporary) clone of this.
- TempDILocation clone() const { return cloneImpl(); }
-
- unsigned getLine() const { return SubclassData32; }
- unsigned getColumn() const { return SubclassData16; }
- DILocalScope *getScope() const { return cast<DILocalScope>(getRawScope()); }
-
- DILocation *getInlinedAt() const {
- return cast_or_null<DILocation>(getRawInlinedAt());
- }
-
- /// Check if the location corresponds to an implicit code.
- /// When the ImplicitCode flag is true, it means that the Instruction
- /// with this DILocation has been added by the front-end but it hasn't been
- /// written explicitly by the user (e.g. cleanup stuff in C++ put on a closing
- /// bracket). It's useful for code coverage to not show a counter on "empty"
- /// lines.
- bool isImplicitCode() const { return SubclassData1; }
- void setImplicitCode(bool ImplicitCode) { SubclassData1 = ImplicitCode; }
-
- DIFile *getFile() const { return getScope()->getFile(); }
- StringRef getFilename() const { return getScope()->getFilename(); }
- StringRef getDirectory() const { return getScope()->getDirectory(); }
- std::optional<StringRef> getSource() const { return getScope()->getSource(); }
-
- /// Get the scope where this is inlined.
- ///
- /// Walk through \a getInlinedAt() and return \a getScope() from the deepest
- /// location.
- DILocalScope *getInlinedAtScope() const {
- if (auto *IA = getInlinedAt())
- return IA->getInlinedAtScope();
- return getScope();
- }
-
- /// Get the DWARF discriminator.
- ///
- /// DWARF discriminators distinguish identical file locations between
- /// instructions that are on different basic blocks.
- ///
- /// There are 3 components stored in discriminator, from lower bits:
- ///
- /// Base discriminator: assigned by AddDiscriminators pass to identify IRs
- /// that are defined by the same source line, but
- /// different basic blocks.
- /// Duplication factor: assigned by optimizations that will scale down
- /// the execution frequency of the original IR.
- /// Copy Identifier: assigned by optimizations that clones the IR.
- /// Each copy of the IR will be assigned an identifier.
- ///
- /// Encoding:
- ///
- /// The above 3 components are encoded into a 32bit unsigned integer in
- /// order. If the lowest bit is 1, the current component is empty, and the
- /// next component will start in the next bit. Otherwise, the current
- /// component is non-empty, and its content starts in the next bit. The
- /// value of each components is either 5 bit or 12 bit: if the 7th bit
- /// is 0, the bit 2~6 (5 bits) are used to represent the component; if the
- /// 7th bit is 1, the bit 2~6 (5 bits) and 8~14 (7 bits) are combined to
- /// represent the component. Thus, the number of bits used for a component
- /// is either 0 (if it and all the next components are empty); 1 - if it is
- /// empty; 7 - if its value is up to and including 0x1f (lsb and msb are both
- /// 0); or 14, if its value is up to and including 0x1ff. Note that the last
- /// component is also capped at 0x1ff, even in the case when both first
- /// components are 0, and we'd technically have 29 bits available.
- ///
- /// For precise control over the data being encoded in the discriminator,
- /// use encodeDiscriminator/decodeDiscriminator.
-
- inline unsigned getDiscriminator() const;
-
- // For the regular discriminator, it stands for all empty components if all
- // the lowest 3 bits are non-zero and all higher 29 bits are unused(zero by
- // default). Here we fully leverage the higher 29 bits for pseudo probe use.
- // This is the format:
- // [2:0] - 0x7
- // [31:3] - pseudo probe fields guaranteed to be non-zero as a whole
- // So if the lower 3 bits is non-zero and the others has at least one
- // non-zero bit, it guarantees to be a pseudo probe discriminator
- inline static bool isPseudoProbeDiscriminator(unsigned Discriminator) {
- return ((Discriminator & 0x7) == 0x7) && (Discriminator & 0xFFFFFFF8);
- }
-
- /// Returns a new DILocation with updated \p Discriminator.
- inline const DILocation *cloneWithDiscriminator(unsigned Discriminator) const;
-
- /// Returns a new DILocation with updated base discriminator \p BD. Only the
- /// base discriminator is set in the new DILocation, the other encoded values
- /// are elided.
- /// If the discriminator cannot be encoded, the function returns std::nullopt.
- inline std::optional<const DILocation *>
- cloneWithBaseDiscriminator(unsigned BD) const;
-
- /// Returns the duplication factor stored in the discriminator, or 1 if no
- /// duplication factor (or 0) is encoded.
- inline unsigned getDuplicationFactor() const;
-
- /// Returns the copy identifier stored in the discriminator.
- inline unsigned getCopyIdentifier() const;
-
- /// Returns the base discriminator stored in the discriminator.
- inline unsigned getBaseDiscriminator() const;
-
- /// Returns a new DILocation with duplication factor \p DF * current
- /// duplication factor encoded in the discriminator. The current duplication
- /// factor is as defined by getDuplicationFactor().
- /// Returns std::nullopt if encoding failed.
- inline std::optional<const DILocation *>
- cloneByMultiplyingDuplicationFactor(unsigned DF) const;
-
- /// When two instructions are combined into a single instruction we also
- /// need to combine the original locations into a single location.
- /// When the locations are the same we can use either location.
- /// When they differ, we need a third location which is distinct from either.
- /// If they share a common scope, use this scope and compare the line/column
- /// pair of the locations with the common scope:
- /// * if both match, keep the line and column;
- /// * if only the line number matches, keep the line and set the column as 0;
- /// * otherwise set line and column as 0.
- /// If they do not share a common scope the location is ambiguous and can't be
- /// represented in a line entry. In this case, set line and column as 0 and
- /// use the scope of any location.
- ///
- /// \p LocA \p LocB: The locations to be merged.
- static const DILocation *getMergedLocation(const DILocation *LocA,
- const DILocation *LocB);
-
- /// Try to combine the vector of locations passed as input in a single one.
- /// This function applies getMergedLocation() repeatedly left-to-right.
- ///
- /// \p Locs: The locations to be merged.
- static const DILocation *
- getMergedLocations(ArrayRef<const DILocation *> Locs);
-
- /// Return the masked discriminator value for an input discrimnator value D
- /// (i.e. zero out the (B+1)-th and above bits for D (B is 0-base).
- // Example: an input of (0x1FF, 7) returns 0xFF.
- static unsigned getMaskedDiscriminator(unsigned D, unsigned B) {
- return (D & getN1Bits(B));
- }
-
- /// Return the bits used for base discriminators.
- static unsigned getBaseDiscriminatorBits() { return getBaseFSBitEnd(); }
-
- /// Returns the base discriminator for a given encoded discriminator \p D.
- static unsigned
- getBaseDiscriminatorFromDiscriminator(unsigned D,
- bool IsFSDiscriminator = false) {
- if (IsFSDiscriminator)
- return getMaskedDiscriminator(D, getBaseDiscriminatorBits());
- return getUnsignedFromPrefixEncoding(D);
- }
-
- /// Raw encoding of the discriminator. APIs such as cloneWithDuplicationFactor
- /// have certain special case behavior (e.g. treating empty duplication factor
- /// as the value '1').
- /// This API, in conjunction with cloneWithDiscriminator, may be used to
- /// encode the raw values provided.
- ///
- /// \p BD: base discriminator
- /// \p DF: duplication factor
- /// \p CI: copy index
- ///
- /// The return is std::nullopt if the values cannot be encoded in 32 bits -
- /// for example, values for BD or DF larger than 12 bits. Otherwise, the
- /// return is the encoded value.
- static std::optional<unsigned> encodeDiscriminator(unsigned BD, unsigned DF,
- unsigned CI);
-
- /// Raw decoder for values in an encoded discriminator D.
- static void decodeDiscriminator(unsigned D, unsigned &BD, unsigned &DF,
- unsigned &CI);
-
- /// Returns the duplication factor for a given encoded discriminator \p D, or
- /// 1 if no value or 0 is encoded.
- static unsigned getDuplicationFactorFromDiscriminator(unsigned D) {
- if (EnableFSDiscriminator)
- return 1;
- D = getNextComponentInDiscriminator(D);
- unsigned Ret = getUnsignedFromPrefixEncoding(D);
- if (Ret == 0)
- return 1;
- return Ret;
- }
-
- /// Returns the copy identifier for a given encoded discriminator \p D.
- static unsigned getCopyIdentifierFromDiscriminator(unsigned D) {
- return getUnsignedFromPrefixEncoding(
- getNextComponentInDiscriminator(getNextComponentInDiscriminator(D)));
- }
-
- Metadata *getRawScope() const { return getOperand(0); }
- Metadata *getRawInlinedAt() const {
- if (getNumOperands() == 2)
- return getOperand(1);
- return nullptr;
- }
-
- static bool classof(const Metadata *MD) {
- return MD->getMetadataID() == DILocationKind;
- }
-};
-
/// Subprogram description.
class DISubprogram : public DILocalScope {
friend class LLVMContextImpl;
@@ -2104,6 +1857,9 @@ public:
void replaceRawLinkageName(MDString *LinkageName) {
replaceOperandWith(3, LinkageName);
}
+ void replaceRetainedNodes(DINodeArray N) {
+ replaceOperandWith(7, N.get());
+ }
/// Check if this subprogram describes the given function.
///
@@ -2115,6 +1871,264 @@ public:
}
};
+/// Debug location.
+///
+/// A debug location in source code, used for debug info and otherwise.
+class DILocation : public MDNode {
+ friend class LLVMContextImpl;
+ friend class MDNode;
+
+ DILocation(LLVMContext &C, StorageType Storage, unsigned Line,
+ unsigned Column, ArrayRef<Metadata *> MDs, bool ImplicitCode);
+ ~DILocation() { dropAllReferences(); }
+
+ static DILocation *getImpl(LLVMContext &Context, unsigned Line,
+ unsigned Column, Metadata *Scope,
+ Metadata *InlinedAt, bool ImplicitCode,
+ StorageType Storage, bool ShouldCreate = true);
+ static DILocation *getImpl(LLVMContext &Context, unsigned Line,
+ unsigned Column, DILocalScope *Scope,
+ DILocation *InlinedAt, bool ImplicitCode,
+ StorageType Storage, bool ShouldCreate = true) {
+ return getImpl(Context, Line, Column, static_cast<Metadata *>(Scope),
+ static_cast<Metadata *>(InlinedAt), ImplicitCode, Storage,
+ ShouldCreate);
+ }
+
+ TempDILocation cloneImpl() const {
+ // Get the raw scope/inlinedAt since it is possible to invoke this on
+ // a DILocation containing temporary metadata.
+ return getTemporary(getContext(), getLine(), getColumn(), getRawScope(),
+ getRawInlinedAt(), isImplicitCode());
+ }
+
+public:
+ // Disallow replacing operands.
+ void replaceOperandWith(unsigned I, Metadata *New) = delete;
+
+ DEFINE_MDNODE_GET(DILocation,
+ (unsigned Line, unsigned Column, Metadata *Scope,
+ Metadata *InlinedAt = nullptr, bool ImplicitCode = false),
+ (Line, Column, Scope, InlinedAt, ImplicitCode))
+ DEFINE_MDNODE_GET(DILocation,
+ (unsigned Line, unsigned Column, DILocalScope *Scope,
+ DILocation *InlinedAt = nullptr,
+ bool ImplicitCode = false),
+ (Line, Column, Scope, InlinedAt, ImplicitCode))
+
+ /// Return a (temporary) clone of this.
+ TempDILocation clone() const { return cloneImpl(); }
+
+ unsigned getLine() const { return SubclassData32; }
+ unsigned getColumn() const { return SubclassData16; }
+ DILocalScope *getScope() const { return cast<DILocalScope>(getRawScope()); }
+
+ /// Return the linkage name of the subprogram. If the linkage name is empty,
+ /// return the scope name (the demangled name).
+ StringRef getSubprogramLinkageName() const {
+ DISubprogram *SP = getScope()->getSubprogram();
+ if (!SP)
+ return "";
+ auto Name = SP->getLinkageName();
+ if (!Name.empty())
+ return Name;
+ return SP->getName();
+ }
+
+ DILocation *getInlinedAt() const {
+ return cast_or_null<DILocation>(getRawInlinedAt());
+ }
+
+ /// Check if the location corresponds to an implicit code.
+ /// When the ImplicitCode flag is true, it means that the Instruction
+ /// with this DILocation has been added by the front-end but it hasn't been
+ /// written explicitly by the user (e.g. cleanup stuff in C++ put on a closing
+ /// bracket). It's useful for code coverage to not show a counter on "empty"
+ /// lines.
+ bool isImplicitCode() const { return SubclassData1; }
+ void setImplicitCode(bool ImplicitCode) { SubclassData1 = ImplicitCode; }
+
+ DIFile *getFile() const { return getScope()->getFile(); }
+ StringRef getFilename() const { return getScope()->getFilename(); }
+ StringRef getDirectory() const { return getScope()->getDirectory(); }
+ std::optional<StringRef> getSource() const { return getScope()->getSource(); }
+
+ /// Get the scope where this is inlined.
+ ///
+ /// Walk through \a getInlinedAt() and return \a getScope() from the deepest
+ /// location.
+ DILocalScope *getInlinedAtScope() const {
+ if (auto *IA = getInlinedAt())
+ return IA->getInlinedAtScope();
+ return getScope();
+ }
+
+ /// Get the DWARF discriminator.
+ ///
+ /// DWARF discriminators distinguish identical file locations between
+ /// instructions that are on different basic blocks.
+ ///
+ /// There are 3 components stored in discriminator, from lower bits:
+ ///
+ /// Base discriminator: assigned by AddDiscriminators pass to identify IRs
+ /// that are defined by the same source line, but
+ /// different basic blocks.
+ /// Duplication factor: assigned by optimizations that will scale down
+ /// the execution frequency of the original IR.
+ /// Copy Identifier: assigned by optimizations that clone the IR.
+ /// Each copy of the IR will be assigned an identifier.
+ ///
+ /// Encoding:
+ ///
+ /// The above 3 components are encoded into a 32bit unsigned integer in
+ /// order. If the lowest bit is 1, the current component is empty, and the
+ /// next component will start in the next bit. Otherwise, the current
+ /// component is non-empty, and its content starts in the next bit. The
+ /// value of each component is either 5 bits or 12 bits: if the 7th bit
+ /// is 0, bits 2~6 (5 bits) are used to represent the component; if the
+ /// 7th bit is 1, bits 2~6 (5 bits) and 8~14 (7 bits) are combined to
+ /// represent the component. Thus, the number of bits used for a component
+ /// is either 0 (if it and all the next components are empty); 1 - if it is
+ /// empty; 7 - if its value is up to and including 0x1f (lsb and msb are both
+ /// 0); or 14, if its value is up to and including 0x1ff. Note that the last
+ /// component is also capped at 0x1ff, even in the case when both first
+ /// components are 0, and we'd technically have 29 bits available.
+ ///
+ /// For precise control over the data being encoded in the discriminator,
+ /// use encodeDiscriminator/decodeDiscriminator.
+
+ inline unsigned getDiscriminator() const;
+
+ // For the regular discriminator, it stands for all empty components if all
+ // of the lowest 3 bits are set and all higher 29 bits are unused (zero by
+ // default). Here we fully leverage the higher 29 bits for pseudo probe use.
+ // This is the format:
+ // [2:0] - 0x7
+ // [31:3] - pseudo probe fields guaranteed to be non-zero as a whole
+ // So if the lower 3 bits are all set and the upper bits have at least one
+ // non-zero bit, it is guaranteed to be a pseudo probe discriminator.
+ inline static bool isPseudoProbeDiscriminator(unsigned Discriminator) {
+ return ((Discriminator & 0x7) == 0x7) && (Discriminator & 0xFFFFFFF8);
+ }
+
+ /// Returns a new DILocation with updated \p Discriminator.
+ inline const DILocation *cloneWithDiscriminator(unsigned Discriminator) const;
+
+ /// Returns a new DILocation with updated base discriminator \p BD. Only the
+ /// base discriminator is set in the new DILocation, the other encoded values
+ /// are elided.
+ /// If the discriminator cannot be encoded, the function returns std::nullopt.
+ inline std::optional<const DILocation *>
+ cloneWithBaseDiscriminator(unsigned BD) const;
+
+ /// Returns the duplication factor stored in the discriminator, or 1 if no
+ /// duplication factor (or 0) is encoded.
+ inline unsigned getDuplicationFactor() const;
+
+ /// Returns the copy identifier stored in the discriminator.
+ inline unsigned getCopyIdentifier() const;
+
+ /// Returns the base discriminator stored in the discriminator.
+ inline unsigned getBaseDiscriminator() const;
+
+ /// Returns a new DILocation with duplication factor \p DF * current
+ /// duplication factor encoded in the discriminator. The current duplication
+ /// factor is as defined by getDuplicationFactor().
+ /// Returns std::nullopt if encoding failed.
+ inline std::optional<const DILocation *>
+ cloneByMultiplyingDuplicationFactor(unsigned DF) const;
+
+ /// When two instructions are combined into a single instruction we also
+ /// need to combine the original locations into a single location.
+ /// When the locations are the same we can use either location.
+ /// When they differ, we need a third location which is distinct from either.
+ /// If they share a common scope, use this scope and compare the line/column
+ /// pair of the locations with the common scope:
+ /// * if both match, keep the line and column;
+ /// * if only the line number matches, keep the line and set the column as 0;
+ /// * otherwise set line and column as 0.
+ /// If they do not share a common scope the location is ambiguous and can't be
+ /// represented in a line entry. In this case, set line and column as 0 and
+ /// use the scope of any location.
+ ///
+ /// \p LocA \p LocB: The locations to be merged.
+ static DILocation *getMergedLocation(DILocation *LocA, DILocation *LocB);
+
+ /// Try to combine the vector of locations passed as input in a single one.
+ /// This function applies getMergedLocation() repeatedly left-to-right.
+ ///
+ /// \p Locs: The locations to be merged.
+ static DILocation *getMergedLocations(ArrayRef<DILocation *> Locs);
+
+ /// Return the masked discriminator value for an input discriminator value D
+ /// (i.e. zero out the (B+1)-th and above bits of D; B is 0-based).
+ // Example: an input of (0x1FF, 7) returns 0xFF.
+ static unsigned getMaskedDiscriminator(unsigned D, unsigned B) {
+ return (D & getN1Bits(B));
+ }
+
+ /// Return the bits used for base discriminators.
+ static unsigned getBaseDiscriminatorBits() { return getBaseFSBitEnd(); }
+
+ /// Returns the base discriminator for a given encoded discriminator \p D.
+ static unsigned
+ getBaseDiscriminatorFromDiscriminator(unsigned D,
+ bool IsFSDiscriminator = false) {
+ if (IsFSDiscriminator)
+ return getMaskedDiscriminator(D, getBaseDiscriminatorBits());
+ return getUnsignedFromPrefixEncoding(D);
+ }
+
+ /// Raw encoding of the discriminator. APIs such as cloneWithDuplicationFactor
+ /// have certain special case behavior (e.g. treating empty duplication factor
+ /// as the value '1').
+ /// This API, in conjunction with cloneWithDiscriminator, may be used to
+ /// encode the raw values provided.
+ ///
+ /// \p BD: base discriminator
+ /// \p DF: duplication factor
+ /// \p CI: copy index
+ ///
+ /// The return is std::nullopt if the values cannot be encoded in 32 bits -
+ /// for example, values for BD or DF larger than 12 bits. Otherwise, the
+ /// return is the encoded value.
+ static std::optional<unsigned> encodeDiscriminator(unsigned BD, unsigned DF,
+ unsigned CI);
+
+ /// Raw decoder for values in an encoded discriminator D.
+ static void decodeDiscriminator(unsigned D, unsigned &BD, unsigned &DF,
+ unsigned &CI);
+
+ /// Returns the duplication factor for a given encoded discriminator \p D, or
+ /// 1 if no value or 0 is encoded.
+ static unsigned getDuplicationFactorFromDiscriminator(unsigned D) {
+ if (EnableFSDiscriminator)
+ return 1;
+ D = getNextComponentInDiscriminator(D);
+ unsigned Ret = getUnsignedFromPrefixEncoding(D);
+ if (Ret == 0)
+ return 1;
+ return Ret;
+ }
+
+ /// Returns the copy identifier for a given encoded discriminator \p D.
+ static unsigned getCopyIdentifierFromDiscriminator(unsigned D) {
+ return getUnsignedFromPrefixEncoding(
+ getNextComponentInDiscriminator(getNextComponentInDiscriminator(D)));
+ }
+
+ Metadata *getRawScope() const { return getOperand(0); }
+ Metadata *getRawInlinedAt() const {
+ if (getNumOperands() == 2)
+ return getOperand(1);
+ return nullptr;
+ }
+
+ static bool classof(const Metadata *MD) {
+ return MD->getMetadataID() == DILocationKind;
+ }
+};
+
class DILexicalBlockBase : public DILocalScope {
protected:
DILexicalBlockBase(LLVMContext &C, unsigned ID, StorageType Storage,
@@ -2296,6 +2310,12 @@ DILocation::cloneWithBaseDiscriminator(unsigned D) const {
std::optional<const DILocation *>
DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const {
assert(!EnableFSDiscriminator && "FSDiscriminator should not call this.");
+ // Do not interfere with pseudo probes. Pseudo probes don't need duplication
+ // factor support as samples collected on cloned probes will be aggregated.
+ // Also, a pseudo probe at a callsite uses the DWARF discriminator to store
+ // pseudo-probe-related information, such as the probe id.
+ if (isPseudoProbeDiscriminator(getDiscriminator()))
+ return this;
DF *= getDuplicationFactor();
if (DF <= 1)
@@ -2769,10 +2789,31 @@ public:
/// Return whether the first element a DW_OP_deref.
bool startsWithDeref() const;
+ /// Return whether there is exactly one operator and it is a DW_OP_deref.
+ bool isDeref() const;
+
/// Holds the characteristics of one fragment of a larger variable.
struct FragmentInfo {
+ FragmentInfo() = default;
+ FragmentInfo(uint64_t SizeInBits, uint64_t OffsetInBits)
+ : SizeInBits(SizeInBits), OffsetInBits(OffsetInBits) {}
uint64_t SizeInBits;
uint64_t OffsetInBits;
+ /// Return the index of the first bit of the fragment.
+ uint64_t startInBits() const { return OffsetInBits; }
+ /// Return the index of the bit after the end of the fragment, e.g. for
+ /// fragment offset=16 and size=32 return their sum, 48.
+ uint64_t endInBits() const { return OffsetInBits + SizeInBits; }
+
+ /// Returns a zero-sized fragment if A and B don't intersect.
+ static DIExpression::FragmentInfo intersect(DIExpression::FragmentInfo A,
+ DIExpression::FragmentInfo B) {
+ uint64_t StartInBits = std::max(A.OffsetInBits, B.OffsetInBits);
+ uint64_t EndInBits = std::min(A.endInBits(), B.endInBits());
+ if (EndInBits <= StartInBits)
+ return {0, 0};
+ return DIExpression::FragmentInfo(EndInBits - StartInBits, StartInBits);
+ }
};
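A short worked example for the intersect() helper above, illustrative only:

  // Bits [16, 48) intersected with bits [32, 96) give bits [32, 48).
  DIExpression::FragmentInfo A(/*SizeInBits=*/32, /*OffsetInBits=*/16);
  DIExpression::FragmentInfo B(/*SizeInBits=*/64, /*OffsetInBits=*/32);
  DIExpression::FragmentInfo I = DIExpression::FragmentInfo::intersect(A, B);
  // I.OffsetInBits == 32 and I.SizeInBits == 16; a zero-sized result means
  // the fragments do not overlap.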
/// Retrieve the details of this fragment expression.
@@ -2835,9 +2876,9 @@ public:
/// non-variadic form and not considering the debug operands.
/// \p FirstExpr is the DIExpression for the first debug value.
/// \p FirstIndirect should be true if the first debug value is indirect; in
- /// IR this should be true for dbg.declare and dbg.addr intrinsics and false
- /// for dbg.values, and in MIR this should be true only for DBG_VALUE
- /// instructions whose second operand is an immediate value.
+ /// IR this should be true for dbg.declare intrinsics and false for
+ /// dbg.values, and in MIR this should be true only for DBG_VALUE instructions
+ /// whose second operand is an immediate value.
/// \p SecondExpr and \p SecondIndirect have the same meaning as the prior
/// arguments, but apply to the second debug value.
static bool isEqualExpression(const DIExpression *FirstExpr,
@@ -3797,6 +3838,18 @@ template <> struct DenseMapInfo<DebugVariable> {
}
};
+/// Identifies a unique instance of a whole variable (discards/ignores fragment
+/// information).
+class DebugVariableAggregate : public DebugVariable {
+public:
+ DebugVariableAggregate(const DbgVariableIntrinsic *DVI);
+ DebugVariableAggregate(const DebugVariable &V)
+ : DebugVariable(V.getVariable(), std::nullopt, V.getInlinedAt()) {}
+};
+
+template <>
+struct DenseMapInfo<DebugVariableAggregate>
+ : public DenseMapInfo<DebugVariable> {};
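A sketch of the intended use as a map key (not from the patch; Intrinsics is an assumed collection of DbgVariableIntrinsic pointers):

  // Fragments of the same variable (per inlined-at location) share one key.
  DenseMap<DebugVariableAggregate, SmallVector<DbgVariableIntrinsic *, 4>> ByVar;
  for (DbgVariableIntrinsic *DVI : Intrinsics)
    ByVar[DebugVariableAggregate(DVI)].push_back(DVI);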
} // end namespace llvm
#undef DEFINE_MDNODE_GET_UNPACK_IMPL
diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h
index 85a41c8dcc85..203a73067edc 100644
--- a/llvm/include/llvm/IR/DerivedTypes.h
+++ b/llvm/include/llvm/IR/DerivedTypes.h
@@ -218,7 +218,9 @@ class StructType : public Type {
SCDB_HasBody = 1,
SCDB_Packed = 2,
SCDB_IsLiteral = 4,
- SCDB_IsSized = 8
+ SCDB_IsSized = 8,
+ SCDB_ContainsScalableVector = 16,
+ SCDB_NotContainsScalableVector = 32
};
/// For a named struct that actually has a name, this is a pointer to the
@@ -284,7 +286,16 @@ public:
bool isSized(SmallPtrSetImpl<Type *> *Visited = nullptr) const;
/// Returns true if this struct contains a scalable vector.
- bool containsScalableVectorType() const;
+ bool
+ containsScalableVectorType(SmallPtrSetImpl<Type *> *Visited = nullptr) const;
+
+ /// Returns true if this struct contains homogeneous scalable vector types.
+ /// Note that the definition of homogeneous scalable vector type is not
+ /// recursive here. That means the following structure will return false
+ /// when calling this function.
+ /// {{<vscale x 2 x i32>, <vscale x 4 x i64>},
+ /// {<vscale x 2 x i32>, <vscale x 4 x i64>}}
+ bool containsHomogeneousScalableVectorTypes() const;
/// Return true if this is a named struct that has a non-empty name.
bool hasName() const { return SymbolTableEntry != nullptr; }
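An illustrative sketch of the new query (not from the patch; Ctx is an assumed LLVMContext):

  Type *NXV4I32 = ScalableVectorType::get(Type::getInt32Ty(Ctx), 4);
  StructType *Flat = StructType::get(Ctx, {NXV4I32, NXV4I32});
  // Flat->containsHomogeneousScalableVectorTypes() == true
  Type *Inner = Flat;
  StructType *Nested = StructType::get(Ctx, {Inner, Inner});
  // Nested->containsHomogeneousScalableVectorTypes() == false: the check is
  // not recursive, matching the note above.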
@@ -630,11 +641,8 @@ inline ElementCount VectorType::getElementCount() const {
/// Class to represent pointers.
class PointerType : public Type {
- explicit PointerType(Type *ElType, unsigned AddrSpace);
explicit PointerType(LLVMContext &C, unsigned AddrSpace);
- Type *PointeeTy;
-
public:
PointerType(const PointerType &) = delete;
PointerType &operator=(const PointerType &) = delete;
@@ -663,14 +671,14 @@ public:
/// given address space. This is only useful during the opaque pointer
/// transition.
/// TODO: remove after opaque pointer transition is complete.
+ [[deprecated("Use PointerType::get() with LLVMContext argument instead")]]
static PointerType *getWithSamePointeeType(PointerType *PT,
unsigned AddressSpace) {
- if (PT->isOpaque())
- return get(PT->getContext(), AddressSpace);
- return get(PT->PointeeTy, AddressSpace);
+ return get(PT->getContext(), AddressSpace);
}
- bool isOpaque() const { return !PointeeTy; }
+ [[deprecated("Always returns true")]]
+ bool isOpaque() const { return true; }
/// Return true if the specified type is valid as a element type.
static bool isValidElementType(Type *ElemTy);
@@ -685,16 +693,18 @@ public:
/// type matches Ty. Primarily used for checking if an instruction's pointer
/// operands are valid types. Will be useless after non-opaque pointers are
/// removed.
- bool isOpaqueOrPointeeTypeMatches(Type *Ty) {
- return isOpaque() || PointeeTy == Ty;
+ [[deprecated("Always returns true")]]
+ bool isOpaqueOrPointeeTypeMatches(Type *) {
+ return true;
}
/// Return true if both pointer types have the same element type. Two opaque
/// pointers are considered to have the same element type, while an opaque
/// and a non-opaque pointer have different element types.
/// TODO: Remove after opaque pointer transition is complete.
+ [[deprecated("Always returns true")]]
bool hasSameElementTypeAs(PointerType *Other) {
- return PointeeTy == Other->PointeeTy;
+ return true;
}
/// Implement support type inquiry through isa, cast, and dyn_cast.
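A migration sketch for the deprecated helpers above, illustrative only; PT and AS are an assumed PointerType* and address space:

  // Pre-opaque-pointers:
  //   PointerType *NewPT = PointerType::getWithSamePointeeType(PT, AS);
  // With opaque pointers the pointee type is gone, so this reduces to:
  PointerType *NewPT = PointerType::get(PT->getContext(), AS);
  // isOpaque(), isOpaqueOrPointeeTypeMatches() and hasSameElementTypeAs() now
  // always return true, so checks built on them can simply be removed.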
diff --git a/llvm/include/llvm/IR/Dominators.h b/llvm/include/llvm/IR/Dominators.h
index c2d080bc2004..6ceadbf30b89 100644
--- a/llvm/include/llvm/IR/Dominators.h
+++ b/llvm/include/llvm/IR/Dominators.h
@@ -191,7 +191,7 @@ class DominatorTree : public DominatorTreeBase<BasicBlock, false> {
/// * Non-instruction Defs dominate everything.
/// * Def does not dominate a use in Def itself (outside of degenerate cases
/// like unreachable code or trivial phi cycles).
- /// * Invoke/callbr Defs only dominate uses in their default destination.
+ /// * Invoke Defs only dominate uses in their default destination.
bool dominates(const Value *Def, const Use &U) const;
/// Return true if value Def dominates all possible uses inside instruction
/// User. Same comments as for the Use-based API apply.
diff --git a/llvm/include/llvm/Analysis/EHPersonalities.h b/llvm/include/llvm/IR/EHPersonalities.h
index 660d431bb063..bd768440bfb9 100644
--- a/llvm/include/llvm/Analysis/EHPersonalities.h
+++ b/llvm/include/llvm/IR/EHPersonalities.h
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_EHPERSONALITIES_H
-#define LLVM_ANALYSIS_EHPERSONALITIES_H
+#ifndef LLVM_IR_EHPERSONALITIES_H
+#define LLVM_IR_EHPERSONALITIES_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/TinyPtrVector.h"
@@ -115,4 +115,4 @@ DenseMap<BasicBlock *, ColorVector> colorEHFunclets(Function &F);
} // end namespace llvm
-#endif
+#endif // LLVM_IR_EHPERSONALITIES_H
diff --git a/llvm/include/llvm/IR/FMF.h b/llvm/include/llvm/IR/FMF.h
index a49feb5a8946..99e9a2477779 100644
--- a/llvm/include/llvm/IR/FMF.h
+++ b/llvm/include/llvm/IR/FMF.h
@@ -13,9 +13,8 @@
#ifndef LLVM_IR_FMF_H
#define LLVM_IR_FMF_H
-#include "llvm/Support/raw_ostream.h"
-
namespace llvm {
+class raw_ostream;
/// Convenience struct for specifying and reasoning about fast-math flags.
class FastMathFlags {
diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h
index aee0a9dd4fec..93cf0d27e9a7 100644
--- a/llvm/include/llvm/IR/Function.h
+++ b/llvm/include/llvm/IR/Function.h
@@ -417,10 +417,6 @@ public:
/// gets the specified attribute from the list of attributes.
Attribute getParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const;
- /// removes noundef and other attributes that imply undefined behavior if a
- /// `undef` or `poison` value is passed from the list of attributes.
- void removeParamUndefImplyingAttrs(unsigned ArgNo);
-
/// Return the stack alignment for the function.
MaybeAlign getFnStackAlign() const {
return AttributeSets.getFnStackAlignment();
@@ -483,6 +479,11 @@ public:
return AttributeSets.getParamDereferenceableOrNullBytes(ArgNo);
}
+ /// Extract the nofpclass attribute for a parameter.
+ FPClassTest getParamNoFPClass(unsigned ArgNo) const {
+ return AttributeSets.getParamNoFPClass(ArgNo);
+ }
+
/// Determine if the function is presplit coroutine.
bool isPresplitCoroutine() const {
return hasFnAttribute(Attribute::PresplitCoroutine);
@@ -649,6 +650,15 @@ public:
/// function.
DenormalMode getDenormalMode(const fltSemantics &FPType) const;
+ /// Return the representational value of "denormal-fp-math". Code interested
+ /// in the semantics of the function should use getDenormalMode instead.
+ DenormalMode getDenormalModeRaw() const;
+
+ /// Return the representational value of "denormal-fp-math-f32". Code
+ /// interested in the semantics of the function should use getDenormalMode
+ /// instead.
+ DenormalMode getDenormalModeF32Raw() const;
+
/// copyAttributesFrom - copy all additional attributes (those not needed to
/// create a Function) from the Function Src to this one.
void copyAttributesFrom(const Function *Src);
diff --git a/llvm/include/llvm/IR/GCStrategy.h b/llvm/include/llvm/IR/GCStrategy.h
index 9f3904f4c850..3186465f0018 100644
--- a/llvm/include/llvm/IR/GCStrategy.h
+++ b/llvm/include/llvm/IR/GCStrategy.h
@@ -105,7 +105,7 @@ public:
/// Returns true if the RewriteStatepointsForGC pass should run on functions
/// using this GC.
bool useRS4GC() const {
- assert(useStatepoints() &&
+ assert((!UseRS4GC || useStatepoints()) &&
"GC strategy has useRS4GC but not useStatepoints set");
return UseRS4GC;
}
diff --git a/llvm/include/llvm/IR/GetElementPtrTypeIterator.h b/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
index 1fa996229749..8c6ede96c873 100644
--- a/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
+++ b/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
@@ -68,9 +68,9 @@ public:
// temporarily not giving this iterator an operator*() to avoid a subtle
// semantics break.
Type *getIndexedType() const {
- if (auto *T = CurTy.dyn_cast<Type *>())
+ if (auto *T = dyn_cast_if_present<Type *>(CurTy))
return T;
- return CurTy.get<StructType *>()->getTypeAtIndex(getOperand());
+ return cast<StructType *>(CurTy)->getTypeAtIndex(getOperand());
}
Value *getOperand() const { return const_cast<Value *>(&**OpIt); }
@@ -108,13 +108,13 @@ public:
// we should provide a more minimal API here that exposes not much more than
// that.
- bool isStruct() const { return CurTy.is<StructType *>(); }
- bool isSequential() const { return CurTy.is<Type *>(); }
+ bool isStruct() const { return isa<StructType *>(CurTy); }
+ bool isSequential() const { return isa<Type *>(CurTy); }
- StructType *getStructType() const { return CurTy.get<StructType *>(); }
+ StructType *getStructType() const { return cast<StructType *>(CurTy); }
StructType *getStructTypeOrNull() const {
- return CurTy.dyn_cast<StructType *>();
+ return dyn_cast_if_present<StructType *>(CurTy);
}
};
diff --git a/llvm/include/llvm/IR/GlobalObject.h b/llvm/include/llvm/IR/GlobalObject.h
index 96a270316686..889bd3a28e12 100644
--- a/llvm/include/llvm/IR/GlobalObject.h
+++ b/llvm/include/llvm/IR/GlobalObject.h
@@ -82,6 +82,12 @@ public:
return decodeMaybeAlign(AlignmentData);
}
+ /// Sets the alignment attribute of the GlobalObject.
+ void setAlignment(Align Align);
+
+ /// Sets the alignment attribute of the GlobalObject.
+ /// This method will be deprecated as the alignment property should always be
+ /// defined.
void setAlignment(MaybeAlign Align);
unsigned getGlobalObjectSubClassData() const {
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index 3c78faa3f94f..f86ce845d191 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -567,6 +567,12 @@ public:
return DL.getIntPtrType(Context, AddrSpace);
}
+ /// Fetch the type of an integer that should be used to index GEP operations
+ /// within AddressSpace.
+ IntegerType *getIndexTy(const DataLayout &DL, unsigned AddrSpace) {
+ return DL.getIndexType(Context, AddrSpace);
+ }
+
//===--------------------------------------------------------------------===//
// Intrinsic creation methods
//===--------------------------------------------------------------------===//
@@ -750,6 +756,16 @@ public:
/// vector.
CallInst *CreateFPMinReduce(Value *Src);
+ /// Create a vector float maximum reduction intrinsic of the source
+ /// vector. This variant follows the NaN and signed zero semantic of
+ /// llvm.maximum intrinsic.
+ CallInst *CreateFPMaximumReduce(Value *Src);
+
+ /// Create a vector float minimum reduction intrinsic of the source
+ /// vector. This variant follows the NaN and signed zero semantic of
+ /// llvm.minimum intrinsic.
+ CallInst *CreateFPMinimumReduce(Value *Src);
+
/// Create a lifetime.start intrinsic.
///
/// If the pointer isn't i8* it will be converted.
@@ -794,6 +810,12 @@ public:
CallInst *CreateMaskedCompressStore(Value *Val, Value *Ptr,
Value *Mask = nullptr);
+ /// Return an all true boolean vector (mask) with \p NumElts lanes.
+ Value *getAllOnesMask(ElementCount NumElts) {
+ VectorType *VTy = VectorType::get(Type::getInt1Ty(Context), NumElts);
+ return Constant::getAllOnesValue(VTy);
+ }
+
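Illustrative use (not from the patch; Builder is an assumed IRBuilder<> with a valid insertion point):

  // An all-true <vscale x 4 x i1> mask, e.g. for an unpredicated masked load.
  Value *Mask = Builder.getAllOnesMask(ElementCount::getScalable(4));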
/// Create an assume intrinsic call that allows the optimizer to
/// assume that the provided condition will be true.
///
@@ -893,6 +915,14 @@ public:
/// will be the same type as that of \p Scaling.
Value *CreateVScale(Constant *Scaling, const Twine &Name = "");
+ /// Create an expression which evaluates to the number of elements in \p EC
+ /// at runtime.
+ Value *CreateElementCount(Type *DstType, ElementCount EC);
+
+ /// Create an expression which evaluates to the number of units in \p Size
+ /// at runtime. This works for both units of bits and bytes.
+ Value *CreateTypeSize(Type *DstType, TypeSize Size);
+
/// Creates a vector of type \p DstType with the linear sequence <0, 1, ...>
Value *CreateStepVector(Type *DstType, const Twine &Name = "");
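Illustrative use of the two new helpers (not from the patch; Builder is an assumed IRBuilder<>):

  // Lane count of <vscale x 4 x ...>, materialized as an i64 at runtime.
  Value *NumElts = Builder.CreateElementCount(Builder.getInt64Ty(),
                                              ElementCount::getScalable(4));
  // A scalable size of 16 bytes per vscale, also materialized as an i64.
  Value *Bytes = Builder.CreateTypeSize(Builder.getInt64Ty(),
                                        TypeSize::getScalable(16));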
@@ -926,11 +956,21 @@ public:
/// Create call to the minnum intrinsic.
CallInst *CreateMinNum(Value *LHS, Value *RHS, const Twine &Name = "") {
+ if (IsFPConstrained) {
+ return CreateConstrainedFPUnroundedBinOp(
+ Intrinsic::experimental_constrained_minnum, LHS, RHS, nullptr, Name);
+ }
+
return CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS, nullptr, Name);
}
/// Create call to the maxnum intrinsic.
CallInst *CreateMaxNum(Value *LHS, Value *RHS, const Twine &Name = "") {
+ if (IsFPConstrained) {
+ return CreateConstrainedFPUnroundedBinOp(
+ Intrinsic::experimental_constrained_maxnum, LHS, RHS, nullptr, Name);
+ }
+
return CreateBinaryIntrinsic(Intrinsic::maxnum, LHS, RHS, nullptr, Name);
}
@@ -981,8 +1021,6 @@ private:
ArrayRef<Type *> OverloadedTypes,
const Twine &Name = "");
- Value *getCastedInt8PtrValue(Value *Ptr);
-
//===--------------------------------------------------------------------===//
// Instruction creation methods: Terminators
//===--------------------------------------------------------------------===//
@@ -1628,6 +1666,11 @@ public:
std::optional<RoundingMode> Rounding = std::nullopt,
std::optional<fp::ExceptionBehavior> Except = std::nullopt);
+ CallInst *CreateConstrainedFPUnroundedBinOp(
+ Intrinsic::ID ID, Value *L, Value *R, Instruction *FMFSource = nullptr,
+ const Twine &Name = "", MDNode *FPMathTag = nullptr,
+ std::optional<fp::ExceptionBehavior> Except = std::nullopt);
+
Value *CreateNeg(Value *V, const Twine &Name = "", bool HasNUW = false,
bool HasNSW = false) {
return CreateSub(Constant::getNullValue(V->getType()), V, Name, HasNUW,
@@ -2423,12 +2466,12 @@ public:
/// Return a boolean value testing if \p Arg == 0.
Value *CreateIsNull(Value *Arg, const Twine &Name = "") {
- return CreateICmpEQ(Arg, ConstantInt::getNullValue(Arg->getType()), Name);
+ return CreateICmpEQ(Arg, Constant::getNullValue(Arg->getType()), Name);
}
/// Return a boolean value testing if \p Arg != 0.
Value *CreateIsNotNull(Value *Arg, const Twine &Name = "") {
- return CreateICmpNE(Arg, ConstantInt::getNullValue(Arg->getType()), Name);
+ return CreateICmpNE(Arg, Constant::getNullValue(Arg->getType()), Name);
}
/// Return a boolean value testing if \p Arg < 0.
@@ -2498,6 +2541,8 @@ public:
unsigned Index, unsigned FieldIndex,
MDNode *DbgInfo);
+ Value *createIsFPClass(Value *FPNum, unsigned Test);
+
private:
/// Helper function that creates an assume intrinsic call that
/// represents an alignment assumption on the provided pointer \p PtrValue
diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index 56c59b2d5692..6095b0a1be69 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -245,7 +245,7 @@ public:
#include "llvm/IR/Instruction.def"
static BinaryOperator *
- CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Instruction *CopyO,
+ CreateWithCopiedFlags(BinaryOps Opc, Value *V1, Value *V2, Value *CopyO,
const Twine &Name = "",
Instruction *InsertBefore = nullptr) {
BinaryOperator *BO = Create(Opc, V1, V2, Name, InsertBefore);
@@ -628,13 +628,6 @@ public:
/// Determine if this is an integer-only cast.
bool isIntegerCast() const;
- /// A lossless cast is one that does not alter the basic value. It implies
- /// a no-op cast but is more stringent, preventing things like int->float,
- /// long->double, or int->ptr.
- /// @returns true iff the cast is lossless.
- /// Determine if this is a lossless cast.
- bool isLosslessCast() const;
-
/// A no-op cast is one that can be effected without changing any bits.
/// It implies that the source and destination types are the same size. The
/// DataLayout argument is to determine the pointer size when examining casts
@@ -844,6 +837,17 @@ public:
return getOrderedPredicate(getPredicate());
}
+ /// Returns the unordered variant of a floating point compare.
+ ///
+ /// For example, OEQ -> UEQ, OGT -> UGT, OLT -> ULT.
+ static Predicate getUnorderedPredicate(Predicate Pred) {
+ return static_cast<Predicate>(Pred | FCMP_UNO);
+ }
+
+ Predicate getUnorderedPredicate() const {
+ return getUnorderedPredicate(getPredicate());
+ }
+
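A small illustration, not from the patch:

  // The unordered variant also succeeds when either operand is NaN.
  CmpInst::Predicate P = CmpInst::FCMP_OLT;
  CmpInst::Predicate U = CmpInst::getUnorderedPredicate(P); // FCMP_ULT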
/// For example, EQ -> NE, UGT -> ULE, SLT -> SGE,
/// OEQ -> UNE, UGT -> OLE, OLT -> UGE, etc.
/// @returns the inverse predicate for predicate provided in \p pred.
@@ -1074,6 +1078,8 @@ struct OperandTraits<CmpInst> : public FixedNumOperandTraits<CmpInst, 2> {
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CmpInst, Value)
+raw_ostream &operator<<(raw_ostream &OS, CmpInst::Predicate Pred);
+
/// A lightweight accessor for an operand bundle meant to be passed
/// around by value.
struct OperandBundleUse {
@@ -1455,7 +1461,6 @@ public:
/// type.
void setCalledFunction(FunctionType *FTy, Value *Fn) {
this->FTy = FTy;
- assert(cast<PointerType>(Fn->getType())->isOpaqueOrPointeeTypeMatches(FTy));
// This function doesn't mutate the return type, only the function
// type. Seems broken, but I'm just gonna stick an assert in for now.
assert(getType() == FTy->getReturnType());
@@ -1564,6 +1569,11 @@ public:
Attrs = Attrs.removeFnAttribute(getContext(), Kind);
}
+ /// Removes the attribute from the function
+ void removeFnAttr(StringRef Kind) {
+ Attrs = Attrs.removeFnAttribute(getContext(), Kind);
+ }
+
/// Removes the attribute from the return value
void removeRetAttr(Attribute::AttrKind Kind) {
Attrs = Attrs.removeRetAttribute(getContext(), Kind);
@@ -1803,7 +1813,10 @@ public:
/// Extract the number of dereferenceable bytes for a call or
/// parameter (0=unknown).
uint64_t getRetDereferenceableBytes() const {
- return Attrs.getRetDereferenceableBytes();
+ uint64_t Bytes = Attrs.getRetDereferenceableBytes();
+ if (const Function *F = getCalledFunction())
+ Bytes = std::max(Bytes, F->getAttributes().getRetDereferenceableBytes());
+ return Bytes;
}
/// Extract the number of dereferenceable bytes for a call or
@@ -1815,7 +1828,13 @@ public:
/// Extract the number of dereferenceable_or_null bytes for a call
/// (0=unknown).
uint64_t getRetDereferenceableOrNullBytes() const {
- return Attrs.getRetDereferenceableOrNullBytes();
+ uint64_t Bytes = Attrs.getRetDereferenceableOrNullBytes();
+ if (const Function *F = getCalledFunction()) {
+ Bytes = std::max(Bytes,
+ F->getAttributes().getRetDereferenceableOrNullBytes());
+ }
+
+ return Bytes;
}
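A sketch of the new behaviour (not from the patch; CB is an assumed CallBase*):

  // Given IR such as:
  //   declare dereferenceable(16) ptr @f()
  //   %p = call dereferenceable(8) ptr @f()
  // the callsite query now also consults the callee's return attribute:
  uint64_t Bytes = CB->getRetDereferenceableBytes(); // 16 (max of 8 and 16)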
/// Extract the number of dereferenceable_or_null bytes for a
@@ -1824,6 +1843,14 @@ public:
return Attrs.getParamDereferenceableOrNullBytes(i);
}
+ /// Extract a test mask for disallowed floating-point value classes for the
+ /// return value.
+ FPClassTest getRetNoFPClass() const;
+
+ /// Extract a test mask for disallowed floating-point value classes for the
+ /// parameter.
+ FPClassTest getParamNoFPClass(unsigned i) const;
+
/// Return true if the return value is known to be not null.
/// This may be because it has the nonnull attribute, or because at least
/// one byte is dereferenceable and the pointer is in addrspace(0).
@@ -1927,7 +1954,7 @@ public:
return Attrs.hasAttrSomewhere(Attribute::ByVal);
}
- ///@{
+ ///@}
// End of attribute API.
/// \name Operand Bundle API
@@ -2131,7 +2158,7 @@ public:
/// OperandBundleUse.
OperandBundleUse
operandBundleFromBundleOpInfo(const BundleOpInfo &BOI) const {
- auto begin = op_begin();
+ const auto *begin = op_begin();
ArrayRef<Use> Inputs(begin + BOI.Begin, begin + BOI.End);
return OperandBundleUse(BOI.Tag, Inputs);
}
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index be25ad954e75..5fd8b27447b7 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -183,10 +183,10 @@ public:
/// its operands.
bool isOnlyUserOfAnyOperand();
- static const char* getOpcodeName(unsigned OpCode);
+ static const char *getOpcodeName(unsigned Opcode);
- static inline bool isTerminator(unsigned OpCode) {
- return OpCode >= TermOpsBegin && OpCode < TermOpsEnd;
+ static inline bool isTerminator(unsigned Opcode) {
+ return Opcode >= TermOpsBegin && Opcode < TermOpsEnd;
}
static inline bool isUnaryOp(unsigned Opcode) {
@@ -225,19 +225,19 @@ public:
return isBitwiseLogicOp(getOpcode());
}
- /// Determine if the OpCode is one of the CastInst instructions.
- static inline bool isCast(unsigned OpCode) {
- return OpCode >= CastOpsBegin && OpCode < CastOpsEnd;
+ /// Determine if the Opcode is one of the CastInst instructions.
+ static inline bool isCast(unsigned Opcode) {
+ return Opcode >= CastOpsBegin && Opcode < CastOpsEnd;
}
- /// Determine if the OpCode is one of the FuncletPadInst instructions.
- static inline bool isFuncletPad(unsigned OpCode) {
- return OpCode >= FuncletPadOpsBegin && OpCode < FuncletPadOpsEnd;
+ /// Determine if the Opcode is one of the FuncletPadInst instructions.
+ static inline bool isFuncletPad(unsigned Opcode) {
+ return Opcode >= FuncletPadOpsBegin && Opcode < FuncletPadOpsEnd;
}
- /// Returns true if the OpCode is a terminator related to exception handling.
- static inline bool isExceptionalTerminator(unsigned OpCode) {
- switch (OpCode) {
+ /// Returns true if the Opcode is a terminator related to exception handling.
+ static inline bool isExceptionalTerminator(unsigned Opcode) {
+ switch (Opcode) {
case Instruction::CatchSwitch:
case Instruction::CatchRet:
case Instruction::CleanupRet:
@@ -320,7 +320,7 @@ public:
/// @{
/// Passes are required to drop metadata they don't understand. This is a
/// convenience method for passes to do so.
- /// dropUndefImplyingAttrsAndUnknownMetadata should be used instead of
+ /// dropUBImplyingAttrsAndUnknownMetadata should be used instead of
/// this API if the Instruction being modified is a call.
void dropUnknownNonDebugMetadata(ArrayRef<unsigned> KnownIDs);
void dropUnknownNonDebugMetadata() {
@@ -339,13 +339,20 @@ public:
/// If this instruction already has !annotation metadata, append \p Annotation
/// to the existing node.
void addAnnotationMetadata(StringRef Annotation);
-
+ /// Adds an !annotation metadata node with an array of \p Annotations
+ /// as a tuple to this instruction. If this instruction already has
+ /// !annotation metadata, append the tuple to the existing node.
+ void addAnnotationMetadata(SmallVector<StringRef> Annotations);
/// Returns the AA metadata for this instruction.
AAMDNodes getAAMetadata() const;
/// Sets the AA metadata on this instruction from the AAMDNodes structure.
void setAAMetadata(const AAMDNodes &N);
+ /// Sets the nosanitize metadata on this instruction.
+ void setNoSanitizeMetadata();
+
/// Retrieve total raw weight values of a branch.
/// Returns true on success with profile total weights filled in.
/// Returns false if no metadata was found.
@@ -401,11 +408,15 @@ public:
}
/// This function drops non-debug unknown metadata (through
- /// dropUnknownNonDebugMetadata). For calls, it also drops parameter and
+ /// dropUnknownNonDebugMetadata). For calls, it also drops parameter and
/// return attributes that can cause undefined behaviour. Both of these should
/// be done by passes which move instructions in IR.
- void
- dropUndefImplyingAttrsAndUnknownMetadata(ArrayRef<unsigned> KnownIDs = {});
+ void dropUBImplyingAttrsAndUnknownMetadata(ArrayRef<unsigned> KnownIDs = {});
+
+ /// Drop any attributes or metadata that can cause immediate undefined
+ /// behavior. Retain other attributes/metadata on a best-effort basis.
+ /// This should be used when speculating instructions.
+ void dropUBImplyingAttrsAndMetadata();
/// Determine whether the exact flag is set.
bool isExact() const LLVM_READONLY;
@@ -513,7 +524,7 @@ public:
/// applications, thus the N-way merging should be in code path.
/// The DebugLoc attached to this instruction will be overwritten by the
/// merged DebugLoc.
- void applyMergedLocation(const DILocation *LocA, const DILocation *LocB);
+ void applyMergedLocation(DILocation *LocA, DILocation *LocB);
/// Updates the debug location given that the instruction has been hoisted
/// from a block to a predecessor of that block.
@@ -636,8 +647,15 @@ public:
/// Return true if this instruction has a volatile memory access.
bool isVolatile() const LLVM_READONLY;
+ /// Return the type this instruction accesses in memory, if any.
+ Type *getAccessType() const LLVM_READONLY;
+
/// Return true if this instruction may throw an exception.
- bool mayThrow() const LLVM_READONLY;
+ ///
+ /// If IncludePhaseOneUnwind is set, this will also include cases where
+ /// phase one unwinding may unwind past this frame due to skipping of
+ /// cleanup landingpads.
+ bool mayThrow(bool IncludePhaseOneUnwind = false) const LLVM_READONLY;
/// Return true if this instruction behaves like a memory fence: it can load
/// or store to memory location without being given a memory location.
@@ -764,6 +782,17 @@ public:
/// Determine if one instruction is the same operation as another.
bool isSameOperationAs(const Instruction *I, unsigned flags = 0) const LLVM_READONLY;
+ /// This function determines if the specified instruction has the same
+ /// "special" characteristics as the current one. This means that opcode
+ /// specific details are the same. As a common example, if we are comparing
+ /// loads, then hasSameSpecialState would compare the alignments (among
+ /// other things).
+ /// @returns true if the specified instruction has the same opcode-specific
+ /// characteristics as the current one.
+ bool hasSameSpecialState(const Instruction *I2,
+ bool IgnoreAlignment = false) const LLVM_READONLY;
+
/// Return true if there are any uses of this instruction in blocks other than
/// the specified block. Note that PHI nodes are considered to evaluate their
/// operands in the corresponding predecessor block.
diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index fb6faec3ad84..8d60384e1a32 100644
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -969,8 +969,6 @@ public:
Instruction *InsertBefore = nullptr) {
unsigned Values = 1 + unsigned(IdxList.size());
assert(PointeeType && "Must specify element type");
- assert(cast<PointerType>(Ptr->getType()->getScalarType())
- ->isOpaqueOrPointeeTypeMatches(PointeeType));
return new (Values) GetElementPtrInst(PointeeType, Ptr, IdxList, Values,
NameStr, InsertBefore);
}
@@ -981,8 +979,6 @@ public:
BasicBlock *InsertAtEnd) {
unsigned Values = 1 + unsigned(IdxList.size());
assert(PointeeType && "Must specify element type");
- assert(cast<PointerType>(Ptr->getType()->getScalarType())
- ->isOpaqueOrPointeeTypeMatches(PointeeType));
return new (Values) GetElementPtrInst(PointeeType, Ptr, IdxList, Values,
NameStr, InsertAtEnd);
}
@@ -1018,8 +1014,6 @@ public:
void setResultElementType(Type *Ty) { ResultElementType = Ty; }
Type *getResultElementType() const {
- assert(cast<PointerType>(getType()->getScalarType())
- ->isOpaqueOrPointeeTypeMatches(ResultElementType));
return ResultElementType;
}
@@ -1083,26 +1077,19 @@ public:
/// Returns the pointer type returned by the GEP
/// instruction, which may be a vector of pointers.
- static Type *getGEPReturnType(Type *ElTy, Value *Ptr,
- ArrayRef<Value *> IdxList) {
- PointerType *OrigPtrTy = cast<PointerType>(Ptr->getType()->getScalarType());
- unsigned AddrSpace = OrigPtrTy->getAddressSpace();
- Type *ResultElemTy = checkGEPType(getIndexedType(ElTy, IdxList));
- Type *PtrTy = OrigPtrTy->isOpaque()
- ? PointerType::get(OrigPtrTy->getContext(), AddrSpace)
- : PointerType::get(ResultElemTy, AddrSpace);
+ static Type *getGEPReturnType(Value *Ptr, ArrayRef<Value *> IdxList) {
// Vector GEP
- if (auto *PtrVTy = dyn_cast<VectorType>(Ptr->getType())) {
- ElementCount EltCount = PtrVTy->getElementCount();
- return VectorType::get(PtrTy, EltCount);
- }
+ Type *Ty = Ptr->getType();
+ if (Ty->isVectorTy())
+ return Ty;
+
for (Value *Index : IdxList)
if (auto *IndexVTy = dyn_cast<VectorType>(Index->getType())) {
ElementCount EltCount = IndexVTy->getElementCount();
- return VectorType::get(PtrTy, EltCount);
+ return VectorType::get(Ty, EltCount);
}
// Scalar GEP
- return PtrTy;
+ return Ty;
}
unsigned getNumIndices() const { // Note: always non-negative
@@ -1160,13 +1147,11 @@ GetElementPtrInst::GetElementPtrInst(Type *PointeeType, Value *Ptr,
ArrayRef<Value *> IdxList, unsigned Values,
const Twine &NameStr,
Instruction *InsertBefore)
- : Instruction(getGEPReturnType(PointeeType, Ptr, IdxList), GetElementPtr,
+ : Instruction(getGEPReturnType(Ptr, IdxList), GetElementPtr,
OperandTraits<GetElementPtrInst>::op_end(this) - Values,
Values, InsertBefore),
SourceElementType(PointeeType),
ResultElementType(getIndexedType(PointeeType, IdxList)) {
- assert(cast<PointerType>(getType()->getScalarType())
- ->isOpaqueOrPointeeTypeMatches(ResultElementType));
init(Ptr, IdxList, NameStr);
}
@@ -1174,13 +1159,11 @@ GetElementPtrInst::GetElementPtrInst(Type *PointeeType, Value *Ptr,
ArrayRef<Value *> IdxList, unsigned Values,
const Twine &NameStr,
BasicBlock *InsertAtEnd)
- : Instruction(getGEPReturnType(PointeeType, Ptr, IdxList), GetElementPtr,
+ : Instruction(getGEPReturnType(Ptr, IdxList), GetElementPtr,
OperandTraits<GetElementPtrInst>::op_end(this) - Values,
Values, InsertAtEnd),
SourceElementType(PointeeType),
ResultElementType(getIndexedType(PointeeType, IdxList)) {
- assert(cast<PointerType>(getType()->getScalarType())
- ->isOpaqueOrPointeeTypeMatches(ResultElementType));
init(Ptr, IdxList, NameStr);
}
@@ -2002,7 +1985,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementInst, Value)
// ShuffleVectorInst Class
//===----------------------------------------------------------------------===//
-constexpr int UndefMaskElem = -1;
+constexpr int PoisonMaskElem = -1;
/// This instruction constructs a fixed permutation of two
/// input vectors.
@@ -2010,7 +1993,7 @@ constexpr int UndefMaskElem = -1;
/// For each element of the result vector, the shuffle mask selects an element
/// from one of the input vectors to copy to the result. Non-negative elements
/// in the mask represent an index into the concatenated pair of input vectors.
-/// UndefMaskElem (-1) specifies that the result element is undefined.
+/// PoisonMaskElem (-1) specifies that the result element is poison.
///
/// For scalable vectors, all the elements of the mask must be 0 or -1. This
/// requirement may be relaxed in the future.
@@ -2068,16 +2051,16 @@ public:
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
/// Return the shuffle mask value of this instruction for the given element
- /// index. Return UndefMaskElem if the element is undef.
+ /// index. Return PoisonMaskElem if the element is undef.
int getMaskValue(unsigned Elt) const { return ShuffleMask[Elt]; }
/// Convert the input shuffle mask operand to a vector of integers. Undefined
- /// elements of the mask are returned as UndefMaskElem.
+ /// elements of the mask are returned as PoisonMaskElem.
static void getShuffleMask(const Constant *Mask,
SmallVectorImpl<int> &Result);
/// Return the mask for this instruction as a vector of integers. Undefined
- /// elements of the mask are returned as UndefMaskElem.
+ /// elements of the mask are returned as PoisonMaskElem.
void getShuffleMask(SmallVectorImpl<int> &Result) const {
Result.assign(ShuffleMask.begin(), ShuffleMask.end());
}
@@ -2430,6 +2413,37 @@ public:
}
}
+ /// Return true if this shuffle interleaves its two input vectors together.
+ bool isInterleave(unsigned Factor);
+
+ /// Return true if the mask interleaves one or more input vectors together.
+ ///
+ /// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
+ /// E.g. For a Factor of 2 (LaneLen=4):
+ /// <0, 4, 1, 5, 2, 6, 3, 7>
+ /// E.g. For a Factor of 3 (LaneLen=4):
+ /// <4, 0, 9, 5, 1, 10, 6, 2, 11, 7, 3, 12>
+ /// E.g. For a Factor of 4 (LaneLen=2):
+ /// <0, 2, 6, 4, 1, 3, 7, 5>
+ ///
+ /// NumInputElts is the total number of elements in the input vectors.
+ ///
+ /// StartIndexes are the first indexes of each vector being interleaved,
+ /// substituting any indexes that were undef
+ /// E.g. <4, -1, 2, 5, 1, 3> (Factor=3): StartIndexes=<4, 0, 2>
+ ///
+ /// Note that this does not check if the input vectors are consecutive:
+ /// It will return true for masks such as
+ /// <0, 4, 6, 1, 5, 7> (Factor=3, LaneLen=2)
+ static bool isInterleaveMask(ArrayRef<int> Mask, unsigned Factor,
+ unsigned NumInputElts,
+ SmallVectorImpl<unsigned> &StartIndexes);
+ static bool isInterleaveMask(ArrayRef<int> Mask, unsigned Factor,
+ unsigned NumInputElts) {
+ SmallVector<unsigned, 8> StartIndexes;
+ return isInterleaveMask(Mask, Factor, NumInputElts, StartIndexes);
+ }
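
The mask predicate declared above can be queried without materializing a shufflevector instruction. Below is a small usage sketch using the first example mask from the comment (two 4-element inputs, a Factor of 2); the expected StartIndexes follow from the documentation above.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  // <0, 4, 1, 5, 2, 6, 3, 7>: lanes of two 4-element vectors, interleaved.
  SmallVector<int, 8> Mask = {0, 4, 1, 5, 2, 6, 3, 7};
  SmallVector<unsigned, 8> StartIndexes;
  bool Ok = ShuffleVectorInst::isInterleaveMask(Mask, /*Factor=*/2,
                                                /*NumInputElts=*/8,
                                                StartIndexes);
  // Per the documentation above, expect Ok == true and StartIndexes == {0, 4}.
  outs() << (Ok ? "interleave" : "not an interleave") << "\n";
  return 0;
}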
+
// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::ShuffleVector;
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index 5de01070c9d9..62bd833198f0 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -154,7 +154,6 @@ static inline bool isDbgInfoIntrinsic(Intrinsic::ID ID) {
switch (ID) {
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
- case Intrinsic::dbg_addr:
case Intrinsic::dbg_label:
case Intrinsic::dbg_assign:
return true;
@@ -177,57 +176,122 @@ public:
/// @}
};
-/// This is the common base class for debug info intrinsics for variables.
-class DbgVariableIntrinsic : public DbgInfoIntrinsic {
+// Iterator for ValueAsMetadata that internally uses direct pointer iteration
+// over either a ValueAsMetadata* or a ValueAsMetadata**, dereferencing to the
+// ValueAsMetadata.
+class location_op_iterator
+ : public iterator_facade_base<location_op_iterator,
+ std::bidirectional_iterator_tag, Value *> {
+ PointerUnion<ValueAsMetadata *, ValueAsMetadata **> I;
+
public:
- // Iterator for ValueAsMetadata that internally uses direct pointer iteration
- // over either a ValueAsMetadata* or a ValueAsMetadata**, dereferencing to the
- // ValueAsMetadata .
- class location_op_iterator
- : public iterator_facade_base<location_op_iterator,
- std::bidirectional_iterator_tag, Value *> {
- PointerUnion<ValueAsMetadata *, ValueAsMetadata **> I;
-
- public:
- location_op_iterator(ValueAsMetadata *SingleIter) : I(SingleIter) {}
- location_op_iterator(ValueAsMetadata **MultiIter) : I(MultiIter) {}
-
- location_op_iterator(const location_op_iterator &R) : I(R.I) {}
- location_op_iterator &operator=(const location_op_iterator &R) {
- I = R.I;
- return *this;
- }
- bool operator==(const location_op_iterator &RHS) const {
- return I == RHS.I;
- }
- const Value *operator*() const {
- ValueAsMetadata *VAM = I.is<ValueAsMetadata *>()
- ? I.get<ValueAsMetadata *>()
- : *I.get<ValueAsMetadata **>();
- return VAM->getValue();
- };
- Value *operator*() {
- ValueAsMetadata *VAM = I.is<ValueAsMetadata *>()
- ? I.get<ValueAsMetadata *>()
- : *I.get<ValueAsMetadata **>();
- return VAM->getValue();
- }
- location_op_iterator &operator++() {
- if (I.is<ValueAsMetadata *>())
- I = I.get<ValueAsMetadata *>() + 1;
- else
- I = I.get<ValueAsMetadata **>() + 1;
- return *this;
- }
- location_op_iterator &operator--() {
- if (I.is<ValueAsMetadata *>())
- I = I.get<ValueAsMetadata *>() - 1;
- else
- I = I.get<ValueAsMetadata **>() - 1;
- return *this;
- }
+ location_op_iterator(ValueAsMetadata *SingleIter) : I(SingleIter) {}
+ location_op_iterator(ValueAsMetadata **MultiIter) : I(MultiIter) {}
+
+ location_op_iterator(const location_op_iterator &R) : I(R.I) {}
+ location_op_iterator &operator=(const location_op_iterator &R) {
+ I = R.I;
+ return *this;
+ }
+ bool operator==(const location_op_iterator &RHS) const { return I == RHS.I; }
+ const Value *operator*() const {
+ ValueAsMetadata *VAM = isa<ValueAsMetadata *>(I)
+ ? cast<ValueAsMetadata *>(I)
+ : *cast<ValueAsMetadata **>(I);
+ return VAM->getValue();
};
+ Value *operator*() {
+ ValueAsMetadata *VAM = isa<ValueAsMetadata *>(I)
+ ? cast<ValueAsMetadata *>(I)
+ : *cast<ValueAsMetadata **>(I);
+ return VAM->getValue();
+ }
+ location_op_iterator &operator++() {
+ if (isa<ValueAsMetadata *>(I))
+ I = cast<ValueAsMetadata *>(I) + 1;
+ else
+ I = cast<ValueAsMetadata **>(I) + 1;
+ return *this;
+ }
+ location_op_iterator &operator--() {
+ if (isa<ValueAsMetadata *>(I))
+ I = cast<ValueAsMetadata *>(I) - 1;
+ else
+ I = cast<ValueAsMetadata **>(I) - 1;
+ return *this;
+ }
+};
+
+/// Lightweight class that wraps the location operand metadata of a debug
+/// intrinsic. The raw location may be a ValueAsMetadata, an empty MDTuple,
+/// or a DIArgList.
+class RawLocationWrapper {
+ Metadata *RawLocation = nullptr;
+
+public:
+ RawLocationWrapper() = default;
+ explicit RawLocationWrapper(Metadata *RawLocation)
+ : RawLocation(RawLocation) {
+ // Allow ValueAsMetadata, empty MDTuple, DIArgList.
+ assert(RawLocation && "unexpected null RawLocation");
+ assert(isa<ValueAsMetadata>(RawLocation) || isa<DIArgList>(RawLocation) ||
+ (isa<MDNode>(RawLocation) &&
+ !cast<MDNode>(RawLocation)->getNumOperands()));
+ }
+ Metadata *getRawLocation() const { return RawLocation; }
+ /// Get the locations corresponding to the variable referenced by the debug
+ /// info intrinsic. Depending on the intrinsic, this could be the
+ /// variable's value or its address.
+ iterator_range<location_op_iterator> location_ops() const;
+ Value *getVariableLocationOp(unsigned OpIdx) const;
+ unsigned getNumVariableLocationOps() const {
+ if (hasArgList())
+ return cast<DIArgList>(getRawLocation())->getArgs().size();
+ return 1;
+ }
+ bool hasArgList() const { return isa<DIArgList>(getRawLocation()); }
+ bool isKillLocation(const DIExpression *Expression) const {
+ // Check for "kill" sentinel values.
+ // Non-variadic: empty metadata.
+ if (!hasArgList() && isa<MDNode>(getRawLocation()))
+ return true;
+ // Variadic: empty DIArgList with empty expression.
+ if (getNumVariableLocationOps() == 0 && !Expression->isComplex())
+ return true;
+ // Variadic and non-variadic: Interpret expressions using undef or poison
+ // values as kills.
+ return any_of(location_ops(), [](Value *V) { return isa<UndefValue>(V); });
+ }
+
+ friend bool operator==(const RawLocationWrapper &A,
+ const RawLocationWrapper &B) {
+ return A.RawLocation == B.RawLocation;
+ }
+ friend bool operator!=(const RawLocationWrapper &A,
+ const RawLocationWrapper &B) {
+ return !(A == B);
+ }
+ friend bool operator>(const RawLocationWrapper &A,
+ const RawLocationWrapper &B) {
+ return A.RawLocation > B.RawLocation;
+ }
+ friend bool operator>=(const RawLocationWrapper &A,
+ const RawLocationWrapper &B) {
+ return A.RawLocation >= B.RawLocation;
+ }
+ friend bool operator<(const RawLocationWrapper &A,
+ const RawLocationWrapper &B) {
+ return A.RawLocation < B.RawLocation;
+ }
+ friend bool operator<=(const RawLocationWrapper &A,
+ const RawLocationWrapper &B) {
+ return A.RawLocation <= B.RawLocation;
+ }
+};
+/// This is the common base class for debug info intrinsics for variables.
+class DbgVariableIntrinsic : public DbgInfoIntrinsic {
+public:
/// Get the locations corresponding to the variable referenced by the debug
/// info intrinsic. Depending on the intrinsic, this could be the
/// variable's value or its address.
@@ -252,19 +316,16 @@ public:
}
unsigned getNumVariableLocationOps() const {
- if (hasArgList())
- return cast<DIArgList>(getRawLocation())->getArgs().size();
- return 1;
+ return getWrappedLocation().getNumVariableLocationOps();
}
- bool hasArgList() const { return isa<DIArgList>(getRawLocation()); }
+ bool hasArgList() const { return getWrappedLocation().hasArgList(); }
- /// Does this describe the address of a local variable. True for dbg.addr and
- /// dbg.declare, but not dbg.value, which describes its value, or dbg.assign,
- /// which describes a combination of the variable's value and address.
+ /// Does this describe the address of a local variable? True for dbg.declare,
+ /// but not dbg.value, which describes its value, or dbg.assign, which
+ /// describes a combination of the variable's value and address.
bool isAddressOfVariable() const {
- return getIntrinsicID() != Intrinsic::dbg_value &&
- getIntrinsicID() != Intrinsic::dbg_assign;
+ return getIntrinsicID() == Intrinsic::dbg_declare;
}
void setKillLocation() {
@@ -280,9 +341,7 @@ public:
}
bool isKillLocation() const {
- return (getNumVariableLocationOps() == 0 &&
- !getExpression()->isComplex()) ||
- any_of(location_ops(), [](Value *V) { return isa<UndefValue>(V); });
+ return getWrappedLocation().isKillLocation(getExpression());
}
DILocalVariable *getVariable() const {
@@ -297,6 +356,10 @@ public:
return cast<MetadataAsValue>(getArgOperand(0))->getMetadata();
}
+ RawLocationWrapper getWrappedLocation() const {
+ return RawLocationWrapper(getRawLocation());
+ }
+
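
RawLocationWrapper lets the kill-location and operand-count queries above be shared between the intrinsic form and raw location metadata. A minimal sketch of how a caller might use the wrapper through a debug intrinsic, assuming only the accessors declared in this header; the helper name is illustrative, not part of the API.

#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Illustrative helper: does this dbg.value/dbg.declare/dbg.assign still
// carry at least one live (non-kill) location operand?
static bool describesLiveLocation(const DbgVariableIntrinsic &DVI) {
  RawLocationWrapper Loc = DVI.getWrappedLocation();
  return Loc.getNumVariableLocationOps() != 0 &&
         !Loc.isKillLocation(DVI.getExpression());
}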
Metadata *getRawVariable() const {
return cast<MetadataAsValue>(getArgOperand(1))->getMetadata();
}
@@ -321,13 +384,25 @@ public:
return getExpression()->getFragmentInfo();
}
+ /// Get the FragmentInfo for the variable if it exists, otherwise return a
+ /// FragmentInfo that covers the entire variable if the variable size is
+ /// known, otherwise return a zero-sized fragment.
+ DIExpression::FragmentInfo getFragmentOrEntireVariable() const {
+ DIExpression::FragmentInfo VariableSlice(0, 0);
+ // Get the fragment or variable size, or zero.
+ if (auto Sz = getFragmentSizeInBits())
+ VariableSlice.SizeInBits = *Sz;
+ if (auto Frag = getExpression()->getFragmentInfo())
+ VariableSlice.OffsetInBits = Frag->OffsetInBits;
+ return VariableSlice;
+ }
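
getFragmentOrEntireVariable gives every intrinsic a comparable bit range even when no fragment is present. The sketch below shows one way that range could be used to test whether two intrinsics describe overlapping slices of the same variable; the helper is illustrative and not part of this header.

#include "llvm/IR/IntrinsicInst.h"
#include <cstdint>
using namespace llvm;

// Illustrative helper: do two debug intrinsics for the same variable describe
// overlapping bit ranges? A zero-sized slice (unknown size) never overlaps.
static bool slicesOverlap(const DbgVariableIntrinsic &A,
                          const DbgVariableIntrinsic &B) {
  if (A.getVariable() != B.getVariable())
    return false;
  DIExpression::FragmentInfo FA = A.getFragmentOrEntireVariable();
  DIExpression::FragmentInfo FB = B.getFragmentOrEntireVariable();
  uint64_t AEnd = FA.OffsetInBits + FA.SizeInBits;
  uint64_t BEnd = FB.OffsetInBits + FB.SizeInBits;
  return FA.OffsetInBits < BEnd && FB.OffsetInBits < AEnd;
}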
+
/// \name Casting methods
/// @{
static bool classof(const IntrinsicInst *I) {
switch (I->getIntrinsicID()) {
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
- case Intrinsic::dbg_addr:
case Intrinsic::dbg_assign:
return true;
default:
@@ -365,25 +440,6 @@ public:
/// @}
};
-/// This represents the llvm.dbg.addr instruction.
-class DbgAddrIntrinsic : public DbgVariableIntrinsic {
-public:
- Value *getAddress() const {
- assert(getNumVariableLocationOps() == 1 &&
- "dbg.addr must have exactly 1 location operand.");
- return getVariableLocationOp(0);
- }
-
- /// \name Casting methods
- /// @{
- static bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::dbg_addr;
- }
- static bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
-};
-
/// This represents the llvm.dbg.value instruction.
class DbgValueInst : public DbgVariableIntrinsic {
public:
@@ -540,8 +596,17 @@ public:
return getFunctionalOpcodeForVP(getIntrinsicID());
}
+ // Equivalent non-predicated constrained ID
+ std::optional<unsigned> getConstrainedIntrinsicID() const {
+ return getConstrainedIntrinsicIDForVP(getIntrinsicID());
+ }
+
// Equivalent non-predicated opcode
static std::optional<unsigned> getFunctionalOpcodeForVP(Intrinsic::ID ID);
+
+ // Equivalent non-predicated constrained ID
+ static std::optional<unsigned>
+ getConstrainedIntrinsicIDForVP(Intrinsic::ID ID);
};
/// This represents vector predication reduction intrinsics.
@@ -1375,6 +1440,17 @@ public:
}
};
+/// This represents the llvm.instrprof.timestamp intrinsic.
+class InstrProfTimestampInst : public InstrProfInstBase {
+public:
+ static bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::instrprof_timestamp;
+ }
+ static bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
/// This represents the llvm.instrprof.value.profile intrinsic.
class InstrProfValueProfileInst : public InstrProfInstBase {
public:
@@ -1513,20 +1589,9 @@ public:
}
};
-/// This represents intrinsics that guard a condition
-class CondGuardInst : public IntrinsicInst {
-public:
- static bool classof(const IntrinsicInst *I) {
- return I->getIntrinsicID() == Intrinsic::assume ||
- I->getIntrinsicID() == Intrinsic::experimental_guard;
- }
- static bool classof(const Value *V) {
- return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
- }
-};
/// This represents the llvm.assume intrinsic.
-class AssumeInst : public CondGuardInst {
+class AssumeInst : public IntrinsicInst {
public:
static bool classof(const IntrinsicInst *I) {
return I->getIntrinsicID() == Intrinsic::assume;
diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h
index 9bb7c86d26ca..0dfe9f029f9b 100644
--- a/llvm/include/llvm/IR/Intrinsics.h
+++ b/llvm/include/llvm/IR/Intrinsics.h
@@ -128,8 +128,6 @@ namespace Intrinsic {
TruncArgument,
HalfVecArgument,
SameVecWidthArgument,
- PtrToArgument,
- PtrToElt,
VecOfAnyPtrsToElt,
VecElementArgument,
Subdivide2Argument,
@@ -137,7 +135,7 @@ namespace Intrinsic {
VecOfBitcastsToInt,
AMX,
PPCQuad,
- AnyPtrToElt,
+ AArch64Svcount,
} Kind;
union {
@@ -149,20 +147,17 @@ namespace Intrinsic {
ElementCount Vector_Width;
};
+ // AK_% : Defined in Intrinsics.td
enum ArgKind {
- AK_Any,
- AK_AnyInteger,
- AK_AnyFloat,
- AK_AnyVector,
- AK_AnyPointer,
- AK_MatchType = 7
+#define GET_INTRINSIC_ARGKIND
+#include "llvm/IR/IntrinsicEnums.inc"
+#undef GET_INTRINSIC_ARGKIND
};
unsigned getArgumentNumber() const {
assert(Kind == Argument || Kind == ExtendArgument ||
Kind == TruncArgument || Kind == HalfVecArgument ||
- Kind == SameVecWidthArgument || Kind == PtrToArgument ||
- Kind == PtrToElt || Kind == VecElementArgument ||
+ Kind == SameVecWidthArgument || Kind == VecElementArgument ||
Kind == Subdivide2Argument || Kind == Subdivide4Argument ||
Kind == VecOfBitcastsToInt);
return Argument_Info >> 3;
@@ -170,21 +165,20 @@ namespace Intrinsic {
ArgKind getArgumentKind() const {
assert(Kind == Argument || Kind == ExtendArgument ||
Kind == TruncArgument || Kind == HalfVecArgument ||
- Kind == SameVecWidthArgument || Kind == PtrToArgument ||
+ Kind == SameVecWidthArgument ||
Kind == VecElementArgument || Kind == Subdivide2Argument ||
Kind == Subdivide4Argument || Kind == VecOfBitcastsToInt);
return (ArgKind)(Argument_Info & 7);
}
- // VecOfAnyPtrsToElt and AnyPtrToElt uses both an overloaded argument (for
- // address space) and a reference argument (for matching vector width and
- // element types)
+ // VecOfAnyPtrsToElt uses both an overloaded argument (for address space)
+ // and a reference argument (for matching vector width and element types)
unsigned getOverloadArgNumber() const {
- assert(Kind == VecOfAnyPtrsToElt || Kind == AnyPtrToElt);
+ assert(Kind == VecOfAnyPtrsToElt);
return Argument_Info >> 16;
}
unsigned getRefArgNumber() const {
- assert(Kind == VecOfAnyPtrsToElt || Kind == AnyPtrToElt);
+ assert(Kind == VecOfAnyPtrsToElt);
return Argument_Info & 0xFFFF;
}
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index e0fd727607ce..e51c04fbad2f 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -94,6 +94,11 @@ class Align<AttrIndex idx, int align> : IntrinsicProperty {
int Align = align;
}
+class Dereferenceable<AttrIndex idx, int bytes> : IntrinsicProperty {
+ int ArgNo = idx.Value;
+ int Bytes = bytes;
+}
+
// Returned - The specified argument is always the return value of the
// intrinsic.
class Returned<AttrIndex idx> : IntrinsicProperty {
@@ -168,28 +173,235 @@ def IntrSpeculatable : IntrinsicProperty;
def IntrHasSideEffects : IntrinsicProperty;
//===----------------------------------------------------------------------===//
+// IIT constants and utils
+//===----------------------------------------------------------------------===//
+
+// llvm::Intrinsic::IITDescriptor::ArgKind::AK_%
+def ArgKind {
+ int Any = 0;
+ int AnyInteger = 1;
+ int AnyFloat = 2;
+ int AnyVector = 3;
+ int AnyPointer = 4;
+
+ int MatchType = 7;
+}
+
+// Encode placeholder.
+// Bits [15:8] hold the ID that selects how to resolve ArgCode.
+
+// (ACIdx << 3) | ArgCode
+class EncAnyType<int ArgCode=0> {
+ int ID = 0x100;
+ int ret = !or(ID, ArgCode);
+}
+
+// (Mapping[Num] << 3) | AK.MatchType
+class EncMatchType<int Num=0> {
+ int ID = 0x200;
+ int ret = !or(ID, Num);
+}
+
+// (Mapping[Num] << 3) | ArgCodes[Mapping[Num]]
+class EncSameWidth<int Num=0> {
+ int ID = 0x300;
+ int ret = !or(ID, Num);
+}
+
+// ACIdx
+class EncNextArgA<int dummy=0> {
+ int ID = 0x400;
+ int ret = !or(ID, dummy);
+}
+
+// Mapping[Num]
+class EncNextArgN<int Num=0> {
+ int ID = 0x500;
+ int ret = !or(ID, Num);
+}
+
+class ResolveArgCode<
+ list<int> Mapping,
+ list<int> ArgCodes,
+ int ACIdx,
+ int ax> {
+ int ah = !and(ax, 0xFF00);
+ int al = !and(ax, 0x00FF);
+ int num = Mapping[al];
+ int ret = !cond(
+ !eq(ah, EncAnyType<>.ID) : !or(!shl(ACIdx, 3), al),
+ !eq(ah, EncMatchType<>.ID) : !or(!shl(num, 3), ArgKind.MatchType),
+ !eq(ah, EncSameWidth<>.ID) : !or(!shl(num, 3), ArgCodes[num]),
+ !eq(ah, EncNextArgA<>.ID) : ACIdx,
+ !eq(ah, EncNextArgN<>.ID) : num,
+ true : al);
+}
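
The Enc* classes above pack a resolution ID into bits [15:8] and a payload into the low byte; ResolveArgCode then rewrites each placeholder into its final IIT operand. The following standalone C++ sketch mirrors that !cond for illustration only; it is not the generated code, and the parameter names stand in for Mapping[al], ArgCodes[Mapping[al]], and ACIdx.

#include <cstdio>

// Resolution IDs carried in bits [15:8] of an encoded placeholder.
enum EncID : unsigned {
  EncAnyType   = 0x100, // resolves to (ACIdx << 3) | ArgCode
  EncMatchType = 0x200, // resolves to (Mapping[Num] << 3) | AK_MatchType
  EncSameWidth = 0x300, // resolves to (Mapping[Num] << 3) | ArgCodes[Mapping[Num]]
  EncNextArgA  = 0x400, // resolves to ACIdx
  EncNextArgN  = 0x500, // resolves to Mapping[Num]
};
constexpr unsigned AK_MatchType = 7;

unsigned resolveArgCode(unsigned Encoded, unsigned MappedNum,
                        unsigned ArgCodeOfMapped, unsigned ACIdx) {
  unsigned Hi = Encoded & 0xFF00, Lo = Encoded & 0x00FF;
  switch (Hi) {
  case EncAnyType:   return (ACIdx << 3) | Lo;
  case EncMatchType: return (MappedNum << 3) | AK_MatchType;
  case EncSameWidth: return (MappedNum << 3) | ArgCodeOfMapped;
  case EncNextArgA:  return ACIdx;
  case EncNextArgN:  return MappedNum;
  default:           return Lo; // already a concrete value
  }
}

int main() {
  // EncMatchType<1> with Mapping[1] == 0 resolves to (0 << 3) | 7 == 7.
  std::printf("%u\n", resolveArgCode(0x200 | 1, /*MappedNum=*/0,
                                     /*ArgCodeOfMapped=*/0, /*ACIdx=*/0));
  return 0;
}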
+
+//===----------------------------------------------------------------------===//
+// IIT_Info
+//===----------------------------------------------------------------------===//
+
+class IIT_Base<int num> {
+ int Number = num;
+ list<ValueType> VTs = ?;
+}
+
+class IIT_VT<ValueType vt, int num> : IIT_Base<num> {
+ let VTs = [vt];
+}
+
+class IIT_Int<int size, int num> : IIT_Base<num> {
+ let VTs = !filter(vti, ValueTypes,
+ !and(vti.isInteger, !eq(vti.Size, size)));
+}
+
+class IIT_Vec<int nelem, int num> : IIT_Base<num> {
+ let VTs = !filter(vti, ValueTypes,
+ !and(vti.isVector, !eq(vti.nElem, nelem)));
+}
+
+defset list<IIT_Base> IIT_all = {
+def IIT_Done : IIT_Base< 0>;
+def IIT_I1 : IIT_Int<1, 1>;
+def IIT_I8 : IIT_Int<8, 2>;
+def IIT_I16 : IIT_Int<16, 3>;
+def IIT_I32 : IIT_Int<32, 4>;
+def IIT_I64 : IIT_Int<64, 5>;
+def IIT_F16 : IIT_VT<f16, 6>;
+def IIT_F32 : IIT_VT<f32, 7>;
+def IIT_F64 : IIT_VT<f64, 8>;
+def IIT_V2 : IIT_Vec<2, 9>;
+def IIT_V4 : IIT_Vec<4, 10>;
+def IIT_V8 : IIT_Vec<8, 11>;
+def IIT_V16 : IIT_Vec<16, 12>;
+def IIT_V32 : IIT_Vec<32, 13>;
+def IIT_PTR : IIT_Base< 14>;
+def IIT_ARG : IIT_Base< 15>;
+
+def IIT_V64 : IIT_Vec<64, 16>;
+def IIT_MMX : IIT_VT<x86mmx, 17>;
+def IIT_TOKEN : IIT_VT<token, 18>;
+def IIT_METADATA : IIT_VT<MetadataVT, 19>;
+def IIT_EMPTYSTRUCT : IIT_VT<OtherVT, 20>;
+def IIT_STRUCT2 : IIT_Base<21>;
+def IIT_STRUCT3 : IIT_Base<22>;
+def IIT_STRUCT4 : IIT_Base<23>;
+def IIT_STRUCT5 : IIT_Base<24>;
+def IIT_EXTEND_ARG : IIT_Base<25>;
+def IIT_TRUNC_ARG : IIT_Base<26>;
+def IIT_ANYPTR : IIT_Base<27>;
+def IIT_V1 : IIT_Vec<1, 28>;
+def IIT_VARARG : IIT_VT<isVoid, 29>;
+def IIT_HALF_VEC_ARG : IIT_Base<30>;
+def IIT_SAME_VEC_WIDTH_ARG : IIT_Base<31>;
+def IIT_VEC_OF_ANYPTRS_TO_ELT : IIT_Base<34>;
+def IIT_I128 : IIT_Int<128, 35>;
+def IIT_V512 : IIT_Vec<512, 36>;
+def IIT_V1024 : IIT_Vec<1024, 37>;
+def IIT_STRUCT6 : IIT_Base<38>;
+def IIT_STRUCT7 : IIT_Base<39>;
+def IIT_STRUCT8 : IIT_Base<40>;
+def IIT_F128 : IIT_VT<f128, 41>;
+def IIT_VEC_ELEMENT : IIT_Base<42>;
+def IIT_SCALABLE_VEC : IIT_Base<43>;
+def IIT_SUBDIVIDE2_ARG : IIT_Base<44>;
+def IIT_SUBDIVIDE4_ARG : IIT_Base<45>;
+def IIT_VEC_OF_BITCASTS_TO_INT : IIT_Base<46>;
+def IIT_V128 : IIT_Vec<128, 47>;
+def IIT_BF16 : IIT_VT<bf16, 48>;
+def IIT_STRUCT9 : IIT_Base<49>;
+def IIT_V256 : IIT_Vec<256, 50>;
+def IIT_AMX : IIT_VT<x86amx, 51>;
+def IIT_PPCF128 : IIT_VT<ppcf128, 52>;
+def IIT_V3 : IIT_Vec<3, 53>;
+def IIT_EXTERNREF : IIT_VT<externref, 54>;
+def IIT_FUNCREF : IIT_VT<funcref, 55>;
+def IIT_I2 : IIT_Int<2, 57>;
+def IIT_I4 : IIT_Int<4, 58>;
+def IIT_AARCH64_SVCOUNT : IIT_VT<aarch64svcount, 59>;
+}
+
+defvar IIT_all_FixedTypes = !filter(iit, IIT_all,
+ !or(!isa<IIT_VT>(iit), !isa<IIT_Int>(iit)));
+
+defvar IIT_all_VectorTypes = !filter(iit, IIT_all,
+ !isa<IIT_Vec>(iit));
+
+defvar IIT_RetNumbers = [
+ [IIT_Done.Number],
+ []<int>,
+ [IIT_STRUCT2.Number],
+ [IIT_STRUCT3.Number],
+ [IIT_STRUCT4.Number],
+ [IIT_STRUCT5.Number],
+ [IIT_STRUCT6.Number],
+ [IIT_STRUCT7.Number],
+ [IIT_STRUCT8.Number],
+ [IIT_STRUCT9.Number],
+];
+
+//===----------------------------------------------------------------------===//
// Types used by intrinsics.
//===----------------------------------------------------------------------===//
class LLVMType<ValueType vt> {
ValueType VT = vt;
- int isAny = false;
+ int isAny = vt.isOverloaded;
+
+ int ArgCode = ?;
+ int Number = ?;
+
+ list<IIT_Base> IITs = !filter(iit, IIT_all_FixedTypes,
+ !not(!empty(!filter(iit_vt, iit.VTs,
+ !eq(iit_vt, !if(vt.isVector, vt.ElementType, vt))))));
+ assert !le(!size(IITs), 1), "Duplicate type";
+
+ list<IIT_Base> IIT_Vecs = !if(vt.isVector,
+ !filter(iit, IIT_all_VectorTypes,
+ !not(!empty(!filter(iit_vt, iit.VTs, !and(
+ !eq(iit_vt.ElementType, vt.ElementType),
+ !eq(iit_vt.nElem, vt.nElem)))))),
+ []);
+ assert !le(!size(IIT_Vecs), 1), "Duplicate type";
+
+ list<int> Sig = !listconcat(
+ !if(vt.isScalable, [IIT_SCALABLE_VEC.Number], []),
+ !foreach(iit, IIT_Vecs, iit.Number),
+ !foreach(iit, IITs, iit.Number));
}
-class LLVMQualPointerType<LLVMType elty, int addrspace>
- : LLVMType<iPTR>{
- LLVMType ElTy = elty;
- int AddrSpace = addrspace;
+class LLVMAnyType<ValueType vt> : LLVMType<vt> {
+ let ArgCode = !cond(
+ !eq(vt, Any) : ArgKind.Any,
+ !eq(vt, iAny) : ArgKind.AnyInteger,
+ !eq(vt, fAny) : ArgKind.AnyFloat,
+ !eq(vt, vAny) : ArgKind.AnyVector,
+ !eq(vt, iPTRAny) : ArgKind.AnyPointer,
+ );
+ let Sig = [
+ IIT_ARG.Number,
+ EncAnyType<ArgCode>.ret,
+ ];
+
+ assert isAny, "LLVMAnyType.VT should have isOverloaded";
}
-class LLVMPointerType<LLVMType elty>
- : LLVMQualPointerType<elty, 0>;
-
-class LLVMAnyPointerType<LLVMType elty>
- : LLVMType<iPTRAny>{
- LLVMType ElTy = elty;
+class LLVMQualPointerType<int addrspace>
+ : LLVMType<iPTR> {
+ assert !and(!le(0, addrspace), !le(addrspace, 255)),
+ "Address space exceeds 255";
+
+ let Sig =
+ !if(addrspace, [
+ IIT_ANYPTR.Number,
+ addrspace,
+ ], [
+ IIT_PTR.Number,
+ ]);
+}
- let isAny = true;
+class LLVMAnyPointerType : LLVMAnyType<iPTRAny> {
+ assert isAny, "iPTRAny should have isOverloaded";
}
// Match the type of another intrinsic parameter. Number is an index into the
@@ -198,53 +410,71 @@ class LLVMAnyPointerType<LLVMType elty>
// Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_anyfloat_ty, LLVMMatchType<0>]>
// has two overloaded types, the 2nd and 3rd arguments. LLVMMatchType<0>
// refers to the first overloaded type, which is the 2nd argument.
-class LLVMMatchType<int num>
+class LLVMMatchType<int num, IIT_Base IIT_Info = IIT_ARG>
: LLVMType<OtherVT>{
- int Number = num;
+ let Number = num;
+ let Sig = [
+ IIT_Info.Number,
+ EncMatchType<num>.ret,
+ ];
+}
+
+class LLVMMatchTypeNextArg<int num, IIT_Base IIT_Info>
+ : LLVMMatchType<num, IIT_Info> {
+ let Sig = [
+ IIT_Info.Number,
+ EncNextArgA<>.ret,
+ EncNextArgN<num>.ret,
+ ];
}
// Match the type of another intrinsic parameter that is expected to be based on
// an integral type (i.e. either iN or <N x iM>), but change the scalar size to
// be twice as wide or half as wide as the other type. This is only useful when
// the intrinsic is overloaded, so the matched type should be declared as iAny.
-class LLVMExtendedType<int num> : LLVMMatchType<num>;
-class LLVMTruncatedType<int num> : LLVMMatchType<num>;
+class LLVMExtendedType<int num> : LLVMMatchType<num, IIT_EXTEND_ARG>;
+class LLVMTruncatedType<int num> : LLVMMatchType<num, IIT_TRUNC_ARG>;
// Match the scalar/vector of another intrinsic parameter but with a different
// element type. Either both are scalars or both are vectors with the same
// number of elements.
class LLVMScalarOrSameVectorWidth<int idx, LLVMType elty>
- : LLVMMatchType<idx> {
- ValueType ElTy = elty.VT;
+ : LLVMMatchType<idx, IIT_SAME_VEC_WIDTH_ARG> {
+ let Sig = !listconcat([
+ IIT_SAME_VEC_WIDTH_ARG.Number,
+ EncSameWidth<idx>.ret,
+ ], elty.Sig);
}
-class LLVMPointerTo<int num> : LLVMMatchType<num>;
-class LLVMPointerToElt<int num> : LLVMMatchType<num>;
-class LLVMAnyPointerToElt<int num> : LLVMMatchType<num>;
-class LLVMVectorOfAnyPointersToElt<int num> : LLVMMatchType<num>;
-class LLVMVectorElementType<int num> : LLVMMatchType<num>;
+class LLVMVectorOfAnyPointersToElt<int num>
+ : LLVMMatchTypeNextArg<num, IIT_VEC_OF_ANYPTRS_TO_ELT>;
+class LLVMVectorElementType<int num> : LLVMMatchType<num, IIT_VEC_ELEMENT>;
// Match the type of another intrinsic parameter that is expected to be a
// vector type, but change the element count to be half as many.
-class LLVMHalfElementsVectorType<int num> : LLVMMatchType<num>;
+class LLVMHalfElementsVectorType<int num>
+ : LLVMMatchType<num, IIT_HALF_VEC_ARG>;
// Match the type of another intrinsic parameter that is expected to be a
// vector type (i.e. <N x iM>) but with each element subdivided to
// form a vector with more elements that are smaller than the original.
-class LLVMSubdivide2VectorType<int num> : LLVMMatchType<num>;
-class LLVMSubdivide4VectorType<int num> : LLVMMatchType<num>;
+class LLVMSubdivide2VectorType<int num>
+ : LLVMMatchType<num, IIT_SUBDIVIDE2_ARG>;
+class LLVMSubdivide4VectorType<int num>
+ : LLVMMatchType<num, IIT_SUBDIVIDE4_ARG>;
// Match the element count and bit width of another intrinsic parameter, but
// change the element type to an integer.
-class LLVMVectorOfBitcastsToInt<int num> : LLVMMatchType<num>;
+class LLVMVectorOfBitcastsToInt<int num>
+ : LLVMMatchType<num, IIT_VEC_OF_BITCASTS_TO_INT>;
def llvm_void_ty : LLVMType<isVoid>;
-let isAny = true in {
- def llvm_any_ty : LLVMType<Any>;
- def llvm_anyint_ty : LLVMType<iAny>;
- def llvm_anyfloat_ty : LLVMType<fAny>;
- def llvm_anyvector_ty : LLVMType<vAny>;
-}
+
+def llvm_any_ty : LLVMAnyType<Any>;
+def llvm_anyint_ty : LLVMAnyType<iAny>;
+def llvm_anyfloat_ty : LLVMAnyType<fAny>;
+def llvm_anyvector_ty : LLVMAnyType<vAny>;
+
def llvm_i1_ty : LLVMType<i1>;
def llvm_i8_ty : LLVMType<i8>;
def llvm_i16_ty : LLVMType<i16>;
@@ -258,16 +488,15 @@ def llvm_double_ty : LLVMType<f64>;
def llvm_f80_ty : LLVMType<f80>;
def llvm_f128_ty : LLVMType<f128>;
def llvm_ppcf128_ty : LLVMType<ppcf128>;
-def llvm_ptr_ty : LLVMPointerType<llvm_i8_ty>; // i8*
-def llvm_ptrptr_ty : LLVMPointerType<llvm_ptr_ty>; // i8**
-def llvm_anyptr_ty : LLVMAnyPointerType<llvm_i8_ty>; // (space)i8*
-def llvm_empty_ty : LLVMType<OtherVT>; // { }
-def llvm_descriptor_ty : LLVMPointerType<llvm_empty_ty>; // { }*
-def llvm_metadata_ty : LLVMType<MetadataVT>; // !{...}
-def llvm_token_ty : LLVMType<token>; // token
+def llvm_ptr_ty : LLVMQualPointerType<0>; // ptr
+def llvm_anyptr_ty : LLVMAnyPointerType; // ptr addrspace(N)
+def llvm_empty_ty : LLVMType<OtherVT>; // { }
+def llvm_metadata_ty : LLVMType<MetadataVT>; // !{...}
+def llvm_token_ty : LLVMType<token>; // token
def llvm_x86mmx_ty : LLVMType<x86mmx>;
-def llvm_ptrx86mmx_ty : LLVMPointerType<llvm_x86mmx_ty>; // <1 x i64>*
+
+def llvm_aarch64_svcount_ty : LLVMType<aarch64svcount>;
def llvm_x86amx_ty : LLVMType<x86amx>;
@@ -349,6 +578,60 @@ def llvm_externref_ty : LLVMType<externref>;
def llvm_funcref_ty : LLVMType<funcref>;
//===----------------------------------------------------------------------===//
+
+class MakeIdx<list<int> Set> {
+ list<int> IdxsR = !foreach(i, !range(Set),
+ !if(Set[i],
+ !foldl(0, !range(0, i), m, j, !add(m, Set[j])),
+ -1));
+
+ list<int> RIdxsR = !foreach(i, !range(Set),
+ !foldl(-1, !range(Set), m, j,
+ !if(!and(Set[j], !eq(IdxsR[j], i)), j, m)));
+
+ list<int> Idxs = !foreach(a, IdxsR, !if(!ge(a, 0), a, ?));
+ list<int> RIdxs = !foreach(a, RIdxsR, !if(!ge(a, 0), a, ?));
+}
+
+class TypeInfoGen<
+ list<LLVMType> RetTypes,
+ list<LLVMType> ParamTypes> {
+ list<LLVMType> AllTypes = !listconcat(RetTypes, ParamTypes);
+
+ // ArgCodes for NextArg -- isAny or MatchTypeNextArg
+ list<int> ACIdxs = MakeIdx<
+ !foreach(ty, AllTypes,
+ !or(ty.isAny, !isa<LLVMMatchTypeNextArg>(ty)))>.Idxs;
+
+ // ArgCodes (only for isAny or MatchTypeNextArg)
+ list<LLVMType> ACTys = !filter(ty, AllTypes,
+ !or(ty.isAny, !isa<LLVMMatchTypeNextArg>(ty)));
+
+ list<int> ArgCodes = !foreach(ty, ACTys, ty.ArgCode);
+
+ // Mappings MatchTypeIdx to ACTys
+ list<int> MappingRIdxs = MakeIdx<
+ !foreach(ty, ACTys, ty.isAny)>.RIdxs;
+
+ // D63507: Exclude LLVMPointerType<llvm_any_ty>
+ bit isOverloaded = !not(!empty(!filter(ty, AllTypes,
+ !isa<LLVMAnyType>(ty))));
+
+ list<LLVMType> Types = !foreach(ty, AllTypes,
+ !if(!isa<LLVMMatchType>(ty), ACTys[MappingRIdxs[ty.Number]], ty));
+
+ list<list<int>> TypeSig = !listconcat(
+ [IIT_RetNumbers[!size(RetTypes)]],
+ !foreach(i, !range(AllTypes),
+ !foreach(a, AllTypes[i].Sig,
+ ResolveArgCode<
+ MappingRIdxs,
+ ArgCodes,
+ ACIdxs[i],
+ a>.ret)));
+}
+
+//===----------------------------------------------------------------------===//
// Intrinsic Definitions.
//===----------------------------------------------------------------------===//
@@ -381,6 +664,11 @@ class Intrinsic<list<LLVMType> ret_types,
bit DisableDefaultAttributes = disable_default_attributes;
bit isTarget = false;
+
+ TypeInfoGen TypeInfo = TypeInfoGen<RetTypes, ParamTypes>;
+ bit isOverloaded = TypeInfo.isOverloaded;
+ list<LLVMType> Types = TypeInfo.Types;
+ list<list<int>> TypeSig = TypeInfo.TypeSig;
}
// Intrinsic with default attributes (disable_default_attributes = false).
@@ -404,6 +692,7 @@ class MSBuiltin<string name> {
string MSBuiltinName = name;
}
+#ifndef TEST_INTRINSICS_SUPPRESS_DEFS
//===--------------- Variable Argument Handling Intrinsics ----------------===//
//
@@ -416,12 +705,12 @@ def int_vaend : DefaultAttrsIntrinsic<[], [llvm_ptr_ty], [], "llvm.va_end">;
//===------------------- Garbage Collection Intrinsics --------------------===//
//
def int_gcroot : Intrinsic<[],
- [llvm_ptrptr_ty, llvm_ptr_ty]>;
+ [llvm_ptr_ty, llvm_ptr_ty]>;
def int_gcread : Intrinsic<[llvm_ptr_ty],
- [llvm_ptr_ty, llvm_ptrptr_ty],
+ [llvm_ptr_ty, llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
def int_gcwrite : Intrinsic<[],
- [llvm_ptr_ty, llvm_ptr_ty, llvm_ptrptr_ty],
+ [llvm_ptr_ty, llvm_ptr_ty, llvm_ptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<1>>,
NoCapture<ArgIndex<2>>]>;
@@ -437,19 +726,19 @@ def int_objc_autoreleasePoolPush : Intrinsic<[llvm_ptr_ty], []>;
def int_objc_autoreleaseReturnValue : Intrinsic<[llvm_ptr_ty],
[llvm_ptr_ty]>;
def int_objc_copyWeak : Intrinsic<[],
- [llvm_ptrptr_ty,
- llvm_ptrptr_ty]>;
-def int_objc_destroyWeak : Intrinsic<[], [llvm_ptrptr_ty]>;
+ [llvm_ptr_ty,
+ llvm_ptr_ty]>;
+def int_objc_destroyWeak : Intrinsic<[], [llvm_ptr_ty]>;
def int_objc_initWeak : Intrinsic<[llvm_ptr_ty],
- [llvm_ptrptr_ty,
+ [llvm_ptr_ty,
llvm_ptr_ty]>;
def int_objc_loadWeak : Intrinsic<[llvm_ptr_ty],
- [llvm_ptrptr_ty]>;
+ [llvm_ptr_ty]>;
def int_objc_loadWeakRetained : Intrinsic<[llvm_ptr_ty],
- [llvm_ptrptr_ty]>;
+ [llvm_ptr_ty]>;
def int_objc_moveWeak : Intrinsic<[],
- [llvm_ptrptr_ty,
- llvm_ptrptr_ty]>;
+ [llvm_ptr_ty,
+ llvm_ptr_ty]>;
def int_objc_release : Intrinsic<[], [llvm_ptr_ty]>;
def int_objc_retain : Intrinsic<[llvm_ptr_ty],
[llvm_ptr_ty]>;
@@ -462,10 +751,10 @@ def int_objc_retainAutoreleasedReturnValue : Intrinsic<[llvm_ptr_ty],
def int_objc_retainBlock : Intrinsic<[llvm_ptr_ty],
[llvm_ptr_ty]>;
def int_objc_storeStrong : Intrinsic<[],
- [llvm_ptrptr_ty,
+ [llvm_ptr_ty,
llvm_ptr_ty]>;
def int_objc_storeWeak : Intrinsic<[llvm_ptr_ty],
- [llvm_ptrptr_ty,
+ [llvm_ptr_ty,
llvm_ptr_ty]>;
def int_objc_clang_arc_use : Intrinsic<[],
[llvm_vararg_ty]>;
@@ -487,23 +776,23 @@ def int_objc_sync_enter : Intrinsic<[llvm_i32_ty],
def int_objc_sync_exit : Intrinsic<[llvm_i32_ty],
[llvm_ptr_ty]>;
def int_objc_arc_annotation_topdown_bbstart : Intrinsic<[],
- [llvm_ptrptr_ty,
- llvm_ptrptr_ty]>;
+ [llvm_ptr_ty,
+ llvm_ptr_ty]>;
def int_objc_arc_annotation_topdown_bbend : Intrinsic<[],
- [llvm_ptrptr_ty,
- llvm_ptrptr_ty]>;
+ [llvm_ptr_ty,
+ llvm_ptr_ty]>;
def int_objc_arc_annotation_bottomup_bbstart : Intrinsic<[],
- [llvm_ptrptr_ty,
- llvm_ptrptr_ty]>;
+ [llvm_ptr_ty,
+ llvm_ptr_ty]>;
def int_objc_arc_annotation_bottomup_bbend : Intrinsic<[],
- [llvm_ptrptr_ty,
- llvm_ptrptr_ty]>;
+ [llvm_ptr_ty,
+ llvm_ptr_ty]>;
//===--------------- Swift asynchronous context intrinsics ----------------===//
// Returns the location of the Swift asynchronous context (usually stored just
// before the frame pointer), and triggers the creation of a null context if it
// would otherwise be unneeded.
-def int_swift_async_context_addr : Intrinsic<[llvm_ptrptr_ty], [], []>;
+def int_swift_async_context_addr : Intrinsic<[llvm_ptr_ty], [], []>;
//===--------------------- Code Generator Intrinsics ----------------------===//
//
@@ -592,7 +881,7 @@ def int_experimental_noalias_scope_decl
// Stack Protector Intrinsic - The stackprotector intrinsic writes the stack
// guard to the correct place on the stack frame.
-def int_stackprotector : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>;
+def int_stackprotector : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptr_ty], []>;
def int_stackguard : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], []>;
// A cover for instrumentation based profiling.
@@ -609,6 +898,10 @@ def int_instrprof_increment_step : Intrinsic<[],
[llvm_ptr_ty, llvm_i64_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i64_ty]>;
+// A timestamp for instrumentation based profiling.
+def int_instrprof_timestamp : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty,
+ llvm_i32_ty, llvm_i32_ty]>;
+
// A call to profile runtime for value profiling of target expressions
// through instrumentation based profiling.
def int_instrprof_value_profile : Intrinsic<[],
@@ -620,6 +913,11 @@ def int_call_preallocated_setup : DefaultAttrsIntrinsic<[llvm_token_ty], [llvm_i
def int_call_preallocated_arg : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_i32_ty]>;
def int_call_preallocated_teardown : DefaultAttrsIntrinsic<[], [llvm_token_ty]>;
+// This intrinsic is intentionally undocumented and users shouldn't call it;
+// it's produced then quickly consumed during codegen.
+def int_callbr_landingpad : Intrinsic<[llvm_any_ty], [LLVMMatchType<0>],
+ [IntrNoMerge]>;
+
//===------------------- Standard C Library Intrinsics --------------------===//
//
@@ -723,6 +1021,13 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
def int_llround : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
def int_lrint : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
def int_llrint : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty]>;
+
+ // TODO: int operand should be constrained to same number of elements as the result.
+ def int_ldexp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>,
+ llvm_anyint_ty]>;
+
+ // TODO: Should constrain all element counts to match
+ def int_frexp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty, llvm_anyint_ty], [LLVMMatchType<0>]>;
}
def int_minnum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
@@ -757,6 +1062,9 @@ def int_objectsize : DefaultAttrsIntrinsic<[llvm_anyint_ty],
let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
def int_get_rounding : DefaultAttrsIntrinsic<[llvm_i32_ty], []>;
def int_set_rounding : DefaultAttrsIntrinsic<[], [llvm_i32_ty]>;
+ def int_get_fpenv : DefaultAttrsIntrinsic<[llvm_anyint_ty], []>;
+ def int_set_fpenv : DefaultAttrsIntrinsic<[], [llvm_anyint_ty]>;
+ def int_reset_fpenv : DefaultAttrsIntrinsic<[], []>;
}
//===--------------- Floating Point Properties ----------------------------===//
@@ -770,7 +1078,11 @@ def int_is_fpclass
//===--------------- Constrained Floating Point Intrinsics ----------------===//
//
-let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
+/// IntrStrictFP - The intrinsic is allowed to be used in an alternate
+/// floating point environment.
+def IntrStrictFP : IntrinsicProperty;
+
+let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn, IntrStrictFP] in {
def int_experimental_constrained_fadd : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
@@ -851,6 +1163,11 @@ let IntrProperties = [IntrInaccessibleMemOnly, IntrWillReturn] in {
llvm_i32_ty,
llvm_metadata_ty,
llvm_metadata_ty ]>;
+ def int_experimental_constrained_ldexp : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ llvm_anyint_ty,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
def int_experimental_constrained_sin : DefaultAttrsIntrinsic<[ llvm_anyfloat_ty ],
[ LLVMMatchType<0>,
llvm_metadata_ty,
@@ -997,10 +1314,6 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable, IntrWillReturn] in {
[llvm_metadata_ty,
llvm_metadata_ty,
llvm_metadata_ty]>;
- def int_dbg_addr : DefaultAttrsIntrinsic<[],
- [llvm_metadata_ty,
- llvm_metadata_ty,
- llvm_metadata_ty]>;
def int_dbg_assign : DefaultAttrsIntrinsic<[],
[llvm_metadata_ty,
llvm_metadata_ty,
@@ -1053,7 +1366,7 @@ def int_var_annotation : DefaultAttrsIntrinsic<
[IntrInaccessibleMemOnly], "llvm.var.annotation">;
def int_ptr_annotation : DefaultAttrsIntrinsic<
- [LLVMAnyPointerType<llvm_anyint_ty>],
+ [llvm_anyptr_ty],
[LLVMMatchType<0>, llvm_anyptr_ty, LLVMMatchType<1>, llvm_i32_ty, LLVMMatchType<1>],
[IntrInaccessibleMemOnly], "llvm.ptr.annotation">;
@@ -1198,13 +1511,13 @@ def int_lifetime_end : DefaultAttrsIntrinsic<[],
[IntrArgMemOnly, IntrWillReturn,
NoCapture<ArgIndex<1>>,
ImmArg<ArgIndex<0>>]>;
-def int_invariant_start : DefaultAttrsIntrinsic<[llvm_descriptor_ty],
+def int_invariant_start : DefaultAttrsIntrinsic<[llvm_ptr_ty],
[llvm_i64_ty, llvm_anyptr_ty],
[IntrArgMemOnly, IntrWillReturn,
NoCapture<ArgIndex<1>>,
ImmArg<ArgIndex<0>>]>;
def int_invariant_end : DefaultAttrsIntrinsic<[],
- [llvm_descriptor_ty, llvm_i64_ty,
+ [llvm_ptr_ty, llvm_i64_ty,
llvm_anyptr_ty],
[IntrArgMemOnly, IntrWillReturn,
NoCapture<ArgIndex<2>>,
@@ -1374,7 +1687,7 @@ def int_experimental_guard : DefaultAttrsIntrinsic<[], [llvm_i1_ty, llvm_vararg_
// Supports widenable conditions for guards represented as explicit branches.
def int_experimental_widenable_condition : DefaultAttrsIntrinsic<[llvm_i1_ty], [],
- [IntrInaccessibleMemOnly, IntrWillReturn, IntrSpeculatable]>;
+ [IntrInaccessibleMemOnly, IntrWillReturn, IntrSpeculatable, NoUndef<RetIndex>]>;
// NOP: calls/invokes to this intrinsic are removed by codegen
def int_donothing : DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrWillReturn]>;
@@ -1430,13 +1743,13 @@ def int_experimental_stepvector : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
// Memory Intrinsics
def int_vp_store : DefaultAttrsIntrinsic<[],
[ llvm_anyvector_ty,
- LLVMAnyPointerType<LLVMMatchType<0>>,
+ llvm_anyptr_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty],
[ NoCapture<ArgIndex<1>>, IntrNoSync, IntrWriteMem, IntrArgMemOnly, IntrWillReturn ]>;
def int_vp_load : DefaultAttrsIntrinsic<[ llvm_anyvector_ty],
- [ LLVMAnyPointerType<LLVMMatchType<0>>,
+ [ llvm_anyptr_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty],
[ NoCapture<ArgIndex<0>>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>;
@@ -1457,14 +1770,14 @@ def int_vp_scatter: DefaultAttrsIntrinsic<[],
// Experimental strided memory accesses
def int_experimental_vp_strided_store : DefaultAttrsIntrinsic<[],
[ llvm_anyvector_ty,
- LLVMAnyPointerToElt<0>,
+ llvm_anyptr_ty,
llvm_anyint_ty, // Stride in bytes
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty],
[ NoCapture<ArgIndex<1>>, IntrNoSync, IntrWriteMem, IntrArgMemOnly, IntrWillReturn ]>;
def int_experimental_vp_strided_load : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [ LLVMAnyPointerToElt<0>,
+ [ llvm_anyptr_ty,
llvm_anyint_ty, // Stride in bytes
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_i32_ty],
@@ -1540,9 +1853,9 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
llvm_i32_ty]>;
def int_vp_abs : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
+ llvm_i1_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty,
- llvm_i1_ty]>;
+ llvm_i32_ty]>;
def int_vp_smin : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
LLVMMatchType<0>,
@@ -1821,17 +2134,17 @@ let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
llvm_i32_ty]>;
}
-let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn, ImmArg<ArgIndex<3>>] in {
+let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn, ImmArg<ArgIndex<1>>] in {
def int_vp_ctlz : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
+ llvm_i1_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty,
- llvm_i1_ty]>;
+ llvm_i32_ty]>;
def int_vp_cttz : DefaultAttrsIntrinsic<[ llvm_anyvector_ty ],
[ LLVMMatchType<0>,
+ llvm_i1_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- llvm_i32_ty,
- llvm_i1_ty]>;
+ llvm_i32_ty]>;
}
def int_get_active_lane_mask:
@@ -1839,6 +2152,12 @@ def int_get_active_lane_mask:
[llvm_anyint_ty, LLVMMatchType<1>],
[IntrNoMem, IntrNoSync, IntrWillReturn]>;
+def int_experimental_get_vector_length:
+ DefaultAttrsIntrinsic<[llvm_i32_ty],
+ [llvm_anyint_ty, llvm_i32_ty, llvm_i1_ty],
+ [IntrNoMem, IntrNoSync, IntrWillReturn,
+ ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
+
def int_experimental_vp_splice:
DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
@@ -1852,14 +2171,14 @@ def int_experimental_vp_splice:
//
def int_masked_load:
DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty,
+ [llvm_anyptr_ty, llvm_i32_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
[IntrReadMem, IntrArgMemOnly, IntrWillReturn, ImmArg<ArgIndex<1>>,
NoCapture<ArgIndex<0>>]>;
def int_masked_store:
DefaultAttrsIntrinsic<[],
- [llvm_anyvector_ty, LLVMAnyPointerType<LLVMMatchType<0>>,
+ [llvm_anyvector_ty, llvm_anyptr_ty,
llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[IntrWriteMem, IntrArgMemOnly, IntrWillReturn,
ImmArg<ArgIndex<2>>, NoCapture<ArgIndex<1>>]>;
@@ -1878,13 +2197,13 @@ def int_masked_scatter:
def int_masked_expandload:
DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMPointerToElt<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ [llvm_ptr_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<0>],
[IntrReadMem, IntrWillReturn, NoCapture<ArgIndex<0>>]>;
def int_masked_compressstore:
DefaultAttrsIntrinsic<[],
- [llvm_anyvector_ty, LLVMPointerToElt<0>,
+ [llvm_anyvector_ty, llvm_ptr_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[IntrWriteMem, IntrArgMemOnly, IntrWillReturn,
NoCapture<ArgIndex<1>>]>;
@@ -1898,6 +2217,11 @@ def int_type_checked_load : DefaultAttrsIntrinsic<[llvm_ptr_ty, llvm_i1_ty],
[llvm_ptr_ty, llvm_i32_ty, llvm_metadata_ty],
[IntrNoMem, IntrWillReturn]>;
+// Safely loads a relative function pointer from a virtual table pointer using type metadata.
+def int_type_checked_load_relative : DefaultAttrsIntrinsic<[llvm_ptr_ty, llvm_i1_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_metadata_ty],
+ [IntrNoMem, IntrWillReturn]>;
+
// Test whether a pointer is associated with a type metadata identifier. Used
// for public visibility classes that may later be refined to private
// visibility.
@@ -1925,12 +2249,12 @@ def int_hwasan_check_memaccess_shortgranules :
//===----------------------------------------------------------------------===//
// Custom event logging for x-ray.
// Takes a pointer to a string and the length of the string.
-def int_xray_customevent : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
+def int_xray_customevent : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
[IntrWriteMem, NoCapture<ArgIndex<0>>,
ReadOnly<ArgIndex<0>>]>;
// Typed event logging for x-ray.
// Takes a numeric type tag, a pointer to a string and the length of the string.
-def int_xray_typedevent : Intrinsic<[], [llvm_i16_ty, llvm_ptr_ty, llvm_i32_ty],
+def int_xray_typedevent : Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty, llvm_i64_ty],
[IntrWriteMem, NoCapture<ArgIndex<1>>,
ReadOnly<ArgIndex<1>>]>;
//===----------------------------------------------------------------------===//
@@ -1965,7 +2289,7 @@ def int_memset_element_unordered_atomic
//===------------------------ Reduction Intrinsics ------------------------===//
//
-let IntrProperties = [IntrNoMem] in {
+let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
def int_vector_reduce_fadd : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
[LLVMVectorElementType<0>,
@@ -1995,6 +2319,10 @@ let IntrProperties = [IntrNoMem] in {
[llvm_anyvector_ty]>;
def int_vector_reduce_fmin : DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
[llvm_anyvector_ty]>;
+ def int_vector_reduce_fminimum: DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
+ def int_vector_reduce_fmaximum: DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
+ [llvm_anyvector_ty]>;
}
//===----- Matrix intrinsics ---------------------------------------------===//
@@ -2014,7 +2342,7 @@ def int_matrix_multiply
def int_matrix_column_major_load
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMPointerToElt<0>, llvm_anyint_ty, llvm_i1_ty,
+ [llvm_ptr_ty, llvm_anyint_ty, llvm_i1_ty,
llvm_i32_ty, llvm_i32_ty],
[IntrNoSync, IntrWillReturn, IntrArgMemOnly, IntrReadMem,
NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>,
@@ -2022,7 +2350,7 @@ def int_matrix_column_major_load
def int_matrix_column_major_store
: DefaultAttrsIntrinsic<[],
- [llvm_anyvector_ty, LLVMPointerToElt<0>,
+ [llvm_anyvector_ty, llvm_ptr_ty,
llvm_anyint_ty, llvm_i1_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoSync, IntrWillReturn, IntrArgMemOnly, IntrWriteMem,
WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
@@ -2116,6 +2444,17 @@ def int_vector_extract : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[llvm_anyvector_ty, llvm_i64_ty],
[IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<1>>]>;
+
+def int_experimental_vector_interleave2 : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMHalfElementsVectorType<0>,
+ LLVMHalfElementsVectorType<0>],
+ [IntrNoMem]>;
+
+def int_experimental_vector_deinterleave2 : DefaultAttrsIntrinsic<[LLVMHalfElementsVectorType<0>,
+ LLVMHalfElementsVectorType<0>],
+ [llvm_anyvector_ty],
+ [IntrNoMem]>;
+
//===----------------- Pointer Authentication Intrinsics ------------------===//
//
@@ -2169,6 +2508,14 @@ def int_ptrauth_sign_generic :
DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>;
//===----------------------------------------------------------------------===//
+//===------- Convergence Intrinsics ---------------------------------------===//
+
+def int_experimental_convergence_entry
+ : DefaultAttrsIntrinsic<[llvm_token_ty], [], [IntrNoMem, IntrConvergent]>;
+def int_experimental_convergence_anchor
+ : DefaultAttrsIntrinsic<[llvm_token_ty], [], [IntrNoMem, IntrConvergent]>;
+def int_experimental_convergence_loop
+ : DefaultAttrsIntrinsic<[llvm_token_ty], [], [IntrNoMem, IntrConvergent]>;
//===----------------------------------------------------------------------===//
// Target-specific intrinsics
@@ -2191,3 +2538,5 @@ include "llvm/IR/IntrinsicsSPIRV.td"
include "llvm/IR/IntrinsicsVE.td"
include "llvm/IR/IntrinsicsDirectX.td"
include "llvm/IR/IntrinsicsLoongArch.td"
+
+#endif // TEST_INTRINSICS_SUPPRESS_DEFS
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index b1f85563195f..557063c88132 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -557,7 +557,7 @@ def int_aarch64_neon_vcopy_lane: AdvSIMD_2Vector2Index_Intrinsic;
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_1Vec_Load_Intrinsic
- : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMAnyPointerType<LLVMMatchType<0>>],
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_1Vec_Store_Lane_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, llvm_i64_ty, llvm_anyptr_ty],
@@ -565,7 +565,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_2Vec_Load_Intrinsic
: DefaultAttrsIntrinsic<[LLVMMatchType<0>, llvm_anyvector_ty],
- [LLVMAnyPointerType<LLVMMatchType<0>>],
+ [llvm_anyptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_2Vec_Load_Lane_Intrinsic
: DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>],
@@ -574,7 +574,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_2Vec_Store_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
- LLVMAnyPointerType<LLVMMatchType<0>>],
+ llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<2>>]>;
class AdvSIMD_2Vec_Store_Lane_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
@@ -583,7 +583,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_3Vec_Load_Intrinsic
: DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyvector_ty],
- [LLVMAnyPointerType<LLVMMatchType<0>>],
+ [llvm_anyptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_3Vec_Load_Lane_Intrinsic
: DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
@@ -592,7 +592,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_3Vec_Store_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
- LLVMMatchType<0>, LLVMAnyPointerType<LLVMMatchType<0>>],
+ LLVMMatchType<0>, llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<3>>]>;
class AdvSIMD_3Vec_Store_Lane_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty,
@@ -603,7 +603,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_4Vec_Load_Intrinsic
: DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>,
LLVMMatchType<0>, llvm_anyvector_ty],
- [LLVMAnyPointerType<LLVMMatchType<0>>],
+ [llvm_anyptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_4Vec_Load_Lane_Intrinsic
: DefaultAttrsIntrinsic<[LLVMMatchType<0>, LLVMMatchType<0>,
@@ -615,7 +615,7 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_4Vec_Store_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
LLVMMatchType<0>, LLVMMatchType<0>,
- LLVMAnyPointerType<LLVMMatchType<0>>],
+ llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<4>>]>;
class AdvSIMD_4Vec_Store_Lane_Intrinsic
: DefaultAttrsIntrinsic<[], [llvm_anyvector_ty, LLVMMatchType<0>,
@@ -956,59 +956,53 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class AdvSIMD_1Vec_PredLoad_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMPointerToElt<0>],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_2Vec_PredLoad_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
- [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMPointerToElt<0>],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_3Vec_PredLoad_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
- [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMPointerToElt<0>],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_4Vec_PredLoad_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
LLVMMatchType<0>],
- [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMPointerToElt<0>],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
[IntrReadMem, IntrArgMemOnly]>;
class AdvSIMD_1Vec_PredLoad_WriteFFR_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMPointerToElt<0>],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
[IntrInaccessibleMemOrArgMemOnly]>;
class AdvSIMD_1Vec_PredStore_Intrinsic
: DefaultAttrsIntrinsic<[],
[llvm_anyvector_ty,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMPointerToElt<0>],
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<2>>]>;
class AdvSIMD_2Vec_PredStore_Intrinsic
: DefaultAttrsIntrinsic<[],
[llvm_anyvector_ty, LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerToElt<0>],
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<3>>]>;
class AdvSIMD_3Vec_PredStore_Intrinsic
: DefaultAttrsIntrinsic<[],
[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerToElt<0>],
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<4>>]>;
class AdvSIMD_4Vec_PredStore_Intrinsic
: DefaultAttrsIntrinsic<[],
[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>,
LLVMMatchType<0>,
- LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMPointerToElt<0>],
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_ptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<5>>]>;
class AdvSIMD_SVE_Index_Intrinsic
@@ -1354,8 +1348,8 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
class SVE2_CONFLICT_DETECT_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMAnyPointerType<llvm_any_ty>,
- LLVMMatchType<1>]>;
+ [llvm_anyptr_ty, LLVMMatchType<1>],
+ [IntrNoMem]>;
class SVE2_3VectorArg_Indexed_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
@@ -1391,6 +1385,16 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
+class AdvSIMD_SVE_2SVBoolArg_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_nxv16i1_ty],
+ [llvm_nxv16i1_ty],
+ [IntrNoMem]>;
+
+class AdvSIMD_SVE_3SVBoolArg_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_nxv16i1_ty],
+ [llvm_nxv16i1_ty, llvm_nxv16i1_ty],
+ [IntrNoMem]>;
+
class AdvSIMD_SVE_Reduce_Intrinsic
: DefaultAttrsIntrinsic<[LLVMVectorElementType<0>],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -1412,7 +1416,7 @@ class AdvSIMD_GatherLoad_SV_64b_Offsets_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMPointerToElt<0>,
+ llvm_ptr_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i64_ty>
],
[IntrReadMem, IntrArgMemOnly]>;
@@ -1421,7 +1425,7 @@ class AdvSIMD_GatherLoad_SV_64b_Offsets_WriteFFR_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMPointerToElt<0>,
+ llvm_ptr_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i64_ty>
],
[IntrInaccessibleMemOrArgMemOnly]>;
@@ -1430,7 +1434,7 @@ class AdvSIMD_GatherLoad_SV_32b_Offsets_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMPointerToElt<0>,
+ llvm_ptr_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>
],
[IntrReadMem, IntrArgMemOnly]>;
@@ -1439,7 +1443,7 @@ class AdvSIMD_GatherLoad_SV_32b_Offsets_WriteFFR_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMPointerToElt<0>,
+ llvm_ptr_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>
],
[IntrInaccessibleMemOrArgMemOnly]>;
@@ -1467,7 +1471,7 @@ class AdvSIMD_ScatterStore_SV_64b_Offsets_Intrinsic
[
llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMPointerToElt<0>,
+ llvm_ptr_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i64_ty>
],
[IntrWriteMem, IntrArgMemOnly]>;
@@ -1477,7 +1481,7 @@ class AdvSIMD_ScatterStore_SV_32b_Offsets_Intrinsic
[
llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMPointerToElt<0>,
+ llvm_ptr_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>
],
[IntrWriteMem, IntrArgMemOnly]>;
@@ -1650,8 +1654,10 @@ def int_aarch64_sve_uabd_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_mad : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_msb : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_mla : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_mla_u : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_mla_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
def int_aarch64_sve_mls : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_mls_u : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_mls_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
def int_aarch64_sve_saddv : AdvSIMD_SVE_SADDV_Reduce_Intrinsic;
@@ -1836,22 +1842,43 @@ def int_aarch64_sve_sel : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_lasta : AdvSIMD_SVE_Reduce_Intrinsic;
def int_aarch64_sve_lastb : AdvSIMD_SVE_Reduce_Intrinsic;
def int_aarch64_sve_rev : AdvSIMD_1VectorArg_Intrinsic;
+def int_aarch64_sve_rev_b16 : AdvSIMD_SVE_2SVBoolArg_Intrinsic;
+def int_aarch64_sve_rev_b32 : AdvSIMD_SVE_2SVBoolArg_Intrinsic;
+def int_aarch64_sve_rev_b64 : AdvSIMD_SVE_2SVBoolArg_Intrinsic;
def int_aarch64_sve_splice : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_sunpkhi : AdvSIMD_SVE_Unpack_Intrinsic;
def int_aarch64_sve_sunpklo : AdvSIMD_SVE_Unpack_Intrinsic;
def int_aarch64_sve_tbl : AdvSIMD_SVE_TBL_Intrinsic;
def int_aarch64_sve_trn1 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_trn1_b16 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_trn1_b32 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_trn1_b64 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
def int_aarch64_sve_trn2 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_trn2_b16 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_trn2_b32 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_trn2_b64 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
def int_aarch64_sve_trn1q : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_sve_trn2q : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_sve_uunpkhi : AdvSIMD_SVE_Unpack_Intrinsic;
def int_aarch64_sve_uunpklo : AdvSIMD_SVE_Unpack_Intrinsic;
def int_aarch64_sve_uzp1 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_uzp1_b16 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_uzp1_b32 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_uzp1_b64 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
def int_aarch64_sve_uzp2 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_uzp2_b16 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_uzp2_b32 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_uzp2_b64 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
def int_aarch64_sve_uzp1q : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_sve_uzp2q : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_sve_zip1 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_zip1_b16 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_zip1_b32 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_zip1_b64 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
def int_aarch64_sve_zip2 : AdvSIMD_2VectorArg_Intrinsic;
+def int_aarch64_sve_zip2_b16 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_zip2_b32 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
+def int_aarch64_sve_zip2_b64 : AdvSIMD_SVE_3SVBoolArg_Intrinsic;
def int_aarch64_sve_zip1q : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_sve_zip2q : AdvSIMD_2VectorArg_Intrinsic;
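The new b16/b32/b64 predicate permutes above all take and return whole <vscale x 16 x i1> predicates, so the intrinsics are not overloaded and carry no type suffix. A minimal sketch of how one of them is called at the IR level (illustrative only):

  declare <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b16(<vscale x 16 x i1>, <vscale x 16 x i1>)

  define <vscale x 16 x i1> @trn1_b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
    ; trn1 at 16-bit predicate granularity, operating on full svbool values
    %r = call <vscale x 16 x i1> @llvm.aarch64.sve.trn1.b16(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b)
    ret <vscale x 16 x i1> %r
  }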
@@ -1860,11 +1887,15 @@ def int_aarch64_sve_zip2q : AdvSIMD_2VectorArg_Intrinsic;
//
def int_aarch64_sve_and : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_and_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_bic : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_bic_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_cnot : AdvSIMD_Merged1VectorArg_Intrinsic;
def int_aarch64_sve_eor : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_eor_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_not : AdvSIMD_Merged1VectorArg_Intrinsic;
def int_aarch64_sve_orr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_orr_u : AdvSIMD_Pred2VectorArg_Intrinsic;
//
// Conversion
@@ -1895,31 +1926,44 @@ def int_aarch64_sve_whilehi : AdvSIMD_SVE_WHILE_Intrinsic;
//
def int_aarch64_sve_fabd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fabd_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fabs : AdvSIMD_Merged1VectorArg_Intrinsic;
def int_aarch64_sve_fadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fadd_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fcadd : AdvSIMD_SVE_CADD_Intrinsic;
def int_aarch64_sve_fcmla : AdvSIMD_SVE_CMLA_Intrinsic;
def int_aarch64_sve_fcmla_lane : AdvSIMD_SVE_CMLA_LANE_Intrinsic;
def int_aarch64_sve_fdiv : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fdiv_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fdivr : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fexpa_x : AdvSIMD_SVE_EXPA_Intrinsic;
def int_aarch64_sve_fmad : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_fmax : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmax_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fmaxnm : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmaxnm_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fmin : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fminnm : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fminnm_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fmla : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_fmla_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_fmla_u : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_fmls : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_fmls_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_fmls_u : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_fmsb : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_fmul : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmul_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_fmul_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fmulx : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmulx_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fneg : AdvSIMD_Merged1VectorArg_Intrinsic;
-def int_aarch64_sve_fmul_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
def int_aarch64_sve_fnmad : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_fnmla : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fnmla_u : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_fnmls : AdvSIMD_Pred3VectorArg_Intrinsic;
+def int_aarch64_sve_fnmls_u : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_fnmsb : AdvSIMD_Pred3VectorArg_Intrinsic;
def int_aarch64_sve_frecpe_x : AdvSIMD_1VectorArg_Intrinsic;
def int_aarch64_sve_frecps_x : AdvSIMD_2VectorArg_Intrinsic;
@@ -1936,6 +1980,7 @@ def int_aarch64_sve_frsqrts_x : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_sve_fscale : AdvSIMD_SVE_SCALE_Intrinsic;
def int_aarch64_sve_fsqrt : AdvSIMD_Merged1VectorArg_Intrinsic;
def int_aarch64_sve_fsub : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fsub_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_ftmad_x : AdvSIMD_2VectorArgIndexed_Intrinsic;
def int_aarch64_sve_ftsmul_x : AdvSIMD_SVE_TSMUL_Intrinsic;
@@ -2055,12 +2100,12 @@ def int_aarch64_sve_ptest_last : AdvSIMD_SVE_PTEST_Intrinsic;
// Reinterpreting data
//
-def int_aarch64_sve_convert_from_svbool : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+def int_aarch64_sve_convert_from_svbool : DefaultAttrsIntrinsic<[llvm_any_ty],
[llvm_nxv16i1_ty],
[IntrNoMem]>;
def int_aarch64_sve_convert_to_svbool : DefaultAttrsIntrinsic<[llvm_nxv16i1_ty],
- [llvm_anyvector_ty],
+ [llvm_any_ty],
[IntrNoMem]>;
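With the operand relaxed from llvm_anyvector_ty to llvm_any_ty, the reinterpret intrinsics are no longer restricted to vector overloads. The long-standing vector-predicate use is unchanged; a sketch, assuming standard mangling on the overloaded operand:

  declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
  declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1>)

  define <vscale x 16 x i1> @roundtrip_pred(<vscale x 16 x i1> %pg) {
    ; narrow a 16 x i1 svbool to a 4 x i1 predicate, then widen it back
    %p4 = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %pg)
    %pw = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %p4)
    ret <vscale x 16 x i1> %pw
  }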
//
@@ -2207,6 +2252,7 @@ def int_aarch64_sve_sqrshl : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_sqshl : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_sqshlu : AdvSIMD_SVE_ShiftByImm_Intrinsic;
def int_aarch64_sve_sqsub : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sqsub_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_sqsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_srhadd : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_sri : AdvSIMD_2VectorArgIndexed_Intrinsic;
@@ -2223,6 +2269,7 @@ def int_aarch64_sve_uqadd : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_uqrshl : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_uqshl : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_uqsub : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uqsub_u : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_uqsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_urecpe : AdvSIMD_Merged1VectorArg_Intrinsic;
def int_aarch64_sve_urhadd : AdvSIMD_Pred2VectorArg_Intrinsic;
@@ -2546,6 +2593,46 @@ def int_aarch64_sve_bfmmla : SVE_4Vec_BF16;
def int_aarch64_sve_bfdot_lane_v2 : SVE_4Vec_BF16_Indexed;
def int_aarch64_sve_bfmlalb_lane_v2 : SVE_4Vec_BF16_Indexed;
def int_aarch64_sve_bfmlalt_lane_v2 : SVE_4Vec_BF16_Indexed;
+
+//
+// SVE2.1 - Contiguous loads to multiple consecutive vectors
+//
+
+ class SVE2p1_Load_PN_X2_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+ [llvm_aarch64_svcount_ty, llvm_ptr_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+ class SVE2p1_Load_PN_X4_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [llvm_aarch64_svcount_ty, llvm_ptr_ty],
+ [IntrReadMem, IntrArgMemOnly]>;
+
+def int_aarch64_sve_ld1_pn_x2 : SVE2p1_Load_PN_X2_Intrinsic;
+def int_aarch64_sve_ld1_pn_x4 : SVE2p1_Load_PN_X4_Intrinsic;
+def int_aarch64_sve_ldnt1_pn_x2 : SVE2p1_Load_PN_X2_Intrinsic;
+def int_aarch64_sve_ldnt1_pn_x4 : SVE2p1_Load_PN_X4_Intrinsic;
+
+//
+// SVE2.1 - Contiguous stores to multiple consecutive vectors
+//
+
+ class SVE2p1_Store_PN_X2_Intrinsic
+ : DefaultAttrsIntrinsic<[], [ llvm_anyvector_ty, LLVMMatchType<0>,
+ llvm_aarch64_svcount_ty, llvm_ptr_ty ],
+ [IntrWriteMem, IntrArgMemOnly]>;
+
+ class SVE2p1_Store_PN_X4_Intrinsic
+ : DefaultAttrsIntrinsic<[], [ llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ llvm_aarch64_svcount_ty, llvm_ptr_ty],
+ [IntrWriteMem, IntrArgMemOnly]>;
+
+def int_aarch64_sve_st1_pn_x2 : SVE2p1_Store_PN_X2_Intrinsic;
+def int_aarch64_sve_st1_pn_x4 : SVE2p1_Store_PN_X4_Intrinsic;
+def int_aarch64_sve_stnt1_pn_x2 : SVE2p1_Store_PN_X2_Intrinsic;
+def int_aarch64_sve_stnt1_pn_x4 : SVE2p1_Store_PN_X4_Intrinsic;
}
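The SVE2.1 multi-vector contiguous loads and stores above return or consume N vectors of the overloaded type, together with a predicate-as-counter and a plain pointer. A sketch of the x2 load, assuming standard mangling on the single overloaded vector type:

  declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount"), ptr)

  define { <vscale x 16 x i8>, <vscale x 16 x i8> } @ld1_pn_x2(target("aarch64.svcount") %pn, ptr %base) {
    ; load two consecutive vectors from %base under the predicate-as-counter %pn
    %v = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %pn, ptr %base)
    ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %v
  }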
//
@@ -2698,9 +2785,10 @@ let TargetPrefix = "aarch64" in {
//
def int_aarch64_sve_psel
- : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
- LLVMMatchType<0>, llvm_i32_ty]>;
+ : DefaultAttrsIntrinsic<[llvm_nxv16i1_ty],
+ [llvm_nxv16i1_ty,
+ llvm_anyvector_ty, llvm_i32_ty],
+ [IntrNoMem]>;
//
// Predicate-pair intrinsics
@@ -2712,6 +2800,52 @@ let TargetPrefix = "aarch64" in {
}
//
+ // Predicate-as-counter intrinsics
+ //
+
+ def int_aarch64_sve_pext
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [llvm_aarch64_svcount_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
+ def int_aarch64_sve_pext_x2
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+ [llvm_aarch64_svcount_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
+ def int_aarch64_sve_ptrue_c8
+ : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>;
+ def int_aarch64_sve_ptrue_c16
+ : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>;
+ def int_aarch64_sve_ptrue_c32
+ : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>;
+ def int_aarch64_sve_ptrue_c64
+ : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>;
+
+ def int_aarch64_sve_cntp_c8
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_aarch64_svcount_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_aarch64_sve_cntp_c16
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_aarch64_svcount_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_aarch64_sve_cntp_c32
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_aarch64_svcount_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_aarch64_sve_cntp_c64
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_aarch64_svcount_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
+ // While (predicate-as-counter) intrinsics
+ foreach cmp = ["ge", "gt", "hi", "hs", "le", "lo", "ls", "lt"] in {
+ foreach ty = ["c8", "c16", "c32", "c64"] in {
+ def int_aarch64_sve_while # cmp # _ # ty
+ : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty],
+ [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+ }
+ }
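The predicate-as-counter while and cntp intrinsics defined above pair naturally: the while form produces a target("aarch64.svcount") value and cntp counts its active elements. A sketch (these are not overloaded, so no type suffix; the trailing i32 immediates, 2 here, are illustrative values for the ImmArg operands above):

  declare target("aarch64.svcount") @llvm.aarch64.sve.whilelo.c8(i64, i64, i32)
  declare i64 @llvm.aarch64.sve.cntp.c8(target("aarch64.svcount"), i32)

  define i64 @count_remaining(i64 %i, i64 %n) {
    ; build a counter predicate for 8-bit elements while %i < %n, then count its active lanes
    %pn = call target("aarch64.svcount") @llvm.aarch64.sve.whilelo.c8(i64 %i, i64 %n, i32 2)
    %c  = call i64 @llvm.aarch64.sve.cntp.c8(target("aarch64.svcount") %pn, i32 2)
    ret i64 %c
  }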
+
+ //
// SME2 Intrinsics
//
@@ -2770,6 +2904,19 @@ let TargetPrefix = "aarch64" in {
LLVMMatchType<0>, llvm_i32_ty],
[ImmArg<ArgIndex<6>>]>;
+ class SME2_VG2_Multi_Imm_Intrinsic
+ : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
+ [llvm_anyvector_ty, LLVMMatchType<0>,
+ llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<2>>]>;
+
+ class SME2_VG4_Multi_Imm_Intrinsic
+ : DefaultAttrsIntrinsic<[LLVMSubdivide4VectorType<0>],
+ [llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<4>>]>;
+
class SME2_ZA_Write_VG2_Intrinsic
: DefaultAttrsIntrinsic<[],
[llvm_i32_ty,
@@ -2783,6 +2930,50 @@ let TargetPrefix = "aarch64" in {
LLVMMatchType<0>, LLVMMatchType<0>],
[]>;
+ class SME2_VG2_Multi_Single_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+ class SME2_VG4_Multi_Single_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+ class SME2_VG2_Multi_Multi_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+ class SME2_VG4_Multi_Multi_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+ class SVE2_VG2_Sel_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+ [llvm_aarch64_svcount_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>], [IntrNoMem]>;
+
+ class SVE2_VG4_Sel_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [llvm_aarch64_svcount_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>], [IntrNoMem]>;
+
class SME2_CVT_VG2_SINGLE_Intrinsic
: DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>],
[llvm_anyvector_ty, LLVMMatchType<0>],
@@ -2820,6 +3011,88 @@ let TargetPrefix = "aarch64" in {
[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
+ class SME2_ZA_ArrayVector_Read_VG2_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+ [llvm_i32_ty],
+ []>;
+
+ class SME2_ZA_ArrayVector_Read_VG4_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [llvm_i32_ty],
+ []>;
+
+ class SME2_Matrix_TileVector_Read_VG2_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+ [llvm_i32_ty, llvm_i32_ty],
+ []>;
+
+ class SME2_Matrix_TileVector_Read_VG4_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [llvm_i32_ty, llvm_i32_ty],
+ []>;
+
+ class SME2_ZA_ArrayVector_Write_VG2_Intrinsic
+ : DefaultAttrsIntrinsic<[],
+ [llvm_i32_ty,
+ llvm_anyvector_ty, LLVMMatchType<0>],
+ []>;
+
+ class SME2_ZA_ArrayVector_Write_VG4_Intrinsic
+ : DefaultAttrsIntrinsic<[],
+ [llvm_i32_ty,
+ llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ []>;
+
+ class SME2_Matrix_TileVector_Write_VG2_Intrinsic
+ : DefaultAttrsIntrinsic<[],
+ [llvm_i32_ty, llvm_i32_ty,
+ llvm_anyvector_ty, LLVMMatchType<0>],
+ [ImmArg<ArgIndex<0>>]>;
+
+ class SME2_Matrix_TileVector_Write_VG4_Intrinsic
+ : DefaultAttrsIntrinsic<[],
+ [llvm_i32_ty, llvm_i32_ty,
+ llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [ImmArg<ArgIndex<0>>]>;
+
+ class SME2_VG2_Multi_Single_Single_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+ class SME2_VG4_Multi_Single_Single_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+ class SVE2_VG2_ZipUzp_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+ [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+
+ class SVE2_VG4_ZipUzp_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>;
+
+ class SME2_VG2_Unpk_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
+ [LLVMSubdivide2VectorType<0>], [IntrNoMem]>;
+
+ class SME2_VG4_Unpk_Intrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMMatchType<0>, LLVMMatchType<0>],
+ [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
+ [IntrNoMem]>;
+
//
// Multi-vector fused multiply-add/subtract
//
@@ -2840,6 +3113,50 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sme_fmls_lane_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
//
+ // Outer product and accumulate/subtract intrinsics
+ //
+
+ def int_aarch64_sme_smopa_za32 : SME_OuterProduct_Intrinsic;
+ def int_aarch64_sme_umopa_za32 : SME_OuterProduct_Intrinsic;
+ def int_aarch64_sme_smops_za32 : SME_OuterProduct_Intrinsic;
+ def int_aarch64_sme_umops_za32 : SME_OuterProduct_Intrinsic;
+
+ def int_aarch64_sme_bmopa_za32 : SME_OuterProduct_Intrinsic;
+ def int_aarch64_sme_bmops_za32 : SME_OuterProduct_Intrinsic;
+
+ //
+ // Multi-vector rounding shift left intrinsics
+ //
+
+ def int_aarch64_sve_srshl_single_x2 : SME2_VG2_Multi_Single_Intrinsic;
+ def int_aarch64_sve_urshl_single_x2 : SME2_VG2_Multi_Single_Intrinsic;
+ def int_aarch64_sve_srshl_single_x4 : SME2_VG4_Multi_Single_Intrinsic;
+ def int_aarch64_sve_urshl_single_x4 : SME2_VG4_Multi_Single_Intrinsic;
+
+ def int_aarch64_sve_srshl_x2 : SME2_VG2_Multi_Multi_Intrinsic;
+ def int_aarch64_sve_urshl_x2 : SME2_VG2_Multi_Multi_Intrinsic;
+ def int_aarch64_sve_srshl_x4 : SME2_VG4_Multi_Multi_Intrinsic;
+ def int_aarch64_sve_urshl_x4 : SME2_VG4_Multi_Multi_Intrinsic;
+
+ // Multi-vector saturating rounding shift right intrinsics
+
+ def int_aarch64_sve_sqrshr_x2 : SME2_VG2_Multi_Imm_Intrinsic;
+ def int_aarch64_sve_uqrshr_x2 : SME2_VG2_Multi_Imm_Intrinsic;
+ def int_aarch64_sve_sqrshr_x4 : SME2_VG4_Multi_Imm_Intrinsic;
+ def int_aarch64_sve_uqrshr_x4 : SME2_VG4_Multi_Imm_Intrinsic;
+
+ def int_aarch64_sve_sqrshrn_x2 : SME2_VG2_Multi_Imm_Intrinsic;
+ def int_aarch64_sve_uqrshrn_x2 : SME2_VG2_Multi_Imm_Intrinsic;
+ def int_aarch64_sve_sqrshrn_x4 : SME2_VG4_Multi_Imm_Intrinsic;
+ def int_aarch64_sve_uqrshrn_x4 : SME2_VG4_Multi_Imm_Intrinsic;
+
+ def int_aarch64_sve_sqrshru_x2 : SME2_VG2_Multi_Imm_Intrinsic;
+ def int_aarch64_sve_sqrshru_x4 : SME2_VG4_Multi_Imm_Intrinsic;
+
+ def int_aarch64_sve_sqrshrun_x2 : SME2_VG2_Multi_Imm_Intrinsic;
+ def int_aarch64_sve_sqrshrun_x4 : SME2_VG4_Multi_Imm_Intrinsic;
+
+ //
// Multi-vector multiply-add/subtract long
//
@@ -2859,6 +3176,86 @@ let TargetPrefix = "aarch64" in {
}
//
+ // Multi-vector multiply-add long long
+ //
+
+ foreach ty = ["s", "u"] in {
+ foreach instr = ["mla", "mls"] in {
+ foreach za = ["za32", "za64"] in {
+ def int_aarch64_sme_ # ty # instr # _ # za # _single_vg4x1 : SME2_Matrix_ArrayVector_Single_Single_Intrinsic;
+ def int_aarch64_sme_ # ty # instr # _ # za # _single_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
+ def int_aarch64_sme_ # ty # instr # _ # za # _single_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
+
+ def int_aarch64_sme_ # ty # instr # _ # za # _vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
+ def int_aarch64_sme_ # ty # instr # _ # za # _vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
+
+ def int_aarch64_sme_ # ty # instr # _ # za # _lane_vg4x1 : SME2_Matrix_ArrayVector_Single_Index_Intrinsic;
+ def int_aarch64_sme_ # ty # instr # _ # za # _lane_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
+ def int_aarch64_sme_ # ty # instr # _ # za # _lane_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
+ }
+ }
+ }
+
+ def int_aarch64_sme_sumla_za32_single_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
+ def int_aarch64_sme_sumla_za32_single_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
+
+ def int_aarch64_sme_sumla_za32_lane_vg4x1 : SME2_Matrix_ArrayVector_Single_Index_Intrinsic;
+ def int_aarch64_sme_sumla_za32_lane_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
+ def int_aarch64_sme_sumla_za32_lane_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
+
+ def int_aarch64_sme_usmla_za32_single_vg4x1 : SME2_Matrix_ArrayVector_Single_Single_Intrinsic;
+ def int_aarch64_sme_usmla_za32_single_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
+ def int_aarch64_sme_usmla_za32_single_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
+
+ def int_aarch64_sme_usmla_za32_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
+ def int_aarch64_sme_usmla_za32_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
+
+ def int_aarch64_sme_usmla_za32_lane_vg4x1 : SME2_Matrix_ArrayVector_Single_Index_Intrinsic;
+ def int_aarch64_sme_usmla_za32_lane_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
+ def int_aarch64_sme_usmla_za32_lane_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
+
+ // Multi-vector signed saturating doubling multiply high
+
+ def int_aarch64_sve_sqdmulh_single_vgx2 : SME2_VG2_Multi_Single_Intrinsic;
+ def int_aarch64_sve_sqdmulh_single_vgx4 : SME2_VG4_Multi_Single_Intrinsic;
+
+ def int_aarch64_sve_sqdmulh_vgx2 : SME2_VG2_Multi_Multi_Intrinsic;
+ def int_aarch64_sve_sqdmulh_vgx4 : SME2_VG4_Multi_Multi_Intrinsic;
+
+ // Multi-vector floating-point round to integral value
+
+ foreach inst = ["a", "m", "n", "p"] in {
+ def int_aarch64_sve_frint # inst # _x2 : SVE2_VG2_ZipUzp_Intrinsic;
+ def int_aarch64_sve_frint # inst # _x4 : SVE2_VG4_ZipUzp_Intrinsic;
+ }
+
+ //
+ // Multi-vector min/max
+ //
+
+ foreach ty = ["f", "s", "u"] in {
+ foreach instr = ["max", "min"] in {
+ def int_aarch64_sve_ # ty # instr # _single_x2 : SME2_VG2_Multi_Single_Intrinsic;
+ def int_aarch64_sve_ # ty # instr # _single_x4 : SME2_VG4_Multi_Single_Intrinsic;
+
+ def int_aarch64_sve_ # ty # instr # _x2 : SME2_VG2_Multi_Multi_Intrinsic;
+ def int_aarch64_sve_ # ty # instr # _x4 : SME2_VG4_Multi_Multi_Intrinsic;
+ }
+ }
+
+ //
+ // Multi-vector floating point min/max number
+ //
+
+ foreach instr = ["fmaxnm", "fminnm"] in {
+ def int_aarch64_sve_ # instr # _single_x2 : SME2_VG2_Multi_Single_Intrinsic;
+ def int_aarch64_sve_ # instr # _single_x4 : SME2_VG4_Multi_Single_Intrinsic;
+
+ def int_aarch64_sve_ # instr # _x2 : SME2_VG2_Multi_Multi_Intrinsic;
+ def int_aarch64_sve_ # instr # _x4 : SME2_VG4_Multi_Multi_Intrinsic;
+ }
+
+ //
// Multi-vector vertical dot-products
//
@@ -2928,4 +3325,117 @@ let TargetPrefix = "aarch64" in {
def int_aarch64_sme_sub_write_za_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
def int_aarch64_sme_add_write_za_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
def int_aarch64_sme_sub_write_za_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
+
+ // Multi-vector clamps
+ def int_aarch64_sve_sclamp_single_x2 : SME2_VG2_Multi_Single_Single_Intrinsic;
+ def int_aarch64_sve_uclamp_single_x2 : SME2_VG2_Multi_Single_Single_Intrinsic;
+ def int_aarch64_sve_fclamp_single_x2 : SME2_VG2_Multi_Single_Single_Intrinsic;
+
+ def int_aarch64_sve_sclamp_single_x4 : SME2_VG4_Multi_Single_Single_Intrinsic;
+ def int_aarch64_sve_uclamp_single_x4 : SME2_VG4_Multi_Single_Single_Intrinsic;
+ def int_aarch64_sve_fclamp_single_x4 : SME2_VG4_Multi_Single_Single_Intrinsic;
+
+ //
+ // Multi-vector add/sub and accumulate into ZA
+ //
+ foreach intr = ["add", "sub"] in {
+ foreach za = ["za32", "za64"] in {
+ def int_aarch64_sme_ # intr # _ # za # _vg1x2 : SME2_ZA_Write_VG2_Intrinsic;
+ def int_aarch64_sme_ # intr # _ # za # _vg1x4 : SME2_ZA_Write_VG4_Intrinsic;
+ }
+ }
+
+ //
+ // Move multi-vectors to/from ZA
+ //
+
+ def int_aarch64_sme_read_hor_vg2 : SME2_Matrix_TileVector_Read_VG2_Intrinsic;
+ def int_aarch64_sme_read_hor_vg4 : SME2_Matrix_TileVector_Read_VG4_Intrinsic;
+
+ def int_aarch64_sme_read_ver_vg2 : SME2_Matrix_TileVector_Read_VG2_Intrinsic;
+ def int_aarch64_sme_read_ver_vg4 : SME2_Matrix_TileVector_Read_VG4_Intrinsic;
+
+ def int_aarch64_sme_read_vg1x2 : SME2_ZA_ArrayVector_Read_VG2_Intrinsic;
+ def int_aarch64_sme_read_vg1x4 : SME2_ZA_ArrayVector_Read_VG4_Intrinsic;
+
+ def int_aarch64_sme_write_hor_vg2 : SME2_Matrix_TileVector_Write_VG2_Intrinsic;
+ def int_aarch64_sme_write_hor_vg4 : SME2_Matrix_TileVector_Write_VG4_Intrinsic;
+
+ def int_aarch64_sme_write_ver_vg2 : SME2_Matrix_TileVector_Write_VG2_Intrinsic;
+ def int_aarch64_sme_write_ver_vg4 : SME2_Matrix_TileVector_Write_VG4_Intrinsic;
+
+ def int_aarch64_sme_write_vg1x2 : SME2_ZA_ArrayVector_Write_VG2_Intrinsic;
+ def int_aarch64_sme_write_vg1x4 : SME2_ZA_ArrayVector_Write_VG4_Intrinsic;
+
+ //
+ // Multi-Single Vector add
+ //
+ def int_aarch64_sve_add_single_x2 : SME2_VG2_Multi_Single_Intrinsic;
+ def int_aarch64_sve_add_single_x4 : SME2_VG4_Multi_Single_Intrinsic;
+
+ // 2-way and 4-way multi-vector signed/unsigned integer dot-product
+ foreach ty = ["s", "u"] in {
+ foreach sz = ["za32", "za64"] in {
+ def int_aarch64_sme_ # ty # dot_single_ # sz # _vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
+ def int_aarch64_sme_ # ty # dot_single_ # sz # _vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
+
+ def int_aarch64_sme_ # ty # dot_ # sz # _vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
+ def int_aarch64_sme_ # ty # dot_ # sz # _vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
+
+ def int_aarch64_sme_ # ty # dot_lane_ # sz # _vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
+ def int_aarch64_sme_ # ty # dot_lane_ # sz # _vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
+ }
+ }
+
+ foreach ty = ["su", "us"] in {
+ def int_aarch64_sme_ # ty # dot_single_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
+ def int_aarch64_sme_ # ty # dot_single_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
+
+ def int_aarch64_sme_ # ty # dot_lane_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
+ def int_aarch64_sme_ # ty # dot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
+ }
+
+ def int_aarch64_sme_usdot_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
+ def int_aarch64_sme_usdot_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
+
+ // Multi-vector half-precision or bfloat floating-point dot-product
+ def int_aarch64_sme_fdot_single_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic;
+ def int_aarch64_sme_fdot_single_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic;
+
+ def int_aarch64_sme_fdot_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic;
+ def int_aarch64_sme_fdot_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic;
+
+ def int_aarch64_sme_fdot_lane_za32_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
+ def int_aarch64_sme_fdot_lane_za32_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
+
+ // Multi-vector zip and unzips
+ def int_aarch64_sve_zip_x2 : SVE2_VG2_ZipUzp_Intrinsic;
+ def int_aarch64_sve_zipq_x2 : SVE2_VG2_ZipUzp_Intrinsic;
+ def int_aarch64_sve_zip_x4 : SVE2_VG4_ZipUzp_Intrinsic;
+ def int_aarch64_sve_zipq_x4 : SVE2_VG4_ZipUzp_Intrinsic;
+ def int_aarch64_sve_uzp_x2 : SVE2_VG2_ZipUzp_Intrinsic;
+ def int_aarch64_sve_uzpq_x2 : SVE2_VG2_ZipUzp_Intrinsic;
+ def int_aarch64_sve_uzp_x4 : SVE2_VG4_ZipUzp_Intrinsic;
+ def int_aarch64_sve_uzpq_x4 : SVE2_VG4_ZipUzp_Intrinsic;
+
+ // Vector dot-products (2-way)
+ def int_aarch64_sve_sdot_x2 : SVE2_3VectorArg_Long_Intrinsic;
+ def int_aarch64_sve_udot_x2 : SVE2_3VectorArg_Long_Intrinsic;
+ def int_aarch64_sve_fdot_x2 : SVE2_3VectorArg_Long_Intrinsic;
+ def int_aarch64_sve_sdot_lane_x2 : SVE2_3VectorArgIndexed_Long_Intrinsic;
+ def int_aarch64_sve_udot_lane_x2 : SVE2_3VectorArgIndexed_Long_Intrinsic;
+ def int_aarch64_sve_fdot_lane_x2 : SVE2_3VectorArgIndexed_Long_Intrinsic;
+
+ //
+ // Signed/unsigned multi-vector unpacks
+ //
+ def int_aarch64_sve_sunpk_x2 : SME2_VG2_Unpk_Intrinsic;
+ def int_aarch64_sve_uunpk_x2 : SME2_VG2_Unpk_Intrinsic;
+ def int_aarch64_sve_sunpk_x4 : SME2_VG4_Unpk_Intrinsic;
+ def int_aarch64_sve_uunpk_x4 : SME2_VG4_Unpk_Intrinsic;
+
+ // 2-way and 4-way vector selects
+ def int_aarch64_sve_sel_x2 : SVE2_VG2_Sel_Intrinsic;
+ def int_aarch64_sve_sel_x4 : SVE2_VG4_Sel_Intrinsic;
+
}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 365e51c1bd22..36093383fdf9 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -53,7 +53,7 @@ def int_r600_group_barrier : ClangBuiltin<"__builtin_r600_group_barrier">,
// AS 7 is PARAM_I_ADDRESS, used for kernel arguments
def int_r600_implicitarg_ptr :
ClangBuiltin<"__builtin_r600_implicitarg_ptr">,
- DefaultAttrsIntrinsic<[LLVMQualPointerType<llvm_i8_ty, 7>], [],
+ DefaultAttrsIntrinsic<[LLVMQualPointerType<7>], [],
[IntrNoMem, IntrSpeculatable]>;
def int_r600_rat_store_typed :
@@ -141,22 +141,22 @@ defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
<"__builtin_amdgcn_workgroup_id">;
def int_amdgcn_dispatch_ptr :
- DefaultAttrsIntrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
+ DefaultAttrsIntrinsic<[LLVMQualPointerType<4>], [],
[Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_queue_ptr :
ClangBuiltin<"__builtin_amdgcn_queue_ptr">,
- DefaultAttrsIntrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
+ DefaultAttrsIntrinsic<[LLVMQualPointerType<4>], [],
[Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_kernarg_segment_ptr :
ClangBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
- DefaultAttrsIntrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
+ DefaultAttrsIntrinsic<[LLVMQualPointerType<4>], [],
[Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_implicitarg_ptr :
ClangBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
- DefaultAttrsIntrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
+ DefaultAttrsIntrinsic<[LLVMQualPointerType<4>], [],
[Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable]>;
def int_amdgcn_groupstaticsize :
@@ -173,7 +173,7 @@ def int_amdgcn_lds_kernel_id :
def int_amdgcn_implicit_buffer_ptr :
ClangBuiltin<"__builtin_amdgcn_implicit_buffer_ptr">,
- DefaultAttrsIntrinsic<[LLVMQualPointerType<llvm_i8_ty, 4>], [],
+ DefaultAttrsIntrinsic<[LLVMQualPointerType<4>], [],
[Align<RetIndex, 4>, IntrNoMem, IntrSpeculatable]>;
// Set EXEC to the 64-bit value given.
@@ -300,6 +300,23 @@ def int_amdgcn_cos : DefaultAttrsIntrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;
+// v_log_{f16|f32}, performs log2. f32 version does not handle
+// denormals. There is no reason to use this for f16 as it does
+// support denormals, and the generic log2 intrinsic should be
+// preferred.
+def int_amdgcn_log : DefaultAttrsIntrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+>;
+
+// v_exp_{f16|f32} (int_amdgcn_exp was taken by export
+// already). Performs exp2. f32 version does not handle
+// denormals. There is no reason to use this for f16 as it does
+// support denormals, and the generic exp2 intrinsic should be
+// preferred.
+def int_amdgcn_exp2 : DefaultAttrsIntrinsic<
+ [llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
+>;
+
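Per the comments above, these map to v_log/v_exp and compute log2/exp2, and the f32 forms do not handle denormals. A sketch of the overloaded f32 forms:

  declare float @llvm.amdgcn.log.f32(float)
  declare float @llvm.amdgcn.exp2.f32(float)

  define float @roundtrip(float %x) {
    ; exp2(log2(x)); for f16, prefer the generic llvm.log2/llvm.exp2 intrinsics as noted above
    %l = call float @llvm.amdgcn.log.f32(float %x)
    %e = call float @llvm.amdgcn.exp2.f32(float %l)
    ret float %e
  }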
def int_amdgcn_log_clamp : DefaultAttrsIntrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem, IntrSpeculatable]
>;
@@ -400,7 +417,7 @@ def int_amdgcn_class : DefaultAttrsIntrinsic<
[IntrNoMem, IntrSpeculatable]
>;
-def int_amdgcn_fmed3 : ClangBuiltin<"__builtin_amdgcn_fmed3">,
+def int_amdgcn_fmed3 :
DefaultAttrsIntrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]
@@ -444,24 +461,9 @@ def int_amdgcn_fmad_ftz :
[IntrNoMem, IntrSpeculatable]
>;
-// Fields should mirror atomicrmw
-class AMDGPUAtomicIncIntrin : Intrinsic<[llvm_anyint_ty],
- [llvm_anyptr_ty,
- LLVMMatchType<0>,
- llvm_i32_ty, // ordering
- llvm_i32_ty, // scope
- llvm_i1_ty], // isVolatile
- [IntrArgMemOnly, IntrWillReturn, NoCapture<ArgIndex<0>>,
- ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree], "",
- [SDNPMemOperand]
->;
-
-def int_amdgcn_atomic_inc : AMDGPUAtomicIncIntrin;
-def int_amdgcn_atomic_dec : AMDGPUAtomicIncIntrin;
-
class AMDGPULDSIntrin :
Intrinsic<[llvm_any_ty],
- [LLVMQualPointerType<LLVMMatchType<0>, 3>,
+ [LLVMQualPointerType<3>,
LLVMMatchType<0>,
llvm_i32_ty, // ordering
llvm_i32_ty, // scope
@@ -475,7 +477,7 @@ class AMDGPUDSOrderedIntrinsic : Intrinsic<
[llvm_i32_ty],
// M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that
// the bit packing can be optimized at the IR level.
- [LLVMQualPointerType<llvm_i32_ty, 2>, // IntToPtr(M0)
+ [LLVMQualPointerType<2>, // IntToPtr(M0)
llvm_i32_ty, // value to add or swap
llvm_i32_ty, // ordering
llvm_i32_ty, // scope
@@ -872,10 +874,12 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimIntrinsics = {
defm int_amdgcn_image_store : AMDGPUImageDimIntrinsicsAll<
"STORE", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
- [IntrWriteMem, IntrWillReturn], [SDNPMemOperand]>;
+ [IntrWriteMem, IntrWillReturn], [SDNPMemOperand]>,
+ AMDGPUImageDMaskIntrinsic;
defm int_amdgcn_image_store_mip : AMDGPUImageDimIntrinsicsNoMsaa<
"STORE_MIP", [], [AMDGPUArg<llvm_anyfloat_ty, "vdata">],
- [IntrWriteMem, IntrWillReturn], [SDNPMemOperand], 1>;
+ [IntrWriteMem, IntrWillReturn], [SDNPMemOperand], 1>,
+ AMDGPUImageDMaskIntrinsic;
//////////////////////////////////////////////////////////////////////////
// MSAA intrinsics
@@ -988,8 +992,22 @@ defset list<AMDGPUImageDimIntrinsic> AMDGPUImageDimAtomicIntrinsics = {
// Buffer intrinsics
//////////////////////////////////////////////////////////////////////////
+// Data type for buffer resources (V#). Maybe, in the future, we can create a
+// similar one for textures (T#).
+def AMDGPUBufferRsrcTy : LLVMQualPointerType<8>;
+
let TargetPrefix = "amdgcn" in {
+def int_amdgcn_make_buffer_rsrc : DefaultAttrsIntrinsic <
+ [AMDGPUBufferRsrcTy],
+ [llvm_anyptr_ty, // base
+ llvm_i16_ty, // stride (and swizzle control)
+ llvm_i32_ty, // NumRecords / extent
+ llvm_i32_ty], // flags
+ // Attributes lifted from ptrmask + some extra argument attributes.
+ [IntrNoMem, NoCapture<ArgIndex<0>>, ReadNone<ArgIndex<0>>,
+ IntrSpeculatable, IntrWillReturn]>;
+
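int_amdgcn_make_buffer_rsrc builds a buffer resource (ptr addrspace(8)) from a base pointer plus stride, extent and flags. A sketch; the .p0 suffix assumes standard mangling of the single overloaded base-pointer operand, and the zero stride/flags values are illustrative:

  declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr, i16, i32, i32)

  define ptr addrspace(8) @make_rsrc(ptr %base, i32 %num_records) {
    ; stride/swizzle control = 0, flags = 0
    %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr %base, i16 0, i32 %num_records, i32 0)
    ret ptr addrspace(8) %rsrc
  }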
defset list<AMDGPURsrcIntrinsic> AMDGPUBufferIntrinsics = {
class AMDGPUBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
@@ -1034,6 +1052,10 @@ def int_amdgcn_buffer_store : AMDGPUBufferStore;
// and swizzling changes depending on whether idxen is set in the instruction.
// These new intrinsics also keep the offset and soffset arguments separate as
// they behave differently in bounds checking and swizzling.
+
+// The versions of these intrinsics that take <4 x i32> arguments are deprecated
+// in favor of their .ptr.buffer variants that take ptr addrspace(8) arguments,
+// which allow for improved reasoning about memory accesses.
class AMDGPURawBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
[data_ty],
[llvm_v4i32_ty, // rsrc(SGPR)
@@ -1048,6 +1070,21 @@ class AMDGPURawBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsi
def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad<llvm_anyfloat_ty>;
def int_amdgcn_raw_buffer_load : AMDGPURawBufferLoad;
+class AMDGPURawPtrBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
+ [data_ty],
+ [AMDGPUBufferRsrcTy, // rsrc(SGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+),
+ // swizzled buffer (bit 3 = swz))
+ [IntrArgMemOnly, IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
+ ImmArg<ArgIndex<3>>], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<0>;
+def int_amdgcn_raw_ptr_buffer_load_format : AMDGPURawPtrBufferLoad<llvm_anyfloat_ty>;
+def int_amdgcn_raw_ptr_buffer_load : AMDGPURawPtrBufferLoad;
+
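As the comment above notes, the <4 x i32> resource forms are deprecated in favor of the .ptr.buffer variants taking ptr addrspace(8). Side by side at the IR level (a sketch, standard data-type mangling assumed):

  ; deprecated: resource passed as <4 x i32>
  declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32)
  ; preferred: resource passed as ptr addrspace(8)
  declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32)

  define float @load_dword(ptr addrspace(8) %rsrc, i32 %voffset) {
    ; offset %voffset, soffset 0, auxiliary/cachepolicy 0
    %v = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
    ret float %v
  }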
class AMDGPUStructBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
[data_ty],
[llvm_v4i32_ty, // rsrc(SGPR)
@@ -1063,6 +1100,22 @@ class AMDGPUStructBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntri
def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad;
def int_amdgcn_struct_buffer_load : AMDGPUStructBufferLoad;
+class AMDGPUStructPtrBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
+ [data_ty],
+ [AMDGPUBufferRsrcTy, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+),
+ // swizzled buffer (bit 3 = swz))
+ [IntrArgMemOnly, IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
+ ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<0>;
+def int_amdgcn_struct_ptr_buffer_load_format : AMDGPUStructPtrBufferLoad;
+def int_amdgcn_struct_ptr_buffer_load : AMDGPUStructPtrBufferLoad;
+
class AMDGPURawBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
[],
[data_ty, // vdata(VGPR)
@@ -1078,6 +1131,22 @@ class AMDGPURawBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrins
def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore<llvm_anyfloat_ty>;
def int_amdgcn_raw_buffer_store : AMDGPURawBufferStore;
+class AMDGPURawPtrBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
+ [],
+ [data_ty, // vdata(VGPR)
+ AMDGPUBufferRsrcTy, // rsrc(SGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+),
+ // swizzled buffer (bit 3 = swz))
+ [IntrArgMemOnly, IntrWriteMem, WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
+ ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1>;
+def int_amdgcn_raw_ptr_buffer_store_format : AMDGPURawPtrBufferStore<llvm_anyfloat_ty>;
+def int_amdgcn_raw_ptr_buffer_store : AMDGPURawPtrBufferStore;
+
class AMDGPUStructBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
[],
[data_ty, // vdata(VGPR)
@@ -1094,6 +1163,23 @@ class AMDGPUStructBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntr
def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore;
def int_amdgcn_struct_buffer_store : AMDGPUStructBufferStore;
+class AMDGPUStructPtrBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrinsic <
+ [],
+ [data_ty, // vdata(VGPR)
+ AMDGPUBufferRsrcTy, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+),
+ // swizzled buffer (bit 3 = swz))
+ [IntrArgMemOnly, IntrWriteMem, WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
+ ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1>;
+def int_amdgcn_struct_ptr_buffer_store_format : AMDGPUStructPtrBufferStore;
+def int_amdgcn_struct_ptr_buffer_store : AMDGPUStructPtrBufferStore;
+
class AMDGPURawBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
!if(NoRtn, [], [data_ty]),
[!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR)
@@ -1128,8 +1214,46 @@ def int_amdgcn_raw_buffer_atomic_cmpswap : Intrinsic<
[ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<2, 0>;
+class AMDGPURawPtrBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
+ !if(NoRtn, [], [data_ty]),
+ [!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR)
+ AMDGPUBufferRsrcTy, // rsrc(SGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
+ [IntrArgMemOnly, NoCapture<ArgIndex<1>>,
+ ImmArg<ArgIndex<4>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1, 0>;
+
+def int_amdgcn_raw_ptr_buffer_atomic_swap : AMDGPURawPtrBufferAtomic;
+def int_amdgcn_raw_ptr_buffer_atomic_add : AMDGPURawPtrBufferAtomic;
+def int_amdgcn_raw_ptr_buffer_atomic_sub : AMDGPURawPtrBufferAtomic;
+def int_amdgcn_raw_ptr_buffer_atomic_smin : AMDGPURawPtrBufferAtomic;
+def int_amdgcn_raw_ptr_buffer_atomic_umin : AMDGPURawPtrBufferAtomic;
+def int_amdgcn_raw_ptr_buffer_atomic_fmin : AMDGPURawPtrBufferAtomic<llvm_anyfloat_ty>;
+def int_amdgcn_raw_ptr_buffer_atomic_smax : AMDGPURawPtrBufferAtomic;
+def int_amdgcn_raw_ptr_buffer_atomic_umax : AMDGPURawPtrBufferAtomic;
+def int_amdgcn_raw_ptr_buffer_atomic_fmax : AMDGPURawPtrBufferAtomic<llvm_anyfloat_ty>;
+def int_amdgcn_raw_ptr_buffer_atomic_and : AMDGPURawPtrBufferAtomic;
+def int_amdgcn_raw_ptr_buffer_atomic_or : AMDGPURawPtrBufferAtomic;
+def int_amdgcn_raw_ptr_buffer_atomic_xor : AMDGPURawPtrBufferAtomic;
+def int_amdgcn_raw_ptr_buffer_atomic_inc : AMDGPURawPtrBufferAtomic;
+def int_amdgcn_raw_ptr_buffer_atomic_dec : AMDGPURawPtrBufferAtomic;
+def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic<
+ [llvm_anyint_ty],
+ [LLVMMatchType<0>, // src(VGPR)
+ LLVMMatchType<0>, // cmp(VGPR)
+ AMDGPUBufferRsrcTy, // rsrc(SGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
+ [IntrArgMemOnly, NoCapture<ArgIndex<2>>,
+ ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<2, 0>;
+
// gfx908 intrinsic
def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
+def int_amdgcn_raw_ptr_buffer_atomic_fadd : AMDGPURawPtrBufferAtomic<llvm_anyfloat_ty>;
class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
!if(NoRtn, [], [data_ty]),
@@ -1165,13 +1289,52 @@ def int_amdgcn_struct_buffer_atomic_cmpswap : Intrinsic<
[ImmArg<ArgIndex<6>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<2, 0>;
+class AMDGPUStructPtrBufferAtomic<LLVMType data_ty = llvm_any_ty, bit NoRtn = false> : Intrinsic <
+ !if(NoRtn, [], [data_ty]),
+ [!if(NoRtn, data_ty, LLVMMatchType<0>), // vdata(VGPR)
+ AMDGPUBufferRsrcTy, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
+ [IntrArgMemOnly, NoCapture<ArgIndex<1>>,
+ ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1, 0>;
+def int_amdgcn_struct_ptr_buffer_atomic_swap : AMDGPUStructPtrBufferAtomic;
+def int_amdgcn_struct_ptr_buffer_atomic_add : AMDGPUStructPtrBufferAtomic;
+def int_amdgcn_struct_ptr_buffer_atomic_sub : AMDGPUStructPtrBufferAtomic;
+def int_amdgcn_struct_ptr_buffer_atomic_smin : AMDGPUStructPtrBufferAtomic;
+def int_amdgcn_struct_ptr_buffer_atomic_umin : AMDGPUStructPtrBufferAtomic;
+def int_amdgcn_struct_ptr_buffer_atomic_smax : AMDGPUStructPtrBufferAtomic;
+def int_amdgcn_struct_ptr_buffer_atomic_umax : AMDGPUStructPtrBufferAtomic;
+def int_amdgcn_struct_ptr_buffer_atomic_and : AMDGPUStructPtrBufferAtomic;
+def int_amdgcn_struct_ptr_buffer_atomic_or : AMDGPUStructPtrBufferAtomic;
+def int_amdgcn_struct_ptr_buffer_atomic_xor : AMDGPUStructPtrBufferAtomic;
+def int_amdgcn_struct_ptr_buffer_atomic_inc : AMDGPUStructPtrBufferAtomic;
+def int_amdgcn_struct_ptr_buffer_atomic_dec : AMDGPUStructPtrBufferAtomic;
+def int_amdgcn_struct_ptr_buffer_atomic_cmpswap : Intrinsic<
+ [llvm_anyint_ty],
+ [LLVMMatchType<0>, // src(VGPR)
+ LLVMMatchType<0>, // cmp(VGPR)
+ AMDGPUBufferRsrcTy, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty], // cachepolicy(imm; bit 1 = slc)
+ [IntrArgMemOnly, NoCapture<ArgIndex<2>>,
+ ImmArg<ArgIndex<6>>, IntrWillReturn, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<2, 0>;
+
// gfx908 intrinsic
def int_amdgcn_struct_buffer_atomic_fadd : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;
+def int_amdgcn_struct_ptr_buffer_atomic_fadd : AMDGPUStructPtrBufferAtomic<llvm_anyfloat_ty>;
// gfx90a intrinsics
def int_amdgcn_struct_buffer_atomic_fmin : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;
def int_amdgcn_struct_buffer_atomic_fmax : AMDGPUStructBufferAtomic<llvm_anyfloat_ty>;
+def int_amdgcn_struct_ptr_buffer_atomic_fmin : AMDGPUStructPtrBufferAtomic<llvm_anyfloat_ty>;
+def int_amdgcn_struct_ptr_buffer_atomic_fmax : AMDGPUStructPtrBufferAtomic<llvm_anyfloat_ty>;
// Obsolescent tbuffer intrinsics.
def int_amdgcn_tbuffer_load : DefaultAttrsIntrinsic <
@@ -1225,6 +1388,20 @@ def int_amdgcn_raw_tbuffer_load : DefaultAttrsIntrinsic <
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;
+def int_amdgcn_raw_ptr_tbuffer_load : DefaultAttrsIntrinsic <
+ [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
+ [AMDGPUBufferRsrcTy, // rsrc(SGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+),
+ // swizzled buffer (bit 3 = swz))
+ [IntrArgMemOnly, IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
+ ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<0>;
+
def int_amdgcn_raw_tbuffer_store : DefaultAttrsIntrinsic <
[],
[llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
@@ -1240,6 +1417,21 @@ def int_amdgcn_raw_tbuffer_store : DefaultAttrsIntrinsic <
ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<1>;
+def int_amdgcn_raw_ptr_tbuffer_store : DefaultAttrsIntrinsic <
+ [],
+ [llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
+ AMDGPUBufferRsrcTy, // rsrc(SGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+),
+ // swizzled buffer (bit 3 = swz))
+ [IntrArgMemOnly, IntrWriteMem, WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
+ ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1>;
+
def int_amdgcn_struct_tbuffer_load : DefaultAttrsIntrinsic <
[llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
[llvm_v4i32_ty, // rsrc(SGPR)
@@ -1255,6 +1447,37 @@ def int_amdgcn_struct_tbuffer_load : DefaultAttrsIntrinsic <
ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
AMDGPURsrcIntrinsic<0>;
+def int_amdgcn_struct_ptr_tbuffer_load : DefaultAttrsIntrinsic <
+ [llvm_any_ty], // overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
+ [AMDGPUBufferRsrcTy, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+),
+ // swizzled buffer (bit 3 = swz))
+ [IntrArgMemOnly, IntrReadMem, ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
+ ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<0>;
+
+def int_amdgcn_struct_ptr_tbuffer_store : DefaultAttrsIntrinsic <
+ [],
+ [llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
+ AMDGPUBufferRsrcTy, // rsrc(SGPR)
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+),
+ // swizzled buffer (bit 3 = swz))
+ [IntrArgMemOnly, IntrWriteMem, WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
+ ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>], "", [SDNPMemOperand]>,
+ AMDGPURsrcIntrinsic<1>;
+
def int_amdgcn_struct_tbuffer_store : DefaultAttrsIntrinsic <
[],
[llvm_any_ty, // vdata(VGPR), overloaded for types f32/i32, v2f32/v2i32, v4f32/v4i32
@@ -1319,7 +1542,7 @@ def int_amdgcn_buffer_atomic_fadd : AMDGPUBufferAtomicFP;
class AMDGPURawBufferLoadLDS : Intrinsic <
[],
[llvm_v4i32_ty, // rsrc(SGPR)
- LLVMQualPointerType<llvm_i8_ty, 3>, // LDS base offset
+ LLVMQualPointerType<3>, // LDS base offset
llvm_i32_ty, // Data byte size: 1/2/4
llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling)
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
@@ -1332,10 +1555,29 @@ class AMDGPURawBufferLoadLDS : Intrinsic <
ImmArg<ArgIndex<6>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
def int_amdgcn_raw_buffer_load_lds : AMDGPURawBufferLoadLDS;
+class AMDGPURawPtrBufferLoadLDS : Intrinsic <
+ [],
+ [AMDGPUBufferRsrcTy, // rsrc(SGPR)
+ LLVMQualPointerType<3>, // LDS base offset
+ llvm_i32_ty, // Data byte size: 1/2/4
+ llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+))
+ // swizzled buffer (bit 3 = swz))
+ [IntrWillReturn, IntrArgMemOnly,
+ ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
+ WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
+ ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>,
+ ImmArg<ArgIndex<6>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
+def int_amdgcn_raw_ptr_buffer_load_lds : AMDGPURawPtrBufferLoadLDS;
+
class AMDGPUStructBufferLoadLDS : Intrinsic <
[],
[llvm_v4i32_ty, // rsrc(SGPR)
- LLVMQualPointerType<llvm_i8_ty, 3>, // LDS base offset
+ LLVMQualPointerType<3>, // LDS base offset
llvm_i32_ty, // Data byte size: 1/2/4
llvm_i32_ty, // vindex(VGPR)
llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling)
@@ -1349,6 +1591,26 @@ class AMDGPUStructBufferLoadLDS : Intrinsic <
ImmArg<ArgIndex<7>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
def int_amdgcn_struct_buffer_load_lds : AMDGPUStructBufferLoadLDS;
+class AMDGPUStructPtrBufferLoadLDS : Intrinsic <
+ [],
+ [AMDGPUBufferRsrcTy, // rsrc(SGPR)
+ LLVMQualPointerType<3>, // LDS base offset
+ llvm_i32_ty, // Data byte size: 1/2/4
+ llvm_i32_ty, // vindex(VGPR)
+ llvm_i32_ty, // voffset(VGPR, included in bounds checking and swizzling)
+ llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
+ llvm_i32_ty, // imm offset(imm, included in bounds checking and swizzling)
+ llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
+ // bit 1 = slc,
+ // bit 2 = dlc on gfx10+))
+ // swizzled buffer (bit 3 = swz))
+ [IntrWillReturn, IntrArgMemOnly,
+ ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
+ WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
+ ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>,
+ ImmArg<ArgIndex<7>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>, AMDGPURsrcIntrinsic<0>;
+def int_amdgcn_struct_ptr_buffer_load_lds : AMDGPUStructPtrBufferLoadLDS;
+
} // defset AMDGPUBufferIntrinsics
// Uses that do not set the done bit should set IntrWriteMem on the
@@ -1662,6 +1924,23 @@ def int_amdgcn_ballot :
Intrinsic<[llvm_anyint_ty], [llvm_i1_ty],
[IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+def int_amdgcn_inverse_ballot :
+ Intrinsic<[llvm_i1_ty], [llvm_anyint_ty],
+ [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+
+class AMDGPUWaveReduce<LLVMType data_ty = llvm_anyint_ty> : Intrinsic<
+ [data_ty],
+ [
+ LLVMMatchType<0>, // llvm value to reduce (SGPR/VGPR)
+ llvm_i32_ty // Reduction Strategy Switch for lowering ( 0: Default,
+ // 1: Iterative strategy, and
+ // 2. DPP)
+ ],
+ [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree, ImmArg<ArgIndex<1>>]>;
+
+def int_amdgcn_wave_reduce_umin : AMDGPUWaveReduce;
+def int_amdgcn_wave_reduce_umax : AMDGPUWaveReduce;
+
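The wave reduce intrinsics above take the value to reduce plus an immediate strategy selector (0 = default, 1 = iterative, 2 = DPP, per the comment). A sketch using the i32 overload:

  declare i32 @llvm.amdgcn.wave.reduce.umin.i32(i32, i32 immarg)

  define i32 @wave_min(i32 %lane_val) {
    ; strategy 0 lets the backend choose between the iterative and DPP lowerings
    %m = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 %lane_val, i32 0)
    ret i32 %m
  }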
def int_amdgcn_readfirstlane :
ClangBuiltin<"__builtin_amdgcn_readfirstlane">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
@@ -1846,6 +2125,28 @@ def int_amdgcn_is_private : ClangBuiltin<"__builtin_amdgcn_is_private">,
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>]
>;
+// A uniform tail call to a function with the `amdgpu_cs_chain` or
+// `amdgpu_cs_chain_preserve` calling convention. It will populate the SGPRs
+// starting at s0 and the VGPRs starting at v8, set EXEC and perform a jump to
+// the given function.
+// Can only be used in functions with the `amdgpu_cs`, `amdgpu_cs_chain` or
+// `amdgpu_cs_chain_preserve` calling conventions, and only in uniform control
+// flow.
+def int_amdgcn_cs_chain:
+ Intrinsic<[],
+ [llvm_anyptr_ty, // The function to jump to.
+ llvm_anyint_ty, // Value to put in EXEC (should be i32 or i64).
+ llvm_any_ty, // Arguments that will be copied into SGPRs (s0+).
+ // Must be uniform.
+ llvm_any_ty, // Arguments that will be copied into VGPRs (v8+).
+ // Need not be uniform.
+ llvm_i32_ty, // Flags.
+ llvm_vararg_ty // Additional arguments. Only present if Flags is
+ // non-zero.
+ ],
+ [IntrConvergent, IntrNoReturn, ImmArg<ArgIndex<4>>]>;
+
+
//===----------------------------------------------------------------------===//
// CI+ Intrinsics
//===----------------------------------------------------------------------===//
@@ -1919,8 +2220,8 @@ def int_amdgcn_perm :
class AMDGPUGlobalLoadLDS : Intrinsic <
[],
- [LLVMQualPointerType<llvm_i8_ty, 1>, // Base global pointer to load from
- LLVMQualPointerType<llvm_i8_ty, 3>, // LDS base pointer to store to
+ [LLVMQualPointerType<1>, // Base global pointer to load from
+ LLVMQualPointerType<3>, // LDS base pointer to store to
llvm_i32_ty, // Data byte size: 1/2/4
llvm_i32_ty, // imm offset (applied to both global and LDS address)
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc/sc0,
@@ -1996,12 +2297,14 @@ def int_amdgcn_permlane64 :
def int_amdgcn_ds_add_gs_reg_rtn :
ClangBuiltin<"__builtin_amdgcn_ds_add_gs_reg_rtn">,
Intrinsic<[llvm_anyint_ty], [llvm_i32_ty, llvm_i32_ty],
- [ImmArg<ArgIndex<1>>, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+ [ImmArg<ArgIndex<1>>, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree],
+ "", [SDNPMemOperand]>;
def int_amdgcn_ds_sub_gs_reg_rtn :
ClangBuiltin<"__builtin_amdgcn_ds_sub_gs_reg_rtn">,
Intrinsic<[llvm_anyint_ty], [llvm_i32_ty, llvm_i32_ty],
- [ImmArg<ArgIndex<1>>, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+ [ImmArg<ArgIndex<1>>, IntrHasSideEffects, IntrWillReturn, IntrNoCallback, IntrNoFree],
+ "", [SDNPMemOperand]>;
def int_amdgcn_ds_bvh_stack_rtn :
Intrinsic<
@@ -2333,7 +2636,7 @@ def int_amdgcn_global_atomic_fadd_v2bf16 : AMDGPUGlobalAtomicRtn<llvm_v2i16_ty>;
def int_amdgcn_flat_atomic_fadd_v2bf16 : AMDGPUGlobalAtomicRtn<llvm_v2i16_ty>;
def int_amdgcn_ds_fadd_v2bf16 : DefaultAttrsIntrinsic<
[llvm_v2i16_ty],
- [LLVMQualPointerType<llvm_v2i16_ty, 3>, llvm_v2i16_ty],
+ [LLVMQualPointerType<3>, llvm_v2i16_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>,
ClangBuiltin<"__builtin_amdgcn_ds_atomic_fadd_v2bf16">;
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index f3b1a0c5d282..11b9877091a8 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -702,13 +702,13 @@ def int_arm_neon_vld4 : DefaultAttrsIntrinsic<
def int_arm_neon_vld1x2 : DefaultAttrsIntrinsic<
[llvm_anyvector_ty, LLVMMatchType<0>],
- [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, IntrArgMemOnly]>;
+ [llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_arm_neon_vld1x3 : DefaultAttrsIntrinsic<
[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>],
- [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, IntrArgMemOnly]>;
+ [llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_arm_neon_vld1x4 : DefaultAttrsIntrinsic<
[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
- [LLVMAnyPointerType<LLVMMatchType<0>>], [IntrReadMem, IntrArgMemOnly]>;
+ [llvm_anyptr_ty], [IntrReadMem, IntrArgMemOnly]>;
// Vector load N-element structure to one lane.
// Source operands are: the address, the N input vectors (since only one
diff --git a/llvm/include/llvm/IR/IntrinsicsHexagon.td b/llvm/include/llvm/IR/IntrinsicsHexagon.td
index 847197ce28b9..67b873d16cb5 100644
--- a/llvm/include/llvm/IR/IntrinsicsHexagon.td
+++ b/llvm/include/llvm/IR/IntrinsicsHexagon.td
@@ -125,30 +125,27 @@ Hexagon_mem_memsisisi_Intrinsic<"circ_stb">;
def int_hexagon_prefetch :
Hexagon_Intrinsic<"HEXAGON_prefetch", [], [llvm_ptr_ty], []>;
-def llvm_ptr32_ty : LLVMPointerType<llvm_i32_ty>;
-def llvm_ptr64_ty : LLVMPointerType<llvm_i64_ty>;
-
// Mark locked loads as read/write to prevent any accidental reordering.
// These don't use Hexagon_Intrinsic, because they are not nosync, and as such
// cannot use default attributes.
let TargetPrefix = "hexagon" in {
def int_hexagon_L2_loadw_locked :
ClangBuiltin<"__builtin_HEXAGON_L2_loadw_locked">,
- Intrinsic<[llvm_i32_ty], [llvm_ptr32_ty],
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
def int_hexagon_L4_loadd_locked :
ClangBuiltin<"__builtin__HEXAGON_L4_loadd_locked">,
- Intrinsic<[llvm_i64_ty], [llvm_ptr64_ty],
+ Intrinsic<[llvm_i64_ty], [llvm_ptr_ty],
[IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
def int_hexagon_S2_storew_locked :
ClangBuiltin<"__builtin_HEXAGON_S2_storew_locked">,
Intrinsic<[llvm_i32_ty],
- [llvm_ptr32_ty, llvm_i32_ty], [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
+ [llvm_ptr_ty, llvm_i32_ty], [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
def int_hexagon_S4_stored_locked :
ClangBuiltin<"__builtin_HEXAGON_S4_stored_locked">,
Intrinsic<[llvm_i32_ty],
- [llvm_ptr64_ty, llvm_i64_ty], [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
+ [llvm_ptr_ty, llvm_i64_ty], [IntrArgMemOnly, NoCapture<ArgIndex<0>>]>;
}
def int_hexagon_vmemcpy : Hexagon_Intrinsic<"hexagon_vmemcpy",
@@ -266,7 +263,7 @@ Hexagon_v64i32_v64i32v32i32i64_rtt_Intrinsic<"HEXAGON_V6_vrmpyub_rtt_acc_128B">;
class Hexagon_pred_vload_imm<LLVMType ValTy>
: Hexagon_NonGCC_Intrinsic<
[ValTy],
- [llvm_i1_ty, LLVMPointerType<ValTy>, llvm_i32_ty],
+ [llvm_i1_ty, llvm_ptr_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<1>>,
ImmArg<ArgIndex<2>>]>;
@@ -284,8 +281,8 @@ def int_hexagon_V6_vL32b_nt_npred_ai_128B: Hexagon_pred_vload_imm_128B;
class Hexagom_pred_vload_upd<LLVMType ValTy, bit TakesImm>
: Hexagon_NonGCC_Intrinsic<
- [ValTy, LLVMPointerType<ValTy>],
- [llvm_i1_ty, LLVMPointerType<ValTy>, llvm_i32_ty],
+ [ValTy, llvm_ptr_ty],
+ [llvm_i1_ty, llvm_ptr_ty, llvm_i32_ty],
!if(TakesImm,
[IntrReadMem, IntrArgMemOnly, NoCapture<ArgIndex<1>>,
ImmArg<ArgIndex<2>>],
@@ -318,7 +315,7 @@ def int_hexagon_V6_vL32b_nt_npred_ppu_128B: Hexagom_pred_vload_upd_128B<0>;
class Hexagon_pred_vstore_imm<LLVMType ValTy>
: Hexagon_NonGCC_Intrinsic<
[],
- [llvm_i1_ty, LLVMPointerType<ValTy>, llvm_i32_ty, ValTy],
+ [llvm_i1_ty, llvm_ptr_ty, llvm_i32_ty, ValTy],
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<1>>,
ImmArg<ArgIndex<2>>]>;
@@ -340,8 +337,8 @@ def int_hexagon_V6_vS32b_nt_npred_ai_128B: Hexagon_pred_vstore_imm_128B;
class Hexagon_pred_vstore_upd<LLVMType ValTy, bit TakesImm>
: Hexagon_NonGCC_Intrinsic<
- [LLVMPointerType<ValTy>],
- [llvm_i1_ty, LLVMPointerType<ValTy>, llvm_i32_ty, ValTy],
+ [llvm_ptr_ty],
+ [llvm_i1_ty, llvm_ptr_ty, llvm_i32_ty, ValTy],
!if(TakesImm,
[IntrWriteMem, IntrArgMemOnly, NoCapture<ArgIndex<1>>,
ImmArg<ArgIndex<2>>],
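
The ARM and Hexagon hunks above (and the NVVM and RISC-V ones below) replace the typed pointer helpers (LLVMPointerType<...>, LLVMAnyPointerType<...>, LLVMQualPointerType<elty, AS>) with llvm_ptr_ty, llvm_anyptr_ty, or the address-space-only LLVMQualPointerType<AS>. This follows the opaque-pointer migration: every pointer in a given address space is the same ptr type, so the pointee no longer participates in an intrinsic's signature. A minimal C++ sketch of that equivalence, assuming opaque pointers (the default in this tree):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    #include <cassert>

    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      // With opaque pointers there is one pointer type per address space; the
      // former i32* / i64* distinction in these signatures no longer exists.
      PointerType *P0 = PointerType::get(Type::getInt32Ty(Ctx), /*AddressSpace=*/0);
      PointerType *P1 = PointerType::get(Type::getInt64Ty(Ctx), /*AddressSpace=*/0);
      PointerType *P2 = PointerType::get(Ctx, /*AddressSpace=*/0);
      assert(P0 == P1 && P1 == P2 && "all three are the opaque 'ptr' type");
      return 0;
    }
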
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index b859958e9004..6fd8e80013ce 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -31,11 +31,8 @@
// * llvm.nvvm.max.ull --> ibid.
// * llvm.nvvm.h2f --> llvm.convert.to.fp16.f32
-def llvm_global_i8ptr_ty : LLVMQualPointerType<llvm_i8_ty, 1>; // (global)i8*
-def llvm_shared_i8ptr_ty : LLVMQualPointerType<llvm_i8_ty, 3>; // (shared)i8*
-def llvm_i64ptr_ty : LLVMPointerType<llvm_i64_ty>; // i64*
-def llvm_any_i64ptr_ty : LLVMAnyPointerType<llvm_i64_ty>; // (space)i64*
-def llvm_shared_i64ptr_ty : LLVMQualPointerType<llvm_i64_ty, 3>; // (shared)i64*
+def llvm_global_ptr_ty : LLVMQualPointerType<1>; // (global)ptr
+def llvm_shared_ptr_ty : LLVMQualPointerType<3>; // (shared)ptr
//
// MISC
@@ -583,7 +580,6 @@ let TargetPrefix = "nvvm" in {
"_xorsign_abs_f16", "_ftz_xorsign_abs_f16", "_nan_xorsign_abs_f16",
"_ftz_nan_xorsign_abs_f16"] in {
def int_nvvm_f # operation # variant :
- ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty, llvm_half_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
}
@@ -592,24 +588,25 @@ let TargetPrefix = "nvvm" in {
"_ftz_nan_f16x2", "_xorsign_abs_f16x2", "_ftz_xorsign_abs_f16x2",
"_nan_xorsign_abs_f16x2", "_ftz_nan_xorsign_abs_f16x2"] in {
def int_nvvm_f # operation # variant :
- ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty, llvm_v2f16_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
}
- foreach variant = ["_bf16", "_nan_bf16", "_xorsign_abs_bf16",
- "_nan_xorsign_abs_bf16"] in {
+ foreach variant = ["_bf16", "_ftz_bf16", "_nan_bf16", "_ftz_nan_bf16",
+ "_xorsign_abs_bf16", "_ftz_xorsign_abs_bf16", "_nan_xorsign_abs_bf16",
+ "_ftz_nan_xorsign_abs_bf16"] in {
def int_nvvm_f # operation # variant :
ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
- DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty],
+ DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty, llvm_bfloat_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
}
- foreach variant = ["_bf16x2", "_nan_bf16x2", "_xorsign_abs_bf16x2",
- "_nan_xorsign_abs_bf16x2"] in {
+ foreach variant = ["_bf16x2", "_ftz_bf16x2", "_nan_bf16x2",
+ "_ftz_nan_bf16x2", "_xorsign_abs_bf16x2", "_ftz_xorsign_abs_bf16x2",
+ "_nan_xorsign_abs_bf16x2", "_ftz_nan_xorsign_abs_bf16x2"] in {
def int_nvvm_f # operation # variant :
ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
- DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
+ DefaultAttrsIntrinsic<[llvm_v2bf16_ty], [llvm_v2bf16_ty, llvm_v2bf16_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
}
}
@@ -776,10 +773,10 @@ let TargetPrefix = "nvvm" in {
foreach unary = ["abs", "neg"] in {
def int_nvvm_ # unary # _bf16 :
ClangBuiltin<!strconcat("__nvvm_", unary, "_bf16")>,
- DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_i16_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_bfloat_ty], [llvm_bfloat_ty], [IntrNoMem]>;
def int_nvvm_ # unary # _bf16x2 :
ClangBuiltin<!strconcat("__nvvm_", unary, "_bf16x2")>,
- DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+ DefaultAttrsIntrinsic<[llvm_v2bf16_ty], [llvm_v2bf16_ty], [IntrNoMem]>;
}
//
@@ -828,9 +825,9 @@ let TargetPrefix = "nvvm" in {
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_ex2_approx_d : ClangBuiltin<"__nvvm_ex2_approx_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_ex2_approx_f16 : ClangBuiltin<"__nvvm_ex2_approx_f16">,
+ def int_nvvm_ex2_approx_f16 :
DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty], [IntrNoMem]>;
- def int_nvvm_ex2_approx_f16x2 : ClangBuiltin<"__nvvm_ex2_approx_f16x2">,
+ def int_nvvm_ex2_approx_f16x2 :
DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty], [IntrNoMem]>;
def int_nvvm_lg2_approx_ftz_f : ClangBuiltin<"__nvvm_lg2_approx_ftz_f">,
@@ -860,31 +857,31 @@ let TargetPrefix = "nvvm" in {
foreach variant = ["_rn_f16", "_rn_ftz_f16", "_rn_sat_f16",
"_rn_ftz_sat_f16", "_rn_relu_f16", "_rn_ftz_relu_f16"] in {
- def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
- DefaultAttrsIntrinsic<[llvm_half_ty],
- [llvm_half_ty, llvm_half_ty, llvm_half_ty],
- [IntrNoMem, IntrSpeculatable]>;
+ def int_nvvm_fma # variant : DefaultAttrsIntrinsic<[llvm_half_ty],
+ [llvm_half_ty, llvm_half_ty, llvm_half_ty],
+ [IntrNoMem, IntrSpeculatable]>;
}
foreach variant = ["_rn_f16x2", "_rn_ftz_f16x2", "_rn_sat_f16x2",
"_rn_ftz_sat_f16x2", "_rn_relu_f16x2", "_rn_ftz_relu_f16x2"] in {
- def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
- DefaultAttrsIntrinsic<[llvm_v2f16_ty],
- [llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty],
- [IntrNoMem, IntrSpeculatable]>;
+ def int_nvvm_fma # variant : DefaultAttrsIntrinsic<[llvm_v2f16_ty],
+ [llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty],
+ [IntrNoMem, IntrSpeculatable]>;
}
- foreach variant = ["_rn_bf16", "_rn_relu_bf16"] in {
+ foreach variant = ["_rn_bf16", "_rn_ftz_bf16", "_rn_sat_bf16",
+ "_rn_ftz_sat_bf16", "_rn_relu_bf16", "_rn_ftz_relu_bf16"] in {
def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
- DefaultAttrsIntrinsic<[llvm_i16_ty],
- [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty],
+ DefaultAttrsIntrinsic<[llvm_bfloat_ty],
+ [llvm_bfloat_ty, llvm_bfloat_ty, llvm_bfloat_ty],
[IntrNoMem, IntrSpeculatable]>;
}
- foreach variant = ["_rn_bf16x2", "_rn_relu_bf16x2"] in {
+ foreach variant = ["_rn_bf16x2", "_rn_ftz_bf16x2", "_rn_sat_bf16x2",
+ "_rn_ftz_sat_bf16x2", "_rn_relu_bf16x2", "_rn_ftz_relu_bf16x2"] in {
def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
- DefaultAttrsIntrinsic<[llvm_i32_ty],
- [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ DefaultAttrsIntrinsic<[llvm_v2bf16_ty],
+ [llvm_v2bf16_ty, llvm_v2bf16_ty, llvm_v2bf16_ty],
[IntrNoMem, IntrSpeculatable]>;
}
@@ -1236,14 +1233,19 @@ let TargetPrefix = "nvvm" in {
def int_nvvm_f2h_rn : ClangBuiltin<"__nvvm_f2h_rn">,
DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrSpeculatable]>;
+ def int_nvvm_bf2h_rn_ftz : ClangBuiltin<"__nvvm_bf2h_rn_ftz">,
+ DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_bfloat_ty], [IntrNoMem, IntrSpeculatable]>;
+ def int_nvvm_bf2h_rn : ClangBuiltin<"__nvvm_bf2h_rn">,
+ DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_bfloat_ty], [IntrNoMem, IntrSpeculatable]>;
+
def int_nvvm_ff2bf16x2_rn : ClangBuiltin<"__nvvm_ff2bf16x2_rn">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
+ Intrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
def int_nvvm_ff2bf16x2_rn_relu : ClangBuiltin<"__nvvm_ff2bf16x2_rn_relu">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
+ Intrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
def int_nvvm_ff2bf16x2_rz : ClangBuiltin<"__nvvm_ff2bf16x2_rz">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
+ Intrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
def int_nvvm_ff2bf16x2_rz_relu : ClangBuiltin<"__nvvm_ff2bf16x2_rz_relu">,
- Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v2bf16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_nvvm_ff2f16x2_rn : ClangBuiltin<"__nvvm_ff2f16x2_rn">,
Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
@@ -1255,13 +1257,13 @@ let TargetPrefix = "nvvm" in {
Intrinsic<[llvm_v2f16_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
def int_nvvm_f2bf16_rn : ClangBuiltin<"__nvvm_f2bf16_rn">,
- Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
+ Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
def int_nvvm_f2bf16_rn_relu : ClangBuiltin<"__nvvm_f2bf16_rn_relu">,
- Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
+ Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
def int_nvvm_f2bf16_rz : ClangBuiltin<"__nvvm_f2bf16_rz">,
- Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
+ Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
def int_nvvm_f2bf16_rz_relu : ClangBuiltin<"__nvvm_f2bf16_rz_relu">,
- Intrinsic<[llvm_i16_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
+ Intrinsic<[llvm_bfloat_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
def int_nvvm_f2tf32_rna : ClangBuiltin<"__nvvm_f2tf32_rna">,
Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem, IntrNoCallback]>;
@@ -1288,19 +1290,19 @@ let TargetPrefix = "nvvm" in {
// Atomics not available as llvm intrinsics.
def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
- [LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
+ [llvm_anyptr_ty, llvm_i32_ty],
[IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
def int_nvvm_atomic_load_dec_32 : Intrinsic<[llvm_i32_ty],
- [LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
+ [llvm_anyptr_ty, llvm_i32_ty],
[IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
class SCOPED_ATOMIC2_impl<LLVMType elty>
: Intrinsic<[elty],
- [LLVMAnyPointerType<LLVMMatchType<0>>, LLVMMatchType<0>],
+ [llvm_anyptr_ty, LLVMMatchType<0>],
[IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
class SCOPED_ATOMIC3_impl<LLVMType elty>
: Intrinsic<[elty],
- [LLVMAnyPointerType<LLVMMatchType<0>>, LLVMMatchType<0>,
+ [llvm_anyptr_ty, LLVMMatchType<0>,
LLVMMatchType<0>],
[IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>]>;
@@ -1362,6 +1364,14 @@ let TargetPrefix = "nvvm" in {
Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrConvergent, IntrNoCallback]>,
ClangBuiltin<"__nvvm_barrier_sync_cnt">;
+ // barrier.cluster.[wait, arrive, arrive.relaxed]
+ def int_nvvm_barrier_cluster_arrive :
+ Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
+ def int_nvvm_barrier_cluster_arrive_relaxed :
+ Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
+ def int_nvvm_barrier_cluster_wait :
+ Intrinsic<[], [], [IntrConvergent, IntrNoCallback]>;
+
// Membar
def int_nvvm_membar_cta : ClangBuiltin<"__nvvm_membar_cta">,
Intrinsic<[], [], [IntrNoCallback]>;
@@ -1369,45 +1379,38 @@ let TargetPrefix = "nvvm" in {
Intrinsic<[], [], [IntrNoCallback]>;
def int_nvvm_membar_sys : ClangBuiltin<"__nvvm_membar_sys">,
Intrinsic<[], [], [IntrNoCallback]>;
+ def int_nvvm_fence_sc_cluster:
+ Intrinsic<[], [], [IntrNoCallback]>;
// Async Copy
def int_nvvm_cp_async_mbarrier_arrive :
ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive">,
- Intrinsic<[],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
+ Intrinsic<[],[llvm_ptr_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_cp_async_mbarrier_arrive_shared :
ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive_shared">,
- Intrinsic<[],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
+ Intrinsic<[],[llvm_shared_ptr_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_cp_async_mbarrier_arrive_noinc :
ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive_noinc">,
- Intrinsic<[],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
+ Intrinsic<[],[llvm_ptr_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_cp_async_mbarrier_arrive_noinc_shared :
ClangBuiltin<"__nvvm_cp_async_mbarrier_arrive_noinc_shared">,
- Intrinsic<[],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
-
-def int_nvvm_cp_async_ca_shared_global_4 :
- ClangBuiltin<"__nvvm_cp_async_ca_shared_global_4">,
- Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty],
- [IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
- WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
- "llvm.nvvm.cp.async.ca.shared.global.4">;
-def int_nvvm_cp_async_ca_shared_global_8 :
- ClangBuiltin<"__nvvm_cp_async_ca_shared_global_8">,
- Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty],
- [IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
- WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
- "llvm.nvvm.cp.async.ca.shared.global.8">;
-def int_nvvm_cp_async_ca_shared_global_16 :
- ClangBuiltin<"__nvvm_cp_async_ca_shared_global_16">,
- Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty],
- [IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
- WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
- "llvm.nvvm.cp.async.ca.shared.global.16">;
-def int_nvvm_cp_async_cg_shared_global_16 :
- ClangBuiltin<"__nvvm_cp_async_cg_shared_global_16">,
- Intrinsic<[],[llvm_shared_i8ptr_ty, llvm_global_i8ptr_ty],
- [IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
- WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
- "llvm.nvvm.cp.async.cg.shared.global.16">;
+ Intrinsic<[],[llvm_shared_ptr_ty],[IntrConvergent, IntrNoCallback]>;
+
+multiclass CP_ASYNC_SHARED_GLOBAL<string n, string cc> {
+ def NAME: Intrinsic<[],[llvm_shared_ptr_ty, llvm_global_ptr_ty],
+ [IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
+ WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
+ "llvm.nvvm.cp.async." # cc # ".shared.global." # n>;
+ def _s: Intrinsic<[],[llvm_shared_ptr_ty, llvm_global_ptr_ty, llvm_i32_ty],
+ [IntrArgMemOnly, IntrNoCallback, NoAlias<ArgIndex<0>>, NoAlias<ArgIndex<1>>,
+ WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>],
+ "llvm.nvvm.cp.async." # cc # ".shared.global." # n # ".s">;
+}
+
+defm int_nvvm_cp_async_ca_shared_global_4 : CP_ASYNC_SHARED_GLOBAL<"4", "ca">;
+defm int_nvvm_cp_async_ca_shared_global_8 : CP_ASYNC_SHARED_GLOBAL<"8", "ca">;
+defm int_nvvm_cp_async_ca_shared_global_16 : CP_ASYNC_SHARED_GLOBAL<"16", "ca">;
+defm int_nvvm_cp_async_cg_shared_global_16 : CP_ASYNC_SHARED_GLOBAL<"16", "cg">;
def int_nvvm_cp_async_commit_group :
ClangBuiltin<"__nvvm_cp_async_commit_group">,
@@ -1423,54 +1426,54 @@ def int_nvvm_cp_async_wait_all :
// mbarrier
def int_nvvm_mbarrier_init : ClangBuiltin<"__nvvm_mbarrier_init">,
- Intrinsic<[],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
+ Intrinsic<[],[llvm_ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_init_shared :
ClangBuiltin<"__nvvm_mbarrier_init_shared">,
- Intrinsic<[],[llvm_shared_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
+ Intrinsic<[],[llvm_shared_ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_inval : ClangBuiltin<"__nvvm_mbarrier_inval">,
- Intrinsic<[],[llvm_i64ptr_ty],
+ Intrinsic<[],[llvm_ptr_ty],
[IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
def int_nvvm_mbarrier_inval_shared :
ClangBuiltin<"__nvvm_mbarrier_inval_shared">,
- Intrinsic<[],[llvm_shared_i64ptr_ty],
+ Intrinsic<[],[llvm_shared_ptr_ty],
[IntrConvergent, IntrWriteMem, IntrArgMemOnly, IntrNoCallback,
WriteOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>]>;
def int_nvvm_mbarrier_arrive : ClangBuiltin<"__nvvm_mbarrier_arrive">,
- Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
+ Intrinsic<[llvm_i64_ty],[llvm_ptr_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_shared :
ClangBuiltin<"__nvvm_mbarrier_arrive_shared">,
- Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
+ Intrinsic<[llvm_i64_ty],[llvm_shared_ptr_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_noComplete :
ClangBuiltin<"__nvvm_mbarrier_arrive_noComplete">,
- Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
+ Intrinsic<[llvm_i64_ty],[llvm_ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_noComplete_shared :
ClangBuiltin<"__nvvm_mbarrier_arrive_noComplete_shared">,
- Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty,
+ Intrinsic<[llvm_i64_ty],[llvm_shared_ptr_ty,
llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_drop :
ClangBuiltin<"__nvvm_mbarrier_arrive_drop">,
- Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
+ Intrinsic<[llvm_i64_ty],[llvm_ptr_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_drop_shared :
ClangBuiltin<"__nvvm_mbarrier_arrive_drop_shared">,
- Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty],[IntrConvergent, IntrNoCallback]>;
+ Intrinsic<[llvm_i64_ty],[llvm_shared_ptr_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_drop_noComplete :
ClangBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete">,
- Intrinsic<[llvm_i64_ty],[llvm_i64ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
+ Intrinsic<[llvm_i64_ty],[llvm_ptr_ty, llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_arrive_drop_noComplete_shared :
ClangBuiltin<"__nvvm_mbarrier_arrive_drop_noComplete_shared">,
- Intrinsic<[llvm_i64_ty],[llvm_shared_i64ptr_ty,
+ Intrinsic<[llvm_i64_ty],[llvm_shared_ptr_ty,
llvm_i32_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_test_wait :
ClangBuiltin<"__nvvm_mbarrier_test_wait">,
- Intrinsic<[llvm_i1_ty],[llvm_i64ptr_ty, llvm_i64_ty],[IntrConvergent, IntrNoCallback]>;
+ Intrinsic<[llvm_i1_ty],[llvm_ptr_ty, llvm_i64_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_test_wait_shared :
ClangBuiltin<"__nvvm_mbarrier_test_wait_shared">,
- Intrinsic<[llvm_i1_ty],[llvm_shared_i64ptr_ty, llvm_i64_ty],[IntrConvergent, IntrNoCallback]>;
+ Intrinsic<[llvm_i1_ty],[llvm_shared_ptr_ty, llvm_i64_ty],[IntrConvergent, IntrNoCallback]>;
def int_nvvm_mbarrier_pending_count :
ClangBuiltin<"__nvvm_mbarrier_pending_count">,
@@ -1479,30 +1482,30 @@ def int_nvvm_mbarrier_pending_count :
// Generated within nvvm. Use for ldu on sm_20 or later. Second arg is the
// pointer's alignment.
def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty],
- [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
+ [llvm_anyptr_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldu.global.i">;
def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty],
- [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
+ [llvm_anyptr_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldu.global.f">;
def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty],
- [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
+ [llvm_anyptr_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldu.global.p">;
// Generated within nvvm. Use for ldg on sm_35 or later. Second arg is the
// pointer's alignment.
def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty],
- [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
+ [llvm_anyptr_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldg.global.i">;
def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty],
- [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
+ [llvm_anyptr_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldg.global.f">;
def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty],
- [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty],
+ [llvm_anyptr_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>],
"llvm.nvvm.ldg.global.p">;
@@ -1565,7 +1568,7 @@ def int_nvvm_move_ptr : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty],
// For getting the handle from a texture or surface variable
def int_nvvm_texsurf_handle
- : Intrinsic<[llvm_i64_ty], [llvm_metadata_ty, llvm_any_i64ptr_ty],
+ : Intrinsic<[llvm_i64_ty], [llvm_metadata_ty, llvm_anyptr_ty],
[IntrNoMem], "llvm.nvvm.texsurf.handle">;
def int_nvvm_texsurf_handle_internal
: Intrinsic<[llvm_i64_ty], [llvm_anyptr_ty],
@@ -1582,153 +1585,157 @@ def int_nvvm_reflect :
// isspacep.{const, global, local, shared}
def int_nvvm_isspacep_const
- : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
"llvm.nvvm.isspacep.const">,
ClangBuiltin<"__nvvm_isspacep_const">;
def int_nvvm_isspacep_global
- : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
"llvm.nvvm.isspacep.global">,
ClangBuiltin<"__nvvm_isspacep_global">;
def int_nvvm_isspacep_local
- : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
"llvm.nvvm.isspacep.local">,
ClangBuiltin<"__nvvm_isspacep_local">;
def int_nvvm_isspacep_shared
- : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
[IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
"llvm.nvvm.isspacep.shared">,
ClangBuiltin<"__nvvm_isspacep_shared">;
+def int_nvvm_isspacep_shared_cluster
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
+ [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
+ "llvm.nvvm.isspacep.shared.cluster">;
// Environment register read
def int_nvvm_read_ptx_sreg_envreg0
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg0">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg0">;
def int_nvvm_read_ptx_sreg_envreg1
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg1">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg1">;
def int_nvvm_read_ptx_sreg_envreg2
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg2">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg2">;
def int_nvvm_read_ptx_sreg_envreg3
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg3">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg3">;
def int_nvvm_read_ptx_sreg_envreg4
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg4">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg4">;
def int_nvvm_read_ptx_sreg_envreg5
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg5">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg5">;
def int_nvvm_read_ptx_sreg_envreg6
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg6">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg6">;
def int_nvvm_read_ptx_sreg_envreg7
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg7">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg7">;
def int_nvvm_read_ptx_sreg_envreg8
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg8">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg8">;
def int_nvvm_read_ptx_sreg_envreg9
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg9">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg9">;
def int_nvvm_read_ptx_sreg_envreg10
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg10">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg10">;
def int_nvvm_read_ptx_sreg_envreg11
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg11">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg11">;
def int_nvvm_read_ptx_sreg_envreg12
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg12">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg12">;
def int_nvvm_read_ptx_sreg_envreg13
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg13">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg13">;
def int_nvvm_read_ptx_sreg_envreg14
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg14">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg14">;
def int_nvvm_read_ptx_sreg_envreg15
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg15">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg15">;
def int_nvvm_read_ptx_sreg_envreg16
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg16">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg16">;
def int_nvvm_read_ptx_sreg_envreg17
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg17">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg17">;
def int_nvvm_read_ptx_sreg_envreg18
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg18">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg18">;
def int_nvvm_read_ptx_sreg_envreg19
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg19">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg19">;
def int_nvvm_read_ptx_sreg_envreg20
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg20">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg20">;
def int_nvvm_read_ptx_sreg_envreg21
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg21">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg21">;
def int_nvvm_read_ptx_sreg_envreg22
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg22">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg22">;
def int_nvvm_read_ptx_sreg_envreg23
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg23">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg23">;
def int_nvvm_read_ptx_sreg_envreg24
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg24">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg24">;
def int_nvvm_read_ptx_sreg_envreg25
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg25">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg25">;
def int_nvvm_read_ptx_sreg_envreg26
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg26">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg26">;
def int_nvvm_read_ptx_sreg_envreg27
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg27">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg27">;
def int_nvvm_read_ptx_sreg_envreg28
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg28">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg28">;
def int_nvvm_read_ptx_sreg_envreg29
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg29">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg29">;
def int_nvvm_read_ptx_sreg_envreg30
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg30">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg30">;
def int_nvvm_read_ptx_sreg_envreg31
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable],
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.read.ptx.sreg.envreg31">,
ClangBuiltin<"__nvvm_read_ptx_sreg_envreg31">;
@@ -4354,37 +4361,40 @@ def int_nvvm_swap_lo_hi_b64
// Accessing special registers.
+
+class PTXReadSRegIntrinsicNB_r32
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>;
+class PTXReadSRegIntrinsic_r32<string name>
+ : PTXReadSRegIntrinsicNB_r32, ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+
multiclass PTXReadSRegIntrinsic_v4i32<string regname> {
// FIXME: Do we need the 128-bit integer type version?
// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem, IntrSpeculatable]>;
// FIXME: Enable this once v4i32 support is enabled in back-end.
// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
+ foreach suffix = ["_x", "_y", "_z", "_w"] in
+ def suffix : PTXReadSRegIntrinsic_r32<regname # suffix>;
+}
- def _x : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
- ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_x">;
- def _y : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
- ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_y">;
- def _z : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
- ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_z">;
- def _w : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
- ClangBuiltin<"__nvvm_read_ptx_sreg_" # regname # "_w">;
+// Same, but without automatic clang builtins. It will be used for
+// registers that require particular GPU or PTX version.
+multiclass PTXReadSRegIntrinsicNB_v4i32 {
+ foreach suffix = ["_x", "_y", "_z", "_w"] in
+ def suffix : PTXReadSRegIntrinsicNB_r32;
}
-class PTXReadSRegIntrinsic_r32<string name>
- : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>,
- ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
class PTXReadSRegIntrinsic_r64<string name>
- : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable]>,
+ : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>]>,
ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
// Intrinsics to read registers with non-constant values. E.g. the values that
// do change over the kernel lifetime. Such reads should not be CSE'd.
class PTXReadNCSRegIntrinsic_r32<string name>
- : Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly, IntrNoCallback]>,
+ : Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly, IntrNoCallback, NoUndef<RetIndex>]>,
ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
class PTXReadNCSRegIntrinsic_r64<string name>
- : Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly, IntrNoCallback]>,
+ : Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly, IntrNoCallback, NoUndef<RetIndex>]>,
ClangBuiltin<"__nvvm_read_ptx_sreg_" # name>;
defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">;
@@ -4422,6 +4432,15 @@ def int_nvvm_read_ptx_sreg_pm3 : PTXReadNCSRegIntrinsic_r32<"pm3">;
def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
+// sm90+, PTX7.8+
+defm int_nvvm_read_ptx_sreg_clusterid : PTXReadSRegIntrinsicNB_v4i32;
+defm int_nvvm_read_ptx_sreg_nclusterid : PTXReadSRegIntrinsicNB_v4i32;
+defm int_nvvm_read_ptx_sreg_cluster_ctaid : PTXReadSRegIntrinsicNB_v4i32;
+defm int_nvvm_read_ptx_sreg_cluster_nctaid : PTXReadSRegIntrinsicNB_v4i32;
+
+def int_nvvm_read_ptx_sreg_cluster_ctarank : PTXReadSRegIntrinsicNB_r32;
+def int_nvvm_read_ptx_sreg_cluster_nctarank : PTXReadSRegIntrinsicNB_r32;
+
//
// SHUFFLE
//
@@ -4670,4 +4689,25 @@ foreach transposed = [0, 1] in {
}
}
+def int_nvvm_mapa
+ : DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
+ "llvm.nvvm.mapa">;
+def int_nvvm_mapa_shared_cluster
+ : DefaultAttrsIntrinsic<[llvm_shared_ptr_ty], [llvm_shared_ptr_ty, llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
+ "llvm.nvvm.mapa.shared.cluster">;
+def int_nvvm_getctarank
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_ptr_ty],
+ [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
+ "llvm.nvvm.getctarank">;
+def int_nvvm_getctarank_shared_cluster
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_shared_ptr_ty],
+ [IntrNoMem, IntrSpeculatable, NoCapture<ArgIndex<0>>],
+ "llvm.nvvm.getctarank.shared.cluster">;
+def int_nvvm_is_explicit_cluster
+ : DefaultAttrsIntrinsic<[llvm_i1_ty], [],
+ [IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
+ "llvm.nvvm.is_explicit_cluster">;
+
} // let TargetPrefix = "nvvm"
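
Among other changes, the NVVM hunks above add sm_90 cluster-level intrinsics (barrier.cluster.arrive/wait, mapa, getctarank, is_explicit_cluster, and the clusterid special registers) and move the bf16 math intrinsics from i16/i32 carriers to the proper bfloat/v2bf16 types. A minimal C++ sketch of emitting the new cluster barrier pair, assuming the generated header and enum names shown; both intrinsics take no operands and are not overloaded:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/IntrinsicsNVPTX.h"

    using namespace llvm;

    // Emit llvm.nvvm.barrier.cluster.arrive followed by
    // llvm.nvvm.barrier.cluster.wait at the current insertion point.
    static void emitClusterBarrier(IRBuilder<> &B) {
      B.CreateIntrinsic(Intrinsic::nvvm_barrier_cluster_arrive, {}, {});
      B.CreateIntrinsic(Intrinsic::nvvm_barrier_cluster_wait, {}, {});
    }
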
diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index 4e95b77a0d7c..58822059b9ac 100644
--- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -31,12 +31,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// Get content from current FPSCR register
def int_ppc_readflm : ClangBuiltin<"__builtin_readflm">,
- Intrinsic<[llvm_double_ty], [],
- [IntrNoMerge, IntrHasSideEffects]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [],
+ [IntrNoMerge, IntrHasSideEffects]>;
// Set FPSCR register, and return previous content
def int_ppc_setflm : ClangBuiltin<"__builtin_setflm">,
- Intrinsic<[llvm_double_ty], [llvm_double_ty],
- [IntrHasSideEffects]>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty],
+ [IntrHasSideEffects]>;
// Intrinsics for [double]word extended forms of divide instructions
def int_ppc_divwe : ClangBuiltin<"__builtin_divwe">,
@@ -61,14 +61,14 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// Generate a random number
def int_ppc_darn : ClangBuiltin<"__builtin_darn">,
- Intrinsic<[llvm_i64_ty], [],
- [IntrNoMerge, IntrHasSideEffects]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [],
+ [IntrNoMerge, IntrHasSideEffects]>;
def int_ppc_darnraw : ClangBuiltin<"__builtin_darn_raw">,
- Intrinsic<[llvm_i64_ty], [],
- [IntrNoMerge, IntrHasSideEffects]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [],
+ [IntrNoMerge, IntrHasSideEffects]>;
def int_ppc_darn32 : ClangBuiltin<"__builtin_darn_32">,
- Intrinsic<[llvm_i32_ty], [],
- [IntrNoMerge, IntrHasSideEffects]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [],
+ [IntrNoMerge, IntrHasSideEffects]>;
// Bit permute doubleword
def int_ppc_bpermd : ClangBuiltin<"__builtin_bpermd">,
@@ -389,20 +389,20 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
// Stores. These don't map directly to GCC builtins because they represent the
// source address with a single pointer.
def int_ppc_altivec_stvx :
- Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
- [IntrWriteMem, IntrArgMemOnly]>;
+ DefaultAttrsIntrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
+ [IntrWriteMem, IntrArgMemOnly]>;
def int_ppc_altivec_stvxl :
- Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
- [IntrWriteMem, IntrArgMemOnly]>;
+ DefaultAttrsIntrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
+ [IntrWriteMem, IntrArgMemOnly]>;
def int_ppc_altivec_stvebx :
- Intrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty],
- [IntrWriteMem, IntrArgMemOnly]>;
+ DefaultAttrsIntrinsic<[], [llvm_v16i8_ty, llvm_ptr_ty],
+ [IntrWriteMem, IntrArgMemOnly]>;
def int_ppc_altivec_stvehx :
- Intrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty],
- [IntrWriteMem, IntrArgMemOnly]>;
+ DefaultAttrsIntrinsic<[], [llvm_v8i16_ty, llvm_ptr_ty],
+ [IntrWriteMem, IntrArgMemOnly]>;
def int_ppc_altivec_stvewx :
- Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
- [IntrWriteMem, IntrArgMemOnly]>;
+ DefaultAttrsIntrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty],
+ [IntrWriteMem, IntrArgMemOnly]>;
// Comparisons setting a vector.
def int_ppc_altivec_vcmpbfp : ClangBuiltin<"__builtin_altivec_vcmpbfp">,
@@ -1572,7 +1572,7 @@ def int_ppc_cfence : Intrinsic<[], [llvm_any_ty], []>;
// PowerPC set FPSCR Intrinsic Definitions.
def int_ppc_setrnd : ClangBuiltin<"__builtin_setrnd">,
- Intrinsic<[llvm_double_ty], [llvm_i32_ty], []>;
+ DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_i32_ty], [IntrHasSideEffects]>;
}
let TargetPrefix = "ppc" in {
@@ -1728,14 +1728,14 @@ let TargetPrefix = "ppc" in {
def int_ppc_mfmsr : ClangBuiltin<"__builtin_ppc_mfmsr">,
DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
def int_ppc_mfspr
- : Intrinsic<[llvm_anyint_ty], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
+ : DefaultAttrsIntrinsic<[llvm_anyint_ty], [llvm_i32_ty], [ImmArg<ArgIndex<0>>]>;
def int_ppc_mtmsr
: ClangBuiltin<"__builtin_ppc_mtmsr">, Intrinsic<[], [llvm_i32_ty], []>;
def int_ppc_mtspr
- : Intrinsic<[], [llvm_i32_ty, llvm_anyint_ty], [ImmArg<ArgIndex<0>>]>;
+ : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyint_ty], [ImmArg<ArgIndex<0>>]>;
def int_ppc_stfiw : ClangBuiltin<"__builtin_ppc_stfiw">,
- Intrinsic<[], [llvm_ptr_ty, llvm_double_ty],
- [IntrWriteMem]>;
+ DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_double_ty],
+ [IntrWriteMem]>;
// compare
def int_ppc_cmpeqb
: ClangBuiltin<"__builtin_ppc_cmpeqb">,
@@ -1865,8 +1865,8 @@ let TargetPrefix = "ppc" in {
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_ppc_addex
: ClangBuiltin<"__builtin_ppc_addex">,
- Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
- [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<2>>]>;
+ DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
+ [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<2>>]>;
def int_ppc_fsel : ClangBuiltin<"__builtin_ppc_fsel">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty, llvm_double_ty,
llvm_double_ty], [IntrNoMem]>;
@@ -1893,9 +1893,9 @@ let TargetPrefix = "ppc" in {
DefaultAttrsIntrinsic<[llvm_i32_ty],
[llvm_double_ty, llvm_double_ty],
[IntrNoMem]>;
- def int_ppc_test_data_class : Intrinsic<[llvm_i32_ty],
- [llvm_anyfloat_ty, llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<1>>]>;
+ def int_ppc_test_data_class
+ : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_anyfloat_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<ArgIndex<1>>]>;
def int_ppc_fnabs
: ClangBuiltin<"__builtin_ppc_fnabs">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
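
The PowerPC hunks above mostly switch definitions from Intrinsic to DefaultAttrsIntrinsic, which attaches the default attribute set (nocallback, nofree, nosync, willreturn) on top of the listed properties. A small C++ sketch of what that implies for the emitted declaration, assuming the default set includes willreturn as it does for DefaultAttrsIntrinsic upstream:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/IntrinsicsPowerPC.h"
    #include "llvm/IR/Module.h"

    using namespace llvm;

    // After the switch to DefaultAttrsIntrinsic, the declaration of
    // llvm.ppc.readflm should carry willreturn in addition to its listed
    // properties (IntrNoMerge, IntrHasSideEffects).
    static bool readflmWillReturn(Module &M) {
      Function *F = Intrinsic::getDeclaration(&M, Intrinsic::ppc_readflm);
      return F->hasFnAttribute(Attribute::WillReturn);
    }
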
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index e9c88f468076..e3476b160db6 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -132,38 +132,21 @@ let TargetPrefix = "riscv" in {
/* AVL */ [LLVMMatchType<0>,
/* VSEW */ LLVMMatchType<0>,
/* VLMUL */ LLVMMatchType<0>],
- [IntrNoMem, IntrHasSideEffects,
+ [IntrNoMem,
ImmArg<ArgIndex<1>>,
ImmArg<ArgIndex<2>>]>;
def int_riscv_vsetvlimax : Intrinsic<[llvm_anyint_ty],
/* VSEW */ [LLVMMatchType<0>,
/* VLMUL */ LLVMMatchType<0>],
- [IntrNoMem, IntrHasSideEffects,
+ [IntrNoMem,
ImmArg<ArgIndex<0>>,
ImmArg<ArgIndex<1>>]>;
- // Versions without side effects: better optimizable and usable if only the
- // returned vector length is important.
- def int_riscv_vsetvli_opt : Intrinsic<[llvm_anyint_ty],
- /* AVL */ [LLVMMatchType<0>,
- /* VSEW */ LLVMMatchType<0>,
- /* VLMUL */ LLVMMatchType<0>],
- [IntrNoMem,
- ImmArg<ArgIndex<1>>,
- ImmArg<ArgIndex<2>>]>;
- def int_riscv_vsetvlimax_opt : Intrinsic<[llvm_anyint_ty],
- /* VSEW */ [LLVMMatchType<0>,
- /* VLMUL */ LLVMMatchType<0>],
- [IntrNoMem,
- ImmArg<ArgIndex<0>>,
- ImmArg<ArgIndex<1>>]>;
-
// For unit stride mask load
// Input: (pointer, vl)
class RISCVUSMLoad
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMPointerType<LLVMMatchType<0>>,
- llvm_anyint_ty],
+ [llvm_ptr_ty, llvm_anyint_ty],
[NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic {
let VLOperand = 1;
}
@@ -171,9 +154,7 @@ let TargetPrefix = "riscv" in {
// Input: (passthru, pointer, vl)
class RISCVUSLoad
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>,
- LLVMPointerType<LLVMMatchType<0>>,
- llvm_anyint_ty],
+ [LLVMMatchType<0>, llvm_ptr_ty, llvm_anyint_ty],
[NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic {
let VLOperand = 2;
}
@@ -184,8 +165,7 @@ let TargetPrefix = "riscv" in {
// VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
class RISCVUSLoadFF
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, llvm_anyint_ty],
- [LLVMMatchType<0>,
- LLVMPointerType<LLVMMatchType<0>>, LLVMMatchType<1>],
+ [LLVMMatchType<0>, llvm_ptr_ty, LLVMMatchType<1>],
[NoCapture<ArgIndex<1>>]>,
RISCVVIntrinsic {
let VLOperand = 2;
@@ -194,8 +174,7 @@ let TargetPrefix = "riscv" in {
// Input: (maskedoff, pointer, mask, vl, policy)
class RISCVUSLoadMasked
: DefaultAttrsIntrinsic<[llvm_anyvector_ty ],
- [LLVMMatchType<0>,
- LLVMPointerType<LLVMMatchType<0>>,
+ [LLVMMatchType<0>, llvm_ptr_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyint_ty, LLVMMatchType<1>],
[NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<4>>, IntrReadMem]>,
@@ -209,8 +188,7 @@ let TargetPrefix = "riscv" in {
// VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
class RISCVUSLoadFFMasked
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, llvm_anyint_ty],
- [LLVMMatchType<0>,
- LLVMPointerType<LLVMMatchType<0>>,
+ [LLVMMatchType<0>, llvm_ptr_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<1>, LLVMMatchType<1>],
[NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>, RISCVVIntrinsic {
@@ -220,8 +198,7 @@ let TargetPrefix = "riscv" in {
// Input: (passthru, pointer, stride, vl)
class RISCVSLoad
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>,
- LLVMPointerType<LLVMMatchType<0>>,
+ [LLVMMatchType<0>, llvm_ptr_ty,
llvm_anyint_ty, LLVMMatchType<1>],
[NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic {
let VLOperand = 3;
@@ -230,8 +207,7 @@ let TargetPrefix = "riscv" in {
// Input: (maskedoff, pointer, stride, mask, vl, policy)
class RISCVSLoadMasked
: DefaultAttrsIntrinsic<[llvm_anyvector_ty ],
- [LLVMMatchType<0>,
- LLVMPointerType<LLVMMatchType<0>>, llvm_anyint_ty,
+ [LLVMMatchType<0>, llvm_ptr_ty, llvm_anyint_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>,
LLVMMatchType<1>],
[NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<5>>, IntrReadMem]>,
@@ -242,8 +218,7 @@ let TargetPrefix = "riscv" in {
// Input: (passthru, pointer, index, vl)
class RISCVILoad
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>,
- LLVMPointerType<LLVMMatchType<0>>,
+ [LLVMMatchType<0>, llvm_ptr_ty,
llvm_anyvector_ty, llvm_anyint_ty],
[NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic {
let VLOperand = 3;
@@ -252,8 +227,7 @@ let TargetPrefix = "riscv" in {
// Input: (maskedoff, pointer, index, mask, vl, policy)
class RISCVILoadMasked
: DefaultAttrsIntrinsic<[llvm_anyvector_ty ],
- [LLVMMatchType<0>,
- LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty,
+ [LLVMMatchType<0>, llvm_ptr_ty, llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
LLVMMatchType<2>],
[NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<5>>, IntrReadMem]>,
@@ -264,9 +238,7 @@ let TargetPrefix = "riscv" in {
// Input: (vector_in, pointer, vl)
class RISCVUSStore
: DefaultAttrsIntrinsic<[],
- [llvm_anyvector_ty,
- LLVMPointerType<LLVMMatchType<0>>,
- llvm_anyint_ty],
+ [llvm_anyvector_ty, llvm_ptr_ty, llvm_anyint_ty],
[NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic {
let VLOperand = 2;
}
@@ -274,8 +246,7 @@ let TargetPrefix = "riscv" in {
// Input: (vector_in, pointer, mask, vl)
class RISCVUSStoreMasked
: DefaultAttrsIntrinsic<[],
- [llvm_anyvector_ty,
- LLVMPointerType<LLVMMatchType<0>>,
+ [llvm_anyvector_ty, llvm_ptr_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyint_ty],
[NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic {
@@ -285,8 +256,7 @@ let TargetPrefix = "riscv" in {
// Input: (vector_in, pointer, stride, vl)
class RISCVSStore
: DefaultAttrsIntrinsic<[],
- [llvm_anyvector_ty,
- LLVMPointerType<LLVMMatchType<0>>,
+ [llvm_anyvector_ty, llvm_ptr_ty,
llvm_anyint_ty, LLVMMatchType<1>],
[NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic {
let VLOperand = 3;
@@ -295,8 +265,7 @@ let TargetPrefix = "riscv" in {
// Input: (vector_in, pointer, stirde, mask, vl)
class RISCVSStoreMasked
: DefaultAttrsIntrinsic<[],
- [llvm_anyvector_ty,
- LLVMPointerType<LLVMMatchType<0>>, llvm_anyint_ty,
+ [llvm_anyvector_ty, llvm_ptr_ty, llvm_anyint_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<1>],
[NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic {
let VLOperand = 4;
@@ -305,8 +274,7 @@ let TargetPrefix = "riscv" in {
// Input: (vector_in, pointer, index, vl)
class RISCVIStore
: DefaultAttrsIntrinsic<[],
- [llvm_anyvector_ty,
- LLVMPointerType<LLVMMatchType<0>>,
+ [llvm_anyvector_ty, llvm_ptr_ty,
llvm_anyint_ty, llvm_anyint_ty],
[NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic {
let VLOperand = 3;
@@ -315,8 +283,7 @@ let TargetPrefix = "riscv" in {
// Input: (vector_in, pointer, index, mask, vl)
class RISCVIStoreMasked
: DefaultAttrsIntrinsic<[],
- [llvm_anyvector_ty,
- LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty,
+ [llvm_anyvector_ty, llvm_ptr_ty, llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
[NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic {
let VLOperand = 4;
@@ -339,6 +306,24 @@ let TargetPrefix = "riscv" in {
[ImmArg<ArgIndex<4>>, IntrNoMem]>, RISCVVIntrinsic {
let VLOperand = 3;
}
+ // For destination vector type is the same as source vector.
+ // Input: (passthru, vector_in, frm, vl)
+ class RISCVUnaryAAUnMaskedRoundingMode
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty, LLVMMatchType<1>],
+ [ImmArg<ArgIndex<2>>, IntrNoMem]>, RISCVVIntrinsic {
+ let VLOperand = 3;
+ }
+ // For destination vector type is the same as first source vector (with mask).
+ // Input: (vector_in, vector_in, mask, frm, vl, policy)
+ class RISCVUnaryAAMaskedRoundingMode
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<1>, LLVMMatchType<1>],
+ [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic {
+ let VLOperand = 4;
+ }
// Input: (passthru, vector_in, vector_in, mask, vl)
class RISCVCompress
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
@@ -436,6 +421,27 @@ let TargetPrefix = "riscv" in {
let ScalarOperand = 2;
let VLOperand = 4;
}
+ // For destination vector type is the same as first source vector.
+ // Input: (passthru, vector_in, vector_in/scalar_in, frm, vl)
+ class RISCVBinaryAAXUnMaskedRoundingMode
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
+ llvm_anyint_ty, LLVMMatchType<2>],
+ [ImmArg<ArgIndex<3>>, IntrNoMem]>, RISCVVIntrinsic {
+ let ScalarOperand = 2;
+ let VLOperand = 4;
+ }
+ // For destination vector type is the same as first source vector (with mask).
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, frm, vl, policy)
+ class RISCVBinaryAAXMaskedRoundingMode
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<2>, LLVMMatchType<2>],
+ [ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<6>>, IntrNoMem]>, RISCVVIntrinsic {
+ let ScalarOperand = 2;
+ let VLOperand = 5;
+ }
// For destination vector type is the same as first source vector. The
// second source operand must match the destination type or be an XLen scalar.
// Input: (passthru, vector_in, vector_in/scalar_in, vl)
@@ -478,6 +484,27 @@ let TargetPrefix = "riscv" in {
let ScalarOperand = 2;
let VLOperand = 4;
}
+ // For destination vector type is NOT the same as first source vector.
+ // Input: (passthru, vector_in, vector_in/scalar_in, frm, vl)
+ class RISCVBinaryABXUnMaskedRoundingMode
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
+ llvm_anyint_ty, LLVMMatchType<3>],
+ [ImmArg<ArgIndex<3>>, IntrNoMem]>, RISCVVIntrinsic {
+ let ScalarOperand = 2;
+ let VLOperand = 4;
+ }
+ // For destination vector type is NOT the same as first source vector (with mask).
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, frm, vl, policy)
+ class RISCVBinaryABXMaskedRoundingMode
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<3>, LLVMMatchType<3>],
+ [ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<6>>, IntrNoMem]>, RISCVVIntrinsic {
+ let ScalarOperand = 2;
+ let VLOperand = 5;
+ }
// For destination vector type is NOT the same as first source vector. The
// second source operand must match the destination type or be an XLen scalar.
// Input: (passthru, vector_in, vector_in/scalar_in, vl)
@@ -586,6 +613,17 @@ let TargetPrefix = "riscv" in {
let ScalarOperand = 2;
let VLOperand = 3;
}
+ // For Saturating binary operations with rounding-mode operand
+ // The destination vector type is the same as first source vector.
+ // Input: (passthru, vector_in, vector_in/scalar_in, vxrm, vl)
+ class RISCVSaturatingBinaryAAXUnMaskedRoundingMode
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
+ llvm_anyint_ty, LLVMMatchType<2>],
+ [ImmArg<ArgIndex<3>>, IntrNoMem]>, RISCVVIntrinsic {
+ let ScalarOperand = 2;
+ let VLOperand = 4;
+ }
// For Saturating binary operations with mask.
// The destination vector type is the same as first source vector.
// Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl, policy)
@@ -598,6 +636,18 @@ let TargetPrefix = "riscv" in {
let ScalarOperand = 2;
let VLOperand = 4;
}
+  // For Saturating binary operations with mask and rounding-mode operand.
+ // The destination vector type is the same as first source vector.
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vxrm, vl, policy)
+ class RISCVSaturatingBinaryAAXMaskedRoundingMode
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<2>, LLVMMatchType<2>],
+ [ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<6>>, IntrNoMem]>, RISCVVIntrinsic {
+ let ScalarOperand = 2;
+ let VLOperand = 5;
+ }
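For illustration only (not part of the patch): vaadd is switched to this class pair later in the file, so a masked call following the (maskedoff, vector_in, vector_in, mask, vxrm, vl, policy) layout could be built as below; the vxrm value 0 and policy value 3 are assumed encodings, and the helper name is made up.

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/Module.h"

// Sketch only: vxrm (ArgIndex<4>) and policy (ArgIndex<6>) carry ImmArg and
// therefore have to be compile-time constants.
llvm::Value *emitMaskedVAAdd(llvm::IRBuilder<> &B, llvm::Module &M,
                             llvm::Value *MaskedOff, llvm::Value *Op0,
                             llvm::Value *Op1, llvm::Value *Mask,
                             llvm::Value *VL) {
  llvm::Type *XLenTy = VL->getType();
  llvm::Function *F = llvm::Intrinsic::getDeclaration(
      &M, llvm::Intrinsic::riscv_vaadd_mask,
      {Op0->getType(), Op1->getType(), XLenTy});
  llvm::Value *VXRM = llvm::ConstantInt::get(XLenTy, 0);   // assumed: rnu
  llvm::Value *Policy = llvm::ConstantInt::get(XLenTy, 3); // assumed: ta, ma
  return B.CreateCall(F, {MaskedOff, Op0, Op1, Mask, VXRM, VL, Policy});
}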
// For Saturating binary operations.
// The destination vector type is the same as first source vector.
// The second source operand matches the destination type or is an XLen scalar.
@@ -622,6 +672,31 @@ let TargetPrefix = "riscv" in {
let VLOperand = 4;
}
// For Saturating binary operations.
+ // The destination vector type is the same as first source vector.
+ // The second source operand matches the destination type or is an XLen scalar.
+ // Input: (passthru, vector_in, vector_in/scalar_in, vxrm, vl)
+ class RISCVSaturatingBinaryAAShiftUnMaskedRoundingMode
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
+ llvm_anyint_ty, LLVMMatchType<2>],
+ [ImmArg<ArgIndex<3>>, IntrNoMem, IntrHasSideEffects]>,
+ RISCVVIntrinsic {
+ let VLOperand = 4;
+ }
+ // For Saturating binary operations with mask.
+ // The destination vector type is the same as first source vector.
+ // The second source operand matches the destination type or is an XLen scalar.
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vxrm, vl, policy)
+ class RISCVSaturatingBinaryAAShiftMaskedRoundingMode
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_any_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<2>, LLVMMatchType<2>],
+ [ImmArg<ArgIndex<4>>,ImmArg<ArgIndex<6>>, IntrNoMem, IntrHasSideEffects]>,
+ RISCVVIntrinsic {
+    let VLOperand = 5;
+ }
+ // For Saturating binary operations.
// The destination vector type is NOT the same as first source vector.
// The second source operand matches the destination type or is an XLen scalar.
// Input: (passthru, vector_in, vector_in/scalar_in, vl)
@@ -644,6 +719,31 @@ let TargetPrefix = "riscv" in {
[ImmArg<ArgIndex<5>>, IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
let VLOperand = 4;
}
+ // For Saturating binary operations.
+ // The destination vector type is NOT the same as first source vector.
+ // The second source operand matches the destination type or is an XLen scalar.
+ // Input: (passthru, vector_in, vector_in/scalar_in, vxrm, vl)
+ class RISCVSaturatingBinaryABShiftUnMaskedRoundingMode
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
+ llvm_anyint_ty, LLVMMatchType<3>],
+ [ImmArg<ArgIndex<3>>, IntrNoMem, IntrHasSideEffects]>,
+ RISCVVIntrinsic {
+ let VLOperand = 4;
+ }
+ // For Saturating binary operations with mask.
+ // The destination vector type is NOT the same as first source vector (with mask).
+ // The second source operand matches the destination type or is an XLen scalar.
+ // Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vxrm, vl, policy)
+ class RISCVSaturatingBinaryABShiftMaskedRoundingMode
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, llvm_any_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<3>, LLVMMatchType<3>],
+ [ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<6>>, IntrNoMem,
+ IntrHasSideEffects]>, RISCVVIntrinsic {
+ let VLOperand = 5;
+ }
// Input: (vector_in, vector_in, scalar_in, vl, policy)
class RVVSlideUnMasked
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
@@ -682,6 +782,29 @@ let TargetPrefix = "riscv" in {
let ScalarOperand = 1;
let VLOperand = 4;
}
+ // UnMasked Vector Multiply-Add operations, its first operand can not be undef.
+ // Input: (vector_in, vector_in/scalar, vector_in, frm, vl, policy)
+ class RISCVTernaryAAXAUnMaskedRoundingMode
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_any_ty, LLVMMatchType<0>,
+ llvm_anyint_ty, LLVMMatchType<2>, LLVMMatchType<2>],
+ [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<5>>, IntrNoMem]>,
+ RISCVVIntrinsic {
+ let ScalarOperand = 1;
+ let VLOperand = 4;
+ }
+ // Masked Vector Multiply-Add operations, its first operand can not be undef.
+  // Input: (vector_in, vector_in/scalar, vector_in, mask, frm, vl, policy)
+ class RISCVTernaryAAXAMaskedRoundingMode
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_any_ty, LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyint_ty, LLVMMatchType<2>, LLVMMatchType<2>],
+ [ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<6>>, IntrNoMem]>,
+ RISCVVIntrinsic {
+ let ScalarOperand = 1;
+ let VLOperand = 5;
+ }
// UnMasked Widening Vector Multiply-Add operations, its first operand can not be undef.
// Input: (vector_in, vector_in/scalar, vector_in, vl, policy)
class RISCVTernaryWideUnMasked
@@ -703,6 +826,29 @@ let TargetPrefix = "riscv" in {
let ScalarOperand = 1;
let VLOperand = 4;
}
+ // UnMasked Widening Vector Multiply-Add operations, its first operand can not be undef.
+ // Input: (vector_in, vector_in/scalar, vector_in, frm, vl, policy)
+ class RISCVTernaryWideUnMaskedRoundingMode
+ : DefaultAttrsIntrinsic< [llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_any_ty, llvm_anyvector_ty,
+ llvm_anyint_ty, LLVMMatchType<3>, LLVMMatchType<3>],
+ [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<5>>, IntrNoMem] >,
+ RISCVVIntrinsic {
+ let ScalarOperand = 1;
+ let VLOperand = 4;
+ }
+ // Masked Widening Vector Multiply-Add operations, its first operand can not be undef.
+  // Input: (vector_in, vector_in/scalar, vector_in, mask, frm, vl, policy)
+ class RISCVTernaryWideMaskedRoundingMode
+ : DefaultAttrsIntrinsic< [llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_any_ty, llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ llvm_anyint_ty, LLVMMatchType<3>, LLVMMatchType<3>],
+ [ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<6>>, IntrNoMem]>,
+ RISCVVIntrinsic {
+ let ScalarOperand = 1;
+ let VLOperand = 5;
+ }
// For Reduction ternary operations.
// For destination vector type is the same as first and third source vector.
// Input: (vector_in, vector_in, vector_in, vl)
@@ -724,6 +870,28 @@ let TargetPrefix = "riscv" in {
[IntrNoMem]>, RISCVVIntrinsic {
let VLOperand = 4;
}
+ // For Reduction ternary operations.
+ // For destination vector type is the same as first and third source vector.
+ // Input: (vector_in, vector_in, vector_in, frm, vl)
+ class RISCVReductionUnMaskedRoundingMode
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>,
+ llvm_anyint_ty, LLVMMatchType<2>],
+ [ImmArg<ArgIndex<3>>, IntrNoMem]>, RISCVVIntrinsic {
+ let VLOperand = 4;
+ }
+ // For Reduction ternary operations with mask.
+ // For destination vector type is the same as first and third source vector.
+  // The mask type comes from the second source vector.
+ // Input: (vector_in, vector_in, vector_in, mask, frm, vl)
+ class RISCVReductionMaskedRoundingMode
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<1, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<2>],
+ [ImmArg<ArgIndex<4>>, IntrNoMem]>, RISCVVIntrinsic {
+ let VLOperand = 5;
+ }
// For unary operations with scalar type output without mask
// Output: (scalar type)
// Input: (vector_in, vl)
@@ -812,6 +980,25 @@ let TargetPrefix = "riscv" in {
[ImmArg<ArgIndex<4>>, IntrNoMem]>, RISCVVIntrinsic {
let VLOperand = 3;
}
+ // For Conversion unary operations.
+ // Input: (passthru, vector_in, frm, vl)
+ class RISCVConversionUnMaskedRoundingMode
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty, llvm_anyint_ty,
+ LLVMMatchType<2>],
+ [ImmArg<ArgIndex<2>>, IntrNoMem]>, RISCVVIntrinsic {
+ let VLOperand = 3;
+ }
+ // For Conversion unary operations with mask.
+ // Input: (maskedoff, vector_in, mask, frm, vl, policy)
+ class RISCVConversionMaskedRoundingMode
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyvector_ty,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty,
+ LLVMMatchType<2>, LLVMMatchType<2>],
+ [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<5>>, IntrNoMem]>, RISCVVIntrinsic {
+ let VLOperand = 4;
+ }
// For unit stride segment load
// Input: (passthru, pointer, vl)
@@ -819,7 +1006,7 @@ let TargetPrefix = "riscv" in {
: DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1))),
!listconcat(!listsplat(LLVMMatchType<0>, nf),
- [LLVMPointerToElt<0>, llvm_anyint_ty]),
+ [llvm_ptr_ty, llvm_anyint_ty]),
[NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic {
let VLOperand = !add(nf, 1);
}
@@ -829,7 +1016,7 @@ let TargetPrefix = "riscv" in {
: DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1))),
!listconcat(!listsplat(LLVMMatchType<0>, nf),
- [LLVMPointerToElt<0>,
+ [llvm_ptr_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyint_ty, LLVMMatchType<1>]),
[ImmArg<ArgIndex<!add(nf, 3)>>, NoCapture<ArgIndex<nf>>, IntrReadMem]>,
@@ -846,7 +1033,7 @@ let TargetPrefix = "riscv" in {
: DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1)), [llvm_anyint_ty]),
!listconcat(!listsplat(LLVMMatchType<0>, nf),
- [LLVMPointerToElt<0>, LLVMMatchType<1>]),
+ [llvm_ptr_ty, LLVMMatchType<1>]),
[NoCapture<ArgIndex<nf>>]>, RISCVVIntrinsic {
let VLOperand = !add(nf, 1);
}
@@ -859,7 +1046,7 @@ let TargetPrefix = "riscv" in {
: DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1)), [llvm_anyint_ty]),
!listconcat(!listsplat(LLVMMatchType<0>, nf),
- [LLVMPointerToElt<0>,
+ [llvm_ptr_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<1>, LLVMMatchType<1>]),
[ImmArg<ArgIndex<!add(nf, 3)>>, NoCapture<ArgIndex<nf>>]>,
@@ -873,7 +1060,7 @@ let TargetPrefix = "riscv" in {
: DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1))),
!listconcat(!listsplat(LLVMMatchType<0>, nf),
- [LLVMPointerToElt<0>, llvm_anyint_ty, LLVMMatchType<1>]),
+ [llvm_ptr_ty, llvm_anyint_ty, LLVMMatchType<1>]),
[NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic {
let VLOperand = !add(nf, 2);
}
@@ -883,7 +1070,7 @@ let TargetPrefix = "riscv" in {
: DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1))),
!listconcat(!listsplat(LLVMMatchType<0>, nf),
- [LLVMPointerToElt<0>,
+ [llvm_ptr_ty,
llvm_anyint_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<1>, LLVMMatchType<1>]),
@@ -898,7 +1085,7 @@ let TargetPrefix = "riscv" in {
: DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1))),
!listconcat(!listsplat(LLVMMatchType<0>, nf),
- [LLVMPointerToElt<0>, llvm_anyvector_ty, llvm_anyint_ty]),
+ [llvm_ptr_ty, llvm_anyvector_ty, llvm_anyint_ty]),
[NoCapture<ArgIndex<nf>>, IntrReadMem]>, RISCVVIntrinsic {
let VLOperand = !add(nf, 2);
}
@@ -908,7 +1095,7 @@ let TargetPrefix = "riscv" in {
: DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty], !listsplat(LLVMMatchType<0>,
!add(nf, -1))),
!listconcat(!listsplat(LLVMMatchType<0>, nf),
- [LLVMPointerToElt<0>,
+ [llvm_ptr_ty,
llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyint_ty, LLVMMatchType<2>]),
@@ -923,7 +1110,7 @@ let TargetPrefix = "riscv" in {
: DefaultAttrsIntrinsic<[],
!listconcat([llvm_anyvector_ty],
!listsplat(LLVMMatchType<0>, !add(nf, -1)),
- [LLVMPointerToElt<0>, llvm_anyint_ty]),
+ [llvm_ptr_ty, llvm_anyint_ty]),
[NoCapture<ArgIndex<nf>>, IntrWriteMem]>, RISCVVIntrinsic {
let VLOperand = !add(nf, 1);
}
@@ -933,7 +1120,7 @@ let TargetPrefix = "riscv" in {
: DefaultAttrsIntrinsic<[],
!listconcat([llvm_anyvector_ty],
!listsplat(LLVMMatchType<0>, !add(nf, -1)),
- [LLVMPointerToElt<0>,
+ [llvm_ptr_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyint_ty]),
[NoCapture<ArgIndex<nf>>, IntrWriteMem]>, RISCVVIntrinsic {
@@ -946,7 +1133,7 @@ let TargetPrefix = "riscv" in {
: DefaultAttrsIntrinsic<[],
!listconcat([llvm_anyvector_ty],
!listsplat(LLVMMatchType<0>, !add(nf, -1)),
- [LLVMPointerToElt<0>, llvm_anyint_ty,
+ [llvm_ptr_ty, llvm_anyint_ty,
LLVMMatchType<1>]),
[NoCapture<ArgIndex<nf>>, IntrWriteMem]>, RISCVVIntrinsic {
let VLOperand = !add(nf, 2);
@@ -957,7 +1144,7 @@ let TargetPrefix = "riscv" in {
: DefaultAttrsIntrinsic<[],
!listconcat([llvm_anyvector_ty],
!listsplat(LLVMMatchType<0>, !add(nf, -1)),
- [LLVMPointerToElt<0>, llvm_anyint_ty,
+ [llvm_ptr_ty, llvm_anyint_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
LLVMMatchType<1>]),
[NoCapture<ArgIndex<nf>>, IntrWriteMem]>, RISCVVIntrinsic {
@@ -970,7 +1157,7 @@ let TargetPrefix = "riscv" in {
: DefaultAttrsIntrinsic<[],
!listconcat([llvm_anyvector_ty],
!listsplat(LLVMMatchType<0>, !add(nf, -1)),
- [LLVMPointerToElt<0>, llvm_anyvector_ty,
+ [llvm_ptr_ty, llvm_anyvector_ty,
llvm_anyint_ty]),
[NoCapture<ArgIndex<nf>>, IntrWriteMem]>, RISCVVIntrinsic {
let VLOperand = !add(nf, 2);
@@ -981,7 +1168,7 @@ let TargetPrefix = "riscv" in {
: DefaultAttrsIntrinsic<[],
!listconcat([llvm_anyvector_ty],
!listsplat(LLVMMatchType<0>, !add(nf, -1)),
- [LLVMPointerToElt<0>, llvm_anyvector_ty,
+ [llvm_ptr_ty, llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
llvm_anyint_ty]),
[NoCapture<ArgIndex<nf>>, IntrWriteMem]>, RISCVVIntrinsic {
@@ -1021,6 +1208,10 @@ let TargetPrefix = "riscv" in {
def "int_riscv_" # NAME : RISCVUnaryAAUnMasked;
def "int_riscv_" # NAME # "_mask" : RISCVUnaryAAMasked;
}
+ multiclass RISCVUnaryAARoundingMode {
+ def "int_riscv_" # NAME : RISCVUnaryAAUnMaskedRoundingMode;
+ def "int_riscv_" # NAME # "_mask" : RISCVUnaryAAMaskedRoundingMode;
+ }
multiclass RISCVUnaryAB {
def "int_riscv_" # NAME : RISCVUnaryABUnMasked;
def "int_riscv_" # NAME # "_mask" : RISCVUnaryABMasked;
@@ -1031,6 +1222,10 @@ let TargetPrefix = "riscv" in {
def "int_riscv_" # NAME : RISCVBinaryAAXUnMasked;
def "int_riscv_" # NAME # "_mask" : RISCVBinaryAAXMasked;
}
+ multiclass RISCVBinaryAAXRoundingMode {
+ def "int_riscv_" # NAME : RISCVBinaryAAXUnMaskedRoundingMode;
+ def "int_riscv_" # NAME # "_mask" : RISCVBinaryAAXMaskedRoundingMode;
+ }
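Each of these multiclasses stamps out an unmasked record plus a "_mask" twin, so for example "defm vfadd : RISCVBinaryAAXRoundingMode;" below produces both llvm.riscv.vfadd and llvm.riscv.vfadd.mask. A minimal sketch of how that pairing surfaces in C++ (the helper is illustrative, not from the patch):

#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsRISCV.h"

// Sketch only: code that recognizes the unmasked form normally has to handle
// the "_mask" record generated by the same multiclass as well.
static bool isVFAdd(const llvm::IntrinsicInst &II) {
  switch (II.getIntrinsicID()) {
  case llvm::Intrinsic::riscv_vfadd:
  case llvm::Intrinsic::riscv_vfadd_mask:
    return true;
  default:
    return false;
  }
}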
// Like RISCVBinaryAAX, but the second operand is used a shift amount so it
// must be a vector or an XLen scalar.
multiclass RISCVBinaryAAShift {
@@ -1055,6 +1250,10 @@ let TargetPrefix = "riscv" in {
def "int_riscv_" # NAME : RISCVBinaryABXUnMasked;
def "int_riscv_" # NAME # "_mask" : RISCVBinaryABXMasked;
}
+ multiclass RISCVBinaryABXRoundingMode {
+ def "int_riscv_" # NAME : RISCVBinaryABXUnMaskedRoundingMode;
+ def "int_riscv_" # NAME # "_mask" : RISCVBinaryABXMaskedRoundingMode;
+ }
// Like RISCVBinaryABX, but the second operand is used a shift amount so it
// must be a vector or an XLen scalar.
multiclass RISCVBinaryABShift {
@@ -1074,13 +1273,17 @@ let TargetPrefix = "riscv" in {
def "int_riscv_" # NAME : RISCVSaturatingBinaryAAXUnMasked;
def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryAAXMasked;
}
- multiclass RISCVSaturatingBinaryAAShift {
- def "int_riscv_" # NAME : RISCVSaturatingBinaryAAShiftUnMasked;
- def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryAAShiftMasked;
+ multiclass RISCVSaturatingBinaryAAXRoundingMode {
+ def "int_riscv_" # NAME : RISCVSaturatingBinaryAAXUnMaskedRoundingMode;
+ def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryAAXMaskedRoundingMode;
}
- multiclass RISCVSaturatingBinaryABShift {
- def "int_riscv_" # NAME : RISCVSaturatingBinaryABShiftUnMasked;
- def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryABShiftMasked;
+ multiclass RISCVSaturatingBinaryAAShiftRoundingMode {
+ def "int_riscv_" # NAME : RISCVSaturatingBinaryAAShiftUnMaskedRoundingMode;
+ def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryAAShiftMaskedRoundingMode;
+ }
+ multiclass RISCVSaturatingBinaryABShiftRoundingMode {
+ def "int_riscv_" # NAME : RISCVSaturatingBinaryABShiftUnMaskedRoundingMode;
+ def "int_riscv_" # NAME # "_mask" : RISCVSaturatingBinaryABShiftMaskedRoundingMode;
}
multiclass RVVSlide {
def "int_riscv_" # NAME : RVVSlideUnMasked;
@@ -1090,6 +1293,10 @@ let TargetPrefix = "riscv" in {
def "int_riscv_" # NAME : RISCVTernaryAAXAUnMasked;
def "int_riscv_" # NAME # "_mask" : RISCVTernaryAAXAMasked;
}
+ multiclass RISCVTernaryAAXARoundingMode {
+ def "int_riscv_" # NAME : RISCVTernaryAAXAUnMaskedRoundingMode;
+ def "int_riscv_" # NAME # "_mask" : RISCVTernaryAAXAMaskedRoundingMode;
+ }
multiclass RISCVCompare {
def "int_riscv_" # NAME : RISCVCompareUnMasked;
def "int_riscv_" # NAME # "_mask" : RISCVCompareMasked;
@@ -1102,10 +1309,18 @@ let TargetPrefix = "riscv" in {
def "int_riscv_" # NAME : RISCVTernaryWideUnMasked;
def "int_riscv_" # NAME # "_mask" : RISCVTernaryWideMasked;
}
+ multiclass RISCVTernaryWideRoundingMode {
+ def "int_riscv_" # NAME : RISCVTernaryWideUnMaskedRoundingMode;
+ def "int_riscv_" # NAME # "_mask" : RISCVTernaryWideMaskedRoundingMode;
+ }
multiclass RISCVReduction {
def "int_riscv_" # NAME : RISCVReductionUnMasked;
def "int_riscv_" # NAME # "_mask" : RISCVReductionMasked;
}
+ multiclass RISCVReductionRoundingMode {
+ def "int_riscv_" # NAME : RISCVReductionUnMaskedRoundingMode;
+ def "int_riscv_" # NAME # "_mask" : RISCVReductionMaskedRoundingMode;
+ }
multiclass RISCVMaskedUnarySOut {
def "int_riscv_" # NAME : RISCVMaskedUnarySOutUnMasked;
def "int_riscv_" # NAME # "_mask" : RISCVMaskedUnarySOutMasked;
@@ -1118,6 +1333,10 @@ let TargetPrefix = "riscv" in {
def "int_riscv_" #NAME :RISCVConversionUnMasked;
def "int_riscv_" # NAME # "_mask" : RISCVConversionMasked;
}
+ multiclass RISCVConversionRoundingMode {
+ def "int_riscv_" #NAME :RISCVConversionUnMaskedRoundingMode;
+ def "int_riscv_" # NAME # "_mask" : RISCVConversionMaskedRoundingMode;
+ }
multiclass RISCVUSSegLoad<int nf> {
def "int_riscv_" # NAME : RISCVUSSegLoad<nf>;
def "int_riscv_" # NAME # "_mask" : RISCVUSSegLoadMasked<nf>;
@@ -1235,14 +1454,14 @@ let TargetPrefix = "riscv" in {
defm vwmaccus : RISCVTernaryWide;
defm vwmaccsu : RISCVTernaryWide;
- defm vfadd : RISCVBinaryAAX;
- defm vfsub : RISCVBinaryAAX;
- defm vfrsub : RISCVBinaryAAX;
+ defm vfadd : RISCVBinaryAAXRoundingMode;
+ defm vfsub : RISCVBinaryAAXRoundingMode;
+ defm vfrsub : RISCVBinaryAAXRoundingMode;
- defm vfwadd : RISCVBinaryABX;
- defm vfwsub : RISCVBinaryABX;
- defm vfwadd_w : RISCVBinaryAAX;
- defm vfwsub_w : RISCVBinaryAAX;
+ defm vfwadd : RISCVBinaryABXRoundingMode;
+ defm vfwsub : RISCVBinaryABXRoundingMode;
+ defm vfwadd_w : RISCVBinaryAAXRoundingMode;
+ defm vfwsub_w : RISCVBinaryAAXRoundingMode;
defm vsaddu : RISCVSaturatingBinaryAAX;
defm vsadd : RISCVSaturatingBinaryAAX;
@@ -1301,29 +1520,29 @@ let TargetPrefix = "riscv" in {
let VLOperand = 2;
}
- defm vfmul : RISCVBinaryAAX;
- defm vfdiv : RISCVBinaryAAX;
- defm vfrdiv : RISCVBinaryAAX;
+ defm vfmul : RISCVBinaryAAXRoundingMode;
+ defm vfdiv : RISCVBinaryAAXRoundingMode;
+ defm vfrdiv : RISCVBinaryAAXRoundingMode;
- defm vfwmul : RISCVBinaryABX;
+ defm vfwmul : RISCVBinaryABXRoundingMode;
- defm vfmacc : RISCVTernaryAAXA;
- defm vfnmacc : RISCVTernaryAAXA;
- defm vfmsac : RISCVTernaryAAXA;
- defm vfnmsac : RISCVTernaryAAXA;
- defm vfmadd : RISCVTernaryAAXA;
- defm vfnmadd : RISCVTernaryAAXA;
- defm vfmsub : RISCVTernaryAAXA;
- defm vfnmsub : RISCVTernaryAAXA;
+ defm vfmacc : RISCVTernaryAAXARoundingMode;
+ defm vfnmacc : RISCVTernaryAAXARoundingMode;
+ defm vfmsac : RISCVTernaryAAXARoundingMode;
+ defm vfnmsac : RISCVTernaryAAXARoundingMode;
+ defm vfmadd : RISCVTernaryAAXARoundingMode;
+ defm vfnmadd : RISCVTernaryAAXARoundingMode;
+ defm vfmsub : RISCVTernaryAAXARoundingMode;
+ defm vfnmsub : RISCVTernaryAAXARoundingMode;
- defm vfwmacc : RISCVTernaryWide;
- defm vfwnmacc : RISCVTernaryWide;
- defm vfwmsac : RISCVTernaryWide;
- defm vfwnmsac : RISCVTernaryWide;
+ defm vfwmacc : RISCVTernaryWideRoundingMode;
+ defm vfwnmacc : RISCVTernaryWideRoundingMode;
+ defm vfwmsac : RISCVTernaryWideRoundingMode;
+ defm vfwnmsac : RISCVTernaryWideRoundingMode;
- defm vfsqrt : RISCVUnaryAA;
+ defm vfsqrt : RISCVUnaryAARoundingMode;
defm vfrsqrt7 : RISCVUnaryAA;
- defm vfrec7 : RISCVUnaryAA;
+ defm vfrec7 : RISCVUnaryAARoundingMode;
defm vfmin : RISCVBinaryAAX;
defm vfmax : RISCVBinaryAAX;
@@ -1350,18 +1569,18 @@ let TargetPrefix = "riscv" in {
def "int_riscv_vcompress" : RISCVCompress;
- defm vaaddu : RISCVSaturatingBinaryAAX;
- defm vaadd : RISCVSaturatingBinaryAAX;
- defm vasubu : RISCVSaturatingBinaryAAX;
- defm vasub : RISCVSaturatingBinaryAAX;
+ defm vaaddu : RISCVSaturatingBinaryAAXRoundingMode;
+ defm vaadd : RISCVSaturatingBinaryAAXRoundingMode;
+ defm vasubu : RISCVSaturatingBinaryAAXRoundingMode;
+ defm vasub : RISCVSaturatingBinaryAAXRoundingMode;
- defm vsmul : RISCVSaturatingBinaryAAX;
+ defm vsmul : RISCVSaturatingBinaryAAXRoundingMode;
- defm vssrl : RISCVSaturatingBinaryAAShift;
- defm vssra : RISCVSaturatingBinaryAAShift;
+ defm vssrl : RISCVSaturatingBinaryAAShiftRoundingMode;
+ defm vssra : RISCVSaturatingBinaryAAShiftRoundingMode;
- defm vnclipu : RISCVSaturatingBinaryABShift;
- defm vnclip : RISCVSaturatingBinaryABShift;
+ defm vnclipu : RISCVSaturatingBinaryABShiftRoundingMode;
+ defm vnclip : RISCVSaturatingBinaryABShiftRoundingMode;
defm vmfeq : RISCVCompare;
defm vmfne : RISCVCompare;
@@ -1382,13 +1601,13 @@ let TargetPrefix = "riscv" in {
defm vwredsumu : RISCVReduction;
defm vwredsum : RISCVReduction;
- defm vfredosum : RISCVReduction;
- defm vfredusum : RISCVReduction;
+ defm vfredosum : RISCVReductionRoundingMode;
+ defm vfredusum : RISCVReductionRoundingMode;
defm vfredmin : RISCVReduction;
defm vfredmax : RISCVReduction;
- defm vfwredusum : RISCVReduction;
- defm vfwredosum : RISCVReduction;
+ defm vfwredusum : RISCVReductionRoundingMode;
+ defm vfwredosum : RISCVReductionRoundingMode;
def int_riscv_vmand: RISCVBinaryAAAUnMasked;
def int_riscv_vmnand: RISCVBinaryAAAUnMasked;
@@ -1407,28 +1626,28 @@ let TargetPrefix = "riscv" in {
defm vmsof : RISCVMaskedUnaryMOut;
defm vmsif : RISCVMaskedUnaryMOut;
- defm vfcvt_xu_f_v : RISCVConversion;
- defm vfcvt_x_f_v : RISCVConversion;
+ defm vfcvt_xu_f_v : RISCVConversionRoundingMode;
+ defm vfcvt_x_f_v : RISCVConversionRoundingMode;
defm vfcvt_rtz_xu_f_v : RISCVConversion;
defm vfcvt_rtz_x_f_v : RISCVConversion;
- defm vfcvt_f_xu_v : RISCVConversion;
- defm vfcvt_f_x_v : RISCVConversion;
+ defm vfcvt_f_xu_v : RISCVConversionRoundingMode;
+ defm vfcvt_f_x_v : RISCVConversionRoundingMode;
defm vfwcvt_f_xu_v : RISCVConversion;
defm vfwcvt_f_x_v : RISCVConversion;
- defm vfwcvt_xu_f_v : RISCVConversion;
- defm vfwcvt_x_f_v : RISCVConversion;
+ defm vfwcvt_xu_f_v : RISCVConversionRoundingMode;
+ defm vfwcvt_x_f_v : RISCVConversionRoundingMode;
defm vfwcvt_rtz_xu_f_v : RISCVConversion;
defm vfwcvt_rtz_x_f_v : RISCVConversion;
defm vfwcvt_f_f_v : RISCVConversion;
- defm vfncvt_f_xu_w : RISCVConversion;
- defm vfncvt_f_x_w : RISCVConversion;
- defm vfncvt_xu_f_w : RISCVConversion;
- defm vfncvt_x_f_w : RISCVConversion;
+ defm vfncvt_f_xu_w : RISCVConversionRoundingMode;
+ defm vfncvt_f_x_w : RISCVConversionRoundingMode;
+ defm vfncvt_xu_f_w : RISCVConversionRoundingMode;
+ defm vfncvt_x_f_w : RISCVConversionRoundingMode;
defm vfncvt_rtz_xu_f_w : RISCVConversion;
defm vfncvt_rtz_x_f_w : RISCVConversion;
- defm vfncvt_f_f_w : RISCVConversion;
+ defm vfncvt_f_f_w : RISCVConversionRoundingMode;
defm vfncvt_rod_f_f_w : RISCVConversion;
// Output: (vector)
@@ -1493,7 +1712,7 @@ let TargetPrefix = "riscv" in {
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[NoCapture<ArgIndex<1>>, IntrWriteMem]>;
- // Segment loads for fixed vectors.
+ // Segment loads/stores for fixed vectors.
foreach nf = [2, 3, 4, 5, 6, 7, 8] in {
def int_riscv_seg # nf # _load
: DefaultAttrsIntrinsic<!listconcat([llvm_anyvector_ty],
@@ -1501,6 +1720,13 @@ let TargetPrefix = "riscv" in {
!add(nf, -1))),
[llvm_anyptr_ty, llvm_anyint_ty],
[NoCapture<ArgIndex<0>>, IntrReadMem]>;
+ def int_riscv_seg # nf # _store
+ : DefaultAttrsIntrinsic<[],
+ !listconcat([llvm_anyvector_ty],
+ !listsplat(LLVMMatchType<0>,
+ !add(nf, -1)),
+ [llvm_anyptr_ty, llvm_anyint_ty]),
+ [NoCapture<ArgIndex<nf>>, IntrWriteMem]>;
}
} // TargetPrefix = "riscv"
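The new fixed-vector segment stores mirror the existing segment loads; a rough sketch (helper name and types assumed, not part of the patch) of emitting the two-field variant:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/Module.h"

// Sketch only: llvm.riscv.seg2.store takes the nf field vectors first, then
// the pointer and the VL, per the !listconcat layout above.
void emitSeg2Store(llvm::IRBuilder<> &B, llvm::Module &M, llvm::Value *V0,
                   llvm::Value *V1, llvm::Value *Ptr, llvm::Value *VL) {
  llvm::Function *F = llvm::Intrinsic::getDeclaration(
      &M, llvm::Intrinsic::riscv_seg2_store,
      {V0->getType(), Ptr->getType(), VL->getType()});
  B.CreateCall(F, {V0, V1, Ptr, VL});
}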
@@ -1513,14 +1739,9 @@ let TargetPrefix = "riscv" in {
let TargetPrefix = "riscv" in {
-class ScalarCryptoGprIntrinsicAny
- : DefaultAttrsIntrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]>;
-
class ScalarCryptoByteSelect32
: DefaultAttrsIntrinsic<[llvm_i32_ty],
- [llvm_i32_ty, llvm_i32_ty, llvm_i8_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable,
ImmArg<ArgIndex<2>>]>;
@@ -1534,67 +1755,89 @@ class ScalarCryptoGprGprIntrinsic64
[llvm_i64_ty, llvm_i64_ty],
[IntrNoMem, IntrSpeculatable]>;
+class ScalarCryptoGprIntrinsic32
+ : DefaultAttrsIntrinsic<[llvm_i32_ty],
+ [llvm_i32_ty],
+ [IntrNoMem, IntrSpeculatable]>;
+
class ScalarCryptoGprIntrinsic64
: DefaultAttrsIntrinsic<[llvm_i64_ty],
[llvm_i64_ty],
[IntrNoMem, IntrSpeculatable]>;
-class ScalarCryptoByteSelectAny
- : DefaultAttrsIntrinsic<[llvm_anyint_ty],
- [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i8_ty],
- [IntrNoMem, IntrSpeculatable, ImmArg<ArgIndex<2>>]>;
-
// Zknd
-def int_riscv_aes32dsi : ScalarCryptoByteSelect32;
-def int_riscv_aes32dsmi : ScalarCryptoByteSelect32;
+def int_riscv_aes32dsi : ScalarCryptoByteSelect32,
+ ClangBuiltin<"__builtin_riscv_aes32dsi">;
+def int_riscv_aes32dsmi : ScalarCryptoByteSelect32,
+ ClangBuiltin<"__builtin_riscv_aes32dsmi">;
-def int_riscv_aes64ds : ScalarCryptoGprGprIntrinsic64;
-def int_riscv_aes64dsm : ScalarCryptoGprGprIntrinsic64;
+def int_riscv_aes64ds : ScalarCryptoGprGprIntrinsic64,
+ ClangBuiltin<"__builtin_riscv_aes64ds">;
+def int_riscv_aes64dsm : ScalarCryptoGprGprIntrinsic64,
+ ClangBuiltin<"__builtin_riscv_aes64dsm">;
-def int_riscv_aes64im : ScalarCryptoGprIntrinsic64;
+def int_riscv_aes64im : ScalarCryptoGprIntrinsic64,
+ ClangBuiltin<"__builtin_riscv_aes64im">;
// Zkne
-def int_riscv_aes32esi : ScalarCryptoByteSelect32;
-def int_riscv_aes32esmi : ScalarCryptoByteSelect32;
+def int_riscv_aes32esi : ScalarCryptoByteSelect32,
+ ClangBuiltin<"__builtin_riscv_aes32esi">;
+def int_riscv_aes32esmi : ScalarCryptoByteSelect32,
+ ClangBuiltin<"__builtin_riscv_aes32esmi">;
-def int_riscv_aes64es : ScalarCryptoGprGprIntrinsic64;
-def int_riscv_aes64esm : ScalarCryptoGprGprIntrinsic64;
+def int_riscv_aes64es : ScalarCryptoGprGprIntrinsic64,
+ ClangBuiltin<"__builtin_riscv_aes64es">;
+def int_riscv_aes64esm : ScalarCryptoGprGprIntrinsic64,
+ ClangBuiltin<"__builtin_riscv_aes64esm">;
// Zknd & Zkne
-def int_riscv_aes64ks2 : ScalarCryptoGprGprIntrinsic64;
+def int_riscv_aes64ks2 : ScalarCryptoGprGprIntrinsic64,
+ ClangBuiltin<"__builtin_riscv_aes64ks2">;
def int_riscv_aes64ks1i : DefaultAttrsIntrinsic<[llvm_i64_ty],
[llvm_i64_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable,
- ImmArg<ArgIndex<1>>]>;
+ ImmArg<ArgIndex<1>>]>,
+ ClangBuiltin<"__builtin_riscv_aes64ks1i">;
// Zknh
-def int_riscv_sha256sig0 : ScalarCryptoGprIntrinsicAny;
-def int_riscv_sha256sig1 : ScalarCryptoGprIntrinsicAny;
-def int_riscv_sha256sum0 : ScalarCryptoGprIntrinsicAny;
-def int_riscv_sha256sum1 : ScalarCryptoGprIntrinsicAny;
-
-def int_riscv_sha512sig0l : ScalarCryptoGprGprIntrinsic32;
-def int_riscv_sha512sig0h : ScalarCryptoGprGprIntrinsic32;
-def int_riscv_sha512sig1l : ScalarCryptoGprGprIntrinsic32;
-def int_riscv_sha512sig1h : ScalarCryptoGprGprIntrinsic32;
-def int_riscv_sha512sum0r : ScalarCryptoGprGprIntrinsic32;
-def int_riscv_sha512sum1r : ScalarCryptoGprGprIntrinsic32;
-
-def int_riscv_sha512sig0 : ScalarCryptoGprIntrinsic64;
-def int_riscv_sha512sig1 : ScalarCryptoGprIntrinsic64;
-def int_riscv_sha512sum0 : ScalarCryptoGprIntrinsic64;
-def int_riscv_sha512sum1 : ScalarCryptoGprIntrinsic64;
+def int_riscv_sha256sig0 : ScalarCryptoGprIntrinsic32;
+def int_riscv_sha256sig1 : ScalarCryptoGprIntrinsic32;
+def int_riscv_sha256sum0 : ScalarCryptoGprIntrinsic32;
+def int_riscv_sha256sum1 : ScalarCryptoGprIntrinsic32;
+
+def int_riscv_sha512sig0l : ScalarCryptoGprGprIntrinsic32,
+ ClangBuiltin<"__builtin_riscv_sha512sig0l">;
+def int_riscv_sha512sig0h : ScalarCryptoGprGprIntrinsic32,
+ ClangBuiltin<"__builtin_riscv_sha512sig0h">;
+def int_riscv_sha512sig1l : ScalarCryptoGprGprIntrinsic32,
+ ClangBuiltin<"__builtin_riscv_sha512sig1l">;
+def int_riscv_sha512sig1h : ScalarCryptoGprGprIntrinsic32,
+ ClangBuiltin<"__builtin_riscv_sha512sig1h">;
+def int_riscv_sha512sum0r : ScalarCryptoGprGprIntrinsic32,
+ ClangBuiltin<"__builtin_riscv_sha512sum0r">;
+def int_riscv_sha512sum1r : ScalarCryptoGprGprIntrinsic32,
+ ClangBuiltin<"__builtin_riscv_sha512sum1r">;
+
+def int_riscv_sha512sig0 : ScalarCryptoGprIntrinsic64,
+ ClangBuiltin<"__builtin_riscv_sha512sig0">;
+def int_riscv_sha512sig1 : ScalarCryptoGprIntrinsic64,
+ ClangBuiltin<"__builtin_riscv_sha512sig1">;
+def int_riscv_sha512sum0 : ScalarCryptoGprIntrinsic64,
+ ClangBuiltin<"__builtin_riscv_sha512sum0">;
+def int_riscv_sha512sum1 : ScalarCryptoGprIntrinsic64,
+ ClangBuiltin<"__builtin_riscv_sha512sum1">;
// Zksed
-def int_riscv_sm4ks : ScalarCryptoByteSelectAny;
-def int_riscv_sm4ed : ScalarCryptoByteSelectAny;
+def int_riscv_sm4ks : ScalarCryptoByteSelect32;
+def int_riscv_sm4ed : ScalarCryptoByteSelect32;
// Zksh
-def int_riscv_sm3p0 : ScalarCryptoGprIntrinsicAny;
-def int_riscv_sm3p1 : ScalarCryptoGprIntrinsicAny;
+def int_riscv_sm3p0 : ScalarCryptoGprIntrinsic32;
+def int_riscv_sm3p1 : ScalarCryptoGprIntrinsic32;
} // TargetPrefix = "riscv"
//===----------------------------------------------------------------------===//
// Vendor extensions
//===----------------------------------------------------------------------===//
include "llvm/IR/IntrinsicsRISCVXTHead.td"
+include "llvm/IR/IntrinsicsRISCVXsf.td"
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCVXTHead.td b/llvm/include/llvm/IR/IntrinsicsRISCVXTHead.td
index 8486b678022b..5af10a3e197a 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCVXTHead.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCVXTHead.td
@@ -1,3 +1,15 @@
+//===- IntrinsicsRISCVXTHead.td - T-Head intrinsics --------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the T-Head vendor intrinsics for RISC-V.
+//
+//===----------------------------------------------------------------------===//
+
let TargetPrefix = "riscv" in {
class TH_VdotTernaryWideMasked
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCVXsf.td b/llvm/include/llvm/IR/IntrinsicsRISCVXsf.td
new file mode 100644
index 000000000000..0c8da35491ce
--- /dev/null
+++ b/llvm/include/llvm/IR/IntrinsicsRISCVXsf.td
@@ -0,0 +1,135 @@
+//===- IntrinsicsRISCVXsf.td - SiFive intrinsics -----------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the SiFive vendor intrinsics for RISC-V.
+//
+//===----------------------------------------------------------------------===//
+
+class VCIXSuffix<string range> {
+ list<string> suffix = !cond(!eq(range, "c"): ["e8mf8", "e8mf4", "e8mf2", "e8m1", "e8m2", "e8m4", "e8m8"],
+ !eq(range, "s"): ["e16mf4", "e16mf2", "e16m1", "e16m2", "e16m4", "e16m8"],
+ !eq(range, "i"): ["e32mf2", "e32m1", "e32m2", "e32m4", "e32m8"],
+ !eq(range, "l"): ["e64m1", "e64m2", "e64m4", "e64m8"]);
+}
+
+let TargetPrefix = "riscv" in {
+ // Output: (vector_out) or ()
+ // Input: (bit<27-26>, bit<24-20>, scalar_in, vl) or
+ // (bit<27-26>, bit<24-20>, bit<11-7>, scalar_in, vl)
+ class RISCVSFCustomVC_X<bit HasDst, bit HasSE, bit ImmScalar>
+ : Intrinsic<!if(HasDst, [llvm_anyvector_ty], []),
+ !listconcat(!if(HasDst, [llvm_anyint_ty, LLVMMatchType<1>],
+ [llvm_anyint_ty, LLVMMatchType<0>, LLVMMatchType<0>]),
+ [llvm_any_ty, llvm_anyint_ty]),
+ !listconcat([IntrNoMem, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>], // bit<27-26> and bit<24-20>
+ !if(HasDst, [], [ImmArg<ArgIndex<2>>]), // Vd or bit<11-7>
+ !if(ImmScalar, !if(HasDst, [ImmArg<ArgIndex<2>>],
+ [ImmArg<ArgIndex<3>>]), []), // ScalarOperand
+ !if(HasSE, [IntrHasSideEffects], []))>,
+ RISCVVIntrinsic {
+ let ScalarOperand = !cond(ImmScalar: NoScalarOperand,
+ HasDst: 2,
+ true: 3);
+ let VLOperand = !if(HasDst, 3, 4);
+ }
+ // Output: (vector_out) or ()
+ // Input: (bit<27-26>, vector_in, vector_in/scalar_in, vl) or
+ // (bit<27-26>, bit<11-7>, vector_in, vector_in/scalar_in, vl)
+ class RISCVSFCustomVC_XV<bit HasDst, bit HasSE, bit ImmScalar>
+ : Intrinsic<!if(HasDst, [llvm_anyvector_ty], []),
+ !listconcat(!if(HasDst, [llvm_anyint_ty, LLVMMatchType<0>],
+ [llvm_anyint_ty, LLVMMatchType<0>, llvm_anyvector_ty]),
+ [llvm_any_ty, llvm_anyint_ty]),
+ !listconcat([IntrNoMem, ImmArg<ArgIndex<0>>], // bit<27-26>
+ !if(HasDst, [], [ImmArg<ArgIndex<1>>]), // Vd or bit<11-7>
+ !if(ImmScalar, !if(HasDst, [ImmArg<ArgIndex<2>>],
+ [ImmArg<ArgIndex<3>>]), []), // ScalarOperand
+ !if(HasSE, [IntrHasSideEffects], []))>,
+ RISCVVIntrinsic {
+ let ScalarOperand = !cond(ImmScalar: NoScalarOperand,
+ HasDst: 2,
+ true: 3);
+ let VLOperand = !if(HasDst, 3, 4);
+ }
+ // Output: (vector_out) or ()
+ // Input: (bit<27-26>, passthru, vector_in, vector_in/scalar_in, vl) or
+ // (bit<27-26>, vector_in, vector_in, vector_in/scalar_in, vl)
+ class RISCVSFCustomVC_XVV<bit HasDst, bit HasSE, bit ImmScalar>
+ : Intrinsic<!if(HasDst, [llvm_anyvector_ty], []),
+ !listconcat(!if(HasDst, [llvm_anyint_ty, LLVMMatchType<0>, LLVMMatchType<0>],
+ [llvm_anyint_ty, llvm_anyvector_ty, LLVMMatchType<1>]),
+ [llvm_any_ty, llvm_anyint_ty]),
+ !listconcat([IntrNoMem, ImmArg<ArgIndex<0>>], // bit<27-26>
+ !if(ImmScalar, [ImmArg<ArgIndex<3>>], []), // ScalarOperand
+ !if(HasSE, [IntrHasSideEffects], []))>,
+ RISCVVIntrinsic {
+ let ScalarOperand = !if(ImmScalar, NoScalarOperand, 3);
+ let VLOperand = 4;
+ }
+ // Output: (wvector_out) or ()
+ // Input: (bit<27-26>, passthru, vector_in, vector_in/scalar_in, vl) or
+ // (bit<27-26>, wvector_in, vector_in, vector_in/scalar_in, vl)
+ class RISCVSFCustomVC_XVW<bit HasDst, bit HasSE, bit ImmScalar>
+ : Intrinsic<!if(HasDst, [llvm_anyvector_ty], []),
+ !listconcat(!if(HasDst, [llvm_anyint_ty, LLVMMatchType<0>, llvm_anyvector_ty],
+ [llvm_anyint_ty, llvm_anyvector_ty, llvm_anyvector_ty]),
+ [llvm_any_ty, llvm_anyint_ty]),
+ !listconcat([IntrNoMem, ImmArg<ArgIndex<0>>], // bit<27-26>
+ !if(ImmScalar, [ImmArg<ArgIndex<3>>], []), // ScalarOperand
+ !if(HasSE, [IntrHasSideEffects], []))>,
+ RISCVVIntrinsic {
+ let ScalarOperand = !if(ImmScalar, NoScalarOperand, 3);
+ let VLOperand = 4;
+ }
+
+ multiclass RISCVSFCustomVC_X<list<string> type> {
+ foreach t = type in {
+ defvar ImmScalar = !eq(t, "i");
+ defvar range = ["c", "s", "i", "l"];
+ foreach r = range in {
+ foreach s = VCIXSuffix<r>.suffix in {
+ def "int_riscv_sf_vc_" # t # "_se_" # s : RISCVSFCustomVC_X</*HasDst*/0, /*HasSE*/1, ImmScalar>;
+ }
+ }
+ def "int_riscv_sf_vc_v_" # t # "_se" : RISCVSFCustomVC_X</*HasDst*/1, /*HasSE*/1, ImmScalar>;
+ def "int_riscv_sf_vc_v_" # t : RISCVSFCustomVC_X</*HasDst*/1, /*HasSE*/0, ImmScalar>;
+ }
+ }
+
+ multiclass RISCVSFCustomVC_XV<list<string> type> {
+ foreach t = type in {
+ defvar ImmScalar = !eq(t, "i");
+ def "int_riscv_sf_vc_" # t # "v_se" : RISCVSFCustomVC_XV</*HasDst*/0, /*HasSE*/1, ImmScalar>;
+ def "int_riscv_sf_vc_v_" # t # "v_se" : RISCVSFCustomVC_XV</*HasDst*/1, /*HasSE*/1, ImmScalar>;
+ def "int_riscv_sf_vc_v_" # t # "v" : RISCVSFCustomVC_XV</*HasDst*/1, /*HasSE*/0, ImmScalar>;
+ }
+ }
+
+ multiclass RISCVSFCustomVC_XVV<list<string> type> {
+ foreach t = type in {
+ defvar ImmScalar = !eq(t, "i");
+ def "int_riscv_sf_vc_" # t # "vv_se" : RISCVSFCustomVC_XVV</*HasDst*/0, /*HasSE*/1, ImmScalar>;
+ def "int_riscv_sf_vc_v_" # t # "vv_se" : RISCVSFCustomVC_XVV</*HasDst*/1, /*HasSE*/1, ImmScalar>;
+ def "int_riscv_sf_vc_v_" # t # "vv" : RISCVSFCustomVC_XVV</*HasDst*/1, /*HasSE*/0, ImmScalar>;
+ }
+ }
+
+ multiclass RISCVSFCustomVC_XVW<list<string> type> {
+ foreach t = type in {
+ defvar ImmScalar = !eq(t, "i");
+ def "int_riscv_sf_vc_" # t # "vw_se" : RISCVSFCustomVC_XVW</*HasDst*/0, /*HasSE*/1, ImmScalar>;
+ def "int_riscv_sf_vc_v_" # t # "vw_se" : RISCVSFCustomVC_XVW</*HasDst*/1, /*HasSE*/1, ImmScalar>;
+ def "int_riscv_sf_vc_v_" # t # "vw" : RISCVSFCustomVC_XVW</*HasDst*/1, /*HasSE*/0, ImmScalar>;
+ }
+ }
+
+ defm "" : RISCVSFCustomVC_X<["x", "i"]>;
+ defm "" : RISCVSFCustomVC_XV<["x", "i", "v", "f"]>;
+ defm "" : RISCVSFCustomVC_XVV<["x", "i", "v", "f"]>;
+ defm "" : RISCVSFCustomVC_XVW<["x", "i", "v", "f"]>;
+} // TargetPrefix = "riscv"
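For orientation only: the intrinsic names follow mechanically from the string concatenation in the multiclasses, so expanding RISCVSFCustomVC_XV for t == "x" yields one destination-less side-effecting form and two forms with a destination. A minimal C++ sketch (the predicate itself is illustrative):

#include "llvm/IR/IntrinsicsRISCV.h"

// Sketch only: the three records created for t == "x" by the XV multiclass.
static bool isSiFiveVCXV(llvm::Intrinsic::ID ID) {
  return ID == llvm::Intrinsic::riscv_sf_vc_xv_se ||   // no dst, side effects
         ID == llvm::Intrinsic::riscv_sf_vc_v_xv_se || // dst, side effects
         ID == llvm::Intrinsic::riscv_sf_vc_v_xv;      // dst, no side effects
}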
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index bf90aa0c0402..2b8602f430df 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -31,4 +31,5 @@ let TargetPrefix = "spv" in {
def int_spv_cmpxchg : Intrinsic<[llvm_i32_ty], [llvm_any_ty, llvm_vararg_ty]>;
def int_spv_unreachable : Intrinsic<[], []>;
def int_spv_alloca : Intrinsic<[llvm_any_ty], []>;
+ def int_spv_undef : Intrinsic<[llvm_i32_ty], []>;
}
diff --git a/llvm/include/llvm/IR/IntrinsicsSystemZ.td b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
index d881a1126bf2..9d21f3eb5352 100644
--- a/llvm/include/llvm/IR/IntrinsicsSystemZ.td
+++ b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
@@ -222,7 +222,7 @@ let TargetPrefix = "s390" in {
def int_s390_etnd : ClangBuiltin<"__builtin_tx_nesting_depth">,
Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
- def int_s390_ntstg : Intrinsic<[], [llvm_i64_ty, llvm_ptr64_ty],
+ def int_s390_ntstg : Intrinsic<[], [llvm_i64_ty, llvm_ptr_ty],
[IntrArgMemOnly, IntrWriteMem]>;
def int_s390_ppa_txassist : ClangBuiltin<"__builtin_tx_assist">,
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index aa007d928643..b93a5e7be1b5 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
// Type definition for a table in an intrinsic
-def llvm_table_ty : LLVMQualPointerType<llvm_i8_ty, 1>;
+def llvm_table_ty : LLVMQualPointerType<1>;
let TargetPrefix = "wasm" in { // All intrinsics start with "llvm.wasm.".
@@ -144,18 +144,18 @@ def int_wasm_lsda : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
// These don't use default attributes, because they are not nosync.
def int_wasm_memory_atomic_wait32 :
Intrinsic<[llvm_i32_ty],
- [LLVMPointerType<llvm_i32_ty>, llvm_i32_ty, llvm_i64_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i64_ty],
[IntrInaccessibleMemOrArgMemOnly, ReadOnly<ArgIndex<0>>,
NoCapture<ArgIndex<0>>, IntrHasSideEffects],
"", [SDNPMemOperand]>;
def int_wasm_memory_atomic_wait64 :
Intrinsic<[llvm_i32_ty],
- [LLVMPointerType<llvm_i64_ty>, llvm_i64_ty, llvm_i64_ty],
+ [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
[IntrInaccessibleMemOrArgMemOnly, ReadOnly<ArgIndex<0>>,
NoCapture<ArgIndex<0>>, IntrHasSideEffects],
"", [SDNPMemOperand]>;
def int_wasm_memory_atomic_notify:
- Intrinsic<[llvm_i32_ty], [LLVMPointerType<llvm_i32_ty>, llvm_i32_ty],
+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
[IntrInaccessibleMemOnly, NoCapture<ArgIndex<0>>,
IntrHasSideEffects],
"", [SDNPMemOperand]>;
@@ -175,7 +175,15 @@ def int_wasm_shuffle :
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
- [IntrNoMem, IntrSpeculatable]>;
+ [IntrNoMem, IntrSpeculatable,
+ ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>,
+ ImmArg<ArgIndex<4>>, ImmArg<ArgIndex<5>>,
+ ImmArg<ArgIndex<6>>, ImmArg<ArgIndex<7>>,
+ ImmArg<ArgIndex<8>>, ImmArg<ArgIndex<9>>,
+ ImmArg<ArgIndex<10>>, ImmArg<ArgIndex<11>>,
+ ImmArg<ArgIndex<12>>, ImmArg<ArgIndex<13>>,
+ ImmArg<ArgIndex<14>>, ImmArg<ArgIndex<15>>,
+ ImmArg<ArgIndex<16>>, ImmArg<ArgIndex<17>>]>;
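With every lane index now carrying ImmArg, calls to llvm.wasm.shuffle have to be built from constant lanes; a minimal sketch under that assumption (the helper name is made up):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/Module.h"

// Sketch only: the 16 lane indices follow the two input vectors and must all
// be compile-time constants.
llvm::Value *emitWasmShuffle(llvm::IRBuilder<> &B, llvm::Module &M,
                             llvm::Value *A, llvm::Value *V,
                             llvm::ArrayRef<int> Lanes /* 16 entries */) {
  llvm::SmallVector<llvm::Value *, 18> Args = {A, V};
  for (int L : Lanes)
    Args.push_back(B.getInt32(L));
  llvm::Function *F =
      llvm::Intrinsic::getDeclaration(&M, llvm::Intrinsic::wasm_shuffle);
  return B.CreateCall(F, Args);
}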
def int_wasm_sub_sat_signed :
DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 239f15809e29..57cd1dc47bd9 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -2053,6 +2053,67 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
DefaultAttrsIntrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
+
+ def int_x86_avx2_vpdpwsud_128
+ : ClangBuiltin<"__builtin_ia32_vpdpwsud128">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_vpdpwsud_256
+ : ClangBuiltin<"__builtin_ia32_vpdpwsud256">,
+ DefaultAttrsIntrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_vpdpwsuds_128
+ : ClangBuiltin<"__builtin_ia32_vpdpwsuds128">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_vpdpwsuds_256
+ : ClangBuiltin<"__builtin_ia32_vpdpwsuds256">,
+ DefaultAttrsIntrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_vpdpwusd_128
+ : ClangBuiltin<"__builtin_ia32_vpdpwusd128">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_vpdpwusd_256
+ : ClangBuiltin<"__builtin_ia32_vpdpwusd256">,
+ DefaultAttrsIntrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_vpdpwusds_128
+ : ClangBuiltin<"__builtin_ia32_vpdpwusds128">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_vpdpwusds_256
+ : ClangBuiltin<"__builtin_ia32_vpdpwusds256">,
+ DefaultAttrsIntrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_vpdpwuud_128
+ : ClangBuiltin<"__builtin_ia32_vpdpwuud128">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_vpdpwuud_256
+ : ClangBuiltin<"__builtin_ia32_vpdpwuud256">,
+ DefaultAttrsIntrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_vpdpwuuds_128
+ : ClangBuiltin<"__builtin_ia32_vpdpwuuds128">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx2_vpdpwuuds_256
+ : ClangBuiltin<"__builtin_ia32_vpdpwuuds256">,
+ DefaultAttrsIntrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
@@ -2558,7 +2619,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_x86mmx_ty], [IntrNoMem]>;
def int_x86_mmx_movnt_dq : ClangBuiltin<"__builtin_ia32_movntq">,
- Intrinsic<[], [llvm_ptrx86mmx_ty, llvm_x86mmx_ty], []>;
+ Intrinsic<[], [llvm_ptr_ty, llvm_x86mmx_ty], []>;
def int_x86_mmx_palignr_b : ClangBuiltin<"__builtin_ia32_palignr">,
DefaultAttrsIntrinsic<[llvm_x86mmx_ty],
@@ -5106,6 +5167,20 @@ let TargetPrefix = "x86" in {
}
//===----------------------------------------------------------------------===//
+// SHA512 intrinsics
+let TargetPrefix = "x86" in {
+def int_x86_vsha512msg1 : ClangBuiltin<"__builtin_ia32_vsha512msg1">,
+ DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+def int_x86_vsha512msg2 : ClangBuiltin<"__builtin_ia32_vsha512msg2">,
+ DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+ [IntrNoMem]>;
+def int_x86_vsha512rnds2 : ClangBuiltin<"__builtin_ia32_vsha512rnds2">,
+ DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+}
+
+//===----------------------------------------------------------------------===//
// Thread synchronization ops with timer.
let TargetPrefix = "x86" in {
def int_x86_monitorx
@@ -5352,6 +5427,16 @@ let TargetPrefix = "x86" in {
Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty],
[ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
ImmArg<ArgIndex<2>>]>;
+ // AMX-COMPLEX
+ def int_x86_tcmmimfp16ps : ClangBuiltin<"__builtin_ia32_tcmmimfp16ps">,
+ Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty],
+ [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
+ ImmArg<ArgIndex<2>>]>;
+ def int_x86_tcmmrlfp16ps : ClangBuiltin<"__builtin_ia32_tcmmrlfp16ps">,
+ Intrinsic<[], [llvm_i8_ty, llvm_i8_ty, llvm_i8_ty],
+ [ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<1>>,
+ ImmArg<ArgIndex<2>>]>;
+
// AMX - internal intrinsics
def int_x86_ldtilecfg_internal :
ClangBuiltin<"__builtin_ia32_tile_loadconfig_internal">,
@@ -5410,10 +5495,25 @@ let TargetPrefix = "x86" in {
[llvm_i16_ty, llvm_i16_ty, llvm_i16_ty,
llvm_x86amx_ty, llvm_x86amx_ty,
llvm_x86amx_ty], []>;
+  // The vector size can be smaller than the AMX register size (1024 bytes).
def int_x86_cast_vector_to_tile:
DefaultAttrsIntrinsic<[llvm_x86amx_ty], [llvm_anyvector_ty], [IntrNoMem]>;
+  // The vector size can be smaller than the AMX register size (1024 bytes).
def int_x86_cast_tile_to_vector:
DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_x86amx_ty], [IntrNoMem]>;
+
+ def int_x86_tcmmimfp16ps_internal :
+ ClangBuiltin<"__builtin_ia32_tcmmimfp16ps_internal">,
+ Intrinsic<[llvm_x86amx_ty],
+ [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty,
+ llvm_x86amx_ty, llvm_x86amx_ty,
+ llvm_x86amx_ty], []>;
+ def int_x86_tcmmrlfp16ps_internal :
+ ClangBuiltin<"__builtin_ia32_tcmmrlfp16ps_internal">,
+ Intrinsic<[llvm_x86amx_ty],
+ [llvm_i16_ty, llvm_i16_ty, llvm_i16_ty,
+ llvm_x86amx_ty, llvm_x86amx_ty,
+ llvm_x86amx_ty], []>;
}
//===----------------------------------------------------------------------===//
@@ -5488,6 +5588,49 @@ def int_x86_vcvtneps2bf16256
DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8f32_ty], [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
+// SM3 intrinsics
+let TargetPrefix = "x86" in {
+ def int_x86_vsm3msg1
+ : ClangBuiltin<"__builtin_ia32_vsm3msg1">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_vsm3msg2
+ : ClangBuiltin<"__builtin_ia32_vsm3msg2">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_vsm3rnds2
+ : ClangBuiltin<"__builtin_ia32_vsm3rnds2">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty],
+ [ImmArg<ArgIndex<3>>, IntrNoMem]>;
+}
+//===----------------------------------------------------------------------===//
+// SM4 intrinsics
+let TargetPrefix = "x86" in {
+ def int_x86_vsm4key4128
+ : ClangBuiltin<"__builtin_ia32_vsm4key4128">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_vsm4key4256
+ : ClangBuiltin<"__builtin_ia32_vsm4key4256">,
+ DefaultAttrsIntrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrNoMem]>;
+ def int_x86_vsm4rnds4128
+ : ClangBuiltin<"__builtin_ia32_vsm4rnds4128">,
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_x86_vsm4rnds4256
+ : ClangBuiltin<"__builtin_ia32_vsm4rnds4256">,
+ DefaultAttrsIntrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v8i32_ty],
+ [IntrNoMem]>;
+}
+//===----------------------------------------------------------------------===//
// RAO-INT intrinsics
let TargetPrefix = "x86" in {
def int_x86_aadd32
diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h
index a8095a5c2fd3..e5786afd7221 100644
--- a/llvm/include/llvm/IR/LLVMContext.h
+++ b/llvm/include/llvm/IR/LLVMContext.h
@@ -68,7 +68,7 @@ class LLVMContext {
public:
LLVMContextImpl *const pImpl;
LLVMContext();
- LLVMContext(LLVMContext &) = delete;
+ LLVMContext(const LLVMContext &) = delete;
LLVMContext &operator=(const LLVMContext &) = delete;
~LLVMContext();
@@ -95,6 +95,7 @@ public:
OB_clang_arc_attachedcall = 6, // "clang.arc.attachedcall"
OB_ptrauth = 7, // "ptrauth"
OB_kcfi = 8, // "kcfi"
+ OB_convergencectrl = 9, // "convergencectrl"
};
/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
@@ -315,9 +316,11 @@ public:
/// times, but only with the same value. Note that creating a pointer type or
/// otherwise querying the opaque pointer mode performs an implicit set to
/// the default value.
+ [[deprecated("Opaque pointers are always enabled")]]
void setOpaquePointers(bool Enable) const;
/// Whether typed pointers are supported. If false, all pointers are opaque.
+ [[deprecated("Always returns false")]]
bool supportsTypedPointers() const;
private:
diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h
index bd542bd0d2b2..39165453de16 100644
--- a/llvm/include/llvm/IR/MDBuilder.h
+++ b/llvm/include/llvm/IR/MDBuilder.h
@@ -78,7 +78,7 @@ public:
MDNode *createFunctionSectionPrefix(StringRef Prefix);
/// Return metadata containing the pseudo probe descriptor for a function.
- MDNode *createPseudoProbeDesc(uint64_t GUID, uint64_t Hash, Function *F);
+ MDNode *createPseudoProbeDesc(uint64_t GUID, uint64_t Hash, StringRef FName);
/// Return metadata containing llvm statistics.
MDNode *
diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h
index 954681e24156..9659dbe4f281 100644
--- a/llvm/include/llvm/IR/Metadata.h
+++ b/llvm/include/llvm/IR/Metadata.h
@@ -789,6 +789,13 @@ public:
Op.MD = nullptr;
return *this;
}
+
+ // Check if MDOperand is of type MDString and equals `Str`.
+ bool equalsStr(StringRef Str) const {
+ return isa<MDString>(this->get()) &&
+ cast<MDString>(this->get())->getString() == Str;
+ }
+
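A short usage sketch for the new helper (the "branch_weights" probe is just an example, not from the patch):

#include "llvm/IR/Metadata.h"

// Sketch only: equalsStr() folds the isa<MDString>/getString() check into one
// call when inspecting a metadata operand.
static bool isBranchWeights(const llvm::MDNode *MD) {
  return MD && MD->getNumOperands() > 0 &&
         MD->getOperand(0).equalsStr("branch_weights");
}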
~MDOperand() { untrack(); }
Metadata *get() const { return MD; }
@@ -861,18 +868,18 @@ public:
/// Whether this contains RAUW support.
bool hasReplaceableUses() const {
- return Ptr.is<ReplaceableMetadataImpl *>();
+ return isa<ReplaceableMetadataImpl *>(Ptr);
}
LLVMContext &getContext() const {
if (hasReplaceableUses())
return getReplaceableUses()->getContext();
- return *Ptr.get<LLVMContext *>();
+ return *cast<LLVMContext *>(Ptr);
}
ReplaceableMetadataImpl *getReplaceableUses() const {
if (hasReplaceableUses())
- return Ptr.get<ReplaceableMetadataImpl *>();
+ return cast<ReplaceableMetadataImpl *>(Ptr);
return nullptr;
}
@@ -1274,6 +1281,11 @@ private:
template <class NodeTy>
static void dispatchResetHash(NodeTy *, std::false_type) {}
+ /// Merge branch weights from two direct callsites.
+ static MDNode *mergeDirectCallProfMetadata(MDNode *A, MDNode *B,
+ const Instruction *AInstr,
+ const Instruction *BInstr);
+
public:
using op_iterator = const MDOperand *;
using op_range = iterator_range<op_iterator>;
@@ -1319,6 +1331,11 @@ public:
static MDNode *getMostGenericRange(MDNode *A, MDNode *B);
static MDNode *getMostGenericAliasScope(MDNode *A, MDNode *B);
static MDNode *getMostGenericAlignmentOrDereferenceable(MDNode *A, MDNode *B);
+ /// Merge !prof metadata from two instructions.
+  /// Currently only implemented for direct callsites with branch weights.
+ static MDNode *getMergedProfMetadata(MDNode *A, MDNode *B,
+ const Instruction *AInstr,
+ const Instruction *BInstr);
};
/// Tuple of metadata.
diff --git a/llvm/include/llvm/IR/Module.h b/llvm/include/llvm/IR/Module.h
index cd71a848addb..670a40b28eab 100644
--- a/llvm/include/llvm/IR/Module.h
+++ b/llvm/include/llvm/IR/Module.h
@@ -542,6 +542,24 @@ public:
llvm::Error materializeMetadata();
+ /// Detach global variable \p GV from the list but don't delete it.
+ void removeGlobalVariable(GlobalVariable *GV) { GlobalList.remove(GV); }
+ /// Remove global variable \p GV from the list and delete it.
+ void eraseGlobalVariable(GlobalVariable *GV) { GlobalList.erase(GV); }
+ /// Insert global variable \p GV at the end of the global variable list and
+ /// take ownership.
+ void insertGlobalVariable(GlobalVariable *GV) {
+ insertGlobalVariable(GlobalList.end(), GV);
+ }
+ /// Insert global variable \p GV into the global variable list before \p
+ /// Where and take ownership.
+ void insertGlobalVariable(GlobalListType::iterator Where, GlobalVariable *GV) {
+ GlobalList.insert(Where, GV);
+ }
+ // Use global_size() to get the total number of global variables.
+ // Use globals() to get the range of all global variables.
+
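Since the raw list accessors move behind private below, callers are expected to go through these helpers; a minimal sketch (the function name is illustrative):

#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"

// Sketch only: detach a global without deleting it, then hand ownership back
// to the module by re-inserting it at the end of the list.
static void moveGlobalToEnd(llvm::Module &M, llvm::GlobalVariable *GV) {
  M.removeGlobalVariable(GV);
  M.insertGlobalVariable(GV);
}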
+private:
/// @}
/// @name Direct access to the globals list, functions list, and symbol table
/// @{
@@ -554,7 +572,9 @@ public:
static GlobalListType Module::*getSublistAccess(GlobalVariable*) {
return &Module::GlobalList;
}
+ friend class llvm::SymbolTableListTraits<llvm::GlobalVariable>;
+public:
/// Get the Module's list of functions (constant).
const FunctionListType &getFunctionList() const { return FunctionList; }
/// Get the Module's list of functions.
@@ -563,6 +583,36 @@ public:
return &Module::FunctionList;
}
+ /// Detach \p Alias from the list but don't delete it.
+ void removeAlias(GlobalAlias *Alias) { AliasList.remove(Alias); }
+ /// Remove \p Alias from the list and delete it.
+ void eraseAlias(GlobalAlias *Alias) { AliasList.erase(Alias); }
+ /// Insert \p Alias at the end of the alias list and take ownership.
+ void insertAlias(GlobalAlias *Alias) { AliasList.insert(AliasList.end(), Alias); }
+ // Use alias_size() to get the size of AliasList.
+ // Use aliases() to get a range of all Alias objects in AliasList.
+
+ /// Detach \p IFunc from the list but don't delete it.
+ void removeIFunc(GlobalIFunc *IFunc) { IFuncList.remove(IFunc); }
+ /// Remove \p IFunc from the list and delete it.
+ void eraseIFunc(GlobalIFunc *IFunc) { IFuncList.erase(IFunc); }
+  /// Insert \p IFunc at the end of the ifunc list and take ownership.
+ void insertIFunc(GlobalIFunc *IFunc) { IFuncList.push_back(IFunc); }
+ // Use ifunc_size() to get the number of functions in IFuncList.
+ // Use ifuncs() to get the range of all IFuncs.
+
+ /// Detach \p MDNode from the list but don't delete it.
+ void removeNamedMDNode(NamedMDNode *MDNode) { NamedMDList.remove(MDNode); }
+ /// Remove \p MDNode from the list and delete it.
+ void eraseNamedMDNode(NamedMDNode *MDNode) { NamedMDList.erase(MDNode); }
+  /// Insert \p MDNode at the end of the named metadata list and take ownership.
+ void insertNamedMDNode(NamedMDNode *MDNode) {
+ NamedMDList.push_back(MDNode);
+ }
+  // Use named_metadata_size() to get the size of the named metadata list.
+ // Use named_metadata() to get the range of all named metadata.
+
+private: // Please use functions like insertAlias(), removeAlias() etc.
/// Get the Module's list of aliases (constant).
const AliasListType &getAliasList() const { return AliasList; }
/// Get the Module's list of aliases.
@@ -571,6 +621,7 @@ public:
static AliasListType Module::*getSublistAccess(GlobalAlias*) {
return &Module::AliasList;
}
+ friend class llvm::SymbolTableListTraits<llvm::GlobalAlias>;
/// Get the Module's list of ifuncs (constant).
const IFuncListType &getIFuncList() const { return IFuncList; }
@@ -580,6 +631,7 @@ public:
static IFuncListType Module::*getSublistAccess(GlobalIFunc*) {
return &Module::IFuncList;
}
+ friend class llvm::SymbolTableListTraits<llvm::GlobalIFunc>;
/// Get the Module's list of named metadata (constant).
const NamedMDListType &getNamedMDList() const { return NamedMDList; }
@@ -590,6 +642,7 @@ public:
return &Module::NamedMDList;
}
+public:
/// Get the symbol table of global variable and function identifiers
const ValueSymbolTable &getValueSymbolTable() const { return *ValSymTab; }
/// Get the Module's symbol table of global variable and function identifiers.
@@ -892,6 +945,11 @@ public:
/// Set that PLT should be avoid for RTLib calls.
void setRtLibUseGOT();
+ /// Get/set whether referencing global variables can use direct access
+ /// relocations on ELF targets.
+ bool getDirectAccessExternalData() const;
+ void setDirectAccessExternalData(bool Value);
+
/// Get/set whether synthesized functions should get the uwtable attribute.
UWTableKind getUwtable() const;
void setUwtable(UWTableKind Kind);
@@ -923,6 +981,8 @@ public:
unsigned getOverrideStackAlignment() const;
void setOverrideStackAlignment(unsigned Align);
+ unsigned getMaxTLSAlignment() const;
+
/// @name Utility functions for querying and setting the build SDK version
/// @{
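As a usage note for the Module changes above: the raw sublist accessors are now private, so clients are expected to go through the new insert/remove/erase helpers. A minimal sketch, assuming an existing Module M; the helper name dropDeadAliases is ours, not part of the patch.

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/Module.h"

// Erase every unused alias through the new Module API instead of touching
// getAliasList(), which is private after this change.
static void dropDeadAliases(llvm::Module &M) {
  for (llvm::GlobalAlias &GA : llvm::make_early_inc_range(M.aliases()))
    if (GA.use_empty())
      M.eraseAlias(&GA); // unlinks the alias from AliasList and deletes it
}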
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index e5236523a522..91a0133500eb 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -147,7 +147,7 @@ struct alignas(8) GlobalValueSummaryInfo {
StringRef Name;
} U;
- GlobalValueSummaryInfo(bool HaveGVs) : U(HaveGVs) {}
+ inline GlobalValueSummaryInfo(bool HaveGVs);
/// List of global value summary structures for a particular value held
/// in the GlobalValueMap. Requires a vector in the case of multiple
@@ -315,12 +315,39 @@ struct CallsiteInfo {
StackIdIndices(std::move(StackIdIndices)) {}
};
+inline raw_ostream &operator<<(raw_ostream &OS, const CallsiteInfo &SNI) {
+ OS << "Callee: " << SNI.Callee;
+ bool First = true;
+ OS << " Clones: ";
+ for (auto V : SNI.Clones) {
+ if (!First)
+ OS << ", ";
+ First = false;
+ OS << V;
+ }
+ First = true;
+ OS << " StackIds: ";
+ for (auto Id : SNI.StackIdIndices) {
+ if (!First)
+ OS << ", ";
+ First = false;
+ OS << Id;
+ }
+ return OS;
+}
+
// Allocation type assigned to an allocation reached by a given context.
-// More can be added but initially this is just noncold and cold.
+// More can be added, but currently these are notcold, cold, and hot.
// Values should be powers of two so that they can be ORed, in particular to
// track allocations that have different behavior with different calling
// contexts.
-enum class AllocationType : uint8_t { None = 0, NotCold = 1, Cold = 2 };
+enum class AllocationType : uint8_t {
+ None = 0,
+ NotCold = 1,
+ Cold = 2,
+ Hot = 4,
+ All = 7 // This should always be set to the OR of all values.
+};
/// Summary of a single MIB in a memprof metadata on allocations.
struct MIBInfo {
@@ -337,6 +364,19 @@ struct MIBInfo {
: AllocType(AllocType), StackIdIndices(std::move(StackIdIndices)) {}
};
+inline raw_ostream &operator<<(raw_ostream &OS, const MIBInfo &MIB) {
+ OS << "AllocType " << (unsigned)MIB.AllocType;
+ bool First = true;
+ OS << " StackIds: ";
+ for (auto Id : MIB.StackIdIndices) {
+ if (!First)
+ OS << ", ";
+ First = false;
+ OS << Id;
+ }
+ return OS;
+}
+
/// Summary of memprof metadata on allocations.
struct AllocInfo {
// Used to record whole program analysis cloning decisions.
@@ -359,6 +399,22 @@ struct AllocInfo {
: Versions(std::move(Versions)), MIBs(std::move(MIBs)) {}
};
+inline raw_ostream &operator<<(raw_ostream &OS, const AllocInfo &AE) {
+ bool First = true;
+ OS << "Versions: ";
+ for (auto V : AE.Versions) {
+ if (!First)
+ OS << ", ";
+ First = false;
+ OS << (unsigned)V;
+ }
+ OS << " MIB:\n";
+ for (auto &M : AE.MIBs) {
+ OS << "\t\t" << M << "\n";
+ }
+ return OS;
+}
+
/// Function and variable summary information to aid decisions and
/// implementation of importing.
class GlobalValueSummary {
@@ -519,6 +575,8 @@ public:
friend class ModuleSummaryIndex;
};
+GlobalValueSummaryInfo::GlobalValueSummaryInfo(bool HaveGVs) : U(HaveGVs) {}
+
/// Alias summary information.
class AliasSummary : public GlobalValueSummary {
ValueInfo AliaseeValueInfo;
@@ -938,12 +996,22 @@ public:
return {};
}
+ CallsitesTy &mutableCallsites() {
+ assert(Callsites);
+ return *Callsites;
+ }
+
ArrayRef<AllocInfo> allocs() const {
if (Allocs)
return *Allocs;
return {};
}
+ AllocsTy &mutableAllocs() {
+ assert(Allocs);
+ return *Allocs;
+ }
+
friend struct GraphTraits<ValueInfo>;
};
@@ -1240,6 +1308,9 @@ private:
/// Indicates that summary-based synthetic entry count propagation has run
bool HasSyntheticEntryCounts = false;
+ /// Indicates that we linked with an allocator supporting hot/cold new operators.
+ bool WithSupportsHotColdNew = false;
+
/// Indicates that distributed backend should skip compilation of the
/// module. Flag is supposed to be set by distributed ThinLTO indexing
/// when it detected that the module is not needed during the final
@@ -1256,6 +1327,9 @@ private:
// True if the index was created for a module compiled with -fsplit-lto-unit.
bool EnableSplitLTOUnit;
+ // True if the index was created for a module compiled with -funified-lto
+ bool UnifiedLTO;
+
// True if some of the modules were compiled with -fsplit-lto-unit and
// some were not. Set when the combined index is created during the thin link.
bool PartiallySplitLTOUnits = false;
@@ -1273,6 +1347,11 @@ private:
// The total number of basic blocks in the module in the per-module summary or
// the total number of basic blocks in the LTO unit in the combined index.
+ // FIXME: Putting this in the distributed ThinLTO index files breaks LTO
+ // backend caching on any BB change to any linked file. It is currently not
+ // used except in the case of a SamplePGO partial profile, and should be
+ // reevaluated/redesigned to allow more effective incremental builds in that
+ // case.
uint64_t BlockCount;
// List of unique stack ids (hashes). We use a 4B index of the id in the
@@ -1296,9 +1375,10 @@ private:
public:
// See HaveGVs variable comment.
- ModuleSummaryIndex(bool HaveGVs, bool EnableSplitLTOUnit = false)
- : HaveGVs(HaveGVs), EnableSplitLTOUnit(EnableSplitLTOUnit), Saver(Alloc),
- BlockCount(0) {}
+ ModuleSummaryIndex(bool HaveGVs, bool EnableSplitLTOUnit = false,
+ bool UnifiedLTO = false)
+ : HaveGVs(HaveGVs), EnableSplitLTOUnit(EnableSplitLTOUnit),
+ UnifiedLTO(UnifiedLTO), Saver(Alloc), BlockCount(0) {}
// Current version for the module summary in bitcode files.
// The BitcodeSummaryVersion should be bumped whenever we introduce changes
@@ -1443,6 +1523,9 @@ public:
bool hasSyntheticEntryCounts() const { return HasSyntheticEntryCounts; }
void setHasSyntheticEntryCounts() { HasSyntheticEntryCounts = true; }
+ bool withSupportsHotColdNew() const { return WithSupportsHotColdNew; }
+ void setWithSupportsHotColdNew() { WithSupportsHotColdNew = true; }
+
bool skipModuleByDistributedBackend() const {
return SkipModuleByDistributedBackend;
}
@@ -1453,6 +1536,9 @@ public:
bool enableSplitLTOUnit() const { return EnableSplitLTOUnit; }
void setEnableSplitLTOUnit() { EnableSplitLTOUnit = true; }
+ bool hasUnifiedLTO() const { return UnifiedLTO; }
+ void setUnifiedLTO() { UnifiedLTO = true; }
+
bool partiallySplitLTOUnits() const { return PartiallySplitLTOUnits; }
void setPartiallySplitLTOUnits() { PartiallySplitLTOUnits = true; }
@@ -1649,6 +1735,13 @@ public:
return &*It;
}
+ /// Return module entry for module with the given \p ModPath.
+ const ModuleInfo *getModule(StringRef ModPath) const {
+ auto It = ModulePathStringTable.find(ModPath);
+ assert(It != ModulePathStringTable.end() && "Module not registered");
+ return &*It;
+ }
+
/// Check if the given Module has any functions available for exporting
/// in the index. We consider any module present in the ModulePathStringTable
/// to have exported functions.
@@ -1745,7 +1838,7 @@ public:
void propagateAttributes(const DenseSet<GlobalValue::GUID> &PreservedSymbols);
/// Checks if we can import global variable from another module.
- bool canImportGlobalVar(GlobalValueSummary *S, bool AnalyzeRefs) const;
+ bool canImportGlobalVar(const GlobalValueSummary *S, bool AnalyzeRefs) const;
};
/// GraphTraits definition to build SCC for the index
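To illustrate two of the ModuleSummaryIndex additions above (the power-of-two AllocationType values and the UnifiedLTO constructor flag), here is a hedged sketch; the function names are illustrative only and not part of the patch.

#include "llvm/IR/ModuleSummaryIndex.h"
#include <cassert>
#include <cstdint>

// AllocationType values are powers of two, so behaviors from different calling
// contexts can be OR'ed together and then tested individually.
static bool mayBeCold(llvm::AllocationType Types) {
  return ((uint8_t)Types & (uint8_t)llvm::AllocationType::Cold) != 0;
}

static void buildIndexForUnifiedLTO() {
  // The constructor now takes a third flag for -funified-lto builds.
  llvm::ModuleSummaryIndex Index(/*HaveGVs=*/true,
                                 /*EnableSplitLTOUnit=*/false,
                                 /*UnifiedLTO=*/true);
  Index.setWithSupportsHotColdNew(); // allocator provides hot/cold new
  assert(Index.hasUnifiedLTO() && Index.withSupportsHotColdNew());
}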
diff --git a/llvm/include/llvm/IR/OptBisect.h b/llvm/include/llvm/IR/OptBisect.h
index 6ebb9bec7257..2987e5ad90c4 100644
--- a/llvm/include/llvm/IR/OptBisect.h
+++ b/llvm/include/llvm/IR/OptBisect.h
@@ -19,8 +19,6 @@
namespace llvm {
-class Pass;
-
/// Extensions to this class implement mechanisms to disable passes and
/// individual optimizations at compile time.
class OptPassGate {
diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h
index 21a0af64da4e..5fe28f8556e5 100644
--- a/llvm/include/llvm/IR/PassManager.h
+++ b/llvm/include/llvm/IR/PassManager.h
@@ -489,7 +489,7 @@ public:
auto *P = Passes[Idx].get();
P->printPipeline(OS, MapClassName2PassName);
if (Idx + 1 < Size)
- OS << ",";
+ OS << ',';
}
}
@@ -516,14 +516,14 @@ public:
PreservedAnalyses PassPA = Pass->run(IR, AM, ExtraArgs...);
- // Call onto PassInstrumentation's AfterPass callbacks immediately after
- // running the pass.
- PI.runAfterPass<IRUnitT>(*Pass, IR, PassPA);
-
// Update the analysis manager as each pass runs and potentially
// invalidates analyses.
AM.invalidate(IR, PassPA);
+ // Call onto PassInstrumentation's AfterPass callbacks immediately after
+ // running the pass.
+ PI.runAfterPass<IRUnitT>(*Pass, IR, PassPA);
+
// Finally, intersect the preserved analyses to compute the aggregate
// preserved set for this pass manager.
PA.intersect(std::move(PassPA));
@@ -1260,7 +1260,7 @@ struct RequireAnalysisPass
function_ref<StringRef(StringRef)> MapClassName2PassName) {
auto ClassName = AnalysisT::name();
auto PassName = MapClassName2PassName(ClassName);
- OS << "require<" << PassName << ">";
+ OS << "require<" << PassName << '>';
}
static bool isRequired() { return true; }
};
@@ -1286,7 +1286,7 @@ struct InvalidateAnalysisPass
function_ref<StringRef(StringRef)> MapClassName2PassName) {
auto ClassName = AnalysisT::name();
auto PassName = MapClassName2PassName(ClassName);
- OS << "invalidate<" << PassName << ">";
+ OS << "invalidate<" << PassName << '>';
}
};
@@ -1341,7 +1341,7 @@ public:
function_ref<StringRef(StringRef)> MapClassName2PassName) {
OS << "repeat<" << Count << ">(";
P.printPipeline(OS, MapClassName2PassName);
- OS << ")";
+ OS << ')';
}
private:
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 38a916cccace..621eba6bd0b6 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -445,6 +445,14 @@ inline cst_pred_ty<is_any_apint> m_AnyIntegralConstant() {
return cst_pred_ty<is_any_apint>();
}
+struct is_shifted_mask {
+ bool isValue(const APInt &C) { return C.isShiftedMask(); }
+};
+
+inline cst_pred_ty<is_shifted_mask> m_ShiftedMask() {
+ return cst_pred_ty<is_shifted_mask>();
+}
+
struct is_all_ones {
bool isValue(const APInt &C) { return C.isAllOnes(); }
};
@@ -1536,7 +1544,7 @@ struct m_SplatOrUndefMask {
int &SplatIndex;
m_SplatOrUndefMask(int &SplatIndex) : SplatIndex(SplatIndex) {}
bool match(ArrayRef<int> Mask) {
- auto First = find_if(Mask, [](int Elem) { return Elem != -1; });
+ const auto *First = find_if(Mask, [](int Elem) { return Elem != -1; });
if (First == Mask.end())
return false;
SplatIndex = *First;
@@ -1588,6 +1596,23 @@ template <typename Op_t, unsigned Opcode> struct CastClass_match {
}
};
+template <typename Op_t> struct PtrToIntSameSize_match {
+ const DataLayout &DL;
+ Op_t Op;
+
+ PtrToIntSameSize_match(const DataLayout &DL, const Op_t &OpMatch)
+ : DL(DL), Op(OpMatch) {}
+
+ template <typename OpTy> bool match(OpTy *V) {
+ if (auto *O = dyn_cast<Operator>(V))
+ return O->getOpcode() == Instruction::PtrToInt &&
+ DL.getTypeSizeInBits(O->getType()) ==
+ DL.getTypeSizeInBits(O->getOperand(0)->getType()) &&
+ Op.match(O->getOperand(0));
+ return false;
+ }
+};
+
/// Matches BitCast.
template <typename OpTy>
inline CastClass_match<OpTy, Instruction::BitCast> m_BitCast(const OpTy &Op) {
@@ -1600,6 +1625,12 @@ inline CastClass_match<OpTy, Instruction::PtrToInt> m_PtrToInt(const OpTy &Op) {
return CastClass_match<OpTy, Instruction::PtrToInt>(Op);
}
+template <typename OpTy>
+inline PtrToIntSameSize_match<OpTy> m_PtrToIntSameSize(const DataLayout &DL,
+ const OpTy &Op) {
+ return PtrToIntSameSize_match<OpTy>(DL, Op);
+}
+
/// Matches IntToPtr.
template <typename OpTy>
inline CastClass_match<OpTy, Instruction::IntToPtr> m_IntToPtr(const OpTy &Op) {
@@ -2364,6 +2395,14 @@ m_c_MaxOrMin(const LHS &L, const RHS &R) {
m_CombineOr(m_c_UMax(L, R), m_c_UMin(L, R)));
}
+template <Intrinsic::ID IntrID, typename T0, typename T1>
+inline match_combine_or<typename m_Intrinsic_Ty<T0, T1>::Ty,
+ typename m_Intrinsic_Ty<T1, T0>::Ty>
+m_c_Intrinsic(const T0 &Op0, const T1 &Op1) {
+ return m_CombineOr(m_Intrinsic<IntrID>(Op0, Op1),
+ m_Intrinsic<IntrID>(Op1, Op0));
+}
+
/// Matches FAdd with LHS and RHS in either order.
template <typename LHS, typename RHS>
inline BinaryOp_match<LHS, RHS, Instruction::FAdd, true>
@@ -2476,9 +2515,6 @@ inline InsertValue_match<Ind, Val_t, Elt_t> m_InsertValue(const Val_t &Val,
/// `ptrtoint(gep <vscale x 1 x i8>, <vscale x 1 x i8>* null, i32 1>`
/// under the right conditions determined by DataLayout.
struct VScaleVal_match {
- const DataLayout &DL;
- VScaleVal_match(const DataLayout &DL) : DL(DL) {}
-
template <typename ITy> bool match(ITy *V) {
if (m_Intrinsic<Intrinsic::vscale>().match(V))
return true;
@@ -2486,11 +2522,12 @@ struct VScaleVal_match {
Value *Ptr;
if (m_PtrToInt(m_Value(Ptr)).match(V)) {
if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
- auto *DerefTy = GEP->getSourceElementType();
- if (GEP->getNumIndices() == 1 && isa<ScalableVectorType>(DerefTy) &&
+ auto *DerefTy =
+ dyn_cast<ScalableVectorType>(GEP->getSourceElementType());
+ if (GEP->getNumIndices() == 1 && DerefTy &&
+ DerefTy->getElementType()->isIntegerTy(8) &&
m_Zero().match(GEP->getPointerOperand()) &&
- m_SpecificInt(1).match(GEP->idx_begin()->get()) &&
- DL.getTypeAllocSizeInBits(DerefTy).getKnownMinValue() == 8)
+ m_SpecificInt(1).match(GEP->idx_begin()->get()))
return true;
}
}
@@ -2499,8 +2536,8 @@ struct VScaleVal_match {
}
};
-inline VScaleVal_match m_VScale(const DataLayout &DL) {
- return VScaleVal_match(DL);
+inline VScaleVal_match m_VScale() {
+ return VScaleVal_match();
}
template <typename LHS, typename RHS, unsigned Opcode, bool Commutable = false>
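To make the new matchers above concrete (m_ShiftedMask, m_PtrToIntSameSize, m_c_Intrinsic, and the DataLayout-free m_VScale), here is a short combiner-style sketch; the surrounding function is invented for the example.

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"

using namespace llvm;
using namespace llvm::PatternMatch;

static bool matchesNewPatterns(Value *V, const DataLayout &DL) {
  Value *X, *Y;
  // `and X, C` where C is a shifted mask such as 0x0F0.
  if (match(V, m_And(m_Value(X), m_ShiftedMask())))
    return true;
  // A width-preserving ptrtoint feeding llvm.umin, in either operand order.
  if (match(V, m_c_Intrinsic<Intrinsic::umin>(
                   m_PtrToIntSameSize(DL, m_Value(X)), m_Value(Y))))
    return true;
  // m_VScale() no longer needs the DataLayout argument.
  return match(V, m_VScale());
}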
diff --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h
index 79726c0eee8b..cdbd498a8be6 100644
--- a/llvm/include/llvm/IR/PseudoProbe.h
+++ b/llvm/include/llvm/IR/PseudoProbe.h
@@ -30,7 +30,8 @@ enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall };
enum class PseudoProbeAttributes {
Reserved = 0x1,
- Sentinel = 0x2, // A place holder for split function entry address.
+ Sentinel = 0x2, // A placeholder for the split function entry address.
+ HasDiscriminator = 0x4, // For probes with a discriminator.
};
// The saturated distribution factor representing 100% for block probes.
@@ -77,10 +78,22 @@ public:
constexpr static uint8_t FullDistributionFactor = 100;
};
+class PseudoProbeDescriptor {
+ uint64_t FunctionGUID;
+ uint64_t FunctionHash;
+
+public:
+ PseudoProbeDescriptor(uint64_t GUID, uint64_t Hash)
+ : FunctionGUID(GUID), FunctionHash(Hash) {}
+ uint64_t getFunctionGUID() const { return FunctionGUID; }
+ uint64_t getFunctionHash() const { return FunctionHash; }
+};
+
struct PseudoProbe {
uint32_t Id;
uint32_t Type;
uint32_t Attr;
+ uint32_t Discriminator;
// Distribution factor that estimates the portion of the real execution count.
// A saturated distribution factor stands for 1.0 or 100%. A pseudo probe has
// a factor with a value ranging from 0.0 to 1.0.
@@ -91,6 +104,10 @@ static inline bool isSentinelProbe(uint32_t Flags) {
return Flags & (uint32_t)PseudoProbeAttributes::Sentinel;
}
+static inline bool hasDiscriminator(uint32_t Flags) {
+ return Flags & (uint32_t)PseudoProbeAttributes::HasDiscriminator;
+}
+
std::optional<PseudoProbe> extractProbe(const Instruction &Inst);
void setProbeDistributionFactor(Instruction &Inst, float Factor);
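A brief sketch of how the new HasDiscriminator attribute and the Discriminator field might be consumed together with extractProbe(); the helper name is hypothetical.

#include "llvm/IR/PseudoProbe.h"
#include <cstdint>
#include <optional>

// Return the probe discriminator for Inst, or 0 when the probe has none.
static uint32_t probeDiscriminatorOrZero(const llvm::Instruction &Inst) {
  if (std::optional<llvm::PseudoProbe> Probe = llvm::extractProbe(Inst))
    if (llvm::hasDiscriminator(Probe->Attr))
      return Probe->Discriminator;
  return 0;
}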
diff --git a/llvm/include/llvm/IR/ReplaceConstant.h b/llvm/include/llvm/IR/ReplaceConstant.h
index 1d6b10d9a78b..72823c9ab164 100644
--- a/llvm/include/llvm/IR/ReplaceConstant.h
+++ b/llvm/include/llvm/IR/ReplaceConstant.h
@@ -19,40 +19,12 @@
namespace llvm {
-class ConstantExpr;
-class Instruction;
-class Use;
-template <typename PtrType> class SmallPtrSetImpl;
+template <typename T> class ArrayRef;
+class Constant;
-/// The given instruction \p I contains given constant expression \p CE as one
-/// of its operands, possibly nested within constant expression trees. Convert
-/// all reachable paths from contant expression operands of \p I to \p CE into
-/// corresponding instructions, insert them before \p I, update operands of \p I
-/// accordingly, and if required, return all such converted instructions at
-/// \p Insts.
-void convertConstantExprsToInstructions(
- Instruction *I, ConstantExpr *CE,
- SmallPtrSetImpl<Instruction *> *Insts = nullptr);
-
-/// The given instruction \p I contains constant expression CE within the
-/// constant expression trees of it`s constant expression operands, and
-/// \p CEPaths holds all the reachable paths (to CE) from such constant
-/// expression trees of \p I. Convert constant expressions within these paths
-/// into corresponding instructions, insert them before \p I, update operands of
-/// \p I accordingly, and if required, return all such converted instructions at
-/// \p Insts.
-void convertConstantExprsToInstructions(
- Instruction *I,
- std::map<Use *, std::vector<std::vector<ConstantExpr *>>> &CEPaths,
- SmallPtrSetImpl<Instruction *> *Insts = nullptr);
-
-/// Given an instruction \p I which uses given constant expression \p CE as
-/// operand, either directly or nested within other constant expressions, return
-/// all reachable paths from the constant expression operands of \p I to \p CE,
-/// and return collected paths at \p CEPaths.
-void collectConstantExprPaths(
- Instruction *I, ConstantExpr *CE,
- std::map<Use *, std::vector<std::vector<ConstantExpr *>>> &CEPaths);
+/// Replace constant expression users of the given constants with
+/// instructions. Return whether anything was changed.
+bool convertUsersOfConstantsToInstructions(ArrayRef<Constant *> Consts);
} // end namespace llvm
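The path-based helpers removed above are folded into a single entry point that takes the constants whose constant-expression users should be rewritten. A hedged usage sketch, with an invented helper name:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ReplaceConstant.h"

// Expand constant-expression uses of every global in M into instructions.
static bool expandConstExprUsesOfGlobals(llvm::Module &M) {
  llvm::SmallVector<llvm::Constant *> Worklist;
  for (llvm::GlobalVariable &GV : M.globals())
    Worklist.push_back(&GV);
  // Returns true if any constant expression user was rewritten.
  return llvm::convertUsersOfConstantsToInstructions(Worklist);
}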
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index ca3903574da8..228cb0aecb88 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -279,6 +279,20 @@ HANDLE_LIBCALL(LLRINT_F64, "llrint")
HANDLE_LIBCALL(LLRINT_F80, "llrintl")
HANDLE_LIBCALL(LLRINT_F128, "llrintl")
HANDLE_LIBCALL(LLRINT_PPCF128, "llrintl")
+HANDLE_LIBCALL(LDEXP_F32, "ldexpf")
+HANDLE_LIBCALL(LDEXP_F64, "ldexp")
+HANDLE_LIBCALL(LDEXP_F80, "ldexpl")
+HANDLE_LIBCALL(LDEXP_F128, "ldexpl")
+HANDLE_LIBCALL(LDEXP_PPCF128, "ldexpl")
+HANDLE_LIBCALL(FREXP_F32, "frexpf")
+HANDLE_LIBCALL(FREXP_F64, "frexp")
+HANDLE_LIBCALL(FREXP_F80, "frexpl")
+HANDLE_LIBCALL(FREXP_F128, "frexpl")
+HANDLE_LIBCALL(FREXP_PPCF128, "frexpl")
+
+// Floating point environment
+HANDLE_LIBCALL(FEGETENV, "fegetenv")
+HANDLE_LIBCALL(FESETENV, "fesetenv")
// Conversion
HANDLE_LIBCALL(FPEXT_F32_PPCF128, "__gcc_stoq")
diff --git a/llvm/include/llvm/IR/SSAContext.h b/llvm/include/llvm/IR/SSAContext.h
index 7551adff1e12..557ec752c216 100644
--- a/llvm/include/llvm/IR/SSAContext.h
+++ b/llvm/include/llvm/IR/SSAContext.h
@@ -43,9 +43,11 @@ public:
using InstructionT = Instruction;
using ValueRefT = Value *;
using ConstValueRefT = const Value *;
- static Value *ValueRefNull;
+ using UseT = Use;
using DominatorTreeT = DominatorTreeBase<BlockT, false>;
+ static constexpr Value *ValueRefNull = nullptr;
+
void setFunction(Function &Fn);
Function *getFunction() const { return F; }
@@ -63,7 +65,7 @@ public:
const BasicBlock &block);
static bool comesBefore(const Instruction *lhs, const Instruction *rhs);
- static bool isConstantValuePhi(const Instruction &Instr);
+ static bool isConstantOrUndefValuePhi(const Instruction &Instr);
const BasicBlock *getDefBlock(const Value *value) const;
Printable print(const BasicBlock *Block) const;
diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h
index eb63a2140310..1bdeb85afa3c 100644
--- a/llvm/include/llvm/IR/StructuralHash.h
+++ b/llvm/include/llvm/IR/StructuralHash.h
@@ -1,4 +1,4 @@
-//===- llvm/IR/StructuralHash.h - IR Hash for expensive checks --*- C++ -*-===//
+//===- llvm/IR/StructuralHash.h - IR Hashing --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -14,11 +14,8 @@
#ifndef LLVM_IR_STRUCTURALHASH_H
#define LLVM_IR_STRUCTURALHASH_H
-#ifdef EXPENSIVE_CHECKS
-
#include <cstdint>
-// This header is only meant to be used when -DEXPENSIVE_CHECKS is set
namespace llvm {
class Function;
@@ -30,5 +27,3 @@ uint64_t StructuralHash(const Module &M);
} // end namespace llvm
#endif
-
-#endif // LLVM_IR_STRUCTURALHASH_H
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
index 37e4c32ee251..fc558e6ee5db 100644
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -206,6 +206,13 @@ public:
/// Return true if this is a target extension type.
bool isTargetExtTy() const { return getTypeID() == TargetExtTyID; }
+ /// Return true if this is a target extension type with a scalable layout.
+ bool isScalableTargetExtTy() const;
+
+ /// Return true if this is a scalable vector type or a target extension type
+ /// with a scalable layout.
+ bool isScalableTy() const;
+
/// Return true if this is a FP type or a vector of FP.
bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); }
@@ -249,7 +256,8 @@ public:
bool isPointerTy() const { return getTypeID() == PointerTyID; }
/// True if this is an instance of an opaque PointerType.
- bool isOpaquePointerTy() const;
+ LLVM_DEPRECATED("Use isPointerTy() instead", "isPointerTy")
+ bool isOpaquePointerTy() const { return isPointerTy(); };
/// Return true if this is a pointer type or a vector of pointer types.
bool isPtrOrPtrVectorTy() const { return getScalarType()->isPointerTy(); }
@@ -401,23 +409,12 @@ public:
inline StringRef getTargetExtName() const;
- /// This method is deprecated without replacement. Pointer element types are
- /// not available with opaque pointers.
- [[deprecated("Deprecated without replacement, see "
- "https://llvm.org/docs/OpaquePointers.html for context and "
- "migration instructions")]]
- Type *getPointerElementType() const {
- return getNonOpaquePointerElementType();
- }
-
/// Only use this method in code that is not reachable with opaque pointers,
/// or part of deprecated methods that will be removed as part of the opaque
/// pointers transition.
+ [[deprecated("Pointers no longer have element types")]]
Type *getNonOpaquePointerElementType() const {
- assert(getTypeID() == PointerTyID);
- assert(NumContainedTys &&
- "Attempting to get element type of opaque pointer");
- return ContainedTys[0];
+ llvm_unreachable("Pointers no longer have element types");
}
/// Given vector type, change the element type,
@@ -502,6 +499,8 @@ public:
static PointerType *getInt16PtrTy(LLVMContext &C, unsigned AS = 0);
static PointerType *getInt32PtrTy(LLVMContext &C, unsigned AS = 0);
static PointerType *getInt64PtrTy(LLVMContext &C, unsigned AS = 0);
+ static Type *getWasm_ExternrefTy(LLVMContext &C);
+ static Type *getWasm_FuncrefTy(LLVMContext &C);
/// Return a pointer to the current type. This is equivalent to
/// PointerType::get(Foo, AddrSpace).
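A small sketch of the Type.h additions above; with opaque pointers there is no element type to inspect, so queries go through the type itself. Function names here are ours, not part of the API.

#include "llvm/IR/Type.h"

// True for scalable vectors and for target extension types whose layout
// scales with vscale.
static bool needsVScaleRelativeLayout(llvm::Type *Ty) {
  return Ty->isScalableTy();
}

// New convenience accessor for the WebAssembly reference types.
static llvm::Type *wasmExternrefTy(llvm::LLVMContext &C) {
  return llvm::Type::getWasm_ExternrefTy(C);
}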
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 2a1a34e33ea4..61c7a3e50129 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -106,6 +106,12 @@
#define VP_PROPERTY_CONSTRAINEDFP(HASROUND, HASEXCEPT, INTRINID)
#endif
+// The intrinsic and/or SDNode has the same function as this ISD Opcode.
+// \p SDOPC The opcode of the instruction with the same function.
+#ifndef VP_PROPERTY_FUNCTIONAL_SDOPC
+#define VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC)
+#endif
+
// Map this VP intrinsic to its canonical functional intrinsic.
// \p INTRIN The non-VP intrinsics with the same function.
#ifndef VP_PROPERTY_FUNCTIONAL_INTRINSIC
@@ -149,100 +155,113 @@
#error \
"The internal helper macro HELPER_REGISTER_BINARY_INT_VP is already defined!"
#endif
-#define HELPER_REGISTER_BINARY_INT_VP(VPID, VPSD, IROPC) \
+#define HELPER_REGISTER_BINARY_INT_VP(VPID, VPSD, IROPC, SDOPC) \
BEGIN_REGISTER_VP(VPID, 2, 3, VPSD, -1) \
VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \
+ VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) \
VP_PROPERTY_BINARYOP \
END_REGISTER_VP(VPID, VPSD)
// llvm.vp.add(x,y,mask,vlen)
-HELPER_REGISTER_BINARY_INT_VP(vp_add, VP_ADD, Add)
+HELPER_REGISTER_BINARY_INT_VP(vp_add, VP_ADD, Add, ADD)
// llvm.vp.and(x,y,mask,vlen)
-HELPER_REGISTER_BINARY_INT_VP(vp_and, VP_AND, And)
+HELPER_REGISTER_BINARY_INT_VP(vp_and, VP_AND, And, AND)
// llvm.vp.ashr(x,y,mask,vlen)
-HELPER_REGISTER_BINARY_INT_VP(vp_ashr, VP_ASHR, AShr)
+HELPER_REGISTER_BINARY_INT_VP(vp_ashr, VP_ASHR, AShr, SRA)
// llvm.vp.lshr(x,y,mask,vlen)
-HELPER_REGISTER_BINARY_INT_VP(vp_lshr, VP_LSHR, LShr)
+HELPER_REGISTER_BINARY_INT_VP(vp_lshr, VP_LSHR, LShr, SRL)
// llvm.vp.mul(x,y,mask,vlen)
-HELPER_REGISTER_BINARY_INT_VP(vp_mul, VP_MUL, Mul)
+HELPER_REGISTER_BINARY_INT_VP(vp_mul, VP_MUL, Mul, MUL)
// llvm.vp.or(x,y,mask,vlen)
-HELPER_REGISTER_BINARY_INT_VP(vp_or, VP_OR, Or)
+HELPER_REGISTER_BINARY_INT_VP(vp_or, VP_OR, Or, OR)
// llvm.vp.sdiv(x,y,mask,vlen)
-HELPER_REGISTER_BINARY_INT_VP(vp_sdiv, VP_SDIV, SDiv)
+HELPER_REGISTER_BINARY_INT_VP(vp_sdiv, VP_SDIV, SDiv, SDIV)
// llvm.vp.shl(x,y,mask,vlen)
-HELPER_REGISTER_BINARY_INT_VP(vp_shl, VP_SHL, Shl)
+HELPER_REGISTER_BINARY_INT_VP(vp_shl, VP_SHL, Shl, SHL)
// llvm.vp.srem(x,y,mask,vlen)
-HELPER_REGISTER_BINARY_INT_VP(vp_srem, VP_SREM, SRem)
+HELPER_REGISTER_BINARY_INT_VP(vp_srem, VP_SREM, SRem, SREM)
// llvm.vp.sub(x,y,mask,vlen)
-HELPER_REGISTER_BINARY_INT_VP(vp_sub, VP_SUB, Sub)
+HELPER_REGISTER_BINARY_INT_VP(vp_sub, VP_SUB, Sub, SUB)
// llvm.vp.udiv(x,y,mask,vlen)
-HELPER_REGISTER_BINARY_INT_VP(vp_udiv, VP_UDIV, UDiv)
+HELPER_REGISTER_BINARY_INT_VP(vp_udiv, VP_UDIV, UDiv, UDIV)
// llvm.vp.urem(x,y,mask,vlen)
-HELPER_REGISTER_BINARY_INT_VP(vp_urem, VP_UREM, URem)
+HELPER_REGISTER_BINARY_INT_VP(vp_urem, VP_UREM, URem, UREM)
// llvm.vp.xor(x,y,mask,vlen)
-HELPER_REGISTER_BINARY_INT_VP(vp_xor, VP_XOR, Xor)
+HELPER_REGISTER_BINARY_INT_VP(vp_xor, VP_XOR, Xor, XOR)
#undef HELPER_REGISTER_BINARY_INT_VP
// llvm.vp.smin(x,y,mask,vlen)
BEGIN_REGISTER_VP(vp_smin, 2, 3, VP_SMIN, -1)
VP_PROPERTY_BINARYOP
+VP_PROPERTY_FUNCTIONAL_SDOPC(SMIN)
END_REGISTER_VP(vp_smin, VP_SMIN)
// llvm.vp.smax(x,y,mask,vlen)
BEGIN_REGISTER_VP(vp_smax, 2, 3, VP_SMAX, -1)
VP_PROPERTY_BINARYOP
+VP_PROPERTY_FUNCTIONAL_SDOPC(SMAX)
END_REGISTER_VP(vp_smax, VP_SMAX)
// llvm.vp.umin(x,y,mask,vlen)
BEGIN_REGISTER_VP(vp_umin, 2, 3, VP_UMIN, -1)
VP_PROPERTY_BINARYOP
+VP_PROPERTY_FUNCTIONAL_SDOPC(UMIN)
END_REGISTER_VP(vp_umin, VP_UMIN)
// llvm.vp.umax(x,y,mask,vlen)
BEGIN_REGISTER_VP(vp_umax, 2, 3, VP_UMAX, -1)
VP_PROPERTY_BINARYOP
+VP_PROPERTY_FUNCTIONAL_SDOPC(UMAX)
END_REGISTER_VP(vp_umax, VP_UMAX)
-// llvm.vp.abs(x,mask,vlen,is_int_min_poison)
-BEGIN_REGISTER_VP(vp_abs, 1, 2, VP_ABS, -1)
+// llvm.vp.abs(x,is_int_min_poison,mask,vlen)
+BEGIN_REGISTER_VP_INTRINSIC(vp_abs, 2, 3)
+BEGIN_REGISTER_VP_SDNODE(VP_ABS, -1, vp_abs, 1, 2)
+HELPER_MAP_VPID_TO_VPSD(vp_abs, VP_ABS)
+VP_PROPERTY_FUNCTIONAL_SDOPC(ABS)
END_REGISTER_VP(vp_abs, VP_ABS)
// llvm.vp.bswap(x,mask,vlen)
BEGIN_REGISTER_VP(vp_bswap, 1, 2, VP_BSWAP, -1)
+VP_PROPERTY_FUNCTIONAL_SDOPC(BSWAP)
END_REGISTER_VP(vp_bswap, VP_BSWAP)
// llvm.vp.bitreverse(x,mask,vlen)
BEGIN_REGISTER_VP(vp_bitreverse, 1, 2, VP_BITREVERSE, -1)
+VP_PROPERTY_FUNCTIONAL_SDOPC(BITREVERSE)
END_REGISTER_VP(vp_bitreverse, VP_BITREVERSE)
// llvm.vp.ctpop(x,mask,vlen)
BEGIN_REGISTER_VP(vp_ctpop, 1, 2, VP_CTPOP, -1)
+VP_PROPERTY_FUNCTIONAL_SDOPC(CTPOP)
END_REGISTER_VP(vp_ctpop, VP_CTPOP)
-// llvm.vp.ctlz(x,mask,vlen, is_zero_poison)
-BEGIN_REGISTER_VP_INTRINSIC(vp_ctlz, 1, 2)
+// llvm.vp.ctlz(x,is_zero_poison,mask,vlen)
+BEGIN_REGISTER_VP_INTRINSIC(vp_ctlz, 2, 3)
BEGIN_REGISTER_VP_SDNODE(VP_CTLZ, -1, vp_ctlz, 1, 2)
+VP_PROPERTY_FUNCTIONAL_SDOPC(CTLZ)
END_REGISTER_VP_SDNODE(VP_CTLZ)
BEGIN_REGISTER_VP_SDNODE(VP_CTLZ_ZERO_UNDEF, -1, vp_ctlz_zero_undef, 1, 2)
END_REGISTER_VP_SDNODE(VP_CTLZ_ZERO_UNDEF)
END_REGISTER_VP_INTRINSIC(vp_ctlz)
-// llvm.vp.cttz(x,mask,vlen, is_zero_poison)
-BEGIN_REGISTER_VP_INTRINSIC(vp_cttz, 1, 2)
+// llvm.vp.cttz(x,is_zero_poison,mask,vlen)
+BEGIN_REGISTER_VP_INTRINSIC(vp_cttz, 2, 3)
BEGIN_REGISTER_VP_SDNODE(VP_CTTZ, -1, vp_cttz, 1, 2)
+VP_PROPERTY_FUNCTIONAL_SDOPC(CTTZ)
END_REGISTER_VP_SDNODE(VP_CTTZ)
BEGIN_REGISTER_VP_SDNODE(VP_CTTZ_ZERO_UNDEF, -1, vp_cttz_zero_undef, 1, 2)
END_REGISTER_VP_SDNODE(VP_CTTZ_ZERO_UNDEF)
@@ -250,10 +269,12 @@ END_REGISTER_VP_INTRINSIC(vp_cttz)
// llvm.vp.fshl(x,y,z,mask,vlen)
BEGIN_REGISTER_VP(vp_fshl, 3, 4, VP_FSHL, -1)
+VP_PROPERTY_FUNCTIONAL_SDOPC(FSHL)
END_REGISTER_VP(vp_fshl, VP_FSHL)
// llvm.vp.fshr(x,y,z,mask,vlen)
BEGIN_REGISTER_VP(vp_fshr, 3, 4, VP_FSHR, -1)
+VP_PROPERTY_FUNCTIONAL_SDOPC(FSHR)
END_REGISTER_VP(vp_fshr, VP_FSHR)
///// } Integer Arithmetic
@@ -265,93 +286,110 @@ END_REGISTER_VP(vp_fshr, VP_FSHR)
#error \
"The internal helper macro HELPER_REGISTER_BINARY_FP_VP is already defined!"
#endif
-#define HELPER_REGISTER_BINARY_FP_VP(OPSUFFIX, VPSD, IROPC) \
+#define HELPER_REGISTER_BINARY_FP_VP(OPSUFFIX, VPSD, IROPC, SDOPC) \
BEGIN_REGISTER_VP(vp_##OPSUFFIX, 2, 3, VPSD, -1) \
VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \
VP_PROPERTY_CONSTRAINEDFP(1, 1, experimental_constrained_##OPSUFFIX) \
+ VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) \
VP_PROPERTY_BINARYOP \
END_REGISTER_VP(vp_##OPSUFFIX, VPSD)
// llvm.vp.fadd(x,y,mask,vlen)
-HELPER_REGISTER_BINARY_FP_VP(fadd, VP_FADD, FAdd)
+HELPER_REGISTER_BINARY_FP_VP(fadd, VP_FADD, FAdd, FADD)
// llvm.vp.fsub(x,y,mask,vlen)
-HELPER_REGISTER_BINARY_FP_VP(fsub, VP_FSUB, FSub)
+HELPER_REGISTER_BINARY_FP_VP(fsub, VP_FSUB, FSub, FSUB)
// llvm.vp.fmul(x,y,mask,vlen)
-HELPER_REGISTER_BINARY_FP_VP(fmul, VP_FMUL, FMul)
+HELPER_REGISTER_BINARY_FP_VP(fmul, VP_FMUL, FMul, FMUL)
// llvm.vp.fdiv(x,y,mask,vlen)
-HELPER_REGISTER_BINARY_FP_VP(fdiv, VP_FDIV, FDiv)
+HELPER_REGISTER_BINARY_FP_VP(fdiv, VP_FDIV, FDiv, FDIV)
// llvm.vp.frem(x,y,mask,vlen)
-HELPER_REGISTER_BINARY_FP_VP(frem, VP_FREM, FRem)
+HELPER_REGISTER_BINARY_FP_VP(frem, VP_FREM, FRem, FREM)
#undef HELPER_REGISTER_BINARY_FP_VP
// llvm.vp.fneg(x,mask,vlen)
BEGIN_REGISTER_VP(vp_fneg, 1, 2, VP_FNEG, -1)
VP_PROPERTY_FUNCTIONAL_OPC(FNeg)
+VP_PROPERTY_FUNCTIONAL_SDOPC(FNEG)
END_REGISTER_VP(vp_fneg, VP_FNEG)
// llvm.vp.fabs(x,mask,vlen)
BEGIN_REGISTER_VP(vp_fabs, 1, 2, VP_FABS, -1)
+VP_PROPERTY_FUNCTIONAL_SDOPC(FABS)
END_REGISTER_VP(vp_fabs, VP_FABS)
// llvm.vp.sqrt(x,mask,vlen)
BEGIN_REGISTER_VP(vp_sqrt, 1, 2, VP_SQRT, -1)
+VP_PROPERTY_FUNCTIONAL_SDOPC(FSQRT)
END_REGISTER_VP(vp_sqrt, VP_SQRT)
// llvm.vp.fma(x,y,z,mask,vlen)
BEGIN_REGISTER_VP(vp_fma, 3, 4, VP_FMA, -1)
VP_PROPERTY_CONSTRAINEDFP(1, 1, experimental_constrained_fma)
+VP_PROPERTY_FUNCTIONAL_SDOPC(FMA)
END_REGISTER_VP(vp_fma, VP_FMA)
// llvm.vp.fmuladd(x,y,z,mask,vlen)
BEGIN_REGISTER_VP(vp_fmuladd, 3, 4, VP_FMULADD, -1)
VP_PROPERTY_CONSTRAINEDFP(1, 1, experimental_constrained_fmuladd)
+VP_PROPERTY_FUNCTIONAL_SDOPC(FMAD)
END_REGISTER_VP(vp_fmuladd, VP_FMULADD)
// llvm.vp.copysign(x,y,mask,vlen)
BEGIN_REGISTER_VP(vp_copysign, 2, 3, VP_FCOPYSIGN, -1)
+VP_PROPERTY_BINARYOP
+VP_PROPERTY_FUNCTIONAL_SDOPC(FCOPYSIGN)
END_REGISTER_VP(vp_copysign, VP_FCOPYSIGN)
// llvm.vp.minnum(x, y, mask,vlen)
BEGIN_REGISTER_VP(vp_minnum, 2, 3, VP_FMINNUM, -1)
VP_PROPERTY_BINARYOP
+VP_PROPERTY_FUNCTIONAL_SDOPC(FMINNUM)
END_REGISTER_VP(vp_minnum, VP_FMINNUM)
// llvm.vp.maxnum(x, y, mask,vlen)
BEGIN_REGISTER_VP(vp_maxnum, 2, 3, VP_FMAXNUM, -1)
VP_PROPERTY_BINARYOP
+VP_PROPERTY_FUNCTIONAL_SDOPC(FMAXNUM)
END_REGISTER_VP(vp_maxnum, VP_FMAXNUM)
// llvm.vp.ceil(x,mask,vlen)
BEGIN_REGISTER_VP(vp_ceil, 1, 2, VP_FCEIL, -1)
+VP_PROPERTY_FUNCTIONAL_SDOPC(FCEIL)
END_REGISTER_VP(vp_ceil, VP_FCEIL)
// llvm.vp.floor(x,mask,vlen)
BEGIN_REGISTER_VP(vp_floor, 1, 2, VP_FFLOOR, -1)
+VP_PROPERTY_FUNCTIONAL_SDOPC(FFLOOR)
END_REGISTER_VP(vp_floor, VP_FFLOOR)
// llvm.vp.round(x,mask,vlen)
BEGIN_REGISTER_VP(vp_round, 1, 2, VP_FROUND, -1)
+VP_PROPERTY_FUNCTIONAL_SDOPC(FROUND)
END_REGISTER_VP(vp_round, VP_FROUND)
// llvm.vp.roundeven(x,mask,vlen)
BEGIN_REGISTER_VP(vp_roundeven, 1, 2, VP_FROUNDEVEN, -1)
+VP_PROPERTY_FUNCTIONAL_SDOPC(FROUNDEVEN)
END_REGISTER_VP(vp_roundeven, VP_FROUNDEVEN)
// llvm.vp.roundtozero(x,mask,vlen)
BEGIN_REGISTER_VP(vp_roundtozero, 1, 2, VP_FROUNDTOZERO, -1)
+VP_PROPERTY_FUNCTIONAL_SDOPC(FTRUNC)
END_REGISTER_VP(vp_roundtozero, VP_FROUNDTOZERO)
// llvm.vp.rint(x,mask,vlen)
BEGIN_REGISTER_VP(vp_rint, 1, 2, VP_FRINT, -1)
+VP_PROPERTY_FUNCTIONAL_SDOPC(FRINT)
END_REGISTER_VP(vp_rint, VP_FRINT)
// llvm.vp.nearbyint(x,mask,vlen)
BEGIN_REGISTER_VP(vp_nearbyint, 1, 2, VP_FNEARBYINT, -1)
+VP_PROPERTY_FUNCTIONAL_SDOPC(FNEARBYINT)
END_REGISTER_VP(vp_nearbyint, VP_FNEARBYINT)
///// } Floating-Point Arithmetic
@@ -363,30 +401,31 @@ END_REGISTER_VP(vp_nearbyint, VP_FNEARBYINT)
#error \
"The internal helper macro HELPER_REGISTER_FP_CAST_VP is already defined!"
#endif
-#define HELPER_REGISTER_FP_CAST_VP(OPSUFFIX, VPSD, IROPC, HASROUND) \
+#define HELPER_REGISTER_FP_CAST_VP(OPSUFFIX, VPSD, IROPC, SDOPC, HASROUND) \
BEGIN_REGISTER_VP(vp_##OPSUFFIX, 1, 2, VPSD, -1) \
VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \
+ VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) \
VP_PROPERTY_CONSTRAINEDFP(HASROUND, 1, experimental_constrained_##OPSUFFIX) \
VP_PROPERTY_CASTOP \
END_REGISTER_VP(vp_##OPSUFFIX, VPSD)
// llvm.vp.fptoui(x,mask,vlen)
-HELPER_REGISTER_FP_CAST_VP(fptoui, VP_FP_TO_UINT, FPToUI, 0)
+HELPER_REGISTER_FP_CAST_VP(fptoui, VP_FP_TO_UINT, FPToUI, FP_TO_UINT, 0)
// llvm.vp.fptosi(x,mask,vlen)
-HELPER_REGISTER_FP_CAST_VP(fptosi, VP_FP_TO_SINT, FPToSI, 0)
+HELPER_REGISTER_FP_CAST_VP(fptosi, VP_FP_TO_SINT, FPToSI, FP_TO_SINT, 0)
// llvm.vp.uitofp(x,mask,vlen)
-HELPER_REGISTER_FP_CAST_VP(uitofp, VP_UINT_TO_FP, UIToFP, 1)
+HELPER_REGISTER_FP_CAST_VP(uitofp, VP_UINT_TO_FP, UIToFP, UINT_TO_FP, 1)
// llvm.vp.sitofp(x,mask,vlen)
-HELPER_REGISTER_FP_CAST_VP(sitofp, VP_SINT_TO_FP, SIToFP, 1)
+HELPER_REGISTER_FP_CAST_VP(sitofp, VP_SINT_TO_FP, SIToFP, SINT_TO_FP, 1)
// llvm.vp.fptrunc(x,mask,vlen)
-HELPER_REGISTER_FP_CAST_VP(fptrunc, VP_FP_ROUND, FPTrunc, 1)
+HELPER_REGISTER_FP_CAST_VP(fptrunc, VP_FP_ROUND, FPTrunc, FP_ROUND, 1)
// llvm.vp.fpext(x,mask,vlen)
-HELPER_REGISTER_FP_CAST_VP(fpext, VP_FP_EXTEND, FPExt, 0)
+HELPER_REGISTER_FP_CAST_VP(fpext, VP_FP_EXTEND, FPExt, FP_EXTEND, 0)
#undef HELPER_REGISTER_FP_CAST_VP
@@ -396,26 +435,33 @@ HELPER_REGISTER_FP_CAST_VP(fpext, VP_FP_EXTEND, FPExt, 0)
#error \
"The internal helper macro HELPER_REGISTER_INT_CAST_VP is already defined!"
#endif
-#define HELPER_REGISTER_INT_CAST_VP(OPSUFFIX, VPSD, IROPC) \
+#define HELPER_REGISTER_INT_CAST_VP(OPSUFFIX, VPSD, IROPC, SDOPC) \
BEGIN_REGISTER_VP(vp_##OPSUFFIX, 1, 2, VPSD, -1) \
VP_PROPERTY_FUNCTIONAL_OPC(IROPC) \
+ VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) \
VP_PROPERTY_CASTOP \
END_REGISTER_VP(vp_##OPSUFFIX, VPSD)
// llvm.vp.trunc(x,mask,vlen)
-HELPER_REGISTER_INT_CAST_VP(trunc, VP_TRUNCATE, Trunc)
+HELPER_REGISTER_INT_CAST_VP(trunc, VP_TRUNCATE, Trunc, TRUNCATE)
// llvm.vp.zext(x,mask,vlen)
-HELPER_REGISTER_INT_CAST_VP(zext, VP_ZERO_EXTEND, ZExt)
+HELPER_REGISTER_INT_CAST_VP(zext, VP_ZERO_EXTEND, ZExt, ZERO_EXTEND)
// llvm.vp.sext(x,mask,vlen)
-HELPER_REGISTER_INT_CAST_VP(sext, VP_SIGN_EXTEND, SExt)
+HELPER_REGISTER_INT_CAST_VP(sext, VP_SIGN_EXTEND, SExt, SIGN_EXTEND)
// llvm.vp.ptrtoint(x,mask,vlen)
-HELPER_REGISTER_INT_CAST_VP(ptrtoint, VP_PTRTOINT, PtrToInt)
+BEGIN_REGISTER_VP(vp_ptrtoint, 1, 2, VP_PTRTOINT, -1)
+VP_PROPERTY_FUNCTIONAL_OPC(PtrToInt)
+VP_PROPERTY_CASTOP
+END_REGISTER_VP(vp_ptrtoint, VP_PTRTOINT)
// llvm.vp.inttoptr(x,mask,vlen)
-HELPER_REGISTER_INT_CAST_VP(inttoptr, VP_INTTOPTR, IntToPtr)
+BEGIN_REGISTER_VP(vp_inttoptr, 1, 2, VP_INTTOPTR, -1)
+VP_PROPERTY_FUNCTIONAL_OPC(IntToPtr)
+VP_PROPERTY_CASTOP
+END_REGISTER_VP(vp_inttoptr, VP_INTTOPTR)
#undef HELPER_REGISTER_INT_CAST_VP
@@ -606,6 +652,7 @@ HELPER_REGISTER_REDUCTION_SEQ_VP(vp_reduce_fmul, VP_REDUCE_FMUL,
// llvm.vp.select(cond,on_true,on_false,vlen)
BEGIN_REGISTER_VP(vp_select, std::nullopt, 3, VP_SELECT, -1)
VP_PROPERTY_FUNCTIONAL_OPC(Select)
+VP_PROPERTY_FUNCTIONAL_SDOPC(VSELECT)
END_REGISTER_VP(vp_select, VP_SELECT)
// llvm.vp.merge(cond,on_true,on_false,pivot)
@@ -630,5 +677,6 @@ END_REGISTER_VP(experimental_vp_splice, EXPERIMENTAL_VP_SPLICE)
#undef VP_PROPERTY_CONSTRAINEDFP
#undef VP_PROPERTY_FUNCTIONAL_INTRINSIC
#undef VP_PROPERTY_FUNCTIONAL_OPC
+#undef VP_PROPERTY_FUNCTIONAL_SDOPC
#undef VP_PROPERTY_MEMOP
#undef VP_PROPERTY_REDUCTION
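The new VP_PROPERTY_FUNCTIONAL_SDOPC property is consumed like the other .def properties: a client defines the macro before including the file and lets the X-macro expansion build a table. A hedged sketch modeled on the existing opcode-mapping switches; the function name is ours.

#include "llvm/CodeGen/ISDOpcodes.h"
#include <optional>

// Map a VP SelectionDAG opcode to its functionally equivalent non-VP opcode,
// using the SDOPC property added in this patch.
static std::optional<unsigned> getFunctionalSDOpcodeForVP(unsigned VPOpcode) {
  switch (VPOpcode) {
  default:
    break;
#define BEGIN_REGISTER_VP_SDNODE(VPSD, ...) case llvm::ISD::VPSD:
#define VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) return llvm::ISD::SDOPC;
#define END_REGISTER_VP_SDNODE(VPSD) break;
#include "llvm/IR/VPIntrinsics.def"
  }
  return std::nullopt;
}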
diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h
index d0cd83bec89d..16ae451114b5 100644
--- a/llvm/include/llvm/IR/Value.h
+++ b/llvm/include/llvm/IR/Value.h
@@ -744,6 +744,11 @@ public:
static_cast<const Value *>(this)->stripInBoundsOffsets(Func));
}
+ /// If this ptr is provably equal to \p Other plus a constant offset, return
+ /// that offset in bytes. Essentially `ptr this` minus `ptr Other`.
+ std::optional<int64_t> getPointerOffsetFrom(const Value *Other,
+ const DataLayout &DL) const;
+
/// Return true if the memory object referred to by V can be freed in the
/// scope for which the SSA value defining the allocation is statically
/// defined. E.g. deallocation after the static scope of a value does not
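A short sketch of the new Value::getPointerOffsetFrom() helper; the surrounding function and its parameters are illustrative assumptions.

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"
#include <cstdint>
#include <optional>

// True when Ptr1 is provably Ptr2 plus EltSize bytes.
static bool ptrsAreAdjacent(const llvm::Value *Ptr1, const llvm::Value *Ptr2,
                            uint64_t EltSize, const llvm::DataLayout &DL) {
  if (std::optional<int64_t> Off = Ptr1->getPointerOffsetFrom(Ptr2, DL))
    return *Off == (int64_t)EltSize;
  return false;
}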
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 1164cd872eb9..c6fee47b464b 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -50,21 +50,13 @@ void initializeTarget(PassRegistry&);
void initializeAAEvalLegacyPassPass(PassRegistry&);
void initializeAAResultsWrapperPassPass(PassRegistry&);
-void initializeADCELegacyPassPass(PassRegistry&);
-void initializeAddDiscriminatorsLegacyPassPass(PassRegistry&);
-void initializeAlignmentFromAssumptionsPass(PassRegistry&);
void initializeAlwaysInlinerLegacyPassPass(PassRegistry&);
void initializeAssignmentTrackingAnalysisPass(PassRegistry &);
-void initializeAssumeSimplifyPassLegacyPassPass(PassRegistry &);
void initializeAssumeBuilderPassLegacyPassPass(PassRegistry &);
-void initializeAnnotation2MetadataLegacyPass(PassRegistry &);
void initializeAssumptionCacheTrackerPass(PassRegistry&);
void initializeAtomicExpandPass(PassRegistry&);
-void initializeAttributorLegacyPassPass(PassRegistry&);
-void initializeAttributorCGSCCLegacyPassPass(PassRegistry &);
void initializeBasicBlockSectionsProfileReaderPass(PassRegistry &);
void initializeBasicBlockSectionsPass(PassRegistry &);
-void initializeBDCELegacyPassPass(PassRegistry&);
void initializeBarrierNoopPass(PassRegistry&);
void initializeBasicAAWrapperPassPass(PassRegistry&);
void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry&);
@@ -83,33 +75,25 @@ void initializeCFGuardLongjmpPass(PassRegistry&);
void initializeCFGViewerLegacyPassPass(PassRegistry&);
void initializeCFIFixupPass(PassRegistry&);
void initializeCFIInstrInserterPass(PassRegistry&);
+void initializeCallBrPreparePass(PassRegistry &);
void initializeCallGraphDOTPrinterPass(PassRegistry&);
void initializeCallGraphPrinterLegacyPassPass(PassRegistry&);
void initializeCallGraphViewerPass(PassRegistry&);
void initializeCallGraphWrapperPassPass(PassRegistry&);
-void initializeCallSiteSplittingLegacyPassPass(PassRegistry&);
-void initializeCalledValuePropagationLegacyPassPass(PassRegistry &);
void initializeCheckDebugMachineModulePass(PassRegistry &);
void initializeCodeGenPreparePass(PassRegistry&);
void initializeComplexDeinterleavingLegacyPassPass(PassRegistry&);
void initializeConstantHoistingLegacyPassPass(PassRegistry&);
-void initializeConstantMergeLegacyPassPass(PassRegistry&);
-void initializeCorrelatedValuePropagationPass(PassRegistry&);
void initializeCostModelAnalysisPass(PassRegistry&);
-void initializeCrossDSOCFIPass(PassRegistry&);
void initializeCycleInfoWrapperPassPass(PassRegistry &);
void initializeDAEPass(PassRegistry&);
void initializeDAHPass(PassRegistry&);
void initializeDCELegacyPassPass(PassRegistry&);
-void initializeDFAJumpThreadingLegacyPassPass(PassRegistry &);
-void initializeDSELegacyPassPass(PassRegistry&);
void initializeDeadMachineInstructionElimPass(PassRegistry&);
void initializeDebugifyMachineModulePass(PassRegistry &);
void initializeDelinearizationPass(PassRegistry&);
-void initializeDemandedBitsWrapperPassPass(PassRegistry&);
void initializeDependenceAnalysisWrapperPassPass(PassRegistry&);
void initializeDetectDeadLanesPass(PassRegistry&);
-void initializeDivRemPairsLegacyPassPass(PassRegistry&);
void initializeDomOnlyPrinterWrapperPassPass(PassRegistry &);
void initializeDomOnlyViewerWrapperPassPass(PassRegistry &);
void initializeDomPrinterWrapperPassPass(PassRegistry &);
@@ -125,7 +109,6 @@ void initializeEarlyMachineLICMPass(PassRegistry&);
void initializeEarlyTailDuplicatePass(PassRegistry&);
void initializeEdgeBundlesPass(PassRegistry&);
void initializeEHContGuardCatchretPass(PassRegistry &);
-void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry&);
void initializeExpandLargeFpConvertLegacyPassPass(PassRegistry&);
void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&);
void initializeExpandMemCmpPassPass(PassRegistry&);
@@ -140,48 +123,32 @@ void initializeFinalizeMachineBundlesPass(PassRegistry&);
void initializeFixIrreduciblePass(PassRegistry &);
void initializeFixupStatepointCallerSavedPass(PassRegistry&);
void initializeFlattenCFGLegacyPassPass(PassRegistry &);
-void initializeFloat2IntLegacyPassPass(PassRegistry&);
-void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&);
void initializeFuncletLayoutPass(PassRegistry&);
void initializeGCMachineCodeAnalysisPass(PassRegistry&);
void initializeGCModuleInfoPass(PassRegistry&);
-void initializeGVNHoistLegacyPassPass(PassRegistry&);
void initializeGVNLegacyPassPass(PassRegistry&);
-void initializeGVNSinkLegacyPassPass(PassRegistry&);
-void initializeGlobalDCELegacyPassPass(PassRegistry&);
void initializeGlobalMergePass(PassRegistry&);
-void initializeGlobalOptLegacyPassPass(PassRegistry&);
-void initializeGlobalSplitPass(PassRegistry&);
void initializeGlobalsAAWrapperPassPass(PassRegistry&);
void initializeGuardWideningLegacyPassPass(PassRegistry&);
-void initializeHardwareLoopsPass(PassRegistry&);
+void initializeHardwareLoopsLegacyPass(PassRegistry&);
void initializeMIRProfileLoaderPassPass(PassRegistry &);
-void initializeHotColdSplittingLegacyPassPass(PassRegistry&);
-void initializeIPSCCPLegacyPassPass(PassRegistry&);
-void initializeIRCELegacyPassPass(PassRegistry&);
-void initializeIROutlinerLegacyPassPass(PassRegistry&);
void initializeIRSimilarityIdentifierWrapperPassPass(PassRegistry&);
void initializeIRTranslatorPass(PassRegistry&);
void initializeIVUsersWrapperPassPass(PassRegistry&);
void initializeIfConverterPass(PassRegistry&);
void initializeImmutableModuleSummaryIndexWrapperPassPass(PassRegistry&);
void initializeImplicitNullChecksPass(PassRegistry&);
-void initializeIndVarSimplifyLegacyPassPass(PassRegistry&);
void initializeIndirectBrExpandPassPass(PassRegistry&);
void initializeInferAddressSpacesPass(PassRegistry&);
-void initializeInferFunctionAttrsLegacyPassPass(PassRegistry&);
-void initializeInjectTLIMappingsLegacyPass(PassRegistry &);
void initializeInstCountLegacyPassPass(PassRegistry &);
-void initializeInstNamerPass(PassRegistry&);
void initializeInstSimplifyLegacyPassPass(PassRegistry &);
void initializeInstructionCombiningPassPass(PassRegistry&);
void initializeInstructionSelectPass(PassRegistry&);
void initializeInterleavedAccessPass(PassRegistry&);
void initializeInterleavedLoadCombinePass(PassRegistry &);
-void initializeInternalizeLegacyPassPass(PassRegistry&);
void initializeIntervalPartitionPass(PassRegistry&);
void initializeJMCInstrumenterPass(PassRegistry&);
-void initializeJumpThreadingPass(PassRegistry&);
+void initializeKCFIPass(PassRegistry &);
void initializeLCSSAVerificationPassPass(PassRegistry&);
void initializeLCSSAWrapperPassPass(PassRegistry&);
void initializeLazyBlockFrequencyInfoPassPass(PassRegistry&);
@@ -189,14 +156,11 @@ void initializeLazyBranchProbabilityInfoPassPass(PassRegistry&);
void initializeLazyMachineBlockFrequencyInfoPassPass(PassRegistry&);
void initializeLazyValueInfoPrinterPass(PassRegistry&);
void initializeLazyValueInfoWrapperPassPass(PassRegistry&);
-void initializeLegacyDivergenceAnalysisPass(PassRegistry&);
void initializeLegacyLICMPassPass(PassRegistry&);
void initializeLegacyLoopSinkPassPass(PassRegistry&);
void initializeLegalizerPass(PassRegistry&);
void initializeGISelCSEAnalysisWrapperPassPass(PassRegistry &);
void initializeGISelKnownBitsAnalysisPass(PassRegistry &);
-void initializeLibCallsShrinkWrapLegacyPassPass(PassRegistry&);
-void initializeLintLegacyPassPass(PassRegistry &);
void initializeLiveDebugValuesPass(PassRegistry&);
void initializeLiveDebugVariablesPass(PassRegistry&);
void initializeLiveIntervalsPass(PassRegistry&);
@@ -208,31 +172,18 @@ void initializeLoadStoreOptPass(PassRegistry &);
void initializeLoadStoreVectorizerLegacyPassPass(PassRegistry&);
void initializeLocalStackSlotPassPass(PassRegistry&);
void initializeLocalizerPass(PassRegistry&);
-void initializeLoopAccessLegacyAnalysisPass(PassRegistry&);
void initializeLoopDataPrefetchLegacyPassPass(PassRegistry&);
-void initializeLoopDeletionLegacyPassPass(PassRegistry&);
-void initializeLoopDistributeLegacyPass(PassRegistry&);
void initializeLoopExtractorLegacyPassPass(PassRegistry &);
void initializeLoopGuardWideningLegacyPassPass(PassRegistry&);
-void initializeLoopFuseLegacyPass(PassRegistry&);
-void initializeLoopIdiomRecognizeLegacyPassPass(PassRegistry&);
void initializeLoopInfoWrapperPassPass(PassRegistry&);
void initializeLoopInstSimplifyLegacyPassPass(PassRegistry&);
-void initializeLoopInterchangeLegacyPassPass(PassRegistry &);
-void initializeLoopFlattenLegacyPassPass(PassRegistry&);
-void initializeLoopLoadEliminationPass(PassRegistry&);
void initializeLoopPassPass(PassRegistry&);
void initializeLoopPredicationLegacyPassPass(PassRegistry&);
-void initializeLoopRerollLegacyPassPass(PassRegistry &);
void initializeLoopRotateLegacyPassPass(PassRegistry&);
void initializeLoopSimplifyCFGLegacyPassPass(PassRegistry&);
void initializeLoopSimplifyPass(PassRegistry&);
void initializeLoopStrengthReducePass(PassRegistry&);
-void initializeLoopUnrollAndJamPass(PassRegistry&);
void initializeLoopUnrollPass(PassRegistry&);
-void initializeLoopVectorizePass(PassRegistry&);
-void initializeLoopVersioningLICMLegacyPassPass(PassRegistry &);
-void initializeLoopVersioningLegacyPassPass(PassRegistry &);
void initializeLowerAtomicLegacyPassPass(PassRegistry&);
void initializeLowerConstantIntrinsicsPass(PassRegistry&);
void initializeLowerEmuTLSPass(PassRegistry&);
@@ -243,8 +194,6 @@ void initializeLowerWidenableConditionLegacyPassPass(PassRegistry&);
void initializeLowerIntrinsicsPass(PassRegistry&);
void initializeLowerInvokeLegacyPassPass(PassRegistry&);
void initializeLowerSwitchLegacyPassPass(PassRegistry &);
-void initializeLowerMatrixIntrinsicsLegacyPassPass(PassRegistry &);
-void initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(PassRegistry &);
void initializeKCFIPass(PassRegistry &);
void initializeMIRAddFSDiscriminatorsPass(PassRegistry &);
void initializeMIRCanonicalizerPass(PassRegistry &);
@@ -280,43 +229,30 @@ void initializeMachineTraceMetricsPass(PassRegistry&);
void initializeMachineUniformityInfoPrinterPassPass(PassRegistry &);
void initializeMachineUniformityAnalysisPassPass(PassRegistry &);
void initializeMachineVerifierPassPass(PassRegistry&);
-void initializeMemCpyOptLegacyPassPass(PassRegistry&);
-void initializeMemDepPrinterPass(PassRegistry&);
-void initializeMemDerefPrinterPass(PassRegistry&);
void initializeMemoryDependenceWrapperPassPass(PassRegistry&);
-void initializeMemorySSAPrinterLegacyPassPass(PassRegistry&);
void initializeMemorySSAWrapperPassPass(PassRegistry&);
-void initializeMergeFunctionsLegacyPassPass(PassRegistry&);
void initializeMergeICmpsLegacyPassPass(PassRegistry &);
void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry&);
-void initializeMetaRenamerPass(PassRegistry&);
-void initializeModuleDebugInfoLegacyPrinterPass(PassRegistry &);
void initializeModuleSummaryIndexWrapperPassPass(PassRegistry&);
void initializeModuloScheduleTestPass(PassRegistry&);
-void initializeMustExecutePrinterPass(PassRegistry&);
-void initializeMustBeExecutedContextPrinterPass(PassRegistry&);
void initializeNaryReassociateLegacyPassPass(PassRegistry&);
-void initializeNewGVNLegacyPassPass(PassRegistry&);
void initializeObjCARCContractLegacyPassPass(PassRegistry &);
void initializeOptimizationRemarkEmitterWrapperPassPass(PassRegistry&);
void initializeOptimizePHIsPass(PassRegistry&);
void initializePEIPass(PassRegistry&);
void initializePHIEliminationPass(PassRegistry&);
-void initializePartialInlinerLegacyPassPass(PassRegistry&);
void initializePartiallyInlineLibCallsLegacyPassPass(PassRegistry&);
void initializePatchableFunctionPass(PassRegistry&);
void initializePeepholeOptimizerPass(PassRegistry&);
void initializePhiValuesWrapperPassPass(PassRegistry&);
void initializePhysicalRegisterUsageInfoPass(PassRegistry&);
-void initializePlaceBackedgeSafepointsImplPass(PassRegistry&);
-void initializePlaceSafepointsPass(PassRegistry&);
+void initializePlaceBackedgeSafepointsLegacyPassPass(PassRegistry &);
void initializePostDomOnlyPrinterWrapperPassPass(PassRegistry &);
void initializePostDomOnlyViewerWrapperPassPass(PassRegistry &);
void initializePostDomPrinterWrapperPassPass(PassRegistry &);
void initializePostDomViewerWrapperPassPass(PassRegistry &);
void initializePostDominatorTreeWrapperPassPass(PassRegistry&);
void initializePostMachineSchedulerPass(PassRegistry&);
-void initializePostOrderFunctionAttrsLegacyPassPass(PassRegistry&);
void initializePostRAHazardRecognizerPass(PassRegistry&);
void initializePostRAMachineSinkingPass(PassRegistry&);
void initializePostRASchedulerPass(PassRegistry&);
@@ -351,12 +287,7 @@ void initializeRemoveRedundantDebugValuesPass(PassRegistry&);
void initializeRenameIndependentSubregsPass(PassRegistry&);
void initializeReplaceWithVeclibLegacyPass(PassRegistry &);
void initializeResetMachineFunctionPass(PassRegistry&);
-void initializeReversePostOrderFunctionAttrsLegacyPassPass(PassRegistry&);
-void initializeRewriteStatepointsForGCLegacyPassPass(PassRegistry &);
-void initializeRewriteSymbolsLegacyPassPass(PassRegistry&);
-void initializeSCCPLegacyPassPass(PassRegistry&);
void initializeSCEVAAWrapperPassPass(PassRegistry&);
-void initializeSLPVectorizerPass(PassRegistry&);
void initializeSROALegacyPassPass(PassRegistry&);
void initializeSafeStackLegacyPassPass(PassRegistry&);
void initializeSafepointIRVerifierPass(PassRegistry&);
@@ -369,7 +300,6 @@ void initializeScopedNoAliasAAWrapperPassPass(PassRegistry&);
void initializeSeparateConstOffsetFromGEPLegacyPassPass(PassRegistry &);
void initializeShadowStackGCLoweringPass(PassRegistry&);
void initializeShrinkWrapPass(PassRegistry&);
-void initializeSimpleInlinerPass(PassRegistry&);
void initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry&);
void initializeSingleLoopExtractorPass(PassRegistry&);
void initializeSinkingLegacyPassPass(PassRegistry&);
@@ -385,14 +315,8 @@ void initializeStackSafetyGlobalInfoWrapperPassPass(PassRegistry &);
void initializeStackSafetyInfoWrapperPassPass(PassRegistry &);
void initializeStackSlotColoringPass(PassRegistry&);
void initializeStraightLineStrengthReduceLegacyPassPass(PassRegistry &);
-void initializeStripDeadDebugInfoPass(PassRegistry&);
-void initializeStripDeadPrototypesLegacyPassPass(PassRegistry&);
-void initializeStripDebugDeclarePass(PassRegistry&);
void initializeStripDebugMachineModulePass(PassRegistry &);
void initializeStripGCRelocatesLegacyPass(PassRegistry &);
-void initializeStripNonDebugSymbolsPass(PassRegistry&);
-void initializeStripNonLineTableDebugLegacyPassPass(PassRegistry &);
-void initializeStripSymbolsPass(PassRegistry&);
void initializeStructurizeCFGLegacyPassPass(PassRegistry &);
void initializeTailCallElimPass(PassRegistry&);
void initializeTailDuplicatePass(PassRegistry&);
@@ -409,11 +333,9 @@ void initializeUnifyLoopExitsLegacyPassPass(PassRegistry &);
void initializeUnpackMachineBundlesPass(PassRegistry&);
void initializeUnreachableBlockElimLegacyPassPass(PassRegistry&);
void initializeUnreachableMachineBlockElimPass(PassRegistry&);
-void initializeVectorCombineLegacyPassPass(PassRegistry&);
void initializeVerifierLegacyPassPass(PassRegistry&);
void initializeVirtRegMapPass(PassRegistry&);
void initializeVirtRegRewriterPass(PassRegistry&);
-void initializeWarnMissedTransformationsLegacyPass(PassRegistry &);
void initializeWasmEHPreparePass(PassRegistry&);
void initializeWinEHPreparePass(PassRegistry&);
void initializeWriteBitcodePassPass(PassRegistry&);
diff --git a/llvm/include/llvm/InterfaceStub/IFSStub.h b/llvm/include/llvm/InterfaceStub/IFSStub.h
index 119669106733..09f96f72950c 100644
--- a/llvm/include/llvm/InterfaceStub/IFSStub.h
+++ b/llvm/include/llvm/InterfaceStub/IFSStub.h
@@ -54,9 +54,9 @@ struct IFSSymbol {
explicit IFSSymbol(std::string SymbolName) : Name(std::move(SymbolName)) {}
std::string Name;
std::optional<uint64_t> Size;
- IFSSymbolType Type;
- bool Undefined;
- bool Weak;
+ IFSSymbolType Type = IFSSymbolType::NoType;
+ bool Undefined = false;
+ bool Weak = false;
std::optional<std::string> Warning;
bool operator<(const IFSSymbol &RHS) const { return Name < RHS.Name; }
};
@@ -97,6 +97,7 @@ struct IFSStub {
IFSStub() = default;
IFSStub(const IFSStub &Stub);
IFSStub(IFSStub &&Stub);
+ virtual ~IFSStub() = default;
};
// Create an alias class for IFSStub.
diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h
index 7a746592c9fc..5c23ba4f7ac4 100644
--- a/llvm/include/llvm/LTO/Config.h
+++ b/llvm/include/llvm/LTO/Config.h
@@ -57,6 +57,7 @@ struct Config {
CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default;
CodeGenFileType CGFileType = CGFT_ObjectFile;
unsigned OptLevel = 2;
+ bool VerifyEach = false;
bool DisableVerify = false;
/// Use the standard optimization pipeline.
@@ -179,10 +180,6 @@ struct Config {
/// Add FSAFDO discriminators.
bool AddFSDiscriminator = false;
- /// Use opaque pointer types. Used to call LLVMContext::setOpaquePointers
- /// unless already set by the `-opaque-pointers` commandline option.
- bool OpaquePointers = true;
-
/// If this field is set, LTO will write input file paths and symbol
/// resolutions here in llvm-lto2 command line flag format. This can be
/// used for testing and for running the LTO pipeline outside of the linker
@@ -298,7 +295,6 @@ struct LTOLLVMContext : LLVMContext {
enableDebugTypeODRUniquing();
setDiagnosticHandler(
std::make_unique<LTOLLVMDiagnosticHandler>(&DiagHandler), true);
- setOpaquePointers(C.OpaquePointers);
}
DiagnosticHandlerFunction DiagHandler;
};
diff --git a/llvm/include/llvm/LTO/LTO.h b/llvm/include/llvm/LTO/LTO.h
index 70d5af91e523..150b31e3e8e4 100644
--- a/llvm/include/llvm/LTO/LTO.h
+++ b/llvm/include/llvm/LTO/LTO.h
@@ -78,9 +78,8 @@ namespace lto {
/// Given the original \p Path to an output file, replace any path
/// prefix matching \p OldPrefix with \p NewPrefix. Also, create the
/// resulting directory if it does not yet exist.
-std::string getThinLTOOutputFile(const std::string &Path,
- const std::string &OldPrefix,
- const std::string &NewPrefix);
+std::string getThinLTOOutputFile(StringRef Path, StringRef OldPrefix,
+ StringRef NewPrefix);
/// Setup optimization remarks.
Expected<std::unique_ptr<ToolOutputFile>> setupLLVMOptimizationRemarks(
@@ -96,6 +95,11 @@ setupStatsFile(StringRef StatsFilename);
/// ordered indices to elements in the input array.
std::vector<int> generateModulesOrdering(ArrayRef<BitcodeModule *> R);
+/// Updates MemProf attributes (and metadata) based on whether the index
+/// has recorded that we are linking with allocation libraries containing
+/// the necessary APIs for downstream transformations.
+void updateMemProfAttributes(Module &Mod, const ModuleSummaryIndex &Index);
+
class LTO;
struct SymbolResolution;
class ThinBackendProc;
@@ -219,11 +223,14 @@ ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism,
/// ShouldEmitImportsFiles is true it also writes a list of imported files to a
/// similar path with ".imports" appended instead.
/// LinkedObjectsFile is an output stream to write the list of object files for
-/// the final ThinLTO linking. Can be nullptr.
-/// OnWrite is callback which receives module identifier and notifies LTO user
-/// that index file for the module (and optionally imports file) was created.
+/// the final ThinLTO linking. Can be nullptr. If LinkedObjectsFile is not
+/// nullptr and NativeObjectPrefix is not empty then it replaces the prefix of
+/// the objects with NativeObjectPrefix instead of NewPrefix. OnWrite is a
+/// callback that receives the module identifier and notifies the LTO user that
+/// the index file for the module (and optionally an imports file) was created.
ThinBackend createWriteIndexesThinBackend(std::string OldPrefix,
std::string NewPrefix,
+ std::string NativeObjectPrefix,
bool ShouldEmitImportsFiles,
raw_fd_ostream *LinkedObjectsFile,
IndexWriteCallback OnWrite);
@@ -248,13 +255,26 @@ class LTO {
friend InputFile;
public:
+ /// Unified LTO modes
+ enum LTOKind {
+ /// Any LTO mode without Unified LTO. The default mode.
+ LTOK_Default,
+
+ /// Regular LTO, with Unified LTO enabled.
+ LTOK_UnifiedRegular,
+
+ /// ThinLTO, with Unified LTO enabled.
+ LTOK_UnifiedThin,
+ };
+
/// Create an LTO object. A default constructed LTO object has a reasonable
/// production configuration, but you can customize it by passing arguments to
/// this constructor.
/// FIXME: We do currently require the DiagHandler field to be set in Conf.
/// Until that is fixed, a Config argument is required.
LTO(Config Conf, ThinBackend Backend = nullptr,
- unsigned ParallelCodeGenParallelismLevel = 1);
+ unsigned ParallelCodeGenParallelismLevel = 1,
+ LTOKind LTOMode = LTOK_Default);
~LTO();
/// Add an input file to the LTO link, using the provided symbol resolutions.
@@ -289,7 +309,7 @@ private:
const Config &Conf);
struct CommonResolution {
uint64_t Size = 0;
- MaybeAlign Align;
+ Align Alignment;
/// Record if at least one instance of the common was marked as prevailing
bool Prevailing = false;
};
@@ -414,6 +434,9 @@ private:
mutable bool CalledGetMaxTasks = false;
+ // LTO mode when using Unified LTO.
+ LTOKind LTOMode;
+
// Use Optional to distinguish false from not yet initialized.
std::optional<bool> EnableSplitLTOUnit;
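A hedged sketch of how a linker driver might combine the widened createWriteIndexesThinBackend signature with the new unified-ThinLTO mode; the prefixes, helper name, and null callbacks are placeholders rather than part of the change.

    #include "llvm/LTO/LTO.h"
    #include <memory>
    using namespace llvm;

    static std::unique_ptr<lto::LTO> makeUnifiedThinLink(lto::Config Conf) {
      lto::ThinBackend Backend = lto::createWriteIndexesThinBackend(
          /*OldPrefix=*/"", /*NewPrefix=*/"obj/",
          /*NativeObjectPrefix=*/"native/", /*ShouldEmitImportsFiles=*/true,
          /*LinkedObjectsFile=*/nullptr, /*OnWrite=*/nullptr);
      return std::make_unique<lto::LTO>(std::move(Conf), std::move(Backend),
                                        /*ParallelCodeGenParallelismLevel=*/1,
                                        lto::LTO::LTOK_UnifiedThin);
    }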
diff --git a/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h b/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
index 2df51829e5c8..37e9b175c452 100644
--- a/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
+++ b/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
@@ -17,13 +17,13 @@
#include "llvm-c/lto.h"
#include "llvm/ADT/StringSet.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/LTO/LTO.h"
#include "llvm/Support/CachePruning.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
#include <string>
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index c76d7adcbc27..7420ea64e954 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -69,9 +69,6 @@ namespace {
return;
(void) llvm::createAAEvalPass();
- (void) llvm::createAggressiveDCEPass();
- (void)llvm::createBitTrackingDCEPass();
- (void) llvm::createAlignmentFromAssumptionsPass();
(void) llvm::createBasicAAWrapperPass();
(void) llvm::createSCEVAAWrapperPass();
(void) llvm::createTypeBasedAAWrapperPass();
@@ -81,49 +78,32 @@ namespace {
(void) llvm::createCallGraphViewerPass();
(void) llvm::createCFGSimplificationPass();
(void) llvm::createStructurizeCFGPass();
- (void) llvm::createLibCallsShrinkWrapPass();
- (void) llvm::createCalledValuePropagationPass();
- (void) llvm::createConstantMergePass();
(void) llvm::createCostModelAnalysisPass();
(void) llvm::createDeadArgEliminationPass();
(void) llvm::createDeadCodeEliminationPass();
- (void) llvm::createDeadStoreEliminationPass();
(void) llvm::createDependenceAnalysisWrapperPass();
(void) llvm::createDomOnlyPrinterWrapperPassPass();
(void) llvm::createDomPrinterWrapperPassPass();
(void) llvm::createDomOnlyViewerWrapperPassPass();
(void) llvm::createDomViewerWrapperPassPass();
- (void) llvm::createFunctionInliningPass();
(void) llvm::createAlwaysInlinerLegacyPass();
- (void) llvm::createGlobalDCEPass();
- (void) llvm::createGlobalOptimizerPass();
(void) llvm::createGlobalsAAWrapperPass();
(void) llvm::createGuardWideningPass();
(void) llvm::createLoopGuardWideningPass();
- (void) llvm::createIPSCCPPass();
- (void) llvm::createInductiveRangeCheckEliminationPass();
- (void) llvm::createIndVarSimplifyPass();
(void) llvm::createInstSimplifyLegacyPass();
(void) llvm::createInstructionCombiningPass();
- (void) llvm::createInternalizePass();
(void) llvm::createJMCInstrumenterPass();
+ (void) llvm::createKCFIPass();
(void) llvm::createLCSSAPass();
- (void) llvm::createLegacyDivergenceAnalysisPass();
(void) llvm::createLICMPass();
(void) llvm::createLoopSinkPass();
(void) llvm::createLazyValueInfoPass();
(void) llvm::createLoopExtractorPass();
- (void) llvm::createLoopInterchangePass();
- (void) llvm::createLoopFlattenPass();
(void) llvm::createLoopPredicationPass();
(void) llvm::createLoopSimplifyPass();
(void) llvm::createLoopSimplifyCFGPass();
(void) llvm::createLoopStrengthReducePass();
- (void) llvm::createLoopRerollPass();
(void) llvm::createLoopUnrollPass();
- (void) llvm::createLoopUnrollAndJamPass();
- (void) llvm::createLoopVersioningLICMPass();
- (void) llvm::createLoopIdiomPass();
(void) llvm::createLoopRotatePass();
(void) llvm::createLowerConstantIntrinsicsPass();
(void) llvm::createLowerExpectIntrinsicPass();
@@ -145,37 +125,19 @@ namespace {
(void) llvm::createRegionOnlyViewerPass();
(void) llvm::createRegionPrinterPass();
(void) llvm::createRegionViewerPass();
- (void) llvm::createSCCPPass();
(void) llvm::createSafeStackPass();
(void) llvm::createSROAPass();
(void) llvm::createSingleLoopExtractorPass();
- (void) llvm::createStripSymbolsPass();
- (void) llvm::createStripNonDebugSymbolsPass();
- (void) llvm::createStripDeadDebugInfoPass();
- (void) llvm::createStripDeadPrototypesPass();
(void) llvm::createTailCallEliminationPass();
(void)llvm::createTLSVariableHoistPass();
- (void) llvm::createJumpThreadingPass();
- (void) llvm::createDFAJumpThreadingPass();
(void) llvm::createUnifyFunctionExitNodesPass();
(void) llvm::createInstCountPass();
(void) llvm::createConstantHoistingPass();
(void) llvm::createCodeGenPreparePass();
(void) llvm::createEarlyCSEPass();
- (void) llvm::createGVNHoistPass();
(void) llvm::createMergedLoadStoreMotionPass();
(void) llvm::createGVNPass();
- (void) llvm::createNewGVNPass();
- (void) llvm::createMemCpyOptPass();
- (void) llvm::createLoopDeletionPass();
(void) llvm::createPostDomTree();
- (void) llvm::createInstructionNamerPass();
- (void) llvm::createMetaRenamerPass();
- (void) llvm::createAttributorLegacyPass();
- (void) llvm::createAttributorCGSCCLegacyPass();
- (void) llvm::createPostOrderFunctionAttrsLegacyPass();
- (void) llvm::createReversePostOrderFunctionAttrsPass();
- (void) llvm::createMergeFunctionsPass();
(void) llvm::createMergeICmpsLegacyPass();
(void) llvm::createExpandLargeDivRemPass();
(void) llvm::createExpandMemCmpPass();
@@ -184,33 +146,17 @@ namespace {
llvm::raw_string_ostream os(buf);
(void) llvm::createPrintModulePass(os);
(void) llvm::createPrintFunctionPass(os);
- (void) llvm::createModuleDebugInfoPrinterPass();
- (void) llvm::createPartialInliningPass();
- (void) llvm::createLintLegacyPassPass();
(void) llvm::createSinkingPass();
(void) llvm::createLowerAtomicPass();
- (void) llvm::createCorrelatedValuePropagationPass();
- (void) llvm::createMemDepPrinter();
- (void) llvm::createLoopVectorizePass();
- (void) llvm::createSLPVectorizerPass();
(void) llvm::createLoadStoreVectorizerPass();
- (void) llvm::createVectorCombinePass();
(void) llvm::createPartiallyInlineLibCallsPass();
(void) llvm::createScalarizerPass();
(void) llvm::createSeparateConstOffsetFromGEPPass();
(void) llvm::createSpeculativeExecutionPass();
(void) llvm::createSpeculativeExecutionIfHasBranchDivergencePass();
- (void) llvm::createRewriteSymbolsPass();
(void) llvm::createStraightLineStrengthReducePass();
- (void) llvm::createMemDerefPrinter();
- (void) llvm::createMustExecutePrinter();
- (void) llvm::createMustBeExecutedContextPrinter();
- (void) llvm::createFloat2IntPass();
- (void) llvm::createEliminateAvailableExternallyPass();
(void)llvm::createScalarizeMaskedMemIntrinLegacyPass();
- (void) llvm::createWarnMissedTransformationsPass();
- (void) llvm::createHardwareLoopsPass();
- (void) llvm::createInjectTLIMappingsLegacyPass();
+ (void) llvm::createHardwareLoopsLegacyPass();
(void) llvm::createUnifyLoopExitsPass();
(void) llvm::createFixIrreduciblePass();
(void)llvm::createSelectOptimizePass();
diff --git a/llvm/include/llvm/MC/DXContainerPSVInfo.h b/llvm/include/llvm/MC/DXContainerPSVInfo.h
new file mode 100644
index 000000000000..e17054365d5c
--- /dev/null
+++ b/llvm/include/llvm/MC/DXContainerPSVInfo.h
@@ -0,0 +1,51 @@
+//===- llvm/MC/DXContainerPSVInfo.h - DXContainer PSVInfo -*- C++ -------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_DXCONTAINERPSVINFO_H
+#define LLVM_MC_DXCONTAINERPSVINFO_H
+
+#include "llvm/BinaryFormat/DXContainer.h"
+#include "llvm/TargetParser/Triple.h"
+
+#include <numeric>
+#include <stdint.h>
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+namespace mcdxbc {
+// This data structure is a helper for reading and writing PSV RuntimeInfo data.
+// It is implemented in the BinaryFormat library so that it can be used by both
+// the MC layer and Object tools.
+// This structure is used to represent the extracted data in an inspectable and
+// modifiable format, and can be used to serialize the data back into valid PSV
+// RuntimeInfo.
+struct PSVRuntimeInfo {
+ dxbc::PSV::v2::RuntimeInfo BaseData;
+ std::vector<dxbc::PSV::v2::ResourceBindInfo> Resources;
+
+ // Serialize PSVInfo into the provided raw_ostream. The Version parameter
+ // specifies the data version to encode; the default value encodes the
+ // highest supported version.
+ void write(raw_ostream &OS,
+ uint32_t Version = std::numeric_limits<uint32_t>::max()) const;
+
+ void swapBytes(Triple::EnvironmentType Stage) {
+ BaseData.swapBytes();
+ BaseData.swapBytes(Stage);
+ for (auto &Res : Resources)
+ Res.swapBytes();
+ }
+};
+
+} // namespace mcdxbc
+} // namespace llvm
+
+#endif // LLVM_MC_DXCONTAINERPSVINFO_H
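A rough usage sketch for the new helper, assuming dxbc::PSV::v2::ResourceBindInfo is default-constructible (not verified here); the chosen version number is arbitrary.

    #include "llvm/MC/DXContainerPSVInfo.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void emitPSV(raw_ostream &OS) {
      mcdxbc::PSVRuntimeInfo PSV;
      PSV.Resources.emplace_back(); // one default resource binding record
      PSV.write(OS, /*Version=*/2); // encode as PSV v2 instead of the maximum
    }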
diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h
index 354f9d8e993f..5e08fb41679b 100644
--- a/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/llvm/include/llvm/MC/MCAsmBackend.h
@@ -25,7 +25,8 @@ class MCRelaxableFragment;
class MCSymbol;
class MCAsmLayout;
class MCAssembler;
-class MCCFIInstruction;
+class MCContext;
+struct MCDwarfFrameInfo;
struct MCFixupKindInfo;
class MCInst;
class MCObjectStreamer;
@@ -40,7 +41,8 @@ class raw_ostream;
/// Generic interface to target specific assembler backends.
class MCAsmBackend {
protected: // Can only create subclasses.
- MCAsmBackend(support::endianness Endian);
+ MCAsmBackend(support::endianness Endian,
+ unsigned RelaxFixupKind = MaxFixupKind);
public:
MCAsmBackend(const MCAsmBackend &) = delete;
@@ -49,6 +51,9 @@ public:
const support::endianness Endian;
+ /// Fixup kind used for linker relaxation. Currently only used by RISC-V.
+ const unsigned RelaxFixupKind;
+
/// Return true if this target might automatically pad instructions and thus
/// need to emit padding enable/disable directives around sensitive code.
virtual bool allowAutoPadding() const { return false; }
@@ -124,6 +129,14 @@ public:
llvm_unreachable("Need to implement hook if target has custom fixups");
}
+ virtual bool handleAddSubRelocations(const MCAsmLayout &Layout,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ const MCValue &Target,
+ uint64_t &FixedValue) const {
+ return false;
+ }
+
/// Apply the \p Value for given \p Fixup into the provided data fragment, at
/// the offset specified by the fixup and following the fixup kind as
/// appropriate. Errors (such as an out of range fixup value) should be
@@ -210,8 +223,8 @@ public:
virtual void handleAssemblerFlag(MCAssemblerFlag Flag) {}
/// Generate the compact unwind encoding for the CFI instructions.
- virtual uint32_t
- generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction>) const {
+ virtual uint32_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
+ const MCContext *Ctxt) const {
return 0;
}
@@ -219,6 +232,8 @@ public:
virtual bool isMicroMips(const MCSymbol *Sym) const {
return false;
}
+
+ bool isDarwinCanonicalPersonality(const MCSymbol *Sym) const;
};
} // end namespace llvm
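A small call-site sketch for the reworked compact-unwind hook, assuming a caller that already holds the backend, the frame info, and the MCContext.

    #include "llvm/MC/MCAsmBackend.h"
    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCDwarf.h"
    using namespace llvm;

    static uint32_t compactUnwindFor(const MCAsmBackend &AB,
                                     const MCDwarfFrameInfo &FI,
                                     const MCContext &Ctx) {
      // The whole frame plus the context now flow into the target hook.
      return AB.generateCompactUnwindEncoding(&FI, &Ctx);
    }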
diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h
index ab8c36a412fc..c28cd1211235 100644
--- a/llvm/include/llvm/MC/MCAsmInfo.h
+++ b/llvm/include/llvm/MC/MCAsmInfo.h
@@ -453,9 +453,9 @@ protected:
/// Exception handling format for the target. Defaults to None.
ExceptionHandling ExceptionsType = ExceptionHandling::None;
- /// True if target uses CFI unwind information for debugging purpose when
- /// `ExceptionsType == ExceptionHandling::None`.
- bool UsesCFIForDebug = false;
+ /// True if the target uses CFI unwind information for purposes other than EH
+ /// (debugging / sanitizers) when `ExceptionsType == ExceptionHandling::None`.
+ bool UsesCFIWithoutEH = false;
/// Windows exception handling data (.pdata) encoding. Defaults to Invalid.
WinEH::EncodingType WinEHEncodingType = WinEH::EncodingType::Invalid;
@@ -785,7 +785,9 @@ public:
ExceptionsType = EH;
}
- bool doesUseCFIForDebug() const { return UsesCFIForDebug; }
+ bool usesCFIWithoutEH() const {
+ return ExceptionsType == ExceptionHandling::None && UsesCFIWithoutEH;
+ }
/// Returns true if the exception handling method for the platform uses call
/// frame information to unwind.
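A sketch of how a client might combine the renamed query with the existing exception-handling accessor; the helper itself is hypothetical.

    #include "llvm/MC/MCAsmInfo.h"
    using namespace llvm;

    // CFI directives are needed either for EH or, with the new flag, for
    // debugging / sanitizers even though ExceptionsType is None.
    static bool needsCFIDirectives(const MCAsmInfo &MAI) {
      return MAI.usesCFIWithoutEH() ||
             MAI.getExceptionHandlingType() != ExceptionHandling::None;
    }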
diff --git a/llvm/include/llvm/MC/MCAsmMacro.h b/llvm/include/llvm/MC/MCAsmMacro.h
index e3d6a858132d..e2989c09017a 100644
--- a/llvm/include/llvm/MC/MCAsmMacro.h
+++ b/llvm/include/llvm/MC/MCAsmMacro.h
@@ -46,7 +46,7 @@ public:
Slash, // '/'
BackSlash, // '\'
LParen, RParen, LBrac, RBrac, LCurly, RCurly,
- Star, Dot, Comma, Dollar, Equal, EqualEqual,
+ Question, Star, Dot, Comma, Dollar, Equal, EqualEqual,
Pipe, PipePipe, Caret,
Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
@@ -63,7 +63,7 @@ public:
};
private:
- TokenKind Kind;
+ TokenKind Kind = TokenKind::Eof;
/// A reference to the entire token contents; this is always a pointer into
/// a memory buffer owned by the source manager.
diff --git a/llvm/include/llvm/MC/MCAssembler.h b/llvm/include/llvm/MC/MCAssembler.h
index 80aa97c315da..5e1fc738b1da 100644
--- a/llvm/include/llvm/MC/MCAssembler.h
+++ b/llvm/include/llvm/MC/MCAssembler.h
@@ -473,8 +473,7 @@ public:
/// @{
bool registerSection(MCSection &Section);
-
- void registerSymbol(const MCSymbol &Symbol, bool *Created = nullptr);
+ bool registerSymbol(const MCSymbol &Symbol);
MutableArrayRef<std::pair<std::string, size_t>> getFileNames() {
return FileNames;
diff --git a/llvm/include/llvm/MC/MCCodeEmitter.h b/llvm/include/llvm/MC/MCCodeEmitter.h
index 2794acc0753f..a86b98c145fc 100644
--- a/llvm/include/llvm/MC/MCCodeEmitter.h
+++ b/llvm/include/llvm/MC/MCCodeEmitter.h
@@ -22,6 +22,13 @@ class MCCodeEmitter {
protected: // Can only create subclasses.
MCCodeEmitter();
+ /// EncodeInstruction - Encode the given \p Inst to bytes on the output stream
+ /// \p OS. Allows for an implementation of encodeInstruction that uses streams
+ /// instead of a SmallVector.
+ virtual void encodeInstruction(const MCInst &Inst, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {}
+
public:
MCCodeEmitter(const MCCodeEmitter &) = delete;
MCCodeEmitter &operator=(const MCCodeEmitter &) = delete;
@@ -30,17 +37,16 @@ public:
/// Lifetime management
virtual void reset() {}
- /// Emit the prefixes of given instruction on the output stream.
+ /// Append the prefixes of the given instruction to the code buffer.
///
/// \param Inst a single low-level machine instruction.
- /// \param OS output stream.
- virtual void emitPrefix(const MCInst &Inst, raw_ostream &OS,
+ /// \param CB code buffer
+ virtual void emitPrefix(const MCInst &Inst, SmallVectorImpl<char> &CB,
const MCSubtargetInfo &STI) const {}
- /// EncodeInstruction - Encode the given \p Inst to bytes on the output
- /// stream \p OS.
- virtual void encodeInstruction(const MCInst &Inst, raw_ostream &OS,
+ /// EncodeInstruction - Encode the given \p Inst to bytes and append to \p CB.
+ virtual void encodeInstruction(const MCInst &Inst, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const = 0;
+ const MCSubtargetInfo &STI) const;
};
} // end namespace llvm
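A sketch of a toy emitter adopting the buffer-based interface; the emitted byte is a placeholder, not a real target encoding.

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/MC/MCCodeEmitter.h"
    #include "llvm/MC/MCFixup.h"
    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    using namespace llvm;

    class ToyCodeEmitter : public MCCodeEmitter {
    public:
      void encodeInstruction(const MCInst &Inst, SmallVectorImpl<char> &CB,
                             SmallVectorImpl<MCFixup> &Fixups,
                             const MCSubtargetInfo &STI) const override {
        CB.push_back('\x90'); // append one placeholder byte per instruction
      }
    };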
diff --git a/llvm/include/llvm/MC/MCCodeView.h b/llvm/include/llvm/MC/MCCodeView.h
index 3d15c4009e43..3e997b1be3b8 100644
--- a/llvm/include/llvm/MC/MCCodeView.h
+++ b/llvm/include/llvm/MC/MCCodeView.h
@@ -146,6 +146,9 @@ public:
CodeViewContext();
~CodeViewContext();
+ CodeViewContext &operator=(const CodeViewContext &other) = delete;
+ CodeViewContext(const CodeViewContext &other) = delete;
+
bool isValidFileNumber(unsigned FileNumber) const;
bool addFile(MCStreamer &OS, unsigned FileNumber, StringRef Filename,
ArrayRef<uint8_t> ChecksumBytes, uint8_t ChecksumKind);
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index 981b3cd570c5..68d6f3e59d2d 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -101,7 +101,7 @@ private:
Triple TT;
/// The SourceMgr for this object, if any.
- const SourceMgr *SrcMgr;
+ const SourceMgr *SrcMgr = nullptr;
/// The SourceMgr for inline assembly, if any.
std::unique_ptr<SourceMgr> InlineSrcMgr;
@@ -110,16 +110,16 @@ private:
DiagHandlerTy DiagHandler;
/// The MCAsmInfo for this target.
- const MCAsmInfo *MAI;
+ const MCAsmInfo *MAI = nullptr;
/// The MCRegisterInfo for this target.
- const MCRegisterInfo *MRI;
+ const MCRegisterInfo *MRI = nullptr;
/// The MCObjectFileInfo for this target.
- const MCObjectFileInfo *MOFI;
+ const MCObjectFileInfo *MOFI = nullptr;
/// The MCSubtargetInfo for this target.
- const MCSubtargetInfo *MSTI;
+ const MCSubtargetInfo *MSTI = nullptr;
std::unique_ptr<CodeViewContext> CVContext;
@@ -173,7 +173,7 @@ private:
unsigned GetInstance(unsigned LocalLabelVal);
/// LLVM_BB_ADDR_MAP version to emit.
- uint8_t BBAddrMapVersion = 1;
+ uint8_t BBAddrMapVersion = 2;
/// The file name of the log file from the environment variable
/// AS_SECURE_LOG_FILE. Which must be set before the .secure_log_unique
@@ -190,7 +190,7 @@ private:
SmallString<128> CompilationDir;
/// Prefix replacement map for source file information.
- std::map<std::string, const std::string, std::greater<>> DebugPrefixMap;
+ SmallVector<std::pair<std::string, std::string>, 0> DebugPrefixMap;
/// The main file name if passed in explicitly.
std::string MainFileName;
@@ -473,9 +473,11 @@ public:
/// \name Symbol Management
/// @{
- /// Create and return a new linker temporary symbol with a unique but
- /// unspecified name.
+ /// Create a new linker temporary symbol with the specified prefix (Name) or
+ /// "tmp". This creates a "l"-prefixed symbol for Mach-O and is identical to
+ /// createNamedTempSymbol for other object file formats.
MCSymbol *createLinkerPrivateTempSymbol();
+ MCSymbol *createLinkerPrivateSymbol(const Twine &Name);
/// Create a temporary symbol with a unique name. The name will be omitted
/// in the symbol table if UseNamesOnTempLabels is false (default except
@@ -506,17 +508,17 @@ public:
/// variable after codegen.
///
/// \param Idx - The index of a local variable passed to \@llvm.localescape.
- MCSymbol *getOrCreateFrameAllocSymbol(StringRef FuncName, unsigned Idx);
+ MCSymbol *getOrCreateFrameAllocSymbol(const Twine &FuncName, unsigned Idx);
- MCSymbol *getOrCreateParentFrameOffsetSymbol(StringRef FuncName);
+ MCSymbol *getOrCreateParentFrameOffsetSymbol(const Twine &FuncName);
- MCSymbol *getOrCreateLSDASymbol(StringRef FuncName);
+ MCSymbol *getOrCreateLSDASymbol(const Twine &FuncName);
/// Get the symbol for \p Name, or null.
MCSymbol *lookupSymbol(const Twine &Name) const;
/// Set value for a symbol.
- void setSymbolValue(MCStreamer &Streamer, StringRef Sym, uint64_t Val);
+ void setSymbolValue(MCStreamer &Streamer, const Twine &Sym, uint64_t Val);
/// getSymbols - Get a reference for the symbol table for clients that
/// want to, for example, iterate over all symbols. 'const' because we
@@ -664,7 +666,7 @@ public:
MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K,
unsigned Flags, const MCSymbolWasm *Group,
unsigned UniqueID, const char *BeginSymName);
-
+
/// Get the section for the provided Section name
MCSectionDXContainer *getDXContainerSection(StringRef Section, SectionKind K);
@@ -788,6 +790,7 @@ public:
void setGenDwarfForAssembly(bool Value) { GenDwarfForAssembly = Value; }
unsigned getGenDwarfFileNumber() { return GenDwarfFileNumber; }
EmitDwarfUnwindType emitDwarfUnwindInfo() const;
+ bool emitCompactUnwindNonCanonical() const;
void setGenDwarfFileNumber(unsigned FileNumber) {
GenDwarfFileNumber = FileNumber;
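A brief sketch of the Twine-based symbol helpers; the prefix, function name, and index are illustrative only.

    #include "llvm/MC/MCContext.h"
    using namespace llvm;

    static MCSymbol *makeFrameAllocSym(MCContext &Ctx, const Twine &FuncName) {
      (void)Ctx.createLinkerPrivateSymbol("cfi"); // "l"-prefixed on Mach-O
      return Ctx.getOrCreateFrameAllocSymbol(FuncName, /*Idx=*/0);
    }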
diff --git a/llvm/include/llvm/MC/MCDXContainerWriter.h b/llvm/include/llvm/MC/MCDXContainerWriter.h
index 8ecb86c8a16f..8e78b1f48e16 100644
--- a/llvm/include/llvm/MC/MCDXContainerWriter.h
+++ b/llvm/include/llvm/MC/MCDXContainerWriter.h
@@ -9,8 +9,8 @@
#ifndef LLVM_MC_MCDXCONTAINERWRITER_H
#define LLVM_MC_MCDXCONTAINERWRITER_H
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
diff --git a/llvm/include/llvm/MC/MCDirectives.h b/llvm/include/llvm/MC/MCDirectives.h
index b9668a0c994f..fcab56ff0a74 100644
--- a/llvm/include/llvm/MC/MCDirectives.h
+++ b/llvm/include/llvm/MC/MCDirectives.h
@@ -46,6 +46,7 @@ enum MCSymbolAttr {
MCSA_WeakDefinition, ///< .weak_definition (MachO)
MCSA_WeakReference, ///< .weak_reference (MachO)
MCSA_WeakDefAutoPrivate, ///< .weak_def_can_be_hidden (MachO)
+ MCSA_WeakAntiDep, ///< .weak_anti_dep (COFF)
MCSA_Memtag, ///< .memtag (ELF)
};
diff --git a/llvm/include/llvm/MC/MCDwarf.h b/llvm/include/llvm/MC/MCDwarf.h
index cbc3b8cff244..715714f8e55d 100644
--- a/llvm/include/llvm/MC/MCDwarf.h
+++ b/llvm/include/llvm/MC/MCDwarf.h
@@ -21,6 +21,7 @@
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MD5.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/StringSaver.h"
#include <cassert>
#include <cstdint>
@@ -39,7 +40,6 @@ class MCSection;
class MCStreamer;
class MCSymbol;
class raw_ostream;
-class SMLoc;
class SourceMgr;
namespace mcdwarf {
@@ -70,6 +70,10 @@ public:
/// Returns finalized section.
SmallString<0> getFinalizedData();
+
+ /// Adds path \p Path to the line string. Returns offset in the
+ /// .debug_line_str section.
+ size_t addString(StringRef Path);
};
/// Instances of this class represent the name of the dwarf .file directive and
@@ -429,8 +433,8 @@ public:
class MCDwarfLineAddr {
public:
/// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas.
- static void Encode(MCContext &Context, MCDwarfLineTableParams Params,
- int64_t LineDelta, uint64_t AddrDelta, raw_ostream &OS);
+ static void encode(MCContext &Context, MCDwarfLineTableParams Params,
+ int64_t LineDelta, uint64_t AddrDelta, SmallVectorImpl<char> &OS);
/// Utility function to emit the encoding to a streamer.
static void Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
@@ -506,53 +510,59 @@ private:
int Offset;
unsigned Register2;
};
- unsigned AddressSpace;
+ unsigned AddressSpace = ~0u;
+ SMLoc Loc;
std::vector<char> Values;
std::string Comment;
- MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int O, StringRef V,
- StringRef Comment = "")
- : Operation(Op), Label(L), Register(R), Offset(O),
+ MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int O, SMLoc Loc,
+ StringRef V = "", StringRef Comment = "")
+ : Operation(Op), Label(L), Register(R), Offset(O), Loc(Loc),
Values(V.begin(), V.end()), Comment(Comment) {
assert(Op != OpRegister && Op != OpLLVMDefAspaceCfa);
}
- MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R1, unsigned R2)
- : Operation(Op), Label(L), Register(R1), Register2(R2) {
+ MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R1, unsigned R2, SMLoc Loc)
+ : Operation(Op), Label(L), Register(R1), Register2(R2), Loc(Loc) {
assert(Op == OpRegister);
}
- MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int O, unsigned AS)
- : Operation(Op), Label(L), Register(R), Offset(O), AddressSpace(AS) {
+ MCCFIInstruction(OpType Op, MCSymbol *L, unsigned R, int O, unsigned AS,
+ SMLoc Loc)
+ : Operation(Op), Label(L), Register(R), Offset(O), AddressSpace(AS),
+ Loc(Loc) {
assert(Op == OpLLVMDefAspaceCfa);
}
public:
/// .cfi_def_cfa defines a rule for computing CFA as: take address from
/// Register and add Offset to it.
- static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register,
- int Offset) {
- return MCCFIInstruction(OpDefCfa, L, Register, Offset, "");
+ static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int Offset,
+ SMLoc Loc = {}) {
+ return MCCFIInstruction(OpDefCfa, L, Register, Offset, Loc);
}
/// .cfi_def_cfa_register modifies a rule for computing CFA. From now
/// on Register will be used instead of the old one. Offset remains the same.
- static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register) {
- return MCCFIInstruction(OpDefCfaRegister, L, Register, 0, "");
+ static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register,
+ SMLoc Loc = {}) {
+ return MCCFIInstruction(OpDefCfaRegister, L, Register, 0, Loc);
}
/// .cfi_def_cfa_offset modifies a rule for computing CFA. Register
/// remains the same, but offset is new. Note that it is the absolute offset
/// that will be added to a defined register to compute the CFA address.
- static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int Offset) {
- return MCCFIInstruction(OpDefCfaOffset, L, 0, Offset, "");
+ static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int Offset,
+ SMLoc Loc = {}) {
+ return MCCFIInstruction(OpDefCfaOffset, L, 0, Offset, Loc);
}
/// .cfi_adjust_cfa_offset Same as .cfi_def_cfa_offset, but
/// Offset is a relative value that is added/subtracted from the previous
/// offset.
- static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int Adjustment) {
- return MCCFIInstruction(OpAdjustCfaOffset, L, 0, Adjustment, "");
+ static MCCFIInstruction createAdjustCfaOffset(MCSymbol *L, int Adjustment,
+ SMLoc Loc = {}) {
+ return MCCFIInstruction(OpAdjustCfaOffset, L, 0, Adjustment, Loc);
}
// FIXME: Update the remaining docs to use the new proposal wording.
@@ -561,82 +571,87 @@ public:
/// `DW_OP_constu AS; DW_OP_aspace_bregx R, B` as a location description.
static MCCFIInstruction createLLVMDefAspaceCfa(MCSymbol *L, unsigned Register,
int Offset,
- unsigned AddressSpace) {
+ unsigned AddressSpace,
+ SMLoc Loc) {
return MCCFIInstruction(OpLLVMDefAspaceCfa, L, Register, Offset,
- AddressSpace);
+ AddressSpace, Loc);
}
/// .cfi_offset Previous value of Register is saved at offset Offset
/// from CFA.
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register,
- int Offset) {
- return MCCFIInstruction(OpOffset, L, Register, Offset, "");
+ int Offset, SMLoc Loc = {}) {
+ return MCCFIInstruction(OpOffset, L, Register, Offset, Loc);
}
/// .cfi_rel_offset Previous value of Register is saved at offset
/// Offset from the current CFA register. This is transformed to .cfi_offset
/// using the known displacement of the CFA register from the CFA.
static MCCFIInstruction createRelOffset(MCSymbol *L, unsigned Register,
- int Offset) {
- return MCCFIInstruction(OpRelOffset, L, Register, Offset, "");
+ int Offset, SMLoc Loc = {}) {
+ return MCCFIInstruction(OpRelOffset, L, Register, Offset, Loc);
}
/// .cfi_register Previous value of Register1 is saved in
/// register Register2.
static MCCFIInstruction createRegister(MCSymbol *L, unsigned Register1,
- unsigned Register2) {
- return MCCFIInstruction(OpRegister, L, Register1, Register2);
+ unsigned Register2, SMLoc Loc = {}) {
+ return MCCFIInstruction(OpRegister, L, Register1, Register2, Loc);
}
/// .cfi_window_save SPARC register window is saved.
- static MCCFIInstruction createWindowSave(MCSymbol *L) {
- return MCCFIInstruction(OpWindowSave, L, 0, 0, "");
+ static MCCFIInstruction createWindowSave(MCSymbol *L, SMLoc Loc = {}) {
+ return MCCFIInstruction(OpWindowSave, L, 0, 0, Loc);
}
/// .cfi_negate_ra_state AArch64 negate RA state.
- static MCCFIInstruction createNegateRAState(MCSymbol *L) {
- return MCCFIInstruction(OpNegateRAState, L, 0, 0, "");
+ static MCCFIInstruction createNegateRAState(MCSymbol *L, SMLoc Loc = {}) {
+ return MCCFIInstruction(OpNegateRAState, L, 0, 0, Loc);
}
/// .cfi_restore says that the rule for Register is now the same as it
/// was at the beginning of the function, after all initial instructions added
/// by .cfi_startproc were executed.
- static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register) {
- return MCCFIInstruction(OpRestore, L, Register, 0, "");
+ static MCCFIInstruction createRestore(MCSymbol *L, unsigned Register,
+ SMLoc Loc = {}) {
+ return MCCFIInstruction(OpRestore, L, Register, 0, Loc);
}
/// .cfi_undefined From now on the previous value of Register can't be
/// restored anymore.
- static MCCFIInstruction createUndefined(MCSymbol *L, unsigned Register) {
- return MCCFIInstruction(OpUndefined, L, Register, 0, "");
+ static MCCFIInstruction createUndefined(MCSymbol *L, unsigned Register,
+ SMLoc Loc = {}) {
+ return MCCFIInstruction(OpUndefined, L, Register, 0, Loc);
}
/// .cfi_same_value Current value of Register is the same as in the
/// previous frame. I.e., no restoration is needed.
- static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register) {
- return MCCFIInstruction(OpSameValue, L, Register, 0, "");
+ static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register,
+ SMLoc Loc = {}) {
+ return MCCFIInstruction(OpSameValue, L, Register, 0, Loc);
}
/// .cfi_remember_state Save all current rules for all registers.
- static MCCFIInstruction createRememberState(MCSymbol *L) {
- return MCCFIInstruction(OpRememberState, L, 0, 0, "");
+ static MCCFIInstruction createRememberState(MCSymbol *L, SMLoc Loc = {}) {
+ return MCCFIInstruction(OpRememberState, L, 0, 0, Loc);
}
/// .cfi_restore_state Restore the previously saved state.
- static MCCFIInstruction createRestoreState(MCSymbol *L) {
- return MCCFIInstruction(OpRestoreState, L, 0, 0, "");
+ static MCCFIInstruction createRestoreState(MCSymbol *L, SMLoc Loc = {}) {
+ return MCCFIInstruction(OpRestoreState, L, 0, 0, Loc);
}
/// .cfi_escape Allows the user to add arbitrary bytes to the unwind
/// info.
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals,
- StringRef Comment = "") {
- return MCCFIInstruction(OpEscape, L, 0, 0, Vals, Comment);
+ SMLoc Loc = {}, StringRef Comment = "") {
+ return MCCFIInstruction(OpEscape, L, 0, 0, Loc, Vals, Comment);
}
/// A special wrapper for .cfi_escape that indicates GNU_ARGS_SIZE
- static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int Size) {
- return MCCFIInstruction(OpGnuArgsSize, L, 0, Size, "");
+ static MCCFIInstruction createGnuArgsSize(MCSymbol *L, int Size,
+ SMLoc Loc = {}) {
+ return MCCFIInstruction(OpGnuArgsSize, L, 0, Size, Loc);
}
OpType getOperation() const { return Operation; }
@@ -674,9 +689,8 @@ public:
return StringRef(&Values[0], Values.size());
}
- StringRef getComment() const {
- return Comment;
- }
+ StringRef getComment() const { return Comment; }
+ SMLoc getLoc() const { return Loc; }
};
struct MCDwarfFrameInfo {
@@ -704,9 +718,8 @@ public:
// This emits the frame info section.
//
static void Emit(MCObjectStreamer &streamer, MCAsmBackend *MAB, bool isEH);
- static void EmitAdvanceLoc(MCObjectStreamer &Streamer, uint64_t AddrDelta);
- static void EncodeAdvanceLoc(MCContext &Context, uint64_t AddrDelta,
- raw_ostream &OS);
+ static void encodeAdvanceLoc(MCContext &Context, uint64_t AddrDelta,
+ SmallVectorImpl<char> &OS);
};
} // end namespace llvm
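A sketch of the location-carrying factories, assuming a caller that appends to MCDwarfFrameInfo::Instructions; the register and offsets are arbitrary.

    #include "llvm/MC/MCDwarf.h"
    using namespace llvm;

    static void addPrologueCFI(MCDwarfFrameInfo &Frame, unsigned Reg, SMLoc Loc) {
      // Each directive now remembers where it came from, for later diagnostics.
      Frame.Instructions.push_back(
          MCCFIInstruction::cfiDefCfa(/*L=*/nullptr, Reg, /*Offset=*/16, Loc));
      Frame.Instructions.push_back(
          MCCFIInstruction::createOffset(/*L=*/nullptr, Reg, /*Offset=*/-16, Loc));
    }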
diff --git a/llvm/include/llvm/MC/MCELFObjectWriter.h b/llvm/include/llvm/MC/MCELFObjectWriter.h
index 5a5238942e80..aca77f5f2687 100644
--- a/llvm/include/llvm/MC/MCELFObjectWriter.h
+++ b/llvm/include/llvm/MC/MCELFObjectWriter.h
@@ -9,12 +9,12 @@
#ifndef LLVM_MC_MCELFOBJECTWRITER_H
#define LLVM_MC_MCELFOBJECTWRITER_H
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include <cstdint>
#include <vector>
diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h
index bf1f32bb91ba..5bc5e04f79ff 100644
--- a/llvm/include/llvm/MC/MCExpr.h
+++ b/llvm/include/llvm/MC/MCExpr.h
@@ -299,6 +299,7 @@ public:
VK_PPC_TLSGD, // symbol@tlsgd
VK_PPC_AIX_TLSGD, // symbol@gd
VK_PPC_AIX_TLSGDM, // symbol@m
+ VK_PPC_AIX_TLSLE, // symbol@le
VK_PPC_GOT_TLSLD, // symbol@got@tlsld
VK_PPC_GOT_TLSLD_LO, // symbol@got@tlsld@l
VK_PPC_GOT_TLSLD_HI, // symbol@got@tlsld@h
@@ -330,6 +331,7 @@ public:
VK_WASM_MBREL, // Memory address relative to __memory_base
VK_WASM_TBREL, // Table index relative to __table_base
VK_WASM_GOT_TLS, // Wasm global index of TLS symbol.
+ VK_WASM_FUNCINDEX, // Wasm function index.
VK_AMDGPU_GOTPCREL32_LO, // symbol@gotpcrel32@lo
VK_AMDGPU_GOTPCREL32_HI, // symbol@gotpcrel32@hi
diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h
index b6329b131624..7be4792a4521 100644
--- a/llvm/include/llvm/MC/MCFragment.h
+++ b/llvm/include/llvm/MC/MCFragment.h
@@ -75,6 +75,7 @@ private:
protected:
bool HasInstructions;
+ bool LinkerRelaxable = false;
MCFragment(FragmentType Kind, bool HasInstructions,
MCSection *Parent = nullptr);
@@ -246,6 +247,9 @@ public:
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_Data;
}
+
+ bool isLinkerRelaxable() const { return LinkerRelaxable; }
+ void setLinkerRelaxable() { LinkerRelaxable = true; }
};
/// This is a compact (memory-size-wise) fragment for holding an encoded
@@ -311,7 +315,7 @@ class MCAlignFragment : public MCFragment {
unsigned MaxBytesToEmit;
/// When emitting Nops some subtargets have specific nop encodings.
- const MCSubtargetInfo *STI;
+ const MCSubtargetInfo *STI = nullptr;
public:
MCAlignFragment(Align Alignment, int64_t Value, unsigned ValueSize,
@@ -488,6 +492,7 @@ public:
AddrDelta(&AddrDelta) {}
const MCExpr &getAddrDelta() const { return *AddrDelta; }
+ void setAddrDelta(const MCExpr *E) { AddrDelta = E; }
static bool classof(const MCFragment *F) {
return F->getKind() == MCFragment::FT_DwarfFrame;
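A minimal sketch of the new fragment flag, roughly as a RISC-V-style backend might use it.

    #include "llvm/MC/MCFragment.h"
    using namespace llvm;

    static void markRelaxable(MCDataFragment &DF) {
      // Tell the writer that the linker may later relax this fragment.
      if (!DF.isLinkerRelaxable())
        DF.setLinkerRelaxable();
    }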
diff --git a/llvm/include/llvm/MC/MCInstrAnalysis.h b/llvm/include/llvm/MC/MCInstrAnalysis.h
index 8a6bc371672a..aca0f4daeeee 100644
--- a/llvm/include/llvm/MC/MCInstrAnalysis.h
+++ b/llvm/include/llvm/MC/MCInstrAnalysis.h
@@ -168,7 +168,7 @@ public:
/// Returns (PLT virtual address, GOT virtual address) pairs for PLT entries.
virtual std::vector<std::pair<uint64_t, uint64_t>>
findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
- uint64_t GotPltSectionVA, const Triple &TargetTriple) const {
+ const Triple &TargetTriple) const {
return {};
}
};
diff --git a/llvm/include/llvm/MC/MCInstrDesc.h b/llvm/include/llvm/MC/MCInstrDesc.h
index 7719cd081530..0f406cb71950 100644
--- a/llvm/include/llvm/MC/MCInstrDesc.h
+++ b/llvm/include/llvm/MC/MCInstrDesc.h
@@ -209,10 +209,10 @@ public:
unsigned short SchedClass; // enum identifying instr sched class
unsigned char NumImplicitUses; // Num of regs implicitly used
unsigned char NumImplicitDefs; // Num of regs implicitly defined
+ unsigned short ImplicitOffset; // Offset to start of implicit op list
+ unsigned short OpInfoOffset; // Offset to info about operands
uint64_t Flags; // Flags identifying machine instr class
uint64_t TSFlags; // Target Specific Flag values
- const MCPhysReg *ImplicitOps; // List of implicit uses followed by defs
- const MCOperandInfo *OpInfo; // 'NumOperands' entries about operands
/// Returns the value of the specified operand constraint if
/// it is present. Returns -1 if it is not present.
@@ -236,13 +236,9 @@ public:
/// well.
unsigned getNumOperands() const { return NumOperands; }
- using const_opInfo_iterator = const MCOperandInfo *;
-
- const_opInfo_iterator opInfo_begin() const { return OpInfo; }
- const_opInfo_iterator opInfo_end() const { return OpInfo + NumOperands; }
-
ArrayRef<MCOperandInfo> operands() const {
- return ArrayRef(OpInfo, NumOperands);
+ auto OpInfo = reinterpret_cast<const MCOperandInfo *>(this + Opcode + 1);
+ return ArrayRef(OpInfo + OpInfoOffset, NumOperands);
}
/// Return the number of MachineOperands that are register
@@ -568,6 +564,8 @@ public:
/// reading the flags. Likewise, the variable shift instruction on X86 is
/// marked as implicitly reading the 'CL' register, which it always does.
ArrayRef<MCPhysReg> implicit_uses() const {
+ auto ImplicitOps =
+ reinterpret_cast<const MCPhysReg *>(this + Opcode + 1) + ImplicitOffset;
return {ImplicitOps, NumImplicitUses};
}
@@ -580,6 +578,8 @@ public:
/// registers. For that instruction, this will return a list containing the
/// EAX/EDX/EFLAGS registers.
ArrayRef<MCPhysReg> implicit_defs() const {
+ auto ImplicitOps =
+ reinterpret_cast<const MCPhysReg *>(this + Opcode + 1) + ImplicitOffset;
return {ImplicitOps + NumImplicitUses, NumImplicitDefs};
}
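A sketch showing that call sites keep working unchanged, since operands() and implicit_defs() still hand back ArrayRefs even though the tables are now offset-encoded.

    #include "llvm/MC/MCInstrDesc.h"
    using namespace llvm;

    static unsigned countRegisterOperands(const MCInstrDesc &Desc) {
      unsigned N = 0;
      for (const MCOperandInfo &Op : Desc.operands())
        if (Op.OperandType == MCOI::OPERAND_REGISTER)
          ++N;
      return N + Desc.implicit_defs().size();
    }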
diff --git a/llvm/include/llvm/MC/MCMachObjectWriter.h b/llvm/include/llvm/MC/MCMachObjectWriter.h
index 15e4652bc05d..05d816671b1a 100644
--- a/llvm/include/llvm/MC/MCMachObjectWriter.h
+++ b/llvm/include/llvm/MC/MCMachObjectWriter.h
@@ -32,7 +32,7 @@ class MCMachObjectTargetWriter : public MCObjectTargetWriter {
protected:
uint32_t CPUSubtype;
public:
- unsigned LocalDifference_RIT;
+ unsigned LocalDifference_RIT = 0;
protected:
MCMachObjectTargetWriter(bool Is64Bit_, uint32_t CPUType_,
diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h
index ce200a41cfa8..54f696cb795f 100644
--- a/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -13,10 +13,10 @@
#ifndef LLVM_MC_MCOBJECTFILEINFO_H
#define LLVM_MC_MCOBJECTFILEINFO_H
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/Swift.h"
#include "llvm/MC/MCSection.h"
#include "llvm/Support/VersionTuple.h"
+#include "llvm/TargetParser/Triple.h"
#include <array>
#include <optional>
@@ -227,6 +227,7 @@ protected:
// GOFF specific sections.
MCSection *PPA1Section = nullptr;
+ MCSection *ADASection = nullptr;
// XCOFF specific sections
MCSection *TOCBaseSection = nullptr;
@@ -430,6 +431,7 @@ public:
// GOFF specific sections.
MCSection *getPPA1Section() const { return PPA1Section; }
+ MCSection *getADASection() const { return ADASection; }
// XCOFF specific sections
MCSection *getTOCBaseSection() const { return TOCBaseSection; }
diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h
index 52ddaf348829..5e5b4b315017 100644
--- a/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -46,7 +46,7 @@ class MCObjectStreamer : public MCStreamer {
bool EmitDebugFrame;
SmallVector<MCSymbol *, 2> PendingLabels;
SmallSetVector<MCSection *, 4> PendingLabelSections;
- unsigned CurSubsectionIdx;
+ unsigned CurSubsectionIdx = 0;
struct PendingMCFixup {
const MCSymbol *Sym;
MCFixup Fixup;
@@ -113,9 +113,9 @@ protected:
void addPendingLabel(MCSymbol* label);
/// If any labels have been emitted but not assigned fragments in the current
- /// Section and Subsection, ensure that they get assigned, either to fragment
- /// F if possible or to a new data fragment. Optionally, one can provide an
- /// offset \p FOffset as a symbol offset within the fragment.
+ /// Section and Subsection, ensure that they get assigned to fragment F.
+ /// Optionally, one can provide an offset \p FOffset as a symbol offset within
+ /// the fragment.
void flushPendingLabels(MCFragment *F, uint64_t FOffset = 0);
public:
@@ -168,7 +168,7 @@ public:
unsigned PointerSize) override;
void emitDwarfLineEndEntry(MCSection *Section, MCSymbol *LastLabel) override;
void emitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
- const MCSymbol *Label);
+ const MCSymbol *Label, SMLoc Loc);
void emitCVLocDirective(unsigned FunctionId, unsigned FileNo, unsigned Line,
unsigned Column, bool PrologueEnd, bool IsStmt,
StringRef FileName, SMLoc Loc) override;
diff --git a/llvm/include/llvm/MC/MCObjectWriter.h b/llvm/include/llvm/MC/MCObjectWriter.h
index 468a25382119..8c1045237393 100644
--- a/llvm/include/llvm/MC/MCObjectWriter.h
+++ b/llvm/include/llvm/MC/MCObjectWriter.h
@@ -9,8 +9,8 @@
#ifndef LLVM_MC_MCOBJECTWRITER_H
#define LLVM_MC_MCOBJECTWRITER_H
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/TargetParser/Triple.h"
#include <cstdint>
namespace llvm {
@@ -110,6 +110,9 @@ public:
unsigned FunctionSize, bool hasDebug) {
report_fatal_error("addExceptionEntry is only supported on XCOFF targets");
}
+ virtual void addCInfoSymEntry(StringRef Name, StringRef Metadata) {
+ report_fatal_error("addCInfoSymEntry is only supported on XCOFF targets");
+ }
/// Write the object file and returns the number of bytes written.
///
/// This routine is called by the assembler after layout and relaxation is
diff --git a/llvm/include/llvm/MC/MCParser/AsmLexer.h b/llvm/include/llvm/MC/MCParser/AsmLexer.h
index e187a28f267d..735b0c114f2a 100644
--- a/llvm/include/llvm/MC/MCParser/AsmLexer.h
+++ b/llvm/include/llvm/MC/MCParser/AsmLexer.h
@@ -55,7 +55,7 @@ public:
private:
bool isAtStartOfComment(const char *Ptr);
bool isAtStatementSeparator(const char *Ptr);
- int getNextChar();
+ [[nodiscard]] int getNextChar();
int peekNextChar();
AsmToken ReturnError(const char *Loc, const std::string &Msg);
diff --git a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
index 850a9cffe73a..9affb1f980bb 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -45,7 +45,7 @@ class MCAsmLexer {
protected: // Can only create subclasses.
const char *TokStart = nullptr;
bool SkipSpace = true;
- bool AllowAtInIdentifier;
+ bool AllowAtInIdentifier = false;
bool AllowHashInIdentifier = false;
bool IsAtStartOfStatement = true;
bool LexMasmHexFloats = false;
diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index bf9e3b594566..faa72d5f3144 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -83,11 +83,11 @@ struct InlineAsmIdentifierInfo {
Var.Type = type;
Var.Length = size / type;
}
- InlineAsmIdentifierInfo() : Kind(IK_Invalid) {}
+ InlineAsmIdentifierInfo() = default;
private:
// Discriminate using the current kind.
- IdKind Kind;
+ IdKind Kind = IK_Invalid;
};
// Generic type information for an assembly object.
@@ -236,7 +236,7 @@ public:
bool printPendingErrors() {
bool rv = !PendingErrors.empty();
- for (auto Err : PendingErrors) {
+ for (auto &Err : PendingErrors) {
printError(Err.Loc, Twine(Err.Msg), Err.Range);
}
PendingErrors.clear();
diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h b/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h
index cbabc2c9d69d..e596a7195447 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h
@@ -22,7 +22,7 @@ class Twine;
/// which is implemented by target and object file assembly parser
/// implementations.
class MCAsmParserExtension {
- MCAsmParser *Parser;
+ MCAsmParser *Parser = nullptr;
protected:
MCAsmParserExtension();
diff --git a/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h
index 22f66a011ece..0c9668904e82 100644
--- a/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h
+++ b/llvm/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -24,7 +24,7 @@ class raw_ostream;
class MCParsedAsmOperand {
/// MCOperandNum - The corresponding MCInst operand number. Only valid when
/// parsing MS-style inline assembly.
- unsigned MCOperandNum;
+ unsigned MCOperandNum = ~0u;
/// Constraint - The constraint on this operand. Only valid when parsing
/// MS-style inline assembly.
diff --git a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
index 054669c9f45e..1d87f0131efc 100644
--- a/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
+++ b/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
@@ -14,8 +14,8 @@
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCTargetOptions.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/SMLoc.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include <cstdint>
#include <memory>
@@ -60,19 +60,17 @@ const char AsmRewritePrecedence [] = {
2 // AOK_IntelExpr
};
-// Represnt the various parts which makes up an intel expression,
+// Represent the various parts which make up an intel expression,
// used for emitting compound intel expressions
struct IntelExpr {
- bool NeedBracs;
- int64_t Imm;
+ bool NeedBracs = false;
+ int64_t Imm = 0;
StringRef BaseReg;
StringRef IndexReg;
StringRef OffsetName;
- unsigned Scale;
+ unsigned Scale = 1;
- IntelExpr()
- : NeedBracs(false), Imm(0), BaseReg(StringRef()), IndexReg(StringRef()),
- OffsetName(StringRef()), Scale(1) {}
+ IntelExpr() = default;
// [BaseReg + IndexReg * ScaleExpression + OFFSET name + ImmediateExpression]
IntelExpr(StringRef baseReg, StringRef indexReg, unsigned scale,
StringRef offsetName, int64_t imm, bool needBracs)
@@ -130,6 +128,43 @@ enum OperandMatchResultTy {
MatchOperand_ParseFail // operand matched but had errors
};
+/// Ternary parse status returned by various parse* methods.
+class ParseStatus {
+ enum class StatusTy { Success, Failure, NoMatch } Status;
+
+public:
+#if __cplusplus >= 202002L
+ using enum StatusTy;
+#else
+ static constexpr StatusTy Success = StatusTy::Success;
+ static constexpr StatusTy Failure = StatusTy::Failure;
+ static constexpr StatusTy NoMatch = StatusTy::NoMatch;
+#endif
+
+ constexpr ParseStatus() : Status(NoMatch) {}
+
+ constexpr ParseStatus(StatusTy Status) : Status(Status) {}
+
+ constexpr ParseStatus(bool Error) : Status(Error ? Failure : Success) {}
+
+ template <typename T> constexpr ParseStatus(T) = delete;
+
+ constexpr bool isSuccess() const { return Status == StatusTy::Success; }
+ constexpr bool isFailure() const { return Status == StatusTy::Failure; }
+ constexpr bool isNoMatch() const { return Status == StatusTy::NoMatch; }
+
+ // Allow implicit conversions to / from OperandMatchResultTy.
+ constexpr ParseStatus(OperandMatchResultTy R)
+ : Status(R == MatchOperand_Success ? Success
+ : R == MatchOperand_ParseFail ? Failure
+ : NoMatch) {}
+ constexpr operator OperandMatchResultTy() const {
+ return isSuccess() ? MatchOperand_Success
+ : isFailure() ? MatchOperand_ParseFail
+ : MatchOperand_NoMatch;
+ }
+};
+
enum class DiagnosticPredicateTy {
Match,
NearMatch,
@@ -410,6 +445,7 @@ public:
}
/// ParseDirective - Parse a target specific assembler directive
+ /// This method is deprecated, use 'parseDirective' instead.
///
/// The parser is positioned following the directive name. The target
/// specific directive parser should parse the entire directive doing or
@@ -419,7 +455,19 @@ public:
/// end-of-statement token and false is returned.
///
/// \param DirectiveID - the identifier token of the directive.
- virtual bool ParseDirective(AsmToken DirectiveID) = 0;
+ virtual bool ParseDirective(AsmToken DirectiveID) { return true; }
+
+ /// Parses a target-specific assembler directive.
+ ///
+ /// The parser is positioned following the directive name. The target-specific
+ /// directive parser should parse the entire directive doing or recording any
+ /// target-specific work, or emit an error. On success, the entire line should
+ /// be parsed up to and including the end-of-statement token. On failure, the
+ /// parser is not required to read to the end of the line. If the directive is
+ /// not target-specific, no tokens should be consumed and NoMatch is returned.
+ ///
+ /// \param DirectiveID - The token identifying the directive.
+ virtual ParseStatus parseDirective(AsmToken DirectiveID);
/// MatchAndEmitInstruction - Recognize a series of operands of a parsed
/// instruction as an actual MCInst and emit it to the specified MCStreamer.
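A sketch of the shape a parseDirective override now takes; the directive name is invented and the function is shown free-standing rather than as a member of a real target parser.

    #include "llvm/ADT/StringRef.h"
    #include "llvm/MC/MCParser/MCTargetAsmParser.h"
    using namespace llvm;

    static ParseStatus parseFooDirective(StringRef IDVal) {
      if (IDVal != ".foo_directive")
        return ParseStatus::NoMatch; // not ours: consume no tokens
      bool HadError = false;         // ... parse and record operands here ...
      return HadError ? ParseStatus::Failure : ParseStatus::Success;
    }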
diff --git a/llvm/include/llvm/MC/MCPseudoProbe.h b/llvm/include/llvm/MC/MCPseudoProbe.h
index f5796f5c7948..4904cb4ca545 100644
--- a/llvm/include/llvm/MC/MCPseudoProbe.h
+++ b/llvm/include/llvm/MC/MCPseudoProbe.h
@@ -30,12 +30,15 @@
// 0 - block probe, 1 - indirect call, 2 - direct call
// ATTRIBUTE (uint3)
// 1 - reserved
+// 2 - Sentinel
+// 4 - HasDiscriminator
// ADDRESS_TYPE (uint1)
// 0 - code address for regular probes (for downwards compatibility)
// - GUID of linkage name for sentinel probes
// 1 - address delta
// CODE_ADDRESS (uint64 or ULEB128)
// code address or address delta, depending on ADDRESS_TYPE
+// DISCRIMINATOR (ULEB128) if HasDiscriminator
// INLINED FUNCTION RECORDS
// A list of NUM_INLINED_FUNCTIONS entries describing each of the inlined
// callees. Each record contains:
@@ -108,6 +111,7 @@ class MCPseudoProbeBase {
protected:
uint64_t Guid;
uint64_t Index;
+ uint32_t Discriminator;
uint8_t Attributes;
uint8_t Type;
// The value should be equal to PseudoProbeReservedId::Last + 1 which is
@@ -116,8 +120,8 @@ protected:
const static uint32_t PseudoProbeFirstId = 1;
public:
- MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T)
- : Guid(G), Index(I), Attributes(At), Type(T) {}
+ MCPseudoProbeBase(uint64_t G, uint64_t I, uint64_t At, uint8_t T, uint32_t D)
+ : Guid(G), Index(I), Discriminator(D), Attributes(At), Type(T) {}
bool isEntry() const { return Index == PseudoProbeFirstId; }
@@ -125,6 +129,8 @@ public:
uint64_t getIndex() const { return Index; }
+ uint32_t getDiscriminator() const { return Discriminator; }
+
uint8_t getAttributes() const { return Attributes; }
uint8_t getType() const { return Type; }
@@ -155,8 +161,9 @@ class MCPseudoProbe : public MCPseudoProbeBase {
public:
MCPseudoProbe(MCSymbol *Label, uint64_t Guid, uint64_t Index, uint64_t Type,
- uint64_t Attributes)
- : MCPseudoProbeBase(Guid, Index, Attributes, Type), Label(Label) {
+ uint64_t Attributes, uint32_t Discriminator)
+ : MCPseudoProbeBase(Guid, Index, Attributes, Type, Discriminator),
+ Label(Label) {
assert(Type <= 0xFF && "Probe type too big to encode, exceeding 2^8");
assert(Attributes <= 0xFF &&
"Probe attributes too big to encode, exceeding 2^16");
@@ -175,8 +182,9 @@ class MCDecodedPseudoProbe : public MCPseudoProbeBase {
public:
MCDecodedPseudoProbe(uint64_t Ad, uint64_t G, uint32_t I, PseudoProbeType K,
- uint8_t At, MCDecodedPseudoProbeInlineTree *Tree)
- : MCPseudoProbeBase(G, I, At, static_cast<uint8_t>(K)), Address(Ad),
+ uint8_t At, uint32_t D,
+ MCDecodedPseudoProbeInlineTree *Tree)
+ : MCPseudoProbeBase(G, I, At, static_cast<uint8_t>(K), D), Address(Ad),
InlineTree(Tree){};
uint64_t getAddress() const { return Address; }
@@ -235,7 +243,8 @@ public:
std::vector<ProbeType> &getProbes() { return Probes; }
void addProbes(ProbeType Probe) { Probes.push_back(Probe); }
// Caller node of the inline site
- MCPseudoProbeInlineTreeBase<ProbeType, DerivedProbeInlineTreeType> *Parent;
+ MCPseudoProbeInlineTreeBase<ProbeType, DerivedProbeInlineTreeType> *Parent =
+ nullptr;
DerivedProbeInlineTreeType *getOrAddNode(const InlineSite &Site) {
auto Ret = Children.emplace(
Site, std::make_unique<DerivedProbeInlineTreeType>(Site));
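A sketch constructing a probe with the new trailing discriminator argument; all of the values are arbitrary.

    #include "llvm/MC/MCPseudoProbe.h"
    using namespace llvm;

    static MCPseudoProbe makeBlockProbe(MCSymbol *Label, uint64_t Guid,
                                        uint64_t Index, uint32_t Discriminator) {
      // Type 0 is a block probe; attributes are left empty.
      return MCPseudoProbe(Label, Guid, Index, /*Type=*/0, /*Attributes=*/0,
                           Discriminator);
    }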
diff --git a/llvm/include/llvm/MC/MCRegister.h b/llvm/include/llvm/MC/MCRegister.h
index 1e8c747785eb..530c1870abd6 100644
--- a/llvm/include/llvm/MC/MCRegister.h
+++ b/llvm/include/llvm/MC/MCRegister.h
@@ -20,13 +20,22 @@ namespace llvm {
/// but not necessarily virtual registers.
using MCPhysReg = uint16_t;
+/// Register units are used to compute register aliasing. Every register has at
+/// least one register unit, but it can have more. Two registers overlap if and
+/// only if they have a common register unit.
+///
+/// A target with a complicated sub-register structure will typically have many
+/// fewer register units than actual registers. MCRI::getNumRegUnits() returns
+/// the number of register units in the target.
+using MCRegUnit = unsigned;
+
/// Wrapper class representing physical registers. Should be passed by value.
class MCRegister {
friend hash_code hash_value(const MCRegister &);
unsigned Reg;
public:
- constexpr MCRegister(unsigned Val = 0): Reg(Val) {}
+ constexpr MCRegister(unsigned Val = 0) : Reg(Val) {}
// Register numbers can represent physical registers, virtual registers, and
// sometimes stack slots. The unsigned values are divided into these ranges:
@@ -49,19 +58,17 @@ public:
/// register. StackSlot values do not exist in the MC layer, see
/// Register::isStackSlot() for the more information on them.
///
- static bool isStackSlot(unsigned Reg) {
+ static constexpr bool isStackSlot(unsigned Reg) {
return FirstStackSlot <= Reg && Reg < VirtualRegFlag;
}
/// Return true if the specified register number is in
/// the physical register namespace.
- static bool isPhysicalRegister(unsigned Reg) {
+ static constexpr bool isPhysicalRegister(unsigned Reg) {
return FirstPhysicalReg <= Reg && Reg < FirstStackSlot;
}
- constexpr operator unsigned() const {
- return Reg;
- }
+ constexpr operator unsigned() const { return Reg; }
/// Check the provided unsigned value is a valid MCRegister.
static MCRegister from(unsigned Val) {
@@ -69,31 +76,37 @@ public:
return MCRegister(Val);
}
- unsigned id() const {
- return Reg;
- }
+ constexpr unsigned id() const { return Reg; }
- bool isValid() const { return Reg != NoRegister; }
+ constexpr bool isValid() const { return Reg != NoRegister; }
/// Comparisons between register objects
- bool operator==(const MCRegister &Other) const { return Reg == Other.Reg; }
- bool operator!=(const MCRegister &Other) const { return Reg != Other.Reg; }
+ constexpr bool operator==(const MCRegister &Other) const {
+ return Reg == Other.Reg;
+ }
+ constexpr bool operator!=(const MCRegister &Other) const {
+ return Reg != Other.Reg;
+ }
/// Comparisons against register constants. E.g.
/// * R == AArch64::WZR
/// * R == 0
/// * R == VirtRegMap::NO_PHYS_REG
- bool operator==(unsigned Other) const { return Reg == Other; }
- bool operator!=(unsigned Other) const { return Reg != Other; }
- bool operator==(int Other) const { return Reg == unsigned(Other); }
- bool operator!=(int Other) const { return Reg != unsigned(Other); }
+ constexpr bool operator==(unsigned Other) const { return Reg == Other; }
+ constexpr bool operator!=(unsigned Other) const { return Reg != Other; }
+ constexpr bool operator==(int Other) const { return Reg == unsigned(Other); }
+ constexpr bool operator!=(int Other) const { return Reg != unsigned(Other); }
// MSVC requires that we explicitly declare these two as well.
- bool operator==(MCPhysReg Other) const { return Reg == unsigned(Other); }
- bool operator!=(MCPhysReg Other) const { return Reg != unsigned(Other); }
+ constexpr bool operator==(MCPhysReg Other) const {
+ return Reg == unsigned(Other);
+ }
+ constexpr bool operator!=(MCPhysReg Other) const {
+ return Reg != unsigned(Other);
+ }
};
// Provide DenseMapInfo for MCRegister
-template<> struct DenseMapInfo<MCRegister> {
+template <> struct DenseMapInfo<MCRegister> {
static inline unsigned getEmptyKey() {
return DenseMapInfo<unsigned>::getEmptyKey();
}
@@ -111,6 +124,6 @@ template<> struct DenseMapInfo<MCRegister> {
inline hash_code hash_value(const MCRegister &Reg) {
return hash_value(Reg.id());
}
-}
+} // namespace llvm
#endif // LLVM_MC_MCREGISTER_H
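A sketch relying on the newly constexpr predicates; it assumes physical register numbers still start at 1, which matches this header's enum but is not restated in the hunk above.

    #include "llvm/MC/MCRegister.h"
    using namespace llvm;

    constexpr MCRegister NoReg(0);
    static_assert(!NoReg.isValid(), "0 is MCRegister::NoRegister");
    static_assert(NoReg == 0u, "comparison against unsigned is now constexpr");
    static_assert(MCRegister::isPhysicalRegister(1),
                  "physical registers start at 1");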
diff --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h
index 520b6946db91..ede01d624924 100644
--- a/llvm/include/llvm/MC/MCRegisterInfo.h
+++ b/llvm/include/llvm/MC/MCRegisterInfo.h
@@ -27,6 +27,10 @@
namespace llvm {
+class MCRegUnitIterator;
+class MCSubRegIterator;
+class MCSuperRegIterator;
+
/// MCRegisterClass - Base class of TargetRegisterClass.
class MCRegisterClass {
public:
@@ -111,8 +115,8 @@ struct MCRegisterDesc {
// sub-register in SubRegs.
uint32_t SubRegIndices;
- // RegUnits - Points to the list of register units. The low 4 bits holds the
- // Scale, the high bits hold an offset into DiffLists. See MCRegUnitIterator.
+ // Points to the list of register units. The low bits hold the first regunit
+ // number, the high bits hold an offset into DiffLists. See MCRegUnitIterator.
uint32_t RegUnits;
/// Index into list with lane mask sequences. The sequence contains a lanemask
@@ -161,7 +165,7 @@ private:
unsigned NumClasses; // Number of entries in the array
unsigned NumRegUnits; // Number of regunits.
const MCPhysReg (*RegUnitRoots)[2]; // Pointer to regunit root table.
- const MCPhysReg *DiffLists; // Pointer to the difflists array
+ const int16_t *DiffLists; // Pointer to the difflists array
const LaneBitmask *RegUnitMaskSequences; // Pointer to lane mask sequences
// for register units.
const char *RegStrings; // Pointer to the string table.
@@ -185,159 +189,73 @@ private:
DenseMap<MCRegister, int> L2SEHRegs; // LLVM to SEH regs mapping
DenseMap<MCRegister, int> L2CVRegs; // LLVM to CV regs mapping
-public:
- // Forward declaration to become a friend class of DiffListIterator.
- template <class SubT> class mc_difflist_iterator;
-
- /// DiffListIterator - Base iterator class that can traverse the
- /// differentially encoded register and regunit lists in DiffLists.
- /// Don't use this class directly, use one of the specialized sub-classes
- /// defined below.
- class DiffListIterator {
- uint16_t Val = 0;
- const MCPhysReg *List = nullptr;
-
- protected:
- /// Create an invalid iterator. Call init() to point to something useful.
+ /// Iterator class that can traverse the differentially encoded values in
+ /// DiffLists. Don't use this class directly, use one of the adaptors below.
+ class DiffListIterator
+ : public iterator_facade_base<DiffListIterator, std::forward_iterator_tag,
+ unsigned> {
+ unsigned Val = 0;
+ const int16_t *List = nullptr;
+
+ public:
+ /// Constructs an invalid iterator, which is also the end iterator.
+ /// Call init() to point to something useful.
DiffListIterator() = default;
- /// init - Point the iterator to InitVal, decoding subsequent values from
- /// DiffList. The iterator will initially point to InitVal, sub-classes are
- /// responsible for skipping the seed value if it is not part of the list.
- void init(MCPhysReg InitVal, const MCPhysReg *DiffList) {
+ /// Point the iterator to InitVal, decoding subsequent values from DiffList.
+ void init(unsigned InitVal, const int16_t *DiffList) {
Val = InitVal;
List = DiffList;
}
- /// advance - Move to the next list position, return the applied
- /// differential. This function does not detect the end of the list, that
- /// is the caller's responsibility (by checking for a 0 return value).
- MCRegister advance() {
- assert(isValid() && "Cannot move off the end of the list.");
- MCPhysReg D = *List++;
- Val += D;
- return D;
- }
-
- public:
- /// isValid - returns true if this iterator is not yet at the end.
+ /// Returns true if this iterator is not yet at the end.
bool isValid() const { return List; }
/// Dereference the iterator to get the value at the current position.
- MCRegister operator*() const { return Val; }
+ const unsigned &operator*() const { return Val; }
+ using DiffListIterator::iterator_facade_base::operator++;
/// Pre-increment to move to the next position.
- void operator++() {
+ DiffListIterator &operator++() {
+ assert(isValid() && "Cannot move off the end of the list.");
+ int16_t D = *List++;
+ Val += D;
// The end of the list is encoded as a 0 differential.
- if (!advance())
+ if (!D)
List = nullptr;
+ return *this;
}
- template <class SubT> friend class MCRegisterInfo::mc_difflist_iterator;
- };
-
- /// Forward iterator using DiffListIterator.
- template <class SubT>
- class mc_difflist_iterator
- : public iterator_facade_base<mc_difflist_iterator<SubT>,
- std::forward_iterator_tag, MCPhysReg> {
- MCRegisterInfo::DiffListIterator Iter;
- /// Current value as MCPhysReg, so we can return a reference to it.
- MCPhysReg Val;
-
- protected:
- mc_difflist_iterator(MCRegisterInfo::DiffListIterator Iter) : Iter(Iter) {}
-
- // Allow conversion between instantiations where valid.
- mc_difflist_iterator(MCRegister Reg, const MCPhysReg *DiffList) {
- Iter.init(Reg, DiffList);
- Val = *Iter;
- }
-
- public:
- // Allow default construction to build variables, but this doesn't build
- // a useful iterator.
- mc_difflist_iterator() = default;
-
- /// Return an iterator past the last element.
- static SubT end() {
- SubT End;
- End.Iter.List = nullptr;
- return End;
- }
-
- bool operator==(const mc_difflist_iterator &Arg) const {
- return Iter.List == Arg.Iter.List;
+ bool operator==(const DiffListIterator &Other) const {
+ return List == Other.List;
}
-
- const MCPhysReg &operator*() const { return Val; }
-
- using mc_difflist_iterator::iterator_facade_base::operator++;
- void operator++() {
- assert(Iter.List && "Cannot increment the end iterator!");
- ++Iter;
- Val = *Iter;
- }
- };
-
- /// Forward iterator over all sub-registers.
- /// TODO: Replace remaining uses of MCSubRegIterator.
- class mc_subreg_iterator : public mc_difflist_iterator<mc_subreg_iterator> {
- public:
- mc_subreg_iterator(MCRegisterInfo::DiffListIterator Iter)
- : mc_difflist_iterator(Iter) {}
- mc_subreg_iterator() = default;
- mc_subreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI)
- : mc_difflist_iterator(Reg, MCRI->DiffLists + MCRI->get(Reg).SubRegs) {}
- };
-
- /// Forward iterator over all super-registers.
- /// TODO: Replace remaining uses of MCSuperRegIterator.
- class mc_superreg_iterator
- : public mc_difflist_iterator<mc_superreg_iterator> {
- public:
- mc_superreg_iterator(MCRegisterInfo::DiffListIterator Iter)
- : mc_difflist_iterator(Iter) {}
- mc_superreg_iterator() = default;
- mc_superreg_iterator(MCRegister Reg, const MCRegisterInfo *MCRI)
- : mc_difflist_iterator(Reg,
- MCRI->DiffLists + MCRI->get(Reg).SuperRegs) {}
};
+public:
/// Return an iterator range over all sub-registers of \p Reg, excluding \p
/// Reg.
- iterator_range<mc_subreg_iterator> subregs(MCRegister Reg) const {
- return make_range(std::next(mc_subreg_iterator(Reg, this)),
- mc_subreg_iterator::end());
- }
+ iterator_range<MCSubRegIterator> subregs(MCRegister Reg) const;
/// Return an iterator range over all sub-registers of \p Reg, including \p
/// Reg.
- iterator_range<mc_subreg_iterator> subregs_inclusive(MCRegister Reg) const {
- return make_range({Reg, this}, mc_subreg_iterator::end());
- }
+ iterator_range<MCSubRegIterator> subregs_inclusive(MCRegister Reg) const;
/// Return an iterator range over all super-registers of \p Reg, excluding \p
/// Reg.
- iterator_range<mc_superreg_iterator> superregs(MCRegister Reg) const {
- return make_range(std::next(mc_superreg_iterator(Reg, this)),
- mc_superreg_iterator::end());
- }
+ iterator_range<MCSuperRegIterator> superregs(MCRegister Reg) const;
/// Return an iterator range over all super-registers of \p Reg, including \p
/// Reg.
- iterator_range<mc_superreg_iterator>
- superregs_inclusive(MCRegister Reg) const {
- return make_range({Reg, this}, mc_superreg_iterator::end());
- }
+ iterator_range<MCSuperRegIterator> superregs_inclusive(MCRegister Reg) const;
/// Return an iterator range over all sub- and super-registers of \p Reg,
/// including \p Reg.
- detail::concat_range<const MCPhysReg, iterator_range<mc_subreg_iterator>,
- iterator_range<mc_superreg_iterator>>
- sub_and_superregs_inclusive(MCRegister Reg) const {
- return concat<const MCPhysReg>(subregs_inclusive(Reg), superregs(Reg));
- }
+ detail::concat_range<const MCPhysReg, iterator_range<MCSubRegIterator>,
+ iterator_range<MCSuperRegIterator>>
+ sub_and_superregs_inclusive(MCRegister Reg) const;
+
+ /// Returns an iterator range over all regunits for \p Reg.
+ iterator_range<MCRegUnitIterator> regunits(MCRegister Reg) const;
// These iterators are allowed to sub-class DiffListIterator and access
// internal list pointers.
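DiffLists store each value as a signed 16-bit delta from the previous decoded value, with a 0 delta terminating the sequence; DiffListIterator above decodes that on the fly. A standalone sketch of the encoding convention the comments describe (not the LLVM iterator itself):

#include <cstdint>
#include <vector>

// Decode a differentially encoded list: start from Seed, then keep adding
// signed deltas until a 0 delta terminates the sequence. The seed itself is
// the first decoded value, matching DiffListIterator::init().
std::vector<unsigned> decodeDiffList(unsigned Seed, const int16_t *Deltas) {
  std::vector<unsigned> Out{Seed};
  unsigned Val = Seed;
  while (int16_t D = *Deltas++) {
    Val += D; // deltas may be negative, e.g. for super-register lists
    Out.push_back(Val);
  }
  return Out;
}

// Example: seed 10 with deltas {+2, -1, 0} decodes to {10, 12, 11}.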
@@ -351,16 +269,11 @@ public:
/// Initialize MCRegisterInfo, called by TableGen
/// auto-generated routines. *DO NOT USE*.
void InitMCRegisterInfo(const MCRegisterDesc *D, unsigned NR, unsigned RA,
- unsigned PC,
- const MCRegisterClass *C, unsigned NC,
- const MCPhysReg (*RURoots)[2],
- unsigned NRU,
- const MCPhysReg *DL,
- const LaneBitmask *RUMS,
- const char *Strings,
- const char *ClassStrings,
- const uint16_t *SubIndices,
- unsigned NumIndices,
+ unsigned PC, const MCRegisterClass *C, unsigned NC,
+ const MCPhysReg (*RURoots)[2], unsigned NRU,
+ const int16_t *DL, const LaneBitmask *RUMS,
+ const char *Strings, const char *ClassStrings,
+ const uint16_t *SubIndices, unsigned NumIndices,
const SubRegCoveredBits *SubIdxRanges,
const uint16_t *RET) {
Desc = D;
@@ -594,15 +507,37 @@ public:
/// MCSubRegIterator enumerates all sub-registers of Reg.
/// If IncludeSelf is set, Reg itself is included in the list.
-class MCSubRegIterator : public MCRegisterInfo::DiffListIterator {
+class MCSubRegIterator
+ : public iterator_adaptor_base<MCSubRegIterator,
+ MCRegisterInfo::DiffListIterator,
+ std::forward_iterator_tag, const MCPhysReg> {
+ // Cache the current value, so that we can return a reference to it.
+ MCPhysReg Val;
+
public:
+ /// Constructs an end iterator.
+ MCSubRegIterator() = default;
+
MCSubRegIterator(MCRegister Reg, const MCRegisterInfo *MCRI,
bool IncludeSelf = false) {
- init(Reg, MCRI->DiffLists + MCRI->get(Reg).SubRegs);
+ assert(MCRegister::isPhysicalRegister(Reg.id()));
+ I.init(Reg.id(), MCRI->DiffLists + MCRI->get(Reg).SubRegs);
// Initially, the iterator points to Reg itself.
+ Val = MCPhysReg(*I);
if (!IncludeSelf)
++*this;
}
+
+ const MCPhysReg &operator*() const { return Val; }
+
+ using iterator_adaptor_base::operator++;
+ MCSubRegIterator &operator++() {
+ Val = MCPhysReg(*++I);
+ return *this;
+ }
+
+ /// Returns true if this iterator is not yet at the end.
+ bool isValid() const { return I.isValid(); }
};
/// Iterator that enumerates the sub-registers of a Reg and the associated
@@ -641,46 +576,62 @@ public:
/// MCSuperRegIterator enumerates all super-registers of Reg.
/// If IncludeSelf is set, Reg itself is included in the list.
-class MCSuperRegIterator : public MCRegisterInfo::DiffListIterator {
+class MCSuperRegIterator
+ : public iterator_adaptor_base<MCSuperRegIterator,
+ MCRegisterInfo::DiffListIterator,
+ std::forward_iterator_tag, const MCPhysReg> {
+ // Cache the current value, so that we can return a reference to it.
+ MCPhysReg Val;
+
public:
+ /// Constructs an end iterator.
MCSuperRegIterator() = default;
MCSuperRegIterator(MCRegister Reg, const MCRegisterInfo *MCRI,
bool IncludeSelf = false) {
- init(Reg, MCRI->DiffLists + MCRI->get(Reg).SuperRegs);
+ assert(MCRegister::isPhysicalRegister(Reg.id()));
+ I.init(Reg.id(), MCRI->DiffLists + MCRI->get(Reg).SuperRegs);
// Initially, the iterator points to Reg itself.
+ Val = MCPhysReg(*I);
if (!IncludeSelf)
++*this;
}
+
+ const MCPhysReg &operator*() const { return Val; }
+
+ using iterator_adaptor_base::operator++;
+ MCSuperRegIterator &operator++() {
+ Val = MCPhysReg(*++I);
+ return *this;
+ }
+
+ /// Returns true if this iterator is not yet at the end.
+ bool isValid() const { return I.isValid(); }
};
// Definition for isSuperRegister. Put it down here since it needs the
// iterator defined above in addition to the MCRegisterInfo class itself.
inline bool MCRegisterInfo::isSuperRegister(MCRegister RegA, MCRegister RegB) const {
- for (MCSuperRegIterator I(RegA, this); I.isValid(); ++I)
- if (*I == RegB)
- return true;
- return false;
+ return is_contained(superregs(RegA), RegB);
}
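isSuperRegister is now a one-liner over the superregs() range, and client code can use the same range adaptors instead of hand-written iterator loops. A hedged sketch using a hypothetical helper (regOrSuperIs is not an LLVM API):

#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCRegisterInfo.h"

// Returns true if Reg itself or any of its super-registers equals Candidate.
bool regOrSuperIs(const llvm::MCRegisterInfo &MCRI, llvm::MCRegister Reg,
                  llvm::MCRegister Candidate) {
  return Reg == Candidate || llvm::is_contained(MCRI.superregs(Reg), Candidate);
}

// Range-for works directly over the adaptors as well:
//   for (llvm::MCPhysReg Sub : MCRI.subregs(Reg)) { ... }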
//===----------------------------------------------------------------------===//
// Register Units
//===----------------------------------------------------------------------===//
-// Register units are used to compute register aliasing. Every register has at
-// least one register unit, but it can have more. Two registers overlap if and
-// only if they have a common register unit.
-//
-// A target with a complicated sub-register structure will typically have many
-// fewer register units than actual registers. MCRI::getNumRegUnits() returns
-// the number of register units in the target.
-
// MCRegUnitIterator enumerates a list of register units for Reg. The list is
// in ascending numerical order.
-class MCRegUnitIterator : public MCRegisterInfo::DiffListIterator {
+class MCRegUnitIterator
+ : public iterator_adaptor_base<MCRegUnitIterator,
+ MCRegisterInfo::DiffListIterator,
+ std::forward_iterator_tag, const MCRegUnit> {
+ // The value must be kept in sync with RegisterInfoEmitter.cpp.
+ static constexpr unsigned RegUnitBits = 12;
+ // Cache the current value, so that we can return a reference to it.
+ MCRegUnit Val;
+
public:
- /// MCRegUnitIterator - Create an iterator that traverses the register units
- /// in Reg.
+ /// Constructs an end iterator.
MCRegUnitIterator() = default;
MCRegUnitIterator(MCRegister Reg, const MCRegisterInfo *MCRI) {
@@ -688,24 +639,22 @@ public:
assert(MCRegister::isPhysicalRegister(Reg.id()));
// Decode the RegUnits MCRegisterDesc field.
unsigned RU = MCRI->get(Reg).RegUnits;
- unsigned Scale = RU & 15;
- unsigned Offset = RU >> 4;
-
- // Initialize the iterator to Reg * Scale, and the List pointer to
- // DiffLists + Offset.
- init(Reg * Scale, MCRI->DiffLists + Offset);
-
- // That may not be a valid unit, we need to advance by one to get the real
- // unit number. The first differential can be 0 which would normally
- // terminate the list, but since we know every register has at least one
- // unit, we can allow a 0 differential here.
- advance();
+ unsigned FirstRU = RU & ((1u << RegUnitBits) - 1);
+ unsigned Offset = RU >> RegUnitBits;
+ I.init(FirstRU, MCRI->DiffLists + Offset);
+ Val = MCRegUnit(*I);
}
+ const MCRegUnit &operator*() const { return Val; }
+
+ using iterator_adaptor_base::operator++;
MCRegUnitIterator &operator++() {
- MCRegisterInfo::DiffListIterator::operator++();
+ Val = MCRegUnit(*++I);
return *this;
}
+
+ /// Returns true if this iterator is not yet at the end.
+ bool isValid() const { return I.isValid(); }
};
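The RegUnits descriptor field packs the first register unit into the low RegUnitBits (12) bits and a DiffLists offset into the remaining high bits; the constructor above undoes that packing. A worked example of the decode with made-up values, assuming the same 12-bit split:

#include <cassert>
#include <cstdint>

void decodeRegUnitsField() {
  constexpr unsigned RegUnitBits = 12; // must match RegisterInfoEmitter.cpp
  uint32_t RegUnits = (37u << RegUnitBits) | 5u; // offset 37, first regunit 5

  unsigned FirstRU = RegUnits & ((1u << RegUnitBits) - 1);
  unsigned Offset = RegUnits >> RegUnitBits;
  assert(FirstRU == 5 && Offset == 37);
  // The iterator then seeds DiffListIterator with FirstRU and walks the
  // differential list stored at DiffLists + Offset.
  (void)FirstRU;
  (void)Offset;
}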
/// MCRegUnitMaskIterator enumerates a list of register units and their
@@ -842,6 +791,37 @@ public:
}
};
+inline iterator_range<MCSubRegIterator>
+MCRegisterInfo::subregs(MCRegister Reg) const {
+ return make_range({Reg, this, /*IncludeSelf=*/false}, MCSubRegIterator());
+}
+
+inline iterator_range<MCSubRegIterator>
+MCRegisterInfo::subregs_inclusive(MCRegister Reg) const {
+ return make_range({Reg, this, /*IncludeSelf=*/true}, MCSubRegIterator());
+}
+
+inline iterator_range<MCSuperRegIterator>
+MCRegisterInfo::superregs(MCRegister Reg) const {
+ return make_range({Reg, this, /*IncludeSelf=*/false}, MCSuperRegIterator());
+}
+
+inline iterator_range<MCSuperRegIterator>
+MCRegisterInfo::superregs_inclusive(MCRegister Reg) const {
+ return make_range({Reg, this, /*IncludeSelf=*/true}, MCSuperRegIterator());
+}
+
+inline detail::concat_range<const MCPhysReg, iterator_range<MCSubRegIterator>,
+ iterator_range<MCSuperRegIterator>>
+MCRegisterInfo::sub_and_superregs_inclusive(MCRegister Reg) const {
+ return concat<const MCPhysReg>(subregs_inclusive(Reg), superregs(Reg));
+}
+
+inline iterator_range<MCRegUnitIterator>
+MCRegisterInfo::regunits(MCRegister Reg) const {
+ return make_range({Reg, this}, MCRegUnitIterator());
+}
+
} // end namespace llvm
#endif // LLVM_MC_MCREGISTERINFO_H
diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h
index 6dffc158af50..e6cf27ce2d65 100644
--- a/llvm/include/llvm/MC/MCSchedule.h
+++ b/llvm/include/llvm/MC/MCSchedule.h
@@ -58,14 +58,26 @@ struct MCProcResourceDesc {
}
};
-/// Identify one of the processor resource kinds consumed by a particular
-/// scheduling class for the specified number of cycles.
+/// Identify one of the processor resource kinds consumed by a
+/// particular scheduling class for the specified number of cycles.
+/// TODO: consider renaming the fields `StartAtCycle` and `Cycles` to
+/// `AcquireAtCycle` and `ReleaseAtCycle` respectively, to stress the
+/// fact that resource allocation is now represented as an interval
+/// relative to the issue cycle of the instruction.
struct MCWriteProcResEntry {
uint16_t ProcResourceIdx;
+ /// Cycle at which the resource will be released by an instruction,
+ /// relative to the cycle in which the instruction is issued
+ /// (assuming no stalls in between).
uint16_t Cycles;
+ /// Cycle at which the resource will be grabbed by an instruction,
+ /// relative to the cycle in which the instruction is issued
+ /// (assuming no stalls in between).
+ uint16_t StartAtCycle;
bool operator==(const MCWriteProcResEntry &Other) const {
- return ProcResourceIdx == Other.ProcResourceIdx && Cycles == Other.Cycles;
+ return ProcResourceIdx == Other.ProcResourceIdx && Cycles == Other.Cycles &&
+ StartAtCycle == Other.StartAtCycle;
}
};
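With StartAtCycle added, a write's use of a processor resource becomes an interval relative to the issue cycle: the resource is held from StartAtCycle up to, but not including, Cycles. A small standalone sketch of that interpretation (illustrative only, not an LLVM type):

#include <cstdint>

struct ResourceInterval {
  uint16_t AcquireAt; // relative cycle at which the resource is grabbed
  uint16_t ReleaseAt; // relative cycle at which the resource is released
  unsigned busyCycles() const { return ReleaseAt - AcquireAt; }
  bool overlaps(const ResourceInterval &O) const {
    return AcquireAt < O.ReleaseAt && O.AcquireAt < ReleaseAt;
  }
};

// E.g. an entry with StartAtCycle = 1 and Cycles = 4 occupies the resource
// during relative cycles 1, 2 and 3: ResourceInterval{1, 4}.busyCycles() == 3.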
@@ -301,6 +313,11 @@ struct MCSchedModel {
bool CompleteModel;
+ // Tells the MachineScheduler whether or not to track resource usage
+ // using intervals via ResourceSegments (see
+ // llvm/include/llvm/CodeGen/MachineScheduler.h).
+ bool EnableIntervals;
+
unsigned ProcID;
const MCProcResourceDesc *ProcResourceTable;
const MCSchedClassDesc *SchedClassTable;
diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h
index f55ebf89b69f..90bc48ec185c 100644
--- a/llvm/include/llvm/MC/MCSection.h
+++ b/llvm/include/llvm/MC/MCSection.h
@@ -74,7 +74,7 @@ private:
/// The section index in the assemblers section list.
unsigned Ordinal = 0;
/// The index of this section in the layout order.
- unsigned LayoutOrder;
+ unsigned LayoutOrder = 0;
/// Keeping track of bundle-locked state.
BundleLockStateType BundleLockState = NotBundleLocked;
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index e15fb485b54a..dcd84a6efda0 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -22,11 +22,11 @@
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/MC/MCWinEH.h"
-#include "llvm/Support/ARMTargetParser.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/VersionTuple.h"
+#include "llvm/TargetParser/ARMTargetParser.h"
#include <cassert>
#include <cstdint>
#include <memory>
@@ -157,7 +157,7 @@ public:
virtual void emitTextAttribute(unsigned Attribute, StringRef String);
virtual void emitIntTextAttribute(unsigned Attribute, unsigned IntValue,
StringRef StringValue = "");
- virtual void emitFPU(unsigned FPU);
+ virtual void emitFPU(ARM::FPUKind FPU);
virtual void emitArch(ARM::ArchKind Arch);
virtual void emitArchExtension(uint64_t ArchExt);
virtual void emitObjectArch(ARM::ArchKind Arch);
@@ -214,6 +214,10 @@ class MCStreamer {
std::unique_ptr<MCTargetStreamer> TargetStreamer;
std::vector<MCDwarfFrameInfo> DwarfFrameInfos;
+ // This is a pair of index into DwarfFrameInfos and the MCSection associated
+ // with the frame. Note, we use an index instead of an iterator because they
+ // can be invalidated in std::vector.
+ SmallVector<std::pair<size_t, MCSection *>, 1> FrameInfoStack;
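The comment above stores an index rather than an iterator because std::vector iterators can be invalidated by reallocation. A minimal illustration of the hazard the index sidesteps (generic C++, unrelated to MCStreamer itself):

#include <cstddef>
#include <vector>

void indexSurvivesReallocation() {
  std::vector<int> Frames{1, 2, 3};
  std::size_t Idx = 1; // stays valid across push_back
  // auto It = Frames.begin() + 1;   // would dangle after the loop below
  for (int I = 0; I < 1000; ++I)
    Frames.push_back(I); // may reallocate and invalidate iterators
  int Value = Frames[Idx]; // still well-defined: Value == 2
  (void)Value;
}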
MCDwarfFrameInfo *getCurrentDwarfFrameInfo();
/// Similar to DwarfFrameInfos, but for SEH unwind info. Chained frames may
@@ -632,7 +636,7 @@ public:
/// \param Symbol - The function containing the trap.
/// \param Lang - The language code for the exception entry.
/// \param Reason - The reason code for the exception entry.
- virtual void emitXCOFFExceptDirective(const MCSymbol *Symbol,
+ virtual void emitXCOFFExceptDirective(const MCSymbol *Symbol,
const MCSymbol *Trap,
unsigned Lang, unsigned Reason,
unsigned FunctionSize, bool hasDebug);
@@ -641,7 +645,13 @@ public:
/// relocation table for one or more symbols.
///
/// \param Sym - The symbol on the .ref directive.
- virtual void emitXCOFFRefDirective(StringRef Sym);
+ virtual void emitXCOFFRefDirective(const MCSymbol *Symbol);
+
+ /// Emit a C_INFO symbol with XCOFF embedded metadata to the .info section.
+ ///
+ /// \param Name - The embedded metadata name
+ /// \param Metadata - The embedded metadata
+ virtual void emitXCOFFCInfoSym(StringRef Name, StringRef Metadata);
/// Emit an ELF .size directive.
///
@@ -1021,28 +1031,29 @@ public:
virtual void emitCFISections(bool EH, bool Debug);
void emitCFIStartProc(bool IsSimple, SMLoc Loc = SMLoc());
void emitCFIEndProc();
- virtual void emitCFIDefCfa(int64_t Register, int64_t Offset);
- virtual void emitCFIDefCfaOffset(int64_t Offset);
- virtual void emitCFIDefCfaRegister(int64_t Register);
+ virtual void emitCFIDefCfa(int64_t Register, int64_t Offset, SMLoc Loc = {});
+ virtual void emitCFIDefCfaOffset(int64_t Offset, SMLoc Loc = {});
+ virtual void emitCFIDefCfaRegister(int64_t Register, SMLoc Loc = {});
virtual void emitCFILLVMDefAspaceCfa(int64_t Register, int64_t Offset,
- int64_t AddressSpace);
- virtual void emitCFIOffset(int64_t Register, int64_t Offset);
+ int64_t AddressSpace, SMLoc Loc = {});
+ virtual void emitCFIOffset(int64_t Register, int64_t Offset, SMLoc Loc = {});
virtual void emitCFIPersonality(const MCSymbol *Sym, unsigned Encoding);
virtual void emitCFILsda(const MCSymbol *Sym, unsigned Encoding);
- virtual void emitCFIRememberState();
- virtual void emitCFIRestoreState();
- virtual void emitCFISameValue(int64_t Register);
- virtual void emitCFIRestore(int64_t Register);
- virtual void emitCFIRelOffset(int64_t Register, int64_t Offset);
- virtual void emitCFIAdjustCfaOffset(int64_t Adjustment);
- virtual void emitCFIEscape(StringRef Values);
+ virtual void emitCFIRememberState(SMLoc Loc);
+ virtual void emitCFIRestoreState(SMLoc Loc);
+ virtual void emitCFISameValue(int64_t Register, SMLoc Loc = {});
+ virtual void emitCFIRestore(int64_t Register, SMLoc Loc = {});
+ virtual void emitCFIRelOffset(int64_t Register, int64_t Offset, SMLoc Loc);
+ virtual void emitCFIAdjustCfaOffset(int64_t Adjustment, SMLoc Loc = {});
+ virtual void emitCFIEscape(StringRef Values, SMLoc Loc = {});
virtual void emitCFIReturnColumn(int64_t Register);
- virtual void emitCFIGnuArgsSize(int64_t Size);
+ virtual void emitCFIGnuArgsSize(int64_t Size, SMLoc Loc = {});
virtual void emitCFISignalFrame();
- virtual void emitCFIUndefined(int64_t Register);
- virtual void emitCFIRegister(int64_t Register1, int64_t Register2);
- virtual void emitCFIWindowSave();
- virtual void emitCFINegateRAState();
+ virtual void emitCFIUndefined(int64_t Register, SMLoc Loc = {});
+ virtual void emitCFIRegister(int64_t Register1, int64_t Register2,
+ SMLoc Loc = {});
+ virtual void emitCFIWindowSave(SMLoc Loc = {});
+ virtual void emitCFINegateRAState(SMLoc Loc = {});
virtual void emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc = SMLoc());
virtual void emitWinCFIEndProc(SMLoc Loc = SMLoc());
@@ -1096,7 +1107,7 @@ public:
  /// Emit a pseudo probe into the current section.
virtual void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
- uint64_t Attr,
+ uint64_t Attr, uint64_t Discriminator,
const MCPseudoProbeInlineStack &InlineStack,
MCSymbol *FnSym);
diff --git a/llvm/include/llvm/MC/MCSubtargetInfo.h b/llvm/include/llvm/MC/MCSubtargetInfo.h
index 0b1f35960911..c1533ac8d005 100644
--- a/llvm/include/llvm/MC/MCSubtargetInfo.h
+++ b/llvm/include/llvm/MC/MCSubtargetInfo.h
@@ -16,10 +16,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCSchedule.h"
-#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <cstdint>
#include <optional>
@@ -230,6 +230,10 @@ public:
return Found != ProcDesc.end() && StringRef(Found->Key) == CPU;
}
+ ArrayRef<SubtargetSubTypeKV> getAllProcessorDescriptions() const {
+ return ProcDesc;
+ }
+
virtual unsigned getHwMode() const { return 0; }
/// Return the cache size in bytes for the given level of cache.
diff --git a/llvm/include/llvm/MC/MCSymbol.h b/llvm/include/llvm/MC/MCSymbol.h
index 8954960e3b8c..d0d51f32e967 100644
--- a/llvm/include/llvm/MC/MCSymbol.h
+++ b/llvm/include/llvm/MC/MCSymbol.h
@@ -76,11 +76,11 @@ protected:
///
/// If this is a fragment, then it gives the fragment this symbol's value is
/// relative to, if any.
- ///
- /// For the 'HasName' integer, this is true if this symbol is named.
- /// A named symbol will have a pointer to the name allocated in the bytes
- /// immediately prior to the MCSymbol.
- mutable PointerIntPair<MCFragment *, 1> FragmentAndHasName;
+ mutable MCFragment *Fragment = nullptr;
+
+ /// True if this symbol is named. A named symbol will have a pointer to the
+ /// name allocated in the bytes immediately prior to the MCSymbol.
+ unsigned HasName : 1;
/// IsTemporary - True if this is an assembler temporary label, which
/// typically does not survive in the .o file's symbol table. Usually
@@ -102,6 +102,9 @@ protected:
/// This symbol is private extern.
mutable unsigned IsPrivateExtern : 1;
+ /// This symbol is weak external.
+ mutable unsigned IsWeakExternal : 1;
+
/// LLVM RTTI discriminator. This is actually a SymbolKind enumerator, but is
/// unsigned to avoid sign extension and achieve better bitpacking with MSVC.
unsigned Kind : 3;
@@ -161,10 +164,10 @@ protected:
MCSymbol(SymbolKind Kind, const StringMapEntry<bool> *Name, bool isTemporary)
: IsTemporary(isTemporary), IsRedefinable(false), IsUsed(false),
IsRegistered(false), IsExternal(false), IsPrivateExtern(false),
- Kind(Kind), IsUsedInReloc(false), SymbolContents(SymContentsUnset),
- CommonAlignLog2(0), Flags(0) {
+ IsWeakExternal(false), Kind(Kind), IsUsedInReloc(false),
+ SymbolContents(SymContentsUnset), CommonAlignLog2(0), Flags(0) {
Offset = 0;
- FragmentAndHasName.setInt(!!Name);
+ HasName = !!Name;
if (Name)
getNameEntryPtr() = Name;
}
@@ -187,7 +190,7 @@ private:
/// Get a reference to the name field. Requires that we have a name
const StringMapEntry<bool> *&getNameEntryPtr() {
- assert(FragmentAndHasName.getInt() && "Name is required");
+ assert(HasName && "Name is required");
NameEntryStorageTy *Name = reinterpret_cast<NameEntryStorageTy *>(this);
return (*(Name - 1)).NameEntry;
}
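getNameEntryPtr relies on the layout described earlier: the pointer to the name entry lives in the bytes immediately before the MCSymbol object, so it is recovered by stepping back from this. A simplified sketch of that prefix-storage pattern, using plain malloc and std::string instead of MCContext's allocator and the real name-entry type:

#include <cstdlib>
#include <new>
#include <string>

struct PrefixNamed {
  const std::string *namePtr() const {
    auto Slot = reinterpret_cast<const std::string *const *>(this);
    return *(Slot - 1); // the name pointer sits right before the object
  }

  static PrefixNamed *create(const std::string *Name) {
    void *Mem = std::malloc(sizeof(const std::string *) + sizeof(PrefixNamed));
    auto *Slot = new (Mem) const std::string *(Name); // name pointer first ...
    return new (Slot + 1) PrefixNamed();              // ... object right after
  }
};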
@@ -201,7 +204,7 @@ public:
/// getName - Get the symbol name.
StringRef getName() const {
- if (!FragmentAndHasName.getInt())
+ if (!HasName)
return StringRef();
return getNameEntryPtr()->first();
@@ -272,11 +275,11 @@ public:
/// Mark the symbol as defined in the fragment \p F.
void setFragment(MCFragment *F) const {
assert(!isVariable() && "Cannot set fragment of variable");
- FragmentAndHasName.setPointer(F);
+ Fragment = F;
}
/// Mark the symbol as undefined.
- void setUndefined() { FragmentAndHasName.setPointer(nullptr); }
+ void setUndefined() { Fragment = nullptr; }
bool isELF() const { return Kind == SymbolKindELF; }
@@ -393,11 +396,11 @@ public:
}
MCFragment *getFragment(bool SetUsed = true) const {
- MCFragment *Fragment = FragmentAndHasName.getPointer();
- if (Fragment || !isVariable())
+ if (Fragment || !isVariable() || isWeakExternal())
return Fragment;
+ // If the symbol is a non-weak alias, get information about
+ // the aliasee. (Don't try to resolve weak aliases.)
Fragment = getVariableValue(SetUsed)->findAssociatedFragment();
- FragmentAndHasName.setPointer(Fragment);
return Fragment;
}
@@ -407,6 +410,8 @@ public:
bool isPrivateExtern() const { return IsPrivateExtern; }
void setPrivateExtern(bool Value) { IsPrivateExtern = Value; }
+ bool isWeakExternal() const { return IsWeakExternal; }
+
/// print - Print the value to the stream \p OS.
void print(raw_ostream &OS, const MCAsmInfo *MAI) const;
diff --git a/llvm/include/llvm/MC/MCSymbolCOFF.h b/llvm/include/llvm/MC/MCSymbolCOFF.h
index 94087ce871ae..7983fff7e6af 100644
--- a/llvm/include/llvm/MC/MCSymbolCOFF.h
+++ b/llvm/include/llvm/MC/MCSymbolCOFF.h
@@ -9,6 +9,7 @@
#ifndef LLVM_MC_MCSYMBOLCOFF_H
#define LLVM_MC_MCSYMBOLCOFF_H
+#include "llvm/BinaryFormat/COFF.h"
#include "llvm/MC/MCSymbol.h"
#include <cstdint>
@@ -22,8 +23,9 @@ class MCSymbolCOFF : public MCSymbol {
SF_ClassMask = 0x00FF,
SF_ClassShift = 0,
- SF_WeakExternal = 0x0100,
- SF_SafeSEH = 0x0200,
+ SF_SafeSEH = 0x0100,
+ SF_WeakExternalCharacteristicsMask = 0x0E00,
+ SF_WeakExternalCharacteristicsShift = 9,
};
public:
@@ -44,11 +46,16 @@ public:
modifyFlags(StorageClass << SF_ClassShift, SF_ClassMask);
}
- bool isWeakExternal() const {
- return getFlags() & SF_WeakExternal;
+ COFF::WeakExternalCharacteristics getWeakExternalCharacteristics() const {
+ return static_cast<COFF::WeakExternalCharacteristics>((getFlags() & SF_WeakExternalCharacteristicsMask) >>
+ SF_WeakExternalCharacteristicsShift);
}
- void setIsWeakExternal() const {
- modifyFlags(SF_WeakExternal, SF_WeakExternal);
+ void setWeakExternalCharacteristics(COFF::WeakExternalCharacteristics Characteristics) const {
+ modifyFlags(Characteristics << SF_WeakExternalCharacteristicsShift,
+ SF_WeakExternalCharacteristicsMask);
+ }
+ void setIsWeakExternal(bool WeakExt) const {
+ IsWeakExternal = WeakExt;
}
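The flags word now packs the COFF storage class into the low byte and the weak-external characteristics into bits 9 through 11 (mask 0x0E00, shift 9), replacing the old single SF_WeakExternal bit. A worked example of the packing arithmetic with illustrative values:

#include <cassert>
#include <cstdint>

void weakExternalFlagPacking() {
  constexpr uint16_t ClassMask = 0x00FF, ClassShift = 0;
  constexpr uint16_t WeakCharMask = 0x0E00, WeakCharShift = 9;

  uint16_t Flags = 0;
  Flags |= (2u << ClassShift) & ClassMask;       // storage class 2 (EXTERNAL)
  Flags |= (1u << WeakCharShift) & WeakCharMask; // characteristics value 1

  assert(((Flags & ClassMask) >> ClassShift) == 2);
  assert(((Flags & WeakCharMask) >> WeakCharShift) == 1);
  (void)Flags;
}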
bool isSafeSEH() const {
diff --git a/llvm/include/llvm/MC/MCTargetOptions.h b/llvm/include/llvm/MC/MCTargetOptions.h
index 74fc60823a1b..9fc1e07d085e 100644
--- a/llvm/include/llvm/MC/MCTargetOptions.h
+++ b/llvm/include/llvm/MC/MCTargetOptions.h
@@ -85,6 +85,10 @@ public:
/// integrated assembler.
std::vector<std::string> IASSearchPaths;
+ // Whether to emit compact-unwind for non-canonical personality
+ // functions on Darwins.
+ bool EmitCompactUnwindNonCanonical : 1;
+
MCTargetOptions();
/// getABIName - If this returns a non-empty string this represents the
diff --git a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
index e7211740ec48..7f6ee6c8be22 100644
--- a/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
+++ b/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
@@ -35,6 +35,8 @@ bool getDwarf64();
EmitDwarfUnwindType getEmitDwarfUnwind();
+bool getEmitCompactUnwindNonCanonical();
+
bool getShowMCInst();
bool getFatalWarnings();
diff --git a/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h b/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h
index 3015efe7389e..307800e73c68 100644
--- a/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h
+++ b/llvm/include/llvm/MC/MCWinCOFFObjectWriter.h
@@ -20,37 +20,41 @@ class MCFixup;
class MCValue;
class raw_pwrite_stream;
- class MCWinCOFFObjectTargetWriter : public MCObjectTargetWriter {
- virtual void anchor();
-
- const unsigned Machine;
-
- protected:
- MCWinCOFFObjectTargetWriter(unsigned Machine_);
-
- public:
- virtual ~MCWinCOFFObjectTargetWriter() = default;
-
- Triple::ObjectFormatType getFormat() const override { return Triple::COFF; }
- static bool classof(const MCObjectTargetWriter *W) {
- return W->getFormat() == Triple::COFF;
- }
-
- unsigned getMachine() const { return Machine; }
- virtual unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
- const MCFixup &Fixup, bool IsCrossSection,
- const MCAsmBackend &MAB) const = 0;
- virtual bool recordRelocation(const MCFixup &) const { return true; }
- };
-
- /// Construct a new Win COFF writer instance.
- ///
- /// \param MOTW - The target specific WinCOFF writer subclass.
- /// \param OS - The stream to write to.
- /// \returns The constructed object writer.
- std::unique_ptr<MCObjectWriter>
- createWinCOFFObjectWriter(std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW,
- raw_pwrite_stream &OS);
+class MCWinCOFFObjectTargetWriter : public MCObjectTargetWriter {
+ virtual void anchor();
+
+ const unsigned Machine;
+
+protected:
+ MCWinCOFFObjectTargetWriter(unsigned Machine_);
+
+public:
+ virtual ~MCWinCOFFObjectTargetWriter() = default;
+
+ Triple::ObjectFormatType getFormat() const override { return Triple::COFF; }
+ static bool classof(const MCObjectTargetWriter *W) {
+ return W->getFormat() == Triple::COFF;
+ }
+
+ unsigned getMachine() const { return Machine; }
+ virtual unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsCrossSection,
+ const MCAsmBackend &MAB) const = 0;
+ virtual bool recordRelocation(const MCFixup &) const { return true; }
+};
+
+/// Construct a new Win COFF writer instance.
+///
+/// \param MOTW - The target specific WinCOFF writer subclass.
+/// \param OS - The stream to write to.
+/// \returns The constructed object writer.
+std::unique_ptr<MCObjectWriter>
+createWinCOFFObjectWriter(std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW,
+ raw_pwrite_stream &OS);
+
+std::unique_ptr<MCObjectWriter>
+createWinCOFFDwoObjectWriter(std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW,
+ raw_pwrite_stream &OS, raw_pwrite_stream &DwoOS);
} // end namespace llvm
#endif // LLVM_MC_MCWINCOFFOBJECTWRITER_H
diff --git a/llvm/include/llvm/MC/MCWinEH.h b/llvm/include/llvm/MC/MCWinEH.h
index 2c516f78efef..fcce2dcd5483 100644
--- a/llvm/include/llvm/MC/MCWinEH.h
+++ b/llvm/include/llvm/MC/MCWinEH.h
@@ -68,7 +68,7 @@ struct FrameInfo {
int64_t Offset;
int64_t Length;
bool HasProlog;
- MCSymbol *Symbol;
+ MCSymbol *Symbol = nullptr;
// Map an Epilog's symbol to its offset within the function.
MapVector<MCSymbol *, int64_t> Epilogs;
diff --git a/llvm/include/llvm/MC/MCXCOFFStreamer.h b/llvm/include/llvm/MC/MCXCOFFStreamer.h
index a437faeccbff..041bbbfa474b 100644
--- a/llvm/include/llvm/MC/MCXCOFFStreamer.h
+++ b/llvm/include/llvm/MC/MCXCOFFStreamer.h
@@ -31,10 +31,7 @@ public:
void emitXCOFFSymbolLinkageWithVisibility(MCSymbol *Symbol,
MCSymbolAttr Linkage,
MCSymbolAttr Visibility) override;
- void emitXCOFFRefDirective(StringRef Name) override {
- report_fatal_error("emitXCOFFRefDirective is not implemented yet on object"
- "generation path");
- }
+ void emitXCOFFRefDirective(const MCSymbol *Symbol) override;
void emitXCOFFRenameDirective(const MCSymbol *Name,
StringRef Rename) override {
report_fatal_error("emitXCOFFRenameDirective is not implemented yet on "
@@ -43,6 +40,7 @@ public:
void emitXCOFFExceptDirective(const MCSymbol *Symbol, const MCSymbol *Trap,
unsigned Lang, unsigned Reason,
unsigned FunctionSize, bool hasDebug) override;
+ void emitXCOFFCInfoSym(StringRef Name, StringRef Metadata) override;
};
} // end namespace llvm
diff --git a/llvm/include/llvm/MC/TargetRegistry.h b/llvm/include/llvm/MC/TargetRegistry.h
index 41362feab5b5..3fa150fc1349 100644
--- a/llvm/include/llvm/MC/TargetRegistry.h
+++ b/llvm/include/llvm/MC/TargetRegistry.h
@@ -20,12 +20,12 @@
#include "llvm-c/DisassemblerTypes.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <cstddef>
#include <iterator>
@@ -564,7 +564,8 @@ public:
case Triple::UnknownObjectFormat:
llvm_unreachable("Unknown object format");
case Triple::COFF:
- assert(T.isOSWindows() && "only Windows COFF is supported");
+ assert((T.isOSWindows() || T.isUEFI()) &&
+ "only Windows and UEFI COFF are supported");
S = COFFStreamerCtorFn(Ctx, std::move(TAB), std::move(OW),
std::move(Emitter), RelaxAll,
IncrementalLinkerCompatible);
@@ -791,8 +792,7 @@ struct TargetRegistry {
/// \param Triple - The triple to use for finding a target.
/// \param Error - On failure, an error string describing why no target was
/// found.
- static const Target *lookupTarget(const std::string &Triple,
- std::string &Error);
+ static const Target *lookupTarget(StringRef Triple, std::string &Error);
/// lookupTarget - Lookup a target based on an architecture name
/// and a target triple. If the architecture name is non-empty,
@@ -805,8 +805,8 @@ struct TargetRegistry {
/// by architecture is done.
/// \param Error - On failure, an error string describing why no target was
/// found.
- static const Target *lookupTarget(const std::string &ArchName,
- Triple &TheTriple, std::string &Error);
+ static const Target *lookupTarget(StringRef ArchName, Triple &TheTriple,
+ std::string &Error);
/// @}
/// @name Target Registration
diff --git a/llvm/include/llvm/MCA/CodeEmitter.h b/llvm/include/llvm/MCA/CodeEmitter.h
index 502d8127fa3c..431ceea27064 100644
--- a/llvm/include/llvm/MCA/CodeEmitter.h
+++ b/llvm/include/llvm/MCA/CodeEmitter.h
@@ -23,7 +23,6 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/raw_ostream.h"
namespace llvm {
namespace mca {
@@ -38,7 +37,6 @@ class CodeEmitter {
const MCCodeEmitter &MCE;
SmallString<256> Code;
- raw_svector_ostream VecOS;
ArrayRef<MCInst> Sequence;
// An EncodingInfo pair stores <base, length> information. Base (i.e. first)
@@ -53,8 +51,7 @@ class CodeEmitter {
public:
CodeEmitter(const MCSubtargetInfo &ST, const MCAsmBackend &AB,
const MCCodeEmitter &CE, ArrayRef<MCInst> S)
- : STI(ST), MAB(AB), MCE(CE), VecOS(Code), Sequence(S),
- Encodings(S.size()) {}
+ : STI(ST), MAB(AB), MCE(CE), Sequence(S), Encodings(S.size()) {}
StringRef getEncoding(unsigned MCID) {
EncodingInfo EI = getOrCreateEncodingInfo(MCID);
diff --git a/llvm/include/llvm/MCA/CustomBehaviour.h b/llvm/include/llvm/MCA/CustomBehaviour.h
index e2a7ad1b2870..b3774894517a 100644
--- a/llvm/include/llvm/MCA/CustomBehaviour.h
+++ b/llvm/include/llvm/MCA/CustomBehaviour.h
@@ -133,7 +133,7 @@ public:
StringRef getData() const { return Data; }
};
-using SharedInstrument = std::shared_ptr<Instrument>;
+using UniqueInstrument = std::unique_ptr<Instrument>;
/// This class allows targets to optionally customize the logic that resolves
/// scheduling class IDs. Targets can use information encoded in Instrument
@@ -156,8 +156,15 @@ public:
// Instrument.Desc equal to Type
virtual bool supportsInstrumentType(StringRef Type) const { return false; }
- /// Allocate an Instrument, and return a shared pointer to it.
- virtual SharedInstrument createInstrument(StringRef Desc, StringRef Data);
+ /// Allocate an Instrument, and return a unique pointer to it. This function
+ /// may be useful to create instruments coming from comments in the assembly.
+ /// See createInstruments to create Instruments from MCInst
+ virtual UniqueInstrument createInstrument(StringRef Desc, StringRef Data);
+
+ /// Return a list of unique pointers to Instruments, where each Instrument
+ /// is allocated by this function. See createInstrument to create Instrument
+ /// from a description and data.
+ virtual SmallVector<UniqueInstrument> createInstruments(const MCInst &Inst);
/// Given an MCInst and a vector of Instrument, a target can
/// return a SchedClassID. This can be used by a subtarget to return a
@@ -165,9 +172,8 @@ public:
/// BaseInstruction This can be useful when a BaseInstruction does not convey
/// the correct scheduling information without additional data. By default,
/// it returns the SchedClassID that belongs to MCI.
- virtual unsigned
- getSchedClassID(const MCInstrInfo &MCII, const MCInst &MCI,
- const SmallVector<SharedInstrument> &IVec) const;
+ virtual unsigned getSchedClassID(const MCInstrInfo &MCII, const MCInst &MCI,
+ const SmallVector<Instrument *> &IVec) const;
};
} // namespace mca
diff --git a/llvm/include/llvm/MCA/HWEventListener.h b/llvm/include/llvm/MCA/HWEventListener.h
index 8298e0705d33..a24b90654e2f 100644
--- a/llvm/include/llvm/MCA/HWEventListener.h
+++ b/llvm/include/llvm/MCA/HWEventListener.h
@@ -29,7 +29,7 @@ public:
// This is the list of event types that are shared by all targets, that
// generic subtarget-agnostic classes (e.g., Pipeline, HWInstructionEvent,
// ...) and generic Views can manipulate.
- // Subtargets are free to define additional event types, that are goin to be
+ // Subtargets are free to define additional event types, that are going to be
// handled by generic components as opaque values, but can still be
// emitted by subtarget-specific pipeline stages (e.g., ExecuteStage,
// DispatchStage, ...) and interpreted by subtarget-specific EventListener
@@ -59,7 +59,10 @@ public:
const InstRef &IR;
};
+// ResourceRef::first is the index of the associated Resource.
+// ResourceRef::second is a bitmask of the referenced sub-unit of the resource.
using ResourceRef = std::pair<uint64_t, uint64_t>;
+
using ResourceUse = std::pair<ResourceRef, ResourceCycles>;
class HWInstructionIssuedEvent : public HWInstructionEvent {
diff --git a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
index c05f770df8eb..81a5453bac26 100644
--- a/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
+++ b/llvm/include/llvm/MCA/HardwareUnits/LSUnit.h
@@ -33,13 +33,13 @@ namespace mca {
/// Instruction::LSUTokenID of each dispatched instructions. That token is used
/// internally by the LSUnit to track memory dependencies.
class MemoryGroup {
- unsigned NumPredecessors;
- unsigned NumExecutingPredecessors;
- unsigned NumExecutedPredecessors;
+ unsigned NumPredecessors = 0;
+ unsigned NumExecutingPredecessors = 0;
+ unsigned NumExecutedPredecessors = 0;
- unsigned NumInstructions;
- unsigned NumExecuting;
- unsigned NumExecuted;
+ unsigned NumInstructions = 0;
+ unsigned NumExecuting = 0;
+ unsigned NumExecuted = 0;
// Successors that are in a order dependency with this group.
SmallVector<MemoryGroup *, 4> OrderSucc;
// Successors that are in a data dependency with this group.
@@ -52,10 +52,7 @@ class MemoryGroup {
MemoryGroup &operator=(const MemoryGroup &) = delete;
public:
- MemoryGroup()
- : NumPredecessors(0), NumExecutingPredecessors(0),
- NumExecutedPredecessors(0), NumInstructions(0), NumExecuting(0),
- NumExecuted(0), CriticalPredecessor() {}
+ MemoryGroup() = default;
MemoryGroup(MemoryGroup &&) = default;
size_t getNumSuccessors() const {
@@ -269,7 +266,7 @@ public:
bool isLQFull() const { return LQSize && LQSize == UsedLQEntries; }
bool isValidGroupID(unsigned Index) const {
- return Index && (Groups.find(Index) != Groups.end());
+ return Index && Groups.contains(Index);
}
  /// Check if a previously dispatched instruction IR is now ready for execution.
diff --git a/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h b/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h
index 1b811978dd76..5bd74ad5fe78 100644
--- a/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h
+++ b/llvm/include/llvm/MCA/HardwareUnits/RegisterFile.h
@@ -237,10 +237,10 @@ public:
SmallVectorImpl<WriteRef> &Writes,
SmallVectorImpl<WriteRef> &CommittedWrites) const;
struct RAWHazard {
- MCPhysReg RegisterID;
- int CyclesLeft;
+ MCPhysReg RegisterID = 0;
+ int CyclesLeft = 0;
- RAWHazard() : RegisterID(), CyclesLeft() {}
+ RAWHazard() = default;
bool isValid() const { return RegisterID; }
bool hasUnknownCycles() const { return CyclesLeft < 0; }
};
diff --git a/llvm/include/llvm/MCA/IncrementalSourceMgr.h b/llvm/include/llvm/MCA/IncrementalSourceMgr.h
index d91cc5f23311..d53f1138b940 100644
--- a/llvm/include/llvm/MCA/IncrementalSourceMgr.h
+++ b/llvm/include/llvm/MCA/IncrementalSourceMgr.h
@@ -35,17 +35,17 @@ class IncrementalSourceMgr : public SourceMgr {
std::deque<Instruction *> Staging;
/// Current instruction index.
- unsigned TotalCounter;
+ unsigned TotalCounter = 0U;
/// End-of-stream flag.
- bool EOS;
+ bool EOS = false;
/// Called when an instruction is no longer needed.
using InstFreedCallback = llvm::function_ref<void(Instruction *)>;
InstFreedCallback InstFreedCB;
public:
- IncrementalSourceMgr() : TotalCounter(0U), EOS(false) {}
+ IncrementalSourceMgr() = default;
void clear();
diff --git a/llvm/include/llvm/MCA/InstrBuilder.h b/llvm/include/llvm/MCA/InstrBuilder.h
index cca71bbdff99..c8619af04b33 100644
--- a/llvm/include/llvm/MCA/InstrBuilder.h
+++ b/llvm/include/llvm/MCA/InstrBuilder.h
@@ -84,11 +84,10 @@ class InstrBuilder {
InstRecycleCallback InstRecycleCB;
Expected<const InstrDesc &>
- createInstrDescImpl(const MCInst &MCI,
- const SmallVector<SharedInstrument> &IVec);
+ createInstrDescImpl(const MCInst &MCI, const SmallVector<Instrument *> &IVec);
Expected<const InstrDesc &>
getOrCreateInstrDesc(const MCInst &MCI,
- const SmallVector<SharedInstrument> &IVec);
+ const SmallVector<Instrument *> &IVec);
InstrBuilder(const InstrBuilder &) = delete;
InstrBuilder &operator=(const InstrBuilder &) = delete;
@@ -114,8 +113,7 @@ public:
void setInstRecycleCallback(InstRecycleCallback CB) { InstRecycleCB = CB; }
Expected<std::unique_ptr<Instruction>>
- createInstruction(const MCInst &MCI,
- const SmallVector<SharedInstrument> &IVec);
+ createInstruction(const MCInst &MCI, const SmallVector<Instrument *> &IVec);
};
} // namespace mca
} // namespace llvm
diff --git a/llvm/include/llvm/MCA/Pipeline.h b/llvm/include/llvm/MCA/Pipeline.h
index 92c3836124ad..18032fdfe012 100644
--- a/llvm/include/llvm/MCA/Pipeline.h
+++ b/llvm/include/llvm/MCA/Pipeline.h
@@ -56,12 +56,12 @@ class Pipeline {
Started, // Pipeline has started running.
Paused // Pipeline is paused.
};
- State CurrentState;
+ State CurrentState = State::Created;
/// An ordered list of stages that define this instruction pipeline.
SmallVector<std::unique_ptr<Stage>, 8> Stages;
std::set<HWEventListener *> Listeners;
- unsigned Cycles;
+ unsigned Cycles = 0;
Error runCycle();
bool hasWorkToProcess();
@@ -69,7 +69,7 @@ class Pipeline {
void notifyCycleEnd();
public:
- Pipeline() : CurrentState(State::Created), Cycles(0) {}
+ Pipeline() = default;
void appendStage(std::unique_ptr<Stage> S);
/// Returns the total number of simulated cycles.
diff --git a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
index 40bc3b5aed94..f9286acef900 100644
--- a/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
+++ b/llvm/include/llvm/MCA/Stages/InOrderIssueStage.h
@@ -35,10 +35,10 @@ struct StallInfo {
};
InstRef IR;
- unsigned CyclesLeft;
- StallKind Kind;
+ unsigned CyclesLeft = 0;
+ StallKind Kind = StallKind::DEFAULT;
- StallInfo() : CyclesLeft(), Kind(StallKind::DEFAULT) {}
+ StallInfo() = default;
StallKind getStallKind() const { return Kind; }
unsigned getCyclesLeft() const { return CyclesLeft; }
diff --git a/llvm/include/llvm/MCA/Stages/Stage.h b/llvm/include/llvm/MCA/Stages/Stage.h
index 2477b9b3d69c..a1fc7a8af7ad 100644
--- a/llvm/include/llvm/MCA/Stages/Stage.h
+++ b/llvm/include/llvm/MCA/Stages/Stage.h
@@ -25,7 +25,7 @@ namespace mca {
class InstRef;
class Stage {
- Stage *NextInSequence;
+ Stage *NextInSequence = nullptr;
std::set<HWEventListener *> Listeners;
Stage(const Stage &Other) = delete;
@@ -35,7 +35,7 @@ protected:
const std::set<HWEventListener *> &getListeners() const { return Listeners; }
public:
- Stage() : NextInSequence(nullptr) {}
+ Stage() = default;
virtual ~Stage();
/// Returns true if it can execute IR during this cycle.
diff --git a/llvm/include/llvm/MCA/Support.h b/llvm/include/llvm/MCA/Support.h
index 1debf376a079..e5e627817105 100644
--- a/llvm/include/llvm/MCA/Support.h
+++ b/llvm/include/llvm/MCA/Support.h
@@ -99,7 +99,7 @@ void computeProcResourceMasks(const MCSchedModel &SM,
// the highest bit set can be used to construct a resource mask identifier.
inline unsigned getResourceStateIndex(uint64_t Mask) {
assert(Mask && "Processor Resource Mask cannot be zero!");
- return (std::numeric_limits<uint64_t>::digits - countLeadingZeros(Mask)) - 1;
+ return llvm::Log2_64(Mask);
}
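getResourceStateIndex now uses Log2_64, which returns the index of the highest set bit and is equivalent to the old countLeadingZeros expression. A tiny worked example (Log2_64 comes from LLVM's MathExtras.h):

#include <cassert>
#include <cstdint>
#include "llvm/Support/MathExtras.h"

void resourceStateIndexExample() {
  uint64_t Mask = 1ull << 13;            // a one-hot resource mask
  assert(llvm::Log2_64(Mask) == 13u);    // index of the highest set bit
  uint64_t Grouped = Mask | (1ull << 3); // a group mask with extra low bits
  assert(llvm::Log2_64(Grouped) == 13u); // lower bits do not change the index
  (void)Mask;
  (void)Grouped;
}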
/// Compute the reciprocal block throughput from a set of processor resource
diff --git a/llvm/include/llvm/Object/Archive.h b/llvm/include/llvm/Object/Archive.h
index fbacee964aa8..27f504779c4f 100644
--- a/llvm/include/llvm/Object/Archive.h
+++ b/llvm/include/llvm/Object/Archive.h
@@ -302,6 +302,7 @@ public:
StringRef getName() const;
Expected<Child> getMember() const;
Symbol getNext() const;
+ bool isECSymbol() const;
};
class symbol_iterator {
@@ -352,6 +353,8 @@ public:
return make_range(symbol_begin(), symbol_end());
}
+ Expected<iterator_range<symbol_iterator>> ec_symbols() const;
+
static bool classof(Binary const *v) { return v->isArchive(); }
// check if a symbol is in the archive
@@ -362,6 +365,7 @@ public:
StringRef getSymbolTable() const { return SymbolTable; }
StringRef getStringTable() const { return StringTable; }
uint32_t getNumberOfSymbols() const;
+ uint32_t getNumberOfECSymbols() const;
virtual uint64_t getFirstChildOffset() const { return getArchiveMagicLen(); }
std::vector<std::unique_ptr<MemoryBuffer>> takeThinBuffers() {
@@ -377,6 +381,7 @@ protected:
void setFirstRegular(const Child &C);
StringRef SymbolTable;
+ StringRef ECSymbolTable;
StringRef StringTable;
private:
@@ -405,14 +410,13 @@ public:
const FixLenHdr *ArFixLenHdr;
uint64_t FirstChildOffset = 0;
uint64_t LastChildOffset = 0;
+ std::string MergedGlobalSymtabBuf;
public:
BigArchive(MemoryBufferRef Source, Error &Err);
uint64_t getFirstChildOffset() const override { return FirstChildOffset; }
uint64_t getLastChildOffset() const { return LastChildOffset; }
- bool isEmpty() const override {
- return Data.getBufferSize() == sizeof(FixLenHdr);
- };
+ bool isEmpty() const override { return getFirstChildOffset() == 0; }
};
} // end namespace object
diff --git a/llvm/include/llvm/Object/ArchiveWriter.h b/llvm/include/llvm/Object/ArchiveWriter.h
index 6acab45215da..c89246f1d569 100644
--- a/llvm/include/llvm/Object/ArchiveWriter.h
+++ b/llvm/include/llvm/Object/ArchiveWriter.h
@@ -43,7 +43,8 @@ Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To);
Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
bool WriteSymtab, object::Archive::Kind Kind,
bool Deterministic, bool Thin,
- std::unique_ptr<MemoryBuffer> OldArchiveBuf = nullptr);
+ std::unique_ptr<MemoryBuffer> OldArchiveBuf = nullptr,
+ bool IsEC = false);
// writeArchiveToBuffer is similar to writeArchive but returns the Archive in a
// buffer instead of writing it out to a file.
diff --git a/llvm/include/llvm/Object/Binary.h b/llvm/include/llvm/Object/Binary.h
index 53b299ae8612..ce870e25acaf 100644
--- a/llvm/include/llvm/Object/Binary.h
+++ b/llvm/include/llvm/Object/Binary.h
@@ -14,11 +14,11 @@
#define LLVM_OBJECT_BINARY_H
#include "llvm-c/Types.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Object/Error.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/TargetParser/Triple.h"
#include <memory>
#include <utility>
@@ -69,6 +69,7 @@ protected:
ID_MachO64L, // MachO 64-bit, little endian
ID_MachO64B, // MachO 64-bit, big endian
+ ID_GOFF,
ID_Wasm,
ID_EndObjects
@@ -145,6 +146,8 @@ public:
return TypeID == ID_IR;
}
+ bool isGOFF() const { return TypeID == ID_GOFF; }
+
bool isMinidump() const { return TypeID == ID_Minidump; }
bool isTapiFile() const { return TypeID == ID_TapiFile; }
@@ -164,6 +167,8 @@ public:
return Triple::MachO;
if (isELF())
return Triple::ELF;
+ if (isGOFF())
+ return Triple::GOFF;
return Triple::UnknownObjectFormat;
}
diff --git a/llvm/include/llvm/Object/BuildID.h b/llvm/include/llvm/Object/BuildID.h
index 91c247be2cf6..b20f32b4d133 100644
--- a/llvm/include/llvm/Object/BuildID.h
+++ b/llvm/include/llvm/Object/BuildID.h
@@ -29,8 +29,11 @@ typedef ArrayRef<uint8_t> BuildIDRef;
class ObjectFile;
+/// Parses a build ID from a hex string.
+BuildID parseBuildID(StringRef Str);
+
/// Returns the build ID, if any, contained in the given object file.
-std::optional<BuildIDRef> getBuildID(const ObjectFile *Obj);
+BuildIDRef getBuildID(const ObjectFile *Obj);
/// BuildIDFetcher searches local cache directories for debug info.
class BuildIDFetcher {
diff --git a/llvm/include/llvm/Object/COFF.h b/llvm/include/llvm/Object/COFF.h
index 89e12f465d17..4bad7f10323c 100644
--- a/llvm/include/llvm/Object/COFF.h
+++ b/llvm/include/llvm/Object/COFF.h
@@ -15,7 +15,6 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/COFF.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/CVDebugRecord.h"
#include "llvm/Object/Error.h"
@@ -24,6 +23,7 @@
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
@@ -722,6 +722,47 @@ struct coff_load_configuration64 {
support::ulittle64_t CastGuardOsDeterminedFailureMode;
};
+struct chpe_metadata {
+ support::ulittle32_t Version;
+ support::ulittle32_t CodeMap;
+ support::ulittle32_t CodeMapCount;
+ support::ulittle32_t CodeRangesToEntryPoints;
+ support::ulittle32_t RedirectionMetadata;
+ support::ulittle32_t __os_arm64x_dispatch_call_no_redirect;
+ support::ulittle32_t __os_arm64x_dispatch_ret;
+ support::ulittle32_t __os_arm64x_dispatch_call;
+ support::ulittle32_t __os_arm64x_dispatch_icall;
+ support::ulittle32_t __os_arm64x_dispatch_icall_cfg;
+ support::ulittle32_t AlternateEntryPoint;
+ support::ulittle32_t AuxiliaryIAT;
+ support::ulittle32_t CodeRangesToEntryPointsCount;
+ support::ulittle32_t RedirectionMetadataCount;
+ support::ulittle32_t GetX64InformationFunctionPointer;
+ support::ulittle32_t SetX64InformationFunctionPointer;
+ support::ulittle32_t ExtraRFETable;
+ support::ulittle32_t ExtraRFETableSize;
+ support::ulittle32_t __os_arm64x_dispatch_fptr;
+ support::ulittle32_t AuxiliaryIATCopy;
+};
+
+struct chpe_range_entry {
+ support::ulittle32_t StartOffset;
+ support::ulittle32_t Length;
+};
+
+enum chpe_range_type { CHPE_RANGE_ARM64, CHPE_RANGE_ARM64EC, CHPE_RANGE_AMD64 };
+
+struct chpe_code_range_entry {
+ support::ulittle32_t StartRva;
+ support::ulittle32_t EndRva;
+ support::ulittle32_t EntryPoint;
+};
+
+struct chpe_redirection_entry {
+ support::ulittle32_t Source;
+ support::ulittle32_t Destination;
+};
+
struct coff_runtime_function_x64 {
support::ulittle32_t BeginAddress;
support::ulittle32_t EndAddress;
@@ -813,6 +854,7 @@ private:
const coff_tls_directory64 *TLSDirectory64;
// Either coff_load_configuration32 or coff_load_configuration64.
const void *LoadConfig = nullptr;
+ const chpe_metadata *CHPEMetadata = nullptr;
Expected<StringRef> getString(uint32_t offset) const;
@@ -846,8 +888,17 @@ public:
}
uint16_t getMachine() const {
- if (COFFHeader)
+ if (COFFHeader) {
+ if (CHPEMetadata) {
+ switch (COFFHeader->Machine) {
+ case COFF::IMAGE_FILE_MACHINE_AMD64:
+ return COFF::IMAGE_FILE_MACHINE_ARM64EC;
+ case COFF::IMAGE_FILE_MACHINE_ARM64:
+ return COFF::IMAGE_FILE_MACHINE_ARM64X;
+ }
+ }
return COFFHeader->Machine;
+ }
if (COFFBigObjHeader)
return COFFBigObjHeader->Machine;
llvm_unreachable("no COFF header!");
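When CHPE load-config metadata is present, getMachine() reports the hybrid ARM64EC or ARM64X machine type instead of the raw COFF header value, as shown above. A small hedged sketch of a consumer keying off that (machineName is a hypothetical helper, not an LLVM API):

#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Object/COFF.h"

const char *machineName(const llvm::object::COFFObjectFile &Obj) {
  switch (Obj.getMachine()) { // already folds in CHPE metadata, per above
  case llvm::COFF::IMAGE_FILE_MACHINE_ARM64EC: return "arm64ec";
  case llvm::COFF::IMAGE_FILE_MACHINE_ARM64X:  return "arm64x";
  case llvm::COFF::IMAGE_FILE_MACHINE_ARM64:   return "arm64";
  case llvm::COFF::IMAGE_FILE_MACHINE_AMD64:   return "x86_64";
  default:                                     return "other";
  }
}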
@@ -927,6 +978,9 @@ public:
assert(is64());
return reinterpret_cast<const coff_load_configuration64 *>(LoadConfig);
}
+
+ const chpe_metadata *getCHPEMetadata() const { return CHPEMetadata; }
+
StringRef getRelocationTypeName(uint16_t Type) const;
protected:
@@ -969,6 +1023,8 @@ public:
section_iterator section_begin() const override;
section_iterator section_end() const override;
+ bool is64Bit() const override { return false; }
+
const coff_section *getCOFFSection(const SectionRef &Section) const;
COFFSymbolRef getCOFFSymbol(const DataRefImpl &Ref) const;
COFFSymbolRef getCOFFSymbol(const SymbolRef &Symbol) const;
@@ -1257,7 +1313,7 @@ private:
BinaryByteStream BBS;
SectionRef Section;
- const COFFObjectFile *Obj;
+ const COFFObjectFile *Obj = nullptr;
std::vector<const coff_relocation *> Relocs;
diff --git a/llvm/include/llvm/Object/COFFImportFile.h b/llvm/include/llvm/Object/COFFImportFile.h
index f8f0e0343b22..3d148112dcbb 100644
--- a/llvm/include/llvm/Object/COFFImportFile.h
+++ b/llvm/include/llvm/Object/COFFImportFile.h
@@ -56,6 +56,8 @@ public:
return BasicSymbolRef(Symb, this);
}
+ bool is64Bit() const override { return false; }
+
const coff_import_header *getCOFFImportHeader() const {
return reinterpret_cast<const object::coff_import_header *>(
Data.getBufferStart());
diff --git a/llvm/include/llvm/Object/COFFModuleDefinition.h b/llvm/include/llvm/Object/COFFModuleDefinition.h
index 8e14dd61472d..a4ed9978dcc0 100644
--- a/llvm/include/llvm/Object/COFFModuleDefinition.h
+++ b/llvm/include/llvm/Object/COFFModuleDefinition.h
@@ -39,12 +39,9 @@ struct COFFModuleDefinition {
uint32_t MinorOSVersion = 0;
};
-// mingw and wine def files do not mangle _ for x86 which
-// is a consequence of legacy binutils' dlltool functionality.
-// This MingwDef flag should be removed once mingw stops this pratice.
Expected<COFFModuleDefinition>
parseCOFFModuleDefinition(MemoryBufferRef MB, COFF::MachineTypes Machine,
- bool MingwDef = false);
+ bool MingwDef = false, bool AddUnderscores = true);
} // End namespace object.
} // End namespace llvm.
diff --git a/llvm/include/llvm/Object/DXContainer.h b/llvm/include/llvm/Object/DXContainer.h
index ffa2db4f64f0..ece2dfdd8b2d 100644
--- a/llvm/include/llvm/Object/DXContainer.h
+++ b/llvm/include/llvm/Object/DXContainer.h
@@ -20,9 +20,120 @@
#include "llvm/BinaryFormat/DXContainer.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/TargetParser/Triple.h"
+#include <variant>
namespace llvm {
namespace object {
+
+namespace DirectX {
+class PSVRuntimeInfo {
+
+ // This class provides a view into the underlying resource array. The Resource
+ // data is little-endian encoded and may not be properly aligned to read
+ // directly from. The dereference operator creates a copy of the data and byte
+ // swaps it as appropriate.
+ struct ResourceArray {
+ StringRef Data;
+ uint32_t Stride; // size of each element in the list.
+
+ ResourceArray() = default;
+ ResourceArray(StringRef D, size_t S) : Data(D), Stride(S) {}
+
+ using value_type = dxbc::PSV::v2::ResourceBindInfo;
+ static constexpr uint32_t MaxStride() {
+ return static_cast<uint32_t>(sizeof(value_type));
+ }
+
+ struct iterator {
+ StringRef Data;
+ uint32_t Stride; // size of each element in the list.
+ const char *Current;
+
+ iterator(const ResourceArray &A, const char *C)
+ : Data(A.Data), Stride(A.Stride), Current(C) {}
+ iterator(const iterator &) = default;
+
+ value_type operator*() {
+ // Explicitly zero the structure so that unused fields are zeroed. It is
+ // up to the user to know if the fields are used by verifying the PSV
+ // version.
+ value_type Val = {{0, 0, 0, 0}, 0, 0};
+ if (Current >= Data.end())
+ return Val;
+ memcpy(static_cast<void *>(&Val), Current,
+ std::min(Stride, MaxStride()));
+ if (sys::IsBigEndianHost)
+ Val.swapBytes();
+ return Val;
+ }
+
+ iterator operator++() {
+ if (Current < Data.end())
+ Current += Stride;
+ return *this;
+ }
+
+ iterator operator++(int) {
+ iterator Tmp = *this;
+ ++*this;
+ return Tmp;
+ }
+
+ iterator operator--() {
+ if (Current > Data.begin())
+ Current -= Stride;
+ return *this;
+ }
+
+ iterator operator--(int) {
+ iterator Tmp = *this;
+ --*this;
+ return Tmp;
+ }
+
+ bool operator==(const iterator I) { return I.Current == Current; }
+ bool operator!=(const iterator I) { return !(*this == I); }
+ };
+
+ iterator begin() const { return iterator(*this, Data.begin()); }
+
+ iterator end() const { return iterator(*this, Data.end()); }
+
+ size_t size() const { return Data.size() / Stride; }
+ };
+
+ StringRef Data;
+ uint32_t Size;
+ using InfoStruct =
+ std::variant<std::monostate, dxbc::PSV::v0::RuntimeInfo,
+ dxbc::PSV::v1::RuntimeInfo, dxbc::PSV::v2::RuntimeInfo>;
+ InfoStruct BasicInfo;
+ ResourceArray Resources;
+
+public:
+ PSVRuntimeInfo(StringRef D) : Data(D), Size(0) {}
+
+ // Parsing depends on the shader kind
+ Error parse(uint16_t ShaderKind);
+
+ uint32_t getSize() const { return Size; }
+ uint32_t getResourceCount() const { return Resources.size(); }
+ ResourceArray getResources() const { return Resources; }
+
+ uint32_t getVersion() const {
+ return Size >= sizeof(dxbc::PSV::v2::RuntimeInfo)
+ ? 2
+ : (Size >= sizeof(dxbc::PSV::v1::RuntimeInfo) ? 1 : 0);
+ }
+
+ uint32_t getResourceStride() const { return Resources.Stride; }
+
+ const InfoStruct &getInfo() const { return BasicInfo; }
+};
+
+} // namespace DirectX
+
class DXContainer {
public:
using DXILData = std::pair<dxbc::ProgramHeader, const char *>;
@@ -36,12 +147,14 @@ private:
std::optional<DXILData> DXIL;
std::optional<uint64_t> ShaderFlags;
std::optional<dxbc::ShaderHash> Hash;
+ std::optional<DirectX::PSVRuntimeInfo> PSVInfo;
Error parseHeader();
Error parsePartOffsets();
Error parseDXILHeader(StringRef Part);
Error parseShaderFlags(StringRef Part);
Error parseHash(StringRef Part);
+ Error parsePSVInfo(StringRef Part);
friend class PartIterator;
public:
@@ -118,11 +231,15 @@ public:
const dxbc::Header &getHeader() const { return Header; }
- std::optional<DXILData> getDXIL() const { return DXIL; }
+ const std::optional<DXILData> &getDXIL() const { return DXIL; }
std::optional<uint64_t> getShaderFlags() const { return ShaderFlags; }
std::optional<dxbc::ShaderHash> getShaderHash() const { return Hash; }
+
+ const std::optional<DirectX::PSVRuntimeInfo> &getPSVInfo() const {
+ return PSVInfo;
+ };
};
} // namespace object
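
The new DirectX::PSVRuntimeInfo view decodes the pipeline state validation part lazily: the version is inferred from the record size, and ResourceArray copies and byte-swaps each strided element on dereference. A hedged consumption sketch, assuming DXContainer::create(MemoryBufferRef) as the parsing entry point:

#include "llvm/Object/DXContainer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::object;

static void dumpPSV(MemoryBufferRef Buf) {
  Expected<DXContainer> C = DXContainer::create(Buf);
  if (!C)
    return consumeError(C.takeError());
  const std::optional<DirectX::PSVRuntimeInfo> &PSV = C->getPSVInfo();
  if (!PSV)
    return; // No PSV0 part in this container.
  outs() << "PSV version:    " << PSV->getVersion() << "\n"
         << "resource count: " << PSV->getResourceCount() << "\n";
  for (auto Binding : PSV->getResources())
    (void)Binding; // dxbc::PSV::v2::ResourceBindInfo, already byte-swapped.
}
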
diff --git a/llvm/include/llvm/Object/ELF.h b/llvm/include/llvm/Object/ELF.h
index 1664ff96542f..a1cf47a1c4a6 100644
--- a/llvm/include/llvm/Object/ELF.h
+++ b/llvm/include/llvm/Object/ELF.h
@@ -14,6 +14,7 @@
#define LLVM_OBJECT_ELF_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
@@ -315,7 +316,16 @@ public:
") or size (0x" + Twine::utohexstr(Phdr.p_filesz) + ")");
return Elf_Note_Iterator(Err);
}
- return Elf_Note_Iterator(base() + Phdr.p_offset, Phdr.p_filesz, Err);
+ // Allow 4, 8, and (for Linux core dumps) 0.
+ // TODO: Disallow 1 after all tests are fixed.
+ if (Phdr.p_align != 0 && Phdr.p_align != 1 && Phdr.p_align != 4 &&
+ Phdr.p_align != 8) {
+ Err =
+ createError("alignment (" + Twine(Phdr.p_align) + ") is not 4 or 8");
+ return Elf_Note_Iterator(Err);
+ }
+ return Elf_Note_Iterator(base() + Phdr.p_offset, Phdr.p_filesz,
+ std::max<size_t>(Phdr.p_align, 4), Err);
}
/// Get an iterator over notes in a section.
@@ -334,7 +344,15 @@ public:
") or size (0x" + Twine::utohexstr(Shdr.sh_size) + ")");
return Elf_Note_Iterator(Err);
}
- return Elf_Note_Iterator(base() + Shdr.sh_offset, Shdr.sh_size, Err);
+ // TODO: Allow just 4 and 8 after all tests are fixed.
+ if (Shdr.sh_addralign != 0 && Shdr.sh_addralign != 1 &&
+ Shdr.sh_addralign != 4 && Shdr.sh_addralign != 8) {
+ Err = createError("alignment (" + Twine(Shdr.sh_addralign) +
+ ") is not 4 or 8");
+ return Elf_Note_Iterator(Err);
+ }
+ return Elf_Note_Iterator(base() + Shdr.sh_offset, Shdr.sh_size,
+ std::max<size_t>(Shdr.sh_addralign, 4), Err);
}
/// Get the end iterator for notes.
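
Both call sites now validate p_align/sh_addralign and forward std::max(align, 4) to the iterator, so 8-byte-aligned core-dump notes and the common 4-byte layout are both handled. A sketch of walking PT_NOTE segments with the new behaviour, illustrative only and assuming the existing ELFFile<ELFT>::notes(Phdr, Err) range helper:

#include "llvm/Object/ELF.h"
#include "llvm/Support/raw_ostream.h"

template <class ELFT>
static void walkNotes(const llvm::object::ELFFile<ELFT> &Elf) {
  using namespace llvm;
  auto PhdrsOrErr = Elf.program_headers();
  if (!PhdrsOrErr)
    return consumeError(PhdrsOrErr.takeError());
  for (const auto &Phdr : *PhdrsOrErr) {
    if (Phdr.p_type != ELF::PT_NOTE)
      continue;
    Error Err = Error::success();
    for (const auto &Note : Elf.notes(Phdr, Err))
      outs() << Note.getName() << " type " << (uint32_t)Note.getType() << "\n";
    // Alignment and size problems are reported through Err, not by asserting.
    if (Err)
      consumeError(std::move(Err));
  }
}
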
@@ -391,7 +409,21 @@ public:
Expected<ArrayRef<T>> getSectionContentsAsArray(const Elf_Shdr &Sec) const;
Expected<ArrayRef<uint8_t>> getSectionContents(const Elf_Shdr &Sec) const;
Expected<ArrayRef<uint8_t>> getSegmentContents(const Elf_Phdr &Phdr) const;
- Expected<std::vector<BBAddrMap>> decodeBBAddrMap(const Elf_Shdr &Sec) const;
+
+ /// Returns a vector of BBAddrMap structs corresponding to each function
+ /// within the text section that the SHT_LLVM_BB_ADDR_MAP section \p Sec
+ /// is associated with. If the current ELFFile is relocatable, a corresponding
+ /// \p RelaSec must be passed in as an argument.
+ Expected<std::vector<BBAddrMap>>
+ decodeBBAddrMap(const Elf_Shdr &Sec, const Elf_Shdr *RelaSec = nullptr) const;
+
+ /// Returns a map from every section matching \p IsMatch to its relocation
+ /// section, or \p nullptr if it has no relocation section. This function
+ /// returns an error if any of the \p IsMatch calls fail or if it fails to
+ /// retrieve the content section of any relocation section.
+ Expected<MapVector<const Elf_Shdr *, const Elf_Shdr *>>
+ getSectionAndRelocations(
+ std::function<Expected<bool>(const Elf_Shdr &)> IsMatch) const;
void createFakeSections();
};
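
The two additions are meant to be used together: getSectionAndRelocations() pairs each matching section with its relocation section, and decodeBBAddrMap() accepts that relocation section so relocatable objects decode correctly. A hedged sketch, not part of the patch:

#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Object/ELF.h"

template <class ELFT>
static void decodeAllBBAddrMaps(const llvm::object::ELFFile<ELFT> &Elf) {
  using namespace llvm;
  using Shdr = typename object::ELFFile<ELFT>::Elf_Shdr;
  auto IsMatch = [](const Shdr &Sec) -> Expected<bool> {
    return Sec.sh_type == ELF::SHT_LLVM_BB_ADDR_MAP;
  };
  auto MapOrErr = Elf.getSectionAndRelocations(IsMatch);
  if (!MapOrErr)
    return consumeError(MapOrErr.takeError());
  for (auto [Sec, RelaSec] : *MapOrErr) {
    // RelaSec is nullptr when the section has no relocation section.
    auto MapsOrErr = Elf.decodeBBAddrMap(*Sec, RelaSec);
    if (!MapsOrErr)
      consumeError(MapsOrErr.takeError());
    // else: one BBAddrMap per function covered by this section.
  }
}
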
@@ -1221,16 +1253,13 @@ Expected<StringRef> ELFFile<ELFT>::getSectionName(const Elf_Shdr &Section,
/// This function returns the hash value for a symbol in the .dynsym section
/// Name of the API remains consistent as specified in the libelf
/// REF : http://www.sco.com/developers/gabi/latest/ch5.dynamic.html#hash
-inline unsigned hashSysV(StringRef SymbolName) {
- unsigned h = 0, g;
- for (char C : SymbolName) {
- h = (h << 4) + C;
- g = h & 0xf0000000L;
- if (g != 0)
- h ^= g >> 24;
- h &= ~g;
+inline uint32_t hashSysV(StringRef SymbolName) {
+ uint32_t H = 0;
+ for (uint8_t C : SymbolName) {
+ H = (H << 4) + C;
+ H ^= (H >> 24) & 0xf0;
}
- return h;
+ return H & 0x0fffffff;
}
/// This function returns the hash value for a symbol in the .dynsym section
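
The rewrite above keeps the classic SysV ELF hash but switches to a fixed-width uint32_t, iterates over unsigned bytes, folds the old g-masking into a single shift/xor, and applies the 28-bit mask once at the end. A trivial hand check, illustrative only:

#include "llvm/Object/ELF.h"
#include <cassert>

// For the one-byte name "A" (0x41):
//   H = (0 << 4) + 0x41        -> 0x41
//   H ^= (H >> 24) & 0xf0      -> unchanged, since H >> 24 is 0
//   return H & 0x0fffffff      -> 0x41
static void checkHashSysV() {
  assert(llvm::object::hashSysV("A") == 0x41u && "trivial SysV hash case");
}
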
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index 8baf6f4c5af3..f3016cc141b0 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -16,10 +16,8 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFTypes.h"
@@ -34,6 +32,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <cstdint>
@@ -48,6 +48,12 @@ extern const llvm::EnumEntry<unsigned> ElfSymbolTypes[NumElfSymbolTypes];
class elf_symbol_iterator;
+struct ELFPltEntry {
+ StringRef Section;
+ std::optional<DataRefImpl> Symbol;
+ uint64_t Address;
+};
+
class ELFObjectFileBase : public ObjectFile {
friend class ELFRelocationRef;
friend class ELFSectionRef;
@@ -97,8 +103,7 @@ public:
virtual uint16_t getEMachine() const = 0;
- std::vector<std::pair<std::optional<DataRefImpl>, uint64_t>>
- getPltAddresses() const;
+ std::vector<ELFPltEntry> getPltEntries() const;
/// Returns a vector containing a symbol version for each dynamic symbol.
/// Returns an empty vector if version sections do not exist.
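
getPltEntries() replaces the old vector of anonymous pairs with named ELFPltEntry records and adds the containing section name. A hedged dumper sketch; rewrapping the stored DataRefImpl into a SymbolRef (via the inline constructor in ObjectFile.h) recovers the symbol name:

#include "llvm/ADT/Twine.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::object;

static void printPltEntries(const ELFObjectFileBase &Obj) {
  for (const ELFPltEntry &Entry : Obj.getPltEntries()) {
    outs() << Entry.Section << " @ 0x" << Twine::utohexstr(Entry.Address);
    if (Entry.Symbol) {
      SymbolRef Sym(*Entry.Symbol, &Obj);
      if (Expected<StringRef> Name = Sym.getName())
        outs() << " -> " << *Name;
      else
        consumeError(Name.takeError());
    }
    outs() << "\n";
  }
}
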
@@ -402,7 +407,7 @@ protected:
// This flag is used for classof, to distinguish ELFObjectFile from
// its subclass. If more subclasses will be created, this flag will
// have to become an enum.
- bool isDyldELFObject;
+ bool isDyldELFObject = false;
public:
ELFObjectFile(ELFObjectFile<ELFT> &&Other);
@@ -432,6 +437,8 @@ public:
basic_symbol_iterator symbol_begin() const override;
basic_symbol_iterator symbol_end() const override;
+ bool is64Bit() const override { return getBytesInAddress() == 8; }
+
elf_symbol_iterator dynamic_symbol_begin() const;
elf_symbol_iterator dynamic_symbol_end() const;
@@ -523,10 +530,10 @@ Expected<StringRef> ELFObjectFile<ELFT>::getSymbolName(DataRefImpl Sym) const {
// If the symbol name is empty use the section name.
if ((*SymOrErr)->getType() == ELF::STT_SECTION) {
- if (Expected<section_iterator> SecOrErr = getSymbolSection(Sym)) {
- consumeError(Name.takeError());
+ Expected<section_iterator> SecOrErr = getSymbolSection(Sym);
+ if (SecOrErr)
return (*SecOrErr)->getName();
- }
+ return SecOrErr.takeError();
}
return Name;
}
diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h
index 45e57869bace..215313ee6f9b 100644
--- a/llvm/include/llvm/Object/ELFTypes.h
+++ b/llvm/include/llvm/Object/ELFTypes.h
@@ -599,15 +599,13 @@ struct Elf_Nhdr_Impl {
Elf_Word n_descsz;
Elf_Word n_type;
- /// The alignment of the name and descriptor.
- ///
- /// Implementations differ from the specification here: in practice all
- /// variants align both the name and descriptor to 4-bytes.
- static const unsigned int Align = 4;
-
- /// Get the size of the note, including name, descriptor, and padding.
- size_t getSize() const {
- return sizeof(*this) + alignTo<Align>(n_namesz) + alignTo<Align>(n_descsz);
+ /// Get the size of the note, including name, descriptor, and padding. Both
+ /// the start and the end of the descriptor are aligned by the section
+ /// alignment. In practice many 64-bit systems deviate from the generic ABI by
+ /// using sh_addralign=4.
+ size_t getSize(size_t Align) const {
+ return alignToPowerOf2(sizeof(*this) + n_namesz, Align) +
+ alignToPowerOf2(n_descsz, Align);
}
};
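
With the alignment passed in, a note's size is the header plus name rounded up to the alignment, plus the descriptor rounded up again. A worked example with illustrative numbers (a 12-byte note header, the 5-byte name "CORE\0", and a 336-byte descriptor), using the same alignToPowerOf2 helper the hunk relies on:

#include "llvm/Support/MathExtras.h"
#include <cassert>

static void noteSizeExample() {
  // 4-byte alignment: alignToPowerOf2(12 + 5, 4) + alignToPowerOf2(336, 4)
  assert(llvm::alignToPowerOf2(12 + 5, 4) +
             llvm::alignToPowerOf2(336, 4) == 356);
  // 8-byte alignment pads the name region to 24, so the note grows to 360.
  assert(llvm::alignToPowerOf2(12 + 5, 8) +
             llvm::alignToPowerOf2(336, 8) == 360);
}
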
@@ -635,18 +633,18 @@ public:
}
/// Get the note's descriptor.
- ArrayRef<uint8_t> getDesc() const {
+ ArrayRef<uint8_t> getDesc(size_t Align) const {
if (!Nhdr.n_descsz)
return ArrayRef<uint8_t>();
return ArrayRef<uint8_t>(
- reinterpret_cast<const uint8_t *>(&Nhdr) + sizeof(Nhdr) +
- alignTo<Elf_Nhdr_Impl<ELFT>::Align>(Nhdr.n_namesz),
+ reinterpret_cast<const uint8_t *>(&Nhdr) +
+ alignToPowerOf2(sizeof(Nhdr) + Nhdr.n_namesz, Align),
Nhdr.n_descsz);
}
/// Get the note's descriptor as StringRef
- StringRef getDescAsStringRef() const {
- ArrayRef<uint8_t> Desc = getDesc();
+ StringRef getDescAsStringRef(size_t Align) const {
+ ArrayRef<uint8_t> Desc = getDesc(Align);
return StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
}
@@ -666,6 +664,7 @@ private:
// Nhdr being a nullptr marks the end of iteration.
const Elf_Nhdr_Impl<ELFT> *Nhdr = nullptr;
size_t RemainingSize = 0u;
+ size_t Align = 0;
Error *Err = nullptr;
template <class ELFFileELFT> friend class ELFFile;
@@ -693,7 +692,7 @@ private:
stopWithOverflowError();
else {
Nhdr = reinterpret_cast<const Elf_Nhdr_Impl<ELFT> *>(NhdrPos + NoteSize);
- if (Nhdr->getSize() > RemainingSize)
+ if (Nhdr->getSize(Align) > RemainingSize)
stopWithOverflowError();
else
*Err = Error::success();
@@ -702,8 +701,9 @@ private:
Elf_Note_Iterator_Impl() = default;
explicit Elf_Note_Iterator_Impl(Error &Err) : Err(&Err) {}
- Elf_Note_Iterator_Impl(const uint8_t *Start, size_t Size, Error &Err)
- : RemainingSize(Size), Err(&Err) {
+ Elf_Note_Iterator_Impl(const uint8_t *Start, size_t Size, size_t Align,
+ Error &Err)
+ : RemainingSize(Size), Align(Align), Err(&Err) {
consumeError(std::move(Err));
assert(Start && "ELF note iterator starting at NULL");
advanceNhdr(Start, 0u);
@@ -713,7 +713,7 @@ public:
Elf_Note_Iterator_Impl &operator++() {
assert(Nhdr && "incremented ELF note end iterator");
const uint8_t *NhdrPos = reinterpret_cast<const uint8_t *>(Nhdr);
- size_t NoteSize = Nhdr->getSize();
+ size_t NoteSize = Nhdr->getSize(Align);
advanceNhdr(NhdrPos, NoteSize);
return *this;
}
@@ -799,27 +799,63 @@ struct BBAddrMap {
uint64_t Addr; // Function address
// Struct representing the BBAddrMap information for one basic block.
struct BBEntry {
+ struct Metadata {
+ bool HasReturn : 1; // If this block ends with a return (or tail
+ // call).
+ bool HasTailCall : 1; // If this block ends with a tail call.
+ bool IsEHPad : 1; // If this is an exception handling block.
+ bool CanFallThrough : 1; // If this block can fall through to its next.
+ bool HasIndirectBranch : 1; // If this block ends with an indirect branch
+ // (branch via a register).
+
+ bool operator==(const Metadata &Other) const {
+ return HasReturn == Other.HasReturn &&
+ HasTailCall == Other.HasTailCall && IsEHPad == Other.IsEHPad &&
+ CanFallThrough == Other.CanFallThrough &&
+ HasIndirectBranch == Other.HasIndirectBranch;
+ }
+
+ // Encodes this struct as a uint32_t value.
+ uint32_t encode() const {
+ return static_cast<uint32_t>(HasReturn) |
+ (static_cast<uint32_t>(HasTailCall) << 1) |
+ (static_cast<uint32_t>(IsEHPad) << 2) |
+ (static_cast<uint32_t>(CanFallThrough) << 3) |
+ (static_cast<uint32_t>(HasIndirectBranch) << 4);
+ }
+
+ // Decodes and returns a Metadata struct from a uint32_t value.
+ static Expected<Metadata> decode(uint32_t V) {
+ Metadata MD{/*HasReturn=*/static_cast<bool>(V & 1),
+ /*HasTailCall=*/static_cast<bool>(V & (1 << 1)),
+ /*IsEHPad=*/static_cast<bool>(V & (1 << 2)),
+ /*CanFallThrough=*/static_cast<bool>(V & (1 << 3)),
+ /*HasIndirectBranch=*/static_cast<bool>(V & (1 << 4))};
+ if (MD.encode() != V)
+ return createStringError(
+ std::error_code(), "invalid encoding for BBEntry::Metadata: 0x%x",
+ V);
+ return MD;
+ }
+ };
+
uint32_t ID; // Unique ID of this basic block.
uint32_t Offset; // Offset of basic block relative to function start.
uint32_t Size; // Size of the basic block.
+    Metadata MD;      // Metadata for this basic block.
- // The following fields are decoded from the Metadata field. The encoding
- // happens in AsmPrinter.cpp:getBBAddrMapMetadata.
- bool HasReturn; // If this block ends with a return (or tail call).
- bool HasTailCall; // If this block ends with a tail call.
- bool IsEHPad; // If this is an exception handling block.
- bool CanFallThrough; // If this block can fall through to its next.
-
- BBEntry(uint32_t ID, uint32_t Offset, uint32_t Size, uint32_t Metadata)
- : ID(ID), Offset(Offset), Size(Size), HasReturn(Metadata & 1),
- HasTailCall(Metadata & (1 << 1)), IsEHPad(Metadata & (1 << 2)),
- CanFallThrough(Metadata & (1 << 3)){};
+ BBEntry(uint32_t ID, uint32_t Offset, uint32_t Size, Metadata MD)
+ : ID(ID), Offset(Offset), Size(Size), MD(MD){};
bool operator==(const BBEntry &Other) const {
return ID == Other.ID && Offset == Other.Offset && Size == Other.Size &&
- HasReturn == Other.HasReturn && HasTailCall == Other.HasTailCall &&
- IsEHPad == Other.IsEHPad && CanFallThrough == Other.CanFallThrough;
+ MD == Other.MD;
}
+
+ bool hasReturn() const { return MD.HasReturn; }
+ bool hasTailCall() const { return MD.HasTailCall; }
+ bool isEHPad() const { return MD.IsEHPad; }
+ bool canFallThrough() const { return MD.CanFallThrough; }
};
std::vector<BBEntry> BBEntries; // Basic block entries for this function.
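
The five booleans now live in a self-describing Metadata bitfield whose encode()/decode() must round-trip; decode() rejects any value with unknown bits set. A small check, illustrative only:

#include "llvm/Object/ELFTypes.h"
#include "llvm/Support/Error.h"
#include <cassert>
using llvm::object::BBAddrMap;

static void metadataRoundTrip() {
  BBAddrMap::BBEntry::Metadata MD{/*HasReturn=*/true, /*HasTailCall=*/false,
                                  /*IsEHPad=*/false, /*CanFallThrough=*/true,
                                  /*HasIndirectBranch=*/false};
  // Bit 0 (return) and bit 3 (fall-through) are set: 0b01001 == 9.
  assert(MD.encode() == 9u);
  if (auto Decoded = BBAddrMap::BBEntry::Metadata::decode(9))
    assert(*Decoded == MD);
  else
    llvm::consumeError(Decoded.takeError());
  // A value with bit 5 set fails decode()'s self-check and yields an Error.
  if (auto Bad = BBAddrMap::BBEntry::Metadata::decode(1u << 5))
    (void)*Bad;
  else
    llvm::consumeError(Bad.takeError());
}
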
diff --git a/llvm/include/llvm/Object/GOFF.h b/llvm/include/llvm/Object/GOFF.h
new file mode 100644
index 000000000000..f4aa04cd99fc
--- /dev/null
+++ b/llvm/include/llvm/Object/GOFF.h
@@ -0,0 +1,284 @@
+//===- GOFF.h - GOFF object file implementation -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the GOFFObjectFile class.
+// Record classes and derivatives are also declared and implemented.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_GOFF_H
+#define LLVM_OBJECT_GOFF_H
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/BinaryFormat/GOFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace object {
+
+/// \brief Represents a GOFF physical record.
+///
+/// Specifies protected member functions to manipulate the record. These should
+/// be called from deriving classes to change values as that record specifies.
+class Record {
+public:
+ static Error getContinuousData(const uint8_t *Record, uint16_t DataLength,
+ int DataIndex, SmallString<256> &CompleteData);
+
+ static bool isContinued(const uint8_t *Record) {
+ uint8_t IsContinued;
+ getBits(Record, 1, 7, 1, IsContinued);
+ return IsContinued;
+ }
+
+ static bool isContinuation(const uint8_t *Record) {
+ uint8_t IsContinuation;
+ getBits(Record, 1, 6, 1, IsContinuation);
+ return IsContinuation;
+ }
+
+protected:
+ /// \brief Get bit field of specified byte.
+ ///
+ /// Used to pack bit fields into one byte. Fields are packed left to right.
+ /// Bit index zero is the most significant bit of the byte.
+ ///
+ /// \param ByteIndex index of byte the field is in.
+ /// \param BitIndex index of first bit of field.
+ /// \param Length length of bit field.
+ /// \param Value value of bit field.
+ static void getBits(const uint8_t *Bytes, uint8_t ByteIndex, uint8_t BitIndex,
+ uint8_t Length, uint8_t &Value) {
+ assert(ByteIndex < GOFF::RecordLength && "Byte index out of bounds!");
+ assert(BitIndex < 8 && "Bit index out of bounds!");
+ assert(Length + BitIndex <= 8 && "Bit length too long!");
+
+ get<uint8_t>(Bytes, ByteIndex, Value);
+ Value = (Value >> (8 - BitIndex - Length)) & ((1 << Length) - 1);
+ }
+
+ template <class T>
+ static void get(const uint8_t *Bytes, uint8_t ByteIndex, T &Value) {
+ assert(ByteIndex + sizeof(T) <= GOFF::RecordLength &&
+ "Byte index out of bounds!");
+ Value = support::endian::read<T, support::big, support::unaligned>(
+ &Bytes[ByteIndex]);
+ }
+};
+
+class HDRRecord : public Record {
+public:
+ static Error getData(const uint8_t *Record, SmallString<256> &CompleteData);
+
+ static uint16_t getPropertyModuleLength(const uint8_t *Record) {
+ uint16_t Length;
+ get<uint16_t>(Record, 52, Length);
+ return Length;
+ }
+};
+
+class ESDRecord : public Record {
+public:
+ /// \brief Number of bytes for name; any more must go in continuation.
+ /// This is the number of bytes that can fit into the data field of an ESD
+ /// record.
+ static const uint8_t ESDMaxUncontinuedNameLength = 8;
+
+ /// \brief Maximum name length for ESD records and continuations.
+ /// This is the number of bytes that can fit into the data field of an ESD
+ /// record AND following continuations. This is limited fundamentally by the
+ /// 16 bit SIGNED length field.
+ static const uint16_t MaxNameLength = 32 * 1024;
+
+public:
+ static Error getData(const uint8_t *Record, SmallString<256> &CompleteData);
+
+ // ESD Get routines.
+ static void getSymbolType(const uint8_t *Record,
+ GOFF::ESDSymbolType &SymbolType) {
+ uint8_t Value;
+ get<uint8_t>(Record, 3, Value);
+ SymbolType = (GOFF::ESDSymbolType)Value;
+ }
+
+ static void getEsdId(const uint8_t *Record, uint32_t &EsdId) {
+ get<uint32_t>(Record, 4, EsdId);
+ }
+
+ static void getParentEsdId(const uint8_t *Record, uint32_t &EsdId) {
+ get<uint32_t>(Record, 8, EsdId);
+ }
+
+ static void getOffset(const uint8_t *Record, uint32_t &Offset) {
+ get<uint32_t>(Record, 16, Offset);
+ }
+
+ static void getLength(const uint8_t *Record, uint32_t &Length) {
+ get<uint32_t>(Record, 24, Length);
+ }
+
+ static void getNameSpaceId(const uint8_t *Record, GOFF::ESDNameSpaceId &Id) {
+ uint8_t Value;
+ get<uint8_t>(Record, 40, Value);
+ Id = (GOFF::ESDNameSpaceId)Value;
+ }
+
+ static void getFillBytePresent(const uint8_t *Record, bool &Present) {
+ uint8_t Value;
+ getBits(Record, 41, 0, 1, Value);
+ Present = (bool)Value;
+ }
+
+ static void getNameMangled(const uint8_t *Record, bool &Mangled) {
+ uint8_t Value;
+ getBits(Record, 41, 1, 1, Value);
+ Mangled = (bool)Value;
+ }
+
+ static void getRenamable(const uint8_t *Record, bool &Renamable) {
+ uint8_t Value;
+ getBits(Record, 41, 2, 1, Value);
+ Renamable = (bool)Value;
+ }
+
+ static void getRemovable(const uint8_t *Record, bool &Removable) {
+ uint8_t Value;
+ getBits(Record, 41, 3, 1, Value);
+ Removable = (bool)Value;
+ }
+
+ static void getFillByteValue(const uint8_t *Record, uint8_t &Fill) {
+ get<uint8_t>(Record, 42, Fill);
+ }
+
+ static void getAdaEsdId(const uint8_t *Record, uint32_t &EsdId) {
+ get<uint32_t>(Record, 44, EsdId);
+ }
+
+ static void getSortPriority(const uint8_t *Record, uint32_t &Priority) {
+ get<uint32_t>(Record, 48, Priority);
+ }
+
+ static void getAmode(const uint8_t *Record, GOFF::ESDAmode &Amode) {
+ uint8_t Value;
+ get<uint8_t>(Record, 60, Value);
+ Amode = (GOFF::ESDAmode)Value;
+ }
+
+ static void getRmode(const uint8_t *Record, GOFF::ESDRmode &Rmode) {
+ uint8_t Value;
+ get<uint8_t>(Record, 61, Value);
+ Rmode = (GOFF::ESDRmode)Value;
+ }
+
+ static void getTextStyle(const uint8_t *Record, GOFF::ESDTextStyle &Style) {
+ uint8_t Value;
+ getBits(Record, 62, 0, 4, Value);
+ Style = (GOFF::ESDTextStyle)Value;
+ }
+
+ static void getBindingAlgorithm(const uint8_t *Record,
+ GOFF::ESDBindingAlgorithm &Algorithm) {
+ uint8_t Value;
+ getBits(Record, 62, 4, 4, Value);
+ Algorithm = (GOFF::ESDBindingAlgorithm)Value;
+ }
+
+ static void getTaskingBehavior(const uint8_t *Record,
+ GOFF::ESDTaskingBehavior &TaskingBehavior) {
+ uint8_t Value;
+ getBits(Record, 63, 0, 3, Value);
+ TaskingBehavior = (GOFF::ESDTaskingBehavior)Value;
+ }
+
+ static void getReadOnly(const uint8_t *Record, bool &ReadOnly) {
+ uint8_t Value;
+ getBits(Record, 63, 4, 1, Value);
+ ReadOnly = (bool)Value;
+ }
+
+ static void getExecutable(const uint8_t *Record,
+ GOFF::ESDExecutable &Executable) {
+ uint8_t Value;
+ getBits(Record, 63, 5, 3, Value);
+ Executable = (GOFF::ESDExecutable)Value;
+ }
+
+ static void getDuplicateSeverity(const uint8_t *Record,
+ GOFF::ESDDuplicateSymbolSeverity &DSS) {
+ uint8_t Value;
+ getBits(Record, 64, 2, 2, Value);
+ DSS = (GOFF::ESDDuplicateSymbolSeverity)Value;
+ }
+
+ static void getBindingStrength(const uint8_t *Record,
+ GOFF::ESDBindingStrength &Strength) {
+ uint8_t Value;
+ getBits(Record, 64, 4, 4, Value);
+ Strength = (GOFF::ESDBindingStrength)Value;
+ }
+
+ static void getLoadingBehavior(const uint8_t *Record,
+ GOFF::ESDLoadingBehavior &Behavior) {
+ uint8_t Value;
+ getBits(Record, 65, 0, 2, Value);
+ Behavior = (GOFF::ESDLoadingBehavior)Value;
+ }
+
+ static void getIndirectReference(const uint8_t *Record, bool &Indirect) {
+ uint8_t Value;
+ getBits(Record, 65, 3, 1, Value);
+ Indirect = (bool)Value;
+ }
+
+ static void getBindingScope(const uint8_t *Record,
+ GOFF::ESDBindingScope &Scope) {
+ uint8_t Value;
+ getBits(Record, 65, 4, 4, Value);
+ Scope = (GOFF::ESDBindingScope)Value;
+ }
+
+ static void getLinkageType(const uint8_t *Record,
+ GOFF::ESDLinkageType &Type) {
+ uint8_t Value;
+ getBits(Record, 66, 2, 1, Value);
+ Type = (GOFF::ESDLinkageType)Value;
+ }
+
+ static void getAlignment(const uint8_t *Record,
+ GOFF::ESDAlignment &Alignment) {
+ uint8_t Value;
+ getBits(Record, 66, 3, 5, Value);
+ Alignment = (GOFF::ESDAlignment)Value;
+ }
+
+ static uint16_t getNameLength(const uint8_t *Record) {
+ uint16_t Length;
+ get<uint16_t>(Record, 70, Length);
+ return Length;
+ }
+};
+
+class ENDRecord : public Record {
+public:
+ static Error getData(const uint8_t *Record, SmallString<256> &CompleteData);
+
+ static uint16_t getNameLength(const uint8_t *Record) {
+ uint16_t Length;
+ get<uint16_t>(Record, 24, Length);
+ return Length;
+ }
+};
+
+} // end namespace object
+} // end namespace llvm
+
+#endif
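
Record::getBits() above treats bit index 0 as the most significant bit of a byte and packs fields left to right; since it is protected, the sketch below re-derives the same extraction rule on a standalone byte purely to illustrate the indexing convention (not part of the patch):

#include <cassert>
#include <cstdint>

// Mirrors GOFF Record::getBits() for a single byte.
static uint8_t extractBits(uint8_t Byte, uint8_t BitIndex, uint8_t Length) {
  return (Byte >> (8 - BitIndex - Length)) & ((1 << Length) - 1);
}

static void goffBitExample() {
  // 0xB4 == 0b1011'0100: the 3-bit field starting at bit index 1 is 0b011.
  assert(extractBits(0xB4, 1, 3) == 0b011);
  // Byte 1 of a record: bit 6 is the continuation flag, bit 7 the continued flag.
  assert(extractBits(0x03, 6, 1) == 1 && extractBits(0x03, 7, 1) == 1);
}
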
diff --git a/llvm/include/llvm/Object/GOFFObjectFile.h b/llvm/include/llvm/Object/GOFFObjectFile.h
new file mode 100644
index 000000000000..6c7e9cf92e43
--- /dev/null
+++ b/llvm/include/llvm/Object/GOFFObjectFile.h
@@ -0,0 +1,130 @@
+//===- GOFFObjectFile.h - GOFF object file implementation -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the GOFFObjectFile class.
+// Record classes and derivatives are also declared and implemented.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_GOFFOBJECTFILE_H
+#define LLVM_OBJECT_GOFFOBJECTFILE_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/BinaryFormat/GOFF.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/ConvertEBCDIC.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/TargetParser/Triple.h"
+
+namespace llvm {
+
+namespace object {
+
+class GOFFObjectFile : public ObjectFile {
+ IndexedMap<const uint8_t *> EsdPtrs; // Indexed by EsdId.
+
+ mutable DenseMap<uint32_t, std::pair<size_t, std::unique_ptr<char[]>>>
+ EsdNamesCache;
+
+ typedef DataRefImpl SectionEntryImpl;
+ // (EDID, 0) code, r/o data section
+ // (EDID,PRID) r/w data section
+ SmallVector<SectionEntryImpl, 256> SectionList;
+ mutable DenseMap<uint32_t, std::string> SectionDataCache;
+
+public:
+ Expected<StringRef> getSymbolName(SymbolRef Symbol) const;
+
+ GOFFObjectFile(MemoryBufferRef Object, Error &Err);
+ static inline bool classof(const Binary *V) { return V->isGOFF(); }
+ section_iterator section_begin() const override;
+ section_iterator section_end() const override;
+
+ uint8_t getBytesInAddress() const override { return 8; }
+
+ StringRef getFileFormatName() const override { return "GOFF-SystemZ"; }
+
+ Triple::ArchType getArch() const override { return Triple::systemz; }
+
+ Expected<SubtargetFeatures> getFeatures() const override { return SubtargetFeatures(); }
+
+ bool isRelocatableObject() const override { return true; }
+
+ void moveSymbolNext(DataRefImpl &Symb) const override;
+ basic_symbol_iterator symbol_begin() const override;
+ basic_symbol_iterator symbol_end() const override;
+
+ bool is64Bit() const override {
+ return true;
+ }
+
+private:
+ // SymbolRef.
+ Expected<StringRef> getSymbolName(DataRefImpl Symb) const override;
+ Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const override;
+ uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
+ uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
+ Expected<uint32_t> getSymbolFlags(DataRefImpl Symb) const override;
+ Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override;
+ Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override;
+
+ const uint8_t *getSymbolEsdRecord(DataRefImpl Symb) const;
+ bool isSymbolUnresolved(DataRefImpl Symb) const;
+ bool isSymbolIndirect(DataRefImpl Symb) const;
+
+ // SectionRef.
+ void moveSectionNext(DataRefImpl &Sec) const override {}
+ virtual Expected<StringRef> getSectionName(DataRefImpl Sec) const override {
+ return StringRef();
+ }
+ uint64_t getSectionAddress(DataRefImpl Sec) const override { return 0; }
+ uint64_t getSectionSize(DataRefImpl Sec) const override { return 0; }
+ virtual Expected<ArrayRef<uint8_t>>
+ getSectionContents(DataRefImpl Sec) const override {
+ return ArrayRef<uint8_t>();
+ }
+ uint64_t getSectionIndex(DataRefImpl Sec) const override { return 0; }
+ uint64_t getSectionAlignment(DataRefImpl Sec) const override { return 0; }
+ bool isSectionCompressed(DataRefImpl Sec) const override { return false; }
+ bool isSectionText(DataRefImpl Sec) const override { return false; }
+ bool isSectionData(DataRefImpl Sec) const override { return false; }
+ bool isSectionBSS(DataRefImpl Sec) const override { return false; }
+ bool isSectionVirtual(DataRefImpl Sec) const override { return false; }
+ relocation_iterator section_rel_begin(DataRefImpl Sec) const override {
+ return relocation_iterator(RelocationRef(Sec, this));
+ }
+ relocation_iterator section_rel_end(DataRefImpl Sec) const override {
+ return relocation_iterator(RelocationRef(Sec, this));
+ }
+
+ const uint8_t *getSectionEdEsdRecord(DataRefImpl &Sec) const;
+ const uint8_t *getSectionPrEsdRecord(DataRefImpl &Sec) const;
+ const uint8_t *getSectionEdEsdRecord(uint32_t SectionIndex) const;
+ const uint8_t *getSectionPrEsdRecord(uint32_t SectionIndex) const;
+
+ // RelocationRef.
+ void moveRelocationNext(DataRefImpl &Rel) const override {}
+ uint64_t getRelocationOffset(DataRefImpl Rel) const override { return 0; }
+ symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override {
+ DataRefImpl Temp;
+ return basic_symbol_iterator(SymbolRef(Temp, this));
+ }
+ uint64_t getRelocationType(DataRefImpl Rel) const override { return 0; }
+ void getRelocationTypeName(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const override {}
+};
+
+} // namespace object
+
+} // namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/Object/IRObjectFile.h b/llvm/include/llvm/Object/IRObjectFile.h
index ee9911025a17..55d910fe970e 100644
--- a/llvm/include/llvm/Object/IRObjectFile.h
+++ b/llvm/include/llvm/Object/IRObjectFile.h
@@ -37,7 +37,9 @@ public:
Expected<uint32_t> getSymbolFlags(DataRefImpl Symb) const override;
basic_symbol_iterator symbol_begin() const override;
basic_symbol_iterator symbol_end() const override;
-
+ bool is64Bit() const override {
+ return Triple(getTargetTriple()).isArch64Bit();
+ }
StringRef getTargetTriple() const;
static bool classof(const Binary *v) {
diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h
index 56e7c8580b4e..54c876a8cb4f 100644
--- a/llvm/include/llvm/Object/MachO.h
+++ b/llvm/include/llvm/Object/MachO.h
@@ -19,11 +19,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/BinaryFormat/Swift.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolicFile.h"
@@ -31,6 +29,8 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/TargetParser/Triple.h"
#include <cstdint>
#include <memory>
#include <string>
@@ -503,6 +503,8 @@ public:
basic_symbol_iterator symbol_begin() const override;
basic_symbol_iterator symbol_end() const override;
+ bool is64Bit() const override;
+
// MachO specific.
symbol_iterator getSymbolByIndex(unsigned Index) const;
uint64_t getSymbolIndex(DataRefImpl Symb) const;
@@ -715,7 +717,7 @@ public:
ArrayRef<uint8_t> getDyldInfoLazyBindOpcodes() const;
ArrayRef<uint8_t> getDyldInfoExportsTrie() const;
- /// If the optional is None, no header was found, but the object was
+ /// If the optional is std::nullopt, no header was found, but the object was
/// well-formed.
Expected<std::optional<MachO::dyld_chained_fixups_header>>
getChainedFixupsHeader() const;
@@ -735,7 +737,7 @@ public:
ArrayRef<uint8_t> getUuid() const;
StringRef getStringTableData() const;
- bool is64Bit() const;
+
void ReadULEB128s(uint64_t Index, SmallVectorImpl<uint64_t> &Out) const;
static StringRef guessLibraryShortName(StringRef Name, bool &isFramework,
@@ -805,6 +807,8 @@ public:
case MachO::TOOL_CLANG: return "clang";
case MachO::TOOL_SWIFT: return "swift";
case MachO::TOOL_LD: return "ld";
+ case MachO::TOOL_LLD:
+ return "lld";
default:
std::string ret;
raw_string_ostream ss(ret);
diff --git a/llvm/include/llvm/Object/MachOUniversal.h b/llvm/include/llvm/Object/MachOUniversal.h
index 4fe7a68d9680..5e3a63e05dbf 100644
--- a/llvm/include/llvm/Object/MachOUniversal.h
+++ b/llvm/include/llvm/Object/MachOUniversal.h
@@ -13,11 +13,11 @@
#ifndef LLVM_OBJECT_MACHOUNIVERSAL_H
#define LLVM_OBJECT_MACHOUNIVERSAL_H
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/MachO.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
class StringRef;
diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h
index a01013374edc..2b614185c694 100644
--- a/llvm/include/llvm/Object/ObjectFile.h
+++ b/llvm/include/llvm/Object/ObjectFile.h
@@ -16,7 +16,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/BinaryFormat/Swift.h"
@@ -26,6 +25,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <cstdint>
#include <memory>
@@ -46,6 +46,7 @@ class WasmObjectFile;
using section_iterator = content_iterator<SectionRef>;
+typedef std::function<bool(const SectionRef &)> SectionFilterPredicate;
/// This is a value type class that represents a single relocation in the list
/// of relocations in the object file.
class RelocationRef {
@@ -392,10 +393,64 @@ public:
uint32_t UniversalCputype = 0,
uint32_t UniversalIndex = 0);
+ static Expected<std::unique_ptr<ObjectFile>>
+ createGOFFObjectFile(MemoryBufferRef Object);
+
static Expected<std::unique_ptr<WasmObjectFile>>
createWasmObjectFile(MemoryBufferRef Object);
};
+/// A filtered iterator for SectionRefs that skips sections based on some given
+/// predicate.
+class SectionFilterIterator {
+public:
+ SectionFilterIterator(SectionFilterPredicate Pred,
+ const section_iterator &Begin,
+ const section_iterator &End)
+ : Predicate(std::move(Pred)), Iterator(Begin), End(End) {
+ scanPredicate();
+ }
+ const SectionRef &operator*() const { return *Iterator; }
+ SectionFilterIterator &operator++() {
+ ++Iterator;
+ scanPredicate();
+ return *this;
+ }
+ bool operator!=(const SectionFilterIterator &Other) const {
+ return Iterator != Other.Iterator;
+ }
+
+private:
+ void scanPredicate() {
+ while (Iterator != End && !Predicate(*Iterator)) {
+ ++Iterator;
+ }
+ }
+ SectionFilterPredicate Predicate;
+ section_iterator Iterator;
+ section_iterator End;
+};
+
+/// Creates an iterator range of SectionFilterIterators for a given Object and
+/// predicate.
+class SectionFilter {
+public:
+ SectionFilter(SectionFilterPredicate Pred, const ObjectFile &Obj)
+ : Predicate(std::move(Pred)), Object(Obj) {}
+ SectionFilterIterator begin() {
+ return SectionFilterIterator(Predicate, Object.section_begin(),
+ Object.section_end());
+ }
+ SectionFilterIterator end() {
+ return SectionFilterIterator(Predicate, Object.section_end(),
+ Object.section_end());
+ }
+
+private:
+ SectionFilterPredicate Predicate;
+ const ObjectFile &Object;
+};
+
// Inline function definitions.
inline SymbolRef::SymbolRef(DataRefImpl SymbolP, const ObjectFile *Owner)
: BasicSymbolRef(SymbolP, Owner) {}
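
SectionFilter turns any SectionRef predicate into a range, which keeps object-dumping callers free of manual skip loops. A short usage sketch relying on the existing SectionRef::isText() query:

#include "llvm/Object/ObjectFile.h"
using namespace llvm;
using namespace llvm::object;

static unsigned countTextSections(const ObjectFile &Obj) {
  // Only sections whose predicate returns true are visited.
  SectionFilter TextOnly([](const SectionRef &S) { return S.isText(); }, Obj);
  unsigned N = 0;
  for (const SectionRef &S : TextOnly) {
    (void)S;
    ++N;
  }
  return N;
}
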
diff --git a/llvm/include/llvm/Object/OffloadBinary.h b/llvm/include/llvm/Object/OffloadBinary.h
index 72e7e83cfc6b..320a8e1f6d8f 100644
--- a/llvm/include/llvm/Object/OffloadBinary.h
+++ b/llvm/include/llvm/Object/OffloadBinary.h
@@ -17,7 +17,7 @@
#ifndef LLVM_OBJECT_OFFLOADBINARY_H
#define LLVM_OBJECT_OFFLOADBINARY_H
-#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/Binary.h"
#include "llvm/Support/Error.h"
@@ -59,7 +59,7 @@ enum ImageKind : uint16_t {
/// offsets from the beginning of the file.
class OffloadBinary : public Binary {
public:
- using string_iterator = StringMap<StringRef>::const_iterator;
+ using string_iterator = MapVector<StringRef, StringRef>::const_iterator;
using string_iterator_range = iterator_range<string_iterator>;
/// The current version of the binary used for backwards compatibility.
@@ -70,7 +70,7 @@ public:
ImageKind TheImageKind;
OffloadKind TheOffloadKind;
uint32_t Flags;
- StringMap<StringRef> StringData;
+ MapVector<StringRef, StringRef> StringData;
std::unique_ptr<MemoryBuffer> Image;
};
@@ -142,7 +142,7 @@ private:
OffloadBinary(const OffloadBinary &Other) = delete;
/// Map from keys to offsets in the binary.
- StringMap<StringRef> StringData;
+ MapVector<StringRef, StringRef> StringData;
/// Raw pointer to the MemoryBufferRef for convenience.
const char *Buffer;
/// Location of the header within the binary.
diff --git a/llvm/include/llvm/Object/SymbolicFile.h b/llvm/include/llvm/Object/SymbolicFile.h
index ea51afce5d2a..b13588c147d9 100644
--- a/llvm/include/llvm/Object/SymbolicFile.h
+++ b/llvm/include/llvm/Object/SymbolicFile.h
@@ -158,6 +158,8 @@ public:
virtual basic_symbol_iterator symbol_end() const = 0;
+ virtual bool is64Bit() const = 0;
+
// convenience wrappers.
using basic_symbol_iterator_range = iterator_range<basic_symbol_iterator>;
basic_symbol_iterator_range symbols() const {
diff --git a/llvm/include/llvm/Object/TapiFile.h b/llvm/include/llvm/Object/TapiFile.h
index 410e58dceaf4..53889a3125cb 100644
--- a/llvm/include/llvm/Object/TapiFile.h
+++ b/llvm/include/llvm/Object/TapiFile.h
@@ -15,6 +15,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/Binary.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBufferRef.h"
@@ -34,7 +35,7 @@ namespace object {
class TapiFile : public SymbolicFile {
public:
- TapiFile(MemoryBufferRef Source, const MachO::InterfaceFile &interface,
+ TapiFile(MemoryBufferRef Source, const MachO::InterfaceFile &Interface,
MachO::Architecture Arch);
~TapiFile() override;
@@ -48,18 +49,22 @@ public:
basic_symbol_iterator symbol_end() const override;
+ Expected<SymbolRef::Type> getSymbolType(DataRefImpl DRI) const;
+
static bool classof(const Binary *v) { return v->isTapiFile(); }
- bool is64Bit() { return MachO::is64Bit(Arch); }
+ bool is64Bit() const override { return MachO::is64Bit(Arch); }
private:
struct Symbol {
StringRef Prefix;
StringRef Name;
uint32_t Flags;
+ SymbolRef::Type Type;
- constexpr Symbol(StringRef Prefix, StringRef Name, uint32_t Flags)
- : Prefix(Prefix), Name(Name), Flags(Flags) {}
+ constexpr Symbol(StringRef Prefix, StringRef Name, uint32_t Flags,
+ SymbolRef::Type Type)
+ : Prefix(Prefix), Name(Name), Flags(Flags), Type(Type) {}
};
std::vector<Symbol> Symbols;
diff --git a/llvm/include/llvm/Object/Wasm.h b/llvm/include/llvm/Object/Wasm.h
index 3c8c8a21bf1d..8dd8918ddf21 100644
--- a/llvm/include/llvm/Object/Wasm.h
+++ b/llvm/include/llvm/Object/Wasm.h
@@ -164,6 +164,8 @@ public:
basic_symbol_iterator symbol_end() const override;
Expected<StringRef> getSymbolName(DataRefImpl Symb) const override;
+ bool is64Bit() const override { return false; }
+
Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const override;
uint64_t getWasmSymbolValue(const WasmSymbol &Sym) const;
uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
diff --git a/llvm/include/llvm/Object/WindowsResource.h b/llvm/include/llvm/Object/WindowsResource.h
index acda9e2659b1..ec390a4814cc 100644
--- a/llvm/include/llvm/Object/WindowsResource.h
+++ b/llvm/include/llvm/Object/WindowsResource.h
@@ -234,7 +234,7 @@ public:
struct StringOrID {
bool IsString;
ArrayRef<UTF16> String;
- uint32_t ID;
+ uint32_t ID = ~0u;
StringOrID(uint32_t ID) : IsString(false), ID(ID) {}
StringOrID(ArrayRef<UTF16> String) : IsString(true), String(String) {}
diff --git a/llvm/include/llvm/Object/XCOFFObjectFile.h b/llvm/include/llvm/Object/XCOFFObjectFile.h
index 14247804af45..5f51aacfabc0 100644
--- a/llvm/include/llvm/Object/XCOFFObjectFile.h
+++ b/llvm/include/llvm/Object/XCOFFObjectFile.h
@@ -576,7 +576,7 @@ public:
Expected<uint32_t> getSymbolFlags(DataRefImpl Symb) const override;
basic_symbol_iterator symbol_begin() const override;
basic_symbol_iterator symbol_end() const override;
-
+ bool is64Bit() const override;
Expected<StringRef> getSymbolName(DataRefImpl Symb) const override;
Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const override;
uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
@@ -625,7 +625,7 @@ public:
bool isRelocatableObject() const override;
// Below here is the non-inherited interface.
- bool is64Bit() const;
+
Expected<StringRef> getRawData(const char *Start, uint64_t Size,
StringRef Name) const;
@@ -715,6 +715,8 @@ public:
uint32_t Distance);
static bool classof(const Binary *B) { return B->isXCOFF(); }
+
+ std::optional<StringRef> tryGetCPUName() const override;
}; // XCOFFObjectFile
typedef struct {
@@ -846,6 +848,7 @@ public:
class XCOFFTracebackTable {
const uint8_t *const TBPtr;
+ bool Is64BitObj;
std::optional<SmallString<32>> ParmsType;
std::optional<uint32_t> TraceBackTableOffset;
std::optional<uint32_t> HandlerMask;
@@ -855,8 +858,10 @@ class XCOFFTracebackTable {
std::optional<uint8_t> AllocaRegister;
std::optional<TBVectorExt> VecExt;
std::optional<uint8_t> ExtensionTable;
+ std::optional<uint64_t> EhInfoDisp;
- XCOFFTracebackTable(const uint8_t *Ptr, uint64_t &Size, Error &Err);
+ XCOFFTracebackTable(const uint8_t *Ptr, uint64_t &Size, Error &Err,
+ bool Is64Bit = false);
public:
/// Parse an XCOFF Traceback Table from \a Ptr with \a Size bytes.
@@ -872,8 +877,8 @@ public:
/// If the XCOFF Traceback Table is not parsed successfully or there are
/// extra bytes that are not recognized, \a Size will be updated to be the
/// size up to the end of the last successfully parsed field of the table.
- static Expected<XCOFFTracebackTable> create(const uint8_t *Ptr,
- uint64_t &Size);
+ static Expected<XCOFFTracebackTable>
+ create(const uint8_t *Ptr, uint64_t &Size, bool Is64Bits = false);
uint8_t getVersion() const;
uint8_t getLanguageID() const;
@@ -930,6 +935,7 @@ public:
const std::optional<uint8_t> &getExtensionTable() const {
return ExtensionTable;
}
+ const std::optional<uint64_t> &getEhInfoDisp() const { return EhInfoDisp; }
};
bool doesXCOFFTracebackTableBegin(ArrayRef<uint8_t> Bytes);
diff --git a/llvm/include/llvm/ObjectYAML/COFFYAML.h b/llvm/include/llvm/ObjectYAML/COFFYAML.h
index fbd8298919bd..2f9a1aae0eb0 100644
--- a/llvm/include/llvm/ObjectYAML/COFFYAML.h
+++ b/llvm/include/llvm/ObjectYAML/COFFYAML.h
@@ -15,6 +15,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/COFF.h"
+#include "llvm/Object/COFF.h"
#include "llvm/ObjectYAML/CodeViewYAMLDebugSections.h"
#include "llvm/ObjectYAML/CodeViewYAMLTypeHashing.h"
#include "llvm/ObjectYAML/CodeViewYAMLTypes.h"
@@ -66,6 +67,16 @@ struct Relocation {
std::optional<uint32_t> SymbolTableIndex;
};
+struct SectionDataEntry {
+ std::optional<uint32_t> UInt32;
+ yaml::BinaryRef Binary;
+ std::optional<object::coff_load_configuration32> LoadConfig32;
+ std::optional<object::coff_load_configuration64> LoadConfig64;
+
+ size_t size() const;
+ void writeAsBinary(raw_ostream &OS) const;
+};
+
struct Section {
COFF::section Header;
unsigned Alignment = 0;
@@ -74,6 +85,7 @@ struct Section {
std::vector<CodeViewYAML::LeafRecord> DebugT;
std::vector<CodeViewYAML::LeafRecord> DebugP;
std::optional<CodeViewYAML::DebugHSection> DebugH;
+ std::vector<SectionDataEntry> StructuredData;
std::vector<Relocation> Relocations;
StringRef Name;
@@ -117,6 +129,7 @@ struct Object {
LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Section)
LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Symbol)
LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::Relocation)
+LLVM_YAML_IS_SEQUENCE_VECTOR(COFFYAML::SectionDataEntry)
namespace llvm {
namespace yaml {
@@ -236,11 +249,27 @@ template <> struct MappingTraits<COFF::AuxiliaryCLRToken> {
static void mapping(IO &IO, COFF::AuxiliaryCLRToken &ACT);
};
+template <> struct MappingTraits<object::coff_load_configuration32> {
+ static void mapping(IO &IO, object::coff_load_configuration32 &ACT);
+};
+
+template <> struct MappingTraits<object::coff_load_configuration64> {
+ static void mapping(IO &IO, object::coff_load_configuration64 &ACT);
+};
+
+template <> struct MappingTraits<object::coff_load_config_code_integrity> {
+ static void mapping(IO &IO, object::coff_load_config_code_integrity &ACT);
+};
+
template <>
struct MappingTraits<COFFYAML::Symbol> {
static void mapping(IO &IO, COFFYAML::Symbol &S);
};
+template <> struct MappingTraits<COFFYAML::SectionDataEntry> {
+ static void mapping(IO &IO, COFFYAML::SectionDataEntry &Sec);
+};
+
template <>
struct MappingTraits<COFFYAML::Section> {
static void mapping(IO &IO, COFFYAML::Section &Sec);
diff --git a/llvm/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h b/llvm/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h
index 9cbacb88b518..6c712956dfb5 100644
--- a/llvm/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h
+++ b/llvm/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h
@@ -115,8 +115,6 @@ struct YAMLDebugSubsection {
std::shared_ptr<detail::YAMLSubsectionBase> Subsection;
};
-struct DebugSubsectionState {};
-
Expected<std::vector<std::shared_ptr<codeview::DebugSubsection>>>
toCodeViewSubsectionList(BumpPtrAllocator &Allocator,
ArrayRef<YAMLDebugSubsection> Subsections,
diff --git a/llvm/include/llvm/ObjectYAML/DWARFEmitter.h b/llvm/include/llvm/ObjectYAML/DWARFEmitter.h
index 30bb16deb810..ee421b2efc72 100644
--- a/llvm/include/llvm/ObjectYAML/DWARFEmitter.h
+++ b/llvm/include/llvm/ObjectYAML/DWARFEmitter.h
@@ -15,8 +15,8 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/TargetParser/Host.h"
#include <memory>
namespace llvm {
diff --git a/llvm/include/llvm/ObjectYAML/DXContainerYAML.h b/llvm/include/llvm/ObjectYAML/DXContainerYAML.h
index 55c61ff96b7c..5dff8b3db9d0 100644
--- a/llvm/include/llvm/ObjectYAML/DXContainerYAML.h
+++ b/llvm/include/llvm/ObjectYAML/DXContainerYAML.h
@@ -71,6 +71,26 @@ struct ShaderHash {
std::vector<llvm::yaml::Hex8> Digest;
};
+using ResourceBindInfo = dxbc::PSV::v2::ResourceBindInfo;
+
+struct PSVInfo {
+ // The version field isn't actually encoded in the file, but it is inferred by
+ // the size of data regions. We include it in the yaml because it simplifies
+ // the format.
+ uint32_t Version;
+
+ dxbc::PSV::v2::RuntimeInfo Info;
+ uint32_t ResourceStride;
+ std::vector<ResourceBindInfo> Resources;
+
+ void mapInfoForVersion(yaml::IO &IO);
+
+ PSVInfo();
+ PSVInfo(const dxbc::PSV::v0::RuntimeInfo *P, uint16_t Stage);
+ PSVInfo(const dxbc::PSV::v1::RuntimeInfo *P);
+ PSVInfo(const dxbc::PSV::v2::RuntimeInfo *P);
+};
+
struct Part {
Part() = default;
Part(std::string N, uint32_t S) : Name(N), Size(S) {}
@@ -79,6 +99,7 @@ struct Part {
std::optional<DXILProgram> Program;
std::optional<ShaderFlags> Flags;
std::optional<ShaderHash> Hash;
+ std::optional<PSVInfo> Info;
};
struct Object {
@@ -90,6 +111,7 @@ struct Object {
} // namespace llvm
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DXContainerYAML::Part)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DXContainerYAML::ResourceBindInfo)
namespace llvm {
class raw_ostream;
@@ -116,6 +138,10 @@ template <> struct MappingTraits<DXContainerYAML::ShaderHash> {
static void mapping(IO &IO, DXContainerYAML::ShaderHash &Hash);
};
+template <> struct MappingTraits<DXContainerYAML::PSVInfo> {
+ static void mapping(IO &IO, DXContainerYAML::PSVInfo &PSV);
+};
+
template <> struct MappingTraits<DXContainerYAML::Part> {
static void mapping(IO &IO, DXContainerYAML::Part &Version);
};
@@ -124,6 +150,10 @@ template <> struct MappingTraits<DXContainerYAML::Object> {
static void mapping(IO &IO, DXContainerYAML::Object &Obj);
};
+template <> struct MappingTraits<DXContainerYAML::ResourceBindInfo> {
+ static void mapping(IO &IO, DXContainerYAML::ResourceBindInfo &Res);
+};
+
} // namespace yaml
} // namespace llvm
diff --git a/llvm/include/llvm/Option/Arg.h b/llvm/include/llvm/Option/Arg.h
index 4be254ccdab4..5a718438bf4a 100644
--- a/llvm/include/llvm/Option/Arg.h
+++ b/llvm/include/llvm/Option/Arg.h
@@ -47,11 +47,17 @@ private:
/// ArgList.
unsigned Index;
- /// Was this argument used to effect compilation?
+ /// Was this argument used to affect compilation?
///
- /// This is used for generating "argument unused" diagnostics.
+ /// This is used to generate an "argument unused" warning (without
+ /// clang::driver::options::TargetSpecific) or "unsupported option" error
+ /// (with TargetSpecific).
mutable unsigned Claimed : 1;
+ /// Used by an unclaimed option with the TargetSpecific flag. If set, report
+ /// an "argument unused" warning instead of an "unsupported option" error.
+ unsigned IgnoredTargetSpecific : 1;
+
/// Does this argument own its values?
mutable unsigned OwnsValues : 1;
@@ -93,6 +99,7 @@ public:
const Arg &getBaseArg() const {
return BaseArg ? *BaseArg : *this;
}
+ Arg &getBaseArg() { return BaseArg ? const_cast<Arg &>(*BaseArg) : *this; }
void setBaseArg(const Arg *BaseArg) { this->BaseArg = BaseArg; }
/// Args are converted to their unaliased form. For args that originally
@@ -104,10 +111,15 @@ public:
void setOwnsValues(bool Value) const { OwnsValues = Value; }
bool isClaimed() const { return getBaseArg().Claimed; }
-
- /// Set the Arg claimed bit.
void claim() const { getBaseArg().Claimed = true; }
+ bool isIgnoredTargetSpecific() const {
+ return getBaseArg().IgnoredTargetSpecific;
+ }
+ void ignoreTargetSpecific() {
+ getBaseArg().IgnoredTargetSpecific = true;
+ }
+
unsigned getNumValues() const { return Values.size(); }
const char *getValue(unsigned N = 0) const {
diff --git a/llvm/include/llvm/Option/ArgList.h b/llvm/include/llvm/Option/ArgList.h
index 6a07e1c657dc..310c8900af9e 100644
--- a/llvm/include/llvm/Option/ArgList.h
+++ b/llvm/include/llvm/Option/ArgList.h
@@ -354,6 +354,12 @@ public:
/// option id.
void ClaimAllArgs(OptSpecifier Id0) const;
+ template <typename... OptSpecifiers>
+ void claimAllArgs(OptSpecifiers... Ids) const {
+ for (Arg *A : filtered(Ids...))
+ A->claim();
+ }
+
/// ClaimAllArgs - Claim all arguments.
///
void ClaimAllArgs() const;
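
The variadic claimAllArgs() claims every argument matching any of the given option IDs in one pass over the filtered range, so a driver can silence whole groups of intentionally ignored options. A sketch with hypothetical option IDs (OPT_quiet_opt1 and OPT_quiet_opt2 stand in for entries from a real tablegen'd option table):

// OPT_quiet_opt1 / OPT_quiet_opt2 are hypothetical IDs for illustration only.
void claimQuietOptions(const llvm::opt::ArgList &Args) {
  Args.claimAllArgs(OPT_quiet_opt1, OPT_quiet_opt2);
}
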
diff --git a/llvm/include/llvm/Option/OptParser.td b/llvm/include/llvm/Option/OptParser.td
index 9c73f478db5e..94b945defac1 100644
--- a/llvm/include/llvm/Option/OptParser.td
+++ b/llvm/include/llvm/Option/OptParser.td
@@ -11,6 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_OPTION_OPTPARSER_TD
+#define LLVM_OPTION_OPTPARSER_TD
+
// Define the kinds of options.
class OptionKind<string name, int precedence = 0, bit sentinel = false> {
@@ -246,3 +249,5 @@ class ValueExtractor<code extractor> { code ValueExtractor = extractor; }
// aren't duplicated).
def INPUT : Option<[], "<input>", KIND_INPUT>;
def UNKNOWN : Option<[], "<unknown>", KIND_UNKNOWN>;
+
+#endif // LLVM_OPTION_OPTPARSER_TD
diff --git a/llvm/include/llvm/Option/OptTable.h b/llvm/include/llvm/Option/OptTable.h
index 0cef9b65d2f2..6f3d6032e59a 100644
--- a/llvm/include/llvm/Option/OptTable.h
+++ b/llvm/include/llvm/Option/OptTable.h
@@ -62,6 +62,7 @@ private:
ArrayRef<Info> OptionInfos;
bool IgnoreCase;
bool GroupedShortOptions = false;
+ bool DashDashParsing = false;
const char *EnvVar = nullptr;
unsigned InputOptionID = 0;
@@ -139,6 +140,10 @@ public:
/// Support grouped short options. e.g. -ab represents -a -b.
void setGroupedShortOptions(bool Value) { GroupedShortOptions = Value; }
+ /// Set whether "--" stops option parsing and treats all subsequent arguments
+ /// as positional. E.g. -- -a -b gives two positional inputs.
+ void setDashDashParsing(bool Value) { DashDashParsing = Value; }
+
/// Find possible value for given flags. This is used for shell
/// autocompletion.
///
diff --git a/llvm/include/llvm/Pass.h b/llvm/include/llvm/Pass.h
index 6445e16ab68f..44b6dd95cc27 100644
--- a/llvm/include/llvm/Pass.h
+++ b/llvm/include/llvm/Pass.h
@@ -28,6 +28,9 @@
#ifndef LLVM_PASS_H
#define LLVM_PASS_H
+#ifdef EXPENSIVE_CHECKS
+#include <cstdint>
+#endif
#include <string>
namespace llvm {
diff --git a/llvm/include/llvm/PassRegistry.h b/llvm/include/llvm/PassRegistry.h
index b9a015430c10..5d7f3a84a6be 100644
--- a/llvm/include/llvm/PassRegistry.h
+++ b/llvm/include/llvm/PassRegistry.h
@@ -19,7 +19,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/RWMutex.h"
#include <memory>
#include <vector>
@@ -89,9 +88,6 @@ public:
void removeRegistrationListener(PassRegistrationListener *L);
};
-// Create wrappers for C Binding types (see CBindingWrapping.h).
-DEFINE_STDCXX_CONVERSION_FUNCTIONS(PassRegistry, LLVMPassRegistryRef)
-
} // end namespace llvm
#endif // LLVM_PASSREGISTRY_H
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index b7e6764d428b..fdb407263787 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -32,6 +32,10 @@ class StringRef;
class AAManager;
class TargetMachine;
class ModuleSummaryIndex;
+template <typename T> class IntrusiveRefCntPtr;
+namespace vfs {
+class FileSystem;
+} // namespace vfs
/// Tunable parameters for passes in the default pipelines.
class PipelineTuningOptions {
@@ -70,6 +74,9 @@ public:
/// that of the flag: `-enable-npm-call-graph-profile`.
bool CallGraphProfile;
+ // Add LTO pipeline tuning option to enable the unified LTO pipeline.
+ bool UnifiedLTO;
+
/// Tuning option to enable/disable function merging. Its default value is
/// false.
bool MergeFunctions;
@@ -227,14 +234,21 @@ public:
/// optimization and code generation without any link-time optimization. It
/// typically correspond to frontend "-O[123]" options for optimization
/// levels \c O1, \c O2 and \c O3 resp.
- ///
- /// Note that \p Level cannot be `O0` here. The pipelines produced are
- /// only intended for use when attempting to optimize code. If frontends
- /// require some transformations for semantic reasons, they should explicitly
- /// build them.
ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level,
bool LTOPreLink = false);
+ /// Build a fat object default optimization pipeline.
+ ///
+ /// This builds a pipeline that runs the LTO/ThinLTO pre-link pipeline, and
+ /// emits a section containing the pre-link bitcode along side the object code
+  /// emits a section containing the pre-link bitcode alongside the object code
+ /// without LTO. It clones the module and runs the LTO/non-LTO pipelines
+ /// separately to avoid any inconsistencies with an ad-hoc pipeline that tries
+ /// to approximate the PerModuleDefaultPipeline from the pre-link LTO
+ /// pipelines.
+ ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level,
+ bool ThinLTO, bool EmitSummary);
+
/// Build a pre-link, ThinLTO-targeting default optimization pipeline to
/// a pass manager.
///
@@ -242,11 +256,6 @@ public:
/// a ThinLTO run. It works to minimize the IR which needs to be analyzed
/// without making irreversible decisions which could be made better during
/// the LTO run.
- ///
- /// Note that \p Level cannot be `O0` here. The pipelines produced are
- /// only intended for use when attempting to optimize code. If frontends
- /// require some transformations for semantic reasons, they should explicitly
- /// build them.
ModulePassManager buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level);
/// Build an ThinLTO default optimization pipeline to a pass manager.
@@ -255,11 +264,6 @@ public:
/// optimization and code generation. It is particularly tuned to fit well
/// when IR coming into the LTO phase was first run through \c
/// addPreLinkLTODefaultPipeline, and the two coordinate closely.
- ///
- /// Note that \p Level cannot be `O0` here. The pipelines produced are
- /// only intended for use when attempting to optimize code. If frontends
- /// require some transformations for semantic reasons, they should explicitly
- /// build them.
ModulePassManager
buildThinLTODefaultPipeline(OptimizationLevel Level,
const ModuleSummaryIndex *ImportSummary);
@@ -271,11 +275,6 @@ public:
/// run. It works to minimize the IR which needs to be analyzed without
/// making irreversible decisions which could be made better during the LTO
/// run.
- ///
- /// Note that \p Level cannot be `O0` here. The pipelines produced are
- /// only intended for use when attempting to optimize code. If frontends
- /// require some transformations for semantic reasons, they should explicitly
- /// build them.
ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level);
/// Build an LTO default optimization pipeline to a pass manager.
@@ -284,11 +283,6 @@ public:
/// optimization and code generation. It is particularly tuned to fit well
/// when IR coming into the LTO phase was first run through \c
/// addPreLinkLTODefaultPipeline, and the two coordinate closely.
- ///
- /// Note that \p Level cannot be `O0` here. The pipelines produced are
- /// only intended for use when attempting to optimize code. If frontends
- /// require some transformations for semantic reasons, they should explicitly
- /// build them.
ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level,
ModuleSummaryIndex *ExportSummary);
@@ -567,7 +561,8 @@ public:
/// Add PGOInstrumentation passes for O0 only.
void addPGOInstrPassesForO0(ModulePassManager &MPM, bool RunProfileGen,
bool IsCS, std::string ProfileFile,
- std::string ProfileRemappingFile);
+ std::string ProfileRemappingFile,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS);
/// Returns PIC. External libraries can use this to register pass
/// instrumentation callbacks.
@@ -575,6 +570,34 @@ public:
return PIC;
}
+ // Invoke the callbacks registered for the various extension points.
+ // Custom pipelines should use these to invoke the callbacks registered
+ // by TargetMachines and other clients.
+ void invokePeepholeEPCallbacks(FunctionPassManager &FPM,
+ OptimizationLevel Level);
+ void invokeLateLoopOptimizationsEPCallbacks(LoopPassManager &LPM,
+ OptimizationLevel Level);
+ void invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM,
+ OptimizationLevel Level);
+ void invokeScalarOptimizerLateEPCallbacks(FunctionPassManager &FPM,
+ OptimizationLevel Level);
+ void invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM,
+ OptimizationLevel Level);
+ void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
+ OptimizationLevel Level);
+ void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
+ OptimizationLevel Level);
+ void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
+ OptimizationLevel Level);
+ void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM,
+ OptimizationLevel Level);
+ void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM,
+ OptimizationLevel Level);
+ void invokePipelineStartEPCallbacks(ModulePassManager &MPM,
+ OptimizationLevel Level);
+ void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM,
+ OptimizationLevel Level);
+
private:
// O1 pass pipeline
FunctionPassManager
@@ -607,8 +630,8 @@ private:
void addPGOInstrPasses(ModulePassManager &MPM, OptimizationLevel Level,
bool RunProfileGen, bool IsCS, std::string ProfileFile,
std::string ProfileRemappingFile,
- ThinOrFullLTOPhase LTOPhase);
- void invokePeepholeEPCallbacks(FunctionPassManager &, OptimizationLevel);
+ ThinOrFullLTOPhase LTOPhase,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS);
// Extension Point callbacks
SmallVector<std::function<void(FunctionPassManager &, OptimizationLevel)>, 2>
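A minimal sketch (not part of the diff) of how a driver might use the newly added buildFatLTODefaultPipeline entry point and the now-public extension-point invokers; the helper names and the O2 level are illustrative assumptions.
#include "llvm/Passes/PassBuilder.h"
using namespace llvm;
ModulePassManager buildFatPipeline(PassBuilder &PB) {
  // Pre-link (Thin)LTO pipeline plus an embedded-bitcode section, with a
  // ThinLTO summary emitted for the later link step.
  return PB.buildFatLTODefaultPipeline(OptimizationLevel::O2,
                                       /*ThinLTO=*/true, /*EmitSummary=*/true);
}
void runPeepholeCallbacks(PassBuilder &PB, FunctionPassManager &FPM) {
  // The extension-point invokers are public now, so a hand-rolled pipeline can
  // run the same target/plugin callbacks the default pipelines use internally.
  PB.invokePeepholeEPCallbacks(FPM, OptimizationLevel::O2);
}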
diff --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h
index c8614ec49688..331130c6b22d 100644
--- a/llvm/include/llvm/Passes/StandardInstrumentations.h
+++ b/llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -52,6 +52,8 @@ private:
bool shouldPrintBeforePass(StringRef PassID);
bool shouldPrintAfterPass(StringRef PassID);
+ bool shouldPrintPassNumbers();
+ bool shouldPrintAtPassNumber();
using PrintModuleDesc = std::tuple<const Module *, std::string, StringRef>;
@@ -62,6 +64,9 @@ private:
/// Stack of Module description, enough to print the module after a given
/// pass.
SmallVector<PrintModuleDesc, 2> ModuleDescStack;
+
+ /// Used for print-at-pass-number
+ unsigned CurrentPassNumber = 0;
};
class OptNoneInstrumentation {
@@ -152,9 +157,8 @@ public:
SmallVector<StringRef, 8> PassStack;
#endif
- static cl::opt<bool> VerifyPreservedCFG;
void registerCallbacks(PassInstrumentationCallbacks &PIC,
- FunctionAnalysisManager &FAM);
+ ModuleAnalysisManager &MAM);
};
// Base class for classes that report changes to the IR.
@@ -575,7 +579,7 @@ public:
// Register all the standard instrumentation callbacks. If \p MAM is nullptr
// then PreservedCFGChecker is not enabled.
void registerCallbacks(PassInstrumentationCallbacks &PIC,
- FunctionAnalysisManager *FAM = nullptr);
+ ModuleAnalysisManager *MAM = nullptr);
TimePassesHandler &getTimePasses() { return TimePasses; }
};
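With registerCallbacks now taking a ModuleAnalysisManager, wiring up the standard instrumentation might look like the following sketch; SI, PIC and MAM are assumed to come from the usual new-PM setup.
#include "llvm/Passes/StandardInstrumentations.h"
void wireUpInstrumentation(llvm::StandardInstrumentations &SI,
                           llvm::PassInstrumentationCallbacks &PIC,
                           llvm::ModuleAnalysisManager &MAM) {
  // Passing the module analysis manager (rather than the old
  // FunctionAnalysisManager) is what enables the PreservedCFGChecker;
  // passing nullptr leaves it disabled.
  SI.registerCallbacks(PIC, &MAM);
}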
diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
index 4d48308d5509..3c8f940ba97b 100644
--- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
+++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
@@ -21,6 +21,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Object/BuildID.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Compiler.h"
@@ -42,6 +43,14 @@ namespace llvm {
class IndexedInstrProfReader;
+namespace object {
+class BuildIDFetcher;
+} // namespace object
+
+namespace vfs {
+class FileSystem;
+} // namespace vfs
+
namespace coverage {
class CoverageMappingReader;
@@ -579,6 +588,13 @@ class CoverageMapping {
ArrayRef<std::unique_ptr<CoverageMappingReader>> CoverageReaders,
IndexedInstrProfReader &ProfileReader, CoverageMapping &Coverage);
+ // Load coverage records from file.
+ static Error
+ loadFromFile(StringRef Filename, StringRef Arch, StringRef CompilationDir,
+ IndexedInstrProfReader &ProfileReader, CoverageMapping &Coverage,
+ bool &DataFound,
+ SmallVectorImpl<object::BuildID> *FoundBinaryIDs = nullptr);
+
/// Add a function record corresponding to \p Record.
Error loadFunctionRecord(const CoverageMappingRecord &Record,
IndexedInstrProfReader &ProfileReader);
@@ -604,8 +620,10 @@ public:
/// Ignores non-instrumented object files unless all are not instrumented.
static Expected<std::unique_ptr<CoverageMapping>>
load(ArrayRef<StringRef> ObjectFilenames, StringRef ProfileFilename,
- ArrayRef<StringRef> Arches = std::nullopt,
- StringRef CompilationDir = "");
+ vfs::FileSystem &FS, ArrayRef<StringRef> Arches = std::nullopt,
+ StringRef CompilationDir = "",
+ const object::BuildIDFetcher *BIDFetcher = nullptr,
+ bool CheckBinaryIDs = false);
/// The number of functions that couldn't have their profiles mapped.
///
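A sketch of a load() call site under the new signature; the file names and the choice of the real file system are illustrative assumptions, not taken from the diff.
#include "llvm/ProfileData/Coverage/CoverageMapping.h"
#include "llvm/Support/VirtualFileSystem.h"
llvm::Expected<std::unique_ptr<llvm::coverage::CoverageMapping>>
loadCoverage() {
  // Callers must now supply a vfs::FileSystem explicitly; BuildIDFetcher and
  // CheckBinaryIDs keep their defaults here.
  auto FS = llvm::vfs::getRealFileSystem();
  return llvm::coverage::CoverageMapping::load({"a.out"}, "default.profdata",
                                               *FS);
}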
diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h
index 39c0045369be..326c1b0d3338 100644
--- a/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h
+++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMappingReader.h
@@ -205,7 +205,8 @@ public:
static Expected<std::vector<std::unique_ptr<BinaryCoverageReader>>>
create(MemoryBufferRef ObjectBuffer, StringRef Arch,
SmallVectorImpl<std::unique_ptr<MemoryBuffer>> &ObjectFileBuffers,
- StringRef CompilationDir = "");
+ StringRef CompilationDir = "",
+ SmallVectorImpl<object::BuildIDRef> *BinaryIDs = nullptr);
static Expected<std::unique_ptr<BinaryCoverageReader>>
createCoverageReaderFromBuffer(StringRef Coverage,
diff --git a/llvm/include/llvm/ProfileData/GCOV.h b/llvm/include/llvm/ProfileData/GCOV.h
index fe56f84f28b6..674260c81fa6 100644
--- a/llvm/include/llvm/ProfileData/GCOV.h
+++ b/llvm/include/llvm/ProfileData/GCOV.h
@@ -15,6 +15,7 @@
#define LLVM_PROFILEDATA_GCOV_H
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
@@ -214,6 +215,9 @@ public:
SmallVectorImpl<std::unique_ptr<GCOVFunction>>::const_iterator>;
iterator begin() const { return iterator(functions.begin()); }
iterator end() const { return iterator(functions.end()); }
+
+private:
+ unsigned addNormalizedPathToMap(StringRef filename);
};
struct GCOVArc {
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 26c15b1fe860..f64d2e6cb739 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -20,19 +20,20 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProfData.inc"
+#include "llvm/Support/BalancedPartitioning.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -300,7 +301,9 @@ enum class InstrProfKind {
FunctionEntryOnly = 0x20,
// A memory profile collected using -fprofile=memory.
MemProf = 0x40,
- LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/MemProf)
+ // A temporal profile.
+ TemporalProfile = 0x80,
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/TemporalProfile)
};
const std::error_category &instrprof_category();
@@ -328,7 +331,24 @@ enum class instrprof_error {
compress_failed,
uncompress_failed,
empty_raw_profile,
- zlib_unavailable
+ zlib_unavailable,
+ raw_profile_version_mismatch
+};
+
+/// An ordered list of functions identified by their NameRef found in
+/// INSTR_PROF_DATA
+struct TemporalProfTraceTy {
+ std::vector<uint64_t> FunctionNameRefs;
+ uint64_t Weight;
+ TemporalProfTraceTy(std::initializer_list<uint64_t> Trace = {},
+ uint64_t Weight = 1)
+ : FunctionNameRefs(Trace), Weight(Weight) {}
+
+ /// Use a set of temporal profile traces to create a list of balanced
+ /// partitioning function nodes used by BalancedPartitioning to generate a
+ /// function order that reduces page faults during startup
+ static std::vector<BPFunctionNode>
+ createBPFunctionNodes(ArrayRef<TemporalProfTraceTy> Traces);
};
inline std::error_code make_error_code(instrprof_error E) {
@@ -353,15 +373,18 @@ public:
instrprof_error get() const { return Err; }
const std::string &getMessage() const { return Msg; }
- /// Consume an Error and return the raw enum value contained within it. The
- /// Error must either be a success value, or contain a single InstrProfError.
- static instrprof_error take(Error E) {
+ /// Consume an Error and return the raw enum value contained within it, and
+ /// the optional error message. The Error must either be a success value, or
+ /// contain a single InstrProfError.
+ static std::pair<instrprof_error, std::string> take(Error E) {
auto Err = instrprof_error::success;
- handleAllErrors(std::move(E), [&Err](const InstrProfError &IPE) {
+ std::string Msg = "";
+ handleAllErrors(std::move(E), [&Err, &Msg](const InstrProfError &IPE) {
assert(Err == instrprof_error::success && "Multiple errors encountered");
Err = IPE.get();
+ Msg = IPE.getMessage();
});
- return Err;
+ return {Err, Msg};
}
static char ID;
@@ -371,61 +394,6 @@ private:
std::string Msg;
};
-class SoftInstrProfErrors {
- /// Count the number of soft instrprof_errors encountered and keep track of
- /// the first such error for reporting purposes.
-
- /// The first soft error encountered.
- instrprof_error FirstError = instrprof_error::success;
-
- /// The number of hash mismatches.
- unsigned NumHashMismatches = 0;
-
- /// The number of count mismatches.
- unsigned NumCountMismatches = 0;
-
- /// The number of counter overflows.
- unsigned NumCounterOverflows = 0;
-
- /// The number of value site count mismatches.
- unsigned NumValueSiteCountMismatches = 0;
-
-public:
- SoftInstrProfErrors() = default;
-
- ~SoftInstrProfErrors() {
- assert(FirstError == instrprof_error::success &&
- "Unchecked soft error encountered");
- }
-
- /// Track a soft error (\p IE) and increment its associated counter.
- void addError(instrprof_error IE);
-
- /// Get the number of hash mismatches.
- unsigned getNumHashMismatches() const { return NumHashMismatches; }
-
- /// Get the number of count mismatches.
- unsigned getNumCountMismatches() const { return NumCountMismatches; }
-
- /// Get the number of counter overflows.
- unsigned getNumCounterOverflows() const { return NumCounterOverflows; }
-
- /// Get the number of value site count mismatches.
- unsigned getNumValueSiteCountMismatches() const {
- return NumValueSiteCountMismatches;
- }
-
- /// Return the first encountered error and reset FirstError to a success
- /// value.
- Error takeError() {
- if (FirstError == instrprof_error::success)
- return Error::success();
- auto E = make_error<InstrProfError>(FirstError);
- FirstError = instrprof_error::success;
- return E;
- }
-};
-
namespace object {
class SectionRef;
@@ -557,10 +525,7 @@ public:
inline StringRef getNameData() const { return Data; }
/// Dump the symbols in this table.
- void dumpNames(raw_ostream &OS) const {
- for (StringRef S : NameTab.keys())
- OS << S << "\n";
- }
+ void dumpNames(raw_ostream &OS) const;
};
Error InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) {
@@ -1052,7 +1017,9 @@ enum ProfVersion {
Version8 = 8,
// Binary ids are added.
Version9 = 9,
- // The current version is 9.
+ // An additional (optional) temporal profile traces section is added.
+ Version10 = 10,
+ // The current version is 10.
CurrentVersion = INSTR_PROF_INDEX_VERSION
};
const uint64_t Version = ProfVersion::CurrentVersion;
@@ -1071,6 +1038,7 @@ struct Header {
uint64_t HashOffset;
uint64_t MemProfOffset;
uint64_t BinaryIdOffset;
+ uint64_t TemporalProfTracesOffset;
// New fields should only be added at the end to ensure that the size
// computation is correct. The methods below need to be updated to ensure that
// the new field is read correctly.
@@ -1221,10 +1189,6 @@ struct Header {
} // end namespace RawInstrProf
-// Parse MemOP Size range option.
-void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart,
- int64_t &RangeLast);
-
// Create the variable for the profile file name.
void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput);
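Since InstrProfError::take() now returns both the enum value and the message, callers need a small adjustment; a sketch of the new pattern (reportProfError is a hypothetical helper, and the trace values below are made-up name hashes).
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/raw_ostream.h"
void reportProfError(llvm::Error E) {
  // take() consumes the Error and surfaces the optional message alongside the
  // raw instrprof_error value.
  auto [ErrCode, Msg] = llvm::InstrProfError::take(std::move(E));
  if (ErrCode != llvm::instrprof_error::success)
    llvm::errs() << "profile error: " << Msg << "\n";
}
// A temporal profile trace is a weighted sequence of function NameRefs.
llvm::TemporalProfTraceTy ExampleTrace({0x1111, 0x2222, 0x3333}, /*Weight=*/2);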
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index 05419bf01f52..94261f4705b9 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -650,7 +650,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
/* Raw profile format version (start from 1). */
#define INSTR_PROF_RAW_VERSION 8
/* Indexed profile format version (start from 1). */
-#define INSTR_PROF_INDEX_VERSION 9
+#define INSTR_PROF_INDEX_VERSION 10
/* Coverage mapping format version (start from 0). */
#define INSTR_PROF_COVMAP_VERSION 5
@@ -663,6 +663,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
* The 60th bit indicates single byte coverage instrumentation.
* The 61st bit indicates function entry instrumentation only.
* The 62nd bit indicates whether memory profile information is present.
+ * The 63rd bit indicates if this is a temporal profile.
*/
#define VARIANT_MASKS_ALL 0xff00000000000000ULL
#define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
@@ -673,9 +674,11 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
#define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60)
#define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61)
#define VARIANT_MASK_MEMPROF (0x1ULL << 62)
+#define VARIANT_MASK_TEMPORAL_PROF (0x1ULL << 63)
#define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version
#define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime
#define INSTR_PROF_PROFILE_COUNTER_BIAS_VAR __llvm_profile_counter_bias
+#define INSTR_PROF_PROFILE_SET_TIMESTAMP __llvm_profile_set_timestamp
/* The variable that holds the name of the profile data
* specified via command line. */
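A small worked example of how the new bit composes with the existing macros (assuming the definitions above and <stdint.h> are in scope): a raw header version word with the temporal-profile bit set still decodes to format version 8.
uint64_t V = INSTR_PROF_RAW_VERSION | VARIANT_MASK_TEMPORAL_PROF;
// V == 0x8000000000000008
bool IsTemporal = (V & VARIANT_MASK_TEMPORAL_PROF) != 0; // true
uint64_t FormatVersion = GET_VERSION(V);                 // 8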
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 9c216e57d005..80c5284d8a7d 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -41,6 +41,10 @@ namespace llvm {
class InstrProfReader;
+namespace vfs {
+class FileSystem;
+} // namespace vfs
+
/// A file format agnostic iterator over profiling data.
template <class record_type = NamedInstrProfRecord,
class reader_type = InstrProfReader>
@@ -131,6 +135,9 @@ public:
/// Return true if profile includes a memory profile.
virtual bool hasMemoryProfile() const = 0;
+ /// Return true if this has a temporal profile.
+ virtual bool hasTemporalProfile() const = 0;
+
/// Returns a BitsetEnum describing the attributes of the profile. To check
/// individual attributes prefer using the helpers above.
virtual InstrProfKind getProfileKind() const = 0;
@@ -152,6 +159,10 @@ public:
protected:
std::unique_ptr<InstrProfSymtab> Symtab;
+ /// A list of temporal profile traces.
+ SmallVector<TemporalProfTraceTy> TemporalProfTraces;
+ /// The total number of temporal profile traces seen.
+ uint64_t TemporalProfTraceStreamSize = 0;
/// Set the current error and return same.
Error error(instrprof_error Err, const std::string &ErrMsg = "") {
@@ -190,11 +201,26 @@ public:
/// Factory method to create an appropriately typed reader for the given
/// instrprof file.
static Expected<std::unique_ptr<InstrProfReader>>
- create(const Twine &Path, const InstrProfCorrelator *Correlator = nullptr);
+ create(const Twine &Path, vfs::FileSystem &FS,
+ const InstrProfCorrelator *Correlator = nullptr);
static Expected<std::unique_ptr<InstrProfReader>>
create(std::unique_ptr<MemoryBuffer> Buffer,
const InstrProfCorrelator *Correlator = nullptr);
+
+ /// \param Weight for raw profiles use this as the temporal profile trace
+ /// weight
+ /// \returns a list of temporal profile traces.
+ virtual SmallVector<TemporalProfTraceTy> &
+ getTemporalProfTraces(std::optional<uint64_t> Weight = {}) {
+ // For non-raw profiles we ignore the input weight and instead use the
+ // weights already in the traces.
+ return TemporalProfTraces;
+ }
+ /// \returns the total number of temporal profile traces seen.
+ uint64_t getTemporalProfTraceStreamSize() {
+ return TemporalProfTraceStreamSize;
+ }
};
/// Reader for the simple text based instrprof format.
@@ -216,6 +242,8 @@ private:
Error readValueProfileData(InstrProfRecord &Record);
+ Error readTemporalProfTraceData();
+
public:
TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
: DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
@@ -254,6 +282,10 @@ public:
return false;
}
+ bool hasTemporalProfile() const override {
+ return static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile);
+ }
+
InstrProfKind getProfileKind() const override { return ProfileKind; }
/// Read the header.
@@ -283,6 +315,8 @@ private:
/// If available, this hold the ProfileData array used to correlate raw
/// instrumentation data to their functions.
const InstrProfCorrelatorImpl<IntPtrT> *Correlator;
+ /// A list of timestamps paired with a function name reference.
+ std::vector<std::pair<uint64_t, uint64_t>> TemporalProfTimestamps;
bool ShouldSwapBytes;
// The value of the version field of the raw profile data header. The lower 56
// bits specifies the format version and the most significant 8 bits specify
@@ -354,6 +388,10 @@ public:
return false;
}
+ bool hasTemporalProfile() const override {
+ return (Version & VARIANT_MASK_TEMPORAL_PROF) != 0;
+ }
+
/// Returns a BitsetEnum describing the attributes of the raw instr profile.
InstrProfKind getProfileKind() const override;
@@ -362,6 +400,9 @@ public:
return *Symtab.get();
}
+ SmallVector<TemporalProfTraceTy> &
+ getTemporalProfTraces(std::optional<uint64_t> Weight = {}) override;
+
private:
Error createSymtab(InstrProfSymtab &Symtab);
Error readNextHeader(const char *CurrentPos);
@@ -499,6 +540,7 @@ struct InstrProfReaderIndexBase {
virtual bool hasSingleByteCoverage() const = 0;
virtual bool functionEntryOnly() const = 0;
virtual bool hasMemoryProfile() const = 0;
+ virtual bool hasTemporalProfile() const = 0;
virtual InstrProfKind getProfileKind() const = 0;
virtual Error populateSymtab(InstrProfSymtab &) = 0;
};
@@ -569,6 +611,10 @@ public:
return (FormatVersion & VARIANT_MASK_MEMPROF) != 0;
}
+ bool hasTemporalProfile() const override {
+ return (FormatVersion & VARIANT_MASK_TEMPORAL_PROF) != 0;
+ }
+
InstrProfKind getProfileKind() const override;
Error populateSymtab(InstrProfSymtab &Symtab) override {
@@ -648,6 +694,10 @@ public:
bool hasMemoryProfile() const override { return Index->hasMemoryProfile(); }
+ bool hasTemporalProfile() const override {
+ return Index->hasTemporalProfile();
+ }
+
/// Returns a BitsetEnum describing the attributes of the indexed instr
/// profile.
InstrProfKind getProfileKind() const override {
@@ -693,7 +743,8 @@ public:
/// Factory method to create an indexed reader.
static Expected<std::unique_ptr<IndexedInstrProfReader>>
- create(const Twine &Path, const Twine &RemappingPath = "");
+ create(const Twine &Path, vfs::FileSystem &FS,
+ const Twine &RemappingPath = "");
static Expected<std::unique_ptr<IndexedInstrProfReader>>
create(std::unique_ptr<MemoryBuffer> Buffer,
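A sketch of the updated reader factory; the use of the real file system and the helper name are illustrative assumptions.
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
llvm::Error dumpTemporalTraceCount(const llvm::Twine &Path) {
  auto FS = llvm::vfs::getRealFileSystem();
  auto ReaderOrErr = llvm::InstrProfReader::create(Path, *FS);
  if (!ReaderOrErr)
    return ReaderOrErr.takeError();
  llvm::InstrProfReader &Reader = **ReaderOrErr;
  if (Reader.hasTemporalProfile())
    llvm::errs() << Reader.getTemporalProfTraceStreamSize()
                 << " temporal profile traces seen in the stream\n";
  return llvm::Error::success();
}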
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 087f22996657..e50705ee053e 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -25,6 +25,7 @@
#include "llvm/Support/Error.h"
#include <cstdint>
#include <memory>
+#include <random>
namespace llvm {
@@ -41,6 +42,15 @@ public:
private:
bool Sparse;
StringMap<ProfilingData> FunctionData;
+ /// The maximum length of a single temporal profile trace.
+ uint64_t MaxTemporalProfTraceLength;
+ /// The maximum number of stored temporal profile traces.
+ uint64_t TemporalProfTraceReservoirSize;
+ /// The total number of temporal profile traces seen.
+ uint64_t TemporalProfTraceStreamSize = 0;
+ /// The list of temporal profile traces.
+ SmallVector<TemporalProfTraceTy> TemporalProfTraces;
+ std::mt19937 RNG;
// A map to hold memprof data per function. The lower 64 bits obtained from
// the md5 hash of the function name is used to index into the map.
@@ -60,7 +70,9 @@ private:
InstrProfRecordWriterTrait *InfoObj;
public:
- InstrProfWriter(bool Sparse = false);
+ InstrProfWriter(bool Sparse = false,
+ uint64_t TemporalProfTraceReservoirSize = 0,
+ uint64_t MaxTemporalProfTraceLength = 0);
~InstrProfWriter();
StringMap<ProfilingData> &getProfileData() { return FunctionData; }
@@ -74,6 +86,11 @@ public:
addRecord(std::move(I), 1, Warn);
}
+ /// Add \p SrcTraces using reservoir sampling where \p SrcStreamSize is the
+ /// total number of temporal profiling traces the source has seen.
+ void addTemporalProfileTraces(SmallVectorImpl<TemporalProfTraceTy> &SrcTraces,
+ uint64_t SrcStreamSize);
+
/// Add a memprof record for a function identified by its \p Id.
void addMemProfRecord(const GlobalValue::GUID Id,
const memprof::IndexedMemProfRecord &Record);
@@ -93,9 +110,16 @@ public:
/// Write the profile to \c OS
Error write(raw_fd_ostream &OS);
+ /// Write the profile to a string output stream \c OS
+ Error write(raw_string_ostream &OS);
+
/// Write the profile in text format to \c OS
Error writeText(raw_fd_ostream &OS);
+ /// Write temporal profile trace data to the header in text format to \c OS
+ void writeTextTemporalProfTraceData(raw_fd_ostream &OS,
+ InstrProfSymtab &Symtab);
+
Error validateRecord(const InstrProfRecord &Func);
/// Write \c Record in text format to \c OS
@@ -158,6 +182,8 @@ private:
void addRecord(StringRef Name, uint64_t Hash, InstrProfRecord &&I,
uint64_t Weight, function_ref<void(Error)> Warn);
bool shouldEncodeData(const ProfilingData &PD);
+ /// Add \p Trace using reservoir sampling.
+ void addTemporalProfileTrace(TemporalProfTraceTy Trace);
Error writeImpl(ProfOStream &OS);
};
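A sketch of feeding temporal profile traces into the writer; the reservoir size and trace-length cap are hypothetical values.
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/ProfileData/InstrProfWriter.h"
void mergeTemporalTraces(llvm::InstrProfReader &Reader) {
  // Keep at most 100 traces via reservoir sampling, each capped at 1000
  // entries.
  llvm::InstrProfWriter Writer(/*Sparse=*/false,
                               /*TemporalProfTraceReservoirSize=*/100,
                               /*MaxTemporalProfTraceLength=*/1000);
  Writer.addTemporalProfileTraces(Reader.getTemporalProfTraces(),
                                  Reader.getTemporalProfTraceStreamSize());
}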
diff --git a/llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h b/llvm/include/llvm/ProfileData/ItaniumManglingCanonicalizer.h
index aa7997a0228b..e634f9c223e1 100644
--- a/llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h
+++ b/llvm/include/llvm/ProfileData/ItaniumManglingCanonicalizer.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_ITANIUMMANGLINGCANONICALIZER_H
-#define LLVM_SUPPORT_ITANIUMMANGLINGCANONICALIZER_H
+#ifndef LLVM_PROFILEDATA_ITANIUMMANGLINGCANONICALIZER_H
+#define LLVM_PROFILEDATA_ITANIUMMANGLINGCANONICALIZER_H
#include <cstdint>
@@ -90,4 +90,4 @@ private:
};
} // namespace llvm
-#endif // LLVM_SUPPORT_ITANIUMMANGLINGCANONICALIZER_H
+#endif // LLVM_PROFILEDATA_ITANIUMMANGLINGCANONICALIZER_H
diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc
index c533073da751..b82a4baf6dd7 100644
--- a/llvm/include/llvm/ProfileData/MemProfData.inc
+++ b/llvm/include/llvm/ProfileData/MemProfData.inc
@@ -19,6 +19,7 @@
* synced up.
*
\*===----------------------------------------------------------------------===*/
+#include <string.h>
#ifdef _MSC_VER
#define PACKED(...) __pragma(pack(push,1)) __VA_ARGS__ __pragma(pack(pop))
@@ -32,7 +33,9 @@
(uint64_t)'o' << 24 | (uint64_t)'f' << 16 | (uint64_t)'r' << 8 | (uint64_t)129)
// The version number of the raw binary format.
-#define MEMPROF_RAW_VERSION 2ULL
+#define MEMPROF_RAW_VERSION 3ULL
+
+#define MEMPROF_BUILDID_MAX_SIZE 32ULL
namespace llvm {
namespace memprof {
@@ -46,37 +49,40 @@ PACKED(struct Header {
uint64_t StackOffset;
});
-
// A struct describing the information necessary to describe a /proc/maps
// segment entry for a particular binary/library identified by its build id.
PACKED(struct SegmentEntry {
uint64_t Start;
uint64_t End;
uint64_t Offset;
- // This field is unused until sanitizer procmaps support for build ids for
- // Linux-Elf is implemented.
- uint8_t BuildId[32] = {0};
+ uint64_t BuildIdSize;
+ uint8_t BuildId[MEMPROF_BUILDID_MAX_SIZE] = {0};
- SegmentEntry(uint64_t S, uint64_t E, uint64_t O) :
- Start(S), End(E), Offset(O) {}
+ // This constructor is only used in tests so don't set the BuildId.
+ SegmentEntry(uint64_t S, uint64_t E, uint64_t O)
+ : Start(S), End(E), Offset(O), BuildIdSize(0) {}
SegmentEntry(const SegmentEntry& S) {
Start = S.Start;
End = S.End;
Offset = S.Offset;
+ BuildIdSize = S.BuildIdSize;
+ memcpy(BuildId, S.BuildId, S.BuildIdSize);
}
SegmentEntry& operator=(const SegmentEntry& S) {
Start = S.Start;
End = S.End;
Offset = S.Offset;
+ BuildIdSize = S.BuildIdSize;
+ memcpy(BuildId, S.BuildId, S.BuildIdSize);
return *this;
}
bool operator==(const SegmentEntry& S) const {
- return Start == S.Start &&
- End == S.End &&
- Offset == S.Offset;
+ return Start == S.Start && End == S.End && Offset == S.Offset &&
+ BuildIdSize == S.BuildIdSize &&
+ memcmp(BuildId, S.BuildId, S.BuildIdSize) == 0;
}
});
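A small illustration of the semantic change (assuming the definitions above are in scope): equality now accounts for the build id, so entries with identical address ranges but different build ids no longer compare equal.
void compareSegments() {
  llvm::memprof::SegmentEntry A(0x1000, 0x2000, 0x0); // test-only ctor, no id
  llvm::memprof::SegmentEntry B(0x1000, 0x2000, 0x0);
  B.BuildIdSize = 4;
  memcpy(B.BuildId, "\xde\xad\xbe\xef", 4);
  bool Same = (A == B); // false: the build ids differ
  (void)Same;
}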
diff --git a/llvm/include/llvm/ProfileData/ProfileCommon.h b/llvm/include/llvm/ProfileData/ProfileCommon.h
index ad92af22d92e..4fe92cef4d72 100644
--- a/llvm/include/llvm/ProfileData/ProfileCommon.h
+++ b/llvm/include/llvm/ProfileData/ProfileCommon.h
@@ -34,9 +34,6 @@ class FunctionSamples;
} // end namespace sampleprof
-inline const char *getHotSectionPrefix() { return "hot"; }
-inline const char *getUnlikelySectionPrefix() { return "unlikely"; }
-
class ProfileSummaryBuilder {
private:
/// We keep track of the number of times a count (block count or samples)
diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h
index 998e845abb55..4141cfb42e0d 100644
--- a/llvm/include/llvm/ProfileData/RawMemProfReader.h
+++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h
@@ -51,12 +51,17 @@ public:
static bool hasFormat(const StringRef Path);
// Create a RawMemProfReader after sanity checking the contents of the file at
- // \p Path. The binary from which the profile has been collected is specified
- // via a path in \p ProfiledBinary.
+ // \p Path or the \p Buffer. The binary from which the profile has been
+ // collected is specified via a path in \p ProfiledBinary.
static Expected<std::unique_ptr<RawMemProfReader>>
- create(const Twine &Path, const StringRef ProfiledBinary,
+ create(const Twine &Path, StringRef ProfiledBinary, bool KeepName = false);
+ static Expected<std::unique_ptr<RawMemProfReader>>
+ create(std::unique_ptr<MemoryBuffer> Buffer, StringRef ProfiledBinary,
bool KeepName = false);
+ // Returns a list of build ids recorded in the segment information.
+ static std::vector<std::string> peekBuildIds(MemoryBuffer *DataBuffer);
+
using GuidMemProfRecordPair = std::pair<GlobalValue::GUID, MemProfRecord>;
using Iterator = InstrProfIterator<GuidMemProfRecordPair, RawMemProfReader>;
Iterator end() { return Iterator(); }
@@ -106,6 +111,8 @@ private:
Error initialize(std::unique_ptr<MemoryBuffer> DataBuffer);
// Read and parse the contents of the `DataBuffer` as a binary format profile.
Error readRawProfile(std::unique_ptr<MemoryBuffer> DataBuffer);
+ // Initialize the segment mapping information for symbolization.
+ Error setupForSymbolization();
// Symbolize and cache all the virtual addresses we encounter in the
// callstacks from the raw profile. Also prune callstack frames which we can't
// symbolize or those that belong to the runtime. For profile entries where
@@ -125,11 +132,21 @@ private:
object::SectionedAddress getModuleOffset(uint64_t VirtualAddress);
+ // The profiled binary.
object::OwningBinary<object::Binary> Binary;
+ // A symbolizer to translate virtual addresses to code locations.
std::unique_ptr<llvm::symbolize::SymbolizableModule> Symbolizer;
+ // The preferred load address of the executable segment.
+ uint64_t PreferredTextSegmentAddress = 0;
+ // The base address of the text segment in the process during profiling.
+ uint64_t ProfiledTextSegmentStart = 0;
+ // The limit address of the text segment in the process during profiling.
+ uint64_t ProfiledTextSegmentEnd = 0;
+
+ // The memory mapped segment information for all executable segments in the
+ // profiled binary (filtered from the raw profile using the build id).
+ llvm::SmallVector<SegmentEntry, 2> SegmentInfo;
- // The contents of the raw profile.
- llvm::SmallVector<SegmentEntry, 16> SegmentInfo;
// A map from callstack id (same as key in CallStackMap below) to the heap
// information recorded for that allocation context.
llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData;
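A sketch of the new build-id peeking entry point; the buffer is assumed to hold a raw memprof profile already read by the caller.
#include "llvm/ProfileData/RawMemProfReader.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
void listProfiledBuildIds(llvm::MemoryBuffer &Profile) {
  // Useful for picking the matching binary before constructing the reader via
  // the new buffer-based create() overload.
  for (const std::string &Id :
       llvm::memprof::RawMemProfReader::peekBuildIds(&Profile))
    llvm::errs() << Id << "\n";
}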
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 13f0157222ec..12cc1f2fd002 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -90,12 +90,18 @@ namespace sampleprof {
enum SampleProfileFormat {
SPF_None = 0,
SPF_Text = 0x1,
- SPF_Compact_Binary = 0x2,
+ SPF_Compact_Binary = 0x2, // Deprecated
SPF_GCC = 0x3,
SPF_Ext_Binary = 0x4,
SPF_Binary = 0xff
};
+enum SampleProfileLayout {
+ SPL_None = 0,
+ SPL_Nest = 0x1,
+ SPL_Flat = 0x2,
+};
+
static inline uint64_t SPMagic(SampleProfileFormat Format = SPF_Binary) {
return uint64_t('S') << (64 - 8) | uint64_t('P') << (64 - 16) |
uint64_t('R') << (64 - 24) | uint64_t('O') << (64 - 32) |
@@ -163,7 +169,7 @@ struct SecHdrTableEntry {
uint64_t Size;
// The index indicating the location of the current entry in
// SectionHdrLayout table.
- uint32_t LayoutIndex;
+ uint64_t LayoutIndex;
};
// Flags common for all sections are defined here. In SecHdrTableEntry::Flags,
@@ -427,6 +433,14 @@ public:
void print(raw_ostream &OS, unsigned Indent) const;
void dump() const;
+ bool operator==(const SampleRecord &Other) const {
+ return NumSamples == Other.NumSamples && CallTargets == Other.CallTargets;
+ }
+
+ bool operator!=(const SampleRecord &Other) const {
+ return !(*this == Other);
+ }
+
private:
uint64_t NumSamples = 0;
CallTargetMap CallTargets;
@@ -709,6 +723,8 @@ using BodySampleMap = std::map<LineLocation, SampleRecord>;
// memory, which is *very* significant for large profiles.
using FunctionSamplesMap = std::map<std::string, FunctionSamples, std::less<>>;
using CallsiteSampleMap = std::map<LineLocation, FunctionSamplesMap>;
+using LocToLocMap =
+ std::unordered_map<LineLocation, LineLocation, LineLocationHash>;
/// Representation of the samples collected for a function.
///
@@ -739,6 +755,8 @@ public:
void setTotalSamples(uint64_t Num) { TotalSamples = Num; }
+ void setHeadSamples(uint64_t Num) { TotalHeadSamples = Num; }
+
sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) {
bool Overflowed;
TotalHeadSamples =
@@ -761,6 +779,11 @@ public:
FName, Num, Weight);
}
+ sampleprof_error addSampleRecord(LineLocation Location,
+ const SampleRecord &SampleRecord, uint64_t Weight = 1) {
+ return BodySamples[Location].merge(SampleRecord, Weight);
+ }
+
// Remove a call target and decrease the body sample correspondingly. Return
// the number of body samples actually decreased.
uint64_t removeCalledTargetAndBodySample(uint32_t LineOffset,
@@ -777,11 +800,10 @@ public:
return Count;
}
- sampleprof_error addBodySamplesForProbe(uint32_t Index, uint64_t Num,
- uint64_t Weight = 1) {
- SampleRecord S;
- S.addSamples(Num, Weight);
- return BodySamples[LineLocation(Index, 0)].merge(S, Weight);
+ // Remove all call site samples for inlinees. This is needed when flattening
+ // a nested profile.
+ void removeAllCallsiteSamples() {
+ CallsiteSamples.clear();
}
// Accumulate all call target samples to update the body samples.
@@ -821,12 +843,26 @@ public:
}
}
+ // Query the stale profile matching results and remap the location.
+ const LineLocation &mapIRLocToProfileLoc(const LineLocation &IRLoc) const {
+ // There is no remapping if the profile is not stale or the matching gives
+ // the same location.
+ if (!IRToProfileLocationMap)
+ return IRLoc;
+ const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
+ if (ProfileLoc != IRToProfileLocationMap->end())
+ return ProfileLoc->second;
+ else
+ return IRLoc;
+ }
+
/// Return the number of samples collected at the given location.
/// Each location is specified by \p LineOffset and \p Discriminator.
/// If the location is not found in profile, return error.
ErrorOr<uint64_t> findSamplesAt(uint32_t LineOffset,
uint32_t Discriminator) const {
- const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
+ const auto &ret = BodySamples.find(
+ mapIRLocToProfileLoc(LineLocation(LineOffset, Discriminator)));
if (ret == BodySamples.end())
return std::error_code();
return ret->second.getSamples();
@@ -837,7 +873,8 @@ public:
/// If the location is not found in profile, return error.
ErrorOr<SampleRecord::CallTargetMap>
findCallTargetMapAt(uint32_t LineOffset, uint32_t Discriminator) const {
- const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
+ const auto &ret = BodySamples.find(
+ mapIRLocToProfileLoc(LineLocation(LineOffset, Discriminator)));
if (ret == BodySamples.end())
return std::error_code();
return ret->second.getCallTargets();
@@ -847,7 +884,7 @@ public:
/// CallSite. If the location is not found in profile, return error.
ErrorOr<SampleRecord::CallTargetMap>
findCallTargetMapAt(const LineLocation &CallSite) const {
- const auto &Ret = BodySamples.find(CallSite);
+ const auto &Ret = BodySamples.find(mapIRLocToProfileLoc(CallSite));
if (Ret == BodySamples.end())
return std::error_code();
return Ret->second.getCallTargets();
@@ -855,13 +892,13 @@ public:
/// Return the function samples at the given callsite location.
FunctionSamplesMap &functionSamplesAt(const LineLocation &Loc) {
- return CallsiteSamples[Loc];
+ return CallsiteSamples[mapIRLocToProfileLoc(Loc)];
}
/// Returns the FunctionSamplesMap at the given \p Loc.
const FunctionSamplesMap *
findFunctionSamplesMapAt(const LineLocation &Loc) const {
- auto iter = CallsiteSamples.find(Loc);
+ auto iter = CallsiteSamples.find(mapIRLocToProfileLoc(Loc));
if (iter == CallsiteSamples.end())
return nullptr;
return &iter->second;
@@ -1023,6 +1060,11 @@ public:
uint64_t getFunctionHash() const { return FunctionHash; }
+ void setIRToProfileLocationMap(const LocToLocMap *LTLM) {
+ assert(IRToProfileLocationMap == nullptr && "this should be set only once");
+ IRToProfileLocationMap = LTLM;
+ }
+
/// Return the canonical name for a function, taking into account
/// suffix elision policy attributes.
static StringRef getCanonicalFnName(const Function &F) {
@@ -1149,6 +1191,21 @@ public:
// all the inline instances and names of call targets.
void findAllNames(DenseSet<StringRef> &NameSet) const;
+ bool operator==(const FunctionSamples &Other) const {
+ return (GUIDToFuncNameMap == Other.GUIDToFuncNameMap ||
+ (GUIDToFuncNameMap && Other.GUIDToFuncNameMap &&
+ *GUIDToFuncNameMap == *Other.GUIDToFuncNameMap)) &&
+ FunctionHash == Other.FunctionHash && Context == Other.Context &&
+ TotalSamples == Other.TotalSamples &&
+ TotalHeadSamples == Other.TotalHeadSamples &&
+ BodySamples == Other.BodySamples &&
+ CallsiteSamples == Other.CallsiteSamples;
+ }
+
+ bool operator!=(const FunctionSamples &Other) const {
+ return !(*this == Other);
+ }
+
private:
/// CFG hash value for the function.
uint64_t FunctionHash = 0;
@@ -1191,6 +1248,25 @@ private:
/// in the call to bar() at line offset 1, the other for all the samples
/// collected in the call to baz() at line offset 8.
CallsiteSampleMap CallsiteSamples;
+
+ /// IR to profile location map generated by stale profile matching.
+ ///
+ /// Each entry is a mapping from the location on current build to the matched
+ /// location in the "stale" profile. For example:
+ /// Profiled source code:
+ /// void foo() {
+ /// 1 bar();
+ /// }
+ ///
+ /// Current source code:
+ /// void foo() {
+ /// 1 // Code change
+ /// 2 bar();
+ /// }
+ /// Supposing the stale profile matching algorithm generated the mapping [2 ->
+ /// 1], the profile query using the location of bar on the IR which is 2 will
+ /// be remapped to 1 and find the location of bar in the profile.
+ const LocToLocMap *IRToProfileLocationMap = nullptr;
};
raw_ostream &operator<<(raw_ostream &OS, const FunctionSamples &FS);
@@ -1251,12 +1327,16 @@ private:
SampleProfileMap &ProfileMap;
};
-// CSProfileConverter converts a full context-sensitive flat sample profile into
-// a nested context-sensitive sample profile.
-class CSProfileConverter {
+/// Helper class for profile conversion.
+///
+/// It supports full context-sensitive profile to nested profile conversion,
+/// nested profile to flatten profile conversion, etc.
+class ProfileConverter {
public:
- CSProfileConverter(SampleProfileMap &Profiles);
- void convertProfiles();
+ ProfileConverter(SampleProfileMap &Profiles);
+ // Convert a full context-sensitive flat sample profile into a nested sample
+ // profile.
+ void convertCSProfiles();
struct FrameNode {
FrameNode(StringRef FName = StringRef(),
FunctionSamples *FSamples = nullptr,
@@ -1276,9 +1356,84 @@ public:
StringRef CalleeName);
};
+ static void flattenProfile(SampleProfileMap &ProfileMap,
+ bool ProfileIsCS = false) {
+ SampleProfileMap TmpProfiles;
+ flattenProfile(ProfileMap, TmpProfiles, ProfileIsCS);
+ ProfileMap = std::move(TmpProfiles);
+ }
+
+ static void flattenProfile(const SampleProfileMap &InputProfiles,
+ SampleProfileMap &OutputProfiles,
+ bool ProfileIsCS = false) {
+ if (ProfileIsCS) {
+ for (const auto &I : InputProfiles)
+ OutputProfiles[I.second.getName()].merge(I.second);
+ // Retain the profile name and clear the full context for each function
+ // profile.
+ for (auto &I : OutputProfiles)
+ I.second.setContext(SampleContext(I.first));
+ } else {
+ for (const auto &I : InputProfiles)
+ flattenNestedProfile(OutputProfiles, I.second);
+ }
+ }
+
private:
+ static void flattenNestedProfile(SampleProfileMap &OutputProfiles,
+ const FunctionSamples &FS) {
+ // To retain the context, checksum, attributes of the original profile, make
+ // a copy of it if no profile is found.
+ SampleContext &Context = FS.getContext();
+ auto Ret = OutputProfiles.try_emplace(Context, FS);
+ FunctionSamples &Profile = Ret.first->second;
+ if (Ret.second) {
+ // Clear nested inlinees' samples for the flattened copy. These inlinees
+ // will have their own top-level entries after flattening.
+ Profile.removeAllCallsiteSamples();
+ // We recompute TotalSamples later, so here set to zero.
+ Profile.setTotalSamples(0);
+ } else {
+ for (const auto &[LineLocation, SampleRecord] : FS.getBodySamples()) {
+ Profile.addSampleRecord(LineLocation, SampleRecord);
+ }
+ }
+
+ assert(Profile.getCallsiteSamples().empty() &&
+ "There should be no inlinees' profiles after flattening.");
+
+ // TotalSamples might not be equal to the sum of all samples from
+ // BodySamples and CallsiteSamples. So here we use "TotalSamples =
+ // Original_TotalSamples - All_of_Callsite_TotalSamples +
+ // All_of_Callsite_HeadSamples" to compute the new TotalSamples.
+ uint64_t TotalSamples = FS.getTotalSamples();
+
+ for (const auto &I : FS.getCallsiteSamples()) {
+ for (const auto &Callee : I.second) {
+ const auto &CalleeProfile = Callee.second;
+ // Add body sample.
+ Profile.addBodySamples(I.first.LineOffset, I.first.Discriminator,
+ CalleeProfile.getHeadSamplesEstimate());
+ // Add callsite sample.
+ Profile.addCalledTargetSamples(
+ I.first.LineOffset, I.first.Discriminator, CalleeProfile.getName(),
+ CalleeProfile.getHeadSamplesEstimate());
+ // Update total samples.
+ TotalSamples = TotalSamples >= CalleeProfile.getTotalSamples()
+ ? TotalSamples - CalleeProfile.getTotalSamples()
+ : 0;
+ TotalSamples += CalleeProfile.getHeadSamplesEstimate();
+ // Recursively convert callee profile.
+ flattenNestedProfile(OutputProfiles, CalleeProfile);
+ }
+ }
+ Profile.addTotalSamples(TotalSamples);
+
+ Profile.setHeadSamples(Profile.getHeadSamplesEstimate());
+ }
+
// Nest all children profiles into the profile of Node.
- void convertProfiles(FrameNode &Node);
+ void convertCSProfiles(FrameNode &Node);
FrameNode *getOrCreateContextPath(const SampleContext &Context);
SampleProfileMap &ProfileMap;
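The TotalSamples bookkeeping in flattenNestedProfile() can be read as a small worked example: if foo has TotalSamples 1000 and a single inlined callee bar with TotalSamples 400 and an estimated 50 head samples, the flattened foo ends up with 1000 - 400 + 50 = 650 total samples, while bar's samples merge into its own top-level entry. A sketch of driving the conversion on a nested (non-CS) profile map:
#include "llvm/ProfileData/SampleProf.h"
void flattenNested(llvm::sampleprof::SampleProfileMap &ProfileMap) {
  // In-place convenience overload; inlinees become top-level function
  // profiles.
  llvm::sampleprof::ProfileConverter::flattenProfile(ProfileMap,
                                                     /*ProfileIsCS=*/false);
}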
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 57e8c8c74e4e..e14b0bfc7912 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -170,7 +170,7 @@
// Number of samples to get to the desired percentile.
//
// NAME TABLE
-// SIZE (uint32_t)
+// SIZE (uint64_t)
// Number of entries in the name table.
// NAMES
// A NUL-separated list of SIZE strings.
@@ -182,7 +182,7 @@
// NOTE: This field should only be present for top-level functions
// (i.e., not inlined into any caller). Inlined function calls
// have no prologue, so they don't need this.
-// NAME_IDX (uint32_t)
+// NAME_IDX (uint64_t)
// Index into the name table indicating the function name.
// SAMPLES (uint64_t)
// Total number of samples collected in this function.
@@ -204,7 +204,7 @@
// represent all the actual functions called at runtime.
// CALL_TARGETS
// A list of NUM_CALLS entries for each called function:
-// NAME_IDX (uint32_t)
+// NAME_IDX (uint64_t)
// Index into the name table with the callee name.
// SAMPLES (uint64_t)
// Number of samples collected at the call site.
@@ -232,11 +232,11 @@
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/GCOV.h"
#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/ProfileData/SymbolRemappingReader.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Discriminator.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SymbolRemappingReader.h"
#include <cstdint>
#include <list>
#include <memory>
@@ -251,6 +251,10 @@ namespace llvm {
class raw_ostream;
class Twine;
+namespace vfs {
+class FileSystem;
+} // namespace vfs
+
namespace sampleprof {
class SampleProfileReader;
@@ -270,8 +274,8 @@ public:
/// Create a remapper from the given remapping file. The remapper will
/// be used for profile read in by Reader.
static ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
- create(const std::string Filename, SampleProfileReader &Reader,
- LLVMContext &C);
+ create(const std::string Filename, vfs::FileSystem &FS,
+ SampleProfileReader &Reader, LLVMContext &C);
/// Create a remapper from the given Buffer. The remapper will
/// be used for profile read in by Reader.
@@ -450,7 +454,7 @@ public:
/// Create a remapper underlying if RemapFilename is not empty.
/// Parameter P specifies the FSDiscriminatorPass.
static ErrorOr<std::unique_ptr<SampleProfileReader>>
- create(const std::string Filename, LLVMContext &C,
+ create(const std::string Filename, LLVMContext &C, vfs::FileSystem &FS,
FSDiscriminatorPass P = FSDiscriminatorPass::Base,
const std::string RemapFilename = "");
@@ -458,7 +462,7 @@ public:
/// Create a remapper underlying if RemapFilename is not empty.
/// Parameter P specifies the FSDiscriminatorPass.
static ErrorOr<std::unique_ptr<SampleProfileReader>>
- create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
+ create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C, vfs::FileSystem &FS,
FSDiscriminatorPass P = FSDiscriminatorPass::Base,
const std::string RemapFilename = "");
@@ -479,6 +483,9 @@ public:
/// Whether input profile contains ShouldBeInlined contexts.
bool profileIsPreInlined() const { return ProfileIsPreInlined; }
+ /// Whether input profile is flow-sensitive.
+ bool profileIsFS() const { return ProfileIsFS; }
+
virtual std::unique_ptr<ProfileSymbolList> getProfileSymbolList() {
return nullptr;
};
@@ -489,7 +496,11 @@ public:
virtual bool dumpSectionInfo(raw_ostream &OS = dbgs()) { return false; };
/// Return whether names in the profile are all MD5 numbers.
- virtual bool useMD5() { return false; }
+ bool useMD5() const { return ProfileIsMD5; }
+
+ /// Force the profile to use MD5 in Sample contexts, even if function names
+ /// are present.
+ virtual void setProfileUseMD5() { ProfileIsMD5 = true; }
/// Don't read profile without context if the flag is set. This is only meaningful
/// for ExtBinary format.
@@ -559,6 +570,10 @@ protected:
/// Zero out the discriminator bits higher than bit MaskedBitFrom (0 based).
/// The default is to keep all the bits.
uint32_t MaskedBitFrom = 31;
+
+ /// Whether the profile uses MD5 for Sample Contexts and function names. This
+ /// can be one-way overridden by the user to force the use of MD5.
+ bool ProfileIsMD5 = false;
};
class SampleProfileReaderText : public SampleProfileReader {
@@ -575,6 +590,9 @@ public:
/// Return true if \p Buffer is in the format supported by this class.
static bool hasFormat(const MemoryBuffer &Buffer);
+ /// Text format sample profile does not support MD5 for now.
+ void setProfileUseMD5() override {}
+
private:
/// CSNameTable is used to save full context vectors. This serves as an
/// underlying immutable buffer for all clients.
@@ -619,10 +637,7 @@ protected:
ErrorOr<StringRef> readString();
/// Read the string index and check whether it overflows the table.
- template <typename T> inline ErrorOr<uint32_t> readStringIndex(T &Table);
-
- /// Return true if we've reached the end of file.
- bool at_eof() const { return Data >= End; }
+ template <typename T> inline ErrorOr<size_t> readStringIndex(T &Table);
/// Read the next function profile instance.
std::error_code readFuncProfile(const uint8_t *Start);
@@ -637,7 +652,17 @@ protected:
std::error_code readSummary();
/// Read the whole name table.
- virtual std::error_code readNameTable();
+ std::error_code readNameTable();
+
+ /// Read a string indirectly via the name table.
+ ErrorOr<StringRef> readStringFromTable();
+
+ /// Read a context indirectly via the CSNameTable.
+ ErrorOr<SampleContextFrames> readContextFromTable();
+
+ /// Read a context indirectly via the CSNameTable if the profile has context,
+ /// otherwise same as readStringFromTable.
+ ErrorOr<SampleContext> readSampleContextFromTable();
/// Points to the current location in the buffer.
const uint8_t *Data = nullptr;
@@ -648,9 +673,21 @@ protected:
/// Function name table.
std::vector<StringRef> NameTable;
- /// Read a string indirectly via the name table.
- virtual ErrorOr<StringRef> readStringFromTable();
- virtual ErrorOr<SampleContext> readSampleContextFromTable();
+ /// If MD5 is used in NameTable section, the section saves uint64_t data.
+ /// The uint64_t data has to be converted to a string and then the string
+ /// will be used to initialize StringRef in NameTable.
+ /// Note NameTable contains StringRef so it needs another buffer to own
+ /// the string data. MD5StringBuf serves as the string buffer that is
+ /// referenced by NameTable (vector of StringRef). We make sure
+ /// the lifetime of MD5StringBuf is not shorter than that of NameTable.
+ std::vector<std::string> MD5StringBuf;
+
+ /// The starting address of NameTable containing fixed length MD5.
+ const uint8_t *MD5NameMemStart = nullptr;
+
+ /// CSNameTable is used to save full context vectors. It is the backing buffer
+ /// for SampleContextFrames.
+ std::vector<SampleContextFrameVector> CSNameTable;
private:
std::error_code readSummaryEntry(std::vector<ProfileSummaryEntry> &Entries);
@@ -700,7 +737,7 @@ private:
protected:
std::vector<SecHdrTableEntry> SecHdrTable;
- std::error_code readSecHdrTableEntry(uint32_t Idx);
+ std::error_code readSecHdrTableEntry(uint64_t Idx);
std::error_code readSecHdrTable();
std::error_code readFuncMetadata(bool ProfileHasAttribute);
@@ -708,8 +745,7 @@ protected:
FunctionSamples *FProfile);
std::error_code readFuncOffsetTable();
std::error_code readFuncProfiles();
- std::error_code readMD5NameTable();
- std::error_code readNameTableSec(bool IsMD5);
+ std::error_code readNameTableSec(bool IsMD5, bool FixedLengthMD5);
std::error_code readCSNameTableSec();
std::error_code readProfileSymbolList();
@@ -719,48 +755,29 @@ protected:
const SecHdrTableEntry &Entry);
// placeholder for subclasses to dispatch their own section readers.
virtual std::error_code readCustomSection(const SecHdrTableEntry &Entry) = 0;
- ErrorOr<StringRef> readStringFromTable() override;
- ErrorOr<SampleContext> readSampleContextFromTable() override;
- ErrorOr<SampleContextFrames> readContextFromTable();
+
+ /// Determine which container readFuncOffsetTable() should populate, the list
+ /// FuncOffsetList or the map FuncOffsetTable.
+ bool useFuncOffsetList() const;
std::unique_ptr<ProfileSymbolList> ProfSymList;
/// The table mapping from function context to the offset of its
/// FunctionSample towards file start.
+ /// At most one of FuncOffsetTable and FuncOffsetList is populated.
DenseMap<SampleContext, uint64_t> FuncOffsetTable;
- /// Function offset mapping ordered by contexts.
- std::unique_ptr<std::vector<std::pair<SampleContext, uint64_t>>>
- OrderedFuncOffsets;
+ /// The list version of FuncOffsetTable. This is used if every entry is
+ /// being accessed.
+ std::vector<std::pair<SampleContext, uint64_t>> FuncOffsetList;
/// The set containing the functions to use when compiling a module.
DenseSet<StringRef> FuncsToUse;
- /// Use fixed length MD5 instead of ULEB128 encoding so NameTable doesn't
- /// need to be read in up front and can be directly accessed using index.
- bool FixedLengthMD5 = false;
- /// The starting address of NameTable containing fixed length MD5.
- const uint8_t *MD5NameMemStart = nullptr;
-
- /// If MD5 is used in NameTable section, the section saves uint64_t data.
- /// The uint64_t data has to be converted to a string and then the string
- /// will be used to initialize StringRef in NameTable.
- /// Note NameTable contains StringRef so it needs another buffer to own
- /// the string data. MD5StringBuf serves as the string buffer that is
- /// referenced by NameTable (vector of StringRef). We make sure
- /// the lifetime of MD5StringBuf is not shorter than that of NameTable.
- std::unique_ptr<std::vector<std::string>> MD5StringBuf;
-
- /// CSNameTable is used to save full context vectors. This serves as an
- /// underlying immutable buffer for all clients.
- std::unique_ptr<const std::vector<SampleContextFrameVector>> CSNameTable;
-
/// If SkipFlatProf is true, skip the sections with
/// SecFlagFlat flag.
bool SkipFlatProf = false;
- bool FuncOffsetsOrdered = false;
-
public:
SampleProfileReaderExtBinaryBase(std::unique_ptr<MemoryBuffer> B,
LLVMContext &C, SampleProfileFormat Format)
@@ -779,9 +796,6 @@ public:
/// the reader has been given a module.
bool collectFuncsFromModule() override;
- /// Return whether names in the profile are all MD5 numbers.
- bool useMD5() override { return MD5StringBuf.get(); }
-
std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
return std::move(ProfSymList);
};
@@ -807,41 +821,6 @@ public:
static bool hasFormat(const MemoryBuffer &Buffer);
};
-class SampleProfileReaderCompactBinary : public SampleProfileReaderBinary {
-private:
- /// Function name table.
- std::vector<std::string> NameTable;
- /// The table mapping from function name to the offset of its FunctionSample
- /// towards file start.
- DenseMap<StringRef, uint64_t> FuncOffsetTable;
- /// The set containing the functions to use when compiling a module.
- DenseSet<StringRef> FuncsToUse;
- std::error_code verifySPMagic(uint64_t Magic) override;
- std::error_code readNameTable() override;
- /// Read a string indirectly via the name table.
- ErrorOr<StringRef> readStringFromTable() override;
- std::error_code readHeader() override;
- std::error_code readFuncOffsetTable();
-
-public:
- SampleProfileReaderCompactBinary(std::unique_ptr<MemoryBuffer> B,
- LLVMContext &C)
- : SampleProfileReaderBinary(std::move(B), C, SPF_Compact_Binary) {}
-
- /// \brief Return true if \p Buffer is in the format supported by this class.
- static bool hasFormat(const MemoryBuffer &Buffer);
-
- /// Read samples only for functions to use.
- std::error_code readImpl() override;
-
- /// Collect functions with definitions in Module M. Return true if
- /// the reader has been given a module.
- bool collectFuncsFromModule() override;
-
- /// Return whether names in the profile are all MD5 numbers.
- bool useMD5() override { return true; }
-};
-
using InlineCallStack = SmallVector<FunctionSamples *, 10>;
// Supported histogram types in GCC. Currently, we only need support for
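A sketch of the updated sample profile reader factory with the new vfs::FileSystem parameter and the MD5 override; the profile file name is hypothetical.
#include "llvm/IR/LLVMContext.h"
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/Support/VirtualFileSystem.h"
void readSampleProfile(llvm::LLVMContext &Ctx) {
  auto FS = llvm::vfs::getRealFileSystem();
  auto ReaderOrErr =
      llvm::sampleprof::SampleProfileReader::create("perf.prof", Ctx, *FS);
  if (!ReaderOrErr)
    return;
  // Force MD5 name handling even if the profile stores plain strings; this is
  // a no-op for the text format.
  (*ReaderOrErr)->setProfileUseMD5();
}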
diff --git a/llvm/include/llvm/ProfileData/SampleProfWriter.h b/llvm/include/llvm/ProfileData/SampleProfWriter.h
index b1ed0335e9c9..1f19283ea1dd 100644
--- a/llvm/include/llvm/ProfileData/SampleProfWriter.h
+++ b/llvm/include/llvm/ProfileData/SampleProfWriter.h
@@ -35,6 +35,56 @@ enum SectionLayout {
NumOfLayout,
};
+/// When writing a profile with size limit, user may want to use a different
+/// strategy to reduce function count other than dropping functions with fewest
+/// samples first. In this case a class implementing the same interfaces should
+/// be provided to SampleProfileWriter::writeWithSizeLimit().
+class FunctionPruningStrategy {
+protected:
+ SampleProfileMap &ProfileMap;
+ size_t OutputSizeLimit;
+
+public:
+ /// \p ProfileMap A reference to the original profile map. It will be modified
+ /// by Erase().
+ /// \p OutputSizeLimit Size limit in bytes of the output profile. This is
+ /// necessary to estimate how many functions to remove.
+ FunctionPruningStrategy(SampleProfileMap &ProfileMap, size_t OutputSizeLimit)
+ : ProfileMap(ProfileMap), OutputSizeLimit(OutputSizeLimit) {}
+
+ virtual ~FunctionPruningStrategy() = default;
+
+ /// SampleProfileWriter::writeWithSizeLimit() calls this after every write
+ /// iteration if the output size still exceeds the limit. This function
+ /// should erase some functions from the profile map so that the writer tries
+ /// to write the profile again with fewer functions. At least 1 entry from the
+ /// profile map must be erased.
+ ///
+ /// \p CurrentOutputSize Number of bytes in the output if current profile map
+ /// is written.
+ virtual void Erase(size_t CurrentOutputSize) = 0;
+};
+
+class DefaultFunctionPruningStrategy : public FunctionPruningStrategy {
+ std::vector<NameFunctionSamples> SortedFunctions;
+
+public:
+ DefaultFunctionPruningStrategy(SampleProfileMap &ProfileMap,
+ size_t OutputSizeLimit);
+
+ /// In this default implementation, functions with fewest samples are dropped
+ /// first. Since the exact size of the output cannot be easily calculated due
+ /// to compression, we use a heuristic to remove as many functions as
+ /// necessary but not too many, aiming to minimize the number of write
+ /// iterations.
+ /// Empirically, functions with larger total sample count contain linearly
+ /// more sample entries, meaning it takes linearly more space to write them.
+ /// The cumulative length is therefore quadratic if all functions are sorted
+ /// by total sample count.
+ /// TODO: Find better heuristic.
+ void Erase(size_t CurrentOutputSize) override;
+};
+
/// Sample-based profile writer. Base class.
class SampleProfileWriter {
public:
@@ -50,6 +100,17 @@ public:
/// \returns status code of the file update operation.
virtual std::error_code write(const SampleProfileMap &ProfileMap);
+ /// Write sample profiles up to given size limit, using the pruning strategy
+ /// to drop some functions if necessary.
+ ///
+ /// \returns status code of the file update operation.
+ template <typename FunctionPruningStrategy = DefaultFunctionPruningStrategy>
+ std::error_code writeWithSizeLimit(SampleProfileMap &ProfileMap,
+ size_t OutputSizeLimit) {
+ FunctionPruningStrategy Strategy(ProfileMap, OutputSizeLimit);
+ return writeWithSizeLimitInternal(ProfileMap, OutputSizeLimit, &Strategy);
+ }
+
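
A caller that wants a different pruning policy can subclass FunctionPruningStrategy and pass it as the template argument of writeWithSizeLimit(). The sketch below is only an illustration built on the declarations above: the strategy name is hypothetical, erasing the first map entry is a placeholder policy, and it assumes SampleProfileMap supports erase(begin()).

#include "llvm/ProfileData/SampleProfWriter.h"

// Hypothetical strategy: drop one arbitrary function per iteration. A real
// policy would pick victims deliberately (e.g. by sample count or name).
class DropAnyFunctionStrategy
    : public llvm::sampleprof::FunctionPruningStrategy {
public:
  using FunctionPruningStrategy::FunctionPruningStrategy;

  void Erase(size_t /*CurrentOutputSize*/) override {
    // writeWithSizeLimit() requires at least one entry to be removed here,
    // otherwise the rewrite loop could never terminate.
    if (!ProfileMap.empty())
      ProfileMap.erase(ProfileMap.begin());
  }
};

// Usage, assuming Writer is a SampleProfileWriter obtained from the factory:
//   std::error_code EC =
//       Writer->writeWithSizeLimit<DropAnyFunctionStrategy>(Profiles,
//                                                           /*bytes=*/1 << 20);
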
raw_ostream &getOutputStream() { return *OutputStream; }
/// Profile writer factory.
@@ -79,6 +140,15 @@ protected:
// Write function profiles to the profile file.
virtual std::error_code writeFuncProfiles(const SampleProfileMap &ProfileMap);
+ std::error_code writeWithSizeLimitInternal(SampleProfileMap &ProfileMap,
+ size_t OutputSizeLimit,
+ FunctionPruningStrategy *Strategy);
+
+ /// For writeWithSizeLimit in text mode, each newline takes 1 additional byte
+ /// on Windows when actually written to the file, but not written to a memory
+ /// buffer. This needs to be accounted for when rewriting the profile.
+ size_t LineCount;
+
/// Output stream where to emit the profile to.
std::unique_ptr<raw_ostream> OutputStream;
@@ -102,6 +172,7 @@ protected:
: SampleProfileWriter(OS), Indent(0) {}
std::error_code writeHeader(const SampleProfileMap &ProfileMap) override {
+ LineCount = 0;
return sampleprof_error::success;
}
@@ -343,55 +414,6 @@ private:
}
};
-// CompactBinary is a compact format of binary profile which both reduces
-// the profile size and the load time needed when compiling. It has two
-// major differences from the Binary format.
-// 1. It represents all the strings in name table using md5 hash.
-// 2. It saves a function offset table which maps function name index to
-// the offset of its function profile to the start of the binary profile,
-// so by using the function offset table, for those function profiles which
-// will not be needed when compiling a module, the profile reader doesn't
-// have to read them and it saves compile time if the profile size is huge.
-// The layout of the compact format is shown as follows:
-//
-// Part1: Profile header, the same as binary format, containing magic
-// number, version, summary, name table...
-// Part2: Function Offset Table Offset, which saves the position of
-// Part4.
-// Part3: Function profile collection
-// function1 profile start
-// ....
-// function2 profile start
-// ....
-// function3 profile start
-// ....
-// ......
-// Part4: Function Offset Table
-// function1 name index --> function1 profile start
-// function2 name index --> function2 profile start
-// function3 name index --> function3 profile start
-//
-// We need Part2 because profile reader can use it to find out and read
-// function offset table without reading Part3 first.
-class SampleProfileWriterCompactBinary : public SampleProfileWriterBinary {
- using SampleProfileWriterBinary::SampleProfileWriterBinary;
-
-public:
- std::error_code writeSample(const FunctionSamples &S) override;
- std::error_code write(const SampleProfileMap &ProfileMap) override;
-
-protected:
- /// The table mapping from function name to the offset of its FunctionSample
- /// towards profile start.
- MapVector<StringRef, uint64_t> FuncOffsetTable;
- /// The offset of the slot to be filled with the offset of FuncOffsetTable
- /// towards profile start.
- uint64_t TableOffset;
- std::error_code writeNameTable() override;
- std::error_code writeHeader(const SampleProfileMap &ProfileMap) override;
- std::error_code writeFuncOffsetTable();
-};
-
} // end namespace sampleprof
} // end namespace llvm
diff --git a/llvm/include/llvm/Support/SymbolRemappingReader.h b/llvm/include/llvm/ProfileData/SymbolRemappingReader.h
index 4fdaf87be082..61d32134e981 100644
--- a/llvm/include/llvm/Support/SymbolRemappingReader.h
+++ b/llvm/include/llvm/ProfileData/SymbolRemappingReader.h
@@ -56,12 +56,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_SUPPORT_SYMBOLREMAPPINGREADER_H
-#define LLVM_SUPPORT_SYMBOLREMAPPINGREADER_H
+#ifndef LLVM_PROFILEDATA_SYMBOLREMAPPINGREADER_H
+#define LLVM_PROFILEDATA_SYMBOLREMAPPINGREADER_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/ProfileData/ItaniumManglingCanonicalizer.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/ItaniumManglingCanonicalizer.h"
namespace llvm {
@@ -130,4 +130,4 @@ private:
} // end namespace llvm
-#endif // LLVM_SUPPORT_SYMBOLREMAPPINGREADER_H
+#endif // LLVM_PROFILEDATA_SYMBOLREMAPPINGREADER_H
diff --git a/llvm/include/llvm/Remarks/Remark.h b/llvm/include/llvm/Remarks/Remark.h
index 2ac881be6196..a66f7ed73f2f 100644
--- a/llvm/include/llvm/Remarks/Remark.h
+++ b/llvm/include/llvm/Remarks/Remark.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CBindingWrapping.h"
+#include "llvm/Support/raw_ostream.h"
#include <optional>
#include <string>
@@ -32,6 +33,9 @@ struct RemarkLocation {
StringRef SourceFilePath;
unsigned SourceLine = 0;
unsigned SourceColumn = 0;
+
+ /// Implement operator<< on RemarkLocation.
+ void print(raw_ostream &OS) const;
};
// Create wrappers for C Binding types (see CBindingWrapping.h).
@@ -45,6 +49,9 @@ struct Argument {
StringRef Val;
// If set, the debug location corresponding to the value.
std::optional<RemarkLocation> Loc;
+
+ /// Implement operator<< on Argument.
+ void print(raw_ostream &OS) const;
};
// Create wrappers for C Binding types (see CBindingWrapping.h).
@@ -63,6 +70,25 @@ enum class Type {
Last = Failure
};
+inline StringRef typeToStr(Type Ty) {
+ switch (Ty) {
+ case Type::Unknown:
+ return "Unknown";
+ case Type::Missed:
+ return "Missed";
+ case Type::Passed:
+ return "Passed";
+ case Type::Analysis:
+ return "Analysis";
+ case Type::AnalysisFPCommute:
+ return "AnalysisFPCommute";
+ case Type::AnalysisAliasing:
+ return "AnalysisAliasing";
+ default:
+ return "Failure";
+ }
+}
+
/// A remark type used for both emission and parsing.
struct Remark {
/// The type of the remark.
@@ -99,6 +125,9 @@ struct Remark {
/// Clone this remark to explicitly ask for a copy.
Remark clone() const { return *this; }
+ /// Implement operator<< on Remark.
+ void print(raw_ostream &OS) const;
+
private:
/// In order to avoid unwanted copies, "delete" the copy constructor.
/// If a copy is needed, it should be done through `Remark::clone()`.
@@ -171,6 +200,21 @@ inline bool operator<(const Remark &LHS, const Remark &RHS) {
RHS.FunctionName, RHS.Loc, RHS.Hotness, RHS.Args);
}
+inline raw_ostream &operator<<(raw_ostream &OS, const RemarkLocation &RLoc) {
+ RLoc.print(OS);
+ return OS;
+}
+
+inline raw_ostream &operator<<(raw_ostream &OS, const Argument &Arg) {
+ Arg.print(OS);
+ return OS;
+}
+
+inline raw_ostream &operator<<(raw_ostream &OS, const Remark &Remark) {
+ Remark.print(OS);
+ return OS;
+}
+
} // end namespace remarks
} // end namespace llvm
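
With these print() hooks and operator<< overloads in place, a remark and its components can be streamed to any raw_ostream. A minimal sketch; the RemarkType field name is assumed from the Remark struct, which is only partially shown above.

#include "llvm/Remarks/Remark.h"
#include "llvm/Support/raw_ostream.h"

void dumpRemark(const llvm::remarks::Remark &R) {
  // typeToStr() maps the Type enum to a readable name; operator<< forwards
  // to Remark::print().
  llvm::errs() << llvm::remarks::typeToStr(R.RemarkType) << ": " << R << "\n";
}
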
diff --git a/llvm/include/llvm/Remarks/RemarkLinker.h b/llvm/include/llvm/Remarks/RemarkLinker.h
index 307eb3f6e84a..f538718941c5 100644
--- a/llvm/include/llvm/Remarks/RemarkLinker.h
+++ b/llvm/include/llvm/Remarks/RemarkLinker.h
@@ -54,13 +54,26 @@ private:
/// A path to append before the external file path found in remark metadata.
std::optional<std::string> PrependPath;
+ /// If true, keep all remarks, otherwise only keep remarks with valid debug
+ /// locations.
+ bool KeepAllRemarks = true;
+
/// Keep this remark. If it's already in the set, discard it.
Remark &keep(std::unique_ptr<Remark> Remark);
+ /// Returns true if \p R should be kept. If KeepAllRemarks is false, only
+ /// return true if \p R has a valid debug location.
+ bool shouldKeepRemark(const Remark &R) {
+ return KeepAllRemarks ? true : R.Loc.has_value();
+ }
+
public:
/// Set a path to prepend to the external file path.
void setExternalFilePrependPath(StringRef PrependPath);
+ /// Set KeepAllRemarks to \p B.
+ void setKeepAllRemarks(bool B) { KeepAllRemarks = B; }
+
/// Link the remarks found in \p Buffer.
/// If \p RemarkFormat is not provided, try to deduce it from the metadata in
/// \p Buffer.
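
A sketch of how a client might use the new knob; link() is assumed to have the Error-returning signature documented above, and the buffer is assumed to already hold serialized remarks.

#include "llvm/ADT/StringRef.h"
#include "llvm/Remarks/RemarkLinker.h"
#include "llvm/Support/Error.h"

llvm::Error linkLocatedRemarksOnly(llvm::remarks::RemarkLinker &RL,
                                   llvm::StringRef Buffer) {
  // Drop remarks without a valid debug location while linking.
  RL.setKeepAllRemarks(false);
  return RL.link(Buffer);
}
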
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.h b/llvm/include/llvm/Support/AArch64TargetParser.h
deleted file mode 100644
index 54c4a2b786c9..000000000000
--- a/llvm/include/llvm/Support/AArch64TargetParser.h
+++ /dev/null
@@ -1,15 +0,0 @@
-//===-- llvm/Support/AArch64TargetParser.h ----------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This header is deprecated in favour of
-/// `llvm/TargetParser/AArch64TargetParser.h`.
-///
-//===----------------------------------------------------------------------===//
-
-#include "llvm/TargetParser/AArch64TargetParser.h"
diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
index 61b05743faf6..f56f23150ad7 100644
--- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -9,6 +9,13 @@
/// \file
/// AMDHSA kernel descriptor definitions. For more information, visit
/// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor
+///
+/// \warning
+/// Any changes to this file should also be audited for corresponding changes
+/// needed in both the assembler and disassembler, namely:
+/// * AMDGPUAsmPrinter.{cpp,h}
+/// * AMDGPUTargetStreamer.{cpp,h}
+/// * AMDGPUDisassembler.{cpp,h}
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Support/ARMTargetParser.h b/llvm/include/llvm/Support/ARMTargetParser.h
deleted file mode 100644
index a0c0edd6d0f1..000000000000
--- a/llvm/include/llvm/Support/ARMTargetParser.h
+++ /dev/null
@@ -1,15 +0,0 @@
-//===-- llvm/Support/ARMTargetParser.h --------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This header is deprecated in favour of
-/// `llvm/TargetParser/ARMTargetParser.h`.
-///
-//===----------------------------------------------------------------------===//
-
-#include "llvm/TargetParser/ARMTargetParser.h"
diff --git a/llvm/include/llvm/Support/ARMTargetParserCommon.h b/llvm/include/llvm/Support/ARMTargetParserCommon.h
deleted file mode 100644
index 27522f9802d2..000000000000
--- a/llvm/include/llvm/Support/ARMTargetParserCommon.h
+++ /dev/null
@@ -1,15 +0,0 @@
-//===-- llvm/Support/ARMTargetParserCommon.def ------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This header is deprecated in favour of
-/// `llvm/TargetParser/ARMTargetParserCommon.h`.
-///
-//===----------------------------------------------------------------------===//
-
-#include "llvm/TargetParser/ARMTargetParserCommon.h"
diff --git a/llvm/include/llvm/Support/Alignment.h b/llvm/include/llvm/Support/Alignment.h
index 4577641818be..8d4a7e7ddce5 100644
--- a/llvm/include/llvm/Support/Alignment.h
+++ b/llvm/include/llvm/Support/Alignment.h
@@ -100,7 +100,7 @@ public:
/// Allow constructions of constexpr Align from types.
/// Compile time equivalent to Align(alignof(T)).
template <typename T> constexpr static Align Of() {
- return Constant<std::alignment_of<T>::value>();
+ return Constant<std::alignment_of_v<T>>();
}
/// Constexpr constructor from LogValue type.
diff --git a/llvm/include/llvm/Support/AllocatorBase.h b/llvm/include/llvm/Support/AllocatorBase.h
index 5d05d3f8777b..044243225006 100644
--- a/llvm/include/llvm/Support/AllocatorBase.h
+++ b/llvm/include/llvm/Support/AllocatorBase.h
@@ -19,6 +19,12 @@
#ifndef LLVM_SUPPORT_ALLOCATORBASE_H
#define LLVM_SUPPORT_ALLOCATORBASE_H
+#ifdef _MSC_VER
+#define LLVM_ALLOCATORHOLDER_EMPTYBASE __declspec(empty_bases)
+#else
+#define LLVM_ALLOCATORHOLDER_EMPTYBASE
+#endif // _MSC_VER
+
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MemAlloc.h"
#include <type_traits>
@@ -72,7 +78,7 @@ public:
/// Deallocate space for a sequence of objects without constructing them.
template <typename T>
- std::enable_if_t<!std::is_same<std::remove_cv_t<T>, void>::value, void>
+ std::enable_if_t<!std::is_same_v<std::remove_cv_t<T>, void>, void>
Deallocate(T *Ptr, size_t Num = 1) {
Deallocate(static_cast<const void *>(Ptr), Num * sizeof(T), alignof(T));
}
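
The new macro exists so that a holder deriving from several empty base classes keeps the empty base optimization under MSVC, matching what Itanium-ABI compilers already do. A generic sketch of the pattern it enables; the class names below are made up.

#include "llvm/Support/AllocatorBase.h"

struct EmptyPolicyA {};
struct EmptyPolicyB {};

// Without __declspec(empty_bases), MSVC may lay out multiple empty bases at
// distinct offsets; with the macro, Holder typically stays pointer-sized.
class LLVM_ALLOCATORHOLDER_EMPTYBASE Holder : EmptyPolicyA, EmptyPolicyB {
  void *Data = nullptr;
};
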
diff --git a/llvm/include/llvm/Support/AtomicOrdering.h b/llvm/include/llvm/Support/AtomicOrdering.h
index 1a0d108300bc..e08c1b262a92 100644
--- a/llvm/include/llvm/Support/AtomicOrdering.h
+++ b/llvm/include/llvm/Support/AtomicOrdering.h
@@ -74,7 +74,8 @@ bool operator>=(AtomicOrdering, AtomicOrdering) = delete;
// is a valid AtomicOrdering.
template <typename Int> inline bool isValidAtomicOrdering(Int I) {
return static_cast<Int>(AtomicOrdering::NotAtomic) <= I &&
- I <= static_cast<Int>(AtomicOrdering::SequentiallyConsistent);
+ I <= static_cast<Int>(AtomicOrdering::SequentiallyConsistent) &&
+ I != 3;
}
/// String used by LLVM IR to represent atomic ordering.
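
For context, value 3 is the slot the AtomicOrdering enumeration leaves unused (it corresponds to C++'s memory_order_consume, which LLVM does not model), so the extra check rejects it even though it lies inside the numeric range. A tiny sketch of the resulting behaviour:

#include "llvm/Support/AtomicOrdering.h"
#include <cassert>

void checkValidity() {
  assert(llvm::isValidAtomicOrdering(2));  // Monotonic
  assert(!llvm::isValidAtomicOrdering(3)); // the unused slot is rejected now
  assert(llvm::isValidAtomicOrdering(7));  // SequentiallyConsistent
}
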
diff --git a/llvm/include/llvm/Support/BalancedPartitioning.h b/llvm/include/llvm/Support/BalancedPartitioning.h
new file mode 100644
index 000000000000..a8464ac0fe60
--- /dev/null
+++ b/llvm/include/llvm/Support/BalancedPartitioning.h
@@ -0,0 +1,202 @@
+//===- BalancedPartitioning.h ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements BalancedPartitioning, a recursive balanced graph
+// partitioning algorithm.
+//
+// The algorithm is used to find an ordering of FunctionNodes while optimizing
+// a specified objective. The algorithm uses recursive bisection; it starts
+// with a collection of unordered FunctionNodes and tries to split them into
+// two sets (buckets) of equal cardinality. Each bisection step is comprised of
+// iterations that greedily swap the FunctionNodes between the two buckets while
+// there is an improvement of the objective. Once the process converges, the
+// problem is divided into two sub-problems of half the size, which are
+// recursively applied for the two buckets. The final ordering of the
+// FunctionNodes is obtained by concatenating the two (recursively computed)
+// orderings.
+//
+// In order to speed up the computation, we limit the depth of the recursive
+// tree by a specified constant (SplitDepth) and apply at most a constant
+// number of greedy iterations per split (IterationsPerSplit). The worst-case
+// time complexity of the implementation is bounded by O(M*log^2 N), where
+// N is the number of FunctionNodes and M is the number of
+// FunctionNode-UtilityNode edges (assuming that any collection of D
+// FunctionNodes contains O(D) UtilityNodes). Notice that the two different
+// recursive sub-problems are independent and thus can be efficiently processed
+// in parallel.
+//
+// Reference:
+// * Optimizing Function Layout for Mobile Applications,
+// https://arxiv.org/abs/2211.09285
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_BALANCED_PARTITIONING_H
+#define LLVM_SUPPORT_BALANCED_PARTITIONING_H
+
+#include "raw_ostream.h"
+#include "llvm/ADT/ArrayRef.h"
+
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+#include <random>
+#include <vector>
+
+namespace llvm {
+
+class ThreadPool;
+/// A function with a set of utility nodes where it is beneficial to order two
+/// functions close together if they have similar utility nodes
+class BPFunctionNode {
+ friend class BalancedPartitioning;
+
+public:
+ using IDT = uint64_t;
+ using UtilityNodeT = uint32_t;
+
+ /// \param UtilityNodes the set of utility nodes (must be unique'd)
+ BPFunctionNode(IDT Id, ArrayRef<UtilityNodeT> UtilityNodes)
+ : Id(Id), UtilityNodes(UtilityNodes) {}
+
+ /// The ID of this node
+ IDT Id;
+
+ void dump(raw_ostream &OS) const;
+
+protected:
+ /// The list of utility nodes associated with this node
+ SmallVector<UtilityNodeT, 4> UtilityNodes;
+ /// The bucket assigned by balanced partitioning
+ std::optional<unsigned> Bucket;
+ /// The index of the input order of the FunctionNodes
+ uint64_t InputOrderIndex = 0;
+
+ friend class BPFunctionNodeTest_Basic_Test;
+ friend class BalancedPartitioningTest_Basic_Test;
+ friend class BalancedPartitioningTest_Large_Test;
+};
+
+/// Algorithm parameters; default values are tuned on real-world binaries
+struct BalancedPartitioningConfig {
+ /// The depth of the recursive bisection
+ unsigned SplitDepth = 18;
+ /// The maximum number of bp iterations per split
+ unsigned IterationsPerSplit = 40;
+ /// The probability for a vertex to skip a move from its current bucket to
+ /// another bucket; it often helps to escape from a local optimum
+ float SkipProbability = 0.1f;
+ /// Recursive subtasks up to the given depth are added to the queue and
+ /// distributed among threads by ThreadPool; all subsequent calls are executed
+ /// on the same thread
+ unsigned TaskSplitDepth = 9;
+};
+
+class BalancedPartitioning {
+public:
+ BalancedPartitioning(const BalancedPartitioningConfig &Config);
+
+ /// Run recursive graph partitioning that optimizes a given objective.
+ void run(std::vector<BPFunctionNode> &Nodes) const;
+
+private:
+ struct UtilitySignature;
+ using SignaturesT = SmallVector<UtilitySignature, 4>;
+ using FunctionNodeRange =
+ iterator_range<std::vector<BPFunctionNode>::iterator>;
+
+ /// A special ThreadPool that allows for spawning new tasks after blocking on
+ /// wait(). BalancedPartitioning recursively spawns new threads inside other
+ /// threads, so we need to track how many active threads could spawn more
+ /// threads.
+ struct BPThreadPool {
+ ThreadPool &TheThreadPool;
+ std::mutex mtx;
+ std::condition_variable cv;
+ /// The number of threads that could spawn more threads
+ std::atomic<int> NumActiveThreads = 0;
+ /// Only true when all threads are done spawning new threads
+ bool IsFinishedSpawning = false;
+ /// Asynchronous submission of the task to the pool
+ template <typename Func> void async(Func &&F);
+ /// Blocking wait for all threads to complete. Unlike ThreadPool, it is
+ /// acceptable for other threads to add more tasks while blocking on this
+ /// call.
+ void wait();
+ BPThreadPool(ThreadPool &TheThreadPool) : TheThreadPool(TheThreadPool) {}
+ };
+
+ /// Run a recursive bisection of a given list of FunctionNodes
+ /// \param RecDepth the current depth of recursion
+ /// \param RootBucket the initial bucket of the dataVertices
+ /// \param Offset the assigned buckets are the range [Offset, Offset +
+ /// Nodes.size()]
+ void bisect(const FunctionNodeRange Nodes, unsigned RecDepth,
+ unsigned RootBucket, unsigned Offset,
+ std::optional<BPThreadPool> &TP) const;
+
+ /// Run bisection iterations
+ void runIterations(const FunctionNodeRange Nodes, unsigned RecDepth,
+ unsigned LeftBucket, unsigned RightBucket,
+ std::mt19937 &RNG) const;
+
+ /// Run a bisection iteration to improve the optimization goal
+ /// \returns the total number of moved FunctionNodes
+ unsigned runIteration(const FunctionNodeRange Nodes, unsigned LeftBucket,
+ unsigned RightBucket, SignaturesT &Signatures,
+ std::mt19937 &RNG) const;
+
+ /// Try to move \p N from one bucket to another
+ /// \returns true iff \p N is moved
+ bool moveFunctionNode(BPFunctionNode &N, unsigned LeftBucket,
+ unsigned RightBucket, SignaturesT &Signatures,
+ std::mt19937 &RNG) const;
+
+ /// Split all the FunctionNodes into 2 buckets, StartBucket and StartBucket +
+ /// 1. The method is used for an initial assignment before a bisection step
+ void split(const FunctionNodeRange Nodes, unsigned StartBucket) const;
+
+ /// The uniform log-gap cost, assuming a utility node has \p X
+ /// FunctionNodes in the left bucket and \p Y FunctionNodes in the right one.
+ float logCost(unsigned X, unsigned Y) const;
+
+ float log2Cached(unsigned i) const;
+
+ const BalancedPartitioningConfig &Config;
+
+ /// Precomputed values of log2(x). Table size is small enough to fit in cache.
+ static constexpr unsigned LOG_CACHE_SIZE = 16384;
+ float Log2Cache[LOG_CACHE_SIZE];
+
+ /// The signature of a particular utility node used for the bisection step,
+ /// i.e., the number of \p FunctionNodes in each of the two buckets
+ struct UtilitySignature {
+ /// The number of \p FunctionNodes in the left bucket
+ unsigned LeftCount = 0;
+ /// The number of \p FunctionNodes in the right bucket
+ unsigned RightCount = 0;
+ /// The cached gain of moving a \p FunctionNode from the left bucket to the
+ /// right bucket
+ float CachedGainLR;
+ /// The cached gain of moving a \p FunctionNode from the right bucket to the
+ /// left bucket
+ float CachedGainRL;
+ /// Whether \p CachedGainLR and \p CachedGainRL are valid
+ bool CachedGainIsValid = false;
+ };
+
+protected:
+ /// Compute the move gain for uniform log-gap cost
+ static float moveGain(const BPFunctionNode &N, bool FromLeftToRight,
+ const SignaturesT &Signatures);
+ friend class BalancedPartitioningTest_MoveGain_Test;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_BALANCED_PARTITIONING_H
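
To make the interface above concrete, here is a minimal, hypothetical driver. It assumes, per the doc comments, that run() assigns buckets and leaves the node vector reordered accordingly; the utility-node IDs are arbitrary.

#include "llvm/Support/BalancedPartitioning.h"
#include <vector>

std::vector<llvm::BPFunctionNode::IDT> computeLayout() {
  using Node = llvm::BPFunctionNode;
  // Functions 0 and 1 share utility node 7, so the partitioning should tend
  // to place them in the same bucket and hence next to each other.
  std::vector<Node> Nodes = {Node(0, {7, 8}), Node(1, {7}), Node(2, {9})};

  llvm::BalancedPartitioningConfig Config; // defaults tuned for real binaries
  llvm::BalancedPartitioning BP(Config);
  BP.run(Nodes);

  std::vector<Node::IDT> Order;
  for (const Node &N : Nodes)
    Order.push_back(N.Id);
  return Order;
}
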
diff --git a/llvm/include/llvm/Support/BlockFrequency.h b/llvm/include/llvm/Support/BlockFrequency.h
index bf0ad46ab499..6c624d7dad7d 100644
--- a/llvm/include/llvm/Support/BlockFrequency.h
+++ b/llvm/include/llvm/Support/BlockFrequency.h
@@ -13,6 +13,7 @@
#ifndef LLVM_SUPPORT_BLOCKFREQUENCY_H
#define LLVM_SUPPORT_BLOCKFREQUENCY_H
+#include <cassert>
#include <cstdint>
namespace llvm {
@@ -27,7 +28,7 @@ public:
BlockFrequency(uint64_t Freq = 0) : Frequency(Freq) { }
/// Returns the maximum possible frequency, the saturation value.
- static uint64_t getMaxFrequency() { return -1ULL; }
+ static uint64_t getMaxFrequency() { return UINT64_MAX; }
/// Returns the frequency as a fixpoint number scaled by the entry
/// frequency.
@@ -44,15 +45,49 @@ public:
BlockFrequency operator/(BranchProbability Prob) const;
/// Adds another block frequency using saturating arithmetic.
- BlockFrequency &operator+=(BlockFrequency Freq);
- BlockFrequency operator+(BlockFrequency Freq) const;
+ BlockFrequency &operator+=(BlockFrequency Freq) {
+ uint64_t Before = Freq.Frequency;
+ Frequency += Freq.Frequency;
+
+ // If overflow, set frequency to the maximum value.
+ if (Frequency < Before)
+ Frequency = UINT64_MAX;
+
+ return *this;
+ }
+ BlockFrequency operator+(BlockFrequency Freq) const {
+ BlockFrequency NewFreq(Frequency);
+ NewFreq += Freq;
+ return NewFreq;
+ }
/// Subtracts another block frequency using saturating arithmetic.
- BlockFrequency &operator-=(BlockFrequency Freq);
- BlockFrequency operator-(BlockFrequency Freq) const;
+ BlockFrequency &operator-=(BlockFrequency Freq) {
+ // If underflow, set frequency to 0.
+ if (Frequency <= Freq.Frequency)
+ Frequency = 0;
+ else
+ Frequency -= Freq.Frequency;
+ return *this;
+ }
+ BlockFrequency operator-(BlockFrequency Freq) const {
+ BlockFrequency NewFreq(Frequency);
+ NewFreq -= Freq;
+ return NewFreq;
+ }
/// Shift block frequency to the right by count digits saturating to 1.
- BlockFrequency &operator>>=(const unsigned count);
+ BlockFrequency &operator>>=(const unsigned count) {
+ // Frequency can never be 0 by design.
+ assert(Frequency != 0);
+
+ // Shift right by count.
+ Frequency >>= count;
+
+ // Saturate to 1 if we are 0.
+ Frequency |= Frequency == 0;
+ return *this;
+ }
bool operator<(BlockFrequency RHS) const {
return Frequency < RHS.Frequency;
@@ -75,6 +110,6 @@ public:
}
};
-}
+} // namespace llvm
#endif
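
The inlined operators above saturate instead of wrapping. A small sketch of the resulting behaviour, using only members visible in this header:

#include "llvm/Support/BlockFrequency.h"
#include <cassert>
#include <cstdint>

void saturationDemo() {
  llvm::BlockFrequency Max(llvm::BlockFrequency::getMaxFrequency());
  llvm::BlockFrequency One(1);

  // Addition saturates at UINT64_MAX instead of wrapping to 0.
  assert((Max + One).getFrequency() == UINT64_MAX);

  // Subtraction saturates at 0 instead of underflowing.
  assert((One - Max).getFrequency() == 0);
}
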
diff --git a/llvm/include/llvm/Support/CSKYTargetParser.h b/llvm/include/llvm/Support/CSKYTargetParser.h
deleted file mode 100644
index d3080ccd8970..000000000000
--- a/llvm/include/llvm/Support/CSKYTargetParser.h
+++ /dev/null
@@ -1,15 +0,0 @@
-//===-- llvm/Support/CSKYTargetParser.h -------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This header is deprecated in favour of
-/// `llvm/TargetParser/CSKYTargetParser.h`.
-///
-//===----------------------------------------------------------------------===//
-
-#include "llvm/TargetParser/CSKYTargetParser.h"
diff --git a/llvm/include/llvm/Support/CachePruning.h b/llvm/include/llvm/Support/CachePruning.h
index 30b5c08942c5..17e148830a73 100644
--- a/llvm/include/llvm/Support/CachePruning.h
+++ b/llvm/include/llvm/Support/CachePruning.h
@@ -28,8 +28,8 @@ class StringRef;
struct CachePruningPolicy {
/// The pruning interval. This is intended to be used to avoid scanning the
/// directory too often. It does not impact the decision of which file to
- /// prune. A value of 0 forces the scan to occur. A value of None disables
- /// pruning.
+ /// prune. A value of 0 forces the scan to occur. A value of std::nullopt
+ /// disables pruning.
std::optional<std::chrono::seconds> Interval = std::chrono::seconds(1200);
/// The expiration for a file. When a file hasn't been accessed for Expiration
diff --git a/llvm/include/llvm/Support/Casting.h b/llvm/include/llvm/Support/Casting.h
index 4ff5865185d7..a0a6fb053d31 100644
--- a/llvm/include/llvm/Support/Casting.h
+++ b/llvm/include/llvm/Support/Casting.h
@@ -66,7 +66,7 @@ template <typename To, typename From, typename Enabler = void> struct isa_impl {
// Always allow upcasts, and perform no dynamic check for them.
template <typename To, typename From>
-struct isa_impl<To, From, std::enable_if_t<std::is_base_of<To, From>::value>> {
+struct isa_impl<To, From, std::enable_if_t<std::is_base_of_v<To, From>>> {
static inline bool doit(const From &) { return true; }
};
@@ -231,7 +231,7 @@ struct cast_convert_val<To, FromTy *, FromTy *> {
template <class X> struct is_simple_type {
static const bool value =
- std::is_same<X, typename simplify_type<X>::SimpleType>::value;
+ std::is_same_v<X, typename simplify_type<X>::SimpleType>;
};
// } // namespace detail
@@ -275,8 +275,7 @@ struct CastIsPossible<To, std::optional<From>> {
/// Upcasting (from derived to base) and casting from a type to itself should
/// always be possible.
template <typename To, typename From>
-struct CastIsPossible<To, From,
- std::enable_if_t<std::is_base_of<To, From>::value>> {
+struct CastIsPossible<To, From, std::enable_if_t<std::is_base_of_v<To, From>>> {
static inline bool isPossible(const From &f) { return true; }
};
@@ -319,7 +318,7 @@ namespace detail {
/// A helper to derive the type to use with `Self` for cast traits, when the
/// provided CRTP derived type is allowed to be void.
template <typename OptionalDerived, typename Default>
-using SelfType = std::conditional_t<std::is_same<OptionalDerived, void>::value,
+using SelfType = std::conditional_t<std::is_same_v<OptionalDerived, void>,
Default, OptionalDerived>;
} // namespace detail
@@ -390,8 +389,8 @@ struct ConstStrippingForwardingCast {
// Remove the pointer if it exists, then we can get rid of consts/volatiles.
using DecayedFrom = std::remove_cv_t<std::remove_pointer_t<From>>;
// Now if it's a pointer, add it back. Otherwise, we want a ref.
- using NonConstFrom = std::conditional_t<std::is_pointer<From>::value,
- DecayedFrom *, DecayedFrom &>;
+ using NonConstFrom =
+ std::conditional_t<std::is_pointer_v<From>, DecayedFrom *, DecayedFrom &>;
static inline bool isPossible(const From &f) {
return ForwardTo::isPossible(const_cast<NonConstFrom>(f));
diff --git a/llvm/include/llvm/Support/CheckedArithmetic.h b/llvm/include/llvm/Support/CheckedArithmetic.h
index 81b703a03892..69dcdc74e015 100644
--- a/llvm/include/llvm/Support/CheckedArithmetic.h
+++ b/llvm/include/llvm/Support/CheckedArithmetic.h
@@ -25,8 +25,7 @@ namespace {
/// \p RHS.
/// \return Empty optional if the operation overflows, or result otherwise.
template <typename T, typename F>
-std::enable_if_t<std::is_integral<T>::value && sizeof(T) * 8 <= 64,
- std::optional<T>>
+std::enable_if_t<std::is_integral_v<T> && sizeof(T) * 8 <= 64, std::optional<T>>
checkedOp(T LHS, T RHS, F Op, bool Signed = true) {
llvm::APInt ALHS(sizeof(T) * 8, LHS, Signed);
llvm::APInt ARHS(sizeof(T) * 8, RHS, Signed);
@@ -44,8 +43,8 @@ namespace llvm {
/// \return Optional of sum if no signed overflow occurred,
/// \c std::nullopt otherwise.
template <typename T>
-std::enable_if_t<std::is_signed<T>::value, std::optional<T>>
-checkedAdd(T LHS, T RHS) {
+std::enable_if_t<std::is_signed_v<T>, std::optional<T>> checkedAdd(T LHS,
+ T RHS) {
return checkedOp(LHS, RHS, &llvm::APInt::sadd_ov);
}
@@ -53,8 +52,8 @@ checkedAdd(T LHS, T RHS) {
/// \return Optional of sum if no signed overflow occurred,
/// \c std::nullopt otherwise.
template <typename T>
-std::enable_if_t<std::is_signed<T>::value, std::optional<T>>
-checkedSub(T LHS, T RHS) {
+std::enable_if_t<std::is_signed_v<T>, std::optional<T>> checkedSub(T LHS,
+ T RHS) {
return checkedOp(LHS, RHS, &llvm::APInt::ssub_ov);
}
@@ -62,8 +61,8 @@ checkedSub(T LHS, T RHS) {
/// \return Optional of product if no signed overflow occurred,
/// \c std::nullopt otherwise.
template <typename T>
-std::enable_if_t<std::is_signed<T>::value, std::optional<T>>
-checkedMul(T LHS, T RHS) {
+std::enable_if_t<std::is_signed_v<T>, std::optional<T>> checkedMul(T LHS,
+ T RHS) {
return checkedOp(LHS, RHS, &llvm::APInt::smul_ov);
}
@@ -71,8 +70,8 @@ checkedMul(T LHS, T RHS) {
/// \return Optional of result if no signed overflow occurred,
/// \c std::nullopt otherwise.
template <typename T>
-std::enable_if_t<std::is_signed<T>::value, std::optional<T>>
-checkedMulAdd(T A, T B, T C) {
+std::enable_if_t<std::is_signed_v<T>, std::optional<T>> checkedMulAdd(T A, T B,
+ T C) {
if (auto Product = checkedMul(A, B))
return checkedAdd(*Product, C);
return std::nullopt;
@@ -82,7 +81,7 @@ checkedMulAdd(T A, T B, T C) {
/// \return Optional of sum if no unsigned overflow occurred,
/// \c std::nullopt otherwise.
template <typename T>
-std::enable_if_t<std::is_unsigned<T>::value, std::optional<T>>
+std::enable_if_t<std::is_unsigned_v<T>, std::optional<T>>
checkedAddUnsigned(T LHS, T RHS) {
return checkedOp(LHS, RHS, &llvm::APInt::uadd_ov, /*Signed=*/false);
}
@@ -91,7 +90,7 @@ checkedAddUnsigned(T LHS, T RHS) {
/// \return Optional of product if no unsigned overflow occurred,
/// \c std::nullopt otherwise.
template <typename T>
-std::enable_if_t<std::is_unsigned<T>::value, std::optional<T>>
+std::enable_if_t<std::is_unsigned_v<T>, std::optional<T>>
checkedMulUnsigned(T LHS, T RHS) {
return checkedOp(LHS, RHS, &llvm::APInt::umul_ov, /*Signed=*/false);
}
@@ -100,7 +99,7 @@ checkedMulUnsigned(T LHS, T RHS) {
/// \return Optional of result if no unsigned overflow occurred,
/// \c std::nullopt otherwise.
template <typename T>
-std::enable_if_t<std::is_unsigned<T>::value, std::optional<T>>
+std::enable_if_t<std::is_unsigned_v<T>, std::optional<T>>
checkedMulAddUnsigned(T A, T B, T C) {
if (auto Product = checkedMulUnsigned(A, B))
return checkedAddUnsigned(*Product, C);
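
These helpers return std::optional instead of trapping, so overflow checks become ordinary value tests. A short sketch (the values in the trailing comment are chosen to overflow int64_t):

#include "llvm/Support/CheckedArithmetic.h"
#include <cstdint>
#include <optional>

bool wouldOverflow(int64_t A, int64_t B) {
  // checkedAdd returns std::nullopt when A + B overflows int64_t.
  std::optional<int64_t> Sum = llvm::checkedAdd(A, B);
  return !Sum.has_value();
}

// wouldOverflow(INT64_MAX, 1) == true; wouldOverflow(2, 3) == false.
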
diff --git a/llvm/include/llvm/Support/CommandLine.h b/llvm/include/llvm/Support/CommandLine.h
index 43c769c1fd0a..d2079fead668 100644
--- a/llvm/include/llvm/Support/CommandLine.h
+++ b/llvm/include/llvm/Support/CommandLine.h
@@ -220,7 +220,7 @@ public:
static SubCommand &getTopLevel();
// Get the special subcommand that can be used to put an option into all
- // subcomands.
+ // subcommands.
static SubCommand &getAll();
void reset();
@@ -315,7 +315,7 @@ public:
}
bool isInAllSubCommands() const {
- return llvm::is_contained(Subs, &SubCommand::getAll());
+ return Subs.contains(&SubCommand::getAll());
}
//-------------------------------------------------------------------------===
@@ -503,10 +503,10 @@ struct callback_traits<R (C::*)(Args...) const> {
using result_type = R;
using arg_type = std::tuple_element_t<0, std::tuple<Args...>>;
static_assert(sizeof...(Args) == 1, "callback function must have one and only one parameter");
- static_assert(std::is_same<result_type, void>::value,
+ static_assert(std::is_same_v<result_type, void>,
"callback return type must be void");
- static_assert(std::is_lvalue_reference<arg_type>::value &&
- std::is_const<std::remove_reference_t<arg_type>>::value,
+ static_assert(std::is_lvalue_reference_v<arg_type> &&
+ std::is_const_v<std::remove_reference_t<arg_type>>,
"callback arg_type must be a const lvalue reference");
};
} // namespace detail
@@ -613,7 +613,7 @@ protected:
// Top-level option class.
template <class DataType>
struct OptionValue final
- : OptionValueBase<DataType, std::is_class<DataType>::value> {
+ : OptionValueBase<DataType, std::is_class_v<DataType>> {
OptionValue() = default;
OptionValue(const DataType &V) { this->setValue(V); }
@@ -1407,9 +1407,9 @@ public:
//
template <class DataType, bool ExternalStorage = false,
class ParserClass = parser<DataType>>
-class opt : public Option,
- public opt_storage<DataType, ExternalStorage,
- std::is_class<DataType>::value> {
+class opt
+ : public Option,
+ public opt_storage<DataType, ExternalStorage, std::is_class_v<DataType>> {
ParserClass Parser;
bool handleOccurrence(unsigned pos, StringRef ArgName,
@@ -1448,8 +1448,7 @@ class opt : public Option,
}
}
- template <class T,
- class = std::enable_if_t<std::is_assignable<T &, T>::value>>
+ template <class T, class = std::enable_if_t<std::is_assignable_v<T &, T>>>
void setDefaultImpl() {
const OptionValue<DataType> &V = this->getDefault();
if (V.hasValue())
@@ -1458,8 +1457,7 @@ class opt : public Option,
this->setValue(T());
}
- template <class T,
- class = std::enable_if_t<!std::is_assignable<T &, T>::value>>
+ template <class T, class = std::enable_if_t<!std::is_assignable_v<T &, T>>>
void setDefaultImpl(...) {}
void setDefault() override { setDefaultImpl<DataType>(); }
diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h
index cf330662cf4b..10d5cec231a5 100644
--- a/llvm/include/llvm/Support/Compiler.h
+++ b/llvm/include/llvm/Support/Compiler.h
@@ -113,12 +113,24 @@
/// LLVM_EXTERNAL_VISIBILITY - classes, functions, and variables marked with
/// this attribute will be made public and visible outside of any shared library
/// they are linked in to.
-#if __has_attribute(visibility) && \
- (!(defined(_WIN32) || defined(__CYGWIN__)) || \
+
+#if LLVM_HAS_CPP_ATTRIBUTE(gnu::visibility)
+#define LLVM_ATTRIBUTE_VISIBILITY_HIDDEN [[gnu::visibility("hidden")]]
+#define LLVM_ATTRIBUTE_VISIBILITY_DEFAULT [[gnu::visibility("default")]]
+#elif __has_attribute(visibility)
+#define LLVM_ATTRIBUTE_VISIBILITY_HIDDEN __attribute__((visibility("hidden")))
+#define LLVM_ATTRIBUTE_VISIBILITY_DEFAULT __attribute__((visibility("default")))
+#else
+#define LLVM_ATTRIBUTE_VISIBILITY_HIDDEN
+#define LLVM_ATTRIBUTE_VISIBILITY_DEFAULT
+#endif
+
+
+#if (!(defined(_WIN32) || defined(__CYGWIN__)) || \
(defined(__MINGW32__) && defined(__clang__)))
-#define LLVM_LIBRARY_VISIBILITY __attribute__ ((visibility("hidden")))
+#define LLVM_LIBRARY_VISIBILITY LLVM_ATTRIBUTE_VISIBILITY_HIDDEN
#if defined(LLVM_BUILD_LLVM_DYLIB) || defined(LLVM_BUILD_SHARED_LIBS)
-#define LLVM_EXTERNAL_VISIBILITY __attribute__((visibility("default")))
+#define LLVM_EXTERNAL_VISIBILITY LLVM_ATTRIBUTE_VISIBILITY_DEFAULT
#else
#define LLVM_EXTERNAL_VISIBILITY
#endif
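
A brief sketch of how the new attribute macros are meant to be used; the class and function below are purely illustrative.

#include "llvm/Support/Compiler.h"

// Hidden from the shared library's export table where the visibility
// attribute is supported; expands to nothing elsewhere.
class LLVM_ATTRIBUTE_VISIBILITY_HIDDEN InternalHelper {
public:
  void doWork();
};

// Explicitly exported when LLVM is built as a shared library.
LLVM_EXTERNAL_VISIBILITY void publicEntryPoint();
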
diff --git a/llvm/include/llvm/Support/ConvertEBCDIC.h b/llvm/include/llvm/Support/ConvertEBCDIC.h
new file mode 100644
index 000000000000..ea761b31e022
--- /dev/null
+++ b/llvm/include/llvm/Support/ConvertEBCDIC.h
@@ -0,0 +1,28 @@
+//===--- ConvertEBCDIC.h - UTF8/EBCDIC CharSet Conversion -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file provides utility functions for converting between EBCDIC-1047 and
+/// UTF-8.
+///
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include <system_error>
+
+namespace llvm {
+namespace ConverterEBCDIC {
+std::error_code convertToEBCDIC(StringRef Source,
+ SmallVectorImpl<char> &Result);
+
+void convertToUTF8(StringRef Source, SmallVectorImpl<char> &Result);
+
+} // namespace ConverterEBCDIC
+} // namespace llvm
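
A minimal sketch of round-tripping a string through the new conversion helpers, with error handling kept trivial:

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/ConvertEBCDIC.h"
#include <system_error>

bool roundTrip(llvm::StringRef Utf8, llvm::SmallVectorImpl<char> &Out) {
  llvm::SmallVector<char> Ebcdic;
  if (std::error_code EC =
          llvm::ConverterEBCDIC::convertToEBCDIC(Utf8, Ebcdic))
    return false; // conversion failed
  llvm::ConverterEBCDIC::convertToUTF8(
      llvm::StringRef(Ebcdic.data(), Ebcdic.size()), Out);
  return true;
}
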
diff --git a/llvm/include/llvm/Support/Discriminator.h b/llvm/include/llvm/Support/Discriminator.h
index 69cd82c87698..fa78cf3045de 100644
--- a/llvm/include/llvm/Support/Discriminator.h
+++ b/llvm/include/llvm/Support/Discriminator.h
@@ -65,8 +65,6 @@ enum FSDiscriminatorPass {
};
} // namespace sampleprof
-using namespace sampleprof;
-
 // The number of bits reserved for the base discriminator. The base
 // discriminator starts from bit 0.
static const unsigned BaseDiscriminatorBitWidth = 8;
@@ -82,33 +80,36 @@ static const unsigned FSDiscriminatorBitWidth = 6;
// + FSDiscriminatorBitWidth * getNumFSPasses()
// needs to fit in an unsigned int type.
static inline unsigned getNumFSPasses() {
- return static_cast<unsigned>(FSDiscriminatorPass::PassLast);
+ return static_cast<unsigned>(sampleprof::FSDiscriminatorPass::PassLast);
}
// Return the ending bit for FSPass P.
-static inline unsigned getFSPassBitEnd(FSDiscriminatorPass P) {
+static inline unsigned getFSPassBitEnd(sampleprof::FSDiscriminatorPass P) {
unsigned I = static_cast<unsigned>(P);
assert(I <= getNumFSPasses() && "Invalid FS discriminator pass number.");
return BaseDiscriminatorBitWidth + I * FSDiscriminatorBitWidth - 1;
}
 // Return the beginning bit for FSPass P.
-static inline unsigned getFSPassBitBegin(FSDiscriminatorPass P) {
- if (P == FSDiscriminatorPass::Base)
+static inline unsigned getFSPassBitBegin(sampleprof::FSDiscriminatorPass P) {
+ if (P == sampleprof::FSDiscriminatorPass::Base)
return 0;
unsigned I = static_cast<unsigned>(P);
assert(I <= getNumFSPasses() && "Invalid FS discriminator pass number.");
- return getFSPassBitEnd(static_cast<FSDiscriminatorPass>(I - 1)) + 1;
+ return getFSPassBitEnd(static_cast<sampleprof::FSDiscriminatorPass>(I - 1)) +
+ 1;
}
// Return the beginning bit for the last FSPass.
static inline int getLastFSPassBitBegin() {
- return getFSPassBitBegin(static_cast<FSDiscriminatorPass>(getNumFSPasses()));
+ return getFSPassBitBegin(
+ static_cast<sampleprof::FSDiscriminatorPass>(getNumFSPasses()));
}
// Return the ending bit for the last FSPass.
static inline unsigned getLastFSPassBitEnd() {
- return getFSPassBitEnd(static_cast<FSDiscriminatorPass>(getNumFSPasses()));
+ return getFSPassBitEnd(
+ static_cast<sampleprof::FSDiscriminatorPass>(getNumFSPasses()));
}
// Return the beginning bit for the base (first) FSPass.
diff --git a/llvm/include/llvm/Support/EndianStream.h b/llvm/include/llvm/Support/EndianStream.h
index ed941c61fbeb..8ff87d23e83b 100644
--- a/llvm/include/llvm/Support/EndianStream.h
+++ b/llvm/include/llvm/Support/EndianStream.h
@@ -15,6 +15,7 @@
#define LLVM_SUPPORT_ENDIANSTREAM_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -32,13 +33,13 @@ inline void write(raw_ostream &os, value_type value, endianness endian) {
template <>
inline void write<float>(raw_ostream &os, float value, endianness endian) {
- write(os, FloatToBits(value), endian);
+ write(os, llvm::bit_cast<uint32_t>(value), endian);
}
template <>
inline void write<double>(raw_ostream &os, double value,
endianness endian) {
- write(os, DoubleToBits(value), endian);
+ write(os, llvm::bit_cast<uint64_t>(value), endian);
}
template <typename value_type>
@@ -48,6 +49,12 @@ inline void write(raw_ostream &os, ArrayRef<value_type> vals,
write(os, v, endian);
}
+template <typename value_type>
+inline void write(SmallVectorImpl<char> &Out, value_type V, endianness E) {
+ V = byte_swap<value_type>(V, E);
+ Out.append((const char *)&V, (const char *)&V + sizeof(value_type));
+}
+
/// Adapter to write values to a stream in a particular byte order.
struct Writer {
raw_ostream &OS;
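
The new overload appends the byte-swapped value to an in-memory buffer rather than a stream. A small sketch; the endianness spelling for this LLVM version is assumed to be llvm::support::endianness.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/EndianStream.h"
#include <cstdint>

void appendLE32(llvm::SmallVectorImpl<char> &Buf, uint32_t V) {
  // Appends exactly 4 bytes, least-significant byte first.
  llvm::support::endian::write(Buf, V, llvm::support::endianness::little);
}
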
diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h
index 8a984db5e681..2292770a97c4 100644
--- a/llvm/include/llvm/Support/Error.h
+++ b/llvm/include/llvm/Support/Error.h
@@ -14,8 +14,6 @@
#define LLVM_SUPPORT_ERROR_H
#include "llvm-c/Error.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/abi-breaking.h"
#include "llvm/Support/AlignOf.h"
@@ -471,7 +469,7 @@ template <class T> class [[nodiscard]] Expected {
template <class T1> friend class ExpectedAsOutParameter;
template <class OtherT> friend class Expected;
- static constexpr bool isRef = std::is_reference<T>::value;
+ static constexpr bool isRef = std::is_reference_v<T>;
using wrap = std::reference_wrapper<std::remove_reference_t<T>>;
@@ -577,9 +575,9 @@ public:
/// Returns \a takeError() after moving the held T (if any) into \p V.
template <class OtherT>
- Error moveInto(OtherT &Value,
- std::enable_if_t<std::is_assignable<OtherT &, T &&>::value> * =
- nullptr) && {
+ Error moveInto(
+ OtherT &Value,
+ std::enable_if_t<std::is_assignable_v<OtherT &, T &&>> * = nullptr) && {
if (*this)
Value = std::move(get());
return takeError();
@@ -1025,13 +1023,7 @@ void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner = {});
/// Write all error messages (if any) in E to a string. The newline character
/// is used to separate error messages.
-inline std::string toString(Error E) {
- SmallVector<std::string, 2> Errors;
- handleAllErrors(std::move(E), [&Errors](const ErrorInfoBase &EI) {
- Errors.push_back(EI.message());
- });
- return join(Errors.begin(), Errors.end(), "\n");
-}
+std::string toString(Error E);
/// Consume a Error without doing anything. This method should be used
/// only where an error can be considered a reasonable and expected return
diff --git a/llvm/include/llvm/Support/ErrorOr.h b/llvm/include/llvm/Support/ErrorOr.h
index b654c9c9c43b..97c7abe1f20c 100644
--- a/llvm/include/llvm/Support/ErrorOr.h
+++ b/llvm/include/llvm/Support/ErrorOr.h
@@ -56,7 +56,7 @@ template<class T>
class ErrorOr {
template <class OtherT> friend class ErrorOr;
- static constexpr bool isRef = std::is_reference<T>::value;
+ static constexpr bool isRef = std::is_reference_v<T>;
using wrap = std::reference_wrapper<std::remove_reference_t<T>>;
@@ -85,7 +85,7 @@ public:
template <class OtherT>
ErrorOr(OtherT &&Val,
- std::enable_if_t<std::is_convertible<OtherT, T>::value> * = nullptr)
+ std::enable_if_t<std::is_convertible_v<OtherT, T>> * = nullptr)
: HasError(false) {
new (getStorage()) storage_type(std::forward<OtherT>(Val));
}
@@ -96,15 +96,14 @@ public:
template <class OtherT>
ErrorOr(const ErrorOr<OtherT> &Other,
- std::enable_if_t<std::is_convertible<OtherT, T>::value> * = nullptr) {
+ std::enable_if_t<std::is_convertible_v<OtherT, T>> * = nullptr) {
copyConstruct(Other);
}
template <class OtherT>
explicit ErrorOr(
const ErrorOr<OtherT> &Other,
- std::enable_if_t<!std::is_convertible<OtherT, const T &>::value> * =
- nullptr) {
+ std::enable_if_t<!std::is_convertible_v<OtherT, const T &>> * = nullptr) {
copyConstruct(Other);
}
@@ -114,7 +113,7 @@ public:
template <class OtherT>
ErrorOr(ErrorOr<OtherT> &&Other,
- std::enable_if_t<std::is_convertible<OtherT, T>::value> * = nullptr) {
+ std::enable_if_t<std::is_convertible_v<OtherT, T>> * = nullptr) {
moveConstruct(std::move(Other));
}
@@ -123,7 +122,7 @@ public:
template <class OtherT>
explicit ErrorOr(
ErrorOr<OtherT> &&Other,
- std::enable_if_t<!std::is_convertible<OtherT, T>::value> * = nullptr) {
+ std::enable_if_t<!std::is_convertible_v<OtherT, T>> * = nullptr) {
moveConstruct(std::move(Other));
}
diff --git a/llvm/include/llvm/Support/ExitCodes.h b/llvm/include/llvm/Support/ExitCodes.h
index b9041f5557d5..4eb5dedc688b 100644
--- a/llvm/include/llvm/Support/ExitCodes.h
+++ b/llvm/include/llvm/Support/ExitCodes.h
@@ -20,9 +20,9 @@
#if HAVE_SYSEXITS_H
#include <sysexits.h>
-#elif __MVS__
-// <sysexits.h> does not exist on z/OS. The only value used in LLVM is
-// EX_IOERR, which is used to signal a special error condition (broken pipe).
+#elif __MVS__ || defined(_WIN32)
+// <sysexits.h> does not exist on z/OS and Windows. The only value used in LLVM
+// is EX_IOERR, which is used to signal a special error condition (broken pipe).
// Define the macro with its usual value from BSD systems, which is chosen to
// not clash with more standard exit codes like 1.
#define EX_IOERR 74
diff --git a/llvm/include/llvm/Support/FileUtilities.h b/llvm/include/llvm/Support/FileUtilities.h
index c9a72d5d14ec..9707724d6317 100644
--- a/llvm/include/llvm/Support/FileUtilities.h
+++ b/llvm/include/llvm/Support/FileUtilities.h
@@ -76,41 +76,6 @@ namespace llvm {
void releaseFile() { DeleteIt = false; }
};
- enum class atomic_write_error {
- failed_to_create_uniq_file = 0,
- output_stream_error,
- failed_to_rename_temp_file
- };
-
- class AtomicFileWriteError : public llvm::ErrorInfo<AtomicFileWriteError> {
- public:
- AtomicFileWriteError(atomic_write_error Error) : Error(Error) {}
-
- void log(raw_ostream &OS) const override;
-
- const atomic_write_error Error;
- static char ID;
-
- private:
- // Users are not expected to use error_code.
- std::error_code convertToErrorCode() const override {
- return llvm::inconvertibleErrorCode();
- }
- };
-
- // atomic_write_error + whatever the Writer can return
-
- /// Creates a unique file with name according to the given \p TempPathModel,
- /// writes content of \p Buffer to the file and renames it to \p FinalPath.
- ///
- /// \returns \c AtomicFileWriteError in case of error.
- llvm::Error writeFileAtomically(StringRef TempPathModel, StringRef FinalPath,
- StringRef Buffer);
-
- llvm::Error
- writeFileAtomically(StringRef TempPathModel, StringRef FinalPath,
- std::function<llvm::Error(llvm::raw_ostream &)> Writer);
-
/// FilePermssionsApplier helps to copy permissions from an input file to
/// an output one. It memorizes the status of the input file and can apply
/// permissions and dates to the output file.
diff --git a/llvm/include/llvm/Support/Format.h b/llvm/include/llvm/Support/Format.h
index c22c941ae06e..89b6ae35ba5d 100644
--- a/llvm/include/llvm/Support/Format.h
+++ b/llvm/include/llvm/Support/Format.h
@@ -28,6 +28,7 @@
#include "llvm/Support/DataTypes.h"
#include <cassert>
#include <cstdio>
+#include <optional>
#include <tuple>
#include <utility>
@@ -215,7 +216,8 @@ inline FormattedNumber format_decimal(int64_t N, unsigned Width) {
class FormattedBytes {
ArrayRef<uint8_t> Bytes;
- // If not None, display offsets for each line relative to starting value.
+ // If not std::nullopt, display offsets for each line relative to starting
+ // value.
std::optional<uint64_t> FirstByteOffset;
uint32_t IndentLevel; // Number of characters to indent each line.
uint32_t NumPerLine; // Number of bytes to show per line.
diff --git a/llvm/include/llvm/Support/FormatProviders.h b/llvm/include/llvm/Support/FormatProviders.h
index 44da741b456e..aa0773847161 100644
--- a/llvm/include/llvm/Support/FormatProviders.h
+++ b/llvm/include/llvm/Support/FormatProviders.h
@@ -35,7 +35,7 @@ struct use_integral_formatter
template <typename T>
struct use_char_formatter
- : public std::integral_constant<bool, std::is_same<T, char>::value> {};
+ : public std::integral_constant<bool, std::is_same_v<T, char>> {};
template <typename T>
struct is_cstring
@@ -46,16 +46,17 @@ struct is_cstring
template <typename T>
struct use_string_formatter
: public std::integral_constant<bool,
- std::is_convertible<T, llvm::StringRef>::value> {};
+ std::is_convertible_v<T, llvm::StringRef>> {
+};
template <typename T>
struct use_pointer_formatter
- : public std::integral_constant<bool, std::is_pointer<T>::value &&
+ : public std::integral_constant<bool, std::is_pointer_v<T> &&
!is_cstring<T>::value> {};
template <typename T>
struct use_double_formatter
- : public std::integral_constant<bool, std::is_floating_point<T>::value> {};
+ : public std::integral_constant<bool, std::is_floating_point_v<T>> {};
class HelperFunctions {
protected:
@@ -75,7 +76,7 @@ protected:
}
static bool consumeHexStyle(StringRef &Str, HexPrintStyle &Style) {
- if (!Str.startswith_insensitive("x"))
+ if (!Str.starts_with_insensitive("x"))
return false;
if (Str.consume_front("x-"))
diff --git a/llvm/include/llvm/Support/FormatVariadicDetails.h b/llvm/include/llvm/Support/FormatVariadicDetails.h
index 2204cff13a64..068c327df396 100644
--- a/llvm/include/llvm/Support/FormatVariadicDetails.h
+++ b/llvm/include/llvm/Support/FormatVariadicDetails.h
@@ -79,11 +79,11 @@ public:
using ConstRefT = const std::decay_t<T> &;
template <typename U>
- static char test(
- std::enable_if_t<std::is_same<decltype(std::declval<llvm::raw_ostream &>()
- << std::declval<U>()),
- llvm::raw_ostream &>::value,
- int *>);
+ static char test(std::enable_if_t<
+ std::is_same_v<decltype(std::declval<llvm::raw_ostream &>()
+ << std::declval<U>()),
+ llvm::raw_ostream &>,
+ int *>);
template <typename U> static double test(...);
@@ -95,8 +95,7 @@ public:
template <typename T>
struct uses_format_member
: public std::integral_constant<
- bool,
- std::is_base_of<format_adapter, std::remove_reference_t<T>>::value> {
+ bool, std::is_base_of_v<format_adapter, std::remove_reference_t<T>>> {
};
// Simple template that decides whether a type T should use the format_provider
@@ -147,7 +146,7 @@ build_format_adapter(T &&Item) {
// would be responsible for consuming it.
// Make the caller opt into this by calling fmt_consume().
static_assert(
- !std::is_same<llvm::Error, std::remove_cv_t<T>>::value,
+ !std::is_same_v<llvm::Error, std::remove_cv_t<T>>,
"llvm::Error-by-value must be wrapped in fmt_consume() for formatv");
return stream_operator_format_adapter<T>(std::forward<T>(Item));
}
diff --git a/llvm/include/llvm/Support/GenericDomTree.h b/llvm/include/llvm/Support/GenericDomTree.h
index 1e5c0ae231d2..62186a368e96 100644
--- a/llvm/include/llvm/Support/GenericDomTree.h
+++ b/llvm/include/llvm/Support/GenericDomTree.h
@@ -227,7 +227,7 @@ template <typename NodeT> struct DomTreeNodeTraits {
using NodeType = NodeT;
using NodePtr = NodeT *;
using ParentPtr = decltype(std::declval<NodePtr>()->getParent());
- static_assert(std::is_pointer<ParentPtr>::value,
+ static_assert(std::is_pointer_v<ParentPtr>,
"Currently NodeT's parent must be a pointer type");
using ParentType = std::remove_pointer_t<ParentPtr>;
@@ -242,13 +242,13 @@ template <typename NodeT> struct DomTreeNodeTraits {
template <typename NodeT, bool IsPostDom>
class DominatorTreeBase {
public:
- static_assert(std::is_pointer<typename GraphTraits<NodeT *>::NodeRef>::value,
+ static_assert(std::is_pointer_v<typename GraphTraits<NodeT *>::NodeRef>,
"Currently DominatorTreeBase supports only pointer nodes");
using NodeTrait = DomTreeNodeTraits<NodeT>;
using NodeType = typename NodeTrait::NodeType;
using NodePtr = typename NodeTrait::NodePtr;
using ParentPtr = typename NodeTrait::ParentPtr;
- static_assert(std::is_pointer<ParentPtr>::value,
+ static_assert(std::is_pointer_v<ParentPtr>,
"Currently NodeT's parent must be a pointer type");
using ParentType = std::remove_pointer_t<ParentPtr>;
static constexpr bool IsPostDominator = IsPostDom;
diff --git a/llvm/include/llvm/Support/GenericLoopInfo.h b/llvm/include/llvm/Support/GenericLoopInfo.h
new file mode 100644
index 000000000000..ac4f2d7010b4
--- /dev/null
+++ b/llvm/include/llvm/Support/GenericLoopInfo.h
@@ -0,0 +1,727 @@
+//===- GenericLoopInfo - Generic Loop Info for graphs -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the LoopInfoBase class that is used to identify natural
+// loops and determine the loop depth of various nodes in a generic graph of
+// blocks. A natural loop has exactly one entry-point, which is called the
+// header. Note that natural loops may actually be several loops that share the
+// same header node.
+//
+// This analysis calculates the nesting structure of loops in a function. For
+// each natural loop identified, this analysis identifies natural loops
+// contained entirely within the loop and the basic blocks that make up the
+// loop.
+//
+// It can calculate on the fly various bits of information, for example:
+//
+// * whether there is a preheader for the loop
+// * the number of back edges to the header
+// * whether or not a particular block branches out of the loop
+// * the successor blocks of the loop
+// * the loop depth
+// * etc...
+//
+// Note that this analysis specifically identifies *Loops* not cycles or SCCs
+// in the graph. There can be strongly connected components in the graph which
+// this analysis will not recognize and that will not be represented by a Loop
+// instance. In particular, a Loop might be inside such a non-loop SCC, or a
+// non-loop SCC might contain a sub-SCC which is a Loop.
+//
+// For an overview of terminology used in this API (and thus all of our loop
+// analyses or transforms), see docs/LoopTerminology.rst.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_GENERICLOOPINFO_H
+#define LLVM_SUPPORT_GENERICLOOPINFO_H
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/GenericDomTree.h"
+
+namespace llvm {
+
+template <class N, class M> class LoopInfoBase;
+template <class N, class M> class LoopBase;
+
+//===----------------------------------------------------------------------===//
+/// Instances of this class are used to represent loops that are detected in the
+/// flow graph.
+///
+template <class BlockT, class LoopT> class LoopBase {
+ LoopT *ParentLoop;
+ // Loops contained entirely within this one.
+ std::vector<LoopT *> SubLoops;
+
+ // The list of blocks in this loop. First entry is the header node.
+ std::vector<BlockT *> Blocks;
+
+ SmallPtrSet<const BlockT *, 8> DenseBlockSet;
+
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ /// Indicator that this loop is no longer a valid loop.
+ bool IsInvalid = false;
+#endif
+
+ LoopBase(const LoopBase<BlockT, LoopT> &) = delete;
+ const LoopBase<BlockT, LoopT> &
+ operator=(const LoopBase<BlockT, LoopT> &) = delete;
+
+public:
+ /// Return the nesting level of this loop. An outer-most loop has depth 1,
+ /// for consistency with loop depth values used for basic blocks, where depth
+ /// 0 is used for blocks not inside any loops.
+ unsigned getLoopDepth() const {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ unsigned D = 1;
+ for (const LoopT *CurLoop = ParentLoop; CurLoop;
+ CurLoop = CurLoop->ParentLoop)
+ ++D;
+ return D;
+ }
+ BlockT *getHeader() const { return getBlocks().front(); }
+ /// Return the parent loop if it exists or nullptr for top
+ /// level loops.
+ ///
+ /// A loop is either top-level in a function (that is, it is not
+ /// contained in any other loop) or it is entirely enclosed in
+ /// some other loop.
+ /// If a loop is top-level, it has no parent; otherwise its
+ /// parent is the innermost loop in which it is enclosed.
+ LoopT *getParentLoop() const { return ParentLoop; }
+
+ /// Get the outermost loop in which this loop is contained.
+ /// This may be the loop itself, if it already is the outermost loop.
+ const LoopT *getOutermostLoop() const {
+ const LoopT *L = static_cast<const LoopT *>(this);
+ while (L->ParentLoop)
+ L = L->ParentLoop;
+ return L;
+ }
+
+ LoopT *getOutermostLoop() {
+ LoopT *L = static_cast<LoopT *>(this);
+ while (L->ParentLoop)
+ L = L->ParentLoop;
+ return L;
+ }
+
+ /// This is a raw interface for bypassing addChildLoop.
+ void setParentLoop(LoopT *L) {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ ParentLoop = L;
+ }
+
+ /// Return true if the specified loop is contained within this loop.
+ bool contains(const LoopT *L) const {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ if (L == this)
+ return true;
+ if (!L)
+ return false;
+ return contains(L->getParentLoop());
+ }
+
+ /// Return true if the specified basic block is in this loop.
+ bool contains(const BlockT *BB) const {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ return DenseBlockSet.count(BB);
+ }
+
+ /// Return true if the specified instruction is in this loop.
+ template <class InstT> bool contains(const InstT *Inst) const {
+ return contains(Inst->getParent());
+ }
+
+ /// Return the loops contained entirely within this loop.
+ const std::vector<LoopT *> &getSubLoops() const {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ return SubLoops;
+ }
+ std::vector<LoopT *> &getSubLoopsVector() {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ return SubLoops;
+ }
+ typedef typename std::vector<LoopT *>::const_iterator iterator;
+ typedef
+ typename std::vector<LoopT *>::const_reverse_iterator reverse_iterator;
+ iterator begin() const { return getSubLoops().begin(); }
+ iterator end() const { return getSubLoops().end(); }
+ reverse_iterator rbegin() const { return getSubLoops().rbegin(); }
+ reverse_iterator rend() const { return getSubLoops().rend(); }
+
+ // LoopInfo does not detect irreducible control flow, just natural
+ // loops. That is, it is possible that there is cyclic control
+ // flow within the "innermost loop" or around the "outermost
+ // loop".
+
+ /// Return true if the loop does not contain any (natural) loops.
+ bool isInnermost() const { return getSubLoops().empty(); }
+ /// Return true if the loop does not have a parent (natural) loop
+ /// (i.e. it is outermost, which is the same as top-level).
+ bool isOutermost() const { return getParentLoop() == nullptr; }
+
+ /// Get a list of the basic blocks which make up this loop.
+ ArrayRef<BlockT *> getBlocks() const {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ return Blocks;
+ }
+ typedef typename ArrayRef<BlockT *>::const_iterator block_iterator;
+ block_iterator block_begin() const { return getBlocks().begin(); }
+ block_iterator block_end() const { return getBlocks().end(); }
+ inline iterator_range<block_iterator> blocks() const {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ return make_range(block_begin(), block_end());
+ }
+
+ /// Get the number of blocks in this loop in constant time.
+ unsigned getNumBlocks() const {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ return Blocks.size();
+ }
+
+ /// Return a direct, mutable handle to the blocks vector so that we can
+ /// mutate it efficiently with techniques like `std::remove`.
+ std::vector<BlockT *> &getBlocksVector() {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ return Blocks;
+ }
+ /// Return a direct, mutable handle to the blocks set so that we can
+ /// mutate it efficiently.
+ SmallPtrSetImpl<const BlockT *> &getBlocksSet() {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ return DenseBlockSet;
+ }
+
+ /// Return a direct, immutable handle to the blocks set.
+ const SmallPtrSetImpl<const BlockT *> &getBlocksSet() const {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ return DenseBlockSet;
+ }
+
+ /// Return true if this loop is no longer valid. The only valid use of this
+ /// helper is "assert(L.isInvalid())" or equivalent, since IsInvalid is set to
+ /// true by the destructor. In other words, if this accessor returns true,
+ /// the caller has already triggered UB by calling this accessor; and so it
+ /// can only be called in a context where a return value of true indicates a
+ /// programmer error.
+ bool isInvalid() const {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ return IsInvalid;
+#else
+ return false;
+#endif
+ }
+
+ /// True if the terminator of the block can branch to another block that is
+ /// outside of the current loop. \p BB must be inside the loop.
+ bool isLoopExiting(const BlockT *BB) const {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ assert(contains(BB) && "Exiting block must be part of the loop");
+ for (const auto *Succ : children<const BlockT *>(BB)) {
+ if (!contains(Succ))
+ return true;
+ }
+ return false;
+ }
+
+ /// Returns true if \p BB is a loop-latch.
+ /// A latch block is a block that contains a branch back to the header.
+ /// This function is useful when there are multiple latches in a loop
+ /// because \fn getLoopLatch will return nullptr in that case.
+ bool isLoopLatch(const BlockT *BB) const {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ assert(contains(BB) && "block does not belong to the loop");
+
+ BlockT *Header = getHeader();
+ auto PredBegin = GraphTraits<Inverse<BlockT *>>::child_begin(Header);
+ auto PredEnd = GraphTraits<Inverse<BlockT *>>::child_end(Header);
+ return std::find(PredBegin, PredEnd, BB) != PredEnd;
+ }
+
+ /// Calculate the number of back edges to the loop header.
+ unsigned getNumBackEdges() const {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ unsigned NumBackEdges = 0;
+ BlockT *H = getHeader();
+
+ for (const auto Pred : children<Inverse<BlockT *>>(H))
+ if (contains(Pred))
+ ++NumBackEdges;
+
+ return NumBackEdges;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // APIs for simple analysis of the loop.
+ //
+ // Note that all of these methods can fail on general loops (i.e., there may
+ // not be a preheader, etc.). For best success, the loop simplification and
+ // induction variable canonicalization pass should be used to normalize loops
+ // for easy analysis. These methods assume canonical loops.
+
+ /// Return all blocks inside the loop that have successors outside of the
+ /// loop. These are the blocks _inside of the current loop_ which branch out.
+ /// The returned list is always unique.
+ void getExitingBlocks(SmallVectorImpl<BlockT *> &ExitingBlocks) const;
+
+ /// If getExitingBlocks would return exactly one block, return that block.
+ /// Otherwise return null.
+ BlockT *getExitingBlock() const;
+
+ /// Return all of the successor blocks of this loop. These are the blocks
+ /// _outside of the current loop_ which are branched to.
+ void getExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
+
+ /// If getExitBlocks would return exactly one block, return that block.
+ /// Otherwise return null.
+ BlockT *getExitBlock() const;
+
+ /// Return true if no exit block for the loop has a predecessor that is
+ /// outside the loop.
+ bool hasDedicatedExits() const;
+
+ /// Return all unique successor blocks of this loop.
+ /// These are the blocks _outside of the current loop_ which are branched to.
+ void getUniqueExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
+
+ /// Return all unique successor blocks of this loop, except that successors
+ /// reached only from the latch block are not considered. If an exit block
+ /// reached from the latch also has a non-latch predecessor inside the loop,
+ /// it is still added to ExitBlocks.
+ /// These are the blocks _outside of the current loop_ which are branched to.
+ void getUniqueNonLatchExitBlocks(SmallVectorImpl<BlockT *> &ExitBlocks) const;
+
+ /// If getUniqueExitBlocks would return exactly one block, return that block.
+ /// Otherwise return null.
+ BlockT *getUniqueExitBlock() const;
+
+ /// Return true if this loop does not have any exit blocks.
+ bool hasNoExitBlocks() const;
+
+ /// Edge type.
+ typedef std::pair<BlockT *, BlockT *> Edge;
+
+ /// Return all pairs of (_inside_block_,_outside_block_).
+ void getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const;
+
+ /// If there is a preheader for this loop, return it. A loop has a preheader
+ /// if there is only one edge to the header of the loop from outside of the
+ /// loop. If this is the case, the block branching to the header of the loop
+ /// is the preheader node.
+ ///
+ /// This method returns null if there is no preheader for the loop.
+ BlockT *getLoopPreheader() const;
+
+ /// If the given loop's header has exactly one unique predecessor outside the
+ /// loop, return it. Otherwise return null.
+ /// This is less strict than the loop "preheader" concept, which requires
+ /// the predecessor to have exactly one successor.
+ BlockT *getLoopPredecessor() const;
+
+ /// If there is a single latch block for this loop, return it.
+ /// A latch block is a block that contains a branch back to the header.
+ BlockT *getLoopLatch() const;
+
+ /// Return all loop latch blocks of this loop. A latch block is a block that
+ /// contains a branch back to the header.
+ void getLoopLatches(SmallVectorImpl<BlockT *> &LoopLatches) const {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ BlockT *H = getHeader();
+ for (const auto Pred : children<Inverse<BlockT *>>(H))
+ if (contains(Pred))
+ LoopLatches.push_back(Pred);
+ }
+
+ /// Return all inner loops in the loop nest rooted by the loop in preorder,
+ /// with siblings in forward program order.
+ template <class Type>
+ static void getInnerLoopsInPreorder(const LoopT &L,
+ SmallVectorImpl<Type> &PreOrderLoops) {
+ SmallVector<LoopT *, 4> PreOrderWorklist;
+ PreOrderWorklist.append(L.rbegin(), L.rend());
+
+ while (!PreOrderWorklist.empty()) {
+ LoopT *L = PreOrderWorklist.pop_back_val();
+ // Sub-loops are stored in forward program order, but we will process the
+ // worklist backwards, so append them in reverse order.
+ PreOrderWorklist.append(L->rbegin(), L->rend());
+ PreOrderLoops.push_back(L);
+ }
+ }
+
+ /// Return all loops in the loop nest rooted by the loop in preorder, with
+ /// siblings in forward program order.
+ SmallVector<const LoopT *, 4> getLoopsInPreorder() const {
+ SmallVector<const LoopT *, 4> PreOrderLoops;
+ const LoopT *CurLoop = static_cast<const LoopT *>(this);
+ PreOrderLoops.push_back(CurLoop);
+ getInnerLoopsInPreorder(*CurLoop, PreOrderLoops);
+ return PreOrderLoops;
+ }
+ SmallVector<LoopT *, 4> getLoopsInPreorder() {
+ SmallVector<LoopT *, 4> PreOrderLoops;
+ LoopT *CurLoop = static_cast<LoopT *>(this);
+ PreOrderLoops.push_back(CurLoop);
+ getInnerLoopsInPreorder(*CurLoop, PreOrderLoops);
+ return PreOrderLoops;
+ }
+
+ //===--------------------------------------------------------------------===//
+ // APIs for updating loop information after changing the CFG
+ //
+
+ /// This method is used by other analyses to update loop information.
+ /// NewBB is set to be a new member of the current loop.
+ /// Because of this, it is added as a member of all parent loops, and the
+ /// specified LoopInfo object records NewBB as belonging to the current loop. It
+ /// is not valid to replace the loop header with this method.
+ void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LI);
+
+ /// This is used when splitting loops up. It replaces the OldChild entry in
+ /// our children list with NewChild, and updates the parent pointer of
+ /// OldChild to be null and the NewChild to be this loop.
+ /// This updates the loop depth of the new child.
+ void replaceChildLoopWith(LoopT *OldChild, LoopT *NewChild);
+
+ /// Add the specified loop to be a child of this loop.
+ /// This updates the loop depth of the new child.
+ void addChildLoop(LoopT *NewChild) {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ assert(!NewChild->ParentLoop && "NewChild already has a parent!");
+ NewChild->ParentLoop = static_cast<LoopT *>(this);
+ SubLoops.push_back(NewChild);
+ }
+
+ /// This removes the specified child from being a subloop of this loop. The
+ /// loop is not deleted, as it will presumably be inserted into another loop.
+ LoopT *removeChildLoop(iterator I) {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ assert(I != SubLoops.end() && "Cannot remove end iterator!");
+ LoopT *Child = *I;
+ assert(Child->ParentLoop == this && "Child is not a child of this loop!");
+ SubLoops.erase(SubLoops.begin() + (I - begin()));
+ Child->ParentLoop = nullptr;
+ return Child;
+ }
+
+ /// This removes the specified child from being a subloop of this loop. The
+ /// loop is not deleted, as it will presumably be inserted into another loop.
+ LoopT *removeChildLoop(LoopT *Child) {
+ return removeChildLoop(llvm::find(*this, Child));
+ }
+
+ /// This adds a basic block directly to the basic block list.
+ /// This should only be used by transformations that create new loops. Other
+ /// transformations should use addBasicBlockToLoop.
+ void addBlockEntry(BlockT *BB) {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ Blocks.push_back(BB);
+ DenseBlockSet.insert(BB);
+ }
+
+ /// Interface to reverse Blocks[from, end of loop] in this loop.
+ void reverseBlock(unsigned from) {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ std::reverse(Blocks.begin() + from, Blocks.end());
+ }
+
+ /// Interface to do reserve() for Blocks.
+ void reserveBlocks(unsigned size) {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ Blocks.reserve(size);
+ }
+
+ /// This method is used to move BB (which must be part of this loop) to be the
+ /// loop header of the loop (the block that dominates all others).
+ void moveToHeader(BlockT *BB) {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ if (Blocks[0] == BB)
+ return;
+ for (unsigned i = 0;; ++i) {
+ assert(i != Blocks.size() && "Loop does not contain BB!");
+ if (Blocks[i] == BB) {
+ Blocks[i] = Blocks[0];
+ Blocks[0] = BB;
+ return;
+ }
+ }
+ }
+
+ /// This removes the specified basic block from the current loop, updating the
+ /// Blocks as appropriate. This does not update the mapping in the LoopInfo
+ /// class.
+ void removeBlockFromLoop(BlockT *BB) {
+ assert(!isInvalid() && "Loop not in a valid state!");
+ auto I = find(Blocks, BB);
+ assert(I != Blocks.end() && "N is not in this list!");
+ Blocks.erase(I);
+
+ DenseBlockSet.erase(BB);
+ }
+
+ /// Verify loop structure
+ void verifyLoop() const;
+
+ /// Verify loop structure of this loop and all nested loops.
+ void verifyLoopNest(DenseSet<const LoopT *> *Loops) const;
+
+ /// Returns true if the loop is annotated parallel.
+ ///
+ /// Derived classes can override this method using static template
+ /// polymorphism.
+ bool isAnnotatedParallel() const { return false; }
+
+ /// Print loop with all the BBs inside it.
+ void print(raw_ostream &OS, bool Verbose = false, bool PrintNested = true,
+ unsigned Depth = 0) const;
+
+protected:
+ friend class LoopInfoBase<BlockT, LoopT>;
+
+ /// This creates an empty loop.
+ LoopBase() : ParentLoop(nullptr) {}
+
+ explicit LoopBase(BlockT *BB) : ParentLoop(nullptr) {
+ Blocks.push_back(BB);
+ DenseBlockSet.insert(BB);
+ }
+
+ // Since loop passes like SCEV are allowed to key analysis results off of
+ // `Loop` pointers, we cannot re-use pointers within a loop pass manager.
+ // This means loop passes should not be `delete`-ing `Loop` objects directly
+ // (and risk a later `Loop` allocation re-using the address of a previous one)
+ // but should be using LoopInfo::markAsRemoved, which keeps the `Loop` pointer
+ // around until the end of the lifetime of the `LoopInfo` object.
+ //
+ // To make it easier to follow this rule, we mark the destructor as
+ // non-public.
+ ~LoopBase() {
+ for (auto *SubLoop : SubLoops)
+ SubLoop->~LoopT();
+
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ IsInvalid = true;
+#endif
+ SubLoops.clear();
+ Blocks.clear();
+ DenseBlockSet.clear();
+ ParentLoop = nullptr;
+ }
+};
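As a brief illustrative aside (not part of the patch): the exit/latch queries declared above are most commonly used through the IR instantiation llvm::Loop. The sketch below assumes that instantiation; the helper name `exitInfo` is hypothetical.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"

static void exitInfo(const llvm::Loop &L) {
  llvm::SmallVector<llvm::BasicBlock *, 4> Exits;
  L.getUniqueExitBlocks(Exits);        // blocks outside the loop branched to
  // Loop-simplify form guarantees a preheader, a single latch, and
  // dedicated exits; many of the analysis helpers above assume it.
  bool Simplified =
      L.getLoopPreheader() && L.getLoopLatch() && L.hasDedicatedExits();
  (void)Simplified;
}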
+
+template <class BlockT, class LoopT>
+raw_ostream &operator<<(raw_ostream &OS, const LoopBase<BlockT, LoopT> &Loop) {
+ Loop.print(OS);
+ return OS;
+}
+
+//===----------------------------------------------------------------------===//
+/// This class builds and contains all of the top-level loop
+/// structures in the specified function.
+///
+
+template <class BlockT, class LoopT> class LoopInfoBase {
+ // BBMap - Mapping of basic blocks to the innermost loop they occur in
+ DenseMap<const BlockT *, LoopT *> BBMap;
+ std::vector<LoopT *> TopLevelLoops;
+ BumpPtrAllocator LoopAllocator;
+
+ friend class LoopBase<BlockT, LoopT>;
+ friend class LoopInfo;
+
+ void operator=(const LoopInfoBase &) = delete;
+ LoopInfoBase(const LoopInfoBase &) = delete;
+
+public:
+ LoopInfoBase() = default;
+ ~LoopInfoBase() { releaseMemory(); }
+
+ LoopInfoBase(LoopInfoBase &&Arg)
+ : BBMap(std::move(Arg.BBMap)),
+ TopLevelLoops(std::move(Arg.TopLevelLoops)),
+ LoopAllocator(std::move(Arg.LoopAllocator)) {
+ // We have to clear the argument's top-level loops as we've taken ownership.
+ Arg.TopLevelLoops.clear();
+ }
+ LoopInfoBase &operator=(LoopInfoBase &&RHS) {
+ BBMap = std::move(RHS.BBMap);
+
+ for (auto *L : TopLevelLoops)
+ L->~LoopT();
+
+ TopLevelLoops = std::move(RHS.TopLevelLoops);
+ LoopAllocator = std::move(RHS.LoopAllocator);
+ RHS.TopLevelLoops.clear();
+ return *this;
+ }
+
+ void releaseMemory() {
+ BBMap.clear();
+
+ for (auto *L : TopLevelLoops)
+ L->~LoopT();
+ TopLevelLoops.clear();
+ LoopAllocator.Reset();
+ }
+
+ template <typename... ArgsTy> LoopT *AllocateLoop(ArgsTy &&...Args) {
+ LoopT *Storage = LoopAllocator.Allocate<LoopT>();
+ return new (Storage) LoopT(std::forward<ArgsTy>(Args)...);
+ }
+
+ /// iterator/begin/end - The interface to the top-level loops in the current
+ /// function.
+ ///
+ typedef typename std::vector<LoopT *>::const_iterator iterator;
+ typedef
+ typename std::vector<LoopT *>::const_reverse_iterator reverse_iterator;
+ iterator begin() const { return TopLevelLoops.begin(); }
+ iterator end() const { return TopLevelLoops.end(); }
+ reverse_iterator rbegin() const { return TopLevelLoops.rbegin(); }
+ reverse_iterator rend() const { return TopLevelLoops.rend(); }
+ bool empty() const { return TopLevelLoops.empty(); }
+
+ /// Return all of the loops in the function in preorder across the loop
+ /// nests, with siblings in forward program order.
+ ///
+ /// Note that because loops form a forest of trees, preorder is equivalent to
+ /// reverse postorder.
+ SmallVector<LoopT *, 4> getLoopsInPreorder() const;
+
+ /// Return all of the loops in the function in preorder across the loop
+ /// nests, with siblings in *reverse* program order.
+ ///
+ /// Note that because loops form a forest of trees, preorder is equivalent to
+ /// reverse postorder.
+ ///
+ /// Also note that this is *not* a reverse preorder. Only the siblings are in
+ /// reverse program order.
+ SmallVector<LoopT *, 4> getLoopsInReverseSiblingPreorder() const;
+
+ /// Return the innermost loop that BB lives in. If a basic block is in no
+ /// loop (for example the entry node), null is returned.
+ LoopT *getLoopFor(const BlockT *BB) const { return BBMap.lookup(BB); }
+
+ /// Same as getLoopFor.
+ const LoopT *operator[](const BlockT *BB) const { return getLoopFor(BB); }
+
+ /// Return the loop nesting level of the specified block. A depth of 0 means
+ /// the block is not inside any loop.
+ unsigned getLoopDepth(const BlockT *BB) const {
+ const LoopT *L = getLoopFor(BB);
+ return L ? L->getLoopDepth() : 0;
+ }
+
+ // True if the block is a loop header node
+ bool isLoopHeader(const BlockT *BB) const {
+ const LoopT *L = getLoopFor(BB);
+ return L && L->getHeader() == BB;
+ }
+
+ /// Return the top-level loops.
+ const std::vector<LoopT *> &getTopLevelLoops() const { return TopLevelLoops; }
+
+ /// Return the top-level loops.
+ std::vector<LoopT *> &getTopLevelLoopsVector() { return TopLevelLoops; }
+
+ /// This removes the specified top-level loop from this loop info object.
+ /// The loop is not deleted, as it will presumably be inserted into
+ /// another loop.
+ LoopT *removeLoop(iterator I) {
+ assert(I != end() && "Cannot remove end iterator!");
+ LoopT *L = *I;
+ assert(L->isOutermost() && "Not a top-level loop!");
+ TopLevelLoops.erase(TopLevelLoops.begin() + (I - begin()));
+ return L;
+ }
+
+ /// Change the top-level loop that contains BB to the specified loop.
+ /// This should be used by transformations that restructure the loop hierarchy
+ /// tree.
+ void changeLoopFor(BlockT *BB, LoopT *L) {
+ if (!L) {
+ BBMap.erase(BB);
+ return;
+ }
+ BBMap[BB] = L;
+ }
+
+ /// Replace the specified loop in the top-level loops list with the indicated
+ /// loop.
+ void changeTopLevelLoop(LoopT *OldLoop, LoopT *NewLoop) {
+ auto I = find(TopLevelLoops, OldLoop);
+ assert(I != TopLevelLoops.end() && "Old loop not at top level!");
+ *I = NewLoop;
+ assert(!NewLoop->ParentLoop && !OldLoop->ParentLoop &&
+ "Loops already embedded into a subloop!");
+ }
+
+ /// This adds the specified loop to the collection of top-level loops.
+ void addTopLevelLoop(LoopT *New) {
+ assert(New->isOutermost() && "Loop already in subloop!");
+ TopLevelLoops.push_back(New);
+ }
+
+ /// This method completely removes BB from all data structures,
+ /// including all of the Loop objects it is nested in and our mapping from
+ /// BasicBlocks to loops.
+ void removeBlock(BlockT *BB) {
+ auto I = BBMap.find(BB);
+ if (I != BBMap.end()) {
+ for (LoopT *L = I->second; L; L = L->getParentLoop())
+ L->removeBlockFromLoop(BB);
+
+ BBMap.erase(I);
+ }
+ }
+
+ // Internals
+
+ static bool isNotAlreadyContainedIn(const LoopT *SubLoop,
+ const LoopT *ParentLoop) {
+ if (!SubLoop)
+ return true;
+ if (SubLoop == ParentLoop)
+ return false;
+ return isNotAlreadyContainedIn(SubLoop->getParentLoop(), ParentLoop);
+ }
+
+ /// Create the loop forest using a stable algorithm.
+ void analyze(const DominatorTreeBase<BlockT, false> &DomTree);
+
+ // Debugging
+ void print(raw_ostream &OS) const;
+
+ void verify(const DominatorTreeBase<BlockT, false> &DomTree) const;
+
+ /// Destroy a loop that has been removed from the `LoopInfo` nest.
+ ///
+ /// This runs the destructor of the loop object making it invalid to
+ /// reference afterward. The memory is retained so that the *pointer* to the
+ /// loop remains valid.
+ ///
+ /// The caller is responsible for removing this loop from the loop nest and
+ /// otherwise disconnecting it from the broader `LoopInfo` data structures.
+ /// Callers that don't naturally handle this themselves should probably call
+ /// `erase` instead.
+ void destroy(LoopT *L) {
+ L->~LoopT();
+
+ // Since LoopAllocator is a BumpPtrAllocator, this Deallocate only poisons
+ // \c L, but the pointer remains valid for non-dereferencing uses.
+ LoopAllocator.Deallocate(L);
+ }
+};
+
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_GENERICLOOPINFO_H
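As a usage note (illustrative only, not part of the patch): the IR-level instantiation of this template is llvm::LoopInfo over llvm::BasicBlock and llvm::Loop, and a typical standalone query looks like the following; `queryLoops` is a hypothetical helper name.

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"

static void queryLoops(llvm::Function &F) {
  llvm::DominatorTree DT(F);
  llvm::LoopInfo LI(DT);                     // builds the loop forest via analyze()
  for (llvm::BasicBlock &BB : F) {
    unsigned Depth = LI.getLoopDepth(&BB);   // 0 => not inside any loop
    (void)Depth;
  }
  for (llvm::Loop *L : LI.getLoopsInPreorder())
    if (llvm::BasicBlock *Preheader = L->getLoopPreheader())
      (void)Preheader;                       // e.g. a place to hoist invariants
}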
diff --git a/llvm/include/llvm/Analysis/LoopInfoImpl.h b/llvm/include/llvm/Support/GenericLoopInfoImpl.h
index c509ee67cbac..85233d38f0f6 100644
--- a/llvm/include/llvm/Analysis/LoopInfoImpl.h
+++ b/llvm/include/llvm/Support/GenericLoopInfoImpl.h
@@ -1,4 +1,4 @@
-//===- llvm/Analysis/LoopInfoImpl.h - Natural Loop Calculator ---*- C++ -*-===//
+//===- GenericLoopInfoImpl.h - Generic Loop Info Implementation -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,19 +6,19 @@
//
//===----------------------------------------------------------------------===//
//
-// This is the generic implementation of LoopInfo used for both Loops and
-// MachineLoops.
+// This file contains the implementation of GenericLoopInfo. It should only be
+// included in files that explicitly instantiate a GenericLoopInfo.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_ANALYSIS_LOOPINFOIMPL_H
-#define LLVM_ANALYSIS_LOOPINFOIMPL_H
+#ifndef LLVM_SUPPORT_GENERICLOOPINFOIMPL_H
+#define LLVM_SUPPORT_GENERICLOOPINFOIMPL_H
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/IR/Dominators.h"
+#include "llvm/Support/GenericLoopInfo.h"
namespace llvm {
@@ -171,6 +171,22 @@ void LoopBase<BlockT, LoopT>::getExitEdges(
ExitEdges.emplace_back(BB, Succ);
}
+namespace detail {
+template <class BlockT>
+using has_hoist_check = decltype(&BlockT::isLegalToHoistInto);
+
+template <class BlockT>
+using detect_has_hoist_check = llvm::is_detected<has_hoist_check, BlockT>;
+
+/// Detection-idiom helpers that dispatch to the isLegalToHoistInto member
+/// function, or return false if it doesn't exist.
+template <class BlockT> bool isLegalToHoistInto(BlockT *Block) {
+ if constexpr (detect_has_hoist_check<BlockT>::value)
+ return Block->isLegalToHoistInto();
+ return false;
+}
+} // namespace detail
+
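A small illustration (not part of the patch) of the detection idiom used above: llvm::is_detected turns "does the block type have this member?" into a compile-time bool that `if constexpr` can branch on. The types `WithHook`/`WithoutHook` and the alias `has_hook` are hypothetical names used only for the example.

#include "llvm/ADT/STLExtras.h"   // llvm::is_detected

struct WithHook { bool isLegalToHoistInto() const { return true; } };
struct WithoutHook {};

template <class T> using has_hook = decltype(&T::isLegalToHoistInto);

static_assert(llvm::is_detected<has_hook, WithHook>::value,
              "member found, so the real hook is called");
static_assert(!llvm::is_detected<has_hook, WithoutHook>::value,
              "no member, so the dispatcher falls back to returning false");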
/// getLoopPreheader - If there is a preheader for this loop, return it. A
/// loop has a preheader if there is only one edge to the header of the loop
/// from outside of the loop and it is legal to hoist instructions into the
@@ -188,7 +204,7 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopPreheader() const {
return nullptr;
// Make sure we are allowed to hoist instructions into the predecessor.
- if (!Out->isLegalToHoistInto())
+ if (!detail::isLegalToHoistInto(Out))
return nullptr;
// Make sure there is only one exit out of the preheader.
@@ -371,7 +387,7 @@ void LoopBase<BlockT, LoopT>::verifyLoop() const {
// Check the parent loop pointer.
if (ParentLoop) {
- assert(is_contained(*ParentLoop, this) &&
+ assert(is_contained(ParentLoop->getSubLoops(), this) &&
"Loop is not a subloop of its parent!");
}
#endif
@@ -750,6 +766,6 @@ void LoopInfoBase<BlockT, LoopT>::verify(
#endif
}
-} // End llvm namespace
+} // namespace llvm
-#endif
+#endif // LLVM_SUPPORT_GENERICLOOPINFOIMPL_H
diff --git a/llvm/include/llvm/Support/GraphWriter.h b/llvm/include/llvm/Support/GraphWriter.h
index 515057e7e312..dfda605365de 100644
--- a/llvm/include/llvm/Support/GraphWriter.h
+++ b/llvm/include/llvm/Support/GraphWriter.h
@@ -73,7 +73,7 @@ class GraphWriter {
using child_iterator = typename GTraits::ChildIteratorType;
DOTTraits DTraits;
- static_assert(std::is_pointer<NodeRef>::value,
+ static_assert(std::is_pointer_v<NodeRef>,
"FIXME: Currently GraphWriter requires the NodeRef type to be "
"a pointer.\nThe pointer usage should be moved to "
"DOTGraphTraits, and removed from GraphWriter itself.");
diff --git a/llvm/include/llvm/Support/Host.h b/llvm/include/llvm/Support/Host.h
index 158667678ae7..113a252a12de 100644
--- a/llvm/include/llvm/Support/Host.h
+++ b/llvm/include/llvm/Support/Host.h
@@ -11,4 +11,8 @@
///
//===----------------------------------------------------------------------===//
+#ifdef __GNUC__
+#pragma GCC warning \
+ "This header is deprecated, please use llvm/TargetParser/Host.h"
+#endif
#include "llvm/TargetParser/Host.h"
diff --git a/llvm/include/llvm/Support/JSON.h b/llvm/include/llvm/Support/JSON.h
index d35089122941..a81881c52d6c 100644
--- a/llvm/include/llvm/Support/JSON.h
+++ b/llvm/include/llvm/Support/JSON.h
@@ -74,6 +74,11 @@ namespace json {
// - When retrieving strings from Values (e.g. asString()), the result will
// always be valid UTF-8.
+template <typename T>
+constexpr bool is_uint_64_bit_v =
+ std::is_integral_v<T> && std::is_unsigned_v<T> &&
+ sizeof(T) == sizeof(uint64_t);
+
/// Returns true if \p S is valid UTF-8, which is required for use as JSON.
/// If it returns false, \p Offset is set to a byte offset near the first error.
bool isUTF8(llvm::StringRef S, size_t *ErrOffset = nullptr);
@@ -329,40 +334,37 @@ public:
Value(std::nullptr_t) : Type(T_Null) {}
// Boolean (disallow implicit conversions).
// (The last template parameter is a dummy to keep templates distinct.)
- template <typename T,
- typename = std::enable_if_t<std::is_same<T, bool>::value>,
+ template <typename T, typename = std::enable_if_t<std::is_same_v<T, bool>>,
bool = false>
Value(T B) : Type(T_Boolean) {
create<bool>(B);
}
- // Unsigned 64-bit long integers.
- template <typename T,
- typename = std::enable_if_t<std::is_same<T, uint64_t>::value>,
- bool = false, bool = false>
+ // Unsigned 64-bit integers.
+ template <typename T, typename = std::enable_if_t<is_uint_64_bit_v<T>>>
Value(T V) : Type(T_UINT64) {
create<uint64_t>(uint64_t{V});
}
// Integers (except boolean and uint64_t).
// Must be non-narrowing convertible to int64_t.
- template <typename T, typename = std::enable_if_t<std::is_integral<T>::value>,
- typename = std::enable_if_t<!std::is_same<T, bool>::value>,
- typename = std::enable_if_t<!std::is_same<T, uint64_t>::value>>
+ template <typename T, typename = std::enable_if_t<std::is_integral_v<T>>,
+ typename = std::enable_if_t<!std::is_same_v<T, bool>>,
+ typename = std::enable_if_t<!is_uint_64_bit_v<T>>>
Value(T I) : Type(T_Integer) {
create<int64_t>(int64_t{I});
}
// Floating point. Must be non-narrowing convertible to double.
template <typename T,
- typename = std::enable_if_t<std::is_floating_point<T>::value>,
+ typename = std::enable_if_t<std::is_floating_point_v<T>>,
double * = nullptr>
Value(T D) : Type(T_Double) {
create<double>(double{D});
}
// Serializable types: with a toJSON(const T&)->Value function, found by ADL.
template <typename T,
- typename = std::enable_if_t<std::is_same<
- Value, decltype(toJSON(*(const T *)nullptr))>::value>,
+ typename = std::enable_if_t<
+ std::is_same_v<Value, decltype(toJSON(*(const T *)nullptr))>>,
Value * = nullptr>
Value(const T &V) : Value(toJSON(V)) {}
@@ -424,6 +426,12 @@ public:
std::optional<int64_t> getAsInteger() const {
if (LLVM_LIKELY(Type == T_Integer))
return as<int64_t>();
+ if (LLVM_LIKELY(Type == T_UINT64)) {
+ uint64_t U = as<uint64_t>();
+ if (LLVM_LIKELY(U <= uint64_t(std::numeric_limits<int64_t>::max()))) {
+ return U;
+ }
+ }
if (LLVM_LIKELY(Type == T_Double)) {
double D = as<double>();
if (LLVM_LIKELY(std::modf(D, &D) == 0.0 &&
@@ -767,7 +775,7 @@ bool fromJSON(const Value &E, std::optional<T> &Out, Path P) {
Out = std::nullopt;
return true;
}
- T Result;
+ T Result = {};
if (!fromJSON(E, Result, P))
return false;
Out = std::move(Result);
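To make the new unsigned 64-bit handling concrete, here is a short illustrative snippet assuming ordinary llvm::json usage as declared in this header; `roundTrip` is a hypothetical name.

#include "llvm/Support/JSON.h"
#include <cstdint>
#include <optional>

static void roundTrip() {
  uint64_t Big = (1ULL << 40) + 3;
  llvm::json::Value V(Big);             // picks the T_UINT64 constructor
  // getAsInteger() now also accepts T_UINT64 values that fit in int64_t.
  if (std::optional<int64_t> I = V.getAsInteger())
    (void)*I;
}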
diff --git a/llvm/include/llvm/Support/KnownBits.h b/llvm/include/llvm/Support/KnownBits.h
index 0fb056b25417..8462aa11202d 100644
--- a/llvm/include/llvm/Support/KnownBits.h
+++ b/llvm/include/llvm/Support/KnownBits.h
@@ -49,7 +49,7 @@ public:
/// Returns true if we know the value of all bits.
bool isConstant() const {
assert(!hasConflict() && "KnownBits conflict!");
- return Zero.countPopulation() + One.countPopulation() == getBitWidth();
+ return Zero.popcount() + One.popcount() == getBitWidth();
}
/// Returns the value when all bits have a known value. This just returns One
@@ -230,24 +230,16 @@ public:
KnownBits makeGE(const APInt &Val) const;
/// Returns the minimum number of trailing zero bits.
- unsigned countMinTrailingZeros() const {
- return Zero.countTrailingOnes();
- }
+ unsigned countMinTrailingZeros() const { return Zero.countr_one(); }
/// Returns the minimum number of trailing one bits.
- unsigned countMinTrailingOnes() const {
- return One.countTrailingOnes();
- }
+ unsigned countMinTrailingOnes() const { return One.countr_one(); }
/// Returns the minimum number of leading zero bits.
- unsigned countMinLeadingZeros() const {
- return Zero.countLeadingOnes();
- }
+ unsigned countMinLeadingZeros() const { return Zero.countl_one(); }
/// Returns the minimum number of leading one bits.
- unsigned countMinLeadingOnes() const {
- return One.countLeadingOnes();
- }
+ unsigned countMinLeadingOnes() const { return One.countl_one(); }
/// Returns the number of times the sign bit is replicated into the other
/// bits.
@@ -270,33 +262,23 @@ public:
}
/// Returns the maximum number of trailing zero bits possible.
- unsigned countMaxTrailingZeros() const {
- return One.countTrailingZeros();
- }
+ unsigned countMaxTrailingZeros() const { return One.countr_zero(); }
/// Returns the maximum number of trailing one bits possible.
- unsigned countMaxTrailingOnes() const {
- return Zero.countTrailingZeros();
- }
+ unsigned countMaxTrailingOnes() const { return Zero.countr_zero(); }
/// Returns the maximum number of leading zero bits possible.
- unsigned countMaxLeadingZeros() const {
- return One.countLeadingZeros();
- }
+ unsigned countMaxLeadingZeros() const { return One.countl_zero(); }
/// Returns the maximum number of leading one bits possible.
- unsigned countMaxLeadingOnes() const {
- return Zero.countLeadingZeros();
- }
+ unsigned countMaxLeadingOnes() const { return Zero.countl_zero(); }
/// Returns the number of bits known to be one.
- unsigned countMinPopulation() const {
- return One.countPopulation();
- }
+ unsigned countMinPopulation() const { return One.popcount(); }
/// Returns the maximum number of bits that could be one.
unsigned countMaxPopulation() const {
- return getBitWidth() - Zero.countPopulation();
+ return getBitWidth() - Zero.popcount();
}
/// Returns the maximum number of bits needed to represent all possible
@@ -311,9 +293,30 @@ public:
return KnownBits(~C, C);
}
+ /// Returns KnownBits information that is known to be true for both this and
+ /// RHS.
+ ///
+ /// When an operation is known to return one of its operands, this can be used
+ /// to combine information about the known bits of the operands to get the
+ /// information that must be true about the result.
+ KnownBits intersectWith(const KnownBits &RHS) const {
+ return KnownBits(Zero & RHS.Zero, One & RHS.One);
+ }
+
+ /// Returns KnownBits information that is known to be true for either this or
+ /// RHS or both.
+ ///
+ /// This can be used to combine different sources of information about the
+ /// known bits of a single value, e.g. information about the low bits and the
+ /// high bits of the result of a multiplication.
+ KnownBits unionWith(const KnownBits &RHS) const {
+ return KnownBits(Zero | RHS.Zero, One | RHS.One);
+ }
+
/// Compute known bits common to LHS and RHS.
+ LLVM_DEPRECATED("use intersectWith instead", "intersectWith")
static KnownBits commonBits(const KnownBits &LHS, const KnownBits &RHS) {
- return KnownBits(LHS.Zero & RHS.Zero, LHS.One & RHS.One);
+ return LHS.intersectWith(RHS);
}
/// Return true if LHS and RHS have no common bits set.
@@ -329,6 +332,18 @@ public:
static KnownBits computeForAddSub(bool Add, bool NSW, const KnownBits &LHS,
KnownBits RHS);
+ /// Compute knownbits resulting from llvm.sadd.sat(LHS, RHS)
+ static KnownBits sadd_sat(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Compute knownbits resulting from llvm.uadd.sat(LHS, RHS)
+ static KnownBits uadd_sat(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Compute knownbits resulting from llvm.ssub.sat(LHS, RHS)
+ static KnownBits ssub_sat(const KnownBits &LHS, const KnownBits &RHS);
+
+ /// Compute knownbits resulting from llvm.usub.sat(LHS, RHS)
+ static KnownBits usub_sat(const KnownBits &LHS, const KnownBits &RHS);
+
/// Compute known bits resulting from multiplying LHS and RHS.
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS,
bool NoUndefSelfMultiply = false);
@@ -339,8 +354,13 @@ public:
/// Compute known bits from zero-extended multiply-hi.
static KnownBits mulhu(const KnownBits &LHS, const KnownBits &RHS);
+ /// Compute known bits for sdiv(LHS, RHS).
+ static KnownBits sdiv(const KnownBits &LHS, const KnownBits &RHS,
+ bool Exact = false);
+
/// Compute known bits for udiv(LHS, RHS).
- static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS);
+ static KnownBits udiv(const KnownBits &LHS, const KnownBits &RHS,
+ bool Exact = false);
/// Compute known bits for urem(LHS, RHS).
static KnownBits urem(const KnownBits &LHS, const KnownBits &RHS);
@@ -362,15 +382,19 @@ public:
/// Compute known bits for shl(LHS, RHS).
/// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS.
- static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS);
+ static KnownBits shl(const KnownBits &LHS, const KnownBits &RHS,
+ bool NUW = false, bool NSW = false,
+ bool ShAmtNonZero = false);
/// Compute known bits for lshr(LHS, RHS).
/// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS.
- static KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS);
+ static KnownBits lshr(const KnownBits &LHS, const KnownBits &RHS,
+ bool ShAmtNonZero = false);
/// Compute known bits for ashr(LHS, RHS).
/// NOTE: RHS (shift amount) bitwidth doesn't need to be the same as LHS.
- static KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS);
+ static KnownBits ashr(const KnownBits &LHS, const KnownBits &RHS,
+ bool ShAmtNonZero = false);
/// Determine if these known bits always give the same ICMP_EQ result.
static std::optional<bool> eq(const KnownBits &LHS, const KnownBits &RHS);
@@ -422,6 +446,14 @@ public:
return KnownBits(Zero.reverseBits(), One.reverseBits());
}
+ /// Compute known bits for X & -X, which has only the lowest bit set of X set.
+ /// The name comes from the X86 BMI instruction
+ KnownBits blsi() const;
+
+ /// Compute known bits for X ^ (X - 1), which has all bits up to and including
+ /// the lowest set bit of X set. The name comes from the X86 BMI instruction.
+ KnownBits blsmsk() const;
+
bool operator==(const KnownBits &Other) const {
return Zero == Other.Zero && One == Other.One;
}
@@ -430,6 +462,11 @@ public:
void print(raw_ostream &OS) const;
void dump() const;
+
+private:
+ // Internal helper for getting the initial KnownBits for an `srem` or `urem`
+ // operation with the low-bits set.
+ static KnownBits remGetLowBits(const KnownBits &LHS, const KnownBits &RHS);
};
inline KnownBits operator&(KnownBits LHS, const KnownBits &RHS) {
@@ -462,6 +499,11 @@ inline KnownBits operator^(const KnownBits &LHS, KnownBits &&RHS) {
return std::move(RHS);
}
+inline raw_ostream &operator<<(raw_ostream &OS, const KnownBits &Known) {
+ Known.print(OS);
+ return OS;
+}
+
} // end namespace llvm
#endif
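A brief illustration (not from the patch) of the new intersectWith/unionWith combinators; `combine` is a hypothetical helper and the bit choices are arbitrary.

#include "llvm/Support/KnownBits.h"

static void combine() {
  llvm::KnownBits A(/*BitWidth=*/8), B(/*BitWidth=*/8);
  A.Zero.setBit(0);                      // A knows bit 0 is zero
  B.One.setBit(7);                       // B knows bit 7 is one
  // Facts true for *both* operands (e.g. for a select of A and B): nothing,
  // since A and B know about different bits.
  llvm::KnownBits EitherOperand = A.intersectWith(B);
  // Facts from *either* source about the same value: bit 0 zero, bit 7 one.
  llvm::KnownBits SameValue = B.unionWith(A);
  (void)EitherOperand;
  (void)SameValue;
}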
diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index 9c419c82a19e..a5d367279aef 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -191,7 +191,7 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
} while (Byte >= 128);
// Sign extend negative numbers if needed.
if (Shift < 64 && (Byte & 0x40))
- Value |= (-1ULL) << Shift;
+ Value |= UINT64_MAX << Shift;
if (n)
*n = (unsigned)(p - orig_p);
return Value;
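For reference, a tiny illustrative check of the sign-extension path touched above; `decodeMinusOne` is a hypothetical name.

#include "llvm/Support/LEB128.h"
#include <cassert>
#include <cstdint>

static void decodeMinusOne() {
  // 0x7F is the one-byte SLEB128 encoding of -1: after the loop, Shift == 7
  // and bit 0x40 of the last byte is set, so UINT64_MAX << 7 is ORed in.
  const uint8_t Buf[] = {0x7F};
  unsigned Len = 0;
  int64_t V = llvm::decodeSLEB128(Buf, &Len);
  assert(V == -1 && Len == 1);
  (void)V;
}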
diff --git a/llvm/include/llvm/Support/LLVMDriver.h b/llvm/include/llvm/Support/LLVMDriver.h
new file mode 100644
index 000000000000..1c68f5070777
--- /dev/null
+++ b/llvm/include/llvm/Support/LLVMDriver.h
@@ -0,0 +1,29 @@
+//===- LLVMDriver.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_LLVMDRIVER_H
+#define LLVM_SUPPORT_LLVMDRIVER_H
+
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+struct ToolContext {
+ const char *Path;
+ const char *PrependArg;
+ // PrependArg will be added unconditionally by the llvm-driver, but
+ // NeedsPrependArg will be false if Path is adequate to reinvoke the tool.
+ // This is useful if realpath is ever called on Path, in which case it will
+ // point to the llvm-driver executable, where PrependArg will be needed to
+ // invoke the correct tool.
+ bool NeedsPrependArg;
+};
+
+} // namespace llvm
+
+#endif
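As a usage sketch (not part of the patch; the helper `rebuildArgv` is hypothetical), a tool that wants to re-invoke itself could rebuild its leading arguments from a ToolContext along these lines:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/LLVMDriver.h"

static llvm::SmallVector<const char *, 8>
rebuildArgv(const llvm::ToolContext &TC) {
  llvm::SmallVector<const char *, 8> Argv;
  Argv.push_back(TC.Path);
  if (TC.NeedsPrependArg)
    Argv.push_back(TC.PrependArg);   // e.g. the tool name when Path points at
                                     // the combined llvm-driver binary
  return Argv;
}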
diff --git a/llvm/include/llvm/Support/LoongArchTargetParser.h b/llvm/include/llvm/Support/LoongArchTargetParser.h
deleted file mode 100644
index 4e735c893f82..000000000000
--- a/llvm/include/llvm/Support/LoongArchTargetParser.h
+++ /dev/null
@@ -1,15 +0,0 @@
-//===-- llvm/Support/LoongArchTargetParser.h --------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This header is deprecated in favour of
-/// `llvm/TargetParser/LoongArchTargetParser.h`.
-///
-//===----------------------------------------------------------------------===//
-
-#include "llvm/TargetParser/LoongArchTargetParser.h"
diff --git a/llvm/include/llvm/Support/LowLevelTypeImpl.h b/llvm/include/llvm/Support/LowLevelTypeImpl.h
deleted file mode 100644
index f33c1ecde039..000000000000
--- a/llvm/include/llvm/Support/LowLevelTypeImpl.h
+++ /dev/null
@@ -1,431 +0,0 @@
-//== llvm/Support/LowLevelTypeImpl.h --------------------------- -*- C++ -*-==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// Implement a low-level type suitable for MachineInstr level instruction
-/// selection.
-///
-/// For a type attached to a MachineInstr, we only care about 2 details: total
-/// size and the number of vector lanes (if any). Accordingly, there are 4
-/// possible valid type-kinds:
-///
-/// * `sN` for scalars and aggregates
-/// * `<N x sM>` for vectors, which must have at least 2 elements.
-/// * `pN` for pointers
-///
-/// Other information required for correct selection is expected to be carried
-/// by the opcode, or non-type flags. For example the distinction between G_ADD
-/// and G_FADD for int/float or fast-math flags.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_LOWLEVELTYPEIMPL_H
-#define LLVM_SUPPORT_LOWLEVELTYPEIMPL_H
-
-#include "llvm/ADT/DenseMapInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MachineValueType.h"
-#include <cassert>
-
-namespace llvm {
-
-class Type;
-class raw_ostream;
-
-class LLT {
-public:
- /// Get a low-level scalar or aggregate "bag of bits".
- static constexpr LLT scalar(unsigned SizeInBits) {
- return LLT{/*isPointer=*/false, /*isVector=*/false, /*isScalar=*/true,
- ElementCount::getFixed(0), SizeInBits,
- /*AddressSpace=*/0};
- }
-
- /// Get a low-level pointer in the given address space.
- static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits) {
- assert(SizeInBits > 0 && "invalid pointer size");
- return LLT{/*isPointer=*/true, /*isVector=*/false, /*isScalar=*/false,
- ElementCount::getFixed(0), SizeInBits, AddressSpace};
- }
-
- /// Get a low-level vector of some number of elements and element width.
- static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits) {
- assert(!EC.isScalar() && "invalid number of vector elements");
- return LLT{/*isPointer=*/false, /*isVector=*/true, /*isScalar=*/false,
- EC, ScalarSizeInBits, /*AddressSpace=*/0};
- }
-
- /// Get a low-level vector of some number of elements and element type.
- static constexpr LLT vector(ElementCount EC, LLT ScalarTy) {
- assert(!EC.isScalar() && "invalid number of vector elements");
- assert(!ScalarTy.isVector() && "invalid vector element type");
- return LLT{ScalarTy.isPointer(),
- /*isVector=*/true,
- /*isScalar=*/false,
- EC,
- ScalarTy.getSizeInBits().getFixedValue(),
- ScalarTy.isPointer() ? ScalarTy.getAddressSpace() : 0};
- }
-
- /// Get a low-level fixed-width vector of some number of elements and element
- /// width.
- static constexpr LLT fixed_vector(unsigned NumElements,
- unsigned ScalarSizeInBits) {
- return vector(ElementCount::getFixed(NumElements), ScalarSizeInBits);
- }
-
- /// Get a low-level fixed-width vector of some number of elements and element
- /// type.
- static constexpr LLT fixed_vector(unsigned NumElements, LLT ScalarTy) {
- return vector(ElementCount::getFixed(NumElements), ScalarTy);
- }
-
- /// Get a low-level scalable vector of some number of elements and element
- /// width.
- static constexpr LLT scalable_vector(unsigned MinNumElements,
- unsigned ScalarSizeInBits) {
- return vector(ElementCount::getScalable(MinNumElements), ScalarSizeInBits);
- }
-
- /// Get a low-level scalable vector of some number of elements and element
- /// type.
- static constexpr LLT scalable_vector(unsigned MinNumElements, LLT ScalarTy) {
- return vector(ElementCount::getScalable(MinNumElements), ScalarTy);
- }
-
- static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy) {
- return EC.isScalar() ? ScalarTy : LLT::vector(EC, ScalarTy);
- }
-
- static constexpr LLT scalarOrVector(ElementCount EC, uint64_t ScalarSize) {
- assert(ScalarSize <= std::numeric_limits<unsigned>::max() &&
- "Not enough bits in LLT to represent size");
- return scalarOrVector(EC, LLT::scalar(static_cast<unsigned>(ScalarSize)));
- }
-
- explicit constexpr LLT(bool isPointer, bool isVector, bool isScalar,
- ElementCount EC, uint64_t SizeInBits,
- unsigned AddressSpace)
- : LLT() {
- init(isPointer, isVector, isScalar, EC, SizeInBits, AddressSpace);
- }
- explicit constexpr LLT()
- : IsScalar(false), IsPointer(false), IsVector(false), RawData(0) {}
-
- explicit LLT(MVT VT);
-
- constexpr bool isValid() const { return IsScalar || RawData != 0; }
-
- constexpr bool isScalar() const { return IsScalar; }
-
- constexpr bool isPointer() const {
- return isValid() && IsPointer && !IsVector;
- }
-
- constexpr bool isVector() const { return isValid() && IsVector; }
-
- /// Returns the number of elements in a vector LLT. Must only be called on
- /// vector types.
- constexpr uint16_t getNumElements() const {
- if (isScalable())
- llvm::reportInvalidSizeRequest(
- "Possible incorrect use of LLT::getNumElements() for "
- "scalable vector. Scalable flag may be dropped, use "
- "LLT::getElementCount() instead");
- return getElementCount().getKnownMinValue();
- }
-
- /// Returns true if the LLT is a scalable vector. Must only be called on
- /// vector types.
- constexpr bool isScalable() const {
- assert(isVector() && "Expected a vector type");
- return IsPointer ? getFieldValue(PointerVectorScalableFieldInfo)
- : getFieldValue(VectorScalableFieldInfo);
- }
-
- constexpr ElementCount getElementCount() const {
- assert(IsVector && "cannot get number of elements on scalar/aggregate");
- return ElementCount::get(IsPointer
- ? getFieldValue(PointerVectorElementsFieldInfo)
- : getFieldValue(VectorElementsFieldInfo),
- isScalable());
- }
-
- /// Returns the total size of the type. Must only be called on sized types.
- constexpr TypeSize getSizeInBits() const {
- if (isPointer() || isScalar())
- return TypeSize::Fixed(getScalarSizeInBits());
- auto EC = getElementCount();
- return TypeSize(getScalarSizeInBits() * EC.getKnownMinValue(),
- EC.isScalable());
- }
-
- /// Returns the total size of the type in bytes, i.e. number of whole bytes
- /// needed to represent the size in bits. Must only be called on sized types.
- constexpr TypeSize getSizeInBytes() const {
- TypeSize BaseSize = getSizeInBits();
- return {(BaseSize.getKnownMinValue() + 7) / 8, BaseSize.isScalable()};
- }
-
- constexpr LLT getScalarType() const {
- return isVector() ? getElementType() : *this;
- }
-
- /// If this type is a vector, return a vector with the same number of elements
- /// but the new element type. Otherwise, return the new element type.
- constexpr LLT changeElementType(LLT NewEltTy) const {
- return isVector() ? LLT::vector(getElementCount(), NewEltTy) : NewEltTy;
- }
-
- /// If this type is a vector, return a vector with the same number of elements
- /// but the new element size. Otherwise, return the new element type. Invalid
- /// for pointer types. For pointer types, use changeElementType.
- constexpr LLT changeElementSize(unsigned NewEltSize) const {
- assert(!getScalarType().isPointer() &&
- "invalid to directly change element size for pointers");
- return isVector() ? LLT::vector(getElementCount(), NewEltSize)
- : LLT::scalar(NewEltSize);
- }
-
- /// Return a vector or scalar with the same element type and the new element
- /// count.
- constexpr LLT changeElementCount(ElementCount EC) const {
- return LLT::scalarOrVector(EC, getScalarType());
- }
-
- /// Return a type that is \p Factor times smaller. Reduces the number of
- /// elements if this is a vector, or the bitwidth for scalar/pointers. Does
- /// not attempt to handle cases that aren't evenly divisible.
- constexpr LLT divide(int Factor) const {
- assert(Factor != 1);
- assert((!isScalar() || getScalarSizeInBits() != 0) &&
- "cannot divide scalar of size zero");
- if (isVector()) {
- assert(getElementCount().isKnownMultipleOf(Factor));
- return scalarOrVector(getElementCount().divideCoefficientBy(Factor),
- getElementType());
- }
-
- assert(getScalarSizeInBits() % Factor == 0);
- return scalar(getScalarSizeInBits() / Factor);
- }
-
- /// Produce a vector type that is \p Factor times bigger, preserving the
- /// element type. For a scalar or pointer, this will produce a new vector with
- /// \p Factor elements.
- constexpr LLT multiplyElements(int Factor) const {
- if (isVector()) {
- return scalarOrVector(getElementCount().multiplyCoefficientBy(Factor),
- getElementType());
- }
-
- return fixed_vector(Factor, *this);
- }
-
- constexpr bool isByteSized() const {
- return getSizeInBits().isKnownMultipleOf(8);
- }
-
- constexpr unsigned getScalarSizeInBits() const {
- if (IsScalar)
- return getFieldValue(ScalarSizeFieldInfo);
- if (IsVector) {
- if (!IsPointer)
- return getFieldValue(VectorSizeFieldInfo);
- else
- return getFieldValue(PointerVectorSizeFieldInfo);
- } else if (IsPointer)
- return getFieldValue(PointerSizeFieldInfo);
- else
- llvm_unreachable("unexpected LLT");
- }
-
- constexpr unsigned getAddressSpace() const {
- assert(RawData != 0 && "Invalid Type");
- assert(IsPointer && "cannot get address space of non-pointer type");
- if (!IsVector)
- return getFieldValue(PointerAddressSpaceFieldInfo);
- else
- return getFieldValue(PointerVectorAddressSpaceFieldInfo);
- }
-
- /// Returns the vector's element type. Only valid for vector types.
- constexpr LLT getElementType() const {
- assert(isVector() && "cannot get element type of scalar/aggregate");
- if (IsPointer)
- return pointer(getAddressSpace(), getScalarSizeInBits());
- else
- return scalar(getScalarSizeInBits());
- }
-
- void print(raw_ostream &OS) const;
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- LLVM_DUMP_METHOD void dump() const {
- print(dbgs());
- dbgs() << '\n';
- }
-#endif
-
- constexpr bool operator==(const LLT &RHS) const {
- return IsPointer == RHS.IsPointer && IsVector == RHS.IsVector &&
- IsScalar == RHS.IsScalar && RHS.RawData == RawData;
- }
-
- constexpr bool operator!=(const LLT &RHS) const { return !(*this == RHS); }
-
- friend struct DenseMapInfo<LLT>;
- friend class GISelInstProfileBuilder;
-
-private:
- /// LLT is packed into 64 bits as follows:
- /// isScalar : 1
- /// isPointer : 1
- /// isVector : 1
- /// with 61 bits remaining for Kind-specific data, packed in bitfields
- /// as described below. As there isn't a simple portable way to pack bits
- /// into bitfields, here the different fields in the packed structure is
- /// described in static const *Field variables. Each of these variables
- /// is a 2-element array, with the first element describing the bitfield size
- /// and the second element describing the bitfield offset.
- typedef int BitFieldInfo[2];
- ///
- /// This is how the bitfields are packed per Kind:
- /// * Invalid:
- /// gets encoded as RawData == 0, as that is an invalid encoding, since for
- /// valid encodings, SizeInBits/SizeOfElement must be larger than 0.
- /// * Non-pointer scalar (isPointer == 0 && isVector == 0):
- /// SizeInBits: 32;
- static const constexpr BitFieldInfo ScalarSizeFieldInfo{32, 0};
- /// * Pointer (isPointer == 1 && isVector == 0):
- /// SizeInBits: 16;
- /// AddressSpace: 24;
- static const constexpr BitFieldInfo PointerSizeFieldInfo{16, 0};
- static const constexpr BitFieldInfo PointerAddressSpaceFieldInfo{
- 24, PointerSizeFieldInfo[0] + PointerSizeFieldInfo[1]};
- static_assert((PointerAddressSpaceFieldInfo[0] +
- PointerAddressSpaceFieldInfo[1]) <= 61,
- "Insufficient bits to encode all data");
- /// * Vector-of-non-pointer (isPointer == 0 && isVector == 1):
- /// NumElements: 16;
- /// SizeOfElement: 32;
- /// Scalable: 1;
- static const constexpr BitFieldInfo VectorElementsFieldInfo{16, 0};
- static const constexpr BitFieldInfo VectorSizeFieldInfo{
- 32, VectorElementsFieldInfo[0] + VectorElementsFieldInfo[1]};
- static const constexpr BitFieldInfo VectorScalableFieldInfo{
- 1, VectorSizeFieldInfo[0] + VectorSizeFieldInfo[1]};
- static_assert((VectorSizeFieldInfo[0] + VectorSizeFieldInfo[1]) <= 61,
- "Insufficient bits to encode all data");
- /// * Vector-of-pointer (isPointer == 1 && isVector == 1):
- /// NumElements: 16;
- /// SizeOfElement: 16;
- /// AddressSpace: 24;
- /// Scalable: 1;
- static const constexpr BitFieldInfo PointerVectorElementsFieldInfo{16, 0};
- static const constexpr BitFieldInfo PointerVectorSizeFieldInfo{
- 16,
- PointerVectorElementsFieldInfo[1] + PointerVectorElementsFieldInfo[0]};
- static const constexpr BitFieldInfo PointerVectorAddressSpaceFieldInfo{
- 24, PointerVectorSizeFieldInfo[1] + PointerVectorSizeFieldInfo[0]};
- static const constexpr BitFieldInfo PointerVectorScalableFieldInfo{
- 1, PointerVectorAddressSpaceFieldInfo[0] +
- PointerVectorAddressSpaceFieldInfo[1]};
- static_assert((PointerVectorAddressSpaceFieldInfo[0] +
- PointerVectorAddressSpaceFieldInfo[1]) <= 61,
- "Insufficient bits to encode all data");
-
- uint64_t IsScalar : 1;
- uint64_t IsPointer : 1;
- uint64_t IsVector : 1;
- uint64_t RawData : 61;
-
- static constexpr uint64_t getMask(const BitFieldInfo FieldInfo) {
- const int FieldSizeInBits = FieldInfo[0];
- return (((uint64_t)1) << FieldSizeInBits) - 1;
- }
- static constexpr uint64_t maskAndShift(uint64_t Val, uint64_t Mask,
- uint8_t Shift) {
- assert(Val <= Mask && "Value too large for field");
- return (Val & Mask) << Shift;
- }
- static constexpr uint64_t maskAndShift(uint64_t Val,
- const BitFieldInfo FieldInfo) {
- return maskAndShift(Val, getMask(FieldInfo), FieldInfo[1]);
- }
-
- constexpr uint64_t getFieldValue(const BitFieldInfo FieldInfo) const {
- return getMask(FieldInfo) & (RawData >> FieldInfo[1]);
- }
-
- constexpr void init(bool IsPointer, bool IsVector, bool IsScalar,
- ElementCount EC, uint64_t SizeInBits,
- unsigned AddressSpace) {
- assert(SizeInBits <= std::numeric_limits<unsigned>::max() &&
- "Not enough bits in LLT to represent size");
- this->IsPointer = IsPointer;
- this->IsVector = IsVector;
- this->IsScalar = IsScalar;
- if (IsScalar)
- RawData = maskAndShift(SizeInBits, ScalarSizeFieldInfo);
- else if (IsVector) {
- assert(EC.isVector() && "invalid number of vector elements");
- if (!IsPointer)
- RawData =
- maskAndShift(EC.getKnownMinValue(), VectorElementsFieldInfo) |
- maskAndShift(SizeInBits, VectorSizeFieldInfo) |
- maskAndShift(EC.isScalable() ? 1 : 0, VectorScalableFieldInfo);
- else
- RawData =
- maskAndShift(EC.getKnownMinValue(),
- PointerVectorElementsFieldInfo) |
- maskAndShift(SizeInBits, PointerVectorSizeFieldInfo) |
- maskAndShift(AddressSpace, PointerVectorAddressSpaceFieldInfo) |
- maskAndShift(EC.isScalable() ? 1 : 0,
- PointerVectorScalableFieldInfo);
- } else if (IsPointer)
- RawData = maskAndShift(SizeInBits, PointerSizeFieldInfo) |
- maskAndShift(AddressSpace, PointerAddressSpaceFieldInfo);
- else
- llvm_unreachable("unexpected LLT configuration");
- }
-
-public:
- constexpr uint64_t getUniqueRAWLLTData() const {
- return ((uint64_t)RawData) << 3 | ((uint64_t)IsScalar) << 2 |
- ((uint64_t)IsPointer) << 1 | ((uint64_t)IsVector);
- }
-};
-
-inline raw_ostream& operator<<(raw_ostream &OS, const LLT &Ty) {
- Ty.print(OS);
- return OS;
-}
-
-template<> struct DenseMapInfo<LLT> {
- static inline LLT getEmptyKey() {
- LLT Invalid;
- Invalid.IsPointer = true;
- return Invalid;
- }
- static inline LLT getTombstoneKey() {
- LLT Invalid;
- Invalid.IsVector = true;
- return Invalid;
- }
- static inline unsigned getHashValue(const LLT &Ty) {
- uint64_t Val = Ty.getUniqueRAWLLTData();
- return DenseMapInfo<uint64_t>::getHashValue(Val);
- }
- static bool isEqual(const LLT &LHS, const LLT &RHS) {
- return LHS == RHS;
- }
-};
-
-}
-
-#endif // LLVM_SUPPORT_LOWLEVELTYPEIMPL_H
diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h
deleted file mode 100644
index d7ad32737a45..000000000000
--- a/llvm/include/llvm/Support/MachineValueType.h
+++ /dev/null
@@ -1,1576 +0,0 @@
-//===- Support/MachineValueType.h - Machine-Level types ---------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the set of machine-level target independent types which
-// legal values in the code generator use.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_MACHINEVALUETYPE_H
-#define LLVM_SUPPORT_MACHINEVALUETYPE_H
-
-#include "llvm/ADT/Sequence.h"
-#include "llvm/ADT/iterator_range.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TypeSize.h"
-#include <cassert>
-
-namespace llvm {
-
- class Type;
-
- /// Machine Value Type. Every type that is supported natively by some
- /// processor targeted by LLVM occurs here. This means that any legal value
- /// type can be represented by an MVT.
- class MVT {
- public:
- enum SimpleValueType : uint8_t {
- // clang-format off
-
- // Simple value types that aren't explicitly part of this enumeration
- // are considered extended value types.
- INVALID_SIMPLE_VALUE_TYPE = 0,
-
- // If you change this numbering, you must change the values in
- // ValueTypes.td as well!
- Other = 1, // This is a non-standard value
- i1 = 2, // This is a 1 bit integer value
- i2 = 3, // This is a 2 bit integer value
- i4 = 4, // This is a 4 bit integer value
- i8 = 5, // This is an 8 bit integer value
- i16 = 6, // This is a 16 bit integer value
- i32 = 7, // This is a 32 bit integer value
- i64 = 8, // This is a 64 bit integer value
- i128 = 9, // This is a 128 bit integer value
-
- FIRST_INTEGER_VALUETYPE = i1,
- LAST_INTEGER_VALUETYPE = i128,
-
- bf16 = 10, // This is a 16 bit brain floating point value
- f16 = 11, // This is a 16 bit floating point value
- f32 = 12, // This is a 32 bit floating point value
- f64 = 13, // This is a 64 bit floating point value
- f80 = 14, // This is an 80 bit floating point value
- f128 = 15, // This is a 128 bit floating point value
- ppcf128 = 16, // This is a PPC 128-bit floating point value
-
- FIRST_FP_VALUETYPE = bf16,
- LAST_FP_VALUETYPE = ppcf128,
-
- v1i1 = 17, // 1 x i1
- v2i1 = 18, // 2 x i1
- v4i1 = 19, // 4 x i1
- v8i1 = 20, // 8 x i1
- v16i1 = 21, // 16 x i1
- v32i1 = 22, // 32 x i1
- v64i1 = 23, // 64 x i1
- v128i1 = 24, // 128 x i1
- v256i1 = 25, // 256 x i1
- v512i1 = 26, // 512 x i1
- v1024i1 = 27, // 1024 x i1
- v2048i1 = 28, // 2048 x i1
-
- v128i2 = 29, // 128 x i2
- v256i2 = 30, // 256 x i2
-
- v64i4 = 31, // 64 x i4
- v128i4 = 32, // 128 x i4
-
- v1i8 = 33, // 1 x i8
- v2i8 = 34, // 2 x i8
- v4i8 = 35, // 4 x i8
- v8i8 = 36, // 8 x i8
- v16i8 = 37, // 16 x i8
- v32i8 = 38, // 32 x i8
- v64i8 = 39, // 64 x i8
- v128i8 = 40, // 128 x i8
- v256i8 = 41, // 256 x i8
- v512i8 = 42, // 512 x i8
- v1024i8 = 43, // 1024 x i8
-
- v1i16 = 44, // 1 x i16
- v2i16 = 45, // 2 x i16
- v3i16 = 46, // 3 x i16
- v4i16 = 47, // 4 x i16
- v8i16 = 48, // 8 x i16
- v16i16 = 49, // 16 x i16
- v32i16 = 50, // 32 x i16
- v64i16 = 51, // 64 x i16
- v128i16 = 52, // 128 x i16
- v256i16 = 53, // 256 x i16
- v512i16 = 54, // 512 x i16
-
- v1i32 = 55, // 1 x i32
- v2i32 = 56, // 2 x i32
- v3i32 = 57, // 3 x i32
- v4i32 = 58, // 4 x i32
- v5i32 = 59, // 5 x i32
- v6i32 = 60, // 6 x i32
- v7i32 = 61, // 7 x i32
- v8i32 = 62, // 8 x i32
- v9i32 = 63, // 9 x i32
- v10i32 = 64, // 10 x i32
- v11i32 = 65, // 11 x i32
- v12i32 = 66, // 12 x i32
- v16i32 = 67, // 16 x i32
- v32i32 = 68, // 32 x i32
- v64i32 = 69, // 64 x i32
- v128i32 = 70, // 128 x i32
- v256i32 = 71, // 256 x i32
- v512i32 = 72, // 512 x i32
- v1024i32 = 73, // 1024 x i32
- v2048i32 = 74, // 2048 x i32
-
- v1i64 = 75, // 1 x i64
- v2i64 = 76, // 2 x i64
- v3i64 = 77, // 3 x i64
- v4i64 = 78, // 4 x i64
- v8i64 = 79, // 8 x i64
- v16i64 = 80, // 16 x i64
- v32i64 = 81, // 32 x i64
- v64i64 = 82, // 64 x i64
- v128i64 = 83, // 128 x i64
- v256i64 = 84, // 256 x i64
-
- v1i128 = 85, // 1 x i128
-
- FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
- LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128,
-
- v1f16 = 86, // 1 x f16
- v2f16 = 87, // 2 x f16
- v3f16 = 88, // 3 x f16
- v4f16 = 89, // 4 x f16
- v8f16 = 90, // 8 x f16
- v16f16 = 91, // 16 x f16
- v32f16 = 92, // 32 x f16
- v64f16 = 93, // 64 x f16
- v128f16 = 94, // 128 x f16
- v256f16 = 95, // 256 x f16
- v512f16 = 96, // 512 x f16
-
- v2bf16 = 97, // 2 x bf16
- v3bf16 = 98, // 3 x bf16
- v4bf16 = 99, // 4 x bf16
- v8bf16 = 100, // 8 x bf16
- v16bf16 = 101, // 16 x bf16
- v32bf16 = 102, // 32 x bf16
- v64bf16 = 103, // 64 x bf16
- v128bf16 = 104, // 128 x bf16
-
- v1f32 = 105, // 1 x f32
- v2f32 = 106, // 2 x f32
- v3f32 = 107, // 3 x f32
- v4f32 = 108, // 4 x f32
- v5f32 = 109, // 5 x f32
- v6f32 = 110, // 6 x f32
- v7f32 = 111, // 7 x f32
- v8f32 = 112, // 8 x f32
- v9f32 = 113, // 9 x f32
- v10f32 = 114, // 10 x f32
- v11f32 = 115, // 11 x f32
- v12f32 = 116, // 12 x f32
- v16f32 = 117, // 16 x f32
-
- v32f32 = 118, // 32 x f32
- v64f32 = 119, // 64 x f32
- v128f32 = 120, // 128 x f32
- v256f32 = 121, // 256 x f32
- v512f32 = 122, // 512 x f32
- v1024f32 = 123, // 1024 x f32
- v2048f32 = 124, // 2048 x f32
-
- v1f64 = 125, // 1 x f64
- v2f64 = 126, // 2 x f64
- v3f64 = 127, // 3 x f64
- v4f64 = 128, // 4 x f64
- v8f64 = 129, // 8 x f64
- v16f64 = 130, // 16 x f64
- v32f64 = 131, // 32 x f64
- v64f64 = 132, // 64 x f64
- v128f64 = 133, // 128 x f64
- v256f64 = 134, // 256 x f64
-
- FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v1f16,
- LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v256f64,
-
- FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
- LAST_FIXEDLEN_VECTOR_VALUETYPE = v256f64,
-
- nxv1i1 = 135, // n x 1 x i1
- nxv2i1 = 136, // n x 2 x i1
- nxv4i1 = 137, // n x 4 x i1
- nxv8i1 = 138, // n x 8 x i1
- nxv16i1 = 139, // n x 16 x i1
- nxv32i1 = 140, // n x 32 x i1
- nxv64i1 = 141, // n x 64 x i1
-
- nxv1i8 = 142, // n x 1 x i8
- nxv2i8 = 143, // n x 2 x i8
- nxv4i8 = 144, // n x 4 x i8
- nxv8i8 = 145, // n x 8 x i8
- nxv16i8 = 146, // n x 16 x i8
- nxv32i8 = 147, // n x 32 x i8
- nxv64i8 = 148, // n x 64 x i8
-
- nxv1i16 = 149, // n x 1 x i16
- nxv2i16 = 150, // n x 2 x i16
- nxv4i16 = 151, // n x 4 x i16
- nxv8i16 = 152, // n x 8 x i16
- nxv16i16 = 153, // n x 16 x i16
- nxv32i16 = 154, // n x 32 x i16
-
- nxv1i32 = 155, // n x 1 x i32
- nxv2i32 = 156, // n x 2 x i32
- nxv4i32 = 157, // n x 4 x i32
- nxv8i32 = 158, // n x 8 x i32
- nxv16i32 = 159, // n x 16 x i32
- nxv32i32 = 160, // n x 32 x i32
-
- nxv1i64 = 161, // n x 1 x i64
- nxv2i64 = 162, // n x 2 x i64
- nxv4i64 = 163, // n x 4 x i64
- nxv8i64 = 164, // n x 8 x i64
- nxv16i64 = 165, // n x 16 x i64
- nxv32i64 = 166, // n x 32 x i64
-
- FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1,
- LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64,
-
- nxv1f16 = 167, // n x 1 x f16
- nxv2f16 = 168, // n x 2 x f16
- nxv4f16 = 169, // n x 4 x f16
- nxv8f16 = 170, // n x 8 x f16
- nxv16f16 = 171, // n x 16 x f16
- nxv32f16 = 172, // n x 32 x f16
-
- nxv1bf16 = 173, // n x 1 x bf16
- nxv2bf16 = 174, // n x 2 x bf16
- nxv4bf16 = 175, // n x 4 x bf16
- nxv8bf16 = 176, // n x 8 x bf16
- nxv16bf16 = 177, // n x 16 x bf16
- nxv32bf16 = 178, // n x 32 x bf16
-
- nxv1f32 = 179, // n x 1 x f32
- nxv2f32 = 180, // n x 2 x f32
- nxv4f32 = 181, // n x 4 x f32
- nxv8f32 = 182, // n x 8 x f32
- nxv16f32 = 183, // n x 16 x f32
-
- nxv1f64 = 184, // n x 1 x f64
- nxv2f64 = 185, // n x 2 x f64
- nxv4f64 = 186, // n x 4 x f64
- nxv8f64 = 187, // n x 8 x f64
-
- FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv1f16,
- LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64,
-
- FIRST_SCALABLE_VECTOR_VALUETYPE = nxv1i1,
- LAST_SCALABLE_VECTOR_VALUETYPE = nxv8f64,
-
- FIRST_VECTOR_VALUETYPE = v1i1,
- LAST_VECTOR_VALUETYPE = nxv8f64,
-
- x86mmx = 188, // This is an X86 MMX value
-
- Glue = 189, // This glues nodes together during pre-RA sched
-
- isVoid = 190, // This has no value
-
- Untyped = 191, // This value takes a register, but has
- // unspecified type. The register class
- // will be determined by the opcode.
-
- funcref = 192, // WebAssembly's funcref type
- externref = 193, // WebAssembly's externref type
- x86amx = 194, // This is an X86 AMX value
- i64x8 = 195, // 8 Consecutive GPRs (AArch64)
-
- FIRST_VALUETYPE = 1, // This is always the beginning of the list.
- LAST_VALUETYPE = i64x8, // This always remains at the end of the list.
- VALUETYPE_SIZE = LAST_VALUETYPE + 1,
-
- // This is the current maximum for LAST_VALUETYPE.
- // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
- // This value must be a multiple of 32.
- MAX_ALLOWED_VALUETYPE = 224,
-
- // A value of type llvm::TokenTy
- token = 248,
-
- // This is MDNode or MDString.
- Metadata = 249,
-
- // An int value the size of the pointer of the current
- // target to any address space. This must only be used internal to
- // tblgen. Other than for overloading, we treat iPTRAny the same as iPTR.
- iPTRAny = 250,
-
- // A vector with any length and element size. This is used
- // for intrinsics that have overloadings based on vector types.
- // This is only for tblgen's consumption!
- vAny = 251,
-
- // Any floating-point or vector floating-point value. This is used
- // for intrinsics that have overloadings based on floating-point types.
- // This is only for tblgen's consumption!
- fAny = 252,
-
- // An integer or vector integer value of any bit width. This is
- // used for intrinsics that have overloadings based on integer bit widths.
- // This is only for tblgen's consumption!
- iAny = 253,
-
- // An int value the size of the pointer of the current
- // target. This should only be used internal to tblgen!
- iPTR = 254,
-
- // Any type. This is used for intrinsics that have overloadings.
- // This is only for tblgen's consumption!
- Any = 255
-
- // clang-format on
- };
-
- SimpleValueType SimpleTy = INVALID_SIMPLE_VALUE_TYPE;
-
- constexpr MVT() = default;
- constexpr MVT(SimpleValueType SVT) : SimpleTy(SVT) {}
-
- bool operator>(const MVT& S) const { return SimpleTy > S.SimpleTy; }
- bool operator<(const MVT& S) const { return SimpleTy < S.SimpleTy; }
- bool operator==(const MVT& S) const { return SimpleTy == S.SimpleTy; }
- bool operator!=(const MVT& S) const { return SimpleTy != S.SimpleTy; }
- bool operator>=(const MVT& S) const { return SimpleTy >= S.SimpleTy; }
- bool operator<=(const MVT& S) const { return SimpleTy <= S.SimpleTy; }
-
- /// Return true if this is a valid simple valuetype.
- bool isValid() const {
- return (SimpleTy >= MVT::FIRST_VALUETYPE &&
- SimpleTy <= MVT::LAST_VALUETYPE);
- }
-
- /// Return true if this is a FP or a vector FP type.
- bool isFloatingPoint() const {
- return ((SimpleTy >= MVT::FIRST_FP_VALUETYPE &&
- SimpleTy <= MVT::LAST_FP_VALUETYPE) ||
- (SimpleTy >= MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE &&
- SimpleTy <= MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE) ||
- (SimpleTy >= MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE &&
- SimpleTy <= MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE));
- }
-
- /// Return true if this is an integer or a vector integer type.
- bool isInteger() const {
- return ((SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE &&
- SimpleTy <= MVT::LAST_INTEGER_VALUETYPE) ||
- (SimpleTy >= MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE &&
- SimpleTy <= MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE) ||
- (SimpleTy >= MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE &&
- SimpleTy <= MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE));
- }
-
- /// Return true if this is an integer, not including vectors.
- bool isScalarInteger() const {
- return (SimpleTy >= MVT::FIRST_INTEGER_VALUETYPE &&
- SimpleTy <= MVT::LAST_INTEGER_VALUETYPE);
- }
-
- /// Return true if this is a vector value type.
- bool isVector() const {
- return (SimpleTy >= MVT::FIRST_VECTOR_VALUETYPE &&
- SimpleTy <= MVT::LAST_VECTOR_VALUETYPE);
- }
-
- /// Return true if this is a vector value type where the
- /// runtime length is machine dependent
- bool isScalableVector() const {
- return (SimpleTy >= MVT::FIRST_SCALABLE_VECTOR_VALUETYPE &&
- SimpleTy <= MVT::LAST_SCALABLE_VECTOR_VALUETYPE);
- }
-
- bool isFixedLengthVector() const {
- return (SimpleTy >= MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE &&
- SimpleTy <= MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE);
- }
-
- /// Return true if this is a 16-bit vector type.
- bool is16BitVector() const {
- return (SimpleTy == MVT::v2i8 || SimpleTy == MVT::v1i16 ||
- SimpleTy == MVT::v16i1 || SimpleTy == MVT::v1f16);
- }
-
- /// Return true if this is a 32-bit vector type.
- bool is32BitVector() const {
- return (SimpleTy == MVT::v32i1 || SimpleTy == MVT::v4i8 ||
- SimpleTy == MVT::v2i16 || SimpleTy == MVT::v1i32 ||
- SimpleTy == MVT::v2f16 || SimpleTy == MVT::v2bf16 ||
- SimpleTy == MVT::v1f32);
- }
-
- /// Return true if this is a 64-bit vector type.
- bool is64BitVector() const {
- return (SimpleTy == MVT::v64i1 || SimpleTy == MVT::v8i8 ||
- SimpleTy == MVT::v4i16 || SimpleTy == MVT::v2i32 ||
- SimpleTy == MVT::v1i64 || SimpleTy == MVT::v4f16 ||
- SimpleTy == MVT::v4bf16 || SimpleTy == MVT::v2f32 ||
- SimpleTy == MVT::v1f64);
- }
-
- /// Return true if this is a 128-bit vector type.
- bool is128BitVector() const {
- return (SimpleTy == MVT::v128i1 || SimpleTy == MVT::v16i8 ||
- SimpleTy == MVT::v8i16 || SimpleTy == MVT::v4i32 ||
- SimpleTy == MVT::v2i64 || SimpleTy == MVT::v1i128 ||
- SimpleTy == MVT::v8f16 || SimpleTy == MVT::v8bf16 ||
- SimpleTy == MVT::v4f32 || SimpleTy == MVT::v2f64);
- }
-
- /// Return true if this is a 256-bit vector type.
- bool is256BitVector() const {
- return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v16bf16 ||
- SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64 ||
- SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 ||
- SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64 ||
- SimpleTy == MVT::v256i1 || SimpleTy == MVT::v128i2 ||
- SimpleTy == MVT::v64i4);
- }
-
- /// Return true if this is a 512-bit vector type.
- bool is512BitVector() const {
- return (SimpleTy == MVT::v32f16 || SimpleTy == MVT::v32bf16 ||
- SimpleTy == MVT::v16f32 || SimpleTy == MVT::v8f64 ||
- SimpleTy == MVT::v512i1 || SimpleTy == MVT::v256i2 ||
- SimpleTy == MVT::v128i4 || SimpleTy == MVT::v64i8 ||
- SimpleTy == MVT::v32i16 || SimpleTy == MVT::v16i32 ||
- SimpleTy == MVT::v8i64);
- }
-
- /// Return true if this is a 1024-bit vector type.
- bool is1024BitVector() const {
- return (SimpleTy == MVT::v1024i1 || SimpleTy == MVT::v128i8 ||
- SimpleTy == MVT::v64i16 || SimpleTy == MVT::v32i32 ||
- SimpleTy == MVT::v16i64 || SimpleTy == MVT::v64f16 ||
- SimpleTy == MVT::v32f32 || SimpleTy == MVT::v16f64 ||
- SimpleTy == MVT::v64bf16);
- }
-
- /// Return true if this is a 2048-bit vector type.
- bool is2048BitVector() const {
- return (SimpleTy == MVT::v256i8 || SimpleTy == MVT::v128i16 ||
- SimpleTy == MVT::v64i32 || SimpleTy == MVT::v32i64 ||
- SimpleTy == MVT::v128f16 || SimpleTy == MVT::v64f32 ||
- SimpleTy == MVT::v32f64 || SimpleTy == MVT::v128bf16 ||
- SimpleTy == MVT::v2048i1);
- }
-
- /// Return true if this is an overloaded type for TableGen.
- bool isOverloaded() const {
- return (SimpleTy == MVT::Any || SimpleTy == MVT::iAny ||
- SimpleTy == MVT::fAny || SimpleTy == MVT::vAny ||
- SimpleTy == MVT::iPTRAny);
- }
-
- /// Return a vector with the same number of elements as this vector, but
- /// with the element type converted to an integer type with the same
- /// bitwidth.
- MVT changeVectorElementTypeToInteger() const {
- MVT EltTy = getVectorElementType();
- MVT IntTy = MVT::getIntegerVT(EltTy.getSizeInBits());
- MVT VecTy = MVT::getVectorVT(IntTy, getVectorElementCount());
- assert(VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE &&
- "Simple vector VT not representable by simple integer vector VT!");
- return VecTy;
- }
-
- /// Return a VT for a vector type whose attributes match ourselves
- /// with the exception of the element type that is chosen by the caller.
- MVT changeVectorElementType(MVT EltVT) const {
- MVT VecTy = MVT::getVectorVT(EltVT, getVectorElementCount());
- assert(VecTy.SimpleTy != MVT::INVALID_SIMPLE_VALUE_TYPE &&
- "Simple vector VT not representable by simple integer vector VT!");
- return VecTy;
- }
-
- /// Return the type converted to an equivalently sized integer or vector
- /// with integer element type. Similar to changeVectorElementTypeToInteger,
- /// but also handles scalars.
- MVT changeTypeToInteger() {
- if (isVector())
- return changeVectorElementTypeToInteger();
- return MVT::getIntegerVT(getSizeInBits());
- }
-
- /// Return a VT for a vector type with the same element type but
- /// half the number of elements.
- MVT getHalfNumVectorElementsVT() const {
- MVT EltVT = getVectorElementType();
- auto EltCnt = getVectorElementCount();
- assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
- return getVectorVT(EltVT, EltCnt.divideCoefficientBy(2));
- }
-
- /// Returns true if the given vector is a power of 2.
- bool isPow2VectorType() const {
- unsigned NElts = getVectorMinNumElements();
- return !(NElts & (NElts - 1));
- }
-
- /// Widens the length of the given vector MVT up to the nearest power of 2
- /// and returns that type.
- MVT getPow2VectorType() const {
- if (isPow2VectorType())
- return *this;
-
- ElementCount NElts = getVectorElementCount();
- unsigned NewMinCount = 1 << Log2_32_Ceil(NElts.getKnownMinValue());
- NElts = ElementCount::get(NewMinCount, NElts.isScalable());
- return MVT::getVectorVT(getVectorElementType(), NElts);
- }
-
- /// If this is a vector, return the element type, otherwise return this.
- MVT getScalarType() const {
- return isVector() ? getVectorElementType() : *this;
- }
-
- MVT getVectorElementType() const {
- // clang-format off
- switch (SimpleTy) {
- default:
- llvm_unreachable("Not a vector MVT!");
- case v1i1:
- case v2i1:
- case v4i1:
- case v8i1:
- case v16i1:
- case v32i1:
- case v64i1:
- case v128i1:
- case v256i1:
- case v512i1:
- case v1024i1:
- case v2048i1:
- case nxv1i1:
- case nxv2i1:
- case nxv4i1:
- case nxv8i1:
- case nxv16i1:
- case nxv32i1:
- case nxv64i1: return i1;
- case v128i2:
- case v256i2: return i2;
- case v64i4:
- case v128i4: return i4;
- case v1i8:
- case v2i8:
- case v4i8:
- case v8i8:
- case v16i8:
- case v32i8:
- case v64i8:
- case v128i8:
- case v256i8:
- case v512i8:
- case v1024i8:
- case nxv1i8:
- case nxv2i8:
- case nxv4i8:
- case nxv8i8:
- case nxv16i8:
- case nxv32i8:
- case nxv64i8: return i8;
- case v1i16:
- case v2i16:
- case v3i16:
- case v4i16:
- case v8i16:
- case v16i16:
- case v32i16:
- case v64i16:
- case v128i16:
- case v256i16:
- case v512i16:
- case nxv1i16:
- case nxv2i16:
- case nxv4i16:
- case nxv8i16:
- case nxv16i16:
- case nxv32i16: return i16;
- case v1i32:
- case v2i32:
- case v3i32:
- case v4i32:
- case v5i32:
- case v6i32:
- case v7i32:
- case v8i32:
- case v9i32:
- case v10i32:
- case v11i32:
- case v12i32:
- case v16i32:
- case v32i32:
- case v64i32:
- case v128i32:
- case v256i32:
- case v512i32:
- case v1024i32:
- case v2048i32:
- case nxv1i32:
- case nxv2i32:
- case nxv4i32:
- case nxv8i32:
- case nxv16i32:
- case nxv32i32: return i32;
- case v1i64:
- case v2i64:
- case v3i64:
- case v4i64:
- case v8i64:
- case v16i64:
- case v32i64:
- case v64i64:
- case v128i64:
- case v256i64:
- case nxv1i64:
- case nxv2i64:
- case nxv4i64:
- case nxv8i64:
- case nxv16i64:
- case nxv32i64: return i64;
- case v1i128: return i128;
- case v1f16:
- case v2f16:
- case v3f16:
- case v4f16:
- case v8f16:
- case v16f16:
- case v32f16:
- case v64f16:
- case v128f16:
- case v256f16:
- case v512f16:
- case nxv1f16:
- case nxv2f16:
- case nxv4f16:
- case nxv8f16:
- case nxv16f16:
- case nxv32f16: return f16;
- case v2bf16:
- case v3bf16:
- case v4bf16:
- case v8bf16:
- case v16bf16:
- case v32bf16:
- case v64bf16:
- case v128bf16:
- case nxv1bf16:
- case nxv2bf16:
- case nxv4bf16:
- case nxv8bf16:
- case nxv16bf16:
- case nxv32bf16: return bf16;
- case v1f32:
- case v2f32:
- case v3f32:
- case v4f32:
- case v5f32:
- case v6f32:
- case v7f32:
- case v8f32:
- case v9f32:
- case v10f32:
- case v11f32:
- case v12f32:
- case v16f32:
- case v32f32:
- case v64f32:
- case v128f32:
- case v256f32:
- case v512f32:
- case v1024f32:
- case v2048f32:
- case nxv1f32:
- case nxv2f32:
- case nxv4f32:
- case nxv8f32:
- case nxv16f32: return f32;
- case v1f64:
- case v2f64:
- case v3f64:
- case v4f64:
- case v8f64:
- case v16f64:
- case v32f64:
- case v64f64:
- case v128f64:
- case v256f64:
- case nxv1f64:
- case nxv2f64:
- case nxv4f64:
- case nxv8f64: return f64;
- }
- // clang-format on
- }
-
- /// Given a vector type, return the minimum number of elements it contains.
- unsigned getVectorMinNumElements() const {
- switch (SimpleTy) {
- default:
- llvm_unreachable("Not a vector MVT!");
- case v2048i1:
- case v2048i32:
- case v2048f32: return 2048;
- case v1024i1:
- case v1024i8:
- case v1024i32:
- case v1024f32: return 1024;
- case v512i1:
- case v512i8:
- case v512i16:
- case v512i32:
- case v512f16:
- case v512f32: return 512;
- case v256i1:
- case v256i2:
- case v256i8:
- case v256i16:
- case v256f16:
- case v256i32:
- case v256i64:
- case v256f32:
- case v256f64: return 256;
- case v128i1:
- case v128i2:
- case v128i4:
- case v128i8:
- case v128i16:
- case v128i32:
- case v128i64:
- case v128f16:
- case v128bf16:
- case v128f32:
- case v128f64: return 128;
- case v64i1:
- case v64i4:
- case v64i8:
- case v64i16:
- case v64i32:
- case v64i64:
- case v64f16:
- case v64bf16:
- case v64f32:
- case v64f64:
- case nxv64i1:
- case nxv64i8: return 64;
- case v32i1:
- case v32i8:
- case v32i16:
- case v32i32:
- case v32i64:
- case v32f16:
- case v32bf16:
- case v32f32:
- case v32f64:
- case nxv32i1:
- case nxv32i8:
- case nxv32i16:
- case nxv32i32:
- case nxv32i64:
- case nxv32f16:
- case nxv32bf16: return 32;
- case v16i1:
- case v16i8:
- case v16i16:
- case v16i32:
- case v16i64:
- case v16f16:
- case v16bf16:
- case v16f32:
- case v16f64:
- case nxv16i1:
- case nxv16i8:
- case nxv16i16:
- case nxv16i32:
- case nxv16i64:
- case nxv16f16:
- case nxv16bf16:
- case nxv16f32: return 16;
- case v12i32:
- case v12f32: return 12;
- case v11i32:
- case v11f32: return 11;
- case v10i32:
- case v10f32: return 10;
- case v9i32:
- case v9f32: return 9;
- case v8i1:
- case v8i8:
- case v8i16:
- case v8i32:
- case v8i64:
- case v8f16:
- case v8bf16:
- case v8f32:
- case v8f64:
- case nxv8i1:
- case nxv8i8:
- case nxv8i16:
- case nxv8i32:
- case nxv8i64:
- case nxv8f16:
- case nxv8bf16:
- case nxv8f32:
- case nxv8f64: return 8;
- case v7i32:
- case v7f32: return 7;
- case v6i32:
- case v6f32: return 6;
- case v5i32:
- case v5f32: return 5;
- case v4i1:
- case v4i8:
- case v4i16:
- case v4i32:
- case v4i64:
- case v4f16:
- case v4bf16:
- case v4f32:
- case v4f64:
- case nxv4i1:
- case nxv4i8:
- case nxv4i16:
- case nxv4i32:
- case nxv4i64:
- case nxv4f16:
- case nxv4bf16:
- case nxv4f32:
- case nxv4f64: return 4;
- case v3i16:
- case v3i32:
- case v3i64:
- case v3f16:
- case v3bf16:
- case v3f32:
- case v3f64: return 3;
- case v2i1:
- case v2i8:
- case v2i16:
- case v2i32:
- case v2i64:
- case v2f16:
- case v2bf16:
- case v2f32:
- case v2f64:
- case nxv2i1:
- case nxv2i8:
- case nxv2i16:
- case nxv2i32:
- case nxv2i64:
- case nxv2f16:
- case nxv2bf16:
- case nxv2f32:
- case nxv2f64: return 2;
- case v1i1:
- case v1i8:
- case v1i16:
- case v1i32:
- case v1i64:
- case v1i128:
- case v1f16:
- case v1f32:
- case v1f64:
- case nxv1i1:
- case nxv1i8:
- case nxv1i16:
- case nxv1i32:
- case nxv1i64:
- case nxv1f16:
- case nxv1bf16:
- case nxv1f32:
- case nxv1f64: return 1;
- }
- }
-
- ElementCount getVectorElementCount() const {
- return ElementCount::get(getVectorMinNumElements(), isScalableVector());
- }
-
- unsigned getVectorNumElements() const {
- if (isScalableVector())
- llvm::reportInvalidSizeRequest(
- "Possible incorrect use of MVT::getVectorNumElements() for "
- "scalable vector. Scalable flag may be dropped, use "
- "MVT::getVectorElementCount() instead");
- return getVectorMinNumElements();
- }
-
- /// Returns the size of the specified MVT in bits.
- ///
- /// If the value type is a scalable vector type, the scalable property will
- /// be set and the runtime size will be a positive integer multiple of the
- /// base size.
- TypeSize getSizeInBits() const {
- switch (SimpleTy) {
- default:
- llvm_unreachable("getSizeInBits called on extended MVT.");
- case Other:
- llvm_unreachable("Value type is non-standard value, Other.");
- case iPTR:
- llvm_unreachable("Value type size is target-dependent. Ask TLI.");
- case iPTRAny:
- case iAny:
- case fAny:
- case vAny:
- case Any:
- llvm_unreachable("Value type is overloaded.");
- case token:
- llvm_unreachable("Token type is a sentinel that cannot be used "
- "in codegen and has no size");
- case Metadata:
- llvm_unreachable("Value type is metadata.");
- case i1:
- case v1i1: return TypeSize::Fixed(1);
- case nxv1i1: return TypeSize::Scalable(1);
- case i2:
- case v2i1: return TypeSize::Fixed(2);
- case nxv2i1: return TypeSize::Scalable(2);
- case i4:
- case v4i1: return TypeSize::Fixed(4);
- case nxv4i1: return TypeSize::Scalable(4);
- case i8 :
- case v1i8:
- case v8i1: return TypeSize::Fixed(8);
- case nxv1i8:
- case nxv8i1: return TypeSize::Scalable(8);
- case i16 :
- case f16:
- case bf16:
- case v16i1:
- case v2i8:
- case v1i16:
- case v1f16: return TypeSize::Fixed(16);
- case nxv16i1:
- case nxv2i8:
- case nxv1i16:
- case nxv1bf16:
- case nxv1f16: return TypeSize::Scalable(16);
- case f32 :
- case i32 :
- case v32i1:
- case v4i8:
- case v2i16:
- case v2f16:
- case v2bf16:
- case v1f32:
- case v1i32: return TypeSize::Fixed(32);
- case nxv32i1:
- case nxv4i8:
- case nxv2i16:
- case nxv1i32:
- case nxv2f16:
- case nxv2bf16:
- case nxv1f32: return TypeSize::Scalable(32);
- case v3i16:
- case v3f16:
- case v3bf16: return TypeSize::Fixed(48);
- case x86mmx:
- case f64 :
- case i64 :
- case v64i1:
- case v8i8:
- case v4i16:
- case v2i32:
- case v1i64:
- case v4f16:
- case v4bf16:
- case v2f32:
- case v1f64: return TypeSize::Fixed(64);
- case nxv64i1:
- case nxv8i8:
- case nxv4i16:
- case nxv2i32:
- case nxv1i64:
- case nxv4f16:
- case nxv4bf16:
- case nxv2f32:
- case nxv1f64: return TypeSize::Scalable(64);
- case f80 : return TypeSize::Fixed(80);
- case v3i32:
- case v3f32: return TypeSize::Fixed(96);
- case f128:
- case ppcf128:
- case i128:
- case v128i1:
- case v16i8:
- case v8i16:
- case v4i32:
- case v2i64:
- case v1i128:
- case v8f16:
- case v8bf16:
- case v4f32:
- case v2f64: return TypeSize::Fixed(128);
- case nxv16i8:
- case nxv8i16:
- case nxv4i32:
- case nxv2i64:
- case nxv8f16:
- case nxv8bf16:
- case nxv4f32:
- case nxv2f64: return TypeSize::Scalable(128);
- case v5i32:
- case v5f32: return TypeSize::Fixed(160);
- case v6i32:
- case v3i64:
- case v6f32:
- case v3f64: return TypeSize::Fixed(192);
- case v7i32:
- case v7f32: return TypeSize::Fixed(224);
- case v256i1:
- case v128i2:
- case v64i4:
- case v32i8:
- case v16i16:
- case v8i32:
- case v4i64:
- case v16f16:
- case v16bf16:
- case v8f32:
- case v4f64: return TypeSize::Fixed(256);
- case nxv32i8:
- case nxv16i16:
- case nxv8i32:
- case nxv4i64:
- case nxv16f16:
- case nxv16bf16:
- case nxv8f32:
- case nxv4f64: return TypeSize::Scalable(256);
- case v9i32:
- case v9f32: return TypeSize::Fixed(288);
- case v10i32:
- case v10f32: return TypeSize::Fixed(320);
- case v11i32:
- case v11f32: return TypeSize::Fixed(352);
- case v12i32:
- case v12f32: return TypeSize::Fixed(384);
- case i64x8:
- case v512i1:
- case v256i2:
- case v128i4:
- case v64i8:
- case v32i16:
- case v16i32:
- case v8i64:
- case v32f16:
- case v32bf16:
- case v16f32:
- case v8f64: return TypeSize::Fixed(512);
- case nxv64i8:
- case nxv32i16:
- case nxv16i32:
- case nxv8i64:
- case nxv32f16:
- case nxv32bf16:
- case nxv16f32:
- case nxv8f64: return TypeSize::Scalable(512);
- case v1024i1:
- case v128i8:
- case v64i16:
- case v32i32:
- case v16i64:
- case v64f16:
- case v64bf16:
- case v32f32:
- case v16f64: return TypeSize::Fixed(1024);
- case nxv32i32:
- case nxv16i64: return TypeSize::Scalable(1024);
- case v2048i1:
- case v256i8:
- case v128i16:
- case v64i32:
- case v32i64:
- case v128f16:
- case v128bf16:
- case v64f32:
- case v32f64: return TypeSize::Fixed(2048);
- case nxv32i64: return TypeSize::Scalable(2048);
- case v512i8:
- case v256i16:
- case v128i32:
- case v64i64:
- case v256f16:
- case v128f32:
- case v64f64: return TypeSize::Fixed(4096);
- case v1024i8:
- case v512i16:
- case v256i32:
- case v128i64:
- case v512f16:
- case v256f32:
- case x86amx:
- case v128f64: return TypeSize::Fixed(8192);
- case v512i32:
- case v256i64:
- case v512f32:
- case v256f64: return TypeSize::Fixed(16384);
- case v1024i32:
- case v1024f32: return TypeSize::Fixed(32768);
- case v2048i32:
- case v2048f32: return TypeSize::Fixed(65536);
- case funcref:
- case externref: return TypeSize::Fixed(0); // opaque type
- }
- }
-
- /// Return the size of the specified fixed width value type in bits. The
- /// function will assert if the type is scalable.
- uint64_t getFixedSizeInBits() const {
- return getSizeInBits().getFixedValue();
- }
-
- uint64_t getScalarSizeInBits() const {
- return getScalarType().getSizeInBits().getFixedValue();
- }
-
- /// Return the number of bytes overwritten by a store of the specified value
- /// type.
- ///
- /// If the value type is a scalable vector type, the scalable property will
- /// be set and the runtime size will be a positive integer multiple of the
- /// base size.
- TypeSize getStoreSize() const {
- TypeSize BaseSize = getSizeInBits();
- return {(BaseSize.getKnownMinValue() + 7) / 8, BaseSize.isScalable()};
- }
-
- // Return the number of bytes overwritten by a store of this value type or
- // this value type's element type in the case of a vector.
- uint64_t getScalarStoreSize() const {
- return getScalarType().getStoreSize().getFixedValue();
- }
-
- /// Return the number of bits overwritten by a store of the specified value
- /// type.
- ///
- /// If the value type is a scalable vector type, the scalable property will
- /// be set and the runtime size will be a positive integer multiple of the
- /// base size.
- TypeSize getStoreSizeInBits() const {
- return getStoreSize() * 8;
- }
-
- /// Returns true if the number of bits for the type is a multiple of an
- /// 8-bit byte.
- bool isByteSized() const { return getSizeInBits().isKnownMultipleOf(8); }
-
- /// Return true if we know at compile time this has more bits than VT.
- bool knownBitsGT(MVT VT) const {
- return TypeSize::isKnownGT(getSizeInBits(), VT.getSizeInBits());
- }
-
- /// Return true if we know at compile time this has more than or the same
- /// bits as VT.
- bool knownBitsGE(MVT VT) const {
- return TypeSize::isKnownGE(getSizeInBits(), VT.getSizeInBits());
- }
-
- /// Return true if we know at compile time this has fewer bits than VT.
- bool knownBitsLT(MVT VT) const {
- return TypeSize::isKnownLT(getSizeInBits(), VT.getSizeInBits());
- }
-
- /// Return true if we know at compile time this has fewer than or the same
- /// bits as VT.
- bool knownBitsLE(MVT VT) const {
- return TypeSize::isKnownLE(getSizeInBits(), VT.getSizeInBits());
- }
-
- /// Return true if this has more bits than VT.
- bool bitsGT(MVT VT) const {
- assert(isScalableVector() == VT.isScalableVector() &&
- "Comparison between scalable and fixed types");
- return knownBitsGT(VT);
- }
-
- /// Return true if this has no fewer bits than VT.
- bool bitsGE(MVT VT) const {
- assert(isScalableVector() == VT.isScalableVector() &&
- "Comparison between scalable and fixed types");
- return knownBitsGE(VT);
- }
-
- /// Return true if this has fewer bits than VT.
- bool bitsLT(MVT VT) const {
- assert(isScalableVector() == VT.isScalableVector() &&
- "Comparison between scalable and fixed types");
- return knownBitsLT(VT);
- }
-
- /// Return true if this has no more bits than VT.
- bool bitsLE(MVT VT) const {
- assert(isScalableVector() == VT.isScalableVector() &&
- "Comparison between scalable and fixed types");
- return knownBitsLE(VT);
- }
-
- static MVT getFloatingPointVT(unsigned BitWidth) {
- switch (BitWidth) {
- default:
- llvm_unreachable("Bad bit width!");
- case 16:
- return MVT::f16;
- case 32:
- return MVT::f32;
- case 64:
- return MVT::f64;
- case 80:
- return MVT::f80;
- case 128:
- return MVT::f128;
- }
- }
-
- static MVT getIntegerVT(unsigned BitWidth) {
- switch (BitWidth) {
- default:
- return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
- case 1:
- return MVT::i1;
- case 2:
- return MVT::i2;
- case 4:
- return MVT::i4;
- case 8:
- return MVT::i8;
- case 16:
- return MVT::i16;
- case 32:
- return MVT::i32;
- case 64:
- return MVT::i64;
- case 128:
- return MVT::i128;
- }
- }
-
- static MVT getVectorVT(MVT VT, unsigned NumElements) {
- // clang-format off
- switch (VT.SimpleTy) {
- default:
- break;
- case MVT::i1:
- if (NumElements == 1) return MVT::v1i1;
- if (NumElements == 2) return MVT::v2i1;
- if (NumElements == 4) return MVT::v4i1;
- if (NumElements == 8) return MVT::v8i1;
- if (NumElements == 16) return MVT::v16i1;
- if (NumElements == 32) return MVT::v32i1;
- if (NumElements == 64) return MVT::v64i1;
- if (NumElements == 128) return MVT::v128i1;
- if (NumElements == 256) return MVT::v256i1;
- if (NumElements == 512) return MVT::v512i1;
- if (NumElements == 1024) return MVT::v1024i1;
- if (NumElements == 2048) return MVT::v2048i1;
- break;
- case MVT::i2:
- if (NumElements == 128) return MVT::v128i2;
- if (NumElements == 256) return MVT::v256i2;
- break;
- case MVT::i4:
- if (NumElements == 64) return MVT::v64i4;
- if (NumElements == 128) return MVT::v128i4;
- break;
- case MVT::i8:
- if (NumElements == 1) return MVT::v1i8;
- if (NumElements == 2) return MVT::v2i8;
- if (NumElements == 4) return MVT::v4i8;
- if (NumElements == 8) return MVT::v8i8;
- if (NumElements == 16) return MVT::v16i8;
- if (NumElements == 32) return MVT::v32i8;
- if (NumElements == 64) return MVT::v64i8;
- if (NumElements == 128) return MVT::v128i8;
- if (NumElements == 256) return MVT::v256i8;
- if (NumElements == 512) return MVT::v512i8;
- if (NumElements == 1024) return MVT::v1024i8;
- break;
- case MVT::i16:
- if (NumElements == 1) return MVT::v1i16;
- if (NumElements == 2) return MVT::v2i16;
- if (NumElements == 3) return MVT::v3i16;
- if (NumElements == 4) return MVT::v4i16;
- if (NumElements == 8) return MVT::v8i16;
- if (NumElements == 16) return MVT::v16i16;
- if (NumElements == 32) return MVT::v32i16;
- if (NumElements == 64) return MVT::v64i16;
- if (NumElements == 128) return MVT::v128i16;
- if (NumElements == 256) return MVT::v256i16;
- if (NumElements == 512) return MVT::v512i16;
- break;
- case MVT::i32:
- if (NumElements == 1) return MVT::v1i32;
- if (NumElements == 2) return MVT::v2i32;
- if (NumElements == 3) return MVT::v3i32;
- if (NumElements == 4) return MVT::v4i32;
- if (NumElements == 5) return MVT::v5i32;
- if (NumElements == 6) return MVT::v6i32;
- if (NumElements == 7) return MVT::v7i32;
- if (NumElements == 8) return MVT::v8i32;
- if (NumElements == 9) return MVT::v9i32;
- if (NumElements == 10) return MVT::v10i32;
- if (NumElements == 11) return MVT::v11i32;
- if (NumElements == 12) return MVT::v12i32;
- if (NumElements == 16) return MVT::v16i32;
- if (NumElements == 32) return MVT::v32i32;
- if (NumElements == 64) return MVT::v64i32;
- if (NumElements == 128) return MVT::v128i32;
- if (NumElements == 256) return MVT::v256i32;
- if (NumElements == 512) return MVT::v512i32;
- if (NumElements == 1024) return MVT::v1024i32;
- if (NumElements == 2048) return MVT::v2048i32;
- break;
- case MVT::i64:
- if (NumElements == 1) return MVT::v1i64;
- if (NumElements == 2) return MVT::v2i64;
- if (NumElements == 3) return MVT::v3i64;
- if (NumElements == 4) return MVT::v4i64;
- if (NumElements == 8) return MVT::v8i64;
- if (NumElements == 16) return MVT::v16i64;
- if (NumElements == 32) return MVT::v32i64;
- if (NumElements == 64) return MVT::v64i64;
- if (NumElements == 128) return MVT::v128i64;
- if (NumElements == 256) return MVT::v256i64;
- break;
- case MVT::i128:
- if (NumElements == 1) return MVT::v1i128;
- break;
- case MVT::f16:
- if (NumElements == 1) return MVT::v1f16;
- if (NumElements == 2) return MVT::v2f16;
- if (NumElements == 3) return MVT::v3f16;
- if (NumElements == 4) return MVT::v4f16;
- if (NumElements == 8) return MVT::v8f16;
- if (NumElements == 16) return MVT::v16f16;
- if (NumElements == 32) return MVT::v32f16;
- if (NumElements == 64) return MVT::v64f16;
- if (NumElements == 128) return MVT::v128f16;
- if (NumElements == 256) return MVT::v256f16;
- if (NumElements == 512) return MVT::v512f16;
- break;
- case MVT::bf16:
- if (NumElements == 2) return MVT::v2bf16;
- if (NumElements == 3) return MVT::v3bf16;
- if (NumElements == 4) return MVT::v4bf16;
- if (NumElements == 8) return MVT::v8bf16;
- if (NumElements == 16) return MVT::v16bf16;
- if (NumElements == 32) return MVT::v32bf16;
- if (NumElements == 64) return MVT::v64bf16;
- if (NumElements == 128) return MVT::v128bf16;
- break;
- case MVT::f32:
- if (NumElements == 1) return MVT::v1f32;
- if (NumElements == 2) return MVT::v2f32;
- if (NumElements == 3) return MVT::v3f32;
- if (NumElements == 4) return MVT::v4f32;
- if (NumElements == 5) return MVT::v5f32;
- if (NumElements == 6) return MVT::v6f32;
- if (NumElements == 7) return MVT::v7f32;
- if (NumElements == 8) return MVT::v8f32;
- if (NumElements == 9) return MVT::v9f32;
- if (NumElements == 10) return MVT::v10f32;
- if (NumElements == 11) return MVT::v11f32;
- if (NumElements == 12) return MVT::v12f32;
- if (NumElements == 16) return MVT::v16f32;
- if (NumElements == 32) return MVT::v32f32;
- if (NumElements == 64) return MVT::v64f32;
- if (NumElements == 128) return MVT::v128f32;
- if (NumElements == 256) return MVT::v256f32;
- if (NumElements == 512) return MVT::v512f32;
- if (NumElements == 1024) return MVT::v1024f32;
- if (NumElements == 2048) return MVT::v2048f32;
- break;
- case MVT::f64:
- if (NumElements == 1) return MVT::v1f64;
- if (NumElements == 2) return MVT::v2f64;
- if (NumElements == 3) return MVT::v3f64;
- if (NumElements == 4) return MVT::v4f64;
- if (NumElements == 8) return MVT::v8f64;
- if (NumElements == 16) return MVT::v16f64;
- if (NumElements == 32) return MVT::v32f64;
- if (NumElements == 64) return MVT::v64f64;
- if (NumElements == 128) return MVT::v128f64;
- if (NumElements == 256) return MVT::v256f64;
- break;
- }
- return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
- // clang-format on
- }
-
- static MVT getScalableVectorVT(MVT VT, unsigned NumElements) {
- switch(VT.SimpleTy) {
- default:
- break;
- case MVT::i1:
- if (NumElements == 1) return MVT::nxv1i1;
- if (NumElements == 2) return MVT::nxv2i1;
- if (NumElements == 4) return MVT::nxv4i1;
- if (NumElements == 8) return MVT::nxv8i1;
- if (NumElements == 16) return MVT::nxv16i1;
- if (NumElements == 32) return MVT::nxv32i1;
- if (NumElements == 64) return MVT::nxv64i1;
- break;
- case MVT::i8:
- if (NumElements == 1) return MVT::nxv1i8;
- if (NumElements == 2) return MVT::nxv2i8;
- if (NumElements == 4) return MVT::nxv4i8;
- if (NumElements == 8) return MVT::nxv8i8;
- if (NumElements == 16) return MVT::nxv16i8;
- if (NumElements == 32) return MVT::nxv32i8;
- if (NumElements == 64) return MVT::nxv64i8;
- break;
- case MVT::i16:
- if (NumElements == 1) return MVT::nxv1i16;
- if (NumElements == 2) return MVT::nxv2i16;
- if (NumElements == 4) return MVT::nxv4i16;
- if (NumElements == 8) return MVT::nxv8i16;
- if (NumElements == 16) return MVT::nxv16i16;
- if (NumElements == 32) return MVT::nxv32i16;
- break;
- case MVT::i32:
- if (NumElements == 1) return MVT::nxv1i32;
- if (NumElements == 2) return MVT::nxv2i32;
- if (NumElements == 4) return MVT::nxv4i32;
- if (NumElements == 8) return MVT::nxv8i32;
- if (NumElements == 16) return MVT::nxv16i32;
- if (NumElements == 32) return MVT::nxv32i32;
- break;
- case MVT::i64:
- if (NumElements == 1) return MVT::nxv1i64;
- if (NumElements == 2) return MVT::nxv2i64;
- if (NumElements == 4) return MVT::nxv4i64;
- if (NumElements == 8) return MVT::nxv8i64;
- if (NumElements == 16) return MVT::nxv16i64;
- if (NumElements == 32) return MVT::nxv32i64;
- break;
- case MVT::f16:
- if (NumElements == 1) return MVT::nxv1f16;
- if (NumElements == 2) return MVT::nxv2f16;
- if (NumElements == 4) return MVT::nxv4f16;
- if (NumElements == 8) return MVT::nxv8f16;
- if (NumElements == 16) return MVT::nxv16f16;
- if (NumElements == 32) return MVT::nxv32f16;
- break;
- case MVT::bf16:
- if (NumElements == 1) return MVT::nxv1bf16;
- if (NumElements == 2) return MVT::nxv2bf16;
- if (NumElements == 4) return MVT::nxv4bf16;
- if (NumElements == 8) return MVT::nxv8bf16;
- if (NumElements == 16) return MVT::nxv16bf16;
- if (NumElements == 32) return MVT::nxv32bf16;
- break;
- case MVT::f32:
- if (NumElements == 1) return MVT::nxv1f32;
- if (NumElements == 2) return MVT::nxv2f32;
- if (NumElements == 4) return MVT::nxv4f32;
- if (NumElements == 8) return MVT::nxv8f32;
- if (NumElements == 16) return MVT::nxv16f32;
- break;
- case MVT::f64:
- if (NumElements == 1) return MVT::nxv1f64;
- if (NumElements == 2) return MVT::nxv2f64;
- if (NumElements == 4) return MVT::nxv4f64;
- if (NumElements == 8) return MVT::nxv8f64;
- break;
- }
- return (MVT::SimpleValueType)(MVT::INVALID_SIMPLE_VALUE_TYPE);
- }
-
- static MVT getVectorVT(MVT VT, unsigned NumElements, bool IsScalable) {
- if (IsScalable)
- return getScalableVectorVT(VT, NumElements);
- return getVectorVT(VT, NumElements);
- }
-
- static MVT getVectorVT(MVT VT, ElementCount EC) {
- if (EC.isScalable())
- return getScalableVectorVT(VT, EC.getKnownMinValue());
- return getVectorVT(VT, EC.getKnownMinValue());
- }
-
- /// Return the value type corresponding to the specified type. This returns
- /// all pointers as iPTR. If HandleUnknown is true, unknown types are
- /// returned as Other, otherwise they are invalid.
- static MVT getVT(Type *Ty, bool HandleUnknown = false);
-
- public:
- /// SimpleValueType Iteration
- /// @{
- static auto all_valuetypes() {
- return enum_seq_inclusive(MVT::FIRST_VALUETYPE, MVT::LAST_VALUETYPE,
- force_iteration_on_noniterable_enum);
- }
-
- static auto integer_valuetypes() {
- return enum_seq_inclusive(MVT::FIRST_INTEGER_VALUETYPE,
- MVT::LAST_INTEGER_VALUETYPE,
- force_iteration_on_noniterable_enum);
- }
-
- static auto fp_valuetypes() {
- return enum_seq_inclusive(MVT::FIRST_FP_VALUETYPE, MVT::LAST_FP_VALUETYPE,
- force_iteration_on_noniterable_enum);
- }
-
- static auto vector_valuetypes() {
- return enum_seq_inclusive(MVT::FIRST_VECTOR_VALUETYPE,
- MVT::LAST_VECTOR_VALUETYPE,
- force_iteration_on_noniterable_enum);
- }
-
- static auto fixedlen_vector_valuetypes() {
- return enum_seq_inclusive(MVT::FIRST_FIXEDLEN_VECTOR_VALUETYPE,
- MVT::LAST_FIXEDLEN_VECTOR_VALUETYPE,
- force_iteration_on_noniterable_enum);
- }
-
- static auto scalable_vector_valuetypes() {
- return enum_seq_inclusive(MVT::FIRST_SCALABLE_VECTOR_VALUETYPE,
- MVT::LAST_SCALABLE_VECTOR_VALUETYPE,
- force_iteration_on_noniterable_enum);
- }
-
- static auto integer_fixedlen_vector_valuetypes() {
- return enum_seq_inclusive(MVT::FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE,
- MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE,
- force_iteration_on_noniterable_enum);
- }
-
- static auto fp_fixedlen_vector_valuetypes() {
- return enum_seq_inclusive(MVT::FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE,
- MVT::LAST_FP_FIXEDLEN_VECTOR_VALUETYPE,
- force_iteration_on_noniterable_enum);
- }
-
- static auto integer_scalable_vector_valuetypes() {
- return enum_seq_inclusive(MVT::FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE,
- MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE,
- force_iteration_on_noniterable_enum);
- }
-
- static auto fp_scalable_vector_valuetypes() {
- return enum_seq_inclusive(MVT::FIRST_FP_SCALABLE_VECTOR_VALUETYPE,
- MVT::LAST_FP_SCALABLE_VECTOR_VALUETYPE,
- force_iteration_on_noniterable_enum);
- }
- /// @}
- };
-
-} // end namespace llvm
-
-#endif // LLVM_SUPPORT_MACHINEVALUETYPE_H
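A brief usage sketch for the MVT interface removed from llvm/Support above. It assumes the same class remains reachable via llvm/CodeGen/MachineValueType.h after this reorganization; that include path is an assumption of the sketch, not something this diff shows.

    // Sketch: common MVT queries (vector construction, element type, size).
    #include "llvm/CodeGen/MachineValueType.h" // assumed new home of MVT
    #include "llvm/Support/TypeSize.h"
    #include <cassert>

    using namespace llvm;

    int main() {
      MVT V = MVT::getVectorVT(MVT::f32, 4); // v4f32
      assert(V == MVT::v4f32 && V.is128BitVector());
      assert(V.getVectorElementType() == MVT::f32);
      assert(V.getFixedSizeInBits() == 128);

      // Scalable vectors carry an ElementCount instead of a fixed length.
      MVT NX = MVT::getVectorVT(MVT::i32, ElementCount::getScalable(4));
      assert(NX == MVT::nxv4i32 && NX.isScalableVector());
      return 0;
    }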
diff --git a/llvm/include/llvm/Support/MathExtras.h b/llvm/include/llvm/Support/MathExtras.h
index ff136ba2a884..dc095941fdc8 100644
--- a/llvm/include/llvm/Support/MathExtras.h
+++ b/llvm/include/llvm/Support/MathExtras.h
@@ -24,14 +24,6 @@
namespace llvm {
-/// The behavior an operation has on an input of 0.
-enum ZeroBehavior {
- /// The returned value is undefined.
- ZB_Undefined,
- /// The returned value is numeric_limits<T>::max()
- ZB_Max
-};
-
/// Mathematical constants.
namespace numbers {
// TODO: Track C++20 std::numbers.
@@ -68,47 +60,10 @@ constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A
phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
} // namespace numbers
-/// Count number of 0's from the least significant bit to the most
-/// stopping at the first 1.
-///
-/// Only unsigned integral types are allowed.
-///
-/// Returns std::numeric_limits<T>::digits on an input of 0.
-template <typename T> unsigned countTrailingZeros(T Val) {
- static_assert(std::is_unsigned_v<T>,
- "Only unsigned integral types are allowed.");
- return llvm::countr_zero(Val);
-}
-
-/// Count number of 0's from the most significant bit to the least
-/// stopping at the first 1.
-///
-/// Only unsigned integral types are allowed.
-///
-/// Returns std::numeric_limits<T>::digits on an input of 0.
-template <typename T> unsigned countLeadingZeros(T Val) {
- static_assert(std::is_unsigned_v<T>,
- "Only unsigned integral types are allowed.");
- return llvm::countl_zero(Val);
-}
-
-/// Get the index of the first set bit starting from the least
-/// significant bit.
-///
-/// Only unsigned integral types are allowed.
-///
-/// \param ZB the behavior on an input of 0.
-template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
- if (ZB == ZB_Max && Val == 0)
- return std::numeric_limits<T>::max();
-
- return llvm::countr_zero(Val);
-}
-
/// Create a bitmask with the N right-most bits set to 1, and all other
/// bits set to 0. Only unsigned types are allowed.
template <typename T> T maskTrailingOnes(unsigned N) {
- static_assert(std::is_unsigned<T>::value, "Invalid type!");
+ static_assert(std::is_unsigned_v<T>, "Invalid type!");
const unsigned Bits = CHAR_BIT * sizeof(T);
assert(N <= Bits && "Invalid bit index");
return N == 0 ? 0 : (T(-1) >> (Bits - N));
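A few worked values for the mask helpers kept by this hunk, as a standalone sketch (maskLeadingZeros appears in the context of the next hunk):

    #include "llvm/Support/MathExtras.h"
    #include <cassert>
    #include <cstdint>

    int main() {
      // maskTrailingOnes<T>(N): the N low bits set.
      assert(llvm::maskTrailingOnes<uint32_t>(8) == 0x000000FFu);
      // The N == 0 and N == bit-width edge cases are handled explicitly above.
      assert(llvm::maskTrailingOnes<uint32_t>(0) == 0u);
      assert(llvm::maskTrailingOnes<uint32_t>(32) == 0xFFFFFFFFu);
      // maskLeadingZeros<T>(N): the N high bits cleared.
      assert(llvm::maskLeadingZeros<uint32_t>(8) == 0x00FFFFFFu);
      return 0;
    }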
@@ -132,21 +87,6 @@ template <typename T> T maskLeadingZeros(unsigned N) {
return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
}
-/// Get the index of the last set bit starting from the least
-/// significant bit.
-///
-/// Only unsigned integral types are allowed.
-///
-/// \param ZB the behavior on an input of 0.
-template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
- if (ZB == ZB_Max && Val == 0)
- return std::numeric_limits<T>::max();
-
- // Use ^ instead of - because both gcc and llvm can remove the associated ^
- // in the __builtin_clz intrinsic on x86.
- return llvm::countl_zero(Val) ^ (std::numeric_limits<T>::digits - 1);
-}
-
/// Macro compressed bit reversal table for 256 bits.
///
/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
@@ -330,42 +270,6 @@ constexpr inline bool isPowerOf2_64(uint64_t Value) {
return llvm::has_single_bit(Value);
}
-/// Count the number of ones from the most significant bit to the first
-/// zero bit.
-///
-/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
-/// Only unsigned integral types are allowed.
-///
-/// Returns std::numeric_limits<T>::digits on an input of all ones.
-template <typename T> unsigned countLeadingOnes(T Value) {
- static_assert(std::is_unsigned_v<T>,
- "Only unsigned integral types are allowed.");
- return llvm::countl_one<T>(Value);
-}
-
-/// Count the number of ones from the least significant bit to the first
-/// zero bit.
-///
-/// Ex. countTrailingOnes(0x00FF00FF) == 8.
-/// Only unsigned integral types are allowed.
-///
-/// Returns std::numeric_limits<T>::digits on an input of all ones.
-template <typename T> unsigned countTrailingOnes(T Value) {
- static_assert(std::is_unsigned_v<T>,
- "Only unsigned integral types are allowed.");
- return llvm::countr_one<T>(Value);
-}
-
-/// Count the number of set bits in a value.
-/// Ex. countPopulation(0xF000F000) = 8
-/// Returns 0 if the word is zero.
-template <typename T>
-inline unsigned countPopulation(T Value) {
- static_assert(std::is_unsigned_v<T>,
- "Only unsigned integral types are allowed.");
- return (unsigned)llvm::popcount(Value);
-}
-
/// Return true if the argument contains a non-empty sequence of ones with the
/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
/// If true, \p MaskIdx will specify the index of the lowest set bit and \p
@@ -429,34 +333,6 @@ inline unsigned Log2_64_Ceil(uint64_t Value) {
return 64 - llvm::countl_zero(Value - 1);
}
-/// This function takes a 64-bit integer and returns the bit equivalent double.
-inline double BitsToDouble(uint64_t Bits) {
- static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
- return llvm::bit_cast<double>(Bits);
-}
-
-/// This function takes a 32-bit integer and returns the bit equivalent float.
-inline float BitsToFloat(uint32_t Bits) {
- static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
- return llvm::bit_cast<float>(Bits);
-}
-
-/// This function takes a double and returns the bit equivalent 64-bit integer.
-/// Note that copying doubles around changes the bits of NaNs on some hosts,
-/// notably x86, so this routine cannot be used if these bits are needed.
-inline uint64_t DoubleToBits(double Double) {
- static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
- return llvm::bit_cast<uint64_t>(Double);
-}
-
-/// This function takes a float and returns the bit equivalent 32-bit integer.
-/// Note that copying floats around changes the bits of NaNs on some hosts,
-/// notably x86, so this routine cannot be used if these bits are needed.
-inline uint32_t FloatToBits(float Float) {
- static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
- return llvm::bit_cast<uint32_t>(Float);
-}
-
/// A and B are either alignments or offsets. Return the minimum alignment that
/// may be assumed after adding the two together.
constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
@@ -480,12 +356,6 @@ constexpr inline uint64_t NextPowerOf2(uint64_t A) {
return A + 1;
}
-/// Returns the power of two which is less than or equal to the given value.
-/// Essentially, it is a floor operation across the domain of powers of two.
-inline uint64_t PowerOf2Floor(uint64_t A) {
- return llvm::bit_floor(A);
-}
-
/// Returns the power of two which is greater than or equal to the given value.
/// Essentially, it is a ceil operation across the domain of powers of two.
inline uint64_t PowerOf2Ceil(uint64_t A) {
@@ -593,7 +463,7 @@ inline int64_t SignExtend64(uint64_t X, unsigned B) {
/// Subtract two unsigned integers, X and Y, of type T and return the absolute
/// value of the result.
template <typename T>
-std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) {
+std::enable_if_t<std::is_unsigned_v<T>, T> AbsoluteDifference(T X, T Y) {
return X > Y ? (X - Y) : (Y - X);
}
@@ -601,7 +471,7 @@ std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) {
/// maximum representable value of T on overflow. ResultOverflowed indicates if
/// the result is larger than the maximum representable value of type T.
template <typename T>
-std::enable_if_t<std::is_unsigned<T>::value, T>
+std::enable_if_t<std::is_unsigned_v<T>, T>
SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
bool Dummy;
bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
@@ -630,7 +500,7 @@ std::enable_if_t<std::is_unsigned_v<T>, T> SaturatingAdd(T X, T Y, T Z,
/// maximum representable value of T on overflow. ResultOverflowed indicates if
/// the result is larger than the maximum representable value of type T.
template <typename T>
-std::enable_if_t<std::is_unsigned<T>::value, T>
+std::enable_if_t<std::is_unsigned_v<T>, T>
SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
bool Dummy;
bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
@@ -676,7 +546,7 @@ SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
/// overflow. ResultOverflowed indicates if the result is larger than the
/// maximum representable value of type T.
template <typename T>
-std::enable_if_t<std::is_unsigned<T>::value, T>
+std::enable_if_t<std::is_unsigned_v<T>, T>
SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
bool Dummy;
bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
@@ -695,7 +565,7 @@ extern const float huge_valf;
/// Add two signed integers, computing the two's complement truncated result,
/// returning true if overflow occurred.
template <typename T>
-std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) {
+std::enable_if_t<std::is_signed_v<T>, T> AddOverflow(T X, T Y, T &Result) {
#if __has_builtin(__builtin_add_overflow)
return __builtin_add_overflow(X, Y, &Result);
#else
@@ -721,7 +591,7 @@ std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) {
/// Subtract two signed integers, computing the two's complement truncated
/// result, returning true if an overflow occurred.
template <typename T>
-std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) {
+std::enable_if_t<std::is_signed_v<T>, T> SubOverflow(T X, T Y, T &Result) {
#if __has_builtin(__builtin_sub_overflow)
return __builtin_sub_overflow(X, Y, &Result);
#else
@@ -747,7 +617,7 @@ std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) {
/// Multiply two signed integers, computing the two's complement truncated
/// result, returning true if an overflow occurred.
template <typename T>
-std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) {
+std::enable_if_t<std::is_signed_v<T>, T> MulOverflow(T X, T Y, T &Result) {
// Perform the unsigned multiplication on absolute values.
using U = std::make_unsigned_t<T>;
const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
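The helpers removed from MathExtras.h in the hunks above were thin wrappers whose bodies already forwarded to the C++20-style primitives, so call sites migrate mechanically. A hedged sketch of the replacements (the primitives are assumed to come from llvm/ADT/bit.h):

    #include "llvm/ADT/bit.h"
    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t V = 0xF000F000u;
      assert(llvm::countr_zero(V) == 12);           // was countTrailingZeros(V)
      assert(llvm::countl_zero(V) == 0);            // was countLeadingZeros(V)
      assert(llvm::popcount(V) == 8);               // was countPopulation(V)
      assert(llvm::bit_floor(uint64_t(100)) == 64); // was PowerOf2Floor(100)
      // was BitsToFloat(Bits):
      assert(llvm::bit_cast<float>(uint32_t(0x3F800000)) == 1.0f);
      return 0;
    }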
diff --git a/llvm/include/llvm/Support/ModRef.h b/llvm/include/llvm/Support/ModRef.h
index becfd2771249..7687280111a1 100644
--- a/llvm/include/llvm/Support/ModRef.h
+++ b/llvm/include/llvm/Support/ModRef.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_IR_MODREF_H
-#define LLVM_IR_MODREF_H
+#ifndef LLVM_SUPPORT_MODREF_H
+#define LLVM_SUPPORT_MODREF_H
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/Sequence.h"
@@ -55,22 +55,23 @@ enum class ModRefInfo : uint8_t {
/// Debug print ModRefInfo.
raw_ostream &operator<<(raw_ostream &OS, ModRefInfo MR);
-/// Summary of how a function affects memory in the program.
-///
-/// Loads from constant globals are not considered memory accesses for this
-/// interface. Also, functions may freely modify stack space local to their
-/// invocation without having to report it through these interfaces.
-class MemoryEffects {
+/// The locations at which a function might access memory.
+enum class IRMemLocation {
+ /// Access to memory via argument pointers.
+ ArgMem = 0,
+ /// Memory that is inaccessible via LLVM IR.
+ InaccessibleMem = 1,
+ /// Any other memory.
+ Other = 2,
+
+ /// Helpers to iterate all locations in the MemoryEffectsBase class.
+ First = ArgMem,
+ Last = Other,
+};
+
+template <typename LocationEnum> class MemoryEffectsBase {
public:
- /// The locations at which a function might access memory.
- enum Location {
- /// Access to memory via argument pointers.
- ArgMem = 0,
- /// Memory that is inaccessible via LLVM IR.
- InaccessibleMem = 1,
- /// Any other memory.
- Other = 2,
- };
+ using Location = LocationEnum;
private:
uint32_t Data = 0;
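A short usage sketch for the templated form introduced in this hunk. It assumes the IR-level instantiation keeps the MemoryEffects name (i.e. an alias of MemoryEffectsBase<IRMemLocation> elsewhere in the header); only calls visible in this diff are used.

    #include "llvm/Support/ModRef.h"
    #include <cassert>

    using namespace llvm;

    int main() {
      // A function that only reads memory reachable through its arguments.
      MemoryEffects ME = MemoryEffects::argMemOnly(ModRefInfo::Ref);
      assert(ME.getModRef(IRMemLocation::ArgMem) == ModRefInfo::Ref);
      assert(ME.getModRef(IRMemLocation::Other) == ModRefInfo::NoModRef);
      // The encoding round-trips through the integer used by the IR attribute.
      assert(MemoryEffects::createFromIntValue(ME.toIntValue()).toIntValue() ==
             ME.toIntValue());
      return 0;
    }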
@@ -82,79 +83,79 @@ private:
return (uint32_t)Loc * BitsPerLoc;
}
- MemoryEffects(uint32_t Data) : Data(Data) {}
+ MemoryEffectsBase(uint32_t Data) : Data(Data) {}
void setModRef(Location Loc, ModRefInfo MR) {
Data &= ~(LocMask << getLocationPos(Loc));
Data |= static_cast<uint32_t>(MR) << getLocationPos(Loc);
}
- friend raw_ostream &operator<<(raw_ostream &OS, MemoryEffects RMRB);
-
public:
/// Returns iterator over all supported location kinds.
static auto locations() {
- return enum_seq_inclusive(Location::ArgMem, Location::Other,
+ return enum_seq_inclusive(Location::First, Location::Last,
force_iteration_on_noniterable_enum);
}
- /// Create MemoryEffects that can access only the given location with the
+ /// Create MemoryEffectsBase that can access only the given location with the
/// given ModRefInfo.
- MemoryEffects(Location Loc, ModRefInfo MR) { setModRef(Loc, MR); }
+ MemoryEffectsBase(Location Loc, ModRefInfo MR) { setModRef(Loc, MR); }
- /// Create MemoryEffects that can access any location with the given
+ /// Create MemoryEffectsBase that can access any location with the given
/// ModRefInfo.
- explicit MemoryEffects(ModRefInfo MR) {
+ explicit MemoryEffectsBase(ModRefInfo MR) {
for (Location Loc : locations())
setModRef(Loc, MR);
}
- /// Create MemoryEffects that can read and write any memory.
- static MemoryEffects unknown() {
- return MemoryEffects(ModRefInfo::ModRef);
+ /// Create MemoryEffectsBase that can read and write any memory.
+ static MemoryEffectsBase unknown() {
+ return MemoryEffectsBase(ModRefInfo::ModRef);
}
- /// Create MemoryEffects that cannot read or write any memory.
- static MemoryEffects none() {
- return MemoryEffects(ModRefInfo::NoModRef);
+ /// Create MemoryEffectsBase that cannot read or write any memory.
+ static MemoryEffectsBase none() {
+ return MemoryEffectsBase(ModRefInfo::NoModRef);
}
- /// Create MemoryEffects that can read any memory.
- static MemoryEffects readOnly() {
- return MemoryEffects(ModRefInfo::Ref);
+ /// Create MemoryEffectsBase that can read any memory.
+ static MemoryEffectsBase readOnly() {
+ return MemoryEffectsBase(ModRefInfo::Ref);
}
- /// Create MemoryEffects that can write any memory.
- static MemoryEffects writeOnly() {
- return MemoryEffects(ModRefInfo::Mod);
+ /// Create MemoryEffectsBase that can write any memory.
+ static MemoryEffectsBase writeOnly() {
+ return MemoryEffectsBase(ModRefInfo::Mod);
}
- /// Create MemoryEffects that can only access argument memory.
- static MemoryEffects argMemOnly(ModRefInfo MR = ModRefInfo::ModRef) {
- return MemoryEffects(ArgMem, MR);
+ /// Create MemoryEffectsBase that can only access argument memory.
+ static MemoryEffectsBase argMemOnly(ModRefInfo MR = ModRefInfo::ModRef) {
+ return MemoryEffectsBase(Location::ArgMem, MR);
}
- /// Create MemoryEffects that can only access inaccessible memory.
- static MemoryEffects inaccessibleMemOnly(ModRefInfo MR = ModRefInfo::ModRef) {
- return MemoryEffects(InaccessibleMem, MR);
+ /// Create MemoryEffectsBase that can only access inaccessible memory.
+ static MemoryEffectsBase
+ inaccessibleMemOnly(ModRefInfo MR = ModRefInfo::ModRef) {
+ return MemoryEffectsBase(Location::InaccessibleMem, MR);
}
- /// Create MemoryEffects that can only access inaccessible or argument memory.
- static MemoryEffects
+ /// Create MemoryEffectsBase that can only access inaccessible or argument
+ /// memory.
+ static MemoryEffectsBase
inaccessibleOrArgMemOnly(ModRefInfo MR = ModRefInfo::ModRef) {
- MemoryEffects FRMB = none();
- FRMB.setModRef(ArgMem, MR);
- FRMB.setModRef(InaccessibleMem, MR);
+ MemoryEffectsBase FRMB = none();
+ FRMB.setModRef(Location::ArgMem, MR);
+ FRMB.setModRef(Location::InaccessibleMem, MR);
return FRMB;
}
- /// Create MemoryEffects from an encoded integer value (used by memory
+ /// Create MemoryEffectsBase from an encoded integer value (used by memory
/// attribute).
- static MemoryEffects createFromIntValue(uint32_t Data) {
- return MemoryEffects(Data);
+ static MemoryEffectsBase createFromIntValue(uint32_t Data) {
+ return MemoryEffectsBase(Data);
}
- /// Convert MemoryEffects into an encoded integer value (used by memory
+ /// Convert MemoryEffectsBase into an encoded integer value (used by memory
/// attribute).
uint32_t toIntValue() const {
return Data;
@@ -165,16 +166,16 @@ public:
return ModRefInfo((Data >> getLocationPos(Loc)) & LocMask);
}
- /// Get new MemoryEffects with modified ModRefInfo for Loc.
- MemoryEffects getWithModRef(Location Loc, ModRefInfo MR) const {
- MemoryEffects ME = *this;
+ /// Get new MemoryEffectsBase with modified ModRefInfo for Loc.
+ MemoryEffectsBase getWithModRef(Location Loc, ModRefInfo MR) const {
+ MemoryEffectsBase ME = *this;
ME.setModRef(Loc, MR);
return ME;
}
- /// Get new MemoryEffects with NoModRef on the given Loc.
- MemoryEffects getWithoutLoc(Location Loc) const {
- MemoryEffects ME = *this;
+ /// Get new MemoryEffectsBase with NoModRef on the given Loc.
+ MemoryEffectsBase getWithoutLoc(Location Loc) const {
+ MemoryEffectsBase ME = *this;
ME.setModRef(Loc, ModRefInfo::NoModRef);
return ME;
}
@@ -198,58 +199,74 @@ public:
/// Whether this function only (at most) accesses argument memory.
bool onlyAccessesArgPointees() const {
- return getWithoutLoc(ArgMem).doesNotAccessMemory();
+ return getWithoutLoc(Location::ArgMem).doesNotAccessMemory();
}
/// Whether this function may access argument memory.
bool doesAccessArgPointees() const {
- return isModOrRefSet(getModRef(ArgMem));
+ return isModOrRefSet(getModRef(Location::ArgMem));
}
/// Whether this function only (at most) accesses inaccessible memory.
bool onlyAccessesInaccessibleMem() const {
- return getWithoutLoc(InaccessibleMem).doesNotAccessMemory();
+ return getWithoutLoc(Location::InaccessibleMem).doesNotAccessMemory();
}
/// Whether this function only (at most) accesses argument and inaccessible
/// memory.
bool onlyAccessesInaccessibleOrArgMem() const {
- return isNoModRef(getModRef(Other));
+ return getWithoutLoc(Location::InaccessibleMem)
+ .getWithoutLoc(Location::ArgMem)
+ .doesNotAccessMemory();
}
- /// Intersect with other MemoryEffects.
- MemoryEffects operator&(MemoryEffects Other) const {
- return MemoryEffects(Data & Other.Data);
+ /// Intersect with other MemoryEffectsBase.
+ MemoryEffectsBase operator&(MemoryEffectsBase Other) const {
+ return MemoryEffectsBase(Data & Other.Data);
}
- /// Intersect (in-place) with other MemoryEffects.
- MemoryEffects &operator&=(MemoryEffects Other) {
+ /// Intersect (in-place) with other MemoryEffectsBase.
+ MemoryEffectsBase &operator&=(MemoryEffectsBase Other) {
Data &= Other.Data;
return *this;
}
- /// Union with other MemoryEffects.
- MemoryEffects operator|(MemoryEffects Other) const {
- return MemoryEffects(Data | Other.Data);
+ /// Union with other MemoryEffectsBase.
+ MemoryEffectsBase operator|(MemoryEffectsBase Other) const {
+ return MemoryEffectsBase(Data | Other.Data);
}
- /// Union (in-place) with other MemoryEffects.
- MemoryEffects &operator|=(MemoryEffects Other) {
+ /// Union (in-place) with other MemoryEffectsBase.
+ MemoryEffectsBase &operator|=(MemoryEffectsBase Other) {
Data |= Other.Data;
return *this;
}
- /// Check whether this is the same as other MemoryEffects.
- bool operator==(MemoryEffects Other) const {
- return Data == Other.Data;
+ /// Subtract other MemoryEffectsBase.
+ MemoryEffectsBase operator-(MemoryEffectsBase Other) const {
+ return MemoryEffectsBase(Data & ~Other.Data);
}
- /// Check whether this is different from other MemoryEffects.
- bool operator!=(MemoryEffects Other) const {
- return !operator==(Other);
+ /// Subtract (in-place) with other MemoryEffectsBase.
+ MemoryEffectsBase &operator-=(MemoryEffectsBase Other) {
+ Data &= ~Other.Data;
+ return *this;
}
+
+ /// Check whether this is the same as other MemoryEffectsBase.
+ bool operator==(MemoryEffectsBase Other) const { return Data == Other.Data; }
+
+ /// Check whether this is different from other MemoryEffectsBase.
+ bool operator!=(MemoryEffectsBase Other) const { return !operator==(Other); }
};
+/// Summary of how a function affects memory in the program.
+///
+/// Loads from constant globals are not considered memory accesses for this
+/// interface. Also, functions may freely modify stack space local to their
+/// invocation without having to report it through these interfaces.
+using MemoryEffects = MemoryEffectsBase<IRMemLocation>;
+
/// Debug print MemoryEffects.
raw_ostream &operator<<(raw_ostream &OS, MemoryEffects RMRB);
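A brief sketch of how the MemoryEffectsBase operations above compose, assuming the header exactly as shown in this hunk; the helper name is illustrative only:

#include "llvm/Support/ModRef.h"
using namespace llvm;

// Hypothetical helper: a callee that reads argument memory and writes
// memory that is inaccessible from IR (e.g. runtime-internal state).
MemoryEffects calleeEffects() {
  MemoryEffects ME = MemoryEffects::none();
  ME |= MemoryEffects::argMemOnly(ModRefInfo::Ref);
  ME |= MemoryEffects::inaccessibleMemOnly(ModRefInfo::Mod);
  return ME;
}

// For the value above, onlyAccessesInaccessibleOrArgMem() is true, while
// onlyAccessesArgPointees() is false because inaccessible memory is written.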
diff --git a/llvm/include/llvm/Support/OnDiskHashTable.h b/llvm/include/llvm/Support/OnDiskHashTable.h
index 07ee8e79423b..bb90d8fc3ac7 100644
--- a/llvm/include/llvm/Support/OnDiskHashTable.h
+++ b/llvm/include/llvm/Support/OnDiskHashTable.h
@@ -163,7 +163,7 @@ public:
//
// FIXME: Try computing a perfect hash function at this point.
unsigned TargetNumBuckets =
- NumEntries <= 2 ? 1 : NextPowerOf2(NumEntries * 4 / 3);
+ NumEntries <= 2 ? 1 : llvm::bit_ceil(NumEntries * 4 / 3 + 1);
if (TargetNumBuckets != NumBuckets)
resize(TargetNumBuckets);
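For context on the replacement above: NextPowerOf2(N) returns the smallest power of two strictly greater than N, whereas llvm::bit_ceil(N) returns the smallest power of two greater than or equal to N, so the added "+ 1" keeps the computed bucket count unchanged. A small illustrative check (values chosen here, not taken from the diff):

#include "llvm/ADT/bit.h"
#include <cassert>

void checkBucketRounding() {
  // NextPowerOf2(8) would give 16; bit_ceil(8) gives 8, bit_ceil(8 + 1) gives 16.
  assert(llvm::bit_ceil(9u) == 16u && llvm::bit_ceil(8u) == 8u);
}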
diff --git a/llvm/include/llvm/Support/PGOOptions.h b/llvm/include/llvm/Support/PGOOptions.h
index 2141e2159c0c..35670c457745 100644
--- a/llvm/include/llvm/Support/PGOOptions.h
+++ b/llvm/include/llvm/Support/PGOOptions.h
@@ -14,51 +14,38 @@
#ifndef LLVM_SUPPORT_PGOOPTIONS_H
#define LLVM_SUPPORT_PGOOPTIONS_H
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/Support/Error.h"
namespace llvm {
+namespace vfs {
+class FileSystem;
+} // namespace vfs
+
/// A struct capturing PGO tunables.
struct PGOOptions {
enum PGOAction { NoAction, IRInstr, IRUse, SampleUse };
enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse };
- PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "",
- std::string ProfileRemappingFile = "", PGOAction Action = NoAction,
- CSPGOAction CSAction = NoCSAction,
+ PGOOptions(std::string ProfileFile, std::string CSProfileGenFile,
+ std::string ProfileRemappingFile, std::string MemoryProfile,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS,
+ PGOAction Action = NoAction, CSPGOAction CSAction = NoCSAction,
bool DebugInfoForProfiling = false,
- bool PseudoProbeForProfiling = false)
- : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile),
- ProfileRemappingFile(ProfileRemappingFile), Action(Action),
- CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling ||
- (Action == SampleUse &&
- !PseudoProbeForProfiling)),
- PseudoProbeForProfiling(PseudoProbeForProfiling) {
- // Note, we do allow ProfileFile.empty() for Action=IRUse LTO can
- // callback with IRUse action without ProfileFile.
-
- // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse.
- assert(this->CSAction == NoCSAction ||
- (this->Action != IRInstr && this->Action != SampleUse));
-
- // For CSIRInstr, CSProfileGenFile also needs to be nonempty.
- assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty());
-
- // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share
- // a profile.
- assert(this->CSAction != CSIRUse || this->Action == IRUse);
+ bool PseudoProbeForProfiling = false);
+ PGOOptions(const PGOOptions &);
+ ~PGOOptions();
+ PGOOptions &operator=(const PGOOptions &);
- // If neither Action nor CSAction, DebugInfoForProfiling or
- // PseudoProbeForProfiling needs to be true.
- assert(this->Action != NoAction || this->CSAction != NoCSAction ||
- this->DebugInfoForProfiling || this->PseudoProbeForProfiling);
- }
std::string ProfileFile;
std::string CSProfileGenFile;
std::string ProfileRemappingFile;
+ std::string MemoryProfile;
PGOAction Action;
CSPGOAction CSAction;
bool DebugInfoForProfiling;
bool PseudoProbeForProfiling;
+ IntrusiveRefCntPtr<vfs::FileSystem> FS;
};
} // namespace llvm
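A hedged construction sketch for the reworked PGOOptions above; vfs::getRealFileSystem() is assumed from llvm/Support/VirtualFileSystem.h, the usual source of an IntrusiveRefCntPtr<vfs::FileSystem>:

#include "llvm/Support/PGOOptions.h"
#include "llvm/Support/VirtualFileSystem.h"
using namespace llvm;

PGOOptions makeIRUseOptions() {
  return PGOOptions(/*ProfileFile=*/"default.profdata",
                    /*CSProfileGenFile=*/"",
                    /*ProfileRemappingFile=*/"",
                    /*MemoryProfile=*/"",
                    vfs::getRealFileSystem(),
                    PGOOptions::IRUse);
}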
diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h
index 219197c4eb29..8170da98f15a 100644
--- a/llvm/include/llvm/Support/Parallel.h
+++ b/llvm/include/llvm/Support/Parallel.h
@@ -30,6 +30,14 @@ namespace parallel {
extern ThreadPoolStrategy strategy;
#if LLVM_ENABLE_THREADS
+#define GET_THREAD_INDEX_IMPL \
+ if (parallel::strategy.ThreadsRequested == 1) \
+ return 0; \
+ assert((threadIndex != UINT_MAX) && \
+ "getThreadIndex() must be called from a thread created by " \
+ "ThreadPoolExecutor"); \
+ return threadIndex;
+
#ifdef _WIN32
// Direct access to thread_local variables from a different DLL isn't
// possible with Windows Native TLS.
@@ -38,10 +46,13 @@ unsigned getThreadIndex();
// Don't access this directly, use the getThreadIndex wrapper.
extern thread_local unsigned threadIndex;
-inline unsigned getThreadIndex() { return threadIndex; }
+inline unsigned getThreadIndex() { GET_THREAD_INDEX_IMPL; }
#endif
+
+size_t getThreadCount();
#else
inline unsigned getThreadIndex() { return 0; }
+inline size_t getThreadCount() { return 1; }
#endif
namespace detail {
@@ -84,26 +95,15 @@ public:
~TaskGroup();
// Spawn a task, but does not wait for it to finish.
- void spawn(std::function<void()> f);
-
- // Similar to spawn, but execute the task immediately when ThreadsRequested ==
- // 1. The difference is to give the following pattern a more intuitive order
- // when single threading is requested.
- //
- // for (size_t begin = 0, i = 0, taskSize = 0;;) {
- // taskSize += ...
- // bool done = ++i == end;
- // if (done || taskSize >= taskSizeLimit) {
- // tg.execute([=] { fn(begin, i); });
- // if (done)
- // break;
- // begin = i;
- // taskSize = 0;
- // }
- // }
- void execute(std::function<void()> f);
+ // Tasks marked with \p Sequential will be executed
+ // exactly in the order which they were spawned.
+ // Note: Sequential tasks may be executed on different
+ // threads, but strictly in sequential order.
+ void spawn(std::function<void()> f, bool Sequential = false);
void sync() const { L.sync(); }
+
+ bool isParallel() const { return Parallel; }
};
namespace detail {
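A short sketch of the updated TaskGroup::spawn interface documented above, assuming llvm::parallel::TaskGroup as declared in llvm/Support/Parallel.h:

#include "llvm/Support/Parallel.h"
#include "llvm/Support/raw_ostream.h"

void runTasks() {
  llvm::parallel::TaskGroup TG;
  // May run on any worker thread, in any order relative to other tasks.
  TG.spawn([] { llvm::outs() << "unordered task\n"; });
  // Sequential tasks may still run on different threads, but strictly in
  // the order in which they were spawned.
  TG.spawn([] { llvm::outs() << "first\n"; }, /*Sequential=*/true);
  TG.spawn([] { llvm::outs() << "second\n"; }, /*Sequential=*/true);
  TG.sync();
}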
diff --git a/llvm/include/llvm/Support/PerThreadBumpPtrAllocator.h b/llvm/include/llvm/Support/PerThreadBumpPtrAllocator.h
new file mode 100644
index 000000000000..f94d18f62e9a
--- /dev/null
+++ b/llvm/include/llvm/Support/PerThreadBumpPtrAllocator.h
@@ -0,0 +1,120 @@
+//===- PerThreadBumpPtrAllocator.h ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_PERTHREADBUMPPTRALLOCATOR_H
+#define LLVM_SUPPORT_PERTHREADBUMPPTRALLOCATOR_H
+
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Parallel.h"
+
+namespace llvm {
+namespace parallel {
+
+/// PerThreadAllocator is used in conjunction with ThreadPoolExecutor to allow
+/// per-thread allocations. It wraps a possibly thread-unsafe allocator,
+/// e.g. BumpPtrAllocator. PerThreadAllocator must be used only with the main
+/// thread or threads created by ThreadPoolExecutor, as it utilizes getThreadIndex,
+/// which is set by ThreadPoolExecutor. To work properly, ThreadPoolExecutor
+/// should be initialized before PerThreadAllocator is created.
+/// TODO: The same approach might be implemented for ThreadPool.
+
+template <typename AllocatorTy>
+class PerThreadAllocator
+ : public AllocatorBase<PerThreadAllocator<AllocatorTy>> {
+public:
+ PerThreadAllocator()
+ : NumOfAllocators(parallel::getThreadCount()),
+ Allocators(std::make_unique<AllocatorTy[]>(NumOfAllocators)) {}
+
+ /// \defgroup Methods which could be called asynchronously:
+ ///
+ /// @{
+
+ using AllocatorBase<PerThreadAllocator<AllocatorTy>>::Allocate;
+
+ using AllocatorBase<PerThreadAllocator<AllocatorTy>>::Deallocate;
+
+ /// Allocate \a Size bytes of \a Alignment aligned memory.
+ void *Allocate(size_t Size, size_t Alignment) {
+ assert(getThreadIndex() < NumOfAllocators);
+ return Allocators[getThreadIndex()].Allocate(Size, Alignment);
+ }
+
+ /// Deallocate \a Ptr to \a Size bytes of memory allocated by this
+ /// allocator.
+ void Deallocate(const void *Ptr, size_t Size, size_t Alignment) {
+ assert(getThreadIndex() < NumOfAllocators);
+ return Allocators[getThreadIndex()].Deallocate(Ptr, Size, Alignment);
+ }
+
+ /// Return allocator corresponding to the current thread.
+ AllocatorTy &getThreadLocalAllocator() {
+ assert(getThreadIndex() < NumOfAllocators);
+ return Allocators[getThreadIndex()];
+ }
+
+ // Return number of used allocators.
+ size_t getNumberOfAllocators() const { return NumOfAllocators; }
+ /// @}
+
+ /// \defgroup Methods which could not be called asynchronously:
+ ///
+ /// @{
+
+ /// Reset state of allocators.
+ void Reset() {
+ for (size_t Idx = 0; Idx < getNumberOfAllocators(); Idx++)
+ Allocators[Idx].Reset();
+ }
+
+ /// Return total memory size used by all allocators.
+ size_t getTotalMemory() const {
+ size_t TotalMemory = 0;
+
+ for (size_t Idx = 0; Idx < getNumberOfAllocators(); Idx++)
+ TotalMemory += Allocators[Idx].getTotalMemory();
+
+ return TotalMemory;
+ }
+
+ /// Return allocated size by all allocators.
+ size_t getBytesAllocated() const {
+ size_t BytesAllocated = 0;
+
+ for (size_t Idx = 0; Idx < getNumberOfAllocators(); Idx++)
+ BytesAllocated += Allocators[Idx].getBytesAllocated();
+
+ return BytesAllocated;
+ }
+
+ /// Set red zone for all allocators.
+ void setRedZoneSize(size_t NewSize) {
+ for (size_t Idx = 0; Idx < getNumberOfAllocators(); Idx++)
+ Allocators[Idx].setRedZoneSize(NewSize);
+ }
+
+ /// Print statistic for each allocator.
+ void PrintStats() const {
+ for (size_t Idx = 0; Idx < getNumberOfAllocators(); Idx++) {
+ errs() << "\n Allocator " << Idx << "\n";
+ Allocators[Idx].PrintStats();
+ }
+ }
+ /// @}
+
+protected:
+ size_t NumOfAllocators;
+ std::unique_ptr<AllocatorTy[]> Allocators;
+};
+
+using PerThreadBumpPtrAllocator = PerThreadAllocator<BumpPtrAllocator>;
+
+} // end namespace parallel
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_PERTHREADBUMPPTRALLOCATOR_H
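A usage sketch for the new allocator, assuming llvm::parallelFor from llvm/Support/Parallel.h; as the class comment notes, the thread pool executor must already be set up so getThreadIndex() is meaningful:

#include "llvm/Support/Parallel.h"
#include "llvm/Support/PerThreadBumpPtrAllocator.h"
using namespace llvm;
using namespace llvm::parallel;

void buildScratchBuffers(size_t N) {
  PerThreadBumpPtrAllocator Alloc;
  parallelFor(0, N, [&](size_t I) {
    // Each executor thread (and the main thread) gets its own
    // BumpPtrAllocator, so no locking is needed for the allocation itself.
    void *Scratch = Alloc.Allocate(/*Size=*/64, /*Alignment=*/8);
    (void)Scratch;
    (void)I;
  });
}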
diff --git a/llvm/include/llvm/Support/RISCVISAInfo.h b/llvm/include/llvm/Support/RISCVISAInfo.h
index 9070b88d710e..6eb085c32b5b 100644
--- a/llvm/include/llvm/Support/RISCVISAInfo.h
+++ b/llvm/include/llvm/Support/RISCVISAInfo.h
@@ -1,4 +1,4 @@
-//===-- RISCVISAInfo.h - RISCV ISA Information ------------------*- C++ -*-===//
+//===-- RISCVISAInfo.h - RISC-V ISA Information -----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -18,7 +18,6 @@
namespace llvm {
struct RISCVExtensionInfo {
- std::string ExtName;
unsigned MajorVersion;
unsigned MinorVersion;
};
@@ -45,17 +44,27 @@ public:
RISCVISAInfo(unsigned XLen, OrderedExtensionMap &Exts)
: XLen(XLen), FLen(0), MinVLen(0), MaxELen(0), MaxELenFp(0), Exts(Exts) {}
- /// Parse RISCV ISA info from arch string.
+ /// Parse RISC-V ISA info from arch string.
+ /// If IgnoreUnknown is set, any unrecognised extension names or
+ /// extensions with unrecognised versions will be silently dropped, except
+ /// for the special case of the base 'i' and 'e' extensions, where the
+ /// default version will be used (as ignoring the base is not possible).
static llvm::Expected<std::unique_ptr<RISCVISAInfo>>
parseArchString(StringRef Arch, bool EnableExperimentalExtension,
bool ExperimentalExtensionVersionCheck = true,
bool IgnoreUnknown = false);
- /// Parse RISCV ISA info from feature vector.
+ /// Parse RISC-V ISA info from an arch string that is already in normalized
+ /// form (as defined in the psABI). Unlike parseArchString, this function
+ /// will not error for unrecognized extension names or extension versions.
+ static llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+ parseNormalizedArchString(StringRef Arch);
+
+ /// Parse RISC-V ISA info from feature vector.
static llvm::Expected<std::unique_ptr<RISCVISAInfo>>
parseFeatures(unsigned XLen, const std::vector<std::string> &Features);
- /// Convert RISCV ISA info to a feature vector.
+ /// Convert RISC-V ISA info to a feature vector.
void toFeatures(std::vector<StringRef> &Features,
llvm::function_ref<StringRef(const Twine &)> StrAlloc,
bool AddAllExtensions) const;
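A minimal sketch of calling the parsing entry point described above; getXLen() is assumed from the existing RISCVISAInfo interface, which is not part of this hunk:

#include "llvm/Support/Error.h"
#include "llvm/Support/RISCVISAInfo.h"
using namespace llvm;

unsigned xlenOfArchString(StringRef Arch) {
  auto Parsed = RISCVISAInfo::parseArchString(
      Arch, /*EnableExperimentalExtension=*/true);
  if (!Parsed) {
    consumeError(Parsed.takeError());
    return 0;
  }
  return (*Parsed)->getXLen();
}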
diff --git a/llvm/include/llvm/Support/ReverseIteration.h b/llvm/include/llvm/Support/ReverseIteration.h
index 5e0238d81c4c..9e9411856369 100644
--- a/llvm/include/llvm/Support/ReverseIteration.h
+++ b/llvm/include/llvm/Support/ReverseIteration.h
@@ -15,5 +15,5 @@ bool shouldReverseIterate() {
#endif
}
-}
+} // namespace llvm
#endif
diff --git a/llvm/include/llvm/Support/SMLoc.h b/llvm/include/llvm/Support/SMLoc.h
index 60b052a3b863..d7dde81ce0be 100644
--- a/llvm/include/llvm/Support/SMLoc.h
+++ b/llvm/include/llvm/Support/SMLoc.h
@@ -24,14 +24,14 @@ class SMLoc {
const char *Ptr = nullptr;
public:
- SMLoc() = default;
+ constexpr SMLoc() = default;
- bool isValid() const { return Ptr != nullptr; }
+ constexpr bool isValid() const { return Ptr != nullptr; }
- bool operator==(const SMLoc &RHS) const { return RHS.Ptr == Ptr; }
- bool operator!=(const SMLoc &RHS) const { return RHS.Ptr != Ptr; }
+ constexpr bool operator==(const SMLoc &RHS) const { return RHS.Ptr == Ptr; }
+ constexpr bool operator!=(const SMLoc &RHS) const { return RHS.Ptr != Ptr; }
- const char *getPointer() const { return Ptr; }
+ constexpr const char *getPointer() const { return Ptr; }
static SMLoc getFromPointer(const char *Ptr) {
SMLoc L;
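With the constexpr members above, an SMLoc can now participate in constant evaluation; a trivial compile-time check, assuming only this header:

#include "llvm/Support/SMLoc.h"
// A default-constructed SMLoc has a null pointer and is therefore invalid.
static_assert(!llvm::SMLoc().isValid(), "default SMLoc must be invalid");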
diff --git a/llvm/include/llvm/Support/ScaledNumber.h b/llvm/include/llvm/Support/ScaledNumber.h
index a5261e419986..faf3ce351c3e 100644
--- a/llvm/include/llvm/Support/ScaledNumber.h
+++ b/llvm/include/llvm/Support/ScaledNumber.h
@@ -85,7 +85,7 @@ inline std::pair<DigitsT, int16_t> getAdjusted(uint64_t Digits,
return std::make_pair(Digits, Scale);
// Shift right and round.
- int Shift = 64 - Width - countLeadingZeros(Digits);
+ int Shift = llvm::bit_width(Digits) - Width;
return getRounded<DigitsT>(Digits >> Shift, Scale + Shift,
Digits & (UINT64_C(1) << (Shift - 1)));
}
@@ -192,7 +192,8 @@ inline std::pair<int32_t, int> getLgImpl(DigitsT Digits, int16_t Scale) {
return std::make_pair(INT32_MIN, 0);
// Get the floor of the lg of Digits.
- int32_t LocalFloor = sizeof(Digits) * 8 - countLeadingZeros(Digits) - 1;
+ static_assert(sizeof(Digits) <= sizeof(uint64_t));
+ int32_t LocalFloor = llvm::Log2_64(Digits);
// Get the actual floor.
int32_t Floor = Scale + LocalFloor;
@@ -304,7 +305,7 @@ int16_t matchScales(DigitsT &LDigits, int16_t &LScale, DigitsT &RDigits,
}
// Shift LDigits left as much as possible, then shift RDigits right.
- int32_t ShiftL = std::min<int32_t>(countLeadingZeros(LDigits), ScaleDiff);
+ int32_t ShiftL = std::min<int32_t>(llvm::countl_zero(LDigits), ScaleDiff);
assert(ShiftL < getWidth<DigitsT>() && "can't shift more than width");
int32_t ShiftR = ScaleDiff - ShiftL;
@@ -425,8 +426,8 @@ public:
unsigned Precision);
static std::string toString(uint64_t D, int16_t E, int Width,
unsigned Precision);
- static int countLeadingZeros32(uint32_t N) { return countLeadingZeros(N); }
- static int countLeadingZeros64(uint64_t N) { return countLeadingZeros(N); }
+ static int countLeadingZeros32(uint32_t N) { return llvm::countl_zero(N); }
+ static int countLeadingZeros64(uint64_t N) { return llvm::countl_zero(N); }
static uint64_t getHalf(uint64_t N) { return (N >> 1) + (N & 1); }
static std::pair<uint64_t, bool> splitSigned(int64_t N) {
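A note on the getAdjusted() change above: for a non-zero 64-bit value, llvm::bit_width(Digits) equals 64 - countl_zero(Digits), so bit_width(Digits) - Width is the same shift amount as the old 64 - Width - countLeadingZeros(Digits). A small check with a value chosen here for illustration:

#include "llvm/ADT/bit.h"
#include <cassert>
#include <cstdint>

void checkShiftEquivalence() {
  std::uint64_t Digits = 0x1F0; // 55 leading zeros in 64 bits, so bit_width is 9.
  assert(llvm::bit_width(Digits) == 64 - llvm::countl_zero(Digits));
}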
diff --git a/llvm/include/llvm/Support/ScopedPrinter.h b/llvm/include/llvm/Support/ScopedPrinter.h
index b91acb576ba5..aaaed3f5ceac 100644
--- a/llvm/include/llvm/Support/ScopedPrinter.h
+++ b/llvm/include/llvm/Support/ScopedPrinter.h
@@ -198,42 +198,62 @@ public:
printFlagsImpl(Label, hex(Value), SetFlags);
}
- virtual void printNumber(StringRef Label, uint64_t Value) {
+ virtual void printNumber(StringRef Label, char Value) {
+ startLine() << Label << ": " << static_cast<int>(Value) << "\n";
+ }
+
+ virtual void printNumber(StringRef Label, signed char Value) {
+ startLine() << Label << ": " << static_cast<int>(Value) << "\n";
+ }
+
+ virtual void printNumber(StringRef Label, unsigned char Value) {
+ startLine() << Label << ": " << static_cast<unsigned>(Value) << "\n";
+ }
+
+ virtual void printNumber(StringRef Label, short Value) {
startLine() << Label << ": " << Value << "\n";
}
- virtual void printNumber(StringRef Label, uint32_t Value) {
+ virtual void printNumber(StringRef Label, unsigned short Value) {
startLine() << Label << ": " << Value << "\n";
}
- virtual void printNumber(StringRef Label, uint16_t Value) {
+ virtual void printNumber(StringRef Label, int Value) {
startLine() << Label << ": " << Value << "\n";
}
- virtual void printNumber(StringRef Label, uint8_t Value) {
- startLine() << Label << ": " << unsigned(Value) << "\n";
+ virtual void printNumber(StringRef Label, unsigned int Value) {
+ startLine() << Label << ": " << Value << "\n";
}
- virtual void printNumber(StringRef Label, int64_t Value) {
+ virtual void printNumber(StringRef Label, long Value) {
startLine() << Label << ": " << Value << "\n";
}
- virtual void printNumber(StringRef Label, int32_t Value) {
+ virtual void printNumber(StringRef Label, unsigned long Value) {
startLine() << Label << ": " << Value << "\n";
}
- virtual void printNumber(StringRef Label, int16_t Value) {
+ virtual void printNumber(StringRef Label, long long Value) {
startLine() << Label << ": " << Value << "\n";
}
- virtual void printNumber(StringRef Label, int8_t Value) {
- startLine() << Label << ": " << int(Value) << "\n";
+ virtual void printNumber(StringRef Label, unsigned long long Value) {
+ startLine() << Label << ": " << Value << "\n";
}
virtual void printNumber(StringRef Label, const APSInt &Value) {
startLine() << Label << ": " << Value << "\n";
}
+ virtual void printNumber(StringRef Label, float Value) {
+ startLine() << Label << ": " << format("%5.1f", Value) << "\n";
+ }
+
+ virtual void printNumber(StringRef Label, double Value) {
+ startLine() << Label << ": " << format("%5.1f", Value) << "\n";
+ }
+
template <typename T>
void printNumber(StringRef Label, StringRef Str, T Value) {
printNumberImpl(Label, Str, to_string(Value));
@@ -554,35 +574,55 @@ public:
return SP->getKind() == ScopedPrinter::ScopedPrinterKind::JSON;
}
- void printNumber(StringRef Label, uint64_t Value) override {
+ void printNumber(StringRef Label, char Value) override {
+ JOS.attribute(Label, Value);
+ }
+
+ void printNumber(StringRef Label, signed char Value) override {
+ JOS.attribute(Label, Value);
+ }
+
+ void printNumber(StringRef Label, unsigned char Value) override {
+ JOS.attribute(Label, Value);
+ }
+
+ void printNumber(StringRef Label, short Value) override {
+ JOS.attribute(Label, Value);
+ }
+
+ void printNumber(StringRef Label, unsigned short Value) override {
+ JOS.attribute(Label, Value);
+ }
+
+ void printNumber(StringRef Label, int Value) override {
JOS.attribute(Label, Value);
}
- void printNumber(StringRef Label, uint32_t Value) override {
+ void printNumber(StringRef Label, unsigned int Value) override {
JOS.attribute(Label, Value);
}
- void printNumber(StringRef Label, uint16_t Value) override {
+ void printNumber(StringRef Label, long Value) override {
JOS.attribute(Label, Value);
}
- void printNumber(StringRef Label, uint8_t Value) override {
+ void printNumber(StringRef Label, unsigned long Value) override {
JOS.attribute(Label, Value);
}
- void printNumber(StringRef Label, int64_t Value) override {
+ void printNumber(StringRef Label, long long Value) override {
JOS.attribute(Label, Value);
}
- void printNumber(StringRef Label, int32_t Value) override {
+ void printNumber(StringRef Label, unsigned long long Value) override {
JOS.attribute(Label, Value);
}
- void printNumber(StringRef Label, int16_t Value) override {
+ void printNumber(StringRef Label, float Value) override {
JOS.attribute(Label, Value);
}
- void printNumber(StringRef Label, int8_t Value) override {
+ void printNumber(StringRef Label, double Value) override {
JOS.attribute(Label, Value);
}
@@ -682,7 +722,7 @@ private:
void printFlagsImpl(StringRef Label, HexNumber Value,
ArrayRef<FlagEntry> Flags) override {
JOS.attributeObject(Label, [&]() {
- JOS.attribute("RawFlags", hexNumberToInt(Value));
+ JOS.attribute("Value", hexNumberToInt(Value));
JOS.attributeArray("Flags", [&]() {
for (const FlagEntry &Flag : Flags) {
JOS.objectBegin();
@@ -697,7 +737,7 @@ private:
void printFlagsImpl(StringRef Label, HexNumber Value,
ArrayRef<HexNumber> Flags) override {
JOS.attributeObject(Label, [&]() {
- JOS.attribute("RawFlags", hexNumberToInt(Value));
+ JOS.attribute("Value", hexNumberToInt(Value));
JOS.attributeArray("Flags", [&]() {
for (const HexNumber &Flag : Flags) {
JOS.value(Flag.Value);
@@ -728,8 +768,8 @@ private:
void printHexImpl(StringRef Label, StringRef Str, HexNumber Value) override {
JOS.attributeObject(Label, [&]() {
- JOS.attribute("Value", Str);
- JOS.attribute("RawValue", hexNumberToInt(Value));
+ JOS.attribute("Name", Str);
+ JOS.attribute("Value", hexNumberToInt(Value));
});
}
@@ -744,8 +784,8 @@ private:
void printNumberImpl(StringRef Label, StringRef Str,
StringRef Value) override {
JOS.attributeObject(Label, [&]() {
- JOS.attribute("Value", Str);
- JOS.attributeBegin("RawValue");
+ JOS.attribute("Name", Str);
+ JOS.attributeBegin("Value");
JOS.rawValueBegin() << Value;
JOS.rawValueEnd();
JOS.attributeEnd();
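A small sketch of the widened printNumber overload set above; a ScopedPrinter writing to outs() is assumed from the existing header:

#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"

void dumpRecord() {
  llvm::ScopedPrinter W(llvm::outs());
  W.printNumber("Count", 42u);  // unsigned int overload
  W.printNumber("Delta", -7);   // int overload
  W.printNumber("Scale", 2.5);  // double overload, formatted as "%5.1f"
}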
diff --git a/llvm/include/llvm/Support/Signals.h b/llvm/include/llvm/Support/Signals.h
index 937e0572d4a7..70749ce30184 100644
--- a/llvm/include/llvm/Support/Signals.h
+++ b/llvm/include/llvm/Support/Signals.h
@@ -102,14 +102,17 @@ namespace sys {
/// functions. A null handler pointer disables the current installed
/// function. Note also that the handler may be executed on a
/// different thread on some platforms.
- ///
- /// This is a no-op on Windows.
void SetOneShotPipeSignalFunction(void (*Handler)());
- /// On Unix systems, this function exits with an "IO error" exit code.
- /// This is a no-op on Windows.
+ /// On Unix systems and Windows, this function exits with an "IO error" exit
+ /// code.
void DefaultOneShotPipeSignalHandler();
+#ifdef _WIN32
+ /// Windows does not support signals and this handler must be called manually.
+ void CallOneShotPipeSignalHandler();
+#endif
+
/// This function does the following:
/// - clean up any temporary files registered with RemoveFileOnSignal()
/// - dump the callstack from the exception context
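A hedged sketch of installing the one-shot SIGPIPE handler described above, assuming the llvm::sys API in llvm/Support/Signals.h:

#include "llvm/Support/Signals.h"

void installPipeHandler() {
  // Exit with an "IO error" code the first time a pipe write fails.
  llvm::sys::SetOneShotPipeSignalFunction(
      llvm::sys::DefaultOneShotPipeSignalHandler);
}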
diff --git a/llvm/include/llvm/Support/SpecialCaseList.h b/llvm/include/llvm/Support/SpecialCaseList.h
index 0d56c4b9912d..b6d1b56a0962 100644
--- a/llvm/include/llvm/Support/SpecialCaseList.h
+++ b/llvm/include/llvm/Support/SpecialCaseList.h
@@ -54,7 +54,6 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/Support/Regex.h"
-#include "llvm/Support/TrigramIndex.h"
#include <memory>
#include <string>
#include <vector>
@@ -128,7 +127,6 @@ protected:
private:
StringMap<unsigned> Strings;
- TrigramIndex Trigrams;
std::vector<std::pair<std::unique_ptr<Regex>, unsigned>> RegExes;
};
@@ -155,5 +153,4 @@ protected:
} // namespace llvm
-#endif // LLVM_SUPPORT_SPECIALCASELIST_H
-
+#endif // LLVM_SUPPORT_SPECIALCASELIST_H
diff --git a/llvm/include/llvm/Support/StringSaver.h b/llvm/include/llvm/Support/StringSaver.h
index 2ef87754a0cf..fa9db30eae30 100644
--- a/llvm/include/llvm/Support/StringSaver.h
+++ b/llvm/include/llvm/Support/StringSaver.h
@@ -55,5 +55,5 @@ public:
StringRef save(const std::string &S) { return save(StringRef(S)); }
};
-}
+} // namespace llvm
#endif
diff --git a/llvm/include/llvm/Support/SuffixTree.h b/llvm/include/llvm/Support/SuffixTree.h
index 162a1de72f1a..4940fbbf308d 100644
--- a/llvm/include/llvm/Support/SuffixTree.h
+++ b/llvm/include/llvm/Support/SuffixTree.h
@@ -5,139 +5,42 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+// A data structure for fast substring queries.
//
-// This file defines the Suffix Tree class and Suffix Tree Node struct.
+// Suffix trees represent the suffixes of their input strings in their leaves.
+// A suffix tree is a type of compressed trie structure where each node
+// represents an entire substring rather than a single character. Each leaf
+// of the tree is a suffix.
//
+// A suffix tree can be seen as a type of state machine where each state is a
+// substring of the full string. The tree is structured so that, for a string
+// of length N, there are exactly N leaves in the tree. This structure allows
+// us to quickly find repeated substrings of the input string.
+//
+// In this implementation, a "string" is a vector of unsigned integers.
+// These integers may result from hashing some data type. A suffix tree can
+// contain 1 or many strings, which can then be queried as one large string.
+//
+// The suffix tree is implemented using Ukkonen's algorithm for linear-time
+// suffix tree construction. Ukkonen's algorithm is explained in more detail
+// in the paper by Esko Ukkonen "On-line construction of suffix trees. The
+// paper is available at
+//
+// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
//===----------------------------------------------------------------------===//
+
#ifndef LLVM_SUPPORT_SUFFIXTREE_H
#define LLVM_SUPPORT_SUFFIXTREE_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/Allocator.h"
-#include <vector>
+#include "llvm/Support/SuffixTreeNode.h"
namespace llvm {
-
-/// Represents an undefined index in the suffix tree.
-const unsigned EmptyIdx = -1;
-
-/// A node in a suffix tree which represents a substring or suffix.
-///
-/// Each node has either no children or at least two children, with the root
-/// being a exception in the empty tree.
-///
-/// Children are represented as a map between unsigned integers and nodes. If
-/// a node N has a child M on unsigned integer k, then the mapping represented
-/// by N is a proper prefix of the mapping represented by M. Note that this,
-/// although similar to a trie is somewhat different: each node stores a full
-/// substring of the full mapping rather than a single character state.
-///
-/// Each internal node contains a pointer to the internal node representing
-/// the same string, but with the first character chopped off. This is stored
-/// in \p Link. Each leaf node stores the start index of its respective
-/// suffix in \p SuffixIdx.
-struct SuffixTreeNode {
-
- /// The children of this node.
- ///
- /// A child existing on an unsigned integer implies that from the mapping
- /// represented by the current node, there is a way to reach another
- /// mapping by tacking that character on the end of the current string.
- llvm::DenseMap<unsigned, SuffixTreeNode *> Children;
-
- /// The start index of this node's substring in the main string.
- unsigned StartIdx = EmptyIdx;
-
- /// The end index of this node's substring in the main string.
- ///
- /// Every leaf node must have its \p EndIdx incremented at the end of every
- /// step in the construction algorithm. To avoid having to update O(N)
- /// nodes individually at the end of every step, the end index is stored
- /// as a pointer.
- unsigned *EndIdx = nullptr;
-
- /// For leaves, the start index of the suffix represented by this node.
- ///
- /// For all other nodes, this is ignored.
- unsigned SuffixIdx = EmptyIdx;
-
- /// For internal nodes, a pointer to the internal node representing
- /// the same sequence with the first character chopped off.
- ///
- /// This acts as a shortcut in Ukkonen's algorithm. One of the things that
- /// Ukkonen's algorithm does to achieve linear-time construction is
- /// keep track of which node the next insert should be at. This makes each
- /// insert O(1), and there are a total of O(N) inserts. The suffix link
- /// helps with inserting children of internal nodes.
- ///
- /// Say we add a child to an internal node with associated mapping S. The
- /// next insertion must be at the node representing S - its first character.
- /// This is given by the way that we iteratively build the tree in Ukkonen's
- /// algorithm. The main idea is to look at the suffixes of each prefix in the
- /// string, starting with the longest suffix of the prefix, and ending with
- /// the shortest. Therefore, if we keep pointers between such nodes, we can
- /// move to the next insertion point in O(1) time. If we don't, then we'd
- /// have to query from the root, which takes O(N) time. This would make the
- /// construction algorithm O(N^2) rather than O(N).
- SuffixTreeNode *Link = nullptr;
-
- /// The length of the string formed by concatenating the edge labels from the
- /// root to this node.
- unsigned ConcatLen = 0;
-
- /// Returns true if this node is a leaf.
- bool isLeaf() const { return SuffixIdx != EmptyIdx; }
-
- /// Returns true if this node is the root of its owning \p SuffixTree.
- bool isRoot() const { return StartIdx == EmptyIdx; }
-
- /// Return the number of elements in the substring associated with this node.
- size_t size() const {
-
- // Is it the root? If so, it's the empty string so return 0.
- if (isRoot())
- return 0;
-
- assert(*EndIdx != EmptyIdx && "EndIdx is undefined!");
-
- // Size = the number of elements in the string.
- // For example, [0 1 2 3] has length 4, not 3. 3-0 = 3, so we have 3-0+1.
- return *EndIdx - StartIdx + 1;
- }
-
- SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link)
- : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link) {}
-
- SuffixTreeNode() = default;
-};
-
-/// A data structure for fast substring queries.
-///
-/// Suffix trees represent the suffixes of their input strings in their leaves.
-/// A suffix tree is a type of compressed trie structure where each node
-/// represents an entire substring rather than a single character. Each leaf
-/// of the tree is a suffix.
-///
-/// A suffix tree can be seen as a type of state machine where each state is a
-/// substring of the full string. The tree is structured so that, for a string
-/// of length N, there are exactly N leaves in the tree. This structure allows
-/// us to quickly find repeated substrings of the input string.
-///
-/// In this implementation, a "string" is a vector of unsigned integers.
-/// These integers may result from hashing some data type. A suffix tree can
-/// contain 1 or many strings, which can then be queried as one large string.
-///
-/// The suffix tree is implemented using Ukkonen's algorithm for linear-time
-/// suffix tree construction. Ukkonen's algorithm is explained in more detail
-/// in the paper by Esko Ukkonen "On-line construction of suffix trees. The
-/// paper is available at
-///
-/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf
class SuffixTree {
public:
/// Each element is an integer representing an instruction in the module.
- llvm::ArrayRef<unsigned> Str;
+ ArrayRef<unsigned> Str;
/// A repeated substring in the tree.
struct RepeatedSubstring {
@@ -145,39 +48,32 @@ public:
unsigned Length;
/// The start indices of each occurrence.
- std::vector<unsigned> StartIndices;
+ SmallVector<unsigned> StartIndices;
};
private:
- /// Maintains each node in the tree.
- llvm::SpecificBumpPtrAllocator<SuffixTreeNode> NodeAllocator;
+ /// Maintains internal nodes in the tree.
+ SpecificBumpPtrAllocator<SuffixTreeInternalNode> InternalNodeAllocator;
+ /// Maintains leaf nodes in the tree.
+ SpecificBumpPtrAllocator<SuffixTreeLeafNode> LeafNodeAllocator;
/// The root of the suffix tree.
///
/// The root represents the empty string. It is maintained by the
/// \p NodeAllocator like every other node in the tree.
- SuffixTreeNode *Root = nullptr;
-
- /// Maintains the end indices of the internal nodes in the tree.
- ///
- /// Each internal node is guaranteed to never have its end index change
- /// during the construction algorithm; however, leaves must be updated at
- /// every step. Therefore, we need to store leaf end indices by reference
- /// to avoid updating O(N) leaves at every step of construction. Thus,
- /// every internal node must be allocated its own end index.
- llvm::BumpPtrAllocator InternalEndIdxAllocator;
+ SuffixTreeInternalNode *Root = nullptr;
/// The end index of each leaf in the tree.
- unsigned LeafEndIdx = -1;
+ unsigned LeafEndIdx = SuffixTreeNode::EmptyIdx;
/// Helper struct which keeps track of the next insertion point in
/// Ukkonen's algorithm.
struct ActiveState {
/// The next node to insert at.
- SuffixTreeNode *Node = nullptr;
+ SuffixTreeInternalNode *Node = nullptr;
/// The index of the first character in the substring currently being added.
- unsigned Idx = EmptyIdx;
+ unsigned Idx = SuffixTreeNode::EmptyIdx;
/// The length of the substring we have to add at the current step.
unsigned Len = 0;
@@ -194,7 +90,7 @@ private:
/// \param Edge The label on the edge leaving \p Parent to this node.
///
/// \returns A pointer to the allocated leaf node.
- SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, unsigned StartIdx,
+ SuffixTreeNode *insertLeaf(SuffixTreeInternalNode &Parent, unsigned StartIdx,
unsigned Edge);
/// Allocate an internal node and add it to the tree.
@@ -205,8 +101,14 @@ private:
/// \param Edge The label on the edge leaving \p Parent to this node.
///
/// \returns A pointer to the allocated internal node.
- SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, unsigned StartIdx,
- unsigned EndIdx, unsigned Edge);
+ SuffixTreeInternalNode *insertInternalNode(SuffixTreeInternalNode *Parent,
+ unsigned StartIdx, unsigned EndIdx,
+ unsigned Edge);
+
+ /// Allocate the root node and add it to the tree.
+ ///
+ /// \returns A pointer to the root.
+ SuffixTreeInternalNode *insertRoot();
/// Set the suffix indices of the leaves to the start indices of their
/// respective suffixes.
@@ -232,7 +134,7 @@ public:
/// Construct a suffix tree from a sequence of unsigned integers.
///
/// \param Str The string to construct the suffix tree for.
- SuffixTree(const std::vector<unsigned> &Str);
+ SuffixTree(const ArrayRef<unsigned> &Str);
/// Iterator for finding all repeated substrings in the suffix tree.
struct RepeatedSubstringIterator {
@@ -244,7 +146,7 @@ public:
RepeatedSubstring RS;
/// The nodes left to visit.
- std::vector<SuffixTreeNode *> ToVisit;
+ SmallVector<SuffixTreeInternalNode *> InternalNodesToVisit;
/// The minimum length of a repeated substring to find.
/// Since we're outlining, we want at least two instructions in the range.
@@ -253,59 +155,7 @@ public:
const unsigned MinLength = 2;
/// Move the iterator to the next repeated substring.
- void advance() {
- // Clear the current state. If we're at the end of the range, then this
- // is the state we want to be in.
- RS = RepeatedSubstring();
- N = nullptr;
-
- // Each leaf node represents a repeat of a string.
- std::vector<SuffixTreeNode *> LeafChildren;
-
- // Continue visiting nodes until we find one which repeats more than once.
- while (!ToVisit.empty()) {
- SuffixTreeNode *Curr = ToVisit.back();
- ToVisit.pop_back();
- LeafChildren.clear();
-
- // Keep track of the length of the string associated with the node. If
- // it's too short, we'll quit.
- unsigned Length = Curr->ConcatLen;
-
- // Iterate over each child, saving internal nodes for visiting, and
- // leaf nodes in LeafChildren. Internal nodes represent individual
- // strings, which may repeat.
- for (auto &ChildPair : Curr->Children) {
- // Save all of this node's children for processing.
- if (!ChildPair.second->isLeaf())
- ToVisit.push_back(ChildPair.second);
-
- // It's not an internal node, so it must be a leaf. If we have a
- // long enough string, then save the leaf children.
- else if (Length >= MinLength)
- LeafChildren.push_back(ChildPair.second);
- }
-
- // The root never represents a repeated substring. If we're looking at
- // that, then skip it.
- if (Curr->isRoot())
- continue;
-
- // Do we have any repeated substrings?
- if (LeafChildren.size() >= 2) {
- // Yes. Update the state to reflect this, and then bail out.
- N = Curr;
- RS.Length = Length;
- for (SuffixTreeNode *Leaf : LeafChildren)
- RS.StartIndices.push_back(Leaf->SuffixIdx);
- break;
- }
- }
-
- // At this point, either NewRS is an empty RepeatedSubstring, or it was
- // set in the above loop. Similarly, N is either nullptr, or the node
- // associated with NewRS.
- }
+ void advance();
public:
/// Return the current repeated substring.
@@ -329,14 +179,14 @@ public:
return !(*this == Other);
}
- RepeatedSubstringIterator(SuffixTreeNode *N) : N(N) {
+ RepeatedSubstringIterator(SuffixTreeInternalNode *N) : N(N) {
// Do we have a non-null node?
- if (N) {
- // Yes. At the first step, we need to visit all of N's children.
- // Note: This means that we visit N last.
- ToVisit.push_back(N);
- advance();
- }
+ if (!N)
+ return;
+ // Yes. At the first step, we need to visit all of N's children.
+ // Note: This means that we visit N last.
+ InternalNodesToVisit.push_back(N);
+ advance();
}
};
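A hedged sketch of building a suffix tree over hashed "instructions" and walking its repeated substrings; begin()/end() iterators are assumed to exist on SuffixTree as in the existing header, since they are not shown in this hunk:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/SuffixTree.h"
using namespace llvm;

void findRepeats(ArrayRef<unsigned> Mapped) {
  SuffixTree ST(Mapped);
  for (auto It = ST.begin(); It != ST.end(); ++It) {
    const SuffixTree::RepeatedSubstring &RS = *It;
    // RS.Length and RS.StartIndices describe one repeated substring.
    (void)RS;
  }
}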
diff --git a/llvm/include/llvm/Support/SuffixTreeNode.h b/llvm/include/llvm/Support/SuffixTreeNode.h
new file mode 100644
index 000000000000..7d0d1cf0c58b
--- /dev/null
+++ b/llvm/include/llvm/Support/SuffixTreeNode.h
@@ -0,0 +1,171 @@
+//===- llvm/ADT/SuffixTreeNode.h - Nodes for SuffixTrees --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines nodes for use within a SuffixTree.
+//
+// Each node has either no children or at least two children, with the root
+// being an exception in the empty tree.
+//
+// Children are represented as a map between unsigned integers and nodes. If
+// a node N has a child M on unsigned integer k, then the mapping represented
+// by N is a proper prefix of the mapping represented by M. Note that this,
+// although similar to a trie is somewhat different: each node stores a full
+// substring of the full mapping rather than a single character state.
+//
+// Each internal node contains a pointer to the internal node representing
+// the same string, but with the first character chopped off. This is stored
+// in \p Link. Each leaf node stores the start index of its respective
+// suffix in \p SuffixIdx.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_SUFFIXTREE_NODE_H
+#define LLVM_SUPPORT_SUFFIXTREE_NODE_H
+#include "llvm/ADT/DenseMap.h"
+
+namespace llvm {
+
+/// A node in a suffix tree which represents a substring or suffix.
+struct SuffixTreeNode {
+public:
+ /// Represents an undefined index in the suffix tree.
+ static const unsigned EmptyIdx = -1;
+ enum class NodeKind { ST_Leaf, ST_Internal };
+
+private:
+ const NodeKind Kind;
+
+ /// The start index of this node's substring in the main string.
+ unsigned StartIdx = EmptyIdx;
+
+ /// The length of the string formed by concatenating the edge labels from
+ /// the root to this node.
+ unsigned ConcatLen = 0;
+
+public:
+ // LLVM RTTI boilerplate.
+ NodeKind getKind() const { return Kind; }
+
+ /// \return the start index of this node's substring in the entire string.
+ unsigned getStartIdx() const;
+
+ /// \returns the end index of this node.
+ virtual unsigned getEndIdx() const = 0;
+
+ /// Advance this node's StartIdx by \p Inc.
+ void incrementStartIdx(unsigned Inc);
+
+ /// Set the length of the string from the root to this node to \p Len.
+ void setConcatLen(unsigned Len);
+
+ /// \returns the length of the string from the root to this node.
+ unsigned getConcatLen() const;
+
+ SuffixTreeNode(NodeKind Kind, unsigned StartIdx)
+ : Kind(Kind), StartIdx(StartIdx) {}
+ virtual ~SuffixTreeNode() = default;
+};
+
+// A node with two or more children, or the root.
+struct SuffixTreeInternalNode : SuffixTreeNode {
+private:
+ /// The end index of this node's substring in the main string.
+ ///
+ /// Every leaf node must have its \p EndIdx incremented at the end of every
+ /// step in the construction algorithm. To avoid having to update O(N)
+ /// nodes individually at the end of every step, the end index is stored
+ /// as a pointer.
+ unsigned EndIdx = EmptyIdx;
+
+ /// A pointer to the internal node representing the same sequence with the
+ /// first character chopped off.
+ ///
+ /// This acts as a shortcut in Ukkonen's algorithm. One of the things that
+ /// Ukkonen's algorithm does to achieve linear-time construction is
+ /// keep track of which node the next insert should be at. This makes each
+ /// insert O(1), and there are a total of O(N) inserts. The suffix link
+ /// helps with inserting children of internal nodes.
+ ///
+ /// Say we add a child to an internal node with associated mapping S. The
+ /// next insertion must be at the node representing S - its first character.
+ /// This is given by the way that we iteratively build the tree in Ukkonen's
+ /// algorithm. The main idea is to look at the suffixes of each prefix in the
+ /// string, starting with the longest suffix of the prefix, and ending with
+ /// the shortest. Therefore, if we keep pointers between such nodes, we can
+ /// move to the next insertion point in O(1) time. If we don't, then we'd
+ /// have to query from the root, which takes O(N) time. This would make the
+ /// construction algorithm O(N^2) rather than O(N).
+ SuffixTreeInternalNode *Link = nullptr;
+
+public:
+ // LLVM RTTI boilerplate.
+ static bool classof(const SuffixTreeNode *N) {
+ return N->getKind() == NodeKind::ST_Internal;
+ }
+
+ /// \returns true if this node is the root of its owning \p SuffixTree.
+ bool isRoot() const;
+
+ /// \returns the end index of this node's substring in the entire string.
+ unsigned getEndIdx() const override;
+
+ /// Sets \p Link to \p L. Assumes \p L is not null.
+ void setLink(SuffixTreeInternalNode *L);
+
+ /// \returns the pointer to the Link node.
+ SuffixTreeInternalNode *getLink() const;
+
+ /// The children of this node.
+ ///
+ /// A child existing on an unsigned integer implies that from the mapping
+ /// represented by the current node, there is a way to reach another
+ /// mapping by tacking that character on the end of the current string.
+ DenseMap<unsigned, SuffixTreeNode *> Children;
+
+ SuffixTreeInternalNode(unsigned StartIdx, unsigned EndIdx,
+ SuffixTreeInternalNode *Link)
+ : SuffixTreeNode(NodeKind::ST_Internal, StartIdx), EndIdx(EndIdx),
+ Link(Link) {}
+
+ virtual ~SuffixTreeInternalNode() = default;
+};
+
+// A node representing a suffix.
+struct SuffixTreeLeafNode : SuffixTreeNode {
+private:
+ /// The start index of the suffix represented by this leaf.
+ unsigned SuffixIdx = EmptyIdx;
+
+ /// The end index of this node's substring in the main string.
+ ///
+ /// Every leaf node must have its \p EndIdx incremented at the end of every
+ /// step in the construction algorithm. To avoid having to update O(N)
+ /// nodes individually at the end of every step, the end index is stored
+ /// as a pointer.
+ unsigned *EndIdx = nullptr;
+
+public:
+ // LLVM RTTI boilerplate.
+ static bool classof(const SuffixTreeNode *N) {
+ return N->getKind() == NodeKind::ST_Leaf;
+ }
+
+ /// \returns the end index of this node's substring in the entire string.
+ unsigned getEndIdx() const override;
+
+ /// \returns the start index of the suffix represented by this leaf.
+ unsigned getSuffixIdx() const;
+
+ /// Sets the start index of the suffix represented by this leaf to \p Idx.
+ void setSuffixIdx(unsigned Idx);
+ SuffixTreeLeafNode(unsigned StartIdx, unsigned *EndIdx)
+ : SuffixTreeNode(NodeKind::ST_Leaf, StartIdx), EndIdx(EndIdx) {}
+
+ virtual ~SuffixTreeLeafNode() = default;
+};
+} // namespace llvm
+#endif // LLVM_SUPPORT_SUFFIXTREE_NODE_H \ No newline at end of file
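The NodeKind/classof boilerplate above enables LLVM-style RTTI, so node kinds can be distinguished with isa<> and dyn_cast<>; a minimal sketch assuming only this header and llvm/Support/Casting.h:

#include "llvm/Support/Casting.h"
#include "llvm/Support/SuffixTreeNode.h"
using namespace llvm;

unsigned suffixStartOrZero(const SuffixTreeNode *N) {
  if (const auto *Leaf = dyn_cast<SuffixTreeLeafNode>(N))
    return Leaf->getSuffixIdx();
  return 0; // Internal node or root.
}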
diff --git a/llvm/include/llvm/Support/SwapByteOrder.h b/llvm/include/llvm/Support/SwapByteOrder.h
index 9dd08665bd23..1bbc2e2f944e 100644
--- a/llvm/include/llvm/Support/SwapByteOrder.h
+++ b/llvm/include/llvm/Support/SwapByteOrder.h
@@ -46,16 +46,6 @@
namespace llvm {
-/// ByteSwap_16 - This function returns a byte-swapped representation of
-/// the 16-bit argument.
-inline uint16_t ByteSwap_16(uint16_t value) { return llvm::byteswap(value); }
-
-/// This function returns a byte-swapped representation of the 32-bit argument.
-inline uint32_t ByteSwap_32(uint32_t value) { return llvm::byteswap(value); }
-
-/// This function returns a byte-swapped representation of the 64-bit argument.
-inline uint64_t ByteSwap_64(uint64_t value) { return llvm::byteswap(value); }
-
namespace sys {
#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN
@@ -103,7 +93,7 @@ inline double getSwappedBytes(double C) {
}
template <typename T>
-inline std::enable_if_t<std::is_enum<T>::value, T> getSwappedBytes(T C) {
+inline std::enable_if_t<std::is_enum_v<T>, T> getSwappedBytes(T C) {
return static_cast<T>(
llvm::byteswap(static_cast<std::underlying_type_t<T>>(C)));
}
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index d3fe1eec38d7..186bea75ae96 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -290,6 +290,10 @@ HANDLE_TARGET_OPCODE(G_FRAME_INDEX)
/// Generic reference to global value.
HANDLE_TARGET_OPCODE(G_GLOBAL_VALUE)
+/// Generic instruction to materialize the address of an object in the constant
+/// pool.
+HANDLE_TARGET_OPCODE(G_CONSTANT_POOL)
+
/// Generic instruction to extract blocks of bits from the register given
/// (typically a sub-register COPY after instruction selection).
HANDLE_TARGET_OPCODE(G_EXTRACT)
@@ -328,6 +332,9 @@ HANDLE_TARGET_OPCODE(G_BITCAST)
/// Generic freeze.
HANDLE_TARGET_OPCODE(G_FREEZE)
+/// Constant folding barrier.
+HANDLE_TARGET_OPCODE(G_CONSTANT_FOLD_BARRIER)
+
// INTRINSIC fptrunc_round intrinsic.
HANDLE_TARGET_OPCODE(G_INTRINSIC_FPTRUNC_ROUND)
@@ -609,6 +616,12 @@ HANDLE_TARGET_OPCODE(G_FLOG2)
/// Floating point base-10 logarithm of a value.
HANDLE_TARGET_OPCODE(G_FLOG10)
+/// Floating point x * 2^n
+HANDLE_TARGET_OPCODE(G_FLDEXP)
+
+/// Floating point extract fraction and exponent.
+HANDLE_TARGET_OPCODE(G_FFREXP)
+
/// Generic FP negation.
HANDLE_TARGET_OPCODE(G_FNEG)
@@ -758,6 +771,7 @@ HANDLE_TARGET_OPCODE(G_STRICT_FDIV)
HANDLE_TARGET_OPCODE(G_STRICT_FREM)
HANDLE_TARGET_OPCODE(G_STRICT_FMA)
HANDLE_TARGET_OPCODE(G_STRICT_FSQRT)
+HANDLE_TARGET_OPCODE(G_STRICT_FLDEXP)
/// read_register intrinsic
HANDLE_TARGET_OPCODE(G_READ_REGISTER)
diff --git a/llvm/include/llvm/Support/TaskQueue.h b/llvm/include/llvm/Support/TaskQueue.h
deleted file mode 100644
index 1b44a163568a..000000000000
--- a/llvm/include/llvm/Support/TaskQueue.h
+++ /dev/null
@@ -1,138 +0,0 @@
-//===-- llvm/Support/TaskQueue.h - A TaskQueue implementation ---*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a crude C++11 based task queue.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_TASKQUEUE_H
-#define LLVM_SUPPORT_TASKQUEUE_H
-
-#include "llvm/Config/llvm-config.h"
-#include "llvm/Support/ThreadPool.h"
-#include "llvm/Support/thread.h"
-
-#include <atomic>
-#include <cassert>
-#include <condition_variable>
-#include <deque>
-#include <functional>
-#include <future>
-#include <memory>
-#include <mutex>
-#include <utility>
-
-namespace llvm {
-/// TaskQueue executes serialized work on a user-defined Thread Pool. It
-/// guarantees that if task B is enqueued after task A, task B begins after
-/// task A completes and there is no overlap between the two.
-class TaskQueue {
- // Because we don't have init capture to use move-only local variables that
- // are captured into a lambda, we create the promise inside an explicit
- // callable struct. We want to do as much of the wrapping in the
- // type-specialized domain (before type erasure) and then erase this into a
- // std::function.
- template <typename Callable> struct Task {
- using ResultTy = std::invoke_result_t<Callable>;
- explicit Task(Callable C, TaskQueue &Parent)
- : C(std::move(C)), P(std::make_shared<std::promise<ResultTy>>()),
- Parent(&Parent) {}
-
- template<typename T>
- void invokeCallbackAndSetPromise(T*) {
- P->set_value(C());
- }
-
- void invokeCallbackAndSetPromise(void*) {
- C();
- P->set_value();
- }
-
- void operator()() noexcept {
- ResultTy *Dummy = nullptr;
- invokeCallbackAndSetPromise(Dummy);
- Parent->completeTask();
- }
-
- Callable C;
- std::shared_ptr<std::promise<ResultTy>> P;
- TaskQueue *Parent;
- };
-
-public:
- /// Construct a task queue with no work.
- TaskQueue(ThreadPool &Scheduler) : Scheduler(Scheduler) { (void)Scheduler; }
-
- /// Blocking destructor: the queue will wait for all work to complete.
- ~TaskQueue() {
- Scheduler.wait();
- assert(Tasks.empty());
- }
-
- /// Asynchronous submission of a task to the queue. The returned future can be
- /// used to wait for the task (and all previous tasks that have not yet
- /// completed) to finish.
- template <typename Callable>
- std::future<std::invoke_result_t<Callable>> async(Callable &&C) {
-#if !LLVM_ENABLE_THREADS
- static_assert(false,
- "TaskQueue requires building with LLVM_ENABLE_THREADS!");
-#endif
- Task<Callable> T{std::move(C), *this};
- using ResultTy = std::invoke_result_t<Callable>;
- std::future<ResultTy> F = T.P->get_future();
- {
- std::lock_guard<std::mutex> Lock(QueueLock);
- // If there's already a task in flight, just queue this one up. If
- // there is not a task in flight, bypass the queue and schedule this
- // task immediately.
- if (IsTaskInFlight)
- Tasks.push_back(std::move(T));
- else {
- Scheduler.async(std::move(T));
- IsTaskInFlight = true;
- }
- }
- return F;
- }
-
-private:
- void completeTask() {
- // We just completed a task. If there are no more tasks in the queue,
- // update IsTaskInFlight to false and stop doing work. Otherwise
- // schedule the next task (while not holding the lock).
- std::function<void()> Continuation;
- {
- std::lock_guard<std::mutex> Lock(QueueLock);
- if (Tasks.empty()) {
- IsTaskInFlight = false;
- return;
- }
-
- Continuation = std::move(Tasks.front());
- Tasks.pop_front();
- }
- Scheduler.async(std::move(Continuation));
- }
-
- /// The thread pool on which to run the work.
- ThreadPool &Scheduler;
-
- /// State which indicates whether the queue currently is currently processing
- /// any work.
- bool IsTaskInFlight = false;
-
- /// Mutex for synchronizing access to the Tasks array.
- std::mutex QueueLock;
-
- /// Tasks waiting for execution in the queue.
- std::deque<std::function<void()>> Tasks;
-};
-} // namespace llvm
-
-#endif // LLVM_SUPPORT_TASKQUEUE_H
diff --git a/llvm/include/llvm/Support/Timer.h b/llvm/include/llvm/Support/Timer.h
index d72af3541af0..1a32832b6c65 100644
--- a/llvm/include/llvm/Support/Timer.h
+++ b/llvm/include/llvm/Support/Timer.h
@@ -23,15 +23,13 @@ class TimerGroup;
class raw_ostream;
class TimeRecord {
- double WallTime; ///< Wall clock time elapsed in seconds.
- double UserTime; ///< User time elapsed.
- double SystemTime; ///< System time elapsed.
- ssize_t MemUsed; ///< Memory allocated (in bytes).
- uint64_t InstructionsExecuted; ///< Number of instructions executed
+ double WallTime = 0.0; ///< Wall clock time elapsed in seconds.
+ double UserTime = 0.0; ///< User time elapsed.
+ double SystemTime = 0.0; ///< System time elapsed.
+ ssize_t MemUsed = 0; ///< Memory allocated (in bytes).
+ uint64_t InstructionsExecuted = 0; ///< Number of instructions executed
public:
- TimeRecord()
- : WallTime(0), UserTime(0), SystemTime(0), MemUsed(0),
- InstructionsExecuted(0) {}
+ TimeRecord() = default;
/// Get the current time and memory usage. If Start is true we get the memory
/// usage before the time, otherwise we get time before memory usage. This
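The TimeRecord change above replaces a member-initializer-list constructor with in-class default member initializers; the same pattern in isolation (Sample is a made-up stand-in):

  struct Sample {
    double WallTime = 0.0; // in-class initializers replace the old constructor init list
    long MemUsed = 0;
    Sample() = default;    // the defaulted constructor still produces the zeroed state
  };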
diff --git a/llvm/include/llvm/Support/TrailingObjects.h b/llvm/include/llvm/Support/TrailingObjects.h
index f9e711a5dc17..f8a546b5c85a 100644
--- a/llvm/include/llvm/Support/TrailingObjects.h
+++ b/llvm/include/llvm/Support/TrailingObjects.h
@@ -310,7 +310,7 @@ public:
/// that it's clear what the counts are counting in callers.
template <typename... Tys>
static constexpr std::enable_if_t<
- std::is_same<Foo<TrailingTys...>, Foo<Tys...>>::value, size_t>
+ std::is_same_v<Foo<TrailingTys...>, Foo<Tys...>>, size_t>
additionalSizeToAlloc(typename trailing_objects_internal::ExtractSecondType<
TrailingTys, size_t>::type... Counts) {
return ParentType::additionalSizeToAllocImpl(0, Counts...);
@@ -322,7 +322,7 @@ public:
/// object.
template <typename... Tys>
static constexpr std::enable_if_t<
- std::is_same<Foo<TrailingTys...>, Foo<Tys...>>::value, size_t>
+ std::is_same_v<Foo<TrailingTys...>, Foo<Tys...>>, size_t>
totalSizeToAlloc(typename trailing_objects_internal::ExtractSecondType<
TrailingTys, size_t>::type... Counts) {
return sizeof(BaseTy) + ParentType::additionalSizeToAllocImpl(0, Counts...);
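Several hunks in this import replace trait ::value lookups with the C++17 _v variable templates; the two spellings are equivalent, for example:

  #include <type_traits>

  // The _v variable templates are shorthand for the corresponding ::value members.
  static_assert(std::is_same<int, int>::value == std::is_same_v<int, int>,
                "identical results, shorter spelling");
  static_assert(std::is_signed_v<long> && !std::is_signed_v<unsigned long>,
                "the same applies to the other traits touched in this import");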
diff --git a/llvm/include/llvm/Support/TrigramIndex.h b/llvm/include/llvm/Support/TrigramIndex.h
deleted file mode 100644
index 0bfac498393f..000000000000
--- a/llvm/include/llvm/Support/TrigramIndex.h
+++ /dev/null
@@ -1,67 +0,0 @@
-//===-- TrigramIndex.h - a heuristic for SpecialCaseList --------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//===----------------------------------------------------------------------===//
-//
-// TrigramIndex implements a heuristic for SpecialCaseList that allows to
-// filter out ~99% incoming queries when all regular expressions in the
-// SpecialCaseList are simple wildcards with '*' and '.'. If rules are more
-// complicated, the check is defeated and it will always pass the queries to a
-// full regex.
-//
-// The basic idea is that in order for a wildcard to match a query, the query
-// needs to have all trigrams which occur in the wildcard. We create a trigram
-// index (trigram -> list of rules with it) and then count trigrams in the query
-// for each rule. If the count for one of the rules reaches the expected value,
-// the check passes the query to a regex. If none of the rules got enough
-// trigrams, the check tells that the query is definitely not matched by any
-// of the rules, and no regex matching is needed.
-// A similar idea was used in Google Code Search as described in the blog post:
-// https://swtch.com/~rsc/regexp/regexp4.html
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_TRIGRAMINDEX_H
-#define LLVM_SUPPORT_TRIGRAMINDEX_H
-
-#include "llvm/ADT/SmallVector.h"
-#include <string>
-#include <unordered_map>
-#include <vector>
-
-namespace llvm {
-class StringRef;
-
-class TrigramIndex {
- public:
- /// Inserts a new Regex into the index.
- void insert(const std::string &Regex);
-
- /// Returns true, if special case list definitely does not have a line
- /// that matches the query. Returns false, if it's not sure.
- bool isDefinitelyOut(StringRef Query) const;
-
- /// Returned true, iff the heuristic is defeated and not useful.
- /// In this case isDefinitelyOut always returns false.
- bool isDefeated() { return Defeated; }
- private:
- // If true, the rules are too complicated for the check to work, and full
- // regex matching is needed for every rule.
- bool Defeated = false;
- // The minimum number of trigrams which should match for a rule to have a
- // chance to match the query. The number of elements equals the number of
- // regex rules in the SpecialCaseList.
- std::vector<unsigned> Counts;
- // Index holds a list of rules indices for each trigram. The same indices
- // are used in Counts to store per-rule limits.
- // If a trigram is too common (>4 rules with it), we stop tracking it,
- // which increases the probability for a need to match using regex, but
- // decreases the costs in the regular case.
- std::unordered_map<unsigned, SmallVector<size_t, 4>> Index{256};
-};
-
-} // namespace llvm
-
-#endif // LLVM_SUPPORT_TRIGRAMINDEX_H
diff --git a/llvm/include/llvm/Support/TypeName.h b/llvm/include/llvm/Support/TypeName.h
index 236490a25011..95f20f7dfde7 100644
--- a/llvm/include/llvm/Support/TypeName.h
+++ b/llvm/include/llvm/Support/TypeName.h
@@ -59,6 +59,6 @@ inline StringRef getTypeName() {
#endif
}
-}
+} // namespace llvm
#endif
diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h
index 0abd4b3db4ca..9683c82b2278 100644
--- a/llvm/include/llvm/Support/TypeSize.h
+++ b/llvm/include/llvm/Support/TypeSize.h
@@ -133,7 +133,7 @@ protected:
}
template <typename U = ScalarTy>
- friend constexpr std::enable_if_t<std::is_signed<U>::value, LeafTy>
+ friend constexpr std::enable_if_t<std::is_signed_v<U>, LeafTy>
operator-(const LeafTy &LHS) {
LeafTy Copy = LHS;
return Copy *= -1;
@@ -322,8 +322,8 @@ public:
static constexpr TypeSize getFixed(ScalarTy ExactSize) {
return TypeSize(ExactSize, false);
}
- static constexpr TypeSize getScalable(ScalarTy MinimunSize) {
- return TypeSize(MinimunSize, true);
+ static constexpr TypeSize getScalable(ScalarTy MinimumSize) {
+ return TypeSize(MinimumSize, true);
}
static constexpr TypeSize get(ScalarTy Quantity, bool Scalable) {
return TypeSize(Quantity, Scalable);
diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h
index 3ef17a7de379..697343c7e763 100644
--- a/llvm/include/llvm/Support/VirtualFileSystem.h
+++ b/llvm/include/llvm/Support/VirtualFileSystem.h
@@ -872,6 +872,9 @@ public:
/// Represents the result of a path lookup into the RedirectingFileSystem.
struct LookupResult {
+ /// Chain of parent directory entries for \c E.
+ llvm::SmallVector<Entry *, 32> Parents;
+
/// The entry the looked-up path corresponds to.
Entry *E;
@@ -895,6 +898,10 @@ public:
return FE->getExternalContentsPath();
return std::nullopt;
}
+
+ /// Get the (canonical) path of the found entry. This uses the as-written
+ /// path components from the VFS specification.
+ void getPath(llvm::SmallVectorImpl<char> &Path) const;
};
private:
@@ -984,9 +991,10 @@ private:
/// into the contents of \p From if it is a directory. Returns a LookupResult
/// giving the matched entry and, if that entry is a FileEntry or
/// DirectoryRemapEntry, the path it redirects to in the external file system.
- ErrorOr<LookupResult> lookupPathImpl(llvm::sys::path::const_iterator Start,
- llvm::sys::path::const_iterator End,
- Entry *From) const;
+ ErrorOr<LookupResult>
+ lookupPathImpl(llvm::sys::path::const_iterator Start,
+ llvm::sys::path::const_iterator End, Entry *From,
+ llvm::SmallVectorImpl<Entry *> &Entries) const;
/// Get the status for a path with the provided \c LookupResult.
ErrorOr<Status> status(const Twine &CanonicalPath, const Twine &OriginalPath,
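A rough sketch of how the new LookupResult::getPath accessor might be consumed, assuming a lookup result has already been produced by RedirectingFileSystem's internal lookup machinery:

  #include "llvm/ADT/SmallString.h"
  #include "llvm/Support/VirtualFileSystem.h"
  #include "llvm/Support/raw_ostream.h"

  // Sketch only: prints the canonical (as-written) path of an already-computed result.
  void printMatchedPath(const llvm::vfs::RedirectingFileSystem::LookupResult &Result) {
    llvm::SmallString<256> CanonicalPath;
    Result.getPath(CanonicalPath);
    llvm::errs() << "matched entry: " << CanonicalPath << "\n";
  }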
diff --git a/llvm/include/llvm/Support/WithColor.h b/llvm/include/llvm/Support/WithColor.h
index b249f34da1fa..205400592847 100644
--- a/llvm/include/llvm/Support/WithColor.h
+++ b/llvm/include/llvm/Support/WithColor.h
@@ -60,7 +60,7 @@ public:
/// @param Mode Enable, disable or compute whether to use colors.
WithColor(raw_ostream &OS, HighlightColor S,
ColorMode Mode = ColorMode::Auto);
- /// To be used like this: WithColor(OS, raw_ostream::Black) << "text";
+ /// To be used like this: WithColor(OS, raw_ostream::BLACK) << "text";
/// @param OS The output stream
/// @param Color ANSI color to use, the special SAVEDCOLOR can be used to
/// change only the bold attribute, and keep colors untouched
diff --git a/llvm/include/llvm/Support/X86FoldTablesUtils.h b/llvm/include/llvm/Support/X86FoldTablesUtils.h
new file mode 100644
index 000000000000..bddff7068b82
--- /dev/null
+++ b/llvm/include/llvm/Support/X86FoldTablesUtils.h
@@ -0,0 +1,58 @@
+//===-- X86FoldTablesUtils.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_X86FOLDTABLESUTILS_H
+#define LLVM_SUPPORT_X86FOLDTABLESUTILS_H
+
+namespace llvm {
+enum {
+ // Select which memory operand is being unfolded.
+ // (stored in bits 0 - 2)
+ TB_INDEX_0 = 0,
+ TB_INDEX_1 = 1,
+ TB_INDEX_2 = 2,
+ TB_INDEX_3 = 3,
+ TB_INDEX_4 = 4,
+ TB_INDEX_MASK = 0x7,
+
+ // Do not insert the reverse map (MemOp -> RegOp) into the table.
+ // This may be needed because there is a many -> one mapping.
+ TB_NO_REVERSE = 1 << 3,
+
+ // Do not insert the forward map (RegOp -> MemOp) into the table.
+ // This is needed for Native Client, which prohibits branch
+ // instructions from using a memory operand.
+ TB_NO_FORWARD = 1 << 4,
+
+ TB_FOLDED_LOAD = 1 << 5,
+ TB_FOLDED_STORE = 1 << 6,
+ TB_FOLDED_BCAST = 1 << 7,
+
+ // Minimum alignment required for load/store.
+ // Used for RegOp->MemOp conversion. Encoded as Log2(Align)
+ // (stored in bits 9 - 11)
+ TB_ALIGN_SHIFT = 8,
+ TB_ALIGN_1 = 0 << TB_ALIGN_SHIFT,
+ TB_ALIGN_16 = 4 << TB_ALIGN_SHIFT,
+ TB_ALIGN_32 = 5 << TB_ALIGN_SHIFT,
+ TB_ALIGN_64 = 6 << TB_ALIGN_SHIFT,
+ TB_ALIGN_MASK = 0x7 << TB_ALIGN_SHIFT,
+
+ // Broadcast type.
+ // (stored in bits 12 - 13)
+ TB_BCAST_TYPE_SHIFT = TB_ALIGN_SHIFT + 3,
+ TB_BCAST_D = 0 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_Q = 1 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_SS = 2 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_SD = 3 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_MASK = 0x3 << TB_BCAST_TYPE_SHIFT,
+
+ // Unused bits 14-15
+};
+} // namespace llvm
+#endif // LLVM_SUPPORT_X86FOLDTABLESUTILS_H
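The header packs the unfold index, fold kind, and alignment into a single flags word; a small sketch of decoding such a word (the example flags value is hypothetical):

  #include "llvm/Support/X86FoldTablesUtils.h"

  unsigned decodeMemOpIndex(unsigned Flags) { return Flags & llvm::TB_INDEX_MASK; }

  unsigned decodeAlignment(unsigned Flags) {
    // Alignment is stored as Log2(Align) starting at TB_ALIGN_SHIFT.
    return 1u << ((Flags & llvm::TB_ALIGN_MASK) >> llvm::TB_ALIGN_SHIFT);
  }

  // e.g. with Flags = TB_INDEX_1 | TB_FOLDED_LOAD | TB_ALIGN_16:
  //   decodeMemOpIndex(Flags) == 1 and decodeAlignment(Flags) == 16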
diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def
deleted file mode 100644
index 416d583c3556..000000000000
--- a/llvm/include/llvm/Support/X86TargetParser.def
+++ /dev/null
@@ -1,15 +0,0 @@
-//===-- llvm/Support/X86TargetParser.def ------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This header is deprecated in favour of
-/// `llvm/TargetParser/X86TargetParser.def`.
-///
-//===----------------------------------------------------------------------===//
-
-#include "llvm/TargetParser/X86TargetParser.def"
diff --git a/llvm/include/llvm/Support/X86TargetParser.h b/llvm/include/llvm/Support/X86TargetParser.h
deleted file mode 100644
index 351fb89d4601..000000000000
--- a/llvm/include/llvm/Support/X86TargetParser.h
+++ /dev/null
@@ -1,15 +0,0 @@
-//===-- llvm/Support/X86TargetParser.h --------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This header is deprecated in favour of
-/// `llvm/TargetParser/X86TargetParser.h`.
-///
-//===----------------------------------------------------------------------===//
-
-#include "llvm/TargetParser/X86TargetParser.h"
diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h
index ef74f38671b5..3ed29821fa8f 100644
--- a/llvm/include/llvm/Support/YAMLTraits.h
+++ b/llvm/include/llvm/Support/YAMLTraits.h
@@ -506,9 +506,7 @@ struct has_CustomMappingTraits
// has_FlowTraits<int> will cause an error with some compilers because
// it subclasses int. Using this wrapper only instantiates the
// real has_FlowTraits only if the template type is a class.
-template <typename T, bool Enabled = std::is_class<T>::value>
-class has_FlowTraits
-{
+template <typename T, bool Enabled = std::is_class_v<T>> class has_FlowTraits {
public:
static const bool value = false;
};
@@ -2011,8 +2009,7 @@ struct SequenceTraits<
// Sequences of fundamental types use flow formatting.
template <typename T>
-struct SequenceElementTraits<T,
- std::enable_if_t<std::is_fundamental<T>::value>> {
+struct SequenceElementTraits<T, std::enable_if_t<std::is_fundamental_v<T>>> {
static const bool flow = true;
};
diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h
index 7c42f355fd43..1e01eb9ea19c 100644
--- a/llvm/include/llvm/Support/raw_ostream.h
+++ b/llvm/include/llvm/Support/raw_ostream.h
@@ -414,8 +414,8 @@ private:
/// Call the appropriate insertion operator, given an rvalue reference to a
/// raw_ostream object and return a stream of the same type as the argument.
template <typename OStream, typename T>
-std::enable_if_t<!std::is_reference<OStream>::value &&
- std::is_base_of<raw_ostream, OStream>::value,
+std::enable_if_t<!std::is_reference_v<OStream> &&
+ std::is_base_of_v<raw_ostream, OStream>,
OStream &&>
operator<<(OStream &&OS, const T &Value) {
OS << Value;
diff --git a/llvm/include/llvm/Support/type_traits.h b/llvm/include/llvm/Support/type_traits.h
index a6046de87d1e..86f07c19477d 100644
--- a/llvm/include/llvm/Support/type_traits.h
+++ b/llvm/include/llvm/Support/type_traits.h
@@ -32,11 +32,11 @@ template <typename T> class is_integral_or_enum {
public:
static const bool value =
- !std::is_class<UnderlyingT>::value && // Filter conversion operators.
- !std::is_pointer<UnderlyingT>::value &&
- !std::is_floating_point<UnderlyingT>::value &&
- (std::is_enum<UnderlyingT>::value ||
- std::is_convertible<UnderlyingT, unsigned long long>::value);
+ !std::is_class_v<UnderlyingT> && // Filter conversion operators.
+ !std::is_pointer_v<UnderlyingT> &&
+ !std::is_floating_point_v<UnderlyingT> &&
+ (std::is_enum_v<UnderlyingT> ||
+ std::is_convertible_v<UnderlyingT, unsigned long long>);
};
/// If T is a pointer, just return it. If it is not, return T&.
@@ -45,7 +45,7 @@ struct add_lvalue_reference_if_not_pointer { using type = T &; };
template <typename T>
struct add_lvalue_reference_if_not_pointer<
- T, std::enable_if_t<std::is_pointer<T>::value>> {
+ T, std::enable_if_t<std::is_pointer_v<T>>> {
using type = T;
};
@@ -55,7 +55,7 @@ template<typename T, typename Enable = void>
struct add_const_past_pointer { using type = const T; };
template <typename T>
-struct add_const_past_pointer<T, std::enable_if_t<std::is_pointer<T>::value>> {
+struct add_const_past_pointer<T, std::enable_if_t<std::is_pointer_v<T>>> {
using type = const std::remove_pointer_t<T> *;
};
@@ -64,8 +64,7 @@ struct const_pointer_or_const_ref {
using type = const T &;
};
template <typename T>
-struct const_pointer_or_const_ref<T,
- std::enable_if_t<std::is_pointer<T>::value>> {
+struct const_pointer_or_const_ref<T, std::enable_if_t<std::is_pointer_v<T>>> {
using type = typename add_const_past_pointer<T>::type;
};
diff --git a/llvm/include/llvm/Support/xxhash.h b/llvm/include/llvm/Support/xxhash.h
index 6fd67ff9ce1c..0cef3a54e50d 100644
--- a/llvm/include/llvm/Support/xxhash.h
+++ b/llvm/include/llvm/Support/xxhash.h
@@ -44,6 +44,11 @@
namespace llvm {
uint64_t xxHash64(llvm::StringRef Data);
uint64_t xxHash64(llvm::ArrayRef<uint8_t> Data);
+
+uint64_t xxh3_64bits(ArrayRef<uint8_t> data);
+inline uint64_t xxh3_64bits(StringRef data) {
+ return xxh3_64bits(ArrayRef(data.bytes_begin(), data.size()));
+}
}
#endif
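A short usage sketch of the newly exported XXH3 entry point; both the StringRef and ArrayRef overloads route to the same 64-bit hash:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/Support/xxhash.h"
  #include <cstdint>

  uint64_t hashBuffer(llvm::StringRef Data) {
    // xxh3_64bits is the faster XXH3 variant; xxHash64 remains available.
    return llvm::xxh3_64bits(Data);
  }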
diff --git a/llvm/include/llvm/TableGen/DirectiveEmitter.h b/llvm/include/llvm/TableGen/DirectiveEmitter.h
index e85c13f4b7cc..4bca4b13d729 100644
--- a/llvm/include/llvm/TableGen/DirectiveEmitter.h
+++ b/llvm/include/llvm/TableGen/DirectiveEmitter.h
@@ -1,8 +1,13 @@
#ifndef LLVM_TABLEGEN_DIRECTIVEEMITTER_H
#define LLVM_TABLEGEN_DIRECTIVEEMITTER_H
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/TableGen/Record.h"
+#include <algorithm>
+#include <string>
+#include <vector>
namespace llvm {
diff --git a/llvm/include/llvm/TableGen/Error.h b/llvm/include/llvm/TableGen/Error.h
index da0132b10f4f..2e639224c9c0 100644
--- a/llvm/include/llvm/TableGen/Error.h
+++ b/llvm/include/llvm/TableGen/Error.h
@@ -47,6 +47,6 @@ void CheckAssert(SMLoc Loc, Init *Condition, Init *Message);
extern SourceMgr SrcMgr;
extern unsigned ErrorsPrinted;
-} // end namespace "llvm"
+} // end namespace llvm
#endif
diff --git a/llvm/include/llvm/TableGen/Main.h b/llvm/include/llvm/TableGen/Main.h
index 4e05da36168f..4639ec756e9b 100644
--- a/llvm/include/llvm/TableGen/Main.h
+++ b/llvm/include/llvm/TableGen/Main.h
@@ -13,6 +13,8 @@
#ifndef LLVM_TABLEGEN_MAIN_H
#define LLVM_TABLEGEN_MAIN_H
+#include <functional>
+
namespace llvm {
class raw_ostream;
@@ -22,7 +24,8 @@ class RecordKeeper;
/// Returns true on error, false otherwise.
using TableGenMainFn = bool (raw_ostream &OS, RecordKeeper &Records);
-int TableGenMain(const char *argv0, TableGenMainFn *MainFn);
+int TableGenMain(const char *argv0,
+ std::function<TableGenMainFn> MainFn = nullptr);
} // end namespace llvm
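With the std::function signature, a tblgen-style driver can hand TableGenMain a lambda rather than a free function; a hedged sketch (the emitted output is a placeholder):

  #include "llvm/Support/CommandLine.h"
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/TableGen/Main.h"
  #include "llvm/TableGen/Record.h"

  int main(int argc, char **argv) {
    llvm::cl::ParseCommandLineOptions(argc, argv);
    return llvm::TableGenMain(argv[0], [](llvm::raw_ostream &OS,
                                          llvm::RecordKeeper &Records) {
      OS << "// classes parsed: " << Records.getClasses().size() << "\n";
      return false; // false == success, per the TableGenMainFn contract
    });
  }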
diff --git a/llvm/include/llvm/TableGen/Record.h b/llvm/include/llvm/TableGen/Record.h
index 76c555d25ebf..06e4abb27e59 100644
--- a/llvm/include/llvm/TableGen/Record.h
+++ b/llvm/include/llvm/TableGen/Record.h
@@ -36,6 +36,7 @@
#include <optional>
#include <string>
#include <utility>
+#include <variant>
#include <vector>
namespace llvm {
@@ -142,8 +143,6 @@ public:
std::string getAsString() const override;
bool typeIsConvertibleTo(const RecTy *RHS) const override;
-
- bool typeIsA(const RecTy *RHS) const override;
};
/// 'int' - Represent an integer value of no particular size
@@ -316,11 +315,11 @@ protected:
IK_AnonymousNameInit,
IK_StringInit,
IK_VarInit,
- IK_VarListElementInit,
IK_VarBitInit,
IK_VarDefInit,
IK_LastTypedInit,
- IK_UnsetInit
+ IK_UnsetInit,
+ IK_ArgumentInit,
};
private:
@@ -386,14 +385,6 @@ public:
return nullptr;
}
- /// This function is used to implement the list slice
- /// selection operator. Given a value, it selects the specified list
- /// elements, returning them as a new \p Init of type \p list. If it
- /// is not legal to use the slice operator, null is returned.
- virtual Init *convertInitListSlice(ArrayRef<unsigned> Elements) const {
- return nullptr;
- }
-
/// This function is used to implement the FieldInit class.
/// Implementors of this method should return the type of the named
/// field if they are of type record.
@@ -445,7 +436,6 @@ public:
Init *convertInitializerTo(RecTy *Ty) const override;
Init *convertInitializerBitRange(ArrayRef<unsigned> Bits) const override;
- Init *convertInitListSlice(ArrayRef<unsigned> Elements) const override;
/// This method is used to implement the FieldInit class.
/// Implementors of this method should return the type of the named field if
@@ -492,6 +482,68 @@ public:
std::string getAsString() const override { return "?"; }
};
+// Represent an argument.
+using ArgAuxType = std::variant<unsigned, Init *>;
+class ArgumentInit : public Init, public FoldingSetNode {
+public:
+ enum Kind {
+ Positional,
+ Named,
+ };
+
+private:
+ Init *Value;
+ ArgAuxType Aux;
+
+protected:
+ explicit ArgumentInit(Init *Value, ArgAuxType Aux)
+ : Init(IK_ArgumentInit), Value(Value), Aux(Aux) {}
+
+public:
+ ArgumentInit(const ArgumentInit &) = delete;
+ ArgumentInit &operator=(const ArgumentInit &) = delete;
+
+ static bool classof(const Init *I) { return I->getKind() == IK_ArgumentInit; }
+
+ RecordKeeper &getRecordKeeper() const { return Value->getRecordKeeper(); }
+
+ static ArgumentInit *get(Init *Value, ArgAuxType Aux);
+
+ bool isPositional() const { return Aux.index() == Positional; }
+ bool isNamed() const { return Aux.index() == Named; }
+
+ Init *getValue() const { return Value; }
+ unsigned getIndex() const {
+ assert(isPositional() && "Should be positional!");
+ return std::get<Positional>(Aux);
+ }
+ Init *getName() const {
+ assert(isNamed() && "Should be named!");
+ return std::get<Named>(Aux);
+ }
+ ArgumentInit *cloneWithValue(Init *Value) const { return get(Value, Aux); }
+
+ void Profile(FoldingSetNodeID &ID) const;
+
+ Init *resolveReferences(Resolver &R) const override;
+ std::string getAsString() const override {
+ if (isPositional())
+ return utostr(getIndex()) + ": " + Value->getAsString();
+ if (isNamed())
+ return getName()->getAsString() + ": " + Value->getAsString();
+ llvm_unreachable("Unsupported argument type!");
+ return "";
+ }
+
+ bool isComplete() const override { return false; }
+ bool isConcrete() const override { return false; }
+ Init *getBit(unsigned Bit) const override { return Value->getBit(Bit); }
+ Init *getCastTo(RecTy *Ty) const override { return Value->getCastTo(Ty); }
+ Init *convertInitializerTo(RecTy *Ty) const override {
+ return Value->convertInitializerTo(Ty);
+ }
+};
+
/// 'true'/'false' - Represent a concrete initializer for a bit.
class BitInit final : public TypedInit {
friend detail::RecordKeeperImpl;
@@ -726,8 +778,6 @@ public:
Record *getElementAsRecord(unsigned i) const;
- Init *convertInitListSlice(ArrayRef<unsigned> Elements) const override;
-
Init *convertInitializerTo(RecTy *Ty) const override;
/// This method is used by classes that refer to other
@@ -785,7 +835,18 @@ public:
///
class UnOpInit : public OpInit, public FoldingSetNode {
public:
- enum UnaryOp : uint8_t { CAST, NOT, HEAD, TAIL, SIZE, EMPTY, GETDAGOP, LOG2 };
+ enum UnaryOp : uint8_t {
+ TOLOWER,
+ TOUPPER,
+ CAST,
+ NOT,
+ HEAD,
+ TAIL,
+ SIZE,
+ EMPTY,
+ GETDAGOP,
+ LOG2
+ };
private:
Init *LHS;
@@ -848,6 +909,10 @@ public:
LISTCONCAT,
LISTSPLAT,
LISTREMOVE,
+ LISTELEM,
+ LISTSLICE,
+ RANGE,
+ RANGEC,
STRCONCAT,
INTERLEAVE,
CONCAT,
@@ -857,7 +922,9 @@ public:
LT,
GE,
GT,
- SETDAGOP
+ GETDAGARG,
+ GETDAGNAME,
+ SETDAGOP,
};
private:
@@ -915,7 +982,17 @@ public:
/// !op (X, Y, Z) - Combine two inits.
class TernOpInit : public OpInit, public FoldingSetNode {
public:
- enum TernaryOp : uint8_t { SUBST, FOREACH, FILTER, IF, DAG, SUBSTR, FIND };
+ enum TernaryOp : uint8_t {
+ SUBST,
+ FOREACH,
+ FILTER,
+ IF,
+ DAG,
+ SUBSTR,
+ FIND,
+ SETDAGARG,
+ SETDAGNAME,
+ };
private:
Init *LHS, *MHS, *RHS;
@@ -1229,39 +1306,6 @@ public:
}
};
-/// List[4] - Represent access to one element of a var or
-/// field.
-class VarListElementInit : public TypedInit {
- TypedInit *TI;
- unsigned Element;
-
- VarListElementInit(TypedInit *T, unsigned E)
- : TypedInit(IK_VarListElementInit,
- cast<ListRecTy>(T->getType())->getElementType()),
- TI(T), Element(E) {
- assert(T->getType() && isa<ListRecTy>(T->getType()) &&
- "Illegal VarBitInit expression!");
- }
-
-public:
- VarListElementInit(const VarListElementInit &) = delete;
- VarListElementInit &operator=(const VarListElementInit &) = delete;
-
- static bool classof(const Init *I) {
- return I->getKind() == IK_VarListElementInit;
- }
-
- static VarListElementInit *get(TypedInit *T, unsigned E);
-
- TypedInit *getVariable() const { return TI; }
- unsigned getElementNum() const { return Element; }
-
- std::string getAsString() const override;
- Init *resolveReferences(Resolver &R) const override;
-
- Init *getBit(unsigned Bit) const override;
-};
-
/// AL - Represent a reference to a 'def' in the description
class DefInit : public TypedInit {
friend class Record;
@@ -1298,8 +1342,9 @@ public:
/// classname<targs...> - Represent an uninstantiated anonymous class
/// instantiation.
-class VarDefInit final : public TypedInit, public FoldingSetNode,
- public TrailingObjects<VarDefInit, Init *> {
+class VarDefInit final : public TypedInit,
+ public FoldingSetNode,
+ public TrailingObjects<VarDefInit, ArgumentInit *> {
Record *Class;
DefInit *Def = nullptr; // after instantiation
unsigned NumArgs;
@@ -1318,7 +1363,7 @@ public:
static bool classof(const Init *I) {
return I->getKind() == IK_VarDefInit;
}
- static VarDefInit *get(Record *Class, ArrayRef<Init *> Args);
+ static VarDefInit *get(Record *Class, ArrayRef<ArgumentInit *> Args);
void Profile(FoldingSetNodeID &ID) const;
@@ -1327,20 +1372,24 @@ public:
std::string getAsString() const override;
- Init *getArg(unsigned i) const {
+ ArgumentInit *getArg(unsigned i) const {
assert(i < NumArgs && "Argument index out of range!");
- return getTrailingObjects<Init *>()[i];
+ return getTrailingObjects<ArgumentInit *>()[i];
}
- using const_iterator = Init *const *;
+ using const_iterator = ArgumentInit *const *;
- const_iterator args_begin() const { return getTrailingObjects<Init *>(); }
+ const_iterator args_begin() const {
+ return getTrailingObjects<ArgumentInit *>();
+ }
const_iterator args_end () const { return args_begin() + NumArgs; }
size_t args_size () const { return NumArgs; }
bool args_empty() const { return NumArgs == 0; }
- ArrayRef<Init *> args() const { return ArrayRef(args_begin(), NumArgs); }
+ ArrayRef<ArgumentInit *> args() const {
+ return ArrayRef(args_begin(), NumArgs);
+ }
Init *getBit(unsigned Bit) const override {
llvm_unreachable("Illegal bit reference off anonymous def");
@@ -1437,6 +1486,10 @@ public:
return getTrailingObjects<Init *>()[Num];
}
+ /// This method looks up the specified argument name and returns its argument
+ /// number or std::nullopt if that argument name does not exist.
+ std::optional<unsigned> getArgNo(StringRef Name) const;
+
StringInit *getArgName(unsigned Num) const {
assert(Num < NumArgNames && "Arg number out of range!");
return getTrailingObjects<StringInit *>()[Num];
@@ -1828,7 +1881,7 @@ public:
/// This method looks up the specified field and returns its value as a
/// string, throwing an exception if the value is not a string and
- /// llvm::Optional() if the field does not exist.
+ /// std::nullopt if the field does not exist.
std::optional<StringRef> getValueAsOptionalString(StringRef FieldName) const;
/// This method looks up the specified field and returns its value as a
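The new ArgumentInit discriminates its positional-or-named payload by the std::variant index, matching the Positional/Named enumerator order; the same idiom in isolation, with stand-in payload types:

  #include <cassert>
  #include <string>
  #include <variant>

  enum Kind { Positional, Named };              // enumerator order matches the variant order
  using Aux = std::variant<unsigned, std::string>;

  bool isPositional(const Aux &A) { return A.index() == Positional; }
  bool isNamed(const Aux &A) { return A.index() == Named; }

  int main() {
    Aux ByIndex = 2u;                           // positional argument #2
    Aux ByName = std::string("mode");           // named argument "mode"
    assert(isPositional(ByIndex) && std::get<Positional>(ByIndex) == 2u);
    assert(isNamed(ByName) && std::get<Named>(ByName) == "mode");
    return 0;
  }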
diff --git a/llvm/include/llvm/TableGen/TableGenBackend.h b/llvm/include/llvm/TableGen/TableGenBackend.h
index a426e4217578..39f1e14bc950 100644
--- a/llvm/include/llvm/TableGen/TableGenBackend.h
+++ b/llvm/include/llvm/TableGen/TableGenBackend.h
@@ -13,17 +13,45 @@
#ifndef LLVM_TABLEGEN_TABLEGENBACKEND_H
#define LLVM_TABLEGEN_TABLEGENBACKEND_H
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
+
namespace llvm {
-class StringRef;
+class RecordKeeper;
class raw_ostream;
+namespace TableGen::Emitter {
+using FnT = void (*)(RecordKeeper &Records, raw_ostream &OS);
+
+struct OptCreatorT {
+ static void *call();
+};
+
+extern ManagedStatic<cl::opt<FnT>, OptCreatorT> Action;
+
+struct Opt {
+ Opt(StringRef Name, FnT CB, StringRef Desc, bool ByDefault = false) {
+ if (ByDefault)
+ Action->setInitialValue(CB);
+ Action->getParser().addLiteralOption(Name, CB, Desc);
+ }
+};
+
+template <class EmitterC> class OptClass : Opt {
+ static void run(RecordKeeper &RK, raw_ostream &OS) { EmitterC(RK).run(OS); }
+
+public:
+ OptClass(StringRef Name, StringRef Desc) : Opt(Name, run, Desc) {}
+};
+
+} // namespace TableGen::Emitter
+
/// emitSourceFileHeader - Output an LLVM style file header to the specified
/// raw_ostream.
void emitSourceFileHeader(StringRef Desc, raw_ostream &OS);
-extern bool TimeRegions;
-
} // End llvm namespace
#endif
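A sketch of how a backend might register itself through the new TableGen::Emitter interface; FooEmitter and the -gen-foo option name are made up for illustration:

  #include "llvm/Support/raw_ostream.h"
  #include "llvm/TableGen/Record.h"
  #include "llvm/TableGen/TableGenBackend.h"

  namespace {
  // Hypothetical backend: constructible from RecordKeeper, with a run(raw_ostream &)
  // method, which is the shape OptClass expects.
  class FooEmitter {
    llvm::RecordKeeper &Records;
  public:
    explicit FooEmitter(llvm::RecordKeeper &RK) : Records(RK) {}
    void run(llvm::raw_ostream &OS) {
      llvm::emitSourceFileHeader("Foo tables", OS);
      OS << "// " << Records.getDefs().size() << " defs seen\n";
    }
  };
  } // namespace

  // Registers "-gen-foo" with drivers that dispatch through Emitter::Action.
  static llvm::TableGen::Emitter::OptClass<FooEmitter> X("gen-foo", "Generate Foo tables");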
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 734717155daa..00d56d1c4bd5 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -23,7 +23,7 @@ class GenericInstruction : StandardPseudoInstruction {
// different instruction flags. This is intended to provide a
// convenient way to define strict floating point variants of ordinary
// floating point instructions.
-class ConstrainedIntruction<GenericInstruction baseInst> :
+class ConstrainedInstruction<GenericInstruction baseInst> :
GenericInstruction {
let OutOperandList = baseInst.OutOperandList;
let InOperandList = baseInst.InOperandList;
@@ -106,6 +106,12 @@ def G_GLOBAL_VALUE : GenericInstruction {
let hasSideEffects = false;
}
+def G_CONSTANT_POOL : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins unknown:$src);
+ let hasSideEffects = false;
+}
+
def G_INTTOPTR : GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type1:$src);
@@ -917,6 +923,20 @@ def G_FLOG10 : GenericInstruction {
let hasSideEffects = false;
}
+// Floating point x * 2^n
+def G_FLDEXP : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src0, type1:$src1);
+ let hasSideEffects = false;
+}
+
+// Floating point frexp
+def G_FFREXP : GenericInstruction {
+ let OutOperandList = (outs type0:$dst0, type1:$dst1);
+ let InOperandList = (ins type0:$src0);
+ let hasSideEffects = false;
+}
+
// Floating point ceiling of a value.
def G_FCEIL : GenericInstruction {
let OutOperandList = (outs type0:$dst);
@@ -1371,13 +1391,14 @@ def G_VECREDUCE_UMIN : VectorReduction;
// Constrained floating point ops
//------------------------------------------------------------------------------
-def G_STRICT_FADD : ConstrainedIntruction<G_FADD>;
-def G_STRICT_FSUB : ConstrainedIntruction<G_FSUB>;
-def G_STRICT_FMUL : ConstrainedIntruction<G_FMUL>;
-def G_STRICT_FDIV : ConstrainedIntruction<G_FDIV>;
-def G_STRICT_FREM : ConstrainedIntruction<G_FREM>;
-def G_STRICT_FMA : ConstrainedIntruction<G_FMA>;
-def G_STRICT_FSQRT : ConstrainedIntruction<G_FSQRT>;
+def G_STRICT_FADD : ConstrainedInstruction<G_FADD>;
+def G_STRICT_FSUB : ConstrainedInstruction<G_FSUB>;
+def G_STRICT_FMUL : ConstrainedInstruction<G_FMUL>;
+def G_STRICT_FDIV : ConstrainedInstruction<G_FDIV>;
+def G_STRICT_FREM : ConstrainedInstruction<G_FREM>;
+def G_STRICT_FMA : ConstrainedInstruction<G_FMA>;
+def G_STRICT_FSQRT : ConstrainedInstruction<G_FSQRT>;
+def G_STRICT_FLDEXP : ConstrainedInstruction<G_FLDEXP>;
//------------------------------------------------------------------------------
// Memory intrinsics
@@ -1467,3 +1488,10 @@ def G_ASSERT_ALIGN : GenericInstruction {
let InOperandList = (ins type0:$src, untyped_imm_0:$align);
let hasSideEffects = false;
}
+
+// Prevent constant folding of the source value with any users.
+def G_CONSTANT_FOLD_BARRIER : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = false;
+}
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 7aed4982cda3..08cbfc02b6bf 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -181,7 +181,6 @@ def sext_inreg_to_zext_inreg : GICombineRule<
Helper.getBuilder().setInstrAndDebugLoc(*${root});
Helper.getBuilder().buildZExtInReg(${dst}, ${src}, ${imm}.getImm());
${root}->eraseFromParent();
- return true;
}])
>;
@@ -205,6 +204,12 @@ def ptr_add_immed_chain : GICombineRule<
[{ return Helper.matchPtrAddImmedChain(*${d}, ${matchinfo}); }]),
(apply [{ Helper.applyPtrAddImmedChain(*${d}, ${matchinfo}); }])>;
+def shifts_too_big : GICombineRule<
+ (defs root:$root),
+ (match (wip_match_opcode G_SHL, G_ASHR, G_LSHR):$root,
+ [{ return Helper.matchShiftsTooBig(*${root}); }]),
+ (apply [{ Helper.replaceInstWithUndef(*${root}); }])>;
+
// Fold shift (shift base x), y -> shift base, (x+y), if shifts are same
def shift_immed_matchdata : GIDefMatchData<"RegisterImmPair">;
def shift_immed_chain : GICombineRule<
@@ -237,6 +242,14 @@ def reduce_shl_of_extend : GICombineRule<
[{ return Helper.matchCombineShlOfExtend(*${mi}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineShlOfExtend(*${mi}, ${matchinfo}); }])>;
+// Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+// Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
+def commute_shift : GICombineRule<
+ (defs root:$d, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_SHL):$d,
+ [{ return Helper.matchCommuteShift(*${d}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${d}, ${matchinfo}); }])>;
+
def narrow_binop_feeding_and : GICombineRule<
(defs root:$root, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_AND):$root,
@@ -315,7 +328,7 @@ def select_same_val: GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_SELECT):$root,
[{ return Helper.matchSelectSameVal(*${root}); }]),
- (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 2); }])
+ (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, 2); }])
>;
// Fold (undef ? x : y) -> y
@@ -323,7 +336,7 @@ def select_undef_cmp: GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_SELECT):$root,
[{ return Helper.matchUndefSelectCmp(*${root}); }]),
- (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 2); }])
+ (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, 2); }])
>;
// Fold (true ? x : y) -> x
@@ -333,7 +346,7 @@ def select_constant_cmp: GICombineRule<
(defs root:$root, select_constant_cmp_matchdata:$matchinfo),
(match (wip_match_opcode G_SELECT):$root,
[{ return Helper.matchConstantSelectCmp(*${root}, ${matchinfo}); }]),
- (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, ${matchinfo}); }])
+ (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, ${matchinfo}); }])
>;
def select_to_logical : GICombineRule<
@@ -367,7 +380,7 @@ def right_identity_zero: GICombineRule<
(match (wip_match_opcode G_SUB, G_ADD, G_OR, G_XOR, G_SHL, G_ASHR, G_LSHR,
G_PTR_ADD, G_ROTL, G_ROTR):$root,
[{ return Helper.matchConstantOp(${root}->getOperand(2), 0); }]),
- (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
+ (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
>;
// Fold x op 1 -> x
@@ -375,7 +388,7 @@ def right_identity_one: GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_MUL):$root,
[{ return Helper.matchConstantOp(${root}->getOperand(2), 1); }]),
- (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
+ (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
>;
// Fold (x op x) - > x
@@ -383,7 +396,7 @@ def binop_same_val: GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_AND, G_OR):$root,
[{ return Helper.matchBinOpSameVal(*${root}); }]),
- (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
+ (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
>;
// Fold (0 op x) - > 0
@@ -391,7 +404,7 @@ def binop_left_to_zero: GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_SDIV, G_UDIV, G_SREM, G_UREM):$root,
[{ return Helper.matchOperandIsZero(*${root}, 1); }]),
- (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
+ (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
>;
def urem_pow2_to_mask : GICombineRule<
@@ -418,7 +431,7 @@ def fold_binop_into_select : GICombineRule<
G_FMUL, G_FADD, G_FSUB, G_FDIV, G_FREM,
G_FMINNUM, G_FMAXNUM, G_FMINIMUM, G_FMAXIMUM):$root,
[{ return Helper.matchFoldBinOpIntoSelect(*${root}, ${select_op_no}); }]),
- (apply [{ return Helper.applyFoldBinOpIntoSelect(*${root}, ${select_op_no}); }])
+ (apply [{ Helper.applyFoldBinOpIntoSelect(*${root}, ${select_op_no}); }])
>;
// Transform d = [su]div(x, y) and r = [su]rem(x, y) - > d, r = [su]divrem(x, y)
@@ -435,7 +448,7 @@ def binop_right_to_zero: GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_MUL):$root,
[{ return Helper.matchOperandIsZero(*${root}, 2); }]),
- (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 2); }])
+ (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, 2); }])
>;
// Erase stores of undef values.
@@ -443,7 +456,7 @@ def erase_undef_store : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_STORE):$root,
[{ return Helper.matchUndefStore(*${root}); }]),
- (apply [{ return Helper.eraseInst(*${root}); }])
+ (apply [{ Helper.eraseInst(*${root}); }])
>;
def simplify_add_to_sub_matchinfo: GIDefMatchData<"std::tuple<Register, Register>">;
@@ -455,14 +468,26 @@ def simplify_add_to_sub: GICombineRule <
>;
// Fold fp_op(cst) to the constant result of the floating point operation.
-def constant_fp_op_matchinfo: GIDefMatchData<"std::optional<APFloat>">;
-def constant_fp_op: GICombineRule <
- (defs root:$root, constant_fp_op_matchinfo:$info),
- (match (wip_match_opcode G_FNEG, G_FABS, G_FPTRUNC, G_FSQRT, G_FLOG2):$root,
- [{ return Helper.matchCombineConstantFoldFpUnary(*${root}, ${info}); }]),
- (apply [{ Helper.applyCombineConstantFoldFpUnary(*${root}, ${info}); }])
+class constant_fold_unary_fp_op_rule<Instruction opcode> : GICombineRule <
+ (defs root:$dst),
+ (match (opcode $dst, $src0):$root, (G_FCONSTANT $src0, $cst)),
+ (apply [{ Helper.applyCombineConstantFoldFpUnary(*${root}, ${cst}.getFPImm()); }])
>;
+def constant_fold_fneg : constant_fold_unary_fp_op_rule<G_FNEG>;
+def constant_fold_fabs : constant_fold_unary_fp_op_rule<G_FABS>;
+def constant_fold_fsqrt : constant_fold_unary_fp_op_rule<G_FSQRT>;
+def constant_fold_flog2 : constant_fold_unary_fp_op_rule<G_FLOG2>;
+def constant_fold_fptrunc : constant_fold_unary_fp_op_rule<G_FPTRUNC>;
+
+def constant_fold_fp_ops : GICombineGroup<[
+ constant_fold_fneg,
+ constant_fold_fabs,
+ constant_fold_fsqrt,
+ constant_fold_flog2,
+ constant_fold_fptrunc
+]>;
+
// Fold int2ptr(ptr2int(x)) -> x
def p2i_to_i2p: GICombineRule<
(defs root:$root, register_matchinfo:$info),
@@ -476,7 +501,7 @@ def i2p_to_p2i: GICombineRule<
(defs root:$dst, register_matchinfo:$info),
(match (G_INTTOPTR $t, $ptr),
(G_PTRTOINT $dst, $t):$mi,
- [{ ${info} = ${ptr}.getReg(); }]),
+ [{ ${info} = ${ptr}.getReg(); return true; }]),
(apply [{ Helper.applyCombineP2IToI2P(*${mi}, ${info}); }])
>;
@@ -528,7 +553,7 @@ def redundant_and: GICombineRule <
(defs root:$root, register_matchinfo:$matchinfo),
(match (wip_match_opcode G_AND):$root,
[{ return Helper.matchRedundantAnd(*${root}, ${matchinfo}); }]),
- (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
+ (apply [{ Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
>;
// Fold (x | y) -> x or (x | y) -> y when (x | y) is known to equal x or equal y.
@@ -536,7 +561,7 @@ def redundant_or: GICombineRule <
(defs root:$root, register_matchinfo:$matchinfo),
(match (wip_match_opcode G_OR):$root,
[{ return Helper.matchRedundantOr(*${root}, ${matchinfo}); }]),
- (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
+ (apply [{ Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
>;
// If the input is already sign extended, just drop the extension.
@@ -546,7 +571,7 @@ def redundant_sext_inreg: GICombineRule <
(defs root:$root),
(match (wip_match_opcode G_SEXT_INREG):$root,
[{ return Helper.matchRedundantSExtInReg(*${root}); }]),
- (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
+ (apply [{ Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
>;
// Fold (anyext (trunc x)) -> x if the source type is same as
@@ -555,7 +580,7 @@ def anyext_trunc_fold: GICombineRule <
(defs root:$root, register_matchinfo:$matchinfo),
(match (wip_match_opcode G_ANYEXT):$root,
[{ return Helper.matchCombineAnyExtTrunc(*${root}, ${matchinfo}); }]),
- (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
+ (apply [{ Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
>;
// Fold (zext (trunc x)) -> x if the source type is same as the destination type
@@ -565,7 +590,7 @@ def zext_trunc_fold: GICombineRule <
(defs root:$root, zext_trunc_fold_matchinfo:$matchinfo),
(match (wip_match_opcode G_ZEXT):$root,
[{ return Helper.matchCombineZextTrunc(*${root}, ${matchinfo}); }]),
- (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
+ (apply [{ Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
>;
// Fold ([asz]ext ([asz]ext x)) -> ([asz]ext x).
@@ -590,8 +615,8 @@ def fneg_fneg_fold: GICombineRule <
(defs root:$dst, register_matchinfo:$matchinfo),
(match (G_FNEG $t, $src),
(G_FNEG $dst, $t):$mi,
- [{ ${matchinfo} = ${src}.getReg(); }]),
- (apply [{ return Helper.replaceSingleDefInstWithReg(*${mi}, ${matchinfo}); }])
+ [{ ${matchinfo} = ${src}.getReg(); return true; }]),
+ (apply [{ Helper.replaceSingleDefInstWithReg(*${mi}, ${matchinfo}); }])
>;
// Fold (unmerge(merge x, y, z)) -> z, y, z.
@@ -709,14 +734,6 @@ def load_or_combine : GICombineRule<
[{ return Helper.matchLoadOrCombine(*${root}, ${info}); }]),
(apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
-
-def truncstore_merge_matcdata : GIDefMatchData<"MergeTruncStoresInfo">;
-def truncstore_merge : GICombineRule<
- (defs root:$root, truncstore_merge_matcdata:$info),
- (match (wip_match_opcode G_STORE):$root,
- [{ return Helper.matchTruncStoreMerge(*${root}, ${info}); }]),
- (apply [{ Helper.applyTruncStoreMerge(*${root}, ${info}); }])>;
-
def extend_through_phis_matchdata: GIDefMatchData<"MachineInstr*">;
def extend_through_phis : GICombineRule<
(defs root:$root, extend_through_phis_matchdata:$matchinfo),
@@ -844,7 +861,13 @@ def reassoc_ptradd : GICombineRule<
[{ return Helper.matchReassocPtrAdd(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
-def reassocs : GICombineGroup<[reassoc_ptradd]>;
+def reassoc_comm_binops : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_ADD $root, $src1, $src2):$root,
+ [{ return Helper.matchReassocCommBinOp(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
+def reassocs : GICombineGroup<[reassoc_ptradd, reassoc_comm_binops]>;
// Constant fold operations.
def constant_fold : GICombineRule<
@@ -992,7 +1015,7 @@ def add_sub_reg: GICombineRule <
(defs root:$root, register_matchinfo:$matchinfo),
(match (wip_match_opcode G_ADD):$root,
[{ return Helper.matchAddSubSameReg(*${root}, ${matchinfo}); }]),
- (apply [{ return Helper.replaceSingleDefInstWithReg(*${root},
+ (apply [{ Helper.replaceSingleDefInstWithReg(*${root},
${matchinfo}); }])>;
def buildvector_identity_fold : GICombineRule<
@@ -1030,6 +1053,14 @@ def bitcast_bitcast_fold : GICombineRule<
[{ return MRI.getType(${src0}.getReg()) == MRI.getType(${dst}.getReg()); }]),
(apply [{ Helper.replaceSingleDefInstWithReg(*${op}, ${src0}.getReg()); }])>;
+
+def fptrunc_fpext_fold : GICombineRule<
+ (defs root:$dst),
+ (match (G_FPTRUNC $dst, $src1):$op, (G_FPEXT $src1, $src0),
+ [{ return MRI.getType(${src0}.getReg()) == MRI.getType(${dst}.getReg()); }]),
+ (apply [{ Helper.replaceSingleDefInstWithReg(*${op}, ${src0}.getReg()); }])>;
+
+
def select_to_minmax: GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
(match (wip_match_opcode G_SELECT):$root,
@@ -1057,9 +1088,9 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
add_sub_reg, buildvector_identity_fold,
trunc_buildvector_fold,
trunc_lshr_buildvector_fold,
- bitcast_bitcast_fold]>;
+ bitcast_bitcast_fold, fptrunc_fpext_fold]>;
-def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
+def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
overlapping_and, mulo_by_2, mulo_by_0,
addo_by_0, adde_to_addo,
combine_minmax_nan]>;
@@ -1089,7 +1120,7 @@ def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma,
def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
extract_vec_elt_combines, combines_for_extload,
combine_indexed_load_store, undef_combines, identity_combines, phi_combines,
- simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands,
+ simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big,
reassocs, ptr_add_immed_chain,
shl_ashr_to_sext_inreg, sext_inreg_of_load,
width_reduction_combines, select_combines,
@@ -1099,7 +1130,7 @@ def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
unmerge_zext_to_zext, merge_unmerge, trunc_ext_fold, trunc_shift,
const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
- truncstore_merge, div_rem_to_divrem, funnel_shift_combines,
+ div_rem_to_divrem, funnel_shift_combines, commute_shift,
form_bitfield_extract, constant_fold, fabs_fneg_fold,
intdiv_combines, mulh_combines, redundant_neg_operands,
and_or_disjoint_mask, fma_combines, fold_binop_into_select,
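The new commute_shift rule relies on left shifts distributing over add and or with constants (modulo the bit width); a quick standalone check of the identity:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t X = 0x1234, C1 = 5, C2 = 3;
    // (shl (add x, c1), c2) == (add (shl x, c2), (shl c1, c2)), modulo 2^32
    assert(((X + C1) << C2) == ((X << C2) + (C1 << C2)));
    // (shl (or x, c1), c2) == (or (shl x, c2), (shl c1, c2))
    assert(((X | C1) << C2) == ((X << C2) | (C1 << C2)));
    return 0;
  }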
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 3ab0d1b75d84..41a95390cc45 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -103,6 +103,7 @@ def : GINodeEquiv<G_FREM, frem>;
def : GINodeEquiv<G_FPOW, fpow>;
def : GINodeEquiv<G_FEXP2, fexp2>;
def : GINodeEquiv<G_FLOG2, flog2>;
+def : GINodeEquiv<G_FLDEXP, fldexp>;
def : GINodeEquiv<G_FCANONICALIZE, fcanonicalize>;
def : GINodeEquiv<G_IS_FPCLASS, is_fpclass>;
def : GINodeEquiv<G_INTRINSIC, intrinsic_wo_chain>;
@@ -150,6 +151,7 @@ def : GINodeEquiv<G_ROTR, rotr>;
def : GINodeEquiv<G_ROTL, rotl>;
def : GINodeEquiv<G_LROUND, lround>;
def : GINodeEquiv<G_LLROUND, llround>;
+def : GINodeEquiv<G_VECREDUCE_FADD, vecreduce_fadd>;
def : GINodeEquiv<G_STRICT_FADD, strict_fadd>;
def : GINodeEquiv<G_STRICT_FSUB, strict_fsub>;
@@ -158,6 +160,7 @@ def : GINodeEquiv<G_STRICT_FDIV, strict_fdiv>;
def : GINodeEquiv<G_STRICT_FREM, strict_frem>;
def : GINodeEquiv<G_STRICT_FMA, strict_fma>;
def : GINodeEquiv<G_STRICT_FSQRT, strict_fsqrt>;
+def : GINodeEquiv<G_STRICT_FLDEXP, strict_fldexp>;
// Broadly speaking G_LOAD is equivalent to ISD::LOAD but there are some
// complications that tablegen must take care of. For example, Predicates such
diff --git a/llvm/include/llvm/Target/GlobalISel/Target.td b/llvm/include/llvm/Target/GlobalISel/Target.td
index 135d4a5e0dd0..4cb3fd1bf79c 100644
--- a/llvm/include/llvm/Target/GlobalISel/Target.td
+++ b/llvm/include/llvm/Target/GlobalISel/Target.td
@@ -22,6 +22,8 @@ class LLT;
def s32 : LLT;
def s64 : LLT;
+def v2s32 : LLT;
+def v4s16 : LLT;
// Defines a matcher for complex operands. This is analogous to ComplexPattern
// from SelectionDAG.
diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index 181c8eb17511..06521fa584cb 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -14,11 +14,13 @@
// Include all information about LLVM intrinsics.
include "llvm/IR/Intrinsics.td"
+class Predicate; // Forward def
+
//===----------------------------------------------------------------------===//
// Register file description - These classes are used to fill in the target
// description classes.
-class HwMode<string FS> {
+class HwMode<string FS, list<Predicate> Ps> {
// A string representing subtarget features that turn on this HW mode.
// For example, "+feat1,-feat2" will indicate that the mode is active
// when "feat1" is enabled and "feat2" is disabled at the same time.
@@ -26,12 +28,15 @@ class HwMode<string FS> {
// When multiple modes are used, they should be mutually exclusive,
// otherwise the results are unpredictable.
string Features = FS;
+
+ // A list of predicates that turn on this HW mode.
+ list<Predicate> Predicates = Ps;
}
// A special mode recognized by tablegen. This mode is considered active
// when no other mode is active. For targets that do not use specific hw
// modes, this is the only mode.
-def DefaultMode : HwMode<"">;
+def DefaultMode : HwMode<"", []>;
// A class used to associate objects with HW modes. It is only intended to
// be used as a base class, where the derived class should contain a member
@@ -446,8 +451,6 @@ include "llvm/Target/TargetInstrPredicate.td"
//
include "llvm/Target/TargetSchedule.td"
-class Predicate; // Forward def
-
class InstructionEncoding {
// Size of encoded instruction.
int Size;
@@ -1058,45 +1061,6 @@ class InstrInfo {
//
// This option is a temporary migration help. It will go away.
bit guessInstructionProperties = true;
-
- // TableGen's instruction encoder generator has support for matching operands
- // to bit-field variables both by name and by position. Support for matching
- // by position is DEPRECATED, and WILL BE REMOVED. Positional matching is
- // confusing to use, and makes it very easy to accidentally write buggy
- // instruction definitions.
- //
- // In previous versions of LLVM, the ability to match operands by position was
- // enabled unconditionally. It is now controllable by this option -- and
- // disabled by default. The previous behavior can be restored by setting this
- // option to true.
- //
- // This option is temporary, and will go away once all in-tree targets have
- // migrated.
- //
- // TODO: clean up and remove these options.
- bit useDeprecatedPositionallyEncodedOperands = false;
-
- // If positional encoding rules are used for the encoder generator, they may
- // also need to be used by the decoder generator -- if so, enable this
- // variable.
- //
- // This option is a no-op unless useDeprecatedPositionallyEncodedOperands is
- // true.
- //
- // This option is temporary, and will go away once all in-tree targets have
- // migrated.
- bit decodePositionallyEncodedOperands = false;
-
- // When set, this indicates that there will be no overlap between those
- // operands that are matched by ordering (positional operands) and those
- // matched by name.
- //
- // This is a no-op unless useDeprecatedPositionallyEncodedOperands is true
- // (though it does modify the "would've used positional operand XXX" error.)
- //
- // This option is temporary, and will go away once all in-tree targets have
- // migrated.
- bit noNamedPositionallyEncodedOperands = false;
}
// Standard Pseudo Instructions.
@@ -1693,18 +1657,24 @@ class Target {
//===----------------------------------------------------------------------===//
// SubtargetFeature - A characteristic of the chip set.
//
-class SubtargetFeature<string n, string a, string v, string d,
+class SubtargetFeature<string n, string f, string v, string d,
list<SubtargetFeature> i = []> {
// Name - Feature name. Used by command line (-mattr=) to determine the
// appropriate target chip.
//
string Name = n;
- // Attribute - Attribute to be set by feature.
+ // FieldName - Field in XXXSubtarget to be set by feature.
//
- string Attribute = a;
+ string FieldName = f;
- // Value - Value the attribute to be set to by feature.
+ // Value - Value the XXXSubtarget field to be set to by feature.
+ //
+ // A value of "true" or "false" implies the field is a bool. Otherwise,
+ // it is assumed to be an integer. The integer value may be the name of an
+ // enum constant. If multiple features use the same integer field, the
+ // field will be set to the maximum value of all enabled features that
+ // share the field.
//
string Value = v;
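Per the comment above, when several enabled features share one integer subtarget field, the field ends up with the maximum of their values; a tiny sketch of that merge rule (the values are hypothetical):

  #include <algorithm>
  #include <initializer_list>

  // Two enabled features both set a shared integer field, e.g. a cache line size;
  // the generated code keeps the maximum of the enabled values.
  unsigned mergeSharedField(std::initializer_list<unsigned> EnabledFeatureValues) {
    unsigned Field = 0;
    for (unsigned V : EnabledFeatureValues)
      Field = std::max(Field, V);
    return Field;
  }
  // mergeSharedField({32, 64}) == 64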
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index 6361373ba71b..b6ba36fb09b1 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -14,7 +14,6 @@
#define LLVM_TARGET_TARGETMACHINE_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/Allocator.h"
@@ -23,6 +22,7 @@
#include "llvm/Support/PGOOptions.h"
#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
#include <optional>
#include <string>
#include <utility>
@@ -235,6 +235,8 @@ public:
/// Set the code model.
void setCodeModel(CodeModel::Model CM) { CMModel = CM; }
+ bool isLargeData() const;
+
bool isPositionIndependent() const;
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const;
@@ -500,6 +502,9 @@ public:
/// The default variant to use in unqualified `asm` instructions.
/// If this returns 0, `asm "$(foo$|bar$)"` will evaluate to `asm "foo"`.
virtual int unqualifiedInlineAsmVariant() const { return 0; }
+
+ // MachineRegisterInfo callback function
+ virtual void registerMachineRegisterInfoCallback(MachineFunction &MF) const {}
};
/// Helper method for getting the code model, returning Default if
diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h
index d8a415918939..d6d767f3d22c 100644
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@@ -130,21 +130,20 @@ namespace llvm {
HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false),
GuaranteedTailCallOpt(false), StackSymbolOrdering(true),
EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false),
- LowerGlobalDtorsViaCxaAtExit(false), DisableIntegratedAS(false),
- RelaxELFRelocations(true), FunctionSections(false),
- DataSections(false), IgnoreXCOFFVisibility(false),
- XCOFFTracebackTable(true), UniqueSectionNames(true),
- UniqueBasicBlockSectionNames(false), TrapUnreachable(false),
- NoTrapAfterNoreturn(false), TLSSize(0), EmulatedTLS(false),
- ExplicitEmulatedTLS(false), EnableIPRA(false),
- EmitStackSizeSection(false), EnableMachineOutliner(false),
- EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false),
- EmitAddrsig(false), EmitCallSiteInfo(false),
- SupportsDebugEntryValues(false), EnableDebugEntryValues(false),
- ValueTrackingVariableLocations(false), ForceDwarfFrameSection(false),
- XRayOmitFunctionIndex(false), DebugStrictDwarf(false),
- Hotpatch(false), PPCGenScalarMASSEntries(false), JMCInstrument(false),
- EnableCFIFixup(false), MisExpect(false),
+ DisableIntegratedAS(false), RelaxELFRelocations(true),
+ FunctionSections(false), DataSections(false),
+ IgnoreXCOFFVisibility(false), XCOFFTracebackTable(true),
+ UniqueSectionNames(true), UniqueBasicBlockSectionNames(false),
+ TrapUnreachable(false), NoTrapAfterNoreturn(false), TLSSize(0),
+ EmulatedTLS(false), EnableIPRA(false), EmitStackSizeSection(false),
+ EnableMachineOutliner(false), EnableMachineFunctionSplitter(false),
+ SupportsDefaultOutlining(false), EmitAddrsig(false),
+ EmitCallSiteInfo(false), SupportsDebugEntryValues(false),
+ EnableDebugEntryValues(false), ValueTrackingVariableLocations(false),
+ ForceDwarfFrameSection(false), XRayFunctionIndex(true),
+ DebugStrictDwarf(false), Hotpatch(false),
+ PPCGenScalarMASSEntries(false), JMCInstrument(false),
+ EnableCFIFixup(false), MisExpect(false), XCOFFReadOnlyPointers(false),
FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {}
/// DisableFramePointerElim - This returns true if frame pointer elimination
@@ -247,10 +246,6 @@ namespace llvm {
/// constructors.
unsigned UseInitArray : 1;
- /// Use __cxa_atexit to register global destructors; determines how
- /// llvm.global_dtors is lowered.
- unsigned LowerGlobalDtorsViaCxaAtExit : 1;
-
/// Disable the integrated assembler.
unsigned DisableIntegratedAS : 1;
@@ -290,9 +285,6 @@ namespace llvm {
/// function in the runtime library.
unsigned EmulatedTLS : 1;
- /// Whether -emulated-tls or -no-emulated-tls is set.
- unsigned ExplicitEmulatedTLS : 1;
-
/// This flag enables InterProcedural Register Allocation (IPRA).
unsigned EnableIPRA : 1;
@@ -342,7 +334,7 @@ namespace llvm {
unsigned ForceDwarfFrameSection : 1;
/// Emit XRay Function Index section
- unsigned XRayOmitFunctionIndex : 1;
+ unsigned XRayFunctionIndex : 1;
/// When set to true, don't use DWARF extensions in later DWARF versions.
/// By default, it is set to false.
@@ -364,6 +356,10 @@ namespace llvm {
/// By default, it is set to false
unsigned MisExpect : 1;
+ /// When set to true, const objects with relocatable address values are put
+ /// into the RO data section.
+ unsigned XCOFFReadOnlyPointers : 1;
+
/// Name of the stack usage file (i.e., .su file) if user passes
/// -fstack-usage. If empty, it can be implied that -fstack-usage is not
/// passed on the command line.
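A short sketch of the renamed and added bitfields (the helper is illustrative): XRayFunctionIndex replaces the inverted XRayOmitFunctionIndex flag, and XCOFFReadOnlyPointers is new in this change.

#include "llvm/Target/TargetOptions.h"

// Toggle the flags touched by this change; both are plain one-bit fields.
void configureOptions(llvm::TargetOptions &Opts) {
  Opts.XRayFunctionIndex = true;      // emit the XRay function index section
  Opts.XCOFFReadOnlyPointers = true;  // const objects with relocations go to RO data
}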
diff --git a/llvm/include/llvm/Target/TargetSchedule.td b/llvm/include/llvm/Target/TargetSchedule.td
index 3bc467fbbfb0..f5aa105ec0cb 100644
--- a/llvm/include/llvm/Target/TargetSchedule.td
+++ b/llvm/include/llvm/Target/TargetSchedule.td
@@ -117,6 +117,11 @@ class SchedMachineModel {
list<Predicate> UnsupportedFeatures = [];
bit NoModel = false; // Special tag to indicate missing machine model.
+
+ // Tells the MachineScheduler whether or not to track resource usage
+ // using intervals via ResourceSegments (see
+ // llvm/include/llvm/CodeGen/MachineScheduler.h).
+ bit EnableIntervals = false;
}
def NoSchedModel : SchedMachineModel {
@@ -250,6 +255,7 @@ class WriteSequence<list<SchedWrite> writes, int rep = 1> : SchedWrite {
class ProcWriteResources<list<ProcResourceKind> resources> {
list<ProcResourceKind> ProcResources = resources;
list<int> ResourceCycles = [];
+ list<int> StartAtCycles = [];
int Latency = 1;
int NumMicroOps = 1;
bit BeginGroup = false;
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index a841cf7eb070..c9024c8fa826 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -173,6 +173,9 @@ def SDTFPToIntOp : SDTypeProfile<1, 1, [ // fp_to_[su]int
def SDTFPToIntSatOp : SDTypeProfile<1, 2, [ // fp_to_[su]int_sat
SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>, SDTCisVT<2, OtherVT>
]>;
+def SDTFPExpOp : SDTypeProfile<1, 2, [ // ldexp
+ SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>
+]>;
def SDTExtInreg : SDTypeProfile<1, 2, [ // sext_inreg
SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisVT<2, OtherVT>,
SDTCisVTSmallerThanOp<2, 1>
@@ -469,6 +472,10 @@ def vecreduce_umax : SDNode<"ISD::VECREDUCE_UMAX", SDTVecReduce>;
def vecreduce_smin : SDNode<"ISD::VECREDUCE_SMIN", SDTVecReduce>;
def vecreduce_umin : SDNode<"ISD::VECREDUCE_UMIN", SDTVecReduce>;
def vecreduce_fadd : SDNode<"ISD::VECREDUCE_FADD", SDTFPVecReduce>;
+def vecreduce_fmin : SDNode<"ISD::VECREDUCE_FMIN", SDTFPVecReduce>;
+def vecreduce_fmax : SDNode<"ISD::VECREDUCE_FMAX", SDTFPVecReduce>;
+def vecreduce_fminimum : SDNode<"ISD::VECREDUCE_FMINIMUM", SDTFPVecReduce>;
+def vecreduce_fmaximum : SDNode<"ISD::VECREDUCE_FMAXIMUM", SDTFPVecReduce>;
def fadd : SDNode<"ISD::FADD" , SDTFPBinOp, [SDNPCommutative]>;
def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>;
@@ -499,6 +506,7 @@ def fcos : SDNode<"ISD::FCOS" , SDTFPUnaryOp>;
def fexp2 : SDNode<"ISD::FEXP2" , SDTFPUnaryOp>;
def fpow : SDNode<"ISD::FPOW" , SDTFPBinOp>;
def flog2 : SDNode<"ISD::FLOG2" , SDTFPUnaryOp>;
+def fldexp : SDNode<"ISD::FLDEXP" , SDTFPExpOp>;
def frint : SDNode<"ISD::FRINT" , SDTFPUnaryOp>;
def ftrunc : SDNode<"ISD::FTRUNC" , SDTFPUnaryOp>;
def fceil : SDNode<"ISD::FCEIL" , SDTFPUnaryOp>;
@@ -549,6 +557,8 @@ def strict_fexp2 : SDNode<"ISD::STRICT_FEXP2",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_fpow : SDNode<"ISD::STRICT_FPOW",
SDTFPBinOp, [SDNPHasChain]>;
+def strict_fldexp : SDNode<"ISD::STRICT_FLDEXP",
+ SDTFPExpOp, [SDNPHasChain]>;
def strict_flog2 : SDNode<"ISD::STRICT_FLOG2",
SDTFPUnaryOp, [SDNPHasChain]>;
def strict_frint : SDNode<"ISD::STRICT_FRINT",
@@ -890,6 +900,12 @@ class PatFrags<dag ops, list<dag> frags, code pred = [{}],
ValueType ScalarMemoryVT = ?;
}
+// Patterns and PatFrags can also subclass GISelFlags to set flags that affect
+// how GlobalISel behaves when matching them.
+class GISelFlags {
+ bit GIIgnoreCopies = ?;
+}
+
// PatFrag - A version of PatFrags matching only a single fragment.
class PatFrag<dag ops, dag frag, code pred = [{}],
SDNodeXForm xform = NOOP_SDNodeXForm>
@@ -1443,6 +1459,9 @@ def any_fexp2 : PatFrags<(ops node:$src),
def any_fpow : PatFrags<(ops node:$lhs, node:$rhs),
[(strict_fpow node:$lhs, node:$rhs),
(fpow node:$lhs, node:$rhs)]>;
+def any_fldexp : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_fldexp node:$lhs, node:$rhs),
+ (fldexp node:$lhs, node:$rhs)]>;
def any_flog2 : PatFrags<(ops node:$src),
[(strict_flog2 node:$src),
(flog2 node:$src)]>;
@@ -1869,10 +1888,11 @@ class Pat<dag pattern, dag result> : Pattern<pattern, [result]>;
//
// Complex patterns, e.g. X86 addressing mode, requires pattern matching code
-// in C++. NumOperands is the number of operands returned by the select function;
-// SelectFunc is the name of the function used to pattern match the max. pattern;
-// RootNodes are the list of possible root nodes of the sub-dags to match.
-// e.g. X86 addressing mode - def addr : ComplexPattern<4, "SelectAddr", [add]>;
+// in C++. Ty is the type of return value; NumOperands is the number of operands
+// returned by the select function; SelectFunc is the name of the function used
+// to pattern match the max. pattern; RootNodes are the list of possible root nodes
+// of the sub-dags to match.
+// e.g. X86 addressing mode - def addr : ComplexPattern<iPTR, 4, "SelectAddr", [add]>;
//
class ComplexPattern<ValueType ty, int numops, string fn,
list<SDNode> roots = [], list<SDNodeProperty> props = [],
diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 11fce886ffb8..dc4cdfa8e90a 100644
--- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -25,6 +25,9 @@ namespace llvm {
class Triple;
namespace AArch64 {
+// Function Multi Versioning CPU features. They must be kept in sync with
+// compiler-rt enum CPUFeatures in lib/builtins/cpu_model.c with FEAT_MAX as
+// sentinel.
enum CPUFeatures {
FEAT_RNG,
FEAT_FLAGM,
@@ -87,11 +90,13 @@ enum CPUFeatures {
FEAT_MAX
};
+static_assert(FEAT_MAX <= 64,
+ "CPUFeatures enum must not have more than 64 entries");
+
// Arch extension modifiers for CPUs. These are labelled with their Arm ARM
// feature name (though the canonical reference for those is AArch64.td)
// clang-format off
enum ArchExtKind : uint64_t {
- AEK_INVALID = 0,
AEK_NONE = 1,
AEK_CRC = 1 << 1, // FEAT_CRC32
AEK_CRYPTO = 1 << 2,
@@ -148,6 +153,7 @@ enum ArchExtKind : uint64_t {
AEK_SPECRES2 = 1ULL << 53, // FEAT_SPECRES2
AEK_RASv2 = 1ULL << 54, // FEAT_RASv2
AEK_ITE = 1ULL << 55, // FEAT_ITE
+ AEK_GCS = 1ULL << 56, // FEAT_GCS
};
// clang-format on
@@ -156,19 +162,22 @@ enum ArchExtKind : uint64_t {
// SubtargetFeature which may represent either an actual extension or some
// internal LLVM property.
struct ExtensionInfo {
- StringRef Name; // Human readable name, e.g. "profile".
- ArchExtKind ID; // Corresponding to the ArchExtKind, this extensions
- // representation in the bitfield.
- StringRef Feature; // -mattr enable string, e.g. "+spe"
- StringRef NegFeature; // -mattr disable string, e.g. "-spe"
-
- // FIXME These were added by D127812 FMV support and need documenting:
- CPUFeatures CPUFeature; // Bitfield value set in __aarch64_cpu_features
- StringRef DependentFeatures;
- unsigned FmvPriority;
- static constexpr unsigned MaxFMVPriority = 1000;
+ StringRef Name; // Human readable name, e.g. "profile".
+ ArchExtKind ID; // Corresponding to the ArchExtKind, this
+ // extensions representation in the bitfield.
+ StringRef Feature; // -mattr enable string, e.g. "+spe"
+ StringRef NegFeature; // -mattr disable string, e.g. "-spe"
+ CPUFeatures CPUFeature; // Function Multi Versioning (FMV) bitfield value
+ // set in __aarch64_cpu_features
+ StringRef DependentFeatures; // FMV enabled features string,
+ // e.g. "+dotprod,+fp-armv8,+neon"
+ unsigned FmvPriority; // FMV feature priority
+ static constexpr unsigned MaxFMVPriority =
+ 1000; // Maximum priority for FMV feature
};
+// NOTE: If adding a new extension here, consider adding it to ExtensionMap
+// in AArch64AsmParser too, if supported as an extension name by binutils.
// clang-format off
inline constexpr ExtensionInfo Extensions[] = {
{"aes", AArch64::AEK_AES, "+aes", "-aes", FEAT_AES, "+fp-armv8,+neon", 150},
@@ -177,7 +186,7 @@ inline constexpr ExtensionInfo Extensions[] = {
{"brbe", AArch64::AEK_BRBE, "+brbe", "-brbe", FEAT_MAX, "", 0},
{"bti", AArch64::AEK_NONE, {}, {}, FEAT_BTI, "+bti", 510},
{"crc", AArch64::AEK_CRC, "+crc", "-crc", FEAT_CRC, "+crc", 110},
- {"crypto", AArch64::AEK_CRYPTO, "+crypto", "-crypto", FEAT_MAX, "", 0},
+ {"crypto", AArch64::AEK_CRYPTO, "+crypto", "-crypto", FEAT_MAX, "+aes,+sha2", 0},
{"cssc", AArch64::AEK_CSSC, "+cssc", "-cssc", FEAT_MAX, "", 0},
{"d128", AArch64::AEK_D128, "+d128", "-d128", FEAT_MAX, "", 0},
{"dgh", AArch64::AEK_NONE, {}, {}, FEAT_DGH, "", 260},
@@ -246,13 +255,13 @@ inline constexpr ExtensionInfo Extensions[] = {
{"sve2-sha3", AArch64::AEK_SVE2SHA3, "+sve2-sha3", "-sve2-sha3", FEAT_SVE_SHA3, "+sve2,+sve,+sve2-sha3,+fullfp16,+fp-armv8,+neon", 410},
{"sve2-sm4", AArch64::AEK_SVE2SM4, "+sve2-sm4", "-sve2-sm4", FEAT_SVE_SM4, "+sve2,+sve,+sve2-sm4,+fullfp16,+fp-armv8,+neon", 420},
{"sve2", AArch64::AEK_SVE2, "+sve2", "-sve2", FEAT_SVE2, "+sve2,+sve,+fullfp16,+fp-armv8,+neon", 370},
- {"sve2p1", AArch64::AEK_SVE2p1, "+sve2p1", "-sve2p1", FEAT_MAX, "", 0},
+ {"sve2p1", AArch64::AEK_SVE2p1, "+sve2p1", "-sve2p1", FEAT_MAX, "+sve2p1,+sve2,+sve,+fullfp16,+fp-armv8,+neon", 0},
{"the", AArch64::AEK_THE, "+the", "-the", FEAT_MAX, "", 0},
{"tme", AArch64::AEK_TME, "+tme", "-tme", FEAT_MAX, "", 0},
{"wfxt", AArch64::AEK_NONE, {}, {}, FEAT_WFXT, "+wfxt", 550},
+ {"gcs", AArch64::AEK_GCS, "+gcs", "-gcs", FEAT_MAX, "", 0},
// Special cases
{"none", AArch64::AEK_NONE, {}, {}, FEAT_MAX, "", ExtensionInfo::MaxFMVPriority},
- {"invalid", AArch64::AEK_INVALID, {}, {}, FEAT_MAX, "", 0},
};
// clang-format on
@@ -280,18 +289,20 @@ struct ArchInfo {
// v v v v v
// v8.9a > v8.8a > v8.7a > v8.6a > v8.5a > v8.4a > ... > v8a;
//
- // v8r and INVALID have no relation to anything. This is used to
- // determine which features to enable for a given architecture. See
+ // v8r has no relation to anything. This is used to determine which
+ // features to enable for a given architecture. See
// AArch64TargetInfo::setFeatureEnabled.
bool implies(const ArchInfo &Other) const {
if (this->Profile != Other.Profile)
- return false; // ARMV8R and INVALID
+ return false; // ARMV8R
if (this->Version.getMajor() == Other.Version.getMajor()) {
return this->Version > Other.Version;
}
if (this->Version.getMajor() == 9 && Other.Version.getMajor() == 8) {
- return this->Version.getMinor().value() + 5 >=
- Other.Version.getMinor().value();
+ assert(this->Version.getMinor() && Other.Version.getMinor() &&
+ "AArch64::ArchInfo should have a minor version.");
+ return this->Version.getMinor().value_or(0) + 5 >=
+ Other.Version.getMinor().value_or(0);
}
return false;
}
@@ -300,11 +311,10 @@ struct ArchInfo {
StringRef getSubArch() const { return ArchFeature.substr(1); }
// Search for ArchInfo by SubArch name
- static const ArchInfo &findBySubArch(StringRef SubArch);
+ static std::optional<ArchInfo> findBySubArch(StringRef SubArch);
};
// clang-format off
-inline constexpr ArchInfo INVALID = { VersionTuple{0, 0}, AProfile, "invalid", "+", (AArch64::AEK_NONE)};
inline constexpr ArchInfo ARMV8A = { VersionTuple{8, 0}, AProfile, "armv8-a", "+v8a", (AArch64::AEK_FP | AArch64::AEK_SIMD), };
inline constexpr ArchInfo ARMV8_1A = { VersionTuple{8, 1}, AProfile, "armv8.1-a", "+v8.1a", (ARMV8A.DefaultExts | AArch64::AEK_CRC | AArch64::AEK_LSE | AArch64::AEK_RDM)};
inline constexpr ArchInfo ARMV8_2A = { VersionTuple{8, 2}, AProfile, "armv8.2-a", "+v8.2a", (ARMV8_1A.DefaultExts | AArch64::AEK_RAS)};
@@ -325,10 +335,10 @@ inline constexpr ArchInfo ARMV8R = { VersionTuple{8, 0}, RProfile, "armv8-r",
// clang-format on
// The set of all architectures
-static constexpr std::array<const ArchInfo *, 17> ArchInfos = {
- &INVALID, &ARMV8A, &ARMV8_1A, &ARMV8_2A, &ARMV8_3A, &ARMV8_4A,
- &ARMV8_5A, &ARMV8_6A, &ARMV8_7A, &ARMV8_8A, &ARMV8_9A, &ARMV9A,
- &ARMV9_1A, &ARMV9_2A, &ARMV9_3A, &ARMV9_4A, &ARMV8R,
+static constexpr std::array<const ArchInfo *, 16> ArchInfos = {
+ &ARMV8A, &ARMV8_1A, &ARMV8_2A, &ARMV8_3A, &ARMV8_4A, &ARMV8_5A,
+ &ARMV8_6A, &ARMV8_7A, &ARMV8_8A, &ARMV8_9A, &ARMV9A, &ARMV9_1A,
+ &ARMV9_2A, &ARMV9_3A, &ARMV9_4A, &ARMV8R,
};
// Details of a specific CPU.
@@ -337,48 +347,60 @@ struct CpuInfo {
const ArchInfo &Arch;
uint64_t DefaultExtensions; // Default extensions for this CPU. These will be
// ORd with the architecture defaults.
+
+ uint64_t getImpliedExtensions() const {
+ return DefaultExtensions | Arch.DefaultExts;
+ }
};
inline constexpr CpuInfo CpuInfos[] = {
- {"cortex-a34", ARMV8A, (AArch64::AEK_CRYPTO | AArch64::AEK_CRC)},
- {"cortex-a35", ARMV8A, (AArch64::AEK_CRYPTO | AArch64::AEK_CRC)},
- {"cortex-a53", ARMV8A, (AArch64::AEK_CRYPTO | AArch64::AEK_CRC)},
+ {"cortex-a34", ARMV8A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_CRC)},
+ {"cortex-a35", ARMV8A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_CRC)},
+ {"cortex-a53", ARMV8A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_CRC)},
{"cortex-a55", ARMV8_2A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
- AArch64::AEK_RCPC)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_FP16 |
+ AArch64::AEK_DOTPROD | AArch64::AEK_RCPC)},
{"cortex-a510", ARMV9A,
(AArch64::AEK_BF16 | AArch64::AEK_I8MM | AArch64::AEK_SB |
AArch64::AEK_PAUTH | AArch64::AEK_MTE | AArch64::AEK_SSBS |
AArch64::AEK_SVE | AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM |
AArch64::AEK_FP16FML)},
- {"cortex-a57", ARMV8A, (AArch64::AEK_CRYPTO | AArch64::AEK_CRC)},
+ {"cortex-a57", ARMV8A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_CRC)},
{"cortex-a65", ARMV8_2A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
- AArch64::AEK_RCPC | AArch64::AEK_SSBS)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_DOTPROD |
+ AArch64::AEK_FP16 | AArch64::AEK_RCPC | AArch64::AEK_SSBS)},
{"cortex-a65ae", ARMV8_2A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
- AArch64::AEK_RCPC | AArch64::AEK_SSBS)},
- {"cortex-a72", ARMV8A, (AArch64::AEK_CRYPTO | AArch64::AEK_CRC)},
- {"cortex-a73", ARMV8A, (AArch64::AEK_CRYPTO | AArch64::AEK_CRC)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_DOTPROD |
+ AArch64::AEK_FP16 | AArch64::AEK_RCPC | AArch64::AEK_SSBS)},
+ {"cortex-a72", ARMV8A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_CRC)},
+ {"cortex-a73", ARMV8A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_CRC)},
{"cortex-a75", ARMV8_2A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
- AArch64::AEK_RCPC)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_FP16 |
+ AArch64::AEK_DOTPROD | AArch64::AEK_RCPC)},
{"cortex-a76", ARMV8_2A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
- AArch64::AEK_RCPC | AArch64::AEK_SSBS)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_FP16 |
+ AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | AArch64::AEK_SSBS)},
{"cortex-a76ae", ARMV8_2A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
- AArch64::AEK_RCPC | AArch64::AEK_SSBS)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_FP16 |
+ AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | AArch64::AEK_SSBS)},
{"cortex-a77", ARMV8_2A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_RCPC |
- AArch64::AEK_DOTPROD | AArch64::AEK_SSBS)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_FP16 |
+ AArch64::AEK_RCPC | AArch64::AEK_DOTPROD | AArch64::AEK_SSBS)},
{"cortex-a78", ARMV8_2A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
- AArch64::AEK_RCPC | AArch64::AEK_SSBS | AArch64::AEK_PROFILE)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_FP16 |
+ AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | AArch64::AEK_SSBS |
+ AArch64::AEK_PROFILE)},
{"cortex-a78c", ARMV8_2A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
- AArch64::AEK_RCPC | AArch64::AEK_SSBS | AArch64::AEK_PROFILE |
- AArch64::AEK_FLAGM | AArch64::AEK_PAUTH | AArch64::AEK_FP16FML)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_FP16 |
+ AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | AArch64::AEK_SSBS |
+ AArch64::AEK_PROFILE | AArch64::AEK_FLAGM | AArch64::AEK_PAUTH |
+ AArch64::AEK_FP16FML)},
{"cortex-a710", ARMV9A,
(AArch64::AEK_MTE | AArch64::AEK_PAUTH | AArch64::AEK_FLAGM |
AArch64::AEK_SB | AArch64::AEK_I8MM | AArch64::AEK_FP16FML |
@@ -392,12 +414,13 @@ inline constexpr CpuInfo CpuInfos[] = {
AArch64::AEK_BF16 | AArch64::AEK_FLAGM)},
{"cortex-r82", ARMV8R, (AArch64::AEK_LSE)},
{"cortex-x1", ARMV8_2A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
- AArch64::AEK_RCPC | AArch64::AEK_SSBS | AArch64::AEK_PROFILE)},
- {"cortex-x1c", ARMV8_2A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_DOTPROD |
- AArch64::AEK_RCPC | AArch64::AEK_SSBS | AArch64::AEK_PAUTH |
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_FP16 |
+ AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | AArch64::AEK_SSBS |
AArch64::AEK_PROFILE)},
+ {"cortex-x1c", ARMV8_2A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_FP16 |
+ AArch64::AEK_DOTPROD | AArch64::AEK_RCPC | AArch64::AEK_SSBS |
+ AArch64::AEK_PAUTH | AArch64::AEK_PROFILE)},
{"cortex-x2", ARMV9A,
(AArch64::AEK_MTE | AArch64::AEK_BF16 | AArch64::AEK_I8MM |
AArch64::AEK_PAUTH | AArch64::AEK_SSBS | AArch64::AEK_SB |
@@ -410,23 +433,27 @@ inline constexpr CpuInfo CpuInfos[] = {
AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_PREDRES |
AArch64::AEK_FLAGM | AArch64::AEK_SSBS)},
{"neoverse-e1", ARMV8_2A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
- AArch64::AEK_RCPC | AArch64::AEK_SSBS)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_DOTPROD |
+ AArch64::AEK_FP16 | AArch64::AEK_RCPC | AArch64::AEK_SSBS)},
{"neoverse-n1", ARMV8_2A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
- AArch64::AEK_PROFILE | AArch64::AEK_RCPC | AArch64::AEK_SSBS)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_DOTPROD |
+ AArch64::AEK_FP16 | AArch64::AEK_PROFILE | AArch64::AEK_RCPC |
+ AArch64::AEK_SSBS)},
{"neoverse-n2", ARMV8_5A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_BF16 | AArch64::AEK_DOTPROD |
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_SHA3 |
+ AArch64::AEK_SM4 | AArch64::AEK_BF16 | AArch64::AEK_DOTPROD |
AArch64::AEK_FP16 | AArch64::AEK_I8MM | AArch64::AEK_MTE |
AArch64::AEK_SB | AArch64::AEK_SSBS | AArch64::AEK_SVE |
AArch64::AEK_SVE2 | AArch64::AEK_SVE2BITPERM)},
{"neoverse-512tvb", ARMV8_4A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_SVE | AArch64::AEK_SSBS |
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_SHA3 |
+ AArch64::AEK_SM4 | AArch64::AEK_SVE | AArch64::AEK_SSBS |
AArch64::AEK_FP16 | AArch64::AEK_BF16 | AArch64::AEK_DOTPROD |
AArch64::AEK_PROFILE | AArch64::AEK_RAND | AArch64::AEK_FP16FML |
AArch64::AEK_I8MM)},
{"neoverse-v1", ARMV8_4A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_SVE | AArch64::AEK_SSBS |
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_SHA3 |
+ AArch64::AEK_SM4 | AArch64::AEK_SVE | AArch64::AEK_SSBS |
AArch64::AEK_FP16 | AArch64::AEK_BF16 | AArch64::AEK_DOTPROD |
AArch64::AEK_PROFILE | AArch64::AEK_RAND | AArch64::AEK_FP16FML |
AArch64::AEK_I8MM)},
@@ -435,55 +462,78 @@ inline constexpr CpuInfo CpuInfos[] = {
AArch64::AEK_FP16 | AArch64::AEK_BF16 | AArch64::AEK_RAND |
AArch64::AEK_DOTPROD | AArch64::AEK_PROFILE | AArch64::AEK_SVE2BITPERM |
AArch64::AEK_FP16FML | AArch64::AEK_I8MM | AArch64::AEK_MTE)},
- {"cyclone", ARMV8A, (AArch64::AEK_CRYPTO | AArch64::AEK_NONE)},
- {"apple-a7", ARMV8A, (AArch64::AEK_CRYPTO | AArch64::AEK_NONE)},
- {"apple-a8", ARMV8A, (AArch64::AEK_CRYPTO | AArch64::AEK_NONE)},
- {"apple-a9", ARMV8A, (AArch64::AEK_CRYPTO | AArch64::AEK_NONE)},
+ {"cyclone", ARMV8A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_NONE)},
+ {"apple-a7", ARMV8A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_NONE)},
+ {"apple-a8", ARMV8A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_NONE)},
+ {"apple-a9", ARMV8A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_NONE)},
{"apple-a10", ARMV8A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_CRC | AArch64::AEK_RDM)},
- {"apple-a11", ARMV8_2A, (AArch64::AEK_CRYPTO | AArch64::AEK_FP16)},
- {"apple-a12", ARMV8_3A, (AArch64::AEK_CRYPTO | AArch64::AEK_FP16)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_CRC |
+ AArch64::AEK_RDM)},
+ {"apple-a11", ARMV8_2A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_FP16)},
+ {"apple-a12", ARMV8_3A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_FP16)},
{"apple-a13", ARMV8_4A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_FP16FML |
- AArch64::AEK_SHA3)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_SHA3 |
+ AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3)},
{"apple-a14", ARMV8_5A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_FP16FML |
- AArch64::AEK_SHA3)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_SHA3 |
+ AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3)},
{"apple-a15", ARMV8_5A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_FP16FML |
- AArch64::AEK_SHA3 | AArch64::AEK_BF16 | AArch64::AEK_I8MM)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_SHA3 |
+ AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
+ AArch64::AEK_BF16 | AArch64::AEK_I8MM)},
{"apple-a16", ARMV8_5A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_FP16FML |
- AArch64::AEK_SHA3 | AArch64::AEK_BF16 | AArch64::AEK_I8MM)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_SHA3 |
+ AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
+ AArch64::AEK_BF16 | AArch64::AEK_I8MM)},
{"apple-m1", ARMV8_5A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_FP16FML |
- AArch64::AEK_SHA3)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_SHA3 |
+ AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3)},
{"apple-m2", ARMV8_5A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_FP16FML |
- AArch64::AEK_SHA3 | AArch64::AEK_BF16 | AArch64::AEK_I8MM)},
- {"apple-s4", ARMV8_3A, (AArch64::AEK_CRYPTO | AArch64::AEK_FP16)},
- {"apple-s5", ARMV8_3A, (AArch64::AEK_CRYPTO | AArch64::AEK_FP16)},
- {"exynos-m3", ARMV8A, (AArch64::AEK_CRYPTO | AArch64::AEK_CRC)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_SHA3 |
+ AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_SHA3 |
+ AArch64::AEK_BF16 | AArch64::AEK_I8MM)},
+ {"apple-s4", ARMV8_3A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_FP16)},
+ {"apple-s5", ARMV8_3A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_FP16)},
+ {"exynos-m3", ARMV8A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_CRC)},
{"exynos-m4", ARMV8_2A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_DOTPROD | AArch64::AEK_FP16)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_DOTPROD |
+ AArch64::AEK_FP16)},
{"exynos-m5", ARMV8_2A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_DOTPROD | AArch64::AEK_FP16)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_DOTPROD |
+ AArch64::AEK_FP16)},
{"falkor", ARMV8A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_CRC | AArch64::AEK_RDM)},
- {"saphira", ARMV8_3A, (AArch64::AEK_CRYPTO | AArch64::AEK_PROFILE)},
- {"kryo", ARMV8A, (AArch64::AEK_CRYPTO | AArch64::AEK_CRC)},
- {"thunderx2t99", ARMV8_1A, (AArch64::AEK_CRYPTO)},
- {"thunderx3t110", ARMV8_3A, (AArch64::AEK_CRYPTO)},
- {"thunderx", ARMV8A, (AArch64::AEK_CRYPTO | AArch64::AEK_CRC)},
- {"thunderxt88", ARMV8A, (AArch64::AEK_CRYPTO | AArch64::AEK_CRC)},
- {"thunderxt81", ARMV8A, (AArch64::AEK_CRYPTO | AArch64::AEK_CRC)},
- {"thunderxt83", ARMV8A, (AArch64::AEK_CRYPTO | AArch64::AEK_CRC)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_CRC |
+ AArch64::AEK_RDM)},
+ {"saphira", ARMV8_3A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_PROFILE)},
+ {"kryo", ARMV8A, (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_CRC)},
+ {"thunderx2t99", ARMV8_1A, (AArch64::AEK_AES | AArch64::AEK_SHA2)},
+ {"thunderx3t110", ARMV8_3A, (AArch64::AEK_AES | AArch64::AEK_SHA2)},
+ {"thunderx", ARMV8A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_CRC)},
+ {"thunderxt88", ARMV8A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_CRC)},
+ {"thunderxt81", ARMV8A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_CRC)},
+ {"thunderxt83", ARMV8A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_CRC)},
{"tsv110", ARMV8_2A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_DOTPROD | AArch64::AEK_FP16 |
- AArch64::AEK_FP16FML | AArch64::AEK_PROFILE)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_DOTPROD |
+ AArch64::AEK_FP16 | AArch64::AEK_FP16FML | AArch64::AEK_PROFILE)},
{"a64fx", ARMV8_2A,
- (AArch64::AEK_CRYPTO | AArch64::AEK_FP16 | AArch64::AEK_SVE)},
- {"carmel", ARMV8_2A, (AArch64::AEK_CRYPTO | AArch64::AEK_FP16)},
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_FP16 |
+ AArch64::AEK_SVE)},
+ {"carmel", ARMV8_2A,
+ (AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_FP16)},
{"ampere1", ARMV8_6A,
(AArch64::AEK_AES | AArch64::AEK_SHA2 | AArch64::AEK_SHA3 |
AArch64::AEK_FP16 | AArch64::AEK_SB | AArch64::AEK_SSBS |
@@ -492,8 +542,6 @@ inline constexpr CpuInfo CpuInfos[] = {
(AArch64::AEK_FP16 | AArch64::AEK_RAND | AArch64::AEK_SM4 |
AArch64::AEK_SHA3 | AArch64::AEK_SHA2 | AArch64::AEK_AES |
AArch64::AEK_MTE | AArch64::AEK_SB | AArch64::AEK_SSBS)},
- // Invalid CPU
- {"invalid", INVALID, (AArch64::AEK_INVALID)},
};
// An alias for a CPU.
@@ -511,19 +559,21 @@ StringRef getArchExtFeature(StringRef ArchExt);
StringRef resolveCPUAlias(StringRef CPU);
// Information by Name
-uint64_t getDefaultExtensions(StringRef CPU, const ArchInfo &AI);
-void getFeatureOption(StringRef Name, std::string &Feature);
-const ArchInfo &getArchForCpu(StringRef CPU);
+std::optional<ArchInfo> getArchForCpu(StringRef CPU);
// Parser
-const ArchInfo &parseArch(StringRef Arch);
-ArchExtKind parseArchExt(StringRef ArchExt);
+std::optional<ArchInfo> parseArch(StringRef Arch);
+std::optional<ExtensionInfo> parseArchExtension(StringRef Extension);
// Given the name of a CPU or alias, return the corresponding CpuInfo.

-const CpuInfo &parseCpu(StringRef Name);
+std::optional<CpuInfo> parseCpu(StringRef Name);
// Used by target parser tests
void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values);
bool isX18ReservedByDefault(const Triple &TT);
+
+// For given feature names, return a bitmask corresponding to the entries of
+// AArch64::CPUFeatures. The values in CPUFeatures are not bitmasks
+// themselves, they are sequential (0, 1, 2, 3, ...).
uint64_t getCpuSupportsMask(ArrayRef<StringRef> FeatureStrs);
} // namespace AArch64
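With the INVALID sentinels removed, the AArch64 parser entry points return std::optional; a minimal caller sketch (the helper name is illustrative):

#include "llvm/TargetParser/AArch64TargetParser.h"

// Resolve a CPU name and return its implied extension bitmask, or 0 if the
// name is unknown.
uint64_t cpuExtensionsOrZero(llvm::StringRef Name) {
  if (std::optional<llvm::AArch64::CpuInfo> Info = llvm::AArch64::parseCpu(Name))
    return Info->getImpliedExtensions(); // CPU defaults ORed with arch defaults
  return 0;
}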
diff --git a/llvm/include/llvm/TargetParser/ARMTargetParser.h b/llvm/include/llvm/TargetParser/ARMTargetParser.h
index 0723f4b2663c..9a81415681fd 100644
--- a/llvm/include/llvm/TargetParser/ARMTargetParser.h
+++ b/llvm/include/llvm/TargetParser/ARMTargetParser.h
@@ -181,7 +181,7 @@ struct ArchNames {
StringRef Name;
StringRef CPUAttr; // CPU class in build attributes.
StringRef ArchFeature;
- unsigned DefaultFPU;
+ FPUKind DefaultFPU;
uint64_t ArchBaseExtensions;
ArchKind ID;
ARMBuildAttrs::CPUArch ArchAttr; // Arch ID in build attributes.
@@ -213,12 +213,12 @@ inline ArchKind &operator--(ArchKind &Kind) {
}
// Information by ID
-StringRef getFPUName(unsigned FPUKind);
-FPUVersion getFPUVersion(unsigned FPUKind);
-NeonSupportLevel getFPUNeonSupportLevel(unsigned FPUKind);
-FPURestriction getFPURestriction(unsigned FPUKind);
+StringRef getFPUName(FPUKind FPUKind);
+FPUVersion getFPUVersion(FPUKind FPUKind);
+NeonSupportLevel getFPUNeonSupportLevel(FPUKind FPUKind);
+FPURestriction getFPURestriction(FPUKind FPUKind);
-bool getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features);
+bool getFPUFeatures(FPUKind FPUKind, std::vector<StringRef> &Features);
bool getHWDivFeatures(uint64_t HWDivKind, std::vector<StringRef> &Features);
bool getExtensionFeatures(uint64_t Extensions,
std::vector<StringRef> &Features);
@@ -231,11 +231,11 @@ StringRef getArchExtName(uint64_t ArchExtKind);
StringRef getArchExtFeature(StringRef ArchExt);
bool appendArchExtFeatures(StringRef CPU, ARM::ArchKind AK, StringRef ArchExt,
std::vector<StringRef> &Features,
- unsigned &ArgFPUKind);
+ FPUKind &ArgFPUKind);
ArchKind convertV9toV8(ArchKind AK);
// Information by Name
-unsigned getDefaultFPU(StringRef CPU, ArchKind AK);
+FPUKind getDefaultFPU(StringRef CPU, ArchKind AK);
uint64_t getDefaultExtensions(StringRef CPU, ArchKind AK);
StringRef getDefaultCPU(StringRef Arch);
StringRef getCanonicalArchName(StringRef Arch);
@@ -243,7 +243,7 @@ StringRef getFPUSynonym(StringRef FPU);
// Parser
uint64_t parseHWDiv(StringRef HWDiv);
-unsigned parseFPU(StringRef FPU);
+FPUKind parseFPU(StringRef FPU);
ArchKind parseArch(StringRef Arch);
uint64_t parseArchExt(StringRef ArchExt);
ArchKind parseCPUArch(StringRef CPU);
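A hedged sketch of the strongly-typed FPU API (helper name illustrative): parseFPU() and getFPUFeatures() now traffic in ARM::FPUKind rather than a bare unsigned.

#include "llvm/TargetParser/ARMTargetParser.h"
#include <vector>

// Parse an FPU name and expand it to its subtarget feature strings.
bool fpuFeaturesFor(llvm::StringRef FPUName,
                    std::vector<llvm::StringRef> &Features) {
  llvm::ARM::FPUKind FK = llvm::ARM::parseFPU(FPUName);
  return llvm::ARM::getFPUFeatures(FK, Features);
}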
diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def
index 4ebdcc012bdb..b20d124953f8 100644
--- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def
+++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def
@@ -2,8 +2,6 @@
#define LOONGARCH_FEATURE(NAME, KIND)
#endif
-LOONGARCH_FEATURE("invalid", FK_INVALID)
-LOONGARCH_FEATURE("none", FK_NONE)
LOONGARCH_FEATURE("+64bit", FK_64BIT)
LOONGARCH_FEATURE("+f", FK_FP32)
LOONGARCH_FEATURE("+d", FK_FP64)
@@ -11,6 +9,7 @@ LOONGARCH_FEATURE("+lsx", FK_LSX)
LOONGARCH_FEATURE("+lasx", FK_LASX)
LOONGARCH_FEATURE("+lbt", FK_LBT)
LOONGARCH_FEATURE("+lvz", FK_LVZ)
+LOONGARCH_FEATURE("+ual", FK_UAL)
#undef LOONGARCH_FEATURE
@@ -18,8 +17,7 @@ LOONGARCH_FEATURE("+lvz", FK_LVZ)
#define LOONGARCH_ARCH(NAME, KIND, FEATURES)
#endif
-LOONGARCH_ARCH("invalid", AK_INVALID, FK_INVALID)
-LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64)
-LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX)
+LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL)
+LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL)
#undef LOONGARCH_ARCH
diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
index 53f9073e4439..2aa65ec070ec 100644
--- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
+++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h
@@ -23,9 +23,6 @@ class StringRef;
namespace LoongArch {
enum FeatureKind : uint32_t {
- FK_INVALID = 0,
- FK_NONE = 1,
-
// 64-bit ISA is available.
FK_64BIT = 1 << 1,
@@ -46,6 +43,9 @@ enum FeatureKind : uint32_t {
// Loongson Virtualization Extension is available.
FK_LVZ = 1 << 7,
+
+ // Allow memory accesses to be unaligned.
+ FK_UAL = 1 << 8,
};
struct FeatureInfo {
@@ -64,7 +64,7 @@ struct ArchInfo {
uint32_t Features;
};
-ArchKind parseArch(StringRef Arch);
+bool isValidArchName(StringRef Arch);
bool getArchFeatures(StringRef Arch, std::vector<StringRef> &Features);
} // namespace LoongArch
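A small caller sketch under the reworked LoongArch API (helper name illustrative): architecture validation is now a predicate instead of an ArchKind lookup with an invalid sentinel.

#include "llvm/TargetParser/LoongArchTargetParser.h"
#include <vector>

// Validate an arch name and collect its feature strings (which now include
// +ual for the default architectures).
bool loongArchFeaturesFor(llvm::StringRef Arch,
                          std::vector<llvm::StringRef> &Features) {
  if (!llvm::LoongArch::isValidArchName(Arch))
    return false;
  return llvm::LoongArch::getArchFeatures(Arch, Features);
}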
diff --git a/llvm/include/llvm/TargetParser/RISCVTargetParser.h b/llvm/include/llvm/TargetParser/RISCVTargetParser.h
index da2ecd8c1339..a4cb7988eb39 100644
--- a/llvm/include/llvm/TargetParser/RISCVTargetParser.h
+++ b/llvm/include/llvm/TargetParser/RISCVTargetParser.h
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
// This file implements a target parser to recognise hardware features
-// FOR RISC-V CPUS.
+// for RISC-V CPUs.
//
//===----------------------------------------------------------------------===//
@@ -18,25 +18,19 @@
#include <vector>
namespace llvm {
+
+class Triple;
+
namespace RISCV {
// We use 64 bits as the known part in the scalable vector types.
static constexpr unsigned RVVBitsPerBlock = 64;
-enum CPUKind : unsigned {
-#define PROC(ENUM, NAME, DEFAULT_MARCH) CK_##ENUM,
-#define TUNE_PROC(ENUM, NAME) CK_##ENUM,
-#include "llvm/TargetParser/RISCVTargetParserDef.inc"
-};
-
-bool checkCPUKind(CPUKind Kind, bool IsRV64);
-bool checkTuneCPUKind(CPUKind Kind, bool IsRV64);
-CPUKind parseCPUKind(StringRef CPU);
-CPUKind parseTuneCPUKind(StringRef CPU, bool IsRV64);
+bool parseCPU(StringRef CPU, bool IsRV64);
+bool parseTuneCPU(StringRef CPU, bool IsRV64);
StringRef getMArchFromMcpu(StringRef CPU);
void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values, bool IsRV64);
void fillValidTuneCPUArchList(SmallVectorImpl<StringRef> &Values, bool IsRV64);
-bool getCPUFeaturesExceptStdExt(CPUKind Kind, std::vector<StringRef> &Features);
} // namespace RISCV
} // namespace llvm
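A minimal sketch of the simplified RISC-V interface (helper name illustrative): the CPUKind enum is gone and CPU names are validated with the bool-returning helpers.

#include "llvm/TargetParser/RISCVTargetParser.h"

// Accept a name if it is either a valid -mcpu or a valid -mtune value.
bool isKnownRISCVCPU(llvm::StringRef CPU, bool IsRV64) {
  return llvm::RISCV::parseCPU(CPU, IsRV64) ||
         llvm::RISCV::parseTuneCPU(CPU, IsRV64);
}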
diff --git a/llvm/include/llvm/MC/SubtargetFeature.h b/llvm/include/llvm/TargetParser/SubtargetFeature.h
index c38b532f21e5..a898275c1493 100644
--- a/llvm/include/llvm/MC/SubtargetFeature.h
+++ b/llvm/include/llvm/TargetParser/SubtargetFeature.h
@@ -1,4 +1,4 @@
-//===- llvm/MC/SubtargetFeature.h - CPU characteristics ---------*- C++ -*-===//
+//=== llvm/TargetParser/SubtargetFeature.h - CPU characteristics-*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -14,8 +14,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_MC_SUBTARGETFEATURE_H
-#define LLVM_MC_SUBTARGETFEATURE_H
+#ifndef LLVM_TARGETPARSER_SUBTARGETFEATURE_H
+#define LLVM_TARGETPARSER_SUBTARGETFEATURE_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
@@ -233,4 +233,4 @@ public:
} // end namespace llvm
-#endif // LLVM_MC_SUBTARGETFEATURE_H
+#endif // LLVM_TARGETPARSER_SUBTARGETFEATURE_H
diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h
index 243eaff0a865..a40599c88f28 100644
--- a/llvm/include/llvm/TargetParser/TargetParser.h
+++ b/llvm/include/llvm/TargetParser/TargetParser.h
@@ -14,11 +14,8 @@
#ifndef LLVM_TARGETPARSER_TARGETPARSER_H
#define LLVM_TARGETPARSER_TARGETPARSER_H
+#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include <cstdint>
-// FIXME: vector is used because that's what clang uses for subtarget feature
-// lists, but SmallVector would probably be better
-#include <vector>
namespace llvm {
@@ -87,6 +84,8 @@ enum GPUKind : uint32_t {
GK_GFX90A = 66,
GK_GFX90C = 67,
GK_GFX940 = 68,
+ GK_GFX941 = 69,
+ GK_GFX942 = 70,
GK_GFX1010 = 71,
GK_GFX1011 = 72,
@@ -104,9 +103,11 @@ enum GPUKind : uint32_t {
GK_GFX1101 = 91,
GK_GFX1102 = 92,
GK_GFX1103 = 93,
+ GK_GFX1150 = 94,
+ GK_GFX1151 = 95,
GK_AMDGCN_FIRST = GK_GFX600,
- GK_AMDGCN_LAST = GK_GFX1103,
+ GK_AMDGCN_LAST = GK_GFX1151,
};
/// Instruction set architecture version.
@@ -138,6 +139,9 @@ enum ArchFeatureKind : uint32_t {
// Sram-ecc is available.
FEATURE_SRAMECC = 1 << 8,
+
+ // WGP mode is supported.
+ FEATURE_WGP = 1 << 9,
};
StringRef getArchNameAMDGCN(GPUKind AK);
@@ -153,6 +157,14 @@ void fillValidArchListR600(SmallVectorImpl<StringRef> &Values);
IsaVersion getIsaVersion(StringRef GPU);
+/// Fills Features map with default values for given target GPU
+void fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
+ StringMap<bool> &Features);
+
+/// Inserts wave size feature for given GPU into features map
+bool insertWaveSizeFeature(StringRef GPU, const Triple &T,
+ StringMap<bool> &Features, std::string &ErrorMsg);
+
} // namespace AMDGPU
} // namespace llvm
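A usage sketch for the two new AMDGPU helpers (helper name illustrative): the feature map is filled with the GPU's defaults, then the wave-size feature is inserted, with ErrorMsg populated on failure.

#include "llvm/ADT/StringMap.h"
#include "llvm/TargetParser/TargetParser.h"
#include "llvm/TargetParser/Triple.h"
#include <string>

// Build the default feature map for a GPU and add the wave-size feature.
bool buildAMDGPUFeatures(llvm::StringRef GPU, const llvm::Triple &T,
                         llvm::StringMap<bool> &Features,
                         std::string &ErrorMsg) {
  llvm::AMDGPU::fillAMDGPUFeatureMap(GPU, T, Features);
  return llvm::AMDGPU::insertWaveSizeFeature(GPU, T, Features, ErrorMsg);
}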
diff --git a/llvm/include/llvm/TargetParser/Triple.h b/llvm/include/llvm/TargetParser/Triple.h
index 8d600989c8cf..79ccd644a50b 100644
--- a/llvm/include/llvm/TargetParser/Triple.h
+++ b/llvm/include/llvm/TargetParser/Triple.h
@@ -199,6 +199,7 @@ public:
NetBSD,
OpenBSD,
Solaris,
+ UEFI,
Win32,
ZOS,
Haiku,
@@ -223,7 +224,8 @@ public:
WASI, // Experimental WebAssembly OS
Emscripten,
ShaderModel, // DirectX ShaderModel
- LastOSType = ShaderModel
+ LiteOS,
+ LastOSType = LiteOS
};
enum EnvironmentType {
UnknownEnvironment,
@@ -273,8 +275,8 @@ public:
Callable,
Mesh,
Amplification,
-
- LastEnvironmentType = Amplification
+ OpenHOS,
+ LastEnvironmentType = OpenHOS
};
enum ObjectFormatType {
UnknownObjectFormat,
@@ -579,6 +581,11 @@ public:
return getOS() == Triple::Haiku;
}
+ /// Tests whether the OS is UEFI.
+ bool isUEFI() const {
+ return getOS() == Triple::UEFI;
+ }
+
/// Tests whether the OS is Windows.
bool isOSWindows() const {
return getOS() == Triple::Win32;
@@ -740,9 +747,18 @@ public:
return getEnvironment() == Triple::Musl ||
getEnvironment() == Triple::MuslEABI ||
getEnvironment() == Triple::MuslEABIHF ||
- getEnvironment() == Triple::MuslX32;
+ getEnvironment() == Triple::MuslX32 ||
+ getEnvironment() == Triple::OpenHOS || isOSLiteOS();
}
+ /// Tests whether the target is OHOS
+ /// LiteOS' default environment is also OHOS, but omitted from the triple.
+ bool isOHOSFamily() const { return isOpenHOS() || isOSLiteOS(); }
+
+ bool isOpenHOS() const { return getEnvironment() == Triple::OpenHOS; }
+
+ bool isOSLiteOS() const { return getOS() == Triple::LiteOS; }
+
/// Tests whether the target is DXIL.
bool isDXIL() const {
return getArch() == Triple::dxil;
@@ -789,6 +805,7 @@ public:
getEnvironment() == Triple::MuslEABI ||
getEnvironment() == Triple::EABIHF ||
getEnvironment() == Triple::GNUEABIHF ||
+ getEnvironment() == Triple::OpenHOS ||
getEnvironment() == Triple::MuslEABIHF || isAndroid()) &&
isOSBinFormatELF();
}
@@ -846,10 +863,14 @@ public:
: PointerWidth == 64;
}
+ /// Tests whether the target is 32-bit LoongArch.
+ bool isLoongArch32() const { return getArch() == Triple::loongarch32; }
+
+ /// Tests whether the target is 64-bit LoongArch.
+ bool isLoongArch64() const { return getArch() == Triple::loongarch64; }
+
/// Tests whether the target is LoongArch (32- and 64-bit).
- bool isLoongArch() const {
- return getArch() == Triple::loongarch32 || getArch() == Triple::loongarch64;
- }
+ bool isLoongArch() const { return isLoongArch32() || isLoongArch64(); }
/// Tests whether the target is MIPS 32-bit (little and big endian).
bool isMIPS32() const {
@@ -882,6 +903,23 @@ public:
return getArch() == Triple::ppc64 || getArch() == Triple::ppc64le;
}
+ /// Tests whether the target 64-bit PowerPC big endian ABI is ELFv2.
+ bool isPPC64ELFv2ABI() const {
+ return (getArch() == Triple::ppc64 &&
+ ((getOS() == Triple::FreeBSD &&
+ (getOSMajorVersion() >= 13 || getOSVersion().empty())) ||
+ getOS() == Triple::OpenBSD || isMusl()));
+ }
+
+ /// Tests whether the target 32-bit PowerPC uses Secure PLT.
+ bool isPPC32SecurePlt() const {
+ return ((getArch() == Triple::ppc || getArch() == Triple::ppcle) &&
+ ((getOS() == Triple::FreeBSD &&
+ (getOSMajorVersion() >= 13 || getOSVersion().empty())) ||
+ getOS() == Triple::NetBSD || getOS() == Triple::OpenBSD ||
+ isMusl()));
+ }
+
/// Tests whether the target is 32-bit RISC-V.
bool isRISCV32() const { return getArch() == Triple::riscv32; }
@@ -951,8 +989,11 @@ public:
}
/// Tests whether the target uses emulated TLS as default.
+ ///
+ /// Note: Android API level 29 (10) introduced ELF TLS.
bool hasDefaultEmulatedTLS() const {
- return isAndroid() || isOSOpenBSD() || isWindowsCygwinEnvironment();
+ return (isAndroid() && isAndroidVersionLT(29)) || isOSOpenBSD() ||
+ isWindowsCygwinEnvironment() || isOHOSFamily();
}
/// Tests whether the target uses -data-sections as default.
@@ -1077,6 +1118,9 @@ public:
/// Get the canonical name for the \p Kind environment.
static StringRef getEnvironmentTypeName(EnvironmentType Kind);
+ /// Get the name for the \p Object format.
+ static StringRef getObjectFormatTypeName(ObjectFormatType ObjectFormat);
+
/// @}
/// @name Static helpers for converting alternate architecture names.
/// @{
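A brief sketch of the new OHOS predicates (helper name illustrative): the family covers both the OpenHOS environment and the LiteOS OS, and membership also changes the emulated-TLS default.

#include "llvm/TargetParser/Triple.h"

// True for triples targeting the OpenHarmony family (OpenHOS environment or
// LiteOS OS); such targets also default to emulated TLS.
bool isOpenHarmonyTarget(const llvm::Triple &T) {
  return T.isOHOSFamily();
}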
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index feec0b81f526..7f874cfac4af 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -99,6 +99,7 @@ X86_CPU_SUBTYPE(INTEL_COREI7_ROCKETLAKE, "rocketlake")
X86_CPU_SUBTYPE(ZHAOXIN_FAM7H_LUJIAZUI, "zhaoxin_fam7h_lujiazui")
X86_CPU_SUBTYPE(AMDFAM19H_ZNVER4, "znver4")
X86_CPU_SUBTYPE(INTEL_COREI7_GRANITERAPIDS, "graniterapids")
+X86_CPU_SUBTYPE(INTEL_COREI7_GRANITERAPIDS_D,"graniterapids-d")
// Alternate names supported by __builtin_cpu_is and target multiversioning.
X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "raptorlake")
@@ -167,6 +168,7 @@ X86_FEATURE (3DNOWA, "3dnowa")
X86_FEATURE (64BIT, "64bit")
X86_FEATURE (ADX, "adx")
X86_FEATURE (AMX_BF16, "amx-bf16")
+X86_FEATURE (AMX_COMPLEX, "amx-complex")
X86_FEATURE (AMX_INT8, "amx-int8")
X86_FEATURE (AMX_TILE, "amx-tile")
X86_FEATURE (CLDEMOTE, "cldemote")
@@ -226,6 +228,10 @@ X86_FEATURE (AVXNECONVERT, "avxneconvert")
X86_FEATURE (AVXVNNI, "avxvnni")
X86_FEATURE (AVXIFMA, "avxifma")
X86_FEATURE (AVXVNNIINT8, "avxvnniint8")
+X86_FEATURE (SHA512, "sha512")
+X86_FEATURE (SM3, "sm3")
+X86_FEATURE (SM4, "sm4")
+X86_FEATURE (AVXVNNIINT16, "avxvnniint16")
// These features aren't really CPU features, but the frontend can set them.
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches")
@@ -234,49 +240,3 @@ X86_FEATURE (LVI_CFI, "lvi-cfi")
X86_FEATURE (LVI_LOAD_HARDENING, "lvi-load-hardening")
#undef X86_FEATURE_COMPAT
#undef X86_FEATURE
-
-#ifndef CPU_SPECIFIC
-#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES)
-#endif
-
-#ifndef CPU_SPECIFIC_ALIAS
-#define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME)
-#endif
-
-CPU_SPECIFIC("generic", "generic", 'A', "")
-CPU_SPECIFIC("pentium", "pentium", 'B', "")
-CPU_SPECIFIC("pentium_pro", "pentiumpro", 'C', "+cmov")
-CPU_SPECIFIC("pentium_mmx", "pentium-mmx", 'D', "+mmx")
-CPU_SPECIFIC("pentium_ii", "pentium2", 'E', "+cmov,+mmx")
-CPU_SPECIFIC("pentium_iii", "pentium3", 'H', "+cmov,+mmx,+sse")
-CPU_SPECIFIC_ALIAS("pentium_iii_no_xmm_regs", "pentium3", "pentium_iii")
-CPU_SPECIFIC("pentium_4", "pentium4", 'J', "+cmov,+mmx,+sse,+sse2")
-CPU_SPECIFIC("pentium_m", "pentium-m", 'K', "+cmov,+mmx,+sse,+sse2")
-CPU_SPECIFIC("pentium_4_sse3", "prescott", 'L', "+cmov,+mmx,+sse,+sse2,+sse3")
-CPU_SPECIFIC("core_2_duo_ssse3", "core2", 'M', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3")
-CPU_SPECIFIC("core_2_duo_sse4_1", "penryn", 'N', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1")
-CPU_SPECIFIC("atom", "atom", 'O', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+movbe")
-CPU_SPECIFIC("atom_sse4_2", "silvermont", 'c', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
-CPU_SPECIFIC("core_i7_sse4_2", "nehalem", 'P', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
-CPU_SPECIFIC("core_aes_pclmulqdq", "westmere", 'Q', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt")
-CPU_SPECIFIC("atom_sse4_2_movbe", "silvermont", 'd', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt")
-CPU_SPECIFIC("goldmont", "goldmont", 'i', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt")
-CPU_SPECIFIC("sandybridge", "sandybridge", 'R', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx")
-CPU_SPECIFIC_ALIAS("core_2nd_gen_avx", "sandybridge", "sandybridge")
-CPU_SPECIFIC("ivybridge", "ivybridge", 'S', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+f16c,+avx")
-CPU_SPECIFIC_ALIAS("core_3rd_gen_avx", "ivybridge", "ivybridge")
-CPU_SPECIFIC("haswell", "haswell", 'V', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2")
-CPU_SPECIFIC_ALIAS("core_4th_gen_avx", "haswell", "haswell")
-CPU_SPECIFIC("core_4th_gen_avx_tsx", "haswell", 'W', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2")
-CPU_SPECIFIC("broadwell", "broadwell", 'X', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx")
-CPU_SPECIFIC_ALIAS("core_5th_gen_avx", "broadwell", "broadwell")
-CPU_SPECIFIC("core_5th_gen_avx_tsx", "broadwell", 'Y', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx")
-CPU_SPECIFIC("knl", "knl", 'Z', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd")
-CPU_SPECIFIC_ALIAS("mic_avx512", "knl", "knl")
-CPU_SPECIFIC("skylake", "skylake", 'b', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx,+mpx")
-CPU_SPECIFIC( "skylake_avx512", "skylake-avx512", 'a', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512cd,+avx512bw,+avx512vl,+clwb")
-CPU_SPECIFIC("cannonlake", "cannonlake", 'e', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512ifma,+avx512cd,+avx512bw,+avx512vl,+avx512vbmi")
-CPU_SPECIFIC("knm", "knm", 'j', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd,+avx5124fmaps,+avx5124vnniw,+avx512vpopcntdq")
-
-#undef CPU_SPECIFIC_ALIAS
-#undef CPU_SPECIFIC
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.h b/llvm/include/llvm/TargetParser/X86TargetParser.h
index 919960fd6c37..bae31891e24b 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.h
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.h
@@ -109,6 +109,7 @@ enum CPUKind {
CK_Sierraforest,
CK_Grandridge,
CK_Graniterapids,
+ CK_GraniterapidsD,
CK_Emeraldrapids,
CK_KNL,
CK_KNM,
@@ -155,13 +156,17 @@ void fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values,
ProcessorFeatures getKeyFeature(CPUKind Kind);
/// Fill in the features that \p CPU supports into \p Features.
-void getFeaturesForCPU(StringRef CPU, SmallVectorImpl<StringRef> &Features);
+/// "+" will be appended in front of each feature if IfNeedPlus is true.
+void getFeaturesForCPU(StringRef CPU, SmallVectorImpl<StringRef> &Features,
+ bool IfNeedPlus = false);
/// Set or clear entries in \p Features that are implied to be enabled/disabled
/// by the provided \p Feature.
void updateImpliedFeatures(StringRef Feature, bool Enabled,
StringMap<bool> &Features);
+char getCPUDispatchMangling(StringRef Name);
+bool validateCPUSpecificCPUDispatch(StringRef Name);
uint64_t getCpuSupportsMask(ArrayRef<StringRef> FeatureStrs);
unsigned getFeaturePriority(ProcessorFeatures Feat);
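A short sketch of the extended getFeaturesForCPU() (helper name illustrative): with IfNeedPlus set, every returned feature is prefixed with '+', ready for an -mattr style list.

#include "llvm/ADT/SmallVector.h"
#include "llvm/TargetParser/X86TargetParser.h"

// Collect a CPU's features with a leading '+' on each name.
void plusPrefixedFeatures(llvm::StringRef CPU,
                          llvm::SmallVectorImpl<llvm::StringRef> &Features) {
  llvm::X86::getFeaturesForCPU(CPU, Features, /*IfNeedPlus=*/true);
}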
diff --git a/llvm/include/llvm/TextAPI/InterfaceFile.h b/llvm/include/llvm/TextAPI/InterfaceFile.h
index 5f07397adaca..2f89605d24ba 100644
--- a/llvm/include/llvm/TextAPI/InterfaceFile.h
+++ b/llvm/include/llvm/TextAPI/InterfaceFile.h
@@ -24,6 +24,7 @@
#include "llvm/TextAPI/PackedVersion.h"
#include "llvm/TextAPI/Platform.h"
#include "llvm/TextAPI/Symbol.h"
+#include "llvm/TextAPI/SymbolSet.h"
#include "llvm/TextAPI/Target.h"
namespace llvm {
@@ -66,6 +67,9 @@ enum FileType : unsigned {
/// Text-based stub file (.tbd) version 4.0
TBD_V4 = 1U << 3,
+ /// Text-based stub file (.tbd) version 5.0
+ TBD_V5 = 1U << 4,
+
All = ~0U,
LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/All),
@@ -120,37 +124,15 @@ private:
} // end namespace MachO.
-struct SymbolsMapKey {
- MachO::SymbolKind Kind;
- StringRef Name;
-
- SymbolsMapKey(MachO::SymbolKind Kind, StringRef Name)
- : Kind(Kind), Name(Name) {}
-};
-template <> struct DenseMapInfo<SymbolsMapKey> {
- static inline SymbolsMapKey getEmptyKey() {
- return SymbolsMapKey(MachO::SymbolKind::GlobalSymbol, StringRef{});
- }
-
- static inline SymbolsMapKey getTombstoneKey() {
- return SymbolsMapKey(MachO::SymbolKind::ObjectiveCInstanceVariable,
- StringRef{});
- }
-
- static unsigned getHashValue(const SymbolsMapKey &Key) {
- return hash_combine(hash_value(Key.Kind), hash_value(Key.Name));
- }
-
- static bool isEqual(const SymbolsMapKey &LHS, const SymbolsMapKey &RHS) {
- return std::tie(LHS.Kind, LHS.Name) == std::tie(RHS.Kind, RHS.Name);
- }
-};
-
namespace MachO {
/// Defines the interface file.
class InterfaceFile {
public:
+ InterfaceFile(std::unique_ptr<SymbolSet> &&InputSymbols)
+ : SymbolsSet(std::move(InputSymbols)) {}
+
+ InterfaceFile() : SymbolsSet(std::make_unique<SymbolSet>()){};
/// Set the path from which this file was generated (if applicable).
///
/// \param Path_ The path to the source file.
@@ -260,12 +242,6 @@ public:
/// Get the Objective-C constraint.
ObjCConstraintType getObjCConstraint() const { return ObjcConstraint; }
- /// Specify if this file was generated during InstallAPI (or not).
- void setInstallAPI(bool V = true) { IsInstallAPI = V; }
-
- /// Check if this file was generated during InstallAPI.
- bool isInstallAPI() const { return IsInstallAPI; }
-
/// Set the parent umbrella frameworks.
/// \param Target_ The target applicable to Parent
/// \param Parent The name of Parent
@@ -311,25 +287,6 @@ public:
return ReexportedLibraries;
}
- /// Add an Target/UUID pair.
- ///
- /// \param Target The target triple for which this applies.
- /// \param UUID The UUID of the library for the specified architecture.
- void addUUID(const Target &Target, StringRef UUID);
-
- /// Add an Target/UUID pair.
- ///
- /// \param Target The target triple for which this applies.
- /// \param UUID The UUID of the library for the specified architecture.
- void addUUID(const Target &Target, uint8_t UUID[16]);
-
- /// Get the list of Target/UUID pairs.
- ///
- /// \return Returns a list of Target/UUID pairs.
- const std::vector<std::pair<Target, std::string>> &uuids() const {
- return UUIDs;
- }
-
/// Add a library for inlining to top level library.
///
///\param Document The library to inline with top level library.
@@ -345,60 +302,78 @@ public:
return Documents;
}
- /// Add a symbol to the symbols list or extend an existing one.
- void addSymbol(SymbolKind Kind, StringRef Name, const TargetList &Targets,
- SymbolFlags Flags = SymbolFlags::None);
-
- using SymbolMapType = DenseMap<SymbolsMapKey, Symbol *>;
- struct const_symbol_iterator
- : public iterator_adaptor_base<
- const_symbol_iterator, SymbolMapType::const_iterator,
- std::forward_iterator_tag, const Symbol *, ptrdiff_t,
- const Symbol *, const Symbol *> {
- const_symbol_iterator() = default;
-
- template <typename U>
- const_symbol_iterator(U &&u)
- : iterator_adaptor_base(std::forward<U &&>(u)) {}
-
- reference operator*() const { return I->second; }
- pointer operator->() const { return I->second; }
- };
+ /// Set the runpath search paths.
+ /// \param InputTarget The target applicable to runpath search path.
+ /// \param RPath The name of runpath.
+ void addRPath(const Target &InputTarget, StringRef RPath);
- using const_symbol_range = iterator_range<const_symbol_iterator>;
+ /// Get the list of runpath search paths.
+ ///
+ /// \return Returns a list of the rpaths per target.
+ const std::vector<std::pair<Target, std::string>> &rpaths() const {
+ return RPaths;
+ }
- using const_filtered_symbol_iterator =
- filter_iterator<const_symbol_iterator,
- std::function<bool(const Symbol *)>>;
- using const_filtered_symbol_range =
- iterator_range<const_filtered_symbol_iterator>;
+ /// Get symbol if exists in file.
+ ///
+ /// \param Kind The kind of global symbol to record.
+ /// \param Name The name of the symbol.
+ std::optional<const Symbol *> getSymbol(SymbolKind Kind,
+ StringRef Name) const {
+ if (auto *Sym = SymbolsSet->findSymbol(Kind, Name))
+ return Sym;
+ return std::nullopt;
+ }
- const_symbol_range symbols() const {
- return {Symbols.begin(), Symbols.end()};
+ /// Add a symbol to the symbols list or extend an existing one.
+ template <typename RangeT,
+ typename ElT = typename std::remove_reference<
+ decltype(*std::begin(std::declval<RangeT>()))>::type>
+ void addSymbol(SymbolKind Kind, StringRef Name, RangeT &&Targets,
+ SymbolFlags Flags = SymbolFlags::None) {
+ SymbolsSet->addGlobal(Kind, Name, Flags, Targets);
}
- size_t symbolsCount() const { return Symbols.size(); }
+ /// Add Symbol with multiple targets.
+ ///
+ /// \param Kind The kind of global symbol to record.
+ /// \param Name The name of the symbol.
+ /// \param Targets The list of targets the symbol is defined in.
+ /// \param Flags The properties the symbol holds.
+ void addSymbol(SymbolKind Kind, StringRef Name, TargetList &&Targets,
+ SymbolFlags Flags = SymbolFlags::None) {
+ SymbolsSet->addGlobal(Kind, Name, Flags, Targets);
+ }
- const_filtered_symbol_range exports() const {
- std::function<bool(const Symbol *)> fn = [](const Symbol *Symbol) {
- return !Symbol->isUndefined();
- };
- return make_filter_range(
- make_range<const_symbol_iterator>({Symbols.begin()}, {Symbols.end()}),
- fn);
+ /// Add Symbol with single target.
+ ///
+ /// \param Kind The kind of global symbol to record.
+ /// \param Name The name of the symbol.
+ /// \param Target The target the symbol is defined in.
+ /// \param Flags The properties the symbol holds.
+ void addSymbol(SymbolKind Kind, StringRef Name, Target &Target,
+ SymbolFlags Flags = SymbolFlags::None) {
+ SymbolsSet->addGlobal(Kind, Name, Flags, Target);
}
+ /// Get size of symbol set.
+ /// \return The number of symbols the file holds.
+ size_t symbolsCount() const { return SymbolsSet->size(); }
+
+ using const_symbol_range = SymbolSet::const_symbol_range;
+ using const_filtered_symbol_range = SymbolSet::const_filtered_symbol_range;
+
+ const_symbol_range symbols() const { return SymbolsSet->symbols(); };
+ const_filtered_symbol_range exports() const { return SymbolsSet->exports(); };
+ const_filtered_symbol_range reexports() const {
+ return SymbolsSet->reexports();
+ };
const_filtered_symbol_range undefineds() const {
- std::function<bool(const Symbol *)> fn = [](const Symbol *Symbol) {
- return Symbol->isUndefined();
- };
- return make_filter_range(
- make_range<const_symbol_iterator>({Symbols.begin()}, {Symbols.end()}),
- fn);
- }
+ return SymbolsSet->undefineds();
+ };
/// The equality is determined by attributes that impact linking
- /// compatibilities. UUIDs, Path, & FileKind are irrelevant since these by
+ /// compatibilities. Path, & FileKind are irrelevant since these by
/// itself should not impact linking.
/// This is an expensive operation.
bool operator==(const InterfaceFile &O) const;
@@ -418,37 +393,33 @@ private:
TargetList Targets;
std::string Path;
- FileType FileKind;
+ FileType FileKind{FileType::Invalid};
std::string InstallName;
PackedVersion CurrentVersion;
PackedVersion CompatibilityVersion;
uint8_t SwiftABIVersion{0};
bool IsTwoLevelNamespace{false};
bool IsAppExtensionSafe{false};
- bool IsInstallAPI{false};
ObjCConstraintType ObjcConstraint = ObjCConstraintType::None;
std::vector<std::pair<Target, std::string>> ParentUmbrellas;
std::vector<InterfaceFileRef> AllowableClients;
std::vector<InterfaceFileRef> ReexportedLibraries;
std::vector<std::shared_ptr<InterfaceFile>> Documents;
- std::vector<std::pair<Target, std::string>> UUIDs;
- SymbolMapType Symbols;
+ std::vector<std::pair<Target, std::string>> RPaths;
+ std::unique_ptr<SymbolSet> SymbolsSet;
InterfaceFile *Parent = nullptr;
};
-template <typename DerivedT, typename KeyInfoT, typename BucketT>
-bool operator==(const DenseMapBase<DerivedT, SymbolsMapKey, MachO::Symbol *,
- KeyInfoT, BucketT> &LHS,
- const DenseMapBase<DerivedT, SymbolsMapKey, MachO::Symbol *,
- KeyInfoT, BucketT> &RHS) {
- if (LHS.size() != RHS.size())
- return false;
- for (const auto &KV : LHS) {
- auto I = RHS.find(KV.first);
- if (I == RHS.end() || *I->second != *KV.second)
- return false;
- }
- return true;
+// Keep containers that hold InterfaceFileRefs in sorted order and uniqued.
+template <typename C>
+typename C::iterator addEntry(C &Container, StringRef InstallName) {
+ auto I = partition_point(Container, [=](const InterfaceFileRef &O) {
+ return O.getInstallName() < InstallName;
+ });
+ if (I != Container.end() && I->getInstallName() == InstallName)
+ return I;
+
+ return Container.emplace(I, InstallName);
}
} // end namespace MachO.
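
The addEntry helper above keeps InterfaceFileRef containers sorted by install name and free of duplicates by probing with partition_point before inserting. A minimal standalone sketch of the same idea over std::vector<std::string> (names here are illustrative, not part of the LLVM API):

#include <algorithm>
#include <string>
#include <vector>

// Insert Name into a vector kept sorted and uniqued; return an iterator to
// the existing or newly inserted element.
std::vector<std::string>::iterator addEntry(std::vector<std::string> &C,
                                            const std::string &Name) {
  auto I = std::partition_point(C.begin(), C.end(),
                                [&](const std::string &S) { return S < Name; });
  if (I != C.end() && *I == Name)
    return I;               // already present, keep the container unique
  return C.insert(I, Name); // insert at the sorted position
}
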
diff --git a/llvm/include/llvm/TextAPI/PackedVersion.h b/llvm/include/llvm/TextAPI/PackedVersion.h
index 24bec2ebe8fc..eafa50896735 100644
--- a/llvm/include/llvm/TextAPI/PackedVersion.h
+++ b/llvm/include/llvm/TextAPI/PackedVersion.h
@@ -14,6 +14,7 @@
#define LLVM_TEXTAPI_PACKEDVERSION_H
#include <cstdint>
+#include <string>
#include <utility>
namespace llvm {
@@ -53,6 +54,8 @@ public:
uint32_t rawValue() const { return Version; }
+ operator std::string() const;
+
void print(raw_ostream &OS) const;
};
diff --git a/llvm/include/llvm/TextAPI/Platform.h b/llvm/include/llvm/TextAPI/Platform.h
index d4225ca533fc..d828d9ac49f6 100644
--- a/llvm/include/llvm/TextAPI/Platform.h
+++ b/llvm/include/llvm/TextAPI/Platform.h
@@ -14,11 +14,13 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Support/VersionTuple.h"
namespace llvm {
namespace MachO {
using PlatformSet = SmallSet<PlatformType, 3>;
+using PlatformVersionSet = SmallSet<std::pair<PlatformType, VersionTuple>, 3>;
PlatformType mapToPlatformType(PlatformType Platform, bool WantSim);
PlatformType mapToPlatformType(const Triple &Target);
@@ -27,6 +29,7 @@ StringRef getPlatformName(PlatformType Platform);
PlatformType getPlatformFromName(StringRef Name);
std::string getOSAndEnvironmentName(PlatformType Platform,
std::string Version = "");
+VersionTuple mapToSupportedOSVersion(const Triple &Triple);
} // end namespace MachO.
} // end namespace llvm.
diff --git a/llvm/include/llvm/TextAPI/Symbol.h b/llvm/include/llvm/TextAPI/Symbol.h
index 1c25295b299d..a20fcc785b40 100644
--- a/llvm/include/llvm/TextAPI/Symbol.h
+++ b/llvm/include/llvm/TextAPI/Symbol.h
@@ -40,7 +40,13 @@ enum class SymbolFlags : uint8_t {
/// Rexported
Rexported = 1U << 4,
- LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Rexported),
+ /// Data Segment
+ Data = 1U << 5,
+
+ /// Text Segment
+ Text = 1U << 6,
+
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/Text),
};
// clang-format on
@@ -59,12 +65,26 @@ constexpr StringLiteral ObjC2EHTypePrefix = "_OBJC_EHTYPE_$_";
constexpr StringLiteral ObjC2IVarPrefix = "_OBJC_IVAR_$_";
using TargetList = SmallVector<Target, 5>;
+
+// Keep containers that hold Targets in sorted order and uniqued.
+template <typename C>
+typename C::iterator addEntry(C &Container, const Target &Targ) {
+ auto Iter =
+ lower_bound(Container, Targ, [](const Target &LHS, const Target &RHS) {
+ return LHS < RHS;
+ });
+ if ((Iter != std::end(Container)) && !(Targ < *Iter))
+ return Iter;
+
+ return Container.insert(Iter, Targ);
+}
+
class Symbol {
public:
Symbol(SymbolKind Kind, StringRef Name, TargetList Targets, SymbolFlags Flags)
: Name(Name), Targets(std::move(Targets)), Kind(Kind), Flags(Flags) {}
- void addTarget(Target target) { Targets.emplace_back(target); }
+ void addTarget(Target InputTarget) { addEntry(Targets, InputTarget); }
SymbolKind getKind() const { return Kind; }
StringRef getName() const { return Name; }
ArchitectureSet getArchitectures() const {
@@ -93,6 +113,14 @@ public:
return (Flags & SymbolFlags::Rexported) == SymbolFlags::Rexported;
}
+ bool isData() const {
+ return (Flags & SymbolFlags::Data) == SymbolFlags::Data;
+ }
+
+ bool isText() const {
+ return (Flags & SymbolFlags::Text) == SymbolFlags::Text;
+ }
+
using const_target_iterator = TargetList::const_iterator;
using const_target_range = llvm::iterator_range<const_target_iterator>;
const_target_range targets() const { return {Targets}; }
@@ -109,10 +137,7 @@ public:
void dump() const { dump(llvm::errs()); }
#endif
- bool operator==(const Symbol &O) const {
- return std::tie(Name, Kind, Targets, Flags) ==
- std::tie(O.Name, O.Kind, O.Targets, O.Flags);
- }
+ bool operator==(const Symbol &O) const;
bool operator!=(const Symbol &O) const { return !(*this == O); }
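
The new Data and Text flags extend the SymbolFlags bitmask: each flag occupies a distinct bit and membership is tested by masking, as isData()/isText() show. A self-contained sketch of that pattern in plain C++, without LLVM_MARK_AS_BITMASK_ENUM (names are illustrative):

#include <cstdint>

enum class SymFlags : uint8_t {
  None = 0,
  Data = 1u << 5, // symbol lives in a data segment
  Text = 1u << 6, // symbol lives in a text segment
};

constexpr SymFlags operator&(SymFlags A, SymFlags B) {
  return static_cast<SymFlags>(static_cast<uint8_t>(A) &
                               static_cast<uint8_t>(B));
}

constexpr bool isData(SymFlags F) { return (F & SymFlags::Data) == SymFlags::Data; }
constexpr bool isText(SymFlags F) { return (F & SymFlags::Text) == SymFlags::Text; }
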
diff --git a/llvm/include/llvm/TextAPI/SymbolSet.h b/llvm/include/llvm/TextAPI/SymbolSet.h
new file mode 100644
index 000000000000..238385178d60
--- /dev/null
+++ b/llvm/include/llvm/TextAPI/SymbolSet.h
@@ -0,0 +1,182 @@
+//===- llvm/TextAPI/SymbolSet.h - TAPI Symbol Set --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TEXTAPI_SYMBOLSET_H
+#define LLVM_TEXTAPI_SYMBOLSET_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/TextAPI/Architecture.h"
+#include "llvm/TextAPI/ArchitectureSet.h"
+#include "llvm/TextAPI/Symbol.h"
+#include <stddef.h>
+
+namespace llvm {
+
+struct SymbolsMapKey {
+ MachO::SymbolKind Kind;
+ StringRef Name;
+
+ SymbolsMapKey(MachO::SymbolKind Kind, StringRef Name)
+ : Kind(Kind), Name(Name) {}
+};
+template <> struct DenseMapInfo<SymbolsMapKey> {
+ static inline SymbolsMapKey getEmptyKey() {
+ return SymbolsMapKey(MachO::SymbolKind::GlobalSymbol, StringRef{});
+ }
+
+ static inline SymbolsMapKey getTombstoneKey() {
+ return SymbolsMapKey(MachO::SymbolKind::ObjectiveCInstanceVariable,
+ StringRef{});
+ }
+
+ static unsigned getHashValue(const SymbolsMapKey &Key) {
+ return hash_combine(hash_value(Key.Kind), hash_value(Key.Name));
+ }
+
+ static bool isEqual(const SymbolsMapKey &LHS, const SymbolsMapKey &RHS) {
+ return std::tie(LHS.Kind, LHS.Name) == std::tie(RHS.Kind, RHS.Name);
+ }
+};
+
+template <typename DerivedT, typename KeyInfoT, typename BucketT>
+bool operator==(const DenseMapBase<DerivedT, SymbolsMapKey, MachO::Symbol *,
+ KeyInfoT, BucketT> &LHS,
+ const DenseMapBase<DerivedT, SymbolsMapKey, MachO::Symbol *,
+ KeyInfoT, BucketT> &RHS) {
+ if (LHS.size() != RHS.size())
+ return false;
+ for (const auto &KV : LHS) {
+ auto I = RHS.find(KV.first);
+ if (I == RHS.end() || *I->second != *KV.second)
+ return false;
+ }
+ return true;
+}
+
+template <typename DerivedT, typename KeyInfoT, typename BucketT>
+bool operator!=(const DenseMapBase<DerivedT, SymbolsMapKey, MachO::Symbol *,
+ KeyInfoT, BucketT> &LHS,
+ const DenseMapBase<DerivedT, SymbolsMapKey, MachO::Symbol *,
+ KeyInfoT, BucketT> &RHS) {
+ return !(LHS == RHS);
+}
+
+namespace MachO {
+
+class SymbolSet {
+private:
+ llvm::BumpPtrAllocator Allocator;
+ StringRef copyString(StringRef String) {
+ if (String.empty())
+ return {};
+ void *Ptr = Allocator.Allocate(String.size(), 1);
+ memcpy(Ptr, String.data(), String.size());
+ return StringRef(reinterpret_cast<const char *>(Ptr), String.size());
+ }
+
+ using SymbolsMapType = llvm::DenseMap<SymbolsMapKey, Symbol *>;
+ SymbolsMapType Symbols;
+
+ Symbol *addGlobalImpl(SymbolKind, StringRef Name, SymbolFlags Flags);
+
+public:
+ SymbolSet() = default;
+ Symbol *addGlobal(SymbolKind Kind, StringRef Name, SymbolFlags Flags,
+ const Target &Targ);
+ size_t size() const { return Symbols.size(); }
+
+ template <typename RangeT,
+ typename ElT = typename std::remove_reference<
+ decltype(*std::begin(std::declval<RangeT>()))>::type>
+ Symbol *addGlobal(SymbolKind Kind, StringRef Name, SymbolFlags Flags,
+ RangeT &&Targets) {
+ auto *Global = addGlobalImpl(Kind, Name, Flags);
+ for (const auto &Targ : Targets)
+ Global->addTarget(Targ);
+ if (Kind == SymbolKind::ObjectiveCClassEHType)
+ addGlobal(SymbolKind::ObjectiveCClass, Name, Flags, Targets);
+ return Global;
+ }
+
+ const Symbol *findSymbol(SymbolKind Kind, StringRef Name) const;
+
+ struct const_symbol_iterator
+ : public iterator_adaptor_base<
+ const_symbol_iterator, SymbolsMapType::const_iterator,
+ std::forward_iterator_tag, const Symbol *, ptrdiff_t,
+ const Symbol *, const Symbol *> {
+ const_symbol_iterator() = default;
+
+ template <typename U>
+ const_symbol_iterator(U &&u)
+ : iterator_adaptor_base(std::forward<U &&>(u)) {}
+
+ reference operator*() const { return I->second; }
+ pointer operator->() const { return I->second; }
+ };
+
+ using const_symbol_range = iterator_range<const_symbol_iterator>;
+
+ using const_filtered_symbol_iterator =
+ filter_iterator<const_symbol_iterator,
+ std::function<bool(const Symbol *)>>;
+ using const_filtered_symbol_range =
+ iterator_range<const_filtered_symbol_iterator>;
+
+ // Range that contains all symbols.
+ const_symbol_range symbols() const {
+ return {Symbols.begin(), Symbols.end()};
+ }
+
+ // Range that contains all defined and exported symbols.
+ const_filtered_symbol_range exports() const {
+ std::function<bool(const Symbol *)> fn = [](const Symbol *Symbol) {
+ return !Symbol->isUndefined() && !Symbol->isReexported();
+ };
+ return make_filter_range(
+ make_range<const_symbol_iterator>({Symbols.begin()}, {Symbols.end()}),
+ fn);
+ }
+
+ // Range that contains all reexported symbols.
+ const_filtered_symbol_range reexports() const {
+ std::function<bool(const Symbol *)> fn = [](const Symbol *Symbol) {
+ return Symbol->isReexported();
+ };
+ return make_filter_range(
+ make_range<const_symbol_iterator>({Symbols.begin()}, {Symbols.end()}),
+ fn);
+ }
+
+ // Range that contains all undefined and exported symbols.
+ const_filtered_symbol_range undefineds() const {
+ std::function<bool(const Symbol *)> fn = [](const Symbol *Symbol) {
+ return Symbol->isUndefined();
+ };
+ return make_filter_range(
+ make_range<const_symbol_iterator>({Symbols.begin()}, {Symbols.end()}),
+ fn);
+ }
+
+ bool operator==(const SymbolSet &O) const;
+
+ bool operator!=(const SymbolSet &O) const { return !(Symbols == O.Symbols); }
+
+ void *allocate(size_t Size, unsigned Align = 8) {
+ return Allocator.Allocate(Size, Align);
+ }
+};
+
+} // namespace MachO
+} // namespace llvm
+#endif // LLVM_TEXTAPI_SYMBOLSET_H
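
SymbolSet keys its DenseMap on the (kind, name) pair and hashes both parts together, so the same name can coexist once per symbol kind. A rough standalone analogue built on std::unordered_map (the hash mix below is a simple stand-in for llvm::hash_combine, and all names are illustrative):

#include <cstddef>
#include <functional>
#include <string>
#include <unordered_map>

enum class SymKind { GlobalSymbol, ObjectiveCClass, ObjectiveCInstanceVariable };

struct SymKey {
  SymKind Kind;
  std::string Name;
  bool operator==(const SymKey &O) const {
    return Kind == O.Kind && Name == O.Name;
  }
};

struct SymKeyHash {
  std::size_t operator()(const SymKey &K) const {
    std::size_t H = std::hash<std::string>()(K.Name);
    // Simple mix of the kind into the name hash; LLVM uses hash_combine.
    return H ^ (static_cast<std::size_t>(K.Kind) + 0x9e3779b9 + (H << 6) + (H >> 2));
  }
};

using SymMap = std::unordered_map<SymKey, int, SymKeyHash>;
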
diff --git a/llvm/include/llvm/TextAPI/Target.h b/llvm/include/llvm/TextAPI/Target.h
index fbb76295f706..edcc0708d147 100644
--- a/llvm/include/llvm/TextAPI/Target.h
+++ b/llvm/include/llvm/TextAPI/Target.h
@@ -10,6 +10,8 @@
#define LLVM_TEXTAPI_TARGET_H
#include "llvm/Support/Error.h"
+#include "llvm/Support/VersionTuple.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/TextAPI/Architecture.h"
#include "llvm/TextAPI/ArchitectureSet.h"
#include "llvm/TextAPI/Platform.h"
@@ -26,10 +28,12 @@ namespace MachO {
class Target {
public:
Target() = default;
- Target(Architecture Arch, PlatformType Platform)
- : Arch(Arch), Platform(Platform) {}
+ Target(Architecture Arch, PlatformType Platform,
+ VersionTuple MinDeployment = {})
+ : Arch(Arch), Platform(Platform), MinDeployment(MinDeployment) {}
explicit Target(const llvm::Triple &Triple)
- : Arch(mapToArchitecture(Triple)), Platform(mapToPlatformType(Triple)) {}
+ : Arch(mapToArchitecture(Triple)), Platform(mapToPlatformType(Triple)),
+ MinDeployment(mapToSupportedOSVersion(Triple)) {}
static llvm::Expected<Target> create(StringRef Target);
@@ -37,17 +41,20 @@ public:
Architecture Arch;
PlatformType Platform;
+ VersionTuple MinDeployment;
};
inline bool operator==(const Target &LHS, const Target &RHS) {
+ // In most cases the deployment version is not useful to compare.
return std::tie(LHS.Arch, LHS.Platform) == std::tie(RHS.Arch, RHS.Platform);
}
inline bool operator!=(const Target &LHS, const Target &RHS) {
- return std::tie(LHS.Arch, LHS.Platform) != std::tie(RHS.Arch, RHS.Platform);
+ return !(LHS == RHS);
}
inline bool operator<(const Target &LHS, const Target &RHS) {
+ // In most cases the deployment version is not useful to compare.
return std::tie(LHS.Arch, LHS.Platform) < std::tie(RHS.Arch, RHS.Platform);
}
@@ -59,6 +66,7 @@ inline bool operator!=(const Target &LHS, const Architecture &RHS) {
return LHS.Arch != RHS;
}
+PlatformVersionSet mapToPlatformVersionSet(ArrayRef<Target> Targets);
PlatformSet mapToPlatformSet(ArrayRef<Target> Targets);
ArchitectureSet mapToArchitectureSet(ArrayRef<Target> Targets);
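
Target now carries a minimum deployment version, but the comparison operators above deliberately keep equality and ordering on (Arch, Platform) only. A small sketch of comparing a chosen subset of fields with std::tie, mirroring that choice (types are illustrative):

#include <tuple>

struct Version { unsigned Major = 0, Minor = 0; };

struct TargetLike {
  int Arch = 0;
  int Platform = 0;
  Version MinDeployment; // intentionally not part of the ordering
};

// Equality and ordering look only at Arch and Platform, as in the
// TextAPI Target operators above.
inline bool operator==(const TargetLike &L, const TargetLike &R) {
  return std::tie(L.Arch, L.Platform) == std::tie(R.Arch, R.Platform);
}
inline bool operator<(const TargetLike &L, const TargetLike &R) {
  return std::tie(L.Arch, L.Platform) < std::tie(R.Arch, R.Platform);
}
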
diff --git a/llvm/include/llvm/TextAPI/TextAPIWriter.h b/llvm/include/llvm/TextAPI/TextAPIWriter.h
index f9857a806f60..9bdaaf58d09f 100644
--- a/llvm/include/llvm/TextAPI/TextAPIWriter.h
+++ b/llvm/include/llvm/TextAPI/TextAPIWriter.h
@@ -22,7 +22,8 @@ class TextAPIWriter {
public:
TextAPIWriter() = delete;
- static Error writeToStream(raw_ostream &os, const InterfaceFile &);
+ static Error writeToStream(raw_ostream &OS, const InterfaceFile &File,
+ bool Compact = false);
};
} // end namespace MachO.
diff --git a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
index 2d76546316fa..3568417510f1 100644
--- a/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
+++ b/llvm/include/llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h
@@ -8,7 +8,7 @@
/// \file
///
/// AggressiveInstCombiner - Combine expression patterns to form expressions
-/// with fewer, simple instructions. This pass does not modify the CFG.
+/// with fewer, simple instructions.
///
//===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Transforms/Coroutines/CoroSplit.h b/llvm/include/llvm/Transforms/Coroutines/CoroSplit.h
index 7623c9c0eb68..a2be1099ff68 100644
--- a/llvm/include/llvm/Transforms/Coroutines/CoroSplit.h
+++ b/llvm/include/llvm/Transforms/Coroutines/CoroSplit.h
@@ -22,7 +22,13 @@
namespace llvm {
struct CoroSplitPass : PassInfoMixin<CoroSplitPass> {
- CoroSplitPass(bool OptimizeFrame = false) : OptimizeFrame(OptimizeFrame) {}
+ const std::function<bool(Instruction &)> MaterializableCallback;
+
+ CoroSplitPass(bool OptimizeFrame = false);
+ CoroSplitPass(std::function<bool(Instruction &)> MaterializableCallback,
+ bool OptimizeFrame = false)
+ : MaterializableCallback(MaterializableCallback),
+ OptimizeFrame(OptimizeFrame) {}
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR);
diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h
index 9b1a2fb362f2..4995b000c454 100644
--- a/llvm/include/llvm/Transforms/IPO.h
+++ b/llvm/include/llvm/Transforms/IPO.h
@@ -20,115 +20,11 @@
namespace llvm {
-struct InlineParams;
class ModulePass;
class Pass;
-class BasicBlock;
-class GlobalValue;
class raw_ostream;
//===----------------------------------------------------------------------===//
-//
-// This pass adds !annotation metadata to entries in the
-// @llvm.global.annotations global constant.
-//
-ModulePass *createAnnotation2MetadataLegacyPass();
-
-//===----------------------------------------------------------------------===//
-//
-// These functions removes symbols from functions and modules. If OnlyDebugInfo
-// is true, only debugging information is removed from the module.
-//
-ModulePass *createStripSymbolsPass(bool OnlyDebugInfo = false);
-
-//===----------------------------------------------------------------------===//
-//
-// These functions strips symbols from functions and modules.
-// Only debugging information is not stripped.
-//
-ModulePass *createStripNonDebugSymbolsPass();
-
-//===----------------------------------------------------------------------===//
-//
-// This pass removes llvm.dbg.declare intrinsics.
-ModulePass *createStripDebugDeclarePass();
-
-//===----------------------------------------------------------------------===//
-//
-// This pass removes unused symbols' debug info.
-ModulePass *createStripDeadDebugInfoPass();
-
-//===----------------------------------------------------------------------===//
-/// createConstantMergePass - This function returns a new pass that merges
-/// duplicate global constants together into a single constant that is shared.
-/// This is useful because some passes (ie TraceValues) insert a lot of string
-/// constants into the program, regardless of whether or not they duplicate an
-/// existing string.
-///
-ModulePass *createConstantMergePass();
-
-//===----------------------------------------------------------------------===//
-/// createGlobalOptimizerPass - This function returns a new pass that optimizes
-/// non-address taken internal globals.
-///
-ModulePass *createGlobalOptimizerPass();
-
-//===----------------------------------------------------------------------===//
-/// createGlobalDCEPass - This transform is designed to eliminate unreachable
-/// internal globals (functions or global variables)
-///
-ModulePass *createGlobalDCEPass();
-
-//===----------------------------------------------------------------------===//
-/// This transform is designed to eliminate available external globals
-/// (functions or global variables)
-///
-ModulePass *createEliminateAvailableExternallyPass();
-
-//===----------------------------------------------------------------------===//
-/// createGVExtractionPass - If deleteFn is true, this pass deletes
-/// the specified global values. Otherwise, it deletes as much of the module as
-/// possible, except for the global values specified. If keepConstInit is true,
-/// the initializers of global constants are not deleted even if they are
-/// unused.
-///
-ModulePass *createGVExtractionPass(std::vector<GlobalValue*>& GVs, bool
- deleteFn = false, bool keepConstInit = false);
-
-//===----------------------------------------------------------------------===//
-/// createFunctionInliningPass - Return a new pass object that uses a heuristic
-/// to inline direct function calls to small functions.
-///
-/// The Threshold can be passed directly, or asked to be computed from the
-/// given optimization and size optimization arguments.
-///
-/// The -inline-threshold command line option takes precedence over the
-/// threshold given here.
-Pass *createFunctionInliningPass();
-Pass *createFunctionInliningPass(int Threshold);
-Pass *createFunctionInliningPass(unsigned OptLevel, unsigned SizeOptLevel,
- bool DisableInlineHotCallSite);
-Pass *createFunctionInliningPass(InlineParams &Params);
-
-//===----------------------------------------------------------------------===//
-/// createInternalizePass - This pass loops over all of the functions in the
-/// input module, internalizing all globals (functions and variables) it can.
-////
-/// Before internalizing a symbol, the callback \p MustPreserveGV is invoked and
-/// gives to the client the ability to prevent internalizing specific symbols.
-///
-/// The symbol in DSOList are internalized if it is safe to drop them from
-/// the symbol table.
-///
-/// Note that commandline options that are used with the above function are not
-/// used now!
-ModulePass *
-createInternalizePass(std::function<bool(const GlobalValue &)> MustPreserveGV);
-
-/// createInternalizePass - Same as above, but with an empty exportList.
-ModulePass *createInternalizePass();
-
-//===----------------------------------------------------------------------===//
/// createDeadArgEliminationPass - This pass removes arguments from functions
/// which are not used by the body of the function.
///
@@ -140,13 +36,6 @@ ModulePass *createDeadArgEliminationPass();
ModulePass *createDeadArgHackingPass();
//===----------------------------------------------------------------------===//
-/// createIPSCCPPass - This pass propagates constants from call sites into the
-/// bodies of functions, and keeps track of whether basic blocks are executable
-/// in the process.
-///
-ModulePass *createIPSCCPPass();
-
-//===----------------------------------------------------------------------===//
//
/// createLoopExtractorPass - This pass extracts all natural loops from the
/// program into a function if it can.
@@ -158,47 +47,11 @@ Pass *createLoopExtractorPass();
///
Pass *createSingleLoopExtractorPass();
-/// createStripDeadPrototypesPass - This pass removes any function declarations
-/// (prototypes) that are not used.
-ModulePass *createStripDeadPrototypesPass();
-
-//===----------------------------------------------------------------------===//
-/// createReversePostOrderFunctionAttrsPass - This pass walks SCCs of the call
-/// graph in RPO to deduce and propagate function attributes. Currently it
-/// only handles synthesizing norecurse attributes.
-///
-Pass *createReversePostOrderFunctionAttrsPass();
-
-//===----------------------------------------------------------------------===//
-/// createMergeFunctionsPass - This pass discovers identical functions and
-/// collapses them.
-///
-ModulePass *createMergeFunctionsPass();
-
-//===----------------------------------------------------------------------===//
-/// createHotColdSplittingPass - This pass outlines cold blocks into a separate
-/// function(s).
-ModulePass *createHotColdSplittingPass();
-
-//===----------------------------------------------------------------------===//
-/// createIROutlinerPass - This pass finds similar code regions and factors
-/// those regions out into functions.
-ModulePass *createIROutlinerPass();
-
-//===----------------------------------------------------------------------===//
-/// createPartialInliningPass - This pass inlines parts of functions.
-///
-ModulePass *createPartialInliningPass();
-
//===----------------------------------------------------------------------===//
/// createBarrierNoopPass - This pass is purely a module pass barrier in a pass
/// manager.
ModulePass *createBarrierNoopPass();
-/// createCalledValuePropagationPass - Attach metadata to indirct call sites
-/// indicating the set of functions they may target at run-time.
-ModulePass *createCalledValuePropagationPass();
-
/// What to do with the summary when running passes that operate on it.
enum class PassSummaryAction {
None, ///< Do nothing.
@@ -206,13 +59,6 @@ enum class PassSummaryAction {
Export, ///< Export information to summary.
};
-/// This pass export CFI checks for use by external modules.
-ModulePass *createCrossDSOCFIPass();
-
-/// This pass splits globals into pieces for the benefit of whole-program
-/// devirtualization and control-flow integrity.
-ModulePass *createGlobalSplitPass();
-
} // End llvm namespace
#endif
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index da171f894074..1da00acdf034 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -103,7 +103,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/CFG.h"
@@ -116,17 +115,22 @@
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/AbstractCallSite.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DOTGraphTraits.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ModRef.h"
#include "llvm/Support/TimeProfiler.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include <limits>
@@ -148,7 +152,6 @@ struct AAIsDead;
struct AttributorCallGraph;
struct IRPosition;
-class AAResults;
class Function;
/// Abstract Attribute helper functions.
@@ -163,6 +166,9 @@ enum class GPUAddressSpace : unsigned {
Local = 5,
};
+/// Return true iff \p M targets a GPU (and we can use GPU AS reasoning).
+bool isGPU(const Module &M);
+
/// Flags to distinguish intra-procedural queries from *potentially*
/// inter-procedural queries. Note that information can be valid for both and
/// therefore both bits might be set.
@@ -262,18 +268,24 @@ struct RangeTy {
}
RangeTy &operator&=(const RangeTy &R) {
- if (Offset == Unassigned)
- Offset = R.Offset;
- else if (R.Offset != Unassigned && R.Offset != Offset)
+ if (R.isUnassigned())
+ return *this;
+ if (isUnassigned())
+ return *this = R;
+ if (Offset == Unknown || R.Offset == Unknown)
Offset = Unknown;
-
- if (Size == Unassigned)
- Size = R.Size;
- else if (Size == Unknown || R.Size == Unknown)
+ if (Size == Unknown || R.Size == Unknown)
Size = Unknown;
- else if (R.Size != Unassigned)
+ if (offsetAndSizeAreUnknown())
+ return *this;
+ if (Offset == Unknown) {
Size = std::max(Size, R.Size);
-
+ } else if (Size == Unknown) {
+ Offset = std::min(Offset, R.Offset);
+ } else {
+ Offset = std::min(Offset, R.Offset);
+ Size = std::max(Offset + Size, R.Offset + R.Size) - Offset;
+ }
return *this;
}
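
The reworked RangeTy::operator&= computes a conservative covering range: unassigned operands are ignored, Unknown is sticky for offset and size, and otherwise the result spans from the smaller offset to the larger end. A tiny standalone sketch of the fully-known case (sentinel handling elided, types illustrative):

#include <algorithm>
#include <cstdint>

struct Range {
  int64_t Offset;
  int64_t Size;
};

// Merge two fully known ranges into the smallest range covering both,
// e.g. {8,4} and {16,8} -> {8,16} (bytes [8,24)).
Range merge(Range A, Range B) {
  int64_t Offset = std::min(A.Offset, B.Offset);
  int64_t End = std::max(A.Offset + A.Size, B.Offset + B.Size);
  return {Offset, End - Offset};
}
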
@@ -306,7 +318,7 @@ inline bool operator==(const RangeTy &A, const RangeTy &B) {
inline bool operator!=(const RangeTy &A, const RangeTy &B) { return !(A == B); }
/// Return the initial value of \p Obj with type \p Ty if that is a constant.
-Constant *getInitialValueForObj(Value &Obj, Type &Ty,
+Constant *getInitialValueForObj(Attributor &A, Value &Obj, Type &Ty,
const TargetLibraryInfo *TLI,
const DataLayout &DL,
RangeTy *RangePtr = nullptr);
@@ -448,10 +460,12 @@ struct DenseMapInfo<const AA::InstExclusionSetTy *>
if (LHS == getEmptyKey() || RHS == getEmptyKey() ||
LHS == getTombstoneKey() || RHS == getTombstoneKey())
return false;
- if (!LHS || !RHS)
- return ((LHS && LHS->empty()) || (RHS && RHS->empty()));
- if (LHS->size() != RHS->size())
+ auto SizeLHS = LHS ? LHS->size() : 0;
+ auto SizeRHS = RHS ? RHS->size() : 0;
+ if (SizeLHS != SizeRHS)
return false;
+ if (SizeRHS == 0)
+ return true;
return llvm::set_is_subset(*LHS, *RHS);
}
};
@@ -483,32 +497,35 @@ struct AADepGraphNode {
public:
virtual ~AADepGraphNode() = default;
using DepTy = PointerIntPair<AADepGraphNode *, 1>;
+ using DepSetTy = SmallSetVector<DepTy, 2>;
protected:
/// Set of dependency graph nodes which should be updated if this one
/// is updated. The bit encodes if it is optional.
- TinyPtrVector<DepTy> Deps;
+ DepSetTy Deps;
- static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); }
- static AbstractAttribute *DepGetValAA(DepTy &DT) {
+ static AADepGraphNode *DepGetVal(const DepTy &DT) { return DT.getPointer(); }
+ static AbstractAttribute *DepGetValAA(const DepTy &DT) {
return cast<AbstractAttribute>(DT.getPointer());
}
operator AbstractAttribute *() { return cast<AbstractAttribute>(this); }
public:
- using iterator =
- mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
+ using iterator = mapped_iterator<DepSetTy::iterator, decltype(&DepGetVal)>;
using aaiterator =
- mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetValAA)>;
+ mapped_iterator<DepSetTy::iterator, decltype(&DepGetValAA)>;
aaiterator begin() { return aaiterator(Deps.begin(), &DepGetValAA); }
aaiterator end() { return aaiterator(Deps.end(), &DepGetValAA); }
iterator child_begin() { return iterator(Deps.begin(), &DepGetVal); }
iterator child_end() { return iterator(Deps.end(), &DepGetVal); }
- virtual void print(raw_ostream &OS) const { OS << "AADepNode Impl\n"; }
- TinyPtrVector<DepTy> &getDeps() { return Deps; }
+ void print(raw_ostream &OS) const { print(nullptr, OS); }
+ virtual void print(Attributor *, raw_ostream &OS) const {
+ OS << "AADepNode Impl\n";
+ }
+ DepSetTy &getDeps() { return Deps; }
friend struct Attributor;
friend struct AADepGraph;
@@ -524,9 +541,9 @@ struct AADepGraph {
~AADepGraph() = default;
using DepTy = AADepGraphNode::DepTy;
- static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); }
+ static AADepGraphNode *DepGetVal(const DepTy &DT) { return DT.getPointer(); }
using iterator =
- mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
+ mapped_iterator<AADepGraphNode::DepSetTy::iterator, decltype(&DepGetVal)>;
/// There is no root node for the dependency graph. But the SCCIterator
/// requires a single entry point, so we maintain a fake("synthetic") root
@@ -693,7 +710,8 @@ struct IRPosition {
// function.
if (Argument *Arg = getAssociatedArgument())
return Arg->getParent();
- return CB->getCalledFunction();
+ return dyn_cast_if_present<Function>(
+ CB->getCalledOperand()->stripPointerCasts());
}
return getAnchorScope();
}
@@ -714,6 +732,17 @@ struct IRPosition {
}
}
+ /// Return true if this is a function or call site position.
+ bool isFunctionScope() const {
+ switch (getPositionKind()) {
+ case IRPosition::IRP_CALL_SITE:
+ case IRPosition::IRP_FUNCTION:
+ return true;
+ default:
+ return false;
+ };
+ }
+
/// Return the Function surrounding the anchor value.
Function *getAnchorScope() const {
Value &V = getAnchorValue();
@@ -787,6 +816,7 @@ struct IRPosition {
case IRPosition::IRP_CALL_SITE_RETURNED:
return AttributeList::ReturnIndex;
case IRPosition::IRP_ARGUMENT:
+ return getCalleeArgNo() + AttributeList::FirstArgIndex;
case IRPosition::IRP_CALL_SITE_ARGUMENT:
return getCallSiteArgNo() + AttributeList::FirstArgIndex;
}
@@ -794,6 +824,49 @@ struct IRPosition {
"There is no attribute index for a floating or invalid position!");
}
+ /// Return the value attributes are attached to.
+ Value *getAttrListAnchor() const {
+ if (auto *CB = dyn_cast<CallBase>(&getAnchorValue()))
+ return CB;
+ return getAssociatedFunction();
+ }
+
+ /// Return the attributes associated with this function or call site scope.
+ AttributeList getAttrList() const {
+ if (auto *CB = dyn_cast<CallBase>(&getAnchorValue()))
+ return CB->getAttributes();
+ return getAssociatedFunction()->getAttributes();
+ }
+
+ /// Update the attributes associated with this function or call site scope.
+ void setAttrList(const AttributeList &AttrList) const {
+ if (auto *CB = dyn_cast<CallBase>(&getAnchorValue()))
+ return CB->setAttributes(AttrList);
+ return getAssociatedFunction()->setAttributes(AttrList);
+ }
+
+ /// Return the number of arguments associated with this function or call site
+ /// scope.
+ unsigned getNumArgs() const {
+ assert((getPositionKind() == IRP_CALL_SITE ||
+ getPositionKind() == IRP_FUNCTION) &&
+ "Only valid for function/call site positions!");
+ if (auto *CB = dyn_cast<CallBase>(&getAnchorValue()))
+ return CB->arg_size();
+ return getAssociatedFunction()->arg_size();
+ }
+
+ /// Return the argument \p ArgNo associated with this function or call site
+ /// scope.
+ Value *getArg(unsigned ArgNo) const {
+ assert((getPositionKind() == IRP_CALL_SITE ||
+ getPositionKind() == IRP_FUNCTION) &&
+ "Only valid for function/call site positions!");
+ if (auto *CB = dyn_cast<CallBase>(&getAnchorValue()))
+ return CB->getArgOperand(ArgNo);
+ return getAssociatedFunction()->getArg(ArgNo);
+ }
+
/// Return the associated position kind.
Kind getPositionKind() const {
char EncodingBits = getEncodingBits();
@@ -815,53 +888,6 @@ struct IRPosition {
return IRP_FLOAT;
}
- /// TODO: Figure out if the attribute related helper functions should live
- /// here or somewhere else.
-
- /// Return true if any kind in \p AKs existing in the IR at a position that
- /// will affect this one. See also getAttrs(...).
- /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions,
- /// e.g., the function position if this is an
- /// argument position, should be ignored.
- bool hasAttr(ArrayRef<Attribute::AttrKind> AKs,
- bool IgnoreSubsumingPositions = false,
- Attributor *A = nullptr) const;
-
- /// Return the attributes of any kind in \p AKs existing in the IR at a
- /// position that will affect this one. While each position can only have a
- /// single attribute of any kind in \p AKs, there are "subsuming" positions
- /// that could have an attribute as well. This method returns all attributes
- /// found in \p Attrs.
- /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions,
- /// e.g., the function position if this is an
- /// argument position, should be ignored.
- void getAttrs(ArrayRef<Attribute::AttrKind> AKs,
- SmallVectorImpl<Attribute> &Attrs,
- bool IgnoreSubsumingPositions = false,
- Attributor *A = nullptr) const;
-
- /// Remove the attribute of kind \p AKs existing in the IR at this position.
- void removeAttrs(ArrayRef<Attribute::AttrKind> AKs) const {
- if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT)
- return;
-
- AttributeList AttrList;
- auto *CB = dyn_cast<CallBase>(&getAnchorValue());
- if (CB)
- AttrList = CB->getAttributes();
- else
- AttrList = getAssociatedFunction()->getAttributes();
-
- LLVMContext &Ctx = getAnchorValue().getContext();
- for (Attribute::AttrKind AK : AKs)
- AttrList = AttrList.removeAttributeAtIndex(Ctx, getAttrIdx(), AK);
-
- if (CB)
- CB->setAttributes(AttrList);
- else
- getAssociatedFunction()->setAttributes(AttrList);
- }
-
bool isAnyCallSitePosition() const {
switch (getPositionKind()) {
case IRPosition::IRP_CALL_SITE:
@@ -980,16 +1006,6 @@ private:
/// Verify internal invariants.
void verify();
- /// Return the attributes of kind \p AK existing in the IR as attribute.
- bool getAttrsFromIRAttr(Attribute::AttrKind AK,
- SmallVectorImpl<Attribute> &Attrs) const;
-
- /// Return the attributes of kind \p AK existing in the IR as operand bundles
- /// of an llvm.assume.
- bool getAttrsFromAssumes(Attribute::AttrKind AK,
- SmallVectorImpl<Attribute> &Attrs,
- Attributor &A) const;
-
/// Return the underlying pointer as Value *, valid for all positions but
/// IRP_CALL_SITE_ARGUMENT.
Value *getAsValuePtr() const {
@@ -1109,33 +1125,51 @@ struct AnalysisGetter {
// allow partial specialization, which is needed in this case. So instead, we
// use a constexpr bool to perform the SFINAE, and then use this information
// inside the function template.
- template <typename, typename = void> static constexpr bool HasLegacyWrapper = false;
+ template <typename, typename = void>
+ static constexpr bool HasLegacyWrapper = false;
template <typename Analysis>
- typename Analysis::Result *getAnalysis(const Function &F) {
- if (FAM)
+ typename Analysis::Result *getAnalysis(const Function &F,
+ bool RequestCachedOnly = false) {
+ if (!LegacyPass && !FAM)
+ return nullptr;
+ if (FAM) {
+ if (CachedOnly || RequestCachedOnly)
+ return FAM->getCachedResult<Analysis>(const_cast<Function &>(F));
return &FAM->getResult<Analysis>(const_cast<Function &>(F));
- if constexpr (HasLegacyWrapper<Analysis>)
- if (LegacyPass)
+ }
+ if constexpr (HasLegacyWrapper<Analysis>) {
+ if (!CachedOnly && !RequestCachedOnly)
return &LegacyPass
->getAnalysis<typename Analysis::LegacyWrapper>(
const_cast<Function &>(F))
.getResult();
+ if (auto *P =
+ LegacyPass
+ ->getAnalysisIfAvailable<typename Analysis::LegacyWrapper>())
+ return &P->getResult();
+ }
return nullptr;
}
- AnalysisGetter(FunctionAnalysisManager &FAM) : FAM(&FAM) {}
- AnalysisGetter(Pass *P) : LegacyPass(P) {}
+ AnalysisGetter(FunctionAnalysisManager &FAM, bool CachedOnly = false)
+ : FAM(&FAM), CachedOnly(CachedOnly) {}
+ AnalysisGetter(Pass *P, bool CachedOnly = false)
+ : LegacyPass(P), CachedOnly(CachedOnly) {}
AnalysisGetter() = default;
private:
FunctionAnalysisManager *FAM = nullptr;
Pass *LegacyPass = nullptr;
+
+ /// If \p CachedOnly is true, no pass is created, just existing results are
+ /// used. Also available per request.
+ bool CachedOnly = false;
};
template <typename Analysis>
constexpr bool AnalysisGetter::HasLegacyWrapper<
- Analysis, std::void_t<typename Analysis::LegacyWrapper>> = true;
+ Analysis, std::void_t<typename Analysis::LegacyWrapper>> = true;
/// Data structure to hold cached (LLVM-IR) information.
///
@@ -1151,24 +1185,24 @@ constexpr bool AnalysisGetter::HasLegacyWrapper<
/// instance down in the abstract attributes.
struct InformationCache {
InformationCache(const Module &M, AnalysisGetter &AG,
- BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC)
- : DL(M.getDataLayout()), Allocator(Allocator),
- Explorer(
- /* ExploreInterBlock */ true, /* ExploreCFGForward */ true,
- /* ExploreCFGBackward */ true,
- /* LIGetter */
- [&](const Function &F) { return AG.getAnalysis<LoopAnalysis>(F); },
- /* DTGetter */
- [&](const Function &F) {
- return AG.getAnalysis<DominatorTreeAnalysis>(F);
- },
- /* PDTGetter */
- [&](const Function &F) {
- return AG.getAnalysis<PostDominatorTreeAnalysis>(F);
- }),
- AG(AG), TargetTriple(M.getTargetTriple()) {
- if (CGSCC)
- initializeModuleSlice(*CGSCC);
+ BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC,
+ bool UseExplorer = true)
+ : CGSCC(CGSCC), DL(M.getDataLayout()), Allocator(Allocator), AG(AG),
+ TargetTriple(M.getTargetTriple()) {
+ if (UseExplorer)
+ Explorer = new (Allocator) MustBeExecutedContextExplorer(
+ /* ExploreInterBlock */ true, /* ExploreCFGForward */ true,
+ /* ExploreCFGBackward */ true,
+ /* LIGetter */
+ [&](const Function &F) { return AG.getAnalysis<LoopAnalysis>(F); },
+ /* DTGetter */
+ [&](const Function &F) {
+ return AG.getAnalysis<DominatorTreeAnalysis>(F);
+ },
+ /* PDTGetter */
+ [&](const Function &F) {
+ return AG.getAnalysis<PostDominatorTreeAnalysis>(F);
+ });
}
~InformationCache() {
@@ -1180,6 +1214,8 @@ struct InformationCache {
using AA::InstExclusionSetTy;
for (auto *BES : BESets)
BES->~InstExclusionSetTy();
+ if (Explorer)
+ Explorer->~MustBeExecutedContextExplorer();
}
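
Because the MustBeExecutedContextExplorer is now placement-new'ed into the BumpPtrAllocator, its destructor has to be invoked explicitly; the allocator reclaims memory in bulk and never runs destructors. A minimal illustration of placement new plus a manual destructor call, independent of any LLVM types:

#include <new>
#include <string>

struct Widget {
  std::string Name;
  explicit Widget(std::string N) : Name(std::move(N)) {}
};

int main() {
  alignas(Widget) unsigned char Buffer[sizeof(Widget)]; // stand-in for arena memory
  Widget *W = new (Buffer) Widget("explorer");          // construct in place
  // ... use *W ...
  W->~Widget(); // arena memory is reclaimed in bulk, so run the dtor by hand
  return 0;
}
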
/// Apply \p CB to all uses of \p F. If \p LookThroughConstantExprUses is
@@ -1204,45 +1240,8 @@ struct InformationCache {
}
}
- /// Initialize the ModuleSlice member based on \p SCC. ModuleSlices contains
- /// (a subset of) all functions that we can look at during this SCC traversal.
- /// This includes functions (transitively) called from the SCC and the
- /// (transitive) callers of SCC functions. We also can look at a function if
- /// there is a "reference edge", i.a., if the function somehow uses (!=calls)
- /// a function in the SCC or a caller of a function in the SCC.
- void initializeModuleSlice(SetVector<Function *> &SCC) {
- ModuleSlice.insert(SCC.begin(), SCC.end());
-
- SmallPtrSet<Function *, 16> Seen;
- SmallVector<Function *, 16> Worklist(SCC.begin(), SCC.end());
- while (!Worklist.empty()) {
- Function *F = Worklist.pop_back_val();
- ModuleSlice.insert(F);
-
- for (Instruction &I : instructions(*F))
- if (auto *CB = dyn_cast<CallBase>(&I))
- if (Function *Callee = CB->getCalledFunction())
- if (Seen.insert(Callee).second)
- Worklist.push_back(Callee);
- }
-
- Seen.clear();
- Worklist.append(SCC.begin(), SCC.end());
- while (!Worklist.empty()) {
- Function *F = Worklist.pop_back_val();
- ModuleSlice.insert(F);
-
- // Traverse all transitive uses.
- foreachUse(*F, [&](Use &U) {
- if (auto *UsrI = dyn_cast<Instruction>(U.getUser()))
- if (Seen.insert(UsrI->getFunction()).second)
- Worklist.push_back(UsrI->getFunction());
- });
- }
- }
-
- /// The slice of the module we are allowed to look at.
- SmallPtrSet<Function *, 8> ModuleSlice;
+ /// The CG-SCC the pass is run on, or nullptr if it is a module pass.
+ const SetVector<Function *> *const CGSCC = nullptr;
/// A vector type to hold instructions.
using InstructionVectorTy = SmallVector<Instruction *, 8>;
@@ -1262,7 +1261,7 @@ struct InformationCache {
}
/// Return MustBeExecutedContextExplorer
- MustBeExecutedContextExplorer &getMustBeExecutedContextExplorer() {
+ MustBeExecutedContextExplorer *getMustBeExecutedContextExplorer() {
return Explorer;
}
@@ -1271,9 +1270,6 @@ struct InformationCache {
return AG.getAnalysis<TargetLibraryAnalysis>(F);
}
- /// Return AliasAnalysis Result for function \p F.
- AAResults *getAAResultsForFunction(const Function &F);
-
/// Return true if \p Arg is involved in a must-tail call, thus the argument
/// of the caller or callee.
bool isInvolvedInMustTailCall(const Argument &Arg) {
@@ -1287,8 +1283,9 @@ struct InformationCache {
/// Return the analysis result from a pass \p AP for function \p F.
template <typename AP>
- typename AP::Result *getAnalysisResultForFunction(const Function &F) {
- return AG.getAnalysis<AP>(F);
+ typename AP::Result *getAnalysisResultForFunction(const Function &F,
+ bool CachedOnly = false) {
+ return AG.getAnalysis<AP>(F, CachedOnly);
}
/// Return datalayout used in the module.
@@ -1297,23 +1294,19 @@ struct InformationCache {
/// Return the map containing all the knowledge we have from `llvm.assume`s.
const RetainedKnowledgeMap &getKnowledgeMap() const { return KnowledgeMap; }
- /// Given \p BES, return a uniqued version. \p BES is destroyed in the
- /// process.
+ /// Given \p BES, return a uniqued version.
const AA::InstExclusionSetTy *
getOrCreateUniqueBlockExecutionSet(const AA::InstExclusionSetTy *BES) {
auto It = BESets.find(BES);
if (It != BESets.end())
return *It;
auto *UniqueBES = new (Allocator) AA::InstExclusionSetTy(*BES);
- BESets.insert(UniqueBES);
+ bool Success = BESets.insert(UniqueBES).second;
+ (void)Success;
+ assert(Success && "Expected only new entries to be added");
return UniqueBES;
}
- /// Check whether \p F is part of module slice.
- bool isInModuleSlice(const Function &F) {
- return ModuleSlice.empty() || ModuleSlice.count(const_cast<Function *>(&F));
- }
-
/// Return true if the stack (llvm::Alloca) can be accessed by other threads.
bool stackIsAccessibleByOtherThreads() { return !targetIsGPU(); }
@@ -1367,7 +1360,7 @@ private:
BumpPtrAllocator &Allocator;
/// MustBeExecutedContextExplorer
- MustBeExecutedContextExplorer Explorer;
+ MustBeExecutedContextExplorer *Explorer = nullptr;
/// A map with knowledge retained in `llvm.assume` instructions.
RetainedKnowledgeMap KnowledgeMap;
@@ -1376,7 +1369,7 @@ private:
SetVector<const Instruction *> AssumeOnlyValues;
/// Cache for block sets to allow reuse.
- DenseSet<AA::InstExclusionSetTy *> BESets;
+ DenseSet<const AA::InstExclusionSetTy *> BESets;
/// Getters for analysis.
AnalysisGetter &AG;
@@ -1416,6 +1409,9 @@ struct AttributorConfig {
/// function marked live. See also: InitializationCallback>
bool DefaultInitializeLiveInternals = true;
+ /// Flag to determine if we should skip all liveness checks early on.
+ bool UseLiveness = true;
+
/// Callback function to be invoked on internal functions marked live.
std::function<void(Attributor &A, const Function &F)> InitializationCallback =
nullptr;
@@ -1438,6 +1434,9 @@ struct AttributorConfig {
/// The name of the pass running the attributor, used to emit remarks.
const char *PassName = nullptr;
+
+ using IPOAmendableCBTy = function_ref<bool(const Function &F)>;
+ IPOAmendableCBTy IPOAmendableCB;
};
/// The fixpoint analysis framework that orchestrates the attribute deduction.
@@ -1512,7 +1511,7 @@ struct Attributor {
/// attribute is used for reasoning. To record the dependences explicitly use
/// the `Attributor::recordDependence` method.
template <typename AAType>
- const AAType &getAAFor(const AbstractAttribute &QueryingAA,
+ const AAType *getAAFor(const AbstractAttribute &QueryingAA,
const IRPosition &IRP, DepClassTy DepClass) {
return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass,
/* ForceUpdate */ false);
@@ -1524,7 +1523,7 @@ struct Attributor {
/// possible/useful that were not happening before as the abstract attribute
/// was assumed dead.
template <typename AAType>
- const AAType &getAndUpdateAAFor(const AbstractAttribute &QueryingAA,
+ const AAType *getAndUpdateAAFor(const AbstractAttribute &QueryingAA,
const IRPosition &IRP, DepClassTy DepClass) {
return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass,
/* ForceUpdate */ true);
@@ -1536,7 +1535,7 @@ struct Attributor {
/// function.
/// NOTE: ForceUpdate is ignored in any stage other than the update stage.
template <typename AAType>
- const AAType &getOrCreateAAFor(IRPosition IRP,
+ const AAType *getOrCreateAAFor(IRPosition IRP,
const AbstractAttribute *QueryingAA,
DepClassTy DepClass, bool ForceUpdate = false,
bool UpdateAfterInit = true) {
@@ -1547,9 +1546,13 @@ struct Attributor {
/* AllowInvalidState */ true)) {
if (ForceUpdate && Phase == AttributorPhase::UPDATE)
updateAA(*AAPtr);
- return *AAPtr;
+ return AAPtr;
}
+ bool ShouldUpdateAA;
+ if (!shouldInitialize<AAType>(IRP, ShouldUpdateAA))
+ return nullptr;
+
// No matching attribute found, create one.
// Use the static create method.
auto &AA = AAType::createForPosition(IRP, *this);
@@ -1561,52 +1564,24 @@ struct Attributor {
// If we are currently seeding attributes, enforce seeding rules.
if (Phase == AttributorPhase::SEEDING && !shouldSeedAttribute(AA)) {
AA.getState().indicatePessimisticFixpoint();
- return AA;
+ return &AA;
}
- // For now we ignore naked and optnone functions.
- bool Invalidate =
- Configuration.Allowed && !Configuration.Allowed->count(&AAType::ID);
- const Function *AnchorFn = IRP.getAnchorScope();
- if (AnchorFn) {
- Invalidate |=
- AnchorFn->hasFnAttribute(Attribute::Naked) ||
- AnchorFn->hasFnAttribute(Attribute::OptimizeNone) ||
- (!isModulePass() && !getInfoCache().isInModuleSlice(*AnchorFn));
- }
-
- // Avoid too many nested initializations to prevent a stack overflow.
- Invalidate |= InitializationChainLength > MaxInitializationChainLength;
-
// Bootstrap the new attribute with an initial update to propagate
- // information, e.g., function -> call site. If it is not on a given
- // Allowed we will not perform updates at all.
- if (Invalidate) {
- AA.getState().indicatePessimisticFixpoint();
- return AA;
- }
-
+ // information, e.g., function -> call site.
{
- TimeTraceScope TimeScope(AA.getName() + "::initialize");
+ TimeTraceScope TimeScope("initialize", [&]() {
+ return AA.getName() +
+ std::to_string(AA.getIRPosition().getPositionKind());
+ });
++InitializationChainLength;
AA.initialize(*this);
--InitializationChainLength;
}
- // We update only AAs associated with functions in the Functions set or
- // call sites of them.
- if ((AnchorFn && !isRunOn(const_cast<Function *>(AnchorFn))) &&
- !isRunOn(IRP.getAssociatedFunction())) {
+ if (!ShouldUpdateAA) {
AA.getState().indicatePessimisticFixpoint();
- return AA;
- }
-
- // If this is queried in the manifest stage, we force the AA to indicate
- // pessimistic fixpoint immediately.
- if (Phase == AttributorPhase::MANIFEST ||
- Phase == AttributorPhase::CLEANUP) {
- AA.getState().indicatePessimisticFixpoint();
- return AA;
+ return &AA;
}
// Allow seeded attributes to declare dependencies.
@@ -1623,10 +1598,11 @@ struct Attributor {
if (QueryingAA && AA.getState().isValidState())
recordDependence(AA, const_cast<AbstractAttribute &>(*QueryingAA),
DepClass);
- return AA;
+ return &AA;
}
+
template <typename AAType>
- const AAType &getOrCreateAAFor(const IRPosition &IRP) {
+ const AAType *getOrCreateAAFor(const IRPosition &IRP) {
return getOrCreateAAFor<AAType>(IRP, /* QueryingAA */ nullptr,
DepClassTy::NONE);
}
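
With getAAFor, getAndUpdateAAFor and getOrCreateAAFor now returning pointers, a query can decline to create an abstract attribute (for instance when shouldInitialize rejects the position), and callers are expected to guard against nullptr. A generic sketch of that lookup-or-decline pattern, not the actual Attributor API:

#include <memory>
#include <string>
#include <unordered_map>

struct Attr { int Value = 0; };

class Registry {
  std::unordered_map<std::string, std::unique_ptr<Attr>> Map;
public:
  // Returns nullptr when the entry is not allowed to be created.
  const Attr *getOrCreate(const std::string &Key, bool Allowed) {
    auto It = Map.find(Key);
    if (It != Map.end())
      return It->second.get();
    if (!Allowed)
      return nullptr;
    return (Map[Key] = std::make_unique<Attr>()).get();
  }
};

int useIt(Registry &R) {
  const Attr *A = R.getOrCreate("align", /*Allowed=*/true);
  return A ? A->Value : -1; // callers must handle the nullptr case
}
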
@@ -1699,7 +1675,7 @@ struct Attributor {
// Register AA with the synthetic root only before the manifest stage.
if (Phase == AttributorPhase::SEEDING || Phase == AttributorPhase::UPDATE)
- DG.SyntheticRoot.Deps.push_back(
+ DG.SyntheticRoot.Deps.insert(
AADepGraphNode::DepTy(&AA, unsigned(DepClassTy::REQUIRED)));
return AA;
@@ -1717,6 +1693,58 @@ struct Attributor {
return Functions.empty() || Functions.count(Fn);
}
+ template <typename AAType> bool shouldUpdateAA(const IRPosition &IRP) {
+ // If this is queried in the manifest stage, we force the AA to indicate
+ // pessimistic fixpoint immediately.
+ if (Phase == AttributorPhase::MANIFEST || Phase == AttributorPhase::CLEANUP)
+ return false;
+
+ Function *AssociatedFn = IRP.getAssociatedFunction();
+
+ // Check if we require a callee but there is none.
+ if (!AssociatedFn && AAType::requiresCalleeForCallBase() &&
+ IRP.isAnyCallSitePosition())
+ return false;
+
+ // Check if we require callers but we cannot see them all.
+ if (AAType::requiresCallersForArgOrFunction())
+ if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION ||
+ IRP.getPositionKind() == IRPosition::IRP_ARGUMENT)
+ if (!AssociatedFn->hasLocalLinkage())
+ return false;
+
+ if (!AAType::isValidIRPositionForUpdate(*this, IRP))
+ return false;
+
+ // We update only AAs associated with functions in the Functions set or
+ // call sites of them.
+ return (!AssociatedFn || isModulePass() || isRunOn(AssociatedFn) ||
+ isRunOn(IRP.getAnchorScope()));
+ }
+
+ template <typename AAType>
+ bool shouldInitialize(const IRPosition &IRP, bool &ShouldUpdateAA) {
+ if (!AAType::isValidIRPositionForInit(*this, IRP))
+ return false;
+
+ if (Configuration.Allowed && !Configuration.Allowed->count(&AAType::ID))
+ return false;
+
+ // For now we skip anything in naked and optnone functions.
+ const Function *AnchorFn = IRP.getAnchorScope();
+ if (AnchorFn && (AnchorFn->hasFnAttribute(Attribute::Naked) ||
+ AnchorFn->hasFnAttribute(Attribute::OptimizeNone)))
+ return false;
+
+ // Avoid too many nested initializations to prevent a stack overflow.
+ if (InitializationChainLength > MaxInitializationChainLength)
+ return false;
+
+ ShouldUpdateAA = shouldUpdateAA<AAType>(IRP);
+
+ return !AAType::hasTrivialInitializer() || ShouldUpdateAA;
+ }
+
/// Determine opportunities to derive 'default' attributes in \p F and create
/// abstract attribute objects for them.
///
@@ -1734,7 +1762,8 @@ struct Attributor {
/// If a function is exactly defined or it has alwaysinline attribute
/// and is viable to be inlined, we say it is IPO amendable
bool isFunctionIPOAmendable(const Function &F) {
- return F.hasExactDefinition() || InfoCache.InlineableFunctions.count(&F);
+ return F.hasExactDefinition() || InfoCache.InlineableFunctions.count(&F) ||
+ (Configuration.IPOAmendableCB && Configuration.IPOAmendableCB(F));
}
/// Mark the internal function \p F as live.
@@ -1827,6 +1856,60 @@ struct Attributor {
ToBeDeletedFunctions.insert(&F);
}
+ /// Return the attributes of kind \p AK existing in the IR as operand bundles
+ /// of an llvm.assume.
+ bool getAttrsFromAssumes(const IRPosition &IRP, Attribute::AttrKind AK,
+ SmallVectorImpl<Attribute> &Attrs);
+
+ /// Return true if any kind in \p AKs existing in the IR at a position that
+ /// will affect this one. See also getAttrs(...).
+ /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions,
+ /// e.g., the function position if this is an
+ /// argument position, should be ignored.
+ bool hasAttr(const IRPosition &IRP, ArrayRef<Attribute::AttrKind> AKs,
+ bool IgnoreSubsumingPositions = false,
+ Attribute::AttrKind ImpliedAttributeKind = Attribute::None);
+
+ /// Return the attributes of any kind in \p AKs existing in the IR at a
+ /// position that will affect this one. While each position can only have a
+ /// single attribute of any kind in \p AKs, there are "subsuming" positions
+ /// that could have an attribute as well. This method returns all attributes
+ /// found in \p Attrs.
+ /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions,
+ /// e.g., the function position if this is an
+ /// argument position, should be ignored.
+ void getAttrs(const IRPosition &IRP, ArrayRef<Attribute::AttrKind> AKs,
+ SmallVectorImpl<Attribute> &Attrs,
+ bool IgnoreSubsumingPositions = false);
+
+ /// Remove all \p AttrKinds attached to \p IRP.
+ ChangeStatus removeAttrs(const IRPosition &IRP,
+ const ArrayRef<Attribute::AttrKind> &AttrKinds);
+
+ /// Attach \p DeducedAttrs to \p IRP, if \p ForceReplace is set we do this
+ /// even if the same attribute kind was already present.
+ ChangeStatus manifestAttrs(const IRPosition &IRP,
+ const ArrayRef<Attribute> &DeducedAttrs,
+ bool ForceReplace = false);
+
+private:
+ /// Helper to check \p Attrs for \p AK; if not found, check if \p
+ /// AAType::isImpliedByIR is true, and if not, create AAType for \p IRP.
+ template <Attribute::AttrKind AK, typename AAType>
+ void checkAndQueryIRAttr(const IRPosition &IRP, AttributeSet Attrs);
+
+ /// Helper to apply \p CB on all attributes of type \p AttrDescs of \p IRP.
+ template <typename DescTy>
+ ChangeStatus updateAttrMap(const IRPosition &IRP,
+ const ArrayRef<DescTy> &AttrDescs,
+ function_ref<bool(const DescTy &, AttributeSet,
+ AttributeMask &, AttrBuilder &)>
+ CB);
+
+ /// Mapping from functions/call sites to their attributes.
+ DenseMap<Value *, AttributeList> AttrsMap;
+
+public:
/// If \p IRP is assumed to be a constant, return it, if it is unclear yet,
/// return std::nullopt, otherwise return `nullptr`.
std::optional<Constant *> getAssumedConstant(const IRPosition &IRP,
@@ -1873,7 +1956,8 @@ struct Attributor {
const AbstractAttribute *AA,
SmallVectorImpl<AA::ValueAndContext> &Values,
AA::ValueScope S,
- bool &UsedAssumedInformation);
+ bool &UsedAssumedInformation,
+ bool RecurseForSelectAndPHI = true);
/// Register \p CB as a simplification callback.
/// `Attributor::getAssumedSimplified` will use these callbacks before
@@ -1892,6 +1976,40 @@ struct Attributor {
return SimplificationCallbacks.count(IRP);
}
+ /// Register \p CB as a simplification callback.
+ /// Similar to \p registerSimplificationCallback, the callback will be called
+ /// first when we simplify a global variable \p GV.
+ using GlobalVariableSimplifictionCallbackTy =
+ std::function<std::optional<Constant *>(
+ const GlobalVariable &, const AbstractAttribute *, bool &)>;
+ void registerGlobalVariableSimplificationCallback(
+ const GlobalVariable &GV,
+ const GlobalVariableSimplifictionCallbackTy &CB) {
+ GlobalVariableSimplificationCallbacks[&GV].emplace_back(CB);
+ }
+
+ /// Return true if there is a simplification callback for \p GV.
+ bool hasGlobalVariableSimplificationCallback(const GlobalVariable &GV) {
+ return GlobalVariableSimplificationCallbacks.count(&GV);
+ }
+
+ /// Return \p std::nullopt if there is no callback registered for \p GV or
+ /// the callback is still not sure if \p GV can be simplified. Return \p
+ /// nullptr if \p GV can't be simplified.
+ std::optional<Constant *>
+ getAssumedInitializerFromCallBack(const GlobalVariable &GV,
+ const AbstractAttribute *AA,
+ bool &UsedAssumedInformation) {
+ assert(GlobalVariableSimplificationCallbacks.contains(&GV));
+ for (auto &CB : GlobalVariableSimplificationCallbacks.lookup(&GV)) {
+ auto SimplifiedGV = CB(GV, AA, UsedAssumedInformation);
+ // For now we assume the callback will not return std::nullopt.
+ assert(SimplifiedGV.has_value() && "SimplifiedGV has no value");
+ return *SimplifiedGV;
+ }
+ llvm_unreachable("there must be a callback registered");
+ }
+
using VirtualUseCallbackTy =
std::function<bool(Attributor &, const AbstractAttribute *)>;
void registerVirtualUseCallback(const Value &V,
@@ -1904,6 +2022,12 @@ private:
DenseMap<IRPosition, SmallVector<SimplifictionCallbackTy, 1>>
SimplificationCallbacks;
+ /// The vector with all simplification callbacks for global variables
+ /// registered by outside AAs.
+ DenseMap<const GlobalVariable *,
+ SmallVector<GlobalVariableSimplifictionCallbackTy, 1>>
+ GlobalVariableSimplificationCallbacks;
+
DenseMap<const Value *, SmallVector<VirtualUseCallbackTy, 1>>
VirtualUseCallbacks;
@@ -2148,22 +2272,15 @@ public:
bool &UsedAssumedInformation,
bool CheckPotentiallyDead = false);
- /// Check \p Pred on all values potentially returned by \p F.
- ///
- /// This method will evaluate \p Pred on all values potentially returned by
- /// the function associated with \p QueryingAA. The returned values are
- /// matched with their respective return instructions. Returns true if \p Pred
- /// holds on all of them.
- bool checkForAllReturnedValuesAndReturnInsts(
- function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred,
- const AbstractAttribute &QueryingAA);
-
/// Check \p Pred on all values potentially returned by the function
/// associated with \p QueryingAA.
///
/// This is the context insensitive version of the method above.
- bool checkForAllReturnedValues(function_ref<bool(Value &)> Pred,
- const AbstractAttribute &QueryingAA);
+ bool
+ checkForAllReturnedValues(function_ref<bool(Value &)> Pred,
+ const AbstractAttribute &QueryingAA,
+ AA::ValueScope S = AA::ValueScope::Intraprocedural,
+ bool RecurseForSelectAndPHI = true);
/// Check \p Pred on all instructions in \p Fn with an opcode present in
/// \p Opcodes.
@@ -2454,8 +2571,8 @@ struct AbstractState {
///
/// The interface ensures that the assumed bits are always a subset of the known
/// bits. Users can only add known bits and, except through adding known bits,
-/// they can only remove assumed bits. This should guarantee monotoniticy and
-/// thereby the existence of a fixpoint (if used corretly). The fixpoint is
+/// they can only remove assumed bits. This should guarantee monotonicity and
+/// thereby the existence of a fixpoint (if used correctly). The fixpoint is
/// reached when the assumed and known state/bits are equal. Users can
/// force/indicate a fixpoint. If an optimistic one is indicated, the known
/// state will catch up with the assumed one, for a pessimistic fixpoint it is
@@ -2564,15 +2681,18 @@ template <typename base_ty = uint32_t, base_ty BestState = ~base_ty(0),
base_ty WorstState = 0>
struct BitIntegerState
: public IntegerStateBase<base_ty, BestState, WorstState> {
+ using super = IntegerStateBase<base_ty, BestState, WorstState>;
using base_t = base_ty;
+ BitIntegerState() = default;
+ BitIntegerState(base_t Assumed) : super(Assumed) {}
/// Return true if the bits set in \p BitsEncoding are "known bits".
- bool isKnown(base_t BitsEncoding) const {
+ bool isKnown(base_t BitsEncoding = BestState) const {
return (this->Known & BitsEncoding) == BitsEncoding;
}
/// Return true if the bits set in \p BitsEncoding are "assumed bits".
- bool isAssumed(base_t BitsEncoding) const {
+ bool isAssumed(base_t BitsEncoding = BestState) const {
return (this->Assumed & BitsEncoding) == BitsEncoding;
}
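Editorial sketch (not part of the patch): the new default arguments let callers ask whether the full best state is assumed/known without spelling out the bit mask; the template arguments below are illustrative.
static void bitStateDefaults() {
  // Three property bits, best state 0b111 (sketch).
  BitIntegerState<uint8_t, /*BestState=*/7, /*WorstState=*/0> S;
  bool AllAssumed = S.isAssumed(); // same as S.isAssumed(/*BitsEncoding=*/7)
  bool AllKnown = S.isKnown();     // same as S.isKnown(/*BitsEncoding=*/7)
  (void)AllAssumed;
  (void)AllKnown;
}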
@@ -2597,7 +2717,7 @@ struct BitIntegerState
/// Keep only "assumed bits" also set in \p BitsEncoding but all known ones.
BitIntegerState &intersectAssumedBits(base_t BitsEncoding) {
- // Make sure we never loose any "known bits".
+ // Make sure we never lose any "known bits".
this->Assumed = (this->Assumed & BitsEncoding) | this->Known;
return *this;
}
@@ -2638,14 +2758,14 @@ struct IncIntegerState
/// Take minimum of assumed and \p Value.
IncIntegerState &takeAssumedMinimum(base_t Value) {
- // Make sure we never loose "known value".
+ // Make sure we never lose "known value".
this->Assumed = std::max(std::min(this->Assumed, Value), this->Known);
return *this;
}
/// Take maximum of known and \p Value.
IncIntegerState &takeKnownMaximum(base_t Value) {
- // Make sure we never loose "known value".
+ // Make sure we never lose "known value".
this->Assumed = std::max(Value, this->Assumed);
this->Known = std::max(Value, this->Known);
return *this;
@@ -2674,14 +2794,14 @@ struct DecIntegerState : public IntegerStateBase<base_ty, 0, ~base_ty(0)> {
/// Take maximum of assumed and \p Value.
DecIntegerState &takeAssumedMaximum(base_t Value) {
- // Make sure we never loose "known value".
+ // Make sure we never lose "known value".
this->Assumed = std::min(std::max(this->Assumed, Value), this->Known);
return *this;
}
/// Take minimum of known and \p Value.
DecIntegerState &takeKnownMinimum(base_t Value) {
- // Make sure we never loose "known value".
+ // Make sure we never lose "known value".
this->Assumed = std::min(Value, this->Assumed);
this->Known = std::min(Value, this->Known);
return *this;
@@ -2808,7 +2928,7 @@ struct IntegerRangeState : public AbstractState {
/// Unite assumed range with the passed state.
void unionAssumed(const ConstantRange &R) {
- // Don't loose a known range.
+ // Don't lose a known range.
Assumed = Assumed.unionWith(R).intersectWith(Known);
}
@@ -2960,6 +3080,7 @@ template <typename BaseTy> struct SetState : public AbstractState {
/// Performs the set intersection between this set and \p RHS. Returns true if
/// changes were made.
bool getIntersection(const SetContents &RHS) {
+ bool IsUniversal = Assumed.isUniversal();
unsigned SizeBefore = Assumed.getSet().size();
// Get intersection and make sure that the known set is still a proper
@@ -2967,7 +3088,8 @@ template <typename BaseTy> struct SetState : public AbstractState {
Assumed.getIntersection(RHS);
Assumed.getUnion(Known);
- return SizeBefore != Assumed.getSet().size();
+ return SizeBefore != Assumed.getSet().size() ||
+ IsUniversal != Assumed.isUniversal();
}
/// Performs the set union between this set and \p RHS. Returns true if
@@ -2984,14 +3106,6 @@ private:
bool IsAtFixedpoint;
};
-/// Helper struct necessary as the modular build fails if the virtual method
-/// IRAttribute::manifest is defined in the Attributor.cpp.
-struct IRAttributeManifest {
- static ChangeStatus manifestAttrs(Attributor &A, const IRPosition &IRP,
- const ArrayRef<Attribute> &DeducedAttrs,
- bool ForceReplace = false);
-};
-
/// Helper to tie an abstract state implementation to an abstract attribute.
template <typename StateTy, typename BaseType, class... Ts>
struct StateWrapper : public BaseType, public StateTy {
@@ -3009,31 +3123,36 @@ struct StateWrapper : public BaseType, public StateTy {
};
/// Helper class that provides common functionality to manifest IR attributes.
-template <Attribute::AttrKind AK, typename BaseType>
+template <Attribute::AttrKind AK, typename BaseType, typename AAType>
struct IRAttribute : public BaseType {
IRAttribute(const IRPosition &IRP) : BaseType(IRP) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- const IRPosition &IRP = this->getIRPosition();
- if (isa<UndefValue>(IRP.getAssociatedValue()) ||
- this->hasAttr(getAttrKind(), /* IgnoreSubsumingPositions */ false,
- &A)) {
- this->getState().indicateOptimisticFixpoint();
- return;
- }
+ /// Most boolean IRAttribute AAs don't do anything non-trivial
+ /// in their initializers while non-boolean ones often do. Subclasses can
+ /// change this.
+ static bool hasTrivialInitializer() { return Attribute::isEnumAttrKind(AK); }
- bool IsFnInterface = IRP.isFnInterfaceKind();
- const Function *FnScope = IRP.getAnchorScope();
- // TODO: Not all attributes require an exact definition. Find a way to
- // enable deduction for some but not all attributes in case the
- // definition might be changed at runtime, see also
- // http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html.
- // TODO: We could always determine abstract attributes and if sufficient
- // information was found we could duplicate the functions that do not
- // have an exact definition.
- if (IsFnInterface && (!FnScope || !A.isFunctionIPOAmendable(*FnScope)))
- this->getState().indicatePessimisticFixpoint();
+ /// Compile time access to the IR attribute kind.
+ static constexpr Attribute::AttrKind IRAttributeKind = AK;
+
+ /// Return true if the IR attribute(s) associated with this AA are implied for
+ /// an undef value.
+ static bool isImpliedByUndef() { return true; }
+
+ /// Return true if the IR attribute(s) associated with this AA are implied for
+ /// a poison value.
+ static bool isImpliedByPoison() { return true; }
+
+ static bool isImpliedByIR(Attributor &A, const IRPosition &IRP,
+ Attribute::AttrKind ImpliedAttributeKind = AK,
+ bool IgnoreSubsumingPositions = false) {
+ if (AAType::isImpliedByUndef() && isa<UndefValue>(IRP.getAssociatedValue()))
+ return true;
+ if (AAType::isImpliedByPoison() &&
+ isa<PoisonValue>(IRP.getAssociatedValue()))
+ return true;
+ return A.hasAttr(IRP, {ImpliedAttributeKind}, IgnoreSubsumingPositions,
+ ImpliedAttributeKind);
}
/// See AbstractAttribute::manifest(...).
@@ -3041,16 +3160,17 @@ struct IRAttribute : public BaseType {
if (isa<UndefValue>(this->getIRPosition().getAssociatedValue()))
return ChangeStatus::UNCHANGED;
SmallVector<Attribute, 4> DeducedAttrs;
- getDeducedAttributes(this->getAnchorValue().getContext(), DeducedAttrs);
- return IRAttributeManifest::manifestAttrs(A, this->getIRPosition(),
- DeducedAttrs);
+ getDeducedAttributes(A, this->getAnchorValue().getContext(), DeducedAttrs);
+ if (DeducedAttrs.empty())
+ return ChangeStatus::UNCHANGED;
+ return A.manifestAttrs(this->getIRPosition(), DeducedAttrs);
}
/// Return the kind that identifies the abstract attribute implementation.
Attribute::AttrKind getAttrKind() const { return AK; }
/// Return the deduced attributes in \p Attrs.
- virtual void getDeducedAttributes(LLVMContext &Ctx,
+ virtual void getDeducedAttributes(Attributor &A, LLVMContext &Ctx,
SmallVectorImpl<Attribute> &Attrs) const {
Attrs.emplace_back(Attribute::get(Ctx, getAttrKind()));
}
@@ -3115,6 +3235,40 @@ struct AbstractAttribute : public IRPosition, public AADepGraphNode {
/// Synthesis nodes are of type AbstractAttribute
static bool classof(const AADepGraphNode *DGN) { return true; }
+ /// Return false if this AA does anything non-trivial (hence not done by
+ /// default) in its initializer.
+ static bool hasTrivialInitializer() { return false; }
+
+ /// Return true if this AA requires a "callee" (or an associated function) for
+ /// a call site position. Default is optimistic to minimize AAs.
+ static bool requiresCalleeForCallBase() { return true; }
+
+ /// Return true if this AA requires all callers for an argument or function
+ /// position.
+ static bool requiresCallersForArgOrFunction() { return false; }
+
+ /// Return false if an AA should not be created for \p IRP.
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ return true;
+ }
+
+ /// Return false if an AA should not be updated for \p IRP.
+ static bool isValidIRPositionForUpdate(Attributor &A, const IRPosition &IRP) {
+ Function *AssociatedFn = IRP.getAssociatedFunction();
+ bool IsFnInterface = IRP.isFnInterfaceKind();
+ assert((!IsFnInterface || AssociatedFn) &&
+ "Function interface without a function?");
+
+ // TODO: Not all attributes require an exact definition. Find a way to
+ // enable deduction for some but not all attributes in case the
+ // definition might be changed at runtime, see also
+ // http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html.
+ // TODO: We could always determine abstract attributes and if sufficient
+ // information was found we could duplicate the functions that do not
+ // have an exact definition.
+ return !IsFnInterface || A.isFunctionIPOAmendable(*AssociatedFn);
+ }
+
/// Initialize the state with the information in the Attributor \p A.
///
/// This function is called by the Attributor once all abstract attributes
@@ -3143,12 +3297,13 @@ struct AbstractAttribute : public IRPosition, public AADepGraphNode {
/// Helper functions, for debug purposes only.
///{
- void print(raw_ostream &OS) const override;
+ void print(raw_ostream &OS) const { print(nullptr, OS); }
+ void print(Attributor *, raw_ostream &OS) const override;
virtual void printWithDeps(raw_ostream &OS) const;
- void dump() const { print(dbgs()); }
+ void dump() const { this->print(dbgs()); }
/// This function should return the "summarized" assumed state as string.
- virtual const std::string getAsStr() const = 0;
+ virtual const std::string getAsStr(Attributor *A) const = 0;
/// This function should return the name of the AbstractAttribute
virtual const std::string getName() const = 0;
@@ -3220,9 +3375,6 @@ struct AttributorCGSCCPass : public PassInfoMixin<AttributorCGSCCPass> {
LazyCallGraph &CG, CGSCCUpdateResult &UR);
};
-Pass *createAttributorLegacyPass();
-Pass *createAttributorCGSCCLegacyPass();
-
/// Helper function to clamp a state \p S of type \p StateType with the
/// information in \p R and indicate/return if \p S did change (as-in update is
/// required to be run again).
@@ -3238,55 +3390,10 @@ ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R) {
/// Abstract Attribute Classes
/// ----------------------------------------------------------------------------
-/// An abstract attribute for the returned values of a function.
-struct AAReturnedValues
- : public IRAttribute<Attribute::Returned, AbstractAttribute> {
- AAReturnedValues(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
-
- /// Check \p Pred on all returned values.
- ///
- /// This method will evaluate \p Pred on returned values and return
- /// true if (1) all returned values are known, and (2) \p Pred returned true
- /// for all returned values.
- ///
- /// Note: Unlike the Attributor::checkForAllReturnedValuesAndReturnInsts
- /// method, this one will not filter dead return instructions.
- virtual bool checkForAllReturnedValuesAndReturnInsts(
- function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred)
- const = 0;
-
- using iterator =
- MapVector<Value *, SmallSetVector<ReturnInst *, 4>>::iterator;
- using const_iterator =
- MapVector<Value *, SmallSetVector<ReturnInst *, 4>>::const_iterator;
- virtual llvm::iterator_range<iterator> returned_values() = 0;
- virtual llvm::iterator_range<const_iterator> returned_values() const = 0;
-
- virtual size_t getNumReturnValues() const = 0;
-
- /// Create an abstract attribute view for the position \p IRP.
- static AAReturnedValues &createForPosition(const IRPosition &IRP,
- Attributor &A);
-
- /// See AbstractAttribute::getName()
- const std::string getName() const override { return "AAReturnedValues"; }
-
- /// See AbstractAttribute::getIdAddr()
- const char *getIdAddr() const override { return &ID; }
-
- /// This function should return true if the type of the \p AA is
- /// AAReturnedValues
- static bool classof(const AbstractAttribute *AA) {
- return (AA->getIdAddr() == &ID);
- }
-
- /// Unique ID (due to the unique address)
- static const char ID;
-};
-
struct AANoUnwind
: public IRAttribute<Attribute::NoUnwind,
- StateWrapper<BooleanState, AbstractAttribute>> {
+ StateWrapper<BooleanState, AbstractAttribute>,
+ AANoUnwind> {
AANoUnwind(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Returns true if nounwind is assumed.
@@ -3315,9 +3422,18 @@ struct AANoUnwind
struct AANoSync
: public IRAttribute<Attribute::NoSync,
- StateWrapper<BooleanState, AbstractAttribute>> {
+ StateWrapper<BooleanState, AbstractAttribute>,
+ AANoSync> {
AANoSync(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.isFunctionScope() &&
+ !IRP.getAssociatedType()->isPtrOrPtrVectorTy())
+ return false;
+ return IRAttribute::isValidIRPositionForInit(A, IRP);
+ }
+
/// Returns true if "nosync" is assumed.
bool isAssumedNoSync() const { return getAssumed(); }
@@ -3358,11 +3474,74 @@ struct AANoSync
};
/// An abstract interface for all mustprogress attributes.
+struct AAMustProgress
+ : public IRAttribute<Attribute::MustProgress,
+ StateWrapper<BooleanState, AbstractAttribute>,
+ AAMustProgress> {
+ AAMustProgress(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
+
+ static bool isImpliedByIR(Attributor &A, const IRPosition &IRP,
+ Attribute::AttrKind ImpliedAttributeKind,
+ bool IgnoreSubsumingPositions = false) {
+ // Note: This is also run for non-IPO amendable functions.
+ assert(ImpliedAttributeKind == Attribute::MustProgress);
+ return A.hasAttr(IRP, {Attribute::MustProgress, Attribute::WillReturn},
+ IgnoreSubsumingPositions, Attribute::MustProgress);
+ }
+
+ /// Return true if "mustprogress" is assumed.
+ bool isAssumedMustProgress() const { return getAssumed(); }
+
+ /// Return true if "mustprogress" is known.
+ bool isKnownMustProgress() const { return getKnown(); }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAMustProgress &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ /// See AbstractAttribute::getName()
+ const std::string getName() const override { return "AAMustProgress"; }
+
+ /// See AbstractAttribute::getIdAddr()
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is
+ /// AAMustProgress
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
+/// An abstract interface for all nonnull attributes.
struct AANonNull
: public IRAttribute<Attribute::NonNull,
- StateWrapper<BooleanState, AbstractAttribute>> {
+ StateWrapper<BooleanState, AbstractAttribute>,
+ AANonNull> {
AANonNull(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
+ /// See AbstractAttribute::hasTrivialInitializer.
+ static bool hasTrivialInitializer() { return false; }
+
+ /// See IRAttribute::isImpliedByUndef.
+ /// Undef is not necessarily nonnull as nonnull + noundef would cause poison.
+ /// Poison implies nonnull though.
+ static bool isImpliedByUndef() { return false; }
+
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.getAssociatedType()->isPtrOrPtrVectorTy())
+ return false;
+ return IRAttribute::isValidIRPositionForInit(A, IRP);
+ }
+
+ /// See AbstractAttribute::isImpliedByIR(...).
+ static bool isImpliedByIR(Attributor &A, const IRPosition &IRP,
+ Attribute::AttrKind ImpliedAttributeKind,
+ bool IgnoreSubsumingPositions = false);
+
/// Return true if we assume that the underlying value is nonnull.
bool isAssumedNonNull() const { return getAssumed(); }
@@ -3390,7 +3569,8 @@ struct AANonNull
/// An abstract attribute for norecurse.
struct AANoRecurse
: public IRAttribute<Attribute::NoRecurse,
- StateWrapper<BooleanState, AbstractAttribute>> {
+ StateWrapper<BooleanState, AbstractAttribute>,
+ AANoRecurse> {
AANoRecurse(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Return true if "norecurse" is assumed.
@@ -3420,9 +3600,43 @@ struct AANoRecurse
/// An abstract attribute for willreturn.
struct AAWillReturn
: public IRAttribute<Attribute::WillReturn,
- StateWrapper<BooleanState, AbstractAttribute>> {
+ StateWrapper<BooleanState, AbstractAttribute>,
+ AAWillReturn> {
AAWillReturn(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
+ static bool isImpliedByIR(Attributor &A, const IRPosition &IRP,
+ Attribute::AttrKind ImpliedAttributeKind,
+ bool IgnoreSubsumingPositions = false) {
+ // Note: This is also run for non-IPO amendable functions.
+ assert(ImpliedAttributeKind == Attribute::WillReturn);
+ if (IRAttribute::isImpliedByIR(A, IRP, ImpliedAttributeKind,
+ IgnoreSubsumingPositions))
+ return true;
+ if (!isImpliedByMustprogressAndReadonly(A, IRP))
+ return false;
+ A.manifestAttrs(IRP, Attribute::get(IRP.getAnchorValue().getContext(),
+ Attribute::WillReturn));
+ return true;
+ }
+
+ /// Check for `mustprogress` and `readonly` as they imply `willreturn`.
+ static bool isImpliedByMustprogressAndReadonly(Attributor &A,
+ const IRPosition &IRP) {
+ // Check for `mustprogress` in the scope and the associated function which
+ // might be different if this is a call site.
+ if (!A.hasAttr(IRP, {Attribute::MustProgress}))
+ return false;
+
+ SmallVector<Attribute, 2> Attrs;
+ A.getAttrs(IRP, {Attribute::Memory}, Attrs,
+ /* IgnoreSubsumingPositions */ false);
+
+ MemoryEffects ME = MemoryEffects::unknown();
+ for (const Attribute &Attr : Attrs)
+ ME &= Attr.getMemoryEffects();
+ return ME.onlyReadsMemory();
+ }
+
/// Return true if "willreturn" is assumed.
bool isAssumedWillReturn() const { return getAssumed(); }
@@ -3521,9 +3735,28 @@ struct AAIntraFnReachability
/// An abstract interface for all noalias attributes.
struct AANoAlias
: public IRAttribute<Attribute::NoAlias,
- StateWrapper<BooleanState, AbstractAttribute>> {
+ StateWrapper<BooleanState, AbstractAttribute>,
+ AANoAlias> {
AANoAlias(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.getAssociatedType()->isPtrOrPtrVectorTy())
+ return false;
+ return IRAttribute::isValidIRPositionForInit(A, IRP);
+ }
+
+ /// See IRAttribute::isImpliedByIR
+ static bool isImpliedByIR(Attributor &A, const IRPosition &IRP,
+ Attribute::AttrKind ImpliedAttributeKind,
+ bool IgnoreSubsumingPositions = false);
+
+ /// See AbstractAttribute::requiresCalleeForCallBase
+ static bool requiresCalleeForCallBase() { return false; }
+
+ /// See AbstractAttribute::requiresCallersForArgOrFunction
+ static bool requiresCallersForArgOrFunction() { return true; }
+
/// Return true if we assume that the underlying value is noalias.
bool isAssumedNoAlias() const { return getAssumed(); }
@@ -3551,9 +3784,29 @@ struct AANoAlias
/// An AbstractAttribute for nofree.
struct AANoFree
: public IRAttribute<Attribute::NoFree,
- StateWrapper<BooleanState, AbstractAttribute>> {
+ StateWrapper<BooleanState, AbstractAttribute>,
+ AANoFree> {
AANoFree(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
+ /// See IRAttribute::isImpliedByIR
+ static bool isImpliedByIR(Attributor &A, const IRPosition &IRP,
+ Attribute::AttrKind ImpliedAttributeKind,
+ bool IgnoreSubsumingPositions = false) {
+ // Note: This is also run for non-IPO amendable functions.
+ assert(ImpliedAttributeKind == Attribute::NoFree);
+ return A.hasAttr(
+ IRP, {Attribute::ReadNone, Attribute::ReadOnly, Attribute::NoFree},
+ IgnoreSubsumingPositions, Attribute::NoFree);
+ }
+
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.isFunctionScope() &&
+ !IRP.getAssociatedType()->isPtrOrPtrVectorTy())
+ return false;
+ return IRAttribute::isValidIRPositionForInit(A, IRP);
+ }
+
/// Return true if "nofree" is assumed.
bool isAssumedNoFree() const { return getAssumed(); }
@@ -3581,7 +3834,8 @@ struct AANoFree
/// An AbstractAttribute for noreturn.
struct AANoReturn
: public IRAttribute<Attribute::NoReturn,
- StateWrapper<BooleanState, AbstractAttribute>> {
+ StateWrapper<BooleanState, AbstractAttribute>,
+ AANoReturn> {
AANoReturn(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
/// Return true if the underlying object is assumed to never return.
@@ -3838,25 +4092,20 @@ struct DerefState : AbstractState {
GlobalState |= R.GlobalState;
return *this;
}
-
-protected:
- const AANonNull *NonNullAA = nullptr;
};
/// An abstract interface for all dereferenceable attributes.
struct AADereferenceable
: public IRAttribute<Attribute::Dereferenceable,
- StateWrapper<DerefState, AbstractAttribute>> {
+ StateWrapper<DerefState, AbstractAttribute>,
+ AADereferenceable> {
AADereferenceable(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
- /// Return true if we assume that the underlying value is nonnull.
- bool isAssumedNonNull() const {
- return NonNullAA && NonNullAA->isAssumedNonNull();
- }
-
- /// Return true if we know that the underlying value is nonnull.
- bool isKnownNonNull() const {
- return NonNullAA && NonNullAA->isKnownNonNull();
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.getAssociatedType()->isPtrOrPtrVectorTy())
+ return false;
+ return IRAttribute::isValidIRPositionForInit(A, IRP);
}
/// Return true if we assume that underlying value is
@@ -3900,11 +4149,19 @@ struct AADereferenceable
using AAAlignmentStateType =
IncIntegerState<uint64_t, Value::MaximumAlignment, 1>;
/// An abstract interface for all align attributes.
-struct AAAlign : public IRAttribute<
- Attribute::Alignment,
- StateWrapper<AAAlignmentStateType, AbstractAttribute>> {
+struct AAAlign
+ : public IRAttribute<Attribute::Alignment,
+ StateWrapper<AAAlignmentStateType, AbstractAttribute>,
+ AAAlign> {
AAAlign(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.getAssociatedType()->isPtrOrPtrVectorTy())
+ return false;
+ return IRAttribute::isValidIRPositionForInit(A, IRP);
+ }
+
/// Return assumed alignment.
Align getAssumedAlign() const { return Align(getAssumed()); }
@@ -3973,9 +4230,28 @@ struct AAInstanceInfo : public StateWrapper<BooleanState, AbstractAttribute> {
struct AANoCapture
: public IRAttribute<
Attribute::NoCapture,
- StateWrapper<BitIntegerState<uint16_t, 7, 0>, AbstractAttribute>> {
+ StateWrapper<BitIntegerState<uint16_t, 7, 0>, AbstractAttribute>,
+ AANoCapture> {
AANoCapture(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
+ /// See IRAttribute::isImpliedByIR
+ static bool isImpliedByIR(Attributor &A, const IRPosition &IRP,
+ Attribute::AttrKind ImpliedAttributeKind,
+ bool IgnoreSubsumingPositions = false);
+
+ /// Update \p State according to the capture capabilities of \p F for position
+ /// \p IRP.
+ static void determineFunctionCaptureCapabilities(const IRPosition &IRP,
+ const Function &F,
+ BitIntegerState &State);
+
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.getAssociatedType()->isPtrOrPtrVectorTy())
+ return false;
+ return IRAttribute::isValidIRPositionForInit(A, IRP);
+ }
+
/// State encoding bits. A set bit in the state means the property holds.
/// NO_CAPTURE is the best possible state, 0 the worst possible state.
enum {
@@ -4187,12 +4463,22 @@ struct AAPrivatizablePtr
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAPrivatizablePtr(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.getAssociatedType()->isPtrOrPtrVectorTy())
+ return false;
+ return AbstractAttribute::isValidIRPositionForInit(A, IRP);
+ }
+
/// Returns true if pointer privatization is assumed to be possible.
bool isAssumedPrivatizablePtr() const { return getAssumed(); }
/// Returns true if pointer privatization is known to be possible.
bool isKnownPrivatizablePtr() const { return getKnown(); }
+ /// See AbstractAttribute::requiresCallersForArgOrFunction
+ static bool requiresCallersForArgOrFunction() { return true; }
+
/// Return the type we can choose for a private copy of the underlying
/// value. std::nullopt means it is not clear yet, nullptr means there is
/// none.
@@ -4223,9 +4509,21 @@ struct AAPrivatizablePtr
struct AAMemoryBehavior
: public IRAttribute<
Attribute::ReadNone,
- StateWrapper<BitIntegerState<uint8_t, 3>, AbstractAttribute>> {
+ StateWrapper<BitIntegerState<uint8_t, 3>, AbstractAttribute>,
+ AAMemoryBehavior> {
AAMemoryBehavior(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
+ /// See AbstractAttribute::hasTrivialInitializer.
+ static bool hasTrivialInitializer() { return false; }
+
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.isFunctionScope() &&
+ !IRP.getAssociatedType()->isPtrOrPtrVectorTy())
+ return false;
+ return IRAttribute::isValidIRPositionForInit(A, IRP);
+ }
+
/// State encoding bits. A set bit in the state means the property holds.
/// BEST_STATE is the best possible state, 0 the worst possible state.
enum {
@@ -4286,11 +4584,23 @@ struct AAMemoryBehavior
struct AAMemoryLocation
: public IRAttribute<
Attribute::ReadNone,
- StateWrapper<BitIntegerState<uint32_t, 511>, AbstractAttribute>> {
+ StateWrapper<BitIntegerState<uint32_t, 511>, AbstractAttribute>,
+ AAMemoryLocation> {
using MemoryLocationsKind = StateType::base_t;
AAMemoryLocation(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
+ /// See AbstractAttribute::hasTrivialInitializer.
+ static bool hasTrivialInitializer() { return false; }
+
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.isFunctionScope() &&
+ !IRP.getAssociatedType()->isPtrOrPtrVectorTy())
+ return false;
+ return IRAttribute::isValidIRPositionForInit(A, IRP);
+ }
+
/// Encoding of different locations that could be accessed by a memory
/// access.
enum {
@@ -4430,8 +4740,8 @@ struct AAMemoryLocation
static AAMemoryLocation &createForPosition(const IRPosition &IRP,
Attributor &A);
- /// See AbstractState::getAsStr().
- const std::string getAsStr() const override {
+ /// See AbstractState::getAsStr(Attributor).
+ const std::string getAsStr(Attributor *A) const override {
return getMemoryLocationsAsStr(getAssumedNotAccessedLocation());
}
@@ -4458,6 +4768,16 @@ struct AAValueConstantRange
AAValueConstantRange(const IRPosition &IRP, Attributor &A)
: Base(IRP, IRP.getAssociatedType()->getIntegerBitWidth()) {}
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.getAssociatedType()->isIntegerTy())
+ return false;
+ return AbstractAttribute::isValidIRPositionForInit(A, IRP);
+ }
+
+ /// See AbstractAttribute::requiresCallersForArgOrFunction
+ static bool requiresCallersForArgOrFunction() { return true; }
+
/// See AbstractAttribute::getState(...).
IntegerRangeState &getState() override { return *this; }
const IntegerRangeState &getState() const override { return *this; }
@@ -4720,6 +5040,16 @@ struct AAPotentialConstantValues
using Base = StateWrapper<PotentialConstantIntValuesState, AbstractAttribute>;
AAPotentialConstantValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.getAssociatedType()->isIntegerTy())
+ return false;
+ return AbstractAttribute::isValidIRPositionForInit(A, IRP);
+ }
+
+ /// See AbstractAttribute::requiresCallersForArgOrFunction
+ static bool requiresCallersForArgOrFunction() { return true; }
+
/// See AbstractAttribute::getState(...).
PotentialConstantIntValuesState &getState() override { return *this; }
const PotentialConstantIntValuesState &getState() const override {
@@ -4773,6 +5103,9 @@ struct AAPotentialValues
using Base = StateWrapper<PotentialLLVMValuesState, AbstractAttribute>;
AAPotentialValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+ /// See AbstractAttribute::requiresCallersForArgOrFunction
+ static bool requiresCallersForArgOrFunction() { return true; }
+
/// See AbstractAttribute::getState(...).
PotentialLLVMValuesState &getState() override { return *this; }
const PotentialLLVMValuesState &getState() const override { return *this; }
@@ -4802,10 +5135,9 @@ struct AAPotentialValues
static const char ID;
private:
- virtual bool
- getAssumedSimplifiedValues(Attributor &A,
- SmallVectorImpl<AA::ValueAndContext> &Values,
- AA::ValueScope) const = 0;
+ virtual bool getAssumedSimplifiedValues(
+ Attributor &A, SmallVectorImpl<AA::ValueAndContext> &Values,
+ AA::ValueScope, bool RecurseForSelectAndPHI = false) const = 0;
friend struct Attributor;
};
@@ -4813,9 +5145,21 @@ private:
/// An abstract interface for all noundef attributes.
struct AANoUndef
: public IRAttribute<Attribute::NoUndef,
- StateWrapper<BooleanState, AbstractAttribute>> {
+ StateWrapper<BooleanState, AbstractAttribute>,
+ AANoUndef> {
AANoUndef(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
+ /// See IRAttribute::isImpliedByUndef
+ static bool isImpliedByUndef() { return false; }
+
+ /// See IRAttribute::isImpliedByPoison
+ static bool isImpliedByPoison() { return false; }
+
+ /// See IRAttribute::isImpliedByIR
+ static bool isImpliedByIR(Attributor &A, const IRPosition &IRP,
+ Attribute::AttrKind ImpliedAttributeKind,
+ bool IgnoreSubsumingPositions = false);
+
/// Return true if we assume that the underlying value is noundef.
bool isAssumedNoUndef() const { return getAssumed(); }
@@ -4840,6 +5184,53 @@ struct AANoUndef
static const char ID;
};
+struct AANoFPClass
+ : public IRAttribute<
+ Attribute::NoFPClass,
+ StateWrapper<BitIntegerState<uint32_t, fcAllFlags, fcNone>,
+ AbstractAttribute>,
+ AANoFPClass> {
+ using Base = StateWrapper<BitIntegerState<uint32_t, fcAllFlags, fcNone>,
+ AbstractAttribute>;
+
+ AANoFPClass(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
+
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ Type *Ty = IRP.getAssociatedType();
+ do {
+ if (Ty->isFPOrFPVectorTy())
+ return IRAttribute::isValidIRPositionForInit(A, IRP);
+ if (!Ty->isArrayTy())
+ break;
+ Ty = Ty->getArrayElementType();
+ } while (true);
+ return false;
+ }
+
+ /// Return true if we assume that the underlying value is nofpclass.
+ FPClassTest getAssumedNoFPClass() const {
+ return static_cast<FPClassTest>(getAssumed());
+ }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AANoFPClass &createForPosition(const IRPosition &IRP, Attributor &A);
+
+ /// See AbstractAttribute::getName()
+ const std::string getName() const override { return "AANoFPClass"; }
+
+ /// See AbstractAttribute::getIdAddr()
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is AANoFPClass
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
struct AACallGraphNode;
struct AACallEdges;
@@ -4889,6 +5280,10 @@ struct AACallEdges : public StateWrapper<BooleanState, AbstractAttribute>,
AACallEdges(const IRPosition &IRP, Attributor &A)
: Base(IRP), AACallGraphNode(A) {}
+ /// The callee value is tracked beyond a simple stripPointerCasts, so we allow
+ /// unknown callees.
+ static bool requiresCalleeForCallBase() { return false; }
+
/// Get the optimistic edges.
virtual const SetVector<Function *> &getOptimisticEdges() const = 0;
@@ -5054,9 +5449,16 @@ struct AAExecutionDomain
const Instruction &I) const = 0;
virtual ExecutionDomainTy getExecutionDomain(const BasicBlock &) const = 0;
- virtual ExecutionDomainTy getExecutionDomain(const CallBase &) const = 0;
+ /// Return the execution domain with which the call \p CB is entered and the
+ /// one with which it is left.
+ virtual std::pair<ExecutionDomainTy, ExecutionDomainTy>
+ getExecutionDomain(const CallBase &CB) const = 0;
virtual ExecutionDomainTy getFunctionExecutionDomain() const = 0;
+ /// Helper function to determine if \p FI is a no-op given the information
+ /// about its execution from \p ExecDomainAA.
+ virtual bool isNoOpFence(const FenceInst &FI) const = 0;
+
/// This function should return true if the type of the \p AA is
/// AAExecutionDomain.
static bool classof(const AbstractAttribute *AA) {
@@ -5108,10 +5510,50 @@ struct AAInterFnReachability
static const char ID;
};
+/// An abstract Attribute for determining the necessity of the convergent
+/// attribute.
+struct AANonConvergent : public StateWrapper<BooleanState, AbstractAttribute> {
+ using Base = StateWrapper<BooleanState, AbstractAttribute>;
+
+ AANonConvergent(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AANonConvergent &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ /// Return true if "non-convergent" is assumed.
+ bool isAssumedNotConvergent() const { return getAssumed(); }
+
+ /// Return true if "non-convergent" is known.
+ bool isKnownNotConvergent() const { return getKnown(); }
+
+ /// See AbstractAttribute::getName()
+ const std::string getName() const override { return "AANonConvergent"; }
+
+ /// See AbstractAttribute::getIdAddr()
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is
+ /// AANonConvergent.
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
/// An abstract interface for struct information.
struct AAPointerInfo : public AbstractAttribute {
AAPointerInfo(const IRPosition &IRP) : AbstractAttribute(IRP) {}
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.getAssociatedType()->isPtrOrPtrVectorTy())
+ return false;
+ return AbstractAttribute::isValidIRPositionForInit(A, IRP);
+ }
+
enum AccessKind {
// First two bits to distinguish may and must accesses.
AK_MUST = 1 << 0,
@@ -5443,8 +5885,8 @@ struct AAPointerInfo : public AbstractAttribute {
/// The instruction responsible for the access.
Instruction *RemoteI;
- /// The value written, if any. `llvm::none` means "not known yet", `nullptr`
- /// cannot be determined.
+ /// The value written, if any. `std::nullopt` means "not known yet",
+ /// `nullptr` cannot be determined.
std::optional<Value *> Content;
/// Set of potential ranges accessed from the base pointer.
@@ -5483,6 +5925,7 @@ struct AAPointerInfo : public AbstractAttribute {
/// read the initial value of the underlying memory.
virtual bool forallInterferingAccesses(
Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I,
+ bool FindInterferingWrites, bool FindInterferingReads,
function_ref<bool(const Access &, bool)> CB, bool &HasBeenWrittenTo,
AA::RangeTy &Range) const = 0;
@@ -5533,6 +5976,16 @@ struct AAAssumptionInfo
struct AAUnderlyingObjects : AbstractAttribute {
AAUnderlyingObjects(const IRPosition &IRP) : AbstractAttribute(IRP) {}
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.getAssociatedType()->isPtrOrPtrVectorTy())
+ return false;
+ return AbstractAttribute::isValidIRPositionForInit(A, IRP);
+ }
+
+ /// See AbstractAttribute::requiresCallersForArgOrFunction
+ static bool requiresCallersForArgOrFunction() { return true; }
+
/// Create an abstract attribute view for the position \p IRP.
static AAUnderlyingObjects &createForPosition(const IRPosition &IRP,
Attributor &A);
@@ -5561,6 +6014,49 @@ struct AAUnderlyingObjects : AbstractAttribute {
AA::ValueScope Scope = AA::Interprocedural) const = 0;
};
+/// An abstract interface for address space information.
+struct AAAddressSpace : public StateWrapper<BooleanState, AbstractAttribute> {
+ AAAddressSpace(const IRPosition &IRP, Attributor &A)
+ : StateWrapper<BooleanState, AbstractAttribute>(IRP) {}
+
+ /// See AbstractAttribute::isValidIRPositionForInit
+ static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
+ if (!IRP.getAssociatedType()->isPtrOrPtrVectorTy())
+ return false;
+ return AbstractAttribute::isValidIRPositionForInit(A, IRP);
+ }
+
+ /// See AbstractAttribute::requiresCallersForArgOrFunction
+ static bool requiresCallersForArgOrFunction() { return true; }
+
+ /// Return the address space of the associated value. \p NoAddressSpace is
+ /// returned if the associated value is dead. This function is not supposed
+ /// to be called if the AA is invalid.
+ virtual int32_t getAddressSpace() const = 0;
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAAddressSpace &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ /// See AbstractAttribute::getName()
+ const std::string getName() const override { return "AAAddressSpace"; }
+
+ /// See AbstractAttribute::getIdAddr()
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is
+ /// AAAddressSpace
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ // Sentinel "no address space" value; indicates the associated value is dead.
+ static const int32_t NoAddressSpace = -1;
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
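Editorial sketch (not part of the patch): querying the new address-space AA for a pointer value. It assumes the pointer-returning getAAFor interface used elsewhere in this revision; the helper name and the fallback choice are illustrative.
static int32_t getDeducedAddressSpace(Attributor &A,
                                      const AbstractAttribute &QueryingAA,
                                      Value &Ptr) {
  const auto *ASAA = A.getAAFor<AAAddressSpace>(
      QueryingAA, IRPosition::value(Ptr), DepClassTy::OPTIONAL);
  // Illustrative fallback: treat "no AA available" like the dead-value case.
  return ASAA ? ASAA->getAddressSpace() : AAAddressSpace::NoAddressSpace;
}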
raw_ostream &operator<<(raw_ostream &, const AAPointerInfo::Access &);
/// Run options, used by the pass manager.
@@ -5571,6 +6067,50 @@ enum AttributorRunOption {
ALL = MODULE | CGSCC
};
+namespace AA {
+/// Helper to avoid creating an AA for IR Attributes that might already be set.
+template <Attribute::AttrKind AK, typename AAType = AbstractAttribute>
+bool hasAssumedIRAttr(Attributor &A, const AbstractAttribute *QueryingAA,
+ const IRPosition &IRP, DepClassTy DepClass, bool &IsKnown,
+ bool IgnoreSubsumingPositions = false,
+ const AAType **AAPtr = nullptr) {
+ IsKnown = false;
+ switch (AK) {
+#define CASE(ATTRNAME, AANAME, ...) \
+ case Attribute::ATTRNAME: { \
+ if (AANAME::isImpliedByIR(A, IRP, AK, IgnoreSubsumingPositions)) \
+ return IsKnown = true; \
+ if (!QueryingAA) \
+ return false; \
+ const auto *AA = A.getAAFor<AANAME>(*QueryingAA, IRP, DepClass); \
+ if (AAPtr) \
+ *AAPtr = reinterpret_cast<const AAType *>(AA); \
+ if (!AA || !AA->isAssumed(__VA_ARGS__)) \
+ return false; \
+ IsKnown = AA->isKnown(__VA_ARGS__); \
+ return true; \
+ }
+ CASE(NoUnwind, AANoUnwind, );
+ CASE(WillReturn, AAWillReturn, );
+ CASE(NoFree, AANoFree, );
+ CASE(NoCapture, AANoCapture, );
+ CASE(NoRecurse, AANoRecurse, );
+ CASE(NoReturn, AANoReturn, );
+ CASE(NoSync, AANoSync, );
+ CASE(NoAlias, AANoAlias, );
+ CASE(NonNull, AANonNull, );
+ CASE(MustProgress, AAMustProgress, );
+ CASE(NoUndef, AANoUndef, );
+ CASE(ReadNone, AAMemoryBehavior, AAMemoryBehavior::NO_ACCESSES);
+ CASE(ReadOnly, AAMemoryBehavior, AAMemoryBehavior::NO_WRITES);
+ CASE(WriteOnly, AAMemoryBehavior, AAMemoryBehavior::NO_READS);
+#undef CASE
+ default:
+ llvm_unreachable("hasAssumedIRAttr not available for this attribute kind");
+ };
+}
+} // namespace AA
+
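Editorial sketch (not part of the patch): the helper above lets deduction code ask whether an IR attribute already holds before creating an AA for it; the wrapper below is hypothetical.
static bool argumentIsNonNull(Attributor &A,
                              const AbstractAttribute &QueryingAA,
                              const IRPosition &ArgPos, bool &IsKnown) {
  // Falls back to an AANonNull query only if the attribute is not already
  // implied by the IR at this position.
  return AA::hasAssumedIRAttr<Attribute::NonNull>(
      A, &QueryingAA, ArgPos, DepClassTy::REQUIRED, IsKnown);
}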
} // end namespace llvm
#endif // LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
diff --git a/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h b/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h
index a71fa3bf404d..63e1ad043d49 100644
--- a/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h
+++ b/llvm/include/llvm/Transforms/IPO/DeadArgumentElimination.h
@@ -136,6 +136,7 @@ private:
bool removeDeadStuffFromFunction(Function *F);
bool deleteDeadVarargs(Function &F);
bool removeDeadArgumentsFromCallers(Function &F);
+ void propagateVirtMustcallLiveness(const Module &M);
};
} // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/IPO/EmbedBitcodePass.h b/llvm/include/llvm/Transforms/IPO/EmbedBitcodePass.h
new file mode 100644
index 000000000000..f323c61483fd
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/EmbedBitcodePass.h
@@ -0,0 +1,58 @@
+//===-- EmbedBitcodePass.h - Embeds bitcode into global ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides a pass which clones the current module and runs the
+/// provided pass pipeline on the clone. The optimized module is stored into a
+/// global variable in the `.llvm.lto` section. Primarily, this pass is used
+/// to support the FatLTO pipeline, but could be used to generate a bitcode
+/// section for any arbitrary pass pipeline without changing the current module.
+///
+//===----------------------------------------------------------------------===//
+//
+#ifndef LLVM_TRANSFORMS_IPO_EMBEDBITCODEPASS_H
+#define LLVM_TRANSFORMS_IPO_EMBEDBITCODEPASS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class Module;
+class ModulePass;
+class Pass;
+
+struct EmbedBitcodeOptions {
+ EmbedBitcodeOptions() : EmbedBitcodeOptions(false, false) {}
+ EmbedBitcodeOptions(bool IsThinLTO, bool EmitLTOSummary)
+ : IsThinLTO(IsThinLTO), EmitLTOSummary(EmitLTOSummary) {}
+ bool IsThinLTO;
+ bool EmitLTOSummary;
+};
+
+/// A pass that embeds a copy of the module, optimized with the provided pass
+/// pipeline, into a global variable.
+class EmbedBitcodePass : public PassInfoMixin<EmbedBitcodePass> {
+ bool IsThinLTO;
+ bool EmitLTOSummary;
+ ModulePassManager MPM;
+
+public:
+ EmbedBitcodePass(EmbedBitcodeOptions Opts)
+ : EmbedBitcodePass(Opts.IsThinLTO, Opts.EmitLTOSummary,
+ ModulePassManager()) {}
+ EmbedBitcodePass(bool IsThinLTO, bool EmitLTOSummary, ModulePassManager &&MPM)
+ : IsThinLTO(IsThinLTO), EmitLTOSummary(EmitLTOSummary),
+ MPM(std::move(MPM)) {}
+
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
+
+ static bool isRequired() { return true; }
+};
+
+} // end namespace llvm.
+
+#endif
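Editorial sketch (not part of the patch): constructing the new pass with a caller-provided pipeline, matching the constructor shown above; the helper name and flag values are illustrative.
// Sketch: embed a ThinLTO-ready copy of the module optimized by `MPM`.
static EmbedBitcodePass makeFatLTOEmbedPass(ModulePassManager &&MPM) {
  return EmbedBitcodePass(/*IsThinLTO=*/true, /*EmitLTOSummary=*/true,
                          std::move(MPM));
}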
diff --git a/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
index 07c7cac77354..52667e19bc88 100644
--- a/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
+++ b/llvm/include/llvm/Transforms/IPO/ForceFunctionAttrs.h
@@ -17,7 +17,6 @@
namespace llvm {
class Module;
-class Pass;
/// Pass which forces specific function attributes into the IR, primarily as
/// a debugging tool.
@@ -25,9 +24,6 @@ struct ForceFunctionAttrsPass : PassInfoMixin<ForceFunctionAttrsPass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
};
-/// Create a legacy pass manager instance of a pass to force function attrs.
-Pass *createForceFunctionAttrsLegacyPass();
-
}
#endif // LLVM_TRANSFORMS_IPO_FORCEFUNCTIONATTRS_H
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
index fc7cca83496b..6a21ff616d50 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
@@ -26,7 +26,6 @@ class GlobalValueSummary;
class ModuleSummaryIndex;
class Function;
class Module;
-class Pass;
/// Returns the memory access properties of this copy of the function.
MemoryEffects computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR);
@@ -48,13 +47,17 @@ bool thinLTOPropagateFunctionAttrs(
/// attribute. It also discovers function arguments that are not captured by
/// the function and marks them with the nocapture attribute.
struct PostOrderFunctionAttrsPass : PassInfoMixin<PostOrderFunctionAttrsPass> {
+ PostOrderFunctionAttrsPass(bool SkipNonRecursive = false)
+ : SkipNonRecursive(SkipNonRecursive) {}
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR);
-};
-/// Create a legacy pass manager instance of a pass to compute function attrs
-/// in post-order.
-Pass *createPostOrderFunctionAttrsLegacyPass();
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
+
+private:
+ bool SkipNonRecursive;
+};
/// A pass to do RPO deduction and propagation of function attributes.
///
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index c5bafb89fcb5..3e4b3eb30e77 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -136,6 +136,10 @@ public:
/// \p ModuleToDefinedGVSummaries contains for each Module a map
/// (GUID -> Summary) for every global defined in the module.
///
+/// \p isPrevailing is a callback that will be called with a global value's GUID
+/// and summary and should return whether the module corresponding to the
+/// summary contains the linker-prevailing copy of that value.
+///
/// \p ImportLists will be populated with an entry for every Module we are
/// importing into. This entry is itself a map that can be passed to
/// FunctionImporter::importFunctions() above (see description there).
@@ -146,16 +150,24 @@ public:
void ComputeCrossModuleImport(
const ModuleSummaryIndex &Index,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ isPrevailing,
StringMap<FunctionImporter::ImportMapTy> &ImportLists,
StringMap<FunctionImporter::ExportSetTy> &ExportLists);
/// Compute all the imports for the given module using the Index.
///
+/// \p isPrevailing is a callback that will be called with a global value's GUID
+/// and summary and should return whether the module corresponding to the
+/// summary contains the linker-prevailing copy of that value.
+///
/// \p ImportList will be populated with a map that can be passed to
/// FunctionImporter::importFunctions() above (see description there).
void ComputeCrossModuleImportForModule(
- StringRef ModulePath, const ModuleSummaryIndex &Index,
- FunctionImporter::ImportMapTy &ImportList);
+ StringRef ModulePath,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ isPrevailing,
+ const ModuleSummaryIndex &Index, FunctionImporter::ImportMapTy &ImportList);
/// Mark all external summaries in \p Index for import into the given module.
/// Used for distributed builds using a distributed index.
diff --git a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
index 8db246f739ab..4e78d9db024c 100644
--- a/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
+++ b/llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h
@@ -48,10 +48,11 @@
#ifndef LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
#define LLVM_TRANSFORMS_IPO_FUNCTIONSPECIALIZATION_H
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/InstVisitor.h"
#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/SCCPSolver.h"
@@ -60,6 +61,18 @@
using namespace llvm;
namespace llvm {
+// Map of potential specializations for each function. The FunctionSpecializer
+// keeps the discovered specialisation opportunities for the module in a single
+// vector, where the specialisations of each function form a contiguous range.
+// This map's value is the beginning and the end of that range.
+using SpecMap = DenseMap<Function *, std::pair<unsigned, unsigned>>;
+
+// Just a shorter abbreviation to improve indentation.
+using Cost = InstructionCost;
+
+// Map of known constants found during the specialization bonus estimation.
+using ConstMap = DenseMap<Value *, Constant *>;
+
// Specialization signature, used to uniquely designate a specialization within
// a function.
struct SpecSig {
@@ -95,22 +108,64 @@ struct Spec {
SpecSig Sig;
// Profitability of the specialization.
- InstructionCost Gain;
+ Cost Score;
// List of call sites, matching this specialization.
SmallVector<CallBase *> CallSites;
- Spec(Function *F, const SpecSig &S, InstructionCost G)
- : F(F), Sig(S), Gain(G) {}
- Spec(Function *F, const SpecSig &&S, InstructionCost G)
- : F(F), Sig(S), Gain(G) {}
+ Spec(Function *F, const SpecSig &S, Cost Score)
+ : F(F), Sig(S), Score(Score) {}
+ Spec(Function *F, const SpecSig &&S, Cost Score)
+ : F(F), Sig(S), Score(Score) {}
};
-// Map of potential specializations for each function. The FunctionSpecializer
-// keeps the discovered specialisation opportunities for the module in a single
-// vector, where the specialisations of each function form a contiguous range.
-// This map's value is the beginning and the end of that range.
-using SpecMap = DenseMap<Function *, std::pair<unsigned, unsigned>>;
+class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
+ const DataLayout &DL;
+ BlockFrequencyInfo &BFI;
+ TargetTransformInfo &TTI;
+ SCCPSolver &Solver;
+
+ ConstMap KnownConstants;
+ // Basic blocks known to be unreachable after constant propagation.
+ DenseSet<BasicBlock *> DeadBlocks;
+ // PHI nodes we have visited before.
+ DenseSet<Instruction *> VisitedPHIs;
+ // PHI nodes we have visited once without successfully constant folding them.
+ // Once the InstCostVisitor has processed all the specialization arguments,
+ // it should be possible to determine whether those PHIs can be folded
+ // (some of their incoming values may have become constant or dead).
+ SmallVector<Instruction *> PendingPHIs;
+
+ ConstMap::iterator LastVisited;
+
+public:
+ InstCostVisitor(const DataLayout &DL, BlockFrequencyInfo &BFI,
+ TargetTransformInfo &TTI, SCCPSolver &Solver)
+ : DL(DL), BFI(BFI), TTI(TTI), Solver(Solver) {}
+
+ Cost getUserBonus(Instruction *User, Value *Use = nullptr,
+ Constant *C = nullptr);
+
+ Cost getBonusFromPendingPHIs();
+
+private:
+ friend class InstVisitor<InstCostVisitor, Constant *>;
+
+ Cost estimateSwitchInst(SwitchInst &I);
+ Cost estimateBranchInst(BranchInst &I);
+
+ Constant *visitInstruction(Instruction &I) { return nullptr; }
+ Constant *visitPHINode(PHINode &I);
+ Constant *visitFreezeInst(FreezeInst &I);
+ Constant *visitCallBase(CallBase &I);
+ Constant *visitLoadInst(LoadInst &I);
+ Constant *visitGetElementPtrInst(GetElementPtrInst &I);
+ Constant *visitSelectInst(SelectInst &I);
+ Constant *visitCastInst(CastInst &I);
+ Constant *visitCmpInst(CmpInst &I);
+ Constant *visitUnaryOperator(UnaryOperator &I);
+ Constant *visitBinaryOperator(BinaryOperator &I);
+};
class FunctionSpecializer {
@@ -123,37 +178,39 @@ class FunctionSpecializer {
FunctionAnalysisManager *FAM;
/// Analyses used to help determine if a function should be specialized.
+ std::function<BlockFrequencyInfo &(Function &)> GetBFI;
std::function<const TargetLibraryInfo &(Function &)> GetTLI;
std::function<TargetTransformInfo &(Function &)> GetTTI;
std::function<AssumptionCache &(Function &)> GetAC;
- // The number of functions specialised, used for collecting statistics and
- // also in the cost model.
- unsigned NbFunctionsSpecialized = 0;
-
- SmallPtrSet<Function *, 32> SpecializedFuncs;
+ SmallPtrSet<Function *, 32> Specializations;
SmallPtrSet<Function *, 32> FullySpecialized;
DenseMap<Function *, CodeMetrics> FunctionMetrics;
public:
FunctionSpecializer(
SCCPSolver &Solver, Module &M, FunctionAnalysisManager *FAM,
+ std::function<BlockFrequencyInfo &(Function &)> GetBFI,
std::function<const TargetLibraryInfo &(Function &)> GetTLI,
std::function<TargetTransformInfo &(Function &)> GetTTI,
std::function<AssumptionCache &(Function &)> GetAC)
- : Solver(Solver), M(M), FAM(FAM), GetTLI(GetTLI), GetTTI(GetTTI),
- GetAC(GetAC) {}
-
- ~FunctionSpecializer() {
- // Eliminate dead code.
- removeDeadFunctions();
- cleanUpSSA();
- }
+ : Solver(Solver), M(M), FAM(FAM), GetBFI(GetBFI), GetTLI(GetTLI),
+ GetTTI(GetTTI), GetAC(GetAC) {}
- bool isClonedFunction(Function *F) { return SpecializedFuncs.count(F); }
+ ~FunctionSpecializer();
bool run();
+ InstCostVisitor getInstCostVisitorFor(Function *F) {
+ auto &BFI = GetBFI(*F);
+ auto &TTI = GetTTI(*F);
+ return InstCostVisitor(M.getDataLayout(), BFI, TTI, Solver);
+ }
+
+ /// Compute a bonus for replacing argument \p A with constant \p C.
+ Cost getSpecializationBonus(Argument *A, Constant *C,
+ InstCostVisitor &Visitor);
+
private:
Constant *getPromotableAlloca(AllocaInst *Alloca, CallInst *Call);
@@ -162,10 +219,9 @@ private:
/// is a function argument.
Constant *getConstantStackValue(CallInst *Call, Value *Val);
- /// Iterate over the argument tracked functions see if there
- /// are any new constant values for the call instruction via
- /// stack variables.
- void promoteConstantStackValues();
+ /// See if there are any new constant values for the callers of \p F via
+ /// stack variables and promote them to global variables.
+ void promoteConstantStackValues(Function *F);
/// Clean up fully specialized functions.
void removeDeadFunctions();
@@ -173,17 +229,14 @@ private:
/// Remove any ssa_copy intrinsics that may have been introduced.
void cleanUpSSA();
- // Compute the code metrics for function \p F.
- CodeMetrics &analyzeFunction(Function *F);
-
/// @brief Find potential specialization opportunities.
/// @param F Function to specialize
- /// @param Cost Cost of specializing a function. Final gain is this cost
- /// minus benefit
+ /// @param SpecCost Cost of specializing a function. Final score is benefit
+ /// minus this cost.
/// @param AllSpecs A vector to add potential specializations to.
/// @param SM A map for a function's specialisation range
/// @return True, if any potential specializations were found
- bool findSpecializations(Function *F, InstructionCost Cost,
+ bool findSpecializations(Function *F, Cost SpecCost,
SmallVectorImpl<Spec> &AllSpecs, SpecMap &SM);
bool isCandidateFunction(Function *F);
@@ -194,13 +247,6 @@ private:
/// @return The new, cloned function
Function *createSpecialization(Function *F, const SpecSig &S);
- /// Compute and return the cost of specializing function \p F.
- InstructionCost getSpecializationCost(Function *F);
-
- /// Compute a bonus for replacing argument \p A with constant \p C.
- InstructionCost getSpecializationBonus(Argument *A, Constant *C,
- const LoopInfo &LI);
-
/// Determine if it is possible to specialise the function for constant values
/// of the formal parameter \p A.
bool isArgumentInteresting(Argument *A);
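Editorial sketch (not part of the patch): how the new InstCostVisitor-based interfaces compose when scoring a single specialization candidate. It assumes access to a FunctionSpecializer instance; the helper name is hypothetical.
// Score a single (argument, constant) candidate for function F (sketch).
static Cost scoreCandidate(FunctionSpecializer &FS, Function *F, Argument *A,
                           Constant *C) {
  InstCostVisitor Visitor = FS.getInstCostVisitorFor(F);
  Cost Score = FS.getSpecializationBonus(A, C, Visitor);
  // PHIs that could not be folded while visiting users may become foldable
  // once all specialization arguments have been processed.
  Score += Visitor.getBonusFromPendingPHIs();
  return Score;
}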
diff --git a/llvm/include/llvm/Transforms/IPO/GlobalDCE.h b/llvm/include/llvm/Transforms/IPO/GlobalDCE.h
index a24196efb83b..92c30d4b54a2 100644
--- a/llvm/include/llvm/Transforms/IPO/GlobalDCE.h
+++ b/llvm/include/llvm/Transforms/IPO/GlobalDCE.h
@@ -35,9 +35,16 @@ class Value;
/// Pass to remove unused function declarations.
class GlobalDCEPass : public PassInfoMixin<GlobalDCEPass> {
public:
+ GlobalDCEPass(bool InLTOPostLink = false) : InLTOPostLink(InLTOPostLink) {}
+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
+
private:
+ bool InLTOPostLink = false;
+
SmallPtrSet<GlobalValue*, 32> AliveGlobals;
/// Global -> Global that uses this global.
@@ -61,7 +68,6 @@ private:
void UpdateGVDependencies(GlobalValue &GV);
void MarkLive(GlobalValue &GV,
SmallVectorImpl<GlobalValue *> *Updates = nullptr);
- bool RemoveUnusedGlobalValue(GlobalValue &GV);
// Dead virtual function elimination.
void AddVirtualFunctionDependencies(Module &M);
diff --git a/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h b/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
index 880af2b46d7f..8addf49fc0d8 100644
--- a/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
+++ b/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
@@ -19,7 +19,6 @@
namespace llvm {
class Module;
-class Pass;
/// A pass which infers function attributes from the names and signatures of
/// function declarations in a module.
@@ -27,10 +26,6 @@ struct InferFunctionAttrsPass : PassInfoMixin<InferFunctionAttrsPass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
-/// Create a legacy pass manager instance of a pass to infer function
-/// attributes.
-Pass *createInferFunctionAttrsLegacyPass();
-
}
#endif // LLVM_TRANSFORMS_IPO_INFERFUNCTIONATTRS_H
diff --git a/llvm/include/llvm/Transforms/IPO/Inliner.h b/llvm/include/llvm/Transforms/IPO/Inliner.h
index 1e154eb8f5da..401aa2d3a0cc 100644
--- a/llvm/include/llvm/Transforms/IPO/Inliner.h
+++ b/llvm/include/llvm/Transforms/IPO/Inliner.h
@@ -10,7 +10,6 @@
#define LLVM_TRANSFORMS_IPO_INLINER_H
#include "llvm/Analysis/CGSCCPassManager.h"
-#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyCallGraph.h"
@@ -19,66 +18,6 @@
namespace llvm {
-class AssumptionCacheTracker;
-class CallGraph;
-class ProfileSummaryInfo;
-
-/// This class contains all of the helper code which is used to perform the
-/// inlining operations that do not depend on the policy. It contains the core
-/// bottom-up inlining infrastructure that specific inliner passes use.
-struct LegacyInlinerBase : public CallGraphSCCPass {
- explicit LegacyInlinerBase(char &ID);
- explicit LegacyInlinerBase(char &ID, bool InsertLifetime);
-
- /// For this class, we declare that we require and preserve the call graph.
- /// If the derived class implements this method, it should always explicitly
- /// call the implementation here.
- void getAnalysisUsage(AnalysisUsage &Info) const override;
-
- using llvm::Pass::doInitialization;
-
- bool doInitialization(CallGraph &CG) override;
-
- /// Main run interface method, this implements the interface required by the
- /// Pass class.
- bool runOnSCC(CallGraphSCC &SCC) override;
-
- using llvm::Pass::doFinalization;
-
- /// Remove now-dead linkonce functions at the end of processing to avoid
- /// breaking the SCC traversal.
- bool doFinalization(CallGraph &CG) override;
-
- /// This method must be implemented by the subclass to determine the cost of
- /// inlining the specified call site. If the cost returned is greater than
- /// the current inline threshold, the call site is not inlined.
- virtual InlineCost getInlineCost(CallBase &CB) = 0;
-
- /// Remove dead functions.
- ///
- /// This also includes a hack in the form of the 'AlwaysInlineOnly' flag
- /// which restricts it to deleting functions with an 'AlwaysInline'
- /// attribute. This is useful for the InlineAlways pass that only wants to
- /// deal with that subset of the functions.
- bool removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly = false);
-
- /// This function performs the main work of the pass. The default of
- /// Inlinter::runOnSCC() calls skipSCC() before calling this method, but
- /// derived classes which cannot be skipped can override that method and call
- /// this function unconditionally.
- bool inlineCalls(CallGraphSCC &SCC);
-
-private:
- // Insert @llvm.lifetime intrinsics.
- bool InsertLifetime = true;
-
-protected:
- AssumptionCacheTracker *ACT;
- ProfileSummaryInfo *PSI;
- std::function<const TargetLibraryInfo &(Function &)> GetTLI;
- ImportedFunctionsInliningStatistics ImportedFunctionsStats;
-};
-
/// The inliner pass for the new pass manager.
///
/// This pass wires together the inlining utilities and the inline cost
diff --git a/llvm/include/llvm/Transforms/IPO/Internalize.h b/llvm/include/llvm/Transforms/IPO/Internalize.h
index adcf5a932be0..ece5bfe77b79 100644
--- a/llvm/include/llvm/Transforms/IPO/Internalize.h
+++ b/llvm/include/llvm/Transforms/IPO/Internalize.h
@@ -28,7 +28,6 @@
namespace llvm {
class Module;
-class CallGraph;
/// A pass that internalizes all functions and variables other than those that
/// must be preserved according to \c MustPreserveGV.
@@ -66,10 +65,7 @@ public:
  /// Run the internalizer on \p TheModule, returns true if any changes were
  /// made.
- ///
- /// If the CallGraph \p CG is supplied, it will be updated when
- /// internalizing a function (by removing any edge from the "external node")
- bool internalizeModule(Module &TheModule, CallGraph *CG = nullptr);
+ bool internalizeModule(Module &TheModule);
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
@@ -77,10 +73,9 @@ public:
/// Helper function to internalize functions and variables in a Module.
inline bool
internalizeModule(Module &TheModule,
- std::function<bool(const GlobalValue &)> MustPreserveGV,
- CallGraph *CG = nullptr) {
+ std::function<bool(const GlobalValue &)> MustPreserveGV) {
return InternalizePass(std::move(MustPreserveGV))
- .internalizeModule(TheModule, CG);
+ .internalizeModule(TheModule);
}
} // end namespace llvm
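
A short sketch of the simplified helper, which now takes only the module and the must-preserve predicate. The example predicate (keeping only `main`) and the wrapper name are illustrative.

#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO/Internalize.h"

using namespace llvm;

// Illustrative: internalize everything except `main`.
static bool internalizeAllButMain(Module &M) {
  return internalizeModule(
      M, [](const GlobalValue &GV) { return GV.getName() == "main"; });
}
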
diff --git a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
new file mode 100644
index 000000000000..f4c20a5749f0
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h
@@ -0,0 +1,59 @@
+//==- MemProfContextDisambiguation.h - Context Disambiguation ----*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements support for context disambiguation of allocation calls for profile
+// guided heap optimization using memprof metadata. See implementation file for
+// details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H
+#define LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/IR/PassManager.h"
+#include <functional>
+
+namespace llvm {
+class GlobalValueSummary;
+class Module;
+class OptimizationRemarkEmitter;
+
+class MemProfContextDisambiguation
+ : public PassInfoMixin<MemProfContextDisambiguation> {
+  /// Run the context disambiguator on \p M, returns true if any changes were made.
+ bool processModule(
+ Module &M,
+ function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
+
+ /// In the ThinLTO backend, apply the cloning decisions in ImportSummary to
+ /// the IR.
+ bool applyImport(Module &M);
+
+ /// Import summary containing cloning decisions for the ThinLTO backend.
+ const ModuleSummaryIndex *ImportSummary;
+
+ // Owns the import summary specified by internal options for testing the
+ // ThinLTO backend via opt (to simulate distributed ThinLTO).
+ std::unique_ptr<ModuleSummaryIndex> ImportSummaryForTesting;
+
+public:
+ MemProfContextDisambiguation(const ModuleSummaryIndex *Summary = nullptr);
+
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+ void run(ModuleSummaryIndex &Index,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ isPrevailing);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H
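
A minimal usage sketch for the new pass (the helper name is illustrative): a regular build passes a null summary, while a ThinLTO backend would pass its import summary so the recorded cloning decisions get applied.

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"

using namespace llvm;

static ModulePassManager
buildMemProfPipeline(const ModuleSummaryIndex *ImportSummary) {
  ModulePassManager MPM;
  // nullptr: in-process / regular LTO. Non-null: ThinLTO backend applying
  // the cloning decisions recorded in the import summary.
  MPM.addPass(MemProfContextDisambiguation(ImportSummary));
  return MPM;
}
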
diff --git a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
index bf08336663b6..4ab0035f3b42 100644
--- a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
+++ b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h
@@ -29,6 +29,9 @@ bool containsOpenMP(Module &M);
/// Helper to determine if \p M is a OpenMP target offloading device module.
bool isOpenMPDevice(Module &M);
+/// Return true iff \p Fn is a GPU kernel; \p Fn has the "kernel" attribute.
+bool isKernel(Function &Fn);
+
/// Get OpenMP device kernels in \p M.
KernelSet getDeviceKernels(Module &M);
@@ -37,13 +40,25 @@ KernelSet getDeviceKernels(Module &M);
/// OpenMP optimizations pass.
class OpenMPOptPass : public PassInfoMixin<OpenMPOptPass> {
public:
+ OpenMPOptPass() = default;
+ OpenMPOptPass(ThinOrFullLTOPhase LTOPhase) : LTOPhase(LTOPhase) {}
+
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+private:
+ const ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None;
};
class OpenMPOptCGSCCPass : public PassInfoMixin<OpenMPOptCGSCCPass> {
public:
+ OpenMPOptCGSCCPass() = default;
+ OpenMPOptCGSCCPass(ThinOrFullLTOPhase LTOPhase) : LTOPhase(LTOPhase) {}
+
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR);
+
+private:
+ const ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None;
};
} // end namespace llvm
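
A sketch of the new LTO-phase-aware constructor in a module pipeline; the wrapper function is illustrative.

#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/IPO/OpenMPOpt.h"

using namespace llvm;

static ModulePassManager buildOpenMPPipeline(ThinOrFullLTOPhase Phase) {
  ModulePassManager MPM;
  // The pass now records which (Thin/Full) LTO phase it runs in.
  MPM.addPass(OpenMPOptPass(Phase));
  return MPM;
}
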
diff --git a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
deleted file mode 100644
index 4cc161e03df9..000000000000
--- a/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ /dev/null
@@ -1,135 +0,0 @@
-// llvm/Transforms/IPO/PassManagerBuilder.h - Build Standard Pass -*- C++ -*-=//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the PassManagerBuilder class, which is used to set up a
-// "standard" optimization sequence suitable for languages like C and C++.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H
-#define LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H
-
-#include "llvm-c/Transforms/PassManagerBuilder.h"
-#include <functional>
-#include <string>
-#include <vector>
-
-namespace llvm {
-class ModuleSummaryIndex;
-class Pass;
-class TargetLibraryInfoImpl;
-
-// The old pass manager infrastructure is hidden in a legacy namespace now.
-namespace legacy {
-class FunctionPassManager;
-class PassManagerBase;
-}
-
-/// PassManagerBuilder - This class is used to set up a standard optimization
-/// sequence for languages like C and C++, allowing some APIs to customize the
-/// pass sequence in various ways. A simple example of using it would be:
-///
-/// PassManagerBuilder Builder;
-/// Builder.OptLevel = 2;
-/// Builder.populateFunctionPassManager(FPM);
-/// Builder.populateModulePassManager(MPM);
-///
-/// In addition to setting up the basic passes, PassManagerBuilder allows
-/// frontends to vend a plugin API, where plugins are allowed to add extensions
-/// to the default pass manager. They do this by specifying where in the pass
-/// pipeline they want to be added, along with a callback function that adds
-/// the pass(es). For example, a plugin that wanted to add a loop optimization
-/// could do something like this:
-///
-/// static void addMyLoopPass(const PMBuilder &Builder, PassManagerBase &PM) {
-/// if (Builder.getOptLevel() > 2 && Builder.getOptSizeLevel() == 0)
-/// PM.add(createMyAwesomePass());
-/// }
-/// ...
-/// Builder.addExtension(PassManagerBuilder::EP_LoopOptimizerEnd,
-/// addMyLoopPass);
-/// ...
-class PassManagerBuilder {
-public:
- /// Extensions are passed to the builder itself (so they can see how it is
- /// configured) as well as the pass manager to add stuff to.
- typedef std::function<void(const PassManagerBuilder &Builder,
- legacy::PassManagerBase &PM)>
- ExtensionFn;
- typedef int GlobalExtensionID;
-
- /// The Optimization Level - Specify the basic optimization level.
- /// 0 = -O0, 1 = -O1, 2 = -O2, 3 = -O3
- unsigned OptLevel;
-
- /// SizeLevel - How much we're optimizing for size.
- /// 0 = none, 1 = -Os, 2 = -Oz
- unsigned SizeLevel;
-
- /// LibraryInfo - Specifies information about the runtime library for the
- /// optimizer. If this is non-null, it is added to both the function and
- /// per-module pass pipeline.
- TargetLibraryInfoImpl *LibraryInfo;
-
- /// Inliner - Specifies the inliner to use. If this is non-null, it is
- /// added to the per-module passes.
- Pass *Inliner;
-
- /// The module summary index to use for exporting information from the
- /// regular LTO phase, for example for the CFI and devirtualization type
- /// tests.
- ModuleSummaryIndex *ExportSummary = nullptr;
-
- /// The module summary index to use for importing information to the
- /// thin LTO backends, for example for the CFI and devirtualization type
- /// tests.
- const ModuleSummaryIndex *ImportSummary = nullptr;
-
- bool DisableUnrollLoops;
- bool CallGraphProfile;
- bool SLPVectorize;
- bool LoopVectorize;
- bool LoopsInterleaved;
- bool DisableGVNLoadPRE;
- bool ForgetAllSCEVInLoopUnroll;
- bool VerifyInput;
- bool VerifyOutput;
- bool MergeFunctions;
- bool DivergentTarget;
- unsigned LicmMssaOptCap;
- unsigned LicmMssaNoAccForPromotionCap;
-
-public:
- PassManagerBuilder();
- ~PassManagerBuilder();
-
-private:
- void addInitialAliasAnalysisPasses(legacy::PassManagerBase &PM) const;
- void addFunctionSimplificationPasses(legacy::PassManagerBase &MPM);
- void addVectorPasses(legacy::PassManagerBase &PM, bool IsFullLTO);
-
-public:
- /// populateFunctionPassManager - This fills in the function pass manager,
- /// which is expected to be run on each function immediately as it is
- /// generated. The idea is to reduce the size of the IR in memory.
- void populateFunctionPassManager(legacy::FunctionPassManager &FPM);
-
- /// populateModulePassManager - This sets up the primary pass manager.
- void populateModulePassManager(legacy::PassManagerBase &MPM);
-};
-
-inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) {
- return reinterpret_cast<PassManagerBuilder*>(P);
-}
-
-inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) {
- return reinterpret_cast<LLVMPassManagerBuilderRef>(P);
-}
-
-} // end namespace llvm
-#endif
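
With PassManagerBuilder removed, clients set up the equivalent pipeline through the new pass manager's PassBuilder. The following is a minimal sketch of that replacement, not code taken from this patch.

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"

using namespace llvm;

static void runDefaultO2Pipeline(Module &M) {
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  PassBuilder PB;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  // Roughly what the old OptLevel = 2 configuration used to produce.
  ModulePassManager MPM =
      PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);
  MPM.run(M, MAM);
}
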
diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
index 5e12fcfeae1b..bc8360a80bc0 100644
--- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
+++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
@@ -64,16 +64,22 @@ public:
using iterator = ProfiledCallGraphNode::iterator;
// Constructor for non-CS profile.
- ProfiledCallGraph(SampleProfileMap &ProfileMap) {
+ ProfiledCallGraph(SampleProfileMap &ProfileMap,
+ uint64_t IgnoreColdCallThreshold = 0) {
assert(!FunctionSamples::ProfileIsCS &&
"CS flat profile is not handled here");
for (const auto &Samples : ProfileMap) {
addProfiledCalls(Samples.second);
}
+
+ // Trim edges with weight up to `IgnoreColdCallThreshold`. This aims
+    // for a more stable call graph with "deterministic" edges from run to run.
+ trimColdEges(IgnoreColdCallThreshold);
}
// Constructor for CS profile.
- ProfiledCallGraph(SampleContextTracker &ContextTracker) {
+ ProfiledCallGraph(SampleContextTracker &ContextTracker,
+ uint64_t IgnoreColdCallThreshold = 0) {
// BFS traverse the context profile trie to add call edges for calls shown
// in context.
std::queue<ContextTrieNode *> Queue;
@@ -121,11 +127,16 @@ public:
ContextTracker.getFuncNameFor(Callee), Weight);
}
}
+
+ // Trim edges with weight up to `IgnoreColdCallThreshold`. This aims
+    // for a more stable call graph with "deterministic" edges from run to run.
+ trimColdEges(IgnoreColdCallThreshold);
}
iterator begin() { return Root.Edges.begin(); }
iterator end() { return Root.Edges.end(); }
ProfiledCallGraphNode *getEntryNode() { return &Root; }
+
void addProfiledFunction(StringRef Name) {
if (!ProfiledFunctions.count(Name)) {
// Link to synthetic root to make sure every node is reachable
@@ -148,8 +159,9 @@ private:
auto EdgeIt = Edges.find(Edge);
if (EdgeIt == Edges.end()) {
Edges.insert(Edge);
- } else if (EdgeIt->Weight < Edge.Weight) {
- // Replace existing call edges with same target but smaller weight.
+ } else {
+ // Accumulate weight to the existing edge.
+ Edge.Weight += EdgeIt->Weight;
Edges.erase(EdgeIt);
Edges.insert(Edge);
}
@@ -175,6 +187,24 @@ private:
}
}
+ // Trim edges with weight up to `Threshold`. Do not trim anything if
+ // `Threshold` is zero.
+ void trimColdEges(uint64_t Threshold = 0) {
+ if (!Threshold)
+ return;
+
+ for (auto &Node : ProfiledFunctions) {
+ auto &Edges = Node.second.Edges;
+ auto I = Edges.begin();
+ while (I != Edges.end()) {
+ if (I->Weight <= Threshold)
+ I = Edges.erase(I);
+ else
+ I++;
+ }
+ }
+ }
+
ProfiledCallGraphNode Root;
StringMap<ProfiledCallGraphNode> ProfiledFunctions;
};
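
A small sketch of the new cold-edge trimming knob; the helper name and the way the threshold is chosen are illustrative assumptions.

#include <iterator>

#include "llvm/Transforms/IPO/ProfiledCallGraph.h"

using namespace llvm;
using namespace llvm::sampleprof;

// Illustrative: build the graph, dropping edges whose accumulated weight is
// at or below ColdThreshold, then count the root's remaining callees.
static size_t countRootCallees(SampleProfileMap &ProfileMap,
                               uint64_t ColdThreshold) {
  ProfiledCallGraph CG(ProfileMap, ColdThreshold);
  return std::distance(CG.begin(), CG.end());
}
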
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfile.h b/llvm/include/llvm/Transforms/IPO/SampleProfile.h
index d838c8b8a83e..2ef55949e236 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfile.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfile.h
@@ -14,6 +14,7 @@
#ifndef LLVM_TRANSFORMS_IPO_SAMPLEPROFILE_H
#define LLVM_TRANSFORMS_IPO_SAMPLEPROFILE_H
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include <string>
@@ -22,14 +23,17 @@ namespace llvm {
class Module;
+namespace vfs {
+class FileSystem;
+} // namespace vfs
+
/// The sample profiler data loader pass.
class SampleProfileLoaderPass : public PassInfoMixin<SampleProfileLoaderPass> {
public:
SampleProfileLoaderPass(
std::string File = "", std::string RemappingFile = "",
- ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None)
- : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
- LTOPhase(LTOPhase) {}
+ ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS = nullptr);
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
@@ -37,6 +41,7 @@ private:
std::string ProfileFileName;
std::string ProfileRemappingFileName;
const ThinOrFullLTOPhase LTOPhase;
+ IntrusiveRefCntPtr<vfs::FileSystem> FS;
};
} // end namespace llvm
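
A sketch of the new constructor, which now takes the virtual file system explicitly. Using vfs::getRealFileSystem() here is just the obvious choice for illustration; a test could pass an in-memory VFS instead.

#include "llvm/IR/PassManager.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Transforms/IPO/SampleProfile.h"

using namespace llvm;

static ModulePassManager buildSampleLoader(std::string ProfileFile) {
  ModulePassManager MPM;
  MPM.addPass(SampleProfileLoaderPass(std::move(ProfileFile),
                                      /*RemappingFile=*/"",
                                      ThinOrFullLTOPhase::None,
                                      vfs::getRealFileSystem()));
  return MPM;
}
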
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
index ebac3d6a24ef..601fe6ce8a2e 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -22,7 +22,6 @@
#include <unordered_map>
namespace llvm {
-class Any;
class BasicBlock;
class Function;
class Instruction;
@@ -41,16 +40,6 @@ using ProbeFactorMap = std::unordered_map<std::pair<uint64_t, uint64_t>, float,
pair_hash<uint64_t, uint64_t>>;
using FuncProbeFactorMap = StringMap<ProbeFactorMap>;
-class PseudoProbeDescriptor {
- uint64_t FunctionGUID;
- uint64_t FunctionHash;
-
-public:
- PseudoProbeDescriptor(uint64_t GUID, uint64_t Hash)
- : FunctionGUID(GUID), FunctionHash(Hash) {}
- uint64_t getFunctionGUID() const { return FunctionGUID; }
- uint64_t getFunctionHash() const { return FunctionHash; }
-};
// A pseudo probe verifier that can be run after each IR pass to detect the
// violation of updating probe factors. In principle, the sum of distribution
@@ -79,20 +68,6 @@ private:
const ProbeFactorMap &ProbeFactors);
};
-// This class serves sample counts correlation for SampleProfileLoader by
-// analyzing pseudo probes and their function descriptors injected by
-// SampleProfileProber.
-class PseudoProbeManager {
- DenseMap<uint64_t, PseudoProbeDescriptor> GUIDToProbeDescMap;
-
- const PseudoProbeDescriptor *getDesc(const Function &F) const;
-
-public:
- PseudoProbeManager(const Module &M);
- bool moduleIsProbed(const Module &M) const;
- bool profileIsValid(const Function &F, const FunctionSamples &Samples) const;
-};
-
/// Sample profile pseudo prober.
///
/// Insert pseudo probes for block sampling and value sampling.
diff --git a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
index a2296a064213..9e121d9c6f4e 100644
--- a/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ b/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -28,7 +28,6 @@ class Module;
template <typename T> class ArrayRef;
template <typename T> class MutableArrayRef;
-class Function;
class GlobalVariable;
class ModuleSummaryIndex;
struct ValueInfo;
@@ -118,14 +117,14 @@ struct TypeMemberInfo {
// A virtual call target, i.e. an entry in a particular vtable.
struct VirtualCallTarget {
- VirtualCallTarget(Function *Fn, const TypeMemberInfo *TM);
+ VirtualCallTarget(GlobalValue *Fn, const TypeMemberInfo *TM);
// For testing only.
VirtualCallTarget(const TypeMemberInfo *TM, bool IsBigEndian)
: Fn(nullptr), TM(TM), IsBigEndian(IsBigEndian), WasDevirt(false) {}
- // The function stored in the vtable.
- Function *Fn;
+ // The function (or an alias to a function) stored in the vtable.
+ GlobalValue *Fn;
// A pointer to the type identifier member through which the pointer to Fn is
// accessed.
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombine.h b/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
index 35a3a8c3218b..3e1c6e0fcdc6 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
@@ -25,13 +25,34 @@
namespace llvm {
+static constexpr unsigned InstCombineDefaultMaxIterations = 1000;
+
+struct InstCombineOptions {
+ bool UseLoopInfo = false;
+ unsigned MaxIterations = InstCombineDefaultMaxIterations;
+
+ InstCombineOptions() = default;
+
+ InstCombineOptions &setUseLoopInfo(bool Value) {
+ UseLoopInfo = Value;
+ return *this;
+ }
+
+ InstCombineOptions &setMaxIterations(unsigned Value) {
+ MaxIterations = Value;
+ return *this;
+ }
+};
+
class InstCombinePass : public PassInfoMixin<InstCombinePass> {
+private:
InstructionWorklist Worklist;
- const unsigned MaxIterations;
+ InstCombineOptions Options;
public:
- explicit InstCombinePass();
- explicit InstCombinePass(unsigned MaxIterations);
+ explicit InstCombinePass(InstCombineOptions Opts = {});
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
@@ -42,13 +63,11 @@ public:
/// will try to combine all instructions in the function.
class InstructionCombiningPass : public FunctionPass {
InstructionWorklist Worklist;
- const unsigned MaxIterations;
public:
static char ID; // Pass identification, replacement for typeid
explicit InstructionCombiningPass();
- explicit InstructionCombiningPass(unsigned MaxIterations);
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnFunction(Function &F) override;
@@ -67,7 +86,6 @@ public:
// %Z = add int 2, %X
//
FunctionPass *createInstructionCombiningPass();
-FunctionPass *createInstructionCombiningPass(unsigned MaxIterations);
}
#undef DEBUG_TYPE
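
A sketch of the new options struct: the setters return *this, so options chain fluently into the pass constructor. The wrapper function is illustrative.

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"

using namespace llvm;

static FunctionPassManager buildInstCombine(bool UseLoopInfo) {
  FunctionPassManager FPM;
  FPM.addPass(InstCombinePass(InstCombineOptions()
                                  .setUseLoopInfo(UseLoopInfo)
                                  .setMaxIterations(
                                      InstCombineDefaultMaxIterations)));
  return FPM;
}
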
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index a876385581e7..5569bc90caa6 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -34,6 +34,7 @@ namespace llvm {
class AAResults;
class AssumptionCache;
+class OptimizationRemarkEmitter;
class ProfileSummaryInfo;
class TargetLibraryInfo;
class TargetTransformInfo;
@@ -440,15 +441,17 @@ public:
/// Replace operand of instruction and add old operand to the worklist.
Instruction *replaceOperand(Instruction &I, unsigned OpNum, Value *V) {
- Worklist.addValue(I.getOperand(OpNum));
+ Value *OldOp = I.getOperand(OpNum);
I.setOperand(OpNum, V);
+ Worklist.handleUseCountDecrement(OldOp);
return &I;
}
/// Replace use and add the previously used value to the worklist.
void replaceUse(Use &U, Value *NewValue) {
- Worklist.addValue(U);
+ Value *OldOp = U;
U = NewValue;
+ Worklist.handleUseCountDecrement(OldOp);
}
/// Combiner aware instruction erasure.
@@ -529,6 +532,8 @@ public:
SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &UndefElts,
unsigned Depth = 0,
bool AllowMultipleUsers = false) = 0;
+
+ bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
};
} // namespace llvm
diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
index 7858a1c4b2fd..4affc1142901 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
@@ -27,18 +27,24 @@ public:
Use *PtrUse;
bool IsWrite;
Type *OpType;
- uint64_t TypeSize;
+ TypeSize TypeStoreSize = TypeSize::Fixed(0);
MaybeAlign Alignment;
// The mask Value, if we're looking at a masked load/store.
Value *MaybeMask;
+ // The EVL Value, if we're looking at a vp intrinsic.
+ Value *MaybeEVL;
+ // The Stride Value, if we're looking at a strided load/store.
+ Value *MaybeStride;
InterestingMemoryOperand(Instruction *I, unsigned OperandNo, bool IsWrite,
class Type *OpType, MaybeAlign Alignment,
- Value *MaybeMask = nullptr)
+ Value *MaybeMask = nullptr,
+ Value *MaybeEVL = nullptr,
+ Value *MaybeStride = nullptr)
: IsWrite(IsWrite), OpType(OpType), Alignment(Alignment),
- MaybeMask(MaybeMask) {
+ MaybeMask(MaybeMask), MaybeEVL(MaybeEVL), MaybeStride(MaybeStride) {
const DataLayout &DL = I->getModule()->getDataLayout();
- TypeSize = DL.getTypeStoreSizeInBits(OpType);
+ TypeStoreSize = DL.getTypeStoreSizeInBits(OpType);
PtrUse = &I->getOperandUse(OperandNo);
}
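
A sketch of the widened constructor: a sanitizer frontend describing operand 0 of a masked or strided VP load might do something like the following. The wrapper function is illustrative.

#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"

using namespace llvm;

// Illustrative: Mask/EVL/Stride stay nullptr for a plain load or store.
static InterestingMemoryOperand
describeVPLoad(Instruction *I, Type *LoadTy, MaybeAlign Alignment,
               Value *Mask, Value *EVL, Value *Stride) {
  return InterestingMemoryOperand(I, /*OperandNo=*/0, /*IsWrite=*/false,
                                  LoadTy, Alignment, Mask, EVL, Stride);
}
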
diff --git a/llvm/include/llvm/Transforms/Instrumentation/BlockCoverageInference.h b/llvm/include/llvm/Transforms/Instrumentation/BlockCoverageInference.h
new file mode 100644
index 000000000000..12f236481b25
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/BlockCoverageInference.h
@@ -0,0 +1,86 @@
+//===-- BlockCoverageInference.h - Minimal Execution Coverage ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file finds the minimum set of blocks on a CFG that must be instrumented
+/// to infer execution coverage for the whole graph.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_BLOCKCOVERAGEINFERENCE_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_BLOCKCOVERAGEINFERENCE_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+class Function;
+class BasicBlock;
+class DotFuncBCIInfo;
+
+class BlockCoverageInference {
+ friend class DotFuncBCIInfo;
+
+public:
+ using BlockSet = SmallSetVector<const BasicBlock *, 4>;
+
+ BlockCoverageInference(const Function &F, bool ForceInstrumentEntry);
+
+ /// \return true if \p BB should be instrumented for coverage.
+ bool shouldInstrumentBlock(const BasicBlock &BB) const;
+
+ /// \return the set of blocks \p Deps such that \p BB is covered iff any
+ /// blocks in \p Deps are covered.
+ BlockSet getDependencies(const BasicBlock &BB) const;
+
+ /// \return a hash that depends on the set of instrumented blocks.
+ uint64_t getInstrumentedBlocksHash() const;
+
+ /// Dump the inference graph.
+ void dump(raw_ostream &OS) const;
+
+ /// View the inferred block coverage as a dot file.
+ /// Filled gray blocks are instrumented, red outlined blocks are found to be
+ /// covered, red edges show that a block's coverage can be inferred from its
+ /// successors, and blue edges show that a block's coverage can be inferred
+ /// from its predecessors.
+ void viewBlockCoverageGraph(
+ const DenseMap<const BasicBlock *, bool> *Coverage = nullptr) const;
+
+private:
+ const Function &F;
+ bool ForceInstrumentEntry;
+
+ /// Maps blocks to a minimal list of predecessors that can be used to infer
+ /// this block's coverage.
+ DenseMap<const BasicBlock *, BlockSet> PredecessorDependencies;
+
+ /// Maps blocks to a minimal list of successors that can be used to infer
+ /// this block's coverage.
+ DenseMap<const BasicBlock *, BlockSet> SuccessorDependencies;
+
+ /// Compute \p PredecessorDependencies and \p SuccessorDependencies.
+ void findDependencies();
+
+ /// Find the set of basic blocks that are reachable from \p Start without the
+ /// basic block \p Avoid.
+ void getReachableAvoiding(const BasicBlock &Start, const BasicBlock &Avoid,
+ bool IsForward, BlockSet &Reachable) const;
+
+ static std::string getBlockNames(ArrayRef<const BasicBlock *> BBs);
+ static std::string getBlockNames(BlockSet BBs) {
+ return getBlockNames(ArrayRef<const BasicBlock *>(BBs.begin(), BBs.end()));
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_INSTRUMENTATION_BLOCKCOVERAGEINFERENCE_H
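
A short usage sketch of the new inference helper; the counting function is illustrative.

#include "llvm/IR/Function.h"
#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"

using namespace llvm;

// Illustrative: how many blocks of F actually need a coverage probe once
// the remaining blocks' coverage can be inferred from them.
static unsigned countInstrumentedBlocks(const Function &F) {
  BlockCoverageInference BCI(F, /*ForceInstrumentEntry=*/false);
  unsigned Probes = 0;
  for (const BasicBlock &BB : F)
    if (BCI.shouldInstrumentBlock(BB))
      ++Probes;
  return Probes;
}
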
diff --git a/llvm/lib/Transforms/Instrumentation/CFGMST.h b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
index 2abe8d12de3c..4d31898bb314 100644
--- a/llvm/lib/Transforms/Instrumentation/CFGMST.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
@@ -11,8 +11,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H
-#define LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_CFGMST_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_CFGMST_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -100,7 +100,7 @@ public:
void buildEdges() {
LLVM_DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n");
- const BasicBlock *Entry = &(F.getEntryBlock());
+ BasicBlock *Entry = &(F.getEntryBlock());
uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2);
// If we want to instrument the entry count, lower the weight to 0.
if (InstrumentFuncEntry)
@@ -257,7 +257,7 @@ public:
}
// Add an edge to AllEdges with weight W.
- Edge &addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W) {
+ Edge &addEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W) {
uint32_t Index = BBInfos.size();
auto Iter = BBInfos.end();
bool Inserted;
@@ -300,4 +300,4 @@ public:
#undef DEBUG_TYPE // "cfgmst"
-#endif // LLVM_LIB_TRANSFORMS_INSTRUMENTATION_CFGMST_H
+#endif // LLVM_TRANSFORMS_INSTRUMENTATION_CFGMST_H
diff --git a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
index 90fc0670448b..cb0c055dcb74 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/InstrProfiling.h
@@ -95,6 +95,10 @@ private:
/// Replace instrprof.cover with a store instruction to the coverage byte.
void lowerCover(InstrProfCoverInst *Inc);
+ /// Replace instrprof.timestamp with a call to
+ /// INSTR_PROF_PROFILE_SET_TIMESTAMP.
+ void lowerTimestamp(InstrProfTimestampInst *TimestampInstruction);
+
/// Replace instrprof.increment with an increment of the appropriate value.
void lowerIncrement(InstrProfIncrementInst *Inc);
diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
index b584b9984492..293133b29cd9 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
@@ -12,6 +12,7 @@
#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_MEMPROFILER_H
#define LLVM_TRANSFORMS_INSTRUMENTATION_MEMPROFILER_H
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
@@ -20,6 +21,10 @@ class FunctionPass;
class Module;
class ModulePass;
+namespace vfs {
+class FileSystem;
+} // namespace vfs
+
/// Public interface to the memory profiler pass for instrumenting code to
/// profile memory accesses.
///
@@ -43,9 +48,16 @@ public:
static bool isRequired() { return true; }
};
-// Insert MemProfiler instrumentation
-FunctionPass *createMemProfilerFunctionPass();
-ModulePass *createModuleMemProfilerLegacyPassPass();
+class MemProfUsePass : public PassInfoMixin<MemProfUsePass> {
+public:
+ explicit MemProfUsePass(std::string MemoryProfileFile,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS = nullptr);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+private:
+ std::string MemoryProfileFileName;
+ IntrusiveRefCntPtr<vfs::FileSystem> FS;
+};
} // namespace llvm
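
A sketch of the new profile-use pass; the helper name is illustrative, and the real file system is passed explicitly rather than relying on the nullptr default.

#include "llvm/IR/PassManager.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Transforms/Instrumentation/MemProfiler.h"

using namespace llvm;

static ModulePassManager buildMemProfUse(std::string ProfilePath) {
  ModulePassManager MPM;
  MPM.addPass(MemProfUsePass(std::move(ProfilePath), vfs::getRealFileSystem()));
  return MPM;
}
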
diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
index 875a17094d2e..c77d3214ed01 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h
@@ -16,6 +16,7 @@
#define LLVM_TRANSFORMS_INSTRUMENTATION_PGOINSTRUMENTATION_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/IR/PassManager.h"
#include <cstdint>
#include <string>
@@ -26,6 +27,10 @@ class Function;
class Instruction;
class Module;
+namespace vfs {
+class FileSystem;
+} // namespace vfs
+
/// The instrumentation (profile-instr-gen) pass for IR based PGO.
// We use this pass to create COMDAT profile variables for context
// sensitive PGO (CSPGO). The reason to have a pass for this is CSPGO
@@ -37,7 +42,7 @@ class PGOInstrumentationGenCreateVar
public:
PGOInstrumentationGenCreateVar(std::string CSInstrName = "")
: CSInstrName(CSInstrName) {}
- PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
private:
std::string CSInstrName;
@@ -47,7 +52,7 @@ private:
class PGOInstrumentationGen : public PassInfoMixin<PGOInstrumentationGen> {
public:
PGOInstrumentationGen(bool IsCS = false) : IsCS(IsCS) {}
- PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
private:
// If this is a context sensitive instrumentation.
@@ -58,15 +63,17 @@ private:
class PGOInstrumentationUse : public PassInfoMixin<PGOInstrumentationUse> {
public:
PGOInstrumentationUse(std::string Filename = "",
- std::string RemappingFilename = "", bool IsCS = false);
+ std::string RemappingFilename = "", bool IsCS = false,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS = nullptr);
- PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
private:
std::string ProfileFileName;
std::string ProfileRemappingFileName;
// If this is a context sensitive instrumentation.
bool IsCS;
+ IntrusiveRefCntPtr<vfs::FileSystem> FS;
};
/// The indirect function call promotion pass.
@@ -75,7 +82,7 @@ public:
PGOIndirectCallPromotion(bool IsInLTO = false, bool SamplePGO = false)
: InLTO(IsInLTO), SamplePGO(SamplePGO) {}
- PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
private:
bool InLTO;
@@ -87,7 +94,7 @@ class PGOMemOPSizeOpt : public PassInfoMixin<PGOMemOPSizeOpt> {
public:
PGOMemOPSizeOpt() = default;
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &MAM);
};
void setProfMetadata(Module *M, Instruction *TI, ArrayRef<uint64_t> EdgeCounts,
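
A sketch of the extended PGO profile-use constructor, which now also takes a virtual file system; the wrapper function is illustrative.

#include "llvm/IR/PassManager.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"

using namespace llvm;

static ModulePassManager buildPGOUse(std::string Profile,
                                     std::string Remapping) {
  ModulePassManager MPM;
  MPM.addPass(PGOInstrumentationUse(std::move(Profile), std::move(Remapping),
                                    /*IsCS=*/false,
                                    vfs::getRealFileSystem()));
  return MPM;
}
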
diff --git a/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h b/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
index 67e22d1aa681..800a1d583f80 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
@@ -12,6 +12,7 @@
#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERBINARYMETADATA_H
#define LLVM_TRANSFORMS_INSTRUMENTATION_SANITIZERBINARYMETADATA_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
@@ -28,12 +29,14 @@ struct SanitizerBinaryMetadataOptions {
inline constexpr int kSanitizerBinaryMetadataAtomicsBit = 0;
inline constexpr int kSanitizerBinaryMetadataUARBit = 1;
+inline constexpr int kSanitizerBinaryMetadataUARHasSizeBit = 2;
-inline constexpr uint32_t kSanitizerBinaryMetadataNone = 0;
-inline constexpr uint32_t kSanitizerBinaryMetadataAtomics =
+inline constexpr uint64_t kSanitizerBinaryMetadataAtomics =
1 << kSanitizerBinaryMetadataAtomicsBit;
-inline constexpr uint32_t kSanitizerBinaryMetadataUAR =
+inline constexpr uint64_t kSanitizerBinaryMetadataUAR =
1 << kSanitizerBinaryMetadataUARBit;
+inline constexpr uint64_t kSanitizerBinaryMetadataUARHasSize =
+ 1 << kSanitizerBinaryMetadataUARHasSizeBit;
inline constexpr char kSanitizerBinaryMetadataCoveredSection[] =
"sanmd_covered";
@@ -48,12 +51,14 @@ class SanitizerBinaryMetadataPass
: public PassInfoMixin<SanitizerBinaryMetadataPass> {
public:
explicit SanitizerBinaryMetadataPass(
- SanitizerBinaryMetadataOptions Opts = {});
+ SanitizerBinaryMetadataOptions Opts = {},
+ ArrayRef<std::string> IgnorelistFiles = {});
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
static bool isRequired() { return true; }
private:
const SanitizerBinaryMetadataOptions Options;
+ const ArrayRef<std::string> IgnorelistFiles;
};
} // namespace llvm
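
A small sketch of the widened (now 64-bit) feature mask built from the constants above; which features a given function actually reports is decided by the pass itself, and the helper here is illustrative.

#include <cstdint>

#include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"

using namespace llvm;

// Illustrative: combine the per-function feature bits into one 64-bit mask.
static uint64_t makeFeatureMask(bool HasAtomics, bool HasUAR,
                                bool UARHasSize) {
  uint64_t Mask = 0;
  if (HasAtomics)
    Mask |= kSanitizerBinaryMetadataAtomics;
  if (HasUAR)
    Mask |= kSanitizerBinaryMetadataUAR;
  if (UARHasSize)
    Mask |= kSanitizerBinaryMetadataUARHasSize;
  return Mask;
}
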
diff --git a/llvm/include/llvm/Transforms/ObjCARC.h b/llvm/include/llvm/Transforms/ObjCARC.h
index a4dd69656445..bd17c58e842e 100644
--- a/llvm/include/llvm/Transforms/ObjCARC.h
+++ b/llvm/include/llvm/Transforms/ObjCARC.h
@@ -22,28 +22,10 @@ class Pass;
//===----------------------------------------------------------------------===//
//
-// ObjCARCAPElim - ObjC ARC autorelease pool elimination.
-//
-Pass *createObjCARCAPElimPass();
-
-//===----------------------------------------------------------------------===//
-//
-// ObjCARCExpand - ObjC ARC preliminary simplifications.
-//
-Pass *createObjCARCExpandPass();
-
-//===----------------------------------------------------------------------===//
-//
// ObjCARCContract - Late ObjC ARC cleanups.
//
Pass *createObjCARCContractPass();
-//===----------------------------------------------------------------------===//
-//
-// ObjCARCOpt - ObjC ARC optimization.
-//
-Pass *createObjCARCOptPass();
-
struct ObjCARCOptPass : public PassInfoMixin<ObjCARCOptPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index 3db3e41d241a..aaba710cfde6 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -21,24 +21,10 @@ namespace llvm {
class Function;
class FunctionPass;
-class ModulePass;
class Pass;
//===----------------------------------------------------------------------===//
//
-// AlignmentFromAssumptions - Use assume intrinsics to set load/store
-// alignments.
-//
-FunctionPass *createAlignmentFromAssumptionsPass();
-
-//===----------------------------------------------------------------------===//
-//
-// SCCP - Sparse conditional constant propagation.
-//
-FunctionPass *createSCCPPass();
-
-//===----------------------------------------------------------------------===//
-//
// RedundantDbgInstElimination - This pass removes redundant dbg intrinsics
// without modifying the CFG of the function. It is a FunctionPass.
//
@@ -53,27 +39,6 @@ Pass *createRedundantDbgInstEliminationPass();
//
FunctionPass *createDeadCodeEliminationPass();
-//===----------------------------------------------------------------------===//
-//
-// DeadStoreElimination - This pass deletes stores that are post-dominated by
-// must-aliased stores and are not loaded used between the stores.
-//
-FunctionPass *createDeadStoreEliminationPass();
-
-
-//===----------------------------------------------------------------------===//
-//
-// CallSiteSplitting - This pass split call-site based on its known argument
-// values.
-FunctionPass *createCallSiteSplittingPass();
-
-//===----------------------------------------------------------------------===//
-//
-// AggressiveDCE - This pass uses the SSA based Aggressive DCE algorithm. This
-// algorithm assumes instructions are dead until proven otherwise, which makes
-// it more successful are removing non-obviously dead instructions.
-//
-FunctionPass *createAggressiveDCEPass();
//===----------------------------------------------------------------------===//
//
@@ -96,39 +61,15 @@ Pass *createLoopGuardWideningPass();
//===----------------------------------------------------------------------===//
//
-// BitTrackingDCE - This pass uses a bit-tracking DCE algorithm in order to
-// remove computations of dead bits.
-//
-FunctionPass *createBitTrackingDCEPass();
-
-//===----------------------------------------------------------------------===//
-//
// SROA - Replace aggregates or pieces of aggregates with scalar SSA values.
//
FunctionPass *createSROAPass(bool PreserveCFG = true);
//===----------------------------------------------------------------------===//
//
-// InductiveRangeCheckElimination - Transform loops to elide range checks on
-// linear functions of the induction variable.
-//
-Pass *createInductiveRangeCheckEliminationPass();
-
-//===----------------------------------------------------------------------===//
-//
-// InductionVariableSimplify - Transform induction variables in a program to all
-// use a single canonical induction variable per loop.
-//
-Pass *createIndVarSimplifyPass();
-
-//===----------------------------------------------------------------------===//
-//
// LICM - This pass is a loop invariant code motion and memory promotion pass.
//
Pass *createLICMPass();
-Pass *createLICMPass(unsigned LicmMssaOptCap,
- unsigned LicmMssaNoAccForPromotionCap,
- bool AllowSpeculation);
//===----------------------------------------------------------------------===//
//
@@ -145,19 +86,6 @@ Pass *createLoopPredicationPass();
//===----------------------------------------------------------------------===//
//
-// LoopInterchange - This pass interchanges loops to provide a more
-// cache-friendly memory access patterns.
-//
-Pass *createLoopInterchangePass();
-
-//===----------------------------------------------------------------------===//
-//
-// LoopFlatten - This pass flattens nested loops into a single loop.
-//
-FunctionPass *createLoopFlattenPass();
-
-//===----------------------------------------------------------------------===//
-//
// LoopStrengthReduce - This pass is strength reduces GEP instructions that use
// a loop's canonical induction variable as one of their indices.
//
@@ -178,22 +106,6 @@ Pass *createLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
int Count = -1, int AllowPartial = -1,
int Runtime = -1, int UpperBound = -1,
int AllowPeeling = -1);
-// Create an unrolling pass for full unrolling that uses exact trip count only
-// and also does peeling.
-Pass *createSimpleLoopUnrollPass(int OptLevel = 2, bool OnlyWhenForced = false,
- bool ForgetAllSCEV = false);
-
-//===----------------------------------------------------------------------===//
-//
-// LoopUnrollAndJam - This pass is a simple loop unroll and jam pass.
-//
-Pass *createLoopUnrollAndJamPass(int OptLevel = 2);
-
-//===----------------------------------------------------------------------===//
-//
-// LoopReroll - This pass is a simple loop rerolling pass.
-//
-Pass *createLoopRerollPass();
//===----------------------------------------------------------------------===//
//
@@ -203,18 +115,6 @@ Pass *createLoopRotatePass(int MaxHeaderSize = -1, bool PrepareForLTO = false);
//===----------------------------------------------------------------------===//
//
-// LoopIdiom - This pass recognizes and replaces idioms in loops.
-//
-Pass *createLoopIdiomPass();
-
-//===----------------------------------------------------------------------===//
-//
-// LoopVersioningLICM - This pass is a loop versioning pass for LICM.
-//
-Pass *createLoopVersioningLICMPass();
-
-//===----------------------------------------------------------------------===//
-//
// DemoteRegisterToMemoryPass - This pass is used to demote registers to memory
// references. It basically undoes the PromoteMemoryToRegister pass to make cfg
// hacking easier.
@@ -233,22 +133,6 @@ FunctionPass *createReassociatePass();
//===----------------------------------------------------------------------===//
//
-// JumpThreading - Thread control through mult-pred/multi-succ blocks where some
-// preds always go to some succ. Thresholds other than minus one
-// override the internal BB duplication default threshold.
-//
-FunctionPass *createJumpThreadingPass(int Threshold = -1);
-
-//===----------------------------------------------------------------------===//
-//
-// DFAJumpThreading - When a switch statement inside a loop is used to
-// implement a deterministic finite automata we can jump thread the switch
-// statement reducing number of conditional jumps.
-//
-FunctionPass *createDFAJumpThreadingPass();
-
-//===----------------------------------------------------------------------===//
-//
// CFGSimplification - Merge basic blocks, eliminate unreachable blocks,
// simplify terminator instructions, convert switches to lookup tables, etc.
//
@@ -288,20 +172,6 @@ FunctionPass *createEarlyCSEPass(bool UseMemorySSA = false);
//===----------------------------------------------------------------------===//
//
-// GVNHoist - This pass performs a simple and fast GVN pass over the dominator
-// tree to hoist common expressions from sibling branches.
-//
-FunctionPass *createGVNHoistPass();
-
-//===----------------------------------------------------------------------===//
-//
-// GVNSink - This pass uses an "inverted" value numbering to decide the
-// similarity of expressions and sinks similar expressions into successors.
-//
-FunctionPass *createGVNSinkPass();
-
-//===----------------------------------------------------------------------===//
-//
// MergedLoadStoreMotion - This pass merges loads and stores in diamonds. Loads
// are hoisted into the header, while stores sink into the footer.
//
@@ -309,33 +179,6 @@ FunctionPass *createMergedLoadStoreMotionPass(bool SplitFooterBB = false);
//===----------------------------------------------------------------------===//
//
-// GVN - This pass performs global value numbering and redundant load
-// elimination cotemporaneously.
-//
-FunctionPass *createNewGVNPass();
-
-//===----------------------------------------------------------------------===//
-//
-// DivRemPairs - Hoist/decompose integer division and remainder instructions.
-//
-FunctionPass *createDivRemPairsPass();
-
-//===----------------------------------------------------------------------===//
-//
-// MemCpyOpt - This pass performs optimizations related to eliminating memcpy
-// calls and/or combining multiple stores into memset's.
-//
-FunctionPass *createMemCpyOptPass();
-
-//===----------------------------------------------------------------------===//
-//
-// LoopDeletion - This pass performs DCE of non-infinite loops that it
-// can prove are dead.
-//
-Pass *createLoopDeletionPass();
-
-//===----------------------------------------------------------------------===//
-//
// ConstantHoisting - This pass prepares a function for expensive constants.
//
FunctionPass *createConstantHoistingPass();
@@ -360,19 +203,6 @@ Pass *createLowerGuardIntrinsicPass();
//===----------------------------------------------------------------------===//
//
-// LowerMatrixIntrinsics - Lower matrix intrinsics to vector operations.
-//
-Pass *createLowerMatrixIntrinsicsPass();
-
-//===----------------------------------------------------------------------===//
-//
-// LowerMatrixIntrinsicsMinimal - Lower matrix intrinsics to vector operations
-// (lightweight, does not require extra analysis)
-//
-Pass *createLowerMatrixIntrinsicsMinimalPass();
-
-//===----------------------------------------------------------------------===//
-//
// LowerWidenableCondition - Lower widenable condition to i1 true.
//
Pass *createLowerWidenableConditionPass();
@@ -385,12 +215,6 @@ Pass *createMergeICmpsLegacyPass();
//===----------------------------------------------------------------------===//
//
-// ValuePropagation - Propagate CFG-derived value information
-//
-Pass *createCorrelatedValuePropagationPass();
-
-//===----------------------------------------------------------------------===//
-//
// InferAddressSpaces - Modify users of addrspacecast instructions with values
// in the source address space if using the destination address space is slower
// on the target. If AddressSpace is left to its default value, it will be
@@ -451,72 +275,18 @@ FunctionPass *createStraightLineStrengthReducePass();
//===----------------------------------------------------------------------===//
//
-// PlaceSafepoints - Rewrite any IR calls to gc.statepoints and insert any
-// safepoint polls (method entry, backedge) that might be required. This pass
-// does not generate explicit relocation sequences - that's handled by
-// RewriteStatepointsForGC which can be run at an arbitrary point in the pass
-// order following this pass.
-//
-FunctionPass *createPlaceSafepointsPass();
-
-//===----------------------------------------------------------------------===//
-//
-// RewriteStatepointsForGC - Rewrite any gc.statepoints which do not yet have
-// explicit relocations to include explicit relocations.
-//
-ModulePass *createRewriteStatepointsForGCLegacyPass();
-
-//===----------------------------------------------------------------------===//
-//
-// Float2Int - Demote floats to ints where possible.
-//
-FunctionPass *createFloat2IntPass();
-
-//===----------------------------------------------------------------------===//
-//
// NaryReassociate - Simplify n-ary operations by reassociation.
//
FunctionPass *createNaryReassociatePass();
//===----------------------------------------------------------------------===//
//
-// LoopDistribute - Distribute loops.
-//
-FunctionPass *createLoopDistributePass();
-
-//===----------------------------------------------------------------------===//
-//
-// LoopFuse - Fuse loops.
-//
-FunctionPass *createLoopFusePass();
-
-//===----------------------------------------------------------------------===//
-//
-// LoopLoadElimination - Perform loop-aware load elimination.
-//
-FunctionPass *createLoopLoadEliminationPass();
-
-//===----------------------------------------------------------------------===//
-//
-// LoopVersioning - Perform loop multi-versioning.
-//
-FunctionPass *createLoopVersioningPass();
-
-//===----------------------------------------------------------------------===//
-//
// LoopDataPrefetch - Perform data prefetching in loops.
//
FunctionPass *createLoopDataPrefetchPass();
//===----------------------------------------------------------------------===//
//
-// LibCallsShrinkWrap - Shrink-wraps a call to function if the result is not
-// used.
-//
-FunctionPass *createLibCallsShrinkWrapPass();
-
-//===----------------------------------------------------------------------===//
-//
// LoopSimplifyCFG - This pass performs basic CFG simplification on loops,
// primarily to help other loop passes.
//
@@ -524,13 +294,6 @@ Pass *createLoopSimplifyCFGPass();
//===----------------------------------------------------------------------===//
//
-// WarnMissedTransformations - This pass emits warnings for leftover forced
-// transformations.
-//
-Pass *createWarnMissedTransformationsPass();
-
-//===----------------------------------------------------------------------===//
-//
// This pass does instruction simplification on each
// instruction in a function.
//
diff --git a/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h b/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
index e59734b92244..fa13ed73d506 100644
--- a/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
+++ b/llvm/include/llvm/Transforms/Scalar/ConstantHoisting.h
@@ -36,6 +36,7 @@
#ifndef LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H
#define LLVM_TRANSFORMS_SCALAR_CONSTANTHOISTING_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerUnion.h"
@@ -168,9 +169,13 @@ private:
/// Keep track of cast instructions we already cloned.
MapVector<Instruction *, Instruction *> ClonedCastMap;
+ void collectMatInsertPts(
+ const consthoist::RebasedConstantListType &RebasedConstants,
+ SmallVectorImpl<Instruction *> &MatInsertPts) const;
Instruction *findMatInsertPt(Instruction *Inst, unsigned Idx = ~0U) const;
SetVector<Instruction *>
- findConstantInsertionPoint(const consthoist::ConstantInfo &ConstInfo) const;
+ findConstantInsertionPoint(const consthoist::ConstantInfo &ConstInfo,
+ const ArrayRef<Instruction *> MatInsertPts) const;
void collectConstantCandidates(ConstCandMapType &ConstCandMap,
Instruction *Inst, unsigned Idx,
ConstantInt *ConstInt);
@@ -191,8 +196,19 @@ private:
// If BaseGV is nullptr, find base among Constant Integer candidates;
// otherwise find base among constant GEPs sharing BaseGV as base pointer.
void findBaseConstants(GlobalVariable *BaseGV);
- void emitBaseConstants(Instruction *Base, Constant *Offset, Type *Ty,
- const consthoist::ConstantUser &ConstUser);
+
+ /// A ConstantUser grouped with the Type and Constant adjustment. The user
+ /// will be adjusted by Offset.
+ struct UserAdjustment {
+ Constant *Offset;
+ Type *Ty;
+ Instruction *MatInsertPt;
+ const consthoist::ConstantUser User;
+ UserAdjustment(Constant *O, Type *T, Instruction *I,
+ consthoist::ConstantUser U)
+ : Offset(O), Ty(T), MatInsertPt(I), User(U) {}
+ };
+ void emitBaseConstants(Instruction *Base, UserAdjustment *Adj);
// If BaseGV is nullptr, emit Constant Integer base; otherwise emit
// constant GEP base.
bool emitBaseConstants(GlobalVariable *BaseGV);
diff --git a/llvm/include/llvm/Transforms/Scalar/GVN.h b/llvm/include/llvm/Transforms/Scalar/GVN.h
index 4666a5315616..0a00e3af03d2 100644
--- a/llvm/include/llvm/Transforms/Scalar/GVN.h
+++ b/llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -56,7 +56,7 @@ class TargetLibraryInfo;
class Value;
/// A private "module" namespace for types and utilities used by GVN. These
/// are implementation details and should not be used by clients.
-namespace gvn LLVM_LIBRARY_VISIBILITY {
+namespace LLVM_LIBRARY_VISIBILITY gvn {
struct AvailableValue;
struct AvailableValueInBlock;
@@ -329,6 +329,11 @@ private:
AvailValInBlkVect &ValuesPerBlock,
UnavailBlkVect &UnavailableBlocks);
+ /// Given a critical edge from Pred to LoadBB, find a load instruction
+ /// which is identical to Load from another successor of Pred.
+ LoadInst *findLoadToHoistIntoPred(BasicBlock *Pred, BasicBlock *LoadBB,
+ LoadInst *Load);
+
bool PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
UnavailBlkVect &UnavailableBlocks);
@@ -342,7 +347,8 @@ private:
/// AvailableLoads (connected by Phis if needed).
void eliminatePartiallyRedundantLoad(
LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
- MapVector<BasicBlock *, Value *> &AvailableLoads);
+ MapVector<BasicBlock *, Value *> &AvailableLoads,
+ MapVector<BasicBlock *, LoadInst *> *CriticalEdgePredAndLoad);
// Other helper routines
bool processInstruction(Instruction *I);
@@ -355,6 +361,7 @@ private:
BasicBlock *Curr, unsigned int ValNo);
Value *findLeader(const BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
+ void removeInstruction(Instruction *I);
void verifyRemoved(const Instruction *I) const;
bool splitCriticalEdges();
BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
diff --git a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
index 09d08bf423a6..3364d7eaee42 100644
--- a/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -20,7 +20,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/IR/ValueHandle.h"
+#include <optional>
#include <utility>
namespace llvm {
@@ -31,7 +33,6 @@ class BinaryOperator;
class BranchInst;
class CmpInst;
class Constant;
-class DomTreeUpdater;
class Function;
class Instruction;
class IntrinsicInst;
@@ -75,14 +76,16 @@ enum ConstantPreference { WantInteger, WantBlockAddress };
/// In this case, the unconditional branch at the end of the first if can be
/// revectored to the false side of the second if.
class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
- TargetLibraryInfo *TLI;
- TargetTransformInfo *TTI;
- LazyValueInfo *LVI;
- AAResults *AA;
- DomTreeUpdater *DTU;
- std::unique_ptr<BlockFrequencyInfo> BFI;
- std::unique_ptr<BranchProbabilityInfo> BPI;
- bool HasProfileData = false;
+ Function *F = nullptr;
+ FunctionAnalysisManager *FAM = nullptr;
+ TargetLibraryInfo *TLI = nullptr;
+ TargetTransformInfo *TTI = nullptr;
+ LazyValueInfo *LVI = nullptr;
+ AAResults *AA = nullptr;
+ std::unique_ptr<DomTreeUpdater> DTU;
+ std::optional<BlockFrequencyInfo *> BFI;
+ std::optional<BranchProbabilityInfo *> BPI;
+ bool ChangedSinceLastAnalysisUpdate = false;
bool HasGuards = false;
#ifndef LLVM_ENABLE_ABI_BREAKING_CHECKS
SmallPtrSet<const BasicBlock *, 16> LoopHeaders;
@@ -97,18 +100,16 @@ public:
JumpThreadingPass(int T = -1);
// Glue for old PM.
- bool runImpl(Function &F, TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
- LazyValueInfo *LVI, AAResults *AA, DomTreeUpdater *DTU,
- bool HasProfileData, std::unique_ptr<BlockFrequencyInfo> BFI,
- std::unique_ptr<BranchProbabilityInfo> BPI);
+ bool runImpl(Function &F, FunctionAnalysisManager *FAM,
+ TargetLibraryInfo *TLI, TargetTransformInfo *TTI,
+ LazyValueInfo *LVI, AAResults *AA,
+ std::unique_ptr<DomTreeUpdater> DTU,
+ std::optional<BlockFrequencyInfo *> BFI,
+ std::optional<BranchProbabilityInfo *> BPI);
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
- void releaseMemory() {
- BFI.reset();
- BPI.reset();
- }
-
+ DomTreeUpdater *getDomTreeUpdater() const { return DTU.get(); }
void findLoopHeaders(Function &F);
bool processBlock(BasicBlock *BB);
bool maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB);
@@ -168,9 +169,41 @@ private:
BasicBlock *splitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
const char *Suffix);
void updateBlockFreqAndEdgeWeight(BasicBlock *PredBB, BasicBlock *BB,
- BasicBlock *NewBB, BasicBlock *SuccBB);
+ BasicBlock *NewBB, BasicBlock *SuccBB,
+ BlockFrequencyInfo *BFI,
+ BranchProbabilityInfo *BPI,
+ bool HasProfile);
/// Check if the block has profile metadata for its outgoing edges.
bool doesBlockHaveProfileData(BasicBlock *BB);
+
+ /// Returns analysis preserved by the pass.
+ PreservedAnalyses getPreservedAnalysis() const;
+
+ /// Helper function to run "external" analysis in the middle of JumpThreading.
+ /// It takes care of updating/invalidating other existing analyses
+ /// before/after running the "external" one.
+ template <typename AnalysisT>
+ typename AnalysisT::Result *runExternalAnalysis();
+
+ /// Returns an existing instance of BPI if any, otherwise nullptr. By
+ /// "existing" we mean either cached result provided by FunctionAnalysisManger
+ /// or created by preceding call to 'getOrCreateBPI'.
+ BranchProbabilityInfo *getBPI();
+
+ /// Returns an existing instance of BFI if any, otherwise nullptr. By
+ /// "existing" we mean either cached result provided by FunctionAnalysisManger
+ /// or created by preceding call to 'getOrCreateBFI'.
+ BlockFrequencyInfo *getBFI();
+
+ /// Returns an existing instance of BPI if any, otherwise:
+ /// if 'HasProfile' is true, creates a new instance through the
+ /// FunctionAnalysisManager; otherwise returns nullptr.
+ BranchProbabilityInfo *getOrCreateBPI(bool Force = false);
+
+ /// Returns an existing instance of BFI if any, otherwise:
+ /// if 'HasProfile' is true, creates a new instance through the
+ /// FunctionAnalysisManager; otherwise returns nullptr.
+ BlockFrequencyInfo *getOrCreateBFI(bool Force = false);
};
} // end namespace llvm
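With this change JumpThreadingPass owns its DomTreeUpdater and obtains BPI/BFI lazily through the FunctionAnalysisManager instead of receiving them in runImpl. A minimal sketch of driving the pass under the new pass manager, assuming the usual PassBuilder registration boilerplate (only standard new-PM APIs are used; nothing below is specific to this patch beyond the pass itself):

#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/Scalar/JumpThreading.h"

using namespace llvm;

static void runJumpThreading(Function &F) {
  PassBuilder PB;
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  FunctionPassManager FPM;
  FPM.addPass(JumpThreadingPass());
  FPM.run(F, FAM); // BPI/BFI are created on demand via getOrCreateBPI/BFI
}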
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
index b996e9e3613e..c8c4d5e9a539 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h
@@ -234,7 +234,7 @@ struct RequireAnalysisPass<AnalysisT, Loop, LoopAnalysisManager,
function_ref<StringRef(StringRef)> MapClassName2PassName) {
auto ClassName = AnalysisT::name();
auto PassName = MapClassName2PassName(ClassName);
- OS << "require<" << PassName << ">";
+ OS << "require<" << PassName << '>';
}
};
@@ -426,7 +426,7 @@ std::optional<PreservedAnalyses> LoopPassManager::runSinglePass(
/// The adaptor comes with two modes: the loop mode and the loop-nest mode, and
/// the worklist updater lived inside will be in the same mode as the adaptor
/// (refer to the documentation of \c LPMUpdater for more detailed explanation).
-/// Specifically, in loop mode, all loops in the funciton will be pushed into
+/// Specifically, in loop mode, all loops in the function will be pushed into
/// the worklist and processed by \p Pass, while only top-level loops are
/// processed in loop-nest mode. Please refer to the various specializations of
/// \fn createLoopFunctionToLoopPassAdaptor to see when loop mode and loop-nest
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopRotation.h b/llvm/include/llvm/Transforms/Scalar/LoopRotation.h
index c0e6f105a412..cd108f7383e4 100644
--- a/llvm/include/llvm/Transforms/Scalar/LoopRotation.h
+++ b/llvm/include/llvm/Transforms/Scalar/LoopRotation.h
@@ -28,6 +28,9 @@ public:
PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR, LPMUpdater &U);
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
+
private:
const bool EnableHeaderDuplication;
const bool PrepareForLTO;
diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
index 7c3b8ba3086e..d3e5e2591eea 100644
--- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h
@@ -20,6 +20,7 @@
namespace llvm {
class AAResults;
+class AllocaInst;
class BatchAAResults;
class AssumptionCache;
class CallBase;
@@ -73,9 +74,13 @@ private:
bool performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, MemSetInst *MemSet,
BatchAAResults &BAA);
bool processByValArgument(CallBase &CB, unsigned ArgNo);
+ bool processImmutArgument(CallBase &CB, unsigned ArgNo);
Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
Value *ByteVal);
bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI);
+ bool performStackMoveOptzn(Instruction *Load, Instruction *Store,
+ AllocaInst *DestAlloca, AllocaInst *SrcAlloca,
+ uint64_t Size, BatchAAResults &BAA);
void eraseInstruction(Instruction *I);
bool iterateOnFunction(Function &F);
diff --git a/llvm/include/llvm/Transforms/Scalar/PlaceSafepoints.h b/llvm/include/llvm/Transforms/Scalar/PlaceSafepoints.h
new file mode 100644
index 000000000000..60ffe6e58b11
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Scalar/PlaceSafepoints.h
@@ -0,0 +1,71 @@
+//===- PlaceSafepoints.h - Place GC Safepoints ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Place garbage collection safepoints at appropriate locations in the IR. This
+// does not make relocation semantics or variable liveness explicit. That's
+// done by RewriteStatepointsForGC.
+//
+// Terminology:
+// - A call is said to be "parseable" if there is a stack map generated for the
+// return PC of the call. A runtime can determine where values listed in the
+// deopt arguments and (after RewriteStatepointsForGC) gc arguments are located
+// on the stack when the code is suspended inside such a call. Every parse
+// point is represented by a call wrapped in a gc.statepoint intrinsic.
+// - A "poll" is an explicit check in the generated code to determine if the
+// runtime needs the generated code to cooperate by calling a helper routine
+// and thus suspending its execution at a known state. The call to the helper
+// routine will be parseable. The (gc & runtime specific) logic of a poll is
+// assumed to be provided in a function of the name "gc.safepoint_poll".
+//
+// We aim to insert polls such that running code can quickly be brought to a
+// well defined state for inspection by the collector. In the current
+// implementation, this is done via the insertion of poll sites at method entry
+// and the backedge of most loops. We try to avoid inserting more polls than
+// are necessary to ensure a finite period between poll sites. This is not
+// because the poll itself is expensive in the generated code; it's not. Polls
+// do tend to impact the optimizer itself in negative ways; we'd like to avoid
+// perturbing the optimization of the method as much as we can.
+//
+// We also need to make most call sites parseable. The callee might execute a
+// poll (or otherwise be inspected by the GC). If so, the entire stack
+// (including the suspended frame of the current method) must be parseable.
+//
+// This pass will insert:
+// - Call parse points ("call safepoints") for any call which may need to
+// reach a safepoint during the execution of the callee function.
+// - Backedge safepoint polls and entry safepoint polls to ensure that
+// executing code reaches a safepoint poll in a finite amount of time.
+//
+// We do not currently support return statepoints, but adding them would not
+// be hard. They are not required for correctness - entry safepoints are an
+// alternative - but some GCs may prefer them. Patches welcome.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_PLACESAFEPOINTS_H
+#define LLVM_TRANSFORMS_SCALAR_PLACESAFEPOINTS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class TargetLibraryInfo;
+
+class PlaceSafepointsPass : public PassInfoMixin<PlaceSafepointsPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+ bool runImpl(Function &F, const TargetLibraryInfo &TLI);
+
+ void cleanup() {}
+
+private:
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_PLACESAFEPOINTS_H
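PlaceSafepointsPass is newly exposed to the new pass manager here. A minimal sketch of scheduling it, assuming it runs before RewriteStatepointsForGC as the header comment describes (the surrounding pipeline and helper name are illustrative):

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/PlaceSafepoints.h"

using namespace llvm;

void addSafepointPolls(FunctionPassManager &FPM) {
  // Inserts entry and backedge safepoint polls plus call parse points;
  // relocation semantics are made explicit later by the module-level
  // RewriteStatepointsForGC pass, which is not shown here.
  FPM.addPass(PlaceSafepointsPass());
}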
diff --git a/llvm/include/llvm/Transforms/Scalar/SCCP.h b/llvm/include/llvm/Transforms/Scalar/SCCP.h
index 9d5441a799f9..7803f008c618 100644
--- a/llvm/include/llvm/Transforms/Scalar/SCCP.h
+++ b/llvm/include/llvm/Transforms/Scalar/SCCP.h
@@ -20,19 +20,10 @@
#ifndef LLVM_TRANSFORMS_SCALAR_SCCP_H
#define LLVM_TRANSFORMS_SCALAR_SCCP_H
-#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/IR/PassManager.h"
-#include <functional>
-
namespace llvm {
-class AssumptionCache;
-class DataLayout;
class Function;
-class Module;
-class TargetLibraryInfo;
-class TargetTransformInfo;
-struct AnalysisResultsForFn;
/// This pass performs function-level constant propagation and merging.
class SCCPPass : public PassInfoMixin<SCCPPass> {
diff --git a/llvm/include/llvm/Transforms/Scalar/SROA.h b/llvm/include/llvm/Transforms/Scalar/SROA.h
index 26348da22021..b18e3054ef3a 100644
--- a/llvm/include/llvm/Transforms/Scalar/SROA.h
+++ b/llvm/include/llvm/Transforms/Scalar/SROA.h
@@ -21,6 +21,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
+#include <variant>
#include <vector>
namespace llvm {
@@ -39,7 +40,7 @@ class Use;
/// A private "module" namespace for types and utilities used by SROA. These
/// are implementation details and should not be used by clients.
-namespace sroa LLVM_LIBRARY_VISIBILITY {
+namespace LLVM_LIBRARY_VISIBILITY sroa {
class AllocaSliceRewriter;
class AllocaSlices;
@@ -105,7 +106,7 @@ class SROAPass : public PassInfoMixin<SROAPass> {
/// directly promoted. Finally, each time we rewrite a use of an alloca other
/// the one being actively rewritten, we add it back onto the list if not
/// already present to ensure it is re-visited.
- SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> Worklist;
+ SmallSetVector<AllocaInst *, 16> Worklist;
/// A collection of instructions to delete.
/// We try to batch deletions to simplify code and make things a bit more
@@ -120,7 +121,7 @@ class SROAPass : public PassInfoMixin<SROAPass> {
///
/// Note that we have to be very careful to clear allocas out of this list in
/// the event they are deleted.
- SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> PostPromotionWorklist;
+ SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
/// A collection of alloca instructions we can directly promote.
std::vector<AllocaInst *> PromotableAllocas;
@@ -130,7 +131,7 @@ class SROAPass : public PassInfoMixin<SROAPass> {
/// All of these PHIs have been checked for the safety of speculation and by
/// being speculated will allow promoting allocas currently in the promotable
/// queue.
- SetVector<PHINode *, SmallVector<PHINode *, 8>> SpeculatablePHIs;
+ SmallSetVector<PHINode *, 8> SpeculatablePHIs;
/// A worklist of select instructions to rewrite prior to promoting
/// allocas.
diff --git a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
index 80c3f187be8c..5524b55b81b5 100644
--- a/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
+++ b/llvm/include/llvm/Transforms/Scalar/Scalarizer.h
@@ -7,8 +7,9 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// This pass converts vector operations into scalar operations, in order
-/// to expose optimization opportunities on the individual scalar operations.
+/// This pass converts vector operations into scalar operations (or, optionally,
+/// operations on smaller vector widths), in order to expose optimization
+/// opportunities on the individual scalar operations.
/// It is mainly intended for targets that do not have vector units, but it
/// may also be useful for revectorizing code to different vector widths.
//
@@ -26,24 +27,29 @@ class Function;
class FunctionPass;
struct ScalarizerPassOptions {
- // These optional booleans correspond 1:1 to cl::opt<bool> options defined in
+ // These options correspond 1:1 to cl::opt options defined in
// Scalarizer.cpp. When the cl::opt are specified, they take precedence.
- // When the cl::opt are not specified, the present optional booleans allow to
+ // When the cl::opt are not specified, the present optional values allow one to
// override the cl::opt's default values.
std::optional<bool> ScalarizeVariableInsertExtract;
std::optional<bool> ScalarizeLoadStore;
+ std::optional<unsigned> ScalarizeMinBits;
};
class ScalarizerPass : public PassInfoMixin<ScalarizerPass> {
ScalarizerPassOptions Options;
public:
+ ScalarizerPass() = default;
+ ScalarizerPass(const ScalarizerPassOptions &Options) : Options(Options) {}
+
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
void setScalarizeVariableInsertExtract(bool Value) {
Options.ScalarizeVariableInsertExtract = Value;
}
void setScalarizeLoadStore(bool Value) { Options.ScalarizeLoadStore = Value; }
+ void setScalarizeMinBits(unsigned Value) { Options.ScalarizeMinBits = Value; }
};
/// Create a legacy pass manager instance of the Scalarizer pass
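ScalarizerPassOptions gains ScalarizeMinBits, and ScalarizerPass gains a constructor taking the options struct. A minimal sketch of configuring the pass; the 32-bit minimum chunk size is an arbitrary example value, not a recommendation:

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/Scalarizer.h"

using namespace llvm;

void addConfiguredScalarizer(FunctionPassManager &FPM) {
  ScalarizerPassOptions Opts;
  Opts.ScalarizeLoadStore = true; // also scalarize vector loads and stores
  Opts.ScalarizeMinBits = 32;     // split into pieces of at least 32 bits
  FPM.addPass(ScalarizerPass(Opts));
}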
diff --git a/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h b/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h
index 5bd6ce164dc3..7b37eb7118a0 100644
--- a/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h
+++ b/llvm/include/llvm/Transforms/Scalar/SeparateConstOffsetFromGEP.h
@@ -19,6 +19,8 @@ class SeparateConstOffsetFromGEPPass
public:
SeparateConstOffsetFromGEPPass(bool LowerGEP = false) : LowerGEP(LowerGEP) {}
+ void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
};
diff --git a/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h b/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
index 80d098a1ea52..8f8cad0aab6d 100644
--- a/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
+++ b/llvm/include/llvm/Transforms/Scalar/WarnMissedTransforms.h
@@ -16,10 +16,6 @@
#include "llvm/IR/PassManager.h"
namespace llvm {
-class Function;
-class Pass;
-class PassRegistry;
-
// New pass manager boilerplate.
class WarnMissedTransformationsPass
: public PassInfoMixin<WarnMissedTransformationsPass> {
@@ -28,10 +24,6 @@ public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
-
-// Legacy pass manager boilerplate.
-Pass *createWarnMissedTransformationsPass();
-void initializeWarnMissedTransformationsLegacyPass(PassRegistry &);
} // end namespace llvm
#endif // LLVM_TRANSFORMS_SCALAR_WARNMISSEDTRANSFORMS_H
diff --git a/llvm/include/llvm/Transforms/Utils.h b/llvm/include/llvm/Transforms/Utils.h
index 5abfb2cceb58..0fa6de3f6713 100644
--- a/llvm/include/llvm/Transforms/Utils.h
+++ b/llvm/include/llvm/Transforms/Utils.h
@@ -21,11 +21,6 @@ class FunctionPass;
class Pass;
//===----------------------------------------------------------------------===//
-// createMetaRenamerPass - Rename everything with metasyntatic names.
-//
-ModulePass *createMetaRenamerPass();
-
-//===----------------------------------------------------------------------===//
//
// LowerInvoke - This pass removes invoke instructions, converting them to call
// instructions.
@@ -35,13 +30,6 @@ extern char &LowerInvokePassID;
//===----------------------------------------------------------------------===//
//
-// InstructionNamer - Give any unnamed non-void instructions "tmp" names.
-//
-FunctionPass *createInstructionNamerPass();
-extern char &InstructionNamerID;
-
-//===----------------------------------------------------------------------===//
-//
// LowerSwitch - This pass converts SwitchInst instructions into a sequence of
// chained binary branch instructions.
//
@@ -72,11 +60,6 @@ extern char &LCSSAID;
//===----------------------------------------------------------------------===//
//
-// AddDiscriminators - Add DWARF path discriminators to the IR.
-FunctionPass *createAddDiscriminatorsPass();
-
-//===----------------------------------------------------------------------===//
-//
// PromoteMemoryToRegister - This pass is used to promote memory references to
// be register references. A simple example of the transformation performed by
// this pass is:
@@ -87,7 +70,7 @@ FunctionPass *createAddDiscriminatorsPass();
// %Y = load i32* %X
// ret i32 %Y
//
-FunctionPass *createPromoteMemoryToRegisterPass();
+FunctionPass *createPromoteMemoryToRegisterPass(bool IsForced = false);
//===----------------------------------------------------------------------===//
//
@@ -100,17 +83,6 @@ FunctionPass *createPromoteMemoryToRegisterPass();
Pass *createLoopSimplifyPass();
extern char &LoopSimplifyID;
-/// This function returns a new pass that downgrades the debug info in the
-/// module to line tables only.
-ModulePass *createStripNonLineTableDebugLegacyPass();
-
-//===----------------------------------------------------------------------===//
-//
-// InjectTLIMappingsLegacy - populates the VFABI attribute with the
-// scalar-to-vector mappings from the TargetLibraryInfo.
-//
-FunctionPass *createInjectTLIMappingsLegacyPass();
-
//===----------------------------------------------------------------------===//
//
// UnifyLoopExits - For each loop, creates a new block N such that all exiting
@@ -128,13 +100,6 @@ FunctionPass *createFixIrreduciblePass();
//===----------------------------------------------------------------------===//
//
-// AssumeSimplify - remove redundant assumes and merge assumes in the same
-// BasicBlock when possible.
-//
-FunctionPass *createAssumeSimplifyPass();
-
-//===----------------------------------------------------------------------===//
-//
// CanonicalizeFreezeInLoops - Canonicalize freeze instructions in loops so they
// don't block SCEV.
//
diff --git a/llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h b/llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h
index 65dbf47e9bbc..55e92c37a167 100644
--- a/llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h
+++ b/llvm/include/llvm/Transforms/Utils/AMDGPUEmitPrintf.h
@@ -18,7 +18,8 @@
namespace llvm {
-Value *emitAMDGPUPrintfCall(IRBuilder<> &Builder, ArrayRef<Value *> Args);
+Value *emitAMDGPUPrintfCall(IRBuilder<> &Builder, ArrayRef<Value *> Args,
+ bool isBuffered);
} // end namespace llvm
diff --git a/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h b/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h
index 991ecb8efbd0..7c3c8eb8d6a4 100644
--- a/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h
+++ b/llvm/include/llvm/Transforms/Utils/AssumeBundleBuilder.h
@@ -22,7 +22,6 @@
namespace llvm {
class AssumeInst;
class Function;
-class FunctionPass;
class Instruction;
class AssumptionCache;
class DominatorTree;
@@ -40,7 +39,8 @@ AssumeInst *buildAssumeFromInst(Instruction *I);
/// silently become invalid.
/// The DominatorTree can optionally be provided to enable cross-block
/// reasoning.
-void salvageKnowledge(Instruction *I, AssumptionCache *AC = nullptr,
+/// Returns true if a change was made.
+bool salvageKnowledge(Instruction *I, AssumptionCache *AC = nullptr,
DominatorTree *DT = nullptr);
/// Build and return a new assume created from the provided knowledge
@@ -56,8 +56,6 @@ struct AssumeSimplifyPass : public PassInfoMixin<AssumeSimplifyPass> {
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
-FunctionPass *createAssumeSimplifyPass();
-
/// This pass will try to build an llvm.assume for every instruction in the
/// function. Its main purpose is testing.
struct AssumeBuilderPass : public PassInfoMixin<AssumeBuilderPass> {
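salvageKnowledge now reports whether it changed the IR. A minimal sketch of using the return value to keep a pass's Changed flag accurate; the helper name is invented for illustration, and the optional AC/DT arguments are left at their defaults:

#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"

using namespace llvm;

bool salvageAllKnowledge(Function &F) {
  bool Changed = false;
  for (Instruction &I : instructions(F))
    Changed |= salvageKnowledge(&I); // may build or extend an llvm.assume
  return Changed;
}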
diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index c97baaf4afc2..1c528a0100da 100644
--- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -32,6 +32,7 @@ class BlockFrequencyInfo;
class BranchProbabilityInfo;
class DomTreeUpdater;
class Function;
+class IRBuilderBase;
class LoopInfo;
class MDNode;
class MemoryDependenceResults;
@@ -415,42 +416,22 @@ ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
/// UnreachableInst, otherwise it branches to Tail.
/// Returns the NewBasicBlock's terminator.
///
-/// Updates DT and LI if given.
-///
-/// FIXME: deprecated, switch to the DomTreeUpdater-based one.
+/// Updates DTU and LI if given.
Instruction *SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
- bool Unreachable, MDNode *BranchWeights,
- DominatorTree *DT,
+ bool Unreachable,
+ MDNode *BranchWeights = nullptr,
+ DomTreeUpdater *DTU = nullptr,
LoopInfo *LI = nullptr,
BasicBlock *ThenBlock = nullptr);
-/// Split the containing block at the specified instruction - everything before
-/// SplitBefore stays in the old basic block, and the rest of the instructions
-/// in the BB are moved to a new block. The two blocks are connected by a
-/// conditional branch (with value of Cmp being the condition).
-/// Before:
-/// Head
-/// SplitBefore
-/// Tail
-/// After:
-/// Head
-/// if (Cond)
-/// ThenBlock
-/// SplitBefore
-/// Tail
-///
-/// If \p ThenBlock is not specified, a new block will be created for it.
-/// If \p Unreachable is true, the newly created block will end with
-/// UnreachableInst, otherwise it branches to Tail.
-/// Returns the NewBasicBlock's terminator.
-///
-/// Updates DT and LI if given.
-Instruction *SplitBlockAndInsertIfThen(Value *Cond, Instruction *SplitBefore,
+/// Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false
+/// path of the branch.
+Instruction *SplitBlockAndInsertIfElse(Value *Cond, Instruction *SplitBefore,
bool Unreachable,
MDNode *BranchWeights = nullptr,
DomTreeUpdater *DTU = nullptr,
LoopInfo *LI = nullptr,
- BasicBlock *ThenBlock = nullptr);
+ BasicBlock *ElseBlock = nullptr);
/// SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen,
/// but also creates the ElseBlock.
@@ -472,7 +453,75 @@ void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
Instruction **ThenTerm,
Instruction **ElseTerm,
MDNode *BranchWeights = nullptr,
- DomTreeUpdater *DTU = nullptr);
+ DomTreeUpdater *DTU = nullptr,
+ LoopInfo *LI = nullptr);
+
+/// Split the containing block at the specified instruction - everything before
+/// SplitBefore stays in the old basic block, and the rest of the instructions
+/// in the BB are moved to a new block. The two blocks are connected by a
+/// conditional branch (with value of Cmp being the condition).
+/// Before:
+/// Head
+/// SplitBefore
+/// Tail
+/// After:
+/// Head
+/// if (Cond)
+/// TrueBlock
+/// else
+/// FalseBlock
+/// SplitBefore
+/// Tail
+///
+/// If \p ThenBlock is null, the resulting CFG won't contain the TrueBlock. If
+/// \p ThenBlock is non-null and points to a non-null BasicBlock pointer, that
+/// block will be inserted as the TrueBlock. Otherwise a new block will be
+/// created. Likewise for the \p ElseBlock parameter.
+/// If \p UnreachableThen or \p UnreachableElse is true, the corresponding newly
+/// created blocks will end with UnreachableInst, otherwise with branches to
+/// Tail. The function will not modify existing basic blocks passed to it. The
+/// caller must ensure that Tail is reachable from Head.
+/// Returns the newly created blocks in \p ThenBlock and \p ElseBlock.
+/// Updates DTU and LI if given.
+void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
+ BasicBlock **ThenBlock,
+ BasicBlock **ElseBlock,
+ bool UnreachableThen = false,
+ bool UnreachableElse = false,
+ MDNode *BranchWeights = nullptr,
+ DomTreeUpdater *DTU = nullptr,
+ LoopInfo *LI = nullptr);
+
+/// Insert a for (int i = 0; i < End; i++) loop structure (with the exception
+/// that \p End is assumed > 0, and thus not checked on entry) at \p
+/// SplitBefore. Returns the first insert point in the loop body, and the
+/// PHINode for the induction variable (i.e. "i" above).
+std::pair<Instruction*, Value*>
+SplitBlockAndInsertSimpleForLoop(Value *End, Instruction *SplitBefore);
+
+/// Utility function for performing a given action on each lane of a vector
+/// with \p EC elements. To simplify porting legacy code, this defaults to
+/// unrolling the implied loop for non-scalable element counts, but this is
+/// not considered to be part of the contract of this routine, and is
+/// expected to change in the future. The callback takes as arguments an
+/// IRBuilder whose insert point is correctly set for instantiating the
+/// given index, and a value which is (at runtime) the index to access.
+/// This index *may* be a constant.
+void SplitBlockAndInsertForEachLane(ElementCount EC, Type *IndexTy,
+ Instruction *InsertBefore,
+ std::function<void(IRBuilderBase&, Value*)> Func);
+
+/// Utility function for performing a given action on each lane of a vector
+/// with \p EVL effective length. EVL is assumed > 0. To simplify porting legacy
+/// code, this defaults to unrolling the implied loop for non-scalable element
+/// counts, but this is not considered to be part of the contract of this
+/// routine, and is expected to change in the future. The callback takes as
+/// arguments an IRBuilder whose insert point is correctly set for instantiating
+/// the given index, and a value which is (at runtime) the index to access. This
+/// index *may* be a constant.
+void SplitBlockAndInsertForEachLane(
+ Value *End, Instruction *InsertBefore,
+ std::function<void(IRBuilderBase &, Value *)> Func);
/// Check whether BB is the merge point of a if-region.
/// If so, return the branch instruction that determines which entry into
@@ -585,6 +634,10 @@ BasicBlock *CreateControlFlowHub(
const SetVector<BasicBlock *> &Successors, const StringRef Prefix,
std::optional<unsigned> MaxControlFlowBooleans = std::nullopt);
+// Utility function for inverting the branch condition and swapping its
+// successors.
+void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_BASICBLOCKUTILS_H
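The SplitBlockAndInsertIfThen family now takes a DomTreeUpdater (defaulted) and gains an if/else variant plus loop-building helpers such as SplitBlockAndInsertSimpleForLoop. A minimal sketch of guarding an instruction with the updated overload, assuming Cond and Store are supplied by the caller; branch weights, DTU and LI are left at their defaults for brevity:

#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

void guardStore(Value *Cond, StoreInst *Store) {
  // Everything from Store onwards moves to the Tail block; the new Then
  // block branches to Tail and initially holds only its terminator.
  Instruction *ThenTerm =
      SplitBlockAndInsertIfThen(Cond, Store, /*Unreachable=*/false);
  Store->moveBefore(ThenTerm); // perform the store only when Cond is true
}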
diff --git a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
index bb1cb07342d4..429970cbe304 100644
--- a/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/BuildLibCalls.h
@@ -248,6 +248,21 @@ namespace llvm {
/// Emit a call to the calloc function.
Value *emitCalloc(Value *Num, Value *Size, IRBuilderBase &B,
const TargetLibraryInfo &TLI);
+
+ /// Emit a call to the hot/cold operator new function.
+ Value *emitHotColdNew(Value *Num, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI, LibFunc NewFunc,
+ uint8_t HotCold);
+ Value *emitHotColdNewNoThrow(Value *Num, Value *NoThrow, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI, LibFunc NewFunc,
+ uint8_t HotCold);
+ Value *emitHotColdNewAligned(Value *Num, Value *Align, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI, LibFunc NewFunc,
+ uint8_t HotCold);
+ Value *emitHotColdNewAlignedNoThrow(Value *Num, Value *Align, Value *NoThrow,
+ IRBuilderBase &B,
+ const TargetLibraryInfo *TLI,
+ LibFunc NewFunc, uint8_t HotCold);
}
#endif
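These emitters create calls to the hot/cold operator new variants. A minimal sketch of wiring one up; the LibFunc argument is assumed to be one of the __hot_cold_t operator-new functions known to the TargetLibraryInfo, the hint byte's meaning is defined by the target allocator, and the helper name is invented for illustration:

#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"

using namespace llvm;

Value *tryEmitHotColdNew(Value *Size, IRBuilderBase &B,
                         const TargetLibraryInfo *TLI, LibFunc HotColdNewFn,
                         uint8_t Hint) {
  // Presumably returns nullptr when the library function is not emittable
  // for the current target, so callers should check the result.
  return emitHotColdNew(Size, B, TLI, HotColdNewFn, Hint);
}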
diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index 77050391746b..1c342b871a4a 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -33,7 +33,6 @@ class AAResults;
class AllocaInst;
class BasicBlock;
class BlockFrequencyInfo;
-class CallGraph;
class DebugInfoFinder;
class DominatorTree;
class Function;
@@ -203,18 +202,15 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
class InlineFunctionInfo {
public:
explicit InlineFunctionInfo(
- CallGraph *cg = nullptr,
function_ref<AssumptionCache &(Function &)> GetAssumptionCache = nullptr,
ProfileSummaryInfo *PSI = nullptr,
BlockFrequencyInfo *CallerBFI = nullptr,
BlockFrequencyInfo *CalleeBFI = nullptr, bool UpdateProfile = true)
- : CG(cg), GetAssumptionCache(GetAssumptionCache), PSI(PSI),
- CallerBFI(CallerBFI), CalleeBFI(CalleeBFI),
- UpdateProfile(UpdateProfile) {}
+ : GetAssumptionCache(GetAssumptionCache), PSI(PSI), CallerBFI(CallerBFI),
+ CalleeBFI(CalleeBFI), UpdateProfile(UpdateProfile) {}
/// If non-null, InlineFunction will update the callgraph to reflect the
/// changes it makes.
- CallGraph *CG;
function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
ProfileSummaryInfo *PSI;
BlockFrequencyInfo *CallerBFI, *CalleeBFI;
@@ -286,7 +282,7 @@ Loop *cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,
SmallVectorImpl<BasicBlock *> &Blocks);
/// Remaps instructions in \p Blocks using the mapping in \p VMap.
-void remapInstructionsInBlocks(const SmallVectorImpl<BasicBlock *> &Blocks,
+void remapInstructionsInBlocks(ArrayRef<BasicBlock *> Blocks,
ValueToValueMapTy &VMap);
/// Split edge between BB and PredBB and duplicate all non-Phi instructions
diff --git a/llvm/include/llvm/Transforms/Utils/CountVisits.h b/llvm/include/llvm/Transforms/Utils/CountVisits.h
new file mode 100644
index 000000000000..7000afbc4985
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/CountVisits.h
@@ -0,0 +1,28 @@
+//===- CountVisits.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_COUNT_VISITS_H
+#define LLVM_TRANSFORMS_UTILS_COUNT_VISITS_H
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Function;
+
+struct CountVisitsPass : PassInfoMixin<CountVisitsPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
+
+private:
+ StringMap<uint32_t> Counts;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_COUNT_VISITS_H
diff --git a/llvm/include/llvm/Transforms/Utils/Debugify.h b/llvm/include/llvm/Transforms/Utils/Debugify.h
index 24b9eeab6ee4..d4440942a64e 100644
--- a/llvm/include/llvm/Transforms/Utils/Debugify.h
+++ b/llvm/include/llvm/Transforms/Utils/Debugify.h
@@ -192,8 +192,8 @@ class DebugifyEachInstrumentation {
DebugifyStatsMap *DIStatsMap = nullptr;
public:
-
- void registerCallbacks(PassInstrumentationCallbacks &PIC);
+ void registerCallbacks(PassInstrumentationCallbacks &PIC,
+ ModuleAnalysisManager &MAM);
// Used within DebugifyMode::SyntheticDebugInfo mode.
void setDIStatsMap(DebugifyStatsMap &StatMap) { DIStatsMap = &StatMap; }
const DebugifyStatsMap &getDebugifyStatsMap() const { return *DIStatsMap; }
diff --git a/llvm/include/llvm/Transforms/Utils/Evaluator.h b/llvm/include/llvm/Transforms/Utils/Evaluator.h
index 6b9b382dbaf4..5d53773b5d6b 100644
--- a/llvm/include/llvm/Transforms/Utils/Evaluator.h
+++ b/llvm/include/llvm/Transforms/Utils/Evaluator.h
@@ -55,15 +55,15 @@ class Evaluator {
~MutableValue() { clear(); }
Type *getType() const {
- if (auto *C = Val.dyn_cast<Constant *>())
+ if (auto *C = dyn_cast_if_present<Constant *>(Val))
return C->getType();
- return Val.get<MutableAggregate *>()->Ty;
+ return cast<MutableAggregate *>(Val)->Ty;
}
Constant *toConstant() const {
- if (auto *C = Val.dyn_cast<Constant *>())
+ if (auto *C = dyn_cast_if_present<Constant *>(Val))
return C;
- return Val.get<MutableAggregate *>()->toConstant();
+ return cast<MutableAggregate *>(Val)->toConstant();
}
Constant *read(Type *Ty, APInt Offset, const DataLayout &DL) const;
diff --git a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
index b6b53d0f10cb..78761fc78fee 100644
--- a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
+++ b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
@@ -332,7 +332,9 @@ private:
int cmpOrderings(AtomicOrdering L, AtomicOrdering R) const;
int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const;
int cmpAttrs(const AttributeList L, const AttributeList R) const;
- int cmpRangeMetadata(const MDNode *L, const MDNode *R) const;
+ int cmpMDNode(const MDNode *L, const MDNode *R) const;
+ int cmpMetadata(const Metadata *L, const Metadata *R) const;
+ int cmpInstMetadata(Instruction const *L, Instruction const *R) const;
int cmpOperandBundlesSchema(const CallBase &LCS, const CallBase &RCS) const;
/// Compare two GEPs for equivalent pointer arithmetic.
diff --git a/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h b/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h
index d2ce0c5d3988..b6c402402b17 100644
--- a/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h
+++ b/llvm/include/llvm/Transforms/Utils/InjectTLIMappings.h
@@ -14,8 +14,6 @@
#define LLVM_TRANSFORMS_UTILS_INJECTTLIMAPPINGS_H
#include "llvm/IR/PassManager.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
namespace llvm {
class Function;
@@ -24,16 +22,5 @@ public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
-// Legacy pass
-class InjectTLIMappingsLegacy : public FunctionPass {
-public:
- static char ID;
- InjectTLIMappingsLegacy() : FunctionPass(ID) {
- initializeInjectTLIMappingsLegacyPass(*PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnFunction(Function &F) override;
-};
-
} // End namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_INJECTTLIMAPPINGS_H
diff --git a/llvm/include/llvm/Transforms/Utils/InstructionWorklist.h b/llvm/include/llvm/Transforms/Utils/InstructionWorklist.h
index a318c2cd28bb..c8f20636965e 100644
--- a/llvm/include/llvm/Transforms/Utils/InstructionWorklist.h
+++ b/llvm/include/llvm/Transforms/Utils/InstructionWorklist.h
@@ -108,6 +108,17 @@ public:
push(cast<Instruction>(U));
}
+ /// Should be called *after* decrementing the use-count on V.
+ void handleUseCountDecrement(Value *V) {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ add(I);
+ // Many folds have one-use limitations. If there's only one use left,
+ // revisit that use.
+ if (I->hasOneUse())
+ add(cast<Instruction>(*I->user_begin()));
+ }
+ }
+
/// Check that the worklist is empty and nuke the backing store for the map.
void zap() {
assert(WorklistMap.empty() && "Worklist empty, but map not?");
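handleUseCountDecrement requeues a value (and its single remaining user, if any) after one of its uses is dropped, so one-use folds get revisited. A minimal sketch of an InstCombine-style operand replacement using it; the helper name is invented for illustration:

#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/InstructionWorklist.h"

using namespace llvm;

void replaceOperandAndRequeue(InstructionWorklist &WL, Instruction &I,
                              unsigned OpNo, Value *NewOp) {
  Value *OldOp = I.getOperand(OpNo);
  I.setOperand(OpNo, NewOp);
  // OldOp just lost a use; give it (and a now-single user) another visit.
  WL.handleUseCountDecrement(OldOp);
}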
diff --git a/llvm/include/llvm/Transforms/Utils/Local.h b/llvm/include/llvm/Transforms/Utils/Local.h
index 75d2351e2df3..4578af069814 100644
--- a/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/llvm/include/llvm/Transforms/Utils/Local.h
@@ -240,17 +240,17 @@ CallInst *changeToCall(InvokeInst *II, DomTreeUpdater *DTU = nullptr);
///
/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
-/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
+/// that has an associated llvm.dbg.declare intrinsic.
void ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
StoreInst *SI, DIBuilder &Builder);
/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
-/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
+/// that has an associated llvm.dbg.declare intrinsic.
void ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
LoadInst *LI, DIBuilder &Builder);
/// Inserts a llvm.dbg.value intrinsic after a phi that has an associated
-/// llvm.dbg.declare or llvm.dbg.addr intrinsic.
+/// llvm.dbg.declare intrinsic.
void ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
PHINode *LI, DIBuilder &Builder);
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index d63bee6fa321..c6864124e0bc 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -76,10 +76,14 @@ bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
/// This function may introduce unused PHI nodes. If \p PHIsToRemove is not
/// nullptr, those are added to it (before removing, the caller has to check if
/// they still do not have any uses). Otherwise the PHIs are directly removed.
+///
+/// If \p InsertedPHIs is not nullptr, inserted phis will be added to this
+/// vector.
bool formLCSSAForInstructions(
SmallVectorImpl<Instruction *> &Worklist, const DominatorTree &DT,
- const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder,
- SmallVectorImpl<PHINode *> *PHIsToRemove = nullptr);
+ const LoopInfo &LI, ScalarEvolution *SE,
+ SmallVectorImpl<PHINode *> *PHIsToRemove = nullptr,
+ SmallVectorImpl<PHINode *> *InsertedPHIs = nullptr);
/// Put loop into LCSSA form.
///
@@ -116,10 +120,9 @@ public:
// Explicitly set limits.
SinkAndHoistLICMFlags(unsigned LicmMssaOptCap,
unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
- Loop *L = nullptr, MemorySSA *MSSA = nullptr);
+ Loop &L, MemorySSA &MSSA);
// Use default limits.
- SinkAndHoistLICMFlags(bool IsSink, Loop *L = nullptr,
- MemorySSA *MSSA = nullptr);
+ SinkAndHoistLICMFlags(bool IsSink, Loop &L, MemorySSA &MSSA);
void setIsSink(bool B) { IsSink = B; }
bool getIsSink() { return IsSink; }
@@ -175,6 +178,11 @@ bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool,
bool AllowSpeculation);
+/// Return true if the induction variable \p IV in a Loop whose latch is
+/// \p LatchBlock would become dead if the exit test \p Cond were removed.
+/// Conservatively returns false if analysis is insufficient.
+bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond);
+
/// This function deletes dead loops. The caller of this function needs to
/// guarantee that the loop is in fact dead.
/// The function requires a bunch or prerequisites to be present:
@@ -349,6 +357,9 @@ bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
SinkAndHoistLICMFlags &LICMFlags,
OptimizationRemarkEmitter *ORE = nullptr);
+/// Returns the min/max intrinsic used when expanding a min/max reduction.
+Intrinsic::ID getMinMaxReductionIntrinsicOp(RecurKind RK);
+
/// Returns the comparison predicate used when expanding a min/max reduction.
CmpInst::Predicate getMinMaxReductionPredicate(RecurKind RK);
@@ -425,6 +436,14 @@ bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE);
/// loop \p L.
bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
ScalarEvolution &SE);
+/// Returns true if we can prove that \p S is defined and always positive in
+/// loop \p L.
+bool isKnownPositiveInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE);
+
+/// Returns true if we can prove that \p S is defined and always non-positive in
+/// loop \p L.
+bool isKnownNonPositiveInLoop(const SCEV *S, const Loop *L,
+ ScalarEvolution &SE);
/// Returns true if \p S is defined and never is equal to signed/unsigned max.
bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
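getMinMaxReductionIntrinsicOp complements the existing predicate helper by returning the min/max intrinsic (smin/umin/smax/umax, or the FP equivalents) for a recurrence kind. A minimal sketch of emitting one reduction step with it; RK, LHS and RHS are assumed to come from the caller's reduction descriptor:

#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

using namespace llvm;

Value *createMinMaxStep(IRBuilderBase &B, RecurKind RK, Value *LHS,
                        Value *RHS) {
  Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
  return B.CreateBinaryIntrinsic(Id, LHS, RHS);
}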
diff --git a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
index d0c27d198ae4..314435324b47 100644
--- a/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
+++ b/llvm/include/llvm/Transforms/Utils/LowerMemIntrinsics.h
@@ -50,8 +50,9 @@ void createMemCpyLoopKnownSize(
void expandMemCpyAsLoop(MemCpyInst *MemCpy, const TargetTransformInfo &TTI,
ScalarEvolution *SE = nullptr);
-/// Expand \p MemMove as a loop. \p MemMove is not deleted.
-void expandMemMoveAsLoop(MemMoveInst *MemMove);
+/// Expand \p MemMove as a loop. \p MemMove is not deleted. Returns true if the
+/// memmove was lowered.
+bool expandMemMoveAsLoop(MemMoveInst *MemMove, const TargetTransformInfo &TTI);
/// Expand \p MemSet as a loop. \p MemSet is not deleted.
void expandMemSetAsLoop(MemSetInst *MemSet);
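expandMemMoveAsLoop now needs TargetTransformInfo and tells the caller whether the memmove was actually lowered. A minimal sketch of using the result; the failure comment reflects an assumption about when lowering can decline, and the helper name is invented for illustration:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"

using namespace llvm;

bool lowerMemMove(MemMoveInst *MM, const TargetTransformInfo &TTI) {
  if (!expandMemMoveAsLoop(MM, TTI))
    return false;          // lowering declined, e.g. awkward address spaces
  MM->eraseFromParent();   // the emitted loop now replaces the intrinsic
  return true;
}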
diff --git a/llvm/include/llvm/Transforms/Utils/MoveAutoInit.h b/llvm/include/llvm/Transforms/Utils/MoveAutoInit.h
new file mode 100644
index 000000000000..980b55f46f11
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Utils/MoveAutoInit.h
@@ -0,0 +1,29 @@
+//===- MoveAutoInit.h - Move insts marked as auto-init Pass --*- C++ -*-======//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions marked as auto-init closer to their use if
+// profitable, generally because it moves them under a guard, potentially
+// skipping the overhead of the auto-init under some execution paths.
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H
+#define LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class MoveAutoInitPass : public PassInfoMixin<MoveAutoInitPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_MOVEAUTOINIT_H
diff --git a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
index 633052f1c15e..7930d95e1dea 100644
--- a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
+++ b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
@@ -39,14 +39,6 @@ class TargetLibraryInfo;
class Value;
class ValueLatticeElement;
-/// Helper struct for bundling up the analysis results per function for IPSCCP.
-struct AnalysisResultsForFn {
- std::unique_ptr<PredicateInfo> PredInfo;
- DominatorTree *DT;
- PostDominatorTree *PDT;
- LoopInfo *LI;
-};
-
/// Helper struct shared between Function Specialization and SCCP Solver.
struct ArgInfo {
Argument *Formal; // The Formal argument being analysed.
@@ -82,7 +74,7 @@ public:
~SCCPSolver();
- void addAnalysis(Function &F, AnalysisResultsForFn A);
+ void addPredicateInfo(Function &F, DominatorTree &DT, AssumptionCache &AC);
/// markBlockExecutable - This method can be used by clients to mark all of
/// the blocks that are known to be intrinsically live in the processed unit.
@@ -91,10 +83,6 @@ public:
const PredicateBase *getPredicateInfoFor(Instruction *I);
- const LoopInfo &getLoopInfo(Function &F);
-
- DomTreeUpdater getDTU(Function &F);
-
/// trackValueOfGlobalVariable - Clients can use this method to
/// inform the SCCPSolver that it should track loads and stores to the
/// specified global variable if it can. This is only legal to call if
@@ -132,6 +120,8 @@ public:
void solveWhileResolvedUndefsIn(SmallVectorImpl<Function *> &WorkList);
+ void solveWhileResolvedUndefs();
+
bool isBlockExecutable(BasicBlock *BB) const;
// isEdgeFeasible - Return true if the control flow edge from the 'From' basic
@@ -142,6 +132,10 @@ public:
void removeLatticeValueFor(Value *V);
+ /// Invalidate the Lattice Value of \p Call and its users after specializing
+ /// the call. Then recompute it.
+ void resetLatticeValueFor(CallBase *Call);
+
const ValueLatticeElement &getLatticeValueFor(Value *V) const;
/// getTrackedRetVals - Get the inferred return value map.
@@ -166,19 +160,20 @@ public:
/// Helper to return a Constant if \p LV is either a constant or a constant
/// range with a single element.
- Constant *getConstant(const ValueLatticeElement &LV) const;
+ Constant *getConstant(const ValueLatticeElement &LV, Type *Ty) const;
+
+ /// Return either a Constant or nullptr for a given Value.
+ Constant *getConstantOrNull(Value *V) const;
/// Return a reference to the set of argument tracked functions.
SmallPtrSetImpl<Function *> &getArgumentTrackedFunctions();
- /// Mark the constant arguments of a new function specialization. \p F points
- /// to the cloned function and \p Args contains a list of constant arguments
- /// represented as pairs of {formal,actual} values (the formal argument is
- /// associated with the original function definition). All other arguments of
- /// the specialization inherit the lattice state of their corresponding values
- /// in the original function.
- void markArgInFuncSpecialization(Function *F,
- const SmallVectorImpl<ArgInfo> &Args);
+ /// Set the Lattice Value for the arguments of a specialization \p F.
+ /// If an argument is Constant then its lattice value is marked with the
+ /// corresponding actual argument in \p Args. Otherwise, its lattice value
+ /// is inherited (copied) from the corresponding formal argument in \p Args.
+ void setLatticeValueForSpecializationArguments(Function *F,
+ const SmallVectorImpl<ArgInfo> &Args);
/// Mark all of the blocks in function \p F non-executable. Clients can used
/// this method to erase a function from the module (e.g., if it has been
diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
index c233e3dc168e..36fbf536f6d0 100644
--- a/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/llvm/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -28,6 +28,7 @@ template <typename T> class SSAUpdaterTraits;
class Type;
class Use;
class Value;
+class DbgValueInst;
/// Helper class for SSA formation on a set of values defined in
/// multiple blocks.
@@ -114,6 +115,15 @@ public:
/// be below it.
void RewriteUse(Use &U);
+ /// Rewrite debug value intrinsics to conform to a new SSA form.
+ ///
+ /// This will scout out all the debug value intrinsics associated with
+ /// the instruction. Anything outside of its block will have its
+ /// value set to the new SSA value if available, and undef if not.
+ void UpdateDebugValues(Instruction *I);
+ void UpdateDebugValues(Instruction *I,
+ SmallVectorImpl<DbgValueInst *> &DbgValues);
+
/// Rewrite a use like \c RewriteUse but handling in-block definitions.
///
/// This version of the method can rewrite uses in the same block as
@@ -123,6 +133,7 @@ public:
private:
Value *GetValueAtEndOfBlockInternal(BasicBlock *BB);
+ void UpdateDebugValue(Instruction *I, DbgValueInst *DbgValue);
};
/// Helper class for promoting a collection of loads and stores into SSA
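UpdateDebugValues rewrites the llvm.dbg.value users of an instruction that live outside its block, using the SSAUpdater's available values (or undef where none reaches). A minimal sketch combining it with the usual rewrite flow; NewDefs and the helper name are invented for illustration:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"

using namespace llvm;

void rewriteUsesAndDebugValues(
    Instruction *I, ArrayRef<std::pair<BasicBlock *, Value *>> NewDefs) {
  SSAUpdater Updater;
  Updater.Initialize(I->getType(), I->getName());
  for (const auto &[BB, V] : NewDefs)
    Updater.AddAvailableValue(BB, V);

  // Rewrite ordinary uses that live outside the defining block.
  for (Use &U : llvm::make_early_inc_range(I->uses()))
    if (cast<Instruction>(U.getUser())->getParent() != I->getParent())
      Updater.RewriteUse(U);

  // Retarget out-of-block dbg.value intrinsics at the reaching definition.
  Updater.UpdateDebugValues(I);
}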
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
index fbfd25f8d81d..e9bc3d18bdcb 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
@@ -18,30 +18,8 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/Instruction.h"
-#include "llvm/IR/Instructions.h"
-
namespace llvm {
-class Function;
-class MachineBasicBlock;
-class MachineFunction;
-
-namespace afdo_detail {
-
-template <class BlockT> struct TypeMap {};
-template <> struct TypeMap<BasicBlock> {
- using BasicBlockT = BasicBlock;
- using FunctionT = Function;
-};
-template <> struct TypeMap<MachineBasicBlock> {
- using BasicBlockT = MachineBasicBlock;
- using FunctionT = MachineFunction;
-};
-
-} // end namespace afdo_detail
-
struct FlowJump;
/// A wrapper of a binary basic block.
@@ -138,10 +116,11 @@ void applyFlowInference(const ProfiParams &Params, FlowFunction &Func);
void applyFlowInference(FlowFunction &Func);
/// Sample profile inference pass.
-template <typename BT> class SampleProfileInference {
+template <typename FT> class SampleProfileInference {
public:
- using BasicBlockT = typename afdo_detail::TypeMap<BT>::BasicBlockT;
- using FunctionT = typename afdo_detail::TypeMap<BT>::FunctionT;
+ using NodeRef = typename GraphTraits<FT *>::NodeRef;
+ using BasicBlockT = typename std::remove_pointer<NodeRef>::type;
+ using FunctionT = FT;
using Edge = std::pair<const BasicBlockT *, const BasicBlockT *>;
using BlockWeightMap = DenseMap<const BasicBlockT *, uint64_t>;
using EdgeWeightMap = DenseMap<Edge, uint64_t>;
@@ -157,9 +136,9 @@ public:
private:
/// Initialize flow function blocks, jumps and misc metadata.
- void initFunction(FlowFunction &Func,
- const std::vector<const BasicBlockT *> &BasicBlocks,
- DenseMap<const BasicBlockT *, uint64_t> &BlockIndex);
+ FlowFunction
+ createFlowFunction(const std::vector<const BasicBlockT *> &BasicBlocks,
+ DenseMap<const BasicBlockT *, uint64_t> &BlockIndex);
/// Try to infer branch probabilities mimicking implementation of
/// BranchProbabilityInfo. Unlikely taken branches are marked so that the
@@ -228,8 +207,7 @@ void SampleProfileInference<BT>::apply(BlockWeightMap &BlockWeights,
}
// Create necessary objects
- FlowFunction Func;
- initFunction(Func, BasicBlocks, BlockIndex);
+ FlowFunction Func = createFlowFunction(BasicBlocks, BlockIndex);
// Create and apply the inference network model.
applyFlowInference(Func);
@@ -261,9 +239,10 @@ void SampleProfileInference<BT>::apply(BlockWeightMap &BlockWeights,
}
template <typename BT>
-void SampleProfileInference<BT>::initFunction(
- FlowFunction &Func, const std::vector<const BasicBlockT *> &BasicBlocks,
+FlowFunction SampleProfileInference<BT>::createFlowFunction(
+ const std::vector<const BasicBlockT *> &BasicBlocks,
DenseMap<const BasicBlockT *, uint64_t> &BlockIndex) {
+ FlowFunction Func;
Func.Blocks.reserve(BasicBlocks.size());
// Create FlowBlocks
for (const auto *BB : BasicBlocks) {
@@ -314,6 +293,8 @@ void SampleProfileInference<BT>::initFunction(
EntryBlock.Weight = 1;
EntryBlock.HasUnknownWeight = false;
}
+
+ return Func;
}
template <typename BT>
@@ -321,40 +302,10 @@ inline void SampleProfileInference<BT>::findUnlikelyJumps(
const std::vector<const BasicBlockT *> &BasicBlocks,
BlockEdgeMap &Successors, FlowFunction &Func) {}
-template <>
-inline void SampleProfileInference<BasicBlock>::findUnlikelyJumps(
- const std::vector<const BasicBlockT *> &BasicBlocks,
- BlockEdgeMap &Successors, FlowFunction &Func) {
- for (auto &Jump : Func.Jumps) {
- const auto *BB = BasicBlocks[Jump.Source];
- const auto *Succ = BasicBlocks[Jump.Target];
- const Instruction *TI = BB->getTerminator();
- // Check if a block ends with InvokeInst and mark non-taken branch unlikely.
- // In that case block Succ should be a landing pad
- if (Successors[BB].size() == 2 && Successors[BB].back() == Succ) {
- if (isa<InvokeInst>(TI)) {
- Jump.IsUnlikely = true;
- }
- }
- const Instruction *SuccTI = Succ->getTerminator();
- // Check if the target block contains UnreachableInst and mark it unlikely
- if (SuccTI->getNumSuccessors() == 0) {
- if (isa<UnreachableInst>(SuccTI)) {
- Jump.IsUnlikely = true;
- }
- }
- }
-}
-
template <typename BT>
inline bool SampleProfileInference<BT>::isExit(const BasicBlockT *BB) {
return BB->succ_empty();
}
-template <>
-inline bool SampleProfileInference<BasicBlock>::isExit(const BasicBlock *BB) {
- return succ_empty(BB);
-}
-
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_SAMPLEPROFILEINFERENCE_H
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
index 19aef8050d44..1c6ba530e3df 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -33,6 +34,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/Support/CommandLine.h"
@@ -46,6 +48,10 @@ using namespace sampleprof;
using namespace sampleprofutil;
using ProfileCount = Function::ProfileCount;
+namespace vfs {
+class FileSystem;
+} // namespace vfs
+
#define DEBUG_TYPE "sample-profile-impl"
namespace afdo_detail {
@@ -75,14 +81,66 @@ template <> struct IRTraits<BasicBlock> {
} // end namespace afdo_detail
+// This class serves sample counts correlation for SampleProfileLoader by
+// analyzing pseudo probes and their function descriptors injected by
+// SampleProfileProber.
+class PseudoProbeManager {
+ DenseMap<uint64_t, PseudoProbeDescriptor> GUIDToProbeDescMap;
+
+ const PseudoProbeDescriptor *getDesc(const Function &F) const {
+ auto I = GUIDToProbeDescMap.find(
+ Function::getGUID(FunctionSamples::getCanonicalFnName(F)));
+ return I == GUIDToProbeDescMap.end() ? nullptr : &I->second;
+ }
+
+public:
+ PseudoProbeManager(const Module &M) {
+ if (NamedMDNode *FuncInfo =
+ M.getNamedMetadata(PseudoProbeDescMetadataName)) {
+ for (const auto *Operand : FuncInfo->operands()) {
+ const auto *MD = cast<MDNode>(Operand);
+ auto GUID = mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))
+ ->getZExtValue();
+ auto Hash = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1))
+ ->getZExtValue();
+ GUIDToProbeDescMap.try_emplace(GUID, PseudoProbeDescriptor(GUID, Hash));
+ }
+ }
+ }
+
+ bool moduleIsProbed(const Module &M) const {
+ return M.getNamedMetadata(PseudoProbeDescMetadataName);
+ }
+
+ bool profileIsValid(const Function &F, const FunctionSamples &Samples) const {
+ const auto *Desc = getDesc(F);
+ if (!Desc) {
+ LLVM_DEBUG(dbgs() << "Probe descriptor missing for Function "
+ << F.getName() << "\n");
+ return false;
+ }
+ if (Desc->getFunctionHash() != Samples.getFunctionHash()) {
+ LLVM_DEBUG(dbgs() << "Hash mismatch for Function " << F.getName()
+ << "\n");
+ return false;
+ }
+ return true;
+ }
+};
+
+
+
extern cl::opt<bool> SampleProfileUseProfi;
-template <typename BT> class SampleProfileLoaderBaseImpl {
+template <typename FT> class SampleProfileLoaderBaseImpl {
public:
- SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName)
- : Filename(Name), RemappingFilename(RemapName) {}
+ SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS)
+ : Filename(Name), RemappingFilename(RemapName), FS(std::move(FS)) {}
void dump() { Reader->dump(); }
+ using NodeRef = typename GraphTraits<FT *>::NodeRef;
+ using BT = typename std::remove_pointer<NodeRef>::type;
using InstructionT = typename afdo_detail::IRTraits<BT>::InstructionT;
using BasicBlockT = typename afdo_detail::IRTraits<BT>::BasicBlockT;
using BlockFrequencyInfoT =
@@ -131,6 +189,7 @@ protected:
unsigned getFunctionLoc(FunctionT &Func);
virtual ErrorOr<uint64_t> getInstWeight(const InstructionT &Inst);
ErrorOr<uint64_t> getInstWeightImpl(const InstructionT &Inst);
+ virtual ErrorOr<uint64_t> getProbeWeight(const InstructionT &Inst);
ErrorOr<uint64_t> getBlockWeight(const BasicBlockT *BB);
mutable DenseMap<const DILocation *, const FunctionSamples *>
DILocation2SampleMap;
@@ -206,6 +265,9 @@ protected:
/// Profile reader object.
std::unique_ptr<SampleProfileReader> Reader;
+ // A pseudo probe helper to correlate the imported sample counts.
+ std::unique_ptr<PseudoProbeManager> ProbeManager;
+
/// Samples collected for the body of this function.
FunctionSamples *Samples = nullptr;
@@ -215,6 +277,9 @@ protected:
/// Name of the profile remapping file to load.
std::string RemappingFilename;
+ /// VirtualFileSystem to load profile files from.
+ IntrusiveRefCntPtr<vfs::FileSystem> FS;
+
/// Profile Summary Info computed from sample profile.
ProfileSummaryInfo *PSI = nullptr;
@@ -290,6 +355,8 @@ void SampleProfileLoaderBaseImpl<BT>::printBlockWeight(
template <typename BT>
ErrorOr<uint64_t>
SampleProfileLoaderBaseImpl<BT>::getInstWeight(const InstructionT &Inst) {
+ if (FunctionSamples::ProfileIsProbeBased)
+ return getProbeWeight(Inst);
return getInstWeightImpl(Inst);
}
@@ -337,6 +404,65 @@ SampleProfileLoaderBaseImpl<BT>::getInstWeightImpl(const InstructionT &Inst) {
return R;
}
+// An error_code return here represents either 1) a dangling probe or 2) a
+// non-probe instruction whose weight should be ignored. If every instruction
+// in the BB yields error_code, the inference algorithm is told to infer the
+// BB weight.
+template <typename BT>
+ErrorOr<uint64_t>
+SampleProfileLoaderBaseImpl<BT>::getProbeWeight(const InstructionT &Inst) {
+ assert(FunctionSamples::ProfileIsProbeBased &&
+ "Profile is not pseudo probe based");
+ std::optional<PseudoProbe> Probe = extractProbe(Inst);
+ // Ignore non-probe instructions. If none of the instructions in the BB is a
+ // probe, we choose to infer the BB's weight.
+ if (!Probe)
+ return std::error_code();
+
+ const FunctionSamples *FS = findFunctionSamples(Inst);
+ // If the instruction has no FunctionSamples, return a zero sample value to
+ // indicate the BB is cold. This can happen when the instruction comes from
+ // an inlinee and no profile data is found.
+ // FIXME: This should not be affected by the source drift issue, because 1)
+ // if the newly added function is the top-level inliner, it won't match the
+ // CFG checksum in the function profile, and 2) if it's an inlinee, it should
+ // have a profile, otherwise it wouldn't have been inlined. For non-probe
+ // based profiles, we could improve this by adding a
+ // profile-sample-block-accurate switch for block level counts in the future.
+ if (!FS)
+ return 0;
+
+ auto R = FS->findSamplesAt(Probe->Id, Probe->Discriminator);
+ if (R) {
+ uint64_t Samples = R.get() * Probe->Factor;
+ bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
+ if (FirstMark) {
+ ORE->emit([&]() {
+ OptRemarkAnalysisT Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
+ Remark << "Applied " << ore::NV("NumSamples", Samples);
+ Remark << " samples from profile (ProbeId=";
+ Remark << ore::NV("ProbeId", Probe->Id);
+ if (Probe->Discriminator) {
+ Remark << ".";
+ Remark << ore::NV("Discriminator", Probe->Discriminator);
+ }
+ Remark << ", Factor=";
+ Remark << ore::NV("Factor", Probe->Factor);
+ Remark << ", OriginalSamples=";
+ Remark << ore::NV("OriginalSamples", R.get());
+ Remark << ")";
+ return Remark;
+ });
+ }
+ LLVM_DEBUG({
+ dbgs() << " " << Probe->Id;
+ if (Probe->Discriminator)
+ dbgs() << "." << Probe->Discriminator;
+ dbgs() << ":" << Inst << " - weight: " << R.get()
+ << " - factor: " << format("%0.2f", Probe->Factor) << ")\n";
+ });
+ return Samples;
+ }
+ return R;
+}
+
/// Compute the weight of a basic block.
///
/// The weight of basic block \p BB is the maximum weight of all the
@@ -805,11 +931,11 @@ void SampleProfileLoaderBaseImpl<BT>::propagateWeights(FunctionT &F) {
}
}
-template <typename BT>
-void SampleProfileLoaderBaseImpl<BT>::applyProfi(
+template <typename FT>
+void SampleProfileLoaderBaseImpl<FT>::applyProfi(
FunctionT &F, BlockEdgeMap &Successors, BlockWeightMap &SampleBlockWeights,
BlockWeightMap &BlockWeights, EdgeWeightMap &EdgeWeights) {
- auto Infer = SampleProfileInference<BT>(F, Successors, SampleBlockWeights);
+ auto Infer = SampleProfileInference<FT>(F, Successors, SampleBlockWeights);
Infer.apply(BlockWeights, EdgeWeights);
}
@@ -989,18 +1115,6 @@ unsigned SampleProfileLoaderBaseImpl<BT>::getFunctionLoc(FunctionT &F) {
return 0;
}
-template <typename BT>
-void SampleProfileLoaderBaseImpl<BT>::computeDominanceAndLoopInfo(
- FunctionT &F) {
- DT.reset(new DominatorTree);
- DT->recalculate(F);
-
- PDT.reset(new PostDominatorTree(F));
-
- LI.reset(new LoopInfo);
- LI->analyze(*DT);
-}
-
#undef DEBUG_TYPE
} // namespace llvm
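For illustration, a minimal sketch of how a loader built on the PseudoProbeManager and probe-based weighting added above might validate a function's profile before trusting its counts; the include path and the way the Module, Function, and FunctionSamples values are obtained are assumptions about the surrounding loader, not part of this change.

#include "llvm/IR/Module.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"

using namespace llvm;
using namespace llvm::sampleprof;

// Returns true if F's probe-based samples can be trusted: the module carries
// pseudo-probe descriptor metadata and the function's CFG checksum matches
// the one recorded in the profile.
static bool canUseProbeProfile(const Module &M, const Function &F,
                               const FunctionSamples &Samples) {
  PseudoProbeManager ProbeManager(M);   // scans the pseudo-probe descriptors
  if (!ProbeManager.moduleIsProbed(M))
    return false;                       // no probes were injected
  return ProbeManager.profileIsValid(F, Samples); // checksum comparison
}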
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index 131e24f685e8..02b9e51f354a 100644
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -440,9 +440,7 @@ private:
/// Expand a SCEVAddExpr with a pointer type into a GEP instead of using
/// ptrtoint+arithmetic+inttoptr.
- Value *expandAddToGEP(const SCEV *const *op_begin, const SCEV *const *op_end,
- PointerType *PTy, Type *Ty, Value *V);
- Value *expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty, Value *V);
+ Value *expandAddToGEP(const SCEV *Op, Type *Ty, Value *V);
/// Find a previous Value in ExprValueMap for expand.
Value *FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt);
@@ -457,6 +455,8 @@ private:
Value *visitConstant(const SCEVConstant *S) { return S->getValue(); }
+ Value *visitVScale(const SCEVVScale *S);
+
Value *visitPtrToIntExpr(const SCEVPtrToIntExpr *S);
Value *visitTruncateExpr(const SCEVTruncateExpr *S);
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
index 7af879638a4d..8008fc6e8422 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
@@ -29,7 +29,7 @@ struct SimplifyCFGOptions {
bool HoistCommonInsts = false;
bool SinkCommonInsts = false;
bool SimplifyCondBranch = true;
- bool FoldTwoEntryPHINode = true;
+ bool SpeculateBlocks = true;
AssumptionCache *AC = nullptr;
@@ -71,8 +71,8 @@ struct SimplifyCFGOptions {
return *this;
}
- SimplifyCFGOptions &setFoldTwoEntryPHINode(bool B) {
- FoldTwoEntryPHINode = B;
+ SimplifyCFGOptions &speculateBlocks(bool B) {
+ SpeculateBlocks = B;
return *this;
}
};
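As a small usage sketch (pass-pipeline wiring omitted; the other chained setters already exist on SimplifyCFGOptions), the renamed option reads like this at a call site:

#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"

using namespace llvm;

// Build a SimplifyCFG pass that keeps common-instruction sinking but disables
// block speculation (SpeculateBlocks replaces FoldTwoEntryPHINode here).
static SimplifyCFGPass makeConservativeSimplifyCFG() {
  return SimplifyCFGPass(SimplifyCFGOptions()
                             .sinkCommonInsts(true)
                             .speculateBlocks(false));
}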
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 90a5ffc5b4fa..eb10545ee149 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -18,6 +18,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
namespace llvm {
+class AssumptionCache;
class StringRef;
class Value;
class CallInst;
@@ -102,6 +103,7 @@ private:
FortifiedLibCallSimplifier FortifiedSimplifier;
const DataLayout &DL;
const TargetLibraryInfo *TLI;
+ AssumptionCache *AC;
OptimizationRemarkEmitter &ORE;
BlockFrequencyInfo *BFI;
ProfileSummaryInfo *PSI;
@@ -134,9 +136,9 @@ private:
public:
LibCallSimplifier(
- const DataLayout &DL, const TargetLibraryInfo *TLI,
- OptimizationRemarkEmitter &ORE,
- BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI, AssumptionCache *AC,
+ OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI,
function_ref<void(Instruction *, Value *)> Replacer =
&replaceAllUsesWithDefault,
function_ref<void(Instruction *)> Eraser = &eraseFromParentDefault);
@@ -181,6 +183,7 @@ private:
Value *optimizeMemMove(CallInst *CI, IRBuilderBase &B);
Value *optimizeMemSet(CallInst *CI, IRBuilderBase &B);
Value *optimizeRealloc(CallInst *CI, IRBuilderBase &B);
+ Value *optimizeNew(CallInst *CI, IRBuilderBase &B, LibFunc &Func);
Value *optimizeWcslen(CallInst *CI, IRBuilderBase &B);
Value *optimizeBCopy(CallInst *CI, IRBuilderBase &B);
@@ -198,7 +201,7 @@ private:
Value *optimizeFMinFMax(CallInst *CI, IRBuilderBase &B);
Value *optimizeLog(CallInst *CI, IRBuilderBase &B);
Value *optimizeSqrt(CallInst *CI, IRBuilderBase &B);
- Value *optimizeSinCosPi(CallInst *CI, IRBuilderBase &B);
+ Value *optimizeSinCosPi(CallInst *CI, bool IsSin, IRBuilderBase &B);
Value *optimizeTan(CallInst *CI, IRBuilderBase &B);
// Wrapper for all floating point library call optimizations
Value *optimizeFloatingPointLibCall(CallInst *CI, LibFunc Func,
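A hedged sketch of constructing LibCallSimplifier with the extra AssumptionCache parameter; the analyses are assumed to be supplied by whatever pass hosts the simplifier (as InstCombine does), and the helper name is illustrative.

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"

using namespace llvm;

// All analyses are assumed to be provided by the caller; BFI and PSI may be
// null. AC is the parameter added in this change.
static Value *trySimplifyLibCall(CallInst *CI, const DataLayout &DL,
                                 const TargetLibraryInfo *TLI,
                                 AssumptionCache *AC,
                                 OptimizationRemarkEmitter &ORE,
                                 BlockFrequencyInfo *BFI,
                                 ProfileSummaryInfo *PSI, IRBuilderBase &B) {
  LibCallSimplifier Simplifier(DL, TLI, AC, ORE, BFI, PSI);
  return Simplifier.optimizeCall(CI, B); // nullptr if nothing was simplified
}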
diff --git a/llvm/include/llvm/Transforms/Utils/SizeOpts.h b/llvm/include/llvm/Transforms/Utils/SizeOpts.h
index aa9e9bd6c69b..a9e72768f81e 100644
--- a/llvm/include/llvm/Transforms/Utils/SizeOpts.h
+++ b/llvm/include/llvm/Transforms/Utils/SizeOpts.h
@@ -47,7 +47,7 @@ static inline bool isPGSOColdCodeOnly(ProfileSummaryInfo *PSI) {
(PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize());
}
-template<typename AdapterT, typename FuncT, typename BFIT>
+template <typename FuncT, typename BFIT>
bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI,
BFIT *BFI, PGSOQueryType QueryType) {
assert(F);
@@ -58,19 +58,20 @@ bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI,
if (!EnablePGSO)
return false;
if (isPGSOColdCodeOnly(PSI))
- return AdapterT::isFunctionColdInCallGraph(F, PSI, *BFI);
+ return PSI->isFunctionColdInCallGraph(F, *BFI);
if (PSI->hasSampleProfile())
// The "isCold" check seems to work better for Sample PGO as it could have
// many profile-unannotated functions.
- return AdapterT::isFunctionColdInCallGraphNthPercentile(
- PgsoCutoffSampleProf, F, PSI, *BFI);
- return !AdapterT::isFunctionHotInCallGraphNthPercentile(PgsoCutoffInstrProf,
- F, PSI, *BFI);
+ return PSI->isFunctionColdInCallGraphNthPercentile(PgsoCutoffSampleProf, F,
+ *BFI);
+ return !PSI->isFunctionHotInCallGraphNthPercentile(PgsoCutoffInstrProf, F,
+ *BFI);
}
-template<typename AdapterT, typename BlockTOrBlockFreq, typename BFIT>
-bool shouldOptimizeForSizeImpl(BlockTOrBlockFreq BBOrBlockFreq, ProfileSummaryInfo *PSI,
- BFIT *BFI, PGSOQueryType QueryType) {
+template <typename BlockTOrBlockFreq, typename BFIT>
+bool shouldOptimizeForSizeImpl(BlockTOrBlockFreq BBOrBlockFreq,
+ ProfileSummaryInfo *PSI, BFIT *BFI,
+ PGSOQueryType QueryType) {
if (!PSI || !BFI || !PSI->hasProfileSummary())
return false;
if (ForcePGSO)
@@ -78,14 +79,13 @@ bool shouldOptimizeForSizeImpl(BlockTOrBlockFreq BBOrBlockFreq, ProfileSummaryIn
if (!EnablePGSO)
return false;
if (isPGSOColdCodeOnly(PSI))
- return AdapterT::isColdBlock(BBOrBlockFreq, PSI, BFI);
+ return PSI->isColdBlock(BBOrBlockFreq, BFI);
if (PSI->hasSampleProfile())
// The "isCold" check seems to work better for Sample PGO as it could have
// many profile-unannotated functions.
- return AdapterT::isColdBlockNthPercentile(PgsoCutoffSampleProf,
- BBOrBlockFreq, PSI, BFI);
- return !AdapterT::isHotBlockNthPercentile(PgsoCutoffInstrProf, BBOrBlockFreq,
- PSI, BFI);
+ return PSI->isColdBlockNthPercentile(PgsoCutoffSampleProf, BBOrBlockFreq,
+ BFI);
+ return !PSI->isHotBlockNthPercentile(PgsoCutoffInstrProf, BBOrBlockFreq, BFI);
}
/// Returns true if function \p F is suggested to be size-optimized based on the
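A rough sketch of the direct ProfileSummaryInfo queries the simplified templates now rely on; the helper name and cutoff constant are illustrative, not the real cl::opt values.

#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/IR/BasicBlock.h"

using namespace llvm;

// Simplified variant of shouldOptimizeForSizeImpl: with the AdapterT template
// parameter gone, the coldness queries go straight through PSI.
static bool blockLooksCold(const BasicBlock *BB, ProfileSummaryInfo *PSI,
                           BlockFrequencyInfo *BFI) {
  if (!PSI || !BFI || !PSI->hasProfileSummary())
    return false;
  if (PSI->hasSampleProfile())
    // Illustrative cutoff; the real code uses the PgsoCutoffSampleProf option.
    return PSI->isColdBlockNthPercentile(990000, BB, BFI);
  return PSI->isColdBlock(BB, BFI);
}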
diff --git a/llvm/include/llvm/Transforms/Utils/SymbolRewriter.h b/llvm/include/llvm/Transforms/Utils/SymbolRewriter.h
index 1c6f90faf2b4..80dc6a42d931 100644
--- a/llvm/include/llvm/Transforms/Utils/SymbolRewriter.h
+++ b/llvm/include/llvm/Transforms/Utils/SymbolRewriter.h
@@ -114,9 +114,6 @@ private:
} // end namespace SymbolRewriter
-ModulePass *createRewriteSymbolsPass();
-ModulePass *createRewriteSymbolsPass(SymbolRewriter::RewriteDescriptorList &);
-
class RewriteSymbolPass : public PassInfoMixin<RewriteSymbolPass> {
public:
RewriteSymbolPass() { loadAndParseMapFiles(); }
diff --git a/llvm/include/llvm/Transforms/Utils/VNCoercion.h b/llvm/include/llvm/Transforms/Utils/VNCoercion.h
index 1cc751d1e78a..f1ea94bf60fc 100644
--- a/llvm/include/llvm/Transforms/Utils/VNCoercion.h
+++ b/llvm/include/llvm/Transforms/Utils/VNCoercion.h
@@ -70,26 +70,16 @@ int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
MemIntrinsic *DepMI, const DataLayout &DL);
-/// If analyzeLoadFromClobberingStore returned an offset, this function can be
-/// used to actually perform the extraction of the bits from the store. It
-/// inserts instructions to do so at InsertPt, and returns the extracted value.
-Value *getStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
+/// If analyzeLoadFromClobberingStore/Load returned an offset, this function
+/// can be used to actually perform the extraction of the bits from the store.
+/// It inserts instructions to do so at InsertPt, and returns the extracted
+/// value.
+Value *getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
Instruction *InsertPt, const DataLayout &DL);
-// This is the same as getStoreValueForLoad, except it performs no insertion
+// This is the same as getValueForLoad, except it performs no insertion.
// It only allows constant inputs.
-Constant *getConstantStoreValueForLoad(Constant *SrcVal, unsigned Offset,
- Type *LoadTy, const DataLayout &DL);
-
-/// If analyzeLoadFromClobberingLoad returned an offset, this function can be
-/// used to actually perform the extraction of the bits from the load, including
-/// any necessary load widening. It inserts instructions to do so at InsertPt,
-/// and returns the extracted value.
-Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
- Instruction *InsertPt, const DataLayout &DL);
-// This is the same as getLoadValueForLoad, except it is given the load value as
-// a constant. It returns nullptr if it would require widening the load.
-Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset,
- Type *LoadTy, const DataLayout &DL);
+Constant *getConstantValueForLoad(Constant *SrcVal, unsigned Offset,
+ Type *LoadTy, const DataLayout &DL);
/// If analyzeLoadFromClobberingMemInst returned an offset, this function can be
/// used to actually perform the extraction of the bits from the memory
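A brief sketch of the consolidated API: one analyze call yields an offset, and getValueForLoad materializes the reinterpreted value whether the clobber was a store or a load. The surrounding GVN-style caller and its variable names are assumed.

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/VNCoercion.h"

using namespace llvm;
using namespace llvm::VNCoercion;

// If DepSI (an earlier store) covers the memory read by Load, rebuild the
// loaded value from the stored value instead of re-reading memory.
static Value *tryForwardStoreToLoad(LoadInst *Load, StoreInst *DepSI,
                                    const DataLayout &DL) {
  int Offset = analyzeLoadFromClobberingStore(
      Load->getType(), Load->getPointerOperand(), DepSI, DL);
  if (Offset < 0)
    return nullptr; // the store does not cover the load
  // getValueForLoad replaces the old getStoreValueForLoad/getLoadValueForLoad
  // pair; Load doubles as the insertion point for the conversion code.
  return getValueForLoad(DepSI->getValueOperand(), Offset, Load->getType(),
                         Load, DL);
}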
diff --git a/llvm/include/llvm/Transforms/Utils/ValueMapper.h b/llvm/include/llvm/Transforms/Utils/ValueMapper.h
index 95fd0b14dd51..5f15af7f9990 100644
--- a/llvm/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/llvm/include/llvm/Transforms/Utils/ValueMapper.h
@@ -112,8 +112,9 @@ inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) {
/// There are a number of top-level entry points:
/// - \a mapValue() (and \a mapConstant());
/// - \a mapMetadata() (and \a mapMDNode());
-/// - \a remapInstruction(); and
-/// - \a remapFunction().
+/// - \a remapInstruction();
+/// - \a remapFunction(); and
+/// - \a remapGlobalObjectMetadata().
///
/// The \a ValueMaterializer can be used as a callback, but cannot invoke any
/// of these top-level functions recursively. Instead, callbacks should use
@@ -175,6 +176,7 @@ public:
void remapInstruction(Instruction &I);
void remapFunction(Function &F);
+ void remapGlobalObjectMetadata(GlobalObject &GO);
void scheduleMapGlobalInitializer(GlobalVariable &GV, Constant &Init,
unsigned MappingContextID = 0);
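A minimal sketch of the new remapGlobalObjectMetadata entry point, assuming an already-populated ValueToValueMapTy and default remap flags:

#include "llvm/IR/GlobalVariable.h"
#include "llvm/Transforms/Utils/ValueMapper.h"

using namespace llvm;

// Rewrite only the metadata attached to GV through VM, leaving the
// initializer and other operands untouched.
static void remapOnlyMetadata(GlobalVariable &GV, ValueToValueMapTy &VM) {
  ValueMapper Mapper(VM, RF_None);
  Mapper.remapGlobalObjectMetadata(GV); // new top-level entry point
}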
diff --git a/llvm/include/llvm/Transforms/Vectorize.h b/llvm/include/llvm/Transforms/Vectorize.h
index bc7514267778..1db501e0aeb0 100644
--- a/llvm/include/llvm/Transforms/Vectorize.h
+++ b/llvm/include/llvm/Transforms/Vectorize.h
@@ -15,135 +15,15 @@
#define LLVM_TRANSFORMS_VECTORIZE_H
namespace llvm {
-class BasicBlock;
class Pass;
//===----------------------------------------------------------------------===//
-/// Vectorize configuration.
-struct VectorizeConfig {
- //===--------------------------------------------------------------------===//
- // Target architecture related parameters
-
- /// The size of the native vector registers.
- unsigned VectorBits;
-
- /// Vectorize boolean values.
- bool VectorizeBools;
-
- /// Vectorize integer values.
- bool VectorizeInts;
-
- /// Vectorize floating-point values.
- bool VectorizeFloats;
-
- /// Vectorize pointer values.
- bool VectorizePointers;
-
- /// Vectorize casting (conversion) operations.
- bool VectorizeCasts;
-
- /// Vectorize floating-point math intrinsics.
- bool VectorizeMath;
-
- /// Vectorize bit intrinsics.
- bool VectorizeBitManipulations;
-
- /// Vectorize the fused-multiply-add intrinsic.
- bool VectorizeFMA;
-
- /// Vectorize select instructions.
- bool VectorizeSelect;
-
- /// Vectorize comparison instructions.
- bool VectorizeCmp;
-
- /// Vectorize getelementptr instructions.
- bool VectorizeGEP;
-
- /// Vectorize loads and stores.
- bool VectorizeMemOps;
-
- /// Only generate aligned loads and stores.
- bool AlignedOnly;
-
- //===--------------------------------------------------------------------===//
- // Misc parameters
-
- /// The required chain depth for vectorization.
- unsigned ReqChainDepth;
-
- /// The maximum search distance for instruction pairs.
- unsigned SearchLimit;
-
- /// The maximum number of candidate pairs with which to use a full
- /// cycle check.
- unsigned MaxCandPairsForCycleCheck;
-
- /// Replicating one element to a pair breaks the chain.
- bool SplatBreaksChain;
-
- /// The maximum number of pairable instructions per group.
- unsigned MaxInsts;
-
- /// The maximum number of candidate instruction pairs per group.
- unsigned MaxPairs;
-
- /// The maximum number of pairing iterations.
- unsigned MaxIter;
-
- /// Don't try to form odd-length vectors.
- bool Pow2LenOnly;
-
- /// Don't boost the chain-depth contribution of loads and stores.
- bool NoMemOpBoost;
-
- /// Use a fast instruction dependency analysis.
- bool FastDep;
-
- /// Initialize the VectorizeConfig from command line options.
- VectorizeConfig();
-};
-
-//===----------------------------------------------------------------------===//
-//
-// LoopVectorize - Create a loop vectorization pass.
-//
-Pass *createLoopVectorizePass();
-Pass *createLoopVectorizePass(bool InterleaveOnlyWhenForced,
- bool VectorizeOnlyWhenForced);
-
-//===----------------------------------------------------------------------===//
-//
-// SLPVectorizer - Create a bottom-up SLP vectorizer pass.
-//
-Pass *createSLPVectorizerPass();
-
-//===----------------------------------------------------------------------===//
-/// Vectorize the BasicBlock.
-///
-/// @param BB The BasicBlock to be vectorized
-/// @param P The current running pass, should require AliasAnalysis and
-/// ScalarEvolution. After the vectorization, AliasAnalysis,
-/// ScalarEvolution and CFG are preserved.
-///
-/// @return True if the BB is changed, false otherwise.
-///
-bool vectorizeBasicBlock(Pass *P, BasicBlock &BB,
- const VectorizeConfig &C = VectorizeConfig());
-
-//===----------------------------------------------------------------------===//
//
// LoadStoreVectorizer - Create vector loads and stores, but leave scalar
// operations.
//
Pass *createLoadStoreVectorizerPass();
-//===----------------------------------------------------------------------===//
-//
-// Optimize partial vector operations using target cost models.
-//
-Pass *createVectorCombinePass();
-
} // End llvm namespace
#endif
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index bbb73c637d8c..01c905946379 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -293,9 +293,6 @@ public:
/// Return the fixed-order recurrences found in the loop.
RecurrenceSet &getFixedOrderRecurrences() { return FixedOrderRecurrences; }
- /// Return the set of instructions to sink to handle fixed-order recurrences.
- MapVector<Instruction *, Instruction *> &getSinkAfter() { return SinkAfter; }
-
/// Returns the widest induction type.
Type *getWidestInductionType() { return WidestIndTy; }
@@ -350,12 +347,18 @@ public:
/// loop. Do not use after invoking 'createVectorizedLoopSkeleton' (PR34965).
int isConsecutivePtr(Type *AccessTy, Value *Ptr) const;
- /// Returns true if the value V is uniform within the loop.
- bool isUniform(Value *V) const;
+ /// Returns true if \p V is invariant across all loop iterations.
+ bool isInvariant(Value *V) const;
+
+ /// Returns true if value V is uniform across \p VF lanes, when \p VF is
+ /// provided, and otherwise if \p V is invariant across all loop iterations.
+ bool isUniform(Value *V, ElementCount VF) const;
/// A uniform memory op is a load or store which accesses the same memory
- /// location on all lanes.
- bool isUniformMemOp(Instruction &I) const;
+ /// location on all \p VF lanes, if \p VF is provided and otherwise if the
+ /// memory location is invariant.
+ bool isUniformMemOp(Instruction &I, ElementCount VF) const;
/// Returns the information that we collected about runtime memory check.
const RuntimePointerChecking *getRuntimePointerChecking() const {
@@ -368,14 +371,10 @@ public:
return LAI->getDepChecker().isSafeForAnyVectorWidth();
}
- unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
-
uint64_t getMaxSafeVectorWidthInBits() const {
return LAI->getDepChecker().getMaxSafeVectorWidthInBits();
}
- bool hasStride(Value *V) { return LAI->hasStride(V); }
-
/// Returns true if vector representation of the instruction \p I
/// requires mask.
bool isMaskRequired(const Instruction *I) const {
@@ -391,6 +390,20 @@ public:
return ConditionalAssumes;
}
+ PredicatedScalarEvolution *getPredicatedScalarEvolution() const {
+ return &PSE;
+ }
+
+ Loop *getLoop() const { return TheLoop; }
+
+ LoopInfo *getLoopInfo() const { return LI; }
+
+ AssumptionCache *getAssumptionCache() const { return AC; }
+
+ ScalarEvolution *getScalarEvolution() const { return PSE.getSE(); }
+
+ DominatorTree *getDominatorTree() const { return DT; }
+
private:
/// Return true if the pre-header, exiting and latch blocks of \p Lp and all
/// its nested loops are considered legal for vectorization. These legal
@@ -452,16 +465,6 @@ private:
void addInductionPhi(PHINode *Phi, const InductionDescriptor &ID,
SmallPtrSetImpl<Value *> &AllowedExit);
- /// If an access has a symbolic strides, this maps the pointer value to
- /// the stride symbol.
- const ValueToValueMap *getSymbolicStrides() const {
- // FIXME: Currently, the set of symbolic strides is sometimes queried before
- // it's collected. This happens from canVectorizeWithIfConvert, when the
- // pointer is checked to reference consecutive elements suitable for a
- // masked access.
- return LAI ? &LAI->getSymbolicStrides() : nullptr;
- }
-
/// The loop that we evaluate.
Loop *TheLoop;
@@ -515,10 +518,6 @@ private:
/// Holds the phi nodes that are fixed-order recurrences.
RecurrenceSet FixedOrderRecurrences;
- /// Holds instructions that need to sink past other instructions to handle
- /// fixed-order recurrences.
- MapVector<Instruction *, Instruction *> SinkAfter;
-
/// Holds the widest induction type encountered.
Type *WidestIndTy = nullptr;
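A sketch of consulting the VF-aware uniformity queries above; LVL is assumed to be an already-constructed LoopVectorizationLegality, and the fixed VF of 4 lanes is illustrative.

#include "llvm/IR/Instruction.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"

using namespace llvm;

// Ask whether a memory access behaves uniformly when vectorizing 4 lanes wide,
// and whether its address is loop invariant regardless of VF.
static bool uniformForVF4(LoopVectorizationLegality &LVL, Instruction &I,
                          Value *Ptr) {
  ElementCount VF = ElementCount::getFixed(4);
  bool UniformMemOp = LVL.isUniformMemOp(I, VF); // same location on all 4 lanes
  bool InvariantPtr = LVL.isInvariant(Ptr);      // invariant across iterations
  return UniformMemOp || InvariantPtr;
}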
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
index 2f01276d5cb1..da9fec8d63e9 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -15,7 +15,7 @@
// 'wide' iteration. After this transformation the index is incremented
// by the SIMD vector width, and not by one.
//
-// This pass has three parts:
+// This pass has four parts:
// 1. The main loop pass that drives the different parts.
// 2. LoopVectorizationLegality - A unit that checks for the legality
// of the vectorization.
@@ -189,7 +189,7 @@ public:
// Shim for old PM.
LoopVectorizeResult runImpl(Function &F, ScalarEvolution &SE_, LoopInfo &LI_,
TargetTransformInfo &TTI_, DominatorTree &DT_,
- BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
+ BlockFrequencyInfo *BFI_, TargetLibraryInfo *TLI_,
DemandedBits &DB_, AssumptionCache &AC_,
LoopAccessInfoManager &LAIs_,
OptimizationRemarkEmitter &ORE_,
diff --git a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
index 0ba4f59ab85b..326006fbb880 100644
--- a/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -29,7 +29,6 @@ namespace llvm {
class AAResults;
class AssumptionCache;
class BasicBlock;
-class CmpInst;
class DemandedBits;
class DominatorTree;
class Function;
@@ -91,15 +90,11 @@ private:
/// every time we run into a memory barrier.
void collectSeedInstructions(BasicBlock *BB);
- /// Try to vectorize a chain that starts at two arithmetic instrs.
- bool tryToVectorizePair(Value *A, Value *B, slpvectorizer::BoUpSLP &R);
-
/// Try to vectorize a list of operands.
- /// \param LimitForRegisterSize Vectorize only using maximal allowed register
- /// size.
+ /// \param MaxVFOnly Vectorize only using maximal allowed register size.
/// \returns true if a value was vectorized.
bool tryToVectorizeList(ArrayRef<Value *> VL, slpvectorizer::BoUpSLP &R,
- bool LimitForRegisterSize = false);
+ bool MaxVFOnly = false);
/// Try to vectorize a chain that may start at the operands of \p I.
bool tryToVectorize(Instruction *I, slpvectorizer::BoUpSLP &R);
@@ -119,12 +114,12 @@ private:
/// Try to find horizontal reduction or otherwise, collect instructions
/// for postponed vectorization attempts.
/// \a P if not null designates phi node the reduction is fed into
- /// (with reduction operators \a V or one of its operands, in a basic block
+ /// (with reduction operators \a Root or one of its operands, in a basic block
/// \a BB).
/// \returns true if a horizontal reduction was matched and reduced.
/// \returns false if \a V is null or not an instruction,
/// or a horizontal reduction was not matched or not possible.
- bool vectorizeHorReduction(PHINode *P, Value *V, BasicBlock *BB,
+ bool vectorizeHorReduction(PHINode *P, Instruction *Root, BasicBlock *BB,
slpvectorizer::BoUpSLP &R,
TargetTransformInfo *TTI,
SmallVectorImpl<WeakTrackingVH> &PostponedInsts);
@@ -132,7 +127,7 @@ private:
/// Make an attempt to vectorize reduction and then try to vectorize
/// postponed binary operations.
/// \returns true on any successfull vectorization.
- bool vectorizeRootInstruction(PHINode *P, Value *V, BasicBlock *BB,
+ bool vectorizeRootInstruction(PHINode *P, Instruction *Root, BasicBlock *BB,
slpvectorizer::BoUpSLP &R,
TargetTransformInfo *TTI);
@@ -144,11 +139,15 @@ private:
bool vectorizeInsertElementInst(InsertElementInst *IEI, BasicBlock *BB,
slpvectorizer::BoUpSLP &R);
- /// Tries to vectorize constructs started from CmpInst, InsertValueInst or
+ /// Tries to vectorize \p CmpInsts. \returns true on success.
+ template <typename ItT>
+ bool vectorizeCmpInsts(iterator_range<ItT> CmpInsts, BasicBlock *BB,
+ slpvectorizer::BoUpSLP &R);
+
+ /// Tries to vectorize constructs started from InsertValueInst or
/// InsertElementInst instructions.
- bool vectorizeSimpleInstructions(InstSetVector &Instructions, BasicBlock *BB,
- slpvectorizer::BoUpSLP &R,
- bool AtTerminator);
+ bool vectorizeInserts(InstSetVector &Instructions, BasicBlock *BB,
+ slpvectorizer::BoUpSLP &R);
/// Scan the basic block and look for patterns that are likely to start
/// a vectorization chain.
diff --git a/llvm/include/llvm/WindowsDriver/MSVCPaths.h b/llvm/include/llvm/WindowsDriver/MSVCPaths.h
index d1545b8a918b..51ffd6b6bc2c 100644
--- a/llvm/include/llvm/WindowsDriver/MSVCPaths.h
+++ b/llvm/include/llvm/WindowsDriver/MSVCPaths.h
@@ -11,7 +11,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/TargetParser/Triple.h"
#include <optional>
#include <string>
@@ -90,11 +90,15 @@ bool findVCToolChainViaEnvironment(vfs::FileSystem &VFS, std::string &Path,
ToolsetLayout &VSLayout);
// Query the Setup Config server for installs, then pick the newest version
-// and find its default VC toolchain.
+// and find its default VC toolchain. If `VCToolsVersion` is specified, that
+// version is preferred over the latest version.
+//
// This is the preferred way to discover new Visual Studios, as they're no
// longer listed in the registry.
-bool findVCToolChainViaSetupConfig(vfs::FileSystem &VFS, std::string &Path,
- ToolsetLayout &VSLayout);
+bool
+findVCToolChainViaSetupConfig(vfs::FileSystem &VFS,
+ std::optional<llvm::StringRef> VCToolsVersion,
+ std::string &Path, ToolsetLayout &VSLayout);
// Look in the registry for Visual Studio installs, and use that to get
// a toolchain path. VS2017 and newer don't get added to the registry.
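A hedged sketch of calling the updated findVCToolChainViaSetupConfig; the wrapper name is made up, and passing std::nullopt keeps the previous behavior of picking the newest install.

#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/WindowsDriver/MSVCPaths.h"
#include <optional>
#include <string>

using namespace llvm;

// Prefer a pinned VC tools version when the caller supplies one; otherwise the
// newest installed Visual Studio's default toolchain is selected.
static bool findToolChain(vfs::FileSystem &VFS,
                          std::optional<StringRef> PinnedVersion,
                          std::string &Path, ToolsetLayout &Layout) {
  return findVCToolChainViaSetupConfig(VFS, PinnedVersion, Path, Layout);
}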
diff --git a/llvm/include/llvm/XRay/XRayRecord.h b/llvm/include/llvm/XRay/XRayRecord.h
index bb3c346d05e7..238bf3daf6ea 100644
--- a/llvm/include/llvm/XRay/XRayRecord.h
+++ b/llvm/include/llvm/XRay/XRayRecord.h
@@ -34,10 +34,10 @@ struct XRayFileHeader {
/// Whether the CPU that produced the timestamp counters (TSC) move at a
/// constant rate.
- bool ConstantTSC;
+ bool ConstantTSC = false;
/// Whether the CPU that produced the timestamp counters (TSC) do not stop.
- bool NonstopTSC;
+ bool NonstopTSC = false;
/// The number of cycles per second for the CPU that produced the timestamp
/// counter (TSC) values. Useful for estimating the amount of time that
@@ -47,7 +47,7 @@ struct XRayFileHeader {
// This is different depending on the type of xray record. The naive format
// stores a Wallclock timespec. FDR logging stores the size of a thread
// buffer.
- char FreeFormData[16];
+ char FreeFormData[16] = {};
};
/// Determines the supported types of records that could be seen in XRay traces.
diff --git a/llvm/include/llvm/module.install.modulemap b/llvm/include/llvm/module.install.modulemap
deleted file mode 100644
index 1be59215cf19..000000000000
--- a/llvm/include/llvm/module.install.modulemap
+++ /dev/null
@@ -1,31 +0,0 @@
-
-module LLVM_Extern_Config_Def {
- textual header "Config/AsmParsers.def"
- textual header "Config/AsmPrinters.def"
- textual header "Config/Disassemblers.def"
- textual header "Config/Targets.def"
- export *
-}
-
-module LLVM_Extern_IR_Attributes_Gen {
- textual header "IR/Attributes.gen"
- textual header "IR/Attributes.inc"
-}
-
-module LLVM_Extern_IR_Intrinsics_Gen {
- textual header "IR/Intrinsics.gen"
- textual header "IR/Intrinsics.inc"
-}
-
-module LLVM_Extern_IR_Intrinsics_Enum {
- textual header "IR/IntrinsicEnums.inc"
-}
-
-module LLVM_Extern_Utils_DataTypes {
- header "Support/DataTypes.h"
- export *
-}
-
-module LLVM_Extern_TargetParser_Gen {
- textual header "TargetParser/RISCVTargetParserDef.inc"
-}
diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap
deleted file mode 100644
index 741e0a83b1b7..000000000000
--- a/llvm/include/llvm/module.modulemap
+++ /dev/null
@@ -1,462 +0,0 @@
-module LLVM_Analysis {
- requires cplusplus
- umbrella "Analysis"
- module * { export * }
-
- // This is intended for (repeated) textual inclusion.
- textual header "Analysis/ScalarFuncs.def"
- textual header "Analysis/TargetLibraryInfo.def"
- textual header "Analysis/VecFuncs.def"
-}
-
-module LLVM_AsmParser {
- requires cplusplus
- umbrella "AsmParser"
- module * { export * }
-}
-
-// A module covering CodeGen/ and Target/. These are intertwined
-// and codependent, and thus notionally form a single module.
-module LLVM_Backend {
- requires cplusplus
-
- module CodeGen {
- umbrella "CodeGen"
- module * { export * }
-
- // Exclude these; they're intended to be included into only a single
- // translation unit (or none) and aren't part of this module.
- exclude header "CodeGen/LinkAllAsmWriterComponents.h"
- exclude header "CodeGen/LinkAllCodegenComponents.h"
-
- // These are intended for (repeated) textual inclusion.
- textual header "CodeGen/DIEValue.def"
- textual header "CodeGen/MachinePassRegistry.def"
- }
-}
-
-// FIXME: Make this as a submodule of LLVM_Backend again.
-// Doing so causes a linker error in clang-format.
-module LLVM_Backend_Target {
- umbrella "Target"
- module * { export * }
-}
-
-module LLVM_Bitcode {
- requires cplusplus
- umbrella "Bitcode"
- module * { export * }
-}
-
-module LLVM_Bitstream {
- requires cplusplus
- umbrella "Bitstream"
- module * { export * }
-}
-
-module LLVM_BinaryFormat {
- requires cplusplus
- umbrella "BinaryFormat" module * { export * }
- textual header "BinaryFormat/Dwarf.def"
- textual header "BinaryFormat/DXContainerConstants.def"
- textual header "BinaryFormat/DynamicTags.def"
- textual header "BinaryFormat/MachO.def"
- textual header "BinaryFormat/MinidumpConstants.def"
- textual header "BinaryFormat/Swift.def"
- textual header "BinaryFormat/ELFRelocs/AArch64.def"
- textual header "BinaryFormat/ELFRelocs/AMDGPU.def"
- textual header "BinaryFormat/ELFRelocs/ARM.def"
- textual header "BinaryFormat/ELFRelocs/ARC.def"
- textual header "BinaryFormat/ELFRelocs/AVR.def"
- textual header "BinaryFormat/ELFRelocs/BPF.def"
- textual header "BinaryFormat/ELFRelocs/CSKY.def"
- textual header "BinaryFormat/ELFRelocs/Hexagon.def"
- textual header "BinaryFormat/ELFRelocs/i386.def"
- textual header "BinaryFormat/ELFRelocs/Lanai.def"
- textual header "BinaryFormat/ELFRelocs/LoongArch.def"
- textual header "BinaryFormat/ELFRelocs/M68k.def"
- textual header "BinaryFormat/ELFRelocs/Mips.def"
- textual header "BinaryFormat/ELFRelocs/MSP430.def"
- textual header "BinaryFormat/ELFRelocs/PowerPC64.def"
- textual header "BinaryFormat/ELFRelocs/PowerPC.def"
- textual header "BinaryFormat/ELFRelocs/RISCV.def"
- textual header "BinaryFormat/ELFRelocs/Sparc.def"
- textual header "BinaryFormat/ELFRelocs/SystemZ.def"
- textual header "BinaryFormat/ELFRelocs/VE.def"
- textual header "BinaryFormat/ELFRelocs/x86_64.def"
- textual header "BinaryFormat/ELFRelocs/Xtensa.def"
- textual header "BinaryFormat/WasmRelocs.def"
- textual header "BinaryFormat/MsgPack.def"
-}
-
-module LLVM_Config {
- requires cplusplus
- umbrella "Config"
- extern module LLVM_Extern_Config_Def "module.extern.modulemap"
- module * { export * }
-}
-
-module LLVM_DebugInfo {
- requires cplusplus
- module DIContext { header "DebugInfo/DIContext.h" export * }
-}
-
-module LLVM_DebugInfo_DWARF {
- requires cplusplus
-
- umbrella "DebugInfo/DWARF"
- module * { export * }
-}
-
-module LLVM_DebugInfo_PDB {
- requires cplusplus
-
- umbrella "DebugInfo/PDB"
- module * { export * }
-
- // Separate out this subdirectory; it's an optional component that depends on
- // a separate library which might not be available.
- //
- // FIXME: There should be a better way to specify this.
- exclude header "DebugInfo/PDB/DIA/DIADataStream.h"
- exclude header "DebugInfo/PDB/DIA/DIAEnumDebugStreams.h"
- exclude header "DebugInfo/PDB/DIA/DIAEnumFrameData.h"
- exclude header "DebugInfo/PDB/DIA/DIAEnumInjectedSources.h"
- exclude header "DebugInfo/PDB/DIA/DIAEnumLineNumbers.h"
- exclude header "DebugInfo/PDB/DIA/DIAEnumSectionContribs.h"
- exclude header "DebugInfo/PDB/DIA/DIAEnumSourceFiles.h"
- exclude header "DebugInfo/PDB/DIA/DIAEnumSymbols.h"
- exclude header "DebugInfo/PDB/DIA/DIAEnumTables.h"
- exclude header "DebugInfo/PDB/DIA/DIAError.h"
- exclude header "DebugInfo/PDB/DIA/DIAFrameData.h"
- exclude header "DebugInfo/PDB/DIA/DIAInjectedSource.h"
- exclude header "DebugInfo/PDB/DIA/DIALineNumber.h"
- exclude header "DebugInfo/PDB/DIA/DIARawSymbol.h"
- exclude header "DebugInfo/PDB/DIA/DIASectionContrib.h"
- exclude header "DebugInfo/PDB/DIA/DIASession.h"
- exclude header "DebugInfo/PDB/DIA/DIASourceFile.h"
- exclude header "DebugInfo/PDB/DIA/DIASupport.h"
- exclude header "DebugInfo/PDB/DIA/DIATable.h"
- exclude header "DebugInfo/PDB/DIA/DIAUtils.h"
-}
-
-module LLVM_DebugInfo_PDB_DIA {
- requires cplusplus
-
- umbrella "DebugInfo/PDB/DIA"
- module * { export * }
-}
-
-module LLVM_DebugInfo_MSF {
- requires cplusplus
-
- umbrella "DebugInfo/MSF"
- module * { export * }
-}
-
-module LLVM_DebugInfo_CodeView {
- requires cplusplus
-
- umbrella "DebugInfo/CodeView"
- module * { export * }
-
- // These are intended for (repeated) textual inclusion.
- textual header "DebugInfo/CodeView/CodeViewRegisters.def"
- textual header "DebugInfo/CodeView/CodeViewTypes.def"
- textual header "DebugInfo/CodeView/CodeViewSymbols.def"
-}
-
-module LLVM_DWARFLinker {
- requires cplusplus
-
- umbrella "DWARFLinker"
- module * { export * }
-}
-
-module LLVM_ExecutionEngine {
- requires cplusplus
-
- umbrella "ExecutionEngine"
- module * { export * }
-
- // Exclude this; it's an optional component of the ExecutionEngine.
- exclude header "ExecutionEngine/OProfileWrapper.h"
-
- // Exclude these; they're intended to be included into only a single
- // translation unit (or none) and aren't part of this module.
- exclude header "ExecutionEngine/MCJIT.h"
- exclude header "ExecutionEngine/Interpreter.h"
-
- // Exclude headers from LLVM_OrcSupport.
- exclude header "ExecutionEngine/Orc/Shared/OrcError.h"
-}
-
-module LLVM_FileCheck {
- requires cplusplus
-
- umbrella "FileCheck"
- module * { export * }
-}
-
-module LLVM_Frontend_OpenMP {
- requires cplusplus
-
- umbrella "Frontend/OpenMP"
- module * { export * }
-
- exclude header "Frontend/OpenMP/OMPKinds.def"
-}
-
-// Orc utilities that don't depend only on Support (not ExecutionEngine or
-// IR). This is a workaround for ExecutionEngine's broken layering, and will
-// be removed in the future.
-module LLVM_OrcSupport {
- requires cplusplus
-
- header "ExecutionEngine/Orc/Shared/OrcError.h"
-
- export *
-}
-
-module LLVM_Pass {
- module Pass {
- // PassSupport.h and PassAnalysisSupport.h are made available only through
- // Pass.h.
- header "Pass.h"
- textual header "PassSupport.h"
- textual header "PassAnalysisSupport.h"
- export *
- }
-
- module PassRegistry { header "PassRegistry.h" export * }
- module InitializePasses { header "InitializePasses.h" export * }
-}
-
-module LLVM_intrinsic_gen {
- requires cplusplus
-
- // Delay building the modules containing dependencies to Attributes.h and
- // Intrinsics.h because they need to be generated by tablegen first.
-
- // Attributes.h
- module IR_Argument { header "IR/Argument.h" export * }
- module IR_Attributes {
- header "IR/Attributes.h"
- extern module LLVM_Extern_IR_Attributes_Gen "module.extern.modulemap"
- export *
- }
- module IR_AbstractCallSite { header "IR/AbstractCallSite.h" export * }
- module IR_ConstantFold { header "IR/ConstantFold.h" export * }
- module IR_ConstantFolder { header "IR/ConstantFolder.h" export * }
- module IR_GlobalVariable { header "IR/GlobalVariable.h" export * }
- module IR_NoFolder { header "IR/NoFolder.h" export * }
- module IRBuilderFolder { header "IR/IRBuilderFolder.h" export * }
- module IR_Module { header "IR/Module.h" export * }
- module IR_ModuleSummaryIndex { header "IR/ModuleSummaryIndex.h" export * }
- module IR_ModuleSummaryIndexYAML { header "IR/ModuleSummaryIndexYAML.h" export * }
- module IR_Function { header "IR/Function.h" export * }
- module IR_InstrTypes { header "IR/InstrTypes.h" export * }
- module IR_Instructions { header "IR/Instructions.h" export * }
- module IR_TypeFinder { header "IR/TypeFinder.h" export * }
- module IR_VectorBuilder { header "IR/VectorBuilder.h" export * }
-
-
- // Intrinsics.h
- module IR_CFG { header "IR/CFG.h" export * }
- module IR_ConstantRange { header "IR/ConstantRange.h" export * }
- module IR_Dominators { header "IR/Dominators.h" export * }
- module IR_FixedPointBuilder { header "IR/FixedPointBuilder.h" export * }
- module Analysis_PostDominators { header "Analysis/PostDominators.h" export * }
- module Analysis_DomTreeUpdater { header "Analysis/DomTreeUpdater.h" export * }
- module IR_IRBuilder { header "IR/IRBuilder.h" export * }
- module IR_IRPrintingPasses { header "IR/IRPrintingPasses.h" export * }
- module IR_MatrixBuilder { header "IR/MatrixBuilder.h" export * }
- module IR_PassManager { header "IR/PassManager.h" export * }
- module IR_PassManagerImpl { header "IR/PassManagerImpl.h" export * }
- module IR_PredIteratorCache { header "IR/PredIteratorCache.h" export * }
- module IR_Verifier { header "IR/Verifier.h" export * }
- module IR_InstIterator { header "IR/InstIterator.h" export * }
- module IR_InstVisitor { header "IR/InstVisitor.h" export * }
- module IR_Intrinsics {
- header "IR/Intrinsics.h"
- extern module LLVM_Extern_IR_Intricsics_Gen "module.extern.modulemap"
- extern module LLVM_Extern_IR_Intrinsics_Enum "module.extern.modulemap"
- export *
- }
- module IR_IntrinsicInst { header "IR/IntrinsicInst.h" export * }
- module IR_PatternMatch { header "IR/PatternMatch.h" export * }
- module IR_SafepointIRVerifier { header "IR/SafepointIRVerifier.h" export * }
- module IR_Statepoint { header "IR/Statepoint.h" export * }
- module IR_DebugInfo { header "IR/DebugInfo.h" export * }
-
- export *
-}
-
-module LLVM_IR {
- requires cplusplus
-
- umbrella "IR"
- module * { export * }
-
- // These are intended for (repeated) textual inclusion.
- textual header "IR/ConstrainedOps.def"
- textual header "IR/DebugInfoFlags.def"
- textual header "IR/Instruction.def"
- textual header "IR/Metadata.def"
- textual header "IR/FixedMetadataKinds.def"
- textual header "IR/Value.def"
- textual header "IR/VPIntrinsics.def"
- textual header "IR/RuntimeLibcalls.def"
-}
-
-module LLVM_IRReader {
- requires cplusplus
- umbrella "IRReader"
- module * { export * }
-}
-
-module LLVM_LineEditor {
- requires cplusplus
- umbrella "LineEditor"
- module * { export * }
-}
-
-module LLVM_LTO {
- requires cplusplus
- umbrella "LTO"
- module * { export * }
-}
-
-module LLVM_MC {
- requires cplusplus
-
- umbrella "MC"
- module * { export * }
-}
-
-// Used by llvm-tblgen
-module LLVM_MC_TableGen {
- requires cplusplus
- module MC_LaneBitmask { header "MC/LaneBitmask.h" export * }
- module MC_InstrItineraries { header "MC/MCInstrItineraries.h" export * }
- module MC_Schedule { header "MC/MCSchedule.h" export * }
- module MC_SubtargetFeature { header "MC/SubtargetFeature.h" export * }
-}
-
-module LLVM_Object {
- requires cplusplus
- umbrella "Object"
- module * { export * }
-}
-
-module LLVM_Option {
- requires cplusplus
- umbrella "Option"
- module * { export * }
-}
-
-module LLVM_ProfileData {
- requires cplusplus
-
- umbrella "ProfileData"
- module * { export * }
-
- textual header "ProfileData/InstrProfData.inc"
- textual header "ProfileData/MemProfData.inc"
- textual header "ProfileData/MIBEntryDef.inc"
-}
-
-// FIXME: Mislayered?
-module LLVM_Support_TargetRegistry {
- requires cplusplus
- header "Support/TargetRegistry.h"
- export *
-}
-
-module LLVM_TableGen {
- requires cplusplus
- umbrella "TableGen"
- module * { export * }
-}
-
-module LLVM_Transforms {
- requires cplusplus
- umbrella "Transforms"
-
- module * { export * }
-
- // Requires DEBUG_TYPE to be defined by including file.
- exclude header "Transforms/Utils/InstructionWorklist.h"
-}
-
-extern module LLVM_Extern_Utils_DataTypes "module.extern.modulemap"
-
-// Build the module with the tablegen-generated files needed by the
-// TargetParser module before building the TargetParser module itself.
-module TargetParserGen {
- module RISCVTargetParserDef {
- header "TargetParser/RISCVTargetParser.h"
- extern module LLVM_Extern_TargetParser_Gen "module.extern.modulemap"
- export *
- }
-}
-
-// A module covering ADT/ and Support/. These are intertwined and
-// codependent, and notionally form a single module.
-module LLVM_Utils {
- module ADT {
- requires cplusplus
-
- umbrella "ADT"
- module * { export * }
- }
-
- module Support {
- requires cplusplus
-
- umbrella "Support"
- module * { export * }
-
- // Exclude this; it should only be used on Windows.
- exclude header "Support/Windows/WindowsSupport.h"
-
- // Exclude these; they are fundamentally non-modular.
- exclude header "Support/PluginLoader.h"
- exclude header "Support/Solaris/sys/regset.h"
- textual header "Support/TargetOpcodes.def"
-
- }
-
- module TargetParser {
- requires cplusplus
-
- umbrella "TargetParser"
- module * { export * }
-
- // These are intended for textual inclusion.
- textual header "TargetParser/ARMTargetParser.def"
- textual header "TargetParser/CSKYTargetParser.def"
- textual header "TargetParser/X86TargetParser.def"
- textual header "TargetParser/LoongArchTargetParser.def"
- }
-
- // This part of the module is usable from both C and C++ code.
- module ConvertUTF {
- header "Support/ConvertUTF.h"
- export *
- }
-}
-
-// This is used for a $src == $build compilation. Otherwise we use
-// LLVM_Support_DataTypes_Build, defined in a module map that is
-// copied into the build area.
-module LLVM_Support_DataTypes_Src {
- header "llvm/Support/DataTypes.h"
- export *
-}
-
-module LLVM_WindowsManifest {
- requires cplusplus
- umbrella "WindowsManifest"
- module * { export * }
-}
diff --git a/llvm/include/llvm/module.extern.modulemap b/llvm/include/module.extern.modulemap
index 8e726a3957cc..c69d2764e915 100644
--- a/llvm/include/llvm/module.extern.modulemap
+++ b/llvm/include/module.extern.modulemap
@@ -1,3 +1,4 @@
+module LLVM_Extern_CodeGenTypes_Gen {}
module LLVM_Extern_Config_Def {}
module LLVM_Extern_IR_Attributes_Gen {}
module LLVM_Extern_IR_Intrinsics_Gen {}
diff --git a/llvm/include/module.install.modulemap b/llvm/include/module.install.modulemap
new file mode 100644
index 000000000000..f7302830f561
--- /dev/null
+++ b/llvm/include/module.install.modulemap
@@ -0,0 +1,35 @@
+
+module LLVM_Extern_CodeGenTypes_Gen {
+ textual header "llvm/CodeGen/GenVT.inc"
+}
+
+module LLVM_Extern_Config_Def {
+ textual header "llvm/Config/AsmParsers.def"
+ textual header "llvm/Config/AsmPrinters.def"
+ textual header "llvm/Config/Disassemblers.def"
+ textual header "llvm/Config/Targets.def"
+ export *
+}
+
+module LLVM_Extern_IR_Attributes_Gen {
+ textual header "llvm/IR/Attributes.gen"
+ textual header "llvm/IR/Attributes.inc"
+}
+
+module LLVM_Extern_IR_Intrinsics_Gen {
+ textual header "llvm/IR/Intrinsics.gen"
+ textual header "llvm/IR/Intrinsics.inc"
+}
+
+module LLVM_Extern_IR_Intrinsics_Enum {
+ textual header "llvm/IR/IntrinsicEnums.inc"
+}
+
+module LLVM_Extern_Utils_DataTypes {
+ header "llvm/Support/DataTypes.h"
+ export *
+}
+
+module LLVM_Extern_TargetParser_Gen {
+ textual header "llvm/TargetParser/RISCVTargetParserDef.inc"
+}
diff --git a/llvm/include/module.modulemap b/llvm/include/module.modulemap
new file mode 100644
index 000000000000..4c2ba437edb9
--- /dev/null
+++ b/llvm/include/module.modulemap
@@ -0,0 +1,428 @@
+module LLVM_C {
+ umbrella "llvm-c"
+ module * { export * }
+}
+
+module LLVM_Analysis {
+ requires cplusplus
+ umbrella "llvm/Analysis"
+ module * { export * }
+
+ // This is intended for (repeated) textual inclusion.
+ textual header "llvm/Analysis/ScalarFuncs.def"
+ textual header "llvm/Analysis/TargetLibraryInfo.def"
+ textual header "llvm/Analysis/VecFuncs.def"
+}
+
+module LLVM_AsmParser {
+ requires cplusplus
+ umbrella "llvm/AsmParser"
+ module * { export * }
+}
+
+module LLVM_CodeGenTypes {
+ requires cplusplus
+
+ module LLT {
+ header "llvm/CodeGen/LowLevelType.h" export *
+ }
+ module MVT {
+ header "llvm/CodeGen/MachineValueType.h" export *
+ extern module LLVM_Extern_CodeGenTypes_Gen "module.extern.modulemap"
+ }
+}
+
+// A module covering CodeGen/ and Target/. These are intertwined
+// and codependent, and thus notionally form a single module.
+module LLVM_Backend {
+ requires cplusplus
+
+ module CodeGen {
+ umbrella "llvm/CodeGen"
+ module * { export * }
+
+ // Exclude these; they're intended to be included into only a single
+ // translation unit (or none) and aren't part of this module.
+ exclude header "llvm/CodeGen/LinkAllAsmWriterComponents.h"
+ exclude header "llvm/CodeGen/LinkAllCodegenComponents.h"
+
+ exclude header "llvm/CodeGen/CodeGenPassBuilder.h"
+
+ // These are intended for (repeated) textual inclusion.
+ textual header "llvm/CodeGen/DIEValue.def"
+ textual header "llvm/CodeGen/MachinePassRegistry.def"
+ }
+}
+
+// FIXME: Make this as a submodule of LLVM_Backend again.
+// Doing so causes a linker error in clang-format.
+module LLVM_Backend_Target {
+ umbrella "llvm/Target"
+ module * { export * }
+}
+
+module LLVM_Bitcode {
+ requires cplusplus
+ umbrella "llvm/Bitcode"
+ module * { export * }
+}
+
+module LLVM_Bitstream {
+ requires cplusplus
+ umbrella "llvm/Bitstream"
+ module * { export * }
+}
+
+module LLVM_BinaryFormat {
+ requires cplusplus
+ umbrella "llvm/BinaryFormat" module * { export * }
+ textual header "llvm/BinaryFormat/Dwarf.def"
+ textual header "llvm/BinaryFormat/DXContainerConstants.def"
+ textual header "llvm/BinaryFormat/DynamicTags.def"
+ textual header "llvm/BinaryFormat/MachO.def"
+ textual header "llvm/BinaryFormat/MinidumpConstants.def"
+ textual header "llvm/BinaryFormat/Swift.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/AArch64.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/AMDGPU.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/ARM.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/ARC.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/AVR.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/BPF.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/CSKY.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/Hexagon.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/i386.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/Lanai.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/LoongArch.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/M68k.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/Mips.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/MSP430.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/PowerPC64.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/PowerPC.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/RISCV.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/Sparc.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/SystemZ.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/VE.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/x86_64.def"
+ textual header "llvm/BinaryFormat/ELFRelocs/Xtensa.def"
+ textual header "llvm/BinaryFormat/WasmRelocs.def"
+ textual header "llvm/BinaryFormat/MsgPack.def"
+}
+
+module LLVM_Config {
+ requires cplusplus
+ umbrella "llvm/Config"
+ extern module LLVM_Extern_Config_Def "module.extern.modulemap"
+ module * { export * }
+}
+
+module LLVM_DebugInfo {
+ requires cplusplus
+ module DIContext { header "llvm/DebugInfo/DIContext.h" export * }
+}
+
+module LLVM_DebugInfo_DWARF {
+ requires cplusplus
+
+ umbrella "llvm/DebugInfo/DWARF"
+ module * { export * }
+}
+
+module LLVM_DebugInfo_PDB {
+ requires cplusplus
+
+ umbrella "llvm/DebugInfo/PDB"
+ module * { export * }
+
+ // Separate out this subdirectory; it's an optional component that depends on
+ // a separate library which might not be available.
+ //
+ // FIXME: There should be a better way to specify this.
+ exclude header "llvm/DebugInfo/PDB/DIA/DIADataStream.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIAEnumDebugStreams.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIAEnumFrameData.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIAEnumInjectedSources.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIAEnumLineNumbers.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIAEnumSectionContribs.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIAEnumSourceFiles.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIAEnumSymbols.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIAEnumTables.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIAError.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIAFrameData.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIAInjectedSource.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIALineNumber.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIARawSymbol.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIASectionContrib.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIASession.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIASourceFile.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIASupport.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIATable.h"
+ exclude header "llvm/DebugInfo/PDB/DIA/DIAUtils.h"
+}
+
+module LLVM_DebugInfo_PDB_DIA {
+ requires cplusplus
+
+ umbrella "llvm/DebugInfo/PDB/DIA"
+ module * { export * }
+}
+
+module LLVM_DebugInfo_MSF {
+ requires cplusplus
+
+ umbrella "llvm/DebugInfo/MSF"
+ module * { export * }
+}
+
+module LLVM_DebugInfo_CodeView {
+ requires cplusplus
+
+ umbrella "llvm/DebugInfo/CodeView"
+ module * { export * }
+
+ // These are intended for (repeated) textual inclusion.
+ textual header "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
+ textual header "llvm/DebugInfo/CodeView/CodeViewTypes.def"
+ textual header "llvm/DebugInfo/CodeView/CodeViewSymbols.def"
+}
+
+module LLVM_DWARFLinker {
+ requires cplusplus
+
+ umbrella "llvm/DWARFLinker"
+ module * { export * }
+}
+
+module LLVM_ExecutionEngine {
+ requires cplusplus
+
+ umbrella "llvm/ExecutionEngine"
+ module * { export * }
+
+ // Exclude this; it's an optional component of the ExecutionEngine.
+ exclude header "llvm/ExecutionEngine/OProfileWrapper.h"
+
+ // Exclude these; they're intended to be included into only a single
+ // translation unit (or none) and aren't part of this module.
+ exclude header "llvm/ExecutionEngine/MCJIT.h"
+ exclude header "llvm/ExecutionEngine/Interpreter.h"
+
+ // Exclude headers from LLVM_OrcSupport.
+ exclude header "llvm/ExecutionEngine/Orc/Shared/OrcError.h"
+}
+
+module LLVM_FileCheck {
+ requires cplusplus
+
+ umbrella "llvm/FileCheck"
+ module * { export * }
+}
+
+module LLVM_Frontend_OpenMP {
+ requires cplusplus
+
+ umbrella "llvm/Frontend/OpenMP"
+ module * { export * }
+
+ exclude header "llvm/Frontend/OpenMP/OMPKinds.def"
+}
+
+// Orc utilities that don't depend only on Support (not ExecutionEngine or
+// IR). This is a workaround for ExecutionEngine's broken layering, and will
+// be removed in the future.
+module LLVM_OrcSupport {
+ requires cplusplus
+
+ header "llvm/ExecutionEngine/Orc/Shared/OrcError.h"
+
+ export *
+}
+
+module LLVM_Pass {
+ module Pass {
+ // PassSupport.h and PassAnalysisSupport.h are made available only through
+ // Pass.h.
+ header "llvm/Pass.h"
+ textual header "llvm/PassSupport.h"
+ textual header "llvm/PassAnalysisSupport.h"
+ export *
+ }
+
+ module PassRegistry { header "llvm/PassRegistry.h" export * }
+ module InitializePasses { header "llvm/InitializePasses.h" export * }
+}
+
+module LLVM_IR {
+ requires cplusplus
+
+ umbrella "llvm/IR"
+ module * { export * }
+
+ extern module LLVM_Extern_IR_Attributes_Gen "module.extern.modulemap"
+ extern module LLVM_Extern_IR_Intrinsics_Gen "module.extern.modulemap"
+ extern module LLVM_Extern_IR_Intrinsics_Enum "module.extern.modulemap"
+
+ // These are intended for (repeated) textual inclusion.
+ textual header "llvm/IR/ConstrainedOps.def"
+ textual header "llvm/IR/DebugInfoFlags.def"
+ textual header "llvm/IR/Instruction.def"
+ textual header "llvm/IR/Metadata.def"
+ textual header "llvm/IR/FixedMetadataKinds.def"
+ textual header "llvm/IR/Value.def"
+ textual header "llvm/IR/VPIntrinsics.def"
+ textual header "llvm/IR/RuntimeLibcalls.def"
+}
+
+module LLVM_IRReader {
+ requires cplusplus
+ umbrella "llvm/IRReader"
+ module * { export * }
+}
+
+module LLVM_LineEditor {
+ requires cplusplus
+ umbrella "llvm/LineEditor"
+ module * { export * }
+}
+
+module LLVM_LTO {
+ requires cplusplus
+ umbrella "llvm/LTO"
+ module * { export * }
+}
+
+module LLVM_MC {
+ requires cplusplus
+
+ umbrella "llvm/MC"
+ module * { export * }
+}
+
+module LLVM_Object {
+ requires cplusplus
+ umbrella "llvm/Object"
+ module * { export * }
+}
+
+module LLVM_Option {
+ requires cplusplus
+ umbrella "llvm/Option"
+ module * { export * }
+}
+
+module LLVM_ProfileData {
+ requires cplusplus
+
+ umbrella "llvm/ProfileData"
+ module * { export * }
+
+ textual header "llvm/ProfileData/InstrProfData.inc"
+ textual header "llvm/ProfileData/MemProfData.inc"
+ textual header "llvm/ProfileData/MIBEntryDef.inc"
+}
+
+// FIXME: Mislayered?
+module LLVM_Support_TargetRegistry {
+ requires cplusplus
+ header "llvm/Support/TargetRegistry.h"
+ export *
+}
+
+module LLVM_TableGen {
+ requires cplusplus
+ umbrella "llvm/TableGen"
+ module * { export * }
+}
+
+module LLVM_Transforms {
+ requires cplusplus
+ umbrella "llvm/Transforms"
+
+ module * { export * }
+
+ // Requires DEBUG_TYPE to be defined by including file.
+ exclude header "llvm/Transforms/Utils/InstructionWorklist.h"
+}
+
+extern module LLVM_Extern_Utils_DataTypes "module.extern.modulemap"
+
+// Build the module with the tablegen-generated files needed by the
+// TargetParser module before building the TargetParser module itself.
+module TargetParserGen {
+ module RISCVTargetParserDef {
+ header "llvm/TargetParser/RISCVTargetParser.h"
+ extern module LLVM_Extern_TargetParser_Gen "module.extern.modulemap"
+ export *
+ }
+}
+
+// A module covering ADT/ and Support/. These are intertwined and
+// codependent, and notionally form a single module.
+module LLVM_Utils {
+ module ADT {
+ requires cplusplus
+
+ umbrella "llvm/ADT"
+ module * { export * }
+ }
+
+ module Demangle {
+ requires cplusplus
+
+ umbrella "llvm/Demangle"
+ module * { export * }
+
+ textual header "llvm/Demangle/ItaniumNodes.def"
+ }
+
+ module Support {
+ requires cplusplus
+
+ umbrella "llvm/Support"
+ module * { export * }
+
+ // Exclude this; deprecated.
+ exclude header "llvm/Support/Host.h"
+
+ // Exclude this; it should only be used on Windows.
+ exclude header "llvm/Support/Windows/WindowsSupport.h"
+
+ // Exclude these; they are fundamentally non-modular.
+ exclude header "llvm/Support/PluginLoader.h"
+ exclude header "llvm/Support/Solaris/sys/regset.h"
+ textual header "llvm/Support/TargetOpcodes.def"
+
+ }
+
+ module TargetParser {
+ requires cplusplus
+
+ umbrella "llvm/TargetParser"
+ module * { export * }
+
+ // These are intended for textual inclusion.
+ textual header "llvm/TargetParser/ARMTargetParser.def"
+ textual header "llvm/TargetParser/CSKYTargetParser.def"
+ textual header "llvm/TargetParser/X86TargetParser.def"
+ textual header "llvm/TargetParser/LoongArchTargetParser.def"
+ }
+
+ // This part of the module is usable from both C and C++ code.
+ module ConvertUTF {
+ header "llvm/Support/ConvertUTF.h"
+ export *
+ }
+}
+
+// This is used for a $src == $build compilation. Otherwise we use
+// LLVM_Support_DataTypes_Build, defined in a module map that is
+// copied into the build area.
+module LLVM_Support_DataTypes_Src {
+ header "llvm/Support/DataTypes.h"
+ export *
+}
+
+module LLVM_WindowsManifest {
+ requires cplusplus
+ umbrella "llvm/WindowsManifest"
+ module * { export * }
+}
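
The map above marks the various .def files as "textual header" because they are X-macro fragments meant to be preprocessed repeatedly under different macro definitions, not compiled once as a modular header. A minimal self-contained sketch of that pattern, using made-up names (COLORS_DEF, HANDLE_COLOR) rather than any header from this import:

    #include <cstdio>

    // Stand-in for a .def textual header: the entry list lives in one macro and
    // is expanded several times with different definitions of HANDLE_COLOR, much
    // like llvm/IR/Instruction.def is expanded by its various consumers.
    #define COLORS_DEF                                                           \
      HANDLE_COLOR(Red, 0)                                                       \
      HANDLE_COLOR(Green, 1)                                                     \
      HANDLE_COLOR(Blue, 2)

    enum Color {
    #define HANDLE_COLOR(name, value) name = value,
      COLORS_DEF
    #undef HANDLE_COLOR
    };

    static const char *colorName(Color C) {
      switch (C) {
    #define HANDLE_COLOR(name, value)                                            \
      case name:                                                                 \
        return #name;
        COLORS_DEF
    #undef HANDLE_COLOR
      }
      return "unknown";
    }

    int main() { std::printf("%s\n", colorName(Green)); }

Because such files have no include guards and depend entirely on the includer's macro context, treating them as modular headers would be wrong, hence "textual header" rather than a regular header entry.
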
diff --git a/llvm/include/llvm/module.modulemap.build b/llvm/include/module.modulemap.build
index 162a262a00a7..2a5b23f2a412 100644
--- a/llvm/include/llvm/module.modulemap.build
+++ b/llvm/include/module.modulemap.build
@@ -1,13 +1,13 @@
// This is copied into the build area for a $src != $build compilation.
module LLVM_Support_DataTypes {
- header "Support/DataTypes.h"
+ header "llvm/Support/DataTypes.h"
export *
}
module LLVM_Config_ABI_Breaking {
- header "Config/abi-breaking.h"
+ header "llvm/Config/abi-breaking.h"
export *
}
module LLVM_Config_Config {
- header "Config/llvm-config.h"
+ header "llvm/Config/llvm-config.h"
export *
}
diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp
index 9e24f6b87bdb..7b2f91f5392a 100644
--- a/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -227,12 +227,12 @@ ModRefInfo AAResults::getModRefInfo(const CallBase *Call,
// We can completely ignore inaccessible memory here, because MemoryLocations
// can only reference accessible memory.
auto ME = getMemoryEffects(Call, AAQI)
- .getWithoutLoc(MemoryEffects::InaccessibleMem);
+ .getWithoutLoc(IRMemLocation::InaccessibleMem);
if (ME.doesNotAccessMemory())
return ModRefInfo::NoModRef;
- ModRefInfo ArgMR = ME.getModRef(MemoryEffects::ArgMem);
- ModRefInfo OtherMR = ME.getWithoutLoc(MemoryEffects::ArgMem).getModRef();
+ ModRefInfo ArgMR = ME.getModRef(IRMemLocation::ArgMem);
+ ModRefInfo OtherMR = ME.getWithoutLoc(IRMemLocation::ArgMem).getModRef();
if ((ArgMR | OtherMR) != OtherMR) {
// Refine the modref info for argument memory. We only bother to do this
// if ArgMR is not a subset of OtherMR, otherwise this won't have an impact
@@ -442,15 +442,15 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, ModRefInfo MR) {
}
raw_ostream &llvm::operator<<(raw_ostream &OS, MemoryEffects ME) {
- for (MemoryEffects::Location Loc : MemoryEffects::locations()) {
+ for (IRMemLocation Loc : MemoryEffects::locations()) {
switch (Loc) {
- case MemoryEffects::ArgMem:
+ case IRMemLocation::ArgMem:
OS << "ArgMem: ";
break;
- case MemoryEffects::InaccessibleMem:
+ case IRMemLocation::InaccessibleMem:
OS << "InaccessibleMem: ";
break;
- case MemoryEffects::Other:
+ case IRMemLocation::Other:
OS << "Other: ";
break;
}
@@ -768,10 +768,6 @@ INITIALIZE_PASS_DEPENDENCY(TypeBasedAAWrapperPass)
INITIALIZE_PASS_END(AAResultsWrapperPass, "aa",
"Function Alias Analysis Results", false, true)
-FunctionPass *llvm::createAAResultsWrapperPass() {
- return new AAResultsWrapperPass();
-}
-
/// Run the wrapper pass to rebuild an aggregation over known AA passes.
///
/// This is the legacy pass manager's interface to the new-style AA results
@@ -840,29 +836,6 @@ AAManager::Result AAManager::run(Function &F, FunctionAnalysisManager &AM) {
return R;
}
-AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F,
- BasicAAResult &BAR) {
- AAResults AAR(P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F));
-
- // Add in our explicitly constructed BasicAA results.
- if (!DisableBasicAA)
- AAR.addAAResult(BAR);
-
- // Populate the results with the other currently available AAs.
- if (auto *WrapperPass =
- P.getAnalysisIfAvailable<ScopedNoAliasAAWrapperPass>())
- AAR.addAAResult(WrapperPass->getResult());
- if (auto *WrapperPass = P.getAnalysisIfAvailable<TypeBasedAAWrapperPass>())
- AAR.addAAResult(WrapperPass->getResult());
- if (auto *WrapperPass = P.getAnalysisIfAvailable<GlobalsAAWrapperPass>())
- AAR.addAAResult(WrapperPass->getResult());
- if (auto *WrapperPass = P.getAnalysisIfAvailable<ExternalAAWrapperPass>())
- if (WrapperPass->CB)
- WrapperPass->CB(P, F, AAR);
-
- return AAR;
-}
-
bool llvm::isNoAliasCall(const Value *V) {
if (const auto *Call = dyn_cast<CallBase>(V))
return Call->hasRetAttr(Attribute::NoAlias);
@@ -935,14 +908,3 @@ bool llvm::isNotVisibleOnUnwind(const Value *Object,
return false;
}
-
-void llvm::getAAResultsAnalysisUsage(AnalysisUsage &AU) {
- // This function needs to be in sync with llvm::createLegacyPMAAResults -- if
- // more alias analyses are added to llvm::createLegacyPMAAResults, they need
- // to be added here also.
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addUsedIfAvailable<ScopedNoAliasAAWrapperPass>();
- AU.addUsedIfAvailable<TypeBasedAAWrapperPass>();
- AU.addUsedIfAvailable<GlobalsAAWrapperPass>();
- AU.addUsedIfAvailable<ExternalAAWrapperPass>();
-}
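
The getModRefInfo hunk above only bothers to refine argument memory when (ArgMR | OtherMR) != OtherMR. As a reminder of why that expression is a subset test on the ModRefInfo bitmask, here is a small standalone sketch; MR and isSubset are stand-in names, not LLVM API:

    #include <cstdio>

    // ModRefInfo-style lattice: Ref and Mod are independent bits, ModRef is both.
    enum class MR : unsigned { NoModRef = 0, Ref = 1, Mod = 2, ModRef = 3 };

    static MR operator|(MR A, MR B) {
      return static_cast<MR>(static_cast<unsigned>(A) | static_cast<unsigned>(B));
    }

    // A is a subset of B exactly when OR-ing A into B adds no new bits.
    static bool isSubset(MR A, MR B) { return (A | B) == B; }

    int main() {
      // ArgMR = ModRef, OtherMR = Ref: not a subset, so refining the
      // argument-memory answer can still tighten the overall result.
      std::printf("refine needed: %d\n", !isSubset(MR::ModRef, MR::Ref)); // 1
      // ArgMR = Ref, OtherMR = ModRef: already covered, refinement cannot help.
      std::printf("refine needed: %d\n", !isSubset(MR::Ref, MR::ModRef)); // 0
    }
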
diff --git a/llvm/lib/Analysis/AliasAnalysisSummary.cpp b/llvm/lib/Analysis/AliasAnalysisSummary.cpp
deleted file mode 100644
index a91791c0b4d5..000000000000
--- a/llvm/lib/Analysis/AliasAnalysisSummary.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-#include "AliasAnalysisSummary.h"
-#include "llvm/IR/Argument.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Support/Compiler.h"
-
-namespace llvm {
-namespace cflaa {
-
-namespace {
-const unsigned AttrEscapedIndex = 0;
-const unsigned AttrUnknownIndex = 1;
-const unsigned AttrGlobalIndex = 2;
-const unsigned AttrCallerIndex = 3;
-const unsigned AttrFirstArgIndex = 4;
-const unsigned AttrLastArgIndex = NumAliasAttrs;
-const unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex;
-
-// It would be *slightly* prettier if we changed these to AliasAttrs, but it
-// seems that both GCC and MSVC emit dynamic initializers for const bitsets.
-using AliasAttr = unsigned;
-const AliasAttr AttrNone = 0;
-const AliasAttr AttrEscaped = 1 << AttrEscapedIndex;
-const AliasAttr AttrUnknown = 1 << AttrUnknownIndex;
-const AliasAttr AttrGlobal = 1 << AttrGlobalIndex;
-const AliasAttr AttrCaller = 1 << AttrCallerIndex;
-const AliasAttr ExternalAttrMask = AttrEscaped | AttrUnknown | AttrGlobal;
-}
-
-AliasAttrs getAttrNone() { return AttrNone; }
-
-AliasAttrs getAttrUnknown() { return AttrUnknown; }
-bool hasUnknownAttr(AliasAttrs Attr) { return Attr.test(AttrUnknownIndex); }
-
-AliasAttrs getAttrCaller() { return AttrCaller; }
-bool hasCallerAttr(AliasAttrs Attr) { return Attr.test(AttrCaller); }
-bool hasUnknownOrCallerAttr(AliasAttrs Attr) {
- return Attr.test(AttrUnknownIndex) || Attr.test(AttrCallerIndex);
-}
-
-AliasAttrs getAttrEscaped() { return AttrEscaped; }
-bool hasEscapedAttr(AliasAttrs Attr) { return Attr.test(AttrEscapedIndex); }
-
-static AliasAttr argNumberToAttr(unsigned ArgNum) {
- if (ArgNum >= AttrMaxNumArgs)
- return AttrUnknown;
-// N.B. MSVC complains if we use `1U` here, since AliasAttr's ctor takes
- // an unsigned long long.
- return AliasAttr(1ULL << (ArgNum + AttrFirstArgIndex));
-}
-
-AliasAttrs getGlobalOrArgAttrFromValue(const Value &Val) {
- if (isa<GlobalValue>(Val))
- return AttrGlobal;
-
- if (auto *Arg = dyn_cast<Argument>(&Val))
- // Only pointer arguments should have the argument attribute,
- // because things can't escape through scalars without us seeing a
- // cast, and thus, interaction with them doesn't matter.
- if (!Arg->hasNoAliasAttr() && Arg->getType()->isPointerTy())
- return argNumberToAttr(Arg->getArgNo());
- return AttrNone;
-}
-
-bool isGlobalOrArgAttr(AliasAttrs Attr) {
- return Attr.reset(AttrEscapedIndex)
- .reset(AttrUnknownIndex)
- .reset(AttrCallerIndex)
- .any();
-}
-
-AliasAttrs getExternallyVisibleAttrs(AliasAttrs Attr) {
- return Attr & AliasAttrs(ExternalAttrMask);
-}
-
-std::optional<InstantiatedValue>
-instantiateInterfaceValue(InterfaceValue IValue, CallBase &Call) {
- auto Index = IValue.Index;
- auto *V = (Index == 0) ? &Call : Call.getArgOperand(Index - 1);
- if (V->getType()->isPointerTy())
- return InstantiatedValue{V, IValue.DerefLevel};
- return std::nullopt;
-}
-
-std::optional<InstantiatedRelation>
-instantiateExternalRelation(ExternalRelation ERelation, CallBase &Call) {
- auto From = instantiateInterfaceValue(ERelation.From, Call);
- if (!From)
- return std::nullopt;
- auto To = instantiateInterfaceValue(ERelation.To, Call);
- if (!To)
- return std::nullopt;
- return InstantiatedRelation{*From, *To, ERelation.Offset};
-}
-
-std::optional<InstantiatedAttr>
-instantiateExternalAttribute(ExternalAttribute EAttr, CallBase &Call) {
- auto Value = instantiateInterfaceValue(EAttr.IValue, Call);
- if (!Value)
- return std::nullopt;
- return InstantiatedAttr{*Value, EAttr.Attr};
-}
-}
-}
diff --git a/llvm/lib/Analysis/AliasAnalysisSummary.h b/llvm/lib/Analysis/AliasAnalysisSummary.h
deleted file mode 100644
index ab337bad22c7..000000000000
--- a/llvm/lib/Analysis/AliasAnalysisSummary.h
+++ /dev/null
@@ -1,268 +0,0 @@
-//=====- CFLSummary.h - Abstract stratified sets implementation. --------=====//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-/// \file
-/// This file defines various utility types and functions useful to
-/// summary-based alias analysis.
-///
-/// Summary-based analysis, also known as bottom-up analysis, is a style of
-/// interprocedural static analysis that tries to analyze the callees before the
-/// callers get analyzed. The key idea of summary-based analysis is to first
-/// process each function independently, outline its behavior in a condensed
-/// summary, and then instantiate the summary at the callsite when the said
-/// function is called elsewhere. This is often in contrast to another style
-/// called top-down analysis, in which callers are always analyzed first before
-/// the callees.
-///
-/// In a summary-based analysis, functions must be examined independently and
-/// out-of-context. We have no information on the state of the memory, the
-/// arguments, the global values, and anything else external to the function. To
-/// carry out the analysis conservative assumptions have to be made about those
-/// external states. In exchange for the potential loss of precision, the
-/// summary we obtain this way is highly reusable, which makes the analysis
-/// easier to scale to large programs even if carried out context-sensitively.
-///
-/// Currently, all CFL-based alias analyses adopt the summary-based approach
-/// and therefore heavily rely on this header.
-///
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ANALYSIS_ALIASANALYSISSUMMARY_H
-#define LLVM_ANALYSIS_ALIASANALYSISSUMMARY_H
-
-#include "llvm/ADT/DenseMapInfo.h"
-#include "llvm/ADT/SmallVector.h"
-#include <bitset>
-#include <optional>
-
-namespace llvm {
-
-class CallBase;
-class Value;
-
-namespace cflaa {
-
-//===----------------------------------------------------------------------===//
-// AliasAttr related stuffs
-//===----------------------------------------------------------------------===//
-
-/// The number of attributes that AliasAttr should contain. Attributes are
-/// described below, and 32 was an arbitrary choice because it fits nicely in 32
-/// bits (because we use a bitset for AliasAttr).
-static const unsigned NumAliasAttrs = 32;
-
-/// These are attributes that an alias analysis can use to mark certain special
-/// properties of a given pointer. Refer to the related functions below to see
-/// what kinds of attributes are currently defined.
-typedef std::bitset<NumAliasAttrs> AliasAttrs;
-
-/// Attr represent whether the said pointer comes from an unknown source
-/// (such as opaque memory or an integer cast).
-AliasAttrs getAttrNone();
-
-/// AttrUnknown represent whether the said pointer comes from a source not known
-/// to alias analyses (such as opaque memory or an integer cast).
-AliasAttrs getAttrUnknown();
-bool hasUnknownAttr(AliasAttrs);
-
-/// AttrCaller represent whether the said pointer comes from a source not known
-/// to the current function but known to the caller. Values pointed to by the
-/// arguments of the current function have this attribute set
-AliasAttrs getAttrCaller();
-bool hasCallerAttr(AliasAttrs);
-bool hasUnknownOrCallerAttr(AliasAttrs);
-
-/// AttrEscaped represent whether the said pointer comes from a known source but
-/// escapes to the unknown world (e.g. casted to an integer, or passed as an
-/// argument to opaque function). Unlike non-escaped pointers, escaped ones may
-/// alias pointers coming from unknown sources.
-AliasAttrs getAttrEscaped();
-bool hasEscapedAttr(AliasAttrs);
-
-/// AttrGlobal represent whether the said pointer is a global value.
-/// AttrArg represent whether the said pointer is an argument, and if so, what
-/// index the argument has.
-AliasAttrs getGlobalOrArgAttrFromValue(const Value &);
-bool isGlobalOrArgAttr(AliasAttrs);
-
-/// Given an AliasAttrs, return a new AliasAttrs that only contains attributes
-/// meaningful to the caller. This function is primarily used for
-/// interprocedural analysis
-/// Currently, externally visible AliasAttrs include AttrUnknown, AttrGlobal,
-/// and AttrEscaped
-AliasAttrs getExternallyVisibleAttrs(AliasAttrs);
-
-//===----------------------------------------------------------------------===//
-// Function summary related stuffs
-//===----------------------------------------------------------------------===//
-
-/// The maximum number of arguments we can put into a summary.
-static const unsigned MaxSupportedArgsInSummary = 50;
-
-/// We use InterfaceValue to describe parameters/return value, as well as
-/// potential memory locations that are pointed to by parameters/return value,
-/// of a function.
-/// Index is an integer which represents a single parameter or a return value.
-/// When the index is 0, it refers to the return value. Non-zero index i refers
-/// to the i-th parameter.
-/// DerefLevel indicates the number of dereferences one must perform on the
-/// parameter/return value to get this InterfaceValue.
-struct InterfaceValue {
- unsigned Index;
- unsigned DerefLevel;
-};
-
-inline bool operator==(InterfaceValue LHS, InterfaceValue RHS) {
- return LHS.Index == RHS.Index && LHS.DerefLevel == RHS.DerefLevel;
-}
-inline bool operator!=(InterfaceValue LHS, InterfaceValue RHS) {
- return !(LHS == RHS);
-}
-inline bool operator<(InterfaceValue LHS, InterfaceValue RHS) {
- return LHS.Index < RHS.Index ||
- (LHS.Index == RHS.Index && LHS.DerefLevel < RHS.DerefLevel);
-}
-inline bool operator>(InterfaceValue LHS, InterfaceValue RHS) {
- return RHS < LHS;
-}
-inline bool operator<=(InterfaceValue LHS, InterfaceValue RHS) {
- return !(RHS < LHS);
-}
-inline bool operator>=(InterfaceValue LHS, InterfaceValue RHS) {
- return !(LHS < RHS);
-}
-
-// We use UnknownOffset to represent pointer offsets that cannot be determined
-// at compile time. Note that MemoryLocation::UnknownSize cannot be used here
-// because we require a signed value.
-static const int64_t UnknownOffset = INT64_MAX;
-
-inline int64_t addOffset(int64_t LHS, int64_t RHS) {
- if (LHS == UnknownOffset || RHS == UnknownOffset)
- return UnknownOffset;
- // FIXME: Do we need to guard against integer overflow here?
- return LHS + RHS;
-}
-
-/// We use ExternalRelation to describe an externally visible aliasing relations
-/// between parameters/return value of a function.
-struct ExternalRelation {
- InterfaceValue From, To;
- int64_t Offset;
-};
-
-inline bool operator==(ExternalRelation LHS, ExternalRelation RHS) {
- return LHS.From == RHS.From && LHS.To == RHS.To && LHS.Offset == RHS.Offset;
-}
-inline bool operator!=(ExternalRelation LHS, ExternalRelation RHS) {
- return !(LHS == RHS);
-}
-inline bool operator<(ExternalRelation LHS, ExternalRelation RHS) {
- if (LHS.From < RHS.From)
- return true;
- if (LHS.From > RHS.From)
- return false;
- if (LHS.To < RHS.To)
- return true;
- if (LHS.To > RHS.To)
- return false;
- return LHS.Offset < RHS.Offset;
-}
-inline bool operator>(ExternalRelation LHS, ExternalRelation RHS) {
- return RHS < LHS;
-}
-inline bool operator<=(ExternalRelation LHS, ExternalRelation RHS) {
- return !(RHS < LHS);
-}
-inline bool operator>=(ExternalRelation LHS, ExternalRelation RHS) {
- return !(LHS < RHS);
-}
-
-/// We use ExternalAttribute to describe an externally visible AliasAttrs
-/// for parameters/return value.
-struct ExternalAttribute {
- InterfaceValue IValue;
- AliasAttrs Attr;
-};
-
-/// AliasSummary is just a collection of ExternalRelation and ExternalAttribute
-struct AliasSummary {
- // RetParamRelations is a collection of ExternalRelations.
- SmallVector<ExternalRelation, 8> RetParamRelations;
-
- // RetParamAttributes is a collection of ExternalAttributes.
- SmallVector<ExternalAttribute, 8> RetParamAttributes;
-};
-
-/// This is the result of instantiating InterfaceValue at a particular call
-struct InstantiatedValue {
- Value *Val;
- unsigned DerefLevel;
-};
-std::optional<InstantiatedValue>
-instantiateInterfaceValue(InterfaceValue IValue, CallBase &Call);
-
-inline bool operator==(InstantiatedValue LHS, InstantiatedValue RHS) {
- return LHS.Val == RHS.Val && LHS.DerefLevel == RHS.DerefLevel;
-}
-inline bool operator!=(InstantiatedValue LHS, InstantiatedValue RHS) {
- return !(LHS == RHS);
-}
-inline bool operator<(InstantiatedValue LHS, InstantiatedValue RHS) {
- return std::less<Value *>()(LHS.Val, RHS.Val) ||
- (LHS.Val == RHS.Val && LHS.DerefLevel < RHS.DerefLevel);
-}
-inline bool operator>(InstantiatedValue LHS, InstantiatedValue RHS) {
- return RHS < LHS;
-}
-inline bool operator<=(InstantiatedValue LHS, InstantiatedValue RHS) {
- return !(RHS < LHS);
-}
-inline bool operator>=(InstantiatedValue LHS, InstantiatedValue RHS) {
- return !(LHS < RHS);
-}
-
-/// This is the result of instantiating ExternalRelation at a particular
-/// callsite
-struct InstantiatedRelation {
- InstantiatedValue From, To;
- int64_t Offset;
-};
-std::optional<InstantiatedRelation>
-instantiateExternalRelation(ExternalRelation ERelation, CallBase &Call);
-
-/// This is the result of instantiating ExternalAttribute at a particular
-/// callsite
-struct InstantiatedAttr {
- InstantiatedValue IValue;
- AliasAttrs Attr;
-};
-std::optional<InstantiatedAttr>
-instantiateExternalAttribute(ExternalAttribute EAttr, CallBase &Call);
-}
-
-template <> struct DenseMapInfo<cflaa::InstantiatedValue> {
- static inline cflaa::InstantiatedValue getEmptyKey() {
- return cflaa::InstantiatedValue{DenseMapInfo<Value *>::getEmptyKey(),
- DenseMapInfo<unsigned>::getEmptyKey()};
- }
- static inline cflaa::InstantiatedValue getTombstoneKey() {
- return cflaa::InstantiatedValue{DenseMapInfo<Value *>::getTombstoneKey(),
- DenseMapInfo<unsigned>::getTombstoneKey()};
- }
- static unsigned getHashValue(const cflaa::InstantiatedValue &IV) {
- return DenseMapInfo<std::pair<Value *, unsigned>>::getHashValue(
- std::make_pair(IV.Val, IV.DerefLevel));
- }
- static bool isEqual(const cflaa::InstantiatedValue &LHS,
- const cflaa::InstantiatedValue &RHS) {
- return LHS.Val == RHS.Val && LHS.DerefLevel == RHS.DerefLevel;
- }
-};
-}
-
-#endif
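
The deleted header's opening comment describes summary-based (bottom-up) interprocedural analysis: summarize each callee once, out of context, then instantiate that summary at every call site. A toy standalone sketch of the workflow, with made-up names (ParamSummary, instantiateAtCallSite) and no relation to the removed CFL-AA types:

    #include <cstdio>
    #include <string>
    #include <vector>

    // A callee summary: which parameter indices may be returned. Computed once
    // per function, with no knowledge of any particular caller.
    struct ParamSummary {
      std::vector<unsigned> ReturnedParams;
    };

    // Pretend result of analyzing `secondOf(a, b) { return b; }` bottom-up.
    static ParamSummary summarizeSecondOf() { return ParamSummary{{1}}; }

    // Callers reuse the summary: plug actual arguments into the parameter slots
    // instead of re-analyzing the callee body at each call site.
    static std::vector<std::string>
    instantiateAtCallSite(const ParamSummary &S,
                          const std::vector<std::string> &Args) {
      std::vector<std::string> MayReturn;
      for (unsigned Idx : S.ReturnedParams)
        MayReturn.push_back(Args[Idx]);
      return MayReturn;
    }

    int main() {
      ParamSummary S = summarizeSecondOf(); // analyzed exactly once
      for (const std::string &V : instantiateAtCallSite(S, {"p", "q"}))
        std::printf("call site 1 may return %s\n", V.c_str()); // q
      for (const std::string &V : instantiateAtCallSite(S, {"x", "y"}))
        std::printf("call site 2 may return %s\n", V.c_str()); // y
    }
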
diff --git a/llvm/lib/Analysis/AliasSetTracker.cpp b/llvm/lib/Analysis/AliasSetTracker.cpp
index 1c9ebadf3649..91b889116dfa 100644
--- a/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/MemoryLocation.h"
diff --git a/llvm/lib/Analysis/Analysis.cpp b/llvm/lib/Analysis/Analysis.cpp
index c1b843d74600..5461ce07af0b 100644
--- a/llvm/lib/Analysis/Analysis.cpp
+++ b/llvm/lib/Analysis/Analysis.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm-c/Analysis.h"
-#include "llvm-c/Initialization.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
@@ -35,7 +34,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCycleInfoWrapperPassPass(Registry);
initializeDependenceAnalysisWrapperPassPass(Registry);
initializeDelinearizationPass(Registry);
- initializeDemandedBitsWrapperPassPass(Registry);
initializeDominanceFrontierWrapperPassPass(Registry);
initializeDomViewerWrapperPassPass(Registry);
initializeDomPrinterWrapperPassPass(Registry);
@@ -55,16 +53,9 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeLazyBlockFrequencyInfoPassPass(Registry);
initializeLazyValueInfoWrapperPassPass(Registry);
initializeLazyValueInfoPrinterPass(Registry);
- initializeLegacyDivergenceAnalysisPass(Registry);
- initializeLintLegacyPassPass(Registry);
initializeLoopInfoWrapperPassPass(Registry);
- initializeMemDepPrinterPass(Registry);
- initializeMemDerefPrinterPass(Registry);
initializeMemoryDependenceWrapperPassPass(Registry);
- initializeModuleDebugInfoLegacyPrinterPass(Registry);
initializeModuleSummaryIndexWrapperPassPass(Registry);
- initializeMustExecutePrinterPass(Registry);
- initializeMustBeExecutedContextPrinterPass(Registry);
initializeOptimizationRemarkEmitterWrapperPassPass(Registry);
initializePhiValuesWrapperPassPass(Registry);
initializePostDominatorTreeWrapperPassPass(Registry);
@@ -82,15 +73,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeScopedNoAliasAAWrapperPassPass(Registry);
initializeLCSSAVerificationPassPass(Registry);
initializeMemorySSAWrapperPassPass(Registry);
- initializeMemorySSAPrinterLegacyPassPass(Registry);
-}
-
-void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
- initializeAnalysis(*unwrap(R));
-}
-
-void LLVMInitializeIPA(LLVMPassRegistryRef R) {
- initializeAnalysis(*unwrap(R));
}
LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,
diff --git a/llvm/lib/Analysis/AssumeBundleQueries.cpp b/llvm/lib/Analysis/AssumeBundleQueries.cpp
index 110cddb4a065..7440dbd29ccf 100644
--- a/llvm/lib/Analysis/AssumeBundleQueries.cpp
+++ b/llvm/lib/Analysis/AssumeBundleQueries.cpp
@@ -162,7 +162,7 @@ llvm::getKnowledgeForValue(const Value *V,
return RetainedKnowledge::none();
if (AC) {
for (AssumptionCache::ResultElem &Elem : AC->assumptionsFor(V)) {
- auto *II = dyn_cast_or_null<AssumeInst>(Elem.Assume);
+ auto *II = cast_or_null<AssumeInst>(Elem.Assume);
if (!II || Elem.Index == AssumptionCache::ExprResultIdx)
continue;
if (RetainedKnowledge RK = getKnowledgeFromBundle(
diff --git a/llvm/lib/Analysis/AssumptionCache.cpp b/llvm/lib/Analysis/AssumptionCache.cpp
index 2d648ccee46c..b439dc1e6a76 100644
--- a/llvm/lib/Analysis/AssumptionCache.cpp
+++ b/llvm/lib/Analysis/AssumptionCache.cpp
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains a pass that keeps track of @llvm.assume and
-// @llvm.experimental.guard intrinsics in the functions of a module.
+// This file contains a pass that keeps track of @llvm.assume intrinsics in
+// the functions of a module.
//
//===----------------------------------------------------------------------===//
@@ -87,7 +87,7 @@ findAffectedValues(CallBase *CI, TargetTransformInfo *TTI,
AddAffected(Cond);
CmpInst::Predicate Pred;
- if (match(Cond, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
+ if (match(Cond, m_Cmp(Pred, m_Value(A), m_Value(B)))) {
AddAffected(A);
AddAffected(B);
@@ -128,7 +128,18 @@ findAffectedValues(CallBase *CI, TargetTransformInfo *TTI,
if (match(A, m_Add(m_Value(X), m_ConstantInt())) &&
match(B, m_ConstantInt()))
AddAffected(X);
+ } else if (CmpInst::isFPPredicate(Pred)) {
+ // fcmp fneg(x), y
+ // fcmp fabs(x), y
+ // fcmp fneg(fabs(x)), y
+ if (match(A, m_FNeg(m_Value(A))))
+ AddAffected(A);
+ if (match(A, m_FAbs(m_Value(A))))
+ AddAffected(A);
}
+ } else if (match(Cond, m_Intrinsic<Intrinsic::is_fpclass>(m_Value(A),
+ m_Value(B)))) {
+ AddAffected(A);
}
if (TTI) {
@@ -140,7 +151,7 @@ findAffectedValues(CallBase *CI, TargetTransformInfo *TTI,
}
}
-void AssumptionCache::updateAffectedValues(CondGuardInst *CI) {
+void AssumptionCache::updateAffectedValues(AssumeInst *CI) {
SmallVector<AssumptionCache::ResultElem, 16> Affected;
findAffectedValues(CI, TTI, Affected);
@@ -153,7 +164,7 @@ void AssumptionCache::updateAffectedValues(CondGuardInst *CI) {
}
}
-void AssumptionCache::unregisterAssumption(CondGuardInst *CI) {
+void AssumptionCache::unregisterAssumption(AssumeInst *CI) {
SmallVector<AssumptionCache::ResultElem, 16> Affected;
findAffectedValues(CI, TTI, Affected);
@@ -217,7 +228,7 @@ void AssumptionCache::scanFunction() {
// to this cache.
for (BasicBlock &B : F)
for (Instruction &I : B)
- if (isa<CondGuardInst>(&I))
+ if (isa<AssumeInst>(&I))
AssumeHandles.push_back({&I, ExprResultIdx});
// Mark the scan as complete.
@@ -225,10 +236,10 @@ void AssumptionCache::scanFunction() {
// Update affected values.
for (auto &A : AssumeHandles)
- updateAffectedValues(cast<CondGuardInst>(A));
+ updateAffectedValues(cast<AssumeInst>(A));
}
-void AssumptionCache::registerAssumption(CondGuardInst *CI) {
+void AssumptionCache::registerAssumption(AssumeInst *CI) {
// If we haven't scanned the function yet, just drop this assumption. It will
// be found when we scan later.
if (!Scanned)
@@ -238,9 +249,9 @@ void AssumptionCache::registerAssumption(CondGuardInst *CI) {
#ifndef NDEBUG
assert(CI->getParent() &&
- "Cannot a register CondGuardInst not in a basic block");
+ "Cannot register @llvm.assume call not in a basic block");
assert(&F == CI->getParent()->getParent() &&
- "Cannot a register CondGuardInst not in this function");
+ "Cannot register @llvm.assume call not in this function");
// We expect the number of assumptions to be small, so in an asserts build
// check that we don't accumulate duplicates and that all assumptions point
@@ -252,8 +263,8 @@ void AssumptionCache::registerAssumption(CondGuardInst *CI) {
assert(&F == cast<Instruction>(VH)->getParent()->getParent() &&
"Cached assumption not inside this function!");
- assert(isa<CondGuardInst>(VH) &&
- "Cached something other than CondGuardInst!");
+ assert(match(cast<CallInst>(VH), m_Intrinsic<Intrinsic::assume>()) &&
+ "Cached something other than a call to @llvm.assume!");
assert(AssumptionSet.insert(VH).second &&
"Cache contains multiple copies of a call!");
}
diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index dc728c1cbfeb..16e0e1f66524 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -461,6 +461,17 @@ struct VariableGEPIndex {
/// True if all operations in this expression are NSW.
bool IsNSW;
+ /// True if the index should be subtracted rather than added. We don't simply
+ /// negate the Scale, to avoid losing the NSW flag: X - INT_MIN*1 may be
+ /// non-wrapping, while X + INT_MIN*(-1) wraps.
+ bool IsNegated;
+
+ bool hasNegatedScaleOf(const VariableGEPIndex &Other) const {
+ if (IsNegated == Other.IsNegated)
+ return Scale == -Other.Scale;
+ return Scale == Other.Scale;
+ }
+
void dump() const {
print(dbgs());
dbgs() << "\n";
@@ -470,7 +481,9 @@ struct VariableGEPIndex {
<< ", zextbits=" << Val.ZExtBits
<< ", sextbits=" << Val.SExtBits
<< ", truncbits=" << Val.TruncBits
- << ", scale=" << Scale << ")";
+ << ", scale=" << Scale
+ << ", nsw=" << IsNSW
+ << ", negated=" << IsNegated << ")";
}
};
}
@@ -659,7 +672,8 @@ BasicAAResult::DecomposeGEPExpression(const Value *V, const DataLayout &DL,
Scale = adjustToIndexSize(Scale, IndexSize);
if (!!Scale) {
- VariableGEPIndex Entry = {LE.Val, Scale, CxtI, LE.IsNSW};
+ VariableGEPIndex Entry = {LE.Val, Scale, CxtI, LE.IsNSW,
+ /* IsNegated */ false};
Decomposed.VarIndices.push_back(Entry);
}
}
@@ -864,9 +878,11 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
if (!AI->isStaticAlloca() && isIntrinsicCall(Call, Intrinsic::stackrestore))
return ModRefInfo::Mod;
- // If the pointer is to a locally allocated object that does not escape,
- // then the call can not mod/ref the pointer unless the call takes the pointer
- // as an argument, and itself doesn't capture it.
+ // A call can access a locally allocated object either because it is passed as
+ // an argument to the call, or because it has escaped prior to the call.
+ //
+ // Make sure the object has not escaped here, and then check that none of the
+ // call arguments alias the object below.
if (!isa<Constant>(Object) && Call != Object &&
AAQI.CI->isNotCapturedBeforeOrAt(Object, Call)) {
@@ -877,12 +893,7 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
unsigned OperandNo = 0;
for (auto CI = Call->data_operands_begin(), CE = Call->data_operands_end();
CI != CE; ++CI, ++OperandNo) {
- // Only look at the no-capture or byval pointer arguments. If this
- // pointer were passed to arguments that were neither of these, then it
- // couldn't be no-capture.
- if (!(*CI)->getType()->isPointerTy() ||
- (!Call->doesNotCapture(OperandNo) && OperandNo < Call->arg_size() &&
- !Call->isByValArgument(OperandNo)))
+ if (!(*CI)->getType()->isPointerTy())
continue;
// Call doesn't access memory through this operand, so we don't care
@@ -1134,8 +1145,8 @@ AliasResult BasicAAResult::aliasGEP(
const APInt &Scale = Index.Scale;
APInt ScaleForGCD = Scale;
if (!Index.IsNSW)
- ScaleForGCD = APInt::getOneBitSet(Scale.getBitWidth(),
- Scale.countTrailingZeros());
+ ScaleForGCD =
+ APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());
if (i == 0)
GCD = ScaleForGCD.abs();
@@ -1154,9 +1165,14 @@ AliasResult BasicAAResult::aliasGEP(
assert(OffsetRange.getBitWidth() == Scale.getBitWidth() &&
"Bit widths are normalized to MaxIndexSize");
if (Index.IsNSW)
- OffsetRange = OffsetRange.add(CR.smul_sat(ConstantRange(Scale)));
+ CR = CR.smul_sat(ConstantRange(Scale));
+ else
+ CR = CR.smul_fast(ConstantRange(Scale));
+
+ if (Index.IsNegated)
+ OffsetRange = OffsetRange.sub(CR);
else
- OffsetRange = OffsetRange.add(CR.smul_fast(ConstantRange(Scale)));
+ OffsetRange = OffsetRange.add(CR);
}
// We now have accesses at two offsets from the same base:
@@ -1223,7 +1239,7 @@ AliasResult BasicAAResult::aliasGEP(
// inequality of values across loop iterations.
const VariableGEPIndex &Var0 = DecompGEP1.VarIndices[0];
const VariableGEPIndex &Var1 = DecompGEP1.VarIndices[1];
- if (Var0.Scale == -Var1.Scale && Var0.Val.TruncBits == 0 &&
+ if (Var0.hasNegatedScaleOf(Var1) && Var0.Val.TruncBits == 0 &&
Var0.Val.hasSameCastsAs(Var1.Val) && !AAQI.MayBeCrossIteration &&
isKnownNonEqual(Var0.Val.V, Var1.Val.V, DL, &AC, /* CxtI */ nullptr,
DT))
@@ -1516,6 +1532,8 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
assert(OBU.Inputs.size() == 2);
const Value *Hint1 = OBU.Inputs[0].get();
const Value *Hint2 = OBU.Inputs[1].get();
+ // This is often a no-op; instcombine rewrites this for us. No-op
+ // getUnderlyingObject calls are fast, though.
const Value *HintO1 = getUnderlyingObject(Hint1);
const Value *HintO2 = getUnderlyingObject(Hint2);
@@ -1702,6 +1720,13 @@ void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
!Dest.Val.hasSameCastsAs(Src.Val))
continue;
+ // Normalize IsNegated if we're going to lose the NSW flag anyway.
+ if (Dest.IsNegated) {
+ Dest.Scale = -Dest.Scale;
+ Dest.IsNegated = false;
+ Dest.IsNSW = false;
+ }
+
// If we found it, subtract off Scale V's from the entry in Dest. If it
// goes to zero, remove the entry.
if (Dest.Scale != Src.Scale) {
@@ -1716,7 +1741,8 @@ void BasicAAResult::subtractDecomposedGEPs(DecomposedGEP &DestGEP,
// If we didn't consume this entry, add it to the end of the Dest list.
if (!Found) {
- VariableGEPIndex Entry = {Src.Val, -Src.Scale, Src.CxtI, Src.IsNSW};
+ VariableGEPIndex Entry = {Src.Val, Src.Scale, Src.CxtI, Src.IsNSW,
+ /* IsNegated */ true};
DestGEP.VarIndices.push_back(Entry);
}
}
@@ -1738,7 +1764,7 @@ bool BasicAAResult::constantOffsetHeuristic(const DecomposedGEP &GEP,
const VariableGEPIndex &Var0 = GEP.VarIndices[0], &Var1 = GEP.VarIndices[1];
if (Var0.Val.TruncBits != 0 || !Var0.Val.hasSameCastsAs(Var1.Val) ||
- Var0.Scale != -Var1.Scale ||
+ !Var0.hasNegatedScaleOf(Var1) ||
Var0.Val.V->getType() != Var1.Val.V->getType())
return false;
@@ -1825,10 +1851,3 @@ void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredTransitive<DominatorTreeWrapperPass>();
AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}
-
-BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) {
- return BasicAAResult(
- F.getParent()->getDataLayout(), F,
- P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
- P.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));
-}
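
The new IsNegated flag on VariableGEPIndex exists because negating the most negative scale is itself a wrapping operation, so "-Scale" cannot stand in for "subtract Scale" without discarding the NSW fact. A small standalone illustration with 8-bit integers; the same argument applies to the APInt scales BasicAA tracks:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int8_t Scale = INT8_MIN;                        // -128: no positive counterpart in 8 bits
      int WidenedNegation = -static_cast<int>(Scale); // +128, needs a wider type
      int8_t WrappedNegation =                        // modular conversion: back to -128
          static_cast<int8_t>(WidenedNegation);       // on two's-complement targets

      // "X - (-128)*1" can be a well-defined, non-wrapping subtract, but
      // rewriting it as "X + (-128)*(-1)" forces the negation above, which is
      // not representable in the original width -- exactly the case the
      // IsNegated bit records instead of flipping the sign of Scale.
      std::printf("Scale = %d, -Scale widened = %d, -Scale in 8 bits = %d\n",
                  Scale, WidenedNegation, WrappedNegation);
    }
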
diff --git a/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/llvm/lib/Analysis/BlockFrequencyInfo.cpp
index dd84336da604..b18d04cc73db 100644
--- a/llvm/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -333,9 +333,10 @@ bool BlockFrequencyInfoWrapperPass::runOnFunction(Function &F) {
AnalysisKey BlockFrequencyAnalysis::Key;
BlockFrequencyInfo BlockFrequencyAnalysis::run(Function &F,
FunctionAnalysisManager &AM) {
+ auto &BP = AM.getResult<BranchProbabilityAnalysis>(F);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
BlockFrequencyInfo BFI;
- BFI.calculate(F, AM.getResult<BranchProbabilityAnalysis>(F),
- AM.getResult<LoopAnalysis>(F));
+ BFI.calculate(F, BP, LI);
return BFI;
}
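
In the BlockFrequencyAnalysis::run change above, the two analysis results are pulled into named locals before calculate() is invoked. One plausible reading (an assumption on my part, not something the diff states) is that C++ leaves the evaluation order of call arguments unspecified, whereas named locals pin it; a tiny standalone demonstration, with trace and calculateLike as made-up helpers:

    #include <cstdio>

    static int trace(const char *Name) {
      std::printf("evaluating %s\n", Name);
      return 0;
    }

    static void calculateLike(int, int) {}

    int main() {
      // The two trace() calls may run in either order: the standard does not
      // fix the evaluation order of function arguments.
      calculateLike(trace("argument A"), trace("argument B"));

      // Hoisting them into locals sequences them explicitly, which matters when
      // each call can touch state the other depends on (as getResult() calls on
      // an analysis manager can).
      int A = trace("local A");
      int B = trace("local B");
      calculateLike(A, B);
    }
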
diff --git a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 0945c5688f1f..82b1e3b9eede 100644
--- a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/BlockFrequency.h"
@@ -59,7 +60,7 @@ cl::opt<double> IterativeBFIPrecision(
"iterative-bfi-precision", cl::init(1e-12), cl::Hidden,
cl::desc("Iterative inference: delta convergence precision; smaller values "
"typically lead to better results at the cost of worsen runtime"));
-}
+} // namespace llvm
ScaledNumber<uint64_t> BlockMass::toScaled() const {
if (isFull())
@@ -256,7 +257,7 @@ void Distribution::normalize() {
if (DidOverflow)
Shift = 33;
else if (Total > UINT32_MAX)
- Shift = 33 - countLeadingZeros(Total);
+ Shift = 33 - llvm::countl_zero(Total);
// Early exit if nothing needs to be scaled.
if (!Shift) {
diff --git a/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 7931001d0a2b..b45deccd913d 100644
--- a/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -1163,7 +1163,7 @@ void BranchProbabilityInfo::copyEdgeProbabilities(BasicBlock *Src,
assert(NumSuccessors == Dst->getTerminator()->getNumSuccessors());
if (NumSuccessors == 0)
return; // Nothing to set.
- if (this->Probs.find(std::make_pair(Src, 0)) == this->Probs.end())
+ if (!this->Probs.contains(std::make_pair(Src, 0)))
return; // No probability is set for edges from Src. Keep the same for Dst.
Handles.insert(BasicBlockCallbackVH(Dst, this));
@@ -1175,6 +1175,14 @@ void BranchProbabilityInfo::copyEdgeProbabilities(BasicBlock *Src,
}
}
+void BranchProbabilityInfo::swapSuccEdgesProbabilities(const BasicBlock *Src) {
+ assert(Src->getTerminator()->getNumSuccessors() == 2);
+ if (!Probs.contains(std::make_pair(Src, 0)))
+ return; // No probability is set for edges from Src
+ assert(Probs.contains(std::make_pair(Src, 1)));
+ std::swap(Probs[std::make_pair(Src, 0)], Probs[std::make_pair(Src, 1)]);
+}
+
raw_ostream &
BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS,
const BasicBlock *Src,
@@ -1303,11 +1311,12 @@ void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS,
AnalysisKey BranchProbabilityAnalysis::Key;
BranchProbabilityInfo
BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
BranchProbabilityInfo BPI;
- BPI.calculate(F, AM.getResult<LoopAnalysis>(F),
- &AM.getResult<TargetLibraryAnalysis>(F),
- &AM.getResult<DominatorTreeAnalysis>(F),
- &AM.getResult<PostDominatorTreeAnalysis>(F));
+ BPI.calculate(F, LI, &TLI, &DT, &PDT);
return BPI;
}
diff --git a/llvm/lib/Analysis/CFGPrinter.cpp b/llvm/lib/Analysis/CFGPrinter.cpp
index f8eba1a00f28..f05dd6852d6d 100644
--- a/llvm/lib/Analysis/CFGPrinter.cpp
+++ b/llvm/lib/Analysis/CFGPrinter.cpp
@@ -325,8 +325,7 @@ bool DOTGraphTraits<DOTFuncInfo *>::isNodeHidden(const BasicBlock *Node,
return true;
}
if (HideUnreachablePaths || HideDeoptimizePaths) {
- if (isOnDeoptOrUnreachablePath.find(Node) ==
- isOnDeoptOrUnreachablePath.end())
+ if (!isOnDeoptOrUnreachablePath.contains(Node))
computeDeoptOrUnreachablePaths(Node->getParent());
return isOnDeoptOrUnreachablePath[Node];
}
diff --git a/llvm/lib/Analysis/CGSCCPassManager.cpp b/llvm/lib/Analysis/CGSCCPassManager.cpp
index 2de19884014c..facb9c897da3 100644
--- a/llvm/lib/Analysis/CGSCCPassManager.cpp
+++ b/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -86,11 +86,6 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
PreservedAnalyses PassPA = Pass->run(*C, AM, G, UR);
- if (UR.InvalidatedSCCs.count(C))
- PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass, PassPA);
- else
- PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C, PassPA);
-
// Update the SCC if necessary.
C = UR.UpdatedC ? UR.UpdatedC : C;
if (UR.UpdatedC) {
@@ -107,6 +102,7 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
// If the CGSCC pass wasn't able to provide a valid updated SCC, the
// current SCC may simply need to be skipped if invalid.
if (UR.InvalidatedSCCs.count(C)) {
+ PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass, PassPA);
LLVM_DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n");
break;
}
@@ -117,6 +113,8 @@ PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
// Update the analysis manager as each pass runs and potentially
// invalidates analyses.
AM.invalidate(*C, PassPA);
+
+ PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C, PassPA);
}
// Before we mark all of *this* SCC's analyses as preserved below, intersect
@@ -276,11 +274,6 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
PreservedAnalyses PassPA = Pass->run(*C, CGAM, CG, UR);
- if (UR.InvalidatedSCCs.count(C))
- PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass, PassPA);
- else
- PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C, PassPA);
-
// Update the SCC and RefSCC if necessary.
C = UR.UpdatedC ? UR.UpdatedC : C;
@@ -301,6 +294,7 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
// If the CGSCC pass wasn't able to provide a valid updated SCC,
// the current SCC may simply need to be skipped if invalid.
if (UR.InvalidatedSCCs.count(C)) {
+ PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass, PassPA);
LLVM_DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n");
break;
}
@@ -316,6 +310,8 @@ ModuleToPostOrderCGSCCPassAdaptor::run(Module &M, ModuleAnalysisManager &AM) {
// processed.
CGAM.invalidate(*C, PassPA);
+ PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C, PassPA);
+
// The pass may have restructured the call graph and refined the
// current SCC and/or RefSCC. We need to update our current SCC and
// RefSCC pointers to follow these. Also, when the current SCC is
@@ -408,25 +404,27 @@ PreservedAnalyses DevirtSCCRepeatedPass::run(LazyCallGraph::SCC &InitialC,
PreservedAnalyses PassPA = Pass->run(*C, AM, CG, UR);
- if (UR.InvalidatedSCCs.count(C))
- PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass, PassPA);
- else
- PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C, PassPA);
-
PA.intersect(PassPA);
- // If the SCC structure has changed, bail immediately and let the outer
- // CGSCC layer handle any iteration to reflect the refined structure.
- if (UR.UpdatedC && UR.UpdatedC != C)
- break;
-
// If the CGSCC pass wasn't able to provide a valid updated SCC, the
// current SCC may simply need to be skipped if invalid.
if (UR.InvalidatedSCCs.count(C)) {
+ PI.runAfterPassInvalidated<LazyCallGraph::SCC>(*Pass, PassPA);
LLVM_DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n");
break;
}
+ // Update the analysis manager with each run and intersect the total set
+ // of preserved analyses so we're ready to iterate.
+ AM.invalidate(*C, PassPA);
+
+ PI.runAfterPass<LazyCallGraph::SCC>(*Pass, *C, PassPA);
+
+ // If the SCC structure has changed, bail immediately and let the outer
+ // CGSCC layer handle any iteration to reflect the refined structure.
+ if (UR.UpdatedC && UR.UpdatedC != C)
+ break;
+
assert(C->begin() != C->end() && "Cannot have an empty SCC!");
// Check whether any of the handles were devirtualized.
@@ -490,10 +488,6 @@ PreservedAnalyses DevirtSCCRepeatedPass::run(LazyCallGraph::SCC &InitialC,
// Move over the new call counts in preparation for iterating.
CallCounts = std::move(NewCallCounts);
-
- // Update the analysis manager with each run and intersect the total set
- // of preserved analyses so we're ready to iterate.
- AM.invalidate(*C, PassPA);
}
// Note that we don't add any preserved entries here unlike a more normal
@@ -539,14 +533,13 @@ PreservedAnalyses CGSCCToFunctionPassAdaptor::run(LazyCallGraph::SCC &C,
continue;
PreservedAnalyses PassPA = Pass->run(F, FAM);
- PI.runAfterPass<Function>(*Pass, F, PassPA);
// We know that the function pass couldn't have invalidated any other
// function's analyses (that's the contract of a function pass), so
// directly handle the function analysis manager's invalidation here.
FAM.invalidate(F, EagerlyInvalidate ? PreservedAnalyses::none() : PassPA);
- if (NoRerun)
- (void)FAM.getResult<ShouldNotRunFunctionPassesAnalysis>(F);
+
+ PI.runAfterPass<Function>(*Pass, F, PassPA);
// Then intersect the preserved set so that invalidation of module
// analyses will eventually occur when the module pass completes.
diff --git a/llvm/lib/Analysis/CallGraphSCCPass.cpp b/llvm/lib/Analysis/CallGraphSCCPass.cpp
index d66f1e261780..307dddd51ece 100644
--- a/llvm/lib/Analysis/CallGraphSCCPass.cpp
+++ b/llvm/lib/Analysis/CallGraphSCCPass.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/AbstractCallSite.h"
#include "llvm/IR/Function.h"
diff --git a/llvm/lib/Analysis/CaptureTracking.cpp b/llvm/lib/Analysis/CaptureTracking.cpp
index 7f3a2b49aca9..00e096af3110 100644
--- a/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/llvm/lib/Analysis/CaptureTracking.cpp
@@ -58,17 +58,16 @@ CaptureTracker::~CaptureTracker() = default;
bool CaptureTracker::shouldExplore(const Use *U) { return true; }
bool CaptureTracker::isDereferenceableOrNull(Value *O, const DataLayout &DL) {
- // An inbounds GEP can either be a valid pointer (pointing into
- // or to the end of an allocation), or be null in the default
- // address space. So for an inbounds GEP there is no way to let
- // the pointer escape using clever GEP hacking because doing so
- // would make the pointer point outside of the allocated object
- // and thus make the GEP result a poison value. Similarly, other
- // dereferenceable pointers cannot be manipulated without producing
- // poison.
- if (auto *GEP = dyn_cast<GetElementPtrInst>(O))
- if (GEP->isInBounds())
- return true;
+ // We want comparisons to null pointers to not be considered capturing,
+ // but need to guard against cases like gep(p, -ptrtoint(p2)) == null,
+ // which are equivalent to p == p2 and would capture the pointer.
+ //
+ // A dereferenceable pointer is a case where this is known to be safe,
+ // because the pointer resulting from such a construction would not be
+ // dereferenceable.
+ //
+ // It is not sufficient to check for inbounds GEP here, because GEP with
+ // zero offset is always inbounds.
bool CanBeNull, CanBeFreed;
return O->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
}
@@ -80,7 +79,10 @@ namespace {
const SmallPtrSetImpl<const Value *> &EphValues, bool ReturnCaptures)
: EphValues(EphValues), ReturnCaptures(ReturnCaptures) {}
- void tooManyUses() override { Captured = true; }
+ void tooManyUses() override {
+ LLVM_DEBUG(dbgs() << "Captured due to too many uses\n");
+ Captured = true;
+ }
bool captured(const Use *U) override {
if (isa<ReturnInst>(U->getUser()) && !ReturnCaptures)
@@ -89,6 +91,8 @@ namespace {
if (EphValues.contains(U->getUser()))
return false;
+ LLVM_DEBUG(dbgs() << "Captured by: " << *U->getUser() << "\n");
+
Captured = true;
return true;
}
@@ -233,12 +237,16 @@ bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures,
// take advantage of this.
(void)StoreCaptures;
+ LLVM_DEBUG(dbgs() << "Captured?: " << *V << " = ");
+
SimpleCaptureTracker SCT(EphValues, ReturnCaptures);
PointerMayBeCaptured(V, &SCT, MaxUsesToExplore);
if (SCT.Captured)
++NumCaptured;
- else
+ else {
++NumNotCaptured;
+ LLVM_DEBUG(dbgs() << "not captured\n");
+ }
return SCT.Captured;
}
@@ -403,12 +411,7 @@ UseCaptureKind llvm::DetermineUseCaptureKind(
return UseCaptureKind::NO_CAPTURE;
}
}
- // Comparison against value stored in global variable. Given the pointer
- // does not escape, its value cannot be guessed and stored separately in a
- // global variable.
- auto *LI = dyn_cast<LoadInst>(I->getOperand(OtherIdx));
- if (LI && isa<GlobalVariable>(LI->getPointerOperand()))
- return UseCaptureKind::NO_CAPTURE;
+
// Otherwise, be conservative. There are crazy ways to capture pointers
// using comparisons.
return UseCaptureKind::MAY_CAPTURE;
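
The rewritten isDereferenceableOrNull comment is about null comparisons leaking a pointer's value: gep(p, -ptrtoint(p2)) == null is p == p2 in disguise. A standalone sketch of the same arithmetic at the integer level (keeping everything as integers keeps the C++ well defined):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int A = 1, B = 2;
      uintptr_t P  = reinterpret_cast<uintptr_t>(&A);
      uintptr_t P2 = reinterpret_cast<uintptr_t>(&B);

      // gep(p, -ptrtoint(p2)) computes p - p2 (modulo the pointer width), so
      // comparing it against null answers "p == p2": the icmp observes p's
      // address even though p never appears directly on either side.
      bool ProbeIsNull = (P - P2) == 0;
      std::printf("probe == null: %d\n", ProbeIsNull); // same answer as...
      std::printf("p == p2:       %d\n", &A == &B);    // ...this comparison
    }

This is why the code above keeps the dereferenceability check before treating a comparison against null as non-capturing.
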
diff --git a/llvm/lib/Analysis/CmpInstAnalysis.cpp b/llvm/lib/Analysis/CmpInstAnalysis.cpp
index 20b1df6e1495..d6407e875073 100644
--- a/llvm/lib/Analysis/CmpInstAnalysis.cpp
+++ b/llvm/lib/Analysis/CmpInstAnalysis.cpp
@@ -79,7 +79,7 @@ bool llvm::decomposeBitTestICmp(Value *LHS, Value *RHS,
using namespace PatternMatch;
const APInt *C;
- if (!match(RHS, m_APInt(C)))
+ if (!match(RHS, m_APIntAllowUndef(C)))
return false;
switch (Pred) {
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 6a2d6ba767e7..38cccb3ea3c2 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -235,7 +235,8 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
// Mix it in.
- Elt = ConstantExpr::getOr(Elt, Src);
+ Elt = ConstantFoldBinaryOpOperands(Instruction::Or, Elt, Src, DL);
+ assert(Elt && "Constant folding cannot fail on plain integers");
}
Result.push_back(Elt);
}
@@ -429,18 +430,16 @@ bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
return true;
if (auto *CI = dyn_cast<ConstantInt>(C)) {
- if (CI->getBitWidth() > 64 ||
- (CI->getBitWidth() & 7) != 0)
+ if ((CI->getBitWidth() & 7) != 0)
return false;
-
- uint64_t Val = CI->getZExtValue();
+ const APInt &Val = CI->getValue();
unsigned IntBytes = unsigned(CI->getBitWidth()/8);
for (unsigned i = 0; i != BytesLeft && ByteOffset != IntBytes; ++i) {
- int n = ByteOffset;
+ unsigned n = ByteOffset;
if (!DL.isLittleEndian())
n = IntBytes - n - 1;
- CurPtr[i] = (unsigned char)(Val >> (n * 8));
+ CurPtr[i] = Val.extractBits(8, n * 8).getZExtValue();
++ByteOffset;
}
return true;
@@ -501,16 +500,22 @@ bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
if (isa<ConstantArray>(C) || isa<ConstantVector>(C) ||
isa<ConstantDataSequential>(C)) {
- uint64_t NumElts;
+ uint64_t NumElts, EltSize;
Type *EltTy;
if (auto *AT = dyn_cast<ArrayType>(C->getType())) {
NumElts = AT->getNumElements();
EltTy = AT->getElementType();
+ EltSize = DL.getTypeAllocSize(EltTy);
} else {
NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
EltTy = cast<FixedVectorType>(C->getType())->getElementType();
+ // TODO: For non-byte-sized vectors, current implementation assumes there is
+ // padding to the next byte boundary between elements.
+ if (!DL.typeSizeEqualsStoreSize(EltTy))
+ return false;
+
+ EltSize = DL.getTypeStoreSize(EltTy);
}
- uint64_t EltSize = DL.getTypeAllocSize(EltTy);
uint64_t Index = ByteOffset / EltSize;
uint64_t Offset = ByteOffset - Index * EltSize;
@@ -713,7 +718,7 @@ Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
return Result;
// Try hard to fold loads from bitcasted strange and non-type-safe things.
- if (Offset.getMinSignedBits() <= 64)
+ if (Offset.getSignificantBits() <= 64)
if (Constant *Result =
FoldReinterpretLoadFromConst(C, Ty, Offset.getSExtValue(), DL))
return Result;
@@ -729,26 +734,23 @@ Constant *llvm::ConstantFoldLoadFromConst(Constant *C, Type *Ty,
Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
APInt Offset,
const DataLayout &DL) {
+ // We can only fold loads from constant globals with a definitive initializer.
+ // Check this upfront, to skip expensive offset calculations.
+ auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C));
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+ return nullptr;
+
C = cast<Constant>(C->stripAndAccumulateConstantOffsets(
DL, Offset, /* AllowNonInbounds */ true));
- if (auto *GV = dyn_cast<GlobalVariable>(C))
- if (GV->isConstant() && GV->hasDefinitiveInitializer())
- if (Constant *Result = ConstantFoldLoadFromConst(GV->getInitializer(), Ty,
- Offset, DL))
- return Result;
+ if (C == GV)
+ if (Constant *Result = ConstantFoldLoadFromConst(GV->getInitializer(), Ty,
+ Offset, DL))
+ return Result;
// If this load comes from anywhere in a uniform constant global, the value
// is always the same, regardless of the loaded offset.
- if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C))) {
- if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
- if (Constant *Res =
- ConstantFoldLoadFromUniformValue(GV->getInitializer(), Ty))
- return Res;
- }
- }
-
- return nullptr;
+ return ConstantFoldLoadFromUniformValue(GV->getInitializer(), Ty);
}
Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty,
@@ -825,7 +827,8 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
/// If array indices are not pointer-sized integers, explicitly cast them so
/// that they aren't implicitly casted by the getelementptr.
Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
- Type *ResultTy, std::optional<unsigned> InRangeIndex,
+ Type *ResultTy, bool InBounds,
+ std::optional<unsigned> InRangeIndex,
const DataLayout &DL, const TargetLibraryInfo *TLI) {
Type *IntIdxTy = DL.getIndexType(ResultTy);
Type *IntIdxScalarTy = IntIdxTy->getScalarType();
@@ -854,23 +857,21 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
return nullptr;
Constant *C = ConstantExpr::getGetElementPtr(
- SrcElemTy, Ops[0], NewIdxs, /*InBounds=*/false, InRangeIndex);
+ SrcElemTy, Ops[0], NewIdxs, InBounds, InRangeIndex);
return ConstantFoldConstant(C, DL, TLI);
}
/// Strip the pointer casts, but preserve the address space information.
-Constant *StripPtrCastKeepAS(Constant *Ptr) {
+// TODO: This probably doesn't make sense with opaque pointers.
+static Constant *StripPtrCastKeepAS(Constant *Ptr) {
assert(Ptr->getType()->isPointerTy() && "Not a pointer type");
auto *OldPtrTy = cast<PointerType>(Ptr->getType());
Ptr = cast<Constant>(Ptr->stripPointerCasts());
auto *NewPtrTy = cast<PointerType>(Ptr->getType());
// Preserve the address space number of the pointer.
- if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) {
- Ptr = ConstantExpr::getPointerCast(
- Ptr, PointerType::getWithSamePointeeType(NewPtrTy,
- OldPtrTy->getAddressSpace()));
- }
+ if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace())
+ Ptr = ConstantExpr::getPointerCast(Ptr, OldPtrTy);
return Ptr;
}
@@ -889,7 +890,8 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
return nullptr;
if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy,
- GEP->getInRangeIndex(), DL, TLI))
+ GEP->isInBounds(), GEP->getInRangeIndex(),
+ DL, TLI))
return C;
Constant *Ptr = Ops[0];
@@ -952,14 +954,10 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
// Otherwise form a regular getelementptr. Recompute the indices so that
// we eliminate over-indexing of the notional static type array bounds.
// This makes it easy to determine if the getelementptr is "inbounds".
- // Also, this helps GlobalOpt do SROA on GlobalVariables.
- // For GEPs of GlobalValues, use the value type even for opaque pointers.
- // Otherwise use an i8 GEP.
+ // For GEPs of GlobalValues, use the value type, otherwise use an i8 GEP.
if (auto *GV = dyn_cast<GlobalValue>(Ptr))
SrcElemTy = GV->getValueType();
- else if (!PTy->isOpaque())
- SrcElemTy = PTy->getNonOpaquePointerElementType();
else
SrcElemTy = Type::getInt8Ty(Ptr->getContext());
@@ -1002,18 +1000,8 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
}
// Create a GEP.
- Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs,
- InBounds, InRangeIndex);
- assert(
- cast<PointerType>(C->getType())->isOpaqueOrPointeeTypeMatches(ElemTy) &&
- "Computed GetElementPtr has unexpected type!");
-
- // If we ended up indexing a member with a type that doesn't match
- // the type of what the original indices indexed, add a cast.
- if (C->getType() != ResTy)
- C = FoldBitCast(C, ResTy, DL);
-
- return C;
+ return ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs, InBounds,
+ InRangeIndex);
}
/// Attempt to constant fold an instruction with the
@@ -1053,11 +1041,15 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
return ConstantFoldCastOperand(Opcode, Ops[0], DestTy, DL);
if (auto *GEP = dyn_cast<GEPOperator>(InstOrCE)) {
+ Type *SrcElemTy = GEP->getSourceElementType();
+ if (!ConstantExpr::isSupportedGetElementPtr(SrcElemTy))
+ return nullptr;
+
if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI))
return C;
- return ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), Ops[0],
- Ops.slice(1), GEP->isInBounds(),
+ return ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], Ops.slice(1),
+ GEP->isInBounds(),
GEP->getInRangeIndex());
}
@@ -1086,7 +1078,7 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
}
return nullptr;
case Instruction::Select:
- return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
+ return ConstantFoldSelectInstruction(Ops[0], Ops[1], Ops[2]);
case Instruction::ExtractElement:
return ConstantExpr::getExtractElement(Ops[0], Ops[1]);
case Instruction::ExtractValue:
@@ -1323,7 +1315,11 @@ Constant *llvm::ConstantFoldCompareInstOperands(
// Flush any denormal constant float input according to denormal handling
// mode.
Ops0 = FlushFPConstant(Ops0, I, /* IsOutput */ false);
+ if (!Ops0)
+ return nullptr;
Ops1 = FlushFPConstant(Ops1, I, /* IsOutput */ false);
+ if (!Ops1)
+ return nullptr;
return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
}
@@ -1358,6 +1354,10 @@ Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *I,
return Operand;
const APFloat &APF = CFP->getValueAPF();
+ // TODO: Should this canonicalize nans?
+ if (!APF.isDenormal())
+ return Operand;
+
Type *Ty = CFP->getType();
DenormalMode DenormMode =
I->getFunction()->getDenormalMode(Ty->getFltSemantics());
@@ -1366,7 +1366,8 @@ Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *I,
switch (Mode) {
default:
llvm_unreachable("unknown denormal mode");
- return Operand;
+ case DenormalMode::Dynamic:
+ return nullptr;
case DenormalMode::IEEE:
return Operand;
case DenormalMode::PreserveSign:
@@ -1392,7 +1393,11 @@ Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
if (Instruction::isBinaryOp(Opcode)) {
// Flush denormal inputs if needed.
Constant *Op0 = FlushFPConstant(LHS, I, /* IsOutput */ false);
+ if (!Op0)
+ return nullptr;
Constant *Op1 = FlushFPConstant(RHS, I, /* IsOutput */ false);
+ if (!Op1)
+ return nullptr;
// Calculate constant result.
Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL);
@@ -1571,6 +1576,7 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::powi:
case Intrinsic::fma:
case Intrinsic::fmuladd:
+ case Intrinsic::frexp:
case Intrinsic::fptoui_sat:
case Intrinsic::fptosi_sat:
case Intrinsic::convert_from_fp16:
@@ -1966,13 +1972,25 @@ static Constant *constantFoldCanonicalize(const Type *Ty, const CallBase *CI,
if (Src.isDenormal() && CI->getParent() && CI->getFunction()) {
DenormalMode DenormMode =
CI->getFunction()->getDenormalMode(Src.getSemantics());
+
if (DenormMode == DenormalMode::getIEEE())
+ return ConstantFP::get(CI->getContext(), Src);
+
+ if (DenormMode.Input == DenormalMode::Dynamic)
+ return nullptr;
+
+ // If we know whether either the input or output is flushed, we can fold.
+ if ((DenormMode.Input == DenormalMode::Dynamic &&
+ DenormMode.Output == DenormalMode::IEEE) ||
+ (DenormMode.Input == DenormalMode::IEEE &&
+ DenormMode.Output == DenormalMode::Dynamic))
return nullptr;
bool IsPositive =
(!Src.isNegative() || DenormMode.Input == DenormalMode::PositiveZero ||
(DenormMode.Output == DenormalMode::PositiveZero &&
DenormMode.Input == DenormalMode::IEEE));
+
return ConstantFP::get(CI->getContext(),
APFloat::getZero(Src.getSemantics(), !IsPositive));
}
@@ -2398,7 +2416,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name,
case Intrinsic::bswap:
return ConstantInt::get(Ty->getContext(), Op->getValue().byteSwap());
case Intrinsic::ctpop:
- return ConstantInt::get(Ty, Op->getValue().countPopulation());
+ return ConstantInt::get(Ty, Op->getValue().popcount());
case Intrinsic::bitreverse:
return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());
case Intrinsic::convert_from_fp16: {
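The countPopulation/countTrailingZeros/countLeadingZeros spellings replaced here and in later hunks follow the C++20 <bit> naming. A quick standalone illustration of what the renamed operations compute:

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  std::uint32_t V = 0b1011'0000; // 176
  std::printf("popcount    = %d\n", std::popcount(V));    // 3 set bits
  std::printf("countr_zero = %d\n", std::countr_zero(V)); // 4 trailing zeros
  std::printf("countl_zero = %d\n", std::countl_zero(V)); // 24 leading zeros
}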
@@ -2580,7 +2598,7 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
// The legacy behaviour is that multiplying +/- 0.0 by anything, even
// NaN or infinity, gives +0.0.
if (Op1V.isZero() || Op2V.isZero())
- return ConstantFP::getNullValue(Ty);
+ return ConstantFP::getZero(Ty);
return ConstantFP::get(Ty->getContext(), Op1V * Op2V);
}
@@ -2633,18 +2651,18 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
} else if (auto *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
switch (IntrinsicID) {
case Intrinsic::is_fpclass: {
- uint32_t Mask = Op2C->getZExtValue();
+ FPClassTest Mask = static_cast<FPClassTest>(Op2C->getZExtValue());
bool Result =
((Mask & fcSNan) && Op1V.isNaN() && Op1V.isSignaling()) ||
((Mask & fcQNan) && Op1V.isNaN() && !Op1V.isSignaling()) ||
- ((Mask & fcNegInf) && Op1V.isInfinity() && Op1V.isNegative()) ||
+ ((Mask & fcNegInf) && Op1V.isNegInfinity()) ||
((Mask & fcNegNormal) && Op1V.isNormal() && Op1V.isNegative()) ||
((Mask & fcNegSubnormal) && Op1V.isDenormal() && Op1V.isNegative()) ||
((Mask & fcNegZero) && Op1V.isZero() && Op1V.isNegative()) ||
((Mask & fcPosZero) && Op1V.isZero() && !Op1V.isNegative()) ||
((Mask & fcPosSubnormal) && Op1V.isDenormal() && !Op1V.isNegative()) ||
((Mask & fcPosNormal) && Op1V.isNormal() && !Op1V.isNegative()) ||
- ((Mask & fcPosInf) && Op1V.isInfinity() && !Op1V.isNegative());
+ ((Mask & fcPosInf) && Op1V.isPosInfinity());
return ConstantInt::get(Ty, Result);
}
default:
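The is_fpclass fold above tests each class bit of the mask against properties of the constant operand. A rough standalone analogue using <cmath> classification; the enum values and the testClass helper are invented for the example and are not LLVM's FPClassTest:

#include <cmath>
#include <cstdio>

// Invented class mask, loosely modelled on the fcNegInf/fcPosZero/... bits
// used above; the bit assignments are arbitrary.
enum ClassMask : unsigned {
  MaskNegInf    = 1u << 0,
  MaskNegNormal = 1u << 1,
  MaskPosZero   = 1u << 2,
  MaskPosInf    = 1u << 3,
};

static bool testClass(double X, unsigned Mask) {
  return ((Mask & MaskNegInf) && std::isinf(X) && std::signbit(X)) ||
         ((Mask & MaskNegNormal) && std::isnormal(X) && std::signbit(X)) ||
         ((Mask & MaskPosZero) && X == 0.0 && !std::signbit(X)) ||
         ((Mask & MaskPosInf) && std::isinf(X) && !std::signbit(X));
}

int main() {
  std::printf("%d\n", testClass(-1.0, MaskNegNormal));           // 1
  std::printf("%d\n", testClass(0.0, MaskPosZero | MaskPosInf)); // 1
  std::printf("%d\n", testClass(-INFINITY, MaskPosInf));         // 0
}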
@@ -2804,9 +2822,9 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
if (!C0)
return Constant::getNullValue(Ty);
if (IntrinsicID == Intrinsic::cttz)
- return ConstantInt::get(Ty, C0->countTrailingZeros());
+ return ConstantInt::get(Ty, C0->countr_zero());
else
- return ConstantInt::get(Ty, C0->countLeadingZeros());
+ return ConstantInt::get(Ty, C0->countl_zero());
case Intrinsic::abs:
assert(C1 && "Must be constant int");
@@ -3265,6 +3283,69 @@ static Constant *ConstantFoldScalableVectorCall(
return nullptr;
}
+static std::pair<Constant *, Constant *>
+ConstantFoldScalarFrexpCall(Constant *Op, Type *IntTy) {
+ if (isa<PoisonValue>(Op))
+ return {Op, PoisonValue::get(IntTy)};
+
+ auto *ConstFP = dyn_cast<ConstantFP>(Op);
+ if (!ConstFP)
+ return {};
+
+ const APFloat &U = ConstFP->getValueAPF();
+ int FrexpExp;
+ APFloat FrexpMant = frexp(U, FrexpExp, APFloat::rmNearestTiesToEven);
+ Constant *Result0 = ConstantFP::get(ConstFP->getType(), FrexpMant);
+
+ // The exponent is an "unspecified value" for inf/nan. We use zero to avoid
+ // using undef.
+ Constant *Result1 = FrexpMant.isFinite() ? ConstantInt::get(IntTy, FrexpExp)
+ : ConstantInt::getNullValue(IntTy);
+ return {Result0, Result1};
+}
+
+/// Handle intrinsics that return tuples, which may be tuples of vectors.
+static Constant *
+ConstantFoldStructCall(StringRef Name, Intrinsic::ID IntrinsicID,
+ StructType *StTy, ArrayRef<Constant *> Operands,
+ const DataLayout &DL, const TargetLibraryInfo *TLI,
+ const CallBase *Call) {
+
+ switch (IntrinsicID) {
+ case Intrinsic::frexp: {
+ Type *Ty0 = StTy->getContainedType(0);
+ Type *Ty1 = StTy->getContainedType(1)->getScalarType();
+
+ if (auto *FVTy0 = dyn_cast<FixedVectorType>(Ty0)) {
+ SmallVector<Constant *, 4> Results0(FVTy0->getNumElements());
+ SmallVector<Constant *, 4> Results1(FVTy0->getNumElements());
+
+ for (unsigned I = 0, E = FVTy0->getNumElements(); I != E; ++I) {
+ Constant *Lane = Operands[0]->getAggregateElement(I);
+ std::tie(Results0[I], Results1[I]) =
+ ConstantFoldScalarFrexpCall(Lane, Ty1);
+ if (!Results0[I])
+ return nullptr;
+ }
+
+ return ConstantStruct::get(StTy, ConstantVector::get(Results0),
+ ConstantVector::get(Results1));
+ }
+
+ auto [Result0, Result1] = ConstantFoldScalarFrexpCall(Operands[0], Ty1);
+ if (!Result0)
+ return nullptr;
+ return ConstantStruct::get(StTy, Result0, Result1);
+ }
+ default:
+ // TODO: Constant folding of vector intrinsics that fall through here does
+ // not work (e.g. overflow intrinsics)
+ return ConstantFoldScalarCall(Name, IntrinsicID, StTy, Operands, TLI, Call);
+ }
+
+ return nullptr;
+}
+
} // end anonymous namespace
Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
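The new frexp folding above computes the same mantissa/exponent split that std::frexp performs for scalars: value == mantissa * 2^exponent with |mantissa| in [0.5, 1) for finite non-zero inputs. A small standalone check of that identity, independent of the LLVM APIs:

#include <cmath>
#include <cstdio>

int main() {
  int Exp = 0;
  double Mant = std::frexp(8.0, &Exp);
  std::printf("8.0 = %g * 2^%d\n", Mant, Exp); // 8.0 = 0.5 * 2^4

  Mant = std::frexp(-0.375, &Exp);
  std::printf("-0.375 = %g * 2^%d\n", Mant, Exp); // -0.375 = -0.75 * 2^-1
}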
@@ -3276,7 +3357,8 @@ Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
return nullptr;
// If this is not an intrinsic and not recognized as a library call, bail out.
- if (F->getIntrinsicID() == Intrinsic::not_intrinsic) {
+ Intrinsic::ID IID = F->getIntrinsicID();
+ if (IID == Intrinsic::not_intrinsic) {
if (!TLI)
return nullptr;
LibFunc LibF;
@@ -3288,19 +3370,20 @@ Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
Type *Ty = F->getReturnType();
if (auto *FVTy = dyn_cast<FixedVectorType>(Ty))
return ConstantFoldFixedVectorCall(
- Name, F->getIntrinsicID(), FVTy, Operands,
- F->getParent()->getDataLayout(), TLI, Call);
+ Name, IID, FVTy, Operands, F->getParent()->getDataLayout(), TLI, Call);
if (auto *SVTy = dyn_cast<ScalableVectorType>(Ty))
return ConstantFoldScalableVectorCall(
- Name, F->getIntrinsicID(), SVTy, Operands,
- F->getParent()->getDataLayout(), TLI, Call);
+ Name, IID, SVTy, Operands, F->getParent()->getDataLayout(), TLI, Call);
+
+ if (auto *StTy = dyn_cast<StructType>(Ty))
+ return ConstantFoldStructCall(Name, IID, StTy, Operands,
+ F->getParent()->getDataLayout(), TLI, Call);
// TODO: If this is a library function, we already discovered that above,
// so we should pass the LibFunc, not the name (and it might be better
// still to separate intrinsic handling from libcalls).
- return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI,
- Call);
+ return ConstantFoldScalarCall(Name, IID, Ty, Operands, TLI, Call);
}
bool llvm::isMathLibCallNoop(const CallBase *Call,
diff --git a/llvm/lib/Analysis/ConstraintSystem.cpp b/llvm/lib/Analysis/ConstraintSystem.cpp
index 49bc5381841c..8a802515b6f4 100644
--- a/llvm/lib/Analysis/ConstraintSystem.cpp
+++ b/llvm/lib/Analysis/ConstraintSystem.cpp
@@ -10,6 +10,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/Debug.h"
#include <string>
@@ -27,114 +28,169 @@ bool ConstraintSystem::eliminateUsingFM() {
// IEEE conference on Supercomputing. IEEE, 1991.
assert(!Constraints.empty() &&
"should only be called for non-empty constraint systems");
- unsigned NumVariables = Constraints[0].size();
- SmallVector<SmallVector<int64_t, 8>, 4> NewSystem;
- unsigned NumConstraints = Constraints.size();
uint32_t NewGCD = 1;
- // FIXME do not use copy
- for (unsigned R1 = 0; R1 < NumConstraints; R1++) {
- if (Constraints[R1][1] == 0) {
- SmallVector<int64_t, 8> NR;
- NR.push_back(Constraints[R1][0]);
- for (unsigned i = 2; i < NumVariables; i++) {
- NR.push_back(Constraints[R1][i]);
- }
- NewSystem.push_back(std::move(NR));
- continue;
+ unsigned LastIdx = NumVariables - 1;
+
+ // First, either remove the variable in place if it is 0 or add the row to
+ // RemainingRows and remove it from the system.
+ SmallVector<SmallVector<Entry, 8>, 4> RemainingRows;
+ for (unsigned R1 = 0; R1 < Constraints.size();) {
+ SmallVector<Entry, 8> &Row1 = Constraints[R1];
+ if (getLastCoefficient(Row1, LastIdx) == 0) {
+ if (Row1.size() > 0 && Row1.back().Id == LastIdx)
+ Row1.pop_back();
+ R1++;
+ } else {
+ std::swap(Constraints[R1], Constraints.back());
+ RemainingRows.push_back(std::move(Constraints.back()));
+ Constraints.pop_back();
}
+ }
+ // Process rows where the variable is != 0.
+ unsigned NumRemainingConstraints = RemainingRows.size();
+ for (unsigned R1 = 0; R1 < NumRemainingConstraints; R1++) {
// FIXME do not use copy
- for (unsigned R2 = R1 + 1; R2 < NumConstraints; R2++) {
+ for (unsigned R2 = R1 + 1; R2 < NumRemainingConstraints; R2++) {
if (R1 == R2)
continue;
- // FIXME: can we do better than just dropping things here?
- if (Constraints[R2][1] == 0)
- continue;
+ int64_t UpperLast = getLastCoefficient(RemainingRows[R2], LastIdx);
+ int64_t LowerLast = getLastCoefficient(RemainingRows[R1], LastIdx);
+ assert(
+ UpperLast != 0 && LowerLast != 0 &&
+ "RemainingRows should only contain rows where the variable is != 0");
- if ((Constraints[R1][1] < 0 && Constraints[R2][1] < 0) ||
- (Constraints[R1][1] > 0 && Constraints[R2][1] > 0))
+ if ((LowerLast < 0 && UpperLast < 0) || (LowerLast > 0 && UpperLast > 0))
continue;
unsigned LowerR = R1;
unsigned UpperR = R2;
- if (Constraints[UpperR][1] < 0)
+ if (UpperLast < 0) {
std::swap(LowerR, UpperR);
+ std::swap(LowerLast, UpperLast);
+ }
- SmallVector<int64_t, 8> NR;
- for (unsigned I = 0; I < NumVariables; I++) {
- if (I == 1)
- continue;
-
+ SmallVector<Entry, 8> NR;
+ unsigned IdxUpper = 0;
+ unsigned IdxLower = 0;
+ auto &LowerRow = RemainingRows[LowerR];
+ auto &UpperRow = RemainingRows[UpperR];
+ while (true) {
+ if (IdxUpper >= UpperRow.size() || IdxLower >= LowerRow.size())
+ break;
int64_t M1, M2, N;
- if (MulOverflow(Constraints[UpperR][I],
- ((-1) * Constraints[LowerR][1] / GCD), M1))
+ int64_t UpperV = 0;
+ int64_t LowerV = 0;
+ uint16_t CurrentId = std::numeric_limits<uint16_t>::max();
+ if (IdxUpper < UpperRow.size()) {
+ CurrentId = std::min(UpperRow[IdxUpper].Id, CurrentId);
+ }
+ if (IdxLower < LowerRow.size()) {
+ CurrentId = std::min(LowerRow[IdxLower].Id, CurrentId);
+ }
+
+ if (IdxUpper < UpperRow.size() && UpperRow[IdxUpper].Id == CurrentId) {
+ UpperV = UpperRow[IdxUpper].Coefficient;
+ IdxUpper++;
+ }
+
+ if (MulOverflow(UpperV, ((-1) * LowerLast / GCD), M1))
return false;
- if (MulOverflow(Constraints[LowerR][I],
- (Constraints[UpperR][1] / GCD), M2))
+ if (IdxLower < LowerRow.size() && LowerRow[IdxLower].Id == CurrentId) {
+ LowerV = LowerRow[IdxLower].Coefficient;
+ IdxLower++;
+ }
+
+ if (MulOverflow(LowerV, (UpperLast / GCD), M2))
return false;
if (AddOverflow(M1, M2, N))
return false;
- NR.push_back(N);
+ if (N == 0)
+ continue;
+ NR.emplace_back(N, CurrentId);
- NewGCD = APIntOps::GreatestCommonDivisor({32, (uint32_t)NR.back()},
- {32, NewGCD})
- .getZExtValue();
+ NewGCD =
+ APIntOps::GreatestCommonDivisor({32, (uint32_t)N}, {32, NewGCD})
+ .getZExtValue();
}
- NewSystem.push_back(std::move(NR));
+ if (NR.empty())
+ continue;
+ Constraints.push_back(std::move(NR));
// Give up if the new system gets too big.
- if (NewSystem.size() > 500)
+ if (Constraints.size() > 500)
return false;
}
}
- Constraints = std::move(NewSystem);
+ NumVariables -= 1;
GCD = NewGCD;
return true;
}
bool ConstraintSystem::mayHaveSolutionImpl() {
- while (!Constraints.empty() && Constraints[0].size() > 1) {
+ while (!Constraints.empty() && NumVariables > 1) {
if (!eliminateUsingFM())
return true;
}
- if (Constraints.empty() || Constraints[0].size() > 1)
+ if (Constraints.empty() || NumVariables > 1)
return true;
- return all_of(Constraints, [](auto &R) { return R[0] >= 0; });
+ return all_of(Constraints, [](auto &R) {
+ if (R.empty())
+ return true;
+ if (R[0].Id == 0)
+ return R[0].Coefficient >= 0;
+ return true;
+ });
}
-void ConstraintSystem::dump(ArrayRef<std::string> Names) const {
+SmallVector<std::string> ConstraintSystem::getVarNamesList() const {
+ SmallVector<std::string> Names(Value2Index.size(), "");
+#ifndef NDEBUG
+ for (auto &[V, Index] : Value2Index) {
+ std::string OperandName;
+ if (V->getName().empty())
+ OperandName = V->getNameOrAsOperand();
+ else
+ OperandName = std::string("%") + V->getName().str();
+ Names[Index - 1] = OperandName;
+ }
+#endif
+ return Names;
+}
+
+void ConstraintSystem::dump() const {
+#ifndef NDEBUG
if (Constraints.empty())
return;
-
+ SmallVector<std::string> Names = getVarNamesList();
for (const auto &Row : Constraints) {
SmallVector<std::string, 16> Parts;
- for (unsigned I = 1, S = Row.size(); I < S; ++I) {
- if (Row[I] == 0)
+ for (unsigned I = 0, S = Row.size(); I < S; ++I) {
+ if (Row[I].Id >= NumVariables)
+ break;
+ if (Row[I].Id == 0)
continue;
std::string Coefficient;
- if (Row[I] != 1)
- Coefficient = std::to_string(Row[I]) + " * ";
- Parts.push_back(Coefficient + Names[I - 1]);
+ if (Row[I].Coefficient != 1)
+ Coefficient = std::to_string(Row[I].Coefficient) + " * ";
+ Parts.push_back(Coefficient + Names[Row[I].Id - 1]);
}
- assert(!Parts.empty() && "need to have at least some parts");
+ // assert(!Parts.empty() && "need to have at least some parts");
+ int64_t ConstPart = 0;
+ if (Row[0].Id == 0)
+ ConstPart = Row[0].Coefficient;
LLVM_DEBUG(dbgs() << join(Parts, std::string(" + "))
- << " <= " << std::to_string(Row[0]) << "\n");
+ << " <= " << std::to_string(ConstPart) << "\n");
}
-}
-
-void ConstraintSystem::dump() const {
- SmallVector<std::string, 16> Names;
- for (unsigned i = 1; i < Constraints.back().size(); ++i)
- Names.push_back("x" + std::to_string(i));
- LLVM_DEBUG(dbgs() << "---\n");
- dump(Names);
+#endif
}
bool ConstraintSystem::mayHaveSolution() {
+ LLVM_DEBUG(dbgs() << "---\n");
LLVM_DEBUG(dump());
bool HasSolution = mayHaveSolutionImpl();
LLVM_DEBUG(dbgs() << (HasSolution ? "sat" : "unsat") << "\n");
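The reworked eliminateUsingFM above stores rows sparsely as (coefficient, id) entries and, as before, only combines a row where the eliminated variable has a positive coefficient with one where it is negative. A dense toy version of that single elimination step (the helper and its layout are invented for the illustration; the real code also divides by the GCD and checks for overflow):

#include <cstddef>
#include <cstdio>
#include <vector>

// Rows encode c0 + c1*x1 + ... + cn*xn <= 0. Eliminate the last variable by
// combining a row with a negative last coefficient (Lower) and one with a
// positive last coefficient (Upper).
static std::vector<long> eliminateLast(const std::vector<long> &Lower,
                                       const std::vector<long> &Upper) {
  std::size_t Last = Lower.size() - 1;
  std::vector<long> Out(Last);
  for (std::size_t I = 0; I < Last; ++I)
    Out[I] = Upper[I] * -Lower[Last] + Lower[I] * Upper[Last];
  return Out;
}

int main() {
  // Columns: constant, y, x.  R1: x - y <= 0.  R2: 3 - x <= 0 (i.e. x >= 3).
  std::vector<long> R1 = {0, -1, 1};
  std::vector<long> R2 = {3, 0, -1};
  std::vector<long> C = eliminateLast(R2, R1);
  std::printf("%ld + %ld*y <= 0\n", C[0], C[1]); // 3 + -1*y <= 0, i.e. y >= 3
}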
@@ -150,6 +206,8 @@ bool ConstraintSystem::isConditionImplied(SmallVector<int64_t, 8> R) const {
// If there is no solution with the negation of R added to the system, the
// condition must hold based on the existing constraints.
R = ConstraintSystem::negate(R);
+ if (R.empty())
+ return false;
auto NewSystem = *this;
NewSystem.addVariableRow(R);
diff --git a/llvm/lib/Analysis/CycleAnalysis.cpp b/llvm/lib/Analysis/CycleAnalysis.cpp
index 17998123fce7..41a95a4fa220 100644
--- a/llvm/lib/Analysis/CycleAnalysis.cpp
+++ b/llvm/lib/Analysis/CycleAnalysis.cpp
@@ -17,9 +17,6 @@ namespace llvm {
class Module;
}
-template class llvm::GenericCycleInfo<SSAContext>;
-template class llvm::GenericCycle<SSAContext>;
-
CycleInfo CycleAnalysis::run(Function &F, FunctionAnalysisManager &) {
CycleInfo CI;
CI.compute(F);
diff --git a/llvm/lib/Analysis/DDG.cpp b/llvm/lib/Analysis/DDG.cpp
index da64ef153960..a0774096c512 100644
--- a/llvm/lib/Analysis/DDG.cpp
+++ b/llvm/lib/Analysis/DDG.cpp
@@ -241,11 +241,10 @@ bool DataDependenceGraph::addNode(DDGNode &N) {
}
const PiBlockDDGNode *DataDependenceGraph::getPiBlock(const NodeType &N) const {
- if (PiBlockMap.find(&N) == PiBlockMap.end())
+ if (!PiBlockMap.contains(&N))
return nullptr;
auto *Pi = PiBlockMap.find(&N)->second;
- assert(PiBlockMap.find(Pi) == PiBlockMap.end() &&
- "Nested pi-blocks detected.");
+ assert(!PiBlockMap.contains(Pi) && "Nested pi-blocks detected.");
return Pi;
}
diff --git a/llvm/lib/Analysis/DemandedBits.cpp b/llvm/lib/Analysis/DemandedBits.cpp
index e01ed48be376..c5017bf52498 100644
--- a/llvm/lib/Analysis/DemandedBits.cpp
+++ b/llvm/lib/Analysis/DemandedBits.cpp
@@ -34,8 +34,6 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
@@ -48,30 +46,6 @@ using namespace llvm::PatternMatch;
#define DEBUG_TYPE "demanded-bits"
-char DemandedBitsWrapperPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(DemandedBitsWrapperPass, "demanded-bits",
- "Demanded bits analysis", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(DemandedBitsWrapperPass, "demanded-bits",
- "Demanded bits analysis", false, false)
-
-DemandedBitsWrapperPass::DemandedBitsWrapperPass() : FunctionPass(ID) {
- initializeDemandedBitsWrapperPassPass(*PassRegistry::getPassRegistry());
-}
-
-void DemandedBitsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.setPreservesAll();
-}
-
-void DemandedBitsWrapperPass::print(raw_ostream &OS, const Module *M) const {
- DB->print(OS);
-}
-
static bool isAlwaysLive(Instruction *I) {
return I->isTerminator() || isa<DbgInfoIntrinsic>(I) || I->isEHPad() ||
I->mayHaveSideEffects();
@@ -109,7 +83,7 @@ void DemandedBits::determineLiveOperandBits(
default: break;
case Instruction::Call:
case Instruction::Invoke:
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI)) {
+ if (const auto *II = dyn_cast<IntrinsicInst>(UserI)) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::bswap:
@@ -170,7 +144,7 @@ void DemandedBits::determineLiveOperandBits(
case Intrinsic::smin:
// If low bits of result are not demanded, they are also not demanded
// for the min/max operands.
- AB = APInt::getBitsSetFrom(BitWidth, AOut.countTrailingZeros());
+ AB = APInt::getBitsSetFrom(BitWidth, AOut.countr_zero());
break;
}
}
@@ -206,7 +180,7 @@ void DemandedBits::determineLiveOperandBits(
// If the shift is nuw/nsw, then the high bits are not dead
// (because we've promised that they *must* be zero).
- const ShlOperator *S = cast<ShlOperator>(UserI);
+ const auto *S = cast<ShlOperator>(UserI);
if (S->hasNoSignedWrap())
AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1);
else if (S->hasNoUnsignedWrap())
@@ -310,17 +284,6 @@ void DemandedBits::determineLiveOperandBits(
}
}
-bool DemandedBitsWrapperPass::runOnFunction(Function &F) {
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- DB.emplace(F, AC, DT);
- return false;
-}
-
-void DemandedBitsWrapperPass::releaseMemory() {
- DB.reset();
-}
-
void DemandedBits::performAnalysis() {
if (Analyzed)
// Analysis already completed for this function.
@@ -353,7 +316,7 @@ void DemandedBits::performAnalysis() {
// Non-integer-typed instructions...
for (Use &OI : I.operands()) {
- if (Instruction *J = dyn_cast<Instruction>(OI)) {
+ if (auto *J = dyn_cast<Instruction>(OI)) {
Type *T = J->getType();
if (T->isIntOrIntVectorTy())
AliveBits[J] = APInt::getAllOnes(T->getScalarSizeInBits());
@@ -394,7 +357,7 @@ void DemandedBits::performAnalysis() {
for (Use &OI : UserI->operands()) {
// We also want to detect dead uses of arguments, but will only store
// demanded bits for instructions.
- Instruction *I = dyn_cast<Instruction>(OI);
+ auto *I = dyn_cast<Instruction>(OI);
if (!I && !isa<Argument>(OI))
continue;
@@ -447,7 +410,7 @@ APInt DemandedBits::getDemandedBits(Instruction *I) {
APInt DemandedBits::getDemandedBits(Use *U) {
Type *T = (*U)->getType();
- Instruction *UserI = cast<Instruction>(U->getUser());
+ auto *UserI = cast<Instruction>(U->getUser());
const DataLayout &DL = UserI->getModule()->getDataLayout();
unsigned BitWidth = DL.getTypeSizeInBits(T->getScalarType());
@@ -475,8 +438,7 @@ APInt DemandedBits::getDemandedBits(Use *U) {
bool DemandedBits::isInstructionDead(Instruction *I) {
performAnalysis();
- return !Visited.count(I) && AliveBits.find(I) == AliveBits.end() &&
- !isAlwaysLive(I);
+ return !Visited.count(I) && !AliveBits.contains(I) && !isAlwaysLive(I);
}
bool DemandedBits::isUseDead(Use *U) {
@@ -485,7 +447,7 @@ bool DemandedBits::isUseDead(Use *U) {
return false;
// Uses by always-live instructions are never dead.
- Instruction *UserI = cast<Instruction>(U->getUser());
+ auto *UserI = cast<Instruction>(U->getUser());
if (isAlwaysLive(UserI))
return false;
@@ -515,6 +477,7 @@ void DemandedBits::print(raw_ostream &OS) {
OS << *I << '\n';
};
+ OS << "Printing analysis 'Demanded Bits Analysis' for function '" << F.getName() << "':\n";
performAnalysis();
for (auto &KV : AliveBits) {
Instruction *I = KV.first;
@@ -606,10 +569,6 @@ APInt DemandedBits::determineLiveOperandBitsSub(unsigned OperandNo,
true);
}
-FunctionPass *llvm::createDemandedBitsWrapperPass() {
- return new DemandedBitsWrapperPass();
-}
-
AnalysisKey DemandedBitsAnalysis::Key;
DemandedBits DemandedBitsAnalysis::run(Function &F,
diff --git a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
index a91d2ffe6042..456d58660680 100644
--- a/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/DevelopmentModeInlineAdvisor.cpp
@@ -165,7 +165,6 @@ private:
bool isLogging() const { return !!Logger; }
std::unique_ptr<MLInlineAdvice> getMandatoryAdviceImpl(CallBase &CB) override;
- std::function<bool(CallBase &)> GetDefaultAdvice;
const bool IsDoingInference;
std::unique_ptr<TrainingLogger> Logger;
@@ -280,10 +279,10 @@ TrainingLogger::TrainingLogger(StringRef LogFileName,
append_range(FT, MUTR->extraOutputsForLoggingSpecs());
DefaultDecisionPos = FT.size();
- FT.push_back(TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1}));
+ FT.push_back(DefaultDecisionSpec);
DecisionPos = FT.size();
- FT.push_back(TensorSpec::createSpec<int64_t>(DecisionName, {1}));
+ FT.push_back(InlineDecisionSpec);
std::error_code EC;
auto OS = std::make_unique<raw_fd_ostream>(TrainingLog, EC);
if (EC)
@@ -331,8 +330,7 @@ DevelopmentModeMLInlineAdvisor::DevelopmentModeMLInlineAdvisor(
std::unique_ptr<MLModelRunner> ModelRunner,
std::function<bool(CallBase &)> GetDefaultAdvice,
std::unique_ptr<TrainingLogger> Logger)
- : MLInlineAdvisor(M, MAM, std::move(ModelRunner)),
- GetDefaultAdvice(GetDefaultAdvice),
+ : MLInlineAdvisor(M, MAM, std::move(ModelRunner), GetDefaultAdvice),
IsDoingInference(isa<ModelUnderTrainingRunner>(getModelRunner())),
Logger(std::move(Logger)),
InitialNativeSize(isLogging() ? getTotalSizeEstimate() : 0),
diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp
deleted file mode 100644
index 02c40d2640c1..000000000000
--- a/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ /dev/null
@@ -1,409 +0,0 @@
-//===---- DivergenceAnalysis.cpp --- Divergence Analysis Implementation ----==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a general divergence analysis for loop vectorization
-// and GPU programs. It determines which branches and values in a loop or GPU
-// program are divergent. It can help branch optimizations such as jump
-// threading and loop unswitching to make better decisions.
-//
-// GPU programs typically use the SIMD execution model, where multiple threads
-// in the same execution group have to execute in lock-step. Therefore, if the
-// code contains divergent branches (i.e., threads in a group do not agree on
-// which path of the branch to take), the group of threads has to execute all
-// the paths from that branch with different subsets of threads enabled until
-// they re-converge.
-//
-// Due to this execution model, some optimizations such as jump
-// threading and loop unswitching can interfere with thread re-convergence.
-// Therefore, an analysis that computes which branches in a GPU program are
-// divergent can help the compiler to selectively run these optimizations.
-//
-// This implementation is derived from the Vectorization Analysis of the
-// Region Vectorizer (RV). The analysis is based on the approach described in
-//
-// An abstract interpretation for SPMD divergence
-// on reducible control flow graphs.
-// Julian Rosemann, Simon Moll and Sebastian Hack
-// POPL '21
-//
-// This implementation is generic in the sense that it does
-// not itself identify original sources of divergence.
-// Instead specialized adapter classes, (LoopDivergenceAnalysis) for loops and
-// (DivergenceAnalysis) for functions, identify the sources of divergence
-// (e.g., special variables that hold the thread ID or the iteration variable).
-//
-// The generic implementation propagates divergence to variables that are data
-// or sync dependent on a source of divergence.
-//
-// While data dependency is a well-known concept, the notion of sync dependency
-// is worth more explanation. Sync dependence characterizes the control flow
-// aspect of the propagation of branch divergence. For example,
-//
-// %cond = icmp slt i32 %tid, 10
-// br i1 %cond, label %then, label %else
-// then:
-// br label %merge
-// else:
-// br label %merge
-// merge:
-// %a = phi i32 [ 0, %then ], [ 1, %else ]
-//
-// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
-// because %tid is not on its use-def chains, %a is sync dependent on %tid
-// because the branch "br i1 %cond" depends on %tid and affects which value %a
-// is assigned to.
-//
-// The sync dependence detection (which branch induces divergence in which join
-// points) is implemented in the SyncDependenceAnalysis.
-//
-// The current implementation has the following limitations:
-// 1. intra-procedural. It conservatively considers the arguments of a
-// non-kernel-entry function and the return value of a function call as
-// divergent.
-// 2. memory as black box. It conservatively considers values loaded from
-// generic or local address as divergent. This can be improved by leveraging
-// pointer analysis and/or by modelling non-escaping memory objects in SSA
-// as done in RV.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/DivergenceAnalysis.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Value.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "divergence"
-
-DivergenceAnalysisImpl::DivergenceAnalysisImpl(
- const Function &F, const Loop *RegionLoop, const DominatorTree &DT,
- const LoopInfo &LI, SyncDependenceAnalysis &SDA, bool IsLCSSAForm)
- : F(F), RegionLoop(RegionLoop), DT(DT), LI(LI), SDA(SDA),
- IsLCSSAForm(IsLCSSAForm) {}
-
-bool DivergenceAnalysisImpl::markDivergent(const Value &DivVal) {
- if (isAlwaysUniform(DivVal))
- return false;
- assert(isa<Instruction>(DivVal) || isa<Argument>(DivVal));
- assert(!isAlwaysUniform(DivVal) && "cannot be a divergent");
- return DivergentValues.insert(&DivVal).second;
-}
-
-void DivergenceAnalysisImpl::addUniformOverride(const Value &UniVal) {
- UniformOverrides.insert(&UniVal);
-}
-
-bool DivergenceAnalysisImpl::isTemporalDivergent(
- const BasicBlock &ObservingBlock, const Value &Val) const {
- const auto *Inst = dyn_cast<const Instruction>(&Val);
- if (!Inst)
- return false;
- // check whether any divergent loop carrying Val terminates before control
- // proceeds to ObservingBlock
- for (const auto *Loop = LI.getLoopFor(Inst->getParent());
- Loop != RegionLoop && !Loop->contains(&ObservingBlock);
- Loop = Loop->getParentLoop()) {
- if (DivergentLoops.contains(Loop))
- return true;
- }
-
- return false;
-}
-
-bool DivergenceAnalysisImpl::inRegion(const Instruction &I) const {
- return I.getParent() && inRegion(*I.getParent());
-}
-
-bool DivergenceAnalysisImpl::inRegion(const BasicBlock &BB) const {
- return RegionLoop ? RegionLoop->contains(&BB) : (BB.getParent() == &F);
-}
-
-void DivergenceAnalysisImpl::pushUsers(const Value &V) {
- const auto *I = dyn_cast<const Instruction>(&V);
-
- if (I && I->isTerminator()) {
- analyzeControlDivergence(*I);
- return;
- }
-
- for (const auto *User : V.users()) {
- const auto *UserInst = dyn_cast<const Instruction>(User);
- if (!UserInst)
- continue;
-
- // only compute divergent inside loop
- if (!inRegion(*UserInst))
- continue;
-
- // All users of divergent values are immediate divergent
- if (markDivergent(*UserInst))
- Worklist.push_back(UserInst);
- }
-}
-
-static const Instruction *getIfCarriedInstruction(const Use &U,
- const Loop &DivLoop) {
- const auto *I = dyn_cast<const Instruction>(&U);
- if (!I)
- return nullptr;
- if (!DivLoop.contains(I))
- return nullptr;
- return I;
-}
-
-void DivergenceAnalysisImpl::analyzeTemporalDivergence(
- const Instruction &I, const Loop &OuterDivLoop) {
- if (isAlwaysUniform(I))
- return;
- if (isDivergent(I))
- return;
-
- LLVM_DEBUG(dbgs() << "Analyze temporal divergence: " << I.getName() << "\n");
- assert((isa<PHINode>(I) || !IsLCSSAForm) &&
- "In LCSSA form all users of loop-exiting defs are Phi nodes.");
- for (const Use &Op : I.operands()) {
- const auto *OpInst = getIfCarriedInstruction(Op, OuterDivLoop);
- if (!OpInst)
- continue;
- if (markDivergent(I))
- pushUsers(I);
- return;
- }
-}
-
-// marks all users of loop-carried values of the loop headed by LoopHeader as
-// divergent
-void DivergenceAnalysisImpl::analyzeLoopExitDivergence(
- const BasicBlock &DivExit, const Loop &OuterDivLoop) {
- // All users are in immediate exit blocks
- if (IsLCSSAForm) {
- for (const auto &Phi : DivExit.phis()) {
- analyzeTemporalDivergence(Phi, OuterDivLoop);
- }
- return;
- }
-
- // For non-LCSSA we have to follow all live out edges wherever they may lead.
- const BasicBlock &LoopHeader = *OuterDivLoop.getHeader();
- SmallVector<const BasicBlock *, 8> TaintStack;
- TaintStack.push_back(&DivExit);
-
- // Otherwise potential users of loop-carried values could be anywhere in the
- // dominance region of DivLoop (including its fringes for phi nodes)
- DenseSet<const BasicBlock *> Visited;
- Visited.insert(&DivExit);
-
- do {
- auto *UserBlock = TaintStack.pop_back_val();
-
- // don't spread divergence beyond the region
- if (!inRegion(*UserBlock))
- continue;
-
- assert(!OuterDivLoop.contains(UserBlock) &&
- "irreducible control flow detected");
-
- // phi nodes at the fringes of the dominance region
- if (!DT.dominates(&LoopHeader, UserBlock)) {
- // all PHI nodes of UserBlock become divergent
- for (const auto &Phi : UserBlock->phis()) {
- analyzeTemporalDivergence(Phi, OuterDivLoop);
- }
- continue;
- }
-
- // Taint outside users of values carried by OuterDivLoop.
- for (const auto &I : *UserBlock) {
- analyzeTemporalDivergence(I, OuterDivLoop);
- }
-
- // visit all blocks in the dominance region
- for (const auto *SuccBlock : successors(UserBlock)) {
- if (!Visited.insert(SuccBlock).second) {
- continue;
- }
- TaintStack.push_back(SuccBlock);
- }
- } while (!TaintStack.empty());
-}
-
-void DivergenceAnalysisImpl::propagateLoopExitDivergence(
- const BasicBlock &DivExit, const Loop &InnerDivLoop) {
- LLVM_DEBUG(dbgs() << "\tpropLoopExitDiv " << DivExit.getName() << "\n");
-
- // Find outer-most loop that does not contain \p DivExit
- const Loop *DivLoop = &InnerDivLoop;
- const Loop *OuterDivLoop = DivLoop;
- const Loop *ExitLevelLoop = LI.getLoopFor(&DivExit);
- const unsigned LoopExitDepth =
- ExitLevelLoop ? ExitLevelLoop->getLoopDepth() : 0;
- while (DivLoop && DivLoop->getLoopDepth() > LoopExitDepth) {
- DivergentLoops.insert(DivLoop); // all crossed loops are divergent
- OuterDivLoop = DivLoop;
- DivLoop = DivLoop->getParentLoop();
- }
- LLVM_DEBUG(dbgs() << "\tOuter-most left loop: " << OuterDivLoop->getName()
- << "\n");
-
- analyzeLoopExitDivergence(DivExit, *OuterDivLoop);
-}
-
-// this is a divergent join point - mark all phi nodes as divergent and push
-// them onto the stack.
-void DivergenceAnalysisImpl::taintAndPushPhiNodes(const BasicBlock &JoinBlock) {
- LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << JoinBlock.getName()
- << "\n");
-
- // ignore divergence outside the region
- if (!inRegion(JoinBlock)) {
- return;
- }
-
- // push non-divergent phi nodes in JoinBlock to the worklist
- for (const auto &Phi : JoinBlock.phis()) {
- if (isDivergent(Phi))
- continue;
- // FIXME Theoretically ,the 'undef' value could be replaced by any other
- // value causing spurious divergence.
- if (Phi.hasConstantOrUndefValue())
- continue;
- if (markDivergent(Phi))
- Worklist.push_back(&Phi);
- }
-}
-
-void DivergenceAnalysisImpl::analyzeControlDivergence(const Instruction &Term) {
- LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Term.getParent()->getName()
- << "\n");
-
- // Don't propagate divergence from unreachable blocks.
- if (!DT.isReachableFromEntry(Term.getParent()))
- return;
-
- const auto *BranchLoop = LI.getLoopFor(Term.getParent());
-
- const auto &DivDesc = SDA.getJoinBlocks(Term);
-
- // Iterate over all blocks now reachable by a disjoint path join
- for (const auto *JoinBlock : DivDesc.JoinDivBlocks) {
- taintAndPushPhiNodes(*JoinBlock);
- }
-
- assert(DivDesc.LoopDivBlocks.empty() || BranchLoop);
- for (const auto *DivExitBlock : DivDesc.LoopDivBlocks) {
- propagateLoopExitDivergence(*DivExitBlock, *BranchLoop);
- }
-}
-
-void DivergenceAnalysisImpl::compute() {
- // Initialize worklist.
- auto DivValuesCopy = DivergentValues;
- for (const auto *DivVal : DivValuesCopy) {
- assert(isDivergent(*DivVal) && "Worklist invariant violated!");
- pushUsers(*DivVal);
- }
-
- // All values on the Worklist are divergent.
- // Their users may not have been updated yed.
- while (!Worklist.empty()) {
- const Instruction &I = *Worklist.back();
- Worklist.pop_back();
-
- // propagate value divergence to users
- assert(isDivergent(I) && "Worklist invariant violated!");
- pushUsers(I);
- }
-}
-
-bool DivergenceAnalysisImpl::isAlwaysUniform(const Value &V) const {
- return UniformOverrides.contains(&V);
-}
-
-bool DivergenceAnalysisImpl::isDivergent(const Value &V) const {
- return DivergentValues.contains(&V);
-}
-
-bool DivergenceAnalysisImpl::isDivergentUse(const Use &U) const {
- Value &V = *U.get();
- Instruction &I = *cast<Instruction>(U.getUser());
- return isDivergent(V) || isTemporalDivergent(*I.getParent(), V);
-}
-
-DivergenceInfo::DivergenceInfo(Function &F, const DominatorTree &DT,
- const PostDominatorTree &PDT, const LoopInfo &LI,
- const TargetTransformInfo &TTI,
- bool KnownReducible)
- : F(F) {
- if (!KnownReducible) {
- using RPOTraversal = ReversePostOrderTraversal<const Function *>;
- RPOTraversal FuncRPOT(&F);
- if (containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
- const LoopInfo>(FuncRPOT, LI)) {
- ContainsIrreducible = true;
- return;
- }
- }
- SDA = std::make_unique<SyncDependenceAnalysis>(DT, PDT, LI);
- DA = std::make_unique<DivergenceAnalysisImpl>(F, nullptr, DT, LI, *SDA,
- /* LCSSA */ false);
- for (auto &I : instructions(F)) {
- if (TTI.isSourceOfDivergence(&I)) {
- DA->markDivergent(I);
- } else if (TTI.isAlwaysUniform(&I)) {
- DA->addUniformOverride(I);
- }
- }
- for (auto &Arg : F.args()) {
- if (TTI.isSourceOfDivergence(&Arg)) {
- DA->markDivergent(Arg);
- }
- }
-
- DA->compute();
-}
-
-AnalysisKey DivergenceAnalysis::Key;
-
-DivergenceAnalysis::Result
-DivergenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
- auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
- auto &LI = AM.getResult<LoopAnalysis>(F);
- auto &TTI = AM.getResult<TargetIRAnalysis>(F);
-
- return DivergenceInfo(F, DT, PDT, LI, TTI, /* KnownReducible = */ false);
-}
-
-PreservedAnalyses
-DivergenceAnalysisPrinterPass::run(Function &F, FunctionAnalysisManager &FAM) {
- auto &DI = FAM.getResult<DivergenceAnalysis>(F);
- OS << "'Divergence Analysis' for function '" << F.getName() << "':\n";
- if (DI.hasDivergence()) {
- for (auto &Arg : F.args()) {
- OS << (DI.isDivergent(Arg) ? "DIVERGENT: " : " ");
- OS << Arg << "\n";
- }
- for (const BasicBlock &BB : F) {
- OS << "\n " << BB.getName() << ":\n";
- for (const auto &I : BB.instructionsWithoutDebug()) {
- OS << (DI.isDivergent(I) ? "DIVERGENT: " : " ");
- OS << I << "\n";
- }
- }
- }
- return PreservedAnalyses::all();
-}
diff --git a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
index 782c11937507..6094f22a17fd 100644
--- a/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
+++ b/llvm/lib/Analysis/FunctionPropertiesAnalysis.cpp
@@ -82,13 +82,15 @@ void FunctionPropertiesInfo::updateAggregateStats(const Function &F,
}
FunctionPropertiesInfo FunctionPropertiesInfo::getFunctionPropertiesInfo(
- const Function &F, FunctionAnalysisManager &FAM) {
+ Function &F, FunctionAnalysisManager &FAM) {
+ return getFunctionPropertiesInfo(F, FAM.getResult<DominatorTreeAnalysis>(F),
+ FAM.getResult<LoopAnalysis>(F));
+}
+
+FunctionPropertiesInfo FunctionPropertiesInfo::getFunctionPropertiesInfo(
+ const Function &F, const DominatorTree &DT, const LoopInfo &LI) {
FunctionPropertiesInfo FPI;
- // The const casts are due to the getResult API - there's no mutation of F.
- const auto &LI = FAM.getResult<LoopAnalysis>(const_cast<Function &>(F));
- const auto &DT =
- FAM.getResult<DominatorTreeAnalysis>(const_cast<Function &>(F));
for (const auto &BB : F)
if (DT.isReachableFromEntry(&BB))
FPI.reIncludeBB(BB);
@@ -127,7 +129,7 @@ FunctionPropertiesPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
}
FunctionPropertiesUpdater::FunctionPropertiesUpdater(
- FunctionPropertiesInfo &FPI, const CallBase &CB)
+ FunctionPropertiesInfo &FPI, CallBase &CB)
: FPI(FPI), CallSiteBB(*CB.getParent()), Caller(*CallSiteBB.getParent()) {
assert(isa<CallInst>(CB) || isa<InvokeInst>(CB));
// For BBs that are likely to change, we subtract from feature totals their
@@ -247,5 +249,13 @@ void FunctionPropertiesUpdater::finish(FunctionAnalysisManager &FAM) const {
const auto &LI = FAM.getResult<LoopAnalysis>(const_cast<Function &>(Caller));
FPI.updateAggregateStats(Caller, LI);
- assert(FPI == FunctionPropertiesInfo::getFunctionPropertiesInfo(Caller, FAM));
}
+
+bool FunctionPropertiesUpdater::isUpdateValid(Function &F,
+ const FunctionPropertiesInfo &FPI,
+ FunctionAnalysisManager &FAM) {
+ DominatorTree DT(F);
+ LoopInfo LI(DT);
+ auto Fresh = FunctionPropertiesInfo::getFunctionPropertiesInfo(F, DT, LI);
+ return FPI == Fresh;
+} \ No newline at end of file
diff --git a/llvm/lib/Analysis/GuardUtils.cpp b/llvm/lib/Analysis/GuardUtils.cpp
index cd132c56991f..40b898e96f3b 100644
--- a/llvm/lib/Analysis/GuardUtils.cpp
+++ b/llvm/lib/Analysis/GuardUtils.cpp
@@ -32,12 +32,19 @@ bool llvm::isGuardAsWidenableBranch(const User *U) {
if (!parseWidenableBranch(U, Condition, WidenableCondition, GuardedBB,
DeoptBB))
return false;
- for (auto &Insn : *DeoptBB) {
- if (match(&Insn, m_Intrinsic<Intrinsic::experimental_deoptimize>()))
- return true;
- if (Insn.mayHaveSideEffects())
+ SmallPtrSet<const BasicBlock *, 2> Visited;
+ Visited.insert(DeoptBB);
+ do {
+ for (auto &Insn : *DeoptBB) {
+ if (match(&Insn, m_Intrinsic<Intrinsic::experimental_deoptimize>()))
+ return true;
+ if (Insn.mayHaveSideEffects())
+ return false;
+ }
+ DeoptBB = DeoptBB->getUniqueSuccessor();
+ if (!DeoptBB)
return false;
- }
+ } while (Visited.insert(DeoptBB).second);
return false;
}
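The loop added above walks a chain of unique successors and uses a visited set so that a cycle ends the search instead of looping forever. The same shape in a self-contained toy (the Block struct and its flags are invented; they stand in for scanning a basic block's instructions):

#include <set>
#include <vector>

struct Block {
  bool HasDeopt = false;       // stand-in for "contains the deoptimize intrinsic"
  bool HasSideEffects = false; // stand-in for "an instruction may have side effects"
  std::vector<Block *> Succs;
  Block *uniqueSuccessor() const {
    return Succs.size() == 1 ? Succs[0] : nullptr;
  }
};

static bool reachesDeopt(Block *BB) {
  std::set<Block *> Visited;
  Visited.insert(BB);
  do {
    if (BB->HasDeopt)
      return true;
    if (BB->HasSideEffects)
      return false;
    BB = BB->uniqueSuccessor();
    if (!BB)
      return false;
  } while (Visited.insert(BB).second); // stop when a block repeats (a cycle)
  return false;
}

int main() {
  Block A, B, C;
  A.Succs = {&B};
  B.Succs = {&C};
  C.HasDeopt = true;
  return reachesDeopt(&A) ? 0 : 1; // exits 0: the marker block is reachable
}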
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index f471e32344cb..f029c8342fde 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/IRSimilarityIdentifier.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetOperations.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/User.h"
@@ -97,7 +98,8 @@ void IRInstructionData::setBranchSuccessors(
int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
- for (BasicBlock *Successor : BI->successors()) {
+ for (Value *V : getBlockOperVals()) {
+ BasicBlock *Successor = cast<BasicBlock>(V);
BBNumIt = BasicBlockToInteger.find(Successor);
assert(BBNumIt != BasicBlockToInteger.end() &&
"Could not find number for BasicBlock!");
@@ -108,6 +110,25 @@ void IRInstructionData::setBranchSuccessors(
}
}
+ArrayRef<Value *> IRInstructionData::getBlockOperVals() {
+ assert((isa<BranchInst>(Inst) ||
+ isa<PHINode>(Inst)) && "Instruction must be branch or PHINode");
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(Inst))
+ return ArrayRef<Value *>(
+ std::next(OperVals.begin(), BI->isConditional() ? 1 : 0),
+ OperVals.end()
+ );
+
+ if (PHINode *PN = dyn_cast<PHINode>(Inst))
+ return ArrayRef<Value *>(
+ std::next(OperVals.begin(), PN->getNumIncomingValues()),
+ OperVals.end()
+ );
+
+ return ArrayRef<Value *>();
+}
+
void IRInstructionData::setCalleeName(bool MatchByName) {
CallInst *CI = dyn_cast<CallInst>(Inst);
assert(CI && "Instruction must be call");
@@ -159,7 +180,6 @@ void IRInstructionData::setPHIPredecessors(
int Relative = OtherBlockNumber - CurrentBlockNumber;
RelativeBlockLocations.push_back(Relative);
- RelativeBlockLocations.push_back(Relative);
}
}
@@ -439,7 +459,7 @@ IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
// Map the operand values to an unsigned integer if it does not already
// have an unsigned integer assigned to it.
for (Value *Arg : ID->OperVals)
- if (ValueToNumber.find(Arg) == ValueToNumber.end()) {
+ if (!ValueToNumber.contains(Arg)) {
ValueToNumber.try_emplace(Arg, LocalValNumber);
NumberToValue.try_emplace(LocalValNumber, Arg);
LocalValNumber++;
@@ -447,7 +467,7 @@ IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
// Mapping the instructions to an unsigned integer if it is not already
// exist in the mapping.
- if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) {
+ if (!ValueToNumber.contains(ID->Inst)) {
ValueToNumber.try_emplace(ID->Inst, LocalValNumber);
NumberToValue.try_emplace(LocalValNumber, ID->Inst);
LocalValNumber++;
@@ -464,7 +484,7 @@ IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
DenseSet<BasicBlock *> BBSet;
getBasicBlocks(BBSet);
for (BasicBlock *BB : BBSet) {
- if (ValueToNumber.find(BB) != ValueToNumber.end())
+ if (ValueToNumber.contains(BB))
continue;
ValueToNumber.try_emplace(BB, LocalValNumber);
@@ -698,11 +718,39 @@ bool IRSimilarityCandidate::compareCommutativeOperandMapping(
return true;
}
+bool IRSimilarityCandidate::compareAssignmentMapping(
+ const unsigned InstValA, const unsigned &InstValB,
+ DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
+ DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
+ DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
+ bool WasInserted;
+ std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
+ std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
+ if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
+ return false;
+ else if (ValueMappingIt->second.size() != 1) {
+ for (unsigned OtherVal : ValueMappingIt->second) {
+ if (OtherVal == InstValB)
+ continue;
+ if (!ValueNumberMappingA.contains(OtherVal))
+ continue;
+ if (!ValueNumberMappingA[OtherVal].contains(InstValA))
+ continue;
+ ValueNumberMappingA[OtherVal].erase(InstValA);
+ }
+ ValueNumberMappingA.erase(ValueMappingIt);
+ std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
+ std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
+ }
+
+ return true;
+}
+
bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A,
RelativeLocMapping B) {
// Get the basic blocks the label refers to.
- BasicBlock *ABB = static_cast<BasicBlock *>(A.OperVal);
- BasicBlock *BBB = static_cast<BasicBlock *>(B.OperVal);
+ BasicBlock *ABB = cast<BasicBlock>(A.OperVal);
+ BasicBlock *BBB = cast<BasicBlock>(B.OperVal);
// Get the basic blocks contained in each region.
DenseSet<BasicBlock *> BasicBlockA;
@@ -715,7 +763,7 @@ bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A,
bool BContained = BasicBlockB.contains(BBB);
// Both blocks need to be contained in the region, or both need to be outside
- // the reigon.
+ // the region.
if (AContained != BContained)
return false;
@@ -755,8 +803,6 @@ bool IRSimilarityCandidate::compareStructure(
// in one candidate to values in the other candidate. If we create a set with
// one element, and that same element maps to the original element in the
// candidate we have a good mapping.
- DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
-
// Iterate over the instructions contained in each candidate
unsigned SectionLength = A.getStartIdx() + A.getLength();
@@ -779,16 +825,13 @@ bool IRSimilarityCandidate::compareStructure(
unsigned InstValA = A.ValueToNumber.find(IA)->second;
unsigned InstValB = B.ValueToNumber.find(IB)->second;
- bool WasInserted;
// Ensure that the mappings for the instructions exists.
- std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
- std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
- if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
+ if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA,
+ ValueNumberMappingB))
return false;
-
- std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert(
- std::make_pair(InstValB, DenseSet<unsigned>({InstValA})));
- if (!WasInserted && !ValueMappingIt->second.contains(InstValA))
+
+ if (!compareAssignmentMapping(InstValB, InstValA, ValueNumberMappingB,
+ ValueNumberMappingA))
return false;
// We have different paths for commutative instructions and non-commutative
@@ -826,12 +869,22 @@ bool IRSimilarityCandidate::compareStructure(
SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
+ ArrayRef<Value *> ABL = ItA->getBlockOperVals();
+ ArrayRef<Value *> BBL = ItB->getBlockOperVals();
+
+ // Check to make sure that the number of operands, and branching locations
+ // between BranchInsts is the same.
if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
- OperValsA.size() != OperValsB.size())
+ ABL.size() != BBL.size())
return false;
+ assert(RelBlockLocsA.size() == ABL.size() &&
+ "Block information vectors not the same size.");
+ assert(RelBlockLocsB.size() == BBL.size() &&
+ "Block information vectors not the same size.");
+
ZippedRelativeLocationsT ZippedRelativeLocations =
- zip(RelBlockLocsA, RelBlockLocsB, OperValsA, OperValsB);
+ zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL);
if (any_of(ZippedRelativeLocations,
[&A, &B](std::tuple<int, int, Value *, Value *> R) {
return !checkRelativeLocations(
@@ -1026,7 +1079,7 @@ void IRSimilarityCandidate::createCanonicalRelationFrom(
// We can skip the BasicBlock if the canonical numbering has already been
// found in a separate instruction.
- if (NumberToCanonNum.find(BBGVNForCurrCand) != NumberToCanonNum.end())
+ if (NumberToCanonNum.contains(BBGVNForCurrCand))
continue;
// If the basic block is the starting block, then the shared instruction may
@@ -1048,6 +1101,76 @@ void IRSimilarityCandidate::createCanonicalRelationFrom(
}
}
+void IRSimilarityCandidate::createCanonicalRelationFrom(
+ IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge,
+ IRSimilarityCandidate &TargetCandLarge) {
+ assert(!SourceCand.CanonNumToNumber.empty() &&
+ "Canonical Relationship is non-empty");
+ assert(!SourceCand.NumberToCanonNum.empty() &&
+ "Canonical Relationship is non-empty");
+
+ assert(!SourceCandLarge.CanonNumToNumber.empty() &&
+ "Canonical Relationship is non-empty");
+ assert(!SourceCandLarge.NumberToCanonNum.empty() &&
+ "Canonical Relationship is non-empty");
+
+ assert(!TargetCandLarge.CanonNumToNumber.empty() &&
+ "Canonical Relationship is non-empty");
+ assert(!TargetCandLarge.NumberToCanonNum.empty() &&
+ "Canonical Relationship is non-empty");
+
+ assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");
+ assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");
+
+ // We're going to use the larger candidates as a "bridge" to create the
+ // canonical number for the target candidate since we have identified two
+ // candidates as subsequences of larger sequences, and therefore must be
+ // structurally similar.
+ for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
+ Value *CurrVal = ValueNumPair.first;
+ unsigned TargetCandGVN = ValueNumPair.second;
+
+ // Find the numbering in the large candidate that surrounds the
+ // current candidate.
+ std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);
+ assert(OLargeTargetGVN.has_value() && "GVN not found for Value");
+
+ // Get the canonical numbering in the large target candidate.
+ std::optional<unsigned> OTargetCandCanon =
+ TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());
+ assert(OTargetCandCanon.has_value() &&
+ "Canononical Number not found for GVN");
+
+ // Get the GVN in the large source candidate from the canonical numbering.
+ std::optional<unsigned> OLargeSourceGVN =
+ SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value());
+ assert(OLargeSourceGVN.has_value() &&
+ "GVN Number not found for Canonical Number");
+
+ // Get the Value from the GVN in the large source candidate.
+ std::optional<Value *> OLargeSourceV =
+ SourceCandLarge.fromGVN(OLargeSourceGVN.value());
+ assert(OLargeSourceV.has_value() && "Value not found for GVN");
+
+ // Get the GVN number for the Value in the source candidate.
+ std::optional<unsigned> OSourceGVN =
+ SourceCand.getGVN(OLargeSourceV.value());
+ assert(OSourceGVN.has_value() && "GVN Number not found for Value");
+
+ // Get the canonical numbering from the GVN.
+ std::optional<unsigned> OSourceCanon =
+ SourceCand.getCanonicalNum(OSourceGVN.value());
+ assert(OSourceCanon.has_value() && "Canon Number not found for GVN");
+
+ // Insert the canonical numbering and GVN pair into their respective
+ // mappings.
+ CanonNumToNumber.insert(
+ std::make_pair(OSourceCanon.value(), TargetCandGVN));
+ NumberToCanonNum.insert(
+ std::make_pair(TargetCandGVN, OSourceCanon.value()));
+ }
+}
+
void IRSimilarityCandidate::createCanonicalMappingFor(
IRSimilarityCandidate &CurrCand) {
assert(CurrCand.CanonNumToNumber.size() == 0 &&
@@ -1065,6 +1188,81 @@ void IRSimilarityCandidate::createCanonicalMappingFor(
}
}
+/// Look for larger IRSimilarityCandidates from the previously matched
+/// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is
+/// an overlap, return a pair of structurally similar, larger
+/// IRSimilarityCandidates.
+///
+/// \param [in] CandA - The first candidate we are trying to determine the
+/// structure of.
+/// \param [in] CandB - The second candidate we are trying to determine the
+/// structure of.
+/// \param [in] IndexToIncludedCand - Mapping of the index of an instruction in
+/// a circuit to the IRSimilarityCandidates that include this instruction.
+/// \param [in] CandToGroup - Mapping of IRSimilarityCandidate to a
+/// number representing the structural group assigned to it.
+static std::optional<
+ std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
+CheckLargerCands(
+ IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB,
+ DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
+ DenseMap<IRSimilarityCandidate *, unsigned> &CandToGroup) {
+ DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA;
+ DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB;
+ DenseSet<unsigned> IncludedGroupsA;
+ DenseSet<unsigned> IncludedGroupsB;
+
+ // Find the overall similarity group numbers that fully contain the candidate,
+ // and record the larger candidate for each group.
+ auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx());
+ std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
+ Result;
+
+ unsigned CandAStart = CandA.getStartIdx();
+ unsigned CandAEnd = CandA.getEndIdx();
+ unsigned CandBStart = CandB.getStartIdx();
+ unsigned CandBEnd = CandB.getEndIdx();
+ if (IdxToCandidateIt == IndexToIncludedCand.end())
+ return Result;
+ for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
+ if (MatchedCand->getStartIdx() > CandAStart ||
+ (MatchedCand->getEndIdx() < CandAEnd))
+ continue;
+ unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
+ IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));
+ IncludedGroupsA.insert(GroupNum);
+ }
+
+ // Find the overall similarity group numbers that fully contain the next
+ // candidate, and record the larger candidate for each group.
+ IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
+ if (IdxToCandidateIt == IndexToIncludedCand.end())
+ return Result;
+ for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
+ if (MatchedCand->getStartIdx() > CandBStart ||
+ MatchedCand->getEndIdx() < CandBEnd)
+ continue;
+ unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
+ IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));
+ IncludedGroupsB.insert(GroupNum);
+ }
+
+ // Find the intersection between the two groups, these are the groups where
+ // the larger candidates exist.
+ set_intersect(IncludedGroupsA, IncludedGroupsB);
+
+ // If there is no intersection between the sets, then we cannot determine
+ // whether or not there is a match.
+ if (IncludedGroupsA.empty())
+ return Result;
+
+ // Create a pair that contains the larger candidates.
+ auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());
+ auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());
+ Result = std::make_pair(ItA->second, ItB->second);
+ return Result;
+}
+
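The bridge built by createCanonicalRelationFrom/CheckLargerCands above amounts to chaining lookups through the larger, already-matched candidates. A toy of that chain with made-up numberings (all maps below are invented for the example):

#include <cstdio>
#include <map>

int main() {
  std::map<int, int> TargetToLargeTarget = {{1, 10}, {2, 11}};
  std::map<int, int> LargeTargetToCanon = {{10, 100}, {11, 101}};
  std::map<int, int> CanonToLargeSource = {{100, 20}, {101, 21}};
  std::map<int, int> LargeSourceToSource = {{20, 5}, {21, 6}};

  // Map each value number of the target region to the matching value number
  // of the source region by going through the containing (larger) regions.
  for (auto [TargetNum, LargeTargetNum] : TargetToLargeTarget) {
    int Canon = LargeTargetToCanon.at(LargeTargetNum);
    int SourceNum = LargeSourceToSource.at(CanonToLargeSource.at(Canon));
    std::printf("target %d <-> source %d (canonical %d)\n", TargetNum,
                SourceNum, Canon);
  }
}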
/// From the list of IRSimilarityCandidates, perform a comparison between each
/// IRSimilarityCandidate to determine if there are overlapping
/// IRInstructionData, or if they do not have the same structure.
@@ -1074,9 +1272,16 @@ void IRSimilarityCandidate::createCanonicalMappingFor(
/// \param [out] StructuralGroups - the mapping of unsigned integers to vector
/// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
/// vector are structurally similar to one another.
+/// \param [in] IndexToIncludedCand - Mapping of the index of an instruction in
+/// a circuit to the IRSimilarityCandidates that include this instruction.
+/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
+/// number representing the structural group assigned to it.
static void findCandidateStructures(
std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
- DenseMap<unsigned, SimilarityGroup> &StructuralGroups) {
+ DenseMap<unsigned, SimilarityGroup> &StructuralGroups,
+ DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
+ DenseMap<IRSimilarityCandidate *, unsigned> &CandToOverallGroup
+ ) {
std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
InnerCandEndIt;
@@ -1139,6 +1344,24 @@ static void findCandidateStructures(
if (CandToGroupItInner != CandToGroup.end())
continue;
+ // Check if we have found structural similarity between two candidates
+ // that fully contains the first and second candidates.
+ std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
+ LargerPair = CheckLargerCands(
+ *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
+
+ // If a pair was found, it means that we can assume that these smaller
+ // substrings are also structurally similar. Use the larger candidates to
+ // determine the canonical mapping between the two sections.
+ if (LargerPair.has_value()) {
+ SameStructure = true;
+ InnerCandIt->createCanonicalRelationFrom(
+ *CandIt, *LargerPair.value().first, *LargerPair.value().second);
+ CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
+ CurrentGroupPair->second.push_back(*InnerCandIt);
+ continue;
+ }
+
// Otherwise we determine if they have the same structure and add it to
// vector if they match.
ValueNumberMappingA.clear();
@@ -1165,24 +1388,58 @@ void IRSimilarityIdentifier::findCandidates(
std::vector<SimilarityGroup> NewCandidateGroups;
DenseMap<unsigned, SimilarityGroup> StructuralGroups;
+ DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand;
+ DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
// Iterate over the subsequences found by the Suffix Tree to create
// IRSimilarityCandidates for each repeated subsequence and determine which
// instances are structurally similar to one another.
- for (SuffixTree::RepeatedSubstring &RS : ST) {
+
+ // Sort the repeated substrings found by the suffix tree from longest to shortest.
+ std::vector<SuffixTree::RepeatedSubstring> RSes;
+ for (SuffixTree::RepeatedSubstring &RS : ST)
+ RSes.push_back(RS);
+
+ llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS,
+ const SuffixTree::RepeatedSubstring &RHS) {
+ return LHS.Length > RHS.Length;
+ });
+ for (SuffixTree::RepeatedSubstring &RS : RSes) {
createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
CandsForRepSubstring);
if (CandsForRepSubstring.size() < 2)
continue;
- findCandidateStructures(CandsForRepSubstring, StructuralGroups);
- for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups)
+ findCandidateStructures(CandsForRepSubstring, StructuralGroups,
+ IndexToIncludedCand, CandToGroup);
+ for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
// We only add the group if it contains more than one
// IRSimilarityCandidate. If there is only one, that means there is no
// other repeated subsequence with the same structure.
- if (Group.second.size() > 1)
+ if (Group.second.size() > 1) {
SimilarityCandidates->push_back(Group.second);
+ // Iterate over each candidate in the group, and add an entry for each
+ // instruction included with a mapping to a set of
+ // IRSimilarityCandidates that include that instruction.
+ for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) {
+ for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
+ Idx <= Edx; ++Idx) {
+ DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>>::iterator
+ IdIt;
+ IdIt = IndexToIncludedCand.find(Idx);
+ bool Inserted = false;
+ if (IdIt == IndexToIncludedCand.end())
+ std::tie(IdIt, Inserted) = IndexToIncludedCand.insert(
+ std::make_pair(Idx, DenseSet<IRSimilarityCandidate *>()));
+ IdIt->second.insert(&IRCand);
+ }
+ // Add mapping of candidate to the overall similarity group number.
+ CandToGroup.insert(
+ std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
+ }
+ }
+ }
CandsForRepSubstring.clear();
StructuralGroups.clear();
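The bookkeeping added above boils down to three maps: instruction index to the candidates that cover it, candidate to its structural group, and group to its members. Two shorter sections can then be assumed structurally similar whenever some group already holds a larger candidate that fully contains each of them. A minimal standalone sketch of that idea, using plain STL containers and integer ranges as stand-ins (Range, coveringGroups and coveredBySameGroup are illustrative names, not LLVM API):

#include <algorithm>
#include <iterator>
#include <map>
#include <set>
#include <utility>
#include <vector>

// Stand-in for an IRSimilarityCandidate: an inclusive range of instruction
// indices.
struct Range {
  unsigned Start, End;
};

using IndexToCovering = std::map<unsigned, std::vector<std::pair<Range, int>>>;

// Structural-group ids of previously recorded (larger) ranges that fully
// contain R, found by looking up R's start index.
static std::set<int> coveringGroups(const Range &R,
                                    const IndexToCovering &IdxToRanges) {
  std::set<int> Groups;
  auto It = IdxToRanges.find(R.Start);
  if (It == IdxToRanges.end())
    return Groups;
  for (const auto &[Cover, Group] : It->second)
    if (Cover.Start <= R.Start && Cover.End >= R.End)
      Groups.insert(Group);
  return Groups;
}

// Two shorter ranges are assumed structurally similar when the intersection of
// their covering groups is non-empty, mirroring the set_intersect call above.
static bool coveredBySameGroup(const Range &A, const Range &B,
                               const IndexToCovering &IdxToRanges) {
  std::set<int> GA = coveringGroups(A, IdxToRanges);
  std::set<int> GB = coveringGroups(B, IdxToRanges);
  std::set<int> Common;
  std::set_intersection(GA.begin(), GA.end(), GB.begin(), GB.end(),
                        std::inserter(Common, Common.begin()));
  return !Common.empty();
}

With the map populated largest-first, any later, shorter repeat that falls entirely inside an already-grouped region inherits that group's structural verdict without re-running the full comparison.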
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 950541ace9d7..6c750b7baa40 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -107,7 +107,7 @@ static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
// must be positive (i.e., IsSigned = false), because if this were not the
// case, the sign bit would have been demanded.
auto Mask = DB->getDemandedBits(Exit);
- MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
+ MaxBitWidth = Mask.getBitWidth() - Mask.countl_zero();
}
if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
@@ -128,8 +128,7 @@ static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
++MaxBitWidth;
}
}
- if (!isPowerOf2_64(MaxBitWidth))
- MaxBitWidth = NextPowerOf2(MaxBitWidth);
+ MaxBitWidth = llvm::bit_ceil(MaxBitWidth);
return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
IsSigned);
@@ -707,6 +706,10 @@ RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
return InstDesc(Kind == RecurKind::FMin, I);
if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
return InstDesc(Kind == RecurKind::FMax, I);
+ if (match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())))
+ return InstDesc(Kind == RecurKind::FMinimum, I);
+ if (match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value())))
+ return InstDesc(Kind == RecurKind::FMaximum, I);
return InstDesc(false, I);
}
@@ -746,15 +749,21 @@ RecurrenceDescriptor::isConditionalRdxPattern(RecurKind Kind, Instruction *I) {
return InstDesc(false, I);
Value *Op1, *Op2;
- if ((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||
- m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
- I1->isFast())
- return InstDesc(Kind == RecurKind::FAdd, SI);
+ if (!(((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||
+ m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
+ I1->isFast()) ||
+ (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast())) ||
+ ((m_Add(m_Value(Op1), m_Value(Op2)).match(I1) ||
+ m_Sub(m_Value(Op1), m_Value(Op2)).match(I1))) ||
+ (m_Mul(m_Value(Op1), m_Value(Op2)).match(I1))))
+ return InstDesc(false, I);
- if (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast()))
- return InstDesc(Kind == RecurKind::FMul, SI);
+ Instruction *IPhi = isa<PHINode>(*Op1) ? dyn_cast<Instruction>(Op1)
+ : dyn_cast<Instruction>(Op2);
+ if (!IPhi || IPhi != FalseVal)
+ return InstDesc(false, I);
- return InstDesc(false, I);
+ return InstDesc(true, SI);
}
RecurrenceDescriptor::InstDesc
@@ -787,7 +796,8 @@ RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
return InstDesc(Kind == RecurKind::FAdd, I,
I->hasAllowReassoc() ? nullptr : I);
case Instruction::Select:
- if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul)
+ if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul ||
+ Kind == RecurKind::Add || Kind == RecurKind::Mul)
return isConditionalRdxPattern(Kind, I);
[[fallthrough]];
case Instruction::FCmp:
@@ -795,11 +805,18 @@ RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
case Instruction::Call:
if (isSelectCmpRecurrenceKind(Kind))
return isSelectCmpPattern(L, OrigPhi, I, Prev);
+ auto HasRequiredFMF = [&]() {
+ if (FuncFMF.noNaNs() && FuncFMF.noSignedZeros())
+ return true;
+ if (isa<FPMathOperator>(I) && I->hasNoNaNs() && I->hasNoSignedZeros())
+ return true;
+ // minimum and maximum intrinsics do not require nsz and nnan flags since
+ // NaN and signed zeroes are propagated in the intrinsic implementation.
+ return match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())) ||
+ match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value()));
+ };
if (isIntMinMaxRecurrenceKind(Kind) ||
- (((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) ||
- (isa<FPMathOperator>(I) && I->hasNoNaNs() &&
- I->hasNoSignedZeros())) &&
- isFPMinMaxRecurrenceKind(Kind)))
+ (HasRequiredFMF() && isFPMinMaxRecurrenceKind(Kind)))
return isMinMaxPattern(I, Kind, Prev);
else if (isFMulAddIntrinsic(I))
return InstDesc(Kind == RecurKind::FMulAdd, I,
@@ -917,13 +934,22 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
return true;
}
+ if (AddReductionVar(Phi, RecurKind::FMaximum, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
+ LLVM_DEBUG(dbgs() << "Found a float MAXIMUM reduction PHI." << *Phi << "\n");
+ return true;
+ }
+ if (AddReductionVar(Phi, RecurKind::FMinimum, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
+ LLVM_DEBUG(dbgs() << "Found a float MINIMUM reduction PHI." << *Phi << "\n");
+ return true;
+ }
// Not a reduction of known type.
return false;
}
-bool RecurrenceDescriptor::isFixedOrderRecurrence(
- PHINode *Phi, Loop *TheLoop,
- MapVector<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
+bool RecurrenceDescriptor::isFixedOrderRecurrence(PHINode *Phi, Loop *TheLoop,
+ DominatorTree *DT) {
// Ensure the phi node is in the loop header and has two incoming values.
if (Phi->getParent() != TheLoop->getHeader() ||
@@ -959,8 +985,7 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(
Previous = dyn_cast<Instruction>(PrevPhi->getIncomingValueForBlock(Latch));
}
- if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
- SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
+ if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous))
return false;
// Ensure every user of the phi node (recursively) is dominated by the
@@ -969,27 +994,16 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(
// loop.
// TODO: Consider extending this sinking to handle memory instructions.
- // We optimistically assume we can sink all users after Previous. Keep a set
- // of instructions to sink after Previous ordered by dominance in the common
- // basic block. It will be applied to SinkAfter if all users can be sunk.
- auto CompareByComesBefore = [](const Instruction *A, const Instruction *B) {
- return A->comesBefore(B);
- };
- std::set<Instruction *, decltype(CompareByComesBefore)> InstrsToSink(
- CompareByComesBefore);
-
+ SmallPtrSet<Value *, 8> Seen;
BasicBlock *PhiBB = Phi->getParent();
SmallVector<Instruction *, 8> WorkList;
auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
- // Already sunk SinkCandidate.
- if (SinkCandidate->getParent() == PhiBB &&
- InstrsToSink.find(SinkCandidate) != InstrsToSink.end())
- return true;
-
// Cyclic dependence.
if (Previous == SinkCandidate)
return false;
+ if (!Seen.insert(SinkCandidate).second)
+ return true;
if (DT->dominates(Previous,
SinkCandidate)) // We already are good w/o sinking.
return true;
@@ -999,55 +1013,12 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(
SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
return false;
- // Avoid sinking an instruction multiple times (if multiple operands are
- // fixed order recurrences) by sinking once - after the latest 'previous'
- // instruction.
- auto It = SinkAfter.find(SinkCandidate);
- if (It != SinkAfter.end()) {
- auto *OtherPrev = It->second;
- // Find the earliest entry in the 'sink-after' chain. The last entry in
- // the chain is the original 'Previous' for a recurrence handled earlier.
- auto EarlierIt = SinkAfter.find(OtherPrev);
- while (EarlierIt != SinkAfter.end()) {
- Instruction *EarlierInst = EarlierIt->second;
- EarlierIt = SinkAfter.find(EarlierInst);
- // Bail out if order has not been preserved.
- if (EarlierIt != SinkAfter.end() &&
- !DT->dominates(EarlierInst, OtherPrev))
- return false;
- OtherPrev = EarlierInst;
- }
- // Bail out if order has not been preserved.
- if (OtherPrev != It->second && !DT->dominates(It->second, OtherPrev))
- return false;
-
- // SinkCandidate is already being sunk after an instruction after
- // Previous. Nothing left to do.
- if (DT->dominates(Previous, OtherPrev) || Previous == OtherPrev)
- return true;
-
- // If there are other instructions to be sunk after SinkCandidate, remove
- // and re-insert SinkCandidate can break those instructions. Bail out for
- // simplicity.
- if (any_of(SinkAfter,
- [SinkCandidate](const std::pair<Instruction *, Instruction *> &P) {
- return P.second == SinkCandidate;
- }))
- return false;
-
- // Otherwise, Previous comes after OtherPrev and SinkCandidate needs to be
- // re-sunk to Previous, instead of sinking to OtherPrev. Remove
- // SinkCandidate from SinkAfter to ensure it's insert position is updated.
- SinkAfter.erase(SinkCandidate);
- }
-
// If we reach a PHI node that is not dominated by Previous, we reached a
// header PHI. No need for sinking.
if (isa<PHINode>(SinkCandidate))
return true;
// Sink User tentatively and check its users
- InstrsToSink.insert(SinkCandidate);
WorkList.push_back(SinkCandidate);
return true;
};
@@ -1062,11 +1033,6 @@ bool RecurrenceDescriptor::isFixedOrderRecurrence(
}
}
- // We can sink all users of Phi. Update the mapping.
- for (Instruction *I : InstrsToSink) {
- SinkAfter[I] = Previous;
- Previous = I;
- }
return true;
}
@@ -1101,7 +1067,7 @@ Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
return ConstantFP::get(Tp, 0.0L);
return ConstantFP::get(Tp, -0.0L);
case RecurKind::UMin:
- return ConstantInt::get(Tp, -1);
+ return ConstantInt::get(Tp, -1, true);
case RecurKind::UMax:
return ConstantInt::get(Tp, 0);
case RecurKind::SMin:
@@ -1118,6 +1084,10 @@ Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
assert((FMF.noNaNs() && FMF.noSignedZeros()) &&
"nnan, nsz is expected to be set for FP max reduction.");
return ConstantFP::getInfinity(Tp, true /*Negative*/);
+ case RecurKind::FMinimum:
+ return ConstantFP::getInfinity(Tp, false /*Negative*/);
+ case RecurKind::FMaximum:
+ return ConstantFP::getInfinity(Tp, true /*Negative*/);
case RecurKind::SelectICmp:
case RecurKind::SelectFCmp:
return getRecurrenceStartValue();
@@ -1152,6 +1122,8 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
return Instruction::ICmp;
case RecurKind::FMax:
case RecurKind::FMin:
+ case RecurKind::FMaximum:
+ case RecurKind::FMinimum:
case RecurKind::SelectFCmp:
return Instruction::FCmp;
default:
@@ -1264,10 +1236,8 @@ RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
const SCEV *Step, BinaryOperator *BOp,
- Type *ElementType,
SmallVectorImpl<Instruction *> *Casts)
- : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp),
- ElementType(ElementType) {
+ : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
assert(IK != IK_NoInduction && "Not an induction");
// Start value type should match the induction kind and the value
@@ -1282,8 +1252,6 @@ InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
"Step value is zero");
- assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
- "Step value should be constant for pointer induction");
assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
"StepValue is not an integer");
@@ -1295,11 +1263,6 @@ InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
InductionBinOp->getOpcode() == Instruction::FSub))) &&
"Binary opcode should be specified for FP induction");
- if (IK == IK_PtrInduction)
- assert(ElementType && "Pointer induction must have element type");
- else
- assert(!ElementType && "Non-pointer induction cannot have element type");
-
if (Casts) {
for (auto &Inst : *Casts) {
RedundantCasts.push_back(Inst);
@@ -1541,6 +1504,12 @@ bool InductionDescriptor::isInductionPHI(
return false;
}
+ // This function assumes that isInductionPHI is called only on Phi nodes
+ // present inside loop headers. Check for this, and assert if the current
+ // Phi is not present inside the loop header.
+ assert(Phi->getParent() == AR->getLoop()->getHeader()
+ && "Invalid Phi node, not present in loop header");
+
Value *StartValue =
Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
@@ -1559,39 +1528,13 @@ bool InductionDescriptor::isInductionPHI(
BinaryOperator *BOp =
dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
- /* ElementType */ nullptr, CastsToIgnore);
+ CastsToIgnore);
return true;
}
assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
- // Pointer induction should be a constant.
- if (!ConstStep)
- return false;
-
- // Always use i8 element type for opaque pointer inductions.
- PointerType *PtrTy = cast<PointerType>(PhiTy);
- Type *ElementType = PtrTy->isOpaque()
- ? Type::getInt8Ty(PtrTy->getContext())
- : PtrTy->getNonOpaquePointerElementType();
- if (!ElementType->isSized())
- return false;
-
- ConstantInt *CV = ConstStep->getValue();
- const DataLayout &DL = Phi->getModule()->getDataLayout();
- TypeSize TySize = DL.getTypeAllocSize(ElementType);
- // TODO: We could potentially support this for scalable vectors if we can
- // prove at compile time that the constant step is always a multiple of
- // the scalable type.
- if (TySize.isZero() || TySize.isScalable())
- return false;
- int64_t Size = static_cast<int64_t>(TySize.getFixedValue());
- int64_t CVSize = CV->getSExtValue();
- if (CVSize % Size)
- return false;
- auto *StepValue =
- SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
- D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue,
- /* BinOp */ nullptr, ElementType);
+ // This allows induction variables w/non-constant steps.
+ D = InductionDescriptor(StartValue, IK_PtrInduction, Step);
return true;
}
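The identity values chosen above follow directly from the semantics of the new recurrence kinds: an FMinimum reduction starts from positive infinity and an FMaximum reduction from negative infinity, and because llvm.minimum/llvm.maximum propagate NaN and signed zero themselves, no extra fast-math flags are required. A tiny standalone check of the identity property, ignoring the NaN and signed-zero corner cases (std::fmin and std::fmax are used only as convenient hosts for the reduction loop):

#include <cassert>
#include <cmath>
#include <limits>
#include <vector>

int main() {
  std::vector<double> Vals = {3.5, -2.0, 7.25, 0.5};

  // +infinity is the neutral element of a minimum reduction: seeding the
  // accumulator with it never changes the final result.
  double MinAcc = std::numeric_limits<double>::infinity();
  for (double V : Vals)
    MinAcc = std::fmin(MinAcc, V);
  assert(MinAcc == -2.0);

  // Symmetrically, -infinity is the neutral element of a maximum reduction.
  double MaxAcc = -std::numeric_limits<double>::infinity();
  for (double V : Vals)
    MaxAcc = std::fmax(MaxAcc, V);
  assert(MaxAcc == 7.25);
  return 0;
}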
diff --git a/llvm/lib/Analysis/IVUsers.cpp b/llvm/lib/Analysis/IVUsers.cpp
index 830211658353..5c7883fb3b37 100644
--- a/llvm/lib/Analysis/IVUsers.cpp
+++ b/llvm/lib/Analysis/IVUsers.cpp
@@ -334,8 +334,8 @@ const SCEV *IVUsers::getReplacementExpr(const IVStrideUse &IU) const {
/// getExpr - Return the expression for the use.
const SCEV *IVUsers::getExpr(const IVStrideUse &IU) const {
- return normalizeForPostIncUse(getReplacementExpr(IU), IU.getPostIncLoops(),
- *SE);
+ const SCEV *Replacement = getReplacementExpr(IU);
+ return normalizeForPostIncUse(Replacement, IU.getPostIncLoops(), *SE);
}
static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) {
@@ -356,7 +356,10 @@ static const SCEVAddRecExpr *findAddRecForLoop(const SCEV *S, const Loop *L) {
}
const SCEV *IVUsers::getStride(const IVStrideUse &IU, const Loop *L) const {
- if (const SCEVAddRecExpr *AR = findAddRecForLoop(getExpr(IU), L))
+ const SCEV *Expr = getExpr(IU);
+ if (!Expr)
+ return nullptr;
+ if (const SCEVAddRecExpr *AR = findAddRecForLoop(Expr, L))
return AR->getStepRecurrence(*SE);
return nullptr;
}
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index 540aad7ee0c0..e2480d51d372 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -13,6 +13,7 @@
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
@@ -208,6 +209,10 @@ bool InlineAdvisorAnalysis::Result::tryCreate(
Advisor.reset(DA.Factory(M, FAM, Params, IC));
return !!Advisor;
}
+ auto GetDefaultAdvice = [&FAM, Params](CallBase &CB) {
+ auto OIC = getDefaultInlineAdvice(CB, FAM, Params);
+ return OIC.has_value();
+ };
switch (Mode) {
case InliningAdvisorMode::Default:
LLVM_DEBUG(dbgs() << "Using default inliner heuristic.\n");
@@ -223,18 +228,12 @@ bool InlineAdvisorAnalysis::Result::tryCreate(
case InliningAdvisorMode::Development:
#ifdef LLVM_HAVE_TFLITE
LLVM_DEBUG(dbgs() << "Using development-mode inliner policy.\n");
- Advisor =
- llvm::getDevelopmentModeAdvisor(M, MAM, [&FAM, Params](CallBase &CB) {
- auto OIC = getDefaultInlineAdvice(CB, FAM, Params);
- return OIC.has_value();
- });
+ Advisor = llvm::getDevelopmentModeAdvisor(M, MAM, GetDefaultAdvice);
#endif
break;
case InliningAdvisorMode::Release:
-#ifdef LLVM_HAVE_TF_AOT
LLVM_DEBUG(dbgs() << "Using release-mode inliner policy.\n");
- Advisor = llvm::getReleaseModeAdvisor(M, MAM);
-#endif
+ Advisor = llvm::getReleaseModeAdvisor(M, MAM, GetDefaultAdvice);
break;
}
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 5bcc8a2f384a..9ff277f5334e 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -142,11 +142,11 @@ static cl::opt<size_t>
cl::desc("Do not inline functions with a stack size "
"that exceeds the specified limit"));
-static cl::opt<size_t>
- RecurStackSizeThreshold("recursive-inline-max-stacksize", cl::Hidden,
- cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller),
- cl::desc("Do not inline recursive functions with a stack "
- "size that exceeds the specified limit"));
+static cl::opt<size_t> RecurStackSizeThreshold(
+ "recursive-inline-max-stacksize", cl::Hidden,
+ cl::init(InlineConstants::TotalAllocaSizeRecursiveCaller),
+ cl::desc("Do not inline recursive functions with a stack "
+ "size that exceeds the specified limit"));
static cl::opt<bool> OptComputeFullInlineCost(
"inline-cost-full", cl::Hidden,
@@ -493,7 +493,7 @@ public:
InlineResult analyze();
std::optional<Constant *> getSimplifiedValue(Instruction *I) {
- if (SimplifiedValues.find(I) != SimplifiedValues.end())
+ if (SimplifiedValues.contains(I))
return SimplifiedValues[I];
return std::nullopt;
}
@@ -717,7 +717,9 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
void onInitializeSROAArg(AllocaInst *Arg) override {
assert(Arg != nullptr &&
"Should not initialize SROA costs for null value.");
- SROAArgCosts[Arg] = 0;
+ auto SROAArgCost = TTI.getCallerAllocaCost(&CandidateCall, Arg);
+ SROACostSavings += SROAArgCost;
+ SROAArgCosts[Arg] = SROAArgCost;
}
void onAggregateSROAUse(AllocaInst *SROAArg) override {
@@ -1054,7 +1056,7 @@ public:
void print(raw_ostream &OS);
std::optional<InstructionCostDetail> getCostDetails(const Instruction *I) {
- if (InstructionCostDetailMap.find(I) != InstructionCostDetailMap.end())
+ if (InstructionCostDetailMap.contains(I))
return InstructionCostDetailMap[I];
return std::nullopt;
}
@@ -1108,31 +1110,31 @@ private:
if (CostIt == SROACosts.end())
return;
- increment(InlineCostFeatureIndex::SROALosses, CostIt->second);
+ increment(InlineCostFeatureIndex::sroa_losses, CostIt->second);
SROACostSavingOpportunities -= CostIt->second;
SROACosts.erase(CostIt);
}
void onDisableLoadElimination() override {
- set(InlineCostFeatureIndex::LoadElimination, 1);
+ set(InlineCostFeatureIndex::load_elimination, 1);
}
void onCallPenalty() override {
- increment(InlineCostFeatureIndex::CallPenalty, CallPenalty);
+ increment(InlineCostFeatureIndex::call_penalty, CallPenalty);
}
void onCallArgumentSetup(const CallBase &Call) override {
- increment(InlineCostFeatureIndex::CallArgumentSetup,
+ increment(InlineCostFeatureIndex::call_argument_setup,
Call.arg_size() * InstrCost);
}
void onLoadRelativeIntrinsic() override {
- increment(InlineCostFeatureIndex::LoadRelativeIntrinsic, 3 * InstrCost);
+ increment(InlineCostFeatureIndex::load_relative_intrinsic, 3 * InstrCost);
}
void onLoweredCall(Function *F, CallBase &Call,
bool IsIndirectCall) override {
- increment(InlineCostFeatureIndex::LoweredCallArgSetup,
+ increment(InlineCostFeatureIndex::lowered_call_arg_setup,
Call.arg_size() * InstrCost);
if (IsIndirectCall) {
@@ -1153,9 +1155,9 @@ private:
GetAssumptionCache, GetBFI, PSI, ORE, false,
true);
if (CA.analyze().isSuccess()) {
- increment(InlineCostFeatureIndex::NestedInlineCostEstimate,
+ increment(InlineCostFeatureIndex::nested_inline_cost_estimate,
CA.getCost());
- increment(InlineCostFeatureIndex::NestedInlines, 1);
+ increment(InlineCostFeatureIndex::nested_inlines, 1);
}
} else {
onCallPenalty();
@@ -1168,12 +1170,12 @@ private:
if (JumpTableSize) {
int64_t JTCost = static_cast<int64_t>(JumpTableSize) * InstrCost +
JTCostMultiplier * InstrCost;
- increment(InlineCostFeatureIndex::JumpTablePenalty, JTCost);
+ increment(InlineCostFeatureIndex::jump_table_penalty, JTCost);
return;
}
if (NumCaseCluster <= 3) {
- increment(InlineCostFeatureIndex::CaseClusterPenalty,
+ increment(InlineCostFeatureIndex::case_cluster_penalty,
NumCaseCluster * CaseClusterCostMultiplier * InstrCost);
return;
}
@@ -1183,15 +1185,20 @@ private:
int64_t SwitchCost =
ExpectedNumberOfCompare * SwitchCostMultiplier * InstrCost;
- increment(InlineCostFeatureIndex::SwitchPenalty, SwitchCost);
+ increment(InlineCostFeatureIndex::switch_penalty, SwitchCost);
}
void onMissedSimplification() override {
- increment(InlineCostFeatureIndex::UnsimplifiedCommonInstructions,
+ increment(InlineCostFeatureIndex::unsimplified_common_instructions,
InstrCost);
}
- void onInitializeSROAArg(AllocaInst *Arg) override { SROACosts[Arg] = 0; }
+ void onInitializeSROAArg(AllocaInst *Arg) override {
+ auto SROAArgCost = TTI.getCallerAllocaCost(&CandidateCall, Arg);
+ SROACosts[Arg] = SROAArgCost;
+ SROACostSavingOpportunities += SROAArgCost;
+ }
+
void onAggregateSROAUse(AllocaInst *Arg) override {
SROACosts.find(Arg)->second += InstrCost;
SROACostSavingOpportunities += InstrCost;
@@ -1199,7 +1206,7 @@ private:
void onBlockAnalyzed(const BasicBlock *BB) override {
if (BB->getTerminator()->getNumSuccessors() > 1)
- set(InlineCostFeatureIndex::IsMultipleBlocks, 1);
+ set(InlineCostFeatureIndex::is_multiple_blocks, 1);
Threshold -= SingleBBBonus;
}
@@ -1212,24 +1219,24 @@ private:
// Ignore loops that will not be executed
if (DeadBlocks.count(L->getHeader()))
continue;
- increment(InlineCostFeatureIndex::NumLoops,
+ increment(InlineCostFeatureIndex::num_loops,
InlineConstants::LoopPenalty);
}
}
- set(InlineCostFeatureIndex::DeadBlocks, DeadBlocks.size());
- set(InlineCostFeatureIndex::SimplifiedInstructions,
+ set(InlineCostFeatureIndex::dead_blocks, DeadBlocks.size());
+ set(InlineCostFeatureIndex::simplified_instructions,
NumInstructionsSimplified);
- set(InlineCostFeatureIndex::ConstantArgs, NumConstantArgs);
- set(InlineCostFeatureIndex::ConstantOffsetPtrArgs,
+ set(InlineCostFeatureIndex::constant_args, NumConstantArgs);
+ set(InlineCostFeatureIndex::constant_offset_ptr_args,
NumConstantOffsetPtrArgs);
- set(InlineCostFeatureIndex::SROASavings, SROACostSavingOpportunities);
+ set(InlineCostFeatureIndex::sroa_savings, SROACostSavingOpportunities);
if (NumVectorInstructions <= NumInstructions / 10)
Threshold -= VectorBonus;
else if (NumVectorInstructions <= NumInstructions / 2)
Threshold -= VectorBonus / 2;
- set(InlineCostFeatureIndex::Threshold, Threshold);
+ set(InlineCostFeatureIndex::threshold, Threshold);
return InlineResult::success();
}
@@ -1237,17 +1244,17 @@ private:
bool shouldStop() override { return false; }
void onLoadEliminationOpportunity() override {
- increment(InlineCostFeatureIndex::LoadElimination, 1);
+ increment(InlineCostFeatureIndex::load_elimination, 1);
}
InlineResult onAnalysisStart() override {
- increment(InlineCostFeatureIndex::CallSiteCost,
+ increment(InlineCostFeatureIndex::callsite_cost,
-1 * getCallsiteCost(this->CandidateCall, DL));
- set(InlineCostFeatureIndex::ColdCcPenalty,
+ set(InlineCostFeatureIndex::cold_cc_penalty,
(F.getCallingConv() == CallingConv::Cold));
- set(InlineCostFeatureIndex::LastCallToStaticBonus,
+ set(InlineCostFeatureIndex::last_call_to_static_bonus,
isSoleCallToLocalFunction(CandidateCall, F));
// FIXME: we shouldn't repeat this logic in both the Features and Cost
@@ -1607,7 +1614,7 @@ bool CallAnalyzer::simplifyIntrinsicCallIsConstant(CallBase &CB) {
bool CallAnalyzer::simplifyIntrinsicCallObjectSize(CallBase &CB) {
// As per the langref, "The fourth argument to llvm.objectsize determines if
// the value should be evaluated at runtime."
- if(cast<ConstantInt>(CB.getArgOperand(3))->isOne())
+ if (cast<ConstantInt>(CB.getArgOperand(3))->isOne())
return false;
Value *V = lowerObjectSizeCall(&cast<IntrinsicInst>(CB), DL, nullptr,
@@ -1976,14 +1983,27 @@ bool CallAnalyzer::visitCmpInst(CmpInst &I) {
}
}
+ auto isImplicitNullCheckCmp = [](const CmpInst &I) {
+ for (auto *User : I.users())
+ if (auto *Instr = dyn_cast<Instruction>(User))
+ if (!Instr->getMetadata(LLVMContext::MD_make_implicit))
+ return false;
+ return true;
+ };
+
// If the comparison is an equality comparison with null, we can simplify it
// if we know the value (argument) can't be null
- if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1)) &&
- isKnownNonNullInCallee(I.getOperand(0))) {
- bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
- SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
- : ConstantInt::getFalse(I.getType());
- return true;
+ if (I.isEquality() && isa<ConstantPointerNull>(I.getOperand(1))) {
+ if (isKnownNonNullInCallee(I.getOperand(0))) {
+ bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE;
+ SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType())
+ : ConstantInt::getFalse(I.getType());
+ return true;
+ }
+ // Implicit null checks act as unconditional branches and their comparisons
+ // should be treated as simplified and free of cost.
+ if (isImplicitNullCheckCmp(I))
+ return true;
}
return handleSROA(I.getOperand(0), isa<ConstantPointerNull>(I.getOperand(1)));
}
@@ -2265,6 +2285,7 @@ bool CallAnalyzer::visitBranchInst(BranchInst &BI) {
// inliner more regular and predictable. Interestingly, conditional branches
// which will fold away are also free.
return BI.isUnconditional() || isa<ConstantInt>(BI.getCondition()) ||
+ BI.getMetadata(LLVMContext::MD_make_implicit) ||
isa_and_nonnull<ConstantInt>(
SimplifiedValues.lookup(BI.getCondition()));
}
@@ -2314,10 +2335,10 @@ bool CallAnalyzer::visitSelectInst(SelectInst &SI) {
: nullptr;
if (!SelectedV) {
// Condition is a vector constant that is not all 1s or all 0s. If all
- // operands are constants, ConstantExpr::getSelect() can handle the cases
- // such as select vectors.
+ // operands are constants, ConstantFoldSelectInstruction() can handle the
+ // cases such as select vectors.
if (TrueC && FalseC) {
- if (auto *C = ConstantExpr::getSelect(CondC, TrueC, FalseC)) {
+ if (auto *C = ConstantFoldSelectInstruction(CondC, TrueC, FalseC)) {
SimplifiedValues[&SI] = C;
return true;
}
@@ -2666,9 +2687,7 @@ InlineResult CallAnalyzer::analyze() {
// basic blocks in a breadth-first order as we insert live successors. To
// accomplish this, prioritizing for small iterations because we exit after
// crossing our threshold, we use a small-size optimized SetVector.
- typedef SetVector<BasicBlock *, SmallVector<BasicBlock *, 16>,
- SmallPtrSet<BasicBlock *, 16>>
- BBSetVector;
+ typedef SmallSetVector<BasicBlock *, 16> BBSetVector;
BBSetVector BBWorklist;
BBWorklist.insert(&F.getEntryBlock());
@@ -2787,16 +2806,14 @@ LLVM_DUMP_METHOD void InlineCostCallAnalyzer::dump() { print(dbgs()); }
/// Test that there are no attribute conflicts between Caller and Callee
/// that prevent inlining.
static bool functionsHaveCompatibleAttributes(
- Function *Caller, Function *Callee, TargetTransformInfo &TTI,
+ Function *Caller, Function *Callee,
function_ref<const TargetLibraryInfo &(Function &)> &GetTLI) {
// Note that CalleeTLI must be a copy not a reference. The legacy pass manager
// caches the most recently created TLI in the TargetLibraryInfoWrapperPass
// object, and always returns the same object (which is overwritten on each
// GetTLI call). Therefore we copy the first result.
auto CalleeTLI = GetTLI(*Callee);
- return (IgnoreTTIInlineCompatible ||
- TTI.areInlineCompatible(Caller, Callee)) &&
- GetTLI(*Caller).areInlineCompatible(CalleeTLI,
+ return GetTLI(*Caller).areInlineCompatible(CalleeTLI,
InlineCallerSupersetNoBuiltin) &&
AttributeFuncs::areInlineCompatible(*Caller, *Callee);
}
@@ -2912,6 +2929,12 @@ std::optional<InlineResult> llvm::getAttributeBasedInliningDecision(
" address space");
}
+ // Never inline functions with conflicting target attributes.
+ Function *Caller = Call.getCaller();
+ if (!IgnoreTTIInlineCompatible &&
+ !CalleeTTI.areInlineCompatible(Caller, Callee))
+ return InlineResult::failure("conflicting target attributes");
+
// Calls to functions with always-inline attributes should be inlined
// whenever possible.
if (Call.hasFnAttr(Attribute::AlwaysInline)) {
@@ -2926,8 +2949,12 @@ std::optional<InlineResult> llvm::getAttributeBasedInliningDecision(
// Never inline functions with conflicting attributes (unless callee has
// always-inline attribute).
- Function *Caller = Call.getCaller();
- if (!functionsHaveCompatibleAttributes(Caller, Callee, CalleeTTI, GetTLI))
+ // FIXME: functionsHaveCompatibleAttributes below checks for compatibilities
+ // of different kinds of function attributes -- sanitizer-related ones,
+ // checkDenormMode, no-builtin-memcpy, etc. It's unclear if we really want
+ // the always-inline attribute to take precedence over these different types
+ // of function attributes.
+ if (!functionsHaveCompatibleAttributes(Caller, Callee, GetTLI))
return InlineResult::failure("conflicting attributes");
// Don't inline this call if the caller has the optnone attribute.
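The isImplicitNullCheckCmp lambda added above treats a comparison as free when every one of its users carries make.implicit metadata, because such a compare will be folded into an implicit null check rather than emitted as a real branch. A small standalone sketch of that "all users are tagged" test, with User and MetadataKinds as illustrative stand-ins rather than LLVM types:

#include <set>
#include <string>
#include <vector>

// Stand-in for an instruction that uses the comparison, together with the
// metadata kinds attached to it.
struct User {
  std::set<std::string> MetadataKinds;
};

// Mirrors the shape of the isImplicitNullCheckCmp lambda: the compare counts
// as an implicit null check only if every user is tagged "make.implicit".
// With no users this is vacuously true, as in the patch.
static bool allUsersMakeImplicit(const std::vector<User> &Users) {
  for (const User &U : Users)
    if (!U.MetadataKinds.count("make.implicit"))
      return false;
  return true;
}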
diff --git a/llvm/lib/Analysis/InlineOrder.cpp b/llvm/lib/Analysis/InlineOrder.cpp
index 8d0e49936901..3b85820d7b8f 100644
--- a/llvm/lib/Analysis/InlineOrder.cpp
+++ b/llvm/lib/Analysis/InlineOrder.cpp
@@ -33,8 +33,7 @@ static cl::opt<InlinePriorityMode> UseInlinePriority(
"Use inline cost priority."),
clEnumValN(InlinePriorityMode::CostBenefit, "cost-benefit",
"Use cost-benefit ratio."),
- clEnumValN(InlinePriorityMode::ML, "ml",
- "Use ML.")));
+ clEnumValN(InlinePriorityMode::ML, "ml", "Use ML.")));
static cl::opt<int> ModuleInlinerTopPriorityThreshold(
"moudle-inliner-top-priority-threshold", cl::Hidden, cl::init(0),
@@ -281,8 +280,13 @@ private:
} // namespace
+AnalysisKey llvm::PluginInlineOrderAnalysis::Key;
+bool llvm::PluginInlineOrderAnalysis::HasBeenRegistered;
+
std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>>
-llvm::getInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params) {
+llvm::getDefaultInlineOrder(FunctionAnalysisManager &FAM,
+ const InlineParams &Params,
+ ModuleAnalysisManager &MAM, Module &M) {
switch (UseInlinePriority) {
case InlinePriorityMode::Size:
LLVM_DEBUG(dbgs() << " Current used priority: Size priority ---- \n");
@@ -295,11 +299,22 @@ llvm::getInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params) {
case InlinePriorityMode::CostBenefit:
LLVM_DEBUG(
dbgs() << " Current used priority: cost-benefit priority ---- \n");
- return std::make_unique<PriorityInlineOrder<CostBenefitPriority>>(FAM, Params);
+ return std::make_unique<PriorityInlineOrder<CostBenefitPriority>>(FAM,
+ Params);
case InlinePriorityMode::ML:
- LLVM_DEBUG(
- dbgs() << " Current used priority: ML priority ---- \n");
+ LLVM_DEBUG(dbgs() << " Current used priority: ML priority ---- \n");
return std::make_unique<PriorityInlineOrder<MLPriority>>(FAM, Params);
}
return nullptr;
}
+
+std::unique_ptr<InlineOrder<std::pair<CallBase *, int>>>
+llvm::getInlineOrder(FunctionAnalysisManager &FAM, const InlineParams &Params,
+ ModuleAnalysisManager &MAM, Module &M) {
+ if (llvm::PluginInlineOrderAnalysis::isRegistered()) {
+ LLVM_DEBUG(dbgs() << " Current used priority: plugin ---- \n");
+ return MAM.getResult<PluginInlineOrderAnalysis>(M).Factory(FAM, Params, MAM,
+ M);
+ }
+ return getDefaultInlineOrder(FAM, Params, MAM, M);
+}
\ No newline at end of file
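getDefaultInlineOrder and the plugin hook above both hand back a priority queue of call sites that the module inliner drains in priority order. A minimal standalone sketch of the size-priority flavour, assuming plain STL types (CallSite, CalleeSize and drain are illustrative placeholders, not the InlineOrder interface):

#include <queue>
#include <utility>
#include <vector>

struct CallSite {
  int Id;
  unsigned CalleeSize; // smaller callees are inlined first
};

struct BySmallestCallee {
  bool operator()(const CallSite &A, const CallSite &B) const {
    return A.CalleeSize > B.CalleeSize; // min-heap on callee size
  }
};

// Pop call sites in priority order, the way a size-based inline order would.
// For example, drain({{1, 40}, {2, 10}, {3, 25}}) yields {2, 3, 1}.
static std::vector<int> drain(std::vector<CallSite> Sites) {
  std::priority_queue<CallSite, std::vector<CallSite>, BySmallestCallee> Q(
      BySmallestCallee{}, std::move(Sites));
  std::vector<int> Order;
  while (!Q.empty()) {
    Order.push_back(Q.top().Id);
    Q.pop();
  }
  return Order;
}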
diff --git a/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp b/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
index 78e7f456ebc6..fba5859b74ce 100644
--- a/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
+++ b/llvm/lib/Analysis/InstructionPrecedenceTracking.cpp
@@ -47,9 +47,9 @@ const Instruction *InstructionPrecedenceTracking::getFirstSpecialInstruction(
validate(BB);
#endif
- if (FirstSpecialInsts.find(BB) == FirstSpecialInsts.end()) {
+ if (!FirstSpecialInsts.contains(BB)) {
fill(BB);
- assert(FirstSpecialInsts.find(BB) != FirstSpecialInsts.end() && "Must be!");
+ assert(FirstSpecialInsts.contains(BB) && "Must be!");
}
return FirstSpecialInsts[BB];
}
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index c83eb96bbc69..0bfea6140ab5 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -74,6 +74,10 @@ static Value *simplifyGEPInst(Type *, Value *, ArrayRef<Value *>, bool,
const SimplifyQuery &, unsigned);
static Value *simplifySelectInst(Value *, Value *, Value *,
const SimplifyQuery &, unsigned);
+static Value *simplifyInstructionWithOperands(Instruction *I,
+ ArrayRef<Value *> NewOps,
+ const SimplifyQuery &SQ,
+ unsigned MaxRecurse);
static Value *foldSelectWithBinaryOp(Value *Cond, Value *TrueVal,
Value *FalseVal) {
@@ -214,12 +218,6 @@ static bool valueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
// Arguments and constants dominate all instructions.
return true;
- // If we are processing instructions (and/or basic blocks) that have not been
- // fully added to a function, the parent nodes may still be null. Simply
- // return the conservative answer in these cases.
- if (!I->getParent() || !P->getParent() || !I->getFunction())
- return false;
-
// If we have a DominatorTree then do a precise test.
if (DT)
return DT->dominates(I, P);
@@ -539,12 +537,16 @@ static Value *threadBinOpOverPHI(Instruction::BinaryOps Opcode, Value *LHS,
// Evaluate the BinOp on the incoming phi values.
Value *CommonValue = nullptr;
- for (Value *Incoming : PI->incoming_values()) {
+ for (Use &Incoming : PI->incoming_values()) {
// If the incoming value is the phi node itself, it can safely be skipped.
if (Incoming == PI)
continue;
- Value *V = PI == LHS ? simplifyBinOp(Opcode, Incoming, RHS, Q, MaxRecurse)
- : simplifyBinOp(Opcode, LHS, Incoming, Q, MaxRecurse);
+ Instruction *InTI = PI->getIncomingBlock(Incoming)->getTerminator();
+ Value *V = PI == LHS
+ ? simplifyBinOp(Opcode, Incoming, RHS,
+ Q.getWithInstruction(InTI), MaxRecurse)
+ : simplifyBinOp(Opcode, LHS, Incoming,
+ Q.getWithInstruction(InTI), MaxRecurse);
// If the operation failed to simplify, or simplified to a different value
// to previously, then give up.
if (!V || (CommonValue && V != CommonValue))
@@ -992,6 +994,82 @@ Value *llvm::simplifyMulInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
return ::simplifyMulInst(Op0, Op1, IsNSW, IsNUW, Q, RecursionLimit);
}
+/// Given a predicate and two operands, return true if the comparison is true.
+/// This is a helper for div/rem simplification where we return some other value
+/// when we can prove a relationship between the operands.
+static bool isICmpTrue(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
+ const SimplifyQuery &Q, unsigned MaxRecurse) {
+ Value *V = simplifyICmpInst(Pred, LHS, RHS, Q, MaxRecurse);
+ Constant *C = dyn_cast_or_null<Constant>(V);
+ return (C && C->isAllOnesValue());
+}
+
+/// Return true if we can simplify X / Y to 0. Remainder can adapt that answer
+/// to simplify X % Y to X.
+static bool isDivZero(Value *X, Value *Y, const SimplifyQuery &Q,
+ unsigned MaxRecurse, bool IsSigned) {
+ // Recursion is always used, so bail out at once if we already hit the limit.
+ if (!MaxRecurse--)
+ return false;
+
+ if (IsSigned) {
+ // (X srem Y) sdiv Y --> 0
+ if (match(X, m_SRem(m_Value(), m_Specific(Y))))
+ return true;
+
+ // |X| / |Y| --> 0
+ //
+ // We require that 1 operand is a simple constant. That could be extended to
+ // 2 variables if we computed the sign bit for each.
+ //
+ // Make sure that a constant is not the minimum signed value because taking
+ // the abs() of that is undefined.
+ Type *Ty = X->getType();
+ const APInt *C;
+ if (match(X, m_APInt(C)) && !C->isMinSignedValue()) {
+ // Is the variable divisor magnitude always greater than the constant
+ // dividend magnitude?
+ // |Y| > |C| --> Y < -abs(C) or Y > abs(C)
+ Constant *PosDividendC = ConstantInt::get(Ty, C->abs());
+ Constant *NegDividendC = ConstantInt::get(Ty, -C->abs());
+ if (isICmpTrue(CmpInst::ICMP_SLT, Y, NegDividendC, Q, MaxRecurse) ||
+ isICmpTrue(CmpInst::ICMP_SGT, Y, PosDividendC, Q, MaxRecurse))
+ return true;
+ }
+ if (match(Y, m_APInt(C))) {
+ // Special-case: we can't take the abs() of a minimum signed value. If
+ // that's the divisor, then all we have to do is prove that the dividend
+ // is also not the minimum signed value.
+ if (C->isMinSignedValue())
+ return isICmpTrue(CmpInst::ICMP_NE, X, Y, Q, MaxRecurse);
+
+ // Is the variable dividend magnitude always less than the constant
+ // divisor magnitude?
+ // |X| < |C| --> X > -abs(C) and X < abs(C)
+ Constant *PosDivisorC = ConstantInt::get(Ty, C->abs());
+ Constant *NegDivisorC = ConstantInt::get(Ty, -C->abs());
+ if (isICmpTrue(CmpInst::ICMP_SGT, X, NegDivisorC, Q, MaxRecurse) &&
+ isICmpTrue(CmpInst::ICMP_SLT, X, PosDivisorC, Q, MaxRecurse))
+ return true;
+ }
+ return false;
+ }
+
+ // IsSigned == false.
+
+ // Is the unsigned dividend known to be less than a constant divisor?
+ // TODO: Convert this (and above) to range analysis
+ // ("computeConstantRangeIncludingKnownBits")?
+ const APInt *C;
+ if (match(Y, m_APInt(C)) &&
+ computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI, Q.DT).getMaxValue().ult(*C))
+ return true;
+
+ // Try again for any divisor:
+ // Is the dividend unsigned less than the divisor?
+ return isICmpTrue(ICmpInst::ICMP_ULT, X, Y, Q, MaxRecurse);
+}
+
/// Check for common or similar folds of integer division or integer remainder.
/// This applies to all 4 opcodes (sdiv/udiv/srem/urem).
static Value *simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0,
@@ -1046,19 +1124,28 @@ static Value *simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0,
if (Op0 == Op1)
return IsDiv ? ConstantInt::get(Ty, 1) : Constant::getNullValue(Ty);
+
+ KnownBits Known = computeKnownBits(Op1, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ // X / 0 -> poison
+ // X % 0 -> poison
+ // If the divisor is known to be zero, just return poison. This can happen in
+ // some cases where it's provable indirectly that the denominator is zero but
+ // it's not trivially simplifiable (e.g. known zero through a phi node).
+ if (Known.isZero())
+ return PoisonValue::get(Ty);
+
// X / 1 -> X
// X % 1 -> 0
- // If this is a boolean op (single-bit element type), we can't have
- // division-by-zero or remainder-by-zero, so assume the divisor is 1.
- // Similarly, if we're zero-extending a boolean divisor, then assume it's a 1.
- Value *X;
- if (match(Op1, m_One()) || Ty->isIntOrIntVectorTy(1) ||
- (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)))
+ // If the divisor can only be zero or one, we can't have division-by-zero
+ // or remainder-by-zero, so assume the divisor is 1.
+ // e.g. 1, zext (i8 X), sdiv X (Y and 1)
+ if (Known.countMinLeadingZeros() == Known.getBitWidth() - 1)
return IsDiv ? Op0 : Constant::getNullValue(Ty);
// If X * Y does not overflow, then:
// X * Y / Y -> X
// X * Y % Y -> 0
+ Value *X;
if (match(Op0, m_c_Mul(m_Value(X), m_Specific(Op1)))) {
auto *Mul = cast<OverflowingBinaryOperator>(Op0);
// The multiplication can't overflow if it is defined not to, or if
@@ -1071,82 +1158,25 @@ static Value *simplifyDivRem(Instruction::BinaryOps Opcode, Value *Op0,
}
}
+ if (isDivZero(Op0, Op1, Q, MaxRecurse, IsSigned))
+ return IsDiv ? Constant::getNullValue(Op0->getType()) : Op0;
+
if (Value *V = simplifyByDomEq(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
- return nullptr;
-}
-
-/// Given a predicate and two operands, return true if the comparison is true.
-/// This is a helper for div/rem simplification where we return some other value
-/// when we can prove a relationship between the operands.
-static bool isICmpTrue(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
- const SimplifyQuery &Q, unsigned MaxRecurse) {
- Value *V = simplifyICmpInst(Pred, LHS, RHS, Q, MaxRecurse);
- Constant *C = dyn_cast_or_null<Constant>(V);
- return (C && C->isAllOnesValue());
-}
-
-/// Return true if we can simplify X / Y to 0. Remainder can adapt that answer
-/// to simplify X % Y to X.
-static bool isDivZero(Value *X, Value *Y, const SimplifyQuery &Q,
- unsigned MaxRecurse, bool IsSigned) {
- // Recursion is always used, so bail out at once if we already hit the limit.
- if (!MaxRecurse--)
- return false;
-
- if (IsSigned) {
- // |X| / |Y| --> 0
- //
- // We require that 1 operand is a simple constant. That could be extended to
- // 2 variables if we computed the sign bit for each.
- //
- // Make sure that a constant is not the minimum signed value because taking
- // the abs() of that is undefined.
- Type *Ty = X->getType();
- const APInt *C;
- if (match(X, m_APInt(C)) && !C->isMinSignedValue()) {
- // Is the variable divisor magnitude always greater than the constant
- // dividend magnitude?
- // |Y| > |C| --> Y < -abs(C) or Y > abs(C)
- Constant *PosDividendC = ConstantInt::get(Ty, C->abs());
- Constant *NegDividendC = ConstantInt::get(Ty, -C->abs());
- if (isICmpTrue(CmpInst::ICMP_SLT, Y, NegDividendC, Q, MaxRecurse) ||
- isICmpTrue(CmpInst::ICMP_SGT, Y, PosDividendC, Q, MaxRecurse))
- return true;
- }
- if (match(Y, m_APInt(C))) {
- // Special-case: we can't take the abs() of a minimum signed value. If
- // that's the divisor, then all we have to do is prove that the dividend
- // is also not the minimum signed value.
- if (C->isMinSignedValue())
- return isICmpTrue(CmpInst::ICMP_NE, X, Y, Q, MaxRecurse);
-
- // Is the variable dividend magnitude always less than the constant
- // divisor magnitude?
- // |X| < |C| --> X > -abs(C) and X < abs(C)
- Constant *PosDivisorC = ConstantInt::get(Ty, C->abs());
- Constant *NegDivisorC = ConstantInt::get(Ty, -C->abs());
- if (isICmpTrue(CmpInst::ICMP_SGT, X, NegDivisorC, Q, MaxRecurse) &&
- isICmpTrue(CmpInst::ICMP_SLT, X, PosDivisorC, Q, MaxRecurse))
- return true;
- }
- return false;
- }
-
- // IsSigned == false.
+ // If the operation is with the result of a select instruction, check whether
+ // operating on either branch of the select always yields the same value.
+ if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
+ if (Value *V = threadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
+ return V;
- // Is the unsigned dividend known to be less than a constant divisor?
- // TODO: Convert this (and above) to range analysis
- // ("computeConstantRangeIncludingKnownBits")?
- const APInt *C;
- if (match(Y, m_APInt(C)) &&
- computeKnownBits(X, Q.DL, 0, Q.AC, Q.CxtI, Q.DT).getMaxValue().ult(*C))
- return true;
+ // If the operation is with the result of a phi instruction, check whether
+ // operating on all incoming values of the phi always yields the same value.
+ if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
+ if (Value *V = threadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
+ return V;
- // Try again for any divisor:
- // Is the dividend unsigned less than the divisor?
- return isICmpTrue(ICmpInst::ICMP_ULT, X, Y, Q, MaxRecurse);
+ return nullptr;
}
/// These are simplifications common to SDiv and UDiv.
@@ -1163,44 +1193,12 @@ static Value *simplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
// at least as many trailing zeros as the divisor to divide evenly. If it has
// less trailing zeros, then the result must be poison.
const APInt *DivC;
- if (IsExact && match(Op1, m_APInt(DivC)) && DivC->countTrailingZeros()) {
+ if (IsExact && match(Op1, m_APInt(DivC)) && DivC->countr_zero()) {
KnownBits KnownOp0 = computeKnownBits(Op0, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
- if (KnownOp0.countMaxTrailingZeros() < DivC->countTrailingZeros())
+ if (KnownOp0.countMaxTrailingZeros() < DivC->countr_zero())
return PoisonValue::get(Op0->getType());
}
- bool IsSigned = Opcode == Instruction::SDiv;
-
- // (X rem Y) / Y -> 0
- if ((IsSigned && match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) ||
- (!IsSigned && match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
- return Constant::getNullValue(Op0->getType());
-
- // (X /u C1) /u C2 -> 0 if C1 * C2 overflow
- ConstantInt *C1, *C2;
- if (!IsSigned && match(Op0, m_UDiv(m_Value(), m_ConstantInt(C1))) &&
- match(Op1, m_ConstantInt(C2))) {
- bool Overflow;
- (void)C1->getValue().umul_ov(C2->getValue(), Overflow);
- if (Overflow)
- return Constant::getNullValue(Op0->getType());
- }
-
- // If the operation is with the result of a select instruction, check whether
- // operating on either branch of the select always yields the same value.
- if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = threadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
- return V;
-
- // If the operation is with the result of a phi instruction, check whether
- // operating on all incoming values of the phi always yields the same value.
- if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = threadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
- return V;
-
- if (isDivZero(Op0, Op1, Q, MaxRecurse, IsSigned))
- return Constant::getNullValue(Op0->getType());
-
return nullptr;
}
@@ -1213,13 +1211,6 @@ static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
if (Value *V = simplifyDivRem(Opcode, Op0, Op1, Q, MaxRecurse))
return V;
- // (X % Y) % Y -> X % Y
- if ((Opcode == Instruction::SRem &&
- match(Op0, m_SRem(m_Value(), m_Specific(Op1)))) ||
- (Opcode == Instruction::URem &&
- match(Op0, m_URem(m_Value(), m_Specific(Op1)))))
- return Op0;
-
// (X << Y) % X -> 0
if (Q.IIQ.UseInstrInfo &&
((Opcode == Instruction::SRem &&
@@ -1228,22 +1219,6 @@ static Value *simplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
match(Op0, m_NUWShl(m_Specific(Op1), m_Value())))))
return Constant::getNullValue(Op0->getType());
- // If the operation is with the result of a select instruction, check whether
- // operating on either branch of the select always yields the same value.
- if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
- if (Value *V = threadBinOpOverSelect(Opcode, Op0, Op1, Q, MaxRecurse))
- return V;
-
- // If the operation is with the result of a phi instruction, check whether
- // operating on all incoming values of the phi always yields the same value.
- if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
- if (Value *V = threadBinOpOverPHI(Opcode, Op0, Op1, Q, MaxRecurse))
- return V;
-
- // If X / Y == 0, then X % Y == X.
- if (isDivZero(Op0, Op1, Q, MaxRecurse, Opcode == Instruction::SRem))
- return Op0;
-
return nullptr;
}
@@ -1407,8 +1382,8 @@ static Value *simplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
return nullptr;
}
-/// Given operands for an Shl, LShr or AShr, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an LShr or AShr, see if we can fold the result. If not,
+/// this returns null.
static Value *simplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0,
Value *Op1, bool IsExact,
const SimplifyQuery &Q, unsigned MaxRecurse) {
@@ -1445,10 +1420,11 @@ static Value *simplifyShlInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
simplifyShift(Instruction::Shl, Op0, Op1, IsNSW, Q, MaxRecurse))
return V;
+ Type *Ty = Op0->getType();
// undef << X -> 0
// undef << X -> undef if (if it's NSW/NUW)
if (Q.isUndefValue(Op0))
- return IsNSW || IsNUW ? Op0 : Constant::getNullValue(Op0->getType());
+ return IsNSW || IsNUW ? Op0 : Constant::getNullValue(Ty);
// (X >> A) << A -> X
Value *X;
@@ -1462,6 +1438,13 @@ static Value *simplifyShlInst(Value *Op0, Value *Op1, bool IsNSW, bool IsNUW,
// NOTE: could use computeKnownBits() / LazyValueInfo,
// but the cost-benefit analysis suggests it isn't worth it.
+ // "nuw" guarantees that only zeros are shifted out, and "nsw" guarantees
+ // that the sign-bit does not change, so the only input that does not
+ // produce poison is 0, and "0 << (bitwidth-1) --> 0".
+ if (IsNSW && IsNUW &&
+ match(Op1, m_SpecificInt(Ty->getScalarSizeInBits() - 1)))
+ return Constant::getNullValue(Ty);
+
return nullptr;
}
@@ -1960,13 +1943,16 @@ static Value *simplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1,
return nullptr;
}
-static Value *simplifyAndOrOfFCmps(const TargetLibraryInfo *TLI, FCmpInst *LHS,
+static Value *simplifyAndOrOfFCmps(const SimplifyQuery &Q, FCmpInst *LHS,
FCmpInst *RHS, bool IsAnd) {
Value *LHS0 = LHS->getOperand(0), *LHS1 = LHS->getOperand(1);
Value *RHS0 = RHS->getOperand(0), *RHS1 = RHS->getOperand(1);
if (LHS0->getType() != RHS0->getType())
return nullptr;
+ const DataLayout &DL = Q.DL;
+ const TargetLibraryInfo *TLI = Q.TLI;
+
FCmpInst::Predicate PredL = LHS->getPredicate(), PredR = RHS->getPredicate();
if ((PredL == FCmpInst::FCMP_ORD && PredR == FCmpInst::FCMP_ORD && IsAnd) ||
(PredL == FCmpInst::FCMP_UNO && PredR == FCmpInst::FCMP_UNO && !IsAnd)) {
@@ -1978,8 +1964,10 @@ static Value *simplifyAndOrOfFCmps(const TargetLibraryInfo *TLI, FCmpInst *LHS,
// (fcmp uno NNAN, X) | (fcmp uno Y, X) --> fcmp uno Y, X
// (fcmp uno X, NNAN) | (fcmp uno X, Y) --> fcmp uno X, Y
// (fcmp uno X, NNAN) | (fcmp uno Y, X) --> fcmp uno Y, X
- if ((isKnownNeverNaN(LHS0, TLI) && (LHS1 == RHS0 || LHS1 == RHS1)) ||
- (isKnownNeverNaN(LHS1, TLI) && (LHS0 == RHS0 || LHS0 == RHS1)))
+ if (((LHS1 == RHS0 || LHS1 == RHS1) &&
+ isKnownNeverNaN(LHS0, DL, TLI, 0, Q.AC, Q.CxtI, Q.DT)) ||
+ ((LHS0 == RHS0 || LHS0 == RHS1) &&
+ isKnownNeverNaN(LHS1, DL, TLI, 0, Q.AC, Q.CxtI, Q.DT)))
return RHS;
// (fcmp ord X, Y) & (fcmp ord NNAN, X) --> fcmp ord X, Y
@@ -1990,8 +1978,10 @@ static Value *simplifyAndOrOfFCmps(const TargetLibraryInfo *TLI, FCmpInst *LHS,
// (fcmp uno Y, X) | (fcmp uno NNAN, X) --> fcmp uno Y, X
// (fcmp uno X, Y) | (fcmp uno X, NNAN) --> fcmp uno X, Y
// (fcmp uno Y, X) | (fcmp uno X, NNAN) --> fcmp uno Y, X
- if ((isKnownNeverNaN(RHS0, TLI) && (RHS1 == LHS0 || RHS1 == LHS1)) ||
- (isKnownNeverNaN(RHS1, TLI) && (RHS0 == LHS0 || RHS0 == LHS1)))
+ if (((RHS1 == LHS0 || RHS1 == LHS1) &&
+ isKnownNeverNaN(RHS0, DL, TLI, 0, Q.AC, Q.CxtI, Q.DT)) ||
+ ((RHS0 == LHS0 || RHS0 == LHS1) &&
+ isKnownNeverNaN(RHS1, DL, TLI, 0, Q.AC, Q.CxtI, Q.DT)))
return LHS;
}
@@ -2019,7 +2009,7 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q, Value *Op0,
auto *FCmp0 = dyn_cast<FCmpInst>(Op0);
auto *FCmp1 = dyn_cast<FCmpInst>(Op1);
if (FCmp0 && FCmp1)
- V = simplifyAndOrOfFCmps(Q.TLI, FCmp0, FCmp1, IsAnd);
+ V = simplifyAndOrOfFCmps(Q, FCmp0, FCmp1, IsAnd);
if (!V)
return nullptr;
@@ -2642,7 +2632,7 @@ static bool isAllocDisjoint(const Value *V) {
// that might be resolve lazily to symbols in another dynamically-loaded
// library (and, thus, could be malloc'ed by the implementation).
if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
- return AI->getParent() && AI->getFunction() && AI->isStaticAlloca();
+ return AI->isStaticAlloca();
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
return (GV->hasLocalLinkage() || GV->hasHiddenVisibility() ||
GV->hasProtectedVisibility() || GV->hasGlobalUnnamedAddr()) &&
@@ -2727,16 +2717,13 @@ static bool haveNonOverlappingStorage(const Value *V1, const Value *V2) {
// this optimization.
static Constant *computePointerICmp(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const SimplifyQuery &Q) {
+ assert(LHS->getType() == RHS->getType() && "Must have same types");
const DataLayout &DL = Q.DL;
const TargetLibraryInfo *TLI = Q.TLI;
const DominatorTree *DT = Q.DT;
const Instruction *CxtI = Q.CxtI;
const InstrInfoQuery &IIQ = Q.IIQ;
- // First, skip past any trivial no-ops.
- LHS = LHS->stripPointerCasts();
- RHS = RHS->stripPointerCasts();
-
// A non-null pointer is not equal to a null pointer.
if (isa<ConstantPointerNull>(RHS) && ICmpInst::isEquality(Pred) &&
llvm::isKnownNonZero(LHS, DL, 0, nullptr, nullptr, nullptr,
@@ -2775,8 +2762,10 @@ static Constant *computePointerICmp(CmpInst::Predicate Pred, Value *LHS,
// Even if an non-inbounds GEP occurs along the path we can still optimize
// equality comparisons concerning the result.
bool AllowNonInbounds = ICmpInst::isEquality(Pred);
- APInt LHSOffset = stripAndComputeConstantOffsets(DL, LHS, AllowNonInbounds);
- APInt RHSOffset = stripAndComputeConstantOffsets(DL, RHS, AllowNonInbounds);
+ unsigned IndexSize = DL.getIndexTypeSizeInBits(LHS->getType());
+ APInt LHSOffset(IndexSize, 0), RHSOffset(IndexSize, 0);
+ LHS = LHS->stripAndAccumulateConstantOffsets(DL, LHSOffset, AllowNonInbounds);
+ RHS = RHS->stripAndAccumulateConstantOffsets(DL, RHSOffset, AllowNonInbounds);
// If LHS and RHS are related via constant offsets to the same base
// value, we can replace it with an icmp which just compares the offsets.
@@ -2804,11 +2793,11 @@ static Constant *computePointerICmp(CmpInst::Predicate Pred, Value *LHS,
}(LHS);
Opts.NullIsUnknownSize = F ? NullPointerIsDefined(F) : true;
if (getObjectSize(LHS, LHSSize, DL, TLI, Opts) &&
- getObjectSize(RHS, RHSSize, DL, TLI, Opts) &&
- !LHSOffset.isNegative() && !RHSOffset.isNegative() &&
- LHSOffset.ult(LHSSize) && RHSOffset.ult(RHSSize)) {
- return ConstantInt::get(getCompareTy(LHS),
- !CmpInst::isTrueWhenEqual(Pred));
+ getObjectSize(RHS, RHSSize, DL, TLI, Opts)) {
+ APInt Dist = LHSOffset - RHSOffset;
+ if (Dist.isNonNegative() ? Dist.ult(LHSSize) : (-Dist).ult(RHSSize))
+ return ConstantInt::get(getCompareTy(LHS),
+ !CmpInst::isTrueWhenEqual(Pred));
}
}
@@ -2850,11 +2839,35 @@ static Constant *computePointerICmp(CmpInst::Predicate Pred, Value *LHS,
else if (isAllocLikeFn(RHS, TLI) &&
llvm::isKnownNonZero(LHS, DL, 0, nullptr, CxtI, DT))
MI = RHS;
- // FIXME: We should also fold the compare when the pointer escapes, but the
- // compare dominates the pointer escape
- if (MI && !PointerMayBeCaptured(MI, true, true))
- return ConstantInt::get(getCompareTy(LHS),
- CmpInst::isFalseWhenEqual(Pred));
+ if (MI) {
+ // FIXME: This is incorrect, see PR54002. While we can assume that the
+ // allocation is at an address that makes the comparison false, this
+ // requires that *all* comparisons to that address be false, which
+ // InstSimplify cannot guarantee.
+ struct CustomCaptureTracker : public CaptureTracker {
+ bool Captured = false;
+ void tooManyUses() override { Captured = true; }
+ bool captured(const Use *U) override {
+ if (auto *ICmp = dyn_cast<ICmpInst>(U->getUser())) {
+ // Comparison against value stored in global variable. Given the
+ // pointer does not escape, its value cannot be guessed and stored
+ // separately in a global variable.
+ unsigned OtherIdx = 1 - U->getOperandNo();
+ auto *LI = dyn_cast<LoadInst>(ICmp->getOperand(OtherIdx));
+ if (LI && isa<GlobalVariable>(LI->getPointerOperand()))
+ return false;
+ }
+
+ Captured = true;
+ return true;
+ }
+ };
+ CustomCaptureTracker Tracker;
+ PointerMayBeCaptured(MI, &Tracker);
+ if (!Tracker.Captured)
+ return ConstantInt::get(getCompareTy(LHS),
+ CmpInst::isFalseWhenEqual(Pred));
+ }
}
// Otherwise, fail.
@@ -3394,8 +3407,26 @@ static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
return ConstantInt::getTrue(getCompareTy(RHS));
}
- if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
- LBO->getOperand(1) == RBO->getOperand(1)) {
+ if (!MaxRecurse || !LBO || !RBO || LBO->getOpcode() != RBO->getOpcode())
+ return nullptr;
+
+ if (LBO->getOperand(0) == RBO->getOperand(0)) {
+ switch (LBO->getOpcode()) {
+ default:
+ break;
+ case Instruction::Shl:
+ bool NUW = Q.IIQ.hasNoUnsignedWrap(LBO) && Q.IIQ.hasNoUnsignedWrap(RBO);
+ bool NSW = Q.IIQ.hasNoSignedWrap(LBO) && Q.IIQ.hasNoSignedWrap(RBO);
+ if (!NUW || (ICmpInst::isSigned(Pred) && !NSW) ||
+ !isKnownNonZero(LBO->getOperand(0), Q.DL))
+ break;
+ if (Value *V = simplifyICmpInst(Pred, LBO->getOperand(1),
+ RBO->getOperand(1), Q, MaxRecurse - 1))
+ return V;
+ }
+ }
+
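
The new operand(0) case folds icmp pred (X << A), (X << B) to a compare of the shift amounts when both shifts share a non-zero X and carry nuw (plus nsw for signed predicates, as in the code above). A brute-force sanity check of the unsigned form of that equivalence, as a sketch:

#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t X = 1; X < (1u << 30); X += 9999991)
    for (uint32_t A = 0; A < 16; ++A)
      for (uint32_t B = 0; B < 16; ++B) {
        uint64_t L = X << A, R = X << B;
        if (L > UINT32_MAX || R > UINT32_MAX)
          continue; // the shift would wrap i32; the fold requires nuw shls
        assert((L < R) == (A < B));
        assert((L == R) == (A == B));
      }
  return 0;
}
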
+ if (LBO->getOperand(1) == RBO->getOperand(1)) {
switch (LBO->getOpcode()) {
default:
break;
@@ -3631,7 +3662,7 @@ static Value *simplifyICmpWithDominatingAssume(CmpInst::Predicate Predicate,
Value *LHS, Value *RHS,
const SimplifyQuery &Q) {
// Gracefully handle instructions that have not been inserted yet.
- if (!Q.AC || !Q.CxtI || !Q.CxtI->getParent())
+ if (!Q.AC || !Q.CxtI)
return nullptr;
for (Value *AssumeBaseOp : {LHS, RHS}) {
@@ -3650,6 +3681,36 @@ static Value *simplifyICmpWithDominatingAssume(CmpInst::Predicate Predicate,
return nullptr;
}
+static Value *simplifyICmpWithIntrinsicOnLHS(CmpInst::Predicate Pred,
+ Value *LHS, Value *RHS) {
+ auto *II = dyn_cast<IntrinsicInst>(LHS);
+ if (!II)
+ return nullptr;
+
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::uadd_sat:
+ // uadd.sat(X, Y) uge X, uadd.sat(X, Y) uge Y
+ if (II->getArgOperand(0) == RHS || II->getArgOperand(1) == RHS) {
+ if (Pred == ICmpInst::ICMP_UGE)
+ return ConstantInt::getTrue(getCompareTy(II));
+ if (Pred == ICmpInst::ICMP_ULT)
+ return ConstantInt::getFalse(getCompareTy(II));
+ }
+ return nullptr;
+ case Intrinsic::usub_sat:
+ // usub.sat(X, Y) ule X
+ if (II->getArgOperand(0) == RHS) {
+ if (Pred == ICmpInst::ICMP_ULE)
+ return ConstantInt::getTrue(getCompareTy(II));
+ if (Pred == ICmpInst::ICMP_UGT)
+ return ConstantInt::getFalse(getCompareTy(II));
+ }
+ return nullptr;
+ default:
+ return nullptr;
+ }
+}
+
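
A quick standalone check of the saturating-arithmetic identities used in the new helper above (uadd.sat(X, Y) uge X and uge Y, usub.sat(X, Y) ule X), written against hand-rolled 8-bit saturating helpers rather than the LLVM intrinsics:

#include <cassert>
#include <cstdint>

static uint8_t uaddSat(uint8_t X, uint8_t Y) {
  unsigned S = unsigned(X) + unsigned(Y);
  return S > 0xFF ? 0xFF : uint8_t(S);
}
static uint8_t usubSat(uint8_t X, uint8_t Y) { return X > Y ? X - Y : 0; }

int main() {
  for (unsigned X = 0; X <= 0xFF; ++X)
    for (unsigned Y = 0; Y <= 0xFF; ++Y) {
      assert(uaddSat(X, Y) >= X && uaddSat(X, Y) >= Y); // uge folds to true
      assert(usubSat(X, Y) <= X);                       // ule folds to true
    }
  return 0;
}
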
/// Given operands for an ICmpInst, see if we can fold the result.
/// If not, this returns null.
static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
@@ -3764,22 +3825,27 @@ static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
// Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended
// too. If not, then try to deduce the result of the comparison.
- else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ else if (match(RHS, m_ImmConstant())) {
+ Constant *C = dyn_cast<Constant>(RHS);
+ assert(C != nullptr);
+
// Compute the constant that would happen if we truncated to SrcTy then
// reextended to DstTy.
- Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *Trunc = ConstantExpr::getTrunc(C, SrcTy);
Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy);
+ Constant *AnyEq = ConstantExpr::getICmp(ICmpInst::ICMP_EQ, RExt, C);
- // If the re-extended constant didn't change then this is effectively
- // also a case of comparing two zero-extended values.
- if (RExt == CI && MaxRecurse)
+ // If the re-extended constant didn't change any of the elements then
+ // this is effectively also a case of comparing two zero-extended
+ // values.
+ if (AnyEq->isAllOnesValue() && MaxRecurse)
if (Value *V = simplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
SrcOp, Trunc, Q, MaxRecurse - 1))
return V;
// Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
// there. Use this to work out the result of the comparison.
- if (RExt != CI) {
+ if (AnyEq->isNullValue()) {
switch (Pred) {
default:
llvm_unreachable("Unknown ICmp predicate!");
@@ -3787,26 +3853,23 @@ static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
- return ConstantInt::getFalse(CI->getContext());
+ return Constant::getNullValue(ITy);
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
- return ConstantInt::getTrue(CI->getContext());
+ return Constant::getAllOnesValue(ITy);
// LHS is non-negative. If RHS is negative then LHS >s RHS. If RHS
// is non-negative then LHS <s RHS.
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- return CI->getValue().isNegative()
- ? ConstantInt::getTrue(CI->getContext())
- : ConstantInt::getFalse(CI->getContext());
-
+ return ConstantExpr::getICmp(ICmpInst::ICMP_SLT, C,
+ Constant::getNullValue(C->getType()));
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- return CI->getValue().isNegative()
- ? ConstantInt::getFalse(CI->getContext())
- : ConstantInt::getTrue(CI->getContext());
+ return ConstantExpr::getICmp(ICmpInst::ICMP_SGE, C,
+ Constant::getNullValue(C->getType()));
}
}
}
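
The rewrite above tests, now element-wise so splat vector constants are covered, whether the RHS constant survives a truncate-to-SrcTy/zero-extend round trip; if it does not, the compare is decided from the constant alone. A small illustration with i8 zero-extended to i32 and the constant 300, which does not survive the round trip:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 300;               // trunc to i8 then zext gives 44 != 300
  for (unsigned V = 0; V <= 0xFF; ++V) {
    uint32_t Z = uint8_t(V);            // zext i8 -> i32
    assert(!(Z == C));                  // ICMP_EQ  folds to false
    assert(Z < C);                      // ICMP_ULT folds to true
    assert(!(int32_t(Z) > int32_t(C))); // ICMP_SGT folds to (C <s 0), false here
  }
  return 0;
}
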
@@ -3833,42 +3896,44 @@ static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
// Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
// too. If not, then try to deduce the result of the comparison.
- else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ else if (match(RHS, m_ImmConstant())) {
+ Constant *C = dyn_cast<Constant>(RHS);
+ assert(C != nullptr);
+
// Compute the constant that would happen if we truncated to SrcTy then
// reextended to DstTy.
- Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *Trunc = ConstantExpr::getTrunc(C, SrcTy);
Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy);
+ Constant *AnyEq = ConstantExpr::getICmp(ICmpInst::ICMP_EQ, RExt, C);
// If the re-extended constant didn't change then this is effectively
// also a case of comparing two sign-extended values.
- if (RExt == CI && MaxRecurse)
+ if (AnyEq->isAllOnesValue() && MaxRecurse)
if (Value *V =
simplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse - 1))
return V;
// Otherwise the upper bits of LHS are all equal, while RHS has varying
// bits there. Use this to work out the result of the comparison.
- if (RExt != CI) {
+ if (AnyEq->isNullValue()) {
switch (Pred) {
default:
llvm_unreachable("Unknown ICmp predicate!");
case ICmpInst::ICMP_EQ:
- return ConstantInt::getFalse(CI->getContext());
+ return Constant::getNullValue(ITy);
case ICmpInst::ICMP_NE:
- return ConstantInt::getTrue(CI->getContext());
+ return Constant::getAllOnesValue(ITy);
// If RHS is non-negative then LHS <s RHS. If RHS is negative then
// LHS >s RHS.
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
- return CI->getValue().isNegative()
- ? ConstantInt::getTrue(CI->getContext())
- : ConstantInt::getFalse(CI->getContext());
+ return ConstantExpr::getICmp(ICmpInst::ICMP_SLT, C,
+ Constant::getNullValue(C->getType()));
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
- return CI->getValue().isNegative()
- ? ConstantInt::getFalse(CI->getContext())
- : ConstantInt::getTrue(CI->getContext());
+ return ConstantExpr::getICmp(ICmpInst::ICMP_SGE, C,
+ Constant::getNullValue(C->getType()));
// If LHS is non-negative then LHS <u RHS. If LHS is negative then
// LHS >u RHS.
@@ -3910,9 +3975,19 @@ static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (Value *V = simplifyICmpWithMinMax(Pred, LHS, RHS, Q, MaxRecurse))
return V;
+ if (Value *V = simplifyICmpWithIntrinsicOnLHS(Pred, LHS, RHS))
+ return V;
+ if (Value *V = simplifyICmpWithIntrinsicOnLHS(
+ ICmpInst::getSwappedPredicate(Pred), RHS, LHS))
+ return V;
+
if (Value *V = simplifyICmpWithDominatingAssume(Pred, LHS, RHS, Q))
return V;
+ if (std::optional<bool> Res =
+ isImpliedByDomCondition(Pred, LHS, RHS, Q.CxtI, Q.DL))
+ return ConstantInt::getBool(ITy, *Res);
+
// Simplify comparisons of related pointers using a powerful, recursive
// GEP-walk when we have target data available..
if (LHS->getType()->isPointerTy())
@@ -3920,10 +3995,9 @@ static Value *simplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return C;
if (auto *CLHS = dyn_cast<PtrToIntOperator>(LHS))
if (auto *CRHS = dyn_cast<PtrToIntOperator>(RHS))
- if (Q.DL.getTypeSizeInBits(CLHS->getPointerOperandType()) ==
- Q.DL.getTypeSizeInBits(CLHS->getType()) &&
- Q.DL.getTypeSizeInBits(CRHS->getPointerOperandType()) ==
- Q.DL.getTypeSizeInBits(CRHS->getType()))
+ if (CLHS->getPointerOperandType() == CRHS->getPointerOperandType() &&
+ Q.DL.getTypeSizeInBits(CLHS->getPointerOperandType()) ==
+ Q.DL.getTypeSizeInBits(CLHS->getType()))
if (auto *C = computePointerICmp(Pred, CLHS->getPointerOperand(),
CRHS->getPointerOperand(), Q))
return C;
@@ -3976,7 +4050,8 @@ static Value *simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Fold (un)ordered comparison if we can determine there are no NaNs.
if (Pred == FCmpInst::FCMP_UNO || Pred == FCmpInst::FCMP_ORD)
if (FMF.noNaNs() ||
- (isKnownNeverNaN(LHS, Q.TLI) && isKnownNeverNaN(RHS, Q.TLI)))
+ (isKnownNeverNaN(LHS, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT) &&
+ isKnownNeverNaN(RHS, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT)))
return ConstantInt::get(RetTy, Pred == FCmpInst::FCMP_ORD);
// NaN is unordered; NaN is not ordered.
@@ -4038,18 +4113,20 @@ static Value *simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
// LHS == Inf
- if (Pred == FCmpInst::FCMP_OEQ && isKnownNeverInfinity(LHS, Q.TLI))
+ if (Pred == FCmpInst::FCMP_OEQ &&
+ isKnownNeverInfinity(LHS, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT))
return getFalse(RetTy);
// LHS != Inf
- if (Pred == FCmpInst::FCMP_UNE && isKnownNeverInfinity(LHS, Q.TLI))
+ if (Pred == FCmpInst::FCMP_UNE &&
+ isKnownNeverInfinity(LHS, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT))
return getTrue(RetTy);
// LHS == Inf || LHS == NaN
- if (Pred == FCmpInst::FCMP_UEQ && isKnownNeverInfinity(LHS, Q.TLI) &&
- isKnownNeverNaN(LHS, Q.TLI))
+ if (Pred == FCmpInst::FCMP_UEQ &&
+ isKnownNeverInfOrNaN(LHS, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT))
return getFalse(RetTy);
// LHS != Inf && LHS != NaN
- if (Pred == FCmpInst::FCMP_ONE && isKnownNeverInfinity(LHS, Q.TLI) &&
- isKnownNeverNaN(LHS, Q.TLI))
+ if (Pred == FCmpInst::FCMP_ONE &&
+ isKnownNeverInfOrNaN(LHS, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT))
return getTrue(RetTy);
}
if (C->isNegative() && !C->isNegZero()) {
@@ -4061,14 +4138,16 @@ static Value *simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
case FCmpInst::FCMP_UGT:
case FCmpInst::FCMP_UNE:
// (X >= 0) implies (X > C) when (C < 0)
- if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
+ if (cannotBeOrderedLessThanZero(LHS, Q.DL, Q.TLI, 0,
+ Q.AC, Q.CxtI, Q.DT))
return getTrue(RetTy);
break;
case FCmpInst::FCMP_OEQ:
case FCmpInst::FCMP_OLE:
case FCmpInst::FCMP_OLT:
// (X >= 0) implies !(X < C) when (C < 0)
- if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
+ if (cannotBeOrderedLessThanZero(LHS, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI,
+ Q.DT))
return getFalse(RetTy);
break;
default:
@@ -4125,18 +4204,23 @@ static Value *simplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (match(RHS, m_AnyZeroFP())) {
switch (Pred) {
case FCmpInst::FCMP_OGE:
- case FCmpInst::FCMP_ULT:
+ case FCmpInst::FCMP_ULT: {
+ FPClassTest Interested = FMF.noNaNs() ? fcNegative : fcNegative | fcNan;
+ KnownFPClass Known = computeKnownFPClass(LHS, Q.DL, Interested, 0,
+ Q.TLI, Q.AC, Q.CxtI, Q.DT);
+
// Positive or zero X >= 0.0 --> true
// Positive or zero X < 0.0 --> false
- if ((FMF.noNaNs() || isKnownNeverNaN(LHS, Q.TLI)) &&
- CannotBeOrderedLessThanZero(LHS, Q.TLI))
+ if ((FMF.noNaNs() || Known.isKnownNeverNaN()) &&
+ Known.cannotBeOrderedLessThanZero())
return Pred == FCmpInst::FCMP_OGE ? getTrue(RetTy) : getFalse(RetTy);
break;
+ }
case FCmpInst::FCMP_UGE:
case FCmpInst::FCMP_OLT:
// Positive or zero or nan X >= 0.0 --> true
// Positive or zero or nan X < 0.0 --> false
- if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
+ if (cannotBeOrderedLessThanZero(LHS, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT))
return Pred == FCmpInst::FCMP_UGE ? getTrue(RetTy) : getFalse(RetTy);
break;
default:
@@ -4172,26 +4256,45 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
if (V == Op)
return RepOp;
+ if (!MaxRecurse--)
+ return nullptr;
+
// We cannot replace a constant, and shouldn't even try.
if (isa<Constant>(Op))
return nullptr;
auto *I = dyn_cast<Instruction>(V);
- if (!I || !is_contained(I->operands(), Op))
+ if (!I)
+ return nullptr;
+
+ // The arguments of a phi node might refer to a value from a previous
+ // cycle iteration.
+ if (isa<PHINode>(I))
return nullptr;
if (Op->getType()->isVectorTy()) {
// For vector types, the simplification must hold per-lane, so forbid
// potentially cross-lane operations like shufflevector.
- assert(I->getType()->isVectorTy() && "Vector type mismatch");
- if (isa<ShuffleVectorInst>(I) || isa<CallBase>(I))
+ if (!I->getType()->isVectorTy() || isa<ShuffleVectorInst>(I) ||
+ isa<CallBase>(I))
return nullptr;
}
// Replace Op with RepOp in instruction operands.
- SmallVector<Value *, 8> NewOps(I->getNumOperands());
- transform(I->operands(), NewOps.begin(),
- [&](Value *V) { return V == Op ? RepOp : V; });
+ SmallVector<Value *, 8> NewOps;
+ bool AnyReplaced = false;
+ for (Value *InstOp : I->operands()) {
+ if (Value *NewInstOp = simplifyWithOpReplaced(
+ InstOp, Op, RepOp, Q, AllowRefinement, MaxRecurse)) {
+ NewOps.push_back(NewInstOp);
+ AnyReplaced = InstOp != NewInstOp;
+ } else {
+ NewOps.push_back(InstOp);
+ }
+ }
+
+ if (!AnyReplaced)
+ return nullptr;
if (!AllowRefinement) {
// General InstSimplify functions may refine the result, e.g. by returning
@@ -4211,15 +4314,35 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
if ((Opcode == Instruction::And || Opcode == Instruction::Or) &&
NewOps[0] == NewOps[1])
return NewOps[0];
+
+ // x - x -> 0, x ^ x -> 0. This is non-refining, because x is non-poison
+ // by assumption and this case never wraps, so nowrap flags can be
+ // ignored.
+ if ((Opcode == Instruction::Sub || Opcode == Instruction::Xor) &&
+ NewOps[0] == RepOp && NewOps[1] == RepOp)
+ return Constant::getNullValue(I->getType());
+
+ // If we are substituting an absorber constant into a binop and extra
+ // poison can't leak if we remove the select -- because both operands of
+ // the binop are based on the same value -- then it may be safe to replace
+ // the value with the absorber constant. Examples:
+ // (Op == 0) ? 0 : (Op & -Op) --> Op & -Op
+ // (Op == 0) ? 0 : (Op * (binop Op, C)) --> Op * (binop Op, C)
+ // (Op == -1) ? -1 : (Op | (binop C, Op)) --> Op | (binop C, Op)
+ Constant *Absorber =
+ ConstantExpr::getBinOpAbsorber(Opcode, I->getType());
+ if ((NewOps[0] == Absorber || NewOps[1] == Absorber) &&
+ impliesPoison(BO, Op))
+ return Absorber;
}
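
The absorber reasoning above relies on the arm built from Op already evaluating to the absorber when Op equals the compared constant, so the select is redundant; the impliesPoison check handles the poison side, which the sketch below does not model. A brute-force check of the first example, (Op == 0) ? 0 : (Op & -Op), over all 8-bit values:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned V = 0; V <= 0xFF; ++V) {
    uint8_t Op = uint8_t(V);
    uint8_t LowBit = Op & (0u - Op);          // lowest set bit (0 when Op == 0)
    uint8_t Sel = Op == 0 ? uint8_t(0) : LowBit;
    assert(Sel == LowBit);                    // the select is redundant
  }
  return 0;
}
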
- if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
- // getelementptr x, 0 -> x
- if (NewOps.size() == 2 && match(NewOps[1], m_Zero()) &&
- !GEP->isInBounds())
+ if (isa<GetElementPtrInst>(I)) {
+ // getelementptr x, 0 -> x.
+ // This never returns poison, even if inbounds is set.
+ if (NewOps.size() == 2 && match(NewOps[1], m_Zero()))
return NewOps[0];
}
- } else if (MaxRecurse) {
+ } else {
// The simplification queries below may return the original value. Consider:
// %div = udiv i32 %arg, %arg2
// %mul = mul nsw i32 %div, %arg2
@@ -4233,23 +4356,8 @@ static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
return Simplified != V ? Simplified : nullptr;
};
- if (auto *B = dyn_cast<BinaryOperator>(I))
- return PreventSelfSimplify(simplifyBinOp(B->getOpcode(), NewOps[0],
- NewOps[1], Q, MaxRecurse - 1));
-
- if (CmpInst *C = dyn_cast<CmpInst>(I))
- return PreventSelfSimplify(simplifyCmpInst(C->getPredicate(), NewOps[0],
- NewOps[1], Q, MaxRecurse - 1));
-
- if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
- return PreventSelfSimplify(simplifyGEPInst(
- GEP->getSourceElementType(), NewOps[0], ArrayRef(NewOps).slice(1),
- GEP->isInBounds(), Q, MaxRecurse - 1));
-
- if (isa<SelectInst>(I))
- return PreventSelfSimplify(simplifySelectInst(
- NewOps[0], NewOps[1], NewOps[2], Q, MaxRecurse - 1));
- // TODO: We could hand off more cases to instsimplify here.
+ return PreventSelfSimplify(
+ ::simplifyInstructionWithOperands(I, NewOps, Q, MaxRecurse));
}
// If all operands are constant after substituting Op for RepOp then we can
@@ -4406,6 +4514,24 @@ static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *CmpRHS,
}
/// Try to simplify a select instruction when its condition operand is an
+/// integer equality comparison.
+static Value *simplifySelectWithICmpEq(Value *CmpLHS, Value *CmpRHS,
+ Value *TrueVal, Value *FalseVal,
+ const SimplifyQuery &Q,
+ unsigned MaxRecurse) {
+ if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q,
+ /* AllowRefinement */ false,
+ MaxRecurse) == TrueVal)
+ return FalseVal;
+ if (simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q,
+ /* AllowRefinement */ true,
+ MaxRecurse) == FalseVal)
+ return FalseVal;
+
+ return nullptr;
+}
+
+/// Try to simplify a select instruction when its condition operand is an
/// integer comparison.
static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
Value *FalseVal,
@@ -4493,20 +4619,38 @@ static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
// the arms of the select. See if substituting this value into the arm and
// simplifying the result yields the same value as the other arm.
if (Pred == ICmpInst::ICMP_EQ) {
- if (simplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q,
- /* AllowRefinement */ false,
- MaxRecurse) == TrueVal ||
- simplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q,
- /* AllowRefinement */ false,
- MaxRecurse) == TrueVal)
- return FalseVal;
- if (simplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q,
- /* AllowRefinement */ true,
- MaxRecurse) == FalseVal ||
- simplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q,
- /* AllowRefinement */ true,
- MaxRecurse) == FalseVal)
- return FalseVal;
+ if (Value *V = simplifySelectWithICmpEq(CmpLHS, CmpRHS, TrueVal, FalseVal,
+ Q, MaxRecurse))
+ return V;
+ if (Value *V = simplifySelectWithICmpEq(CmpRHS, CmpLHS, TrueVal, FalseVal,
+ Q, MaxRecurse))
+ return V;
+
+ Value *X;
+ Value *Y;
+ // select((X | Y) == 0 ? X : 0) --> 0 (commuted 2 ways)
+ if (match(CmpLHS, m_Or(m_Value(X), m_Value(Y))) &&
+ match(CmpRHS, m_Zero())) {
+ // (X | Y) == 0 implies X == 0 and Y == 0.
+ if (Value *V = simplifySelectWithICmpEq(X, CmpRHS, TrueVal, FalseVal, Q,
+ MaxRecurse))
+ return V;
+ if (Value *V = simplifySelectWithICmpEq(Y, CmpRHS, TrueVal, FalseVal, Q,
+ MaxRecurse))
+ return V;
+ }
+
+ // select((X & Y) == -1 ? X : -1) --> -1 (commuted 2 ways)
+ if (match(CmpLHS, m_And(m_Value(X), m_Value(Y))) &&
+ match(CmpRHS, m_AllOnes())) {
+ // (X & Y) == -1 implies X == -1 and Y == -1.
+ if (Value *V = simplifySelectWithICmpEq(X, CmpRHS, TrueVal, FalseVal, Q,
+ MaxRecurse))
+ return V;
+ if (Value *V = simplifySelectWithICmpEq(Y, CmpRHS, TrueVal, FalseVal, Q,
+ MaxRecurse))
+ return V;
+ }
}
return nullptr;
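
The two new blocks rely on the implications spelled out in the comments: (X | Y) == 0 forces X == 0 and Y == 0, and (X & Y) == -1 forces X == -1 and Y == -1, so the equality substitution can be tried on X and Y individually. A brute-force confirmation of those implications over 8-bit values:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X <= 0xFF; ++X)
    for (unsigned Y = 0; Y <= 0xFF; ++Y) {
      if ((X | Y) == 0)
        assert(X == 0 && Y == 0);       // X or Y may stand in for the 0
      if ((X & Y) == 0xFF)
        assert(X == 0xFF && Y == 0xFF); // likewise for the all-ones case
    }
  return 0;
}
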
@@ -4550,7 +4694,8 @@ static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
if (auto *CondC = dyn_cast<Constant>(Cond)) {
if (auto *TrueC = dyn_cast<Constant>(TrueVal))
if (auto *FalseC = dyn_cast<Constant>(FalseVal))
- return ConstantFoldSelectInstruction(CondC, TrueC, FalseC);
+ if (Constant *C = ConstantFoldSelectInstruction(CondC, TrueC, FalseC))
+ return C;
// select poison, X, Y -> poison
if (isa<PoisonValue>(CondC))
@@ -4598,6 +4743,9 @@ static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
// !(X || Y) && X --> false (commuted 2 ways)
if (match(Cond, m_Not(m_c_LogicalOr(m_Specific(TrueVal), m_Value()))))
return ConstantInt::getFalse(Cond->getType());
+ // X && !(X || Y) --> false (commuted 2 ways)
+ if (match(TrueVal, m_Not(m_c_LogicalOr(m_Specific(Cond), m_Value()))))
+ return ConstantInt::getFalse(Cond->getType());
// (X || Y) && Y --> Y (commuted 2 ways)
if (match(Cond, m_c_LogicalOr(m_Specific(TrueVal), m_Value())))
@@ -4618,6 +4766,13 @@ static Value *simplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
// Match patterns that end in logical-or.
if (match(TrueVal, m_One())) {
+ // !(X && Y) || X --> true (commuted 2 ways)
+ if (match(Cond, m_Not(m_c_LogicalAnd(m_Specific(FalseVal), m_Value()))))
+ return ConstantInt::getTrue(Cond->getType());
+ // X || !(X && Y) --> true (commuted 2 ways)
+ if (match(FalseVal, m_Not(m_c_LogicalAnd(m_Specific(Cond), m_Value()))))
+ return ConstantInt::getTrue(Cond->getType());
+
// (X && Y) || Y --> Y (commuted 2 ways)
if (match(Cond, m_c_LogicalAnd(m_Specific(FalseVal), m_Value())))
return FalseVal;
@@ -4747,10 +4902,8 @@ static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr,
}
}
- // For opaque pointers an all-zero GEP is a no-op. For typed pointers,
- // it may be equivalent to a bitcast.
- if (Ptr->getType()->getScalarType()->isOpaquePointerTy() &&
- Ptr->getType() == GEPTy &&
+ // All-zero GEP is a no-op, unless it performs a vector splat.
+ if (Ptr->getType() == GEPTy &&
all_of(Indices, [](const auto *V) { return match(V, m_Zero()); }))
return Ptr;
@@ -4760,9 +4913,9 @@ static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr,
any_of(Indices, [](const auto *V) { return isa<PoisonValue>(V); }))
return PoisonValue::get(GEPTy);
+ // getelementptr undef, idx -> undef
if (Q.isUndefValue(Ptr))
- // If inbounds, we can choose an out-of-bounds pointer as a base pointer.
- return InBounds ? PoisonValue::get(GEPTy) : UndefValue::get(GEPTy);
+ return UndefValue::get(GEPTy);
bool IsScalableVec =
isa<ScalableVectorType>(SrcTy) || any_of(Indices, [](const Value *V) {
@@ -4853,6 +5006,10 @@ static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr,
!all_of(Indices, [](Value *V) { return isa<Constant>(V); }))
return nullptr;
+ if (!ConstantExpr::isSupportedGetElementPtr(SrcTy))
+ return ConstantFoldGetElementPtr(SrcTy, cast<Constant>(Ptr), InBounds,
+ std::nullopt, Indices);
+
auto *CE = ConstantExpr::getGetElementPtr(SrcTy, cast<Constant>(Ptr), Indices,
InBounds);
return ConstantFoldConstant(CE, Q.DL);
@@ -4882,8 +5039,11 @@ static Value *simplifyInsertValueInst(Value *Agg, Value *Val,
if (ExtractValueInst *EV = dyn_cast<ExtractValueInst>(Val))
if (EV->getAggregateOperand()->getType() == Agg->getType() &&
EV->getIndices() == Idxs) {
- // insertvalue undef, (extractvalue y, n), n -> y
- if (Q.isUndefValue(Agg))
+ // insertvalue poison, (extractvalue y, n), n -> y
+ // insertvalue undef, (extractvalue y, n), n -> y if y cannot be poison
+ if (isa<PoisonValue>(Agg) ||
+ (Q.isUndefValue(Agg) &&
+ isGuaranteedNotToBePoison(EV->getAggregateOperand())))
return EV->getAggregateOperand();
// insertvalue y, (extractvalue y, n), n -> y
@@ -5151,8 +5311,8 @@ static Value *simplifyShuffleVectorInst(Value *Op0, Value *Op1,
ArrayRef<int> Mask, Type *RetTy,
const SimplifyQuery &Q,
unsigned MaxRecurse) {
- if (all_of(Mask, [](int Elem) { return Elem == UndefMaskElem; }))
- return UndefValue::get(RetTy);
+ if (all_of(Mask, [](int Elem) { return Elem == PoisonMaskElem; }))
+ return PoisonValue::get(RetTy);
auto *InVecTy = cast<VectorType>(Op0->getType());
unsigned MaskNumElts = Mask.size();
@@ -5217,11 +5377,11 @@ static Value *simplifyShuffleVectorInst(Value *Op0, Value *Op1,
})) {
assert(isa<UndefValue>(Op1) && "Expected undef operand 1 for splat");
- // Shuffle mask undefs become undefined constant result elements.
+ // Shuffle mask poisons become poison constant result elements.
SmallVector<Constant *, 16> VecC(MaskNumElts, C);
for (unsigned i = 0; i != MaskNumElts; ++i)
if (Indices[i] == -1)
- VecC[i] = UndefValue::get(C->getType());
+ VecC[i] = PoisonValue::get(C->getType());
return ConstantVector::get(VecC);
}
}
@@ -5299,28 +5459,42 @@ Value *llvm::simplifyFNegInst(Value *Op, FastMathFlags FMF,
/// Try to propagate existing NaN values when possible. If not, replace the
/// constant or elements in the constant with a canonical NaN.
static Constant *propagateNaN(Constant *In) {
- if (auto *VecTy = dyn_cast<FixedVectorType>(In->getType())) {
+ Type *Ty = In->getType();
+ if (auto *VecTy = dyn_cast<FixedVectorType>(Ty)) {
unsigned NumElts = VecTy->getNumElements();
SmallVector<Constant *, 32> NewC(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
Constant *EltC = In->getAggregateElement(i);
- // Poison and existing NaN elements propagate.
+ // Poison elements propagate. NaN propagates except signaling is quieted.
// Replace unknown or undef elements with canonical NaN.
- if (EltC && (isa<PoisonValue>(EltC) || EltC->isNaN()))
+ if (EltC && isa<PoisonValue>(EltC))
NewC[i] = EltC;
+ else if (EltC && EltC->isNaN())
+ NewC[i] = ConstantFP::get(
+ EltC->getType(), cast<ConstantFP>(EltC)->getValue().makeQuiet());
else
- NewC[i] = (ConstantFP::getNaN(VecTy->getElementType()));
+ NewC[i] = ConstantFP::getNaN(VecTy->getElementType());
}
return ConstantVector::get(NewC);
}
- // It is not a fixed vector, but not a simple NaN either?
+ // If it is not a fixed vector, but not a simple NaN either, return a
+ // canonical NaN.
if (!In->isNaN())
- return ConstantFP::getNaN(In->getType());
+ return ConstantFP::getNaN(Ty);
+
+ // If we know this is a NaN, and it's a scalable vector, we must have a splat
+ // on our hands. Grab that before splatting a QNaN constant.
+ if (isa<ScalableVectorType>(Ty)) {
+ auto *Splat = In->getSplatValue();
+ assert(Splat && Splat->isNaN() &&
+ "Found a scalable-vector NaN but not a splat");
+ In = Splat;
+ }
- // Propagate the existing NaN constant when possible.
- // TODO: Should we quiet a signaling NaN?
- return In;
+ // Propagate an existing QNaN constant. If it is an SNaN, make it quiet, but
+ // preserve the sign/payload.
+ return ConstantFP::get(Ty, cast<ConstantFP>(In)->getValue().makeQuiet());
}
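
propagateNaN now quiets signaling NaNs via APFloat::makeQuiet while keeping the sign and payload. The sketch below only illustrates the underlying bit manipulation for IEEE-754 binary64 (quiet bit = mantissa bit 51); makeQuietNaN is a hypothetical helper, the code needs C++20 for std::bit_cast, and it assumes the compiler leaves the sNaN pattern untouched in transit:

#include <bit>
#include <cassert>
#include <cmath>
#include <cstdint>

// Set the most significant mantissa bit of a binary64 NaN, keeping the sign
// and the rest of the payload, which is the effect makeQuiet has on a double.
static double makeQuietNaN(double D) {
  uint64_t Bits = std::bit_cast<uint64_t>(D);
  Bits |= 1ULL << 51;
  return std::bit_cast<double>(Bits);
}

int main() {
  // A signaling NaN: exponent all ones, quiet bit clear, payload 0x1.
  double SNaN = std::bit_cast<double>(0x7FF0000000000001ULL);
  double QNaN = makeQuietNaN(SNaN);
  assert(std::isnan(SNaN) && std::isnan(QNaN));
  assert(std::bit_cast<uint64_t>(QNaN) == 0x7FF8000000000001ULL); // payload kept
  return 0;
}
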
/// Perform folds that are common to any floating-point operation. This implies
@@ -5393,7 +5567,7 @@ simplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
// fadd X, 0 ==> X, when we know X is not -0
if (canIgnoreSNaN(ExBehavior, FMF))
if (match(Op1, m_PosZeroFP()) &&
- (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
+ (FMF.noSignedZeros() || cannotBeNegativeZero(Op0, Q.DL, Q.TLI)))
return Op0;
if (!isDefaultFPEnvironment(ExBehavior, Rounding))
@@ -5413,11 +5587,11 @@ simplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF,
// X = 0.0: ( 0.0 - ( 0.0)) + ( 0.0) == ( 0.0) + ( 0.0) == 0.0
if (match(Op0, m_FSub(m_AnyZeroFP(), m_Specific(Op1))) ||
match(Op1, m_FSub(m_AnyZeroFP(), m_Specific(Op0))))
- return ConstantFP::getNullValue(Op0->getType());
+ return ConstantFP::getZero(Op0->getType());
if (match(Op0, m_FNeg(m_Specific(Op1))) ||
match(Op1, m_FNeg(m_Specific(Op0))))
- return ConstantFP::getNullValue(Op0->getType());
+ return ConstantFP::getZero(Op0->getType());
}
// (X - Y) + Y --> X
@@ -5455,7 +5629,7 @@ simplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF,
// fsub X, -0 ==> X, when we know X is not -0
if (canIgnoreSNaN(ExBehavior, FMF))
if (match(Op1, m_NegZeroFP()) &&
- (FMF.noSignedZeros() || CannotBeNegativeZero(Op0, Q.TLI)))
+ (FMF.noSignedZeros() || cannotBeNegativeZero(Op0, Q.DL, Q.TLI)))
return Op0;
// fsub -0.0, (fsub -0.0, X) ==> X
@@ -5521,11 +5695,12 @@ static Value *simplifyFMAFMul(Value *Op0, Value *Op1, FastMathFlags FMF,
if (match(Op1, m_AnyZeroFP())) {
// X * 0.0 --> 0.0 (with nnan and nsz)
if (FMF.noNaNs() && FMF.noSignedZeros())
- return ConstantFP::getNullValue(Op0->getType());
+ return ConstantFP::getZero(Op0->getType());
// +normal number * (-)0.0 --> (-)0.0
- if (isKnownNeverInfinity(Op0, Q.TLI) && isKnownNeverNaN(Op0, Q.TLI) &&
- SignBitMustBeZero(Op0, Q.TLI))
+ if (isKnownNeverInfOrNaN(Op0, Q.DL, Q.TLI, 0, Q.AC, Q.CxtI, Q.DT) &&
+ // TODO: Check SignBit from computeKnownFPClass when it's more complete.
+ SignBitMustBeZero(Op0, Q.DL, Q.TLI))
return Op1;
}
@@ -5610,7 +5785,7 @@ simplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
// Requires that NaNs are off (X could be zero) and signed zeroes are
// ignored (X could be positive or negative, so the output sign is unknown).
if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op0, m_AnyZeroFP()))
- return ConstantFP::getNullValue(Op0->getType());
+ return ConstantFP::getZero(Op0->getType());
if (FMF.noNaNs()) {
// X / X -> 1.0 is legal when NaNs are ignored.
@@ -5667,7 +5842,7 @@ simplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (FMF.noNaNs()) {
// +0 % X -> 0
if (match(Op0, m_PosZeroFP()))
- return ConstantFP::getNullValue(Op0->getType());
+ return ConstantFP::getZero(Op0->getType());
// -0 % X -> -0
if (match(Op0, m_NegZeroFP()))
return ConstantFP::getNegativeZero(Op0->getType());
@@ -5932,7 +6107,7 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
Value *X;
switch (IID) {
case Intrinsic::fabs:
- if (SignBitMustBeZero(Op0, Q.TLI))
+ if (SignBitMustBeZero(Op0, Q.DL, Q.TLI))
return Op0;
break;
case Intrinsic::bswap:
@@ -5998,6 +6173,15 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
if (isSplatValue(Op0))
return Op0;
break;
+ case Intrinsic::frexp: {
+ // Frexp is idempotent with the added complication of the struct return.
+ if (match(Op0, m_ExtractValue<0>(m_Value(X)))) {
+ if (match(X, m_Intrinsic<Intrinsic::frexp>(m_Value())))
+ return X;
+ }
+
+ break;
+ }
default:
break;
}
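
The frexp case above keys off the comment that frexp is idempotent: the fraction it returns lies in [0.5, 1) (or is 0/inf/nan), so feeding it back through frexp reproduces the same fraction with exponent 0. A small check of that fraction fixed point using std::frexp; it exercises only the fraction component, not the whole {fraction, exponent} struct returned by the intrinsic:

#include <cassert>
#include <cmath>

int main() {
  for (double X : {0.0, 0.5, 1.0, 3.75, 1e300, -42.0}) {
    int E1 = 0, E2 = 0;
    double Frac = std::frexp(X, &E1);
    double Frac2 = std::frexp(Frac, &E2);
    assert(Frac2 == Frac);            // the fraction is a fixed point of frexp
    assert(E2 == 0 || Frac == 0.0);   // and the second exponent is 0
  }
  return 0;
}
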
@@ -6030,6 +6214,51 @@ static Value *foldMinMaxSharedOp(Intrinsic::ID IID, Value *Op0, Value *Op1) {
return nullptr;
}
+/// Given a min/max intrinsic, see if it can be removed based on having an
+/// operand that is another min/max intrinsic with shared operand(s). The caller
+/// is expected to swap the operand arguments to handle commutation.
+static Value *foldMinimumMaximumSharedOp(Intrinsic::ID IID, Value *Op0,
+ Value *Op1) {
+ assert((IID == Intrinsic::maxnum || IID == Intrinsic::minnum ||
+ IID == Intrinsic::maximum || IID == Intrinsic::minimum) &&
+ "Unsupported intrinsic");
+
+ auto *M0 = dyn_cast<IntrinsicInst>(Op0);
+ // If Op0 is not the same intrinsic as IID, do not process.
+ // This is a difference with integer min/max handling. We do not process the
+ // case like max(min(X,Y),min(X,Y)) => min(X,Y). But it can be handled by GVN.
+ if (!M0 || M0->getIntrinsicID() != IID)
+ return nullptr;
+ Value *X0 = M0->getOperand(0);
+ Value *Y0 = M0->getOperand(1);
+ // Simple case, m(m(X,Y), X) => m(X, Y)
+ // m(m(X,Y), Y) => m(X, Y)
+ // For minimum/maximum, X is NaN => m(NaN, Y) == NaN and m(NaN, NaN) == NaN.
+ // For minimum/maximum, Y is NaN => m(X, NaN) == NaN and m(NaN, NaN) == NaN.
+ // For minnum/maxnum, X is NaN => m(NaN, Y) == Y and m(Y, Y) == Y.
+ // For minnum/maxnum, Y is NaN => m(X, NaN) == X and m(X, NaN) == X.
+ if (X0 == Op1 || Y0 == Op1)
+ return M0;
+
+ auto *M1 = dyn_cast<IntrinsicInst>(Op1);
+ if (!M1)
+ return nullptr;
+ Value *X1 = M1->getOperand(0);
+ Value *Y1 = M1->getOperand(1);
+ Intrinsic::ID IID1 = M1->getIntrinsicID();
+ // We have the case m(m(X,Y), m'(X,Y)), taking into account that m' is commutative.
+ // if m' is m or inversion of m => m(m(X,Y),m'(X,Y)) == m(X,Y).
+ // For minimum/maximum, X is NaN => m(NaN,Y) == m'(NaN, Y) == NaN.
+ // For minimum/maximum, Y is NaN => m(X,NaN) == m'(X, NaN) == NaN.
+ // For minnum/maxnum, X is NaN => m(NaN,Y) == m'(NaN, Y) == Y.
+ // For minnum/maxnum, Y is NaN => m(X,NaN) == m'(X, NaN) == X.
+ if ((X0 == X1 && Y0 == Y1) || (X0 == Y1 && Y0 == X1))
+ if (IID1 == IID || getInverseMinMaxIntrinsic(IID1) == IID)
+ return M0;
+
+ return nullptr;
+}
+
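
A standalone check of the shared-operand folds listed in the comments above, using std::fmin/std::fmax as stand-ins for minnum/maxnum (the NaN-propagating minimum/maximum variants are not modelled here):

#include <cassert>
#include <cmath>
#include <limits>

static bool same(double A, double B) {
  return (std::isnan(A) && std::isnan(B)) || A == B;
}

int main() {
  const double NaN = std::numeric_limits<double>::quiet_NaN();
  const double Vals[] = {-3.0, -0.0, 0.0, 2.5, NaN};
  for (double X : Vals)
    for (double Y : Vals) {
      double M = std::fmin(X, Y); // minnum semantics: a NaN operand is ignored
      // m(m(X, Y), X) == m(X, Y), including the cases where X or Y is NaN.
      assert(same(std::fmin(M, X), M));
      // m(m(X, Y), m'(X, Y)) == m(X, Y) with m' = maxnum, the inverse of m.
      assert(same(std::fmin(M, std::fmax(X, Y)), M));
    }
  return 0;
}
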
static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
const SimplifyQuery &Q) {
Intrinsic::ID IID = F->getIntrinsicID();
@@ -6116,13 +6345,6 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
if (isICmpTrue(Pred, Op1, Op0, Q.getWithoutUndef(), RecursionLimit))
return Op1;
- if (std::optional<bool> Imp =
- isImpliedByDomCondition(Pred, Op0, Op1, Q.CxtI, Q.DL))
- return *Imp ? Op0 : Op1;
- if (std::optional<bool> Imp =
- isImpliedByDomCondition(Pred, Op1, Op0, Q.CxtI, Q.DL))
- return *Imp ? Op1 : Op0;
-
break;
}
case Intrinsic::usub_with_overflow:
@@ -6276,14 +6498,10 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
// Min/max of the same operation with common operand:
// m(m(X, Y)), X --> m(X, Y) (4 commuted variants)
- if (auto *M0 = dyn_cast<IntrinsicInst>(Op0))
- if (M0->getIntrinsicID() == IID &&
- (M0->getOperand(0) == Op1 || M0->getOperand(1) == Op1))
- return Op0;
- if (auto *M1 = dyn_cast<IntrinsicInst>(Op1))
- if (M1->getIntrinsicID() == IID &&
- (M1->getOperand(0) == Op0 || M1->getOperand(1) == Op0))
- return Op1;
+ if (Value *V = foldMinimumMaximumSharedOp(IID, Op0, Op1))
+ return V;
+ if (Value *V = foldMinimumMaximumSharedOp(IID, Op1, Op0))
+ return V;
break;
}
@@ -6307,10 +6525,13 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
return nullptr;
}
-static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
-
- unsigned NumOperands = Call->arg_size();
- Function *F = cast<Function>(Call->getCalledFunction());
+static Value *simplifyIntrinsic(CallBase *Call, Value *Callee,
+ ArrayRef<Value *> Args,
+ const SimplifyQuery &Q) {
+ // Operand bundles should not be in Args.
+ assert(Call->arg_size() == Args.size());
+ unsigned NumOperands = Args.size();
+ Function *F = cast<Function>(Callee);
Intrinsic::ID IID = F->getIntrinsicID();
// Most of the intrinsics with no operands have some kind of side effect.
@@ -6318,9 +6539,6 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
if (!NumOperands) {
switch (IID) {
case Intrinsic::vscale: {
- // Call may not be inserted into the IR yet at point of calling simplify.
- if (!Call->getParent() || !Call->getParent()->getParent())
- return nullptr;
auto Attr = Call->getFunction()->getFnAttribute(Attribute::VScaleRange);
if (!Attr.isValid())
return nullptr;
@@ -6336,18 +6554,17 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
}
if (NumOperands == 1)
- return simplifyUnaryIntrinsic(F, Call->getArgOperand(0), Q);
+ return simplifyUnaryIntrinsic(F, Args[0], Q);
if (NumOperands == 2)
- return simplifyBinaryIntrinsic(F, Call->getArgOperand(0),
- Call->getArgOperand(1), Q);
+ return simplifyBinaryIntrinsic(F, Args[0], Args[1], Q);
// Handle intrinsics with 3 or more arguments.
switch (IID) {
case Intrinsic::masked_load:
case Intrinsic::masked_gather: {
- Value *MaskArg = Call->getArgOperand(2);
- Value *PassthruArg = Call->getArgOperand(3);
+ Value *MaskArg = Args[2];
+ Value *PassthruArg = Args[3];
// If the mask is all zeros or undef, the "passthru" argument is the result.
if (maskIsAllZeroOrUndef(MaskArg))
return PassthruArg;
@@ -6355,8 +6572,7 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
}
case Intrinsic::fshl:
case Intrinsic::fshr: {
- Value *Op0 = Call->getArgOperand(0), *Op1 = Call->getArgOperand(1),
- *ShAmtArg = Call->getArgOperand(2);
+ Value *Op0 = Args[0], *Op1 = Args[1], *ShAmtArg = Args[2];
// If both operands are undef, the result is undef.
if (Q.isUndefValue(Op0) && Q.isUndefValue(Op1))
@@ -6364,14 +6580,14 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
// If shift amount is undef, assume it is zero.
if (Q.isUndefValue(ShAmtArg))
- return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1);
+ return Args[IID == Intrinsic::fshl ? 0 : 1];
const APInt *ShAmtC;
if (match(ShAmtArg, m_APInt(ShAmtC))) {
// If there's effectively no shift, return the 1st arg or 2nd arg.
APInt BitWidth = APInt(ShAmtC->getBitWidth(), ShAmtC->getBitWidth());
if (ShAmtC->urem(BitWidth).isZero())
- return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1);
+ return Args[IID == Intrinsic::fshl ? 0 : 1];
}
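
The "effectively no shift" case above reduces fshl/fshr with a shift amount that is a multiple of the bit width to its first or second argument. A reference i32 funnel-shift-left written with ordinary shifts, as an illustration:

#include <cassert>
#include <cstdint>

// Reference fshl on i32: concatenate Hi:Lo, shift left by Amt % 32, keep the
// high 32 bits.
static uint32_t fshl32(uint32_t Hi, uint32_t Lo, uint32_t Amt) {
  uint64_t Cat = (uint64_t(Hi) << 32) | Lo;
  Amt %= 32;
  return uint32_t((Cat << Amt) >> 32);
}

int main() {
  assert(fshl32(0x12345678, 0x9ABCDEF0, 0) == 0x12345678);  // no-op shift
  assert(fshl32(0x12345678, 0x9ABCDEF0, 64) == 0x12345678); // 64 % 32 == 0
  assert(fshl32(0x12345678, 0x9ABCDEF0, 4) == 0x23456789);
  return 0;
}
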
// Rotating zero by anything is zero.
@@ -6385,31 +6601,24 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
return nullptr;
}
case Intrinsic::experimental_constrained_fma: {
- Value *Op0 = Call->getArgOperand(0);
- Value *Op1 = Call->getArgOperand(1);
- Value *Op2 = Call->getArgOperand(2);
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
- if (Value *V =
- simplifyFPOp({Op0, Op1, Op2}, {}, Q, *FPI->getExceptionBehavior(),
- *FPI->getRoundingMode()))
+ if (Value *V = simplifyFPOp(Args, {}, Q, *FPI->getExceptionBehavior(),
+ *FPI->getRoundingMode()))
return V;
return nullptr;
}
case Intrinsic::fma:
case Intrinsic::fmuladd: {
- Value *Op0 = Call->getArgOperand(0);
- Value *Op1 = Call->getArgOperand(1);
- Value *Op2 = Call->getArgOperand(2);
- if (Value *V = simplifyFPOp({Op0, Op1, Op2}, {}, Q, fp::ebIgnore,
+ if (Value *V = simplifyFPOp(Args, {}, Q, fp::ebIgnore,
RoundingMode::NearestTiesToEven))
return V;
return nullptr;
}
case Intrinsic::smul_fix:
case Intrinsic::smul_fix_sat: {
- Value *Op0 = Call->getArgOperand(0);
- Value *Op1 = Call->getArgOperand(1);
- Value *Op2 = Call->getArgOperand(2);
+ Value *Op0 = Args[0];
+ Value *Op1 = Args[1];
+ Value *Op2 = Args[2];
Type *ReturnType = F->getReturnType();
// Canonicalize constant operand as Op1 (ConstantFolding handles the case
@@ -6436,9 +6645,9 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
return nullptr;
}
case Intrinsic::vector_insert: {
- Value *Vec = Call->getArgOperand(0);
- Value *SubVec = Call->getArgOperand(1);
- Value *Idx = Call->getArgOperand(2);
+ Value *Vec = Args[0];
+ Value *SubVec = Args[1];
+ Value *Idx = Args[2];
Type *ReturnType = F->getReturnType();
// (insert_vector Y, (extract_vector X, 0), 0) -> X
@@ -6455,51 +6664,52 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) {
}
case Intrinsic::experimental_constrained_fadd: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
- return simplifyFAddInst(
- FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(),
- Q, *FPI->getExceptionBehavior(), *FPI->getRoundingMode());
+ return simplifyFAddInst(Args[0], Args[1], FPI->getFastMathFlags(), Q,
+ *FPI->getExceptionBehavior(),
+ *FPI->getRoundingMode());
}
case Intrinsic::experimental_constrained_fsub: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
- return simplifyFSubInst(
- FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(),
- Q, *FPI->getExceptionBehavior(), *FPI->getRoundingMode());
+ return simplifyFSubInst(Args[0], Args[1], FPI->getFastMathFlags(), Q,
+ *FPI->getExceptionBehavior(),
+ *FPI->getRoundingMode());
}
case Intrinsic::experimental_constrained_fmul: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
- return simplifyFMulInst(
- FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(),
- Q, *FPI->getExceptionBehavior(), *FPI->getRoundingMode());
+ return simplifyFMulInst(Args[0], Args[1], FPI->getFastMathFlags(), Q,
+ *FPI->getExceptionBehavior(),
+ *FPI->getRoundingMode());
}
case Intrinsic::experimental_constrained_fdiv: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
- return simplifyFDivInst(
- FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(),
- Q, *FPI->getExceptionBehavior(), *FPI->getRoundingMode());
+ return simplifyFDivInst(Args[0], Args[1], FPI->getFastMathFlags(), Q,
+ *FPI->getExceptionBehavior(),
+ *FPI->getRoundingMode());
}
case Intrinsic::experimental_constrained_frem: {
auto *FPI = cast<ConstrainedFPIntrinsic>(Call);
- return simplifyFRemInst(
- FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(),
- Q, *FPI->getExceptionBehavior(), *FPI->getRoundingMode());
+ return simplifyFRemInst(Args[0], Args[1], FPI->getFastMathFlags(), Q,
+ *FPI->getExceptionBehavior(),
+ *FPI->getRoundingMode());
}
default:
return nullptr;
}
}
-static Value *tryConstantFoldCall(CallBase *Call, const SimplifyQuery &Q) {
- auto *F = dyn_cast<Function>(Call->getCalledOperand());
+static Value *tryConstantFoldCall(CallBase *Call, Value *Callee,
+ ArrayRef<Value *> Args,
+ const SimplifyQuery &Q) {
+ auto *F = dyn_cast<Function>(Callee);
if (!F || !canConstantFoldCallTo(Call, F))
return nullptr;
SmallVector<Constant *, 4> ConstantArgs;
- unsigned NumArgs = Call->arg_size();
- ConstantArgs.reserve(NumArgs);
- for (auto &Arg : Call->args()) {
- Constant *C = dyn_cast<Constant>(&Arg);
+ ConstantArgs.reserve(Args.size());
+ for (Value *Arg : Args) {
+ Constant *C = dyn_cast<Constant>(Arg);
if (!C) {
- if (isa<MetadataAsValue>(Arg.get()))
+ if (isa<MetadataAsValue>(Arg))
continue;
return nullptr;
}
@@ -6509,7 +6719,11 @@ static Value *tryConstantFoldCall(CallBase *Call, const SimplifyQuery &Q) {
return ConstantFoldCall(Call, F, ConstantArgs, Q.TLI);
}
-Value *llvm::simplifyCall(CallBase *Call, const SimplifyQuery &Q) {
+Value *llvm::simplifyCall(CallBase *Call, Value *Callee, ArrayRef<Value *> Args,
+ const SimplifyQuery &Q) {
+ // Args should not contain operand bundle operands.
+ assert(Call->arg_size() == Args.size());
+
// musttail calls can only be simplified if they are also DCEd.
// As we can't guarantee this here, don't simplify them.
if (Call->isMustTailCall())
@@ -6517,16 +6731,15 @@ Value *llvm::simplifyCall(CallBase *Call, const SimplifyQuery &Q) {
// call undef -> poison
// call null -> poison
- Value *Callee = Call->getCalledOperand();
if (isa<UndefValue>(Callee) || isa<ConstantPointerNull>(Callee))
return PoisonValue::get(Call->getType());
- if (Value *V = tryConstantFoldCall(Call, Q))
+ if (Value *V = tryConstantFoldCall(Call, Callee, Args, Q))
return V;
auto *F = dyn_cast<Function>(Callee);
if (F && F->isIntrinsic())
- if (Value *Ret = simplifyIntrinsic(Call, Q))
+ if (Value *Ret = simplifyIntrinsic(Call, Callee, Args, Q))
return Ret;
return nullptr;
@@ -6534,9 +6747,10 @@ Value *llvm::simplifyCall(CallBase *Call, const SimplifyQuery &Q) {
Value *llvm::simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q) {
assert(isa<ConstrainedFPIntrinsic>(Call));
- if (Value *V = tryConstantFoldCall(Call, Q))
+ SmallVector<Value *, 4> Args(Call->args());
+ if (Value *V = tryConstantFoldCall(Call, Call->getCalledOperand(), Args, Q))
return V;
- if (Value *Ret = simplifyIntrinsic(Call, Q))
+ if (Value *Ret = simplifyIntrinsic(Call, Call->getCalledOperand(), Args, Q))
return Ret;
return nullptr;
}
@@ -6554,27 +6768,38 @@ Value *llvm::simplifyFreezeInst(Value *Op0, const SimplifyQuery &Q) {
return ::simplifyFreezeInst(Op0, Q);
}
-static Value *simplifyLoadInst(LoadInst *LI, Value *PtrOp,
- const SimplifyQuery &Q) {
+Value *llvm::simplifyLoadInst(LoadInst *LI, Value *PtrOp,
+ const SimplifyQuery &Q) {
if (LI->isVolatile())
return nullptr;
- APInt Offset(Q.DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
- auto *PtrOpC = dyn_cast<Constant>(PtrOp);
+ if (auto *PtrOpC = dyn_cast<Constant>(PtrOp))
+ return ConstantFoldLoadFromConstPtr(PtrOpC, LI->getType(), Q.DL);
+
+ // We can only fold the load if it is from a constant global with definitive
+ // initializer. Skip expensive logic if this is not the case.
+ auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(PtrOp));
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+ return nullptr;
+
+ // If GlobalVariable's initializer is uniform, then return the constant
+ // regardless of its offset.
+ if (Constant *C =
+ ConstantFoldLoadFromUniformValue(GV->getInitializer(), LI->getType()))
+ return C;
+
// Try to convert operand into a constant by stripping offsets while looking
- // through invariant.group intrinsics. Don't bother if the underlying object
- // is not constant, as calculating GEP offsets is expensive.
- if (!PtrOpC && isa<Constant>(getUnderlyingObject(PtrOp))) {
- PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
- Q.DL, Offset, /* AllowNonInbounts */ true,
- /* AllowInvariantGroup */ true);
+ // through invariant.group intrinsics.
+ APInt Offset(Q.DL.getIndexTypeSizeInBits(PtrOp->getType()), 0);
+ PtrOp = PtrOp->stripAndAccumulateConstantOffsets(
+ Q.DL, Offset, /* AllowNonInbounds */ true,
+ /* AllowInvariantGroup */ true);
+ if (PtrOp == GV) {
// Index size may have changed due to address space casts.
Offset = Offset.sextOrTrunc(Q.DL.getIndexTypeSizeInBits(PtrOp->getType()));
- PtrOpC = dyn_cast<Constant>(PtrOp);
+ return ConstantFoldLoadFromConstPtr(GV, LI->getType(), Offset, Q.DL);
}
- if (PtrOpC)
- return ConstantFoldLoadFromConstPtr(PtrOpC, LI->getType(), Offset, Q.DL);
return nullptr;
}
@@ -6584,7 +6809,8 @@ static Value *simplifyLoadInst(LoadInst *LI, Value *PtrOp,
static Value *simplifyInstructionWithOperands(Instruction *I,
ArrayRef<Value *> NewOps,
const SimplifyQuery &SQ,
- OptimizationRemarkEmitter *ORE) {
+ unsigned MaxRecurse) {
+ assert(I->getFunction() && "instruction should be inserted in a function");
const SimplifyQuery Q = SQ.CxtI ? SQ : SQ.getWithInstruction(I);
switch (I->getOpcode()) {
@@ -6597,97 +6823,112 @@ static Value *simplifyInstructionWithOperands(Instruction *I,
}
return nullptr;
case Instruction::FNeg:
- return simplifyFNegInst(NewOps[0], I->getFastMathFlags(), Q);
+ return simplifyFNegInst(NewOps[0], I->getFastMathFlags(), Q, MaxRecurse);
case Instruction::FAdd:
- return simplifyFAddInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
+ return simplifyFAddInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q,
+ MaxRecurse);
case Instruction::Add:
- return simplifyAddInst(NewOps[0], NewOps[1],
- Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
- Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
+ return simplifyAddInst(
+ NewOps[0], NewOps[1], Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
+ Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q, MaxRecurse);
case Instruction::FSub:
- return simplifyFSubInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
+ return simplifyFSubInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q,
+ MaxRecurse);
case Instruction::Sub:
- return simplifySubInst(NewOps[0], NewOps[1],
- Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
- Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
+ return simplifySubInst(
+ NewOps[0], NewOps[1], Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
+ Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q, MaxRecurse);
case Instruction::FMul:
- return simplifyFMulInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
+ return simplifyFMulInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q,
+ MaxRecurse);
case Instruction::Mul:
- return simplifyMulInst(NewOps[0], NewOps[1],
- Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
- Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
+ return simplifyMulInst(
+ NewOps[0], NewOps[1], Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
+ Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q, MaxRecurse);
case Instruction::SDiv:
return simplifySDivInst(NewOps[0], NewOps[1],
- Q.IIQ.isExact(cast<BinaryOperator>(I)), Q);
+ Q.IIQ.isExact(cast<BinaryOperator>(I)), Q,
+ MaxRecurse);
case Instruction::UDiv:
return simplifyUDivInst(NewOps[0], NewOps[1],
- Q.IIQ.isExact(cast<BinaryOperator>(I)), Q);
+ Q.IIQ.isExact(cast<BinaryOperator>(I)), Q,
+ MaxRecurse);
case Instruction::FDiv:
- return simplifyFDivInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
+ return simplifyFDivInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q,
+ MaxRecurse);
case Instruction::SRem:
- return simplifySRemInst(NewOps[0], NewOps[1], Q);
+ return simplifySRemInst(NewOps[0], NewOps[1], Q, MaxRecurse);
case Instruction::URem:
- return simplifyURemInst(NewOps[0], NewOps[1], Q);
+ return simplifyURemInst(NewOps[0], NewOps[1], Q, MaxRecurse);
case Instruction::FRem:
- return simplifyFRemInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q);
+ return simplifyFRemInst(NewOps[0], NewOps[1], I->getFastMathFlags(), Q,
+ MaxRecurse);
case Instruction::Shl:
- return simplifyShlInst(NewOps[0], NewOps[1],
- Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
- Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q);
+ return simplifyShlInst(
+ NewOps[0], NewOps[1], Q.IIQ.hasNoSignedWrap(cast<BinaryOperator>(I)),
+ Q.IIQ.hasNoUnsignedWrap(cast<BinaryOperator>(I)), Q, MaxRecurse);
case Instruction::LShr:
return simplifyLShrInst(NewOps[0], NewOps[1],
- Q.IIQ.isExact(cast<BinaryOperator>(I)), Q);
+ Q.IIQ.isExact(cast<BinaryOperator>(I)), Q,
+ MaxRecurse);
case Instruction::AShr:
return simplifyAShrInst(NewOps[0], NewOps[1],
- Q.IIQ.isExact(cast<BinaryOperator>(I)), Q);
+ Q.IIQ.isExact(cast<BinaryOperator>(I)), Q,
+ MaxRecurse);
case Instruction::And:
- return simplifyAndInst(NewOps[0], NewOps[1], Q);
+ return simplifyAndInst(NewOps[0], NewOps[1], Q, MaxRecurse);
case Instruction::Or:
- return simplifyOrInst(NewOps[0], NewOps[1], Q);
+ return simplifyOrInst(NewOps[0], NewOps[1], Q, MaxRecurse);
case Instruction::Xor:
- return simplifyXorInst(NewOps[0], NewOps[1], Q);
+ return simplifyXorInst(NewOps[0], NewOps[1], Q, MaxRecurse);
case Instruction::ICmp:
return simplifyICmpInst(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
- NewOps[1], Q);
+ NewOps[1], Q, MaxRecurse);
case Instruction::FCmp:
return simplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
- NewOps[1], I->getFastMathFlags(), Q);
+ NewOps[1], I->getFastMathFlags(), Q, MaxRecurse);
case Instruction::Select:
- return simplifySelectInst(NewOps[0], NewOps[1], NewOps[2], Q);
+ return simplifySelectInst(NewOps[0], NewOps[1], NewOps[2], Q, MaxRecurse);
break;
case Instruction::GetElementPtr: {
auto *GEPI = cast<GetElementPtrInst>(I);
return simplifyGEPInst(GEPI->getSourceElementType(), NewOps[0],
- ArrayRef(NewOps).slice(1), GEPI->isInBounds(), Q);
+ ArrayRef(NewOps).slice(1), GEPI->isInBounds(), Q,
+ MaxRecurse);
}
case Instruction::InsertValue: {
InsertValueInst *IV = cast<InsertValueInst>(I);
- return simplifyInsertValueInst(NewOps[0], NewOps[1], IV->getIndices(), Q);
+ return simplifyInsertValueInst(NewOps[0], NewOps[1], IV->getIndices(), Q,
+ MaxRecurse);
}
case Instruction::InsertElement:
return simplifyInsertElementInst(NewOps[0], NewOps[1], NewOps[2], Q);
case Instruction::ExtractValue: {
auto *EVI = cast<ExtractValueInst>(I);
- return simplifyExtractValueInst(NewOps[0], EVI->getIndices(), Q);
+ return simplifyExtractValueInst(NewOps[0], EVI->getIndices(), Q,
+ MaxRecurse);
}
case Instruction::ExtractElement:
- return simplifyExtractElementInst(NewOps[0], NewOps[1], Q);
+ return simplifyExtractElementInst(NewOps[0], NewOps[1], Q, MaxRecurse);
case Instruction::ShuffleVector: {
auto *SVI = cast<ShuffleVectorInst>(I);
return simplifyShuffleVectorInst(NewOps[0], NewOps[1],
- SVI->getShuffleMask(), SVI->getType(), Q);
+ SVI->getShuffleMask(), SVI->getType(), Q,
+ MaxRecurse);
}
case Instruction::PHI:
return simplifyPHINode(cast<PHINode>(I), NewOps, Q);
case Instruction::Call:
- // TODO: Use NewOps
- return simplifyCall(cast<CallInst>(I), Q);
+ return simplifyCall(
+ cast<CallInst>(I), NewOps.back(),
+ NewOps.drop_back(1 + cast<CallInst>(I)->getNumTotalBundleOperands()), Q);
case Instruction::Freeze:
return llvm::simplifyFreezeInst(NewOps[0], Q);
#define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
#include "llvm/IR/Instruction.def"
#undef HANDLE_CAST_INST
- return simplifyCastInst(I->getOpcode(), NewOps[0], I->getType(), Q);
+ return simplifyCastInst(I->getOpcode(), NewOps[0], I->getType(), Q,
+ MaxRecurse);
case Instruction::Alloca:
// No simplifications for Alloca and it can't be constant folded.
return nullptr;
@@ -6698,17 +6939,15 @@ static Value *simplifyInstructionWithOperands(Instruction *I,
Value *llvm::simplifyInstructionWithOperands(Instruction *I,
ArrayRef<Value *> NewOps,
- const SimplifyQuery &SQ,
- OptimizationRemarkEmitter *ORE) {
+ const SimplifyQuery &SQ) {
assert(NewOps.size() == I->getNumOperands() &&
"Number of operands should match the instruction!");
- return ::simplifyInstructionWithOperands(I, NewOps, SQ, ORE);
+ return ::simplifyInstructionWithOperands(I, NewOps, SQ, RecursionLimit);
}
-Value *llvm::simplifyInstruction(Instruction *I, const SimplifyQuery &SQ,
- OptimizationRemarkEmitter *ORE) {
+Value *llvm::simplifyInstruction(Instruction *I, const SimplifyQuery &SQ) {
SmallVector<Value *, 8> Ops(I->operands());
- Value *Result = ::simplifyInstructionWithOperands(I, Ops, SQ, ORE);
+ Value *Result = ::simplifyInstructionWithOperands(I, Ops, SQ, RecursionLimit);
/// If called on unreachable code, the instruction may simplify to itself.
/// Make life easier for users by detecting that case here, and returning a
@@ -6747,10 +6986,7 @@ static bool replaceAndRecursivelySimplifyImpl(
// Replace the instruction with its simplified value.
I->replaceAllUsesWith(SimpleV);
- // Gracefully handle edge cases where the instruction is not wired into any
- // parent block.
- if (I->getParent() && !I->isEHPad() && !I->isTerminator() &&
- !I->mayHaveSideEffects())
+ if (!I->isEHPad() && !I->isTerminator() && !I->mayHaveSideEffects())
I->eraseFromParent();
} else {
Worklist.insert(I);
@@ -6779,10 +7015,7 @@ static bool replaceAndRecursivelySimplifyImpl(
// Replace the instruction with its simplified value.
I->replaceAllUsesWith(SimpleV);
- // Gracefully handle edge cases where the instruction is not wired into any
- // parent block.
- if (I->getParent() && !I->isEHPad() && !I->isTerminator() &&
- !I->mayHaveSideEffects())
+ if (!I->isEHPad() && !I->isTerminator() && !I->mayHaveSideEffects())
I->eraseFromParent();
}
return Simplified;
diff --git a/llvm/lib/Analysis/InteractiveModelRunner.cpp b/llvm/lib/Analysis/InteractiveModelRunner.cpp
new file mode 100644
index 000000000000..99b009b6616f
--- /dev/null
+++ b/llvm/lib/Analysis/InteractiveModelRunner.cpp
@@ -0,0 +1,82 @@
+//===- InteractiveModelRunner.cpp - interactive ML model runner ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// A runner that communicates with an external agent via 2 file descriptors.
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/InteractiveModelRunner.h"
+#include "llvm/Analysis/MLModelRunner.h"
+#include "llvm/Analysis/TensorSpec.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static cl::opt<bool> DebugReply(
+ "interactive-model-runner-echo-reply", cl::init(false), cl::Hidden,
+ cl::desc("The InteractiveModelRunner will echo back to stderr "
+ "the data received from the host (for debugging purposes)."));
+
+InteractiveModelRunner::InteractiveModelRunner(
+ LLVMContext &Ctx, const std::vector<TensorSpec> &Inputs,
+ const TensorSpec &Advice, StringRef OutboundName, StringRef InboundName)
+ : MLModelRunner(Ctx, MLModelRunner::Kind::Interactive, Inputs.size()),
+ InputSpecs(Inputs), OutputSpec(Advice),
+ InEC(sys::fs::openFileForRead(InboundName, Inbound)),
+ OutputBuffer(OutputSpec.getTotalTensorBufferSize()) {
+ if (InEC) {
+ Ctx.emitError("Cannot open inbound file: " + InEC.message());
+ return;
+ }
+ {
+ auto OutStream = std::make_unique<raw_fd_ostream>(OutboundName, OutEC);
+ if (OutEC) {
+ Ctx.emitError("Cannot open outbound file: " + OutEC.message());
+ return;
+ }
+ Log = std::make_unique<Logger>(std::move(OutStream), InputSpecs, Advice,
+ /*IncludeReward=*/false, Advice);
+ }
+ // Just like in the no inference case, this will allocate an appropriately
+ // sized buffer.
+ for (size_t I = 0; I < InputSpecs.size(); ++I)
+ setUpBufferForTensor(I, InputSpecs[I], nullptr);
+ Log->flush();
+}
+
+InteractiveModelRunner::~InteractiveModelRunner() {
+ sys::fs::file_t FDAsOSHandle = sys::fs::convertFDToNativeFile(Inbound);
+ sys::fs::closeFile(FDAsOSHandle);
+}
+
+void *InteractiveModelRunner::evaluateUntyped() {
+ Log->startObservation();
+ for (size_t I = 0; I < InputSpecs.size(); ++I)
+ Log->logTensorValue(I, reinterpret_cast<const char *>(getTensorUntyped(I)));
+ Log->endObservation();
+ Log->flush();
+
+ size_t InsPoint = 0;
+ char *Buff = OutputBuffer.data();
+ const size_t Limit = OutputBuffer.size();
+ while (InsPoint < Limit) {
+ auto ReadOrErr = ::sys::fs::readNativeFile(
+ sys::fs::convertFDToNativeFile(Inbound),
+ {Buff + InsPoint, OutputBuffer.size() - InsPoint});
+ if (ReadOrErr.takeError()) {
+ Ctx.emitError("Failed reading from inbound file");
+ break;
+ }
+ InsPoint += *ReadOrErr;
+ }
+ if (DebugReply)
+ dbgs() << OutputSpec.name() << ": "
+ << tensorValueToString(OutputBuffer.data(), OutputSpec) << "\n";
+ return OutputBuffer.data();
+}
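
The evaluateUntyped loop reads until the full advice buffer has arrived because a single read on a pipe may return fewer bytes than requested. A generic POSIX sketch of the same pattern with EINTR and EOF handling spelled out; readFully and the sample message are illustrative, not part of the LLVM API:

#include <cassert>
#include <cerrno>
#include <cstddef>
#include <cstring>
#include <unistd.h>

// Keep calling read(2) until Size bytes arrived. Returns false on EOF or on a
// hard error.
static bool readFully(int FD, char *Buf, size_t Size) {
  size_t Got = 0;
  while (Got < Size) {
    ssize_t N = ::read(FD, Buf + Got, Size - Got);
    if (N < 0) {
      if (errno == EINTR)
        continue;      // interrupted by a signal; retry
      return false;    // hard error
    }
    if (N == 0)
      return false;    // EOF before the full message arrived
    Got += static_cast<size_t>(N);
  }
  return true;
}

int main() {
  int Pipe[2];
  assert(pipe(Pipe) == 0);
  const char Msg[8] = {'a', 'd', 'v', 'i', 'c', 'e', '!', '\n'};
  assert(write(Pipe[1], Msg, sizeof(Msg)) == (ssize_t)sizeof(Msg));
  char Buf[8];
  assert(readFully(Pipe[0], Buf, sizeof(Buf)));
  assert(std::memcmp(Buf, Msg, sizeof(Buf)) == 0);
  return 0;
}
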
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index f1587cecf9fb..33651783cb17 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -162,7 +162,7 @@ namespace {
struct BlockCacheEntry {
SmallDenseMap<AssertingVH<Value>, ValueLatticeElement, 4> LatticeElements;
SmallDenseSet<AssertingVH<Value>, 4> OverDefined;
- // None indicates that the nonnull pointers for this basic block
+ // std::nullopt indicates that the nonnull pointers for this basic block
// have not been computed yet.
std::optional<NonNullPointerSet> NonNullPointers;
};
@@ -876,10 +876,14 @@ LazyValueInfoImpl::solveBlockValueSelect(SelectInst *SI, BasicBlock *BB) {
// condition itself? This shows up with idioms like e.g. select(a > 5, a, 5).
// TODO: We could potentially refine an overdefined true value above.
Value *Cond = SI->getCondition();
- TrueVal = intersect(TrueVal,
- getValueFromCondition(SI->getTrueValue(), Cond, true));
- FalseVal = intersect(FalseVal,
- getValueFromCondition(SI->getFalseValue(), Cond, false));
+ // If the value is undef, a different value may be chosen in
+ // the select condition.
+ if (isGuaranteedNotToBeUndefOrPoison(Cond, AC)) {
+ TrueVal = intersect(TrueVal,
+ getValueFromCondition(SI->getTrueValue(), Cond, true));
+ FalseVal = intersect(
+ FalseVal, getValueFromCondition(SI->getFalseValue(), Cond, false));
+ }
ValueLatticeElement Result = TrueVal;
Result.mergeIn(FalseVal);
@@ -990,10 +994,11 @@ LazyValueInfoImpl::solveBlockValueOverflowIntrinsic(WithOverflowInst *WO,
std::optional<ValueLatticeElement>
LazyValueInfoImpl::solveBlockValueIntrinsic(IntrinsicInst *II, BasicBlock *BB) {
+ ValueLatticeElement MetadataVal = getFromRangeMetadata(II);
if (!ConstantRange::isIntrinsicSupported(II->getIntrinsicID())) {
LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - unknown intrinsic.\n");
- return getFromRangeMetadata(II);
+ return MetadataVal;
}
SmallVector<ConstantRange, 2> OpRanges;
@@ -1004,8 +1009,9 @@ LazyValueInfoImpl::solveBlockValueIntrinsic(IntrinsicInst *II, BasicBlock *BB) {
OpRanges.push_back(*Range);
}
- return ValueLatticeElement::getRange(
- ConstantRange::intrinsic(II->getIntrinsicID(), OpRanges));
+ return intersect(ValueLatticeElement::getRange(ConstantRange::intrinsic(
+ II->getIntrinsicID(), OpRanges)),
+ MetadataVal);
}
std::optional<ValueLatticeElement>
@@ -1123,7 +1129,7 @@ static ValueLatticeElement getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
// bit of Mask.
if (EdgePred == ICmpInst::ICMP_NE && !Mask->isZero() && C->isZero()) {
return ValueLatticeElement::getRange(ConstantRange::getNonEmpty(
- APInt::getOneBitSet(BitWidth, Mask->countTrailingZeros()),
+ APInt::getOneBitSet(BitWidth, Mask->countr_zero()),
APInt::getZero(BitWidth)));
}
}
@@ -1665,6 +1671,10 @@ ConstantRange LazyValueInfo::getConstantRangeAtUse(const Use &U,
std::optional<ValueLatticeElement> CondVal;
auto *CurrI = cast<Instruction>(CurrU->getUser());
if (auto *SI = dyn_cast<SelectInst>(CurrI)) {
+ // If the value is undef, a different value may be chosen in
+ // the select condition and at use.
+ if (!isGuaranteedNotToBeUndefOrPoison(SI->getCondition(), AC))
+ break;
if (CurrU->getOperandNo() == 1)
CondVal = getValueFromCondition(V, SI->getCondition(), true);
else if (CurrU->getOperandNo() == 2)
@@ -1673,11 +1683,6 @@ ConstantRange LazyValueInfo::getConstantRangeAtUse(const Use &U,
// TODO: Use non-local query?
CondVal =
getEdgeValueLocal(V, PHI->getIncomingBlock(*CurrU), PHI->getParent());
- } else if (!isSafeToSpeculativelyExecute(CurrI)) {
- // Stop walking if we hit a non-speculatable instruction. Even if the
- // result is only used under a specific condition, executing the
- // instruction itself may cause side effects or UB already.
- break;
}
if (CondVal && CondVal->isConstantRange())
CR = CR.intersectWith(CondVal->getConstantRange());
@@ -1685,7 +1690,13 @@ ConstantRange LazyValueInfo::getConstantRangeAtUse(const Use &U,
// Only follow one-use chain, to allow direct intersection of conditions.
// If there are multiple uses, we would have to intersect with the union of
// all conditions at different uses.
- if (!CurrI->hasOneUse())
+ // Stop walking if we hit a non-speculatable instruction. Even if the
+ // result is only used under a specific condition, executing the
+ // instruction itself may cause side effects or UB already.
+ // This also disallows looking through phi nodes: If the phi node is part
+ // of a cycle, we might end up reasoning about values from different cycle
+ // iterations (PR60629).
+ if (!CurrI->hasOneUse() || !isSafeToSpeculativelyExecute(CurrI))
break;
CurrU = &*CurrI->use_begin();
}
@@ -1738,7 +1749,7 @@ getPredicateResult(unsigned Pred, Constant *C, const ValueLatticeElement &Val,
Constant *Res = nullptr;
if (Val.isConstant()) {
Res = ConstantFoldCompareInstOperands(Pred, Val.getConstant(), C, DL, TLI);
- if (ConstantInt *ResCI = dyn_cast<ConstantInt>(Res))
+ if (ConstantInt *ResCI = dyn_cast_or_null<ConstantInt>(Res))
return ResCI->isZero() ? LazyValueInfo::False : LazyValueInfo::True;
return LazyValueInfo::Unknown;
}
@@ -1780,14 +1791,14 @@ getPredicateResult(unsigned Pred, Constant *C, const ValueLatticeElement &Val,
Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
Val.getNotConstant(), C, DL,
TLI);
- if (Res->isNullValue())
+ if (Res && Res->isNullValue())
return LazyValueInfo::False;
} else if (Pred == ICmpInst::ICMP_NE) {
// !C1 != C -> true iff C1 == C.
Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
Val.getNotConstant(), C, DL,
TLI);
- if (Res->isNullValue())
+ if (Res && Res->isNullValue())
return LazyValueInfo::True;
}
return LazyValueInfo::Unknown;
diff --git a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp b/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
deleted file mode 100644
index baa7e9daa0ae..000000000000
--- a/llvm/lib/Analysis/LegacyDivergenceAnalysis.cpp
+++ /dev/null
@@ -1,435 +0,0 @@
-//===- LegacyDivergenceAnalysis.cpp --------- Legacy Divergence Analysis
-//Implementation -==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements divergence analysis which determines whether a branch
-// in a GPU program is divergent. It can help branch optimizations such as jump
-// threading and loop unswitching to make better decisions.
-//
-// GPU programs typically use the SIMD execution model, where multiple threads
-// in the same execution group have to execute in lock-step. Therefore, if the
-// code contains divergent branches (i.e., threads in a group do not agree on
-// which path of the branch to take), the group of threads has to execute all
-// the paths from that branch with different subsets of threads enabled until
-// they converge at the immediately post-dominating BB of the paths.
-//
-// Due to this execution model, some optimizations such as jump
-// threading and loop unswitching can be unfortunately harmful when performed on
-// divergent branches. Therefore, an analysis that computes which branches in a
-// GPU program are divergent can help the compiler to selectively run these
-// optimizations.
-//
-// This file defines divergence analysis which computes a conservative but
-// non-trivial approximation of all divergent branches in a GPU program. It
-// partially implements the approach described in
-//
-// Divergence Analysis
-// Sampaio, Souza, Collange, Pereira
-// TOPLAS '13
-//
-// The divergence analysis identifies the sources of divergence (e.g., special
-// variables that hold the thread ID), and recursively marks variables that are
-// data or sync dependent on a source of divergence as divergent.
-//
-// While data dependency is a well-known concept, the notion of sync dependency
-// is worth more explanation. Sync dependence characterizes the control flow
-// aspect of the propagation of branch divergence. For example,
-//
-// %cond = icmp slt i32 %tid, 10
-// br i1 %cond, label %then, label %else
-// then:
-// br label %merge
-// else:
-// br label %merge
-// merge:
-// %a = phi i32 [ 0, %then ], [ 1, %else ]
-//
-// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
-// because %tid is not on its use-def chains, %a is sync dependent on %tid
-// because the branch "br i1 %cond" depends on %tid and affects which value %a
-// is assigned to.
-//
-// The current implementation has the following limitations:
-// 1. intra-procedural. It conservatively considers the arguments of a
-// non-kernel-entry function and the return value of a function call as
-// divergent.
-// 2. memory as black box. It conservatively considers values loaded from
-// generic or local address as divergent. This can be improved by leveraging
-// pointer analysis.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/PostDominators.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <vector>
-using namespace llvm;
-
-#define DEBUG_TYPE "divergence"
-
-// transparently use the GPUDivergenceAnalysis
-static cl::opt<bool> UseGPUDA("use-gpu-divergence-analysis", cl::init(false),
- cl::Hidden,
- cl::desc("turn the LegacyDivergenceAnalysis into "
- "a wrapper for GPUDivergenceAnalysis"));
-
-namespace {
-
-class DivergencePropagator {
-public:
- DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT,
- PostDominatorTree &PDT, DenseSet<const Value *> &DV,
- DenseSet<const Use *> &DU)
- : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV), DU(DU) {}
- void populateWithSourcesOfDivergence();
- void propagate();
-
-private:
- // A helper function that explores data dependents of V.
- void exploreDataDependency(Value *V);
- // A helper function that explores sync dependents of TI.
- void exploreSyncDependency(Instruction *TI);
- // Computes the influence region from Start to End. This region includes all
- // basic blocks on any simple path from Start to End.
- void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,
- DenseSet<BasicBlock *> &InfluenceRegion);
- // Finds all users of I that are outside the influence region, and adds these
- // users to Worklist.
- void findUsersOutsideInfluenceRegion(
- Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion);
-
- Function &F;
- TargetTransformInfo &TTI;
- DominatorTree &DT;
- PostDominatorTree &PDT;
- std::vector<Value *> Worklist; // Stack for DFS.
- DenseSet<const Value *> &DV; // Stores all divergent values.
- DenseSet<const Use *> &DU; // Stores divergent uses of possibly uniform
- // values.
-};
-
-void DivergencePropagator::populateWithSourcesOfDivergence() {
- Worklist.clear();
- DV.clear();
- DU.clear();
- for (auto &I : instructions(F)) {
- if (TTI.isSourceOfDivergence(&I)) {
- Worklist.push_back(&I);
- DV.insert(&I);
- }
- }
- for (auto &Arg : F.args()) {
- if (TTI.isSourceOfDivergence(&Arg)) {
- Worklist.push_back(&Arg);
- DV.insert(&Arg);
- }
- }
-}
-
-void DivergencePropagator::exploreSyncDependency(Instruction *TI) {
- // Propagation rule 1: if branch TI is divergent, all PHINodes in TI's
- // immediate post dominator are divergent. This rule handles if-then-else
- // patterns. For example,
- //
- // if (tid < 5)
- // a1 = 1;
- // else
- // a2 = 2;
- // a = phi(a1, a2); // sync dependent on (tid < 5)
- BasicBlock *ThisBB = TI->getParent();
-
- // Unreachable blocks may not be in the dominator tree.
- if (!DT.isReachableFromEntry(ThisBB))
- return;
-
- // If the function has no exit blocks or doesn't reach any exit blocks, the
- // post dominator may be null.
- DomTreeNode *ThisNode = PDT.getNode(ThisBB);
- if (!ThisNode)
- return;
-
- BasicBlock *IPostDom = ThisNode->getIDom()->getBlock();
- if (IPostDom == nullptr)
- return;
-
- for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {
- // A PHINode is uniform if it returns the same value no matter which path is
- // taken.
- if (!cast<PHINode>(I)->hasConstantOrUndefValue() && DV.insert(&*I).second)
- Worklist.push_back(&*I);
- }
-
- // Propagation rule 2: if a value defined in a loop is used outside, the user
- // is sync dependent on the condition of the loop exits that dominate the
- // user. For example,
- //
- // int i = 0;
- // do {
- // i++;
- // if (foo(i)) ... // uniform
- // } while (i < tid);
- // if (bar(i)) ... // divergent
- //
- // A program may contain unstructured loops. Therefore, we cannot leverage
- // LoopInfo, which only recognizes natural loops.
- //
- // The algorithm used here handles both natural and unstructured loops. Given
- // a branch TI, we first compute its influence region, the union of all simple
- // paths from TI to its immediate post dominator (IPostDom). Then, we search
- // for all the values defined in the influence region but used outside. All
- // these users are sync dependent on TI.
- DenseSet<BasicBlock *> InfluenceRegion;
- computeInfluenceRegion(ThisBB, IPostDom, InfluenceRegion);
- // An insight that can speed up the search process is that all the in-region
- // values that are used outside must dominate TI. Therefore, instead of
- // searching every basic block in the influence region, we search all the
- // dominators of TI until it is outside the influence region.
- BasicBlock *InfluencedBB = ThisBB;
- while (InfluenceRegion.count(InfluencedBB)) {
- for (auto &I : *InfluencedBB) {
- if (!DV.count(&I))
- findUsersOutsideInfluenceRegion(I, InfluenceRegion);
- }
- DomTreeNode *IDomNode = DT.getNode(InfluencedBB)->getIDom();
- if (IDomNode == nullptr)
- break;
- InfluencedBB = IDomNode->getBlock();
- }
-}
-
-void DivergencePropagator::findUsersOutsideInfluenceRegion(
- Instruction &I, const DenseSet<BasicBlock *> &InfluenceRegion) {
- for (Use &Use : I.uses()) {
- Instruction *UserInst = cast<Instruction>(Use.getUser());
- if (!InfluenceRegion.count(UserInst->getParent())) {
- DU.insert(&Use);
- if (DV.insert(UserInst).second)
- Worklist.push_back(UserInst);
- }
- }
-}
-
-// A helper function for computeInfluenceRegion that adds successors of "ThisBB"
-// to the influence region.
-static void
-addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End,
- DenseSet<BasicBlock *> &InfluenceRegion,
- std::vector<BasicBlock *> &InfluenceStack) {
- for (BasicBlock *Succ : successors(ThisBB)) {
- if (Succ != End && InfluenceRegion.insert(Succ).second)
- InfluenceStack.push_back(Succ);
- }
-}
-
-void DivergencePropagator::computeInfluenceRegion(
- BasicBlock *Start, BasicBlock *End,
- DenseSet<BasicBlock *> &InfluenceRegion) {
- assert(PDT.properlyDominates(End, Start) &&
- "End does not properly dominate Start");
-
- // The influence region starts from the end of "Start" to the beginning of
- // "End". Therefore, "Start" should not be in the region unless "Start" is in
- // a loop that doesn't contain "End".
- std::vector<BasicBlock *> InfluenceStack;
- addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);
- while (!InfluenceStack.empty()) {
- BasicBlock *BB = InfluenceStack.back();
- InfluenceStack.pop_back();
- addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack);
- }
-}
-
-void DivergencePropagator::exploreDataDependency(Value *V) {
- // Follow def-use chains of V.
- for (User *U : V->users()) {
- if (!TTI.isAlwaysUniform(U) && DV.insert(U).second)
- Worklist.push_back(U);
- }
-}
-
-void DivergencePropagator::propagate() {
- // Traverse the dependency graph using DFS.
- while (!Worklist.empty()) {
- Value *V = Worklist.back();
- Worklist.pop_back();
- if (Instruction *I = dyn_cast<Instruction>(V)) {
- // Terminators with less than two successors won't introduce sync
- // dependency. Ignore them.
- if (I->isTerminator() && I->getNumSuccessors() > 1)
- exploreSyncDependency(I);
- }
- exploreDataDependency(V);
- }
-}
-
-} // namespace
-
-// Register this pass.
-char LegacyDivergenceAnalysis::ID = 0;
-LegacyDivergenceAnalysis::LegacyDivergenceAnalysis() : FunctionPass(ID) {
- initializeLegacyDivergenceAnalysisPass(*PassRegistry::getPassRegistry());
-}
-INITIALIZE_PASS_BEGIN(LegacyDivergenceAnalysis, "divergence",
- "Legacy Divergence Analysis", false, true)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(LegacyDivergenceAnalysis, "divergence",
- "Legacy Divergence Analysis", false, true)
-
-FunctionPass *llvm::createLegacyDivergenceAnalysisPass() {
- return new LegacyDivergenceAnalysis();
-}
-
-bool LegacyDivergenceAnalysisImpl::shouldUseGPUDivergenceAnalysis(
- const Function &F, const TargetTransformInfo &TTI, const LoopInfo &LI) {
- if (!(UseGPUDA || TTI.useGPUDivergenceAnalysis()))
- return false;
-
- // GPUDivergenceAnalysis requires a reducible CFG.
- using RPOTraversal = ReversePostOrderTraversal<const Function *>;
- RPOTraversal FuncRPOT(&F);
- return !containsIrreducibleCFG<const BasicBlock *, const RPOTraversal,
- const LoopInfo>(FuncRPOT, LI);
-}
-
-void LegacyDivergenceAnalysisImpl::run(Function &F,
- llvm::TargetTransformInfo &TTI,
- llvm::DominatorTree &DT,
- llvm::PostDominatorTree &PDT,
- const llvm::LoopInfo &LI) {
- if (shouldUseGPUDivergenceAnalysis(F, TTI, LI)) {
- // run the new GPU divergence analysis
- gpuDA = std::make_unique<DivergenceInfo>(F, DT, PDT, LI, TTI,
- /* KnownReducible = */ true);
-
- } else {
- // run LLVM's existing DivergenceAnalysis
- DivergencePropagator DP(F, TTI, DT, PDT, DivergentValues, DivergentUses);
- DP.populateWithSourcesOfDivergence();
- DP.propagate();
- }
-}
-
-bool LegacyDivergenceAnalysisImpl::isDivergent(const Value *V) const {
- if (gpuDA) {
- return gpuDA->isDivergent(*V);
- }
- return DivergentValues.count(V);
-}
-
-bool LegacyDivergenceAnalysisImpl::isDivergentUse(const Use *U) const {
- if (gpuDA) {
- return gpuDA->isDivergentUse(*U);
- }
- return DivergentValues.count(U->get()) || DivergentUses.count(U);
-}
-
-void LegacyDivergenceAnalysisImpl::print(raw_ostream &OS,
- const Module *) const {
- if ((!gpuDA || !gpuDA->hasDivergence()) && DivergentValues.empty())
- return;
-
- const Function *F = nullptr;
- if (!DivergentValues.empty()) {
- const Value *FirstDivergentValue = *DivergentValues.begin();
- if (const Argument *Arg = dyn_cast<Argument>(FirstDivergentValue)) {
- F = Arg->getParent();
- } else if (const Instruction *I =
- dyn_cast<Instruction>(FirstDivergentValue)) {
- F = I->getParent()->getParent();
- } else {
- llvm_unreachable("Only arguments and instructions can be divergent");
- }
- } else if (gpuDA) {
- F = &gpuDA->getFunction();
- }
- if (!F)
- return;
-
- // Dumps all divergent values in F, arguments and then instructions.
- for (const auto &Arg : F->args()) {
- OS << (isDivergent(&Arg) ? "DIVERGENT: " : " ");
- OS << Arg << "\n";
- }
- // Iterate instructions using instructions() to ensure a deterministic order.
- for (const BasicBlock &BB : *F) {
- OS << "\n " << BB.getName() << ":\n";
- for (const auto &I : BB.instructionsWithoutDebug()) {
- OS << (isDivergent(&I) ? "DIVERGENT: " : " ");
- OS << I << "\n";
- }
- }
- OS << "\n";
-}
-
-void LegacyDivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredTransitive<DominatorTreeWrapperPass>();
- AU.addRequiredTransitive<PostDominatorTreeWrapperPass>();
- AU.addRequiredTransitive<LoopInfoWrapperPass>();
- AU.setPreservesAll();
-}
-
-bool LegacyDivergenceAnalysis::runOnFunction(Function &F) {
- auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
- if (TTIWP == nullptr)
- return false;
-
- TargetTransformInfo &TTI = TTIWP->getTTI(F);
- // Fast path: if the target does not have branch divergence, we do not mark
- // any branch as divergent.
- if (!TTI.hasBranchDivergence())
- return false;
-
- DivergentValues.clear();
- DivergentUses.clear();
- gpuDA = nullptr;
-
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- LegacyDivergenceAnalysisImpl::run(F, TTI, DT, PDT, LI);
- LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
- << ":\n";
- LegacyDivergenceAnalysisImpl::print(dbgs(), F.getParent()));
-
- return false;
-}
-
-PreservedAnalyses
-LegacyDivergenceAnalysisPass::run(Function &F, FunctionAnalysisManager &AM) {
- auto &TTI = AM.getResult<TargetIRAnalysis>(F);
- if (!TTI.hasBranchDivergence())
- return PreservedAnalyses::all();
-
- DivergentValues.clear();
- DivergentUses.clear();
- gpuDA = nullptr;
-
- auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
- auto &LI = AM.getResult<LoopAnalysis>(F);
- LegacyDivergenceAnalysisImpl::run(F, TTI, DT, PDT, LI);
- LLVM_DEBUG(dbgs() << "\nAfter divergence analysis on " << F.getName()
- << ":\n";
- LegacyDivergenceAnalysisImpl::print(dbgs(), F.getParent()));
- return PreservedAnalyses::all();
-}
diff --git a/llvm/lib/Analysis/Lint.cpp b/llvm/lib/Analysis/Lint.cpp
index d3120a41ac27..ff022006df65 100644
--- a/llvm/lib/Analysis/Lint.cpp
+++ b/llvm/lib/Analysis/Lint.cpp
@@ -40,11 +40,14 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
@@ -60,13 +63,10 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
@@ -93,8 +93,6 @@ class Lint : public InstVisitor<Lint> {
void visitCallBase(CallBase &CB);
void visitMemoryReference(Instruction &I, const MemoryLocation &Loc,
MaybeAlign Alignment, Type *Ty, unsigned Flags);
- void visitEHBeginCatch(IntrinsicInst *II);
- void visitEHEndCatch(IntrinsicInst *II);
void visitReturnInst(ReturnInst &I);
void visitLoadInst(LoadInst &I);
@@ -715,73 +713,35 @@ PreservedAnalyses LintPass::run(Function &F, FunctionAnalysisManager &AM) {
return PreservedAnalyses::all();
}
-namespace {
-class LintLegacyPass : public FunctionPass {
-public:
- static char ID; // Pass identification, replacement for typeid
- LintLegacyPass() : FunctionPass(ID) {
- initializeLintLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- }
- void print(raw_ostream &O, const Module *M) const override {}
-};
-} // namespace
-
-char LintLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LintLegacyPass, "lint", "Statically lint-checks LLVM IR",
- false, true)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(LintLegacyPass, "lint", "Statically lint-checks LLVM IR",
- false, true)
-
-bool LintLegacyPass::runOnFunction(Function &F) {
- auto *Mod = F.getParent();
- auto *DL = &F.getParent()->getDataLayout();
- auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- Lint L(Mod, DL, AA, AC, DT, TLI);
- L.visit(F);
- dbgs() << L.MessagesStr.str();
- return false;
-}
-
//===----------------------------------------------------------------------===//
// Implement the public interfaces to this file...
//===----------------------------------------------------------------------===//
-FunctionPass *llvm::createLintLegacyPassPass() { return new LintLegacyPass(); }
-
/// lintFunction - Check a function for errors, printing messages on stderr.
///
void llvm::lintFunction(const Function &f) {
Function &F = const_cast<Function &>(f);
assert(!F.isDeclaration() && "Cannot lint external functions");
- legacy::FunctionPassManager FPM(F.getParent());
- auto *V = new LintLegacyPass();
- FPM.add(V);
- FPM.run(F);
+ FunctionAnalysisManager FAM;
+ FAM.registerPass([&] { return TargetLibraryAnalysis(); });
+ FAM.registerPass([&] { return DominatorTreeAnalysis(); });
+ FAM.registerPass([&] { return AssumptionAnalysis(); });
+ FAM.registerPass([&] {
+ AAManager AA;
+ AA.registerFunctionAnalysis<BasicAA>();
+ AA.registerFunctionAnalysis<ScopedNoAliasAA>();
+ AA.registerFunctionAnalysis<TypeBasedAA>();
+ return AA;
+ });
+ LintPass().run(F, FAM);
}
/// lintModule - Check a module for errors, printing messages on stderr.
///
void llvm::lintModule(const Module &M) {
- legacy::PassManager PM;
- auto *V = new LintLegacyPass();
- PM.add(V);
- PM.run(const_cast<Module &>(M));
+ for (const Function &F : M) {
+ if (!F.isDeclaration())
+ lintFunction(F);
+ }
}
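
With the legacy wrapper pass removed, standalone clients keep using the two free functions shown above; lintModule now simply lints every defined function. A minimal caller sketch (the checkModule wrapper is hypothetical):

  #include "llvm/Analysis/Lint.h"
  #include "llvm/IR/Module.h"

  // Lint every defined function in M; diagnostics are printed to stderr, as
  // documented for lintFunction/lintModule.
  void checkModule(const llvm::Module &M) {
    llvm::lintModule(M);
  }
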
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index f55333303f8d..97d21db86abf 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -29,9 +29,7 @@ using namespace llvm;
static bool isAligned(const Value *Base, const APInt &Offset, Align Alignment,
const DataLayout &DL) {
Align BA = Base->getPointerAlignment(DL);
- const APInt APAlign(Offset.getBitWidth(), Alignment.value());
- assert(APAlign.isPowerOf2() && "must be a power of 2!");
- return BA >= Alignment && !(Offset & (APAlign - 1));
+ return BA >= Alignment && Offset.isAligned(BA);
}
/// Test if V is always a pointer to allocated and suitably aligned memory for
@@ -204,7 +202,7 @@ bool llvm::isDereferenceableAndAlignedPointer(
const TargetLibraryInfo *TLI) {
// For unsized types or scalable vectors we don't know exactly how many bytes
// are dereferenced, so bail out.
- if (!Ty->isSized() || isa<ScalableVectorType>(Ty))
+ if (!Ty->isSized() || Ty->isScalableTy())
return false;
// When dereferenceability information is provided by a dereferenceable
@@ -286,21 +284,48 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L,
auto* Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(SE));
if (!Step)
return false;
- // TODO: generalize to access patterns which have gaps
- if (Step->getAPInt() != EltSize)
- return false;
auto TC = SE.getSmallConstantMaxTripCount(L);
if (!TC)
return false;
- const APInt AccessSize = TC * EltSize;
+ // TODO: Handle overlapping accesses.
+ // We should be computing AccessSize as (TC - 1) * Step + EltSize.
+ if (EltSize.sgt(Step->getAPInt()))
+ return false;
+
+ // Compute the total access size for access patterns with unit stride and
+ // patterns with gaps. For patterns with unit stride, Step and EltSize are the
+ // same.
+ // For patterns with gaps (i.e. non-unit stride), we are
+ // accessing EltSize bytes at every Step.
+ APInt AccessSize = TC * Step->getAPInt();
+
+ assert(SE.isLoopInvariant(AddRec->getStart(), L) &&
+ "implied by addrec definition");
+ Value *Base = nullptr;
+ if (auto *StartS = dyn_cast<SCEVUnknown>(AddRec->getStart())) {
+ Base = StartS->getValue();
+ } else if (auto *StartS = dyn_cast<SCEVAddExpr>(AddRec->getStart())) {
+ // Handle (NewBase + offset) as start value.
+ const auto *Offset = dyn_cast<SCEVConstant>(StartS->getOperand(0));
+ const auto *NewBase = dyn_cast<SCEVUnknown>(StartS->getOperand(1));
+ if (StartS->getNumOperands() == 2 && Offset && NewBase) {
+ // For the moment, restrict ourselves to the case where the offset is a
+ // multiple of the requested alignment and the base is aligned.
+ // TODO: generalize if a case is found which warrants it
+ if (Offset->getAPInt().urem(Alignment.value()) != 0)
+ return false;
+ Base = NewBase->getValue();
+ bool Overflow = false;
+ AccessSize = AccessSize.uadd_ov(Offset->getAPInt(), Overflow);
+ if (Overflow)
+ return false;
+ }
+ }
- auto *StartS = dyn_cast<SCEVUnknown>(AddRec->getStart());
- if (!StartS)
+ if (!Base)
return false;
- assert(SE.isLoopInvariant(StartS, L) && "implied by addrec definition");
- Value *Base = StartS->getValue();
// For the moment, restrict ourselves to the case where the access size is a
// multiple of the requested alignment and the base is aligned.
@@ -653,7 +678,7 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, AAResults &AA,
// Try to find an available value first, and delay expensive alias analysis
// queries until later.
- Value *Available = nullptr;;
+ Value *Available = nullptr;
SmallVector<Instruction *> MustNotAliasInsts;
for (Instruction &Inst : make_range(++Load->getReverseIterator(),
ScanBB->rend())) {
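
For intuition about the AccessSize computation in the isDereferenceableAndAlignedInLoop hunk above, here is a worked example with invented numbers: an i32 load at a[2 * i] has an EltSize of 4 bytes and a constant Step of 8 bytes, so with a maximum trip count of 100 the code conservatively treats TC * Step bytes from the base as accessed (the TODO notes that (TC - 1) * Step + EltSize would be tighter).

  #include <cstdint>

  // Illustrative arithmetic only; the names mirror the variables in the patch.
  constexpr uint64_t EltSize = 4;  // bytes per i32 element
  constexpr uint64_t Step = 8;     // byte stride of the AddRec (every other element)
  constexpr uint64_t TC = 100;     // maximum trip count
  static_assert(EltSize <= Step, "overlapping accesses are rejected");
  constexpr uint64_t AccessSize = TC * Step;                  // 800, what the patch computes
  constexpr uint64_t TighterSize = (TC - 1) * Step + EltSize; // 796, per the TODO
  static_assert(AccessSize == 800 && TighterSize == 796, "worked example checks out");
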
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 9e110567e98e..fd0e81c51ac8 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -43,6 +43,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -136,37 +137,37 @@ static cl::opt<unsigned> MaxForkedSCEVDepth(
cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
cl::init(5));
+static cl::opt<bool> SpeculateUnitStride(
+ "laa-speculate-unit-stride", cl::Hidden,
+ cl::desc("Speculate that non-constant strides are unit in LAA"),
+ cl::init(true));
+
bool VectorizerParams::isInterleaveForced() {
return ::VectorizationInterleave.getNumOccurrences() > 0;
}
-Value *llvm::stripIntegerCast(Value *V) {
- if (auto *CI = dyn_cast<CastInst>(V))
- if (CI->getOperand(0)->getType()->isIntegerTy())
- return CI->getOperand(0);
- return V;
-}
-
const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
- const ValueToValueMap &PtrToStride,
+ const DenseMap<Value *, const SCEV *> &PtrToStride,
Value *Ptr) {
const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
// If there is an entry in the map return the SCEV of the pointer with the
// symbolic stride replaced by one.
- ValueToValueMap::const_iterator SI = PtrToStride.find(Ptr);
+ DenseMap<Value *, const SCEV *>::const_iterator SI = PtrToStride.find(Ptr);
if (SI == PtrToStride.end())
// For a non-symbolic stride, just return the original expression.
return OrigSCEV;
- Value *StrideVal = stripIntegerCast(SI->second);
+ const SCEV *StrideSCEV = SI->second;
+ // Note: This assert is both overly strong and overly weak. The actual
+ // invariant here is that StrideSCEV should be loop invariant. The only
+ // such invariant strides we happen to speculate right now are unknowns
+ // and thus this is a reasonable proxy of the actual invariant.
+ assert(isa<SCEVUnknown>(StrideSCEV) && "shouldn't be in map");
ScalarEvolution *SE = PSE.getSE();
- const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
- const auto *CT =
- static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));
-
- PSE.addPredicate(*SE->getEqualPredicate(U, CT));
+ const auto *CT = SE->getOne(StrideSCEV->getType());
+ PSE.addPredicate(*SE->getEqualPredicate(StrideSCEV, CT));
auto *Expr = PSE.getSCEV(Ptr);
LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
@@ -231,6 +232,9 @@ void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
}
}
+ assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant");
+ assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");
+
// Add the size of the pointed element to ScEnd.
auto &DL = Lp->getHeader()->getModule()->getDataLayout();
Type *IdxTy = DL.getIndexType(Ptr->getType());
@@ -652,7 +656,7 @@ public:
/// the bounds of the pointer.
bool createCheckForAccess(RuntimePointerChecking &RtCheck,
MemAccessInfo Access, Type *AccessTy,
- const ValueToValueMap &Strides,
+ const DenseMap<Value *, const SCEV *> &Strides,
DenseMap<Value *, unsigned> &DepSetId,
Loop *TheLoop, unsigned &RunningDepId,
unsigned ASId, bool ShouldCheckStride, bool Assume);
@@ -663,7 +667,7 @@ public:
/// Returns true if we need no check or if we do and we can generate them
/// (i.e. the pointers have computable bounds).
bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
- Loop *TheLoop, const ValueToValueMap &Strides,
+ Loop *TheLoop, const DenseMap<Value *, const SCEV *> &Strides,
Value *&UncomputablePtr, bool ShouldCheckWrap = false);
/// Goes over all memory accesses, checks whether a RT check is needed
@@ -758,7 +762,7 @@ static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr,
/// Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
- const ValueToValueMap &Strides, Value *Ptr, Type *AccessTy,
+ const DenseMap<Value *, const SCEV *> &Strides, Value *Ptr, Type *AccessTy,
Loop *L) {
const SCEV *PtrScev = PSE.getSCEV(Ptr);
if (PSE.getSE()->isLoopInvariant(PtrScev, L))
@@ -951,7 +955,7 @@ static void findForkedSCEVs(
static SmallVector<PointerIntPair<const SCEV *, 1, bool>>
findForkedPointer(PredicatedScalarEvolution &PSE,
- const ValueToValueMap &StridesMap, Value *Ptr,
+ const DenseMap<Value *, const SCEV *> &StridesMap, Value *Ptr,
const Loop *L) {
ScalarEvolution *SE = PSE.getSE();
assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
@@ -976,7 +980,7 @@ findForkedPointer(PredicatedScalarEvolution &PSE,
bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
MemAccessInfo Access, Type *AccessTy,
- const ValueToValueMap &StridesMap,
+ const DenseMap<Value *, const SCEV *> &StridesMap,
DenseMap<Value *, unsigned> &DepSetId,
Loop *TheLoop, unsigned &RunningDepId,
unsigned ASId, bool ShouldCheckWrap,
@@ -1037,7 +1041,7 @@ bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
ScalarEvolution *SE, Loop *TheLoop,
- const ValueToValueMap &StridesMap,
+ const DenseMap<Value *, const SCEV *> &StridesMap,
Value *&UncomputablePtr, bool ShouldCheckWrap) {
// Find pointers with computable bounds. We are going to use this information
// to place a runtime bound check.
@@ -1311,20 +1315,18 @@ void AccessAnalysis::processMemAccesses() {
}
}
-static bool isInBoundsGep(Value *Ptr) {
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
- return GEP->isInBounds();
- return false;
-}
-
/// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
/// i.e. monotonically increasing/decreasing.
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
PredicatedScalarEvolution &PSE, const Loop *L) {
+
// FIXME: This should probably only return true for NUW.
if (AR->getNoWrapFlags(SCEV::NoWrapMask))
return true;
+ if (PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
+ return true;
+
// Scalar evolution does not propagate the non-wrapping flags to values that
// are derived from a non-wrapping induction variable because non-wrapping
// could be flow-sensitive.
@@ -1369,7 +1371,7 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE,
Type *AccessTy, Value *Ptr,
const Loop *Lp,
- const ValueToValueMap &StridesMap,
+ const DenseMap<Value *, const SCEV *> &StridesMap,
bool Assume, bool ShouldCheckWrap) {
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
@@ -1399,35 +1401,6 @@ std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE,
return std::nullopt;
}
- // The address calculation must not wrap. Otherwise, a dependence could be
- // inverted.
- // An inbounds getelementptr that is a AddRec with a unit stride
- // cannot wrap per definition. The unit stride requirement is checked later.
- // An getelementptr without an inbounds attribute and unit stride would have
- // to access the pointer value "0" which is undefined behavior in address
- // space 0, therefore we can also vectorize this case.
- unsigned AddrSpace = Ty->getPointerAddressSpace();
- bool IsInBoundsGEP = isInBoundsGep(Ptr);
- bool IsNoWrapAddRec = !ShouldCheckWrap ||
- PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
- isNoWrapAddRec(Ptr, AR, PSE, Lp);
- if (!IsNoWrapAddRec && !IsInBoundsGEP &&
- NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace)) {
- if (Assume) {
- PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
- IsNoWrapAddRec = true;
- LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
- << "LAA: Pointer: " << *Ptr << "\n"
- << "LAA: SCEV: " << *AR << "\n"
- << "LAA: Added an overflow assumption\n");
- } else {
- LLVM_DEBUG(
- dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
- << *Ptr << " SCEV: " << *AR << "\n");
- return std::nullopt;
- }
- }
-
// Check the step is constant.
const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
@@ -1456,25 +1429,42 @@ std::optional<int64_t> llvm::getPtrStride(PredicatedScalarEvolution &PSE,
if (Rem)
return std::nullopt;
- // If the SCEV could wrap but we have an inbounds gep with a unit stride we
- // know we can't "wrap around the address space". In case of address space
- // zero we know that this won't happen without triggering undefined behavior.
- if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
- (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
- AddrSpace))) {
- if (Assume) {
- // We can avoid this case by adding a run-time check.
- LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
- << "inbounds or in address space 0 may wrap:\n"
- << "LAA: Pointer: " << *Ptr << "\n"
- << "LAA: SCEV: " << *AR << "\n"
- << "LAA: Added an overflow assumption\n");
- PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
- } else
- return std::nullopt;
- }
+ if (!ShouldCheckWrap)
+ return Stride;
+
+ // The address calculation must not wrap. Otherwise, a dependence could be
+ // inverted.
+ if (isNoWrapAddRec(Ptr, AR, PSE, Lp))
+ return Stride;
- return Stride;
+ // An inbounds getelementptr that is an AddRec with a unit stride
+ // cannot wrap per definition. If it did, the result would be poison
+ // and any memory access dependent on it would be immediate UB
+ // when executed.
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ GEP && GEP->isInBounds() && (Stride == 1 || Stride == -1))
+ return Stride;
+
+ // If the null pointer is undefined, then an access sequence which would
+ // otherwise access it can be assumed not to unsigned wrap. Note that this
+ // assumes the object in memory is aligned to the natural alignment.
+ unsigned AddrSpace = Ty->getPointerAddressSpace();
+ if (!NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace) &&
+ (Stride == 1 || Stride == -1))
+ return Stride;
+
+ if (Assume) {
+ PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
+ LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n"
+ << "LAA: Pointer: " << *Ptr << "\n"
+ << "LAA: SCEV: " << *AR << "\n"
+ << "LAA: Added an overflow assumption\n");
+ return Stride;
+ }
+ LLVM_DEBUG(
+ dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
+ << *Ptr << " SCEV: " << *AR << "\n");
+ return std::nullopt;
}
std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
@@ -1483,10 +1473,6 @@ std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
ScalarEvolution &SE, bool StrictCheck,
bool CheckType) {
assert(PtrA && PtrB && "Expected non-nullptr pointers.");
- assert(cast<PointerType>(PtrA->getType())
- ->isOpaqueOrPointeeTypeMatches(ElemTyA) && "Wrong PtrA type");
- assert(cast<PointerType>(PtrB->getType())
- ->isOpaqueOrPointeeTypeMatches(ElemTyB) && "Wrong PtrB type");
// Make sure that A and B are different pointers.
if (PtrA == PtrB)
@@ -1830,7 +1816,7 @@ static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
MemoryDepChecker::Dependence::DepType
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
const MemAccessInfo &B, unsigned BIdx,
- const ValueToValueMap &Strides) {
+ const DenseMap<Value *, const SCEV *> &Strides) {
assert (AIdx < BIdx && "Must pass arguments in program order");
auto [APtr, AIsWrite] = A;
@@ -2024,7 +2010,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
MemAccessInfoList &CheckDeps,
- const ValueToValueMap &Strides) {
+ const DenseMap<Value *, const SCEV *> &Strides) {
MaxSafeDepDistBytes = -1;
SmallPtrSet<MemAccessInfo, 8> Visited;
@@ -2303,7 +2289,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
for (StoreInst *ST : Stores) {
Value *Ptr = ST->getPointerOperand();
- if (isUniform(Ptr)) {
+ if (isInvariant(Ptr)) {
// Record store instructions to loop invariant addresses
StoresToInvariantAddresses.push_back(ST);
HasDependenceInvolvingLoopInvariantAddress |=
@@ -2545,15 +2531,151 @@ OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
return *Report;
}
-bool LoopAccessInfo::isUniform(Value *V) const {
+bool LoopAccessInfo::isInvariant(Value *V) const {
auto *SE = PSE->getSE();
- // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
- // never considered uniform.
// TODO: Is this really what we want? Even without FP SCEV, we may want some
- // trivially loop-invariant FP values to be considered uniform.
+ // trivially loop-invariant FP values to be considered invariant.
if (!SE->isSCEVable(V->getType()))
return false;
- return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
+ const SCEV *S = SE->getSCEV(V);
+ return SE->isLoopInvariant(S, TheLoop);
+}
+
+/// Find the operand of the GEP that should be checked for consecutive
+/// stores. This ignores trailing indices that have no effect on the final
+/// pointer.
+static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {
+ const DataLayout &DL = Gep->getModule()->getDataLayout();
+ unsigned LastOperand = Gep->getNumOperands() - 1;
+ TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
+
+ // Walk backwards and try to peel off zeros.
+ while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
+ // Find the type we're currently indexing into.
+ gep_type_iterator GEPTI = gep_type_begin(Gep);
+ std::advance(GEPTI, LastOperand - 2);
+
+ // If it's a type with the same allocation size as the result of the GEP we
+ // can peel off the zero index.
+ if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
+ break;
+ --LastOperand;
+ }
+
+ return LastOperand;
+}
+
+/// If the argument is a GEP, then returns the operand identified by
+/// getGEPInductionOperand. However, if there is some other non-loop-invariant
+/// operand, it returns that instead.
+static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
+ if (!GEP)
+ return Ptr;
+
+ unsigned InductionOperand = getGEPInductionOperand(GEP);
+
+ // Check that all of the gep indices are uniform except for our induction
+ // operand.
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
+ if (i != InductionOperand &&
+ !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
+ return Ptr;
+ return GEP->getOperand(InductionOperand);
+}
+
+/// If a value has only one user that is a CastInst, return it.
+static Value *getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
+ Value *UniqueCast = nullptr;
+ for (User *U : Ptr->users()) {
+ CastInst *CI = dyn_cast<CastInst>(U);
+ if (CI && CI->getType() == Ty) {
+ if (!UniqueCast)
+ UniqueCast = CI;
+ else
+ return nullptr;
+ }
+ }
+ return UniqueCast;
+}
+
+/// Get the stride of a pointer access in a loop. Looks for symbolic
+/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
+static const SCEV *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
+ auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
+ if (!PtrTy || PtrTy->isAggregateType())
+ return nullptr;
+
+ // Try to remove a gep instruction to make the pointer (actually index at this
+ // point) easier to analyze. If OrigPtr is equal to Ptr, we are analyzing the
+ // pointer, otherwise, we are analyzing the index.
+ Value *OrigPtr = Ptr;
+
+ // The size of the pointer access.
+ int64_t PtrAccessSize = 1;
+
+ Ptr = stripGetElementPtr(Ptr, SE, Lp);
+ const SCEV *V = SE->getSCEV(Ptr);
+
+ if (Ptr != OrigPtr)
+ // Strip off casts.
+ while (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V))
+ V = C->getOperand();
+
+ const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
+ if (!S)
+ return nullptr;
+
+ // If the pointer is invariant then there is no stride and it makes no
+ // sense to add it here.
+ if (Lp != S->getLoop())
+ return nullptr;
+
+ V = S->getStepRecurrence(*SE);
+ if (!V)
+ return nullptr;
+
+ // Strip off the size of access multiplication if we are still analyzing the
+ // pointer.
+ if (OrigPtr == Ptr) {
+ if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
+ if (M->getOperand(0)->getSCEVType() != scConstant)
+ return nullptr;
+
+ const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
+
+ // Huge step value - give up.
+ if (APStepVal.getBitWidth() > 64)
+ return nullptr;
+
+ int64_t StepVal = APStepVal.getSExtValue();
+ if (PtrAccessSize != StepVal)
+ return nullptr;
+ V = M->getOperand(1);
+ }
+ }
+
+ // Note that the restrictions after this loop invariant check are only
+ // profitability restrictions.
+ if (!SE->isLoopInvariant(V, Lp))
+ return nullptr;
+
+ // Look for the loop invariant symbolic value.
+ const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
+ if (!U) {
+ const auto *C = dyn_cast<SCEVIntegralCastExpr>(V);
+ if (!C)
+ return nullptr;
+ U = dyn_cast<SCEVUnknown>(C->getOperand());
+ if (!U)
+ return nullptr;
+
+ // Match legacy behavior - this is not needed for correctness
+ if (!getUniqueCastUse(U->getValue(), Lp, V->getType()))
+ return nullptr;
+ }
+
+ return V;
}
void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
@@ -2561,13 +2683,24 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
if (!Ptr)
return;
- Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
- if (!Stride)
+ // Note: getStrideFromPointer is a *profitability* heuristic. We
+ // could broaden the scope of values returned here - to anything
+ // which happens to be loop invariant and contributes to the
+ // computation of an interesting IV - but we chose not to as we
+ // don't have a cost model here, and broadening the scope exposes
+ // far too many unprofitable cases.
+ const SCEV *StrideExpr = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
+ if (!StrideExpr)
return;
LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
"versioning:");
- LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
+ LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *StrideExpr << "\n");
+
+ if (!SpeculateUnitStride) {
+ LLVM_DEBUG(dbgs() << " Chose not to due to -laa-speculate-unit-stride\n");
+ return;
+ }
// Avoid adding the "Stride == 1" predicate when we know that
// Stride >= Trip-Count. Such a predicate will effectively optimize a single
@@ -2582,7 +2715,6 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
// of various possible stride specializations, considering the alternatives
// of using gather/scatters (if available).
- const SCEV *StrideExpr = PSE->getSCEV(Stride);
const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
// Match the types so we can compare the stride and the BETakenCount.
@@ -2611,8 +2743,12 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
}
LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");
- SymbolicStrides[Ptr] = Stride;
- StrideSet.insert(Stride);
+ // Strip back off the integer cast, and check that our result is a
+ // SCEVUnknown as we expect.
+ const SCEV *StrideBase = StrideExpr;
+ if (const auto *C = dyn_cast<SCEVIntegralCastExpr>(StrideBase))
+ StrideBase = C->getOperand();
+ SymbolicStrides[Ptr] = cast<SCEVUnknown>(StrideBase);
}
LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
@@ -2680,55 +2816,32 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) {
return *I.first->second;
}
-LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis() : FunctionPass(ID) {
- initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry());
-}
-
-bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
- auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
- auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- LAIs = std::make_unique<LoopAccessInfoManager>(SE, AA, DT, LI, TLI);
- return false;
-}
-
-void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
- AU.addRequiredTransitive<AAResultsWrapperPass>();
- AU.addRequiredTransitive<DominatorTreeWrapperPass>();
- AU.addRequiredTransitive<LoopInfoWrapperPass>();
+bool LoopAccessInfoManager::invalidate(
+ Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // Check whether our analysis is preserved.
+ auto PAC = PA.getChecker<LoopAccessAnalysis>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
+ // If not, give up now.
+ return true;
- AU.setPreservesAll();
+ // Check whether the analyses we depend on became invalid for any reason.
+ // Skip checking TargetLibraryAnalysis as it is immutable and can't become
+ // invalid.
+ return Inv.invalidate<AAManager>(F, PA) ||
+ Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
+ Inv.invalidate<LoopAnalysis>(F, PA) ||
+ Inv.invalidate<DominatorTreeAnalysis>(F, PA);
}
LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
- FunctionAnalysisManager &AM) {
- return LoopAccessInfoManager(
- AM.getResult<ScalarEvolutionAnalysis>(F), AM.getResult<AAManager>(F),
- AM.getResult<DominatorTreeAnalysis>(F), AM.getResult<LoopAnalysis>(F),
- &AM.getResult<TargetLibraryAnalysis>(F));
+ FunctionAnalysisManager &FAM) {
+ auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
+ auto &AA = FAM.getResult<AAManager>(F);
+ auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
+ auto &LI = FAM.getResult<LoopAnalysis>(F);
+ auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+ return LoopAccessInfoManager(SE, AA, DT, LI, &TLI);
}
-char LoopAccessLegacyAnalysis::ID = 0;
-static const char laa_name[] = "Loop Access Analysis";
-#define LAA_NAME "loop-accesses"
-
-INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
-
AnalysisKey LoopAccessAnalysis::Key;
-
-namespace llvm {
-
- Pass *createLAAPass() {
- return new LoopAccessLegacyAnalysis();
- }
-
-} // end namespace llvm
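
The getStrideFromPointer and collectStridedAccess changes above operate on source patterns like a[i * stride], where the stride is loop invariant but unknown at compile time. A hedged example of such a loop (not taken from the patch); under -laa-speculate-unit-stride, LAA records the symbolic stride so later passes can version the loop under a "Stride == 1" runtime predicate:

  // Illustrative C++ source only; A, B, N, and Stride are made-up names.
  void scaleStrided(float *A, const float *B, long N, long Stride) {
    for (long I = 0; I < N; ++I)
      A[I * Stride] = 2.0f * B[I * Stride];
  }
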
diff --git a/llvm/lib/Analysis/LoopCacheAnalysis.cpp b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
index 46198f78b643..c3a56639b5c8 100644
--- a/llvm/lib/Analysis/LoopCacheAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopCacheAnalysis.cpp
@@ -297,7 +297,7 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L,
Type *WiderType = SE.getWiderType(Stride->getType(), TripCount->getType());
const SCEV *CacheLineSize = SE.getConstant(WiderType, CLS);
Stride = SE.getNoopOrAnyExtend(Stride, WiderType);
- TripCount = SE.getNoopOrAnyExtend(TripCount, WiderType);
+ TripCount = SE.getNoopOrZeroExtend(TripCount, WiderType);
const SCEV *Numerator = SE.getMulExpr(Stride, TripCount);
RefCost = SE.getUDivExpr(Numerator, CacheLineSize);
@@ -323,8 +323,8 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L,
const SCEV *TripCount =
computeTripCount(*AR->getLoop(), *Sizes.back(), SE);
Type *WiderType = SE.getWiderType(RefCost->getType(), TripCount->getType());
- RefCost = SE.getMulExpr(SE.getNoopOrAnyExtend(RefCost, WiderType),
- SE.getNoopOrAnyExtend(TripCount, WiderType));
+ RefCost = SE.getMulExpr(SE.getNoopOrZeroExtend(RefCost, WiderType),
+ SE.getNoopOrZeroExtend(TripCount, WiderType));
}
LLVM_DEBUG(dbgs().indent(4)
@@ -334,7 +334,7 @@ CacheCostTy IndexedReference::computeRefCost(const Loop &L,
// Attempt to fold RefCost into a constant.
if (auto ConstantCost = dyn_cast<SCEVConstant>(RefCost))
- return ConstantCost->getValue()->getSExtValue();
+ return ConstantCost->getValue()->getZExtValue();
LLVM_DEBUG(dbgs().indent(4)
<< "RefCost is not a constant! Setting to RefCost=InvalidCost "
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 69bcbcb11203..60a72079e864 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/IVDescriptors.h"
-#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopNestAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
@@ -36,6 +35,7 @@
#include "llvm/IR/PrintPasses.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/GenericLoopInfoImpl.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -737,7 +737,7 @@ void UnloopUpdater::updateBlockParents() {
bool Changed = FoundIB;
for (unsigned NIters = 0; Changed; ++NIters) {
assert(NIters < Unloop.getNumBlocks() && "runaway iterative algorithm");
- (void) NIters;
+ (void)NIters;
// Iterate over the postorder list of blocks, propagating the nearest loop
// from successors to predecessors as before.
@@ -929,9 +929,8 @@ void LoopInfo::erase(Loop *Unloop) {
}
}
-bool
-LoopInfo::wouldBeOutOfLoopUseRequiringLCSSA(const Value *V,
- const BasicBlock *ExitBB) const {
+bool LoopInfo::wouldBeOutOfLoopUseRequiringLCSSA(
+ const Value *V, const BasicBlock *ExitBB) const {
if (V->getType()->isTokenTy())
// We can't form PHIs of token type, so the definition of LCSSA excludes
// values of that type.
diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
index a20c05243b77..0660a9993b6d 100644
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -18,10 +18,12 @@
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/InlineModelFeatureMaps.h"
+#include "llvm/Analysis/InteractiveModelRunner.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
@@ -30,19 +32,50 @@
using namespace llvm;
+static cl::opt<std::string> InteractiveChannelBaseName(
+ "inliner-interactive-channel-base", cl::Hidden,
+ cl::desc(
+ "Base file path for the interactive mode. The incoming filename should "
+ "have the name <inliner-interactive-channel-base>.in, while the "
+ "outgoing name should be <inliner-interactive-channel-base>.out"));
+static const std::string InclDefaultMsg =
+ (Twine("In interactive mode, also send the default policy decision: ") +
+ DefaultDecisionName + ".")
+ .str();
+static cl::opt<bool>
+ InteractiveIncludeDefault("inliner-interactive-include-default", cl::Hidden,
+ cl::desc(InclDefaultMsg));
+
#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
-#include "llvm/Analysis/ReleaseModeModelRunner.h"
// codegen-ed file
#include "InlinerSizeModel.h" // NOLINT
+using CompiledModelType = llvm::InlinerSizeModel;
+#else
+using CompiledModelType = NoopSavedModelImpl;
+#endif
std::unique_ptr<InlineAdvisor>
-llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
- auto AOTRunner =
- std::make_unique<ReleaseModeModelRunner<llvm::InlinerSizeModel>>(
- M.getContext(), FeatureMap, DecisionName);
- return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner));
+llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM,
+ std::function<bool(CallBase &)> GetDefaultAdvice) {
+ if (!llvm::isEmbeddedModelEvaluatorValid<CompiledModelType>() &&
+ InteractiveChannelBaseName.empty())
+ return nullptr;
+ std::unique_ptr<MLModelRunner> AOTRunner;
+ if (InteractiveChannelBaseName.empty())
+ AOTRunner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
+ M.getContext(), FeatureMap, DecisionName);
+ else {
+ auto Features = FeatureMap;
+ if (InteractiveIncludeDefault)
+ Features.push_back(DefaultDecisionSpec);
+ AOTRunner = std::make_unique<InteractiveModelRunner>(
+ M.getContext(), Features, InlineDecisionSpec,
+ InteractiveChannelBaseName + ".out",
+ InteractiveChannelBaseName + ".in");
+ }
+ return std::make_unique<MLInlineAdvisor>(M, MAM, std::move(AOTRunner),
+ GetDefaultAdvice);
}
-#endif
#define DEBUG_TYPE "inline-ml"
@@ -59,21 +92,23 @@ static cl::opt<bool> KeepFPICache(
cl::init(false));
// clang-format off
-const std::array<TensorSpec, NumberOfFeatures> llvm::FeatureMap{
-#define POPULATE_NAMES(_, NAME) TensorSpec::createSpec<int64_t>(NAME, {1} ),
+const std::vector<TensorSpec> llvm::FeatureMap{
+#define POPULATE_NAMES(DTYPE, SHAPE, NAME, __) TensorSpec::createSpec<DTYPE>(#NAME, SHAPE),
// InlineCost features - these must come first
INLINE_COST_FEATURE_ITERATOR(POPULATE_NAMES)
-#undef POPULATE_NAMES
// Non-cost features
-#define POPULATE_NAMES(_, NAME, __) TensorSpec::createSpec<int64_t>(NAME, {1} ),
INLINE_FEATURE_ITERATOR(POPULATE_NAMES)
#undef POPULATE_NAMES
};
// clang-format on
const char *const llvm::DecisionName = "inlining_decision";
+const TensorSpec llvm::InlineDecisionSpec =
+ TensorSpec::createSpec<int64_t>(DecisionName, {1});
const char *const llvm::DefaultDecisionName = "inlining_default";
+const TensorSpec llvm::DefaultDecisionSpec =
+ TensorSpec::createSpec<int64_t>(DefaultDecisionName, {1});
const char *const llvm::RewardName = "delta_size";
CallBase *getInlinableCS(Instruction &I) {
@@ -86,15 +121,17 @@ CallBase *getInlinableCS(Instruction &I) {
return nullptr;
}
-MLInlineAdvisor::MLInlineAdvisor(Module &M, ModuleAnalysisManager &MAM,
- std::unique_ptr<MLModelRunner> Runner)
+MLInlineAdvisor::MLInlineAdvisor(
+ Module &M, ModuleAnalysisManager &MAM,
+ std::unique_ptr<MLModelRunner> Runner,
+ std::function<bool(CallBase &)> GetDefaultAdvice)
: InlineAdvisor(
M, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager()),
- ModelRunner(std::move(Runner)),
+ ModelRunner(std::move(Runner)), GetDefaultAdvice(GetDefaultAdvice),
CG(MAM.getResult<LazyCallGraphAnalysis>(M)),
InitialIRSize(getModuleIRSize()), CurrentIRSize(InitialIRSize) {
assert(ModelRunner);
-
+ ModelRunner->switchContext("");
// Extract the 'call site height' feature - the position of a call site
// relative to the farthest statically reachable SCC node. We don't mutate
// this value while inlining happens. Empirically, this feature proved
@@ -344,26 +381,27 @@ std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
auto &CallerBefore = getCachedFPI(Caller);
auto &CalleeBefore = getCachedFPI(Callee);
- *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeBasicBlockCount) =
+ *ModelRunner->getTensor<int64_t>(FeatureIndex::callee_basic_block_count) =
CalleeBefore.BasicBlockCount;
- *ModelRunner->getTensor<int64_t>(FeatureIndex::CallSiteHeight) =
+ *ModelRunner->getTensor<int64_t>(FeatureIndex::callsite_height) =
getInitialFunctionLevel(Caller);
- *ModelRunner->getTensor<int64_t>(FeatureIndex::NodeCount) = NodeCount;
- *ModelRunner->getTensor<int64_t>(FeatureIndex::NrCtantParams) = NrCtantParams;
- *ModelRunner->getTensor<int64_t>(FeatureIndex::EdgeCount) = EdgeCount;
- *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerUsers) =
+ *ModelRunner->getTensor<int64_t>(FeatureIndex::node_count) = NodeCount;
+ *ModelRunner->getTensor<int64_t>(FeatureIndex::nr_ctant_params) =
+ NrCtantParams;
+ *ModelRunner->getTensor<int64_t>(FeatureIndex::edge_count) = EdgeCount;
+ *ModelRunner->getTensor<int64_t>(FeatureIndex::caller_users) =
CallerBefore.Uses;
*ModelRunner->getTensor<int64_t>(
- FeatureIndex::CallerConditionallyExecutedBlocks) =
+ FeatureIndex::caller_conditionally_executed_blocks) =
CallerBefore.BlocksReachedFromConditionalInstruction;
- *ModelRunner->getTensor<int64_t>(FeatureIndex::CallerBasicBlockCount) =
+ *ModelRunner->getTensor<int64_t>(FeatureIndex::caller_basic_block_count) =
CallerBefore.BasicBlockCount;
*ModelRunner->getTensor<int64_t>(
- FeatureIndex::CalleeConditionallyExecutedBlocks) =
+ FeatureIndex::callee_conditionally_executed_blocks) =
CalleeBefore.BlocksReachedFromConditionalInstruction;
- *ModelRunner->getTensor<int64_t>(FeatureIndex::CalleeUsers) =
+ *ModelRunner->getTensor<int64_t>(FeatureIndex::callee_users) =
CalleeBefore.Uses;
- *ModelRunner->getTensor<int64_t>(FeatureIndex::CostEstimate) = CostEstimate;
+ *ModelRunner->getTensor<int64_t>(FeatureIndex::cost_estimate) = CostEstimate;
// Add the cost features
for (size_t I = 0;
@@ -371,7 +409,10 @@ std::unique_ptr<InlineAdvice> MLInlineAdvisor::getAdviceImpl(CallBase &CB) {
*ModelRunner->getTensor<int64_t>(inlineCostFeatureToMlFeature(
static_cast<InlineCostFeatureIndex>(I))) = CostFeatures->at(I);
}
-
+ // This one would have been set up to be right at the end.
+ if (!InteractiveChannelBaseName.empty() && InteractiveIncludeDefault)
+ *ModelRunner->getTensor<int64_t>(InlineCostFeatureIndex::NumberOfFeatures) =
+ GetDefaultAdvice(CB);
return getAdviceFromModel(CB, ORE);
}
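
The new interactive mode above pairs the advisor with an external policy process over two files derived from -inliner-interactive-channel-base (<base>.out outgoing from the compiler, <base>.in incoming). A minimal sketch of constructing such a runner, mirroring what getReleaseModeAdvisor does; the channel path, the helper name, and the inline annotations are hypothetical, not the in-tree code:

#include "llvm/Analysis/InlineModelFeatureMaps.h"
#include "llvm/Analysis/InteractiveModelRunner.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/IR/Module.h"
#include <memory>
#include <vector>

static std::unique_ptr<llvm::MLModelRunner>
makeInteractiveRunnerSketch(llvm::Module &M, bool IncludeDefault) {
  // Same feature list the advisor uses, optionally with the trailing
  // default-decision tensor (what -inliner-interactive-include-default adds).
  std::vector<llvm::TensorSpec> Features = llvm::FeatureMap;
  if (IncludeDefault)
    Features.push_back(llvm::DefaultDecisionSpec);
  return std::make_unique<llvm::InteractiveModelRunner>(
      M.getContext(), Features, llvm::InlineDecisionSpec,
      /*outgoing*/ "/tmp/inliner-channel.out",  // compiler -> external policy
      /*incoming*/ "/tmp/inliner-channel.in");  // external policy -> compiler
}
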
diff --git a/llvm/lib/Analysis/MemDepPrinter.cpp b/llvm/lib/Analysis/MemDepPrinter.cpp
deleted file mode 100644
index 305ae3e2a992..000000000000
--- a/llvm/lib/Analysis/MemDepPrinter.cpp
+++ /dev/null
@@ -1,164 +0,0 @@
-//===- MemDepPrinter.cpp - Printer for MemoryDependenceAnalysis -----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SetVector.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/MemoryDependenceAnalysis.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
- struct MemDepPrinter : public FunctionPass {
- const Function *F;
-
- enum DepType {
- Clobber = 0,
- Def,
- NonFuncLocal,
- Unknown
- };
-
- static const char *const DepTypeStr[];
-
- typedef PointerIntPair<const Instruction *, 2, DepType> InstTypePair;
- typedef std::pair<InstTypePair, const BasicBlock *> Dep;
- typedef SmallSetVector<Dep, 4> DepSet;
- typedef DenseMap<const Instruction *, DepSet> DepSetMap;
- DepSetMap Deps;
-
- static char ID; // Pass identification, replacement for typeid
- MemDepPrinter() : FunctionPass(ID) {
- initializeMemDepPrinterPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void print(raw_ostream &OS, const Module * = nullptr) const override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredTransitive<AAResultsWrapperPass>();
- AU.addRequiredTransitive<MemoryDependenceWrapperPass>();
- AU.setPreservesAll();
- }
-
- void releaseMemory() override {
- Deps.clear();
- F = nullptr;
- }
-
- private:
- static InstTypePair getInstTypePair(MemDepResult dep) {
- if (dep.isClobber())
- return InstTypePair(dep.getInst(), Clobber);
- if (dep.isDef())
- return InstTypePair(dep.getInst(), Def);
- if (dep.isNonFuncLocal())
- return InstTypePair(dep.getInst(), NonFuncLocal);
- assert(dep.isUnknown() && "unexpected dependence type");
- return InstTypePair(dep.getInst(), Unknown);
- }
- };
-}
-
-char MemDepPrinter::ID = 0;
-INITIALIZE_PASS_BEGIN(MemDepPrinter, "print-memdeps",
- "Print MemDeps of function", false, true)
-INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
-INITIALIZE_PASS_END(MemDepPrinter, "print-memdeps",
- "Print MemDeps of function", false, true)
-
-FunctionPass *llvm::createMemDepPrinter() {
- return new MemDepPrinter();
-}
-
-const char *const MemDepPrinter::DepTypeStr[]
- = {"Clobber", "Def", "NonFuncLocal", "Unknown"};
-
-bool MemDepPrinter::runOnFunction(Function &F) {
- this->F = &F;
- MemoryDependenceResults &MDA = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
-
- // All this code uses non-const interfaces because MemDep is not
- // const-friendly, though nothing is actually modified.
- for (auto &I : instructions(F)) {
- Instruction *Inst = &I;
-
- if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory())
- continue;
-
- MemDepResult Res = MDA.getDependency(Inst);
- if (!Res.isNonLocal()) {
- Deps[Inst].insert(std::make_pair(getInstTypePair(Res),
- static_cast<BasicBlock *>(nullptr)));
- } else if (auto *Call = dyn_cast<CallBase>(Inst)) {
- const MemoryDependenceResults::NonLocalDepInfo &NLDI =
- MDA.getNonLocalCallDependency(Call);
-
- DepSet &InstDeps = Deps[Inst];
- for (const NonLocalDepEntry &I : NLDI) {
- const MemDepResult &Res = I.getResult();
- InstDeps.insert(std::make_pair(getInstTypePair(Res), I.getBB()));
- }
- } else {
- SmallVector<NonLocalDepResult, 4> NLDI;
- assert( (isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
- isa<VAArgInst>(Inst)) && "Unknown memory instruction!");
- MDA.getNonLocalPointerDependency(Inst, NLDI);
-
- DepSet &InstDeps = Deps[Inst];
- for (const NonLocalDepResult &I : NLDI) {
- const MemDepResult &Res = I.getResult();
- InstDeps.insert(std::make_pair(getInstTypePair(Res), I.getBB()));
- }
- }
- }
-
- return false;
-}
-
-void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
- for (const auto &I : instructions(*F)) {
- const Instruction *Inst = &I;
-
- DepSetMap::const_iterator DI = Deps.find(Inst);
- if (DI == Deps.end())
- continue;
-
- const DepSet &InstDeps = DI->second;
-
- for (const auto &I : InstDeps) {
- const Instruction *DepInst = I.first.getPointer();
- DepType type = I.first.getInt();
- const BasicBlock *DepBB = I.second;
-
- OS << " ";
- OS << DepTypeStr[type];
- if (DepBB) {
- OS << " in block ";
- DepBB->printAsOperand(OS, /*PrintType=*/false, M);
- }
- if (DepInst) {
- OS << " from: ";
- DepInst->print(OS);
- }
- OS << "\n";
- }
-
- Inst->print(OS);
- OS << "\n\n";
- }
-}
diff --git a/llvm/lib/Analysis/MemDerefPrinter.cpp b/llvm/lib/Analysis/MemDerefPrinter.cpp
index 4dd5c76cc604..2632bc50d6e6 100644
--- a/llvm/lib/Analysis/MemDerefPrinter.cpp
+++ b/llvm/lib/Analysis/MemDerefPrinter.cpp
@@ -18,65 +18,6 @@
using namespace llvm;
-namespace {
- struct MemDerefPrinter : public FunctionPass {
- SmallVector<Value *, 4> Deref;
- SmallPtrSet<Value *, 4> DerefAndAligned;
-
- static char ID; // Pass identification, replacement for typeid
- MemDerefPrinter() : FunctionPass(ID) {
- initializeMemDerefPrinterPass(*PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
- bool runOnFunction(Function &F) override;
- void print(raw_ostream &OS, const Module * = nullptr) const override;
- void releaseMemory() override {
- Deref.clear();
- DerefAndAligned.clear();
- }
- };
-}
-
-char MemDerefPrinter::ID = 0;
-INITIALIZE_PASS_BEGIN(MemDerefPrinter, "print-memderefs",
- "Memory Dereferenciblity of pointers in function", false, true)
-INITIALIZE_PASS_END(MemDerefPrinter, "print-memderefs",
- "Memory Dereferenciblity of pointers in function", false, true)
-
-FunctionPass *llvm::createMemDerefPrinter() {
- return new MemDerefPrinter();
-}
-
-bool MemDerefPrinter::runOnFunction(Function &F) {
- const DataLayout &DL = F.getParent()->getDataLayout();
- for (auto &I: instructions(F)) {
- if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
- Value *PO = LI->getPointerOperand();
- if (isDereferenceablePointer(PO, LI->getType(), DL))
- Deref.push_back(PO);
- if (isDereferenceableAndAlignedPointer(PO, LI->getType(), LI->getAlign(),
- DL))
- DerefAndAligned.insert(PO);
- }
- }
- return false;
-}
-
-void MemDerefPrinter::print(raw_ostream &OS, const Module *M) const {
- OS << "The following are dereferenceable:\n";
- for (Value *V: Deref) {
- OS << " ";
- V->print(OS);
- if (DerefAndAligned.count(V))
- OS << "\t(aligned)";
- else
- OS << "\t(unaligned)";
- OS << "\n";
- }
-}
-
PreservedAnalyses MemDerefPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
OS << "Memory Dereferencibility of pointers in function '" << F.getName()
diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp
index 0edad0557369..53e089ba1fea 100644
--- a/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -115,17 +115,25 @@ static const std::pair<LibFunc, AllocFnsTy> AllocationFnData[] = {
{LibFunc_ZnwjSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned int, align_val_t)
{LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned int, align_val_t, nothrow)
{LibFunc_Znwm, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned long)
+ {LibFunc_Znwm12__hot_cold_t, {OpNewLike, 2, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned long, __hot_cold_t)
{LibFunc_ZnwmRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned long, nothrow)
+ {LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, {MallocLike, 3, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned long, nothrow, __hot_cold_t)
{LibFunc_ZnwmSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned long, align_val_t)
+ {LibFunc_ZnwmSt11align_val_t12__hot_cold_t, {OpNewLike, 3, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned long, align_val_t, __hot_cold_t)
{LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned long, align_val_t, nothrow)
+ {LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t, {MallocLike, 4, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned long, align_val_t, nothrow, __hot_cold_t)
{LibFunc_Znaj, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned int)
{LibFunc_ZnajRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned int, nothrow)
{LibFunc_ZnajSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned int, align_val_t)
{LibFunc_ZnajSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned int, align_val_t, nothrow)
{LibFunc_Znam, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned long)
+ {LibFunc_Znam12__hot_cold_t, {OpNewLike, 2, 0, -1, -1, MallocFamily::CPPNew}}, // new[](unsigned long, __hot_cold_t)
{LibFunc_ZnamRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned long, nothrow)
+ {LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, {MallocLike, 3, 0, -1, -1, MallocFamily::CPPNew}}, // new[](unsigned long, nothrow, __hot_cold_t)
{LibFunc_ZnamSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned long, align_val_t)
+ {LibFunc_ZnamSt11align_val_t12__hot_cold_t, {OpNewLike, 3, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new[](unsigned long, align_val_t, __hot_cold_t)
{LibFunc_ZnamSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned long, align_val_t, nothrow)
+ {LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t, {MallocLike, 4, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new[](unsigned long, align_val_t, nothrow, __hot_cold_t)
{LibFunc_msvc_new_int, {OpNewLike, 1, 0, -1, -1, MallocFamily::MSVCNew}}, // new(unsigned int)
{LibFunc_msvc_new_int_nothrow, {MallocLike, 2, 0, -1, -1, MallocFamily::MSVCNew}}, // new(unsigned int, nothrow)
{LibFunc_msvc_new_longlong, {OpNewLike, 1, 0, -1, -1, MallocFamily::MSVCNew}}, // new(unsigned long long)
@@ -594,10 +602,10 @@ Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
MustSucceed);
}
-Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI, AAResults *AA,
- bool MustSucceed) {
+Value *llvm::lowerObjectSizeCall(
+ IntrinsicInst *ObjectSize, const DataLayout &DL,
+ const TargetLibraryInfo *TLI, AAResults *AA, bool MustSucceed,
+ SmallVectorImpl<Instruction *> *InsertedInstructions) {
assert(ObjectSize->getIntrinsicID() == Intrinsic::objectsize &&
"ObjectSize must be a call to llvm.objectsize!");
@@ -632,7 +640,11 @@ Value *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
Eval.compute(ObjectSize->getArgOperand(0));
if (SizeOffsetPair != ObjectSizeOffsetEvaluator::unknown()) {
- IRBuilder<TargetFolder> Builder(Ctx, TargetFolder(DL));
+ IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
+ Ctx, TargetFolder(DL), IRBuilderCallbackInserter([&](Instruction *I) {
+ if (InsertedInstructions)
+ InsertedInstructions->push_back(I);
+ }));
Builder.SetInsertPoint(ObjectSize);
// If we've outside the end of the object, then we can always access
@@ -818,7 +830,9 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalAlias(GlobalAlias &GA) {
}
SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){
- if (!GV.hasDefinitiveInitializer())
+ if (!GV.getValueType()->isSized() || GV.hasExternalWeakLinkage() ||
+ ((!GV.hasInitializer() || GV.isInterposable()) &&
+ Options.EvalMode != ObjectSizeOpts::Mode::Min))
return unknown();
APInt Size(IntTyBits, DL.getTypeAllocSize(GV.getValueType()));
@@ -976,6 +990,8 @@ SizeOffsetType ObjectSizeOffsetVisitor::combineSizeOffset(SizeOffsetType LHS,
}
SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PN) {
+ if (PN.getNumIncomingValues() == 0)
+ return unknown();
auto IncomingValues = PN.incoming_values();
return std::accumulate(IncomingValues.begin() + 1, IncomingValues.end(),
compute(*IncomingValues.begin()),
@@ -1099,12 +1115,13 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) {
// must be a VLA
assert(I.isArrayAllocation());
- // If needed, adjust the alloca's operand size to match the pointer size.
- // Subsequent math operations expect the types to match.
+ // If needed, adjust the alloca's operand size to match the pointer indexing
+ // size. Subsequent math operations expect the types to match.
Value *ArraySize = Builder.CreateZExtOrTrunc(
- I.getArraySize(), DL.getIntPtrType(I.getContext()));
+ I.getArraySize(),
+ DL.getIndexType(I.getContext(), DL.getAllocaAddrSpace()));
assert(ArraySize->getType() == Zero->getType() &&
- "Expected zero constant to have pointer type");
+ "Expected zero constant to have pointer index type");
Value *Size = ConstantInt::get(ArraySize->getType(),
DL.getTypeAllocSize(I.getAllocatedType()));
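
The extra SmallVectorImpl<Instruction *> parameter added to lowerObjectSizeCall above lets a caller observe every instruction the IRBuilderCallbackInserter materializes during lowering. A small sketch of a caller under that assumption; the helper name and the use of errs() are illustrative only:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical helper: lower one llvm.objectsize call and report what the
// builder inserted. ObjSize, DL and TLI are assumed to come from the caller.
static llvm::Value *lowerAndRecord(llvm::IntrinsicInst *ObjSize,
                                   const llvm::DataLayout &DL,
                                   const llvm::TargetLibraryInfo *TLI) {
  llvm::SmallVector<llvm::Instruction *, 8> Inserted;
  llvm::Value *Lowered = llvm::lowerObjectSizeCall(
      ObjSize, DL, TLI, /*AA=*/nullptr, /*MustSucceed=*/true, &Inserted);
  // The callback inserter pushes every newly created instruction into
  // Inserted, so the caller can add them to a worklist or clean them up.
  for (llvm::Instruction *I : Inserted)
    llvm::errs() << "objectsize lowering inserted: " << *I << "\n";
  return Lowered;
}
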
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 93c388abb0fd..071ecdba8a54 100644
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -1238,7 +1238,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// phi translation to change it into a value live in the predecessor block.
// If not, we just add the predecessors to the worklist and scan them with
// the same Pointer.
- if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
+ if (!Pointer.needsPHITranslationFromBlock(BB)) {
SkipFirstBlock = false;
SmallVector<BasicBlock *, 16> NewBlocks;
for (BasicBlock *Pred : PredCache.get(BB)) {
@@ -1277,7 +1277,7 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// We do need to do phi translation, if we know ahead of time we can't phi
// translate this value, don't even try.
- if (!Pointer.IsPotentiallyPHITranslatable())
+ if (!Pointer.isPotentiallyPHITranslatable())
goto PredTranslationFailure;
// We may have added values to the cache list before this PHI translation.
@@ -1298,8 +1298,8 @@ bool MemoryDependenceResults::getNonLocalPointerDepFromBB(
// Get the PHI translated pointer in this predecessor. This can fail if
// not translatable, in which case the getAddr() returns null.
PHITransAddr &PredPointer = PredList.back().second;
- PredPointer.PHITranslateValue(BB, Pred, &DT, /*MustDominate=*/false);
- Value *PredPtrVal = PredPointer.getAddr();
+ Value *PredPtrVal =
+ PredPointer.translateValue(BB, Pred, &DT, /*MustDominate=*/false);
// Check to see if we have already visited this pred block with another
// pointer. If so, we can't do this lookup. This failure can occur
diff --git a/llvm/lib/Analysis/MemoryLocation.cpp b/llvm/lib/Analysis/MemoryLocation.cpp
index e839f9e0dfb2..0404b32be848 100644
--- a/llvm/lib/Analysis/MemoryLocation.cpp
+++ b/llvm/lib/Analysis/MemoryLocation.cpp
@@ -257,7 +257,7 @@ MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,
case LibFunc_memset_chk:
assert(ArgIdx == 0 && "Invalid argument index for memset_chk");
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case LibFunc_memcpy_chk: {
assert((ArgIdx == 0 || ArgIdx == 1) &&
"Invalid argument index for memcpy_chk");
diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
index 8ced1d2fd140..7fbcffc6489d 100644
--- a/llvm/lib/Analysis/MemoryProfileInfo.cpp
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -18,26 +18,47 @@ using namespace llvm::memprof;
#define DEBUG_TYPE "memory-profile-info"
-// Upper bound on accesses per byte for marking an allocation cold.
-cl::opt<float> MemProfAccessesPerByteColdThreshold(
- "memprof-accesses-per-byte-cold-threshold", cl::init(10.0), cl::Hidden,
- cl::desc("The threshold the accesses per byte must be under to consider "
- "an allocation cold"));
+// Upper bound on lifetime access density (accesses per byte per lifetime sec)
+// for marking an allocation cold.
+cl::opt<float> MemProfLifetimeAccessDensityColdThreshold(
+ "memprof-lifetime-access-density-cold-threshold", cl::init(0.05),
+ cl::Hidden,
+ cl::desc("The threshold the lifetime access density (accesses per byte per "
+ "lifetime sec) must be under to consider an allocation cold"));
// Lower bound on lifetime to mark an allocation cold (in addition to accesses
-// per byte above). This is to avoid pessimizing short lived objects.
-cl::opt<unsigned> MemProfMinLifetimeColdThreshold(
- "memprof-min-lifetime-cold-threshold", cl::init(200), cl::Hidden,
- cl::desc("The minimum lifetime (s) for an allocation to be considered "
+// per byte per sec above). This is to avoid pessimizing short lived objects.
+cl::opt<unsigned> MemProfAveLifetimeColdThreshold(
+ "memprof-ave-lifetime-cold-threshold", cl::init(200), cl::Hidden,
+ cl::desc("The average lifetime (s) for an allocation to be considered "
"cold"));
-AllocationType llvm::memprof::getAllocType(uint64_t MaxAccessCount,
- uint64_t MinSize,
- uint64_t MinLifetime) {
- if (((float)MaxAccessCount) / MinSize < MemProfAccessesPerByteColdThreshold &&
- // MinLifetime is expected to be in ms, so convert the threshold to ms.
- MinLifetime >= MemProfMinLifetimeColdThreshold * 1000)
+// Lower bound on average lifetime accesses density (total life time access
+// density / alloc count) for marking an allocation hot.
+cl::opt<unsigned> MemProfMinAveLifetimeAccessDensityHotThreshold(
+ "memprof-min-ave-lifetime-access-density-hot-threshold", cl::init(1000),
+ cl::Hidden,
+ cl::desc("The minimum TotalLifetimeAccessDensity / AllocCount for an "
+ "allocation to be considered hot"));
+
+AllocationType llvm::memprof::getAllocType(uint64_t TotalLifetimeAccessDensity,
+ uint64_t AllocCount,
+ uint64_t TotalLifetime) {
+ // The access densities are multiplied by 100 to hold 2 decimal places of
+ // precision, so need to divide by 100.
+ if (((float)TotalLifetimeAccessDensity) / AllocCount / 100 <
+ MemProfLifetimeAccessDensityColdThreshold
+ // Lifetime is expected to be in ms, so convert the threshold to ms.
+ && ((float)TotalLifetime) / AllocCount >=
+ MemProfAveLifetimeColdThreshold * 1000)
return AllocationType::Cold;
+
+ // The access densities are multiplied by 100 to hold 2 decimal places of
+ // precision, so need to divide by 100.
+ if (((float)TotalLifetimeAccessDensity) / AllocCount / 100 >
+ MemProfMinAveLifetimeAccessDensityHotThreshold)
+ return AllocationType::Hot;
+
return AllocationType::NotCold;
}
@@ -65,12 +86,15 @@ AllocationType llvm::memprof::getMIBAllocType(const MDNode *MIB) {
// types that can be applied based on the allocation profile data.
auto *MDS = dyn_cast<MDString>(MIB->getOperand(1));
assert(MDS);
- if (MDS->getString().equals("cold"))
+ if (MDS->getString().equals("cold")) {
return AllocationType::Cold;
+ } else if (MDS->getString().equals("hot")) {
+ return AllocationType::Hot;
+ }
return AllocationType::NotCold;
}
-static std::string getAllocTypeAttributeString(AllocationType Type) {
+std::string llvm::memprof::getAllocTypeAttributeString(AllocationType Type) {
switch (Type) {
case AllocationType::NotCold:
return "notcold";
@@ -78,6 +102,9 @@ static std::string getAllocTypeAttributeString(AllocationType Type) {
case AllocationType::Cold:
return "cold";
break;
+ case AllocationType::Hot:
+ return "hot";
+ break;
default:
assert(false && "Unexpected alloc type");
}
@@ -91,7 +118,7 @@ static void addAllocTypeAttribute(LLVMContext &Ctx, CallBase *CI,
CI->addFnAttr(A);
}
-static bool hasSingleAllocType(uint8_t AllocTypes) {
+bool llvm::memprof::hasSingleAllocType(uint8_t AllocTypes) {
const unsigned NumAllocTypes = llvm::popcount(AllocTypes);
assert(NumAllocTypes != 0);
return NumAllocTypes == 1;
@@ -242,3 +269,9 @@ CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::operator*() {
assert(StackIdCInt);
return StackIdCInt->getZExtValue();
}
+
+template <> uint64_t CallStack<MDNode, MDNode::op_iterator>::back() const {
+ assert(N);
+ return mdconst::dyn_extract<ConstantInt>(N->operands().back())
+ ->getZExtValue();
+}
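
The classification above mixes two scalings that are easy to misread: the access densities carry two implied decimal places (hence the division by 100), and lifetimes arrive in milliseconds while the cold-lifetime threshold is given in seconds (hence the multiplication by 1000). A short sketch with invented profile numbers, assuming the default thresholds from the options above:

#include "llvm/Analysis/MemoryProfileInfo.h"
#include <cassert>

static void allocTypeExamples() {
  using llvm::memprof::AllocationType;
  using llvm::memprof::getAllocType;
  // 10 allocations, scaled density 30 -> 30/10/100 = 0.03 accesses/byte/s
  // (< 0.05), total lifetime 3,000,000 ms -> 300 s average (>= 200 s): Cold.
  assert(getAllocType(/*TotalLifetimeAccessDensity=*/30, /*AllocCount=*/10,
                      /*TotalLifetime=*/3000000) == AllocationType::Cold);
  // 2 allocations, scaled density 250,000 -> 1250 accesses/byte/s (> 1000): Hot.
  assert(getAllocType(250000, 2, /*TotalLifetime=*/1000) == AllocationType::Hot);
  // Neither threshold crossed (density 10), so the default NotCold is returned.
  assert(getAllocType(10000, 10, 1000) == AllocationType::NotCold);
}
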
diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp
index aefb66863b8f..d16658028266 100644
--- a/llvm/lib/Analysis/MemorySSA.cpp
+++ b/llvm/lib/Analysis/MemorySSA.cpp
@@ -71,12 +71,6 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MemorySSAWrapperPass, "memoryssa", "Memory SSA", false,
true)
-INITIALIZE_PASS_BEGIN(MemorySSAPrinterLegacyPass, "print-memoryssa",
- "Memory SSA Printer", false, false)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
-INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa",
- "Memory SSA Printer", false, false)
-
static cl::opt<unsigned> MaxCheckLimit(
"memssa-check-limit", cl::Hidden, cl::init(100),
cl::desc("The maximum number of stores/phis MemorySSA"
@@ -304,7 +298,6 @@ instructionClobbersQuery(const MemoryDef *MD, const MemoryLocation &UseLoc,
case Intrinsic::experimental_noalias_scope_decl:
case Intrinsic::pseudoprobe:
return false;
- case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare:
case Intrinsic::dbg_label:
case Intrinsic::dbg_value:
@@ -371,7 +364,8 @@ struct UpwardsMemoryQuery {
} // end anonymous namespace
-static bool isUseTriviallyOptimizableToLiveOnEntry(BatchAAResults &AA,
+template <typename AliasAnalysisType>
+static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysisType &AA,
const Instruction *I) {
// If the memory can't be changed, then loads of the memory can't be
// clobbered.
@@ -1368,11 +1362,6 @@ void MemorySSA::OptimizeUses::optimizeUsesInBlock(
if (MU->isOptimized())
continue;
- if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) {
- MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true);
- continue;
- }
-
MemoryLocOrCall UseMLOC(MU);
auto &LocInfo = LocStackInfo[UseMLOC];
// If the pop epoch changed, it means we've removed stuff from top of
@@ -1788,10 +1777,15 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I,
return nullptr;
MemoryUseOrDef *MUD;
- if (Def)
+ if (Def) {
MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++);
- else
+ } else {
MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent());
+ if (isUseTriviallyOptimizableToLiveOnEntry(*AAP, I)) {
+ MemoryAccess *LiveOnEntry = getLiveOnEntryDef();
+ MUD->setOptimized(LiveOnEntry);
+ }
+ }
ValueToMemoryAccess[I] = MUD;
return MUD;
}
@@ -2220,17 +2214,6 @@ void MemoryAccess::dump() const {
#endif
}
-char MemorySSAPrinterLegacyPass::ID = 0;
-
-MemorySSAPrinterLegacyPass::MemorySSAPrinterLegacyPass() : FunctionPass(ID) {
- initializeMemorySSAPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
-}
-
-void MemorySSAPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<MemorySSAWrapperPass>();
-}
-
class DOTFuncMSSAInfo {
private:
const Function &F;
@@ -2315,20 +2298,6 @@ struct DOTGraphTraits<DOTFuncMSSAInfo *> : public DefaultDOTGraphTraits {
} // namespace llvm
-bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) {
- auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
- MSSA.ensureOptimizedUses();
- if (DotCFGMSSA != "") {
- DOTFuncMSSAInfo CFGInfo(F, MSSA);
- WriteGraph(&CFGInfo, "", false, "MSSA", DotCFGMSSA);
- } else
- MSSA.print(dbgs());
-
- if (VerifyMemorySSA)
- MSSA.verifyMemorySSA();
- return false;
-}
-
AnalysisKey MemorySSAAnalysis::Key;
MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F,
@@ -2350,7 +2319,8 @@ bool MemorySSAAnalysis::Result::invalidate(
PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
- MSSA.ensureOptimizedUses();
+ if (EnsureOptimizedUses)
+ MSSA.ensureOptimizedUses();
if (DotCFGMSSA != "") {
DOTFuncMSSAInfo CFGInfo(F, MSSA);
WriteGraph(&CFGInfo, "", false, "MSSA", DotCFGMSSA);
diff --git a/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 756f92e1aac4..919f8f5c01d6 100644
--- a/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -25,39 +25,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-namespace {
-class ModuleDebugInfoLegacyPrinter : public ModulePass {
- DebugInfoFinder Finder;
-
-public:
- static char ID; // Pass identification, replacement for typeid
- ModuleDebugInfoLegacyPrinter() : ModulePass(ID) {
- initializeModuleDebugInfoLegacyPrinterPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
- void print(raw_ostream &O, const Module *M) const override;
-};
-}
-
-char ModuleDebugInfoLegacyPrinter::ID = 0;
-INITIALIZE_PASS(ModuleDebugInfoLegacyPrinter, "module-debuginfo",
- "Decodes module-level debug info", false, true)
-
-ModulePass *llvm::createModuleDebugInfoPrinterPass() {
- return new ModuleDebugInfoLegacyPrinter();
-}
-
-bool ModuleDebugInfoLegacyPrinter::runOnModule(Module &M) {
- Finder.processModule(M);
- return false;
-}
-
static void printFile(raw_ostream &O, StringRef Filename, StringRef Directory,
unsigned Line = 0) {
if (Filename.empty())
@@ -132,11 +99,6 @@ static void printModuleDebugInfo(raw_ostream &O, const Module *M,
}
}
-void ModuleDebugInfoLegacyPrinter::print(raw_ostream &O,
- const Module *M) const {
- printModuleDebugInfo(O, M, Finder);
-}
-
ModuleDebugInfoPrinterPass::ModuleDebugInfoPrinterPass(raw_ostream &OS)
: OS(OS) {}
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 3dfa2d821e83..2076ed48ea34 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -80,6 +80,8 @@ static cl::opt<std::string> ModuleSummaryDotFile(
"module-summary-dot-file", cl::Hidden, cl::value_desc("filename"),
cl::desc("File to emit dot graph of new summary into"));
+extern cl::opt<bool> ScalePartialSampleProfileWorkingSetSize;
+
// Walk through the operands of a given User via worklist iteration and populate
// the set of GlobalValue references encountered. Invoked either on an
// Instruction or a GlobalVariable (which walks its initializer).
@@ -196,6 +198,7 @@ static void addIntrinsicToSummary(
break;
}
+ case Intrinsic::type_checked_load_relative:
case Intrinsic::type_checked_load: {
auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(2));
auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
@@ -263,7 +266,9 @@ static void computeFunctionSummary(
unsigned NumInsts = 0;
// Map from callee ValueId to profile count. Used to accumulate profile
// counts for all static calls to a given callee.
- MapVector<ValueInfo, CalleeInfo> CallGraphEdges;
+ MapVector<ValueInfo, CalleeInfo, DenseMap<ValueInfo, unsigned>,
+ std::vector<std::pair<ValueInfo, CalleeInfo>>>
+ CallGraphEdges;
SetVector<ValueInfo> RefEdges, LoadRefEdges, StoreRefEdges;
SetVector<GlobalValue::GUID> TypeTests;
SetVector<FunctionSummary::VFuncId> TypeTestAssumeVCalls,
@@ -282,6 +287,10 @@ static void computeFunctionSummary(
std::vector<CallsiteInfo> Callsites;
std::vector<AllocInfo> Allocs;
+#ifndef NDEBUG
+ DenseSet<const CallBase *> CallsThatMayHaveMemprofSummary;
+#endif
+
bool HasInlineAsmMaybeReferencingInternal = false;
bool HasIndirBranchToBlockAddress = false;
bool HasUnknownCall = false;
@@ -425,6 +434,10 @@ static void computeFunctionSummary(
.updateHotness(getHotness(Candidate.Count, PSI));
}
+ // Summarize memprof related metadata. This is only needed for ThinLTO.
+ if (!IsThinLTO)
+ continue;
+
// TODO: Skip indirect calls for now. Need to handle these better, likely
// by creating multiple Callsites, one per target, then speculatively
// devirtualize while applying clone info in the ThinLTO backends. This
@@ -435,6 +448,14 @@ static void computeFunctionSummary(
if (!CalledFunction)
continue;
+ // Ensure we keep this analysis in sync with the handling in the ThinLTO
+ // backend (see MemProfContextDisambiguation::applyImport). Save this call
+ // so that we can skip it in checking the reverse case later.
+ assert(mayHaveMemprofSummary(CB));
+#ifndef NDEBUG
+ CallsThatMayHaveMemprofSummary.insert(CB);
+#endif
+
// Compute the list of stack ids first (so we can trim them from the stack
// ids on any MIBs).
CallStack<MDNode, MDNode::op_iterator> InstCallsite(
@@ -477,7 +498,9 @@ static void computeFunctionSummary(
}
}
}
- Index.addBlockCount(F.size());
+
+ if (PSI->hasPartialSampleProfile() && ScalePartialSampleProfileWorkingSetSize)
+ Index.addBlockCount(F.size());
std::vector<ValueInfo> Refs;
if (IsThinLTO) {
@@ -542,6 +565,25 @@ static void computeFunctionSummary(
? CalleeInfo::HotnessType::Cold
: CalleeInfo::HotnessType::Critical);
+#ifndef NDEBUG
+ // Make sure that all calls we decided could not have memprof summaries get a
+ // false value for mayHaveMemprofSummary, to ensure that this handling remains
+ // in sync with the ThinLTO backend handling.
+ if (IsThinLTO) {
+ for (const BasicBlock &BB : F) {
+ for (const Instruction &I : BB) {
+ const auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
+ continue;
+ // We already checked these above.
+ if (CallsThatMayHaveMemprofSummary.count(CB))
+ continue;
+ assert(!mayHaveMemprofSummary(CB));
+ }
+ }
+ }
+#endif
+
bool NonRenamableLocal = isNonRenamableLocal(F);
bool NotEligibleForImport = NonRenamableLocal ||
HasInlineAsmMaybeReferencingInternal ||
@@ -583,12 +625,17 @@ static void findFuncPointers(const Constant *I, uint64_t StartingOffset,
VTableFuncList &VTableFuncs) {
// First check if this is a function pointer.
if (I->getType()->isPointerTy()) {
- auto Fn = dyn_cast<Function>(I->stripPointerCasts());
- // We can disregard __cxa_pure_virtual as a possible call target, as
- // calls to pure virtuals are UB.
- if (Fn && Fn->getName() != "__cxa_pure_virtual")
- VTableFuncs.push_back({Index.getOrInsertValueInfo(Fn), StartingOffset});
- return;
+ auto C = I->stripPointerCasts();
+ auto A = dyn_cast<GlobalAlias>(C);
+ if (isa<Function>(C) || (A && isa<Function>(A->getAliasee()))) {
+ auto GV = dyn_cast<GlobalValue>(C);
+ assert(GV);
+ // We can disregard __cxa_pure_virtual as a possible call target, as
+ // calls to pure virtuals are UB.
+ if (GV && GV->getName() != "__cxa_pure_virtual")
+ VTableFuncs.push_back({Index.getOrInsertValueInfo(GV), StartingOffset});
+ return;
+ }
}
// Walk through the elements in the constant struct or array and recursively
@@ -741,10 +788,14 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
std::function<const StackSafetyInfo *(const Function &F)> GetSSICallback) {
assert(PSI);
bool EnableSplitLTOUnit = false;
+ bool UnifiedLTO = false;
if (auto *MD = mdconst::extract_or_null<ConstantInt>(
M.getModuleFlag("EnableSplitLTOUnit")))
EnableSplitLTOUnit = MD->getZExtValue();
- ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit);
+ if (auto *MD =
+ mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("UnifiedLTO")))
+ UnifiedLTO = MD->getZExtValue();
+ ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit, UnifiedLTO);
// Identify the local values in the llvm.used and llvm.compiler.used sets,
// which should not be exported as they would then require renaming and
@@ -1033,3 +1084,36 @@ ImmutablePass *llvm::createImmutableModuleSummaryIndexWrapperPass(
INITIALIZE_PASS(ImmutableModuleSummaryIndexWrapperPass, "module-summary-info",
"Module summary info", false, true)
+
+bool llvm::mayHaveMemprofSummary(const CallBase *CB) {
+ if (!CB)
+ return false;
+ if (CB->isDebugOrPseudoInst())
+ return false;
+ auto *CI = dyn_cast<CallInst>(CB);
+ auto *CalledValue = CB->getCalledOperand();
+ auto *CalledFunction = CB->getCalledFunction();
+ if (CalledValue && !CalledFunction) {
+ CalledValue = CalledValue->stripPointerCasts();
+ // Stripping pointer casts can reveal a called function.
+ CalledFunction = dyn_cast<Function>(CalledValue);
+ }
+ // Check if this is an alias to a function. If so, get the
+ // called aliasee for the checks below.
+ if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
+ assert(!CalledFunction &&
+ "Expected null called function in callsite for alias");
+ CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
+ }
+ // Check if this is a direct call to a known function or a known
+ // intrinsic, or an indirect call with profile data.
+ if (CalledFunction) {
+ if (CI && CalledFunction->isIntrinsic())
+ return false;
+ } else {
+ // TODO: For now skip indirect calls. See comments in
+ // computeFunctionSummary for what is needed to handle this.
+ return false;
+ }
+ return true;
+}
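
buildModuleSummaryIndex now reads a "UnifiedLTO" module flag alongside "EnableSplitLTOUnit" when constructing the index. A sketch of how a frontend might set both flags on a module; the choice of Module::Error as the merge behavior is an assumption for illustration, not necessarily what any particular frontend uses:

#include "llvm/IR/Module.h"

// Mark a module so the summary builder above records both bits.
static void markForUnifiedLTO(llvm::Module &M) {
  M.addModuleFlag(llvm::Module::Error, "EnableSplitLTOUnit", 1);
  M.addModuleFlag(llvm::Module::Error, "UnifiedLTO", 1);
}
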
diff --git a/llvm/lib/Analysis/MustExecute.cpp b/llvm/lib/Analysis/MustExecute.cpp
index 2f68996e1c60..d4b31f2b0018 100644
--- a/llvm/lib/Analysis/MustExecute.cpp
+++ b/llvm/lib/Analysis/MustExecute.cpp
@@ -309,101 +309,6 @@ bool ICFLoopSafetyInfo::doesNotWriteMemoryBefore(const Instruction &I,
doesNotWriteMemoryBefore(BB, CurLoop);
}
-namespace {
-struct MustExecutePrinter : public FunctionPass {
-
- static char ID; // Pass identification, replacement for typeid
- MustExecutePrinter() : FunctionPass(ID) {
- initializeMustExecutePrinterPass(*PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- }
- bool runOnFunction(Function &F) override;
-};
-struct MustBeExecutedContextPrinter : public ModulePass {
- static char ID;
-
- MustBeExecutedContextPrinter() : ModulePass(ID) {
- initializeMustBeExecutedContextPrinterPass(
- *PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
- bool runOnModule(Module &M) override;
-};
-}
-
-char MustExecutePrinter::ID = 0;
-INITIALIZE_PASS_BEGIN(MustExecutePrinter, "print-mustexecute",
- "Instructions which execute on loop entry", false, true)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(MustExecutePrinter, "print-mustexecute",
- "Instructions which execute on loop entry", false, true)
-
-FunctionPass *llvm::createMustExecutePrinter() {
- return new MustExecutePrinter();
-}
-
-char MustBeExecutedContextPrinter::ID = 0;
-INITIALIZE_PASS_BEGIN(MustBeExecutedContextPrinter,
- "print-must-be-executed-contexts",
- "print the must-be-executed-context for all instructions",
- false, true)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(MustBeExecutedContextPrinter,
- "print-must-be-executed-contexts",
- "print the must-be-executed-context for all instructions",
- false, true)
-
-ModulePass *llvm::createMustBeExecutedContextPrinter() {
- return new MustBeExecutedContextPrinter();
-}
-
-bool MustBeExecutedContextPrinter::runOnModule(Module &M) {
- // We provide non-PM analysis here because the old PM doesn't like to query
- // function passes from a module pass.
- SmallVector<std::unique_ptr<PostDominatorTree>, 8> PDTs;
- SmallVector<std::unique_ptr<DominatorTree>, 8> DTs;
- SmallVector<std::unique_ptr<LoopInfo>, 8> LIs;
-
- GetterTy<LoopInfo> LIGetter = [&](const Function &F) {
- DTs.push_back(std::make_unique<DominatorTree>(const_cast<Function &>(F)));
- LIs.push_back(std::make_unique<LoopInfo>(*DTs.back()));
- return LIs.back().get();
- };
- GetterTy<DominatorTree> DTGetter = [&](const Function &F) {
- DTs.push_back(std::make_unique<DominatorTree>(const_cast<Function&>(F)));
- return DTs.back().get();
- };
- GetterTy<PostDominatorTree> PDTGetter = [&](const Function &F) {
- PDTs.push_back(
- std::make_unique<PostDominatorTree>(const_cast<Function &>(F)));
- return PDTs.back().get();
- };
- MustBeExecutedContextExplorer Explorer(
- /* ExploreInterBlock */ true,
- /* ExploreCFGForward */ true,
- /* ExploreCFGBackward */ true, LIGetter, DTGetter, PDTGetter);
-
- for (Function &F : M) {
- for (Instruction &I : instructions(F)) {
- dbgs() << "-- Explore context of: " << I << "\n";
- for (const Instruction *CI : Explorer.range(&I))
- dbgs() << " [F: " << CI->getFunction()->getName() << "] " << *CI
- << "\n";
- }
- }
-
- return false;
-}
-
static bool isMustExecuteIn(const Instruction &I, Loop *L, DominatorTree *DT) {
// TODO: merge these two routines. For the moment, we display the best
// result obtained by *either* implementation. This is a bit unfair since no
@@ -467,16 +372,6 @@ public:
};
} // namespace
-bool MustExecutePrinter::runOnFunction(Function &F) {
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
- MustExecuteAnnotatedWriter Writer(F, DT, LI);
- F.print(dbgs(), &Writer);
-
- return false;
-}
-
/// Return true if \p L might be an endless loop.
static bool maybeEndlessLoop(const Loop &L) {
if (L.getHeader()->getParent()->hasFnAttribute(Attribute::WillReturn))
diff --git a/llvm/lib/Analysis/PHITransAddr.cpp b/llvm/lib/Analysis/PHITransAddr.cpp
index 1262530ae642..5700fd664a4c 100644
--- a/llvm/lib/Analysis/PHITransAddr.cpp
+++ b/llvm/lib/Analysis/PHITransAddr.cpp
@@ -25,13 +25,8 @@ static cl::opt<bool> EnableAddPhiTranslation(
"gvn-add-phi-translation", cl::init(false), cl::Hidden,
cl::desc("Enable phi-translation of add instructions"));
-static bool CanPHITrans(Instruction *Inst) {
- if (isa<PHINode>(Inst) ||
- isa<GetElementPtrInst>(Inst))
- return true;
-
- if (isa<CastInst>(Inst) &&
- isSafeToSpeculativelyExecute(Inst))
+static bool canPHITrans(Instruction *Inst) {
+ if (isa<PHINode>(Inst) || isa<GetElementPtrInst>(Inst) || isa<CastInst>(Inst))
return true;
if (Inst->getOpcode() == Instruction::Add &&
@@ -53,47 +48,42 @@ LLVM_DUMP_METHOD void PHITransAddr::dump() const {
}
#endif
-
-static bool VerifySubExpr(Value *Expr,
- SmallVectorImpl<Instruction*> &InstInputs) {
+static bool verifySubExpr(Value *Expr,
+ SmallVectorImpl<Instruction *> &InstInputs) {
// If this is a non-instruction value, there is nothing to do.
Instruction *I = dyn_cast<Instruction>(Expr);
if (!I) return true;
// If it's an instruction, it is either in Tmp or its operands recursively
// are.
- SmallVectorImpl<Instruction *>::iterator Entry = find(InstInputs, I);
- if (Entry != InstInputs.end()) {
+ if (auto Entry = find(InstInputs, I); Entry != InstInputs.end()) {
InstInputs.erase(Entry);
return true;
}
// If it isn't in the InstInputs list it is a subexpr incorporated into the
// address. Validate that it is phi translatable.
- if (!CanPHITrans(I)) {
+ if (!canPHITrans(I)) {
errs() << "Instruction in PHITransAddr is not phi-translatable:\n";
errs() << *I << '\n';
llvm_unreachable("Either something is missing from InstInputs or "
- "CanPHITrans is wrong.");
+ "canPHITrans is wrong.");
}
// Validate the operands of the instruction.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (!VerifySubExpr(I->getOperand(i), InstInputs))
- return false;
-
- return true;
+ return all_of(I->operands(),
+ [&](Value *Op) { return verifySubExpr(Op, InstInputs); });
}
-/// Verify - Check internal consistency of this data structure. If the
+/// verify - Check internal consistency of this data structure. If the
/// structure is valid, it returns true. If invalid, it prints errors and
/// returns false.
-bool PHITransAddr::Verify() const {
+bool PHITransAddr::verify() const {
if (!Addr) return true;
SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end());
- if (!VerifySubExpr(Addr, Tmp))
+ if (!verifySubExpr(Addr, Tmp))
return false;
if (!Tmp.empty()) {
@@ -107,26 +97,23 @@ bool PHITransAddr::Verify() const {
return true;
}
-
-/// IsPotentiallyPHITranslatable - If this needs PHI translation, return true
+/// isPotentiallyPHITranslatable - If this needs PHI translation, return true
/// if we have some hope of doing it. This should be used as a filter to
/// avoid calling PHITranslateValue in hopeless situations.
-bool PHITransAddr::IsPotentiallyPHITranslatable() const {
+bool PHITransAddr::isPotentiallyPHITranslatable() const {
// If the input value is not an instruction, or if it is not defined in CurBB,
// then we don't need to phi translate it.
Instruction *Inst = dyn_cast<Instruction>(Addr);
- return !Inst || CanPHITrans(Inst);
+ return !Inst || canPHITrans(Inst);
}
-
static void RemoveInstInputs(Value *V,
SmallVectorImpl<Instruction*> &InstInputs) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return;
// If the instruction is in the InstInputs list, remove it.
- SmallVectorImpl<Instruction *>::iterator Entry = find(InstInputs, I);
- if (Entry != InstInputs.end()) {
+ if (auto Entry = find(InstInputs, I); Entry != InstInputs.end()) {
InstInputs.erase(Entry);
return;
}
@@ -134,15 +121,14 @@ static void RemoveInstInputs(Value *V,
assert(!isa<PHINode>(I) && "Error, removing something that isn't an input");
// Otherwise, it must have instruction inputs itself. Zap them recursively.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i)))
- RemoveInstInputs(Op, InstInputs);
- }
+ for (Value *Op : I->operands())
+ if (Instruction *OpInst = dyn_cast<Instruction>(Op))
+ RemoveInstInputs(OpInst, InstInputs);
}
-Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
- BasicBlock *PredBB,
- const DominatorTree *DT) {
+Value *PHITransAddr::translateSubExpr(Value *V, BasicBlock *CurBB,
+ BasicBlock *PredBB,
+ const DominatorTree *DT) {
// If this is a non-instruction value, it can't require PHI translation.
Instruction *Inst = dyn_cast<Instruction>(V);
if (!Inst) return V;
@@ -166,18 +152,17 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// If this is a PHI, go ahead and translate it.
if (PHINode *PN = dyn_cast<PHINode>(Inst))
- return AddAsInput(PN->getIncomingValueForBlock(PredBB));
+ return addAsInput(PN->getIncomingValueForBlock(PredBB));
// If this is a non-phi value, and it is analyzable, we can incorporate it
// into the expression by making all instruction operands be inputs.
- if (!CanPHITrans(Inst))
+ if (!canPHITrans(Inst))
return nullptr;
// All instruction operands are now inputs (and of course, they may also be
// defined in this block, so they may need to be phi translated themselves.
- for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
- if (Instruction *Op = dyn_cast<Instruction>(Inst->getOperand(i)))
- InstInputs.push_back(Op);
+ for (Value *Op : Inst->operands())
+ addAsInput(Op);
}
// Ok, it must be an intermediate result (either because it started that way
@@ -185,18 +170,19 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// operands need to be phi translated, and if so, reconstruct it.
if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
- if (!isSafeToSpeculativelyExecute(Cast)) return nullptr;
- Value *PHIIn = PHITranslateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT);
+ Value *PHIIn = translateSubExpr(Cast->getOperand(0), CurBB, PredBB, DT);
if (!PHIIn) return nullptr;
if (PHIIn == Cast->getOperand(0))
return Cast;
// Find an available version of this cast.
- // Constants are trivial to find.
- if (Constant *C = dyn_cast<Constant>(PHIIn))
- return AddAsInput(ConstantExpr::getCast(Cast->getOpcode(),
- C, Cast->getType()));
+ // Try to simplify cast first.
+ if (Value *V = simplifyCastInst(Cast->getOpcode(), PHIIn, Cast->getType(),
+ {DL, TLI, DT, AC})) {
+ RemoveInstInputs(PHIIn, InstInputs);
+ return addAsInput(V);
+ }
// Otherwise we have to see if a casted version of the incoming pointer
// is available. If so, we can use it, otherwise we have to fail.
@@ -214,11 +200,11 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
SmallVector<Value*, 8> GEPOps;
bool AnyChanged = false;
- for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
- Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT);
+ for (Value *Op : GEP->operands()) {
+ Value *GEPOp = translateSubExpr(Op, CurBB, PredBB, DT);
if (!GEPOp) return nullptr;
- AnyChanged |= GEPOp != GEP->getOperand(i);
+ AnyChanged |= GEPOp != Op;
GEPOps.push_back(GEPOp);
}
@@ -232,7 +218,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
RemoveInstInputs(GEPOps[i], InstInputs);
- return AddAsInput(V);
+ return addAsInput(V);
}
// Scan to see if we have this GEP available.
@@ -259,7 +245,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap();
bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
- Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT);
+ Value *LHS = translateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT);
if (!LHS) return nullptr;
// If the PHI translated LHS is an add of a constant, fold the immediates.
@@ -273,7 +259,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// If the old 'LHS' was an input, add the new 'LHS' as an input.
if (is_contained(InstInputs, BOp)) {
RemoveInstInputs(BOp, InstInputs);
- AddAsInput(LHS);
+ addAsInput(LHS);
}
}
@@ -282,7 +268,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// If we simplified the operands, the LHS is no longer an input, but Res
// is.
RemoveInstInputs(LHS, InstInputs);
- return AddAsInput(Res);
+ return addAsInput(Res);
}
// If we didn't modify the add, just return it.
@@ -306,21 +292,19 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
return nullptr;
}
-
/// PHITranslateValue - PHI translate the current address up the CFG from
/// CurBB to Pred, updating our state to reflect any needed changes. If
-/// 'MustDominate' is true, the translated value must dominate
-/// PredBB. This returns true on failure and sets Addr to null.
-bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB,
- const DominatorTree *DT,
- bool MustDominate) {
+/// 'MustDominate' is true, the translated value must dominate PredBB.
+Value *PHITransAddr::translateValue(BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree *DT,
+ bool MustDominate) {
assert(DT || !MustDominate);
- assert(Verify() && "Invalid PHITransAddr!");
+ assert(verify() && "Invalid PHITransAddr!");
if (DT && DT->isReachableFromEntry(PredBB))
- Addr = PHITranslateSubExpr(Addr, CurBB, PredBB, DT);
+ Addr = translateSubExpr(Addr, CurBB, PredBB, DT);
else
Addr = nullptr;
- assert(Verify() && "Invalid PHITransAddr!");
+ assert(verify() && "Invalid PHITransAddr!");
if (MustDominate)
// Make sure the value is live in the predecessor.
@@ -328,7 +312,7 @@ bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB,
if (!DT->dominates(Inst->getParent(), PredBB))
Addr = nullptr;
- return Addr == nullptr;
+ return Addr;
}
/// PHITranslateWithInsertion - PHI translate this value into the specified
@@ -338,14 +322,14 @@ bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB,
/// All newly created instructions are added to the NewInsts list. This
/// returns null on failure.
///
-Value *PHITransAddr::
-PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB,
- const DominatorTree &DT,
- SmallVectorImpl<Instruction*> &NewInsts) {
+Value *
+PHITransAddr::translateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree &DT,
+ SmallVectorImpl<Instruction *> &NewInsts) {
unsigned NISize = NewInsts.size();
// Attempt to PHI translate with insertion.
- Addr = InsertPHITranslatedSubExpr(Addr, CurBB, PredBB, DT, NewInsts);
+ Addr = insertTranslatedSubExpr(Addr, CurBB, PredBB, DT, NewInsts);
// If successful, return the new value.
if (Addr) return Addr;
@@ -356,21 +340,20 @@ PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB,
return nullptr;
}
-
-/// InsertPHITranslatedPointer - Insert a computation of the PHI translated
+/// insertTranslatedSubExpr - Insert a computation of the PHI translated
/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
/// block. All newly created instructions are added to the NewInsts list.
/// This returns null on failure.
///
-Value *PHITransAddr::
-InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
- BasicBlock *PredBB, const DominatorTree &DT,
- SmallVectorImpl<Instruction*> &NewInsts) {
+Value *PHITransAddr::insertTranslatedSubExpr(
+ Value *InVal, BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree &DT, SmallVectorImpl<Instruction *> &NewInsts) {
// See if we have a version of this value already available and dominating
// PredBB. If so, there is no need to insert a new instance of it.
PHITransAddr Tmp(InVal, DL, AC);
- if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT, /*MustDominate=*/true))
- return Tmp.getAddr();
+ if (Value *Addr =
+ Tmp.translateValue(CurBB, PredBB, &DT, /*MustDominate=*/true))
+ return Addr;
// We don't need to PHI translate values which aren't instructions.
auto *Inst = dyn_cast<Instruction>(InVal);
@@ -379,9 +362,8 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
// Handle cast of PHI translatable value.
if (CastInst *Cast = dyn_cast<CastInst>(Inst)) {
- if (!isSafeToSpeculativelyExecute(Cast)) return nullptr;
- Value *OpVal = InsertPHITranslatedSubExpr(Cast->getOperand(0),
- CurBB, PredBB, DT, NewInsts);
+ Value *OpVal = insertTranslatedSubExpr(Cast->getOperand(0), CurBB, PredBB,
+ DT, NewInsts);
if (!OpVal) return nullptr;
// Otherwise insert a cast at the end of PredBB.
@@ -397,9 +379,8 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
SmallVector<Value*, 8> GEPOps;
BasicBlock *CurBB = GEP->getParent();
- for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
- Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i),
- CurBB, PredBB, DT, NewInsts);
+ for (Value *Op : GEP->operands()) {
+ Value *OpVal = insertTranslatedSubExpr(Op, CurBB, PredBB, DT, NewInsts);
if (!OpVal) return nullptr;
GEPOps.push_back(OpVal);
}
@@ -422,8 +403,8 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
// This needs to be evaluated carefully to consider its cost trade offs.
// PHI translate the LHS.
- Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0),
- CurBB, PredBB, DT, NewInsts);
+ Value *OpVal = insertTranslatedSubExpr(Inst->getOperand(0), CurBB, PredBB,
+ DT, NewInsts);
if (OpVal == nullptr)
return nullptr;
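
With the bool-returning PHITranslateValue gone, callers now test the returned pointer directly, as in the MemoryDependenceAnalysis hunk earlier in this patch. A small sketch of the new pattern; Pointer, BB, Pred and DT are hypothetical stand-ins for a caller's state:

#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/IR/Dominators.h"

// Translate an address from BB into its predecessor Pred and return the
// value live in Pred, or nullptr if translation fails.
static llvm::Value *translateForPred(llvm::PHITransAddr &Pointer,
                                     llvm::BasicBlock *BB,
                                     llvm::BasicBlock *Pred,
                                     const llvm::DominatorTree &DT) {
  if (llvm::Value *PredPtrVal =
          Pointer.translateValue(BB, Pred, &DT, /*MustDominate=*/false))
    return PredPtrVal; // usable in Pred
  return nullptr;      // translation failed; callers fall back as before
}
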
diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index 6b9f15bf2f64..203f1e42733f 100644
--- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -95,129 +95,11 @@ std::optional<uint64_t> ProfileSummaryInfo::getProfileCount(
return std::nullopt;
}
-/// Returns true if the function's entry is hot. If it returns false, it
-/// either means it is not hot or it is unknown whether it is hot or not (for
-/// example, no profile data is available).
-bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) const {
- if (!F || !hasProfileSummary())
- return false;
- auto FunctionCount = F->getEntryCount();
- // FIXME: The heuristic used below for determining hotness is based on
- // preliminary SPEC tuning for inliner. This will eventually be a
- // convenience method that calls isHotCount.
- return FunctionCount && isHotCount(FunctionCount->getCount());
-}
-
-/// Returns true if the function contains hot code. This can include a hot
-/// function entry count, hot basic block, or (in the case of Sample PGO)
-/// hot total call edge count.
-/// If it returns false, it either means it is not hot or it is unknown
-/// (for example, no profile data is available).
-bool ProfileSummaryInfo::isFunctionHotInCallGraph(
- const Function *F, BlockFrequencyInfo &BFI) const {
- if (!F || !hasProfileSummary())
- return false;
- if (auto FunctionCount = F->getEntryCount())
- if (isHotCount(FunctionCount->getCount()))
- return true;
-
- if (hasSampleProfile()) {
- uint64_t TotalCallCount = 0;
- for (const auto &BB : *F)
- for (const auto &I : BB)
- if (isa<CallInst>(I) || isa<InvokeInst>(I))
- if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr))
- TotalCallCount += *CallCount;
- if (isHotCount(TotalCallCount))
- return true;
- }
- for (const auto &BB : *F)
- if (isHotBlock(&BB, &BFI))
- return true;
- return false;
-}
-
-/// Returns true if the function only contains cold code. This means that
-/// the function entry and blocks are all cold, and (in the case of Sample PGO)
-/// the total call edge count is cold.
-/// If it returns false, it either means it is not cold or it is unknown
-/// (for example, no profile data is available).
-bool ProfileSummaryInfo::isFunctionColdInCallGraph(
- const Function *F, BlockFrequencyInfo &BFI) const {
- if (!F || !hasProfileSummary())
- return false;
- if (auto FunctionCount = F->getEntryCount())
- if (!isColdCount(FunctionCount->getCount()))
- return false;
-
- if (hasSampleProfile()) {
- uint64_t TotalCallCount = 0;
- for (const auto &BB : *F)
- for (const auto &I : BB)
- if (isa<CallInst>(I) || isa<InvokeInst>(I))
- if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr))
- TotalCallCount += *CallCount;
- if (!isColdCount(TotalCallCount))
- return false;
- }
- for (const auto &BB : *F)
- if (!isColdBlock(&BB, &BFI))
- return false;
- return true;
-}
-
bool ProfileSummaryInfo::isFunctionHotnessUnknown(const Function &F) const {
assert(hasPartialSampleProfile() && "Expect partial sample profile");
return !F.getEntryCount();
}
-template <bool isHot>
-bool ProfileSummaryInfo::isFunctionHotOrColdInCallGraphNthPercentile(
- int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const {
- if (!F || !hasProfileSummary())
- return false;
- if (auto FunctionCount = F->getEntryCount()) {
- if (isHot &&
- isHotCountNthPercentile(PercentileCutoff, FunctionCount->getCount()))
- return true;
- if (!isHot &&
- !isColdCountNthPercentile(PercentileCutoff, FunctionCount->getCount()))
- return false;
- }
- if (hasSampleProfile()) {
- uint64_t TotalCallCount = 0;
- for (const auto &BB : *F)
- for (const auto &I : BB)
- if (isa<CallInst>(I) || isa<InvokeInst>(I))
- if (auto CallCount = getProfileCount(cast<CallBase>(I), nullptr))
- TotalCallCount += *CallCount;
- if (isHot && isHotCountNthPercentile(PercentileCutoff, TotalCallCount))
- return true;
- if (!isHot && !isColdCountNthPercentile(PercentileCutoff, TotalCallCount))
- return false;
- }
- for (const auto &BB : *F) {
- if (isHot && isHotBlockNthPercentile(PercentileCutoff, &BB, &BFI))
- return true;
- if (!isHot && !isColdBlockNthPercentile(PercentileCutoff, &BB, &BFI))
- return false;
- }
- return !isHot;
-}
-
-// Like isFunctionHotInCallGraph but for a given cutoff.
-bool ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile(
- int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const {
- return isFunctionHotOrColdInCallGraphNthPercentile<true>(
- PercentileCutoff, F, BFI);
-}
-
-bool ProfileSummaryInfo::isFunctionColdInCallGraphNthPercentile(
- int PercentileCutoff, const Function *F, BlockFrequencyInfo &BFI) const {
- return isFunctionHotOrColdInCallGraphNthPercentile<false>(
- PercentileCutoff, F, BFI);
-}
-
/// Returns true if the function's entry is a cold. If it returns false, it
/// either means it is not cold or it is unknown whether it is cold or not (for
/// example, no profile data is available).
@@ -325,38 +207,6 @@ uint64_t ProfileSummaryInfo::getOrCompColdCountThreshold() const {
return ColdCountThreshold.value_or(0);
}
-bool ProfileSummaryInfo::isHotBlock(const BasicBlock *BB,
- BlockFrequencyInfo *BFI) const {
- auto Count = BFI->getBlockProfileCount(BB);
- return Count && isHotCount(*Count);
-}
-
-bool ProfileSummaryInfo::isColdBlock(const BasicBlock *BB,
- BlockFrequencyInfo *BFI) const {
- auto Count = BFI->getBlockProfileCount(BB);
- return Count && isColdCount(*Count);
-}
-
-template <bool isHot>
-bool ProfileSummaryInfo::isHotOrColdBlockNthPercentile(
- int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const {
- auto Count = BFI->getBlockProfileCount(BB);
- if (isHot)
- return Count && isHotCountNthPercentile(PercentileCutoff, *Count);
- else
- return Count && isColdCountNthPercentile(PercentileCutoff, *Count);
-}
-
-bool ProfileSummaryInfo::isHotBlockNthPercentile(
- int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const {
- return isHotOrColdBlockNthPercentile<true>(PercentileCutoff, BB, BFI);
-}
-
-bool ProfileSummaryInfo::isColdBlockNthPercentile(
- int PercentileCutoff, const BasicBlock *BB, BlockFrequencyInfo *BFI) const {
- return isHotOrColdBlockNthPercentile<false>(PercentileCutoff, BB, BFI);
-}
-
bool ProfileSummaryInfo::isHotCallSite(const CallBase &CB,
BlockFrequencyInfo *BFI) const {
auto C = getProfileCount(CB, BFI);
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 8c62fc37c4a3..111d4d30aab9 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -71,11 +71,13 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -134,10 +136,10 @@ using namespace PatternMatch;
#define DEBUG_TYPE "scalar-evolution"
-STATISTIC(NumTripCountsComputed,
- "Number of loops with predictable loop counts");
-STATISTIC(NumTripCountsNotComputed,
- "Number of loops without predictable loop counts");
+STATISTIC(NumExitCountsComputed,
+ "Number of loop exits with predictable exit counts");
+STATISTIC(NumExitCountsNotComputed,
+ "Number of loop exits without predictable exit counts");
STATISTIC(NumBruteForceTripCountsComputed,
"Number of loops with trip counts computed by force");
@@ -160,10 +162,6 @@ static cl::opt<bool, true> VerifySCEVOpt(
static cl::opt<bool> VerifySCEVStrict(
"verify-scev-strict", cl::Hidden,
cl::desc("Enable stricter verification with -verify-scev is passed"));
-static cl::opt<bool>
- VerifySCEVMap("verify-scev-maps", cl::Hidden,
- cl::desc("Verify no dangling value in ScalarEvolution's "
- "ExprValueMap (slow)"));
static cl::opt<bool> VerifyIR(
"scev-verify-ir", cl::Hidden,
@@ -271,6 +269,9 @@ void SCEV::print(raw_ostream &OS) const {
case scConstant:
cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false);
return;
+ case scVScale:
+ OS << "vscale";
+ return;
case scPtrToInt: {
const SCEVPtrToIntExpr *PtrToInt = cast<SCEVPtrToIntExpr>(this);
const SCEV *Op = PtrToInt->getOperand();
@@ -366,31 +367,9 @@ void SCEV::print(raw_ostream &OS) const {
OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")";
return;
}
- case scUnknown: {
- const SCEVUnknown *U = cast<SCEVUnknown>(this);
- Type *AllocTy;
- if (U->isSizeOf(AllocTy)) {
- OS << "sizeof(" << *AllocTy << ")";
- return;
- }
- if (U->isAlignOf(AllocTy)) {
- OS << "alignof(" << *AllocTy << ")";
- return;
- }
-
- Type *CTy;
- Constant *FieldNo;
- if (U->isOffsetOf(CTy, FieldNo)) {
- OS << "offsetof(" << *CTy << ", ";
- FieldNo->printAsOperand(OS, false);
- OS << ")";
- return;
- }
-
- // Otherwise just print it normally.
- U->getValue()->printAsOperand(OS, false);
+ case scUnknown:
+ cast<SCEVUnknown>(this)->getValue()->printAsOperand(OS, false);
return;
- }
case scCouldNotCompute:
OS << "***COULDNOTCOMPUTE***";
return;
@@ -402,6 +381,8 @@ Type *SCEV::getType() const {
switch (getSCEVType()) {
case scConstant:
return cast<SCEVConstant>(this)->getType();
+ case scVScale:
+ return cast<SCEVVScale>(this)->getType();
case scPtrToInt:
case scTruncate:
case scZeroExtend:
@@ -433,6 +414,7 @@ Type *SCEV::getType() const {
ArrayRef<const SCEV *> SCEV::operands() const {
switch (getSCEVType()) {
case scConstant:
+ case scVScale:
case scUnknown:
return {};
case scPtrToInt:
@@ -515,6 +497,18 @@ ScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) {
return getConstant(ConstantInt::get(ITy, V, isSigned));
}
+const SCEV *ScalarEvolution::getVScale(Type *Ty) {
+ FoldingSetNodeID ID;
+ ID.AddInteger(scVScale);
+ ID.AddPointer(Ty);
+ void *IP = nullptr;
+ if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP))
+ return S;
+ SCEV *S = new (SCEVAllocator) SCEVVScale(ID.Intern(SCEVAllocator), Ty);
+ UniqueSCEVs.InsertNode(S, IP);
+ return S;
+}
+
SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, SCEVTypes SCEVTy,
const SCEV *op, Type *ty)
: SCEV(ID, SCEVTy, computeExpressionSize(op)), Op(op), Ty(ty) {}
@@ -574,67 +568,6 @@ void SCEVUnknown::allUsesReplacedWith(Value *New) {
setValPtr(New);
}
-bool SCEVUnknown::isSizeOf(Type *&AllocTy) const {
- if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
- if (VCE->getOpcode() == Instruction::PtrToInt)
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
- if (CE->getOpcode() == Instruction::GetElementPtr &&
- CE->getOperand(0)->isNullValue() &&
- CE->getNumOperands() == 2)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1)))
- if (CI->isOne()) {
- AllocTy = cast<GEPOperator>(CE)->getSourceElementType();
- return true;
- }
-
- return false;
-}
-
-bool SCEVUnknown::isAlignOf(Type *&AllocTy) const {
- if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
- if (VCE->getOpcode() == Instruction::PtrToInt)
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
- if (CE->getOpcode() == Instruction::GetElementPtr &&
- CE->getOperand(0)->isNullValue()) {
- Type *Ty = cast<GEPOperator>(CE)->getSourceElementType();
- if (StructType *STy = dyn_cast<StructType>(Ty))
- if (!STy->isPacked() &&
- CE->getNumOperands() == 3 &&
- CE->getOperand(1)->isNullValue()) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2)))
- if (CI->isOne() &&
- STy->getNumElements() == 2 &&
- STy->getElementType(0)->isIntegerTy(1)) {
- AllocTy = STy->getElementType(1);
- return true;
- }
- }
- }
-
- return false;
-}
-
-bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
- if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue()))
- if (VCE->getOpcode() == Instruction::PtrToInt)
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0)))
- if (CE->getOpcode() == Instruction::GetElementPtr &&
- CE->getNumOperands() == 3 &&
- CE->getOperand(0)->isNullValue() &&
- CE->getOperand(1)->isNullValue()) {
- Type *Ty = cast<GEPOperator>(CE)->getSourceElementType();
- // Ignore vector types here so that ScalarEvolutionExpander doesn't
- // emit getelementptrs that index into vectors.
- if (Ty->isStructTy() || Ty->isArrayTy()) {
- CTy = Ty;
- FieldNo = CE->getOperand(2);
- return true;
- }
- }
-
- return false;
-}
-
//===----------------------------------------------------------------------===//
// SCEV Utilities
//===----------------------------------------------------------------------===//
@@ -785,6 +718,12 @@ CompareSCEVComplexity(EquivalenceClasses<const SCEV *> &EqCacheSCEV,
return LA.ult(RA) ? -1 : 1;
}
+ case scVScale: {
+ const auto *LTy = cast<IntegerType>(cast<SCEVVScale>(LHS)->getType());
+ const auto *RTy = cast<IntegerType>(cast<SCEVVScale>(RHS)->getType());
+ return LTy->getBitWidth() - RTy->getBitWidth();
+ }
+
case scAddRecExpr: {
const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
@@ -798,9 +737,8 @@ CompareSCEVComplexity(EquivalenceClasses<const SCEV *> &EqCacheSCEV,
assert(LHead != RHead && "Two loops share the same header?");
if (DT.dominates(LHead, RHead))
return 1;
- else
- assert(DT.dominates(RHead, LHead) &&
- "No dominance between recurrences used by one SCEV?");
+ assert(DT.dominates(RHead, LHead) &&
+ "No dominance between recurrences used by one SCEV?");
return -1;
}
@@ -984,7 +922,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
unsigned T = 1;
for (unsigned i = 3; i <= K; ++i) {
APInt Mult(W, i);
- unsigned TwoFactors = Mult.countTrailingZeros();
+ unsigned TwoFactors = Mult.countr_zero();
T += TwoFactors;
Mult.lshrInPlace(TwoFactors);
OddFactorial *= Mult;
@@ -1252,10 +1190,9 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
if (numTruncs < 2) {
if (isa<SCEVAddExpr>(Op))
return getAddExpr(Operands);
- else if (isa<SCEVMulExpr>(Op))
+ if (isa<SCEVMulExpr>(Op))
return getMulExpr(Operands);
- else
- llvm_unreachable("Unexpected SCEV type for Op.");
+ llvm_unreachable("Unexpected SCEV type for Op.");
}
// Although we checked in the beginning that ID is not in the cache, it is
// possible that during recursion and different modification ID was inserted
@@ -1273,7 +1210,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty,
}
// Return zero if truncating to known zeros.
- uint32_t MinTrailingZeros = GetMinTrailingZeros(Op);
+ uint32_t MinTrailingZeros = getMinTrailingZeros(Op);
if (MinTrailingZeros >= getTypeSizeInBits(Ty))
return getZero(Ty);
@@ -1558,7 +1495,7 @@ static APInt extractConstantWithoutWrapping(ScalarEvolution &SE,
// Find number of trailing zeros of (x + y + ...) w/o the C first:
uint32_t TZ = BitWidth;
for (unsigned I = 1, E = WholeAddExpr->getNumOperands(); I < E && TZ; ++I)
- TZ = std::min(TZ, SE.GetMinTrailingZeros(WholeAddExpr->getOperand(I)));
+ TZ = std::min(TZ, SE.getMinTrailingZeros(WholeAddExpr->getOperand(I)));
if (TZ) {
// Set D to be as many least significant bits of C as possible while still
// guaranteeing that adding D to (C - D + x + y + ...) won't cause a wrap:
@@ -1575,7 +1512,7 @@ static APInt extractConstantWithoutWrapping(ScalarEvolution &SE,
const APInt &ConstantStart,
const SCEV *Step) {
const unsigned BitWidth = ConstantStart.getBitWidth();
- const uint32_t TZ = SE.GetMinTrailingZeros(Step);
+ const uint32_t TZ = SE.getMinTrailingZeros(Step);
if (TZ)
return TZ < BitWidth ? ConstantStart.trunc(TZ).zext(BitWidth)
: ConstantStart;
@@ -1614,10 +1551,7 @@ ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
assert(!Op->getType()->isPointerTy() && "Can't extend pointer!");
Ty = getEffectiveSCEVType(Ty);
- FoldID ID;
- ID.addInteger(scZeroExtend);
- ID.addPointer(Op);
- ID.addPointer(Ty);
+ FoldID ID(scZeroExtend, Op, Ty);
auto Iter = FoldCache.find(ID);
if (Iter != FoldCache.end())
return Iter->second;
@@ -1684,11 +1618,6 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
unsigned BitWidth = getTypeSizeInBits(AR->getType());
const Loop *L = AR->getLoop();
- if (!AR->hasNoUnsignedWrap()) {
- auto NewFlags = proveNoWrapViaConstantRanges(AR);
- setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
- }
-
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->hasNoUnsignedWrap()) {
@@ -1771,7 +1700,8 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
// these to compute max backedge taken counts, but can still use
// these to prove lack of overflow. Use this fact to avoid
// doing extra work that may not pay off.
- if (!isa<SCEVCouldNotCompute>(MaxBECount) || !AC.assumptions().empty()) {
+ if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards ||
+ !AC.assumptions().empty()) {
auto NewFlags = proveNoUnsignedWrapViaInduction(AR);
setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
@@ -1917,6 +1847,27 @@ const SCEV *ScalarEvolution::getZeroExtendExprImpl(const SCEV *Op, Type *Ty,
}
}
+ // zext(umin(x, y)) -> umin(zext(x), zext(y))
+ // zext(umax(x, y)) -> umax(zext(x), zext(y))
+ if (isa<SCEVUMinExpr>(Op) || isa<SCEVUMaxExpr>(Op)) {
+ auto *MinMax = cast<SCEVMinMaxExpr>(Op);
+ SmallVector<const SCEV *, 4> Operands;
+ for (auto *Operand : MinMax->operands())
+ Operands.push_back(getZeroExtendExpr(Operand, Ty));
+ if (isa<SCEVUMinExpr>(MinMax))
+ return getUMinExpr(Operands);
+ return getUMaxExpr(Operands);
+ }
+
+ // zext(umin_seq(x, y)) -> umin_seq(zext(x), zext(y))
+ if (auto *MinMax = dyn_cast<SCEVSequentialMinMaxExpr>(Op)) {
+ assert(isa<SCEVSequentialUMinExpr>(MinMax) && "Not supported!");
+ SmallVector<const SCEV *, 4> Operands;
+ for (auto *Operand : MinMax->operands())
+ Operands.push_back(getZeroExtendExpr(Operand, Ty));
+ return getUMinExpr(Operands, /*Sequential*/ true);
+ }
+
// The cast wasn't folded; create an explicit cast node.
// Recompute the insert position, as it may have been invalidated.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
@@ -1936,10 +1887,7 @@ ScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) {
assert(!Op->getType()->isPointerTy() && "Can't extend pointer!");
Ty = getEffectiveSCEVType(Ty);
- FoldID ID;
- ID.addInteger(scSignExtend);
- ID.addPointer(Op);
- ID.addPointer(Ty);
+ FoldID ID(scSignExtend, Op, Ty);
auto Iter = FoldCache.find(ID);
if (Iter != FoldCache.end())
return Iter->second;
@@ -2045,11 +1993,6 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
unsigned BitWidth = getTypeSizeInBits(AR->getType());
const Loop *L = AR->getLoop();
- if (!AR->hasNoSignedWrap()) {
- auto NewFlags = proveNoWrapViaConstantRanges(AR);
- setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR), NewFlags);
- }
-
// If we have special knowledge that this addrec won't overflow,
// we don't need to do any further analysis.
if (AR->hasNoSignedWrap()) {
@@ -2177,6 +2120,18 @@ const SCEV *ScalarEvolution::getSignExtendExprImpl(const SCEV *Op, Type *Ty,
if (isKnownNonNegative(Op))
return getZeroExtendExpr(Op, Ty, Depth + 1);
+ // sext(smin(x, y)) -> smin(sext(x), sext(y))
+ // sext(smax(x, y)) -> smax(sext(x), sext(y))
+ if (isa<SCEVSMinExpr>(Op) || isa<SCEVSMaxExpr>(Op)) {
+ auto *MinMax = cast<SCEVMinMaxExpr>(Op);
+ SmallVector<const SCEV *, 4> Operands;
+ for (auto *Operand : MinMax->operands())
+ Operands.push_back(getSignExtendExpr(Operand, Ty));
+ if (isa<SCEVSMinExpr>(MinMax))
+ return getSMinExpr(Operands);
+ return getSMaxExpr(Operands);
+ }
+
// The cast wasn't folded; create an explicit cast node.
// Recompute the insert position, as it may have been invalidated.
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
@@ -2377,25 +2332,42 @@ bool ScalarEvolution::willNotOverflow(Instruction::BinaryOps BinOp, bool Signed,
// Can we use context to prove the fact we need?
if (!CtxI)
return false;
- // We can prove that add(x, constant) doesn't wrap if isKnownPredicateAt can
- // guarantee that x <= max_int - constant at the given context.
- // TODO: Support other operations.
- if (BinOp != Instruction::Add)
+ // TODO: Support mul.
+ if (BinOp == Instruction::Mul)
return false;
auto *RHSC = dyn_cast<SCEVConstant>(RHS);
// TODO: Lift this limitation.
if (!RHSC)
return false;
APInt C = RHSC->getAPInt();
- // TODO: Also lift this limitation.
- if (Signed && C.isNegative())
- return false;
unsigned NumBits = C.getBitWidth();
- APInt Max =
- Signed ? APInt::getSignedMaxValue(NumBits) : APInt::getMaxValue(NumBits);
- APInt Limit = Max - C;
+ bool IsSub = (BinOp == Instruction::Sub);
+ bool IsNegativeConst = (Signed && C.isNegative());
+ // Compute the direction and magnitude by which we need to check overflow.
+ bool OverflowDown = IsSub ^ IsNegativeConst;
+ APInt Magnitude = C;
+ if (IsNegativeConst) {
+ if (C == APInt::getSignedMinValue(NumBits))
+ // TODO: SINT_MIN on inversion gives the same negative value, we don't
+ // want to deal with that.
+ return false;
+ Magnitude = -C;
+ }
+
ICmpInst::Predicate Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
- return isKnownPredicateAt(Pred, LHS, getConstant(Limit), CtxI);
+ if (OverflowDown) {
+ // To avoid overflow down, we need to make sure that MIN + Magnitude <= LHS.
+ APInt Min = Signed ? APInt::getSignedMinValue(NumBits)
+ : APInt::getMinValue(NumBits);
+ APInt Limit = Min + Magnitude;
+ return isKnownPredicateAt(Pred, getConstant(Limit), LHS, CtxI);
+ } else {
+ // To avoid overflow up, we need to make sure that LHS <= MAX - Magnitude.
+ APInt Max = Signed ? APInt::getSignedMaxValue(NumBits)
+ : APInt::getMaxValue(NumBits);
+ APInt Limit = Max - Magnitude;
+ return isKnownPredicateAt(Pred, LHS, getConstant(Limit), CtxI);
+ }
}
std::optional<SCEV::NoWrapFlags>
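
A minimal standalone sketch of the bound checked in the hunk above, with plain unsigned 64-bit arithmetic standing in for APInt and SCEV constants (not part of the patch): when overflow can only happen downward (e.g. sub with a positive constant), the check is MIN + C <= LHS; when it can only happen upward (e.g. add with a positive constant), the check is LHS <= MAX - C.

#include <cstdint>
#include <limits>

// Unsigned "sub nuw": LHS - C cannot wrap below zero iff MIN + C <= LHS
// (MIN is 0 for unsigned values).
static bool subHasNoUnsignedWrap(uint64_t LHS, uint64_t C) {
  return std::numeric_limits<uint64_t>::min() + C <= LHS;
}

// Unsigned "add nuw": LHS + C cannot wrap past MAX iff LHS <= MAX - C.
static bool addHasNoUnsignedWrap(uint64_t LHS, uint64_t C) {
  return LHS <= std::numeric_limits<uint64_t>::max() - C;
}
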
@@ -3229,9 +3201,20 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
for (const SCEV *AddRecOp : AddRec->operands())
Operands.push_back(getMulExpr(Ops[0], AddRecOp, SCEV::FlagAnyWrap,
Depth + 1));
-
+ // Let M be the minimum representable signed value. AddRec with nsw
+ // multiplied by -1 can have signed overflow if and only if it takes a
+ // value of M: M * (-1) would stay M and (M + 1) * (-1) would be the
+ // maximum signed value. In all other cases signed overflow is
+ // impossible.
+ auto FlagsMask = SCEV::FlagNW;
+ if (hasFlags(AddRec->getNoWrapFlags(), SCEV::FlagNSW)) {
+ auto MinInt =
+ APInt::getSignedMinValue(getTypeSizeInBits(AddRec->getType()));
+ if (getSignedRangeMin(AddRec) != MinInt)
+ FlagsMask = setFlags(FlagsMask, SCEV::FlagNSW);
+ }
return getAddRecExpr(Operands, AddRec->getLoop(),
- AddRec->getNoWrapFlags(SCEV::FlagNW));
+ AddRec->getNoWrapFlags(FlagsMask));
}
}
}
@@ -3273,9 +3256,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// if they are loop invariant w.r.t. the recurrence.
SmallVector<const SCEV *, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
- const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) {
+ if (isAvailableAtLoopEntry(Ops[i], AddRec->getLoop())) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
@@ -3298,7 +3280,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// will be inferred if either NUW or NSW is true.
SCEV::NoWrapFlags Flags = ComputeFlags({Scale, AddRec});
const SCEV *NewRec = getAddRecExpr(
- NewOps, AddRecLoop, AddRec->getNoWrapFlags(Flags));
+ NewOps, AddRec->getLoop(), AddRec->getNoWrapFlags(Flags));
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
@@ -3332,7 +3314,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
++OtherIdx) {
const SCEVAddRecExpr *OtherAddRec =
dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]);
- if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop)
+ if (!OtherAddRec || OtherAddRec->getLoop() != AddRec->getLoop())
continue;
// Limit max number of arguments to avoid creation of unreasonably big
@@ -3371,7 +3353,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
AddRecOps.push_back(getAddExpr(SumOps, SCEV::FlagAnyWrap, Depth + 1));
}
if (!Overflow) {
- const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRecLoop,
+ const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRec->getLoop(),
SCEV::FlagAnyWrap);
if (Ops.size() == 2) return NewAddRec;
Ops[Idx] = NewAddRec;
@@ -3455,7 +3437,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
// its operands.
// TODO: Generalize this to non-constants by using known-bits information.
Type *Ty = LHS->getType();
- unsigned LZ = RHSC->getAPInt().countLeadingZeros();
+ unsigned LZ = RHSC->getAPInt().countl_zero();
unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;
// For non-power-of-two values, effectively round the value up to the
// nearest power of two.
@@ -3867,15 +3849,18 @@ const SCEV *ScalarEvolution::getMinMaxExpr(SCEVTypes Kind,
++Idx;
assert(Idx < Ops.size());
auto FoldOp = [&](const APInt &LHS, const APInt &RHS) {
- if (Kind == scSMaxExpr)
+ switch (Kind) {
+ case scSMaxExpr:
return APIntOps::smax(LHS, RHS);
- else if (Kind == scSMinExpr)
+ case scSMinExpr:
return APIntOps::smin(LHS, RHS);
- else if (Kind == scUMaxExpr)
+ case scUMaxExpr:
return APIntOps::umax(LHS, RHS);
- else if (Kind == scUMinExpr)
+ case scUMinExpr:
return APIntOps::umin(LHS, RHS);
- llvm_unreachable("Unknown SCEV min/max opcode");
+ default:
+ llvm_unreachable("Unknown SCEV min/max opcode");
+ }
};
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
@@ -4050,6 +4035,8 @@ public:
RetVal visitConstant(const SCEVConstant *Constant) { return Constant; }
+ RetVal visitVScale(const SCEVVScale *VScale) { return VScale; }
+
RetVal visitPtrToIntExpr(const SCEVPtrToIntExpr *Expr) { return Expr; }
RetVal visitTruncateExpr(const SCEVTruncateExpr *Expr) { return Expr; }
@@ -4096,6 +4083,7 @@ public:
static bool scevUnconditionallyPropagatesPoisonFromOperands(SCEVTypes Kind) {
switch (Kind) {
case scConstant:
+ case scVScale:
case scTruncate:
case scZeroExtend:
case scSignExtend:
@@ -4131,38 +4119,15 @@ static bool impliesPoison(const SCEV *AssumedPoison, const SCEV *S) {
// with the notable exception of umin_seq, where only poison from the first
// operand is (unconditionally) propagated.
struct SCEVPoisonCollector {
- bool LookThroughSeq;
+ bool LookThroughMaybePoisonBlocking;
SmallPtrSet<const SCEV *, 4> MaybePoison;
- SCEVPoisonCollector(bool LookThroughSeq) : LookThroughSeq(LookThroughSeq) {}
+ SCEVPoisonCollector(bool LookThroughMaybePoisonBlocking)
+ : LookThroughMaybePoisonBlocking(LookThroughMaybePoisonBlocking) {}
bool follow(const SCEV *S) {
- if (!scevUnconditionallyPropagatesPoisonFromOperands(S->getSCEVType())) {
- switch (S->getSCEVType()) {
- case scConstant:
- case scTruncate:
- case scZeroExtend:
- case scSignExtend:
- case scPtrToInt:
- case scAddExpr:
- case scMulExpr:
- case scUDivExpr:
- case scAddRecExpr:
- case scUMaxExpr:
- case scSMaxExpr:
- case scUMinExpr:
- case scSMinExpr:
- case scUnknown:
- llvm_unreachable("These all unconditionally propagate poison.");
- case scSequentialUMinExpr:
- // TODO: We can always follow the first operand,
- // but the SCEVTraversal API doesn't support this.
- if (!LookThroughSeq)
- return false;
- break;
- case scCouldNotCompute:
- llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- }
- }
+ if (!LookThroughMaybePoisonBlocking &&
+ !scevUnconditionallyPropagatesPoisonFromOperands(S->getSCEVType()))
+ return false;
if (auto *SU = dyn_cast<SCEVUnknown>(S)) {
if (!isGuaranteedNotToBePoison(SU->getValue()))
@@ -4174,9 +4139,10 @@ static bool impliesPoison(const SCEV *AssumedPoison, const SCEV *S) {
};
// First collect all SCEVs that might result in AssumedPoison to be poison.
- // We need to look through umin_seq here, because we want to find all SCEVs
- // that *might* result in poison, not only those that are *required* to.
- SCEVPoisonCollector PC1(/* LookThroughSeq */ true);
+ // We need to look through potentially poison-blocking operations here,
+ // because we want to find all SCEVs that *might* result in poison, not only
+ // those that are *required* to.
+ SCEVPoisonCollector PC1(/* LookThroughMaybePoisonBlocking */ true);
visitAll(AssumedPoison, PC1);
// AssumedPoison is never poison. As the assumption is false, the implication
@@ -4185,9 +4151,9 @@ static bool impliesPoison(const SCEV *AssumedPoison, const SCEV *S) {
return true;
// Collect all SCEVs in S that, if poison, *will* result in S being poison
- // as well. We cannot look through umin_seq here, as its argument only *may*
- // make the result poison.
- SCEVPoisonCollector PC2(/* LookThroughSeq */ false);
+ // as well. We cannot look through potentially poison-blocking operations
+ // here, as their arguments only *may* make the result poison.
+ SCEVPoisonCollector PC2(/* LookThroughMaybePoisonBlocking */ false);
visitAll(S, PC2);
// Make sure that no matter which SCEV in PC1.MaybePoison is actually poison,
@@ -4348,33 +4314,19 @@ const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops,
}
const SCEV *
-ScalarEvolution::getSizeOfScalableVectorExpr(Type *IntTy,
- ScalableVectorType *ScalableTy) {
- Constant *NullPtr = Constant::getNullValue(ScalableTy->getPointerTo());
- Constant *One = ConstantInt::get(IntTy, 1);
- Constant *GEP = ConstantExpr::getGetElementPtr(ScalableTy, NullPtr, One);
- // Note that the expression we created is the final expression, we don't
- // want to simplify it any further Also, if we call a normal getSCEV(),
- // we'll end up in an endless recursion. So just create an SCEVUnknown.
- return getUnknown(ConstantExpr::getPtrToInt(GEP, IntTy));
+ScalarEvolution::getSizeOfExpr(Type *IntTy, TypeSize Size) {
+ const SCEV *Res = getConstant(IntTy, Size.getKnownMinValue());
+ if (Size.isScalable())
+ Res = getMulExpr(Res, getVScale(IntTy));
+ return Res;
}
const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
- if (auto *ScalableAllocTy = dyn_cast<ScalableVectorType>(AllocTy))
- return getSizeOfScalableVectorExpr(IntTy, ScalableAllocTy);
- // We can bypass creating a target-independent constant expression and then
- // folding it back into a ConstantInt. This is just a compile-time
- // optimization.
- return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy));
+ return getSizeOfExpr(IntTy, getDataLayout().getTypeAllocSize(AllocTy));
}
const SCEV *ScalarEvolution::getStoreSizeOfExpr(Type *IntTy, Type *StoreTy) {
- if (auto *ScalableStoreTy = dyn_cast<ScalableVectorType>(StoreTy))
- return getSizeOfScalableVectorExpr(IntTy, ScalableStoreTy);
- // We can bypass creating a target-independent constant expression and then
- // folding it back into a ConstantInt. This is just a compile-time
- // optimization.
- return getConstant(IntTy, getDataLayout().getTypeStoreSize(StoreTy));
+ return getSizeOfExpr(IntTy, getDataLayout().getTypeStoreSize(StoreTy));
}
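
A standalone sketch of the size expression built just above, with plain integers standing in for SCEVs (an illustration only, not part of the patch): a scalable type's allocation size is its known minimum size multiplied by the runtime vscale factor, while a fixed-size type keeps its constant size.

#include <cstdint>

// For a fixed type the size is just KnownMinSize; for a scalable vector such
// as <vscale x 4 x i32> (KnownMinSize == 16 bytes) it is KnownMinSize * vscale.
static uint64_t allocSizeInBytes(uint64_t KnownMinSize, bool IsScalable,
                                 uint64_t VScale) {
  return IsScalable ? KnownMinSize * VScale : KnownMinSize;
}
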
const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
@@ -4383,8 +4335,10 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,
// We can bypass creating a target-independent constant expression and then
// folding it back into a ConstantInt. This is just a compile-time
// optimization.
- return getConstant(
- IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo));
+ const StructLayout *SL = getDataLayout().getStructLayout(STy);
+ assert(!SL->getSizeInBits().isScalable() &&
+ "Cannot get offset for structure containing scalable vector types");
+ return getConstant(IntTy, SL->getElementOffset(FieldNo));
}
const SCEV *ScalarEvolution::getUnknown(Value *V) {
@@ -4494,13 +4448,6 @@ ArrayRef<Value *> ScalarEvolution::getSCEVValues(const SCEV *S) {
ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
if (SI == ExprValueMap.end())
return std::nullopt;
-#ifndef NDEBUG
- if (VerifySCEVMap) {
- // Check there is no dangling Value in the set returned.
- for (Value *V : SI->second)
- assert(ValueExprMap.count(V));
- }
-#endif
return SI->second.getArrayRef();
}
@@ -4529,6 +4476,18 @@ void ScalarEvolution::insertValueToMap(Value *V, const SCEV *S) {
}
}
+/// Determine whether this instruction is either not SCEVable or will always
+/// produce a SCEVUnknown. We do not have to walk past such instructions when
+/// invalidating.
+static bool isAlwaysUnknown(const Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ return true;
+ default:
+ return false;
+ }
+}
+
/// Return an existing SCEV if it exists, otherwise analyze the expression and
/// create a new one.
const SCEV *ScalarEvolution::getSCEV(Value *V) {
@@ -4536,7 +4495,11 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) {
if (const SCEV *S = getExistingSCEV(V))
return S;
- return createSCEVIter(V);
+ const SCEV *S = createSCEVIter(V);
+ assert((!isa<Instruction>(V) || !isAlwaysUnknown(cast<Instruction>(V)) ||
+ isa<SCEVUnknown>(S)) &&
+ "isAlwaysUnknown() instruction is not SCEVUnknown");
+ return S;
}
const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
@@ -4837,6 +4800,8 @@ static void PushDefUseChildren(Instruction *I,
// Push the def-use children onto the Worklist stack.
for (User *U : I->users()) {
auto *UserInsn = cast<Instruction>(U);
+ if (isAlwaysUnknown(UserInsn))
+ continue;
if (Visited.insert(UserInsn).second)
Worklist.push_back(UserInsn);
}
@@ -5054,6 +5019,18 @@ ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) {
SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap;
+ if (!AR->hasNoSelfWrap()) {
+ const SCEV *BECount = getConstantMaxBackedgeTakenCount(AR->getLoop());
+ if (const SCEVConstant *BECountMax = dyn_cast<SCEVConstant>(BECount)) {
+ ConstantRange StepCR = getSignedRange(AR->getStepRecurrence(*this));
+ const APInt &BECountAP = BECountMax->getAPInt();
+ unsigned NoOverflowBitWidth =
+ BECountAP.getActiveBits() + StepCR.getMinSignedBits();
+ if (NoOverflowBitWidth <= getTypeSizeInBits(AR->getType()))
+ Result = ScalarEvolution::setFlags(Result, SCEV::FlagNW);
+ }
+ }
+
if (!AR->hasNoSignedWrap()) {
ConstantRange AddRecRange = getSignedRange(AR);
ConstantRange IncRange = getSignedRange(AR->getStepRecurrence(*this));
@@ -5112,7 +5089,8 @@ ScalarEvolution::proveNoSignedWrapViaInduction(const SCEVAddRecExpr *AR) {
// these to prove lack of overflow. Use this fact to avoid
// doing extra work that may not pay off.
- if (isa<SCEVCouldNotCompute>(MaxBECount) && AC.assumptions().empty())
+ if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards &&
+ AC.assumptions().empty())
return Result;
// If the backedge is guarded by a comparison with the pre-inc value the
@@ -5165,7 +5143,8 @@ ScalarEvolution::proveNoUnsignedWrapViaInduction(const SCEVAddRecExpr *AR) {
// these to prove lack of overflow. Use this fact to avoid
// doing extra work that may not pay off.
- if (isa<SCEVCouldNotCompute>(MaxBECount) && AC.assumptions().empty())
+ if (isa<SCEVCouldNotCompute>(MaxBECount) && !HasGuards &&
+ AC.assumptions().empty())
return Result;
// If the backedge is guarded by a comparison with the pre-inc value the
@@ -5733,6 +5712,12 @@ const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
insertValueToMap(PN, PHISCEV);
+ if (auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV)) {
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR),
+ (SCEV::NoWrapFlags)(AR->getNoWrapFlags() |
+ proveNoWrapViaConstantRanges(AR)));
+ }
+
// We can add Flags to the post-inc expression only if we
// know that it is *undefined behavior* for BEValueV to
// overflow.
@@ -5838,9 +5823,7 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
// indices form a positive value.
if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
Flags = setFlags(Flags, SCEV::FlagNW);
-
- const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
- if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
+ if (isKnownPositive(Accum))
Flags = setFlags(Flags, SCEV::FlagNUW);
}
@@ -5858,6 +5841,12 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
forgetMemoizedResults(SymbolicName);
insertValueToMap(PN, PHISCEV);
+ if (auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV)) {
+ setNoWrapFlags(const_cast<SCEVAddRecExpr *>(AR),
+ (SCEV::NoWrapFlags)(AR->getNoWrapFlags() |
+ proveNoWrapViaConstantRanges(AR)));
+ }
+
// We can add Flags to the post-inc expression only if we
// know that it is *undefined behavior* for BEValueV to
// overflow.
@@ -5903,89 +5892,6 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
return nullptr;
}
-// Checks if the SCEV S is available at BB. S is considered available at BB
-// if S can be materialized at BB without introducing a fault.
-static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
- BasicBlock *BB) {
- struct CheckAvailable {
- bool TraversalDone = false;
- bool Available = true;
-
- const Loop *L = nullptr; // The loop BB is in (can be nullptr)
- BasicBlock *BB = nullptr;
- DominatorTree &DT;
-
- CheckAvailable(const Loop *L, BasicBlock *BB, DominatorTree &DT)
- : L(L), BB(BB), DT(DT) {}
-
- bool setUnavailable() {
- TraversalDone = true;
- Available = false;
- return false;
- }
-
- bool follow(const SCEV *S) {
- switch (S->getSCEVType()) {
- case scConstant:
- case scPtrToInt:
- case scTruncate:
- case scZeroExtend:
- case scSignExtend:
- case scAddExpr:
- case scMulExpr:
- case scUMaxExpr:
- case scSMaxExpr:
- case scUMinExpr:
- case scSMinExpr:
- case scSequentialUMinExpr:
- // These expressions are available if their operand(s) is/are.
- return true;
-
- case scAddRecExpr: {
- // We allow add recurrences that are on the loop BB is in, or some
- // outer loop. This guarantees availability because the value of the
- // add recurrence at BB is simply the "current" value of the induction
- // variable. We can relax this in the future; for instance an add
- // recurrence on a sibling dominating loop is also available at BB.
- const auto *ARLoop = cast<SCEVAddRecExpr>(S)->getLoop();
- if (L && (ARLoop == L || ARLoop->contains(L)))
- return true;
-
- return setUnavailable();
- }
-
- case scUnknown: {
- // For SCEVUnknown, we check for simple dominance.
- const auto *SU = cast<SCEVUnknown>(S);
- Value *V = SU->getValue();
-
- if (isa<Argument>(V))
- return false;
-
- if (isa<Instruction>(V) && DT.dominates(cast<Instruction>(V), BB))
- return false;
-
- return setUnavailable();
- }
-
- case scUDivExpr:
- case scCouldNotCompute:
- // We do not try to smart about these at all.
- return setUnavailable();
- }
- llvm_unreachable("Unknown SCEV kind!");
- }
-
- bool isDone() { return TraversalDone; }
- };
-
- CheckAvailable CA(L, BB, DT);
- SCEVTraversal<CheckAvailable> ST(CA);
-
- ST.visitAll(S);
- return CA.Available;
-}
-
// Try to match a control flow sequence that branches out at BI and merges back
// at Merge into a "C ? LHS : RHS" select pattern. Return true on a successful
// match.
@@ -6023,13 +5929,6 @@ const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) {
auto IsReachable =
[&](BasicBlock *BB) { return DT.isReachableFromEntry(BB); };
if (PN->getNumIncomingValues() == 2 && all_of(PN->blocks(), IsReachable)) {
- const Loop *L = LI.getLoopFor(PN->getParent());
-
- // We don't want to break LCSSA, even in a SCEV expression tree.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (LI.getLoopFor(PN->getIncomingBlock(i)) != L)
- return nullptr;
-
// Try to match
//
// br %cond, label %left, label %right
@@ -6050,8 +5949,8 @@ const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) {
if (BI && BI->isConditional() &&
BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) &&
- IsAvailableOnEntry(L, DT, getSCEV(LHS), PN->getParent()) &&
- IsAvailableOnEntry(L, DT, getSCEV(RHS), PN->getParent()))
+ properlyDominates(getSCEV(LHS), PN->getParent()) &&
+ properlyDominates(getSCEV(RHS), PN->getParent()))
return createNodeForSelectOrPHI(PN, Cond, LHS, RHS);
}
@@ -6062,12 +5961,12 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
if (const SCEV *S = createAddRecFromPHI(PN))
return S;
- if (const SCEV *S = createNodeFromSelectLikePHI(PN))
- return S;
-
if (Value *V = simplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC}))
return getSCEV(V);
+ if (const SCEV *S = createNodeFromSelectLikePHI(PN))
+ return S;
+
// If it's not a loop phi, we can't handle it yet.
return getUnknown(PN);
}
@@ -6310,63 +6209,85 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
return getGEPExpr(GEP, IndexExprs);
}
-uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) {
+APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) {
+ uint64_t BitWidth = getTypeSizeInBits(S->getType());
+ auto GetShiftedByZeros = [BitWidth](uint32_t TrailingZeros) {
+ return TrailingZeros >= BitWidth
+ ? APInt::getZero(BitWidth)
+ : APInt::getOneBitSet(BitWidth, TrailingZeros);
+ };
+ auto GetGCDMultiple = [this](const SCEVNAryExpr *N) {
+ // The result is GCD of all operands results.
+ APInt Res = getConstantMultiple(N->getOperand(0));
+ for (unsigned I = 1, E = N->getNumOperands(); I < E && Res != 1; ++I)
+ Res = APIntOps::GreatestCommonDivisor(
+ Res, getConstantMultiple(N->getOperand(I)));
+ return Res;
+ };
+
switch (S->getSCEVType()) {
case scConstant:
- return cast<SCEVConstant>(S)->getAPInt().countTrailingZeros();
+ return cast<SCEVConstant>(S)->getAPInt();
+ case scPtrToInt:
+ return getConstantMultiple(cast<SCEVPtrToIntExpr>(S)->getOperand());
+ case scUDivExpr:
+ case scVScale:
+ return APInt(BitWidth, 1);
case scTruncate: {
+ // Only multiples that are a power of 2 will hold after truncation.
const SCEVTruncateExpr *T = cast<SCEVTruncateExpr>(S);
- return std::min(GetMinTrailingZeros(T->getOperand()),
- (uint32_t)getTypeSizeInBits(T->getType()));
+ uint32_t TZ = getMinTrailingZeros(T->getOperand());
+ return GetShiftedByZeros(TZ);
}
case scZeroExtend: {
- const SCEVZeroExtendExpr *E = cast<SCEVZeroExtendExpr>(S);
- uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
- return OpRes == getTypeSizeInBits(E->getOperand()->getType())
- ? getTypeSizeInBits(E->getType())
- : OpRes;
+ const SCEVZeroExtendExpr *Z = cast<SCEVZeroExtendExpr>(S);
+ return getConstantMultiple(Z->getOperand()).zext(BitWidth);
}
case scSignExtend: {
const SCEVSignExtendExpr *E = cast<SCEVSignExtendExpr>(S);
- uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
- return OpRes == getTypeSizeInBits(E->getOperand()->getType())
- ? getTypeSizeInBits(E->getType())
- : OpRes;
+ return getConstantMultiple(E->getOperand()).sext(BitWidth);
}
case scMulExpr: {
const SCEVMulExpr *M = cast<SCEVMulExpr>(S);
- // The result is the sum of all operands results.
- uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
- uint32_t BitWidth = getTypeSizeInBits(M->getType());
- for (unsigned i = 1, e = M->getNumOperands();
- SumOpRes != BitWidth && i != e; ++i)
- SumOpRes =
- std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)), BitWidth);
- return SumOpRes;
+ if (M->hasNoUnsignedWrap()) {
+ // The result is the product of all operand results.
+ APInt Res = getConstantMultiple(M->getOperand(0));
+ for (const SCEV *Operand : M->operands().drop_front())
+ Res = Res * getConstantMultiple(Operand);
+ return Res;
+ }
+
+ // If there are no wrap guarantees, find the trailing zeros, which is the
+ // sum of trailing zeros for all its operands.
+ uint32_t TZ = 0;
+ for (const SCEV *Operand : M->operands())
+ TZ += getMinTrailingZeros(Operand);
+ return GetShiftedByZeros(TZ);
}
- case scUDivExpr:
- return 0;
- case scPtrToInt:
case scAddExpr:
- case scAddRecExpr:
+ case scAddRecExpr: {
+ const SCEVNAryExpr *N = cast<SCEVNAryExpr>(S);
+ if (N->hasNoUnsignedWrap())
+ return GetGCDMultiple(N);
+ // Find the trailing zeros, which is the minimum over all operands.
+ uint32_t TZ = getMinTrailingZeros(N->getOperand(0));
+ for (const SCEV *Operand : N->operands().drop_front())
+ TZ = std::min(TZ, getMinTrailingZeros(Operand));
+ return GetShiftedByZeros(TZ);
+ }
case scUMaxExpr:
case scSMaxExpr:
case scUMinExpr:
case scSMinExpr:
- case scSequentialUMinExpr: {
- // The result is the min of all operands results.
- ArrayRef<const SCEV *> Ops = S->operands();
- uint32_t MinOpRes = GetMinTrailingZeros(Ops[0]);
- for (unsigned I = 1, E = Ops.size(); MinOpRes && I != E; ++I)
- MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(Ops[I]));
- return MinOpRes;
- }
+ case scSequentialUMinExpr:
+ return GetGCDMultiple(cast<SCEVNAryExpr>(S));
case scUnknown: {
+ // ask ValueTracking for known bits
const SCEVUnknown *U = cast<SCEVUnknown>(S);
- // For a SCEVUnknown, ask ValueTracking.
- KnownBits Known =
- computeKnownBits(U->getValue(), getDataLayout(), 0, &AC, nullptr, &DT);
- return Known.countMinTrailingZeros();
+ unsigned Known =
+ computeKnownBits(U->getValue(), getDataLayout(), 0, &AC, nullptr, &DT)
+ .countMinTrailingZeros();
+ return GetShiftedByZeros(Known);
}
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
@@ -6374,17 +6295,27 @@ uint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) {
llvm_unreachable("Unknown SCEV kind!");
}
-uint32_t ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {
- auto I = MinTrailingZerosCache.find(S);
- if (I != MinTrailingZerosCache.end())
+APInt ScalarEvolution::getConstantMultiple(const SCEV *S) {
+ auto I = ConstantMultipleCache.find(S);
+ if (I != ConstantMultipleCache.end())
return I->second;
- uint32_t Result = GetMinTrailingZerosImpl(S);
- auto InsertPair = MinTrailingZerosCache.insert({S, Result});
+ APInt Result = getConstantMultipleImpl(S);
+ auto InsertPair = ConstantMultipleCache.insert({S, Result});
assert(InsertPair.second && "Should insert a new key");
return InsertPair.first->second;
}
+APInt ScalarEvolution::getNonZeroConstantMultiple(const SCEV *S) {
+ APInt Multiple = getConstantMultiple(S);
+ return Multiple == 0 ? APInt(Multiple.getBitWidth(), 1) : Multiple;
+}
+
+uint32_t ScalarEvolution::getMinTrailingZeros(const SCEV *S) {
+ return std::min(getConstantMultiple(S).countTrailingZeros(),
+ (unsigned)getTypeSizeInBits(S->getType()));
+}
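
A standalone sketch of the GCD rule used by getConstantMultipleImpl above, with plain integers in place of APInt (not part of the patch): for a no-wrap add or addrec, the constant multiple of the result is the greatest common divisor of the operands' multiples, and the scan can stop early once that GCD reaches 1.

#include <cstdint>
#include <numeric>
#include <vector>

static uint64_t
constantMultipleOfNoWrapAdd(const std::vector<uint64_t> &OpMultiples) {
  // E.g. operands that are multiples of 12 and 18 make the sum a multiple of 6.
  uint64_t Res = OpMultiples.front();
  for (size_t I = 1; I < OpMultiples.size() && Res != 1; ++I)
    Res = std::gcd(Res, OpMultiples[I]);
  return Res;
}
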
+
/// Helper method to assign a range to V from metadata present in the IR.
static std::optional<ConstantRange> GetRangeFromMetadata(Value *V) {
if (Instruction *I = dyn_cast<Instruction>(V))
@@ -6400,6 +6331,7 @@ void ScalarEvolution::setNoWrapFlags(SCEVAddRecExpr *AddRec,
AddRec->setNoWrapFlags(Flags);
UnsignedRanges.erase(AddRec);
SignedRanges.erase(AddRec);
+ ConstantMultipleCache.erase(AddRec);
}
}
@@ -6536,7 +6468,7 @@ ScalarEvolution::getRangeRefIter(const SCEV *S,
auto AddToWorklist = [&WorkList, &Seen, &Cache](const SCEV *Expr) {
if (!Seen.insert(Expr).second)
return;
- if (Cache.find(Expr) != Cache.end())
+ if (Cache.contains(Expr))
return;
switch (Expr->getSCEVType()) {
case scUnknown:
@@ -6544,6 +6476,7 @@ ScalarEvolution::getRangeRefIter(const SCEV *S,
break;
[[fallthrough]];
case scConstant:
+ case scVScale:
case scTruncate:
case scZeroExtend:
case scSignExtend:
@@ -6632,21 +6565,28 @@ const ConstantRange &ScalarEvolution::getRangeRef(
// If the value has known zeros, the maximum value will have those known zeros
// as well.
- uint32_t TZ = GetMinTrailingZeros(S);
- if (TZ != 0) {
- if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED)
+ if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {
+ APInt Multiple = getNonZeroConstantMultiple(S);
+ APInt Remainder = APInt::getMaxValue(BitWidth).urem(Multiple);
+ if (!Remainder.isZero())
ConservativeResult =
ConstantRange(APInt::getMinValue(BitWidth),
- APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1);
- else
+ APInt::getMaxValue(BitWidth) - Remainder + 1);
+ }
+ else {
+ uint32_t TZ = getMinTrailingZeros(S);
+ if (TZ != 0) {
ConservativeResult = ConstantRange(
APInt::getSignedMinValue(BitWidth),
APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1);
+ }
}
switch (S->getSCEVType()) {
case scConstant:
llvm_unreachable("Already handled above.");
+ case scVScale:
+ return setRange(S, SignHint, getVScaleRange(&F, BitWidth));
case scTruncate: {
const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(S);
ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint, Depth + 1);
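
A standalone sketch of the unsigned-range tightening near the top of the hunk above, again with plain 64-bit integers in place of APInt (not part of the patch): if every value of S is a multiple of M, the largest value it can take is UMAX minus (UMAX urem M), which generalizes the old trailing-zeros bound to multiples that are not powers of two.

#include <cstdint>

// Largest 64-bit value that is a multiple of M (assumes M != 0, as the patch
// guarantees by using getNonZeroConstantMultiple).
static uint64_t largestRepresentableMultiple(uint64_t M) {
  const uint64_t UMax = ~UINT64_C(0);
  return UMax - UMax % M; // e.g. M == 6 gives 0xFFFFFFFFFFFFFFFC
}
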
@@ -6742,21 +6682,30 @@ const ConstantRange &ScalarEvolution::getRangeRef(
// TODO: non-affine addrec
if (AddRec->isAffine()) {
- const SCEV *MaxBECount =
+ const SCEV *MaxBEScev =
getConstantMaxBackedgeTakenCount(AddRec->getLoop());
- if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
- getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) {
- auto RangeFromAffine = getRangeForAffineAR(
- AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
- BitWidth);
- ConservativeResult =
- ConservativeResult.intersectWith(RangeFromAffine, RangeType);
+ if (!isa<SCEVCouldNotCompute>(MaxBEScev)) {
+ APInt MaxBECount = cast<SCEVConstant>(MaxBEScev)->getAPInt();
+
+ // Adjust MaxBECount to the same bitwidth as AddRec. We can truncate if
+ // MaxBECount's active bits are all <= AddRec's bit width.
+ if (MaxBECount.getBitWidth() > BitWidth &&
+ MaxBECount.getActiveBits() <= BitWidth)
+ MaxBECount = MaxBECount.trunc(BitWidth);
+ else if (MaxBECount.getBitWidth() < BitWidth)
+ MaxBECount = MaxBECount.zext(BitWidth);
+
+ if (MaxBECount.getBitWidth() == BitWidth) {
+ auto RangeFromAffine = getRangeForAffineAR(
+ AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount);
+ ConservativeResult =
+ ConservativeResult.intersectWith(RangeFromAffine, RangeType);
- auto RangeFromFactoring = getRangeViaFactoring(
- AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount,
- BitWidth);
- ConservativeResult =
- ConservativeResult.intersectWith(RangeFromFactoring, RangeType);
+ auto RangeFromFactoring = getRangeViaFactoring(
+ AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount);
+ ConservativeResult =
+ ConservativeResult.intersectWith(RangeFromFactoring, RangeType);
+ }
}
// Now try symbolic BE count and more powerful methods.
@@ -6764,7 +6713,7 @@ const ConstantRange &ScalarEvolution::getRangeRef(
const SCEV *SymbolicMaxBECount =
getSymbolicMaxBackedgeTakenCount(AddRec->getLoop());
if (!isa<SCEVCouldNotCompute>(SymbolicMaxBECount) &&
- getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
+ getTypeSizeInBits(MaxBEScev->getType()) <= BitWidth &&
AddRec->hasNoSelfWrap()) {
auto RangeFromAffineNew = getRangeForAffineNoSelfWrappingAR(
AddRec, SymbolicMaxBECount, BitWidth, SignHint);
@@ -6810,9 +6759,10 @@ const ConstantRange &ScalarEvolution::getRangeRef(
}
case scUnknown: {
const SCEVUnknown *U = cast<SCEVUnknown>(S);
+ Value *V = U->getValue();
// Check if the IR explicitly contains !range metadata.
- std::optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue());
+ std::optional<ConstantRange> MDRange = GetRangeFromMetadata(V);
if (MDRange)
ConservativeResult =
ConservativeResult.intersectWith(*MDRange, RangeType);
@@ -6825,13 +6775,13 @@ const ConstantRange &ScalarEvolution::getRangeRef(
// See if ValueTracking can give us a useful range.
const DataLayout &DL = getDataLayout();
- KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
+ KnownBits Known = computeKnownBits(V, DL, 0, &AC, nullptr, &DT);
if (Known.getBitWidth() != BitWidth)
Known = Known.zextOrTrunc(BitWidth);
// ValueTracking may be able to compute a tighter result for the number of
// sign bits than for the value of those sign bits.
- unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT);
+ unsigned NS = ComputeNumSignBits(V, DL, 0, &AC, nullptr, &DT);
if (U->getType()->isPointerTy()) {
// If the pointer size is larger than the index size type, this can cause
// NS to be larger than BitWidth. So compensate for this.
@@ -6859,8 +6809,36 @@ const ConstantRange &ScalarEvolution::getRangeRef(
APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1),
RangeType);
+ if (U->getType()->isPointerTy() && SignHint == HINT_RANGE_UNSIGNED) {
+ // Strengthen the range if the underlying IR value is a
+ // global/alloca/heap allocation using the size of the object.
+ ObjectSizeOpts Opts;
+ Opts.RoundToAlign = false;
+ Opts.NullIsUnknownSize = true;
+ uint64_t ObjSize;
+ if ((isa<GlobalVariable>(V) || isa<AllocaInst>(V) ||
+ isAllocationFn(V, &TLI)) &&
+ getObjectSize(V, ObjSize, DL, &TLI, Opts) && ObjSize > 1) {
+ // The highest address the object can start is ObjSize bytes before the
+ // end (unsigned max value). If this value is not a multiple of the
+ // alignment, the last possible start value is the next lowest multiple
+ // of the alignment. Note: The computations below cannot overflow,
+ // because if they did, there would be no possible start address for the
+ // object.
+ APInt MaxVal = APInt::getMaxValue(BitWidth) - APInt(BitWidth, ObjSize);
+ uint64_t Align = U->getValue()->getPointerAlignment(DL).value();
+ uint64_t Rem = MaxVal.urem(Align);
+ MaxVal -= APInt(BitWidth, Rem);
+ APInt MinVal = APInt::getZero(BitWidth);
+ if (llvm::isKnownNonZero(V, DL))
+ MinVal = Align;
+ ConservativeResult = ConservativeResult.intersectWith(
+ {MinVal, MaxVal + 1}, RangeType);
+ }
+ }
+
// A range of Phi is a subset of union of all ranges of its input.
- if (PHINode *Phi = dyn_cast<PHINode>(U->getValue())) {
+ if (PHINode *Phi = dyn_cast<PHINode>(V)) {
// Make sure that we do not run over cycled Phis.
if (PendingPhiRanges.insert(Phi).second) {
ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false);
@@ -6881,7 +6859,7 @@ const ConstantRange &ScalarEvolution::getRangeRef(
}
// vscale can't be equal to zero
- if (const auto *II = dyn_cast<IntrinsicInst>(U->getValue()))
+ if (const auto *II = dyn_cast<IntrinsicInst>(V))
if (II->getIntrinsicID() == Intrinsic::vscale) {
ConstantRange Disallowed = APInt::getZero(BitWidth);
ConservativeResult = ConservativeResult.difference(Disallowed);
@@ -6903,7 +6881,10 @@ const ConstantRange &ScalarEvolution::getRangeRef(
static ConstantRange getRangeForAffineARHelper(APInt Step,
const ConstantRange &StartRange,
const APInt &MaxBECount,
- unsigned BitWidth, bool Signed) {
+ bool Signed) {
+ unsigned BitWidth = Step.getBitWidth();
+ assert(BitWidth == StartRange.getBitWidth() &&
+ BitWidth == MaxBECount.getBitWidth() && "mismatched bit widths");
// If either Step or MaxBECount is 0, then the expression won't change, and we
// just need to return the initial range.
if (Step == 0 || MaxBECount == 0)
@@ -6962,14 +6943,11 @@ static ConstantRange getRangeForAffineARHelper(APInt Step,
ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
const SCEV *Step,
- const SCEV *MaxBECount,
- unsigned BitWidth) {
- assert(!isa<SCEVCouldNotCompute>(MaxBECount) &&
- getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
- "Precondition!");
-
- MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType());
- APInt MaxBECountValue = getUnsignedRangeMax(MaxBECount);
+ const APInt &MaxBECount) {
+ assert(getTypeSizeInBits(Start->getType()) ==
+ getTypeSizeInBits(Step->getType()) &&
+ getTypeSizeInBits(Start->getType()) == MaxBECount.getBitWidth() &&
+ "mismatched bit widths");
// First, consider step signed.
ConstantRange StartSRange = getSignedRange(Start);
@@ -6977,17 +6955,16 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
// If Step can be both positive and negative, we need to find ranges for the
// maximum absolute step values in both directions and union them.
- ConstantRange SR =
- getRangeForAffineARHelper(StepSRange.getSignedMin(), StartSRange,
- MaxBECountValue, BitWidth, /* Signed = */ true);
+ ConstantRange SR = getRangeForAffineARHelper(
+ StepSRange.getSignedMin(), StartSRange, MaxBECount, /* Signed = */ true);
SR = SR.unionWith(getRangeForAffineARHelper(StepSRange.getSignedMax(),
- StartSRange, MaxBECountValue,
- BitWidth, /* Signed = */ true));
+ StartSRange, MaxBECount,
+ /* Signed = */ true));
// Next, consider step unsigned.
ConstantRange UR = getRangeForAffineARHelper(
- getUnsignedRangeMax(Step), getUnsignedRange(Start),
- MaxBECountValue, BitWidth, /* Signed = */ false);
+ getUnsignedRangeMax(Step), getUnsignedRange(Start), MaxBECount,
+ /* Signed = */ false);
// Finally, intersect signed and unsigned ranges.
return SR.intersectWith(UR, ConstantRange::Smallest);
@@ -7038,7 +7015,7 @@ ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
// outside and inside the range [Min(Start, End), Max(Start, End)]. Using that
// knowledge, let's try to prove that we are dealing with Case 1. It is so if
// Start <= End and step is positive, or Start >= End and step is negative.
- const SCEV *Start = AddRec->getStart();
+ const SCEV *Start = applyLoopGuards(AddRec->getStart(), AddRec->getLoop());
ConstantRange StartRange = getRangeRef(Start, SignHint);
ConstantRange EndRange = getRangeRef(End, SignHint);
ConstantRange RangeBetween = StartRange.unionWith(EndRange);
@@ -7055,7 +7032,7 @@ ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
if (isKnownPositive(Step) &&
isKnownPredicateViaConstantRanges(LEPred, Start, End))
return RangeBetween;
- else if (isKnownNegative(Step) &&
+ if (isKnownNegative(Step) &&
isKnownPredicateViaConstantRanges(GEPred, Start, End))
return RangeBetween;
return ConstantRange::getFull(BitWidth);
@@ -7063,11 +7040,15 @@ ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
const SCEV *Step,
- const SCEV *MaxBECount,
- unsigned BitWidth) {
+ const APInt &MaxBECount) {
// RangeOf({C?A:B,+,C?P:Q}) == RangeOf(C?{A,+,P}:{B,+,Q})
// == RangeOf({A,+,P}) union RangeOf({B,+,Q})
+ unsigned BitWidth = MaxBECount.getBitWidth();
+ assert(getTypeSizeInBits(Start->getType()) == BitWidth &&
+ getTypeSizeInBits(Step->getType()) == BitWidth &&
+ "mismatched bit widths");
+
struct SelectPattern {
Value *Condition = nullptr;
APInt TrueValue;
@@ -7169,9 +7150,9 @@ ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
const SCEV *FalseStep = this->getConstant(StepPattern.FalseValue);
ConstantRange TrueRange =
- this->getRangeForAffineAR(TrueStart, TrueStep, MaxBECount, BitWidth);
+ this->getRangeForAffineAR(TrueStart, TrueStep, MaxBECount);
ConstantRange FalseRange =
- this->getRangeForAffineAR(FalseStart, FalseStep, MaxBECount, BitWidth);
+ this->getRangeForAffineAR(FalseStart, FalseStep, MaxBECount);
return TrueRange.unionWith(FalseRange);
}
@@ -7294,62 +7275,43 @@ bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) {
if (isSCEVExprNeverPoison(I))
return true;
- // For an add recurrence specifically, we assume that infinite loops without
- // side effects are undefined behavior, and then reason as follows:
+ // If the loop only has one exit, then we know that, if the loop is entered,
+ // any instruction dominating that exit will be executed. If any such
+ // instruction would result in UB, the addrec cannot be poison.
//
- // If the add recurrence is poison in any iteration, it is poison on all
- // future iterations (since incrementing poison yields poison). If the result
- // of the add recurrence is fed into the loop latch condition and the loop
- // does not contain any throws or exiting blocks other than the latch, we now
- // have the ability to "choose" whether the backedge is taken or not (by
- // choosing a sufficiently evil value for the poison feeding into the branch)
- // for every iteration including and after the one in which \p I first became
- // poison. There are two possibilities (let's call the iteration in which \p
- // I first became poison as K):
- //
- // 1. In the set of iterations including and after K, the loop body executes
- // no side effects. In this case executing the backege an infinte number
- // of times will yield undefined behavior.
- //
- // 2. In the set of iterations including and after K, the loop body executes
- // at least one side effect. In this case, that specific instance of side
- // effect is control dependent on poison, which also yields undefined
- // behavior.
+ // This is basically the same reasoning as in isSCEVExprNeverPoison(), but
+ // also handles uses outside the loop header (they just need to dominate the
+ // single exit).
auto *ExitingBB = L->getExitingBlock();
- auto *LatchBB = L->getLoopLatch();
- if (!ExitingBB || !LatchBB || ExitingBB != LatchBB)
+ if (!ExitingBB || !loopHasNoAbnormalExits(L))
return false;
- SmallPtrSet<const Instruction *, 16> Pushed;
- SmallVector<const Instruction *, 8> PoisonStack;
+ SmallPtrSet<const Value *, 16> KnownPoison;
+ SmallVector<const Instruction *, 8> Worklist;
// We start by assuming \c I, the post-inc add recurrence, is poison. Only
// things that are known to be poison under that assumption go on the
- // PoisonStack.
- Pushed.insert(I);
- PoisonStack.push_back(I);
+ // Worklist.
+ KnownPoison.insert(I);
+ Worklist.push_back(I);
- bool LatchControlDependentOnPoison = false;
- while (!PoisonStack.empty() && !LatchControlDependentOnPoison) {
- const Instruction *Poison = PoisonStack.pop_back_val();
+ while (!Worklist.empty()) {
+ const Instruction *Poison = Worklist.pop_back_val();
for (const Use &U : Poison->uses()) {
- const User *PoisonUser = U.getUser();
- if (propagatesPoison(U)) {
- if (Pushed.insert(cast<Instruction>(PoisonUser)).second)
- PoisonStack.push_back(cast<Instruction>(PoisonUser));
- } else if (auto *BI = dyn_cast<BranchInst>(PoisonUser)) {
- assert(BI->isConditional() && "Only possibility!");
- if (BI->getParent() == LatchBB) {
- LatchControlDependentOnPoison = true;
- break;
- }
- }
+ const Instruction *PoisonUser = cast<Instruction>(U.getUser());
+ if (mustTriggerUB(PoisonUser, KnownPoison) &&
+ DT.dominates(PoisonUser->getParent(), ExitingBB))
+ return true;
+
+ if (propagatesPoison(U) && L->contains(PoisonUser))
+ if (KnownPoison.insert(PoisonUser).second)
+ Worklist.push_back(PoisonUser);
}
}
- return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L);
+ return false;
}
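// A minimal standalone sketch (illustration only, not part of this patch) of
// the worklist scheme the rewritten isAddRecNeverPoison() uses: starting from
// a value assumed to be poison, every use that propagates poison is enqueued;
// if some user that must trigger UB on poison is guaranteed to execute (in
// the real code: it lies in the loop and dominates the single exit), the
// assumption is refuted and the addrec cannot be poison. Users(),
// Propagates() and MustTriggerUB() are hypothetical stand-ins for the LLVM
// queries used above.
#include <functional>
#include <set>
#include <vector>
static bool poisonWouldTriggerUB(
    int Root, const std::function<std::vector<int>(int)> &Users,
    const std::function<bool(int)> &Propagates,
    const std::function<bool(int)> &MustTriggerUB) {
  std::set<int> KnownPoison{Root};
  std::vector<int> Worklist{Root};
  while (!Worklist.empty()) {
    int V = Worklist.back();
    Worklist.pop_back();
    for (int U : Users(V)) {
      if (MustTriggerUB(U))
        return true; // Poison at Root would make this execution UB.
      if (Propagates(U) && KnownPoison.insert(U).second)
        Worklist.push_back(U); // Keep following poison-carrying uses.
    }
  }
  return false;
}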
ScalarEvolution::LoopProperties
@@ -7448,13 +7410,9 @@ ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) {
return getUnknown(PoisonValue::get(V->getType()));
} else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
return getConstant(CI);
- else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
- if (!GA->isInterposable()) {
- Ops.push_back(GA->getAliasee());
- return nullptr;
- }
+ else if (isa<GlobalAlias>(V))
return getUnknown(V);
- } else if (!isa<ConstantExpr>(V))
+ else if (!isa<ConstantExpr>(V))
return getUnknown(V);
Operator *U = cast<Operator>(V);
@@ -7478,18 +7436,18 @@ ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) {
auto NewBO = MatchBinaryOp(BO->LHS, getDataLayout(), AC, DT,
dyn_cast<Instruction>(V));
if (!NewBO ||
- (U->getOpcode() == Instruction::Add &&
+ (BO->Opcode == Instruction::Add &&
(NewBO->Opcode != Instruction::Add &&
NewBO->Opcode != Instruction::Sub)) ||
- (U->getOpcode() == Instruction::Mul &&
+ (BO->Opcode == Instruction::Mul &&
NewBO->Opcode != Instruction::Mul)) {
Ops.push_back(BO->LHS);
break;
}
// CreateSCEV calls getNoWrapFlagsFromUB, which under certain conditions
// requires a SCEV for the LHS.
- if (NewBO->Op && (NewBO->IsNSW || NewBO->IsNUW)) {
- auto *I = dyn_cast<Instruction>(NewBO->Op);
+ if (BO->Op && (BO->IsNSW || BO->IsNUW)) {
+ auto *I = dyn_cast<Instruction>(BO->Op);
if (I && programUndefinedIfPoison(I)) {
Ops.push_back(BO->LHS);
break;
@@ -7511,7 +7469,7 @@ ScalarEvolution::getOperandsToCreate(Value *V, SmallVectorImpl<Value *> &Ops) {
break;
case Instruction::And:
case Instruction::Or:
- if (!IsConstArg && BO->LHS->getType()->isIntegerTy(1))
+ if (!IsConstArg && !BO->LHS->getType()->isIntegerTy(1))
return nullptr;
break;
case Instruction::LShr:
@@ -7638,8 +7596,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
return getUnknown(PoisonValue::get(V->getType()));
} else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
return getConstant(CI);
- else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))
- return GA->isInterposable() ? getUnknown(V) : getSCEV(GA->getAliasee());
+ else if (isa<GlobalAlias>(V))
+ return getUnknown(V);
else if (!isa<ConstantExpr>(V))
return getUnknown(V);
@@ -7762,8 +7720,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// constants, obscuring what would otherwise be a low-bits mask.
// Use computeKnownBits to compute what ShrinkDemandedConstant
// knew about to reconstruct a low-bits mask value.
- unsigned LZ = A.countLeadingZeros();
- unsigned TZ = A.countTrailingZeros();
+ unsigned LZ = A.countl_zero();
+ unsigned TZ = A.countr_zero();
unsigned BitWidth = A.getBitWidth();
KnownBits Known(BitWidth);
computeKnownBits(BO->LHS, Known, getDataLayout(),
@@ -7778,7 +7736,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
if (auto *LHSMul = dyn_cast<SCEVMulExpr>(LHS)) {
if (auto *OpC = dyn_cast<SCEVConstant>(LHSMul->getOperand(0))) {
// For an expression like (x * 8) & 8, simplify the multiply.
- unsigned MulZeros = OpC->getAPInt().countTrailingZeros();
+ unsigned MulZeros = OpC->getAPInt().countr_zero();
unsigned GCD = std::min(MulZeros, TZ);
APInt DivAmt = APInt::getOneBitSet(BitWidth, TZ - GCD);
SmallVector<const SCEV*, 4> MulOps;
@@ -8057,6 +8015,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
    // A start_loop_iterations or llvm.annotation or llvm.ptr.annotation is
    // just equivalent to the first operand for SCEV purposes.
return getSCEV(II->getArgOperand(0));
+ case Intrinsic::vscale:
+ return getVScale(II->getType());
default:
break;
}
@@ -8071,21 +8031,45 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// Iteration Count Computation Code
//
-const SCEV *ScalarEvolution::getTripCountFromExitCount(const SCEV *ExitCount,
- bool Extend) {
+const SCEV *ScalarEvolution::getTripCountFromExitCount(const SCEV *ExitCount) {
if (isa<SCEVCouldNotCompute>(ExitCount))
return getCouldNotCompute();
auto *ExitCountType = ExitCount->getType();
assert(ExitCountType->isIntegerTy());
+ auto *EvalTy = Type::getIntNTy(ExitCountType->getContext(),
+ 1 + ExitCountType->getScalarSizeInBits());
+ return getTripCountFromExitCount(ExitCount, EvalTy, nullptr);
+}
+
+const SCEV *ScalarEvolution::getTripCountFromExitCount(const SCEV *ExitCount,
+ Type *EvalTy,
+ const Loop *L) {
+ if (isa<SCEVCouldNotCompute>(ExitCount))
+ return getCouldNotCompute();
+
+ unsigned ExitCountSize = getTypeSizeInBits(ExitCount->getType());
+ unsigned EvalSize = EvalTy->getPrimitiveSizeInBits();
- if (!Extend)
- return getAddExpr(ExitCount, getOne(ExitCountType));
+ auto CanAddOneWithoutOverflow = [&]() {
+ ConstantRange ExitCountRange =
+ getRangeRef(ExitCount, RangeSignHint::HINT_RANGE_UNSIGNED);
+ if (!ExitCountRange.contains(APInt::getMaxValue(ExitCountSize)))
+ return true;
+
+ return L && isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
+ getMinusOne(ExitCount->getType()));
+ };
+
+ // If we need to zero extend the backedge count, check if we can add one to
+ // it prior to zero extending without overflow. Provided this is safe, it
+ // allows better simplification of the +1.
+ if (EvalSize > ExitCountSize && CanAddOneWithoutOverflow())
+ return getZeroExtendExpr(
+ getAddExpr(ExitCount, getOne(ExitCount->getType())), EvalTy);
- auto *WiderType = Type::getIntNTy(ExitCountType->getContext(),
- 1 + ExitCountType->getScalarSizeInBits());
- return getAddExpr(getNoopOrZeroExtend(ExitCount, WiderType),
- getOne(WiderType));
+ // Get the total trip count from the count by adding 1. This may wrap.
+ return getAddExpr(getTruncateOrZeroExtend(ExitCount, EvalTy), getOne(EvalTy));
}
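// An illustrative sketch (not part of this patch) of the overflow reasoning
// in the new getTripCountFromExitCount() overloads. The trip count is the
// exit count plus one, and that +1 can wrap in the exit count's own width;
// widening first is always safe, while adding one before widening is only
// safe when the exit count is known not to be the all-ones value, which is
// what the CanAddOneWithoutOverflow lambda establishes. Shown on fixed-width
// integers instead of SCEVs.
#include <cassert>
#include <cstdint>
static uint16_t tripCountWidenThenAdd(uint8_t ExitCount) {
  return static_cast<uint16_t>(ExitCount) + 1; // Never wraps: 255 -> 256.
}
static uint16_t tripCountAddThenWiden(uint8_t ExitCount) {
  assert(ExitCount != 0xFF && "only valid when +1 cannot wrap in 8 bits");
  uint8_t Narrow = static_cast<uint8_t>(ExitCount + 1); // Safe by assumption.
  return Narrow; // Zero-extend the already-incremented narrow value.
}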
static unsigned getConstantTripCount(const SCEVConstant *ExitCount) {
@@ -8124,126 +8108,6 @@ unsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) {
return getConstantTripCount(MaxExitCount);
}
-const SCEV *ScalarEvolution::getConstantMaxTripCountFromArray(const Loop *L) {
- // We can't infer from Array in Irregular Loop.
- // FIXME: It's hard to infer loop bound from array operated in Nested Loop.
- if (!L->isLoopSimplifyForm() || !L->isInnermost())
- return getCouldNotCompute();
-
- // FIXME: To make the scene more typical, we only analysis loops that have
- // one exiting block and that block must be the latch. To make it easier to
- // capture loops that have memory access and memory access will be executed
- // in each iteration.
- const BasicBlock *LoopLatch = L->getLoopLatch();
- assert(LoopLatch && "See defination of simplify form loop.");
- if (L->getExitingBlock() != LoopLatch)
- return getCouldNotCompute();
-
- const DataLayout &DL = getDataLayout();
- SmallVector<const SCEV *> InferCountColl;
- for (auto *BB : L->getBlocks()) {
- // Go here, we can know that Loop is a single exiting and simplified form
- // loop. Make sure that infer from Memory Operation in those BBs must be
- // executed in loop. First step, we can make sure that max execution time
- // of MemAccessBB in loop represents latch max excution time.
- // If MemAccessBB does not dom Latch, skip.
- // Entry
- // │
- // ┌─────▼─────┐
- // │Loop Header◄─────┐
- // └──┬──────┬─┘ │
- // │ │ │
- // ┌────────▼──┐ ┌─▼─────┐ │
- // │MemAccessBB│ │OtherBB│ │
- // └────────┬──┘ └─┬─────┘ │
- // │ │ │
- // ┌─▼──────▼─┐ │
- // │Loop Latch├─────┘
- // └────┬─────┘
- // ▼
- // Exit
- if (!DT.dominates(BB, LoopLatch))
- continue;
-
- for (Instruction &Inst : *BB) {
- // Find Memory Operation Instruction.
- auto *GEP = getLoadStorePointerOperand(&Inst);
- if (!GEP)
- continue;
-
- auto *ElemSize = dyn_cast<SCEVConstant>(getElementSize(&Inst));
- // Do not infer from scalar type, eg."ElemSize = sizeof()".
- if (!ElemSize)
- continue;
-
- // Use a existing polynomial recurrence on the trip count.
- auto *AddRec = dyn_cast<SCEVAddRecExpr>(getSCEV(GEP));
- if (!AddRec)
- continue;
- auto *ArrBase = dyn_cast<SCEVUnknown>(getPointerBase(AddRec));
- auto *Step = dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*this));
- if (!ArrBase || !Step)
- continue;
- assert(isLoopInvariant(ArrBase, L) && "See addrec definition");
-
- // Only handle { %array + step },
- // FIXME: {(SCEVAddRecExpr) + step } could not be analysed here.
- if (AddRec->getStart() != ArrBase)
- continue;
-
- // Memory operation pattern which have gaps.
- // Or repeat memory opreation.
- // And index of GEP wraps arround.
- if (Step->getAPInt().getActiveBits() > 32 ||
- Step->getAPInt().getZExtValue() !=
- ElemSize->getAPInt().getZExtValue() ||
- Step->isZero() || Step->getAPInt().isNegative())
- continue;
-
- // Only infer from stack array which has certain size.
- // Make sure alloca instruction is not excuted in loop.
- AllocaInst *AllocateInst = dyn_cast<AllocaInst>(ArrBase->getValue());
- if (!AllocateInst || L->contains(AllocateInst->getParent()))
- continue;
-
- // Make sure only handle normal array.
- auto *Ty = dyn_cast<ArrayType>(AllocateInst->getAllocatedType());
- auto *ArrSize = dyn_cast<ConstantInt>(AllocateInst->getArraySize());
- if (!Ty || !ArrSize || !ArrSize->isOne())
- continue;
-
- // FIXME: Since gep indices are silently zext to the indexing type,
- // we will have a narrow gep index which wraps around rather than
- // increasing strictly, we shoule ensure that step is increasing
- // strictly by the loop iteration.
- // Now we can infer a max execution time by MemLength/StepLength.
- const SCEV *MemSize =
- getConstant(Step->getType(), DL.getTypeAllocSize(Ty));
- auto *MaxExeCount =
- dyn_cast<SCEVConstant>(getUDivCeilSCEV(MemSize, Step));
- if (!MaxExeCount || MaxExeCount->getAPInt().getActiveBits() > 32)
- continue;
-
- // If the loop reaches the maximum number of executions, we can not
- // access bytes starting outside the statically allocated size without
- // being immediate UB. But it is allowed to enter loop header one more
- // time.
- auto *InferCount = dyn_cast<SCEVConstant>(
- getAddExpr(MaxExeCount, getOne(MaxExeCount->getType())));
- // Discard the maximum number of execution times under 32bits.
- if (!InferCount || InferCount->getAPInt().getActiveBits() > 32)
- continue;
-
- InferCountColl.push_back(InferCount);
- }
- }
-
- if (InferCountColl.size() == 0)
- return getCouldNotCompute();
-
- return getUMinFromMismatchedTypes(InferCountColl);
-}
-
unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) {
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -8264,26 +8128,14 @@ unsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L,
return 1;
// Get the trip count
- const SCEV *TCExpr = getTripCountFromExitCount(ExitCount);
-
- const SCEVConstant *TC = dyn_cast<SCEVConstant>(TCExpr);
- if (!TC)
- // Attempt to factor more general cases. Returns the greatest power of
- // two divisor. If overflow happens, the trip count expression is still
- // divisible by the greatest power of 2 divisor returned.
- return 1U << std::min((uint32_t)31,
- GetMinTrailingZeros(applyLoopGuards(TCExpr, L)));
-
- ConstantInt *Result = TC->getValue();
-
- // Guard against huge trip counts (this requires checking
- // for zero to handle the case where the trip count == -1 and the
- // addition wraps).
- if (!Result || Result->getValue().getActiveBits() > 32 ||
- Result->getValue().getActiveBits() == 0)
- return 1;
+ const SCEV *TCExpr = getTripCountFromExitCount(applyLoopGuards(ExitCount, L));
- return (unsigned)Result->getZExtValue();
+ APInt Multiple = getNonZeroConstantMultiple(TCExpr);
+ // If a trip multiple is huge (>=2^32), the trip count is still divisible by
+ // the greatest power of 2 divisor less than 2^32.
+ return Multiple.getActiveBits() > 32
+ ? 1U << std::min((unsigned)31, Multiple.countTrailingZeros())
+ : (unsigned)Multiple.zextOrTrunc(32).getZExtValue();
}
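// A small sketch (not part of this patch) of how the new return statement
// folds a known constant multiple of the trip count into a 32-bit trip
// multiple: a multiple needing more than 32 bits is replaced by its largest
// power-of-two divisor below 2^32, i.e. 1 << min(31, trailing zero bits).
// uint64_t stands in for APInt here.
#include <cstdint>
static unsigned tripMultipleFrom(uint64_t Multiple) {
  if (Multiple == 0)
    return 1; // getNonZeroConstantMultiple() never returns 0; guard anyway.
  if ((Multiple >> 32) != 0) {
    unsigned TZ = 0;
    while (TZ < 31 && ((Multiple >> TZ) & 1) == 0)
      ++TZ; // Count trailing zeros, capped at 31.
    return 1u << TZ;
  }
  return static_cast<unsigned>(Multiple);
}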
/// Returns the largest constant divisor of the trip count of this loop as a
@@ -8391,23 +8243,6 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// must be cleared in this scope.
BackedgeTakenInfo Result = computeBackedgeTakenCount(L);
- // In product build, there are no usage of statistic.
- (void)NumTripCountsComputed;
- (void)NumTripCountsNotComputed;
-#if LLVM_ENABLE_STATS || !defined(NDEBUG)
- const SCEV *BEExact = Result.getExact(L, this);
- if (BEExact != getCouldNotCompute()) {
- assert(isLoopInvariant(BEExact, L) &&
- isLoopInvariant(Result.getConstantMax(this), L) &&
- "Computed backedge-taken count isn't loop invariant for loop!");
- ++NumTripCountsComputed;
- } else if (Result.getConstantMax(this) == getCouldNotCompute() &&
- isa<PHINode>(L->getHeader()->begin())) {
- // Only count loops that have phi nodes as not being computable.
- ++NumTripCountsNotComputed;
- }
-#endif // LLVM_ENABLE_STATS || !defined(NDEBUG)
-
// Now that we know more about the trip count for this loop, forget any
// existing SCEV values for PHI nodes in this loop since they are only
// conservative estimates made without the benefit of trip count
@@ -8454,11 +8289,32 @@ void ScalarEvolution::forgetAllLoops() {
SignedRanges.clear();
ExprValueMap.clear();
HasRecMap.clear();
- MinTrailingZerosCache.clear();
+ ConstantMultipleCache.clear();
PredicatedSCEVRewrites.clear();
FoldCache.clear();
FoldCacheUser.clear();
}
+void ScalarEvolution::visitAndClearUsers(
+ SmallVectorImpl<Instruction *> &Worklist,
+ SmallPtrSetImpl<Instruction *> &Visited,
+ SmallVectorImpl<const SCEV *> &ToForget) {
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+ if (!isSCEVable(I->getType()))
+ continue;
+
+ ValueExprMapType::iterator It =
+ ValueExprMap.find_as(static_cast<Value *>(I));
+ if (It != ValueExprMap.end()) {
+ eraseValueFromMap(It->first);
+ ToForget.push_back(It->second);
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist, Visited);
+ }
+}
void ScalarEvolution::forgetLoop(const Loop *L) {
SmallVector<const Loop *, 16> LoopWorklist(1, L);
@@ -8492,21 +8348,7 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
// Drop information about expressions based on loop-header PHIs.
PushLoopPHIs(CurrL, Worklist, Visited);
-
- while (!Worklist.empty()) {
- Instruction *I = Worklist.pop_back_val();
-
- ValueExprMapType::iterator It =
- ValueExprMap.find_as(static_cast<Value *>(I));
- if (It != ValueExprMap.end()) {
- eraseValueFromMap(It->first);
- ToForget.push_back(It->second);
- if (PHINode *PN = dyn_cast<PHINode>(I))
- ConstantEvolutionLoopExitValue.erase(PN);
- }
-
- PushDefUseChildren(I, Worklist, Visited);
- }
+ visitAndClearUsers(Worklist, Visited, ToForget);
LoopPropertiesCache.erase(CurrL);
// Forget all contained loops too, to avoid dangling entries in the
@@ -8530,20 +8372,8 @@ void ScalarEvolution::forgetValue(Value *V) {
SmallVector<const SCEV *, 8> ToForget;
Worklist.push_back(I);
Visited.insert(I);
+ visitAndClearUsers(Worklist, Visited, ToForget);
- while (!Worklist.empty()) {
- I = Worklist.pop_back_val();
- ValueExprMapType::iterator It =
- ValueExprMap.find_as(static_cast<Value *>(I));
- if (It != ValueExprMap.end()) {
- eraseValueFromMap(It->first);
- ToForget.push_back(It->second);
- if (PHINode *PN = dyn_cast<PHINode>(I))
- ConstantEvolutionLoopExitValue.erase(PN);
- }
-
- PushDefUseChildren(I, Worklist, Visited);
- }
forgetMemoizedResults(ToForget);
}
@@ -8798,7 +8628,9 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
// 1. For each exit that can be computed, add an entry to ExitCounts.
// CouldComputeBECount is true only if all exits can be computed.
- if (EL.ExactNotTaken == getCouldNotCompute())
+ if (EL.ExactNotTaken != getCouldNotCompute())
+ ++NumExitCountsComputed;
+ else
// We couldn't compute an exact value for this exit, so
// we won't be able to compute an exact value for the loop.
CouldComputeBECount = false;
@@ -8806,9 +8638,11 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
// Exact always implies symbolic, only check symbolic.
if (EL.SymbolicMaxNotTaken != getCouldNotCompute())
ExitCounts.emplace_back(ExitBB, EL);
- else
+ else {
assert(EL.ExactNotTaken == getCouldNotCompute() &&
"Exact is known but symbolic isn't?");
+ ++NumExitCountsNotComputed;
+ }
// 2. Derive the loop's MaxBECount from each exit's max number of
// non-exiting iterations. Partition the loop exits into two kinds:
@@ -8878,9 +8712,9 @@ ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
assert(ExitIfTrue == L->contains(BI->getSuccessor(1)) &&
"It should have one successor in loop and one exit block!");
// Proceed to the next level to examine the exit condition expression.
- return computeExitLimitFromCond(
- L, BI->getCondition(), ExitIfTrue,
- /*ControlsExit=*/IsOnlyExit, AllowPredicates);
+ return computeExitLimitFromCond(L, BI->getCondition(), ExitIfTrue,
+ /*ControlsOnlyExit=*/IsOnlyExit,
+ AllowPredicates);
}
if (SwitchInst *SI = dyn_cast<SwitchInst>(Term)) {
@@ -8893,24 +8727,25 @@ ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
Exit = SBB;
}
assert(Exit && "Exiting block must have at least one exit");
- return computeExitLimitFromSingleExitSwitch(L, SI, Exit,
- /*ControlsExit=*/IsOnlyExit);
+ return computeExitLimitFromSingleExitSwitch(
+ L, SI, Exit,
+ /*ControlsOnlyExit=*/IsOnlyExit);
}
return getCouldNotCompute();
}
ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCond(
- const Loop *L, Value *ExitCond, bool ExitIfTrue,
- bool ControlsExit, bool AllowPredicates) {
+ const Loop *L, Value *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit,
+ bool AllowPredicates) {
ScalarEvolution::ExitLimitCacheTy Cache(L, ExitIfTrue, AllowPredicates);
return computeExitLimitFromCondCached(Cache, L, ExitCond, ExitIfTrue,
- ControlsExit, AllowPredicates);
+ ControlsOnlyExit, AllowPredicates);
}
std::optional<ScalarEvolution::ExitLimit>
ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond,
- bool ExitIfTrue, bool ControlsExit,
+ bool ExitIfTrue, bool ControlsOnlyExit,
bool AllowPredicates) {
(void)this->L;
(void)this->ExitIfTrue;
@@ -8919,7 +8754,7 @@ ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond,
assert(this->L == L && this->ExitIfTrue == ExitIfTrue &&
this->AllowPredicates == AllowPredicates &&
"Variance in assumed invariant key components!");
- auto Itr = TripCountMap.find({ExitCond, ControlsExit});
+ auto Itr = TripCountMap.find({ExitCond, ControlsOnlyExit});
if (Itr == TripCountMap.end())
return std::nullopt;
return Itr->second;
@@ -8927,14 +8762,14 @@ ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond,
void ScalarEvolution::ExitLimitCache::insert(const Loop *L, Value *ExitCond,
bool ExitIfTrue,
- bool ControlsExit,
+ bool ControlsOnlyExit,
bool AllowPredicates,
const ExitLimit &EL) {
assert(this->L == L && this->ExitIfTrue == ExitIfTrue &&
this->AllowPredicates == AllowPredicates &&
"Variance in assumed invariant key components!");
- auto InsertResult = TripCountMap.insert({{ExitCond, ControlsExit}, EL});
+ auto InsertResult = TripCountMap.insert({{ExitCond, ControlsOnlyExit}, EL});
assert(InsertResult.second && "Expected successful insertion!");
(void)InsertResult;
(void)ExitIfTrue;
@@ -8942,36 +8777,37 @@ void ScalarEvolution::ExitLimitCache::insert(const Loop *L, Value *ExitCond,
ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached(
ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
- bool ControlsExit, bool AllowPredicates) {
+ bool ControlsOnlyExit, bool AllowPredicates) {
- if (auto MaybeEL =
- Cache.find(L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates))
+ if (auto MaybeEL = Cache.find(L, ExitCond, ExitIfTrue, ControlsOnlyExit,
+ AllowPredicates))
return *MaybeEL;
- ExitLimit EL = computeExitLimitFromCondImpl(Cache, L, ExitCond, ExitIfTrue,
- ControlsExit, AllowPredicates);
- Cache.insert(L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates, EL);
+ ExitLimit EL = computeExitLimitFromCondImpl(
+ Cache, L, ExitCond, ExitIfTrue, ControlsOnlyExit, AllowPredicates);
+ Cache.insert(L, ExitCond, ExitIfTrue, ControlsOnlyExit, AllowPredicates, EL);
return EL;
}
ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
- bool ControlsExit, bool AllowPredicates) {
+ bool ControlsOnlyExit, bool AllowPredicates) {
// Handle BinOp conditions (And, Or).
if (auto LimitFromBinOp = computeExitLimitFromCondFromBinOp(
- Cache, L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates))
+ Cache, L, ExitCond, ExitIfTrue, ControlsOnlyExit, AllowPredicates))
return *LimitFromBinOp;
// With an icmp, it may be feasible to compute an exact backedge-taken count.
// Proceed to the next level to examine the icmp.
if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) {
ExitLimit EL =
- computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsExit);
+ computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsOnlyExit);
if (EL.hasFullInfo() || !AllowPredicates)
return EL;
// Try again, but use SCEV predicates this time.
- return computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsExit,
+ return computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue,
+ ControlsOnlyExit,
/*AllowPredicates=*/true);
}
@@ -8983,9 +8819,8 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
if (ExitIfTrue == !CI->getZExtValue())
// The backedge is always taken.
return getCouldNotCompute();
- else
- // The backedge is never taken.
- return getZero(CI->getType());
+ // The backedge is never taken.
+ return getZero(CI->getType());
}
// If we're exiting based on the overflow flag of an x.with.overflow intrinsic
@@ -9007,8 +8842,9 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
if (Offset != 0)
LHS = getAddExpr(LHS, getConstant(Offset));
auto EL = computeExitLimitFromICmp(L, Pred, LHS, getConstant(NewRHSC),
- ControlsExit, AllowPredicates);
- if (EL.hasAnyInfo()) return EL;
+ ControlsOnlyExit, AllowPredicates);
+ if (EL.hasAnyInfo())
+ return EL;
}
// If it's not an integer or pointer comparison then compute it the hard way.
@@ -9018,7 +8854,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
std::optional<ScalarEvolution::ExitLimit>
ScalarEvolution::computeExitLimitFromCondFromBinOp(
ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
- bool ControlsExit, bool AllowPredicates) {
+ bool ControlsOnlyExit, bool AllowPredicates) {
// Check if the controlling expression for this loop is an And or Or.
Value *Op0, *Op1;
bool IsAnd = false;
@@ -9033,12 +8869,12 @@ ScalarEvolution::computeExitLimitFromCondFromBinOp(
// br (and Op0 Op1), loop, exit
// br (or Op0 Op1), exit, loop
bool EitherMayExit = IsAnd ^ ExitIfTrue;
- ExitLimit EL0 = computeExitLimitFromCondCached(Cache, L, Op0, ExitIfTrue,
- ControlsExit && !EitherMayExit,
- AllowPredicates);
- ExitLimit EL1 = computeExitLimitFromCondCached(Cache, L, Op1, ExitIfTrue,
- ControlsExit && !EitherMayExit,
- AllowPredicates);
+ ExitLimit EL0 = computeExitLimitFromCondCached(
+ Cache, L, Op0, ExitIfTrue, ControlsOnlyExit && !EitherMayExit,
+ AllowPredicates);
+ ExitLimit EL1 = computeExitLimitFromCondCached(
+ Cache, L, Op1, ExitIfTrue, ControlsOnlyExit && !EitherMayExit,
+ AllowPredicates);
// Be robust against unsimplified IR for the form "op i1 X, NeutralElement"
const Constant *NeutralElement = ConstantInt::get(ExitCond->getType(), IsAnd);
@@ -9096,12 +8932,9 @@ ScalarEvolution::computeExitLimitFromCondFromBinOp(
{ &EL0.Predicates, &EL1.Predicates });
}
-ScalarEvolution::ExitLimit
-ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
- ICmpInst *ExitCond,
- bool ExitIfTrue,
- bool ControlsExit,
- bool AllowPredicates) {
+ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp(
+ const Loop *L, ICmpInst *ExitCond, bool ExitIfTrue, bool ControlsOnlyExit,
+ bool AllowPredicates) {
// If the condition was exit on true, convert the condition to exit on false
ICmpInst::Predicate Pred;
if (!ExitIfTrue)
@@ -9113,9 +8946,10 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
const SCEV *RHS = getSCEV(ExitCond->getOperand(1));
- ExitLimit EL = computeExitLimitFromICmp(L, Pred, LHS, RHS, ControlsExit,
+ ExitLimit EL = computeExitLimitFromICmp(L, Pred, LHS, RHS, ControlsOnlyExit,
AllowPredicates);
- if (EL.hasAnyInfo()) return EL;
+ if (EL.hasAnyInfo())
+ return EL;
auto *ExhaustiveCount =
computeExitCountExhaustively(L, ExitCond, ExitIfTrue);
@@ -9126,12 +8960,9 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
return computeShiftCompareExitLimit(ExitCond->getOperand(0),
ExitCond->getOperand(1), L, OriginalPred);
}
-ScalarEvolution::ExitLimit
-ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
- ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS,
- bool ControlsExit,
- bool AllowPredicates) {
+ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromICmp(
+ const Loop *L, ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
+ bool ControlsOnlyExit, bool AllowPredicates) {
// Try to evaluate any dependencies out of the loop.
LHS = getSCEVAtScope(LHS, L);
@@ -9145,12 +8976,10 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
Pred = ICmpInst::getSwappedPredicate(Pred);
}
- bool ControllingFiniteLoop =
- ControlsExit && loopHasNoAbnormalExits(L) && loopIsFiniteByAssumption(L);
+ bool ControllingFiniteLoop = ControlsOnlyExit && loopHasNoAbnormalExits(L) &&
+ loopIsFiniteByAssumption(L);
// Simplify the operands before analyzing them.
- (void)SimplifyICmpOperands(Pred, LHS, RHS, /*Depth=*/0,
- (EnableFiniteLoopControl ? ControllingFiniteLoop
- : false));
+ (void)SimplifyICmpOperands(Pred, LHS, RHS, /*Depth=*/0);
// If we have a comparison of a chrec against a constant, try to use value
// ranges to answer this query.
@@ -9202,9 +9031,10 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
if (isa<SCEVCouldNotCompute>(RHS))
return RHS;
}
- ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit,
+ ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsOnlyExit,
AllowPredicates);
- if (EL.hasAnyInfo()) return EL;
+ if (EL.hasAnyInfo())
+ return EL;
break;
}
case ICmpInst::ICMP_EQ: { // while (X == Y)
@@ -9223,21 +9053,40 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
if (EL.hasAnyInfo()) return EL;
break;
}
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_ULE:
+ // Since the loop is finite, an invariant RHS cannot include the boundary
+ // value, otherwise it would loop forever.
+ if (!EnableFiniteLoopControl || !ControllingFiniteLoop ||
+ !isLoopInvariant(RHS, L))
+ break;
+ RHS = getAddExpr(getOne(RHS->getType()), RHS);
+ [[fallthrough]];
case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_ULT: { // while (X < Y)
- bool IsSigned = Pred == ICmpInst::ICMP_SLT;
- ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsExit,
+ case ICmpInst::ICMP_ULT: { // while (X < Y)
+ bool IsSigned = ICmpInst::isSigned(Pred);
+ ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsOnlyExit,
AllowPredicates);
- if (EL.hasAnyInfo()) return EL;
+ if (EL.hasAnyInfo())
+ return EL;
break;
}
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGE:
+ // Since the loop is finite, an invariant RHS cannot include the boundary
+ // value, otherwise it would loop forever.
+ if (!EnableFiniteLoopControl || !ControllingFiniteLoop ||
+ !isLoopInvariant(RHS, L))
+ break;
+ RHS = getAddExpr(getMinusOne(RHS->getType()), RHS);
+ [[fallthrough]];
case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_UGT: { // while (X > Y)
- bool IsSigned = Pred == ICmpInst::ICMP_SGT;
- ExitLimit EL =
- howManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit,
- AllowPredicates);
- if (EL.hasAnyInfo()) return EL;
+ case ICmpInst::ICMP_UGT: { // while (X > Y)
+ bool IsSigned = ICmpInst::isSigned(Pred);
+ ExitLimit EL = howManyGreaterThans(LHS, RHS, L, IsSigned, ControlsOnlyExit,
+ AllowPredicates);
+ if (EL.hasAnyInfo())
+ return EL;
break;
}
default:
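// A brief sketch (not part of this patch) of why the new ICMP_SLE/ULE and
// ICMP_SGE/UGE cases may turn an inclusive bound into a strict one. For the
// sole exit of a loop that is assumed finite, a loop-invariant bound can
// never be the extreme value of its type (then "i <= Bound" could never
// become false and the loop would run forever), so Bound + 1 cannot wrap and
// "i <= Bound" is the same exit test as "i < Bound + 1". Shown on 8-bit
// unsigned values.
#include <cassert>
#include <cstdint>
static bool exitTakenInclusive(uint8_t I, uint8_t Bound) {
  return !(I <= Bound);
}
static bool exitTakenStrict(uint8_t I, uint8_t Bound) {
  assert(Bound != UINT8_MAX && "finite-loop assumption: bound is not all-ones");
  return !(I < static_cast<uint8_t>(Bound + 1u)); // Equivalent under the assert.
}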
@@ -9251,7 +9100,7 @@ ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L,
SwitchInst *Switch,
BasicBlock *ExitingBlock,
- bool ControlsExit) {
+ bool ControlsOnlyExit) {
assert(!L->contains(ExitingBlock) && "Not an exiting block!");
// Give up if the exit is the default dest of a switch.
@@ -9264,7 +9113,7 @@ ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L,
const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock));
// while (X != Y) --> while (X-Y != 0)
- ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit);
+ ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsOnlyExit);
if (EL.hasAnyInfo())
return EL;
@@ -9762,6 +9611,7 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
switch (V->getSCEVType()) {
case scCouldNotCompute:
case scAddRecExpr:
+ case scVScale:
return nullptr;
case scConstant:
return cast<SCEVConstant>(V)->getValue();
@@ -9842,9 +9692,46 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
llvm_unreachable("Unknown SCEV kind!");
}
+const SCEV *
+ScalarEvolution::getWithOperands(const SCEV *S,
+ SmallVectorImpl<const SCEV *> &NewOps) {
+ switch (S->getSCEVType()) {
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ case scPtrToInt:
+ return getCastExpr(S->getSCEVType(), NewOps[0], S->getType());
+ case scAddRecExpr: {
+ auto *AddRec = cast<SCEVAddRecExpr>(S);
+ return getAddRecExpr(NewOps, AddRec->getLoop(), AddRec->getNoWrapFlags());
+ }
+ case scAddExpr:
+ return getAddExpr(NewOps, cast<SCEVAddExpr>(S)->getNoWrapFlags());
+ case scMulExpr:
+ return getMulExpr(NewOps, cast<SCEVMulExpr>(S)->getNoWrapFlags());
+ case scUDivExpr:
+ return getUDivExpr(NewOps[0], NewOps[1]);
+ case scUMaxExpr:
+ case scSMaxExpr:
+ case scUMinExpr:
+ case scSMinExpr:
+ return getMinMaxExpr(S->getSCEVType(), NewOps);
+ case scSequentialUMinExpr:
+ return getSequentialMinMaxExpr(S->getSCEVType(), NewOps);
+ case scConstant:
+ case scVScale:
+ case scUnknown:
+ return S;
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+}
+
const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
switch (V->getSCEVType()) {
case scConstant:
+ case scVScale:
return V;
case scAddRecExpr: {
// If this is a loop recurrence for a loop that does not contain L, then we
@@ -9923,32 +9810,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
NewOps.push_back(OpAtScope);
}
- switch (V->getSCEVType()) {
- case scTruncate:
- case scZeroExtend:
- case scSignExtend:
- case scPtrToInt:
- return getCastExpr(V->getSCEVType(), NewOps[0], V->getType());
- case scAddExpr:
- return getAddExpr(NewOps, cast<SCEVAddExpr>(V)->getNoWrapFlags());
- case scMulExpr:
- return getMulExpr(NewOps, cast<SCEVMulExpr>(V)->getNoWrapFlags());
- case scUDivExpr:
- return getUDivExpr(NewOps[0], NewOps[1]);
- case scUMaxExpr:
- case scSMaxExpr:
- case scUMinExpr:
- case scSMinExpr:
- return getMinMaxExpr(V->getSCEVType(), NewOps);
- case scSequentialUMinExpr:
- return getSequentialMinMaxExpr(V->getSCEVType(), NewOps);
- case scConstant:
- case scAddRecExpr:
- case scUnknown:
- case scCouldNotCompute:
- llvm_unreachable("Can not get those expressions here.");
- }
- llvm_unreachable("Unknown n-ary-like SCEV type!");
+ return getWithOperands(V, NewOps);
}
}
// If we got here, all operands are loop invariant.
@@ -10012,17 +9874,6 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
return getSCEV(RV);
}
}
-
- // If there is a single-input Phi, evaluate it at our scope. If we can
- // prove that this replacement does not break LCSSA form, use new value.
- if (PN->getNumOperands() == 1) {
- const SCEV *Input = getSCEV(PN->getOperand(0));
- const SCEV *InputAtScope = getSCEVAtScope(Input, L);
- // TODO: We can generalize it using LI.replacementPreservesLCSSAForm,
- // for the simplest case just support constants.
- if (isa<SCEVConstant>(InputAtScope))
- return InputAtScope;
- }
}
// Okay, this is an expression that we cannot symbolically evaluate
@@ -10108,14 +9959,14 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B,
//
// The gcd of A and N may have only one prime factor: 2. The number of
// trailing zeros in A is its multiplicity
- uint32_t Mult2 = A.countTrailingZeros();
+ uint32_t Mult2 = A.countr_zero();
// D = 2^Mult2
// 2. Check if B is divisible by D.
//
// B is divisible by D if and only if the multiplicity of prime factor 2 for B
// is not less than multiplicity of this prime factor for D.
- if (SE.GetMinTrailingZeros(B) < Mult2)
+ if (SE.getMinTrailingZeros(B) < Mult2)
return SE.getCouldNotCompute();
// 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
@@ -10410,9 +10261,10 @@ SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec,
return TruncIfPossible(MinOptional(SL.first, SU.first), BitWidth);
}
-ScalarEvolution::ExitLimit
-ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
- bool AllowPredicates) {
+ScalarEvolution::ExitLimit ScalarEvolution::howFarToZero(const SCEV *V,
+ const Loop *L,
+ bool ControlsOnlyExit,
+ bool AllowPredicates) {
// This is only used for loops with a "x != y" exit test. The exit condition
// is now expressed as a single expression, V = x-y. So the exit test is
@@ -10521,7 +10373,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// compute the backedge count. In this case, the step may not divide the
// distance, but we don't care because if the condition is "missed" the loop
// will have undefined behavior due to wrapping.
- if (ControlsExit && AddRec->hasNoSelfWrap() &&
+ if (ControlsOnlyExit && AddRec->hasNoSelfWrap() &&
loopHasNoAbnormalExits(AddRec->getLoop())) {
const SCEV *Exact =
getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
@@ -10616,8 +10468,7 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) {
bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
const SCEV *&LHS, const SCEV *&RHS,
- unsigned Depth,
- bool ControllingFiniteLoop) {
+ unsigned Depth) {
bool Changed = false;
// Simplifies ICMP to trivial true or false by turning it into '0 == 0' or
// '0 != 0'.
@@ -10638,8 +10489,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
LHSC->getValue(),
RHSC->getValue())->isNullValue())
return TrivialCase(false);
- else
- return TrivialCase(true);
+ return TrivialCase(true);
}
// Otherwise swap the operands to put the constant on the right.
std::swap(LHS, RHS);
@@ -10670,7 +10520,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
ConstantRange ExactCR = ConstantRange::makeExactICmpRegion(Pred, RA);
if (ExactCR.isFullSet())
return TrivialCase(true);
- else if (ExactCR.isEmptySet())
+ if (ExactCR.isEmptySet())
return TrivialCase(false);
APInt NewRHS;
@@ -10746,15 +10596,10 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
}
// If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by
- // adding or subtracting 1 from one of the operands. This can be done for
- // one of two reasons:
- // 1) The range of the RHS does not include the (signed/unsigned) boundaries
- // 2) The loop is finite, with this comparison controlling the exit. Since the
- // loop is finite, the bound cannot include the corresponding boundary
- // (otherwise it would loop forever).
+ // adding or subtracting 1 from one of the operands.
switch (Pred) {
case ICmpInst::ICMP_SLE:
- if (ControllingFiniteLoop || !getSignedRangeMax(RHS).isMaxSignedValue()) {
+ if (!getSignedRangeMax(RHS).isMaxSignedValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SLT;
@@ -10767,7 +10612,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
}
break;
case ICmpInst::ICMP_SGE:
- if (ControllingFiniteLoop || !getSignedRangeMin(RHS).isMinSignedValue()) {
+ if (!getSignedRangeMin(RHS).isMinSignedValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS,
SCEV::FlagNSW);
Pred = ICmpInst::ICMP_SGT;
@@ -10780,7 +10625,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
}
break;
case ICmpInst::ICMP_ULE:
- if (ControllingFiniteLoop || !getUnsignedRangeMax(RHS).isMaxValue()) {
+ if (!getUnsignedRangeMax(RHS).isMaxValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS,
SCEV::FlagNUW);
Pred = ICmpInst::ICMP_ULT;
@@ -10792,7 +10637,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
}
break;
case ICmpInst::ICMP_UGE:
- if (ControllingFiniteLoop || !getUnsignedRangeMin(RHS).isMinValue()) {
+ if (!getUnsignedRangeMin(RHS).isMinValue()) {
RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS);
Pred = ICmpInst::ICMP_UGT;
Changed = true;
@@ -10812,8 +10657,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
// Recursively simplify until we either hit a recursion limit or nothing
// changes.
if (Changed)
- return SimplifyICmpOperands(Pred, LHS, RHS, Depth + 1,
- ControllingFiniteLoop);
+ return SimplifyICmpOperands(Pred, LHS, RHS, Depth + 1);
return Changed;
}
@@ -10921,7 +10765,7 @@ std::optional<bool> ScalarEvolution::evaluatePredicate(ICmpInst::Predicate Pred,
const SCEV *RHS) {
if (isKnownPredicate(Pred, LHS, RHS))
return true;
- else if (isKnownPredicate(ICmpInst::getInversePredicate(Pred), LHS, RHS))
+ if (isKnownPredicate(ICmpInst::getInversePredicate(Pred), LHS, RHS))
return false;
return std::nullopt;
}
@@ -10943,7 +10787,7 @@ ScalarEvolution::evaluatePredicateAt(ICmpInst::Predicate Pred, const SCEV *LHS,
if (isBasicBlockEntryGuardedByCond(CtxI->getParent(), Pred, LHS, RHS))
return true;
- else if (isBasicBlockEntryGuardedByCond(CtxI->getParent(),
+ if (isBasicBlockEntryGuardedByCond(CtxI->getParent(),
ICmpInst::getInversePredicate(Pred),
LHS, RHS))
return false;
@@ -11004,22 +10848,21 @@ ScalarEvolution::getMonotonicPredicateTypeImpl(const SCEVAddRecExpr *LHS,
if (!LHS->hasNoUnsignedWrap())
return std::nullopt;
return IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing;
- } else {
- assert(ICmpInst::isSigned(Pred) &&
- "Relational predicate is either signed or unsigned!");
- if (!LHS->hasNoSignedWrap())
- return std::nullopt;
+ }
+ assert(ICmpInst::isSigned(Pred) &&
+ "Relational predicate is either signed or unsigned!");
+ if (!LHS->hasNoSignedWrap())
+ return std::nullopt;
- const SCEV *Step = LHS->getStepRecurrence(*this);
+ const SCEV *Step = LHS->getStepRecurrence(*this);
- if (isKnownNonNegative(Step))
- return IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing;
+ if (isKnownNonNegative(Step))
+ return IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing;
- if (isKnownNonPositive(Step))
- return !IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing;
+ if (isKnownNonPositive(Step))
+ return !IsGreater ? MonotonicallyIncreasing : MonotonicallyDecreasing;
- return std::nullopt;
- }
+ return std::nullopt;
}
std::optional<ScalarEvolution::LoopInvariantPredicate>
@@ -11353,7 +11196,7 @@ bool ScalarEvolution::isImpliedViaGuard(const BasicBlock *BB,
ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS) {
// No need to even try if we know the module has no guards.
- if (AC.assumptions().empty())
+ if (!HasGuards)
return false;
return any_of(*BB, [&](const Instruction &I) {
@@ -11563,6 +11406,15 @@ bool ScalarEvolution::isBasicBlockEntryGuardedByCond(const BasicBlock *BB,
return true;
}
+ // Check conditions due to any @llvm.experimental.guard intrinsics.
+ auto *GuardDecl = F.getParent()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_guard));
+ if (GuardDecl)
+ for (const auto *GU : GuardDecl->users())
+ if (const auto *Guard = dyn_cast<IntrinsicInst>(GU))
+ if (Guard->getFunction() == BB->getParent() && DT.dominates(Guard, BB))
+ if (ProveViaCond(Guard->getArgOperand(0), false))
+ return true;
return false;
}
@@ -12731,7 +12583,7 @@ const SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start,
ScalarEvolution::ExitLimit
ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
- bool ControlsExit, bool AllowPredicates) {
+ bool ControlsOnlyExit, bool AllowPredicates) {
SmallPtrSet<const SCEVPredicate *, 4> Predicates;
const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
@@ -12759,7 +12611,7 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
if (!StrideC || !StrideC->getAPInt().isPowerOf2())
return false;
- if (!ControlsExit || !loopHasNoAbnormalExits(L))
+ if (!ControlsOnlyExit || !loopHasNoAbnormalExits(L))
return false;
return loopIsFiniteByAssumption(L);
@@ -12834,7 +12686,7 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
// implicit/exceptional) which causes the loop to execute before the
// exiting instruction we're analyzing would trigger UB.
auto WrapType = IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW;
- bool NoWrap = ControlsExit && IV->getNoWrapFlags(WrapType);
+ bool NoWrap = ControlsOnlyExit && IV->getNoWrapFlags(WrapType);
ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
const SCEV *Stride = IV->getStepRecurrence(*this);
@@ -13154,10 +13006,9 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
Predicates);
}
-ScalarEvolution::ExitLimit
-ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
- const Loop *L, bool IsSigned,
- bool ControlsExit, bool AllowPredicates) {
+ScalarEvolution::ExitLimit ScalarEvolution::howManyGreaterThans(
+ const SCEV *LHS, const SCEV *RHS, const Loop *L, bool IsSigned,
+ bool ControlsOnlyExit, bool AllowPredicates) {
SmallPtrSet<const SCEVPredicate *, 4> Predicates;
// We handle only IV > Invariant
if (!isLoopInvariant(RHS, L))
@@ -13175,7 +13026,7 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
return getCouldNotCompute();
auto WrapType = IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW;
- bool NoWrap = ControlsExit && IV->getNoWrapFlags(WrapType);
+ bool NoWrap = ControlsOnlyExit && IV->getNoWrapFlags(WrapType);
ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this));
@@ -13435,16 +13286,30 @@ ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI,
LoopInfo &LI)
: F(F), TLI(TLI), AC(AC), DT(DT), LI(LI),
CouldNotCompute(new SCEVCouldNotCompute()), ValuesAtScopes(64),
- LoopDispositions(64), BlockDispositions(64) {}
+ LoopDispositions(64), BlockDispositions(64) {
+ // To use guards for proving predicates, we need to scan every instruction in
+ // relevant basic blocks, and not just terminators. Doing this is a waste of
+ // time if the IR does not actually contain any calls to
+ // @llvm.experimental.guard, so do a quick check and remember this beforehand.
+ //
+ // This pessimizes the case where a pass that preserves ScalarEvolution wants
+ // to _add_ guards to the module when there weren't any before, and wants
+ // ScalarEvolution to optimize based on those guards. For now we prefer to be
+ // efficient in lieu of being smart in that rather obscure case.
+
+ auto *GuardDecl = F.getParent()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_guard));
+ HasGuards = GuardDecl && !GuardDecl->use_empty();
+}
ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
- : F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI),
- CouldNotCompute(std::move(Arg.CouldNotCompute)),
+ : F(Arg.F), HasGuards(Arg.HasGuards), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT),
+ LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)),
ValueExprMap(std::move(Arg.ValueExprMap)),
PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)),
PendingPhiRanges(std::move(Arg.PendingPhiRanges)),
PendingMerges(std::move(Arg.PendingMerges)),
- MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)),
+ ConstantMultipleCache(std::move(Arg.ConstantMultipleCache)),
BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
PredicatedBackedgeTakenCounts(
std::move(Arg.PredicatedBackedgeTakenCounts)),
@@ -13580,16 +13445,36 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
}
}
-static StringRef loopDispositionToStr(ScalarEvolution::LoopDisposition LD) {
+namespace llvm {
+raw_ostream &operator<<(raw_ostream &OS, ScalarEvolution::LoopDisposition LD) {
switch (LD) {
case ScalarEvolution::LoopVariant:
- return "Variant";
+ OS << "Variant";
+ break;
case ScalarEvolution::LoopInvariant:
- return "Invariant";
+ OS << "Invariant";
+ break;
case ScalarEvolution::LoopComputable:
- return "Computable";
+ OS << "Computable";
+ break;
}
- llvm_unreachable("Unknown ScalarEvolution::LoopDisposition kind!");
+ return OS;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, ScalarEvolution::BlockDisposition BD) {
+ switch (BD) {
+ case ScalarEvolution::DoesNotDominateBlock:
+ OS << "DoesNotDominate";
+ break;
+ case ScalarEvolution::DominatesBlock:
+ OS << "Dominates";
+ break;
+ case ScalarEvolution::ProperlyDominatesBlock:
+ OS << "ProperlyDominates";
+ break;
+ }
+ return OS;
+}
}
void ScalarEvolution::print(raw_ostream &OS) const {
@@ -13651,7 +13536,7 @@ void ScalarEvolution::print(raw_ostream &OS) const {
}
Iter->getHeader()->printAsOperand(OS, /*PrintType=*/false);
- OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter));
+ OS << ": " << SE.getLoopDisposition(SV, Iter);
}
for (const auto *InnerL : depth_first(L)) {
@@ -13665,7 +13550,7 @@ void ScalarEvolution::print(raw_ostream &OS) const {
}
InnerL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
- OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, InnerL));
+ OS << ": " << SE.getLoopDisposition(SV, InnerL);
}
OS << " }";
@@ -13705,6 +13590,7 @@ ScalarEvolution::LoopDisposition
ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
switch (S->getSCEVType()) {
case scConstant:
+ case scVScale:
return LoopInvariant;
case scAddRecExpr: {
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
@@ -13803,6 +13689,7 @@ ScalarEvolution::BlockDisposition
ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
switch (S->getSCEVType()) {
case scConstant:
+ case scVScale:
return ProperlyDominatesBlock;
case scAddRecExpr: {
// This uses a "dominates" query instead of "properly dominates" query
@@ -13917,7 +13804,7 @@ void ScalarEvolution::forgetMemoizedResultsImpl(const SCEV *S) {
UnsignedRanges.erase(S);
SignedRanges.erase(S);
HasRecMap.erase(S);
- MinTrailingZerosCache.erase(S);
+ ConstantMultipleCache.erase(S);
if (auto *AR = dyn_cast<SCEVAddRecExpr>(S)) {
UnsignedWrapViaInductionTried.erase(AR);
@@ -14249,9 +14136,8 @@ void ScalarEvolution::verify() const {
const auto RecomputedDisposition = SE2.getLoopDisposition(S, Loop);
if (CachedDisposition != RecomputedDisposition) {
dbgs() << "Cached disposition of " << *S << " for loop " << *Loop
- << " is incorrect: cached "
- << loopDispositionToStr(CachedDisposition) << ", actual "
- << loopDispositionToStr(RecomputedDisposition) << "\n";
+ << " is incorrect: cached " << CachedDisposition << ", actual "
+ << RecomputedDisposition << "\n";
std::abort();
}
}
@@ -14263,7 +14149,8 @@ void ScalarEvolution::verify() const {
const auto RecomputedDisposition = SE2.getBlockDisposition(S, BB);
if (CachedDisposition != RecomputedDisposition) {
dbgs() << "Cached disposition of " << *S << " for block %"
- << BB->getName() << " is incorrect! \n";
+ << BB->getName() << " is incorrect: cached " << CachedDisposition
+ << ", actual " << RecomputedDisposition << "\n";
std::abort();
}
}
@@ -14297,6 +14184,23 @@ void ScalarEvolution::verify() const {
}
}
}
+
+ // Verify that ConstantMultipleCache computations are correct. We check that
+ // cached multiples and recomputed multiples are multiples of each other to
+ // verify correctness. It is possible that a recomputed multiple is different
+ // from the cached multiple due to strengthened no wrap flags or changes in
+ // KnownBits computations.
+ for (auto [S, Multiple] : ConstantMultipleCache) {
+ APInt RecomputedMultiple = SE2.getConstantMultiple(S);
+ if ((Multiple != 0 && RecomputedMultiple != 0 &&
+ Multiple.urem(RecomputedMultiple) != 0 &&
+ RecomputedMultiple.urem(Multiple) != 0)) {
+ dbgs() << "Incorrect cached computation in ConstantMultipleCache for "
+ << *S << " : Computed " << RecomputedMultiple
+ << " but cache contains " << Multiple << "!\n";
+ std::abort();
+ }
+ }
}
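// A standalone sketch (not part of this patch) of the consistency rule the
// new ConstantMultipleCache verification enforces: the cached and recomputed
// multiples need not be equal, they only have to divide one another, since
// stronger wrap flags or KnownBits can change the computed value. uint64_t
// stands in for APInt.
#include <cstdint>
static bool multiplesConsistent(uint64_t Cached, uint64_t Recomputed) {
  if (Cached == 0 || Recomputed == 0)
    return true; // verify() skips the check when either value is zero.
  return Cached % Recomputed == 0 || Recomputed % Cached == 0;
}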
bool ScalarEvolution::invalidate(
@@ -14315,10 +14219,11 @@ AnalysisKey ScalarEvolutionAnalysis::Key;
ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
FunctionAnalysisManager &AM) {
- return ScalarEvolution(F, AM.getResult<TargetLibraryAnalysis>(F),
- AM.getResult<AssumptionAnalysis>(F),
- AM.getResult<DominatorTreeAnalysis>(F),
- AM.getResult<LoopAnalysis>(F));
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ return ScalarEvolution(F, TLI, AC, DT, LI);
}
PreservedAnalyses
@@ -14603,8 +14508,7 @@ void SCEVComparePredicate::print(raw_ostream &OS, unsigned Depth) const {
if (Pred == ICmpInst::ICMP_EQ)
OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n";
else
- OS.indent(Depth) << "Compare predicate: " << *LHS
- << " " << CmpInst::getPredicateName(Pred) << ") "
+ OS.indent(Depth) << "Compare predicate: " << *LHS << " " << Pred << ") "
<< *RHS << "\n";
}
@@ -14933,9 +14837,6 @@ ScalarEvolution::computeSymbolicMaxBackedgeTakenCount(const Loop *L) {
/// A rewriter to replace SCEV expressions in Map with the corresponding entry
/// in the map. It skips AddRecExpr because we cannot guarantee that the
/// replacement is loop invariant in the loop of the AddRec.
-///
-/// At the moment only rewriting SCEVUnknown and SCEVZeroExtendExpr is
-/// supported.
class SCEVLoopGuardRewriter : public SCEVRewriteVisitor<SCEVLoopGuardRewriter> {
const DenseMap<const SCEV *, const SCEV *> &Map;
@@ -14955,9 +14856,47 @@ public:
const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
auto I = Map.find(Expr);
- if (I == Map.end())
+ if (I == Map.end()) {
+      // If we didn't find the exact ZExt expr in the map, check if there's an
+ // entry for a smaller ZExt we can use instead.
+ Type *Ty = Expr->getType();
+ const SCEV *Op = Expr->getOperand(0);
+ unsigned Bitwidth = Ty->getScalarSizeInBits() / 2;
+ while (Bitwidth % 8 == 0 && Bitwidth >= 8 &&
+ Bitwidth > Op->getType()->getScalarSizeInBits()) {
+ Type *NarrowTy = IntegerType::get(SE.getContext(), Bitwidth);
+ auto *NarrowExt = SE.getZeroExtendExpr(Op, NarrowTy);
+ auto I = Map.find(NarrowExt);
+ if (I != Map.end())
+ return SE.getZeroExtendExpr(I->second, Ty);
+ Bitwidth = Bitwidth / 2;
+ }
+
return SCEVRewriteVisitor<SCEVLoopGuardRewriter>::visitZeroExtendExpr(
Expr);
+ }
+ return I->second;
+ }
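// A self-contained sketch (not part of this patch) of the width-halving
// probe added to visitZeroExtendExpr above: when the exact zero-extend is
// not in the rewrite map, progressively narrower byte-sized zero-extends of
// the same operand are tried, and a hit is re-extended to the original
// width. The map lookup is mocked with a callback here.
#include <functional>
#include <optional>
static std::optional<unsigned>
findNarrowerZExtWidth(unsigned ExprBits, unsigned OpBits,
                      const std::function<bool(unsigned)> &MapHasZExtAt) {
  for (unsigned W = ExprBits / 2; W % 8 == 0 && W >= 8 && W > OpBits; W /= 2)
    if (MapHasZExtAt(W))
      return W; // Caller re-extends the mapped value from W to ExprBits.
  return std::nullopt;
}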
+
+ const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) {
+ auto I = Map.find(Expr);
+ if (I == Map.end())
+ return SCEVRewriteVisitor<SCEVLoopGuardRewriter>::visitSignExtendExpr(
+ Expr);
+ return I->second;
+ }
+
+ const SCEV *visitUMinExpr(const SCEVUMinExpr *Expr) {
+ auto I = Map.find(Expr);
+ if (I == Map.end())
+ return SCEVRewriteVisitor<SCEVLoopGuardRewriter>::visitUMinExpr(Expr);
+ return I->second;
+ }
+
+ const SCEV *visitSMinExpr(const SCEVSMinExpr *Expr) {
+ auto I = Map.find(Expr);
+ if (I == Map.end())
+ return SCEVRewriteVisitor<SCEVLoopGuardRewriter>::visitSMinExpr(Expr);
return I->second;
}
};
@@ -15012,6 +14951,93 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
if (MatchRangeCheckIdiom())
return;
+ // Return true if \p Expr is a MinMax SCEV expression with a non-negative
+ // constant operand. If so, return in \p SCTy the SCEV type and in \p RHS
+ // the non-constant operand and in \p LHS the constant operand.
+ auto IsMinMaxSCEVWithNonNegativeConstant =
+ [&](const SCEV *Expr, SCEVTypes &SCTy, const SCEV *&LHS,
+ const SCEV *&RHS) {
+ if (auto *MinMax = dyn_cast<SCEVMinMaxExpr>(Expr)) {
+ if (MinMax->getNumOperands() != 2)
+ return false;
+ if (auto *C = dyn_cast<SCEVConstant>(MinMax->getOperand(0))) {
+ if (C->getAPInt().isNegative())
+ return false;
+ SCTy = MinMax->getSCEVType();
+ LHS = MinMax->getOperand(0);
+ RHS = MinMax->getOperand(1);
+ return true;
+ }
+ }
+ return false;
+ };
+
+ // Checks whether Expr is a non-negative constant and Divisor is a positive
+ // constant; if so, returns their values in ExprVal and DivisorVal.
+ auto GetNonNegExprAndPosDivisor = [&](const SCEV *Expr, const SCEV *Divisor,
+ APInt &ExprVal, APInt &DivisorVal) {
+ auto *ConstExpr = dyn_cast<SCEVConstant>(Expr);
+ auto *ConstDivisor = dyn_cast<SCEVConstant>(Divisor);
+ if (!ConstExpr || !ConstDivisor)
+ return false;
+ ExprVal = ConstExpr->getAPInt();
+ DivisorVal = ConstDivisor->getAPInt();
+ return ExprVal.isNonNegative() && !DivisorVal.isNonPositive();
+ };
+
+ // Return a new SCEV that rounds \p Expr up to the closest value that is
+ // divisible by \p Divisor and greater than or equal to \p Expr.
+ // For now, only handle constant Expr and Divisor.
+ auto GetNextSCEVDividesByDivisor = [&](const SCEV *Expr,
+ const SCEV *Divisor) {
+ APInt ExprVal;
+ APInt DivisorVal;
+ if (!GetNonNegExprAndPosDivisor(Expr, Divisor, ExprVal, DivisorVal))
+ return Expr;
+ APInt Rem = ExprVal.urem(DivisorVal);
+ if (!Rem.isZero())
+ // return the SCEV: Expr + Divisor - Expr % Divisor
+ return getConstant(ExprVal + DivisorVal - Rem);
+ return Expr;
+ };
+
+ // Return a new SCEV that rounds \p Expr down to the closest value that is
+ // divisible by \p Divisor and less than or equal to \p Expr.
+ // For now, only handle constant Expr and Divisor.
+ auto GetPreviousSCEVDividesByDivisor = [&](const SCEV *Expr,
+ const SCEV *Divisor) {
+ APInt ExprVal;
+ APInt DivisorVal;
+ if (!GetNonNegExprAndPosDivisor(Expr, Divisor, ExprVal, DivisorVal))
+ return Expr;
+ APInt Rem = ExprVal.urem(DivisorVal);
+ // return the SCEV: Expr - Expr % Divisor
+ return getConstant(ExprVal - Rem);
+ };
+
+ // Apply divisibility by \p Divisor to a MinMaxExpr with constant operands,
+ // recursively. This is done by aligning the constant operand up/down to a
+ // multiple of \p Divisor.
+ std::function<const SCEV *(const SCEV *, const SCEV *)>
+ ApplyDivisibiltyOnMinMaxExpr = [&](const SCEV *MinMaxExpr,
+ const SCEV *Divisor) {
+ const SCEV *MinMaxLHS = nullptr, *MinMaxRHS = nullptr;
+ SCEVTypes SCTy;
+ if (!IsMinMaxSCEVWithNonNegativeConstant(MinMaxExpr, SCTy, MinMaxLHS,
+ MinMaxRHS))
+ return MinMaxExpr;
+ auto IsMin =
+ isa<SCEVSMinExpr>(MinMaxExpr) || isa<SCEVUMinExpr>(MinMaxExpr);
+ assert(isKnownNonNegative(MinMaxLHS) &&
+ "Expected non-negative operand!");
+ auto *DivisibleExpr =
+ IsMin ? GetPreviousSCEVDividesByDivisor(MinMaxLHS, Divisor)
+ : GetNextSCEVDividesByDivisor(MinMaxLHS, Divisor);
+ SmallVector<const SCEV *> Ops = {
+ ApplyDivisibiltyOnMinMaxExpr(MinMaxRHS, Divisor), DivisibleExpr};
+ return getMinMaxExpr(SCTy, Ops);
+ };
+
// If we have LHS == 0, check if LHS is computing a property of some unknown
// SCEV %v which we can rewrite %v to express explicitly.
const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
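
The two divisor-alignment helpers introduced above reduce to ordinary integer rounding: move a non-negative value up or down to the nearest multiple of a positive divisor. A self-contained sketch of the same formulas on plain uint64_t instead of APInt (an illustration, not the SCEV code itself):

#include <cassert>
#include <cstdint>

// Closest multiple of Divisor that is >= Val (GetNextSCEVDividesByDivisor).
uint64_t roundUpToMultiple(uint64_t Val, uint64_t Divisor) {
  assert(Divisor > 0 && "divisor must be positive");
  uint64_t Rem = Val % Divisor;
  return Rem == 0 ? Val : Val + Divisor - Rem;
}

// Closest multiple of Divisor that is <= Val (GetPreviousSCEVDividesByDivisor).
uint64_t roundDownToMultiple(uint64_t Val, uint64_t Divisor) {
  assert(Divisor > 0 && "divisor must be positive");
  return Val - Val % Divisor;
}

int main() {
  assert(roundUpToMultiple(13, 8) == 16);
  assert(roundUpToMultiple(16, 8) == 16);
  assert(roundDownToMultiple(13, 8) == 8);
  assert(roundDownToMultiple(16, 8) == 16);
  return 0;
}
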
@@ -15023,7 +15049,12 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
const SCEV *URemRHS = nullptr;
if (matchURem(LHS, URemLHS, URemRHS)) {
if (const SCEVUnknown *LHSUnknown = dyn_cast<SCEVUnknown>(URemLHS)) {
- const auto *Multiple = getMulExpr(getUDivExpr(URemLHS, URemRHS), URemRHS);
+ auto I = RewriteMap.find(LHSUnknown);
+ const SCEV *RewrittenLHS =
+ I != RewriteMap.end() ? I->second : LHSUnknown;
+ RewrittenLHS = ApplyDivisibiltyOnMinMaxExpr(RewrittenLHS, URemRHS);
+ const auto *Multiple =
+ getMulExpr(getUDivExpr(RewrittenLHS, URemRHS), URemRHS);
RewriteMap[LHSUnknown] = Multiple;
ExprsToRewrite.push_back(LHSUnknown);
return;
@@ -15041,62 +15072,170 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
Predicate = CmpInst::getSwappedPredicate(Predicate);
}
- // Limit to expressions that can be rewritten.
- if (!isa<SCEVUnknown>(LHS) && !isa<SCEVZeroExtendExpr>(LHS))
- return;
+ // Puts the rewrite rule \p From -> \p To into the rewrite map. Also, if
+ // \p From and \p FromRewritten are the same (i.e. no rewrite has been
+ // registered for \p From yet), puts \p From into the list of rewritten
+ // expressions.
+ auto AddRewrite = [&](const SCEV *From, const SCEV *FromRewritten,
+ const SCEV *To) {
+ if (From == FromRewritten)
+ ExprsToRewrite.push_back(From);
+ RewriteMap[From] = To;
+ };
+
+ // Checks whether \p S has already been rewritten. In that case returns the
+ // existing rewrite because we want to chain further rewrites onto the
+ // already rewritten value. Otherwise returns \p S.
+ auto GetMaybeRewritten = [&](const SCEV *S) {
+ auto I = RewriteMap.find(S);
+ return I != RewriteMap.end() ? I->second : S;
+ };
- // Check whether LHS has already been rewritten. In that case we want to
- // chain further rewrites onto the already rewritten value.
- auto I = RewriteMap.find(LHS);
- const SCEV *RewrittenLHS = I != RewriteMap.end() ? I->second : LHS;
+ // Check for the SCEV expression (A /u B) * B, where B is a constant, inside
+ // \p Expr. The check is done recursively on \p Expr, which is assumed to
+ // be a composition of Min/Max SCEVs. Return whether the SCEV expression
+ // (A /u B) * B was found, and return the divisor B in \p DividesBy. For
+ // example, if Expr = umin (umax ((A /u 8) * 8, 16), 64), return true since
+ // (A /u 8) * 8 matches the pattern, and return the constant SCEV 8 in \p
+ // DividesBy.
+ std::function<bool(const SCEV *, const SCEV *&)> HasDivisibiltyInfo =
+ [&](const SCEV *Expr, const SCEV *&DividesBy) {
+ if (auto *Mul = dyn_cast<SCEVMulExpr>(Expr)) {
+ if (Mul->getNumOperands() != 2)
+ return false;
+ auto *MulLHS = Mul->getOperand(0);
+ auto *MulRHS = Mul->getOperand(1);
+ if (isa<SCEVConstant>(MulLHS))
+ std::swap(MulLHS, MulRHS);
+ if (auto *Div = dyn_cast<SCEVUDivExpr>(MulLHS))
+ if (Div->getOperand(1) == MulRHS) {
+ DividesBy = MulRHS;
+ return true;
+ }
+ }
+ if (auto *MinMax = dyn_cast<SCEVMinMaxExpr>(Expr))
+ return HasDivisibiltyInfo(MinMax->getOperand(0), DividesBy) ||
+ HasDivisibiltyInfo(MinMax->getOperand(1), DividesBy);
+ return false;
+ };
- const SCEV *RewrittenRHS = nullptr;
+ // Return true if \p Expr is known to be divisible by \p DividesBy.
+ std::function<bool(const SCEV *, const SCEV *&)> IsKnownToDivideBy =
+ [&](const SCEV *Expr, const SCEV *DividesBy) {
+ if (getURemExpr(Expr, DividesBy)->isZero())
+ return true;
+ if (auto *MinMax = dyn_cast<SCEVMinMaxExpr>(Expr))
+ return IsKnownToDivideBy(MinMax->getOperand(0), DividesBy) &&
+ IsKnownToDivideBy(MinMax->getOperand(1), DividesBy);
+ return false;
+ };
+
+ const SCEV *RewrittenLHS = GetMaybeRewritten(LHS);
+ const SCEV *DividesBy = nullptr;
+ if (HasDivisibiltyInfo(RewrittenLHS, DividesBy))
+ // Check that the whole expression is divisible by DividesBy.
+ DividesBy =
+ IsKnownToDivideBy(RewrittenLHS, DividesBy) ? DividesBy : nullptr;
+
+ // Collect rewrites for LHS and its transitive operands based on the
+ // condition.
+ // For min/max expressions, also apply the guard to its operands:
+ // 'min(a, b) >= c' -> '(a >= c) and (b >= c)',
+ // 'min(a, b) > c' -> '(a > c) and (b > c)',
+ // 'max(a, b) <= c' -> '(a <= c) and (b <= c)',
+ // 'max(a, b) < c' -> '(a < c) and (b < c)'.
+
+ // We cannot express strict predicates in SCEV, so instead we replace them
+ // with non-strict ones against plus or minus one of RHS depending on the
+ // predicate.
+ const SCEV *One = getOne(RHS->getType());
switch (Predicate) {
- case CmpInst::ICMP_ULT:
- RewrittenRHS =
- getUMinExpr(RewrittenLHS, getMinusSCEV(RHS, getOne(RHS->getType())));
- break;
- case CmpInst::ICMP_SLT:
- RewrittenRHS =
- getSMinExpr(RewrittenLHS, getMinusSCEV(RHS, getOne(RHS->getType())));
- break;
- case CmpInst::ICMP_ULE:
- RewrittenRHS = getUMinExpr(RewrittenLHS, RHS);
- break;
- case CmpInst::ICMP_SLE:
- RewrittenRHS = getSMinExpr(RewrittenLHS, RHS);
- break;
- case CmpInst::ICMP_UGT:
- RewrittenRHS =
- getUMaxExpr(RewrittenLHS, getAddExpr(RHS, getOne(RHS->getType())));
- break;
- case CmpInst::ICMP_SGT:
- RewrittenRHS =
- getSMaxExpr(RewrittenLHS, getAddExpr(RHS, getOne(RHS->getType())));
- break;
- case CmpInst::ICMP_UGE:
- RewrittenRHS = getUMaxExpr(RewrittenLHS, RHS);
- break;
- case CmpInst::ICMP_SGE:
- RewrittenRHS = getSMaxExpr(RewrittenLHS, RHS);
- break;
- case CmpInst::ICMP_EQ:
- if (isa<SCEVConstant>(RHS))
- RewrittenRHS = RHS;
- break;
- case CmpInst::ICMP_NE:
- if (isa<SCEVConstant>(RHS) &&
- cast<SCEVConstant>(RHS)->getValue()->isNullValue())
- RewrittenRHS = getUMaxExpr(RewrittenLHS, getOne(RHS->getType()));
- break;
- default:
- break;
+ case CmpInst::ICMP_ULT:
+ if (RHS->getType()->isPointerTy())
+ return;
+ RHS = getUMaxExpr(RHS, One);
+ [[fallthrough]];
+ case CmpInst::ICMP_SLT: {
+ RHS = getMinusSCEV(RHS, One);
+ RHS = DividesBy ? GetPreviousSCEVDividesByDivisor(RHS, DividesBy) : RHS;
+ break;
+ }
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_SGT:
+ RHS = getAddExpr(RHS, One);
+ RHS = DividesBy ? GetNextSCEVDividesByDivisor(RHS, DividesBy) : RHS;
+ break;
+ case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_SLE:
+ RHS = DividesBy ? GetPreviousSCEVDividesByDivisor(RHS, DividesBy) : RHS;
+ break;
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_SGE:
+ RHS = DividesBy ? GetNextSCEVDividesByDivisor(RHS, DividesBy) : RHS;
+ break;
+ default:
+ break;
}
- if (RewrittenRHS) {
- RewriteMap[LHS] = RewrittenRHS;
- if (LHS == RewrittenLHS)
- ExprsToRewrite.push_back(LHS);
+ SmallVector<const SCEV *, 16> Worklist(1, LHS);
+ SmallPtrSet<const SCEV *, 16> Visited;
+
+ auto EnqueueOperands = [&Worklist](const SCEVNAryExpr *S) {
+ append_range(Worklist, S->operands());
+ };
+
+ while (!Worklist.empty()) {
+ const SCEV *From = Worklist.pop_back_val();
+ if (isa<SCEVConstant>(From))
+ continue;
+ if (!Visited.insert(From).second)
+ continue;
+ const SCEV *FromRewritten = GetMaybeRewritten(From);
+ const SCEV *To = nullptr;
+
+ switch (Predicate) {
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_ULE:
+ To = getUMinExpr(FromRewritten, RHS);
+ if (auto *UMax = dyn_cast<SCEVUMaxExpr>(FromRewritten))
+ EnqueueOperands(UMax);
+ break;
+ case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SLE:
+ To = getSMinExpr(FromRewritten, RHS);
+ if (auto *SMax = dyn_cast<SCEVSMaxExpr>(FromRewritten))
+ EnqueueOperands(SMax);
+ break;
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_UGE:
+ To = getUMaxExpr(FromRewritten, RHS);
+ if (auto *UMin = dyn_cast<SCEVUMinExpr>(FromRewritten))
+ EnqueueOperands(UMin);
+ break;
+ case CmpInst::ICMP_SGT:
+ case CmpInst::ICMP_SGE:
+ To = getSMaxExpr(FromRewritten, RHS);
+ if (auto *SMin = dyn_cast<SCEVSMinExpr>(FromRewritten))
+ EnqueueOperands(SMin);
+ break;
+ case CmpInst::ICMP_EQ:
+ if (isa<SCEVConstant>(RHS))
+ To = RHS;
+ break;
+ case CmpInst::ICMP_NE:
+ if (isa<SCEVConstant>(RHS) &&
+ cast<SCEVConstant>(RHS)->getValue()->isNullValue()) {
+ const SCEV *OneAlignedUp =
+ DividesBy ? GetNextSCEVDividesByDivisor(One, DividesBy) : One;
+ To = getUMaxExpr(FromRewritten, OneAlignedUp);
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (To)
+ AddRewrite(From, FromRewritten, To);
}
};
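
The rewrite loop above relies on two facts restated in its comments: a strict integer comparison can be replaced by a non-strict one against RHS plus or minus one, and a bound on a min/max also bounds each operand. A small self-checking sketch of both facts on plain unsigned integers (illustration only, exhaustive over a small range):

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  // Strict vs. non-strict compares, as used when turning a guard like
  // 'x <u c' into the SCEV-expressible 'x <=u c - 1' (c stays small here,
  // so neither c - 1 nor c + 1 wraps).
  for (uint64_t X = 0; X < 50; ++X)
    for (uint64_t C = 1; C < 50; ++C) {
      assert((X < C) == (X <= C - 1));
      assert((X > C) == (X >= C + 1));
    }

  // Distributing a bound over min/max, mirroring the comments above:
  // 'min(a, b) >= c' holds exactly when both operands satisfy '>= c',
  // and 'max(a, b) <= c' exactly when both satisfy '<= c'.
  for (uint64_t A = 0; A < 20; ++A)
    for (uint64_t B = 0; B < 20; ++B)
      for (uint64_t C = 0; C < 20; ++C) {
        assert((std::min(A, B) >= C) == (A >= C && B >= C));
        assert((std::max(A, B) <= C) == (A <= C && B <= C));
      }
  return 0;
}
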
@@ -15112,7 +15251,16 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
Terms.emplace_back(AssumeI->getOperand(0), true);
}
- // Second, collect conditions from dominating branches. Starting at the loop
+ // Second, collect information from llvm.experimental.guards dominating the loop.
+ auto *GuardDecl = F.getParent()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_guard));
+ if (GuardDecl)
+ for (const auto *GU : GuardDecl->users())
+ if (const auto *Guard = dyn_cast<IntrinsicInst>(GU))
+ if (Guard->getFunction() == Header->getParent() && DT.dominates(Guard, Header))
+ Terms.emplace_back(Guard->getArgOperand(0), true);
+
+ // Third, collect conditions from dominating branches. Starting at the loop
// predecessor, climb up the predecessor chain, as long as there are
// predecessors that can be found that have unique successors leading to the
// original header.
diff --git a/llvm/lib/Analysis/ScalarEvolutionDivision.cpp b/llvm/lib/Analysis/ScalarEvolutionDivision.cpp
index 0619569bf816..e1dd834cfb10 100644
--- a/llvm/lib/Analysis/ScalarEvolutionDivision.cpp
+++ b/llvm/lib/Analysis/ScalarEvolutionDivision.cpp
@@ -126,6 +126,10 @@ void SCEVDivision::visitConstant(const SCEVConstant *Numerator) {
}
}
+void SCEVDivision::visitVScale(const SCEVVScale *Numerator) {
+ return cannotDivide(Numerator);
+}
+
void SCEVDivision::visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
const SCEV *StartQ, *StartR, *StepQ, *StepR;
if (!Numerator->isAffine())
diff --git a/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
index 22dff5efec5c..cfc5b8455454 100644
--- a/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -96,11 +96,20 @@ NormalizeDenormalizeRewriter::visitAddRecExpr(const SCEVAddRecExpr *AR) {
const SCEV *llvm::normalizeForPostIncUse(const SCEV *S,
const PostIncLoopSet &Loops,
- ScalarEvolution &SE) {
+ ScalarEvolution &SE,
+ bool CheckInvertible) {
+ if (Loops.empty())
+ return S;
auto Pred = [&](const SCEVAddRecExpr *AR) {
return Loops.count(AR->getLoop());
};
- return NormalizeDenormalizeRewriter(Normalize, Pred, SE).visit(S);
+ const SCEV *Normalized =
+ NormalizeDenormalizeRewriter(Normalize, Pred, SE).visit(S);
+ const SCEV *Denormalized = denormalizeForPostIncUse(Normalized, Loops, SE);
+ // If the normalized expression isn't invertible, return nullptr.
+ if (CheckInvertible && Denormalized != S)
+ return nullptr;
+ return Normalized;
}
const SCEV *llvm::normalizeForPostIncUseIf(const SCEV *S, NormalizePredTy Pred,
@@ -111,6 +120,8 @@ const SCEV *llvm::normalizeForPostIncUseIf(const SCEV *S, NormalizePredTy Pred,
const SCEV *llvm::denormalizeForPostIncUse(const SCEV *S,
const PostIncLoopSet &Loops,
ScalarEvolution &SE) {
+ if (Loops.empty())
+ return S;
auto Pred = [&](const SCEVAddRecExpr *AR) {
return Loops.count(AR->getLoop());
};
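
The CheckInvertible path added above validates a normalization by running the inverse transform and comparing against the original input, returning null when the round trip does not reproduce it. A generic sketch of that round-trip check, with hypothetical helper names rather than the SCEV API (halving stands in for a transform that is only sometimes invertible):

#include <functional>
#include <iostream>
#include <optional>

// Apply Forward, then confirm Backward undoes it; otherwise report failure.
template <typename T>
std::optional<T>
applyIfInvertible(const T &Input, const std::function<T(const T &)> &Forward,
                  const std::function<T(const T &)> &Backward) {
  T Transformed = Forward(Input);
  if (Backward(Transformed) != Input)
    return std::nullopt; // not invertible for this input
  return Transformed;
}

int main() {
  std::function<int(const int &)> Halve = [](const int &V) { return V / 2; };
  std::function<int(const int &)> Double = [](const int &V) { return V * 2; };
  // Halving is inverted by doubling only for even inputs.
  std::cout << applyIfInvertible(4, Halve, Double).value_or(-1) << "\n"; // 2
  std::cout << applyIfInvertible(5, Halve, Double).value_or(-1) << "\n"; // -1
  return 0;
}
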
diff --git a/llvm/lib/Analysis/StackLifetime.cpp b/llvm/lib/Analysis/StackLifetime.cpp
index ee77e81fc978..3e1b5dea6f6c 100644
--- a/llvm/lib/Analysis/StackLifetime.cpp
+++ b/llvm/lib/Analysis/StackLifetime.cpp
@@ -39,7 +39,7 @@ StackLifetime::getLiveRange(const AllocaInst *AI) const {
}
bool StackLifetime::isReachable(const Instruction *I) const {
- return BlockInstRange.find(I->getParent()) != BlockInstRange.end();
+ return BlockInstRange.contains(I->getParent());
}
bool StackLifetime::isAliveAfter(const AllocaInst *AI,
@@ -414,7 +414,7 @@ void StackLifetimePrinterPass::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
static_cast<PassInfoMixin<StackLifetimePrinterPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
- OS << "<";
+ OS << '<';
switch (Type) {
case StackLifetime::LivenessType::May:
OS << "may";
@@ -423,5 +423,5 @@ void StackLifetimePrinterPass::printPipeline(
OS << "must";
break;
}
- OS << ">";
+ OS << '>';
}
diff --git a/llvm/lib/Analysis/StratifiedSets.h b/llvm/lib/Analysis/StratifiedSets.h
deleted file mode 100644
index 193e4a461e66..000000000000
--- a/llvm/lib/Analysis/StratifiedSets.h
+++ /dev/null
@@ -1,595 +0,0 @@
-//===- StratifiedSets.h - Abstract stratified sets implementation. --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ADT_STRATIFIEDSETS_H
-#define LLVM_ADT_STRATIFIEDSETS_H
-
-#include "AliasAnalysisSummary.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include <bitset>
-#include <cassert>
-#include <cmath>
-#include <type_traits>
-#include <utility>
-#include <vector>
-
-namespace llvm {
-namespace cflaa {
-/// An index into Stratified Sets.
-typedef unsigned StratifiedIndex;
-/// NOTE: ^ This can't be a short -- bootstrapping clang has a case where
-/// ~1M sets exist.
-
-// Container of information related to a value in a StratifiedSet.
-struct StratifiedInfo {
- StratifiedIndex Index;
- /// For field sensitivity, etc. we can tack fields on here.
-};
-
-/// A "link" between two StratifiedSets.
-struct StratifiedLink {
- /// This is a value used to signify "does not exist" where the
- /// StratifiedIndex type is used.
- ///
- /// This is used instead of std::optional<StratifiedIndex> because
- /// std::optional<StratifiedIndex> would eat up a considerable amount of extra
- /// memory, after struct padding/alignment is taken into account.
- static const StratifiedIndex SetSentinel;
-
- /// The index for the set "above" current
- StratifiedIndex Above;
-
- /// The link for the set "below" current
- StratifiedIndex Below;
-
- /// Attributes for these StratifiedSets.
- AliasAttrs Attrs;
-
- StratifiedLink() : Above(SetSentinel), Below(SetSentinel) {}
-
- bool hasBelow() const { return Below != SetSentinel; }
- bool hasAbove() const { return Above != SetSentinel; }
-
- void clearBelow() { Below = SetSentinel; }
- void clearAbove() { Above = SetSentinel; }
-};
-
-/// These are stratified sets, as described in "Fast algorithms for
-/// Dyck-CFL-reachability with applications to Alias Analysis" by Zhang Q, Lyu M
-/// R, Yuan H, and Su Z. -- in short, this is meant to represent different sets
-/// of Value*s. If two Value*s are in the same set, or if both sets have
-/// overlapping attributes, then the Value*s are said to alias.
-///
-/// Sets may be related by position, meaning that one set may be considered as
-/// above or below another. In CFL Alias Analysis, this gives us an indication
-/// of how two variables are related; if the set of variable A is below a set
-/// containing variable B, then at some point, a variable that has interacted
-/// with B (or B itself) was either used in order to extract the variable A, or
-/// was used as storage of variable A.
-///
-/// Sets may also have attributes (as noted above). These attributes are
-/// generally used for noting whether a variable in the set has interacted with
-/// a variable whose origins we don't quite know (i.e. globals/arguments), or if
-/// the variable may have had operations performed on it (modified in a function
-/// call). All attributes that exist in a set A must exist in all sets marked as
-/// below set A.
-template <typename T> class StratifiedSets {
-public:
- StratifiedSets() = default;
- StratifiedSets(StratifiedSets &&) = default;
- StratifiedSets &operator=(StratifiedSets &&) = default;
-
- StratifiedSets(DenseMap<T, StratifiedInfo> Map,
- std::vector<StratifiedLink> Links)
- : Values(std::move(Map)), Links(std::move(Links)) {}
-
- std::optional<StratifiedInfo> find(const T &Elem) const {
- auto Iter = Values.find(Elem);
- if (Iter == Values.end())
- return std::nullopt;
- return Iter->second;
- }
-
- const StratifiedLink &getLink(StratifiedIndex Index) const {
- assert(inbounds(Index));
- return Links[Index];
- }
-
-private:
- DenseMap<T, StratifiedInfo> Values;
- std::vector<StratifiedLink> Links;
-
- bool inbounds(StratifiedIndex Idx) const { return Idx < Links.size(); }
-};
-
-/// Generic Builder class that produces StratifiedSets instances.
-///
-/// The goal of this builder is to efficiently produce correct StratifiedSets
-/// instances. To this end, we use a few tricks:
-/// > Set chains (A method for linking sets together)
-/// > Set remaps (A method for marking a set as an alias [irony?] of another)
-///
-/// ==== Set chains ====
-/// This builder has a notion of some value A being above, below, or with some
-/// other value B:
-/// > The `A above B` relationship implies that there is a reference edge
-/// going from A to B. Namely, it notes that A can store anything in B's set.
-/// > The `A below B` relationship is the opposite of `A above B`. It implies
-/// that there's a dereference edge going from A to B.
-/// > The `A with B` relationship states that there's an assignment edge going
-/// from A to B, and that A and B should be treated as equals.
-///
-/// As an example, take the following code snippet:
-///
-/// %a = alloca i32, align 4
-/// %ap = alloca i32*, align 8
-/// %app = alloca i32**, align 8
-/// store %a, %ap
-/// store %ap, %app
-/// %aw = getelementptr %ap, i32 0
-///
-/// Given this, the following relations exist:
-/// - %a below %ap & %ap above %a
-/// - %ap below %app & %app above %ap
-/// - %aw with %ap & %ap with %aw
-///
-/// These relations produce the following sets:
-/// [{%a}, {%ap, %aw}, {%app}]
-///
-/// ...Which state that the only MayAlias relationship in the above program is
-/// between %ap and %aw.
-///
-/// Because LLVM allows arbitrary casts, code like the following needs to be
-/// supported:
-/// %ip = alloca i64, align 8
-/// %ipp = alloca i64*, align 8
-/// %i = bitcast i64** ipp to i64
-/// store i64* %ip, i64** %ipp
-/// store i64 %i, i64* %ip
-///
-/// Which, because %ipp ends up *both* above and below %ip, is fun.
-///
-/// This is solved by merging %i and %ipp into a single set (...which is the
-/// only way to solve this, since their bit patterns are equivalent). Any sets
-/// that ended up in between %i and %ipp at the time of merging (in this case,
-/// the set containing %ip) also get conservatively merged into the set of %i
-/// and %ipp. In short, the resulting StratifiedSet from the above code would be
-/// {%ip, %ipp, %i}.
-///
-/// ==== Set remaps ====
-/// More of an implementation detail than anything -- when merging sets, we need
-/// to update the numbers of all of the elements mapped to those sets. Rather
-/// than doing this at each merge, we note in the BuilderLink structure that a
-/// remap has occurred, and use this information so we can defer renumbering set
-/// elements until build time.
-template <typename T> class StratifiedSetsBuilder {
- /// Represents a Stratified Set, with information about the Stratified
- /// Set above it, the set below it, and whether the current set has been
- /// remapped to another.
- struct BuilderLink {
- const StratifiedIndex Number;
-
- BuilderLink(StratifiedIndex N) : Number(N) {
- Remap = StratifiedLink::SetSentinel;
- }
-
- bool hasAbove() const {
- assert(!isRemapped());
- return Link.hasAbove();
- }
-
- bool hasBelow() const {
- assert(!isRemapped());
- return Link.hasBelow();
- }
-
- void setBelow(StratifiedIndex I) {
- assert(!isRemapped());
- Link.Below = I;
- }
-
- void setAbove(StratifiedIndex I) {
- assert(!isRemapped());
- Link.Above = I;
- }
-
- void clearBelow() {
- assert(!isRemapped());
- Link.clearBelow();
- }
-
- void clearAbove() {
- assert(!isRemapped());
- Link.clearAbove();
- }
-
- StratifiedIndex getBelow() const {
- assert(!isRemapped());
- assert(hasBelow());
- return Link.Below;
- }
-
- StratifiedIndex getAbove() const {
- assert(!isRemapped());
- assert(hasAbove());
- return Link.Above;
- }
-
- AliasAttrs getAttrs() {
- assert(!isRemapped());
- return Link.Attrs;
- }
-
- void setAttrs(AliasAttrs Other) {
- assert(!isRemapped());
- Link.Attrs |= Other;
- }
-
- bool isRemapped() const { return Remap != StratifiedLink::SetSentinel; }
-
- /// For initial remapping to another set
- void remapTo(StratifiedIndex Other) {
- assert(!isRemapped());
- Remap = Other;
- }
-
- StratifiedIndex getRemapIndex() const {
- assert(isRemapped());
- return Remap;
- }
-
- /// Should only be called when we're already remapped.
- void updateRemap(StratifiedIndex Other) {
- assert(isRemapped());
- Remap = Other;
- }
-
- /// Prefer the above functions to calling things directly on what's returned
- /// from this -- they guard against unexpected calls when the current
- /// BuilderLink is remapped.
- const StratifiedLink &getLink() const { return Link; }
-
- private:
- StratifiedLink Link;
- StratifiedIndex Remap;
- };
-
- /// This function performs all of the set unioning/value renumbering
- /// that we've been putting off, and generates a vector<StratifiedLink> that
- /// may be placed in a StratifiedSets instance.
- void finalizeSets(std::vector<StratifiedLink> &StratLinks) {
- DenseMap<StratifiedIndex, StratifiedIndex> Remaps;
- for (auto &Link : Links) {
- if (Link.isRemapped())
- continue;
-
- StratifiedIndex Number = StratLinks.size();
- Remaps.insert(std::make_pair(Link.Number, Number));
- StratLinks.push_back(Link.getLink());
- }
-
- for (auto &Link : StratLinks) {
- if (Link.hasAbove()) {
- auto &Above = linksAt(Link.Above);
- auto Iter = Remaps.find(Above.Number);
- assert(Iter != Remaps.end());
- Link.Above = Iter->second;
- }
-
- if (Link.hasBelow()) {
- auto &Below = linksAt(Link.Below);
- auto Iter = Remaps.find(Below.Number);
- assert(Iter != Remaps.end());
- Link.Below = Iter->second;
- }
- }
-
- for (auto &Pair : Values) {
- auto &Info = Pair.second;
- auto &Link = linksAt(Info.Index);
- auto Iter = Remaps.find(Link.Number);
- assert(Iter != Remaps.end());
- Info.Index = Iter->second;
- }
- }
-
- /// There's a guarantee in StratifiedLink where all bits set in a
- /// Link.externals will be set in all Link.externals "below" it.
- static void propagateAttrs(std::vector<StratifiedLink> &Links) {
- const auto getHighestParentAbove = [&Links](StratifiedIndex Idx) {
- const auto *Link = &Links[Idx];
- while (Link->hasAbove()) {
- Idx = Link->Above;
- Link = &Links[Idx];
- }
- return Idx;
- };
-
- SmallSet<StratifiedIndex, 16> Visited;
- for (unsigned I = 0, E = Links.size(); I < E; ++I) {
- auto CurrentIndex = getHighestParentAbove(I);
- if (!Visited.insert(CurrentIndex).second)
- continue;
-
- while (Links[CurrentIndex].hasBelow()) {
- auto &CurrentBits = Links[CurrentIndex].Attrs;
- auto NextIndex = Links[CurrentIndex].Below;
- auto &NextBits = Links[NextIndex].Attrs;
- NextBits |= CurrentBits;
- CurrentIndex = NextIndex;
- }
- }
- }
-
-public:
- /// Builds a StratifiedSet from the information we've been given since either
- /// construction or the prior build() call.
- StratifiedSets<T> build() {
- std::vector<StratifiedLink> StratLinks;
- finalizeSets(StratLinks);
- propagateAttrs(StratLinks);
- Links.clear();
- return StratifiedSets<T>(std::move(Values), std::move(StratLinks));
- }
-
- bool has(const T &Elem) const { return get(Elem).has_value(); }
-
- bool add(const T &Main) {
- if (get(Main))
- return false;
-
- auto NewIndex = getNewUnlinkedIndex();
- return addAtMerging(Main, NewIndex);
- }
-
- /// Restructures the stratified sets as necessary to make "ToAdd" in a
- /// set above "Main". There are some cases where this is not possible (see
- /// above), so we merge them such that ToAdd and Main are in the same set.
- bool addAbove(const T &Main, const T &ToAdd) {
- assert(has(Main));
- auto Index = *indexOf(Main);
- if (!linksAt(Index).hasAbove())
- addLinkAbove(Index);
-
- auto Above = linksAt(Index).getAbove();
- return addAtMerging(ToAdd, Above);
- }
-
- /// Restructures the stratified sets as necessary to make "ToAdd" in a
- /// set below "Main". There are some cases where this is not possible (see
- /// above), so we merge them such that ToAdd and Main are in the same set.
- bool addBelow(const T &Main, const T &ToAdd) {
- assert(has(Main));
- auto Index = *indexOf(Main);
- if (!linksAt(Index).hasBelow())
- addLinkBelow(Index);
-
- auto Below = linksAt(Index).getBelow();
- return addAtMerging(ToAdd, Below);
- }
-
- bool addWith(const T &Main, const T &ToAdd) {
- assert(has(Main));
- auto MainIndex = *indexOf(Main);
- return addAtMerging(ToAdd, MainIndex);
- }
-
- void noteAttributes(const T &Main, AliasAttrs NewAttrs) {
- assert(has(Main));
- auto *Info = *get(Main);
- auto &Link = linksAt(Info->Index);
- Link.setAttrs(NewAttrs);
- }
-
-private:
- DenseMap<T, StratifiedInfo> Values;
- std::vector<BuilderLink> Links;
-
- /// Adds the given element at the given index, merging sets if necessary.
- bool addAtMerging(const T &ToAdd, StratifiedIndex Index) {
- StratifiedInfo Info = {Index};
- auto Pair = Values.insert(std::make_pair(ToAdd, Info));
- if (Pair.second)
- return true;
-
- auto &Iter = Pair.first;
- auto &IterSet = linksAt(Iter->second.Index);
- auto &ReqSet = linksAt(Index);
-
- // Failed to add where we wanted to. Merge the sets.
- if (&IterSet != &ReqSet)
- merge(IterSet.Number, ReqSet.Number);
-
- return false;
- }
-
- /// Gets the BuilderLink at the given index, taking set remapping into
- /// account.
- BuilderLink &linksAt(StratifiedIndex Index) {
- auto *Start = &Links[Index];
- if (!Start->isRemapped())
- return *Start;
-
- auto *Current = Start;
- while (Current->isRemapped())
- Current = &Links[Current->getRemapIndex()];
-
- auto NewRemap = Current->Number;
-
- // Run through everything that has yet to be updated, and update them to
- // remap to NewRemap
- Current = Start;
- while (Current->isRemapped()) {
- auto *Next = &Links[Current->getRemapIndex()];
- Current->updateRemap(NewRemap);
- Current = Next;
- }
-
- return *Current;
- }
-
- /// Merges two sets into one another. Assumes that these sets are not
- /// already one and the same.
- void merge(StratifiedIndex Idx1, StratifiedIndex Idx2) {
- assert(inbounds(Idx1) && inbounds(Idx2));
- assert(&linksAt(Idx1) != &linksAt(Idx2) &&
- "Merging a set into itself is not allowed");
-
- // CASE 1: If the set at `Idx1` is above or below `Idx2`, we need to merge
- // both the
- // given sets, and all sets between them, into one.
- if (tryMergeUpwards(Idx1, Idx2))
- return;
-
- if (tryMergeUpwards(Idx2, Idx1))
- return;
-
- // CASE 2: The set at `Idx1` is not in the same chain as the set at `Idx2`.
- // We therefore need to merge the two chains together.
- mergeDirect(Idx1, Idx2);
- }
-
- /// Merges two sets assuming that the set at `Idx1` is unreachable from
- /// traversing above or below the set at `Idx2`.
- void mergeDirect(StratifiedIndex Idx1, StratifiedIndex Idx2) {
- assert(inbounds(Idx1) && inbounds(Idx2));
-
- auto *LinksInto = &linksAt(Idx1);
- auto *LinksFrom = &linksAt(Idx2);
- // Merging everything above LinksInto then proceeding to merge everything
- // below LinksInto becomes problematic, so we go as far "up" as possible!
- while (LinksInto->hasAbove() && LinksFrom->hasAbove()) {
- LinksInto = &linksAt(LinksInto->getAbove());
- LinksFrom = &linksAt(LinksFrom->getAbove());
- }
-
- if (LinksFrom->hasAbove()) {
- LinksInto->setAbove(LinksFrom->getAbove());
- auto &NewAbove = linksAt(LinksInto->getAbove());
- NewAbove.setBelow(LinksInto->Number);
- }
-
- // Merging strategy:
- // > If neither has links below, stop.
- // > If only `LinksInto` has links below, stop.
- // > If only `LinksFrom` has links below, reset `LinksInto.Below` to
- // match `LinksFrom.Below`
- // > If both have links above, deal with those next.
- while (LinksInto->hasBelow() && LinksFrom->hasBelow()) {
- auto FromAttrs = LinksFrom->getAttrs();
- LinksInto->setAttrs(FromAttrs);
-
- // Remap needs to happen after getBelow(), but before
- // assignment of LinksFrom
- auto *NewLinksFrom = &linksAt(LinksFrom->getBelow());
- LinksFrom->remapTo(LinksInto->Number);
- LinksFrom = NewLinksFrom;
- LinksInto = &linksAt(LinksInto->getBelow());
- }
-
- if (LinksFrom->hasBelow()) {
- LinksInto->setBelow(LinksFrom->getBelow());
- auto &NewBelow = linksAt(LinksInto->getBelow());
- NewBelow.setAbove(LinksInto->Number);
- }
-
- LinksInto->setAttrs(LinksFrom->getAttrs());
- LinksFrom->remapTo(LinksInto->Number);
- }
-
- /// Checks to see if lowerIndex is at a level lower than upperIndex. If so, it
- /// will merge lowerIndex with upperIndex (and all of the sets between) and
- /// return true. Otherwise, it will return false.
- bool tryMergeUpwards(StratifiedIndex LowerIndex, StratifiedIndex UpperIndex) {
- assert(inbounds(LowerIndex) && inbounds(UpperIndex));
- auto *Lower = &linksAt(LowerIndex);
- auto *Upper = &linksAt(UpperIndex);
- if (Lower == Upper)
- return true;
-
- SmallVector<BuilderLink *, 8> Found;
- auto *Current = Lower;
- auto Attrs = Current->getAttrs();
- while (Current->hasAbove() && Current != Upper) {
- Found.push_back(Current);
- Attrs |= Current->getAttrs();
- Current = &linksAt(Current->getAbove());
- }
-
- if (Current != Upper)
- return false;
-
- Upper->setAttrs(Attrs);
-
- if (Lower->hasBelow()) {
- auto NewBelowIndex = Lower->getBelow();
- Upper->setBelow(NewBelowIndex);
- auto &NewBelow = linksAt(NewBelowIndex);
- NewBelow.setAbove(UpperIndex);
- } else {
- Upper->clearBelow();
- }
-
- for (const auto &Ptr : Found)
- Ptr->remapTo(Upper->Number);
-
- return true;
- }
-
- std::optional<const StratifiedInfo *> get(const T &Val) const {
- auto Result = Values.find(Val);
- if (Result == Values.end())
- return std::nullopt;
- return &Result->second;
- }
-
- std::optional<StratifiedInfo *> get(const T &Val) {
- auto Result = Values.find(Val);
- if (Result == Values.end())
- return std::nullopt;
- return &Result->second;
- }
-
- std::optional<StratifiedIndex> indexOf(const T &Val) {
- auto MaybeVal = get(Val);
- if (!MaybeVal)
- return std::nullopt;
- auto *Info = *MaybeVal;
- auto &Link = linksAt(Info->Index);
- return Link.Number;
- }
-
- StratifiedIndex addLinkBelow(StratifiedIndex Set) {
- auto At = addLinks();
- Links[Set].setBelow(At);
- Links[At].setAbove(Set);
- return At;
- }
-
- StratifiedIndex addLinkAbove(StratifiedIndex Set) {
- auto At = addLinks();
- Links[At].setBelow(Set);
- Links[Set].setAbove(At);
- return At;
- }
-
- StratifiedIndex getNewUnlinkedIndex() { return addLinks(); }
-
- StratifiedIndex addLinks() {
- auto Link = Links.size();
- Links.push_back(BuilderLink(Link));
- return Link;
- }
-
- bool inbounds(StratifiedIndex N) const { return N < Links.size(); }
-};
-}
-}
-#endif // LLVM_ADT_STRATIFIEDSETS_H
diff --git a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp b/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
deleted file mode 100644
index 17d7676024a5..000000000000
--- a/llvm/lib/Analysis/SyncDependenceAnalysis.cpp
+++ /dev/null
@@ -1,478 +0,0 @@
-//===--- SyncDependenceAnalysis.cpp - Compute Control Divergence Effects --===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements an algorithm that returns for a divergent branch
-// the set of basic blocks whose phi nodes become divergent due to divergent
-// control. These are the blocks that are reachable by two disjoint paths from
-// the branch or loop exits that have a reaching path that is disjoint from a
-// path to the loop latch.
-//
-// The SyncDependenceAnalysis is used in the DivergenceAnalysis to model
-// control-induced divergence in phi nodes.
-//
-//
-// -- Reference --
-// The algorithm is presented in Section 5 of
-//
-// An abstract interpretation for SPMD divergence
-// on reducible control flow graphs.
-// Julian Rosemann, Simon Moll and Sebastian Hack
-// POPL '21
-//
-//
-// -- Sync dependence --
-// Sync dependence characterizes the control flow aspect of the
-// propagation of branch divergence. For example,
-//
-// %cond = icmp slt i32 %tid, 10
-// br i1 %cond, label %then, label %else
-// then:
-// br label %merge
-// else:
-// br label %merge
-// merge:
-// %a = phi i32 [ 0, %then ], [ 1, %else ]
-//
-// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
-// because %tid is not on its use-def chains, %a is sync dependent on %tid
-// because the branch "br i1 %cond" depends on %tid and affects which value %a
-// is assigned to.
-//
-//
-// -- Reduction to SSA construction --
-// There are two disjoint paths from A to X, if a certain variant of SSA
-// construction places a phi node in X under the following set-up scheme.
-//
-// This variant of SSA construction ignores incoming undef values.
-// That is paths from the entry without a definition do not result in
-// phi nodes.
-//
-// entry
-// / \
-// A \
-// / \ Y
-// B C /
-// \ / \ /
-// D E
-// \ /
-// F
-//
-// Assume that A contains a divergent branch. We are interested
-// in the set of all blocks where each block is reachable from A
-// via two disjoint paths. This would be the set {D, F} in this
-// case.
-// To generally reduce this query to SSA construction we introduce
-// a virtual variable x and assign to x different values in each
-// successor block of A.
-//
-// entry
-// / \
-// A \
-// / \ Y
-// x = 0 x = 1 /
-// \ / \ /
-// D E
-// \ /
-// F
-//
-// Our flavor of SSA construction for x will construct the following
-//
-// entry
-// / \
-// A \
-// / \ Y
-// x0 = 0 x1 = 1 /
-// \ / \ /
-// x2 = phi E
-// \ /
-// x3 = phi
-//
-// The blocks D and F contain phi nodes and are thus each reachable
- // by two disjoint paths from A.
-//
-// -- Remarks --
-// * In case of loop exits we need to check the disjoint path criterion for loops.
-// To this end, we check whether the definition of x differs between the
-// loop exit and the loop header (_after_ SSA construction).
-//
-// -- Known Limitations & Future Work --
-// * The algorithm requires reducible loops because the implementation
-// implicitly performs a single iteration of the underlying data flow analysis.
-// This was done for pragmatism, simplicity and speed.
-//
-// Relevant related work for extending the algorithm to irreducible control:
-// A simple algorithm for global data flow analysis problems.
-// Matthew S. Hecht and Jeffrey D. Ullman.
-// SIAM Journal on Computing, 4(4):519–532, December 1975.
-//
-// * Another reason for requiring reducible loops is that points of
-// synchronization in irreducible loops aren't 'obvious' - there is no unique
-// header where threads 'should' synchronize when entering or coming back
-// around from the latch.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/SyncDependenceAnalysis.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/CFG.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
-
-#include <functional>
-
-#define DEBUG_TYPE "sync-dependence"
-
-// The SDA algorithm operates on a modified CFG - we modify the edges leaving
-// loop headers as follows:
-//
-// * We remove all edges leaving all loop headers.
-// * We add additional edges from the loop headers to their exit blocks.
-//
-// The modification is virtual, that is whenever we visit a loop header we
-// pretend it had different successors.
-namespace {
-using namespace llvm;
-
- // Custom Post-Order Traversal
-//
-// We cannot use the vanilla (R)PO computation of LLVM because:
-// * We (virtually) modify the CFG.
-// * We want a loop-compact block enumeration, that is the numbers assigned to
-// blocks of a loop form an interval
-//
-using POCB = std::function<void(const BasicBlock &)>;
-using VisitedSet = std::set<const BasicBlock *>;
-using BlockStack = std::vector<const BasicBlock *>;
-
-// forward
-static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack,
- VisitedSet &Finalized);
-
-// for a nested region (top-level loop or nested loop)
-static void computeStackPO(BlockStack &Stack, const LoopInfo &LI, Loop *Loop,
- POCB CallBack, VisitedSet &Finalized) {
- const auto *LoopHeader = Loop ? Loop->getHeader() : nullptr;
- while (!Stack.empty()) {
- const auto *NextBB = Stack.back();
-
- auto *NestedLoop = LI.getLoopFor(NextBB);
- bool IsNestedLoop = NestedLoop != Loop;
-
- // Treat the loop as a node
- if (IsNestedLoop) {
- SmallVector<BasicBlock *, 3> NestedExits;
- NestedLoop->getUniqueExitBlocks(NestedExits);
- bool PushedNodes = false;
- for (const auto *NestedExitBB : NestedExits) {
- if (NestedExitBB == LoopHeader)
- continue;
- if (Loop && !Loop->contains(NestedExitBB))
- continue;
- if (Finalized.count(NestedExitBB))
- continue;
- PushedNodes = true;
- Stack.push_back(NestedExitBB);
- }
- if (!PushedNodes) {
- // All loop exits finalized -> finish this node
- Stack.pop_back();
- computeLoopPO(LI, *NestedLoop, CallBack, Finalized);
- }
- continue;
- }
-
- // DAG-style
- bool PushedNodes = false;
- for (const auto *SuccBB : successors(NextBB)) {
- if (SuccBB == LoopHeader)
- continue;
- if (Loop && !Loop->contains(SuccBB))
- continue;
- if (Finalized.count(SuccBB))
- continue;
- PushedNodes = true;
- Stack.push_back(SuccBB);
- }
- if (!PushedNodes) {
- // Never push nodes twice
- Stack.pop_back();
- if (!Finalized.insert(NextBB).second)
- continue;
- CallBack(*NextBB);
- }
- }
-}
-
-static void computeTopLevelPO(Function &F, const LoopInfo &LI, POCB CallBack) {
- VisitedSet Finalized;
- BlockStack Stack;
- Stack.reserve(24); // FIXME made-up number
- Stack.push_back(&F.getEntryBlock());
- computeStackPO(Stack, LI, nullptr, CallBack, Finalized);
-}
-
-static void computeLoopPO(const LoopInfo &LI, Loop &Loop, POCB CallBack,
- VisitedSet &Finalized) {
- /// Call CallBack on all loop blocks.
- std::vector<const BasicBlock *> Stack;
- const auto *LoopHeader = Loop.getHeader();
-
- // Visit the header last
- Finalized.insert(LoopHeader);
- CallBack(*LoopHeader);
-
- // Initialize with immediate successors
- for (const auto *BB : successors(LoopHeader)) {
- if (!Loop.contains(BB))
- continue;
- if (BB == LoopHeader)
- continue;
- Stack.push_back(BB);
- }
-
- // Compute PO inside region
- computeStackPO(Stack, LI, &Loop, CallBack, Finalized);
-}
-
-} // namespace
-
-namespace llvm {
-
-ControlDivergenceDesc SyncDependenceAnalysis::EmptyDivergenceDesc;
-
-SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT,
- const PostDominatorTree &PDT,
- const LoopInfo &LI)
- : DT(DT), PDT(PDT), LI(LI) {
- computeTopLevelPO(*DT.getRoot()->getParent(), LI,
- [&](const BasicBlock &BB) { LoopPO.appendBlock(BB); });
-}
-
-SyncDependenceAnalysis::~SyncDependenceAnalysis() = default;
-
-namespace {
-// divergence propagator for reducible CFGs
-struct DivergencePropagator {
- const ModifiedPO &LoopPOT;
- const DominatorTree &DT;
- const PostDominatorTree &PDT;
- const LoopInfo &LI;
- const BasicBlock &DivTermBlock;
-
- // * if BlockLabels[IndexOf(B)] == C then C is the dominating definition at
- // block B
- // * if BlockLabels[IndexOf(B)] ~ undef then we haven't seen B yet
- // * if BlockLabels[IndexOf(B)] == B then B is a join point of disjoint paths
- // from X or B is an immediate successor of X (initial value).
- using BlockLabelVec = std::vector<const BasicBlock *>;
- BlockLabelVec BlockLabels;
- // divergent join and loop exit descriptor.
- std::unique_ptr<ControlDivergenceDesc> DivDesc;
-
- DivergencePropagator(const ModifiedPO &LoopPOT, const DominatorTree &DT,
- const PostDominatorTree &PDT, const LoopInfo &LI,
- const BasicBlock &DivTermBlock)
- : LoopPOT(LoopPOT), DT(DT), PDT(PDT), LI(LI), DivTermBlock(DivTermBlock),
- BlockLabels(LoopPOT.size(), nullptr),
- DivDesc(new ControlDivergenceDesc) {}
-
- void printDefs(raw_ostream &Out) {
- Out << "Propagator::BlockLabels {\n";
- for (int BlockIdx = (int)BlockLabels.size() - 1; BlockIdx > 0; --BlockIdx) {
- const auto *Label = BlockLabels[BlockIdx];
- Out << LoopPOT.getBlockAt(BlockIdx)->getName().str() << "(" << BlockIdx
- << ") : ";
- if (!Label) {
- Out << "<null>\n";
- } else {
- Out << Label->getName() << "\n";
- }
- }
- Out << "}\n";
- }
-
- // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this
- // causes a divergent join.
- bool computeJoin(const BasicBlock &SuccBlock, const BasicBlock &PushedLabel) {
- auto SuccIdx = LoopPOT.getIndexOf(SuccBlock);
-
- // unset or same reaching label
- const auto *OldLabel = BlockLabels[SuccIdx];
- if (!OldLabel || (OldLabel == &PushedLabel)) {
- BlockLabels[SuccIdx] = &PushedLabel;
- return false;
- }
-
- // Update the definition
- BlockLabels[SuccIdx] = &SuccBlock;
- return true;
- }
-
- // visiting a virtual loop exit edge from the loop header --> temporal
- // divergence on join
- bool visitLoopExitEdge(const BasicBlock &ExitBlock,
- const BasicBlock &DefBlock, bool FromParentLoop) {
- // Pushing from a non-parent loop cannot cause temporal divergence.
- if (!FromParentLoop)
- return visitEdge(ExitBlock, DefBlock);
-
- if (!computeJoin(ExitBlock, DefBlock))
- return false;
-
- // Identified a divergent loop exit
- DivDesc->LoopDivBlocks.insert(&ExitBlock);
- LLVM_DEBUG(dbgs() << "\tDivergent loop exit: " << ExitBlock.getName()
- << "\n");
- return true;
- }
-
- // process \p SuccBlock with reaching definition \p DefBlock
- bool visitEdge(const BasicBlock &SuccBlock, const BasicBlock &DefBlock) {
- if (!computeJoin(SuccBlock, DefBlock))
- return false;
-
- // Divergent, disjoint paths join.
- DivDesc->JoinDivBlocks.insert(&SuccBlock);
- LLVM_DEBUG(dbgs() << "\tDivergent join: " << SuccBlock.getName());
- return true;
- }
-
- std::unique_ptr<ControlDivergenceDesc> computeJoinPoints() {
- assert(DivDesc);
-
- LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: " << DivTermBlock.getName()
- << "\n");
-
- const auto *DivBlockLoop = LI.getLoopFor(&DivTermBlock);
-
- // Early stopping criterion
- int FloorIdx = LoopPOT.size() - 1;
- const BasicBlock *FloorLabel = nullptr;
-
- // bootstrap with branch targets
- int BlockIdx = 0;
-
- for (const auto *SuccBlock : successors(&DivTermBlock)) {
- auto SuccIdx = LoopPOT.getIndexOf(*SuccBlock);
- BlockLabels[SuccIdx] = SuccBlock;
-
- // Find the successor with the highest index to start with
- BlockIdx = std::max<int>(BlockIdx, SuccIdx);
- FloorIdx = std::min<int>(FloorIdx, SuccIdx);
-
- // Identify immediate divergent loop exits
- if (!DivBlockLoop)
- continue;
-
- const auto *BlockLoop = LI.getLoopFor(SuccBlock);
- if (BlockLoop && DivBlockLoop->contains(BlockLoop))
- continue;
- DivDesc->LoopDivBlocks.insert(SuccBlock);
- LLVM_DEBUG(dbgs() << "\tImmediate divergent loop exit: "
- << SuccBlock->getName() << "\n");
- }
-
- // propagate definitions at the immediate successors of the node in RPO
- for (; BlockIdx >= FloorIdx; --BlockIdx) {
- LLVM_DEBUG(dbgs() << "Before next visit:\n"; printDefs(dbgs()));
-
- // Any label available here
- const auto *Label = BlockLabels[BlockIdx];
- if (!Label)
- continue;
-
- // Ok. Get the block
- const auto *Block = LoopPOT.getBlockAt(BlockIdx);
- LLVM_DEBUG(dbgs() << "SDA::joins. visiting " << Block->getName() << "\n");
-
- auto *BlockLoop = LI.getLoopFor(Block);
- bool IsLoopHeader = BlockLoop && BlockLoop->getHeader() == Block;
- bool CausedJoin = false;
- int LoweredFloorIdx = FloorIdx;
- if (IsLoopHeader) {
- // Disconnect from immediate successors and propagate directly to loop
- // exits.
- SmallVector<BasicBlock *, 4> BlockLoopExits;
- BlockLoop->getExitBlocks(BlockLoopExits);
-
- bool IsParentLoop = BlockLoop->contains(&DivTermBlock);
- for (const auto *BlockLoopExit : BlockLoopExits) {
- CausedJoin |= visitLoopExitEdge(*BlockLoopExit, *Label, IsParentLoop);
- LoweredFloorIdx = std::min<int>(LoweredFloorIdx,
- LoopPOT.getIndexOf(*BlockLoopExit));
- }
- } else {
- // Acyclic successor case
- for (const auto *SuccBlock : successors(Block)) {
- CausedJoin |= visitEdge(*SuccBlock, *Label);
- LoweredFloorIdx =
- std::min<int>(LoweredFloorIdx, LoopPOT.getIndexOf(*SuccBlock));
- }
- }
-
- // Floor update
- if (CausedJoin) {
- // 1. Different labels pushed to successors
- FloorIdx = LoweredFloorIdx;
- } else if (FloorLabel != Label) {
- // 2. No join caused BUT we pushed a label that is different than the
- // last pushed label
- FloorIdx = LoweredFloorIdx;
- FloorLabel = Label;
- }
- }
-
- LLVM_DEBUG(dbgs() << "SDA::joins. After propagation:\n"; printDefs(dbgs()));
-
- return std::move(DivDesc);
- }
-};
-} // end anonymous namespace
-
-#ifndef NDEBUG
-static void printBlockSet(ConstBlockSet &Blocks, raw_ostream &Out) {
- Out << "[";
- ListSeparator LS;
- for (const auto *BB : Blocks)
- Out << LS << BB->getName();
- Out << "]";
-}
-#endif
-
-const ControlDivergenceDesc &
-SyncDependenceAnalysis::getJoinBlocks(const Instruction &Term) {
- // trivial case
- if (Term.getNumSuccessors() <= 1) {
- return EmptyDivergenceDesc;
- }
-
- // already available in cache?
- auto ItCached = CachedControlDivDescs.find(&Term);
- if (ItCached != CachedControlDivDescs.end())
- return *ItCached->second;
-
- // compute all join points
- // Special handling of divergent loop exits is not needed for LCSSA
- const auto &TermBlock = *Term.getParent();
- DivergencePropagator Propagator(LoopPO, DT, PDT, LI, TermBlock);
- auto DivDesc = Propagator.computeJoinPoints();
-
- LLVM_DEBUG(dbgs() << "Result (" << Term.getParent()->getName() << "):\n";
- dbgs() << "JoinDivBlocks: ";
- printBlockSet(DivDesc->JoinDivBlocks, dbgs());
- dbgs() << "\nLoopDivBlocks: ";
- printBlockSet(DivDesc->LoopDivBlocks, dbgs()); dbgs() << "\n";);
-
- auto ItInserted = CachedControlDivDescs.emplace(&Term, std::move(DivDesc));
- assert(ItInserted.second);
- return *ItInserted.first->second;
-}
-
-} // namespace llvm
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 31cc0e7ec30e..05fa67d0bbf1 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -11,10 +11,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
@@ -33,7 +33,9 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
"Intel SVML library"),
clEnumValN(TargetLibraryInfoImpl::SLEEFGNUABI, "sleefgnuabi",
- "SIMD Library for Evaluating Elementary Functions")));
+ "SIMD Library for Evaluating Elementary Functions"),
+ clEnumValN(TargetLibraryInfoImpl::ArmPL, "ArmPL",
+ "Arm Performance Libraries")));
StringLiteral const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] =
{
@@ -474,6 +476,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_ZnajSt11align_val_tRKSt9nothrow_t);
TLI.setUnavailable(LibFunc_Znam);
TLI.setUnavailable(LibFunc_ZnamRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t);
TLI.setUnavailable(LibFunc_ZnamSt11align_val_t);
TLI.setUnavailable(LibFunc_ZnamSt11align_val_tRKSt9nothrow_t);
TLI.setUnavailable(LibFunc_Znwj);
@@ -482,8 +485,15 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
TLI.setUnavailable(LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t);
TLI.setUnavailable(LibFunc_Znwm);
TLI.setUnavailable(LibFunc_ZnwmRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t);
TLI.setUnavailable(LibFunc_ZnwmSt11align_val_t);
TLI.setUnavailable(LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t);
+ TLI.setUnavailable(LibFunc_Znwm12__hot_cold_t);
+ TLI.setUnavailable(LibFunc_ZnwmSt11align_val_t12__hot_cold_t);
+ TLI.setUnavailable(LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t);
+ TLI.setUnavailable(LibFunc_Znam12__hot_cold_t);
+ TLI.setUnavailable(LibFunc_ZnamSt11align_val_t12__hot_cold_t);
+ TLI.setUnavailable(LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t);
} else {
// Not MSVC, assume it's Itanium.
TLI.setUnavailable(LibFunc_msvc_new_int);
@@ -1181,10 +1191,17 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
case SLEEFGNUABI: {
const VecDesc VecFuncs_VF2[] = {
#define TLI_DEFINE_SLEEFGNUABI_VF2_VECFUNCS
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF, /* MASK = */ false},
#include "llvm/Analysis/VecFuncs.def"
};
const VecDesc VecFuncs_VF4[] = {
#define TLI_DEFINE_SLEEFGNUABI_VF4_VECFUNCS
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF, /* MASK = */ false},
+#include "llvm/Analysis/VecFuncs.def"
+ };
+ const VecDesc VecFuncs_VFScalable[] = {
+#define TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK) {SCAL, VEC, VF, MASK},
#include "llvm/Analysis/VecFuncs.def"
};
@@ -1195,6 +1212,24 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
case llvm::Triple::aarch64_be:
addVectorizableFunctions(VecFuncs_VF2);
addVectorizableFunctions(VecFuncs_VF4);
+ addVectorizableFunctions(VecFuncs_VFScalable);
+ break;
+ }
+ break;
+ }
+ case ArmPL: {
+ const VecDesc VecFuncs[] = {
+#define TLI_DEFINE_ARMPL_VECFUNCS
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK) {SCAL, VEC, VF, MASK},
+#include "llvm/Analysis/VecFuncs.def"
+ };
+
+ switch (TargetTriple.getArch()) {
+ default:
+ break;
+ case llvm::Triple::aarch64:
+ case llvm::Triple::aarch64_be:
+ addVectorizableFunctions(VecFuncs);
break;
}
break;
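
The ArmPL and SLEEF scalable entries above are pulled in through VecFuncs.def, an X-macro list: the including file defines TLI_DEFINE_VECFUNC to choose what each entry expands to and then includes the .def. A compact single-file sketch of that pattern, with two hypothetical entries standing in for the .def contents:

#include <iostream>

struct VecDesc {
  const char *Scalar;
  const char *Vector;
  unsigned VF;
  bool Masked;
};

// In LLVM the entries below live in VecFuncs.def and are pulled in with
// #include after defining TLI_DEFINE_VECFUNC; the macro controls what each
// entry expands to. The two made-up entries here stand in for that list.
#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF, MASK) {SCAL, VEC, VF, MASK},
static const VecDesc VecFuncs[] = {
    TLI_DEFINE_VECFUNC("sinf", "hypothetical_vec_sinf", 4, false)
    TLI_DEFINE_VECFUNC("cosf", "hypothetical_vec_cosf", 4, true)
};
#undef TLI_DEFINE_VECFUNC

int main() {
  for (const VecDesc &D : VecFuncs)
    std::cout << D.Scalar << " -> " << D.Vector << " (VF=" << D.VF
              << ", masked=" << (D.Masked ? "yes" : "no") << ")\n";
  return 0;
}
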
@@ -1214,16 +1249,16 @@ bool TargetLibraryInfoImpl::isFunctionVectorizable(StringRef funcName) const {
return I != VectorDescs.end() && StringRef(I->ScalarFnName) == funcName;
}
-StringRef
-TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
- const ElementCount &VF) const {
+StringRef TargetLibraryInfoImpl::getVectorizedFunction(StringRef F,
+ const ElementCount &VF,
+ bool Masked) const {
F = sanitizeFunctionName(F);
if (F.empty())
return F;
std::vector<VecDesc>::const_iterator I =
llvm::lower_bound(VectorDescs, F, compareWithScalarFnName);
while (I != VectorDescs.end() && StringRef(I->ScalarFnName) == F) {
- if (I->VectorizationFactor == VF)
+ if ((I->VectorizationFactor == VF) && (I->Masked == Masked))
return I->VectorFnName;
++I;
}
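
The getVectorizedFunction change above means a mapping is now selected only when both the vectorization factor and the new Masked flag match. A standalone sketch of that lookup shape, a name-sorted descriptor table probed with lower_bound and then filtered on (VF, Masked), using made-up descriptor entries rather than the real VecFuncs.def data:

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

struct VecDesc {
  std::string ScalarFnName;
  std::string VectorFnName;
  unsigned VF;  // vectorization factor (fixed-width here for simplicity)
  bool Masked;  // whether the vector variant takes a mask argument
};

std::string getVectorizedFunction(const std::vector<VecDesc> &Descs,
                                  const std::string &F, unsigned VF,
                                  bool Masked) {
  // Descs is sorted by scalar name; find the first candidate, then scan the
  // run of entries with the same scalar name for an exact (VF, Masked) match.
  auto I = std::lower_bound(Descs.begin(), Descs.end(), F,
                            [](const VecDesc &D, const std::string &Name) {
                              return D.ScalarFnName < Name;
                            });
  while (I != Descs.end() && I->ScalarFnName == F) {
    if (I->VF == VF && I->Masked == Masked)
      return I->VectorFnName;
    ++I;
  }
  return "";
}

int main() {
  std::vector<VecDesc> Descs = {
      {"sinf", "hypothetical_vec_sinf", 4, false},
      {"sinf", "hypothetical_vec_sinf_masked", 4, true},
  };
  std::cout << getVectorizedFunction(Descs, "sinf", 4, true) << "\n";
  std::cout << (getVectorizedFunction(Descs, "sinf", 8, false).empty()
                    ? "no match"
                    : "match")
            << "\n";
  return 0;
}
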
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index ad7e5432d4c5..c751d174a48a 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -37,6 +37,11 @@ static cl::opt<unsigned> CacheLineSize(
cl::desc("Use this to override the target cache line size when "
"specified by the user."));
+static cl::opt<unsigned> PredictableBranchThreshold(
+ "predictable-branch-threshold", cl::init(99), cl::Hidden,
+ cl::desc(
+ "Use this to override the target's predictable branch threshold (%)."));
+
namespace {
/// No-op implementation of the TTI interface using the utility base
/// classes.
@@ -103,6 +108,14 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
}
+HardwareLoopInfo::HardwareLoopInfo(Loop *L) : L(L) {
+ // Match default options:
+ // - hardware-loop-counter-bitwidth = 32
+ // - hardware-loop-decrement = 1
+ CountType = Type::getInt32Ty(L->getHeader()->getContext());
+ LoopDecrement = ConstantInt::get(CountType, 1);
+}
+
bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
LoopInfo &LI, DominatorTree &DT,
bool ForceNestedLoop,
@@ -204,15 +217,28 @@ TargetTransformInfo::adjustInliningThreshold(const CallBase *CB) const {
return TTIImpl->adjustInliningThreshold(CB);
}
+unsigned TargetTransformInfo::getCallerAllocaCost(const CallBase *CB,
+ const AllocaInst *AI) const {
+ return TTIImpl->getCallerAllocaCost(CB, AI);
+}
+
int TargetTransformInfo::getInlinerVectorBonusPercent() const {
return TTIImpl->getInlinerVectorBonusPercent();
}
-InstructionCost
-TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands,
- TTI::TargetCostKind CostKind) const {
- return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, CostKind);
+InstructionCost TargetTransformInfo::getGEPCost(
+ Type *PointeeType, const Value *Ptr, ArrayRef<const Value *> Operands,
+ Type *AccessType, TTI::TargetCostKind CostKind) const {
+ return TTIImpl->getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
+}
+
+InstructionCost TargetTransformInfo::getPointersChainCost(
+ ArrayRef<const Value *> Ptrs, const Value *Base,
+ const TTI::PointersChainInfo &Info, Type *AccessTy,
+ TTI::TargetCostKind CostKind) const {
+ assert((Base || !Info.isSameBase()) &&
+ "If pointers have same base address it has to be provided.");
+ return TTIImpl->getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
}
unsigned TargetTransformInfo::getEstimatedNumberOfCaseClusters(
@@ -232,15 +258,13 @@ TargetTransformInfo::getInstructionCost(const User *U,
}
BranchProbability TargetTransformInfo::getPredictableBranchThreshold() const {
- return TTIImpl->getPredictableBranchThreshold();
+ return PredictableBranchThreshold.getNumOccurrences() > 0
+ ? BranchProbability(PredictableBranchThreshold, 100)
+ : TTIImpl->getPredictableBranchThreshold();
}
-bool TargetTransformInfo::hasBranchDivergence() const {
- return TTIImpl->hasBranchDivergence();
-}
-
-bool TargetTransformInfo::useGPUDivergenceAnalysis() const {
- return TTIImpl->useGPUDivergenceAnalysis();
+bool TargetTransformInfo::hasBranchDivergence(const Function *F) const {
+ return TTIImpl->hasBranchDivergence(F);
}
bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
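
The new PredictableBranchThreshold option above is honored only when it was explicitly passed on the command line (getNumOccurrences() > 0); otherwise the target hook still decides, so the cl::init(99) default never silently overrides a target. A tiny sketch of that "explicit override, else defer" pattern, using a plain struct in place of cl::opt:

#include <iostream>
#include <optional>

// Mimics the relevant cl::opt behavior: a default value plus a record of
// whether the user actually passed the flag (getNumOccurrences() > 0).
struct OverridableOption {
  unsigned Default;
  std::optional<unsigned> Explicit; // set only when given on the command line
};

unsigned predictableBranchThreshold(const OverridableOption &Opt,
                                    unsigned TargetValue) {
  // An explicit command-line value wins; otherwise defer to the target hook.
  return Opt.Explicit ? *Opt.Explicit : TargetValue;
}

int main() {
  OverridableOption NotPassed{99, std::nullopt};
  OverridableOption Passed{99, 50};
  std::cout << predictableBranchThreshold(NotPassed, 95) << "\n"; // 95
  std::cout << predictableBranchThreshold(Passed, 95) << "\n";    // 50
  return 0;
}
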
@@ -251,6 +275,16 @@ bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const {
return TTIImpl->isAlwaysUniform(V);
}
+bool llvm::TargetTransformInfo::isValidAddrSpaceCast(unsigned FromAS,
+ unsigned ToAS) const {
+ return TTIImpl->isValidAddrSpaceCast(FromAS, ToAS);
+}
+
+bool llvm::TargetTransformInfo::addrspacesMayAlias(unsigned FromAS,
+ unsigned ToAS) const {
+ return TTIImpl->addrspacesMayAlias(FromAS, ToAS);
+}
+
unsigned TargetTransformInfo::getFlatAddressSpace() const {
return TTIImpl->getFlatAddressSpace();
}
@@ -299,14 +333,13 @@ bool TargetTransformInfo::isHardwareLoopProfitable(
}
bool TargetTransformInfo::preferPredicateOverEpilogue(
- Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC,
- TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL,
- InterleavedAccessInfo *IAI) const {
- return TTIImpl->preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI);
+ TailFoldingInfo *TFI) const {
+ return TTIImpl->preferPredicateOverEpilogue(TFI);
}
-PredicationStyle TargetTransformInfo::emitGetActiveLaneMask() const {
- return TTIImpl->emitGetActiveLaneMask();
+TailFoldingStyle TargetTransformInfo::getPreferredTailFoldingStyle(
+ bool IVUpdateMayOverflow) const {
+ return TTIImpl->getPreferredTailFoldingStyle(IVUpdateMayOverflow);
}
std::optional<Instruction *>
@@ -664,6 +697,10 @@ std::optional<unsigned> TargetTransformInfo::getVScaleForTuning() const {
return TTIImpl->getVScaleForTuning();
}
+bool TargetTransformInfo::isVScaleKnownToBeAPowerOfTwo() const {
+ return TTIImpl->isVScaleKnownToBeAPowerOfTwo();
+}
+
bool TargetTransformInfo::shouldMaximizeVectorBandwidth(
TargetTransformInfo::RegisterKind K) const {
return TTIImpl->shouldMaximizeVectorBandwidth(K);
@@ -728,7 +765,7 @@ bool TargetTransformInfo::shouldPrefetchAddressSpace(unsigned AS) const {
return TTIImpl->shouldPrefetchAddressSpace(AS);
}
-unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
+unsigned TargetTransformInfo::getMaxInterleaveFactor(ElementCount VF) const {
return TTIImpl->getMaxInterleaveFactor(VF);
}
@@ -1007,6 +1044,10 @@ InstructionCost TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
return Cost;
}
+uint64_t TargetTransformInfo::getMaxMemIntrinsicInlineSizeThreshold() const {
+ return TTIImpl->getMaxMemIntrinsicInlineSizeThreshold();
+}
+
InstructionCost TargetTransformInfo::getArithmeticReductionCost(
unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) const {
@@ -1017,17 +1058,17 @@ InstructionCost TargetTransformInfo::getArithmeticReductionCost(
}
InstructionCost TargetTransformInfo::getMinMaxReductionCost(
- VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
+ Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF,
TTI::TargetCostKind CostKind) const {
InstructionCost Cost =
- TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
+ TTIImpl->getMinMaxReductionCost(IID, Ty, FMF, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
InstructionCost TargetTransformInfo::getExtendedReductionCost(
unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
- std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) const {
+ FastMathFlags FMF, TTI::TargetCostKind CostKind) const {
return TTIImpl->getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
CostKind);
}
@@ -1163,6 +1204,14 @@ TargetTransformInfo::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
return TTIImpl->getVPLegalizationStrategy(VPI);
}
+bool TargetTransformInfo::hasArmWideBranch(bool Thumb) const {
+ return TTIImpl->hasArmWideBranch(Thumb);
+}
+
+unsigned TargetTransformInfo::getMaxNumArgs() const {
+ return TTIImpl->getMaxNumArgs();
+}
+
bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
return TTIImpl->shouldExpandReduction(II);
}
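The PredictableBranchThreshold hunk above uses the usual LLVM pattern for letting a hidden command-line flag override a target hook: the flag only wins when it actually occurred on the command line, otherwise the TTI implementation's value is used. A minimal, self-contained sketch of that pattern, with option and function names invented for illustration rather than taken from the patch:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hidden knob with a default; getNumOccurrences() distinguishes "left at the
// default" from "explicitly set on the command line".
static cl::opt<unsigned> DemoThreshold("demo-threshold", cl::init(99),
                                       cl::Hidden,
                                       cl::desc("Override the target value."));

static unsigned effectiveThreshold(unsigned TargetDefault) {
  if (DemoThreshold.getNumOccurrences() > 0)
    return DemoThreshold; // user passed -demo-threshold=<n>
  return TargetDefault;   // fall back to the target's own value
}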
diff --git a/llvm/lib/Analysis/TensorSpec.cpp b/llvm/lib/Analysis/TensorSpec.cpp
index 4f7428ded85e..8dd1a054af88 100644
--- a/llvm/lib/Analysis/TensorSpec.cpp
+++ b/llvm/lib/Analysis/TensorSpec.cpp
@@ -10,8 +10,10 @@
// utils.
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Config/config.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/TensorSpec.h"
#include "llvm/Support/CommandLine.h"
@@ -102,4 +104,23 @@ std::optional<TensorSpec> getTensorSpecFromJSON(LLVMContext &Ctx,
return std::nullopt;
}
+std::string tensorValueToString(const char *Buffer, const TensorSpec &Spec) {
+ switch (Spec.type()) {
+#define _IMR_DBG_PRINTER(T, N) \
+ case TensorType::N: { \
+ const T *TypedBuff = reinterpret_cast<const T *>(Buffer); \
+ auto R = llvm::make_range(TypedBuff, TypedBuff + Spec.getElementCount()); \
+ return llvm::join( \
+ llvm::map_range(R, [](T V) { return std::to_string(V); }), ","); \
+ }
+ SUPPORTED_TENSOR_TYPES(_IMR_DBG_PRINTER)
+#undef _IMR_DBG_PRINTER
+ case TensorType::Total:
+ case TensorType::Invalid:
+ llvm_unreachable("invalid tensor type");
+ }
+ // To appease warnings about not all control paths returning a value.
+ return "";
+}
+
} // namespace llvm
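The tensorValueToString printer added above leans on the SUPPORTED_TENSOR_TYPES X-macro to stamp out one switch case per element type. A stripped-down sketch of the same dispatch technique, using a made-up two-entry type list instead of the real macro:

#include <cstdint>
#include <string>

// Stand-in for SUPPORTED_TENSOR_TYPES: invokes M once per (C type, enum name).
#define DEMO_TENSOR_TYPES(M) M(float, Float) M(int64_t, Int64)

enum class DemoType { Float, Int64 };

std::string firstElementToString(const char *Buffer, DemoType Type) {
  switch (Type) {
#define DEMO_PRINTER(T, N)                                                     \
  case DemoType::N:                                                            \
    return std::to_string(*reinterpret_cast<const T *>(Buffer));
    DEMO_TENSOR_TYPES(DEMO_PRINTER)
#undef DEMO_PRINTER
  }
  return ""; // unreachable for valid DemoType values
}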
diff --git a/llvm/lib/Analysis/TrainingLogger.cpp b/llvm/lib/Analysis/TrainingLogger.cpp
index dcee8d40c53d..e236890aa2bc 100644
--- a/llvm/lib/Analysis/TrainingLogger.cpp
+++ b/llvm/lib/Analysis/TrainingLogger.cpp
@@ -32,7 +32,7 @@ static cl::opt<bool>
UseSimpleLogger("tfutils-use-simplelogger", cl::init(true), cl::Hidden,
cl::desc("Output simple (non-protobuf) log."));
-void Logger::writeHeader() {
+void Logger::writeHeader(std::optional<TensorSpec> AdviceSpec) {
json::OStream JOS(*OS);
JOS.object([&]() {
JOS.attributeArray("features", [&]() {
@@ -44,6 +44,11 @@ void Logger::writeHeader() {
RewardSpec.toJSON(JOS);
JOS.attributeEnd();
}
+ if (AdviceSpec.has_value()) {
+ JOS.attributeBegin("advice");
+ AdviceSpec->toJSON(JOS);
+ JOS.attributeEnd();
+ }
});
*OS << "\n";
}
@@ -81,8 +86,9 @@ void Logger::logRewardImpl(const char *RawData) {
Logger::Logger(std::unique_ptr<raw_ostream> OS,
const std::vector<TensorSpec> &FeatureSpecs,
- const TensorSpec &RewardSpec, bool IncludeReward)
+ const TensorSpec &RewardSpec, bool IncludeReward,
+ std::optional<TensorSpec> AdviceSpec)
: OS(std::move(OS)), FeatureSpecs(FeatureSpecs), RewardSpec(RewardSpec),
IncludeReward(IncludeReward) {
- writeHeader();
+ writeHeader(AdviceSpec);
}
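The Logger change above threads an optional advice TensorSpec through to the JSON header, which gains an "advice" entry only when one was supplied. A toy sketch of that conditional emission, deliberately not using the real Logger or any key names beyond the "features" and "advice" shown in the hunk:

#include <iostream>
#include <optional>
#include <string>

// Emits a header-like JSON object; the "advice" field is optional, mirroring
// writeHeader(std::optional<TensorSpec>) above.
void writeDemoHeader(std::optional<std::string> AdviceJSON) {
  std::cout << "{\"features\":[]"; // feature spec objects elided here
  if (AdviceJSON)
    std::cout << ",\"advice\":" << *AdviceJSON;
  std::cout << "}\n";
}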
diff --git a/llvm/lib/Analysis/TypeMetadataUtils.cpp b/llvm/lib/Analysis/TypeMetadataUtils.cpp
index 1c9354fbe01f..bbaee06ed8a5 100644
--- a/llvm/lib/Analysis/TypeMetadataUtils.cpp
+++ b/llvm/lib/Analysis/TypeMetadataUtils.cpp
@@ -99,7 +99,9 @@ void llvm::findDevirtualizableCallsForTypeCheckedLoad(
SmallVectorImpl<Instruction *> &Preds, bool &HasNonCallUses,
const CallInst *CI, DominatorTree &DT) {
assert(CI->getCalledFunction()->getIntrinsicID() ==
- Intrinsic::type_checked_load);
+ Intrinsic::type_checked_load ||
+ CI->getCalledFunction()->getIntrinsicID() ==
+ Intrinsic::type_checked_load_relative);
auto *Offset = dyn_cast<ConstantInt>(CI->getArgOperand(1));
if (!Offset) {
@@ -161,7 +163,7 @@ Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M,
// (Swift-specific) relative-pointer support starts here.
if (auto *CI = dyn_cast<ConstantInt>(I)) {
- if (Offset == 0 && CI->getZExtValue() == 0) {
+ if (Offset == 0 && CI->isZero()) {
return I;
}
}
diff --git a/llvm/lib/Analysis/UniformityAnalysis.cpp b/llvm/lib/Analysis/UniformityAnalysis.cpp
index 8ed5af8a8d1c..bf0b194dcd70 100644
--- a/llvm/lib/Analysis/UniformityAnalysis.cpp
+++ b/llvm/lib/Analysis/UniformityAnalysis.cpp
@@ -1,4 +1,4 @@
-//===- ConvergenceUtils.cpp -----------------------------------------------===//
+//===- UniformityAnalysis.cpp ---------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -26,18 +26,16 @@ bool llvm::GenericUniformityAnalysisImpl<SSAContext>::hasDivergentDefs(
template <>
bool llvm::GenericUniformityAnalysisImpl<SSAContext>::markDefsDivergent(
- const Instruction &Instr, bool AllDefsDivergent) {
- return markDivergent(&Instr);
+ const Instruction &Instr) {
+ return markDivergent(cast<Value>(&Instr));
}
template <> void llvm::GenericUniformityAnalysisImpl<SSAContext>::initialize() {
for (auto &I : instructions(F)) {
- if (TTI->isSourceOfDivergence(&I)) {
- assert(!I.isTerminator());
+ if (TTI->isSourceOfDivergence(&I))
markDivergent(I);
- } else if (TTI->isAlwaysUniform(&I)) {
+ else if (TTI->isAlwaysUniform(&I))
addUniformOverride(I);
- }
}
for (auto &Arg : F.args()) {
if (TTI->isSourceOfDivergence(&Arg)) {
@@ -50,13 +48,8 @@ template <>
void llvm::GenericUniformityAnalysisImpl<SSAContext>::pushUsers(
const Value *V) {
for (const auto *User : V->users()) {
- const auto *UserInstr = dyn_cast<const Instruction>(User);
- if (!UserInstr)
- continue;
- if (isAlwaysUniform(*UserInstr))
- continue;
- if (markDivergent(*UserInstr)) {
- Worklist.push_back(UserInstr);
+ if (const auto *UserInstr = dyn_cast<const Instruction>(User)) {
+ markDivergent(*UserInstr);
}
}
}
@@ -73,8 +66,7 @@ void llvm::GenericUniformityAnalysisImpl<SSAContext>::pushUsers(
template <>
bool llvm::GenericUniformityAnalysisImpl<SSAContext>::usesValueFromCycle(
const Instruction &I, const Cycle &DefCycle) const {
- if (isAlwaysUniform(I))
- return false;
+ assert(!isAlwaysUniform(I));
for (const Use &U : I.operands()) {
if (auto *I = dyn_cast<Instruction>(&U)) {
if (DefCycle.contains(I->getParent()))
@@ -84,6 +76,33 @@ bool llvm::GenericUniformityAnalysisImpl<SSAContext>::usesValueFromCycle(
return false;
}
+template <>
+void llvm::GenericUniformityAnalysisImpl<
+ SSAContext>::propagateTemporalDivergence(const Instruction &I,
+ const Cycle &DefCycle) {
+ if (isDivergent(I))
+ return;
+ for (auto *User : I.users()) {
+ auto *UserInstr = cast<Instruction>(User);
+ if (DefCycle.contains(UserInstr->getParent()))
+ continue;
+ markDivergent(*UserInstr);
+ }
+}
+
+template <>
+bool llvm::GenericUniformityAnalysisImpl<SSAContext>::isDivergentUse(
+ const Use &U) const {
+ const auto *V = U.get();
+ if (isDivergent(V))
+ return true;
+ if (const auto *DefInstr = dyn_cast<Instruction>(V)) {
+ const auto *UseInstr = cast<Instruction>(U.getUser());
+ return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
+ }
+ return false;
+}
+
// This ensures explicit instantiation of
// GenericUniformityAnalysisImpl::ImplDeleter::operator()
template class llvm::GenericUniformityInfo<SSAContext>;
@@ -99,7 +118,12 @@ llvm::UniformityInfo UniformityInfoAnalysis::run(Function &F,
auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
auto &CI = FAM.getResult<CycleAnalysis>(F);
- return UniformityInfo{F, DT, CI, &TTI};
+ UniformityInfo UI{F, DT, CI, &TTI};
+ // Skip computation if we can assume everything is uniform.
+ if (TTI.hasBranchDivergence(&F))
+ UI.compute();
+
+ return UI;
}
AnalysisKey UniformityInfoAnalysis::Key;
@@ -125,17 +149,18 @@ UniformityInfoWrapperPass::UniformityInfoWrapperPass() : FunctionPass(ID) {
initializeUniformityInfoWrapperPassPass(*PassRegistry::getPassRegistry());
}
-INITIALIZE_PASS_BEGIN(UniformityInfoWrapperPass, "uniforminfo",
- "Uniform Info Analysis", true, true)
+INITIALIZE_PASS_BEGIN(UniformityInfoWrapperPass, "uniformity",
+ "Uniformity Analysis", true, true)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(CycleInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(UniformityInfoWrapperPass, "uniforminfo",
- "Uniform Info Analysis", true, true)
+INITIALIZE_PASS_END(UniformityInfoWrapperPass, "uniformity",
+ "Uniformity Analysis", true, true)
void UniformityInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<CycleInfoWrapperPass>();
+ AU.addRequiredTransitive<CycleInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
}
@@ -148,6 +173,11 @@ bool UniformityInfoWrapperPass::runOnFunction(Function &F) {
m_function = &F;
m_uniformityInfo =
UniformityInfo{F, domTree, cycleInfo, &targetTransformInfo};
+
+ // Skip computation if we can assume everything is uniform.
+ if (targetTransformInfo.hasBranchDivergence(m_function))
+ m_uniformityInfo.compute();
+
return false;
}
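Both the new-PM and legacy-PM paths above now build UniformityInfo unconditionally but only call compute() when TTI reports branch divergence, so targets with uniform control flow skip the propagation entirely. A minimal sketch of that lazy-compute shape, with an invented type standing in for the real analysis:

// Hypothetical stand-in for UniformityInfo; compute() is the expensive part.
struct DemoUniformityInfo {
  bool Computed = false;
  void compute() { Computed = true; /* fixed-point divergence propagation */ }
};

DemoUniformityInfo buildInfo(bool TargetHasBranchDivergence) {
  DemoUniformityInfo UI;
  if (TargetHasBranchDivergence)
    UI.compute(); // otherwise every value is treated as uniform
  return UI;
}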
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index a13bdade320f..5d526858e00e 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -25,7 +26,6 @@
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
@@ -42,6 +42,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalAlias.h"
@@ -53,6 +54,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
@@ -93,33 +95,6 @@ static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
return DL.getPointerTypeSizeInBits(Ty);
}
-namespace {
-
-// Simplifying using an assume can only be done in a particular control-flow
-// context (the context instruction provides that context). If an assume and
-// the context instruction are not in the same block then the DT helps in
-// figuring out if we can use it.
-struct Query {
- const DataLayout &DL;
- AssumptionCache *AC;
- const Instruction *CxtI;
- const DominatorTree *DT;
-
- // Unlike the other analyses, this may be a nullptr because not all clients
- // provide it currently.
- OptimizationRemarkEmitter *ORE;
-
- /// If true, it is safe to use metadata during simplification.
- InstrInfoQuery IIQ;
-
- Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT, bool UseInstrInfo,
- OptimizationRemarkEmitter *ORE = nullptr)
- : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {}
-};
-
-} // end anonymous namespace
-
// Given the provided Value and, potentially, a context instruction, return
// the preferred context instruction (if any).
static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
@@ -170,10 +145,11 @@ static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
}
static void computeKnownBits(const Value *V, const APInt &DemandedElts,
- KnownBits &Known, unsigned Depth, const Query &Q);
+ KnownBits &Known, unsigned Depth,
+ const SimplifyQuery &Q);
static void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
- const Query &Q) {
+ const SimplifyQuery &Q) {
// Since the number of lanes in a scalable vector is unknown at compile time,
// we track one bit which is implicitly broadcast to all lanes. This means
// that all lanes in a scalable vector are considered demanded.
@@ -186,46 +162,44 @@ static void computeKnownBits(const Value *V, KnownBits &Known, unsigned Depth,
void llvm::computeKnownBits(const Value *V, KnownBits &Known,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT,
- OptimizationRemarkEmitter *ORE, bool UseInstrInfo) {
+ const DominatorTree *DT, bool UseInstrInfo) {
::computeKnownBits(V, Known, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
+ SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
+ safeCxtI(V, CxtI), UseInstrInfo));
}
void llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
KnownBits &Known, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI, const DominatorTree *DT,
- OptimizationRemarkEmitter *ORE, bool UseInstrInfo) {
+ bool UseInstrInfo) {
::computeKnownBits(V, DemandedElts, Known, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
+ SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
+ safeCxtI(V, CxtI), UseInstrInfo));
}
static KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts,
- unsigned Depth, const Query &Q);
+ unsigned Depth, const SimplifyQuery &Q);
static KnownBits computeKnownBits(const Value *V, unsigned Depth,
- const Query &Q);
+ const SimplifyQuery &Q);
KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
- const DominatorTree *DT,
- OptimizationRemarkEmitter *ORE,
- bool UseInstrInfo) {
- return ::computeKnownBits(
- V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
+ const DominatorTree *DT, bool UseInstrInfo) {
+ return ::computeKnownBits(V, Depth,
+ SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
+ safeCxtI(V, CxtI), UseInstrInfo));
}
KnownBits llvm::computeKnownBits(const Value *V, const APInt &DemandedElts,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT,
- OptimizationRemarkEmitter *ORE,
- bool UseInstrInfo) {
- return ::computeKnownBits(
- V, DemandedElts, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo, ORE));
+ const DominatorTree *DT, bool UseInstrInfo) {
+ return ::computeKnownBits(V, DemandedElts, Depth,
+ SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
+ safeCxtI(V, CxtI), UseInstrInfo));
}
bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
@@ -282,11 +256,18 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
IntegerType *IT = cast<IntegerType>(LHS->getType()->getScalarType());
KnownBits LHSKnown(IT->getBitWidth());
KnownBits RHSKnown(IT->getBitWidth());
- computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo);
- computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, nullptr, UseInstrInfo);
+ computeKnownBits(LHS, LHSKnown, DL, 0, AC, CxtI, DT, UseInstrInfo);
+ computeKnownBits(RHS, RHSKnown, DL, 0, AC, CxtI, DT, UseInstrInfo);
return KnownBits::haveNoCommonBitsSet(LHSKnown, RHSKnown);
}
+bool llvm::isOnlyUsedInZeroComparison(const Instruction *I) {
+ return !I->user_empty() && all_of(I->users(), [](const User *U) {
+ ICmpInst::Predicate P;
+ return match(U, m_ICmp(P, m_Value(), m_Zero()));
+ });
+}
+
bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
return !I->user_empty() && all_of(I->users(), [](const User *U) {
ICmpInst::Predicate P;
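The new isOnlyUsedInZeroComparison above checks that every user is an integer compare against zero with any predicate. A small standalone sketch of that per-user check, mirroring the lambda directly and only adding the includes it would need:

#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// True if U is `icmp <any pred> X, 0`; this is the predicate applied to each
// user via all_of() in isOnlyUsedInZeroComparison.
static bool isCompareWithZero(const User *U) {
  ICmpInst::Predicate Pred;
  return match(U, m_ICmp(Pred, m_Value(), m_Zero()));
}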
@@ -295,34 +276,37 @@ bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *I) {
}
static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
- const Query &Q);
+ const SimplifyQuery &Q);
bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
bool OrZero, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT, bool UseInstrInfo) {
- return ::isKnownToBeAPowerOfTwo(
- V, OrZero, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
+ return ::isKnownToBeAPowerOfTwo(V, OrZero, Depth,
+ SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
+ safeCxtI(V, CxtI),
+ UseInstrInfo));
}
static bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
- unsigned Depth, const Query &Q);
+ unsigned Depth, const SimplifyQuery &Q);
-static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q);
+static bool isKnownNonZero(const Value *V, unsigned Depth,
+ const SimplifyQuery &Q);
bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT, bool UseInstrInfo) {
return ::isKnownNonZero(V, Depth,
- Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
+ SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
+ safeCxtI(V, CxtI), UseInstrInfo));
}
bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI, const DominatorTree *DT,
bool UseInstrInfo) {
- KnownBits Known =
- computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo);
+ KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT, UseInstrInfo);
return Known.isNonNegative();
}
@@ -341,39 +325,39 @@ bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT, bool UseInstrInfo) {
- KnownBits Known =
- computeKnownBits(V, DL, Depth, AC, CxtI, DT, nullptr, UseInstrInfo);
+ KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT, UseInstrInfo);
return Known.isNegative();
}
static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
- const Query &Q);
+ const SimplifyQuery &Q);
bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
const DataLayout &DL, AssumptionCache *AC,
const Instruction *CxtI, const DominatorTree *DT,
bool UseInstrInfo) {
return ::isKnownNonEqual(V1, V2, 0,
- Query(DL, AC, safeCxtI(V2, V1, CxtI), DT,
- UseInstrInfo, /*ORE=*/nullptr));
+ SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
+ safeCxtI(V2, V1, CxtI), UseInstrInfo));
}
static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
- const Query &Q);
+ const SimplifyQuery &Q);
bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT, bool UseInstrInfo) {
- return ::MaskedValueIsZero(
- V, Mask, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
+ return ::MaskedValueIsZero(V, Mask, Depth,
+ SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
+ safeCxtI(V, CxtI), UseInstrInfo));
}
static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
- unsigned Depth, const Query &Q);
+ unsigned Depth, const SimplifyQuery &Q);
static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
- const Query &Q) {
+ const SimplifyQuery &Q) {
auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
APInt DemandedElts =
FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
@@ -384,8 +368,9 @@ unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT, bool UseInstrInfo) {
- return ::ComputeNumSignBits(
- V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT, UseInstrInfo));
+ return ::ComputeNumSignBits(V, Depth,
+ SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
+ safeCxtI(V, CxtI), UseInstrInfo));
}
unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
@@ -399,7 +384,7 @@ unsigned llvm::ComputeMaxSignificantBits(const Value *V, const DataLayout &DL,
static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
bool NSW, const APInt &DemandedElts,
KnownBits &KnownOut, KnownBits &Known2,
- unsigned Depth, const Query &Q) {
+ unsigned Depth, const SimplifyQuery &Q) {
computeKnownBits(Op1, DemandedElts, KnownOut, Depth + 1, Q);
// If one operand is unknown and we have no nowrap information,
@@ -414,7 +399,7 @@ static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
const APInt &DemandedElts, KnownBits &Known,
KnownBits &Known2, unsigned Depth,
- const Query &Q) {
+ const SimplifyQuery &Q) {
computeKnownBits(Op1, DemandedElts, Known, Depth + 1, Q);
computeKnownBits(Op0, DemandedElts, Known2, Depth + 1, Q);
@@ -479,7 +464,7 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
// The first CommonPrefixBits of all values in Range are equal.
unsigned CommonPrefixBits =
- (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros();
+ (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countl_zero();
APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits);
APInt UnsignedMax = Range.getUnsignedMax().zextOrTrunc(BitWidth);
Known.One &= UnsignedMax & Mask;
@@ -579,6 +564,11 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv,
return false;
}
+// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
+// we still have enough information about `RHS` to conclude non-zero. For
+// example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
+// so the extra compile time may not be worth it, but possibly a second API
+// should be created for use outside of loops.
static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
// v u> y implies v != 0.
if (Pred == ICmpInst::ICMP_UGT)
@@ -597,7 +587,7 @@ static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
return !TrueValues.contains(APInt::getZero(C->getBitWidth()));
}
-static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) {
+static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
// Use of assumptions is context-sensitive. If we don't have a context, we
// cannot use them!
if (!Q.AC || !Q.CxtI)
@@ -616,7 +606,7 @@ static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) {
for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
if (!AssumeVH)
continue;
- CondGuardInst *I = cast<CondGuardInst>(AssumeVH);
+ CallInst *I = cast<CallInst>(AssumeVH);
assert(I->getFunction() == Q.CxtI->getFunction() &&
"Got assumption for the wrong function!");
@@ -624,6 +614,9 @@ static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) {
// We're running this loop once for each value queried, resulting in a
// runtime of ~O(#assumes * #values).
+ assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
+ "must be an assume intrinsic");
+
Value *RHS;
CmpInst::Predicate Pred;
auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
@@ -637,8 +630,167 @@ static bool isKnownNonZeroFromAssume(const Value *V, const Query &Q) {
return false;
}
-static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
- unsigned Depth, const Query &Q) {
+static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp,
+ KnownBits &Known, unsigned Depth,
+ const SimplifyQuery &Q) {
+ unsigned BitWidth = Known.getBitWidth();
+ // We are attempting to compute known bits for the operands of an assume.
+ // Do not try to use other assumptions for those recursive calls because
+ // that can lead to mutual recursion and a compile-time explosion.
+ // An example of the mutual recursion: computeKnownBits can call
+ // isKnownNonZero which calls computeKnownBitsFromAssume (this function)
+ // and so on.
+ SimplifyQuery QueryNoAC = Q;
+ QueryNoAC.AC = nullptr;
+
+ // Note that ptrtoint may change the bitwidth.
+ Value *A, *B;
+ auto m_V =
+ m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V)));
+
+ CmpInst::Predicate Pred;
+ uint64_t C;
+ switch (Cmp->getPredicate()) {
+ case ICmpInst::ICMP_EQ:
+ // assume(v = a)
+ if (match(Cmp, m_c_ICmp(Pred, m_V, m_Value(A)))) {
+ KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
+ Known = Known.unionWith(RHSKnown);
+ // assume(v & b = a)
+ } else if (match(Cmp,
+ m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A)))) {
+ KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
+ KnownBits MaskKnown = computeKnownBits(B, Depth + 1, QueryNoAC);
+
+ // For those bits in the mask that are known to be one, we can propagate
+ // known bits from the RHS to V.
+ Known.Zero |= RHSKnown.Zero & MaskKnown.One;
+ Known.One |= RHSKnown.One & MaskKnown.One;
+ // assume(~(v & b) = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
+ m_Value(A)))) {
+ KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
+ KnownBits MaskKnown = computeKnownBits(B, Depth + 1, QueryNoAC);
+
+ // For those bits in the mask that are known to be one, we can propagate
+ // inverted known bits from the RHS to V.
+ Known.Zero |= RHSKnown.One & MaskKnown.One;
+ Known.One |= RHSKnown.Zero & MaskKnown.One;
+ // assume(v | b = a)
+ } else if (match(Cmp,
+ m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A)))) {
+ KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
+ KnownBits BKnown = computeKnownBits(B, Depth + 1, QueryNoAC);
+
+ // For those bits in B that are known to be zero, we can propagate known
+ // bits from the RHS to V.
+ Known.Zero |= RHSKnown.Zero & BKnown.Zero;
+ Known.One |= RHSKnown.One & BKnown.Zero;
+ // assume(~(v | b) = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
+ m_Value(A)))) {
+ KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
+ KnownBits BKnown = computeKnownBits(B, Depth + 1, QueryNoAC);
+
+ // For those bits in B that are known to be zero, we can propagate
+ // inverted known bits from the RHS to V.
+ Known.Zero |= RHSKnown.One & BKnown.Zero;
+ Known.One |= RHSKnown.Zero & BKnown.Zero;
+ // assume(v ^ b = a)
+ } else if (match(Cmp,
+ m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A)))) {
+ KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
+ KnownBits BKnown = computeKnownBits(B, Depth + 1, QueryNoAC);
+
+ // For those bits in B that are known to be zero, we can propagate known
+ // bits from the RHS to V. For those bits in B that are known to be one,
+ // we can propagate inverted known bits from the RHS to V.
+ Known.Zero |= RHSKnown.Zero & BKnown.Zero;
+ Known.One |= RHSKnown.One & BKnown.Zero;
+ Known.Zero |= RHSKnown.One & BKnown.One;
+ Known.One |= RHSKnown.Zero & BKnown.One;
+ // assume(~(v ^ b) = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
+ m_Value(A)))) {
+ KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
+ KnownBits BKnown = computeKnownBits(B, Depth + 1, QueryNoAC);
+
+ // For those bits in B that are known to be zero, we can propagate
+ // inverted known bits from the RHS to V. For those bits in B that are
+ // known to be one, we can propagate known bits from the RHS to V.
+ Known.Zero |= RHSKnown.One & BKnown.Zero;
+ Known.One |= RHSKnown.Zero & BKnown.Zero;
+ Known.Zero |= RHSKnown.Zero & BKnown.One;
+ Known.One |= RHSKnown.One & BKnown.One;
+ // assume(v << c = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
+ m_Value(A))) &&
+ C < BitWidth) {
+ KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
+
+ // For those bits in RHS that are known, we can propagate them to known
+ // bits in V shifted to the right by C.
+ RHSKnown.Zero.lshrInPlace(C);
+ RHSKnown.One.lshrInPlace(C);
+ Known = Known.unionWith(RHSKnown);
+ // assume(~(v << c) = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
+ m_Value(A))) &&
+ C < BitWidth) {
+ KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
+ // For those bits in RHS that are known, we can propagate them inverted
+ // to known bits in V shifted to the right by C.
+ RHSKnown.One.lshrInPlace(C);
+ Known.Zero |= RHSKnown.One;
+ RHSKnown.Zero.lshrInPlace(C);
+ Known.One |= RHSKnown.Zero;
+ // assume(v >> c = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
+ m_Value(A))) &&
+ C < BitWidth) {
+ KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
+ // For those bits in RHS that are known, we can propagate them to known
+ // bits in V shifted to the right by C.
+ Known.Zero |= RHSKnown.Zero << C;
+ Known.One |= RHSKnown.One << C;
+ // assume(~(v >> c) = a)
+ } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
+ m_Value(A))) &&
+ C < BitWidth) {
+ KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
+ // For those bits in RHS that are known, we can propagate them inverted
+ // to known bits in V shifted to the right by C.
+ Known.Zero |= RHSKnown.One << C;
+ Known.One |= RHSKnown.Zero << C;
+ }
+ break;
+ case ICmpInst::ICMP_NE: {
+ // assume (v & b != 0) where b is a power of 2
+ const APInt *BPow2;
+ if (match(Cmp, m_ICmp(Pred, m_c_And(m_V, m_Power2(BPow2)), m_Zero()))) {
+ Known.One |= *BPow2;
+ }
+ break;
+ }
+ default:
+ const APInt *Offset = nullptr;
+ if (match(Cmp, m_ICmp(Pred, m_CombineOr(m_V, m_Add(m_V, m_APInt(Offset))),
+ m_Value(A)))) {
+ KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC);
+ ConstantRange RHSRange =
+ ConstantRange::fromKnownBits(RHSKnown, Cmp->isSigned());
+ ConstantRange LHSRange =
+ ConstantRange::makeAllowedICmpRegion(Pred, RHSRange);
+ if (Offset)
+ LHSRange = LHSRange.sub(*Offset);
+ Known = Known.unionWith(LHSRange.toKnownBits());
+ }
+ break;
+ }
+}
+
+void llvm::computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
+ unsigned Depth, const SimplifyQuery &Q) {
// Use of assumptions is context-sensitive. If we don't have a context, we
// cannot use them!
if (!Q.AC || !Q.CxtI)
@@ -649,7 +801,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
// Refine Known set if the pointer alignment is set by assume bundles.
if (V->getType()->isPointerTy()) {
if (RetainedKnowledge RK = getKnowledgeValidInContext(
- V, {Attribute::Alignment}, Q.CxtI, Q.DT, Q.AC)) {
+ V, { Attribute::Alignment }, Q.CxtI, Q.DT, Q.AC)) {
if (isPowerOf2_64(RK.ArgValue))
Known.Zero.setLowBits(Log2_64(RK.ArgValue));
}
@@ -661,7 +813,7 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
if (!AssumeVH)
continue;
- CondGuardInst *I = cast<CondGuardInst>(AssumeVH);
+ CallInst *I = cast<CallInst>(AssumeVH);
assert(I->getParent()->getParent() == Q.CxtI->getParent()->getParent() &&
"Got assumption for the wrong function!");
@@ -669,16 +821,21 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
// We're running this loop once for each value queried, resulting in a
// runtime of ~O(#assumes * #values).
+ assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
+ "must be an assume intrinsic");
+
Value *Arg = I->getArgOperand(0);
if (Arg == V && isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
assert(BitWidth == 1 && "assume operand is not i1?");
+ (void)BitWidth;
Known.setAllOnes();
return;
}
if (match(Arg, m_Not(m_Specific(V))) &&
isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
assert(BitWidth == 1 && "assume operand is not i1?");
+ (void)BitWidth;
Known.setAllZero();
return;
}
@@ -691,278 +848,16 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
if (!Cmp)
continue;
- // We are attempting to compute known bits for the operands of an assume.
- // Do not try to use other assumptions for those recursive calls because
- // that can lead to mutual recursion and a compile-time explosion.
- // An example of the mutual recursion: computeKnownBits can call
- // isKnownNonZero which calls computeKnownBitsFromAssume (this function)
- // and so on.
- Query QueryNoAC = Q;
- QueryNoAC.AC = nullptr;
-
- // Note that ptrtoint may change the bitwidth.
- Value *A, *B;
- auto m_V = m_CombineOr(m_Specific(V), m_PtrToInt(m_Specific(V)));
-
- CmpInst::Predicate Pred;
- uint64_t C;
- switch (Cmp->getPredicate()) {
- default:
- break;
- case ICmpInst::ICMP_EQ:
- // assume(v = a)
- if (match(Cmp, m_c_ICmp(Pred, m_V, m_Value(A))) &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown =
- computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
- Known.Zero |= RHSKnown.Zero;
- Known.One |= RHSKnown.One;
- // assume(v & b = a)
- } else if (match(Cmp,
- m_c_ICmp(Pred, m_c_And(m_V, m_Value(B)), m_Value(A))) &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown =
- computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
- KnownBits MaskKnown =
- computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
-
- // For those bits in the mask that are known to be one, we can propagate
- // known bits from the RHS to V.
- Known.Zero |= RHSKnown.Zero & MaskKnown.One;
- Known.One |= RHSKnown.One & MaskKnown.One;
- // assume(~(v & b) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))),
- m_Value(A))) &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown =
- computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
- KnownBits MaskKnown =
- computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
-
- // For those bits in the mask that are known to be one, we can propagate
- // inverted known bits from the RHS to V.
- Known.Zero |= RHSKnown.One & MaskKnown.One;
- Known.One |= RHSKnown.Zero & MaskKnown.One;
- // assume(v | b = a)
- } else if (match(Cmp,
- m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A))) &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown =
- computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
- KnownBits BKnown =
- computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
-
- // For those bits in B that are known to be zero, we can propagate known
- // bits from the RHS to V.
- Known.Zero |= RHSKnown.Zero & BKnown.Zero;
- Known.One |= RHSKnown.One & BKnown.Zero;
- // assume(~(v | b) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))),
- m_Value(A))) &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown =
- computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
- KnownBits BKnown =
- computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
-
- // For those bits in B that are known to be zero, we can propagate
- // inverted known bits from the RHS to V.
- Known.Zero |= RHSKnown.One & BKnown.Zero;
- Known.One |= RHSKnown.Zero & BKnown.Zero;
- // assume(v ^ b = a)
- } else if (match(Cmp,
- m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A))) &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown =
- computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
- KnownBits BKnown =
- computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
-
- // For those bits in B that are known to be zero, we can propagate known
- // bits from the RHS to V. For those bits in B that are known to be one,
- // we can propagate inverted known bits from the RHS to V.
- Known.Zero |= RHSKnown.Zero & BKnown.Zero;
- Known.One |= RHSKnown.One & BKnown.Zero;
- Known.Zero |= RHSKnown.One & BKnown.One;
- Known.One |= RHSKnown.Zero & BKnown.One;
- // assume(~(v ^ b) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))),
- m_Value(A))) &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown =
- computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
- KnownBits BKnown =
- computeKnownBits(B, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
-
- // For those bits in B that are known to be zero, we can propagate
- // inverted known bits from the RHS to V. For those bits in B that are
- // known to be one, we can propagate known bits from the RHS to V.
- Known.Zero |= RHSKnown.One & BKnown.Zero;
- Known.One |= RHSKnown.Zero & BKnown.Zero;
- Known.Zero |= RHSKnown.Zero & BKnown.One;
- Known.One |= RHSKnown.One & BKnown.One;
- // assume(v << c = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)),
- m_Value(A))) &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
- KnownBits RHSKnown =
- computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
-
- // For those bits in RHS that are known, we can propagate them to known
- // bits in V shifted to the right by C.
- RHSKnown.Zero.lshrInPlace(C);
- Known.Zero |= RHSKnown.Zero;
- RHSKnown.One.lshrInPlace(C);
- Known.One |= RHSKnown.One;
- // assume(~(v << c) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))),
- m_Value(A))) &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
- KnownBits RHSKnown =
- computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
- // For those bits in RHS that are known, we can propagate them inverted
- // to known bits in V shifted to the right by C.
- RHSKnown.One.lshrInPlace(C);
- Known.Zero |= RHSKnown.One;
- RHSKnown.Zero.lshrInPlace(C);
- Known.One |= RHSKnown.Zero;
- // assume(v >> c = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)),
- m_Value(A))) &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
- KnownBits RHSKnown =
- computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
- // For those bits in RHS that are known, we can propagate them to known
- // bits in V shifted to the right by C.
- Known.Zero |= RHSKnown.Zero << C;
- Known.One |= RHSKnown.One << C;
- // assume(~(v >> c) = a)
- } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))),
- m_Value(A))) &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT) && C < BitWidth) {
- KnownBits RHSKnown =
- computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
- // For those bits in RHS that are known, we can propagate them inverted
- // to known bits in V shifted to the right by C.
- Known.Zero |= RHSKnown.One << C;
- Known.One |= RHSKnown.Zero << C;
- }
- break;
- case ICmpInst::ICMP_SGE:
- // assume(v >=_s c) where c is non-negative
- if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown =
- computeKnownBits(A, Depth + 1, QueryNoAC).anyextOrTrunc(BitWidth);
-
- if (RHSKnown.isNonNegative()) {
- // We know that the sign bit is zero.
- Known.makeNonNegative();
- }
- }
- break;
- case ICmpInst::ICMP_SGT:
- // assume(v >_s c) where c is at least -1.
- if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown =
- computeKnownBits(A, Depth + 1, QueryNoAC).anyextOrTrunc(BitWidth);
-
- if (RHSKnown.isAllOnes() || RHSKnown.isNonNegative()) {
- // We know that the sign bit is zero.
- Known.makeNonNegative();
- }
- }
- break;
- case ICmpInst::ICMP_SLE:
- // assume(v <=_s c) where c is negative
- if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown =
- computeKnownBits(A, Depth + 1, QueryNoAC).anyextOrTrunc(BitWidth);
-
- if (RHSKnown.isNegative()) {
- // We know that the sign bit is one.
- Known.makeNegative();
- }
- }
- break;
- case ICmpInst::ICMP_SLT:
- // assume(v <_s c) where c is non-positive
- if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown =
- computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
-
- if (RHSKnown.isZero() || RHSKnown.isNegative()) {
- // We know that the sign bit is one.
- Known.makeNegative();
- }
- }
- break;
- case ICmpInst::ICMP_ULE:
- // assume(v <=_u c)
- if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown =
- computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
-
- // Whatever high bits in c are zero are known to be zero.
- Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
- }
- break;
- case ICmpInst::ICMP_ULT:
- // assume(v <_u c)
- if (match(Cmp, m_ICmp(Pred, m_V, m_Value(A))) &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- KnownBits RHSKnown =
- computeKnownBits(A, Depth+1, QueryNoAC).anyextOrTrunc(BitWidth);
-
- // If the RHS is known zero, then this assumption must be wrong (nothing
- // is unsigned less than zero). Signal a conflict and get out of here.
- if (RHSKnown.isZero()) {
- Known.Zero.setAllBits();
- Known.One.setAllBits();
- break;
- }
+ if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
+ continue;
- // Whatever high bits in c are zero are known to be zero (if c is a power
- // of 2, then one more).
- if (isKnownToBeAPowerOfTwo(A, false, Depth + 1, QueryNoAC))
- Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros() + 1);
- else
- Known.Zero.setHighBits(RHSKnown.countMinLeadingZeros());
- }
- break;
- case ICmpInst::ICMP_NE: {
- // assume (v & b != 0) where b is a power of 2
- const APInt *BPow2;
- if (match(Cmp, m_ICmp(Pred, m_c_And(m_V, m_Power2(BPow2)), m_Zero())) &&
- isValidAssumeForContext(I, Q.CxtI, Q.DT)) {
- Known.One |= BPow2->zextOrTrunc(BitWidth);
- }
- } break;
- }
+ computeKnownBitsFromCmp(V, Cmp, Known, Depth, Q);
}
- // If assumptions conflict with each other or previous known bits, then we
- // have a logical fallacy. It's possible that the assumption is not reachable,
- // so this isn't a real bug. On the other hand, the program may have undefined
- // behavior, or we might have a bug in the compiler. We can't assert/crash, so
- // clear out the known bits, try to warn the user, and hope for the best.
- if (Known.Zero.intersects(Known.One)) {
+ // Conflicting assumption: Undefined behavior will occur on this execution
+ // path.
+ if (Known.hasConflict())
Known.resetAll();
-
- if (Q.ORE)
- Q.ORE->emit([&]() {
- auto *CxtI = const_cast<Instruction *>(Q.CxtI);
- return OptimizationRemarkAnalysis("value-tracking", "BadAssumption",
- CxtI)
- << "Detected conflicting code assumptions. Program may "
- "have undefined behavior, or compiler may have "
- "internal error.";
- });
- }
}
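As a concrete instance of the ICMP_NE case in computeKnownBitsFromCmp above: assume((v & 8) != 0), with 8 a power of two, pins bit 3 of v to one. A tiny KnownBits sketch of just that effect, leaving out the assume-walking machinery:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

KnownBits bitKnownFromAssume() {
  KnownBits Known(8); // an i8 value v, nothing known yet
  APInt BPow2(8, 8);  // the matched power-of-two mask b = 8 (bit 3)
  Known.One |= BPow2; // assume((v & b) != 0)  =>  bit 3 of v must be one
  return Known;       // Known.One is 0b00001000; Known.Zero is untouched
}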
/// Compute known bits from a shift operator, including those with a
@@ -975,93 +870,128 @@ static void computeKnownBitsFromAssume(const Value *V, KnownBits &Known,
/// combined for all permitted shift amounts.
static void computeKnownBitsFromShiftOperator(
const Operator *I, const APInt &DemandedElts, KnownBits &Known,
- KnownBits &Known2, unsigned Depth, const Query &Q,
- function_ref<KnownBits(const KnownBits &, const KnownBits &)> KF) {
- unsigned BitWidth = Known.getBitWidth();
+ KnownBits &Known2, unsigned Depth, const SimplifyQuery &Q,
+ function_ref<KnownBits(const KnownBits &, const KnownBits &, bool)> KF) {
computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
+ // To limit compile-time impact, only query isKnownNonZero() if we know at
+ // least something about the shift amount.
+ bool ShAmtNonZero =
+ Known.isNonZero() ||
+ (Known.getMaxValue().ult(Known.getBitWidth()) &&
+ isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q));
+ Known = KF(Known2, Known, ShAmtNonZero);
+}
+
+static KnownBits
+getKnownBitsFromAndXorOr(const Operator *I, const APInt &DemandedElts,
+ const KnownBits &KnownLHS, const KnownBits &KnownRHS,
+ unsigned Depth, const SimplifyQuery &Q) {
+ unsigned BitWidth = KnownLHS.getBitWidth();
+ KnownBits KnownOut(BitWidth);
+ bool IsAnd = false;
+ bool HasKnownOne = !KnownLHS.One.isZero() || !KnownRHS.One.isZero();
+ Value *X = nullptr, *Y = nullptr;
- // Note: We cannot use Known.Zero.getLimitedValue() here, because if
- // BitWidth > 64 and any upper bits are known, we'll end up returning the
- // limit value (which implies all bits are known).
- uint64_t ShiftAmtKZ = Known.Zero.zextOrTrunc(64).getZExtValue();
- uint64_t ShiftAmtKO = Known.One.zextOrTrunc(64).getZExtValue();
- bool ShiftAmtIsConstant = Known.isConstant();
- bool MaxShiftAmtIsOutOfRange = Known.getMaxValue().uge(BitWidth);
-
- if (ShiftAmtIsConstant) {
- Known = KF(Known2, Known);
-
- // If the known bits conflict, this must be an overflowing left shift, so
- // the shift result is poison. We can return anything we want. Choose 0 for
- // the best folding opportunity.
- if (Known.hasConflict())
- Known.setAllZero();
-
- return;
+ switch (I->getOpcode()) {
+ case Instruction::And:
+ KnownOut = KnownLHS & KnownRHS;
+ IsAnd = true;
+ // and(x, -x) is a common idiom that will clear all but the lowest set
+ // bit. If we have a single known bit in x, we can clear all bits
+ // above it.
+ // TODO: instcombine often reassociates independent `and` which can hide
+ // this pattern. Try to match and(x, and(-x, y)) / and(and(x, y), -x).
+ if (HasKnownOne && match(I, m_c_And(m_Value(X), m_Neg(m_Deferred(X))))) {
+ // -(-x) == x so using whichever (LHS/RHS) gets us a better result.
+ if (KnownLHS.countMaxTrailingZeros() <= KnownRHS.countMaxTrailingZeros())
+ KnownOut = KnownLHS.blsi();
+ else
+ KnownOut = KnownRHS.blsi();
+ }
+ break;
+ case Instruction::Or:
+ KnownOut = KnownLHS | KnownRHS;
+ break;
+ case Instruction::Xor:
+ KnownOut = KnownLHS ^ KnownRHS;
+ // xor(x, x-1) is a common idiom that will clear all but the lowest set
+ // bit. If we have a single known bit in x, we can clear all bits
+ // above it.
+ // TODO: xor(x, x-1) is often rewritten as xor(x, x-C) where C !=
+ // -1 but for the purpose of demanded bits (xor(x, x-C) &
+ // Demanded) == (xor(x, x-1) & Demanded). Extend the xor pattern
+ // to use arbitrary C if xor(x, x-C) is the same as xor(x, x-1).
+ if (HasKnownOne &&
+ match(I, m_c_Xor(m_Value(X), m_c_Add(m_Deferred(X), m_AllOnes())))) {
+ const KnownBits &XBits = I->getOperand(0) == X ? KnownLHS : KnownRHS;
+ KnownOut = XBits.blsmsk();
+ }
+ break;
+ default:
+ llvm_unreachable("Invalid Op used in 'analyzeKnownBitsFromAndXorOr'");
+ }
+
+ // and(x, add (x, -1)) is a common idiom that always clears the low bit;
+ // xor/or(x, add (x, -1)) is an idiom that will always set the low bit.
+ // here we handle the more general case of adding any odd number by
+ // matching the form and/xor/or(x, add(x, y)) where y is odd.
+ // TODO: This could be generalized to clearing any bit set in y where the
+ // following bit is known to be unset in y.
+ if (!KnownOut.Zero[0] && !KnownOut.One[0] &&
+ (match(I, m_c_BinOp(m_Value(X), m_c_Add(m_Deferred(X), m_Value(Y)))) ||
+ match(I, m_c_BinOp(m_Value(X), m_Sub(m_Deferred(X), m_Value(Y)))) ||
+ match(I, m_c_BinOp(m_Value(X), m_Sub(m_Value(Y), m_Deferred(X)))))) {
+ KnownBits KnownY(BitWidth);
+ computeKnownBits(Y, DemandedElts, KnownY, Depth + 1, Q);
+ if (KnownY.countMinTrailingOnes() > 0) {
+ if (IsAnd)
+ KnownOut.Zero.setBit(0);
+ else
+ KnownOut.One.setBit(0);
+ }
}
+ return KnownOut;
+}
- // If the shift amount could be greater than or equal to the bit-width of the
- // LHS, the value could be poison, but bail out because the check below is
- // expensive.
- // TODO: Should we just carry on?
- if (MaxShiftAmtIsOutOfRange) {
- Known.resetAll();
- return;
- }
+// Public so this can be used in `SimplifyDemandedUseBits`.
+KnownBits llvm::analyzeKnownBitsFromAndXorOr(
+ const Operator *I, const KnownBits &KnownLHS, const KnownBits &KnownRHS,
+ unsigned Depth, const DataLayout &DL, AssumptionCache *AC,
+ const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) {
+ auto *FVTy = dyn_cast<FixedVectorType>(I->getType());
+ APInt DemandedElts =
+ FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
- // It would be more-clearly correct to use the two temporaries for this
- // calculation. Reusing the APInts here to prevent unnecessary allocations.
- Known.resetAll();
+ return getKnownBitsFromAndXorOr(I, DemandedElts, KnownLHS, KnownRHS, Depth,
+ SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC,
+ safeCxtI(I, CxtI),
+ UseInstrInfo));
+}
- // If we know the shifter operand is nonzero, we can sometimes infer more
- // known bits. However this is expensive to compute, so be lazy about it and
- // only compute it when absolutely necessary.
- std::optional<bool> ShifterOperandIsNonZero;
-
- // Early exit if we can't constrain any well-defined shift amount.
- if (!(ShiftAmtKZ & (PowerOf2Ceil(BitWidth) - 1)) &&
- !(ShiftAmtKO & (PowerOf2Ceil(BitWidth) - 1))) {
- ShifterOperandIsNonZero =
- isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q);
- if (!*ShifterOperandIsNonZero)
- return;
- }
+ConstantRange llvm::getVScaleRange(const Function *F, unsigned BitWidth) {
+ Attribute Attr = F->getFnAttribute(Attribute::VScaleRange);
+ // Without vscale_range, we only know that vscale is non-zero.
+ if (!Attr.isValid())
+ return ConstantRange(APInt(BitWidth, 1), APInt::getZero(BitWidth));
- Known.Zero.setAllBits();
- Known.One.setAllBits();
- for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) {
- // Combine the shifted known input bits only for those shift amounts
- // compatible with its known constraints.
- if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt)
- continue;
- if ((ShiftAmt | ShiftAmtKO) != ShiftAmt)
- continue;
- // If we know the shifter is nonzero, we may be able to infer more known
- // bits. This check is sunk down as far as possible to avoid the expensive
- // call to isKnownNonZero if the cheaper checks above fail.
- if (ShiftAmt == 0) {
- if (!ShifterOperandIsNonZero)
- ShifterOperandIsNonZero =
- isKnownNonZero(I->getOperand(1), DemandedElts, Depth + 1, Q);
- if (*ShifterOperandIsNonZero)
- continue;
- }
+ unsigned AttrMin = Attr.getVScaleRangeMin();
+ // Minimum is larger than vscale width, result is always poison.
+ if ((unsigned)llvm::bit_width(AttrMin) > BitWidth)
+ return ConstantRange::getEmpty(BitWidth);
- Known = KnownBits::commonBits(
- Known, KF(Known2, KnownBits::makeConstant(APInt(32, ShiftAmt))));
- }
+ APInt Min(BitWidth, AttrMin);
+ std::optional<unsigned> AttrMax = Attr.getVScaleRangeMax();
+ if (!AttrMax || (unsigned)llvm::bit_width(*AttrMax) > BitWidth)
+ return ConstantRange(Min, APInt::getZero(BitWidth));
- // If the known bits conflict, the result is poison. Return a 0 and hope the
- // caller can further optimize that.
- if (Known.hasConflict())
- Known.setAllZero();
+ return ConstantRange(Min, APInt(BitWidth, *AttrMax) + 1);
}
static void computeKnownBitsFromOperator(const Operator *I,
const APInt &DemandedElts,
KnownBits &Known, unsigned Depth,
- const Query &Q) {
+ const SimplifyQuery &Q) {
unsigned BitWidth = Known.getBitWidth();
KnownBits Known2(BitWidth);
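The and/xor special cases folded into getKnownBitsFromAndXorOr above rest on two classic bit identities: x & -x isolates the lowest set bit (blsi), and x ^ (x - 1) produces a mask of that bit and everything below it (blsmsk). A quick standalone check of both:

#include <cassert>
#include <cstdint>

int main() {
  uint8_t X = 0b01101000;
  uint8_t Blsi = X & static_cast<uint8_t>(-X);      // lowest set bit only
  uint8_t Blsmsk = X ^ static_cast<uint8_t>(X - 1); // lowest set bit and below
  assert(Blsi == 0b00001000);
  assert(Blsmsk == 0b00001111);
  return 0;
}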
@@ -1072,39 +1002,23 @@ static void computeKnownBitsFromOperator(const Operator *I,
Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_range))
computeKnownBitsFromRangeMetadata(*MD, Known);
break;
- case Instruction::And: {
- // If either the LHS or the RHS are Zero, the result is zero.
+ case Instruction::And:
computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
- Known &= Known2;
-
- // and(x, add (x, -1)) is a common idiom that always clears the low bit;
- // here we handle the more general case of adding any odd number by
- // matching the form add(x, add(x, y)) where y is odd.
- // TODO: This could be generalized to clearing any bit set in y where the
- // following bit is known to be unset in y.
- Value *X = nullptr, *Y = nullptr;
- if (!Known.Zero[0] && !Known.One[0] &&
- match(I, m_c_BinOp(m_Value(X), m_Add(m_Deferred(X), m_Value(Y))))) {
- Known2.resetAll();
- computeKnownBits(Y, DemandedElts, Known2, Depth + 1, Q);
- if (Known2.countMinTrailingOnes() > 0)
- Known.Zero.setBit(0);
- }
+ Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
break;
- }
case Instruction::Or:
computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
- Known |= Known2;
+ Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
break;
case Instruction::Xor:
computeKnownBits(I->getOperand(1), DemandedElts, Known, Depth + 1, Q);
computeKnownBits(I->getOperand(0), DemandedElts, Known2, Depth + 1, Q);
- Known ^= Known2;
+ Known = getKnownBitsFromAndXorOr(I, DemandedElts, Known2, Known, Depth, Q);
break;
case Instruction::Mul: {
bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
@@ -1115,7 +1029,15 @@ static void computeKnownBitsFromOperator(const Operator *I,
case Instruction::UDiv: {
computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
- Known = KnownBits::udiv(Known, Known2);
+ Known =
+ KnownBits::udiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
+ break;
+ }
+ case Instruction::SDiv: {
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+ Known =
+ KnownBits::sdiv(Known, Known2, Q.IIQ.isExact(cast<BinaryOperator>(I)));
break;
}
case Instruction::Select: {
@@ -1147,7 +1069,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
// Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
if (SPF == SPF_ABS) {
// RHS from matchSelectPattern returns the negation part of abs pattern.
@@ -1254,42 +1176,37 @@ static void computeKnownBitsFromOperator(const Operator *I,
break;
}
case Instruction::Shl: {
+ bool NUW = Q.IIQ.hasNoUnsignedWrap(cast<OverflowingBinaryOperator>(I));
bool NSW = Q.IIQ.hasNoSignedWrap(cast<OverflowingBinaryOperator>(I));
- auto KF = [NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt) {
- KnownBits Result = KnownBits::shl(KnownVal, KnownAmt);
- // If this shift has "nsw" keyword, then the result is either a poison
- // value or has the same sign bit as the first operand.
- if (NSW) {
- if (KnownVal.Zero.isSignBitSet())
- Result.Zero.setSignBit();
- if (KnownVal.One.isSignBitSet())
- Result.One.setSignBit();
- }
- return Result;
+ auto KF = [NUW, NSW](const KnownBits &KnownVal, const KnownBits &KnownAmt,
+ bool ShAmtNonZero) {
+ return KnownBits::shl(KnownVal, KnownAmt, NUW, NSW, ShAmtNonZero);
};
computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
KF);
    // Trailing zeros of a left-shifted constant never decrease.
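    // e.g. if the first operand is the constant 12 (0b1100), C << Amt keeps at
    // least two trailing zero bits for any shift amount.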
const APInt *C;
if (match(I->getOperand(0), m_APInt(C)))
- Known.Zero.setLowBits(C->countTrailingZeros());
+ Known.Zero.setLowBits(C->countr_zero());
break;
}
case Instruction::LShr: {
- auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt) {
- return KnownBits::lshr(KnownVal, KnownAmt);
+ auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt,
+ bool ShAmtNonZero) {
+ return KnownBits::lshr(KnownVal, KnownAmt, ShAmtNonZero);
};
computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
KF);
    // Leading zeros of a right-shifted constant never decrease.
const APInt *C;
if (match(I->getOperand(0), m_APInt(C)))
- Known.Zero.setHighBits(C->countLeadingZeros());
+ Known.Zero.setHighBits(C->countl_zero());
break;
}
case Instruction::AShr: {
- auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt) {
- return KnownBits::ashr(KnownVal, KnownAmt);
+ auto KF = [](const KnownBits &KnownVal, const KnownBits &KnownAmt,
+ bool ShAmtNonZero) {
+ return KnownBits::ashr(KnownVal, KnownAmt, ShAmtNonZero);
};
computeKnownBitsFromShiftOperator(I, DemandedElts, Known, Known2, Depth, Q,
KF);
@@ -1376,7 +1293,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
if (IndexTypeSize.isScalable()) {
// For scalable types the only thing we know about sizeof is
// that this is a multiple of the minimum size.
- ScalingFactor.Zero.setLowBits(countTrailingZeros(TypeSizeInBytes));
+ ScalingFactor.Zero.setLowBits(llvm::countr_zero(TypeSizeInBytes));
} else if (IndexBits.isConstant()) {
APInt IndexConst = IndexBits.getConstant();
APInt ScalingFactor(IndexBitWidth, TypeSizeInBytes);
@@ -1431,7 +1348,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// inferred hold at original context instruction. TODO: It may be
      // correct to use the original context. If warranted, explore and
// add sufficient tests to cover.
- Query RecQ = Q;
+ SimplifyQuery RecQ = Q;
RecQ.CxtI = P;
computeKnownBits(R, DemandedElts, Known2, Depth + 1, RecQ);
switch (Opcode) {
@@ -1464,7 +1381,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// phi. This is important because that is where the value is actually
// "evaluated" even though it is used later somewhere else. (see also
// D69571).
- Query RecQ = Q;
+ SimplifyQuery RecQ = Q;
unsigned OpNum = P->getOperand(0) == R ? 0 : 1;
Instruction *RInst = P->getIncomingBlock(OpNum)->getTerminator();
@@ -1526,7 +1443,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// Otherwise take the unions of the known bit sets of the operands,
// taking conservative care to avoid excessive recursion.
- if (Depth < MaxAnalysisRecursionDepth - 1 && !Known.Zero && !Known.One) {
+ if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()) {
// Skip if every incoming value references to ourself.
if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
break;
@@ -1542,7 +1459,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// phi. This is important because that is where the value is actually
// "evaluated" even though it is used later somewhere else. (see also
// D69571).
- Query RecQ = Q;
+ SimplifyQuery RecQ = Q;
RecQ.CxtI = P->getIncomingBlock(u)->getTerminator();
Known2 = KnownBits(BitWidth);
@@ -1572,10 +1489,10 @@ static void computeKnownBitsFromOperator(const Operator *I,
Known2 = KnownBits::makeConstant(*RHSC);
break;
case CmpInst::Predicate::ICMP_ULE:
- Known2.Zero.setHighBits(RHSC->countLeadingZeros());
+ Known2.Zero.setHighBits(RHSC->countl_zero());
break;
case CmpInst::Predicate::ICMP_ULT:
- Known2.Zero.setHighBits((*RHSC - 1).countLeadingZeros());
+ Known2.Zero.setHighBits((*RHSC - 1).countl_zero());
break;
default:
// TODO - add additional integer predicate handling.
@@ -1585,7 +1502,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
}
}
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
// If all bits have been ruled out, there's no need to check
// more operands.
if (Known.isUnknown())
@@ -1604,8 +1521,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
computeKnownBitsFromRangeMetadata(*MD, Known);
if (const Value *RV = cast<CallBase>(I)->getReturnedArgOperand()) {
computeKnownBits(RV, Known2, Depth + 1, Q);
- Known.Zero |= Known2.Zero;
- Known.One |= Known2.One;
+ Known = Known.unionWith(Known2);
}
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
@@ -1681,36 +1597,25 @@ static void computeKnownBitsFromOperator(const Operator *I,
break;
}
case Intrinsic::uadd_sat:
- case Intrinsic::usub_sat: {
- bool IsAdd = II->getIntrinsicID() == Intrinsic::uadd_sat;
computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
-
- // Add: Leading ones of either operand are preserved.
- // Sub: Leading zeros of LHS and leading ones of RHS are preserved
- // as leading zeros in the result.
- unsigned LeadingKnown;
- if (IsAdd)
- LeadingKnown = std::max(Known.countMinLeadingOnes(),
- Known2.countMinLeadingOnes());
- else
- LeadingKnown = std::max(Known.countMinLeadingZeros(),
- Known2.countMinLeadingOnes());
-
- Known = KnownBits::computeForAddSub(
- IsAdd, /* NSW */ false, Known, Known2);
-
- // We select between the operation result and all-ones/zero
- // respectively, so we can preserve known ones/zeros.
- if (IsAdd) {
- Known.One.setHighBits(LeadingKnown);
- Known.Zero.clearAllBits();
- } else {
- Known.Zero.setHighBits(LeadingKnown);
- Known.One.clearAllBits();
- }
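+      // Note: the KnownBits saturating helpers model the clamp to the type's
+      // min/max directly, which should cover the leading-bit reasoning that
+      // was previously done by hand here.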
+ Known = KnownBits::uadd_sat(Known, Known2);
+ break;
+ case Intrinsic::usub_sat:
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+ Known = KnownBits::usub_sat(Known, Known2);
+ break;
+ case Intrinsic::sadd_sat:
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+ Known = KnownBits::sadd_sat(Known, Known2);
+ break;
+ case Intrinsic::ssub_sat:
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
+ Known = KnownBits::ssub_sat(Known, Known2);
break;
- }
case Intrinsic::umin:
computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
@@ -1731,42 +1636,31 @@ static void computeKnownBitsFromOperator(const Operator *I,
computeKnownBits(I->getOperand(1), Known2, Depth + 1, Q);
Known = KnownBits::smax(Known, Known2);
break;
+ case Intrinsic::ptrmask: {
+ computeKnownBits(I->getOperand(0), Known, Depth + 1, Q);
+
+ const Value *Mask = I->getOperand(1);
+ Known2 = KnownBits(Mask->getType()->getScalarSizeInBits());
+ computeKnownBits(Mask, Known2, Depth + 1, Q);
+ // This is basically a pointer typed and.
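+      // e.g. llvm.ptrmask(p, -64) clears the low 6 bits of p, so those bits
+      // become known zero in the result.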
+ Known &= Known2.zextOrTrunc(Known.getBitWidth());
+ break;
+ }
case Intrinsic::x86_sse42_crc32_64_64:
Known.Zero.setBitsFrom(32);
break;
case Intrinsic::riscv_vsetvli:
case Intrinsic::riscv_vsetvlimax:
- // Assume that VL output is positive and would fit in an int32_t.
- // TODO: VLEN might be capped at 16 bits in a future V spec update.
- if (BitWidth >= 32)
- Known.Zero.setBitsFrom(31);
+    // Assume that VL output is <= 65536.
+ // TODO: Take SEW and LMUL into account.
+ if (BitWidth > 17)
+ Known.Zero.setBitsFrom(17);
break;
case Intrinsic::vscale: {
- if (!II->getParent() || !II->getFunction() ||
- !II->getFunction()->hasFnAttribute(Attribute::VScaleRange))
+ if (!II->getParent() || !II->getFunction())
break;
- auto Attr = II->getFunction()->getFnAttribute(Attribute::VScaleRange);
- std::optional<unsigned> VScaleMax = Attr.getVScaleRangeMax();
-
- if (!VScaleMax)
- break;
-
- unsigned VScaleMin = Attr.getVScaleRangeMin();
-
- // If vscale min = max then we know the exact value at compile time
- // and hence we know the exact bits.
- if (VScaleMin == VScaleMax) {
- Known.One = VScaleMin;
- Known.Zero = VScaleMin;
- Known.Zero.flipAllBits();
- break;
- }
-
- unsigned FirstZeroHighBit = llvm::bit_width(*VScaleMax);
- if (FirstZeroHighBit < BitWidth)
- Known.Zero.setBitsFrom(FirstZeroHighBit);
-
+ Known = getVScaleRange(II->getFunction(), BitWidth).toKnownBits();
break;
}
}
@@ -1798,7 +1692,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
if (!!DemandedRHS) {
const Value *RHS = Shuf->getOperand(1);
computeKnownBits(RHS, DemandedRHS, Known2, Depth + 1, Q);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
break;
}
@@ -1831,7 +1725,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
DemandedVecElts.clearBit(EltIdx);
if (!!DemandedVecElts) {
computeKnownBits(Vec, DemandedVecElts, Known2, Depth + 1, Q);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
break;
}
@@ -1892,7 +1786,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
/// Determine which bits of V are known to be either zero or one and return
/// them.
KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts,
- unsigned Depth, const Query &Q) {
+ unsigned Depth, const SimplifyQuery &Q) {
KnownBits Known(getBitWidth(V->getType(), Q.DL));
computeKnownBits(V, DemandedElts, Known, Depth, Q);
return Known;
@@ -1900,7 +1794,8 @@ KnownBits computeKnownBits(const Value *V, const APInt &DemandedElts,
/// Determine which bits of V are known to be either zero or one and return
/// them.
-KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q) {
+KnownBits computeKnownBits(const Value *V, unsigned Depth,
+ const SimplifyQuery &Q) {
KnownBits Known(getBitWidth(V->getType(), Q.DL));
computeKnownBits(V, Known, Depth, Q);
return Known;
@@ -1922,7 +1817,8 @@ KnownBits computeKnownBits(const Value *V, unsigned Depth, const Query &Q) {
/// same width as the vector element, and the bit is set only if it is true
/// for all of the demanded elements in the vector specified by DemandedElts.
void computeKnownBits(const Value *V, const APInt &DemandedElts,
- KnownBits &Known, unsigned Depth, const Query &Q) {
+ KnownBits &Known, unsigned Depth,
+ const SimplifyQuery &Q) {
if (!DemandedElts) {
// No demanded elts, better to assume we don't know anything.
Known.resetAll();
@@ -2032,6 +1928,10 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
if (const Operator *I = dyn_cast<Operator>(V))
computeKnownBitsFromOperator(I, DemandedElts, Known, Depth, Q);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (std::optional<ConstantRange> CR = GV->getAbsoluteSymbolRange())
+ Known = CR->toKnownBits();
+ }
// Aligned pointers have trailing zeros - refine Known.Zero set
if (isa<PointerType>(V->getType())) {
@@ -2051,7 +1951,7 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
/// Try to detect a recurrence in which the value of the induction variable is
/// always a power of two (or zero).
static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
- unsigned Depth, Query &Q) {
+ unsigned Depth, SimplifyQuery &Q) {
BinaryOperator *BO = nullptr;
Value *Start = nullptr, *Step = nullptr;
if (!matchSimpleRecurrence(PN, BO, Start, Step))
@@ -2110,7 +2010,7 @@ static bool isPowerOfTwoRecurrence(const PHINode *PN, bool OrZero,
/// be a power of two when defined. Supports values with integer or pointer
/// types and vectors of integers.
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
- const Query &Q) {
+ const SimplifyQuery &Q) {
assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
// Attempt to match against constants.
@@ -2118,6 +2018,11 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
return true;
if (match(V, m_Power2()))
return true;
+ if (Q.CxtI && match(V, m_VScale())) {
+ const Function *F = Q.CxtI->getFunction();
+ // The vscale_range indicates vscale is a power-of-two.
+ return F->hasFnAttribute(Attribute::VScaleRange);
+ }
// 1 << X is clearly a power of two if the one is not shifted off the end. If
// it is shifted off the end then the result is undefined.
@@ -2199,7 +2104,7 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
// A PHI node is power of two if all incoming values are power of two, or if
// it is an induction variable where in each step its value is a power of two.
if (const PHINode *PN = dyn_cast<PHINode>(V)) {
- Query RecQ = Q;
+ SimplifyQuery RecQ = Q;
// Check if it is an induction variable and always power of two.
if (isPowerOfTwoRecurrence(PN, OrZero, Depth, RecQ))
@@ -2239,7 +2144,7 @@ bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
///
/// Currently this routine does not support vector GEPs.
static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
- const Query &Q) {
+ const SimplifyQuery &Q) {
const Function *F = nullptr;
if (const Instruction *I = dyn_cast<Instruction>(GEP))
F = I->getFunction();
@@ -2302,8 +2207,7 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
static bool isKnownNonNullFromDominatingCondition(const Value *V,
const Instruction *CtxI,
const DominatorTree *DT) {
- if (isa<Constant>(V))
- return false;
+ assert(!isa<Constant>(V) && "Called for constant?");
if (!CtxI || !DT)
return false;
@@ -2437,131 +2341,156 @@ static bool isNonZeroRecurrence(const PHINode *PN) {
}
}
-/// Return true if the given value is known to be non-zero when defined. For
-/// vectors, return true if every demanded element is known to be non-zero when
-/// defined. For pointers, if the context instruction and dominator tree are
-/// specified, perform context-sensitive analysis and return true if the
-/// pointer couldn't possibly be null at the specified instruction.
-/// Supports values with integer or pointer type and vectors of integers.
-bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
- const Query &Q) {
+static bool isNonZeroAdd(const APInt &DemandedElts, unsigned Depth,
+ const SimplifyQuery &Q, unsigned BitWidth, Value *X,
+ Value *Y, bool NSW) {
+ KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
+ KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
-#ifndef NDEBUG
- Type *Ty = V->getType();
- assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
-
- if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
- assert(
- FVTy->getNumElements() == DemandedElts.getBitWidth() &&
- "DemandedElt width should equal the fixed vector number of elements");
- } else {
- assert(DemandedElts == APInt(1, 1) &&
- "DemandedElt width should be 1 for scalars");
- }
-#endif
-
- if (auto *C = dyn_cast<Constant>(V)) {
- if (C->isNullValue())
- return false;
- if (isa<ConstantInt>(C))
- // Must be non-zero due to null test above.
+ // If X and Y are both non-negative (as signed values) then their sum is not
+ // zero unless both X and Y are zero.
+ if (XKnown.isNonNegative() && YKnown.isNonNegative())
+ if (isKnownNonZero(Y, DemandedElts, Depth, Q) ||
+ isKnownNonZero(X, DemandedElts, Depth, Q))
return true;
- // For constant vectors, check that all elements are undefined or known
- // non-zero to determine that the whole vector is known non-zero.
- if (auto *VecTy = dyn_cast<FixedVectorType>(C->getType())) {
- for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
- if (!DemandedElts[i])
- continue;
- Constant *Elt = C->getAggregateElement(i);
- if (!Elt || Elt->isNullValue())
- return false;
- if (!isa<UndefValue>(Elt) && !isa<ConstantInt>(Elt))
- return false;
- }
+ // If X and Y are both negative (as signed values) then their sum is not
+ // zero unless both X and Y equal INT_MIN.
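+  // e.g. in i8, only (-128) + (-128) wraps around to 0; the sum of any other
+  // pair of negative values lies in [-255, -2] and stays non-zero modulo 256.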
+ if (XKnown.isNegative() && YKnown.isNegative()) {
+ APInt Mask = APInt::getSignedMaxValue(BitWidth);
+ // The sign bit of X is set. If some other bit is set then X is not equal
+ // to INT_MIN.
+ if (XKnown.One.intersects(Mask))
+ return true;
+ // The sign bit of Y is set. If some other bit is set then Y is not equal
+ // to INT_MIN.
+ if (YKnown.One.intersects(Mask))
return true;
- }
-
- // A global variable in address space 0 is non null unless extern weak
- // or an absolute symbol reference. Other address spaces may have null as a
- // valid address for a global, so we can't assume anything.
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
- GV->getType()->getAddressSpace() == 0)
- return true;
- }
-
- // For constant expressions, fall through to the Operator code below.
- if (!isa<ConstantExpr>(V))
- return false;
- }
-
- if (auto *I = dyn_cast<Instruction>(V)) {
- if (MDNode *Ranges = Q.IIQ.getMetadata(I, LLVMContext::MD_range)) {
- // If the possible ranges don't contain zero, then the value is
- // definitely non-zero.
- if (auto *Ty = dyn_cast<IntegerType>(V->getType())) {
- const APInt ZeroValue(Ty->getBitWidth(), 0);
- if (rangeMetadataExcludesValue(Ranges, ZeroValue))
- return true;
- }
- }
}
- if (!isa<Constant>(V) && isKnownNonZeroFromAssume(V, Q))
+ // The sum of a non-negative number and a power of two is not zero.
+ if (XKnown.isNonNegative() &&
+ isKnownToBeAPowerOfTwo(Y, /*OrZero*/ false, Depth, Q))
+ return true;
+ if (YKnown.isNonNegative() &&
+ isKnownToBeAPowerOfTwo(X, /*OrZero*/ false, Depth, Q))
return true;
- // Some of the tests below are recursive, so bail out if we hit the limit.
- if (Depth++ >= MaxAnalysisRecursionDepth)
- return false;
-
- // Check for pointer simplifications.
+ return KnownBits::computeForAddSub(/*Add*/ true, NSW, XKnown, YKnown)
+ .isNonZero();
+}
- if (PointerType *PtrTy = dyn_cast<PointerType>(V->getType())) {
- // Alloca never returns null, malloc might.
- if (isa<AllocaInst>(V) && Q.DL.getAllocaAddrSpace() == 0)
+static bool isNonZeroSub(const APInt &DemandedElts, unsigned Depth,
+ const SimplifyQuery &Q, unsigned BitWidth, Value *X,
+ Value *Y) {
+ if (auto *C = dyn_cast<Constant>(X))
+ if (C->isNullValue() && isKnownNonZero(Y, DemandedElts, Depth, Q))
return true;
- // A byval, inalloca may not be null in a non-default addres space. A
- // nonnull argument is assumed never 0.
- if (const Argument *A = dyn_cast<Argument>(V)) {
- if (((A->hasPassPointeeByValueCopyAttr() &&
- !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) ||
- A->hasNonNullAttr()))
- return true;
+ KnownBits XKnown = computeKnownBits(X, DemandedElts, Depth, Q);
+ if (XKnown.isUnknown())
+ return false;
+ KnownBits YKnown = computeKnownBits(Y, DemandedElts, Depth, Q);
+ // If X != Y then X - Y is non zero.
+ std::optional<bool> ne = KnownBits::ne(XKnown, YKnown);
+  // If we are unable to prove that X != Y, computing the known bits of the
+  // sub expression won't help either, so just return here.
+ return ne && *ne;
+}
+
+static bool isNonZeroShift(const Operator *I, const APInt &DemandedElts,
+ unsigned Depth, const SimplifyQuery &Q,
+ const KnownBits &KnownVal) {
+ auto ShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
+ switch (I->getOpcode()) {
+ case Instruction::Shl:
+ return Lhs.shl(Rhs);
+ case Instruction::LShr:
+ return Lhs.lshr(Rhs);
+ case Instruction::AShr:
+ return Lhs.ashr(Rhs);
+ default:
+ llvm_unreachable("Unknown Shift Opcode");
}
+ };
- // A Load tagged with nonnull metadata is never null.
- if (const LoadInst *LI = dyn_cast<LoadInst>(V))
- if (Q.IIQ.getMetadata(LI, LLVMContext::MD_nonnull))
- return true;
-
- if (const auto *Call = dyn_cast<CallBase>(V)) {
- if (Call->isReturnNonNull())
- return true;
- if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
- return isKnownNonZero(RP, Depth, Q);
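+  // InvShiftOp shifts in the opposite direction of I; it is used below to
+  // isolate the bits that the original shift would discard.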
+ auto InvShiftOp = [&](const APInt &Lhs, const APInt &Rhs) {
+ switch (I->getOpcode()) {
+ case Instruction::Shl:
+ return Lhs.lshr(Rhs);
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return Lhs.shl(Rhs);
+ default:
+ llvm_unreachable("Unknown Shift Opcode");
}
- }
+ };
- if (!isa<Constant>(V) &&
- isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
- return true;
+ if (KnownVal.isUnknown())
+ return false;
- const Operator *I = dyn_cast<Operator>(V);
- if (!I)
+ KnownBits KnownCnt =
+ computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
+ APInt MaxShift = KnownCnt.getMaxValue();
+ unsigned NumBits = KnownVal.getBitWidth();
+ if (MaxShift.uge(NumBits))
return false;
- unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), Q.DL);
+ if (!ShiftOp(KnownVal.One, MaxShift).isZero())
+ return true;
+
+ // If all of the bits shifted out are known to be zero, and Val is known
+ // non-zero then at least one non-zero bit must remain.
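+  // e.g. lshr (X & 0xF0), C with C known <= 4: only known-zero bits are
+  // shifted out, so if X & 0xF0 is non-zero, some set bit must survive.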
+ if (InvShiftOp(KnownVal.Zero, NumBits - MaxShift)
+ .eq(InvShiftOp(APInt::getAllOnes(NumBits), NumBits - MaxShift)) &&
+ isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q))
+ return true;
+
+ return false;
+}
+
+static bool isKnownNonZeroFromOperator(const Operator *I,
+ const APInt &DemandedElts,
+ unsigned Depth, const SimplifyQuery &Q) {
+ unsigned BitWidth = getBitWidth(I->getType()->getScalarType(), Q.DL);
switch (I->getOpcode()) {
case Instruction::GetElementPtr:
if (I->getType()->isPointerTy())
return isGEPKnownNonNull(cast<GEPOperator>(I), Depth, Q);
break;
- case Instruction::BitCast:
- if (I->getType()->isPointerTy())
+ case Instruction::BitCast: {
+ // We need to be a bit careful here. We can only peek through the bitcast
+    // if the scalar size of the elements in the operand is smaller than, and
+    // evenly divides, the scalar size they are being cast to. Take three cases:
+ //
+ // 1) Unsafe:
+ // bitcast <2 x i16> %NonZero to <4 x i8>
+ //
+ // %NonZero can have 2 non-zero i16 elements, but isKnownNonZero on a
+ // <4 x i8> requires that all 4 i8 elements be non-zero which isn't
+    //    guaranteed (imagine just the sign bit set in the 2 i16 elements).
+ //
+ // 2) Unsafe:
+ // bitcast <4 x i3> %NonZero to <3 x i4>
+ //
+    //    Even though the scalar size of the src (`i3`) is smaller than the
+    //    scalar size of the dst (`i4`), because the width of `i4` is not a
+    //    multiple of the width of `i3`, it's possible for an element of the
+    //    `<3 x i4>` result to be zero: some destination elements don't
+    //    contain any full src element.
+ //
+ // 3) Safe:
+ // bitcast <4 x i8> %NonZero to <2 x i16>
+ //
+    //    This is always safe, as non-zero in each of the 4 i8 elements implies
+    //    non-zero in the combination of any two adjacent ones. Since i16 is a
+    //    multiple of i8, each i16 is guaranteed to contain 2 full i8 elements.
+ // This all implies the 2 i16 elements are non-zero.
+ Type *FromTy = I->getOperand(0)->getType();
+ if ((FromTy->isIntOrIntVectorTy() || FromTy->isPtrOrPtrVectorTy()) &&
+ (BitWidth % getBitWidth(FromTy->getScalarType(), Q.DL)) == 0)
return isKnownNonZero(I->getOperand(0), Depth, Q);
- break;
+ } break;
case Instruction::IntToPtr:
// Note that we have to take special care to avoid looking through
// truncating casts, e.g., int2ptr/ptr2int with appropriate sizes, as well
@@ -2579,19 +2508,22 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
Q.DL.getTypeSizeInBits(I->getType()).getFixedValue())
return isKnownNonZero(I->getOperand(0), Depth, Q);
break;
+ case Instruction::Sub:
+ return isNonZeroSub(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
+ I->getOperand(1));
case Instruction::Or:
// X | Y != 0 if X != 0 or Y != 0.
- return isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q) ||
- isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q);
+ return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q) ||
+ isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
case Instruction::SExt:
case Instruction::ZExt:
// ext X != 0 if X != 0.
return isKnownNonZero(I->getOperand(0), Depth, Q);
case Instruction::Shl: {
- // shl nuw can't remove any non-zero bits.
- const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
- if (Q.IIQ.hasNoUnsignedWrap(BO))
+ // shl nsw/nuw can't remove any non-zero bits.
+ const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
+ if (Q.IIQ.hasNoUnsignedWrap(BO) || Q.IIQ.hasNoSignedWrap(BO))
return isKnownNonZero(I->getOperand(0), Depth, Q);
// shl X, Y != 0 if X is odd. Note that the value of the shift is undefined
@@ -2600,12 +2532,13 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
computeKnownBits(I->getOperand(0), DemandedElts, Known, Depth, Q);
if (Known.One[0])
return true;
- break;
+
+ return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
}
case Instruction::LShr:
case Instruction::AShr: {
// shr exact can only shift out zero bits.
- const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
+ const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(I);
if (BO->isExact())
return isKnownNonZero(I->getOperand(0), Depth, Q);
@@ -2616,86 +2549,110 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
if (Known.isNegative())
return true;
- // If the shifter operand is a constant, and all of the bits shifted
- // out are known to be zero, and X is known non-zero then at least one
- // non-zero bit must remain.
- if (ConstantInt *Shift = dyn_cast<ConstantInt>(I->getOperand(1))) {
- auto ShiftVal = Shift->getLimitedValue(BitWidth - 1);
- // Is there a known one in the portion not shifted out?
- if (Known.countMaxLeadingZeros() < BitWidth - ShiftVal)
- return true;
- // Are all the bits to be shifted out known zero?
- if (Known.countMinTrailingZeros() >= ShiftVal)
- return isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
- }
- break;
+ return isNonZeroShift(I, DemandedElts, Depth, Q, Known);
}
case Instruction::UDiv:
case Instruction::SDiv:
+ // X / Y
// div exact can only produce a zero if the dividend is zero.
if (cast<PossiblyExactOperator>(I)->isExact())
return isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
+ if (I->getOpcode() == Instruction::UDiv) {
+ std::optional<bool> XUgeY;
+ KnownBits XKnown =
+ computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
+ if (!XKnown.isUnknown()) {
+ KnownBits YKnown =
+ computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
+ // If X u>= Y then div is non zero (0/0 is UB).
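+        // (Y cannot be zero, since dividing by zero is UB; thus X u>= Y >= 1
+        // and X / Y >= 1.)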
+ XUgeY = KnownBits::uge(XKnown, YKnown);
+ }
+      // If X is totally unknown or X u< Y, we won't be able to prove non-zero
+      // with computeKnownBits, so just return early.
+ return XUgeY && *XUgeY;
+ }
break;
case Instruction::Add: {
// X + Y.
+
+    // If the add has the nuw flag, then if either X or Y is non-zero the
+    // result is non-zero.
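+    // (nuw guarantees the unsigned addition cannot wrap back around to zero.)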
+ auto *BO = cast<OverflowingBinaryOperator>(I);
+ if (Q.IIQ.hasNoUnsignedWrap(BO))
+ return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q) ||
+ isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
+
+ return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth, I->getOperand(0),
+ I->getOperand(1), Q.IIQ.hasNoSignedWrap(BO));
+ }
+ case Instruction::Mul: {
+ // If X and Y are non-zero then so is X * Y as long as the multiplication
+ // does not overflow.
+ const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(I);
+ if (Q.IIQ.hasNoSignedWrap(BO) || Q.IIQ.hasNoUnsignedWrap(BO))
+ return isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q) &&
+ isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q);
+
+ // If either X or Y is odd, then if the other is non-zero the result can't
+ // be zero.
KnownBits XKnown =
computeKnownBits(I->getOperand(0), DemandedElts, Depth, Q);
+ if (XKnown.One[0])
+ return isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q);
+
KnownBits YKnown =
computeKnownBits(I->getOperand(1), DemandedElts, Depth, Q);
+ if (YKnown.One[0])
+ return XKnown.isNonZero() ||
+ isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q);
+
+    // If there exists any subset of X (sX) and subset of Y (sY) s.t. sX * sY
+    // is non-zero, then X * Y is non-zero. We can find sX and sY by just
+    // taking the lowest known one bit of X and Y. If the product of those two
+    // bits is non-zero, the result must be non-zero. We can check that
+    // LSB(X) * LSB(Y) != 0 by checking
+    // X.countMaxTrailingZeros() + Y.countMaxTrailingZeros() < BitWidth.
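+    // e.g. in i8, if X has a known one at bit 2 and Y has a known one at bit
+    // 3, the lowest set bit of X * Y is at position 5 or lower, so the
+    // product cannot be zero.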
+ return (XKnown.countMaxTrailingZeros() + YKnown.countMaxTrailingZeros()) <
+ BitWidth;
+ }
+ case Instruction::Select: {
+ // (C ? X : Y) != 0 if X != 0 and Y != 0.
- // If X and Y are both non-negative (as signed values) then their sum is not
- // zero unless both X and Y are zero.
- if (XKnown.isNonNegative() && YKnown.isNonNegative())
- if (isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q) ||
- isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q))
+ // First check if the arm is non-zero using `isKnownNonZero`. If that fails,
+ // then see if the select condition implies the arm is non-zero. For example
+ // (X != 0 ? X : Y), we know the true arm is non-zero as the `X` "return" is
+ // dominated by `X != 0`.
+ auto SelectArmIsNonZero = [&](bool IsTrueArm) {
+ Value *Op;
+ Op = IsTrueArm ? I->getOperand(1) : I->getOperand(2);
+ // Op is trivially non-zero.
+ if (isKnownNonZero(Op, DemandedElts, Depth, Q))
return true;
- // If X and Y are both negative (as signed values) then their sum is not
- // zero unless both X and Y equal INT_MIN.
- if (XKnown.isNegative() && YKnown.isNegative()) {
- APInt Mask = APInt::getSignedMaxValue(BitWidth);
- // The sign bit of X is set. If some other bit is set then X is not equal
- // to INT_MIN.
- if (XKnown.One.intersects(Mask))
- return true;
- // The sign bit of Y is set. If some other bit is set then Y is not equal
- // to INT_MIN.
- if (YKnown.One.intersects(Mask))
- return true;
- }
+ // The condition of the select dominates the true/false arm. Check if the
+ // condition implies that a given arm is non-zero.
+ Value *X;
+ CmpInst::Predicate Pred;
+ if (!match(I->getOperand(0), m_c_ICmp(Pred, m_Specific(Op), m_Value(X))))
+ return false;
- // The sum of a non-negative number and a power of two is not zero.
- if (XKnown.isNonNegative() &&
- isKnownToBeAPowerOfTwo(I->getOperand(1), /*OrZero*/ false, Depth, Q))
- return true;
- if (YKnown.isNonNegative() &&
- isKnownToBeAPowerOfTwo(I->getOperand(0), /*OrZero*/ false, Depth, Q))
- return true;
- break;
- }
- case Instruction::Mul: {
- // If X and Y are non-zero then so is X * Y as long as the multiplication
- // does not overflow.
- const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
- if ((Q.IIQ.hasNoSignedWrap(BO) || Q.IIQ.hasNoUnsignedWrap(BO)) &&
- isKnownNonZero(I->getOperand(0), DemandedElts, Depth, Q) &&
- isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q))
+ if (!IsTrueArm)
+ Pred = ICmpInst::getInversePredicate(Pred);
+
+ return cmpExcludesZero(Pred, X);
+ };
+
+ if (SelectArmIsNonZero(/* IsTrueArm */ true) &&
+ SelectArmIsNonZero(/* IsTrueArm */ false))
return true;
break;
}
- case Instruction::Select:
- // (C ? X : Y) != 0 if X != 0 and Y != 0.
- if (isKnownNonZero(I->getOperand(1), DemandedElts, Depth, Q) &&
- isKnownNonZero(I->getOperand(2), DemandedElts, Depth, Q))
- return true;
- break;
case Instruction::PHI: {
auto *PN = cast<PHINode>(I);
if (Q.IIQ.UseInstrInfo && isNonZeroRecurrence(PN))
return true;
// Check if all incoming values are non-zero using recursion.
- Query RecQ = Q;
+ SimplifyQuery RecQ = Q;
unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
return llvm::all_of(PN->operands(), [&](const Use &U) {
if (U.get() == PN)
@@ -2705,7 +2662,7 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
});
}
case Instruction::ExtractElement:
- if (const auto *EEI = dyn_cast<ExtractElementInst>(V)) {
+ if (const auto *EEI = dyn_cast<ExtractElementInst>(I)) {
const Value *Vec = EEI->getVectorOperand();
const Value *Idx = EEI->getIndexOperand();
auto *CIdx = dyn_cast<ConstantInt>(Idx);
@@ -2722,18 +2679,198 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
return isKnownNonZero(I->getOperand(0), Depth, Q) &&
isGuaranteedNotToBePoison(I->getOperand(0), Q.AC, Q.CxtI, Q.DT,
Depth);
- case Instruction::Call:
- if (cast<CallInst>(I)->getIntrinsicID() == Intrinsic::vscale)
+ case Instruction::Load:
+ // A Load tagged with nonnull metadata is never null.
+ if (Q.IIQ.getMetadata(cast<LoadInst>(I), LLVMContext::MD_nonnull))
return true;
+
+ // No need to fall through to computeKnownBits as range metadata is already
+ // handled in isKnownNonZero.
+ return false;
+ case Instruction::Call:
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::sshl_sat:
+ case Intrinsic::ushl_sat:
+ case Intrinsic::abs:
+ case Intrinsic::bitreverse:
+ case Intrinsic::bswap:
+ case Intrinsic::ctpop:
+ return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q);
+ case Intrinsic::ssub_sat:
+ return isNonZeroSub(DemandedElts, Depth, Q, BitWidth,
+ II->getArgOperand(0), II->getArgOperand(1));
+ case Intrinsic::sadd_sat:
+ return isNonZeroAdd(DemandedElts, Depth, Q, BitWidth,
+ II->getArgOperand(0), II->getArgOperand(1),
+ /*NSW*/ true);
+ case Intrinsic::umax:
+ case Intrinsic::uadd_sat:
+ return isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q) ||
+ isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q);
+ case Intrinsic::smin:
+ case Intrinsic::smax: {
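+        // smin(X, Y) is negative (hence non-zero) if either operand is
+        // negative; smax(X, Y) is strictly positive if either operand is.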
+ auto KnownOpImpliesNonZero = [&](const KnownBits &K) {
+ return II->getIntrinsicID() == Intrinsic::smin
+ ? K.isNegative()
+ : K.isStrictlyPositive();
+ };
+ KnownBits XKnown =
+ computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q);
+ if (KnownOpImpliesNonZero(XKnown))
+ return true;
+ KnownBits YKnown =
+ computeKnownBits(II->getArgOperand(1), DemandedElts, Depth, Q);
+ if (KnownOpImpliesNonZero(YKnown))
+ return true;
+
+ if (XKnown.isNonZero() && YKnown.isNonZero())
+ return true;
+ }
+ [[fallthrough]];
+ case Intrinsic::umin:
+ return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q) &&
+ isKnownNonZero(II->getArgOperand(1), DemandedElts, Depth, Q);
+ case Intrinsic::cttz:
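+        // cttz(X) >= 1 when the low bit of X is known zero (and cttz(0) is
+        // the bit width, also non-zero); similarly, ctlz below is non-zero
+        // when X is known non-negative.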
+ return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
+ .Zero[0];
+ case Intrinsic::ctlz:
+ return computeKnownBits(II->getArgOperand(0), DemandedElts, Depth, Q)
+ .isNonNegative();
+ case Intrinsic::fshr:
+ case Intrinsic::fshl:
+ // If Op0 == Op1, this is a rotate. rotate(x, y) != 0 iff x != 0.
+ if (II->getArgOperand(0) == II->getArgOperand(1))
+ return isKnownNonZero(II->getArgOperand(0), DemandedElts, Depth, Q);
+ break;
+ case Intrinsic::vscale:
+ return true;
+ default:
+ break;
+ }
+ }
break;
}
KnownBits Known(BitWidth);
- computeKnownBits(V, DemandedElts, Known, Depth, Q);
+ computeKnownBits(I, DemandedElts, Known, Depth, Q);
return Known.One != 0;
}
-bool isKnownNonZero(const Value* V, unsigned Depth, const Query& Q) {
+/// Return true if the given value is known to be non-zero when defined. For
+/// vectors, return true if every demanded element is known to be non-zero when
+/// defined. For pointers, if the context instruction and dominator tree are
+/// specified, perform context-sensitive analysis and return true if the
+/// pointer couldn't possibly be null at the specified instruction.
+/// Supports values with integer or pointer type and vectors of integers.
+bool isKnownNonZero(const Value *V, const APInt &DemandedElts, unsigned Depth,
+ const SimplifyQuery &Q) {
+
+#ifndef NDEBUG
+ Type *Ty = V->getType();
+ assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
+
+ if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
+ assert(
+ FVTy->getNumElements() == DemandedElts.getBitWidth() &&
+ "DemandedElt width should equal the fixed vector number of elements");
+ } else {
+ assert(DemandedElts == APInt(1, 1) &&
+ "DemandedElt width should be 1 for scalars");
+ }
+#endif
+
+ if (auto *C = dyn_cast<Constant>(V)) {
+ if (C->isNullValue())
+ return false;
+ if (isa<ConstantInt>(C))
+ // Must be non-zero due to null test above.
+ return true;
+
+ // For constant vectors, check that all elements are undefined or known
+ // non-zero to determine that the whole vector is known non-zero.
+ if (auto *VecTy = dyn_cast<FixedVectorType>(C->getType())) {
+ for (unsigned i = 0, e = VecTy->getNumElements(); i != e; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ Constant *Elt = C->getAggregateElement(i);
+ if (!Elt || Elt->isNullValue())
+ return false;
+ if (!isa<UndefValue>(Elt) && !isa<ConstantInt>(Elt))
+ return false;
+ }
+ return true;
+ }
+
+ // A global variable in address space 0 is non null unless extern weak
+ // or an absolute symbol reference. Other address spaces may have null as a
+ // valid address for a global, so we can't assume anything.
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+ if (!GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
+ GV->getType()->getAddressSpace() == 0)
+ return true;
+ }
+
+ // For constant expressions, fall through to the Operator code below.
+ if (!isa<ConstantExpr>(V))
+ return false;
+ }
+
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (MDNode *Ranges = Q.IIQ.getMetadata(I, LLVMContext::MD_range)) {
+ // If the possible ranges don't contain zero, then the value is
+ // definitely non-zero.
+ if (auto *Ty = dyn_cast<IntegerType>(V->getType())) {
+ const APInt ZeroValue(Ty->getBitWidth(), 0);
+ if (rangeMetadataExcludesValue(Ranges, ZeroValue))
+ return true;
+ }
+ }
+ }
+
+ if (!isa<Constant>(V) && isKnownNonZeroFromAssume(V, Q))
+ return true;
+
+ // Some of the tests below are recursive, so bail out if we hit the limit.
+ if (Depth++ >= MaxAnalysisRecursionDepth)
+ return false;
+
+ // Check for pointer simplifications.
+
+ if (PointerType *PtrTy = dyn_cast<PointerType>(V->getType())) {
+ // Alloca never returns null, malloc might.
+ if (isa<AllocaInst>(V) && PtrTy->getAddressSpace() == 0)
+ return true;
+
+  // A byval or inalloca argument is never null, unless the null pointer is a
+  // defined address in its address space. A nonnull argument is assumed
+  // never 0.
+ if (const Argument *A = dyn_cast<Argument>(V)) {
+ if (((A->hasPassPointeeByValueCopyAttr() &&
+ !NullPointerIsDefined(A->getParent(), PtrTy->getAddressSpace())) ||
+ A->hasNonNullAttr()))
+ return true;
+ }
+
+ if (const auto *Call = dyn_cast<CallBase>(V)) {
+ if (Call->isReturnNonNull())
+ return true;
+ if (const auto *RP = getArgumentAliasingToReturnedPointer(Call, true))
+ return isKnownNonZero(RP, Depth, Q);
+ }
+ }
+
+ if (const auto *I = dyn_cast<Operator>(V))
+ if (isKnownNonZeroFromOperator(I, DemandedElts, Depth, Q))
+ return true;
+
+ if (!isa<Constant>(V) &&
+ isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT))
+ return true;
+
+ return false;
+}
+
+bool isKnownNonZero(const Value *V, unsigned Depth, const SimplifyQuery &Q) {
auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
APInt DemandedElts =
FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
@@ -2849,7 +2986,7 @@ getInvertibleOperands(const Operator *Op1,
/// Return true if V2 == V1 + X, where X is known non-zero.
static bool isAddOfNonZero(const Value *V1, const Value *V2, unsigned Depth,
- const Query &Q) {
+ const SimplifyQuery &Q) {
const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
if (!BO || BO->getOpcode() != Instruction::Add)
return false;
@@ -2866,7 +3003,7 @@ static bool isAddOfNonZero(const Value *V1, const Value *V2, unsigned Depth,
/// Return true if V2 == V1 * C, where V1 is known non-zero, C is not 0/1 and
/// the multiplication is nuw or nsw.
static bool isNonEqualMul(const Value *V1, const Value *V2, unsigned Depth,
- const Query &Q) {
+ const SimplifyQuery &Q) {
if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
const APInt *C;
return match(OBO, m_Mul(m_Specific(V1), m_APInt(C))) &&
@@ -2879,7 +3016,7 @@ static bool isNonEqualMul(const Value *V1, const Value *V2, unsigned Depth,
/// Return true if V2 == V1 << C, where V1 is known non-zero, C is not 0 and
/// the shift is nuw or nsw.
static bool isNonEqualShl(const Value *V1, const Value *V2, unsigned Depth,
- const Query &Q) {
+ const SimplifyQuery &Q) {
if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(V2)) {
const APInt *C;
return match(OBO, m_Shl(m_Specific(V1), m_APInt(C))) &&
@@ -2890,7 +3027,7 @@ static bool isNonEqualShl(const Value *V1, const Value *V2, unsigned Depth,
}
static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
- unsigned Depth, const Query &Q) {
+ unsigned Depth, const SimplifyQuery &Q) {
// Check two PHIs are in same block.
if (PN1->getParent() != PN2->getParent())
return false;
@@ -2910,7 +3047,7 @@ static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
if (UsedFullRecursion)
return false;
- Query RecQ = Q;
+ SimplifyQuery RecQ = Q;
RecQ.CxtI = IncomBB->getTerminator();
if (!isKnownNonEqual(IV1, IV2, Depth + 1, RecQ))
return false;
@@ -2921,7 +3058,7 @@ static bool isNonEqualPHIs(const PHINode *PN1, const PHINode *PN2,
/// Return true if it is known that V1 != V2.
static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
- const Query &Q) {
+ const SimplifyQuery &Q) {
if (V1 == V2)
return false;
if (V1->getType() != V2->getType())
@@ -2981,7 +3118,7 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2, unsigned Depth,
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
- const Query &Q) {
+ const SimplifyQuery &Q) {
KnownBits Known(Mask.getBitWidth());
computeKnownBits(V, Known, Depth, Q);
return Mask.isSubsetOf(Known.Zero);
@@ -3065,10 +3202,10 @@ static unsigned computeNumSignBitsVectorConstant(const Value *V,
static unsigned ComputeNumSignBitsImpl(const Value *V,
const APInt &DemandedElts,
- unsigned Depth, const Query &Q);
+ unsigned Depth, const SimplifyQuery &Q);
static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
- unsigned Depth, const Query &Q) {
+ unsigned Depth, const SimplifyQuery &Q) {
unsigned Result = ComputeNumSignBitsImpl(V, DemandedElts, Depth, Q);
assert(Result > 0 && "At least one sign bit needs to be present!");
return Result;
@@ -3083,7 +3220,7 @@ static unsigned ComputeNumSignBits(const Value *V, const APInt &DemandedElts,
/// elements in the vector specified by DemandedElts.
static unsigned ComputeNumSignBitsImpl(const Value *V,
const APInt &DemandedElts,
- unsigned Depth, const Query &Q) {
+ unsigned Depth, const SimplifyQuery &Q) {
Type *Ty = V->getType();
#ifndef NDEBUG
assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
@@ -3303,7 +3440,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
// Take the minimum of all incoming values. This can't infinitely loop
// because of our depth threshold.
- Query RecQ = Q;
+ SimplifyQuery RecQ = Q;
Tmp = TyBits;
for (unsigned i = 0, e = NumIncomingValues; i != e; ++i) {
if (Tmp == 1) return Tmp;
@@ -3511,68 +3648,13 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(const CallBase &CB,
return Intrinsic::not_intrinsic;
}
-/// Return true if we can prove that the specified FP value is never equal to
-/// -0.0.
-/// NOTE: Do not check 'nsz' here because that fast-math-flag does not guarantee
-/// that a value is not -0.0. It only guarantees that -0.0 may be treated
-/// the same as +0.0 in floating-point ops.
-bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
- unsigned Depth) {
- if (auto *CFP = dyn_cast<ConstantFP>(V))
- return !CFP->getValueAPF().isNegZero();
-
- if (Depth == MaxAnalysisRecursionDepth)
- return false;
-
- auto *Op = dyn_cast<Operator>(V);
- if (!Op)
- return false;
-
- // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
- if (match(Op, m_FAdd(m_Value(), m_PosZeroFP())))
- return true;
-
- // sitofp and uitofp turn into +0.0 for zero.
- if (isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op))
- return true;
-
- if (auto *Call = dyn_cast<CallInst>(Op)) {
- Intrinsic::ID IID = getIntrinsicForCallSite(*Call, TLI);
- switch (IID) {
- default:
- break;
- // sqrt(-0.0) = -0.0, no other negative results are possible.
- case Intrinsic::sqrt:
- case Intrinsic::canonicalize:
- return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1);
- case Intrinsic::experimental_constrained_sqrt: {
- // NOTE: This rounding mode restriction may be too strict.
- const auto *CI = cast<ConstrainedFPIntrinsic>(Call);
- if (CI->getRoundingMode() == RoundingMode::NearestTiesToEven)
- return CannotBeNegativeZero(Call->getArgOperand(0), TLI, Depth + 1);
- else
- return false;
- }
- // fabs(x) != -0.0
- case Intrinsic::fabs:
- return true;
- // sitofp and uitofp turn into +0.0 for zero.
- case Intrinsic::experimental_constrained_sitofp:
- case Intrinsic::experimental_constrained_uitofp:
- return true;
- }
- }
-
- return false;
-}
-
/// If \p SignBitOnly is true, test for a known 0 sign bit rather than a
/// standard ordered compare. e.g. make -0.0 olt 0.0 be true because of the sign
/// bit despite comparing equal.
static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
+ const DataLayout &DL,
const TargetLibraryInfo *TLI,
- bool SignBitOnly,
- unsigned Depth) {
+ bool SignBitOnly, unsigned Depth) {
// TODO: This function does not do the right thing when SignBitOnly is true
// and we're lowering to a hypothetical IEEE 754-compliant-but-evil platform
// which flips the sign bits of NaNs. See
@@ -3621,9 +3703,9 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
return true;
// Set SignBitOnly for RHS, because X / -0.0 is -Inf (or NaN).
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
- Depth + 1) &&
- cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI,
+ return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
+ SignBitOnly, Depth + 1) &&
+ cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI,
/*SignBitOnly*/ true, Depth + 1);
case Instruction::FMul:
// X * X is always non-negative or a NaN.
@@ -3634,26 +3716,26 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
[[fallthrough]];
case Instruction::FAdd:
case Instruction::FRem:
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
- Depth + 1) &&
- cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
- Depth + 1);
+ return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
+ SignBitOnly, Depth + 1) &&
+ cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI,
+ SignBitOnly, Depth + 1);
case Instruction::Select:
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
- Depth + 1) &&
- cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
- Depth + 1);
+ return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI,
+ SignBitOnly, Depth + 1) &&
+ cannotBeOrderedLessThanZeroImpl(I->getOperand(2), DL, TLI,
+ SignBitOnly, Depth + 1);
case Instruction::FPExt:
case Instruction::FPTrunc:
// Widening/narrowing never change sign.
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
- Depth + 1);
+ return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
+ SignBitOnly, Depth + 1);
case Instruction::ExtractElement:
// Look through extract element. At the moment we keep this simple and skip
// tracking the specific element. But at least we might find information
// valid for all elements of the vector.
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
- Depth + 1);
+ return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
+ SignBitOnly, Depth + 1);
case Instruction::Call:
const auto *CI = cast<CallInst>(I);
Intrinsic::ID IID = getIntrinsicForCallSite(*CI, TLI);
@@ -3670,7 +3752,8 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
case Intrinsic::round:
case Intrinsic::roundeven:
case Intrinsic::fptrunc_round:
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly, Depth + 1);
+ return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
+ SignBitOnly, Depth + 1);
case Intrinsic::maxnum: {
Value *V0 = I->getOperand(0), *V1 = I->getOperand(1);
auto isPositiveNum = [&](Value *V) {
@@ -3685,8 +3768,8 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
// -0.0 compares equal to 0.0, so if this operand is at least -0.0,
// maxnum can't be ordered-less-than-zero.
- return isKnownNeverNaN(V, TLI) &&
- cannotBeOrderedLessThanZeroImpl(V, TLI, false, Depth + 1);
+ return isKnownNeverNaN(V, DL, TLI) &&
+ cannotBeOrderedLessThanZeroImpl(V, DL, TLI, false, Depth + 1);
};
// TODO: This could be improved. We could also check that neither operand
@@ -3695,30 +3778,31 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
}
case Intrinsic::maximum:
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
- Depth + 1) ||
- cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
- Depth + 1);
+ return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
+ SignBitOnly, Depth + 1) ||
+ cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI,
+ SignBitOnly, Depth + 1);
case Intrinsic::minnum:
case Intrinsic::minimum:
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
- Depth + 1) &&
- cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, SignBitOnly,
- Depth + 1);
+ return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
+ SignBitOnly, Depth + 1) &&
+ cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI,
+ SignBitOnly, Depth + 1);
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::fabs:
return true;
case Intrinsic::copysign:
// Only the sign operand matters.
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), TLI, true,
+ return cannotBeOrderedLessThanZeroImpl(I->getOperand(1), DL, TLI, true,
Depth + 1);
case Intrinsic::sqrt:
// sqrt(x) is always >= -0 or NaN. Moreover, sqrt(x) == -0 iff x == -0.
if (!SignBitOnly)
return true;
- return CI->hasNoNaNs() && (CI->hasNoSignedZeros() ||
- CannotBeNegativeZero(CI->getOperand(0), TLI));
+ return CI->hasNoNaNs() &&
+ (CI->hasNoSignedZeros() ||
+ cannotBeNegativeZero(CI->getOperand(0), DL, TLI));
case Intrinsic::powi:
if (ConstantInt *Exponent = dyn_cast<ConstantInt>(I->getOperand(1))) {
@@ -3739,264 +3823,1423 @@ static bool cannotBeOrderedLessThanZeroImpl(const Value *V,
// but we must return false if x == -0. Unfortunately we do not currently
// have a way of expressing this constraint. See details in
// https://llvm.org/bugs/show_bug.cgi?id=31702.
- return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), TLI, SignBitOnly,
- Depth + 1);
+ return cannotBeOrderedLessThanZeroImpl(I->getOperand(0), DL, TLI,
+ SignBitOnly, Depth + 1);
case Intrinsic::fma:
case Intrinsic::fmuladd:
// x*x+y is non-negative if y is non-negative.
return I->getOperand(0) == I->getOperand(1) &&
(!SignBitOnly || cast<FPMathOperator>(I)->hasNoNaNs()) &&
- cannotBeOrderedLessThanZeroImpl(I->getOperand(2), TLI, SignBitOnly,
- Depth + 1);
+ cannotBeOrderedLessThanZeroImpl(I->getOperand(2), DL, TLI,
+ SignBitOnly, Depth + 1);
}
break;
}
return false;
}
-bool llvm::CannotBeOrderedLessThanZero(const Value *V,
+bool llvm::CannotBeOrderedLessThanZero(const Value *V, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
- return cannotBeOrderedLessThanZeroImpl(V, TLI, false, 0);
+ return cannotBeOrderedLessThanZeroImpl(V, DL, TLI, false, 0);
}
-bool llvm::SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI) {
- return cannotBeOrderedLessThanZeroImpl(V, TLI, true, 0);
+bool llvm::SignBitMustBeZero(const Value *V, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ return cannotBeOrderedLessThanZeroImpl(V, DL, TLI, true, 0);
}
-bool llvm::isKnownNeverInfinity(const Value *V, const TargetLibraryInfo *TLI,
- unsigned Depth) {
- assert(V->getType()->isFPOrFPVectorTy() && "Querying for Inf on non-FP type");
+/// Return true if it's possible to assume IEEE treatment of input denormals in
+/// \p F for \p Val.
+static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
+ Ty = Ty->getScalarType();
+ return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
+}
- // If we're told that infinities won't happen, assume they won't.
- if (auto *FPMathOp = dyn_cast<FPMathOperator>(V))
- if (FPMathOp->hasNoInfs())
- return true;
+static bool inputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
+ Ty = Ty->getScalarType();
+ DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics());
+ return Mode.Input == DenormalMode::IEEE ||
+ Mode.Input == DenormalMode::PositiveZero;
+}
- // Handle scalar constants.
- if (auto *CFP = dyn_cast<ConstantFP>(V))
- return !CFP->isInfinity();
+static bool outputDenormalIsIEEEOrPosZero(const Function &F, const Type *Ty) {
+ Ty = Ty->getScalarType();
+ DenormalMode Mode = F.getDenormalMode(Ty->getFltSemantics());
+ return Mode.Output == DenormalMode::IEEE ||
+ Mode.Output == DenormalMode::PositiveZero;
+}
- if (Depth == MaxAnalysisRecursionDepth)
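+// "Logical" zero here means a value that behaves as zero once the function's
+// input denormal-flushing mode is taken into account.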
+bool KnownFPClass::isKnownNeverLogicalZero(const Function &F, Type *Ty) const {
+ return isKnownNeverZero() &&
+ (isKnownNeverSubnormal() || inputDenormalIsIEEE(F, Ty));
+}
+
+bool KnownFPClass::isKnownNeverLogicalNegZero(const Function &F,
+ Type *Ty) const {
+ return isKnownNeverNegZero() &&
+ (isKnownNeverNegSubnormal() || inputDenormalIsIEEEOrPosZero(F, Ty));
+}
+
+bool KnownFPClass::isKnownNeverLogicalPosZero(const Function &F,
+ Type *Ty) const {
+ if (!isKnownNeverPosZero())
return false;
- if (auto *Inst = dyn_cast<Instruction>(V)) {
- switch (Inst->getOpcode()) {
- case Instruction::Select: {
- return isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1) &&
- isKnownNeverInfinity(Inst->getOperand(2), TLI, Depth + 1);
+ // If we know there are no denormals, nothing can be flushed to zero.
+ if (isKnownNeverSubnormal())
+ return true;
+
+ DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics());
+ switch (Mode.Input) {
+ case DenormalMode::IEEE:
+ return true;
+ case DenormalMode::PreserveSign:
+ // Negative subnormal won't flush to +0
+ return isKnownNeverPosSubnormal();
+ case DenormalMode::PositiveZero:
+ default:
+ // Both positive and negative subnormal could flush to +0
+ return false;
+ }
+
+ llvm_unreachable("covered switch over denormal mode");
+}
+
+void KnownFPClass::propagateDenormal(const KnownFPClass &Src, const Function &F,
+ Type *Ty) {
+ KnownFPClasses = Src.KnownFPClasses;
+ // If we aren't assuming the source can't be a zero, we don't have to check if
+ // a denormal input could be flushed.
+ if (!Src.isKnownNeverPosZero() && !Src.isKnownNeverNegZero())
+ return;
+
+ // If we know the input can't be a denormal, it can't be flushed to 0.
+ if (Src.isKnownNeverSubnormal())
+ return;
+
+ DenormalMode Mode = F.getDenormalMode(Ty->getScalarType()->getFltSemantics());
+
+ if (!Src.isKnownNeverPosSubnormal() && Mode != DenormalMode::getIEEE())
+ KnownFPClasses |= fcPosZero;
+
+ if (!Src.isKnownNeverNegSubnormal() && Mode != DenormalMode::getIEEE()) {
+ if (Mode != DenormalMode::getPositiveZero())
+ KnownFPClasses |= fcNegZero;
+
+ if (Mode.Input == DenormalMode::PositiveZero ||
+ Mode.Output == DenormalMode::PositiveZero ||
+ Mode.Input == DenormalMode::Dynamic ||
+ Mode.Output == DenormalMode::Dynamic)
+ KnownFPClasses |= fcPosZero;
+ }
+}
+
+void KnownFPClass::propagateCanonicalizingSrc(const KnownFPClass &Src,
+ const Function &F, Type *Ty) {
+ propagateDenormal(Src, F, Ty);
+ propagateNaN(Src, /*PreserveSign=*/true);
+}
+
+/// Returns a pair of values, which if passed to llvm.is.fpclass, returns the
+/// same result as an fcmp with the given operands.
+std::pair<Value *, FPClassTest> llvm::fcmpToClassTest(FCmpInst::Predicate Pred,
+ const Function &F,
+ Value *LHS, Value *RHS,
+ bool LookThroughSrc) {
+ const APFloat *ConstRHS;
+ if (!match(RHS, m_APFloat(ConstRHS)))
+ return {nullptr, fcNone};
+
+ // fcmp ord x, zero|normal|subnormal|inf -> ~fcNan
+ if (Pred == FCmpInst::FCMP_ORD && !ConstRHS->isNaN())
+ return {LHS, ~fcNan};
+
+ // fcmp uno x, zero|normal|subnormal|inf -> fcNan
+ if (Pred == FCmpInst::FCMP_UNO && !ConstRHS->isNaN())
+ return {LHS, fcNan};
+
+ if (ConstRHS->isZero()) {
+    // Comparisons with zero are only exactly equal to an fcZero class test if
+    // input denormals are not flushed.
+ // TODO: Handle DAZ by expanding masks to cover subnormal cases.
+ if (Pred != FCmpInst::FCMP_ORD && Pred != FCmpInst::FCMP_UNO &&
+ !inputDenormalIsIEEE(F, LHS->getType()))
+ return {nullptr, fcNone};
+
+ switch (Pred) {
+ case FCmpInst::FCMP_OEQ: // Match x == 0.0
+ return {LHS, fcZero};
+ case FCmpInst::FCMP_UEQ: // Match isnan(x) || (x == 0.0)
+ return {LHS, fcZero | fcNan};
+ case FCmpInst::FCMP_UNE: // Match (x != 0.0)
+ return {LHS, ~fcZero};
+ case FCmpInst::FCMP_ONE: // Match !isnan(x) && x != 0.0
+ return {LHS, ~fcNan & ~fcZero};
+ case FCmpInst::FCMP_ORD:
+ // Canonical form of ord/uno is with a zero. We could also handle
+ // non-canonical other non-NaN constants or LHS == RHS.
+ return {LHS, ~fcNan};
+ case FCmpInst::FCMP_UNO:
+ return {LHS, fcNan};
+ case FCmpInst::FCMP_OGT: // x > 0
+ return {LHS, fcPosSubnormal | fcPosNormal | fcPosInf};
+ case FCmpInst::FCMP_UGT: // isnan(x) || x > 0
+ return {LHS, fcPosSubnormal | fcPosNormal | fcPosInf | fcNan};
+ case FCmpInst::FCMP_OGE: // x >= 0
+ return {LHS, fcPositive | fcNegZero};
+ case FCmpInst::FCMP_UGE: // isnan(x) || x >= 0
+ return {LHS, fcPositive | fcNegZero | fcNan};
+ case FCmpInst::FCMP_OLT: // x < 0
+ return {LHS, fcNegSubnormal | fcNegNormal | fcNegInf};
+ case FCmpInst::FCMP_ULT: // isnan(x) || x < 0
+ return {LHS, fcNegSubnormal | fcNegNormal | fcNegInf | fcNan};
+ case FCmpInst::FCMP_OLE: // x <= 0
+ return {LHS, fcNegative | fcPosZero};
+ case FCmpInst::FCMP_ULE: // isnan(x) || x <= 0
+ return {LHS, fcNegative | fcPosZero | fcNan};
+ default:
+ break;
}
- case Instruction::SIToFP:
- case Instruction::UIToFP: {
- // Get width of largest magnitude integer (remove a bit if signed).
- // This still works for a signed minimum value because the largest FP
- // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
- int IntSize = Inst->getOperand(0)->getType()->getScalarSizeInBits();
- if (Inst->getOpcode() == Instruction::SIToFP)
- --IntSize;
- // If the exponent of the largest finite FP value can hold the largest
- // integer, the result of the cast must be finite.
- Type *FPTy = Inst->getType()->getScalarType();
- return ilogb(APFloat::getLargest(FPTy->getFltSemantics())) >= IntSize;
+ return {nullptr, fcNone};
+ }
+
+ Value *Src = LHS;
+ const bool IsFabs = LookThroughSrc && match(LHS, m_FAbs(m_Value(Src)));
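+ // When looking through fabs, the returned test applies to the fabs operand,
+ // so the masks below must account for both possible signs of the source.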
+
+ // Compute the test mask that would return true for the ordered comparisons.
+ FPClassTest Mask;
+
+ if (ConstRHS->isInfinity()) {
+ switch (Pred) {
+ case FCmpInst::FCMP_OEQ:
+ case FCmpInst::FCMP_UNE: {
+ // Match __builtin_isinf patterns
+ //
+ // fcmp oeq x, +inf -> is_fpclass x, fcPosInf
+ // fcmp oeq fabs(x), +inf -> is_fpclass x, fcInf
+ // fcmp oeq x, -inf -> is_fpclass x, fcNegInf
+ // fcmp oeq fabs(x), -inf -> is_fpclass x, 0 -> false
+ //
+ // fcmp une x, +inf -> is_fpclass x, ~fcPosInf
+ // fcmp une fabs(x), +inf -> is_fpclass x, ~fcInf
+ // fcmp une x, -inf -> is_fpclass x, ~fcNegInf
+ // fcmp une fabs(x), -inf -> is_fpclass x, fcAllFlags -> true
+
+ if (ConstRHS->isNegative()) {
+ Mask = fcNegInf;
+ if (IsFabs)
+ Mask = fcNone;
+ } else {
+ Mask = fcPosInf;
+ if (IsFabs)
+ Mask |= fcNegInf;
+ }
+
+ break;
}
- case Instruction::FNeg:
- case Instruction::FPExt: {
- // Peek through to source op. If it is not infinity, this is not infinity.
- return isKnownNeverInfinity(Inst->getOperand(0), TLI, Depth + 1);
+ case FCmpInst::FCMP_ONE:
+ case FCmpInst::FCMP_UEQ: {
+ // Match __builtin_isinf patterns
+ // fcmp one x, -inf -> is_fpclass x, ~fcNegInf & ~fcNan
+ // fcmp one fabs(x), -inf -> is_fpclass x, ~fcNan
+ // fcmp one x, +inf -> is_fpclass x, ~fcPosInf & ~fcNan
+ // fcmp one fabs(x), +inf -> is_fpclass x, ~fcInf & ~fcNan
+ //
+ // fcmp ueq x, +inf -> is_fpclass x, fcPosInf|fcNan
+ // fcmp ueq (fabs x), +inf -> is_fpclass x, fcInf|fcNan
+ // fcmp ueq x, -inf -> is_fpclass x, fcNegInf|fcNan
+ // fcmp ueq fabs(x), -inf -> is_fpclass x, fcNan
+ if (ConstRHS->isNegative()) {
+ Mask = ~fcNegInf & ~fcNan;
+ if (IsFabs)
+ Mask = ~fcNan;
+ } else {
+ Mask = ~fcPosInf & ~fcNan;
+ if (IsFabs)
+ Mask &= ~fcNegInf;
+ }
+
+ break;
}
- case Instruction::FPTrunc: {
- // Need a range check.
- return false;
+ case FCmpInst::FCMP_OLT:
+ case FCmpInst::FCMP_UGE: {
+ if (ConstRHS->isNegative()) {
+ // No value is ordered and less than negative infinity.
+ // Every value is either unordered with -inf or greater than or equal to it.
+ // fcmp olt x, -inf -> false
+ // fcmp uge x, -inf -> true
+ Mask = fcNone;
+ break;
+ }
+
+ // fcmp olt fabs(x), +inf -> fcFinite
+ // fcmp uge fabs(x), +inf -> ~fcFinite
+ // fcmp olt x, +inf -> fcFinite|fcNegInf
+ // fcmp uge x, +inf -> ~(fcFinite|fcNegInf)
+ Mask = fcFinite;
+ if (!IsFabs)
+ Mask |= fcNegInf;
+ break;
+ }
+ case FCmpInst::FCMP_OGE:
+ case FCmpInst::FCMP_ULT: {
+ if (ConstRHS->isNegative()) // TODO
+ return {nullptr, fcNone};
+
+ // fcmp oge fabs(x), +inf -> fcInf
+ // fcmp oge x, +inf -> fcPosInf
+ // fcmp ult fabs(x), +inf -> ~fcInf
+ // fcmp ult x, +inf -> ~fcPosInf
+ Mask = fcPosInf;
+ if (IsFabs)
+ Mask |= fcNegInf;
+ break;
+ }
+ case FCmpInst::FCMP_OGT:
+ case FCmpInst::FCMP_ULE: {
+ if (ConstRHS->isNegative())
+ return {nullptr, fcNone};
+
+ // No value is ordered and greater than infinity.
+ Mask = fcNone;
+ break;
}
default:
+ return {nullptr, fcNone};
+ }
+ } else if (ConstRHS->isSmallestNormalized() && !ConstRHS->isNegative()) {
+ // Match pattern that's used in __builtin_isnormal.
+ switch (Pred) {
+ case FCmpInst::FCMP_OLT:
+ case FCmpInst::FCMP_UGE: {
+ // fcmp olt x, smallest_normal -> fcNegInf|fcNegNormal|fcSubnormal|fcZero
+ // fcmp olt fabs(x), smallest_normal -> fcSubnormal|fcZero
+ // fcmp uge x, smallest_normal -> fcNan|fcPosNormal|fcPosInf
+ // fcmp uge fabs(x), smallest_normal -> ~(fcSubnormal|fcZero)
+ Mask = fcZero | fcSubnormal;
+ if (!IsFabs)
+ Mask |= fcNegNormal | fcNegInf;
+
break;
}
+ case FCmpInst::FCMP_OGE:
+ case FCmpInst::FCMP_ULT: {
+ // fcmp oge x, smallest_normal -> fcPosNormal | fcPosInf
+ // fcmp oge fabs(x), smallest_normal -> fcInf | fcNormal
+ // fcmp ult x, smallest_normal -> ~(fcPosNormal | fcPosInf)
+ // fcmp ult fabs(x), smallest_normal -> ~(fcInf | fcNormal)
+ Mask = fcPosInf | fcPosNormal;
+ if (IsFabs)
+ Mask |= fcNegInf | fcNegNormal;
+ break;
+ }
+ default:
+ return {nullptr, fcNone};
+ }
+ } else if (ConstRHS->isNaN()) {
+ // fcmp o__ x, nan -> false
+ // fcmp u__ x, nan -> true
+ Mask = fcNone;
+ } else
+ return {nullptr, fcNone};
- if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
- switch (II->getIntrinsicID()) {
+ // Invert the comparison for the unordered cases.
+ if (FCmpInst::isUnordered(Pred))
+ Mask = ~Mask;
+
+ return {Src, Mask};
+}
+
+static FPClassTest computeKnownFPClassFromAssumes(const Value *V,
+ const SimplifyQuery &Q) {
+ FPClassTest KnownFromAssume = fcAllFlags;
+
+ // Try to restrict the floating-point classes based on information from
+ // assumptions.
+ for (auto &AssumeVH : Q.AC->assumptionsFor(V)) {
+ if (!AssumeVH)
+ continue;
+ CallInst *I = cast<CallInst>(AssumeVH);
+ const Function *F = I->getFunction();
+
+ assert(F == Q.CxtI->getParent()->getParent() &&
+ "Got assumption for the wrong function!");
+ assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
+ "must be an assume intrinsic");
+
+ if (!isValidAssumeForContext(I, Q.CxtI, Q.DT))
+ continue;
+
+ CmpInst::Predicate Pred;
+ Value *LHS, *RHS;
+ uint64_t ClassVal = 0;
+ if (match(I->getArgOperand(0), m_FCmp(Pred, m_Value(LHS), m_Value(RHS)))) {
+ auto [TestedValue, TestedMask] =
+ fcmpToClassTest(Pred, *F, LHS, RHS, true);
+ // First see if we can fold in fabs/fneg into the test.
+ if (TestedValue == V)
+ KnownFromAssume &= TestedMask;
+ else {
+ // Try again without the lookthrough if we found a different source
+ // value.
+ auto [TestedValue, TestedMask] =
+ fcmpToClassTest(Pred, *F, LHS, RHS, false);
+ if (TestedValue == V)
+ KnownFromAssume &= TestedMask;
+ }
+ } else if (match(I->getArgOperand(0),
+ m_Intrinsic<Intrinsic::is_fpclass>(
+ m_Value(LHS), m_ConstantInt(ClassVal)))) {
+ KnownFromAssume &= static_cast<FPClassTest>(ClassVal);
+ }
+ }
+
+ return KnownFromAssume;
+}
+
+void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
+ FPClassTest InterestedClasses, KnownFPClass &Known,
+ unsigned Depth, const SimplifyQuery &Q);
+
+static void computeKnownFPClass(const Value *V, KnownFPClass &Known,
+ FPClassTest InterestedClasses, unsigned Depth,
+ const SimplifyQuery &Q) {
+ auto *FVTy = dyn_cast<FixedVectorType>(V->getType());
+ APInt DemandedElts =
+ FVTy ? APInt::getAllOnes(FVTy->getNumElements()) : APInt(1, 1);
+ computeKnownFPClass(V, DemandedElts, InterestedClasses, Known, Depth, Q);
+}
+
+static void computeKnownFPClassForFPTrunc(const Operator *Op,
+ const APInt &DemandedElts,
+ FPClassTest InterestedClasses,
+ KnownFPClass &Known, unsigned Depth,
+ const SimplifyQuery &Q) {
+ if ((InterestedClasses &
+ (KnownFPClass::OrderedLessThanZeroMask | fcNan)) == fcNone)
+ return;
+
+ KnownFPClass KnownSrc;
+ computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
+ KnownSrc, Depth + 1, Q);
+
+ // Sign should be preserved
+ // TODO: Handle cannot be ordered greater than zero
+ if (KnownSrc.cannotBeOrderedLessThanZero())
+ Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
+
+ Known.propagateNaN(KnownSrc, true);
+
+ // Infinity needs a range check.
+}
+
+// TODO: Merge implementation of cannotBeOrderedLessThanZero into here.
+void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
+ FPClassTest InterestedClasses, KnownFPClass &Known,
+ unsigned Depth, const SimplifyQuery &Q) {
+ assert(Known.isUnknown() && "should not be called with known information");
+
+ if (!DemandedElts) {
+ // No demanded elts, better to assume we don't know anything.
+ Known.resetAll();
+ return;
+ }
+
+ assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
+
+ if (auto *CFP = dyn_cast_or_null<ConstantFP>(V)) {
+ Known.KnownFPClasses = CFP->getValueAPF().classify();
+ Known.SignBit = CFP->isNegative();
+ return;
+ }
+
+ // Try to handle fixed width vector constants
+ auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
+ const Constant *CV = dyn_cast<Constant>(V);
+ if (VFVTy && CV) {
+ Known.KnownFPClasses = fcNone;
+
+ // For vectors, classify each element and accumulate the union of the classes.
+ unsigned NumElts = VFVTy->getNumElements();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Elt = CV->getAggregateElement(i);
+ if (!Elt) {
+ Known = KnownFPClass();
+ return;
+ }
+ if (isa<UndefValue>(Elt))
+ continue;
+ auto *CElt = dyn_cast<ConstantFP>(Elt);
+ if (!CElt) {
+ Known = KnownFPClass();
+ return;
+ }
+
+ KnownFPClass KnownElt{CElt->getValueAPF().classify(), CElt->isNegative()};
+ Known |= KnownElt;
+ }
+
+ return;
+ }
+
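+ // Collect classes ruled out by nofpclass return/argument attributes;
+ // fast-math flags and assumptions are folded in below.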
+ FPClassTest KnownNotFromFlags = fcNone;
+ if (const auto *CB = dyn_cast<CallBase>(V))
+ KnownNotFromFlags |= CB->getRetNoFPClass();
+ else if (const auto *Arg = dyn_cast<Argument>(V))
+ KnownNotFromFlags |= Arg->getNoFPClass();
+
+ const Operator *Op = dyn_cast<Operator>(V);
+ if (const FPMathOperator *FPOp = dyn_cast_or_null<FPMathOperator>(Op)) {
+ if (FPOp->hasNoNaNs())
+ KnownNotFromFlags |= fcNan;
+ if (FPOp->hasNoInfs())
+ KnownNotFromFlags |= fcInf;
+ }
+
+ if (Q.AC) {
+ FPClassTest AssumedClasses = computeKnownFPClassFromAssumes(V, Q);
+ KnownNotFromFlags |= ~AssumedClasses;
+ }
+
+ // We no longer need to find out about these bits from inputs if we can
+ // assume this from flags/attributes.
+ InterestedClasses &= ~KnownNotFromFlags;
+
+ auto ClearClassesFromFlags = make_scope_exit([=, &Known] {
+ Known.knownNot(KnownNotFromFlags);
+ });
+
+ if (!Op)
+ return;
+
+ // All recursive calls that increase depth must come after this.
+ if (Depth == MaxAnalysisRecursionDepth)
+ return;
+
+ const unsigned Opc = Op->getOpcode();
+ switch (Opc) {
+ case Instruction::FNeg: {
+ computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
+ Known, Depth + 1, Q);
+ Known.fneg();
+ break;
+ }
+ case Instruction::Select: {
+ Value *Cond = Op->getOperand(0);
+ Value *LHS = Op->getOperand(1);
+ Value *RHS = Op->getOperand(2);
+
+ FPClassTest FilterLHS = fcAllFlags;
+ FPClassTest FilterRHS = fcAllFlags;
+
+ Value *TestedValue = nullptr;
+ FPClassTest TestedMask = fcNone;
+ uint64_t ClassVal = 0;
+ const Function *F = cast<Instruction>(Op)->getFunction();
+ CmpInst::Predicate Pred;
+ Value *CmpLHS, *CmpRHS;
+ if (F && match(Cond, m_FCmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS)))) {
+ // If the select filters out a value based on the class, it no longer
+ // participates in the class of the result
+
+ // TODO: In some degenerate cases we can infer something if we try again
+ // without looking through sign operations.
+ bool LookThroughFAbsFNeg = CmpLHS != LHS && CmpLHS != RHS;
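+ // Only look through fabs/fneg if the compared value is not itself one of the
+ // select arms; otherwise the test must match the arm directly.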
+ std::tie(TestedValue, TestedMask) =
+ fcmpToClassTest(Pred, *F, CmpLHS, CmpRHS, LookThroughFAbsFNeg);
+ } else if (match(Cond,
+ m_Intrinsic<Intrinsic::is_fpclass>(
+ m_Value(TestedValue), m_ConstantInt(ClassVal)))) {
+ TestedMask = static_cast<FPClassTest>(ClassVal);
+ }
+
+ if (TestedValue == LHS) {
+ // match !isnan(x) ? x : y
+ FilterLHS = TestedMask;
+ } else if (TestedValue == RHS) {
+ // match !isnan(x) ? y : x
+ FilterRHS = ~TestedMask;
+ }
+
+ KnownFPClass Known2;
+ computeKnownFPClass(LHS, DemandedElts, InterestedClasses & FilterLHS, Known,
+ Depth + 1, Q);
+ Known.KnownFPClasses &= FilterLHS;
+
+ computeKnownFPClass(RHS, DemandedElts, InterestedClasses & FilterRHS,
+ Known2, Depth + 1, Q);
+ Known2.KnownFPClasses &= FilterRHS;
+
+ Known |= Known2;
+ break;
+ }
+ case Instruction::Call: {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Op)) {
+ const Intrinsic::ID IID = II->getIntrinsicID();
+ switch (IID) {
+ case Intrinsic::fabs: {
+ if ((InterestedClasses & (fcNan | fcPositive)) != fcNone) {
+ // If we only care about the sign bit we don't need to inspect the
+ // operand.
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts,
+ InterestedClasses, Known, Depth + 1, Q);
+ }
+
+ Known.fabs();
+ break;
+ }
+ case Intrinsic::copysign: {
+ KnownFPClass KnownSign;
+
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts,
+ InterestedClasses, Known, Depth + 1, Q);
+ computeKnownFPClass(II->getArgOperand(1), DemandedElts,
+ InterestedClasses, KnownSign, Depth + 1, Q);
+ Known.copysign(KnownSign);
+ break;
+ }
+ case Intrinsic::fma:
+ case Intrinsic::fmuladd: {
+ if ((InterestedClasses & fcNegative) == fcNone)
+ break;
+
+ if (II->getArgOperand(0) != II->getArgOperand(1))
+ break;
+
+ // The multiply cannot be -0 and therefore the add can't be -0
+ Known.knownNot(fcNegZero);
+
+ // x * x + y is non-negative if y is non-negative.
+ KnownFPClass KnownAddend;
+ computeKnownFPClass(II->getArgOperand(2), DemandedElts,
+ InterestedClasses, KnownAddend, Depth + 1, Q);
+
+ // TODO: Known sign bit with no nans
+ if (KnownAddend.cannotBeOrderedLessThanZero())
+ Known.knownNot(fcNegative);
+ break;
+ }
+ case Intrinsic::sqrt:
+ case Intrinsic::experimental_constrained_sqrt: {
+ KnownFPClass KnownSrc;
+ FPClassTest InterestedSrcs = InterestedClasses;
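+ // sqrt only produces a NaN for NaN or ordered-less-than-zero inputs, so
+ // ruling out NaN requires knowing the source's negative classes.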
+ if (InterestedClasses & fcNan)
+ InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
+
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts,
+ InterestedSrcs, KnownSrc, Depth + 1, Q);
+
+ if (KnownSrc.isKnownNeverPosInfinity())
+ Known.knownNot(fcPosInf);
+ if (KnownSrc.isKnownNever(fcSNan))
+ Known.knownNot(fcSNan);
+
+ // Any negative value besides -0 returns a nan.
+ if (KnownSrc.isKnownNeverNaN() &&
+ KnownSrc.cannotBeOrderedLessThanZero())
+ Known.knownNot(fcNan);
+
+ // The only negative value that can be returned is -0 for -0 inputs.
+ Known.knownNot(fcNegInf | fcNegSubnormal | fcNegNormal);
+
+ // If the input denormal mode could be PreserveSign, a negative
+ // subnormal input could produce a negative zero output.
+ const Function *F = II->getFunction();
+ if (Q.IIQ.hasNoSignedZeros(II) ||
+ (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))) {
+ Known.knownNot(fcNegZero);
+ if (KnownSrc.isKnownNeverNaN())
+ Known.SignBit = false;
+ }
+
+ break;
+ }
case Intrinsic::sin:
- case Intrinsic::cos:
+ case Intrinsic::cos: {
// Return NaN on infinite inputs.
- return true;
- case Intrinsic::fabs:
- case Intrinsic::sqrt:
- case Intrinsic::canonicalize:
- case Intrinsic::copysign:
- case Intrinsic::arithmetic_fence:
+ KnownFPClass KnownSrc;
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts,
+ InterestedClasses, KnownSrc, Depth + 1, Q);
+ Known.knownNot(fcInf);
+ if (KnownSrc.isKnownNeverNaN() && KnownSrc.isKnownNeverInfinity())
+ Known.knownNot(fcNan);
+ break;
+ }
+
+ case Intrinsic::maxnum:
+ case Intrinsic::minnum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximum: {
+ KnownFPClass KnownLHS, KnownRHS;
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts,
+ InterestedClasses, KnownLHS, Depth + 1, Q);
+ computeKnownFPClass(II->getArgOperand(1), DemandedElts,
+ InterestedClasses, KnownRHS, Depth + 1, Q);
+
+ bool NeverNaN =
+ KnownLHS.isKnownNeverNaN() || KnownRHS.isKnownNeverNaN();
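+ // Conservatively start from the union of the classes both operands may have.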
+ Known = KnownLHS | KnownRHS;
+
+ // If either operand is not NaN, the result is not NaN.
+ if (NeverNaN && (IID == Intrinsic::minnum || IID == Intrinsic::maxnum))
+ Known.knownNot(fcNan);
+
+ if (IID == Intrinsic::maxnum) {
+ // If at least one operand is known not to be less than zero, the result
+ // cannot be ordered less than zero.
+ if ((KnownLHS.cannotBeOrderedLessThanZero() &&
+ KnownLHS.isKnownNeverNaN()) ||
+ (KnownRHS.cannotBeOrderedLessThanZero() &&
+ KnownRHS.isKnownNeverNaN()))
+ Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
+ } else if (IID == Intrinsic::maximum) {
+ // If at least one operand is known not to be less than zero, the result
+ // cannot be ordered less than zero.
+ if (KnownLHS.cannotBeOrderedLessThanZero() ||
+ KnownRHS.cannotBeOrderedLessThanZero())
+ Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
+ } else if (IID == Intrinsic::minnum) {
+ // If at least one operand is known not to be greater than zero, the result
+ // cannot be ordered greater than zero.
+ if ((KnownLHS.cannotBeOrderedGreaterThanZero() &&
+ KnownLHS.isKnownNeverNaN()) ||
+ (KnownRHS.cannotBeOrderedGreaterThanZero() &&
+ KnownRHS.isKnownNeverNaN()))
+ Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
+ } else {
+ // If at least one operand is known not to be greater than zero, the result
+ // cannot be ordered greater than zero.
+ if (KnownLHS.cannotBeOrderedGreaterThanZero() ||
+ KnownRHS.cannotBeOrderedGreaterThanZero())
+ Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
+ }
+
+ // Fixup zero handling if denormals could be returned as a zero.
+ //
+ // As there's no spec for denormal flushing, be conservative with the
+ // treatment of denormals that could be flushed to zero. For older
+ // subtargets on AMDGPU the min/max instructions would not flush the
+ // output and return the original value.
+ //
+ // TODO: This could be refined based on the sign
+ if ((Known.KnownFPClasses & fcZero) != fcNone &&
+ !Known.isKnownNeverSubnormal()) {
+ const Function *Parent = II->getFunction();
+ if (!Parent)
+ break;
+
+ DenormalMode Mode = Parent->getDenormalMode(
+ II->getType()->getScalarType()->getFltSemantics());
+ if (Mode != DenormalMode::getIEEE())
+ Known.KnownFPClasses |= fcZero;
+ }
+
+ break;
+ }
+ case Intrinsic::canonicalize: {
+ KnownFPClass KnownSrc;
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts,
+ InterestedClasses, KnownSrc, Depth + 1, Q);
+
+ // This is essentially a stronger form of
+ // propagateCanonicalizingSrc. Other "canonicalizing" operations don't
+ // actually have an IR canonicalization guarantee.
+
+ // Canonicalize may flush denormals to zero, so we have to consider the
+ // denormal mode to preserve known-not-0 knowledge.
+ Known.KnownFPClasses = KnownSrc.KnownFPClasses | fcZero | fcQNan;
+
+ // Stronger version of propagateNaN
+ // Canonicalize is guaranteed to quiet signaling nans.
+ if (KnownSrc.isKnownNeverNaN())
+ Known.knownNot(fcNan);
+ else
+ Known.knownNot(fcSNan);
+
+ const Function *F = II->getFunction();
+ if (!F)
+ break;
+
+ // If the parent function flushes denormals, the canonical output cannot
+ // be a denormal.
+ const fltSemantics &FPType =
+ II->getType()->getScalarType()->getFltSemantics();
+ DenormalMode DenormMode = F->getDenormalMode(FPType);
+ if (DenormMode == DenormalMode::getIEEE()) {
+ if (KnownSrc.isKnownNever(fcPosZero))
+ Known.knownNot(fcPosZero);
+ if (KnownSrc.isKnownNever(fcNegZero))
+ Known.knownNot(fcNegZero);
+ break;
+ }
+
+ if (DenormMode.inputsAreZero() || DenormMode.outputsAreZero())
+ Known.knownNot(fcSubnormal);
+
+ if (DenormMode.Input == DenormalMode::PositiveZero ||
+ (DenormMode.Output == DenormalMode::PositiveZero &&
+ DenormMode.Input == DenormalMode::IEEE))
+ Known.knownNot(fcNegZero);
+
+ break;
+ }
case Intrinsic::trunc:
- return isKnownNeverInfinity(Inst->getOperand(0), TLI, Depth + 1);
case Intrinsic::floor:
case Intrinsic::ceil:
case Intrinsic::rint:
case Intrinsic::nearbyint:
case Intrinsic::round:
- case Intrinsic::roundeven:
- // PPC_FP128 is a special case.
- if (V->getType()->isMultiUnitFPType())
- return false;
- return isKnownNeverInfinity(Inst->getOperand(0), TLI, Depth + 1);
- case Intrinsic::fptrunc_round:
- // Requires knowing the value range.
- return false;
- case Intrinsic::minnum:
- case Intrinsic::maxnum:
- case Intrinsic::minimum:
- case Intrinsic::maximum:
- return isKnownNeverInfinity(Inst->getOperand(0), TLI, Depth + 1) &&
- isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1);
+ case Intrinsic::roundeven: {
+ KnownFPClass KnownSrc;
+ FPClassTest InterestedSrcs = InterestedClasses;
+ if (InterestedSrcs & fcPosFinite)
+ InterestedSrcs |= fcPosFinite;
+ if (InterestedSrcs & fcNegFinite)
+ InterestedSrcs |= fcNegFinite;
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts,
+ InterestedSrcs, KnownSrc, Depth + 1, Q);
+
+ // Integer results cannot be subnormal.
+ Known.knownNot(fcSubnormal);
+
+ Known.propagateNaN(KnownSrc, true);
+
+ // Pass through infinities, except PPC_FP128 is a special case for
+ // intrinsics other than trunc.
+ if (IID == Intrinsic::trunc || !V->getType()->isMultiUnitFPType()) {
+ if (KnownSrc.isKnownNeverPosInfinity())
+ Known.knownNot(fcPosInf);
+ if (KnownSrc.isKnownNeverNegInfinity())
+ Known.knownNot(fcNegInf);
+ }
+
+ // Rounding a negative value up to zero produces -0, so the sign of finite
+ // results matches the source.
+ if (KnownSrc.isKnownNever(fcPosFinite))
+ Known.knownNot(fcPosFinite);
+ if (KnownSrc.isKnownNever(fcNegFinite))
+ Known.knownNot(fcNegFinite);
+
+ break;
+ }
+ case Intrinsic::exp:
+ case Intrinsic::exp2: {
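+ // exp/exp2 never produce a negative result or -0; underflow gives +0 and
+ // overflow gives +inf.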
+ Known.knownNot(fcNegative);
+ if ((InterestedClasses & fcNan) == fcNone)
+ break;
+
+ KnownFPClass KnownSrc;
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts,
+ InterestedClasses, KnownSrc, Depth + 1, Q);
+ if (KnownSrc.isKnownNeverNaN()) {
+ Known.knownNot(fcNan);
+ Known.SignBit = false;
+ }
+
+ break;
+ }
+ case Intrinsic::fptrunc_round: {
+ computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses,
+ Known, Depth, Q);
+ break;
+ }
case Intrinsic::log:
case Intrinsic::log10:
case Intrinsic::log2:
+ case Intrinsic::experimental_constrained_log:
+ case Intrinsic::experimental_constrained_log10:
+ case Intrinsic::experimental_constrained_log2: {
// log(+inf) -> +inf
// log([+-]0.0) -> -inf
// log(-inf) -> nan
// log(-x) -> nan
- // TODO: We lack API to check the == 0 case.
- return false;
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::pow:
- case Intrinsic::powi:
- case Intrinsic::fma:
- case Intrinsic::fmuladd:
- // These can return infinities on overflow cases, so it's hard to prove
- // anything about it.
- return false;
+ if ((InterestedClasses & (fcNan | fcInf)) == fcNone)
+ break;
+
+ FPClassTest InterestedSrcs = InterestedClasses;
+ if ((InterestedClasses & fcNegInf) != fcNone)
+ InterestedSrcs |= fcZero | fcSubnormal;
+ if ((InterestedClasses & fcNan) != fcNone)
+ InterestedSrcs |= fcNan | (fcNegative & ~fcNan);
+
+ KnownFPClass KnownSrc;
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts, InterestedSrcs,
+ KnownSrc, Depth + 1, Q);
+
+ if (KnownSrc.isKnownNeverPosInfinity())
+ Known.knownNot(fcPosInf);
+
+ if (KnownSrc.isKnownNeverNaN() &&
+ KnownSrc.cannotBeOrderedLessThanZero())
+ Known.knownNot(fcNan);
+
+ const Function *F = II->getFunction();
+ if (F && KnownSrc.isKnownNeverLogicalZero(*F, II->getType()))
+ Known.knownNot(fcNegInf);
+
+ break;
+ }
+ case Intrinsic::powi: {
+ if ((InterestedClasses & fcNegative) == fcNone)
+ break;
+
+ const Value *Exp = II->getArgOperand(1);
+ Type *ExpTy = Exp->getType();
+ unsigned BitWidth = ExpTy->getScalarType()->getIntegerBitWidth();
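+ // Only the parity of the integer exponent matters for the sign of the result.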
+ KnownBits ExponentKnownBits(BitWidth);
+ computeKnownBits(Exp,
+ isa<VectorType>(ExpTy) ? DemandedElts : APInt(1, 1),
+ ExponentKnownBits, Depth + 1, Q);
+
+ if (ExponentKnownBits.Zero[0]) { // Is even
+ Known.knownNot(fcNegative);
+ break;
+ }
+
+ // Given that exp is an integer, here are the
+ // ways that pow can return a negative value:
+ //
+ // pow(-x, exp) --> negative if exp is odd and x is negative.
+ // pow(-0, exp) --> -inf if exp is negative odd.
+ // pow(-0, exp) --> -0 if exp is positive odd.
+ // pow(-inf, exp) --> -0 if exp is negative odd.
+ // pow(-inf, exp) --> -inf if exp is positive odd.
+ KnownFPClass KnownSrc;
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts, fcNegative,
+ KnownSrc, Depth + 1, Q);
+ if (KnownSrc.isKnownNever(fcNegative))
+ Known.knownNot(fcNegative);
+ break;
+ }
+ case Intrinsic::ldexp: {
+ KnownFPClass KnownSrc;
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts,
+ InterestedClasses, KnownSrc, Depth + 1, Q);
+ Known.propagateNaN(KnownSrc, /*PreserveSign=*/true);
+
+ // Sign is preserved, but underflows may produce zeroes.
+ if (KnownSrc.isKnownNever(fcNegative))
+ Known.knownNot(fcNegative);
+ else if (KnownSrc.cannotBeOrderedLessThanZero())
+ Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
+
+ if (KnownSrc.isKnownNever(fcPositive))
+ Known.knownNot(fcPositive);
+ else if (KnownSrc.cannotBeOrderedGreaterThanZero())
+ Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
+
+ // Can refine inf/zero handling based on the exponent operand.
+ const FPClassTest ExpInfoMask = fcZero | fcSubnormal | fcInf;
+ if ((InterestedClasses & ExpInfoMask) == fcNone)
+ break;
+ if ((KnownSrc.KnownFPClasses & ExpInfoMask) == fcNone)
+ break;
+
+ const fltSemantics &Flt
+ = II->getType()->getScalarType()->getFltSemantics();
+ unsigned Precision = APFloat::semanticsPrecision(Flt);
+ const Value *ExpArg = II->getArgOperand(1);
+ ConstantRange ExpRange = computeConstantRange(
+ ExpArg, true, Q.IIQ.UseInstrInfo, Q.AC, Q.CxtI, Q.DT, Depth + 1);
+
+ const int MantissaBits = Precision - 1;
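+ // Scaling by at least the number of mantissa bits brings any subnormal into
+ // the normal range, so the result cannot be subnormal.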
+ if (ExpRange.getSignedMin().sge(static_cast<int64_t>(MantissaBits)))
+ Known.knownNot(fcSubnormal);
+
+ const Function *F = II->getFunction();
+ const APInt *ConstVal = ExpRange.getSingleElement();
+ if (ConstVal && ConstVal->isZero()) {
+ // ldexp(x, 0) -> x, so propagate everything.
+ Known.propagateCanonicalizingSrc(KnownSrc, *F,
+ II->getType());
+ } else if (ExpRange.isAllNegative()) {
+ // If we know the power is <= 0, can't introduce inf
+ if (KnownSrc.isKnownNeverPosInfinity())
+ Known.knownNot(fcPosInf);
+ if (KnownSrc.isKnownNeverNegInfinity())
+ Known.knownNot(fcNegInf);
+ } else if (ExpRange.isAllNonNegative()) {
+ // If we know the power is >= 0, can't introduce subnormal or zero
+ if (KnownSrc.isKnownNeverPosSubnormal())
+ Known.knownNot(fcPosSubnormal);
+ if (KnownSrc.isKnownNeverNegSubnormal())
+ Known.knownNot(fcNegSubnormal);
+ if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, II->getType()))
+ Known.knownNot(fcPosZero);
+ if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, II->getType()))
+ Known.knownNot(fcNegZero);
+ }
+
+ break;
+ }
+ case Intrinsic::arithmetic_fence: {
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts,
+ InterestedClasses, Known, Depth + 1, Q);
+ break;
+ }
+ case Intrinsic::experimental_constrained_sitofp:
+ case Intrinsic::experimental_constrained_uitofp:
+ // Cannot produce nan
+ Known.knownNot(fcNan);
+
+ // sitofp and uitofp turn into +0.0 for zero.
+ Known.knownNot(fcNegZero);
+
+ // Integers cannot be subnormal
+ Known.knownNot(fcSubnormal);
+
+ if (IID == Intrinsic::experimental_constrained_uitofp)
+ Known.signBitMustBeZero();
+
+ // TODO: Copy inf handling from instructions
+ break;
default:
break;
}
}
+
+ break;
}
+ case Instruction::FAdd:
+ case Instruction::FSub: {
+ KnownFPClass KnownLHS, KnownRHS;
+ bool WantNegative =
+ Op->getOpcode() == Instruction::FAdd &&
+ (InterestedClasses & KnownFPClass::OrderedLessThanZeroMask) != fcNone;
+ bool WantNaN = (InterestedClasses & fcNan) != fcNone;
+ bool WantNegZero = (InterestedClasses & fcNegZero) != fcNone;
+
+ if (!WantNaN && !WantNegative && !WantNegZero)
+ break;
- // try to handle fixed width vector constants
- auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
- if (VFVTy && isa<Constant>(V)) {
- // For vectors, verify that each element is not infinity.
- unsigned NumElts = VFVTy->getNumElements();
- for (unsigned i = 0; i != NumElts; ++i) {
- Constant *Elt = cast<Constant>(V)->getAggregateElement(i);
- if (!Elt)
- return false;
- if (isa<UndefValue>(Elt))
- continue;
- auto *CElt = dyn_cast<ConstantFP>(Elt);
- if (!CElt || CElt->isInfinity())
- return false;
+ FPClassTest InterestedSrcs = InterestedClasses;
+ if (WantNegative)
+ InterestedSrcs |= KnownFPClass::OrderedLessThanZeroMask;
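+ // inf + -inf (or inf - inf) is the only way non-NaN operands produce a NaN,
+ // so proving the result is not NaN requires infinity information.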
+ if (InterestedClasses & fcNan)
+ InterestedSrcs |= fcInf;
+ computeKnownFPClass(Op->getOperand(1), DemandedElts, InterestedSrcs,
+ KnownRHS, Depth + 1, Q);
+
+ if ((WantNaN && KnownRHS.isKnownNeverNaN()) ||
+ (WantNegative && KnownRHS.cannotBeOrderedLessThanZero()) ||
+ WantNegZero || Opc == Instruction::FSub) {
+
+ // RHS is canonically cheaper to compute. Skip inspecting the LHS if
+ // there's no point.
+ computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedSrcs,
+ KnownLHS, Depth + 1, Q);
+ // Adding positive and negative infinity produces NaN.
+ // TODO: Check sign of infinities.
+ if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
+ (KnownLHS.isKnownNeverInfinity() || KnownRHS.isKnownNeverInfinity()))
+ Known.knownNot(fcNan);
+
+ // FIXME: Context function should always be passed in separately
+ const Function *F = cast<Instruction>(Op)->getFunction();
+
+ if (Op->getOpcode() == Instruction::FAdd) {
+ if (KnownLHS.cannotBeOrderedLessThanZero() &&
+ KnownRHS.cannotBeOrderedLessThanZero())
+ Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
+ if (!F)
+ break;
+
+ // (fadd x, 0.0) is guaranteed to return +0.0, not -0.0.
+ if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) ||
+ KnownRHS.isKnownNeverLogicalNegZero(*F, Op->getType())) &&
+ // Make sure output negative denormal can't flush to -0
+ outputDenormalIsIEEEOrPosZero(*F, Op->getType()))
+ Known.knownNot(fcNegZero);
+ } else {
+ if (!F)
+ break;
+
+ // Only fsub -0, +0 can return -0
+ if ((KnownLHS.isKnownNeverLogicalNegZero(*F, Op->getType()) ||
+ KnownRHS.isKnownNeverLogicalPosZero(*F, Op->getType())) &&
+ // Make sure output negative denormal can't flush to -0
+ outputDenormalIsIEEEOrPosZero(*F, Op->getType()))
+ Known.knownNot(fcNegZero);
+ }
}
- // All elements were confirmed non-infinity or undefined.
- return true;
+
+ break;
}
+ case Instruction::FMul: {
+ // X * X is always non-negative or a NaN.
+ if (Op->getOperand(0) == Op->getOperand(1))
+ Known.knownNot(fcNegative);
- // was not able to prove that V never contains infinity
- return false;
-}
+ if ((InterestedClasses & fcNan) != fcNan)
+ break;
-bool llvm::isKnownNeverNaN(const Value *V, const TargetLibraryInfo *TLI,
- unsigned Depth) {
- assert(V->getType()->isFPOrFPVectorTy() && "Querying for NaN on non-FP type");
+ // fcSubnormal is only needed in case of DAZ.
+ const FPClassTest NeedForNan = fcNan | fcInf | fcZero | fcSubnormal;
- // If we're told that NaNs won't happen, assume they won't.
- if (auto *FPMathOp = dyn_cast<FPMathOperator>(V))
- if (FPMathOp->hasNoNaNs())
- return true;
+ KnownFPClass KnownLHS, KnownRHS;
+ computeKnownFPClass(Op->getOperand(1), DemandedElts, NeedForNan, KnownRHS,
+ Depth + 1, Q);
+ if (!KnownRHS.isKnownNeverNaN())
+ break;
- // Handle scalar constants.
- if (auto *CFP = dyn_cast<ConstantFP>(V))
- return !CFP->isNaN();
+ computeKnownFPClass(Op->getOperand(0), DemandedElts, NeedForNan, KnownLHS,
+ Depth + 1, Q);
+ if (!KnownLHS.isKnownNeverNaN())
+ break;
- if (Depth == MaxAnalysisRecursionDepth)
- return false;
+ // Only 0 * +/-inf produces NaN for non-NaN operands; if neither operand can
+ // be infinity, the result cannot be NaN.
+ if (KnownLHS.isKnownNeverInfinity() && KnownRHS.isKnownNeverInfinity()) {
+ Known.knownNot(fcNan);
+ break;
+ }
- if (auto *Inst = dyn_cast<Instruction>(V)) {
- switch (Inst->getOpcode()) {
- case Instruction::FAdd:
- case Instruction::FSub:
- // Adding positive and negative infinity produces NaN.
- return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1) &&
- isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) &&
- (isKnownNeverInfinity(Inst->getOperand(0), TLI, Depth + 1) ||
- isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1));
-
- case Instruction::FMul:
- // Zero multiplied with infinity produces NaN.
- // FIXME: If neither side can be zero fmul never produces NaN.
- return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1) &&
- isKnownNeverInfinity(Inst->getOperand(0), TLI, Depth + 1) &&
- isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) &&
- isKnownNeverInfinity(Inst->getOperand(1), TLI, Depth + 1);
-
- case Instruction::FDiv:
- case Instruction::FRem:
- // FIXME: Only 0/0, Inf/Inf, Inf REM x and x REM 0 produce NaN.
- return false;
+ const Function *F = cast<Instruction>(Op)->getFunction();
+ if (!F)
+ break;
- case Instruction::Select: {
- return isKnownNeverNaN(Inst->getOperand(1), TLI, Depth + 1) &&
- isKnownNeverNaN(Inst->getOperand(2), TLI, Depth + 1);
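+ // Otherwise the result is not NaN if, for each operand, either that operand
+ // cannot be infinity or the other cannot be a (possibly flushed) zero.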
+ if ((KnownRHS.isKnownNeverInfinity() ||
+ KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) &&
+ (KnownLHS.isKnownNeverInfinity() ||
+ KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())))
+ Known.knownNot(fcNan);
+
+ break;
+ }
+ case Instruction::FDiv:
+ case Instruction::FRem: {
+ if (Op->getOperand(0) == Op->getOperand(1)) {
+ // TODO: Could filter out snan if we inspect the operand
+ if (Op->getOpcode() == Instruction::FDiv) {
+ // X / X is always exactly 1.0 or a NaN.
+ Known.KnownFPClasses = fcNan | fcPosNormal;
+ } else {
+ // X % X is always exactly [+-]0.0 or a NaN.
+ Known.KnownFPClasses = fcNan | fcZero;
+ }
+
+ break;
}
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- return true;
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::FNeg:
- return isKnownNeverNaN(Inst->getOperand(0), TLI, Depth + 1);
- default:
+
+ const bool WantNan = (InterestedClasses & fcNan) != fcNone;
+ const bool WantNegative = (InterestedClasses & fcNegative) != fcNone;
+ const bool WantPositive =
+ Opc == Instruction::FRem && (InterestedClasses & fcPositive) != fcNone;
+ if (!WantNan && !WantNegative && !WantPositive)
break;
+
+ KnownFPClass KnownLHS, KnownRHS;
+
+ computeKnownFPClass(Op->getOperand(1), DemandedElts,
+ fcNan | fcInf | fcZero | fcNegative, KnownRHS,
+ Depth + 1, Q);
+
+ bool KnowSomethingUseful =
+ KnownRHS.isKnownNeverNaN() || KnownRHS.isKnownNever(fcNegative);
+
+ if (KnowSomethingUseful || WantPositive) {
+ const FPClassTest InterestedLHS =
+ WantPositive ? fcAllFlags
+ : fcNan | fcInf | fcZero | fcSubnormal | fcNegative;
+
+ computeKnownFPClass(Op->getOperand(0), DemandedElts,
+ InterestedClasses & InterestedLHS, KnownLHS,
+ Depth + 1, Q);
}
+
+ const Function *F = cast<Instruction>(Op)->getFunction();
+
+ if (Op->getOpcode() == Instruction::FDiv) {
+ // Only 0/0, Inf/Inf produce NaN.
+ if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
+ (KnownLHS.isKnownNeverInfinity() ||
+ KnownRHS.isKnownNeverInfinity()) &&
+ ((F && KnownLHS.isKnownNeverLogicalZero(*F, Op->getType())) ||
+ (F && KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())))) {
+ Known.knownNot(fcNan);
+ }
+
+ // If neither operand can be negative, the quotient cannot be negative.
+ // (The RHS check matters because +X / -0.0 would be -Inf.)
+ if (KnownLHS.isKnownNever(fcNegative) && KnownRHS.isKnownNever(fcNegative))
+ Known.knownNot(fcNegative);
+ } else {
+ // Inf REM x and x REM 0 produce NaN.
+ if (KnownLHS.isKnownNeverNaN() && KnownRHS.isKnownNeverNaN() &&
+ KnownLHS.isKnownNeverInfinity() && F &&
+ KnownRHS.isKnownNeverLogicalZero(*F, Op->getType())) {
+ Known.knownNot(fcNan);
+ }
+
+ // The sign for frem is the same as the first operand.
+ if (KnownLHS.cannotBeOrderedLessThanZero())
+ Known.knownNot(KnownFPClass::OrderedLessThanZeroMask);
+ if (KnownLHS.cannotBeOrderedGreaterThanZero())
+ Known.knownNot(KnownFPClass::OrderedGreaterThanZeroMask);
+
+ // See if we can be more aggressive about the sign of 0.
+ if (KnownLHS.isKnownNever(fcNegative))
+ Known.knownNot(fcNegative);
+ if (KnownLHS.isKnownNever(fcPositive))
+ Known.knownNot(fcPositive);
+ }
+
+ break;
}
+ case Instruction::FPExt: {
+ // Infinity, nan and zero propagate from source.
+ computeKnownFPClass(Op->getOperand(0), DemandedElts, InterestedClasses,
+ Known, Depth + 1, Q);
- if (const auto *II = dyn_cast<IntrinsicInst>(V)) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::canonicalize:
- case Intrinsic::fabs:
- case Intrinsic::copysign:
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::floor:
- case Intrinsic::ceil:
- case Intrinsic::trunc:
- case Intrinsic::rint:
- case Intrinsic::nearbyint:
- case Intrinsic::round:
- case Intrinsic::roundeven:
- case Intrinsic::arithmetic_fence:
- return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1);
- case Intrinsic::sqrt:
- return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) &&
- CannotBeOrderedLessThanZero(II->getArgOperand(0), TLI);
- case Intrinsic::minnum:
- case Intrinsic::maxnum:
- // If either operand is not NaN, the result is not NaN.
- return isKnownNeverNaN(II->getArgOperand(0), TLI, Depth + 1) ||
- isKnownNeverNaN(II->getArgOperand(1), TLI, Depth + 1);
- default:
- return false;
+ const fltSemantics &DstTy =
+ Op->getType()->getScalarType()->getFltSemantics();
+ const fltSemantics &SrcTy =
+ Op->getOperand(0)->getType()->getScalarType()->getFltSemantics();
+
+ // All subnormal inputs should be in the normal range in the result type.
+ if (APFloat::isRepresentableAsNormalIn(SrcTy, DstTy))
+ Known.knownNot(fcSubnormal);
+
+ // Sign bit of a nan isn't guaranteed.
+ if (!Known.isKnownNeverNaN())
+ Known.SignBit = std::nullopt;
+ break;
+ }
+ case Instruction::FPTrunc: {
+ computeKnownFPClassForFPTrunc(Op, DemandedElts, InterestedClasses, Known,
+ Depth, Q);
+ break;
+ }
+ case Instruction::SIToFP:
+ case Instruction::UIToFP: {
+ // Cannot produce nan
+ Known.knownNot(fcNan);
+
+ // Integers cannot be subnormal
+ Known.knownNot(fcSubnormal);
+
+ // sitofp and uitofp turn into +0.0 for zero.
+ Known.knownNot(fcNegZero);
+ if (Op->getOpcode() == Instruction::UIToFP)
+ Known.signBitMustBeZero();
+
+ if (InterestedClasses & fcInf) {
+ // Get width of largest magnitude integer (remove a bit if signed).
+ // This still works for a signed minimum value because the largest FP
+ // value is scaled by some fraction close to 2.0 (1.0 + 0.xxxx).
+ int IntSize = Op->getOperand(0)->getType()->getScalarSizeInBits();
+ if (Op->getOpcode() == Instruction::SIToFP)
+ --IntSize;
+
+ // If the exponent of the largest finite FP value can hold the largest
+ // integer, the result of the cast must be finite.
+ Type *FPTy = Op->getType()->getScalarType();
+ if (ilogb(APFloat::getLargest(FPTy->getFltSemantics())) >= IntSize)
+ Known.knownNot(fcInf);
}
+
+ break;
}
+ case Instruction::ExtractElement: {
+ // Look through extractelement. If the index is non-constant or
+ // out-of-range, demand all elements; otherwise demand just the extracted one.
+ const Value *Vec = Op->getOperand(0);
+ const Value *Idx = Op->getOperand(1);
+ auto *CIdx = dyn_cast<ConstantInt>(Idx);
- // Try to handle fixed width vector constants
- auto *VFVTy = dyn_cast<FixedVectorType>(V->getType());
- if (VFVTy && isa<Constant>(V)) {
- // For vectors, verify that each element is not NaN.
- unsigned NumElts = VFVTy->getNumElements();
- for (unsigned i = 0; i != NumElts; ++i) {
- Constant *Elt = cast<Constant>(V)->getAggregateElement(i);
- if (!Elt)
- return false;
- if (isa<UndefValue>(Elt))
- continue;
- auto *CElt = dyn_cast<ConstantFP>(Elt);
- if (!CElt || CElt->isNaN())
- return false;
+ if (auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType())) {
+ unsigned NumElts = VecTy->getNumElements();
+ APInt DemandedVecElts = APInt::getAllOnes(NumElts);
+ if (CIdx && CIdx->getValue().ult(NumElts))
+ DemandedVecElts = APInt::getOneBitSet(NumElts, CIdx->getZExtValue());
+ return computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known,
+ Depth + 1, Q);
}
- // All elements were confirmed not-NaN or undefined.
- return true;
+
+ break;
}
+ case Instruction::InsertElement: {
+ if (isa<ScalableVectorType>(Op->getType()))
+ return;
- // Was not able to prove that V never contains NaN
- return false;
+ const Value *Vec = Op->getOperand(0);
+ const Value *Elt = Op->getOperand(1);
+ auto *CIdx = dyn_cast<ConstantInt>(Op->getOperand(2));
+ // Early out if the index is non-constant or out-of-range.
+ unsigned NumElts = DemandedElts.getBitWidth();
+ if (!CIdx || CIdx->getValue().uge(NumElts))
+ return;
+
+ unsigned EltIdx = CIdx->getZExtValue();
+ // Do we demand the inserted element?
+ if (DemandedElts[EltIdx]) {
+ computeKnownFPClass(Elt, Known, InterestedClasses, Depth + 1, Q);
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ } else {
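+ // The inserted element is not demanded, so only the remaining vector
+ // elements below contribute to the result.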
+ Known.KnownFPClasses = fcNone;
+ }
+
+ // We don't need the base vector element that has been inserted.
+ APInt DemandedVecElts = DemandedElts;
+ DemandedVecElts.clearBit(EltIdx);
+ if (!!DemandedVecElts) {
+ KnownFPClass Known2;
+ computeKnownFPClass(Vec, DemandedVecElts, InterestedClasses, Known2,
+ Depth + 1, Q);
+ Known |= Known2;
+ }
+
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ // For undef elements, we don't know anything about the common state of
+ // the shuffle result.
+ APInt DemandedLHS, DemandedRHS;
+ auto *Shuf = dyn_cast<ShuffleVectorInst>(Op);
+ if (!Shuf || !getShuffleDemandedElts(Shuf, DemandedElts, DemandedLHS, DemandedRHS))
+ return;
+
+ if (!!DemandedLHS) {
+ const Value *LHS = Shuf->getOperand(0);
+ computeKnownFPClass(LHS, DemandedLHS, InterestedClasses, Known,
+ Depth + 1, Q);
+
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ } else {
+ Known.KnownFPClasses = fcNone;
+ }
+
+ if (!!DemandedRHS) {
+ KnownFPClass Known2;
+ const Value *RHS = Shuf->getOperand(1);
+ computeKnownFPClass(RHS, DemandedRHS, InterestedClasses, Known2,
+ Depth + 1, Q);
+ Known |= Known2;
+ }
+
+ break;
+ }
+ case Instruction::ExtractValue: {
+ const ExtractValueInst *Extract = cast<ExtractValueInst>(Op);
+ ArrayRef<unsigned> Indices = Extract->getIndices();
+ const Value *Src = Extract->getAggregateOperand();
+ if (isa<StructType>(Src->getType()) && Indices.size() == 1 &&
+ Indices[0] == 0) {
+ if (const auto *II = dyn_cast<IntrinsicInst>(Src)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::frexp: {
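+ // The fraction result of frexp has magnitude in [0.5, 1.0) (or is zero,
+ // infinity or NaN), so it can never be subnormal; its sign matches the input.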
+ Known.knownNot(fcSubnormal);
+
+ KnownFPClass KnownSrc;
+ computeKnownFPClass(II->getArgOperand(0), DemandedElts,
+ InterestedClasses, KnownSrc, Depth + 1, Q);
+
+ const Function *F = cast<Instruction>(Op)->getFunction();
+
+ if (KnownSrc.isKnownNever(fcNegative))
+ Known.knownNot(fcNegative);
+ else {
+ if (F && KnownSrc.isKnownNeverLogicalNegZero(*F, Op->getType()))
+ Known.knownNot(fcNegZero);
+ if (KnownSrc.isKnownNever(fcNegInf))
+ Known.knownNot(fcNegInf);
+ }
+
+ if (KnownSrc.isKnownNever(fcPositive))
+ Known.knownNot(fcPositive);
+ else {
+ if (F && KnownSrc.isKnownNeverLogicalPosZero(*F, Op->getType()))
+ Known.knownNot(fcPosZero);
+ if (KnownSrc.isKnownNever(fcPosInf))
+ Known.knownNot(fcPosInf);
+ }
+
+ Known.propagateNaN(KnownSrc);
+ return;
+ }
+ default:
+ break;
+ }
+ }
+ }
+
+ computeKnownFPClass(Src, DemandedElts, InterestedClasses, Known, Depth + 1,
+ Q);
+ break;
+ }
+ case Instruction::PHI: {
+ const PHINode *P = cast<PHINode>(Op);
+ // Unreachable blocks may have zero-operand PHI nodes.
+ if (P->getNumIncomingValues() == 0)
+ break;
+
+ // Otherwise take the union of the known FP classes of the incoming values,
+ // taking conservative care to avoid excessive recursion.
+ const unsigned PhiRecursionLimit = MaxAnalysisRecursionDepth - 2;
+
+ if (Depth < PhiRecursionLimit) {
+ // Skip if every incoming value refers back to the PHI itself.
+ if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
+ break;
+
+ bool First = true;
+
+ for (Value *IncValue : P->incoming_values()) {
+ // Skip direct self references.
+ if (IncValue == P)
+ continue;
+
+ KnownFPClass KnownSrc;
+ // Recurse, but cap the recursion to two levels, because we don't want
+ // to waste time spinning around in loops. We need at least depth 2 to
+ // detect known sign bits.
+ computeKnownFPClass(IncValue, DemandedElts, InterestedClasses, KnownSrc,
+ PhiRecursionLimit, Q);
+
+ if (First) {
+ Known = KnownSrc;
+ First = false;
+ } else {
+ Known |= KnownSrc;
+ }
+
+ if (Known.KnownFPClasses == fcAllFlags)
+ break;
+ }
+ }
+
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+KnownFPClass llvm::computeKnownFPClass(
+ const Value *V, const APInt &DemandedElts, const DataLayout &DL,
+ FPClassTest InterestedClasses, unsigned Depth, const TargetLibraryInfo *TLI,
+ AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
+ bool UseInstrInfo) {
+ KnownFPClass KnownClasses;
+ ::computeKnownFPClass(
+ V, DemandedElts, InterestedClasses, KnownClasses, Depth,
+ SimplifyQuery(DL, TLI, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
+ return KnownClasses;
+}
+
+KnownFPClass llvm::computeKnownFPClass(
+ const Value *V, const DataLayout &DL, FPClassTest InterestedClasses,
+ unsigned Depth, const TargetLibraryInfo *TLI, AssumptionCache *AC,
+ const Instruction *CxtI, const DominatorTree *DT, bool UseInstrInfo) {
+ KnownFPClass Known;
+ ::computeKnownFPClass(
+ V, Known, InterestedClasses, Depth,
+ SimplifyQuery(DL, TLI, DT, AC, safeCxtI(V, CxtI), UseInstrInfo));
+ return Known;
}
Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) {
@@ -4530,6 +5773,16 @@ bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
case Intrinsic::strip_invariant_group:
case Intrinsic::aarch64_irg:
case Intrinsic::aarch64_tagp:
+ // The amdgcn_make_buffer_rsrc function does not alter the address of the
+ // input pointer (and thus preserves null-ness for the purposes of escape
+ // analysis, which is where the MustPreserveNullness flag comes into play).
+ // However, it will not necessarily map ptr addrspace(N) null to ptr
+ // addrspace(8) null, aka the "null descriptor", which has "all loads return
+ // 0, all stores are dropped" semantics. Given the context of this intrinsic
+ // list, no one should be relying on such a strict interpretation of
+ // MustPreserveNullness (and, at time of writing, they are not), but we
+ // document this fact out of an abundance of caution.
+ case Intrinsic::amdgcn_make_buffer_rsrc:
return true;
case Intrinsic::ptrmask:
return !MustPreserveNullness;
@@ -4941,11 +6194,10 @@ static OverflowResult mapOverflowResult(ConstantRange::OverflowResult OR) {
static ConstantRange computeConstantRangeIncludingKnownBits(
const Value *V, bool ForSigned, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
- OptimizationRemarkEmitter *ORE = nullptr, bool UseInstrInfo = true) {
- KnownBits Known = computeKnownBits(
- V, DL, Depth, AC, CxtI, DT, ORE, UseInstrInfo);
+ bool UseInstrInfo = true) {
+ KnownBits Known = computeKnownBits(V, DL, Depth, AC, CxtI, DT, UseInstrInfo);
ConstantRange CR1 = ConstantRange::fromKnownBits(Known, ForSigned);
- ConstantRange CR2 = computeConstantRange(V, UseInstrInfo);
+ ConstantRange CR2 = computeConstantRange(V, ForSigned, UseInstrInfo);
ConstantRange::PreferredRangeType RangeType =
ForSigned ? ConstantRange::Signed : ConstantRange::Unsigned;
return CR1.intersectWith(CR2, RangeType);
@@ -4956,9 +6208,9 @@ OverflowResult llvm::computeOverflowForUnsignedMul(
AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
bool UseInstrInfo) {
KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
- nullptr, UseInstrInfo);
+ UseInstrInfo);
KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
- nullptr, UseInstrInfo);
+ UseInstrInfo);
ConstantRange LHSRange = ConstantRange::fromKnownBits(LHSKnown, false);
ConstantRange RHSRange = ConstantRange::fromKnownBits(RHSKnown, false);
return mapOverflowResult(LHSRange.unsignedMulMayOverflow(RHSRange));
@@ -4998,9 +6250,9 @@ llvm::computeOverflowForSignedMul(const Value *LHS, const Value *RHS,
// E.g. mul i16 with 17 sign bits: 0xff00 * 0xff80 = 0x8000
// For simplicity we just check if at least one side is not negative.
KnownBits LHSKnown = computeKnownBits(LHS, DL, /*Depth=*/0, AC, CxtI, DT,
- nullptr, UseInstrInfo);
+ UseInstrInfo);
KnownBits RHSKnown = computeKnownBits(RHS, DL, /*Depth=*/0, AC, CxtI, DT,
- nullptr, UseInstrInfo);
+ UseInstrInfo);
if (LHSKnown.isNonNegative() || RHSKnown.isNonNegative())
return OverflowResult::NeverOverflows;
}
@@ -5012,11 +6264,9 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(
AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT,
bool UseInstrInfo) {
ConstantRange LHSRange = computeConstantRangeIncludingKnownBits(
- LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT,
- nullptr, UseInstrInfo);
+ LHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, UseInstrInfo);
ConstantRange RHSRange = computeConstantRangeIncludingKnownBits(
- RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT,
- nullptr, UseInstrInfo);
+ RHS, /*ForSigned=*/false, DL, /*Depth=*/0, AC, CxtI, DT, UseInstrInfo);
return mapOverflowResult(LHSRange.unsignedAddMayOverflow(RHSRange));
}
@@ -5074,7 +6324,8 @@ static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) {
KnownBits AddKnown(LHSRange.getBitWidth());
computeKnownBitsFromAssume(
- Add, AddKnown, /*Depth=*/0, Query(DL, AC, CxtI, DT, true));
+ Add, AddKnown, /*Depth=*/0,
+ SimplifyQuery(DL, /*TLI*/ nullptr, DT, AC, CxtI, DT));
if ((AddKnown.isNonNegative() && LHSOrRHSKnownNonNegative) ||
(AddKnown.isNegative() && LHSOrRHSKnownNegative))
return OverflowResult::NeverOverflows;
@@ -5346,7 +6597,7 @@ static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly,
ArrayRef<int> Mask = isa<ConstantExpr>(Op)
? cast<ConstantExpr>(Op)->getShuffleMask()
: cast<ShuffleVectorInst>(Op)->getShuffleMask();
- return is_contained(Mask, UndefMaskElem);
+ return is_contained(Mask, PoisonMaskElem);
}
case Instruction::FNeg:
case Instruction::PHI:
@@ -5421,7 +6672,7 @@ static bool directlyImpliesPoison(const Value *ValAssumedPoison,
static bool impliesPoison(const Value *ValAssumedPoison, const Value *V,
unsigned Depth) {
- if (isGuaranteedNotToBeUndefOrPoison(ValAssumedPoison))
+ if (isGuaranteedNotToBePoison(ValAssumedPoison))
return true;
if (directlyImpliesPoison(ValAssumedPoison, V, /* Depth */ 0))
@@ -5459,7 +6710,9 @@ static bool isGuaranteedNotToBeUndefOrPoison(const Value *V,
return false;
if (const auto *A = dyn_cast<Argument>(V)) {
- if (A->hasAttribute(Attribute::NoUndef))
+ if (A->hasAttribute(Attribute::NoUndef) ||
+ A->hasAttribute(Attribute::Dereferenceable) ||
+ A->hasAttribute(Attribute::DereferenceableOrNull))
return true;
}
@@ -5592,6 +6845,50 @@ bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC,
return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, true);
}
+/// Return true if undefined behavior would provably be executed on the path to
+/// OnPathTo if Root produced a poison result. Note that this doesn't say
+/// anything about whether OnPathTo is actually executed or whether Root is
+/// actually poison. This can be used to assess whether a new use of Root can
+/// be added at a location which is control equivalent with OnPathTo (such as
+/// immediately before it) without introducing UB which didn't previously
+/// exist. Note that a false result conveys no information.
+bool llvm::mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
+ Instruction *OnPathTo,
+ DominatorTree *DT) {
+ // Basic approach is to assume Root is poison, propagate poison forward
+ // through all users we can easily track, and then check whether any of those
+ // users are provably UB and must execute before our exiting block might
+ // exit.
+
+ // The set of all recursive users we've visited (which are assumed to all be
+ // poison because of said visit)
+ SmallSet<const Value *, 16> KnownPoison;
+ SmallVector<const Instruction*, 16> Worklist;
+ Worklist.push_back(Root);
+ while (!Worklist.empty()) {
+ const Instruction *I = Worklist.pop_back_val();
+
+ // If we know this must trigger UB on a path leading to our target.
+ if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
+ return true;
+
+ // If we can't analyze propagation through this instruction, just skip it
+ // and transitive users. Safe as false is a conservative result.
+ if (I != Root && !any_of(I->operands(), [&KnownPoison](const Use &U) {
+ return KnownPoison.contains(U) && propagatesPoison(U);
+ }))
+ continue;
+
+ if (KnownPoison.insert(I).second)
+ for (const User *User : I->users())
+ Worklist.push_back(cast<Instruction>(User));
+ }
+
+ // Might be non-UB, or might have a path we couldn't prove must execute on
+ // the way to the exiting bb.
+ return false;
+}
+
OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add,
const DataLayout &DL,
AssumptionCache *AC,
@@ -5756,7 +7053,8 @@ void llvm::getGuaranteedWellDefinedOps(
Operands.push_back(CB->getCalledOperand());
for (unsigned i = 0; i < CB->arg_size(); ++i) {
if (CB->paramHasAttr(i, Attribute::NoUndef) ||
- CB->paramHasAttr(i, Attribute::Dereferenceable))
+ CB->paramHasAttr(i, Attribute::Dereferenceable) ||
+ CB->paramHasAttr(i, Attribute::DereferenceableOrNull))
Operands.push_back(CB->getArgOperand(i));
}
break;
@@ -5796,7 +7094,7 @@ void llvm::getGuaranteedNonPoisonOps(const Instruction *I,
}
bool llvm::mustTriggerUB(const Instruction *I,
- const SmallSet<const Value *, 16>& KnownPoison) {
+ const SmallPtrSetImpl<const Value *> &KnownPoison) {
SmallVector<const Value *, 4> NonPoisonOps;
getGuaranteedNonPoisonOps(I, NonPoisonOps);
@@ -5882,6 +7180,15 @@ static bool programUndefinedIfUndefOrPoison(const Value *V,
break;
}
}
+
+ // Special handling for select, which returns poison if its operand 0 is
+ // poison (handled in the loop above) *or* if both its true/false operands
+ // are poison (handled here).
+ if (I.getOpcode() == Instruction::Select &&
+ YieldsPoison.count(I.getOperand(1)) &&
+ YieldsPoison.count(I.getOperand(2))) {
+ YieldsPoison.insert(&I);
+ }
}
BB = BB->getSingleSuccessor();
@@ -6618,6 +7925,12 @@ Intrinsic::ID llvm::getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID) {
case Intrinsic::smin: return Intrinsic::smax;
case Intrinsic::umax: return Intrinsic::umin;
case Intrinsic::umin: return Intrinsic::umax;
+ // Note that the next four intrinsics may produce the same result for the
+ // original and the inverted case even if X != Y, because NaN is handled
+ // specially.
+ case Intrinsic::maximum: return Intrinsic::minimum;
+ case Intrinsic::minimum: return Intrinsic::maximum;
+ case Intrinsic::maxnum: return Intrinsic::minnum;
+ case Intrinsic::minnum: return Intrinsic::maxnum;
default: llvm_unreachable("Unexpected intrinsic");
}
}
@@ -6765,6 +8078,10 @@ static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
if (match(RHS, m_NUWAdd(m_Specific(LHS), m_APInt(C))))
return true;
+ // RHS >> V u<= RHS for any V
+ if (match(LHS, m_LShr(m_Specific(RHS), m_Value())))
+ return true;
+
// Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
auto MatchNUWAddsToSameValue = [&](const Value *A, const Value *B,
const Value *&X,
@@ -6813,12 +8130,26 @@ isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS,
return true;
return std::nullopt;
+ case CmpInst::ICMP_SGT:
+ case CmpInst::ICMP_SGE:
+ if (isTruePredicate(CmpInst::ICMP_SLE, ALHS, BLHS, DL, Depth) &&
+ isTruePredicate(CmpInst::ICMP_SLE, BRHS, ARHS, DL, Depth))
+ return true;
+ return std::nullopt;
+
case CmpInst::ICMP_ULT:
case CmpInst::ICMP_ULE:
if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth) &&
isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth))
return true;
return std::nullopt;
+
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_UGE:
+ if (isTruePredicate(CmpInst::ICMP_ULE, ALHS, BLHS, DL, Depth) &&
+ isTruePredicate(CmpInst::ICMP_ULE, BRHS, ARHS, DL, Depth))
+ return true;
+ return std::nullopt;
}
}
@@ -7119,7 +8450,7 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
} else if (match(BO.getOperand(0), m_APInt(C))) {
unsigned ShiftAmount = Width - 1;
if (!C->isZero() && IIQ.isExact(&BO))
- ShiftAmount = C->countTrailingZeros();
+ ShiftAmount = C->countr_zero();
if (C->isNegative()) {
// 'ashr C, x' produces [C, C >> (Width-1)]
Lower = *C;
@@ -7140,7 +8471,7 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
// 'lshr C, x' produces [C >> (Width-1), C].
unsigned ShiftAmount = Width - 1;
if (!C->isZero() && IIQ.isExact(&BO))
- ShiftAmount = C->countTrailingZeros();
+ ShiftAmount = C->countr_zero();
Lower = C->lshr(ShiftAmount);
Upper = *C + 1;
}
@@ -7151,16 +8482,16 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
if (IIQ.hasNoUnsignedWrap(&BO)) {
// 'shl nuw C, x' produces [C, C << CLZ(C)]
Lower = *C;
- Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
+ Upper = Lower.shl(Lower.countl_zero()) + 1;
} else if (BO.hasNoSignedWrap()) { // TODO: What if both nuw+nsw?
if (C->isNegative()) {
// 'shl nsw C, x' produces [C << CLO(C)-1, C]
- unsigned ShiftAmount = C->countLeadingOnes() - 1;
+ unsigned ShiftAmount = C->countl_one() - 1;
Lower = C->shl(ShiftAmount);
Upper = *C + 1;
} else {
// 'shl nsw C, x' produces [C, C << CLZ(C)-1]
- unsigned ShiftAmount = C->countLeadingZeros() - 1;
+ unsigned ShiftAmount = C->countl_zero() - 1;
Lower = *C;
Upper = C->shl(ShiftAmount) + 1;
}
@@ -7177,7 +8508,7 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
// where C != -1 and C != 0 and C != 1
Lower = IntMin + 1;
Upper = IntMax + 1;
- } else if (C->countLeadingZeros() < Width - 1) {
+ } else if (C->countl_zero() < Width - 1) {
// 'sdiv x, C' produces [INT_MIN / C, INT_MAX / C]
// where C != -1 and C != 0 and C != 1
Lower = IntMin.sdiv(*C);
@@ -7229,67 +8560,67 @@ static void setLimitsForBinOp(const BinaryOperator &BO, APInt &Lower,
}
}
-static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower,
- APInt &Upper) {
- unsigned Width = Lower.getBitWidth();
+static ConstantRange getRangeForIntrinsic(const IntrinsicInst &II) {
+ unsigned Width = II.getType()->getScalarSizeInBits();
const APInt *C;
switch (II.getIntrinsicID()) {
case Intrinsic::ctpop:
case Intrinsic::ctlz:
case Intrinsic::cttz:
// Maximum of set/clear bits is the bit width.
- assert(Lower == 0 && "Expected lower bound to be zero");
- Upper = Width + 1;
- break;
+ return ConstantRange::getNonEmpty(APInt::getZero(Width),
+ APInt(Width, Width + 1));
case Intrinsic::uadd_sat:
// uadd.sat(x, C) produces [C, UINT_MAX].
if (match(II.getOperand(0), m_APInt(C)) ||
match(II.getOperand(1), m_APInt(C)))
- Lower = *C;
+ return ConstantRange::getNonEmpty(*C, APInt::getZero(Width));
break;
case Intrinsic::sadd_sat:
if (match(II.getOperand(0), m_APInt(C)) ||
match(II.getOperand(1), m_APInt(C))) {
- if (C->isNegative()) {
+ if (C->isNegative())
// sadd.sat(x, -C) produces [SINT_MIN, SINT_MAX + (-C)].
- Lower = APInt::getSignedMinValue(Width);
- Upper = APInt::getSignedMaxValue(Width) + *C + 1;
- } else {
- // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
- Lower = APInt::getSignedMinValue(Width) + *C;
- Upper = APInt::getSignedMaxValue(Width) + 1;
- }
+ return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
+ APInt::getSignedMaxValue(Width) + *C +
+ 1);
+
+ // sadd.sat(x, +C) produces [SINT_MIN + C, SINT_MAX].
+ return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) + *C,
+ APInt::getSignedMaxValue(Width) + 1);
}
break;
case Intrinsic::usub_sat:
// usub.sat(C, x) produces [0, C].
if (match(II.getOperand(0), m_APInt(C)))
- Upper = *C + 1;
+ return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1);
+
// usub.sat(x, C) produces [0, UINT_MAX - C].
- else if (match(II.getOperand(1), m_APInt(C)))
- Upper = APInt::getMaxValue(Width) - *C + 1;
+ if (match(II.getOperand(1), m_APInt(C)))
+ return ConstantRange::getNonEmpty(APInt::getZero(Width),
+ APInt::getMaxValue(Width) - *C + 1);
break;
case Intrinsic::ssub_sat:
if (match(II.getOperand(0), m_APInt(C))) {
- if (C->isNegative()) {
+ if (C->isNegative())
// ssub.sat(-C, x) produces [SINT_MIN, -SINT_MIN + (-C)].
- Lower = APInt::getSignedMinValue(Width);
- Upper = *C - APInt::getSignedMinValue(Width) + 1;
- } else {
- // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
- Lower = *C - APInt::getSignedMaxValue(Width);
- Upper = APInt::getSignedMaxValue(Width) + 1;
- }
+ return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
+ *C - APInt::getSignedMinValue(Width) +
+ 1);
+
+ // ssub.sat(+C, x) produces [-SINT_MAX + C, SINT_MAX].
+ return ConstantRange::getNonEmpty(*C - APInt::getSignedMaxValue(Width),
+ APInt::getSignedMaxValue(Width) + 1);
} else if (match(II.getOperand(1), m_APInt(C))) {
- if (C->isNegative()) {
+ if (C->isNegative())
// ssub.sat(x, -C) produces [SINT_MIN - (-C), SINT_MAX]:
- Lower = APInt::getSignedMinValue(Width) - *C;
- Upper = APInt::getSignedMaxValue(Width) + 1;
- } else {
- // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
- Lower = APInt::getSignedMinValue(Width);
- Upper = APInt::getSignedMaxValue(Width) - *C + 1;
- }
+ return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width) - *C,
+ APInt::getSignedMaxValue(Width) + 1);
+
+ // ssub.sat(x, +C) produces [SINT_MIN, SINT_MAX - C].
+ return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
+ APInt::getSignedMaxValue(Width) - *C +
+ 1);
}
break;
case Intrinsic::umin:
@@ -7302,19 +8633,15 @@ static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower,
switch (II.getIntrinsicID()) {
case Intrinsic::umin:
- Upper = *C + 1;
- break;
+ return ConstantRange::getNonEmpty(APInt::getZero(Width), *C + 1);
case Intrinsic::umax:
- Lower = *C;
- break;
+ return ConstantRange::getNonEmpty(*C, APInt::getZero(Width));
case Intrinsic::smin:
- Lower = APInt::getSignedMinValue(Width);
- Upper = *C + 1;
- break;
+ return ConstantRange::getNonEmpty(APInt::getSignedMinValue(Width),
+ *C + 1);
case Intrinsic::smax:
- Lower = *C;
- Upper = APInt::getSignedMaxValue(Width) + 1;
- break;
+ return ConstantRange::getNonEmpty(*C,
+ APInt::getSignedMaxValue(Width) + 1);
default:
llvm_unreachable("Must be min/max intrinsic");
}
@@ -7323,13 +8650,20 @@ static void setLimitsForIntrinsic(const IntrinsicInst &II, APInt &Lower,
// If abs of SIGNED_MIN is poison, then the result is [0..SIGNED_MAX],
// otherwise it is [0..SIGNED_MIN], as -SIGNED_MIN == SIGNED_MIN.
if (match(II.getOperand(1), m_One()))
- Upper = APInt::getSignedMaxValue(Width) + 1;
- else
- Upper = APInt::getSignedMinValue(Width) + 1;
- break;
+ return ConstantRange::getNonEmpty(APInt::getZero(Width),
+ APInt::getSignedMaxValue(Width) + 1);
+
+ return ConstantRange::getNonEmpty(APInt::getZero(Width),
+ APInt::getSignedMinValue(Width) + 1);
+ case Intrinsic::vscale:
+ if (!II.getParent() || !II.getFunction())
+ break;
+ return getVScaleRange(II.getFunction(), Width);
default:
break;
}
+
+ return ConstantRange::getFull(Width);
}
static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower,
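// Illustrative aside (a minimal self-contained sketch, not part of the
// vendored diff): the uadd.sat case above returns the range [C, UINT_MAX].
// uaddSat8 is a hypothetical plain-C++ stand-in for llvm.uadd.sat.i8 showing
// why the saturating result can never drop below the constant operand.
#include <algorithm>
#include <cassert>
#include <cstdint>

static uint8_t uaddSat8(uint8_t X, uint8_t C) {
  unsigned Sum = unsigned(X) + unsigned(C);
  return uint8_t(std::min(Sum, 255u)); // clamp at UINT8_MAX instead of wrapping
}

int main() {
  const uint8_t C = 200;
  for (unsigned X = 0; X <= 255; ++X) {
    uint8_t R = uaddSat8(uint8_t(X), C);
    assert(R >= C && R <= 255); // every result lies in [C, UINT8_MAX]
  }
  return 0;
}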
@@ -7418,18 +8752,28 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
InstrInfoQuery IIQ(UseInstrInfo);
unsigned BitWidth = V->getType()->getScalarSizeInBits();
- APInt Lower = APInt(BitWidth, 0);
- APInt Upper = APInt(BitWidth, 0);
- if (auto *BO = dyn_cast<BinaryOperator>(V))
+ ConstantRange CR = ConstantRange::getFull(BitWidth);
+ if (auto *BO = dyn_cast<BinaryOperator>(V)) {
+ APInt Lower = APInt(BitWidth, 0);
+ APInt Upper = APInt(BitWidth, 0);
+ // TODO: Return ConstantRange.
setLimitsForBinOp(*BO, Lower, Upper, IIQ, ForSigned);
- else if (auto *II = dyn_cast<IntrinsicInst>(V))
- setLimitsForIntrinsic(*II, Lower, Upper);
- else if (auto *SI = dyn_cast<SelectInst>(V))
+ CR = ConstantRange::getNonEmpty(Lower, Upper);
+ } else if (auto *II = dyn_cast<IntrinsicInst>(V))
+ CR = getRangeForIntrinsic(*II);
+ else if (auto *SI = dyn_cast<SelectInst>(V)) {
+ APInt Lower = APInt(BitWidth, 0);
+ APInt Upper = APInt(BitWidth, 0);
+ // TODO: Return ConstantRange.
setLimitsForSelectPattern(*SI, Lower, Upper, IIQ);
- else if (isa<FPToUIInst>(V) || isa<FPToSIInst>(V))
+ CR = ConstantRange::getNonEmpty(Lower, Upper);
+ } else if (isa<FPToUIInst>(V) || isa<FPToSIInst>(V)) {
+ APInt Lower = APInt(BitWidth, 0);
+ APInt Upper = APInt(BitWidth, 0);
+ // TODO: Return ConstantRange.
setLimitForFPToI(cast<Instruction>(V), Lower, Upper);
-
- ConstantRange CR = ConstantRange::getNonEmpty(Lower, Upper);
+ CR = ConstantRange::getNonEmpty(Lower, Upper);
+ }
if (auto *I = dyn_cast<Instruction>(V))
if (auto *Range = IIQ.getMetadata(I, LLVMContext::MD_range))
@@ -7440,9 +8784,11 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
for (auto &AssumeVH : AC->assumptionsFor(V)) {
if (!AssumeVH)
continue;
- IntrinsicInst *I = cast<IntrinsicInst>(AssumeVH);
+ CallInst *I = cast<CallInst>(AssumeVH);
assert(I->getParent()->getParent() == CtxI->getParent()->getParent() &&
"Got assumption for the wrong function!");
+ assert(I->getCalledFunction()->getIntrinsicID() == Intrinsic::assume &&
+ "must be an assume intrinsic");
if (!isValidAssumeForContext(I, CtxI, DT))
continue;
@@ -7462,74 +8808,3 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
return CR;
}
-
-static std::optional<int64_t>
-getOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, const DataLayout &DL) {
- // Skip over the first indices.
- gep_type_iterator GTI = gep_type_begin(GEP);
- for (unsigned i = 1; i != Idx; ++i, ++GTI)
- /*skip along*/;
-
- // Compute the offset implied by the rest of the indices.
- int64_t Offset = 0;
- for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
- ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (!OpC)
- return std::nullopt;
- if (OpC->isZero())
- continue; // No offset.
-
- // Handle struct indices, which add their field offset to the pointer.
- if (StructType *STy = GTI.getStructTypeOrNull()) {
- Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
- continue;
- }
-
- // Otherwise, we have a sequential type like an array or fixed-length
- // vector. Multiply the index by the ElementSize.
- TypeSize Size = DL.getTypeAllocSize(GTI.getIndexedType());
- if (Size.isScalable())
- return std::nullopt;
- Offset += Size.getFixedValue() * OpC->getSExtValue();
- }
-
- return Offset;
-}
-
-std::optional<int64_t> llvm::isPointerOffset(const Value *Ptr1,
- const Value *Ptr2,
- const DataLayout &DL) {
- APInt Offset1(DL.getIndexTypeSizeInBits(Ptr1->getType()), 0);
- APInt Offset2(DL.getIndexTypeSizeInBits(Ptr2->getType()), 0);
- Ptr1 = Ptr1->stripAndAccumulateConstantOffsets(DL, Offset1, true);
- Ptr2 = Ptr2->stripAndAccumulateConstantOffsets(DL, Offset2, true);
-
- // Handle the trivial case first.
- if (Ptr1 == Ptr2)
- return Offset2.getSExtValue() - Offset1.getSExtValue();
-
- const GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1);
- const GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2);
-
- // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
- // base. After that base, they may have some number of common (and
- // potentially variable) indices. After that they handle some constant
- // offset, which determines their offset from each other. At this point, we
- // handle no other case.
- if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0) ||
- GEP1->getSourceElementType() != GEP2->getSourceElementType())
- return std::nullopt;
-
- // Skip any common indices and track the GEP types.
- unsigned Idx = 1;
- for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
- if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
- break;
-
- auto IOffset1 = getOffsetFromIndex(GEP1, Idx, DL);
- auto IOffset2 = getOffsetFromIndex(GEP2, Idx, DL);
- if (!IOffset1 || !IOffset2)
- return std::nullopt;
- return *IOffset2 - *IOffset1 + Offset2.getSExtValue() -
- Offset1.getSExtValue();
-}
diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp
index 1e48d3e2fbca..87f0bb690477 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -12,6 +12,7 @@
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
@@ -20,7 +21,6 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
@@ -87,6 +87,7 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
case Intrinsic::pow:
case Intrinsic::fma:
case Intrinsic::fmuladd:
+ case Intrinsic::is_fpclass:
case Intrinsic::powi:
case Intrinsic::canonicalize:
case Intrinsic::fptosi_sat:
@@ -104,6 +105,7 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
case Intrinsic::abs:
case Intrinsic::ctlz:
case Intrinsic::cttz:
+ case Intrinsic::is_fpclass:
case Intrinsic::powi:
return (ScalarOpdIdx == 1);
case Intrinsic::smul_fix:
@@ -117,15 +119,17 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
}
bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
- unsigned OpdIdx) {
+ int OpdIdx) {
switch (ID) {
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat:
+ return OpdIdx == -1 || OpdIdx == 0;
+ case Intrinsic::is_fpclass:
return OpdIdx == 0;
case Intrinsic::powi:
- return OpdIdx == 1;
+ return OpdIdx == -1 || OpdIdx == 1;
default:
- return false;
+ return OpdIdx == -1;
}
}
@@ -146,139 +150,6 @@ Intrinsic::ID llvm::getVectorIntrinsicIDForCall(const CallInst *CI,
return Intrinsic::not_intrinsic;
}
-/// Find the operand of the GEP that should be checked for consecutive
-/// stores. This ignores trailing indices that have no effect on the final
-/// pointer.
-unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
- const DataLayout &DL = Gep->getModule()->getDataLayout();
- unsigned LastOperand = Gep->getNumOperands() - 1;
- TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
-
- // Walk backwards and try to peel off zeros.
- while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
- // Find the type we're currently indexing into.
- gep_type_iterator GEPTI = gep_type_begin(Gep);
- std::advance(GEPTI, LastOperand - 2);
-
- // If it's a type with the same allocation size as the result of the GEP we
- // can peel off the zero index.
- if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
- break;
- --LastOperand;
- }
-
- return LastOperand;
-}
-
-/// If the argument is a GEP, then returns the operand identified by
-/// getGEPInductionOperand. However, if there is some other non-loop-invariant
-/// operand, it returns that instead.
-Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
- GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
- if (!GEP)
- return Ptr;
-
- unsigned InductionOperand = getGEPInductionOperand(GEP);
-
- // Check that all of the gep indices are uniform except for our induction
- // operand.
- for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i)
- if (i != InductionOperand &&
- !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(i)), Lp))
- return Ptr;
- return GEP->getOperand(InductionOperand);
-}
-
-/// If a value has only one user that is a CastInst, return it.
-Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) {
- Value *UniqueCast = nullptr;
- for (User *U : Ptr->users()) {
- CastInst *CI = dyn_cast<CastInst>(U);
- if (CI && CI->getType() == Ty) {
- if (!UniqueCast)
- UniqueCast = CI;
- else
- return nullptr;
- }
- }
- return UniqueCast;
-}
-
-/// Get the stride of a pointer access in a loop. Looks for symbolic
-/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
-Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
- auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
- if (!PtrTy || PtrTy->isAggregateType())
- return nullptr;
-
- // Try to remove a gep instruction to make the pointer (actually index at this
- // point) easier analyzable. If OrigPtr is equal to Ptr we are analyzing the
- // pointer, otherwise, we are analyzing the index.
- Value *OrigPtr = Ptr;
-
- // The size of the pointer access.
- int64_t PtrAccessSize = 1;
-
- Ptr = stripGetElementPtr(Ptr, SE, Lp);
- const SCEV *V = SE->getSCEV(Ptr);
-
- if (Ptr != OrigPtr)
- // Strip off casts.
- while (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V))
- V = C->getOperand();
-
- const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
- if (!S)
- return nullptr;
-
- V = S->getStepRecurrence(*SE);
- if (!V)
- return nullptr;
-
- // Strip off the size of access multiplication if we are still analyzing the
- // pointer.
- if (OrigPtr == Ptr) {
- if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
- if (M->getOperand(0)->getSCEVType() != scConstant)
- return nullptr;
-
- const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
-
- // Huge step value - give up.
- if (APStepVal.getBitWidth() > 64)
- return nullptr;
-
- int64_t StepVal = APStepVal.getSExtValue();
- if (PtrAccessSize != StepVal)
- return nullptr;
- V = M->getOperand(1);
- }
- }
-
- // Strip off casts.
- Type *StripedOffRecurrenceCast = nullptr;
- if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V)) {
- StripedOffRecurrenceCast = C->getType();
- V = C->getOperand();
- }
-
- // Look for the loop invariant symbolic value.
- const SCEVUnknown *U = dyn_cast<SCEVUnknown>(V);
- if (!U)
- return nullptr;
-
- Value *Stride = U->getValue();
- if (!Lp->isLoopInvariant(Stride))
- return nullptr;
-
- // If we have stripped off the recurrence cast we have to make sure that we
- // return the value that is used in this loop so that we can replace it later.
- if (StripedOffRecurrenceCast)
- Stride = getUniqueCastUse(Stride, Lp, StripedOffRecurrenceCast);
-
- return Stride;
-}
-
/// Given a vector and an element number, see if the scalar value is
/// already around as a register, for example if it were inserted then extracted
/// from the vector.
@@ -574,13 +445,13 @@ void llvm::processShuffleMasks(
int Idx = I * SzDest + K;
if (Idx == Sz)
break;
- if (Mask[Idx] >= Sz || Mask[Idx] == UndefMaskElem)
+ if (Mask[Idx] >= Sz || Mask[Idx] == PoisonMaskElem)
continue;
int SrcRegIdx = Mask[Idx] / SzSrc;
// Add a cost of PermuteTwoSrc for each new source register permute,
// if we have more than one source registers.
if (RegMasks[SrcRegIdx].empty())
- RegMasks[SrcRegIdx].assign(SzDest, UndefMaskElem);
+ RegMasks[SrcRegIdx].assign(SzDest, PoisonMaskElem);
RegMasks[SrcRegIdx][K] = Mask[Idx] % SzSrc;
}
}
@@ -612,8 +483,8 @@ void llvm::processShuffleMasks(
auto &&CombineMasks = [](MutableArrayRef<int> FirstMask,
ArrayRef<int> SecondMask) {
for (int Idx = 0, VF = FirstMask.size(); Idx < VF; ++Idx) {
- if (SecondMask[Idx] != UndefMaskElem) {
- assert(FirstMask[Idx] == UndefMaskElem &&
+ if (SecondMask[Idx] != PoisonMaskElem) {
+ assert(FirstMask[Idx] == PoisonMaskElem &&
"Expected undefined mask element.");
FirstMask[Idx] = SecondMask[Idx] + VF;
}
@@ -621,7 +492,7 @@ void llvm::processShuffleMasks(
};
auto &&NormalizeMask = [](MutableArrayRef<int> Mask) {
for (int Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
- if (Mask[Idx] != UndefMaskElem)
+ if (Mask[Idx] != PoisonMaskElem)
Mask[Idx] = Idx;
}
};
@@ -770,11 +641,9 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
LeaderDemandedBits |= DBits[M];
- uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) -
- llvm::countLeadingZeros(LeaderDemandedBits);
+ uint64_t MinBW = llvm::bit_width(LeaderDemandedBits);
// Round up to a power of 2
- if (!isPowerOf2_64((uint64_t)MinBW))
- MinBW = NextPowerOf2(MinBW);
+ MinBW = llvm::bit_ceil(MinBW);
// We don't modify the types of PHIs. Reductions will already have been
// truncated if possible, and inductions' sizes will have been chosen by
@@ -790,13 +659,32 @@ llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB,
continue;
for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end())) {
- if (!isa<Instruction>(M))
+ auto *MI = dyn_cast<Instruction>(M);
+ if (!MI)
continue;
Type *Ty = M->getType();
if (Roots.count(M))
- Ty = cast<Instruction>(M)->getOperand(0)->getType();
- if (MinBW < Ty->getScalarSizeInBits())
- MinBWs[cast<Instruction>(M)] = MinBW;
+ Ty = MI->getOperand(0)->getType();
+
+ if (MinBW >= Ty->getScalarSizeInBits())
+ continue;
+
+ // If any of M's operands demand more bits than MinBW then M cannot be
+ // performed safely in MinBW.
+ if (any_of(MI->operands(), [&DB, MinBW](Use &U) {
+ auto *CI = dyn_cast<ConstantInt>(U);
+ // For constant shift amounts, check if the shift would result in
+ // poison.
+ if (CI &&
+ isa<ShlOperator, LShrOperator, AShrOperator>(U.getUser()) &&
+ U.getOperandNo() == 1)
+ return CI->uge(MinBW);
+ uint64_t BW = bit_width(DB.getDemandedBits(&U).getZExtValue());
+ return bit_ceil(BW) > MinBW;
+ }))
+ continue;
+
+ MinBWs[MI] = MinBW;
}
}
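// Illustrative aside (a minimal self-contained sketch, not part of the
// vendored diff): std::bit_width and std::bit_ceil are the C++20 counterparts
// of the llvm::bit_width / llvm::bit_ceil calls introduced earlier in this
// function's diff, replacing the old countLeadingZeros / NextPowerOf2
// arithmetic.
#include <bit>
#include <cassert>

int main() {
  unsigned DemandedBits = 0b11010u;              // five significant bits
  unsigned MinBW = std::bit_width(DemandedBits); // 5
  assert(MinBW == 5);
  MinBW = std::bit_ceil(MinBW);                  // round up to a power of two
  assert(MinBW == 8);
  return 0;
}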
@@ -1143,7 +1031,7 @@ bool InterleavedAccessInfo::isStrided(int Stride) {
void InterleavedAccessInfo::collectConstStrideAccesses(
MapVector<Instruction *, StrideDescriptor> &AccessStrideInfo,
- const ValueToValueMap &Strides) {
+ const DenseMap<Value*, const SCEV*> &Strides) {
auto &DL = TheLoop->getHeader()->getModule()->getDataLayout();
// Since it's desired that the load/store instructions be maintained in
@@ -1223,7 +1111,7 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
void InterleavedAccessInfo::analyzeInterleaving(
bool EnablePredicatedInterleavedMemAccesses) {
LLVM_DEBUG(dbgs() << "LV: Analyzing interleaved accesses...\n");
- const ValueToValueMap &Strides = LAI->getSymbolicStrides();
+ const auto &Strides = LAI->getSymbolicStrides();
// Holds all accesses with a constant stride.
MapVector<Instruction *, StrideDescriptor> AccessStrideInfo;
@@ -1239,6 +1127,8 @@ void InterleavedAccessInfo::analyzeInterleaving(
SmallSetVector<InterleaveGroup<Instruction> *, 4> StoreGroups;
// Holds all interleaved load groups temporarily.
SmallSetVector<InterleaveGroup<Instruction> *, 4> LoadGroups;
+ // Groups added to this set cannot have new members added.
+ SmallPtrSet<InterleaveGroup<Instruction> *, 4> CompletedLoadGroups;
// Search in bottom-up program order for pairs of accesses (A and B) that can
// form interleaved load or store groups. In the algorithm below, access A
@@ -1260,19 +1150,22 @@ void InterleavedAccessInfo::analyzeInterleaving(
// Initialize a group for B if it has an allowable stride. Even if we don't
// create a group for B, we continue with the bottom-up algorithm to ensure
// we don't break any of B's dependences.
- InterleaveGroup<Instruction> *Group = nullptr;
+ InterleaveGroup<Instruction> *GroupB = nullptr;
if (isStrided(DesB.Stride) &&
(!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) {
- Group = getInterleaveGroup(B);
- if (!Group) {
+ GroupB = getInterleaveGroup(B);
+ if (!GroupB) {
LLVM_DEBUG(dbgs() << "LV: Creating an interleave group with:" << *B
<< '\n');
- Group = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
+ GroupB = createInterleaveGroup(B, DesB.Stride, DesB.Alignment);
+ } else if (CompletedLoadGroups.contains(GroupB)) {
+ // Skip B if no new instructions can be added to its load group.
+ continue;
}
if (B->mayWriteToMemory())
- StoreGroups.insert(Group);
+ StoreGroups.insert(GroupB);
else
- LoadGroups.insert(Group);
+ LoadGroups.insert(GroupB);
}
for (auto AI = std::next(BI); AI != E; ++AI) {
@@ -1313,6 +1206,16 @@ void InterleavedAccessInfo::analyzeInterleaving(
StoreGroups.remove(StoreGroup);
releaseGroup(StoreGroup);
}
+ // If B is a load and part of an interleave group, no earlier loads can
+ // be added to B's interleave group, because this would mean the load B
+ // would need to be moved across store A. Mark the interleave group as
+ // complete.
+ if (GroupB && isa<LoadInst>(B)) {
+ LLVM_DEBUG(dbgs() << "LV: Marking interleave group for " << *B
+ << " as complete.\n");
+
+ CompletedLoadGroups.insert(GroupB);
+ }
// If a dependence exists and A is not already in a group (or it was
// and we just released it), B might be hoisted above A (if B is a
@@ -1371,18 +1274,18 @@ void InterleavedAccessInfo::analyzeInterleaving(
// The index of A is the index of B plus A's distance to B in multiples
// of the size.
int IndexA =
- Group->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
+ GroupB->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size);
// Try to insert A into B's group.
- if (Group->insertMember(A, IndexA, DesA.Alignment)) {
+ if (GroupB->insertMember(A, IndexA, DesA.Alignment)) {
LLVM_DEBUG(dbgs() << "LV: Inserted:" << *A << '\n'
<< " into the interleave group with" << *B
<< '\n');
- InterleaveGroupMap[A] = Group;
+ InterleaveGroupMap[A] = GroupB;
// Set the first load in program order as the insert position.
if (A->mayReadFromMemory())
- Group->setInsertPos(A);
+ GroupB->setInsertPos(A);
}
} // Iteration over A accesses.
} // Iteration over B accesses.
@@ -1531,10 +1434,10 @@ void InterleaveGroup<Instruction>::addMetadata(Instruction *NewInst) const {
std::string VFABI::mangleTLIVectorName(StringRef VectorName,
StringRef ScalarName, unsigned numArgs,
- ElementCount VF) {
+ ElementCount VF, bool Masked) {
SmallString<256> Buffer;
llvm::raw_svector_ostream Out(Buffer);
- Out << "_ZGV" << VFABI::_LLVM_ << "N";
+ Out << "_ZGV" << VFABI::_LLVM_ << (Masked ? "M" : "N");
if (VF.isScalable())
Out << 'x';
else
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index a9cac4de0c2f..466bdebc001f 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -158,8 +158,7 @@ static const char *isLabelTail(const char *CurPtr) {
LLLexer::LLLexer(StringRef StartBuf, SourceMgr &SM, SMDiagnostic &Err,
LLVMContext &C)
- : CurBuf(StartBuf), ErrorInfo(Err), SM(SM), Context(C), APFloatVal(0.0),
- IgnoreColonInIdentifiers(false) {
+ : CurBuf(StartBuf), ErrorInfo(Err), SM(SM), Context(C) {
CurPtr = CurBuf.begin();
}
@@ -628,6 +627,8 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(amdgpu_gs);
KEYWORD(amdgpu_ps);
KEYWORD(amdgpu_cs);
+ KEYWORD(amdgpu_cs_chain);
+ KEYWORD(amdgpu_cs_chain_preserve);
KEYWORD(amdgpu_kernel);
KEYWORD(amdgpu_gfx);
KEYWORD(tailcc);
@@ -653,6 +654,24 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(inaccessiblememonly);
KEYWORD(inaccessiblemem_or_argmemonly);
+ // nofpclass attribute
+ KEYWORD(all);
+ KEYWORD(nan);
+ KEYWORD(snan);
+ KEYWORD(qnan);
+ KEYWORD(inf);
+ // ninf already a keyword
+ KEYWORD(pinf);
+ KEYWORD(norm);
+ KEYWORD(nnorm);
+ KEYWORD(pnorm);
+ // sub already a keyword
+ KEYWORD(nsub);
+ KEYWORD(psub);
+ KEYWORD(zero);
+ KEYWORD(nzero);
+ KEYWORD(pzero);
+
KEYWORD(type);
KEYWORD(opaque);
@@ -781,7 +800,6 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(versions);
KEYWORD(memProf);
KEYWORD(notcold);
- KEYWORD(notcoldandcold);
#undef KEYWORD
@@ -807,15 +825,7 @@ lltok::Kind LLLexer::LexIdentifier() {
TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context));
TYPEKEYWORD("x86_amx", Type::getX86_AMXTy(Context));
TYPEKEYWORD("token", Type::getTokenTy(Context));
-
- if (Keyword == "ptr") {
- if (Context.supportsTypedPointers()) {
- Warning("ptr type is only supported in -opaque-pointers mode");
- return lltok::Error;
- }
- TyVal = PointerType::getUnqual(Context);
- return lltok::Type;
- }
+ TYPEKEYWORD("ptr", PointerType::getUnqual(Context));
#undef TYPEKEYWORD
@@ -928,7 +938,8 @@ lltok::Kind LLLexer::LexIdentifier() {
return lltok::EmissionKind;
}
- if (Keyword == "GNU" || Keyword == "None" || Keyword == "Default") {
+ if (Keyword == "GNU" || Keyword == "Apple" || Keyword == "None" ||
+ Keyword == "Default") {
StrVal.assign(Keyword.begin(), Keyword.end());
return lltok::NameTableKind;
}
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 7f451620a08e..5f0d1a76de79 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -150,7 +150,7 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) {
// If the alignment was parsed as an attribute, move to the alignment
// field.
if (MaybeAlign A = FnAttrs.getAlignment()) {
- Fn->setAlignment(A);
+ Fn->setAlignment(*A);
FnAttrs.removeAttribute(Attribute::Alignment);
}
@@ -974,14 +974,6 @@ static void maybeSetDSOLocal(bool DSOLocal, GlobalValue &GV) {
GV.setDSOLocal(true);
}
-static std::string typeComparisonErrorMessage(StringRef Message, Type *Ty1,
- Type *Ty2) {
- std::string ErrString;
- raw_string_ostream ErrOS(ErrString);
- ErrOS << Message << " (" << *Ty1 << " vs " << *Ty2 << ")";
- return ErrOS.str();
-}
-
/// parseAliasOrIFunc:
/// ::= GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier
/// OptionalVisibility OptionalDLLStorageClass
@@ -1053,20 +1045,6 @@ bool LLParser::parseAliasOrIFunc(const std::string &Name, LocTy NameLoc,
return error(AliaseeLoc, "An alias or ifunc must have pointer type");
unsigned AddrSpace = PTy->getAddressSpace();
- if (IsAlias) {
- if (!PTy->isOpaqueOrPointeeTypeMatches(Ty))
- return error(
- ExplicitTypeLoc,
- typeComparisonErrorMessage(
- "explicit pointee type doesn't match operand's pointee type", Ty,
- PTy->getNonOpaquePointerElementType()));
- } else {
- if (!PTy->isOpaque() &&
- !PTy->getNonOpaquePointerElementType()->isFunctionTy())
- return error(ExplicitTypeLoc,
- "explicit pointee type should be a function type");
- }
-
GlobalValue *GVal = nullptr;
// See if the alias was forward referenced, if so, prepare to replace the
@@ -1141,9 +1119,9 @@ bool LLParser::parseAliasOrIFunc(const std::string &Name, LocTy NameLoc,
// Insert into the module, we know its name won't collide now.
if (IsAlias)
- M->getAliasList().push_back(GA.release());
+ M->insertAlias(GA.release());
else
- M->getIFuncList().push_back(GI.release());
+ M->insertIFunc(GI.release());
assert(GV->getName() == Name && "Should not be a name conflict!");
return false;
@@ -1279,7 +1257,7 @@ bool LLParser::parseGlobal(const std::string &Name, LocTy NameLoc,
GV->setUnnamedAddr(UnnamedAddr);
if (GVal) {
- if (GVal->getType() != Ty->getPointerTo(AddrSpace))
+ if (GVal->getAddressSpace() != AddrSpace)
return error(
TyLoc,
"forward reference and definition of global have different types");
@@ -1306,7 +1284,8 @@ bool LLParser::parseGlobal(const std::string &Name, LocTy NameLoc,
MaybeAlign Alignment;
if (parseOptionalAlignment(Alignment))
return true;
- GV->setAlignment(Alignment);
+ if (Alignment)
+ GV->setAlignment(*Alignment);
} else if (Lex.getKind() == lltok::MetadataVar) {
if (parseGlobalObjectMetadataAttachment(*GV))
return true;
@@ -1468,6 +1447,15 @@ bool LLParser::parseEnumAttribute(Attribute::AttrKind Attr, AttrBuilder &B,
B.addMemoryAttr(*ME);
return false;
}
+ case Attribute::NoFPClass: {
+ if (FPClassTest NoFPClass =
+ static_cast<FPClassTest>(parseNoFPClassAttr())) {
+ B.addNoFPClassAttr(NoFPClass);
+ return false;
+ }
+
+ return true;
+ }
default:
B.addAttribute(Attr);
Lex.Lex();
@@ -1573,22 +1561,12 @@ bool LLParser::parseFnAttributeValuePairs(AttrBuilder &B,
//===----------------------------------------------------------------------===//
static inline GlobalValue *createGlobalFwdRef(Module *M, PointerType *PTy) {
- // For opaque pointers, the used global type does not matter. We will later
- // RAUW it with a global/function of the correct type.
- if (PTy->isOpaque())
- return new GlobalVariable(*M, Type::getInt8Ty(M->getContext()), false,
- GlobalValue::ExternalWeakLinkage, nullptr, "",
- nullptr, GlobalVariable::NotThreadLocal,
- PTy->getAddressSpace());
-
- Type *ElemTy = PTy->getNonOpaquePointerElementType();
- if (auto *FT = dyn_cast<FunctionType>(ElemTy))
- return Function::Create(FT, GlobalValue::ExternalWeakLinkage,
- PTy->getAddressSpace(), "", M);
- else
- return new GlobalVariable(
- *M, ElemTy, false, GlobalValue::ExternalWeakLinkage, nullptr, "",
- nullptr, GlobalVariable::NotThreadLocal, PTy->getAddressSpace());
+ // The used global type does not matter. We will later RAUW it with a
+ // global/function of the correct type.
+ return new GlobalVariable(*M, Type::getInt8Ty(M->getContext()), false,
+ GlobalValue::ExternalWeakLinkage, nullptr, "",
+ nullptr, GlobalVariable::NotThreadLocal,
+ PTy->getAddressSpace());
}
Value *LLParser::checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty,
@@ -2017,6 +1995,8 @@ void LLParser::parseOptionalDLLStorageClass(unsigned &Res) {
/// ::= 'amdgpu_gs'
/// ::= 'amdgpu_ps'
/// ::= 'amdgpu_cs'
+/// ::= 'amdgpu_cs_chain'
+/// ::= 'amdgpu_cs_chain_preserve'
/// ::= 'amdgpu_kernel'
/// ::= 'tailcc'
/// ::= 'cc' UINT
@@ -2064,8 +2044,12 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) {
case lltok::kw_swiftcc: CC = CallingConv::Swift; break;
case lltok::kw_swifttailcc: CC = CallingConv::SwiftTail; break;
case lltok::kw_x86_intrcc: CC = CallingConv::X86_INTR; break;
- case lltok::kw_hhvmcc: CC = CallingConv::HHVM; break;
- case lltok::kw_hhvm_ccc: CC = CallingConv::HHVM_C; break;
+ case lltok::kw_hhvmcc:
+ CC = CallingConv::DUMMY_HHVM;
+ break;
+ case lltok::kw_hhvm_ccc:
+ CC = CallingConv::DUMMY_HHVM_C;
+ break;
case lltok::kw_cxx_fast_tlscc: CC = CallingConv::CXX_FAST_TLS; break;
case lltok::kw_amdgpu_vs: CC = CallingConv::AMDGPU_VS; break;
case lltok::kw_amdgpu_gfx: CC = CallingConv::AMDGPU_Gfx; break;
@@ -2075,6 +2059,12 @@ bool LLParser::parseOptionalCallingConv(unsigned &CC) {
case lltok::kw_amdgpu_gs: CC = CallingConv::AMDGPU_GS; break;
case lltok::kw_amdgpu_ps: CC = CallingConv::AMDGPU_PS; break;
case lltok::kw_amdgpu_cs: CC = CallingConv::AMDGPU_CS; break;
+ case lltok::kw_amdgpu_cs_chain:
+ CC = CallingConv::AMDGPU_CS_Chain;
+ break;
+ case lltok::kw_amdgpu_cs_chain_preserve:
+ CC = CallingConv::AMDGPU_CS_ChainPreserve;
+ break;
case lltok::kw_amdgpu_kernel: CC = CallingConv::AMDGPU_KERNEL; break;
case lltok::kw_tailcc: CC = CallingConv::Tail; break;
case lltok::kw_cc: {
@@ -2257,9 +2247,9 @@ bool LLParser::parseAllocKind(AllocFnKind &Kind) {
static std::optional<MemoryEffects::Location> keywordToLoc(lltok::Kind Tok) {
switch (Tok) {
case lltok::kw_argmem:
- return MemoryEffects::ArgMem;
+ return IRMemLocation::ArgMem;
case lltok::kw_inaccessiblemem:
- return MemoryEffects::InaccessibleMem;
+ return IRMemLocation::InaccessibleMem;
default:
return std::nullopt;
}
@@ -2296,7 +2286,7 @@ std::optional<MemoryEffects> LLParser::parseMemoryAttr() {
bool SeenLoc = false;
do {
- std::optional<MemoryEffects::Location> Loc = keywordToLoc(Lex.getKind());
+ std::optional<IRMemLocation> Loc = keywordToLoc(Lex.getKind());
if (Loc) {
Lex.Lex();
if (!EatIfPresent(lltok::colon)) {
@@ -2335,6 +2325,86 @@ std::optional<MemoryEffects> LLParser::parseMemoryAttr() {
return std::nullopt;
}
+static unsigned keywordToFPClassTest(lltok::Kind Tok) {
+ switch (Tok) {
+ case lltok::kw_all:
+ return fcAllFlags;
+ case lltok::kw_nan:
+ return fcNan;
+ case lltok::kw_snan:
+ return fcSNan;
+ case lltok::kw_qnan:
+ return fcQNan;
+ case lltok::kw_inf:
+ return fcInf;
+ case lltok::kw_ninf:
+ return fcNegInf;
+ case lltok::kw_pinf:
+ return fcPosInf;
+ case lltok::kw_norm:
+ return fcNormal;
+ case lltok::kw_nnorm:
+ return fcNegNormal;
+ case lltok::kw_pnorm:
+ return fcPosNormal;
+ case lltok::kw_sub:
+ return fcSubnormal;
+ case lltok::kw_nsub:
+ return fcNegSubnormal;
+ case lltok::kw_psub:
+ return fcPosSubnormal;
+ case lltok::kw_zero:
+ return fcZero;
+ case lltok::kw_nzero:
+ return fcNegZero;
+ case lltok::kw_pzero:
+ return fcPosZero;
+ default:
+ return 0;
+ }
+}
+
+unsigned LLParser::parseNoFPClassAttr() {
+ unsigned Mask = fcNone;
+
+ Lex.Lex();
+ if (!EatIfPresent(lltok::lparen)) {
+ tokError("expected '('");
+ return 0;
+ }
+
+ do {
+ uint64_t Value = 0;
+ unsigned TestMask = keywordToFPClassTest(Lex.getKind());
+ if (TestMask != 0) {
+ Mask |= TestMask;
+ // TODO: Disallow overlapping masks to avoid copy-paste errors
+ } else if (Mask == 0 && Lex.getKind() == lltok::APSInt &&
+ !parseUInt64(Value)) {
+ if (Value == 0 || (Value & ~static_cast<unsigned>(fcAllFlags)) != 0) {
+ error(Lex.getLoc(), "invalid mask value for 'nofpclass'");
+ return 0;
+ }
+
+ if (!EatIfPresent(lltok::rparen)) {
+ error(Lex.getLoc(), "expected ')'");
+ return 0;
+ }
+
+ return Value;
+ } else {
+ error(Lex.getLoc(), "expected nofpclass test mask");
+ return 0;
+ }
+
+ Lex.Lex();
+ if (EatIfPresent(lltok::rparen))
+ return Mask;
+ } while (1);
+
+ llvm_unreachable("unterminated nofpclass attribute");
+}
+
/// parseOptionalCommaAlign
/// ::=
/// ::= ',' align 4
@@ -2573,7 +2643,7 @@ bool LLParser::parseType(Type *&Result, const Twine &Msg, bool AllowVoid) {
// Handle "ptr" opaque pointer type.
//
// Type ::= ptr ('addrspace' '(' uint32 ')')?
- if (Result->isOpaquePointerTy()) {
+ if (Result->isPointerTy()) {
unsigned AddrSpace;
if (parseOptionalAddrSpace(AddrSpace))
return true;
@@ -3236,6 +3306,12 @@ Value *LLParser::PerFunctionState::getVal(const std::string &Name, Type *Ty,
} else {
FwdVal = new Argument(Ty, Name);
}
+ if (FwdVal->getName() != Name) {
+ P.error(Loc, "name is too long which can result in name collisions, "
+ "consider making the name shorter or "
+ "increasing -non-global-value-max-name-size");
+ return nullptr;
+ }
ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
return FwdVal;
@@ -3782,6 +3858,8 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
return error(ID.Loc, "frem constexprs are no longer supported");
case lltok::kw_fneg:
return error(ID.Loc, "fneg constexprs are no longer supported");
+ case lltok::kw_select:
+ return error(ID.Loc, "select constexprs are no longer supported");
case lltok::kw_icmp:
case lltok::kw_fcmp: {
unsigned PredVal, Opc = Lex.getUIntVal();
@@ -3911,8 +3989,7 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
case lltok::kw_getelementptr:
case lltok::kw_shufflevector:
case lltok::kw_insertelement:
- case lltok::kw_extractelement:
- case lltok::kw_select: {
+ case lltok::kw_extractelement: {
unsigned Opc = Lex.getUIntVal();
SmallVector<Constant*, 16> Elts;
bool InBounds = false;
@@ -3925,7 +4002,6 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
if (parseToken(lltok::lparen, "expected '(' in constantexpr"))
return true;
- LocTy ExplicitTypeLoc = Lex.getLoc();
if (Opc == Instruction::GetElementPtr) {
if (parseType(Ty) ||
parseToken(lltok::comma, "expected comma after getelementptr's type"))
@@ -3944,15 +4020,6 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
return error(ID.Loc, "base of getelementptr must be a pointer");
Type *BaseType = Elts[0]->getType();
- auto *BasePointerType = cast<PointerType>(BaseType->getScalarType());
- if (!BasePointerType->isOpaqueOrPointeeTypeMatches(Ty)) {
- return error(
- ExplicitTypeLoc,
- typeComparisonErrorMessage(
- "explicit pointee type doesn't match operand's pointee type",
- Ty, BasePointerType->getNonOpaquePointerElementType()));
- }
-
unsigned GEPWidth =
BaseType->isVectorTy()
? cast<FixedVectorType>(BaseType)->getNumElements()
@@ -3991,13 +4058,6 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
ID.ConstantVal = ConstantExpr::getGetElementPtr(Ty, Elts[0], Indices,
InBounds, InRangeOp);
- } else if (Opc == Instruction::Select) {
- if (Elts.size() != 3)
- return error(ID.Loc, "expected three operands to select");
- if (const char *Reason = SelectInst::areInvalidOperands(Elts[0], Elts[1],
- Elts[2]))
- return error(ID.Loc, Reason);
- ID.ConstantVal = ConstantExpr::getSelect(Elts[0], Elts[1], Elts[2]);
} else if (Opc == Instruction::ShuffleVector) {
if (Elts.size() != 3)
return error(ID.Loc, "expected three operands to shufflevector");
@@ -5993,10 +6053,6 @@ bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine) {
auto FRVI = ForwardRefVals.find(FunctionName);
if (FRVI != ForwardRefVals.end()) {
FwdFn = FRVI->second.first;
- if (!FwdFn->getType()->isOpaque() &&
- !FwdFn->getType()->getNonOpaquePointerElementType()->isFunctionTy())
- return error(FRVI->second.second, "invalid forward reference to "
- "function as global value!");
if (FwdFn->getType() != PFT)
return error(FRVI->second.second,
"invalid forward reference to "
@@ -6047,7 +6103,8 @@ bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine) {
Fn->setCallingConv(CC);
Fn->setAttributes(PAL);
Fn->setUnnamedAddr(UnnamedAddr);
- Fn->setAlignment(MaybeAlign(Alignment));
+ if (Alignment)
+ Fn->setAlignment(*Alignment);
Fn->setSection(Section);
Fn->setPartition(Partition);
Fn->setComdat(C);
@@ -7576,13 +7633,6 @@ int LLParser::parseLoad(Instruction *&Inst, PerFunctionState &PFS) {
Ordering == AtomicOrdering::AcquireRelease)
return error(Loc, "atomic load cannot use Release ordering");
- if (!cast<PointerType>(Val->getType())->isOpaqueOrPointeeTypeMatches(Ty)) {
- return error(
- ExplicitTypeLoc,
- typeComparisonErrorMessage(
- "explicit pointee type doesn't match operand's pointee type", Ty,
- Val->getType()->getNonOpaquePointerElementType()));
- }
SmallPtrSet<Type *, 4> Visited;
if (!Alignment && !Ty->isSized(&Visited))
return error(ExplicitTypeLoc, "loading unsized types is not allowed");
@@ -7627,9 +7677,6 @@ int LLParser::parseStore(Instruction *&Inst, PerFunctionState &PFS) {
return error(PtrLoc, "store operand must be a pointer");
if (!Val->getType()->isFirstClassType())
return error(Loc, "store operand must be a first class value");
- if (!cast<PointerType>(Ptr->getType())
- ->isOpaqueOrPointeeTypeMatches(Val->getType()))
- return error(Loc, "stored value and pointer type do not match");
if (isAtomic && !Alignment)
return error(Loc, "atomic store must have explicit non-zero alignment");
if (Ordering == AtomicOrdering::Acquire ||
@@ -7681,12 +7728,6 @@ int LLParser::parseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) {
return tokError("invalid cmpxchg failure ordering");
if (!Ptr->getType()->isPointerTy())
return error(PtrLoc, "cmpxchg operand must be a pointer");
- if (!cast<PointerType>(Ptr->getType())
- ->isOpaqueOrPointeeTypeMatches(Cmp->getType()))
- return error(CmpLoc, "compare value and pointer type do not match");
- if (!cast<PointerType>(Ptr->getType())
- ->isOpaqueOrPointeeTypeMatches(New->getType()))
- return error(NewLoc, "new value and pointer type do not match");
if (Cmp->getType() != New->getType())
return error(NewLoc, "compare value and new value type do not match");
if (!New->getType()->isFirstClassType())
@@ -7772,9 +7813,6 @@ int LLParser::parseAtomicRMW(Instruction *&Inst, PerFunctionState &PFS) {
return tokError("atomicrmw cannot be unordered");
if (!Ptr->getType()->isPointerTy())
return error(PtrLoc, "atomicrmw operand must be a pointer");
- if (!cast<PointerType>(Ptr->getType())
- ->isOpaqueOrPointeeTypeMatches(Val->getType()))
- return error(ValLoc, "atomicrmw value and pointer type do not match");
if (Operation == AtomicRMWInst::Xchg) {
if (!Val->getType()->isIntegerTy() &&
@@ -7843,7 +7881,6 @@ int LLParser::parseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
bool InBounds = EatIfPresent(lltok::kw_inbounds);
Type *Ty = nullptr;
- LocTy ExplicitTypeLoc = Lex.getLoc();
if (parseType(Ty) ||
parseToken(lltok::comma, "expected comma after getelementptr's type") ||
parseTypeAndValue(Ptr, Loc, PFS))
@@ -7854,14 +7891,6 @@ int LLParser::parseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
if (!BasePointerType)
return error(Loc, "base of getelementptr must be a pointer");
- if (!BasePointerType->isOpaqueOrPointeeTypeMatches(Ty)) {
- return error(
- ExplicitTypeLoc,
- typeComparisonErrorMessage(
- "explicit pointee type doesn't match operand's pointee type", Ty,
- BasePointerType->getNonOpaquePointerElementType()));
- }
-
SmallVector<Value*, 16> Indices;
bool AteExtraComma = false;
// GEP returns a vector of pointers if at least one of parameters is a vector.
@@ -7895,6 +7924,11 @@ int LLParser::parseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
if (!Indices.empty() && !Ty->isSized(&Visited))
return error(Loc, "base element of getelementptr must be sized");
+ auto *STy = dyn_cast<StructType>(Ty);
+ if (STy && STy->containsScalableVectorType())
+ return error(Loc, "getelementptr cannot target structure that contains "
+ "scalable vector type");
+
if (!GetElementPtrInst::getIndexedType(Ty, Indices))
return error(Loc, "invalid getelementptr indices");
Inst = GetElementPtrInst::Create(Ty, Ptr, Indices);
@@ -9884,7 +9918,7 @@ bool LLParser::parseMemProfs(std::vector<MIBInfo> &MIBs) {
}
/// AllocType
-/// := ('none'|'notcold'|'cold'|'notcoldandcold')
+/// := ('none'|'notcold'|'cold'|'hot')
bool LLParser::parseAllocType(uint8_t &AllocType) {
switch (Lex.getKind()) {
case lltok::kw_none:
@@ -9896,9 +9930,8 @@ bool LLParser::parseAllocType(uint8_t &AllocType) {
case lltok::kw_cold:
AllocType = (uint8_t)AllocationType::Cold;
break;
- case lltok::kw_notcoldandcold:
- AllocType =
- (uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold;
+ case lltok::kw_hot:
+ AllocType = (uint8_t)AllocationType::Hot;
break;
default:
return error(Lex.getLoc(), "invalid alloc type");
diff --git a/llvm/lib/AsmParser/Parser.cpp b/llvm/lib/AsmParser/Parser.cpp
index 035eea81378e..eded892f358a 100644
--- a/llvm/lib/AsmParser/Parser.cpp
+++ b/llvm/lib/AsmParser/Parser.cpp
@@ -28,9 +28,9 @@ static bool parseAssemblyInto(MemoryBufferRef F, Module *M,
std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(F);
SM.AddNewSourceBuffer(std::move(Buf), SMLoc());
- LLVMContext Context;
+ std::optional<LLVMContext> OptContext;
return LLParser(F.getBuffer(), SM, Err, M, Index,
- M ? M->getContext() : Context, Slots)
+ M ? M->getContext() : OptContext.emplace(), Slots)
.Run(UpgradeDebugInfo, DataLayoutCallback);
}
diff --git a/llvm/lib/BinaryFormat/Dwarf.cpp b/llvm/lib/BinaryFormat/Dwarf.cpp
index a9bbe41125b1..e4e5b5dd8c0e 100644
--- a/llvm/lib/BinaryFormat/Dwarf.cpp
+++ b/llvm/lib/BinaryFormat/Dwarf.cpp
@@ -12,8 +12,8 @@
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
using namespace dwarf;
@@ -172,6 +172,40 @@ unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) {
.Default(0);
}
+static StringRef LlvmUserOperationEncodingString(unsigned Encoding) {
+ switch (Encoding) {
+ default:
+ llvm_unreachable("unhandled DWARF operation with LLVM user op");
+#define HANDLE_DW_OP_LLVM_USEROP(ID, NAME) \
+ case DW_OP_LLVM_##NAME: \
+ return "DW_OP_LLVM_" #NAME;
+#include "llvm/BinaryFormat/Dwarf.def"
+ }
+}
+
+static unsigned
+getLlvmUserOperationEncoding(StringRef LlvmUserOperationEncodingString) {
+ unsigned E = StringSwitch<unsigned>(LlvmUserOperationEncodingString)
+#define HANDLE_DW_OP_LLVM_USEROP(ID, NAME) .Case(#NAME, DW_OP_LLVM_##NAME)
+#include "llvm/BinaryFormat/Dwarf.def"
+ .Default(0);
+ assert(E && "unhandled DWARF operation string with LLVM user op");
+ return E;
+}
+
+StringRef llvm::dwarf::SubOperationEncodingString(unsigned OpEncoding,
+ unsigned SubOpEncoding) {
+ assert(OpEncoding == DW_OP_LLVM_user);
+ return LlvmUserOperationEncodingString(SubOpEncoding);
+}
+
+unsigned
+llvm::dwarf::getSubOperationEncoding(unsigned OpEncoding,
+ StringRef SubOperationEncodingString) {
+ assert(OpEncoding == DW_OP_LLVM_user);
+ return getLlvmUserOperationEncoding(SubOperationEncodingString);
+}
+
unsigned llvm::dwarf::OperationVersion(dwarf::LocationAtom Op) {
switch (Op) {
default:
@@ -737,6 +771,7 @@ std::optional<uint8_t> llvm::dwarf::getFixedFormByteSize(dwarf::Form Form,
return 2;
case DW_FORM_strx3:
+ case DW_FORM_addrx3:
return 3;
case DW_FORM_data4:
diff --git a/llvm/lib/BinaryFormat/MachO.cpp b/llvm/lib/BinaryFormat/MachO.cpp
index 02a515c94399..1b5941cf5275 100644
--- a/llvm/lib/BinaryFormat/MachO.cpp
+++ b/llvm/lib/BinaryFormat/MachO.cpp
@@ -7,8 +7,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/BinaryFormat/MachO.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Support/ARMTargetParser.h"
+#include "llvm/TargetParser/ARMTargetParser.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/lib/BinaryFormat/Magic.cpp b/llvm/lib/BinaryFormat/Magic.cpp
index 0e5a5ea279b5..025334f9f3f4 100644
--- a/llvm/lib/BinaryFormat/Magic.cpp
+++ b/llvm/lib/BinaryFormat/Magic.cpp
@@ -228,11 +228,14 @@ file_magic llvm::identify_magic(StringRef Magic) {
return file_magic::coff_object;
break;
- case 0x2d: // YAML '-'
+ case 0x2d: // YAML '-' MachO TBD.
if (startswith(Magic, "--- !tapi") || startswith(Magic, "---\narchs:"))
return file_magic::tapi_file;
break;
-
+ case 0x7b: // JSON '{' MachO TBD.
+ return file_magic::tapi_file;
+ break;
+
case 'D': // DirectX container file - DXBC
if (startswith(Magic, "DXBC"))
return file_magic::dxcontainer_object;
@@ -243,6 +246,11 @@ file_magic llvm::identify_magic(StringRef Magic) {
return file_magic::coff_object;
break;
+ case 0x4e: // ARM64X windows
+ if (Magic[1] == char(0xA6))
+ return file_magic::coff_object;
+ break;
+
default:
break;
}
diff --git a/llvm/lib/BinaryFormat/MsgPackDocument.cpp b/llvm/lib/BinaryFormat/MsgPackDocument.cpp
index 81ea4cee1a9d..21ffa35dfb6e 100644
--- a/llvm/lib/BinaryFormat/MsgPackDocument.cpp
+++ b/llvm/lib/BinaryFormat/MsgPackDocument.cpp
@@ -80,6 +80,10 @@ DocNode &DocNode::operator=(StringRef Val) {
*this = getDocument()->getNode(Val);
return *this;
}
+DocNode &DocNode::operator=(MemoryBufferRef Val) {
+ *this = getDocument()->getNode(Val);
+ return *this;
+}
DocNode &DocNode::operator=(bool Val) {
*this = getDocument()->getNode(Val);
return *this;
@@ -167,6 +171,9 @@ bool Document::readFromBlob(
case Type::String:
Node = getNode(Obj.Raw);
break;
+ case Type::Binary:
+ Node = getNode(MemoryBufferRef(Obj.Raw, ""));
+ break;
case Type::Map:
Node = getMapNode();
break;
@@ -277,6 +284,9 @@ void Document::writeToBlob(std::string &Blob) {
case Type::String:
MPWriter.write(Node.getString());
break;
+ case Type::Binary:
+ MPWriter.write(Node.getBinary());
+ break;
case Type::Empty:
llvm_unreachable("unhandled empty msgpack node");
default:
diff --git a/llvm/lib/BinaryFormat/MsgPackReader.cpp b/llvm/lib/BinaryFormat/MsgPackReader.cpp
index 146edaa95b81..2599d75efa08 100644
--- a/llvm/lib/BinaryFormat/MsgPackReader.cpp
+++ b/llvm/lib/BinaryFormat/MsgPackReader.cpp
@@ -74,7 +74,8 @@ Expected<bool> Reader::read(Object &Obj) {
return make_error<StringError>(
"Invalid Float32 with insufficient payload",
std::make_error_code(std::errc::invalid_argument));
- Obj.Float = BitsToFloat(endian::read<uint32_t, Endianness>(Current));
+ Obj.Float =
+ llvm::bit_cast<float>(endian::read<uint32_t, Endianness>(Current));
Current += sizeof(float);
return true;
case FirstByte::Float64:
@@ -83,7 +84,8 @@ Expected<bool> Reader::read(Object &Obj) {
return make_error<StringError>(
"Invalid Float64 with insufficient payload",
std::make_error_code(std::errc::invalid_argument));
- Obj.Float = BitsToDouble(endian::read<uint64_t, Endianness>(Current));
+ Obj.Float =
+ llvm::bit_cast<double>(endian::read<uint64_t, Endianness>(Current));
Current += sizeof(double);
return true;
case FirstByte::Str8:
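// Illustrative aside (a minimal self-contained sketch, not part of the
// vendored diff): llvm::bit_cast, like the C++20 std::bit_cast used below,
// reinterprets the raw MsgPack payload bits as a float/double without the
// undefined behaviour of a pointer pun.
#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  uint32_t Bits = 0x3f800000u;          // IEEE-754 single-precision 1.0f
  float F = std::bit_cast<float>(Bits); // what BitsToFloat(Bits) used to do
  assert(F == 1.0f);
  return 0;
}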
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index f014521264c1..3797a44c1793 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -17,13 +17,13 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Bitcode/BitcodeCommon.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
#include "llvm/Bitstream/BitstreamReader.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Argument.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/BasicBlock.h"
@@ -72,6 +72,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ModRef.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -1355,13 +1356,7 @@ Type *BitcodeReader::getPtrElementTypeByID(unsigned ID) {
if (!Ty->isPointerTy())
return nullptr;
- Type *ElemTy = getTypeByID(getContainedTypeID(ID, 0));
- if (!ElemTy)
- return nullptr;
-
- assert(cast<PointerType>(Ty)->isOpaqueOrPointeeTypeMatches(ElemTy) &&
- "Incorrect element type");
- return ElemTy;
+ return getTypeByID(getContainedTypeID(ID, 0));
}
unsigned BitcodeReader::getVirtualTypeID(Type *Ty,
@@ -1380,17 +1375,6 @@ unsigned BitcodeReader::getVirtualTypeID(Type *Ty,
return It->second;
}
-#ifndef NDEBUG
- if (!Ty->isOpaquePointerTy()) {
- assert(Ty->getNumContainedTypes() == ChildTypeIDs.size() &&
- "Wrong number of contained types");
- for (auto Pair : zip(Ty->subtypes(), ChildTypeIDs)) {
- assert(std::get<0>(Pair) == getTypeByID(std::get<1>(Pair)) &&
- "Incorrect contained type ID");
- }
- }
-#endif
-
unsigned TypeID = TypeList.size();
TypeList.push_back(Ty);
if (!ChildTypeIDs.empty())
@@ -1399,7 +1383,9 @@ unsigned BitcodeReader::getVirtualTypeID(Type *Ty,
return TypeID;
}
-static bool isConstExprSupported(uint8_t Opcode) {
+static bool isConstExprSupported(const BitcodeConstant *BC) {
+ uint8_t Opcode = BC->Opcode;
+
// These are not real constant expressions, always consider them supported.
if (Opcode >= BitcodeConstant::FirstSpecialOpcode)
return true;
@@ -1412,7 +1398,16 @@ static bool isConstExprSupported(uint8_t Opcode) {
if (Instruction::isBinaryOp(Opcode))
return ConstantExpr::isSupportedBinOp(Opcode);
- return Opcode != Instruction::FNeg;
+ if (Opcode == Instruction::GetElementPtr)
+ return ConstantExpr::isSupportedGetElementPtr(BC->SrcElemTy);
+
+ switch (Opcode) {
+ case Instruction::FNeg:
+ case Instruction::Select:
+ return false;
+ default:
+ return true;
+ }
}
Expected<Value *> BitcodeReader::materializeValue(unsigned StartValID,
@@ -1467,7 +1462,7 @@ Expected<Value *> BitcodeReader::materializeValue(unsigned StartValID,
ConstOps.push_back(C);
// Materialize as constant expression if possible.
- if (isConstExprSupported(BC->Opcode) && ConstOps.size() == Ops.size()) {
+ if (isConstExprSupported(BC) && ConstOps.size() == Ops.size()) {
Constant *C;
if (Instruction::isCast(BC->Opcode)) {
C = UpgradeBitCastExpr(BC->Opcode, ConstOps[0], BC->getType());
@@ -1544,9 +1539,6 @@ Expected<Value *> BitcodeReader::materializeValue(unsigned StartValID,
ArrayRef(ConstOps).drop_front(),
BC->Flags, BC->getInRangeIndex());
break;
- case Instruction::Select:
- C = ConstantExpr::getSelect(ConstOps[0], ConstOps[1], ConstOps[2]);
- break;
case Instruction::ExtractElement:
C = ConstantExpr::getExtractElement(ConstOps[0], ConstOps[1]);
break;
@@ -1928,6 +1920,8 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
return Attribute::JumpTable;
case bitc::ATTR_KIND_MEMORY:
return Attribute::Memory;
+ case bitc::ATTR_KIND_NOFPCLASS:
+ return Attribute::NoFPClass;
case bitc::ATTR_KIND_MIN_SIZE:
return Attribute::MinSize;
case bitc::ATTR_KIND_NAKED:
@@ -2205,6 +2199,9 @@ Error BitcodeReader::parseAttributeGroupBlock() {
B.addAllocKindAttr(static_cast<AllocFnKind>(Record[++i]));
else if (Kind == Attribute::Memory)
B.addMemoryAttr(MemoryEffects::createFromIntValue(Record[++i]));
+ else if (Kind == Attribute::NoFPClass)
+ B.addNoFPClassAttr(
+ static_cast<FPClassTest>(Record[++i] & fcAllFlags));
} else if (Record[i] == 3 || Record[i] == 4) { // String attribute
bool HasValue = (Record[i++] == 4);
SmallString<64> KindStr;
@@ -2369,9 +2366,6 @@ Error BitcodeReader::parseTypeTableBody() {
case bitc::TYPE_CODE_OPAQUE_POINTER: { // OPAQUE_POINTER: [addrspace]
if (Record.size() != 1)
return error("Invalid opaque pointer record");
- if (Context.supportsTypedPointers())
- return error(
- "Opaque pointers are only supported in -opaque-pointers mode");
unsigned AddressSpace = Record[0];
ResultTy = PointerType::get(Context, AddressSpace);
break;
@@ -3273,9 +3267,7 @@ Error BitcodeReader::parseConstants() {
PointeeType = getPtrElementTypeByID(BaseTypeID);
if (!PointeeType)
return error("Missing element type for old-style constant GEP");
- } else if (!OrigPtrTy->isOpaqueOrPointeeTypeMatches(PointeeType))
- return error("Explicit gep operator type does not match pointee type "
- "of pointer operand");
+ }
V = BitcodeConstant::create(Alloc, CurTy,
{Instruction::GetElementPtr, InBounds,
@@ -3693,7 +3685,7 @@ Error BitcodeReader::globalCleanup() {
UpgradedVariables.emplace_back(&GV, Upgraded);
for (auto &Pair : UpgradedVariables) {
Pair.first->eraseFromParent();
- TheModule->getGlobalList().push_back(Pair.second);
+ TheModule->insertGlobalVariable(Pair.second);
}
// Force deallocation of memory for these vectors to favor the client that
@@ -3868,7 +3860,8 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
GlobalVariable *NewGV =
new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, Name,
nullptr, TLM, AddressSpace, ExternallyInitialized);
- NewGV->setAlignment(Alignment);
+ if (Alignment)
+ NewGV->setAlignment(*Alignment);
if (!Section.empty())
NewGV->setSection(Section);
NewGV->setVisibility(Visibility);
@@ -4027,7 +4020,8 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
MaybeAlign Alignment;
if (Error Err = parseAlignmentValue(Record[5], Alignment))
return Err;
- Func->setAlignment(Alignment);
+ if (Alignment)
+ Func->setAlignment(*Alignment);
if (Record[6]) {
if (Record[6] - 1 >= SectionTable.size())
return error("Invalid ID");
@@ -4513,10 +4507,6 @@ Error BitcodeReader::parseBitcodeInto(Module *M, bool ShouldLazyLoadMetadata,
Error BitcodeReader::typeCheckLoadStoreInst(Type *ValType, Type *PtrType) {
if (!isa<PointerType>(PtrType))
return error("Load/Store operand is not a pointer type");
-
- if (!cast<PointerType>(PtrType)->isOpaqueOrPointeeTypeMatches(ValType))
- return error("Explicit load/store type does not match pointee "
- "type of pointer operand");
if (!PointerType::isLoadableOrStorableType(ValType))
return error("Cannot load/store from pointer");
return Error::success();
@@ -4943,10 +4933,6 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
if (BasePtr->getType()->isVectorTy())
TyID = getContainedTypeID(TyID);
Ty = getTypeByID(TyID);
- } else if (!cast<PointerType>(BasePtr->getType()->getScalarType())
- ->isOpaqueOrPointeeTypeMatches(Ty)) {
- return error(
- "Explicit gep type does not match pointee type of pointer operand");
}
SmallVector<Value*, 16> GEPIdx;
@@ -5537,9 +5523,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
FTy = dyn_cast_or_null<FunctionType>(getTypeByID(FTyID));
if (!FTy)
return error("Callee is not of pointer to function type");
- } else if (!CalleeTy->isOpaqueOrPointeeTypeMatches(FTy))
- return error("Explicit invoke type does not match pointee type of "
- "callee operand");
+ }
if (Record.size() < FTy->getNumParams() + OpNum)
return error("Insufficient operands to call");
@@ -5633,9 +5617,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
FTy = dyn_cast_or_null<FunctionType>(getTypeByID(FTyID));
if (!FTy)
return error("Callee is not of pointer to function type");
- } else if (!OpTy->isOpaqueOrPointeeTypeMatches(FTy))
- return error("Explicit call type does not match pointee type of "
- "callee operand");
+ }
if (Record.size() < FTy->getNumParams() + OpNum)
return error("Insufficient operands to call");
@@ -6343,9 +6325,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
FTy = dyn_cast_or_null<FunctionType>(getTypeByID(FTyID));
if (!FTy)
return error("Callee is not of pointer to function type");
- } else if (!OpTy->isOpaqueOrPointeeTypeMatches(FTy))
- return error("Explicit call type does not match pointee type of "
- "callee operand");
+ }
if (Record.size() < FTy->getNumParams() + OpNum)
return error("Insufficient operands to call");
@@ -8017,14 +7997,17 @@ Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() {
return std::move(Index);
}
-static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream,
- unsigned ID) {
+static Expected<std::pair<bool, bool>>
+getEnableSplitLTOUnitAndUnifiedFlag(BitstreamCursor &Stream,
+ unsigned ID,
+ BitcodeLTOInfo &LTOInfo) {
if (Error Err = Stream.EnterSubBlock(ID))
return std::move(Err);
SmallVector<uint64_t, 64> Record;
while (true) {
BitstreamEntry Entry;
+    std::pair<bool, bool> Result = {false, false};
if (Error E = Stream.advanceSkippingSubblocks().moveInto(Entry))
return std::move(E);
@@ -8032,10 +8015,10 @@ static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream,
case BitstreamEntry::SubBlock: // Handled for us already.
case BitstreamEntry::Error:
return error("Malformed block");
- case BitstreamEntry::EndBlock:
- // If no flags record found, conservatively return true to mimic
- // behavior before this flag was added.
- return true;
+ case BitstreamEntry::EndBlock: {
+ // If no flags record found, set both flags to false.
+ return Result;
+ }
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -8052,9 +8035,13 @@ static Expected<bool> getEnableSplitLTOUnitFlag(BitstreamCursor &Stream,
case bitc::FS_FLAGS: { // [flags]
uint64_t Flags = Record[0];
// Scan flags.
- assert(Flags <= 0xff && "Unexpected bits in flag");
+ assert(Flags <= 0x2ff && "Unexpected bits in flag");
+
+ bool EnableSplitLTOUnit = Flags & 0x8;
+ bool UnifiedLTO = Flags & 0x200;
+ Result = {EnableSplitLTOUnit, UnifiedLTO};
- return Flags & 0x8;
+ return Result;
}
}
}
@@ -8080,25 +8067,31 @@ Expected<BitcodeLTOInfo> BitcodeModule::getLTOInfo() {
return error("Malformed block");
case BitstreamEntry::EndBlock:
return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/false,
- /*EnableSplitLTOUnit=*/false};
+ /*EnableSplitLTOUnit=*/false, /*UnifiedLTO=*/false};
case BitstreamEntry::SubBlock:
if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID) {
- Expected<bool> EnableSplitLTOUnit =
- getEnableSplitLTOUnitFlag(Stream, Entry.ID);
- if (!EnableSplitLTOUnit)
- return EnableSplitLTOUnit.takeError();
- return BitcodeLTOInfo{/*IsThinLTO=*/true, /*HasSummary=*/true,
- *EnableSplitLTOUnit};
+ BitcodeLTOInfo LTOInfo;
+ Expected<std::pair<bool, bool>> Flags =
+ getEnableSplitLTOUnitAndUnifiedFlag(Stream, Entry.ID, LTOInfo);
+ if (!Flags)
+ return Flags.takeError();
+ std::tie(LTOInfo.EnableSplitLTOUnit, LTOInfo.UnifiedLTO) = Flags.get();
+ LTOInfo.IsThinLTO = true;
+ LTOInfo.HasSummary = true;
+ return LTOInfo;
}
if (Entry.ID == bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID) {
- Expected<bool> EnableSplitLTOUnit =
- getEnableSplitLTOUnitFlag(Stream, Entry.ID);
- if (!EnableSplitLTOUnit)
- return EnableSplitLTOUnit.takeError();
- return BitcodeLTOInfo{/*IsThinLTO=*/false, /*HasSummary=*/true,
- *EnableSplitLTOUnit};
+ BitcodeLTOInfo LTOInfo;
+ Expected<std::pair<bool, bool>> Flags =
+ getEnableSplitLTOUnitAndUnifiedFlag(Stream, Entry.ID, LTOInfo);
+ if (!Flags)
+ return Flags.takeError();
+ std::tie(LTOInfo.EnableSplitLTOUnit, LTOInfo.UnifiedLTO) = Flags.get();
+ LTOInfo.IsThinLTO = false;
+ LTOInfo.HasSummary = true;
+ return LTOInfo;
}
// Ignore other sub-blocks.
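(For reference, a minimal standalone sketch, not part of the diff, of the flag decoding that getEnableSplitLTOUnitAndUnifiedFlag now performs. Only the masks 0x8 and 0x200 and the 0x2ff sanity bound are taken from the hunks above; the surrounding harness is illustrative.)

// Standalone illustration of the summary FS_FLAGS decoding above.
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <utility>

static std::pair<bool, bool> decodeSummaryFlags(uint64_t Flags) {
  // Bits outside 0x2ff would indicate a newer or corrupted format.
  assert(Flags <= 0x2ff && "Unexpected bits in flag");
  bool EnableSplitLTOUnit = (Flags & 0x8) != 0;  // bit 3
  bool UnifiedLTO = (Flags & 0x200) != 0;        // bit 9
  return {EnableSplitLTOUnit, UnifiedLTO};
}

int main() {
  auto [Split, Unified] = decodeSummaryFlags(0x208);
  std::printf("EnableSplitLTOUnit=%d UnifiedLTO=%d\n", Split, Unified);
  return 0;
}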
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index 4b5cfedaa99c..0a9a80688a41 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -53,6 +54,7 @@
#include <deque>
#include <iterator>
#include <limits>
+#include <map>
#include <optional>
#include <string>
#include <tuple>
@@ -463,6 +465,9 @@ class MetadataLoader::MetadataLoaderImpl {
bool NeedUpgradeToDIGlobalVariableExpression = false;
bool NeedDeclareExpressionUpgrade = false;
+ /// Map DILocalScope to the enclosing DISubprogram, if any.
+ DenseMap<DILocalScope *, DISubprogram *> ParentSubprogram;
+
/// True if metadata is being parsed for a module being ThinLTO imported.
bool IsImporting = false;
@@ -521,6 +526,84 @@ class MetadataLoader::MetadataLoaderImpl {
}
}
+ DISubprogram *findEnclosingSubprogram(DILocalScope *S) {
+ if (!S)
+ return nullptr;
+ if (auto *SP = ParentSubprogram[S]) {
+ return SP;
+ }
+
+ DILocalScope *InitialScope = S;
+ DenseSet<DILocalScope *> Visited;
+ while (S && !isa<DISubprogram>(S)) {
+ S = dyn_cast_or_null<DILocalScope>(S->getScope());
+ if (Visited.contains(S))
+ break;
+ Visited.insert(S);
+ }
+ ParentSubprogram[InitialScope] = llvm::dyn_cast_or_null<DISubprogram>(S);
+
+ return ParentSubprogram[InitialScope];
+ }
+
+ /// Move local imports from DICompileUnit's 'imports' field to
+ /// DISubprogram's retainedNodes.
+ void upgradeCULocals() {
+ if (NamedMDNode *CUNodes = TheModule.getNamedMetadata("llvm.dbg.cu")) {
+ for (unsigned I = 0, E = CUNodes->getNumOperands(); I != E; ++I) {
+ auto *CU = dyn_cast<DICompileUnit>(CUNodes->getOperand(I));
+ if (!CU)
+ continue;
+
+ if (auto *RawImported = CU->getRawImportedEntities()) {
+ // Collect a set of imported entities to be moved.
+ SetVector<Metadata *> EntitiesToRemove;
+ for (Metadata *Op : CU->getImportedEntities()->operands()) {
+ auto *IE = cast<DIImportedEntity>(Op);
+ if (auto *S = dyn_cast_or_null<DILocalScope>(IE->getScope())) {
+ EntitiesToRemove.insert(IE);
+ }
+ }
+
+ if (!EntitiesToRemove.empty()) {
+ // Make a new list of CU's 'imports'.
+ SmallVector<Metadata *> NewImports;
+ for (Metadata *Op : CU->getImportedEntities()->operands()) {
+ if (!EntitiesToRemove.contains(cast<DIImportedEntity>(Op))) {
+ NewImports.push_back(Op);
+ }
+ }
+
+ // Find DISubprogram corresponding to each entity.
+ std::map<DISubprogram *, SmallVector<Metadata *>> SPToEntities;
+ for (auto *I : EntitiesToRemove) {
+ auto *Entity = cast<DIImportedEntity>(I);
+ if (auto *SP = findEnclosingSubprogram(
+ cast<DILocalScope>(Entity->getScope()))) {
+ SPToEntities[SP].push_back(Entity);
+ }
+ }
+
+ // Update DISubprograms' retainedNodes.
+ for (auto I = SPToEntities.begin(); I != SPToEntities.end(); ++I) {
+ auto *SP = I->first;
+ auto RetainedNodes = SP->getRetainedNodes();
+ SmallVector<Metadata *> MDs(RetainedNodes.begin(),
+ RetainedNodes.end());
+ MDs.append(I->second);
+ SP->replaceRetainedNodes(MDNode::get(Context, MDs));
+ }
+
+ // Remove entities with local scope from CU.
+ CU->replaceImportedEntities(MDTuple::get(Context, NewImports));
+ }
+ }
+ }
+ }
+
+ ParentSubprogram.clear();
+ }
+
/// Remove a leading DW_OP_deref from DIExpressions in a dbg.declare that
/// describes a function argument.
void upgradeDeclareExpressions(Function &F) {
@@ -625,6 +708,7 @@ class MetadataLoader::MetadataLoaderImpl {
void upgradeDebugInfo() {
upgradeCUSubprograms();
upgradeCUVariables();
+ upgradeCULocals();
}
void callMDTypeCallback(Metadata **Val, unsigned TypeID);
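(A toy sketch of the traversal pattern findEnclosingSubprogram relies on above: walk parent scopes until a subprogram is reached, bail out on cycles, and memoize the result per starting scope. The Scope type here is a stand-in, not LLVM's DILocalScope.)

#include <unordered_map>
#include <unordered_set>

struct Scope {
  Scope *Parent = nullptr;
  bool IsSubprogram = false;
};

// Cache playing the role of the loader's ParentSubprogram map.
static std::unordered_map<const Scope *, Scope *> EnclosingCache;

Scope *findEnclosingSubprogram(Scope *S) {
  if (!S)
    return nullptr;
  if (auto It = EnclosingCache.find(S); It != EnclosingCache.end())
    return It->second;

  Scope *Initial = S;
  std::unordered_set<const Scope *> Visited;
  while (S && !S->IsSubprogram) {
    S = S->Parent;
    if (!Visited.insert(S).second)
      break; // self-referential scope chain; give up
  }
  Scope *SP = (S && S->IsSubprogram) ? S : nullptr;
  EnclosingCache[Initial] = SP;
  return SP;
}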
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index f85fd86c114c..9416c7f5a03e 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -23,7 +23,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/BitcodeCommon.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
@@ -69,6 +68,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -681,6 +681,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
return bitc::ATTR_KIND_ALLOC_KIND;
case Attribute::Memory:
return bitc::ATTR_KIND_MEMORY;
+ case Attribute::NoFPClass:
+ return bitc::ATTR_KIND_NOFPCLASS;
case Attribute::Naked:
return bitc::ATTR_KIND_NAKED;
case Attribute::Nest:
@@ -899,15 +901,8 @@ void ModuleBitcodeWriter::writeTypeTable() {
uint64_t NumBits = VE.computeBitsRequiredForTypeIndicies();
- // Abbrev for TYPE_CODE_POINTER.
- auto Abbv = std::make_shared<BitCodeAbbrev>();
- Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits));
- Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0
- unsigned PtrAbbrev = Stream.EmitAbbrev(std::move(Abbv));
-
// Abbrev for TYPE_CODE_OPAQUE_POINTER.
- Abbv = std::make_shared<BitCodeAbbrev>();
+ auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_OPAQUE_POINTER));
Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0
unsigned OpaquePtrAbbrev = Stream.EmitAbbrev(std::move(Abbv));
@@ -982,20 +977,11 @@ void ModuleBitcodeWriter::writeTypeTable() {
case Type::PointerTyID: {
PointerType *PTy = cast<PointerType>(T);
unsigned AddressSpace = PTy->getAddressSpace();
- if (PTy->isOpaque()) {
- // OPAQUE_POINTER: [address space]
- Code = bitc::TYPE_CODE_OPAQUE_POINTER;
- TypeVals.push_back(AddressSpace);
- if (AddressSpace == 0)
- AbbrevToUse = OpaquePtrAbbrev;
- } else {
- // POINTER: [pointee type, address space]
- Code = bitc::TYPE_CODE_POINTER;
- TypeVals.push_back(VE.getTypeID(PTy->getNonOpaquePointerElementType()));
- TypeVals.push_back(AddressSpace);
- if (AddressSpace == 0)
- AbbrevToUse = PtrAbbrev;
- }
+ // OPAQUE_POINTER: [address space]
+ Code = bitc::TYPE_CODE_OPAQUE_POINTER;
+ TypeVals.push_back(AddressSpace);
+ if (AddressSpace == 0)
+ AbbrevToUse = OpaquePtrAbbrev;
break;
}
case Type::FunctionTyID: {
@@ -2683,12 +2669,6 @@ void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal,
}
break;
}
- case Instruction::Select:
- Code = bitc::CST_CODE_CE_SELECT;
- Record.push_back(VE.getValueID(C->getOperand(0)));
- Record.push_back(VE.getValueID(C->getOperand(1)));
- Record.push_back(VE.getValueID(C->getOperand(2)));
- break;
case Instruction::ExtractElement:
Code = bitc::CST_CODE_CE_EXTRACTELT;
Record.push_back(VE.getTypeID(C->getOperand(0)->getType()));
@@ -4099,6 +4079,9 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
// Bits 1-3 are set only in the combined index, skip them.
if (Index->enableSplitLTOUnit())
Flags |= 0x8;
+ if (Index->hasUnifiedLTO())
+ Flags |= 0x200;
+
Stream.EmitRecord(bitc::FS_FLAGS, ArrayRef<uint64_t>{Flags});
if (Index->begin() == Index->end()) {
@@ -4125,7 +4108,7 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // fflags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
@@ -4260,8 +4243,9 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
NameVals.clear();
}
- Stream.EmitRecord(bitc::FS_BLOCK_COUNT,
- ArrayRef<uint64_t>{Index->getBlockCount()});
+ if (Index->getBlockCount())
+ Stream.EmitRecord(bitc::FS_BLOCK_COUNT,
+ ArrayRef<uint64_t>{Index->getBlockCount()});
Stream.ExitBlock();
}
@@ -4591,8 +4575,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
}
}
- Stream.EmitRecord(bitc::FS_BLOCK_COUNT,
- ArrayRef<uint64_t>{Index.getBlockCount()});
+ if (Index.getBlockCount())
+ Stream.EmitRecord(bitc::FS_BLOCK_COUNT,
+ ArrayRef<uint64_t>{Index.getBlockCount()});
Stream.ExitBlock();
}
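(The nofpclass plumbing above, ATTR_KIND_NOFPCLASS in both the reader and the writer, masks the on-disk integer with fcAllFlags before interpreting it, so unknown bits written by a newer producer are dropped. A standalone sketch of that defensive decode; the bit values below are made up and are not LLVM's real FPClassTest layout.)

#include <cstdint>

enum FPClassTest : uint32_t {
  fcNan = 1u << 0, // placeholder bits, for illustration only
  fcInf = 1u << 1,
  fcZero = 1u << 2,
  fcAllFlags = fcNan | fcInf | fcZero,
};

// Keep only the FP-class bits this reader understands.
inline FPClassTest decodeNoFPClass(uint64_t Raw) {
  return static_cast<FPClassTest>(Raw & fcAllFlags);
}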
diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index bb71d72256d8..886c4db069f1 100644
--- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -31,7 +32,6 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <utility>
@@ -200,7 +200,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,
LLVM_DEBUG(dbgs() << "\tRegs:");
std::vector<unsigned> &DefIndices = State->GetDefIndices();
- for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+ for (unsigned Reg = 1; Reg != TRI->getNumRegs(); ++Reg) {
    // If Reg is currently live, then mark that it can't be renamed as
// we don't know the extent of its live-range anymore (now that it
// has been scheduled). If it is not live but was defined in the
@@ -246,9 +246,8 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(
if ((MO.isDef() && MI.isRegTiedToUseOperand(i)) ||
IsImplicitDefUse(MI, MO)) {
const Register Reg = MO.getReg();
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- PassthruRegs.insert(*SubRegs);
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ PassthruRegs.insert(SubReg);
}
}
}
@@ -322,8 +321,7 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
// was not live because otherwise, regardless whether we have an explicit
// use of the subregister, the subregister's contents are needed for the
// uses of the superregister.
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubregReg = *SubRegs;
+ for (MCPhysReg SubregReg : TRI->subregs(Reg)) {
if (!State->IsLive(SubregReg)) {
KillIndices[SubregReg] = KillIdx;
DefIndices[SubregReg] = ~0u;
@@ -353,8 +351,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
// dead, or because only a subregister is live at the def. If we
// don't do this the dead def will be incorrectly merged into the
// previous def.
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef()) continue;
+ for (const MachineOperand &MO : MI.all_defs()) {
Register Reg = MO.getReg();
if (Reg == 0) continue;
@@ -778,7 +775,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
#ifndef NDEBUG
LLVM_DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n");
LLVM_DEBUG(dbgs() << "Available regs:");
- for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ for (unsigned Reg = 1; Reg < TRI->getNumRegs(); ++Reg) {
if (!State->IsLive(Reg))
LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI));
}
@@ -922,7 +919,6 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
}
assert(AntiDepReg != 0);
- if (AntiDepReg == 0) continue;
// Determine AntiDepReg's register group.
const unsigned GroupIndex = State->GetGroup(AntiDepReg);
diff --git a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
index 419cb7626945..cece217e645c 100644
--- a/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -132,6 +132,9 @@ class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState {
AggressiveAntiDepBreaker(MachineFunction &MFi,
const RegisterClassInfo &RCI,
TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
+ AggressiveAntiDepBreaker &
+ operator=(const AggressiveAntiDepBreaker &other) = delete;
+ AggressiveAntiDepBreaker(const AggressiveAntiDepBreaker &other) = delete;
~AggressiveAntiDepBreaker() override;
/// Initialize anti-dep breaking for a new basic block.
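(The header change above deletes the copy constructor and copy assignment of AggressiveAntiDepBreaker, the usual idiom for a class that owns per-block renaming state and must not be duplicated. A generic sketch of the same pattern, unrelated to any LLVM type:)

class NonCopyable {
public:
  NonCopyable() = default;
  // Copying a state-owning helper like this would be a bug; forbid it outright.
  NonCopyable(const NonCopyable &) = delete;
  NonCopyable &operator=(const NonCopyable &) = delete;
};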
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index b9579441a0ba..2065bfbd1c44 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -79,8 +79,8 @@ unsigned llvm::ComputeLinearIndex(Type *Ty,
void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
SmallVectorImpl<EVT> *MemVTs,
- SmallVectorImpl<uint64_t> *Offsets,
- uint64_t StartingOffset) {
+ SmallVectorImpl<TypeSize> *Offsets,
+ TypeSize StartingOffset) {
// Given a struct type, recursively traverse the elements.
if (StructType *STy = dyn_cast<StructType>(Ty)) {
// If the Offsets aren't needed, don't query the struct layout. This allows
@@ -92,7 +92,8 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
EE = STy->element_end();
EI != EE; ++EI) {
// Don't compute the element offset if we didn't get a StructLayout above.
- uint64_t EltOffset = SL ? SL->getElementOffset(EI - EB) : 0;
+ TypeSize EltOffset = SL ? SL->getElementOffset(EI - EB)
+ : TypeSize::get(0, StartingOffset.isScalable());
ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets,
StartingOffset + EltOffset);
}
@@ -101,7 +102,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
// Given an array type, recursively traverse the elements.
if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
Type *EltTy = ATy->getElementType();
- uint64_t EltSize = DL.getTypeAllocSize(EltTy).getFixedValue();
+ TypeSize EltSize = DL.getTypeAllocSize(EltTy);
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
ComputeValueVTs(TLI, DL, EltTy, ValueVTs, MemVTs, Offsets,
StartingOffset + i * EltSize);
@@ -120,12 +121,62 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
- SmallVectorImpl<uint64_t> *Offsets,
- uint64_t StartingOffset) {
+ SmallVectorImpl<TypeSize> *Offsets,
+ TypeSize StartingOffset) {
return ComputeValueVTs(TLI, DL, Ty, ValueVTs, /*MemVTs=*/nullptr, Offsets,
StartingOffset);
}
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<TypeSize> *Offsets,
+ uint64_t StartingOffset) {
+ TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
+ return ComputeValueVTs(TLI, DL, Ty, ValueVTs, Offsets, Offset);
+}
+
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *FixedOffsets,
+ uint64_t StartingOffset) {
+ TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
+ SmallVector<TypeSize, 4> Offsets;
+ if (FixedOffsets)
+ ComputeValueVTs(TLI, DL, Ty, ValueVTs, &Offsets, Offset);
+ else
+ ComputeValueVTs(TLI, DL, Ty, ValueVTs, nullptr, Offset);
+
+ if (FixedOffsets)
+ for (TypeSize Offset : Offsets)
+ FixedOffsets->push_back(Offset.getKnownMinValue());
+}
+
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<EVT> *MemVTs,
+ SmallVectorImpl<TypeSize> *Offsets,
+ uint64_t StartingOffset) {
+ TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
+ return ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, Offsets, Offset);
+}
+
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<EVT> *MemVTs,
+ SmallVectorImpl<uint64_t> *FixedOffsets,
+ uint64_t StartingOffset) {
+ TypeSize Offset = TypeSize::get(StartingOffset, Ty->isScalableTy());
+ SmallVector<TypeSize, 4> Offsets;
+ if (FixedOffsets)
+ ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, &Offsets, Offset);
+ else
+ ComputeValueVTs(TLI, DL, Ty, ValueVTs, MemVTs, nullptr, Offset);
+
+ if (FixedOffsets)
+ for (TypeSize Offset : Offsets)
+ FixedOffsets->push_back(Offset.getKnownMinValue());
+}
+
void llvm::computeValueLLTs(const DataLayout &DL, Type &Ty,
SmallVectorImpl<LLT> &ValueTys,
SmallVectorImpl<uint64_t> *Offsets,
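(The Analysis.cpp hunks switch the ComputeValueVTs offsets from uint64_t to TypeSize so that offsets into scalable types stay symbolic. A rough stand-in type, not llvm::TypeSize and with looser semantics, showing the (known-min, scalable) bookkeeping behind expressions such as StartingOffset + i * EltSize:)

#include <cassert>
#include <cstdint>

struct SimpleTypeSize {
  uint64_t KnownMin = 0; // multiple of vscale when Scalable is true
  bool Scalable = false;

  static SimpleTypeSize get(uint64_t Min, bool Scalable) {
    return {Min, Scalable};
  }
  uint64_t getKnownMinValue() const { return KnownMin; }

  friend SimpleTypeSize operator+(SimpleTypeSize A, SimpleTypeSize B) {
    // Mixing fixed and scalable offsets only makes sense when one side is 0,
    // mirroring the zero-offset case handled in the hunk above.
    assert(A.Scalable == B.Scalable || A.KnownMin == 0 || B.KnownMin == 0);
    return {A.KnownMin + B.KnownMin, A.Scalable || B.Scalable};
  }
  friend SimpleTypeSize operator*(uint64_t N, SimpleTypeSize S) {
    return {N * S.KnownMin, S.Scalable};
  }
};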
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 22ecc5199742..aab3c2681339 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -194,8 +194,8 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
uint32_t CompUnitCount;
uint32_t LocalTypeUnitCount = 0;
uint32_t ForeignTypeUnitCount = 0;
- uint32_t BucketCount;
- uint32_t NameCount;
+ uint32_t BucketCount = 0;
+ uint32_t NameCount = 0;
uint32_t AbbrevTableSize = 0;
uint32_t AugmentationStringSize = sizeof(AugmentationString);
char AugmentationString[8] = {'L', 'L', 'V', 'M', '0', '7', '0', '0'};
@@ -549,9 +549,13 @@ void llvm::emitDWARF5AccelTable(
SmallVector<unsigned, 1> CUIndex(CUs.size());
int Count = 0;
for (const auto &CU : enumerate(CUs)) {
- if (CU.value()->getCUNode()->getNameTableKind() !=
- DICompileUnit::DebugNameTableKind::Default)
+ switch (CU.value()->getCUNode()->getNameTableKind()) {
+ case DICompileUnit::DebugNameTableKind::Default:
+ case DICompileUnit::DebugNameTableKind::Apple:
+ break;
+ default:
continue;
+ }
CUIndex[CU.index()] = Count++;
assert(CU.index() == CU.value()->getUniqueID());
const DwarfCompileUnit *MainCU =
@@ -660,9 +664,9 @@ void AccelTableBase::HashData::print(raw_ostream &OS) const {
void AccelTableBase::print(raw_ostream &OS) const {
// Print Content.
OS << "Entries: \n";
- for (const auto &Entry : Entries) {
- OS << "Name: " << Entry.first() << "\n";
- for (auto *V : Entry.second.Values)
+ for (const auto &[Name, Data] : Entries) {
+ OS << "Name: " << Name << "\n";
+ for (auto *V : Data.Values)
V->print(OS);
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
index 32d8dc793510..00ee4e1b47a8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -10,6 +10,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include <utility>
@@ -24,7 +25,7 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
}
MCSymbol *AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) {
- static const uint8_t AddrSize = Asm.getDataLayout().getPointerSize();
+ static const uint8_t AddrSize = Asm.MAI->getCodePointerSize();
MCSymbol *EndLabel =
Asm.emitDwarfUnitLength("debug_addr", "Length of contribution");
@@ -65,7 +66,7 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
: MCSymbolRefExpr::create(I.first, Asm.OutContext);
for (const MCExpr *Entry : Entries)
- Asm.OutStreamer->emitValue(Entry, Asm.getDataLayout().getPointerSize());
+ Asm.OutStreamer->emitValue(Entry, Asm.MAI->getCodePointerSize());
if (EndLabel)
Asm.OutStreamer->emitLabel(EndLabel);
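(AddressPool::emitHeader above now derives the entry width from MAI->getCodePointerSize() rather than the data-layout pointer size. As a back-of-the-envelope check, assuming the standard DWARF v5 .debug_addr layout, the contribution size after the initial length field is a 2-byte version, a 1-byte address size, a 1-byte segment selector size, and one AddrSize-wide slot per pooled address:)

#include <cstdint>

// Rough size model for a DWARF v5 .debug_addr contribution (illustrative).
inline uint64_t debugAddrContributionSize(uint64_t NumEntries,
                                          uint8_t AddrSize) {
  const uint64_t HeaderSize = 2 /*version*/ + 1 /*address size*/ +
                              1 /*segment selector size*/;
  return HeaderSize + NumEntries * AddrSize;
}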
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 8c126d20fc9a..5381dfdd184c 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -26,12 +26,11 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/COFF.h"
@@ -39,6 +38,7 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -67,6 +67,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GCStrategy.h"
#include "llvm/IR/GlobalAlias.h"
@@ -99,6 +100,7 @@
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SectionKind.h"
+#include "llvm/Object/ELFTypes.h"
#include "llvm/Pass.h"
#include "llvm/Remarks/RemarkStreamer.h"
#include "llvm/Support/Casting.h"
@@ -113,6 +115,7 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cinttypes>
@@ -128,6 +131,13 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
+static cl::opt<std::string> BasicBlockProfileDump(
+ "mbb-profile-dump", cl::Hidden,
+ cl::desc("Basic block profile dump for external cost modelling. If "
+             "matching up BBs with the dump afterwards, the compilation must be "
+ "performed with -basic-block-sections=labels. Enabling this "
+ "flag during in-process ThinLTO is not supported."));
+
const char DWARFGroupName[] = "dwarf";
const char DWARFGroupDescription[] = "DWARF Emission";
const char DbgTimerName[] = "emit";
@@ -414,6 +424,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
AU.addRequired<GCModuleInfo>();
+ AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
}
bool AsmPrinter::doInitialization(Module &M) {
@@ -475,6 +486,11 @@ bool AsmPrinter::doInitialization(Module &M) {
}
}
+ // On AIX, emit bytes for llvm.commandline metadata after .file so that the
+ // C_INFO symbol is preserved if any csect is kept by the linker.
+ if (TM.getTargetTriple().isOSBinFormatXCOFF())
+ emitModuleCommandLines(M);
+
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
for (const auto &I : *MI)
@@ -531,7 +547,7 @@ bool AsmPrinter::doInitialization(Module &M) {
break;
}
assert(MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI ||
- ModuleCFISection != CFISection::EH);
+ usesCFIWithoutEH() || ModuleCFISection != CFISection::EH);
break;
default:
break;
@@ -540,7 +556,7 @@ bool AsmPrinter::doInitialization(Module &M) {
EHStreamer *ES = nullptr;
switch (MAI->getExceptionHandlingType()) {
case ExceptionHandling::None:
- if (!needsCFIForDebug())
+ if (!usesCFIWithoutEH())
break;
[[fallthrough]];
case ExceptionHandling::SjLj:
@@ -585,6 +601,16 @@ bool AsmPrinter::doInitialization(Module &M) {
HI.Handler->beginModule(&M);
}
+ if (!BasicBlockProfileDump.empty()) {
+ std::error_code PossibleFileError;
+ MBBProfileDumpFileOutput = std::make_unique<raw_fd_ostream>(
+ BasicBlockProfileDump, PossibleFileError);
+ if (PossibleFileError) {
+ M.getContext().emitError("Failed to open file for MBB Profile Dump: " +
+ PossibleFileError.message() + "\n");
+ }
+ }
+
return false;
}
@@ -704,8 +730,8 @@ void AsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
if (T.getArch() != Triple::aarch64 || !T.isAndroid())
OutContext.reportError(SMLoc(),
- "Tagged symbols (-fsanitize=memtag-globals) are "
- "only supported on aarch64 + Android.");
+ "tagged symbols (-fsanitize=memtag-globals) are "
+ "only supported on AArch64 Android");
OutStreamer->emitSymbolAttribute(EmittedSym, MAI->getMemtagAttr());
}
@@ -908,13 +934,6 @@ void AsmPrinter::emitFunctionHeader() {
if (F.hasFnAttribute(Attribute::Cold))
OutStreamer->emitSymbolAttribute(CurrentFnSym, MCSA_Cold);
- if (isVerbose()) {
- F.printAsOperand(OutStreamer->getCommentOS(),
- /*PrintType=*/false, F.getParent());
- emitFunctionHeaderComment();
- OutStreamer->getCommentOS() << '\n';
- }
-
// Emit the prefix data.
if (F.hasPrefixData()) {
if (MAI->hasSubsectionsViaSymbols()) {
@@ -958,6 +977,23 @@ void AsmPrinter::emitFunctionHeader() {
CurrentPatchableFunctionEntrySym = CurrentFnBegin;
}
+ // Emit the function prologue data for the indirect call sanitizer.
+ if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
+ assert(MD->getNumOperands() == 2);
+
+ auto *PrologueSig = mdconst::extract<Constant>(MD->getOperand(0));
+ auto *TypeHash = mdconst::extract<Constant>(MD->getOperand(1));
+ emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
+ emitGlobalConstant(F.getParent()->getDataLayout(), TypeHash);
+ }
+
+ if (isVerbose()) {
+ F.printAsOperand(OutStreamer->getCommentOS(),
+ /*PrintType=*/false, F.getParent());
+ emitFunctionHeaderComment();
+ OutStreamer->getCommentOS() << '\n';
+ }
+
// Emit the function descriptor. This is a virtual function to allow targets
// to emit their specific function descriptor. Right now it is only used by
// the AIX target. The PowerPC 64-bit V1 ELF target also uses function
@@ -1005,24 +1041,6 @@ void AsmPrinter::emitFunctionHeader() {
// Emit the prologue data.
if (F.hasPrologueData())
emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData());
-
- // Emit the function prologue data for the indirect call sanitizer.
- if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) {
- assert(TM.getTargetTriple().getArch() == Triple::x86 ||
- TM.getTargetTriple().getArch() == Triple::x86_64);
- assert(MD->getNumOperands() == 2);
-
- auto *PrologueSig = mdconst::extract<Constant>(MD->getOperand(0));
- auto *FTRTTIProxy = mdconst::extract<Constant>(MD->getOperand(1));
- assert(PrologueSig && FTRTTIProxy);
- emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig);
-
- const MCExpr *Proxy = lowerConstant(FTRTTIProxy);
- const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext);
- const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext);
- // Use 32 bit since only small code model is supported.
- OutStreamer->emitValue(PCRel, 4u);
- }
}
/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
@@ -1254,6 +1272,10 @@ AsmPrinter::getFunctionCFISectionType(const Function &F) const {
F.needsUnwindTableEntry())
return CFISection::EH;
+ if (MAI->usesCFIWithoutEH() && F.hasUWTable())
+ return CFISection::EH;
+
+ assert(MMI != nullptr && "Invalid machine module info");
if (MMI->hasDebugInfo() || TM.Options.ForceDwarfFrameSection)
return CFISection::Debug;
@@ -1269,14 +1291,13 @@ bool AsmPrinter::needsSEHMoves() {
return MAI->usesWindowsCFI() && MF->getFunction().needsUnwindTableEntry();
}
-bool AsmPrinter::needsCFIForDebug() const {
- return MAI->getExceptionHandlingType() == ExceptionHandling::None &&
- MAI->doesUseCFIForDebug() && ModuleCFISection == CFISection::Debug;
+bool AsmPrinter::usesCFIWithoutEH() const {
+ return MAI->usesCFIWithoutEH() && ModuleCFISection != CFISection::None;
}
void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType();
- if (!needsCFIForDebug() &&
+ if (!usesCFIWithoutEH() &&
ExceptionHandlingType != ExceptionHandling::DwarfCFI &&
ExceptionHandlingType != ExceptionHandling::ARM)
return;
@@ -1310,21 +1331,16 @@ void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) {
MCConstantExpr::create(FrameOffset, OutContext));
}
-/// Returns the BB metadata to be emitted in the .llvm_bb_addr_map section for a
-/// given basic block. This can be used to capture more precise profile
-/// information. We use the last 4 bits (LSBs) to encode the following
-/// information:
-/// * (1): set if return block (ret or tail call).
-/// * (2): set if ends with a tail call.
-/// * (3): set if exception handling (EH) landing pad.
-/// * (4): set if the block can fall through to its next.
-/// The remaining bits are zero.
-static unsigned getBBAddrMapMetadata(const MachineBasicBlock &MBB) {
+/// Returns the BB metadata to be emitted in the SHT_LLVM_BB_ADDR_MAP section
+/// for a given basic block. This can be used to capture more precise profile
+/// information.
+static uint32_t getBBAddrMapMetadata(const MachineBasicBlock &MBB) {
const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
- return ((unsigned)MBB.isReturnBlock()) |
- ((!MBB.empty() && TII->isTailCall(MBB.back())) << 1) |
- (MBB.isEHPad() << 2) |
- (const_cast<MachineBasicBlock &>(MBB).canFallThrough() << 3);
+ return object::BBAddrMap::BBEntry::Metadata{
+ MBB.isReturnBlock(), !MBB.empty() && TII->isTailCall(MBB.back()),
+ MBB.isEHPad(), const_cast<MachineBasicBlock &>(MBB).canFallThrough(),
+ !MBB.empty() && MBB.rbegin()->isIndirectBranch()}
+ .encode();
}
void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
@@ -1346,7 +1362,7 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
OutStreamer->AddComment("number of basic blocks");
OutStreamer->emitULEB128IntValue(MF.size());
const MCSymbol *PrevMBBEndSymbol = FunctionSymbol;
- // Emit BB Information for each basic block in the funciton.
+ // Emit BB Information for each basic block in the function.
for (const MachineBasicBlock &MBB : MF) {
const MCSymbol *MBBSymbol =
MBB.isEntryBlock() ? FunctionSymbol : MBB.getSymbol();
@@ -1496,9 +1512,22 @@ void AsmPrinter::emitPCSections(const MachineFunction &MF) {
// constants may appear, which will simply be emitted into the current
// section (the user of MD_pcsections decides the format of encoded data).
assert(isa<MDString>(MD.getOperand(0)) && "first operand not a string");
+ bool ConstULEB128 = false;
for (const MDOperand &MDO : MD.operands()) {
if (auto *S = dyn_cast<MDString>(MDO)) {
- SwitchSection(S->getString());
+ // Found string, start of new section!
+ // Find options for this section "<section>!<opts>" - supported options:
+ // C = Compress constant integers of size 2-8 bytes as ULEB128.
+ const StringRef SecWithOpt = S->getString();
+ const size_t OptStart = SecWithOpt.find('!'); // likely npos
+ const StringRef Sec = SecWithOpt.substr(0, OptStart);
+ const StringRef Opts = SecWithOpt.substr(OptStart); // likely empty
+ ConstULEB128 = Opts.find('C') != StringRef::npos;
+#ifndef NDEBUG
+ for (char O : Opts)
+ assert((O == '!' || O == 'C') && "Invalid !pcsections options");
+#endif
+ SwitchSection(Sec);
const MCSymbol *Prev = Syms.front();
for (const MCSymbol *Sym : Syms) {
if (Sym == Prev || !Deltas) {
@@ -1510,17 +1539,30 @@ void AsmPrinter::emitPCSections(const MachineFunction &MF) {
// `base + addr`.
emitLabelDifference(Sym, Base, RelativeRelocSize);
} else {
- emitLabelDifference(Sym, Prev, 4);
+ // Emit delta between symbol and previous symbol.
+ if (ConstULEB128)
+ emitLabelDifferenceAsULEB128(Sym, Prev);
+ else
+ emitLabelDifference(Sym, Prev, 4);
}
Prev = Sym;
}
} else {
+ // Emit auxiliary data after PC.
assert(isa<MDNode>(MDO) && "expecting either string or tuple");
const auto *AuxMDs = cast<MDNode>(MDO);
for (const MDOperand &AuxMDO : AuxMDs->operands()) {
assert(isa<ConstantAsMetadata>(AuxMDO) && "expecting a constant");
- const auto *C = cast<ConstantAsMetadata>(AuxMDO);
- emitGlobalConstant(F.getParent()->getDataLayout(), C->getValue());
+ const Constant *C = cast<ConstantAsMetadata>(AuxMDO)->getValue();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ const uint64_t Size = DL.getTypeStoreSize(C->getType());
+
+ if (auto *CI = dyn_cast<ConstantInt>(C);
+ CI && ConstULEB128 && Size > 1 && Size <= 8) {
+ emitULEB128(CI->getZExtValue());
+ } else {
+ emitGlobalConstant(DL, C);
+ }
}
}
}
@@ -1582,6 +1624,7 @@ void AsmPrinter::emitFunctionBody() {
// Print out code for the function.
bool HasAnyRealCode = false;
int NumInstsInFunction = 0;
+ bool IsEHa = MMI->getModule()->getModuleFlag("eh-asynch");
bool CanDoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
for (auto &MBB : *MF) {
@@ -1620,10 +1663,25 @@ void AsmPrinter::emitFunctionBody() {
emitFrameAlloc(MI);
break;
case TargetOpcode::ANNOTATION_LABEL:
- case TargetOpcode::EH_LABEL:
case TargetOpcode::GC_LABEL:
OutStreamer->emitLabel(MI.getOperand(0).getMCSymbol());
break;
+ case TargetOpcode::EH_LABEL:
+ OutStreamer->emitLabel(MI.getOperand(0).getMCSymbol());
+ // For AsynchEH, insert a Nop if followed by a trap inst
+ // Or the exception won't be caught.
+ // (see MCConstantExpr::create(1,..) in WinException.cpp)
+ // Ignore SDiv/UDiv because a DIV with Const-0 divisor
+      // must have been turned into an UndefValue.
+ // Div with variable opnds won't be the first instruction in
+ // an EH region as it must be led by at least a Load
+ {
+ auto MI2 = std::next(MI.getIterator());
+ if (IsEHa && MI2 != MBB.end() &&
+ (MI2->mayLoadOrStore() || MI2->mayRaiseFPException()))
+ emitNops(1);
+ }
+ break;
case TargetOpcode::INLINEASM:
case TargetOpcode::INLINEASM_BR:
emitInlineAsm(&MI);
@@ -1862,6 +1920,23 @@ void AsmPrinter::emitFunctionBody() {
OutStreamer->getCommentOS() << "-- End function\n";
OutStreamer->addBlankLine();
+
+ // Output MBB ids, function names, and frequencies if the flag to dump
+ // MBB profile information has been set
+ if (MBBProfileDumpFileOutput) {
+ if (!MF->hasBBLabels())
+ MF->getContext().reportError(
+ SMLoc(),
+ "Unable to find BB labels for MBB profile dump. -mbb-profile-dump "
+ "must be called with -basic-block-sections=labels");
+ MachineBlockFrequencyInfo &MBFI =
+ getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI();
+ for (const auto &MBB : *MF) {
+ *MBBProfileDumpFileOutput.get()
+ << MF->getName() << "," << MBB.getBBID() << ","
+ << MBFI.getBlockFreqRelativeToEntryBlock(&MBB) << "\n";
+ }
+ }
}
/// Compute the number of Global Variables that uses a Constant.
@@ -2235,6 +2310,8 @@ bool AsmPrinter::doFinalization(Module &M) {
SmallVector<const GlobalAlias *, 16> AliasStack;
SmallPtrSet<const GlobalAlias *, 16> AliasVisited;
for (const auto &Alias : M.aliases()) {
+ if (Alias.hasAvailableExternallyLinkage())
+ continue;
for (const GlobalAlias *Cur = &Alias; Cur;
Cur = dyn_cast<GlobalAlias>(Cur->getAliasee())) {
if (!AliasVisited.insert(Cur).second)
@@ -2258,7 +2335,9 @@ bool AsmPrinter::doFinalization(Module &M) {
emitModuleIdents(M);
// Emit bytes for llvm.commandline metadata.
- emitModuleCommandLines(M);
+ // The command line metadata is emitted earlier on XCOFF.
+ if (!TM.getTargetTriple().isOSBinFormatXCOFF())
+ emitModuleCommandLines(M);
// Emit .note.GNU-split-stack and .note.GNU-no-split-stack sections if
// split-stack is used.
@@ -2786,6 +2865,22 @@ void AsmPrinter::emitInt16(int Value) const { OutStreamer->emitInt16(Value); }
/// Emit a long directive and value.
void AsmPrinter::emitInt32(int Value) const { OutStreamer->emitInt32(Value); }
+/// EmitSLEB128 - emit the specified signed leb128 value.
+void AsmPrinter::emitSLEB128(int64_t Value, const char *Desc) const {
+ if (isVerbose() && Desc)
+ OutStreamer->AddComment(Desc);
+
+ OutStreamer->emitSLEB128IntValue(Value);
+}
+
+void AsmPrinter::emitULEB128(uint64_t Value, const char *Desc,
+ unsigned PadTo) const {
+ if (isVerbose() && Desc)
+ OutStreamer->AddComment(Desc);
+
+ OutStreamer->emitULEB128IntValue(Value, PadTo);
+}
+
/// Emit a long long directive and value.
void AsmPrinter::emitInt64(uint64_t Value) const {
OutStreamer->emitInt64(Value);
@@ -2799,6 +2894,12 @@ void AsmPrinter::emitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, Size);
}
+/// Emit something like ".uleb128 Hi-Lo".
+void AsmPrinter::emitLabelDifferenceAsULEB128(const MCSymbol *Hi,
+ const MCSymbol *Lo) const {
+ OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo);
+}
+
/// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
/// where the size in bytes of the directive is specified by Size and Label
/// specifies the label. This implicitly uses .set if it is available.
@@ -3288,7 +3389,8 @@ static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) {
ExtraBitsSize = alignTo(ExtraBitsSize, 8);
ExtraBits = Realigned.getRawData()[0] &
(((uint64_t)-1) >> (64 - ExtraBitsSize));
- Realigned.lshrInPlace(ExtraBitsSize);
+ if (BitWidth >= 64)
+ Realigned.lshrInPlace(ExtraBitsSize);
} else
ExtraBits = Realigned.getRawData()[BitWidth / 64];
}
@@ -3917,16 +4019,18 @@ void AsmPrinter::emitXRayTable() {
Flags, 0, GroupName, F.hasComdat(),
MCSection::NonUniqueID, LinkedToSym);
- if (!TM.Options.XRayOmitFunctionIndex)
+ if (TM.Options.XRayFunctionIndex)
FnSledIndex = OutContext.getELFSection(
- "xray_fn_idx", ELF::SHT_PROGBITS, Flags | ELF::SHF_WRITE, 0,
- GroupName, F.hasComdat(), MCSection::NonUniqueID, LinkedToSym);
+ "xray_fn_idx", ELF::SHT_PROGBITS, Flags, 0, GroupName, F.hasComdat(),
+ MCSection::NonUniqueID, LinkedToSym);
} else if (MF->getSubtarget().getTargetTriple().isOSBinFormatMachO()) {
- InstMap = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
+ InstMap = OutContext.getMachOSection("__DATA", "xray_instr_map",
+ MachO::S_ATTR_LIVE_SUPPORT,
SectionKind::getReadOnlyWithRel());
- if (!TM.Options.XRayOmitFunctionIndex)
- FnSledIndex = OutContext.getMachOSection(
- "__DATA", "xray_fn_idx", 0, SectionKind::getReadOnlyWithRel());
+ if (TM.Options.XRayFunctionIndex)
+ FnSledIndex = OutContext.getMachOSection("__DATA", "xray_fn_idx",
+ MachO::S_ATTR_LIVE_SUPPORT,
+ SectionKind::getReadOnly());
} else {
llvm_unreachable("Unsupported target");
}
@@ -3937,7 +4041,8 @@ void AsmPrinter::emitXRayTable() {
// per-function, we are able to create an index entry that will represent the
// range of sleds associated with a function.
auto &Ctx = OutContext;
- MCSymbol *SledsStart = OutContext.createTempSymbol("xray_sleds_start", true);
+ MCSymbol *SledsStart =
+ OutContext.createLinkerPrivateSymbol("xray_sleds_start");
OutStreamer->switchSection(InstMap);
OutStreamer->emitLabel(SledsStart);
for (const auto &Sled : Sleds) {
@@ -3968,8 +4073,17 @@ void AsmPrinter::emitXRayTable() {
OutStreamer->switchSection(FnSledIndex);
OutStreamer->emitCodeAlignment(Align(2 * WordSizeBytes),
&getSubtargetInfo());
- OutStreamer->emitSymbolValue(SledsStart, WordSizeBytes, false);
- OutStreamer->emitSymbolValue(SledsEnd, WordSizeBytes, false);
+ // For Mach-O, use an "l" symbol as the atom of this subsection. The label
+ // difference uses a SUBTRACTOR external relocation which references the
+ // symbol.
+ MCSymbol *Dot = Ctx.createLinkerPrivateSymbol("xray_fn_idx");
+ OutStreamer->emitLabel(Dot);
+ OutStreamer->emitValueImpl(
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(SledsStart, Ctx),
+ MCSymbolRefExpr::create(Dot, Ctx), Ctx),
+ WordSizeBytes);
+ OutStreamer->emitValueImpl(MCConstantExpr::create(Sleds.size(), Ctx),
+ WordSizeBytes);
OutStreamer->switchSection(PrevSection);
}
Sleds.clear();
@@ -4041,7 +4155,7 @@ unsigned int AsmPrinter::getDwarfOffsetByteSize() const {
}
dwarf::FormParams AsmPrinter::getDwarfFormParams() const {
- return {getDwarfVersion(), uint8_t(getPointerSize()),
+ return {getDwarfVersion(), uint8_t(MAI->getCodePointerSize()),
OutStreamer->getContext().getDwarfFormat(),
doesDwarfUseRelocationsAcrossSections()};
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index ecaa64afab4d..21d0d070c247 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -32,28 +32,6 @@ using namespace llvm;
// Dwarf Emission Helper Routines
//===----------------------------------------------------------------------===//
-/// EmitSLEB128 - emit the specified signed leb128 value.
-void AsmPrinter::emitSLEB128(int64_t Value, const char *Desc) const {
- if (isVerbose() && Desc)
- OutStreamer->AddComment(Desc);
-
- OutStreamer->emitSLEB128IntValue(Value);
-}
-
-void AsmPrinter::emitULEB128(uint64_t Value, const char *Desc,
- unsigned PadTo) const {
- if (isVerbose() && Desc)
- OutStreamer->AddComment(Desc);
-
- OutStreamer->emitULEB128IntValue(Value, PadTo);
-}
-
-/// Emit something like ".uleb128 Hi-Lo".
-void AsmPrinter::emitLabelDifferenceAsULEB128(const MCSymbol *Hi,
- const MCSymbol *Lo) const {
- OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo);
-}
-
static const char *DecodeDWARFEncoding(unsigned Encoding) {
switch (Encoding) {
case dwarf::DW_EH_PE_absptr:
@@ -130,7 +108,7 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const {
default:
llvm_unreachable("Invalid encoded value.");
case dwarf::DW_EH_PE_absptr:
- return MF->getDataLayout().getPointerSize();
+ return MAI->getCodePointerSize();
case dwarf::DW_EH_PE_udata2:
return 2;
case dwarf::DW_EH_PE_udata4:
@@ -226,58 +204,59 @@ void AsmPrinter::emitCallSiteValue(uint64_t Value, unsigned Encoding) const {
//===----------------------------------------------------------------------===//
void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
+ SMLoc Loc = Inst.getLoc();
switch (Inst.getOperation()) {
default:
llvm_unreachable("Unexpected instruction");
case MCCFIInstruction::OpDefCfaOffset:
- OutStreamer->emitCFIDefCfaOffset(Inst.getOffset());
+ OutStreamer->emitCFIDefCfaOffset(Inst.getOffset(), Loc);
break;
case MCCFIInstruction::OpAdjustCfaOffset:
- OutStreamer->emitCFIAdjustCfaOffset(Inst.getOffset());
+ OutStreamer->emitCFIAdjustCfaOffset(Inst.getOffset(), Loc);
break;
case MCCFIInstruction::OpDefCfa:
- OutStreamer->emitCFIDefCfa(Inst.getRegister(), Inst.getOffset());
+ OutStreamer->emitCFIDefCfa(Inst.getRegister(), Inst.getOffset(), Loc);
break;
case MCCFIInstruction::OpDefCfaRegister:
- OutStreamer->emitCFIDefCfaRegister(Inst.getRegister());
+ OutStreamer->emitCFIDefCfaRegister(Inst.getRegister(), Loc);
break;
case MCCFIInstruction::OpLLVMDefAspaceCfa:
OutStreamer->emitCFILLVMDefAspaceCfa(Inst.getRegister(), Inst.getOffset(),
- Inst.getAddressSpace());
+ Inst.getAddressSpace(), Loc);
break;
case MCCFIInstruction::OpOffset:
- OutStreamer->emitCFIOffset(Inst.getRegister(), Inst.getOffset());
+ OutStreamer->emitCFIOffset(Inst.getRegister(), Inst.getOffset(), Loc);
break;
case MCCFIInstruction::OpRegister:
- OutStreamer->emitCFIRegister(Inst.getRegister(), Inst.getRegister2());
+ OutStreamer->emitCFIRegister(Inst.getRegister(), Inst.getRegister2(), Loc);
break;
case MCCFIInstruction::OpWindowSave:
- OutStreamer->emitCFIWindowSave();
+ OutStreamer->emitCFIWindowSave(Loc);
break;
case MCCFIInstruction::OpNegateRAState:
- OutStreamer->emitCFINegateRAState();
+ OutStreamer->emitCFINegateRAState(Loc);
break;
case MCCFIInstruction::OpSameValue:
- OutStreamer->emitCFISameValue(Inst.getRegister());
+ OutStreamer->emitCFISameValue(Inst.getRegister(), Loc);
break;
case MCCFIInstruction::OpGnuArgsSize:
- OutStreamer->emitCFIGnuArgsSize(Inst.getOffset());
+ OutStreamer->emitCFIGnuArgsSize(Inst.getOffset(), Loc);
break;
case MCCFIInstruction::OpEscape:
OutStreamer->AddComment(Inst.getComment());
- OutStreamer->emitCFIEscape(Inst.getValues());
+ OutStreamer->emitCFIEscape(Inst.getValues(), Loc);
break;
case MCCFIInstruction::OpRestore:
- OutStreamer->emitCFIRestore(Inst.getRegister());
+ OutStreamer->emitCFIRestore(Inst.getRegister(), Loc);
break;
case MCCFIInstruction::OpUndefined:
- OutStreamer->emitCFIUndefined(Inst.getRegister());
+ OutStreamer->emitCFIUndefined(Inst.getRegister(), Loc);
break;
case MCCFIInstruction::OpRememberState:
- OutStreamer->emitCFIRememberState();
+ OutStreamer->emitCFIRememberState(Loc);
break;
case MCCFIInstruction::OpRestoreState:
- OutStreamer->emitCFIRestoreState();
+ OutStreamer->emitCFIRestoreState(Loc);
break;
}
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index c1588aaea05e..32674bbeb061 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 0a67c4b6beb6..8161de57b58e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Dwarf.h"
@@ -65,6 +64,7 @@
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cctype>
@@ -488,10 +488,10 @@ void CodeViewDebug::recordLocalVariable(LocalVariable &&Var,
// This variable was inlined. Associate it with the InlineSite.
const DISubprogram *Inlinee = Var.DIVar->getScope()->getSubprogram();
InlineSite &Site = getInlineSite(InlinedAt, Inlinee);
- Site.InlinedLocals.emplace_back(Var);
+ Site.InlinedLocals.emplace_back(std::move(Var));
} else {
// This variable goes into the corresponding lexical scope.
- ScopeVariables[LS].emplace_back(Var);
+ ScopeVariables[LS].emplace_back(std::move(Var));
}
}
@@ -569,7 +569,6 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
case dwarf::DW_LANG_C89:
case dwarf::DW_LANG_C99:
case dwarf::DW_LANG_C11:
- case dwarf::DW_LANG_ObjC:
return SourceLanguage::C;
case dwarf::DW_LANG_C_plus_plus:
case dwarf::DW_LANG_C_plus_plus_03:
@@ -595,6 +594,10 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
return SourceLanguage::Swift;
case dwarf::DW_LANG_Rust:
return SourceLanguage::Rust;
+ case dwarf::DW_LANG_ObjC:
+ return SourceLanguage::ObjC;
+ case dwarf::DW_LANG_ObjC_plus_plus:
+ return SourceLanguage::ObjCpp;
default:
// There's no CodeView representation for this language, and CV doesn't
// have an "unknown" option for the language field, so we'll use MASM,
@@ -788,7 +791,6 @@ void CodeViewDebug::emitObjName() {
// Don't emit the filename if we're writing to stdout or to /dev/null.
PathRef = {};
} else {
- llvm::sys::path::remove_dots(PathStore, /*remove_dot_dot=*/true);
PathRef = PathStore;
}
@@ -1158,7 +1160,14 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.AddComment("Function section index");
OS.emitCOFFSectionIndex(Fn);
OS.AddComment("Flags");
- OS.emitInt8(0);
+ ProcSymFlags ProcFlags = ProcSymFlags::HasOptimizedDebugInfo;
+ if (FI.HasFramePointer)
+ ProcFlags |= ProcSymFlags::HasFP;
+ if (GV->hasFnAttribute(Attribute::NoReturn))
+ ProcFlags |= ProcSymFlags::IsNoReturn;
+ if (GV->hasFnAttribute(Attribute::NoInline))
+ ProcFlags |= ProcSymFlags::IsNoInline;
+ OS.emitInt8(static_cast<uint8_t>(ProcFlags));
// Emit the function display name as a null-terminated string.
OS.AddComment("Function name");
// Truncate the name so we won't overflow the record length field.
@@ -1262,7 +1271,8 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
const TargetFrameLowering *TFI = TSI.getFrameLowering();
const TargetRegisterInfo *TRI = TSI.getRegisterInfo();
- for (const MachineFunction::VariableDbgInfo &VI : MF.getVariableDbgInfo()) {
+ for (const MachineFunction::VariableDbgInfo &VI :
+ MF.getInStackSlotVariableDbgInfo()) {
if (!VI.Var)
continue;
assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
@@ -1290,7 +1300,8 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
// Get the frame register used and the offset.
Register FrameReg;
- StackOffset FrameOffset = TFI->getFrameIndexReference(*Asm->MF, VI.Slot, FrameReg);
+ StackOffset FrameOffset =
+ TFI->getFrameIndexReference(*Asm->MF, VI.getStackSlot(), FrameReg);
uint16_t CVReg = TRI->getCodeViewRegNum(FrameReg);
assert(!FrameOffset.getScalable() &&
@@ -1476,6 +1487,7 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
CurFn->EncodedLocalFramePtrReg = EncodedFramePtrReg::StackPtr;
CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::StackPtr;
} else {
+ CurFn->HasFramePointer = true;
// If there is an FP, parameters are always relative to it.
CurFn->EncodedParamFramePtrReg = EncodedFramePtrReg::FramePtr;
if (CurFn->HasStackRealignment) {
@@ -1717,12 +1729,13 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
// Otherwise, if it has an upperboud, use (upperbound - lowerbound + 1),
// where lowerbound is from the LowerBound field of the Subrange,
// or the language default lowerbound if that field is unspecified.
- if (auto *CI = Subrange->getCount().dyn_cast<ConstantInt *>())
+ if (auto *CI = dyn_cast_if_present<ConstantInt *>(Subrange->getCount()))
Count = CI->getSExtValue();
- else if (auto *UI = Subrange->getUpperBound().dyn_cast<ConstantInt *>()) {
+ else if (auto *UI = dyn_cast_if_present<ConstantInt *>(
+ Subrange->getUpperBound())) {
// Fortran uses 1 as the default lowerbound; other languages use 0.
int64_t Lowerbound = (moduleIsInFortran()) ? 1 : 0;
- auto *LI = Subrange->getLowerBound().dyn_cast<ConstantInt *>();
+ auto *LI = dyn_cast_if_present<ConstantInt *>(Subrange->getLowerBound());
Lowerbound = (LI) ? LI->getSExtValue() : Lowerbound;
Count = UI->getSExtValue() - Lowerbound + 1;
}
@@ -1793,12 +1806,14 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
}
break;
case dwarf::DW_ATE_complex_float:
+ // The CodeView size for a complex represents the size of
+ // an individual component.
switch (ByteSize) {
- case 2: STK = SimpleTypeKind::Complex16; break;
- case 4: STK = SimpleTypeKind::Complex32; break;
- case 8: STK = SimpleTypeKind::Complex64; break;
- case 10: STK = SimpleTypeKind::Complex80; break;
- case 16: STK = SimpleTypeKind::Complex128; break;
+ case 4: STK = SimpleTypeKind::Complex16; break;
+ case 8: STK = SimpleTypeKind::Complex32; break;
+ case 16: STK = SimpleTypeKind::Complex64; break;
+ case 20: STK = SimpleTypeKind::Complex80; break;
+ case 32: STK = SimpleTypeKind::Complex128; break;
}
break;
case dwarf::DW_ATE_float:
@@ -3279,7 +3294,7 @@ void CodeViewDebug::emitDebugInfoForGlobals() {
// Second, emit each global that is in a comdat into its own .debug$S
// section along with its own symbol substream.
for (const CVGlobalVariable &CVGV : ComdatVariables) {
- const GlobalVariable *GV = CVGV.GVInfo.get<const GlobalVariable *>();
+ const GlobalVariable *GV = cast<const GlobalVariable *>(CVGV.GVInfo);
MCSymbol *GVSym = Asm->getSymbol(GV);
OS.AddComment("Symbol subsection for " +
Twine(GlobalValue::dropLLVMManglingEscape(GV->getName())));
@@ -3388,7 +3403,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
: getFullyQualifiedName(Scope, DIGV->getName());
if (const GlobalVariable *GV =
- CVGV.GVInfo.dyn_cast<const GlobalVariable *>()) {
+ dyn_cast_if_present<const GlobalVariable *>(CVGV.GVInfo)) {
// DataSym record, see SymbolRecord.h for more info. Thread local data
// happens to have the same format as global data.
MCSymbol *GVSym = Asm->getSymbol(GV);
@@ -3403,7 +3418,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
OS.AddComment("DataOffset");
uint64_t Offset = 0;
- if (CVGlobalVariableOffsets.find(DIGV) != CVGlobalVariableOffsets.end())
+ if (CVGlobalVariableOffsets.contains(DIGV))
// Use the offset seen while collecting info on globals.
Offset = CVGlobalVariableOffsets[DIGV];
OS.emitCOFFSecRel32(GVSym, Offset);
@@ -3415,7 +3430,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
emitNullTerminatedSymbolName(OS, QualifiedName, LengthOfDataRecord);
endSymbolRecord(DataEnd);
} else {
- const DIExpression *DIE = CVGV.GVInfo.get<const DIExpression *>();
+ const DIExpression *DIE = cast<const DIExpression *>(CVGV.GVInfo);
assert(DIE->isConstant() &&
"Global constant variables must contain a constant expression.");
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 495822a6e653..1455ac417824 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_CODEVIEWDEBUG_H
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -191,6 +192,8 @@ private:
bool HasStackRealignment = false;
bool HaveLineInfo = false;
+
+ bool HasFramePointer = false;
};
FunctionInfo *CurFn = nullptr;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 308d4b1b5d61..619155cafe92 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -173,9 +173,7 @@ void DIEAbbrevSet::Emit(const AsmPrinter *AP, MCSection *Section) const {
// DIE Implementation
//===----------------------------------------------------------------------===//
-DIE *DIE::getParent() const {
- return Owner.dyn_cast<DIE*>();
-}
+DIE *DIE::getParent() const { return dyn_cast_if_present<DIE *>(Owner); }
DIEAbbrev DIE::generateAbbrev() const {
DIEAbbrev Abbrev(Tag, hasChildren());
@@ -209,7 +207,7 @@ const DIE *DIE::getUnitDie() const {
DIEUnit *DIE::getUnit() const {
const DIE *UnitDie = getUnitDie();
if (UnitDie)
- return UnitDie->Owner.dyn_cast<DIEUnit*>();
+ return dyn_cast_if_present<DIEUnit *>(UnitDie->Owner);
return nullptr;
}
@@ -385,6 +383,7 @@ void DIEInteger::emitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
case dwarf::DW_FORM_strx2:
case dwarf::DW_FORM_addrx2:
case dwarf::DW_FORM_strx3:
+ case dwarf::DW_FORM_addrx3:
case dwarf::DW_FORM_strp:
case dwarf::DW_FORM_ref4:
case dwarf::DW_FORM_data4:
diff --git a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index 0b40cdb0c3cc..55a0afcf7a33 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -138,6 +138,9 @@ void DbgValueHistoryMap::trimLocationRanges(
// references if any entries are removed.
SmallVector<size_t, 4> Offsets;
+ LLVM_DEBUG(dbgs() << "Trimming location ranges for function '" << MF.getName()
+ << "'\n");
+
for (auto &Record : VarEntries) {
auto &HistoryMapEntries = Record.second;
if (HistoryMapEntries.empty())
@@ -213,6 +216,8 @@ void DbgValueHistoryMap::trimLocationRanges(
// count of the closing entry, if one exists.
if (EndIndex != NoEntry)
ReferenceCount[EndIndex] -= 1;
+ LLVM_DEBUG(dbgs() << "Dropping value outside scope range of variable: ";
+ StartMI->print(llvm::dbgs()););
}
}
@@ -253,6 +258,8 @@ void DbgValueHistoryMap::trimLocationRanges(
// ToRemove indices are valid after each erase.
for (EntryIndex Idx : llvm::reverse(ToRemove))
HistoryMapEntries.erase(HistoryMapEntries.begin() + Idx);
+ LLVM_DEBUG(llvm::dbgs() << "New HistoryMap('" << LocalVar->getName()
+ << "') size: " << HistoryMapEntries.size() << "\n");
}
}
@@ -555,8 +562,8 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-LLVM_DUMP_METHOD void DbgValueHistoryMap::dump() const {
- dbgs() << "DbgValueHistoryMap:\n";
+LLVM_DUMP_METHOD void DbgValueHistoryMap::dump(StringRef FuncName) const {
+ dbgs() << "DbgValueHistoryMap('" << FuncName << "'):\n";
for (const auto &VarRangePair : *this) {
const InlinedEntity &Var = VarRangePair.first;
const Entries &Entries = VarRangePair.second;
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 858a3e75e515..eb2d992c7e75 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -223,6 +223,7 @@ bool DebugHandlerBase::isUnsignedDIType(const DIType *Ty) {
Encoding == dwarf::DW_ATE_signed_char ||
Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF ||
Encoding == dwarf::DW_ATE_boolean ||
+ Encoding == dwarf::DW_ATE_complex_float ||
(Ty->getTag() == dwarf::DW_TAG_unspecified_type &&
Ty->getName() == "decltype(nullptr)")) &&
"Unsupported encoding");
@@ -273,7 +274,7 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
InstOrdering.initialize(*MF);
if (TrimVarLocs)
DbgValues.trimLocationRanges(*MF, LScopes, InstOrdering);
- LLVM_DEBUG(DbgValues.dump());
+ LLVM_DEBUG(DbgValues.dump(MF->getName()));
// Request labels for the full history.
for (const auto &I : DbgValues) {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 2008aa39ff87..726aba18bb80 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -126,7 +126,7 @@ public:
: Expression(Expr), ValueLocEntries(Locs.begin(), Locs.end()),
IsVariadic(IsVariadic) {
#ifndef NDEBUG
- assert(cast<DIExpression>(Expr)->isValid() ||
+ assert(Expr->isValid() ||
!any_of(Locs, [](auto LE) { return LE.isLocation(); }));
if (!IsVariadic) {
assert(ValueLocEntries.size() == 1);
diff --git a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
index 0515173b4a24..a96bdd034918 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -49,7 +49,7 @@ private:
SmallVector<Entry, 32> Entries;
SmallString<256> DWARFBytes;
std::vector<std::string> Comments;
- MCSymbol *Sym;
+ MCSymbol *Sym = nullptr;
/// Only verbose textual output needs comments. This will be set to
/// true for that case, and false otherwise.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index df4fe8d49806..10c844ddb14a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -94,7 +94,7 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
shouldEmitCFI =
MAI.usesCFIForEH() && (shouldEmitPersonality || shouldEmitMoves);
else
- shouldEmitCFI = Asm->needsCFIForDebug() && shouldEmitMoves;
+ shouldEmitCFI = Asm->usesCFIWithoutEH() && shouldEmitMoves;
}
void DwarfCFIException::beginBasicBlockSection(const MachineBasicBlock &MBB) {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 6dde50375a60..58ed21379d29 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -266,7 +267,7 @@ void DwarfCompileUnit::addLocationAttribute(
// 16-bit platforms like MSP430 and AVR take this path, so sink this
// assert to platforms that use it.
auto GetPointerSizedFormAndOp = [this]() {
- unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ unsigned PointerSize = Asm->MAI->getCodePointerSize();
assert((PointerSize == 4 || PointerSize == 8) &&
"Add support for other sizes if necessary");
struct FormAndOp {
@@ -278,7 +279,16 @@ void DwarfCompileUnit::addLocationAttribute(
: FormAndOp{dwarf::DW_FORM_data8, dwarf::DW_OP_const8u};
};
if (Global->isThreadLocal()) {
- if (Asm->TM.useEmulatedTLS()) {
+ if (Asm->TM.getTargetTriple().isWasm()) {
+ // FIXME This is not guaranteed, but in practice, in static linking,
+ // if present, __tls_base's index is 1. This doesn't hold for dynamic
+ // linking, so TLS variables used in dynamic linking won't have
+ // correct debug info for now. See
+ // https://github.com/llvm/llvm-project/blob/19afbfe33156d211fa959dadeea46cd17b9c723c/lld/wasm/Driver.cpp#L786-L823
+ addWasmRelocBaseGlobal(Loc, "__tls_base", 1);
+ addOpAddress(*Loc, Sym);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ } else if (Asm->TM.useEmulatedTLS()) {
// TODO: add debug info for emulated thread local mode.
} else {
// FIXME: Make this work with -gsplit-dwarf.
@@ -301,6 +311,14 @@ void DwarfCompileUnit::addLocationAttribute(
DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
: dwarf::DW_OP_form_tls_address);
}
+ } else if (Asm->TM.getTargetTriple().isWasm() &&
+ Asm->TM.getRelocationModel() == Reloc::PIC_) {
+ // FIXME This is not guaranteed, but in practice, if present,
+ // __memory_base's index is 1. See
+ // https://github.com/llvm/llvm-project/blob/19afbfe33156d211fa959dadeea46cd17b9c723c/lld/wasm/Driver.cpp#L786-L823
+ addWasmRelocBaseGlobal(Loc, "__memory_base", 1);
+ addOpAddress(*Loc, Sym);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
} else if ((Asm->TM.getRelocationModel() == Reloc::RWPI ||
Asm->TM.getRelocationModel() == Reloc::ROPI_RWPI) &&
!Asm->getObjFileLowering()
@@ -449,6 +467,39 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
return ContextCU->updateSubprogramScopeDIEImpl(SP, SPDie);
}
+// Add info for Wasm-global-based relocation.
+// 'GlobalIndex' is used for split dwarf, which currently relies on a few
+// assumptions that are not guaranteed in a formal way but work in practice.
+void DwarfCompileUnit::addWasmRelocBaseGlobal(DIELoc *Loc, StringRef GlobalName,
+ uint64_t GlobalIndex) {
+ // FIXME: duplicated from Target/WebAssembly/WebAssembly.h
+ // don't want to depend on target specific headers in this code?
+ const unsigned TI_GLOBAL_RELOC = 3;
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ auto *Sym = cast<MCSymbolWasm>(Asm->GetExternalSymbolSymbol(GlobalName));
+ // FIXME: this repeats what WebAssemblyMCInstLower::
+ // GetExternalSymbolSymbol does, since if there's no code that
+ // refers to this symbol, we have to set it here.
+ Sym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
+ Sym->setGlobalType(wasm::WasmGlobalType{
+ static_cast<uint8_t>(PointerSize == 4 ? wasm::WASM_TYPE_I32
+ : wasm::WASM_TYPE_I64),
+ true});
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location);
+ addSInt(*Loc, dwarf::DW_FORM_sdata, TI_GLOBAL_RELOC);
+ if (!isDwoUnit()) {
+ addLabel(*Loc, dwarf::DW_FORM_data4, Sym);
+ } else {
+ // FIXME: when writing dwo, we need to avoid relocations. Probably
+ // the "right" solution is to treat globals the way func and data
+ // symbols are (with entries in .debug_addr).
+ // For now we hardcode the indices in the callsites. Global indices are not
+ // fixed, but in practice a few are fixed; for example, __stack_pointer is
+ // always index 0.
+ addUInt(*Loc, dwarf::DW_FORM_data4, GlobalIndex);
+ }
+}
+
DIE &DwarfCompileUnit::updateSubprogramScopeDIEImpl(const DISubprogram *SP,
DIE *SPDie) {
SmallVector<RangeSpan, 2> BB_List;
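For reference, the location that addWasmRelocBaseGlobal plus the two calls following it build for a Wasm thread-local global has roughly this shape (a sketch, assuming static linking where __tls_base sits at global index 1):

    DW_OP_WASM_location 0x3 (TI_GLOBAL_RELOC) <reloc against __tls_base>
    DW_OP_addr / DW_OP_addrx <the variable's symbol>
    DW_OP_plus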
@@ -480,40 +531,24 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIEImpl(const DISubprogram *SP,
case TargetFrameLowering::DwarfFrameBase::CFA: {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_call_frame_cfa);
+ if (FrameBase.Location.Offset != 0) {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_consts);
+ addSInt(*Loc, dwarf::DW_FORM_sdata, FrameBase.Location.Offset);
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ }
addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc);
break;
}
case TargetFrameLowering::DwarfFrameBase::WasmFrameBase: {
// FIXME: duplicated from Target/WebAssembly/WebAssembly.h
- // don't want to depend on target specific headers in this code?
const unsigned TI_GLOBAL_RELOC = 3;
if (FrameBase.Location.WasmLoc.Kind == TI_GLOBAL_RELOC) {
// These need to be relocatable.
- assert(FrameBase.Location.WasmLoc.Index == 0); // Only SP so far.
- auto SPSym = cast<MCSymbolWasm>(
- Asm->GetExternalSymbolSymbol("__stack_pointer"));
- // FIXME: this repeats what WebAssemblyMCInstLower::
- // GetExternalSymbolSymbol does, since if there's no code that
- // refers to this symbol, we have to set it here.
- SPSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL);
- SPSym->setGlobalType(wasm::WasmGlobalType{
- uint8_t(Asm->getSubtargetInfo().getTargetTriple().getArch() ==
- Triple::wasm64
- ? wasm::WASM_TYPE_I64
- : wasm::WASM_TYPE_I32),
- true});
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location);
- addSInt(*Loc, dwarf::DW_FORM_sdata, TI_GLOBAL_RELOC);
- if (!isDwoUnit()) {
- addLabel(*Loc, dwarf::DW_FORM_data4, SPSym);
- } else {
- // FIXME: when writing dwo, we need to avoid relocations. Probably
- // the "right" solution is to treat globals the way func and data
- // symbols are (with entries in .debug_addr).
- // For now, since we only ever use index 0, this should work as-is.
- addUInt(*Loc, dwarf::DW_FORM_data4, FrameBase.Location.WasmLoc.Index);
- }
+ assert(FrameBase.Location.WasmLoc.Index == 0); // Only SP so far.
+ // For now, since we only ever use index 0, this should work as-is.
+ addWasmRelocBaseGlobal(Loc, "__stack_pointer",
+ FrameBase.Location.WasmLoc.Index);
addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value);
addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc);
} else {
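With the CFA case above, a frame base that the target reports at a nonzero offset from the CFA now yields a DW_AT_frame_base of the form (offset value hypothetical):

    DW_OP_call_frame_cfa, DW_OP_consts -16, DW_OP_plus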
@@ -608,7 +643,7 @@ void DwarfCompileUnit::attachRangesOrLowHighPC(
assert(!Ranges.empty());
if (!DD->useRangesSection() ||
(Ranges.size() == 1 &&
- (!DD->alwaysUseRanges() ||
+ (!DD->alwaysUseRanges(*this) ||
DD->getSectionLabel(&Ranges.front().Begin->getSection()) ==
Ranges.front().Begin))) {
const RangeSpan &Front = Ranges.front();
@@ -659,7 +694,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope,
auto *InlinedSP = getDISubprogram(DS);
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
- DIE *OriginDIE = getAbstractSPDies()[InlinedSP];
+ DIE *OriginDIE = getAbstractScopeDIEs()[InlinedSP];
assert(OriginDIE && "Unable to find original DIE for an inlined subprogram.");
auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine);
@@ -691,10 +726,20 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope,
DIE *DwarfCompileUnit::constructLexicalScopeDIE(LexicalScope *Scope) {
if (DD->isLexicalScopeDIENull(Scope))
return nullptr;
+ const auto *DS = Scope->getScopeNode();
auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_lexical_block);
- if (Scope->isAbstractScope())
+ if (Scope->isAbstractScope()) {
+ assert(!getAbstractScopeDIEs().count(DS) &&
+ "Abstract DIE for this scope exists!");
+ getAbstractScopeDIEs()[DS] = ScopeDIE;
return ScopeDIE;
+ }
+ if (!Scope->getInlinedAt()) {
+ assert(!LexicalBlockDIEs.count(DS) &&
+ "Concrete out-of-line DIE for this scope exists!");
+ LexicalBlockDIEs[DS] = ScopeDIE;
+ }
attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges());
@@ -929,29 +974,29 @@ static SmallVector<const DIVariable *, 2> dependencies(DbgVariable *Var) {
for (auto *El : Array->getElements()) {
if (auto *Subrange = dyn_cast<DISubrange>(El)) {
if (auto Count = Subrange->getCount())
- if (auto *Dependency = Count.dyn_cast<DIVariable *>())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(Count))
Result.push_back(Dependency);
if (auto LB = Subrange->getLowerBound())
- if (auto *Dependency = LB.dyn_cast<DIVariable *>())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(LB))
Result.push_back(Dependency);
if (auto UB = Subrange->getUpperBound())
- if (auto *Dependency = UB.dyn_cast<DIVariable *>())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(UB))
Result.push_back(Dependency);
if (auto ST = Subrange->getStride())
- if (auto *Dependency = ST.dyn_cast<DIVariable *>())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(ST))
Result.push_back(Dependency);
} else if (auto *GenericSubrange = dyn_cast<DIGenericSubrange>(El)) {
if (auto Count = GenericSubrange->getCount())
- if (auto *Dependency = Count.dyn_cast<DIVariable *>())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(Count))
Result.push_back(Dependency);
if (auto LB = GenericSubrange->getLowerBound())
- if (auto *Dependency = LB.dyn_cast<DIVariable *>())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(LB))
Result.push_back(Dependency);
if (auto UB = GenericSubrange->getUpperBound())
- if (auto *Dependency = UB.dyn_cast<DIVariable *>())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(UB))
Result.push_back(Dependency);
if (auto ST = GenericSubrange->getStride())
- if (auto *Dependency = ST.dyn_cast<DIVariable *>())
+ if (auto *Dependency = dyn_cast_if_present<DIVariable *>(ST))
Result.push_back(Dependency);
}
}
@@ -1062,35 +1107,35 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
for (DbgVariable *DV : Locals)
ScopeDIE.addChild(constructVariableDIE(*DV, *Scope, ObjectPointer));
- // Emit imported entities (skipped in gmlt-like data).
- if (!includeMinimalInlineScopes()) {
- for (const auto *IE : ImportedEntities[Scope->getScopeNode()])
- ScopeDIE.addChild(constructImportedEntityDIE(cast<DIImportedEntity>(IE)));
- }
-
// Emit labels.
for (DbgLabel *DL : DU->getScopeLabels().lookup(Scope))
ScopeDIE.addChild(constructLabelDIE(*DL, *Scope));
+ // Track other local entities (skipped in gmlt-like data).
+ // This creates a mapping between the CU and the set of local declarations
+ // that should be emitted for subprograms in this CU.
+ if (!includeMinimalInlineScopes() && !Scope->getInlinedAt()) {
+ auto &LocalDecls = DD->getLocalDeclsForScope(Scope->getScopeNode());
+ DeferredLocalDecls.insert(LocalDecls.begin(), LocalDecls.end());
+ }
+
// Emit inner lexical scopes.
- auto needToEmitLexicalScope = [this](LexicalScope *LS) {
- if (isa<DISubprogram>(LS->getScopeNode()))
- return true;
- auto Vars = DU->getScopeVariables().lookup(LS);
+ auto skipLexicalScope = [this](LexicalScope *S) -> bool {
+ if (isa<DISubprogram>(S->getScopeNode()))
+ return false;
+ auto Vars = DU->getScopeVariables().lookup(S);
if (!Vars.Args.empty() || !Vars.Locals.empty())
- return true;
- if (!includeMinimalInlineScopes() &&
- !ImportedEntities[LS->getScopeNode()].empty())
- return true;
- return false;
+ return false;
+ return includeMinimalInlineScopes() ||
+ DD->getLocalDeclsForScope(S->getScopeNode()).empty();
};
for (LexicalScope *LS : Scope->getChildren()) {
// If the lexical block doesn't have non-scope children, skip
// its emission and put its children directly to the parent scope.
- if (needToEmitLexicalScope(LS))
- constructScopeDIE(LS, ScopeDIE);
- else
+ if (skipLexicalScope(LS))
createAndAddScopeChildren(LS, ScopeDIE);
+ else
+ constructScopeDIE(LS, ScopeDIE);
}
return ObjectPointer;
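DeferredLocalDecls collects function-local declarations so they are emitted once per CU in DwarfDebug::endModule() rather than while walking each function. The motivating source shape is a local import such as a using-declaration (sketch):

    void f() {
      using std::max;   // DIImportedEntity whose scope is f's DISubprogram;
                        // recorded in the subprogram's retainedNodes and
    }                   // deferred here for emission at end of module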
@@ -1098,11 +1143,9 @@ DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope,
void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
LexicalScope *Scope) {
- DIE *&AbsDef = getAbstractSPDies()[Scope->getScopeNode()];
- if (AbsDef)
- return;
-
auto *SP = cast<DISubprogram>(Scope->getScopeNode());
+ if (getAbstractScopeDIEs().count(SP))
+ return;
DIE *ContextDIE;
DwarfCompileUnit *ContextCU = this;
@@ -1126,14 +1169,19 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
// Passing null as the associated node because the abstract definition
// shouldn't be found by lookup.
- AbsDef = &ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr);
- ContextCU->applySubprogramAttributesToDefinition(SP, *AbsDef);
- ContextCU->addSInt(*AbsDef, dwarf::DW_AT_inline,
+ DIE &AbsDef = ContextCU->createAndAddDIE(dwarf::DW_TAG_subprogram,
+ *ContextDIE, nullptr);
+
+ // Store the DIE before creating children.
+ ContextCU->getAbstractScopeDIEs()[SP] = &AbsDef;
+
+ ContextCU->applySubprogramAttributesToDefinition(SP, AbsDef);
+ ContextCU->addSInt(AbsDef, dwarf::DW_AT_inline,
DD->getDwarfVersion() <= 4 ? std::optional<dwarf::Form>()
: dwarf::DW_FORM_implicit_const,
dwarf::DW_INL_inlined);
- if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, *AbsDef))
- ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
+ if (DIE *ObjectPointer = ContextCU->createAndAddScopeChildren(Scope, AbsDef))
+ ContextCU->addDIEEntry(AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
}
bool DwarfCompileUnit::useGNUAnalogForDwarf5Feature() const {
@@ -1277,21 +1325,37 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
EntityDie = getOrCreateNameSpace(NS);
else if (auto *M = dyn_cast<DIModule>(Entity))
EntityDie = getOrCreateModule(M);
- else if (auto *SP = dyn_cast<DISubprogram>(Entity))
- EntityDie = getOrCreateSubprogramDIE(SP);
- else if (auto *T = dyn_cast<DIType>(Entity))
+ else if (auto *SP = dyn_cast<DISubprogram>(Entity)) {
+ // If there is an abstract subprogram, refer to it. Note that this assumes
+ // that all the abstract subprograms have been already created (which is
+ // correct until imported entities get emitted in DwarfDebug::endModule()).
+ if (auto *AbsSPDie = getAbstractScopeDIEs().lookup(SP))
+ EntityDie = AbsSPDie;
+ else
+ EntityDie = getOrCreateSubprogramDIE(SP);
+ } else if (auto *T = dyn_cast<DIType>(Entity))
EntityDie = getOrCreateTypeDIE(T);
else if (auto *GV = dyn_cast<DIGlobalVariable>(Entity))
EntityDie = getOrCreateGlobalVariableDIE(GV, {});
+ else if (auto *IE = dyn_cast<DIImportedEntity>(Entity))
+ EntityDie = getOrCreateImportedEntityDIE(IE);
else
EntityDie = getDIE(Entity);
assert(EntityDie);
addSourceLine(*IMDie, Module->getLine(), Module->getFile());
addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie);
StringRef Name = Module->getName();
- if (!Name.empty())
+ if (!Name.empty()) {
addString(*IMDie, dwarf::DW_AT_name, Name);
+ // FIXME: if consumers ever start caring about handling
+ // unnamed import declarations such as `using ::nullptr_t`
+ // or `using namespace std::ranges`, we could add the
+ // import declaration into the accelerator table with the
+ // name being the one of the entity being imported.
+ DD->addAccelNamespace(*CUNode, Name, *IMDie);
+ }
+
// This is for imported module with renamed entities (such as variables and
// subprograms).
DINodeArray Elements = Module->getElements();
@@ -1305,9 +1369,24 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
return IMDie;
}
+DIE *DwarfCompileUnit::getOrCreateImportedEntityDIE(
+ const DIImportedEntity *IE) {
+
+ // Check for pre-existence.
+ if (DIE *Die = getDIE(IE))
+ return Die;
+
+ DIE *ContextDIE = getOrCreateContextDIE(IE->getScope());
+ assert(ContextDIE && "Empty scope for the imported entity!");
+
+ DIE *IMDie = constructImportedEntityDIE(IE);
+ ContextDIE->addChild(IMDie);
+ return IMDie;
+}
+
void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) {
DIE *D = getDIE(SP);
- if (DIE *AbsSPDIE = getAbstractSPDies().lookup(SP)) {
+ if (DIE *AbsSPDIE = getAbstractScopeDIEs().lookup(SP)) {
if (D)
// If this subprogram has an abstract definition, reference that
addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE);
@@ -1356,8 +1435,8 @@ void DwarfCompileUnit::createAbstractEntity(const DINode *Node,
assert(Scope && Scope->isAbstractScope());
auto &Entity = getAbstractEntities()[Node];
if (isa<const DILocalVariable>(Node)) {
- Entity = std::make_unique<DbgVariable>(
- cast<const DILocalVariable>(Node), nullptr /* IA */);;
+ Entity = std::make_unique<DbgVariable>(cast<const DILocalVariable>(Node),
+ nullptr /* IA */);
DU->addScopeVariable(Scope, cast<DbgVariable>(Entity.get()));
} else if (isa<const DILabel>(Node)) {
Entity = std::make_unique<DbgLabel>(
@@ -1389,6 +1468,8 @@ bool DwarfCompileUnit::hasDwarfPubSections() const {
// generated for things like Gold's gdb_index generation.
case DICompileUnit::DebugNameTableKind::GNU:
return true;
+ case DICompileUnit::DebugNameTableKind::Apple:
+ return false;
case DICompileUnit::DebugNameTableKind::Default:
return DD->tuneForGDB() && !includeMinimalInlineScopes() &&
!CUNode->isDebugDirectivesOnly() &&
@@ -1599,3 +1680,29 @@ void DwarfCompileUnit::createBaseTypeDIEs() {
Btr.Die = &Die;
}
}
+
+DIE *DwarfCompileUnit::getLexicalBlockDIE(const DILexicalBlock *LB) {
+ // Assume if there is an abstract tree all the DIEs are already emitted.
+ bool isAbstract = getAbstractScopeDIEs().count(LB->getSubprogram());
+ if (isAbstract && getAbstractScopeDIEs().count(LB))
+ return getAbstractScopeDIEs()[LB];
+ assert(!isAbstract && "Missed lexical block DIE in abstract tree!");
+
+ // Return a concrete DIE if it exists or nullptr otherwise.
+ return LexicalBlockDIEs.lookup(LB);
+}
+
+DIE *DwarfCompileUnit::getOrCreateContextDIE(const DIScope *Context) {
+ if (isa_and_nonnull<DILocalScope>(Context)) {
+ if (auto *LFScope = dyn_cast<DILexicalBlockFile>(Context))
+ Context = LFScope->getNonLexicalBlockFileScope();
+ if (auto *LScope = dyn_cast<DILexicalBlock>(Context))
+ return getLexicalBlockDIE(LScope);
+
+ // Otherwise the context must be a DISubprogram.
+ auto *SPScope = cast<DISubprogram>(Context);
+ if (getAbstractScopeDIEs().count(SPScope))
+ return getAbstractScopeDIEs()[SPScope];
+ }
+ return DwarfUnit::getOrCreateContextDIE(Context);
+}
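A sketch of what the DILocalScope-aware lookup above resolves: an entity scoped inside a lexical block is parented to that block's concrete DIE (recorded in LexicalBlockDIEs when constructLexicalScopeDIE ran), or to the abstract subprogram DIE when only the abstract tree exists.

    void g() {
      {                        // DILexicalBlock -> entry in LexicalBlockDIEs
        using namespace std;   // local DIImportedEntity; its context DIE is
      }                        // found through getLexicalBlockDIE(LB)
    }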
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 7d87f35021bb..6ef73ebd4f7f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -61,11 +61,6 @@ class DwarfCompileUnit final : public DwarfUnit {
/// The start of the unit macro info within macro section.
MCSymbol *MacroLabelBegin;
- using ImportedEntityList = SmallVector<const MDNode *, 8>;
- using ImportedEntityMap = DenseMap<const MDNode *, ImportedEntityList>;
-
- ImportedEntityMap ImportedEntities;
-
/// GlobalNames - A map of globally visible named entities for this unit.
StringMap<const DIE *> GlobalNames;
@@ -79,7 +74,20 @@ class DwarfCompileUnit final : public DwarfUnit {
// ranges/locs.
const MCSymbol *BaseAddress = nullptr;
- DenseMap<const MDNode *, DIE *> AbstractSPDies;
+ using MDNodeSetVector =
+ SetVector<const MDNode *, SmallVector<const MDNode *, 4>,
+ SmallPtrSet<const MDNode *, 4>>;
+
+ // List of entities (either static locals, types or imports) that
+ // belong to subprograms within this CU.
+ MDNodeSetVector DeferredLocalDecls;
+
+ // List of concrete lexical block scopes belonging to subprograms within this CU.
+ DenseMap<const DILocalScope *, DIE *> LexicalBlockDIEs;
+
+ // List of abstract local scopes (either DISubprogram or DILexicalBlock).
+ DenseMap<const DILocalScope *, DIE *> AbstractLocalScopeDIEs;
+
DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities;
/// DWO ID for correlating skeleton and split units.
@@ -94,10 +102,10 @@ class DwarfCompileUnit final : public DwarfUnit {
bool isDwoUnit() const override;
- DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
+ DenseMap<const DILocalScope *, DIE *> &getAbstractScopeDIEs() {
if (isDwoUnit() && !DD->shareAcrossDWOCUs())
- return AbstractSPDies;
- return DU->getAbstractSPDies();
+ return AbstractLocalScopeDIEs;
+ return DU->getAbstractScopeDIEs();
}
DenseMap<const DINode *, std::unique_ptr<DbgEntity>> &getAbstractEntities() {
@@ -108,6 +116,10 @@ class DwarfCompileUnit final : public DwarfUnit {
void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) override;
+ /// Add info for Wasm-global-based relocation.
+ void addWasmRelocBaseGlobal(DIELoc *Loc, StringRef GlobalName,
+ uint64_t GlobalIndex);
+
public:
DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU,
@@ -171,17 +183,6 @@ public:
unsigned getOrCreateSourceID(const DIFile *File) override;
- void addImportedEntity(const DIImportedEntity* IE) {
- DIScope *Scope = IE->getScope();
- assert(Scope && "Invalid Scope encoding!");
- if (!isa<DILocalScope>(Scope))
- // No need to add imported enities that are not local declaration.
- return;
-
- auto *LocalScope = cast<DILocalScope>(Scope)->getNonLexicalBlockFileScope();
- ImportedEntities[LocalScope].push_back(IE);
- }
-
/// addRange - Add an address range to the list of ranges for this unit.
void addRange(RangeSpan Range);
@@ -213,6 +214,11 @@ public:
/// attach DW_AT_low_pc/DW_AT_high_pc labels.
DIE *constructLexicalScopeDIE(LexicalScope *Scope);
+ /// Get a DIE for the given DILexicalBlock.
+ /// Note that this function assumes that the DIE has already been created
+ /// and it's an error if it hasn't.
+ DIE *getLexicalBlockDIE(const DILexicalBlock *LB);
+
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
DIE *constructVariableDIE(DbgVariable &DV, bool Abstract = false);
@@ -224,6 +230,10 @@ public:
void createBaseTypeDIEs();
+ /// Construct a DIE for a given scope.
+ /// This instance of 'getOrCreateContextDIE()' can handle DILocalScope.
+ DIE *getOrCreateContextDIE(const DIScope *Ty) override;
+
/// Construct a DIE for this subprogram scope.
DIE &constructSubprogramScopeDIE(const DISubprogram *Sub,
LexicalScope *Scope);
@@ -262,8 +272,9 @@ public:
void constructCallSiteParmEntryDIEs(DIE &CallSiteDIE,
SmallVector<DbgCallSiteParam, 4> &Params);
- /// Construct import_module DIE.
- DIE *constructImportedEntityDIE(const DIImportedEntity *Module);
+ /// Get or create a DIE for an imported entity.
+ DIE *getOrCreateImportedEntityDIE(const DIImportedEntity *IE);
+ DIE *constructImportedEntityDIE(const DIImportedEntity *IE);
void finishSubprogramDefinition(const DISubprogram *SP);
void finishEntityDefinition(const DbgEntity *Entity);
@@ -360,6 +371,8 @@ public:
bool hasDwarfPubSections() const;
void addBaseTypeRef(DIEValueList &Die, int64_t Idx);
+
+ MDNodeSetVector &getDeferredLocalDecls() { return DeferredLocalDecls; }
};
} // end namespace llvm
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index cde790cc77fb..1ae17ec9b874 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -18,7 +18,7 @@
#include "DwarfUnit.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DIE.h"
@@ -53,6 +53,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cstddef>
#include <iterator>
@@ -452,14 +453,8 @@ DwarfDebug::DwarfDebug(AsmPrinter *A)
// Split DWARF would benefit object size significantly by trading reductions
// in address pool usage for slightly increased range list encodings.
- if (DwarfVersion >= 5) {
+ if (DwarfVersion >= 5)
MinimizeAddr = MinimizeAddrInV5Option;
- // FIXME: In the future, enable this by default for Split DWARF where the
- // tradeoff is more pronounced due to being able to offload the range
- // lists to the dwo file and shrink object files/reduce relocations there.
- if (MinimizeAddr == MinimizeAddrInV5::Default)
- MinimizeAddr = MinimizeAddrInV5::Disabled;
- }
Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion);
Asm->OutStreamer->getContext().setDwarfFormat(Dwarf64 ? dwarf::DWARF64
@@ -500,6 +495,7 @@ static StringRef getObjCMethodName(StringRef In) {
void DwarfDebug::addSubprogramNames(const DICompileUnit &CU,
const DISubprogram *SP, DIE &Die) {
if (getAccelTableKind() != AccelTableKind::Apple &&
+ CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Apple &&
CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None)
return;
@@ -513,7 +509,7 @@ void DwarfDebug::addSubprogramNames(const DICompileUnit &CU,
// well into the name table. Only do that if we are going to actually emit
// that name.
if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName() &&
- (useAllLinkageNames() || InfoHolder.getAbstractSPDies().lookup(SP)))
+ (useAllLinkageNames() || InfoHolder.getAbstractScopeDIEs().lookup(SP)))
addAccelName(CU, SP->getLinkageName(), Die);
// If this is an Objective-C selector name add it to the ObjC accelerator
@@ -710,13 +706,13 @@ static void interpretValues(const MachineInstr *CurMI,
if (MI.isDebugInstr())
return;
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isDef() && MO.getReg().isPhysical()) {
+ for (const MachineOperand &MO : MI.all_defs()) {
+ if (MO.getReg().isPhysical()) {
for (auto &FwdReg : ForwardedRegWorklist)
if (TRI.regsOverlap(FwdReg.first, MO.getReg()))
Defs.insert(FwdReg.first);
- for (MCRegUnitIterator Units(MO.getReg(), &TRI); Units.isValid(); ++Units)
- NewClobberedRegUnits.insert(*Units);
+ for (MCRegUnit Unit : TRI.regunits(MO.getReg()))
+ NewClobberedRegUnits.insert(Unit);
}
}
};
@@ -1050,11 +1046,11 @@ void DwarfDebug::finishUnitAttributes(const DICompileUnit *DIUnit,
if (!SDK.empty())
NewCU.addString(Die, dwarf::DW_AT_APPLE_sdk, SDK);
- // Add DW_str_offsets_base to the unit DIE, except for split units.
- if (useSegmentedStringOffsetsTable() && !useSplitDwarf())
- NewCU.addStringOffsetsStart();
-
if (!useSplitDwarf()) {
+ // Add DW_str_offsets_base to the unit DIE, except for split units.
+ if (useSegmentedStringOffsetsTable())
+ NewCU.addStringOffsetsStart();
+
NewCU.initStmtList();
// If we're using split dwarf the compilation dir is going to be in the
@@ -1097,6 +1093,13 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
if (auto *CU = CUMap.lookup(DIUnit))
return *CU;
+ if (useSplitDwarf() &&
+ !shareAcrossDWOCUs() &&
+ (!DIUnit->getSplitDebugInlining() ||
+ DIUnit->getEmissionKind() == DICompileUnit::FullDebug) &&
+ !CUMap.empty()) {
+ return *CUMap.begin()->second;
+ }
CompilationDir = DIUnit->getDirectory();
auto OwnedUnit = std::make_unique<DwarfCompileUnit>(
@@ -1104,9 +1107,6 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
DwarfCompileUnit &NewCU = *OwnedUnit;
InfoHolder.addUnit(std::move(OwnedUnit));
- for (auto *IE : DIUnit->getImportedEntities())
- NewCU.addImportedEntity(IE);
-
// LTO with assembly output shares a single line table amongst multiple CUs.
// To avoid the compilation directory being ambiguous, let the line table
// explicitly describe the directory of all files, never relying on the
@@ -1129,14 +1129,6 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
return NewCU;
}
-void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
- const DIImportedEntity *N) {
- if (isa<DILocalScope>(N->getScope()))
- return;
- if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope()))
- D->addChild(TheCU.constructImportedEntityDIE(N));
-}
-
/// Sort and unique GVEs by comparing their fragment offset.
static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &
sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
@@ -1214,16 +1206,8 @@ void DwarfDebug::beginModule(Module *M) {
DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base"));
for (DICompileUnit *CUNode : M->debug_compile_units()) {
- // FIXME: Move local imported entities into a list attached to the
- // subprogram, then this search won't be needed and a
- // getImportedEntities().empty() test should go below with the rest.
- bool HasNonLocalImportedEntities = llvm::any_of(
- CUNode->getImportedEntities(), [](const DIImportedEntity *IE) {
- return !isa<DILocalScope>(IE->getScope());
- });
-
- if (!HasNonLocalImportedEntities && CUNode->getEnumTypes().empty() &&
- CUNode->getRetainedTypes().empty() &&
+ if (CUNode->getImportedEntities().empty() &&
+ CUNode->getEnumTypes().empty() && CUNode->getRetainedTypes().empty() &&
CUNode->getGlobalVariables().empty() && CUNode->getMacros().empty())
continue;
@@ -1257,10 +1241,6 @@ void DwarfDebug::beginModule(Module *M) {
// There is no point in force-emitting a forward declaration.
CU.getOrCreateTypeDIE(RT);
}
- // Emit imported_modules last so that the relevant context is already
- // available.
- for (auto *IE : CUNode->getImportedEntities())
- constructAndAddImportedEntityDIE(CU, IE);
}
}
@@ -1300,6 +1280,8 @@ void DwarfDebug::finalizeModuleInfo() {
if (CUMap.size() > 1)
DWOName = Asm->TM.Options.MCOptions.SplitDwarfFile;
+ bool HasEmittedSplitCU = false;
+
// Handle anything that needs to be done on a per-unit basis after
// all other generation.
for (const auto &P : CUMap) {
@@ -1318,6 +1300,10 @@ void DwarfDebug::finalizeModuleInfo() {
bool HasSplitUnit = SkCU && !TheCU.getUnitDie().children().empty();
if (HasSplitUnit) {
+ (void)HasEmittedSplitCU;
+ assert((shareAcrossDWOCUs() || !HasEmittedSplitCU) &&
+ "Multiple CUs emitted into a single dwo file");
+ HasEmittedSplitCU = true;
dwarf::Attribute attrDWOName = getDwarfVersion() >= 5
? dwarf::DW_AT_dwo_name
: dwarf::DW_AT_GNU_dwo_name;
@@ -1377,11 +1363,10 @@ void DwarfDebug::finalizeModuleInfo() {
if (U.hasRangeLists())
U.addRnglistsBase();
- if (!DebugLocs.getLists().empty()) {
- if (!useSplitDwarf())
- U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base,
- DebugLocs.getSym(),
- TLOF.getDwarfLoclistsSection()->getBeginSymbol());
+ if (!DebugLocs.getLists().empty() && !useSplitDwarf()) {
+ U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base,
+ DebugLocs.getSym(),
+ TLOF.getDwarfLoclistsSection()->getBeginSymbol());
}
}
@@ -1436,8 +1421,24 @@ void DwarfDebug::endModule() {
assert(CurMI == nullptr);
for (const auto &P : CUMap) {
- auto &CU = *P.second;
- CU.createBaseTypeDIEs();
+ const auto *CUNode = cast<DICompileUnit>(P.first);
+ DwarfCompileUnit *CU = &*P.second;
+
+ // Emit imported entities.
+ for (auto *IE : CUNode->getImportedEntities()) {
+ assert(!isa_and_nonnull<DILocalScope>(IE->getScope()) &&
+ "Unexpected function-local entity in 'imports' CU field.");
+ CU->getOrCreateImportedEntityDIE(IE);
+ }
+ for (const auto *D : CU->getDeferredLocalDecls()) {
+ if (auto *IE = dyn_cast<DIImportedEntity>(D))
+ CU->getOrCreateImportedEntityDIE(IE);
+ else
+ llvm_unreachable("Unexpected local retained node!");
+ }
+
+ // Emit base types.
+ CU->createBaseTypeDIEs();
}
// If we aren't actually generating debug info (check beginModule -
@@ -1511,16 +1512,6 @@ void DwarfDebug::endModule() {
// FIXME: AbstractVariables.clear();
}
-void DwarfDebug::ensureAbstractEntityIsCreated(DwarfCompileUnit &CU,
- const DINode *Node,
- const MDNode *ScopeNode) {
- if (CU.getExistingAbstractEntity(Node))
- return;
-
- CU.createAbstractEntity(Node, LScopes.getOrCreateAbstractScope(
- cast<DILocalScope>(ScopeNode)));
-}
-
void DwarfDebug::ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
const DINode *Node, const MDNode *ScopeNode) {
if (CU.getExistingAbstractEntity(Node))
@@ -1531,6 +1522,21 @@ void DwarfDebug::ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
CU.createAbstractEntity(Node, Scope);
}
+static const DILocalScope *getRetainedNodeScope(const MDNode *N) {
+ const DIScope *S;
+ if (const auto *LV = dyn_cast<DILocalVariable>(N))
+ S = LV->getScope();
+ else if (const auto *L = dyn_cast<DILabel>(N))
+ S = L->getScope();
+ else if (const auto *IE = dyn_cast<DIImportedEntity>(N))
+ S = IE->getScope();
+ else
+ llvm_unreachable("Unexpected retained node!");
+
+ // Ensure the scope is not a DILexicalBlockFile.
+ return cast<DILocalScope>(S)->getNonLexicalBlockFileScope();
+}
+
// Collect variable information from side table maintained by MF.
void DwarfDebug::collectVariableInfoFromMFTable(
DwarfCompileUnit &TheCU, DenseSet<InlinedEntity> &Processed) {
@@ -1556,13 +1562,24 @@ void DwarfDebug::collectVariableInfoFromMFTable(
ensureAbstractEntityIsCreatedIfScoped(TheCU, Var.first, Scope->getScopeNode());
auto RegVar = std::make_unique<DbgVariable>(
cast<DILocalVariable>(Var.first), Var.second);
- RegVar->initializeMMI(VI.Expr, VI.Slot);
+ if (VI.inStackSlot())
+ RegVar->initializeMMI(VI.Expr, VI.getStackSlot());
+ else {
+ MachineLocation MLoc(VI.getEntryValueRegister(), /*IsIndirect*/ true);
+ auto LocEntry = DbgValueLocEntry(MLoc);
+ RegVar->initializeDbgValue(DbgValueLoc(VI.Expr, LocEntry));
+ }
LLVM_DEBUG(dbgs() << "Created DbgVariable for " << VI.Var->getName()
<< "\n");
- if (DbgVariable *DbgVar = MFVars.lookup(Var))
- DbgVar->addMMIEntry(*RegVar);
- else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) {
+ if (DbgVariable *DbgVar = MFVars.lookup(Var)) {
+ if (DbgVar->getValueLoc())
+ LLVM_DEBUG(dbgs() << "Dropping repeated entry value debug info for "
+ "variable "
+ << VI.Var->getName() << "\n");
+ else
+ DbgVar->addMMIEntry(*RegVar);
+ } else if (InfoHolder.addScopeVariable(Scope, RegVar.get())) {
MFVars.insert({Var, RegVar.get()});
ConcreteEntities.push_back(std::move(RegVar));
}
@@ -1964,19 +1981,18 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
createConcreteEntity(TheCU, *Scope, Label, IL.second, Sym);
}
- // Collect info for variables/labels that were optimized out.
+ // Collect info for retained nodes.
for (const DINode *DN : SP->getRetainedNodes()) {
- if (!Processed.insert(InlinedEntity(DN, nullptr)).second)
- continue;
- LexicalScope *Scope = nullptr;
- if (auto *DV = dyn_cast<DILocalVariable>(DN)) {
- Scope = LScopes.findLexicalScope(DV->getScope());
- } else if (auto *DL = dyn_cast<DILabel>(DN)) {
- Scope = LScopes.findLexicalScope(DL->getScope());
+ const auto *LS = getRetainedNodeScope(DN);
+ if (isa<DILocalVariable>(DN) || isa<DILabel>(DN)) {
+ if (!Processed.insert(InlinedEntity(DN, nullptr)).second)
+ continue;
+ LexicalScope *LexS = LScopes.findLexicalScope(LS);
+ if (LexS)
+ createConcreteEntity(TheCU, *LexS, DN, nullptr);
+ } else {
+ LocalDeclsPerLS[LS].insert(DN);
}
-
- if (Scope)
- createConcreteEntity(TheCU, *Scope, DN, nullptr);
}
}
@@ -2046,7 +2062,10 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
unsigned LastAsmLine =
Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();
- if (DL == PrevInstLoc) {
+ bool PrevInstInSameSection =
+ (!PrevInstBB ||
+ PrevInstBB->getSectionIDNum() == MI->getParent()->getSectionIDNum());
+ if (DL == PrevInstLoc && PrevInstInSameSection) {
// If we have an ongoing unspecified location, nothing to do here.
if (!DL)
return;
@@ -2114,25 +2133,35 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
PrevInstLoc = DL;
}
-static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
+static std::pair<DebugLoc, bool> findPrologueEndLoc(const MachineFunction *MF) {
// First known non-DBG_VALUE and non-frame setup location marks
// the beginning of the function body.
DebugLoc LineZeroLoc;
+ const Function &F = MF->getFunction();
+
+ // Some instructions may be inserted into the prologue after this function
+ // runs, so the prologue must be kept in those cases.
+ bool IsEmptyPrologue =
+ !(F.hasPrologueData() || F.getMetadata(LLVMContext::MD_func_sanitize));
for (const auto &MBB : *MF) {
for (const auto &MI : MBB) {
- if (!MI.isMetaInstruction() && !MI.getFlag(MachineInstr::FrameSetup) &&
- MI.getDebugLoc()) {
- // Scan forward to try to find a non-zero line number. The prologue_end
- // marks the first breakpoint in the function after the frame setup, and
- // a compiler-generated line 0 location is not a meaningful breakpoint.
- // If none is found, return the first location after the frame setup.
- if (MI.getDebugLoc().getLine())
- return MI.getDebugLoc();
- LineZeroLoc = MI.getDebugLoc();
+ if (!MI.isMetaInstruction()) {
+ if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) {
+ // Scan forward to try to find a non-zero line number. The
+ // prologue_end marks the first breakpoint in the function after the
+ // frame setup, and a compiler-generated line 0 location is not a
+ // meaningful breakpoint. If none is found, return the first
+ // location after the frame setup.
+ if (MI.getDebugLoc().getLine())
+ return std::make_pair(MI.getDebugLoc(), IsEmptyPrologue);
+
+ LineZeroLoc = MI.getDebugLoc();
+ }
+ IsEmptyPrologue = false;
}
}
}
- return LineZeroLoc;
+ return std::make_pair(LineZeroLoc, IsEmptyPrologue);
}
/// Register a source line with debug info. Returns the unique label that was
@@ -2159,8 +2188,16 @@ static void recordSourceLine(AsmPrinter &Asm, unsigned Line, unsigned Col,
DebugLoc DwarfDebug::emitInitialLocDirective(const MachineFunction &MF,
unsigned CUID) {
+ std::pair<DebugLoc, bool> PrologEnd = findPrologueEndLoc(&MF);
+ DebugLoc PrologEndLoc = PrologEnd.first;
+ bool IsEmptyPrologue = PrologEnd.second;
+
// Get beginning of function.
- if (DebugLoc PrologEndLoc = findPrologueEndLoc(&MF)) {
+ if (PrologEndLoc) {
+ // If the prolog is empty, no need to generate scope line for the proc.
+ if (IsEmptyPrologue)
+ return PrologEndLoc;
+
// Ensure the compile unit is created if the function is called before
// beginFunction().
(void)getOrCreateDwarfCompileUnit(
@@ -2239,7 +2276,7 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
assert(!FnScope || SP == FnScope->getScopeNode());
- DwarfCompileUnit &TheCU = *CUMap.lookup(SP->getUnit());
+ DwarfCompileUnit &TheCU = getOrCreateDwarfCompileUnit(SP->getUnit());
if (TheCU.getCUNode()->isDebugDirectivesOnly()) {
PrevLabel = nullptr;
CurFn = nullptr;
@@ -2260,6 +2297,9 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
if (!TheCU.getCUNode()->getDebugInfoForProfiling() &&
TheCU.getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly &&
LScopes.getAbstractScopesList().empty() && !IsDarwin) {
+ for (const auto &R : Asm->MBBSectionRanges)
+ addArangeLabel(SymbolCU(&TheCU, R.second.BeginLabel));
+
assert(InfoHolder.getScopeVariables().empty());
PrevLabel = nullptr;
CurFn = nullptr;
@@ -2267,27 +2307,28 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
}
#ifndef NDEBUG
- size_t NumAbstractScopes = LScopes.getAbstractScopesList().size();
+ size_t NumAbstractSubprograms = LScopes.getAbstractScopesList().size();
#endif
- // Construct abstract scopes.
for (LexicalScope *AScope : LScopes.getAbstractScopesList()) {
const auto *SP = cast<DISubprogram>(AScope->getScopeNode());
for (const DINode *DN : SP->getRetainedNodes()) {
- if (!Processed.insert(InlinedEntity(DN, nullptr)).second)
- continue;
-
- const MDNode *Scope = nullptr;
- if (auto *DV = dyn_cast<DILocalVariable>(DN))
- Scope = DV->getScope();
- else if (auto *DL = dyn_cast<DILabel>(DN))
- Scope = DL->getScope();
- else
- llvm_unreachable("Unexpected DI type!");
-
- // Collect info for variables/labels that were optimized out.
- ensureAbstractEntityIsCreated(TheCU, DN, Scope);
- assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes
- && "ensureAbstractEntityIsCreated inserted abstract scopes");
+ const auto *LS = getRetainedNodeScope(DN);
+ // Ensure LexicalScope is created for the scope of this node.
+ auto *LexS = LScopes.getOrCreateAbstractScope(LS);
+ assert(LexS && "Expected the LexicalScope to be created.");
+ if (isa<DILocalVariable>(DN) || isa<DILabel>(DN)) {
+ // Collect info for variables/labels that were optimized out.
+ if (!Processed.insert(InlinedEntity(DN, nullptr)).second ||
+ TheCU.getExistingAbstractEntity(DN))
+ continue;
+ TheCU.createAbstractEntity(DN, LexS);
+ } else {
+ // Remember the node if this is a local declaration.
+ LocalDeclsPerLS[LS].insert(DN);
+ }
+ assert(
+ LScopes.getAbstractScopesList().size() == NumAbstractSubprograms &&
+ "getOrCreateAbstractScope() inserted an abstract subprogram scope");
}
constructAbstractSubprogramScopeDIE(TheCU, AScope);
}
@@ -2308,6 +2349,7 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
// can be used cross-function)
InfoHolder.getScopeVariables().clear();
InfoHolder.getScopeLabels().clear();
+ LocalDeclsPerLS.clear();
PrevLabel = nullptr;
CurFn = nullptr;
}
@@ -2507,10 +2549,13 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
Asm->emitDwarfLengthOrOffset(TheU->getLength());
// Emit the pubnames for this compilation unit.
- for (const auto &GI : Globals) {
- const char *Name = GI.getKeyData();
- const DIE *Entity = GI.second;
-
+ SmallVector<std::pair<StringRef, const DIE *>, 0> Vec;
+ for (const auto &GI : Globals)
+ Vec.emplace_back(GI.first(), GI.second);
+ llvm::sort(Vec, [](auto &A, auto &B) {
+ return A.second->getOffset() < B.second->getOffset();
+ });
+ for (const auto &[Name, Entity] : Vec) {
Asm->OutStreamer->AddComment("DIE offset");
Asm->emitDwarfLengthOrOffset(Entity->getOffset());
@@ -2523,7 +2568,7 @@ void DwarfDebug::emitDebugPubSection(bool GnuStyle, StringRef Name,
}
Asm->OutStreamer->AddComment("External Name");
- Asm->OutStreamer->emitBytes(StringRef(Name, GI.getKeyLength() + 1));
+ Asm->OutStreamer->emitBytes(StringRef(Name.data(), Name.size() + 1));
}
Asm->OutStreamer->AddComment("End Mark");
@@ -2566,11 +2611,10 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
for (const auto &Op : Expr) {
assert(Op.getCode() != dwarf::DW_OP_const_type &&
"3 operand ops not yet supported");
+ assert(!Op.getSubCode() && "SubOps not yet supported");
Streamer.emitInt8(Op.getCode(), Comment != End ? *(Comment++) : "");
Offset++;
- for (unsigned I = 0; I < 2; ++I) {
- if (Op.getDescription().Op[I] == Encoding::SizeNA)
- continue;
+ for (unsigned I = 0; I < Op.getDescription().Op.size(); ++I) {
if (Op.getDescription().Op[I] == Encoding::BaseTypeRef) {
unsigned Length =
Streamer.emitDIERef(*CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die);
@@ -3495,10 +3539,11 @@ template <typename DataT>
void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU,
AccelTable<DataT> &AppleAccel, StringRef Name,
const DIE &Die) {
- if (getAccelTableKind() == AccelTableKind::None)
+ if (getAccelTableKind() == AccelTableKind::None || Name.empty())
return;
if (getAccelTableKind() != AccelTableKind::Apple &&
+ CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Apple &&
CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Default)
return;
@@ -3555,11 +3600,9 @@ dwarf::Form DwarfDebug::getDwarfSectionOffsetForm() const {
}
const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) {
- auto I = SectionLabels.find(S);
- if (I == SectionLabels.end())
- return nullptr;
- return I->second;
+ return SectionLabels.lookup(S);
}
+
void DwarfDebug::insertSectionLabel(const MCSymbol *S) {
if (SectionLabels.insert(std::make_pair(&S->getSection(), S)).second)
if (useSplitDwarf() || getDwarfVersion() >= 5)
@@ -3583,3 +3626,13 @@ DwarfDebug::getMD5AsBytes(const DIFile *File) const {
std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.data());
return CKMem;
}
+
+bool DwarfDebug::alwaysUseRanges(const DwarfCompileUnit &CU) const {
+ if (MinimizeAddr == MinimizeAddrInV5::Ranges)
+ return true;
+ if (MinimizeAddr != MinimizeAddrInV5::Default)
+ return false;
+ if (useSplitDwarf())
+ return true;
+ return false;
+}
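As written, the new overload leaves an explicit MinimizeAddrInV5 setting alone and only changes the default: single-entry range lists use the ranges encoding whenever split DWARF is enabled. In sketch form:

    MinimizeAddrInV5::Ranges   -> true
    MinimizeAddrInV5::Default  -> useSplitDwarf()
    any other setting          -> false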
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 5d2ef8ee79a7..1af4b643eb17 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -318,9 +318,14 @@ class DwarfDebug : public DebugHandlerBase {
/// This is a collection of subprogram MDNodes that are processed to
/// create DIEs.
- SetVector<const DISubprogram *, SmallVector<const DISubprogram *, 16>,
- SmallPtrSet<const DISubprogram *, 16>>
- ProcessedSPNodes;
+ SmallSetVector<const DISubprogram *, 16> ProcessedSPNodes;
+
+ /// Map function-local imported entities to their parent local scope
+ /// (either DILexicalBlock or DISubprogram) for a processed function
+ /// (including inlined subprograms).
+ using MDNodeSet = SetVector<const MDNode *, SmallVector<const MDNode *, 2>,
+ SmallPtrSet<const MDNode *, 2>>;
+ DenseMap<const DILocalScope *, MDNodeSet> LocalDeclsPerLS;
/// If nonnull, stores the current machine function we're processing.
const MachineFunction *CurFn = nullptr;
@@ -456,9 +461,6 @@ private:
using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
- void ensureAbstractEntityIsCreated(DwarfCompileUnit &CU,
- const DINode *Node,
- const MDNode *Scope);
void ensureAbstractEntityIsCreatedIfScoped(DwarfCompileUnit &CU,
const DINode *Node,
const MDNode *Scope);
@@ -598,10 +600,6 @@ private:
void finishUnitAttributes(const DICompileUnit *DIUnit,
DwarfCompileUnit &NewCU);
- /// Construct imported_module or imported_declaration DIE.
- void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
- const DIImportedEntity *N);
-
/// Register a source line with debug info. Returns the unique
/// label that was emitted and which provides correspondence to the
/// source line list.
@@ -696,9 +694,7 @@ public:
/// Returns whether range encodings should be used for single entry range
/// lists.
- bool alwaysUseRanges() const {
- return MinimizeAddr == MinimizeAddrInV5::Ranges;
- }
+ bool alwaysUseRanges(const DwarfCompileUnit &) const;
// Returns whether novel exprloc addrx+offset encodings should be used to
// reduce debug_addr size.
@@ -842,6 +838,10 @@ public:
/// If the \p File has an MD5 checksum, return it as an MD5Result
/// allocated in the MCContext.
std::optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const;
+
+ MDNodeSet &getLocalDeclsForScope(const DILocalScope *S) {
+ return LocalDeclsPerLS[S];
+ }
};
} // end namespace llvm
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index ab6967f50e30..7623b7fb7c5d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -117,10 +117,10 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
// Walk up the super-register chain until we find a valid number.
// For example, EAX on x86_64 is a 32-bit fragment of RAX with offset 0.
- for (MCSuperRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) {
- Reg = TRI.getDwarfRegNum(*SR, false);
+ for (MCPhysReg SR : TRI.superregs(MachineReg)) {
+ Reg = TRI.getDwarfRegNum(SR, false);
if (Reg >= 0) {
- unsigned Idx = TRI.getSubRegIndex(*SR, MachineReg);
+ unsigned Idx = TRI.getSubRegIndex(SR, MachineReg);
unsigned Size = TRI.getSubRegIdxSize(Idx);
unsigned RegOffset = TRI.getSubRegIdxOffset(Idx);
DwarfRegs.push_back(Register::createRegister(Reg, "super-register"));
@@ -142,11 +142,11 @@ bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
// this doesn't find a combination of subregisters that fully cover
// the register (even though one may exist).
SmallBitVector Coverage(RegSize, false);
- for (MCSubRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) {
- unsigned Idx = TRI.getSubRegIndex(MachineReg, *SR);
+ for (MCPhysReg SR : TRI.subregs(MachineReg)) {
+ unsigned Idx = TRI.getSubRegIndex(MachineReg, SR);
unsigned Size = TRI.getSubRegIdxSize(Idx);
unsigned Offset = TRI.getSubRegIdxOffset(Idx);
- Reg = TRI.getDwarfRegNum(*SR, false);
+ Reg = TRI.getDwarfRegNum(SR, false);
if (Reg < 0)
continue;
@@ -566,6 +566,12 @@ bool DwarfExpression::addExpression(
case dwarf::DW_OP_dup:
case dwarf::DW_OP_push_object_address:
case dwarf::DW_OP_over:
+ case dwarf::DW_OP_eq:
+ case dwarf::DW_OP_ne:
+ case dwarf::DW_OP_gt:
+ case dwarf::DW_OP_ge:
+ case dwarf::DW_OP_lt:
+ case dwarf::DW_OP_le:
emitOp(OpNum);
break;
case dwarf::DW_OP_deref:
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index 79a6ce7801b7..464f4f048016 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -26,6 +26,7 @@ class DbgEntity;
class DbgVariable;
class DbgLabel;
class DINode;
+class DILocalScope;
class DwarfCompileUnit;
class DwarfUnit;
class LexicalScope;
@@ -87,7 +88,7 @@ class DwarfFile {
DenseMap<LexicalScope *, LabelList> ScopeLabels;
// Collection of abstract subprogram DIEs.
- DenseMap<const MDNode *, DIE *> AbstractSPDies;
+ DenseMap<const DILocalScope *, DIE *> AbstractLocalScopeDIEs;
DenseMap<const DINode *, std::unique_ptr<DbgEntity>> AbstractEntities;
/// Maps MDNodes for type system with the corresponding DIEs. These DIEs can
@@ -162,8 +163,8 @@ public:
return ScopeLabels;
}
- DenseMap<const MDNode *, DIE *> &getAbstractSPDies() {
- return AbstractSPDies;
+ DenseMap<const DILocalScope *, DIE *> &getAbstractScopeDIEs() {
+ return AbstractLocalScopeDIEs;
}
DenseMap<const DINode *, std::unique_ptr<DbgEntity>> &getAbstractEntities() {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index c2ff899c04ab..d30f0ef7af34 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -543,7 +543,7 @@ void DwarfUnit::addAccess(DIE &Die, DINode::DIFlags Flags) {
}
DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
- if (!Context || isa<DIFile>(Context))
+ if (!Context || isa<DIFile>(Context) || isa<DICompileUnit>(Context))
return &getUnitDie();
if (auto *T = dyn_cast<DIType>(Context))
return getOrCreateTypeDIE(T);
@@ -1223,7 +1223,7 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
"decl has a linkage name and it is different");
if (DeclLinkageName.empty() &&
// Always emit it for abstract subprograms.
- (DD->useAllLinkageNames() || DU->getAbstractSPDies().lookup(SP)))
+ (DD->useAllLinkageNames() || DU->getAbstractScopeDIEs().lookup(SP)))
addLinkageName(SPDie, LinkageName);
if (!DeclDie)
@@ -1362,16 +1362,16 @@ void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
auto AddBoundTypeEntry = [&](dwarf::Attribute Attr,
DISubrange::BoundType Bound) -> void {
- if (auto *BV = Bound.dyn_cast<DIVariable *>()) {
+ if (auto *BV = dyn_cast_if_present<DIVariable *>(Bound)) {
if (auto *VarDIE = getDIE(BV))
addDIEEntry(DW_Subrange, Attr, *VarDIE);
- } else if (auto *BE = Bound.dyn_cast<DIExpression *>()) {
+ } else if (auto *BE = dyn_cast_if_present<DIExpression *>(Bound)) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
DwarfExpr.setMemoryLocationKind();
DwarfExpr.addExpression(BE);
addBlock(DW_Subrange, Attr, DwarfExpr.finalize());
- } else if (auto *BI = Bound.dyn_cast<ConstantInt *>()) {
+ } else if (auto *BI = dyn_cast_if_present<ConstantInt *>(Bound)) {
if (Attr == dwarf::DW_AT_count) {
if (BI->getSExtValue() != -1)
addUInt(DW_Subrange, Attr, std::nullopt, BI->getSExtValue());
@@ -1401,10 +1401,10 @@ void DwarfUnit::constructGenericSubrangeDIE(DIE &Buffer,
auto AddBoundTypeEntry = [&](dwarf::Attribute Attr,
DIGenericSubrange::BoundType Bound) -> void {
- if (auto *BV = Bound.dyn_cast<DIVariable *>()) {
+ if (auto *BV = dyn_cast_if_present<DIVariable *>(Bound)) {
if (auto *VarDIE = getDIE(BV))
addDIEEntry(DwGenericSubrange, Attr, *VarDIE);
- } else if (auto *BE = Bound.dyn_cast<DIExpression *>()) {
+ } else if (auto *BE = dyn_cast_if_present<DIExpression *>(Bound)) {
if (BE->isConstant() &&
DIExpression::SignedOrUnsignedConstant::SignedConstant ==
*BE->isConstant()) {
@@ -1463,7 +1463,7 @@ static bool hasVectorBeenPadded(const DICompositeType *CTy) {
const auto Subrange = cast<DISubrange>(Elements[0]);
const auto NumVecElements =
Subrange->getCount()
- ? Subrange->getCount().get<ConstantInt *>()->getSExtValue()
+ ? cast<ConstantInt *>(Subrange->getCount())->getSExtValue()
: 0;
// Ensure we found the element count and that the actual size is wide
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 0caa6adbfa62..8f17e94c2d1c 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -245,10 +245,10 @@ public:
DIE *createTypeDIE(const DIScope *Context, DIE &ContextDIE, const DIType *Ty);
/// Find existing DIE or create new DIE for the given type.
- DIE *getOrCreateTypeDIE(const MDNode *TyNode);
+ virtual DIE *getOrCreateTypeDIE(const MDNode *TyNode);
/// Get context owner's DIE.
- DIE *getOrCreateContextDIE(const DIScope *Context);
+ virtual DIE *getOrCreateContextDIE(const DIScope *Context);
/// Construct DIEs for types that contain vtables.
void constructContainingTypeDIEs();
diff --git a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 67e2c0e07095..eef6b1d93f36 100644
--- a/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -410,7 +410,7 @@ MCSymbol *EHStreamer::emitExceptionTable() {
computeActionsTable(LandingPads, Actions, FirstActions);
// Compute the call-site table and call-site ranges. Normally, there is only
- // one call-site-range which covers the whole funciton. With
+ // one call-site-range which covers the whole function. With
// -basic-block-sections, there is one call-site-range per basic block
// section.
SmallVector<CallSiteEntry, 64> CallSites;
diff --git a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
index 3e75b4371033..59c3fa15885e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/PseudoProbePrinter.cpp
@@ -32,11 +32,7 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
SmallVector<InlineSite, 8> ReversedInlineStack;
auto *InlinedAt = DebugLoc ? DebugLoc->getInlinedAt() : nullptr;
while (InlinedAt) {
- const DISubprogram *SP = InlinedAt->getScope()->getSubprogram();
- // Use linkage name for C++ if possible.
- auto Name = SP->getLinkageName();
- if (Name.empty())
- Name = SP->getName();
+ auto Name = InlinedAt->getSubprogramLinkageName();
// Use caching to avoid redundant md5 computation for build speed.
uint64_t &CallerGuid = NameGuidMap[Name];
if (!CallerGuid)
@@ -46,8 +42,15 @@ void PseudoProbeHandler::emitPseudoProbe(uint64_t Guid, uint64_t Index,
ReversedInlineStack.emplace_back(CallerGuid, CallerProbeId);
InlinedAt = InlinedAt->getInlinedAt();
}
-
+ uint64_t Discriminator = 0;
+ // For now only block probes have FS discriminators. See
+ // MIRFSDiscriminator.cpp for more details.
+ if (EnableFSDiscriminator && DebugLoc &&
+ (Type == (uint64_t)PseudoProbeType::Block))
+ Discriminator = DebugLoc->getDiscriminator();
+ assert((EnableFSDiscriminator || Discriminator == 0) &&
+ "Discriminator should not be set in non-FSAFDO mode");
SmallVector<InlineSite, 8> InlineStack(llvm::reverse(ReversedInlineStack));
- Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, InlineStack,
- Asm->CurrentFnSym);
+ Asm->OutStreamer->emitPseudoProbe(Guid, Index, Type, Attr, Discriminator,
+ InlineStack, Asm->CurrentFnSym);
}
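The deleted lines spelled out the linkage-name fallback by hand; the new DILocation::getSubprogramLinkageName() call is presumed to wrap the same logic. A hedged sketch of that fallback, for illustration only:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/IR/DebugInfoMetadata.h"

  // Illustration of the fallback the removed lines implemented (assumed to be
  // what the new helper encapsulates): prefer the mangled C++ linkage name,
  // fall back to the plain subprogram name.
  static llvm::StringRef probeCallerName(const llvm::DILocation *InlinedAt) {
    const llvm::DISubprogram *SP = InlinedAt->getScope()->getSubprogram();
    llvm::StringRef Name = SP->getLinkageName();
    if (Name.empty())
      Name = SP->getName();
    return Name;
  }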
diff --git a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index 7a800438592c..6d6432b61f2d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -638,7 +638,7 @@ void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
const SEHUnwindMapEntry &UME = FuncInfo.SEHUnwindMap[State];
const MCExpr *FilterOrFinally;
const MCExpr *ExceptOrNull;
- auto *Handler = UME.Handler.get<MachineBasicBlock *>();
+ auto *Handler = cast<MachineBasicBlock *>(UME.Handler);
if (UME.IsFinally) {
FilterOrFinally = create32bitRef(getMCSymbolForMBB(Asm, Handler));
ExceptOrNull = MCConstantExpr::create(0, Ctx);
@@ -762,7 +762,11 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
OS.emitInt32(0);
AddComment("EHFlags");
- OS.emitInt32(1);
+ if (MMI->getModule()->getModuleFlag("eh-asynch")) {
+ OS.emitInt32(0);
+ } else {
+ OS.emitInt32(1);
+ }
// UnwindMapEntry {
// int32_t ToState;
@@ -771,8 +775,8 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
if (UnwindMapXData) {
OS.emitLabel(UnwindMapXData);
for (const CxxUnwindMapEntry &UME : FuncInfo.CxxUnwindMap) {
- MCSymbol *CleanupSym =
- getMCSymbolForMBB(Asm, UME.Cleanup.dyn_cast<MachineBasicBlock *>());
+ MCSymbol *CleanupSym = getMCSymbolForMBB(
+ Asm, dyn_cast_if_present<MachineBasicBlock *>(UME.Cleanup));
AddComment("ToState");
OS.emitInt32(UME.ToState);
@@ -859,8 +863,8 @@ void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) {
FrameAllocOffsetRef = MCConstantExpr::create(0, Asm->OutContext);
}
- MCSymbol *HandlerSym =
- getMCSymbolForMBB(Asm, HT.Handler.dyn_cast<MachineBasicBlock *>());
+ MCSymbol *HandlerSym = getMCSymbolForMBB(
+ Asm, dyn_cast_if_present<MachineBasicBlock *>(HT.Handler));
AddComment("Adjectives");
OS.emitInt32(HT.Adjectives);
@@ -1065,7 +1069,7 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
assert(!FuncInfo.SEHUnwindMap.empty());
for (const SEHUnwindMapEntry &UME : FuncInfo.SEHUnwindMap) {
- auto *Handler = UME.Handler.get<MachineBasicBlock *>();
+ auto *Handler = cast<MachineBasicBlock *>(UME.Handler);
const MCSymbol *ExceptOrFinally =
UME.IsFinally ? getMCSymbolForMBB(Asm, Handler) : Handler->getSymbol();
// -1 is usually the base state for "unwind to caller", but for
@@ -1136,7 +1140,7 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
DenseMap<const MachineBasicBlock *, int> HandlerStates;
for (int State = 0; State < NumStates; ++State) {
MachineBasicBlock *HandlerBlock =
- FuncInfo.ClrEHUnwindMap[State].Handler.get<MachineBasicBlock *>();
+ cast<MachineBasicBlock *>(FuncInfo.ClrEHUnwindMap[State].Handler);
HandlerStates[HandlerBlock] = State;
// Use this loop through all handlers to verify our assumption (used in
// the MinEnclosingState computation) that enclosing funclets have lower
@@ -1297,7 +1301,7 @@ void WinException::emitCLRExceptionTable(const MachineFunction *MF) {
const MCExpr *ClauseEnd = getOffsetPlusOne(Clause.EndLabel, FuncBeginSym);
const ClrEHUnwindMapEntry &Entry = FuncInfo.ClrEHUnwindMap[Clause.State];
- MachineBasicBlock *HandlerBlock = Entry.Handler.get<MachineBasicBlock *>();
+ MachineBasicBlock *HandlerBlock = cast<MachineBasicBlock *>(Entry.Handler);
MCSymbol *BeginSym = getMCSymbolForMBB(Asm, HandlerBlock);
const MCExpr *HandlerBegin = getOffset(BeginSym, FuncBeginSym);
MCSymbol *EndSym = EndSymbolMap[Clause.State];
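These WinException hunks (and the DwarfUnit ones earlier) follow one PointerUnion migration: member-style U.get<T>() and U.dyn_cast<T>() become the free functions cast<T>(U) and dyn_cast_if_present<T>(U). A small standalone sketch of the two idioms; the Handler union and Block types are made up for the example:

  #include "llvm/ADT/PointerUnion.h"
  #include "llvm/Support/Casting.h"

  struct BlockA { int Id; };
  struct BlockB { int Id; };
  using Handler = llvm::PointerUnion<BlockA *, BlockB *>;

  int handlerId(Handler H) {
    // Old style being phased out by the hunks above:
    //   auto *A = H.get<BlockA *>();       // asserts if H holds a BlockB *
    //   auto *A = H.dyn_cast<BlockA *>();  // null if H holds something else
    // New style:
    if (auto *A = llvm::dyn_cast_if_present<BlockA *>(H)) // also tolerates null
      return A->Id;
    return llvm::cast<BlockB *>(H)->Id; // asserts H really holds a BlockB *
  }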
diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
index 7098824dbe4b..5ef850d09d92 100644
--- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
+++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
@@ -1,4 +1,6 @@
#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
+#include "LiveDebugValues/LiveDebugValues.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/PostOrderIterator.h"
@@ -47,6 +49,12 @@ static cl::opt<bool> EnableMemLocFragFill("mem-loc-frag-fill", cl::init(true),
static cl::opt<bool> PrintResults("print-debug-ata", cl::init(false),
cl::Hidden);
+/// Coalesce adjacent dbg locs describing memory locations that have contiguous
+/// fragments. This reduces the cost of LiveDebugValues which does SSA
+/// construction for each explicitly stated variable fragment.
+static cl::opt<cl::boolOrDefault>
+ CoalesceAdjacentFragmentsOpt("debug-ata-coalesce-frags", cl::Hidden);
+
// Implicit conversions are disabled for enum class types, so unfortunately we
// need to create a DenseMapInfo wrapper around the specified underlying type.
template <> struct llvm::DenseMapInfo<VariableID> {
@@ -79,6 +87,8 @@ class FunctionVarLocsBuilder {
SmallVector<VarLocInfo> SingleLocVars;
public:
+ unsigned getNumVariables() const { return Variables.size(); }
+
/// Find or insert \p V and return the ID.
VariableID insertVariable(DebugVariable V) {
return static_cast<VariableID>(Variables.insert(V));
@@ -105,23 +115,23 @@ public:
/// Add a def for a variable that is valid for its lifetime.
void addSingleLocVar(DebugVariable Var, DIExpression *Expr, DebugLoc DL,
- Value *V) {
+ RawLocationWrapper R) {
VarLocInfo VarLoc;
VarLoc.VariableID = insertVariable(Var);
VarLoc.Expr = Expr;
VarLoc.DL = DL;
- VarLoc.V = V;
+ VarLoc.Values = R;
SingleLocVars.emplace_back(VarLoc);
}
/// Add a def to the wedge of defs just before /p Before.
void addVarLoc(Instruction *Before, DebugVariable Var, DIExpression *Expr,
- DebugLoc DL, Value *V) {
+ DebugLoc DL, RawLocationWrapper R) {
VarLocInfo VarLoc;
VarLoc.VariableID = insertVariable(Var);
VarLoc.Expr = Expr;
VarLoc.DL = DL;
- VarLoc.V = V;
+ VarLoc.Values = R;
VarLocsBeforeInst[Before].emplace_back(VarLoc);
}
};
@@ -148,7 +158,11 @@ void FunctionVarLocs::print(raw_ostream &OS, const Function &Fn) const {
auto PrintLoc = [&OS](const VarLocInfo &Loc) {
OS << "DEF Var=[" << (unsigned)Loc.VariableID << "]"
- << " Expr=" << *Loc.Expr << " V=" << *Loc.V << "\n";
+ << " Expr=" << *Loc.Expr << " Values=(";
+ for (auto *Op : Loc.Values.location_ops()) {
+ OS << Op->getName() << " ";
+ }
+ OS << ")\n";
};
// Print the single location variables.
@@ -234,13 +248,13 @@ getDerefOffsetInBytes(const DIExpression *DIExpr) {
int64_t Offset = 0;
const unsigned NumElements = DIExpr->getNumElements();
const auto Elements = DIExpr->getElements();
- unsigned NextElement = 0;
+ unsigned ExpectedDerefIdx = 0;
// Extract the offset.
if (NumElements > 2 && Elements[0] == dwarf::DW_OP_plus_uconst) {
Offset = Elements[1];
- NextElement = 2;
+ ExpectedDerefIdx = 2;
} else if (NumElements > 3 && Elements[0] == dwarf::DW_OP_constu) {
- NextElement = 3;
+ ExpectedDerefIdx = 3;
if (Elements[2] == dwarf::DW_OP_plus)
Offset = Elements[1];
else if (Elements[2] == dwarf::DW_OP_minus)
@@ -250,19 +264,21 @@ getDerefOffsetInBytes(const DIExpression *DIExpr) {
}
// If that's all there is it means there's no deref.
- if (NextElement >= NumElements)
+ if (ExpectedDerefIdx >= NumElements)
return std::nullopt;
// Check the next element is DW_OP_deref - otherwise this is too complex or
// isn't a deref expression.
- if (Elements[NextElement] != dwarf::DW_OP_deref)
+ if (Elements[ExpectedDerefIdx] != dwarf::DW_OP_deref)
return std::nullopt;
// Check the final operation is either the DW_OP_deref or is a fragment.
- if (NumElements == NextElement + 1)
+ if (NumElements == ExpectedDerefIdx + 1)
return Offset; // Ends with deref.
- else if (NumElements == NextElement + 3 &&
- Elements[NextElement] == dwarf::DW_OP_LLVM_fragment)
+ unsigned ExpectedFragFirstIdx = ExpectedDerefIdx + 1;
+ unsigned ExpectedFragFinalIdx = ExpectedFragFirstIdx + 2;
+ if (NumElements == ExpectedFragFinalIdx + 1 &&
+ Elements[ExpectedFragFirstIdx] == dwarf::DW_OP_LLVM_fragment)
return Offset; // Ends with deref + fragment.
// Don't bother trying to interpret anything more complex.
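A few worked element sequences may make the accepted shapes concrete; the negative-offset result is inferred from the DW_OP_minus branch, so treat that value as an assumption:

  // {DW_OP_plus_uconst, 16, DW_OP_deref}
  //     -> 16 (offset, then a single trailing deref)
  // {DW_OP_constu, 8, DW_OP_minus, DW_OP_deref}
  //     -> -8 (assumed: the minus branch negates the constant)
  // {DW_OP_plus_uconst, 4, DW_OP_deref, DW_OP_LLVM_fragment, 0, 32}
  //     -> 4 (a trailing fragment after the deref is still accepted)
  // {DW_OP_plus_uconst, 4, DW_OP_deref, DW_OP_plus_uconst, 2}
  //     -> std::nullopt (anything more complex is rejected)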
@@ -278,6 +294,24 @@ static DebugAggregate getAggregate(const DebugVariable &Var) {
return DebugAggregate(Var.getVariable(), Var.getInlinedAt());
}
+static bool shouldCoalesceFragments(Function &F) {
+ // Enabling fragment coalescing reduces compiler run time when instruction
+ // referencing is enabled. However, it may cause LiveDebugVariables to create
+ // incorrect locations. Since instruction-referencing mode effectively
+ // bypasses LiveDebugVariables we only enable coalescing if the cl::opt flag
+ // has not been explicitly set and instruction-referencing is turned on.
+ switch (CoalesceAdjacentFragmentsOpt) {
+ case cl::boolOrDefault::BOU_UNSET:
+ return debuginfoShouldUseDebugInstrRef(
+ Triple(F.getParent()->getTargetTriple()));
+ case cl::boolOrDefault::BOU_TRUE:
+ return true;
+ case cl::boolOrDefault::BOU_FALSE:
+ return false;
+ }
+ llvm_unreachable("Unknown boolOrDefault value");
+}
+
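For reference, the three-state flag above can presumably be forced from the command line; the spellings below assume the usual cl::boolOrDefault parser and are illustrative rather than verified:

  // llc -debug-ata-coalesce-frags=true   ...  force coalescing on
  // llc -debug-ata-coalesce-frags=false  ...  force coalescing off
  // (flag omitted)                            defer to the instruction-referencing default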
namespace {
/// In dwarf emission, the following sequence
/// 1. dbg.value ... Fragment(0, 64)
@@ -301,6 +335,7 @@ class MemLocFragmentFill {
Function &Fn;
FunctionVarLocsBuilder *FnVarLocs;
const DenseSet<DebugAggregate> *VarsWithStackSlot;
+ bool CoalesceAdjacentFragments;
// 0 = no memory location.
using BaseAddress = unsigned;
@@ -315,7 +350,7 @@ class MemLocFragmentFill {
/// IDs for memory location base addresses in maps. Use 0 to indicate that
/// there's no memory location.
- UniqueVector<Value *> Bases;
+ UniqueVector<RawLocationWrapper> Bases;
UniqueVector<DebugAggregate> Aggregates;
DenseMap<const BasicBlock *, VarFragMap> LiveIn;
DenseMap<const BasicBlock *, VarFragMap> LiveOut;
@@ -368,7 +403,7 @@ class MemLocFragmentFill {
/// Return a string for the value that \p BaseID represents.
std::string toString(unsigned BaseID) {
if (BaseID)
- return Bases[BaseID]->getName().str();
+ return Bases[BaseID].getVariableLocationOp(0)->getName().str();
else
return "None";
}
@@ -565,6 +600,31 @@ class MemLocFragmentFill {
<< " bits [" << StartBit << ", " << EndBit << ")\n");
}
+ /// Inserts a new dbg def if the interval found when looking up \p StartBit
+ /// in \p FragMap starts before \p StartBit or ends after \p EndBit (which
+ /// indicates - assuming StartBit->EndBit has just been inserted - that the
+ /// slice has been coalesced in the map).
+ void coalesceFragments(BasicBlock &BB, Instruction &Before, unsigned Var,
+ unsigned StartBit, unsigned EndBit, unsigned Base,
+ DebugLoc DL, const FragsInMemMap &FragMap) {
+ if (!CoalesceAdjacentFragments)
+ return;
+ // We've inserted the location into the map. The map will have coalesced
+ // adjacent intervals (variable fragments) that describe the same memory
+ // location. Use this knowledge to insert a debug location that describes
+ // that coalesced fragment. This may eclipse other locs we've just
+ // inserted. This is okay as redundant locs will be cleaned up later.
+ auto CoalescedFrag = FragMap.find(StartBit);
+ // Bail if no coalescing has taken place.
+ if (CoalescedFrag.start() == StartBit && CoalescedFrag.stop() == EndBit)
+ return;
+
+ LLVM_DEBUG(dbgs() << "- Insert loc for bits " << CoalescedFrag.start()
+ << " to " << CoalescedFrag.stop() << "\n");
+ insertMemLoc(BB, Before, Var, CoalescedFrag.start(), CoalescedFrag.stop(),
+ Base, DL);
+ }
+
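The comment above relies on IntervalMap merging touching intervals that carry the same value. A tiny standalone sketch of that behaviour, assuming half-open interval traits like FragsInMemMap presumably uses (names are illustrative):

  #include "llvm/ADT/IntervalMap.h"
  #include <cassert>

  void coalescingDemo() {
    using Map = llvm::IntervalMap<unsigned, unsigned, 16,
                                  llvm::IntervalMapHalfOpenInfo<unsigned>>;
    Map::Allocator Alloc;
    Map FragMap(Alloc);

    FragMap.insert(0, 32, /*Base=*/1);  // bits [0, 32)  -> base 1
    FragMap.insert(32, 64, /*Base=*/1); // bits [32, 64) -> base 1, same value

    // The touching intervals with equal values were merged, so looking up the
    // just-inserted start bit reports the coalesced range [0, 64). That is the
    // situation coalesceFragments() detects before emitting an extra def.
    auto It = FragMap.find(32);
    assert(It.start() == 0 && It.stop() == 64 && It.value() == 1);
  }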
void addDef(const VarLocInfo &VarLoc, Instruction &Before, BasicBlock &BB,
VarFragMap &LiveSet) {
DebugVariable DbgVar = FnVarLocs->getVariable(VarLoc.VariableID);
@@ -601,7 +661,7 @@ class MemLocFragmentFill {
const auto DerefOffsetInBytes = getDerefOffsetInBytes(DIExpr);
const unsigned Base =
DerefOffsetInBytes && *DerefOffsetInBytes * 8 == StartBit
- ? Bases.insert(VarLoc.V)
+ ? Bases.insert(VarLoc.Values)
: 0;
LLVM_DEBUG(dbgs() << "DEF " << DbgVar.getVariable()->getName() << " ["
<< StartBit << ", " << EndBit << "): " << toString(Base)
@@ -630,6 +690,8 @@ class MemLocFragmentFill {
if (!FragMap.overlaps(StartBit, EndBit)) {
LLVM_DEBUG(dbgs() << "- No overlaps\n");
FragMap.insert(StartBit, EndBit, Base);
+ coalesceFragments(BB, Before, Var, StartBit, EndBit, Base, VarLoc.DL,
+ FragMap);
return;
}
// There is at least one overlap.
@@ -720,6 +782,9 @@ class MemLocFragmentFill {
LLVM_DEBUG(dbgs() << "- Insert DEF into now-empty space\n");
FragMap.insert(StartBit, EndBit, Base);
}
+
+ coalesceFragments(BB, Before, Var, StartBit, EndBit, Base, VarLoc.DL,
+ FragMap);
}
bool skipVariable(const DILocalVariable *V) { return !V->getSizeInBits(); }
@@ -737,8 +802,10 @@ class MemLocFragmentFill {
public:
MemLocFragmentFill(Function &Fn,
- const DenseSet<DebugAggregate> *VarsWithStackSlot)
- : Fn(Fn), VarsWithStackSlot(VarsWithStackSlot) {}
+ const DenseSet<DebugAggregate> *VarsWithStackSlot,
+ bool CoalesceAdjacentFragments)
+ : Fn(Fn), VarsWithStackSlot(VarsWithStackSlot),
+ CoalesceAdjacentFragments(CoalesceAdjacentFragments) {}
/// Add variable locations to \p FnVarLocs so that any bits of a variable
/// with a memory location have that location explicitly reinstated at each
@@ -845,18 +912,20 @@ public:
}
// Insert new location defs.
- for (auto Pair : BBInsertBeforeMap) {
+ for (auto &Pair : BBInsertBeforeMap) {
InsertMap &Map = Pair.second;
- for (auto Pair : Map) {
+ for (auto &Pair : Map) {
Instruction *InsertBefore = Pair.first;
assert(InsertBefore && "should never be null");
auto FragMemLocs = Pair.second;
auto &Ctx = Fn.getContext();
- for (auto FragMemLoc : FragMemLocs) {
+ for (auto &FragMemLoc : FragMemLocs) {
DIExpression *Expr = DIExpression::get(Ctx, std::nullopt);
- Expr = *DIExpression::createFragmentExpression(
- Expr, FragMemLoc.OffsetInBits, FragMemLoc.SizeInBits);
+ if (FragMemLoc.SizeInBits !=
+ *Aggregates[FragMemLoc.Var].first->getSizeInBits())
+ Expr = *DIExpression::createFragmentExpression(
+ Expr, FragMemLoc.OffsetInBits, FragMemLoc.SizeInBits);
Expr = DIExpression::prepend(Expr, DIExpression::DerefAfter,
FragMemLoc.OffsetInBits / 8);
DebugVariable Var(Aggregates[FragMemLoc.Var].first, Expr,
@@ -961,14 +1030,17 @@ public:
}
};
- using AssignmentMap = DenseMap<VariableID, Assignment>;
- using LocMap = DenseMap<VariableID, LocKind>;
- using OverlapMap = DenseMap<VariableID, SmallVector<VariableID, 4>>;
+ using AssignmentMap = SmallVector<Assignment>;
+ using LocMap = SmallVector<LocKind>;
+ using OverlapMap = DenseMap<VariableID, SmallVector<VariableID>>;
using UntaggedStoreAssignmentMap =
DenseMap<const Instruction *,
SmallVector<std::pair<VariableID, at::AssignmentInfo>>>;
private:
+ /// One more than the highest numbered VariableID of the partially promoted
+ /// variables; VariableID values start at 1.
+ unsigned TrackedVariablesVectorSize = 0;
/// Map a variable to the set of variables that it fully contains.
OverlapMap VarContains;
/// Map untagged stores to the variable fragments they assign to. Used by
@@ -984,30 +1056,23 @@ private:
void emitDbgValue(LocKind Kind, const DbgVariableIntrinsic *Source,
Instruction *After);
- static bool mapsAreEqual(const AssignmentMap &A, const AssignmentMap &B) {
- if (A.size() != B.size())
- return false;
- for (const auto &Pair : A) {
- VariableID Var = Pair.first;
- const Assignment &AV = Pair.second;
- auto R = B.find(Var);
- // Check if this entry exists in B, otherwise ret false.
- if (R == B.end())
- return false;
- // Check that the assignment value is the same.
- if (!AV.isSameSourceAssignment(R->second))
- return false;
- }
- return true;
+ static bool mapsAreEqual(const BitVector &Mask, const AssignmentMap &A,
+ const AssignmentMap &B) {
+ return llvm::all_of(Mask.set_bits(), [&](unsigned VarID) {
+ return A[VarID].isSameSourceAssignment(B[VarID]);
+ });
}
/// Represents the stack and debug assignments in a block. Used to describe
/// the live-in and live-out values for blocks, as well as the "current"
/// value as we process each instruction in a block.
struct BlockInfo {
- /// Dominating assignment to memory for each variable.
+ /// The set of variables (VariableID) being tracked in this block.
+ BitVector VariableIDsInBlock;
+ /// Dominating assignment to memory for each variable, indexed by
+ /// VariableID.
AssignmentMap StackHomeValue;
- /// Dominating assignemnt to each variable.
+ /// Dominating assignment to each variable, indexed by VariableID.
AssignmentMap DebugValue;
/// Location kind for each variable. LiveLoc indicates whether the
/// dominating assignment in StackHomeValue (LocKind::Mem), DebugValue
@@ -1018,20 +1083,138 @@ private:
/// merge of multiple assignments (both are Status::NoneOrPhi). In other
/// words, the memory location may well be valid while both DebugValue and
/// StackHomeValue contain Assignments that have a Status of NoneOrPhi.
+ /// Indexed by VariableID.
LocMap LiveLoc;
+ public:
+ enum AssignmentKind { Stack, Debug };
+ const AssignmentMap &getAssignmentMap(AssignmentKind Kind) const {
+ switch (Kind) {
+ case Stack:
+ return StackHomeValue;
+ case Debug:
+ return DebugValue;
+ }
+ llvm_unreachable("Unknown AssignmentKind");
+ }
+ AssignmentMap &getAssignmentMap(AssignmentKind Kind) {
+ return const_cast<AssignmentMap &>(
+ const_cast<const BlockInfo *>(this)->getAssignmentMap(Kind));
+ }
+
+ bool isVariableTracked(VariableID Var) const {
+ return VariableIDsInBlock[static_cast<unsigned>(Var)];
+ }
+
+ const Assignment &getAssignment(AssignmentKind Kind, VariableID Var) const {
+ assert(isVariableTracked(Var) && "Var not tracked in block");
+ return getAssignmentMap(Kind)[static_cast<unsigned>(Var)];
+ }
+
+ LocKind getLocKind(VariableID Var) const {
+ assert(isVariableTracked(Var) && "Var not tracked in block");
+ return LiveLoc[static_cast<unsigned>(Var)];
+ }
+
+ /// Set LocKind for \p Var only: does not set LocKind for VariableIDs of
+ /// fragments contained within \p Var.
+ void setLocKind(VariableID Var, LocKind K) {
+ VariableIDsInBlock.set(static_cast<unsigned>(Var));
+ LiveLoc[static_cast<unsigned>(Var)] = K;
+ }
+
+ /// Set the assignment in the \p Kind assignment map for \p Var only: does
+ /// not set the assignment for VariableIDs of fragments contained within
+ /// \p Var.
+ void setAssignment(AssignmentKind Kind, VariableID Var,
+ const Assignment &AV) {
+ VariableIDsInBlock.set(static_cast<unsigned>(Var));
+ getAssignmentMap(Kind)[static_cast<unsigned>(Var)] = AV;
+ }
+
+ /// Return true if there is an assignment matching \p AV in the \p Kind
+ /// assignment map. Does consider assignments for VariableIDs of fragments
+ /// contained within \p Var.
+ bool hasAssignment(AssignmentKind Kind, VariableID Var,
+ const Assignment &AV) const {
+ if (!isVariableTracked(Var))
+ return false;
+ return AV.isSameSourceAssignment(getAssignment(Kind, Var));
+ }
+
/// Compare every element in each map to determine structural equality
/// (slow).
bool operator==(const BlockInfo &Other) const {
- return LiveLoc == Other.LiveLoc &&
- mapsAreEqual(StackHomeValue, Other.StackHomeValue) &&
- mapsAreEqual(DebugValue, Other.DebugValue);
+ return VariableIDsInBlock == Other.VariableIDsInBlock &&
+ LiveLoc == Other.LiveLoc &&
+ mapsAreEqual(VariableIDsInBlock, StackHomeValue,
+ Other.StackHomeValue) &&
+ mapsAreEqual(VariableIDsInBlock, DebugValue, Other.DebugValue);
}
bool operator!=(const BlockInfo &Other) const { return !(*this == Other); }
bool isValid() {
return LiveLoc.size() == DebugValue.size() &&
LiveLoc.size() == StackHomeValue.size();
}
+
+ /// Clear everything and initialise with ⊤-values for all variables.
+ void init(int NumVars) {
+ StackHomeValue.clear();
+ DebugValue.clear();
+ LiveLoc.clear();
+ VariableIDsInBlock = BitVector(NumVars);
+ StackHomeValue.insert(StackHomeValue.begin(), NumVars,
+ Assignment::makeNoneOrPhi());
+ DebugValue.insert(DebugValue.begin(), NumVars,
+ Assignment::makeNoneOrPhi());
+ LiveLoc.insert(LiveLoc.begin(), NumVars, LocKind::None);
+ }
+
+ /// Helper for join.
+ template <typename ElmtType, typename FnInputType>
+ static void joinElmt(int Index, SmallVector<ElmtType> &Target,
+ const SmallVector<ElmtType> &A,
+ const SmallVector<ElmtType> &B,
+ ElmtType (*Fn)(FnInputType, FnInputType)) {
+ Target[Index] = Fn(A[Index], B[Index]);
+ }
+
+ /// See comment for AssignmentTrackingLowering::joinBlockInfo.
+ static BlockInfo join(const BlockInfo &A, const BlockInfo &B, int NumVars) {
+ // Join A and B.
+ //
+ // Intersect = join(a, b) for a in A, b in B where Var(a) == Var(b)
+ // Difference = join(x, ⊤) for x where Var(x) is in A xor B
+ // Join = Intersect ∪ Difference
+ //
+ // This is achieved by performing a join on elements from A and B with
+ // variables common to both A and B (join elements indexed by var
+ // intersect), then adding ⊤-value elements for vars in A xor B. The
+ // latter part is equivalent to performing join on elements with variables
+ // in A xor B with the ⊤-value for the map element since join(x, ⊤) = ⊤.
+ // BlockInfo::init initializes all variable entries to the ⊤ value so we
+ // don't need to explicitly perform that step as Join.VariableIDsInBlock
+ // is set to the union of the variables in A and B at the end of this
+ // function.
+ BlockInfo Join;
+ Join.init(NumVars);
+
+ BitVector Intersect = A.VariableIDsInBlock;
+ Intersect &= B.VariableIDsInBlock;
+
+ for (auto VarID : Intersect.set_bits()) {
+ joinElmt(VarID, Join.LiveLoc, A.LiveLoc, B.LiveLoc, joinKind);
+ joinElmt(VarID, Join.DebugValue, A.DebugValue, B.DebugValue,
+ joinAssignment);
+ joinElmt(VarID, Join.StackHomeValue, A.StackHomeValue, B.StackHomeValue,
+ joinAssignment);
+ }
+
+ Join.VariableIDsInBlock = A.VariableIDsInBlock;
+ Join.VariableIDsInBlock |= B.VariableIDsInBlock;
+ assert(Join.isValid());
+ return Join;
+ }
};
Function &Fn;
@@ -1076,11 +1259,8 @@ private:
/// (⊤) in this case (unknown location / assignment).
///@{
static LocKind joinKind(LocKind A, LocKind B);
- static LocMap joinLocMap(const LocMap &A, const LocMap &B);
static Assignment joinAssignment(const Assignment &A, const Assignment &B);
- static AssignmentMap joinAssignmentMap(const AssignmentMap &A,
- const AssignmentMap &B);
- static BlockInfo joinBlockInfo(const BlockInfo &A, const BlockInfo &B);
+ BlockInfo joinBlockInfo(const BlockInfo &A, const BlockInfo &B);
///@}
/// Process the instructions in \p BB updating \p LiveSet along the way. \p
@@ -1092,7 +1272,7 @@ private:
/// location information).
///@{
void processNonDbgInstruction(Instruction &I, BlockInfo *LiveSet);
- void processDbgInstruction(Instruction &I, BlockInfo *LiveSet);
+ void processDbgInstruction(DbgInfoIntrinsic &I, BlockInfo *LiveSet);
/// Update \p LiveSet after encountering an instruction with a DIAssignID
/// attachment, \p I.
void processTaggedInstruction(Instruction &I, BlockInfo *LiveSet);
@@ -1113,8 +1293,15 @@ private:
/// have been called for \p Var first.
LocKind getLocKind(BlockInfo *LiveSet, VariableID Var);
/// Return true if \p Var has an assignment in \p M matching \p AV.
- bool hasVarWithAssignment(VariableID Var, const Assignment &AV,
- const AssignmentMap &M);
+ bool hasVarWithAssignment(BlockInfo *LiveSet, BlockInfo::AssignmentKind Kind,
+ VariableID Var, const Assignment &AV);
+ /// Return the set of VariableIDs corresponding to the fragments contained
+ /// fully within the variable/fragment \p Var.
+ ArrayRef<VariableID> getContainedFragments(VariableID Var) const;
+
+ /// Mark \p Var as having been touched this frame. Note, this applies only
+ /// to the exact fragment \p Var and not to any fragments contained within.
+ void touchFragment(VariableID Var);
/// Emit info for variables that are fully promoted.
bool emitPromotedVarLocs(FunctionVarLocsBuilder *FnVarLocs);
@@ -1129,66 +1316,60 @@ public:
};
} // namespace
+ArrayRef<VariableID>
+AssignmentTrackingLowering::getContainedFragments(VariableID Var) const {
+ auto R = VarContains.find(Var);
+ if (R == VarContains.end())
+ return std::nullopt;
+ return R->second;
+}
+
+void AssignmentTrackingLowering::touchFragment(VariableID Var) {
+ VarsTouchedThisFrame.insert(Var);
+}
+
void AssignmentTrackingLowering::setLocKind(BlockInfo *LiveSet, VariableID Var,
LocKind K) {
auto SetKind = [this](BlockInfo *LiveSet, VariableID Var, LocKind K) {
- VarsTouchedThisFrame.insert(Var);
- LiveSet->LiveLoc[Var] = K;
+ LiveSet->setLocKind(Var, K);
+ touchFragment(Var);
};
SetKind(LiveSet, Var, K);
// Update the LocKind for all fragments contained within Var.
- for (VariableID Frag : VarContains[Var])
+ for (VariableID Frag : getContainedFragments(Var))
SetKind(LiveSet, Frag, K);
}
AssignmentTrackingLowering::LocKind
AssignmentTrackingLowering::getLocKind(BlockInfo *LiveSet, VariableID Var) {
- auto Pair = LiveSet->LiveLoc.find(Var);
- assert(Pair != LiveSet->LiveLoc.end());
- return Pair->second;
+ return LiveSet->getLocKind(Var);
}
void AssignmentTrackingLowering::addMemDef(BlockInfo *LiveSet, VariableID Var,
const Assignment &AV) {
- auto AddDef = [](BlockInfo *LiveSet, VariableID Var, Assignment AV) {
- LiveSet->StackHomeValue[Var] = AV;
- // Add default (Var -> ⊤) to DebugValue if Var isn't in DebugValue yet.
- LiveSet->DebugValue.insert({Var, Assignment::makeNoneOrPhi()});
- // Add default (Var -> ⊤) to LiveLocs if Var isn't in LiveLocs yet. Callers
- // of addMemDef will call setLocKind to override.
- LiveSet->LiveLoc.insert({Var, LocKind::None});
- };
- AddDef(LiveSet, Var, AV);
+ LiveSet->setAssignment(BlockInfo::Stack, Var, AV);
 // Use this assignment for all fragments contained within Var, but do not
// provide a Source because we cannot convert Var's value to a value for the
// fragment.
Assignment FragAV = AV;
FragAV.Source = nullptr;
- for (VariableID Frag : VarContains[Var])
- AddDef(LiveSet, Frag, FragAV);
+ for (VariableID Frag : getContainedFragments(Var))
+ LiveSet->setAssignment(BlockInfo::Stack, Frag, FragAV);
}
void AssignmentTrackingLowering::addDbgDef(BlockInfo *LiveSet, VariableID Var,
const Assignment &AV) {
- auto AddDef = [](BlockInfo *LiveSet, VariableID Var, Assignment AV) {
- LiveSet->DebugValue[Var] = AV;
- // Add default (Var -> ⊤) to StackHome if Var isn't in StackHome yet.
- LiveSet->StackHomeValue.insert({Var, Assignment::makeNoneOrPhi()});
- // Add default (Var -> ⊤) to LiveLocs if Var isn't in LiveLocs yet. Callers
- // of addDbgDef will call setLocKind to override.
- LiveSet->LiveLoc.insert({Var, LocKind::None});
- };
- AddDef(LiveSet, Var, AV);
+ LiveSet->setAssignment(BlockInfo::Debug, Var, AV);
 // Use this assignment for all fragments contained within Var, but do not
// provide a Source because we cannot convert Var's value to a value for the
// fragment.
Assignment FragAV = AV;
FragAV.Source = nullptr;
- for (VariableID Frag : VarContains[Var])
- AddDef(LiveSet, Frag, FragAV);
+ for (VariableID Frag : getContainedFragments(Var))
+ LiveSet->setAssignment(BlockInfo::Debug, Frag, FragAV);
}
static DIAssignID *getIDFromInst(const Instruction &I) {
@@ -1200,24 +1381,16 @@ static DIAssignID *getIDFromMarker(const DbgAssignIntrinsic &DAI) {
}
/// Return true if \p Var has an assignment in \p M matching \p AV.
-bool AssignmentTrackingLowering::hasVarWithAssignment(VariableID Var,
- const Assignment &AV,
- const AssignmentMap &M) {
- auto AssignmentIsMapped = [](VariableID Var, const Assignment &AV,
- const AssignmentMap &M) {
- auto R = M.find(Var);
- if (R == M.end())
- return false;
- return AV.isSameSourceAssignment(R->second);
- };
-
- if (!AssignmentIsMapped(Var, AV, M))
+bool AssignmentTrackingLowering::hasVarWithAssignment(
+ BlockInfo *LiveSet, BlockInfo::AssignmentKind Kind, VariableID Var,
+ const Assignment &AV) {
+ if (!LiveSet->hasAssignment(Kind, Var, AV))
return false;
// Check all the frags contained within Var as these will have all been
// mapped to AV at the last store to Var.
- for (VariableID Frag : VarContains[Var])
- if (!AssignmentIsMapped(Frag, AV, M))
+ for (VariableID Frag : getContainedFragments(Var))
+ if (!LiveSet->hasAssignment(Kind, Frag, AV))
return false;
return true;
}
@@ -1242,10 +1415,11 @@ void AssignmentTrackingLowering::emitDbgValue(
const DbgVariableIntrinsic *Source, Instruction *After) {
DILocation *DL = Source->getDebugLoc();
- auto Emit = [this, Source, After, DL](Value *Val, DIExpression *Expr) {
+ auto Emit = [this, Source, After, DL](Metadata *Val, DIExpression *Expr) {
assert(Expr);
if (!Val)
- Val = PoisonValue::get(Type::getInt1Ty(Source->getContext()));
+ Val = ValueAsMetadata::get(
+ PoisonValue::get(Type::getInt1Ty(Source->getContext())));
// Find a suitable insert point.
Instruction *InsertBefore = After->getNextNode();
@@ -1255,7 +1429,7 @@ void AssignmentTrackingLowering::emitDbgValue(
VarLocInfo VarLoc;
VarLoc.VariableID = static_cast<VariableID>(Var);
VarLoc.Expr = Expr;
- VarLoc.V = Val;
+ VarLoc.Values = RawLocationWrapper(Val);
VarLoc.DL = DL;
// Insert it into the map for later.
InsertBeforeMap[InsertBefore].push_back(VarLoc);
@@ -1284,16 +1458,13 @@ void AssignmentTrackingLowering::emitDbgValue(
// The address-expression has an implicit deref, add it now.
std::tie(Val, Expr) =
walkToAllocaAndPrependOffsetDeref(Layout, Val, Expr);
- Emit(Val, Expr);
+ Emit(ValueAsMetadata::get(Val), Expr);
return;
}
}
if (Kind == LocKind::Val) {
- /// Get the value component, converting to Undef if it is variadic.
- Value *Val =
- Source->hasArgList() ? nullptr : Source->getVariableLocationOp(0);
- Emit(Val, Source->getExpression());
+ Emit(Source->getRawLocation(), Source->getExpression());
return;
}
@@ -1371,7 +1542,8 @@ void AssignmentTrackingLowering::processUntaggedInstruction(
VarLocInfo VarLoc;
VarLoc.VariableID = static_cast<VariableID>(Var);
VarLoc.Expr = DIE;
- VarLoc.V = const_cast<AllocaInst *>(Info.Base);
+ VarLoc.Values = RawLocationWrapper(
+ ValueAsMetadata::get(const_cast<AllocaInst *>(Info.Base)));
VarLoc.DL = DILoc;
// 3. Insert it into the map for later.
InsertBeforeMap[InsertBefore].push_back(VarLoc);
@@ -1405,13 +1577,14 @@ void AssignmentTrackingLowering::processTaggedInstruction(
// The last assignment to the stack is now AV. Check if the last debug
// assignment has a matching Assignment.
- if (hasVarWithAssignment(Var, AV, LiveSet->DebugValue)) {
+ if (hasVarWithAssignment(LiveSet, BlockInfo::Debug, Var, AV)) {
// The StackHomeValue and DebugValue for this variable match so we can
// emit a stack home location here.
LLVM_DEBUG(dbgs() << "Mem, Stack matches Debug program\n";);
LLVM_DEBUG(dbgs() << " Stack val: "; AV.dump(dbgs()); dbgs() << "\n");
LLVM_DEBUG(dbgs() << " Debug val: ";
- LiveSet->DebugValue[Var].dump(dbgs()); dbgs() << "\n");
+ LiveSet->DebugValue[static_cast<unsigned>(Var)].dump(dbgs());
+ dbgs() << "\n");
setLocKind(LiveSet, Var, LocKind::Mem);
emitDbgValue(LocKind::Mem, DAI, &I);
continue;
@@ -1434,7 +1607,8 @@ void AssignmentTrackingLowering::processTaggedInstruction(
// There's been an assignment to memory that we were using as a
// location for this variable, and the Assignment doesn't match what
// we'd expect to see in memory.
- if (LiveSet->DebugValue[Var].Status == Assignment::NoneOrPhi) {
+ Assignment DbgAV = LiveSet->getAssignment(BlockInfo::Debug, Var);
+ if (DbgAV.Status == Assignment::NoneOrPhi) {
// We need to terminate any previously open location now.
LLVM_DEBUG(dbgs() << "None, No Debug value available\n";);
setLocKind(LiveSet, Var, LocKind::None);
@@ -1443,9 +1617,8 @@ void AssignmentTrackingLowering::processTaggedInstruction(
// The previous DebugValue Value can be used here.
LLVM_DEBUG(dbgs() << "Val, Debug value is Known\n";);
setLocKind(LiveSet, Var, LocKind::Val);
- Assignment PrevAV = LiveSet->DebugValue.lookup(Var);
- if (PrevAV.Source) {
- emitDbgValue(LocKind::Val, PrevAV.Source, &I);
+ if (DbgAV.Source) {
+ emitDbgValue(LocKind::Val, DbgAV.Source, &I);
} else {
// PrevAV.Source is nullptr so we must emit undef here.
emitDbgValue(LocKind::None, DAI, &I);
@@ -1479,7 +1652,7 @@ void AssignmentTrackingLowering::processDbgAssign(DbgAssignIntrinsic &DAI,
// Check if the DebugValue and StackHomeValue both hold the same
// Assignment.
- if (hasVarWithAssignment(Var, AV, LiveSet->StackHomeValue)) {
+ if (hasVarWithAssignment(LiveSet, BlockInfo::Stack, Var, AV)) {
// They match. We can use the stack home because the debug intrinsics state
// that an assignment happened here, and we know that specific assignment
// was the last one to take place in memory for this variable.
@@ -1529,9 +1702,22 @@ void AssignmentTrackingLowering::processDbgValue(DbgValueInst &DVI,
emitDbgValue(LocKind::Val, &DVI, &DVI);
}
+static bool hasZeroSizedFragment(DbgVariableIntrinsic &DVI) {
+ if (auto F = DVI.getExpression()->getFragmentInfo())
+ return F->SizeInBits == 0;
+ return false;
+}
+
void AssignmentTrackingLowering::processDbgInstruction(
- Instruction &I, AssignmentTrackingLowering::BlockInfo *LiveSet) {
- assert(!isa<DbgAddrIntrinsic>(&I) && "unexpected dbg.addr");
+ DbgInfoIntrinsic &I, AssignmentTrackingLowering::BlockInfo *LiveSet) {
+ auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I);
+ if (!DVI)
+ return;
+
+ // Ignore assignments to zero bits of the variable.
+ if (hasZeroSizedFragment(*DVI))
+ return;
+
if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(&I))
processDbgAssign(*DAI, LiveSet);
else if (auto *DVI = dyn_cast<DbgValueInst>(&I))
@@ -1561,10 +1747,11 @@ void AssignmentTrackingLowering::process(BasicBlock &BB, BlockInfo *LiveSet) {
++II;
}
while (II != EI) {
- if (!isa<DbgInfoIntrinsic>(&*II))
+ auto *Dbg = dyn_cast<DbgInfoIntrinsic>(&*II);
+ if (!Dbg)
break;
resetInsertionPoint(*II);
- processDbgInstruction(*II, LiveSet);
+ processDbgInstruction(*Dbg, LiveSet);
assert(LiveSet->isValid());
++II;
}
@@ -1597,54 +1784,6 @@ AssignmentTrackingLowering::joinKind(LocKind A, LocKind B) {
return A == B ? A : LocKind::None;
}
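The per-variable join that the new BlockInfo::join performs is easiest to see on a small example. LocKind values follow the joinKind rule above (equal kinds survive, anything else collapses to None, the top value), and unvisited predecessors are skipped entirely, acting as the bottom/identity value:

  // Pred P1 live-out: Var1 -> Mem, Var2 -> Val
  // Pred P2 live-out: Var1 -> Mem, Var3 -> Val
  //
  // join(P1, P2) gives the block's live-in:
  //   Var1: Mem  (tracked in both, kinds agree)
  //   Var2: None (tracked only in P1, so joined with the implicit top value)
  //   Var3: None (tracked only in P2, likewise)
  //
  // A block whose only *visited* predecessor is P1 simply copies P1's
  // live-out, since unvisited predecessors contribute nothing.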
-AssignmentTrackingLowering::LocMap
-AssignmentTrackingLowering::joinLocMap(const LocMap &A, const LocMap &B) {
- // Join A and B.
- //
- // U = join(a, b) for a in A, b in B where Var(a) == Var(b)
- // D = join(x, ⊤) for x where Var(x) is in A xor B
- // Join = U ∪ D
- //
- // This is achieved by performing a join on elements from A and B with
- // variables common to both A and B (join elements indexed by var intersect),
- // then adding LocKind::None elements for vars in A xor B. The latter part is
- // equivalent to performing join on elements with variables in A xor B with
- // LocKind::None (⊤) since join(x, ⊤) = ⊤.
- LocMap Join;
- SmallVector<VariableID, 16> SymmetricDifference;
- // Insert the join of the elements with common vars into Join. Add the
- // remaining elements to into SymmetricDifference.
- for (const auto &[Var, Loc] : A) {
- // If this Var doesn't exist in B then add it to the symmetric difference
- // set.
- auto R = B.find(Var);
- if (R == B.end()) {
- SymmetricDifference.push_back(Var);
- continue;
- }
- // There is an entry for Var in both, join it.
- Join[Var] = joinKind(Loc, R->second);
- }
- unsigned IntersectSize = Join.size();
- (void)IntersectSize;
-
- // Add the elements in B with variables that are not in A into
- // SymmetricDifference.
- for (const auto &Pair : B) {
- VariableID Var = Pair.first;
- if (A.count(Var) == 0)
- SymmetricDifference.push_back(Var);
- }
-
- // Add SymmetricDifference elements to Join and return the result.
- for (const auto &Var : SymmetricDifference)
- Join.insert({Var, LocKind::None});
-
- assert(Join.size() == (IntersectSize + SymmetricDifference.size()));
- assert(Join.size() >= A.size() && Join.size() >= B.size());
- return Join;
-}
-
AssignmentTrackingLowering::Assignment
AssignmentTrackingLowering::joinAssignment(const Assignment &A,
const Assignment &B) {
@@ -1687,107 +1826,80 @@ AssignmentTrackingLowering::joinAssignment(const Assignment &A,
return Assignment::make(A.ID, Source);
}
-AssignmentTrackingLowering::AssignmentMap
-AssignmentTrackingLowering::joinAssignmentMap(const AssignmentMap &A,
- const AssignmentMap &B) {
- // Join A and B.
- //
- // U = join(a, b) for a in A, b in B where Var(a) == Var(b)
- // D = join(x, ⊤) for x where Var(x) is in A xor B
- // Join = U ∪ D
- //
- // This is achieved by performing a join on elements from A and B with
- // variables common to both A and B (join elements indexed by var intersect),
- // then adding LocKind::None elements for vars in A xor B. The latter part is
- // equivalent to performing join on elements with variables in A xor B with
- // Status::NoneOrPhi (⊤) since join(x, ⊤) = ⊤.
- AssignmentMap Join;
- SmallVector<VariableID, 16> SymmetricDifference;
- // Insert the join of the elements with common vars into Join. Add the
- // remaining elements to into SymmetricDifference.
- for (const auto &[Var, AV] : A) {
- // If this Var doesn't exist in B then add it to the symmetric difference
- // set.
- auto R = B.find(Var);
- if (R == B.end()) {
- SymmetricDifference.push_back(Var);
- continue;
- }
- // There is an entry for Var in both, join it.
- Join[Var] = joinAssignment(AV, R->second);
- }
- unsigned IntersectSize = Join.size();
- (void)IntersectSize;
-
- // Add the elements in B with variables that are not in A into
- // SymmetricDifference.
- for (const auto &Pair : B) {
- VariableID Var = Pair.first;
- if (A.count(Var) == 0)
- SymmetricDifference.push_back(Var);
- }
-
- // Add SymmetricDifference elements to Join and return the result.
- for (auto Var : SymmetricDifference)
- Join.insert({Var, Assignment::makeNoneOrPhi()});
-
- assert(Join.size() == (IntersectSize + SymmetricDifference.size()));
- assert(Join.size() >= A.size() && Join.size() >= B.size());
- return Join;
-}
-
AssignmentTrackingLowering::BlockInfo
AssignmentTrackingLowering::joinBlockInfo(const BlockInfo &A,
const BlockInfo &B) {
- BlockInfo Join;
- Join.LiveLoc = joinLocMap(A.LiveLoc, B.LiveLoc);
- Join.StackHomeValue = joinAssignmentMap(A.StackHomeValue, B.StackHomeValue);
- Join.DebugValue = joinAssignmentMap(A.DebugValue, B.DebugValue);
- assert(Join.isValid());
- return Join;
+ return BlockInfo::join(A, B, TrackedVariablesVectorSize);
}
bool AssignmentTrackingLowering::join(
const BasicBlock &BB, const SmallPtrSet<BasicBlock *, 16> &Visited) {
- BlockInfo BBLiveIn;
- bool FirstJoin = true;
- // LiveIn locs for BB is the join of the already-processed preds' LiveOut
- // locs.
+
+ SmallVector<const BasicBlock *> VisitedPreds;
+ // Ignore backedges if we have not visited the predecessor yet. As the
+ // predecessor hasn't yet had locations propagated into it, most locations
+ // will not yet be valid, so treat them as all being uninitialized and
+ // potentially valid. If a location guessed to be correct here is
+ // invalidated later, we will remove it when we revisit this block. This
+ // is essentially the same as initialising all LocKinds and Assignments to
+ // an implicit ⊥ value which is the identity value for the join operation.
for (auto I = pred_begin(&BB), E = pred_end(&BB); I != E; I++) {
- // Ignore backedges if we have not visited the predecessor yet. As the
- // predecessor hasn't yet had locations propagated into it, most locations
- // will not yet be valid, so treat them as all being uninitialized and
- // potentially valid. If a location guessed to be correct here is
- // invalidated later, we will remove it when we revisit this block. This
- // is essentially the same as initialising all LocKinds and Assignments to
- // an implicit ⊥ value which is the identity value for the join operation.
const BasicBlock *Pred = *I;
- if (!Visited.count(Pred))
- continue;
+ if (Visited.count(Pred))
+ VisitedPreds.push_back(Pred);
+ }
+
+ // No preds visited yet.
+ if (VisitedPreds.empty()) {
+ auto It = LiveIn.try_emplace(&BB, BlockInfo());
+ bool DidInsert = It.second;
+ if (DidInsert)
+ It.first->second.init(TrackedVariablesVectorSize);
+ return /*Changed*/ DidInsert;
+ }
- auto PredLiveOut = LiveOut.find(Pred);
- // Pred must have been processed already. See comment at start of this loop.
- assert(PredLiveOut != LiveOut.end());
+ // Exactly one visited pred. Copy the LiveOut from that pred into BB LiveIn.
+ if (VisitedPreds.size() == 1) {
+ const BlockInfo &PredLiveOut = LiveOut.find(VisitedPreds[0])->second;
+ auto CurrentLiveInEntry = LiveIn.find(&BB);
- // Perform the join of BBLiveIn (current live-in info) and PrevLiveOut.
- if (FirstJoin)
- BBLiveIn = PredLiveOut->second;
+ // Check if there isn't an entry, or there is but the LiveIn set has
+ // changed (expensive check).
+ if (CurrentLiveInEntry == LiveIn.end())
+ LiveIn.insert(std::make_pair(&BB, PredLiveOut));
+ else if (PredLiveOut != CurrentLiveInEntry->second)
+ CurrentLiveInEntry->second = PredLiveOut;
else
- BBLiveIn = joinBlockInfo(std::move(BBLiveIn), PredLiveOut->second);
- FirstJoin = false;
+ return /*Changed*/ false;
+ return /*Changed*/ true;
+ }
+
+ // More than one pred. Join LiveOuts of blocks 1 and 2.
+ assert(VisitedPreds.size() > 1);
+ const BlockInfo &PredLiveOut0 = LiveOut.find(VisitedPreds[0])->second;
+ const BlockInfo &PredLiveOut1 = LiveOut.find(VisitedPreds[1])->second;
+ BlockInfo BBLiveIn = joinBlockInfo(PredLiveOut0, PredLiveOut1);
+
+ // Join the LiveOuts of subsequent blocks.
+ ArrayRef Tail = ArrayRef(VisitedPreds).drop_front(2);
+ for (const BasicBlock *Pred : Tail) {
+ const auto &PredLiveOut = LiveOut.find(Pred);
+ assert(PredLiveOut != LiveOut.end() &&
+ "block should have been processed already");
+ BBLiveIn = joinBlockInfo(std::move(BBLiveIn), PredLiveOut->second);
}
+ // Save the joined result for BB.
auto CurrentLiveInEntry = LiveIn.find(&BB);
// Check if there isn't an entry, or there is but the LiveIn set has changed
// (expensive check).
- if (CurrentLiveInEntry == LiveIn.end() ||
- BBLiveIn != CurrentLiveInEntry->second) {
- LiveIn[&BB] = std::move(BBLiveIn);
- // A change has occured.
- return true;
- }
- // No change.
- return false;
+ if (CurrentLiveInEntry == LiveIn.end())
+ LiveIn.try_emplace(&BB, std::move(BBLiveIn));
+ else if (BBLiveIn != CurrentLiveInEntry->second)
+ CurrentLiveInEntry->second = std::move(BBLiveIn);
+ else
+ return /*Changed*/ false;
+ return /*Changed*/ true;
}
/// Return true if A fully contains B.
@@ -1823,7 +1935,13 @@ getUntaggedStoreAssignmentInfo(const Instruction &I, const DataLayout &Layout) {
/// y does not contain all overlaps because partial overlaps are excluded.
///
/// While we're iterating over the function, add single location defs for
-/// dbg.declares to \p FnVarLocs
+/// dbg.declares to \p FnVarLocs.
+///
+/// Variables that are interesting to this pass are added to
+/// FnVarLocs->Variables first. TrackedVariablesVectorSize is set to the ID of
+/// the last interesting variable plus 1, meaning variables with ID 1
+/// (inclusive) to TrackedVariablesVectorSize (exclusive) are interesting. The
+/// subsequent variables are either stack homed or fully promoted.
///
/// Finally, populate UntaggedStoreVars with a mapping of untagged stores to
/// the stored-to variable fragments.
@@ -1832,7 +1950,9 @@ getUntaggedStoreAssignmentInfo(const Instruction &I, const DataLayout &Layout) {
/// to iterate over the function as they can be achieved together in one pass.
static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
Function &Fn, FunctionVarLocsBuilder *FnVarLocs,
- AssignmentTrackingLowering::UntaggedStoreAssignmentMap &UntaggedStoreVars) {
+ const DenseSet<DebugAggregate> &VarsWithStackSlot,
+ AssignmentTrackingLowering::UntaggedStoreAssignmentMap &UntaggedStoreVars,
+ unsigned &TrackedVariablesVectorSize) {
DenseSet<DebugVariable> Seen;
// Map of Variable: [Fragments].
DenseMap<DebugAggregate, SmallVector<DebugVariable, 8>> FragmentMap;
@@ -1843,14 +1963,16 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
// UntaggedStoreVars.
// We need to add fragments for untagged stores too so that we can correctly
// clobber overlapped fragment locations later.
+ SmallVector<DbgDeclareInst *> Declares;
for (auto &BB : Fn) {
for (auto &I : BB) {
if (auto *DDI = dyn_cast<DbgDeclareInst>(&I)) {
- FnVarLocs->addSingleLocVar(DebugVariable(DDI), DDI->getExpression(),
- DDI->getDebugLoc(), DDI->getAddress());
+ Declares.push_back(DDI);
} else if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) {
DebugVariable DV = DebugVariable(DII);
DebugAggregate DA = {DV.getVariable(), DV.getInlinedAt()};
+ if (!VarsWithStackSlot.contains(DA))
+ continue;
if (Seen.insert(DV).second)
FragmentMap[DA].push_back(DV);
} else if (auto Info = getUntaggedStoreAssignmentInfo(
@@ -1875,6 +1997,8 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
DebugVariable DV = DebugVariable(DAI->getVariable(), FragInfo,
DAI->getDebugLoc().getInlinedAt());
DebugAggregate DA = {DV.getVariable(), DV.getInlinedAt()};
+ if (!VarsWithStackSlot.contains(DA))
+ continue;
// Cache this info for later.
UntaggedStoreVars[&I].push_back(
@@ -1887,21 +2011,22 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
}
}
- // Sort the fragment map for each DebugAggregate in non-descending
- // order of fragment size. Assert no entries are duplicates.
+ // Sort the fragment map for each DebugAggregate in ascending
+ // order of fragment size - there should be no duplicates.
for (auto &Pair : FragmentMap) {
SmallVector<DebugVariable, 8> &Frags = Pair.second;
- std::sort(
- Frags.begin(), Frags.end(), [](DebugVariable Next, DebugVariable Elmt) {
- assert(!(Elmt.getFragmentOrDefault() == Next.getFragmentOrDefault()));
- return Elmt.getFragmentOrDefault().SizeInBits >
- Next.getFragmentOrDefault().SizeInBits;
- });
+ std::sort(Frags.begin(), Frags.end(),
+ [](const DebugVariable &Next, const DebugVariable &Elmt) {
+ return Elmt.getFragmentOrDefault().SizeInBits >
+ Next.getFragmentOrDefault().SizeInBits;
+ });
+ // Check for duplicates.
+ assert(std::adjacent_find(Frags.begin(), Frags.end()) == Frags.end());
}
// Build the map.
AssignmentTrackingLowering::OverlapMap Map;
- for (auto Pair : FragmentMap) {
+ for (auto &Pair : FragmentMap) {
auto &Frags = Pair.second;
for (auto It = Frags.begin(), IEnd = Frags.end(); It != IEnd; ++It) {
DIExpression::FragmentInfo Frag = It->getFragmentOrDefault();
@@ -1922,6 +2047,15 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares(
}
}
+ // VariableIDs are 1-based so the variable-tracking bitvector needs
+ // NumVariables plus 1 bits.
+ TrackedVariablesVectorSize = FnVarLocs->getNumVariables() + 1;
+
+ // Finally, insert the declares afterwards, so the first IDs are all
+ // partially stack homed vars.
+ for (auto *DDI : Declares)
+ FnVarLocs->addSingleLocVar(DebugVariable(DDI), DDI->getExpression(),
+ DDI->getDebugLoc(), DDI->getWrappedLocation());
return Map;
}
@@ -1942,8 +2076,9 @@ bool AssignmentTrackingLowering::run(FunctionVarLocsBuilder *FnVarLocsBuilder) {
// Note that this pass doesn't handle partial overlaps correctly (FWIW
// neither does LiveDebugVariables) because that is difficult to do and
 // appears to be a rare occurrence.
- VarContains =
- buildOverlapMapAndRecordDeclares(Fn, FnVarLocs, UntaggedStoreVars);
+ VarContains = buildOverlapMapAndRecordDeclares(
+ Fn, FnVarLocs, *VarsWithStackSlot, UntaggedStoreVars,
+ TrackedVariablesVectorSize);
// Prepare for traversal.
ReversePostOrderTraversal<Function *> RPOT(&Fn);
@@ -2059,14 +2194,14 @@ bool AssignmentTrackingLowering::run(FunctionVarLocsBuilder *FnVarLocsBuilder) {
//
// Unless we've already done so, create the single location def now.
if (AlwaysStackHomed.insert(Aggr).second) {
- assert(isa<AllocaInst>(VarLoc.V));
+ assert(!VarLoc.Values.hasArgList());
// TODO: When more complex cases are handled VarLoc.Expr should be
// built appropriately rather than always using an empty DIExpression.
// The assert below is a reminder.
assert(Simple);
VarLoc.Expr = DIExpression::get(Fn.getContext(), std::nullopt);
DebugVariable Var = FnVarLocs->getVariable(VarLoc.VariableID);
- FnVarLocs->addSingleLocVar(Var, VarLoc.Expr, VarLoc.DL, VarLoc.V);
+ FnVarLocs->addSingleLocVar(Var, VarLoc.Expr, VarLoc.DL, VarLoc.Values);
InsertedAnyIntrinsics = true;
}
}
@@ -2109,20 +2244,11 @@ bool AssignmentTrackingLowering::emitPromotedVarLocs(
// already.
if (VarsWithStackSlot->contains(getAggregate(DVI)))
continue;
- // Wrapper to get a single value (or undef) from DVI.
- auto GetValue = [DVI]() -> Value * {
- // We can't handle variadic DIExpressions yet so treat those as
- // kill locations.
- if (DVI->isKillLocation() || DVI->getValue() == nullptr ||
- DVI->hasArgList())
- return PoisonValue::get(Type::getInt32Ty(DVI->getContext()));
- return DVI->getValue();
- };
Instruction *InsertBefore = I.getNextNode();
assert(InsertBefore && "Unexpected: debug intrinsics after a terminator");
FnVarLocs->addVarLoc(InsertBefore, DebugVariable(DVI),
DVI->getExpression(), DVI->getDebugLoc(),
- GetValue());
+ DVI->getWrappedLocation());
InsertedAnyIntrinsics = true;
}
}
@@ -2140,15 +2266,14 @@ static bool
removeRedundantDbgLocsUsingBackwardScan(const BasicBlock *BB,
FunctionVarLocsBuilder &FnVarLocs) {
bool Changed = false;
- SmallDenseSet<DebugVariable> VariableSet;
-
+ SmallDenseMap<DebugAggregate, BitVector> VariableDefinedBits;
// Scan over the entire block, not just over the instructions mapped by
 // FnVarLocs, because wedges in FnVarLocs may only be separated by debug
// instructions.
for (const Instruction &I : reverse(*BB)) {
if (!isa<DbgVariableIntrinsic>(I)) {
// Sequence of consecutive defs ended. Clear map for the next one.
- VariableSet.clear();
+ VariableDefinedBits.clear();
}
// Get the location defs that start just before this instruction.
@@ -2164,21 +2289,44 @@ removeRedundantDbgLocsUsingBackwardScan(const BasicBlock *BB,
// Iterate over the existing defs in reverse.
for (auto RIt = Locs->rbegin(), REnd = Locs->rend(); RIt != REnd; ++RIt) {
NumDefsScanned++;
- const DebugVariable &Key = FnVarLocs.getVariable(RIt->VariableID);
- bool FirstDefOfFragment = VariableSet.insert(Key).second;
+ DebugAggregate Aggr =
+ getAggregate(FnVarLocs.getVariable(RIt->VariableID));
+ uint64_t SizeInBits = Aggr.first->getSizeInBits().value_or(0);
- // If the same variable fragment is described more than once it is enough
- // to keep the last one (i.e. the first found in this reverse iteration).
- if (FirstDefOfFragment) {
- // New def found: keep it.
+ if (SizeInBits == 0) {
+ // If the size is unknown (0) then keep this location def to be safe.
NewDefsReversed.push_back(*RIt);
- } else {
- // Redundant def found: throw it away. Since the wedge of defs is being
- // rebuilt, doing nothing is the same as deleting an entry.
- ChangedThisWedge = true;
- NumDefsRemoved++;
+ continue;
}
- continue;
+
+ // Only keep this location definition if it is not fully eclipsed by
+ // other definitions in this wedge that come after it.
+
+ // Insert the bits the location definition defines.
+ auto InsertResult =
+ VariableDefinedBits.try_emplace(Aggr, BitVector(SizeInBits));
+ bool FirstDefinition = InsertResult.second;
+ BitVector &DefinedBits = InsertResult.first->second;
+
+ DIExpression::FragmentInfo Fragment =
+ RIt->Expr->getFragmentInfo().value_or(
+ DIExpression::FragmentInfo(SizeInBits, 0));
+ bool InvalidFragment = Fragment.endInBits() > SizeInBits;
+
+ // If this defines any previously undefined bits, keep it.
+ if (FirstDefinition || InvalidFragment ||
+ DefinedBits.find_first_unset_in(Fragment.startInBits(),
+ Fragment.endInBits()) != -1) {
+ if (!InvalidFragment)
+ DefinedBits.set(Fragment.startInBits(), Fragment.endInBits());
+ NewDefsReversed.push_back(*RIt);
+ continue;
+ }
+
+ // Redundant def found: throw it away. Since the wedge of defs is being
+ // rebuilt, doing nothing is the same as deleting an entry.
+ ChangedThisWedge = true;
+ NumDefsRemoved++;
}
// Un-reverse the defs and replace the wedge with the pruned version.
@@ -2204,7 +2352,8 @@ static bool
removeRedundantDbgLocsUsingForwardScan(const BasicBlock *BB,
FunctionVarLocsBuilder &FnVarLocs) {
bool Changed = false;
- DenseMap<DebugVariable, std::pair<Value *, DIExpression *>> VariableMap;
+ DenseMap<DebugVariable, std::pair<RawLocationWrapper, DIExpression *>>
+ VariableMap;
// Scan over the entire block, not just over the instructions mapped by
// FnVarLocs, because wedges in FnVarLocs may only be separated by debug
@@ -2229,9 +2378,9 @@ removeRedundantDbgLocsUsingForwardScan(const BasicBlock *BB,
// Update the map if we found a new value/expression describing the
// variable, or if the variable wasn't mapped already.
- if (VMI == VariableMap.end() || VMI->second.first != Loc.V ||
+ if (VMI == VariableMap.end() || VMI->second.first != Loc.Values ||
VMI->second.second != Loc.Expr) {
- VariableMap[Key] = {Loc.V, Loc.Expr};
+ VariableMap[Key] = {Loc.Values, Loc.Expr};
NewDefs.push_back(Loc);
continue;
}
@@ -2311,7 +2460,7 @@ removeUndefDbgLocsFromEntryBlock(const BasicBlock *BB,
// Remove undef entries that are encountered before any non-undef
// intrinsics from the entry block.
- if (isa<UndefValue>(Loc.V) && !HasDefinedBits(Aggr, Var)) {
+ if (Loc.Values.isKillLocation(Loc.Expr) && !HasDefinedBits(Aggr, Var)) {
// Did not insert this Loc, which is the same as removing it.
NumDefsRemoved++;
ChangedThisWedge = true;
@@ -2381,7 +2530,8 @@ static void analyzeFunction(Function &Fn, const DataLayout &Layout,
}
if (Changed) {
- MemLocFragmentFill Pass(Fn, &VarsWithStackSlot);
+ MemLocFragmentFill Pass(Fn, &VarsWithStackSlot,
+ shouldCoalesceFragments(Fn));
Pass.run(FnVarLocs);
// Remove redundant entries. As well as reducing memory consumption and
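
A minimal standalone sketch of the fragment-coverage test the backward scan above now performs, assuming llvm::BitVector; the helper name and free-standing form are illustrative and not part of the patch:

#include "llvm/ADT/BitVector.h"

// Returns true when the fragment [StartBit, EndBit) covers at least one bit
// that no later-in-block-order definition has covered yet; if so, the newly
// covered bits are recorded so earlier, fully eclipsed defs can be pruned.
static bool fragmentAddsNewBits(llvm::BitVector &DefinedBits,
                                unsigned StartBit, unsigned EndBit) {
  // find_first_unset_in returns -1 when every bit in the range is already
  // set, i.e. the definition is fully eclipsed and therefore redundant.
  if (DefinedBits.find_first_unset_in(StartBit, EndBit) == -1)
    return false;
  DefinedBits.set(StartBit, EndBit);
  return true;
}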
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 8f71ec2b490c..80a0bb957cfc 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1576,6 +1576,11 @@ bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
CreateCmpXchgInstFun CreateCmpXchg) {
ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
+ Builder.setIsFPConstrained(
+ AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
+
+ // FIXME: If FP exceptions are observable, we should force them off for the
+ // loop for the FP atomics.
Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
AI->getOrdering(), AI->getSyncScopeID(),
diff --git a/llvm/lib/CodeGen/BasicBlockSections.cpp b/llvm/lib/CodeGen/BasicBlockSections.cpp
index e7e73606de07..6967ca5160c0 100644
--- a/llvm/lib/CodeGen/BasicBlockSections.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSections.cpp
@@ -91,7 +91,7 @@ cl::opt<std::string> llvm::BBSectionsColdTextPrefix(
cl::desc("The text prefix to use for cold basic block clusters"),
cl::init(".text.split."), cl::Hidden);
-cl::opt<bool> BBSectionsDetectSourceDrift(
+static cl::opt<bool> BBSectionsDetectSourceDrift(
"bbsections-detect-source-drift",
cl::desc("This checks if there is a fdo instr. profile hash "
"mismatch for this function"),
@@ -123,10 +123,16 @@ public:
} // end anonymous namespace
char BasicBlockSections::ID = 0;
-INITIALIZE_PASS(BasicBlockSections, "bbsections-prepare",
- "Prepares for basic block sections, by splitting functions "
- "into clusters of basic blocks.",
- false, false)
+INITIALIZE_PASS_BEGIN(
+ BasicBlockSections, "bbsections-prepare",
+ "Prepares for basic block sections, by splitting functions "
+ "into clusters of basic blocks.",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(BasicBlockSectionsProfileReader)
+INITIALIZE_PASS_END(BasicBlockSections, "bbsections-prepare",
+ "Prepares for basic block sections, by splitting functions "
+ "into clusters of basic blocks.",
+ false, false)
// This function updates and optimizes the branching instructions of every basic
// block in a given function to account for changes in the layout.
@@ -300,7 +306,7 @@ static bool hasInstrProfHashMismatch(MachineFunction &MF) {
if (Existing) {
MDTuple *Tuple = cast<MDTuple>(Existing);
for (const auto &N : Tuple->operands())
- if (cast<MDString>(N.get())->getString() == MetadataName)
+ if (N.equalsStr(MetadataName))
return true;
}
diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
index 5bc8d82debc3..5dede452ec34 100644
--- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
+++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp
@@ -14,12 +14,17 @@
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include <llvm/ADT/STLExtras.h>
using namespace llvm;
@@ -35,13 +40,10 @@ bool BasicBlockSectionsProfileReader::isFunctionHot(StringRef FuncName) const {
std::pair<bool, SmallVector<BBClusterInfo>>
BasicBlockSectionsProfileReader::getBBClusterInfoForFunction(
StringRef FuncName) const {
- std::pair<bool, SmallVector<BBClusterInfo>> cluster_info(false, {});
auto R = ProgramBBClusterInfo.find(getAliasName(FuncName));
- if (R != ProgramBBClusterInfo.end()) {
- cluster_info.second = R->second;
- cluster_info.first = true;
- }
- return cluster_info;
+ return R != ProgramBBClusterInfo.end()
+ ? std::pair(true, R->second)
+ : std::pair(false, SmallVector<BBClusterInfo>{});
}
// Basic Block Sections can be enabled for a subset of machine basic blocks.
@@ -49,17 +51,19 @@ BasicBlockSectionsProfileReader::getBBClusterInfoForFunction(
// block sections are desired. Additionally, machine basic block ids of the
// functions can also be specified for a finer granularity. Moreover, a cluster
// of basic blocks could be assigned to the same section.
+// Optionally, a debug-info filename can be specified for each function to allow
+// distinguishing internal-linkage functions of the same name.
// A file with basic block sections for all of function main and three blocks
// for function foo (of which 1 and 2 are placed in a cluster) looks like this:
+// (Profile for function foo is only loaded when its debug-info filename
+// matches 'path/to/foo_file.cc').
// ----------------------------
// list.txt:
// !main
-// !foo
+// !foo M=path/to/foo_file.cc
// !!1 2
// !!4
-static Error getBBClusterInfo(const MemoryBuffer *MBuf,
- ProgramBBClusterInfoMapTy &ProgramBBClusterInfo,
- StringMap<StringRef> &FuncAliasMap) {
+Error BasicBlockSectionsProfileReader::ReadProfile() {
assert(MBuf);
line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
@@ -90,9 +94,10 @@ static Error getBBClusterInfo(const MemoryBuffer *MBuf,
break;
// Check for second "!" which indicates a cluster of basic blocks.
if (S.consume_front("!")) {
+ // Skip the profile when the profile iterator (FI) refers to the
+ // past-the-end element.
if (FI == ProgramBBClusterInfo.end())
- return invalidProfileError(
- "Cluster list does not follow a function name specifier.");
+ continue;
SmallVector<StringRef, 4> BBIDs;
S.split(BBIDs, ' ');
// Reset current cluster position.
@@ -112,18 +117,52 @@ static Error getBBClusterInfo(const MemoryBuffer *MBuf,
BBClusterInfo{((unsigned)BBID), CurrentCluster, CurrentPosition++});
}
CurrentCluster++;
- } else { // This is a function name specifier.
+ } else {
+ // This is a function name specifier. It may include a debug info filename
+ // specifier starting with `M=`.
+ auto [AliasesStr, DIFilenameStr] = S.split(' ');
+ SmallString<128> DIFilename;
+ if (DIFilenameStr.startswith("M=")) {
+ DIFilename =
+ sys::path::remove_leading_dotslash(DIFilenameStr.substr(2));
+ if (DIFilename.empty())
+ return invalidProfileError("Empty module name specifier.");
+ } else if (!DIFilenameStr.empty()) {
+ return invalidProfileError("Unknown string found: '" + DIFilenameStr +
+ "'.");
+ }
// Function aliases are separated using '/'. We use the first function
// name for the cluster info mapping and delegate all other aliases to
// this one.
SmallVector<StringRef, 4> Aliases;
- S.split(Aliases, '/');
+ AliasesStr.split(Aliases, '/');
+ bool FunctionFound = any_of(Aliases, [&](StringRef Alias) {
+ auto It = FunctionNameToDIFilename.find(Alias);
+ // No match if this function name is not found in this module.
+ if (It == FunctionNameToDIFilename.end())
+ return false;
+ // Return a match if debug-info-filename is not specified. Otherwise,
+ // check for equality.
+ return DIFilename.empty() || It->second.equals(DIFilename);
+ });
+ if (!FunctionFound) {
+ // Skip the following profile by setting the profile iterator (FI) to
+ // the past-the-end element.
+ FI = ProgramBBClusterInfo.end();
+ continue;
+ }
for (size_t i = 1; i < Aliases.size(); ++i)
FuncAliasMap.try_emplace(Aliases[i], Aliases.front());
// Prepare for parsing clusters of this function name.
// Start a new cluster map for this function name.
- FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first;
+ auto R = ProgramBBClusterInfo.try_emplace(Aliases.front());
+ // Report error when multiple profiles have been specified for the same
+ // function.
+ if (!R.second)
+ return invalidProfileError("Duplicate profile for function '" +
+ Aliases.front() + "'.");
+ FI = R.first;
CurrentCluster = 0;
FuncBBIDs.clear();
}
@@ -131,11 +170,28 @@ static Error getBBClusterInfo(const MemoryBuffer *MBuf,
return Error::success();
}
-void BasicBlockSectionsProfileReader::initializePass() {
+bool BasicBlockSectionsProfileReader::doInitialization(Module &M) {
if (!MBuf)
- return;
- if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap))
+ return false;
+ // Get the function name to debug info filename mapping.
+ FunctionNameToDIFilename.clear();
+ for (const Function &F : M) {
+ SmallString<128> DIFilename;
+ if (F.isDeclaration())
+ continue;
+ DISubprogram *Subprogram = F.getSubprogram();
+ if (Subprogram) {
+ llvm::DICompileUnit *CU = Subprogram->getUnit();
+ if (CU)
+ DIFilename = sys::path::remove_leading_dotslash(CU->getFilename());
+ }
+ [[maybe_unused]] bool inserted =
+ FunctionNameToDIFilename.try_emplace(F.getName(), DIFilename).second;
+ assert(inserted);
+ }
+ if (auto Err = ReadProfile())
report_fatal_error(std::move(Err));
+ return false;
}
ImmutablePass *
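
A hypothetical test-style snippet illustrating the extended profile syntax that ReadProfile() now accepts, with the new `M=` module specifier; the file name and cluster IDs are made up:

#include "llvm/Support/MemoryBuffer.h"
#include <memory>

// `main` matches regardless of module; `foo` only matches when its compile
// unit's debug-info filename is path/to/foo_file.cc. Blocks 1 and 2 of foo
// form one cluster, block 4 a second one.
std::unique_ptr<llvm::MemoryBuffer> makeExampleSectionsProfile() {
  static const char Profile[] = "!main\n"
                                "!foo M=path/to/foo_file.cc\n"
                                "!!1 2\n"
                                "!!4\n";
  return llvm::MemoryBuffer::getMemBufferCopy(Profile);
}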
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index d491691135dc..3830f25debaf 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -860,6 +860,14 @@ void BranchFolder::mergeCommonTails(unsigned commonTailIndex) {
for (Register Reg : NewLiveIns) {
if (!LiveRegs.available(*MRI, Reg))
continue;
+
+ // Skip the register if we are about to add one of its super registers.
+ // TODO: Common this up with the same logic in addLiveIns().
+ if (any_of(TRI->superregs(Reg), [&](MCPhysReg SReg) {
+ return NewLiveIns.contains(SReg) && !MRI->isReserved(SReg);
+ }))
+ continue;
+
DebugLoc DL;
BuildMI(*Pred, InsertBefore, DL, TII->get(TargetOpcode::IMPLICIT_DEF),
Reg);
@@ -1207,7 +1215,7 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
MadeChange |= OptimizeBlock(&MBB);
// If it is dead, remove it.
- if (MBB.pred_empty()) {
+ if (MBB.pred_empty() && !MBB.isMachineBlockAddressTaken()) {
RemoveDeadBlock(&MBB);
MadeChange = true;
++NumDeadBlocks;
@@ -1507,42 +1515,43 @@ ReoptimizeBlock:
}
}
- bool OptForSize =
- MF.getFunction().hasOptSize() ||
- llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo);
- if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) {
- // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
- // direction, thereby defeating careful block placement and regressing
- // performance. Therefore, only consider this for optsize functions.
+ if (!IsEmptyBlock(MBB)) {
MachineInstr &TailCall = *MBB->getFirstNonDebugInstr();
if (TII->isUnconditionalTailCall(TailCall)) {
- MachineBasicBlock *Pred = *MBB->pred_begin();
- MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
- SmallVector<MachineOperand, 4> PredCond;
- bool PredAnalyzable =
- !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
-
- if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB &&
- PredTBB != PredFBB) {
- // The predecessor has a conditional branch to this block which consists
- // of only a tail call. Try to fold the tail call into the conditional
- // branch.
- if (TII->canMakeTailCallConditional(PredCond, TailCall)) {
- // TODO: It would be nice if analyzeBranch() could provide a pointer
- // to the branch instruction so replaceBranchWithTailCall() doesn't
- // have to search for it.
- TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);
- ++NumTailCalls;
- Pred->removeSuccessor(MBB);
- MadeChange = true;
- return MadeChange;
+ SmallVector<MachineBasicBlock *> PredsChanged;
+ for (auto &Pred : MBB->predecessors()) {
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ bool PredAnalyzable =
+ !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
+
+ // Only eliminate if MBB == TBB (Taken Basic Block)
+ if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB &&
+ PredTBB != PredFBB) {
+ // The predecessor has a conditional branch to this block which
+ // consists of only a tail call. Try to fold the tail call into the
+ // conditional branch.
+ if (TII->canMakeTailCallConditional(PredCond, TailCall)) {
+ // TODO: It would be nice if analyzeBranch() could provide a pointer
+ // to the branch instruction so replaceBranchWithTailCall() doesn't
+ // have to search for it.
+ TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);
+ PredsChanged.push_back(Pred);
+ }
}
+ // If the predecessor is falling through to this block, we could reverse
+ // the branch condition and fold the tail call into that. However, after
+ // that we might have to re-arrange the CFG to fall through to the other
+ // block and there is a high risk of regressing code size rather than
+ // improving it.
+ }
+ if (!PredsChanged.empty()) {
+ NumTailCalls += PredsChanged.size();
+ for (auto &Pred : PredsChanged)
+ Pred->removeSuccessor(MBB);
+
+ return true;
}
- // If the predecessor is falling through to this block, we could reverse
- // the branch condition and fold the tail call into that. However, after
- // that we might have to re-arrange the CFG to fall through to the other
- // block and there is a high risk of regressing code size rather than
- // improving it.
}
}
@@ -1876,8 +1885,8 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
} else {
if (Uses.erase(Reg)) {
if (Reg.isPhysical()) {
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
- Uses.erase(*SubRegs); // Use sub-registers to be conservative
+ for (MCPhysReg SubReg : TRI->subregs(Reg))
+ Uses.erase(SubReg); // Use sub-registers to be conservative
}
}
addRegAndItsAliases(Reg, TRI, Defs);
@@ -1988,8 +1997,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
break;
// Remove kills from ActiveDefsSet, these registers had short live ranges.
- for (const MachineOperand &MO : TIB->operands()) {
- if (!MO.isReg() || !MO.isUse() || !MO.isKill())
+ for (const MachineOperand &MO : TIB->all_uses()) {
+ if (!MO.isKill())
continue;
Register Reg = MO.getReg();
if (!Reg)
@@ -2006,8 +2015,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
}
// Track local defs so we can update liveins.
- for (const MachineOperand &MO : TIB->operands()) {
- if (!MO.isReg() || !MO.isDef() || MO.isDead())
+ for (const MachineOperand &MO : TIB->all_defs()) {
+ if (MO.isDead())
continue;
Register Reg = MO.getReg();
if (!Reg || Reg.isVirtual())
diff --git a/llvm/lib/CodeGen/BranchFolding.h b/llvm/lib/CodeGen/BranchFolding.h
index d0b6ed5ebe05..63b2ef04b21b 100644
--- a/llvm/lib/CodeGen/BranchFolding.h
+++ b/llvm/lib/CodeGen/BranchFolding.h
@@ -113,15 +113,15 @@ class TargetRegisterInfo;
};
std::vector<SameTailElt> SameTails;
- bool AfterBlockPlacement;
- bool EnableTailMerge;
- bool EnableHoistCommonCode;
- bool UpdateLiveIns;
+ bool AfterBlockPlacement = false;
+ bool EnableTailMerge = false;
+ bool EnableHoistCommonCode = false;
+ bool UpdateLiveIns = false;
unsigned MinCommonTailLength;
- const TargetInstrInfo *TII;
- const MachineRegisterInfo *MRI;
- const TargetRegisterInfo *TRI;
- MachineLoopInfo *MLI;
+ const TargetInstrInfo *TII = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ MachineLoopInfo *MLI = nullptr;
LivePhysRegs LiveRegs;
private:
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp b/llvm/lib/CodeGen/BranchRelaxation.cpp
index 016c81dc5aa4..05494f1ddc67 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -81,9 +81,9 @@ class BranchRelaxation : public MachineFunctionPass {
std::unique_ptr<RegScavenger> RS;
LivePhysRegs LiveRegs;
- MachineFunction *MF;
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
+ MachineFunction *MF = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
bool relaxBranchInstructions();
void scanFunction();
@@ -132,6 +132,19 @@ void BranchRelaxation::verify() {
assert(BlockInfo[Num].Size == computeBlockSize(MBB));
PrevNum = Num;
}
+
+ for (MachineBasicBlock &MBB : *MF) {
+ for (MachineBasicBlock::iterator J = MBB.getFirstTerminator();
+ J != MBB.end(); J = std::next(J)) {
+ MachineInstr &MI = *J;
+ if (!MI.isConditionalBranch() && !MI.isUnconditionalBranch())
+ continue;
+ if (MI.getOpcode() == TargetOpcode::FAULTING_OP)
+ continue;
+ MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
+ assert(isBlockInRange(MI, *DestBB));
+ }
+ }
#endif
}
diff --git a/llvm/lib/CodeGen/BreakFalseDeps.cpp b/llvm/lib/CodeGen/BreakFalseDeps.cpp
index 310273173647..618e41894b29 100644
--- a/llvm/lib/CodeGen/BreakFalseDeps.cpp
+++ b/llvm/lib/CodeGen/BreakFalseDeps.cpp
@@ -17,6 +17,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/ReachingDefAnalysis.h"
@@ -34,9 +35,9 @@ namespace llvm {
class BreakFalseDeps : public MachineFunctionPass {
private:
- MachineFunction *MF;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
+ MachineFunction *MF = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
RegisterClassInfo RegClassInfo;
/// List of undefined register reads in this block in forward order.
@@ -45,7 +46,7 @@ private:
/// Storage for register unit liveness.
LivePhysRegs LiveRegSet;
- ReachingDefAnalysis *RDA;
+ ReachingDefAnalysis *RDA = nullptr;
public:
static char ID; // Pass identification, replacement for typeid
@@ -123,9 +124,9 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
MCRegister OriginalReg = MO.getReg().asMCReg();
// Update only undef operands that have reg units that are mapped to one root.
- for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) {
+ for (MCRegUnit Unit : TRI->regunits(OriginalReg)) {
unsigned NumRoots = 0;
- for (MCRegUnitRootIterator Root(*Unit, TRI); Root.isValid(); ++Root) {
+ for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
NumRoots++;
if (NumRoots > 1)
return false;
@@ -139,9 +140,8 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
// If the instruction has a true dependency, we can hide the false dependency
// behind it.
- for (MachineOperand &CurrMO : MI->operands()) {
- if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() ||
- !OpRC->contains(CurrMO.getReg()))
+ for (MachineOperand &CurrMO : MI->all_uses()) {
+ if (CurrMO.isUndef() || !OpRC->contains(CurrMO.getReg()))
continue;
// We found a true dependency - replace the undef register with the true
// dependency.
@@ -290,10 +290,16 @@ bool BreakFalseDeps::runOnMachineFunction(MachineFunction &mf) {
LLVM_DEBUG(dbgs() << "********** BREAK FALSE DEPENDENCIES **********\n");
+ // Skip dead blocks, since ReachingDefAnalysis has no idea about the
+ // instructions in them.
+ df_iterator_default_set<MachineBasicBlock *> Reachable;
+ for (MachineBasicBlock *MBB : depth_first_ext(&mf, Reachable))
+ (void)MBB /* Mark all reachable blocks */;
+
// Traverse the basic blocks.
- for (MachineBasicBlock &MBB : mf) {
- processBasicBlock(&MBB);
- }
+ for (MachineBasicBlock &MBB : mf)
+ if (Reachable.count(&MBB))
+ processBasicBlock(&MBB);
return false;
}
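
The reachability guard added above is a common LLVM idiom; a short sketch of the same pattern on IR basic blocks (assuming only the headers shown, names illustrative):

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"

// Walk the CFG once from the entry block; the external set handed to
// depth_first_ext is populated with every block the traversal reaches, so a
// later plain iteration over the function can cheaply skip dead blocks.
static unsigned countReachableBlocks(llvm::Function &F) {
  llvm::df_iterator_default_set<llvm::BasicBlock *> Reachable;
  for (llvm::BasicBlock *BB : llvm::depth_first_ext(&F, Reachable))
    (void)BB; // The traversal itself fills Reachable.
  unsigned N = 0;
  for (llvm::BasicBlock &BB : F)
    if (Reachable.count(&BB))
      ++N;
  return N;
}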
diff --git a/llvm/lib/CodeGen/CFIInstrInserter.cpp b/llvm/lib/CodeGen/CFIInstrInserter.cpp
index 25741686a829..6a024287f002 100644
--- a/llvm/lib/CodeGen/CFIInstrInserter.cpp
+++ b/llvm/lib/CodeGen/CFIInstrInserter.cpp
@@ -141,6 +141,7 @@ INITIALIZE_PASS(CFIInstrInserter, "cfi-instr-inserter",
FunctionPass *llvm::createCFIInstrInserter() { return new CFIInstrInserter(); }
void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
// Initial CFA offset value i.e. the one valid at the beginning of the
// function.
int InitialOffset =
@@ -149,7 +150,7 @@ void CFIInstrInserter::calculateCFAInfo(MachineFunction &MF) {
// function.
Register InitialRegister =
MF.getSubtarget().getFrameLowering()->getInitialCFARegister(MF);
- const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ InitialRegister = TRI.getDwarfRegNum(InitialRegister, true);
unsigned NumRegs = TRI.getNumRegs();
// Initialize MBBMap.
diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp
index 615687abad81..0377bc002067 100644
--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -97,7 +97,7 @@ bool VirtRegAuxInfo::isRematerializable(const LiveInterval &LI,
// Trace copies introduced by live range splitting. The inline
// spiller can rematerialize through these copies, so the spill
// weight must reflect this.
- while (MI->isFullCopy()) {
+ while (TII.isFullCopyInstr(*MI)) {
// The copy destination must match the interval register.
if (MI->getOperand(0).getReg() != Reg)
return false;
@@ -133,7 +133,7 @@ bool VirtRegAuxInfo::isLiveAtStatepointVarArg(LiveInterval &LI) {
MachineInstr *MI = MO.getParent();
if (MI->getOpcode() != TargetOpcode::STATEPOINT)
return false;
- return StatepointOpers(MI).getVarIdx() <= MI->getOperandNo(&MO);
+ return StatepointOpers(MI).getVarIdx() <= MO.getOperandNo();
});
}
@@ -157,7 +157,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
unsigned NumInstr = 0; // Number of instructions using LI
SmallPtrSet<MachineInstr *, 8> Visited;
- std::pair<Register, Register> TargetHint = MRI.getRegAllocationHint(LI.reg());
+ std::pair<unsigned, Register> TargetHint = MRI.getRegAllocationHint(LI.reg());
if (LI.isSpillable()) {
Register Reg = LI.reg();
@@ -224,7 +224,16 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
continue;
NumInstr++;
- if (MI->isIdentityCopy() || MI->isImplicitDef())
+ bool identityCopy = false;
+ auto DestSrc = TII.isCopyInstr(*MI);
+ if (DestSrc) {
+ const MachineOperand *DestRegOp = DestSrc->Destination;
+ const MachineOperand *SrcRegOp = DestSrc->Source;
+ identityCopy = DestRegOp->getReg() == SrcRegOp->getReg() &&
+ DestRegOp->getSubReg() == SrcRegOp->getSubReg();
+ }
+
+ if (identityCopy || MI->isImplicitDef())
continue;
if (!Visited.insert(MI).second)
continue;
@@ -258,7 +267,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start,
}
// Get allocation hints from copies.
- if (!MI->isCopy())
+ if (!TII.isCopyInstr(*MI))
continue;
Register HintReg = copyHint(MI, LI.reg(), TRI, MRI);
if (!HintReg)
diff --git a/llvm/lib/CodeGen/CallBrPrepare.cpp b/llvm/lib/CodeGen/CallBrPrepare.cpp
new file mode 100644
index 000000000000..db243a0bfebe
--- /dev/null
+++ b/llvm/lib/CodeGen/CallBrPrepare.cpp
@@ -0,0 +1,231 @@
+//===-- CallBrPrepare - Prepare callbr for code generation ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers callbrs in LLVM IR in order to assist SelectionDAG's
+// codegen.
+//
+// In particular, this pass assists in inserting register copies for the output
+// values of a callbr along the edges leading to the indirect target blocks.
+// Though the output SSA value is defined by the callbr instruction itself in
+// the IR representation, the value cannot be copied to the appropriate virtual
+// registers prior to jumping to an indirect label, since the jump occurs
+// within the user-provided assembly blob.
+//
+// Instead, those copies must occur separately at the beginning of each
+// indirect target. That requires that we create a separate SSA definition in
+// each of them (via llvm.callbr.landingpad), and may require splitting
+// critical edges so we have a location to place the intrinsic. Finally, we
+// remap users of the original callbr output SSA value to instead point to the
+// appropriate llvm.callbr.landingpad value.
+//
+// Ideally, this could be done inside SelectionDAG, or in the
+// MachineInstruction representation, without the use of an IR-level intrinsic.
+// But, within the current framework, it’s simpler to implement as an IR pass.
+// (If support for callbr in GlobalISel is implemented, it’s worth considering
+// whether this is still required.)
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "callbrprepare"
+
+namespace {
+
+class CallBrPrepare : public FunctionPass {
+ bool SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs, DominatorTree &DT);
+ bool InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
+ DominatorTree &DT) const;
+ void UpdateSSA(DominatorTree &DT, CallBrInst *CBR, CallInst *Intrinsic,
+ SSAUpdater &SSAUpdate) const;
+
+public:
+ CallBrPrepare() : FunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &Fn) override;
+ static char ID;
+};
+
+} // end anonymous namespace
+
+char CallBrPrepare::ID = 0;
+INITIALIZE_PASS_BEGIN(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_END(CallBrPrepare, DEBUG_TYPE, "Prepare callbr", false, false)
+
+FunctionPass *llvm::createCallBrPass() { return new CallBrPrepare(); }
+
+void CallBrPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+}
+
+static SmallVector<CallBrInst *, 2> FindCallBrs(Function &Fn) {
+ SmallVector<CallBrInst *, 2> CBRs;
+ for (BasicBlock &BB : Fn)
+ if (auto *CBR = dyn_cast<CallBrInst>(BB.getTerminator()))
+ if (!CBR->getType()->isVoidTy() && !CBR->use_empty())
+ CBRs.push_back(CBR);
+ return CBRs;
+}
+
+bool CallBrPrepare::SplitCriticalEdges(ArrayRef<CallBrInst *> CBRs,
+ DominatorTree &DT) {
+ bool Changed = false;
+ CriticalEdgeSplittingOptions Options(&DT);
+ Options.setMergeIdenticalEdges();
+
+ // The indirect destination might be duplicated between another parameter...
+ // %0 = callbr ... [label %x, label %x]
+ // ...hence MergeIdenticalEdges and AllowIdenticalEdges, but we don't need
+ // to split the default destination if it's duplicated between an indirect
+ // destination...
+ // %1 = callbr ... to label %x [label %x]
+ // ...hence starting at 1 and checking against successor 0 (aka the default
+ // destination).
+ for (CallBrInst *CBR : CBRs)
+ for (unsigned i = 1, e = CBR->getNumSuccessors(); i != e; ++i)
+ if (CBR->getSuccessor(i) == CBR->getSuccessor(0) ||
+ isCriticalEdge(CBR, i, /*AllowIdenticalEdges*/ true))
+ if (SplitKnownCriticalEdge(CBR, i, Options))
+ Changed = true;
+ return Changed;
+}
+
+bool CallBrPrepare::InsertIntrinsicCalls(ArrayRef<CallBrInst *> CBRs,
+ DominatorTree &DT) const {
+ bool Changed = false;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ IRBuilder<> Builder(CBRs[0]->getContext());
+ for (CallBrInst *CBR : CBRs) {
+ if (!CBR->getNumIndirectDests())
+ continue;
+
+ SSAUpdater SSAUpdate;
+ SSAUpdate.Initialize(CBR->getType(), CBR->getName());
+ SSAUpdate.AddAvailableValue(CBR->getParent(), CBR);
+ SSAUpdate.AddAvailableValue(CBR->getDefaultDest(), CBR);
+
+ for (BasicBlock *IndDest : CBR->getIndirectDests()) {
+ if (!Visited.insert(IndDest).second)
+ continue;
+ Builder.SetInsertPoint(&*IndDest->begin());
+ CallInst *Intrinsic = Builder.CreateIntrinsic(
+ CBR->getType(), Intrinsic::callbr_landingpad, {CBR});
+ SSAUpdate.AddAvailableValue(IndDest, Intrinsic);
+ UpdateSSA(DT, CBR, Intrinsic, SSAUpdate);
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+static bool IsInSameBasicBlock(const Use &U, const BasicBlock *BB) {
+ const auto *I = dyn_cast<Instruction>(U.getUser());
+ return I && I->getParent() == BB;
+}
+
+#ifndef NDEBUG
+static void PrintDebugDomInfo(const DominatorTree &DT, const Use &U,
+ const BasicBlock *BB, bool IsDefaultDest) {
+ if (!isa<Instruction>(U.getUser()))
+ return;
+ LLVM_DEBUG(dbgs() << "Use: " << *U.getUser() << ", in block "
+ << cast<Instruction>(U.getUser())->getParent()->getName()
+ << ", is " << (DT.dominates(BB, U) ? "" : "NOT ")
+ << "dominated by " << BB->getName() << " ("
+ << (IsDefaultDest ? "in" : "") << "direct)\n");
+}
+#endif
+
+void CallBrPrepare::UpdateSSA(DominatorTree &DT, CallBrInst *CBR,
+ CallInst *Intrinsic,
+ SSAUpdater &SSAUpdate) const {
+
+ SmallPtrSet<Use *, 4> Visited;
+ BasicBlock *DefaultDest = CBR->getDefaultDest();
+ BasicBlock *LandingPad = Intrinsic->getParent();
+
+ SmallVector<Use *, 4> Uses(make_pointer_range(CBR->uses()));
+ for (Use *U : Uses) {
+ if (!Visited.insert(U).second)
+ continue;
+
+#ifndef NDEBUG
+ PrintDebugDomInfo(DT, *U, LandingPad, /*IsDefaultDest*/ false);
+ PrintDebugDomInfo(DT, *U, DefaultDest, /*IsDefaultDest*/ true);
+#endif
+
+ // Don't rewrite the use in the newly inserted intrinsic.
+ if (const auto *II = dyn_cast<IntrinsicInst>(U->getUser()))
+ if (II->getIntrinsicID() == Intrinsic::callbr_landingpad)
+ continue;
+
+ // If the Use is in the same BasicBlock as the Intrinsic call, replace
+ // the Use with the value of the Intrinsic call.
+ if (IsInSameBasicBlock(*U, LandingPad)) {
+ U->set(Intrinsic);
+ continue;
+ }
+
+ // If the Use is dominated by the default dest, do not touch it.
+ if (DT.dominates(DefaultDest, *U))
+ continue;
+
+ SSAUpdate.RewriteUse(*U);
+ }
+}
+
+bool CallBrPrepare::runOnFunction(Function &Fn) {
+ bool Changed = false;
+ SmallVector<CallBrInst *, 2> CBRs = FindCallBrs(Fn);
+
+ if (CBRs.empty())
+ return Changed;
+
+ // It's highly likely that most programs do not contain CallBrInsts. Follow a
+ // similar pattern from SafeStackLegacyPass::runOnFunction to reuse previous
+ // domtree analysis if available, otherwise compute it lazily. This avoids
+ // forcing Dominator Tree Construction at -O0 for programs that likely do not
+ // contain CallBrInsts. It does pessimize programs with callbr at higher
+ // optimization levels, as the DominatorTree created here is not reused by
+ // subsequent passes.
+ DominatorTree *DT;
+ std::optional<DominatorTree> LazilyComputedDomTree;
+ if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
+ DT = &DTWP->getDomTree();
+ else {
+ LazilyComputedDomTree.emplace(Fn);
+ DT = &*LazilyComputedDomTree;
+ }
+
+ if (SplitCriticalEdges(CBRs, *DT))
+ Changed = true;
+
+ if (InsertIntrinsicCalls(CBRs, *DT))
+ Changed = true;
+
+ return Changed;
+}
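
A hypothetical driver snippet showing one way to run the new pass over a module through the legacy pass manager; the declaration of createCallBrPass() is assumed to live in llvm/CodeGen/Passes.h, matching the factory defined above:

#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"

// Splits critical edges to indirect destinations, inserts
// llvm.callbr.landingpad in each one, and rewrites uses of callbr results.
void prepareCallBrsIn(llvm::Module &M) {
  llvm::legacy::PassManager PM;
  PM.add(llvm::createCallBrPass());
  PM.run(M);
}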
diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp
index ce1ef571c9df..b7152587a9fa 100644
--- a/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -25,12 +25,15 @@
using namespace llvm;
-CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
- SmallVectorImpl<CCValAssign> &locs, LLVMContext &C)
- : CallingConv(CC), IsVarArg(isVarArg), MF(mf),
- TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C) {
+CCState::CCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
+ SmallVectorImpl<CCValAssign> &Locs, LLVMContext &Context,
+ bool NegativeOffsets)
+ : CallingConv(CC), IsVarArg(IsVarArg), MF(MF),
+ TRI(*MF.getSubtarget().getRegisterInfo()), Locs(Locs), Context(Context),
+ NegativeOffsets(NegativeOffsets) {
+
// No stack is used.
- StackOffset = 0;
+ StackSize = 0;
clearByValRegsInfo();
UsedRegs.resize((TRI.getNumRegs()+31)/32);
@@ -51,7 +54,7 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, MVT LocVT,
ensureMaxAlignment(Alignment);
MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Alignment);
Size = unsigned(alignTo(Size, MinAlign));
- unsigned Offset = AllocateStack(Size, Alignment);
+ uint64_t Offset = AllocateStack(Size, Alignment);
addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
}
@@ -129,7 +132,7 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Call operand #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString() << '\n';
+ << ArgVT << '\n';
#endif
llvm_unreachable(nullptr);
}
@@ -147,7 +150,7 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Call operand #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString() << '\n';
+ << ArgVT << '\n';
#endif
llvm_unreachable(nullptr);
}
@@ -164,7 +167,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
#ifndef NDEBUG
dbgs() << "Call result #" << i << " has unhandled type "
- << EVT(VT).getEVTString() << '\n';
+ << VT << '\n';
#endif
llvm_unreachable(nullptr);
}
@@ -176,7 +179,7 @@ void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
#ifndef NDEBUG
dbgs() << "Call result has unhandled type "
- << EVT(VT).getEVTString() << '\n';
+ << VT << '\n';
#endif
llvm_unreachable(nullptr);
}
@@ -197,7 +200,7 @@ static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) {
void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
MVT VT, CCAssignFn Fn) {
- unsigned SavedStackOffset = StackOffset;
+ uint64_t SavedStackSize = StackSize;
Align SavedMaxStackArgAlign = MaxStackArgAlign;
unsigned NumLocs = Locs.size();
@@ -212,7 +215,7 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
do {
if (Fn(0, VT, VT, CCValAssign::Full, Flags, *this)) {
#ifndef NDEBUG
- dbgs() << "Call has unhandled type " << EVT(VT).getEVTString()
+ dbgs() << "Call has unhandled type " << VT
<< " while computing remaining regparms\n";
#endif
llvm_unreachable(nullptr);
@@ -229,7 +232,7 @@ void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
// Clear the assigned values and stack memory. We leave the registers marked
// as allocated so that future queries don't return the same registers, i.e.
// when i64 and f64 are both passed in GPRs.
- StackOffset = SavedStackOffset;
+ StackSize = SavedStackSize;
MaxStackArgAlign = SavedMaxStackArgAlign;
Locs.truncate(NumLocs);
}
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 398ff56f737c..6272b654b329 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm-c/Initialization.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"
@@ -24,6 +23,8 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeBasicBlockSectionsPass(Registry);
initializeBranchFolderPassPass(Registry);
initializeBranchRelaxationPass(Registry);
+ initializeBreakFalseDepsPass(Registry);
+ initializeCallBrPreparePass(Registry);
initializeCFGuardLongjmpPass(Registry);
initializeCFIFixupPass(Registry);
initializeCFIInstrInserterPass(Registry);
@@ -48,7 +49,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeFuncletLayoutPass(Registry);
initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
- initializeHardwareLoopsPass(Registry);
+ initializeHardwareLoopsLegacyPass(Registry);
initializeIfConverterPass(Registry);
initializeImplicitNullChecksPass(Registry);
initializeIndirectBrExpandPassPass(Registry);
@@ -140,7 +141,3 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeWinEHPreparePass(Registry);
initializeXRayInstrumentationPass(Registry);
}
-
-void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
- initializeCodeGen(*unwrap(R));
-}
diff --git a/llvm/lib/CodeGen/CodeGenCommonISel.cpp b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
index a5215969c0dd..577c5dbc8e2d 100644
--- a/llvm/lib/CodeGen/CodeGenCommonISel.cpp
+++ b/llvm/lib/CodeGen/CodeGenCommonISel.cpp
@@ -173,11 +173,11 @@ llvm::findSplitPointForStackProtector(MachineBasicBlock *BB,
return SplitPoint;
}
-unsigned llvm::getInvertedFPClassTest(unsigned Test) {
- unsigned InvertedTest = ~Test & fcAllFlags;
- switch (InvertedTest) {
- default:
- break;
+FPClassTest llvm::invertFPClassTestIfSimpler(FPClassTest Test) {
+ FPClassTest InvertedTest = ~Test;
+ // Pick the direction with fewer tests
+ // TODO: Handle more combinations of cases that can be handled together
+ switch (static_cast<unsigned>(InvertedTest)) {
case fcNan:
case fcSNan:
case fcQNan:
@@ -196,9 +196,15 @@ unsigned llvm::getInvertedFPClassTest(unsigned Test) {
case fcFinite:
case fcPosFinite:
case fcNegFinite:
+ case fcZero | fcNan:
+ case fcSubnormal | fcZero:
+ case fcSubnormal | fcZero | fcNan:
return InvertedTest;
+ default:
+ return fcNone;
}
- return 0;
+
+ llvm_unreachable("covered FPClassTest");
}
static MachineOperand *getSalvageOpsForCopy(const MachineRegisterInfo &MRI,
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index dd431cc6f4f5..b00df0b6c6cb 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -82,7 +83,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -257,13 +257,17 @@ static cl::opt<bool>
"CodeGenPrepare."));
static cl::opt<bool>
- OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(false),
+ OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
cl::desc("Enable converting phi types in CodeGenPrepare"));
static cl::opt<unsigned>
HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
cl::desc("Least BB number of huge function."));
+static cl::opt<unsigned>
+ MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
+ cl::Hidden,
+ cl::desc("Max number of address users to look at"));
namespace {
enum ExtType {
@@ -294,16 +298,16 @@ class TypePromotionTransaction;
class CodeGenPrepare : public FunctionPass {
const TargetMachine *TM = nullptr;
- const TargetSubtargetInfo *SubtargetInfo;
+ const TargetSubtargetInfo *SubtargetInfo = nullptr;
const TargetLowering *TLI = nullptr;
- const TargetRegisterInfo *TRI;
+ const TargetRegisterInfo *TRI = nullptr;
const TargetTransformInfo *TTI = nullptr;
const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
- const TargetLibraryInfo *TLInfo;
- const LoopInfo *LI;
+ const TargetLibraryInfo *TLInfo = nullptr;
+ LoopInfo *LI = nullptr;
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
- ProfileSummaryInfo *PSI;
+ ProfileSummaryInfo *PSI = nullptr;
/// As we scan instructions optimizing them, this is the next instruction
/// to optimize. Transforms that can invalidate this should update it.
@@ -373,6 +377,15 @@ public:
bool runOnFunction(Function &F) override;
+ void releaseMemory() override {
+ // Clear per function information.
+ InsertedInsts.clear();
+ PromotedInsts.clear();
+ FreshBBs.clear();
+ BPI.reset();
+ BFI.reset();
+ }
+
StringRef getPassName() const override { return "CodeGen Prepare"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -413,7 +426,7 @@ private:
void removeAllAssertingVHReferences(Value *V);
bool eliminateAssumptions(Function &F);
- bool eliminateFallThrough(Function &F);
+ bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
bool eliminateMostlyEmptyBlocks(Function &F);
BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
@@ -494,10 +507,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
DL = &F.getParent()->getDataLayout();
bool EverMadeChange = false;
- // Clear per function information.
- InsertedInsts.clear();
- PromotedInsts.clear();
- FreshBBs.clear();
TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
SubtargetInfo = TM->getSubtargetImpl(F);
@@ -574,11 +583,15 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Because the basic algorithm's complex is near O(N!).
IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
+ // Transformations above may invalidate dominator tree and/or loop info.
+ DT.reset();
+ LI->releaseMemory();
+ LI->analyze(getDT(F));
+
bool MadeChange = true;
bool FuncIterated = false;
while (MadeChange) {
MadeChange = false;
- DT.reset();
for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
if (FuncIterated && !FreshBBs.contains(&BB))
@@ -587,6 +600,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
+ if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
+ DT.reset();
+
MadeChange |= Changed;
if (IsHugeFunc) {
// If the BB is updated, it may still has chance to be optimized.
@@ -602,9 +618,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
FreshBBs.insert(&BB);
else if (FuncIterated)
FreshBBs.erase(&BB);
-
- if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
- DT.reset();
} else {
// For small/normal functions, we restart BB iteration if the dominator
// tree of the Function was changed.
@@ -622,7 +635,12 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
MadeChange |= optimizePhiTypes(F);
if (MadeChange)
- eliminateFallThrough(F);
+ eliminateFallThrough(F, DT.get());
+
+#ifndef NDEBUG
+ if (MadeChange && VerifyLoopInfo)
+ LI->verify(getDT(F));
+#endif
// Really free removed instructions during promotion.
for (Instruction *I : RemovedInsts)
@@ -755,7 +773,7 @@ void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
/// Merge basic blocks which are connected by a single edge, where one of the
/// basic blocks has a single successor pointing to the other basic block,
/// which has a single predecessor.
-bool CodeGenPrepare::eliminateFallThrough(Function &F) {
+bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
bool Changed = false;
// Scan all of the blocks in the function, except for the entry block.
// Use a temporary array to avoid iterator being invalidated when
@@ -777,13 +795,19 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
continue;
+ // Make an effort to skip unreachable blocks.
+ if (DT && !DT->isReachableFromEntry(BB))
+ continue;
+
BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
if (Term && !Term->isConditional()) {
Changed = true;
LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
// Merge BB into SinglePred and delete it.
- MergeBlockIntoPredecessor(BB);
+ MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
+ /* MemDep */ nullptr,
+ /* PredecessorWithTwoSuccessors */ false, DT);
Preds.insert(SinglePred);
if (IsHugeFunc) {
@@ -1579,6 +1603,7 @@ static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
/// intrinsic. Return true if any changes were made.
bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
ModifyDT &ModifiedDT) {
+ bool EdgeCase = false;
Value *A, *B;
BinaryOperator *Add;
if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
@@ -1587,11 +1612,12 @@ bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
// Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
A = Add->getOperand(0);
B = Add->getOperand(1);
+ EdgeCase = true;
}
if (!TLI->shouldFormOverflowOp(ISD::UADDO,
TLI->getValueType(*DL, Add->getType()),
- Add->hasNUsesOrMore(2)))
+ Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
return false;
// We don't want to move around uses of condition values this late, so we
@@ -1660,7 +1686,7 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
if (!TLI->shouldFormOverflowOp(ISD::USUBO,
TLI->getValueType(*DL, Sub->getType()),
- Sub->hasNUsesOrMore(2)))
+ Sub->hasNUsesOrMore(1)))
return false;
if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
@@ -1825,6 +1851,37 @@ static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
return true;
}
+/// Many architectures use the same instruction for both subtract and cmp. Try
+/// to swap cmp operands to match subtract operations to allow for CSE.
+static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
+ Value *Op0 = Cmp->getOperand(0);
+ Value *Op1 = Cmp->getOperand(1);
+ if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
+ isa<Constant>(Op1) || Op0 == Op1)
+ return false;
+
+ // If a subtract already has the same operands as a compare, swapping would be
+ // bad. If a subtract has the same operands as a compare but in reverse order,
+ // then swapping is good.
+ int GoodToSwap = 0;
+ unsigned NumInspected = 0;
+ for (const User *U : Op0->users()) {
+ // Avoid walking many users.
+ if (++NumInspected > 128)
+ return false;
+ if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
+ GoodToSwap++;
+ else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
+ GoodToSwap--;
+ }
+
+ if (GoodToSwap > 0) {
+ Cmp->swapOperands();
+ return true;
+ }
+ return false;
+}
+
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (sinkCmpExpression(Cmp, *TLI))
return true;
@@ -1838,6 +1895,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (foldICmpWithDominatingICmp(Cmp, *TLI))
return true;
+ if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
+ return true;
+
return false;
}
@@ -2129,6 +2189,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
///
/// If the transform is performed, return true and set ModifiedDT to true.
static bool despeculateCountZeros(IntrinsicInst *CountZeros,
+ LoopInfo &LI,
const TargetLowering *TLI,
const DataLayout *DL, ModifyDT &ModifiedDT,
SmallSet<BasicBlock *, 32> &FreshBBs,
@@ -2168,6 +2229,13 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
if (IsHugeFunc)
FreshBBs.insert(EndBlock);
+ // Update the LoopInfo. The new blocks are in the same loop as the start
+ // block.
+ if (Loop *L = LI.getLoopFor(StartBlock)) {
+ L->addBasicBlockToLoop(CallBlock, LI);
+ L->addBasicBlockToLoop(EndBlock, LI);
+ }
+
// Set up a builder to create a compare, conditional branch, and PHI.
IRBuilder<> Builder(CountZeros->getContext());
Builder.SetInsertPoint(StartBlock->getTerminator());
@@ -2279,7 +2347,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
if (!Arg->getType()->isPointerTy())
continue;
unsigned AS = Arg->getType()->getPointerAddressSpace();
- return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
+ if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
+ return true;
}
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
@@ -2341,7 +2410,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
case Intrinsic::cttz:
case Intrinsic::ctlz:
// If counting zeros is expensive, try to avoid it.
- return despeculateCountZeros(II, TLI, DL, ModifiedDT, FreshBBs,
+ return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
IsHugeFunc);
case Intrinsic::fshl:
case Intrinsic::fshr:
@@ -2349,24 +2418,6 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
case Intrinsic::dbg_assign:
case Intrinsic::dbg_value:
return fixupDbgValue(II);
- case Intrinsic::vscale: {
- // If datalayout has no special restrictions on vector data layout,
- // replace `llvm.vscale` by an equivalent constant expression
- // to benefit from cheap constant propagation.
- Type *ScalableVectorTy =
- VectorType::get(Type::getInt8Ty(II->getContext()), 1, true);
- if (DL->getTypeAllocSize(ScalableVectorTy).getKnownMinValue() == 8) {
- auto *Null = Constant::getNullValue(ScalableVectorTy->getPointerTo());
- auto *One = ConstantInt::getSigned(II->getType(), 1);
- auto *CGep =
- ConstantExpr::getGetElementPtr(ScalableVectorTy, Null, One);
- replaceAllUsesWith(II, ConstantExpr::getPtrToInt(CGep, II->getType()),
- FreshBBs, IsHugeFunc);
- II->eraseFromParent();
- return true;
- }
- break;
- }
case Intrinsic::masked_gather:
return optimizeGatherScatterInst(II, II->getArgOperand(0));
case Intrinsic::masked_scatter:
@@ -2442,6 +2493,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
if (!RetI)
return false;
+ assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
+
PHINode *PN = nullptr;
ExtractValueInst *EVI = nullptr;
BitCastInst *BCI = nullptr;
@@ -2687,7 +2740,7 @@ void ExtAddrMode::print(raw_ostream &OS) const {
if (InBounds)
OS << "inbounds ";
if (BaseGV) {
- OS << (NeedPlus ? " + " : "") << "GV:";
+ OS << "GV:";
BaseGV->printAsOperand(OS, /*PrintType=*/false);
NeedPlus = true;
}
@@ -3073,6 +3126,9 @@ class TypePromotionTransaction {
~InstructionRemover() override { delete Replacer; }
+ InstructionRemover &operator=(const InstructionRemover &other) = delete;
+ InstructionRemover(const InstructionRemover &other) = delete;
+
/// Resurrect the instruction and reassign it to the proper uses if
/// new value was provided when build this action.
void undo() override {
@@ -3258,7 +3314,7 @@ class AddressingModeMatcher {
bool IgnoreProfitability;
/// True if we are optimizing for size.
- bool OptSize;
+ bool OptSize = false;
ProfileSummaryInfo *PSI;
BlockFrequencyInfo *BFI;
@@ -3574,10 +3630,15 @@ private:
/// Original Address.
Value *Original;
+ /// Common value among addresses
+ Value *CommonValue = nullptr;
+
public:
AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
: SQ(_SQ), Original(OriginalValue) {}
+ ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
+
/// Get the combined AddrMode
const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
@@ -3662,13 +3723,21 @@ public:
if (!initializeMap(Map))
return false;
- Value *CommonValue = findCommon(Map);
+ CommonValue = findCommon(Map);
if (CommonValue)
AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
return CommonValue != nullptr;
}
private:
+ /// `CommonValue` may be a placeholder inserted by us.
+ /// If the placeholder is not used, we should remove this dead instruction.
+ void eraseCommonValueIfDead() {
+ if (CommonValue && CommonValue->getNumUses() == 0)
+ if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
+ CommonInst->eraseFromParent();
+ }
+
/// Initialize Map with anchor values. For address seen
/// we set the value of different field saw in this address.
/// At the same time we find a common type for different field we will
@@ -3866,17 +3935,17 @@ private:
SimplificationTracker &ST) {
while (!TraverseOrder.empty()) {
Value *Current = TraverseOrder.pop_back_val();
- assert(Map.find(Current) != Map.end() && "No node to fill!!!");
+ assert(Map.contains(Current) && "No node to fill!!!");
Value *V = Map[Current];
if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
// CurrentValue also must be Select.
auto *CurrentSelect = cast<SelectInst>(Current);
auto *TrueValue = CurrentSelect->getTrueValue();
- assert(Map.find(TrueValue) != Map.end() && "No True Value!");
+ assert(Map.contains(TrueValue) && "No True Value!");
Select->setTrueValue(ST.Get(Map[TrueValue]));
auto *FalseValue = CurrentSelect->getFalseValue();
- assert(Map.find(FalseValue) != Map.end() && "No False Value!");
+ assert(Map.contains(FalseValue) && "No False Value!");
Select->setFalseValue(ST.Get(Map[FalseValue]));
} else {
// Must be a Phi node then.
@@ -3884,7 +3953,7 @@ private:
// Fill the Phi node with values from predecessors.
for (auto *B : predecessors(PHI->getParent())) {
Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
- assert(Map.find(PV) != Map.end() && "No predecessor Value!");
+ assert(Map.contains(PV) && "No predecessor Value!");
PHI->addIncoming(ST.Get(Map[PV]), B);
}
}
@@ -3908,7 +3977,7 @@ private:
while (!Worklist.empty()) {
Value *Current = Worklist.pop_back_val();
// if it is already visited or it is an ending value then skip it.
- if (Map.find(Current) != Map.end())
+ if (Map.contains(Current))
continue;
TraverseOrder.push_back(Current);
@@ -4627,7 +4696,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
return false;
}
case Instruction::Add: {
- // Check to see if we can merge in the RHS then the LHS. If so, we win.
+ // Check to see if we can merge in one operand, then the other. If so, we
+ // win.
ExtAddrMode BackupAddrMode = AddrMode;
unsigned OldSize = AddrModeInsts.size();
// Start a transaction at this point.
@@ -4637,9 +4707,15 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
+ // Try to match an integer constant second to increase its chance of ending
+ // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
+ int First = 0, Second = 1;
+ if (isa<ConstantInt>(AddrInst->getOperand(First))
+ && !isa<ConstantInt>(AddrInst->getOperand(Second)))
+ std::swap(First, Second);
AddrMode.InBounds = false;
- if (matchAddr(AddrInst->getOperand(1), Depth + 1) &&
- matchAddr(AddrInst->getOperand(0), Depth + 1))
+ if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
+ matchAddr(AddrInst->getOperand(Second), Depth + 1))
return true;
// Restore the old addr mode info.
@@ -4647,9 +4723,10 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
AddrModeInsts.resize(OldSize);
TPT.rollback(LastKnownGood);
- // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
- if (matchAddr(AddrInst->getOperand(0), Depth + 1) &&
- matchAddr(AddrInst->getOperand(1), Depth + 1))
+ // Otherwise this was over-aggressive. Try merging operands in the opposite
+ // order.
+ if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
+ matchAddr(AddrInst->getOperand(First), Depth + 1))
return true;
// Otherwise we definitely can't merge the ADD in.
@@ -4698,7 +4775,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
if (ConstantInt *CI =
dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
const APInt &CVal = CI->getValue();
- if (CVal.getMinSignedBits() <= 64) {
+ if (CVal.getSignificantBits() <= 64) {
ConstantOffset += CVal.getSExtValue() * TypeSize;
continue;
}
@@ -4718,36 +4795,35 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
// just add it to the disp field and check validity.
if (VariableOperand == -1) {
AddrMode.BaseOffs += ConstantOffset;
- if (ConstantOffset == 0 ||
- TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
- // Check to see if we can fold the base pointer in too.
- if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
+ if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
if (!cast<GEPOperator>(AddrInst)->isInBounds())
AddrMode.InBounds = false;
return true;
- }
- } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
- TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
- ConstantOffset > 0) {
- // Record GEPs with non-zero offsets as candidates for splitting in the
- // event that the offset cannot fit into the r+i addressing mode.
- // Simple and common case that only one GEP is used in calculating the
- // address for the memory access.
- Value *Base = AddrInst->getOperand(0);
- auto *BaseI = dyn_cast<Instruction>(Base);
- auto *GEP = cast<GetElementPtrInst>(AddrInst);
- if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
- (BaseI && !isa<CastInst>(BaseI) &&
- !isa<GetElementPtrInst>(BaseI))) {
- // Make sure the parent block allows inserting non-PHI instructions
- // before the terminator.
- BasicBlock *Parent =
- BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
- if (!Parent->getTerminator()->isEHPad())
- LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
- }
}
AddrMode.BaseOffs -= ConstantOffset;
+
+ if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
+ TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
+ ConstantOffset > 0) {
+ // Record GEPs with non-zero offsets as candidates for splitting in
+ // the event that the offset cannot fit into the r+i addressing mode.
+ // Simple and common case that only one GEP is used in calculating the
+ // address for the memory access.
+ Value *Base = AddrInst->getOperand(0);
+ auto *BaseI = dyn_cast<Instruction>(Base);
+ auto *GEP = cast<GetElementPtrInst>(AddrInst);
+ if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
+ (BaseI && !isa<CastInst>(BaseI) &&
+ !isa<GetElementPtrInst>(BaseI))) {
+ // Make sure the parent block allows inserting non-PHI instructions
+ // before the terminator.
+ BasicBlock *Parent = BaseI ? BaseI->getParent()
+ : &GEP->getFunction()->getEntryBlock();
+ if (!Parent->getTerminator()->isEHPad())
+ LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
+ }
+ }
+
return false;
}
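
A small arithmetic sketch of what the large-offset GEP splitting recorded here ultimately does (hypothetical offsets, no LLVM types): each candidate access is re-expressed as a common new base plus a small delta, so the delta has a better chance of fitting the r+i addressing mode.

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  // Hypothetical byte offsets of several accesses off the same base pointer.
  std::vector<int64_t> Offsets = {40000, 40016, 40128};
  // Use the smallest offset as the new base...
  int64_t BaseOffset = Offsets.front();
  std::printf("newbase = base + %lld\n", (long long)BaseOffset);
  // ...and rewrite each access as newbase + small delta.
  for (int64_t Offset : Offsets)
    std::printf("access at base + %lld -> newbase + %lld\n",
                (long long)Offset, (long long)(Offset - BaseOffset));
  return 0;
}
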
@@ -4963,18 +5039,14 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
return true;
}
-// Max number of memory uses to look at before aborting the search to conserve
-// compile time.
-static constexpr int MaxMemoryUsesToScan = 20;
-
/// Recursively walk all the uses of I until we find a memory use.
/// If we find an obviously non-foldable instruction, return true.
/// Add accessed addresses and types to MemoryUses.
static bool FindAllMemoryUses(
- Instruction *I, SmallVectorImpl<std::pair<Value *, Type *>> &MemoryUses,
+ Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
- BlockFrequencyInfo *BFI, int SeenInsts = 0) {
+ BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
// If we already considered this instruction, we're done.
if (!ConsideredInsts.insert(I).second)
return false;
@@ -4987,33 +5059,33 @@ static bool FindAllMemoryUses(
for (Use &U : I->uses()) {
// Conservatively return true if we're seeing a large number or a deep chain
// of users. This avoids excessive compilation times in pathological cases.
- if (SeenInsts++ >= MaxMemoryUsesToScan)
+ if (SeenInsts++ >= MaxAddressUsersToScan)
return true;
Instruction *UserI = cast<Instruction>(U.getUser());
if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
- MemoryUses.push_back({U.get(), LI->getType()});
+ MemoryUses.push_back({&U, LI->getType()});
continue;
}
if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
return true; // Storing addr, not into addr.
- MemoryUses.push_back({U.get(), SI->getValueOperand()->getType()});
+ MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
continue;
}
if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
return true; // Storing addr, not into addr.
- MemoryUses.push_back({U.get(), RMW->getValOperand()->getType()});
+ MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
continue;
}
if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
return true; // Storing addr, not into addr.
- MemoryUses.push_back({U.get(), CmpX->getCompareOperand()->getType()});
+ MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
continue;
}
@@ -5045,6 +5117,17 @@ static bool FindAllMemoryUses(
return false;
}
+static bool FindAllMemoryUses(
+ Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
+ const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
+ ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
+ unsigned SeenInsts = 0;
+ SmallPtrSet<Instruction *, 16> ConsideredInsts;
+ return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
+ PSI, BFI, SeenInsts);
+}
+
+
/// Return true if Val is already known to be live at the use site that we're
/// folding it into. If so, there is no cost to include it in the addressing
/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
@@ -5126,10 +5209,8 @@ bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
// we can remove the addressing mode and effectively trade one live register
// for another (at worst.) In this context, folding an addressing mode into
// the use is just a particularly nice way of sinking it.
- SmallVector<std::pair<Value *, Type *>, 16> MemoryUses;
- SmallPtrSet<Instruction *, 16> ConsideredInsts;
- if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, PSI,
- BFI))
+ SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
+ if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
return false; // Has a non-memory, non-foldable use!
// Now that we know that all uses of this instruction are part of a chain of
@@ -5142,8 +5223,9 @@ bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
// growth since most architectures have some reasonable small and fast way to
// compute an effective address. (i.e LEA on x86)
SmallVector<Instruction *, 32> MatchedAddrModeInsts;
- for (const std::pair<Value *, Type *> &Pair : MemoryUses) {
- Value *Address = Pair.first;
+ for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
+ Value *Address = Pair.first->get();
+ Instruction *UserI = cast<Instruction>(Pair.first->getUser());
Type *AddressAccessTy = Pair.second;
unsigned AS = Address->getType()->getPointerAddressSpace();
@@ -5156,7 +5238,7 @@ bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
- AddressAccessTy, AS, MemoryInst, Result,
+ AddressAccessTy, AS, UserI, Result,
InsertedInsts, PromotedInsts, TPT,
LargeOffsetGEP, OptSize, PSI, BFI);
Matcher.IgnoreProfitability = true;
@@ -5693,7 +5775,8 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
// Create a scalar GEP if there are more than 2 operands.
if (Ops.size() != 2) {
// Replace the last index with 0.
- Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy);
+ Ops[FinalIndex] =
+ Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
SourceTy = GetElementPtrInst::getIndexedType(
SourceTy, ArrayRef(Ops).drop_front());
@@ -6027,6 +6110,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
int64_t Offset = LargeOffsetGEP->second;
if (Offset != BaseOffset) {
TargetLowering::AddrMode AddrMode;
+ AddrMode.HasBaseReg = true;
AddrMode.BaseOffs = Offset - BaseOffset;
// The result type of the GEP might not be the type of the memory
// access.
@@ -6044,7 +6128,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
// Generate a new GEP to replace the current one.
LLVMContext &Ctx = GEP->getContext();
- Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ Type *PtrIdxTy = DL->getIndexType(GEP->getType());
Type *I8PtrTy =
Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
Type *I8Ty = Type::getInt8Ty(Ctx);
@@ -6062,7 +6146,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
NewBaseInsertBB =
- SplitEdge(NewBaseInsertBB, Invoke->getNormalDest());
+ SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
} else
NewBaseInsertPt = std::next(BaseI->getIterator());
@@ -6074,7 +6158,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
}
IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
// Create a new base.
- Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset);
+ Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
NewBaseGEP = OldBase;
if (NewBaseGEP->getType() != I8PtrTy)
NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
@@ -6090,7 +6174,7 @@ bool CodeGenPrepare::splitLargeGEPOffsets() {
NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
} else {
// Calculate the new offset for the new GEP.
- Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset);
+ Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
if (GEP->getType() != I8PtrTy)
@@ -6872,9 +6956,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
return false;
TargetLowering::SelectSupportKind SelectKind;
- if (VectorCond)
- SelectKind = TargetLowering::VectorMaskSelect;
- else if (SI->getType()->isVectorTy())
+ if (SI->getType()->isVectorTy())
SelectKind = TargetLowering::ScalarCondVectorVal;
else
SelectKind = TargetLowering::ScalarValSelect;
@@ -6915,88 +6997,88 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// first branch will point directly to select.end, and the corresponding PHI
// predecessor block will be the start block.
- // First, we split the block containing the select into 2 blocks.
+ // Collect values that go on the true side and the values that go on the false
+ // side.
+ SmallVector<Instruction *> TrueInstrs, FalseInstrs;
+ for (SelectInst *SI : ASI) {
+ if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
+ TrueInstrs.push_back(cast<Instruction>(V));
+ if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
+ FalseInstrs.push_back(cast<Instruction>(V));
+ }
+
+ // Split the select block, according to how many (if any) values go on each
+ // side.
BasicBlock *StartBlock = SI->getParent();
BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
- BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
- if (IsHugeFunc)
- FreshBBs.insert(EndBlock);
- BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
- // Delete the unconditional branch that was just created by the split.
- StartBlock->getTerminator()->eraseFromParent();
+ IRBuilder<> IB(SI);
+ auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
- // These are the new basic blocks for the conditional branch.
- // At least one will become an actual new basic block.
BasicBlock *TrueBlock = nullptr;
BasicBlock *FalseBlock = nullptr;
+ BasicBlock *EndBlock = nullptr;
BranchInst *TrueBranch = nullptr;
BranchInst *FalseBranch = nullptr;
-
- // Sink expensive instructions into the conditional blocks to avoid executing
- // them speculatively.
- for (SelectInst *SI : ASI) {
- if (sinkSelectOperand(TTI, SI->getTrueValue())) {
- if (TrueBlock == nullptr) {
- TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
- EndBlock->getParent(), EndBlock);
- TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
- if (IsHugeFunc)
- FreshBBs.insert(TrueBlock);
- TrueBranch->setDebugLoc(SI->getDebugLoc());
- }
- auto *TrueInst = cast<Instruction>(SI->getTrueValue());
- TrueInst->moveBefore(TrueBranch);
- }
- if (sinkSelectOperand(TTI, SI->getFalseValue())) {
- if (FalseBlock == nullptr) {
- FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
- EndBlock->getParent(), EndBlock);
- if (IsHugeFunc)
- FreshBBs.insert(FalseBlock);
- FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
- FalseBranch->setDebugLoc(SI->getDebugLoc());
- }
- auto *FalseInst = cast<Instruction>(SI->getFalseValue());
- FalseInst->moveBefore(FalseBranch);
- }
+ if (TrueInstrs.size() == 0) {
+ FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
+ CondFr, &*SplitPt, false, nullptr, nullptr, LI));
+ FalseBlock = FalseBranch->getParent();
+ EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
+ } else if (FalseInstrs.size() == 0) {
+ TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
+ CondFr, &*SplitPt, false, nullptr, nullptr, LI));
+ TrueBlock = TrueBranch->getParent();
+ EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
+ } else {
+ Instruction *ThenTerm = nullptr;
+ Instruction *ElseTerm = nullptr;
+ SplitBlockAndInsertIfThenElse(CondFr, &*SplitPt, &ThenTerm, &ElseTerm,
+ nullptr, nullptr, LI);
+ TrueBranch = cast<BranchInst>(ThenTerm);
+ FalseBranch = cast<BranchInst>(ElseTerm);
+ TrueBlock = TrueBranch->getParent();
+ FalseBlock = FalseBranch->getParent();
+ EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
+ }
+
+ EndBlock->setName("select.end");
+ if (TrueBlock)
+ TrueBlock->setName("select.true.sink");
+ if (FalseBlock)
+ FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
+ : "select.false.sink");
+
+ if (IsHugeFunc) {
+ if (TrueBlock)
+ FreshBBs.insert(TrueBlock);
+ if (FalseBlock)
+ FreshBBs.insert(FalseBlock);
+ FreshBBs.insert(EndBlock);
}
- // If there was nothing to sink, then arbitrarily choose the 'false' side
- // for a new input value to the PHI.
- if (TrueBlock == FalseBlock) {
- assert(TrueBlock == nullptr &&
- "Unexpected basic block transform while optimizing select");
+ BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());
- FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
- EndBlock->getParent(), EndBlock);
- if (IsHugeFunc)
- FreshBBs.insert(FalseBlock);
- auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
- FalseBranch->setDebugLoc(SI->getDebugLoc());
- }
+ static const unsigned MD[] = {
+ LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
+ LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
+ StartBlock->getTerminator()->copyMetadata(*SI, MD);
+
+ // Sink expensive instructions into the conditional blocks to avoid executing
+ // them speculatively.
+ for (Instruction *I : TrueInstrs)
+ I->moveBefore(TrueBranch);
+ for (Instruction *I : FalseInstrs)
+ I->moveBefore(FalseBranch);
- // Insert the real conditional branch based on the original condition.
// If we did not create a new block for one of the 'true' or 'false' paths
// of the condition, it means that side of the branch goes to the end block
// directly and the path originates from the start block from the point of
// view of the new PHI.
- BasicBlock *TT, *FT;
- if (TrueBlock == nullptr) {
- TT = EndBlock;
- FT = FalseBlock;
+ if (TrueBlock == nullptr)
TrueBlock = StartBlock;
- } else if (FalseBlock == nullptr) {
- TT = TrueBlock;
- FT = EndBlock;
+ else if (FalseBlock == nullptr)
FalseBlock = StartBlock;
- } else {
- TT = TrueBlock;
- FT = FalseBlock;
- }
- IRBuilder<> IB(SI);
- auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
- IB.CreateCondBr(CondFr, TT, FT, SI);
SmallPtrSet<const Instruction *, 2> INS;
INS.insert(ASI.begin(), ASI.end());
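
A compact sketch of the block-shape decision made by the rewritten select lowering above (plain C++, names are illustrative): a true-side-only sink yields an if-then, a false-side-only sink yields an if-else, and sinks on both sides yield a full diamond.

#include <cstdio>

enum class Shape { FalseOnly, TrueOnly, Diamond };

// Mirror of the three-way split: which conditional blocks are created depends
// on which select operands are worth sinking out of the straight-line code.
static Shape chooseShape(unsigned NumTrueSinks, unsigned NumFalseSinks) {
  if (NumTrueSinks == 0)
    return Shape::FalseOnly;   // only a select.false(.sink) block
  if (NumFalseSinks == 0)
    return Shape::TrueOnly;    // only a select.true.sink block
  return Shape::Diamond;       // both sink blocks plus select.end
}

int main() {
  std::printf("%d %d %d\n", (int)chooseShape(0, 2), (int)chooseShape(3, 0),
              (int)chooseShape(1, 1));
  return 0;
}
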
@@ -7105,7 +7187,7 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
if (IsHugeFunc) {
// Now we clone an instruction, its operands' defs may sink to this BB
- // now. So we put the operands defs' BBs into FreshBBs to do optmization.
+ // now. So we put the operands defs' BBs into FreshBBs to do optimization.
for (unsigned I = 0; I < NI->getNumOperands(); ++I) {
auto *OpDef = dyn_cast<Instruction>(NI->getOperand(I));
if (!OpDef)
@@ -7696,7 +7778,7 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
// whereas scalable vectors would have to be shifted by
// <2log(vscale) + number of bits> in order to store the
// low/high parts. Bailing out for now.
- if (isa<ScalableVectorType>(StoreType))
+ if (StoreType->isScalableTy())
return false;
if (!DL.typeSizeEqualsStoreSize(StoreType) ||
@@ -8051,8 +8133,8 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
return true;
if ((isa<UIToFPInst>(I) || isa<FPToUIInst>(I) || isa<TruncInst>(I)) &&
- TLI->optimizeExtendOrTruncateConversion(I,
- LI->getLoopFor(I->getParent())))
+ TLI->optimizeExtendOrTruncateConversion(
+ I, LI->getLoopFor(I->getParent()), *TTI))
return true;
if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
@@ -8064,7 +8146,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
return SinkCast(CI);
} else {
if (TLI->optimizeExtendOrTruncateConversion(
- I, LI->getLoopFor(I->getParent())))
+ I, LI->getLoopFor(I->getParent()), *TTI))
return true;
bool MadeChange = optimizeExt(I);
@@ -8128,7 +8210,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
GEPI->getName(), GEPI);
NC->setDebugLoc(GEPI->getDebugLoc());
replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
- GEPI->eraseFromParent();
+ RecursivelyDeleteTriviallyDeadInstructions(
+ GEPI, TLInfo, nullptr,
+ [&](Value *V) { removeAllAssertingVHReferences(V); });
++NumGEPsElim;
optimizeInst(NC, ModifiedDT);
return true;
diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp
index 48cd8e998ec9..c34a52a6f2de 100644
--- a/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/llvm/lib/CodeGen/CommandFlags.cpp
@@ -14,15 +14,15 @@
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/TargetParser/Triple.h"
#include <optional>
using namespace llvm;
@@ -81,7 +81,7 @@ CGOPT(bool, StackSymbolOrdering)
CGOPT(bool, StackRealign)
CGOPT(std::string, TrapFuncName)
CGOPT(bool, UseCtors)
-CGOPT(bool, LowerGlobalDtorsViaCxaAtExit)
+CGOPT(bool, DisableIntegratedAS)
CGOPT(bool, RelaxELFRelocations)
CGOPT_EXP(bool, DataSections)
CGOPT_EXP(bool, FunctionSections)
@@ -89,7 +89,7 @@ CGOPT(bool, IgnoreXCOFFVisibility)
CGOPT(bool, XCOFFTracebackTable)
CGOPT(std::string, BBSections)
CGOPT(unsigned, TLSSize)
-CGOPT(bool, EmulatedTLS)
+CGOPT_EXP(bool, EmulatedTLS)
CGOPT(bool, UniqueSectionNames)
CGOPT(bool, UniqueBasicBlockSectionNames)
CGOPT(EABI, EABIVersion)
@@ -100,10 +100,11 @@ CGOPT(bool, EmitCallSiteInfo)
CGOPT(bool, EnableMachineFunctionSplitter)
CGOPT(bool, EnableDebugEntryValues)
CGOPT(bool, ForceDwarfFrameSection)
-CGOPT(bool, XRayOmitFunctionIndex)
+CGOPT(bool, XRayFunctionIndex)
CGOPT(bool, DebugStrictDwarf)
CGOPT(unsigned, AlignLoops)
CGOPT(bool, JMCInstrument)
+CGOPT(bool, XCOFFReadOnlyPointers)
codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
#define CGBINDOPT(NAME) \
@@ -241,14 +242,15 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(EnableNoTrappingFPMath);
- static const auto DenormFlagEnumOptions =
- cl::values(clEnumValN(DenormalMode::IEEE, "ieee",
- "IEEE 754 denormal numbers"),
- clEnumValN(DenormalMode::PreserveSign, "preserve-sign",
- "the sign of a flushed-to-zero number is preserved "
- "in the sign of 0"),
- clEnumValN(DenormalMode::PositiveZero, "positive-zero",
- "denormals are flushed to positive zero"));
+ static const auto DenormFlagEnumOptions = cl::values(
+ clEnumValN(DenormalMode::IEEE, "ieee", "IEEE 754 denormal numbers"),
+ clEnumValN(DenormalMode::PreserveSign, "preserve-sign",
+ "the sign of a flushed-to-zero number is preserved "
+ "in the sign of 0"),
+ clEnumValN(DenormalMode::PositiveZero, "positive-zero",
+ "denormals are flushed to positive zero"),
+ clEnumValN(DenormalMode::Dynamic, "dynamic",
+ "denormals have unknown treatment"));
// FIXME: Doesn't have way to specify separate input and output modes.
static cl::opt<DenormalMode::DenormalModeKind> DenormalFPMath(
@@ -349,12 +351,6 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(UseCtors);
- static cl::opt<bool> LowerGlobalDtorsViaCxaAtExit(
- "lower-global-dtors-via-cxa-atexit",
- cl::desc("Lower llvm.global_dtors (global destructors) via __cxa_atexit"),
- cl::init(true));
- CGBINDOPT(LowerGlobalDtorsViaCxaAtExit);
-
static cl::opt<bool> RelaxELFRelocations(
"relax-elf-relocations",
cl::desc(
@@ -466,10 +462,10 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::desc("Always emit a debug frame section."), cl::init(false));
CGBINDOPT(ForceDwarfFrameSection);
- static cl::opt<bool> XRayOmitFunctionIndex(
- "no-xray-index", cl::desc("Don't emit xray_fn_idx section"),
- cl::init(false));
- CGBINDOPT(XRayOmitFunctionIndex);
+ static cl::opt<bool> XRayFunctionIndex("xray-function-index",
+ cl::desc("Emit xray_fn_idx section"),
+ cl::init(true));
+ CGBINDOPT(XRayFunctionIndex);
static cl::opt<bool> DebugStrictDwarf(
"strict-dwarf", cl::desc("use strict dwarf"), cl::init(false));
@@ -485,6 +481,18 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
cl::init(false));
CGBINDOPT(JMCInstrument);
+ static cl::opt<bool> XCOFFReadOnlyPointers(
+ "mxcoff-roptr",
+ cl::desc("When set to true, const objects with relocatable address "
+ "values are put into the RO data section."),
+ cl::init(false));
+ CGBINDOPT(XCOFFReadOnlyPointers);
+
+ static cl::opt<bool> DisableIntegratedAS(
+ "no-integrated-as", cl::desc("Disable integrated assembler"),
+ cl::init(false));
+ CGBINDOPT(DisableIntegratedAS);
+
#undef CGBINDOPT
mc::RegisterMCTargetOptionsFlags();
@@ -538,7 +546,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.GuaranteedTailCallOpt = getEnableGuaranteedTailCallOpt();
Options.StackSymbolOrdering = getStackSymbolOrdering();
Options.UseInitArray = !getUseCtors();
- Options.LowerGlobalDtorsViaCxaAtExit = getLowerGlobalDtorsViaCxaAtExit();
+ Options.DisableIntegratedAS = getDisableIntegratedAS();
Options.RelaxELFRelocations = getRelaxELFRelocations();
Options.DataSections =
getExplicitDataSections().value_or(TheTriple.hasDefaultDataSections());
@@ -549,8 +557,8 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.UniqueSectionNames = getUniqueSectionNames();
Options.UniqueBasicBlockSectionNames = getUniqueBasicBlockSectionNames();
Options.TLSSize = getTLSSize();
- Options.EmulatedTLS = getEmulatedTLS();
- Options.ExplicitEmulatedTLS = EmulatedTLSView->getNumOccurrences() > 0;
+ Options.EmulatedTLS =
+ getExplicitEmulatedTLS().value_or(TheTriple.hasDefaultEmulatedTLS());
Options.ExceptionModel = getExceptionModel();
Options.EmitStackSizeSection = getEnableStackSizeSection();
Options.EnableMachineFunctionSplitter = getEnableMachineFunctionSplitter();
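
A minimal sketch of the option-resolution pattern used for EmulatedTLS above (standalone C++, hypothetical helper name): an explicitly set flag wins, otherwise the target triple's default applies.

#include <cstdio>
#include <optional>

// Explicit command-line value takes precedence; otherwise fall back to the
// default implied by the target triple.
static bool resolveEmulatedTLS(std::optional<bool> Explicit,
                               bool TripleDefault) {
  return Explicit.value_or(TripleDefault);
}

int main() {
  std::printf("%d\n", resolveEmulatedTLS(std::nullopt, true)); // 1: triple default
  std::printf("%d\n", resolveEmulatedTLS(false, true));        // 0: explicit flag wins
  return 0;
}
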
@@ -558,10 +566,11 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
Options.EmitCallSiteInfo = getEmitCallSiteInfo();
Options.EnableDebugEntryValues = getEnableDebugEntryValues();
Options.ForceDwarfFrameSection = getForceDwarfFrameSection();
- Options.XRayOmitFunctionIndex = getXRayOmitFunctionIndex();
+ Options.XRayFunctionIndex = getXRayFunctionIndex();
Options.DebugStrictDwarf = getDebugStrictDwarf();
Options.LoopAlignment = getAlignLoops();
Options.JMCInstrument = getJMCInstrument();
+ Options.XCOFFReadOnlyPointers = getXCOFFReadOnlyPointers();
Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
diff --git a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
index 9b1f7117fa57..02c67e500bdc 100644
--- a/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
+++ b/llvm/lib/CodeGen/ComplexDeinterleavingPass.cpp
@@ -18,6 +18,11 @@
// pairs. Validity of each node is expected to be done upon creation, and any
// validation errors should halt traversal and prevent further graph
// construction.
+// Instead of relying on Shuffle operations, vector interleaving and
+// deinterleaving can be represented by vector.interleave2 and
+// vector.deinterleave2 intrinsics. Scalable vectors can be represented only by
+// these intrinsics, whereas fixed-width vectors are recognized for both the
+// shufflevector instruction and the intrinsics.
//
// Replacement:
// This step traverses the graph built up by identification, delegating to the
@@ -62,6 +67,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -94,6 +100,13 @@ static bool isInterleavingMask(ArrayRef<int> Mask);
/// <1, 3, 5, 7>).
static bool isDeinterleavingMask(ArrayRef<int> Mask);
+/// Returns true if \p V is a negation; this works for both integer and
+/// floating-point negations.
+static bool isNeg(Value *V);
+
+/// Returns the operand for negation operation.
+static Value *getNegOperand(Value *V);
+
namespace {
class ComplexDeinterleavingLegacyPass : public FunctionPass {
@@ -124,7 +137,7 @@ class ComplexDeinterleavingGraph;
struct ComplexDeinterleavingCompositeNode {
ComplexDeinterleavingCompositeNode(ComplexDeinterleavingOperation Op,
- Instruction *R, Instruction *I)
+ Value *R, Value *I)
: Operation(Op), Real(R), Imag(I) {}
private:
@@ -134,22 +147,21 @@ private:
public:
ComplexDeinterleavingOperation Operation;
- Instruction *Real;
- Instruction *Imag;
+ Value *Real;
+ Value *Imag;
- // Instructions that should only exist within this node, there should be no
- // users of these instructions outside the node. An example of these would be
- // the multiply instructions of a partial multiply operation.
- SmallVector<Instruction *> InternalInstructions;
- ComplexDeinterleavingRotation Rotation;
+  // These two members are required exclusively for generating
+ // ComplexDeinterleavingOperation::Symmetric operations.
+ unsigned Opcode;
+ std::optional<FastMathFlags> Flags;
+
+ ComplexDeinterleavingRotation Rotation =
+ ComplexDeinterleavingRotation::Rotation_0;
SmallVector<RawNodePtr> Operands;
Value *ReplacementNode = nullptr;
- void addInstruction(Instruction *I) { InternalInstructions.push_back(I); }
void addOperand(NodePtr Node) { Operands.push_back(Node.get()); }
- bool hasAllInternalUses(SmallPtrSet<Instruction *, 16> &AllInstructions);
-
void dump() { dump(dbgs()); }
void dump(raw_ostream &OS) {
auto PrintValue = [&](Value *V) {
@@ -181,40 +193,105 @@ public:
OS << " - ";
PrintNodeRef(Op);
}
- OS << " InternalInstructions:\n";
- for (const auto &I : InternalInstructions) {
- OS << " - \"";
- I->print(OS, true);
- OS << "\"\n";
- }
}
};
class ComplexDeinterleavingGraph {
public:
+ struct Product {
+ Value *Multiplier;
+ Value *Multiplicand;
+ bool IsPositive;
+ };
+
+ using Addend = std::pair<Value *, bool>;
using NodePtr = ComplexDeinterleavingCompositeNode::NodePtr;
using RawNodePtr = ComplexDeinterleavingCompositeNode::RawNodePtr;
- explicit ComplexDeinterleavingGraph(const TargetLowering *tl) : TL(tl) {}
+
+ // Helper struct for holding info about potential partial multiplication
+ // candidates
+ struct PartialMulCandidate {
+ Value *Common;
+ NodePtr Node;
+ unsigned RealIdx;
+ unsigned ImagIdx;
+ bool IsNodeInverted;
+ };
+
+ explicit ComplexDeinterleavingGraph(const TargetLowering *TL,
+ const TargetLibraryInfo *TLI)
+ : TL(TL), TLI(TLI) {}
private:
- const TargetLowering *TL;
- Instruction *RootValue;
- NodePtr RootNode;
+ const TargetLowering *TL = nullptr;
+ const TargetLibraryInfo *TLI = nullptr;
SmallVector<NodePtr> CompositeNodes;
- SmallPtrSet<Instruction *, 16> AllInstructions;
+
+ SmallPtrSet<Instruction *, 16> FinalInstructions;
+
+ /// Root instructions are instructions from which complex computation starts
+ std::map<Instruction *, NodePtr> RootToNode;
+
+ /// Topologically sorted root instructions
+ SmallVector<Instruction *, 1> OrderedRoots;
+
+ /// When examining a basic block for complex deinterleaving, if it is a simple
+ /// one-block loop, then the only incoming block is 'Incoming' and the
+  /// 'BackEdge' block is the block itself.
+ BasicBlock *BackEdge = nullptr;
+ BasicBlock *Incoming = nullptr;
+
+ /// ReductionInfo maps from %ReductionOp to %PHInode and Instruction
+ /// %OutsideUser as it is shown in the IR:
+ ///
+ /// vector.body:
+ /// %PHInode = phi <vector type> [ zeroinitializer, %entry ],
+ /// [ %ReductionOp, %vector.body ]
+ /// ...
+ /// %ReductionOp = fadd i64 ...
+ /// ...
+ /// br i1 %condition, label %vector.body, %middle.block
+ ///
+ /// middle.block:
+ /// %OutsideUser = llvm.vector.reduce.fadd(..., %ReductionOp)
+ ///
+ /// %OutsideUser can be `llvm.vector.reduce.fadd` or `fadd` preceding
+ /// `llvm.vector.reduce.fadd` when unroll factor isn't one.
+ std::map<Instruction *, std::pair<PHINode *, Instruction *>> ReductionInfo;
+
+  /// In the process of detecting a reduction, we consider a pair of
+  /// %ReductionOPs, which we refer to as real and imag (or vice versa), and
+  /// traverse the use-tree to detect complex operations. As this is a reduction
+  /// operation, it will eventually reach RealPHI and ImagPHI, which correspond
+  /// to the %ReductionOPs that we suspect to be complex.
+ /// RealPHI and ImagPHI are used by the identifyPHINode method.
+ PHINode *RealPHI = nullptr;
+ PHINode *ImagPHI = nullptr;
+
+ /// Set this flag to true if RealPHI and ImagPHI were reached during reduction
+ /// detection.
+ bool PHIsFound = false;
+
+ /// OldToNewPHI maps the original real PHINode to a new, double-sized PHINode.
+ /// The new PHINode corresponds to a vector of deinterleaved complex numbers.
+ /// This mapping is populated during
+ /// ComplexDeinterleavingOperation::ReductionPHI node replacement. It is then
+ /// used in the ComplexDeinterleavingOperation::ReductionOperation node
+ /// replacement process.
+ std::map<PHINode *, PHINode *> OldToNewPHI;
NodePtr prepareCompositeNode(ComplexDeinterleavingOperation Operation,
- Instruction *R, Instruction *I) {
+ Value *R, Value *I) {
+ assert(((Operation != ComplexDeinterleavingOperation::ReductionPHI &&
+ Operation != ComplexDeinterleavingOperation::ReductionOperation) ||
+ (R && I)) &&
+ "Reduction related nodes must have Real and Imaginary parts");
return std::make_shared<ComplexDeinterleavingCompositeNode>(Operation, R,
I);
}
NodePtr submitCompositeNode(NodePtr Node) {
CompositeNodes.push_back(Node);
- AllInstructions.insert(Node->Real);
- AllInstructions.insert(Node->Imag);
- for (auto *I : Node->InternalInstructions)
- AllInstructions.insert(I);
return Node;
}
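
A scalar C++ picture of the reduction shape sketched in the ReductionInfo comment above (illustrative only): the PHI is the accumulator carried around the loop, the ReductionOp is the in-loop add, and the outside user consumes the final value after the loop.

#include <cstdio>

int main() {
  float Data[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  float Acc = 0.0f;         // %PHInode starts as zeroinitializer on loop entry
  for (float X : Data)
    Acc = Acc + X;          // %ReductionOp = fadd ..., carried back by the PHI
  std::printf("%g\n", Acc); // %OutsideUser reads the reduced value (36)
  return 0;
}
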
@@ -242,9 +319,9 @@ private:
/// Identify the other branch of a Partial Mul, taking the CommonOperandI that
/// is partially known from identifyPartialMul, filling in the other half of
/// the complex pair.
- NodePtr identifyNodeWithImplicitAdd(
- Instruction *I, Instruction *J,
- std::pair<Instruction *, Instruction *> &CommonOperandI);
+ NodePtr
+ identifyNodeWithImplicitAdd(Instruction *I, Instruction *J,
+ std::pair<Value *, Value *> &CommonOperandI);
/// Identifies a complex add pattern and its rotation, based on the following
/// patterns.
@@ -254,10 +331,76 @@ private:
/// 270: r: ar + bi
/// i: ai - br
NodePtr identifyAdd(Instruction *Real, Instruction *Imag);
-
- NodePtr identifyNode(Instruction *I, Instruction *J);
-
- Value *replaceNode(RawNodePtr Node);
+ NodePtr identifySymmetricOperation(Instruction *Real, Instruction *Imag);
+
+ NodePtr identifyNode(Value *R, Value *I);
+
+ /// Determine if a sum of complex numbers can be formed from \p RealAddends
+  /// and \p ImagAddends. If \p Accumulator is not null, add the result to it.
+ /// Return nullptr if it is not possible to construct a complex number.
+ /// \p Flags are needed to generate symmetric Add and Sub operations.
+ NodePtr identifyAdditions(std::list<Addend> &RealAddends,
+ std::list<Addend> &ImagAddends,
+ std::optional<FastMathFlags> Flags,
+ NodePtr Accumulator);
+
+ /// Extract one addend that have both real and imaginary parts positive.
+  /// Extract one addend that has both real and imaginary parts positive.
+ std::list<Addend> &ImagAddends);
+
+  /// Determine if a sum of products of complex numbers can be formed from
+  /// \p RealMuls and \p ImagMuls. If \p Accumulator is not null, add the result
+  /// to it. Return nullptr if it is not possible to construct a complex number.
+ NodePtr identifyMultiplications(std::vector<Product> &RealMuls,
+ std::vector<Product> &ImagMuls,
+ NodePtr Accumulator);
+
+  /// Go through pairs of multiplications (one Real and one Imag) and find all
+  /// possible candidates for partial multiplication, putting them into \p
+  /// Candidates. Returns true if every Product has a pair with a common operand.
+ bool collectPartialMuls(const std::vector<Product> &RealMuls,
+ const std::vector<Product> &ImagMuls,
+ std::vector<PartialMulCandidate> &Candidates);
+
+ /// If the code is compiled with -Ofast or expressions have `reassoc` flag,
+ /// the order of complex computation operations may be significantly altered,
+ /// and the real and imaginary parts may not be executed in parallel. This
+ /// function takes this into consideration and employs a more general approach
+ /// to identify complex computations. Initially, it gathers all the addends
+ /// and multiplicands and then constructs a complex expression from them.
+ NodePtr identifyReassocNodes(Instruction *I, Instruction *J);
+
+ NodePtr identifyRoot(Instruction *I);
+
+ /// Identifies the Deinterleave operation applied to a vector containing
+ /// complex numbers. There are two ways to represent the Deinterleave
+ /// operation:
+  /// * Using two shufflevectors, with even indices for the \p Real instruction
+  /// and odd indices for the \p Imag instruction (only for fixed-width vectors)
+ /// * Using two extractvalue instructions applied to `vector.deinterleave2`
+ /// intrinsic (for both fixed and scalable vectors)
+ NodePtr identifyDeinterleave(Instruction *Real, Instruction *Imag);
+
+  /// Identifies the operation that represents a complex number repeated in a
+  /// splat vector. There are two possible types of splats: a ConstantExpr with
+  /// the ShuffleVector opcode and a ShuffleVectorInst. Both should have an
+ /// initialization mask with all values set to zero.
+ NodePtr identifySplat(Value *Real, Value *Imag);
+
+ NodePtr identifyPHINode(Instruction *Real, Instruction *Imag);
+
+  /// Identifies SelectInsts in a loop that has a reduction with predication
+  /// masks and/or predicated tail folding.
+ NodePtr identifySelectNode(Instruction *Real, Instruction *Imag);
+
+ Value *replaceNode(IRBuilderBase &Builder, RawNodePtr Node);
+
+ /// Complete IR modifications after producing new reduction operation:
+ /// * Populate the PHINode generated for
+ /// ComplexDeinterleavingOperation::ReductionPHI
+ /// * Deinterleave the final value outside of the loop and repurpose original
+ /// reduction users
+ void processReductionOperation(Value *OperationReplacement, RawNodePtr Node);
public:
void dump() { dump(dbgs()); }
@@ -270,9 +413,18 @@ public:
/// current graph.
bool identifyNodes(Instruction *RootI);
+  /// In case \p B is a one-block loop, this function seeks potential
+  /// reductions and populates ReductionInfo. Returns true if any reductions
+  /// were identified.
+ bool collectPotentialReductions(BasicBlock *B);
+
+ void identifyReductionNodes();
+
+ /// Check that every instruction, from the roots to the leaves, has internal
+ /// uses.
+ bool checkNodes();
+
/// Perform the actual replacement of the underlying instruction graph.
- /// Returns false if the deinterleaving operation should be cancelled for the
- /// current graph.
void replaceNodes();
};
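
A self-contained sketch of the signed addend collection that identifyReassocNodes (declared above) relies on (toy expression tree in plain C++, not the LLVM worklist code): each leaf is recorded together with the sign accumulated while walking the +/- operators.

#include <cstdio>
#include <utility>
#include <vector>

struct Expr {
  char Op;           // '+', '-', or 0 for a leaf
  int Leaf;          // leaf value when Op == 0
  const Expr *L, *R; // children when Op != 0
};

// Flatten a +/- tree into (leaf, is-positive) pairs, flipping the sign for
// the right-hand side of every subtraction.
static void collect(const Expr *E, bool Positive,
                    std::vector<std::pair<int, bool>> &Addends) {
  if (E->Op == 0) {
    Addends.push_back({E->Leaf, Positive});
    return;
  }
  collect(E->L, Positive, Addends);
  collect(E->R, E->Op == '-' ? !Positive : Positive, Addends);
}

int main() {
  Expr A{0, 1, nullptr, nullptr}, B{0, 2, nullptr, nullptr};
  Expr C{0, 3, nullptr, nullptr};
  Expr Sub{'-', 0, &B, &C};    // (b - c)
  Expr Root{'+', 0, &A, &Sub}; // a + (b - c)
  std::vector<std::pair<int, bool>> Addends;
  collect(&Root, true, Addends);
  for (auto &[V, Pos] : Addends)
    std::printf("%c%d ", Pos ? '+' : '-', V); // +1 +2 -3
  std::printf("\n");
  return 0;
}
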
@@ -368,43 +520,39 @@ static bool isDeinterleavingMask(ArrayRef<int> Mask) {
return true;
}
-bool ComplexDeinterleaving::evaluateBasicBlock(BasicBlock *B) {
- bool Changed = false;
+bool isNeg(Value *V) {
+ return match(V, m_FNeg(m_Value())) || match(V, m_Neg(m_Value()));
+}
- SmallVector<Instruction *> DeadInstrRoots;
+Value *getNegOperand(Value *V) {
+ assert(isNeg(V));
+ auto *I = cast<Instruction>(V);
+ if (I->getOpcode() == Instruction::FNeg)
+ return I->getOperand(0);
- for (auto &I : *B) {
- auto *SVI = dyn_cast<ShuffleVectorInst>(&I);
- if (!SVI)
- continue;
+ return I->getOperand(1);
+}
- // Look for a shufflevector that takes separate vectors of the real and
- // imaginary components and recombines them into a single vector.
- if (!isInterleavingMask(SVI->getShuffleMask()))
- continue;
+bool ComplexDeinterleaving::evaluateBasicBlock(BasicBlock *B) {
+ ComplexDeinterleavingGraph Graph(TL, TLI);
+ if (Graph.collectPotentialReductions(B))
+ Graph.identifyReductionNodes();
- ComplexDeinterleavingGraph Graph(TL);
- if (!Graph.identifyNodes(SVI))
- continue;
+ for (auto &I : *B)
+ Graph.identifyNodes(&I);
+ if (Graph.checkNodes()) {
Graph.replaceNodes();
- DeadInstrRoots.push_back(SVI);
- Changed = true;
- }
-
- for (const auto &I : DeadInstrRoots) {
- if (!I || I->getParent() == nullptr)
- continue;
- llvm::RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
+ return true;
}
- return Changed;
+ return false;
}
ComplexDeinterleavingGraph::NodePtr
ComplexDeinterleavingGraph::identifyNodeWithImplicitAdd(
Instruction *Real, Instruction *Imag,
- std::pair<Instruction *, Instruction *> &PartialMatch) {
+ std::pair<Value *, Value *> &PartialMatch) {
LLVM_DEBUG(dbgs() << "identifyNodeWithImplicitAdd " << *Real << " / " << *Imag
<< "\n");
@@ -413,58 +561,47 @@ ComplexDeinterleavingGraph::identifyNodeWithImplicitAdd(
return nullptr;
}
- if (Real->getOpcode() != Instruction::FMul ||
- Imag->getOpcode() != Instruction::FMul) {
- LLVM_DEBUG(dbgs() << " - Real or imaginary instruction is not fmul\n");
+ if ((Real->getOpcode() != Instruction::FMul &&
+ Real->getOpcode() != Instruction::Mul) ||
+ (Imag->getOpcode() != Instruction::FMul &&
+ Imag->getOpcode() != Instruction::Mul)) {
+ LLVM_DEBUG(
+ dbgs() << " - Real or imaginary instruction is not fmul or mul\n");
return nullptr;
}
- Instruction *R0 = dyn_cast<Instruction>(Real->getOperand(0));
- Instruction *R1 = dyn_cast<Instruction>(Real->getOperand(1));
- Instruction *I0 = dyn_cast<Instruction>(Imag->getOperand(0));
- Instruction *I1 = dyn_cast<Instruction>(Imag->getOperand(1));
- if (!R0 || !R1 || !I0 || !I1) {
- LLVM_DEBUG(dbgs() << " - Mul operand not Instruction\n");
- return nullptr;
- }
+ Value *R0 = Real->getOperand(0);
+ Value *R1 = Real->getOperand(1);
+ Value *I0 = Imag->getOperand(0);
+ Value *I1 = Imag->getOperand(1);
// A +/+ has a rotation of 0. If any of the operands are fneg, we flip the
// rotations and use the operand.
unsigned Negs = 0;
- SmallVector<Instruction *> FNegs;
- if (R0->getOpcode() == Instruction::FNeg ||
- R1->getOpcode() == Instruction::FNeg) {
+ Value *Op;
+ if (match(R0, m_Neg(m_Value(Op)))) {
Negs |= 1;
- if (R0->getOpcode() == Instruction::FNeg) {
- FNegs.push_back(R0);
- R0 = dyn_cast<Instruction>(R0->getOperand(0));
- } else {
- FNegs.push_back(R1);
- R1 = dyn_cast<Instruction>(R1->getOperand(0));
- }
- if (!R0 || !R1)
- return nullptr;
+ R0 = Op;
+ } else if (match(R1, m_Neg(m_Value(Op)))) {
+ Negs |= 1;
+ R1 = Op;
}
- if (I0->getOpcode() == Instruction::FNeg ||
- I1->getOpcode() == Instruction::FNeg) {
+
+ if (isNeg(I0)) {
Negs |= 2;
Negs ^= 1;
- if (I0->getOpcode() == Instruction::FNeg) {
- FNegs.push_back(I0);
- I0 = dyn_cast<Instruction>(I0->getOperand(0));
- } else {
- FNegs.push_back(I1);
- I1 = dyn_cast<Instruction>(I1->getOperand(0));
- }
- if (!I0 || !I1)
- return nullptr;
+ I0 = Op;
+ } else if (match(I1, m_Neg(m_Value(Op)))) {
+ Negs |= 2;
+ Negs ^= 1;
+ I1 = Op;
}
ComplexDeinterleavingRotation Rotation = (ComplexDeinterleavingRotation)Negs;
- Instruction *CommonOperand;
- Instruction *UncommonRealOp;
- Instruction *UncommonImagOp;
+ Value *CommonOperand;
+ Value *UncommonRealOp;
+ Value *UncommonImagOp;
if (R0 == I0 || R0 == I1) {
CommonOperand = R0;
@@ -512,7 +649,6 @@ ComplexDeinterleavingGraph::identifyNodeWithImplicitAdd(
Node->Rotation = Rotation;
Node->addOperand(CommonNode);
Node->addOperand(UncommonNode);
- Node->InternalInstructions.append(FNegs);
return submitCompositeNode(Node);
}
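
A tiny standalone model of the Negs bookkeeping in identifyNodeWithImplicitAdd above (return values mirror the ComplexDeinterleavingRotation enum; everything else is illustrative): a negated real operand toggles bit 0, while a negated imaginary operand sets bit 1 and flips bit 0.

#include <cstdio>

// Returns 0, 1, 2 or 3, matching Rotation_0/_90/_180/_270.
static unsigned rotationFromNegs(bool RealNegated, bool ImagNegated) {
  unsigned Negs = 0;
  if (RealNegated)
    Negs |= 1;
  if (ImagNegated) {
    Negs |= 2;
    Negs ^= 1;
  }
  return Negs;
}

int main() {
  std::printf("%u %u %u %u\n", rotationFromNegs(false, false),
              rotationFromNegs(true, false), rotationFromNegs(false, true),
              rotationFromNegs(true, true)); // 0 1 3 2
  return 0;
}
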
@@ -522,26 +658,29 @@ ComplexDeinterleavingGraph::identifyPartialMul(Instruction *Real,
LLVM_DEBUG(dbgs() << "identifyPartialMul " << *Real << " / " << *Imag
<< "\n");
// Determine rotation
+ auto IsAdd = [](unsigned Op) {
+ return Op == Instruction::FAdd || Op == Instruction::Add;
+ };
+ auto IsSub = [](unsigned Op) {
+ return Op == Instruction::FSub || Op == Instruction::Sub;
+ };
ComplexDeinterleavingRotation Rotation;
- if (Real->getOpcode() == Instruction::FAdd &&
- Imag->getOpcode() == Instruction::FAdd)
+ if (IsAdd(Real->getOpcode()) && IsAdd(Imag->getOpcode()))
Rotation = ComplexDeinterleavingRotation::Rotation_0;
- else if (Real->getOpcode() == Instruction::FSub &&
- Imag->getOpcode() == Instruction::FAdd)
+ else if (IsSub(Real->getOpcode()) && IsAdd(Imag->getOpcode()))
Rotation = ComplexDeinterleavingRotation::Rotation_90;
- else if (Real->getOpcode() == Instruction::FSub &&
- Imag->getOpcode() == Instruction::FSub)
+ else if (IsSub(Real->getOpcode()) && IsSub(Imag->getOpcode()))
Rotation = ComplexDeinterleavingRotation::Rotation_180;
- else if (Real->getOpcode() == Instruction::FAdd &&
- Imag->getOpcode() == Instruction::FSub)
+ else if (IsAdd(Real->getOpcode()) && IsSub(Imag->getOpcode()))
Rotation = ComplexDeinterleavingRotation::Rotation_270;
else {
LLVM_DEBUG(dbgs() << " - Unhandled rotation.\n");
return nullptr;
}
- if (!Real->getFastMathFlags().allowContract() ||
- !Imag->getFastMathFlags().allowContract()) {
+ if (isa<FPMathOperator>(Real) &&
+ (!Real->getFastMathFlags().allowContract() ||
+ !Imag->getFastMathFlags().allowContract())) {
LLVM_DEBUG(dbgs() << " - Contract is missing from the FastMath flags.\n");
return nullptr;
}
@@ -560,18 +699,14 @@ ComplexDeinterleavingGraph::identifyPartialMul(Instruction *Real,
return nullptr;
}
- Instruction *R0 = dyn_cast<Instruction>(RealMulI->getOperand(0));
- Instruction *R1 = dyn_cast<Instruction>(RealMulI->getOperand(1));
- Instruction *I0 = dyn_cast<Instruction>(ImagMulI->getOperand(0));
- Instruction *I1 = dyn_cast<Instruction>(ImagMulI->getOperand(1));
- if (!R0 || !R1 || !I0 || !I1) {
- LLVM_DEBUG(dbgs() << " - Mul operand not Instruction\n");
- return nullptr;
- }
+ Value *R0 = RealMulI->getOperand(0);
+ Value *R1 = RealMulI->getOperand(1);
+ Value *I0 = ImagMulI->getOperand(0);
+ Value *I1 = ImagMulI->getOperand(1);
- Instruction *CommonOperand;
- Instruction *UncommonRealOp;
- Instruction *UncommonImagOp;
+ Value *CommonOperand;
+ Value *UncommonRealOp;
+ Value *UncommonImagOp;
if (R0 == I0 || R0 == I1) {
CommonOperand = R0;
@@ -589,7 +724,7 @@ ComplexDeinterleavingGraph::identifyPartialMul(Instruction *Real,
Rotation == ComplexDeinterleavingRotation::Rotation_270)
std::swap(UncommonRealOp, UncommonImagOp);
- std::pair<Instruction *, Instruction *> PartialMatch(
+ std::pair<Value *, Value *> PartialMatch(
(Rotation == ComplexDeinterleavingRotation::Rotation_0 ||
Rotation == ComplexDeinterleavingRotation::Rotation_180)
? CommonOperand
@@ -598,8 +733,16 @@ ComplexDeinterleavingGraph::identifyPartialMul(Instruction *Real,
Rotation == ComplexDeinterleavingRotation::Rotation_270)
? CommonOperand
: nullptr);
- NodePtr CNode = identifyNodeWithImplicitAdd(
- cast<Instruction>(CR), cast<Instruction>(CI), PartialMatch);
+
+ auto *CRInst = dyn_cast<Instruction>(CR);
+ auto *CIInst = dyn_cast<Instruction>(CI);
+
+ if (!CRInst || !CIInst) {
+ LLVM_DEBUG(dbgs() << " - Common operands are not instructions.\n");
+ return nullptr;
+ }
+
+ NodePtr CNode = identifyNodeWithImplicitAdd(CRInst, CIInst, PartialMatch);
if (!CNode) {
LLVM_DEBUG(dbgs() << " - No cnode identified\n");
return nullptr;
@@ -620,8 +763,6 @@ ComplexDeinterleavingGraph::identifyPartialMul(Instruction *Real,
NodePtr Node = prepareCompositeNode(
ComplexDeinterleavingOperation::CMulPartial, Real, Imag);
- Node->addInstruction(RealMulI);
- Node->addInstruction(ImagMulI);
Node->Rotation = Rotation;
Node->addOperand(CommonRes);
Node->addOperand(UncommonRes);
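
A worked numeric example of the partial multiplication identifyPartialMul above is matching (plain C++ with std::complex as a reference): (x + iy)(u + iv) splits into a rotation-0 contribution (x*u, x*v) plus a rotation-90 contribution (-y*v, y*u), which is what two chained CMulPartial nodes compute.

#include <complex>
#include <cstdio>

int main() {
  std::complex<double> A(3, 4), B(1, 2); // x + iy = 3 + 4i, u + iv = 1 + 2i
  // Rotation 0 part: (x*u, x*v); rotation 90 part accumulated on top: (-y*v, y*u).
  double Real = A.real() * B.real() - A.imag() * B.imag(); // 3*1 - 4*2 = -5
  double Imag = A.real() * B.imag() + A.imag() * B.real(); // 3*2 + 4*1 = 10
  std::printf("%g%+gi vs reference %g%+gi\n", Real, Imag, (A * B).real(),
              (A * B).imag()); // -5+10i vs reference -5+10i
  return 0;
}
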
@@ -696,129 +837,603 @@ static bool isInstructionPairMul(Instruction *A, Instruction *B) {
return match(A, Pattern) && match(B, Pattern);
}
+static bool isInstructionPotentiallySymmetric(Instruction *I) {
+ switch (I->getOpcode()) {
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FNeg:
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Mul:
+ return true;
+ default:
+ return false;
+ }
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifySymmetricOperation(Instruction *Real,
+ Instruction *Imag) {
+ if (Real->getOpcode() != Imag->getOpcode())
+ return nullptr;
+
+ if (!isInstructionPotentiallySymmetric(Real) ||
+ !isInstructionPotentiallySymmetric(Imag))
+ return nullptr;
+
+ auto *R0 = Real->getOperand(0);
+ auto *I0 = Imag->getOperand(0);
+
+ NodePtr Op0 = identifyNode(R0, I0);
+ NodePtr Op1 = nullptr;
+ if (Op0 == nullptr)
+ return nullptr;
+
+ if (Real->isBinaryOp()) {
+ auto *R1 = Real->getOperand(1);
+ auto *I1 = Imag->getOperand(1);
+ Op1 = identifyNode(R1, I1);
+ if (Op1 == nullptr)
+ return nullptr;
+ }
+
+ if (isa<FPMathOperator>(Real) &&
+ Real->getFastMathFlags() != Imag->getFastMathFlags())
+ return nullptr;
+
+ auto Node = prepareCompositeNode(ComplexDeinterleavingOperation::Symmetric,
+ Real, Imag);
+ Node->Opcode = Real->getOpcode();
+ if (isa<FPMathOperator>(Real))
+ Node->Flags = Real->getFastMathFlags();
+
+ Node->addOperand(Op0);
+ if (Real->isBinaryOp())
+ Node->addOperand(Op1);
+
+ return submitCompositeNode(Node);
+}
+
ComplexDeinterleavingGraph::NodePtr
-ComplexDeinterleavingGraph::identifyNode(Instruction *Real, Instruction *Imag) {
- LLVM_DEBUG(dbgs() << "identifyNode on " << *Real << " / " << *Imag << "\n");
- if (NodePtr CN = getContainingComposite(Real, Imag)) {
+ComplexDeinterleavingGraph::identifyNode(Value *R, Value *I) {
+ LLVM_DEBUG(dbgs() << "identifyNode on " << *R << " / " << *I << "\n");
+ assert(R->getType() == I->getType() &&
+ "Real and imaginary parts should not have different types");
+ if (NodePtr CN = getContainingComposite(R, I)) {
LLVM_DEBUG(dbgs() << " - Folding to existing node\n");
return CN;
}
- auto *RealShuffle = dyn_cast<ShuffleVectorInst>(Real);
- auto *ImagShuffle = dyn_cast<ShuffleVectorInst>(Imag);
- if (RealShuffle && ImagShuffle) {
- Value *RealOp1 = RealShuffle->getOperand(1);
- if (!isa<UndefValue>(RealOp1) && !isa<ConstantAggregateZero>(RealOp1)) {
- LLVM_DEBUG(dbgs() << " - RealOp1 is not undef or zero.\n");
- return nullptr;
- }
- Value *ImagOp1 = ImagShuffle->getOperand(1);
- if (!isa<UndefValue>(ImagOp1) && !isa<ConstantAggregateZero>(ImagOp1)) {
- LLVM_DEBUG(dbgs() << " - ImagOp1 is not undef or zero.\n");
- return nullptr;
- }
+ if (NodePtr CN = identifySplat(R, I))
+ return CN;
+
+ auto *Real = dyn_cast<Instruction>(R);
+ auto *Imag = dyn_cast<Instruction>(I);
+ if (!Real || !Imag)
+ return nullptr;
+
+ if (NodePtr CN = identifyDeinterleave(Real, Imag))
+ return CN;
+
+ if (NodePtr CN = identifyPHINode(Real, Imag))
+ return CN;
+
+ if (NodePtr CN = identifySelectNode(Real, Imag))
+ return CN;
+
+ auto *VTy = cast<VectorType>(Real->getType());
+ auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy);
+
+ bool HasCMulSupport = TL->isComplexDeinterleavingOperationSupported(
+ ComplexDeinterleavingOperation::CMulPartial, NewVTy);
+ bool HasCAddSupport = TL->isComplexDeinterleavingOperationSupported(
+ ComplexDeinterleavingOperation::CAdd, NewVTy);
+
+ if (HasCMulSupport && isInstructionPairMul(Real, Imag)) {
+ if (NodePtr CN = identifyPartialMul(Real, Imag))
+ return CN;
+ }
+
+ if (HasCAddSupport && isInstructionPairAdd(Real, Imag)) {
+ if (NodePtr CN = identifyAdd(Real, Imag))
+ return CN;
+ }
+
+ if (HasCMulSupport && HasCAddSupport) {
+ if (NodePtr CN = identifyReassocNodes(Real, Imag))
+ return CN;
+ }
+
+ if (NodePtr CN = identifySymmetricOperation(Real, Imag))
+ return CN;
- Value *RealOp0 = RealShuffle->getOperand(0);
- Value *ImagOp0 = ImagShuffle->getOperand(0);
+ LLVM_DEBUG(dbgs() << " - Not recognised as a valid pattern.\n");
+ return nullptr;
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyReassocNodes(Instruction *Real,
+ Instruction *Imag) {
+ auto IsOperationSupported = [](unsigned Opcode) -> bool {
+ return Opcode == Instruction::FAdd || Opcode == Instruction::FSub ||
+ Opcode == Instruction::FNeg || Opcode == Instruction::Add ||
+ Opcode == Instruction::Sub;
+ };
+
+ if (!IsOperationSupported(Real->getOpcode()) ||
+ !IsOperationSupported(Imag->getOpcode()))
+ return nullptr;
- if (RealOp0 != ImagOp0) {
- LLVM_DEBUG(dbgs() << " - Shuffle operands are not equal.\n");
+ std::optional<FastMathFlags> Flags;
+ if (isa<FPMathOperator>(Real)) {
+ if (Real->getFastMathFlags() != Imag->getFastMathFlags()) {
+ LLVM_DEBUG(dbgs() << "The flags in Real and Imaginary instructions are "
+ "not identical\n");
return nullptr;
}
- ArrayRef<int> RealMask = RealShuffle->getShuffleMask();
- ArrayRef<int> ImagMask = ImagShuffle->getShuffleMask();
- if (!isDeinterleavingMask(RealMask) || !isDeinterleavingMask(ImagMask)) {
- LLVM_DEBUG(dbgs() << " - Masks are not deinterleaving.\n");
+ Flags = Real->getFastMathFlags();
+ if (!Flags->allowReassoc()) {
+ LLVM_DEBUG(
+ dbgs()
+ << "the 'Reassoc' attribute is missing in the FastMath flags\n");
return nullptr;
}
+ }
- if (RealMask[0] != 0 || ImagMask[0] != 1) {
- LLVM_DEBUG(dbgs() << " - Masks do not have the correct initial value.\n");
- return nullptr;
+ // Collect multiplications and addend instructions from the given instruction
+  // while traversing its operands. Additionally, verify that all instructions
+ // have the same fast math flags.
+ auto Collect = [&Flags](Instruction *Insn, std::vector<Product> &Muls,
+ std::list<Addend> &Addends) -> bool {
+ SmallVector<PointerIntPair<Value *, 1, bool>> Worklist = {{Insn, true}};
+ SmallPtrSet<Value *, 8> Visited;
+ while (!Worklist.empty()) {
+ auto [V, IsPositive] = Worklist.back();
+ Worklist.pop_back();
+ if (!Visited.insert(V).second)
+ continue;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ Addends.emplace_back(V, IsPositive);
+ continue;
+ }
+
+ // If an instruction has more than one user, it indicates that it either
+ // has an external user, which will be later checked by the checkNodes
+ // function, or it is a subexpression utilized by multiple expressions. In
+ // the latter case, we will attempt to separately identify the complex
+ // operation from here in order to create a shared
+ // ComplexDeinterleavingCompositeNode.
+ if (I != Insn && I->getNumUses() > 1) {
+ LLVM_DEBUG(dbgs() << "Found potential sub-expression: " << *I << "\n");
+ Addends.emplace_back(I, IsPositive);
+ continue;
+ }
+ switch (I->getOpcode()) {
+ case Instruction::FAdd:
+ case Instruction::Add:
+ Worklist.emplace_back(I->getOperand(1), IsPositive);
+ Worklist.emplace_back(I->getOperand(0), IsPositive);
+ break;
+ case Instruction::FSub:
+ Worklist.emplace_back(I->getOperand(1), !IsPositive);
+ Worklist.emplace_back(I->getOperand(0), IsPositive);
+ break;
+ case Instruction::Sub:
+ if (isNeg(I)) {
+ Worklist.emplace_back(getNegOperand(I), !IsPositive);
+ } else {
+ Worklist.emplace_back(I->getOperand(1), !IsPositive);
+ Worklist.emplace_back(I->getOperand(0), IsPositive);
+ }
+ break;
+ case Instruction::FMul:
+ case Instruction::Mul: {
+ Value *A, *B;
+ if (isNeg(I->getOperand(0))) {
+ A = getNegOperand(I->getOperand(0));
+ IsPositive = !IsPositive;
+ } else {
+ A = I->getOperand(0);
+ }
+
+ if (isNeg(I->getOperand(1))) {
+ B = getNegOperand(I->getOperand(1));
+ IsPositive = !IsPositive;
+ } else {
+ B = I->getOperand(1);
+ }
+ Muls.push_back(Product{A, B, IsPositive});
+ break;
+ }
+ case Instruction::FNeg:
+ Worklist.emplace_back(I->getOperand(0), !IsPositive);
+ break;
+ default:
+ Addends.emplace_back(I, IsPositive);
+ continue;
+ }
+
+ if (Flags && I->getFastMathFlags() != *Flags) {
+ LLVM_DEBUG(dbgs() << "The instruction's fast math flags are "
+ "inconsistent with the root instructions' flags: "
+ << *I << "\n");
+ return false;
+ }
}
+ return true;
+ };
- // Type checking, the shuffle type should be a vector type of the same
- // scalar type, but half the size
- auto CheckType = [&](ShuffleVectorInst *Shuffle) {
- Value *Op = Shuffle->getOperand(0);
- auto *ShuffleTy = cast<FixedVectorType>(Shuffle->getType());
- auto *OpTy = cast<FixedVectorType>(Op->getType());
+ std::vector<Product> RealMuls, ImagMuls;
+ std::list<Addend> RealAddends, ImagAddends;
+ if (!Collect(Real, RealMuls, RealAddends) ||
+ !Collect(Imag, ImagMuls, ImagAddends))
+ return nullptr;
- if (OpTy->getScalarType() != ShuffleTy->getScalarType())
- return false;
- if ((ShuffleTy->getNumElements() * 2) != OpTy->getNumElements())
- return false;
+ if (RealAddends.size() != ImagAddends.size())
+ return nullptr;
- return true;
- };
+ NodePtr FinalNode;
+ if (!RealMuls.empty() || !ImagMuls.empty()) {
+    // If there are multiplicands, extract a positive addend and use it as an
+    // accumulator.
+ FinalNode = extractPositiveAddend(RealAddends, ImagAddends);
+ FinalNode = identifyMultiplications(RealMuls, ImagMuls, FinalNode);
+ if (!FinalNode)
+ return nullptr;
+ }
- auto CheckDeinterleavingShuffle = [&](ShuffleVectorInst *Shuffle) -> bool {
- if (!CheckType(Shuffle))
- return false;
+ // Identify and process remaining additions
+ if (!RealAddends.empty() || !ImagAddends.empty()) {
+ FinalNode = identifyAdditions(RealAddends, ImagAddends, Flags, FinalNode);
+ if (!FinalNode)
+ return nullptr;
+ }
+ assert(FinalNode && "FinalNode can not be nullptr here");
+ // Set the Real and Imag fields of the final node and submit it
+ FinalNode->Real = Real;
+ FinalNode->Imag = Imag;
+ submitCompositeNode(FinalNode);
+ return FinalNode;
+}
- ArrayRef<int> Mask = Shuffle->getShuffleMask();
- int Last = *Mask.rbegin();
+bool ComplexDeinterleavingGraph::collectPartialMuls(
+ const std::vector<Product> &RealMuls, const std::vector<Product> &ImagMuls,
+ std::vector<PartialMulCandidate> &PartialMulCandidates) {
+ // Helper function to extract a common operand from two products
+ auto FindCommonInstruction = [](const Product &Real,
+ const Product &Imag) -> Value * {
+ if (Real.Multiplicand == Imag.Multiplicand ||
+ Real.Multiplicand == Imag.Multiplier)
+ return Real.Multiplicand;
- Value *Op = Shuffle->getOperand(0);
- auto *OpTy = cast<FixedVectorType>(Op->getType());
- int NumElements = OpTy->getNumElements();
+ if (Real.Multiplier == Imag.Multiplicand ||
+ Real.Multiplier == Imag.Multiplier)
+ return Real.Multiplier;
- // Ensure that the deinterleaving shuffle only pulls from the first
- // shuffle operand.
- return Last < NumElements;
- };
+ return nullptr;
+ };
+
+  // Iterate over real and imaginary multiplications to find common operands.
+  // If a common operand is found, a partial multiplication candidate is created
+  // and added to the candidates vector. The function returns false if no common
+  // operand is found for any product.
+ for (unsigned i = 0; i < RealMuls.size(); ++i) {
+ bool FoundCommon = false;
+ for (unsigned j = 0; j < ImagMuls.size(); ++j) {
+ auto *Common = FindCommonInstruction(RealMuls[i], ImagMuls[j]);
+ if (!Common)
+ continue;
+
+ auto *A = RealMuls[i].Multiplicand == Common ? RealMuls[i].Multiplier
+ : RealMuls[i].Multiplicand;
+ auto *B = ImagMuls[j].Multiplicand == Common ? ImagMuls[j].Multiplier
+ : ImagMuls[j].Multiplicand;
+
+ auto Node = identifyNode(A, B);
+ if (Node) {
+ FoundCommon = true;
+ PartialMulCandidates.push_back({Common, Node, i, j, false});
+ }
+
+ Node = identifyNode(B, A);
+ if (Node) {
+ FoundCommon = true;
+ PartialMulCandidates.push_back({Common, Node, i, j, true});
+ }
+ }
+ if (!FoundCommon)
+ return false;
+ }
+ return true;
+}
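
A minimal sketch of the common-factor test performed by collectPartialMuls above (standalone C++, string literals stand in for llvm::Value pointers): a real product and an imaginary product can only form a partial complex multiply if they share one factor.

#include <cstdio>

struct Product {
  const char *Multiplier;
  const char *Multiplicand;
};

// Return the shared factor of the two products, or nullptr if there is none.
static const char *findCommon(const Product &Real, const Product &Imag) {
  if (Real.Multiplicand == Imag.Multiplicand ||
      Real.Multiplicand == Imag.Multiplier)
    return Real.Multiplicand;
  if (Real.Multiplier == Imag.Multiplicand ||
      Real.Multiplier == Imag.Multiplier)
    return Real.Multiplier;
  return nullptr;
}

int main() {
  const char *X = "x", *U = "u", *V = "v";
  Product RealMul{X, U}, ImagMul{X, V};
  const char *Common = findCommon(RealMul, ImagMul);
  std::printf("common factor: %s\n", Common ? Common : "none"); // x
  return 0;
}
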
- if (RealShuffle->getType() != ImagShuffle->getType()) {
- LLVM_DEBUG(dbgs() << " - Shuffle types aren't equal.\n");
- return nullptr;
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyMultiplications(
+ std::vector<Product> &RealMuls, std::vector<Product> &ImagMuls,
+ NodePtr Accumulator = nullptr) {
+ if (RealMuls.size() != ImagMuls.size())
+ return nullptr;
+
+ std::vector<PartialMulCandidate> Info;
+ if (!collectPartialMuls(RealMuls, ImagMuls, Info))
+ return nullptr;
+
+ // Map to store common instruction to node pointers
+ std::map<Value *, NodePtr> CommonToNode;
+ std::vector<bool> Processed(Info.size(), false);
+ for (unsigned I = 0; I < Info.size(); ++I) {
+ if (Processed[I])
+ continue;
+
+ PartialMulCandidate &InfoA = Info[I];
+ for (unsigned J = I + 1; J < Info.size(); ++J) {
+ if (Processed[J])
+ continue;
+
+ PartialMulCandidate &InfoB = Info[J];
+ auto *InfoReal = &InfoA;
+ auto *InfoImag = &InfoB;
+
+ auto NodeFromCommon = identifyNode(InfoReal->Common, InfoImag->Common);
+ if (!NodeFromCommon) {
+ std::swap(InfoReal, InfoImag);
+ NodeFromCommon = identifyNode(InfoReal->Common, InfoImag->Common);
+ }
+ if (!NodeFromCommon)
+ continue;
+
+ CommonToNode[InfoReal->Common] = NodeFromCommon;
+ CommonToNode[InfoImag->Common] = NodeFromCommon;
+ Processed[I] = true;
+ Processed[J] = true;
}
- if (!CheckDeinterleavingShuffle(RealShuffle)) {
- LLVM_DEBUG(dbgs() << " - RealShuffle is invalid type.\n");
+ }
+
+ std::vector<bool> ProcessedReal(RealMuls.size(), false);
+ std::vector<bool> ProcessedImag(ImagMuls.size(), false);
+ NodePtr Result = Accumulator;
+ for (auto &PMI : Info) {
+ if (ProcessedReal[PMI.RealIdx] || ProcessedImag[PMI.ImagIdx])
+ continue;
+
+ auto It = CommonToNode.find(PMI.Common);
+ // TODO: Process independent complex multiplications. Cases like this:
+ // A.real() * B where both A and B are complex numbers.
+ if (It == CommonToNode.end()) {
+ LLVM_DEBUG({
+ dbgs() << "Unprocessed independent partial multiplication:\n";
+ for (auto *Mul : {&RealMuls[PMI.RealIdx], &ImagMuls[PMI.ImagIdx]})
+ dbgs().indent(4) << (Mul->IsPositive ? "+" : "-") << *Mul->Multiplier
+ << " multiplied by " << *Mul->Multiplicand << "\n";
+ });
return nullptr;
}
- if (!CheckDeinterleavingShuffle(ImagShuffle)) {
- LLVM_DEBUG(dbgs() << " - ImagShuffle is invalid type.\n");
- return nullptr;
+
+ auto &RealMul = RealMuls[PMI.RealIdx];
+ auto &ImagMul = ImagMuls[PMI.ImagIdx];
+
+ auto NodeA = It->second;
+ auto NodeB = PMI.Node;
+ auto IsMultiplicandReal = PMI.Common == NodeA->Real;
+ // The following table illustrates the relationship between multiplications
+ // and rotations. If we consider the multiplication (X + iY) * (U + iV), we
+ // can see:
+ //
+ // Rotation | Real | Imag |
+ // ---------+--------+--------+
+ // 0 | x * u | x * v |
+ // 90 | -y * v | y * u |
+ // 180 | -x * u | -x * v |
+ // 270 | y * v | -y * u |
+ //
+ // Check if the candidate can indeed be represented by partial
+ // multiplication
+ // TODO: Add support for multiplication by complex one
+ if ((IsMultiplicandReal && PMI.IsNodeInverted) ||
+ (!IsMultiplicandReal && !PMI.IsNodeInverted))
+ continue;
+
+ // Determine the rotation based on the multiplications
+ ComplexDeinterleavingRotation Rotation;
+ if (IsMultiplicandReal) {
+ // Detect 0 and 180 degrees rotation
+ if (RealMul.IsPositive && ImagMul.IsPositive)
+ Rotation = llvm::ComplexDeinterleavingRotation::Rotation_0;
+ else if (!RealMul.IsPositive && !ImagMul.IsPositive)
+ Rotation = llvm::ComplexDeinterleavingRotation::Rotation_180;
+ else
+ continue;
+
+ } else {
+ // Detect 90 and 270 degrees rotation
+ if (!RealMul.IsPositive && ImagMul.IsPositive)
+ Rotation = llvm::ComplexDeinterleavingRotation::Rotation_90;
+ else if (RealMul.IsPositive && !ImagMul.IsPositive)
+ Rotation = llvm::ComplexDeinterleavingRotation::Rotation_270;
+ else
+ continue;
}
- NodePtr PlaceholderNode =
- prepareCompositeNode(llvm::ComplexDeinterleavingOperation::Shuffle,
- RealShuffle, ImagShuffle);
- PlaceholderNode->ReplacementNode = RealShuffle->getOperand(0);
- return submitCompositeNode(PlaceholderNode);
+ LLVM_DEBUG({
+ dbgs() << "Identified partial multiplication (X, Y) * (U, V):\n";
+ dbgs().indent(4) << "X: " << *NodeA->Real << "\n";
+ dbgs().indent(4) << "Y: " << *NodeA->Imag << "\n";
+ dbgs().indent(4) << "U: " << *NodeB->Real << "\n";
+ dbgs().indent(4) << "V: " << *NodeB->Imag << "\n";
+ dbgs().indent(4) << "Rotation - " << (int)Rotation * 90 << "\n";
+ });
+
+ NodePtr NodeMul = prepareCompositeNode(
+ ComplexDeinterleavingOperation::CMulPartial, nullptr, nullptr);
+ NodeMul->Rotation = Rotation;
+ NodeMul->addOperand(NodeA);
+ NodeMul->addOperand(NodeB);
+ if (Result)
+ NodeMul->addOperand(Result);
+ submitCompositeNode(NodeMul);
+ Result = NodeMul;
+ ProcessedReal[PMI.RealIdx] = true;
+ ProcessedImag[PMI.ImagIdx] = true;
}
- if (RealShuffle || ImagShuffle)
+
+ // Ensure all products have been processed; if not, return nullptr.
+ if (!all_of(ProcessedReal, [](bool V) { return V; }) ||
+ !all_of(ProcessedImag, [](bool V) { return V; })) {
+
+ // Dump debug information about which partial multiplications are not
+ // processed.
+ LLVM_DEBUG({
+ dbgs() << "Unprocessed products (Real):\n";
+ for (size_t i = 0; i < ProcessedReal.size(); ++i) {
+ if (!ProcessedReal[i])
+ dbgs().indent(4) << (RealMuls[i].IsPositive ? "+" : "-")
+ << *RealMuls[i].Multiplier << " multiplied by "
+ << *RealMuls[i].Multiplicand << "\n";
+ }
+ dbgs() << "Unprocessed products (Imag):\n";
+ for (size_t i = 0; i < ProcessedImag.size(); ++i) {
+ if (!ProcessedImag[i])
+ dbgs().indent(4) << (ImagMuls[i].IsPositive ? "+" : "-")
+ << *ImagMuls[i].Multiplier << " multiplied by "
+ << *ImagMuls[i].Multiplicand << "\n";
+ }
+ });
return nullptr;
+ }
- auto *VTy = cast<FixedVectorType>(Real->getType());
- auto *NewVTy =
- FixedVectorType::get(VTy->getScalarType(), VTy->getNumElements() * 2);
+ return Result;
+}
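As a standalone illustration of the rotation table above (not part of the patch; plain C++ with std::complex standing in for the vector lanes), the sketch below shows how two partial multiplications with rotations 0 and 90 accumulate into the full product (X + iY) * (U + iV).

#include <cassert>
#include <complex>

// Accumulate one partially-multiplied term of (X + iY) * (U + iV) following
// the table: 0 -> (x*u, x*v), 90 -> (-y*v, y*u), 180 -> (-x*u, -x*v),
// 270 -> (y*v, -y*u).
static std::complex<double> accumulatePartialMul(std::complex<double> Acc,
                                                 double X, double Y, double U,
                                                 double V, int Rotation) {
  switch (Rotation) {
  case 0:   return Acc + std::complex<double>(X * U, X * V);
  case 90:  return Acc + std::complex<double>(-Y * V, Y * U);
  case 180: return Acc + std::complex<double>(-X * U, -X * V);
  case 270: return Acc + std::complex<double>(Y * V, -Y * U);
  }
  assert(false && "unexpected rotation");
  return Acc;
}

int main() {
  std::complex<double> A(3.0, 4.0), B(-2.0, 5.0);
  std::complex<double> Acc(0.0, 0.0);
  // Rotations 0 and 90 together reproduce the full complex product.
  Acc = accumulatePartialMul(Acc, A.real(), A.imag(), B.real(), B.imag(), 0);
  Acc = accumulatePartialMul(Acc, A.real(), A.imag(), B.real(), B.imag(), 90);
  assert(std::abs(Acc - A * B) < 1e-9);
  return 0;
}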
- if (TL->isComplexDeinterleavingOperationSupported(
- ComplexDeinterleavingOperation::CMulPartial, NewVTy) &&
- isInstructionPairMul(Real, Imag)) {
- return identifyPartialMul(Real, Imag);
- }
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyAdditions(
+ std::list<Addend> &RealAddends, std::list<Addend> &ImagAddends,
+ std::optional<FastMathFlags> Flags, NodePtr Accumulator = nullptr) {
+ if (RealAddends.size() != ImagAddends.size())
+ return nullptr;
- if (TL->isComplexDeinterleavingOperationSupported(
- ComplexDeinterleavingOperation::CAdd, NewVTy) &&
- isInstructionPairAdd(Real, Imag)) {
- return identifyAdd(Real, Imag);
+ NodePtr Result;
+ // If we have an accumulator, use it as the first addend.
+ if (Accumulator)
+ Result = Accumulator;
+ // Otherwise find an element with both positive real and imaginary parts.
+ else
+ Result = extractPositiveAddend(RealAddends, ImagAddends);
+
+ if (!Result)
+ return nullptr;
+
+ while (!RealAddends.empty()) {
+ auto ItR = RealAddends.begin();
+ auto [R, IsPositiveR] = *ItR;
+
+ bool FoundImag = false;
+ for (auto ItI = ImagAddends.begin(); ItI != ImagAddends.end(); ++ItI) {
+ auto [I, IsPositiveI] = *ItI;
+ ComplexDeinterleavingRotation Rotation;
+ if (IsPositiveR && IsPositiveI)
+ Rotation = ComplexDeinterleavingRotation::Rotation_0;
+ else if (!IsPositiveR && IsPositiveI)
+ Rotation = ComplexDeinterleavingRotation::Rotation_90;
+ else if (!IsPositiveR && !IsPositiveI)
+ Rotation = ComplexDeinterleavingRotation::Rotation_180;
+ else
+ Rotation = ComplexDeinterleavingRotation::Rotation_270;
+
+ NodePtr AddNode;
+ if (Rotation == ComplexDeinterleavingRotation::Rotation_0 ||
+ Rotation == ComplexDeinterleavingRotation::Rotation_180) {
+ AddNode = identifyNode(R, I);
+ } else {
+ AddNode = identifyNode(I, R);
+ }
+ if (AddNode) {
+ LLVM_DEBUG({
+ dbgs() << "Identified addition:\n";
+ dbgs().indent(4) << "X: " << *R << "\n";
+ dbgs().indent(4) << "Y: " << *I << "\n";
+ dbgs().indent(4) << "Rotation - " << (int)Rotation * 90 << "\n";
+ });
+
+ NodePtr TmpNode;
+ if (Rotation == llvm::ComplexDeinterleavingRotation::Rotation_0) {
+ TmpNode = prepareCompositeNode(
+ ComplexDeinterleavingOperation::Symmetric, nullptr, nullptr);
+ if (Flags) {
+ TmpNode->Opcode = Instruction::FAdd;
+ TmpNode->Flags = *Flags;
+ } else {
+ TmpNode->Opcode = Instruction::Add;
+ }
+ } else if (Rotation ==
+ llvm::ComplexDeinterleavingRotation::Rotation_180) {
+ TmpNode = prepareCompositeNode(
+ ComplexDeinterleavingOperation::Symmetric, nullptr, nullptr);
+ if (Flags) {
+ TmpNode->Opcode = Instruction::FSub;
+ TmpNode->Flags = *Flags;
+ } else {
+ TmpNode->Opcode = Instruction::Sub;
+ }
+ } else {
+ TmpNode = prepareCompositeNode(ComplexDeinterleavingOperation::CAdd,
+ nullptr, nullptr);
+ TmpNode->Rotation = Rotation;
+ }
+
+ TmpNode->addOperand(Result);
+ TmpNode->addOperand(AddNode);
+ submitCompositeNode(TmpNode);
+ Result = TmpNode;
+ RealAddends.erase(ItR);
+ ImagAddends.erase(ItI);
+ FoundImag = true;
+ break;
+ }
+ }
+ if (!FoundImag)
+ return nullptr;
}
+ return Result;
+}
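For reference, a minimal standalone sketch (illustrative names only, not patch code) of the sign-to-rotation mapping used by the loop above:

#include <cassert>

// Map the signs of a (real, imaginary) addend pair to the rotation chosen
// above: (+,+) -> 0, (-,+) -> 90, (-,-) -> 180, (+,-) -> 270.
static int rotationFromSigns(bool IsPositiveR, bool IsPositiveI) {
  if (IsPositiveR && IsPositiveI)
    return 0;
  if (!IsPositiveR && IsPositiveI)
    return 90;
  if (!IsPositiveR && !IsPositiveI)
    return 180;
  return 270;
}

int main() {
  assert(rotationFromSigns(true, true) == 0);
  assert(rotationFromSigns(false, true) == 90);
  assert(rotationFromSigns(false, false) == 180);
  assert(rotationFromSigns(true, false) == 270);
  return 0;
}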
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::extractPositiveAddend(
+ std::list<Addend> &RealAddends, std::list<Addend> &ImagAddends) {
+ for (auto ItR = RealAddends.begin(); ItR != RealAddends.end(); ++ItR) {
+ for (auto ItI = ImagAddends.begin(); ItI != ImagAddends.end(); ++ItI) {
+ auto [R, IsPositiveR] = *ItR;
+ auto [I, IsPositiveI] = *ItI;
+ if (IsPositiveR && IsPositiveI) {
+ auto Result = identifyNode(R, I);
+ if (Result) {
+ RealAddends.erase(ItR);
+ ImagAddends.erase(ItI);
+ return Result;
+ }
+ }
+ }
+ }
return nullptr;
}
bool ComplexDeinterleavingGraph::identifyNodes(Instruction *RootI) {
- Instruction *Real;
- Instruction *Imag;
- if (!match(RootI, m_Shuffle(m_Instruction(Real), m_Instruction(Imag))))
- return false;
+ // This potential root instruction might already have been recognized as a
+ // reduction. Because RootToNode maps both the Real and Imaginary parts to
+ // the same CompositeNode, we should choose only one of them, either the Real
+ // or the Imag instruction, to use as an anchor for generating the complex
+ // instruction.
+ auto It = RootToNode.find(RootI);
+ if (It != RootToNode.end() && It->second->Real == RootI) {
+ OrderedRoots.push_back(RootI);
+ return true;
+ }
- RootValue = RootI;
- AllInstructions.insert(RootI);
- RootNode = identifyNode(Real, Imag);
+ auto RootNode = identifyRoot(RootI);
+ if (!RootNode)
+ return false;
LLVM_DEBUG({
Function *F = RootI->getFunction();
@@ -828,62 +1443,627 @@ bool ComplexDeinterleavingGraph::identifyNodes(Instruction *RootI) {
dump(dbgs());
dbgs() << "\n";
});
+ RootToNode[RootI] = RootNode;
+ OrderedRoots.push_back(RootI);
+ return true;
+}
- // Check all instructions have internal uses
- for (const auto &Node : CompositeNodes) {
- if (!Node->hasAllInternalUses(AllInstructions)) {
- LLVM_DEBUG(dbgs() << " - Invalid internal uses\n");
- return false;
+bool ComplexDeinterleavingGraph::collectPotentialReductions(BasicBlock *B) {
+ bool FoundPotentialReduction = false;
+
+ auto *Br = dyn_cast<BranchInst>(B->getTerminator());
+ if (!Br || Br->getNumSuccessors() != 2)
+ return false;
+
+ // Identify simple one-block loop
+ if (Br->getSuccessor(0) != B && Br->getSuccessor(1) != B)
+ return false;
+
+ SmallVector<PHINode *> PHIs;
+ for (auto &PHI : B->phis()) {
+ if (PHI.getNumIncomingValues() != 2)
+ continue;
+
+ if (!PHI.getType()->isVectorTy())
+ continue;
+
+ auto *ReductionOp = dyn_cast<Instruction>(PHI.getIncomingValueForBlock(B));
+ if (!ReductionOp)
+ continue;
+
+ // Check if final instruction is reduced outside of current block
+ Instruction *FinalReduction = nullptr;
+ auto NumUsers = 0u;
+ for (auto *U : ReductionOp->users()) {
+ ++NumUsers;
+ if (U == &PHI)
+ continue;
+ FinalReduction = dyn_cast<Instruction>(U);
+ }
+
+ if (NumUsers != 2 || !FinalReduction || FinalReduction->getParent() == B ||
+ isa<PHINode>(FinalReduction))
+ continue;
+
+ ReductionInfo[ReductionOp] = {&PHI, FinalReduction};
+ BackEdge = B;
+ auto BackEdgeIdx = PHI.getBasicBlockIndex(B);
+ auto IncomingIdx = BackEdgeIdx == 0 ? 1 : 0;
+ Incoming = PHI.getIncomingBlock(IncomingIdx);
+ FoundPotentialReduction = true;
+
+ // If the initial value of PHINode is an Instruction, consider it a leaf
+ // value of a complex deinterleaving graph.
+ if (auto *InitPHI =
+ dyn_cast<Instruction>(PHI.getIncomingValueForBlock(Incoming)))
+ FinalInstructions.insert(InitPHI);
+ }
+ return FoundPotentialReduction;
+}
+
+void ComplexDeinterleavingGraph::identifyReductionNodes() {
+ SmallVector<bool> Processed(ReductionInfo.size(), false);
+ SmallVector<Instruction *> OperationInstruction;
+ for (auto &P : ReductionInfo)
+ OperationInstruction.push_back(P.first);
+
+ // Identify a complex computation by evaluating two reduction operations that
+ // could potentially be involved.
+ for (size_t i = 0; i < OperationInstruction.size(); ++i) {
+ if (Processed[i])
+ continue;
+ for (size_t j = i + 1; j < OperationInstruction.size(); ++j) {
+ if (Processed[j])
+ continue;
+
+ auto *Real = OperationInstruction[i];
+ auto *Imag = OperationInstruction[j];
+ if (Real->getType() != Imag->getType())
+ continue;
+
+ RealPHI = ReductionInfo[Real].first;
+ ImagPHI = ReductionInfo[Imag].first;
+ PHIsFound = false;
+ auto Node = identifyNode(Real, Imag);
+ if (!Node) {
+ std::swap(Real, Imag);
+ std::swap(RealPHI, ImagPHI);
+ Node = identifyNode(Real, Imag);
+ }
+
+ // If a node is identified and reduction PHINode is used in the chain of
+ // operations, mark its operation instructions as used to prevent
+ // re-identification and attach the node to the real part
+ if (Node && PHIsFound) {
+ LLVM_DEBUG(dbgs() << "Identified reduction starting from instructions: "
+ << *Real << " / " << *Imag << "\n");
+ Processed[i] = true;
+ Processed[j] = true;
+ auto RootNode = prepareCompositeNode(
+ ComplexDeinterleavingOperation::ReductionOperation, Real, Imag);
+ RootNode->addOperand(Node);
+ RootToNode[Real] = RootNode;
+ RootToNode[Imag] = RootNode;
+ submitCompositeNode(RootNode);
+ break;
+ }
}
}
- return RootNode != nullptr;
+
+ RealPHI = nullptr;
+ ImagPHI = nullptr;
}
-Value *ComplexDeinterleavingGraph::replaceNode(
- ComplexDeinterleavingGraph::RawNodePtr Node) {
- if (Node->ReplacementNode)
- return Node->ReplacementNode;
+bool ComplexDeinterleavingGraph::checkNodes() {
+ // Collect all instructions from roots to leaves
+ SmallPtrSet<Instruction *, 16> AllInstructions;
+ SmallVector<Instruction *, 8> Worklist;
+ for (auto &Pair : RootToNode)
+ Worklist.push_back(Pair.first);
- Value *Input0 = replaceNode(Node->Operands[0]);
- Value *Input1 = replaceNode(Node->Operands[1]);
- Value *Accumulator =
- Node->Operands.size() > 2 ? replaceNode(Node->Operands[2]) : nullptr;
+ // Extract all instructions that are used by all XCMLA/XCADD/ADD/SUB/NEG
+ // chains
+ while (!Worklist.empty()) {
+ auto *I = Worklist.back();
+ Worklist.pop_back();
- assert(Input0->getType() == Input1->getType() &&
- "Node inputs need to be of the same type");
+ if (!AllInstructions.insert(I).second)
+ continue;
- Node->ReplacementNode = TL->createComplexDeinterleavingIR(
- Node->Real, Node->Operation, Node->Rotation, Input0, Input1, Accumulator);
+ for (Value *Op : I->operands()) {
+ if (auto *OpI = dyn_cast<Instruction>(Op)) {
+ if (!FinalInstructions.count(I))
+ Worklist.emplace_back(OpI);
+ }
+ }
+ }
- assert(Node->ReplacementNode && "Target failed to create Intrinsic call.");
- NumComplexTransformations += 1;
- return Node->ReplacementNode;
+ // Find instructions that have users outside of chain
+ SmallVector<Instruction *, 2> OuterInstructions;
+ for (auto *I : AllInstructions) {
+ // Skip root nodes
+ if (RootToNode.count(I))
+ continue;
+
+ for (User *U : I->users()) {
+ if (AllInstructions.count(cast<Instruction>(U)))
+ continue;
+
+ // Found an instruction that is not used by XCMLA/XCADD chain
+ Worklist.emplace_back(I);
+ break;
+ }
+ }
+
+ // If any instructions are found to be used outside, find and remove roots
+ // that somehow connect to those instructions.
+ SmallPtrSet<Instruction *, 16> Visited;
+ while (!Worklist.empty()) {
+ auto *I = Worklist.back();
+ Worklist.pop_back();
+ if (!Visited.insert(I).second)
+ continue;
+
+ // Found an impacted root node. Removing it from the nodes to be
+ // deinterleaved
+ if (RootToNode.count(I)) {
+ LLVM_DEBUG(dbgs() << "Instruction " << *I
+ << " could be deinterleaved but its chain of complex "
+ "operations have an outside user\n");
+ RootToNode.erase(I);
+ }
+
+ if (!AllInstructions.count(I) || FinalInstructions.count(I))
+ continue;
+
+ for (User *U : I->users())
+ Worklist.emplace_back(cast<Instruction>(U));
+
+ for (Value *Op : I->operands()) {
+ if (auto *OpI = dyn_cast<Instruction>(Op))
+ Worklist.emplace_back(OpI);
+ }
+ }
+ return !RootToNode.empty();
}
-void ComplexDeinterleavingGraph::replaceNodes() {
- Value *R = replaceNode(RootNode.get());
- assert(R && "Unable to find replacement for RootValue");
- RootValue->replaceAllUsesWith(R);
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyRoot(Instruction *RootI) {
+ if (auto *Intrinsic = dyn_cast<IntrinsicInst>(RootI)) {
+ if (Intrinsic->getIntrinsicID() !=
+ Intrinsic::experimental_vector_interleave2)
+ return nullptr;
+
+ auto *Real = dyn_cast<Instruction>(Intrinsic->getOperand(0));
+ auto *Imag = dyn_cast<Instruction>(Intrinsic->getOperand(1));
+ if (!Real || !Imag)
+ return nullptr;
+
+ return identifyNode(Real, Imag);
+ }
+
+ auto *SVI = dyn_cast<ShuffleVectorInst>(RootI);
+ if (!SVI)
+ return nullptr;
+
+ // Look for a shufflevector that takes separate vectors of the real and
+ // imaginary components and recombines them into a single vector.
+ if (!isInterleavingMask(SVI->getShuffleMask()))
+ return nullptr;
+
+ Instruction *Real;
+ Instruction *Imag;
+ if (!match(RootI, m_Shuffle(m_Instruction(Real), m_Instruction(Imag))))
+ return nullptr;
+
+ return identifyNode(Real, Imag);
}
-bool ComplexDeinterleavingCompositeNode::hasAllInternalUses(
- SmallPtrSet<Instruction *, 16> &AllInstructions) {
- if (Operation == ComplexDeinterleavingOperation::Shuffle)
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyDeinterleave(Instruction *Real,
+ Instruction *Imag) {
+ Instruction *I = nullptr;
+ Value *FinalValue = nullptr;
+ if (match(Real, m_ExtractValue<0>(m_Instruction(I))) &&
+ match(Imag, m_ExtractValue<1>(m_Specific(I))) &&
+ match(I, m_Intrinsic<Intrinsic::experimental_vector_deinterleave2>(
+ m_Value(FinalValue)))) {
+ NodePtr PlaceholderNode = prepareCompositeNode(
+ llvm::ComplexDeinterleavingOperation::Deinterleave, Real, Imag);
+ PlaceholderNode->ReplacementNode = FinalValue;
+ FinalInstructions.insert(Real);
+ FinalInstructions.insert(Imag);
+ return submitCompositeNode(PlaceholderNode);
+ }
+
+ auto *RealShuffle = dyn_cast<ShuffleVectorInst>(Real);
+ auto *ImagShuffle = dyn_cast<ShuffleVectorInst>(Imag);
+ if (!RealShuffle || !ImagShuffle) {
+ if (RealShuffle || ImagShuffle)
+ LLVM_DEBUG(dbgs() << " - There's a shuffle where there shouldn't be.\n");
+ return nullptr;
+ }
+
+ Value *RealOp1 = RealShuffle->getOperand(1);
+ if (!isa<UndefValue>(RealOp1) && !isa<ConstantAggregateZero>(RealOp1)) {
+ LLVM_DEBUG(dbgs() << " - RealOp1 is not undef or zero.\n");
+ return nullptr;
+ }
+ Value *ImagOp1 = ImagShuffle->getOperand(1);
+ if (!isa<UndefValue>(ImagOp1) && !isa<ConstantAggregateZero>(ImagOp1)) {
+ LLVM_DEBUG(dbgs() << " - ImagOp1 is not undef or zero.\n");
+ return nullptr;
+ }
+
+ Value *RealOp0 = RealShuffle->getOperand(0);
+ Value *ImagOp0 = ImagShuffle->getOperand(0);
+
+ if (RealOp0 != ImagOp0) {
+ LLVM_DEBUG(dbgs() << " - Shuffle operands are not equal.\n");
+ return nullptr;
+ }
+
+ ArrayRef<int> RealMask = RealShuffle->getShuffleMask();
+ ArrayRef<int> ImagMask = ImagShuffle->getShuffleMask();
+ if (!isDeinterleavingMask(RealMask) || !isDeinterleavingMask(ImagMask)) {
+ LLVM_DEBUG(dbgs() << " - Masks are not deinterleaving.\n");
+ return nullptr;
+ }
+
+ if (RealMask[0] != 0 || ImagMask[0] != 1) {
+ LLVM_DEBUG(dbgs() << " - Masks do not have the correct initial value.\n");
+ return nullptr;
+ }
+
+ // Type checking: the shuffle type should be a vector type with the same
+ // scalar type, but half the number of elements.
+ auto CheckType = [&](ShuffleVectorInst *Shuffle) {
+ Value *Op = Shuffle->getOperand(0);
+ auto *ShuffleTy = cast<FixedVectorType>(Shuffle->getType());
+ auto *OpTy = cast<FixedVectorType>(Op->getType());
+
+ if (OpTy->getScalarType() != ShuffleTy->getScalarType())
+ return false;
+ if ((ShuffleTy->getNumElements() * 2) != OpTy->getNumElements())
+ return false;
+
return true;
+ };
- for (auto *User : Real->users()) {
- if (!AllInstructions.contains(cast<Instruction>(User)))
+ auto CheckDeinterleavingShuffle = [&](ShuffleVectorInst *Shuffle) -> bool {
+ if (!CheckType(Shuffle))
return false;
+
+ ArrayRef<int> Mask = Shuffle->getShuffleMask();
+ int Last = *Mask.rbegin();
+
+ Value *Op = Shuffle->getOperand(0);
+ auto *OpTy = cast<FixedVectorType>(Op->getType());
+ int NumElements = OpTy->getNumElements();
+
+ // Ensure that the deinterleaving shuffle only pulls from the first
+ // shuffle operand.
+ return Last < NumElements;
+ };
+
+ if (RealShuffle->getType() != ImagShuffle->getType()) {
+ LLVM_DEBUG(dbgs() << " - Shuffle types aren't equal.\n");
+ return nullptr;
}
- for (auto *User : Imag->users()) {
- if (!AllInstructions.contains(cast<Instruction>(User)))
- return false;
+ if (!CheckDeinterleavingShuffle(RealShuffle)) {
+ LLVM_DEBUG(dbgs() << " - RealShuffle is invalid type.\n");
+ return nullptr;
}
- for (auto *I : InternalInstructions) {
- for (auto *User : I->users()) {
- if (!AllInstructions.contains(cast<Instruction>(User)))
+ if (!CheckDeinterleavingShuffle(ImagShuffle)) {
+ LLVM_DEBUG(dbgs() << " - ImagShuffle is invalid type.\n");
+ return nullptr;
+ }
+
+ NodePtr PlaceholderNode =
+ prepareCompositeNode(llvm::ComplexDeinterleavingOperation::Deinterleave,
+ RealShuffle, ImagShuffle);
+ PlaceholderNode->ReplacementNode = RealShuffle->getOperand(0);
+ FinalInstructions.insert(RealShuffle);
+ FinalInstructions.insert(ImagShuffle);
+ return submitCompositeNode(PlaceholderNode);
+}
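To make the mask checks above concrete, here is a standalone sketch of the shapes a deinterleaving shuffle pair is expected to have (plain std::vector in place of ArrayRef; isStridedMask is a hypothetical helper, not the isDeinterleavingMask used by the pass):

#include <cassert>
#include <vector>

// A deinterleaving pair of shuffle masks pulls the even lanes (0, 2, 4, ...)
// for the real part and the odd lanes (1, 3, 5, ...) for the imaginary part
// of an interleaved vector.
static bool isStridedMask(const std::vector<int> &Mask, int First) {
  for (size_t I = 0; I < Mask.size(); ++I)
    if (Mask[I] != First + 2 * static_cast<int>(I))
      return false;
  return true;
}

int main() {
  std::vector<int> RealMask = {0, 2, 4, 6}; // even lanes, starts at 0
  std::vector<int> ImagMask = {1, 3, 5, 7}; // odd lanes, starts at 1
  assert(isStridedMask(RealMask, /*First=*/0));
  assert(isStridedMask(ImagMask, /*First=*/1));
  return 0;
}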
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifySplat(Value *R, Value *I) {
+ auto IsSplat = [](Value *V) -> bool {
+ // Fixed-width vector with constants
+ if (isa<ConstantDataVector>(V))
+ return true;
+
+ VectorType *VTy;
+ ArrayRef<int> Mask;
+ // Splats are represented differently depending on whether the repeated
+ // value is a constant or an Instruction
+ if (auto *Const = dyn_cast<ConstantExpr>(V)) {
+ if (Const->getOpcode() != Instruction::ShuffleVector)
return false;
+ VTy = cast<VectorType>(Const->getType());
+ Mask = Const->getShuffleMask();
+ } else if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) {
+ VTy = Shuf->getType();
+ Mask = Shuf->getShuffleMask();
+ } else {
+ return false;
}
+
+ // When the data type is <1 x Type>, it's not possible to differentiate
+ // between the ComplexDeinterleaving::Deinterleave and
+ // ComplexDeinterleaving::Splat operations.
+ if (!VTy->isScalableTy() && VTy->getElementCount().getKnownMinValue() == 1)
+ return false;
+
+ return all_equal(Mask) && Mask[0] == 0;
+ };
+
+ if (!IsSplat(R) || !IsSplat(I))
+ return nullptr;
+
+ auto *Real = dyn_cast<Instruction>(R);
+ auto *Imag = dyn_cast<Instruction>(I);
+ if ((!Real && Imag) || (Real && !Imag))
+ return nullptr;
+
+ if (Real && Imag) {
+ // Non-constant splats should be in the same basic block
+ if (Real->getParent() != Imag->getParent())
+ return nullptr;
+
+ FinalInstructions.insert(Real);
+ FinalInstructions.insert(Imag);
}
- return true;
+ NodePtr PlaceholderNode =
+ prepareCompositeNode(ComplexDeinterleavingOperation::Splat, R, I);
+ return submitCompositeNode(PlaceholderNode);
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifyPHINode(Instruction *Real,
+ Instruction *Imag) {
+ if (Real != RealPHI || Imag != ImagPHI)
+ return nullptr;
+
+ PHIsFound = true;
+ NodePtr PlaceholderNode = prepareCompositeNode(
+ ComplexDeinterleavingOperation::ReductionPHI, Real, Imag);
+ return submitCompositeNode(PlaceholderNode);
+}
+
+ComplexDeinterleavingGraph::NodePtr
+ComplexDeinterleavingGraph::identifySelectNode(Instruction *Real,
+ Instruction *Imag) {
+ auto *SelectReal = dyn_cast<SelectInst>(Real);
+ auto *SelectImag = dyn_cast<SelectInst>(Imag);
+ if (!SelectReal || !SelectImag)
+ return nullptr;
+
+ Instruction *MaskA, *MaskB;
+ Instruction *AR, *AI, *RA, *BI;
+ if (!match(Real, m_Select(m_Instruction(MaskA), m_Instruction(AR),
+ m_Instruction(RA))) ||
+ !match(Imag, m_Select(m_Instruction(MaskB), m_Instruction(AI),
+ m_Instruction(BI))))
+ return nullptr;
+
+ if (MaskA != MaskB && !MaskA->isIdenticalTo(MaskB))
+ return nullptr;
+
+ if (!MaskA->getType()->isVectorTy())
+ return nullptr;
+
+ auto NodeA = identifyNode(AR, AI);
+ if (!NodeA)
+ return nullptr;
+
+ auto NodeB = identifyNode(RA, BI);
+ if (!NodeB)
+ return nullptr;
+
+ NodePtr PlaceholderNode = prepareCompositeNode(
+ ComplexDeinterleavingOperation::ReductionSelect, Real, Imag);
+ PlaceholderNode->addOperand(NodeA);
+ PlaceholderNode->addOperand(NodeB);
+ FinalInstructions.insert(MaskA);
+ FinalInstructions.insert(MaskB);
+ return submitCompositeNode(PlaceholderNode);
+}
+
+static Value *replaceSymmetricNode(IRBuilderBase &B, unsigned Opcode,
+ std::optional<FastMathFlags> Flags,
+ Value *InputA, Value *InputB) {
+ Value *I;
+ switch (Opcode) {
+ case Instruction::FNeg:
+ I = B.CreateFNeg(InputA);
+ break;
+ case Instruction::FAdd:
+ I = B.CreateFAdd(InputA, InputB);
+ break;
+ case Instruction::Add:
+ I = B.CreateAdd(InputA, InputB);
+ break;
+ case Instruction::FSub:
+ I = B.CreateFSub(InputA, InputB);
+ break;
+ case Instruction::Sub:
+ I = B.CreateSub(InputA, InputB);
+ break;
+ case Instruction::FMul:
+ I = B.CreateFMul(InputA, InputB);
+ break;
+ case Instruction::Mul:
+ I = B.CreateMul(InputA, InputB);
+ break;
+ default:
+ llvm_unreachable("Incorrect symmetric opcode");
+ }
+ if (Flags)
+ cast<Instruction>(I)->setFastMathFlags(*Flags);
+ return I;
+}
+
+Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder,
+ RawNodePtr Node) {
+ if (Node->ReplacementNode)
+ return Node->ReplacementNode;
+
+ auto ReplaceOperandIfExist = [&](RawNodePtr &Node, unsigned Idx) -> Value * {
+ return Node->Operands.size() > Idx
+ ? replaceNode(Builder, Node->Operands[Idx])
+ : nullptr;
+ };
+
+ Value *ReplacementNode;
+ switch (Node->Operation) {
+ case ComplexDeinterleavingOperation::CAdd:
+ case ComplexDeinterleavingOperation::CMulPartial:
+ case ComplexDeinterleavingOperation::Symmetric: {
+ Value *Input0 = ReplaceOperandIfExist(Node, 0);
+ Value *Input1 = ReplaceOperandIfExist(Node, 1);
+ Value *Accumulator = ReplaceOperandIfExist(Node, 2);
+ assert(!Input1 || (Input0->getType() == Input1->getType() &&
+ "Node inputs need to be of the same type"));
+ assert(!Accumulator ||
+ (Input0->getType() == Accumulator->getType() &&
+ "Accumulator and input need to be of the same type"));
+ if (Node->Operation == ComplexDeinterleavingOperation::Symmetric)
+ ReplacementNode = replaceSymmetricNode(Builder, Node->Opcode, Node->Flags,
+ Input0, Input1);
+ else
+ ReplacementNode = TL->createComplexDeinterleavingIR(
+ Builder, Node->Operation, Node->Rotation, Input0, Input1,
+ Accumulator);
+ break;
+ }
+ case ComplexDeinterleavingOperation::Deinterleave:
+ llvm_unreachable("Deinterleave node should already have ReplacementNode");
+ break;
+ case ComplexDeinterleavingOperation::Splat: {
+ auto *NewTy = VectorType::getDoubleElementsVectorType(
+ cast<VectorType>(Node->Real->getType()));
+ auto *R = dyn_cast<Instruction>(Node->Real);
+ auto *I = dyn_cast<Instruction>(Node->Imag);
+ if (R && I) {
+ // Splats that are not constant are interleaved where they are located
+ Instruction *InsertPoint = (I->comesBefore(R) ? R : I)->getNextNode();
+ IRBuilder<> IRB(InsertPoint);
+ ReplacementNode =
+ IRB.CreateIntrinsic(Intrinsic::experimental_vector_interleave2, NewTy,
+ {Node->Real, Node->Imag});
+ } else {
+ ReplacementNode =
+ Builder.CreateIntrinsic(Intrinsic::experimental_vector_interleave2,
+ NewTy, {Node->Real, Node->Imag});
+ }
+ break;
+ }
+ case ComplexDeinterleavingOperation::ReductionPHI: {
+ // If Operation is ReductionPHI, a new empty PHINode is created.
+ // It is filled later when the ReductionOperation is processed.
+ auto *VTy = cast<VectorType>(Node->Real->getType());
+ auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy);
+ auto *NewPHI = PHINode::Create(NewVTy, 0, "", BackEdge->getFirstNonPHI());
+ OldToNewPHI[dyn_cast<PHINode>(Node->Real)] = NewPHI;
+ ReplacementNode = NewPHI;
+ break;
+ }
+ case ComplexDeinterleavingOperation::ReductionOperation:
+ ReplacementNode = replaceNode(Builder, Node->Operands[0]);
+ processReductionOperation(ReplacementNode, Node);
+ break;
+ case ComplexDeinterleavingOperation::ReductionSelect: {
+ auto *MaskReal = cast<Instruction>(Node->Real)->getOperand(0);
+ auto *MaskImag = cast<Instruction>(Node->Imag)->getOperand(0);
+ auto *A = replaceNode(Builder, Node->Operands[0]);
+ auto *B = replaceNode(Builder, Node->Operands[1]);
+ auto *NewMaskTy = VectorType::getDoubleElementsVectorType(
+ cast<VectorType>(MaskReal->getType()));
+ auto *NewMask =
+ Builder.CreateIntrinsic(Intrinsic::experimental_vector_interleave2,
+ NewMaskTy, {MaskReal, MaskImag});
+ ReplacementNode = Builder.CreateSelect(NewMask, A, B);
+ break;
+ }
+ }
+
+ assert(ReplacementNode && "Target failed to create Intrinsic call.");
+ NumComplexTransformations += 1;
+ Node->ReplacementNode = ReplacementNode;
+ return ReplacementNode;
+}
+
+void ComplexDeinterleavingGraph::processReductionOperation(
+ Value *OperationReplacement, RawNodePtr Node) {
+ auto *Real = cast<Instruction>(Node->Real);
+ auto *Imag = cast<Instruction>(Node->Imag);
+ auto *OldPHIReal = ReductionInfo[Real].first;
+ auto *OldPHIImag = ReductionInfo[Imag].first;
+ auto *NewPHI = OldToNewPHI[OldPHIReal];
+
+ auto *VTy = cast<VectorType>(Real->getType());
+ auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy);
+
+ // We have to interleave initial origin values coming from IncomingBlock
+ Value *InitReal = OldPHIReal->getIncomingValueForBlock(Incoming);
+ Value *InitImag = OldPHIImag->getIncomingValueForBlock(Incoming);
+
+ IRBuilder<> Builder(Incoming->getTerminator());
+ auto *NewInit = Builder.CreateIntrinsic(
+ Intrinsic::experimental_vector_interleave2, NewVTy, {InitReal, InitImag});
+
+ NewPHI->addIncoming(NewInit, Incoming);
+ NewPHI->addIncoming(OperationReplacement, BackEdge);
+
+ // Deinterleave the complex vector outside of the loop so that it can
+ // finally be reduced.
+ auto *FinalReductionReal = ReductionInfo[Real].second;
+ auto *FinalReductionImag = ReductionInfo[Imag].second;
+
+ Builder.SetInsertPoint(
+ &*FinalReductionReal->getParent()->getFirstInsertionPt());
+ auto *Deinterleave = Builder.CreateIntrinsic(
+ Intrinsic::experimental_vector_deinterleave2,
+ OperationReplacement->getType(), OperationReplacement);
+
+ auto *NewReal = Builder.CreateExtractValue(Deinterleave, (uint64_t)0);
+ FinalReductionReal->replaceUsesOfWith(Real, NewReal);
+
+ Builder.SetInsertPoint(FinalReductionImag);
+ auto *NewImag = Builder.CreateExtractValue(Deinterleave, 1);
+ FinalReductionImag->replaceUsesOfWith(Imag, NewImag);
+}
+
+void ComplexDeinterleavingGraph::replaceNodes() {
+ SmallVector<Instruction *, 16> DeadInstrRoots;
+ for (auto *RootInstruction : OrderedRoots) {
+ // Check if this potential root went through the checking process and we can
+ // deinterleave it.
+ if (!RootToNode.count(RootInstruction))
+ continue;
+
+ IRBuilder<> Builder(RootInstruction);
+ auto RootNode = RootToNode[RootInstruction];
+ Value *R = replaceNode(Builder, RootNode.get());
+
+ if (RootNode->Operation ==
+ ComplexDeinterleavingOperation::ReductionOperation) {
+ auto *RootReal = cast<Instruction>(RootNode->Real);
+ auto *RootImag = cast<Instruction>(RootNode->Imag);
+ ReductionInfo[RootReal].first->removeIncomingValue(BackEdge);
+ ReductionInfo[RootImag].first->removeIncomingValue(BackEdge);
+ DeadInstrRoots.push_back(cast<Instruction>(RootReal));
+ DeadInstrRoots.push_back(cast<Instruction>(RootImag));
+ } else {
+ assert(R && "Unable to find replacement for RootInstruction");
+ DeadInstrRoots.push_back(RootInstruction);
+ RootInstruction->replaceAllUsesWith(R);
+ }
+ }
+
+ for (auto *I : DeadInstrRoots)
+ RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
}
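The rewritten pass anchors roots on experimental_vector_interleave2 calls and turns deinterleaving shuffles into experimental_vector_deinterleave2; the standalone model below (plain C++ over std::vector, not LLVM IR) demonstrates the round-trip property this pairing relies on.

#include <cassert>
#include <utility>
#include <vector>

// Model of the interleave2 / deinterleave2 pair: interleaving the real and
// imaginary halves and deinterleaving the result round-trips both vectors.
static std::vector<float> interleave2(const std::vector<float> &Real,
                                      const std::vector<float> &Imag) {
  std::vector<float> Out;
  for (size_t I = 0; I < Real.size(); ++I) {
    Out.push_back(Real[I]);
    Out.push_back(Imag[I]);
  }
  return Out;
}

static std::pair<std::vector<float>, std::vector<float>>
deinterleave2(const std::vector<float> &V) {
  std::vector<float> Real, Imag;
  for (size_t I = 0; I + 1 < V.size(); I += 2) {
    Real.push_back(V[I]);
    Imag.push_back(V[I + 1]);
  }
  return {Real, Imag};
}

int main() {
  std::vector<float> Real = {1, 2, 3, 4}, Imag = {5, 6, 7, 8};
  auto [R, I] = deinterleave2(interleave2(Real, Imag));
  assert(R == Real && I == Imag);
  return 0;
}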
diff --git a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index eb2d449bc4af..106db7c51f27 100644
--- a/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -49,7 +49,7 @@ CriticalAntiDepBreaker::~CriticalAntiDepBreaker() = default;
void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
const unsigned BBSize = BB->size();
- for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ for (unsigned i = 1, e = TRI->getNumRegs(); i != e; ++i) {
// Clear out the register class data.
Classes[i] = nullptr;
@@ -111,7 +111,7 @@ void CriticalAntiDepBreaker::Observe(MachineInstr &MI, unsigned Count,
return;
assert(Count < InsertPosIndex && "Instruction index out of expected range!");
- for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+ for (unsigned Reg = 1; Reg != TRI->getNumRegs(); ++Reg) {
if (KillIndices[Reg] != ~0u) {
// If Reg is currently live, then mark that it can't be renamed as
// we don't know the extent of its live-range anymore (now that it
@@ -213,9 +213,8 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
if (MO.isUse() && Special) {
if (!KeepRegs.test(Reg)) {
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- KeepRegs.set(*SubRegs);
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ KeepRegs.set(SubReg);
}
}
}
@@ -238,13 +237,11 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
// itself can't be changed.
if (MI.isRegTiedToUseOperand(I) &&
Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) {
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs) {
- KeepRegs.set(*SubRegs);
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg)) {
+ KeepRegs.set(SubReg);
}
- for (MCSuperRegIterator SuperRegs(Reg, TRI);
- SuperRegs.isValid(); ++SuperRegs) {
- KeepRegs.set(*SuperRegs);
+ for (MCPhysReg SuperReg : TRI->superregs(Reg)) {
+ KeepRegs.set(SuperReg);
}
}
}
@@ -264,14 +261,11 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
if (MO.isRegMask()) {
auto ClobbersPhysRegAndSubRegs = [&](unsigned PhysReg) {
- for (MCSubRegIterator SRI(PhysReg, TRI, true); SRI.isValid(); ++SRI)
- if (!MO.clobbersPhysReg(*SRI))
- return false;
-
- return true;
+ return all_of(TRI->subregs_inclusive(PhysReg),
+ [&](MCPhysReg SR) { return MO.clobbersPhysReg(SR); });
};
- for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
+ for (unsigned i = 1, e = TRI->getNumRegs(); i != e; ++i) {
if (ClobbersPhysRegAndSubRegs(i)) {
DefIndices[i] = Count;
KillIndices[i] = ~0u;
@@ -297,8 +291,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
// For the reg itself and all subregs: update the def to current;
// reset the kill state, any restrictions, and references.
- for (MCSubRegIterator SRI(Reg, TRI, true); SRI.isValid(); ++SRI) {
- unsigned SubregReg = *SRI;
+ for (MCPhysReg SubregReg : TRI->subregs_inclusive(Reg)) {
DefIndices[SubregReg] = Count;
KillIndices[SubregReg] = ~0u;
Classes[SubregReg] = nullptr;
@@ -307,8 +300,8 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
KeepRegs.reset(SubregReg);
}
// Conservatively mark super-registers as unusable.
- for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
- Classes[*SR] = reinterpret_cast<TargetRegisterClass *>(-1);
+ for (MCPhysReg SR : TRI->superregs(Reg))
+ Classes[SR] = reinterpret_cast<TargetRegisterClass *>(-1);
}
}
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
@@ -470,7 +463,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
LLVM_DEBUG(dbgs() << "Critical path has total latency "
<< (Max->getDepth() + Max->Latency) << "\n");
LLVM_DEBUG(dbgs() << "Available regs:");
- for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
+ for (unsigned Reg = 1; Reg < TRI->getNumRegs(); ++Reg) {
if (KillIndices[Reg] == ~0u)
LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI));
}
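These hunks replace manual MCSubRegIterator loops with range-based iteration over TRI->subregs_inclusive() and fold one of them into an all_of predicate; the sketch below shows the same rewrite on an ordinary container (std::all_of standing in for llvm::all_of, clobbers for MO.clobbersPhysReg).

#include <algorithm>
#include <cassert>
#include <vector>

// Stand-in for "does this operand clobber the given (sub)register?".
static bool clobbers(unsigned Reg) { return Reg != 7; }

int main() {
  // Stand-in for TRI->subregs_inclusive(PhysReg).
  std::vector<unsigned> SubRegsInclusive = {4, 5, 6};

  // Explicit loop with an early exit on the first miss.
  bool ClobbersAllLoop = true;
  for (unsigned SR : SubRegsInclusive)
    if (!clobbers(SR)) {
      ClobbersAllLoop = false;
      break;
    }

  // New style: a single all_of over the range, as in the patch.
  bool ClobbersAllAllOf = std::all_of(SubRegsInclusive.begin(),
                                      SubRegsInclusive.end(), clobbers);

  assert(ClobbersAllLoop == ClobbersAllAllOf);
  return 0;
}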
diff --git a/llvm/lib/CodeGen/DFAPacketizer.cpp b/llvm/lib/CodeGen/DFAPacketizer.cpp
index 34fb1d286a58..48bb4a07662e 100644
--- a/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -29,8 +29,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
-#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrDesc.h"
@@ -98,34 +96,6 @@ unsigned DFAPacketizer::getUsedResources(unsigned InstIdx) {
return RS[InstIdx] ^ RS[InstIdx - 1];
}
-namespace llvm {
-
-// This class extends ScheduleDAGInstrs and overrides the schedule method
-// to build the dependence graph.
-class DefaultVLIWScheduler : public ScheduleDAGInstrs {
-private:
- AAResults *AA;
- /// Ordered list of DAG postprocessing steps.
- std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
-
-public:
- DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
- AAResults *AA);
-
- // Actual scheduling work.
- void schedule() override;
-
- /// DefaultVLIWScheduler takes ownership of the Mutation object.
- void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
- Mutations.push_back(std::move(Mutation));
- }
-
-protected:
- void postprocessDAG();
-};
-
-} // end namespace llvm
-
DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
MachineLoopInfo &MLI,
AAResults *AA)
@@ -134,7 +104,7 @@ DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
}
/// Apply each ScheduleDAGMutation step in order.
-void DefaultVLIWScheduler::postprocessDAG() {
+void DefaultVLIWScheduler::postProcessDAG() {
for (auto &M : Mutations)
M->apply(this);
}
@@ -142,7 +112,7 @@ void DefaultVLIWScheduler::postprocessDAG() {
void DefaultVLIWScheduler::schedule() {
// Build the scheduling graph.
buildSchedGraph(AA);
- postprocessDAG();
+ postProcessDAG();
}
VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf,
@@ -264,7 +234,7 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
"added to packet\n "
<< MI);
// End the packet if resource is not available, or if the instruction
- // shoud not be added to the current packet.
+ // should not be added to the current packet.
endPacket(MBB, MI);
}
diff --git a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index e36db43567c5..6a7de3b241fe 100644
--- a/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -31,8 +31,8 @@ namespace {
class DeadMachineInstructionElim : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
- const MachineRegisterInfo *MRI;
- const TargetInstrInfo *TII;
+ const MachineRegisterInfo *MRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
LiveRegUnits LivePhysRegs;
public:
@@ -75,27 +75,25 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
return false;
// Examine each operand.
- for (const MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.isDef()) {
- Register Reg = MO.getReg();
- if (Reg.isPhysical()) {
- // Don't delete live physreg defs, or any reserved register defs.
- if (!LivePhysRegs.available(Reg) || MRI->isReserved(Reg))
- return false;
- } else {
- if (MO.isDead()) {
+ for (const MachineOperand &MO : MI->all_defs()) {
+ Register Reg = MO.getReg();
+ if (Reg.isPhysical()) {
+ // Don't delete live physreg defs, or any reserved register defs.
+ if (!LivePhysRegs.available(Reg) || MRI->isReserved(Reg))
+ return false;
+ } else {
+ if (MO.isDead()) {
#ifndef NDEBUG
- // Basic check on the register. All of them should be 'undef'.
- for (auto &U : MRI->use_nodbg_operands(Reg))
- assert(U.isUndef() && "'Undef' use on a 'dead' register is found!");
+ // Basic check on the register. All of them should be 'undef'.
+ for (auto &U : MRI->use_nodbg_operands(Reg))
+ assert(U.isUndef() && "'Undef' use on a 'dead' register is found!");
#endif
- continue;
- }
- for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) {
- if (&Use != MI)
- // This def has a non-debug use. Don't delete the instruction!
- return false;
- }
+ continue;
+ }
+ for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) {
+ if (&Use != MI)
+ // This def has a non-debug use. Don't delete the instruction!
+ return false;
}
}
}
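The hunk above swaps the manual isReg()/isDef() filtering for iteration over MI->all_defs(); a rough C++20 analogue of such a filtered operand view is sketched below (the Operand type is a toy, not the MachineOperand API).

#include <cassert>
#include <ranges>
#include <vector>

// Toy operand record; all_defs() behaves like a filtered view over the
// operand list, so the loop body no longer needs isReg()/isDef() checks.
struct Operand {
  bool IsReg;
  bool IsDef;
  unsigned Reg;
};

int main() {
  std::vector<Operand> Operands = {
      {true, true, 1}, {true, false, 2}, {false, false, 0}, {true, true, 3}};

  auto AllDefs = Operands | std::views::filter([](const Operand &O) {
                   return O.IsReg && O.IsDef;
                 });

  unsigned Count = 0;
  for (const Operand &Def : AllDefs) {
    assert(Def.IsReg && Def.IsDef);
    ++Count;
  }
  assert(Count == 2);
  return 0;
}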
diff --git a/llvm/lib/CodeGen/DetectDeadLanes.cpp b/llvm/lib/CodeGen/DetectDeadLanes.cpp
index bbb89855cfff..86e9f3abe010 100644
--- a/llvm/lib/CodeGen/DetectDeadLanes.cpp
+++ b/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -25,7 +25,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/BitVector.h"
+#include "llvm/CodeGen/DetectDeadLanes.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -33,98 +33,19 @@
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include <deque>
using namespace llvm;
#define DEBUG_TYPE "detect-dead-lanes"
-namespace {
-
-/// Contains a bitmask of which lanes of a given virtual register are
-/// defined and which ones are actually used.
-struct VRegInfo {
- LaneBitmask UsedLanes;
- LaneBitmask DefinedLanes;
-};
-
-class DetectDeadLanes : public MachineFunctionPass {
-public:
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- static char ID;
- DetectDeadLanes() : MachineFunctionPass(ID) {}
-
- StringRef getPassName() const override { return "Detect Dead Lanes"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
-private:
- /// Add used lane bits on the register used by operand \p MO. This translates
- /// the bitmask based on the operands subregister, and puts the register into
- /// the worklist if any new bits were added.
- void addUsedLanesOnOperand(const MachineOperand &MO, LaneBitmask UsedLanes);
-
- /// Given a bitmask \p UsedLanes for the used lanes on a def output of a
- /// COPY-like instruction determine the lanes used on the use operands
- /// and call addUsedLanesOnOperand() for them.
- void transferUsedLanesStep(const MachineInstr &MI, LaneBitmask UsedLanes);
-
- /// Given a use regiser operand \p Use and a mask of defined lanes, check
- /// if the operand belongs to a lowersToCopies() instruction, transfer the
- /// mask to the def and put the instruction into the worklist.
- void transferDefinedLanesStep(const MachineOperand &Use,
- LaneBitmask DefinedLanes);
-
- /// Given a mask \p DefinedLanes of lanes defined at operand \p OpNum
- /// of COPY-like instruction, determine which lanes are defined at the output
- /// operand \p Def.
- LaneBitmask transferDefinedLanes(const MachineOperand &Def, unsigned OpNum,
- LaneBitmask DefinedLanes) const;
-
- /// Given a mask \p UsedLanes used from the output of instruction \p MI
- /// determine which lanes are used from operand \p MO of this instruction.
- LaneBitmask transferUsedLanes(const MachineInstr &MI, LaneBitmask UsedLanes,
- const MachineOperand &MO) const;
-
- std::pair<bool, bool> runOnce(MachineFunction &MF);
-
- LaneBitmask determineInitialDefinedLanes(unsigned Reg);
- LaneBitmask determineInitialUsedLanes(unsigned Reg);
-
- bool isUndefRegAtInput(const MachineOperand &MO,
- const VRegInfo &RegInfo) const;
-
- bool isUndefInput(const MachineOperand &MO, bool *CrossCopy) const;
-
- const MachineRegisterInfo *MRI;
- const TargetRegisterInfo *TRI;
-
- void PutInWorklist(unsigned RegIdx) {
- if (WorklistMembers.test(RegIdx))
- return;
- WorklistMembers.set(RegIdx);
- Worklist.push_back(RegIdx);
- }
-
- VRegInfo *VRegInfos;
- /// Worklist containing virtreg indexes.
- std::deque<unsigned> Worklist;
- BitVector WorklistMembers;
- /// This bitvector is set for each vreg index where the vreg is defined
- /// by an instruction where lowersToCopies()==true.
- BitVector DefinedByCopy;
-};
-
-} // end anonymous namespace
-
-char DetectDeadLanes::ID = 0;
-char &llvm::DetectDeadLanesID = DetectDeadLanes::ID;
-
-INITIALIZE_PASS(DetectDeadLanes, DEBUG_TYPE, "Detect Dead Lanes", false, false)
+DeadLaneDetector::DeadLaneDetector(const MachineRegisterInfo *MRI,
+ const TargetRegisterInfo *TRI)
+ : MRI(MRI), TRI(TRI) {
+ unsigned NumVirtRegs = MRI->getNumVirtRegs();
+ VRegInfos = std::unique_ptr<VRegInfo[]>(new VRegInfo[NumVirtRegs]);
+ WorklistMembers.resize(NumVirtRegs);
+ DefinedByCopy.resize(NumVirtRegs);
+}
/// Returns true if \p MI will get lowered to a series of COPY instructions.
/// We call this a COPY-like instruction.
@@ -159,11 +80,11 @@ static bool isCrossCopy(const MachineRegisterInfo &MRI,
unsigned DstSubIdx = 0;
switch (MI.getOpcode()) {
case TargetOpcode::INSERT_SUBREG:
- if (MI.getOperandNo(&MO) == 2)
+ if (MO.getOperandNo() == 2)
DstSubIdx = MI.getOperand(3).getImm();
break;
case TargetOpcode::REG_SEQUENCE: {
- unsigned OpNum = MI.getOperandNo(&MO);
+ unsigned OpNum = MO.getOperandNo();
DstSubIdx = MI.getOperand(OpNum+1).getImm();
break;
}
@@ -184,8 +105,8 @@ static bool isCrossCopy(const MachineRegisterInfo &MRI,
return !TRI.getCommonSubClass(SrcRC, DstRC);
}
-void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
- LaneBitmask UsedLanes) {
+void DeadLaneDetector::addUsedLanesOnOperand(const MachineOperand &MO,
+ LaneBitmask UsedLanes) {
if (!MO.readsReg())
return;
Register MOReg = MO.getReg();
@@ -198,7 +119,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
UsedLanes &= MRI->getMaxLaneMaskForVReg(MOReg);
unsigned MORegIdx = Register::virtReg2Index(MOReg);
- VRegInfo &MORegInfo = VRegInfos[MORegIdx];
+ DeadLaneDetector::VRegInfo &MORegInfo = VRegInfos[MORegIdx];
LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes;
// Any change at all?
if ((UsedLanes & ~PrevUsedLanes).none())
@@ -210,8 +131,8 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
PutInWorklist(MORegIdx);
}
-void DetectDeadLanes::transferUsedLanesStep(const MachineInstr &MI,
- LaneBitmask UsedLanes) {
+void DeadLaneDetector::transferUsedLanesStep(const MachineInstr &MI,
+ LaneBitmask UsedLanes) {
for (const MachineOperand &MO : MI.uses()) {
if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
@@ -220,10 +141,11 @@ void DetectDeadLanes::transferUsedLanesStep(const MachineInstr &MI,
}
}
-LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI,
- LaneBitmask UsedLanes,
- const MachineOperand &MO) const {
- unsigned OpNum = MI.getOperandNo(&MO);
+LaneBitmask
+DeadLaneDetector::transferUsedLanes(const MachineInstr &MI,
+ LaneBitmask UsedLanes,
+ const MachineOperand &MO) const {
+ unsigned OpNum = MO.getOperandNo();
assert(lowersToCopies(MI) &&
DefinedByCopy[Register::virtReg2Index(MI.getOperand(0).getReg())]);
@@ -265,8 +187,8 @@ LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI,
}
}
-void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use,
- LaneBitmask DefinedLanes) {
+void DeadLaneDetector::transferDefinedLanesStep(const MachineOperand &Use,
+ LaneBitmask DefinedLanes) {
if (!Use.readsReg())
return;
// Check whether the operand writes a vreg and is part of a COPY-like
@@ -286,7 +208,7 @@ void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use,
if (!DefinedByCopy.test(DefRegIdx))
return;
- unsigned OpNum = MI.getOperandNo(&Use);
+ unsigned OpNum = Use.getOperandNo();
DefinedLanes =
TRI->reverseComposeSubRegIndexLaneMask(Use.getSubReg(), DefinedLanes);
DefinedLanes = transferDefinedLanes(Def, OpNum, DefinedLanes);
@@ -301,8 +223,8 @@ void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use,
PutInWorklist(DefRegIdx);
}
-LaneBitmask DetectDeadLanes::transferDefinedLanes(const MachineOperand &Def,
- unsigned OpNum, LaneBitmask DefinedLanes) const {
+LaneBitmask DeadLaneDetector::transferDefinedLanes(
+ const MachineOperand &Def, unsigned OpNum, LaneBitmask DefinedLanes) const {
const MachineInstr &MI = *Def.getParent();
// Translate DefinedLanes if necessary.
switch (MI.getOpcode()) {
@@ -343,7 +265,7 @@ LaneBitmask DetectDeadLanes::transferDefinedLanes(const MachineOperand &Def,
return DefinedLanes;
}
-LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
+LaneBitmask DeadLaneDetector::determineInitialDefinedLanes(unsigned Reg) {
// Live-In or unused registers have no definition but are considered fully
// defined.
if (!MRI->hasOneDef(Reg))
@@ -395,7 +317,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
MOSubReg, MODefinedLanes);
}
- unsigned OpNum = DefMI.getOperandNo(&MO);
+ unsigned OpNum = MO.getOperandNo();
DefinedLanes |= transferDefinedLanes(Def, OpNum, MODefinedLanes);
}
return DefinedLanes;
@@ -408,7 +330,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
return MRI->getMaxLaneMaskForVReg(Reg);
}
-LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) {
+LaneBitmask DeadLaneDetector::determineInitialUsedLanes(unsigned Reg) {
LaneBitmask UsedLanes = LaneBitmask::getNone();
for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
if (!MO.readsReg())
@@ -449,14 +371,58 @@ LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) {
return UsedLanes;
}
-bool DetectDeadLanes::isUndefRegAtInput(const MachineOperand &MO,
- const VRegInfo &RegInfo) const {
+namespace {
+
+class DetectDeadLanes : public MachineFunctionPass {
+public:
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+ DetectDeadLanes() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override { return "Detect Dead Lanes"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ /// Update the operand status.
+ /// The first return value shows whether MF has been changed.
+ /// The second return value indicates that we need to call
+ /// DeadLaneDetector::computeSubRegisterLaneBitInfo and this function again
+ /// to propagate changes.
+ std::pair<bool, bool>
+ modifySubRegisterOperandStatus(const DeadLaneDetector &DLD,
+ MachineFunction &MF);
+
+ bool isUndefRegAtInput(const MachineOperand &MO,
+ const DeadLaneDetector::VRegInfo &RegInfo) const;
+
+ bool isUndefInput(const DeadLaneDetector &DLD, const MachineOperand &MO,
+ bool *CrossCopy) const;
+
+ const MachineRegisterInfo *MRI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+};
+
+} // end anonymous namespace
+
+char DetectDeadLanes::ID = 0;
+char &llvm::DetectDeadLanesID = DetectDeadLanes::ID;
+
+INITIALIZE_PASS(DetectDeadLanes, DEBUG_TYPE, "Detect Dead Lanes", false, false)
+
+bool DetectDeadLanes::isUndefRegAtInput(
+ const MachineOperand &MO, const DeadLaneDetector::VRegInfo &RegInfo) const {
unsigned SubReg = MO.getSubReg();
LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
return (RegInfo.DefinedLanes & RegInfo.UsedLanes & Mask).none();
}
-bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
+bool DetectDeadLanes::isUndefInput(const DeadLaneDetector &DLD,
+ const MachineOperand &MO,
bool *CrossCopy) const {
if (!MO.isUse())
return false;
@@ -468,11 +434,11 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
if (!DefReg.isVirtual())
return false;
unsigned DefRegIdx = Register::virtReg2Index(DefReg);
- if (!DefinedByCopy.test(DefRegIdx))
+ if (!DLD.isDefinedByCopy(DefRegIdx))
return false;
- const VRegInfo &DefRegInfo = VRegInfos[DefRegIdx];
- LaneBitmask UsedLanes = transferUsedLanes(MI, DefRegInfo.UsedLanes, MO);
+ const DeadLaneDetector::VRegInfo &DefRegInfo = DLD.getVRegInfo(DefRegIdx);
+ LaneBitmask UsedLanes = DLD.transferUsedLanes(MI, DefRegInfo.UsedLanes, MO);
if (UsedLanes.any())
return false;
@@ -484,7 +450,7 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
return true;
}
-std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
+void DeadLaneDetector::computeSubRegisterLaneBitInfo() {
// First pass: Populate defs/uses of vregs with initial values
unsigned NumVirtRegs = MRI->getNumVirtRegs();
for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
@@ -524,7 +490,11 @@ std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
}
dbgs() << "\n";
});
+}
+std::pair<bool, bool>
+DetectDeadLanes::modifySubRegisterOperandStatus(const DeadLaneDetector &DLD,
+ MachineFunction &MF) {
bool Changed = false;
bool Again = false;
// Mark operands as dead/unused.
@@ -537,7 +507,7 @@ std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
if (!Reg.isVirtual())
continue;
unsigned RegIdx = Register::virtReg2Index(Reg);
- const VRegInfo &RegInfo = VRegInfos[RegIdx];
+ const DeadLaneDetector::VRegInfo &RegInfo = DLD.getVRegInfo(RegIdx);
if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes.none()) {
LLVM_DEBUG(dbgs()
<< "Marking operand '" << MO << "' as dead in " << MI);
@@ -551,7 +521,7 @@ std::pair<bool, bool> DetectDeadLanes::runOnce(MachineFunction &MF) {
<< "Marking operand '" << MO << "' as undef in " << MI);
MO.setIsUndef();
Changed = true;
- } else if (isUndefInput(MO, &CrossCopy)) {
+ } else if (isUndefInput(DLD, MO, &CrossCopy)) {
LLVM_DEBUG(dbgs()
<< "Marking operand '" << MO << "' as undef in " << MI);
MO.setIsUndef();
@@ -581,21 +551,16 @@ bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
TRI = MRI->getTargetRegisterInfo();
- unsigned NumVirtRegs = MRI->getNumVirtRegs();
- VRegInfos = new VRegInfo[NumVirtRegs];
- WorklistMembers.resize(NumVirtRegs);
- DefinedByCopy.resize(NumVirtRegs);
+ DeadLaneDetector DLD(MRI, TRI);
bool Changed = false;
bool Again;
do {
+ DLD.computeSubRegisterLaneBitInfo();
bool LocalChanged;
- std::tie(LocalChanged, Again) = runOnce(MF);
+ std::tie(LocalChanged, Again) = modifySubRegisterOperandStatus(DLD, MF);
Changed |= LocalChanged;
- } while(Again);
+ } while (Again);
- DefinedByCopy.clear();
- WorklistMembers.clear();
- delete[] VRegInfos;
return Changed;
}
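The refactor moves the lane computation into a reusable DeadLaneDetector that the pass drives with a fixed-point loop (computeSubRegisterLaneBitInfo, then modifySubRegisterOperandStatus, repeated while the latter asks for another round); a minimal standalone sketch of that driver shape, with illustrative stand-in types, follows.

#include <cassert>
#include <utility>

// Stand-in for the analysis object (DeadLaneDetector in the patch).
struct Analysis {
  int Rounds = 0;
  void compute() { ++Rounds; }
};

// Stand-in for the pass step that rewrites operands and reports
// {Changed, Again}, mirroring modifySubRegisterOperandStatus.
struct Mutator {
  int Remaining = 3;
  std::pair<bool, bool> apply(const Analysis &) {
    bool Changed = Remaining > 0;
    if (Remaining > 0)
      --Remaining;
    return {Changed, Remaining > 0};
  }
};

int main() {
  Analysis DLD;
  Mutator M;
  bool Changed = false;
  bool Again;
  do {
    DLD.compute();
    auto [LocalChanged, LocalAgain] = M.apply(DLD);
    Changed |= LocalChanged;
    Again = LocalAgain;
  } while (Again);
  assert(Changed && DLD.Rounds == 3);
  return 0;
}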
diff --git a/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index aa81f618dc59..32c94de7280c 100644
--- a/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -14,10 +14,8 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -28,6 +26,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -36,6 +35,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cstddef>
diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 00626604d81c..61867d74bfa2 100644
--- a/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -119,10 +119,10 @@ public:
SmallVector<PHIInfo, 8> PHIs;
-private:
/// The branch condition determined by analyzeBranch.
SmallVector<MachineOperand, 4> Cond;
+private:
/// Instructions in Head that define values used by the conditional blocks.
/// The hoisted instructions must be inserted after these instructions.
SmallPtrSet<MachineInstr*, 8> InsertAfter;
@@ -263,9 +263,8 @@ bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) {
// Remember clobbered regunits.
if (MO.isDef() && Reg.isPhysical())
- for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
- ++Units)
- ClobberedRegUnits.set(*Units);
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
+ ClobberedRegUnits.set(Unit);
if (!MO.readsReg() || !Reg.isVirtual())
continue;
@@ -343,8 +342,11 @@ bool SSAIfConv::canPredicateInstrs(MachineBasicBlock *MBB) {
// Apply predicate to all instructions in the machine block.
void SSAIfConv::PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate) {
auto Condition = Cond;
- if (ReversePredicate)
- TII->reverseBranchCondition(Condition);
+ if (ReversePredicate) {
+ bool CanRevCond = !TII->reverseBranchCondition(Condition);
+ assert(CanRevCond && "Reversed predicate is not supported");
+ (void)CanRevCond;
+ }
// Terminators don't need to be predicated as they will be removed.
for (MachineBasicBlock::iterator I = MBB->begin(),
E = MBB->getFirstTerminator();
@@ -391,19 +393,17 @@ bool SSAIfConv::findInsertionPoint() {
continue;
// I clobbers Reg, so it isn't live before I.
if (MO.isDef())
- for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
- ++Units)
- LiveRegUnits.erase(*Units);
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
+ LiveRegUnits.erase(Unit);
// Unless I reads Reg.
if (MO.readsReg())
Reads.push_back(Reg.asMCReg());
}
// Anything read by I is live before I.
while (!Reads.empty())
- for (MCRegUnitIterator Units(Reads.pop_back_val(), TRI); Units.isValid();
- ++Units)
- if (ClobberedRegUnits.test(*Units))
- LiveRegUnits.insert(*Units);
+ for (MCRegUnit Unit : TRI->regunits(Reads.pop_back_val()))
+ if (ClobberedRegUnits.test(Unit))
+ LiveRegUnits.insert(Unit);
// We can't insert before a terminator.
if (I != FirstTerm && I->isTerminator())
@@ -760,14 +760,14 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks,
namespace {
class EarlyIfConverter : public MachineFunctionPass {
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
MCSchedModel SchedModel;
- MachineRegisterInfo *MRI;
- MachineDominatorTree *DomTree;
- MachineLoopInfo *Loops;
- MachineTraceMetrics *Traces;
- MachineTraceMetrics::Ensemble *MinInstr;
+ MachineRegisterInfo *MRI = nullptr;
+ MachineDominatorTree *DomTree = nullptr;
+ MachineLoopInfo *Loops = nullptr;
+ MachineTraceMetrics *Traces = nullptr;
+ MachineTraceMetrics::Ensemble *MinInstr = nullptr;
SSAIfConv IfConv;
public:
@@ -873,8 +873,40 @@ bool EarlyIfConverter::shouldConvertIf() {
if (Stress)
return true;
+ // Do not try to if-convert if the condition has a high chance of being
+ // predictable.
+ MachineLoop *CurrentLoop = Loops->getLoopFor(IfConv.Head);
+ // If the condition is in a loop, consider it predictable if the condition
+ // itself or all its operands are loop-invariant. E.g. this considers a load
+ // from a loop-invariant address predictable; we were unable to prove that it
+ // doesn't alias any of the memory-writes in the loop, but it is likely to
+ // read the same value multiple times.
+ if (CurrentLoop && any_of(IfConv.Cond, [&](MachineOperand &MO) {
+ if (!MO.isReg() || !MO.isUse())
+ return false;
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg))
+ return false;
+
+ MachineInstr *Def = MRI->getVRegDef(Reg);
+ return CurrentLoop->isLoopInvariant(*Def) ||
+ all_of(Def->operands(), [&](MachineOperand &Op) {
+ if (Op.isImm())
+ return true;
+ if (!Op.isReg() || !Op.isUse())
+ return false;
+ Register Reg = Op.getReg();
+ if (Register::isPhysicalRegister(Reg))
+ return false;
+
+ MachineInstr *Def = MRI->getVRegDef(Reg);
+ return CurrentLoop->isLoopInvariant(*Def);
+ });
+ }))
+ return false;
+
if (!MinInstr)
- MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+ MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred());
MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred());
@@ -1084,13 +1116,13 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
namespace {
class EarlyIfPredicator : public MachineFunctionPass {
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
TargetSchedModel SchedModel;
- MachineRegisterInfo *MRI;
- MachineDominatorTree *DomTree;
- MachineBranchProbabilityInfo *MBPI;
- MachineLoopInfo *Loops;
+ MachineRegisterInfo *MRI = nullptr;
+ MachineDominatorTree *DomTree = nullptr;
+ MachineBranchProbabilityInfo *MBPI = nullptr;
+ MachineLoopInfo *Loops = nullptr;
SSAIfConv IfConv;
public:
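The new predictability check in EarlyIfConverter::shouldConvertIf() above is easiest to picture at the source level; the following hypothetical C++ sketch (illustrative only, not part of the patch) shows the kind of branch it now leaves alone: the condition is fed by a load from a loop-invariant address that could not be hoisted (it may alias the store to out[i]) but almost certainly reads the same value every iteration, so the branch predicts well and if-conversion would only lengthen the fast path.

// Hypothetical example; names and code are illustrative, not from the patch.
void scaleOrCopy(int *out, const int *in, const int *mode, int n) {
  for (int i = 0; i < n; ++i) {
    if (*mode)            // load from a loop-invariant address feeds the
      out[i] = in[i] * 2; // branch: predictable, so do not if-convert
    else
      out[i] = in[i];
  }
}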
diff --git a/llvm/lib/CodeGen/ExecutionDomainFix.cpp b/llvm/lib/CodeGen/ExecutionDomainFix.cpp
index 9621ad4b1248..21a7d02a320c 100644
--- a/llvm/lib/CodeGen/ExecutionDomainFix.cpp
+++ b/llvm/lib/CodeGen/ExecutionDomainFix.cpp
@@ -318,7 +318,7 @@ void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
// If the collapsed operands force a single domain, propagate the collapse.
if (isPowerOf2_32(available)) {
- unsigned domain = countTrailingZeros(available);
+ unsigned domain = llvm::countr_zero(available);
TII->setExecutionDomain(*mi, domain);
visitHardInstr(mi, domain);
return;
diff --git a/llvm/lib/CodeGen/ExpandMemCmp.cpp b/llvm/lib/CodeGen/ExpandMemCmp.cpp
index 3838eaadd1d2..500f31bd8e89 100644
--- a/llvm/lib/CodeGen/ExpandMemCmp.cpp
+++ b/llvm/lib/CodeGen/ExpandMemCmp.cpp
@@ -71,18 +71,18 @@ class MemCmpExpansion {
ResultBlock() = default;
};
- CallInst *const CI;
+ CallInst *const CI = nullptr;
ResultBlock ResBlock;
const uint64_t Size;
unsigned MaxLoadSize = 0;
uint64_t NumLoadsNonOneByte = 0;
const uint64_t NumLoadsPerBlockForZeroCmp;
std::vector<BasicBlock *> LoadCmpBlocks;
- BasicBlock *EndBlock;
- PHINode *PhiRes;
+ BasicBlock *EndBlock = nullptr;
+ PHINode *PhiRes = nullptr;
const bool IsUsedForZeroCmp;
const DataLayout &DL;
- DomTreeUpdater *DTU;
+ DomTreeUpdater *DTU = nullptr;
IRBuilder<> Builder;
// Represents the decomposition in blocks of the expansion. For example,
// comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
@@ -288,17 +288,11 @@ MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
Align RhsAlign = RhsSource->getPointerAlignment(DL);
if (OffsetBytes > 0) {
auto *ByteType = Type::getInt8Ty(CI->getContext());
- LhsSource = Builder.CreateConstGEP1_64(
- ByteType, Builder.CreateBitCast(LhsSource, ByteType->getPointerTo()),
- OffsetBytes);
- RhsSource = Builder.CreateConstGEP1_64(
- ByteType, Builder.CreateBitCast(RhsSource, ByteType->getPointerTo()),
- OffsetBytes);
+ LhsSource = Builder.CreateConstGEP1_64(ByteType, LhsSource, OffsetBytes);
+ RhsSource = Builder.CreateConstGEP1_64(ByteType, RhsSource, OffsetBytes);
LhsAlign = commonAlignment(LhsAlign, OffsetBytes);
RhsAlign = commonAlignment(RhsAlign, OffsetBytes);
}
- LhsSource = Builder.CreateBitCast(LhsSource, LoadSizeType->getPointerTo());
- RhsSource = Builder.CreateBitCast(RhsSource, LoadSizeType->getPointerTo());
// Create a constant or a load from the source.
Value *Lhs = nullptr;
diff --git a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
index cc63984158c8..3a79f20f4732 100644
--- a/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -28,8 +28,8 @@ using namespace llvm;
namespace {
struct ExpandPostRA : public MachineFunctionPass {
private:
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
public:
static char ID; // Pass identification, replacement for typeid
@@ -47,9 +47,6 @@ public:
private:
bool LowerSubregToReg(MachineInstr *MI);
- bool LowerCopy(MachineInstr *MI);
-
- void TransferImplicitOperands(MachineInstr *MI);
};
} // end anonymous namespace
@@ -59,25 +56,6 @@ char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID;
INITIALIZE_PASS(ExpandPostRA, DEBUG_TYPE,
"Post-RA pseudo instruction expansion pass", false, false)
-/// TransferImplicitOperands - MI is a pseudo-instruction, and the lowered
-/// replacement instructions immediately precede it. Copy any implicit
-/// operands from MI to the replacement instruction.
-void ExpandPostRA::TransferImplicitOperands(MachineInstr *MI) {
- MachineBasicBlock::iterator CopyMI = MI;
- --CopyMI;
-
- Register DstReg = MI->getOperand(0).getReg();
- for (const MachineOperand &MO : MI->implicit_operands()) {
- CopyMI->addOperand(MO);
-
- // Be conservative about preserving kills when subregister defs are
- // involved. If there was implicit kill of a super-register overlapping the
- // copy result, we would kill the subregisters previous copies defined.
- if (MO.isKill() && TRI->regsOverlap(DstReg, MO.getReg()))
- CopyMI->getOperand(CopyMI->getNumOperands() - 1).setIsKill(false);
- }
-}
-
bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
MachineBasicBlock *MBB = MI->getParent();
assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
@@ -137,50 +115,6 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
return true;
}
-bool ExpandPostRA::LowerCopy(MachineInstr *MI) {
-
- if (MI->allDefsAreDead()) {
- LLVM_DEBUG(dbgs() << "dead copy: " << *MI);
- MI->setDesc(TII->get(TargetOpcode::KILL));
- LLVM_DEBUG(dbgs() << "replaced by: " << *MI);
- return true;
- }
-
- MachineOperand &DstMO = MI->getOperand(0);
- MachineOperand &SrcMO = MI->getOperand(1);
-
- bool IdentityCopy = (SrcMO.getReg() == DstMO.getReg());
- if (IdentityCopy || SrcMO.isUndef()) {
- LLVM_DEBUG(dbgs() << (IdentityCopy ? "identity copy: " : "undef copy: ")
- << *MI);
- // No need to insert an identity copy instruction, but replace with a KILL
- // if liveness is changed.
- if (SrcMO.isUndef() || MI->getNumOperands() > 2) {
- // We must make sure the super-register gets killed. Replace the
- // instruction with KILL.
- MI->setDesc(TII->get(TargetOpcode::KILL));
- LLVM_DEBUG(dbgs() << "replaced by: " << *MI);
- return true;
- }
- // Vanilla identity copy.
- MI->eraseFromParent();
- return true;
- }
-
- LLVM_DEBUG(dbgs() << "real copy: " << *MI);
- TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
- DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
-
- if (MI->getNumOperands() > 2)
- TransferImplicitOperands(MI);
- LLVM_DEBUG({
- MachineBasicBlock::iterator dMI = MI;
- dbgs() << "replaced by: " << *(--dMI);
- });
- MI->eraseFromParent();
- return true;
-}
-
/// runOnMachineFunction - Reduce subregister inserts and extracts to register
/// copies.
///
@@ -211,7 +145,8 @@ bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) {
MadeChange |= LowerSubregToReg(&MI);
break;
case TargetOpcode::COPY:
- MadeChange |= LowerCopy(&MI);
+ TII->lowerCopy(&MI, TRI);
+ MadeChange = true;
break;
case TargetOpcode::DBG_VALUE:
continue;
diff --git a/llvm/lib/CodeGen/ExpandReductions.cpp b/llvm/lib/CodeGen/ExpandReductions.cpp
index f08c47d220ea..79b6dc9154b3 100644
--- a/llvm/lib/CodeGen/ExpandReductions.cpp
+++ b/llvm/lib/CodeGen/ExpandReductions.cpp
@@ -1,4 +1,4 @@
-//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
+//===- ExpandReductions.cpp - Expand reduction intrinsics -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -133,10 +133,38 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
}
break;
}
+ case Intrinsic::vector_reduce_and:
+ case Intrinsic::vector_reduce_or: {
+ // Canonicalize logical or/and reductions:
+ // Or reduction for i1 is represented as:
+ // %val = bitcast <ReduxWidth x i1> to iReduxWidth
+ // %res = cmp ne iReduxWidth %val, 0
+ // And reduction for i1 is represented as:
+ // %val = bitcast <ReduxWidth x i1> to iReduxWidth
+ // %res = cmp eq iReduxWidth %val, -1 (i.e. all bits set)
+ Value *Vec = II->getArgOperand(0);
+ auto *FTy = cast<FixedVectorType>(Vec->getType());
+ unsigned NumElts = FTy->getNumElements();
+ if (!isPowerOf2_32(NumElts))
+ continue;
+
+ if (FTy->getElementType() == Builder.getInt1Ty()) {
+ Rdx = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts));
+ if (ID == Intrinsic::vector_reduce_and) {
+ Rdx = Builder.CreateICmpEQ(
+ Rdx, ConstantInt::getAllOnesValue(Rdx->getType()));
+ } else {
+ assert(ID == Intrinsic::vector_reduce_or && "Expected or reduction.");
+ Rdx = Builder.CreateIsNotNull(Rdx);
+ }
+ break;
+ }
+
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
+ break;
+ }
case Intrinsic::vector_reduce_add:
case Intrinsic::vector_reduce_mul:
- case Intrinsic::vector_reduce_and:
- case Intrinsic::vector_reduce_or:
case Intrinsic::vector_reduce_xor:
case Intrinsic::vector_reduce_smax:
case Intrinsic::vector_reduce_smin:
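The i1 and/or reduction canonicalization above can be sanity-checked on a plain integer mask; the helpers below are a hypothetical sketch, not part of the patch. Bitcasting a <8 x i1> vector to i8 packs the lanes into bits, so the and-reduction is true exactly when every bit is set and the or-reduction is true when any bit is set.

#include <cstdint>

// Illustrative only: scalar analogue of the canonicalized i1 reductions.
bool andReduce8(uint8_t mask) { return mask == 0xFF; } // cmp eq %val, -1
bool orReduce8(uint8_t mask)  { return mask != 0; }    // cmp ne %val, 0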
diff --git a/llvm/lib/CodeGen/ExpandVectorPredication.cpp b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
index 5ee76ff567fb..9807be0bea39 100644
--- a/llvm/lib/CodeGen/ExpandVectorPredication.cpp
+++ b/llvm/lib/CodeGen/ExpandVectorPredication.cpp
@@ -171,6 +171,10 @@ struct CachingVPExpander {
Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
VPIntrinsic &PI);
+ /// Lower this VP fp call to an unpredicated fp call.
+ Value *expandPredicationToFPCall(IRBuilder<> &Builder, VPIntrinsic &PI,
+ unsigned UnpredicatedIntrinsicID);
+
/// Lower this VP reduction to a call to an unpredicated reduction intrinsic.
Value *expandPredicationInReduction(IRBuilder<> &Builder,
VPReductionIntrinsic &PI);
@@ -271,6 +275,38 @@ CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
return NewBinOp;
}
+Value *CachingVPExpander::expandPredicationToFPCall(
+ IRBuilder<> &Builder, VPIntrinsic &VPI, unsigned UnpredicatedIntrinsicID) {
+ assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) &&
+ "Implicitly dropping %evl in non-speculatable operator!");
+
+ switch (UnpredicatedIntrinsicID) {
+ case Intrinsic::fabs:
+ case Intrinsic::sqrt: {
+ Value *Op0 = VPI.getOperand(0);
+ Function *Fn = Intrinsic::getDeclaration(
+ VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
+ Value *NewOp = Builder.CreateCall(Fn, {Op0}, VPI.getName());
+ replaceOperation(*NewOp, VPI);
+ return NewOp;
+ }
+ case Intrinsic::experimental_constrained_fma:
+ case Intrinsic::experimental_constrained_fmuladd: {
+ Value *Op0 = VPI.getOperand(0);
+ Value *Op1 = VPI.getOperand(1);
+ Value *Op2 = VPI.getOperand(2);
+ Function *Fn = Intrinsic::getDeclaration(
+ VPI.getModule(), UnpredicatedIntrinsicID, {VPI.getType()});
+ Value *NewOp =
+ Builder.CreateConstrainedFPCall(Fn, {Op0, Op1, Op2}, VPI.getName());
+ replaceOperation(*NewOp, VPI);
+ return NewOp;
+ }
+ }
+
+ return nullptr;
+}
+
static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI,
Type *EltTy) {
bool Negative = false;
@@ -565,6 +601,15 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
switch (VPI.getIntrinsicID()) {
default:
break;
+ case Intrinsic::vp_fneg: {
+ Value *NewNegOp = Builder.CreateFNeg(VPI.getOperand(0), VPI.getName());
+ replaceOperation(*NewNegOp, VPI);
+ return NewNegOp;
+ }
+ case Intrinsic::vp_fabs:
+ return expandPredicationToFPCall(Builder, VPI, Intrinsic::fabs);
+ case Intrinsic::vp_sqrt:
+ return expandPredicationToFPCall(Builder, VPI, Intrinsic::sqrt);
case Intrinsic::vp_load:
case Intrinsic::vp_store:
case Intrinsic::vp_gather:
@@ -572,6 +617,10 @@ Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
return expandPredicationInMemoryIntrinsic(Builder, VPI);
}
+ if (auto CID = VPI.getConstrainedIntrinsicID())
+ if (Value *Call = expandPredicationToFPCall(Builder, VPI, *CID))
+ return Call;
+
return &VPI;
}
diff --git a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
index 55d939de426e..75504ef32250 100644
--- a/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
+++ b/llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp
@@ -388,7 +388,7 @@ public:
Register Reg = MO.getReg();
assert(Reg.isPhysical() && "Only physical regs are expected");
- if (isCalleeSaved(Reg) && (AllowGCPtrInCSR || !is_contained(GCRegs, Reg)))
+ if (isCalleeSaved(Reg) && (AllowGCPtrInCSR || !GCRegs.contains(Reg)))
continue;
LLVM_DEBUG(dbgs() << "Will spill " << printReg(Reg, &TRI) << " at index "
@@ -407,7 +407,6 @@ public:
void spillRegisters() {
for (Register Reg : RegsToSpill) {
int FI = CacheFI.getFrameIndex(Reg, EHPad);
- const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
NumSpilledRegisters++;
RegToSlotIdx[Reg] = FI;
@@ -419,6 +418,7 @@ public:
bool IsKill = true;
MachineBasicBlock::iterator InsertBefore(MI);
Reg = performCopyPropagation(Reg, InsertBefore, IsKill, TII, TRI);
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
LLVM_DEBUG(dbgs() << "Insert spill before " << *InsertBefore);
TII.storeRegToStackSlot(*MI.getParent(), InsertBefore, Reg, IsKill, FI,
diff --git a/llvm/lib/CodeGen/GCRootLowering.cpp b/llvm/lib/CodeGen/GCRootLowering.cpp
index 80feb0045406..c0ce37091933 100644
--- a/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -52,8 +52,8 @@ public:
/// in the machine code. It inserts labels at safe points and populates a
/// GCMetadata record for each function.
class GCMachineCodeAnalysis : public MachineFunctionPass {
- GCFunctionInfo *FI;
- const TargetInstrInfo *TII;
+ GCFunctionInfo *FI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
void FindSafePoints(MachineFunction &MF);
void VisitCallPoint(MachineBasicBlock::iterator CI);
diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
index 356d208fc881..e047996f9aa8 100644
--- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -217,10 +217,14 @@ void GISelCSEInfo::handleRemoveInst(MachineInstr *MI) {
}
void GISelCSEInfo::handleRecordedInsts() {
+ if (HandlingRecordedInstrs)
+ return;
+ HandlingRecordedInstrs = true;
while (!TemporaryInsts.empty()) {
auto *MI = TemporaryInsts.pop_back_val();
handleRecordedInst(MI);
}
+ HandlingRecordedInstrs = false;
}
bool GISelCSEInfo::shouldCSE(unsigned Opc) const {
@@ -392,9 +396,10 @@ GISelInstProfileBuilder::addNodeIDReg(Register Reg) const {
addNodeIDRegType(Ty);
if (const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(Reg)) {
- if (const auto *RB = RCOrRB.dyn_cast<const RegisterBank *>())
+ if (const auto *RB = dyn_cast_if_present<const RegisterBank *>(RCOrRB))
addNodeIDRegType(RB);
- else if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
+ else if (const auto *RC =
+ dyn_cast_if_present<const TargetRegisterClass *>(RCOrRB))
addNodeIDRegType(RC);
}
return *this;
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 89872259cfca..28c33e2038e4 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -846,7 +846,7 @@ void CallLowering::insertSRetLoads(MachineIRBuilder &MIRBuilder, Type *RetTy,
unsigned NumValues = SplitVTs.size();
Align BaseAlign = DL.getPrefTypeAlign(RetTy);
Type *RetPtrTy = RetTy->getPointerTo(DL.getAllocaAddrSpace());
- LLT OffsetLLTy = getLLTForType(*DL.getIntPtrType(RetPtrTy), DL);
+ LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetPtrTy), DL);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
@@ -876,8 +876,7 @@ void CallLowering::insertSRetStores(MachineIRBuilder &MIRBuilder, Type *RetTy,
unsigned NumValues = SplitVTs.size();
Align BaseAlign = DL.getPrefTypeAlign(RetTy);
unsigned AS = DL.getAllocaAddrSpace();
- LLT OffsetLLTy =
- getLLTForType(*DL.getIntPtrType(RetTy->getPointerTo(AS)), DL);
+ LLT OffsetLLTy = getLLTForType(*DL.getIndexType(RetTy->getPointerTo(AS)), DL);
MachinePointerInfo PtrInfo(AS);
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index af4bb1634746..cc7fb3ee1109 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -16,7 +16,7 @@
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -399,7 +399,8 @@ namespace {
/// Select a preference between two uses. CurrentUse is the current preference
/// while *ForCandidate is attributes of the candidate under consideration.
-PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
+PreferredTuple ChoosePreferredUse(MachineInstr &LoadMI,
+ PreferredTuple &CurrentUse,
const LLT TyForCandidate,
unsigned OpcodeForCandidate,
MachineInstr *MIForCandidate) {
@@ -425,8 +426,10 @@ PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
// Prefer sign extensions to zero extensions as sign-extensions tend to be
- // more expensive.
- if (CurrentUse.Ty == TyForCandidate) {
+ // more expensive. Don't do this if the load is already a zero-extend load
+ // though, otherwise we'll rewrite a zero-extend load into a sign-extend
+ // later.
+ if (!isa<GZExtLoad>(LoadMI) && CurrentUse.Ty == TyForCandidate) {
if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
OpcodeForCandidate == TargetOpcode::G_ZEXT)
return CurrentUse;
@@ -535,7 +538,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
// For non power-of-2 types, they will very likely be legalized into multiple
// loads. Don't bother trying to match them into extending loads.
- if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
+ if (!llvm::has_single_bit<uint32_t>(LoadValueTy.getSizeInBits()))
return false;
// Find the preferred type aside from the any-extends (unless it's the only
@@ -566,7 +569,7 @@ bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
.Action != LegalizeActions::Legal)
continue;
}
- Preferred = ChoosePreferredUse(Preferred,
+ Preferred = ChoosePreferredUse(MI, Preferred,
MRI.getType(UseMI.getOperand(0).getReg()),
UseMI.getOpcode(), &UseMI);
}
@@ -727,7 +730,7 @@ bool CombinerHelper::matchCombineLoadWithAndMask(MachineInstr &MI,
Register PtrReg = LoadMI->getPointerReg();
unsigned RegSize = RegTy.getSizeInBits();
uint64_t LoadSizeBits = LoadMI->getMemSizeInBits();
- unsigned MaskSizeBits = MaskVal.countTrailingOnes();
+ unsigned MaskSizeBits = MaskVal.countr_one();
// The mask may not be larger than the in-memory type, as it might cover sign
// extended bits
@@ -1189,16 +1192,22 @@ void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
// Check which instruction is first in the block so we don't break def-use
- // deps by "moving" the instruction incorrectly.
- if (dominates(MI, *OtherMI))
+ // deps by "moving" the instruction incorrectly. Also keep track of which
+ // instruction is first so we pick its operands, avoiding use-before-def
+ // bugs.
+ MachineInstr *FirstInst;
+ if (dominates(MI, *OtherMI)) {
Builder.setInstrAndDebugLoc(MI);
- else
+ FirstInst = &MI;
+ } else {
Builder.setInstrAndDebugLoc(*OtherMI);
+ FirstInst = OtherMI;
+ }
Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
: TargetOpcode::G_UDIVREM,
{DestDivReg, DestRemReg},
- {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
+ {FirstInst->getOperand(1), FirstInst->getOperand(2)});
MI.eraseFromParent();
OtherMI->eraseFromParent();
}
@@ -1285,65 +1294,57 @@ bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
LegalizerHelper::LegalizeResult::Legalized;
}
-static std::optional<APFloat>
-constantFoldFpUnary(unsigned Opcode, LLT DstTy, const Register Op,
- const MachineRegisterInfo &MRI) {
- const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
- if (!MaybeCst)
- return std::nullopt;
-
- APFloat V = MaybeCst->getValueAPF();
- switch (Opcode) {
+static APFloat constantFoldFpUnary(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI,
+ const APFloat &Val) {
+ APFloat Result(Val);
+ switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode!");
case TargetOpcode::G_FNEG: {
- V.changeSign();
- return V;
+ Result.changeSign();
+ return Result;
}
case TargetOpcode::G_FABS: {
- V.clearSign();
- return V;
+ Result.clearSign();
+ return Result;
+ }
+ case TargetOpcode::G_FPTRUNC: {
+ bool Unused;
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ Result.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven,
+ &Unused);
+ return Result;
}
- case TargetOpcode::G_FPTRUNC:
- break;
case TargetOpcode::G_FSQRT: {
bool Unused;
- V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
- V = APFloat(sqrt(V.convertToDouble()));
+ Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+ &Unused);
+ Result = APFloat(sqrt(Result.convertToDouble()));
break;
}
case TargetOpcode::G_FLOG2: {
bool Unused;
- V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
- V = APFloat(log2(V.convertToDouble()));
+ Result.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+ &Unused);
+ Result = APFloat(log2(Result.convertToDouble()));
break;
}
}
// Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
- // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
- // and `G_FLOG2` reach here.
+ // `buildFConstant` will assert on size mismatch. Only `G_FSQRT` and
+ // `G_FLOG2` reach here.
bool Unused;
- V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
- return V;
+ Result.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &Unused);
+ return Result;
}
-bool CombinerHelper::matchCombineConstantFoldFpUnary(
- MachineInstr &MI, std::optional<APFloat> &Cst) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
- return Cst.has_value();
-}
-
-void CombinerHelper::applyCombineConstantFoldFpUnary(
- MachineInstr &MI, std::optional<APFloat> &Cst) {
- assert(Cst && "Optional is unexpectedly empty!");
+void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
+ const ConstantFP *Cst) {
Builder.setInstrAndDebugLoc(MI);
- MachineFunction &MF = Builder.getMF();
- auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
- Register DstReg = MI.getOperand(0).getReg();
- Builder.buildFConstant(DstReg, *FPVal);
+ APFloat Folded = constantFoldFpUnary(MI, MRI, Cst->getValue());
+ const ConstantFP *NewCst = ConstantFP::get(Builder.getContext(), Folded);
+ Builder.buildFConstant(MI.getOperand(0), *NewCst);
MI.eraseFromParent();
}
@@ -1621,6 +1622,41 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
MI.eraseFromParent();
}
+bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
+ // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+ // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
+ auto &Shl = cast<GenericMachineInstr>(MI);
+ Register DstReg = Shl.getReg(0);
+ Register SrcReg = Shl.getReg(1);
+ Register ShiftReg = Shl.getReg(2);
+ Register X, C1;
+
+ if (!getTargetLowering().isDesirableToCommuteWithShift(MI, !isPreLegalize()))
+ return false;
+
+ if (!mi_match(SrcReg, MRI,
+ m_OneNonDBGUse(m_any_of(m_GAdd(m_Reg(X), m_Reg(C1)),
+ m_GOr(m_Reg(X), m_Reg(C1))))))
+ return false;
+
+ APInt C1Val, C2Val;
+ if (!mi_match(C1, MRI, m_ICstOrSplat(C1Val)) ||
+ !mi_match(ShiftReg, MRI, m_ICstOrSplat(C2Val)))
+ return false;
+
+ auto *SrcDef = MRI.getVRegDef(SrcReg);
+ assert((SrcDef->getOpcode() == TargetOpcode::G_ADD ||
+ SrcDef->getOpcode() == TargetOpcode::G_OR) && "Unexpected op");
+ LLT SrcTy = MRI.getType(SrcReg);
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto S1 = B.buildShl(SrcTy, X, ShiftReg);
+ auto S2 = B.buildShl(SrcTy, C1, ShiftReg);
+ B.buildInstr(SrcDef->getOpcode(), {DstReg}, {S1, S2});
+ };
+ return true;
+}
+
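The identity behind matchCommuteShift holds exactly in modular unsigned arithmetic, so it can be checked with ordinary integers; the function below is an illustrative sketch, not part of the patch.

#include <cassert>
#include <cstdint>

// Illustrative only: (shl (add x, c1), c2) == (add (shl x, c2), (shl c1, c2)),
// and likewise with or in place of add.
void checkCommuteShift(uint32_t x, uint32_t c1, uint32_t c2) {
  c2 &= 31; // keep the shift amount in range
  assert(((x + c1) << c2) == ((x << c2) + (c1 << c2)));
  assert(((x | c1) << c2) == ((x << c2) | (c1 << c2)));
}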
bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
unsigned &ShiftVal) {
assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
@@ -1658,9 +1694,9 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
!mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
return false;
- // TODO: Should handle vector splat.
Register RHS = MI.getOperand(2).getReg();
- auto MaybeShiftAmtVal = getIConstantVRegValWithLookThrough(RHS, MRI);
+ MachineInstr *MIShiftAmt = MRI.getVRegDef(RHS);
+ auto MaybeShiftAmtVal = isConstantOrConstantSplatVector(*MIShiftAmt, MRI);
if (!MaybeShiftAmtVal)
return false;
@@ -1675,12 +1711,13 @@ bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
return false;
}
- int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
+ int64_t ShiftAmt = MaybeShiftAmtVal->getSExtValue();
MatchData.Reg = ExtSrc;
MatchData.Imm = ShiftAmt;
- unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
- return MinLeadingZeros >= ShiftAmt;
+ unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countl_one();
+ unsigned SrcTySize = MRI.getType(ExtSrc).getScalarSizeInBits();
+ return MinLeadingZeros >= ShiftAmt && ShiftAmt < SrcTySize;
}
void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
@@ -1763,6 +1800,15 @@ void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
Register DstReg = MI.getOperand(Idx).getReg();
Register SrcReg = Operands[Idx];
+
+ // This combine may run after RegBankSelect, so we need to be aware of
+ // register banks.
+ const auto &DstCB = MRI.getRegClassOrRegBank(DstReg);
+ if (!DstCB.isNull() && DstCB != MRI.getRegClassOrRegBank(SrcReg)) {
+ SrcReg = Builder.buildCopy(MRI.getType(SrcReg), SrcReg).getReg(0);
+ MRI.setRegClassOrRegBank(SrcReg, DstCB);
+ }
+
if (CanReuseInputDirectly)
replaceRegWith(MRI, DstReg, SrcReg);
else
@@ -2426,10 +2472,7 @@ bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
return true;
}
-bool CombinerHelper::eraseInst(MachineInstr &MI) {
- MI.eraseFromParent();
- return true;
-}
+void CombinerHelper::eraseInst(MachineInstr &MI) { MI.eraseFromParent(); }
bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
const MachineOperand &MOP2) {
@@ -2537,7 +2580,7 @@ bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
MaybeCst->getSExtValue() == C;
}
-bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
+void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
unsigned OpIdx) {
assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
Register OldReg = MI.getOperand(0).getReg();
@@ -2545,17 +2588,15 @@ bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
MI.eraseFromParent();
replaceRegWith(MRI, OldReg, Replacement);
- return true;
}
-bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
+void CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
Register Replacement) {
assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
Register OldReg = MI.getOperand(0).getReg();
assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
MI.eraseFromParent();
replaceRegWith(MRI, OldReg, Replacement);
- return true;
}
bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
@@ -2590,36 +2631,32 @@ bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
}
-bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
+void CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
Builder.buildFConstant(MI.getOperand(0), C);
MI.eraseFromParent();
- return true;
}
-bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
+void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
Builder.buildConstant(MI.getOperand(0), C);
MI.eraseFromParent();
- return true;
}
-bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
+void CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
Builder.buildConstant(MI.getOperand(0), C);
MI.eraseFromParent();
- return true;
}
-bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
+void CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
assert(MI.getNumDefs() == 1 && "Expected only one def?");
Builder.setInstr(MI);
Builder.buildUndef(MI.getOperand(0));
MI.eraseFromParent();
- return true;
}
bool CombinerHelper::matchSimplifyAddToSub(
@@ -2750,9 +2787,7 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
Register Y = RightHandInst->getOperand(1).getReg();
LLT XTy = MRI.getType(X);
LLT YTy = MRI.getType(Y);
- if (XTy != YTy)
- return false;
- if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
+ if (!XTy.isValid() || XTy != YTy)
return false;
// Optional extra source register.
@@ -2779,6 +2814,9 @@ bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
}
}
+ if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
+ return false;
+
// Record the steps to build the new instructions.
//
// Steps to build (logic x, y)
@@ -3227,7 +3265,7 @@ bool CombinerHelper::matchFoldBinOpIntoSelect(MachineInstr &MI,
/// \p SelectOperand is the operand in binary operator \p MI that is the select
/// to fold.
-bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
+void CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
const unsigned &SelectOperand) {
Builder.setInstrAndDebugLoc(MI);
@@ -3263,8 +3301,6 @@ bool CombinerHelper::applyFoldBinOpIntoSelect(MachineInstr &MI,
Builder.buildSelect(Dst, SelectCond, FoldTrue, FoldFalse, MI.getFlags());
MI.eraseFromParent();
-
- return true;
}
std::optional<SmallVector<Register, 8>>
@@ -3612,275 +3648,6 @@ bool CombinerHelper::matchLoadOrCombine(
return true;
}
-/// Check if the store \p Store is a truncstore that can be merged. That is,
-/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
-/// Register then it does not need to match and SrcVal is set to the source
-/// value found.
-/// On match, returns the start byte offset of the \p SrcVal that is being
-/// stored.
-static std::optional<int64_t>
-getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
- MachineRegisterInfo &MRI) {
- Register TruncVal;
- if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
- return std::nullopt;
-
- // The shift amount must be a constant multiple of the narrow type.
- // It is translated to the offset address in the wide source value "y".
- //
- // x = G_LSHR y, ShiftAmtC
- // s8 z = G_TRUNC x
- // store z, ...
- Register FoundSrcVal;
- int64_t ShiftAmt;
- if (!mi_match(TruncVal, MRI,
- m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
- m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
- if (!SrcVal.isValid() || TruncVal == SrcVal) {
- if (!SrcVal.isValid())
- SrcVal = TruncVal;
- return 0; // If it's the lowest index store.
- }
- return std::nullopt;
- }
-
- unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
- if (ShiftAmt % NarrowBits!= 0)
- return std::nullopt;
- const unsigned Offset = ShiftAmt / NarrowBits;
-
- if (SrcVal.isValid() && FoundSrcVal != SrcVal)
- return std::nullopt;
-
- if (!SrcVal.isValid())
- SrcVal = FoundSrcVal;
- else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
- return std::nullopt;
- return Offset;
-}
-
-/// Match a pattern where a wide type scalar value is stored by several narrow
-/// stores. Fold it into a single store or a BSWAP and a store if the targets
-/// supports it.
-///
-/// Assuming little endian target:
-/// i8 *p = ...
-/// i32 val = ...
-/// p[0] = (val >> 0) & 0xFF;
-/// p[1] = (val >> 8) & 0xFF;
-/// p[2] = (val >> 16) & 0xFF;
-/// p[3] = (val >> 24) & 0xFF;
-/// =>
-/// *((i32)p) = val;
-///
-/// i8 *p = ...
-/// i32 val = ...
-/// p[0] = (val >> 24) & 0xFF;
-/// p[1] = (val >> 16) & 0xFF;
-/// p[2] = (val >> 8) & 0xFF;
-/// p[3] = (val >> 0) & 0xFF;
-/// =>
-/// *((i32)p) = BSWAP(val);
-bool CombinerHelper::matchTruncStoreMerge(MachineInstr &MI,
- MergeTruncStoresInfo &MatchInfo) {
- auto &StoreMI = cast<GStore>(MI);
- LLT MemTy = StoreMI.getMMO().getMemoryType();
-
- // We only handle merging simple stores of 1-4 bytes.
- if (!MemTy.isScalar())
- return false;
- switch (MemTy.getSizeInBits()) {
- case 8:
- case 16:
- case 32:
- break;
- default:
- return false;
- }
- if (!StoreMI.isSimple())
- return false;
-
- // We do a simple search for mergeable stores prior to this one.
- // Any potential alias hazard along the way terminates the search.
- SmallVector<GStore *> FoundStores;
-
- // We're looking for:
- // 1) a (store(trunc(...)))
- // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
- // the partial value stored.
- // 3) where the offsets form either a little or big-endian sequence.
-
- auto &LastStore = StoreMI;
-
- // The single base pointer that all stores must use.
- Register BaseReg;
- int64_t LastOffset;
- if (!mi_match(LastStore.getPointerReg(), MRI,
- m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
- BaseReg = LastStore.getPointerReg();
- LastOffset = 0;
- }
-
- GStore *LowestIdxStore = &LastStore;
- int64_t LowestIdxOffset = LastOffset;
-
- Register WideSrcVal;
- auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, MRI);
- if (!LowestShiftAmt)
- return false; // Didn't match a trunc.
- assert(WideSrcVal.isValid());
-
- LLT WideStoreTy = MRI.getType(WideSrcVal);
- // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
- if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
- return false;
- const unsigned NumStoresRequired =
- WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
-
- SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
- OffsetMap[*LowestShiftAmt] = LastOffset;
- FoundStores.emplace_back(&LastStore);
-
- // Search the block up for more stores.
- // We use a search threshold of 10 instructions here because the combiner
- // works top-down within a block, and we don't want to search an unbounded
- // number of predecessor instructions trying to find matching stores.
- // If we moved this optimization into a separate pass then we could probably
- // use a more efficient search without having a hard-coded threshold.
- const int MaxInstsToCheck = 10;
- int NumInstsChecked = 0;
- for (auto II = ++LastStore.getReverseIterator();
- II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
- ++II) {
- NumInstsChecked++;
- GStore *NewStore;
- if ((NewStore = dyn_cast<GStore>(&*II))) {
- if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
- break;
- } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
- break;
- } else {
- continue; // This is a safe instruction we can look past.
- }
-
- Register NewBaseReg;
- int64_t MemOffset;
- // Check we're storing to the same base + some offset.
- if (!mi_match(NewStore->getPointerReg(), MRI,
- m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
- NewBaseReg = NewStore->getPointerReg();
- MemOffset = 0;
- }
- if (BaseReg != NewBaseReg)
- break;
-
- auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, MRI);
- if (!ShiftByteOffset)
- break;
- if (MemOffset < LowestIdxOffset) {
- LowestIdxOffset = MemOffset;
- LowestIdxStore = NewStore;
- }
-
- // Map the offset in the store and the offset in the combined value, and
- // early return if it has been set before.
- if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
- OffsetMap[*ShiftByteOffset] != INT64_MAX)
- break;
- OffsetMap[*ShiftByteOffset] = MemOffset;
-
- FoundStores.emplace_back(NewStore);
- // Reset counter since we've found a matching inst.
- NumInstsChecked = 0;
- if (FoundStores.size() == NumStoresRequired)
- break;
- }
-
- if (FoundStores.size() != NumStoresRequired) {
- return false;
- }
-
- const auto &DL = LastStore.getMF()->getDataLayout();
- auto &C = LastStore.getMF()->getFunction().getContext();
- // Check that a store of the wide type is both allowed and fast on the target
- unsigned Fast = 0;
- bool Allowed = getTargetLowering().allowsMemoryAccess(
- C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
- if (!Allowed || !Fast)
- return false;
-
- // Check if the pieces of the value are going to the expected places in memory
- // to merge the stores.
- unsigned NarrowBits = MemTy.getScalarSizeInBits();
- auto checkOffsets = [&](bool MatchLittleEndian) {
- if (MatchLittleEndian) {
- for (unsigned i = 0; i != NumStoresRequired; ++i)
- if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
- return false;
- } else { // MatchBigEndian by reversing loop counter.
- for (unsigned i = 0, j = NumStoresRequired - 1; i != NumStoresRequired;
- ++i, --j)
- if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
- return false;
- }
- return true;
- };
-
- // Check if the offsets line up for the native data layout of this target.
- bool NeedBswap = false;
- bool NeedRotate = false;
- if (!checkOffsets(DL.isLittleEndian())) {
- // Special-case: check if byte offsets line up for the opposite endian.
- if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
- NeedBswap = true;
- else if (NumStoresRequired == 2 && checkOffsets(DL.isBigEndian()))
- NeedRotate = true;
- else
- return false;
- }
-
- if (NeedBswap &&
- !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}))
- return false;
- if (NeedRotate &&
- !isLegalOrBeforeLegalizer({TargetOpcode::G_ROTR, {WideStoreTy}}))
- return false;
-
- MatchInfo.NeedBSwap = NeedBswap;
- MatchInfo.NeedRotate = NeedRotate;
- MatchInfo.LowestIdxStore = LowestIdxStore;
- MatchInfo.WideSrcVal = WideSrcVal;
- MatchInfo.FoundStores = std::move(FoundStores);
- return true;
-}
-
-void CombinerHelper::applyTruncStoreMerge(MachineInstr &MI,
- MergeTruncStoresInfo &MatchInfo) {
-
- Builder.setInstrAndDebugLoc(MI);
- Register WideSrcVal = MatchInfo.WideSrcVal;
- LLT WideStoreTy = MRI.getType(WideSrcVal);
-
- if (MatchInfo.NeedBSwap) {
- WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
- } else if (MatchInfo.NeedRotate) {
- assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
- "Unexpected type for rotate");
- auto RotAmt =
- Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
- WideSrcVal =
- Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
- }
-
- Builder.buildStore(WideSrcVal, MatchInfo.LowestIdxStore->getPointerReg(),
- MatchInfo.LowestIdxStore->getMMO().getPointerInfo(),
- MatchInfo.LowestIdxStore->getMMO().getAlign());
-
- // Erase the old stores.
- for (auto *ST : MatchInfo.FoundStores)
- ST->eraseFromParent();
-}
-
bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
MachineInstr *&ExtMI) {
assert(MI.getOpcode() == TargetOpcode::G_PHI);
@@ -4395,7 +4162,7 @@ bool CombinerHelper::matchBitfieldExtractFromAnd(
if (static_cast<uint64_t>(LSBImm) >= Size)
return false;
- uint64_t Width = APInt(Size, AndImm).countTrailingOnes();
+ uint64_t Width = APInt(Size, AndImm).countr_one();
MatchInfo = [=](MachineIRBuilder &B) {
auto WidthCst = B.buildConstant(ExtractTy, Width);
auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
@@ -4496,7 +4263,7 @@ bool CombinerHelper::matchBitfieldExtractFromShrAnd(
// Calculate start position and width of the extract.
const int64_t Pos = ShrAmt;
- const int64_t Width = countTrailingOnes(UMask) - ShrAmt;
+ const int64_t Width = llvm::countr_one(UMask) - ShrAmt;
// It's preferable to keep the shift, rather than form G_SBFX.
// TODO: remove the G_AND via demanded bits analysis.
@@ -4695,6 +4462,62 @@ bool CombinerHelper::matchReassocPtrAdd(MachineInstr &MI,
return false;
}
+bool CombinerHelper::tryReassocBinOp(unsigned Opc, Register DstReg,
+ Register OpLHS, Register OpRHS,
+ BuildFnTy &MatchInfo) {
+ LLT OpRHSTy = MRI.getType(OpRHS);
+ MachineInstr *OpLHSDef = MRI.getVRegDef(OpLHS);
+
+ if (OpLHSDef->getOpcode() != Opc)
+ return false;
+
+ MachineInstr *OpRHSDef = MRI.getVRegDef(OpRHS);
+ Register OpLHSLHS = OpLHSDef->getOperand(1).getReg();
+ Register OpLHSRHS = OpLHSDef->getOperand(2).getReg();
+
+ // If the inner op is (X op C), pull the constant out so it can be folded with
+ // other constants in the expression tree. Folding is not guaranteed so we
+ // might have (C1 op C2). In that case do not pull a constant out because it
+ // won't help and can lead to infinite loops.
+ if (isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSRHS), MRI) &&
+ !isConstantOrConstantSplatVector(*MRI.getVRegDef(OpLHSLHS), MRI)) {
+ if (isConstantOrConstantSplatVector(*OpRHSDef, MRI)) {
+ // (Opc (Opc X, C1), C2) -> (Opc X, (Opc C1, C2))
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto NewCst = B.buildInstr(Opc, {OpRHSTy}, {OpLHSRHS, OpRHS});
+ B.buildInstr(Opc, {DstReg}, {OpLHSLHS, NewCst});
+ };
+ return true;
+ }
+ if (getTargetLowering().isReassocProfitable(MRI, OpLHS, OpRHS)) {
+ // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
+ // iff (op x, c1) has one use
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto NewLHSLHS = B.buildInstr(Opc, {OpRHSTy}, {OpLHSLHS, OpRHS});
+ B.buildInstr(Opc, {DstReg}, {NewLHSLHS, OpLHSRHS});
+ };
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool CombinerHelper::matchReassocCommBinOp(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ // We don't check if the reassociation will break a legal addressing mode
+ // here since pointer arithmetic is handled by G_PTR_ADD.
+ unsigned Opc = MI.getOpcode();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register LHSReg = MI.getOperand(1).getReg();
+ Register RHSReg = MI.getOperand(2).getReg();
+
+ if (tryReassocBinOp(Opc, DstReg, LHSReg, RHSReg, MatchInfo))
+ return true;
+ if (tryReassocBinOp(Opc, DstReg, RHSReg, LHSReg, MatchInfo))
+ return true;
+ return false;
+}
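Both rewrites attempted by tryReassocBinOp are pure regroupings, so the value is unchanged; using integer addition as the running example, a hypothetical standalone check (not part of the patch) looks like this.

#include <cassert>
#include <cstdint>

// Illustrative only: the first form folds the two constants together, the
// second moves the constant outward past a non-constant operand.
void checkReassoc(uint32_t x, uint32_t y, uint32_t c1, uint32_t c2) {
  assert(((x + c1) + c2) == (x + (c1 + c2))); // (Opc (Opc X, C1), C2)
  assert(((x + c1) + y)  == ((x + y) + c1));  // (op (op x, c1), y)
}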
bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
Register Op1 = MI.getOperand(1).getReg();
@@ -4766,7 +4589,7 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd(
return false;
// No point in combining if there's nothing to truncate.
- unsigned NarrowWidth = Mask.countTrailingOnes();
+ unsigned NarrowWidth = Mask.countr_one();
if (NarrowWidth == WideTy.getSizeInBits())
return false;
LLT NarrowTy = LLT::scalar(NarrowWidth);
@@ -4956,7 +4779,7 @@ MachineInstr *CombinerHelper::buildUDivUsingMul(MachineInstr &MI) {
// Magic algorithm doesn't work for division by 1. We need to emit a select
// at the end.
// TODO: Use undef values for divisor of 1.
- if (!Divisor.isOneValue()) {
+ if (!Divisor.isOne()) {
UnsignedDivisionByConstantInfo magics =
UnsignedDivisionByConstantInfo::get(Divisor);
@@ -5144,7 +4967,7 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) {
auto *CI = cast<ConstantInt>(C);
APInt Divisor = CI->getValue();
- unsigned Shift = Divisor.countTrailingZeros();
+ unsigned Shift = Divisor.countr_zero();
if (Shift) {
Divisor.ashrInPlace(Shift);
UseSRA = true;
@@ -6185,6 +6008,16 @@ bool CombinerHelper::matchRedundantBinOpInEquality(MachineInstr &MI,
return CmpInst::isEquality(Pred) && Y.isValid();
}
+bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) {
+ Register ShiftReg = MI.getOperand(2).getReg();
+ LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
+ auto IsShiftTooBig = [&](const Constant *C) {
+ auto *CI = dyn_cast<ConstantInt>(C);
+ return CI && CI->uge(ResTy.getScalarSizeInBits());
+ };
+ return matchUnaryPredicate(MRI, ShiftReg, IsShiftTooBig);
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
new file mode 100644
index 000000000000..d747cbf5aadc
--- /dev/null
+++ b/llvm/lib/CodeGen/GlobalISel/GIMatchTableExecutor.cpp
@@ -0,0 +1,68 @@
+//===- llvm/CodeGen/GlobalISel/GIMatchTableExecutor.cpp -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file implements the GIMatchTableExecutor class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+#define DEBUG_TYPE "gi-match-table-executor"
+
+using namespace llvm;
+
+GIMatchTableExecutor::MatcherState::MatcherState(unsigned MaxRenderers)
+ : Renderers(MaxRenderers) {}
+
+GIMatchTableExecutor::GIMatchTableExecutor() = default;
+
+bool GIMatchTableExecutor::isOperandImmEqual(
+ const MachineOperand &MO, int64_t Value,
+ const MachineRegisterInfo &MRI) const {
+ if (MO.isReg() && MO.getReg())
+ if (auto VRegVal = getIConstantVRegValWithLookThrough(MO.getReg(), MRI))
+ return VRegVal->Value.getSExtValue() == Value;
+ return false;
+}
+
+bool GIMatchTableExecutor::isBaseWithConstantOffset(
+ const MachineOperand &Root, const MachineRegisterInfo &MRI) const {
+ if (!Root.isReg())
+ return false;
+
+ MachineInstr *RootI = MRI.getVRegDef(Root.getReg());
+ if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
+ return false;
+
+ MachineOperand &RHS = RootI->getOperand(2);
+ MachineInstr *RHSI = MRI.getVRegDef(RHS.getReg());
+ if (RHSI->getOpcode() != TargetOpcode::G_CONSTANT)
+ return false;
+
+ return true;
+}
+
+bool GIMatchTableExecutor::isObviouslySafeToFold(MachineInstr &MI,
+ MachineInstr &IntoMI) const {
+ // Immediate neighbours are already folded.
+ if (MI.getParent() == IntoMI.getParent() &&
+ std::next(MI.getIterator()) == IntoMI.getIterator())
+ return true;
+
+ // Convergent instructions cannot be moved in the CFG.
+ if (MI.isConvergent() && MI.getParent() != IntoMI.getParent())
+ return false;
+
+ return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
+ !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty();
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index bfbe7e1c3e55..363ffbfa90b5 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -115,7 +116,7 @@ void GISelKnownBits::computeKnownBitsMin(Register Src0, Register Src1,
computeKnownBitsImpl(Src0, Known2, DemandedElts, Depth);
// Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
// Bitfield extract is computed as (Src >> Offset) & Mask, where Mask is
@@ -191,7 +192,7 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
Depth + 1);
// Known bits are the values that are shared by every demanded element.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
// If we don't know any bits, early out.
if (Known.isUnknown())
@@ -235,10 +236,10 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
// For COPYs we don't do anything, don't increase the depth.
computeKnownBitsImpl(SrcReg, Known2, DemandedElts,
Depth + (Opcode != TargetOpcode::COPY));
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
// If we reach a point where we don't know anything
// just stop looking through the operands.
- if (Known.One == 0 && Known.Zero == 0)
+ if (Known.isUnknown())
break;
} else {
// We know nothing.
@@ -750,7 +751,7 @@ unsigned GISelKnownBits::computeNumSignBits(Register R,
// Okay, we know that the sign bit in Mask is set. Use CLO to determine
// the number of identical bits in the top of the input value.
Mask <<= Mask.getBitWidth() - TyBits;
- return std::max(FirstAnswer, Mask.countLeadingOnes());
+ return std::max(FirstAnswer, Mask.countl_one());
}
unsigned GISelKnownBits::computeNumSignBits(Register R, unsigned Depth) {
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 7d811dc0ad8f..9a67a8d05a4d 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/GlobalISel/InlineAsmLowering.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -43,6 +44,7 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -74,7 +76,6 @@
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -300,7 +301,7 @@ bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
Register Op0 = getOrCreateVReg(*U.getOperand(0));
Register Op1 = getOrCreateVReg(*U.getOperand(1));
Register Res = getOrCreateVReg(U);
- uint16_t Flags = 0;
+ uint32_t Flags = 0;
if (isa<Instruction>(U)) {
const Instruction &I = cast<Instruction>(U);
Flags = MachineInstr::copyFlagsFromInstruction(I);
@@ -314,7 +315,7 @@ bool IRTranslator::translateUnaryOp(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder) {
Register Op0 = getOrCreateVReg(*U.getOperand(0));
Register Res = getOrCreateVReg(U);
- uint16_t Flags = 0;
+ uint32_t Flags = 0;
if (isa<Instruction>(U)) {
const Instruction &I = cast<Instruction>(U);
Flags = MachineInstr::copyFlagsFromInstruction(I);
@@ -345,7 +346,7 @@ bool IRTranslator::translateCompare(const User &U,
MIRBuilder.buildCopy(
Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
else {
- uint16_t Flags = 0;
+ uint32_t Flags = 0;
if (CI)
Flags = MachineInstr::copyFlagsFromInstruction(*CI);
MIRBuilder.buildFCmp(Pred, Res, Op0, Op1, Flags);
@@ -844,8 +845,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
// For conditional branch lowering, we might try to do something silly like
// emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so,
// just re-use the existing condition vreg.
- if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI &&
- CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
+ if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI && CI->isOne() &&
+ CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
Cond = CondLHS;
} else {
Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
@@ -1018,7 +1019,7 @@ void IRTranslator::emitBitTestHeader(SwitchCG::BitTestBlock &B,
LLT MaskTy = SwitchOpTy;
if (MaskTy.getSizeInBits() > PtrTy.getSizeInBits() ||
- !isPowerOf2_32(MaskTy.getSizeInBits()))
+ !llvm::has_single_bit<uint32_t>(MaskTy.getSizeInBits()))
MaskTy = LLT::scalar(PtrTy.getSizeInBits());
else {
// Ensure that the type will fit the mask value.
@@ -1074,14 +1075,14 @@ void IRTranslator::emitBitTestCase(SwitchCG::BitTestBlock &BB,
// Testing for a single bit; just compare the shift count with what it
// would need to be to shift a 1 bit in that position.
auto MaskTrailingZeros =
- MIB.buildConstant(SwitchTy, countTrailingZeros(B.Mask));
+ MIB.buildConstant(SwitchTy, llvm::countr_zero(B.Mask));
Cmp =
MIB.buildICmp(ICmpInst::ICMP_EQ, LLT::scalar(1), Reg, MaskTrailingZeros)
.getReg(0);
} else if (PopCount == BB.Range) {
// There is only one zero bit in the range, test for it directly.
auto MaskTrailingOnes =
- MIB.buildConstant(SwitchTy, countTrailingOnes(B.Mask));
+ MIB.buildConstant(SwitchTy, llvm::countr_one(B.Mask));
Cmp = MIB.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Reg, MaskTrailingOnes)
.getReg(0);
} else {
@@ -1294,7 +1295,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
AAMDNodes AAInfo = LI.getAAMetadata();
const Value *Ptr = LI.getPointerOperand();
- Type *OffsetIRTy = DL->getIntPtrType(Ptr->getType());
+ Type *OffsetIRTy = DL->getIndexType(Ptr->getType());
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
if (CLI->supportSwiftError() && isSwiftError(Ptr)) {
@@ -1342,7 +1343,7 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
Register Base = getOrCreateVReg(*SI.getPointerOperand());
- Type *OffsetIRTy = DL->getIntPtrType(SI.getPointerOperandType());
+ Type *OffsetIRTy = DL->getIndexType(SI.getPointerOperandType());
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
if (CLI->supportSwiftError() && isSwiftError(SI.getPointerOperand())) {
@@ -1438,7 +1439,7 @@ bool IRTranslator::translateSelect(const User &U,
ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
- uint16_t Flags = 0;
+ uint32_t Flags = 0;
if (const SelectInst *SI = dyn_cast<SelectInst>(&U))
Flags = MachineInstr::copyFlagsFromInstruction(*SI);
@@ -1468,8 +1469,14 @@ bool IRTranslator::translateBitCast(const User &U,
MachineIRBuilder &MIRBuilder) {
// If we're bitcasting to the source type, we can reuse the source vreg.
if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
- getLLTForType(*U.getType(), *DL))
+ getLLTForType(*U.getType(), *DL)) {
+ // If the source is a ConstantInt then it was probably created by
+ // ConstantHoisting and we should leave it alone.
+ if (isa<ConstantInt>(U.getOperand(0)))
+ return translateCast(TargetOpcode::G_CONSTANT_FOLD_BARRIER, U,
+ MIRBuilder);
return translateCopy(U, *U.getOperand(0), MIRBuilder);
+ }
return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
}
@@ -1488,7 +1495,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
Register BaseReg = getOrCreateVReg(Op0);
Type *PtrIRTy = Op0.getType();
LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
- Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
+ Type *OffsetIRTy = DL->getIndexType(PtrIRTy);
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
// Normalize Vector GEP - all scalar operands should be converted to the
@@ -1513,7 +1520,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
.getReg(0);
PtrIRTy = FixedVectorType::get(PtrIRTy, VectorWidth);
PtrTy = getLLTForType(*PtrIRTy, *DL);
- OffsetIRTy = DL->getIntPtrType(PtrIRTy);
+ OffsetIRTy = DL->getIndexType(PtrIRTy);
OffsetTy = getLLTForType(*OffsetIRTy, *DL);
}
@@ -1759,6 +1766,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_FLOG2;
case Intrinsic::log10:
return TargetOpcode::G_FLOG10;
+ case Intrinsic::ldexp:
+ return TargetOpcode::G_FLDEXP;
case Intrinsic::nearbyint:
return TargetOpcode::G_FNEARBYINT;
case Intrinsic::pow:
@@ -1851,6 +1860,8 @@ static unsigned getConstrainedOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_STRICT_FMA;
case Intrinsic::experimental_constrained_sqrt:
return TargetOpcode::G_STRICT_FSQRT;
+ case Intrinsic::experimental_constrained_ldexp:
+ return TargetOpcode::G_STRICT_FLDEXP;
default:
return 0;
}
@@ -1864,7 +1875,7 @@ bool IRTranslator::translateConstrainedFPIntrinsic(
if (!Opcode)
return false;
- unsigned Flags = MachineInstr::copyFlagsFromInstruction(FPI);
+ uint32_t Flags = MachineInstr::copyFlagsFromInstruction(FPI);
if (EB == fp::ExceptionBehavior::ebIgnore)
Flags |= MachineInstr::NoFPExcept;
@@ -1879,6 +1890,60 @@ bool IRTranslator::translateConstrainedFPIntrinsic(
return true;
}
+std::optional<MCRegister> IRTranslator::getArgPhysReg(Argument &Arg) {
+ auto VRegs = getOrCreateVRegs(Arg);
+ if (VRegs.size() != 1)
+ return std::nullopt;
+
+ // Arguments are lowered as a copy of a livein physical register.
+ auto *VRegDef = MF->getRegInfo().getVRegDef(VRegs[0]);
+ if (!VRegDef || !VRegDef->isCopy())
+ return std::nullopt;
+ return VRegDef->getOperand(1).getReg().asMCReg();
+}
+
+bool IRTranslator::translateIfEntryValueArgument(const DbgValueInst &DebugInst,
+ MachineIRBuilder &MIRBuilder) {
+ auto *Arg = dyn_cast<Argument>(DebugInst.getValue());
+ if (!Arg)
+ return false;
+
+ const DIExpression *Expr = DebugInst.getExpression();
+ if (!Expr->isEntryValue())
+ return false;
+
+ std::optional<MCRegister> PhysReg = getArgPhysReg(*Arg);
+ if (!PhysReg) {
+ LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but "
+ "couldn't find a physical register\n"
+ << DebugInst << "\n");
+ return true;
+ }
+
+ MIRBuilder.buildDirectDbgValue(*PhysReg, DebugInst.getVariable(),
+ DebugInst.getExpression());
+ return true;
+}
+
+bool IRTranslator::translateIfEntryValueArgument(
+ const DbgDeclareInst &DebugInst) {
+ auto *Arg = dyn_cast<Argument>(DebugInst.getAddress());
+ if (!Arg)
+ return false;
+
+ const DIExpression *Expr = DebugInst.getExpression();
+ if (!Expr->isEntryValue())
+ return false;
+
+ std::optional<MCRegister> PhysReg = getArgPhysReg(*Arg);
+ if (!PhysReg)
+ return false;
+
+ MF->setVariableDbgInfo(DebugInst.getVariable(), Expr, *PhysReg,
+ DebugInst.getDebugLoc());
+ return true;
+}
+
bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MachineIRBuilder &MIRBuilder) {
if (auto *MI = dyn_cast<AnyMemIntrinsic>(&CI)) {
@@ -1945,12 +2010,16 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// instructions (in fact, they get ignored if they *do* exist).
MF->setVariableDbgInfo(DI.getVariable(), DI.getExpression(),
getOrCreateFrameIndex(*AI), DI.getDebugLoc());
- } else {
- // A dbg.declare describes the address of a source variable, so lower it
- // into an indirect DBG_VALUE.
- MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address),
- DI.getVariable(), DI.getExpression());
+ return true;
}
+
+ if (translateIfEntryValueArgument(DI))
+ return true;
+
+ // A dbg.declare describes the address of a source variable, so lower it
+ // into an indirect DBG_VALUE.
+ MIRBuilder.buildIndirectDbgValue(getOrCreateVReg(*Address),
+ DI.getVariable(), DI.getExpression());
return true;
}
case Intrinsic::dbg_label: {
@@ -1991,16 +2060,32 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
// DI cannot produce a valid DBG_VALUE, so produce an undef DBG_VALUE to
// terminate any prior location.
MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
- } else if (const auto *CI = dyn_cast<Constant>(V)) {
+ return true;
+ }
+ if (const auto *CI = dyn_cast<Constant>(V)) {
MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
- } else {
- for (Register Reg : getOrCreateVRegs(*V)) {
- // FIXME: This does not handle register-indirect values at offset 0. The
- // direct/indirect thing shouldn't really be handled by something as
- // implicit as reg+noreg vs reg+imm in the first place, but it seems
- // pretty baked in right now.
- MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
- }
+ return true;
+ }
+ if (auto *AI = dyn_cast<AllocaInst>(V);
+ AI && AI->isStaticAlloca() && DI.getExpression()->startsWithDeref()) {
+ // If the value is an alloca and the expression starts with a
+ // dereference, track a stack slot instead of a register, as registers
+ // may be clobbered.
+ auto ExprOperands = DI.getExpression()->getElements();
+ auto *ExprDerefRemoved =
+ DIExpression::get(AI->getContext(), ExprOperands.drop_front());
+ MIRBuilder.buildFIDbgValue(getOrCreateFrameIndex(*AI), DI.getVariable(),
+ ExprDerefRemoved);
+ return true;
+ }
+ if (translateIfEntryValueArgument(DI, MIRBuilder))
+ return true;
+ for (Register Reg : getOrCreateVRegs(*V)) {
+ // FIXME: This does not handle register-indirect values at offset 0. The
+ // direct/indirect thing shouldn't really be handled by something as
+ // implicit as reg+noreg vs reg+imm in the first place, but it seems
+ // pretty baked in right now.
+ MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
}
return true;
}
@@ -2090,6 +2175,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getOrCreateVReg(*CI.getArgOperand(0)),
MachineInstr::copyFlagsFromInstruction(CI));
return true;
+ case Intrinsic::frexp: {
+ ArrayRef<Register> VRegs = getOrCreateVRegs(CI);
+ MIRBuilder.buildFFrexp(VRegs[0], VRegs[1],
+ getOrCreateVReg(*CI.getArgOperand(0)),
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
+ }
case Intrinsic::memcpy_inline:
return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE);
case Intrinsic::memcpy:
@@ -2296,7 +2388,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return CLI->lowerCall(MIRBuilder, Info);
}
case Intrinsic::fptrunc_round: {
- unsigned Flags = MachineInstr::copyFlagsFromInstruction(CI);
+ uint32_t Flags = MachineInstr::copyFlagsFromInstruction(CI);
// Convert the metadata argument to a constant integer
Metadata *MD = cast<MetadataAsValue>(CI.getArgOperand(1))->getMetadata();
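
Several hunks above widen local Flags variables from uint16_t (or plain unsigned) to uint32_t, which matters as soon as any MachineInstr flag bit sits above bit 15. A tiny standalone illustration, with made-up flag names and positions (standard C++ only):

#include <cassert>
#include <cstdint>

// Hypothetical flag bits; only the relative positions matter here.
enum : uint32_t {
  OldFlag = 1u << 3,
  NewFlag = 1u << 16,  // no longer representable in a 16-bit holder
};

int main() {
  uint32_t Wide = OldFlag | NewFlag;
  uint16_t Narrow = static_cast<uint16_t>(Wide);  // the old storage width
  assert((Narrow & OldFlag) != 0);
  assert((Narrow & NewFlag) == 0);  // bit 16 silently truncated away
  assert((Wide & NewFlag) != 0);    // the widened holder keeps it
  return 0;
}
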
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index e0357c50e555..3925611f1485 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -391,10 +391,12 @@ bool InlineAsmLowering::lowerInlineAsm(
Inst.addReg(SourceRegs[0]);
} else {
// Otherwise, this outputs to a register (directly for C_Register /
- // C_RegisterClass. Find a register that we can use.
+ // C_RegisterClass/C_Other.
assert(OpInfo.ConstraintType == TargetLowering::C_Register ||
- OpInfo.ConstraintType == TargetLowering::C_RegisterClass);
+ OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
+ OpInfo.ConstraintType == TargetLowering::C_Other);
+ // Find a register that we can use.
if (OpInfo.Regs.empty()) {
LLVM_DEBUG(dbgs()
<< "Couldn't allocate output register for constraint\n");
diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index f780050ca3f1..9bbef11067ae 100644
--- a/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/config.h"
@@ -104,7 +105,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
CodeGenCoverage CoverageInfo;
assert(ISel && "Cannot work without InstructionSelector");
- ISel->setupMF(MF, KB, CoverageInfo, PSI, BFI);
+ ISel->setupMF(MF, KB, &CoverageInfo, PSI, BFI);
// An optimization remark emitter. Used to report failures.
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
@@ -165,12 +166,12 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- // Eliminate hints.
- if (isPreISelGenericOptimizationHint(MI.getOpcode())) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
+ // Eliminate hints or G_CONSTANT_FOLD_BARRIER.
+ if (isPreISelGenericOptimizationHint(MI.getOpcode()) ||
+ MI.getOpcode() == TargetOpcode::G_CONSTANT_FOLD_BARRIER) {
+ auto [DstReg, SrcReg] = MI.getFirst2Regs();
- // At this point, the destination register class of the hint may have
+ // At this point, the destination register class of the op may have
// been decided.
//
// Propagate that through to the source register.
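
The `auto [DstReg, SrcReg] = MI.getFirst2Regs();` pattern here, and the getFirstNRegLLTs variants used throughout LegalizerHelper.cpp below, are plain C++17 structured bindings over tuple-returning helpers. A toy standalone equivalent (types and names are stand-ins, not LLVM's):

#include <cstdio>
#include <tuple>

struct Reg { unsigned Id; };

// Bundle the first two register operands into one tuple so callers can unpack
// them in a single declaration instead of repeated getOperand() calls.
std::tuple<Reg, Reg> getFirst2Regs() { return {Reg{1}, Reg{2}}; }

int main() {
  auto [DstReg, SrcReg] = getFirst2Regs();  // structured bindings (C++17)
  std::printf("dst=%u src=%u\n", DstReg.Id, SrcReg.Id);
  return 0;
}
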
diff --git a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 8959d215ecd1..c48591cc2f02 100644
--- a/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -5,64 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-/// \file
-/// This file implements the InstructionSelector class.
-//
-//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-
-#define DEBUG_TYPE "instructionselector"
-
-using namespace llvm;
-
-InstructionSelector::MatcherState::MatcherState(unsigned MaxRenderers)
- : Renderers(MaxRenderers) {}
-
-InstructionSelector::InstructionSelector() = default;
-
-bool InstructionSelector::isOperandImmEqual(
- const MachineOperand &MO, int64_t Value,
- const MachineRegisterInfo &MRI) const {
- if (MO.isReg() && MO.getReg())
- if (auto VRegVal = getIConstantVRegValWithLookThrough(MO.getReg(), MRI))
- return VRegVal->Value.getSExtValue() == Value;
- return false;
-}
-
-bool InstructionSelector::isBaseWithConstantOffset(
- const MachineOperand &Root, const MachineRegisterInfo &MRI) const {
- if (!Root.isReg())
- return false;
-
- MachineInstr *RootI = MRI.getVRegDef(Root.getReg());
- if (RootI->getOpcode() != TargetOpcode::G_PTR_ADD)
- return false;
-
- MachineOperand &RHS = RootI->getOperand(2);
- MachineInstr *RHSI = MRI.getVRegDef(RHS.getReg());
- if (RHSI->getOpcode() != TargetOpcode::G_CONSTANT)
- return false;
-
- return true;
-}
-bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
- MachineInstr &IntoMI) const {
- // Immediate neighbours are already folded.
- if (MI.getParent() == IntoMI.getParent() &&
- std::next(MI.getIterator()) == IntoMI.getIterator())
- return true;
+namespace llvm {
- // Convergent instructions cannot be moved in the CFG.
- if (MI.isConvergent() && MI.getParent() != IntoMI.getParent())
- return false;
+// vtable anchor
+InstructionSelector::~InstructionSelector() = default;
- return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
- !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty();
-}
+} // namespace llvm
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 54a82cac95d5..2c77ed8b0600 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -164,7 +164,8 @@ LegalityPredicate LegalityPredicates::sizeNotMultipleOf(unsigned TypeIdx,
LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
const LLT QueryTy = Query.Types[TypeIdx];
- return QueryTy.isScalar() && !isPowerOf2_32(QueryTy.getSizeInBits());
+ return QueryTy.isScalar() &&
+ !llvm::has_single_bit<uint32_t>(QueryTy.getSizeInBits());
};
}
@@ -184,14 +185,16 @@ LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0,
LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
return [=](const LegalityQuery &Query) {
- return !isPowerOf2_32(Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes());
+ return !llvm::has_single_bit<uint32_t>(
+ Query.MMODescrs[MMOIdx].MemoryTy.getSizeInBytes());
};
}
LegalityPredicate LegalityPredicates::memSizeNotByteSizePow2(unsigned MMOIdx) {
return [=](const LegalityQuery &Query) {
const LLT MemTy = Query.MMODescrs[MMOIdx].MemoryTy;
- return !MemTy.isByteSized() || !isPowerOf2_32(MemTy.getSizeInBytes());
+ return !MemTy.isByteSized() ||
+ !llvm::has_single_bit<uint32_t>(MemTy.getSizeInBytes());
};
}
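
llvm::has_single_bit and the llvm::bit_floor calls in the LegalizerHelper hunks below mirror the C++20 <bit> facilities of the same names. A standalone illustration using the standard-library versions:

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  // has_single_bit is the "non-zero power of two" test that replaces
  // isPowerOf2_32 in the predicates above.
  static_assert(std::has_single_bit(64u));
  static_assert(!std::has_single_bit(48u));
  static_assert(!std::has_single_bit(0u));

  // bit_floor rounds down to the largest power of two <= the input, the same
  // split the load/store legalization below performs.
  uint64_t MemSizeInBits = 48;
  uint64_t LargeSplitSize = std::bit_floor(MemSizeInBits);  // 32
  uint64_t SmallSplitSize = MemSizeInBits - LargeSplitSize; // 16
  assert(LargeSplitSize == 32 && SmallSplitSize == 16);
  return 0;
}
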
diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
index 1a13f39c100c..aecbe0b7604c 100644
--- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GISelWorkList.h"
#include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
@@ -75,6 +76,7 @@ INITIALIZE_PASS_BEGIN(Legalizer, DEBUG_TYPE,
false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(Legalizer, DEBUG_TYPE,
"Legalize the Machine IR a function's Machine IR", false,
false)
@@ -85,6 +87,8 @@ void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
AU.addRequired<GISelCSEAnalysisWrapperPass>();
AU.addPreserved<GISelCSEAnalysisWrapperPass>();
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
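
With GISelKnownBitsAnalysis now required and preserved, the legalizer can hand a GISelKnownBits pointer to LegalizerHelper (see the constructor change in LegalizerHelper.cpp below). A minimal sketch of the known-bits idea itself, as a toy two-mask lattice rather than LLVM's KnownBits API:

#include <cassert>
#include <cstdint>

// A value is tracked as two masks: bits known to be zero and bits known to be
// one; bits in neither mask are unknown.
struct Known {
  uint64_t Zero = 0, One = 0;
};

// AND transfer function: a result bit is known one only if both inputs are
// known one, and known zero if either input is known zero.
Known knownAnd(Known A, Known B) {
  return {A.Zero | B.Zero, A.One & B.One};
}

int main() {
  Known A{~uint64_t{0b1100}, 0b1100}; // A is exactly 0b1100
  Known B{~uint64_t{0b1110}, 0b0100}; // bit 2 is one, bits 1 and 3 unknown, rest zero
  Known R = knownAnd(A, B);
  assert(R.One == 0b0100);             // bit 2 provably set
  assert(R.Zero == ~uint64_t{0b1100}); // all but bits 2-3 provably clear
  return 0;
}
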
@@ -173,7 +177,8 @@ Legalizer::MFResult
Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
ArrayRef<GISelChangeObserver *> AuxObservers,
LostDebugLocObserver &LocObserver,
- MachineIRBuilder &MIRBuilder) {
+ MachineIRBuilder &MIRBuilder,
+ GISelKnownBits *KB) {
MIRBuilder.setMF(MF);
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -212,7 +217,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI,
// Now install the observer as the delegate to MF.
// This will keep all the observers notified about new insertions/deletions.
RAIIMFObsDelInstaller Installer(MF, WrapperObserver);
- LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder);
+ LegalizerHelper Helper(MF, LI, WrapperObserver, MIRBuilder, KB);
LegalizationArtifactCombiner ArtCombiner(MIRBuilder, MRI, LI);
bool Changed = false;
SmallVector<MachineInstr *, 128> RetryList;
@@ -314,8 +319,6 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
- const size_t NumBlocks = MF.size();
-
std::unique_ptr<MachineIRBuilder> MIRBuilder;
GISelCSEInfo *CSEInfo = nullptr;
bool EnableCSE = EnableCSEInLegalizer.getNumOccurrences()
@@ -338,25 +341,18 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
if (VerifyDebugLocs > DebugLocVerifyLevel::None)
AuxObservers.push_back(&LocObserver);
+ // This allows Known Bits Analysis in the legalizer.
+ GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+
const LegalizerInfo &LI = *MF.getSubtarget().getLegalizerInfo();
- MFResult Result =
- legalizeMachineFunction(MF, LI, AuxObservers, LocObserver, *MIRBuilder);
+ MFResult Result = legalizeMachineFunction(MF, LI, AuxObservers, LocObserver,
+ *MIRBuilder, KB);
if (Result.FailedOn) {
reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
"unable to legalize instruction", *Result.FailedOn);
return false;
}
- // For now don't support if new blocks are inserted - we would need to fix the
- // outer loop for that.
- if (MF.size() != NumBlocks) {
- MachineOptimizationRemarkMissed R("gisel-legalize", "GISelFailure",
- MF.getFunction().getSubprogram(),
- /*MBB=*/nullptr);
- R << "inserting blocks is not supported yet";
- reportGISelFailure(MF, TPC, MORE, R);
- return false;
- }
if (LocObserver.getNumLostDebugLocs()) {
MachineOptimizationRemarkMissed R("gisel-legalize", "LostDebugLoc",
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 8a1fce2d3d65..f0da0d88140f 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -15,12 +15,14 @@
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
@@ -102,13 +104,13 @@ LegalizerHelper::LegalizerHelper(MachineFunction &MF,
MachineIRBuilder &Builder)
: MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
LI(*MF.getSubtarget().getLegalizerInfo()),
- TLI(*MF.getSubtarget().getTargetLowering()) { }
+ TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
GISelChangeObserver &Observer,
- MachineIRBuilder &B)
- : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
- TLI(*MF.getSubtarget().getTargetLowering()) { }
+ MachineIRBuilder &B, GISelKnownBits *KB)
+ : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
+ TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
@@ -540,6 +542,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(LOG_F);
case TargetOpcode::G_FLOG2:
RTLIBCASE(LOG2_F);
+ case TargetOpcode::G_FLDEXP:
+ RTLIBCASE(LDEXP_F);
case TargetOpcode::G_FCEIL:
RTLIBCASE(CEIL_F);
case TargetOpcode::G_FFLOOR:
@@ -824,6 +828,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FLOG10:
case TargetOpcode::G_FLOG:
case TargetOpcode::G_FLOG2:
+ case TargetOpcode::G_FLDEXP:
case TargetOpcode::G_FEXP:
case TargetOpcode::G_FEXP2:
case TargetOpcode::G_FCEIL:
@@ -1411,6 +1416,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
Observer.changedInstr(MI);
return Legalized;
+ case TargetOpcode::G_FLDEXP:
+ case TargetOpcode::G_STRICT_FLDEXP:
+ return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
}
}
@@ -1504,13 +1512,11 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 1)
return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
+ auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
if (DstTy.isVector())
return UnableToLegalize;
- Register Src1 = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(Src1);
+ LLT SrcTy = MRI.getType(Src1Reg);
const int DstSize = DstTy.getSizeInBits();
const int SrcSize = SrcTy.getSizeInBits();
const int WideSize = WideTy.getSizeInBits();
@@ -1522,7 +1528,7 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
if (WideSize >= DstSize) {
// Directly pack the bits in the target type.
- Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
+ Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
for (unsigned I = 2; I != NumOps; ++I) {
const unsigned Offset = (I - 1) * PartSize;
@@ -1753,11 +1759,7 @@ LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
LLT WideTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
-
- LLT DstTy = MRI.getType(DstReg);
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned Offset = MI.getOperand(2).getImm();
if (TypeIdx == 0) {
@@ -1978,10 +1980,7 @@ LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
}
bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
- Register Result = MI.getOperand(0).getReg();
- Register OriginalOverflow = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
+ auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
LLT SrcTy = MRI.getType(LHS);
LLT OverflowTy = MRI.getType(OriginalOverflow);
unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
@@ -2560,12 +2559,41 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
Observer.changedInstr(MI);
return Legalized;
- case TargetOpcode::G_FPOWI: {
- if (TypeIdx != 0)
- return UnableToLegalize;
+ case TargetOpcode::G_FPOWI:
+ case TargetOpcode::G_FLDEXP:
+ case TargetOpcode::G_STRICT_FLDEXP: {
+ if (TypeIdx == 0) {
+ if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (TypeIdx == 1) {
+ // For some reason SelectionDAG tries to promote to a libcall without
+ // actually changing the integer type for promotion.
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+ }
+ case TargetOpcode::G_FFREXP: {
Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
- widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+
+ if (TypeIdx == 0) {
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ } else {
+ widenScalarDst(MI, WideTy, 1);
+ }
+
Observer.changedInstr(MI);
return Legalized;
}
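
For G_FPOWI/G_FLDEXP the hunk above widens the FP operand by wrapping a wider op in G_FPEXT/G_FPTRUNC, and widens the integer exponent with G_SEXT. A standalone sketch of the same shape using the C library's ldexp (float standing in for the narrow FP type, double for the wide one; no LLVM opcodes involved):

#include <cassert>
#include <cmath>
#include <cstdint>

// Extend, perform the operation at the wider type, truncate back.
float ldexp_widened(float X, int16_t NarrowExp) {
  double Wide = static_cast<double>(X);      // "G_FPEXT"
  double Res = std::ldexp(Wide, NarrowExp);  // exponent sign-extends to int
  return static_cast<float>(Res);            // "G_FPTRUNC"
}

int main() {
  assert(ldexp_widened(1.5f, 4) == 24.0f);  // 1.5 * 2^4
  assert(ldexp_widened(8.0f, -3) == 1.0f);  // 8 * 2^-3
  return 0;
}
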
@@ -2631,12 +2659,34 @@ static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
}
LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerBitcast(MachineInstr &MI) {
+LegalizerHelper::lowerFConstant(MachineInstr &MI) {
Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
+ MachineFunction &MF = MIRBuilder.getMF();
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+
+ unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
+ LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
+ Align Alignment = Align(DL.getABITypeAlign(
+ getFloatTypeForLLT(MF.getFunction().getContext(), MRI.getType(Dst))));
+
+ auto Addr = MIRBuilder.buildConstantPool(
+ AddrPtrTy, MF.getConstantPool()->getConstantPoolIndex(
+ MI.getOperand(1).getFPImm(), Alignment));
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
+ MRI.getType(Dst), Alignment);
+
+ MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Addr, *MMO);
+ MI.eraseFromParent();
+
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerBitcast(MachineInstr &MI) {
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
if (SrcTy.isVector()) {
LLT SrcEltTy = SrcTy.getElementType();
SmallVector<Register, 8> SrcRegs;
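
lowerFConstant above turns a G_FCONSTANT into a load from the function's constant pool. A purely illustrative sketch of the constant-pool idea (a deduplicated table of literals addressed by index; real pools live in the MachineFunction and are emitted by the backend):

#include <cassert>
#include <cstddef>
#include <vector>

// Toy pool: each distinct FP literal gets one slot, and every use becomes a
// load from that slot instead of an immediate in the instruction stream.
struct ConstantPool {
  std::vector<double> Slots;

  size_t getIndex(double V) {
    for (size_t I = 0; I < Slots.size(); ++I)
      if (Slots[I] == V)  // value-equal literals share a slot (toy check)
        return I;
    Slots.push_back(V);
    return Slots.size() - 1;
  }

  double load(size_t Index) const { return Slots[Index]; }
};

int main() {
  ConstantPool CP;
  size_t A = CP.getIndex(3.14159);
  size_t B = CP.getIndex(3.14159);  // deduplicated
  assert(A == B);
  assert(CP.load(A) == 3.14159);
  return 0;
}
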
@@ -2732,11 +2782,7 @@ LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 1)
return UnableToLegalize;
- Register Dst = MI.getOperand(0).getReg();
- Register SrcVec = MI.getOperand(1).getReg();
- Register Idx = MI.getOperand(2).getReg();
- LLT SrcVecTy = MRI.getType(SrcVec);
- LLT IdxTy = MRI.getType(Idx);
+ auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
LLT SrcEltTy = SrcVecTy.getElementType();
unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
@@ -2872,13 +2918,9 @@ LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 0)
return UnableToLegalize;
- Register Dst = MI.getOperand(0).getReg();
- Register SrcVec = MI.getOperand(1).getReg();
- Register Val = MI.getOperand(2).getReg();
- Register Idx = MI.getOperand(3).getReg();
-
- LLT VecTy = MRI.getType(Dst);
- LLT IdxTy = MRI.getType(Idx);
+ auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
+ MI.getFirst4RegLLTs();
+ LLT VecTy = DstTy;
LLT VecEltTy = VecTy.getElementType();
LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
@@ -3004,7 +3046,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
if (!isPowerOf2_32(MemSizeInBits)) {
// This load needs splitting into power of 2 sized loads.
- LargeSplitSize = PowerOf2Floor(MemSizeInBits);
+ LargeSplitSize = llvm::bit_floor(MemSizeInBits);
SmallSplitSize = MemSizeInBits - LargeSplitSize;
} else {
// This is already a power of 2, but we still need to split this in half.
@@ -3122,7 +3164,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
uint64_t LargeSplitSize, SmallSplitSize;
if (!isPowerOf2_32(MemSizeInBits)) {
- LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
+ LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
} else {
auto &Ctx = MF.getFunction().getContext();
@@ -3250,6 +3292,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
switch(MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_FCONSTANT:
+ return lowerFConstant(MI);
case TargetOpcode::G_BITCAST:
return lowerBitcast(MI);
case TargetOpcode::G_SREM:
@@ -3274,10 +3318,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case TargetOpcode::G_UMULO: {
// Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
// result.
- Register Res = MI.getOperand(0).getReg();
- Register Overflow = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
+ auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
LLT Ty = MRI.getType(Res);
unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
@@ -3308,7 +3349,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return Legalized;
}
case TargetOpcode::G_FNEG: {
- Register Res = MI.getOperand(0).getReg();
+ auto [Res, SubByReg] = MI.getFirst2Regs();
LLT Ty = MRI.getType(Res);
// TODO: Handle vector types once we are able to
@@ -3317,23 +3358,16 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return UnableToLegalize;
auto SignMask =
MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
- Register SubByReg = MI.getOperand(1).getReg();
MIRBuilder.buildXor(Res, SubByReg, SignMask);
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_FSUB:
case TargetOpcode::G_STRICT_FSUB: {
- Register Res = MI.getOperand(0).getReg();
+ auto [Res, LHS, RHS] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Res);
// Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
- // First, check if G_FNEG is marked as Lower. If so, we may
- // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
- if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
- return UnableToLegalize;
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
@@ -3357,11 +3391,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return Legalized;
}
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
- Register OldValRes = MI.getOperand(0).getReg();
- Register SuccessRes = MI.getOperand(1).getReg();
- Register Addr = MI.getOperand(2).getReg();
- Register CmpVal = MI.getOperand(3).getReg();
- Register NewVal = MI.getOperand(4).getReg();
+ auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
**MI.memoperands_begin());
MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
@@ -3381,10 +3411,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
case TargetOpcode::G_CTPOP:
return lowerBitCount(MI);
case G_UADDO: {
- Register Res = MI.getOperand(0).getReg();
- Register CarryOut = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
+ auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
MIRBuilder.buildAdd(Res, LHS, RHS);
MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
@@ -3393,11 +3420,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return Legalized;
}
case G_UADDE: {
- Register Res = MI.getOperand(0).getReg();
- Register CarryOut = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
- Register CarryIn = MI.getOperand(4).getReg();
+ auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
LLT Ty = MRI.getType(Res);
auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
@@ -3409,10 +3432,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return Legalized;
}
case G_USUBO: {
- Register Res = MI.getOperand(0).getReg();
- Register BorrowOut = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
+ auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
MIRBuilder.buildSub(Res, LHS, RHS);
MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
@@ -3421,11 +3441,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return Legalized;
}
case G_USUBE: {
- Register Res = MI.getOperand(0).getReg();
- Register BorrowOut = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
- Register BorrowIn = MI.getOperand(4).getReg();
+ auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
const LLT CondTy = MRI.getType(BorrowOut);
const LLT Ty = MRI.getType(Res);
@@ -3470,8 +3486,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
assert(MI.getOperand(2).isImm() && "Expected immediate");
int64_t SizeInBits = MI.getOperand(2).getImm();
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
+ auto [DstReg, SrcReg] = MI.getFirst2Regs();
LLT DstTy = MRI.getType(DstReg);
Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
@@ -3869,9 +3884,7 @@ LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
// Requires compatible types. Otherwise user of DstReg did not perform unmerge
// that should have been artifact combined. Most likely instruction that uses
// DstReg has to do more/fewer elements legalization compatible with NarrowTy.
@@ -3958,8 +3971,7 @@ LegalizerHelper::LegalizeResult
LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
unsigned TypeIdx,
LLT NarrowVecTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcVec = MI.getOperand(1).getReg();
+ auto [DstReg, SrcVec] = MI.getFirst2Regs();
Register InsertVal;
bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
@@ -4159,6 +4171,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FLOG:
case G_FLOG2:
case G_FLOG10:
+ case G_FLDEXP:
case G_FNEARBYINT:
case G_FCEIL:
case G_FFLOOR:
@@ -4234,6 +4247,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_STRICT_FSUB:
case G_STRICT_FMUL:
case G_STRICT_FMA:
+ case G_STRICT_FLDEXP:
+ case G_FFREXP:
return fewerElementsVectorMultiEltType(GMI, NumElts);
case G_ICMP:
case G_FCMP:
@@ -4278,13 +4293,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
if (TypeIdx != 0)
return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- Register Src1Reg = MI.getOperand(1).getReg();
- Register Src2Reg = MI.getOperand(2).getReg();
+ auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
+ MI.getFirst3RegLLTs();
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
- LLT DstTy = MRI.getType(DstReg);
- LLT Src1Ty = MRI.getType(Src1Reg);
- LLT Src2Ty = MRI.getType(Src2Reg);
// The shuffle should be canonicalized by now.
if (DstTy != Src1Ty)
return UnableToLegalize;
@@ -4474,10 +4485,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
// The semantics of the normal non-sequential reductions allow us to freely
// re-associate the operation.
- Register SrcReg = MI.getOperand(1).getReg();
- LLT SrcTy = MRI.getType(SrcReg);
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
if (NarrowTy.isVector() &&
(SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
@@ -4865,6 +4873,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
case TargetOpcode::G_EXTRACT:
if (TypeIdx != 1)
return UnableToLegalize;
@@ -4873,6 +4882,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_INSERT:
+ case TargetOpcode::G_INSERT_VECTOR_ELT:
case TargetOpcode::G_FREEZE:
case TargetOpcode::G_FNEG:
case TargetOpcode::G_FABS:
@@ -4887,10 +4897,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SELECT: {
- Register DstReg = MI.getOperand(0).getReg();
- Register CondReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT CondTy = MRI.getType(CondReg);
+ auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
if (TypeIdx == 1) {
if (!CondTy.isScalar() ||
DstTy.getElementCount() != MoreTy.getElementCount())
@@ -4943,28 +4950,50 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FPEXT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ LLT SrcTy = LLT::fixed_vector(
+ MoreTy.getNumElements(),
+ MRI.getType(MI.getOperand(1).getReg()).getElementType());
+ moreElementsVectorSrc(MI, SrcTy, 1);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
default:
return UnableToLegalize;
}
}
-/// Expand source vectors to the size of destination vector.
-static LegalizerHelper::LegalizeResult
-equalizeVectorShuffleLengths(MachineInstr &MI, MachineIRBuilder &MIRBuilder) {
- MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
-
- LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
- LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+LegalizerHelper::LegalizeResult
+LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
unsigned MaskNumElts = Mask.size();
unsigned SrcNumElts = SrcTy.getNumElements();
- Register DstReg = MI.getOperand(0).getReg();
LLT DestEltTy = DstTy.getElementType();
- // TODO: Normalize the shuffle vector since mask and vector length don't
- // match.
- if (MaskNumElts <= SrcNumElts) {
- return LegalizerHelper::LegalizeResult::UnableToLegalize;
+ if (MaskNumElts == SrcNumElts)
+ return Legalized;
+
+ if (MaskNumElts < SrcNumElts) {
+ // Extend mask to match new destination vector size with
+ // undef values.
+ SmallVector<int, 16> NewMask(Mask);
+ for (unsigned I = MaskNumElts; I < SrcNumElts; ++I)
+ NewMask.push_back(-1);
+
+ moreElementsVectorDst(MI, SrcTy, 0);
+ MIRBuilder.setInstrAndDebugLoc(MI);
+ MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
+ MI.getOperand(1).getReg(),
+ MI.getOperand(2).getReg(), NewMask);
+ MI.eraseFromParent();
+
+ return Legalized;
}
unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
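
equalizeVectorShuffleLengths now pads a too-short shuffle mask with -1 (undef) lanes instead of giving up. The mask transformation on its own, with a plain std::vector<int> and -1 meaning "don't care" as in the code above:

#include <cassert>
#include <vector>

// Extend a shuffle mask to SrcNumElts lanes by appending undef (-1) entries,
// mirroring the MaskNumElts < SrcNumElts branch above.
std::vector<int> padShuffleMask(std::vector<int> Mask, unsigned SrcNumElts) {
  while (Mask.size() < SrcNumElts)
    Mask.push_back(-1);  // undef lane: any value may appear here
  return Mask;
}

int main() {
  std::vector<int> NewMask = padShuffleMask({0, 3}, 4);
  assert((NewMask == std::vector<int>{0, 3, -1, -1}));
  return 0;
}
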
@@ -5014,19 +5043,14 @@ equalizeVectorShuffleLengths(MachineInstr &MI, MachineIRBuilder &MIRBuilder) {
LegalizerHelper::LegalizeResult
LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
unsigned int TypeIdx, LLT MoreTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register Src1Reg = MI.getOperand(1).getReg();
- Register Src2Reg = MI.getOperand(2).getReg();
+ auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
- LLT DstTy = MRI.getType(DstReg);
- LLT Src1Ty = MRI.getType(Src1Reg);
- LLT Src2Ty = MRI.getType(Src2Reg);
unsigned NumElts = DstTy.getNumElements();
unsigned WidenNumElts = MoreTy.getNumElements();
if (DstTy.isVector() && Src1Ty.isVector() &&
- DstTy.getNumElements() > Src1Ty.getNumElements()) {
- return equalizeVectorShuffleLengths(MI, MIRBuilder);
+ DstTy.getNumElements() != Src1Ty.getNumElements()) {
+ return equalizeVectorShuffleLengths(MI);
}
if (TypeIdx != 0)
@@ -5218,9 +5242,7 @@ LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
LegalizerHelper::LegalizeResult
LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
- Register DstReg = MI.getOperand(0).getReg();
- Register Src1 = MI.getOperand(1).getReg();
- Register Src2 = MI.getOperand(2).getReg();
+ auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
LLT Ty = MRI.getType(DstReg);
if (Ty.isVector())
@@ -5471,8 +5493,7 @@ LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 0)
return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
+ auto [DstReg, SrcReg] = MI.getFirst2Regs();
LLT DstTy = MRI.getType(DstReg);
if (DstTy.isVector())
@@ -5539,10 +5560,7 @@ LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 1)
return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned NarrowSize = NarrowTy.getSizeInBits();
if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
@@ -5575,10 +5593,7 @@ LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 1)
return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned NarrowSize = NarrowTy.getSizeInBits();
if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
@@ -5611,9 +5626,7 @@ LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
if (TypeIdx != 1)
return UnableToLegalize;
- Register DstReg = MI.getOperand(0).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned NarrowSize = NarrowTy.getSizeInBits();
if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
@@ -5631,6 +5644,31 @@ LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ MachineIRBuilder &B = MIRBuilder;
+ Register ExpReg = MI.getOperand(2).getReg();
+ LLT ExpTy = MRI.getType(ExpReg);
+
+ unsigned ClampSize = NarrowTy.getScalarSizeInBits();
+
+ // Clamp the exponent to the range of the target type.
+ auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
+ auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
+ auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
+ auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
+
+ auto Trunc = B.buildTrunc(NarrowTy, Clamp);
+ Observer.changingInstr(MI);
+ MI.getOperand(2).setReg(Trunc.getReg(0));
+ Observer.changedInstr(MI);
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitCount(MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
const auto &TII = MIRBuilder.getTII();
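
narrowScalarFLDEXP clamps the exponent into the narrow type's signed range before truncating it. For ordinary IEEE float types the clamp cannot change the result, since their overflow and underflow thresholds lie well inside even an int16 range. A standalone sketch with the standard library (int64_t exponent narrowed to int16_t):

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>

// Clamp, then truncate: any exponent outside [-32768, 32767] already drives a
// finite float to zero or infinity, so clamping first preserves the result.
float ldexp_narrow_exponent(float X, int64_t WideExp) {
  int64_t Clamped = std::clamp<int64_t>(WideExp, INT16_MIN, INT16_MAX);
  int16_t NarrowExp = static_cast<int16_t>(Clamped);  // the "G_TRUNC"
  return std::ldexp(X, NarrowExp);
}

int main() {
  assert(ldexp_narrow_exponent(1.0f, 3) == 8.0f);
  assert(ldexp_narrow_exponent(1.0f, 1000000) == HUGE_VALF);  // overflows to +inf either way
  return 0;
}
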
@@ -5649,10 +5687,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
return Legalized;
}
case TargetOpcode::G_CTLZ: {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned Len = SrcTy.getSizeInBits();
if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
@@ -5699,10 +5734,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI) {
return Legalized;
}
case TargetOpcode::G_CTTZ: {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned Len = SrcTy.getSizeInBits();
if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
@@ -5808,10 +5840,7 @@ static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register X = MI.getOperand(1).getReg();
- Register Y = MI.getOperand(2).getReg();
- Register Z = MI.getOperand(3).getReg();
+ auto [Dst, X, Y, Z] = MI.getFirst4Regs();
LLT Ty = MRI.getType(Dst);
LLT ShTy = MRI.getType(Z);
@@ -5850,10 +5879,7 @@ LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register X = MI.getOperand(1).getReg();
- Register Y = MI.getOperand(2).getReg();
- Register Z = MI.getOperand(3).getReg();
+ auto [Dst, X, Y, Z] = MI.getFirst4Regs();
LLT Ty = MRI.getType(Dst);
LLT ShTy = MRI.getType(Z);
@@ -5932,10 +5958,7 @@ LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Amt = MI.getOperand(2).getReg();
- LLT AmtTy = MRI.getType(Amt);
+ auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
@@ -5946,12 +5969,7 @@ LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Amt = MI.getOperand(2).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
- LLT AmtTy = MRI.getType(Amt);
+ auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
@@ -6021,8 +6039,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
// representation.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
+ auto [Dst, Src] = MI.getFirst2Regs();
const LLT S64 = LLT::scalar(64);
const LLT S32 = LLT::scalar(32);
const LLT S1 = LLT::scalar(1);
@@ -6077,10 +6094,7 @@ LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
if (SrcTy == LLT::scalar(1)) {
auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
@@ -6105,10 +6119,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
const LLT S64 = LLT::scalar(64);
const LLT S32 = LLT::scalar(32);
@@ -6151,10 +6162,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
const LLT S64 = LLT::scalar(64);
const LLT S32 = LLT::scalar(32);
@@ -6194,10 +6202,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
+ auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
const LLT S64 = LLT::scalar(64);
const LLT S32 = LLT::scalar(32);
@@ -6263,17 +6268,27 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
// f64 -> f16 conversion using round-to-nearest-even rounding mode.
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
+ const LLT S1 = LLT::scalar(1);
+ const LLT S32 = LLT::scalar(32);
+
+ auto [Dst, Src] = MI.getFirst2Regs();
+ assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
+ MRI.getType(Src).getScalarType() == LLT::scalar(64));
if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
return UnableToLegalize;
+ if (MIRBuilder.getMF().getTarget().Options.UnsafeFPMath) {
+ unsigned Flags = MI.getFlags();
+ auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
+ MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
const unsigned ExpMask = 0x7ff;
const unsigned ExpBiasf64 = 1023;
const unsigned ExpBiasf16 = 15;
- const LLT S32 = LLT::scalar(32);
- const LLT S1 = LLT::scalar(1);
auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
Register U = Unmerge.getReg(0);
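
The new UnsafeFPMath path above lowers the f64->f16 truncation as two G_FPTRUNCs (f64->f32->f16) instead of the bit-level conversion that follows; rounding twice can differ from a single direct rounding in the last bit, which is presumably why the shortcut is gated on unsafe-fp-math. A sketch of the two-step idea using the _Float16 extension as a stand-in for f16 (an assumption: it needs a recent Clang/GCC on a target such as x86-64 or AArch64, and this is not LLVM API):

// Two-step truncation, mirroring the pair of buildFPTrunc calls above.
_Float16 truncate_via_f32(double X) {
  float Mid = static_cast<float>(X);   // first G_FPTRUNC (f64 -> f32)
  return static_cast<_Float16>(Mid);   // second G_FPTRUNC (f32 -> f16)
}

// The reference: one direct rounding from f64 to f16. For some inputs the two
// functions disagree in the final bit (double rounding).
_Float16 truncate_direct(double X) {
  return static_cast<_Float16>(X);
}
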
@@ -6368,11 +6383,7 @@ LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
-
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
+ auto [DstTy, SrcTy] = MI.getFirst2LLTs();
const LLT S64 = LLT::scalar(64);
const LLT S16 = LLT::scalar(16);
@@ -6385,9 +6396,7 @@ LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
// TODO: If RHS is a constant SelectionDAGBuilder expands this into a
// multiplication tree.
LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
+ auto [Dst, Src0, Src1] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Dst);
auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
@@ -6412,9 +6421,7 @@ static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
+ auto [Dst, Src0, Src1] = MI.getFirst3Regs();
const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
LLT CmpType = MRI.getType(Dst).changeElementSize(1);
@@ -6428,13 +6435,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
-
- const LLT Src0Ty = MRI.getType(Src0);
- const LLT Src1Ty = MRI.getType(Src1);
-
+ auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
const int Src0Size = Src0Ty.getScalarSizeInBits();
const int Src1Size = Src1Ty.getScalarSizeInBits();
@@ -6475,9 +6476,7 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
- Register Dst = MI.getOperand(0).getReg();
- Register Src0 = MI.getOperand(1).getReg();
- Register Src1 = MI.getOperand(2).getReg();
+ auto [Dst, Src0, Src1] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Dst);
if (!MI.getFlag(MachineInstr::FmNoNans)) {
@@ -6516,8 +6515,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
- Register X = MI.getOperand(1).getReg();
+ auto [DstReg, X] = MI.getFirst2Regs();
const unsigned Flags = MI.getFlags();
const LLT Ty = MRI.getType(DstReg);
const LLT CondTy = Ty.changeElementSize(1);
@@ -6547,10 +6545,8 @@ LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
return Legalized;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerFFloor(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFFloor(MachineInstr &MI) {
+ auto [DstReg, SrcReg] = MI.getFirst2Regs();
unsigned Flags = MI.getFlags();
LLT Ty = MRI.getType(DstReg);
const LLT CondTy = Ty.changeElementSize(1);
@@ -6577,11 +6573,8 @@ LegalizerHelper::lowerFFloor(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
const unsigned NumOps = MI.getNumOperands();
- Register DstReg = MI.getOperand(0).getReg();
- Register Src0Reg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(Src0Reg);
- unsigned PartSize = SrcTy.getSizeInBits();
+ auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
+ unsigned PartSize = Src0Ty.getSizeInBits();
LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
@@ -6729,11 +6722,8 @@ LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
- Register Src0Reg = MI.getOperand(1).getReg();
- Register Src1Reg = MI.getOperand(2).getReg();
- LLT Src0Ty = MRI.getType(Src0Reg);
- LLT DstTy = MRI.getType(DstReg);
+ auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
+ MI.getFirst3RegLLTs();
LLT IdxTy = LLT::scalar(32);
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
@@ -6822,13 +6812,9 @@ LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerExtract(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
unsigned Offset = MI.getOperand(2).getImm();
- LLT DstTy = MRI.getType(Dst);
- LLT SrcTy = MRI.getType(Src);
-
// Extract sub-vector or one element
if (SrcTy.isVector()) {
unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
@@ -6837,7 +6823,7 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) {
if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
(Offset + DstSize <= SrcTy.getSizeInBits())) {
// Unmerge and allow access to each Src element for the artifact combiner.
- auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), Src);
+ auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
// Take element(s) we need to extract and copy it (merge them).
SmallVector<Register, 8> SubVectorElts;
@@ -6846,9 +6832,9 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) {
SubVectorElts.push_back(Unmerge.getReg(Idx));
}
if (SubVectorElts.size() == 1)
- MIRBuilder.buildCopy(Dst, SubVectorElts[0]);
+ MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
else
- MIRBuilder.buildMergeLikeInstr(Dst, SubVectorElts);
+ MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
MI.eraseFromParent();
return Legalized;
@@ -6861,15 +6847,15 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) {
LLT SrcIntTy = SrcTy;
if (!SrcTy.isScalar()) {
SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
- Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
+ SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
}
if (Offset == 0)
- MIRBuilder.buildTrunc(Dst, Src);
+ MIRBuilder.buildTrunc(DstReg, SrcReg);
else {
auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
- auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
- MIRBuilder.buildTrunc(Dst, Shr);
+ auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
+ MIRBuilder.buildTrunc(DstReg, Shr);
}
MI.eraseFromParent();
@@ -6880,9 +6866,7 @@ LegalizerHelper::lowerExtract(MachineInstr &MI) {
}
LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register InsertSrc = MI.getOperand(2).getReg();
+ auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
uint64_t Offset = MI.getOperand(3).getImm();
LLT DstTy = MRI.getType(Src);
@@ -6972,14 +6956,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
- Register Dst0 = MI.getOperand(0).getReg();
- Register Dst1 = MI.getOperand(1).getReg();
- Register LHS = MI.getOperand(2).getReg();
- Register RHS = MI.getOperand(3).getReg();
+ auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
+ MI.getFirst4RegLLTs();
const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
- LLT Ty = MRI.getType(Dst0);
- LLT BoolTy = MRI.getType(Dst1);
+ LLT Ty = Dst0Ty;
+ LLT BoolTy = Dst1Ty;
if (IsAdd)
MIRBuilder.buildAdd(Dst0, LHS, RHS);
@@ -7008,9 +6990,7 @@ LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
- Register Res = MI.getOperand(0).getReg();
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
+ auto [Res, LHS, RHS] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Res);
bool IsSigned;
bool IsAdd;
@@ -7085,9 +7065,7 @@ LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
- Register Res = MI.getOperand(0).getReg();
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
+ auto [Res, LHS, RHS] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Res);
LLT BoolTy = Ty.changeElementSize(1);
bool IsSigned;
@@ -7157,9 +7135,7 @@ LegalizerHelper::lowerShlSat(MachineInstr &MI) {
MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
"Expected shlsat opcode!");
bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
- Register Res = MI.getOperand(0).getReg();
- Register LHS = MI.getOperand(1).getReg();
- Register RHS = MI.getOperand(2).getReg();
+ auto [Res, LHS, RHS] = MI.getFirst3Regs();
LLT Ty = MRI.getType(Res);
LLT BoolTy = Ty.changeElementSize(1);
@@ -7185,10 +7161,8 @@ LegalizerHelper::lowerShlSat(MachineInstr &MI) {
return Legalized;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::lowerBswap(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerBswap(MachineInstr &MI) {
+ auto [Dst, Src] = MI.getFirst2Regs();
const LLT Ty = MRI.getType(Src);
unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
@@ -7233,8 +7207,7 @@ static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
+ auto [Dst, Src] = MI.getFirst2Regs();
const LLT Ty = MRI.getType(Src);
unsigned Size = Ty.getSizeInBits();
@@ -7312,23 +7285,23 @@ LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
LegalizerHelper::LegalizeResult
LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
- Register DstReg = MI.getOperand(0).getReg();
- Register SrcReg = MI.getOperand(1).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT SrcTy = MRI.getType(SrcReg);
- uint64_t Mask = MI.getOperand(2).getImm();
+ auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
+ FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
- if (Mask == 0) {
+ if (Mask == fcNone) {
MIRBuilder.buildConstant(DstReg, 0);
MI.eraseFromParent();
return Legalized;
}
- if ((Mask & fcAllFlags) == fcAllFlags) {
+ if (Mask == fcAllFlags) {
MIRBuilder.buildConstant(DstReg, 1);
MI.eraseFromParent();
return Legalized;
}
+ // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
+ // version
+
unsigned BitSize = SrcTy.getScalarSizeInBits();
const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
@@ -7345,7 +7318,7 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
APInt QNaNBitMask =
APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
- APInt InvertionMask = APInt::getAllOnesValue(DstTy.getScalarSizeInBits());
+ APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
@@ -7358,8 +7331,10 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
auto Res = MIRBuilder.buildConstant(DstTy, 0);
+ // Clang doesn't support capture of structured bindings:
+ LLT DstTyCopy = DstTy;
const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
- Res = MIRBuilder.buildOr(DstTy, Res, ToAppend);
+ Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
};
// Tests that involve more than one class should be processed first.
@@ -7382,8 +7357,20 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
Mask &= ~fcNegFinite;
}
+ if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
+ // fcZero | fcSubnormal => test all exponent bits are 0
+ // TODO: Handle sign bit specific cases
+ // TODO: Handle inverted case
+ if (PartialCheck == (fcZero | fcSubnormal)) {
+ auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
+ appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
+ ExpBits, ZeroC));
+ Mask &= ~PartialCheck;
+ }
+ }
+
// Check for individual classes.
- if (unsigned PartialCheck = Mask & fcZero) {
+ if (FPClassTest PartialCheck = Mask & fcZero) {
if (PartialCheck == fcPosZero)
appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
AsInt, ZeroC));
@@ -7395,7 +7382,21 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
AsInt, SignBitC));
}
- if (unsigned PartialCheck = Mask & fcInf) {
+ if (FPClassTest PartialCheck = Mask & fcSubnormal) {
+ // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
+ // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
+ auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
+ auto OneC = MIRBuilder.buildConstant(IntTy, 1);
+ auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
+ auto SubnormalRes =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
+ MIRBuilder.buildConstant(IntTy, AllOneMantissa));
+ if (PartialCheck == fcNegSubnormal)
+ SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
+ appendToRes(SubnormalRes);
+ }
+
+ if (FPClassTest PartialCheck = Mask & fcInf) {
if (PartialCheck == fcPosInf)
appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
AsInt, InfC));
@@ -7410,7 +7411,7 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
}
}
- if (unsigned PartialCheck = Mask & fcNan) {
+ if (FPClassTest PartialCheck = Mask & fcNan) {
auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
if (PartialCheck == fcNan) {
// isnan(V) ==> abs(V) u> int(inf)
@@ -7431,21 +7432,7 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
}
}
- if (unsigned PartialCheck = Mask & fcSubnormal) {
- // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
- // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
- auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
- auto OneC = MIRBuilder.buildConstant(IntTy, 1);
- auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
- auto SubnormalRes =
- MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
- MIRBuilder.buildConstant(IntTy, AllOneMantissa));
- if (PartialCheck == fcNegSubnormal)
- SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
- appendToRes(SubnormalRes);
- }
-
- if (unsigned PartialCheck = Mask & fcNormal) {
+ if (FPClassTest PartialCheck = Mask & fcNormal) {
// isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
// (max_exp-1))
APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
@@ -7472,12 +7459,8 @@ LegalizerHelper::lowerISFPCLASS(MachineInstr &MI) {
LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
// Implement vector G_SELECT in terms of XOR, AND, OR.
- Register DstReg = MI.getOperand(0).getReg();
- Register MaskReg = MI.getOperand(1).getReg();
- Register Op1Reg = MI.getOperand(2).getReg();
- Register Op2Reg = MI.getOperand(3).getReg();
- LLT DstTy = MRI.getType(DstReg);
- LLT MaskTy = MRI.getType(MaskReg);
+ auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
+ MI.getFirst4RegLLTs();
if (!DstTy.isVector())
return UnableToLegalize;
@@ -7591,7 +7574,7 @@ LegalizerHelper::lowerVectorReduction(MachineInstr &MI) {
Observer.changedInstr(MI);
return Legalized;
}
- return UnableToLegalize;;
+ return UnableToLegalize;
}
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
@@ -7638,7 +7621,7 @@ static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
// SDAGisms map cleanly to GISel concepts.
if (NewTy.isVector())
NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
- NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
+ NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
unsigned NewTySize = NewTy.getSizeInBytes();
assert(NewTySize > 0 && "Could not find appropriate type");
@@ -7826,9 +7809,7 @@ LegalizerHelper::LegalizeResult
LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Len = MI.getOperand(2).getReg();
+ auto [Dst, Src, Len] = MI.getFirst3Regs();
const auto *MMOIt = MI.memoperands_begin();
const MachineMemOperand *MemOp = *MMOIt;
@@ -8091,9 +8072,7 @@ LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
Align DstAlign = MemOp->getBaseAlign();
Align SrcAlign;
- Register Dst = MI.getOperand(0).getReg();
- Register Src = MI.getOperand(1).getReg();
- Register Len = MI.getOperand(2).getReg();
+ auto [Dst, Src, Len] = MI.getFirst3Regs();
if (Opc != TargetOpcode::G_MEMSET) {
assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
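The integer comparisons introduced in the lowerISFPCLASS hunks earlier in this file's diff can be sanity-checked on plain IEEE binary32 bit patterns. Below is a minimal standalone sketch (ordinary C++ on uint32_t values, not the generic MIR the lowering actually emits; the constants are the binary32 values implied by the comments in those hunks):

#include <cassert>
#include <cstdint>

// Standalone sketch, not LLVM code. Binary32 constants matching the lowering's
// comments: Inf has all exponent bits set and a zero mantissa; AllOneMantissa
// is the mantissa field mask (getLargest() & ~Inf).
constexpr uint32_t SignBit = 0x80000000u;
constexpr uint32_t Inf = 0x7F800000u;
constexpr uint32_t AllOneMantissa = 0x007FFFFFu;

// isnan(V) ==> abs(V) u> int(inf)
bool isNanBits(uint32_t Bits) { return (Bits & ~SignBit) > Inf; }

// isinf(V) ==> abs(V) == int(inf)
bool isInfBits(uint32_t Bits) { return (Bits & ~SignBit) == Inf; }

// issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
bool isSubnormalBits(uint32_t Bits) {
  return (Bits & ~SignBit) - 1u < AllOneMantissa;
}

int main() {
  assert(isNanBits(0x7FC00000u));        // quiet NaN
  assert(isInfBits(0xFF800000u));        // -inf
  assert(isSubnormalBits(0x00000001u));  // smallest positive denormal
  assert(!isSubnormalBits(0x00000000u)); // +0.0 wraps to 0xFFFFFFFF and fails
  assert(!isSubnormalBits(0x00800000u)); // smallest normal
}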
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 4b6c3a156709..1f2e481c63e0 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -21,7 +22,6 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include <algorithm>
using namespace llvm;
diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index 7c6eac8c8ce0..49f40495d6fc 100644
--- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -10,6 +10,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
@@ -18,7 +20,7 @@
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -305,7 +307,7 @@ bool LoadStoreOpt::mergeStores(SmallVectorImpl<GStore *> &StoresToMerge) {
const auto &DL = MF->getFunction().getParent()->getDataLayout();
bool AnyMerged = false;
do {
- unsigned NumPow2 = PowerOf2Floor(StoresToMerge.size());
+ unsigned NumPow2 = llvm::bit_floor(StoresToMerge.size());
unsigned MaxSizeBits = NumPow2 * OrigTy.getSizeInBits().getFixedValue();
// Compute the biggest store we can generate to handle the number of stores.
unsigned MergeSizeBits;
@@ -400,7 +402,9 @@ bool LoadStoreOpt::doSingleStoreMerge(SmallVectorImpl<GStore *> &Stores) {
auto NewStore =
Builder.buildStore(WideReg, FirstStore->getPointerReg(), *WideMMO);
(void) NewStore;
- LLVM_DEBUG(dbgs() << "Created merged store: " << *NewStore);
+ LLVM_DEBUG(dbgs() << "Merged " << Stores.size()
+ << " stores into merged store: " << *NewStore);
+ LLVM_DEBUG(for (auto *MI : Stores) dbgs() << " " << *MI;);
NumStoresMerged += Stores.size();
MachineOptimizationRemarkEmitter MORE(*MF, nullptr);
@@ -445,20 +449,19 @@ bool LoadStoreOpt::processMergeCandidate(StoreMergeCandidate &C) {
for (auto AliasInfo : reverse(C.PotentialAliases)) {
MachineInstr *PotentialAliasOp = AliasInfo.first;
unsigned PreCheckedIdx = AliasInfo.second;
- if (static_cast<unsigned>(Idx) > PreCheckedIdx) {
- // Need to check this alias.
- if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI,
- AA)) {
- LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
- << " detected\n");
- return true;
- }
- } else {
+ if (static_cast<unsigned>(Idx) < PreCheckedIdx) {
// Once our store index is lower than the index associated with the
// potential alias, we know that we've already checked for this alias
// and all of the earlier potential aliases too.
return false;
}
+ // Need to check this alias.
+ if (GISelAddressing::instMayAlias(CheckStore, *PotentialAliasOp, *MRI,
+ AA)) {
+ LLVM_DEBUG(dbgs() << "Potential alias " << *PotentialAliasOp
+ << " detected\n");
+ return true;
+ }
}
return false;
};
@@ -616,11 +619,304 @@ bool LoadStoreOpt::mergeBlockStores(MachineBasicBlock &MBB) {
return Changed;
}
+/// Check if the store \p Store is a truncstore that can be merged. That is,
+/// it's a store of a shifted value of \p SrcVal. If \p SrcVal is an empty
+/// Register then it does not need to match and SrcVal is set to the source
+/// value found.
+/// On match, returns the start byte offset of the \p SrcVal that is being
+/// stored.
+static std::optional<int64_t>
+getTruncStoreByteOffset(GStore &Store, Register &SrcVal,
+ MachineRegisterInfo &MRI) {
+ Register TruncVal;
+ if (!mi_match(Store.getValueReg(), MRI, m_GTrunc(m_Reg(TruncVal))))
+ return std::nullopt;
+
+ // The shift amount must be a constant multiple of the narrow type.
+ // It is translated to the offset address in the wide source value "y".
+ //
+ // x = G_LSHR y, ShiftAmtC
+ // s8 z = G_TRUNC x
+ // store z, ...
+ Register FoundSrcVal;
+ int64_t ShiftAmt;
+ if (!mi_match(TruncVal, MRI,
+ m_any_of(m_GLShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt)),
+ m_GAShr(m_Reg(FoundSrcVal), m_ICst(ShiftAmt))))) {
+ if (!SrcVal.isValid() || TruncVal == SrcVal) {
+ if (!SrcVal.isValid())
+ SrcVal = TruncVal;
+ return 0; // If it's the lowest index store.
+ }
+ return std::nullopt;
+ }
+
+ unsigned NarrowBits = Store.getMMO().getMemoryType().getScalarSizeInBits();
+ if (ShiftAmt % NarrowBits != 0)
+ return std::nullopt;
+ const unsigned Offset = ShiftAmt / NarrowBits;
+
+ if (SrcVal.isValid() && FoundSrcVal != SrcVal)
+ return std::nullopt;
+
+ if (!SrcVal.isValid())
+ SrcVal = FoundSrcVal;
+ else if (MRI.getType(SrcVal) != MRI.getType(FoundSrcVal))
+ return std::nullopt;
+ return Offset;
+}
+
+/// Match a pattern where a wide type scalar value is stored by several narrow
+/// stores. Fold it into a single store or a BSWAP and a store if the target
+/// supports it.
+///
+/// Assuming little endian target:
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 0) & 0xFF;
+/// p[1] = (val >> 8) & 0xFF;
+/// p[2] = (val >> 16) & 0xFF;
+/// p[3] = (val >> 24) & 0xFF;
+/// =>
+/// *((i32)p) = val;
+///
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 24) & 0xFF;
+/// p[1] = (val >> 16) & 0xFF;
+/// p[2] = (val >> 8) & 0xFF;
+/// p[3] = (val >> 0) & 0xFF;
+/// =>
+/// *((i32)p) = BSWAP(val);
+bool LoadStoreOpt::mergeTruncStore(GStore &StoreMI,
+ SmallPtrSetImpl<GStore *> &DeletedStores) {
+ LLT MemTy = StoreMI.getMMO().getMemoryType();
+
+ // We only handle merging simple stores of 1-4 bytes.
+ if (!MemTy.isScalar())
+ return false;
+ switch (MemTy.getSizeInBits()) {
+ case 8:
+ case 16:
+ case 32:
+ break;
+ default:
+ return false;
+ }
+ if (!StoreMI.isSimple())
+ return false;
+
+ // We do a simple search for mergeable stores prior to this one.
+ // Any potential alias hazard along the way terminates the search.
+ SmallVector<GStore *> FoundStores;
+
+ // We're looking for:
+ // 1) a (store(trunc(...)))
+ // 2) of an LSHR/ASHR of a single wide value, by the appropriate shift to get
+ // the partial value stored.
+ // 3) where the offsets form either a little or big-endian sequence.
+
+ auto &LastStore = StoreMI;
+
+ // The single base pointer that all stores must use.
+ Register BaseReg;
+ int64_t LastOffset;
+ if (!mi_match(LastStore.getPointerReg(), *MRI,
+ m_GPtrAdd(m_Reg(BaseReg), m_ICst(LastOffset)))) {
+ BaseReg = LastStore.getPointerReg();
+ LastOffset = 0;
+ }
+
+ GStore *LowestIdxStore = &LastStore;
+ int64_t LowestIdxOffset = LastOffset;
+
+ Register WideSrcVal;
+ auto LowestShiftAmt = getTruncStoreByteOffset(LastStore, WideSrcVal, *MRI);
+ if (!LowestShiftAmt)
+ return false; // Didn't match a trunc.
+ assert(WideSrcVal.isValid());
+
+ LLT WideStoreTy = MRI->getType(WideSrcVal);
+ // The wide type might not be a multiple of the memory type, e.g. s48 and s32.
+ if (WideStoreTy.getSizeInBits() % MemTy.getSizeInBits() != 0)
+ return false;
+ const unsigned NumStoresRequired =
+ WideStoreTy.getSizeInBits() / MemTy.getSizeInBits();
+
+ SmallVector<int64_t, 8> OffsetMap(NumStoresRequired, INT64_MAX);
+ OffsetMap[*LowestShiftAmt] = LastOffset;
+ FoundStores.emplace_back(&LastStore);
+
+ const int MaxInstsToCheck = 10;
+ int NumInstsChecked = 0;
+ for (auto II = ++LastStore.getReverseIterator();
+ II != LastStore.getParent()->rend() && NumInstsChecked < MaxInstsToCheck;
+ ++II) {
+ NumInstsChecked++;
+ GStore *NewStore;
+ if ((NewStore = dyn_cast<GStore>(&*II))) {
+ if (NewStore->getMMO().getMemoryType() != MemTy || !NewStore->isSimple())
+ break;
+ } else if (II->isLoadFoldBarrier() || II->mayLoad()) {
+ break;
+ } else {
+ continue; // This is a safe instruction we can look past.
+ }
+
+ Register NewBaseReg;
+ int64_t MemOffset;
+ // Check we're storing to the same base + some offset.
+ if (!mi_match(NewStore->getPointerReg(), *MRI,
+ m_GPtrAdd(m_Reg(NewBaseReg), m_ICst(MemOffset)))) {
+ NewBaseReg = NewStore->getPointerReg();
+ MemOffset = 0;
+ }
+ if (BaseReg != NewBaseReg)
+ break;
+
+ auto ShiftByteOffset = getTruncStoreByteOffset(*NewStore, WideSrcVal, *MRI);
+ if (!ShiftByteOffset)
+ break;
+ if (MemOffset < LowestIdxOffset) {
+ LowestIdxOffset = MemOffset;
+ LowestIdxStore = NewStore;
+ }
+
+ // Map the offset in the store and the offset in the combined value, and
+ // early return if it has been set before.
+ if (*ShiftByteOffset < 0 || *ShiftByteOffset >= NumStoresRequired ||
+ OffsetMap[*ShiftByteOffset] != INT64_MAX)
+ break;
+ OffsetMap[*ShiftByteOffset] = MemOffset;
+
+ FoundStores.emplace_back(NewStore);
+ // Reset counter since we've found a matching inst.
+ NumInstsChecked = 0;
+ if (FoundStores.size() == NumStoresRequired)
+ break;
+ }
+
+ if (FoundStores.size() != NumStoresRequired) {
+ if (FoundStores.size() == 1)
+ return false;
+ // We didn't find enough stores to merge into the size of the original
+ // source value, but we may be able to generate a smaller store if we
+ // truncate the source value.
+ WideStoreTy = LLT::scalar(FoundStores.size() * MemTy.getScalarSizeInBits());
+ }
+
+ unsigned NumStoresFound = FoundStores.size();
+
+ const auto &DL = LastStore.getMF()->getDataLayout();
+ auto &C = LastStore.getMF()->getFunction().getContext();
+ // Check that a store of the wide type is both allowed and fast on the target
+ unsigned Fast = 0;
+ bool Allowed = TLI->allowsMemoryAccess(
+ C, DL, WideStoreTy, LowestIdxStore->getMMO(), &Fast);
+ if (!Allowed || !Fast)
+ return false;
+
+ // Check if the pieces of the value are going to the expected places in memory
+ // to merge the stores.
+ unsigned NarrowBits = MemTy.getScalarSizeInBits();
+ auto checkOffsets = [&](bool MatchLittleEndian) {
+ if (MatchLittleEndian) {
+ for (unsigned i = 0; i != NumStoresFound; ++i)
+ if (OffsetMap[i] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ } else { // MatchBigEndian by reversing loop counter.
+ for (unsigned i = 0, j = NumStoresFound - 1; i != NumStoresFound;
+ ++i, --j)
+ if (OffsetMap[j] != i * (NarrowBits / 8) + LowestIdxOffset)
+ return false;
+ }
+ return true;
+ };
+
+ // Check if the offsets line up for the native data layout of this target.
+ bool NeedBswap = false;
+ bool NeedRotate = false;
+ if (!checkOffsets(DL.isLittleEndian())) {
+ // Special-case: check if byte offsets line up for the opposite endian.
+ if (NarrowBits == 8 && checkOffsets(DL.isBigEndian()))
+ NeedBswap = true;
+ else if (NumStoresFound == 2 && checkOffsets(DL.isBigEndian()))
+ NeedRotate = true;
+ else
+ return false;
+ }
+
+ if (NeedBswap &&
+ !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {WideStoreTy}}, *MF))
+ return false;
+ if (NeedRotate &&
+ !isLegalOrBeforeLegalizer(
+ {TargetOpcode::G_ROTR, {WideStoreTy, WideStoreTy}}, *MF))
+ return false;
+
+ Builder.setInstrAndDebugLoc(StoreMI);
+
+ if (WideStoreTy != MRI->getType(WideSrcVal))
+ WideSrcVal = Builder.buildTrunc(WideStoreTy, WideSrcVal).getReg(0);
+
+ if (NeedBswap) {
+ WideSrcVal = Builder.buildBSwap(WideStoreTy, WideSrcVal).getReg(0);
+ } else if (NeedRotate) {
+ assert(WideStoreTy.getSizeInBits() % 2 == 0 &&
+ "Unexpected type for rotate");
+ auto RotAmt =
+ Builder.buildConstant(WideStoreTy, WideStoreTy.getSizeInBits() / 2);
+ WideSrcVal =
+ Builder.buildRotateRight(WideStoreTy, WideSrcVal, RotAmt).getReg(0);
+ }
+
+ Builder.buildStore(WideSrcVal, LowestIdxStore->getPointerReg(),
+ LowestIdxStore->getMMO().getPointerInfo(),
+ LowestIdxStore->getMMO().getAlign());
+
+ // Erase the old stores.
+ for (auto *ST : FoundStores) {
+ ST->eraseFromParent();
+ DeletedStores.insert(ST);
+ }
+ return true;
+}
+
+bool LoadStoreOpt::mergeTruncStoresBlock(MachineBasicBlock &BB) {
+ bool Changed = false;
+ SmallVector<GStore *, 16> Stores;
+ SmallPtrSet<GStore *, 8> DeletedStores;
+ // Walk up the block so we can see the most eligible stores.
+ for (MachineInstr &MI : llvm::reverse(BB))
+ if (auto *StoreMI = dyn_cast<GStore>(&MI))
+ Stores.emplace_back(StoreMI);
+
+ for (auto *StoreMI : Stores) {
+ if (DeletedStores.count(StoreMI))
+ continue;
+ if (mergeTruncStore(*StoreMI, DeletedStores))
+ Changed = true;
+ }
+ return Changed;
+}
+
bool LoadStoreOpt::mergeFunctionStores(MachineFunction &MF) {
bool Changed = false;
- for (auto &BB : MF) {
+ for (auto &BB : MF){
Changed |= mergeBlockStores(BB);
+ Changed |= mergeTruncStoresBlock(BB);
+ }
+
+ // Erase all dead instructions left over by the merging.
+ if (Changed) {
+ for (auto &BB : MF) {
+ for (auto &I : make_early_inc_range(make_range(BB.rbegin(), BB.rend()))) {
+ if (isTriviallyDead(I, *MRI))
+ I.eraseFromParent();
+ }
+ }
}
+
return Changed;
}
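The truncating-store merge added above rests on two small pieces of bookkeeping: each narrow store's position inside the wide value is its shift amount divided by the narrow bit width (getTruncStoreByteOffset), and the recorded memory offsets must form a contiguous little- or big-endian run (the checkOffsets lambda). A standalone sketch of that arithmetic follows, using hypothetical helper names that are not LLVM API:

#include <cassert>
#include <cstdint>
#include <optional>
#include <vector>

// Standalone sketch, not LLVM code. Index of the piece stored by
// trunc(lshr(WideVal, ShiftAmt)) when the memory type is NarrowBits wide;
// nullopt if the shift is not element-aligned.
std::optional<int64_t> pieceIndex(int64_t ShiftAmt, unsigned NarrowBits) {
  if (ShiftAmt % NarrowBits != 0)
    return std::nullopt;
  return ShiftAmt / NarrowBits;
}

// OffsetMap[i] holds the memory offset at which piece i of the wide value was
// stored. The stores fold into one wide little-endian store iff piece i sits
// at LowestOffset + i * (NarrowBits / 8); walking i in reverse gives the
// big-endian (bswap/rotate) variant checked above.
bool isLittleEndianRun(const std::vector<int64_t> &OffsetMap,
                       int64_t LowestOffset, unsigned NarrowBits) {
  for (size_t i = 0; i != OffsetMap.size(); ++i)
    if (OffsetMap[i] != LowestOffset + int64_t(i) * (NarrowBits / 8))
      return false;
  return true;
}

int main() {
  // p[0..3] = (val >> 0/8/16/24) & 0xFF from the function comment above:
  assert(pieceIndex(16, 8) == 2);
  assert(!pieceIndex(12, 8));                    // misaligned shift
  assert(isLittleEndianRun({0, 1, 2, 3}, 0, 8)); // folds to *(i32 *)p = val
}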
diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index bf4dcc2c2459..55984423e5bc 100644
--- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -54,7 +54,7 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
MachineInstr &MIUse = *MOUse.getParent();
InsertMBB = MIUse.getParent();
if (MIUse.isPHI())
- InsertMBB = MIUse.getOperand(MIUse.getOperandNo(&MOUse) + 1).getMBB();
+ InsertMBB = MIUse.getOperand(MOUse.getOperandNo() + 1).getMBB();
return InsertMBB == Def.getParent();
}
@@ -99,7 +99,7 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
MachineBasicBlock *InsertMBB;
LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
dbgs() << "Checking use: " << MIUse
- << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
+ << " #Opd: " << MOUse.getOperandNo() << '\n');
if (isLocalUse(MOUse, MI, InsertMBB)) {
// Even if we're in the same block, if the block is very large we could
// still have many long live ranges. Try to do intra-block localization
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 9100e064f30f..962b54ec5d6b 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -80,11 +80,11 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
assert(
cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
"Expected inlined-at fields to agree");
- return buildInstr(TargetOpcode::DBG_VALUE)
- .addFrameIndex(FI)
- .addImm(0)
- .addMetadata(Variable)
- .addMetadata(Expr);
+ return insertInstr(buildInstrNoInsert(TargetOpcode::DBG_VALUE)
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addMetadata(Variable)
+ .addMetadata(Expr));
}
MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
@@ -164,6 +164,15 @@ MachineInstrBuilder MachineIRBuilder::buildGlobalValue(const DstOp &Res,
return MIB;
}
+MachineInstrBuilder MachineIRBuilder::buildConstantPool(const DstOp &Res,
+ unsigned Idx) {
+ assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
+ auto MIB = buildInstr(TargetOpcode::G_CONSTANT_POOL);
+ Res.addDefToMIB(*getMRI(), MIB);
+ MIB.addConstantPoolIndex(Idx);
+ return MIB;
+}
+
MachineInstrBuilder MachineIRBuilder::buildJumpTable(const LLT PtrTy,
unsigned JTI) {
return buildInstr(TargetOpcode::G_JUMP_TABLE, {PtrTy}, {})
@@ -229,17 +238,25 @@ MachineIRBuilder::buildPadVectorWithUndefElements(const DstOp &Res,
LLT ResTy = Res.getLLTTy(*getMRI());
LLT Op0Ty = Op0.getLLTTy(*getMRI());
- assert((ResTy.isVector() && Op0Ty.isVector()) && "Non vector type");
- assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
- "Different vector element types");
- assert((ResTy.getNumElements() > Op0Ty.getNumElements()) &&
- "Op0 has more elements");
+ assert(ResTy.isVector() && "Res non vector type");
- auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
SmallVector<Register, 8> Regs;
- for (auto Op : Unmerge.getInstr()->defs())
- Regs.push_back(Op.getReg());
- Register Undef = buildUndef(Op0Ty.getElementType()).getReg(0);
+ if (Op0Ty.isVector()) {
+ assert((ResTy.getElementType() == Op0Ty.getElementType()) &&
+ "Different vector element types");
+ assert((ResTy.getNumElements() > Op0Ty.getNumElements()) &&
+ "Op0 has more elements");
+ auto Unmerge = buildUnmerge(Op0Ty.getElementType(), Op0);
+
+ for (auto Op : Unmerge.getInstr()->defs())
+ Regs.push_back(Op.getReg());
+ } else {
+ assert((ResTy.getSizeInBits() > Op0Ty.getSizeInBits()) &&
+ "Op0 has more size");
+ Regs.push_back(Op0.getReg());
+ }
+ Register Undef =
+ buildUndef(Op0Ty.isVector() ? Op0Ty.getElementType() : Op0Ty).getReg(0);
unsigned NumberOfPadElts = ResTy.getNumElements() - Regs.size();
for (unsigned i = 0; i < NumberOfPadElts; ++i)
Regs.push_back(Undef);
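The hunk above extends buildPadVectorWithUndefElements so that a plain scalar source is also accepted and padded out with G_IMPLICIT_DEF elements. A rough usage sketch, assuming it is called from a GlobalISel pass that already has a MachineIRBuilder positioned at the insertion point:

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;

// Usage sketch under the assumption above; the helper name is hypothetical.
// Widen one s32 value into a <4 x s32> result. With the new scalar path this
// is expected to expand to roughly:
//   %u:_(s32) = G_IMPLICIT_DEF
//   %r:_(<4 x s32>) = G_BUILD_VECTOR %scalar, %u, %u, %u
static Register padScalarToV4S32(MachineIRBuilder &MIRBuilder, Register Scalar) {
  LLT V4S32 = LLT::fixed_vector(4, 32);
  return MIRBuilder.buildPadVectorWithUndefElements(V4S32, Scalar).getReg(0);
}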
diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 080f3ca540f2..885a1056b2ea 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -69,8 +69,8 @@ INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,
"Assign register bank of generic virtual registers", false,
false)
-RegBankSelect::RegBankSelect(Mode RunningMode)
- : MachineFunctionPass(ID), OptMode(RunningMode) {
+RegBankSelect::RegBankSelect(char &PassID, Mode RunningMode)
+ : MachineFunctionPass(PassID), OptMode(RunningMode) {
if (RegBankSelectMode.getNumOccurrences() != 0) {
OptMode = RegBankSelectMode;
if (RegBankSelectMode != RunningMode)
@@ -162,8 +162,10 @@ bool RegBankSelect::repairReg(
MI = MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY)
.addDef(Dst)
.addUse(Src);
- LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
- << '\n');
+ LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << ':'
+ << printRegClassOrBank(Src, *MRI, TRI)
+ << " to: " << printReg(Dst) << ':'
+ << printRegClassOrBank(Dst, *MRI, TRI) << '\n');
} else {
// TODO: Support with G_IMPLICIT_DEF + G_INSERT sequence or G_EXTRACT
// sequence.
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 07448548c295..080600d3cc98 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -230,10 +230,7 @@ bool llvm::isTriviallyDead(const MachineInstr &MI,
return false;
// Instructions without side-effects are dead iff they only define dead vregs.
- for (const auto &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
-
+ for (const auto &MO : MI.all_defs()) {
Register Reg = MO.getReg();
if (Reg.isPhysical() || !MRI.use_nodbg_empty(Reg))
return false;
@@ -711,14 +708,14 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
Align llvm::inferAlignFromPtrInfo(MachineFunction &MF,
const MachinePointerInfo &MPO) {
- auto PSV = MPO.V.dyn_cast<const PseudoSourceValue *>();
+ auto PSV = dyn_cast_if_present<const PseudoSourceValue *>(MPO.V);
if (auto FSPV = dyn_cast_or_null<FixedStackPseudoSourceValue>(PSV)) {
MachineFrameInfo &MFI = MF.getFrameInfo();
return commonAlignment(MFI.getObjectAlign(FSPV->getFrameIndex()),
MPO.Offset);
}
- if (const Value *V = MPO.V.dyn_cast<const Value *>()) {
+ if (const Value *V = dyn_cast_if_present<const Value *>(MPO.V)) {
const Module *M = MF.getFunction().getParent();
return V->getPointerAlignment(M->getDataLayout());
}
@@ -797,7 +794,7 @@ llvm::ConstantFoldCTLZ(Register Src, const MachineRegisterInfo &MRI) {
auto MaybeCst = getIConstantVRegVal(R, MRI);
if (!MaybeCst)
return std::nullopt;
- return MaybeCst->countLeadingZeros();
+ return MaybeCst->countl_zero();
};
if (Ty.isVector()) {
// Try to constant fold each element.
diff --git a/llvm/lib/CodeGen/GlobalMerge.cpp b/llvm/lib/CodeGen/GlobalMerge.cpp
index 2ccf2def48f8..f259cbc1d788 100644
--- a/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -67,7 +67,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/BasicBlock.h"
@@ -92,6 +91,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -156,7 +156,7 @@ namespace {
/// Whether we should merge global variables that have external linkage.
bool MergeExternalGlobals = false;
- bool IsMachO;
+ bool IsMachO = false;
bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const;
@@ -652,6 +652,14 @@ bool GlobalMerge::doInitialization(Module &M) {
if (isMustKeepGlobalVariable(&GV))
continue;
+ // Don't merge tagged globals, as each global should have its own unique
+ // memory tag at runtime. TODO(hctim): This can be relaxed: constant globals
+ // with compatible alignment and the same contents may be merged as long as
+ // the globals occupy the same number of tag granules (i.e. `size_a / 16 ==
+ // size_b / 16`).
+ if (GV.isTagged())
+ continue;
+
Type *Ty = GV.getValueType();
if (DL.getTypeAllocSize(Ty) < MaxOffset) {
if (TM &&
diff --git a/llvm/lib/CodeGen/HardwareLoops.cpp b/llvm/lib/CodeGen/HardwareLoops.cpp
index 258ad1931b12..e7b14d700a44 100644
--- a/llvm/lib/CodeGen/HardwareLoops.cpp
+++ b/llvm/lib/CodeGen/HardwareLoops.cpp
@@ -15,8 +15,10 @@
///
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/HardwareLoops.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -115,12 +117,12 @@ namespace {
using TTI = TargetTransformInfo;
- class HardwareLoops : public FunctionPass {
+ class HardwareLoopsLegacy : public FunctionPass {
public:
static char ID;
- HardwareLoops() : FunctionPass(ID) {
- initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
+ HardwareLoopsLegacy() : FunctionPass(ID) {
+ initializeHardwareLoopsLegacyPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -131,29 +133,44 @@ namespace {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ AU.addPreserved<BranchProbabilityInfoWrapperPass>();
}
+ };
+
+ class HardwareLoopsImpl {
+ public:
+ HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA,
+ DominatorTree &DT, const DataLayout &DL,
+ const TargetTransformInfo &TTI, TargetLibraryInfo *TLI,
+ AssumptionCache &AC, OptimizationRemarkEmitter *ORE,
+ HardwareLoopOptions &Opts)
+ : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), DL(DL), TTI(TTI),
+ TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) { }
+ bool run(Function &F);
+
+ private:
// Try to convert the given Loop into a hardware loop.
- bool TryConvertLoop(Loop *L);
+ bool TryConvertLoop(Loop *L, LLVMContext &Ctx);
// Given that the target believes the loop to be profitable, try to
// convert it.
bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
- private:
- ScalarEvolution *SE = nullptr;
- LoopInfo *LI = nullptr;
- const DataLayout *DL = nullptr;
- OptimizationRemarkEmitter *ORE = nullptr;
- const TargetTransformInfo *TTI = nullptr;
- DominatorTree *DT = nullptr;
- bool PreserveLCSSA = false;
- AssumptionCache *AC = nullptr;
- TargetLibraryInfo *LibInfo = nullptr;
- Module *M = nullptr;
+ ScalarEvolution &SE;
+ LoopInfo &LI;
+ bool PreserveLCSSA;
+ DominatorTree &DT;
+ const DataLayout &DL;
+ const TargetTransformInfo &TTI;
+ TargetLibraryInfo *TLI = nullptr;
+ AssumptionCache &AC;
+ OptimizationRemarkEmitter *ORE;
+ HardwareLoopOptions &Opts;
bool MadeChange = false;
};
@@ -182,8 +199,9 @@ namespace {
public:
HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
const DataLayout &DL,
- OptimizationRemarkEmitter *ORE) :
- SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
+ OptimizationRemarkEmitter *ORE,
+ HardwareLoopOptions &Opts) :
+ SE(SE), DL(DL), ORE(ORE), Opts(Opts), L(Info.L), M(L->getHeader()->getModule()),
ExitCount(Info.ExitCount),
CountType(Info.CountType),
ExitBranch(Info.ExitBranch),
@@ -197,6 +215,7 @@ namespace {
ScalarEvolution &SE;
const DataLayout &DL;
OptimizationRemarkEmitter *ORE = nullptr;
+ HardwareLoopOptions &Opts;
Loop *L = nullptr;
Module *M = nullptr;
const SCEV *ExitCount = nullptr;
@@ -209,40 +228,83 @@ namespace {
};
}
-char HardwareLoops::ID = 0;
+char HardwareLoopsLegacy::ID = 0;
-bool HardwareLoops::runOnFunction(Function &F) {
+bool HardwareLoopsLegacy::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- DL = &F.getParent()->getDataLayout();
- ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto &DL = F.getParent()->getDataLayout();
+ auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
- PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- M = F.getParent();
+ auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+ HardwareLoopOptions Opts;
+ if (ForceHardwareLoops.getNumOccurrences())
+ Opts.setForce(ForceHardwareLoops);
+ if (ForceHardwareLoopPHI.getNumOccurrences())
+ Opts.setForcePhi(ForceHardwareLoopPHI);
+ if (ForceNestedLoop.getNumOccurrences())
+ Opts.setForceNested(ForceNestedLoop);
+ if (ForceGuardLoopEntry.getNumOccurrences())
+ Opts.setForceGuard(ForceGuardLoopEntry);
+ if (LoopDecrement.getNumOccurrences())
+ Opts.setDecrement(LoopDecrement);
+ if (CounterBitWidth.getNumOccurrences())
+ Opts.setCounterBitwidth(CounterBitWidth);
- for (Loop *L : *LI)
- if (L->isOutermost())
- TryConvertLoop(L);
+ HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, DL, TTI, TLI, AC, ORE,
+ Opts);
+ return Impl.run(F);
+}
+
+PreservedAnalyses HardwareLoopsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ auto &DL = F.getParent()->getDataLayout();
+
+ HardwareLoopsImpl Impl(SE, LI, true, DT, DL, TTI, TLI, AC, ORE, Opts);
+ bool Changed = Impl.run(F);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<BranchProbabilityAnalysis>();
+ return PA;
+}
+bool HardwareLoopsImpl::run(Function &F) {
+ LLVMContext &Ctx = F.getParent()->getContext();
+ for (Loop *L : LI)
+ if (L->isOutermost())
+ TryConvertLoop(L, Ctx);
return MadeChange;
}
// Return true if the search should stop, which will be when an inner loop is
// converted and the parent loop doesn't support containing a hardware loop.
-bool HardwareLoops::TryConvertLoop(Loop *L) {
+bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) {
// Process nested loops first.
bool AnyChanged = false;
for (Loop *SL : *L)
- AnyChanged |= TryConvertLoop(SL);
+ AnyChanged |= TryConvertLoop(SL, Ctx);
if (AnyChanged) {
reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
ORE, L);
@@ -252,39 +314,39 @@ bool HardwareLoops::TryConvertLoop(Loop *L) {
LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
HardwareLoopInfo HWLoopInfo(L);
- if (!HWLoopInfo.canAnalyze(*LI)) {
+ if (!HWLoopInfo.canAnalyze(LI)) {
reportHWLoopFailure("cannot analyze loop, irreducible control flow",
"HWLoopCannotAnalyze", ORE, L);
return false;
}
- if (!ForceHardwareLoops &&
- !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
+ if (!Opts.Force &&
+ !TTI.isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
reportHWLoopFailure("it's not profitable to create a hardware-loop",
"HWLoopNotProfitable", ORE, L);
return false;
}
// Allow overriding of the counter width and loop decrement value.
- if (CounterBitWidth.getNumOccurrences())
- HWLoopInfo.CountType =
- IntegerType::get(M->getContext(), CounterBitWidth);
+ if (Opts.Bitwidth.has_value()) {
+ HWLoopInfo.CountType = IntegerType::get(Ctx, Opts.Bitwidth.value());
+ }
- if (LoopDecrement.getNumOccurrences())
+ if (Opts.Decrement.has_value())
HWLoopInfo.LoopDecrement =
- ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
+ ConstantInt::get(HWLoopInfo.CountType, Opts.Decrement.value());
MadeChange |= TryConvertLoop(HWLoopInfo);
- return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
+ return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested);
}
-bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
+bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
Loop *L = HWLoopInfo.L;
LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
- if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
- ForceHardwareLoopPHI)) {
+ if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT, Opts.getForceNested(),
+ Opts.getForcePhi())) {
// TODO: there can be many reasons a loop is not considered a
// candidate, so we should let isHardwareLoopCandidate fill in the
// reason and then report a better message here.
@@ -300,11 +362,11 @@ bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
// If we don't have a preheader, then insert one.
if (!Preheader)
- Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
+ Preheader = InsertPreheaderForLoop(L, &DT, &LI, nullptr, PreserveLCSSA);
if (!Preheader)
return false;
- HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
+ HardwareLoop HWLoop(HWLoopInfo, SE, DL, ORE, Opts);
HWLoop.Create();
++NumHWLoops;
return true;
@@ -322,7 +384,7 @@ void HardwareLoop::Create() {
Value *Setup = InsertIterationSetup(LoopCountInit);
- if (UsePHICounter || ForceHardwareLoopPHI) {
+ if (UsePHICounter || Opts.ForcePhi) {
Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
Value *EltsRem = InsertPHICounter(Setup, LoopDec);
LoopDec->setOperand(0, EltsRem);
@@ -397,7 +459,8 @@ Value *HardwareLoop::InitLoopCount() {
if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
SE.getZero(ExitCount->getType()))) {
LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
- UseLoopGuard |= ForceGuardLoopEntry;
+ if (Opts.ForceGuard)
+ UseLoopGuard = true;
} else
UseLoopGuard = false;
@@ -441,7 +504,7 @@ Value *HardwareLoop::InitLoopCount() {
Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
IRBuilder<> Builder(BeginBB->getTerminator());
Type *Ty = LoopCountInit->getType();
- bool UsePhi = UsePHICounter || ForceHardwareLoopPHI;
+ bool UsePhi = UsePHICounter || Opts.ForcePhi;
Intrinsic::ID ID = UseLoopGuard
? (UsePhi ? Intrinsic::test_start_loop_iterations
: Intrinsic::test_set_loop_iterations)
@@ -533,11 +596,11 @@ void HardwareLoop::UpdateBranch(Value *EltsRem) {
RecursivelyDeleteTriviallyDeadInstructions(OldCond);
}
-INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
+INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
+INITIALIZE_PASS_END(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
-FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
+FunctionPass *llvm::createHardwareLoopsLegacyPass() { return new HardwareLoopsLegacy(); }
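With the split above, the transform is also reachable from the new pass manager. A hedged sketch of wiring it into a function pipeline, assuming HardwareLoopsPass is constructed from a HardwareLoopOptions like the one the legacy wrapper builds from its cl::opts:

#include "llvm/CodeGen/HardwareLoops.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

// Sketch only; assumes the pass accepts the options struct in its constructor.
// Force hardware-loop formation with a 32-bit counter decremented by 1, the
// same knobs the legacy pass forwards above.
void addForcedHardwareLoops(FunctionPassManager &FPM) {
  HardwareLoopOptions Opts;
  Opts.setForce(true);
  Opts.setDecrement(1);
  Opts.setCounterBitwidth(32);
  FPM.addPass(HardwareLoopsPass(Opts));
}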
diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp
index 105ab908d3fa..2ad5820bd9fb 100644
--- a/llvm/lib/CodeGen/IfConversion.cpp
+++ b/llvm/lib/CodeGen/IfConversion.cpp
@@ -71,8 +71,6 @@ static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
cl::init(false), cl::Hidden);
static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
cl::init(false), cl::Hidden);
-static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
- cl::init(false), cl::Hidden);
static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
cl::init(false), cl::Hidden);
static cl::opt<bool> DisableForkedDiamond("disable-ifcvt-forked-diamond",
@@ -189,16 +187,16 @@ namespace {
std::vector<BBInfo> BBAnalysis;
TargetSchedModel SchedModel;
- const TargetLoweringBase *TLI;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- const MachineBranchProbabilityInfo *MBPI;
- MachineRegisterInfo *MRI;
+ const TargetLoweringBase *TLI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const MachineBranchProbabilityInfo *MBPI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
LivePhysRegs Redefs;
- bool PreRegAlloc;
- bool MadeChange;
+ bool PreRegAlloc = true;
+ bool MadeChange = false;
int FnNum = -1;
std::function<bool(const MachineFunction &)> PredicateFtor;
@@ -532,7 +530,6 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
if (DisableTriangle && !isFalse && !isRev) break;
if (DisableTriangleR && !isFalse && isRev) break;
if (DisableTriangleF && isFalse && !isRev) break;
- if (DisableTriangleFR && isFalse && isRev) break;
LLVM_DEBUG(dbgs() << "Ifcvt (Triangle");
if (isFalse)
LLVM_DEBUG(dbgs() << " false");
@@ -1512,19 +1509,9 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
MIB.addReg(Reg, RegState::Implicit | RegState::Define);
continue;
}
- if (LiveBeforeMI.count(Reg))
+ if (any_of(TRI->subregs_inclusive(Reg),
+ [&](MCPhysReg S) { return LiveBeforeMI.count(S); }))
MIB.addReg(Reg, RegState::Implicit);
- else {
- bool HasLiveSubReg = false;
- for (MCSubRegIterator S(Reg, TRI); S.isValid(); ++S) {
- if (!LiveBeforeMI.count(*S))
- continue;
- HasLiveSubReg = true;
- break;
- }
- if (HasLiveSubReg)
- MIB.addReg(Reg, RegState::Implicit);
- }
}
}
@@ -1958,17 +1945,15 @@ bool IfConverter::IfConvertDiamondCommon(
} else if (!RedefsByFalse.count(Reg)) {
// These are defined before ctrl flow reach the 'false' instructions.
// They cannot be modified by the 'true' instructions.
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- ExtUses.insert(*SubRegs);
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ ExtUses.insert(SubReg);
}
}
for (MCPhysReg Reg : Defs) {
if (!ExtUses.count(Reg)) {
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- RedefsByFalse.insert(*SubRegs);
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ RedefsByFalse.insert(SubReg);
}
}
}
@@ -2244,6 +2229,15 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
assert(!FromMBB.hasAddressTaken() &&
"Removing a BB whose address is taken!");
+ // If we're about to splice an INLINEASM_BR from FromBBI, we need to update
+ // ToBBI's successor list accordingly.
+ if (FromMBB.mayHaveInlineAsmBr())
+ for (MachineInstr &MI : FromMBB)
+ if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
+ for (MachineOperand &MO : MI.operands())
+ if (MO.isMBB() && !ToBBI.BB->isSuccessor(MO.getMBB()))
+ ToBBI.BB->addSuccessor(MO.getMBB(), BranchProbability::getZero());
+
// In case FromMBB contains terminators (e.g. return instruction),
// first move the non-terminator instructions, then the terminators.
MachineBasicBlock::iterator FromTI = FromMBB.getFirstTerminator();
diff --git a/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index fa493af0eea7..b2a7aad73411 100644
--- a/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -94,7 +94,7 @@ class ImplicitNullChecks : public MachineFunctionPass {
/// computeDependence).
bool CanReorder;
- /// If non-None, then an instruction in \p Insts that also must be
+ /// If non-std::nullopt, then an instruction in \p Insts that also must be
/// hoisted.
std::optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence;
@@ -778,9 +778,7 @@ void ImplicitNullChecks::rewriteNullChecks(
// The original operation may define implicit-defs alongside
// the value.
MachineBasicBlock *MBB = NC.getMemOperation()->getParent();
- for (const MachineOperand &MO : FaultingInstr->operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
+ for (const MachineOperand &MO : FaultingInstr->all_defs()) {
Register Reg = MO.getReg();
if (!Reg || MBB->isLiveIn(Reg))
continue;
@@ -788,8 +786,8 @@ void ImplicitNullChecks::rewriteNullChecks(
}
if (auto *DepMI = NC.getOnlyDependency()) {
- for (auto &MO : DepMI->operands()) {
- if (!MO.isReg() || !MO.getReg() || !MO.isDef() || MO.isDead())
+ for (auto &MO : DepMI->all_defs()) {
+ if (!MO.getReg() || MO.isDead())
continue;
if (!NC.getNotNullSucc()->isLiveIn(MO.getReg()))
NC.getNotNullSucc()->addLiveIn(MO.getReg());
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
index cf4fff878ad1..c62f3db9d321 100644
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -165,8 +165,8 @@ class InlineSpiller : public Spiller {
const MachineBlockFrequencyInfo &MBFI;
// Variables that are valid during spill(), but used by multiple methods.
- LiveRangeEdit *Edit;
- LiveInterval *StackInt;
+ LiveRangeEdit *Edit = nullptr;
+ LiveInterval *StackInt = nullptr;
int StackSlot;
Register Original;
@@ -175,6 +175,7 @@ class InlineSpiller : public Spiller {
// All COPY instructions to/from snippets.
// They are ignored since both operands refer to the same stack slot.
+ // For bundled copies, this will only include the first header copy.
SmallPtrSet<MachineInstr*, 8> SnippetCopies;
// Values that failed to remat at some point.
@@ -257,19 +258,64 @@ Spiller *llvm::createInlineSpiller(MachineFunctionPass &Pass,
/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register,
/// otherwise return 0.
-static Register isFullCopyOf(const MachineInstr &MI, Register Reg) {
- if (!MI.isFullCopy())
+static Register isCopyOf(const MachineInstr &MI, Register Reg,
+ const TargetInstrInfo &TII) {
+ if (!TII.isCopyInstr(MI))
return Register();
- if (MI.getOperand(0).getReg() == Reg)
- return MI.getOperand(1).getReg();
- if (MI.getOperand(1).getReg() == Reg)
- return MI.getOperand(0).getReg();
+
+ const MachineOperand &DstOp = MI.getOperand(0);
+ const MachineOperand &SrcOp = MI.getOperand(1);
+
+ // TODO: Probably only worth allowing subreg copies with undef dests.
+ if (DstOp.getSubReg() != SrcOp.getSubReg())
+ return Register();
+ if (DstOp.getReg() == Reg)
+ return SrcOp.getReg();
+ if (SrcOp.getReg() == Reg)
+ return DstOp.getReg();
+ return Register();
+}
+
+/// Check for a copy bundle as formed by SplitKit.
+static Register isCopyOfBundle(const MachineInstr &FirstMI, Register Reg,
+ const TargetInstrInfo &TII) {
+ if (!FirstMI.isBundled())
+ return isCopyOf(FirstMI, Reg, TII);
+
+ assert(!FirstMI.isBundledWithPred() && FirstMI.isBundledWithSucc() &&
+ "expected to see first instruction in bundle");
+
+ Register SnipReg;
+ MachineBasicBlock::const_instr_iterator I = FirstMI.getIterator();
+ while (I->isBundledWithSucc()) {
+ const MachineInstr &MI = *I;
+ auto CopyInst = TII.isCopyInstr(MI);
+ if (!CopyInst)
+ return Register();
+
+ const MachineOperand &DstOp = *CopyInst->Destination;
+ const MachineOperand &SrcOp = *CopyInst->Source;
+ if (DstOp.getReg() == Reg) {
+ if (!SnipReg)
+ SnipReg = SrcOp.getReg();
+ else if (SnipReg != SrcOp.getReg())
+ return Register();
+ } else if (SrcOp.getReg() == Reg) {
+ if (!SnipReg)
+ SnipReg = DstOp.getReg();
+ else if (SnipReg != DstOp.getReg())
+ return Register();
+ }
+
+ ++I;
+ }
+
return Register();
}
static void getVDefInterval(const MachineInstr &MI, LiveIntervals &LIS) {
- for (const MachineOperand &MO : MI.operands())
- if (MO.isReg() && MO.isDef() && MO.getReg().isVirtual())
+ for (const MachineOperand &MO : MI.all_defs())
+ if (MO.getReg().isVirtual())
LIS.getInterval(MO.getReg());
}
@@ -307,14 +353,14 @@ bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) {
MachineInstr *UseMI = nullptr;
// Check that all uses satisfy our criteria.
- for (MachineRegisterInfo::reg_instr_nodbg_iterator
- RI = MRI.reg_instr_nodbg_begin(SnipLI.reg()),
- E = MRI.reg_instr_nodbg_end();
+ for (MachineRegisterInfo::reg_bundle_nodbg_iterator
+ RI = MRI.reg_bundle_nodbg_begin(SnipLI.reg()),
+ E = MRI.reg_bundle_nodbg_end();
RI != E;) {
MachineInstr &MI = *RI++;
// Allow copies to/from Reg.
- if (isFullCopyOf(MI, Reg))
+ if (isCopyOfBundle(MI, Reg, TII))
continue;
// Allow stack slot loads.
@@ -351,9 +397,8 @@ void InlineSpiller::collectRegsToSpill() {
if (Original == Reg)
return;
- for (MachineInstr &MI :
- llvm::make_early_inc_range(MRI.reg_instructions(Reg))) {
- Register SnipReg = isFullCopyOf(MI, Reg);
+ for (MachineInstr &MI : llvm::make_early_inc_range(MRI.reg_bundles(Reg))) {
+ Register SnipReg = isCopyOfBundle(MI, Reg, TII);
if (!isSibling(SnipReg))
continue;
LiveInterval &SnipLI = LIS.getInterval(SnipReg);
@@ -475,21 +520,22 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) {
// Find all spills and copies of VNI.
for (MachineInstr &MI :
- llvm::make_early_inc_range(MRI.use_nodbg_instructions(Reg))) {
- if (!MI.isCopy() && !MI.mayStore())
+ llvm::make_early_inc_range(MRI.use_nodbg_bundles(Reg))) {
+ if (!MI.mayStore() && !TII.isCopyInstr(MI))
continue;
SlotIndex Idx = LIS.getInstructionIndex(MI);
if (LI->getVNInfoAt(Idx) != VNI)
continue;
// Follow sibling copies down the dominator tree.
- if (Register DstReg = isFullCopyOf(MI, Reg)) {
+ if (Register DstReg = isCopyOfBundle(MI, Reg, TII)) {
if (isSibling(DstReg)) {
- LiveInterval &DstLI = LIS.getInterval(DstReg);
- VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot());
- assert(DstVNI && "Missing defined value");
- assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot");
- WorkList.push_back(std::make_pair(&DstLI, DstVNI));
+ LiveInterval &DstLI = LIS.getInterval(DstReg);
+ VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot());
+ assert(DstVNI && "Missing defined value");
+ assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot");
+
+ WorkList.push_back(std::make_pair(&DstLI, DstVNI));
}
continue;
}
@@ -593,8 +639,8 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
if (!ParentVNI) {
LLVM_DEBUG(dbgs() << "\tadding <undef> flags: ");
- for (MachineOperand &MO : MI.operands())
- if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg())
+ for (MachineOperand &MO : MI.all_uses())
+ if (MO.getReg() == VirtReg.reg())
MO.setIsUndef();
LLVM_DEBUG(dbgs() << UseIdx << '\t' << MI);
return true;
@@ -826,7 +872,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
if (Ops.back().first != MI || MI->isBundled())
return false;
- bool WasCopy = MI->isCopy();
+ bool WasCopy = TII.isCopyInstr(*MI).has_value();
Register ImpReg;
// TII::foldMemoryOperand will do what we need here for statepoint
@@ -1111,7 +1157,7 @@ void InlineSpiller::spillAroundUses(Register Reg) {
Idx = VNI->def;
// Check for a sibling copy.
- Register SibReg = isFullCopyOf(MI, Reg);
+ Register SibReg = isCopyOfBundle(MI, Reg, TII);
if (SibReg && isSibling(SibReg)) {
// This may actually be a copy between snippets.
if (isRegToSpill(SibReg)) {
@@ -1202,8 +1248,8 @@ void InlineSpiller::spillAll() {
llvm::make_early_inc_range(MRI.reg_instructions(Reg))) {
assert(SnippetCopies.count(&MI) && "Remaining use wasn't a snippet copy");
// FIXME: Do this with a LiveRangeEdit callback.
- LIS.RemoveMachineInstrFromMaps(MI);
- MI.eraseFromParent();
+ LIS.getSlotIndexes()->removeSingleMachineInstrFromMaps(MI);
+ MI.eraseFromBundle();
}
}
@@ -1250,7 +1296,7 @@ void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot,
LiveInterval &OrigLI = LIS.getInterval(Original);
// save a copy of LiveInterval in StackSlotToOrigLI because the original
// LiveInterval may be cleared after all its references are spilled.
- if (StackSlotToOrigLI.find(StackSlot) == StackSlotToOrigLI.end()) {
+ if (!StackSlotToOrigLI.contains(StackSlot)) {
auto LI = std::make_unique<LiveInterval>(OrigLI.reg(), OrigLI.weight());
LI->assign(OrigLI, Allocator);
StackSlotToOrigLI[StackSlot] = std::move(LI);
@@ -1459,7 +1505,7 @@ void HoistSpillHelper::runHoistSpills(
MachineBasicBlock *Block = (*RIt)->getBlock();
// If Block contains an original spill, simply continue.
- if (SpillsToKeep.find(*RIt) != SpillsToKeep.end() && !SpillsToKeep[*RIt]) {
+ if (SpillsToKeep.contains(*RIt) && !SpillsToKeep[*RIt]) {
SpillsInSubTreeMap[*RIt].first.insert(*RIt);
// SpillsInSubTreeMap[*RIt].second contains the cost of spill.
SpillsInSubTreeMap[*RIt].second = MBFI.getBlockFreq(Block);
@@ -1469,7 +1515,7 @@ void HoistSpillHelper::runHoistSpills(
// Collect spills in subtree of current node (*RIt) to
// SpillsInSubTreeMap[*RIt].first.
for (MachineDomTreeNode *Child : (*RIt)->children()) {
- if (SpillsInSubTreeMap.find(Child) == SpillsInSubTreeMap.end())
+ if (!SpillsInSubTreeMap.contains(Child))
continue;
// The stmt "SpillsInSubTree = SpillsInSubTreeMap[*RIt].first" below
// should be placed before getting the begin and end iterators of
@@ -1508,8 +1554,7 @@ void HoistSpillHelper::runHoistSpills(
for (auto *const SpillBB : SpillsInSubTree) {
// When SpillBB is a BB contains original spill, insert the spill
// to SpillsToRm.
- if (SpillsToKeep.find(SpillBB) != SpillsToKeep.end() &&
- !SpillsToKeep[SpillBB]) {
+ if (SpillsToKeep.contains(SpillBB) && !SpillsToKeep[SpillBB]) {
MachineInstr *SpillToRm = SpillBBToSpill[SpillBB];
SpillsToRm.push_back(SpillToRm);
}
diff --git a/llvm/lib/CodeGen/InterferenceCache.cpp b/llvm/lib/CodeGen/InterferenceCache.cpp
index 3cab9e5734ee..ae197ee5553a 100644
--- a/llvm/lib/CodeGen/InterferenceCache.cpp
+++ b/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -93,8 +93,8 @@ void InterferenceCache::Entry::revalidate(LiveIntervalUnion *LIUArray,
// Invalidate all iterators.
PrevPos = SlotIndex();
unsigned i = 0;
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i)
- RegUnits[i].VirtTag = LIUArray[*Units].getTag();
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ RegUnits[i++].VirtTag = LIUArray[Unit].getTag();
}
void InterferenceCache::Entry::reset(MCRegister physReg,
@@ -110,20 +110,21 @@ void InterferenceCache::Entry::reset(MCRegister physReg,
// Reset iterators.
PrevPos = SlotIndex();
RegUnits.clear();
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- RegUnits.push_back(LIUArray[*Units]);
- RegUnits.back().Fixed = &LIS->getRegUnit(*Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ RegUnits.push_back(LIUArray[Unit]);
+ RegUnits.back().Fixed = &LIS->getRegUnit(Unit);
}
}
bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray,
const TargetRegisterInfo *TRI) {
unsigned i = 0, e = RegUnits.size();
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i) {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
if (i == e)
return false;
- if (LIUArray[*Units].changedSince(RegUnits[i].VirtTag))
+ if (LIUArray[Unit].changedSince(RegUnits[i].VirtTag))
return false;
+ ++i;
}
return i == e;
}
diff --git a/llvm/lib/CodeGen/InterferenceCache.h b/llvm/lib/CodeGen/InterferenceCache.h
index 97464da9f17b..2a176b4f2cf7 100644
--- a/llvm/lib/CodeGen/InterferenceCache.h
+++ b/llvm/lib/CodeGen/InterferenceCache.h
@@ -54,7 +54,7 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
unsigned RefCount = 0;
/// MF - The current function.
- MachineFunction *MF;
+ MachineFunction *MF = nullptr;
/// Indexes - Mapping block numbers to SlotIndex ranges.
SlotIndexes *Indexes = nullptr;
@@ -156,7 +156,8 @@ class LLVM_LIBRARY_VISIBILITY InterferenceCache {
public:
InterferenceCache() = default;
-
+ InterferenceCache &operator=(const InterferenceCache &other) = delete;
+ InterferenceCache(const InterferenceCache &other) = delete;
~InterferenceCache() {
free(PhysRegEntries);
}
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 0582378be4cd..6b3848531569 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -58,6 +58,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
@@ -103,7 +104,7 @@ private:
const TargetLowering *TLI = nullptr;
/// The maximum supported interleave factor.
- unsigned MaxFactor;
+ unsigned MaxFactor = 0u;
/// Transform an interleaved load into target specific intrinsics.
bool lowerInterleavedLoad(LoadInst *LI,
@@ -113,6 +114,16 @@ private:
bool lowerInterleavedStore(StoreInst *SI,
SmallVector<Instruction *, 32> &DeadInsts);
+ /// Transform a load and a deinterleave intrinsic into target specific
+ /// instructions.
+ bool lowerDeinterleaveIntrinsic(IntrinsicInst *II,
+ SmallVector<Instruction *, 32> &DeadInsts);
+
+ /// Transform an interleave intrinsic and a store into target specific
+ /// instructions.
+ bool lowerInterleaveIntrinsic(IntrinsicInst *II,
+ SmallVector<Instruction *, 32> &DeadInsts);
+
/// Returns true if the uses of an interleaved load by the
/// extractelement instructions in \p Extracts can be replaced by uses of the
/// shufflevector instructions in \p Shuffles instead. If so, the necessary
@@ -202,86 +213,15 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
/// The particular case of an RE-interleave mask is:
/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
-static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
- unsigned MaxFactor, unsigned OpNumElts) {
- unsigned NumElts = Mask.size();
+static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
+ unsigned MaxFactor) {
+ unsigned NumElts = SVI->getShuffleMask().size();
if (NumElts < 4)
return false;
// Check potential Factors.
for (Factor = 2; Factor <= MaxFactor; Factor++) {
- if (NumElts % Factor)
- continue;
-
- unsigned LaneLen = NumElts / Factor;
- if (!isPowerOf2_32(LaneLen))
- continue;
-
- // Check whether each element matches the general interleaved rule.
- // Ignore undef elements, as long as the defined elements match the rule.
- // Outer loop processes all factors (x, y, z in the above example)
- unsigned I = 0, J;
- for (; I < Factor; I++) {
- unsigned SavedLaneValue;
- unsigned SavedNoUndefs = 0;
-
- // Inner loop processes consecutive accesses (x, x+1... in the example)
- for (J = 0; J < LaneLen - 1; J++) {
- // Lane computes x's position in the Mask
- unsigned Lane = J * Factor + I;
- unsigned NextLane = Lane + Factor;
- int LaneValue = Mask[Lane];
- int NextLaneValue = Mask[NextLane];
-
- // If both are defined, values must be sequential
- if (LaneValue >= 0 && NextLaneValue >= 0 &&
- LaneValue + 1 != NextLaneValue)
- break;
-
- // If the next value is undef, save the current one as reference
- if (LaneValue >= 0 && NextLaneValue < 0) {
- SavedLaneValue = LaneValue;
- SavedNoUndefs = 1;
- }
-
- // Undefs are allowed, but defined elements must still be consecutive:
- // i.e.: x,..., undef,..., x + 2,..., undef,..., undef,..., x + 5, ....
- // Verify this by storing the last non-undef followed by an undef
- // Check that following non-undef masks are incremented with the
- // corresponding distance.
- if (SavedNoUndefs > 0 && LaneValue < 0) {
- SavedNoUndefs++;
- if (NextLaneValue >= 0 &&
- SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue)
- break;
- }
- }
-
- if (J < LaneLen - 1)
- break;
-
- int StartMask = 0;
- if (Mask[I] >= 0) {
- // Check that the start of the I range (J=0) is greater than 0
- StartMask = Mask[I];
- } else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
- // StartMask defined by the last value in lane
- StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
- } else if (SavedNoUndefs > 0) {
- // StartMask defined by some non-zero value in the j loop
- StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
- }
- // else StartMask remains set to 0, i.e. all elements are undefs
-
- if (StartMask < 0)
- break;
- // We must stay within the vectors; This case can happen with undefs.
- if (StartMask + LaneLen > OpNumElts*2)
- break;
- }
-
- // Found an interleaved mask of current factor.
- if (I == Factor)
+ if (SVI->isInterleave(Factor))
return true;
}
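
For readers following the simplification above: the hand-rolled mask walk is now delegated to ShuffleVectorInst::isInterleave(Factor). Below is a minimal, self-contained model of the mask shape being matched; it is an illustration only (it ignores the undef-lane tolerance of the in-tree helper) and is not code from this patch.

#include <vector>

// A mask re-interleaves with a given factor when the element at position
// J*Factor + I equals I*LaneLen + J for every defined (non-negative) lane.
// E.g. Factor = 2, LaneLen = 4 accepts <0, 4, 1, 5, 2, 6, 3, 7>.
static bool looksReInterleaved(const std::vector<int> &Mask, unsigned Factor) {
  unsigned NumElts = static_cast<unsigned>(Mask.size());
  if (Factor < 2 || NumElts == 0 || NumElts % Factor != 0)
    return false;
  unsigned LaneLen = NumElts / Factor;
  for (unsigned I = 0; I < Factor; ++I)
    for (unsigned J = 0; J < LaneLen; ++J) {
      int M = Mask[J * Factor + I];
      if (M >= 0 && M != static_cast<int>(I * LaneLen + J))
        return false; // a defined lane breaks the interleave pattern
    }
  return true;
}
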
@@ -311,8 +251,10 @@ bool InterleavedAccess::lowerInterleavedLoad(
continue;
}
if (auto *BI = dyn_cast<BinaryOperator>(User)) {
- if (all_of(BI->users(),
- [](auto *U) { return isa<ShuffleVectorInst>(U); })) {
+ if (all_of(BI->users(), [](auto *U) {
+ auto *SVI = dyn_cast<ShuffleVectorInst>(U);
+ return SVI && isa<UndefValue>(SVI->getOperand(1));
+ })) {
for (auto *SVI : BI->users())
BinOpShuffles.insert(cast<ShuffleVectorInst>(SVI));
continue;
@@ -500,9 +442,7 @@ bool InterleavedAccess::lowerInterleavedStore(
// Check if the shufflevector is RE-interleave shuffle.
unsigned Factor;
- unsigned OpNumElts =
- cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements();
- if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor, OpNumElts))
+ if (!isReInterleaveMask(SVI, Factor, MaxFactor))
return false;
LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
@@ -517,6 +457,47 @@ bool InterleavedAccess::lowerInterleavedStore(
return true;
}
+bool InterleavedAccess::lowerDeinterleaveIntrinsic(
+ IntrinsicInst *DI, SmallVector<Instruction *, 32> &DeadInsts) {
+ LoadInst *LI = dyn_cast<LoadInst>(DI->getOperand(0));
+
+ if (!LI || !LI->hasOneUse() || !LI->isSimple())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI << "\n");
+
+ // Try and match this with target specific intrinsics.
+ if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI))
+ return false;
+
+ // We now have a target-specific load, so delete the old one.
+ DeadInsts.push_back(DI);
+ DeadInsts.push_back(LI);
+ return true;
+}
+
+bool InterleavedAccess::lowerInterleaveIntrinsic(
+ IntrinsicInst *II, SmallVector<Instruction *, 32> &DeadInsts) {
+ if (!II->hasOneUse())
+ return false;
+
+ StoreInst *SI = dyn_cast<StoreInst>(*(II->users().begin()));
+
+ if (!SI || !SI->isSimple())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II << "\n");
+
+ // Try and match this with target specific intrinsics.
+ if (!TLI->lowerInterleaveIntrinsicToStore(II, SI))
+ return false;
+
+ // We now have a target-specific store, so delete the old one.
+ DeadInsts.push_back(SI);
+ DeadInsts.push_back(II);
+ return true;
+}
+
bool InterleavedAccess::runOnFunction(Function &F) {
auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
if (!TPC || !LowerInterleavedAccesses)
@@ -539,6 +520,15 @@ bool InterleavedAccess::runOnFunction(Function &F) {
if (auto *SI = dyn_cast<StoreInst>(&I))
Changed |= lowerInterleavedStore(SI, DeadInsts);
+
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ // At present, we only have intrinsics to represent (de)interleaving
+ // with a factor of 2.
+ if (II->getIntrinsicID() == Intrinsic::experimental_vector_deinterleave2)
+ Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
+ if (II->getIntrinsicID() == Intrinsic::experimental_vector_interleave2)
+ Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
+ }
}
for (auto *I : DeadInsts)
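
The two hooks consulted above are target opt-ins; by default the generic IR is left untouched. A hedged sketch of an override is shown below (MyTargetLowering is a placeholder class name, not part of this patch).

// Sketch only: returning true promises that DI's results have been rewritten
// to target-specific instructions and that both DI and the feeding load are
// now dead, so InterleavedAccess may erase them via DeadInsts.
bool MyTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
                                                        LoadInst *LI) const {
  // A real target would first check that DI/LI have types it can lower to a
  // structured load, emit that load, and replace DI's extracted values.
  return false; // conservative default: keep the generic intrinsic
}
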
diff --git a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 0d36badfa10f..d0ad6e45b4d3 100644
--- a/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/llvm/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -318,7 +318,7 @@ public:
// See Proof(2): Trailing zero bits indicate a left shift. This removes
// leading bits from the result even if they are undefined.
- decErrorMSBs(C.countTrailingZeros());
+ decErrorMSBs(C.countr_zero());
A *= C;
pushBOperation(Mul, C);
@@ -475,7 +475,7 @@ public:
//
// If this can be proven add shiftAmt to the error counter
// `ErrorMSBs`. Otherwise set all bits as undefined.
- if (A.countTrailingZeros() < shiftAmt)
+ if (A.countr_zero() < shiftAmt)
ErrorMSBs = A.getBitWidth();
else
incErrorMSBs(shiftAmt);
@@ -678,6 +678,8 @@ public:
EI = new ElementInfo[VTy->getNumElements()];
}
+ VectorInfo &operator=(const VectorInfo &other) = delete;
+
virtual ~VectorInfo() { delete[] EI; }
unsigned getDimension() const { return VTy->getNumElements(); }
diff --git a/llvm/lib/Target/AArch64/AArch64KCFI.cpp b/llvm/lib/CodeGen/KCFI.cpp
index 271001cb71a6..bffa02ca8afd 100644
--- a/llvm/lib/Target/AArch64/AArch64KCFI.cpp
+++ b/llvm/lib/CodeGen/KCFI.cpp
@@ -1,4 +1,4 @@
-//===---- AArch64KCFI.cpp - Implements KCFI -------------------------------===//
+//===---- KCFI.cpp - Implements Kernel Control-Flow Integrity (KCFI) ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,40 +6,46 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements KCFI indirect call checking.
+// This pass implements Kernel Control-Flow Integrity (KCFI) indirect call
+// check lowering. For each call instruction with a cfi-type attribute, it
+// emits an arch-specific check before the call, and bundles the check and
+// the call to prevent unintentional modifications.
//
//===----------------------------------------------------------------------===//
-#include "AArch64.h"
-#include "AArch64InstrInfo.h"
-#include "AArch64Subtarget.h"
-#include "AArch64TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/InitializePasses.h"
using namespace llvm;
-#define DEBUG_TYPE "aarch64-kcfi"
-#define AARCH64_KCFI_PASS_NAME "Insert KCFI indirect call checks"
+#define DEBUG_TYPE "kcfi"
+#define KCFI_PASS_NAME "Insert KCFI indirect call checks"
STATISTIC(NumKCFIChecksAdded, "Number of indirect call checks added");
namespace {
-class AArch64KCFI : public MachineFunctionPass {
+class KCFI : public MachineFunctionPass {
public:
static char ID;
- AArch64KCFI() : MachineFunctionPass(ID) {}
+ KCFI() : MachineFunctionPass(ID) {}
- StringRef getPassName() const override { return AARCH64_KCFI_PASS_NAME; }
+ StringRef getPassName() const override { return KCFI_PASS_NAME; }
bool runOnMachineFunction(MachineFunction &MF) override;
private:
/// Machine instruction info used throughout the class.
- const AArch64InstrInfo *TII = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+
+ /// Target lowering for arch-specific parts.
+ const TargetLowering *TLI = nullptr;
/// Emits a KCFI check before an indirect call.
/// \returns true if the check was added and false otherwise.
@@ -47,41 +53,29 @@ private:
MachineBasicBlock::instr_iterator I) const;
};
-char AArch64KCFI::ID = 0;
+char KCFI::ID = 0;
} // end anonymous namespace
-INITIALIZE_PASS(AArch64KCFI, DEBUG_TYPE, AARCH64_KCFI_PASS_NAME, false, false)
+INITIALIZE_PASS(KCFI, DEBUG_TYPE, KCFI_PASS_NAME, false, false)
-FunctionPass *llvm::createAArch64KCFIPass() { return new AArch64KCFI(); }
+FunctionPass *llvm::createKCFIPass() { return new KCFI(); }
-bool AArch64KCFI::emitCheck(MachineBasicBlock &MBB,
- MachineBasicBlock::instr_iterator MBBI) const {
+bool KCFI::emitCheck(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator MBBI) const {
assert(TII && "Target instruction info was not initialized");
+ assert(TLI && "Target lowering was not initialized");
// If the call instruction is bundled, we can only emit a check safely if
// it's the first instruction in the bundle.
if (MBBI->isBundled() && !std::prev(MBBI)->isBundle())
report_fatal_error("Cannot emit a KCFI check for a bundled call");
- switch (MBBI->getOpcode()) {
- case AArch64::BLR:
- case AArch64::BLRNoIP:
- case AArch64::TCRETURNri:
- case AArch64::TCRETURNriBTI:
- break;
- default:
- llvm_unreachable("Unexpected CFI call opcode");
- }
-
- MachineOperand &Target = MBBI->getOperand(0);
- assert(Target.isReg() && "Invalid target operand for an indirect call");
- Target.setIsRenamable(false);
+ // Emit a KCFI check for the call instruction at MBBI. The implementation
+ // must unfold memory operands if applicable.
+ MachineInstr *Check = TLI->EmitKCFICheck(MBB, MBBI, TII);
- MachineInstr *Check =
- BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(AArch64::KCFI_CHECK))
- .addReg(Target.getReg())
- .addImm(MBBI->getCFIType())
- .getInstr();
+ // Clear the original call's CFI type.
+ assert(MBBI->isCall() && "Unexpected instruction type");
MBBI->setCFIType(*MBB.getParent(), 0);
// If not already bundled, bundle the check and the call to prevent
@@ -93,16 +87,18 @@ bool AArch64KCFI::emitCheck(MachineBasicBlock &MBB,
return true;
}
-bool AArch64KCFI::runOnMachineFunction(MachineFunction &MF) {
+bool KCFI::runOnMachineFunction(MachineFunction &MF) {
const Module *M = MF.getMMI().getModule();
if (!M->getModuleFlag("kcfi"))
return false;
- const auto &SubTarget = MF.getSubtarget<AArch64Subtarget>();
+ const auto &SubTarget = MF.getSubtarget();
TII = SubTarget.getInstrInfo();
+ TLI = SubTarget.getTargetLowering();
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
+ // Use instr_iterator because we don't want to skip bundles.
for (MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),
MIE = MBB.instr_end();
MII != MIE; ++MII) {
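
For context on the new target hook used in emitCheck above, a hedged sketch of an override follows. It mirrors the AArch64-specific code this patch removes; MyTarget::KCFI_CHECK is a placeholder pseudo opcode, and real targets may additionally need to unfold memory-operand calls before emitting the check.

MachineInstr *MyTargetLowering::EmitKCFICheck(
    MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI,
    const TargetInstrInfo *TII) const {
  // Read the indirect-call target and the expected type id from the call
  // itself, then materialize the check immediately before it.
  MachineOperand &Target = MBBI->getOperand(0);
  assert(Target.isReg() && "Invalid target operand for an indirect call");
  return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(MyTarget::KCFI_CHECK))
      .addReg(Target.getReg())
      .addImm(MBBI->getCFIType())
      .getInstr();
}
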
diff --git a/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 3192dcadb5f5..d02ec1db1165 100644
--- a/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -274,16 +274,17 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
// emission fails.
const MCSubtargetInfo &STI = *getMCSubtargetInfo();
const MCRegisterInfo &MRI = *getMCRegisterInfo();
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getMCInstrInfo(), *Ctx);
- MCAsmBackend *MAB =
- getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions);
+ std::unique_ptr<MCCodeEmitter> MCE(
+ getTarget().createMCCodeEmitter(*getMCInstrInfo(), *Ctx));
+ std::unique_ptr<MCAsmBackend> MAB(
+ getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions));
if (!MCE || !MAB)
return true;
const Triple &T = getTargetTriple();
std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer(
- T, *Ctx, std::unique_ptr<MCAsmBackend>(MAB), MAB->createObjectWriter(Out),
- std::unique_ptr<MCCodeEmitter>(MCE), STI, Options.MCOptions.MCRelaxAll,
+ T, *Ctx, std::move(MAB), MAB->createObjectWriter(Out), std::move(MCE),
+ STI, Options.MCOptions.MCRelaxAll,
Options.MCOptions.MCIncrementalLinkerCompatible,
/*DWARFMustBeAtTheEnd*/ true));
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index ba417322d4f6..57df9b67fd02 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -601,7 +601,7 @@ public:
if (Var.getInlinedAt())
return false;
- if (Expr->getNumElements() > 0)
+ if (Expr->getNumElements() > 0 && !Expr->isDeref())
return false;
return true;
@@ -1544,12 +1544,12 @@ std::optional<ValueIDNum> InstrRefBasedLDV::getValueForInstrRef(
if (Size != MainRegSize || Offset) {
// Enumerate all subregisters, searching.
Register NewReg = 0;
- for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
- unsigned Subreg = TRI->getSubRegIndex(Reg, *SRI);
+ for (MCPhysReg SR : TRI->subregs(Reg)) {
+ unsigned Subreg = TRI->getSubRegIndex(Reg, SR);
unsigned SubregSize = TRI->getSubRegIdxSize(Subreg);
unsigned SubregOffset = TRI->getSubRegIdxOffset(Subreg);
if (SubregSize == Size && SubregOffset == Offset) {
- NewReg = *SRI;
+ NewReg = SR;
break;
}
}
@@ -2066,12 +2066,12 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
};
// Then, transfer subreg bits.
- for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
+ for (MCPhysReg SR : TRI->subregs(Reg)) {
// Ensure this reg is tracked,
- (void)MTracker->lookupOrTrackRegister(*SRI);
- unsigned SubregIdx = TRI->getSubRegIndex(Reg, *SRI);
+ (void)MTracker->lookupOrTrackRegister(SR);
+ unsigned SubregIdx = TRI->getSubRegIndex(Reg, SR);
unsigned SpillID = MTracker->getLocID(Loc, SubregIdx);
- DoTransfer(*SRI, SpillID);
+ DoTransfer(SR, SpillID);
}
// Directly lookup size of main source reg, and transfer.
@@ -2101,10 +2101,10 @@ bool InstrRefBasedLDV::transferSpillOrRestoreInst(MachineInstr &MI) {
MTracker->setReg(DestReg, ReadValue);
};
- for (MCSubRegIterator SRI(Reg, TRI, false); SRI.isValid(); ++SRI) {
- unsigned Subreg = TRI->getSubRegIndex(Reg, *SRI);
+ for (MCPhysReg SR : TRI->subregs(Reg)) {
+ unsigned Subreg = TRI->getSubRegIndex(Reg, SR);
unsigned SpillID = MTracker->getLocID(*Loc, Subreg);
- DoTransfer(*SRI, SpillID);
+ DoTransfer(SR, SpillID);
}
// Directly look up this registers slot idx by size, and transfer.
@@ -2513,8 +2513,8 @@ void InstrRefBasedLDV::placeMLocPHIs(
Register R = MTracker->LocIdxToLocID[L];
SmallSet<Register, 8> FoundRegUnits;
bool AnyIllegal = false;
- for (MCRegUnitIterator RUI(R.asMCReg(), TRI); RUI.isValid(); ++RUI) {
- for (MCRegUnitRootIterator URoot(*RUI, TRI); URoot.isValid(); ++URoot){
+ for (MCRegUnit Unit : TRI->regunits(R.asMCReg())) {
+ for (MCRegUnitRootIterator URoot(Unit, TRI); URoot.isValid(); ++URoot) {
if (!MTracker->isRegisterTracked(*URoot)) {
// Not all roots were loaded into the tracking map: this register
// isn't actually def'd anywhere, we only read from it. Generate PHIs
@@ -3179,7 +3179,7 @@ void InstrRefBasedLDV::buildVLocValueMap(
SmallPtrSet<MachineBasicBlock *, 32> DefBlocks;
for (const MachineBasicBlock *ExpMBB : BlocksToExplore) {
auto &TransferFunc = AllTheVLocs[ExpMBB->getNumber()].Vars;
- if (TransferFunc.find(Var) != TransferFunc.end())
+ if (TransferFunc.contains(Var))
DefBlocks.insert(const_cast<MachineBasicBlock *>(ExpMBB));
}
@@ -3295,7 +3295,7 @@ void InstrRefBasedLDV::buildVLocValueMap(
// to be visited next time around.
for (auto *s : MBB->successors()) {
// Ignore out of scope / not-to-be-explored successors.
- if (LiveInIdx.find(s) == LiveInIdx.end())
+ if (!LiveInIdx.contains(s))
continue;
if (BBToOrder[s] > BBToOrder[MBB]) {
@@ -3411,7 +3411,7 @@ void InstrRefBasedLDV::initialSetup(MachineFunction &MF) {
for (MachineBasicBlock *MBB : RPOT)
processMBB(MBB);
for (MachineBasicBlock &MBB : MF)
- if (BBToOrder.find(&MBB) == BBToOrder.end())
+ if (!BBToOrder.contains(&MBB))
processMBB(&MBB);
// Order value substitutions by their "source" operand pair, for quick lookup.
@@ -3716,7 +3716,12 @@ bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF,
unsigned BlockNo = Num.getBlock();
LocIdx LocNo = Num.getLoc();
- Num = MInLocs[BlockNo][LocNo.asU64()];
+ ValueIDNum ResolvedValue = MInLocs[BlockNo][LocNo.asU64()];
+ // If there is no resolved value for this live-in then it is not directly
+ // reachable from the entry block -- model it as a PHI on entry to this
+ // block, which means we leave the ValueIDNum unchanged.
+ if (ResolvedValue != ValueIDNum::EmptyValue)
+ Num = ResolvedValue;
}
// Later, we'll be looking up ranges of instruction numbers.
llvm::sort(DebugPHINumToValue);
@@ -4050,10 +4055,7 @@ public:
/// ValueIsPHI - Check if the instruction that defines the specified value
/// is a PHI instruction.
static LDVSSAPhi *ValueIsPHI(BlockValueNum Val, LDVSSAUpdater *Updater) {
- auto PHIIt = Updater->PHIs.find(Val);
- if (PHIIt == Updater->PHIs.end())
- return nullptr;
- return PHIIt->second;
+ return Updater->PHIs.lookup(Val);
}
/// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
@@ -4195,7 +4197,7 @@ std::optional<ValueIDNum> InstrRefBasedLDV::resolveDbgPHIsImpl(
// Are all these things actually defined?
for (auto &PHIIt : PHI->IncomingValues) {
// Any undef input means DBG_PHIs didn't dominate the use point.
- if (Updater.UndefMap.find(&PHIIt.first->BB) != Updater.UndefMap.end())
+ if (Updater.UndefMap.contains(&PHIIt.first->BB))
return std::nullopt;
ValueIDNum ValueToCheck;
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
index 2fdc37c6dda2..30de18e53c4f 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h
@@ -656,7 +656,7 @@ public:
// If we discover a new machine location, assign it an mphi with this
// block number.
- unsigned CurBB;
+ unsigned CurBB = -1;
/// Cached local copy of the number of registers the target has.
unsigned NumRegs;
@@ -740,7 +740,7 @@ public:
unsigned getLocID(SpillLocationNo Spill, StackSlotPos Idx) {
unsigned SlotNo = Spill.id() - 1;
SlotNo *= NumSlotIdxes;
- assert(StackSlotIdxes.find(Idx) != StackSlotIdxes.end());
+ assert(StackSlotIdxes.contains(Idx));
SlotNo += StackSlotIdxes[Idx];
SlotNo += NumRegs;
return SlotNo;
@@ -1094,7 +1094,7 @@ private:
MLocTracker *MTracker = nullptr;
/// Number of the current block LiveDebugValues is stepping through.
- unsigned CurBB;
+ unsigned CurBB = -1;
/// Number of the current instruction LiveDebugValues is evaluating.
unsigned CurInst;
@@ -1197,7 +1197,7 @@ private:
/// For an instruction reference given by \p InstNo and \p OpNo in instruction
/// \p MI returns the Value pointed to by that instruction reference if any
- /// exists, otherwise returns None.
+ /// exists, otherwise returns std::nullopt.
std::optional<ValueIDNum> getValueForInstrRef(unsigned InstNo, unsigned OpNo,
MachineInstr &MI,
const ValueTable *MLiveOuts,
diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
index 9dba9a88f703..0c0a4e13c7c9 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -8,7 +8,6 @@
#include "LiveDebugValues.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -19,6 +18,7 @@
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
/// \file LiveDebugValues.cpp
///
@@ -81,7 +81,7 @@ public:
private:
std::unique_ptr<LDVImpl> InstrRefImpl;
std::unique_ptr<LDVImpl> VarLocImpl;
- TargetPassConfig *TPC;
+ TargetPassConfig *TPC = nullptr;
MachineDominatorTree MDT;
};
} // namespace
diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
index b78757b855f4..116c6b7e2d19 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp
@@ -1116,7 +1116,7 @@ VarLocBasedLDV::~VarLocBasedLDV() = default;
/// location, erase the variable from the Vars set.
void VarLocBasedLDV::OpenRangesSet::erase(const VarLoc &VL) {
// Erasure helper.
- auto DoErase = [VL, this](DebugVariable VarToErase) {
+ auto DoErase = [&VL, this](DebugVariable VarToErase) {
auto *EraseFrom = VL.isEntryBackupLoc() ? &EntryValuesBackupVars : &Vars;
auto It = EraseFrom->find(VarToErase);
if (It != EraseFrom->end()) {
@@ -1312,7 +1312,7 @@ void VarLocBasedLDV::cleanupEntryValueTransfers(
return;
auto TransRange = EntryValTransfers.equal_range(TRInst);
- for (auto TDPair : llvm::make_range(TransRange.first, TransRange.second)) {
+ for (auto &TDPair : llvm::make_range(TransRange.first, TransRange.second)) {
const VarLoc &EmittedEV = VarLocIDs[TDPair.second];
if (std::tie(EntryVL.Var, EntryVL.Locs[0].Value.RegNo, EntryVL.Expr) ==
std::tie(EmittedEV.Var, EmittedEV.Locs[0].Value.RegNo,
@@ -1347,7 +1347,7 @@ void VarLocBasedLDV::removeEntryValue(const MachineInstr &MI,
// Try to get non-debug instruction responsible for the DBG_VALUE.
const MachineInstr *TransferInst = nullptr;
Register Reg = MI.getDebugOperand(0).getReg();
- if (Reg.isValid() && RegSetInstrs.find(Reg) != RegSetInstrs.end())
+ if (Reg.isValid() && RegSetInstrs.contains(Reg))
TransferInst = RegSetInstrs.find(Reg)->second;
// Case of the parameter's DBG_VALUE at the start of entry MBB.
@@ -2151,7 +2151,9 @@ bool VarLocBasedLDV::isEntryValueCandidate(
// TODO: Add support for parameters that have a pre-existing debug expressions
// (e.g. fragments).
- if (MI.getDebugExpression()->getNumElements() > 0)
+ // A simple deref expression is equivalent to an indirect debug value.
+ const DIExpression *Expr = MI.getDebugExpression();
+ if (Expr->getNumElements() > 0 && !Expr->isDeref())
return false;
return true;
@@ -2160,8 +2162,8 @@ bool VarLocBasedLDV::isEntryValueCandidate(
/// Collect all register defines (including aliases) for the given instruction.
static void collectRegDefs(const MachineInstr &MI, DefinedRegsSet &Regs,
const TargetRegisterInfo *TRI) {
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isDef() && MO.getReg() && MO.getReg().isPhysical()) {
+ for (const MachineOperand &MO : MI.all_defs()) {
+ if (MO.getReg() && MO.getReg().isPhysical()) {
Regs.insert(MO.getReg());
for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
Regs.insert(*AI);
diff --git a/llvm/lib/CodeGen/LiveInterval.cpp b/llvm/lib/CodeGen/LiveInterval.cpp
index 7cd3d26cf5b3..1cf354349c56 100644
--- a/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/llvm/lib/CodeGen/LiveInterval.cpp
@@ -445,7 +445,7 @@ bool LiveRange::overlaps(const LiveRange &Other, const CoalescerPair &CP,
while (true) {
// J has just been advanced to satisfy:
- assert(J->end >= I->start);
+ assert(J->end > I->start);
// Check for an overlap.
if (J->start < I->end) {
// I and J are overlapping. Find the later start.
@@ -460,11 +460,11 @@ bool LiveRange::overlaps(const LiveRange &Other, const CoalescerPair &CP,
std::swap(I, J);
std::swap(IE, JE);
}
- // Advance J until J->end >= I->start.
+ // Advance J until J->end > I->start.
do
if (++J == JE)
return false;
- while (J->end < I->start);
+ while (J->end <= I->start);
}
}
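
The tightened assertion and advance condition follow from live range segments being half-open intervals, so two segments that merely touch at a boundary do not overlap. A tiny standalone model of that convention (not LLVM code):

#include <cassert>

// Segments behave like half-open intervals [start, end): ending exactly where
// the other segment starts is not an overlap, so the scan may keep advancing
// while J->end <= I->start.
struct Seg { unsigned Start, End; };

static bool segsOverlap(Seg A, Seg B) {
  return A.Start < B.End && B.Start < A.End; // strict on both sides
}

int main() {
  assert(!segsOverlap({8, 16}, {16, 24})); // touching at 16: no interference
  assert(segsOverlap({8, 16}, {12, 24}));  // genuine overlap
  return 0;
}
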
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index a49f6b0604c5..da55e7f7284b 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -280,9 +280,7 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
bool IsReserved = false;
for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
bool IsRootReserved = true;
- for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true);
- Super.isValid(); ++Super) {
- MCRegister Reg = *Super;
+ for (MCPhysReg Reg : TRI->superregs_inclusive(*Root)) {
if (!MRI->reg_empty(Reg))
LICalc->createDeadDefs(LR, Reg);
// A register unit is considered reserved if all its roots and all their
@@ -299,9 +297,7 @@ void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) {
// Ignore uses of reserved registers. We only track defs of those.
if (!IsReserved) {
for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
- for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true);
- Super.isValid(); ++Super) {
- MCRegister Reg = *Super;
+ for (MCPhysReg Reg : TRI->superregs_inclusive(*Root)) {
if (!MRI->reg_empty(Reg))
LICalc->extendToUses(LR, Reg);
}
@@ -333,8 +329,7 @@ void LiveIntervals::computeLiveInRegUnits() {
SlotIndex Begin = Indexes->getMBBStartIdx(&MBB);
LLVM_DEBUG(dbgs() << Begin << "\t" << printMBBReference(MBB));
for (const auto &LI : MBB.liveins()) {
- for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) {
- unsigned Unit = *Units;
+ for (MCRegUnit Unit : TRI->regunits(LI.PhysReg)) {
LiveRange *LR = RegUnitRanges[Unit];
if (!LR) {
// Use segment set to speed-up initial computation of the live range.
@@ -708,9 +703,8 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
// Find the regunit intervals for the assigned register. They may overlap
// the virtual register live range, cancelling any kills.
RU.clear();
- for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid();
- ++Unit) {
- const LiveRange &RURange = getRegUnit(*Unit);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ const LiveRange &RURange = getRegUnit(Unit);
if (RURange.empty())
continue;
RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end)));
@@ -1056,10 +1050,9 @@ public:
// For physregs, only update the regunits that actually have a
// precomputed live range.
- for (MCRegUnitIterator Units(Reg.asMCReg(), &TRI); Units.isValid();
- ++Units)
- if (LiveRange *LR = getRegUnitLI(*Units))
- updateRange(*LR, *Units, LaneBitmask::getNone());
+ for (MCRegUnit Unit : TRI.regunits(Reg.asMCReg()))
+ if (LiveRange *LR = getRegUnitLI(Unit))
+ updateRange(*LR, Unit, LaneBitmask::getNone());
}
if (hasRegMask)
updateRegMaskSlots();
@@ -1707,8 +1700,8 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
}
void LiveIntervals::removePhysRegDefAt(MCRegister Reg, SlotIndex Pos) {
- for (MCRegUnitIterator Unit(Reg, TRI); Unit.isValid(); ++Unit) {
- if (LiveRange *LR = getCachedRegUnit(*Unit))
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ if (LiveRange *LR = getCachedRegUnit(Unit))
if (VNInfo *VNI = LR->getVNInfoAt(Pos))
LR->removeValNo(VNI);
}
diff --git a/llvm/lib/CodeGen/LivePhysRegs.cpp b/llvm/lib/CodeGen/LivePhysRegs.cpp
index d4848f16dcf2..96380d408482 100644
--- a/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -265,14 +265,9 @@ void llvm::addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs) {
if (MRI.isReserved(Reg))
continue;
// Skip the register if we are about to add one of its super registers.
- bool ContainsSuperReg = false;
- for (MCSuperRegIterator SReg(Reg, &TRI); SReg.isValid(); ++SReg) {
- if (LiveRegs.contains(*SReg) && !MRI.isReserved(*SReg)) {
- ContainsSuperReg = true;
- break;
- }
- }
- if (ContainsSuperReg)
+ if (any_of(TRI.superregs(Reg), [&](MCPhysReg SReg) {
+ return LiveRegs.contains(SReg) && !MRI.isReserved(SReg);
+ }))
continue;
MBB.addLiveIn(Reg);
}
diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp
index d8b024fbdfea..ff49e080090c 100644
--- a/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -82,7 +82,7 @@ void LiveRangeEdit::scanRemattable() {
for (VNInfo *VNI : getParent().valnos) {
if (VNI->isUnused())
continue;
- unsigned Original = VRM->getOriginal(getReg());
+ Register Original = VRM->getOriginal(getReg());
LiveInterval &OrigLI = LIS.getInterval(Original);
VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
if (!OrigVNI)
@@ -181,11 +181,9 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM, VNInfo *OrigVNI,
SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- unsigned DestReg,
- const Remat &RM,
+ Register DestReg, const Remat &RM,
const TargetRegisterInfo &tri,
- bool Late,
- unsigned SubIdx,
+ bool Late, unsigned SubIdx,
MachineInstr *ReplaceIndexMI) {
assert(RM.OrigMI && "Invalid remat");
TII.reMaterialize(MBB, MI, DestReg, SubIdx, *RM.OrigMI, tri);
@@ -288,8 +286,12 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
// Never delete a bundled instruction.
if (MI->isBundled()) {
+ // TODO: Handle deleting copy bundles
+ LLVM_DEBUG(dbgs() << "Won't delete dead bundled inst: " << Idx << '\t'
+ << *MI);
return;
}
+
// Never delete inline asm.
if (MI->isInlineAsm()) {
LLVM_DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI);
@@ -306,7 +308,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
LLVM_DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI);
// Collect virtual registers to be erased after MI is gone.
- SmallVector<unsigned, 8> RegsToErase;
+ SmallVector<Register, 8> RegsToErase;
bool ReadsPhysRegs = false;
bool isOrigDef = false;
Register Dest;
@@ -350,7 +352,8 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) {
// unlikely to change anything. We typically don't want to shrink the
// PIC base register that has lots of uses everywhere.
// Always shrink COPY uses that probably come from live range splitting.
- if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MO.isDef())) ||
+ if ((MI->readsVirtualRegister(Reg) &&
+ (MO.isDef() || TII.isCopyInstr(*MI))) ||
(MO.readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, MO))))
ToShrink.insert(&LI);
else if (MO.readsReg())
diff --git a/llvm/lib/CodeGen/LiveRangeShrink.cpp b/llvm/lib/CodeGen/LiveRangeShrink.cpp
index 93f5314539cd..af7d6c4403b8 100644
--- a/llvm/lib/CodeGen/LiveRangeShrink.cpp
+++ b/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
@@ -109,6 +110,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
return false;
MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
@@ -197,7 +199,7 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
// is because it needs more accurate model to handle register
// pressure correctly.
MachineInstr &DefInstr = *MRI.def_instr_begin(Reg);
- if (!DefInstr.isCopy())
+ if (!TII.isCopyInstr(DefInstr))
NumEligibleUse++;
Insert = FindDominatedInstruction(DefInstr, Insert, IOM);
} else {
diff --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp
index 6ca7f00a7885..6df7e5c10862 100644
--- a/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -93,8 +93,8 @@ static bool foreachUnit(const TargetRegisterInfo *TRI,
}
}
} else {
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- if (Func(*Units, VRegInterval))
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if (Func(Unit, VRegInterval))
return true;
}
}
@@ -136,8 +136,8 @@ void LiveRegMatrix::unassign(const LiveInterval &VirtReg) {
}
bool LiveRegMatrix::isPhysRegUsed(MCRegister PhysReg) const {
- for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) {
- if (!Matrix[*Unit].empty())
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if (!Matrix[Unit].empty())
return true;
}
return false;
@@ -216,7 +216,7 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
LR.addSegment(Seg);
// Check for interference with that segment
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
// LR is stack-allocated. LiveRegMatrix caches queries by a key that
// includes the address of the live range. If (for the same reg unit) this
// checkInterference overload is called twice, without any other query()
@@ -230,7 +230,7 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
// subtle bugs due to query identity. Avoiding caching, for example, would
// greatly simplify things.
LiveIntervalUnion::Query Q;
- Q.reset(UserTag, LR, Matrix[*Units]);
+ Q.reset(UserTag, LR, Matrix[Unit]);
if (Q.checkInterference())
return true;
}
@@ -239,8 +239,8 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
Register LiveRegMatrix::getOneVReg(unsigned PhysReg) const {
const LiveInterval *VRegInterval = nullptr;
- for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) {
- if ((VRegInterval = Matrix[*Unit].getOneVReg()))
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if ((VRegInterval = Matrix[Unit].getOneVReg()))
return VRegInterval->reg();
}
diff --git a/llvm/lib/CodeGen/LiveVariables.cpp b/llvm/lib/CodeGen/LiveVariables.cpp
index 34c81c92707e..9cd74689ba10 100644
--- a/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/llvm/lib/CodeGen/LiveVariables.cpp
@@ -191,8 +191,7 @@ LiveVariables::FindLastPartialDef(Register Reg,
unsigned LastDefReg = 0;
unsigned LastDefDist = 0;
MachineInstr *LastDef = nullptr;
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubReg = *SubRegs;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
MachineInstr *Def = PhysRegDef[SubReg];
if (!Def)
continue;
@@ -208,15 +207,13 @@ LiveVariables::FindLastPartialDef(Register Reg,
return nullptr;
PartDefRegs.insert(LastDefReg);
- for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = LastDef->getOperand(i);
- if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
+ for (MachineOperand &MO : LastDef->all_defs()) {
+ if (MO.getReg() == 0)
continue;
Register DefReg = MO.getReg();
if (TRI->isSubRegister(Reg, DefReg)) {
- for (MCSubRegIterator SubRegs(DefReg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- PartDefRegs.insert(*SubRegs);
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(DefReg))
+ PartDefRegs.insert(SubReg);
}
}
return LastDef;
@@ -245,8 +242,7 @@ void LiveVariables::HandlePhysRegUse(Register Reg, MachineInstr &MI) {
true/*IsImp*/));
PhysRegDef[Reg] = LastPartialDef;
SmallSet<unsigned, 8> Processed;
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubReg = *SubRegs;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
if (Processed.count(SubReg))
continue;
if (PartDefRegs.count(SubReg))
@@ -257,8 +253,8 @@ void LiveVariables::HandlePhysRegUse(Register Reg, MachineInstr &MI) {
false/*IsDef*/,
true/*IsImp*/));
PhysRegDef[SubReg] = LastPartialDef;
- for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
- Processed.insert(*SS);
+ for (MCPhysReg SS : TRI->subregs(SubReg))
+ Processed.insert(SS);
}
}
} else if (LastDef && !PhysRegUse[Reg] &&
@@ -268,9 +264,8 @@ void LiveVariables::HandlePhysRegUse(Register Reg, MachineInstr &MI) {
true/*IsImp*/));
// Remember this use.
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- PhysRegUse[*SubRegs] = &MI;
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ PhysRegUse[SubReg] = &MI;
}
/// FindLastRefOrPartRef - Return the last reference or partial reference of
@@ -284,8 +279,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(Register Reg) {
MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
unsigned LastPartDefDist = 0;
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubReg = *SubRegs;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
MachineInstr *Def = PhysRegDef[SubReg];
if (Def && Def != LastDef) {
// There was a def of this sub-register in between. This is a partial
@@ -333,8 +327,7 @@ bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) {
MachineInstr *LastPartDef = nullptr;
unsigned LastPartDefDist = 0;
SmallSet<unsigned, 8> PartUses;
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubReg = *SubRegs;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
MachineInstr *Def = PhysRegDef[SubReg];
if (Def && Def != LastDef) {
// There was a def of this sub-register in between. This is a partial
@@ -347,9 +340,8 @@ bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) {
continue;
}
if (MachineInstr *Use = PhysRegUse[SubReg]) {
- for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true); SS.isValid();
- ++SS)
- PartUses.insert(*SS);
+ for (MCPhysReg SS : TRI->subregs_inclusive(SubReg))
+ PartUses.insert(SS);
unsigned Dist = DistanceMap[Use];
if (Dist > LastRefOrPartRefDist) {
LastRefOrPartRefDist = Dist;
@@ -364,8 +356,7 @@ bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) {
// dead EAX = op implicit-def AL
// That is, EAX def is dead but AL def extends pass it.
PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubReg = *SubRegs;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
if (!PartUses.count(SubReg))
continue;
bool NeedDef = true;
@@ -384,12 +375,11 @@ bool LiveVariables::HandlePhysRegKill(Register Reg, MachineInstr *MI) {
LastSubRef->addRegisterKilled(SubReg, TRI, true);
else {
LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true);
- for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true);
- SS.isValid(); ++SS)
- PhysRegUse[*SS] = LastRefOrPartRef;
+ for (MCPhysReg SS : TRI->subregs_inclusive(SubReg))
+ PhysRegUse[SS] = LastRefOrPartRef;
}
- for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS)
- PartUses.erase(*SS);
+ for (MCPhysReg SS : TRI->subregs(SubReg))
+ PartUses.erase(SS);
}
} else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) {
if (LastPartDef)
@@ -430,9 +420,9 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) {
// Kill the largest clobbered super-register.
// This avoids needless implicit operands.
unsigned Super = Reg;
- for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR)
- if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR))
- Super = *SR;
+ for (MCPhysReg SR : TRI->superregs(Reg))
+ if ((PhysRegDef[SR] || PhysRegUse[SR]) && MO.clobbersPhysReg(SR))
+ Super = SR;
HandlePhysRegKill(Super, nullptr);
}
}
@@ -442,12 +432,10 @@ void LiveVariables::HandlePhysRegDef(Register Reg, MachineInstr *MI,
// What parts of the register are previously defined?
SmallSet<unsigned, 32> Live;
if (PhysRegDef[Reg] || PhysRegUse[Reg]) {
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- Live.insert(*SubRegs);
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
+ Live.insert(SubReg);
} else {
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubReg = *SubRegs;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
// If a register isn't itself defined, but all parts that make up of it
// are defined, then consider it also defined.
// e.g.
@@ -457,9 +445,8 @@ void LiveVariables::HandlePhysRegDef(Register Reg, MachineInstr *MI,
if (Live.count(SubReg))
continue;
if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) {
- for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true);
- SS.isValid(); ++SS)
- Live.insert(*SS);
+ for (MCPhysReg SS : TRI->subregs_inclusive(SubReg))
+ Live.insert(SS);
}
}
}
@@ -468,8 +455,7 @@ void LiveVariables::HandlePhysRegDef(Register Reg, MachineInstr *MI,
// is referenced.
HandlePhysRegKill(Reg, MI);
// Only some of the sub-registers are used.
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubReg = *SubRegs;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
if (!Live.count(SubReg))
// Skip if this sub-register isn't defined.
continue;
@@ -484,9 +470,7 @@ void LiveVariables::UpdatePhysRegDefs(MachineInstr &MI,
SmallVectorImpl<unsigned> &Defs) {
while (!Defs.empty()) {
Register Reg = Defs.pop_back_val();
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs) {
- unsigned SubReg = *SubRegs;
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg)) {
PhysRegDef[SubReg] = &MI;
PhysRegUse[SubReg] = nullptr;
}
@@ -699,7 +683,7 @@ void LiveVariables::recomputeForSingleDefVirtReg(Register Reg) {
if (UseMI.isPHI()) {
// If Reg is used in a phi then it is live-to-end of the corresponding
// predecessor.
- unsigned Idx = UseMI.getOperandNo(&UseMO);
+ unsigned Idx = UseMO.getOperandNo();
LiveToEndBlocks.push_back(UseMI.getOperand(Idx + 1).getMBB());
} else if (&UseBB == &DefBB) {
// A non-phi use in the same BB as the single def must come after the def.
diff --git a/llvm/lib/CodeGen/LowLevelType.cpp b/llvm/lib/CodeGen/LowLevelType.cpp
index b47c96e50831..24c30b756737 100644
--- a/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/llvm/lib/CodeGen/LowLevelType.cpp
@@ -12,74 +12,55 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LowLevelType.h"
-#include "llvm/ADT/APFloat.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
- if (auto VTy = dyn_cast<VectorType>(&Ty)) {
- auto EC = VTy->getElementCount();
- LLT ScalarTy = getLLTForType(*VTy->getElementType(), DL);
- if (EC.isScalar())
- return ScalarTy;
- return LLT::vector(EC, ScalarTy);
- }
-
- if (auto PTy = dyn_cast<PointerType>(&Ty)) {
- unsigned AddrSpace = PTy->getAddressSpace();
- return LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
- }
-
- if (Ty.isSized()) {
+LLT::LLT(MVT VT) {
+ if (VT.isVector()) {
+ bool asVector = VT.getVectorMinNumElements() > 1;
+ init(/*IsPointer=*/false, asVector, /*IsScalar=*/!asVector,
+ VT.getVectorElementCount(), VT.getVectorElementType().getSizeInBits(),
+ /*AddressSpace=*/0);
+ } else if (VT.isValid() && !VT.isScalableTargetExtVT()) {
// Aggregates are no different from real scalars as far as GlobalISel is
// concerned.
- auto SizeInBits = DL.getTypeSizeInBits(&Ty);
- assert(SizeInBits != 0 && "invalid zero-sized type");
- return LLT::scalar(SizeInBits);
+ init(/*IsPointer=*/false, /*IsVector=*/false, /*IsScalar=*/true,
+ ElementCount::getFixed(0), VT.getSizeInBits(), /*AddressSpace=*/0);
+ } else {
+ IsScalar = false;
+ IsPointer = false;
+ IsVector = false;
+ RawData = 0;
}
-
- return LLT();
}
-MVT llvm::getMVTForLLT(LLT Ty) {
- if (!Ty.isVector())
- return MVT::getIntegerVT(Ty.getSizeInBits());
-
- return MVT::getVectorVT(
- MVT::getIntegerVT(Ty.getElementType().getSizeInBits()),
- Ty.getNumElements());
+void LLT::print(raw_ostream &OS) const {
+ if (isVector()) {
+ OS << "<";
+ OS << getElementCount() << " x " << getElementType() << ">";
+ } else if (isPointer())
+ OS << "p" << getAddressSpace();
+ else if (isValid()) {
+ assert(isScalar() && "unexpected type");
+ OS << "s" << getScalarSizeInBits();
+ } else
+ OS << "LLT_invalid";
}
-EVT llvm::getApproximateEVTForLLT(LLT Ty, const DataLayout &DL,
- LLVMContext &Ctx) {
- if (Ty.isVector()) {
- EVT EltVT = getApproximateEVTForLLT(Ty.getElementType(), DL, Ctx);
- return EVT::getVectorVT(Ctx, EltVT, Ty.getElementCount());
- }
-
- return EVT::getIntegerVT(Ctx, Ty.getSizeInBits());
-}
-
-LLT llvm::getLLTForMVT(MVT Ty) {
- if (!Ty.isVector())
- return LLT::scalar(Ty.getSizeInBits());
-
- return LLT::scalarOrVector(Ty.getVectorElementCount(),
- Ty.getVectorElementType().getSizeInBits());
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void LLT::dump() const {
+ print(dbgs());
+ dbgs() << '\n';
}
+#endif
-const llvm::fltSemantics &llvm::getFltSemanticForLLT(LLT Ty) {
- assert(Ty.isScalar() && "Expected a scalar type.");
- switch (Ty.getSizeInBits()) {
- case 16:
- return APFloat::IEEEhalf();
- case 32:
- return APFloat::IEEEsingle();
- case 64:
- return APFloat::IEEEdouble();
- case 128:
- return APFloat::IEEEquad();
- }
- llvm_unreachable("Invalid FP type size.");
-}
+const constexpr LLT::BitFieldInfo LLT::ScalarSizeFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerSizeFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerAddressSpaceFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::VectorElementsFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::VectorScalableFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::VectorSizeFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerVectorElementsFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerVectorScalableFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerVectorSizeFieldInfo;
+const constexpr LLT::BitFieldInfo LLT::PointerVectorAddressSpaceFieldInfo;
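
A brief usage note on the MVT constructor added above (illustrative only; it assumes the LowLevelType and MachineValueType headers are already included by the caller):

// Vector MVTs map to <N x sM> LLTs, ordinary scalar MVTs to sN scalars.
LLT V = LLT(MVT::v4i32); // prints as "<4 x s32>"
LLT S = LLT(MVT::i64);   // prints as "s64"
assert(V == LLT::fixed_vector(4, 32));
assert(S == LLT::scalar(64));
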
diff --git a/llvm/lib/CodeGen/LowLevelTypeUtils.cpp b/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
new file mode 100644
index 000000000000..bc2ea3f05b6d
--- /dev/null
+++ b/llvm/lib/CodeGen/LowLevelTypeUtils.cpp
@@ -0,0 +1,85 @@
+//===-- llvm/CodeGen/LowLevelTypeUtils.cpp --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file implements the more header-heavy bits of the LLT class to
+/// avoid polluting users' namespaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+using namespace llvm;
+
+LLT llvm::getLLTForType(Type &Ty, const DataLayout &DL) {
+ if (auto VTy = dyn_cast<VectorType>(&Ty)) {
+ auto EC = VTy->getElementCount();
+ LLT ScalarTy = getLLTForType(*VTy->getElementType(), DL);
+ if (EC.isScalar())
+ return ScalarTy;
+ return LLT::vector(EC, ScalarTy);
+ }
+
+ if (auto PTy = dyn_cast<PointerType>(&Ty)) {
+ unsigned AddrSpace = PTy->getAddressSpace();
+ return LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
+ }
+
+ if (Ty.isSized() && !Ty.isScalableTargetExtTy()) {
+ // Aggregates are no different from real scalars as far as GlobalISel is
+ // concerned.
+ auto SizeInBits = DL.getTypeSizeInBits(&Ty);
+ assert(SizeInBits != 0 && "invalid zero-sized type");
+ return LLT::scalar(SizeInBits);
+ }
+
+ return LLT();
+}
+
+MVT llvm::getMVTForLLT(LLT Ty) {
+ if (!Ty.isVector())
+ return MVT::getIntegerVT(Ty.getSizeInBits());
+
+ return MVT::getVectorVT(
+ MVT::getIntegerVT(Ty.getElementType().getSizeInBits()),
+ Ty.getNumElements());
+}
+
+EVT llvm::getApproximateEVTForLLT(LLT Ty, const DataLayout &DL,
+ LLVMContext &Ctx) {
+ if (Ty.isVector()) {
+ EVT EltVT = getApproximateEVTForLLT(Ty.getElementType(), DL, Ctx);
+ return EVT::getVectorVT(Ctx, EltVT, Ty.getElementCount());
+ }
+
+ return EVT::getIntegerVT(Ctx, Ty.getSizeInBits());
+}
+
+LLT llvm::getLLTForMVT(MVT Ty) {
+ if (!Ty.isVector())
+ return LLT::scalar(Ty.getSizeInBits());
+
+ return LLT::scalarOrVector(Ty.getVectorElementCount(),
+ Ty.getVectorElementType().getSizeInBits());
+}
+
+const llvm::fltSemantics &llvm::getFltSemanticForLLT(LLT Ty) {
+ assert(Ty.isScalar() && "Expected a scalar type.");
+ switch (Ty.getSizeInBits()) {
+ case 16:
+ return APFloat::IEEEhalf();
+ case 32:
+ return APFloat::IEEEsingle();
+ case 64:
+ return APFloat::IEEEdouble();
+ case 128:
+ return APFloat::IEEEquad();
+ }
+ llvm_unreachable("Invalid FP type size.");
+}
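
Since these helpers now live under LowLevelTypeUtils rather than LowLevelType, callers include the new header. A small illustration of the IR-to-LLT mapping (a sketch assuming an LLVMContext Ctx and DataLayout DL are in scope):

// getLLTForType maps IR types onto LLTs: sized scalars/aggregates become sN,
// vectors become <EC x elt>, and pointers become pAS with the DL pointer size.
LLT S32 = getLLTForType(*Type::getInt32Ty(Ctx), DL);                          // s32
LLT V4S = getLLTForType(*FixedVectorType::get(Type::getFloatTy(Ctx), 4), DL); // <4 x s32>
LLT P0  = getLLTForType(*PointerType::get(Ctx, 0), DL);                       // p0, DL-sized
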
diff --git a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
index ad8a17f25ec5..8d17cceeb3cd 100644
--- a/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
+++ b/llvm/lib/CodeGen/MIRFSDiscriminator.cpp
@@ -18,10 +18,13 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/xxhash.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
using namespace llvm;
@@ -30,6 +33,13 @@ using namespace sampleprofutil;
#define DEBUG_TYPE "mirfs-discriminators"
+// TODO(xur): Remove this option and related code once we make true as the
+// default.
+cl::opt<bool> ImprovedFSDiscriminator(
+ "improved-fs-discriminator", cl::Hidden, cl::init(false),
+ cl::desc("New FS discriminators encoding (incompatible with the original "
+ "encoding)"));
+
char MIRAddFSDiscriminators::ID = 0;
INITIALIZE_PASS(MIRAddFSDiscriminators, DEBUG_TYPE,
@@ -42,11 +52,12 @@ FunctionPass *llvm::createMIRAddFSDiscriminatorsPass(FSDiscriminatorPass P) {
return new MIRAddFSDiscriminators(P);
}
+// TODO(xur): Remove this once we switch to ImprovedFSDiscriminator.
// Compute a hash value using debug line number, and the line numbers from the
// inline stack.
-static uint64_t getCallStackHash(const MachineBasicBlock &BB,
- const MachineInstr &MI,
- const DILocation *DIL) {
+static uint64_t getCallStackHashV0(const MachineBasicBlock &BB,
+ const MachineInstr &MI,
+ const DILocation *DIL) {
auto updateHash = [](const StringRef &Str) -> uint64_t {
if (Str.empty())
return 0;
@@ -62,6 +73,19 @@ static uint64_t getCallStackHash(const MachineBasicBlock &BB,
return Ret;
}
+static uint64_t getCallStackHash(const DILocation *DIL) {
+ auto hashCombine = [](const uint64_t Seed, const uint64_t Val) {
+ std::hash<uint64_t> Hasher;
+ return Seed ^ (Hasher(Val) + 0x9e3779b9 + (Seed << 6) + (Seed >> 2));
+ };
+ uint64_t Ret = 0;
+ for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {
+ Ret = hashCombine(Ret, xxh3_64bits(ArrayRef<uint8_t>(DIL->getLine())));
+ Ret = hashCombine(Ret, xxh3_64bits(DIL->getSubprogramLinkageName()));
+ }
+ return Ret;
+}
+
// Traverse the CFG and assign FD discriminators. If two instructions
// have the same lineno and discriminator, but residing in different BBs,
// the latter instruction will get a new discriminator value. The new
@@ -70,11 +94,16 @@ static uint64_t getCallStackHash(const MachineBasicBlock &BB,
bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
if (!EnableFSDiscriminator)
return false;
- if (!MF.getFunction().shouldEmitDebugInfoForProfiling())
+
+ bool HasPseudoProbe = MF.getFunction().getParent()->getNamedMetadata(
+ PseudoProbeDescMetadataName);
+
+ if (!HasPseudoProbe && !MF.getFunction().shouldEmitDebugInfoForProfiling())
return false;
bool Changed = false;
- using LocationDiscriminator = std::tuple<StringRef, unsigned, unsigned>;
+ using LocationDiscriminator =
+ std::tuple<StringRef, unsigned, unsigned, uint64_t>;
using BBSet = DenseSet<const MachineBasicBlock *>;
using LocationDiscriminatorBBMap = DenseMap<LocationDiscriminator, BBSet>;
using LocationDiscriminatorCurrPassMap =
@@ -84,7 +113,12 @@ bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
LocationDiscriminatorCurrPassMap LDCM;
// Mask of discriminators before this pass.
- unsigned BitMaskBefore = getN1Bits(LowBit);
+ // TODO(xur): simplify this once we switch to ImprovedFSDiscriminator.
+ unsigned LowBitTemp = LowBit;
+ assert(LowBit > 0 && "LowBit in FSDiscriminator cannot be 0");
+ if (ImprovedFSDiscriminator)
+ LowBitTemp -= 1;
+ unsigned BitMaskBefore = getN1Bits(LowBitTemp);
// Mask of discriminators including this pass.
unsigned BitMaskNow = getN1Bits(HighBit);
// Mask of discriminators for bits specific to this pass.
@@ -92,17 +126,42 @@ bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
unsigned NumNewD = 0;
LLVM_DEBUG(dbgs() << "MIRAddFSDiscriminators working on Func: "
- << MF.getFunction().getName() << "\n");
+ << MF.getFunction().getName() << " Highbit=" << HighBit
+ << "\n");
+
for (MachineBasicBlock &BB : MF) {
for (MachineInstr &I : BB) {
+ if (HasPseudoProbe) {
+ // Only assign discriminators to pseudo probe instructions. Call
+ // instructions are excluded since their dwarf discriminators are used
+ // for other purposes, i.e., storing probe IDs.
+ if (!I.isPseudoProbe())
+ continue;
+ } else if (ImprovedFSDiscriminator && I.isMetaInstruction()) {
+ continue;
+ }
const DILocation *DIL = I.getDebugLoc().get();
if (!DIL)
continue;
- unsigned LineNo = DIL->getLine();
+
+ // Use the ID of the pseudo probe to compute the discriminator.
+ unsigned LineNo =
+ I.isPseudoProbe() ? I.getOperand(1).getImm() : DIL->getLine();
if (LineNo == 0)
continue;
unsigned Discriminator = DIL->getDiscriminator();
- LocationDiscriminator LD{DIL->getFilename(), LineNo, Discriminator};
+ // Clean up discriminators for pseudo probes at the first FS discriminator
+ // pass as their discriminators should not ever be used.
+ if ((Pass == FSDiscriminatorPass::Pass1) && I.isPseudoProbe()) {
+ Discriminator = 0;
+ I.setDebugLoc(DIL->cloneWithDiscriminator(0));
+ }
+ uint64_t CallStackHashVal = 0;
+ if (ImprovedFSDiscriminator)
+ CallStackHashVal = getCallStackHash(DIL);
+
+ LocationDiscriminator LD{DIL->getFilename(), LineNo, Discriminator,
+ CallStackHashVal};
auto &BBMap = LDBM[LD];
auto R = BBMap.insert(&BB);
if (BBMap.size() == 1)
@@ -111,7 +170,8 @@ bool MIRAddFSDiscriminators::runOnMachineFunction(MachineFunction &MF) {
unsigned DiscriminatorCurrPass;
DiscriminatorCurrPass = R.second ? ++LDCM[LD] : LDCM[LD];
DiscriminatorCurrPass = DiscriminatorCurrPass << LowBit;
- DiscriminatorCurrPass += getCallStackHash(BB, I, DIL);
+ if (!ImprovedFSDiscriminator)
+ DiscriminatorCurrPass += getCallStackHashV0(BB, I, DIL);
DiscriminatorCurrPass &= BitMaskThisPass;
unsigned NewD = Discriminator | DiscriminatorCurrPass;
const auto *const NewDIL = DIL->cloneWithDiscriminator(NewD);
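A note on the hash used above: getCallStackHash folds the inlined-at chain of a DILocation into one 64-bit value with the classic boost-style combiner, seed ^ (h + 0x9e3779b9 + (seed << 6) + (seed >> 2)), where 0x9e3779b9 is the 32-bit golden-ratio constant. A minimal standalone sketch of the same idea, using std::hash in place of LLVM's xxh3_64bits and a hypothetical InlineFrame record standing in for the (line, linkage name) pairs read off getInlinedAt():

#include <cstdint>
#include <functional>
#include <string>
#include <vector>

// Hypothetical stand-in for one inlined-at frame; the real pass reads the
// line number and subprogram linkage name straight from the DILocation chain.
struct InlineFrame {
  uint64_t Line;
  std::string LinkageName;
};

static uint64_t hashCombine(uint64_t Seed, uint64_t Val) {
  std::hash<uint64_t> Hasher;
  return Seed ^ (Hasher(Val) + 0x9e3779b9 + (Seed << 6) + (Seed >> 2));
}

static uint64_t callStackHash(const std::vector<InlineFrame> &Stack) {
  uint64_t Ret = 0;
  for (const InlineFrame &F : Stack) {
    Ret = hashCombine(Ret, F.Line);
    Ret = hashCombine(Ret, std::hash<std::string>{}(F.LinkageName));
  }
  return Ret;
}

Two call sites that reach the same source line through different inline stacks therefore produce different LocationDiscriminator keys, which is why the tuple key above grows a fourth (CallStackHashVal) element under the improved encoding.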
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index c136b08223b8..a4c1ba340e46 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -214,6 +214,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("nsw", MIToken::kw_nsw)
.Case("exact", MIToken::kw_exact)
.Case("nofpexcept", MIToken::kw_nofpexcept)
+ .Case("unpredictable", MIToken::kw_unpredictable)
.Case("debug-location", MIToken::kw_debug_location)
.Case("debug-instr-number", MIToken::kw_debug_instr_number)
.Case("dbg-instr-ref", MIToken::kw_dbg_instr_ref)
diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h
index ac484cdfd6c8..7149c29d6ba7 100644
--- a/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -73,6 +73,7 @@ struct MIToken {
kw_nsw,
kw_exact,
kw_nofpexcept,
+ kw_unpredictable,
kw_debug_location,
kw_debug_instr_number,
kw_dbg_instr_ref,
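For readers unfamiliar with how a MIR keyword such as the new "unpredictable" flag gets recognized: the lexer dispatches identifiers through llvm::StringSwitch, as in the MILexer.cpp hunk above. A trimmed, hypothetical sketch of that pattern (DemoTokenKind is an illustrative enum, not the real MIToken::TokenKind):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

// Illustrative token kinds only; the real enum lives in MILexer.h.
enum class DemoTokenKind { kw_nofpexcept, kw_unpredictable, Identifier };

static DemoTokenKind classifyKeyword(llvm::StringRef Word) {
  // Each .Case maps a literal keyword to its token kind; anything not
  // listed falls through to .Default and is treated as a plain identifier.
  return llvm::StringSwitch<DemoTokenKind>(Word)
      .Case("nofpexcept", DemoTokenKind::kw_nofpexcept)
      .Case("unpredictable", DemoTokenKind::kw_unpredictable)
      .Default(DemoTokenKind::Identifier);
}

The parser side then only needs the two additions shown in the MIParser.cpp hunks below: accept the token in the flag-parsing loop and translate it into MachineInstr::Unpredictable.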
diff --git a/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index 525f49347fc4..bfd9286ff59c 100644
--- a/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MIRFormatter.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -62,7 +63,6 @@
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
@@ -470,7 +470,7 @@ public:
bool parseJumpTableIndexOperand(MachineOperand &Dest);
bool parseExternalSymbolOperand(MachineOperand &Dest);
bool parseMCSymbolOperand(MachineOperand &Dest);
- bool parseMDNode(MDNode *&Node);
+ [[nodiscard]] bool parseMDNode(MDNode *&Node);
bool parseDIExpression(MDNode *&Expr);
bool parseDILocation(MDNode *&Expr);
bool parseMetadataOperand(MachineOperand &Dest);
@@ -1451,7 +1451,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Token.is(MIToken::kw_nuw) ||
Token.is(MIToken::kw_nsw) ||
Token.is(MIToken::kw_exact) ||
- Token.is(MIToken::kw_nofpexcept)) {
+ Token.is(MIToken::kw_nofpexcept) ||
+ Token.is(MIToken::kw_unpredictable)) {
// Mine frame and fast math flags
if (Token.is(MIToken::kw_frame_setup))
Flags |= MachineInstr::FrameSetup;
@@ -1479,6 +1480,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Flags |= MachineInstr::IsExact;
if (Token.is(MIToken::kw_nofpexcept))
Flags |= MachineInstr::NoFPExcept;
+ if (Token.is(MIToken::kw_unpredictable))
+ Flags |= MachineInstr::Unpredictable;
lex();
}
@@ -2414,7 +2417,7 @@ bool MIParser::parseMetadataOperand(MachineOperand &Dest) {
bool MIParser::parseCFIOffset(int &Offset) {
if (Token.isNot(MIToken::IntegerLiteral))
return error("expected a cfi offset");
- if (Token.integerValue().getMinSignedBits() > 32)
+ if (Token.integerValue().getSignificantBits() > 32)
return error("expected a 32 bit integer (the cfi offset is too large)");
Offset = (int)Token.integerValue().getExtValue();
lex();
@@ -2520,7 +2523,7 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
parseCFIAddressSpace(AddressSpace))
return true;
CFIIndex = MF.addFrameInst(MCCFIInstruction::createLLVMDefAspaceCfa(
- nullptr, Reg, Offset, AddressSpace));
+ nullptr, Reg, Offset, AddressSpace, SMLoc()));
break;
case MIToken::kw_cfi_remember_state:
CFIIndex = MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
@@ -3001,7 +3004,7 @@ bool MIParser::parseOffset(int64_t &Offset) {
lex();
if (Token.isNot(MIToken::IntegerLiteral))
return error("expected an integer literal after '" + Sign + "'");
- if (Token.integerValue().getMinSignedBits() > 64)
+ if (Token.integerValue().getSignificantBits() > 64)
return error("expected 64-bit integer (too large)");
Offset = Token.integerValue().getExtValue();
if (IsNegative)
@@ -3471,7 +3474,8 @@ bool MIParser::parseHeapAllocMarker(MDNode *&Node) {
assert(Token.is(MIToken::kw_heap_alloc_marker) &&
"Invalid token for a heap alloc marker!");
lex();
- parseMDNode(Node);
+ if (parseMDNode(Node))
+ return true;
if (!Node)
return error("expected a MDNode after 'heap-alloc-marker'");
if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
@@ -3487,7 +3491,8 @@ bool MIParser::parsePCSections(MDNode *&Node) {
assert(Token.is(MIToken::kw_pcsections) &&
"Invalid token for a PC sections!");
lex();
- parseMDNode(Node);
+ if (parseMDNode(Node))
+ return true;
if (!Node)
return error("expected a MDNode after 'pcsections'");
if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) ||
diff --git a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index a20c2bfe6c0f..b2e570c5e67e 100644
--- a/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -130,6 +130,16 @@ public:
const yaml::StringValue &RegisterSource,
bool IsRestored, int FrameIdx);
+ struct VarExprLoc {
+ DILocalVariable *DIVar = nullptr;
+ DIExpression *DIExpr = nullptr;
+ DILocation *DILoc = nullptr;
+ };
+
+ std::optional<VarExprLoc> parseVarExprLoc(PerFunctionMIParsingState &PFS,
+ const yaml::StringValue &VarStr,
+ const yaml::StringValue &ExprStr,
+ const yaml::StringValue &LocStr);
template <typename T>
bool parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
const T &Object,
@@ -392,7 +402,7 @@ bool MIRParserImpl::initializeCallSiteInfo(
MachineFunction &MF = PFS.MF;
SMDiagnostic Error;
const LLVMTargetMachine &TM = MF.getTarget();
- for (auto YamlCSInfo : YamlMF.CallSitesInfo) {
+ for (auto &YamlCSInfo : YamlMF.CallSitesInfo) {
yaml::CallSiteInfo::MachineInstrLoc MILoc = YamlCSInfo.CallLocation;
if (MILoc.BlockNum >= MF.size())
return error(Twine(MF.getName()) +
@@ -468,6 +478,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
MF.setHasEHCatchret(YamlMF.HasEHCatchret);
MF.setHasEHScopes(YamlMF.HasEHScopes);
MF.setHasEHFunclets(YamlMF.HasEHFunclets);
+ MF.setIsOutlined(YamlMF.IsOutlined);
if (YamlMF.Legalized)
MF.getProperties().set(MachineFunctionProperties::Property::Legalized);
@@ -792,6 +803,24 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
return true;
}
+ for (const auto &Object : YamlMF.EntryValueObjects) {
+ SMDiagnostic Error;
+ Register Reg;
+ if (parseNamedRegisterReference(PFS, Reg, Object.EntryValueRegister.Value,
+ Error))
+ return error(Error, Object.EntryValueRegister.SourceRange);
+ if (!Reg.isPhysical())
+ return error(Object.EntryValueRegister.SourceRange.Start,
+ "Expected physical register for entry value field");
+ std::optional<VarExprLoc> MaybeInfo = parseVarExprLoc(
+ PFS, Object.DebugVar, Object.DebugExpr, Object.DebugLoc);
+ if (!MaybeInfo)
+ return true;
+ if (MaybeInfo->DIVar || MaybeInfo->DIExpr || MaybeInfo->DILoc)
+ PFS.MF.setVariableDbgInfo(MaybeInfo->DIVar, MaybeInfo->DIExpr,
+ Reg.asMCReg(), MaybeInfo->DILoc);
+ }
+
// Initialize the ordinary frame objects.
for (const auto &Object : YamlMF.StackObjects) {
int ObjectIdx;
@@ -887,26 +916,37 @@ static bool typecheckMDNode(T *&Result, MDNode *Node,
return false;
}
-template <typename T>
-bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
- const T &Object, int FrameIdx) {
- // Debug information can only be attached to stack objects; Fixed stack
- // objects aren't supported.
- MDNode *Var = nullptr, *Expr = nullptr, *Loc = nullptr;
- if (parseMDNode(PFS, Var, Object.DebugVar) ||
- parseMDNode(PFS, Expr, Object.DebugExpr) ||
- parseMDNode(PFS, Loc, Object.DebugLoc))
- return true;
- if (!Var && !Expr && !Loc)
- return false;
+std::optional<MIRParserImpl::VarExprLoc> MIRParserImpl::parseVarExprLoc(
+ PerFunctionMIParsingState &PFS, const yaml::StringValue &VarStr,
+ const yaml::StringValue &ExprStr, const yaml::StringValue &LocStr) {
+ MDNode *Var = nullptr;
+ MDNode *Expr = nullptr;
+ MDNode *Loc = nullptr;
+ if (parseMDNode(PFS, Var, VarStr) || parseMDNode(PFS, Expr, ExprStr) ||
+ parseMDNode(PFS, Loc, LocStr))
+ return std::nullopt;
DILocalVariable *DIVar = nullptr;
DIExpression *DIExpr = nullptr;
DILocation *DILoc = nullptr;
- if (typecheckMDNode(DIVar, Var, Object.DebugVar, "DILocalVariable", *this) ||
- typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) ||
- typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this))
+ if (typecheckMDNode(DIVar, Var, VarStr, "DILocalVariable", *this) ||
+ typecheckMDNode(DIExpr, Expr, ExprStr, "DIExpression", *this) ||
+ typecheckMDNode(DILoc, Loc, LocStr, "DILocation", *this))
+ return std::nullopt;
+ return VarExprLoc{DIVar, DIExpr, DILoc};
+}
+
+template <typename T>
+bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
+ const T &Object, int FrameIdx) {
+ std::optional<VarExprLoc> MaybeInfo =
+ parseVarExprLoc(PFS, Object.DebugVar, Object.DebugExpr, Object.DebugLoc);
+ if (!MaybeInfo)
return true;
- PFS.MF.setVariableDbgInfo(DIVar, DIExpr, FrameIdx, DILoc);
+ // Debug information can only be attached to stack objects; Fixed stack
+ // objects aren't supported.
+ if (MaybeInfo->DIVar || MaybeInfo->DIExpr || MaybeInfo->DILoc)
+ PFS.MF.setVariableDbgInfo(MaybeInfo->DIVar, MaybeInfo->DIExpr, FrameIdx,
+ MaybeInfo->DILoc);
return false;
}
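The refactor above replaces a bool-returning helper with out-parameters by one returning std::optional<VarExprLoc>: std::nullopt signals a parse failure, while a value whose members are all null simply means no debug info was specified. A minimal sketch of that shape, with hypothetical types standing in for the MDNode machinery:

#include <optional>
#include <string>

// Hypothetical result record mirroring VarExprLoc; flags stay false when the
// corresponding YAML field was empty.
struct DemoVarExprLoc {
  bool HasVar = false;
  bool HasExpr = false;
  bool HasLoc = false;
};

static std::optional<DemoVarExprLoc> parseDemo(const std::string &VarStr,
                                               const std::string &ExprStr,
                                               const std::string &LocStr) {
  // Stand-in for a failed parseMDNode/typecheckMDNode call.
  if (VarStr == "!bad" || ExprStr == "!bad" || LocStr == "!bad")
    return std::nullopt;
  DemoVarExprLoc Result;
  Result.HasVar = !VarStr.empty();
  Result.HasExpr = !ExprStr.empty();
  Result.HasLoc = !LocStr.empty();
  return Result;
}

// Callers distinguish the two cases explicitly:
//   if (!MaybeInfo)              -> propagate the parse error
//   if (MaybeInfo->HasVar, ...)  -> attach the debug info, otherwise do nothing

This lets the stack-object path and the new entry-value-object path share one parser while keeping the old "all fields empty means nothing to do" behaviour.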
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index 0a4b28ac79a7..b91d9c4727fc 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -47,7 +48,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -119,6 +119,9 @@ public:
const MachineJumpTableInfo &JTI);
void convertStackObjects(yaml::MachineFunction &YMF,
const MachineFunction &MF, ModuleSlotTracker &MST);
+ void convertEntryValueObjects(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ ModuleSlotTracker &MST);
void convertCallSiteObjects(yaml::MachineFunction &YMF,
const MachineFunction &MF,
ModuleSlotTracker &MST);
@@ -200,6 +203,7 @@ void MIRPrinter::print(const MachineFunction &MF) {
YamlMF.HasEHCatchret = MF.hasEHCatchret();
YamlMF.HasEHScopes = MF.hasEHScopes();
YamlMF.HasEHFunclets = MF.hasEHFunclets();
+ YamlMF.IsOutlined = MF.isOutlined();
YamlMF.UseDebugInstrRef = MF.useDebugInstrRef();
YamlMF.Legalized = MF.getProperties().hasProperty(
@@ -220,6 +224,7 @@ void MIRPrinter::print(const MachineFunction &MF) {
MST.incorporateFunction(MF.getFunction());
convert(MST, YamlMF.FrameInfo, MF.getFrameInfo());
convertStackObjects(YamlMF, MF, MST);
+ convertEntryValueObjects(YamlMF, MF, MST);
convertCallSiteObjects(YamlMF, MF, MST);
for (const auto &Sub : MF.DebugValueSubstitutions) {
const auto &SubSrc = Sub.Src;
@@ -372,6 +377,19 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,
}
}
+void MIRPrinter::convertEntryValueObjects(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ ModuleSlotTracker &MST) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ for (const MachineFunction::VariableDbgInfo &DebugVar :
+ MF.getEntryValueVariableDbgInfo()) {
+ yaml::EntryValueObject &Obj = YMF.EntryValueObjects.emplace_back();
+ printStackObjectDbgInfo(DebugVar, Obj, MST);
+ MCRegister EntryValReg = DebugVar.getEntryValueRegister();
+ printRegMIR(EntryValReg, Obj.EntryValueRegister, TRI);
+ }
+}
+
void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
const MachineFunction &MF,
ModuleSlotTracker &MST) {
@@ -490,17 +508,17 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
// Print the debug variable information.
for (const MachineFunction::VariableDbgInfo &DebugVar :
- MF.getVariableDbgInfo()) {
- assert(DebugVar.Slot >= MFI.getObjectIndexBegin() &&
- DebugVar.Slot < MFI.getObjectIndexEnd() &&
+ MF.getInStackSlotVariableDbgInfo()) {
+ int Idx = DebugVar.getStackSlot();
+ assert(Idx >= MFI.getObjectIndexBegin() && Idx < MFI.getObjectIndexEnd() &&
"Invalid stack object index");
- if (DebugVar.Slot < 0) { // Negative index means fixed objects.
+ if (Idx < 0) { // Negative index means fixed objects.
auto &Object =
- YMF.FixedStackObjects[FixedStackObjectsIdx[DebugVar.Slot +
+ YMF.FixedStackObjects[FixedStackObjectsIdx[Idx +
MFI.getNumFixedObjects()]];
printStackObjectDbgInfo(DebugVar, Object, MST);
} else {
- auto &Object = YMF.StackObjects[StackObjectsIdx[DebugVar.Slot]];
+ auto &Object = YMF.StackObjects[StackObjectsIdx[Idx]];
printStackObjectDbgInfo(DebugVar, Object, MST);
}
}
@@ -783,6 +801,8 @@ void MIPrinter::print(const MachineInstr &MI) {
OS << "nofpexcept ";
if (MI.getFlag(MachineInstr::NoMerge))
OS << "nomerge ";
+ if (MI.getFlag(MachineInstr::Unpredictable))
+ OS << "unpredictable ";
OS << TII->getName(MI.getOpcode());
if (I < E)
diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp
index a8996a586909..96f8589e682d 100644
--- a/llvm/lib/CodeGen/MIRSampleProfile.cpp
+++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -18,17 +18,21 @@
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
+#include <optional>
using namespace llvm;
using namespace sampleprof;
@@ -57,6 +61,7 @@ static cl::opt<bool> ViewBFIAfter("fs-viewbfi-after", cl::Hidden,
cl::init(false),
cl::desc("View BFI after MIR loader"));
+extern cl::opt<bool> ImprovedFSDiscriminator;
char MIRProfileLoaderPass::ID = 0;
INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE,
@@ -72,10 +77,11 @@ INITIALIZE_PASS_END(MIRProfileLoaderPass, DEBUG_TYPE, "Load MIR Sample Profile",
char &llvm::MIRProfileLoaderPassID = MIRProfileLoaderPass::ID;
-FunctionPass *llvm::createMIRProfileLoaderPass(std::string File,
- std::string RemappingFile,
- FSDiscriminatorPass P) {
- return new MIRProfileLoaderPass(File, RemappingFile, P);
+FunctionPass *
+llvm::createMIRProfileLoaderPass(std::string File, std::string RemappingFile,
+ FSDiscriminatorPass P,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS) {
+ return new MIRProfileLoaderPass(File, RemappingFile, P, std::move(FS));
}
namespace llvm {
@@ -89,6 +95,22 @@ extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
extern cl::opt<std::string> ViewBlockFreqFuncName;
+std::optional<PseudoProbe> extractProbe(const MachineInstr &MI) {
+ if (MI.isPseudoProbe()) {
+ PseudoProbe Probe;
+ Probe.Id = MI.getOperand(1).getImm();
+ Probe.Type = MI.getOperand(2).getImm();
+ Probe.Attr = MI.getOperand(3).getImm();
+ Probe.Factor = 1;
+ DILocation *DebugLoc = MI.getDebugLoc();
+ Probe.Discriminator = DebugLoc ? DebugLoc->getDiscriminator() : 0;
+ return Probe;
+ }
+
+ // Ignore callsite probes since they do not have FS discriminators.
+ return std::nullopt;
+}
+
namespace afdo_detail {
template <> struct IRTraits<MachineBasicBlock> {
using InstructionT = MachineInstr;
@@ -118,7 +140,7 @@ template <> struct IRTraits<MachineBasicBlock> {
} // namespace afdo_detail
class MIRProfileLoader final
- : public SampleProfileLoaderBaseImpl<MachineBasicBlock> {
+ : public SampleProfileLoaderBaseImpl<MachineFunction> {
public:
void setInitVals(MachineDominatorTree *MDT, MachinePostDominatorTree *MPDT,
MachineLoopInfo *MLI, MachineBlockFrequencyInfo *MBFI,
@@ -136,9 +158,10 @@ public:
assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
}
- MIRProfileLoader(StringRef Name, StringRef RemapName)
- : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)) {
- }
+ MIRProfileLoader(StringRef Name, StringRef RemapName,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS)
+ : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName),
+ std::move(FS)) {}
void setBranchProbs(MachineFunction &F);
bool runOnFunction(MachineFunction &F);
@@ -162,11 +185,18 @@ protected:
unsigned HighBit;
bool ProfileIsValid = true;
+ ErrorOr<uint64_t> getInstWeight(const MachineInstr &MI) override {
+ if (FunctionSamples::ProfileIsProbeBased)
+ return getProbeWeight(MI);
+ if (ImprovedFSDiscriminator && MI.isMetaInstruction())
+ return std::error_code();
+ return getInstWeightImpl(MI);
+ }
};
template <>
-void SampleProfileLoaderBaseImpl<
- MachineBasicBlock>::computeDominanceAndLoopInfo(MachineFunction &F) {}
+void SampleProfileLoaderBaseImpl<MachineFunction>::computeDominanceAndLoopInfo(
+ MachineFunction &F) {}
void MIRProfileLoader::setBranchProbs(MachineFunction &F) {
LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch probs\n");
@@ -254,8 +284,8 @@ void MIRProfileLoader::setBranchProbs(MachineFunction &F) {
bool MIRProfileLoader::doInitialization(Module &M) {
auto &Ctx = M.getContext();
- auto ReaderOrErr = sampleprof::SampleProfileReader::create(Filename, Ctx, P,
- RemappingFilename);
+ auto ReaderOrErr = sampleprof::SampleProfileReader::create(
+ Filename, Ctx, *FS, P, RemappingFilename);
if (std::error_code EC = ReaderOrErr.getError()) {
std::string Msg = "Could not open profile: " + EC.message();
Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
@@ -265,20 +295,41 @@ bool MIRProfileLoader::doInitialization(Module &M) {
Reader = std::move(ReaderOrErr.get());
Reader->setModule(&M);
ProfileIsValid = (Reader->read() == sampleprof_error::success);
- Reader->getSummary();
+
+ // Load pseudo probe descriptors for probe-based function samples.
+ if (Reader->profileIsProbeBased()) {
+ ProbeManager = std::make_unique<PseudoProbeManager>(M);
+ if (!ProbeManager->moduleIsProbed(M)) {
+ return false;
+ }
+ }
return true;
}
bool MIRProfileLoader::runOnFunction(MachineFunction &MF) {
+ // Do not load non-FS profiles. A line or probe can get a zero-valued
+ // discriminator in a certain pass, which could result in accidentally loading
+ // the corresponding base counter in the non-FS profile, while a non-zero
+ // discriminator would end up getting zero samples. This could in turn undo
+ // the sample distribution effort done by previous BFI maintenance and the
+ // probe distribution factor work for pseudo probes.
+ if (!Reader->profileIsFS())
+ return false;
+
Function &Func = MF.getFunction();
clearFunctionData(false);
Samples = Reader->getSamplesFor(Func);
if (!Samples || Samples->empty())
return false;
- if (getFunctionLoc(MF) == 0)
- return false;
+ if (FunctionSamples::ProfileIsProbeBased) {
+ if (!ProbeManager->profileIsValid(MF.getFunction(), *Samples))
+ return false;
+ } else {
+ if (getFunctionLoc(MF) == 0)
+ return false;
+ }
DenseSet<GlobalValue::GUID> InlinedGUIDs;
bool Changed = computeAndPropagateWeights(MF, InlinedGUIDs);
@@ -291,14 +342,16 @@ bool MIRProfileLoader::runOnFunction(MachineFunction &MF) {
} // namespace llvm
-MIRProfileLoaderPass::MIRProfileLoaderPass(std::string FileName,
- std::string RemappingFileName,
- FSDiscriminatorPass P)
- : MachineFunctionPass(ID), ProfileFileName(FileName), P(P),
- MIRSampleLoader(
- std::make_unique<MIRProfileLoader>(FileName, RemappingFileName)) {
+MIRProfileLoaderPass::MIRProfileLoaderPass(
+ std::string FileName, std::string RemappingFileName, FSDiscriminatorPass P,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS)
+ : MachineFunctionPass(ID), ProfileFileName(FileName), P(P) {
LowBit = getFSPassBitBegin(P);
HighBit = getFSPassBitEnd(P);
+
+ auto VFS = FS ? std::move(FS) : vfs::getRealFileSystem();
+ MIRSampleLoader = std::make_unique<MIRProfileLoader>(
+ FileName, RemappingFileName, std::move(VFS));
assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
}
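The `extern cl::opt<bool> ImprovedFSDiscriminator;` declaration earlier in this file reuses the flag defined in MIRFSDiscriminator.cpp rather than introducing a second option. A short sketch of that idiom with a hypothetical flag name (the option object has external linkage in its defining file, so other translation units can refer to it):

#include "llvm/Support/CommandLine.h"

// In the translation unit that owns the flag:
llvm::cl::opt<bool> DemoSharedFlag(
    "demo-shared-flag", llvm::cl::Hidden, llvm::cl::init(false),
    llvm::cl::desc("Illustrative flag shared across translation units"));

// In any other translation unit that wants to honor the same flag:
extern llvm::cl::opt<bool> DemoSharedFlag;

static bool useNewEncoding() {
  // cl::opt<bool> converts to its stored value, so it reads like a bool.
  return DemoSharedFlag;
}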
diff --git a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
index e634a2b284c3..812d57984e6c 100644
--- a/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
+++ b/llvm/lib/CodeGen/MIRVRegNamerUtils.cpp
@@ -39,7 +39,7 @@ VRegRenamer::getVRegRenameMap(const std::vector<NamedVReg> &VRegs) {
StringMap<unsigned> VRegNameCollisionMap;
auto GetUniqueVRegName = [&VRegNameCollisionMap](const NamedVReg &Reg) {
- if (VRegNameCollisionMap.find(Reg.getName()) == VRegNameCollisionMap.end())
+ if (!VRegNameCollisionMap.contains(Reg.getName()))
VRegNameCollisionMap[Reg.getName()] = 0;
const unsigned Counter = ++VRegNameCollisionMap[Reg.getName()];
return Reg.getName() + "__" + std::to_string(Counter);
diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
index 5cc8ad3d609e..7b3746fde503 100644
--- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
+++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -13,6 +13,7 @@
#include "AllocationOrder.h"
#include "RegAllocEvictionAdvisor.h"
#include "RegAllocGreedy.h"
+#include "llvm/Analysis/InteractiveModelRunner.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/TensorSpec.h"
#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL) || defined(LLVM_HAVE_TFLITE)
@@ -38,6 +39,7 @@
#include "llvm/Support/ErrorHandling.h"
#include <array>
+#include <bitset>
#include <memory>
using namespace llvm;
@@ -52,6 +54,14 @@ using CompiledModelType = RegallocEvictModel;
using CompiledModelType = NoopSavedModelImpl;
#endif
+static cl::opt<std::string> InteractiveChannelBaseName(
+ "regalloc-evict-interactive-channel-base", cl::Hidden,
+ cl::desc(
+ "Base file path for the interactive mode. The incoming filename should "
+ "have the name <regalloc-evict-interactive-channel-base>.in, while the "
+ "outgoing name should be "
+ "<regalloc-evict-interactive-channel-base>.out"));
+
// Options that only make sense in development mode
#ifdef LLVM_HAVE_TFLITE
#include "RegAllocScore.h"
@@ -74,12 +84,12 @@ static cl::opt<bool> EnableDevelopmentFeatures(
static const bool EnableDevelopmentFeatures = false;
#endif // #ifdef LLVM_HAVE_TFLITE
-extern cl::opt<unsigned> EvictInterferenceCutoff;
-
/// The score injection pass.
/// This pass calculates the score for a function and inserts it in the log, but
/// this happens only in development mode. It's a no-op otherwise.
namespace llvm {
+extern cl::opt<unsigned> EvictInterferenceCutoff;
+
class RegAllocScoring : public MachineFunctionPass {
public:
static char ID;
@@ -213,6 +223,8 @@ static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences};
// will be guaranteed to be to a mask == 1 position. Using a macro here to
// avoid 'not used' warnings (and keep cond compilation to a minimum)
#define DecisionName "index_to_evict"
+static const TensorSpec DecisionSpec =
+ TensorSpec::createSpec<int64_t>(DecisionName, {1});
// Named features index.
enum FeatureIDs {
@@ -382,14 +394,21 @@ private:
std::unique_ptr<RegAllocEvictionAdvisor>
getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
- if (!Runner)
- Runner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
- MF.getFunction().getContext(), InputFeatures, DecisionName);
+ if (!Runner) {
+ if (InteractiveChannelBaseName.empty())
+ Runner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
+ MF.getFunction().getContext(), InputFeatures, DecisionName);
+ else
+ Runner = std::make_unique<InteractiveModelRunner>(
+ MF.getFunction().getContext(), InputFeatures, DecisionSpec,
+ InteractiveChannelBaseName + ".out",
+ InteractiveChannelBaseName + ".in");
+ }
return std::make_unique<MLEvictAdvisor>(
MF, RA, Runner.get(), getAnalysis<MachineBlockFrequencyInfo>(),
getAnalysis<MachineLoopInfo>());
}
- std::unique_ptr<ReleaseModeModelRunner<CompiledModelType>> Runner;
+ std::unique_ptr<MLModelRunner> Runner;
};
// ===================================
@@ -398,8 +417,6 @@ private:
//
// Features we log
#ifdef LLVM_HAVE_TFLITE
-static const TensorSpec Output =
- TensorSpec::createSpec<int64_t>(DecisionName, {1});
static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
// Features we bind on the model. The tensor names have a prefix, and we also
@@ -458,7 +475,7 @@ public:
void logRewardIfNeeded(const MachineFunction &MF,
llvm::function_ref<float()> GetReward) override {
- if (!Log)
+ if (!Log || !Log->hasAnyObservationForContext(MF.getName()))
return;
// The function pass manager would run all the function passes for a
// function, so we assume the last context belongs to this function. If
@@ -512,7 +529,7 @@ private:
// We always log the output; in particular, if we're not evaluating, we
// don't have an output spec json file. That's why we handle the
// 'normal' output separately.
- LFS.push_back(Output);
+ LFS.push_back(DecisionSpec);
Log = std::make_unique<Logger>(std::move(OS), LFS, Reward,
/*IncludeReward*/ true);
@@ -557,6 +574,7 @@ MLEvictAdvisor::MLEvictAdvisor(const MachineFunction &MF, const RAGreedy &RA,
Runner(std::move(Runner)), MBFI(MBFI), Loops(Loops),
InitialQSize(MLEvictAdvisor::getInitialQueueSize(MF)) {
assert(this->Runner);
+ Runner->switchContext(MF.getName());
DoNotNormalize.set(FeatureIDs::mask);
DoNotNormalize.set(FeatureIDs::is_free);
DoNotNormalize.set(FeatureIDs::is_hint);
@@ -594,8 +612,8 @@ bool MLEvictAdvisor::loadInterferenceFeatures(
unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());
SmallVector<const LiveInterval *, MaxInterferences> InterferingIntervals;
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
// Different from the default heuristic, we don't make any assumptions
// about what having more than 10 results in the query may mean.
const auto &IFIntervals = Q.interferingVRegs(EvictInterferenceCutoff);
@@ -1134,7 +1152,10 @@ bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) {
#endif // #ifdef LLVM_HAVE_TFLITE
RegAllocEvictionAdvisorAnalysis *llvm::createReleaseModeAdvisor() {
- return new ReleaseModeEvictionAdvisorAnalysis();
+ return llvm::isEmbeddedModelEvaluatorValid<CompiledModelType>() ||
+ !InteractiveChannelBaseName.empty()
+ ? new ReleaseModeEvictionAdvisorAnalysis()
+ : nullptr;
}
// In all cases except development mode, we don't need scoring.
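Both ML advisor files in this import widen their Runner member from a concrete ReleaseModeModelRunner to the MLModelRunner base so that one pointer can hold either the compiled model or the new InteractiveModelRunner. A standalone sketch of that selection, with hypothetical class names rather than the real LLVM types:

#include <memory>
#include <string>

// Base/derived stand-ins for MLModelRunner and its two concrete runners.
struct DemoModelRunner { virtual ~DemoModelRunner() = default; };
struct DemoReleaseRunner : DemoModelRunner {};
struct DemoInteractiveRunner : DemoModelRunner {
  explicit DemoInteractiveRunner(const std::string &Base)
      : Outgoing(Base + ".out"), Incoming(Base + ".in") {}
  std::string Outgoing, Incoming; // channel names derived from the base path
};

static std::unique_ptr<DemoModelRunner>
makeRunner(const std::string &InteractiveChannelBase) {
  if (InteractiveChannelBase.empty())
    return std::make_unique<DemoReleaseRunner>();
  return std::make_unique<DemoInteractiveRunner>(InteractiveChannelBase);
}

As the cl::desc strings above spell out, the real code derives the two channel paths the same way: the base name from -regalloc-evict-interactive-channel-base (or the priority-advisor equivalent) plus ".out" for outgoing traffic and ".in" for the replies.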
diff --git a/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp
index 320a184bdcc5..422781593a9c 100644
--- a/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp
+++ b/llvm/lib/CodeGen/MLRegallocPriorityAdvisor.cpp
@@ -14,6 +14,7 @@
#include "RegAllocGreedy.h"
#include "RegAllocPriorityAdvisor.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/InteractiveModelRunner.h"
#include "llvm/Analysis/MLModelRunner.h"
#include "llvm/Analysis/ReleaseModeModelRunner.h"
#include "llvm/Analysis/TensorSpec.h"
@@ -40,6 +41,16 @@
using namespace llvm;
+static cl::opt<std::string> InteractiveChannelBaseName(
+ "regalloc-priority-interactive-channel-base", cl::Hidden,
+ cl::desc(
+ "Base file path for the interactive mode. The incoming filename should "
+ "have the name <regalloc-priority-interactive-channel-base>.in, while "
+ "the outgoing name should be "
+ "<regalloc-priority-interactive-channel-base>.out"));
+
+using CompiledModelType = NoopSavedModelImpl;
+
// Options that only make sense in development mode
#ifdef LLVM_HAVE_TFLITE
#include "RegAllocScore.h"
@@ -65,6 +76,9 @@ static const std::vector<int64_t> PerLiveRangeShape{1};
M(float, weight, PerLiveRangeShape, "weight")
#define DecisionName "priority"
+static const TensorSpec DecisionSpec =
+ TensorSpec::createSpec<float>(DecisionName, {1});
+
// Named features index.
enum FeatureIDs {
@@ -125,13 +139,20 @@ private:
std::unique_ptr<RegAllocPriorityAdvisor>
getAdvisor(const MachineFunction &MF, const RAGreedy &RA) override {
- if (!Runner)
- Runner = std::make_unique<ReleaseModeModelRunner<NoopSavedModelImpl>>(
- MF.getFunction().getContext(), InputFeatures, DecisionName);
+ if (!Runner) {
+ if (InteractiveChannelBaseName.empty())
+ Runner = std::make_unique<ReleaseModeModelRunner<CompiledModelType>>(
+ MF.getFunction().getContext(), InputFeatures, DecisionName);
+ else
+ Runner = std::make_unique<InteractiveModelRunner>(
+ MF.getFunction().getContext(), InputFeatures, DecisionSpec,
+ InteractiveChannelBaseName + ".out",
+ InteractiveChannelBaseName + ".in");
+ }
return std::make_unique<MLPriorityAdvisor>(
MF, RA, &getAnalysis<SlotIndexes>(), Runner.get());
}
- std::unique_ptr<ReleaseModeModelRunner<NoopSavedModelImpl>> Runner;
+ std::unique_ptr<MLModelRunner> Runner;
};
// ===================================
@@ -140,9 +161,6 @@ private:
//
// Features we log
#ifdef LLVM_HAVE_TFLITE
-
-static const TensorSpec Output =
- TensorSpec::createSpec<float>(DecisionName, {1});
static const TensorSpec Reward = TensorSpec::createSpec<float>("reward", {1});
#define _DECL_TRAIN_FEATURES(type, name, shape, _) \
@@ -179,7 +197,7 @@ public:
void logRewardIfNeeded(const MachineFunction &MF,
llvm::function_ref<float()> GetReward) override {
- if (!Log)
+ if (!Log || !Log->hasAnyObservationForContext(MF.getName()))
return;
// The function pass manager would run all the function passes for a
// function, so we assume the last context belongs to this function. If
@@ -231,7 +249,7 @@ private:
// We always log the output; in particular, if we're not evaluating, we
// don't have an output spec json file. That's why we handle the
// 'normal' output separately.
- LFS.push_back(Output);
+ LFS.push_back(DecisionSpec);
Log = std::make_unique<Logger>(std::move(OS), LFS, Reward,
/*IncludeReward*/ true);
@@ -258,7 +276,10 @@ private:
} // namespace llvm
RegAllocPriorityAdvisorAnalysis *llvm::createReleaseModePriorityAdvisor() {
- return new ReleaseModePriorityAdvisorAnalysis();
+ return llvm::isEmbeddedModelEvaluatorValid<CompiledModelType>() ||
+ !InteractiveChannelBaseName.empty()
+ ? new ReleaseModePriorityAdvisorAnalysis()
+ : nullptr;
}
MLPriorityAdvisor::MLPriorityAdvisor(const MachineFunction &MF,
@@ -268,6 +289,7 @@ MLPriorityAdvisor::MLPriorityAdvisor(const MachineFunction &MF,
: RegAllocPriorityAdvisor(MF, RA, Indexes), DefaultAdvisor(MF, RA, Indexes),
Runner(std::move(Runner)) {
assert(this->Runner);
+ Runner->switchContext(MF.getName());
}
float MLPriorityAdvisor::getPriorityImpl(const LiveInterval &LI) const {
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 5ef377f2a1c0..231544494c32 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -12,12 +12,14 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
@@ -664,6 +666,15 @@ void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
getParent()->splice(++NewBefore->getIterator(), getIterator());
}
+static int findJumpTableIndex(const MachineBasicBlock &MBB) {
+ MachineBasicBlock::const_iterator TerminatorI = MBB.getFirstTerminator();
+ if (TerminatorI == MBB.end())
+ return -1;
+ const MachineInstr &Terminator = *TerminatorI;
+ const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
+ return TII->getJumpTableIndex(Terminator);
+}
+
void MachineBasicBlock::updateTerminator(
MachineBasicBlock *PreviousLayoutSuccessor) {
LLVM_DEBUG(dbgs() << "Updating terminators on " << printMBBReference(*this)
@@ -975,8 +986,8 @@ MachineBasicBlock *MachineBasicBlock::getFallThrough(bool JumpToFallThrough) {
// If there is some explicit branch to the fallthrough block, it can obviously
// reach, even though the branch should get folded to fall through implicitly.
- if (!JumpToFallThrough && (MachineFunction::iterator(TBB) == Fallthrough ||
- MachineFunction::iterator(FBB) == Fallthrough))
+ if (JumpToFallThrough && (MachineFunction::iterator(TBB) == Fallthrough ||
+ MachineFunction::iterator(FBB) == Fallthrough))
return &*Fallthrough;
// If it's an unconditional branch to some block not the fall through, it
@@ -1033,6 +1044,50 @@ MachineBasicBlock *MachineBasicBlock::splitAt(MachineInstr &MI,
return SplitBB;
}
+// Returns `true` if there are possibly other users of the jump table at
+// `JumpTableIndex` other than the ones in `IgnoreMBB`.
+static bool jumpTableHasOtherUses(const MachineFunction &MF,
+ const MachineBasicBlock &IgnoreMBB,
+ int JumpTableIndex) {
+ assert(JumpTableIndex >= 0 && "need valid index");
+ const MachineJumpTableInfo &MJTI = *MF.getJumpTableInfo();
+ const MachineJumpTableEntry &MJTE = MJTI.getJumpTables()[JumpTableIndex];
+ // Take any basic block from the table; every user of the jump table must
+ // show up in the predecessor list.
+ const MachineBasicBlock *MBB = nullptr;
+ for (MachineBasicBlock *B : MJTE.MBBs) {
+ if (B != nullptr) {
+ MBB = B;
+ break;
+ }
+ }
+ if (MBB == nullptr)
+ return true; // can't rule out other users if there isn't any block.
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ SmallVector<MachineOperand, 4> Cond;
+ for (MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (Pred == &IgnoreMBB)
+ continue;
+ MachineBasicBlock *DummyT = nullptr;
+ MachineBasicBlock *DummyF = nullptr;
+ Cond.clear();
+ if (!TII.analyzeBranch(*Pred, DummyT, DummyF, Cond,
+ /*AllowModify=*/false)) {
+ // analyzable direct jump
+ continue;
+ }
+ int PredJTI = findJumpTableIndex(*Pred);
+ if (PredJTI >= 0) {
+ if (PredJTI == JumpTableIndex)
+ return true;
+ continue;
+ }
+ // Be conservative for unanalyzable jumps.
+ return true;
+ }
+ return false;
+}
+
MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
MachineBasicBlock *Succ, Pass &P,
std::vector<SparseBitVector<>> *LiveInSets) {
@@ -1044,6 +1099,16 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
DebugLoc DL; // FIXME: this is nowhere
MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+
+ // Is there an indirect jump with a jump table?
+ bool ChangedIndirectJump = false;
+ int JTI = findJumpTableIndex(*this);
+ if (JTI >= 0) {
+ MachineJumpTableInfo &MJTI = *MF->getJumpTableInfo();
+ MJTI.ReplaceMBBInJumpTable(JTI, Succ, NMBB);
+ ChangedIndirectJump = true;
+ }
+
MF->insert(std::next(MachineFunction::iterator(this)), NMBB);
LLVM_DEBUG(dbgs() << "Splitting critical edge: " << printMBBReference(*this)
<< " -- " << printMBBReference(*NMBB) << " -- "
@@ -1066,9 +1131,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
if (LV)
for (MachineInstr &MI :
llvm::make_range(getFirstInstrTerminator(), instr_end())) {
- for (MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse() || !MO.isKill() ||
- MO.isUndef())
+ for (MachineOperand &MO : MI.all_uses()) {
+ if (MO.getReg() == 0 || !MO.isKill() || MO.isUndef())
continue;
Register Reg = MO.getReg();
if (Reg.isPhysical() || LV->getVarInfo(Reg).removeKill(MI)) {
@@ -1109,7 +1173,9 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(
// as the fallthrough successor
if (Succ == PrevFallthrough)
PrevFallthrough = NMBB;
- updateTerminator(PrevFallthrough);
+
+ if (!ChangedIndirectJump)
+ updateTerminator(PrevFallthrough);
if (Indexes) {
SmallVector<MachineInstr*, 4> NewTerminators;
@@ -1284,8 +1350,13 @@ bool MachineBasicBlock::canSplitCriticalEdge(
if (MF->getTarget().requiresStructuredCFG())
return false;
+ // Do we have an indirect jump with a jump table that we can rewrite?
+ int JTI = findJumpTableIndex(*this);
+ if (JTI >= 0 && !jumpTableHasOtherUses(*MF, *this, JTI))
+ return true;
+
// We may need to update this's terminator, but we can't do that if
- // analyzeBranch fails. If this uses a jump table, we won't touch it.
+ // analyzeBranch fails.
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
@@ -1391,7 +1462,7 @@ void MachineBasicBlock::replacePhiUsesWith(MachineBasicBlock *Old,
}
}
-/// Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE
+/// Find the next valid DebugLoc starting at MBBI, skipping any debug
/// instructions. Return UnknownLoc if there is none.
DebugLoc
MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
@@ -1403,6 +1474,8 @@ MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
}
DebugLoc MachineBasicBlock::rfindDebugLoc(reverse_instr_iterator MBBI) {
+ if (MBBI == instr_rend())
+ return findDebugLoc(instr_begin());
// Skip debug declarations, we don't want a DebugLoc from them.
MBBI = skipDebugInstructionsBackward(MBBI, instr_rbegin());
if (!MBBI->isDebugInstr())
@@ -1410,13 +1483,15 @@ DebugLoc MachineBasicBlock::rfindDebugLoc(reverse_instr_iterator MBBI) {
return {};
}
-/// Find the previous valid DebugLoc preceding MBBI, skipping and DBG_VALUE
+/// Find the previous valid DebugLoc preceding MBBI, skipping any debug
/// instructions. Return UnknownLoc if there is none.
DebugLoc MachineBasicBlock::findPrevDebugLoc(instr_iterator MBBI) {
- if (MBBI == instr_begin()) return {};
+ if (MBBI == instr_begin())
+ return {};
// Skip debug instructions, we don't want a DebugLoc from them.
MBBI = prev_nodbg(MBBI, instr_begin());
- if (!MBBI->isDebugInstr()) return MBBI->getDebugLoc();
+ if (!MBBI->isDebugInstr())
+ return MBBI->getDebugLoc();
return {};
}
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 7bbc347a8cf8..912e9ec993e3 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -213,10 +213,9 @@ static cl::opt<bool> RenumberBlocksBeforeView(
"into a dot graph. Only used when a function is being printed."),
cl::init(false), cl::Hidden);
+namespace llvm {
extern cl::opt<bool> EnableExtTspBlockPlacement;
extern cl::opt<bool> ApplyExtTspWithoutProfile;
-
-namespace llvm {
extern cl::opt<unsigned> StaticLikelyProb;
extern cl::opt<unsigned> ProfileLikelyProb;
@@ -354,15 +353,15 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// Pair struct containing basic block and taildup profitability
struct BlockAndTailDupResult {
- MachineBasicBlock *BB;
+ MachineBasicBlock *BB = nullptr;
bool ShouldTailDup;
};
/// Triple struct containing edge weight and the edge.
struct WeightedEdge {
BlockFrequency Weight;
- MachineBasicBlock *Src;
- MachineBasicBlock *Dest;
+ MachineBasicBlock *Src = nullptr;
+ MachineBasicBlock *Dest = nullptr;
};
/// work lists of blocks that are ready to be laid out
@@ -373,32 +372,32 @@ class MachineBlockPlacement : public MachineFunctionPass {
DenseMap<const MachineBasicBlock *, BlockAndTailDupResult> ComputedEdges;
/// Machine Function
- MachineFunction *F;
+ MachineFunction *F = nullptr;
/// A handle to the branch probability pass.
- const MachineBranchProbabilityInfo *MBPI;
+ const MachineBranchProbabilityInfo *MBPI = nullptr;
/// A handle to the function-wide block frequency pass.
std::unique_ptr<MBFIWrapper> MBFI;
/// A handle to the loop info.
- MachineLoopInfo *MLI;
+ MachineLoopInfo *MLI = nullptr;
/// Preferred loop exit.
/// Member variable for convenience. It may be removed by duplication deep
/// in the call stack.
- MachineBasicBlock *PreferredLoopExit;
+ MachineBasicBlock *PreferredLoopExit = nullptr;
/// A handle to the target's instruction info.
- const TargetInstrInfo *TII;
+ const TargetInstrInfo *TII = nullptr;
/// A handle to the target's lowering info.
- const TargetLoweringBase *TLI;
+ const TargetLoweringBase *TLI = nullptr;
/// A handle to the post dominator tree.
- MachinePostDominatorTree *MPDT;
+ MachinePostDominatorTree *MPDT = nullptr;
- ProfileSummaryInfo *PSI;
+ ProfileSummaryInfo *PSI = nullptr;
/// Duplicator used to duplicate tails during placement.
///
@@ -412,7 +411,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// True: use block profile count to compute tail duplication cost.
/// False: use block frequency to compute tail duplication cost.
- bool UseProfileCount;
+ bool UseProfileCount = false;
/// Allocator and owner of BlockChain structures.
///
@@ -1160,7 +1159,7 @@ bool MachineBlockPlacement::canTailDuplicateUnplacedPreds(
// tail-duplicated into.
// Skip any blocks that are already placed or not in this loop.
if (Pred == BB || (BlockFilter && !BlockFilter->count(Pred))
- || BlockToChain[Pred] == &Chain)
+ || (BlockToChain[Pred] == &Chain && !Succ->succ_empty()))
continue;
if (!TailDup.canTailDuplicate(Succ, Pred)) {
if (Successors.size() > 1 && hasSameSuccessors(*Pred, Successors))
@@ -2018,7 +2017,7 @@ MachineBlockPlacement::FallThroughGains(
for (MachineBasicBlock *Succ : BestPred->successors()) {
if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
continue;
- if (ComputedEdges.find(Succ) != ComputedEdges.end())
+ if (ComputedEdges.contains(Succ))
continue;
BlockChain *SuccChain = BlockToChain[Succ];
if ((SuccChain && (Succ != *SuccChain->begin())) ||
diff --git a/llvm/lib/CodeGen/MachineCSE.cpp b/llvm/lib/CodeGen/MachineCSE.cpp
index cd8644029530..f879c5fcf20c 100644
--- a/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/llvm/lib/CodeGen/MachineCSE.cpp
@@ -68,12 +68,12 @@ static cl::opt<int>
namespace {
class MachineCSE : public MachineFunctionPass {
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- AliasAnalysis *AA;
- MachineDominatorTree *DT;
- MachineRegisterInfo *MRI;
- MachineBlockFrequencyInfo *MBFI;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ AliasAnalysis *AA = nullptr;
+ MachineDominatorTree *DT = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ MachineBlockFrequencyInfo *MBFI = nullptr;
public:
static char ID; // Pass identification
@@ -175,9 +175,7 @@ INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
MachineBasicBlock *MBB) {
bool Changed = false;
- for (MachineOperand &MO : MI->operands()) {
- if (!MO.isReg() || !MO.isUse())
- continue;
+ for (MachineOperand &MO : MI->all_uses()) {
Register Reg = MO.getReg();
if (!Reg.isVirtual())
continue;
@@ -291,9 +289,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
PhysDefVector &PhysDefs,
bool &PhysUseDef) const {
// First, add all uses to PhysRefs.
- for (const MachineOperand &MO : MI->operands()) {
- if (!MO.isReg() || MO.isDef())
- continue;
+ for (const MachineOperand &MO : MI->all_uses()) {
Register Reg = MO.getReg();
if (!Reg)
continue;
@@ -483,8 +479,8 @@ bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg,
// Heuristics #2: If the expression doesn't use a vreg and the only uses
// of the redundant computation are copies, do not CSE.
bool HasVRegUse = false;
- for (const MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual()) {
+ for (const MachineOperand &MO : MI->all_uses()) {
+ if (MO.getReg().isVirtual()) {
HasVRegUse = true;
break;
}
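Several hunks in this import (MachineCSE above, MachineCombiner and MachineBasicBlock elsewhere) replace hand-rolled `if (!MO.isReg() || !MO.isUse()) continue;` guards with the MachineInstr::all_uses() / all_defs() filtered ranges. A standalone analogue of what such a filtered view buys, using hypothetical types rather than MachineOperand:

#include <vector>

// Hypothetical operand record; the real filter runs over MachineOperand.
struct DemoOperand {
  bool IsReg = false;
  bool IsUse = false;
  unsigned Reg = 0;
};

// Before: every loop repeats the register/use guard inline.
static unsigned countRegisterUsesBefore(const std::vector<DemoOperand> &Ops) {
  unsigned N = 0;
  for (const DemoOperand &MO : Ops) {
    if (!MO.IsReg || !MO.IsUse)
      continue;
    ++N;
  }
  return N;
}

// After: one helper owns the filter, and callers iterate only the uses.
static std::vector<const DemoOperand *>
allUses(const std::vector<DemoOperand> &Ops) {
  std::vector<const DemoOperand *> Uses;
  for (const DemoOperand &MO : Ops)
    if (MO.IsReg && MO.IsUse)
      Uses.push_back(&MO);
  return Uses;
}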
diff --git a/llvm/lib/CodeGen/MachineCheckDebugify.cpp b/llvm/lib/CodeGen/MachineCheckDebugify.cpp
index 1e5b8dd0bbb0..874f726d2947 100644
--- a/llvm/lib/CodeGen/MachineCheckDebugify.cpp
+++ b/llvm/lib/CodeGen/MachineCheckDebugify.cpp
@@ -11,6 +11,7 @@
/// DILocalVariable which mir-debugifiy generated before.
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp
index 974d570ece51..c65937935ed8 100644
--- a/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -63,22 +63,22 @@ static cl::opt<bool> VerifyPatternOrder(
namespace {
class MachineCombiner : public MachineFunctionPass {
- const TargetSubtargetInfo *STI;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
+ const TargetSubtargetInfo *STI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
MCSchedModel SchedModel;
- MachineRegisterInfo *MRI;
- MachineLoopInfo *MLI; // Current MachineLoopInfo
- MachineTraceMetrics *Traces;
- MachineTraceMetrics::Ensemble *MinInstr;
- MachineBlockFrequencyInfo *MBFI;
- ProfileSummaryInfo *PSI;
+ MachineRegisterInfo *MRI = nullptr;
+ MachineLoopInfo *MLI = nullptr; // Current MachineLoopInfo
+ MachineTraceMetrics *Traces = nullptr;
+ MachineTraceMetrics::Ensemble *TraceEnsemble = nullptr;
+ MachineBlockFrequencyInfo *MBFI = nullptr;
+ ProfileSummaryInfo *PSI = nullptr;
RegisterClassInfo RegClassInfo;
TargetSchedModel TSchedModel;
/// True if optimizing for code size.
- bool OptSize;
+ bool OptSize = false;
public:
static char ID;
@@ -95,7 +95,8 @@ private:
bool isTransientMI(const MachineInstr *MI);
unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- MachineTraceMetrics::Trace BlockTrace);
+ MachineTraceMetrics::Trace BlockTrace,
+ const MachineBasicBlock &MBB);
unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot,
MachineTraceMetrics::Trace BlockTrace);
bool
@@ -148,7 +149,8 @@ void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
+MachineInstr *
+MachineCombiner::getOperandDef(const MachineOperand &MO) {
MachineInstr *DefInstr = nullptr;
// We need a virtual register definition.
if (MO.isReg() && MO.getReg().isVirtual())
@@ -207,18 +209,17 @@ bool MachineCombiner::isTransientMI(const MachineInstr *MI) {
unsigned
MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
- MachineTraceMetrics::Trace BlockTrace) {
+ MachineTraceMetrics::Trace BlockTrace,
+ const MachineBasicBlock &MBB) {
SmallVector<unsigned, 16> InstrDepth;
// For each instruction in the new sequence compute the depth based on the
// operands. Use the trace information when possible. For new operands which
// are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
for (auto *InstrPtr : InsInstrs) { // for each Use
unsigned IDepth = 0;
- for (const MachineOperand &MO : InstrPtr->operands()) {
+ for (const MachineOperand &MO : InstrPtr->all_uses()) {
// Check for virtual register operand.
- if (!(MO.isReg() && MO.getReg().isVirtual()))
- continue;
- if (!MO.isUse())
+ if (!MO.getReg().isVirtual())
continue;
unsigned DepthOp = 0;
unsigned LatencyOp = 0;
@@ -237,7 +238,9 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
InstrPtr, UseIdx);
} else {
MachineInstr *DefInstr = getOperandDef(MO);
- if (DefInstr) {
+ if (DefInstr && (TII->getMachineCombinerTraceStrategy() !=
+ MachineTraceStrategy::TS_Local ||
+ DefInstr->getParent() == &MBB)) {
DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth;
if (!isTransientMI(DefInstr))
LatencyOp = TSchedModel.computeOperandLatency(
@@ -267,11 +270,9 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
// Check each definition in NewRoot and compute the latency
unsigned NewRootLatency = 0;
- for (const MachineOperand &MO : NewRoot->operands()) {
+ for (const MachineOperand &MO : NewRoot->all_defs()) {
// Check for virtual register operand.
- if (!(MO.isReg() && MO.getReg().isVirtual()))
- continue;
- if (!MO.isDef())
+ if (!MO.getReg().isVirtual())
continue;
// Get the first instruction that uses MO
MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg());
@@ -374,7 +375,8 @@ bool MachineCombiner::improvesCriticalPathLen(
MachineCombinerPattern Pattern,
bool SlackIsAccurate) {
// Get depth and latency of NewRoot and Root.
- unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace);
+ unsigned NewRootDepth =
+ getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace, *MBB);
unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth;
LLVM_DEBUG(dbgs() << " Dependence data for " << *Root << "\tNewRootDepth: "
@@ -399,8 +401,13 @@ bool MachineCombiner::improvesCriticalPathLen(
// Account for the latency of the inserted and deleted instructions by
unsigned NewRootLatency, RootLatency;
- std::tie(NewRootLatency, RootLatency) =
- getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
+ if (TII->accumulateInstrSeqToRootLatency(*Root)) {
+ std::tie(NewRootLatency, RootLatency) =
+ getLatenciesForInstrSequences(*Root, InsInstrs, DelInstrs, BlockTrace);
+ } else {
+ NewRootLatency = TSchedModel.computeInstrLatency(InsInstrs.back());
+ RootLatency = TSchedModel.computeInstrLatency(Root);
+ }
unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
unsigned NewCycleCount = NewRootDepth + NewRootLatency;
@@ -483,20 +490,19 @@ bool MachineCombiner::preservesResourceLen(
/// \param MI current machine instruction
/// \param InsInstrs new instructions to insert in \p MBB
/// \param DelInstrs instruction to delete from \p MBB
-/// \param MinInstr is a pointer to the machine trace information
+/// \param TraceEnsemble is a pointer to the machine trace information
/// \param RegUnits set of live registers, needed to compute instruction depths
/// \param TII is target instruction info, used to call target hook
/// \param Pattern is used to call target hook finalizeInsInstrs
/// \param IncrementalUpdate if true, compute instruction depths incrementally,
/// otherwise invalidate the trace
-static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
- SmallVector<MachineInstr *, 16> InsInstrs,
- SmallVector<MachineInstr *, 16> DelInstrs,
- MachineTraceMetrics::Ensemble *MinInstr,
- SparseSet<LiveRegUnit> &RegUnits,
- const TargetInstrInfo *TII,
- MachineCombinerPattern Pattern,
- bool IncrementalUpdate) {
+static void insertDeleteInstructions(
+ MachineBasicBlock *MBB, MachineInstr &MI,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ MachineTraceMetrics::Ensemble *TraceEnsemble,
+ SparseSet<LiveRegUnit> &RegUnits, const TargetInstrInfo *TII,
+ MachineCombinerPattern Pattern, bool IncrementalUpdate) {
// If we want to fix up some placeholder for some target, do it now.
// We need this because in genAlternativeCodeSequence, we have not decided the
// better pattern InsInstrs or DelInstrs, so we don't want generate some
@@ -522,9 +528,9 @@ static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI,
if (IncrementalUpdate)
for (auto *InstrPtr : InsInstrs)
- MinInstr->updateDepth(MBB, *InstrPtr, RegUnits);
+ TraceEnsemble->updateDepth(MBB, *InstrPtr, RegUnits);
else
- MinInstr->invalidate(MBB);
+ TraceEnsemble->invalidate(MBB);
NumInstCombined++;
}
@@ -550,7 +556,7 @@ void MachineCombiner::verifyPatternOrder(
unsigned NewRootLatency, RootLatency;
std::tie(NewRootLatency, RootLatency) = getLatenciesForInstrSequences(
- Root, InsInstrs, DelInstrs, MinInstr->getTrace(MBB));
+ Root, InsInstrs, DelInstrs, TraceEnsemble->getTrace(MBB));
long CurrentLatencyDiff = ((long)RootLatency) - ((long)NewRootLatency);
assert(CurrentLatencyDiff <= PrevLatencyDiff &&
"Current pattern is better than previous pattern.");
@@ -574,8 +580,8 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
decltype(BlockIter) LastUpdate;
// Check if the block is in a loop.
const MachineLoop *ML = MLI->getLoopFor(MBB);
- if (!MinInstr)
- MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+ if (!TraceEnsemble)
+ TraceEnsemble = Traces->getEnsemble(TII->getMachineCombinerTraceStrategy());
SparseSet<LiveRegUnit> RegUnits;
RegUnits.setUniverse(TRI->getNumRegUnits());
@@ -647,7 +653,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
if (IncrementalUpdate && LastUpdate != BlockIter) {
// Update depths since the last incremental update.
- MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits);
+ TraceEnsemble->updateDepths(LastUpdate, BlockIter, RegUnits);
LastUpdate = BlockIter;
}
@@ -661,7 +667,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
}
if (reduceRegisterPressure(MI, MBB, InsInstrs, DelInstrs, P)) {
// Replace DelInstrs with InsInstrs.
- insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
RegUnits, TII, P, IncrementalUpdate);
Changed |= true;
@@ -674,7 +680,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
if (ML && TII->isThroughputPattern(P)) {
LLVM_DEBUG(dbgs() << "\t Replacing due to throughput pattern in loop\n");
- insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
RegUnits, TII, P, IncrementalUpdate);
// Eagerly stop after the first pattern fires.
Changed = true;
@@ -683,7 +689,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
LLVM_DEBUG(dbgs() << "\t Replacing due to OptForSize ("
<< InsInstrs.size() << " < "
<< DelInstrs.size() << ")\n");
- insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
RegUnits, TII, P, IncrementalUpdate);
// Eagerly stop after the first pattern fires.
Changed = true;
@@ -694,7 +700,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
// instruction depths incrementally.
// NOTE: Only the instruction depths up to MI are accurate. All other
// trace information is not updated.
- MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB);
+ MachineTraceMetrics::Trace BlockTrace = TraceEnsemble->getTrace(MBB);
Traces->verifyAnalysis();
if (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, DelInstrs,
InstrIdxForVirtReg, P,
@@ -706,7 +712,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
LastUpdate = BlockIter;
}
- insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr,
+ insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, TraceEnsemble,
RegUnits, TII, P, IncrementalUpdate);
// Eagerly stop after the first pattern fires.
@@ -741,7 +747,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
MBFI = (PSI && PSI->hasProfileSummary()) ?
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
nullptr;
- MinInstr = nullptr;
+ TraceEnsemble = nullptr;
OptSize = MF.getFunction().hasOptSize();
RegClassInfo.runOnMachineFunction(MF);
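
The hunks above stop hard-coding the min-instruction-count ensemble and instead ask the target via TII->getMachineCombinerTraceStrategy(). A minimal sketch of a backend override, assuming the hook returns the same MachineTraceStrategy value that Traces->getEnsemble() consumes; MyTargetInstrInfo, the PreferLocalTraces flag, and the TS_Local enumerator are assumptions for illustration, not taken from this diff:

#include "llvm/CodeGen/MachineTraceMetrics.h" // header placement assumed

// Hypothetical backend hook: pick the trace ensemble the MachineCombiner
// should cost alternative instruction sequences against.
MachineTraceStrategy
MyTargetInstrInfo::getMachineCombinerTraceStrategy() const {
  // Prefer purely local traces when a (hypothetical) tuning flag is set;
  // otherwise keep the historical minimum-instruction-count ensemble.
  return PreferLocalTraces ? MachineTraceStrategy::TS_Local
                           : MachineTraceStrategy::TS_MinInstrCount;
}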
diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 871824553aa4..3453e6c0b8be 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -80,11 +80,15 @@ using namespace llvm;
STATISTIC(NumDeletes, "Number of dead copies deleted");
STATISTIC(NumCopyForwards, "Number of copy uses forwarded");
STATISTIC(NumCopyBackwardPropagated, "Number of copy defs backward propagated");
+STATISTIC(SpillageChainsLength, "Length of spillage chains");
+STATISTIC(NumSpillageChains, "Number of spillage chains");
DEBUG_COUNTER(FwdCounter, "machine-cp-fwd",
"Controls which register COPYs are forwarded");
static cl::opt<bool> MCPUseCopyInstr("mcp-use-is-copy-instr", cl::init(false),
cl::Hidden);
+static cl::opt<cl::boolOrDefault>
+ EnableSpillageCopyElimination("enable-spill-copy-elim", cl::Hidden);
namespace {
@@ -103,7 +107,7 @@ static std::optional<DestSourcePair> isCopyInstr(const MachineInstr &MI,
class CopyTracker {
struct CopyInfo {
- MachineInstr *MI;
+ MachineInstr *MI, *LastSeenUseInCopy;
SmallVector<MCRegister, 4> DefRegs;
bool Avail;
};
@@ -117,8 +121,8 @@ public:
const TargetRegisterInfo &TRI) {
for (MCRegister Reg : Regs) {
// Source of copy is no longer available for propagation.
- for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
- auto CI = Copies.find(*RUI);
+ for (MCRegUnit Unit : TRI.regunits(Reg)) {
+ auto CI = Copies.find(Unit);
if (CI != Copies.end())
CI->second.Avail = false;
}
@@ -133,8 +137,8 @@ public:
// and invalidate all of them.
SmallSet<MCRegister, 8> RegsToInvalidate;
RegsToInvalidate.insert(Reg);
- for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
- auto I = Copies.find(*RUI);
+ for (MCRegUnit Unit : TRI.regunits(Reg)) {
+ auto I = Copies.find(Unit);
if (I != Copies.end()) {
if (MachineInstr *MI = I->second.MI) {
std::optional<DestSourcePair> CopyOperands =
@@ -150,15 +154,15 @@ public:
}
}
for (MCRegister InvalidReg : RegsToInvalidate)
- for (MCRegUnitIterator RUI(InvalidReg, &TRI); RUI.isValid(); ++RUI)
- Copies.erase(*RUI);
+ for (MCRegUnit Unit : TRI.regunits(InvalidReg))
+ Copies.erase(Unit);
}
/// Clobber a single register, removing it from the tracker's copy maps.
void clobberRegister(MCRegister Reg, const TargetRegisterInfo &TRI,
const TargetInstrInfo &TII, bool UseCopyInstr) {
- for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) {
- auto I = Copies.find(*RUI);
+ for (MCRegUnit Unit : TRI.regunits(Reg)) {
+ auto I = Copies.find(Unit);
if (I != Copies.end()) {
// When we clobber the source of a copy, we need to clobber everything
// it defined.
@@ -188,16 +192,17 @@ public:
MCRegister Def = CopyOperands->Destination->getReg().asMCReg();
// Remember Def is defined by the copy.
- for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI)
- Copies[*RUI] = {MI, {}, true};
+ for (MCRegUnit Unit : TRI.regunits(Def))
+ Copies[Unit] = {MI, nullptr, {}, true};
// Remember source that's copied to Def. Once it's clobbered, then
// it's no longer available for copy propagation.
- for (MCRegUnitIterator RUI(Src, &TRI); RUI.isValid(); ++RUI) {
- auto I = Copies.insert({*RUI, {nullptr, {}, false}});
+ for (MCRegUnit Unit : TRI.regunits(Src)) {
+ auto I = Copies.insert({Unit, {nullptr, nullptr, {}, false}});
auto &Copy = I.first->second;
if (!is_contained(Copy.DefRegs, Def))
Copy.DefRegs.push_back(Def);
+ Copy.LastSeenUseInCopy = MI;
}
}
@@ -223,16 +228,16 @@ public:
return nullptr;
if (CI->second.DefRegs.size() != 1)
return nullptr;
- MCRegUnitIterator RUI(CI->second.DefRegs[0], &TRI);
- return findCopyForUnit(*RUI, TRI, true);
+ MCRegUnit RU = *TRI.regunits(CI->second.DefRegs[0]).begin();
+ return findCopyForUnit(RU, TRI, true);
}
MachineInstr *findAvailBackwardCopy(MachineInstr &I, MCRegister Reg,
const TargetRegisterInfo &TRI,
const TargetInstrInfo &TII,
bool UseCopyInstr) {
- MCRegUnitIterator RUI(Reg, &TRI);
- MachineInstr *AvailCopy = findCopyDefViaUnit(*RUI, TRI);
+ MCRegUnit RU = *TRI.regunits(Reg).begin();
+ MachineInstr *AvailCopy = findCopyDefViaUnit(RU, TRI);
if (!AvailCopy)
return nullptr;
@@ -260,9 +265,9 @@ public:
const TargetInstrInfo &TII, bool UseCopyInstr) {
// We check the first RegUnit here, since we'll only be interested in the
// copy if it copies the entire register anyway.
- MCRegUnitIterator RUI(Reg, &TRI);
+ MCRegUnit RU = *TRI.regunits(Reg).begin();
MachineInstr *AvailCopy =
- findCopyForUnit(*RUI, TRI, /*MustBeAvailable=*/true);
+ findCopyForUnit(RU, TRI, /*MustBeAvailable=*/true);
if (!AvailCopy)
return nullptr;
@@ -286,15 +291,57 @@ public:
return AvailCopy;
}
+ // Find last COPY that defines Reg before Current MachineInstr.
+ MachineInstr *findLastSeenDefInCopy(const MachineInstr &Current,
+ MCRegister Reg,
+ const TargetRegisterInfo &TRI,
+ const TargetInstrInfo &TII,
+ bool UseCopyInstr) {
+ MCRegUnit RU = *TRI.regunits(Reg).begin();
+ auto CI = Copies.find(RU);
+ if (CI == Copies.end() || !CI->second.Avail)
+ return nullptr;
+
+ MachineInstr *DefCopy = CI->second.MI;
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(*DefCopy, TII, UseCopyInstr);
+ Register Def = CopyOperands->Destination->getReg();
+ if (!TRI.isSubRegisterEq(Def, Reg))
+ return nullptr;
+
+ for (const MachineInstr &MI :
+ make_range(static_cast<const MachineInstr *>(DefCopy)->getIterator(),
+ Current.getIterator()))
+ for (const MachineOperand &MO : MI.operands())
+ if (MO.isRegMask())
+ if (MO.clobbersPhysReg(Def)) {
+ LLVM_DEBUG(dbgs() << "MCP: Removed tracking of "
+ << printReg(Def, &TRI) << "\n");
+ return nullptr;
+ }
+
+ return DefCopy;
+ }
+
+ // Find last COPY that uses Reg.
+ MachineInstr *findLastSeenUseInCopy(MCRegister Reg,
+ const TargetRegisterInfo &TRI) {
+ MCRegUnit RU = *TRI.regunits(Reg).begin();
+ auto CI = Copies.find(RU);
+ if (CI == Copies.end())
+ return nullptr;
+ return CI->second.LastSeenUseInCopy;
+ }
+
void clear() {
Copies.clear();
}
};
class MachineCopyPropagation : public MachineFunctionPass {
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
- const MachineRegisterInfo *MRI;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
// Return true if this is a copy instruction and false otherwise.
bool UseCopyInstr;
@@ -325,6 +372,7 @@ private:
void ReadRegister(MCRegister Reg, MachineInstr &Reader, DebugType DT);
void ForwardCopyPropagateBlock(MachineBasicBlock &MBB);
void BackwardCopyPropagateBlock(MachineBasicBlock &MBB);
+ void EliminateSpillageCopies(MachineBasicBlock &MBB);
bool eraseIfRedundant(MachineInstr &Copy, MCRegister Src, MCRegister Def);
void forwardUses(MachineInstr &MI);
void propagateDefs(MachineInstr &MI);
@@ -345,7 +393,7 @@ private:
CopyTracker Tracker;
- bool Changed;
+ bool Changed = false;
};
} // end anonymous namespace
@@ -362,8 +410,8 @@ void MachineCopyPropagation::ReadRegister(MCRegister Reg, MachineInstr &Reader,
// If 'Reg' is defined by a copy, the copy is no longer a candidate
// for elimination. If a copy is "read" by a debug user, record the user
// for propagation.
- for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
- if (MachineInstr *Copy = Tracker.findCopyForUnit(*RUI, *TRI)) {
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ if (MachineInstr *Copy = Tracker.findCopyForUnit(Unit, *TRI)) {
if (DT == RegularUse) {
LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump());
MaybeDeadCopies.remove(Copy);
@@ -433,6 +481,12 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy,
make_range(PrevCopy->getIterator(), Copy.getIterator()))
MI.clearRegisterKills(CopyDef, TRI);
+ // Clear undef flag from remaining copy if needed.
+ if (!CopyOperands->Source->isUndef()) {
+ PrevCopy->getOperand(PrevCopyOperands->Source->getOperandNo())
+ .setIsUndef(false);
+ }
+
Copy.eraseFromParent();
Changed = true;
++NumDeletes;
@@ -595,12 +649,19 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
const MachineOperand &CopySrc = *CopyOperands->Source;
Register CopySrcReg = CopySrc.getReg();
- // FIXME: Don't handle partial uses of wider COPYs yet.
+ Register ForwardedReg = CopySrcReg;
+ // MI might use a sub-register of the Copy destination, in which case the
+ // forwarded register is the matching sub-register of the Copy source.
if (MOUse.getReg() != CopyDstReg) {
- LLVM_DEBUG(
- dbgs() << "MCP: FIXME! Not forwarding COPY to sub-register use:\n "
- << MI);
- continue;
+ unsigned SubRegIdx = TRI->getSubRegIndex(CopyDstReg, MOUse.getReg());
+ assert(SubRegIdx &&
+ "MI source is not a sub-register of Copy destination");
+ ForwardedReg = TRI->getSubReg(CopySrcReg, SubRegIdx);
+ if (!ForwardedReg) {
+ LLVM_DEBUG(dbgs() << "MCP: Copy source does not have sub-register "
+ << TRI->getSubRegIndexName(SubRegIdx) << '\n');
+ continue;
+ }
}
// Don't forward COPYs of reserved regs unless they are constant.
@@ -630,10 +691,11 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
}
LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MOUse.getReg(), TRI)
- << "\n with " << printReg(CopySrcReg, TRI)
+ << "\n with " << printReg(ForwardedReg, TRI)
<< "\n in " << MI << " from " << *Copy);
- MOUse.setReg(CopySrcReg);
+ MOUse.setReg(ForwardedReg);
+
if (!CopySrc.isRenamable())
MOUse.setIsRenamable(false);
MOUse.setIsUndef(CopySrc.isUndef());
@@ -844,16 +906,11 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) {
Tracker.clear();
}
-static bool isBackwardPropagatableCopy(MachineInstr &MI,
+static bool isBackwardPropagatableCopy(const DestSourcePair &CopyOperands,
const MachineRegisterInfo &MRI,
- const TargetInstrInfo &TII,
- bool UseCopyInstr) {
- std::optional<DestSourcePair> CopyOperands =
- isCopyInstr(MI, TII, UseCopyInstr);
- assert(CopyOperands && "MI is expected to be a COPY");
-
- Register Def = CopyOperands->Destination->getReg();
- Register Src = CopyOperands->Source->getReg();
+ const TargetInstrInfo &TII) {
+ Register Def = CopyOperands.Destination->getReg();
+ Register Src = CopyOperands.Source->getReg();
if (!Def || !Src)
return false;
@@ -861,7 +918,7 @@ static bool isBackwardPropagatableCopy(MachineInstr &MI,
if (MRI.isReserved(Def) || MRI.isReserved(Src))
return false;
- return CopyOperands->Source->isRenamable() && CopyOperands->Source->isKill();
+ return CopyOperands.Source->isRenamable() && CopyOperands.Source->isKill();
}
void MachineCopyPropagation::propagateDefs(MachineInstr &MI) {
@@ -936,14 +993,13 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
Register SrcReg = CopyOperands->Source->getReg();
if (!TRI->regsOverlap(DefReg, SrcReg)) {
- MCRegister Def = DefReg.asMCReg();
- MCRegister Src = SrcReg.asMCReg();
-
// Unlike forward cp, we don't invoke propagateDefs here,
// just let forward cp do COPY-to-COPY propagation.
- if (isBackwardPropagatableCopy(MI, *MRI, *TII, UseCopyInstr)) {
- Tracker.invalidateRegister(Src, *TRI, *TII, UseCopyInstr);
- Tracker.invalidateRegister(Def, *TRI, *TII, UseCopyInstr);
+ if (isBackwardPropagatableCopy(*CopyOperands, *MRI, *TII)) {
+ Tracker.invalidateRegister(SrcReg.asMCReg(), *TRI, *TII,
+ UseCopyInstr);
+ Tracker.invalidateRegister(DefReg.asMCReg(), *TRI, *TII,
+ UseCopyInstr);
Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr);
continue;
}
@@ -976,9 +1032,8 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
// Check if the register in the debug instruction is utilized
// in a copy instruction, so we can update the debug info if the
// register is changed.
- for (MCRegUnitIterator RUI(MO.getReg().asMCReg(), TRI); RUI.isValid();
- ++RUI) {
- if (auto *Copy = Tracker.findCopyDefViaUnit(*RUI, *TRI)) {
+ for (MCRegUnit Unit : TRI->regunits(MO.getReg().asMCReg())) {
+ if (auto *Copy = Tracker.findCopyDefViaUnit(Unit, *TRI)) {
CopyDbgUsers[Copy].insert(&MI);
}
}
@@ -1008,10 +1063,345 @@ void MachineCopyPropagation::BackwardCopyPropagateBlock(
Tracker.clear();
}
+static void LLVM_ATTRIBUTE_UNUSED printSpillReloadChain(
+ DenseMap<MachineInstr *, SmallVector<MachineInstr *>> &SpillChain,
+ DenseMap<MachineInstr *, SmallVector<MachineInstr *>> &ReloadChain,
+ MachineInstr *Leader) {
+ auto &SC = SpillChain[Leader];
+ auto &RC = ReloadChain[Leader];
+ for (auto I = SC.rbegin(), E = SC.rend(); I != E; ++I)
+ (*I)->dump();
+ for (MachineInstr *MI : RC)
+ MI->dump();
+}
+
+// Remove spill-reload like copy chains. For example
+// r0 = COPY r1
+// r1 = COPY r2
+// r2 = COPY r3
+// r3 = COPY r4
+// <def-use r4>
+// r4 = COPY r3
+// r3 = COPY r2
+// r2 = COPY r1
+// r1 = COPY r0
+// will be folded into
+// r0 = COPY r1
+// r1 = COPY r4
+// <def-use r4>
+// r4 = COPY r1
+// r1 = COPY r0
+// TODO: Currently we don't track usage of r0 outside the chain, so we
+// conservatively keep its value as it was before the rewrite.
+//
+// The algorithm tries to maintain two properties:
+// property#1: No Def of a spill COPY in the chain is used or defined until the
+// paired reload COPY in the chain uses the Def.
+//
+// property#2: No Source of a COPY in the chain is used or defined until the
+// next COPY in the chain defines the Source, except for the innermost
+// spill-reload pair.
+//
+// The algorithm works by checking every COPY inside the MBB, assuming the COPY
+// is a reload COPY, and then searching backward for the COPY that defines the
+// Src of the reload COPY (the paired spill COPY). If such a pair is found, it
+// either joins an existing chain or starts a new one, depending on whether the
+// last COPY seen to use the Def of the reload COPY already belongs to a chain.
+// Implementation note: we use CopyTracker::findLastSeenDefInCopy(Reg, ...) to
+// find the last COPY that defines Reg, and CopyTracker::findLastSeenUseInCopy(
+// Reg, ...) to find the last COPY that uses Reg. When we encounter a non-COPY
+// instruction, we check the registers in its operands; if such a Reg is
+// defined by a previous COPY, we untrack that Reg via
+// CopyTracker::clobberRegister(Reg, ...).
+void MachineCopyPropagation::EliminateSpillageCopies(MachineBasicBlock &MBB) {
+ // ChainLeader maps MI inside a spill-reload chain to its innermost reload COPY.
+ // Thus we can track if a MI belongs to an existing spill-reload chain.
+ DenseMap<MachineInstr *, MachineInstr *> ChainLeader;
+ // SpillChain maps innermost reload COPY of a spill-reload chain to a sequence
+ // of COPYs that forms spills of a spill-reload chain.
+ // ReloadChain maps innermost reload COPY of a spill-reload chain to a
+ // sequence of COPYs that forms reloads of a spill-reload chain.
+ DenseMap<MachineInstr *, SmallVector<MachineInstr *>> SpillChain, ReloadChain;
+ // If a COPY's Source is used or defined before the next COPY defines the
+ // Source, we put the COPY in this set to keep property#2.
+ DenseSet<const MachineInstr *> CopySourceInvalid;
+
+ auto TryFoldSpillageCopies =
+ [&, this](const SmallVectorImpl<MachineInstr *> &SC,
+ const SmallVectorImpl<MachineInstr *> &RC) {
+ assert(SC.size() == RC.size() && "Spill-reload should be paired");
+
+ // We need at least 3 pairs of copies for the transformation to apply,
+ // because the first (outermost) pair cannot be removed: we don't recolor
+ // outside of the chain, and we need at least one temporary spill slot to
+ // shorten the chain. If we only have a chain of two pairs, we already
+ // have the shortest sequence this code can handle: the outermost pair
+ // for the temporary spill slot, and the pair that uses that temporary
+ // spill slot for the other end of the chain.
+ // TODO: We might be able to simplify to one spill-reload pair by
+ // collecting more information about the outermost COPY.
+ if (SC.size() <= 2)
+ return;
+
+ // If property#2 is violated, we don't fold the chain.
+ for (const MachineInstr *Spill : make_range(SC.begin() + 1, SC.end()))
+ if (CopySourceInvalid.count(Spill))
+ return;
+
+ for (const MachineInstr *Reload : make_range(RC.begin(), RC.end() - 1))
+ if (CopySourceInvalid.count(Reload))
+ return;
+
+ auto CheckCopyConstraint = [this](Register Def, Register Src) {
+ for (const TargetRegisterClass *RC : TRI->regclasses()) {
+ if (RC->contains(Def) && RC->contains(Src))
+ return true;
+ }
+ return false;
+ };
+
+ auto UpdateReg = [](MachineInstr *MI, const MachineOperand *Old,
+ const MachineOperand *New) {
+ for (MachineOperand &MO : MI->operands()) {
+ if (&MO == Old)
+ MO.setReg(New->getReg());
+ }
+ };
+
+ std::optional<DestSourcePair> InnerMostSpillCopy =
+ isCopyInstr(*SC[0], *TII, UseCopyInstr);
+ std::optional<DestSourcePair> OuterMostSpillCopy =
+ isCopyInstr(*SC.back(), *TII, UseCopyInstr);
+ std::optional<DestSourcePair> InnerMostReloadCopy =
+ isCopyInstr(*RC[0], *TII, UseCopyInstr);
+ std::optional<DestSourcePair> OuterMostReloadCopy =
+ isCopyInstr(*RC.back(), *TII, UseCopyInstr);
+ if (!CheckCopyConstraint(OuterMostSpillCopy->Source->getReg(),
+ InnerMostSpillCopy->Source->getReg()) ||
+ !CheckCopyConstraint(InnerMostReloadCopy->Destination->getReg(),
+ OuterMostReloadCopy->Destination->getReg()))
+ return;
+
+ SpillageChainsLength += SC.size() + RC.size();
+ NumSpillageChains += 1;
+ UpdateReg(SC[0], InnerMostSpillCopy->Destination,
+ OuterMostSpillCopy->Source);
+ UpdateReg(RC[0], InnerMostReloadCopy->Source,
+ OuterMostReloadCopy->Destination);
+
+ for (size_t I = 1; I < SC.size() - 1; ++I) {
+ SC[I]->eraseFromParent();
+ RC[I]->eraseFromParent();
+ NumDeletes += 2;
+ }
+ };
+
+ auto IsFoldableCopy = [this](const MachineInstr &MaybeCopy) {
+ if (MaybeCopy.getNumImplicitOperands() > 0)
+ return false;
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(MaybeCopy, *TII, UseCopyInstr);
+ if (!CopyOperands)
+ return false;
+ Register Src = CopyOperands->Source->getReg();
+ Register Def = CopyOperands->Destination->getReg();
+ return Src && Def && !TRI->regsOverlap(Src, Def) &&
+ CopyOperands->Source->isRenamable() &&
+ CopyOperands->Destination->isRenamable();
+ };
+
+ auto IsSpillReloadPair = [&, this](const MachineInstr &Spill,
+ const MachineInstr &Reload) {
+ if (!IsFoldableCopy(Spill) || !IsFoldableCopy(Reload))
+ return false;
+ std::optional<DestSourcePair> SpillCopy =
+ isCopyInstr(Spill, *TII, UseCopyInstr);
+ std::optional<DestSourcePair> ReloadCopy =
+ isCopyInstr(Reload, *TII, UseCopyInstr);
+ if (!SpillCopy || !ReloadCopy)
+ return false;
+ return SpillCopy->Source->getReg() == ReloadCopy->Destination->getReg() &&
+ SpillCopy->Destination->getReg() == ReloadCopy->Source->getReg();
+ };
+
+ auto IsChainedCopy = [&, this](const MachineInstr &Prev,
+ const MachineInstr &Current) {
+ if (!IsFoldableCopy(Prev) || !IsFoldableCopy(Current))
+ return false;
+ std::optional<DestSourcePair> PrevCopy =
+ isCopyInstr(Prev, *TII, UseCopyInstr);
+ std::optional<DestSourcePair> CurrentCopy =
+ isCopyInstr(Current, *TII, UseCopyInstr);
+ if (!PrevCopy || !CurrentCopy)
+ return false;
+ return PrevCopy->Source->getReg() == CurrentCopy->Destination->getReg();
+ };
+
+ for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+ std::optional<DestSourcePair> CopyOperands =
+ isCopyInstr(MI, *TII, UseCopyInstr);
+
+ // Update track information via non-copy instruction.
+ SmallSet<Register, 8> RegsToClobber;
+ if (!CopyOperands) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ MachineInstr *LastUseCopy =
+ Tracker.findLastSeenUseInCopy(Reg.asMCReg(), *TRI);
+ if (LastUseCopy) {
+ LLVM_DEBUG(dbgs() << "MCP: Copy source of\n");
+ LLVM_DEBUG(LastUseCopy->dump());
+ LLVM_DEBUG(dbgs() << "might be invalidated by\n");
+ LLVM_DEBUG(MI.dump());
+ CopySourceInvalid.insert(LastUseCopy);
+ }
+ // Note that Tracker.clobberRegister(Reg, ...) removes the tracking of
+ // Reg, i.e., the COPY that defines Reg is removed from the mapping, and
+ // COPYs that use Reg are marked unavailable.
+ // We don't invoke CopyTracker::clobberRegister(Reg, ...) if Reg is not
+ // defined by a previous COPY, since we don't want to make COPYs that use
+ // Reg unavailable.
+ if (Tracker.findLastSeenDefInCopy(MI, Reg.asMCReg(), *TRI, *TII,
+ UseCopyInstr))
+ // Thus we can keep the property#1.
+ RegsToClobber.insert(Reg);
+ }
+ for (Register Reg : RegsToClobber) {
+ Tracker.clobberRegister(Reg, *TRI, *TII, UseCopyInstr);
+ LLVM_DEBUG(dbgs() << "MCP: Removed tracking of " << printReg(Reg, TRI)
+ << "\n");
+ }
+ continue;
+ }
+
+ Register Src = CopyOperands->Source->getReg();
+ Register Def = CopyOperands->Destination->getReg();
+ // Check if we can find a pair spill-reload copy.
+ LLVM_DEBUG(dbgs() << "MCP: Searching paired spill for reload: ");
+ LLVM_DEBUG(MI.dump());
+ MachineInstr *MaybeSpill =
+ Tracker.findLastSeenDefInCopy(MI, Src.asMCReg(), *TRI, *TII, UseCopyInstr);
+ bool MaybeSpillIsChained = ChainLeader.count(MaybeSpill);
+ if (!MaybeSpillIsChained && MaybeSpill &&
+ IsSpillReloadPair(*MaybeSpill, MI)) {
+ // Check if we already have an existing chain. Now we have a
+ // spill-reload pair.
+ // L2: r2 = COPY r3
+ // L5: r3 = COPY r2
+ // We are looking for a valid COPY before L5 that uses r3.
+ // There can be several cases.
+ // Case #1:
+ // No COPY is found, which can happen when r3 has a def or use between
+ // (L2, L5); we create a new chain for L2 and L5.
+ // Case #2:
+ // L2: r2 = COPY r3
+ // L5: r3 = COPY r2
+ // Such COPY is found and is L2, we create a new chain for L2 and L5.
+ // Case #3:
+ // L2: r2 = COPY r3
+ // L3: r1 = COPY r3
+ // L5: r3 = COPY r2
+ // we create a new chain for L2 and L5.
+ // Case #4:
+ // L2: r2 = COPY r3
+ // L3: r1 = COPY r3
+ // L4: r3 = COPY r1
+ // L5: r3 = COPY r2
+ // Such COPY won't be found since L4 defines r3. We create a new chain
+ // for L2 and L5.
+ // Case #5:
+ // L2: r2 = COPY r3
+ // L3: r3 = COPY r1
+ // L4: r1 = COPY r3
+ // L5: r3 = COPY r2
+ // COPY is found and is L4 which belongs to an existing chain, we add
+ // L2 and L5 to this chain.
+ LLVM_DEBUG(dbgs() << "MCP: Found spill: ");
+ LLVM_DEBUG(MaybeSpill->dump());
+ MachineInstr *MaybePrevReload =
+ Tracker.findLastSeenUseInCopy(Def.asMCReg(), *TRI);
+ auto Leader = ChainLeader.find(MaybePrevReload);
+ MachineInstr *L = nullptr;
+ if (Leader == ChainLeader.end() ||
+ (MaybePrevReload && !IsChainedCopy(*MaybePrevReload, MI))) {
+ L = &MI;
+ assert(!SpillChain.count(L) &&
+ "SpillChain should not have contained newly found chain");
+ } else {
+ assert(MaybePrevReload &&
+ "Found a valid leader through nullptr should not happen");
+ L = Leader->second;
+ assert(SpillChain[L].size() > 0 &&
+ "Existing chain's length should be larger than zero");
+ }
+ assert(!ChainLeader.count(&MI) && !ChainLeader.count(MaybeSpill) &&
+ "Newly found paired spill-reload should not belong to any chain "
+ "at this point");
+ ChainLeader.insert({MaybeSpill, L});
+ ChainLeader.insert({&MI, L});
+ SpillChain[L].push_back(MaybeSpill);
+ ReloadChain[L].push_back(&MI);
+ LLVM_DEBUG(dbgs() << "MCP: Chain " << L << " now is:\n");
+ LLVM_DEBUG(printSpillReloadChain(SpillChain, ReloadChain, L));
+ } else if (MaybeSpill && !MaybeSpillIsChained) {
+ // MaybeSpill is unable to pair with MI; that is to say, adding MI makes
+ // the chain invalid.
+ // The COPY that defines Src is no longer considered a candidate for a
+ // valid chain, since we expect the Def of a spill copy not to be used by
+ // any COPY instruction until the paired reload copy. For example:
+ // L1: r1 = COPY r2
+ // L2: r3 = COPY r1
+ // If we later have
+ // L1: r1 = COPY r2
+ // L2: r3 = COPY r1
+ // L3: r2 = COPY r1
+ // L1 and L3 can't be a valid spill-reload pair.
+ // Thus we keep the property#1.
+ LLVM_DEBUG(dbgs() << "MCP: Not paired spill-reload:\n");
+ LLVM_DEBUG(MaybeSpill->dump());
+ LLVM_DEBUG(MI.dump());
+ Tracker.clobberRegister(Src.asMCReg(), *TRI, *TII, UseCopyInstr);
+ LLVM_DEBUG(dbgs() << "MCP: Removed tracking of " << printReg(Src, TRI)
+ << "\n");
+ }
+ Tracker.trackCopy(&MI, *TRI, *TII, UseCopyInstr);
+ }
+
+ for (auto I = SpillChain.begin(), E = SpillChain.end(); I != E; ++I) {
+ auto &SC = I->second;
+ assert(ReloadChain.count(I->first) &&
+ "Reload chain of the same leader should exist");
+ auto &RC = ReloadChain[I->first];
+ TryFoldSpillageCopies(SC, RC);
+ }
+
+ MaybeDeadCopies.clear();
+ CopyDbgUsers.clear();
+ Tracker.clear();
+}
+
bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
+ bool isSpillageCopyElimEnabled = false;
+ switch (EnableSpillageCopyElimination) {
+ case cl::BOU_UNSET:
+ isSpillageCopyElimEnabled =
+ MF.getSubtarget().enableSpillageCopyElimination();
+ break;
+ case cl::BOU_TRUE:
+ isSpillageCopyElimEnabled = true;
+ break;
+ case cl::BOU_FALSE:
+ isSpillageCopyElimEnabled = false;
+ break;
+ }
+
Changed = false;
TRI = MF.getSubtarget().getRegisterInfo();
@@ -1019,6 +1409,8 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
for (MachineBasicBlock &MBB : MF) {
+ if (isSpillageCopyElimEnabled)
+ EliminateSpillageCopies(MBB);
BackwardCopyPropagateBlock(MBB);
ForwardCopyPropagateBlock(MBB);
}
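
The new spillage-copy elimination above is gated twice: the hidden -enable-spill-copy-elim flag wins when set explicitly, otherwise the subtarget hook decides. A minimal opt-in sketch, under the assumption that a backend wants the rewrite on by default; MySubtarget is hypothetical:

// Consulted only when the command-line flag is left at cl::BOU_UNSET
// (see the switch at the top of runOnMachineFunction above).
bool MySubtarget::enableSpillageCopyElimination() const { return true; }

Either way, passing -enable-spill-copy-elim=true (or =false) to a tool that runs this pass, e.g. llc, overrides the hook, since cl::BOU_TRUE/cl::BOU_FALSE take precedence in that switch.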
diff --git a/llvm/lib/CodeGen/MachineDebugify.cpp b/llvm/lib/CodeGen/MachineDebugify.cpp
index adf1b51a950d..c264e199cf47 100644
--- a/llvm/lib/CodeGen/MachineDebugify.cpp
+++ b/llvm/lib/CodeGen/MachineDebugify.cpp
@@ -116,8 +116,8 @@ bool applyDebugifyMetadataToMachineFunction(MachineModuleInfo &MMI,
// Emit DBG_VALUEs for register definitions.
SmallVector<MachineOperand *, 4> RegDefs;
- for (MachineOperand &MO : MI.operands())
- if (MO.isReg() && MO.isDef() && MO.getReg())
+ for (MachineOperand &MO : MI.all_defs())
+ if (MO.getReg())
RegDefs.push_back(&MO);
for (MachineOperand *MO : RegDefs)
BuildMI(MBB, InsertBeforeIt, MI.getDebugLoc(), DbgValDesc,
diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp
index daf6a218165d..280d3a6a41ed 100644
--- a/llvm/lib/CodeGen/MachineFrameInfo.cpp
+++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -128,8 +128,8 @@ BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const {
// Saved CSRs are not pristine.
for (const auto &I : getCalleeSavedInfo())
- for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S)
- BV.reset(*S);
+ for (MCPhysReg S : TRI->subregs_inclusive(I.getReg()))
+ BV.reset(S);
return BV;
}
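
Several hunks above (here and in MachineCopyPropagation.cpp) replace the old iterator classes with range-based accessors on TargetRegisterInfo. A minimal sketch of the two idioms, assuming a const TargetRegisterInfo &TRI and an MCRegister Reg are already in scope:

// Walk every register unit of Reg (replaces MCRegUnitIterator loops),
// e.g. to index a per-unit map such as CopyTracker::Copies.
for (MCRegUnit Unit : TRI.regunits(Reg)) {
  (void)Unit;
}

// Walk Reg itself plus all of its sub-registers (replaces MCSubRegIterator
// constructed with IncludeSelf == true), e.g. to clear per-register bits as
// getPristineRegs() does above.
for (MCPhysReg Sub : TRI.subregs_inclusive(Reg)) {
  (void)Sub;
}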
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp
index 59e6647fa643..88939e96e07f 100644
--- a/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/llvm/lib/CodeGen/MachineFunction.cpp
@@ -22,7 +22,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -45,6 +45,7 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
@@ -119,7 +120,7 @@ void setUnsafeStackSize(const Function &F, MachineFrameInfo &FrameInfo) {
auto *MetadataName = "unsafe-stack-size";
if (auto &N = Existing->getOperand(0)) {
- if (cast<MDString>(N.get())->getString() == MetadataName) {
+ if (N.equalsStr(MetadataName)) {
if (auto &Op = Existing->getOperand(1)) {
auto Val = mdconst::extract<ConstantInt>(Op)->getZExtValue();
FrameInfo.setUnsafeStackSize(Val);
@@ -211,6 +212,14 @@ void MachineFunction::init() {
Alignment = std::max(Alignment,
STI->getTargetLowering()->getPrefFunctionAlignment());
+ // -fsanitize=function and -fsanitize=kcfi instrument indirect function calls
+ // to load a type hash before the function label. Ensure functions are aligned
+ // by at least 4 to avoid unaligned access, which is especially important for
+ // -mno-unaligned-access.
+ if (F.hasMetadata(LLVMContext::MD_func_sanitize) ||
+ F.getMetadata(LLVMContext::MD_kcfi_type))
+ Alignment = std::max(Alignment, Align(4));
+
if (AlignAllFunctions)
Alignment = Align(1ULL << AlignAllFunctions);
@@ -427,8 +436,7 @@ void MachineFunction::deleteMachineInstr(MachineInstr *MI) {
// be triggered during the implementation of support for the
// call site info of a new architecture. If the assertion is triggered,
// back trace will tell where to insert a call to updateCallSiteInfo().
- assert((!MI->isCandidateForCallSiteEntry() ||
- CallSitesInfo.find(MI) == CallSitesInfo.end()) &&
+ assert((!MI->isCandidateForCallSiteEntry() || !CallSitesInfo.contains(MI)) &&
"Call site info was not updated!");
// Strip it for parts. The operand array and the MI object itself are
// independently recyclable.
@@ -1083,11 +1091,10 @@ auto MachineFunction::salvageCopySSAImpl(MachineInstr &MI)
if (State.first.isVirtual()) {
// Virtual register def -- we can just look up where this happens.
MachineInstr *Inst = MRI.def_begin(State.first)->getParent();
- for (auto &MO : Inst->operands()) {
- if (!MO.isReg() || !MO.isDef() || MO.getReg() != State.first)
+ for (auto &MO : Inst->all_defs()) {
+ if (MO.getReg() != State.first)
continue;
- return ApplySubregisters(
- {Inst->getDebugInstrNum(), Inst->getOperandNo(&MO)});
+ return ApplySubregisters({Inst->getDebugInstrNum(), MO.getOperandNo()});
}
llvm_unreachable("Vreg def with no corresponding operand?");
@@ -1102,14 +1109,13 @@ auto MachineFunction::salvageCopySSAImpl(MachineInstr &MI)
auto RMII = CurInst->getReverseIterator();
auto PrevInstrs = make_range(RMII, CurInst->getParent()->instr_rend());
for (auto &ToExamine : PrevInstrs) {
- for (auto &MO : ToExamine.operands()) {
+ for (auto &MO : ToExamine.all_defs()) {
// Test for operand that defines something aliasing RegToSeek.
- if (!MO.isReg() || !MO.isDef() ||
- !TRI.regsOverlap(RegToSeek, MO.getReg()))
+ if (!TRI.regsOverlap(RegToSeek, MO.getReg()))
continue;
return ApplySubregisters(
- {ToExamine.getDebugInstrNum(), ToExamine.getOperandNo(&MO)});
+ {ToExamine.getDebugInstrNum(), MO.getOperandNo()});
}
}
@@ -1395,7 +1401,7 @@ MachineConstantPool::~MachineConstantPool() {
}
/// Test whether the given two constants can be allocated the same constant pool
-/// entry.
+/// entry referenced by \param A.
static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
const DataLayout &DL) {
// Handle the trivial case quickly.
@@ -1415,6 +1421,8 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
if (StoreSize != DL.getTypeStoreSize(B->getType()) || StoreSize > 128)
return false;
+ bool ContainsUndefOrPoisonA = A->containsUndefOrPoisonElement();
+
Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8);
// Try constant folding a bitcast of both instructions to an integer. If we
@@ -1434,7 +1442,14 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B,
B = ConstantFoldCastOperand(Instruction::BitCast, const_cast<Constant *>(B),
IntTy, DL);
- return A == B;
+ if (A != B)
+ return false;
+
+ // Constants only safely match if A doesn't contain undef/poison.
+ // As we'll be reusing A, it doesn't matter if B contains undef/poison.
+ // TODO: Handle cases where A and B have the same undef/poison elements.
+ // TODO: Merge A and B with mismatching undef/poison elements.
+ return !ContainsUndefOrPoisonA;
}
/// Create a new entry in the constant pool or return an existing one.
@@ -1490,6 +1505,17 @@ void MachineConstantPool::print(raw_ostream &OS) const {
}
}
+//===----------------------------------------------------------------------===//
+// Template specialization for MachineFunction implementation of
+// ProfileSummaryInfo::getEntryCount().
+//===----------------------------------------------------------------------===//
+template <>
+std::optional<Function::ProfileCount>
+ProfileSummaryInfo::getEntryCount<llvm::MachineFunction>(
+ const llvm::MachineFunction *F) const {
+ return F->getFunction().getEntryCount();
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MachineConstantPool::dump() const { print(dbgs()); }
#endif
diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
index 613c52900331..fbc071536d22 100644
--- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -24,6 +24,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/EHUtils.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/BasicBlockSectionUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -83,88 +86,44 @@ public:
} // end anonymous namespace
/// setDescendantEHBlocksCold - This splits all EH pads and blocks reachable
-/// only by EH pad as cold. This will help mark EH pads statically cold instead
-/// of relying on profile data.
-static void
-setDescendantEHBlocksCold(SmallVectorImpl<MachineBasicBlock *> &EHBlocks,
- MachineFunction &MF) {
- MachineBasicBlock *StartBlock = &MF.front();
- // A block can be unknown if its not reachable from anywhere
- // EH if its only reachable from start blocks via some path through EH pads
- // NonEH if it's reachable from Non EH blocks as well.
- enum Status { Unknown = 0, EH = 1, NonEH = 2 };
- DenseSet<MachineBasicBlock *> WorkList;
- DenseMap<MachineBasicBlock *, Status> Statuses;
-
- auto getStatus = [&](MachineBasicBlock *MBB) {
- if (Statuses.find(MBB) != Statuses.end())
- return Statuses[MBB];
- else
- return Unknown;
- };
-
- auto checkPredecessors = [&](MachineBasicBlock *MBB, Status Stat) {
- for (auto *PredMBB : MBB->predecessors()) {
- Status PredStatus = getStatus(PredMBB);
- // If status of predecessor block has gone above current block
- // we update current blocks status.
- if (PredStatus > Stat)
- Stat = PredStatus;
- }
- return Stat;
- };
-
- auto addSuccesors = [&](MachineBasicBlock *MBB) {
- for (auto *SuccMBB : MBB->successors()) {
- if (!SuccMBB->isEHPad())
- WorkList.insert(SuccMBB);
- }
- };
-
- // Insert the successors of start block
- // and landing pads successor.
- Statuses[StartBlock] = NonEH;
- addSuccesors(StartBlock);
- for (auto *LP : EHBlocks) {
- addSuccesors(LP);
- Statuses[LP] = EH;
- }
-
- // Worklist iterative algorithm.
- while (!WorkList.empty()) {
- auto *MBB = *WorkList.begin();
- WorkList.erase(MBB);
-
- Status OldStatus = getStatus(MBB);
-
- // Check on predecessors and check for
- // Status update.
- Status NewStatus = checkPredecessors(MBB, OldStatus);
-
- // Did the block status change?
- bool changed = OldStatus != NewStatus;
- if (changed) {
- addSuccesors(MBB);
- Statuses[MBB] = NewStatus;
- }
+/// only via an EH pad as cold. This will help mark EH pads statically cold
+/// instead of relying on profile data.
+static void setDescendantEHBlocksCold(MachineFunction &MF) {
+ DenseSet<MachineBasicBlock *> EHBlocks;
+ computeEHOnlyBlocks(MF, EHBlocks);
+ for (auto Block : EHBlocks) {
+ Block->setSectionID(MBBSectionID::ColdSectionID);
}
+}
- for (auto Entry : Statuses) {
- if (Entry.second == EH)
- Entry.first->setSectionID(MBBSectionID::ColdSectionID);
- }
+static void finishAdjustingBasicBlocksAndLandingPads(MachineFunction &MF) {
+ auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
+ return X.getSectionID().Type < Y.getSectionID().Type;
+ };
+ llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
+ llvm::avoidZeroOffsetLandingPad(MF);
}
static bool isColdBlock(const MachineBasicBlock &MBB,
const MachineBlockFrequencyInfo *MBFI,
ProfileSummaryInfo *PSI) {
std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
- if (!Count)
- return true;
-
- if (PercentileCutoff > 0) {
- return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
+ // For instrumentation profiles and sample profiles, we use different ways
+ // to judge whether a block is cold and should be split.
+ if (PSI->hasInstrumentationProfile() || PSI->hasCSInstrumentationProfile()) {
+ // If using an instrumentation profile, which is deemed "accurate", no
+ // count means cold.
+ if (!Count)
+ return true;
+ if (PercentileCutoff > 0)
+ return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
+ // Fallthrough to end of function.
+ } else if (PSI->hasSampleProfile()) {
+ // For a sample profile, no count means "do not judge coldness".
+ if (!Count)
+ return false;
}
+
return (*Count < ColdCountThreshold);
}
@@ -204,6 +163,17 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
if (UseProfileData) {
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ // If we don't have a good profile (a sample profile is not deemed a
+ // "good profile") and the function is not hot, then return early,
+ // because we can only trust hot functions when the profile quality
+ // is not good.
+ if (PSI->hasSampleProfile() && !PSI->isFunctionHotInCallGraph(&MF, *MBFI)) {
+ // Split all EH code and its descendants statically by default.
+ if (SplitAllEHCode)
+ setDescendantEHBlocksCold(MF);
+ finishAdjustingBasicBlocksAndLandingPads(MF);
+ return true;
+ }
}
SmallVector<MachineBasicBlock *, 2> LandingPads;
@@ -219,9 +189,10 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
// Split all EH code and its descendants statically by default.
if (SplitAllEHCode)
- setDescendantEHBlocksCold(LandingPads, MF);
+ setDescendantEHBlocksCold(MF);
// We only split out eh pads if all of them are cold.
else {
+ // Here we have UseProfileData == true.
bool HasHotLandingPads = false;
for (const MachineBasicBlock *LP : LandingPads) {
if (!isColdBlock(*LP, MBFI, PSI))
@@ -232,11 +203,8 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
LP->setSectionID(MBBSectionID::ColdSectionID);
}
}
- auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
- return X.getSectionID().Type < Y.getSectionID().Type;
- };
- llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
- llvm::avoidZeroOffsetLandingPad(MF);
+
+ finishAdjustingBasicBlocksAndLandingPads(MF);
return true;
}
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 8e0777f8438a..a9309487a7a7 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -28,6 +29,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -49,7 +51,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
@@ -95,7 +96,8 @@ void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) {
/// the MCInstrDesc.
MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &TID,
DebugLoc DL, bool NoImp)
- : MCID(&TID), DbgLoc(std::move(DL)), DebugInstrNum(0) {
+ : MCID(&TID), NumOperands(0), Flags(0), AsmPrinterFlags(0),
+ DbgLoc(std::move(DL)), DebugInstrNum(0) {
assert(DbgLoc.hasTrivialDestructor() && "Expected trivial destructor");
// Reserve space for the expected number of operands.
@@ -113,8 +115,8 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &TID,
/// Does not copy the number from debug instruction numbering, to preserve
/// uniqueness.
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : MCID(&MI.getDesc()), Info(MI.Info), DbgLoc(MI.getDebugLoc()),
- DebugInstrNum(0) {
+ : MCID(&MI.getDesc()), NumOperands(0), Flags(0), AsmPrinterFlags(0),
+ Info(MI.Info), DbgLoc(MI.getDebugLoc()), DebugInstrNum(0) {
assert(DbgLoc.hasTrivialDestructor() && "Expected trivial destructor");
CapOperands = OperandCapacity::get(MI.getNumOperands());
@@ -149,6 +151,12 @@ MachineRegisterInfo *MachineInstr::getRegInfo() {
return nullptr;
}
+const MachineRegisterInfo *MachineInstr::getRegInfo() const {
+ if (const MachineBasicBlock *MBB = getParent())
+ return &MBB->getParent()->getRegInfo();
+ return nullptr;
+}
+
void MachineInstr::removeRegOperandsFromUseLists(MachineRegisterInfo &MRI) {
for (MachineOperand &MO : operands())
if (MO.isReg())
@@ -185,6 +193,8 @@ static void moveOperands(MachineOperand *Dst, MachineOperand *Src,
/// an explicit operand it is added at the end of the explicit operand list
/// (before the first implicit operand).
void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
+ assert(isUInt<LLVM_MI_NUMOPERANDS_BITS>(NumOperands + 1) &&
+ "Cannot add more operands.");
assert(MCID && "Cannot add operands before providing an instr descriptor");
// Check if we're adding one of our existing operands.
@@ -526,14 +536,14 @@ void MachineInstr::cloneInstrSymbols(MachineFunction &MF,
setPCSections(MF, MI.getPCSections());
}
-uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
+uint32_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
// For now, the just return the union of the flags. If the flags get more
// complicated over time, we might need more logic here.
return getFlags() | Other.getFlags();
}
-uint16_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
- uint16_t MIFlags = 0;
+uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
+ uint32_t MIFlags = 0;
// Copy the wrapping flags.
if (const OverflowingBinaryOperator *OB =
dyn_cast<OverflowingBinaryOperator>(&I)) {
@@ -567,6 +577,9 @@ uint16_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
MIFlags |= MachineInstr::MIFlag::FmReassoc;
}
+ if (I.getMetadata(LLVMContext::MD_unpredictable))
+ MIFlags |= MachineInstr::MIFlag::Unpredictable;
+
return MIFlags;
}
@@ -1715,7 +1728,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
if (FirstOp) FirstOp = false; else OS << ",";
OS << " ";
- if (isDebugValue() && MO.isMetadata()) {
+ if (isDebugValueLike() && MO.isMetadata()) {
// Pretty print DBG_VALUE* instructions.
auto *DIV = dyn_cast<DILocalVariable>(MO.getMetadata());
if (DIV && !DIV->getName().empty())
@@ -1871,7 +1884,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
}
// Print extra comments for DEBUG_VALUE.
- if (isDebugValue() && getDebugVariableOp().isMetadata()) {
+ if (isDebugValueLike() && getDebugVariableOp().isMetadata()) {
if (!HaveSemi) {
OS << ";";
HaveSemi = true;
@@ -2378,3 +2391,72 @@ unsigned MachineInstr::getDebugInstrNum(MachineFunction &MF) {
DebugInstrNum = MF.getNewDebugInstrNum();
return DebugInstrNum;
}
+
+std::tuple<LLT, LLT> MachineInstr::getFirst2LLTs() const {
+ return std::tuple(getRegInfo()->getType(getOperand(0).getReg()),
+ getRegInfo()->getType(getOperand(1).getReg()));
+}
+
+std::tuple<LLT, LLT, LLT> MachineInstr::getFirst3LLTs() const {
+ return std::tuple(getRegInfo()->getType(getOperand(0).getReg()),
+ getRegInfo()->getType(getOperand(1).getReg()),
+ getRegInfo()->getType(getOperand(2).getReg()));
+}
+
+std::tuple<LLT, LLT, LLT, LLT> MachineInstr::getFirst4LLTs() const {
+ return std::tuple(getRegInfo()->getType(getOperand(0).getReg()),
+ getRegInfo()->getType(getOperand(1).getReg()),
+ getRegInfo()->getType(getOperand(2).getReg()),
+ getRegInfo()->getType(getOperand(3).getReg()));
+}
+
+std::tuple<LLT, LLT, LLT, LLT, LLT> MachineInstr::getFirst5LLTs() const {
+ return std::tuple(getRegInfo()->getType(getOperand(0).getReg()),
+ getRegInfo()->getType(getOperand(1).getReg()),
+ getRegInfo()->getType(getOperand(2).getReg()),
+ getRegInfo()->getType(getOperand(3).getReg()),
+ getRegInfo()->getType(getOperand(4).getReg()));
+}
+
+std::tuple<Register, LLT, Register, LLT>
+MachineInstr::getFirst2RegLLTs() const {
+ Register Reg0 = getOperand(0).getReg();
+ Register Reg1 = getOperand(1).getReg();
+ return std::tuple(Reg0, getRegInfo()->getType(Reg0), Reg1,
+ getRegInfo()->getType(Reg1));
+}
+
+std::tuple<Register, LLT, Register, LLT, Register, LLT>
+MachineInstr::getFirst3RegLLTs() const {
+ Register Reg0 = getOperand(0).getReg();
+ Register Reg1 = getOperand(1).getReg();
+ Register Reg2 = getOperand(2).getReg();
+ return std::tuple(Reg0, getRegInfo()->getType(Reg0), Reg1,
+ getRegInfo()->getType(Reg1), Reg2,
+ getRegInfo()->getType(Reg2));
+}
+
+std::tuple<Register, LLT, Register, LLT, Register, LLT, Register, LLT>
+MachineInstr::getFirst4RegLLTs() const {
+ Register Reg0 = getOperand(0).getReg();
+ Register Reg1 = getOperand(1).getReg();
+ Register Reg2 = getOperand(2).getReg();
+ Register Reg3 = getOperand(3).getReg();
+ return std::tuple(
+ Reg0, getRegInfo()->getType(Reg0), Reg1, getRegInfo()->getType(Reg1),
+ Reg2, getRegInfo()->getType(Reg2), Reg3, getRegInfo()->getType(Reg3));
+}
+
+std::tuple<Register, LLT, Register, LLT, Register, LLT, Register, LLT, Register,
+ LLT>
+MachineInstr::getFirst5RegLLTs() const {
+ Register Reg0 = getOperand(0).getReg();
+ Register Reg1 = getOperand(1).getReg();
+ Register Reg2 = getOperand(2).getReg();
+ Register Reg3 = getOperand(3).getReg();
+ Register Reg4 = getOperand(4).getReg();
+ return std::tuple(
+ Reg0, getRegInfo()->getType(Reg0), Reg1, getRegInfo()->getType(Reg1),
+ Reg2, getRegInfo()->getType(Reg2), Reg3, getRegInfo()->getType(Reg3),
+ Reg4, getRegInfo()->getType(Reg4));
+}
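
The getFirst*LLTs/getFirst*RegLLTs helpers added above bundle the leading register operands with their low-level types so callers can use structured bindings. A minimal usage sketch, assuming MI is a gMIR instruction whose first two operands are registers (e.g. inside a GlobalISel combine or legalization rule):

// Unpack the destination and source registers together with their LLTs.
auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
if (DstTy == SrcTy) {
  // Same-type case; DstReg/SrcReg are the corresponding Registers.
}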
diff --git a/llvm/lib/CodeGen/MachineInstrBundle.cpp b/llvm/lib/CodeGen/MachineInstrBundle.cpp
index 0c059a145ca4..b9db34f7be95 100644
--- a/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -58,8 +58,7 @@ bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
if (MI->isBundle()) {
while (++MII != MIE && MII->isBundledWithPred()) {
MII->unbundleFromPred();
- for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MII->getOperand(i);
+ for (MachineOperand &MO : MII->operands()) {
if (MO.isReg() && MO.isInternalRead())
MO.setIsInternalRead(false);
}
@@ -149,8 +148,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
if (MII->isDebugInstr())
continue;
- for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MII->getOperand(i);
+ for (MachineOperand &MO : MII->operands()) {
if (!MO.isReg())
continue;
if (MO.isDef()) {
@@ -199,8 +197,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
}
if (!MO.isDead() && Reg.isPhysical()) {
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- unsigned SubReg = *SubRegs;
+ for (MCPhysReg SubReg : TRI->subregs(Reg)) {
if (LocalDefSet.insert(SubReg).second)
LocalDefs.push_back(SubReg);
}
@@ -310,6 +307,34 @@ VirtRegInfo llvm::AnalyzeVirtRegInBundle(
return RI;
}
+std::pair<LaneBitmask, LaneBitmask>
+llvm::AnalyzeVirtRegLanesInBundle(const MachineInstr &MI, Register Reg,
+ const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) {
+
+ LaneBitmask UseMask, DefMask;
+
+ for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
+ const MachineOperand &MO = *O;
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+
+ unsigned SubReg = MO.getSubReg();
+ if (SubReg == 0 && MO.isUse() && !MO.isUndef())
+ UseMask |= MRI.getMaxLaneMaskForVReg(Reg);
+
+ LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
+ if (MO.isDef()) {
+ if (!MO.isUndef())
+ UseMask |= ~SubRegMask;
+ DefMask |= SubRegMask;
+ } else if (!MO.isUndef())
+ UseMask |= SubRegMask;
+ }
+
+ return {UseMask, DefMask};
+}
+
PhysRegInfo llvm::AnalyzePhysRegInBundle(const MachineInstr &MI, Register Reg,
const TargetRegisterInfo *TRI) {
bool AllDefsDead = true;
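
AnalyzeVirtRegLanesInBundle above reports which lanes of Reg the bundle reads and which it writes. A minimal usage sketch, assuming MI, Reg, MRI and TRI are already in scope and Reg is a virtual register:

// UseMask: lanes of Reg read somewhere in the bundle.
// DefMask: lanes of Reg written somewhere in the bundle.
auto [UseMask, DefMask] = llvm::AnalyzeVirtRegLanesInBundle(MI, Reg, MRI, TRI);
if ((UseMask & DefMask).any()) {
  // Some lanes are both used and defined within the bundle.
}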
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 1c09c01df3aa..4e80e9b58c06 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -112,26 +112,26 @@ STATISTIC(NumNotHoistedDueToHotness,
namespace {
class MachineLICMBase : public MachineFunctionPass {
- const TargetInstrInfo *TII;
- const TargetLoweringBase *TLI;
- const TargetRegisterInfo *TRI;
- const MachineFrameInfo *MFI;
- MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetLoweringBase *TLI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const MachineFrameInfo *MFI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
TargetSchedModel SchedModel;
- bool PreRegAlloc;
- bool HasProfileData;
+ bool PreRegAlloc = false;
+ bool HasProfileData = false;
// Various analyses that we use...
- AliasAnalysis *AA; // Alias analysis info.
- MachineBlockFrequencyInfo *MBFI; // Machine block frequncy info
- MachineLoopInfo *MLI; // Current MachineLoopInfo
- MachineDominatorTree *DT; // Machine dominator tree for the cur loop
+ AliasAnalysis *AA = nullptr; // Alias analysis info.
+ MachineBlockFrequencyInfo *MBFI = nullptr; // Machine block frequency info
+ MachineLoopInfo *MLI = nullptr; // Current MachineLoopInfo
+ MachineDominatorTree *DT = nullptr; // Machine dominator tree for the cur loop
// State that is updated as we process loops
- bool Changed; // True if a loop is changed.
- bool FirstInLoop; // True if it's the first LICM in the loop.
- MachineLoop *CurLoop; // The current loop we are working on.
- MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
+ bool Changed = false; // True if a loop is changed.
+ bool FirstInLoop = false; // True if it's the first LICM in the loop.
+ MachineLoop *CurLoop = nullptr; // The current loop we are working on.
+ MachineBasicBlock *CurPreheader = nullptr; // The preheader for CurLoop.
// Exit blocks for CurLoop.
SmallVector<MachineBasicBlock *, 8> ExitBlocks;
@@ -163,7 +163,7 @@ namespace {
// If an MBB does not dominate loop exiting blocks then it may not be safe
// to hoist loads from this block.
// Tri-state: 0 - false, 1 - true, 2 - unknown
- unsigned SpeculationState;
+ unsigned SpeculationState = SpeculateUnknown;
public:
MachineLICMBase(char &PassID, bool PreRegAlloc)
@@ -575,8 +575,8 @@ void MachineLICMBase::HoistRegionPostRA() {
if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
bool Safe = true;
MachineInstr *MI = Candidate.MI;
- for (const MachineOperand &MO : MI->operands()) {
- if (!MO.isReg() || MO.isDef() || !MO.getReg())
+ for (const MachineOperand &MO : MI->all_uses()) {
+ if (!MO.getReg())
continue;
Register Reg = MO.getReg();
if (PhysRegDefs.test(Reg) ||
@@ -600,8 +600,9 @@ void MachineLICMBase::AddToLiveIns(MCRegister Reg) {
if (!BB->isLiveIn(Reg))
BB->addLiveIn(Reg);
for (MachineInstr &MI : *BB) {
- for (MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
+ for (MachineOperand &MO : MI.all_uses()) {
+ if (!MO.getReg())
+ continue;
if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
MO.setIsKill(false);
}
@@ -669,8 +670,8 @@ bool MachineLICMBase::isTriviallyReMaterializable(
if (!TII->isTriviallyReMaterializable(MI))
return false;
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
+ for (const MachineOperand &MO : MI.all_uses()) {
+ if (MO.getReg().isVirtual())
return false;
}
@@ -866,7 +867,7 @@ MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
continue;
const int *PS = TRI->getRegClassPressureSets(RC);
for (; *PS != -1; ++PS) {
- if (Cost.find(*PS) == Cost.end())
+ if (!Cost.contains(*PS))
Cost[*PS] = RCCost;
else
Cost[*PS] += RCCost;
@@ -1014,9 +1015,7 @@ bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
SmallVector<const MachineInstr*, 8> Work(1, MI);
do {
MI = Work.pop_back_val();
- for (const MachineOperand &MO : MI->operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
+ for (const MachineOperand &MO : MI->all_defs()) {
Register Reg = MO.getReg();
if (!Reg.isVirtual())
continue;
@@ -1455,8 +1454,8 @@ bool MachineLICMBase::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// Clear the kill flags of any register this instruction defines,
// since they may need to be live throughout the entire loop
// rather than just live for part of it.
- for (MachineOperand &MO : MI->operands())
- if (MO.isReg() && MO.isDef() && !MO.isDead())
+ for (MachineOperand &MO : MI->all_defs())
+ if (!MO.isDead())
MRI->clearKillFlags(MO.getReg());
// Add to the CSE map.
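
Several hunks above replace manual isReg()/isDef()/isUse() filtering with the all_defs()/all_uses() operand ranges on MachineInstr. A minimal sketch of the idiom, assuming MI and MRI are in scope, mirroring the Hoist() and isTriviallyReMaterializable() changes:

// Visit only register defs (explicit and implicit).
for (const MachineOperand &MO : MI.all_defs())
  if (!MO.isDead())
    MRI->clearKillFlags(MO.getReg());

// Visit only register uses.
for (const MachineOperand &MO : MI.all_uses())
  if (MO.getReg().isVirtual()) {
    // e.g. reject rematerialization, as isTriviallyReMaterializable() does.
  }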
diff --git a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
index c400ce190b46..c44b968b317d 100644
--- a/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
+++ b/llvm/lib/CodeGen/MachineLateInstrsCleanup.cpp
@@ -39,17 +39,29 @@ STATISTIC(NumRemoved, "Number of redundant instructions removed.");
namespace {
class MachineLateInstrsCleanup : public MachineFunctionPass {
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+
+ // Data structures to map regs to their definitions and kills per MBB.
+ struct Reg2MIMap : public SmallDenseMap<Register, MachineInstr *> {
+ bool hasIdentical(Register Reg, MachineInstr *ArgMI) {
+ MachineInstr *MI = lookup(Reg);
+ return MI && MI->isIdenticalTo(*ArgMI);
+ }
+ };
- // Data structures to map regs to their definitions per MBB.
- using Reg2DefMap = std::map<Register, MachineInstr*>;
- std::vector<Reg2DefMap> RegDefs;
+ std::vector<Reg2MIMap> RegDefs;
+ std::vector<Reg2MIMap> RegKills;
// Walk through the instructions in MBB and remove any redundant
// instructions.
bool processBlock(MachineBasicBlock *MBB);
+ void removeRedundantDef(MachineInstr *MI);
+ void clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ BitVector &VisitedPreds);
+
public:
static char ID; // Pass identification, replacement for typeid
@@ -88,6 +100,8 @@ bool MachineLateInstrsCleanup::runOnMachineFunction(MachineFunction &MF) {
RegDefs.clear();
RegDefs.resize(MF.getNumBlockIDs());
+ RegKills.clear();
+ RegKills.resize(MF.getNumBlockIDs());
// Visit all MBBs in an order that maximises the reuse from predecessors.
bool Changed = false;
@@ -102,41 +116,36 @@ bool MachineLateInstrsCleanup::runOnMachineFunction(MachineFunction &MF) {
// in MBB and if needed continue in predecessors until a use/def of Reg is
// encountered. This seems to be faster in practice than tracking kill flags
// in a map.
-static void clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
- MachineBasicBlock::iterator I,
- BitVector &VisitedPreds,
- const TargetRegisterInfo *TRI) {
+void MachineLateInstrsCleanup::
+clearKillsForDef(Register Reg, MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I,
+ BitVector &VisitedPreds) {
VisitedPreds.set(MBB->getNumber());
- while (I != MBB->begin()) {
- --I;
- bool Found = false;
- for (auto &MO : I->operands())
- if (MO.isReg() && TRI->regsOverlap(MO.getReg(), Reg)) {
- if (MO.isDef())
- return;
- if (MO.readsReg()) {
- MO.setIsKill(false);
- Found = true; // Keep going for an implicit kill of the super-reg.
- }
- }
- if (Found)
- return;
+
+ // Kill flag in MBB
+ if (MachineInstr *KillMI = RegKills[MBB->getNumber()].lookup(Reg)) {
+ KillMI->clearRegisterKills(Reg, TRI);
+ return;
}
+ // Def in MBB (missing kill flag)
+ if (MachineInstr *DefMI = RegDefs[MBB->getNumber()].lookup(Reg))
+ if (DefMI->getParent() == MBB)
+ return;
+
// If an earlier def is not in MBB, continue in predecessors.
if (!MBB->isLiveIn(Reg))
MBB->addLiveIn(Reg);
assert(!MBB->pred_empty() && "Predecessor def not found!");
for (MachineBasicBlock *Pred : MBB->predecessors())
if (!VisitedPreds.test(Pred->getNumber()))
- clearKillsForDef(Reg, Pred, Pred->end(), VisitedPreds, TRI);
+ clearKillsForDef(Reg, Pred, Pred->end(), VisitedPreds);
}
-static void removeRedundantDef(MachineInstr *MI,
- const TargetRegisterInfo *TRI) {
+void MachineLateInstrsCleanup::removeRedundantDef(MachineInstr *MI) {
Register Reg = MI->getOperand(0).getReg();
BitVector VisitedPreds(MI->getMF()->getNumBlockIDs());
- clearKillsForDef(Reg, MI->getParent(), MI->getIterator(), VisitedPreds, TRI);
+ clearKillsForDef(Reg, MI->getParent(), MI->getIterator(), VisitedPreds);
MI->eraseFromParent();
++NumRemoved;
}
@@ -172,18 +181,18 @@ static bool isCandidate(const MachineInstr *MI, Register &DefedReg,
bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
bool Changed = false;
- Reg2DefMap &MBBDefs = RegDefs[MBB->getNumber()];
+ Reg2MIMap &MBBDefs = RegDefs[MBB->getNumber()];
+ Reg2MIMap &MBBKills = RegKills[MBB->getNumber()];
// Find reusable definitions in the predecessor(s).
- if (!MBB->pred_empty() && !MBB->isEHPad()) {
+ if (!MBB->pred_empty() && !MBB->isEHPad() &&
+ !MBB->isInlineAsmBrIndirectTarget()) {
MachineBasicBlock *FirstPred = *MBB->pred_begin();
for (auto [Reg, DefMI] : RegDefs[FirstPred->getNumber()])
if (llvm::all_of(
drop_begin(MBB->predecessors()),
[&, &Reg = Reg, &DefMI = DefMI](const MachineBasicBlock *Pred) {
- auto PredDefI = RegDefs[Pred->getNumber()].find(Reg);
- return PredDefI != RegDefs[Pred->getNumber()].end() &&
- DefMI->isIdenticalTo(*PredDefI->second);
+ return RegDefs[Pred->getNumber()].hasIdentical(Reg, DefMI);
})) {
MBBDefs[Reg] = DefMI;
LLVM_DEBUG(dbgs() << "Reusable instruction from pred(s): in "
@@ -200,6 +209,7 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
// it) are valid.
if (MI.modifiesRegister(FrameReg, TRI)) {
MBBDefs.clear();
+ MBBKills.clear();
continue;
}
@@ -207,24 +217,23 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
bool IsCandidate = isCandidate(&MI, DefedReg, FrameReg);
// Check for an earlier identical and reusable instruction.
- if (IsCandidate) {
- auto DefI = MBBDefs.find(DefedReg);
- if (DefI != MBBDefs.end() && MI.isIdenticalTo(*DefI->second)) {
- LLVM_DEBUG(dbgs() << "Removing redundant instruction in "
- << printMBBReference(*MBB) << ": " << MI;);
- removeRedundantDef(&MI, TRI);
- Changed = true;
- continue;
- }
+ if (IsCandidate && MBBDefs.hasIdentical(DefedReg, &MI)) {
+ LLVM_DEBUG(dbgs() << "Removing redundant instruction in "
+ << printMBBReference(*MBB) << ": " << MI;);
+ removeRedundantDef(&MI);
+ Changed = true;
+ continue;
}
// Clear any entries in map that MI clobbers.
- for (auto DefI = MBBDefs.begin(); DefI != MBBDefs.end();) {
- Register Reg = DefI->first;
- if (MI.modifiesRegister(Reg, TRI))
- DefI = MBBDefs.erase(DefI);
- else
- ++DefI;
+ for (auto DefI : llvm::make_early_inc_range(MBBDefs)) {
+ Register Reg = DefI.first;
+ if (MI.modifiesRegister(Reg, TRI)) {
+ MBBDefs.erase(Reg);
+ MBBKills.erase(Reg);
+ } else if (MI.findRegisterUseOperandIdx(Reg, true /*isKill*/, TRI) != -1)
+ // Keep track of register kills.
+ MBBKills[Reg] = &MI;
}
// Record this MI for potential later reuse.
@@ -232,6 +241,7 @@ bool MachineLateInstrsCleanup::processBlock(MachineBasicBlock *MBB) {
LLVM_DEBUG(dbgs() << "Found interesting instruction in "
<< printMBBReference(*MBB) << ": " << MI;);
MBBDefs[DefedReg] = &MI;
+ assert(!MBBKills.count(DefedReg) && "Should already have been removed.");
}
}
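The MachineLateInstrsCleanup hunks above replace the std::map-based Reg2DefMap with a SmallDenseMap wrapper whose hasIdentical() folds the find-and-compare step into one call, and add a per-block RegKills map so clearKillsForDef() can clear a kill flag by direct lookup instead of walking instructions backwards. A minimal standalone sketch of that lookup pattern, using hypothetical Instr and Reg2InstrMap stand-ins rather than LLVM types:

    // Sketch only: Instr and Reg2InstrMap are illustrative, not LLVM classes.
    #include <string>
    #include <unordered_map>

    struct Instr {
      unsigned DefReg;      // register this instruction defines
      std::string Encoding; // stand-in for MachineInstr::isIdenticalTo()
      bool isIdenticalTo(const Instr &Other) const {
        return DefReg == Other.DefReg && Encoding == Other.Encoding;
      }
    };

    struct Reg2InstrMap : std::unordered_map<unsigned, const Instr *> {
      bool hasIdentical(unsigned Reg, const Instr &I) const {
        auto It = find(Reg);
        return It != end() && It->second->isIdenticalTo(I);
      }
    };

    // A later def is redundant only while an identical def of the same register
    // is still recorded, i.e. nothing has clobbered the register since.
    bool isRedundantDef(const Reg2InstrMap &Defs, const Instr &I) {
      return Defs.hasIdentical(I.DefReg, I);
    }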
diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp
index fb3af385a0c1..37a0ff3d71c8 100644
--- a/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -14,7 +14,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/Analysis/LoopInfoImpl.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -23,6 +22,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
+#include "llvm/Support/GenericLoopInfoImpl.h"
using namespace llvm;
diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp
index a0c0166d06f0..921feb253d64 100644
--- a/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -56,11 +56,10 @@ void MachineModuleInfo::finalize() {
MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI)
: TM(std::move(MMI.TM)),
- Context(MMI.TM.getTargetTriple(), MMI.TM.getMCAsmInfo(),
- MMI.TM.getMCRegisterInfo(), MMI.TM.getMCSubtargetInfo(), nullptr,
- &MMI.TM.Options.MCOptions, false),
+ Context(TM.getTargetTriple(), TM.getMCAsmInfo(), TM.getMCRegisterInfo(),
+ TM.getMCSubtargetInfo(), nullptr, &TM.Options.MCOptions, false),
MachineFunctions(std::move(MMI.MachineFunctions)) {
- Context.setObjectFileInfo(MMI.TM.getObjFileLowering());
+ Context.setObjectFileInfo(TM.getObjFileLowering());
ObjFileMMI = MMI.ObjFileMMI;
CurCallSite = MMI.CurCallSite;
ExternalContext = MMI.ExternalContext;
@@ -107,6 +106,10 @@ MachineFunction &MachineModuleInfo::getOrCreateMachineFunction(Function &F) {
const TargetSubtargetInfo &STI = *TM.getSubtargetImpl(F);
MF = new MachineFunction(F, TM, STI, NextFnNum++, *this);
MF->initTargetMachineFunctionInfo(STI);
+
+ // MRI callback for target specific initializations.
+ TM.registerMachineRegisterInfoCallback(*MF);
+
// Update the set entry.
I.first->second.reset(MF);
} else {
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index 0a7b12e9ccb9..788c134b6ee8 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/CodeGen/MIRFormatter.h"
@@ -53,6 +52,11 @@ static MachineFunction *getMFIfAvailable(MachineOperand &MO) {
getMFIfAvailable(const_cast<const MachineOperand &>(MO)));
}
+unsigned MachineOperand::getOperandNo() const {
+ assert(getParent() && "Operand does not belong to any instruction!");
+ return getParent()->getOperandNo(this);
+}
+
void MachineOperand::setReg(Register Reg) {
if (getReg() == Reg)
return; // No change.
@@ -986,7 +990,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
case MachineOperand::MO_Predicate: {
auto Pred = static_cast<CmpInst::Predicate>(getPredicate());
OS << (CmpInst::isIntPredicate(Pred) ? "int" : "float") << "pred("
- << CmpInst::getPredicateName(Pred) << ')';
+ << Pred << ')';
break;
}
case MachineOperand::MO_ShuffleMask:
@@ -1022,10 +1026,10 @@ unsigned MachinePointerInfo::getAddrSpace() const { return AddrSpace; }
/// Offset + Size byte.
bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
const DataLayout &DL) const {
- if (!V.is<const Value *>())
+ if (!isa<const Value *>(V))
return false;
- const Value *BasePtr = V.get<const Value *>();
+ const Value *BasePtr = cast<const Value *>(V);
if (BasePtr == nullptr)
return false;
@@ -1070,8 +1074,8 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
AtomicOrdering FailureOrdering)
: PtrInfo(ptrinfo), MemoryType(type), FlagVals(f), BaseAlign(a),
AAInfo(AAInfo), Ranges(Ranges) {
- assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue *>() ||
- isa<PointerType>(PtrInfo.V.get<const Value *>()->getType())) &&
+ assert((PtrInfo.V.isNull() || isa<const PseudoSourceValue *>(PtrInfo.V) ||
+ isa<PointerType>(cast<const Value *>(PtrInfo.V)->getType())) &&
"invalid pointer value");
assert((isLoad() || isStore()) && "Not a load/store!");
@@ -1093,16 +1097,6 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
s == ~UINT64_C(0) ? LLT() : LLT::scalar(8 * s), a,
AAInfo, Ranges, SSID, Ordering, FailureOrdering) {}
-/// Profile - Gather unique data for the object.
-///
-void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(getOffset());
- ID.AddInteger(getMemoryType().getUniqueRAWLLTData());
- ID.AddPointer(getOpaqueValue());
- ID.AddInteger(getFlags());
- ID.AddInteger(getBaseAlign().value());
-}
-
void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
// The Value and Offset may differ due to CSE. But the flags and size
// should be the same.
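The MachineOperand.cpp hunks above also migrate PointerUnion queries from the V.is<T>()/V.get<T>() member spelling to isa<T>(V)/cast<T>(V). A rough sketch of the same query shape, modeled here with std::variant and hypothetical helper names rather than LLVM's PointerUnion and casting machinery:

    // Sketch only: PtrUnion, isaT and castT are stand-ins, not LLVM's API.
    #include <variant>

    struct Value {};
    struct PseudoSourceValue {};
    using PtrUnion = std::variant<const Value *, const PseudoSourceValue *>;

    template <typename T> bool isaT(const PtrUnion &U) {
      return std::holds_alternative<T>(U);
    }
    template <typename T> T castT(const PtrUnion &U) { return std::get<T>(U); }

    // Mirrors the guard in isDereferenceable(): only IR Values qualify.
    bool isIRValue(const PtrUnion &U) { return isaT<const Value *>(U); }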
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index c7ba66bd3678..a0769105c929 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -89,11 +89,14 @@ STATISTIC(NumOutlined, "Number of candidates outlined");
STATISTIC(FunctionsCreated, "Number of functions created");
// Statistics for instruction mapping.
-STATISTIC(NumLegalInUnsignedVec, "Number of legal instrs in unsigned vector");
+STATISTIC(NumLegalInUnsignedVec, "Outlinable instructions mapped");
STATISTIC(NumIllegalInUnsignedVec,
- "Number of illegal instrs in unsigned vector");
-STATISTIC(NumInvisible, "Number of invisible instrs in unsigned vector");
-STATISTIC(UnsignedVecSize, "Size of unsigned vector");
+ "Unoutlinable instructions mapped + number of sentinel values");
+STATISTIC(NumSentinels, "Sentinel values inserted during mapping");
+STATISTIC(NumInvisible,
+ "Invisible instructions skipped during mapping");
+STATISTIC(UnsignedVecSize,
+ "Total number of instructions mapped and saved to mapping vector");
// Set to true if the user wants the outliner to run on linkonceodr linkage
// functions. This is false by default because the linker can dedupe linkonceodr
@@ -113,6 +116,11 @@ static cl::opt<unsigned> OutlinerReruns(
cl::desc(
"Number of times to rerun the outliner after the initial outline"));
+static cl::opt<unsigned> OutlinerBenefitThreshold(
+ "outliner-benefit-threshold", cl::init(1), cl::Hidden,
+ cl::desc(
+ "The minimum size in bytes before an outlining candidate is accepted"));
+
namespace {
/// Maps \p MachineInstrs to unsigned integers and stores the mappings.
@@ -136,11 +144,11 @@ struct InstructionMapper {
DenseMap<MachineBasicBlock *, unsigned> MBBFlagsMap;
/// The vector of unsigned integers that the module is mapped to.
- std::vector<unsigned> UnsignedVec;
+ SmallVector<unsigned> UnsignedVec;
/// Stores the location of the instruction associated with the integer
/// at index i in \p UnsignedVec for each index i.
- std::vector<MachineBasicBlock::iterator> InstrList;
+ SmallVector<MachineBasicBlock::iterator> InstrList;
// Set if we added an illegal number in the previous step.
// Since each illegal number is unique, we only need one of them between
@@ -157,8 +165,8 @@ struct InstructionMapper {
unsigned mapToLegalUnsigned(
MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
bool &HaveLegalRange, unsigned &NumLegalInBlock,
- std::vector<unsigned> &UnsignedVecForMBB,
- std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
+ SmallVector<unsigned> &UnsignedVecForMBB,
+ SmallVector<MachineBasicBlock::iterator> &InstrListForMBB) {
// We added something legal, so we should unset the AddedLegalLastTime
// flag.
AddedIllegalLastTime = false;
@@ -211,8 +219,8 @@ struct InstructionMapper {
/// \returns The integer that \p *It was mapped to.
unsigned mapToIllegalUnsigned(
MachineBasicBlock::iterator &It, bool &CanOutlineWithPrevInstr,
- std::vector<unsigned> &UnsignedVecForMBB,
- std::vector<MachineBasicBlock::iterator> &InstrListForMBB) {
+ SmallVector<unsigned> &UnsignedVecForMBB,
+ SmallVector<MachineBasicBlock::iterator> &InstrListForMBB) {
// Can't outline an illegal instruction. Set the flag.
CanOutlineWithPrevInstr = false;
@@ -254,12 +262,20 @@ struct InstructionMapper {
/// \param TII \p TargetInstrInfo for the function.
void convertToUnsignedVec(MachineBasicBlock &MBB,
const TargetInstrInfo &TII) {
+ LLVM_DEBUG(dbgs() << "*** Converting MBB '" << MBB.getName()
+ << "' to unsigned vector ***\n");
unsigned Flags = 0;
// Don't even map in this case.
if (!TII.isMBBSafeToOutlineFrom(MBB, Flags))
return;
+ auto OutlinableRanges = TII.getOutlinableRanges(MBB, Flags);
+ LLVM_DEBUG(dbgs() << MBB.getName() << ": " << OutlinableRanges.size()
+ << " outlinable range(s)\n");
+ if (OutlinableRanges.empty())
+ return;
+
// Store info for the MBB for later outlining.
MBBFlagsMap[&MBB] = Flags;
@@ -279,40 +295,71 @@ struct InstructionMapper {
// FIXME: Should this all just be handled in the target, rather than using
// repeated calls to getOutliningType?
- std::vector<unsigned> UnsignedVecForMBB;
- std::vector<MachineBasicBlock::iterator> InstrListForMBB;
-
- for (MachineBasicBlock::iterator Et = MBB.end(); It != Et; ++It) {
- // Keep track of where this instruction is in the module.
- switch (TII.getOutliningType(It, Flags)) {
- case InstrType::Illegal:
+ SmallVector<unsigned> UnsignedVecForMBB;
+ SmallVector<MachineBasicBlock::iterator> InstrListForMBB;
+
+ LLVM_DEBUG(dbgs() << "*** Mapping outlinable ranges ***\n");
+ for (auto &OutlinableRange : OutlinableRanges) {
+ auto OutlinableRangeBegin = OutlinableRange.first;
+ auto OutlinableRangeEnd = OutlinableRange.second;
+#ifndef NDEBUG
+ LLVM_DEBUG(
+ dbgs() << "Mapping "
+ << std::distance(OutlinableRangeBegin, OutlinableRangeEnd)
+ << " instruction range\n");
+ // Everything outside of an outlinable range is illegal.
+ unsigned NumSkippedInRange = 0;
+#endif
+ for (; It != OutlinableRangeBegin; ++It) {
+#ifndef NDEBUG
+ ++NumSkippedInRange;
+#endif
mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
InstrListForMBB);
- break;
-
- case InstrType::Legal:
- mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
- NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB);
- break;
-
- case InstrType::LegalTerminator:
- mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
- NumLegalInBlock, UnsignedVecForMBB, InstrListForMBB);
- // The instruction also acts as a terminator, so we have to record that
- // in the string.
- mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
+ }
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << "Skipped " << NumSkippedInRange
+ << " instructions outside outlinable range\n");
+#endif
+ assert(It != MBB.end() && "Should still have instructions?");
+ // `It` is now positioned at the beginning of a range of instructions
+ // which may be outlinable. Check if each instruction is known to be safe.
+ for (; It != OutlinableRangeEnd; ++It) {
+ // Keep track of where this instruction is in the module.
+ switch (TII.getOutliningType(It, Flags)) {
+ case InstrType::Illegal:
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
+ InstrListForMBB);
+ break;
+
+ case InstrType::Legal:
+ mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
+ NumLegalInBlock, UnsignedVecForMBB,
+ InstrListForMBB);
+ break;
+
+ case InstrType::LegalTerminator:
+ mapToLegalUnsigned(It, CanOutlineWithPrevInstr, HaveLegalRange,
+ NumLegalInBlock, UnsignedVecForMBB,
InstrListForMBB);
- break;
-
- case InstrType::Invisible:
- // Normally this is set by mapTo(Blah)Unsigned, but we just want to
- // skip this instruction. So, unset the flag here.
- ++NumInvisible;
- AddedIllegalLastTime = false;
- break;
+ // The instruction also acts as a terminator, so we have to record
+ // that in the string.
+ mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
+ InstrListForMBB);
+ break;
+
+ case InstrType::Invisible:
+ // Normally this is set by mapTo(Blah)Unsigned, but we just want to
+ // skip this instruction. So, unset the flag here.
+ ++NumInvisible;
+ AddedIllegalLastTime = false;
+ break;
+ }
}
}
+ LLVM_DEBUG(dbgs() << "HaveLegalRange = " << HaveLegalRange << "\n");
+
// Are there enough legal instructions in the block for outlining to be
// possible?
if (HaveLegalRange) {
@@ -322,8 +369,9 @@ struct InstructionMapper {
// repeated substring.
mapToIllegalUnsigned(It, CanOutlineWithPrevInstr, UnsignedVecForMBB,
InstrListForMBB);
- llvm::append_range(InstrList, InstrListForMBB);
- llvm::append_range(UnsignedVec, UnsignedVecForMBB);
+ ++NumSentinels;
+ append_range(InstrList, InstrListForMBB);
+ append_range(UnsignedVec, UnsignedVecForMBB);
}
}
@@ -533,11 +581,19 @@ void MachineOutliner::findCandidates(
// First, find all of the repeated substrings in the tree of minimum length
// 2.
std::vector<Candidate> CandidatesForRepeatedSeq;
+ LLVM_DEBUG(dbgs() << "*** Discarding overlapping candidates *** \n");
+ LLVM_DEBUG(
+ dbgs() << "Searching for overlaps in all repeated sequences...\n");
for (const SuffixTree::RepeatedSubstring &RS : ST) {
CandidatesForRepeatedSeq.clear();
unsigned StringLen = RS.Length;
+ LLVM_DEBUG(dbgs() << " Sequence length: " << StringLen << "\n");
+ // Debug code to keep track of how many candidates we removed.
+#ifndef NDEBUG
+ unsigned NumDiscarded = 0;
+ unsigned NumKept = 0;
+#endif
for (const unsigned &StartIdx : RS.StartIndices) {
- unsigned EndIdx = StartIdx + StringLen - 1;
// Trick: Discard some candidates that would be incompatible with the
// ones we've already found for this sequence. This will save us some
// work in candidate selection.
@@ -559,23 +615,39 @@ void MachineOutliner::findCandidates(
// That is, one must either
// * End before the other starts
// * Start after the other ends
- if (llvm::all_of(CandidatesForRepeatedSeq, [&StartIdx,
- &EndIdx](const Candidate &C) {
- return (EndIdx < C.getStartIdx() || StartIdx > C.getEndIdx());
- })) {
- // It doesn't overlap with anything, so we can outline it.
- // Each sequence is over [StartIt, EndIt].
- // Save the candidate and its location.
-
- MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
- MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
- MachineBasicBlock *MBB = StartIt->getParent();
-
- CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt,
- EndIt, MBB, FunctionList.size(),
- Mapper.MBBFlagsMap[MBB]);
+ unsigned EndIdx = StartIdx + StringLen - 1;
+ auto FirstOverlap = find_if(
+ CandidatesForRepeatedSeq, [StartIdx, EndIdx](const Candidate &C) {
+ return EndIdx >= C.getStartIdx() && StartIdx <= C.getEndIdx();
+ });
+ if (FirstOverlap != CandidatesForRepeatedSeq.end()) {
+#ifndef NDEBUG
+ ++NumDiscarded;
+ LLVM_DEBUG(dbgs() << " .. DISCARD candidate @ [" << StartIdx
+ << ", " << EndIdx << "]; overlaps with candidate @ ["
+ << FirstOverlap->getStartIdx() << ", "
+ << FirstOverlap->getEndIdx() << "]\n");
+#endif
+ continue;
}
+ // It doesn't overlap with anything, so we can outline it.
+ // Each sequence is over [StartIt, EndIt].
+ // Save the candidate and its location.
+#ifndef NDEBUG
+ ++NumKept;
+#endif
+ MachineBasicBlock::iterator StartIt = Mapper.InstrList[StartIdx];
+ MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx];
+ MachineBasicBlock *MBB = StartIt->getParent();
+ CandidatesForRepeatedSeq.emplace_back(StartIdx, StringLen, StartIt, EndIt,
+ MBB, FunctionList.size(),
+ Mapper.MBBFlagsMap[MBB]);
}
+#ifndef NDEBUG
+ LLVM_DEBUG(dbgs() << " Candidates discarded: " << NumDiscarded
+ << "\n");
+ LLVM_DEBUG(dbgs() << " Candidates kept: " << NumKept << "\n\n");
+#endif
// We've found something we might want to outline.
// Create an OutlinedFunction to store it and check if it'd be beneficial
@@ -588,21 +660,21 @@ void MachineOutliner::findCandidates(
const TargetInstrInfo *TII =
CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo();
- OutlinedFunction OF =
+ std::optional<OutlinedFunction> OF =
TII->getOutliningCandidateInfo(CandidatesForRepeatedSeq);
// If we deleted too many candidates, then there's nothing worth outlining.
// FIXME: This should take target-specified instruction sizes into account.
- if (OF.Candidates.size() < 2)
+ if (!OF || OF->Candidates.size() < 2)
continue;
// Is it better to outline this candidate than not?
- if (OF.getBenefit() < 1) {
- emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, OF);
+ if (OF->getBenefit() < OutlinerBenefitThreshold) {
+ emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, *OF);
continue;
}
- FunctionList.push_back(OF);
+ FunctionList.push_back(*OF);
}
}
@@ -616,6 +688,7 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
if (OutlineRepeatedNum > 0)
FunctionName += std::to_string(OutlineRepeatedNum + 1) + "_";
FunctionName += std::to_string(Name);
+ LLVM_DEBUG(dbgs() << "NEW FUNCTION: " << FunctionName << "\n");
// Create the function using an IR-level function.
LLVMContext &C = M.getContext();
@@ -653,6 +726,7 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
+ MF.setIsOutlined(true);
MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
// Insert the new function into the module.
@@ -720,7 +794,7 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
Mangler Mg;
// Get the mangled name of the function for the linkage name.
std::string Dummy;
- llvm::raw_string_ostream MangledNameStream(Dummy);
+ raw_string_ostream MangledNameStream(Dummy);
Mg.getNameWithPrefix(MangledNameStream, F, false);
DISubprogram *OutlinedSP = DB.createFunction(
@@ -750,30 +824,51 @@ bool MachineOutliner::outline(Module &M,
std::vector<OutlinedFunction> &FunctionList,
InstructionMapper &Mapper,
unsigned &OutlinedFunctionNum) {
-
+ LLVM_DEBUG(dbgs() << "*** Outlining ***\n");
+ LLVM_DEBUG(dbgs() << "NUMBER OF POTENTIAL FUNCTIONS: " << FunctionList.size()
+ << "\n");
bool OutlinedSomething = false;
// Sort by benefit. The most beneficial functions should be outlined first.
- llvm::stable_sort(FunctionList, [](const OutlinedFunction &LHS,
- const OutlinedFunction &RHS) {
- return LHS.getBenefit() > RHS.getBenefit();
- });
+ stable_sort(FunctionList,
+ [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) {
+ return LHS.getBenefit() > RHS.getBenefit();
+ });
// Walk over each function, outlining them as we go along. Functions are
// outlined greedily, based off the sort above.
+ auto *UnsignedVecBegin = Mapper.UnsignedVec.begin();
+ LLVM_DEBUG(dbgs() << "WALKING FUNCTION LIST\n");
for (OutlinedFunction &OF : FunctionList) {
+#ifndef NDEBUG
+ auto NumCandidatesBefore = OF.Candidates.size();
+#endif
// If we outlined something that overlapped with a candidate in a previous
// step, then we can't outline from it.
- erase_if(OF.Candidates, [&Mapper](Candidate &C) {
- return std::any_of(
- Mapper.UnsignedVec.begin() + C.getStartIdx(),
- Mapper.UnsignedVec.begin() + C.getEndIdx() + 1,
- [](unsigned I) { return (I == static_cast<unsigned>(-1)); });
+ erase_if(OF.Candidates, [&UnsignedVecBegin](Candidate &C) {
+ return std::any_of(UnsignedVecBegin + C.getStartIdx(),
+ UnsignedVecBegin + C.getEndIdx() + 1, [](unsigned I) {
+ return I == static_cast<unsigned>(-1);
+ });
});
+#ifndef NDEBUG
+ auto NumCandidatesAfter = OF.Candidates.size();
+ LLVM_DEBUG(dbgs() << "PRUNED: " << NumCandidatesBefore - NumCandidatesAfter
+ << "/" << NumCandidatesBefore << " candidates\n");
+#endif
+
// If we made it unbeneficial to outline this function, skip it.
- if (OF.getBenefit() < 1)
+ if (OF.getBenefit() < OutlinerBenefitThreshold) {
+ LLVM_DEBUG(dbgs() << "SKIP: Expected benefit (" << OF.getBenefit()
+ << " B) < threshold (" << OutlinerBenefitThreshold
+ << " B)\n");
continue;
+ }
+
+ LLVM_DEBUG(dbgs() << "OUTLINE: Expected benefit (" << OF.getBenefit()
+ << " B) > threshold (" << OutlinerBenefitThreshold
+ << " B)\n");
// It's beneficial. Create the function and outline its sequence's
// occurrences.
@@ -786,6 +881,7 @@ bool MachineOutliner::outline(Module &M,
const TargetInstrInfo &TII = *STI.getInstrInfo();
// Replace occurrences of the sequence with calls to the new function.
+ LLVM_DEBUG(dbgs() << "CREATE OUTLINED CALLS\n");
for (Candidate &C : OF.Candidates) {
MachineBasicBlock &MBB = *C.getMBB();
MachineBasicBlock::iterator StartIt = C.front();
@@ -793,6 +889,18 @@ bool MachineOutliner::outline(Module &M,
// Insert the call.
auto CallInst = TII.insertOutlinedCall(M, MBB, StartIt, *MF, C);
+// Insert the call.
+#ifndef NDEBUG
+ auto MBBBeingOutlinedFromName =
+ MBB.getName().empty() ? "<unknown>" : MBB.getName().str();
+ auto MFBeingOutlinedFromName = MBB.getParent()->getName().empty()
+ ? "<unknown>"
+ : MBB.getParent()->getName().str();
+ LLVM_DEBUG(dbgs() << " CALL: " << MF->getName() << " in "
+ << MFBeingOutlinedFromName << ":"
+ << MBBBeingOutlinedFromName << "\n");
+ LLVM_DEBUG(dbgs() << " .. " << *CallInst);
+#endif
// If the caller tracks liveness, then we need to make sure that
// anything we outline doesn't break liveness assumptions. The outlined
@@ -859,9 +967,8 @@ bool MachineOutliner::outline(Module &M,
MBB.erase(std::next(StartIt), std::next(EndIt));
// Keep track of what we removed by marking them all as -1.
- for (unsigned &I :
- llvm::make_range(Mapper.UnsignedVec.begin() + C.getStartIdx(),
- Mapper.UnsignedVec.begin() + C.getEndIdx() + 1))
+ for (unsigned &I : make_range(UnsignedVecBegin + C.getStartIdx(),
+ UnsignedVecBegin + C.getEndIdx() + 1))
I = static_cast<unsigned>(-1);
OutlinedSomething = true;
@@ -878,13 +985,12 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
MachineModuleInfo &MMI) {
// Build instruction mappings for each function in the module. Start by
// iterating over each Function in M.
+ LLVM_DEBUG(dbgs() << "*** Populating mapper ***\n");
for (Function &F : M) {
+ LLVM_DEBUG(dbgs() << "MAPPING FUNCTION: " << F.getName() << "\n");
if (F.hasFnAttribute("nooutline")) {
- LLVM_DEBUG({
- dbgs() << "... Skipping function with nooutline attribute: "
- << F.getName() << "\n";
- });
+ LLVM_DEBUG(dbgs() << "SKIP: Function has nooutline attribute\n");
continue;
}
@@ -894,44 +1000,58 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
// If it doesn't, then there's nothing to outline from. Move to the next
// Function.
- if (!MF)
+ if (!MF) {
+ LLVM_DEBUG(dbgs() << "SKIP: Function does not have a MachineFunction\n");
continue;
+ }
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
-
- if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF))
+ if (!RunOnAllFunctions && !TII->shouldOutlineFromFunctionByDefault(*MF)) {
+ LLVM_DEBUG(dbgs() << "SKIP: Target does not want to outline from "
+ "function by default\n");
continue;
+ }
// We have a MachineFunction. Ask the target if it's suitable for outlining.
// If it isn't, then move on to the next Function in the module.
- if (!TII->isFunctionSafeToOutlineFrom(*MF, OutlineFromLinkOnceODRs))
+ if (!TII->isFunctionSafeToOutlineFrom(*MF, OutlineFromLinkOnceODRs)) {
+ LLVM_DEBUG(dbgs() << "SKIP: " << MF->getName()
+ << ": unsafe to outline from\n");
continue;
+ }
// We have a function suitable for outlining. Iterate over every
// MachineBasicBlock in MF and try to map its instructions to a list of
// unsigned integers.
+ const unsigned MinMBBSize = 2;
+
for (MachineBasicBlock &MBB : *MF) {
+ LLVM_DEBUG(dbgs() << " MAPPING MBB: '" << MBB.getName() << "'\n");
// If there isn't anything in MBB, then there's no point in outlining from
// it.
// If there are fewer than 2 instructions in the MBB, then it can't ever
// contain something worth outlining.
// FIXME: This should be based off of the maximum size in B of an outlined
// call versus the size in B of the MBB.
- if (MBB.empty() || MBB.size() < 2)
+ if (MBB.size() < MinMBBSize) {
+ LLVM_DEBUG(dbgs() << " SKIP: MBB size less than minimum size of "
+ << MinMBBSize << "\n");
continue;
+ }
// Check if MBB could be the target of an indirect branch. If it is, then
// we don't want to outline from it.
- if (MBB.hasAddressTaken())
+ if (MBB.hasAddressTaken()) {
+ LLVM_DEBUG(dbgs() << " SKIP: MBB's address is taken\n");
continue;
+ }
// MBB is suitable for outlining. Map it to a list of unsigneds.
Mapper.convertToUnsignedVec(MBB, *TII);
}
-
- // Statistics.
- UnsignedVecSize = Mapper.UnsignedVec.size();
}
+ // Statistics.
+ UnsignedVecSize = Mapper.UnsignedVec.size();
}
void MachineOutliner::initSizeRemarkInfo(
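In findCandidates() above, a start index of a repeated sequence is kept only when its inclusive range does not overlap any candidate already accepted for that sequence, with overlap meaning EndIdx >= C.getStartIdx() && StartIdx <= C.getEndIdx(). A standalone sketch of that filter, with illustrative Cand and keepNonOverlapping names rather than the outliner's own types:

    // Sketch only: Cand mirrors the [StartIdx, EndIdx] bookkeeping in the diff.
    #include <algorithm>
    #include <vector>

    struct Cand { unsigned Start, End; }; // inclusive mapped-vector index range

    // Two inclusive ranges overlap unless one ends before the other starts.
    static bool overlaps(const Cand &A, const Cand &B) {
      return A.End >= B.Start && A.Start <= B.End;
    }

    // Keep a start index only if it does not overlap any candidate already kept
    // for this sequence (greedy, in the order the start indices arrive).
    std::vector<Cand> keepNonOverlapping(const std::vector<unsigned> &StartIndices,
                                         unsigned Len) {
      std::vector<Cand> Kept;
      for (unsigned S : StartIndices) {
        Cand C{S, S + Len - 1};
        if (std::none_of(Kept.begin(), Kept.end(),
                         [&](const Cand &K) { return overlaps(C, K); }))
          Kept.push_back(C);
      }
      return Kept;
    }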
diff --git a/llvm/lib/CodeGen/MachinePassManager.cpp b/llvm/lib/CodeGen/MachinePassManager.cpp
index 039634f3d047..439ff8babcc6 100644
--- a/llvm/lib/CodeGen/MachinePassManager.cpp
+++ b/llvm/lib/CodeGen/MachinePassManager.cpp
@@ -91,8 +91,8 @@ Error MachineFunctionPassManager::run(Module &M,
// TODO: EmitSizeRemarks
PreservedAnalyses PassPA = P->run(MF, MFAM);
- PI.runAfterPass(*P, MF, PassPA);
MFAM.invalidate(MF, PassPA);
+ PI.runAfterPass(*P, MF, PassPA);
}
}
} while (true);
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index adb630469003..c7e7497dab36 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -496,7 +496,7 @@ void SwingSchedulerDAG::schedule() {
updatePhiDependences();
Topo.InitDAGTopologicalSorting();
changeDependences();
- postprocessDAG();
+ postProcessDAG();
LLVM_DEBUG(dump());
NodeSetType NodeSets;
@@ -865,13 +865,11 @@ void SwingSchedulerDAG::updatePhiDependences() {
unsigned HasPhiDef = 0;
MachineInstr *MI = I.getInstr();
// Iterate over each operand, and we process the definitions.
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end();
- MOI != MOE; ++MOI) {
- if (!MOI->isReg())
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg())
continue;
- Register Reg = MOI->getReg();
- if (MOI->isDef()) {
+ Register Reg = MO.getReg();
+ if (MO.isDef()) {
// If the register is used by a Phi, then create an anti dependence.
for (MachineRegisterInfo::use_instr_iterator
UI = MRI.use_instr_begin(Reg),
@@ -893,7 +891,7 @@ void SwingSchedulerDAG::updatePhiDependences() {
}
}
}
- } else if (MOI->isUse()) {
+ } else if (MO.isUse()) {
// If the register is defined by a Phi, then create a true dependence.
MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
if (DefMI == nullptr)
@@ -903,7 +901,7 @@ void SwingSchedulerDAG::updatePhiDependences() {
if (!MI->isPHI()) {
SDep Dep(SU, SDep::Data, Reg);
Dep.setLatency(0);
- ST.adjustSchedDependency(SU, 0, &I, MI->getOperandNo(MOI), Dep);
+ ST.adjustSchedDependency(SU, 0, &I, MO.getOperandNo(), Dep);
I.addPred(Dep);
} else {
HasPhiUse = Reg;
@@ -1559,31 +1557,28 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
const MachineInstr *MI = SU->getInstr();
if (MI->isPHI())
continue;
- for (const MachineOperand &MO : MI->operands())
- if (MO.isReg() && MO.isUse()) {
- Register Reg = MO.getReg();
- if (Reg.isVirtual())
- Uses.insert(Reg);
- else if (MRI.isAllocatable(Reg))
- for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
- ++Units)
- Uses.insert(*Units);
- }
+ for (const MachineOperand &MO : MI->all_uses()) {
+ Register Reg = MO.getReg();
+ if (Reg.isVirtual())
+ Uses.insert(Reg);
+ else if (MRI.isAllocatable(Reg))
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
+ Uses.insert(Unit);
+ }
}
for (SUnit *SU : NS)
- for (const MachineOperand &MO : SU->getInstr()->operands())
- if (MO.isReg() && MO.isDef() && !MO.isDead()) {
+ for (const MachineOperand &MO : SU->getInstr()->all_defs())
+ if (!MO.isDead()) {
Register Reg = MO.getReg();
if (Reg.isVirtual()) {
if (!Uses.count(Reg))
LiveOutRegs.push_back(RegisterMaskPair(Reg,
LaneBitmask::getNone()));
} else if (MRI.isAllocatable(Reg)) {
- for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
- ++Units)
- if (!Uses.count(*Units))
- LiveOutRegs.push_back(RegisterMaskPair(*Units,
- LaneBitmask::getNone()));
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg()))
+ if (!Uses.count(Unit))
+ LiveOutRegs.push_back(
+ RegisterMaskPair(Unit, LaneBitmask::getNone()));
}
}
RPTracker.addLiveRegs(LiveOutRegs);
@@ -2316,7 +2311,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
}
-void SwingSchedulerDAG::postprocessDAG() {
+void SwingSchedulerDAG::postProcessDAG() {
for (auto &M : Mutations)
M->apply(this);
}
@@ -2654,10 +2649,7 @@ bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
if (!isLoopCarried(SSD, *Phi))
return false;
unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
- for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
- MachineOperand &DMO = Def->getOperand(i);
- if (!DMO.isReg() || !DMO.isDef())
- continue;
+ for (MachineOperand &DMO : Def->all_defs()) {
if (DMO.getReg() == LoopReg)
return true;
}
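Several MachinePipeliner loops above drop hand-written isReg()/isDef()/isUse() checks over MI->operands() in favour of the all_defs()/all_uses() operand ranges, and replace MCRegUnitIterator with TRI->regunits(). A toy stand-in for the filtered-view idea, with hypothetical Operand and allDefs names rather than the MachineOperand API:

    // Sketch only: filtering is done once in the accessor, not at every caller.
    #include <vector>

    struct Operand {
      bool IsReg = false;
      bool IsDef = false;
      unsigned Reg = 0;
    };

    std::vector<const Operand *> allDefs(const std::vector<Operand> &Ops) {
      std::vector<const Operand *> Defs;
      for (const Operand &Op : Ops)
        if (Op.IsReg && Op.IsDef)
          Defs.push_back(&Op);
      return Defs;
    }

    // Call sites then read like the updated loops:
    //   for (const Operand *MO : allDefs(Ops)) { /* use MO->Reg */ }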
diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 1ad08e19feae..0048918fc53b 100644
--- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -101,13 +101,13 @@ MachineRegisterInfo::constrainRegAttrs(Register Reg,
const auto RegCB = getRegClassOrRegBank(Reg);
if (RegCB.isNull())
setRegClassOrRegBank(Reg, ConstrainingRegCB);
- else if (RegCB.is<const TargetRegisterClass *>() !=
- ConstrainingRegCB.is<const TargetRegisterClass *>())
+ else if (isa<const TargetRegisterClass *>(RegCB) !=
+ isa<const TargetRegisterClass *>(ConstrainingRegCB))
return false;
- else if (RegCB.is<const TargetRegisterClass *>()) {
+ else if (isa<const TargetRegisterClass *>(RegCB)) {
if (!::constrainRegClass(
- *this, Reg, RegCB.get<const TargetRegisterClass *>(),
- ConstrainingRegCB.get<const TargetRegisterClass *>(), MinNumRegs))
+ *this, Reg, cast<const TargetRegisterClass *>(RegCB),
+ cast<const TargetRegisterClass *>(ConstrainingRegCB), MinNumRegs))
return false;
} else if (RegCB != ConstrainingRegCB)
return false;
@@ -644,16 +644,8 @@ void MachineRegisterInfo::setCalleeSavedRegs(ArrayRef<MCPhysReg> CSRs) {
bool MachineRegisterInfo::isReservedRegUnit(unsigned Unit) const {
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
for (MCRegUnitRootIterator Root(Unit, TRI); Root.isValid(); ++Root) {
- bool IsRootReserved = true;
- for (MCSuperRegIterator Super(*Root, TRI, /*IncludeSelf=*/true);
- Super.isValid(); ++Super) {
- MCRegister Reg = *Super;
- if (!isReserved(Reg)) {
- IsRootReserved = false;
- break;
- }
- }
- if (IsRootReserved)
+ if (all_of(TRI->superregs_inclusive(*Root),
+ [&](MCPhysReg Super) { return isReserved(Super); }))
return true;
}
return false;
diff --git a/llvm/lib/CodeGen/MachineSSAContext.cpp b/llvm/lib/CodeGen/MachineSSAContext.cpp
index 6de8f8da9254..324084fb9c32 100644
--- a/llvm/lib/CodeGen/MachineSSAContext.cpp
+++ b/llvm/lib/CodeGen/MachineSSAContext.cpp
@@ -21,8 +21,6 @@
using namespace llvm;
-const Register MachineSSAContext::ValueRefNull{};
-
void MachineSSAContext::setFunction(MachineFunction &Fn) {
MF = &Fn;
RegInfo = &MF->getRegInfo();
@@ -42,10 +40,8 @@ void MachineSSAContext::appendBlockTerms(
void MachineSSAContext::appendBlockDefs(SmallVectorImpl<Register> &defs,
const MachineBasicBlock &block) {
for (const MachineInstr &instr : block.instrs()) {
- for (const MachineOperand &op : instr.operands()) {
- if (op.isReg() && op.isDef())
- defs.push_back(op.getReg());
- }
+ for (const MachineOperand &op : instr.all_defs())
+ defs.push_back(op.getReg());
}
}
@@ -56,7 +52,7 @@ MachineBasicBlock *MachineSSAContext::getDefBlock(Register value) const {
return RegInfo->getVRegDef(value)->getParent();
}
-bool MachineSSAContext::isConstantValuePhi(const MachineInstr &Phi) {
+bool MachineSSAContext::isConstantOrUndefValuePhi(const MachineInstr &Phi) {
return Phi.isConstantValuePHI();
}
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 5ab5a40e7574..ba5432459d12 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
@@ -56,7 +57,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -98,9 +98,13 @@ cl::opt<bool> PrintDAGs("misched-print-dags", cl::Hidden,
cl::opt<bool> MISchedDumpReservedCycles(
"misched-dump-reserved-cycles", cl::Hidden, cl::init(false),
cl::desc("Dump resource usage at schedule boundary."));
+cl::opt<bool> MischedDetailResourceBooking(
+ "misched-detail-resource-booking", cl::Hidden, cl::init(false),
+    cl::desc("Show details of invoking getNextResourceCycle."));
#else
const bool ViewMISchedDAGs = false;
const bool PrintDAGs = false;
+const bool MischedDetailResourceBooking = false;
#ifdef LLVM_ENABLE_DUMP
const bool MISchedDumpReservedCycles = false;
#endif // LLVM_ENABLE_DUMP
@@ -147,6 +151,28 @@ static cl::opt<unsigned>
cl::desc("The threshold for fast cluster"),
cl::init(1000));
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static cl::opt<bool> MISchedDumpScheduleTrace(
+ "misched-dump-schedule-trace", cl::Hidden, cl::init(false),
+ cl::desc("Dump resource usage at schedule boundary."));
+static cl::opt<unsigned>
+ HeaderColWidth("misched-dump-schedule-trace-col-header-width", cl::Hidden,
+ cl::desc("Set width of the columns with "
+ "the resources and schedule units"),
+ cl::init(19));
+static cl::opt<unsigned>
+ ColWidth("misched-dump-schedule-trace-col-width", cl::Hidden,
+ cl::desc("Set width of the columns showing resource booking."),
+ cl::init(5));
+static cl::opt<bool> MISchedSortResourcesInTrace(
+ "misched-sort-resources-in-trace", cl::Hidden, cl::init(true),
+ cl::desc("Sort the resources printed in the dump trace"));
+#endif
+
+static cl::opt<unsigned>
+ MIResourceCutOff("misched-resource-cutoff", cl::Hidden,
+ cl::desc("Number of intervals to track"), cl::init(10));
+
// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;
@@ -777,7 +803,7 @@ void ScheduleDAGMI::schedule() {
// Build the DAG.
buildSchedGraph(AA);
- postprocessDAG();
+ postProcessDAG();
SmallVector<SUnit*, 8> TopRoots, BotRoots;
findRootsAndBiasEdges(TopRoots, BotRoots);
@@ -844,7 +870,7 @@ void ScheduleDAGMI::schedule() {
}
/// Apply each ScheduleDAGMutation step in order.
-void ScheduleDAGMI::postprocessDAG() {
+void ScheduleDAGMI::postProcessDAG() {
for (auto &m : Mutations)
m->apply(this);
}
@@ -931,7 +957,181 @@ void ScheduleDAGMI::placeDebugValues() {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static const char *scheduleTableLegend = " i: issue\n x: resource booked";
+
+LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceTopDown() const {
+ // Bail off when there is no schedule model to query.
+ if (!SchedModel.hasInstrSchedModel())
+ return;
+
+ // Nothing to show if there is no or just one instruction.
+ if (BB->size() < 2)
+ return;
+
+ dbgs() << " * Schedule table (TopDown):\n";
+ dbgs() << scheduleTableLegend << "\n";
+ const unsigned FirstCycle = getSUnit(&*(std::begin(*this)))->TopReadyCycle;
+ unsigned LastCycle = getSUnit(&*(std::prev(std::end(*this))))->TopReadyCycle;
+ for (MachineInstr &MI : *this) {
+ SUnit *SU = getSUnit(&MI);
+ if (!SU)
+ continue;
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+ for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
+ PE = SchedModel.getWriteProcResEnd(SC);
+ PI != PE; ++PI) {
+ if (SU->TopReadyCycle + PI->Cycles - 1 > LastCycle)
+ LastCycle = SU->TopReadyCycle + PI->Cycles - 1;
+ }
+ }
+ // Print the header with the cycles
+ dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
+ for (unsigned C = FirstCycle; C <= LastCycle; ++C)
+ dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
+ dbgs() << "|\n";
+
+ for (MachineInstr &MI : *this) {
+ SUnit *SU = getSUnit(&MI);
+ if (!SU) {
+ dbgs() << "Missing SUnit\n";
+ continue;
+ }
+ std::string NodeName("SU(");
+ NodeName += std::to_string(SU->NodeNum) + ")";
+ dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
+ unsigned C = FirstCycle;
+ for (; C <= LastCycle; ++C) {
+ if (C == SU->TopReadyCycle)
+ dbgs() << llvm::left_justify("| i", ColWidth);
+ else
+ dbgs() << llvm::left_justify("|", ColWidth);
+ }
+ dbgs() << "|\n";
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+
+ SmallVector<MCWriteProcResEntry, 4> ResourcesIt(
+ make_range(SchedModel.getWriteProcResBegin(SC),
+ SchedModel.getWriteProcResEnd(SC)));
+
+ if (MISchedSortResourcesInTrace)
+ llvm::stable_sort(ResourcesIt,
+ [](const MCWriteProcResEntry &LHS,
+ const MCWriteProcResEntry &RHS) -> bool {
+ return LHS.StartAtCycle < RHS.StartAtCycle ||
+ (LHS.StartAtCycle == RHS.StartAtCycle &&
+ LHS.Cycles < RHS.Cycles);
+ });
+ for (const MCWriteProcResEntry &PI : ResourcesIt) {
+ C = FirstCycle;
+ const std::string ResName =
+ SchedModel.getResourceName(PI.ProcResourceIdx);
+ dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
+ for (; C < SU->TopReadyCycle + PI.StartAtCycle; ++C) {
+ dbgs() << llvm::left_justify("|", ColWidth);
+ }
+ for (unsigned I = 0, E = PI.Cycles - PI.StartAtCycle; I != E; ++I, ++C)
+ dbgs() << llvm::left_justify("| x", ColWidth);
+ while (C++ <= LastCycle)
+ dbgs() << llvm::left_justify("|", ColWidth);
+ // Place end char
+ dbgs() << "| \n";
+ }
+ }
+}
+
+LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
+ // Bail off when there is no schedule model to query.
+ if (!SchedModel.hasInstrSchedModel())
+ return;
+
+ // Nothing to show if there is no or just one instruction.
+ if (BB->size() < 2)
+ return;
+
+ dbgs() << " * Schedule table (BottomUp):\n";
+ dbgs() << scheduleTableLegend << "\n";
+
+ const int FirstCycle = getSUnit(&*(std::begin(*this)))->BotReadyCycle;
+ int LastCycle = getSUnit(&*(std::prev(std::end(*this))))->BotReadyCycle;
+ for (MachineInstr &MI : *this) {
+ SUnit *SU = getSUnit(&MI);
+ if (!SU)
+ continue;
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+ for (TargetSchedModel::ProcResIter PI = SchedModel.getWriteProcResBegin(SC),
+ PE = SchedModel.getWriteProcResEnd(SC);
+ PI != PE; ++PI) {
+ if ((int)SU->BotReadyCycle - PI->Cycles + 1 < LastCycle)
+ LastCycle = (int)SU->BotReadyCycle - PI->Cycles + 1;
+ }
+ }
+ // Print the header with the cycles
+ dbgs() << llvm::left_justify("Cycle", HeaderColWidth);
+ for (int C = FirstCycle; C >= LastCycle; --C)
+ dbgs() << llvm::left_justify("| " + std::to_string(C), ColWidth);
+ dbgs() << "|\n";
+
+ for (MachineInstr &MI : *this) {
+ SUnit *SU = getSUnit(&MI);
+ if (!SU) {
+ dbgs() << "Missing SUnit\n";
+ continue;
+ }
+ std::string NodeName("SU(");
+ NodeName += std::to_string(SU->NodeNum) + ")";
+ dbgs() << llvm::left_justify(NodeName, HeaderColWidth);
+ int C = FirstCycle;
+ for (; C >= LastCycle; --C) {
+ if (C == (int)SU->BotReadyCycle)
+ dbgs() << llvm::left_justify("| i", ColWidth);
+ else
+ dbgs() << llvm::left_justify("|", ColWidth);
+ }
+ dbgs() << "|\n";
+ const MCSchedClassDesc *SC = getSchedClass(SU);
+ SmallVector<MCWriteProcResEntry, 4> ResourcesIt(
+ make_range(SchedModel.getWriteProcResBegin(SC),
+ SchedModel.getWriteProcResEnd(SC)));
+
+ if (MISchedSortResourcesInTrace)
+ llvm::stable_sort(ResourcesIt,
+ [](const MCWriteProcResEntry &LHS,
+ const MCWriteProcResEntry &RHS) -> bool {
+ return LHS.StartAtCycle < RHS.StartAtCycle ||
+ (LHS.StartAtCycle == RHS.StartAtCycle &&
+ LHS.Cycles < RHS.Cycles);
+ });
+ for (const MCWriteProcResEntry &PI : ResourcesIt) {
+ C = FirstCycle;
+ const std::string ResName =
+ SchedModel.getResourceName(PI.ProcResourceIdx);
+ dbgs() << llvm::right_justify(ResName + " ", HeaderColWidth);
+ for (; C > ((int)SU->BotReadyCycle - (int)PI.StartAtCycle); --C) {
+ dbgs() << llvm::left_justify("|", ColWidth);
+ }
+ for (unsigned I = 0, E = PI.Cycles - PI.StartAtCycle; I != E; ++I, --C)
+ dbgs() << llvm::left_justify("| x", ColWidth);
+ while (C-- >= LastCycle)
+ dbgs() << llvm::left_justify("|", ColWidth);
+ // Place end char
+ dbgs() << "| \n";
+ }
+ }
+}
+#endif
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
+ if (MISchedDumpScheduleTrace) {
+ if (ForceTopDown)
+ dumpScheduleTraceTopDown();
+ else if (ForceBottomUp)
+ dumpScheduleTraceBottomUp();
+ else {
+ dbgs() << "* Schedule table (Bidirectional): not implemented\n";
+ }
+ }
+
for (MachineInstr &MI : *this) {
if (SUnit *SU = getSUnit(&MI))
dumpNode(*SU);
@@ -967,8 +1167,8 @@ void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
// Ignore re-defs.
if (TrackLaneMasks) {
bool FoundDef = false;
- for (const MachineOperand &MO2 : MI.operands()) {
- if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
+ for (const MachineOperand &MO2 : MI.all_defs()) {
+ if (MO2.getReg() == Reg && !MO2.isDead()) {
FoundDef = true;
break;
}
@@ -1223,7 +1423,7 @@ void ScheduleDAGMILive::schedule() {
LLVM_DEBUG(SchedImpl->dumpPolicy());
buildDAGWithRegPressure();
- postprocessDAG();
+ postProcessDAG();
SmallVector<SUnit*, 8> TopRoots, BotRoots;
findRootsAndBiasEdges(TopRoots, BotRoots);
@@ -2008,6 +2208,7 @@ void SchedBoundary::reset() {
ZoneCritResIdx = 0;
IsResourceLimited = false;
ReservedCycles.clear();
+ ReservedResourceSegments.clear();
ReservedCyclesIndex.clear();
ResourceGroupSubUnitMasks.clear();
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
@@ -2036,7 +2237,8 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
unsigned PIdx = PI->ProcResourceIdx;
unsigned Factor = SchedModel->getResourceFactor(PIdx);
- RemainingCounts[PIdx] += (Factor * PI->Cycles);
+ assert(PI->Cycles >= PI->StartAtCycle);
+ RemainingCounts[PIdx] += (Factor * (PI->Cycles - PI->StartAtCycle));
}
}
}
@@ -2089,14 +2291,24 @@ unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
/// Compute the next cycle at which the given processor resource unit
/// can be scheduled.
unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
- unsigned Cycles) {
+ unsigned Cycles,
+ unsigned StartAtCycle) {
+ if (SchedModel && SchedModel->enableIntervals()) {
+ if (isTop())
+ return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromTop(
+ CurrCycle, StartAtCycle, Cycles);
+
+ return ReservedResourceSegments[InstanceIdx].getFirstAvailableAtFromBottom(
+ CurrCycle, StartAtCycle, Cycles);
+ }
+
unsigned NextUnreserved = ReservedCycles[InstanceIdx];
// If this resource has never been used, always return cycle zero.
if (NextUnreserved == InvalidCycle)
- return 0;
+ return CurrCycle;
// For bottom-up scheduling add the cycles needed for the current operation.
if (!isTop())
- NextUnreserved += Cycles;
+ NextUnreserved = std::max(CurrCycle, NextUnreserved + Cycles);
return NextUnreserved;
}
@@ -2105,8 +2317,12 @@ unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
/// instance in the reserved cycles vector.
std::pair<unsigned, unsigned>
SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
- unsigned Cycles) {
-
+ unsigned Cycles, unsigned StartAtCycle) {
+ if (MischedDetailResourceBooking) {
+ LLVM_DEBUG(dbgs() << " Resource booking (@" << CurrCycle << "c): \n");
+ LLVM_DEBUG(dumpReservedCycles());
+ LLVM_DEBUG(dbgs() << " getNextResourceCycle (@" << CurrCycle << "c): \n");
+ }
unsigned MinNextUnreserved = InvalidCycle;
unsigned InstanceIdx = 0;
unsigned StartIndex = ReservedCyclesIndex[PIdx];
@@ -2134,7 +2350,7 @@ SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) {
unsigned NextUnreserved, NextInstanceIdx;
std::tie(NextUnreserved, NextInstanceIdx) =
- getNextResourceCycle(SC, SubUnits[I], Cycles);
+ getNextResourceCycle(SC, SubUnits[I], Cycles, StartAtCycle);
if (MinNextUnreserved > NextUnreserved) {
InstanceIdx = NextInstanceIdx;
MinNextUnreserved = NextUnreserved;
@@ -2145,12 +2361,21 @@ SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx,
for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
++I) {
- unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
+ unsigned NextUnreserved =
+ getNextResourceCycleByInstance(I, Cycles, StartAtCycle);
+ if (MischedDetailResourceBooking)
+ LLVM_DEBUG(dbgs() << " Instance " << I - StartIndex << " available @"
+ << NextUnreserved << "c\n");
if (MinNextUnreserved > NextUnreserved) {
InstanceIdx = I;
MinNextUnreserved = NextUnreserved;
}
}
+ if (MischedDetailResourceBooking)
+ LLVM_DEBUG(dbgs() << " selecting " << SchedModel->getResourceName(PIdx)
+ << "[" << InstanceIdx - StartIndex << "]"
+ << " available @" << MinNextUnreserved << "c"
+ << "\n");
return std::make_pair(MinNextUnreserved, InstanceIdx);
}
@@ -2195,8 +2420,10 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
SchedModel->getWriteProcResEnd(SC))) {
unsigned ResIdx = PE.ProcResourceIdx;
unsigned Cycles = PE.Cycles;
+ unsigned StartAtCycle = PE.StartAtCycle;
unsigned NRCycle, InstanceIdx;
- std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles);
+ std::tie(NRCycle, InstanceIdx) =
+ getNextResourceCycle(SC, ResIdx, Cycles, StartAtCycle);
if (NRCycle > CurrCycle) {
#if LLVM_ENABLE_ABI_BREAKING_CHECKS
MaxObservedStall = std::max(Cycles, MaxObservedStall);
@@ -2347,9 +2574,10 @@ void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
/// \return the next cycle at which the instruction may execute without
/// oversubscribing resources.
unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
- unsigned Cycles, unsigned NextCycle) {
+ unsigned Cycles, unsigned NextCycle,
+ unsigned StartAtCycle) {
unsigned Factor = SchedModel->getResourceFactor(PIdx);
- unsigned Count = Factor * Cycles;
+ unsigned Count = Factor * (Cycles - StartAtCycle);
LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +"
<< Cycles << "x" << Factor << "u\n");
@@ -2369,7 +2597,8 @@ unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx,
}
// For reserved resources, record the highest cycle using the resource.
unsigned NextAvailable, InstanceIdx;
- std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(SC, PIdx, Cycles);
+ std::tie(NextAvailable, InstanceIdx) =
+ getNextResourceCycle(SC, PIdx, Cycles, StartAtCycle);
if (NextAvailable > CurrCycle) {
LLVM_DEBUG(dbgs() << " Resource conflict: "
<< SchedModel->getResourceName(PIdx)
@@ -2448,8 +2677,8 @@ void SchedBoundary::bumpNode(SUnit *SU) {
for (TargetSchedModel::ProcResIter
PI = SchedModel->getWriteProcResBegin(SC),
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
- unsigned RCycle =
- countResource(SC, PI->ProcResourceIdx, PI->Cycles, NextCycle);
+ unsigned RCycle = countResource(SC, PI->ProcResourceIdx, PI->Cycles,
+ NextCycle, PI->StartAtCycle);
if (RCycle > NextCycle)
NextCycle = RCycle;
}
@@ -2463,14 +2692,33 @@ void SchedBoundary::bumpNode(SUnit *SU) {
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
unsigned PIdx = PI->ProcResourceIdx;
if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
- unsigned ReservedUntil, InstanceIdx;
- std::tie(ReservedUntil, InstanceIdx) =
- getNextResourceCycle(SC, PIdx, 0);
- if (isTop()) {
- ReservedCycles[InstanceIdx] =
- std::max(ReservedUntil, NextCycle + PI->Cycles);
- } else
- ReservedCycles[InstanceIdx] = NextCycle;
+
+ if (SchedModel && SchedModel->enableIntervals()) {
+ unsigned ReservedUntil, InstanceIdx;
+ std::tie(ReservedUntil, InstanceIdx) =
+ getNextResourceCycle(SC, PIdx, PI->Cycles, PI->StartAtCycle);
+ if (isTop()) {
+ ReservedResourceSegments[InstanceIdx].add(
+ ResourceSegments::getResourceIntervalTop(
+ NextCycle, PI->StartAtCycle, PI->Cycles),
+ MIResourceCutOff);
+ } else {
+ ReservedResourceSegments[InstanceIdx].add(
+ ResourceSegments::getResourceIntervalBottom(
+ NextCycle, PI->StartAtCycle, PI->Cycles),
+ MIResourceCutOff);
+ }
+ } else {
+
+ unsigned ReservedUntil, InstanceIdx;
+ std::tie(ReservedUntil, InstanceIdx) =
+ getNextResourceCycle(SC, PIdx, PI->Cycles, PI->StartAtCycle);
+ if (isTop()) {
+ ReservedCycles[InstanceIdx] =
+ std::max(ReservedUntil, NextCycle + PI->Cycles);
+ } else
+ ReservedCycles[InstanceIdx] = NextCycle;
+ }
}
}
}
@@ -2610,8 +2858,14 @@ LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const {
const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits;
std::string ResName = SchedModel->getResourceName(ResIdx);
for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) {
- dbgs() << ResName << "(" << UnitIdx
- << ") = " << ReservedCycles[StartIdx + UnitIdx] << "\n";
+ dbgs() << ResName << "(" << UnitIdx << ") = ";
+ if (SchedModel && SchedModel->enableIntervals()) {
+ if (ReservedResourceSegments.count(StartIdx + UnitIdx))
+ dbgs() << ReservedResourceSegments.at(StartIdx + UnitIdx);
+ else
+ dbgs() << "{ }\n";
+ } else
+ dbgs() << ReservedCycles[StartIdx + UnitIdx] << "\n";
}
StartIdx += NumUnits;
}
@@ -3978,3 +4232,101 @@ void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
void ScheduleDAGMI::viewGraph() {
viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
}
+
+/// Sort predicate for the intervals stored in an instance of
+/// ResourceSegments. Intervals are always disjoint (no intersection
+/// for any pairs of intervals), therefore we can sort the totality of
+/// the intervals by looking only at the left boundary.
+static bool sortIntervals(const ResourceSegments::IntervalTy &A,
+ const ResourceSegments::IntervalTy &B) {
+ return A.first < B.first;
+}
+
+unsigned ResourceSegments::getFirstAvailableAt(
+ unsigned CurrCycle, unsigned StartAtCycle, unsigned Cycle,
+ std::function<ResourceSegments::IntervalTy(unsigned, unsigned, unsigned)>
+ IntervalBuilder) const {
+ assert(std::is_sorted(std::begin(_Intervals), std::end(_Intervals),
+ sortIntervals) &&
+ "Cannot execute on an un-sorted set of intervals.");
+ unsigned RetCycle = CurrCycle;
+ ResourceSegments::IntervalTy NewInterval =
+ IntervalBuilder(RetCycle, StartAtCycle, Cycle);
+ for (auto &Interval : _Intervals) {
+ if (!intersects(NewInterval, Interval))
+ continue;
+
+ // Move the interval right next to the top of the one it
+ // intersects.
+ assert(Interval.second > NewInterval.first &&
+ "Invalid intervals configuration.");
+ RetCycle += (unsigned)Interval.second - (unsigned)NewInterval.first;
+ NewInterval = IntervalBuilder(RetCycle, StartAtCycle, Cycle);
+ }
+ return RetCycle;
+}
+
+void ResourceSegments::add(ResourceSegments::IntervalTy A,
+ const unsigned CutOff) {
+ assert(A.first < A.second && "Cannot add empty resource usage");
+ assert(CutOff > 0 && "0-size interval history has no use.");
+ assert(all_of(_Intervals,
+ [&A](const ResourceSegments::IntervalTy &Interval) -> bool {
+ return !intersects(A, Interval);
+ }) &&
+ "A resource is being overwritten");
+ _Intervals.push_back(A);
+
+ sortAndMerge();
+
+ // Do not keep the full history of the intervals, just the
+ // latest #CutOff.
+ while (_Intervals.size() > CutOff)
+ _Intervals.pop_front();
+}
+
+bool ResourceSegments::intersects(ResourceSegments::IntervalTy A,
+ ResourceSegments::IntervalTy B) {
+ assert(A.first <= A.second && "Invalid interval");
+ assert(B.first <= B.second && "Invalid interval");
+
+ // Share one boundary.
+ if ((A.first == B.first) || (A.second == B.second))
+ return true;
+
+  // full intersect:      [     ***     )  B
+  //                         [***)          A
+ if ((A.first > B.first) && (A.second < B.second))
+ return true;
+
+ // right intersect: [ ***) B
+ // [*** ) A
+ if ((A.first > B.first) && (A.first < B.second) && (A.second > B.second))
+ return true;
+
+ // left intersect: [*** ) B
+ // [ ***) A
+ if ((A.first < B.first) && (B.first < A.second) && (B.second > B.first))
+ return true;
+
+ return false;
+}
+
+void ResourceSegments::sortAndMerge() {
+ if (_Intervals.size() <= 1)
+ return;
+
+ // First sort the collection.
+ _Intervals.sort(sortIntervals);
+
+ // can use next because I have at least 2 elements in the list
+ auto next = std::next(std::begin(_Intervals));
+ auto E = std::end(_Intervals);
+ for (; next != E; ++next) {
+ if (std::prev(next)->second >= next->first) {
+ next->first = std::prev(next)->first;
+ _Intervals.erase(std::prev(next));
+ continue;
+ }
+ }
+}
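ResourceSegments, added above, books a processor resource as a sorted list of disjoint cycle intervals, and getFirstAvailableAt() slides the requested window to the right past every booked interval it collides with. A simplified standalone sketch of that search over plain half-open intervals; the in-tree version additionally treats shared boundaries as intersecting and builds the candidate interval differently for top-down and bottom-up scheduling:

    // Sketch only, assuming sorted, pairwise-disjoint booked intervals.
    #include <cassert>
    #include <list>
    #include <utility>

    using Interval = std::pair<unsigned, unsigned>; // [first, second)

    static bool intersects(Interval A, Interval B) {
      return A.first < B.second && B.first < A.second;
    }

    // Slide a booking window of NumCycles cycles right, past each booked
    // interval it collides with, and return the first cycle where it fits.
    unsigned firstAvailableAt(unsigned CurrCycle, unsigned NumCycles,
                              const std::list<Interval> &Booked) {
      unsigned Start = CurrCycle;
      Interval Want{Start, Start + NumCycles};
      for (const Interval &B : Booked) {
        if (!intersects(Want, B))
          continue;
        assert(B.second > Want.first && "booked intervals must be sorted");
        Start += B.second - Want.first; // jump just past the colliding interval
        Want = {Start, Start + NumCycles};
      }
      return Start;
    }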
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 8429d468254a..8da97dc7e742 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -115,15 +115,15 @@ STATISTIC(NumPostRACopySink, "Number of copies sunk after RA");
namespace {
class MachineSinking : public MachineFunctionPass {
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- MachineRegisterInfo *MRI; // Machine register information
- MachineDominatorTree *DT; // Machine dominator tree
- MachinePostDominatorTree *PDT; // Machine post dominator tree
- MachineCycleInfo *CI;
- MachineBlockFrequencyInfo *MBFI;
- const MachineBranchProbabilityInfo *MBPI;
- AliasAnalysis *AA;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ MachineRegisterInfo *MRI = nullptr; // Machine register information
+ MachineDominatorTree *DT = nullptr; // Machine dominator tree
+ MachinePostDominatorTree *PDT = nullptr; // Machine post dominator tree
+ MachineCycleInfo *CI = nullptr;
+ MachineBlockFrequencyInfo *MBFI = nullptr;
+ const MachineBranchProbabilityInfo *MBPI = nullptr;
+ AliasAnalysis *AA = nullptr;
RegisterClassInfo RegClassInfo;
// Remember which edges have been considered for breaking.
@@ -268,6 +268,44 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineSinking, DEBUG_TYPE,
"Machine code sinking", false, false)
+/// Return true if a target defined block prologue instruction interferes
+/// with a sink candidate.
+static bool blockPrologueInterferes(const MachineBasicBlock *BB,
+ MachineBasicBlock::const_iterator End,
+ const MachineInstr &MI,
+ const TargetRegisterInfo *TRI,
+ const TargetInstrInfo *TII,
+ const MachineRegisterInfo *MRI) {
+ for (MachineBasicBlock::const_iterator PI = BB->getFirstNonPHI(); PI != End;
+ ++PI) {
+ // Only check target defined prologue instructions
+ if (!TII->isBasicBlockPrologue(*PI))
+ continue;
+ for (auto &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isUse()) {
+ if (Reg.isPhysical() && MRI && MRI->isConstantPhysReg(Reg))
+ continue;
+ if (PI->modifiesRegister(Reg, TRI))
+ return true;
+ } else {
+ if (PI->readsRegister(Reg, TRI))
+ return true;
+ // Check for interference with non-dead defs
+ auto *DefOp = PI->findRegisterDefOperand(Reg, false, true, TRI);
+ if (DefOp && !DefOp->isDead())
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
MachineBasicBlock *MBB) {
if (!MI.isCopy())
@@ -331,7 +369,7 @@ bool MachineSinking::AllUsesDominatedByBlock(Register Reg,
// %p = PHI %y, %bb.0, %def, %bb.1
if (all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) {
MachineInstr *UseInst = MO.getParent();
- unsigned OpNo = UseInst->getOperandNo(&MO);
+ unsigned OpNo = MO.getOperandNo();
MachineBasicBlock *UseBlock = UseInst->getParent();
return UseBlock == MBB && UseInst->isPHI() &&
UseInst->getOperand(OpNo + 1).getMBB() == DefMBB;
@@ -602,9 +640,7 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
// MI is cheap, we probably don't want to break the critical edge for it.
// However, if this would allow some definitions of its source operands
// to be sunk then it's probably worth it.
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isUse())
- continue;
+ for (const MachineOperand &MO : MI.all_uses()) {
Register Reg = MO.getReg();
if (Reg == 0)
continue;
@@ -806,12 +842,10 @@ bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI,
continue;
if (Reg.isPhysical()) {
- if (MO.isUse() &&
- (MRI->isConstantPhysReg(Reg) || TII->isIgnorableUse(MO)))
- continue;
-
- // Don't handle non-constant and non-ignorable physical register.
- return false;
+ // Don't handle non-constant and non-ignorable physical register uses.
+ if (MO.isUse() && !MRI->isConstantPhysReg(Reg) && !TII->isIgnorableUse(MO))
+ return false;
+ continue;
}
// Users for the defs are all dominated by SuccToSinkTo.
@@ -972,16 +1006,24 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
if (MBB == SuccToSinkTo)
return nullptr;
+ if (!SuccToSinkTo)
+ return nullptr;
+
// It's not safe to sink instructions to EH landing pad. Control flow into
// landing pad is implicitly defined.
- if (SuccToSinkTo && SuccToSinkTo->isEHPad())
+ if (SuccToSinkTo->isEHPad())
return nullptr;
// It ought to be okay to sink instructions into an INLINEASM_BR target, but
// only if we make sure that MI occurs _before_ an INLINEASM_BR instruction in
// the source block (which this code does not yet do). So for now, forbid
// doing so.
- if (SuccToSinkTo && SuccToSinkTo->isInlineAsmBrIndirectTarget())
+ if (SuccToSinkTo->isInlineAsmBrIndirectTarget())
+ return nullptr;
+
+ MachineBasicBlock::const_iterator InsertPos =
+ SuccToSinkTo->SkipPHIsAndLabels(SuccToSinkTo->begin());
+ if (blockPrologueInterferes(SuccToSinkTo, InsertPos, MI, TRI, TII, MRI))
return nullptr;
return SuccToSinkTo;
@@ -1302,45 +1344,6 @@ bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) {
return true;
}
-/// Return true if a target defined block prologue instruction interferes
-/// with a sink candidate.
-static bool blockPrologueInterferes(MachineBasicBlock *BB,
- MachineBasicBlock::iterator End,
- MachineInstr &MI,
- const TargetRegisterInfo *TRI,
- const TargetInstrInfo *TII,
- const MachineRegisterInfo *MRI) {
- if (BB->begin() == End)
- return false; // no prologue
- for (MachineBasicBlock::iterator PI = BB->getFirstNonPHI(); PI != End; ++PI) {
- // Only check target defined prologue instructions
- if (!TII->isBasicBlockPrologue(*PI))
- continue;
- for (auto &MO : MI.operands()) {
- if (!MO.isReg())
- continue;
- Register Reg = MO.getReg();
- if (!Reg)
- continue;
- if (MO.isUse()) {
- if (Reg.isPhysical() &&
- (TII->isIgnorableUse(MO) || (MRI && MRI->isConstantPhysReg(Reg))))
- continue;
- if (PI->modifiesRegister(Reg, TRI))
- return true;
- } else {
- if (PI->readsRegister(Reg, TRI))
- return true;
- // Check for interference with non-dead defs
- auto *DefOp = PI->findRegisterDefOperand(Reg, false, true, TRI);
- if (DefOp && !DefOp->isDead())
- return true;
- }
- }
- }
- return false;
-}
-
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
@@ -1383,9 +1386,7 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// If the instruction to move defines a dead physical register which is live
// when leaving the basic block, don't move it because it could turn into a
// "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || MO.isUse())
- continue;
+ for (const MachineOperand &MO : MI.all_defs()) {
Register Reg = MO.getReg();
if (Reg == 0 || !Reg.isPhysical())
continue;
@@ -1463,8 +1464,8 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// Collect debug users of any vreg that this inst defines.
SmallVector<MIRegs, 4> DbgUsersToSink;
- for (auto &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
+ for (auto &MO : MI.all_defs()) {
+ if (!MO.getReg().isVirtual())
continue;
if (!SeenDbgUsers.count(MO.getReg()))
continue;
@@ -1498,10 +1499,8 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
// Note that we have to clear the kill flags for any register this instruction
// uses as we may sink over another instruction which currently kills the
// used registers.
- for (MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isUse())
- RegsToClearKillFlags.insert(MO.getReg()); // Remember to clear kill flags.
- }
+ for (MachineOperand &MO : MI.all_uses())
+ RegsToClearKillFlags.insert(MO.getReg()); // Remember to clear kill flags.
return true;
}
@@ -1517,8 +1516,8 @@ void MachineSinking::SalvageUnsunkDebugUsersOfCopy(
SmallVector<MachineInstr *, 4> DbgDefUsers;
SmallVector<Register, 4> DbgUseRegs;
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
- for (auto &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
+ for (auto &MO : MI.all_defs()) {
+ if (!MO.getReg().isVirtual())
continue;
DbgUseRegs.push_back(MO.getReg());
for (auto &User : MRI.use_instructions(MO.getReg())) {
@@ -1700,8 +1699,8 @@ static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
MachineFunction &MF = *SuccBB->getParent();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
for (unsigned DefReg : DefedRegsInCopy)
- for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S)
- SuccBB->removeLiveIn(*S);
+ for (MCPhysReg S : TRI->subregs_inclusive(DefReg))
+ SuccBB->removeLiveIn(S);
for (auto U : UsedOpsInCopy) {
Register SrcReg = MI->getOperand(U).getReg();
LaneBitmask Mask;
@@ -1793,9 +1792,8 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
}
// Record debug use of each reg unit.
- for (auto RI = MCRegUnitIterator(MO.getReg(), TRI); RI.isValid();
- ++RI)
- MIUnits[*RI].push_back(MO.getReg());
+ for (MCRegUnit Unit : TRI->regunits(MO.getReg()))
+ MIUnits[Unit].push_back(MO.getReg());
}
}
if (IsValid) {
@@ -1844,12 +1842,9 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// recorded which reg units that DBG_VALUEs read, if this instruction
// writes any of those units then the corresponding DBG_VALUEs must sink.
MapVector<MachineInstr *, MIRegs::second_type> DbgValsToSinkMap;
- for (auto &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
-
- for (auto RI = MCRegUnitIterator(MO.getReg(), TRI); RI.isValid(); ++RI) {
- for (const auto &MIRegs : SeenDbgInstrs.lookup(*RI)) {
+ for (auto &MO : MI.all_defs()) {
+ for (MCRegUnit Unit : TRI->regunits(MO.getReg())) {
+ for (const auto &MIRegs : SeenDbgInstrs.lookup(Unit)) {
auto &Regs = DbgValsToSinkMap[MIRegs.first];
for (unsigned Reg : MIRegs.second)
Regs.push_back(Reg);
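
Most hunks in this file are the same mechanical cleanup: hand-written filters over MI.operands() become MI.all_uses() or MI.all_defs(), and operand indices come from MO.getOperandNo() instead of MI.getOperandNo(&MO). A small before/after sketch of the idiom (the helper names are made up for illustration):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/Register.h"

    // Old shape: visit every operand and filter by hand.
    static void collectUseRegsOld(const llvm::MachineInstr &MI,
                                  llvm::SmallVectorImpl<llvm::Register> &Regs) {
      for (const llvm::MachineOperand &MO : MI.operands()) {
        if (!MO.isReg() || !MO.isUse())
          continue;
        Regs.push_back(MO.getReg());
      }
    }

    // New shape: all_uses() (and all_defs()) already yield only register
    // use (def) operands, so the filtering disappears at the call site.
    static void collectUseRegs(const llvm::MachineInstr &MI,
                               llvm::SmallVectorImpl<llvm::Register> &Regs) {
      for (const llvm::MachineOperand &MO : MI.all_uses())
        Regs.push_back(MO.getReg());
    }

MO.getOperandNo() recovers the same index the removed getOperandNo(&MO) calls computed, just queried from the operand side.
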
diff --git a/llvm/lib/CodeGen/MachineSizeOpts.cpp b/llvm/lib/CodeGen/MachineSizeOpts.cpp
index 28712d1a816b..53bed7397d09 100644
--- a/llvm/lib/CodeGen/MachineSizeOpts.cpp
+++ b/llvm/lib/CodeGen/MachineSizeOpts.cpp
@@ -24,168 +24,11 @@ extern cl::opt<bool> ForcePGSO;
extern cl::opt<int> PgsoCutoffInstrProf;
extern cl::opt<int> PgsoCutoffSampleProf;
-namespace {
-namespace machine_size_opts_detail {
-
-/// Like ProfileSummaryInfo::isColdBlock but for MachineBasicBlock.
-bool isColdBlock(const MachineBasicBlock *MBB,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- auto Count = MBFI->getBlockProfileCount(MBB);
- return Count && PSI->isColdCount(*Count);
-}
-
-bool isColdBlock(BlockFrequency BlockFreq,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
- return Count && PSI->isColdCount(*Count);
-}
-
-/// Like ProfileSummaryInfo::isHotBlockNthPercentile but for MachineBasicBlock.
-static bool isHotBlockNthPercentile(int PercentileCutoff,
- const MachineBasicBlock *MBB,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- auto Count = MBFI->getBlockProfileCount(MBB);
- return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count);
-}
-
-static bool isHotBlockNthPercentile(int PercentileCutoff,
- BlockFrequency BlockFreq,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
- return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count);
-}
-
-static bool isColdBlockNthPercentile(int PercentileCutoff,
- const MachineBasicBlock *MBB,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- auto Count = MBFI->getBlockProfileCount(MBB);
- return Count && PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
-}
-
-static bool isColdBlockNthPercentile(int PercentileCutoff,
- BlockFrequency BlockFreq,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
- return Count && PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
-}
-
-/// Like ProfileSummaryInfo::isFunctionColdInCallGraph but for
-/// MachineFunction.
-bool isFunctionColdInCallGraph(
- const MachineFunction *MF,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo &MBFI) {
- if (auto FunctionCount = MF->getFunction().getEntryCount())
- if (!PSI->isColdCount(FunctionCount->getCount()))
- return false;
- for (const auto &MBB : *MF)
- if (!isColdBlock(&MBB, PSI, &MBFI))
- return false;
- return true;
-}
-
-/// Like ProfileSummaryInfo::isFunctionHotInCallGraphNthPercentile but for
-/// MachineFunction.
-bool isFunctionHotInCallGraphNthPercentile(
- int PercentileCutoff,
- const MachineFunction *MF,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo &MBFI) {
- if (auto FunctionCount = MF->getFunction().getEntryCount())
- if (PSI->isHotCountNthPercentile(PercentileCutoff,
- FunctionCount->getCount()))
- return true;
- for (const auto &MBB : *MF)
- if (isHotBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI))
- return true;
- return false;
-}
-
-bool isFunctionColdInCallGraphNthPercentile(
- int PercentileCutoff, const MachineFunction *MF, ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo &MBFI) {
- if (auto FunctionCount = MF->getFunction().getEntryCount())
- if (!PSI->isColdCountNthPercentile(PercentileCutoff,
- FunctionCount->getCount()))
- return false;
- for (const auto &MBB : *MF)
- if (!isColdBlockNthPercentile(PercentileCutoff, &MBB, PSI, &MBFI))
- return false;
- return true;
-}
-} // namespace machine_size_opts_detail
-
-struct MachineBasicBlockBFIAdapter {
- static bool isFunctionColdInCallGraph(const MachineFunction *MF,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo &MBFI) {
- return machine_size_opts_detail::isFunctionColdInCallGraph(MF, PSI, MBFI);
- }
- static bool isFunctionHotInCallGraphNthPercentile(
- int CutOff,
- const MachineFunction *MF,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo &MBFI) {
- return machine_size_opts_detail::isFunctionHotInCallGraphNthPercentile(
- CutOff, MF, PSI, MBFI);
- }
- static bool isFunctionColdInCallGraphNthPercentile(
- int CutOff, const MachineFunction *MF, ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo &MBFI) {
- return machine_size_opts_detail::isFunctionColdInCallGraphNthPercentile(
- CutOff, MF, PSI, MBFI);
- }
- static bool isColdBlock(const MachineBasicBlock *MBB,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- return machine_size_opts_detail::isColdBlock(MBB, PSI, MBFI);
- }
- static bool isColdBlock(BlockFrequency BlockFreq,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- return machine_size_opts_detail::isColdBlock(BlockFreq, PSI, MBFI);
- }
- static bool isHotBlockNthPercentile(int CutOff,
- const MachineBasicBlock *MBB,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- return machine_size_opts_detail::isHotBlockNthPercentile(
- CutOff, MBB, PSI, MBFI);
- }
- static bool isHotBlockNthPercentile(int CutOff,
- BlockFrequency BlockFreq,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- return machine_size_opts_detail::isHotBlockNthPercentile(
- CutOff, BlockFreq, PSI, MBFI);
- }
- static bool isColdBlockNthPercentile(int CutOff, const MachineBasicBlock *MBB,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- return machine_size_opts_detail::isColdBlockNthPercentile(CutOff, MBB, PSI,
- MBFI);
- }
- static bool isColdBlockNthPercentile(int CutOff, BlockFrequency BlockFreq,
- ProfileSummaryInfo *PSI,
- const MachineBlockFrequencyInfo *MBFI) {
- return machine_size_opts_detail::isColdBlockNthPercentile(CutOff, BlockFreq,
- PSI, MBFI);
- }
-};
-} // end anonymous namespace
-
bool llvm::shouldOptimizeForSize(const MachineFunction *MF,
ProfileSummaryInfo *PSI,
const MachineBlockFrequencyInfo *MBFI,
PGSOQueryType QueryType) {
- return shouldFuncOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
- MF, PSI, MBFI, QueryType);
+ return shouldFuncOptimizeForSizeImpl(MF, PSI, MBFI, QueryType);
}
bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
@@ -193,8 +36,7 @@ bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
const MachineBlockFrequencyInfo *MBFI,
PGSOQueryType QueryType) {
assert(MBB);
- return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
- MBB, PSI, MBFI, QueryType);
+ return shouldOptimizeForSizeImpl(MBB, PSI, MBFI, QueryType);
}
bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
@@ -205,6 +47,6 @@ bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
if (!PSI || !MBFIW)
return false;
BlockFrequency BlockFreq = MBFIW->getBlockFreq(MBB);
- return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
- BlockFreq, PSI, &MBFIW->getMBFI(), QueryType);
+ return shouldOptimizeForSizeImpl(BlockFreq, PSI, &MBFIW->getMBFI(),
+ QueryType);
}
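
The removal above is purely internal: the BFI adapter struct and the machine_size_opts_detail helpers fold into the now non-templated shouldOptimizeForSizeImpl, so existing callers keep the same entry points. A hedged sketch of a typical query, assuming a pass that already has PSI and MBFI available (the wrapper function itself is hypothetical):

    #include "llvm/Analysis/ProfileSummaryInfo.h"
    #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
    #include "llvm/CodeGen/MachineSizeOpts.h"

    static bool preferSize(const llvm::MachineBasicBlock *MBB,
                           llvm::ProfileSummaryInfo *PSI,
                           const llvm::MachineBlockFrequencyInfo *MBFI) {
      // Same signature as before this change; only the implementation moved.
      return llvm::shouldOptimizeForSize(MBB, PSI, MBFI,
                                         llvm::PGSOQueryType::IRPass);
    }
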
diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 5c6efd4af074..4f66f2e672d1 100644
--- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -318,6 +318,21 @@ public:
: MachineTraceMetrics::Ensemble(mtm) {}
};
+/// Pick only the current basic block for the trace and do not choose any
+/// predecessors/successors.
+class LocalEnsemble : public MachineTraceMetrics::Ensemble {
+ const char *getName() const override { return "Local"; }
+ const MachineBasicBlock *pickTracePred(const MachineBasicBlock *) override {
+ return nullptr;
+ };
+ const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock *) override {
+ return nullptr;
+ };
+
+public:
+ LocalEnsemble(MachineTraceMetrics *MTM)
+ : MachineTraceMetrics::Ensemble(MTM) {}
+};
} // end anonymous namespace
// Select the preferred predecessor for MBB.
@@ -380,15 +395,19 @@ MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) {
// Get an Ensemble sub-class for the requested trace strategy.
MachineTraceMetrics::Ensemble *
-MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) {
- assert(strategy < TS_NumStrategies && "Invalid trace strategy enum");
- Ensemble *&E = Ensembles[strategy];
+MachineTraceMetrics::getEnsemble(MachineTraceStrategy strategy) {
+ assert(strategy < MachineTraceStrategy::TS_NumStrategies &&
+ "Invalid trace strategy enum");
+ Ensemble *&E = Ensembles[static_cast<size_t>(strategy)];
if (E)
return E;
// Allocate new Ensemble on demand.
switch (strategy) {
- case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this));
+ case MachineTraceStrategy::TS_MinInstrCount:
+ return (E = new MinInstrCountEnsemble(this));
+ case MachineTraceStrategy::TS_Local:
+ return (E = new LocalEnsemble(this));
default: llvm_unreachable("Invalid trace strategy enum");
}
}
@@ -655,9 +674,7 @@ static bool getDataDeps(const MachineInstr &UseMI,
return false;
bool HasPhysRegs = false;
- for (MachineInstr::const_mop_iterator I = UseMI.operands_begin(),
- E = UseMI.operands_end(); I != E; ++I) {
- const MachineOperand &MO = *I;
+ for (const MachineOperand &MO : UseMI.operands()) {
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
@@ -669,7 +686,7 @@ static bool getDataDeps(const MachineInstr &UseMI,
}
// Collect virtual register reads.
if (MO.readsReg())
- Deps.push_back(DataDep(MRI, Reg, UseMI.getOperandNo(I)));
+ Deps.push_back(DataDep(MRI, Reg, MO.getOperandNo()));
}
return HasPhysRegs;
}
@@ -703,9 +720,7 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI,
SmallVector<MCRegister, 8> Kills;
SmallVector<unsigned, 8> LiveDefOps;
- for (MachineInstr::const_mop_iterator MI = UseMI->operands_begin(),
- ME = UseMI->operands_end(); MI != ME; ++MI) {
- const MachineOperand &MO = *MI;
+ for (const MachineOperand &MO : UseMI->operands()) {
if (!MO.isReg() || !MO.getReg().isPhysical())
continue;
MCRegister Reg = MO.getReg().asMCReg();
@@ -714,17 +729,17 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI,
if (MO.isDead())
Kills.push_back(Reg);
else
- LiveDefOps.push_back(UseMI->getOperandNo(MI));
+ LiveDefOps.push_back(MO.getOperandNo());
} else if (MO.isKill())
Kills.push_back(Reg);
// Identify dependencies.
if (!MO.readsReg())
continue;
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
- SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
if (I == RegUnits.end())
continue;
- Deps.push_back(DataDep(I->MI, I->Op, UseMI->getOperandNo(MI)));
+ Deps.push_back(DataDep(I->MI, I->Op, MO.getOperandNo()));
break;
}
}
@@ -732,15 +747,14 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI,
// Update RegUnits to reflect live registers after UseMI.
// First kills.
for (MCRegister Kill : Kills)
- for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units)
- RegUnits.erase(*Units);
+ for (MCRegUnit Unit : TRI->regunits(Kill))
+ RegUnits.erase(Unit);
// Second, live defs.
for (unsigned DefOp : LiveDefOps) {
- for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg().asMCReg(),
- TRI);
- Units.isValid(); ++Units) {
- LiveRegUnit &LRU = RegUnits[*Units];
+ for (MCRegUnit Unit :
+ TRI->regunits(UseMI->getOperand(DefOp).getReg().asMCReg())) {
+ LiveRegUnit &LRU = RegUnits[Unit];
LRU.MI = UseMI;
LRU.Op = DefOp;
}
@@ -895,31 +909,27 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
const TargetRegisterInfo *TRI) {
SmallVector<unsigned, 8> ReadOps;
- for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
- MOE = MI.operands_end();
- MOI != MOE; ++MOI) {
- const MachineOperand &MO = *MOI;
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
if (!Reg.isPhysical())
continue;
if (MO.readsReg())
- ReadOps.push_back(MI.getOperandNo(MOI));
+ ReadOps.push_back(MO.getOperandNo());
if (!MO.isDef())
continue;
// This is a def of Reg. Remove corresponding entries from RegUnits, and
// update MI Height to consider the physreg dependencies.
- for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
- ++Units) {
- SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units);
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
+ SparseSet<LiveRegUnit>::iterator I = RegUnits.find(Unit);
if (I == RegUnits.end())
continue;
unsigned DepHeight = I->Cycle;
if (!MI.isTransient()) {
// We may not know the UseMI of this dependency, if it came from the
// live-in list. SchedModel can handle a NULL UseMI.
- DepHeight += SchedModel.computeOperandLatency(&MI, MI.getOperandNo(MOI),
+ DepHeight += SchedModel.computeOperandLatency(&MI, MO.getOperandNo(),
I->MI, I->Op);
}
Height = std::max(Height, DepHeight);
@@ -931,8 +941,8 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
// Now we know the height of MI. Update any regunits read.
for (size_t I = 0, E = ReadOps.size(); I != E; ++I) {
MCRegister Reg = MI.getOperand(ReadOps[I]).getReg().asMCReg();
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
- LiveRegUnit &LRU = RegUnits[*Units];
+ for (MCRegUnit Unit : TRI->regunits(Reg)) {
+ LiveRegUnit &LRU = RegUnits[Unit];
// Set the height to the highest reader of the unit.
if (LRU.Cycle <= Height && LRU.MI != &MI) {
LRU.Cycle = Height;
@@ -1087,10 +1097,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
}
// Go through the block backwards.
- for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin();
- BI != BB;) {
- const MachineInstr &MI = *--BI;
-
+ for (const MachineInstr &MI : reverse(*MBB)) {
// Find the MI height as determined by virtual register uses in the
// trace below.
unsigned Cycle = 0;
@@ -1137,11 +1144,10 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
}
// Transfer the live regunits to the live-in list.
- for (SparseSet<LiveRegUnit>::const_iterator
- RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) {
- TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle));
- LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RI->RegUnit, MTM.TRI) << '@'
- << RI->Cycle);
+ for (const LiveRegUnit &RU : RegUnits) {
+ TBI.LiveIns.push_back(LiveInReg(RU.RegUnit, RU.Cycle));
+ LLVM_DEBUG(dbgs() << ' ' << printRegUnit(RU.RegUnit, MTM.TRI) << '@'
+ << RU.Cycle);
}
LLVM_DEBUG(dbgs() << '\n');
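
Two independent changes land in this file: a new TS_Local strategy whose LocalEnsemble never extends a trace beyond the current block, and the recurring replacement of MCRegUnitIterator loops with the range-based regunits() API. A minimal sketch of the latter idiom (the helper name is illustrative; Reg must be a physical register):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/TargetRegisterInfo.h"

    // Collect the register units of a physical register. The loop body is the
    // range-based form that replaces MCRegUnitIterator throughout this commit.
    static llvm::SmallVector<llvm::MCRegUnit, 8>
    unitsOf(llvm::MCRegister Reg, const llvm::TargetRegisterInfo *TRI) {
      llvm::SmallVector<llvm::MCRegUnit, 8> Units;
      for (llvm::MCRegUnit Unit : TRI->regunits(Reg))
        Units.push_back(Unit);
      return Units;
    }
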
diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
index 2fe5e40a58c2..0e02c50284c6 100644
--- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
+++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp
@@ -20,9 +20,7 @@ using namespace llvm;
template <>
bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::hasDivergentDefs(
const MachineInstr &I) const {
- for (auto &op : I.operands()) {
- if (!op.isReg() || !op.isDef())
- continue;
+ for (auto &op : I.all_defs()) {
if (isDivergent(op.getReg()))
return true;
}
@@ -31,21 +29,17 @@ bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::hasDivergentDefs(
template <>
bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::markDefsDivergent(
- const MachineInstr &Instr, bool AllDefsDivergent) {
+ const MachineInstr &Instr) {
bool insertedDivergent = false;
const auto &MRI = F.getRegInfo();
+ const auto &RBI = *F.getSubtarget().getRegBankInfo();
const auto &TRI = *MRI.getTargetRegisterInfo();
- for (auto &op : Instr.operands()) {
- if (!op.isReg() || !op.isDef())
- continue;
+ for (auto &op : Instr.all_defs()) {
if (!op.getReg().isVirtual())
continue;
assert(!op.getSubReg());
- if (!AllDefsDivergent) {
- auto *RC = MRI.getRegClassOrNull(op.getReg());
- if (RC && !TRI.isDivergentRegClass(RC))
- continue;
- }
+ if (TRI.isUniformReg(MRI, RBI, op.getReg()))
+ continue;
insertedDivergent |= markDivergent(op.getReg());
}
return insertedDivergent;
@@ -64,7 +58,7 @@ void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::initialize() {
}
if (uniformity == InstructionUniformity::NeverUniform) {
- markDefsDivergent(instr, /* AllDefsDivergent = */ false);
+ markDivergent(instr);
}
}
}
@@ -73,12 +67,10 @@ void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::initialize() {
template <>
void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::pushUsers(
Register Reg) {
+ assert(isDivergent(Reg));
const auto &RegInfo = F.getRegInfo();
for (MachineInstr &UserInstr : RegInfo.use_instructions(Reg)) {
- if (isAlwaysUniform(UserInstr))
- continue;
- if (markDivergent(UserInstr))
- Worklist.push_back(&UserInstr);
+ markDivergent(UserInstr);
}
}
@@ -88,9 +80,10 @@ void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::pushUsers(
assert(!isAlwaysUniform(Instr));
if (Instr.isTerminator())
return;
- for (const MachineOperand &op : Instr.operands()) {
- if (op.isReg() && op.isDef() && op.getReg().isVirtual())
- pushUsers(op.getReg());
+ for (const MachineOperand &op : Instr.all_defs()) {
+ auto Reg = op.getReg();
+ if (isDivergent(Reg))
+ pushUsers(Reg);
}
}
@@ -102,7 +95,12 @@ bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::usesValueFromCycle(
if (!Op.isReg() || !Op.readsReg())
continue;
auto Reg = Op.getReg();
- assert(Reg.isVirtual());
+
+ // FIXME: Physical registers need to be properly checked instead of always
+ // returning true
+ if (Reg.isPhysical())
+ return true;
+
auto *Def = F.getRegInfo().getVRegDef(Reg);
if (DefCycle.contains(Def->getParent()))
return true;
@@ -110,18 +108,59 @@ bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::usesValueFromCycle(
return false;
}
+template <>
+void llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::
+ propagateTemporalDivergence(const MachineInstr &I,
+ const MachineCycle &DefCycle) {
+ const auto &RegInfo = F.getRegInfo();
+ for (auto &Op : I.all_defs()) {
+ if (!Op.getReg().isVirtual())
+ continue;
+ auto Reg = Op.getReg();
+ if (isDivergent(Reg))
+ continue;
+ for (MachineInstr &UserInstr : RegInfo.use_instructions(Reg)) {
+ if (DefCycle.contains(UserInstr.getParent()))
+ continue;
+ markDivergent(UserInstr);
+ }
+ }
+}
+
+template <>
+bool llvm::GenericUniformityAnalysisImpl<MachineSSAContext>::isDivergentUse(
+ const MachineOperand &U) const {
+ if (!U.isReg())
+ return false;
+
+ auto Reg = U.getReg();
+ if (isDivergent(Reg))
+ return true;
+
+ const auto &RegInfo = F.getRegInfo();
+ auto *Def = RegInfo.getOneDef(Reg);
+ if (!Def)
+ return true;
+
+ auto *DefInstr = Def->getParent();
+ auto *UseInstr = U.getParent();
+ return isTemporalDivergent(*UseInstr->getParent(), *DefInstr);
+}
+
// This ensures explicit instantiation of
// GenericUniformityAnalysisImpl::ImplDeleter::operator()
template class llvm::GenericUniformityInfo<MachineSSAContext>;
template struct llvm::GenericUniformityAnalysisImplDeleter<
llvm::GenericUniformityAnalysisImpl<MachineSSAContext>>;
-MachineUniformityInfo
-llvm::computeMachineUniformityInfo(MachineFunction &F,
- const MachineCycleInfo &cycleInfo,
- const MachineDomTree &domTree) {
+MachineUniformityInfo llvm::computeMachineUniformityInfo(
+ MachineFunction &F, const MachineCycleInfo &cycleInfo,
+ const MachineDomTree &domTree, bool HasBranchDivergence) {
assert(F.getRegInfo().isSSA() && "Expected to be run on SSA form!");
- return MachineUniformityInfo(F, domTree, cycleInfo);
+ MachineUniformityInfo UI(F, domTree, cycleInfo);
+ if (HasBranchDivergence)
+ UI.compute();
+ return UI;
}
namespace {
@@ -181,7 +220,9 @@ void MachineUniformityAnalysisPass::getAnalysisUsage(AnalysisUsage &AU) const {
bool MachineUniformityAnalysisPass::runOnMachineFunction(MachineFunction &MF) {
auto &DomTree = getAnalysis<MachineDominatorTree>().getBase();
auto &CI = getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
- UI = computeMachineUniformityInfo(MF, CI, DomTree);
+ // FIXME: Query TTI::hasBranchDivergence. -run-pass seems to end up with a
+ // default NoTTI
+ UI = computeMachineUniformityInfo(MF, CI, DomTree, true);
return false;
}
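
computeMachineUniformityInfo now takes the divergence capability as a parameter and only runs the expensive compute() when it is true; the pass above hardcodes true pending the FIXME. A hedged sketch of the TTI-driven wiring the FIXME points at (a hypothetical helper with best-guess headers, not part of this commit):

    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/CodeGen/MachineUniformityAnalysis.h"

    // Derive HasBranchDivergence from TTI instead of hardcoding it.
    static llvm::MachineUniformityInfo
    computeWithTTI(llvm::MachineFunction &MF, const llvm::MachineCycleInfo &CI,
                   const llvm::MachineDomTree &DomTree,
                   const llvm::TargetTransformInfo &TTI) {
      return llvm::computeMachineUniformityInfo(MF, CI, DomTree,
                                                TTI.hasBranchDivergence());
    }
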
diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp
index ddd5a027c2cd..7acd3c4039e8 100644
--- a/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -31,13 +31,13 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -58,6 +58,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
@@ -71,7 +72,6 @@
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ModRef.h"
#include "llvm/Support/raw_ostream.h"
@@ -95,19 +95,19 @@ namespace {
Pass *const PASS;
const char *Banner;
- const MachineFunction *MF;
- const TargetMachine *TM;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- const MachineRegisterInfo *MRI;
- const RegisterBankInfo *RBI;
+ const MachineFunction *MF = nullptr;
+ const TargetMachine *TM = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const MachineRegisterInfo *MRI = nullptr;
+ const RegisterBankInfo *RBI = nullptr;
- unsigned foundErrors;
+ unsigned foundErrors = 0;
// Avoid querying the MachineFunctionProperties for each operand.
- bool isFunctionRegBankSelected;
- bool isFunctionSelected;
- bool isFunctionTracksDebugUserValues;
+ bool isFunctionRegBankSelected = false;
+ bool isFunctionSelected = false;
+ bool isFunctionTracksDebugUserValues = false;
using RegVector = SmallVector<Register, 16>;
using RegMaskVector = SmallVector<const uint32_t *, 4>;
@@ -115,8 +115,8 @@ namespace {
using RegMap = DenseMap<Register, const MachineInstr *>;
using BlockSet = SmallPtrSet<const MachineBasicBlock *, 8>;
- const MachineInstr *FirstNonPHI;
- const MachineInstr *FirstTerminator;
+ const MachineInstr *FirstNonPHI = nullptr;
+ const MachineInstr *FirstTerminator = nullptr;
BlockSet FunctionBlocks;
BitVector regsReserved;
@@ -208,10 +208,10 @@ namespace {
}
// Analysis information if available
- LiveVariables *LiveVars;
- LiveIntervals *LiveInts;
- LiveStacks *LiveStks;
- SlotIndexes *Indexes;
+ LiveVariables *LiveVars = nullptr;
+ LiveIntervals *LiveInts = nullptr;
+ LiveStacks *LiveStks = nullptr;
+ SlotIndexes *Indexes = nullptr;
void visitMachineFunctionBefore();
void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
@@ -296,6 +296,8 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addUsedIfAvailable<LiveStacks>();
AU.addUsedIfAvailable<LiveVariables>();
+ AU.addUsedIfAvailable<SlotIndexes>();
+ AU.addUsedIfAvailable<LiveIntervals>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -627,8 +629,11 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
// it is an entry block or landing pad.
for (const auto &LI : MBB->liveins()) {
if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() &&
- MBB->getIterator() != MBB->getParent()->begin()) {
- report("MBB has allocatable live-in, but isn't entry or landing-pad.", MBB);
+ MBB->getIterator() != MBB->getParent()->begin() &&
+ !MBB->isInlineAsmBrIndirectTarget()) {
+ report("MBB has allocatable live-in, but isn't entry, landing-pad, or "
+ "inlineasm-br-indirect-target.",
+ MBB);
report_context(LI.PhysReg);
}
}
@@ -1746,6 +1751,13 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
report("alignment immediate must be >= 1", MI);
break;
}
+ case TargetOpcode::G_CONSTANT_POOL: {
+ if (!MI->getOperand(1).isCPI())
+ report("Src operand 1 must be a constant pool index", MI);
+ if (!MRI->getType(MI->getOperand(0).getReg()).isPointer())
+ report("Dst operand 0 must be a pointer", MI);
+ break;
+ }
default:
break;
}
@@ -2162,6 +2174,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg);
+ const RegisterBankInfo *RBI = MF->getSubtarget().getRegBankInfo();
// If we're post-RegBankSelect, the gvreg must have a bank.
if (!RegBank && isFunctionRegBankSelected) {
@@ -2173,12 +2186,12 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
// Make sure the register fits into its register bank if any.
if (RegBank && Ty.isValid() &&
- RegBank->getSize() < Ty.getSizeInBits()) {
+ RBI->getMaximumSize(RegBank->getID()) < Ty.getSizeInBits()) {
report("Register bank is too small for virtual register", MO,
MONum);
errs() << "Register bank " << RegBank->getName() << " too small("
- << RegBank->getSize() << ") to fit " << Ty.getSizeInBits()
- << "-bits\n";
+ << RBI->getMaximumSize(RegBank->getID()) << ") to fit "
+ << Ty.getSizeInBits() << "-bits\n";
return;
}
}
@@ -2427,12 +2440,11 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
SlotIndex UseIdx = LiveInts->getInstructionIndex(*MI);
// Check the cached regunit intervals.
if (Reg.isPhysical() && !isReserved(Reg)) {
- for (MCRegUnitIterator Units(Reg.asMCReg(), TRI); Units.isValid();
- ++Units) {
- if (MRI->isReservedRegUnit(*Units))
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
+ if (MRI->isReservedRegUnit(Unit))
continue;
- if (const LiveRange *LR = LiveInts->getCachedRegUnit(*Units))
- checkLivenessAtUse(MO, MONum, UseIdx, *LR, *Units);
+ if (const LiveRange *LR = LiveInts->getCachedRegUnit(Unit))
+ checkLivenessAtUse(MO, MONum, UseIdx, *LR, Unit);
}
}
@@ -3096,108 +3108,109 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
return;
}
- // No more checks for live-out segments.
- if (S.end == LiveInts->getMBBEndIdx(EndMBB))
- return;
-
- // RegUnit intervals are allowed dead phis.
- if (!Reg.isVirtual() && VNI->isPHIDef() && S.start == VNI->def &&
- S.end == VNI->def.getDeadSlot())
- return;
-
- // The live segment is ending inside EndMBB
- const MachineInstr *MI =
- LiveInts->getInstructionFromIndex(S.end.getPrevSlot());
- if (!MI) {
- report("Live segment doesn't end at a valid instruction", EndMBB);
- report_context(LR, Reg, LaneMask);
- report_context(S);
- return;
- }
-
- // The block slot must refer to a basic block boundary.
- if (S.end.isBlock()) {
- report("Live segment ends at B slot of an instruction", EndMBB);
- report_context(LR, Reg, LaneMask);
- report_context(S);
- }
+ // Checks for non-live-out segments.
+ if (S.end != LiveInts->getMBBEndIdx(EndMBB)) {
+ // RegUnit intervals are allowed dead phis.
+ if (!Reg.isVirtual() && VNI->isPHIDef() && S.start == VNI->def &&
+ S.end == VNI->def.getDeadSlot())
+ return;
- if (S.end.isDead()) {
- // Segment ends on the dead slot.
- // That means there must be a dead def.
- if (!SlotIndex::isSameInstr(S.start, S.end)) {
- report("Live segment ending at dead slot spans instructions", EndMBB);
+ // The live segment is ending inside EndMBB
+ const MachineInstr *MI =
+ LiveInts->getInstructionFromIndex(S.end.getPrevSlot());
+ if (!MI) {
+ report("Live segment doesn't end at a valid instruction", EndMBB);
report_context(LR, Reg, LaneMask);
report_context(S);
+ return;
}
- }
- // After tied operands are rewritten, a live segment can only end at an
- // early-clobber slot if it is being redefined by an early-clobber def.
- // TODO: Before tied operands are rewritten, a live segment can only end at an
- // early-clobber slot if the last use is tied to an early-clobber def.
- if (MF->getProperties().hasProperty(
- MachineFunctionProperties::Property::TiedOpsRewritten) &&
- S.end.isEarlyClobber()) {
- if (I+1 == LR.end() || (I+1)->start != S.end) {
- report("Live segment ending at early clobber slot must be "
- "redefined by an EC def in the same instruction", EndMBB);
+ // The block slot must refer to a basic block boundary.
+ if (S.end.isBlock()) {
+ report("Live segment ends at B slot of an instruction", EndMBB);
report_context(LR, Reg, LaneMask);
report_context(S);
}
- }
- // The following checks only apply to virtual registers. Physreg liveness
- // is too weird to check.
- if (Reg.isVirtual()) {
- // A live segment can end with either a redefinition, a kill flag on a
- // use, or a dead flag on a def.
- bool hasRead = false;
- bool hasSubRegDef = false;
- bool hasDeadDef = false;
- for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
- if (!MOI->isReg() || MOI->getReg() != Reg)
- continue;
- unsigned Sub = MOI->getSubReg();
- LaneBitmask SLM = Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub)
- : LaneBitmask::getAll();
- if (MOI->isDef()) {
- if (Sub != 0) {
- hasSubRegDef = true;
- // An operand %0:sub0 reads %0:sub1..n. Invert the lane
- // mask for subregister defs. Read-undef defs will be handled by
- // readsReg below.
- SLM = ~SLM;
- }
- if (MOI->isDead())
- hasDeadDef = true;
+ if (S.end.isDead()) {
+ // Segment ends on the dead slot.
+ // That means there must be a dead def.
+ if (!SlotIndex::isSameInstr(S.start, S.end)) {
+ report("Live segment ending at dead slot spans instructions", EndMBB);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
}
- if (LaneMask.any() && (LaneMask & SLM).none())
- continue;
- if (MOI->readsReg())
- hasRead = true;
}
- if (S.end.isDead()) {
- // Make sure that the corresponding machine operand for a "dead" live
- // range has the dead flag. We cannot perform this check for subregister
- // liveranges as partially dead values are allowed.
- if (LaneMask.none() && !hasDeadDef) {
- report("Instruction ending live segment on dead slot has no dead flag",
- MI);
+
+ // After tied operands are rewritten, a live segment can only end at an
+ // early-clobber slot if it is being redefined by an early-clobber def.
+ // TODO: Before tied operands are rewritten, a live segment can only end at
+ // an early-clobber slot if the last use is tied to an early-clobber def.
+ if (MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::TiedOpsRewritten) &&
+ S.end.isEarlyClobber()) {
+ if (I + 1 == LR.end() || (I + 1)->start != S.end) {
+ report("Live segment ending at early clobber slot must be "
+ "redefined by an EC def in the same instruction",
+ EndMBB);
report_context(LR, Reg, LaneMask);
report_context(S);
}
- } else {
- if (!hasRead) {
- // When tracking subregister liveness, the main range must start new
- // values on partial register writes, even if there is no read.
- if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask.any() ||
- !hasSubRegDef) {
- report("Instruction ending live segment doesn't read the register",
- MI);
+ }
+
+ // The following checks only apply to virtual registers. Physreg liveness
+ // is too weird to check.
+ if (Reg.isVirtual()) {
+ // A live segment can end with either a redefinition, a kill flag on a
+ // use, or a dead flag on a def.
+ bool hasRead = false;
+ bool hasSubRegDef = false;
+ bool hasDeadDef = false;
+ for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || MOI->getReg() != Reg)
+ continue;
+ unsigned Sub = MOI->getSubReg();
+ LaneBitmask SLM =
+ Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub) : LaneBitmask::getAll();
+ if (MOI->isDef()) {
+ if (Sub != 0) {
+ hasSubRegDef = true;
+ // An operand %0:sub0 reads %0:sub1..n. Invert the lane
+ // mask for subregister defs. Read-undef defs will be handled by
+ // readsReg below.
+ SLM = ~SLM;
+ }
+ if (MOI->isDead())
+ hasDeadDef = true;
+ }
+ if (LaneMask.any() && (LaneMask & SLM).none())
+ continue;
+ if (MOI->readsReg())
+ hasRead = true;
+ }
+ if (S.end.isDead()) {
+ // Make sure that the corresponding machine operand for a "dead" live
+ // range has the dead flag. We cannot perform this check for subregister
+ // liveranges as partially dead values are allowed.
+ if (LaneMask.none() && !hasDeadDef) {
+ report(
+ "Instruction ending live segment on dead slot has no dead flag",
+ MI);
report_context(LR, Reg, LaneMask);
report_context(S);
}
+ } else {
+ if (!hasRead) {
+ // When tracking subregister liveness, the main range must start new
+ // values on partial register writes, even if there is no read.
+ if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask.any() ||
+ !hasSubRegDef) {
+ report("Instruction ending live segment doesn't read the register",
+ MI);
+ report_context(LR, Reg, LaneMask);
+ report_context(S);
+ }
+ }
}
}
}
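
Besides the restructured live-range checks, operand verification now sizes register banks through RegisterBankInfo::getMaximumSize() rather than a per-bank getSize(). A sketch of the new query shape (hypothetical helper; the header choices are a best guess):

    #include "llvm/CodeGen/LowLevelType.h"
    #include "llvm/CodeGen/RegisterBank.h"
    #include "llvm/CodeGen/RegisterBankInfo.h"

    // A virtual register of type Ty fits in bank RB if the bank's maximum
    // size, as reported by RBI, covers the type's bit width.
    static bool fitsInRegBank(const llvm::RegisterBank &RB,
                              const llvm::RegisterBankInfo &RBI, llvm::LLT Ty) {
      return RBI.getMaximumSize(RB.getID()) >= Ty.getSizeInBits();
    }
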
diff --git a/llvm/lib/CodeGen/ModuloSchedule.cpp b/llvm/lib/CodeGen/ModuloSchedule.cpp
index af9fef0720f9..0bef513342ff 100644
--- a/llvm/lib/CodeGen/ModuloSchedule.cpp
+++ b/llvm/lib/CodeGen/ModuloSchedule.cpp
@@ -74,10 +74,7 @@ void ModuloScheduleExpander::expand() {
// stage difference for each use. Keep the maximum value.
for (MachineInstr *MI : Schedule.getInstructions()) {
int DefStage = Schedule.getStage(MI);
- for (const MachineOperand &Op : MI->operands()) {
- if (!Op.isReg() || !Op.isDef())
- continue;
-
+ for (const MachineOperand &Op : MI->all_defs()) {
Register Reg = Op.getReg();
unsigned MaxDiff = 0;
bool PhiIsSwapped = false;
@@ -743,9 +740,7 @@ void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
continue;
}
bool used = true;
- for (const MachineOperand &MO : MI->operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
+ for (const MachineOperand &MO : MI->all_defs()) {
Register reg = MO.getReg();
// Assume physical registers are used, unless they are marked dead.
if (reg.isPhysical()) {
diff --git a/llvm/lib/CodeGen/OptimizePHIs.cpp b/llvm/lib/CodeGen/OptimizePHIs.cpp
index e68a6398cf51..d997fbbed5a6 100644
--- a/llvm/lib/CodeGen/OptimizePHIs.cpp
+++ b/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -34,8 +34,8 @@ STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles");
namespace {
class OptimizePHIs : public MachineFunctionPass {
- MachineRegisterInfo *MRI;
- const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
public:
static char ID; // Pass identification
diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp
index 51035d2e442f..dbb9a9ffdf60 100644
--- a/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/llvm/lib/CodeGen/PHIElimination.cpp
@@ -63,9 +63,9 @@ static cl::opt<bool> NoPhiElimLiveOutEarlyExit(
namespace {
class PHIElimination : public MachineFunctionPass {
- MachineRegisterInfo *MRI; // Machine register information
- LiveVariables *LV;
- LiveIntervals *LIS;
+ MachineRegisterInfo *MRI = nullptr; // Machine register information
+ LiveVariables *LV = nullptr;
+ LiveIntervals *LIS = nullptr;
public:
static char ID; // Pass identification, replacement for typeid
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index c3458be0f883..a08cc78f11b1 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -150,11 +150,11 @@ namespace {
class RecurrenceInstr;
class PeepholeOptimizer : public MachineFunctionPass {
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- MachineRegisterInfo *MRI;
- MachineDominatorTree *DT; // Machine dominator tree
- MachineLoopInfo *MLI;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ MachineDominatorTree *DT = nullptr; // Machine dominator tree
+ MachineLoopInfo *MLI = nullptr;
public:
static char ID; // Pass identification
diff --git a/llvm/lib/CodeGen/PostRASchedulerList.cpp b/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 98fc7e07a1b4..170008ab67cb 100644
--- a/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -182,7 +182,7 @@ namespace {
private:
/// Apply each ScheduleDAGMutation step in order.
- void postprocessDAG();
+ void postProcessDAG();
void ReleaseSucc(SUnit *SU, SDep *SuccEdge);
void ReleaseSuccessors(SUnit *SU);
@@ -407,7 +407,7 @@ void SchedulePostRATDList::schedule() {
}
}
- postprocessDAG();
+ postProcessDAG();
LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n");
LLVM_DEBUG(dump());
@@ -436,7 +436,7 @@ void SchedulePostRATDList::finishBlock() {
}
/// Apply each ScheduleDAGMutation step in order.
-void SchedulePostRATDList::postprocessDAG() {
+void SchedulePostRATDList::postProcessDAG() {
for (auto &M : Mutations)
M->apply(this);
}
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 87e2f9f20021..3448c56e4994 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -6,14 +6,16 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass implements IR lowering for the llvm.load.relative and llvm.objc.*
-// intrinsics.
+// This pass implements IR lowering for the llvm.memcpy, llvm.memmove,
+// llvm.memset, llvm.load.relative and llvm.objc.* intrinsics.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
#include "llvm/Analysis/ObjCARCInstKind.h"
#include "llvm/Analysis/ObjCARCUtil.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -24,9 +26,44 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
using namespace llvm;
+/// Size threshold for expanding statically sized memory intrinsic calls.
+/// Calls of known size larger than this will be expanded by the pass; calls
+/// of unknown or smaller size will be left for expansion in codegen.
+static cl::opt<int64_t> MemIntrinsicExpandSizeThresholdOpt(
+ "mem-intrinsic-expand-size",
+ cl::desc("Set minimum mem intrinsic size to expand in IR"), cl::init(-1),
+ cl::Hidden);
+
+namespace {
+
+struct PreISelIntrinsicLowering {
+ const function_ref<TargetTransformInfo &(Function &)> LookupTTI;
+ const function_ref<TargetLibraryInfo &(Function &)> LookupLibInfo;
+
+ /// If this is true, assume it's preferable to leave memory intrinsic calls
+ /// for replacement with a library call later. Otherwise this depends on
+ /// TargetLibraryInfo availability of the corresponding function.
+ const bool UseMemIntrinsicLibFunc;
+
+ explicit PreISelIntrinsicLowering(
+ function_ref<TargetTransformInfo &(Function &)> LookupTTI_,
+ function_ref<TargetLibraryInfo &(Function &)> LookupLibInfo_,
+ bool UseMemIntrinsicLibFunc_ = true)
+ : LookupTTI(LookupTTI_), LookupLibInfo(LookupLibInfo_),
+ UseMemIntrinsicLibFunc(UseMemIntrinsicLibFunc_) {}
+
+ static bool shouldExpandMemIntrinsicWithSize(Value *Size,
+ const TargetTransformInfo &TTI);
+ bool expandMemIntrinsicUses(Function &F) const;
+ bool lowerIntrinsics(Module &M) const;
+};
+
+} // namespace
+
static bool lowerLoadRelative(Function &F) {
if (F.use_empty())
return false;
@@ -133,16 +170,104 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
return true;
}
-static bool lowerIntrinsics(Module &M) {
+// TODO: Should refine based on estimated number of accesses (e.g. does it
+// require splitting based on alignment)
+bool PreISelIntrinsicLowering::shouldExpandMemIntrinsicWithSize(
+ Value *Size, const TargetTransformInfo &TTI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(Size);
+ if (!CI)
+ return true;
+ uint64_t Threshold = MemIntrinsicExpandSizeThresholdOpt.getNumOccurrences()
+ ? MemIntrinsicExpandSizeThresholdOpt
+ : TTI.getMaxMemIntrinsicInlineSizeThreshold();
+ uint64_t SizeVal = CI->getZExtValue();
+
+ // Treat a threshold of 0 as a special case to force expansion of all
+ // intrinsics, including size 0.
+ return SizeVal > Threshold || Threshold == 0;
+}
+
+// TODO: Handle atomic memcpy and memcpy.inline
+// TODO: Pass ScalarEvolution
+bool PreISelIntrinsicLowering::expandMemIntrinsicUses(Function &F) const {
+ Intrinsic::ID ID = F.getIntrinsicID();
bool Changed = false;
- for (Function &F : M) {
- if (F.getName().startswith("llvm.load.relative.")) {
- Changed |= lowerLoadRelative(F);
- continue;
+
+ for (User *U : llvm::make_early_inc_range(F.users())) {
+ Instruction *Inst = cast<Instruction>(U);
+
+ switch (ID) {
+ case Intrinsic::memcpy: {
+ auto *Memcpy = cast<MemCpyInst>(Inst);
+ Function *ParentFunc = Memcpy->getFunction();
+ const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
+ if (shouldExpandMemIntrinsicWithSize(Memcpy->getLength(), TTI)) {
+ if (UseMemIntrinsicLibFunc &&
+ LookupLibInfo(*ParentFunc).has(LibFunc_memcpy))
+ break;
+
+ expandMemCpyAsLoop(Memcpy, TTI);
+ Changed = true;
+ Memcpy->eraseFromParent();
+ }
+
+ break;
+ }
+ case Intrinsic::memmove: {
+ auto *Memmove = cast<MemMoveInst>(Inst);
+ Function *ParentFunc = Memmove->getFunction();
+ const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
+ if (shouldExpandMemIntrinsicWithSize(Memmove->getLength(), TTI)) {
+ if (UseMemIntrinsicLibFunc &&
+ LookupLibInfo(*ParentFunc).has(LibFunc_memmove))
+ break;
+
+ if (expandMemMoveAsLoop(Memmove, TTI)) {
+ Changed = true;
+ Memmove->eraseFromParent();
+ }
+ }
+
+ break;
}
+ case Intrinsic::memset: {
+ auto *Memset = cast<MemSetInst>(Inst);
+ Function *ParentFunc = Memset->getFunction();
+ const TargetTransformInfo &TTI = LookupTTI(*ParentFunc);
+ if (shouldExpandMemIntrinsicWithSize(Memset->getLength(), TTI)) {
+ if (UseMemIntrinsicLibFunc &&
+ LookupLibInfo(*Memset->getFunction()).has(LibFunc_memset))
+ break;
+
+ expandMemSetAsLoop(Memset);
+ Changed = true;
+ Memset->eraseFromParent();
+ }
+
+ break;
+ }
+ default:
+ llvm_unreachable("unhandled intrinsic");
+ }
+ }
+
+ return Changed;
+}
+
+bool PreISelIntrinsicLowering::lowerIntrinsics(Module &M) const {
+ bool Changed = false;
+ for (Function &F : M) {
switch (F.getIntrinsicID()) {
default:
break;
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ Changed |= expandMemIntrinsicUses(F);
+ break;
+ case Intrinsic::load_relative:
+ Changed |= lowerLoadRelative(F);
+ break;
case Intrinsic::objc_autorelease:
Changed |= lowerObjCCall(F, "objc_autorelease");
break;
@@ -231,7 +356,23 @@ public:
PreISelIntrinsicLoweringLegacyPass() : ModulePass(ID) {}
- bool runOnModule(Module &M) override { return lowerIntrinsics(M); }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+
+ bool runOnModule(Module &M) override {
+ auto LookupTTI = [this](Function &F) -> TargetTransformInfo & {
+ return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ };
+
+ auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
+ return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ };
+
+ PreISelIntrinsicLowering Lowering(LookupTTI, LookupTLI);
+ return Lowering.lowerIntrinsics(M);
+ }
};
} // end anonymous namespace
@@ -248,7 +389,18 @@ ModulePass *llvm::createPreISelIntrinsicLoweringPass() {
PreservedAnalyses PreISelIntrinsicLoweringPass::run(Module &M,
ModuleAnalysisManager &AM) {
- if (!lowerIntrinsics(M))
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+ return FAM.getResult<TargetLibraryAnalysis>(F);
+ };
+
+ auto LookupTTI = [&FAM](Function &F) -> TargetTransformInfo & {
+ return FAM.getResult<TargetIRAnalysis>(F);
+ };
+
+ PreISelIntrinsicLowering Lowering(LookupTTI, LookupTLI);
+ if (!Lowering.lowerIntrinsics(M))
return PreservedAnalyses::all();
else
return PreservedAnalyses::none();
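
The expansion decision above reduces to a small predicate: expand when the length is not a compile-time constant, or when the constant length exceeds the threshold (TTI's getMaxMemIntrinsicInlineSizeThreshold() unless overridden by -mem-intrinsic-expand-size, with 0 forcing expansion of everything). A standalone restatement of that predicate (sketch, with the threshold already resolved by the caller):

    #include "llvm/IR/Constants.h"
    #include "llvm/Support/Casting.h"

    // Mirrors shouldExpandMemIntrinsicWithSize above.
    static bool shouldExpand(const llvm::Value *Length, uint64_t Threshold) {
      const auto *CI = llvm::dyn_cast<llvm::ConstantInt>(Length);
      if (!CI)
        return true; // unknown length still goes through the expansion path
      return CI->getZExtValue() > Threshold || Threshold == 0;
    }

Whether the expansion actually happens still depends on UseMemIntrinsicLibFunc and on TargetLibraryInfo reporting a usable memcpy, memmove, or memset, as in expandMemIntrinsicUses above.
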
diff --git a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
index 7e46dd35ce47..be81ecab9c89 100644
--- a/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/llvm/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -27,9 +27,9 @@ namespace {
/// Process IMPLICIT_DEF instructions and make sure there is one implicit_def
/// for each use. Add isUndef marker to implicit_def defs and their uses.
class ProcessImplicitDefs : public MachineFunctionPass {
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
SmallSetVector<MachineInstr*, 16> WorkList;
@@ -72,8 +72,8 @@ bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) {
!MI->isRegSequence() &&
!MI->isPHI())
return false;
- for (const MachineOperand &MO : MI->operands())
- if (MO.isReg() && MO.isUse() && MO.readsReg())
+ for (const MachineOperand &MO : MI->all_uses())
+ if (MO.readsReg())
return false;
return true;
}
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index cc70ec477650..e323aaaeefaf 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -96,7 +96,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
private:
- RegScavenger *RS;
+ RegScavenger *RS = nullptr;
// MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved
// stack frame indexes.
@@ -111,11 +111,11 @@ private:
// Flag to control whether to use the register scavenger to resolve
// frame index materialization registers. Set according to
// TRI->requiresFrameIndexScavenging() for the current function.
- bool FrameIndexVirtualScavenging;
+ bool FrameIndexVirtualScavenging = false;
// Flag to control whether the scavenger should be passed even though
// FrameIndexVirtualScavenging is used.
- bool FrameIndexEliminationScavenging;
+ bool FrameIndexEliminationScavenging = false;
// Emit remarks.
MachineOptimizationRemarkEmitter *ORE = nullptr;
@@ -309,19 +309,20 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
SpillSize += MFI.getObjectSize(Idx);
}
- float SpillPct =
+ [[maybe_unused]] float SpillPct =
static_cast<float>(SpillSize) / static_cast<float>(StackSize);
- float VarPct = 1.0f - SpillPct;
- int64_t VariableSize = StackSize - SpillSize;
- dbgs() << formatv("{0}/{1} ({3:P}) spills, {2}/{1} ({4:P}) variables",
- SpillSize, StackSize, VariableSize, SpillPct, VarPct);
+ LLVM_DEBUG(
+ dbgs() << formatv("{0}/{1} ({3:P}) spills, {2}/{1} ({4:P}) variables",
+ SpillSize, StackSize, StackSize - SpillSize, SpillPct,
+ 1.0f - SpillPct));
if (UnsafeStackSize != 0) {
- float UnsafePct =
- static_cast<float>(UnsafeStackSize) / static_cast<float>(StackSize);
- dbgs() << formatv(", {0}/{2} ({1:P}) unsafe stack", UnsafeStackSize,
- UnsafePct, StackSize);
+ LLVM_DEBUG(dbgs() << formatv(", {0}/{2} ({1:P}) unsafe stack",
+ UnsafeStackSize,
+ static_cast<float>(UnsafeStackSize) /
+ static_cast<float>(StackSize),
+ StackSize));
}
- dbgs() << "\n";
+ LLVM_DEBUG(dbgs() << "\n");
}
ORE->emit([&]() {
@@ -375,8 +376,8 @@ void PEI::calculateCallFrameInfo(MachineFunction &MF) {
}
assert(!MFI.isMaxCallFrameSizeComputed() ||
- (MFI.getMaxCallFrameSize() == MaxCallFrameSize &&
- MFI.adjustsStack() == AdjustsStack));
+ (MFI.getMaxCallFrameSize() >= MaxCallFrameSize &&
+ !(AdjustsStack && !MFI.adjustsStack())));
MFI.setAdjustsStack(AdjustsStack);
MFI.setMaxCallFrameSize(MaxCallFrameSize);
@@ -692,7 +693,7 @@ void PEI::spillCalleeSavedRegs(MachineFunction &MF) {
/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
static inline void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
bool StackGrowsDown, int64_t &Offset,
- Align &MaxAlign, unsigned Skew) {
+ Align &MaxAlign) {
// If the stack grows down, add the object size to find the lowest address.
if (StackGrowsDown)
Offset += MFI.getObjectSize(FrameIdx);
@@ -704,7 +705,7 @@ static inline void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
MaxAlign = std::max(MaxAlign, Alignment);
// Adjust to alignment boundary.
- Offset = alignTo(Offset, Alignment, Skew);
+ Offset = alignTo(Offset, Alignment);
if (StackGrowsDown) {
LLVM_DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset
@@ -828,11 +829,10 @@ static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx,
static void AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
SmallSet<int, 16> &ProtectedObjs,
MachineFrameInfo &MFI, bool StackGrowsDown,
- int64_t &Offset, Align &MaxAlign,
- unsigned Skew) {
+ int64_t &Offset, Align &MaxAlign) {
for (int i : UnassignedObjs) {
- AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew);
+ AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
ProtectedObjs.insert(i);
}
}
@@ -858,9 +858,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
&& "Local area offset should be in direction of stack growth");
int64_t Offset = LocalAreaOffset;
- // Skew to be applied to alignment.
- unsigned Skew = TFI.getStackAlignmentSkew(MF);
-
#ifdef EXPENSIVE_CHECKS
for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i)
if (!MFI.isDeadObjectIndex(i) &&
@@ -908,8 +905,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
if (!StackGrowsDown && MFI.isDeadObjectIndex(FrameIndex))
continue;
- AdjustStackOffset(MFI, FrameIndex, StackGrowsDown, Offset, MaxAlign,
- Skew);
+ AdjustStackOffset(MFI, FrameIndex, StackGrowsDown, Offset, MaxAlign);
}
}
@@ -930,7 +926,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
SmallVector<int, 2> SFIs;
RS->getScavengingFrameIndices(SFIs);
for (int SFI : SFIs)
- AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign, Skew);
+ AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
}
// FIXME: Once this is working, then enable flag will change to a target
@@ -941,7 +937,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
Align Alignment = MFI.getLocalFrameMaxAlign();
// Adjust to alignment boundary.
- Offset = alignTo(Offset, Alignment, Skew);
+ Offset = alignTo(Offset, Alignment);
LLVM_DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
@@ -987,8 +983,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
"Stack protector on non-default stack expected to not be "
"pre-allocated by LocalStackSlotPass.");
} else if (!MFI.getUseLocalStackAllocationBlock()) {
- AdjustStackOffset(MFI, StackProtectorFI, StackGrowsDown, Offset, MaxAlign,
- Skew);
+ AdjustStackOffset(MFI, StackProtectorFI, StackGrowsDown, Offset,
+ MaxAlign);
} else if (!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex())) {
llvm_unreachable(
"Stack protector not pre-allocated by LocalStackSlotPass.");
@@ -1036,11 +1032,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
"LocalStackSlotPass.");
AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
- Offset, MaxAlign, Skew);
+ Offset, MaxAlign);
AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
- Offset, MaxAlign, Skew);
+ Offset, MaxAlign);
AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown,
- Offset, MaxAlign, Skew);
+ Offset, MaxAlign);
}
SmallVector<int, 8> ObjectsToAllocate;
@@ -1071,7 +1067,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// Allocate the EH registration node first if one is present.
if (EHRegNodeFrameIndex != std::numeric_limits<int>::max())
AdjustStackOffset(MFI, EHRegNodeFrameIndex, StackGrowsDown, Offset,
- MaxAlign, Skew);
+ MaxAlign);
// Give the targets a chance to order the objects the way they like it.
if (MF.getTarget().getOptLevel() != CodeGenOpt::None &&
@@ -1093,7 +1089,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
for (auto &Object : ObjectsToAllocate)
if (!scavengeStackSlot(MFI, Object, StackGrowsDown, MaxAlign,
StackBytesFree))
- AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew);
+ AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign);
// Make sure the special register scavenging spill slot is closest to the
// stack pointer.
@@ -1101,7 +1097,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
SmallVector<int, 2> SFIs;
RS->getScavengingFrameIndices(SFIs);
for (int SFI : SFIs)
- AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign, Skew);
+ AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
}
if (!TFI.targetHandlesStackFrameRounding()) {
@@ -1127,7 +1123,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// SP not FP. Align to MaxAlign so this works.
StackAlign = std::max(StackAlign, MaxAlign);
int64_t OffsetBeforeAlignment = Offset;
- Offset = alignTo(Offset, StackAlign, Skew);
+ Offset = alignTo(Offset, StackAlign);
// If we have increased the offset to fulfill the alignment constraints,
// then the scavenging spill slots may become harder to reach from the
@@ -1291,8 +1287,8 @@ void PEI::insertZeroCallUsedRegs(MachineFunction &MF) {
MCRegister Reg = MO.getReg();
// This picks up sibling registers (e.g. %al -> %ah).
- for (MCRegUnitIterator Unit(Reg, &TRI); Unit.isValid(); ++Unit)
- RegsToZero.reset(*Unit);
+ for (MCRegUnit Unit : TRI.regunits(Reg))
+ RegsToZero.reset(Unit);
for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(Reg))
RegsToZero.reset(SReg);
@@ -1463,14 +1459,24 @@ void PEI::replaceFrameIndicesBackward(MachineBasicBlock *BB,
assert(MF.getSubtarget().getRegisterInfo() &&
"getRegisterInfo() must be implemented!");
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
- RS->enterBasicBlockEnd(*BB);
+ RegScavenger *LocalRS = FrameIndexEliminationScavenging ? RS : nullptr;
+ if (LocalRS)
+ LocalRS->enterBasicBlockEnd(*BB);
for (MachineInstr &MI : make_early_inc_range(reverse(*BB))) {
+ if (TII.isFrameInstr(MI)) {
+ TFI.eliminateCallFramePseudoInstr(MF, *BB, &MI);
+ continue;
+ }
+
+ // Step backwards to get the liveness state at (immediately after) MI.
+ if (LocalRS)
+ LocalRS->backward(MI);
- // Register scavenger backward step
- MachineBasicBlock::iterator Step(MI);
for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
if (!MI.getOperand(i).isFI())
continue;
@@ -1478,49 +1484,20 @@ void PEI::replaceFrameIndicesBackward(MachineBasicBlock *BB,
if (replaceFrameIndexDebugInstr(MF, MI, i, SPAdj))
continue;
- // If this instruction has a FrameIndex operand, we need to
- // use that target machine register info object to eliminate
- // it.
-
- // TRI.eliminateFrameIndex may lower the frame index to a sequence of
- // instructions. It also can remove/change instructions passed by the
- // iterator and invalidate the iterator. We have to take care of this. For
- // that we support two iterators: *Step* - points to the position up to
- // which the scavenger should scan by the next iteration to have liveness
- // information up to date. *Curr* - keeps track of the correct RS->MBBI -
- // the scan start point. It points to the currently processed instruction
- // right before the frame lowering.
+ // Eliminate this FrameIndex operand.
//
- // ITERATORS WORK AS FOLLOWS:
- // *Step* is shifted one step back right before the frame lowering and
- // one step forward right after it. No matter how many instructions were
- // inserted, *Step* will be right after the position which is going to be
- // processed in the next iteration, thus, in the correct position for the
- // scavenger to go up to.
- // *Curr* is shifted one step forward right before calling
- // TRI.eliminateFrameIndex and one step backward after. Thus, we make sure
- // it points right to the position that is the correct starting point for
- // the scavenger to scan.
- MachineBasicBlock::iterator Curr = ++RS->getCurrentPosition();
-
- // Shift back
- --Step;
-
+ // Save and restore the scavenger's position around the call to
+ // eliminateFrameIndex in case it erases MI and invalidates the iterator.
+ MachineBasicBlock::iterator Save;
+ if (LocalRS)
+ Save = std::next(LocalRS->getCurrentPosition());
bool Removed = TRI.eliminateFrameIndex(MI, SPAdj, i, RS);
- // Restore to unify logic with a shift back that happens in the end of
- // the outer loop.
- ++Step;
- RS->skipTo(--Curr);
+ if (LocalRS)
+ LocalRS->skipTo(std::prev(Save));
+
if (Removed)
break;
}
-
- // Shift it to make RS collect reg info up to the current instruction.
- if (Step != BB->begin())
- Step--;
-
- // Update register states.
- RS->backward(Step);
}
}
@@ -1532,7 +1509,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
- if (RS && TRI.supportsBackwardScavenger())
+ if (TRI.supportsBackwardScavenger())
return replaceFrameIndicesBackward(BB, MF, SPAdj);
if (RS && FrameIndexEliminationScavenging)
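Editor note: for orientation, the rewritten replaceFrameIndicesBackward above boils down to the following control flow. This is a condensed sketch using names from the hunk, not a drop-in replacement; the frame-index rewriting and the scavenger save/restore around it are elided into a comment.

#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

// Walk the block bottom-up so the scavenger sees liveness immediately
// after each instruction before its frame indices are rewritten.
static void replaceBackwardSketch(llvm::MachineFunction &MF,
                                  llvm::MachineBasicBlock &MBB,
                                  llvm::RegScavenger *LocalRS) {
  const llvm::TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const llvm::TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
  if (LocalRS)
    LocalRS->enterBasicBlockEnd(MBB); // start from the block's live-out state
  for (llvm::MachineInstr &MI : llvm::make_early_inc_range(llvm::reverse(MBB))) {
    if (TII.isFrameInstr(MI)) { // call-frame setup/destroy pseudos
      TFI.eliminateCallFramePseudoInstr(MF, MBB, &MI);
      continue;
    }
    if (LocalRS)
      LocalRS->backward(MI); // liveness state immediately after MI
    // ... each FI operand of MI is then rewritten via
    // TRI.eliminateFrameIndex(), with the scavenger position saved and
    // restored around the call as shown in the hunk above.
  }
}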
diff --git a/llvm/lib/CodeGen/PseudoProbeInserter.cpp b/llvm/lib/CodeGen/PseudoProbeInserter.cpp
index 86ea3ec67178..913e0035b046 100644
--- a/llvm/lib/CodeGen/PseudoProbeInserter.cpp
+++ b/llvm/lib/CodeGen/PseudoProbeInserter.cpp
@@ -128,10 +128,7 @@ public:
private:
uint64_t getFuncGUID(Module *M, DILocation *DL) {
- auto *SP = DL->getScope()->getSubprogram();
- auto Name = SP->getLinkageName();
- if (Name.empty())
- Name = SP->getName();
+ auto Name = DL->getSubprogramLinkageName();
return Function::getGUID(Name);
}
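Editor note: the removed fallback above is what the new DILocation helper is intended to wrap. For reference, the equivalent lookup spelled out with the DISubprogram API the old code already used:

#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"

// Equivalent of the removed lines: prefer the subprogram's linkage name,
// fall back to its plain name, then hash it into a GUID.
static uint64_t funcGUIDFromLoc(const llvm::DILocation *DL) {
  const llvm::DISubprogram *SP = DL->getScope()->getSubprogram();
  llvm::StringRef Name = SP->getLinkageName();
  if (Name.empty())
    Name = SP->getName();
  return llvm::Function::getGUID(Name);
}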
diff --git a/llvm/lib/CodeGen/RDFGraph.cpp b/llvm/lib/CodeGen/RDFGraph.cpp
index dcb1a44c75e4..abf3b1e6fbb9 100644
--- a/llvm/lib/CodeGen/RDFGraph.cpp
+++ b/llvm/lib/CodeGen/RDFGraph.cpp
@@ -8,7 +8,6 @@
//
// Target-independent, SSA-based data flow graph for register data flow (RDF).
//
-#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -19,6 +18,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RDFGraph.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -38,64 +38,69 @@
#include <utility>
#include <vector>
-using namespace llvm;
-using namespace rdf;
-
// Printing functions. Have them here first, so that the rest of the code
// can use them.
-namespace llvm {
-namespace rdf {
-
-raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P) {
- if (!P.Mask.all())
- OS << ':' << PrintLaneMask(P.Mask);
- return OS;
-}
+namespace llvm::rdf {
-raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterRef> &P) {
- auto &TRI = P.G.getTRI();
- if (P.Obj.Reg > 0 && P.Obj.Reg < TRI.getNumRegs())
- OS << TRI.getName(P.Obj.Reg);
- else
- OS << '#' << P.Obj.Reg;
- OS << PrintLaneMaskOpt(P.Obj.Mask);
+raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterRef> &P) {
+ P.G.getPRI().print(OS, P.Obj);
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) {
- auto NA = P.G.addr<NodeBase*>(P.Obj);
+raw_ostream &operator<<(raw_ostream &OS, const Print<NodeId> &P) {
+ if (P.Obj == 0)
+ return OS << "null";
+ auto NA = P.G.addr<NodeBase *>(P.Obj);
uint16_t Attrs = NA.Addr->getAttrs();
uint16_t Kind = NodeAttrs::kind(Attrs);
uint16_t Flags = NodeAttrs::flags(Attrs);
switch (NodeAttrs::type(Attrs)) {
- case NodeAttrs::Code:
- switch (Kind) {
- case NodeAttrs::Func: OS << 'f'; break;
- case NodeAttrs::Block: OS << 'b'; break;
- case NodeAttrs::Stmt: OS << 's'; break;
- case NodeAttrs::Phi: OS << 'p'; break;
- default: OS << "c?"; break;
- }
+ case NodeAttrs::Code:
+ switch (Kind) {
+ case NodeAttrs::Func:
+ OS << 'f';
break;
- case NodeAttrs::Ref:
- if (Flags & NodeAttrs::Undef)
- OS << '/';
- if (Flags & NodeAttrs::Dead)
- OS << '\\';
- if (Flags & NodeAttrs::Preserving)
- OS << '+';
- if (Flags & NodeAttrs::Clobbering)
- OS << '~';
- switch (Kind) {
- case NodeAttrs::Use: OS << 'u'; break;
- case NodeAttrs::Def: OS << 'd'; break;
- case NodeAttrs::Block: OS << 'b'; break;
- default: OS << "r?"; break;
- }
+ case NodeAttrs::Block:
+ OS << 'b';
+ break;
+ case NodeAttrs::Stmt:
+ OS << 's';
+ break;
+ case NodeAttrs::Phi:
+ OS << 'p';
break;
default:
- OS << '?';
+ OS << "c?";
+ break;
+ }
+ break;
+ case NodeAttrs::Ref:
+ if (Flags & NodeAttrs::Undef)
+ OS << '/';
+ if (Flags & NodeAttrs::Dead)
+ OS << '\\';
+ if (Flags & NodeAttrs::Preserving)
+ OS << '+';
+ if (Flags & NodeAttrs::Clobbering)
+ OS << '~';
+ switch (Kind) {
+ case NodeAttrs::Use:
+ OS << 'u';
break;
+ case NodeAttrs::Def:
+ OS << 'd';
+ break;
+ case NodeAttrs::Block:
+ OS << 'b';
+ break;
+ default:
+ OS << "r?";
+ break;
+ }
+ break;
+ default:
+ OS << '?';
+ break;
}
OS << P.Obj;
if (Flags & NodeAttrs::Shadow)
@@ -103,15 +108,14 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) {
return OS;
}
-static void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
- const DataFlowGraph &G) {
- OS << Print(RA.Id, G) << '<'
- << Print(RA.Addr->getRegRef(G), G) << '>';
+static void printRefHeader(raw_ostream &OS, const Ref RA,
+ const DataFlowGraph &G) {
+ OS << Print(RA.Id, G) << '<' << Print(RA.Addr->getRegRef(G), G) << '>';
if (RA.Addr->getFlags() & NodeAttrs::Fixed)
OS << '!';
}
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<DefNode*>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<Def> &P) {
printRefHeader(OS, P.Obj, P.G);
OS << '(';
if (NodeId N = P.Obj.Addr->getReachingDef())
@@ -128,7 +132,7 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<DefNode*>> &P) {
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<UseNode*>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<Use> &P) {
printRefHeader(OS, P.Obj, P.G);
OS << '(';
if (NodeId N = P.Obj.Addr->getReachingDef())
@@ -139,8 +143,7 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<UseNode*>> &P) {
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS,
- const Print<NodeAddr<PhiUseNode*>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<PhiUse> &P) {
printRefHeader(OS, P.Obj, P.G);
OS << '(';
if (NodeId N = P.Obj.Addr->getReachingDef())
@@ -154,22 +157,22 @@ raw_ostream &operator<< (raw_ostream &OS,
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<RefNode*>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<Ref> &P) {
switch (P.Obj.Addr->getKind()) {
- case NodeAttrs::Def:
- OS << PrintNode<DefNode*>(P.Obj, P.G);
- break;
- case NodeAttrs::Use:
- if (P.Obj.Addr->getFlags() & NodeAttrs::PhiRef)
- OS << PrintNode<PhiUseNode*>(P.Obj, P.G);
- else
- OS << PrintNode<UseNode*>(P.Obj, P.G);
- break;
+ case NodeAttrs::Def:
+ OS << PrintNode<DefNode *>(P.Obj, P.G);
+ break;
+ case NodeAttrs::Use:
+ if (P.Obj.Addr->getFlags() & NodeAttrs::PhiRef)
+ OS << PrintNode<PhiUseNode *>(P.Obj, P.G);
+ else
+ OS << PrintNode<UseNode *>(P.Obj, P.G);
+ break;
}
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeList> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<NodeList> &P) {
unsigned N = P.Obj.size();
for (auto I : P.Obj) {
OS << Print(I.Id, P.G);
@@ -179,7 +182,7 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeList> &P) {
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<NodeSet> &P) {
unsigned N = P.Obj.size();
for (auto I : P.Obj) {
OS << Print(I, P.G);
@@ -191,45 +194,43 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) {
namespace {
- template <typename T>
- struct PrintListV {
- PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {}
+template <typename T> struct PrintListV {
+ PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {}
- using Type = T;
- const NodeList &List;
- const DataFlowGraph &G;
- };
+ using Type = T;
+ const NodeList &List;
+ const DataFlowGraph &G;
+};
- template <typename T>
- raw_ostream &operator<< (raw_ostream &OS, const PrintListV<T> &P) {
- unsigned N = P.List.size();
- for (NodeAddr<T> A : P.List) {
- OS << PrintNode<T>(A, P.G);
- if (--N)
- OS << ", ";
- }
- return OS;
+template <typename T>
+raw_ostream &operator<<(raw_ostream &OS, const PrintListV<T> &P) {
+ unsigned N = P.List.size();
+ for (NodeAddr<T> A : P.List) {
+ OS << PrintNode<T>(A, P.G);
+ if (--N)
+ OS << ", ";
}
+ return OS;
+}
} // end anonymous namespace
-raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<Phi> &P) {
OS << Print(P.Obj.Id, P.G) << ": phi ["
- << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']';
+ << PrintListV<RefNode *>(P.Obj.Addr->members(P.G), P.G) << ']';
return OS;
}
-raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<StmtNode *>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<Stmt> &P) {
const MachineInstr &MI = *P.Obj.Addr->getCode();
unsigned Opc = MI.getOpcode();
OS << Print(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc);
// Print the target for calls and branches (for readability).
if (MI.isCall() || MI.isBranch()) {
MachineInstr::const_mop_iterator T =
- llvm::find_if(MI.operands(),
- [] (const MachineOperand &Op) -> bool {
- return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
- });
+ llvm::find_if(MI.operands(), [](const MachineOperand &Op) -> bool {
+ return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
+ });
if (T != MI.operands_end()) {
OS << ' ';
if (T->isMBB())
@@ -240,32 +241,30 @@ raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<StmtNode *>> &P) {
OS << T->getSymbolName();
}
}
- OS << " [" << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']';
+ OS << " [" << PrintListV<RefNode *>(P.Obj.Addr->members(P.G), P.G) << ']';
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS,
- const Print<NodeAddr<InstrNode*>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<Instr> &P) {
switch (P.Obj.Addr->getKind()) {
- case NodeAttrs::Phi:
- OS << PrintNode<PhiNode*>(P.Obj, P.G);
- break;
- case NodeAttrs::Stmt:
- OS << PrintNode<StmtNode*>(P.Obj, P.G);
- break;
- default:
- OS << "instr? " << Print(P.Obj.Id, P.G);
- break;
+ case NodeAttrs::Phi:
+ OS << PrintNode<PhiNode *>(P.Obj, P.G);
+ break;
+ case NodeAttrs::Stmt:
+ OS << PrintNode<StmtNode *>(P.Obj, P.G);
+ break;
+ default:
+ OS << "instr? " << Print(P.Obj.Id, P.G);
+ break;
}
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS,
- const Print<NodeAddr<BlockNode*>> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<Block> &P) {
MachineBasicBlock *BB = P.Obj.Addr->getCode();
unsigned NP = BB->pred_size();
std::vector<int> Ns;
- auto PrintBBs = [&OS] (std::vector<int> Ns) -> void {
+ auto PrintBBs = [&OS](std::vector<int> Ns) -> void {
unsigned N = Ns.size();
for (int I : Ns) {
OS << "%bb." << I;
@@ -289,20 +288,21 @@ raw_ostream &operator<< (raw_ostream &OS,
OS << '\n';
for (auto I : P.Obj.Addr->members(P.G))
- OS << PrintNode<InstrNode*>(I, P.G) << '\n';
+ OS << PrintNode<InstrNode *>(I, P.G) << '\n';
return OS;
}
-raw_ostream &operator<<(raw_ostream &OS, const Print<NodeAddr<FuncNode *>> &P) {
- OS << "DFG dump:[\n" << Print(P.Obj.Id, P.G) << ": Function: "
- << P.Obj.Addr->getCode()->getName() << '\n';
+raw_ostream &operator<<(raw_ostream &OS, const Print<Func> &P) {
+ OS << "DFG dump:[\n"
+ << Print(P.Obj.Id, P.G)
+ << ": Function: " << P.Obj.Addr->getCode()->getName() << '\n';
for (auto I : P.Obj.Addr->members(P.G))
- OS << PrintNode<BlockNode*>(I, P.G) << '\n';
+ OS << PrintNode<BlockNode *>(I, P.G) << '\n';
OS << "]\n";
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterSet> &P) {
+raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterSet> &P) {
OS << '{';
for (auto I : P.Obj)
OS << ' ' << Print(I, P.G);
@@ -310,16 +310,16 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterSet> &P) {
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterAggr> &P) {
- P.Obj.print(OS);
+raw_ostream &operator<<(raw_ostream &OS, const Print<RegisterAggr> &P) {
+ OS << P.Obj;
return OS;
}
-raw_ostream &operator<< (raw_ostream &OS,
- const Print<DataFlowGraph::DefStack> &P) {
- for (auto I = P.Obj.top(), E = P.Obj.bottom(); I != E; ) {
- OS << Print(I->Id, P.G)
- << '<' << Print(I->Addr->getRegRef(P.G), P.G) << '>';
+raw_ostream &operator<<(raw_ostream &OS,
+ const Print<DataFlowGraph::DefStack> &P) {
+ for (auto I = P.Obj.top(), E = P.Obj.bottom(); I != E;) {
+ OS << Print(I->Id, P.G) << '<' << Print(I->Addr->getRegRef(P.G), P.G)
+ << '>';
I.down();
if (I != E)
OS << ' ';
@@ -327,9 +327,6 @@ raw_ostream &operator<< (raw_ostream &OS,
return OS;
}
-} // end namespace rdf
-} // end namespace llvm
-
// Node allocation functions.
//
// Node allocator is like a slab memory allocator: it allocates blocks of
@@ -340,13 +337,13 @@ raw_ostream &operator<< (raw_ostream &OS,
// and within that block is described in the header file.
//
void NodeAllocator::startNewBlock() {
- void *T = MemPool.Allocate(NodesPerBlock*NodeMemSize, NodeMemSize);
- char *P = static_cast<char*>(T);
+ void *T = MemPool.Allocate(NodesPerBlock * NodeMemSize, NodeMemSize);
+ char *P = static_cast<char *>(T);
Blocks.push_back(P);
// Check if the block index is still within the allowed range, i.e. less
// than 2^N, where N is the number of bits in NodeId for the block index.
// BitsPerIndex is the number of bits per node index.
- assert((Blocks.size() < ((size_t)1 << (8*sizeof(NodeId)-BitsPerIndex))) &&
+ assert((Blocks.size() < ((size_t)1 << (8 * sizeof(NodeId) - BitsPerIndex))) &&
"Out of bits for block index");
ActiveEnd = P;
}
@@ -356,18 +353,17 @@ bool NodeAllocator::needNewBlock() {
return true;
char *ActiveBegin = Blocks.back();
- uint32_t Index = (ActiveEnd-ActiveBegin)/NodeMemSize;
+ uint32_t Index = (ActiveEnd - ActiveBegin) / NodeMemSize;
return Index >= NodesPerBlock;
}
-NodeAddr<NodeBase*> NodeAllocator::New() {
+Node NodeAllocator::New() {
if (needNewBlock())
startNewBlock();
- uint32_t ActiveB = Blocks.size()-1;
- uint32_t Index = (ActiveEnd - Blocks[ActiveB])/NodeMemSize;
- NodeAddr<NodeBase*> NA = { reinterpret_cast<NodeBase*>(ActiveEnd),
- makeId(ActiveB, Index) };
+ uint32_t ActiveB = Blocks.size() - 1;
+ uint32_t Index = (ActiveEnd - Blocks[ActiveB]) / NodeMemSize;
+ Node NA = {reinterpret_cast<NodeBase *>(ActiveEnd), makeId(ActiveB, Index)};
ActiveEnd += NodeMemSize;
return NA;
}
@@ -376,9 +372,9 @@ NodeId NodeAllocator::id(const NodeBase *P) const {
uintptr_t A = reinterpret_cast<uintptr_t>(P);
for (unsigned i = 0, n = Blocks.size(); i != n; ++i) {
uintptr_t B = reinterpret_cast<uintptr_t>(Blocks[i]);
- if (A < B || A >= B + NodesPerBlock*NodeMemSize)
+ if (A < B || A >= B + NodesPerBlock * NodeMemSize)
continue;
- uint32_t Idx = (A-B)/NodeMemSize;
+ uint32_t Idx = (A - B) / NodeMemSize;
return makeId(i, Idx);
}
llvm_unreachable("Invalid node address");
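Editor note: aside from the whitespace reflow, the allocator above keeps its id scheme: judging by the assert and by makeId(block, index), a NodeId appears to combine a block index in the high bits with an in-block index in the low BitsPerIndex bits. A tiny self-contained illustration of that encoding; the constant here is invented for the example, not taken from RDFGraph:

#include <cstdint>

using NodeId = uint32_t;
constexpr unsigned BitsPerIndex = 24; // example value only

// Pack/unpack a node id from (block, index-within-block).
constexpr NodeId makeId(uint32_t Block, uint32_t Index) {
  return (Block << BitsPerIndex) | Index;
}
constexpr uint32_t blockOf(NodeId Id) { return Id >> BitsPerIndex; }
constexpr uint32_t indexOf(NodeId Id) { return Id & ((1u << BitsPerIndex) - 1); }

static_assert(blockOf(makeId(3, 17)) == 3, "block index round-trips");
static_assert(indexOf(makeId(3, 17)) == 17, "in-block index round-trips");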
@@ -391,7 +387,7 @@ void NodeAllocator::clear() {
}
// Insert node NA after "this" in the circular chain.
-void NodeBase::append(NodeAddr<NodeBase*> NA) {
+void NodeBase::append(Node NA) {
NodeId Nx = Next;
// If NA is already "next", do nothing.
if (Next != NA.Id) {
@@ -406,9 +402,9 @@ void NodeBase::append(NodeAddr<NodeBase*> NA) {
RegisterRef RefNode::getRegRef(const DataFlowGraph &G) const {
assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
if (NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef)
- return G.unpack(Ref.PR);
- assert(Ref.Op != nullptr);
- return G.makeRegRef(*Ref.Op);
+ return G.unpack(RefData.PR);
+ assert(RefData.Op != nullptr);
+ return G.makeRegRef(*RefData.Op);
}
// Set the register reference in the reference node directly (for references
@@ -416,7 +412,7 @@ RegisterRef RefNode::getRegRef(const DataFlowGraph &G) const {
void RefNode::setRegRef(RegisterRef RR, DataFlowGraph &G) {
assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
assert(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef);
- Ref.PR = G.pack(RR);
+ RefData.PR = G.pack(RR);
}
// Set the register reference in the reference node based on a machine
@@ -425,83 +421,82 @@ void RefNode::setRegRef(MachineOperand *Op, DataFlowGraph &G) {
assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
assert(!(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef));
(void)G;
- Ref.Op = Op;
+ RefData.Op = Op;
}
// Get the owner of a given reference node.
-NodeAddr<NodeBase*> RefNode::getOwner(const DataFlowGraph &G) {
- NodeAddr<NodeBase*> NA = G.addr<NodeBase*>(getNext());
+Node RefNode::getOwner(const DataFlowGraph &G) {
+ Node NA = G.addr<NodeBase *>(getNext());
while (NA.Addr != this) {
if (NA.Addr->getType() == NodeAttrs::Code)
return NA;
- NA = G.addr<NodeBase*>(NA.Addr->getNext());
+ NA = G.addr<NodeBase *>(NA.Addr->getNext());
}
llvm_unreachable("No owner in circular list");
}
// Connect the def node to the reaching def node.
-void DefNode::linkToDef(NodeId Self, NodeAddr<DefNode*> DA) {
- Ref.RD = DA.Id;
- Ref.Sib = DA.Addr->getReachedDef();
+void DefNode::linkToDef(NodeId Self, Def DA) {
+ RefData.RD = DA.Id;
+ RefData.Sib = DA.Addr->getReachedDef();
DA.Addr->setReachedDef(Self);
}
// Connect the use node to the reaching def node.
-void UseNode::linkToDef(NodeId Self, NodeAddr<DefNode*> DA) {
- Ref.RD = DA.Id;
- Ref.Sib = DA.Addr->getReachedUse();
+void UseNode::linkToDef(NodeId Self, Def DA) {
+ RefData.RD = DA.Id;
+ RefData.Sib = DA.Addr->getReachedUse();
DA.Addr->setReachedUse(Self);
}
// Get the first member of the code node.
-NodeAddr<NodeBase*> CodeNode::getFirstMember(const DataFlowGraph &G) const {
- if (Code.FirstM == 0)
- return NodeAddr<NodeBase*>();
- return G.addr<NodeBase*>(Code.FirstM);
+Node CodeNode::getFirstMember(const DataFlowGraph &G) const {
+ if (CodeData.FirstM == 0)
+ return Node();
+ return G.addr<NodeBase *>(CodeData.FirstM);
}
// Get the last member of the code node.
-NodeAddr<NodeBase*> CodeNode::getLastMember(const DataFlowGraph &G) const {
- if (Code.LastM == 0)
- return NodeAddr<NodeBase*>();
- return G.addr<NodeBase*>(Code.LastM);
+Node CodeNode::getLastMember(const DataFlowGraph &G) const {
+ if (CodeData.LastM == 0)
+ return Node();
+ return G.addr<NodeBase *>(CodeData.LastM);
}
// Add node NA at the end of the member list of the given code node.
-void CodeNode::addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) {
- NodeAddr<NodeBase*> ML = getLastMember(G);
+void CodeNode::addMember(Node NA, const DataFlowGraph &G) {
+ Node ML = getLastMember(G);
if (ML.Id != 0) {
ML.Addr->append(NA);
} else {
- Code.FirstM = NA.Id;
+ CodeData.FirstM = NA.Id;
NodeId Self = G.id(this);
NA.Addr->setNext(Self);
}
- Code.LastM = NA.Id;
+ CodeData.LastM = NA.Id;
}
// Add node NA after member node MA in the given code node.
-void CodeNode::addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA,
- const DataFlowGraph &G) {
+void CodeNode::addMemberAfter(Node MA, Node NA, const DataFlowGraph &G) {
MA.Addr->append(NA);
- if (Code.LastM == MA.Id)
- Code.LastM = NA.Id;
+ if (CodeData.LastM == MA.Id)
+ CodeData.LastM = NA.Id;
}
// Remove member node NA from the given code node.
-void CodeNode::removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) {
- NodeAddr<NodeBase*> MA = getFirstMember(G);
+void CodeNode::removeMember(Node NA, const DataFlowGraph &G) {
+ Node MA = getFirstMember(G);
assert(MA.Id != 0);
// Special handling if the member to remove is the first member.
if (MA.Id == NA.Id) {
- if (Code.LastM == MA.Id) {
+ if (CodeData.LastM == MA.Id) {
// If it is the only member, set both first and last to 0.
- Code.FirstM = Code.LastM = 0;
+ CodeData.FirstM = CodeData.LastM = 0;
} else {
// Otherwise, advance the first member.
- Code.FirstM = MA.Addr->getNext();
+ CodeData.FirstM = MA.Addr->getNext();
}
return;
}
@@ -512,37 +507,37 @@ void CodeNode::removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) {
MA.Addr->setNext(NA.Addr->getNext());
// If the member to remove happens to be the last one, update the
// LastM indicator.
- if (Code.LastM == NA.Id)
- Code.LastM = MA.Id;
+ if (CodeData.LastM == NA.Id)
+ CodeData.LastM = MA.Id;
return;
}
- MA = G.addr<NodeBase*>(MX);
+ MA = G.addr<NodeBase *>(MX);
}
llvm_unreachable("No such member");
}
// Return the list of all members of the code node.
NodeList CodeNode::members(const DataFlowGraph &G) const {
- static auto True = [] (NodeAddr<NodeBase*>) -> bool { return true; };
+ static auto True = [](Node) -> bool { return true; };
return members_if(True, G);
}
// Return the owner of the given instr node.
-NodeAddr<NodeBase*> InstrNode::getOwner(const DataFlowGraph &G) {
- NodeAddr<NodeBase*> NA = G.addr<NodeBase*>(getNext());
+Node InstrNode::getOwner(const DataFlowGraph &G) {
+ Node NA = G.addr<NodeBase *>(getNext());
while (NA.Addr != this) {
assert(NA.Addr->getType() == NodeAttrs::Code);
if (NA.Addr->getKind() == NodeAttrs::Block)
return NA;
- NA = G.addr<NodeBase*>(NA.Addr->getNext());
+ NA = G.addr<NodeBase *>(NA.Addr->getNext());
}
llvm_unreachable("No owner in circular list");
}
// Add the phi node PA to the given block node.
-void BlockNode::addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G) {
- NodeAddr<NodeBase*> M = getFirstMember(G);
+void BlockNode::addPhi(Phi PA, const DataFlowGraph &G) {
+ Node M = getFirstMember(G);
if (M.Id == 0) {
addMember(PA, G);
return;
@@ -552,15 +547,15 @@ void BlockNode::addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G) {
if (M.Addr->getKind() == NodeAttrs::Stmt) {
// If the first member of the block is a statement, insert the phi as
// the first member.
- Code.FirstM = PA.Id;
+ CodeData.FirstM = PA.Id;
PA.Addr->setNext(M.Id);
} else {
// If the first member is a phi, find the last phi, and append PA to it.
assert(M.Addr->getKind() == NodeAttrs::Phi);
- NodeAddr<NodeBase*> MN = M;
+ Node MN = M;
do {
M = MN;
- MN = G.addr<NodeBase*>(M.Addr->getNext());
+ MN = G.addr<NodeBase *>(M.Addr->getNext());
assert(MN.Addr->getType() == NodeAttrs::Code);
} while (MN.Addr->getKind() == NodeAttrs::Phi);
@@ -571,19 +566,17 @@ void BlockNode::addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G) {
// Find the block node corresponding to the machine basic block BB in the
// given func node.
-NodeAddr<BlockNode*> FuncNode::findBlock(const MachineBasicBlock *BB,
- const DataFlowGraph &G) const {
- auto EqBB = [BB] (NodeAddr<NodeBase*> NA) -> bool {
- return NodeAddr<BlockNode*>(NA).Addr->getCode() == BB;
- };
+Block FuncNode::findBlock(const MachineBasicBlock *BB,
+ const DataFlowGraph &G) const {
+ auto EqBB = [BB](Node NA) -> bool { return Block(NA).Addr->getCode() == BB; };
NodeList Ms = members_if(EqBB, G);
if (!Ms.empty())
return Ms[0];
- return NodeAddr<BlockNode*>();
+ return Block();
}
// Get the block node for the entry block in the given function.
-NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) {
+Block FuncNode::getEntryBlock(const DataFlowGraph &G) {
MachineBasicBlock *EntryB = &getCode()->front();
return findBlock(EntryB, G);
}
@@ -593,14 +586,14 @@ NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) {
// For a given instruction, check if there are any bits of RR that can remain
// unchanged across this def.
-bool TargetOperandInfo::isPreserving(const MachineInstr &In, unsigned OpNum)
- const {
+bool TargetOperandInfo::isPreserving(const MachineInstr &In,
+ unsigned OpNum) const {
return TII.isPredicated(In);
}
// Check if the definition of RR produces an unspecified value.
-bool TargetOperandInfo::isClobbering(const MachineInstr &In, unsigned OpNum)
- const {
+bool TargetOperandInfo::isClobbering(const MachineInstr &In,
+ unsigned OpNum) const {
const MachineOperand &Op = In.getOperand(OpNum);
if (Op.isRegMask())
return true;
@@ -612,8 +605,8 @@ bool TargetOperandInfo::isClobbering(const MachineInstr &In, unsigned OpNum)
}
// Check if the given instruction specifically requires
-bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
- const {
+bool TargetOperandInfo::isFixedReg(const MachineInstr &In,
+ unsigned OpNum) const {
if (In.isCall() || In.isReturn() || In.isInlineAsm())
return true;
// Check for a tail call.
@@ -642,19 +635,20 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
//
DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
- const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
- const MachineDominanceFrontier &mdf)
+ const TargetRegisterInfo &tri,
+ const MachineDominatorTree &mdt,
+ const MachineDominanceFrontier &mdf)
: DefaultTOI(std::make_unique<TargetOperandInfo>(tii)), MF(mf), TII(tii),
TRI(tri), PRI(tri, mf), MDT(mdt), MDF(mdf), TOI(*DefaultTOI),
- LiveIns(PRI) {
-}
+ LiveIns(PRI) {}
DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
- const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
- const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi)
+ const TargetRegisterInfo &tri,
+ const MachineDominatorTree &mdt,
+ const MachineDominanceFrontier &mdf,
+ const TargetOperandInfo &toi)
: MF(mf), TII(tii), TRI(tri), PRI(tri, mf), MDT(mdt), MDF(mdf), TOI(toi),
- LiveIns(PRI) {
-}
+ LiveIns(PRI) {}
// The implementation of the definition stack.
// Each register reference has its own definition stack. In particular,
@@ -663,7 +657,8 @@ DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
// Construct a stack iterator.
DataFlowGraph::DefStack::Iterator::Iterator(const DataFlowGraph::DefStack &S,
- bool Top) : DS(S) {
+ bool Top)
+ : DS(S) {
if (!Top) {
// Initialize to bottom.
Pos = 0;
@@ -671,7 +666,7 @@ DataFlowGraph::DefStack::Iterator::Iterator(const DataFlowGraph::DefStack &S,
}
// Initialize to the top, i.e. top-most non-delimiter (or 0, if empty).
Pos = DS.Stack.size();
- while (Pos > 0 && DS.isDelimiter(DS.Stack[Pos-1]))
+ while (Pos > 0 && DS.isDelimiter(DS.Stack[Pos - 1]))
Pos--;
}
@@ -695,7 +690,7 @@ void DataFlowGraph::DefStack::pop() {
// Push a delimiter for block node N on the stack.
void DataFlowGraph::DefStack::start_block(NodeId N) {
assert(N != 0);
- Stack.push_back(NodeAddr<DefNode*>(nullptr, N));
+ Stack.push_back(Def(nullptr, N));
}
// Remove all nodes from the top of the stack, until the delimiter for
@@ -705,7 +700,7 @@ void DataFlowGraph::DefStack::clear_block(NodeId N) {
assert(N != 0);
unsigned P = Stack.size();
while (P > 0) {
- bool Found = isDelimiter(Stack[P-1], N);
+ bool Found = isDelimiter(Stack[P - 1], N);
P--;
if (Found)
break;
@@ -723,7 +718,7 @@ unsigned DataFlowGraph::DefStack::nextUp(unsigned P) const {
assert(P < SS);
do {
P++;
- IsDelim = isDelimiter(Stack[P-1]);
+ IsDelim = isDelimiter(Stack[P - 1]);
} while (P < SS && IsDelim);
assert(!IsDelim);
return P;
@@ -734,11 +729,11 @@ unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
// Get the preceding valid position before P (skipping all delimiters).
// The input position P does not have to point to a non-delimiter.
assert(P > 0 && P <= Stack.size());
- bool IsDelim = isDelimiter(Stack[P-1]);
+ bool IsDelim = isDelimiter(Stack[P - 1]);
do {
if (--P == 0)
break;
- IsDelim = isDelimiter(Stack[P-1]);
+ IsDelim = isDelimiter(Stack[P - 1]);
} while (P > 0 && IsDelim);
assert(!IsDelim);
return P;
@@ -746,11 +741,10 @@ unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
// Register information.
-RegisterSet DataFlowGraph::getLandingPadLiveIns() const {
- RegisterSet LR;
+RegisterAggr DataFlowGraph::getLandingPadLiveIns() const {
+ RegisterAggr LR(getPRI());
const Function &F = MF.getFunction();
- const Constant *PF = F.hasPersonalityFn() ? F.getPersonalityFn()
- : nullptr;
+ const Constant *PF = F.hasPersonalityFn() ? F.getPersonalityFn() : nullptr;
const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
if (RegisterId R = TLI.getExceptionPointerRegister(PF))
LR.insert(RegisterRef(R));
@@ -778,8 +772,8 @@ NodeId DataFlowGraph::id(const NodeBase *P) const {
}
// Allocate a new node and set the attributes to Attrs.
-NodeAddr<NodeBase*> DataFlowGraph::newNode(uint16_t Attrs) {
- NodeAddr<NodeBase*> P = Memory.New();
+Node DataFlowGraph::newNode(uint16_t Attrs) {
+ Node P = Memory.New();
P.Addr->init();
P.Addr->setAttrs(Attrs);
return P;
@@ -787,16 +781,16 @@ NodeAddr<NodeBase*> DataFlowGraph::newNode(uint16_t Attrs) {
// Make a copy of the given node B, except for the data-flow links, which
// are set to 0.
-NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) {
- NodeAddr<NodeBase*> NA = newNode(0);
+Node DataFlowGraph::cloneNode(const Node B) {
+ Node NA = newNode(0);
memcpy(NA.Addr, B.Addr, sizeof(NodeBase));
// Ref nodes need to have the data-flow links reset.
if (NA.Addr->getType() == NodeAttrs::Ref) {
- NodeAddr<RefNode*> RA = NA;
+ Ref RA = NA;
RA.Addr->setReachingDef(0);
RA.Addr->setSibling(0);
if (NA.Addr->getKind() == NodeAttrs::Def) {
- NodeAddr<DefNode*> DA = NA;
+ Def DA = NA;
DA.Addr->setReachedDef(0);
DA.Addr->setReachedUse(0);
}
@@ -806,75 +800,105 @@ NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) {
// Allocation routines for specific node types/kinds.
-NodeAddr<UseNode*> DataFlowGraph::newUse(NodeAddr<InstrNode*> Owner,
- MachineOperand &Op, uint16_t Flags) {
- NodeAddr<UseNode*> UA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags);
+Use DataFlowGraph::newUse(Instr Owner, MachineOperand &Op, uint16_t Flags) {
+ Use UA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags);
UA.Addr->setRegRef(&Op, *this);
return UA;
}
-NodeAddr<PhiUseNode*> DataFlowGraph::newPhiUse(NodeAddr<PhiNode*> Owner,
- RegisterRef RR, NodeAddr<BlockNode*> PredB, uint16_t Flags) {
- NodeAddr<PhiUseNode*> PUA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags);
+PhiUse DataFlowGraph::newPhiUse(Phi Owner, RegisterRef RR, Block PredB,
+ uint16_t Flags) {
+ PhiUse PUA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags);
assert(Flags & NodeAttrs::PhiRef);
PUA.Addr->setRegRef(RR, *this);
PUA.Addr->setPredecessor(PredB.Id);
return PUA;
}
-NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner,
- MachineOperand &Op, uint16_t Flags) {
- NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags);
+Def DataFlowGraph::newDef(Instr Owner, MachineOperand &Op, uint16_t Flags) {
+ Def DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags);
DA.Addr->setRegRef(&Op, *this);
return DA;
}
-NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner,
- RegisterRef RR, uint16_t Flags) {
- NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags);
+Def DataFlowGraph::newDef(Instr Owner, RegisterRef RR, uint16_t Flags) {
+ Def DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags);
assert(Flags & NodeAttrs::PhiRef);
DA.Addr->setRegRef(RR, *this);
return DA;
}
-NodeAddr<PhiNode*> DataFlowGraph::newPhi(NodeAddr<BlockNode*> Owner) {
- NodeAddr<PhiNode*> PA = newNode(NodeAttrs::Code | NodeAttrs::Phi);
+Phi DataFlowGraph::newPhi(Block Owner) {
+ Phi PA = newNode(NodeAttrs::Code | NodeAttrs::Phi);
Owner.Addr->addPhi(PA, *this);
return PA;
}
-NodeAddr<StmtNode*> DataFlowGraph::newStmt(NodeAddr<BlockNode*> Owner,
- MachineInstr *MI) {
- NodeAddr<StmtNode*> SA = newNode(NodeAttrs::Code | NodeAttrs::Stmt);
+Stmt DataFlowGraph::newStmt(Block Owner, MachineInstr *MI) {
+ Stmt SA = newNode(NodeAttrs::Code | NodeAttrs::Stmt);
SA.Addr->setCode(MI);
Owner.Addr->addMember(SA, *this);
return SA;
}
-NodeAddr<BlockNode*> DataFlowGraph::newBlock(NodeAddr<FuncNode*> Owner,
- MachineBasicBlock *BB) {
- NodeAddr<BlockNode*> BA = newNode(NodeAttrs::Code | NodeAttrs::Block);
+Block DataFlowGraph::newBlock(Func Owner, MachineBasicBlock *BB) {
+ Block BA = newNode(NodeAttrs::Code | NodeAttrs::Block);
BA.Addr->setCode(BB);
Owner.Addr->addMember(BA, *this);
return BA;
}
-NodeAddr<FuncNode*> DataFlowGraph::newFunc(MachineFunction *MF) {
- NodeAddr<FuncNode*> FA = newNode(NodeAttrs::Code | NodeAttrs::Func);
+Func DataFlowGraph::newFunc(MachineFunction *MF) {
+ Func FA = newNode(NodeAttrs::Code | NodeAttrs::Func);
FA.Addr->setCode(MF);
return FA;
}
// Build the data flow graph.
-void DataFlowGraph::build(unsigned Options) {
+void DataFlowGraph::build(const Config &config) {
reset();
- Func = newFunc(&MF);
+ BuildCfg = config;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ ReservedRegs = MRI.getReservedRegs();
+ bool SkipReserved = BuildCfg.Options & BuildOptions::OmitReserved;
+
+ auto Insert = [](auto &Set, auto &&Range) {
+ Set.insert(Range.begin(), Range.end());
+ };
+
+ if (BuildCfg.TrackRegs.empty()) {
+ std::set<RegisterId> BaseSet;
+ if (BuildCfg.Classes.empty()) {
+ // Insert every register.
+ for (unsigned R = 0, E = getPRI().getTRI().getNumRegs(); R != E; ++R)
+ BaseSet.insert(R);
+ } else {
+ for (const TargetRegisterClass *RC : BuildCfg.Classes) {
+ for (MCPhysReg R : *RC)
+ BaseSet.insert(R);
+ }
+ }
+ for (RegisterId R : BaseSet) {
+ if (SkipReserved && ReservedRegs[R])
+ continue;
+ Insert(TrackedUnits, getPRI().getUnits(RegisterRef(R)));
+ }
+ } else {
+ // Track set in Config overrides everything.
+ for (unsigned R : BuildCfg.TrackRegs) {
+ if (SkipReserved && ReservedRegs[R])
+ continue;
+ Insert(TrackedUnits, getPRI().getUnits(RegisterRef(R)));
+ }
+ }
+
+ TheFunc = newFunc(&MF);
if (MF.empty())
return;
for (MachineBasicBlock &B : MF) {
- NodeAddr<BlockNode*> BA = newBlock(Func, &B);
+ Block BA = newBlock(TheFunc, &B);
BlockNodes.insert(std::make_pair(&B, BA));
for (MachineInstr &I : B) {
if (I.isDebugInstr())
@@ -883,21 +907,13 @@ void DataFlowGraph::build(unsigned Options) {
}
}
- NodeAddr<BlockNode*> EA = Func.Addr->getEntryBlock(*this);
- NodeList Blocks = Func.Addr->members(*this);
-
- // Collect information about block references.
- RegisterSet AllRefs;
- for (NodeAddr<BlockNode*> BA : Blocks)
- for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this))
- for (NodeAddr<RefNode*> RA : IA.Addr->members(*this))
- AllRefs.insert(RA.Addr->getRegRef(*this));
+ Block EA = TheFunc.Addr->getEntryBlock(*this);
+ NodeList Blocks = TheFunc.Addr->members(*this);
// Collect function live-ins and entry block live-ins.
- MachineRegisterInfo &MRI = MF.getRegInfo();
MachineBasicBlock &EntryB = *EA.Addr->getCode();
assert(EntryB.pred_empty() && "Function entry block has predecessors");
- for (std::pair<unsigned,unsigned> P : MRI.liveins())
+ for (std::pair<unsigned, unsigned> P : MRI.liveins())
LiveIns.insert(RegisterRef(P.first));
if (MRI.tracksLiveness()) {
for (auto I : EntryB.liveins())
@@ -905,12 +921,12 @@ void DataFlowGraph::build(unsigned Options) {
}
// Add function-entry phi nodes for the live-in registers.
- //for (std::pair<RegisterId,LaneBitmask> P : LiveIns) {
- for (auto I = LiveIns.rr_begin(), E = LiveIns.rr_end(); I != E; ++I) {
- RegisterRef RR = *I;
- NodeAddr<PhiNode*> PA = newPhi(EA);
+ for (RegisterRef RR : LiveIns.refs()) {
+ if (RR.isReg() && !isTracked(RR)) // isReg is likely guaranteed
+ continue;
+ Phi PA = newPhi(EA);
uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
- NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
+ Def DA = newDef(PA, RR, PhiFlags);
PA.Addr->addMember(DA, *this);
}
@@ -919,9 +935,9 @@ void DataFlowGraph::build(unsigned Options) {
// branches in the program, or fall-throughs from other blocks. They
// are entered from the exception handling runtime and target's ABI
// may define certain registers as defined on entry to such a block.
- RegisterSet EHRegs = getLandingPadLiveIns();
+ RegisterAggr EHRegs = getLandingPadLiveIns();
if (!EHRegs.empty()) {
- for (NodeAddr<BlockNode*> BA : Blocks) {
+ for (Block BA : Blocks) {
const MachineBasicBlock &B = *BA.Addr->getCode();
if (!B.isEHPad())
continue;
@@ -932,15 +948,17 @@ void DataFlowGraph::build(unsigned Options) {
Preds.push_back(findBlock(PB));
// Build phi nodes for each live-in.
- for (RegisterRef RR : EHRegs) {
- NodeAddr<PhiNode*> PA = newPhi(BA);
+ for (RegisterRef RR : EHRegs.refs()) {
+ if (RR.isReg() && !isTracked(RR))
+ continue;
+ Phi PA = newPhi(BA);
uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
// Add def:
- NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
+ Def DA = newDef(PA, RR, PhiFlags);
PA.Addr->addMember(DA, *this);
// Add uses (no reaching defs for phi uses):
- for (NodeAddr<BlockNode*> PBA : Preds) {
- NodeAddr<PhiUseNode*> PUA = newPhiUse(PA, RR, PBA);
+ for (Block PBA : Preds) {
+ PhiUse PUA = newPhiUse(PA, RR, PBA);
PA.Addr->addMember(PUA, *this);
}
}
@@ -949,24 +967,23 @@ void DataFlowGraph::build(unsigned Options) {
// Build a map "PhiM" which will contain, for each block, the set
// of references that will require phi definitions in that block.
- BlockRefsMap PhiM;
- for (NodeAddr<BlockNode*> BA : Blocks)
+ BlockRefsMap PhiM(getPRI());
+ for (Block BA : Blocks)
recordDefsForDF(PhiM, BA);
- for (NodeAddr<BlockNode*> BA : Blocks)
- buildPhis(PhiM, AllRefs, BA);
+ for (Block BA : Blocks)
+ buildPhis(PhiM, BA);
// Link all the refs. This will recursively traverse the dominator tree.
DefStackMap DM;
linkBlockRefs(DM, EA);
// Finally, remove all unused phi nodes.
- if (!(Options & BuildOptions::KeepDeadPhis))
+ if (!(BuildCfg.Options & BuildOptions::KeepDeadPhis))
removeUnusedPhis();
}
RegisterRef DataFlowGraph::makeRegRef(unsigned Reg, unsigned Sub) const {
- assert(PhysicalRegisterInfo::isRegMaskId(Reg) ||
- Register::isPhysicalRegister(Reg));
+ assert(RegisterRef::isRegId(Reg) || RegisterRef::isMaskId(Reg));
assert(Reg != 0);
if (Sub != 0)
Reg = TRI.getSubReg(Reg, Sub);
@@ -977,7 +994,8 @@ RegisterRef DataFlowGraph::makeRegRef(const MachineOperand &Op) const {
assert(Op.isReg() || Op.isRegMask());
if (Op.isReg())
return makeRegRef(Op.getReg(), Op.getSubReg());
- return RegisterRef(PRI.getRegMaskId(Op.getRegMask()), LaneBitmask::getAll());
+ return RegisterRef(getPRI().getRegMaskId(Op.getRegMask()),
+ LaneBitmask::getAll());
}
// For each stack in the map DefM, push the delimiter for block B on it.
@@ -1006,14 +1024,14 @@ void DataFlowGraph::releaseBlock(NodeId B, DefStackMap &DefM) {
// Push all definitions from the instruction node IA to an appropriate
// stack in DefM.
-void DataFlowGraph::pushAllDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
+void DataFlowGraph::pushAllDefs(Instr IA, DefStackMap &DefM) {
pushClobbers(IA, DefM);
pushDefs(IA, DefM);
}
// Push all definitions from the instruction node IA to an appropriate
// stack in DefM.
-void DataFlowGraph::pushClobbers(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
+void DataFlowGraph::pushClobbers(Instr IA, DefStackMap &DefM) {
NodeSet Visited;
std::set<RegisterId> Defined;
@@ -1029,35 +1047,37 @@ void DataFlowGraph::pushClobbers(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
// unspecified order), but the order does not matter from the data-
// -flow perspective.
- for (NodeAddr<DefNode*> DA : IA.Addr->members_if(IsDef, *this)) {
+ for (Def DA : IA.Addr->members_if(IsDef, *this)) {
if (Visited.count(DA.Id))
continue;
if (!(DA.Addr->getFlags() & NodeAttrs::Clobbering))
continue;
NodeList Rel = getRelatedRefs(IA, DA);
- NodeAddr<DefNode*> PDA = Rel.front();
+ Def PDA = Rel.front();
RegisterRef RR = PDA.Addr->getRegRef(*this);
// Push the definition on the stack for the register and all aliases.
// The def stack traversal in linkNodeUp will check the exact aliasing.
DefM[RR.Reg].push(DA);
Defined.insert(RR.Reg);
- for (RegisterId A : PRI.getAliasSet(RR.Reg)) {
+ for (RegisterId A : getPRI().getAliasSet(RR.Reg)) {
+ if (RegisterRef::isRegId(A) && !isTracked(RegisterRef(A)))
+ continue;
// Check that we don't push the same def twice.
assert(A != RR.Reg);
if (!Defined.count(A))
DefM[A].push(DA);
}
// Mark all the related defs as visited.
- for (NodeAddr<NodeBase*> T : Rel)
+ for (Node T : Rel)
Visited.insert(T.Id);
}
}
// Push all definitions from the instruction node IA to an appropriate
// stack in DefM.
-void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
+void DataFlowGraph::pushDefs(Instr IA, DefStackMap &DefM) {
NodeSet Visited;
#ifndef NDEBUG
std::set<RegisterId> Defined;
@@ -1075,44 +1095,45 @@ void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
// unspecified order), but the order does not matter from the data-
// -flow perspective.
- for (NodeAddr<DefNode*> DA : IA.Addr->members_if(IsDef, *this)) {
+ for (Def DA : IA.Addr->members_if(IsDef, *this)) {
if (Visited.count(DA.Id))
continue;
if (DA.Addr->getFlags() & NodeAttrs::Clobbering)
continue;
NodeList Rel = getRelatedRefs(IA, DA);
- NodeAddr<DefNode*> PDA = Rel.front();
+ Def PDA = Rel.front();
RegisterRef RR = PDA.Addr->getRegRef(*this);
#ifndef NDEBUG
// Assert if the register is defined in two or more unrelated defs.
// This could happen if there are two or more def operands defining it.
if (!Defined.insert(RR.Reg).second) {
- MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
- dbgs() << "Multiple definitions of register: "
- << Print(RR, *this) << " in\n " << *MI << "in "
- << printMBBReference(*MI->getParent()) << '\n';
+ MachineInstr *MI = Stmt(IA).Addr->getCode();
+ dbgs() << "Multiple definitions of register: " << Print(RR, *this)
+ << " in\n " << *MI << "in " << printMBBReference(*MI->getParent())
+ << '\n';
llvm_unreachable(nullptr);
}
#endif
// Push the definition on the stack for the register and all aliases.
// The def stack traversal in linkNodeUp will check the exact aliasing.
DefM[RR.Reg].push(DA);
- for (RegisterId A : PRI.getAliasSet(RR.Reg)) {
+ for (RegisterId A : getPRI().getAliasSet(RR.Reg)) {
+ if (RegisterRef::isRegId(A) && !isTracked(RegisterRef(A)))
+ continue;
// Check that we don't push the same def twice.
assert(A != RR.Reg);
DefM[A].push(DA);
}
// Mark all the related defs as visited.
- for (NodeAddr<NodeBase*> T : Rel)
+ for (Node T : Rel)
Visited.insert(T.Id);
}
}
// Return the list of all reference nodes related to RA, including RA itself.
// See "getNextRelated" for the meaning of a "related reference".
-NodeList DataFlowGraph::getRelatedRefs(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const {
+NodeList DataFlowGraph::getRelatedRefs(Instr IA, Ref RA) const {
assert(IA.Id != 0 && RA.Id != 0);
NodeList Refs;
@@ -1128,7 +1149,9 @@ NodeList DataFlowGraph::getRelatedRefs(NodeAddr<InstrNode*> IA,
void DataFlowGraph::reset() {
Memory.clear();
BlockNodes.clear();
- Func = NodeAddr<FuncNode*>();
+ TrackedUnits.clear();
+ ReservedRegs.clear();
+ TheFunc = Func();
}
// Return the next reference node in the instruction node IA that is related
@@ -1137,36 +1160,38 @@ void DataFlowGraph::reset() {
// characteristics. Specific examples of related nodes are shadow reference
// nodes.
// Return the equivalent of nullptr if there are no more related references.
-NodeAddr<RefNode*> DataFlowGraph::getNextRelated(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const {
+Ref DataFlowGraph::getNextRelated(Instr IA, Ref RA) const {
assert(IA.Id != 0 && RA.Id != 0);
- auto Related = [this,RA](NodeAddr<RefNode*> TA) -> bool {
+ auto IsRelated = [this, RA](Ref TA) -> bool {
if (TA.Addr->getKind() != RA.Addr->getKind())
return false;
- if (TA.Addr->getRegRef(*this) != RA.Addr->getRegRef(*this))
+ if (!getPRI().equal_to(TA.Addr->getRegRef(*this),
+ RA.Addr->getRegRef(*this))) {
return false;
+ }
return true;
};
- auto RelatedStmt = [&Related,RA](NodeAddr<RefNode*> TA) -> bool {
- return Related(TA) &&
- &RA.Addr->getOp() == &TA.Addr->getOp();
- };
- auto RelatedPhi = [&Related,RA](NodeAddr<RefNode*> TA) -> bool {
- if (!Related(TA))
+
+ RegisterRef RR = RA.Addr->getRegRef(*this);
+ if (IA.Addr->getKind() == NodeAttrs::Stmt) {
+ auto Cond = [&IsRelated, RA](Ref TA) -> bool {
+ return IsRelated(TA) && &RA.Addr->getOp() == &TA.Addr->getOp();
+ };
+ return RA.Addr->getNextRef(RR, Cond, true, *this);
+ }
+
+ assert(IA.Addr->getKind() == NodeAttrs::Phi);
+ auto Cond = [&IsRelated, RA](Ref TA) -> bool {
+ if (!IsRelated(TA))
return false;
if (TA.Addr->getKind() != NodeAttrs::Use)
return true;
// For phi uses, compare predecessor blocks.
- const NodeAddr<const PhiUseNode*> TUA = TA;
- const NodeAddr<const PhiUseNode*> RUA = RA;
- return TUA.Addr->getPredecessor() == RUA.Addr->getPredecessor();
+ return PhiUse(TA).Addr->getPredecessor() ==
+ PhiUse(RA).Addr->getPredecessor();
};
-
- RegisterRef RR = RA.Addr->getRegRef(*this);
- if (IA.Addr->getKind() == NodeAttrs::Stmt)
- return RA.Addr->getNextRef(RR, RelatedStmt, true, *this);
- return RA.Addr->getNextRef(RR, RelatedPhi, true, *this);
+ return RA.Addr->getNextRef(RR, Cond, true, *this);
}
// Find the next node related to RA in IA that satisfies condition P.
@@ -1175,12 +1200,11 @@ NodeAddr<RefNode*> DataFlowGraph::getNextRelated(NodeAddr<InstrNode*> IA,
// first element is the element after which such a node should be inserted,
// and the second element is a null-address.
template <typename Predicate>
-std::pair<NodeAddr<RefNode*>,NodeAddr<RefNode*>>
-DataFlowGraph::locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
- Predicate P) const {
+std::pair<Ref, Ref> DataFlowGraph::locateNextRef(Instr IA, Ref RA,
+ Predicate P) const {
assert(IA.Id != 0 && RA.Id != 0);
- NodeAddr<RefNode*> NA;
+ Ref NA;
NodeId Start = RA.Id;
while (true) {
NA = getNextRelated(IA, RA);
@@ -1193,17 +1217,16 @@ DataFlowGraph::locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
if (NA.Id != 0 && NA.Id != Start)
return std::make_pair(RA, NA);
- return std::make_pair(RA, NodeAddr<RefNode*>());
+ return std::make_pair(RA, Ref());
}
// Get the next shadow node in IA corresponding to RA, and optionally create
// such a node if it does not exist.
-NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA, bool Create) {
+Ref DataFlowGraph::getNextShadow(Instr IA, Ref RA, bool Create) {
assert(IA.Id != 0 && RA.Id != 0);
uint16_t Flags = RA.Addr->getFlags() | NodeAttrs::Shadow;
- auto IsShadow = [Flags] (NodeAddr<RefNode*> TA) -> bool {
+ auto IsShadow = [Flags](Ref TA) -> bool {
return TA.Addr->getFlags() == Flags;
};
auto Loc = locateNextRef(IA, RA, IsShadow);
@@ -1211,30 +1234,18 @@ NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA,
return Loc.second;
// Create a copy of RA and mark it as shadow.
- NodeAddr<RefNode*> NA = cloneNode(RA);
+ Ref NA = cloneNode(RA);
NA.Addr->setFlags(Flags | NodeAttrs::Shadow);
IA.Addr->addMemberAfter(Loc.first, NA, *this);
return NA;
}
-// Get the next shadow node in IA corresponding to RA. Return null-address
-// if such a node does not exist.
-NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA,
- NodeAddr<RefNode*> RA) const {
- assert(IA.Id != 0 && RA.Id != 0);
- uint16_t Flags = RA.Addr->getFlags() | NodeAttrs::Shadow;
- auto IsShadow = [Flags] (NodeAddr<RefNode*> TA) -> bool {
- return TA.Addr->getFlags() == Flags;
- };
- return locateNextRef(IA, RA, IsShadow).second;
-}
-
// Create a new statement node in the block node BA that corresponds to
// the machine instruction MI.
-void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
- NodeAddr<StmtNode*> SA = newStmt(BA, &In);
+void DataFlowGraph::buildStmt(Block BA, MachineInstr &In) {
+ Stmt SA = newStmt(BA, &In);
- auto isCall = [] (const MachineInstr &In) -> bool {
+ auto isCall = [](const MachineInstr &In) -> bool {
if (In.isCall())
return true;
// Is tail call?
@@ -1251,14 +1262,14 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
return false;
};
- auto isDefUndef = [this] (const MachineInstr &In, RegisterRef DR) -> bool {
+ auto isDefUndef = [this](const MachineInstr &In, RegisterRef DR) -> bool {
// This instruction defines DR. Check if there is a use operand that
// would make DR live on entry to the instruction.
- for (const MachineOperand &Op : In.operands()) {
- if (!Op.isReg() || Op.getReg() == 0 || !Op.isUse() || Op.isUndef())
+ for (const MachineOperand &Op : In.all_uses()) {
+ if (Op.getReg() == 0 || Op.isUndef())
continue;
RegisterRef UR = makeRegRef(Op);
- if (PRI.alias(DR, UR))
+ if (getPRI().alias(DR, UR))
return false;
}
return true;
@@ -1278,7 +1289,7 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
continue;
Register R = Op.getReg();
- if (!R || !R.isPhysical())
+ if (!R || !R.isPhysical() || !isTracked(RegisterRef(R)))
continue;
uint16_t Flags = NodeAttrs::None;
if (TOI.isPreserving(In, OpN)) {
@@ -1293,7 +1304,7 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
Flags |= NodeAttrs::Fixed;
if (IsCall && Op.isDead())
Flags |= NodeAttrs::Dead;
- NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
+ Def DA = newDef(SA, Op, Flags);
SA.Addr->addMember(DA, *this);
assert(!DoneDefs.test(R));
DoneDefs.set(R);
@@ -1305,15 +1316,17 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
MachineOperand &Op = In.getOperand(OpN);
if (!Op.isRegMask())
continue;
- uint16_t Flags = NodeAttrs::Clobbering | NodeAttrs::Fixed |
- NodeAttrs::Dead;
- NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
+ uint16_t Flags = NodeAttrs::Clobbering | NodeAttrs::Fixed | NodeAttrs::Dead;
+ Def DA = newDef(SA, Op, Flags);
SA.Addr->addMember(DA, *this);
// Record all clobbered registers in DoneDefs.
const uint32_t *RM = Op.getRegMask();
- for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i)
- if (!(RM[i/32] & (1u << (i%32))))
+ for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) {
+ if (!isTracked(RegisterRef(i)))
+ continue;
+ if (!(RM[i / 32] & (1u << (i % 32))))
DoneClobbers.set(i);
+ }
}
// Process implicit defs, skipping those that have already been added
@@ -1323,7 +1336,7 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
if (!Op.isReg() || !Op.isDef() || !Op.isImplicit())
continue;
Register R = Op.getReg();
- if (!R || !R.isPhysical() || DoneDefs.test(R))
+ if (!R || !R.isPhysical() || !isTracked(RegisterRef(R)) || DoneDefs.test(R))
continue;
RegisterRef RR = makeRegRef(Op);
uint16_t Flags = NodeAttrs::None;
@@ -1342,7 +1355,7 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
continue;
Flags |= NodeAttrs::Dead;
}
- NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
+ Def DA = newDef(SA, Op, Flags);
SA.Addr->addMember(DA, *this);
DoneDefs.set(R);
}
@@ -1352,22 +1365,21 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
if (!Op.isReg() || !Op.isUse())
continue;
Register R = Op.getReg();
- if (!R || !R.isPhysical())
+ if (!R || !R.isPhysical() || !isTracked(RegisterRef(R)))
continue;
uint16_t Flags = NodeAttrs::None;
if (Op.isUndef())
Flags |= NodeAttrs::Undef;
if (TOI.isFixedReg(In, OpN))
Flags |= NodeAttrs::Fixed;
- NodeAddr<UseNode*> UA = newUse(SA, Op, Flags);
+ Use UA = newUse(SA, Op, Flags);
SA.Addr->addMember(UA, *this);
}
}
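
The regmask handling in buildStmt above relies on the usual one-bit-per-register packing: bit R of a call's register mask lives in word R/32 at position R%32, and a clear bit means the register is clobbered rather than preserved. A minimal standalone sketch of that indexing in plain C++ (not the LLVM data structures; the mask contents below are invented):

#include <cstdint>
#include <iostream>
#include <vector>

// One bit per register: bit R set => R is preserved, clear => clobbered.
static bool isPreserved(const std::vector<uint32_t> &Mask, unsigned R) {
  return Mask[R / 32] & (1u << (R % 32));
}

int main() {
  // 64 registers, all preserved except regs 3 and 40 (made-up example).
  std::vector<uint32_t> Mask(2, ~0u);
  Mask[3 / 32] &= ~(1u << (3 % 32));
  Mask[40 / 32] &= ~(1u << (40 % 32));

  for (unsigned R = 1; R != 64; ++R)
    if (!isPreserved(Mask, R))     // clear bit => clobbered by the call
      std::cout << "reg " << R << " clobbered\n";
}
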
// Scan all defs in the block node BA and record in PhiM the locations of
// phi nodes corresponding to these defs.
-void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM,
- NodeAddr<BlockNode*> BA) {
+void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, Block BA) {
// Check all defs from block BA and record them in each block in BA's
// iterated dominance frontier. This information will later be used to
// create phi nodes.
@@ -1382,14 +1394,18 @@ void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM,
// in the block's iterated dominance frontier.
// This is done to make sure that each defined reference gets only one
// phi node, even if it is defined multiple times.
- RegisterSet Defs;
- for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this))
- for (NodeAddr<RefNode*> RA : IA.Addr->members_if(IsDef, *this))
- Defs.insert(RA.Addr->getRegRef(*this));
+ RegisterAggr Defs(getPRI());
+ for (Instr IA : BA.Addr->members(*this)) {
+ for (Ref RA : IA.Addr->members_if(IsDef, *this)) {
+ RegisterRef RR = RA.Addr->getRegRef(*this);
+ if (RR.isReg() && isTracked(RR))
+ Defs.insert(RR);
+ }
+ }
// Calculate the iterated dominance frontier of BB.
const MachineDominanceFrontier::DomSetType &DF = DFLoc->second;
- SetVector<MachineBasicBlock*> IDF(DF.begin(), DF.end());
+ SetVector<MachineBasicBlock *> IDF(DF.begin(), DF.end());
for (unsigned i = 0; i < IDF.size(); ++i) {
auto F = MDF.find(IDF[i]);
if (F != MDF.end())
@@ -1399,98 +1415,37 @@ void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM,
// Finally, add the set of defs to each block in the iterated dominance
// frontier.
for (auto *DB : IDF) {
- NodeAddr<BlockNode*> DBA = findBlock(DB);
- PhiM[DBA.Id].insert(Defs.begin(), Defs.end());
+ Block DBA = findBlock(DB);
+ PhiM[DBA.Id].insert(Defs);
}
}
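
The IDF loop in recordDefsForDF grows a deduplicating worklist: it seeds the set with DF(BA) and keeps unioning in the dominance frontiers of everything already collected until nothing new appears, which is the iterated-dominance-frontier fixpoint. A small standalone sketch of the same fixpoint over an invented toy frontier map (plain C++, not MachineDominanceFrontier):

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

using Block = std::string;

// Iterated dominance frontier: union DF over the growing set until fixpoint.
static std::vector<Block> iteratedDF(const std::map<Block, std::set<Block>> &DF,
                                     const Block &B) {
  std::vector<Block> Work;   // visit order (also the result)
  std::set<Block> InSet;     // dedup, mirrors the SetVector behaviour
  auto Push = [&](const Block &X) {
    if (InSet.insert(X).second)
      Work.push_back(X);
  };
  if (auto F = DF.find(B); F != DF.end())
    for (const Block &X : F->second)
      Push(X);
  for (size_t I = 0; I < Work.size(); ++I)        // Work grows as we go
    if (auto F = DF.find(Work[I]); F != DF.end())
      for (const Block &X : F->second)
        Push(X);
  return Work;
}

int main() {
  // Made-up frontiers: DF(entry) = {join}, DF(join) = {exit}.
  std::map<Block, std::set<Block>> DF{{"entry", {"join"}}, {"join", {"exit"}}};
  for (const Block &X : iteratedDF(DF, "entry"))
    std::cout << X << '\n';                       // prints: join, exit
}

Indexing the worklist by position while it grows plays the same role here as the SetVector in the code above.
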
// Given the locations of phi nodes in the map PhiM, create the phi nodes
// that are located in the block node BA.
-void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, RegisterSet &AllRefs,
- NodeAddr<BlockNode*> BA) {
+void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, Block BA) {
  // Check if this block has any DF defs, i.e. if there are any defs
// that this block is in the iterated dominance frontier of.
auto HasDF = PhiM.find(BA.Id);
if (HasDF == PhiM.end() || HasDF->second.empty())
return;
- // First, remove all R in Refs in such that there exists T in Refs
- // such that T covers R. In other words, only leave those refs that
- // are not covered by another ref (i.e. maximal with respect to covering).
-
- auto MaxCoverIn = [this] (RegisterRef RR, RegisterSet &RRs) -> RegisterRef {
- for (RegisterRef I : RRs)
- if (I != RR && RegisterAggr::isCoverOf(I, RR, PRI))
- RR = I;
- return RR;
- };
-
- RegisterSet MaxDF;
- for (RegisterRef I : HasDF->second)
- MaxDF.insert(MaxCoverIn(I, HasDF->second));
-
- std::vector<RegisterRef> MaxRefs;
- for (RegisterRef I : MaxDF)
- MaxRefs.push_back(MaxCoverIn(I, AllRefs));
-
- // Now, for each R in MaxRefs, get the alias closure of R. If the closure
- // only has R in it, create a phi a def for R. Otherwise, create a phi,
- // and add a def for each S in the closure.
-
- // Sort the refs so that the phis will be created in a deterministic order.
- llvm::sort(MaxRefs);
- // Remove duplicates.
- auto NewEnd = std::unique(MaxRefs.begin(), MaxRefs.end());
- MaxRefs.erase(NewEnd, MaxRefs.end());
-
- auto Aliased = [this,&MaxRefs](RegisterRef RR,
- std::vector<unsigned> &Closure) -> bool {
- for (unsigned I : Closure)
- if (PRI.alias(RR, MaxRefs[I]))
- return true;
- return false;
- };
-
// Prepare a list of NodeIds of the block's predecessors.
NodeList Preds;
const MachineBasicBlock *MBB = BA.Addr->getCode();
for (MachineBasicBlock *PB : MBB->predecessors())
Preds.push_back(findBlock(PB));
- while (!MaxRefs.empty()) {
- // Put the first element in the closure, and then add all subsequent
- // elements from MaxRefs to it, if they alias at least one element
- // already in the closure.
- // ClosureIdx: vector of indices in MaxRefs of members of the closure.
- std::vector<unsigned> ClosureIdx = { 0 };
- for (unsigned i = 1; i != MaxRefs.size(); ++i)
- if (Aliased(MaxRefs[i], ClosureIdx))
- ClosureIdx.push_back(i);
-
- // Build a phi for the closure.
- unsigned CS = ClosureIdx.size();
- NodeAddr<PhiNode*> PA = newPhi(BA);
-
- // Add defs.
- for (unsigned X = 0; X != CS; ++X) {
- RegisterRef RR = MaxRefs[ClosureIdx[X]];
- uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
- NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
- PA.Addr->addMember(DA, *this);
- }
+ const RegisterAggr &Defs = PhiM[BA.Id];
+ uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
+
+ for (RegisterRef RR : Defs.refs()) {
+ Phi PA = newPhi(BA);
+ PA.Addr->addMember(newDef(PA, RR, PhiFlags), *this);
+
// Add phi uses.
- for (NodeAddr<BlockNode*> PBA : Preds) {
- for (unsigned X = 0; X != CS; ++X) {
- RegisterRef RR = MaxRefs[ClosureIdx[X]];
- NodeAddr<PhiUseNode*> PUA = newPhiUse(PA, RR, PBA);
- PA.Addr->addMember(PUA, *this);
- }
+ for (Block PBA : Preds) {
+ PA.Addr->addMember(newPhiUse(PA, RR, PBA), *this);
}
-
- // Erase from MaxRefs all elements in the closure.
- auto Begin = MaxRefs.begin();
- for (unsigned Idx : llvm::reverse(ClosureIdx))
- MaxRefs.erase(Begin + Idx);
}
}
@@ -1503,16 +1458,16 @@ void DataFlowGraph::removeUnusedPhis() {
// that are easily determinable to be unnecessary.
SetVector<NodeId> PhiQ;
- for (NodeAddr<BlockNode*> BA : Func.Addr->members(*this)) {
+ for (Block BA : TheFunc.Addr->members(*this)) {
for (auto P : BA.Addr->members_if(IsPhi, *this))
PhiQ.insert(P.Id);
}
static auto HasUsedDef = [](NodeList &Ms) -> bool {
- for (NodeAddr<NodeBase*> M : Ms) {
+ for (Node M : Ms) {
if (M.Addr->getKind() != NodeAttrs::Def)
continue;
- NodeAddr<DefNode*> DA = M;
+ Def DA = M;
if (DA.Addr->getReachedDef() != 0 || DA.Addr->getReachedUse() != 0)
return true;
}
@@ -1523,15 +1478,15 @@ void DataFlowGraph::removeUnusedPhis() {
// For each removed phi, collect the potentially affected phis and add
// them back to the queue.
while (!PhiQ.empty()) {
- auto PA = addr<PhiNode*>(PhiQ[0]);
+ auto PA = addr<PhiNode *>(PhiQ[0]);
PhiQ.remove(PA.Id);
NodeList Refs = PA.Addr->members(*this);
if (HasUsedDef(Refs))
continue;
- for (NodeAddr<RefNode*> RA : Refs) {
+ for (Ref RA : Refs) {
if (NodeId RD = RA.Addr->getReachingDef()) {
- auto RDA = addr<DefNode*>(RD);
- NodeAddr<InstrNode*> OA = RDA.Addr->getOwner(*this);
+ auto RDA = addr<DefNode *>(RD);
+ Instr OA = RDA.Addr->getOwner(*this);
if (IsPhi(OA))
PhiQ.insert(OA.Id);
}
@@ -1540,7 +1495,7 @@ void DataFlowGraph::removeUnusedPhis() {
else
unlinkUse(RA, true);
}
- NodeAddr<BlockNode*> BA = PA.Addr->getOwner(*this);
+ Block BA = PA.Addr->getOwner(*this);
BA.Addr->removeMember(PA, *this);
}
}
@@ -1549,15 +1504,14 @@ void DataFlowGraph::removeUnusedPhis() {
// reaching def of TA to the appropriate def node. Create any shadow nodes
// as appropriate.
template <typename T>
-void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA,
- DefStack &DS) {
+void DataFlowGraph::linkRefUp(Instr IA, NodeAddr<T> TA, DefStack &DS) {
if (DS.empty())
return;
RegisterRef RR = TA.Addr->getRegRef(*this);
NodeAddr<T> TAP;
// References from the def stack that have been examined so far.
- RegisterAggr Defs(PRI);
+ RegisterAggr Defs(getPRI());
for (auto I = DS.top(), E = DS.bottom(); I != E; I.down()) {
RegisterRef QR = I->Addr->getRegRef(*this);
@@ -1573,7 +1527,7 @@ void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA,
}
// The reaching def.
- NodeAddr<DefNode*> RDA = *I;
+ Def RDA = *I;
// Pick the reached node.
if (TAP.Id == 0) {
@@ -1594,14 +1548,13 @@ void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA,
// Create data-flow links for all reference nodes in the statement node SA.
template <typename Predicate>
-void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA,
- Predicate P) {
+void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, Stmt SA, Predicate P) {
#ifndef NDEBUG
- RegisterSet Defs;
+ RegisterSet Defs(getPRI());
#endif
// Link all nodes (upwards in the data-flow) with their reaching defs.
- for (NodeAddr<RefNode*> RA : SA.Addr->members_if(P, *this)) {
+ for (Ref RA : SA.Addr->members_if(P, *this)) {
uint16_t Kind = RA.Addr->getKind();
assert(Kind == NodeAttrs::Def || Kind == NodeAttrs::Use);
RegisterRef RR = RA.Addr->getRegRef(*this);
@@ -1616,9 +1569,9 @@ void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA,
continue;
DefStack &DS = F->second;
if (Kind == NodeAttrs::Use)
- linkRefUp<UseNode*>(SA, RA, DS);
+ linkRefUp<UseNode *>(SA, RA, DS);
else if (Kind == NodeAttrs::Def)
- linkRefUp<DefNode*>(SA, RA, DS);
+ linkRefUp<DefNode *>(SA, RA, DS);
else
llvm_unreachable("Unexpected node in instruction");
}
@@ -1626,14 +1579,14 @@ void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA,
// Create data-flow links for all instructions in the block node BA. This
// will include updating any phi nodes in BA.
-void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
+void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, Block BA) {
// Push block delimiters.
markBlock(BA.Id, DefM);
- auto IsClobber = [] (NodeAddr<RefNode*> RA) -> bool {
+ auto IsClobber = [](Ref RA) -> bool {
return IsDef(RA) && (RA.Addr->getFlags() & NodeAttrs::Clobbering);
};
- auto IsNoClobber = [] (NodeAddr<RefNode*> RA) -> bool {
+ auto IsNoClobber = [](Ref RA) -> bool {
return IsDef(RA) && !(RA.Addr->getFlags() & NodeAttrs::Clobbering);
};
@@ -1641,7 +1594,7 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
// For each non-phi instruction in the block, link all the defs and uses
// to their reaching defs. For any member of the block (including phis),
// push the defs on the corresponding stacks.
- for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this)) {
+ for (Instr IA : BA.Addr->members(*this)) {
// Ignore phi nodes here. They will be linked part by part from the
// predecessors.
if (IA.Addr->getKind() == NodeAttrs::Stmt) {
@@ -1662,39 +1615,38 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode());
for (auto *I : *N) {
MachineBasicBlock *SB = I->getBlock();
- NodeAddr<BlockNode*> SBA = findBlock(SB);
+ Block SBA = findBlock(SB);
linkBlockRefs(DefM, SBA);
}
// Link the phi uses from the successor blocks.
- auto IsUseForBA = [BA](NodeAddr<NodeBase*> NA) -> bool {
+ auto IsUseForBA = [BA](Node NA) -> bool {
if (NA.Addr->getKind() != NodeAttrs::Use)
return false;
assert(NA.Addr->getFlags() & NodeAttrs::PhiRef);
- NodeAddr<PhiUseNode*> PUA = NA;
- return PUA.Addr->getPredecessor() == BA.Id;
+ return PhiUse(NA).Addr->getPredecessor() == BA.Id;
};
- RegisterSet EHLiveIns = getLandingPadLiveIns();
+ RegisterAggr EHLiveIns = getLandingPadLiveIns();
MachineBasicBlock *MBB = BA.Addr->getCode();
for (MachineBasicBlock *SB : MBB->successors()) {
bool IsEHPad = SB->isEHPad();
- NodeAddr<BlockNode*> SBA = findBlock(SB);
- for (NodeAddr<InstrNode*> IA : SBA.Addr->members_if(IsPhi, *this)) {
+ Block SBA = findBlock(SB);
+ for (Instr IA : SBA.Addr->members_if(IsPhi, *this)) {
// Do not link phi uses for landing pad live-ins.
if (IsEHPad) {
// Find what register this phi is for.
- NodeAddr<RefNode*> RA = IA.Addr->getFirstMember(*this);
+ Ref RA = IA.Addr->getFirstMember(*this);
assert(RA.Id != 0);
- if (EHLiveIns.count(RA.Addr->getRegRef(*this)))
+ if (EHLiveIns.hasCoverOf(RA.Addr->getRegRef(*this)))
continue;
}
// Go over each phi use associated with MBB, and link it.
for (auto U : IA.Addr->members_if(IsUseForBA, *this)) {
- NodeAddr<PhiUseNode*> PUA = U;
+ PhiUse PUA = U;
RegisterRef RR = PUA.Addr->getRegRef(*this);
- linkRefUp<UseNode*>(IA, PUA, DefM[RR.Reg]);
+ linkRefUp<UseNode *>(IA, PUA, DefM[RR.Reg]);
}
}
}
@@ -1704,7 +1656,7 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
}
// Remove the use node UA from any data-flow and structural links.
-void DataFlowGraph::unlinkUseDF(NodeAddr<UseNode*> UA) {
+void DataFlowGraph::unlinkUseDF(Use UA) {
NodeId RD = UA.Addr->getReachingDef();
NodeId Sib = UA.Addr->getSibling();
@@ -1713,8 +1665,8 @@ void DataFlowGraph::unlinkUseDF(NodeAddr<UseNode*> UA) {
return;
}
- auto RDA = addr<DefNode*>(RD);
- auto TA = addr<UseNode*>(RDA.Addr->getReachedUse());
+ auto RDA = addr<DefNode *>(RD);
+ auto TA = addr<UseNode *>(RDA.Addr->getReachedUse());
if (TA.Id == UA.Id) {
RDA.Addr->setReachedUse(Sib);
return;
@@ -1726,12 +1678,12 @@ void DataFlowGraph::unlinkUseDF(NodeAddr<UseNode*> UA) {
TA.Addr->setSibling(UA.Addr->getSibling());
return;
}
- TA = addr<UseNode*>(S);
+ TA = addr<UseNode *>(S);
}
}
// Remove the def node DA from any data-flow and structural links.
-void DataFlowGraph::unlinkDefDF(NodeAddr<DefNode*> DA) {
+void DataFlowGraph::unlinkDefDF(Def DA) {
//
// RD
// | reached
@@ -1756,10 +1708,10 @@ void DataFlowGraph::unlinkDefDF(NodeAddr<DefNode*> DA) {
// Also, defs reached by DA are now "promoted" to being reached by RD,
// so all of them will need to be spliced into the sibling chain where
// DA belongs.
- auto getAllNodes = [this] (NodeId N) -> NodeList {
+ auto getAllNodes = [this](NodeId N) -> NodeList {
NodeList Res;
while (N) {
- auto RA = addr<RefNode*>(N);
+ auto RA = addr<RefNode *>(N);
// Keep the nodes in the exact sibling order.
Res.push_back(RA);
N = RA.Addr->getSibling();
@@ -1770,14 +1722,14 @@ void DataFlowGraph::unlinkDefDF(NodeAddr<DefNode*> DA) {
NodeList ReachedUses = getAllNodes(DA.Addr->getReachedUse());
if (RD == 0) {
- for (NodeAddr<RefNode*> I : ReachedDefs)
+ for (Ref I : ReachedDefs)
I.Addr->setSibling(0);
- for (NodeAddr<RefNode*> I : ReachedUses)
+ for (Ref I : ReachedUses)
I.Addr->setSibling(0);
}
- for (NodeAddr<DefNode*> I : ReachedDefs)
+ for (Def I : ReachedDefs)
I.Addr->setReachingDef(RD);
- for (NodeAddr<UseNode*> I : ReachedUses)
+ for (Use I : ReachedUses)
I.Addr->setReachingDef(RD);
NodeId Sib = DA.Addr->getSibling();
@@ -1787,8 +1739,8 @@ void DataFlowGraph::unlinkDefDF(NodeAddr<DefNode*> DA) {
}
// Update the reaching def node and remove DA from the sibling list.
- auto RDA = addr<DefNode*>(RD);
- auto TA = addr<DefNode*>(RDA.Addr->getReachedDef());
+ auto RDA = addr<DefNode *>(RD);
+ auto TA = addr<DefNode *>(RDA.Addr->getReachedDef());
if (TA.Id == DA.Id) {
// If DA is the first reached def, just update the RD's reached def
// to the DA's sibling.
@@ -1802,20 +1754,46 @@ void DataFlowGraph::unlinkDefDF(NodeAddr<DefNode*> DA) {
TA.Addr->setSibling(Sib);
break;
}
- TA = addr<DefNode*>(S);
+ TA = addr<DefNode *>(S);
}
}
// Splice the DA's reached defs into the RDA's reached def chain.
if (!ReachedDefs.empty()) {
- auto Last = NodeAddr<DefNode*>(ReachedDefs.back());
+ auto Last = Def(ReachedDefs.back());
Last.Addr->setSibling(RDA.Addr->getReachedDef());
RDA.Addr->setReachedDef(ReachedDefs.front().Id);
}
// Splice the DA's reached uses into the RDA's reached use chain.
if (!ReachedUses.empty()) {
- auto Last = NodeAddr<UseNode*>(ReachedUses.back());
+ auto Last = Use(ReachedUses.back());
Last.Addr->setSibling(RDA.Addr->getReachedUse());
RDA.Addr->setReachedUse(ReachedUses.front().Id);
}
}
+
+bool DataFlowGraph::isTracked(RegisterRef RR) const {
+ return !disjoint(getPRI().getUnits(RR), TrackedUnits);
+}
+
+bool DataFlowGraph::hasUntrackedRef(Stmt S, bool IgnoreReserved) const {
+ SmallVector<MachineOperand *> Ops;
+
+ for (Ref R : S.Addr->members(*this)) {
+ Ops.push_back(&R.Addr->getOp());
+ RegisterRef RR = R.Addr->getRegRef(*this);
+ if (IgnoreReserved && RR.isReg() && ReservedRegs[RR.idx()])
+ continue;
+ if (!isTracked(RR))
+ return true;
+ }
+ for (const MachineOperand &Op : S.Addr->getCode()->operands()) {
+ if (!Op.isReg() && !Op.isRegMask())
+ continue;
+ if (llvm::find(Ops, &Op) == Ops.end())
+ return true;
+ }
+ return false;
+}
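
isTracked above boils down to a set-intersection test on register units: a reference is tracked as soon as any of its units appears in TrackedUnits. A minimal sketch of that disjointness test over plain std::set (the unit numbers are invented, and disjoint here is a local helper, not the LLVM one):

#include <iostream>
#include <set>

// True if the two sorted sets share no element.
static bool disjoint(const std::set<unsigned> &A, const std::set<unsigned> &B) {
  auto I = A.begin(), J = B.begin();
  while (I != A.end() && J != B.end()) {
    if (*I == *J)
      return false;
    if (*I < *J)
      ++I;
    else
      ++J;
  }
  return true;
}

int main() {
  std::set<unsigned> TrackedUnits{4, 5, 9};   // units the graph cares about
  std::set<unsigned> RefUnits{2, 9};          // units of some register ref
  std::cout << std::boolalpha
            << !disjoint(RefUnits, TrackedUnits) << '\n';  // true: tracked
}
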
+
+} // end namespace llvm::rdf
diff --git a/llvm/lib/CodeGen/RDFLiveness.cpp b/llvm/lib/CodeGen/RDFLiveness.cpp
index 902b29d41ce1..11f3fedaa5f9 100644
--- a/llvm/lib/CodeGen/RDFLiveness.cpp
+++ b/llvm/lib/CodeGen/RDFLiveness.cpp
@@ -22,7 +22,6 @@
// and Embedded Architectures and Compilers", 8 (4),
// <10.1145/2086696.2086706>. <hal-00647369>
//
-#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -34,6 +33,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/RDFGraph.h"
+#include "llvm/CodeGen/RDFLiveness.h"
#include "llvm/CodeGen/RDFRegisters.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/LaneBitmask.h"
@@ -51,31 +51,27 @@
#include <vector>
using namespace llvm;
-using namespace rdf;
static cl::opt<unsigned> MaxRecNest("rdf-liveness-max-rec", cl::init(25),
- cl::Hidden, cl::desc("Maximum recursion level"));
-
-namespace llvm {
-namespace rdf {
-
- raw_ostream &operator<< (raw_ostream &OS, const Print<Liveness::RefMap> &P) {
- OS << '{';
- for (const auto &I : P.Obj) {
- OS << ' ' << printReg(I.first, &P.G.getTRI()) << '{';
- for (auto J = I.second.begin(), E = I.second.end(); J != E; ) {
- OS << Print(J->first, P.G) << PrintLaneMaskOpt(J->second);
- if (++J != E)
- OS << ',';
- }
- OS << '}';
+ cl::Hidden,
+ cl::desc("Maximum recursion level"));
+
+namespace llvm::rdf {
+
+raw_ostream &operator<<(raw_ostream &OS, const Print<Liveness::RefMap> &P) {
+ OS << '{';
+ for (const auto &I : P.Obj) {
+ OS << ' ' << printReg(I.first, &P.G.getTRI()) << '{';
+ for (auto J = I.second.begin(), E = I.second.end(); J != E;) {
+ OS << Print(J->first, P.G) << PrintLaneMaskShort(J->second);
+ if (++J != E)
+ OS << ',';
}
- OS << " }";
- return OS;
+ OS << '}';
}
-
-} // end namespace rdf
-} // end namespace llvm
+ OS << " }";
+ return OS;
+}
// The order in the returned sequence is the order of reaching defs in the
// upward traversal: the first def is the closest to the given reference RefA,
@@ -106,11 +102,12 @@ namespace rdf {
// the data-flow.
NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
- NodeAddr<RefNode*> RefA, bool TopShadows, bool FullChain,
- const RegisterAggr &DefRRs) {
+ NodeAddr<RefNode *> RefA, bool TopShadows,
+ bool FullChain,
+ const RegisterAggr &DefRRs) {
NodeList RDefs; // Return value.
SetVector<NodeId> DefQ;
- DenseMap<MachineInstr*, uint32_t> OrdMap;
+ DenseMap<MachineInstr *, uint32_t> OrdMap;
// Dead defs will be treated as if they were live, since they are actually
// on the data-flow path. They cannot be ignored because even though they
@@ -124,12 +121,12 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
// whole point of a shadow is that it will have a reaching def that
// is not aliased to the reaching defs of the related shadows.
NodeId Start = RefA.Id;
- auto SNA = DFG.addr<RefNode*>(Start);
+ auto SNA = DFG.addr<RefNode *>(Start);
if (NodeId RD = SNA.Addr->getReachingDef())
DefQ.insert(RD);
if (TopShadows) {
for (auto S : DFG.getRelatedRefs(RefA.Addr->getOwner(DFG), RefA))
- if (NodeId RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
+ if (NodeId RD = NodeAddr<RefNode *>(S).Addr->getReachingDef())
DefQ.insert(RD);
}
@@ -140,7 +137,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
// It is possible that a collection of non-covering (individually) defs
// will be sufficient, but keep going until a covering one is found.
for (unsigned i = 0; i < DefQ.size(); ++i) {
- auto TA = DFG.addr<DefNode*>(DefQ[i]);
+ auto TA = DFG.addr<DefNode *>(DefQ[i]);
if (TA.Addr->getFlags() & NodeAttrs::PhiRef)
continue;
// Stop at the covering/overwriting def of the initial register reference.
@@ -151,7 +148,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
// Get the next level of reaching defs. This will include multiple
// reaching defs for shadows.
for (auto S : DFG.getRelatedRefs(TA.Addr->getOwner(DFG), TA))
- if (NodeId RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
+ if (NodeId RD = NodeAddr<RefNode *>(S).Addr->getReachingDef())
DefQ.insert(RD);
// Don't visit sibling defs. They share the same reaching def (which
// will be visited anyway), but they define something not aliased to
@@ -159,42 +156,42 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
}
// Return the MachineBasicBlock containing a given instruction.
- auto Block = [this] (NodeAddr<InstrNode*> IA) -> MachineBasicBlock* {
+ auto Block = [this](NodeAddr<InstrNode *> IA) -> MachineBasicBlock * {
if (IA.Addr->getKind() == NodeAttrs::Stmt)
- return NodeAddr<StmtNode*>(IA).Addr->getCode()->getParent();
+ return NodeAddr<StmtNode *>(IA).Addr->getCode()->getParent();
assert(IA.Addr->getKind() == NodeAttrs::Phi);
- NodeAddr<PhiNode*> PA = IA;
- NodeAddr<BlockNode*> BA = PA.Addr->getOwner(DFG);
+ NodeAddr<PhiNode *> PA = IA;
+ NodeAddr<BlockNode *> BA = PA.Addr->getOwner(DFG);
return BA.Addr->getCode();
};
- SmallSet<NodeId,32> Defs;
+ SmallSet<NodeId, 32> Defs;
// Remove all non-phi defs that are not aliased to RefRR, and separate
  // the remaining defs into buckets for their containing blocks.
- std::map<NodeId, NodeAddr<InstrNode*>> Owners;
- std::map<MachineBasicBlock*, SmallVector<NodeId,32>> Blocks;
+ std::map<NodeId, NodeAddr<InstrNode *>> Owners;
+ std::map<MachineBasicBlock *, SmallVector<NodeId, 32>> Blocks;
for (NodeId N : DefQ) {
- auto TA = DFG.addr<DefNode*>(N);
+ auto TA = DFG.addr<DefNode *>(N);
bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef;
if (!IsPhi && !PRI.alias(RefRR, TA.Addr->getRegRef(DFG)))
continue;
Defs.insert(TA.Id);
- NodeAddr<InstrNode*> IA = TA.Addr->getOwner(DFG);
+ NodeAddr<InstrNode *> IA = TA.Addr->getOwner(DFG);
Owners[TA.Id] = IA;
Blocks[Block(IA)].push_back(IA.Id);
}
- auto Precedes = [this,&OrdMap] (NodeId A, NodeId B) {
+ auto Precedes = [this, &OrdMap](NodeId A, NodeId B) {
if (A == B)
return false;
- NodeAddr<InstrNode*> OA = DFG.addr<InstrNode*>(A);
- NodeAddr<InstrNode*> OB = DFG.addr<InstrNode*>(B);
+ NodeAddr<InstrNode *> OA = DFG.addr<InstrNode *>(A);
+ NodeAddr<InstrNode *> OB = DFG.addr<InstrNode *>(B);
bool StmtA = OA.Addr->getKind() == NodeAttrs::Stmt;
bool StmtB = OB.Addr->getKind() == NodeAttrs::Stmt;
if (StmtA && StmtB) {
- const MachineInstr *InA = NodeAddr<StmtNode*>(OA).Addr->getCode();
- const MachineInstr *InB = NodeAddr<StmtNode*>(OB).Addr->getCode();
+ const MachineInstr *InA = NodeAddr<StmtNode *>(OA).Addr->getCode();
+ const MachineInstr *InB = NodeAddr<StmtNode *>(OB).Addr->getCode();
assert(InA->getParent() == InB->getParent());
auto FA = OrdMap.find(InA);
if (FA != OrdMap.end())
@@ -217,14 +214,14 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
return !StmtA;
};
- auto GetOrder = [&OrdMap] (MachineBasicBlock &B) {
+ auto GetOrder = [&OrdMap](MachineBasicBlock &B) {
uint32_t Pos = 0;
for (MachineInstr &In : B)
OrdMap.insert({&In, ++Pos});
};
// For each block, sort the nodes in it.
- std::vector<MachineBasicBlock*> TmpBB;
+ std::vector<MachineBasicBlock *> TmpBB;
for (auto &Bucket : Blocks) {
TmpBB.push_back(Bucket.first);
if (Bucket.second.size() > 2)
@@ -261,18 +258,17 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
RegisterAggr RRs(DefRRs);
- auto DefInSet = [&Defs] (NodeAddr<RefNode*> TA) -> bool {
- return TA.Addr->getKind() == NodeAttrs::Def &&
- Defs.count(TA.Id);
+ auto DefInSet = [&Defs](NodeAddr<RefNode *> TA) -> bool {
+ return TA.Addr->getKind() == NodeAttrs::Def && Defs.count(TA.Id);
};
for (NodeId T : TmpInst) {
if (!FullChain && RRs.hasCoverOf(RefRR))
break;
- auto TA = DFG.addr<InstrNode*>(T);
+ auto TA = DFG.addr<InstrNode *>(T);
bool IsPhi = DFG.IsCode<NodeAttrs::Phi>(TA);
NodeList Ds;
- for (NodeAddr<DefNode*> DA : TA.Addr->members_if(DefInSet, DFG)) {
+ for (NodeAddr<DefNode *> DA : TA.Addr->members_if(DefInSet, DFG)) {
RegisterRef QR = DA.Addr->getRegRef(DFG);
// Add phi defs even if they are covered by subsequent defs. This is
// for cases where the reached use is not covered by any of the defs
@@ -286,7 +282,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
Ds.push_back(DA);
}
llvm::append_range(RDefs, Ds);
- for (NodeAddr<DefNode*> DA : Ds) {
+ for (NodeAddr<DefNode *> DA : Ds) {
// When collecting a full chain of definitions, do not consider phi
// defs to actually define a register.
uint16_t Flags = DA.Addr->getFlags();
@@ -296,7 +292,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
}
}
- auto DeadP = [](const NodeAddr<DefNode*> DA) -> bool {
+ auto DeadP = [](const NodeAddr<DefNode *> DA) -> bool {
return DA.Addr->getFlags() & NodeAttrs::Dead;
};
llvm::erase_if(RDefs, DeadP);
@@ -304,81 +300,82 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
return RDefs;
}
-std::pair<NodeSet,bool>
-Liveness::getAllReachingDefsRec(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
- NodeSet &Visited, const NodeSet &Defs) {
+std::pair<NodeSet, bool>
+Liveness::getAllReachingDefsRec(RegisterRef RefRR, NodeAddr<RefNode *> RefA,
+ NodeSet &Visited, const NodeSet &Defs) {
return getAllReachingDefsRecImpl(RefRR, RefA, Visited, Defs, 0, MaxRecNest);
}
-std::pair<NodeSet,bool>
-Liveness::getAllReachingDefsRecImpl(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
- NodeSet &Visited, const NodeSet &Defs, unsigned Nest, unsigned MaxNest) {
+std::pair<NodeSet, bool>
+Liveness::getAllReachingDefsRecImpl(RegisterRef RefRR, NodeAddr<RefNode *> RefA,
+ NodeSet &Visited, const NodeSet &Defs,
+ unsigned Nest, unsigned MaxNest) {
if (Nest > MaxNest)
- return { NodeSet(), false };
+ return {NodeSet(), false};
// Collect all defined registers. Do not consider phis to be defining
// anything, only collect "real" definitions.
RegisterAggr DefRRs(PRI);
for (NodeId D : Defs) {
- const auto DA = DFG.addr<const DefNode*>(D);
+ const auto DA = DFG.addr<const DefNode *>(D);
if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef))
DefRRs.insert(DA.Addr->getRegRef(DFG));
}
NodeList RDs = getAllReachingDefs(RefRR, RefA, false, true, DefRRs);
if (RDs.empty())
- return { Defs, true };
+ return {Defs, true};
// Make a copy of the preexisting definitions and add the newly found ones.
NodeSet TmpDefs = Defs;
- for (NodeAddr<NodeBase*> R : RDs)
+ for (NodeAddr<NodeBase *> R : RDs)
TmpDefs.insert(R.Id);
NodeSet Result = Defs;
- for (NodeAddr<DefNode*> DA : RDs) {
+ for (NodeAddr<DefNode *> DA : RDs) {
Result.insert(DA.Id);
if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef))
continue;
- NodeAddr<PhiNode*> PA = DA.Addr->getOwner(DFG);
+ NodeAddr<PhiNode *> PA = DA.Addr->getOwner(DFG);
if (!Visited.insert(PA.Id).second)
continue;
// Go over all phi uses and get the reaching defs for each use.
for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
const auto &T = getAllReachingDefsRecImpl(RefRR, U, Visited, TmpDefs,
- Nest+1, MaxNest);
+ Nest + 1, MaxNest);
if (!T.second)
- return { T.first, false };
+ return {T.first, false};
Result.insert(T.first.begin(), T.first.end());
}
}
- return { Result, true };
+ return {Result, true};
}
/// Find the nearest ref node aliased to RefRR, going upwards in the data
/// flow, starting from the instruction immediately preceding Inst.
-NodeAddr<RefNode*> Liveness::getNearestAliasedRef(RegisterRef RefRR,
- NodeAddr<InstrNode*> IA) {
- NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
+NodeAddr<RefNode *> Liveness::getNearestAliasedRef(RegisterRef RefRR,
+ NodeAddr<InstrNode *> IA) {
+ NodeAddr<BlockNode *> BA = IA.Addr->getOwner(DFG);
NodeList Ins = BA.Addr->members(DFG);
NodeId FindId = IA.Id;
auto E = Ins.rend();
- auto B = std::find_if(Ins.rbegin(), E,
- [FindId] (const NodeAddr<InstrNode*> T) {
- return T.Id == FindId;
- });
+ auto B =
+ std::find_if(Ins.rbegin(), E, [FindId](const NodeAddr<InstrNode *> T) {
+ return T.Id == FindId;
+ });
// Do not scan IA (which is what B would point to).
if (B != E)
++B;
do {
// Process the range of instructions from B to E.
- for (NodeAddr<InstrNode*> I : make_range(B, E)) {
+ for (NodeAddr<InstrNode *> I : make_range(B, E)) {
NodeList Refs = I.Addr->members(DFG);
- NodeAddr<RefNode*> Clob, Use;
+ NodeAddr<RefNode *> Clob, Use;
// Scan all the refs in I aliased to RefRR, and return the one that
// is the closest to the output of I, i.e. def > clobber > use.
- for (NodeAddr<RefNode*> R : Refs) {
+ for (NodeAddr<RefNode *> R : Refs) {
if (!PRI.alias(R.Addr->getRegRef(DFG), RefRR))
continue;
if (DFG.IsDef(R)) {
@@ -398,7 +395,7 @@ NodeAddr<RefNode*> Liveness::getNearestAliasedRef(RegisterRef RefRR,
// Go up to the immediate dominator, if any.
MachineBasicBlock *BB = BA.Addr->getCode();
- BA = NodeAddr<BlockNode*>();
+ BA = NodeAddr<BlockNode *>();
if (MachineDomTreeNode *N = MDT.getNode(BB)) {
if ((N = N->getIDom()))
BA = DFG.findBlock(N->getBlock());
@@ -411,11 +408,11 @@ NodeAddr<RefNode*> Liveness::getNearestAliasedRef(RegisterRef RefRR,
E = Ins.rend();
} while (true);
- return NodeAddr<RefNode*>();
+ return NodeAddr<RefNode *>();
}
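
getNearestAliasedRef searches upwards: it scans the current block backwards starting just before IA, and if nothing aliased turns up it hops to the immediate dominator and scans that whole block, repeating until the idom chain runs out. A toy standalone sketch of that traversal shape with invented block/instruction types (it looks for an integer key instead of an aliased register ref):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct ToyBlock {
  std::string Name;
  std::vector<int> Insts;   // instruction "payloads", in program order
  ToyBlock *IDom = nullptr; // immediate dominator, null at the entry
};

// Find the nearest instruction equal to Key, searching upwards: first the
// part of From before position Pos (backwards), then whole dominating blocks.
static const int *findUpwards(const ToyBlock *From, size_t Pos, int Key) {
  size_t End = Pos; // do not look at Pos itself in the first block
  for (const ToyBlock *B = From; B; B = B->IDom, End = SIZE_MAX) {
    size_t N = std::min(End, B->Insts.size());
    for (size_t I = N; I != 0; --I)
      if (B->Insts[I - 1] == Key)
        return &B->Insts[I - 1];
  }
  return nullptr;
}

int main() {
  ToyBlock Entry{"entry", {7, 1}}, Body{"body", {2, 3}};
  Body.IDom = &Entry;
  // Start just before Body.Insts[1] and look for 7: found in the dominator.
  std::cout << (findUpwards(&Body, 1, 7) ? "found" : "not found") << '\n';
}
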
-NodeSet Liveness::getAllReachedUses(RegisterRef RefRR,
- NodeAddr<DefNode*> DefA, const RegisterAggr &DefRRs) {
+NodeSet Liveness::getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode *> DefA,
+ const RegisterAggr &DefRRs) {
NodeSet Uses;
// If the original register is already covered by all the intervening
@@ -428,7 +425,7 @@ NodeSet Liveness::getAllReachedUses(RegisterRef RefRR,
bool IsDead = DefA.Addr->getFlags() & NodeAttrs::Dead;
NodeId U = !IsDead ? DefA.Addr->getReachedUse() : 0;
while (U != 0) {
- auto UA = DFG.addr<UseNode*>(U);
+ auto UA = DFG.addr<UseNode *>(U);
if (!(UA.Addr->getFlags() & NodeAttrs::Undef)) {
RegisterRef UR = UA.Addr->getRegRef(DFG);
if (PRI.alias(RefRR, UR) && !DefRRs.hasCoverOf(UR))
@@ -439,7 +436,7 @@ NodeSet Liveness::getAllReachedUses(RegisterRef RefRR,
// Traverse all reached defs. This time dead defs cannot be ignored.
for (NodeId D = DefA.Addr->getReachedDef(), NextD; D != 0; D = NextD) {
- auto DA = DFG.addr<DefNode*>(D);
+ auto DA = DFG.addr<DefNode *>(D);
NextD = DA.Addr->getSibling();
RegisterRef DR = DA.Addr->getRegRef(DFG);
// If this def is already covered, it cannot reach anything new.
@@ -464,20 +461,21 @@ void Liveness::computePhiInfo() {
RealUseMap.clear();
NodeList Phis;
- NodeAddr<FuncNode*> FA = DFG.getFunc();
+ NodeAddr<FuncNode *> FA = DFG.getFunc();
NodeList Blocks = FA.Addr->members(DFG);
- for (NodeAddr<BlockNode*> BA : Blocks) {
+ for (NodeAddr<BlockNode *> BA : Blocks) {
auto Ps = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
llvm::append_range(Phis, Ps);
}
// phi use -> (map: reaching phi -> set of registers defined in between)
- std::map<NodeId,std::map<NodeId,RegisterAggr>> PhiUp;
- std::vector<NodeId> PhiUQ; // Work list of phis for upward propagation.
- std::unordered_map<NodeId,RegisterAggr> PhiDRs; // Phi -> registers defined by it.
+ std::map<NodeId, std::map<NodeId, RegisterAggr>> PhiUp;
+ std::vector<NodeId> PhiUQ; // Work list of phis for upward propagation.
+ std::unordered_map<NodeId, RegisterAggr>
+ PhiDRs; // Phi -> registers defined by it.
// Go over all phis.
- for (NodeAddr<PhiNode*> PhiA : Phis) {
+ for (NodeAddr<PhiNode *> PhiA : Phis) {
// Go over all defs and collect the reached uses that are non-phi uses
// (i.e. the "real uses").
RefMap &RealUses = RealUseMap[PhiA.Id];
@@ -488,7 +486,7 @@ void Liveness::computePhiInfo() {
SetVector<NodeId> DefQ;
NodeSet PhiDefs;
RegisterAggr DRs(PRI);
- for (NodeAddr<RefNode*> R : PhiRefs) {
+ for (NodeAddr<RefNode *> R : PhiRefs) {
if (!DFG.IsRef<NodeAttrs::Def>(R))
continue;
DRs.insert(R.Addr->getRegRef(DFG));
@@ -503,17 +501,17 @@ void Liveness::computePhiInfo() {
    // This set of uses will later be trimmed to contain only those uses that
// are actually reached by the phi defs.
for (unsigned i = 0; i < DefQ.size(); ++i) {
- NodeAddr<DefNode*> DA = DFG.addr<DefNode*>(DefQ[i]);
+ NodeAddr<DefNode *> DA = DFG.addr<DefNode *>(DefQ[i]);
// Visit all reached uses. Phi defs should not really have the "dead"
// flag set, but check it anyway for consistency.
bool IsDead = DA.Addr->getFlags() & NodeAttrs::Dead;
NodeId UN = !IsDead ? DA.Addr->getReachedUse() : 0;
while (UN != 0) {
- NodeAddr<UseNode*> A = DFG.addr<UseNode*>(UN);
+ NodeAddr<UseNode *> A = DFG.addr<UseNode *>(UN);
uint16_t F = A.Addr->getFlags();
if ((F & (NodeAttrs::Undef | NodeAttrs::PhiRef)) == 0) {
RegisterRef R = A.Addr->getRegRef(DFG);
- RealUses[R.Reg].insert({A.Id,R.Mask});
+ RealUses[R.Reg].insert({A.Id, R.Mask});
}
UN = A.Addr->getSibling();
}
@@ -522,9 +520,9 @@ void Liveness::computePhiInfo() {
// later.
NodeId DN = DA.Addr->getReachedDef();
while (DN != 0) {
- NodeAddr<DefNode*> A = DFG.addr<DefNode*>(DN);
+ NodeAddr<DefNode *> A = DFG.addr<DefNode *>(DN);
for (auto T : DFG.getRelatedRefs(A.Addr->getOwner(DFG), A)) {
- uint16_t Flags = NodeAddr<DefNode*>(T).Addr->getFlags();
+ uint16_t Flags = NodeAddr<DefNode *>(T).Addr->getFlags();
// Must traverse the reached-def chain. Consider:
// def(D0) -> def(R0) -> def(R0) -> use(D0)
// The reachable use of D0 passes through a def of R0.
@@ -546,21 +544,25 @@ void Liveness::computePhiInfo() {
// = R1:0 u6 Not reached by d1 (covered collectively
// by d3 and d5), but following reached
// defs and uses from d1 will lead here.
- for (auto UI = RealUses.begin(), UE = RealUses.end(); UI != UE; ) {
+ for (auto UI = RealUses.begin(), UE = RealUses.end(); UI != UE;) {
      // For each reached register UI->first, there is a set UI->second of
      // its uses. For each such use, check if it is reached by this phi,
// i.e. check if the set of its reaching uses intersects the set of
// this phi's defs.
NodeRefSet Uses = UI->second;
UI->second.clear();
- for (std::pair<NodeId,LaneBitmask> I : Uses) {
- auto UA = DFG.addr<UseNode*>(I.first);
+ for (std::pair<NodeId, LaneBitmask> I : Uses) {
+ auto UA = DFG.addr<UseNode *>(I.first);
// Undef flag is checked above.
assert((UA.Addr->getFlags() & NodeAttrs::Undef) == 0);
- RegisterRef R(UI->first, I.second);
+ RegisterRef UseR(UI->first, I.second); // Ref from Uses
+ // R = intersection of the ref from the phi and the ref from Uses
+ RegisterRef R = PhiDRs.at(PhiA.Id).intersectWith(UseR);
+ if (!R)
+ continue;
// Calculate the exposed part of the reached use.
RegisterAggr Covered(PRI);
- for (NodeAddr<DefNode*> DA : getAllReachingDefs(R, UA)) {
+ for (NodeAddr<DefNode *> DA : getAllReachingDefs(R, UA)) {
if (PhiDefs.count(DA.Id))
break;
Covered.insert(DA.Addr->getRegRef(DFG));
@@ -590,7 +592,7 @@ void Liveness::computePhiInfo() {
for (auto I : PhiRefs) {
if (!DFG.IsRef<NodeAttrs::Use>(I) || SeenUses.count(I.Id))
continue;
- NodeAddr<PhiUseNode*> PUA = I;
+ NodeAddr<PhiUseNode *> PUA = I;
if (PUA.Addr->getReachingDef() == 0)
continue;
@@ -598,10 +600,10 @@ void Liveness::computePhiInfo() {
NodeList Ds = getAllReachingDefs(UR, PUA, true, false, NoRegs);
RegisterAggr DefRRs(PRI);
- for (NodeAddr<DefNode*> D : Ds) {
+ for (NodeAddr<DefNode *> D : Ds) {
if (D.Addr->getFlags() & NodeAttrs::PhiRef) {
NodeId RP = D.Addr->getOwner(DFG).Id;
- std::map<NodeId,RegisterAggr> &M = PhiUp[PUA.Id];
+ std::map<NodeId, RegisterAggr> &M = PhiUp[PUA.Id];
auto F = M.find(RP);
if (F == M.end())
M.insert(std::make_pair(RP, DefRRs));
@@ -611,7 +613,7 @@ void Liveness::computePhiInfo() {
DefRRs.insert(D.Addr->getRegRef(DFG));
}
- for (NodeAddr<PhiUseNode*> T : DFG.getRelatedRefs(PhiA, PUA))
+ for (NodeAddr<PhiUseNode *> T : DFG.getRelatedRefs(PhiA, PUA))
SeenUses.insert(T.Id);
}
}
@@ -652,9 +654,11 @@ void Liveness::computePhiInfo() {
// The operation "clearIn" can be expensive. For a given set of intervening
// defs, cache the result of subtracting these defs from a given register
// ref.
+ using RefHash = std::hash<RegisterRef>;
+ using RefEqual = std::equal_to<RegisterRef>;
using SubMap = std::unordered_map<RegisterRef, RegisterRef>;
std::unordered_map<RegisterAggr, SubMap> Subs;
- auto ClearIn = [] (RegisterRef RR, const RegisterAggr &Mid, SubMap &SM) {
+ auto ClearIn = [](RegisterRef RR, const RegisterAggr &Mid, SubMap &SM) {
if (Mid.empty())
return RR;
auto F = SM.find(RR);
@@ -667,12 +671,12 @@ void Liveness::computePhiInfo() {
// Go over all phis.
for (unsigned i = 0; i < PhiUQ.size(); ++i) {
- auto PA = DFG.addr<PhiNode*>(PhiUQ[i]);
+ auto PA = DFG.addr<PhiNode *>(PhiUQ[i]);
NodeList PUs = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG);
RefMap &RUM = RealUseMap[PA.Id];
- for (NodeAddr<UseNode*> UA : PUs) {
- std::map<NodeId,RegisterAggr> &PUM = PhiUp[UA.Id];
+ for (NodeAddr<UseNode *> UA : PUs) {
+ std::map<NodeId, RegisterAggr> &PUM = PhiUp[UA.Id];
RegisterRef UR = UA.Addr->getRegRef(DFG);
for (const std::pair<const NodeId, RegisterAggr> &P : PUM) {
bool Changed = false;
@@ -683,7 +687,10 @@ void Liveness::computePhiInfo() {
if (MidDefs.hasCoverOf(UR))
continue;
- SubMap &SM = Subs[MidDefs];
+ if (Subs.find(MidDefs) == Subs.end()) {
+ Subs.insert({MidDefs, SubMap(1, RefHash(), RefEqual(PRI))});
+ }
+ SubMap &SM = Subs.at(MidDefs);
// General algorithm:
// for each (R,U) : U is use node of R, U is reached by PA
@@ -699,13 +706,13 @@ void Liveness::computePhiInfo() {
if (!DRs.hasAliasOf(R))
continue;
R = PRI.mapTo(DRs.intersectWith(R), T.first);
- for (std::pair<NodeId,LaneBitmask> V : T.second) {
+ for (std::pair<NodeId, LaneBitmask> V : T.second) {
LaneBitmask M = R.Mask & V.second;
if (M.none())
continue;
if (RegisterRef SS = ClearIn(RegisterRef(R.Reg, M), MidDefs, SM)) {
NodeRefSet &RS = RealUseMap[P.first][SS.Reg];
- Changed |= RS.insert({V.first,SS.Mask}).second;
+ Changed |= RS.insert({V.first, SS.Mask}).second;
}
}
}
@@ -720,10 +727,10 @@ void Liveness::computePhiInfo() {
dbgs() << "Real use map:\n";
for (auto I : RealUseMap) {
dbgs() << "phi " << Print(I.first, DFG);
- NodeAddr<PhiNode*> PA = DFG.addr<PhiNode*>(I.first);
+ NodeAddr<PhiNode *> PA = DFG.addr<PhiNode *>(I.first);
NodeList Ds = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Def>, DFG);
if (!Ds.empty()) {
- RegisterRef RR = NodeAddr<DefNode*>(Ds[0]).Addr->getRegRef(DFG);
+ RegisterRef RR = NodeAddr<DefNode *>(Ds[0]).Addr->getRegRef(DFG);
dbgs() << '<' << Print(RR, DFG) << '>';
} else {
dbgs() << "<noreg>";
@@ -737,10 +744,10 @@ void Liveness::computeLiveIns() {
// Populate the node-to-block map. This speeds up the calculations
// significantly.
NBMap.clear();
- for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) {
+ for (NodeAddr<BlockNode *> BA : DFG.getFunc().Addr->members(DFG)) {
MachineBasicBlock *BB = BA.Addr->getCode();
- for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) {
- for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG))
+ for (NodeAddr<InstrNode *> IA : BA.Addr->members(DFG)) {
+ for (NodeAddr<RefNode *> RA : IA.Addr->members(DFG))
NBMap.insert(std::make_pair(RA.Id, BB));
NBMap.insert(std::make_pair(IA.Id, BB));
}
@@ -754,7 +761,7 @@ void Liveness::computeLiveIns() {
auto F1 = MDF.find(&B);
if (F1 == MDF.end())
continue;
- SetVector<MachineBasicBlock*> IDFB(F1->second.begin(), F1->second.end());
+ SetVector<MachineBasicBlock *> IDFB(F1->second.begin(), F1->second.end());
for (unsigned i = 0; i < IDFB.size(); ++i) {
auto F2 = MDF.find(IDFB[i]);
if (F2 != MDF.end())
@@ -771,16 +778,17 @@ void Liveness::computeLiveIns() {
computePhiInfo();
- NodeAddr<FuncNode*> FA = DFG.getFunc();
+ NodeAddr<FuncNode *> FA = DFG.getFunc();
NodeList Blocks = FA.Addr->members(DFG);
// Build the phi live-on-entry map.
- for (NodeAddr<BlockNode*> BA : Blocks) {
+ for (NodeAddr<BlockNode *> BA : Blocks) {
MachineBasicBlock *MB = BA.Addr->getCode();
RefMap &LON = PhiLON[MB];
- for (auto P : BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG))
+ for (auto P : BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG)) {
for (const RefMap::value_type &S : RealUseMap[P.Id])
LON[S.first].insert(S.second.begin(), S.second.end());
+ }
}
if (Trace) {
@@ -793,9 +801,9 @@ void Liveness::computeLiveIns() {
// Build the phi live-on-exit map. Each phi node has some set of reached
// "real" uses. Propagate this set backwards into the block predecessors
// through the reaching defs of the corresponding phi uses.
- for (NodeAddr<BlockNode*> BA : Blocks) {
+ for (NodeAddr<BlockNode *> BA : Blocks) {
NodeList Phis = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
- for (NodeAddr<PhiNode*> PA : Phis) {
+ for (NodeAddr<PhiNode *> PA : Phis) {
RefMap &RUs = RealUseMap[PA.Id];
if (RUs.empty())
continue;
@@ -804,7 +812,7 @@ void Liveness::computeLiveIns() {
for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
if (!SeenUses.insert(U.Id).second)
continue;
- NodeAddr<PhiUseNode*> PUA = U;
+ NodeAddr<PhiUseNode *> PUA = U;
if (PUA.Addr->getReachingDef() == 0)
continue;
@@ -819,18 +827,18 @@ void Liveness::computeLiveIns() {
// For each reached "real" use, identify the set of reaching defs
// coming from each predecessor P, and add them to PhiLOX[P].
//
- auto PrA = DFG.addr<BlockNode*>(PUA.Addr->getPredecessor());
+ auto PrA = DFG.addr<BlockNode *>(PUA.Addr->getPredecessor());
RefMap &LOX = PhiLOX[PrA.Addr->getCode()];
for (const std::pair<const RegisterId, NodeRefSet> &RS : RUs) {
// We need to visit each individual use.
- for (std::pair<NodeId,LaneBitmask> P : RS.second) {
+ for (std::pair<NodeId, LaneBitmask> P : RS.second) {
// Create a register ref corresponding to the use, and find
// all reaching defs starting from the phi use, and treating
// all related shadows as a single use cluster.
RegisterRef S(RS.first, P.second);
NodeList Ds = getAllReachingDefs(S, PUA, true, false, NoRegs);
- for (NodeAddr<DefNode*> D : Ds) {
+ for (NodeAddr<DefNode *> D : Ds) {
// Calculate the mask corresponding to the visited def.
RegisterAggr TA(PRI);
TA.insert(D.Addr->getRegRef(DFG)).intersect(S);
@@ -840,11 +848,11 @@ void Liveness::computeLiveIns() {
}
}
- for (NodeAddr<PhiUseNode*> T : DFG.getRelatedRefs(PA, PUA))
+ for (NodeAddr<PhiUseNode *> T : DFG.getRelatedRefs(PA, PUA))
SeenUses.insert(T.Id);
- } // for U : phi uses
- } // for P : Phis
- } // for B : Blocks
+ } // for U : phi uses
+ } // for P : Phis
+ } // for B : Blocks
if (Trace) {
dbgs() << "Phi live-on-exit map:\n";
@@ -865,23 +873,21 @@ void Liveness::computeLiveIns() {
std::vector<RegisterRef> LV;
for (const MachineBasicBlock::RegisterMaskPair &LI : B.liveins())
LV.push_back(RegisterRef(LI.PhysReg, LI.LaneMask));
- llvm::sort(LV);
+ llvm::sort(LV, std::less<RegisterRef>(PRI));
dbgs() << printMBBReference(B) << "\t rec = {";
for (auto I : LV)
dbgs() << ' ' << Print(I, DFG);
dbgs() << " }\n";
- //dbgs() << "\tcomp = " << Print(LiveMap[&B], DFG) << '\n';
+ // dbgs() << "\tcomp = " << Print(LiveMap[&B], DFG) << '\n';
LV.clear();
- const RegisterAggr &LG = LiveMap[&B];
- for (auto I = LG.rr_begin(), E = LG.rr_end(); I != E; ++I)
- LV.push_back(*I);
- llvm::sort(LV);
+ for (RegisterRef RR : LiveMap[&B].refs())
+ LV.push_back(RR);
+ llvm::sort(LV, std::less<RegisterRef>(PRI));
dbgs() << "\tcomp = {";
for (auto I : LV)
dbgs() << ' ' << Print(I, DFG);
dbgs() << " }\n";
-
}
}
}
@@ -896,7 +902,7 @@ void Liveness::resetLiveIns() {
B.removeLiveIn(I);
// Add the newly computed live-ins.
const RegisterAggr &LiveIns = LiveMap[&B];
- for (const RegisterRef R : make_range(LiveIns.rr_begin(), LiveIns.rr_end()))
+ for (RegisterRef R : LiveIns.refs())
B.addLiveIn({MCPhysReg(R.Reg), R.Mask});
}
}
@@ -907,7 +913,7 @@ void Liveness::resetKills() {
}
void Liveness::resetKills(MachineBasicBlock *B) {
- auto CopyLiveIns = [this] (MachineBasicBlock *B, BitVector &LV) -> void {
+ auto CopyLiveIns = [this](MachineBasicBlock *B, BitVector &LV) -> void {
for (auto I : B->liveins()) {
MCSubRegIndexIterator S(I.PhysReg, &TRI);
if (!S.isValid()) {
@@ -933,21 +939,21 @@ void Liveness::resetKills(MachineBasicBlock *B) {
continue;
MI.clearKillInfo();
- for (auto &Op : MI.operands()) {
+ for (auto &Op : MI.all_defs()) {
// An implicit def of a super-register may not necessarily start a
// live range of it, since an implicit use could be used to keep parts
// of it live. Instead of analyzing the implicit operands, ignore
// implicit defs.
- if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
+ if (Op.isImplicit())
continue;
Register R = Op.getReg();
if (!R.isPhysical())
continue;
- for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR)
- Live.reset(*SR);
+ for (MCPhysReg SR : TRI.subregs_inclusive(R))
+ Live.reset(SR);
}
- for (auto &Op : MI.operands()) {
- if (!Op.isReg() || !Op.isUse() || Op.isUndef())
+ for (auto &Op : MI.all_uses()) {
+ if (Op.isUndef())
continue;
Register R = Op.getReg();
if (!R.isPhysical())
@@ -961,8 +967,8 @@ void Liveness::resetKills(MachineBasicBlock *B) {
}
if (!IsLive)
Op.setIsKill(true);
- for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR)
- Live.set(*SR);
+ for (MCPhysReg SR : TRI.subregs_inclusive(R))
+ Live.set(SR);
}
}
}
@@ -1048,9 +1054,9 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
const NodeRefSet &OldDefs = LE.second;
for (NodeRef OR : OldDefs) {
// R is a def node that was live-on-exit
- auto DA = DFG.addr<DefNode*>(OR.first);
- NodeAddr<InstrNode*> IA = DA.Addr->getOwner(DFG);
- NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
+ auto DA = DFG.addr<DefNode *>(OR.first);
+ NodeAddr<InstrNode *> IA = DA.Addr->getOwner(DFG);
+ NodeAddr<BlockNode *> BA = IA.Addr->getOwner(DFG);
if (B != BA.Addr->getCode()) {
// Defs from a different block need to be preserved. Defs from this
// block will need to be processed further, except for phi defs, the
@@ -1081,10 +1087,10 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
// There could be other defs in this block that are a part of that
// chain. Check that now: accumulate the registers from these defs,
// and if they all together cover LRef, it is not live-on-entry.
- for (NodeAddr<DefNode*> TA : getAllReachingDefs(DA)) {
+ for (NodeAddr<DefNode *> TA : getAllReachingDefs(DA)) {
// DefNode -> InstrNode -> BlockNode.
- NodeAddr<InstrNode*> ITA = TA.Addr->getOwner(DFG);
- NodeAddr<BlockNode*> BTA = ITA.Addr->getOwner(DFG);
+ NodeAddr<InstrNode *> ITA = TA.Addr->getOwner(DFG);
+ NodeAddr<BlockNode *> BTA = ITA.Addr->getOwner(DFG);
// Reaching defs are ordered in the upward direction.
if (BTA.Addr->getCode() != B) {
// We have reached past the beginning of B, and the accumulated
@@ -1093,7 +1099,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
// Subtract all accumulated defs (RRs) from LRef.
RegisterRef T = RRs.clearIn(LRef);
assert(T);
- NewDefs.insert({TA.Id,T.Mask});
+ NewDefs.insert({TA.Id, T.Mask});
break;
}
@@ -1118,16 +1124,16 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
// Scan the block for upward-exposed uses and add them to the tracking set.
for (auto I : DFG.getFunc().Addr->findBlock(B, DFG).Addr->members(DFG)) {
- NodeAddr<InstrNode*> IA = I;
+ NodeAddr<InstrNode *> IA = I;
if (IA.Addr->getKind() != NodeAttrs::Stmt)
continue;
- for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) {
+ for (NodeAddr<UseNode *> UA : IA.Addr->members_if(DFG.IsUse, DFG)) {
if (UA.Addr->getFlags() & NodeAttrs::Undef)
continue;
RegisterRef RR = UA.Addr->getRegRef(DFG);
- for (NodeAddr<DefNode*> D : getAllReachingDefs(UA))
+ for (NodeAddr<DefNode *> D : getAllReachingDefs(UA))
if (getBlockWithRef(D.Id) != B)
- LiveIn[RR.Reg].insert({D.Id,RR.Mask});
+ LiveIn[RR.Reg].insert({D.Id, RR.Mask});
}
}
@@ -1145,7 +1151,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
LaneBitmask M;
for (auto P : R.second)
M |= P.second;
- Local.insert(RegisterRef(R.first,M));
+ Local.insert(RegisterRef(R.first, M));
}
if (Trace) {
@@ -1164,6 +1170,8 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
}
void Liveness::emptify(RefMap &M) {
- for (auto I = M.begin(), E = M.end(); I != E; )
+ for (auto I = M.begin(), E = M.end(); I != E;)
I = I->second.empty() ? M.erase(I) : std::next(I);
}
+
+} // namespace llvm::rdf
diff --git a/llvm/lib/CodeGen/RDFRegisters.cpp b/llvm/lib/CodeGen/RDFRegisters.cpp
index 8760ba118934..90520c4c3c71 100644
--- a/llvm/lib/CodeGen/RDFRegisters.cpp
+++ b/llvm/lib/CodeGen/RDFRegisters.cpp
@@ -15,17 +15,18 @@
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <set>
#include <utility>
-using namespace llvm;
-using namespace rdf;
+namespace llvm::rdf {
PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri,
- const MachineFunction &mf)
+ const MachineFunction &mf)
: TRI(tri) {
RegInfos.resize(TRI.getNumRegs());
@@ -57,7 +58,7 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri,
UnitInfos[U].Reg = F;
} else {
for (MCRegUnitMaskIterator I(F, &TRI); I.isValid(); ++I) {
- std::pair<uint32_t,LaneBitmask> P = *I;
+ std::pair<uint32_t, LaneBitmask> P = *I;
UnitInfo &UI = UnitInfos[P.first];
UI.Reg = F;
if (P.second.any()) {
@@ -80,15 +81,15 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri,
if (Op.isRegMask())
RegMasks.insert(Op.getRegMask());
- MaskInfos.resize(RegMasks.size()+1);
+ MaskInfos.resize(RegMasks.size() + 1);
for (uint32_t M = 1, NM = RegMasks.size(); M <= NM; ++M) {
BitVector PU(TRI.getNumRegUnits());
const uint32_t *MB = RegMasks.get(M);
for (unsigned I = 1, E = TRI.getNumRegs(); I != E; ++I) {
if (!(MB[I / 32] & (1u << (I % 32))))
continue;
- for (MCRegUnitIterator U(MCRegister::from(I), &TRI); U.isValid(); ++U)
- PU.set(*U);
+ for (MCRegUnit Unit : TRI.regunits(MCRegister::from(I)))
+ PU.set(Unit);
}
MaskInfos[M].Units = PU.flip();
}
@@ -97,134 +98,75 @@ PhysicalRegisterInfo::PhysicalRegisterInfo(const TargetRegisterInfo &tri,
for (uint32_t U = 0, NU = TRI.getNumRegUnits(); U != NU; ++U) {
BitVector AS(TRI.getNumRegs());
for (MCRegUnitRootIterator R(U, &TRI); R.isValid(); ++R)
- for (MCSuperRegIterator S(*R, &TRI, true); S.isValid(); ++S)
- AS.set(*S);
+ for (MCPhysReg S : TRI.superregs_inclusive(*R))
+ AS.set(S);
AliasInfos[U].Regs = AS;
}
}
+bool PhysicalRegisterInfo::alias(RegisterRef RA, RegisterRef RB) const {
+ return !disjoint(getUnits(RA), getUnits(RB));
+}
+
std::set<RegisterId> PhysicalRegisterInfo::getAliasSet(RegisterId Reg) const {
- // Do not include RR in the alias set.
+ // Do not include Reg in the alias set.
std::set<RegisterId> AS;
- assert(isRegMaskId(Reg) || Register::isPhysicalRegister(Reg));
- if (isRegMaskId(Reg)) {
+ assert(!RegisterRef::isUnitId(Reg) && "No units allowed");
+ if (RegisterRef::isMaskId(Reg)) {
// XXX SLOW
const uint32_t *MB = getRegMaskBits(Reg);
for (unsigned i = 1, e = TRI.getNumRegs(); i != e; ++i) {
- if (MB[i/32] & (1u << (i%32)))
+ if (MB[i / 32] & (1u << (i % 32)))
continue;
AS.insert(i);
}
- for (const uint32_t *RM : RegMasks) {
- RegisterId MI = getRegMaskId(RM);
- if (MI != Reg && aliasMM(RegisterRef(Reg), RegisterRef(MI)))
- AS.insert(MI);
- }
return AS;
}
+ assert(RegisterRef::isRegId(Reg));
for (MCRegAliasIterator AI(Reg, &TRI, false); AI.isValid(); ++AI)
AS.insert(*AI);
- for (const uint32_t *RM : RegMasks) {
- RegisterId MI = getRegMaskId(RM);
- if (aliasRM(RegisterRef(Reg), RegisterRef(MI)))
- AS.insert(MI);
- }
+
return AS;
}
-bool PhysicalRegisterInfo::aliasRR(RegisterRef RA, RegisterRef RB) const {
- assert(Register::isPhysicalRegister(RA.Reg));
- assert(Register::isPhysicalRegister(RB.Reg));
-
- MCRegUnitMaskIterator UMA(RA.Reg, &TRI);
- MCRegUnitMaskIterator UMB(RB.Reg, &TRI);
- // Reg units are returned in the numerical order.
- while (UMA.isValid() && UMB.isValid()) {
- // Skip units that are masked off in RA.
- std::pair<RegisterId,LaneBitmask> PA = *UMA;
- if (PA.second.any() && (PA.second & RA.Mask).none()) {
- ++UMA;
- continue;
- }
- // Skip units that are masked off in RB.
- std::pair<RegisterId,LaneBitmask> PB = *UMB;
- if (PB.second.any() && (PB.second & RB.Mask).none()) {
- ++UMB;
- continue;
- }
+std::set<RegisterId> PhysicalRegisterInfo::getUnits(RegisterRef RR) const {
+ std::set<RegisterId> Units;
- if (PA.first == PB.first)
- return true;
- if (PA.first < PB.first)
- ++UMA;
- else if (PB.first < PA.first)
- ++UMB;
- }
- return false;
-}
+ if (RR.Reg == 0)
+ return Units; // Empty
-bool PhysicalRegisterInfo::aliasRM(RegisterRef RR, RegisterRef RM) const {
- assert(Register::isPhysicalRegister(RR.Reg) && isRegMaskId(RM.Reg));
- const uint32_t *MB = getRegMaskBits(RM.Reg);
- bool Preserved = MB[RR.Reg/32] & (1u << (RR.Reg%32));
- // If the lane mask information is "full", e.g. when the given lane mask
- // is a superset of the lane mask from the register class, check the regmask
- // bit directly.
- if (RR.Mask == LaneBitmask::getAll())
- return !Preserved;
- const TargetRegisterClass *RC = RegInfos[RR.Reg].RegClass;
- if (RC != nullptr && (RR.Mask & RC->LaneMask) == RC->LaneMask)
- return !Preserved;
-
- // Otherwise, check all subregisters whose lane mask overlaps the given
- // mask. For each such register, if it is preserved by the regmask, then
- // clear the corresponding bits in the given mask. If at the end, all
- // bits have been cleared, the register does not alias the regmask (i.e.
- // is it preserved by it).
- LaneBitmask M = RR.Mask;
- for (MCSubRegIndexIterator SI(RR.Reg, &TRI); SI.isValid(); ++SI) {
- LaneBitmask SM = TRI.getSubRegIndexLaneMask(SI.getSubRegIndex());
- if ((SM & RR.Mask).none())
- continue;
- unsigned SR = SI.getSubReg();
- if (!(MB[SR/32] & (1u << (SR%32))))
- continue;
- // The subregister SR is preserved.
- M &= ~SM;
- if (M.none())
- return false;
+ if (RR.isReg()) {
+ if (RR.Mask.none())
+ return Units; // Empty
+ for (MCRegUnitMaskIterator UM(RR.idx(), &TRI); UM.isValid(); ++UM) {
+ auto [U, M] = *UM;
+ if (M.none() || (M & RR.Mask).any())
+ Units.insert(U);
+ }
+ return Units;
}
- return true;
-}
-
-bool PhysicalRegisterInfo::aliasMM(RegisterRef RM, RegisterRef RN) const {
- assert(isRegMaskId(RM.Reg) && isRegMaskId(RN.Reg));
+ assert(RR.isMask());
unsigned NumRegs = TRI.getNumRegs();
- const uint32_t *BM = getRegMaskBits(RM.Reg);
- const uint32_t *BN = getRegMaskBits(RN.Reg);
-
- for (unsigned w = 0, nw = NumRegs/32; w != nw; ++w) {
- // Intersect the negations of both words. Disregard reg=0,
- // i.e. 0th bit in the 0th word.
- uint32_t C = ~BM[w] & ~BN[w];
- if (w == 0)
- C &= ~1;
- if (C)
- return true;
+ const uint32_t *MB = getRegMaskBits(RR.idx());
+ for (unsigned I = 0, E = (NumRegs + 31) / 32; I != E; ++I) {
+ uint32_t C = ~MB[I]; // Clobbered regs
+ if (I == 0) // Reg 0 should be ignored
+ C &= maskLeadingOnes<unsigned>(31);
+ if (I + 1 == E && NumRegs % 32 != 0) // Last word may be partial
+ C &= maskTrailingOnes<unsigned>(NumRegs % 32);
+ if (C == 0)
+ continue;
+ while (C != 0) {
+ unsigned T = llvm::countr_zero(C);
+ unsigned CR = 32 * I + T; // Clobbered reg
+ for (MCRegUnit U : TRI.regunits(CR))
+ Units.insert(U);
+ C &= ~(1u << T);
+ }
}
-
- // Check the remaining registers in the last word.
- unsigned TailRegs = NumRegs % 32;
- if (TailRegs == 0)
- return false;
- unsigned TW = NumRegs / 32;
- uint32_t TailMask = (1u << TailRegs) - 1;
- if (~BM[TW] & ~BN[TW] & TailMask)
- return true;
-
- return false;
+ return Units;
}
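
The mask branch of getUnits walks the complement of the regmask word by word: the set bits of ~MB[I] are the clobbered registers, bit 0 and the partial final word are trimmed off, and the remaining set bits are peeled one at a time via countr_zero. A standalone sketch of that bit-peeling loop in plain C++20, using std::countr_zero rather than the LLVM helpers (the mask contents are invented):

#include <bit>
#include <cstdint>
#include <iostream>
#include <vector>

// Enumerate the registers whose preserved-bit is clear in a call regmask.
// Bit R of the mask lives in word R/32, position R%32; clear => clobbered.
static std::vector<unsigned> clobberedRegs(const std::vector<uint32_t> &Mask,
                                           unsigned NumRegs) {
  std::vector<unsigned> Out;
  for (unsigned I = 0, E = (NumRegs + 31) / 32; I != E; ++I) {
    uint32_t C = ~Mask[I];                  // set bits = clobbered registers
    if (I == 0)
      C &= ~1u;                             // register 0 is not a real register
    if (I + 1 == E && NumRegs % 32 != 0)
      C &= (1u << (NumRegs % 32)) - 1;      // trim the partial last word
    while (C != 0) {
      unsigned T = std::countr_zero(C);     // lowest set bit
      Out.push_back(32 * I + T);
      C &= C - 1;                           // clear the lowest set bit
    }
  }
  return Out;
}

int main() {
  // 40 registers, everything preserved except regs 3 and 35 (invented).
  std::vector<uint32_t> Mask{~0u & ~(1u << 3),    // reg 3 clobbered
                             ~0u & ~(1u << 3)};   // reg 35 (= 32 + 3) clobbered
  for (unsigned R : clobberedRegs(Mask, 40))
    std::cout << "clobbered: " << R << '\n';      // prints 3 and 35
}
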
RegisterRef PhysicalRegisterInfo::mapTo(RegisterRef RR, unsigned R) const {
@@ -234,20 +176,133 @@ RegisterRef PhysicalRegisterInfo::mapTo(RegisterRef RR, unsigned R) const {
return RegisterRef(R, TRI.composeSubRegIndexLaneMask(Idx, RR.Mask));
if (unsigned Idx = TRI.getSubRegIndex(RR.Reg, R)) {
const RegInfo &RI = RegInfos[R];
- LaneBitmask RCM = RI.RegClass ? RI.RegClass->LaneMask
- : LaneBitmask::getAll();
+ LaneBitmask RCM =
+ RI.RegClass ? RI.RegClass->LaneMask : LaneBitmask::getAll();
LaneBitmask M = TRI.reverseComposeSubRegIndexLaneMask(Idx, RR.Mask);
return RegisterRef(R, M & RCM);
}
llvm_unreachable("Invalid arguments: unrelated registers?");
}
+bool PhysicalRegisterInfo::equal_to(RegisterRef A, RegisterRef B) const {
+ if (!A.isReg() || !B.isReg()) {
+ // For non-regs, or comparing reg and non-reg, use only the Reg member.
+ return A.Reg == B.Reg;
+ }
+
+ if (A.Reg == B.Reg)
+ return A.Mask == B.Mask;
+
+ // Compare reg units lexicographically.
+ MCRegUnitMaskIterator AI(A.Reg, &getTRI());
+ MCRegUnitMaskIterator BI(B.Reg, &getTRI());
+ while (AI.isValid() && BI.isValid()) {
+ auto [AReg, AMask] = *AI;
+ auto [BReg, BMask] = *BI;
+
+ // Lane masks are "none" for units that don't correspond to subregs
+ // e.g. a single unit in a leaf register, or aliased unit.
+ if (AMask.none())
+ AMask = LaneBitmask::getAll();
+ if (BMask.none())
+ BMask = LaneBitmask::getAll();
+
+ // If both iterators point to a unit contained in both A and B, then
+ // compare the units.
+ if ((AMask & A.Mask).any() && (BMask & B.Mask).any()) {
+ if (AReg != BReg)
+ return false;
+ // Units are equal, move on to the next ones.
+ ++AI;
+ ++BI;
+ continue;
+ }
+
+ if ((AMask & A.Mask).none())
+ ++AI;
+ if ((BMask & B.Mask).none())
+ ++BI;
+ }
+ // One or both have reached the end.
+ return static_cast<int>(AI.isValid()) == static_cast<int>(BI.isValid());
+}
+
+bool PhysicalRegisterInfo::less(RegisterRef A, RegisterRef B) const {
+ if (!A.isReg() || !B.isReg()) {
+ // For non-regs, or comparing reg and non-reg, use only the Reg member.
+ return A.Reg < B.Reg;
+ }
+
+ if (A.Reg == B.Reg)
+ return A.Mask < B.Mask;
+ if (A.Mask == B.Mask)
+ return A.Reg < B.Reg;
+
+ // Compare reg units lexicographically.
+ llvm::MCRegUnitMaskIterator AI(A.Reg, &getTRI());
+ llvm::MCRegUnitMaskIterator BI(B.Reg, &getTRI());
+ while (AI.isValid() && BI.isValid()) {
+ auto [AReg, AMask] = *AI;
+ auto [BReg, BMask] = *BI;
+
+ // Lane masks are "none" for units that don't correspond to subregs
+ // e.g. a single unit in a leaf register, or aliased unit.
+ if (AMask.none())
+ AMask = LaneBitmask::getAll();
+ if (BMask.none())
+ BMask = LaneBitmask::getAll();
+
+ // If both iterators point to a unit contained in both A and B, then
+ // compare the units.
+ if ((AMask & A.Mask).any() && (BMask & B.Mask).any()) {
+ if (AReg != BReg)
+ return AReg < BReg;
+ // Units are equal, move on to the next ones.
+ ++AI;
+ ++BI;
+ continue;
+ }
+
+ if ((AMask & A.Mask).none())
+ ++AI;
+ if ((BMask & B.Mask).none())
+ ++BI;
+ }
+ // One or both have reached the end: assume invalid < valid.
+ return static_cast<int>(AI.isValid()) < static_cast<int>(BI.isValid());
+}
+
+void PhysicalRegisterInfo::print(raw_ostream &OS, RegisterRef A) const {
+ if (A.Reg == 0 || A.isReg()) {
+ if (0 < A.idx() && A.idx() < TRI.getNumRegs())
+ OS << TRI.getName(A.idx());
+ else
+ OS << printReg(A.idx(), &TRI);
+ OS << PrintLaneMaskShort(A.Mask);
+ } else if (A.isUnit()) {
+ OS << printRegUnit(A.idx(), &TRI);
+ } else {
+ assert(A.isMask());
+ // RegMask SS flag is preserved by idx().
+ unsigned Idx = Register::stackSlot2Index(A.idx());
+ const char *Fmt = Idx < 0x10000 ? "%04x" : "%08x";
+ OS << "M#" << format(Fmt, Idx);
+ }
+}
+
+void PhysicalRegisterInfo::print(raw_ostream &OS, const RegisterAggr &A) const {
+ OS << '{';
+ for (unsigned U : A.units())
+ OS << ' ' << printRegUnit(U, &TRI);
+ OS << " }";
+}
+
bool RegisterAggr::hasAliasOf(RegisterRef RR) const {
- if (PhysicalRegisterInfo::isRegMaskId(RR.Reg))
+ if (RR.isMask())
return Units.anyCommon(PRI.getMaskUnits(RR.Reg));
for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
- std::pair<uint32_t,LaneBitmask> P = *U;
+ std::pair<uint32_t, LaneBitmask> P = *U;
if (P.second.none() || (P.second & RR.Mask).any())
if (Units.test(P.first))
return true;
@@ -256,13 +311,13 @@ bool RegisterAggr::hasAliasOf(RegisterRef RR) const {
}
bool RegisterAggr::hasCoverOf(RegisterRef RR) const {
- if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) {
+ if (RR.isMask()) {
BitVector T(PRI.getMaskUnits(RR.Reg));
return T.reset(Units).none();
}
for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
- std::pair<uint32_t,LaneBitmask> P = *U;
+ std::pair<uint32_t, LaneBitmask> P = *U;
if (P.second.none() || (P.second & RR.Mask).any())
if (!Units.test(P.first))
return false;
@@ -271,13 +326,13 @@ bool RegisterAggr::hasCoverOf(RegisterRef RR) const {
}
RegisterAggr &RegisterAggr::insert(RegisterRef RR) {
- if (PhysicalRegisterInfo::isRegMaskId(RR.Reg)) {
+ if (RR.isMask()) {
Units |= PRI.getMaskUnits(RR.Reg);
return *this;
}
for (MCRegUnitMaskIterator U(RR.Reg, &PRI.getTRI()); U.isValid(); ++U) {
- std::pair<uint32_t,LaneBitmask> P = *U;
+ std::pair<uint32_t, LaneBitmask> P = *U;
if (P.second.none() || (P.second & RR.Mask).any())
Units.set(P.first);
}
@@ -350,22 +405,14 @@ RegisterRef RegisterAggr::makeRegRef() const {
LaneBitmask M;
for (MCRegUnitMaskIterator I(F, &PRI.getTRI()); I.isValid(); ++I) {
- std::pair<uint32_t,LaneBitmask> P = *I;
+ std::pair<uint32_t, LaneBitmask> P = *I;
if (Units.test(P.first))
M |= P.second.none() ? LaneBitmask::getAll() : P.second;
}
return RegisterRef(F, M);
}
-void RegisterAggr::print(raw_ostream &OS) const {
- OS << '{';
- for (int U = Units.find_first(); U >= 0; U = Units.find_next(U))
- OS << ' ' << printRegUnit(U, &PRI.getTRI());
- OS << " }";
-}
-
-RegisterAggr::rr_iterator::rr_iterator(const RegisterAggr &RG,
- bool End)
+RegisterAggr::ref_iterator::ref_iterator(const RegisterAggr &RG, bool End)
: Owner(&RG) {
for (int U = RG.Units.find_first(); U >= 0; U = RG.Units.find_next(U)) {
RegisterRef R = RG.PRI.getRefForUnit(U);
@@ -375,7 +422,23 @@ RegisterAggr::rr_iterator::rr_iterator(const RegisterAggr &RG,
Index = End ? Masks.size() : 0;
}
-raw_ostream &rdf::operator<<(raw_ostream &OS, const RegisterAggr &A) {
- A.print(OS);
+raw_ostream &operator<<(raw_ostream &OS, const RegisterAggr &A) {
+ A.getPRI().print(OS, A);
return OS;
}
+
+raw_ostream &operator<<(raw_ostream &OS, const PrintLaneMaskShort &P) {
+ if (P.Mask.all())
+ return OS;
+ if (P.Mask.none())
+ return OS << ":*none*";
+
+ LaneBitmask::Type Val = P.Mask.getAsInteger();
+ if ((Val & 0xffff) == Val)
+ return OS << ':' << format("%04llX", Val);
+ if ((Val & 0xffffffff) == Val)
+ return OS << ':' << format("%08llX", Val);
+ return OS << ':' << PrintLaneMask(P.Mask);
+}
+
+} // namespace llvm::rdf
diff --git a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
index d9ced9191fae..75fbc8ba35b1 100644
--- a/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/llvm/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -65,13 +65,13 @@ void ReachingDefAnalysis::enterBasicBlock(MachineBasicBlock *MBB) {
// This is the entry block.
if (MBB->pred_empty()) {
for (const auto &LI : MBB->liveins()) {
- for (MCRegUnitIterator Unit(LI.PhysReg, TRI); Unit.isValid(); ++Unit) {
+ for (MCRegUnit Unit : TRI->regunits(LI.PhysReg)) {
// Treat function live-ins as if they were defined just before the first
// instruction. Usually, function arguments are set up immediately
// before the call.
- if (LiveRegs[*Unit] != -1) {
- LiveRegs[*Unit] = -1;
- MBBReachingDefs[MBBNumber][*Unit].push_back(-1);
+ if (LiveRegs[Unit] != -1) {
+ LiveRegs[Unit] = -1;
+ MBBReachingDefs[MBBNumber][Unit].push_back(-1);
}
}
}
@@ -128,16 +128,15 @@ void ReachingDefAnalysis::processDefs(MachineInstr *MI) {
for (auto &MO : MI->operands()) {
if (!isValidRegDef(MO))
continue;
- for (MCRegUnitIterator Unit(MO.getReg().asMCReg(), TRI); Unit.isValid();
- ++Unit) {
+ for (MCRegUnit Unit : TRI->regunits(MO.getReg().asMCReg())) {
// This instruction explicitly defines the current reg unit.
- LLVM_DEBUG(dbgs() << printRegUnit(*Unit, TRI) << ":\t" << CurInstr
- << '\t' << *MI);
+ LLVM_DEBUG(dbgs() << printRegUnit(Unit, TRI) << ":\t" << CurInstr << '\t'
+ << *MI);
// How many instructions since this reg unit was last written?
- if (LiveRegs[*Unit] != CurInstr) {
- LiveRegs[*Unit] = CurInstr;
- MBBReachingDefs[MBBNumber][*Unit].push_back(CurInstr);
+ if (LiveRegs[Unit] != CurInstr) {
+ LiveRegs[Unit] = CurInstr;
+ MBBReachingDefs[MBBNumber][Unit].push_back(CurInstr);
}
}
}
@@ -269,8 +268,8 @@ int ReachingDefAnalysis::getReachingDef(MachineInstr *MI,
assert(MBBNumber < MBBReachingDefs.size() &&
"Unexpected basic block number.");
int LatestDef = ReachingDefDefaultVal;
- for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) {
- for (int Def : MBBReachingDefs[MBBNumber][*Unit]) {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ for (int Def : MBBReachingDefs[MBBNumber][Unit]) {
if (Def >= InstId)
break;
DefRes = Def;
diff --git a/llvm/lib/CodeGen/RegAllocBasic.cpp b/llvm/lib/CodeGen/RegAllocBasic.cpp
index 91795f3d27fe..666199139630 100644
--- a/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -58,7 +58,7 @@ class RABasic : public MachineFunctionPass,
public RegAllocBase,
private LiveRangeEdit::Delegate {
// context
- MachineFunction *MF;
+ MachineFunction *MF = nullptr;
// state
std::unique_ptr<Spiller> SpillerInstance;
@@ -213,8 +213,8 @@ bool RABasic::spillInterferences(const LiveInterval &VirtReg,
SmallVector<const LiveInterval *, 8> Intfs;
// Collect interferences assigned to any alias of the physical register.
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
for (const auto *Intf : reverse(Q.interferingVRegs())) {
if (!Intf->isSpillable() || Intf->weight() > VirtReg.weight())
return false;
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
index b1743d3f987d..81f3d2c8099f 100644
--- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
@@ -43,6 +43,7 @@ static cl::opt<bool> EnableLocalReassignment(
"may be compile time intensive"),
cl::init(false));
+namespace llvm {
cl::opt<unsigned> EvictInterferenceCutoff(
"regalloc-eviction-max-interference-cutoff", cl::Hidden,
cl::desc("Number of interferences after which we declare "
@@ -50,6 +51,7 @@ cl::opt<unsigned> EvictInterferenceCutoff(
"is a compilation cost-saving consideration. To "
"disable, pass a very large number."),
cl::init(10));
+}
#define DEBUG_TYPE "regalloc"
#ifdef LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL
@@ -100,9 +102,7 @@ template <> Pass *llvm::callDefaultCtor<RegAllocEvictionAdvisorAnalysis>() {
#endif
break;
case RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release:
-#if defined(LLVM_HAVE_TF_AOT)
Ret = createReleaseModeAdvisor();
-#endif
break;
}
if (Ret)
@@ -201,8 +201,8 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
unsigned Cascade = RA.getExtraInfo().getCascadeOrCurrentNext(VirtReg.reg());
EvictionCost Cost;
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
// If there is 10 or more interferences, chances are one is heavier.
const auto &Interferences = Q.interferingVRegs(EvictInterferenceCutoff);
if (Interferences.size() >= EvictInterferenceCutoff)
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
index 46838570a2fc..52dd946a6854 100644
--- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
@@ -121,7 +121,7 @@ public:
protected:
RegAllocEvictionAdvisor(const MachineFunction &MF, const RAGreedy &RA);
- Register canReassign(const LiveInterval &VirtReg, Register PrevReg) const;
+ bool canReassign(const LiveInterval &VirtReg, MCRegister FromReg) const;
// Get the upper limit of elements in the given Order we need to analize.
// TODO: is this heuristic, we could consider learning it.
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 775e66e48406..864beb8720f4 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -75,15 +75,15 @@ namespace {
}
private:
- MachineFrameInfo *MFI;
- MachineRegisterInfo *MRI;
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
+ MachineFrameInfo *MFI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
RegisterClassInfo RegClassInfo;
const RegClassFilterFunc ShouldAllocateClass;
/// Basic block currently being allocated.
- MachineBasicBlock *MBB;
+ MachineBasicBlock *MBB = nullptr;
/// Maps virtual regs to the frame index where these values are spilled.
IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
@@ -106,7 +106,7 @@ namespace {
}
};
- using LiveRegMap = SparseSet<LiveReg>;
+ using LiveRegMap = SparseSet<LiveReg, identity<unsigned>, uint16_t>;
/// This map contains entries for each virtual register that is currently
/// available in a physical register.
LiveRegMap LiveVirtRegs;
@@ -161,8 +161,8 @@ namespace {
/// Mark a physreg as used in this instruction.
void markRegUsedInInstr(MCPhysReg PhysReg) {
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
- UsedInInstr.insert(*Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ UsedInInstr.insert(Unit);
}
// Check if physreg is clobbered by instruction's regmask(s).
@@ -176,10 +176,10 @@ namespace {
bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const {
if (LookAtPhysRegUses && isClobberedByRegMasks(PhysReg))
return true;
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- if (UsedInInstr.count(*Units))
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if (UsedInInstr.count(Unit))
return true;
- if (LookAtPhysRegUses && PhysRegUses.count(*Units))
+ if (LookAtPhysRegUses && PhysRegUses.count(Unit))
return true;
}
return false;
@@ -188,14 +188,14 @@ namespace {
/// Mark physical register as being used in a register use operand.
/// This is only used by the special livethrough handling code.
void markPhysRegUsedInInstr(MCPhysReg PhysReg) {
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
- PhysRegUses.insert(*Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ PhysRegUses.insert(Unit);
}
/// Remove mark of physical register being used in the instruction.
void unmarkRegUsedInInstr(MCPhysReg PhysReg) {
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units)
- UsedInInstr.erase(*Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ UsedInInstr.erase(Unit);
}
enum : unsigned {
@@ -240,6 +240,8 @@ namespace {
void addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts,
Register Reg) const;
+ void findAndSortDefOperandIndexes(const MachineInstr &MI);
+
void allocateInstruction(MachineInstr &MI);
void handleDebugValue(MachineInstr &MI);
void handleBundle(MachineInstr &MI);
@@ -265,18 +267,18 @@ namespace {
void allocVirtRegUndef(MachineOperand &MO);
void assignDanglingDebugValues(MachineInstr &Def, Register VirtReg,
MCPhysReg Reg);
- void defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
+ bool defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
Register VirtReg);
- void defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
+ bool defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg,
bool LookAtPhysRegUses = false);
- void useVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg);
+ bool useVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg);
MachineBasicBlock::iterator
getMBBBeginInsertionPoint(MachineBasicBlock &MBB,
SmallSet<Register, 2> &PrologLiveIns) const;
void reloadAtBegin(MachineBasicBlock &MBB);
- void setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
+ bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
Register traceCopies(Register VirtReg) const;
Register traceCopyChain(Register Reg) const;
@@ -308,13 +310,13 @@ bool RegAllocFast::shouldAllocateRegister(const Register Reg) const {
}
void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) {
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI)
- RegUnitStates[*UI] = NewState;
+ for (MCRegUnit Unit : TRI->regunits(PhysReg))
+ RegUnitStates[Unit] = NewState;
}
bool RegAllocFast::isPhysRegFree(MCPhysReg PhysReg) const {
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
- if (RegUnitStates[*UI] != regFree)
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if (RegUnitStates[Unit] != regFree)
return false;
}
return true;
@@ -552,7 +554,7 @@ void RegAllocFast::reloadAtBegin(MachineBasicBlock &MBB) {
if (PhysReg == 0)
continue;
- MCRegister FirstUnit = *MCRegUnitIterator(PhysReg, TRI);
+ MCRegister FirstUnit = *TRI->regunits(PhysReg).begin();
if (RegUnitStates[FirstUnit] == regLiveIn)
continue;
@@ -593,8 +595,7 @@ bool RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg Reg) {
bool RegAllocFast::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) {
bool displacedAny = false;
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
- unsigned Unit = *UI;
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
switch (unsigned VirtReg = RegUnitStates[Unit]) {
default: {
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
@@ -623,7 +624,7 @@ bool RegAllocFast::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) {
void RegAllocFast::freePhysReg(MCPhysReg PhysReg) {
LLVM_DEBUG(dbgs() << "Freeing " << printReg(PhysReg, TRI) << ':');
- MCRegister FirstUnit = *MCRegUnitIterator(PhysReg, TRI);
+ MCRegister FirstUnit = *TRI->regunits(PhysReg).begin();
switch (unsigned VirtReg = RegUnitStates[FirstUnit]) {
case regFree:
LLVM_DEBUG(dbgs() << '\n');
@@ -648,8 +649,8 @@ void RegAllocFast::freePhysReg(MCPhysReg PhysReg) {
/// disabled - it can be allocated directly.
/// \returns spillImpossible when PhysReg or an alias can't be spilled.
unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const {
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
- switch (unsigned VirtReg = RegUnitStates[*UI]) {
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ switch (unsigned VirtReg = RegUnitStates[Unit]) {
case regFree:
break;
case regPreAssigned:
@@ -875,10 +876,11 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
/// Variation of defineVirtReg() with special handling for livethrough regs
/// (tied or earlyclobber) that may interfere with preassigned uses.
-void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
+/// \return true if MI's MachineOperands were re-arranged/invalidated.
+bool RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
Register VirtReg) {
if (!shouldAllocateRegister(VirtReg))
- return;
+ return false;
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
if (LRI != LiveVirtRegs.end()) {
MCPhysReg PrevReg = LRI->PhysReg;
@@ -909,11 +911,13 @@ void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum,
/// perform an allocation if:
/// - It is a dead definition without any uses.
/// - The value is live out and all uses are in different basic blocks.
-void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
+///
+/// \return true if MI's MachineOperands were re-arranged/invalidated.
+bool RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
Register VirtReg, bool LookAtPhysRegUses) {
assert(VirtReg.isVirtual() && "Not a virtual register");
if (!shouldAllocateRegister(VirtReg))
- return;
+ return false;
MachineOperand &MO = MI.getOperand(OpNum);
LiveRegMap::iterator LRI;
bool New;
@@ -948,6 +952,23 @@ void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
<< LRI->Reloaded << '\n');
bool Kill = LRI->LastUse == nullptr;
spill(SpillBefore, VirtReg, PhysReg, Kill, LRI->LiveOut);
+
+ // We need to place additional spills for each indirect destination of an
+ // INLINEASM_BR.
+ if (MI.getOpcode() == TargetOpcode::INLINEASM_BR) {
+ int FI = StackSlotForVirtReg[VirtReg];
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ for (MachineOperand &MO : MI.operands()) {
+ if (MO.isMBB()) {
+ MachineBasicBlock *Succ = MO.getMBB();
+ TII->storeRegToStackSlot(*Succ, Succ->begin(), PhysReg, Kill,
+ FI, &RC, TRI, VirtReg);
+ ++NumStores;
+ Succ->addLiveIn(PhysReg);
+ }
+ }
+ }
+
LRI->LastUse = nullptr;
}
LRI->LiveOut = false;
@@ -957,15 +978,16 @@ void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
BundleVirtRegsMap[VirtReg] = PhysReg;
}
markRegUsedInInstr(PhysReg);
- setPhysReg(MI, MO, PhysReg);
+ return setPhysReg(MI, MO, PhysReg);
}
/// Allocates a register for a VirtReg use.
-void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
+/// \return true if MI's MachineOperands were re-arranged/invalidated.
+bool RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
Register VirtReg) {
assert(VirtReg.isVirtual() && "Not a virtual register");
if (!shouldAllocateRegister(VirtReg))
- return;
+ return false;
MachineOperand &MO = MI.getOperand(OpNum);
LiveRegMap::iterator LRI;
bool New;
@@ -1002,8 +1024,7 @@ void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
if (LRI->Error) {
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
- setPhysReg(MI, MO, *AllocationOrder.begin());
- return;
+ return setPhysReg(MI, MO, *AllocationOrder.begin());
}
}
@@ -1013,18 +1034,17 @@ void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum,
BundleVirtRegsMap[VirtReg] = LRI->PhysReg;
}
markRegUsedInInstr(LRI->PhysReg);
- setPhysReg(MI, MO, LRI->PhysReg);
+ return setPhysReg(MI, MO, LRI->PhysReg);
}
-/// Changes operand OpNum in MI the refer the PhysReg, considering subregs. This
-/// may invalidate any operand pointers. Return true if the operand kills its
-/// register.
-void RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
+/// Changes operand OpNum in MI to refer to PhysReg, considering subregs.
+/// \return true if MI's MachineOperands were re-arranged/invalidated.
+bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
MCPhysReg PhysReg) {
if (!MO.getSubReg()) {
MO.setReg(PhysReg);
MO.setIsRenamable(true);
- return;
+ return false;
}
// Handle subregister index.
@@ -1040,7 +1060,8 @@ void RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
// register kill.
if (MO.isKill()) {
MI.addRegisterKilled(PhysReg, TRI, true);
- return;
+ // Conservatively assume implicit MOs were re-arranged
+ return true;
}
// A <def,read-undef> of a sub-register requires an implicit def of the full
@@ -1050,7 +1071,10 @@ void RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
MI.addRegisterDead(PhysReg, TRI, true);
else
MI.addRegisterDefined(PhysReg, TRI);
+ // Conservatively assume implicit MOs were re-arranged
+ return true;
}
+ return false;
}
#ifndef NDEBUG
@@ -1090,8 +1114,8 @@ void RegAllocFast::dumpState() const {
if (PhysReg != 0) {
assert(Register::isPhysicalRegister(PhysReg) &&
"mapped to physreg");
- for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
- assert(RegUnitStates[*UI] == VirtReg && "inverse map valid");
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ assert(RegUnitStates[Unit] == VirtReg && "inverse map valid");
}
}
}
@@ -1130,6 +1154,72 @@ void RegAllocFast::addRegClassDefCounts(std::vector<unsigned> &RegClassDefCounts
}
}
+/// Compute \ref DefOperandIndexes so it contains the indices of "def" operands
+/// that are to be allocated. They are ordered so that operands from small
+/// register classes, early clobbers, and livethroughs are allocated first.
+void RegAllocFast::findAndSortDefOperandIndexes(const MachineInstr &MI) {
+ DefOperandIndexes.clear();
+
+ // Track number of defs which may consume a register from the class.
+ std::vector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0);
+ assert(RegClassDefCounts[0] == 0);
+
+ LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n");
+ for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (MO.readsReg()) {
+ if (Reg.isPhysical()) {
+ LLVM_DEBUG(dbgs() << "mark extra used: " << printReg(Reg, TRI) << '\n');
+ markPhysRegUsedInInstr(Reg);
+ }
+ }
+
+ if (MO.isDef()) {
+ if (Reg.isVirtual() && shouldAllocateRegister(Reg))
+ DefOperandIndexes.push_back(I);
+
+ addRegClassDefCounts(RegClassDefCounts, Reg);
+ }
+ }
+
+ llvm::sort(DefOperandIndexes, [&](uint16_t I0, uint16_t I1) {
+ const MachineOperand &MO0 = MI.getOperand(I0);
+ const MachineOperand &MO1 = MI.getOperand(I1);
+ Register Reg0 = MO0.getReg();
+ Register Reg1 = MO1.getReg();
+ const TargetRegisterClass &RC0 = *MRI->getRegClass(Reg0);
+ const TargetRegisterClass &RC1 = *MRI->getRegClass(Reg1);
+
+    // Identify regclasses that are easy to use up completely just in this
+ // instruction.
+ unsigned ClassSize0 = RegClassInfo.getOrder(&RC0).size();
+ unsigned ClassSize1 = RegClassInfo.getOrder(&RC1).size();
+
+ bool SmallClass0 = ClassSize0 < RegClassDefCounts[RC0.getID()];
+ bool SmallClass1 = ClassSize1 < RegClassDefCounts[RC1.getID()];
+ if (SmallClass0 > SmallClass1)
+ return true;
+ if (SmallClass0 < SmallClass1)
+ return false;
+
+ // Allocate early clobbers and livethrough operands first.
+ bool Livethrough0 = MO0.isEarlyClobber() || MO0.isTied() ||
+ (MO0.getSubReg() == 0 && !MO0.isUndef());
+ bool Livethrough1 = MO1.isEarlyClobber() || MO1.isTied() ||
+ (MO1.getSubReg() == 0 && !MO1.isUndef());
+ if (Livethrough0 > Livethrough1)
+ return true;
+ if (Livethrough0 < Livethrough1)
+ return false;
+
+ // Tie-break rule: operand index.
+ return I0 < I1;
+ });
+}
+
void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// The basic algorithm here is:
// 1. Mark registers of def operands as free
@@ -1201,6 +1291,10 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Allocate virtreg defs.
if (HasDef) {
if (HasVRegDef) {
+      // Note that implicit MOs can get re-arranged by defineVirtReg(), so loop
+ // multiple times to ensure no operand is missed.
+ bool ReArrangedImplicitOps = true;
+
// Special handling for early clobbers, tied operands or subregister defs:
// Compared to "normal" defs these:
// - Must not use a register that is pre-assigned for a use operand.
@@ -1208,90 +1302,45 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// heuristic to figure out a good operand order before doing
// assignments.
if (NeedToAssignLiveThroughs) {
- DefOperandIndexes.clear();
PhysRegUses.clear();
- // Track number of defs which may consume a register from the class.
- std::vector<unsigned> RegClassDefCounts(TRI->getNumRegClasses(), 0);
- assert(RegClassDefCounts[0] == 0);
-
- LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n");
- for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
- const MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg())
- continue;
- Register Reg = MO.getReg();
- if (MO.readsReg()) {
- if (Reg.isPhysical()) {
- LLVM_DEBUG(dbgs() << "mark extra used: " << printReg(Reg, TRI)
- << '\n');
- markPhysRegUsedInInstr(Reg);
+ while (ReArrangedImplicitOps) {
+ ReArrangedImplicitOps = false;
+ findAndSortDefOperandIndexes(MI);
+ for (uint16_t OpIdx : DefOperandIndexes) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n');
+ unsigned Reg = MO.getReg();
+ if (MO.isEarlyClobber() ||
+ (MO.isTied() && !TiedOpIsUndef(MO, OpIdx)) ||
+ (MO.getSubReg() && !MO.isUndef())) {
+ ReArrangedImplicitOps = defineLiveThroughVirtReg(MI, OpIdx, Reg);
+ } else {
+ ReArrangedImplicitOps = defineVirtReg(MI, OpIdx, Reg);
+ }
+ if (ReArrangedImplicitOps) {
+ // Implicit operands of MI were re-arranged,
+ // re-compute DefOperandIndexes.
+ break;
}
- }
-
- if (MO.isDef()) {
- if (Reg.isVirtual() && shouldAllocateRegister(Reg))
- DefOperandIndexes.push_back(I);
-
- addRegClassDefCounts(RegClassDefCounts, Reg);
- }
- }
-
- llvm::sort(DefOperandIndexes, [&](uint16_t I0, uint16_t I1) {
- const MachineOperand &MO0 = MI.getOperand(I0);
- const MachineOperand &MO1 = MI.getOperand(I1);
- Register Reg0 = MO0.getReg();
- Register Reg1 = MO1.getReg();
- const TargetRegisterClass &RC0 = *MRI->getRegClass(Reg0);
- const TargetRegisterClass &RC1 = *MRI->getRegClass(Reg1);
-
- // Identify regclass that are easy to use up completely just in this
- // instruction.
- unsigned ClassSize0 = RegClassInfo.getOrder(&RC0).size();
- unsigned ClassSize1 = RegClassInfo.getOrder(&RC1).size();
-
- bool SmallClass0 = ClassSize0 < RegClassDefCounts[RC0.getID()];
- bool SmallClass1 = ClassSize1 < RegClassDefCounts[RC1.getID()];
- if (SmallClass0 > SmallClass1)
- return true;
- if (SmallClass0 < SmallClass1)
- return false;
-
- // Allocate early clobbers and livethrough operands first.
- bool Livethrough0 = MO0.isEarlyClobber() || MO0.isTied() ||
- (MO0.getSubReg() == 0 && !MO0.isUndef());
- bool Livethrough1 = MO1.isEarlyClobber() || MO1.isTied() ||
- (MO1.getSubReg() == 0 && !MO1.isUndef());
- if (Livethrough0 > Livethrough1)
- return true;
- if (Livethrough0 < Livethrough1)
- return false;
-
- // Tie-break rule: operand index.
- return I0 < I1;
- });
-
- for (uint16_t OpIdx : DefOperandIndexes) {
- MachineOperand &MO = MI.getOperand(OpIdx);
- LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n');
- unsigned Reg = MO.getReg();
- if (MO.isEarlyClobber() ||
- (MO.isTied() && !TiedOpIsUndef(MO, OpIdx)) ||
- (MO.getSubReg() && !MO.isUndef())) {
- defineLiveThroughVirtReg(MI, OpIdx, Reg);
- } else {
- defineVirtReg(MI, OpIdx, Reg);
}
}
} else {
// Assign virtual register defs.
- for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
- MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg() || !MO.isDef())
- continue;
- Register Reg = MO.getReg();
- if (Reg.isVirtual())
- defineVirtReg(MI, I, Reg);
+ while (ReArrangedImplicitOps) {
+ ReArrangedImplicitOps = false;
+ for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ if (Reg.isVirtual()) {
+ ReArrangedImplicitOps = defineVirtReg(MI, I, Reg);
+ if (ReArrangedImplicitOps) {
+ break;
+ }
+ }
+ }
}
}
}
@@ -1304,9 +1353,11 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
if (!MO.isReg() || !MO.isDef())
continue;
+ Register Reg = MO.getReg();
+
// subreg defs don't free the full register. We left the subreg number
// around as a marker in setPhysReg() to recognize this case here.
- if (MO.getSubReg() != 0) {
+ if (Reg.isPhysical() && MO.getSubReg() != 0) {
MO.setSubReg(0);
continue;
}
@@ -1317,7 +1368,6 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Do not free tied operands and early clobbers.
if ((MO.isTied() && !TiedOpIsUndef(MO, I)) || MO.isEarlyClobber())
continue;
- Register Reg = MO.getReg();
if (!Reg)
continue;
if (Reg.isVirtual()) {
@@ -1364,38 +1414,42 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
}
// Allocate virtreg uses and insert reloads as necessary.
+ // Implicit MOs can get moved/removed by useVirtReg(), so loop multiple
+ // times to ensure no operand is missed.
bool HasUndefUse = false;
- for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
- MachineOperand &MO = MI.getOperand(I);
- if (!MO.isReg() || !MO.isUse())
- continue;
- Register Reg = MO.getReg();
- if (!Reg.isVirtual() || !shouldAllocateRegister(Reg))
- continue;
-
- if (MO.isUndef()) {
- HasUndefUse = true;
- continue;
- }
-
+ bool ReArrangedImplicitMOs = true;
+ while (ReArrangedImplicitMOs) {
+ ReArrangedImplicitMOs = false;
+ for (unsigned I = 0; I < MI.getNumOperands(); ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual() || !shouldAllocateRegister(Reg))
+ continue;
- // Populate MayLiveAcrossBlocks in case the use block is allocated before
- // the def block (removing the vreg uses).
- mayLiveIn(Reg);
+ if (MO.isUndef()) {
+ HasUndefUse = true;
+ continue;
+ }
+ // Populate MayLiveAcrossBlocks in case the use block is allocated before
+ // the def block (removing the vreg uses).
+ mayLiveIn(Reg);
- assert(!MO.isInternalRead() && "Bundles not supported");
- assert(MO.readsReg() && "reading use");
- useVirtReg(MI, I, Reg);
+ assert(!MO.isInternalRead() && "Bundles not supported");
+ assert(MO.readsReg() && "reading use");
+ ReArrangedImplicitMOs = useVirtReg(MI, I, Reg);
+ if (ReArrangedImplicitMOs)
+ break;
+ }
}
// Allocate undef operands. This is a separate step because in a situation
// like ` = OP undef %X, %X` both operands need the same register assign
// so we should perform the normal assignment first.
if (HasUndefUse) {
- for (MachineOperand &MO : MI.uses()) {
- if (!MO.isReg() || !MO.isUse())
- continue;
+ for (MachineOperand &MO : MI.all_uses()) {
Register Reg = MO.getReg();
if (!Reg.isVirtual() || !shouldAllocateRegister(Reg))
continue;
@@ -1407,8 +1461,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Free early clobbers.
if (HasEarlyClobber) {
- for (MachineOperand &MO : llvm::reverse(MI.operands())) {
- if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber())
+ for (MachineOperand &MO : llvm::reverse(MI.all_defs())) {
+ if (!MO.isEarlyClobber())
continue;
assert(!MO.getSubReg() && "should be already handled in def processing");
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index b43a4d2a4b85..68f6ea3268a9 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -444,31 +444,27 @@ MCRegister RAGreedy::tryAssign(const LiveInterval &VirtReg,
// Interference eviction
//===----------------------------------------------------------------------===//
-Register RegAllocEvictionAdvisor::canReassign(const LiveInterval &VirtReg,
- Register PrevReg) const {
- auto Order =
- AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix);
- MCRegister PhysReg;
- for (auto I = Order.begin(), E = Order.end(); I != E && !PhysReg; ++I) {
- if ((*I).id() == PrevReg.id())
- continue;
+bool RegAllocEvictionAdvisor::canReassign(const LiveInterval &VirtReg,
+ MCRegister FromReg) const {
+ auto HasRegUnitInterference = [&](MCRegUnit Unit) {
+ // Instantiate a "subquery", not to be confused with the Queries array.
+ LiveIntervalUnion::Query SubQ(VirtReg, Matrix->getLiveUnions()[Unit]);
+ return SubQ.checkInterference();
+ };
- MCRegUnitIterator Units(*I, TRI);
- for (; Units.isValid(); ++Units) {
- // Instantiate a "subquery", not to be confused with the Queries array.
- LiveIntervalUnion::Query subQ(VirtReg, Matrix->getLiveUnions()[*Units]);
- if (subQ.checkInterference())
- break;
+ for (MCRegister Reg :
+ AllocationOrder::create(VirtReg.reg(), *VRM, RegClassInfo, Matrix)) {
+ if (Reg == FromReg)
+ continue;
+ // If no units have interference, reassignment is possible.
+ if (none_of(TRI->regunits(Reg), HasRegUnitInterference)) {
+ LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
+ << printReg(FromReg, TRI) << " to "
+ << printReg(Reg, TRI) << '\n');
+ return true;
}
- // If no units have interference, break out with the current PhysReg.
- if (!Units.isValid())
- PhysReg = *I;
}
- if (PhysReg)
- LLVM_DEBUG(dbgs() << "can reassign: " << VirtReg << " from "
- << printReg(PrevReg, TRI) << " to "
- << printReg(PhysReg, TRI) << '\n');
- return PhysReg;
+ return false;
}
/// evictInterference - Evict any interferring registers that prevent VirtReg
@@ -487,8 +483,8 @@ void RAGreedy::evictInterference(const LiveInterval &VirtReg,
// Collect all interfering virtregs first.
SmallVector<const LiveInterval *, 8> Intfs;
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
// We usually have the interfering VRegs cached so collectInterferingVRegs()
// should be fast, we may need to recalculate if when different physregs
// overlap the same register unit so we had different SubRanges queried
@@ -1286,10 +1282,12 @@ static LaneBitmask getInstReadLaneMask(const MachineRegisterInfo &MRI,
/// VirtReg.
static bool readsLaneSubset(const MachineRegisterInfo &MRI,
const MachineInstr *MI, const LiveInterval &VirtReg,
- const TargetRegisterInfo *TRI, SlotIndex Use) {
+ const TargetRegisterInfo *TRI, SlotIndex Use,
+ const TargetInstrInfo *TII) {
// Early check the common case.
- if (MI->isCopy() &&
- MI->getOperand(0).getSubReg() == MI->getOperand(1).getSubReg())
+ auto DestSrc = TII->isCopyInstr(*MI);
+ if (DestSrc &&
+ DestSrc->Destination->getSubReg() == DestSrc->Source->getSubReg())
return false;
// FIXME: We're only considering uses, but should be consider defs too?
@@ -1348,14 +1346,14 @@ unsigned RAGreedy::tryInstructionSplit(const LiveInterval &VirtReg,
// the allocation.
for (const SlotIndex Use : Uses) {
if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) {
- if (MI->isFullCopy() ||
+ if (TII->isFullCopyInstr(*MI) ||
(SplitSubClass &&
SuperRCNumAllocatableRegs ==
getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
TII, TRI, RegClassInfo)) ||
// TODO: Handle split for subranges with subclass constraints?
(!SplitSubClass && VirtReg.hasSubRanges() &&
- !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use))) {
+ !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use, TII))) {
LLVM_DEBUG(dbgs() << " skip:\t" << Use << '\t' << *MI);
continue;
}
@@ -1404,9 +1402,9 @@ void RAGreedy::calcGapWeights(MCRegister PhysReg,
GapWeight.assign(NumGaps, 0.0f);
// Add interference from each overlapping register.
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units)
- .checkInterference())
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ if (!Matrix->query(const_cast<LiveInterval &>(SA->getParent()), Unit)
+ .checkInterference())
continue;
// We know that VirtReg is a continuous interval from FirstInstr to
@@ -1417,7 +1415,7 @@ void RAGreedy::calcGapWeights(MCRegister PhysReg,
// StartIdx and after StopIdx.
//
LiveIntervalUnion::SegmentIter IntI =
- Matrix->getLiveUnions()[*Units] .find(StartIdx);
+ Matrix->getLiveUnions()[Unit].find(StartIdx);
for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
// Skip the gaps before IntI.
while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
@@ -1439,8 +1437,8 @@ void RAGreedy::calcGapWeights(MCRegister PhysReg,
}
// Add fixed interference.
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- const LiveRange &LR = LIS->getRegUnit(*Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ const LiveRange &LR = LIS->getRegUnit(Unit);
LiveRange::const_iterator I = LR.find(StartIdx);
LiveRange::const_iterator E = LR.end();
@@ -1771,8 +1769,8 @@ bool RAGreedy::mayRecolorAllInterferences(
SmallLISet &RecoloringCandidates, const SmallVirtRegSet &FixedRegisters) {
const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg());
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
- LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, Unit);
// If there is LastChanceRecoloringMaxInterference or more interferences,
// chances are one would not be recolorable.
if (Q.interferingVRegs(LastChanceRecoloringMaxInterference).size() >=
@@ -1960,7 +1958,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(const LiveInterval &VirtReg,
// don't add it to NewVRegs because its physical register will be restored
// below. Other vregs in CurrentNewVRegs are created by calling
// selectOrSplit and should be added into NewVRegs.
- for (Register &R : CurrentNewVRegs) {
+ for (Register R : CurrentNewVRegs) {
if (RecoloringCandidates.count(&LIS->getInterval(R)))
continue;
NewVRegs.push_back(R);
@@ -2142,7 +2140,7 @@ void RAGreedy::initializeCSRCost() {
/// \p Out is not cleared before being populated.
void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
- if (!Instr.isFullCopy())
+ if (!TII->isFullCopyInstr(Instr))
continue;
// Look for the other end of the copy.
Register OtherReg = Instr.getOperand(0).getReg();
@@ -2457,21 +2455,22 @@ RAGreedy::RAGreedyStats RAGreedy::computeStats(MachineBasicBlock &MBB) {
MI.getOpcode() == TargetOpcode::STATEPOINT;
};
for (MachineInstr &MI : MBB) {
- if (MI.isCopy()) {
- const MachineOperand &Dest = MI.getOperand(0);
- const MachineOperand &Src = MI.getOperand(1);
+ auto DestSrc = TII->isCopyInstr(MI);
+ if (DestSrc) {
+ const MachineOperand &Dest = *DestSrc->Destination;
+ const MachineOperand &Src = *DestSrc->Source;
Register SrcReg = Src.getReg();
Register DestReg = Dest.getReg();
// Only count `COPY`s with a virtual register as source or destination.
if (SrcReg.isVirtual() || DestReg.isVirtual()) {
if (SrcReg.isVirtual()) {
SrcReg = VRM->getPhys(SrcReg);
- if (Src.getSubReg())
+ if (SrcReg && Src.getSubReg())
SrcReg = TRI->getSubReg(SrcReg, Src.getSubReg());
}
if (DestReg.isVirtual()) {
DestReg = VRM->getPhys(DestReg);
- if (Dest.getSubReg())
+ if (DestReg && Dest.getSubReg())
DestReg = TRI->getSubReg(DestReg, Dest.getSubReg());
}
if (SrcReg != DestReg)
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h
index e0ac88c0aeb9..0f8f9a7d5811 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.h
+++ b/llvm/lib/CodeGen/RegAllocGreedy.h
@@ -166,20 +166,20 @@ private:
SmallVector<std::pair<const LiveInterval *, MCRegister>, 8>;
// context
- MachineFunction *MF;
+ MachineFunction *MF = nullptr;
// Shortcuts to some useful interface.
- const TargetInstrInfo *TII;
+ const TargetInstrInfo *TII = nullptr;
// analyses
- SlotIndexes *Indexes;
- MachineBlockFrequencyInfo *MBFI;
- MachineDominatorTree *DomTree;
- MachineLoopInfo *Loops;
- MachineOptimizationRemarkEmitter *ORE;
- EdgeBundles *Bundles;
- SpillPlacement *SpillPlacer;
- LiveDebugVariables *DebugVars;
+ SlotIndexes *Indexes = nullptr;
+ MachineBlockFrequencyInfo *MBFI = nullptr;
+ MachineDominatorTree *DomTree = nullptr;
+ MachineLoopInfo *Loops = nullptr;
+ MachineOptimizationRemarkEmitter *ORE = nullptr;
+ EdgeBundles *Bundles = nullptr;
+ SpillPlacement *SpillPlacer = nullptr;
+ LiveDebugVariables *DebugVars = nullptr;
// state
std::unique_ptr<Spiller> SpillerInstance;
@@ -204,7 +204,7 @@ private:
CO_Interf = 2
};
- uint8_t CutOffInfo;
+ uint8_t CutOffInfo = CutOffStage::CO_None;
#ifndef NDEBUG
static const char *const StageName[];
@@ -278,9 +278,9 @@ private:
/// Flags for the live range priority calculation, determined once per
/// machine function.
- bool RegClassPriorityTrumpsGlobalness;
+ bool RegClassPriorityTrumpsGlobalness = false;
- bool ReverseLocalAssignment;
+ bool ReverseLocalAssignment = false;
public:
RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses);
diff --git a/llvm/lib/CodeGen/RegAllocPBQP.cpp b/llvm/lib/CodeGen/RegAllocPBQP.cpp
index b3d926eeb552..925a0f085c4b 100644
--- a/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -634,8 +634,8 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM,
// vregLI overlaps fixed regunit interference.
bool Interference = false;
- for (MCRegUnitIterator Units(PReg, &TRI); Units.isValid(); ++Units) {
- if (VRegLI.overlaps(LIS.getRegUnit(*Units))) {
+ for (MCRegUnit Unit : TRI.regunits(PReg)) {
+ if (VRegLI.overlaps(LIS.getRegUnit(Unit))) {
Interference = true;
break;
}
diff --git a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
index b3a13cc92316..e031019a4c91 100644
--- a/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
+++ b/llvm/lib/CodeGen/RegAllocPriorityAdvisor.cpp
@@ -81,9 +81,7 @@ template <> Pass *llvm::callDefaultCtor<RegAllocPriorityAdvisorAnalysis>() {
#endif
break;
case RegAllocPriorityAdvisorAnalysis::AdvisorMode::Release:
-#if defined(LLVM_HAVE_TF_AOT_REGALLOCPRIORITYMODEL)
Ret = createReleaseModePriorityAdvisor();
-#endif
break;
}
if (Ret)
diff --git a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
index 16afd15e29e4..6657cf3c1ef4 100644
--- a/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -208,8 +208,8 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
MCPhysReg Reg = CSRegs[i];
if (SavedRegs.test(Reg)) {
// Save subregisters
- for (MCSubRegIterator SR(Reg, &TRI); SR.isValid(); ++SR)
- SavedRegs.set(*SR);
+ for (MCPhysReg SR : TRI.subregs(Reg))
+ SavedRegs.set(SR);
}
}
}
diff --git a/llvm/lib/CodeGen/RegisterBank.cpp b/llvm/lib/CodeGen/RegisterBank.cpp
index 512b21aeacaf..8e0a0b0dc282 100644
--- a/llvm/lib/CodeGen/RegisterBank.cpp
+++ b/llvm/lib/CodeGen/RegisterBank.cpp
@@ -11,6 +11,7 @@
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Debug.h"
@@ -21,15 +22,16 @@ using namespace llvm;
const unsigned RegisterBank::InvalidID = UINT_MAX;
-RegisterBank::RegisterBank(
- unsigned ID, const char *Name, unsigned Size,
- const uint32_t *CoveredClasses, unsigned NumRegClasses)
- : ID(ID), Name(Name), Size(Size) {
+RegisterBank::RegisterBank(unsigned ID, const char *Name,
+ const uint32_t *CoveredClasses,
+ unsigned NumRegClasses)
+ : ID(ID), Name(Name) {
ContainedRegClasses.resize(NumRegClasses);
ContainedRegClasses.setBitsInMask(CoveredClasses);
}
-bool RegisterBank::verify(const TargetRegisterInfo &TRI) const {
+bool RegisterBank::verify(const RegisterBankInfo &RBI,
+ const TargetRegisterInfo &TRI) const {
assert(isValid() && "Invalid register bank");
for (unsigned RCId = 0, End = TRI.getNumRegClasses(); RCId != End; ++RCId) {
const TargetRegisterClass &RC = *TRI.getRegClass(RCId);
@@ -50,7 +52,7 @@ bool RegisterBank::verify(const TargetRegisterInfo &TRI) const {
// Verify that the Size of the register bank is big enough to cover
// all the register classes it covers.
- assert(getSize() >= TRI.getRegSizeInBits(SubRC) &&
+ assert(RBI.getMaximumSize(getID()) >= TRI.getRegSizeInBits(SubRC) &&
"Size is not big enough for all the subclasses!");
assert(covers(SubRC) && "Not all subclasses are covered");
}
@@ -64,7 +66,7 @@ bool RegisterBank::covers(const TargetRegisterClass &RC) const {
}
bool RegisterBank::isValid() const {
- return ID != InvalidID && Name != nullptr && Size != 0 &&
+ return ID != InvalidID && Name != nullptr &&
// A register bank that does not cover anything is useless.
!ContainedRegClasses.empty();
}
@@ -89,7 +91,7 @@ void RegisterBank::print(raw_ostream &OS, bool IsForDebug,
OS << getName();
if (!IsForDebug)
return;
- OS << "(ID:" << getID() << ", Size:" << getSize() << ")\n"
+ OS << "(ID:" << getID() << ")\n"
<< "isValid:" << isValid() << '\n'
<< "Number of Covered register classes: " << ContainedRegClasses.count()
<< '\n';
diff --git a/llvm/lib/CodeGen/RegisterBankInfo.cpp b/llvm/lib/CodeGen/RegisterBankInfo.cpp
index 27ed17b9f4f6..658a09fd8700 100644
--- a/llvm/lib/CodeGen/RegisterBankInfo.cpp
+++ b/llvm/lib/CodeGen/RegisterBankInfo.cpp
@@ -10,6 +10,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/RegisterBankInfo.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
@@ -52,9 +53,11 @@ const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1;
//------------------------------------------------------------------------------
// RegisterBankInfo implementation.
//------------------------------------------------------------------------------
-RegisterBankInfo::RegisterBankInfo(RegisterBank **RegBanks,
- unsigned NumRegBanks)
- : RegBanks(RegBanks), NumRegBanks(NumRegBanks) {
+RegisterBankInfo::RegisterBankInfo(const RegisterBank **RegBanks,
+ unsigned NumRegBanks, const unsigned *Sizes,
+ unsigned HwMode)
+ : RegBanks(RegBanks), NumRegBanks(NumRegBanks), Sizes(Sizes),
+ HwMode(HwMode) {
#ifndef NDEBUG
for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
assert(RegBanks[Idx] != nullptr && "Invalid RegisterBank");
@@ -70,7 +73,7 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
assert(Idx == RegBank.getID() &&
"ID does not match the index in the array");
LLVM_DEBUG(dbgs() << "Verify " << RegBank << '\n');
- assert(RegBank.verify(TRI) && "RegBank is invalid");
+ assert(RegBank.verify(*this, TRI) && "RegBank is invalid");
}
#endif // NDEBUG
return true;
@@ -79,31 +82,32 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
const RegisterBank *
RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
- if (Reg.isPhysical()) {
+ if (!Reg.isVirtual()) {
// FIXME: This was probably a copy to a virtual register that does have a
// type we could use.
- return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI), LLT());
+ const TargetRegisterClass *RC = getMinimalPhysRegClass(Reg, TRI);
+ return RC ? &getRegBankFromRegClass(*RC, LLT()) : nullptr;
}
- assert(Reg && "NoRegister does not have a register bank");
const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
- if (auto *RB = RegClassOrBank.dyn_cast<const RegisterBank *>())
+ if (auto *RB = dyn_cast_if_present<const RegisterBank *>(RegClassOrBank))
return RB;
- if (auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
+ if (auto *RC =
+ dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank))
return &getRegBankFromRegClass(*RC, MRI.getType(Reg));
return nullptr;
}
-const TargetRegisterClass &
+const TargetRegisterClass *
RegisterBankInfo::getMinimalPhysRegClass(Register Reg,
const TargetRegisterInfo &TRI) const {
assert(Reg.isPhysical() && "Reg must be a physreg");
const auto &RegRCIt = PhysRegMinimalRCs.find(Reg);
if (RegRCIt != PhysRegMinimalRCs.end())
- return *RegRCIt->second;
- const TargetRegisterClass *PhysRC = TRI.getMinimalPhysRegClass(Reg);
+ return RegRCIt->second;
+ const TargetRegisterClass *PhysRC = TRI.getMinimalPhysRegClassLLT(Reg, LLT());
PhysRegMinimalRCs[Reg] = PhysRC;
- return *PhysRC;
+ return PhysRC;
}
const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
@@ -131,10 +135,10 @@ const TargetRegisterClass *RegisterBankInfo::constrainGenericRegister(
// If the register already has a class, fallback to MRI::constrainRegClass.
auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
- if (RegClassOrBank.is<const TargetRegisterClass *>())
+ if (isa<const TargetRegisterClass *>(RegClassOrBank))
return MRI.constrainRegClass(Reg, &RC);
- const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
+ const RegisterBank *RB = cast<const RegisterBank *>(RegClassOrBank);
// Otherwise, all we can do is ensure the bank covers the class, and set it.
if (RB && !RB->covers(RC))
return nullptr;
@@ -498,7 +502,7 @@ unsigned RegisterBankInfo::getSizeInBits(Register Reg,
// Instead, we need to access a register class that contains Reg and
// get the size of that register class.
// Because this is expensive, we'll cache the register class by calling
- auto *RC = &getMinimalPhysRegClass(Reg, TRI);
+ auto *RC = getMinimalPhysRegClass(Reg, TRI);
assert(RC && "Expecting Register class");
return TRI.getRegSizeInBits(*RC);
}
@@ -515,12 +519,14 @@ LLVM_DUMP_METHOD void RegisterBankInfo::PartialMapping::dump() const {
}
#endif
-bool RegisterBankInfo::PartialMapping::verify() const {
+bool RegisterBankInfo::PartialMapping::verify(
+ const RegisterBankInfo &RBI) const {
assert(RegBank && "Register bank not set");
assert(Length && "Empty mapping");
assert((StartIdx <= getHighBitIdx()) && "Overflow, switch to APInt?");
// Check if the minimum width fits into RegBank.
- assert(RegBank->getSize() >= Length && "Register bank too small for Mask");
+ assert(RBI.getMaximumSize(RegBank->getID()) >= Length &&
+ "Register bank too small for Mask");
return true;
}
@@ -545,13 +551,14 @@ bool RegisterBankInfo::ValueMapping::partsAllUniform() const {
return true;
}
-bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const {
+bool RegisterBankInfo::ValueMapping::verify(const RegisterBankInfo &RBI,
+ unsigned MeaningfulBitWidth) const {
assert(NumBreakDowns && "Value mapped nowhere?!");
unsigned OrigValueBitWidth = 0;
for (const RegisterBankInfo::PartialMapping &PartMap : *this) {
// Check that each register bank is big enough to hold the partial value:
// this check is done by PartialMapping::verify
- assert(PartMap.verify() && "Partial mapping is invalid");
+ assert(PartMap.verify(RBI) && "Partial mapping is invalid");
// The original value should completely be mapped.
// Thus the maximum accessed index + 1 is the size of the original value.
OrigValueBitWidth =
@@ -625,8 +632,9 @@ bool RegisterBankInfo::InstructionMapping::verify(
(void)MOMapping;
// Register size in bits.
// This size must match what the mapping expects.
- assert(MOMapping.verify(RBI->getSizeInBits(
- Reg, MF.getRegInfo(), *MF.getSubtarget().getRegisterInfo())) &&
+ assert(MOMapping.verify(*RBI, RBI->getSizeInBits(
+ Reg, MF.getRegInfo(),
+ *MF.getSubtarget().getRegisterInfo())) &&
"Value mapping is invalid");
}
return true;
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index ab1215974fc5..e49885b6ad96 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -116,7 +116,7 @@ static cl::opt<unsigned> LargeIntervalFreqThreshold(
cl::desc("For a large interval, if it is coalesed with other live "
"intervals many times more than the threshold, stop its "
"coalescing to control the compile time. "),
- cl::init(100));
+ cl::init(256));
namespace {
@@ -153,12 +153,6 @@ namespace {
using DbgValueLoc = std::pair<SlotIndex, MachineInstr*>;
DenseMap<Register, std::vector<DbgValueLoc>> DbgVRegToValues;
- /// VRegs may be repeatedly coalesced, and have many DBG_VALUEs attached.
- /// To avoid repeatedly merging sets of DbgValueLocs, instead record
- /// which vregs have been coalesced, and where to. This map is from
- /// vreg => {set of vregs merged in}.
- DenseMap<Register, SmallVector<Register, 4>> DbgMergedVRegNums;
-
/// A LaneMask to remember on which subregister live ranges we need to call
/// shrinkToUses() later.
LaneBitmask ShrinkMask;
@@ -404,14 +398,14 @@ char RegisterCoalescer::ID = 0;
char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
-INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
- "Simple Register Coalescing", false, false)
+INITIALIZE_PASS_BEGIN(RegisterCoalescer, "register-coalescer",
+ "Register Coalescer", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
- "Simple Register Coalescing", false, false)
+INITIALIZE_PASS_END(RegisterCoalescer, "register-coalescer",
+ "Register Coalescer", false, false)
[[nodiscard]] static bool isMoveInstr(const TargetRegisterInfo &tri,
const MachineInstr *MI, Register &Src,
@@ -1257,8 +1251,8 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
static bool definesFullReg(const MachineInstr &MI, Register Reg) {
assert(!Reg.isPhysical() && "This code cannot handle physreg aliasing");
- for (const MachineOperand &Op : MI.operands()) {
- if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
+ for (const MachineOperand &Op : MI.all_defs()) {
+ if (Op.getReg() != Reg)
continue;
// Return true if we define the full register or don't care about the value
// inside other subregisters.
@@ -1502,11 +1496,18 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
LLVM_DEBUG(dbgs()
<< "Removing undefined SubRange "
<< PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
- // VNI is in ValNo - remove any segments in this SubRange that have this ValNo
+
if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {
+ // VNI is in ValNo - remove any segments in this SubRange that have
+ // this ValNo
SR.removeValNo(RmValNo);
- UpdatedSubRanges = true;
}
+
+ // We may not have a defined value at this point, but still need to
+ // clear out any empty subranges tentatively created by
+ // updateRegDefUses. The original subrange def may have only undefed
+ // some lanes.
+ UpdatedSubRanges = true;
} else {
// We know that this lane is defined by this instruction,
// but at this point it may be empty because it is not used by
@@ -1545,9 +1546,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// no live-ranges would have been created for ECX.
// Fix that!
SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
- for (MCRegUnitIterator Units(NewMI.getOperand(0).getReg(), TRI);
- Units.isValid(); ++Units)
- if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
+ for (MCRegUnit Unit : TRI->regunits(NewMI.getOperand(0).getReg()))
+ if (LiveRange *LR = LIS->getCachedRegUnit(Unit))
LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
}
@@ -1561,8 +1561,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
MCRegister Reg = NewMIImplDefs[i];
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
- if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
+ for (MCRegUnit Unit : TRI->regunits(Reg))
+ if (LiveRange *LR = LIS->getCachedRegUnit(Unit))
LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
}
@@ -1713,8 +1713,8 @@ MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
// is still part of the function (but about to be erased), mark all
// defs of DstReg in it as <undef>, so that shrinkToUses would
// ignore them.
- for (MachineOperand &MO : CopyMI->operands())
- if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg)
+ for (MachineOperand &MO : CopyMI->all_defs())
+ if (MO.getReg() == DstReg)
MO.setIsUndef(true);
LIS->shrinkToUses(&DstLI);
@@ -2164,14 +2164,14 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
// Deny any overlapping intervals. This depends on all the reserved
// register live ranges to look like dead defs.
if (!MRI->isConstantPhysReg(DstReg)) {
- for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
+ for (MCRegUnit Unit : TRI->regunits(DstReg)) {
// Abort if not all the regunits are reserved.
- for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
+ for (MCRegUnitRootIterator RI(Unit, TRI); RI.isValid(); ++RI) {
if (!MRI->isReserved(*RI))
return false;
}
- if (RHS.overlaps(LIS->getRegUnit(*UI))) {
- LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI)
+ if (RHS.overlaps(LIS->getRegUnit(Unit))) {
+ LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(Unit, TRI)
<< '\n');
return false;
}
@@ -2202,6 +2202,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
// ...
// use %physreg_x
CopyMI = MRI->getVRegDef(SrcReg);
+ deleteInstr(CopyMI);
} else {
// VReg is copied into physreg:
// %y = def
@@ -2246,15 +2247,15 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
<< printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n");
LIS->removePhysRegDefAt(DstReg.asMCReg(), CopyRegIdx);
+ deleteInstr(CopyMI);
+
// Create a new dead def at the new def location.
- for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
- LiveRange &LR = LIS->getRegUnit(*UI);
+ for (MCRegUnit Unit : TRI->regunits(DstReg)) {
+ LiveRange &LR = LIS->getRegUnit(Unit);
LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
}
}
- deleteInstr(CopyMI);
-
// We don't track kills for reserved registers.
MRI->clearKillFlags(CP.getSrcReg());
@@ -2569,8 +2570,8 @@ public:
LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
const {
LaneBitmask L;
- for (const MachineOperand &MO : DefMI->operands()) {
- if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef())
+ for (const MachineOperand &MO : DefMI->all_defs()) {
+ if (MO.getReg() != Reg)
continue;
L |= TRI->getSubRegIndexLaneMask(
TRI->composeSubRegIndices(SubIdx, MO.getSubReg()));
@@ -2786,13 +2787,22 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
//
// When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
// to erase the IMPLICIT_DEF instruction.
- if (DefMI &&
- DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
+ MachineBasicBlock *OtherMBB = Indexes->getMBBFromIndex(V.OtherVNI->def);
+ if (DefMI && DefMI->getParent() != OtherMBB) {
LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
<< " extends into "
<< printMBBReference(*DefMI->getParent())
<< ", keeping it.\n");
OtherV.ErasableImplicitDef = false;
+ } else if (OtherMBB->hasEHPadSuccessor()) {
+ // If OtherV is defined in a basic block that has EH pad successors then
+ // we get the same problem not just if OtherV is live beyond its basic
+ // block, but beyond the last call instruction in its basic block. Handle
+ // this case conservatively.
+ LLVM_DEBUG(
+ dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
+ << " may be live into EH pad successors, keeping it.\n");
+ OtherV.ErasableImplicitDef = false;
} else {
// We deferred clearing these lanes in case we needed to save them
OtherV.ValidLanes &= ~OtherV.WriteLanes;
@@ -2952,7 +2962,7 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
// its lanes.
if (OtherV.ErasableImplicitDef &&
TrackSubRegLiveness &&
- (OtherV.WriteLanes & ~V.ValidLanes).any()) {
+ (OtherV.ValidLanes & ~V.ValidLanes).any()) {
LLVM_DEBUG(dbgs() << "Cannot erase implicit_def with missing values\n");
OtherV.ErasableImplicitDef = false;
@@ -3029,8 +3039,8 @@ bool JoinVals::usesLanes(const MachineInstr &MI, Register Reg, unsigned SubIdx,
LaneBitmask Lanes) const {
if (MI.isDebugOrPseudoInstr())
return false;
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg)
+ for (const MachineOperand &MO : MI.all_uses()) {
+ if (MO.getReg() != Reg)
continue;
if (!MO.readsReg())
continue;
@@ -3759,18 +3769,9 @@ void RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP,
checkMergingChangesDbgValuesImpl(Reg, LHS, RHS, RHSVals);
};
- // Scan for potentially unsound DBG_VALUEs: examine first the register number
- // Reg, and then any other vregs that may have been merged into it.
- auto PerformScan = [this](Register Reg, std::function<void(Register)> Func) {
- Func(Reg);
- if (DbgMergedVRegNums.count(Reg))
- for (Register X : DbgMergedVRegNums[Reg])
- Func(X);
- };
-
// Scan for unsound updates of both the source and destination register.
- PerformScan(CP.getSrcReg(), ScanForSrcReg);
- PerformScan(CP.getDstReg(), ScanForDstReg);
+ ScanForSrcReg(CP.getSrcReg());
+ ScanForDstReg(CP.getDstReg());
}
void RegisterCoalescer::checkMergingChangesDbgValuesImpl(Register Reg,
@@ -4099,7 +4100,7 @@ void RegisterCoalescer::releaseMemory() {
}
bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
- LLVM_DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
+ LLVM_DEBUG(dbgs() << "********** REGISTER COALESCER **********\n"
<< "********** Function: " << fn.getName() << '\n');
// Variables changed between a setjmp and a longjump can have undefined value
@@ -4151,7 +4152,6 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
MF->verify(this, "Before register coalescing");
DbgVRegToValues.clear();
- DbgMergedVRegNums.clear();
buildVRegToDbgValueMap(fn);
RegClassInfo.runOnMachineFunction(fn);
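A recurring pattern in the RegisterCoalescer hunks above is replacing the old MCRegUnitIterator isValid()/operator++ protocol and hand-written operand filtering with range-based loops (TRI->regunits(Reg), MI.all_defs(), MI.all_uses()). The following is a minimal standalone sketch of that shape only, using hypothetical names rather than LLVM's real classes:

#include <cstdio>
#include <vector>

// Hypothetical stand-ins, not LLVM's types.
using RegUnit = unsigned;

struct RegisterInfo {
  // Old style: an explicit iterator object driven by isValid()/advance().
  // New style: expose a range so callers can write a range-based for loop.
  std::vector<RegUnit> UnitsOf(unsigned Reg) const {
    return {Reg * 2, Reg * 2 + 1}; // made-up register -> unit mapping
  }
};

int main() {
  RegisterInfo TRI;
  for (RegUnit Unit : TRI.UnitsOf(3)) // reads like: for (MCRegUnit Unit : TRI->regunits(Reg))
    std::printf("unit %u\n", Unit);
  return 0;
}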
diff --git a/llvm/lib/CodeGen/RegisterPressure.cpp b/llvm/lib/CodeGen/RegisterPressure.cpp
index d4c29f96a4f9..f86aa3a16720 100644
--- a/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -521,9 +521,8 @@ class RegisterOperandsCollector {
if (Reg.isVirtual()) {
addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneBitmask::getAll()));
} else if (MRI.isAllocatable(Reg)) {
- for (MCRegUnitIterator Units(Reg.asMCReg(), &TRI); Units.isValid();
- ++Units)
- addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll()));
+ for (MCRegUnit Unit : TRI.regunits(Reg.asMCReg()))
+ addRegLanes(RegUnits, RegisterMaskPair(Unit, LaneBitmask::getAll()));
}
}
@@ -557,9 +556,8 @@ class RegisterOperandsCollector {
: MRI.getMaxLaneMaskForVReg(Reg);
addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneMask));
} else if (MRI.isAllocatable(Reg)) {
- for (MCRegUnitIterator Units(Reg.asMCReg(), &TRI); Units.isValid();
- ++Units)
- addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll()));
+ for (MCRegUnit Unit : TRI.regunits(Reg.asMCReg()))
+ addRegLanes(RegUnits, RegisterMaskPair(Unit, LaneBitmask::getAll()));
}
}
};
diff --git a/llvm/lib/CodeGen/RegisterScavenging.cpp b/llvm/lib/CodeGen/RegisterScavenging.cpp
index 8d10a5558315..c00d3fde6426 100644
--- a/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -96,13 +96,13 @@ void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) {
}
void RegScavenger::addRegUnits(BitVector &BV, MCRegister Reg) {
- for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
- BV.set(*RUI);
+ for (MCRegUnit Unit : TRI->regunits(Reg))
+ BV.set(Unit);
}
void RegScavenger::removeRegUnits(BitVector &BV, MCRegister Reg) {
- for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
- BV.reset(*RUI);
+ for (MCRegUnit Unit : TRI->regunits(Reg))
+ BV.reset(Unit);
}
void RegScavenger::determineKillsAndDefs() {
@@ -198,25 +198,13 @@ void RegScavenger::forward() {
// S1 can be freely clobbered.
// Ideally we would like a way to model this, but leaving the
// insert_subreg around causes both correctness and performance issues.
- bool SubUsed = false;
- for (const MCPhysReg &SubReg : TRI->subregs(Reg))
- if (isRegUsed(SubReg)) {
- SubUsed = true;
- break;
- }
- bool SuperUsed = false;
- for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) {
- if (isRegUsed(*SR)) {
- SuperUsed = true;
- break;
- }
- }
- if (!SubUsed && !SuperUsed) {
+ if (none_of(TRI->subregs(Reg),
+ [&](MCPhysReg SR) { return isRegUsed(SR); }) &&
+ none_of(TRI->superregs(Reg),
+ [&](MCPhysReg SR) { return isRegUsed(SR); })) {
MBB->getParent()->verify(nullptr, "In Register Scavenger");
llvm_unreachable("Using an undefined register!");
}
- (void)SubUsed;
- (void)SuperUsed;
}
} else {
assert(MO.isDef());
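The forward() hunk above collapses the two hand-rolled "is any sub/super register used" flag loops into two none_of calls with a lambda predicate. A standalone sketch of the same pattern using the standard library (llvm::none_of from STLExtras behaves the same way on ranges):

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<unsigned> SubRegs = {1, 2, 3};
  std::vector<unsigned> SuperRegs = {10, 11};
  auto IsUsed = [](unsigned R) { return R == 42; }; // stand-in predicate

  // Equivalent to the removed SubUsed/SuperUsed flag loops.
  bool NoneUsed =
      std::none_of(SubRegs.begin(), SubRegs.end(), IsUsed) &&
      std::none_of(SuperRegs.begin(), SuperRegs.end(), IsUsed);
  assert(NoneUsed);
  return 0;
}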
@@ -282,70 +270,6 @@ BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) {
return Mask;
}
-Register RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
- BitVector &Candidates,
- unsigned InstrLimit,
- MachineBasicBlock::iterator &UseMI) {
- int Survivor = Candidates.find_first();
- assert(Survivor > 0 && "No candidates for scavenging");
-
- MachineBasicBlock::iterator ME = MBB->getFirstTerminator();
- assert(StartMI != ME && "MI already at terminator");
- MachineBasicBlock::iterator RestorePointMI = StartMI;
- MachineBasicBlock::iterator MI = StartMI;
-
- bool inVirtLiveRange = false;
- for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
- if (MI->isDebugOrPseudoInstr()) {
- ++InstrLimit; // Don't count debug instructions
- continue;
- }
- bool isVirtKillInsn = false;
- bool isVirtDefInsn = false;
- // Remove any candidates touched by instruction.
- for (const MachineOperand &MO : MI->operands()) {
- if (MO.isRegMask())
- Candidates.clearBitsNotInMask(MO.getRegMask());
- if (!MO.isReg() || MO.isUndef() || !MO.getReg())
- continue;
- if (MO.getReg().isVirtual()) {
- if (MO.isDef())
- isVirtDefInsn = true;
- else if (MO.isKill())
- isVirtKillInsn = true;
- continue;
- }
- for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
- Candidates.reset(*AI);
- }
- // If we're not in a virtual reg's live range, this is a valid
- // restore point.
- if (!inVirtLiveRange) RestorePointMI = MI;
-
- // Update whether we're in the live range of a virtual register
- if (isVirtKillInsn) inVirtLiveRange = false;
- if (isVirtDefInsn) inVirtLiveRange = true;
-
- // Was our survivor untouched by this instruction?
- if (Candidates.test(Survivor))
- continue;
-
- // All candidates gone?
- if (Candidates.none())
- break;
-
- Survivor = Candidates.find_first();
- }
- // If we ran off the end, that's where we want to restore.
- if (MI == ME) RestorePointMI = ME;
- assert(RestorePointMI != StartMI &&
- "No available scavenger restore location!");
-
- // We ran out of candidates, so stop the search.
- UseMI = RestorePointMI;
- return Survivor;
-}
-
/// Given the bitvector \p Available of free register units at position
/// \p From. Search backwards to find a register that is part of \p
/// Candidates and not used/clobbered until the point \p To. If there is
@@ -522,73 +446,6 @@ RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj,
return Scavenged[SI];
}
-Register RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
- MachineBasicBlock::iterator I,
- int SPAdj, bool AllowSpill) {
- MachineInstr &MI = *I;
- const MachineFunction &MF = *MI.getMF();
- // Consider all allocatable registers in the register class initially
- BitVector Candidates = TRI->getAllocatableSet(MF, RC);
-
- // Exclude all the registers being used by the instruction.
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) &&
- !MO.getReg().isVirtual())
- for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
- Candidates.reset(*AI);
- }
-
- // If we have already scavenged some registers, remove them from the
- // candidates. If we end up recursively calling eliminateFrameIndex, we don't
- // want to be clobbering previously scavenged registers or their associated
- // stack slots.
- for (ScavengedInfo &SI : Scavenged) {
- if (SI.Reg) {
- if (isRegUsed(SI.Reg)) {
- LLVM_DEBUG(
- dbgs() << "Removing " << printReg(SI.Reg, TRI) <<
- " from scavenging candidates since it was already scavenged\n");
- for (MCRegAliasIterator AI(SI.Reg, TRI, true); AI.isValid(); ++AI)
- Candidates.reset(*AI);
- }
- }
- }
-
- // Try to find a register that's unused if there is one, as then we won't
- // have to spill.
- BitVector Available = getRegsAvailable(RC);
- Available &= Candidates;
- if (Available.any())
- Candidates = Available;
-
- // Find the register whose use is furthest away.
- MachineBasicBlock::iterator UseMI;
- Register SReg = findSurvivorReg(I, Candidates, 25, UseMI);
-
- // If we found an unused register there is no reason to spill it.
- if (!isRegUsed(SReg)) {
- LLVM_DEBUG(dbgs() << "Scavenged register: " << printReg(SReg, TRI) << "\n");
- return SReg;
- }
-
- if (!AllowSpill)
- return 0;
-
-#ifndef NDEBUG
- for (ScavengedInfo &SI : Scavenged) {
- assert(SI.Reg != SReg && "scavenged a previously scavenged register");
- }
-#endif
-
- ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI);
- Scavenged.Restore = &*std::prev(UseMI);
-
- LLVM_DEBUG(dbgs() << "Scavenged register (with spill): "
- << printReg(SReg, TRI) << "\n");
-
- return SReg;
-}
-
Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
MachineBasicBlock::iterator To,
bool RestoreAfter, int SPAdj,
diff --git a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
index 05bbd1a2d03b..bc3ef1c0329a 100644
--- a/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -101,9 +101,9 @@ private:
const SmallVectorImpl<LiveInterval*> &Intervals) const;
- LiveIntervals *LIS;
- MachineRegisterInfo *MRI;
- const TargetInstrInfo *TII;
+ LiveIntervals *LIS = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
};
} // end anonymous namespace
@@ -249,7 +249,7 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
/// Undef use operands are not tracked in the equivalence class,
/// but need to be updated if they are tied; take care to only
/// update the tied operand.
- unsigned OperandNo = MI->getOperandNo(&MO);
+ unsigned OperandNo = MO.getOperandNo();
unsigned TiedIdx = MI->findTiedOperandIdx(OperandNo);
MI->getOperand(TiedIdx).setReg(VReg);
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
index 87b8ac59bdba..57cd1fcffb61 100644
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -229,8 +229,6 @@ void ReplaceWithVeclibLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<TargetLibraryInfoWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<AAResultsWrapperPass>();
- AU.addPreserved<LoopAccessLegacyAnalysis>();
- AU.addPreserved<DemandedBitsWrapperPass>();
AU.addPreserved<OptimizationRemarkEmitterWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
}
diff --git a/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp b/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
index 0ad6ef84220a..11bdf3bb2ba8 100644
--- a/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
+++ b/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
#define DEBUG_TYPE "reset-machine-function"
@@ -68,6 +69,10 @@ namespace {
MF.reset();
MF.initTargetMachineFunctionInfo(MF.getSubtarget());
+ const LLVMTargetMachine &TM = MF.getTarget();
+ // MRI callback for target specific initializations.
+ TM.registerMachineRegisterInfoCallback(MF);
+
if (EmitFallbackDiag) {
const Function &F = MF.getFunction();
DiagnosticInfoISelFallback DiagFallback(F);
diff --git a/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp b/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
index dd70a2f23e45..cc29bdce1210 100644
--- a/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
+++ b/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
@@ -52,12 +52,13 @@ bool MachineSanitizerBinaryMetadata::runOnMachineFunction(MachineFunction &MF) {
if (!MD)
return false;
const auto &Section = *cast<MDString>(MD->getOperand(0));
- if (!Section.getString().equals(kSanitizerBinaryMetadataCoveredSection))
+ if (!Section.getString().startswith(kSanitizerBinaryMetadataCoveredSection))
return false;
auto &AuxMDs = *cast<MDTuple>(MD->getOperand(1));
// Assume it currently only has features.
assert(AuxMDs.getNumOperands() == 1);
- auto *Features = cast<ConstantAsMetadata>(AuxMDs.getOperand(0))->getValue();
+ Constant *Features =
+ cast<ConstantAsMetadata>(AuxMDs.getOperand(0))->getValue();
if (!Features->getUniqueInteger()[kSanitizerBinaryMetadataUARBit])
return false;
// Calculate size of stack args for the function.
@@ -69,12 +70,18 @@ bool MachineSanitizerBinaryMetadata::runOnMachineFunction(MachineFunction &MF) {
Align = std::max(Align, MFI.getObjectAlign(i).value());
}
Size = (Size + Align - 1) & ~(Align - 1);
+ if (!Size)
+ return false;
+ // Non-zero size, update metadata.
auto &F = MF.getFunction();
IRBuilder<> IRB(F.getContext());
MDBuilder MDB(F.getContext());
// Keep the features and append size of stack args to the metadata.
- F.setMetadata(LLVMContext::MD_pcsections,
- MDB.createPCSections(
- {{Section.getString(), {Features, IRB.getInt32(Size)}}}));
+ APInt NewFeatures = Features->getUniqueInteger();
+ NewFeatures.setBit(kSanitizerBinaryMetadataUARHasSizeBit);
+ F.setMetadata(
+ LLVMContext::MD_pcsections,
+ MDB.createPCSections({{Section.getString(),
+ {IRB.getInt(NewFeatures), IRB.getInt32(Size)}}}));
return false;
}
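The SanitizerBinaryMetadata hunk above rounds the accumulated stack-argument size up to the largest object alignment with Size = (Size + Align - 1) & ~(Align - 1), and now returns early when the rounded size is zero. A quick standalone check of that rounding identity (valid only when the alignment is a power of two):

#include <cassert>
#include <cstdint>

uint64_t alignTo(uint64_t Size, uint64_t Align) {
  // Same bit trick as the patch; requires Align to be a power of two.
  return (Size + Align - 1) & ~(Align - 1);
}

int main() {
  assert(alignTo(0, 16) == 0);  // zero stays zero, so the new early return fires
  assert(alignTo(1, 16) == 16);
  assert(alignTo(16, 16) == 16);
  assert(alignTo(17, 16) == 32);
  return 0;
}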
diff --git a/llvm/lib/CodeGen/ScheduleDAG.cpp b/llvm/lib/CodeGen/ScheduleDAG.cpp
index 696b29018ae6..14ec41920e3e 100644
--- a/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -183,8 +183,6 @@ void SUnit::removePred(const SDep &D) {
SUnit *N = D.getSUnit();
SmallVectorImpl<SDep>::iterator Succ = llvm::find(N->Succs, P);
assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
- N->Succs.erase(Succ);
- Preds.erase(I);
// Update the bookkeeping.
if (P.getKind() == SDep::Data) {
assert(NumPreds > 0 && "NumPreds will underflow!");
@@ -193,21 +191,25 @@ void SUnit::removePred(const SDep &D) {
--N->NumSuccs;
}
if (!N->isScheduled) {
- if (D.isWeak())
+ if (D.isWeak()) {
+ assert(WeakPredsLeft > 0 && "WeakPredsLeft will underflow!");
--WeakPredsLeft;
- else {
+ } else {
assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!");
--NumPredsLeft;
}
}
if (!isScheduled) {
- if (D.isWeak())
+ if (D.isWeak()) {
+ assert(WeakSuccsLeft > 0 && "WeakSuccsLeft will underflow!");
--N->WeakSuccsLeft;
- else {
+ } else {
assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!");
--N->NumSuccsLeft;
}
}
+ N->Succs.erase(Succ);
+ Preds.erase(I);
if (P.getLatency() != 0) {
this->setDepthDirty();
N->setHeightDirty();
@@ -722,6 +724,8 @@ void ScheduleDAGTopologicalSort::AddSUnitWithoutPredecessors(const SUnit *SU) {
bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
const SUnit *TargetSU) {
+ assert(TargetSU != nullptr && "Invalid target SUnit");
+ assert(SU != nullptr && "Invalid SUnit");
FixOrder();
// If insertion of the edge SU->TargetSU would create a cycle
// then there is a path from TargetSU to SU.
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 1b213e87e75c..239b44857c28 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -208,13 +208,12 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
ExitSU.setInstr(ExitMI);
// Add dependencies on the defs and uses of the instruction.
if (ExitMI) {
- for (const MachineOperand &MO : ExitMI->operands()) {
- if (!MO.isReg() || MO.isDef()) continue;
+ for (const MachineOperand &MO : ExitMI->all_uses()) {
Register Reg = MO.getReg();
if (Reg.isPhysical()) {
Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
} else if (Reg.isVirtual() && MO.readsReg()) {
- addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO));
+ addVRegUseDeps(&ExitSU, MO.getOperandNo());
}
}
}
@@ -334,11 +333,11 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
addPhysRegDataDeps(SU, OperIdx);
// Clear previous uses and defs of this register and its subregisters.
- for (MCSubRegIterator SubReg(Reg, TRI, true); SubReg.isValid(); ++SubReg) {
- if (Uses.contains(*SubReg))
- Uses.eraseAll(*SubReg);
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg)) {
+ if (Uses.contains(SubReg))
+ Uses.eraseAll(SubReg);
if (!MO.isDead())
- Defs.eraseAll(*SubReg);
+ Defs.eraseAll(SubReg);
}
if (MO.isDead() && SU->isCall) {
// Calls will not be reordered because of chain dependencies (see
@@ -1026,15 +1025,14 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
void ScheduleDAGInstrs::Value2SUsMap::dump() {
for (const auto &[ValType, SUs] : *this) {
- if (ValType.is<const Value*>()) {
- const Value *V = ValType.get<const Value*>();
+ if (isa<const Value *>(ValType)) {
+ const Value *V = cast<const Value *>(ValType);
if (isa<UndefValue>(V))
dbgs() << "Unknown";
else
V->printAsOperand(dbgs());
- }
- else if (ValType.is<const PseudoSourceValue*>())
- dbgs() << ValType.get<const PseudoSourceValue*>();
+ } else if (isa<const PseudoSourceValue *>(ValType))
+ dbgs() << cast<const PseudoSourceValue *>(ValType);
else
llvm_unreachable("Unknown Value type.");
@@ -1522,7 +1520,7 @@ LLVM_DUMP_METHOD void ILPValue::dump() const {
namespace llvm {
-LLVM_DUMP_METHOD
+LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
Val.print(OS);
return OS;
diff --git a/llvm/lib/CodeGen/SelectOptimize.cpp b/llvm/lib/CodeGen/SelectOptimize.cpp
index 5fd78eccf732..30d959704745 100644
--- a/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -98,15 +99,15 @@ namespace {
class SelectOptimize : public FunctionPass {
const TargetMachine *TM = nullptr;
- const TargetSubtargetInfo *TSI;
+ const TargetSubtargetInfo *TSI = nullptr;
const TargetLowering *TLI = nullptr;
const TargetTransformInfo *TTI = nullptr;
- const LoopInfo *LI;
- DominatorTree *DT;
+ const LoopInfo *LI = nullptr;
+ DominatorTree *DT = nullptr;
std::unique_ptr<BlockFrequencyInfo> BFI;
std::unique_ptr<BranchProbabilityInfo> BPI;
- ProfileSummaryInfo *PSI;
- OptimizationRemarkEmitter *ORE;
+ ProfileSummaryInfo *PSI = nullptr;
+ OptimizationRemarkEmitter *ORE = nullptr;
TargetSchedModel TSchedModel;
public:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0a3ebd73d272..de909cc10795 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -30,11 +30,14 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/ByteProvider.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
@@ -57,7 +60,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -169,7 +171,8 @@ namespace {
/// them) when they are deleted from the underlying DAG. It relies on
/// stable indices of nodes within the worklist.
DenseMap<SDNode *, unsigned> WorklistMap;
- /// This records all nodes attempted to add to the worklist since we
+
+ /// This records all nodes attempted to be added to the worklist since we
/// considered a new worklist entry. As we do not add duplicate nodes to the
/// worklist, this is different from the tail of the worklist.
SmallSetVector<SDNode *, 32> PruningList;
@@ -262,7 +265,7 @@ namespace {
/// Add to the worklist making sure its instance is at the back (next to be
/// processed.)
- void AddToWorklist(SDNode *N) {
+ void AddToWorklist(SDNode *N, bool IsCandidateForPruning = true) {
assert(N->getOpcode() != ISD::DELETED_NODE &&
"Deleted Node added to Worklist");
@@ -271,7 +274,8 @@ namespace {
if (N->getOpcode() == ISD::HANDLENODE)
return;
- ConsiderForPruning(N);
+ if (IsCandidateForPruning)
+ ConsiderForPruning(N);
if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
Worklist.push_back(N);
@@ -362,6 +366,11 @@ namespace {
SDValue SplitIndexingFromLoad(LoadSDNode *LD);
bool SliceUpLoad(SDNode *N);
+ // Looks up the chain to find a unique (unaliased) store feeding the passed
+ // load. If no such store is found, returns a nullptr.
+ // Note: This will look past a CALLSEQ_START if the load is chained to it,
+ // so that it can find stack stores for byval params.
+ StoreSDNode *getUniqueStoreFeeding(LoadSDNode *LD, int64_t &Offset);
// Scalars have size 0 to distinguish from singleton vectors.
SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
@@ -417,11 +426,12 @@ namespace {
SDValue visitSUBC(SDNode *N);
SDValue visitSUBO(SDNode *N);
SDValue visitADDE(SDNode *N);
- SDValue visitADDCARRY(SDNode *N);
+ SDValue visitUADDO_CARRY(SDNode *N);
SDValue visitSADDO_CARRY(SDNode *N);
- SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
+ SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
+ SDNode *N);
SDValue visitSUBE(SDNode *N);
- SDValue visitSUBCARRY(SDNode *N);
+ SDValue visitUSUBO_CARRY(SDNode *N);
SDValue visitSSUBO_CARRY(SDNode *N);
SDValue visitMUL(SDNode *N);
SDValue visitMULFIX(SDNode *N);
@@ -434,6 +444,7 @@ namespace {
SDValue visitMULHU(SDNode *N);
SDValue visitMULHS(SDNode *N);
SDValue visitAVG(SDNode *N);
+ SDValue visitABD(SDNode *N);
SDValue visitSMUL_LOHI(SDNode *N);
SDValue visitUMUL_LOHI(SDNode *N);
SDValue visitMULO(SDNode *N);
@@ -476,10 +487,12 @@ namespace {
SDValue visitFREEZE(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitFADD(SDNode *N);
+ SDValue visitVP_FADD(SDNode *N);
+ SDValue visitVP_FSUB(SDNode *N);
SDValue visitSTRICT_FADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
SDValue visitFMUL(SDNode *N);
- SDValue visitFMA(SDNode *N);
+ template <class MatchContextClass> SDValue visitFMA(SDNode *N);
SDValue visitFDIV(SDNode *N);
SDValue visitFREM(SDNode *N);
SDValue visitFSQRT(SDNode *N);
@@ -495,6 +508,7 @@ namespace {
SDValue visitFABS(SDNode *N);
SDValue visitFCEIL(SDNode *N);
SDValue visitFTRUNC(SDNode *N);
+ SDValue visitFFREXP(SDNode *N);
SDValue visitFFLOOR(SDNode *N);
SDValue visitFMinMax(SDNode *N);
SDValue visitBRCOND(SDNode *N);
@@ -503,6 +517,7 @@ namespace {
SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
+ SDValue replaceStoreOfInsertLoad(StoreSDNode *ST);
bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N);
@@ -527,8 +542,12 @@ namespace {
SDValue visitFP_TO_BF16(SDNode *N);
SDValue visitVECREDUCE(SDNode *N);
SDValue visitVPOp(SDNode *N);
+ SDValue visitGET_FPENV_MEM(SDNode *N);
+ SDValue visitSET_FPENV_MEM(SDNode *N);
+ template <class MatchContextClass>
SDValue visitFADDForFMACombine(SDNode *N);
+ template <class MatchContextClass>
SDValue visitFSUBForFMACombine(SDNode *N);
SDValue visitFMULForFMADistributiveCombine(SDNode *N);
@@ -539,9 +558,12 @@ namespace {
SDValue N0,
SDValue N1);
SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
- SDValue N1);
+ SDValue N1, SDNodeFlags Flags);
SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1, SDNodeFlags Flags);
+ SDValue reassociateReduction(unsigned ResOpc, unsigned Opc, const SDLoc &DL,
+ EVT VT, SDValue N0, SDValue N1,
+ SDNodeFlags Flags = SDNodeFlags());
SDValue visitShiftByConstant(SDNode *N);
@@ -579,11 +601,15 @@ namespace {
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
+ SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI);
+
SDValue CombineExtLoad(SDNode *N);
SDValue CombineZExtLogicopShiftLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
+ SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
@@ -713,6 +739,11 @@ namespace {
SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumStores);
+ /// Helper function for mergeConsecutiveStores which checks if all the store
+ /// nodes have the same underlying object. We can still reuse the first
+ /// store's pointer info if all the stores are from the same object.
+ bool hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes);
+
/// This is a helper function for mergeConsecutiveStores. When the source
/// elements of the consecutive stores are all constants or all extracted
/// vector elements, try to merge them into one larger store introducing
@@ -841,6 +872,138 @@ public:
void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};
+class EmptyMatchContext {
+ SelectionDAG &DAG;
+ const TargetLowering &TLI;
+
+public:
+ EmptyMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
+ : DAG(DAG), TLI(TLI) {}
+
+ bool match(SDValue OpN, unsigned Opcode) const {
+ return Opcode == OpN->getOpcode();
+ }
+
+ // Same as SelectionDAG::getNode().
+ template <typename... ArgT> SDValue getNode(ArgT &&...Args) {
+ return DAG.getNode(std::forward<ArgT>(Args)...);
+ }
+
+ bool isOperationLegalOrCustom(unsigned Op, EVT VT,
+ bool LegalOnly = false) const {
+ return TLI.isOperationLegalOrCustom(Op, VT, LegalOnly);
+ }
+};
+
+class VPMatchContext {
+ SelectionDAG &DAG;
+ const TargetLowering &TLI;
+ SDValue RootMaskOp;
+ SDValue RootVectorLenOp;
+
+public:
+ VPMatchContext(SelectionDAG &DAG, const TargetLowering &TLI, SDNode *Root)
+ : DAG(DAG), TLI(TLI), RootMaskOp(), RootVectorLenOp() {
+ assert(Root->isVPOpcode());
+ if (auto RootMaskPos = ISD::getVPMaskIdx(Root->getOpcode()))
+ RootMaskOp = Root->getOperand(*RootMaskPos);
+
+ if (auto RootVLenPos =
+ ISD::getVPExplicitVectorLengthIdx(Root->getOpcode()))
+ RootVectorLenOp = Root->getOperand(*RootVLenPos);
+ }
+
+ /// Whether \p OpVal is a node that is functionally compatible with the
+ /// NodeType \p Opc
+ bool match(SDValue OpVal, unsigned Opc) const {
+ if (!OpVal->isVPOpcode())
+ return OpVal->getOpcode() == Opc;
+
+ auto BaseOpc = ISD::getBaseOpcodeForVP(OpVal->getOpcode(),
+ !OpVal->getFlags().hasNoFPExcept());
+ if (BaseOpc != Opc)
+ return false;
+
+ // Make sure the mask of OpVal is true mask or is same as Root's.
+ unsigned VPOpcode = OpVal->getOpcode();
+ if (auto MaskPos = ISD::getVPMaskIdx(VPOpcode)) {
+ SDValue MaskOp = OpVal.getOperand(*MaskPos);
+ if (RootMaskOp != MaskOp &&
+ !ISD::isConstantSplatVectorAllOnes(MaskOp.getNode()))
+ return false;
+ }
+
+ // Make sure the EVL of OpVal is same as Root's.
+ if (auto VLenPos = ISD::getVPExplicitVectorLengthIdx(VPOpcode))
+ if (RootVectorLenOp != OpVal.getOperand(*VLenPos))
+ return false;
+ return true;
+ }
+
+ // Specialize based on number of operands.
+ // TODO emit VP intrinsics where MaskOp/VectorLenOp != null
+ // SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { return
+ // DAG.getNode(Opcode, DL, VT); }
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
+ return DAG.getNode(VPOpcode, DL, VT,
+ {Operand, RootMaskOp, RootVectorLenOp});
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
+ return DAG.getNode(VPOpcode, DL, VT,
+ {N1, N2, RootMaskOp, RootVectorLenOp});
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2, SDValue N3) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
+ return DAG.getNode(VPOpcode, DL, VT,
+ {N1, N2, N3, RootMaskOp, RootVectorLenOp});
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue Operand,
+ SDNodeFlags Flags) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 1 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 2);
+ return DAG.getNode(VPOpcode, DL, VT, {Operand, RootMaskOp, RootVectorLenOp},
+ Flags);
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2, SDNodeFlags Flags) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 2 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 3);
+ return DAG.getNode(VPOpcode, DL, VT, {N1, N2, RootMaskOp, RootVectorLenOp},
+ Flags);
+ }
+
+ SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1,
+ SDValue N2, SDValue N3, SDNodeFlags Flags) {
+ unsigned VPOpcode = ISD::getVPForBaseOpcode(Opcode);
+ assert(ISD::getVPMaskIdx(VPOpcode) == 3 &&
+ ISD::getVPExplicitVectorLengthIdx(VPOpcode) == 4);
+ return DAG.getNode(VPOpcode, DL, VT,
+ {N1, N2, N3, RootMaskOp, RootVectorLenOp}, Flags);
+ }
+
+ bool isOperationLegalOrCustom(unsigned Op, EVT VT,
+ bool LegalOnly = false) const {
+ unsigned VPOp = ISD::getVPForBaseOpcode(Op);
+ return TLI.isOperationLegalOrCustom(VPOp, VT, LegalOnly);
+ }
+};
+
} // end anonymous namespace
//===----------------------------------------------------------------------===//
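The EmptyMatchContext/VPMatchContext classes added above let a single combine body be instantiated twice: once for ordinary SDNodes and once for VP (vector-predicated) nodes, with the context deciding how opcodes are matched and how replacement nodes are built. A much-reduced standalone sketch of that shape, using plain C++ and none of LLVM's types:

#include <cstdio>

// Two "contexts" with the same interface; the fold is templated over them.
struct PlainContext {
  bool match(int Op, int Expected) const { return Op == Expected; }
  int getNode(int Op, int A, int B) const { return Op + A + B; } // stand-in "builder"
};

struct PredicatedContext {
  int Mask; // extra state threaded into every node this context builds
  bool match(int Op, int Expected) const { return Op == Expected; }
  int getNode(int Op, int A, int B) const { return Op + A + B + Mask; }
};

template <class MatchContext>
int visitExample(const MatchContext &Ctx, int Op, int A, int B) {
  if (!Ctx.match(Op, /*ExpectedOpcode=*/3))
    return 0;
  // Same rewrite in both instantiations; the context builds the result node.
  return Ctx.getNode(/*NewOpcode=*/2, A, B);
}

int main() {
  std::printf("%d\n", visitExample(PlainContext{}, 3, 4, 5));      // 11
  std::printf("%d\n", visitExample(PredicatedContext{1}, 3, 4, 5)); // 12
  return 0;
}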
@@ -1099,7 +1262,8 @@ bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
- SDValue N0, SDValue N1) {
+ SDValue N0, SDValue N1,
+ SDNodeFlags Flags) {
EVT VT = N0.getValueType();
if (N0.getOpcode() != Opc)
@@ -1118,8 +1282,12 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
if (TLI.isReassocProfitable(DAG, N0, N1)) {
// Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
// iff (op x, c1) has one use
- SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1);
- return DAG.getNode(Opc, DL, VT, OpNode, N01);
+ SDNodeFlags NewFlags;
+ if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() &&
+ Flags.hasNoUnsignedWrap())
+ NewFlags.setNoUnsignedWrap(true);
+ SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags);
+ return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags);
}
}
@@ -1177,13 +1345,32 @@ SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
return SDValue();
- if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
+ if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1, Flags))
return Combined;
- if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
+ if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0, Flags))
return Combined;
return SDValue();
}
+// Try to fold Opc(vecreduce(x), vecreduce(y)) -> vecreduce(Opc(x, y))
+// Note that we only expect Flags to be passed from FP operations. For integer
+// operations they need to be dropped.
+SDValue DAGCombiner::reassociateReduction(unsigned RedOpc, unsigned Opc,
+ const SDLoc &DL, EVT VT, SDValue N0,
+ SDValue N1, SDNodeFlags Flags) {
+ if (N0.getOpcode() == RedOpc && N1.getOpcode() == RedOpc &&
+ N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
+ N0->hasOneUse() && N1->hasOneUse() &&
+ TLI.isOperationLegalOrCustom(Opc, N0.getOperand(0).getValueType()) &&
+ TLI.shouldReassociateReduction(RedOpc, N0.getOperand(0).getValueType())) {
+ SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
+ return DAG.getNode(RedOpc, DL, VT,
+ DAG.getNode(Opc, DL, N0.getOperand(0).getValueType(),
+ N0.getOperand(0), N1.getOperand(0)));
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
bool AddTo) {
assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
@@ -1591,8 +1778,13 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
WorklistInserter AddNodes(*this);
// Add all the dag nodes to the worklist.
+ //
+ // Note: Not all nodes are added to the PruningList here, because the only
+ // nodes which can be deleted are those which have no uses, and all other
+ // nodes which would otherwise be added to the worklist by the first call to
+ // getNextWorklistEntry are already present in it.
for (SDNode &Node : DAG.allnodes())
- AddToWorklist(&Node);
+ AddToWorklist(&Node, /* IsCandidateForPruning */ Node.use_empty());
// Create a dummy node (which is not added to allnodes), that adds a reference
// to the root node, preventing it from being deleted, and tracking any
@@ -1627,11 +1819,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// Add any operands of the new node which have not yet been combined to the
// worklist as well. Because the worklist uniques things already, this
// won't repeatedly process the same operand.
- CombinedNodes.insert(N);
for (const SDValue &ChildN : N->op_values())
if (!CombinedNodes.count(ChildN.getNode()))
AddToWorklist(ChildN.getNode());
+ CombinedNodes.insert(N);
SDValue RV = combine(N);
if (!RV.getNode())
@@ -1665,10 +1857,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// out), because re-visiting the EntryToken and its users will not uncover
// any additional opportunities, but there may be a large number of such
// users, potentially causing compile time explosion.
- if (RV.getOpcode() != ISD::EntryToken) {
- AddToWorklist(RV.getNode());
- AddUsersToWorklist(RV.getNode());
- }
+ if (RV.getOpcode() != ISD::EntryToken)
+ AddToWorklistWithUsers(RV.getNode());
// Finally, if the node is now dead, remove it from the graph. The node
// may not be dead if the replacement process recursively simplified to
@@ -1700,10 +1890,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SSUBO:
case ISD::USUBO: return visitSUBO(N);
case ISD::ADDE: return visitADDE(N);
- case ISD::ADDCARRY: return visitADDCARRY(N);
+ case ISD::UADDO_CARRY: return visitUADDO_CARRY(N);
case ISD::SADDO_CARRY: return visitSADDO_CARRY(N);
case ISD::SUBE: return visitSUBE(N);
- case ISD::SUBCARRY: return visitSUBCARRY(N);
+ case ISD::USUBO_CARRY: return visitUSUBO_CARRY(N);
case ISD::SSUBO_CARRY: return visitSSUBO_CARRY(N);
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
@@ -1720,6 +1910,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::AVGFLOORU:
case ISD::AVGCEILS:
case ISD::AVGCEILU: return visitAVG(N);
+ case ISD::ABDS:
+ case ISD::ABDU: return visitABD(N);
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
case ISD::SMULO:
@@ -1770,7 +1962,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::STRICT_FADD: return visitSTRICT_FADD(N);
case ISD::FSUB: return visitFSUB(N);
case ISD::FMUL: return visitFMUL(N);
- case ISD::FMA: return visitFMA(N);
+ case ISD::FMA: return visitFMA<EmptyMatchContext>(N);
case ISD::FDIV: return visitFDIV(N);
case ISD::FREM: return visitFREM(N);
case ISD::FSQRT: return visitFSQRT(N);
@@ -1791,6 +1983,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FMAXIMUM: return visitFMinMax(N);
case ISD::FCEIL: return visitFCEIL(N);
case ISD::FTRUNC: return visitFTRUNC(N);
+ case ISD::FFREXP: return visitFFREXP(N);
case ISD::BRCOND: return visitBRCOND(N);
case ISD::BR_CC: return visitBR_CC(N);
case ISD::LOAD: return visitLOAD(N);
@@ -1812,6 +2005,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
case ISD::FP_TO_BF16: return visitFP_TO_BF16(N);
case ISD::FREEZE: return visitFREEZE(N);
+ case ISD::GET_FPENV_MEM: return visitGET_FPENV_MEM(N);
+ case ISD::SET_FPENV_MEM: return visitSET_FPENV_MEM(N);
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_ADD:
@@ -1824,7 +2019,9 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
- case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
+ case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM: return visitVECREDUCE(N);
#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) case ISD::SDOPC:
#include "llvm/IR/VPIntrinsics.def"
return visitVPOp(N);
@@ -2131,6 +2328,39 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
}
+// isTruncateOf - If N is a truncate of some other value, return true, record
+// the value being truncated in Op and which of Op's bits are zero/one in Known.
+// This function computes KnownBits to avoid a duplicated call to
+// computeKnownBits in the caller.
+static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
+ KnownBits &Known) {
+ if (N->getOpcode() == ISD::TRUNCATE) {
+ Op = N->getOperand(0);
+ Known = DAG.computeKnownBits(Op);
+ return true;
+ }
+
+ if (N.getOpcode() != ISD::SETCC ||
+ N.getValueType().getScalarType() != MVT::i1 ||
+ cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
+ return false;
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ assert(Op0.getValueType() == Op1.getValueType());
+
+ if (isNullOrNullSplat(Op0))
+ Op = Op1;
+ else if (isNullOrNullSplat(Op1))
+ Op = Op0;
+ else
+ return false;
+
+ Known = DAG.computeKnownBits(Op);
+
+ return (Known.Zero | 1).isAllOnes();
+}
+
/// Return true if 'Use' is a load or a store that uses N as its base pointer
/// and that N may be folded in the load / store addressing mode.
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG,
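isTruncateOf() above also accepts (setcc ne X, 0) with an i1 result as a "truncate of X", but only when known-bits analysis proves that every bit of X except bit 0 is zero, which is exactly what the (Known.Zero | 1).isAllOnes() test encodes. A standalone version of that mask check on a 64-bit value:

#include <cassert>
#include <cstdint>

// KnownZero has a bit set for every bit of the value proven to be zero.
bool onlyLowBitCanBeSet(uint64_t KnownZero) {
  // Mirrors (Known.Zero | 1).isAllOnes() for a 64-bit value.
  return (KnownZero | 1) == ~0ull;
}

int main() {
  assert(onlyLowBitCanBeSet(~1ull));  // everything above bit 0 proven zero
  assert(onlyLowBitCanBeSet(~0ull));  // a value proven to be 0 also qualifies
  assert(!onlyLowBitCanBeSet(~3ull)); // bit 1 might be set -> not truncate-like
  return 0;
}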
@@ -2206,11 +2436,12 @@ static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG,
if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse())
return SDValue();
- // We can't hoist div/rem because of immediate UB (not speculatable).
- unsigned Opcode = N->getOpcode();
- if (!DAG.isSafeToSpeculativelyExecute(Opcode))
+ // We can't hoist all instructions because of immediate UB (not speculatable).
+ // For example div/rem by zero.
+ if (!DAG.isSafeToSpeculativelyExecuteNode(N))
return SDValue();
+ unsigned Opcode = N->getOpcode();
EVT VT = N->getValueType(0);
SDValue Cond = N1.getOperand(0);
SDValue TVal = N1.getOperand(1);
@@ -2258,6 +2489,17 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
SelOpNo = 1;
Sel = BO->getOperand(1);
+
+ // Peek through trunc to shift amount type.
+ if ((BinOpcode == ISD::SHL || BinOpcode == ISD::SRA ||
+ BinOpcode == ISD::SRL) && Sel.hasOneUse()) {
+ // This is valid when the truncated bits of x are already zero.
+ SDValue Op;
+ KnownBits Known;
+ if (isTruncateOf(DAG, Sel, Op, Known) &&
+ Known.countMaxActiveBits() < Sel.getScalarValueSizeInBits())
+ Sel = Op;
+ }
}
if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
@@ -2310,18 +2552,14 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
// constant. Eliminate the binop by pulling the constant math into the
// select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT +
// CBO, CF + CBO
- NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
- : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
- if (!CanFoldNonConst && !NewCT.isUndef() &&
- !isConstantOrConstantVector(NewCT, true) &&
- !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
+ NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT})
+ : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO});
+ if (!NewCT)
return SDValue();
- NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
- : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
- if (!CanFoldNonConst && !NewCF.isUndef() &&
- !isConstantOrConstantVector(NewCF, true) &&
- !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
+ NewCF = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF})
+ : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO});
+ if (!NewCF)
return SDValue();
}
@@ -2420,6 +2658,12 @@ static bool isADDLike(SDValue V, const SelectionDAG &DAG) {
return false;
}
+static bool
+areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) {
+ return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) ||
+ (isBitwiseNot(Op1) && Op1.getOperand(0) == Op0);
+}
+
/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
/// an ISD::ADD here, it could for example be an ISD::OR if we know that there
/// are no common bits set in the operands).
@@ -2444,6 +2688,10 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
+ if (areBitwiseNotOfEachother(N0, N1))
+ return DAG.getConstant(APInt::getAllOnes(VT.getScalarSizeInBits()),
+ SDLoc(N), VT);
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
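The new areBitwiseNotOfEachother() check lets visitADDLike() fold x + (xor x, -1) directly to the all-ones constant: in two's complement ~x == -1 - x, so x + ~x == -1 for every x. A quick standalone check:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 7u, 0x80000000u, 0xFFFFFFFFu})
    assert(X + ~X == 0xFFFFFFFFu); // x + (xor x, -1) == all ones
  return 0;
}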
@@ -2509,12 +2757,22 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// equivalent to (add x, c).
// Reassociate (add (xor x, c), y) -> (add add(x, y), c)) if (xor x, c) is
// equivalent to (add x, c).
+ // Do this optimization only when adding c does not introduce instructions
+ // for adding carries.
auto ReassociateAddOr = [&](SDValue N0, SDValue N1) {
if (isADDLike(N0, DAG) && N0.hasOneUse() &&
isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
- return DAG.getNode(ISD::ADD, DL, VT,
- DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
- N0.getOperand(1));
+ // If N0's type does not split or is a sign mask, it does not introduce
+ // add carry.
+ auto TyActn = TLI.getTypeAction(*DAG.getContext(), N0.getValueType());
+ bool NoAddCarry = TyActn == TargetLoweringBase::TypeLegal ||
+ TyActn == TargetLoweringBase::TypePromoteInteger ||
+ isMinSignedConstant(N0.getOperand(1));
+ if (NoAddCarry)
+ return DAG.getNode(
+ ISD::ADD, DL, VT,
+ DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
+ N0.getOperand(1));
}
return SDValue();
};
@@ -2522,6 +2780,11 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
return Add;
if (SDValue Add = ReassociateAddOr(N1, N0))
return Add;
+
+ // Fold add(vecreduce(x), vecreduce(y)) -> vecreduce(add(x, y))
+ if (SDValue SD =
+ reassociateReduction(ISD::VECREDUCE_ADD, ISD::ADD, DL, VT, N0, N1))
+ return SD;
}
// fold ((0-A) + B) -> B-A
if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
@@ -2626,7 +2889,10 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
// And if the target does not like this form then turn into:
// sub y, (xor x, -1)
if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
- N0.hasOneUse()) {
+ N0.hasOneUse() &&
+ // Limit this to after legalization if the add has wrap flags
+ (Level >= AfterLegalizeDAG || (!N->getFlags().hasNoUnsignedWrap() &&
+ !N->getFlags().hasNoSignedWrap()))) {
SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
@@ -2714,6 +2980,7 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ bool IsSigned = Opcode == ISD::SADDSAT;
SDLoc DL(N);
// fold (add_sat x, undef) -> -1
@@ -2744,14 +3011,14 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) {
return N0;
// If it cannot overflow, transform into an add.
- if (Opcode == ISD::UADDSAT)
- if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
- return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
+ if (DAG.computeOverflowForAdd(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
return SDValue();
}
-static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
+static SDValue getAsCarry(const TargetLowering &TLI, SDValue V,
+ bool ForceCarryReconstruction = false) {
bool Masked = false;
// First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
@@ -2762,11 +3029,17 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
}
if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
+ if (ForceCarryReconstruction)
+ return V;
+
Masked = true;
V = V.getOperand(0);
continue;
}
+ if (ForceCarryReconstruction && V.getValueType() == MVT::i1)
+ return V;
+
break;
}
@@ -2774,7 +3047,7 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
if (V.getResNo() != 1)
return SDValue();
- if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
+ if (V.getOpcode() != ISD::UADDO_CARRY && V.getOpcode() != ISD::USUBO_CARRY &&
V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
return SDValue();
@@ -2842,7 +3115,10 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
// And if the target does not like this form then turn into:
// sub y, (xor x, -1)
if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.getOpcode() == ISD::ADD &&
- N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1))) {
+ N0.hasOneUse() && isOneOrOneSplat(N0.getOperand(1)) &&
+ // Limit this to after legalization if the add has wrap flags
+ (Level >= AfterLegalizeDAG || (!N0->getFlags().hasNoUnsignedWrap() &&
+ !N0->getFlags().hasNoSignedWrap()))) {
SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
@@ -2864,6 +3140,15 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
}
}
+ // add (mul x, C), x -> mul x, C+1
+ if (N0.getOpcode() == ISD::MUL && N0.getOperand(0) == N1 &&
+ isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true) &&
+ N0.hasOneUse()) {
+ SDValue NewC = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
+ DAG.getConstant(1, DL, VT));
+ return DAG.getNode(ISD::MUL, DL, VT, N0.getOperand(0), NewC);
+ }
+
// If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
// rather than 'add 0/-1' (the zext should get folded).
// add (sext i1 Y), X --> sub X, (zext i1 Y)
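The "add (mul x, C), x -> mul x, C+1" fold added above is plain distributivity, x*C + x == x*(C+1), and it is restricted to a one-use multiply so the rewrite never creates an extra multiply. A quick check of the identity:

#include <cassert>
#include <cstdint>

int main() {
  for (int64_t X : {-5, 0, 3, 1 << 20})
    for (int64_t C : {-2, 0, 7})
      assert(X * C + X == X * (C + 1));
  return 0;
}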
@@ -2884,16 +3169,16 @@ SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
}
}
- // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
- if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
+ // (add X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
+ if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1)) &&
N1.getResNo() == 0)
- return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
+ return DAG.getNode(ISD::UADDO_CARRY, DL, N1->getVTList(),
N0, N1.getOperand(0), N1.getOperand(2));
- // (add X, Carry) -> (addcarry X, 0, Carry)
- if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
+ // (add X, Carry) -> (uaddo_carry X, 0, Carry)
+ if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
if (SDValue Carry = getAsCarry(TLI, N1))
- return DAG.getNode(ISD::ADDCARRY, DL,
+ return DAG.getNode(ISD::UADDO_CARRY, DL,
DAG.getVTList(VT, Carry.getValueType()), N0,
DAG.getConstant(0, DL, VT), Carry);
@@ -2923,7 +3208,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
DL, MVT::Glue));
// If it cannot overflow, transform into an add.
- if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ if (DAG.computeOverflowForUnsignedAdd(N0, N1) == SelectionDAG::OFK_Never)
return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
@@ -2995,12 +3280,12 @@ SDValue DAGCombiner::visitADDO(SDNode *N) {
if (isNullOrNullSplat(N1))
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
- if (!IsSigned) {
- // If it cannot overflow, transform into an add.
- if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
- return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
- DAG.getConstant(0, DL, CarryVT));
+ // If it cannot overflow, transform into an add.
+ if (DAG.computeOverflowForAdd(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
+ if (!IsSigned) {
// fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
@@ -3024,20 +3309,20 @@ SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
if (VT.isVector())
return SDValue();
- // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
+ // (uaddo X, (uaddo_carry Y, 0, Carry)) -> (uaddo_carry X, Y, Carry)
// If Y + 1 cannot overflow.
- if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
+ if (N1.getOpcode() == ISD::UADDO_CARRY && isNullConstant(N1.getOperand(1))) {
SDValue Y = N1.getOperand(0);
SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
- if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
- return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
+ if (DAG.computeOverflowForUnsignedAdd(Y, One) == SelectionDAG::OFK_Never)
+ return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0, Y,
N1.getOperand(2));
}
- // (uaddo X, Carry) -> (addcarry X, 0, Carry)
- if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
+ // (uaddo X, Carry) -> (uaddo_carry X, 0, Carry)
+ if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT))
if (SDValue Carry = getAsCarry(TLI, N1))
- return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
+ return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(), N0,
DAG.getConstant(0, SDLoc(N), VT), Carry);
return SDValue();
@@ -3062,7 +3347,7 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
+SDValue DAGCombiner::visitUADDO_CARRY(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
@@ -3072,16 +3357,16 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (N0C && !N1C)
- return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
+ return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
- // fold (addcarry x, y, false) -> (uaddo x, y)
+ // fold (uaddo_carry x, y, false) -> (uaddo x, y)
if (isNullConstant(CarryIn)) {
if (!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
}
- // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
+ // fold (uaddo_carry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
if (isNullConstant(N0) && isNullConstant(N1)) {
EVT VT = N0.getValueType();
EVT CarryVT = CarryIn.getValueType();
@@ -3092,73 +3377,52 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
DAG.getConstant(0, DL, CarryVT));
}
- if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
+ if (SDValue Combined = visitUADDO_CARRYLike(N0, N1, CarryIn, N))
return Combined;
- if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
+ if (SDValue Combined = visitUADDO_CARRYLike(N1, N0, CarryIn, N))
return Combined;
// We want to avoid useless duplication.
- // TODO: This is done automatically for binary operations. As ADDCARRY is
+ // TODO: This is done automatically for binary operations. As UADDO_CARRY is
  // not a binary operation, it is not really possible to leverage this
  // existing mechanism for it. However, if more operations require the same
  // deduplication logic, then it may be worth generalizing.
SDValue Ops[] = {N1, N0, CarryIn};
SDNode *CSENode =
- DAG.getNodeIfExists(ISD::ADDCARRY, N->getVTList(), Ops, N->getFlags());
+ DAG.getNodeIfExists(ISD::UADDO_CARRY, N->getVTList(), Ops, N->getFlags());
if (CSENode)
return SDValue(CSENode, 0);
return SDValue();
}
-SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDValue CarryIn = N->getOperand(2);
- SDLoc DL(N);
-
- // canonicalize constant to RHS
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (N0C && !N1C)
- return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
-
- // fold (saddo_carry x, y, false) -> (saddo x, y)
- if (isNullConstant(CarryIn)) {
- if (!LegalOperations ||
- TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
- return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
- }
-
- return SDValue();
-}
-
/**
 * If we are facing some sort of diamond carry propagation pattern, try to
* break it up to generate something like:
- * (addcarry X, 0, (addcarry A, B, Z):Carry)
+ * (uaddo_carry X, 0, (uaddo_carry A, B, Z):Carry)
*
 * The end result is usually an increase in the number of operations required, but because the
* carry is now linearized, other transforms can kick in and optimize the DAG.
*
* Patterns typically look something like
- * (uaddo A, B)
- * / \
- * Carry Sum
- * | \
- * | (addcarry *, 0, Z)
- * | /
- * \ Carry
- * | /
- * (addcarry X, *, *)
+ * (uaddo A, B)
+ * / \
+ * Carry Sum
+ * | \
+ * | (uaddo_carry *, 0, Z)
+ * | /
+ * \ Carry
+ * | /
+ * (uaddo_carry X, *, *)
*
 * But numerous variations exist. Our goal is to identify A, B, X and Z and
* produce a combine with a single path for carry propagation.
*/
-static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
- SDValue X, SDValue Carry0, SDValue Carry1,
- SDNode *N) {
+static SDValue combineUADDO_CARRYDiamond(DAGCombiner &Combiner,
+ SelectionDAG &DAG, SDValue X,
+ SDValue Carry0, SDValue Carry1,
+ SDNode *N) {
if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
return SDValue();
if (Carry1.getOpcode() != ISD::UADDO)
@@ -3168,9 +3432,9 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
/**
* First look for a suitable Z. It will present itself in the form of
- * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
+ * (uaddo_carry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
*/
- if (Carry0.getOpcode() == ISD::ADDCARRY &&
+ if (Carry0.getOpcode() == ISD::UADDO_CARRY &&
isNullConstant(Carry0.getOperand(1))) {
Z = Carry0.getOperand(2);
} else if (Carry0.getOpcode() == ISD::UADDO &&
@@ -3185,26 +3449,27 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
auto cancelDiamond = [&](SDValue A,SDValue B) {
SDLoc DL(N);
- SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
+ SDValue NewY =
+ DAG.getNode(ISD::UADDO_CARRY, DL, Carry0->getVTList(), A, B, Z);
Combiner.AddToWorklist(NewY.getNode());
- return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
+ return DAG.getNode(ISD::UADDO_CARRY, DL, N->getVTList(), X,
DAG.getConstant(0, DL, X.getValueType()),
NewY.getValue(1));
};
/**
- * (uaddo A, B)
- * |
- * Sum
- * |
- * (addcarry *, 0, Z)
+ * (uaddo A, B)
+ * |
+ * Sum
+ * |
+ * (uaddo_carry *, 0, Z)
*/
if (Carry0.getOperand(0) == Carry1.getValue(0)) {
return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
}
/**
- * (addcarry A, 0, Z)
+ * (uaddo_carry A, 0, Z)
* |
* Sum
* |
@@ -3241,12 +3506,12 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
// | /
// CarryOut = (or *, *)
//
-// And generate ADDCARRY (or SUBCARRY) with two result values:
+// And generate UADDO_CARRY (or USUBO_CARRY) with two result values:
//
-// {AddCarrySum, CarryOut} = (addcarry A, B, CarryIn)
+// {AddCarrySum, CarryOut} = (uaddo_carry A, B, CarryIn)
//
-// Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
-// a single path for carry/borrow out propagation:
+// Our goal is to identify A, B, and CarryIn and produce UADDO_CARRY/USUBO_CARRY
+// with a single path for carry/borrow out propagation.
static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
SDValue N0, SDValue N1, SDNode *N) {
SDValue Carry0 = getAsCarry(TLI, N0);
@@ -3279,16 +3544,13 @@ static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
return SDValue();
SDValue CarryIn = Carry1.getOperand(CarryInOperandNum);
- unsigned NewOp = Opcode == ISD::UADDO ? ISD::ADDCARRY : ISD::SUBCARRY;
+ unsigned NewOp = Opcode == ISD::UADDO ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
if (!TLI.isOperationLegalOrCustom(NewOp, Carry0.getValue(0).getValueType()))
return SDValue();
// Verify that the carry/borrow in is plausibly a carry/borrow bit.
- // TODO: make getAsCarry() aware of how partial carries are merged.
- if (CarryIn.getOpcode() != ISD::ZERO_EXTEND)
- return SDValue();
- CarryIn = CarryIn.getOperand(0);
- if (CarryIn.getValueType() != MVT::i1)
+ CarryIn = getAsCarry(TLI, CarryIn, true);
+ if (!CarryIn)
return SDValue();
SDLoc DL(N);
@@ -3315,45 +3577,68 @@ static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
return Merged.getValue(1);
}
-SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
- SDNode *N) {
- // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
+SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1,
+ SDValue CarryIn, SDNode *N) {
+ // fold (uaddo_carry (xor a, -1), b, c) -> (usubo_carry b, a, !c) and flip
+ // carry.
if (isBitwiseNot(N0))
if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
SDLoc DL(N);
- SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
+ SDValue Sub = DAG.getNode(ISD::USUBO_CARRY, DL, N->getVTList(), N1,
N0.getOperand(0), NotC);
return CombineTo(
N, Sub, DAG.getLogicalNOT(DL, Sub.getValue(1), Sub->getValueType(1)));
}
// Iff the flag result is dead:
- // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
+ // (uaddo_carry (add|uaddo X, Y), 0, Carry) -> (uaddo_carry X, Y, Carry)
// Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
// or the dependency between the instructions.
if ((N0.getOpcode() == ISD::ADD ||
(N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
N0.getValue(1) != CarryIn)) &&
isNullConstant(N1) && !N->hasAnyUseOfValue(1))
- return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
+ return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), N->getVTList(),
N0.getOperand(0), N0.getOperand(1), CarryIn);
/**
- * When one of the addcarry argument is itself a carry, we may be facing
+   * When one of the uaddo_carry arguments is itself a carry, we may be facing
* a diamond carry propagation. In which case we try to transform the DAG
* to ensure linear carry propagation if that is possible.
*/
if (auto Y = getAsCarry(TLI, N1)) {
// Because both are carries, Y and Z can be swapped.
- if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
+ if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
return R;
- if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
+ if (auto R = combineUADDO_CARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
return R;
}
return SDValue();
}
+SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue CarryIn = N->getOperand(2);
+ SDLoc DL(N);
+
+ // canonicalize constant to RHS
+ ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ if (N0C && !N1C)
+ return DAG.getNode(ISD::SADDO_CARRY, DL, N->getVTList(), N1, N0, CarryIn);
+
+ // fold (saddo_carry x, y, false) -> (saddo x, y)
+ if (isNullConstant(CarryIn)) {
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::SADDO, N->getValueType(0)))
+ return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1);
+ }
+
+ return SDValue();
+}
+
// Attempt to create a USUBSAT(LHS, RHS) node with DstVT, performing a
// clamp/truncation if necessary.
static SDValue getTruncatedUSUBSAT(EVT DstVT, EVT SrcVT, SDValue LHS,
@@ -3720,11 +4005,6 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// If the relocation model supports it, consider symbol offsets.
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
- // fold (sub Sym, c) -> Sym-c
- if (N1C && GA->getOpcode() == ISD::GlobalAddress)
- return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
- GA->getOffset() -
- (uint64_t)N1C->getSExtValue());
// fold (sub Sym+c1, Sym+c2) -> c1-c2
if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
if (GA->getGlobal() == GB->getGlobal())
@@ -3776,19 +4056,19 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
}
- // (sub (subcarry X, 0, Carry), Y) -> (subcarry X, Y, Carry)
- if (N0.getOpcode() == ISD::SUBCARRY && isNullConstant(N0.getOperand(1)) &&
+ // (sub (usubo_carry X, 0, Carry), Y) -> (usubo_carry X, Y, Carry)
+ if (N0.getOpcode() == ISD::USUBO_CARRY && isNullConstant(N0.getOperand(1)) &&
N0.getResNo() == 0 && N0.hasOneUse())
- return DAG.getNode(ISD::SUBCARRY, DL, N0->getVTList(),
+ return DAG.getNode(ISD::USUBO_CARRY, DL, N0->getVTList(),
N0.getOperand(0), N1, N0.getOperand(2));
- if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
- // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
+ if (TLI.isOperationLegalOrCustom(ISD::UADDO_CARRY, VT)) {
+ // (sub Carry, X) -> (uaddo_carry (sub 0, X), 0, Carry)
if (SDValue Carry = getAsCarry(TLI, N0)) {
SDValue X = N1;
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
- return DAG.getNode(ISD::ADDCARRY, DL,
+ return DAG.getNode(ISD::UADDO_CARRY, DL,
DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
Carry);
}
@@ -3814,7 +4094,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
(N0.getOperand(0) != N1.getOperand(1) ||
N0.getOperand(1) != N1.getOperand(0)))
return SDValue();
- if (!TLI.isOperationLegalOrCustom(Abd, VT))
+ if (!hasOperation(Abd, VT))
return SDValue();
return DAG.getNode(Abd, DL, VT, N0.getOperand(0), N0.getOperand(1));
};
@@ -3827,9 +4107,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ bool IsSigned = Opcode == ISD::SSUBSAT;
SDLoc DL(N);
// fold (sub_sat x, undef) -> 0
@@ -3841,7 +4123,7 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
return DAG.getConstant(0, DL, VT);
// fold (sub_sat c1, c2) -> c3
- if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1}))
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
return C;
// fold vector ops
@@ -3858,6 +4140,10 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
if (isNullConstant(N1))
return N0;
+  // If it cannot overflow, transform into a sub.
+ if (DAG.computeOverflowForSub(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ return DAG.getNode(ISD::SUB, DL, VT, N0, N1);
+
return SDValue();
}
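A minimal check of the identity behind the new no-overflow fold, modelled on unsigned i8 in plain C++ (an illustration of the unsigned case only; the signed case is analogous):

#include <cassert>
#include <cstdint>

// Scalar model of ISD::USUBSAT on i8.
static uint8_t usubsat8(uint8_t x, uint8_t y) { return x >= y ? x - y : 0; }

int main() {
  // Whenever the subtraction cannot underflow (OFK_Never above), the
  // saturating subtract is just a plain SUB.
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y <= x; ++y)
      assert(usubsat8(x, y) == (uint8_t)(x - y));
  return 0;
}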
@@ -3911,7 +4197,7 @@ SDValue DAGCombiner::visitSUBO(SDNode *N) {
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  // fold (subo x, c) -> (addo x, -c)
- if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
+ if (IsSigned && N1C && !N1C->isMinSignedValue()) {
return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
}
@@ -3920,6 +4206,11 @@ SDValue DAGCombiner::visitSUBO(SDNode *N) {
if (isNullOrNullSplat(N1))
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
+  // If it cannot overflow, transform into a sub.
+ if (DAG.computeOverflowForSub(IsSigned, N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
+
// Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
@@ -3940,12 +4231,12 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
+SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue CarryIn = N->getOperand(2);
- // fold (subcarry x, y, false) -> (usubo x, y)
+ // fold (usubo_carry x, y, false) -> (usubo x, y)
if (isNullConstant(CarryIn)) {
if (!LegalOperations ||
TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
@@ -4062,13 +4353,14 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) {
unsigned Log2Val = (-ConstValue1).logBase2();
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+
// FIXME: If the input is something that is easily negated (e.g. a
// single-use add), we should put the negate there.
return DAG.getNode(ISD::SUB, DL, VT,
DAG.getConstant(0, DL, VT),
DAG.getNode(ISD::SHL, DL, VT, N0,
- DAG.getConstant(Log2Val, DL,
- getShiftAmountTy(N0.getValueType()))));
+ DAG.getConstant(Log2Val, DL, ShiftVT)));
}
// Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the
@@ -4108,7 +4400,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
unsigned MathOp = ISD::DELETED_NODE;
APInt MulC = ConstValue1.abs();
// The constant `2` should be treated as (2^0 + 1).
- unsigned TZeros = MulC == 2 ? 0 : MulC.countTrailingZeros();
+ unsigned TZeros = MulC == 2 ? 0 : MulC.countr_zero();
MulC.lshrInPlace(TZeros);
if ((MulC - 1).isPowerOf2())
MathOp = ISD::ADD;
@@ -4163,8 +4455,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
}
// fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
- if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
- N0.getOpcode() == ISD::ADD &&
+ if (N0.getOpcode() == ISD::ADD &&
+ DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
isMulAddWithConstProfitable(N, N0, N1))
return DAG.getNode(
@@ -4223,6 +4515,11 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags()))
return RMUL;
+ // Fold mul(vecreduce(x), vecreduce(y)) -> vecreduce(mul(x, y))
+ if (SDValue SD =
+ reassociateReduction(ISD::VECREDUCE_MUL, ISD::MUL, DL, VT, N0, N1))
+ return SD;
+
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -4386,7 +4683,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return DAG.getNegative(N0, DL, VT);
// fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
- if (N1C && N1C->getAPIntValue().isMinSignedValue())
+ if (N1C && N1C->isMinSignedValue())
return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
DAG.getConstant(1, DL, VT),
DAG.getConstant(0, DL, VT));
@@ -4886,11 +5183,57 @@ SDValue DAGCombiner::visitAVG(SDNode *N) {
if (N1.isUndef())
return N0;
+ // Fold (avg x, x) --> x
+ if (N0 == N1 && Level >= AfterLegalizeTypes)
+ return N0;
+
// TODO If we use avg for scalars anywhere, we can add (avgfl x, 0) -> x >> 1
return SDValue();
}
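An illustrative scalar model of the fixed-point averaging nodes (an assumption of this sketch, not LLVM API), checking the (avg x, x) -> x fold added above on u8:

#include <cassert>
#include <cstdint>

// Overflow-free models of unsigned averaging on u8.
static uint8_t avgflooru8(uint8_t x, uint8_t y) {
  return (x & y) + ((x ^ y) >> 1);   // floor((x + y) / 2)
}
static uint8_t avgceilu8(uint8_t x, uint8_t y) {
  return (x | y) - ((x ^ y) >> 1);   // ceil((x + y) / 2)
}

int main() {
  for (unsigned x = 0; x < 256; ++x) {
    // fold (avg x, x) --> x, for both rounding modes.
    assert(avgflooru8(x, x) == (uint8_t)x);
    assert(avgceilu8(x, x) == (uint8_t)x);
    for (unsigned y = 0; y < 256; ++y)
      assert(avgflooru8(x, y) == (uint8_t)((x + y) / 2));
  }
  return 0;
}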
+SDValue DAGCombiner::visitABD(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // fold (abd c1, c2)
+ if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}))
+ return C;
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(Opcode, DL, N->getVTList(), N1, N0);
+
+ if (VT.isVector()) {
+ if (SDValue FoldedVOp = SimplifyVBinOp(N, DL))
+ return FoldedVOp;
+
+ // fold (abds x, 0) -> abs x
+ // fold (abdu x, 0) -> x
+ if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) {
+ if (Opcode == ISD::ABDS)
+ return DAG.getNode(ISD::ABS, DL, VT, N0);
+ if (Opcode == ISD::ABDU)
+ return N0;
+ }
+ }
+
+ // fold (abd x, undef) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, DL, VT);
+
+ // fold (abds x, y) -> (abdu x, y) iff both args are known positive
+ if (Opcode == ISD::ABDS && hasOperation(ISD::ABDU, VT) &&
+ DAG.SignBitIsZero(N0) && DAG.SignBitIsZero(N1))
+ return DAG.getNode(ISD::ABDU, DL, VT, N1, N0);
+
+ return SDValue();
+}
+
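A minimal brute-force check of the ABD identities used above, with abds/abdu modelled as plain i8 arithmetic (an illustration, not the DAG code; the last assert previews the foldABSToABD identity further below):

#include <cassert>
#include <cstdint>
#include <cstdlib>

// Scalar models of ISD::ABDS / ISD::ABDU on i8.
static uint8_t abds8(int8_t x, int8_t y) {
  return (uint8_t)(x > y ? (int)x - (int)y : (int)y - (int)x);
}
static uint8_t abdu8(uint8_t x, uint8_t y) { return x > y ? x - y : y - x; }

int main() {
  for (int x = -128; x < 128; ++x) {
    // fold (abds x, 0) -> abs x (compared modulo 2^8, i.e. i8 semantics).
    assert(abds8((int8_t)x, 0) == (uint8_t)std::abs(x));
    for (int y = -128; y < 128; ++y) {
      // fold (abds x, y) -> (abdu x, y) iff both sign bits are known zero.
      if (x >= 0 && y >= 0)
        assert(abds8((int8_t)x, (int8_t)y) == abdu8((uint8_t)x, (uint8_t)y));
      // foldABSToABD (later in this patch): abs(sext(x) - sext(y)) == zext(abds(x, y)).
      assert(std::abs(x - y) == (int)abds8((int8_t)x, (int8_t)y));
    }
  }
  return 0;
}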
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// true if a simplification was made.
@@ -5108,7 +5451,7 @@ SDValue DAGCombiner::visitMULO(SDNode *N) {
// same as SimplifySelectCC. N0<N1 ? N2 : N3.
static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
SDValue N3, ISD::CondCode CC, unsigned &BW,
- bool &Unsigned) {
+ bool &Unsigned, SelectionDAG &DAG) {
auto isSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC) {
// The compare and select operand should be the same or the select operands
@@ -5132,6 +5475,26 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
if (!Opcode0)
return SDValue();
+  // We may only need one range check if the fptosi can never produce
+  // the upper value.
+ if (N0.getOpcode() == ISD::FP_TO_SINT && Opcode0 == ISD::SMAX) {
+ if (isNullOrNullSplat(N3)) {
+ EVT IntVT = N0.getValueType().getScalarType();
+ EVT FPVT = N0.getOperand(0).getValueType().getScalarType();
+ if (FPVT.isSimple()) {
+ Type *InputTy = FPVT.getTypeForEVT(*DAG.getContext());
+ const fltSemantics &Semantics = InputTy->getFltSemantics();
+ uint32_t MinBitWidth =
+ APFloatBase::semanticsIntSizeInBits(Semantics, /*isSigned*/ true);
+ if (IntVT.getSizeInBits() >= MinBitWidth) {
+ Unsigned = true;
+ BW = PowerOf2Ceil(MinBitWidth);
+ return N0;
+ }
+ }
+ }
+ }
+
SDValue N00, N01, N02, N03;
ISD::CondCode N0CC;
switch (N0.getOpcode()) {
@@ -5194,7 +5557,7 @@ static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
SelectionDAG &DAG) {
unsigned BW;
bool Unsigned;
- SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned);
+ SDValue Fp = isSaturatingMinMax(N0, N1, N2, N3, CC, BW, Unsigned, DAG);
if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
return SDValue();
EVT FPVT = Fp.getOperand(0).getValueType();
@@ -5208,8 +5571,7 @@ static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
SDLoc DL(Fp);
SDValue Sat = DAG.getNode(NewOpc, DL, NewVT, Fp.getOperand(0),
DAG.getValueType(NewVT.getScalarType()));
- return Unsigned ? DAG.getZExtOrTrunc(Sat, DL, N2->getValueType(0))
- : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
+ return DAG.getExtOrTrunc(!Unsigned, Sat, DL, N2->getValueType(0));
}
static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
@@ -5298,6 +5660,25 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
return S;
+ // Fold min/max(vecreduce(x), vecreduce(y)) -> vecreduce(min/max(x, y))
+ auto ReductionOpcode = [](unsigned Opcode) {
+ switch (Opcode) {
+ case ISD::SMIN:
+ return ISD::VECREDUCE_SMIN;
+ case ISD::SMAX:
+ return ISD::VECREDUCE_SMAX;
+ case ISD::UMIN:
+ return ISD::VECREDUCE_UMIN;
+ case ISD::UMAX:
+ return ISD::VECREDUCE_UMAX;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+ };
+ if (SDValue SD = reassociateReduction(ReductionOpcode(Opcode), Opcode,
+ SDLoc(N), VT, N0, N1))
+ return SD;
+
// Simplify the operands using demanded-bits information.
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -5312,8 +5693,7 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
EVT VT = N0.getValueType();
unsigned LogicOpcode = N->getOpcode();
unsigned HandOpcode = N0.getOpcode();
- assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
- LogicOpcode == ISD::XOR) && "Expected logic opcode");
+ assert(ISD::isBitwiseLogicOp(LogicOpcode) && "Expected logic opcode");
assert(HandOpcode == N1.getOpcode() && "Bad input!");
// Bail early if none of these transforms apply.
@@ -5323,13 +5703,14 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
// FIXME: We should check number of uses of the operands to not increase
// the instruction count for all transforms.
- // Handle size-changing casts.
+ // Handle size-changing casts (or sign_extend_inreg).
SDValue X = N0.getOperand(0);
SDValue Y = N1.getOperand(0);
EVT XVT = X.getValueType();
SDLoc DL(N);
- if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
- HandOpcode == ISD::SIGN_EXTEND) {
+ if (ISD::isExtOpcode(HandOpcode) || ISD::isExtVecInRegOpcode(HandOpcode) ||
+ (HandOpcode == ISD::SIGN_EXTEND_INREG &&
+ N0.getOperand(1) == N1.getOperand(1))) {
// If both operands have other uses, this transform would create extra
// instructions without eliminating anything.
if (!N0.hasOneUse() && !N1.hasOneUse())
@@ -5344,11 +5725,14 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
return SDValue();
// Avoid infinite looping with PromoteIntBinOp.
// TODO: Should we apply desirable/legal constraints to all opcodes?
- if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
- !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
+ if ((HandOpcode == ISD::ANY_EXTEND ||
+ HandOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ LegalTypes && !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
return SDValue();
// logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
+ if (HandOpcode == ISD::SIGN_EXTEND_INREG)
+ return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
return DAG.getNode(HandOpcode, DL, VT, Logic);
}
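A minimal check of the hand-op hoisting identity for one representative opcode pair (zero_extend as the hand op, AND as the logic op); a plain C++ sketch, not the DAG code:

#include <cassert>
#include <cstdint>

int main() {
  // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y),
  // instantiated as (zext i8 X to i32) & (zext i8 Y to i32)
  //              == zext i8 (X & Y) to i32.
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y) {
      uint32_t lhs = (uint32_t)(uint8_t)x & (uint32_t)(uint8_t)y;
      uint32_t rhs = (uint32_t)(uint8_t)((uint8_t)x & (uint8_t)y);
      assert(lhs == rhs);
    }
  return 0;
}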
@@ -5629,6 +6013,172 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
return SDValue();
}
+static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
+ using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
+ assert(
+ (LogicOp->getOpcode() == ISD::AND || LogicOp->getOpcode() == ISD::OR) &&
+ "Invalid Op to combine SETCC with");
+
+ // TODO: Search past casts/truncates.
+ SDValue LHS = LogicOp->getOperand(0);
+ SDValue RHS = LogicOp->getOperand(1);
+ if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ AndOrSETCCFoldKind TargetPreference = TLI.isDesirableToCombineLogicOpOfSETCC(
+ LogicOp, LHS.getNode(), RHS.getNode());
+
+ SDValue LHS0 = LHS->getOperand(0);
+ SDValue RHS0 = RHS->getOperand(0);
+ SDValue LHS1 = LHS->getOperand(1);
+ SDValue RHS1 = RHS->getOperand(1);
+ // TODO: We don't actually need a splat here, for vectors we just need the
+ // invariants to hold for each element.
+ auto *LHS1C = isConstOrConstSplat(LHS1);
+ auto *RHS1C = isConstOrConstSplat(RHS1);
+ ISD::CondCode CCL = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
+ ISD::CondCode CCR = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
+ EVT VT = LogicOp->getValueType(0);
+ EVT OpVT = LHS0.getValueType();
+ SDLoc DL(LogicOp);
+
+ // Check if the operands of an and/or operation are comparisons and if they
+ // compare against the same value. Replace the and/or-cmp-cmp sequence with
+  // compare against the same value. Replace the and/or-cmp-cmp sequence with a
+  // min/max cmp sequence. If LHS1 is equal to RHS1, then the or-cmp-cmp
+  // sequence will be replaced with a min-cmp sequence:
+  // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1
+  // and the and-cmp-cmp sequence will be replaced with a max-cmp sequence:
+ if (OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) &&
+ TLI.isOperationLegal(ISD::SMAX, OpVT) &&
+ TLI.isOperationLegal(ISD::UMIN, OpVT) &&
+ TLI.isOperationLegal(ISD::SMIN, OpVT)) {
+ if (LHS->getOpcode() == ISD::SETCC && RHS->getOpcode() == ISD::SETCC &&
+ LHS->hasOneUse() && RHS->hasOneUse() &&
+        // The two comparisons should either have the same predicate, or the
+        // predicate of one comparison should be the opposite of the other.
+ (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR)) &&
+ // The optimization does not work for `==` or `!=` .
+ !ISD::isIntEqualitySetCC(CCL) && !ISD::isIntEqualitySetCC(CCR)) {
+ SDValue CommonValue, Operand1, Operand2;
+ ISD::CondCode CC = ISD::SETCC_INVALID;
+ if (CCL == CCR) {
+ if (LHS0 == RHS0) {
+ CommonValue = LHS0;
+ Operand1 = LHS1;
+ Operand2 = RHS1;
+ CC = ISD::getSetCCSwappedOperands(CCL);
+ } else if (LHS1 == RHS1) {
+ CommonValue = LHS1;
+ Operand1 = LHS0;
+ Operand2 = RHS0;
+ CC = CCL;
+ }
+ } else {
+ assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC");
+ if (LHS0 == RHS1) {
+ CommonValue = LHS0;
+ Operand1 = LHS1;
+ Operand2 = RHS0;
+ CC = ISD::getSetCCSwappedOperands(CCL);
+ } else if (RHS0 == LHS1) {
+ CommonValue = LHS1;
+ Operand1 = LHS0;
+ Operand2 = RHS1;
+ CC = CCL;
+ }
+ }
+
+ if (CC != ISD::SETCC_INVALID) {
+ unsigned NewOpcode;
+ bool IsSigned = isSignedIntSetCC(CC);
+ if (((CC == ISD::SETLE || CC == ISD::SETULE || CC == ISD::SETLT ||
+ CC == ISD::SETULT) &&
+ (LogicOp->getOpcode() == ISD::OR)) ||
+ ((CC == ISD::SETGE || CC == ISD::SETUGE || CC == ISD::SETGT ||
+ CC == ISD::SETUGT) &&
+ (LogicOp->getOpcode() == ISD::AND)))
+ NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN;
+ else
+ NewOpcode = IsSigned ? ISD::SMAX : ISD::UMAX;
+
+ SDValue MinMaxValue =
+ DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2);
+ return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC);
+ }
+ }
+ }
+
+ if (TargetPreference == AndOrSETCCFoldKind::None)
+ return SDValue();
+
+ if (CCL == CCR &&
+ CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) &&
+ LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger() && LHS.hasOneUse() &&
+ RHS.hasOneUse()) {
+ const APInt &APLhs = LHS1C->getAPIntValue();
+ const APInt &APRhs = RHS1C->getAPIntValue();
+
+ // Preference is to use ISD::ABS or we already have an ISD::ABS (in which
+ // case this is just a compare).
+ if (APLhs == (-APRhs) &&
+ ((TargetPreference & AndOrSETCCFoldKind::ABS) ||
+ DAG.doesNodeExist(ISD::ABS, DAG.getVTList(OpVT), {LHS0}))) {
+ const APInt &C = APLhs.isNegative() ? APRhs : APLhs;
+ // (icmp eq A, C) | (icmp eq A, -C)
+ // -> (icmp eq Abs(A), C)
+ // (icmp ne A, C) & (icmp ne A, -C)
+ // -> (icmp ne Abs(A), C)
+ SDValue AbsOp = DAG.getNode(ISD::ABS, DL, OpVT, LHS0);
+ return DAG.getNode(ISD::SETCC, DL, VT, AbsOp,
+ DAG.getConstant(C, DL, OpVT), LHS.getOperand(2));
+ } else if (TargetPreference &
+ (AndOrSETCCFoldKind::AddAnd | AndOrSETCCFoldKind::NotAnd)) {
+
+ // AndOrSETCCFoldKind::AddAnd:
+ // A == C0 | A == C1
+ // IF IsPow2(smax(C0, C1)-smin(C0, C1))
+ // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) == 0
+ // A != C0 & A != C1
+ // IF IsPow2(smax(C0, C1)-smin(C0, C1))
+ // -> ((A - smin(C0, C1)) & ~(smax(C0, C1)-smin(C0, C1))) != 0
+
+ // AndOrSETCCFoldKind::NotAnd:
+ // A == C0 | A == C1
+ // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
+ // -> ~A & smin(C0, C1) == 0
+ // A != C0 & A != C1
+ // IF smax(C0, C1) == -1 AND IsPow2(smax(C0, C1) - smin(C0, C1))
+ // -> ~A & smin(C0, C1) != 0
+
+ const APInt &MaxC = APIntOps::smax(APRhs, APLhs);
+ const APInt &MinC = APIntOps::smin(APRhs, APLhs);
+ APInt Dif = MaxC - MinC;
+ if (!Dif.isZero() && Dif.isPowerOf2()) {
+ if (MaxC.isAllOnes() &&
+ (TargetPreference & AndOrSETCCFoldKind::NotAnd)) {
+ SDValue NotOp = DAG.getNOT(DL, LHS0, OpVT);
+ SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, NotOp,
+ DAG.getConstant(MinC, DL, OpVT));
+ return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
+ DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
+ } else if (TargetPreference & AndOrSETCCFoldKind::AddAnd) {
+
+ SDValue AddOp = DAG.getNode(ISD::ADD, DL, OpVT, LHS0,
+ DAG.getConstant(-MinC, DL, OpVT));
+ SDValue AndOp = DAG.getNode(ISD::AND, DL, OpVT, AddOp,
+ DAG.getConstant(~Dif, DL, OpVT));
+ return DAG.getNode(ISD::SETCC, DL, VT, AndOp,
+ DAG.getConstant(0, DL, OpVT), LHS.getOperand(2));
+ }
+ }
+ }
+ }
+
+ return SDValue();
+}
+
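A minimal brute-force check (unsigned i8, SETULT) of the and/or-of-setcc to min/max-of-setcc rewrite introduced above; a plain C++ sketch, not the DAG code:

#include <algorithm>
#include <cassert>

int main() {
  for (unsigned a = 0; a < 256; ++a)
    for (unsigned b = 0; b < 256; ++b)
      for (unsigned c = 0; c < 256; ++c) {
        // (a u< c) | (b u< c)  ==  umin(a, b) u< c
        assert(((a < c) || (b < c)) == (std::min(a, b) < c));
        // (a u< c) & (b u< c)  ==  umax(a, b) u< c
        assert(((a < c) && (b < c)) == (std::max(a, b) < c));
      }
  return 0;
}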
/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
@@ -5644,6 +6194,11 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
return V;
+ // Canonicalize:
+ // and(x, add) -> and(add, x)
+ if (N1.getOpcode() == ISD::ADD)
+ std::swap(N0, N1);
+
// TODO: Rewrite this to return a new 'AND' instead of using CombineTo.
if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
VT.getSizeInBits() <= 64 && N0->hasOneUse()) {
@@ -5655,8 +6210,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
// in a register.
APInt ADDC = ADDI->getAPIntValue();
APInt SRLC = SRLI->getAPIntValue();
- if (ADDC.getMinSignedBits() <= 64 &&
- SRLC.ult(VT.getSizeInBits()) &&
+ if (ADDC.getSignificantBits() <= 64 && SRLC.ult(VT.getSizeInBits()) &&
!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
SRLC.getZExtValue());
@@ -5677,55 +6231,6 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
}
}
- // Reduce bit extract of low half of an integer to the narrower type.
- // (and (srl i64:x, K), KMask) ->
- // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
- if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
- if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
- if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- unsigned Size = VT.getSizeInBits();
- const APInt &AndMask = CAnd->getAPIntValue();
- unsigned ShiftBits = CShift->getZExtValue();
-
- // Bail out, this node will probably disappear anyway.
- if (ShiftBits == 0)
- return SDValue();
-
- unsigned MaskBits = AndMask.countTrailingOnes();
- EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
-
- if (AndMask.isMask() &&
- // Required bits must not span the two halves of the integer and
- // must fit in the half size type.
- (ShiftBits + MaskBits <= Size / 2) &&
- TLI.isNarrowingProfitable(VT, HalfVT) &&
- TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
- TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
- TLI.isTruncateFree(VT, HalfVT) &&
- TLI.isZExtFree(HalfVT, VT)) {
- // The isNarrowingProfitable is to avoid regressions on PPC and
- // AArch64 which match a few 64-bit bit insert / bit extract patterns
- // on downstream users of this. Those patterns could probably be
- // extended to handle extensions mixed in.
-
- SDValue SL(N0);
- assert(MaskBits <= Size);
-
- // Extracting the highest bit of the low half.
- EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
- N0.getOperand(0));
-
- SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
- SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
- SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
- SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
- return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
- }
- }
- }
- }
-
return SDValue();
}
@@ -5734,7 +6239,7 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
if (!AndC->getAPIntValue().isMask())
return false;
- unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
+ unsigned ActiveBits = AndC->getAPIntValue().countr_one();
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
EVT LoadedVT = LoadN->getMemoryVT();
@@ -5898,7 +6403,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
}
case ISD::ZERO_EXTEND:
case ISD::AssertZext: {
- unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
+ unsigned ActiveBits = Mask->getAPIntValue().countr_one();
EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
EVT VT = Op.getOpcode() == ISD::AssertZext ?
cast<VTSDNode>(Op.getOperand(1))->getVT() :
@@ -6071,12 +6576,6 @@ SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
- // This is probably not worthwhile without a supported type.
- EVT VT = And->getValueType(0);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!TLI.isTypeLegal(VT))
- return SDValue();
-
// Look through an optional extension.
SDValue And0 = And->getOperand(0), And1 = And->getOperand(1);
if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse())
@@ -6104,13 +6603,17 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse())
return SDValue();
+ // This is probably not worthwhile without a supported type.
+ EVT SrcVT = Src.getValueType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(SrcVT))
+ return SDValue();
+
// We might have looked through casts that make this transform invalid.
- // TODO: If the source type is wider than the result type, do the mask and
- // compare in the source type.
- unsigned VTBitWidth = VT.getScalarSizeInBits();
+ unsigned BitWidth = SrcVT.getScalarSizeInBits();
SDValue ShiftAmt = Src.getOperand(1);
auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt);
- if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(VTBitWidth))
+ if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(BitWidth))
return SDValue();
// Set source to shift source.
@@ -6131,14 +6634,15 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
// and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
// and (srl (not X), C)), 1 --> (and X, 1<<C) == 0
SDLoc DL(And);
- SDValue X = DAG.getZExtOrTrunc(Src, DL, VT);
- EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue X = DAG.getZExtOrTrunc(Src, DL, SrcVT);
+ EVT CCVT =
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
SDValue Mask = DAG.getConstant(
- APInt::getOneBitSet(VTBitWidth, ShiftAmtC->getZExtValue()), DL, VT);
- SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
- SDValue Zero = DAG.getConstant(0, DL, VT);
+ APInt::getOneBitSet(BitWidth, ShiftAmtC->getZExtValue()), DL, SrcVT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, SrcVT, X, Mask);
+ SDValue Zero = DAG.getConstant(0, DL, SrcVT);
SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
- return DAG.getZExtOrTrunc(Setcc, DL, VT);
+ return DAG.getZExtOrTrunc(Setcc, DL, And->getValueType(0));
}
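A minimal check of the bit-test identity this combine relies on, over a 16-bit range of inputs (plain C++ sketch, not the DAG code):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < (1u << 16); ++x)
    for (unsigned c = 0; c < 16; ++c) {
      // and (not (srl X, C)), 1  -->  (and X, 1<<C) == 0
      uint32_t lhs = (~(x >> c)) & 1u;
      uint32_t rhs = (x & (1u << c)) == 0 ? 1u : 0u;
      assert(lhs == rhs);
    }
  return 0;
}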
/// For targets that support usubsat, match a bit-hack form of that operation
@@ -6181,9 +6685,8 @@ static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) {
static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
SelectionDAG &DAG) {
unsigned LogicOpcode = N->getOpcode();
- assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
- LogicOpcode == ISD::XOR)
- && "Expected bitwise logic operation");
+ assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
+ "Expected bitwise logic operation");
if (!LogicOp.hasOneUse() || !ShiftOp.hasOneUse())
return SDValue();
@@ -6230,8 +6733,8 @@ static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp,
static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand,
SDValue RightHand, SelectionDAG &DAG) {
unsigned LogicOpcode = N->getOpcode();
- assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
- LogicOpcode == ISD::XOR));
+ assert(ISD::isBitwiseLogicOp(LogicOpcode) &&
+ "Expected bitwise logic operation");
if (LeftHand.getOpcode() != LogicOpcode ||
RightHand.getOpcode() != LogicOpcode)
return SDValue();
@@ -6276,6 +6779,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
!DAG.isConstantIntBuildVectorOrConstantInt(N1))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
+ if (areBitwiseNotOfEachother(N0, N1))
+ return DAG.getConstant(APInt::getZero(VT.getScalarSizeInBits()), SDLoc(N),
+ VT);
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N)))
@@ -6330,6 +6837,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
+ if (SDValue R = foldAndOrOfSETCC(N, DAG))
+ return R;
+
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -6337,6 +6847,11 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
return RAND;
+ // Fold and(vecreduce(x), vecreduce(y)) -> vecreduce(and(x, y))
+ if (SDValue SD = reassociateReduction(ISD::VECREDUCE_AND, ISD::AND, SDLoc(N),
+ VT, N0, N1))
+ return SD;
+
// fold (and (or x, C), D) -> D if (C & D) == D
auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
@@ -6345,13 +6860,27 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
return N1;
- // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N0Op0 = N0.getOperand(0);
+ EVT SrcVT = N0Op0.getValueType();
+ unsigned SrcBitWidth = SrcVT.getScalarSizeInBits();
APInt Mask = ~N1C->getAPIntValue();
- Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
+ Mask = Mask.trunc(SrcBitWidth);
+
+ // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (DAG.MaskedValueIsZero(N0Op0, Mask))
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N0.getValueType(), N0Op0);
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0Op0);
+
+ // fold (and (any_ext V), c) -> (zero_ext (and (trunc V), c)) if profitable.
+ if (N1C->getAPIntValue().countLeadingZeros() >= (BitWidth - SrcBitWidth) &&
+ TLI.isTruncateFree(VT, SrcVT) && TLI.isZExtFree(SrcVT, VT) &&
+ TLI.isTypeDesirableForOp(ISD::AND, SrcVT) &&
+ TLI.isNarrowingProfitable(VT, SrcVT)) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT,
+ DAG.getNode(ISD::AND, DL, SrcVT, N0Op0,
+ DAG.getZExtOrTrunc(N1, DL, SrcVT)));
+ }
}
// fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2)))
@@ -7046,24 +7575,39 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
SDNode *N) {
EVT VT = N0.getValueType();
- if (N0.getOpcode() == ISD::AND) {
- SDValue N00 = N0.getOperand(0);
- SDValue N01 = N0.getOperand(1);
+
+ auto peekThroughResize = [](SDValue V) {
+ if (V->getOpcode() == ISD::ZERO_EXTEND || V->getOpcode() == ISD::TRUNCATE)
+ return V->getOperand(0);
+ return V;
+ };
+
+ SDValue N0Resized = peekThroughResize(N0);
+ if (N0Resized.getOpcode() == ISD::AND) {
+ SDValue N1Resized = peekThroughResize(N1);
+ SDValue N00 = N0Resized.getOperand(0);
+ SDValue N01 = N0Resized.getOperand(1);
// fold or (and x, y), x --> x
- if (N00 == N1 || N01 == N1)
+ if (N00 == N1Resized || N01 == N1Resized)
return N1;
// fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
// TODO: Set AllowUndefs = true.
- if (getBitwiseNotOperand(N01, N00,
- /* AllowUndefs */ false) == N1)
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1);
+ if (SDValue NotOperand = getBitwiseNotOperand(N01, N00,
+ /* AllowUndefs */ false)) {
+ if (peekThroughResize(NotOperand) == N1Resized)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT,
+ DAG.getZExtOrTrunc(N00, SDLoc(N), VT), N1);
+ }
// fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
- if (getBitwiseNotOperand(N00, N01,
- /* AllowUndefs */ false) == N1)
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1);
+ if (SDValue NotOperand = getBitwiseNotOperand(N00, N01,
+ /* AllowUndefs */ false)) {
+ if (peekThroughResize(NotOperand) == N1Resized)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT,
+ DAG.getZExtOrTrunc(N01, SDLoc(N), VT), N1);
+ }
}
if (N0.getOpcode() == ISD::XOR) {
@@ -7215,6 +7759,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
+ if (SDValue R = foldAndOrOfSETCC(N, DAG))
+ return R;
+
if (SDValue Combined = visitORLike(N0, N1, N))
return Combined;
@@ -7231,6 +7778,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
return ROR;
+ // Fold or(vecreduce(x), vecreduce(y)) -> vecreduce(or(x, y))
+ if (SDValue SD = reassociateReduction(ISD::VECREDUCE_OR, ISD::OR, SDLoc(N),
+ VT, N0, N1))
+ return SD;
+
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
// iff (c1 & c2) != 0 or c1/c2 are undef.
auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
@@ -7898,42 +8450,6 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
return SDValue();
}
-namespace {
-
-/// Represents known origin of an individual byte in load combine pattern. The
-/// value of the byte is either constant zero or comes from memory.
-struct ByteProvider {
- // For constant zero providers Load is set to nullptr. For memory providers
- // Load represents the node which loads the byte from memory.
- // ByteOffset is the offset of the byte in the value produced by the load.
- LoadSDNode *Load = nullptr;
- unsigned ByteOffset = 0;
- unsigned VectorOffset = 0;
-
- ByteProvider() = default;
-
- static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset,
- unsigned VectorOffset) {
- return ByteProvider(Load, ByteOffset, VectorOffset);
- }
-
- static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0, 0); }
-
- bool isConstantZero() const { return !Load; }
- bool isMemory() const { return Load; }
-
- bool operator==(const ByteProvider &Other) const {
- return Other.Load == Load && Other.ByteOffset == ByteOffset &&
- Other.VectorOffset == VectorOffset;
- }
-
-private:
- ByteProvider(LoadSDNode *Load, unsigned ByteOffset, unsigned VectorOffset)
- : Load(Load), ByteOffset(ByteOffset), VectorOffset(VectorOffset) {}
-};
-
-} // end anonymous namespace
-
/// Recursively traverses the expression calculating the origin of the requested
/// byte of the given value. Returns std::nullopt if the provider can't be
/// calculated.
@@ -7975,7 +8491,9 @@ private:
/// LOAD
///
/// *ExtractVectorElement
-static const std::optional<ByteProvider>
+using SDByteProvider = ByteProvider<SDNode *>;
+
+static const std::optional<SDByteProvider>
calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
std::optional<uint64_t> VectorIndex,
unsigned StartingIndex = 0) {
@@ -8034,7 +8552,7 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
// provide, then do not provide anything. Otherwise, subtract the index by
// the amount we shifted by.
return Index < ByteShift
- ? ByteProvider::getConstantZero()
+ ? SDByteProvider::getConstantZero()
: calculateByteProvider(Op->getOperand(0), Index - ByteShift,
Depth + 1, VectorIndex, Index);
}
@@ -8049,7 +8567,8 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
if (Index >= NarrowByteWidth)
return Op.getOpcode() == ISD::ZERO_EXTEND
- ? std::optional<ByteProvider>(ByteProvider::getConstantZero())
+ ? std::optional<SDByteProvider>(
+ SDByteProvider::getConstantZero())
: std::nullopt;
return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex,
StartingIndex);
@@ -8099,11 +8618,12 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
// question
if (Index >= NarrowByteWidth)
return L->getExtensionType() == ISD::ZEXTLOAD
- ? std::optional<ByteProvider>(ByteProvider::getConstantZero())
+ ? std::optional<SDByteProvider>(
+ SDByteProvider::getConstantZero())
: std::nullopt;
unsigned BPVectorIndex = VectorIndex.value_or(0U);
- return ByteProvider::getMemory(L, Index, BPVectorIndex);
+ return SDByteProvider::getSrc(L, Index, BPVectorIndex);
}
}
@@ -8191,9 +8711,12 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
!N->isSimple() || N->isIndexed())
return SDValue();
- // Collect all of the stores in the chain.
+  // Collect all of the stores in the chain, up to the maximum store width (i64).
SDValue Chain = N->getChain();
SmallVector<StoreSDNode *, 8> Stores = {N};
+ unsigned NarrowNumBits = MemVT.getScalarSizeInBits();
+ unsigned MaxWideNumBits = 64;
+ unsigned MaxStores = MaxWideNumBits / NarrowNumBits;
while (auto *Store = dyn_cast<StoreSDNode>(Chain)) {
// All stores must be the same size to ensure that we are writing all of the
// bytes in the wide value.
@@ -8207,6 +8730,8 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
return SDValue();
Stores.push_back(Store);
Chain = Store->getChain();
+ if (MaxStores < Stores.size())
+ return SDValue();
}
// There is no reason to continue if we do not have at least a pair of stores.
if (Stores.size() < 2)
@@ -8215,7 +8740,6 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) {
// Handle simple types only.
LLVMContext &Context = *DAG.getContext();
unsigned NumStores = Stores.size();
- unsigned NarrowNumBits = N->getMemoryVT().getScalarSizeInBits();
unsigned WideNumBits = NumStores * NarrowNumBits;
EVT WideVT = EVT::getIntegerVT(Context, WideNumBits);
if (WideVT != MVT::i16 && WideVT != MVT::i32 && WideVT != MVT::i64)
@@ -8397,23 +8921,24 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
unsigned ByteWidth = VT.getSizeInBits() / 8;
bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
- auto MemoryByteOffset = [&] (ByteProvider P) {
- assert(P.isMemory() && "Must be a memory byte provider");
- unsigned LoadBitWidth = P.Load->getMemoryVT().getScalarSizeInBits();
+ auto MemoryByteOffset = [&](SDByteProvider P) {
+ assert(P.hasSrc() && "Must be a memory byte provider");
+ auto *Load = cast<LoadSDNode>(P.Src.value());
+
+ unsigned LoadBitWidth = Load->getMemoryVT().getScalarSizeInBits();
assert(LoadBitWidth % 8 == 0 &&
"can only analyze providers for individual bytes not bit");
unsigned LoadByteWidth = LoadBitWidth / 8;
- return IsBigEndianTarget
- ? bigEndianByteAt(LoadByteWidth, P.ByteOffset)
- : littleEndianByteAt(LoadByteWidth, P.ByteOffset);
+ return IsBigEndianTarget ? bigEndianByteAt(LoadByteWidth, P.DestOffset)
+ : littleEndianByteAt(LoadByteWidth, P.DestOffset);
};
std::optional<BaseIndexOffset> Base;
SDValue Chain;
SmallPtrSet<LoadSDNode *, 8> Loads;
- std::optional<ByteProvider> FirstByteProvider;
+ std::optional<SDByteProvider> FirstByteProvider;
int64_t FirstOffset = INT64_MAX;
// Check if all the bytes of the OR we are looking at are loaded from the same
@@ -8434,9 +8959,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
continue;
}
- assert(P->isMemory() && "provenance should either be memory or zero");
-
- LoadSDNode *L = P->Load;
+ assert(P->hasSrc() && "provenance should either be memory or zero");
+ auto *L = cast<LoadSDNode>(P->Src.value());
// All loads must share the same chain
SDValue LChain = L->getChain();
@@ -8460,7 +8984,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits();
if (LoadWidthInBit % 8 != 0)
return SDValue();
- unsigned ByteOffsetFromVector = P->VectorOffset * LoadWidthInBit / 8;
+ unsigned ByteOffsetFromVector = P->SrcOffset * LoadWidthInBit / 8;
Ptr.addToOffset(ByteOffsetFromVector);
}
@@ -8517,7 +9041,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// So the combined value can be loaded from the first load address.
if (MemoryByteOffset(*FirstByteProvider) != 0)
return SDValue();
- LoadSDNode *FirstLoad = FirstByteProvider->Load;
+ auto *FirstLoad = cast<LoadSDNode>(FirstByteProvider->Src.value());
// The node we are looking at matches with the pattern, check if we can
// replace it with a single (possibly zero-extended) load and bswap + shift if
@@ -8715,6 +9239,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
return RXOR;
+ // Fold xor(vecreduce(x), vecreduce(y)) -> vecreduce(xor(x, y))
+ if (SDValue SD =
+ reassociateReduction(ISD::VECREDUCE_XOR, ISD::XOR, DL, VT, N0, N1))
+ return SD;
+
// fold (a^b) -> (a|b) iff a and b share no bits.
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
DAG.haveNoCommonBitsSet(N0, N1))
@@ -9462,7 +9991,7 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
SDValue MulhRightOp;
if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
unsigned ActiveBits = IsSignExt
- ? Constant->getAPIntValue().getMinSignedBits()
+ ? Constant->getAPIntValue().getSignificantBits()
: Constant->getAPIntValue().getActiveBits();
if (ActiveBits > NarrowVTSize)
return SDValue();
@@ -9499,14 +10028,59 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG,
  // we use mulhs. Otherwise, zero extends (zext) use mulhu.
unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU;
- // Combine to mulh if mulh is legal/custom for the narrow type on the target.
- if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
- return SDValue();
+  // Combine to mulh if mulh is legal/custom for the narrow type on the target,
+  // or, for vector types, if we can transform to an acceptable type and rely
+  // on legalization to split/combine the result.
+ if (NarrowVT.isVector()) {
+ EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), NarrowVT);
+ if (TransformVT.getVectorElementType() != NarrowVT.getVectorElementType() ||
+ !TLI.isOperationLegalOrCustom(MulhOpcode, TransformVT))
+ return SDValue();
+ } else {
+ if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT))
+ return SDValue();
+ }
SDValue Result =
DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp);
- return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT)
- : DAG.getZExtOrTrunc(Result, DL, WideVT));
+ bool IsSigned = N->getOpcode() == ISD::SRA;
+ return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
+}
+
+// fold (bswap (logic_op(bswap(x),y))) -> logic_op(x,bswap(y))
+// This helper function accepts SDNodes with opcode ISD::BSWAP or ISD::BITREVERSE.
+static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opcode = N->getOpcode();
+ if (Opcode != ISD::BSWAP && Opcode != ISD::BITREVERSE)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ if (ISD::isBitwiseLogicOp(N0.getOpcode()) && N0.hasOneUse()) {
+ SDValue OldLHS = N0.getOperand(0);
+ SDValue OldRHS = N0.getOperand(1);
+
+    // If both operands are bswap/bitreverse, ignore the multiuse check.
+    // Otherwise we need to ensure logic_op and bswap/bitreverse(x) have one use.
+ if (OldLHS.getOpcode() == Opcode && OldRHS.getOpcode() == Opcode) {
+ return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
+ OldRHS.getOperand(0));
+ }
+
+ if (OldLHS.getOpcode() == Opcode && OldLHS.hasOneUse()) {
+ SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldRHS);
+ return DAG.getNode(N0.getOpcode(), DL, VT, OldLHS.getOperand(0),
+ NewBitReorder);
+ }
+
+ if (OldRHS.getOpcode() == Opcode && OldRHS.hasOneUse()) {
+ SDValue NewBitReorder = DAG.getNode(Opcode, DL, VT, OldLHS);
+ return DAG.getNode(N0.getOpcode(), DL, VT, NewBitReorder,
+ OldRHS.getOperand(0));
+ }
+ }
+ return SDValue();
}
SDValue DAGCombiner::visitSRA(SDNode *N) {
@@ -9892,8 +10466,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
}
- // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
+  // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit), and x
+  // has a power-of-two bitwidth. The "5" represents (log2 (bitwidth x)).
if (N1C && N0.getOpcode() == ISD::CTLZ &&
+ isPowerOf2_32(OpSizeInBits) &&
N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
@@ -9912,7 +10488,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// could be set on input to the CTLZ node. If this bit is set, the SRL
// will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
// to an SRL/XOR pair, which is likely to simplify more.
- unsigned ShAmt = UnknownBits.countTrailingZeros();
+ unsigned ShAmt = UnknownBits.countr_zero();
SDValue Op = N0.getOperand(0);
if (ShAmt) {
@@ -10138,13 +10714,23 @@ SDValue DAGCombiner::visitSHLSAT(SDNode *N) {
return SDValue();
}
-// Given a ABS node, detect the following pattern:
+// Given an ABS node, detect the following patterns:
// (ABS (SUB (EXTEND a), (EXTEND b))).
+// (TRUNC (ABS (SUB (EXTEND a), (EXTEND b)))).
// Generates UABD/SABD instruction.
SDValue DAGCombiner::foldABSToABD(SDNode *N) {
+ EVT SrcVT = N->getValueType(0);
+
+ if (N->getOpcode() == ISD::TRUNCATE)
+ N = N->getOperand(0).getNode();
+
+ if (N->getOpcode() != ISD::ABS)
+ return SDValue();
+
EVT VT = N->getValueType(0);
SDValue AbsOp1 = N->getOperand(0);
SDValue Op0, Op1;
+ SDLoc DL(N);
if (AbsOp1.getOpcode() != ISD::SUB)
return SDValue();
@@ -10157,9 +10743,11 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) {
if (Opc0 != Op1.getOpcode() ||
(Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) {
// fold (abs (sub nsw x, y)) -> abds(x, y)
- if (AbsOp1->getFlags().hasNoSignedWrap() &&
- TLI.isOperationLegalOrCustom(ISD::ABDS, VT))
- return DAG.getNode(ISD::ABDS, SDLoc(N), VT, Op0, Op1);
+ if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
+ TLI.preferABDSToABSWithNSW(VT)) {
+ SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
+ return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
+ }
return SDValue();
}
@@ -10170,17 +10758,20 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) {
// fold abs(sext(x) - sext(y)) -> zext(abds(x, y))
// fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
// NOTE: Extensions must be equivalent.
- if (VT1 == VT2 && TLI.isOperationLegalOrCustom(ABDOpcode, VT1)) {
+ if (VT1 == VT2 && hasOperation(ABDOpcode, VT1)) {
Op0 = Op0.getOperand(0);
Op1 = Op1.getOperand(0);
- SDValue ABD = DAG.getNode(ABDOpcode, SDLoc(N), VT1, Op0, Op1);
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, ABD);
+ SDValue ABD = DAG.getNode(ABDOpcode, DL, VT1, Op0, Op1);
+ ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD);
+ return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
}
// fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
// fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
- if (TLI.isOperationLegalOrCustom(ABDOpcode, VT))
- return DAG.getNode(ABDOpcode, SDLoc(N), VT, Op0, Op1);
+ if (hasOperation(ABDOpcode, VT)) {
+ SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
+ return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
+ }
return SDValue();
}
@@ -10190,8 +10781,8 @@ SDValue DAGCombiner::visitABS(SDNode *N) {
EVT VT = N->getValueType(0);
// fold (abs c1) -> c2
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
- return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, SDLoc(N), VT, {N0}))
+ return C;
// fold (abs (abs x)) -> (abs x)
if (N0.getOpcode() == ISD::ABS)
return N0;
@@ -10277,6 +10868,9 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) {
}
}
+ if (SDValue V = foldBitOrderCrossLogicOp(N, DAG))
+ return V;
+
return SDValue();
}
@@ -10447,7 +11041,8 @@ SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
if (NegRHS == False) {
SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue,
False, CC, TLI, DAG);
- return DAG.getNode(ISD::FNEG, DL, VT, Combined);
+ if (Combined)
+ return DAG.getNode(ISD::FNEG, DL, VT, Combined);
}
}
}
@@ -11091,6 +11686,23 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
if (ISD::isConstantSplatVectorAllZeros(Mask.getNode()))
return Chain;
+ // Remove a masked store if base pointers and masks are equal.
+ if (MaskedStoreSDNode *MST1 = dyn_cast<MaskedStoreSDNode>(Chain)) {
+ if (MST->isUnindexed() && MST->isSimple() && MST1->isUnindexed() &&
+ MST1->isSimple() && MST1->getBasePtr() == Ptr &&
+ !MST->getBasePtr().isUndef() &&
+ ((Mask == MST1->getMask() && MST->getMemoryVT().getStoreSize() ==
+ MST1->getMemoryVT().getStoreSize()) ||
+ ISD::isConstantSplatVectorAllOnes(Mask.getNode())) &&
+ TypeSize::isKnownLE(MST1->getMemoryVT().getStoreSize(),
+ MST->getMemoryVT().getStoreSize())) {
+ CombineTo(MST1, MST1->getChain());
+ if (N->getOpcode() != ISD::DELETED_NODE)
+ AddToWorklist(N);
+ return SDValue(N, 0);
+ }
+ }
+
  // If this is a masked store with an all ones mask, we can use an unmasked store.
// FIXME: Can we do this for indexed, compressing, or truncating stores?
if (ISD::isConstantSplatVectorAllOnes(Mask.getNode()) && MST->isUnindexed() &&
@@ -11391,6 +12003,38 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
}
}
+ // Match VSELECTs with absolute difference patterns.
+ // (vselect (setcc a, b, set?gt), (sub a, b), (sub b, a)) --> (abd? a, b)
+ // (vselect (setcc a, b, set?ge), (sub a, b), (sub b, a)) --> (abd? a, b)
+ // (vselect (setcc a, b, set?lt), (sub b, a), (sub a, b)) --> (abd? a, b)
+ // (vselect (setcc a, b, set?le), (sub b, a), (sub a, b)) --> (abd? a, b)
+ if (N1.getOpcode() == ISD::SUB && N2.getOpcode() == ISD::SUB &&
+ N1.getOperand(0) == N2.getOperand(1) &&
+ N1.getOperand(1) == N2.getOperand(0)) {
+ bool IsSigned = isSignedIntSetCC(CC);
+ unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
+ if (hasOperation(ABDOpc, VT)) {
+ switch (CC) {
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ if (LHS == N1.getOperand(0) && RHS == N1.getOperand(1))
+ return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+ break;
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ if (RHS == N1.getOperand(0) && LHS == N1.getOperand(1))
+ return DAG.getNode(ABDOpc, DL, VT, LHS, RHS);
+ break;
+ default:
+ break;
+ }
+ }
+ }
+
// Match VSELECTs into add with unsigned saturation.
if (hasOperation(ISD::UADDSAT, VT)) {
// Check if one of the arms of the VSELECT is vector with all bits set.
@@ -11612,57 +12256,6 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
EVT VT = N->getValueType(0);
- // SETCC(FREEZE(X), CONST, Cond)
- // =>
- // FREEZE(SETCC(X, CONST, Cond))
- // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
- // isn't equivalent to true or false.
- // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
- // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
- //
- // This transformation is beneficial because visitBRCOND can fold
- // BRCOND(FREEZE(X)) to BRCOND(X).
-
- // Conservatively optimize integer comparisons only.
- if (PreferSetCC) {
- // Do this only when SETCC is going to be used by BRCOND.
-
- SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- bool Updated = false;
-
- // Is 'X Cond C' always true or false?
- auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
- bool False = (Cond == ISD::SETULT && C->isZero()) ||
- (Cond == ISD::SETLT && C->isMinSignedValue()) ||
- (Cond == ISD::SETUGT && C->isAllOnes()) ||
- (Cond == ISD::SETGT && C->isMaxSignedValue());
- bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
- (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
- (Cond == ISD::SETUGE && C->isZero()) ||
- (Cond == ISD::SETGE && C->isMinSignedValue());
- return True || False;
- };
-
- if (N0->getOpcode() == ISD::FREEZE && N0.hasOneUse() && N1C) {
- if (!IsAlwaysTrueOrFalse(Cond, N1C)) {
- N0 = N0->getOperand(0);
- Updated = true;
- }
- }
- if (N1->getOpcode() == ISD::FREEZE && N1.hasOneUse() && N0C) {
- if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond),
- N0C)) {
- N1 = N1->getOperand(0);
- Updated = true;
- }
- }
-
- if (Updated)
- return DAG.getFreeze(DAG.getSetCC(SDLoc(N), VT, N0, N1, Cond));
- }
-
SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond,
SDLoc(N), !PreferSetCC);
@@ -11733,7 +12326,8 @@ static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) {
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG,
+ CombineLevel Level) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -11758,10 +12352,14 @@ static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI,
else if (Opcode == ISD::ZERO_EXTEND)
ExtLoadOpcode = ISD::ZEXTLOAD;
+ // An illegal VSELECT may fail instruction selection if it occurs after
+ // legalization (DAG Combine2), so conservatively check the OperationAction.
LoadSDNode *Load1 = cast<LoadSDNode>(Op1);
LoadSDNode *Load2 = cast<LoadSDNode>(Op2);
if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) ||
- !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()))
+ !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) ||
+ (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes &&
+ TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal))
return SDValue();
SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1);
@@ -11782,11 +12380,7 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
EVT VT = N->getValueType(0);
SDLoc DL(N);
- assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
- Opcode == ISD::ANY_EXTEND ||
- Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
- Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
- Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ assert((ISD::isExtOpcode(Opcode) || ISD::isExtVecInRegOpcode(Opcode)) &&
"Expected EXTEND dag node in input!");
// fold (sext c1) -> c1
@@ -12052,8 +12646,7 @@ SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
// and/or/xor
SDValue N0 = N->getOperand(0);
- if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
- N0.getOpcode() == ISD::XOR) ||
+ if (!ISD::isBitwiseLogicOp(N0.getOpcode()) ||
N0.getOperand(1).getOpcode() != ISD::Constant ||
(LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
return SDValue();
@@ -12449,11 +13042,19 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
+ // fold (sext (aext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
+ // fold (sext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
+ return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
+ N0.getOperand(0));
+
// fold (sext (sext_inreg x)) -> (sext (trunc x))
if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
SDValue N00 = N0.getOperand(0);
EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
- if (N00.getOpcode() == ISD::TRUNCATE && (!LegalOperations || TLI.isTypeLegal(ExtVT))) {
+ if (N00.getOpcode() == ISD::TRUNCATE &&
+ (!LegalTypes || TLI.isTypeLegal(ExtVT))) {
SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00.getOperand(0));
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
}
@@ -12532,8 +13133,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// fold (sext (and/or/xor (load x), cst)) ->
// (and/or/xor (sextload x), (sext cst))
- if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
- N0.getOpcode() == ISD::XOR) &&
+ if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
@@ -12630,45 +13230,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
}
- if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
+ if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
return Res;
return SDValue();
}
-// isTruncateOf - If N is a truncate of some other value, return true, record
-// the value being truncated in Op and which of Op's bits are zero/one in Known.
-// This function computes KnownBits to avoid a duplicated call to
-// computeKnownBits in the caller.
-static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
- KnownBits &Known) {
- if (N->getOpcode() == ISD::TRUNCATE) {
- Op = N->getOperand(0);
- Known = DAG.computeKnownBits(Op);
- return true;
- }
-
- if (N.getOpcode() != ISD::SETCC ||
- N.getValueType().getScalarType() != MVT::i1 ||
- cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
- return false;
-
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
- assert(Op0.getValueType() == Op1.getValueType());
-
- if (isNullOrNullSplat(Op0))
- Op = Op1;
- else if (isNullOrNullSplat(Op1))
- Op = Op0;
- else
- return false;
-
- Known = DAG.computeKnownBits(Op);
-
- return (Known.Zero | 1).isAllOnes();
-}
-
/// Given an extending node with a pop-count operand, if the target does not
/// support a pop-count in the narrow source type but does support it in the
/// destination type, widen the pop-count to the destination type.
@@ -12722,14 +13289,15 @@ static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) {
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
if (VT.isVector())
- if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N)))
+ if (SDValue FoldedVOp = SimplifyVCastOp(N, DL))
return FoldedVOp;
// zext(undef) = 0
if (N0.isUndef())
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
return Res;
@@ -12737,7 +13305,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (zext x)) -> (zext x)
// fold (zext (aext x)) -> (zext x)
if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
+ return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+
+ // fold (zext (aext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
+ // fold (zext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)
+ return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(N), VT,
N0.getOperand(0));
// fold (zext (truncate x)) -> (zext x) or
@@ -12754,7 +13328,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
std::min(Op.getScalarValueSizeInBits(),
VT.getScalarSizeInBits()));
if (TruncatedBits.isSubsetOf(Known.Zero))
- return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
+ return DAG.getZExtOrTrunc(Op, DL, VT);
}
// fold (zext (truncate x)) -> (and x, mask)
@@ -12780,9 +13354,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
SDValue Op = N0.getOperand(0);
- Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
+ Op = DAG.getZeroExtendInReg(Op, DL, MinVT);
AddToWorklist(Op.getNode());
- SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
+ SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT);
// Transfer the debug info; the new node is equivalent to N0.
DAG.transferDbgValues(N0, ZExtOrTrunc);
return ZExtOrTrunc;
@@ -12790,9 +13364,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
- SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
+ SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), DL, VT);
AddToWorklist(Op.getNode());
- SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT);
+ SDValue And = DAG.getZeroExtendInReg(Op, DL, MinVT);
// We may safely transfer the debug info describing the truncate node over
// to the equivalent and operation.
DAG.transferDbgValues(N0, And);
@@ -12811,7 +13385,6 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue X = N0.getOperand(0).getOperand(0);
X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
- SDLoc DL(N);
return DAG.getNode(ISD::AND, DL, VT,
X, DAG.getConstant(Mask, DL, VT));
}
@@ -12836,8 +13409,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// (and/or/xor (zextload x), (zext cst))
// Unless (and (load x) cst) will match as a zextload already and has
// additional users.
- if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
- N0.getOpcode() == ISD::XOR) &&
+ if (ISD::isBitwiseLogicOp(N0.getOpcode()) &&
isa<LoadSDNode>(N0.getOperand(0)) &&
N0.getOperand(1).getOpcode() == ISD::Constant &&
(!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
@@ -12865,7 +13437,6 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
LN00->getMemoryVT(),
LN00->getMemOperand());
APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
- SDLoc DL(N);
SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
ExtLoad, DAG.getConstant(Mask, DL, VT));
ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
@@ -12919,7 +13490,6 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// that matter). Check to see that they are the same size. If so, we know
// that the element size of the sext'd result matches the element size of
// the compare operands.
- SDLoc DL(N);
if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
// zext(setcc) -> zext_in_reg(vsetcc) for vectors.
SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
@@ -12939,7 +13509,6 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
// zext(setcc x,y,cc) -> zext(select x, y, true, false, cc)
- SDLoc DL(N);
EVT N0VT = N0.getValueType();
EVT N00VT = N0.getOperand(0).getValueType();
if (SDValue SCC = SimplifySelectCC(
@@ -12952,29 +13521,29 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// (zext (shl (zext x), cst)) -> (shl (zext x), cst)
if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
- isa<ConstantSDNode>(N0.getOperand(1)) &&
- N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
- N0.hasOneUse()) {
+ !TLI.isZExtFree(N0, VT)) {
+ SDValue ShVal = N0.getOperand(0);
SDValue ShAmt = N0.getOperand(1);
- if (N0.getOpcode() == ISD::SHL) {
- SDValue InnerZExt = N0.getOperand(0);
- // If the original shl may be shifting out bits, do not perform this
- // transformation.
- unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
- InnerZExt.getOperand(0).getValueSizeInBits();
- if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
- return SDValue();
- }
-
- SDLoc DL(N);
+ if (auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt)) {
+ if (ShVal.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::SHL) {
+ // If the original shl may be shifting out bits, do not perform this
+ // transformation.
+ // TODO: Add MaskedValueIsZero check.
+ unsigned KnownZeroBits = ShVal.getValueSizeInBits() -
+ ShVal.getOperand(0).getValueSizeInBits();
+ if (ShAmtC->getAPIntValue().ugt(KnownZeroBits))
+ return SDValue();
+ }
- // Ensure that the shift amount is wide enough for the shifted value.
- if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
- ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
+ // Ensure that the shift amount is wide enough for the shifted value.
+ if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits())
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
- return DAG.getNode(N0.getOpcode(), DL, VT,
- DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
- ShAmt);
+ return DAG.getNode(N0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ShVal), ShAmt);
+ }
+ }
}
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
@@ -12986,7 +13555,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (SDValue V = widenAbs(N, DAG))
return V;
- if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
+ if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
return Res;
return SDValue();
@@ -13011,6 +13580,14 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
N0.getOpcode() == ISD::SIGN_EXTEND)
return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
+ // fold (aext (aext_extend_vector_inreg x)) -> (aext_extend_vector_inreg x)
+ // fold (aext (zext_extend_vector_inreg x)) -> (zext_extend_vector_inreg x)
+ // fold (aext (sext_extend_vector_inreg x)) -> (sext_extend_vector_inreg x)
+ if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
+ N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
+
// fold (aext (truncate (load x))) -> (aext (smaller load x))
// fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
@@ -13147,7 +13724,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
if (SDValue NewCtPop = widenCtPop(N, DAG))
return NewCtPop;
- if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG))
+ if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level))
return Res;
return SDValue();
@@ -13305,7 +13882,7 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
const APInt &Mask = AndC->getAPIntValue();
unsigned ActiveBits = 0;
if (Mask.isMask()) {
- ActiveBits = Mask.countTrailingOnes();
+ ActiveBits = Mask.countr_one();
} else if (Mask.isShiftedMask(ShAmt, ActiveBits)) {
HasShiftedOffset = true;
} else {
@@ -13373,8 +13950,8 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
isa<ConstantSDNode>(Mask->getOperand(1))) {
const APInt& ShiftMask = Mask->getConstantOperandAPInt(1);
if (ShiftMask.isMask()) {
- EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
- ShiftMask.countTrailingOnes());
+ EVT MaskedVT =
+ EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countr_one());
// If the mask is smaller, recompute the type.
if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
@@ -13520,9 +14097,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
// if x is small enough or if we know that x has more than 1 sign bit and the
// sign_extend_inreg is extending from one of them.
- if (N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
- N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
- N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
+ if (ISD::isExtVecInRegOpcode(N0.getOpcode())) {
SDValue N00 = N0.getOperand(0);
unsigned N00Bits = N00.getScalarValueSizeInBits();
unsigned DstElts = N0.getValueType().getVectorMinNumElements();
@@ -13543,7 +14118,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue N00 = N0.getOperand(0);
if (N00.getScalarValueSizeInBits() == ExtVTBits &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
- return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
+ return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
}
// fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
@@ -13690,9 +14265,7 @@ foldExtendVectorInregToExtendOfSubvector(SDNode *N, const TargetLowering &TLI,
Src.getValueType().getVectorElementType(),
VT.getVectorElementCount());
- assert((InregOpcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
- InregOpcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
- InregOpcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
+ assert(ISD::isExtVecInRegOpcode(InregOpcode) &&
"Expected EXTEND_VECTOR_INREG dag node in input!");
// Profitability check: our operand must be a one-use CONCAT_VECTORS.
@@ -13752,11 +14325,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
// fold (truncate c1) -> c1
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
- SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
- if (C.getNode() != N)
- return C;
- }
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, SDLoc(N), VT, {N0}))
+ return C;
// fold (truncate (ext x)) -> (ext x) or (truncate x) or x
if (N0.getOpcode() == ISD::ZERO_EXTEND ||
@@ -13860,6 +14430,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
if (SDValue V = foldSubToUSubSat(VT, N0.getNode()))
return V;
+ if (SDValue ABD = foldABSToABD(N))
+ return ABD;
+
// Attempt to pre-truncate BUILD_VECTOR sources.
if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) &&
@@ -14036,12 +14609,13 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
break;
case ISD::ADDE:
- case ISD::ADDCARRY:
+ case ISD::UADDO_CARRY:
// (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
- // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
+ // (trunc uaddo_carry(X, Y, Carry)) ->
+ // (uaddo_carry trunc(X), trunc(Y), Carry)
// When the adde's carry is not used.
- // We only do for addcarry before legalize operation
- if (((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
+ // We only do this for uaddo_carry before operation legalization
+ if (((!LegalOperations && N0.getOpcode() == ISD::UADDO_CARRY) ||
TLI.isOperationLegal(N0.getOpcode(), VT)) &&
N0.hasOneUse() && !N0->hasAnyUseOfValue(1)) {
SDLoc DL(N);
@@ -14114,18 +14688,19 @@ static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
return DAG.getDataLayout().isBigEndian() ? 1 : 0;
}
-static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
- const TargetLowering &TLI) {
+SDValue DAGCombiner::foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
// If this is not a bitcast to an FP type or if the target doesn't have
// IEEE754-compliant FP logic, we're done.
EVT VT = N->getValueType(0);
- if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
+ SDValue N0 = N->getOperand(0);
+ EVT SourceVT = N0.getValueType();
+
+ if (!VT.isFloatingPoint())
return SDValue();
// TODO: Handle cases where the integer constant is a different scalar
// bitwidth to the FP.
- SDValue N0 = N->getOperand(0);
- EVT SourceVT = N0.getValueType();
if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
return SDValue();
@@ -14148,6 +14723,19 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+ if (LegalOperations && !TLI.isOperationLegal(FPOpcode, VT))
+ return SDValue();
+
+ // This needs to be the inverse of the logic in foldSignChangeInBitcast.
+ // FIXME: I don't think looking for bitcast intrinsically makes sense, but
+ // removing this would require more changes.
+ auto IsBitCastOrFree = [&TLI, FPOpcode](SDValue Op, EVT VT) {
+ if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT)
+ return true;
+
+ return FPOpcode == ISD::FABS ? TLI.isFAbsFree(VT) : TLI.isFNegFree(VT);
+ };
+
// Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
// Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
// Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
@@ -14155,9 +14743,9 @@ static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
SDValue LogicOp0 = N0.getOperand(0);
ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
- LogicOp0.getOpcode() == ISD::BITCAST &&
- LogicOp0.getOperand(0).getValueType() == VT) {
- SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
+ IsBitCastOrFree(LogicOp0, VT)) {
+ SDValue CastOp0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, LogicOp0);
+ SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, CastOp0);
NumFPLogicOpsConv++;
if (N0.getOpcode() == ISD::OR)
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
@@ -14209,6 +14797,22 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (N0.getOpcode() == ISD::BITCAST)
return DAG.getBitcast(VT, N0.getOperand(0));
+ // fold (conv (logicop (conv x), (c))) -> (logicop x, (conv c))
+ // iff the current bitwise logicop type isn't legal
+ if (ISD::isBitwiseLogicOp(N0.getOpcode()) && VT.isInteger() &&
+ !TLI.isTypeLegal(N0.getOperand(0).getValueType())) {
+ auto IsFreeBitcast = [VT](SDValue V) {
+ return (V.getOpcode() == ISD::BITCAST &&
+ V.getOperand(0).getValueType() == VT) ||
+ (ISD::isBuildVectorOfConstantSDNodes(V.getNode()) &&
+ V->hasOneUse());
+ };
+ if (IsFreeBitcast(N0.getOperand(0)) && IsFreeBitcast(N0.getOperand(1)))
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
+ DAG.getBitcast(VT, N0.getOperand(0)),
+ DAG.getBitcast(VT, N0.getOperand(1)));
+ }
+
// fold (conv (load x)) -> (load (conv*)x)
// If the resultant load doesn't need a higher alignment than the original!
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
@@ -14437,7 +15041,9 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
N0->getNumValues() != 1 || !N0->hasOneUse())
return SDValue();
- bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR;
+ bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR ||
+ N0.getOpcode() == ISD::BUILD_PAIR ||
+ N0.getOpcode() == ISD::CONCAT_VECTORS;
SmallSetVector<SDValue, 8> MaybePoisonOperands;
for (SDValue Op : N0->ops()) {
@@ -14474,6 +15080,10 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
}
}
+ // This node has been merged with another.
+ if (N->getOpcode() == ISD::DELETED_NODE)
+ return SDValue(N, 0);
+
// The whole node may have been updated, so the value we were holding
// may no longer be valid. Re-fetch the operand we're `freeze`ing.
N0 = N->getOperand(0);
@@ -14585,21 +15195,26 @@ static bool hasNoInfs(const TargetOptions &Options, SDValue N) {
}
/// Try to perform FMA combining on a given FADD node.
+template <class MatchContextClass>
SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc SL(N);
-
+ MatchContextClass matcher(DAG, TLI, N);
const TargetOptions &Options = DAG.getTarget().Options;
+ bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
+
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
+ // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
+ // FIXME: Add VP_FMAD opcode.
+ bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
- (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
+ (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
@@ -14613,6 +15228,13 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
if (!AllowFusionGlobally && !N->getFlags().hasAllowContract())
return SDValue();
+ // Folding fadd (fmul x, y), (fmul x, y) -> fma x, y, (fmul x, y) is never
+ // beneficial. It does not reduce latency. It increases register pressure. It
+ // replaces an fadd with an fma which is a more complex instruction, so is
+ // likely to have a larger encoding, use more functional units, etc.
+ if (N0 == N1)
+ return SDValue();
+
if (TLI.generateFMAsInMachineCombiner(VT, OptLevel))
return SDValue();
@@ -14621,14 +15243,13 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
auto isFusedOp = [&](SDValue N) {
- unsigned Opcode = N.getOpcode();
- return Opcode == ISD::FMA || Opcode == ISD::FMAD;
+ return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
};
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
- auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
- if (N.getOpcode() != ISD::FMUL)
+ auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
+ if (!matcher.match(N, ISD::FMUL))
return false;
return AllowFusionGlobally || N->getFlags().hasAllowContract();
};
@@ -14641,15 +15262,15 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// fold (fadd (fmul x, y), z) -> (fma x, y, z)
if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
- N0.getOperand(1), N1);
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
+ N0.getOperand(1), N1);
}
// fold (fadd x, (fmul y, z)) -> (fma y, z, x)
// Note: Commutes FADD operands.
if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
- N1.getOperand(1), N0);
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(0),
+ N1.getOperand(1), N0);
}
// fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E)
@@ -14673,10 +15294,10 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
SDValue TmpFMA = FMA;
while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) {
SDValue FMul = TmpFMA->getOperand(2);
- if (FMul.getOpcode() == ISD::FMUL && FMul.hasOneUse()) {
+ if (matcher.match(FMul, ISD::FMUL) && FMul.hasOneUse()) {
SDValue C = FMul.getOperand(0);
SDValue D = FMul.getOperand(1);
- SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
+ SDValue CDE = matcher.getNode(PreferredFusedOpcode, SL, VT, C, D, E);
DAG.ReplaceAllUsesOfValueWith(FMul, CDE);
// Replacing the inner FMul could cause the outer FMA to be simplified
// away.
@@ -14690,29 +15311,29 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// Look through FP_EXTEND nodes to do more combining.
// fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
- if (N0.getOpcode() == ISD::FP_EXTEND) {
+ if (matcher.match(N0, ISD::FP_EXTEND)) {
SDValue N00 = N0.getOperand(0);
if (isContractableFMUL(N00) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
- N1);
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), N1);
}
}
// fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
// Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
+ if (matcher.match(N1, ISD::FP_EXTEND)) {
SDValue N10 = N1.getOperand(0);
if (isContractableFMUL(N10) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N10.getValueType())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)),
- N0);
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
}
}
@@ -14722,15 +15343,15 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// -> (fma x, y, (fma (fpext u), (fpext v), z))
auto FoldFAddFMAFPExtFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
SDValue Z) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z));
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT, X, Y,
+ matcher.getNode(PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
};
if (isFusedOp(N0)) {
SDValue N02 = N0.getOperand(2);
- if (N02.getOpcode() == ISD::FP_EXTEND) {
+ if (matcher.match(N02, ISD::FP_EXTEND)) {
SDValue N020 = N02.getOperand(0);
if (isContractableFMUL(N020) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
@@ -14749,12 +15370,13 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// interesting for all targets, especially GPUs.
auto FoldFAddFPExtFMAFMul = [&](SDValue X, SDValue Y, SDValue U, SDValue V,
SDValue Z) {
- return DAG.getNode(
- PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, X),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, Y),
+ matcher.getNode(PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, U),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, V), Z));
};
if (N0.getOpcode() == ISD::FP_EXTEND) {
SDValue N00 = N0.getOperand(0);
@@ -14810,20 +15432,26 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
/// Try to perform FMA combining on a given FSUB node.
+template <class MatchContextClass>
SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
SDLoc SL(N);
-
+ MatchContextClass matcher(DAG, TLI, N);
const TargetOptions &Options = DAG.getTarget().Options;
+
+ bool UseVP = std::is_same_v<MatchContextClass, VPMatchContext>;
+
// Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
+ // FIXME: Make isFMADLegal have specific behavior when using VPMatchContext.
+ // FIXME: Add VP_FMAD opcode.
+ bool HasFMAD = !UseVP && (LegalOperations && TLI.isFMADLegal(DAG, N));
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
- (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
+ (!LegalOperations || matcher.isOperationLegalOrCustom(ISD::FMA, VT));
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
@@ -14847,8 +15475,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
- auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
- if (N.getOpcode() != ISD::FMUL)
+ auto isContractableFMUL = [AllowFusionGlobally, &matcher](SDValue N) {
+ if (!matcher.match(N, ISD::FMUL))
return false;
return AllowFusionGlobally || N->getFlags().hasAllowContract();
};
@@ -14856,8 +15484,9 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) {
if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
- XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z));
+ return matcher.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0),
+ XY.getOperand(1),
+ matcher.getNode(ISD::FNEG, SL, VT, Z));
}
return SDValue();
};
@@ -14866,9 +15495,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// Note: Commutes FSUB operands.
auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) {
if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
- YZ.getOperand(1), X);
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)),
+ YZ.getOperand(1), X);
}
return SDValue();
};
@@ -14893,44 +15523,46 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
- if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
+ if (matcher.match(N0, ISD::FNEG) && isContractableFMUL(N0.getOperand(0)) &&
(Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
SDValue N00 = N0.getOperand(0).getOperand(0);
SDValue N01 = N0.getOperand(0).getOperand(1);
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
- DAG.getNode(ISD::FNEG, SL, VT, N1));
+ return matcher.getNode(PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FNEG, SL, VT, N00), N01,
+ matcher.getNode(ISD::FNEG, SL, VT, N1));
}
// Look through FP_EXTEND nodes to do more combining.
// fold (fsub (fpext (fmul x, y)), z)
// -> (fma (fpext x), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FP_EXTEND) {
+ if (matcher.match(N0, ISD::FP_EXTEND)) {
SDValue N00 = N0.getOperand(0);
if (isContractableFMUL(N00) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT, N1));
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
+ matcher.getNode(ISD::FNEG, SL, VT, N1));
}
}
// fold (fsub x, (fpext (fmul y, z)))
// -> (fma (fneg (fpext y)), (fpext z), x)
// Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
+ if (matcher.match(N1, ISD::FP_EXTEND)) {
SDValue N10 = N1.getOperand(0);
if (isContractableFMUL(N10) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N10.getValueType())) {
- return DAG.getNode(
+ return matcher.getNode(
PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
+ matcher.getNode(
+ ISD::FNEG, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0))),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), N0);
}
}
@@ -14940,19 +15572,20 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
// orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
// from implementing the canonicalization in visitFSUB.
- if (N0.getOpcode() == ISD::FP_EXTEND) {
+ if (matcher.match(N0, ISD::FP_EXTEND)) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FNEG) {
+ if (matcher.match(N00, ISD::FNEG)) {
SDValue N000 = N00.getOperand(0);
if (isContractableFMUL(N000) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
- return DAG.getNode(
+ return matcher.getNode(
ISD::FNEG, SL, VT,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
- N1));
+ matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
+ N1));
}
}
}
@@ -14963,24 +15596,25 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
// orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
// from implementing the canonicalization in visitFSUB.
- if (N0.getOpcode() == ISD::FNEG) {
+ if (matcher.match(N0, ISD::FNEG)) {
SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FP_EXTEND) {
+ if (matcher.match(N00, ISD::FP_EXTEND)) {
SDValue N000 = N00.getOperand(0);
if (isContractableFMUL(N000) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N000.getValueType())) {
- return DAG.getNode(
+ return matcher.getNode(
ISD::FNEG, SL, VT,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
- N1));
+ matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)),
+ N1));
}
}
}
- auto isReassociable = [Options](SDNode *N) {
+ auto isReassociable = [&Options](SDNode *N) {
return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
};
@@ -14990,8 +15624,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
};
auto isFusedOp = [&](SDValue N) {
- unsigned Opcode = N.getOpcode();
- return Opcode == ISD::FMA || Opcode == ISD::FMAD;
+ return matcher.match(N, ISD::FMA) || matcher.match(N, ISD::FMAD);
};
// More folding opportunities when target permits.
@@ -15002,12 +15635,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
if (CanFuse && isFusedOp(N0) &&
isContractableAndReassociableFMUL(N0.getOperand(2)) &&
N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0),
- N0.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(2).getOperand(0),
- N0.getOperand(2).getOperand(1),
- DAG.getNode(ISD::FNEG, SL, VT, N1)));
+ return matcher.getNode(
+ PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
+ matcher.getNode(PreferredFusedOpcode, SL, VT,
+ N0.getOperand(2).getOperand(0),
+ N0.getOperand(2).getOperand(1),
+ matcher.getNode(ISD::FNEG, SL, VT, N1)));
}
// fold (fsub x, (fma y, z, (fmul u, v)))
@@ -15017,29 +15650,30 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N1->hasOneUse() && NoSignedZero) {
SDValue N20 = N1.getOperand(2).getOperand(0);
SDValue N21 = N1.getOperand(2).getOperand(1);
- return DAG.getNode(
+ return matcher.getNode(
PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
+ matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
+ N1.getOperand(1),
+ matcher.getNode(PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FNEG, SL, VT, N20), N21, N0));
}
// fold (fsub (fma x, y, (fpext (fmul u, v))), z)
// -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
if (isFusedOp(N0) && N0->hasOneUse()) {
SDValue N02 = N0.getOperand(2);
- if (N02.getOpcode() == ISD::FP_EXTEND) {
+ if (matcher.match(N02, ISD::FP_EXTEND)) {
SDValue N020 = N02.getOperand(0);
if (isContractableAndReassociableFMUL(N020) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N020.getValueType())) {
- return DAG.getNode(
+ return matcher.getNode(
PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(
+ matcher.getNode(
PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT, N1)));
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)),
+ matcher.getNode(ISD::FNEG, SL, VT, N1)));
}
}
}
@@ -15050,29 +15684,29 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
- if (N0.getOpcode() == ISD::FP_EXTEND) {
+ if (matcher.match(N0, ISD::FP_EXTEND)) {
SDValue N00 = N0.getOperand(0);
if (isFusedOp(N00)) {
SDValue N002 = N00.getOperand(2);
if (isContractableAndReassociableFMUL(N002) &&
TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT,
N00.getValueType())) {
- return DAG.getNode(
+ return matcher.getNode(
PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
- DAG.getNode(
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)),
+ matcher.getNode(
PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT, N1)));
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(0)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)),
+ matcher.getNode(ISD::FNEG, SL, VT, N1)));
}
}
}
// fold (fsub x, (fma y, z, (fpext (fmul u, v))))
// -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
- if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND &&
+ if (isFusedOp(N1) && matcher.match(N1.getOperand(2), ISD::FP_EXTEND) &&
N1->hasOneUse()) {
SDValue N120 = N1.getOperand(2).getOperand(0);
if (isContractableAndReassociableFMUL(N120) &&
@@ -15080,13 +15714,15 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N120.getValueType())) {
SDValue N1200 = N120.getOperand(0);
SDValue N1201 = N120.getOperand(1);
- return DAG.getNode(
+ return matcher.getNode(
PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), N1.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
+ matcher.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
+ N1.getOperand(1),
+ matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FNEG, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N1200)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N1201), N0));
}
}
@@ -15096,7 +15732,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// FIXME: This turns two single-precision and one double-precision
// operation into two double-precision operations, which might not be
// interesting for all targets, especially GPUs.
- if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) {
+ if (matcher.match(N1, ISD::FP_EXTEND) && isFusedOp(N1.getOperand(0))) {
SDValue CvtSrc = N1.getOperand(0);
SDValue N100 = CvtSrc.getOperand(0);
SDValue N101 = CvtSrc.getOperand(1);
@@ -15106,15 +15742,16 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
CvtSrc.getValueType())) {
SDValue N1020 = N102.getOperand(0);
SDValue N1021 = N102.getOperand(1);
- return DAG.getNode(
+ return matcher.getNode(
PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N100)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
+ matcher.getNode(ISD::FNEG, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N100)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ matcher.getNode(
+ PreferredFusedOpcode, SL, VT,
+ matcher.getNode(ISD::FNEG, SL, VT,
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N1020)),
+ matcher.getNode(ISD::FP_EXTEND, SL, VT, N1021), N0));
}
}
}
@@ -15217,6 +15854,17 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVP_FADD(SDNode *N) {
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ // FADD -> FMA combines:
+ if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -15394,10 +16042,15 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
DAG.getConstantFP(4.0, DL, VT));
}
}
+
+ // Fold fadd(vecreduce(x), vecreduce(y)) -> vecreduce(fadd(x, y))
+ if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FADD, ISD::FADD, DL,
+ VT, N0, N1, Flags))
+ return SD;
} // enable-unsafe-fp-math
// FADD -> FMA combines:
- if (SDValue Fused = visitFADDForFMACombine(N)) {
+ if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
@@ -15507,7 +16160,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1);
// FSUB -> FMA combines:
- if (SDValue Fused = visitFSUBForFMACombine(N)) {
+ if (SDValue Fused = visitFSUBForFMACombine<EmptyMatchContext>(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
@@ -15568,6 +16221,11 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1);
return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts);
}
+
+ // Fold fmul(vecreduce(x), vecreduce(y)) -> vecreduce(fmul(x, y))
+ if (SDValue SD = reassociateReduction(ISD::VECREDUCE_FMUL, ISD::FMUL, DL,
+ VT, N0, N1, Flags))
+ return SD;
}
// fold (fmul X, 2.0) -> (fadd X, X)
@@ -15653,7 +16311,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitFMA(SDNode *N) {
+template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
@@ -15664,6 +16322,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
const TargetOptions &Options = DAG.getTarget().Options;
// FMA nodes have flags that propagate to the created nodes.
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+ MatchContextClass matcher(DAG, TLI, N);
bool CanReassociate =
Options.UnsafeFPMath || N->getFlags().hasAllowReassociation();
@@ -15672,7 +16331,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (isa<ConstantFPSDNode>(N0) &&
isa<ConstantFPSDNode>(N1) &&
isa<ConstantFPSDNode>(N2)) {
- return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
+ return matcher.getNode(ISD::FMA, DL, VT, N0, N1, N2);
}
// (-N0 * -N1) + N2 --> (N0 * N1) + N2
@@ -15688,7 +16347,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1);
if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper ||
CostN1 == TargetLowering::NegatibleCost::Cheaper))
- return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
+ return matcher.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2);
}
// FIXME: use fast math flags instead of Options.UnsafeFPMath
@@ -15699,70 +16358,74 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
return N2;
}
+ // FIXME: Support splat of constant.
if (N0CFP && N0CFP->isExactlyValue(1.0))
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
+ return matcher.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
if (N1CFP && N1CFP->isExactlyValue(1.0))
- return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
+ return matcher.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
// Canonicalize (fma c, x, y) -> (fma x, c, y)
if (DAG.isConstantFPBuildVectorOrConstantFP(N0) &&
!DAG.isConstantFPBuildVectorOrConstantFP(N1))
- return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
+ return matcher.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
if (CanReassociate) {
// (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
- if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
+ if (matcher.match(N2, ISD::FMUL) && N0 == N2.getOperand(0) &&
DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
DAG.isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
- return DAG.getNode(ISD::FMUL, DL, VT, N0,
- DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
+ return matcher.getNode(
+ ISD::FMUL, DL, VT, N0,
+ matcher.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1)));
}
// (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
- if (N0.getOpcode() == ISD::FMUL &&
+ if (matcher.match(N0, ISD::FMUL) &&
DAG.isConstantFPBuildVectorOrConstantFP(N1) &&
DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
- return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
- DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)),
- N2);
+ return matcher.getNode(
+ ISD::FMA, DL, VT, N0.getOperand(0),
+ matcher.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1)), N2);
}
}
// (fma x, -1, y) -> (fadd (fneg x), y)
+ // FIXME: Support splat of constant.
if (N1CFP) {
if (N1CFP->isExactlyValue(1.0))
- return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
+ return matcher.getNode(ISD::FADD, DL, VT, N0, N2);
if (N1CFP->isExactlyValue(-1.0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
- SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
+ SDValue RHSNeg = matcher.getNode(ISD::FNEG, DL, VT, N0);
AddToWorklist(RHSNeg.getNode());
- return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
+ return matcher.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
}
// fma (fneg x), K, y -> fma x, -K, y
- if (N0.getOpcode() == ISD::FNEG &&
+ if (matcher.match(N0, ISD::FNEG) &&
(TLI.isOperationLegal(ISD::ConstantFP, VT) ||
- (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
- ForCodeSize)))) {
- return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
- DAG.getNode(ISD::FNEG, DL, VT, N1), N2);
+ (N1.hasOneUse() &&
+ !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT, ForCodeSize)))) {
+ return matcher.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
+ matcher.getNode(ISD::FNEG, DL, VT, N1), N2);
}
}
+ // FIXME: Support splat of constant.
if (CanReassociate) {
// (fma x, c, x) -> (fmul x, (c+1))
if (N1CFP && N0 == N2) {
- return DAG.getNode(
- ISD::FMUL, DL, VT, N0,
- DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(1.0, DL, VT)));
+ return matcher.getNode(ISD::FMUL, DL, VT, N0,
+ matcher.getNode(ISD::FADD, DL, VT, N1,
+ DAG.getConstantFP(1.0, DL, VT)));
}
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
- if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
- return DAG.getNode(
- ISD::FMUL, DL, VT, N0,
- DAG.getNode(ISD::FADD, DL, VT, N1, DAG.getConstantFP(-1.0, DL, VT)));
+ if (N1CFP && matcher.match(N2, ISD::FNEG) && N2.getOperand(0) == N0) {
+ return matcher.getNode(ISD::FMUL, DL, VT, N0,
+ matcher.getNode(ISD::FADD, DL, VT, N1,
+ DAG.getConstantFP(-1.0, DL, VT)));
}
}
@@ -15771,7 +16434,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (!TLI.isFNegFree(VT))
if (SDValue Neg = TLI.getCheaperNegatedExpression(
SDValue(N, 0), DAG, LegalOperations, ForCodeSize))
- return DAG.getNode(ISD::FNEG, DL, VT, Neg);
+ return matcher.getNode(ISD::FNEG, DL, VT, Neg);
return SDValue();
}
@@ -16043,27 +16706,30 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
/// copysign(x, fp_extend(y)) -> copysign(x, y)
/// copysign(x, fp_round(y)) -> copysign(x, y)
-static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
- SDValue N1 = N->getOperand(1);
- if ((N1.getOpcode() == ISD::FP_EXTEND ||
- N1.getOpcode() == ISD::FP_ROUND)) {
- EVT N1VT = N1->getValueType(0);
- EVT N1Op0VT = N1->getOperand(0).getValueType();
+/// Operands to the function are the types of X and Y, respectively.
+static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(EVT XTy, EVT YTy) {
+ // Always fold no-op FP casts.
+ if (XTy == YTy)
+ return true;
- // Always fold no-op FP casts.
- if (N1VT == N1Op0VT)
- return true;
+ // Do not optimize out type conversion of f128 type yet.
+ // For some targets like x86_64, configuration is changed to keep one f128
+ // value in one SSE register, but instruction selection cannot handle
+ // FCOPYSIGN on SSE registers yet.
+ if (YTy == MVT::f128)
+ return false;
- // Do not optimize out type conversion of f128 type yet.
- // For some targets like x86_64, configuration is changed to keep one f128
- // value in one SSE register, but instruction selection cannot handle
- // FCOPYSIGN on SSE registers yet.
- if (N1Op0VT == MVT::f128)
- return false;
+ return !YTy.isVector() || EnableVectorFCopySignExtendRound;
+}
- return !N1Op0VT.isVector() || EnableVectorFCopySignExtendRound;
- }
- return false;
+static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
+ SDValue N1 = N->getOperand(1);
+ if (N1.getOpcode() != ISD::FP_EXTEND &&
+ N1.getOpcode() != ISD::FP_ROUND)
+ return false;
+ EVT N1VT = N1->getValueType(0);
+ EVT N1Op0VT = N1->getOperand(0).getValueType();
+ return CanCombineFCOPYSIGN_EXTEND_ROUND(N1VT, N1Op0VT);
}
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
@@ -16399,6 +17065,10 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
+ // Avoid folding legal fp_rounds into non-legal ones.
+ if (!hasOperation(ISD::FP_ROUND, VT))
+ return SDValue();
+
// Skip this folding if it results in an fp_round from f80 to f16.
//
// f80 to f16 always generates an expensive (and as yet, unimplemented)
@@ -16423,7 +17093,13 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
}
// fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
- if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse()) {
+ // Note: From a legality perspective, this is a two step transform. First,
+ // we duplicate the fp_round to the arguments of the copysign, then we
+ // eliminate the fp_round on Y. The second step requires an additional
+ // predicate to match the implementation above.
+ if (N0.getOpcode() == ISD::FCOPYSIGN && N0->hasOneUse() &&
+ CanCombineFCOPYSIGN_EXTEND_ROUND(VT,
+ N0.getValueType())) {
SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
N0.getOperand(0), N1);
AddToWorklist(Tmp.getNode());
@@ -16529,6 +17205,15 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFFREXP(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+
+ // fold (ffrexp c1) -> ffrexp(c1)
+ if (DAG.isConstantFPBuildVectorOrConstantFP(N0))
+ return DAG.getNode(ISD::FFREXP, SDLoc(N), N->getVTList(), N0);
+ return SDValue();
+}
+
SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -16618,6 +17303,13 @@ SDValue DAGCombiner::visitFMinMax(SDNode *N) {
}
}
+ if (SDValue SD = reassociateReduction(
+ PropagatesNaN
+ ? (IsMin ? ISD::VECREDUCE_FMINIMUM : ISD::VECREDUCE_FMAXIMUM)
+ : (IsMin ? ISD::VECREDUCE_FMIN : ISD::VECREDUCE_FMAX),
+ Opc, SDLoc(N), VT, N0, N1, Flags))
+ return SD;
+
return SDValue();
}
@@ -16656,6 +17348,55 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
N1->getOperand(0), N2);
}
+ // Variant of the previous fold where there is a SETCC in between:
+ // BRCOND(SETCC(FREEZE(X), CONST, Cond))
+ // =>
+ // BRCOND(FREEZE(SETCC(X, CONST, Cond)))
+ // =>
+ // BRCOND(SETCC(X, CONST, Cond))
+ // This is correct if FREEZE(X) has one use and SETCC(FREEZE(X), CONST, Cond)
+ // isn't equivalent to true or false.
+ // For example, SETCC(FREEZE(X), -128, SETULT) cannot be folded to
+ // FREEZE(SETCC(X, -128, SETULT)) because X can be poison.
+ if (N1->getOpcode() == ISD::SETCC && N1.hasOneUse()) {
+ SDValue S0 = N1->getOperand(0), S1 = N1->getOperand(1);
+ ISD::CondCode Cond = cast<CondCodeSDNode>(N1->getOperand(2))->get();
+ ConstantSDNode *S0C = dyn_cast<ConstantSDNode>(S0);
+ ConstantSDNode *S1C = dyn_cast<ConstantSDNode>(S1);
+ bool Updated = false;
+
+ // Is 'X Cond C' always true or false?
+ auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) {
+ bool False = (Cond == ISD::SETULT && C->isZero()) ||
+ (Cond == ISD::SETLT && C->isMinSignedValue()) ||
+ (Cond == ISD::SETUGT && C->isAllOnes()) ||
+ (Cond == ISD::SETGT && C->isMaxSignedValue());
+ bool True = (Cond == ISD::SETULE && C->isAllOnes()) ||
+ (Cond == ISD::SETLE && C->isMaxSignedValue()) ||
+ (Cond == ISD::SETUGE && C->isZero()) ||
+ (Cond == ISD::SETGE && C->isMinSignedValue());
+ return True || False;
+ };
+
+ if (S0->getOpcode() == ISD::FREEZE && S0.hasOneUse() && S1C) {
+ if (!IsAlwaysTrueOrFalse(Cond, S1C)) {
+ S0 = S0->getOperand(0);
+ Updated = true;
+ }
+ }
+ if (S1->getOpcode() == ISD::FREEZE && S1.hasOneUse() && S0C) {
+ if (!IsAlwaysTrueOrFalse(ISD::getSetCCSwappedOperands(Cond), S0C)) {
+ S1 = S1->getOperand(0);
+ Updated = true;
+ }
+ }
+
+ if (Updated)
+ return DAG.getNode(
+ ISD::BRCOND, SDLoc(N), MVT::Other, Chain,
+ DAG.getSetCC(SDLoc(N1), N1->getValueType(0), S0, S1, Cond), N2);
+ }
+
// If N is a constant we could fold this into a fallthrough or unconditional
// branch. However that doesn't happen very often in normal code, because
// Instcombine/SimplifyCFG should have handled the available opportunities.
@@ -17288,11 +18029,53 @@ bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
return false;
}
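+// Walk up the chain feeding LD (looking through CALLSEQ_START and a
+// TokenFactor) and return the single store whose address shares a base with
+// the load, setting Offset to the displacement between the two addresses;
+// returns null if no such store can be identified.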
+StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD,
+ int64_t &Offset) {
+ SDValue Chain = LD->getOperand(0);
+
+ // Look through CALLSEQ_START.
+ if (Chain.getOpcode() == ISD::CALLSEQ_START)
+ Chain = Chain->getOperand(0);
+
+ StoreSDNode *ST = nullptr;
+ SmallVector<SDValue, 8> Aliases;
+ if (Chain.getOpcode() == ISD::TokenFactor) {
+ // Look for unique store within the TokenFactor.
+ for (SDValue Op : Chain->ops()) {
+ StoreSDNode *Store = dyn_cast<StoreSDNode>(Op.getNode());
+ if (!Store)
+ continue;
+ BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
+ BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
+ if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
+ continue;
+ // Make sure the store is not aliased with any nodes in TokenFactor.
+ GatherAllAliases(Store, Chain, Aliases);
+ if (Aliases.empty() ||
+ (Aliases.size() == 1 && Aliases.front().getNode() == Store))
+ ST = Store;
+ break;
+ }
+ } else {
+ StoreSDNode *Store = dyn_cast<StoreSDNode>(Chain.getNode());
+ if (Store) {
+ BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
+ BaseIndexOffset BasePtrST = BaseIndexOffset::match(Store, DAG);
+ if (BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
+ ST = Store;
+ }
+ }
+
+ return ST;
+}
+
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
if (OptLevel == CodeGenOpt::None || !LD->isSimple())
return SDValue();
SDValue Chain = LD->getOperand(0);
- StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
+ int64_t Offset;
+
+ StoreSDNode *ST = getUniqueStoreFeeding(LD, Offset);
// TODO: Relax this restriction for unordered atomics (see D66309)
if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace())
return SDValue();
@@ -17309,8 +18092,8 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
// 2. The store is scalable and the load is fixed width. We could
// potentially support a limited number of cases here, but there has been
// no cost-benefit analysis to prove it's worth it.
- bool LdStScalable = LDMemType.isScalableVector();
- if (LdStScalable != STMemType.isScalableVector())
+ bool LdStScalable = LDMemType.isScalableVT();
+ if (LdStScalable != STMemType.isScalableVT())
return SDValue();
// If we are dealing with scalable vectors on a big endian platform the
@@ -17320,12 +18103,6 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
if (LdStScalable && DAG.getDataLayout().isBigEndian())
return SDValue();
- BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
- BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
- int64_t Offset;
- if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
- return SDValue();
-
// Normalize for Endianness. After this Offset=0 will denote that the least
// significant bit in the loaded value maps to the least significant bit in
// the stored value). With Offset=n (for n > 0) the loaded value starts at the
@@ -17682,7 +18459,7 @@ struct LoadedSlice {
/// Get the size of the slice to be loaded in bytes.
unsigned getLoadedSize() const {
- unsigned SliceSize = getUsedBits().countPopulation();
+ unsigned SliceSize = getUsedBits().popcount();
assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
return SliceSize / 8;
}
@@ -17867,9 +18644,9 @@ static bool areUsedBitsDense(const APInt &UsedBits) {
return true;
// Get rid of the unused bits on the right.
- APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
+ APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countr_zero());
// Get rid of the unused bits on the left.
- if (NarrowedUsedBits.countLeadingZeros())
+ if (NarrowedUsedBits.countl_zero())
NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
// Check that the chunk of bits is completely used.
return NarrowedUsedBits.isAllOnes();
@@ -18125,14 +18902,14 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
// 0 and the bits being kept are 1. Use getSExtValue so that leading bits
// follow the sign bit for uniformity.
uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
- unsigned NotMaskLZ = countLeadingZeros(NotMask);
+ unsigned NotMaskLZ = llvm::countl_zero(NotMask);
if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
- unsigned NotMaskTZ = countTrailingZeros(NotMask);
+ unsigned NotMaskTZ = llvm::countr_zero(NotMask);
if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
if (NotMaskLZ == 64) return Result; // All zero mask.
// See if we have a continuous run of bits. If so, we have 0*1+0*
- if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
+ if (llvm::countr_one(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
return Result;
// Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
@@ -18199,6 +18976,11 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
UseTruncStore = true;
else
return SDValue();
+
+ // Can't do this for indexed stores.
+ if (St->isIndexed())
+ return SDValue();
+
// Check that the target doesn't think this is a bad idea.
if (St->getMemOperand() &&
!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
@@ -18309,8 +19091,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
Imm ^= APInt::getAllOnes(BitWidth);
if (Imm == 0 || Imm.isAllOnes())
return SDValue();
- unsigned ShAmt = Imm.countTrailingZeros();
- unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
+ unsigned ShAmt = Imm.countr_zero();
+ unsigned MSB = BitWidth - Imm.countl_zero() - 1;
unsigned NewBW = NextPowerOf2(MSB - ShAmt);
EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
// The narrowing should be profitable, the load/store operation should be
@@ -18527,6 +19309,30 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
return DAG.getTokenFactor(StoreDL, Chains);
}
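+// Return true if every store in StoreNodes has an IR-level memory operand and
+// all of those operands resolve to the same underlying object, so the first
+// store's pointer info can describe the merged access.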
+bool DAGCombiner::hasSameUnderlyingObj(ArrayRef<MemOpLink> StoreNodes) {
+ const Value *UnderlyingObj = nullptr;
+ for (const auto &MemOp : StoreNodes) {
+ const MachineMemOperand *MMO = MemOp.MemNode->getMemOperand();
+    // Pseudo values such as stack frames have their own frame index and size;
+    // do not reuse the first store's frame index for other frames.
+ if (MMO->getPseudoValue())
+ return false;
+
+ if (!MMO->getValue())
+ return false;
+
+ const Value *Obj = getUnderlyingObject(MMO->getValue());
+
+ if (UnderlyingObj && UnderlyingObj != Obj)
+ return false;
+
+ if (!UnderlyingObj)
+ UnderlyingObj = Obj;
+ }
+
+ return true;
+}
+
bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
bool IsConstantSrc, bool UseVector, bool UseTrunc) {
@@ -18678,13 +19484,21 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
+ bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
// make sure we use trunc store if it's necessary to be legal.
+  // When generating the new widened store, if the first store's pointer info
+  // cannot be reused, discard the pointer info except for the address space,
+  // because the widened store can no longer be represented by the original
+  // pointer info, which describes only the narrow memory object.
SDValue NewStore;
if (!UseTrunc) {
- NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(),
- FirstInChain->getAlign(), *Flags, AAInfo);
+ NewStore = DAG.getStore(
+ NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
+ CanReusePtrInfo
+ ? FirstInChain->getPointerInfo()
+ : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
+ FirstInChain->getAlign(), *Flags, AAInfo);
} else { // Must be realized as a trunc store
EVT LegalizedStoredValTy =
TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
@@ -18695,8 +19509,11 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
LegalizedStoredValTy);
NewStore = DAG.getTruncStore(
NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
- FirstInChain->getAlign(), *Flags, AAInfo);
+ CanReusePtrInfo
+ ? FirstInChain->getPointerInfo()
+ : MachinePointerInfo(FirstInChain->getPointerInfo().getAddrSpace()),
+ StoredVal.getValueType() /*TVT*/, FirstInChain->getAlign(), *Flags,
+ AAInfo);
}
// Replace all merged stores with the new store.
@@ -18749,6 +19566,8 @@ void DAGCombiner::getStoreMergeCandidates(
// Don't mix temporal stores with non-temporal stores.
if (St->isNonTemporal() != Other->isNonTemporal())
return false;
+ if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*St, *Other))
+ return false;
SDValue OtherBC = peekThroughBitcasts(Other->getValue());
// Allow merging constants of different types as integers.
bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
@@ -18774,6 +19593,9 @@ void DAGCombiner::getStoreMergeCandidates(
// Don't mix temporal loads with non-temporal loads.
if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
return false;
+ if (!TLI.areTwoSDNodeTargetMMOFlagsMergeable(*cast<LoadSDNode>(Val),
+ *OtherLd))
+ return false;
if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
return false;
break;
@@ -19042,11 +19864,9 @@ bool DAGCombiner::tryStoreMergeOfConstants(
}
}
- // We only use vectors if the constant is known to be zero or the
- // target allows it and the function is not marked with the
- // noimplicitfloat attribute.
- if ((!NonZero ||
- TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
+ // We only use vectors if the target allows it and the function is not
+ // marked with the noimplicitfloat attribute.
+ if (TLI.storeOfVectorConstantIsCheap(!NonZero, MemVT, i + 1, FirstStoreAS) &&
AllowVectors) {
// Find a legal type for the vector store.
unsigned Elts = (i + 1) * NumMemElts;
@@ -19389,6 +20209,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
// using the first's chain is acceptable.
SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
+ bool CanReusePtrInfo = hasSameUnderlyingObj(StoreNodes);
AddToWorklist(NewStoreChain.getNode());
MachineMemOperand::Flags LdMMOFlags =
@@ -19397,10 +20218,14 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
if (IsNonTemporalLoad)
LdMMOFlags |= MachineMemOperand::MONonTemporal;
+ LdMMOFlags |= TLI.getTargetMMOFlags(*FirstLoad);
+
MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore
? MachineMemOperand::MONonTemporal
: MachineMemOperand::MONone;
+ StMMOFlags |= TLI.getTargetMMOFlags(*StoreNodes[0].MemNode);
+
SDValue NewLoad, NewStore;
if (UseVectorTy || !DoIntegerTruncate) {
NewLoad = DAG.getLoad(
@@ -19418,7 +20243,9 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
}
NewStore = DAG.getStore(
NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
+ CanReusePtrInfo ? FirstInChain->getPointerInfo()
+ : MachinePointerInfo(FirstStoreAS),
+ FirstStoreAlign, StMMOFlags);
} else { // This must be the truncstore/extload case
EVT ExtendedTy =
TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
@@ -19428,8 +20255,10 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes,
FirstLoadAlign, LdMMOFlags);
NewStore = DAG.getTruncStore(
NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), JointMemOpVT,
- FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags());
+ CanReusePtrInfo ? FirstInChain->getPointerInfo()
+ : MachinePointerInfo(FirstStoreAS),
+ JointMemOpVT, FirstInChain->getAlign(),
+ FirstInChain->getMemOperand()->getFlags());
}
// Transfer chain users from old loads to the new load.
@@ -19465,7 +20294,7 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) {
// store since we know <vscale x 16 x i8> is exactly twice as large as
// <vscale x 8 x i8>). Until then, bail out for scalable vectors.
EVT MemVT = St->getMemoryVT();
- if (MemVT.isScalableVector())
+ if (MemVT.isScalableVT())
return false;
if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
return false;
@@ -19647,6 +20476,62 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
}
}
+// (store (insert_vector_elt (load p), x, i), p) -> (store x, p+offset)
+//
+// If a value loaded from p has a single element inserted into it and is then
+// stored back to p, with no other uses in between on the chain, the vector
+// store is redundant and can be replaced with a store of just the inserted
+// scalar element.
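+//
+// A minimal sketch of the idea (the <4 x i32> type and the constant index are
+// only for illustration): storing
+//   (insert_vector_elt (load <4 x i32>, p), %x, 2)
+// back to p only changes element 2 in memory, so it can be rewritten as a
+// single i32 store of %x to p + 8 bytes.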
+SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) {
+ SDLoc DL(ST);
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+ SDValue Chain = ST->getChain();
+ if (Value.getOpcode() != ISD::INSERT_VECTOR_ELT || !Value.hasOneUse())
+ return SDValue();
+
+ SDValue Elt = Value.getOperand(1);
+ SDValue Idx = Value.getOperand(2);
+
+ // If the element isn't byte sized then we can't compute an offset
+ EVT EltVT = Elt.getValueType();
+ if (!EltVT.isByteSized())
+ return SDValue();
+
+ auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0));
+ if (!Ld || Ld->getBasePtr() != Ptr ||
+ ST->getMemoryVT() != Ld->getMemoryVT() || !ST->isSimple() ||
+ !ISD::isNormalStore(ST) ||
+ Ld->getAddressSpace() != ST->getAddressSpace() ||
+ !Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1)))
+ return SDValue();
+
+ unsigned IsFast;
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ Elt.getValueType(), ST->getAddressSpace(),
+ ST->getAlign(), ST->getMemOperand()->getFlags(),
+ &IsFast) ||
+ !IsFast)
+ return SDValue();
+ EVT PtrVT = Ptr.getValueType();
+
+ SDValue Offset =
+ DAG.getNode(ISD::MUL, DL, PtrVT, Idx,
+ DAG.getConstant(EltVT.getSizeInBits() / 8, DL, PtrVT));
+ SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, Offset);
+ MachinePointerInfo PointerInfo(ST->getAddressSpace());
+
+ // If the offset is a known constant then try to recover the pointer
+ // info
+ if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8;
+ NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(COffset), DL);
+ PointerInfo = ST->getPointerInfo().getWithOffset(COffset);
+ }
+
+ return DAG.getStore(Chain, DL, Elt, NewPtr, PointerInfo, ST->getAlign(),
+ ST->getMemOperand()->getFlags());
+}
+
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -19768,9 +20653,13 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
// If this is a load followed by a store to the same location, then the store
- // is dead/noop.
+ // is dead/noop. Peek through any truncates if canCombineTruncStore failed.
+ // TODO: Add big-endian truncate support with test coverage.
// TODO: Can relax for unordered atomics (see D66309)
- if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
+ SDValue TruncVal = DAG.getDataLayout().isLittleEndian()
+ ? peekThroughTruncates(Value)
+ : Value;
+ if (auto *Ld = dyn_cast<LoadSDNode>(TruncVal)) {
if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
ST->isUnindexed() && ST->isSimple() &&
Ld->getAddressSpace() == ST->getAddressSpace() &&
@@ -19782,6 +20671,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
}
+ // Try scalarizing vector stores of loads where we only change one element
+ if (SDValue NewST = replaceStoreOfInsertLoad(ST))
+ return NewST;
+
// TODO: Can relax for unordered atomics (see D66309)
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
if (ST->isUnindexed() && ST->isSimple() &&
@@ -19796,22 +20689,32 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
!ST1->getBasePtr().isUndef() &&
- // BaseIndexOffset and the code below requires knowing the size
- // of a vector, so bail out if MemoryVT is scalable.
- !ST->getMemoryVT().isScalableVector() &&
- !ST1->getMemoryVT().isScalableVector() &&
ST->getAddressSpace() == ST1->getAddressSpace()) {
- const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
- const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
- unsigned STBitSize = ST->getMemoryVT().getFixedSizeInBits();
- unsigned ChainBitSize = ST1->getMemoryVT().getFixedSizeInBits();
- // If this is a store who's preceding store to a subset of the current
- // location and no one other node is chained to that store we can
- // effectively drop the store. Do not remove stores to undef as they may
- // be used as data sinks.
- if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
- CombineTo(ST1, ST1->getChain());
- return SDValue();
+      // If one of the two stores is a scalable vector and the other is a
+      // larger store of a fixed-size type, we cannot remove the scalable
+      // store, because its final size is not known at compile time.
+ if (ST->getMemoryVT().isScalableVector() ||
+ ST1->getMemoryVT().isScalableVector()) {
+ if (ST1->getBasePtr() == Ptr &&
+ TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(),
+ ST->getMemoryVT().getStoreSize())) {
+ CombineTo(ST1, ST1->getChain());
+ return SDValue();
+ }
+ } else {
+ const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
+ const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
+          // If the preceding store writes to a subset of the current store's
+          // location and no other node is chained to that store, we can
+          // effectively drop it. Do not remove stores to undef as they may be
+          // used as data sinks.
+ if (STBase.contains(DAG, ST->getMemoryVT().getFixedSizeInBits(),
+ ChainBase,
+ ST1->getMemoryVT().getFixedSizeInBits())) {
+ CombineTo(ST1, ST1->getChain());
+ return SDValue();
+ }
}
}
}
@@ -20183,6 +21086,99 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
return DAG.getBitcast(VT, Shuf);
}
+// Combine insert(shuffle(load, <u,0,1,2>), load, 0) into a single load when
+// possible and the new load will be fast. We use more loads but fewer shuffles
+// and inserts.
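+//
+// A minimal sketch of the pattern (the <4 x i32> type is only illustrative):
+// with a <4 x i32> vector loaded from p and an i32 loaded from p - 4,
+//   insert(shuffle(vecload, <u,0,1,2>), scalarload, 0)
+// can become one <4 x i32> load from p - 4, provided the combined load is
+// legal and fast enough at the reduced alignment.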
+SDValue DAGCombiner::combineInsertEltToLoad(SDNode *N, unsigned InsIndex) {
+ EVT VT = N->getValueType(0);
+
+  // InsIndex is expected to be the first or last lane.
+ if (!VT.isFixedLengthVector() ||
+ (InsIndex != 0 && InsIndex != VT.getVectorNumElements() - 1))
+ return SDValue();
+
+ // Look for a shuffle with the mask u,0,1,2,3,4,5,6 or 1,2,3,4,5,6,7,u
+ // depending on the InsIndex.
+ auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0));
+ SDValue Scalar = N->getOperand(1);
+ if (!Shuffle || !all_of(enumerate(Shuffle->getMask()), [&](auto P) {
+ return InsIndex == P.index() || P.value() < 0 ||
+ (InsIndex == 0 && P.value() == (int)P.index() - 1) ||
+ (InsIndex == VT.getVectorNumElements() - 1 &&
+ P.value() == (int)P.index() + 1);
+ }))
+ return SDValue();
+
+ // We optionally skip over an extend so long as both loads are extended in the
+ // same way from the same type.
+ unsigned Extend = 0;
+ if (Scalar.getOpcode() == ISD::ZERO_EXTEND ||
+ Scalar.getOpcode() == ISD::SIGN_EXTEND ||
+ Scalar.getOpcode() == ISD::ANY_EXTEND) {
+ Extend = Scalar.getOpcode();
+ Scalar = Scalar.getOperand(0);
+ }
+
+ auto *ScalarLoad = dyn_cast<LoadSDNode>(Scalar);
+ if (!ScalarLoad)
+ return SDValue();
+
+ SDValue Vec = Shuffle->getOperand(0);
+ if (Extend) {
+ if (Vec.getOpcode() != Extend)
+ return SDValue();
+ Vec = Vec.getOperand(0);
+ }
+ auto *VecLoad = dyn_cast<LoadSDNode>(Vec);
+ if (!VecLoad || Vec.getValueType().getScalarType() != Scalar.getValueType())
+ return SDValue();
+
+ int EltSize = ScalarLoad->getValueType(0).getScalarSizeInBits();
+ if (EltSize == 0 || EltSize % 8 != 0 || !ScalarLoad->isSimple() ||
+ !VecLoad->isSimple() || VecLoad->getExtensionType() != ISD::NON_EXTLOAD ||
+ ScalarLoad->getExtensionType() != ISD::NON_EXTLOAD ||
+ ScalarLoad->getAddressSpace() != VecLoad->getAddressSpace())
+ return SDValue();
+
+  // Check that the offset between the pointers is such that the two loads
+  // form a single contiguous load.
+ if (InsIndex == 0) {
+ if (!DAG.areNonVolatileConsecutiveLoads(ScalarLoad, VecLoad, EltSize / 8,
+ -1))
+ return SDValue();
+ } else {
+ if (!DAG.areNonVolatileConsecutiveLoads(
+ VecLoad, ScalarLoad, VT.getVectorNumElements() * EltSize / 8, -1))
+ return SDValue();
+ }
+
+ // And that the new unaligned load will be fast.
+ unsigned IsFast = 0;
+ Align NewAlign = commonAlignment(VecLoad->getAlign(), EltSize / 8);
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
+ Vec.getValueType(), VecLoad->getAddressSpace(),
+ NewAlign, VecLoad->getMemOperand()->getFlags(),
+ &IsFast) ||
+ !IsFast)
+ return SDValue();
+
+ // Calculate the new Ptr and create the new load.
+ SDLoc DL(N);
+ SDValue Ptr = ScalarLoad->getBasePtr();
+ if (InsIndex != 0)
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), VecLoad->getBasePtr(),
+ DAG.getConstant(EltSize / 8, DL, Ptr.getValueType()));
+ MachinePointerInfo PtrInfo =
+ InsIndex == 0 ? ScalarLoad->getPointerInfo()
+ : VecLoad->getPointerInfo().getWithOffset(EltSize / 8);
+
+ SDValue Load = DAG.getLoad(VecLoad->getValueType(0), DL,
+ ScalarLoad->getChain(), Ptr, PtrInfo, NewAlign);
+ DAG.makeEquivalentMemoryOrdering(ScalarLoad, Load.getValue(1));
+ DAG.makeEquivalentMemoryOrdering(VecLoad, Load.getValue(1));
+ return Extend ? DAG.getNode(Extend, DL, VT, Load) : Load;
+}
+
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue InVec = N->getOperand(0);
SDValue InVal = N->getOperand(1);
@@ -20254,6 +21250,9 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
return Shuf;
+ if (SDValue Shuf = combineInsertEltToLoad(N, Elt))
+ return Shuf;
+
// Attempt to convert an insert_vector_elt chain into a legal build_vector.
if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
// vXi1 vector - we don't need to recurse.
@@ -20349,6 +21348,20 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
return NewShuffle;
}
+      // If all inserted values are zero, try to convert to an AND mask.
+      // TODO: Do this for -1 with an OR mask?
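+      // For example, zeroing lanes 1 and 3 of a hypothetical v4i32 value
+      // becomes an AND with the build_vector <-1, 0, -1, 0>.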
+ if (!LegalOperations && llvm::isNullConstant(InVal) &&
+ all_of(Ops, [InVal](SDValue Op) { return !Op || Op == InVal; }) &&
+ count_if(Ops, [InVal](SDValue Op) { return Op == InVal; }) >= 2) {
+ SDValue Zero = DAG.getConstant(0, DL, MaxEltVT);
+ SDValue AllOnes = DAG.getAllOnesConstant(DL, MaxEltVT);
+ SmallVector<SDValue, 8> Mask(NumElts);
+ for (unsigned I = 0; I != NumElts; ++I)
+ Mask[I] = Ops[I] ? Zero : AllOnes;
+ return DAG.getNode(ISD::AND, DL, VT, CurVec,
+ DAG.getBuildVector(VT, DL, Mask));
+ }
+
// Failed to find a match in the chain - bail.
break;
}
@@ -20701,8 +21714,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// extract_vector_elt (build_vector x, y), 1 -> y
if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) ||
VecOp.getOpcode() == ISD::SPLAT_VECTOR) &&
- TLI.isTypeLegal(VecVT) &&
- (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
+ TLI.isTypeLegal(VecVT)) {
assert((VecOp.getOpcode() != ISD::BUILD_VECTOR ||
VecVT.isFixedLengthVector()) &&
"BUILD_VECTOR used for scalable vectors");
@@ -20711,12 +21723,15 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue Elt = VecOp.getOperand(IndexVal);
EVT InEltVT = Elt.getValueType();
- // Sometimes build_vector's scalar input types do not match result type.
- if (ScalarVT == InEltVT)
- return Elt;
+ if (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT) ||
+ isNullConstant(Elt)) {
+ // Sometimes build_vector's scalar input types do not match result type.
+ if (ScalarVT == InEltVT)
+ return Elt;
- // TODO: It may be useful to truncate if free if the build_vector implicitly
- // converts.
+ // TODO: It may be useful to truncate if free if the build_vector
+ // implicitly converts.
+ }
}
if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
@@ -21025,9 +22040,10 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
// same source type and all of the inputs must be any or zero extend.
// Scalar sizes must be a power of two.
EVT OutScalarTy = VT.getScalarType();
- bool ValidTypes = SourceType != MVT::Other &&
- isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
- isPowerOf2_32(SourceType.getSizeInBits());
+ bool ValidTypes =
+ SourceType != MVT::Other &&
+ llvm::has_single_bit<uint32_t>(OutScalarTy.getSizeInBits()) &&
+ llvm::has_single_bit<uint32_t>(SourceType.getSizeInBits());
// Create a new simpler BUILD_VECTOR sequence which other optimizations can
// turn into a single shuffle instruction.
@@ -21157,7 +22173,7 @@ SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
}
// Only cast if the size is the same
- if (Src.getValueType().getSizeInBits() != VT.getSizeInBits())
+ if (!Src || Src.getValueType().getSizeInBits() != VT.getSizeInBits())
return SDValue();
return DAG.getBitcast(VT, Src);
@@ -21359,10 +22375,9 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
// the source vector. The high bits map to zero. We will use a zero vector
// as the 2nd source operand of the shuffle, so use the 1st element of
// that vector (mask value is number-of-elements) for the high bits.
- if (i % ZextRatio == 0)
- ShufMask[i] = Extract.getConstantOperandVal(1);
- else
- ShufMask[i] = NumMaskElts;
+ int Low = DAG.getDataLayout().isBigEndian() ? (ZextRatio - 1) : 0;
+ ShufMask[i] = (i % ZextRatio == Low) ? Extract.getConstantOperandVal(1)
+ : NumMaskElts;
}
// Undef elements of the build vector remain undef because we initialize
@@ -21917,7 +22932,7 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
EVT OpVT = N->getOperand(0).getValueType();
// If the operands are legal vectors, leave them alone.
- if (TLI.isTypeLegal(OpVT))
+ if (TLI.isTypeLegal(OpVT) || OpVT.isScalableVector())
return SDValue();
SDLoc DL(N);
@@ -22273,7 +23288,13 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
// If the input is a concat_vectors, just make a larger concat by padding
// with smaller undefs.
- if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
+ //
+  // Legalizing in AArch64TargetLowering::LowerCONCAT_VECTORS() and combining
+  // here could cause an infinite loop. That legalization happens when LegalDAG
+  // is true and the input of AArch64TargetLowering::LowerCONCAT_VECTORS() is
+  // scalable.
+ if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse() &&
+ !(LegalDAG && In.getValueType().isScalableVector())) {
unsigned NumOps = N->getNumOperands() * In.getNumOperands();
SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
@@ -22767,10 +23788,6 @@ static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
continue;
}
- // Profitability check: only deal with extractions from the first subvector.
- if (OpSubvecIdx != 0)
- return SDValue();
-
const std::pair<SDValue, int> DemandedSubvector =
std::make_pair(Op, OpSubvecIdx);
@@ -22800,6 +23817,14 @@ static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N,
if (DemandedSubvectors.empty())
return DAG.getUNDEF(NarrowVT);
+ // Profitability check: only deal with extractions from the first subvector
+ // unless the mask becomes an identity mask.
+ if (!ShuffleVectorInst::isIdentityMask(NewMask) ||
+ any_of(NewMask, [](int M) { return M < 0; }))
+ for (auto &DemandedSubvector : DemandedSubvectors)
+ if (DemandedSubvector.second != 0)
+ return SDValue();
+
// We still perform the exact same EXTRACT_SUBVECTOR, just on different
// operand[s]/index[es], so there is no point in checking for it's legality.
@@ -22975,7 +24000,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
if (NumElems == 1) {
SDValue Src = V->getOperand(IdxVal);
if (EltVT != Src.getValueType())
- Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
+ Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
return DAG.getBitcast(NVT, Src);
}
@@ -23450,9 +24475,7 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
unsigned Opcode = N0.getOpcode();
- if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
- Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
- Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
+ if (!ISD::isExtVecInRegOpcode(Opcode))
return SDValue();
SDValue N00 = N0.getOperand(0);
@@ -23518,7 +24541,7 @@ static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle indice?");
DemandedElts.setBit(Idx);
}
- assert(DemandedElts.countPopulation() > 1 && "Is a splat shuffle already?");
+ assert(DemandedElts.popcount() > 1 && "Is a splat shuffle already?");
APInt UndefElts;
if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) {
// Even if all demanded elements are splat, some of them could be undef.
@@ -24072,8 +25095,8 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
bool IsInLaneMask = true;
ArrayRef<int> Mask = SVN->getMask();
SmallVector<int, 16> ClearMask(NumElts, -1);
- APInt DemandedLHS = APInt::getNullValue(NumElts);
- APInt DemandedRHS = APInt::getNullValue(NumElts);
+ APInt DemandedLHS = APInt::getZero(NumElts);
+ APInt DemandedRHS = APInt::getZero(NumElts);
for (int I = 0; I != (int)NumElts; ++I) {
int M = Mask[I];
if (M < 0)
@@ -24086,12 +25109,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
}
// TODO: Should we try to mask with N1 as well?
- if (!IsInLaneMask &&
- (!DemandedLHS.isNullValue() || !DemandedRHS.isNullValue()) &&
- (DemandedLHS.isNullValue() ||
- DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
- (DemandedRHS.isNullValue() ||
- DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
+ if (!IsInLaneMask && (!DemandedLHS.isZero() || !DemandedRHS.isZero()) &&
+ (DemandedLHS.isZero() || DAG.MaskedVectorIsZero(N0, DemandedLHS)) &&
+ (DemandedRHS.isZero() || DAG.MaskedVectorIsZero(N1, DemandedRHS))) {
SDLoc DL(N);
EVT IntVT = VT.changeVectorElementTypeToInteger();
EVT IntSVT = VT.getVectorElementType().changeTypeToInteger();
@@ -24771,6 +25791,17 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVP_FSUB(SDNode *N) {
+ SelectionDAG::FlagInserter FlagsInserter(DAG, N);
+
+ // FSUB -> FMA combines:
+ if (SDValue Fused = visitFSUBForFMACombine<VPMatchContext>(N)) {
+ AddToWorklist(Fused.getNode());
+ return Fused;
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitVPOp(SDNode *N) {
if (N->getOpcode() == ISD::VP_GATHER)
@@ -24792,8 +25823,17 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) {
ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode());
// This is the only generic VP combine we support for now.
- if (!AreAllEltsDisabled)
+ if (!AreAllEltsDisabled) {
+ switch (N->getOpcode()) {
+ case ISD::VP_FADD:
+ return visitVP_FADD(N);
+ case ISD::VP_FSUB:
+ return visitVP_FSUB(N);
+ case ISD::VP_FMA:
+ return visitFMA<VPMatchContext>(N);
+ }
return SDValue();
+ }
// Binary operations can be replaced by UNDEF.
if (ISD::isVPBinaryOp(N->getOpcode()))
@@ -24814,6 +25854,97 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) {
return SDValue();
}
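+// If the FP environment written to memory by GET_FPENV_MEM is merely copied
+// (loaded once and stored once) to another location, write it to that
+// location directly instead.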
+SDValue DAGCombiner::visitGET_FPENV_MEM(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
+
+  // Check that the memory the FP state is written to is used only in a single
+  // load operation.
+ LoadSDNode *LdNode = nullptr;
+ for (auto *U : Ptr->uses()) {
+ if (U == N)
+ continue;
+ if (auto *Ld = dyn_cast<LoadSDNode>(U)) {
+ if (LdNode && LdNode != Ld)
+ return SDValue();
+ LdNode = Ld;
+ continue;
+ }
+ return SDValue();
+ }
+ if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
+ !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
+ !LdNode->getChain().reachesChainWithoutSideEffects(SDValue(N, 0)))
+ return SDValue();
+
+ // Check if the loaded value is used only in a store operation.
+ StoreSDNode *StNode = nullptr;
+ for (auto I = LdNode->use_begin(), E = LdNode->use_end(); I != E; ++I) {
+ SDUse &U = I.getUse();
+ if (U.getResNo() == 0) {
+ if (auto *St = dyn_cast<StoreSDNode>(U.getUser())) {
+ if (StNode)
+ return SDValue();
+ StNode = St;
+ } else {
+ return SDValue();
+ }
+ }
+ }
+ if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
+ !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
+ !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
+ return SDValue();
+
+  // Create a new GET_FPENV_MEM node that uses the store address to write the
+  // FP environment.
+ SDValue Res = DAG.getGetFPEnv(Chain, SDLoc(N), StNode->getBasePtr(), MemVT,
+ StNode->getMemOperand());
+ CombineTo(StNode, Res, false);
+ return Res;
+}
+
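+// If the memory holding the FP environment is only filled by copying the
+// value from another location (a single load feeding a single store), read
+// the environment from that source location directly.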
+SDValue DAGCombiner::visitSET_FPENV_MEM(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ EVT MemVT = cast<FPStateAccessSDNode>(N)->getMemoryVT();
+
+  // Check that the address of the FP state is otherwise used only in a store
+  // operation.
+ StoreSDNode *StNode = nullptr;
+ for (auto *U : Ptr->uses()) {
+ if (U == N)
+ continue;
+ if (auto *St = dyn_cast<StoreSDNode>(U)) {
+ if (StNode && StNode != St)
+ return SDValue();
+ StNode = St;
+ continue;
+ }
+ return SDValue();
+ }
+ if (!StNode || !StNode->isSimple() || StNode->isIndexed() ||
+ !StNode->getOffset().isUndef() || StNode->getMemoryVT() != MemVT ||
+ !Chain.reachesChainWithoutSideEffects(SDValue(StNode, 0)))
+ return SDValue();
+
+ // Check if the stored value is loaded from some location and the loaded
+ // value is used only in the store operation.
+ SDValue StValue = StNode->getValue();
+ auto *LdNode = dyn_cast<LoadSDNode>(StValue);
+ if (!LdNode || !LdNode->isSimple() || LdNode->isIndexed() ||
+ !LdNode->getOffset().isUndef() || LdNode->getMemoryVT() != MemVT ||
+ !StNode->getChain().reachesChainWithoutSideEffects(SDValue(LdNode, 1)))
+ return SDValue();
+
+  // Create a new SET_FPENV_MEM node that uses the load address to read the FP
+  // environment.
+ SDValue Res =
+ DAG.getSetFPEnv(LdNode->getChain(), SDLoc(N), LdNode->getBasePtr(), MemVT,
+ LdNode->getMemOperand());
+ return Res;
+}
+
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
@@ -24960,8 +26091,6 @@ SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
- EVT SrcVT = N0->getValueType(0);
- EVT SrcEltVT = SrcVT.getVectorElementType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// TODO: promote operation might be also good here?
@@ -24971,7 +26100,9 @@ SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) {
(N0.getOpcode() == ISD::SPLAT_VECTOR ||
TLI.isExtractVecEltCheap(VT, Index0)) &&
TLI.isOperationLegalOrCustom(Opcode, EltVT) &&
- TLI.preferScalarizeSplat(Opcode)) {
+ TLI.preferScalarizeSplat(N)) {
+ EVT SrcVT = N0.getValueType();
+ EVT SrcEltVT = SrcVT.getVectorElementType();
SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL);
SDValue Elt =
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC);
@@ -25588,14 +26719,14 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
SDValue AndLHS = N0->getOperand(0);
auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
- if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
+ if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) {
// Shift the tested bit over the sign bit.
const APInt &AndMask = ConstAndRHS->getAPIntValue();
unsigned ShCt = AndMask.getBitWidth() - 1;
if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
SDValue ShlAmt =
- DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
- getShiftAmountTy(AndLHS.getValueType()));
+ DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
+ getShiftAmountTy(AndLHS.getValueType()));
SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
// Now arithmetic right shift it all the way over, so the result is
@@ -25991,7 +27122,7 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
Reciprocal)) {
AddToWorklist(Est.getNode());
- if (Iterations)
+ if (Iterations > 0)
Est = UseOneConstNR
? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
: buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
@@ -26334,7 +27465,7 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
// BaseIndexOffset assumes that offsets are fixed-size, which
// is not valid for scalable vectors where the offsets are
// scaled by `vscale`, so bail out early.
- if (St->getMemoryVT().isScalableVector())
+ if (St->getMemoryVT().isScalableVT())
return false;
// Add ST's interval.
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 2f2ae6e29855..f0affce7b6b8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -59,6 +59,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -95,7 +96,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -454,8 +454,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
if (!TLI.isTypeLegal(VT)) {
// MVT::i1 is special. Allow AND, OR, or XOR because they
// don't require additional zeroing, which makes them easy.
- if (VT == MVT::i1 && (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR ||
- ISDOpcode == ISD::XOR))
+ if (VT == MVT::i1 && ISD::isBitwiseLogicOp(ISDOpcode))
VT = TLI.getTypeToTransformTo(I->getContext(), VT);
else
return false;
@@ -894,7 +893,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
bool FastISel::selectXRayCustomEvent(const CallInst *I) {
const auto &Triple = TM.getTargetTriple();
- if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+ if (Triple.isAArch64(64) && Triple.getArch() != Triple::x86_64)
return true; // don't do anything to this instruction.
SmallVector<MachineOperand, 8> Ops;
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
@@ -913,7 +912,7 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) {
bool FastISel::selectXRayTypedEvent(const CallInst *I) {
const auto &Triple = TM.getTargetTriple();
- if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+ if (Triple.isAArch64(64) && Triple.getArch() != Triple::x86_64)
return true; // don't do anything to this instruction.
SmallVector<MachineOperand, 8> Ops;
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
@@ -1209,6 +1208,9 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
return true;
}
+ if (FuncInfo.PreprocessedDbgDeclares.contains(DI))
+ return true;
+
const Value *Address = DI->getAddress();
if (!Address || isa<UndefValue>(Address)) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI
@@ -1216,13 +1218,6 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
return true;
}
- // Byval arguments with frame indices were already handled after argument
- // lowering and before isel.
- const auto *Arg =
- dyn_cast<Argument>(Address->stripInBoundsConstantOffsets());
- if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX)
- return true;
-
std::optional<MachineOperand> Op;
if (Register Reg = lookUpRegForValue(Address))
Op = MachineOperand::CreateReg(Reg, false);
@@ -1277,60 +1272,85 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
const DbgValueInst *DI = cast<DbgValueInst>(II);
const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
const Value *V = DI->getValue();
- assert(DI->getVariable()->isValidLocationForIntrinsic(MIMD.getDL()) &&
+ DIExpression *Expr = DI->getExpression();
+ DILocalVariable *Var = DI->getVariable();
+ assert(Var->isValidLocationForIntrinsic(MIMD.getDL()) &&
"Expected inlined-at fields to agree");
if (!V || isa<UndefValue>(V) || DI->hasArgList()) {
// DI is either undef or cannot produce a valid DBG_VALUE, so produce an
// undef DBG_VALUE to terminate any prior location.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, false, 0U,
- DI->getVariable(), DI->getExpression());
- } else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
+ Var, Expr);
+ return true;
+ }
+ if (const auto *CI = dyn_cast<ConstantInt>(V)) {
// See if there's an expression to constant-fold.
- DIExpression *Expr = DI->getExpression();
if (Expr)
std::tie(Expr, CI) = Expr->constantFold(CI);
if (CI->getBitWidth() > 64)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addCImm(CI)
.addImm(0U)
- .addMetadata(DI->getVariable())
+ .addMetadata(Var)
.addMetadata(Expr);
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addImm(CI->getZExtValue())
.addImm(0U)
- .addMetadata(DI->getVariable())
+ .addMetadata(Var)
.addMetadata(Expr);
- } else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
+ return true;
+ }
+ if (const auto *CF = dyn_cast<ConstantFP>(V)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
.addFPImm(CF)
.addImm(0U)
- .addMetadata(DI->getVariable())
- .addMetadata(DI->getExpression());
- } else if (Register Reg = lookUpRegForValue(V)) {
+ .addMetadata(Var)
+ .addMetadata(Expr);
+ return true;
+ }
+ if (const auto *Arg = dyn_cast<Argument>(V);
+ Arg && Expr && Expr->isEntryValue()) {
+ // As per the Verifier, this case is only valid for swift async Args.
+ assert(Arg->hasAttribute(Attribute::AttrKind::SwiftAsync));
+
+ Register Reg = getRegForValue(Arg);
+ for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
+ if (Reg == VirtReg || Reg == PhysReg) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II,
+ false /*IsIndirect*/, PhysReg, Var, Expr);
+ return true;
+ }
+
+ LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but "
+ "couldn't find a physical register\n"
+ << *DI << "\n");
+ return true;
+ }
+ if (Register Reg = lookUpRegForValue(V)) {
// FIXME: This does not handle register-indirect values at offset 0.
if (!FuncInfo.MF->useDebugInstrRef()) {
bool IsIndirect = false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, IsIndirect,
- Reg, DI->getVariable(), DI->getExpression());
- } else {
- // If using instruction referencing, produce this as a DBG_INSTR_REF,
- // to be later patched up by finalizeDebugInstrRefs.
- SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg(
- /* Reg */ Reg, /* isDef */ false, /* isImp */ false,
- /* isKill */ false, /* isDead */ false,
- /* isUndef */ false, /* isEarlyClobber */ false,
- /* SubReg */ 0, /* isDebug */ true)});
- SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0});
- auto *NewExpr = DIExpression::prependOpcodes(DI->getExpression(), Ops);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(),
- TII.get(TargetOpcode::DBG_INSTR_REF), /*IsIndirect*/ false, MOs,
- DI->getVariable(), NewExpr);
+ Reg, Var, Expr);
+ return true;
}
- } else {
- // We don't know how to handle other cases, so we drop.
- LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ // If using instruction referencing, produce this as a DBG_INSTR_REF,
+ // to be later patched up by finalizeDebugInstrRefs.
+ SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg(
+ /* Reg */ Reg, /* isDef */ false, /* isImp */ false,
+ /* isKill */ false, /* isDead */ false,
+ /* isUndef */ false, /* isEarlyClobber */ false,
+ /* SubReg */ 0, /* isDebug */ true)});
+ SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0});
+ auto *NewExpr = DIExpression::prependOpcodes(Expr, Ops);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(),
+ TII.get(TargetOpcode::DBG_INSTR_REF), /*IsIndirect*/ false, MOs,
+ Var, NewExpr);
+ return true;
}
+ // We don't know how to handle other cases, so we drop.
+ LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
return true;
}
case Intrinsic::dbg_label: {
diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index c18cd39ed296..1d0a03ccfcdc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -13,7 +13,7 @@
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -83,7 +83,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
- DA = DAG->getDivergenceAnalysis();
+ UA = DAG->getUniformityInfo();
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
@@ -128,20 +128,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
for (const Instruction &I : BB) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
Type *Ty = AI->getAllocatedType();
- Align TyPrefAlign = MF->getDataLayout().getPrefTypeAlign(Ty);
- // The "specified" alignment is the alignment written on the alloca,
- // or the preferred alignment of the type if none is specified.
- //
- // (Unspecified alignment on allocas will be going away soon.)
- Align SpecifiedAlign = AI->getAlign();
-
- // If the preferred alignment of the type is higher than the specified
- // alignment of the alloca, promote the alignment, as long as it doesn't
- // require realigning the stack.
- //
- // FIXME: Do we really want to second-guess the IR in isel?
- Align Alignment =
- std::max(std::min(TyPrefAlign, StackAlign), SpecifiedAlign);
+ Align Alignment = AI->getAlign();
// Static allocas can be folded into the initial stack frame
// adjustment. For targets that don't realign the stack, don't
@@ -165,9 +152,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
false, AI);
}
- // Scalable vectors may need a special StackID to distinguish
- // them from other (fixed size) stack objects.
- if (isa<ScalableVectorType>(Ty))
+ // Scalable vectors and structures that contain scalable vectors may
+ // need a special StackID to distinguish them from other (fixed size)
+ // stack objects.
+ if (Ty->isScalableTy())
MF->getFrameInfo().setStackID(FrameIndex,
TFI->getStackIDForScalableVectors());
@@ -305,18 +293,18 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
for (WinEHHandlerType &H : TBME.HandlerArray) {
if (H.Handler)
- H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()];
+ H.Handler = MBBMap[cast<const BasicBlock *>(H.Handler)];
}
}
for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap)
if (UME.Cleanup)
- UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()];
+ UME.Cleanup = MBBMap[cast<const BasicBlock *>(UME.Cleanup)];
for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) {
- const auto *BB = UME.Handler.get<const BasicBlock *>();
+ const auto *BB = cast<const BasicBlock *>(UME.Handler);
UME.Handler = MBBMap[BB];
}
for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) {
- const auto *BB = CME.Handler.get<const BasicBlock *>();
+ const auto *BB = cast<const BasicBlock *>(CME.Handler);
CME.Handler = MBBMap[BB];
}
} else if (Personality == EHPersonality::Wasm_CXX) {
@@ -326,18 +314,18 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Map all BB references in the Wasm EH data to MBBs.
DenseMap<BBOrMBB, BBOrMBB> SrcToUnwindDest;
for (auto &KV : EHInfo.SrcToUnwindDest) {
- const auto *Src = KV.first.get<const BasicBlock *>();
- const auto *Dest = KV.second.get<const BasicBlock *>();
+ const auto *Src = cast<const BasicBlock *>(KV.first);
+ const auto *Dest = cast<const BasicBlock *>(KV.second);
SrcToUnwindDest[MBBMap[Src]] = MBBMap[Dest];
}
EHInfo.SrcToUnwindDest = std::move(SrcToUnwindDest);
DenseMap<BBOrMBB, SmallPtrSet<BBOrMBB, 4>> UnwindDestToSrcs;
for (auto &KV : EHInfo.UnwindDestToSrcs) {
- const auto *Dest = KV.first.get<const BasicBlock *>();
+ const auto *Dest = cast<const BasicBlock *>(KV.first);
UnwindDestToSrcs[MBBMap[Dest]] = SmallPtrSet<BBOrMBB, 4>();
for (const auto P : KV.second)
UnwindDestToSrcs[MBBMap[Dest]].insert(
- MBBMap[P.get<const BasicBlock *>()]);
+ MBBMap[cast<const BasicBlock *>(P)]);
}
EHInfo.UnwindDestToSrcs = std::move(UnwindDestToSrcs);
}
@@ -361,6 +349,7 @@ void FunctionLoweringInfo::clear() {
StatepointStackSlots.clear();
StatepointRelocationMaps.clear();
PreferredExtendType.clear();
+ PreprocessedDbgDeclares.clear();
}
/// CreateReg - Allocate a single virtual register for the given type.
@@ -394,8 +383,8 @@ Register FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
}
Register FunctionLoweringInfo::CreateRegs(const Value *V) {
- return CreateRegs(V->getType(), DA && DA->isDivergent(V) &&
- !TLI->requiresUniformRegister(*MF, V));
+ return CreateRegs(V->getType(), UA && UA->isDivergent(V) &&
+ !TLI->requiresUniformRegister(*MF, V));
}
/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
@@ -517,7 +506,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
return;
}
DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits);
- DestLOI.Known = KnownBits::commonBits(DestLOI.Known, SrcLOI->Known);
+ DestLOI.Known = DestLOI.Known.intersectWith(SrcLOI->Known);
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 338172e4e10a..4e7895c0b3cf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -1078,6 +1078,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
if (Flags.hasNoFPExcept())
MI->setFlag(MachineInstr::MIFlag::NoFPExcept);
+
+ if (Flags.hasUnpredictable())
+ MI->setFlag(MachineInstr::MIFlag::Unpredictable);
}
// Emit all of the actual operands of this instruction, adding them to the
@@ -1161,6 +1164,13 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
}
}
+  // Add rounding control registers as implicit defs for function calls.
+ if (II.isCall() && MF->getFunction().hasFnAttribute(Attribute::StrictFP)) {
+ ArrayRef<MCPhysReg> RCRegs = TLI->getRoundingControlRegisters();
+ for (MCPhysReg Reg : RCRegs)
+ UsedRegs.push_back(Reg);
+ }
+
// Finally mark unused registers as dead.
if (!UsedRegs.empty() || !II.implicit_defs().empty() || II.hasOptionalDef())
MIB->setPhysRegsDeadExcept(UsedRegs, *TRI);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index c3106216a060..61fc31715d71 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -41,7 +42,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -133,8 +133,11 @@ private:
SDValue N1, SDValue N2,
ArrayRef<int> Mask) const;
- SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ std::pair<SDValue, SDValue> ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+ TargetLowering::ArgListTy &&Args, bool isSigned);
+ std::pair<SDValue, SDValue> ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ void ExpandFrexpLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall LC,
SmallVectorImpl<SDValue> &Results);
void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
@@ -172,6 +175,9 @@ private:
SDValue ExpandFCOPYSIGN(SDNode *Node) const;
SDValue ExpandFABS(SDNode *Node) const;
SDValue ExpandFNEG(SDNode *Node) const;
+ SDValue expandLdexp(SDNode *Node) const;
+ SDValue expandFrexp(SDNode *Node) const;
+
SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain);
void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl,
SmallVectorImpl<SDValue> &Results);
@@ -880,8 +886,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// If the source type is not legal, see if there is a legal extload to
// an intermediate type that we can then extend further.
EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT());
- if (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT?
- TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT)) {
+ if ((LoadVT.isFloatingPoint() == SrcVT.isFloatingPoint()) &&
+ (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT?
+ TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT))) {
// If we are loading a legal type, this is a non-extload followed by a
// full extend.
ISD::LoadExtType MidExtType =
@@ -999,6 +1006,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
if (Action != TargetLowering::Promote)
Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other);
break;
+ case ISD::SET_FPENV:
+ Action = TLI.getOperationAction(Node->getOpcode(),
+ Node->getOperand(1).getValueType());
+ break;
case ISD::FP_TO_FP16:
case ISD::FP_TO_BF16:
case ISD::SINT_TO_FP:
@@ -1199,6 +1210,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
case ISD::IS_FPCLASS:
Action = TLI.getOperationAction(
Node->getOpcode(), Node->getOperand(0).getValueType());
@@ -1546,7 +1559,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State,
auto &DataLayout = DAG.getDataLayout();
// Store the float to memory, then load the sign part out as an integer.
- MVT LoadTy = TLI.getRegisterType(*DAG.getContext(), MVT::i8);
+ MVT LoadTy = TLI.getRegisterType(MVT::i8);
// First create a temporary that is aligned for both the load and store.
SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
@@ -2015,23 +2028,14 @@ SDValue SelectionDAGLegalize::ExpandSPLAT_VECTOR(SDNode *Node) {
return DAG.getSplatBuildVector(VT, DL, SplatVal);
}
-// Expand a node into a call to a libcall. If the result value
-// does not fit into a register, return the lo part and set the hi part to the
-// by-reg argument. If it does fit into a single register, return the result
-// and leave the Hi part unset.
-SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+// Expand a node into a call to a libcall, returning the value as the first
+// result and the chain as the second. If the result value does not fit into a
+// register, return the lo part and set the hi part to the by-reg argument in
+// the first. If it does fit into a single register, return the result and
+// leave the Hi part unset.
+std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+ TargetLowering::ArgListTy &&Args,
bool isSigned) {
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- for (const SDValue &Op : Node->op_values()) {
- EVT ArgVT = Op.getValueType();
- Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
- Entry.Node = Op;
- Entry.Ty = ArgTy;
- Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned);
- Entry.IsZExt = !TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned);
- Args.push_back(Entry);
- }
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
TLI.getPointerTy(DAG.getDataLayout()));
@@ -2070,11 +2074,69 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
if (!CallInfo.second.getNode()) {
LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump(&DAG));
// It's a tailcall, return the chain (which is the DAG root).
- return DAG.getRoot();
+ return {DAG.getRoot(), DAG.getRoot()};
}
LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump(&DAG));
- return CallInfo.first;
+ return CallInfo;
+}
+
+std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
+ bool isSigned) {
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (const SDValue &Op : Node->op_values()) {
+ EVT ArgVT = Op.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Op;
+ Entry.Ty = ArgTy;
+ Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned);
+ Entry.IsZExt = !Entry.IsSExt;
+ Args.push_back(Entry);
+ }
+
+ return ExpandLibCall(LC, Node, std::move(Args), isSigned);
+}
+
+void SelectionDAGLegalize::ExpandFrexpLibCall(
+ SDNode *Node, SmallVectorImpl<SDValue> &Results) {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT ExpVT = Node->getValueType(1);
+
+ SDValue FPOp = Node->getOperand(0);
+
+ EVT ArgVT = FPOp.getValueType();
+ Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+ TargetLowering::ArgListEntry FPArgEntry;
+ FPArgEntry.Node = FPOp;
+ FPArgEntry.Ty = ArgTy;
+
+ SDValue StackSlot = DAG.CreateStackTemporary(ExpVT);
+ TargetLowering::ArgListEntry PtrArgEntry;
+ PtrArgEntry.Node = StackSlot;
+ PtrArgEntry.Ty = PointerType::get(*DAG.getContext(),
+ DAG.getDataLayout().getAllocaAddrSpace());
+
+ TargetLowering::ArgListTy Args = {FPArgEntry, PtrArgEntry};
+
+ RTLIB::Libcall LC = RTLIB::getFREXP(VT);
+ auto [Call, Chain] = ExpandLibCall(LC, Node, std::move(Args), false);
+
+ // FIXME: Get type of int for libcall declaration and cast
+
+ int FrameIdx = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+ auto PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+
+ SDValue LoadExp = DAG.getLoad(ExpVT, dl, Chain, StackSlot, PtrInfo);
+ SDValue OutputChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ LoadExp.getValue(1), DAG.getRoot());
+ DAG.setRoot(OutputChain);
+
+ Results.push_back(Call);
+ Results.push_back(LoadExp);
}
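
The libcall expansion above follows the C frexp contract: the fractional value comes back as the return value and the exponent through an int out-parameter, which is why the legalizer passes a stack temporary as the pointer argument and loads the exponent back afterwards. A minimal standalone C++ sketch of that contract (plain libm usage, not the SelectionDAG code):

#include <cmath>
#include <cstdio>

int main() {
  int exp = 0;                            // stands in for the stack temporary
  double fract = std::frexp(48.0, &exp);  // value result + pointer out-param
  // 48.0 == 0.75 * 2^6, so this prints "fract=0.75 exp=6".
  std::printf("fract=%g exp=%d\n", fract, exp);
  return 0;
}
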
void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
@@ -2095,7 +2157,7 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
Results.push_back(Tmp.first);
Results.push_back(Tmp.second);
} else {
- SDValue Tmp = ExpandLibCall(LC, Node, false);
+ SDValue Tmp = ExpandLibCall(LC, Node, false).first;
Results.push_back(Tmp);
}
}
@@ -2129,7 +2191,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
case MVT::i64: LC = Call_I64; break;
case MVT::i128: LC = Call_I128; break;
}
- return ExpandLibCall(LC, Node, isSigned);
+ return ExpandLibCall(LC, Node, isSigned).first;
}
/// Expand the node to a libcall based on first argument type (for instance
@@ -2309,6 +2371,237 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, MachinePointerInfo()));
}
+SDValue SelectionDAGLegalize::expandLdexp(SDNode *Node) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue X = Node->getOperand(0);
+ SDValue N = Node->getOperand(1);
+ EVT ExpVT = N.getValueType();
+ EVT AsIntVT = VT.changeTypeToInteger();
+ if (AsIntVT == EVT()) // TODO: How to handle f80?
+ return SDValue();
+
+ if (Node->getOpcode() == ISD::STRICT_FLDEXP) // TODO
+ return SDValue();
+
+ SDNodeFlags NSW;
+ NSW.setNoSignedWrap(true);
+ SDNodeFlags NUW_NSW;
+ NUW_NSW.setNoUnsignedWrap(true);
+ NUW_NSW.setNoSignedWrap(true);
+
+ EVT SetCCVT =
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ExpVT);
+ const fltSemantics &FltSem = SelectionDAG::EVTToAPFloatSemantics(VT);
+
+ const APFloat::ExponentType MaxExpVal = APFloat::semanticsMaxExponent(FltSem);
+ const APFloat::ExponentType MinExpVal = APFloat::semanticsMinExponent(FltSem);
+ const int Precision = APFloat::semanticsPrecision(FltSem);
+
+ const SDValue MaxExp = DAG.getConstant(MaxExpVal, dl, ExpVT);
+ const SDValue MinExp = DAG.getConstant(MinExpVal, dl, ExpVT);
+
+ const SDValue DoubleMaxExp = DAG.getConstant(2 * MaxExpVal, dl, ExpVT);
+
+ const APFloat One(FltSem, "1.0");
+ APFloat ScaleUpK = scalbn(One, MaxExpVal, APFloat::rmNearestTiesToEven);
+
+ // Offset by precision to avoid denormal range.
+ APFloat ScaleDownK =
+ scalbn(One, MinExpVal + Precision, APFloat::rmNearestTiesToEven);
+
+ // TODO: Should really introduce control flow and use a block for the >
+ // MaxExp, < MinExp cases
+
+ // First, handle exponents Exp > MaxExp and scale down.
+ SDValue NGtMaxExp = DAG.getSetCC(dl, SetCCVT, N, MaxExp, ISD::SETGT);
+
+ SDValue DecN0 = DAG.getNode(ISD::SUB, dl, ExpVT, N, MaxExp, NSW);
+ SDValue ClampMaxVal = DAG.getConstant(3 * MaxExpVal, dl, ExpVT);
+ SDValue ClampN_Big = DAG.getNode(ISD::SMIN, dl, ExpVT, N, ClampMaxVal);
+ SDValue DecN1 =
+ DAG.getNode(ISD::SUB, dl, ExpVT, ClampN_Big, DoubleMaxExp, NSW);
+
+ SDValue ScaleUpTwice =
+ DAG.getSetCC(dl, SetCCVT, N, DoubleMaxExp, ISD::SETUGT);
+
+ const SDValue ScaleUpVal = DAG.getConstantFP(ScaleUpK, dl, VT);
+ SDValue ScaleUp0 = DAG.getNode(ISD::FMUL, dl, VT, X, ScaleUpVal);
+ SDValue ScaleUp1 = DAG.getNode(ISD::FMUL, dl, VT, ScaleUp0, ScaleUpVal);
+
+ SDValue SelectN_Big =
+ DAG.getNode(ISD::SELECT, dl, ExpVT, ScaleUpTwice, DecN1, DecN0);
+ SDValue SelectX_Big =
+ DAG.getNode(ISD::SELECT, dl, VT, ScaleUpTwice, ScaleUp1, ScaleUp0);
+
+ // Now handle exponents Exp < MinExp
+ SDValue NLtMinExp = DAG.getSetCC(dl, SetCCVT, N, MinExp, ISD::SETLT);
+
+ SDValue Increment0 = DAG.getConstant(-(MinExpVal + Precision), dl, ExpVT);
+ SDValue Increment1 = DAG.getConstant(-2 * (MinExpVal + Precision), dl, ExpVT);
+
+ SDValue IncN0 = DAG.getNode(ISD::ADD, dl, ExpVT, N, Increment0, NUW_NSW);
+
+ SDValue ClampMinVal =
+ DAG.getConstant(3 * MinExpVal + 2 * Precision, dl, ExpVT);
+ SDValue ClampN_Small = DAG.getNode(ISD::SMAX, dl, ExpVT, N, ClampMinVal);
+ SDValue IncN1 =
+ DAG.getNode(ISD::ADD, dl, ExpVT, ClampN_Small, Increment1, NSW);
+
+ const SDValue ScaleDownVal = DAG.getConstantFP(ScaleDownK, dl, VT);
+ SDValue ScaleDown0 = DAG.getNode(ISD::FMUL, dl, VT, X, ScaleDownVal);
+ SDValue ScaleDown1 = DAG.getNode(ISD::FMUL, dl, VT, ScaleDown0, ScaleDownVal);
+
+ SDValue ScaleDownTwice = DAG.getSetCC(
+ dl, SetCCVT, N, DAG.getConstant(2 * MinExpVal + Precision, dl, ExpVT),
+ ISD::SETULT);
+
+ SDValue SelectN_Small =
+ DAG.getNode(ISD::SELECT, dl, ExpVT, ScaleDownTwice, IncN1, IncN0);
+ SDValue SelectX_Small =
+ DAG.getNode(ISD::SELECT, dl, VT, ScaleDownTwice, ScaleDown1, ScaleDown0);
+
+ // Now combine the two out of range exponent handling cases with the base
+ // case.
+ SDValue NewX = DAG.getNode(
+ ISD::SELECT, dl, VT, NGtMaxExp, SelectX_Big,
+ DAG.getNode(ISD::SELECT, dl, VT, NLtMinExp, SelectX_Small, X));
+
+ SDValue NewN = DAG.getNode(
+ ISD::SELECT, dl, ExpVT, NGtMaxExp, SelectN_Big,
+ DAG.getNode(ISD::SELECT, dl, ExpVT, NLtMinExp, SelectN_Small, N));
+
+ SDValue BiasedN = DAG.getNode(ISD::ADD, dl, ExpVT, NewN, MaxExp, NSW);
+
+ SDValue ExponentShiftAmt =
+ DAG.getShiftAmountConstant(Precision - 1, ExpVT, dl);
+ SDValue CastExpToValTy = DAG.getZExtOrTrunc(BiasedN, dl, AsIntVT);
+
+ SDValue AsInt = DAG.getNode(ISD::SHL, dl, AsIntVT, CastExpToValTy,
+ ExponentShiftAmt, NUW_NSW);
+ SDValue AsFP = DAG.getNode(ISD::BITCAST, dl, VT, AsInt);
+ return DAG.getNode(ISD::FMUL, dl, VT, NewX, AsFP);
+}
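
Once the exponent has been clamped and any pre-scaling applied, the expansion builds 2^N directly by shifting the biased exponent into the exponent field and multiplying. A hedged standalone sketch of that in-range step for double (the helper name is illustrative, and it assumes n + 1023 stays inside [1, 2046]; the selects above supply the over/underflow handling):

#include <cstdint>
#include <cstring>
#include <cstdio>

// In-range ldexp for double: x * 2^n, valid only while n + 1023 is a legal
// biased exponent. The legalizer's clamps and extra multiplies cover the rest.
double ldexp_inrange(double x, int n) {
  uint64_t bits = (uint64_t)(n + 1023) << 52; // biased exponent into place
  double scale;
  std::memcpy(&scale, &bits, sizeof scale);   // bitcast integer -> double
  return x * scale;
}

int main() {
  std::printf("%g\n", ldexp_inrange(1.5, 10)); // prints 1536
  return 0;
}
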
+
+SDValue SelectionDAGLegalize::expandFrexp(SDNode *Node) const {
+ SDLoc dl(Node);
+ SDValue Val = Node->getOperand(0);
+ EVT VT = Val.getValueType();
+ EVT ExpVT = Node->getValueType(1);
+ EVT AsIntVT = VT.changeTypeToInteger();
+ if (AsIntVT == EVT()) // TODO: How to handle f80?
+ return SDValue();
+
+ const fltSemantics &FltSem = SelectionDAG::EVTToAPFloatSemantics(VT);
+ const APFloat::ExponentType MinExpVal = APFloat::semanticsMinExponent(FltSem);
+ const unsigned Precision = APFloat::semanticsPrecision(FltSem);
+ const unsigned BitSize = VT.getScalarSizeInBits();
+
+ // TODO: Could introduce control flow and skip over the denormal handling.
+
+ // scale_up = fmul value, scalbn(1.0, precision + 1)
+ // extracted_exp = (bitcast value to uint) >> precision - 1
+ // biased_exp = extracted_exp + min_exp
+ // extracted_fract = (bitcast value to uint) & (fract_mask | sign_mask)
+ //
+ // is_denormal = val < smallest_normalized
+ // computed_fract = is_denormal ? scale_up : extracted_fract
+ // computed_exp = is_denormal ? biased_exp + (-precision - 1) : biased_exp
+ //
+ // result_0 = (!isfinite(val) || iszero(val)) ? val : computed_fract
+ // result_1 = (!isfinite(val) || iszero(val)) ? 0 : computed_exp
+
+ SDValue NegSmallestNormalizedInt = DAG.getConstant(
+ APFloat::getSmallestNormalized(FltSem, true).bitcastToAPInt(), dl,
+ AsIntVT);
+
+ SDValue SmallestNormalizedInt = DAG.getConstant(
+ APFloat::getSmallestNormalized(FltSem, false).bitcastToAPInt(), dl,
+ AsIntVT);
+
+ // Masks out the exponent bits.
+ SDValue ExpMask =
+ DAG.getConstant(APFloat::getInf(FltSem).bitcastToAPInt(), dl, AsIntVT);
+
+ // Mask out the exponent part of the value.
+ //
+  // e.g., for f32 FractSignMaskVal = 0x807fffff
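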
+ APInt FractSignMaskVal = APInt::getBitsSet(BitSize, 0, Precision - 1);
+ FractSignMaskVal.setBit(BitSize - 1); // Set the sign bit
+
+ APInt SignMaskVal = APInt::getSignedMaxValue(BitSize);
+ SDValue SignMask = DAG.getConstant(SignMaskVal, dl, AsIntVT);
+
+ SDValue FractSignMask = DAG.getConstant(FractSignMaskVal, dl, AsIntVT);
+
+ const APFloat One(FltSem, "1.0");
+ // Scale a possible denormal input.
+ // e.g., for f64, 0x1p+54
+ APFloat ScaleUpKVal =
+ scalbn(One, Precision + 1, APFloat::rmNearestTiesToEven);
+
+ SDValue ScaleUpK = DAG.getConstantFP(ScaleUpKVal, dl, VT);
+ SDValue ScaleUp = DAG.getNode(ISD::FMUL, dl, VT, Val, ScaleUpK);
+
+ EVT SetCCVT =
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ SDValue AsInt = DAG.getNode(ISD::BITCAST, dl, AsIntVT, Val);
+
+ SDValue Abs = DAG.getNode(ISD::AND, dl, AsIntVT, AsInt, SignMask);
+
+ SDValue AddNegSmallestNormal =
+ DAG.getNode(ISD::ADD, dl, AsIntVT, Abs, NegSmallestNormalizedInt);
+ SDValue DenormOrZero = DAG.getSetCC(dl, SetCCVT, AddNegSmallestNormal,
+ NegSmallestNormalizedInt, ISD::SETULE);
+
+ SDValue IsDenormal =
+ DAG.getSetCC(dl, SetCCVT, Abs, SmallestNormalizedInt, ISD::SETULT);
+
+ SDValue MinExp = DAG.getConstant(MinExpVal, dl, ExpVT);
+ SDValue Zero = DAG.getConstant(0, dl, ExpVT);
+
+ SDValue ScaledAsInt = DAG.getNode(ISD::BITCAST, dl, AsIntVT, ScaleUp);
+ SDValue ScaledSelect =
+ DAG.getNode(ISD::SELECT, dl, AsIntVT, IsDenormal, ScaledAsInt, AsInt);
+
+ SDValue ExpMaskScaled =
+ DAG.getNode(ISD::AND, dl, AsIntVT, ScaledAsInt, ExpMask);
+
+ SDValue ScaledValue =
+ DAG.getNode(ISD::SELECT, dl, AsIntVT, IsDenormal, ExpMaskScaled, Abs);
+
+ // Extract the exponent bits.
+ SDValue ExponentShiftAmt =
+ DAG.getShiftAmountConstant(Precision - 1, AsIntVT, dl);
+ SDValue ShiftedExp =
+ DAG.getNode(ISD::SRL, dl, AsIntVT, ScaledValue, ExponentShiftAmt);
+ SDValue Exp = DAG.getSExtOrTrunc(ShiftedExp, dl, ExpVT);
+
+ SDValue NormalBiasedExp = DAG.getNode(ISD::ADD, dl, ExpVT, Exp, MinExp);
+ SDValue DenormalOffset = DAG.getConstant(-Precision - 1, dl, ExpVT);
+ SDValue DenormalExpBias =
+ DAG.getNode(ISD::SELECT, dl, ExpVT, IsDenormal, DenormalOffset, Zero);
+
+ SDValue MaskedFractAsInt =
+ DAG.getNode(ISD::AND, dl, AsIntVT, ScaledSelect, FractSignMask);
+ const APFloat Half(FltSem, "0.5");
+ SDValue FPHalf = DAG.getConstant(Half.bitcastToAPInt(), dl, AsIntVT);
+ SDValue Or = DAG.getNode(ISD::OR, dl, AsIntVT, MaskedFractAsInt, FPHalf);
+ SDValue MaskedFract = DAG.getNode(ISD::BITCAST, dl, VT, Or);
+
+ SDValue ComputedExp =
+ DAG.getNode(ISD::ADD, dl, ExpVT, NormalBiasedExp, DenormalExpBias);
+
+ SDValue Result0 =
+ DAG.getNode(ISD::SELECT, dl, VT, DenormOrZero, Val, MaskedFract);
+
+ SDValue Result1 =
+ DAG.getNode(ISD::SELECT, dl, ExpVT, DenormOrZero, Zero, ComputedExp);
+
+ return DAG.getMergeValues({Result0, Result1}, dl);
+}
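
For a normal, finite, nonzero input the expansion above boils down to two bit operations: shift out the stored exponent and rebias it, and splice the exponent pattern of 0.5 onto the sign and mantissa. A standalone sketch of just that path for double (the helper name is illustrative; zeros, infinities, NaNs and denormals take the extra select arms and are not handled here):

#include <cstdint>
#include <cstring>
#include <cstdio>

// frexp for normal, finite, nonzero doubles only.
double frexp_normal(double x, int *exp) {
  uint64_t bits;
  std::memcpy(&bits, &x, sizeof bits);
  *exp = (int)((bits >> 52) & 0x7ff) - 1022;        // rebias for [0.5, 1)
  uint64_t fract = (bits & 0x800FFFFFFFFFFFFFULL)   // keep sign + mantissa
                 | 0x3FE0000000000000ULL;           // exponent bits of 0.5
  double r;
  std::memcpy(&r, &fract, sizeof r);
  return r;
}

int main() {
  int e;
  double f = frexp_normal(48.0, &e);
  std::printf("%g %d\n", f, e);  // prints "0.75 6"
  return 0;
}
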
+
/// This function is responsible for legalizing a
/// INT_TO_FP operation of the specified operand when the target requests that
/// we expand it. At this point, we know that the result and operand types are
@@ -2365,10 +2658,10 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node,
SDValue Load =
DAG.getLoad(MVT::f64, dl, MemChain, StackSlot, MachinePointerInfo());
// FP constant to bias correct the final result
- SDValue Bias = DAG.getConstantFP(isSigned ?
- BitsToDouble(0x4330000080000000ULL) :
- BitsToDouble(0x4330000000000000ULL),
- dl, MVT::f64);
+ SDValue Bias = DAG.getConstantFP(
+ isSigned ? llvm::bit_cast<double>(0x4330000080000000ULL)
+ : llvm::bit_cast<double>(0x4330000000000000ULL),
+ dl, MVT::f64);
// Subtract the bias and get the final result.
SDValue Sub;
SDValue Result;
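
The bias constants encode the classic i32-to-f64 trick: writing the 32-bit integer into the low word of a double whose high word is 0x43300000 yields exactly 2^52 + x, and subtracting the bias leaves x. A sketch of the unsigned case (the helper name is illustrative; the signed case pairs the 0x4330000080000000 bias with a sign-adjusted low word and is omitted):

#include <cstdint>
#include <cstring>
#include <cstdio>

// uint32 -> double via the 2^52 bias trick.
double u32_to_f64(uint32_t x) {
  uint64_t bits = 0x4330000000000000ULL | x;   // double whose value is 2^52 + x
  uint64_t biasBits = 0x4330000000000000ULL;   // 2^52
  double d, bias;
  std::memcpy(&d, &bits, sizeof d);
  std::memcpy(&bias, &biasBits, sizeof bias);
  return d - bias;                             // exact, since x < 2^32 <= 2^52
}

int main() {
  std::printf("%.1f\n", u32_to_f64(4000000000u));  // prints 4000000000.0
  return 0;
}
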
@@ -2696,6 +2989,11 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if ((Tmp1 = TLI.expandABS(Node, DAG)))
Results.push_back(Tmp1);
break;
+ case ISD::ABDS:
+ case ISD::ABDU:
+ if ((Tmp1 = TLI.expandABD(Node, DAG)))
+ Results.push_back(Tmp1);
+ break;
case ISD::CTPOP:
if ((Tmp1 = TLI.expandCTPOP(Node, DAG)))
Results.push_back(Tmp1);
@@ -3241,6 +3539,36 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
break;
}
+ case ISD::FLDEXP:
+ case ISD::STRICT_FLDEXP: {
+ EVT VT = Node->getValueType(0);
+ RTLIB::Libcall LC = RTLIB::getLDEXP(VT);
+    // Use the LibCall instead; it is very likely faster.
+ // FIXME: Use separate LibCall action.
+ if (TLI.getLibcallName(LC))
+ break;
+
+ if (SDValue Expanded = expandLdexp(Node)) {
+ Results.push_back(Expanded);
+ if (Node->getOpcode() == ISD::STRICT_FLDEXP)
+ Results.push_back(Expanded.getValue(1));
+ }
+
+ break;
+ }
+ case ISD::FFREXP: {
+ RTLIB::Libcall LC = RTLIB::getFREXP(Node->getValueType(0));
+    // Use the LibCall instead; it is very likely faster.
+ // FIXME: Use separate LibCall action.
+ if (TLI.getLibcallName(LC))
+ break;
+
+ if (SDValue Expanded = expandFrexp(Node)) {
+ Results.push_back(Expanded);
+ Results.push_back(Expanded.getValue(1));
+ }
+ break;
+ }
case ISD::FMAD:
llvm_unreachable("Illegal fmad should never be formed");
@@ -3477,13 +3805,13 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// if we were allowed to generate libcalls to division functions of illegal
// type. But we cannot do that.
llvm_unreachable("Cannot expand DIVFIX!");
- case ISD::ADDCARRY:
- case ISD::SUBCARRY: {
+ case ISD::UADDO_CARRY:
+ case ISD::USUBO_CARRY: {
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
SDValue Carry = Node->getOperand(2);
- bool IsAdd = Node->getOpcode() == ISD::ADDCARRY;
+ bool IsAdd = Node->getOpcode() == ISD::UADDO_CARRY;
// Initial add of the 2 operands.
unsigned Op = IsAdd ? ISD::ADD : ISD::SUB;
@@ -3628,9 +3956,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
} else {
// We test only the i1 bit. Skip the AND if UNDEF or another AND.
if (Tmp2.isUndef() ||
- (Tmp2.getOpcode() == ISD::AND &&
- isa<ConstantSDNode>(Tmp2.getOperand(1)) &&
- cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1))
+ (Tmp2.getOpcode() == ISD::AND && isOneConstant(Tmp2.getOperand(1))))
Tmp3 = Tmp2;
else
Tmp3 = DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
@@ -3864,6 +4190,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
Results.push_back(TLI.expandVecReduce(Node, DAG));
break;
case ISD::GLOBAL_OFFSET_TABLE:
@@ -4029,6 +4357,9 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::FMIN_F80, RTLIB::FMIN_F128,
RTLIB::FMIN_PPCF128, Results);
break;
+ // FIXME: We do not have libcalls for FMAXIMUM and FMINIMUM. So, we cannot use
+  // libcall legalization for these nodes, but there is no default expansion for
+ // these nodes either (see PR63267 for example).
case ISD::FMAXNUM:
case ISD::STRICT_FMAXNUM:
ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
@@ -4135,6 +4466,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::ROUNDEVEN_F128,
RTLIB::ROUNDEVEN_PPCF128, Results);
break;
+ case ISD::FLDEXP:
+ case ISD::STRICT_FLDEXP:
+ ExpandFPLibCall(Node, RTLIB::LDEXP_F32, RTLIB::LDEXP_F64, RTLIB::LDEXP_F80,
+ RTLIB::LDEXP_F128, RTLIB::LDEXP_PPCF128, Results);
+ break;
+ case ISD::FFREXP: {
+ ExpandFrexpLibCall(Node, Results);
+ break;
+ }
case ISD::FPOWI:
case ISD::STRICT_FPOWI: {
RTLIB::Libcall LC = RTLIB::getPOWI(Node->getSimpleValueType(0));
@@ -4241,7 +4581,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
break;
case ISD::FP16_TO_FP:
if (Node->getValueType(0) == MVT::f32) {
- Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
+ Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false).first);
}
break;
case ISD::STRICT_FP16_TO_FP: {
@@ -4259,14 +4599,14 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::Libcall LC =
RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16");
- Results.push_back(ExpandLibCall(LC, Node, false));
+ Results.push_back(ExpandLibCall(LC, Node, false).first);
break;
}
case ISD::FP_TO_BF16: {
RTLIB::Libcall LC =
RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::bf16);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_bf16");
- Results.push_back(ExpandLibCall(LC, Node, false));
+ Results.push_back(ExpandLibCall(LC, Node, false).first);
break;
}
case ISD::STRICT_SINT_TO_FP:
@@ -4381,7 +4721,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
Results.push_back(
ExpandLibCall(RTLIB::getFPEXT(Node->getOperand(0).getValueType(),
Node->getValueType(0)),
- Node, false));
+ Node, false).first);
break;
}
case ISD::STRICT_FP_EXTEND:
@@ -4447,16 +4787,39 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
default:
llvm_unreachable("LibCall explicitly requested, but not available");
case MVT::i32:
- Results.push_back(ExpandLibCall(RTLIB::CTLZ_I32, Node, false));
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I32, Node, false).first);
break;
case MVT::i64:
- Results.push_back(ExpandLibCall(RTLIB::CTLZ_I64, Node, false));
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I64, Node, false).first);
break;
case MVT::i128:
- Results.push_back(ExpandLibCall(RTLIB::CTLZ_I128, Node, false));
+ Results.push_back(ExpandLibCall(RTLIB::CTLZ_I128, Node, false).first);
break;
}
break;
+ case ISD::RESET_FPENV: {
+    // It is legalized to a call to 'fesetenv(FE_DFL_ENV)'. On most targets,
+    // glibc defines FE_DFL_ENV as '((const fenv_t *) -1)'.
+ SDValue Ptr = DAG.getIntPtrConstant(-1LL, dl);
+ SDValue Chain = Node->getOperand(0);
+ Results.push_back(
+ DAG.makeStateFunctionCall(RTLIB::FESETENV, Ptr, Chain, dl));
+ break;
+ }
+ case ISD::GET_FPENV_MEM: {
+ SDValue Chain = Node->getOperand(0);
+ SDValue EnvPtr = Node->getOperand(1);
+ Results.push_back(
+ DAG.makeStateFunctionCall(RTLIB::FEGETENV, EnvPtr, Chain, dl));
+ break;
+ }
+ case ISD::SET_FPENV_MEM: {
+ SDValue Chain = Node->getOperand(0);
+ SDValue EnvPtr = Node->getOperand(1);
+ Results.push_back(
+ DAG.makeStateFunctionCall(RTLIB::FESETENV, EnvPtr, Chain, dl));
+ break;
+ }
}
// Replace the original node with the legalized result.
@@ -4785,6 +5148,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FREM:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
case ISD::FPOW:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
@@ -4841,6 +5206,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(Tmp4.getValue(1));
break;
case ISD::FCOPYSIGN:
+ case ISD::FLDEXP:
case ISD::FPOWI: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = Node->getOperand(1);
@@ -4867,6 +5233,17 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(Tmp3);
Results.push_back(Tmp3.getValue(1));
break;
+ case ISD::FFREXP: {
+ Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::FFREXP, dl, {NVT, Node->getValueType(1)}, Tmp1);
+
+ Results.push_back(
+ DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2,
+ DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
+
+ Results.push_back(Tmp2.getValue(1));
+ break;
+ }
case ISD::FFLOOR:
case ISD::FCEIL:
case ISD::FRINT:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index f1e80ce7e037..7e035d21ef71 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -59,7 +59,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
dbgs() << "SoftenFloatResult #" << ResNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to soften the result of this operator!");
+ report_fatal_error("Do not know how to soften the result of this "
+ "operator!");
case ISD::ARITH_FENCE: R = SoftenFloatRes_ARITH_FENCE(N); break;
case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
@@ -107,10 +108,16 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
+ case ISD::BF16_TO_FP: R = SoftenFloatRes_BF16_TO_FP(N); break;
case ISD::STRICT_FPOW:
case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break;
case ISD::STRICT_FPOWI:
- case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break;
+ case ISD::FPOWI:
+ case ISD::FLDEXP:
+ case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break;
+ case ISD::FFREXP:
+ R = SoftenFloatRes_FFREXP(N);
+ break;
case ISD::STRICT_FREM:
case ISD::FREM: R = SoftenFloatRes_FREM(N); break;
case ISD::STRICT_FRINT:
@@ -142,6 +149,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
R = SoftenFloatRes_VECREDUCE(N);
break;
case ISD::VECREDUCE_SEQ_FADD:
@@ -510,10 +519,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
return BitConvertToInteger(Op);
}
- // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's
- // entirely possible for both f16 and f32 to be legal, so use the fully
- // hard-float FP_EXTEND rather than FP16_TO_FP.
- if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) {
+ // There's only a libcall for f16 -> f32 and shifting is only valid for bf16
+ // -> f32, so proceed in two stages. Also, it's entirely possible for both
+ // f16 and f32 to be legal, so use the fully hard-float FP_EXTEND rather
+ // than FP16_TO_FP.
+ if ((Op.getValueType() == MVT::f16 || Op.getValueType() == MVT::bf16) &&
+ N->getValueType(0) != MVT::f32) {
if (IsStrict) {
Op = DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N),
{ MVT::f32, MVT::Other }, { Chain, Op });
@@ -523,6 +534,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
}
}
+ if (Op.getValueType() == MVT::bf16)
+ return SoftenFloatRes_BF16_TO_FP(N);
+
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
TargetLowering::MakeLibCallOptions CallOptions;
@@ -555,6 +569,21 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
return TLI.makeLibCall(DAG, LC, NVT, Res32, CallOptions, SDLoc(N)).first;
}
+// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
+// nodes?
+SDValue DAGTypeLegalizer::SoftenFloatRes_BF16_TO_FP(SDNode *N) {
+ assert(N->getValueType(0) == MVT::f32 &&
+ "Can only soften BF16_TO_FP with f32 result");
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32);
+ SDValue Op = N->getOperand(0);
+ SDLoc DL(N);
+ Op = DAG.getNode(ISD::ANY_EXTEND, DL, NVT,
+ DAG.getNode(ISD::BITCAST, DL, MVT::i16, Op));
+ SDValue Res = DAG.getNode(ISD::SHL, DL, NVT, Op,
+ DAG.getShiftAmountConstant(16, NVT, DL));
+ return Res;
+}
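
bf16 is simply the top sixteen bits of an IEEE f32, so the softened conversion is a bitcast plus a 16-bit left shift. The same operation on plain integers, as a quick sketch (the function name is illustrative):

#include <cstdint>
#include <cstring>
#include <cstdio>

// bf16 bit pattern -> float: place the 16 bits in the high half of an f32.
float bf16_to_f32(uint16_t bits) {
  uint32_t f32bits = (uint32_t)bits << 16;
  float f;
  std::memcpy(&f, &f32bits, sizeof f);
  return f;
}

int main() {
  // 0x3FC0 is bf16 for 1.5 (the top half of the f32 pattern 0x3FC00000).
  std::printf("%g\n", bf16_to_f32(0x3FC0));  // prints 1.5
  return 0;
}
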
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -582,13 +611,17 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
RTLIB::POW_PPCF128));
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_ExpOp(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
unsigned Offset = IsStrict ? 1 : 0;
assert((N->getOperand(1 + Offset).getValueType() == MVT::i16 ||
N->getOperand(1 + Offset).getValueType() == MVT::i32) &&
"Unsupported power type!");
- RTLIB::Libcall LC = RTLIB::getPOWI(N->getValueType(0));
+ bool IsPowI =
+ N->getOpcode() == ISD::FPOWI || N->getOpcode() == ISD::STRICT_FPOWI;
+
+ RTLIB::Libcall LC = IsPowI ? RTLIB::getPOWI(N->getValueType(0))
+ : RTLIB::getLDEXP(N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
if (!TLI.getLibcallName(LC)) {
// Some targets don't have a powi libcall; use pow instead.
@@ -621,6 +654,45 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
return Tmp.first;
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FFREXP(SDNode *N) {
+ assert(!N->isStrictFPOpcode() && "strictfp not implemented for frexp");
+ EVT VT0 = N->getValueType(0);
+ EVT VT1 = N->getValueType(1);
+ RTLIB::Libcall LC = RTLIB::getFREXP(VT0);
+
+ if (DAG.getLibInfo().getIntSize() != VT1.getSizeInBits()) {
+    // If the exponent does not match sizeof(int), a libcall would use the
+ // wrong type for the argument.
+ // TODO: Should be able to handle mismatches.
+ DAG.getContext()->emitError("ffrexp exponent does not match sizeof(int)");
+ return DAG.getUNDEF(N->getValueType(0));
+ }
+
+ EVT NVT0 = TLI.getTypeToTransformTo(*DAG.getContext(), VT0);
+ SDValue StackSlot = DAG.CreateStackTemporary(VT1);
+
+ SDLoc DL(N);
+
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SDValue Ops[2] = {GetSoftenedFloat(N->getOperand(0)), StackSlot};
+ EVT OpsVT[2] = {VT0, StackSlot.getValueType()};
+
+ // TODO: setTypeListBeforeSoften can't properly express multiple return types,
+ // but we only really need to handle the 0th one for softening anyway.
+ CallOptions.setTypeListBeforeSoften({OpsVT}, VT0, true);
+
+ auto [ReturnVal, Chain] = TLI.makeLibCall(DAG, LC, NVT0, Ops, CallOptions, DL,
+ /*Chain=*/SDValue());
+ int FrameIdx = cast<FrameIndexSDNode>(StackSlot)->getIndex();
+ auto PtrInfo =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
+
+ SDValue LoadExp = DAG.getLoad(VT1, DL, Chain, StackSlot, PtrInfo);
+
+ ReplaceValueWith(SDValue(N, 1), LoadExp);
+ return ReturnVal;
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
return SoftenFloatRes_Binary(N, GetFPLibCall(N->getValueType(0),
RTLIB::REM_F32,
@@ -828,7 +900,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to soften this operator's operand!");
+ report_fatal_error("Do not know how to soften this operator's operand!");
case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
@@ -1199,7 +1271,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
dbgs() << "ExpandFloatResult #" << ResNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to expand the result of this operator!");
+ report_fatal_error("Do not know how to expand the result of this "
+ "operator!");
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break;
@@ -1253,6 +1326,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break;
case ISD::STRICT_FPOWI:
case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break;
+ case ISD::FLDEXP:
+ case ISD::STRICT_FLDEXP: ExpandFloatRes_FLDEXP(N, Lo, Hi); break;
case ISD::FREEZE: ExpandFloatRes_FREEZE(N, Lo, Hi); break;
case ISD::STRICT_FRINT:
case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break;
@@ -1548,6 +1623,11 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N,
ExpandFloatRes_Binary(N, RTLIB::getPOWI(N->getValueType(0)), Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FLDEXP(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ ExpandFloatRes_Binary(N, RTLIB::getLDEXP(N->getValueType(0)), Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FREEZE(SDNode *N,
SDValue &Lo, SDValue &Hi) {
assert(N->getValueType(0) == MVT::ppcf128 &&
@@ -1785,7 +1865,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
dbgs() << "ExpandFloatOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to expand this operator's operand!");
+ report_fatal_error("Do not know how to expand this operator's operand!");
case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
@@ -2106,7 +2186,7 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
dbgs() << "PromoteFloatOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to promote this operator's operand!");
+ report_fatal_error("Do not know how to promote this operator's operand!");
case ISD::BITCAST: R = PromoteFloatOp_BITCAST(N, OpNo); break;
case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break;
@@ -2245,7 +2325,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
dbgs() << "PromoteFloatResult #" << ResNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to promote this operator's result!");
+ report_fatal_error("Do not know how to promote this operator's result!");
case ISD::BITCAST: R = PromoteFloatRes_BITCAST(N); break;
case ISD::ConstantFP: R = PromoteFloatRes_ConstantFP(N); break;
@@ -2289,7 +2369,9 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FMA: // FMA is same as FMAD
case ISD::FMAD: R = PromoteFloatRes_FMAD(N); break;
- case ISD::FPOWI: R = PromoteFloatRes_FPOWI(N); break;
+ case ISD::FPOWI:
+ case ISD::FLDEXP: R = PromoteFloatRes_ExpOp(N); break;
+ case ISD::FFREXP: R = PromoteFloatRes_FFREXP(N); break;
case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break;
case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break;
@@ -2304,6 +2386,8 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
R = PromoteFloatRes_VECREDUCE(N);
break;
case ISD::VECREDUCE_SEQ_FADD:
@@ -2458,7 +2542,7 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) {
}
// Promote the Float (first) operand and retain the Integer (second) operand
-SDValue DAGTypeLegalizer::PromoteFloatRes_FPOWI(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteFloatRes_ExpOp(SDNode *N) {
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Op0 = GetPromotedFloat(N->getOperand(0));
@@ -2467,6 +2551,17 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_FPOWI(SDNode *N) {
return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1);
}
+SDValue DAGTypeLegalizer::PromoteFloatRes_FFREXP(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ SDValue Op = GetPromotedFloat(N->getOperand(0));
+ SDValue Res =
+ DAG.getNode(N->getOpcode(), SDLoc(N), {NVT, N->getValueType(1)}, Op);
+
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
// Explicit operation to reduce precision. Reduce the value to half precision
// and promote it back to the legal type.
SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) {
@@ -2608,7 +2703,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
dbgs() << "SoftPromoteHalfResult #" << ResNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to soft promote this operator's result!");
+ report_fatal_error("Do not know how to soft promote this operator's "
+ "result!");
case ISD::BITCAST: R = SoftPromoteHalfRes_BITCAST(N); break;
case ISD::ConstantFP: R = SoftPromoteHalfRes_ConstantFP(N); break;
@@ -2655,7 +2751,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::FMA: // FMA is same as FMAD
case ISD::FMAD: R = SoftPromoteHalfRes_FMAD(N); break;
- case ISD::FPOWI: R = SoftPromoteHalfRes_FPOWI(N); break;
+ case ISD::FPOWI:
+ case ISD::FLDEXP: R = SoftPromoteHalfRes_ExpOp(N); break;
case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break;
case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break;
@@ -2668,6 +2765,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
R = SoftPromoteHalfRes_VECREDUCE(N);
break;
case ISD::VECREDUCE_SEQ_FADD:
@@ -2767,7 +2866,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FMAD(SDNode *N) {
return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res);
}
-SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FPOWI(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ExpOp(SDNode *N) {
EVT OVT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0));
@@ -2916,7 +3015,8 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) {
dbgs() << "SoftPromoteHalfOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to soft promote this operator's operand!");
+ report_fatal_error("Do not know how to soft promote this operator's "
+ "operand!");
case ISD::BITCAST: Res = SoftPromoteHalfOp_BITCAST(N); break;
case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index c9ce9071a25d..df5878fcdf2e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -20,6 +20,7 @@
#include "LegalizeTypes.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
@@ -54,7 +55,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
dbgs() << "PromoteIntegerResult #" << ResNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to promote this operator!");
+ report_fatal_error("Do not know how to promote this operator!");
case ISD::MERGE_VALUES:Res = PromoteIntRes_MERGE_VALUES(N, ResNo); break;
case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
@@ -115,6 +116,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
case ISD::VECTOR_SPLICE:
Res = PromoteIntRes_VECTOR_SPLICE(N); break;
+ case ISD::VECTOR_INTERLEAVE:
+ case ISD::VECTOR_DEINTERLEAVE:
+ Res = PromoteIntRes_VECTOR_INTERLEAVE_DEINTERLEAVE(N);
+ return;
case ISD::INSERT_VECTOR_ELT:
Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break;
case ISD::BUILD_VECTOR:
@@ -134,7 +139,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_EXTEND_VECTOR_INREG(N); break;
case ISD::SIGN_EXTEND:
+ case ISD::VP_SIGN_EXTEND:
case ISD::ZERO_EXTEND:
+ case ISD::VP_ZERO_EXTEND:
case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
case ISD::VP_FP_TO_SINT:
@@ -191,8 +198,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ADDE:
case ISD::SUBE:
- case ISD::ADDCARRY:
- case ISD::SUBCARRY: Res = PromoteIntRes_ADDSUBCARRY(N, ResNo); break;
+ case ISD::UADDO_CARRY:
+ case ISD::USUBO_CARRY: Res = PromoteIntRes_UADDSUBO_CARRY(N, ResNo); break;
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY: Res = PromoteIntRes_SADDSUBO_CARRY(N, ResNo); break;
@@ -279,6 +286,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::IS_FPCLASS:
Res = PromoteIntRes_IS_FPCLASS(N);
break;
+ case ISD::FFREXP:
+ Res = PromoteIntRes_FFREXP(N);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -753,8 +763,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!");
// If the result and operand types are the same after promotion, simplify
- // to an in-register extension.
- if (NVT == Res.getValueType()) {
+ // to an in-register extension. Unless this is a VP_*_EXTEND.
+ if (NVT == Res.getValueType() && N->getNumOperands() == 1) {
// The high bits are not guaranteed to be anything. Insert an extend.
if (N->getOpcode() == ISD::SIGN_EXTEND)
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
@@ -767,6 +777,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
}
// Otherwise, just extend the original operand all the way to the larger type.
+ if (N->getNumOperands() != 1) {
+ assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ assert(N->isVPOpcode() && "Expected VP opcode");
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0),
+ N->getOperand(1), N->getOperand(2));
+ }
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
}
@@ -1023,14 +1039,8 @@ static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS,
if (VT.isVector())
WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
VT.getVectorElementCount());
- if (Signed) {
- LHS = DAG.getSExtOrTrunc(LHS, dl, WideVT);
- RHS = DAG.getSExtOrTrunc(RHS, dl, WideVT);
- } else {
- LHS = DAG.getZExtOrTrunc(LHS, dl, WideVT);
- RHS = DAG.getZExtOrTrunc(RHS, dl, WideVT);
- }
-
+ LHS = DAG.getExtOrTrunc(Signed, LHS, dl, WideVT);
+ RHS = DAG.getExtOrTrunc(Signed, RHS, dl, WideVT);
SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale,
DAG);
assert(Res && "Expanding DIVFIX with wide type failed?");
@@ -1177,16 +1187,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) {
// Get the SETCC result using the canonical SETCC type.
SDValue SetCC;
if (N->isStrictFPOpcode()) {
- EVT VTs[] = {SVT, MVT::Other};
+ SDVTList VTs = DAG.getVTList({SVT, MVT::Other});
SDValue Opers[] = {N->getOperand(0), N->getOperand(1),
N->getOperand(2), N->getOperand(3)};
- SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers);
+ SetCC = DAG.getNode(N->getOpcode(), dl, VTs, Opers, N->getFlags());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), SetCC.getValue(1));
} else
SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0),
- N->getOperand(1), N->getOperand(2));
+ N->getOperand(1), N->getOperand(2), N->getFlags());
// Convert to the expected type.
return DAG.getSExtOrTrunc(SetCC, dl, NVT);
@@ -1200,6 +1210,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_IS_FPCLASS(SDNode *N) {
return DAG.getNode(ISD::IS_FPCLASS, DL, NResVT, Arg, Test);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_FFREXP(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
+ EVT VT = N->getValueType(0);
+
+ SDLoc dl(N);
+ SDValue Res =
+ DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, NVT), N->getOperand(0));
+
+ ReplaceValueWith(SDValue(N, 0), Res);
+ return Res.getValue(1);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) {
SDValue LHS = GetPromotedInteger(N->getOperand(0));
SDValue RHS = N->getOperand(1);
@@ -1445,23 +1467,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
return Res;
}
-// Handle promotion for the ADDE/SUBE/ADDCARRY/SUBCARRY nodes. Notice that
+// Handle promotion for the ADDE/SUBE/UADDO_CARRY/USUBO_CARRY nodes. Notice that
// the third operand of ADDE/SUBE nodes is carry flag, which differs from
-// the ADDCARRY/SUBCARRY nodes in that the third operand is carry Boolean.
-SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
+// the UADDO_CARRY/USUBO_CARRY nodes in that the third operand is carry Boolean.
+SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO_CARRY(SDNode *N,
+ unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
// We need to sign-extend the operands so the carry value computed by the
// wide operation will be equivalent to the carry value computed by the
// narrow operation.
- // An ADDCARRY can generate carry only if any of the operands has its
+  // A UADDO_CARRY can generate carry only if any of the operands has its
// most significant bit set. Sign extension propagates the most significant
// bit into the higher bits which means the extra bit that the narrow
// addition would need (i.e. the carry) will be propagated through the higher
// bits of the wide addition.
- // A SUBCARRY can generate borrow only if LHS < RHS and this property will be
- // preserved by sign extension.
+ // A USUBO_CARRY can generate borrow only if LHS < RHS and this property will
+ // be preserved by sign extension.
SDValue LHS = SExtPromotedInteger(N->getOperand(0));
SDValue RHS = SExtPromotedInteger(N->getOperand(1));
@@ -1629,7 +1652,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to promote this operator's operand!");
+ report_fatal_error("Do not know how to promote this operator's operand!");
case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
case ISD::ATOMIC_STORE:
@@ -1655,6 +1678,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::VP_SETCC:
case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break;
+ case ISD::VP_SIGN_EXTEND: Res = PromoteIntOp_VP_SIGN_EXTEND(N); break;
case ISD::VP_SINT_TO_FP:
case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break;
case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break;
@@ -1676,6 +1700,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break;
case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break;
case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break;
+ case ISD::VP_ZERO_EXTEND: Res = PromoteIntOp_VP_ZERO_EXTEND(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break;
case ISD::INSERT_SUBVECTOR: Res = PromoteIntOp_INSERT_SUBVECTOR(N); break;
@@ -1690,8 +1715,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY:
- case ISD::ADDCARRY:
- case ISD::SUBCARRY: Res = PromoteIntOp_ADDSUBCARRY(N, OpNo); break;
+ case ISD::UADDO_CARRY:
+ case ISD::USUBO_CARRY: Res = PromoteIntOp_ADDSUBO_CARRY(N, OpNo); break;
case ISD::FRAMEADDR:
case ISD::RETURNADDR: Res = PromoteIntOp_FRAMERETURNADDR(N); break;
@@ -1706,10 +1731,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
case ISD::UDIVFIXSAT: Res = PromoteIntOp_FIX(N); break;
-
case ISD::FPOWI:
- case ISD::STRICT_FPOWI: Res = PromoteIntOp_FPOWI(N); break;
-
+ case ISD::STRICT_FPOWI:
+ case ISD::FLDEXP:
+ case ISD::STRICT_FLDEXP: Res = PromoteIntOp_ExpOp(N); break;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
case ISD::VECREDUCE_AND:
@@ -2005,6 +2030,23 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
Op, DAG.getValueType(N->getOperand(0).getValueType()));
}
+SDValue DAGTypeLegalizer::PromoteIntOp_VP_SIGN_EXTEND(SDNode *N) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ // FIXME: There is no VP_ANY_EXTEND yet.
+ Op = DAG.getNode(ISD::VP_ZERO_EXTEND, dl, VT, Op, N->getOperand(1),
+ N->getOperand(2));
+ unsigned Diff =
+ VT.getScalarSizeInBits() - N->getOperand(0).getScalarValueSizeInBits();
+ SDValue ShAmt = DAG.getShiftAmountConstant(Diff, VT, dl);
+ // FIXME: There is no VP_SIGN_EXTEND_INREG so use a pair of shifts.
+ SDValue Shl = DAG.getNode(ISD::VP_SHL, dl, VT, Op, ShAmt, N->getOperand(1),
+ N->getOperand(2));
+ return DAG.getNode(ISD::VP_ASHR, dl, VT, Shl, ShAmt, N->getOperand(1),
+ N->getOperand(2));
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
if (N->getOpcode() == ISD::VP_SINT_TO_FP)
return SDValue(DAG.UpdateNodeOperands(N,
@@ -2156,7 +2198,20 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
}
-SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
+SDValue DAGTypeLegalizer::PromoteIntOp_VP_ZERO_EXTEND(SDNode *N) {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue Op = GetPromotedInteger(N->getOperand(0));
+ // FIXME: There is no VP_ANY_EXTEND yet.
+ Op = DAG.getNode(ISD::VP_ZERO_EXTEND, dl, VT, Op, N->getOperand(1),
+ N->getOperand(2));
+ APInt Imm = APInt::getLowBitsSet(VT.getScalarSizeInBits(),
+ N->getOperand(0).getScalarValueSizeInBits());
+ return DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(Imm, dl, VT),
+ N->getOperand(1), N->getOperand(2));
+}
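
Without in-register VP extend nodes, the promoter rebuilds sign extension as a shift pair and zero extension as an AND mask in the wider type. The same identities on scalar integers, as a sketch (the helper names are illustrative; the VP forms just carry the extra mask and EVL operands):

#include <cstdint>
#include <cstdio>

// Sign-extend the low 8 bits held in a 32-bit register: shl then ashr.
// (Relies on the usual two's-complement conversion and arithmetic shift.)
int32_t sext8_in_i32(uint32_t x) {
  return (int32_t)(x << 24) >> 24;
}

// Zero-extend the low 8 bits: mask away everything above them.
uint32_t zext8_in_i32(uint32_t x) {
  return x & 0xFFu;
}

int main() {
  std::printf("%d %u\n", sext8_in_i32(0xF0), zext8_in_i32(0x1F0));  // -16 240
  return 0;
}
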
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBO_CARRY(SDNode *N, unsigned OpNo) {
assert(OpNo == 2 && "Don't know how to promote this operand!");
SDValue LHS = N->getOperand(0);
@@ -2193,26 +2248,29 @@ SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) {
0);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) {
bool IsStrict = N->isStrictFPOpcode();
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
- // The integer operand is the last operand in FPOWI (so the result and
- // floating point operand is already type legalized).
+ bool IsPowI =
+ N->getOpcode() == ISD::FPOWI || N->getOpcode() == ISD::STRICT_FPOWI;
+
+  // The integer operand is the last operand in FPOWI (or FLDEXP), so the result
+  // and floating point operand are already type legalized.
+ RTLIB::Libcall LC = IsPowI ? RTLIB::getPOWI(N->getValueType(0))
+ : RTLIB::getLDEXP(N->getValueType(0));
+
+ if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
+ SDValue Op = SExtPromotedInteger(N->getOperand(1));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
+ }
  // We can't just promote the exponent type in FPOWI, since we want to lower
  // the node to a libcall, and if we promote to a type larger than sizeof(int)
  // the libcall might not match the target's ABI. Instead we rewrite to a
  // libcall here directly, letting makeLibCall handle promotion if the target
  // accepts it according to shouldSignExtendTypeInLibCall.
- RTLIB::Libcall LC = RTLIB::getPOWI(N->getValueType(0));
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fpowi.");
- if (!TLI.getLibcallName(LC)) {
- // Some targets don't have a powi libcall; use pow instead.
- // FIXME: Implement this if some target needs it.
- DAG.getContext()->emitError("Don't know how to promote fpowi to fpow");
- return DAG.getUNDEF(N->getValueType(0));
- }
+
unsigned OpOffset = IsStrict ? 1 : 0;
// The exponent should fit in a sizeof(int) type for the libcall to be valid.
assert(DAG.getLibInfo().getIntSize() ==
@@ -2290,16 +2348,40 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
// An i1 vecreduce_or is equivalent to vecreduce_umax, use that instead if
// vecreduce_or is not legal
else if (Opcode == ISD::VECREDUCE_OR && OrigEltVT == MVT::i1 &&
- !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_OR, InVT) &&
- TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMAX, InVT))
+ !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_OR, InVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMAX, InVT)) {
Opcode = ISD::VECREDUCE_UMAX;
+ // Can't use promoteTargetBoolean here because we still need
+ // to either sign_ext or zero_ext in the undefined case.
+ switch (TLI.getBooleanContents(InVT)) {
+ case TargetLoweringBase::UndefinedBooleanContent:
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ Op = ZExtPromotedInteger(N->getOperand(0));
+ break;
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ Op = SExtPromotedInteger(N->getOperand(0));
+ break;
+ }
+ }
// An i1 vecreduce_and is equivalent to vecreduce_umin, use that instead if
// vecreduce_and is not legal
else if (Opcode == ISD::VECREDUCE_AND && OrigEltVT == MVT::i1 &&
- !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_AND, InVT) &&
- TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMIN, InVT))
+ !TLI.isOperationLegalOrCustom(ISD::VECREDUCE_AND, InVT) &&
+ TLI.isOperationLegalOrCustom(ISD::VECREDUCE_UMIN, InVT)) {
Opcode = ISD::VECREDUCE_UMIN;
+ // Can't use promoteTargetBoolean here because we still need
+ // to either sign_ext or zero_ext in the undefined case.
+ switch (TLI.getBooleanContents(InVT)) {
+ case TargetLoweringBase::UndefinedBooleanContent:
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ Op = ZExtPromotedInteger(N->getOperand(0));
+ break;
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ Op = SExtPromotedInteger(N->getOperand(0));
+ break;
+ }
+ }
if (ResVT.bitsGE(EltVT))
return DAG.getNode(Opcode, SDLoc(N), ResVT, Op);
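
The opcode swap is valid because on 0/1 (or 0/-1) booleans an OR reduction equals an unsigned-max reduction and an AND reduction equals an unsigned-min reduction; the extension choice above only has to keep every promoted lane a genuine boolean of the target's flavour. A small sketch of the identity on 0/1 values:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  uint8_t v[4] = {0, 1, 0, 0};
  uint8_t orRed = 0, maxRed = 0, andRed = 1, minRed = 1;
  for (uint8_t b : v) {
    orRed |= b;  maxRed = std::max(maxRed, b);   // OR-reduce == umax-reduce
    andRed &= b; minRed = std::min(minRed, b);   // AND-reduce == umin-reduce
  }
  std::printf("%u==%u %u==%u\n", orRed, maxRed, andRed, minRed); // 1==1 0==0
  return 0;
}
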
@@ -2512,8 +2594,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ADDE:
case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break;
- case ISD::ADDCARRY:
- case ISD::SUBCARRY: ExpandIntRes_ADDSUBCARRY(N, Lo, Hi); break;
+ case ISD::UADDO_CARRY:
+ case ISD::USUBO_CARRY: ExpandIntRes_UADDSUBO_CARRY(N, Lo, Hi); break;
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY: ExpandIntRes_SADDSUBO_CARRY(N, Lo, Hi); break;
@@ -2874,48 +2956,118 @@ static std::pair<ISD::CondCode, ISD::NodeType> getExpandedMinMaxOps(int Op) {
void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc DL(N);
- ISD::NodeType LoOpc;
- ISD::CondCode CondC;
- std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode());
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- // Expand the subcomponents.
- SDValue LHSL, LHSH, RHSL, RHSH;
- GetExpandedInteger(LHS, LHSL, LHSH);
- GetExpandedInteger(RHS, RHSL, RHSH);
-
- // Value types
- EVT NVT = LHSL.getValueType();
- EVT CCT = getSetCCResultType(NVT);
-
// If the upper halves are all sign bits, then we can perform the MINMAX on
// the lower half and sign-extend the result to the upper half.
- unsigned NumHalfBits = NVT.getScalarSizeInBits();
+ unsigned NumBits = N->getValueType(0).getScalarSizeInBits();
+ unsigned NumHalfBits = NumBits / 2;
if (DAG.ComputeNumSignBits(LHS) > NumHalfBits &&
DAG.ComputeNumSignBits(RHS) > NumHalfBits) {
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(LHS, LHSL, LHSH);
+ GetExpandedInteger(RHS, RHSL, RHSH);
+ EVT NVT = LHSL.getValueType();
+
Lo = DAG.getNode(N->getOpcode(), DL, NVT, LHSL, RHSL);
Hi = DAG.getNode(ISD::SRA, DL, NVT, Lo,
DAG.getShiftAmountConstant(NumHalfBits - 1, NVT, DL));
return;
}
- // Hi part is always the same op
- Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});
+ // The Lo of smin(X, -1) is LHSL if X is negative. Otherwise it's -1.
+ // The Lo of smax(X, 0) is 0 if X is negative. Otherwise it's LHSL.
+ if ((N->getOpcode() == ISD::SMAX && isNullConstant(RHS)) ||
+ (N->getOpcode() == ISD::SMIN && isAllOnesConstant(RHS))) {
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(LHS, LHSL, LHSH);
+ GetExpandedInteger(RHS, RHSL, RHSH);
+ EVT NVT = LHSL.getValueType();
+ EVT CCT = getSetCCResultType(NVT);
- // We need to know whether to select Lo part that corresponds to 'winning'
- // Hi part or if Hi parts are equal.
- SDValue IsHiLeft = DAG.getSetCC(DL, CCT, LHSH, RHSH, CondC);
- SDValue IsHiEq = DAG.getSetCC(DL, CCT, LHSH, RHSH, ISD::SETEQ);
+ SDValue HiNeg =
+ DAG.getSetCC(DL, CCT, LHSH, DAG.getConstant(0, DL, NVT), ISD::SETLT);
+ if (N->getOpcode() == ISD::SMIN) {
+ Lo = DAG.getSelect(DL, NVT, HiNeg, LHSL, DAG.getConstant(-1, DL, NVT));
+ } else {
+ Lo = DAG.getSelect(DL, NVT, HiNeg, DAG.getConstant(0, DL, NVT), LHSL);
+ }
+ Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});
+ return;
+ }
- // Lo part corresponding to the 'winning' Hi part
- SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL);
+ const APInt *RHSVal = nullptr;
+ if (auto *RHSConst = dyn_cast<ConstantSDNode>(RHS))
+ RHSVal = &RHSConst->getAPIntValue();
- // Recursed Lo part if Hi parts are equal, this uses unsigned version
- SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL});
+  // The high half of MIN/MAX is always just the MIN/MAX of the
+ // high halves of the operands. Expand this way if it appears profitable.
+ if (RHSVal && (N->getOpcode() == ISD::UMIN || N->getOpcode() == ISD::UMAX) &&
+ (RHSVal->countLeadingOnes() >= NumHalfBits ||
+ RHSVal->countLeadingZeros() >= NumHalfBits)) {
+ SDValue LHSL, LHSH, RHSL, RHSH;
+ GetExpandedInteger(LHS, LHSL, LHSH);
+ GetExpandedInteger(RHS, RHSL, RHSH);
+ EVT NVT = LHSL.getValueType();
+ EVT CCT = getSetCCResultType(NVT);
+
+ ISD::NodeType LoOpc;
+ ISD::CondCode CondC;
+ std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode());
+
+ Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH});
+ // We need to know whether to select Lo part that corresponds to 'winning'
+ // Hi part or if Hi parts are equal.
+ SDValue IsHiLeft = DAG.getSetCC(DL, CCT, LHSH, RHSH, CondC);
+ SDValue IsHiEq = DAG.getSetCC(DL, CCT, LHSH, RHSH, ISD::SETEQ);
- Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp);
+ // Lo part corresponding to the 'winning' Hi part
+ SDValue LoCmp = DAG.getSelect(DL, NVT, IsHiLeft, LHSL, RHSL);
+
+ // Recursed Lo part if Hi parts are equal, this uses unsigned version
+ SDValue LoMinMax = DAG.getNode(LoOpc, DL, NVT, {LHSL, RHSL});
+
+ Lo = DAG.getSelect(DL, NVT, IsHiEq, LoMinMax, LoCmp);
+ return;
+ }
+
+ // Expand to "a < b ? a : b" etc. Prefer ge/le if that simplifies
+ // the compare.
+ ISD::CondCode Pred;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("How did we get here?");
+ case ISD::SMAX:
+ if (RHSVal && RHSVal->countTrailingZeros() >= NumHalfBits)
+ Pred = ISD::SETGE;
+ else
+ Pred = ISD::SETGT;
+ break;
+ case ISD::SMIN:
+ if (RHSVal && RHSVal->countTrailingOnes() >= NumHalfBits)
+ Pred = ISD::SETLE;
+ else
+ Pred = ISD::SETLT;
+ break;
+ case ISD::UMAX:
+ if (RHSVal && RHSVal->countTrailingZeros() >= NumHalfBits)
+ Pred = ISD::SETUGE;
+ else
+ Pred = ISD::SETUGT;
+ break;
+ case ISD::UMIN:
+ if (RHSVal && RHSVal->countTrailingOnes() >= NumHalfBits)
+ Pred = ISD::SETULE;
+ else
+ Pred = ISD::SETULT;
+ break;
+ }
+ EVT VT = N->getValueType(0);
+ EVT CCT = getSetCCResultType(VT);
+ SDValue Cond = DAG.getSetCC(DL, CCT, LHS, RHS, Pred);
+ SDValue Result = DAG.getSelect(DL, VT, Cond, LHS, RHS);
+ SplitInteger(Result, Lo, Hi);
}
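
When the hi-half route is taken, the expansion keeps the classic split: apply the same min/max to the high halves, and pick the low half from the winning side unless the high halves tie, in which case an unsigned low-half min/max decides. A scalar sketch of why that split is correct, shown for a 64-bit smax rebuilt from 32-bit halves (names are illustrative, not the SelectionDAG code):

#include <algorithm>
#include <cstdint>
#include <cstdio>

// smax on i64 from i32 halves: same op on the high halves; the low half comes
// from the winning side unless the high halves are equal, where an unsigned
// min/max of the low halves decides.
int64_t smax64_from_halves(int64_t a, int64_t b) {
  int32_t  ah = (int32_t)(a >> 32), bh = (int32_t)(b >> 32);
  uint32_t al = (uint32_t)a,        bl = (uint32_t)b;
  int32_t  hi = std::max(ah, bh);
  uint32_t lo = (ah == bh) ? std::max(al, bl)      // hi tie: unsigned lo op
                           : (ah > bh ? al : bl);  // lo of the winning hi
  return (int64_t)(((uint64_t)(uint32_t)hi << 32) | lo);
}

int main() {
  std::printf("%lld\n", (long long)smax64_from_halves(-5, 3));          // 3
  std::printf("%lld\n", (long long)smax64_from_halves(1LL << 33, 99));  // 8589934592
  return 0;
}
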
void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
@@ -2931,7 +3083,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
SDValue HiOps[3] = { LHSH, RHSH };
bool HasOpCarry = TLI.isOperationLegalOrCustom(
- N->getOpcode() == ISD::ADD ? ISD::ADDCARRY : ISD::SUBCARRY,
+ N->getOpcode() == ISD::ADD ? ISD::UADDO_CARRY : ISD::USUBO_CARRY,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (HasOpCarry) {
SDVTList VTList = DAG.getVTList(NVT, getSetCCResultType(NVT));
@@ -2940,13 +3092,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
HiOps[2] = Lo.getValue(1);
Hi = DAG.computeKnownBits(HiOps[2]).isZero()
? DAG.getNode(ISD::UADDO, dl, VTList, ArrayRef(HiOps, 2))
- : DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps);
+ : DAG.getNode(ISD::UADDO_CARRY, dl, VTList, HiOps);
} else {
Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps);
HiOps[2] = Lo.getValue(1);
Hi = DAG.computeKnownBits(HiOps[2]).isZero()
? DAG.getNode(ISD::USUBO, dl, VTList, ArrayRef(HiOps, 2))
- : DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps);
+ : DAG.getNode(ISD::USUBO_CARRY, dl, VTList, HiOps);
}
return;
}
@@ -3014,8 +3166,22 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
if (N->getOpcode() == ISD::ADD) {
Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps);
Hi = DAG.getNode(ISD::ADD, dl, NVT, ArrayRef(HiOps, 2));
- SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
- ISD::SETULT);
+ SDValue Cmp;
+ // Special case: X+1 has a carry out if X+1==0. This may reduce the live
+ // range of X. We assume comparing with 0 is cheap.
+ if (isOneConstant(LoOps[1]))
+ Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo,
+ DAG.getConstant(0, dl, NVT), ISD::SETEQ);
+ else if (isAllOnesConstant(LoOps[1])) {
+ if (isAllOnesConstant(HiOps[1]))
+ Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), LoOps[0],
+ DAG.getConstant(0, dl, NVT), ISD::SETEQ);
+ else
+ Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), LoOps[0],
+ DAG.getConstant(0, dl, NVT), ISD::SETNE);
+ } else
+ Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0],
+ ISD::SETULT);
SDValue Carry;
if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent)
@@ -3024,7 +3190,10 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
Carry = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT),
DAG.getConstant(0, dl, NVT));
- Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry);
+ if (isAllOnesConstant(LoOps[1]) && isAllOnesConstant(HiOps[1]))
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps[0], Carry);
+ else
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry);
} else {
Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps);
Hi = DAG.getNode(ISD::SUB, dl, NVT, ArrayRef(HiOps, 2));
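
A standalone sketch, not from the patch, checking the carry identities the isOneConstant/isAllOnesConstant special cases above rely on; uint32_t halves stand in for the expanded type NVT and the names are illustrative.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t LoSamples[] = {0u, 1u, 0x7FFFFFFFu, 0xFFFFFFFEu, 0xFFFFFFFFu};
  for (uint32_t Lo0 : LoSamples) {
    // X + 1: the low half carries out exactly when its result wraps to zero.
    uint32_t LoP1 = Lo0 + 1u;
    assert((LoP1 < Lo0) == (LoP1 == 0u));
    // X + (-1): the low half carries out exactly when Lo0 != 0; when the high
    // constant is all-ones too, Hi0 + (-1) + carry == Hi0 - (Lo0 == 0).
    uint32_t LoM1 = Lo0 + 0xFFFFFFFFu;
    bool Carry = LoM1 < Lo0;
    assert(Carry == (Lo0 != 0u));
    uint32_t Hi0 = 0x12345678u;
    uint32_t HiM1 = Hi0 + 0xFFFFFFFFu + (Carry ? 1u : 0u);
    assert(HiM1 == Hi0 - (Lo0 == 0u ? 1u : 0u));
  }
}
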
@@ -3101,12 +3270,12 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
ISD::CondCode Cond;
switch(N->getOpcode()) {
case ISD::UADDO:
- CarryOp = ISD::ADDCARRY;
+ CarryOp = ISD::UADDO_CARRY;
NoCarryOp = ISD::ADD;
Cond = ISD::SETULT;
break;
case ISD::USUBO:
- CarryOp = ISD::SUBCARRY;
+ CarryOp = ISD::USUBO_CARRY;
NoCarryOp = ISD::SUB;
Cond = ISD::SETUGT;
break;
@@ -3137,9 +3306,22 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
SDValue Sum = DAG.getNode(NoCarryOp, dl, LHS.getValueType(), LHS, RHS);
SplitInteger(Sum, Lo, Hi);
- // Calculate the overflow: addition overflows iff a + b < a, and subtraction
- // overflows iff a - b > a.
- Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond);
+ if (N->getOpcode() == ISD::UADDO && isOneConstant(RHS)) {
+ // Special case: uaddo X, 1 overflowed if X+1 == 0. We can detect this
+ // with (Lo | Hi) == 0.
+ SDValue Or = DAG.getNode(ISD::OR, dl, Lo.getValueType(), Lo, Hi);
+ Ovf = DAG.getSetCC(dl, N->getValueType(1), Or,
+ DAG.getConstant(0, dl, Lo.getValueType()), ISD::SETEQ);
+ } else if (N->getOpcode() == ISD::UADDO && isAllOnesConstant(RHS)) {
+ // Special case: uaddo X, -1 overflows if X == 0.
+ Ovf =
+ DAG.getSetCC(dl, N->getValueType(1), LHS,
+ DAG.getConstant(0, dl, LHS.getValueType()), ISD::SETNE);
+ } else {
+ // Calculate the overflow: addition overflows iff a + b < a, and
+ // subtraction overflows iff a - b > a.
+ Ovf = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, Cond);
+ }
}
// Legalized the flag result - switch anything that used the old flag to
@@ -3147,8 +3329,8 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
ReplaceValueWith(SDValue(N, 1), Ovf);
}
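
A standalone sketch, not from the patch, of the two uaddo special cases added above, with a 64-bit value playing the role of the expanded integer and its 32-bit halves playing Lo/Hi.

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Samples[] = {0ull, 1ull, 0xFFFFFFFFull, 0x100000000ull,
                              0xFFFFFFFFFFFFFFFFull};
  for (uint64_t X : Samples) {
    // uaddo(X, 1) overflows iff X + 1 == 0, i.e. iff (Lo | Hi) == 0.
    uint64_t SumP1 = X + 1;
    uint32_t Lo = uint32_t(SumP1), Hi = uint32_t(SumP1 >> 32);
    assert((SumP1 < X) == ((Lo | Hi) == 0));
    // uaddo(X, -1) overflows iff X != 0.
    uint64_t SumM1 = X + 0xFFFFFFFFFFFFFFFFull;
    assert((SumM1 < X) == (X != 0));
  }
}
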
-void DAGTypeLegalizer::ExpandIntRes_ADDSUBCARRY(SDNode *N,
- SDValue &Lo, SDValue &Hi) {
+void DAGTypeLegalizer::ExpandIntRes_UADDSUBO_CARRY(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
// Expand the subcomponents.
SDValue LHSL, LHSH, RHSL, RHSH;
SDLoc dl(N);
@@ -3177,8 +3359,8 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO_CARRY(SDNode *N,
SDVTList VTList = DAG.getVTList(LHSL.getValueType(), N->getValueType(1));
// We need to use an unsigned carry op for the lo part.
- unsigned CarryOp = N->getOpcode() == ISD::SADDO_CARRY ? ISD::ADDCARRY
- : ISD::SUBCARRY;
+ unsigned CarryOp =
+ N->getOpcode() == ISD::SADDO_CARRY ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
Lo = DAG.getNode(CarryOp, dl, VTList, { LHSL, RHSL, N->getOperand(2) });
Hi = DAG.getNode(N->getOpcode(), dl, VTList, { LHSH, RHSH, Lo.getValue(1) });
@@ -3308,14 +3490,14 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
return;
}
- // If we have SUBCARRY, use the expanded form of the sra+xor+sub sequence we
- // use in LegalizeDAG. The SUB part of the expansion is based on
- // ExpandIntRes_ADDSUB which also uses SUBCARRY/USUBO after checking that
- // SUBCARRY is LegalOrCustom. Each of the pieces here can be further expanded
- // if needed. Shift expansion has a special case for filling with sign bits
- // so that we will only end up with one SRA.
+ // If we have USUBO_CARRY, use the expanded form of the sra+xor+sub sequence
+ // we use in LegalizeDAG. The SUB part of the expansion is based on
+ // ExpandIntRes_ADDSUB which also uses USUBO_CARRY/USUBO after checking that
+ // USUBO_CARRY is LegalOrCustom. Each of the pieces here can be further
+ // expanded if needed. Shift expansion has a special case for filling with
+ // sign bits so that we will only end up with one SRA.
bool HasSubCarry = TLI.isOperationLegalOrCustom(
- ISD::SUBCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
+ ISD::USUBO_CARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (HasSubCarry) {
SDValue Sign = DAG.getNode(
ISD::SRA, dl, NVT, Hi,
@@ -3324,7 +3506,7 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
Lo = DAG.getNode(ISD::XOR, dl, NVT, Lo, Sign);
Hi = DAG.getNode(ISD::XOR, dl, NVT, Hi, Sign);
Lo = DAG.getNode(ISD::USUBO, dl, VTList, Lo, Sign);
- Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, Hi, Sign, Lo.getValue(1));
+ Hi = DAG.getNode(ISD::USUBO_CARRY, dl, VTList, Hi, Sign, Lo.getValue(1));
return;
}
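
A standalone sketch, not from the patch, of the sra+xor+sub sequence described in the comment above, done on 32-bit halves of an int64_t; the manual borrow stands in for USUBO/USUBO_CARRY.

#include <cassert>
#include <cstdint>

static uint64_t absViaHalves(int64_t X) {
  uint32_t Lo = uint32_t(uint64_t(X)), Hi = uint32_t(uint64_t(X) >> 32);
  uint32_t Sign = uint32_t(int32_t(Hi) >> 31); // SRA: 0 or all-ones
  Lo ^= Sign;                                  // conditional bitwise NOT
  Hi ^= Sign;
  uint32_t NewLo = Lo - Sign;                  // USUBO
  uint32_t Borrow = NewLo > Lo ? 1u : 0u;      // borrow out of the low SUB
  uint32_t NewHi = Hi - Sign - Borrow;         // USUBO_CARRY
  return (uint64_t(NewHi) << 32) | NewLo;
}

int main() {
  assert(absViaHalves(-5) == 5u);
  assert(absViaHalves(42) == 42u);
  assert(absViaHalves(INT64_MIN) == uint64_t(INT64_MIN)); // abs wraps here
}
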
@@ -4956,8 +5138,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
ConstantSDNode *LoCmpC = dyn_cast<ConstantSDNode>(LoCmp.getNode());
ConstantSDNode *HiCmpC = dyn_cast<ConstantSDNode>(HiCmp.getNode());
- bool EqAllowed = (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
- CCCode == ISD::SETUGE || CCCode == ISD::SETULE);
+ bool EqAllowed = ISD::isTrueWhenEqual(CCCode);
// FIXME: Is the HiCmpC->isOne() here correct for
// ZeroOrNegativeOneBooleanContent.
@@ -5088,9 +5269,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) {
GetExpandedInteger(LHS, LHSLo, LHSHi);
GetExpandedInteger(RHS, RHSLo, RHSHi);
- // Expand to a SUBE for the low part and a smaller SETCCCARRY for the high.
+ // Expand to a USUBO_CARRY for the low part and a SETCCCARRY for the high.
SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), Carry.getValueType());
- SDValue LowCmp = DAG.getNode(ISD::SUBCARRY, dl, VTList, LHSLo, RHSLo, Carry);
+ SDValue LowCmp =
+ DAG.getNode(ISD::USUBO_CARRY, dl, VTList, LHSLo, RHSLo, Carry);
return DAG.getNode(ISD::SETCCCARRY, dl, N->getValueType(0), LHSHi, RHSHi,
LowCmp.getValue(1), Cond);
}
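
A standalone sketch, not from the patch, of the idea behind the USUBO_CARRY + SETCCCARRY chain above: an expanded unsigned comparison can be computed from the borrow that propagates out of subtracting the halves (the incoming carry operand is left out for brevity).

#include <cassert>
#include <cstdint>

static bool ultViaBorrow(uint64_t A, uint64_t B) {
  uint32_t AL = uint32_t(A), AH = uint32_t(A >> 32);
  uint32_t BL = uint32_t(B), BH = uint32_t(B >> 32);
  bool Borrow = AL < BL;                       // borrow out of the low USUBO
  // High half: A < B iff AH - BH - Borrow would underflow.
  return uint64_t(AH) < uint64_t(BH) + (Borrow ? 1u : 0u);
}

int main() {
  assert(ultViaBorrow(1, 2) && !ultViaBorrow(2, 1));
  assert(ultViaBorrow(0x100000000ull, 0x100000001ull));
  assert(!ultViaBorrow(0x200000000ull, 0x1FFFFFFFFull));
  assert(!ultViaBorrow(5, 5));
}
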
@@ -5293,6 +5475,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SPLICE(SDNode *N) {
return DAG.getNode(ISD::VECTOR_SPLICE, dl, OutVT, V0, V1, N->getOperand(2));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_INTERLEAVE_DEINTERLEAVE(SDNode *N) {
+ SDLoc dl(N);
+
+ SDValue V0 = GetPromotedInteger(N->getOperand(0));
+ SDValue V1 = GetPromotedInteger(N->getOperand(1));
+ EVT ResVT = V0.getValueType();
+ SDValue Res = DAG.getNode(N->getOpcode(), dl,
+ DAG.getVTList(ResVT, ResVT), V0, V1);
+ SetPromotedInteger(SDValue(N, 0), Res.getValue(0));
+ SetPromotedInteger(SDValue(N, 1), Res.getValue(1));
+ return SDValue();
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT OutVT = N->getValueType(0);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 5e0349593139..328939e44dcb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -245,8 +245,7 @@ bool DAGTypeLegalizer::run() {
// types are illegal.
for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
EVT ResultVT = N->getValueType(i);
- LLVM_DEBUG(dbgs() << "Analyzing result type: " << ResultVT.getEVTString()
- << "\n");
+ LLVM_DEBUG(dbgs() << "Analyzing result type: " << ResultVT << "\n");
switch (getTypeAction(ResultVT)) {
case TargetLowering::TypeLegal:
LLVM_DEBUG(dbgs() << "Legal result type\n");
@@ -716,7 +715,6 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
auto &OpIdEntry = PromotedIntegers[getTableId(Op)];
assert((OpIdEntry == 0) && "Node is already promoted!");
OpIdEntry = getTableId(Result);
- Result->setFlags(Op->getFlags());
DAG.transferDbgValues(Op, Result);
}
@@ -989,10 +987,7 @@ void DAGTypeLegalizer::GetPairElements(SDValue Pair,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(Pair);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType());
- Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
- DAG.getIntPtrConstant(0, dl));
- Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair,
- DAG.getIntPtrConstant(1, dl));
+ std::tie(Lo, Hi) = DAG.SplitScalar(Pair, dl, NVT, NVT);
}
/// Build an integer with low bits Lo and high bits Hi.
@@ -1005,7 +1000,7 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
LVT.getSizeInBits() + HVT.getSizeInBits());
- EVT ShiftAmtVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout(), false);
+ EVT ShiftAmtVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo);
Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi);
Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index b97e44a01319..db8f61eee606 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -307,6 +307,7 @@ private:
SDValue PromoteIntRes_VECTOR_REVERSE(SDNode *N);
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
SDValue PromoteIntRes_VECTOR_SPLICE(SDNode *N);
+ SDValue PromoteIntRes_VECTOR_INTERLEAVE_DEINTERLEAVE(SDNode *N);
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntRes_ScalarOp(SDNode *N);
SDValue PromoteIntRes_STEP_VECTOR(SDNode *N);
@@ -331,6 +332,7 @@ private:
SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N);
SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N);
SDValue PromoteIntRes_Overflow(SDNode *N);
+ SDValue PromoteIntRes_FFREXP(SDNode *N);
SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_Select(SDNode *N);
SDValue PromoteIntRes_SELECT_CC(SDNode *N);
@@ -345,7 +347,7 @@ private:
SDValue PromoteIntRes_SRL(SDNode *N);
SDValue PromoteIntRes_TRUNCATE(SDNode *N);
SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo);
- SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo);
+ SDValue PromoteIntRes_UADDSUBO_CARRY(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_SADDSUBO_CARRY(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_UNDEF(SDNode *N);
SDValue PromoteIntRes_VAARG(SDNode *N);
@@ -383,6 +385,7 @@ private:
SDValue PromoteIntOp_Shift(SDNode *N);
SDValue PromoteIntOp_FunnelShift(SDNode *N);
SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_VP_SIGN_EXTEND(SDNode *N);
SDValue PromoteIntOp_SINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_STRICT_SINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo);
@@ -390,15 +393,16 @@ private:
SDValue PromoteIntOp_UINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_STRICT_UINT_TO_FP(SDNode *N);
SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N);
+ SDValue PromoteIntOp_VP_ZERO_EXTEND(SDNode *N);
SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo);
- SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_ADDSUBO_CARRY(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_FIX(SDNode *N);
- SDValue PromoteIntOp_FPOWI(SDNode *N);
+ SDValue PromoteIntOp_ExpOp(SDNode *N);
SDValue PromoteIntOp_VECREDUCE(SDNode *N);
SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SET_ROUNDING(SDNode *N);
@@ -447,7 +451,7 @@ private:
void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_ADDSUBCARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UADDSUBO_CARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_SADDSUBO_CARRY (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -558,9 +562,11 @@ private:
SDValue SoftenFloatRes_FNEG(SDNode *N);
SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
+ SDValue SoftenFloatRes_BF16_TO_FP(SDNode *N);
SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
SDValue SoftenFloatRes_FPOW(SDNode *N);
- SDValue SoftenFloatRes_FPOWI(SDNode *N);
+ SDValue SoftenFloatRes_ExpOp(SDNode *N);
+ SDValue SoftenFloatRes_FFREXP(SDNode *N);
SDValue SoftenFloatRes_FREEZE(SDNode *N);
SDValue SoftenFloatRes_FREM(SDNode *N);
SDValue SoftenFloatRes_FRINT(SDNode *N);
@@ -638,6 +644,7 @@ private:
void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FLDEXP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -687,7 +694,8 @@ private:
SDValue PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue PromoteFloatRes_FCOPYSIGN(SDNode *N);
SDValue PromoteFloatRes_FMAD(SDNode *N);
- SDValue PromoteFloatRes_FPOWI(SDNode *N);
+ SDValue PromoteFloatRes_ExpOp(SDNode *N);
+ SDValue PromoteFloatRes_FFREXP(SDNode *N);
SDValue PromoteFloatRes_FP_ROUND(SDNode *N);
SDValue PromoteFloatRes_LOAD(SDNode *N);
SDValue PromoteFloatRes_SELECT(SDNode *N);
@@ -728,7 +736,7 @@ private:
SDValue SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SoftPromoteHalfRes_FCOPYSIGN(SDNode *N);
SDValue SoftPromoteHalfRes_FMAD(SDNode *N);
- SDValue SoftPromoteHalfRes_FPOWI(SDNode *N);
+ SDValue SoftPromoteHalfRes_ExpOp(SDNode *N);
SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N);
SDValue SoftPromoteHalfRes_LOAD(SDNode *N);
SDValue SoftPromoteHalfRes_SELECT(SDNode *N);
@@ -781,7 +789,7 @@ private:
SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
- SDValue ScalarizeVecRes_FPOWI(SDNode *N);
+ SDValue ScalarizeVecRes_ExpOp(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
@@ -795,6 +803,7 @@ private:
SDValue ScalarizeVecRes_IS_FPCLASS(SDNode *N);
SDValue ScalarizeVecRes_FIX(SDNode *N);
+ SDValue ScalarizeVecRes_FFREXP(SDNode *N, unsigned ResNo);
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
@@ -843,6 +852,7 @@ private:
void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FFREXP(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -857,8 +867,7 @@ private:
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
@@ -875,6 +884,8 @@ private:
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
SDValue &Hi);
void SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_VECTOR_DEINTERLEAVE(SDNode *N);
+ void SplitVecRes_VECTOR_INTERLEAVE(SDNode *N);
void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -901,7 +912,7 @@ private:
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
SDValue SplitVecOp_VSETCC(SDNode *N);
SDValue SplitVecOp_FP_ROUND(SDNode *N);
- SDValue SplitVecOp_FCOPYSIGN(SDNode *N);
+ SDValue SplitVecOp_FPOpDifferentTypes(SDNode *N);
SDValue SplitVecOp_FP_TO_XINT_SAT(SDNode *N);
//===--------------------------------------------------------------------===//
@@ -942,6 +953,7 @@ private:
// Widen Vector Result Promotion.
void WidenVectorResult(SDNode *N, unsigned ResNo);
SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo);
+ SDValue WidenVecRes_AssertZext(SDNode* N);
SDValue WidenVecRes_BITCAST(SDNode* N);
SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
@@ -976,7 +988,7 @@ private:
SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_IS_FPCLASS(SDNode *N);
- SDValue WidenVecRes_POWI(SDNode *N);
+ SDValue WidenVecRes_ExpOp(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
SDValue WidenVecRes_InregOp(SDNode *N);
@@ -1001,11 +1013,12 @@ private:
SDValue WidenVecOp_Convert(SDNode *N);
SDValue WidenVecOp_FP_TO_XINT_SAT(SDNode *N);
- SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
+ SDValue WidenVecOp_UnrollVectorOp(SDNode *N);
SDValue WidenVecOp_IS_FPCLASS(SDNode *N);
SDValue WidenVecOp_VECREDUCE(SDNode *N);
SDValue WidenVecOp_VECREDUCE_SEQ(SDNode *N);
SDValue WidenVecOp_VP_REDUCE(SDNode *N);
+ SDValue WidenVecOp_ExpOp(SDNode *N);
/// Helper function to generate a set of operations to perform
/// a vector operation for a wider type.
@@ -1071,6 +1084,7 @@ private:
// Generic Result Splitting.
void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_ARITH_FENCE (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_Select (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 21b5255c8f72..296242c00401 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -571,6 +571,16 @@ void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) {
Hi = DAG.getUNDEF(HiVT);
}
+void DAGTypeLegalizer::SplitVecRes_AssertZext(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue L, H;
+ SDLoc dl(N);
+ GetSplitOp(N->getOperand(0), L, H);
+
+ Lo = DAG.getNode(ISD::AssertZext, dl, L.getValueType(), L, N->getOperand(1));
+ Hi = DAG.getNode(ISD::AssertZext, dl, H.getValueType(), H, N->getOperand(1));
+}
+
void DAGTypeLegalizer::SplitRes_FREEZE(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue L, H;
SDLoc dl(N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index e245b3cb4c6d..3862fd241897 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -38,7 +39,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -296,7 +296,16 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
ValVT = Node->getOperand(1).getValueType();
- Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
+ if (Op.getOpcode() == ISD::STRICT_FSETCC ||
+ Op.getOpcode() == ISD::STRICT_FSETCCS) {
+ MVT OpVT = Node->getOperand(1).getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(3))->get();
+ Action = TLI.getCondCodeAction(CCCode, OpVT);
+ if (Action == TargetLowering::Legal)
+ Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
+ } else {
+ Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
+ }
// If we're asked to expand a strict vector floating-point operation,
// by default we're going to simply unroll it. That is usually the
// best approach, except in the case where the resulting strict (scalar)
@@ -368,6 +377,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FSQRT:
case ISD::FSIN:
case ISD::FCOS:
+ case ISD::FLDEXP:
case ISD::FPOWI:
case ISD::FPOW:
case ISD::FLOG:
@@ -402,6 +412,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SMULO:
case ISD::UMULO:
case ISD::FCANONICALIZE:
+ case ISD::FFREXP:
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
@@ -441,6 +452,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
@@ -454,7 +467,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
Action = TLI.getCondCodeAction(CCCode, OpVT);
if (Action == TargetLowering::Legal)
- Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
break;
}
@@ -785,6 +798,13 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
break;
+ case ISD::ABDS:
+ case ISD::ABDU:
+ if (SDValue Expanded = TLI.expandABD(Node, DAG)) {
+ Results.push_back(Expanded);
+ return;
+ }
+ break;
case ISD::BITREVERSE:
ExpandBITREVERSE(Node, Results);
return;
@@ -943,6 +963,8 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::VECREDUCE_FMUL:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
Results.push_back(TLI.expandVecReduce(Node, DAG));
return;
case ISD::VECREDUCE_SEQ_FADD:
@@ -958,7 +980,9 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
return;
}
- Results.push_back(DAG.UnrollVectorOp(Node));
+ SDValue Unrolled = DAG.UnrollVectorOp(Node);
+ for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
+ Results.push_back(Unrolled.getValue(I));
}
SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
@@ -1304,11 +1328,11 @@ SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
return DAG.UnrollVectorOp(Node);
SDValue Ones = DAG.getAllOnesConstant(DL, VT);
- SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Mask, EVL);
+ SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Ones, EVL);
- Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Mask, EVL);
- Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Mask, EVL);
- return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Mask, EVL);
+ Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Ones, EVL);
+ Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Ones, EVL);
+ return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Ones, EVL);
}
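
A standalone sketch, not from the patch, of the per-lane blend ExpandVP_SELECT builds out of AND/OR with the mask and its complement, assuming each mask lane is all-zeros or all-ones; the VP mask and EVL operands are ignored here.

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t A = 0xA5, B = 0x3C;
  for (uint8_t M : {uint8_t(0x00), uint8_t(0xFF)}) {
    uint8_t NotM = uint8_t(~M);                 // VP_XOR with all-ones
    uint8_t Blend = (A & M) | (B & NotM);       // VP_AND, VP_AND, VP_OR
    assert(Blend == (M ? A : B));
  }
}
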
SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
@@ -1516,39 +1540,54 @@ void VectorLegalizer::ExpandSETCC(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
bool NeedInvert = false;
bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
- SDLoc dl(Node);
- MVT OpVT = Node->getOperand(0).getSimpleValueType();
- ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
+ bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
+ Node->getOpcode() == ISD::STRICT_FSETCCS;
+ bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
+ unsigned Offset = IsStrict ? 1 : 0;
+
+ SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
+ SDValue LHS = Node->getOperand(0 + Offset);
+ SDValue RHS = Node->getOperand(1 + Offset);
+ SDValue CC = Node->getOperand(2 + Offset);
+
+ MVT OpVT = LHS.getSimpleValueType();
+ ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
+ if (IsStrict) {
+ UnrollStrictFPOp(Node, Results);
+ return;
+ }
Results.push_back(UnrollVSETCC(Node));
return;
}
- SDValue Chain;
- SDValue LHS = Node->getOperand(0);
- SDValue RHS = Node->getOperand(1);
- SDValue CC = Node->getOperand(2);
SDValue Mask, EVL;
if (IsVP) {
- Mask = Node->getOperand(3);
- EVL = Node->getOperand(4);
+ Mask = Node->getOperand(3 + Offset);
+ EVL = Node->getOperand(4 + Offset);
}
+ SDLoc dl(Node);
bool Legalized =
TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, RHS, CC, Mask,
- EVL, NeedInvert, dl, Chain);
+ EVL, NeedInvert, dl, Chain, IsSignaling);
if (Legalized) {
// If we expanded the SETCC by swapping LHS and RHS, or by inverting the
// condition code, create a new SETCC node.
if (CC.getNode()) {
- if (!IsVP)
- LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
- Node->getFlags());
- else
+ if (IsStrict) {
+ LHS = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(),
+ {Chain, LHS, RHS, CC}, Node->getFlags());
+ Chain = LHS.getValue(1);
+ } else if (IsVP) {
LHS = DAG.getNode(ISD::VP_SETCC, dl, Node->getValueType(0),
{LHS, RHS, CC, Mask, EVL}, Node->getFlags());
+ } else {
+ LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
+ Node->getFlags());
+ }
}
// If we expanded the SETCC by inverting the condition code, then wrap
@@ -1560,6 +1599,8 @@ void VectorLegalizer::ExpandSETCC(SDNode *Node,
LHS = DAG.getVPLogicalNOT(dl, LHS, Mask, EVL, LHS->getValueType(0));
}
} else {
+ assert(!IsStrict && "Don't know how to expand for strict nodes.");
+
// Otherwise, SETCC for the given comparison type must be completely
// illegal; expand it into a SELECT_CC.
EVT VT = Node->getValueType(0);
@@ -1571,6 +1612,8 @@ void VectorLegalizer::ExpandSETCC(SDNode *Node,
}
Results.push_back(LHS);
+ if (IsStrict)
+ Results.push_back(Chain);
}
void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
@@ -1618,6 +1661,12 @@ void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
return;
}
+ if (Node->getOpcode() == ISD::STRICT_FSETCC ||
+ Node->getOpcode() == ISD::STRICT_FSETCCS) {
+ ExpandSETCC(Node, Results);
+ return;
+ }
+
UnrollStrictFPOp(Node, Results);
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index af5ea1ce5f45..8c117c1c74dc 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -57,7 +57,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
- case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
+ case ISD::FPOWI: R = ScalarizeVecRes_ExpOp(N); break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
@@ -113,7 +113,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCANONICALIZE:
R = ScalarizeVecRes_UnaryOp(N);
break;
-
+ case ISD::FFREXP:
+ R = ScalarizeVecRes_FFREXP(N, ResNo);
+ break;
case ISD::ADD:
case ISD::AND:
case ISD::FADD:
@@ -126,6 +128,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FMAXNUM_IEEE:
case ISD::FMINIMUM:
case ISD::FMAXIMUM:
+ case ISD::FLDEXP:
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@@ -142,6 +145,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FREM:
case ISD::FSUB:
case ISD::MUL:
+ case ISD::MULHS:
+ case ISD::MULHU:
case ISD::OR:
case ISD::SDIV:
case ISD::SREM:
@@ -221,6 +226,34 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) {
Op2, N->getFlags());
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_FFREXP(SDNode *N, unsigned ResNo) {
+ assert(N->getValueType(0).getVectorNumElements() == 1 &&
+ "Unexpected vector type!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(0));
+
+ EVT VT0 = N->getValueType(0);
+ EVT VT1 = N->getValueType(1);
+ SDLoc dl(N);
+
+ SDNode *ScalarNode =
+ DAG.getNode(N->getOpcode(), dl,
+ {VT0.getScalarType(), VT1.getScalarType()}, Elt)
+ .getNode();
+
+ // Replace the other vector result not being explicitly scalarized here.
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeScalarizeVector) {
+ SetScalarizedVector(SDValue(N, OtherNo), SDValue(ScalarNode, OtherNo));
+ } else {
+ SDValue OtherVal = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, OtherVT,
+ SDValue(ScalarNode, OtherNo));
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+
+ return SDValue(ScalarNode, ResNo);
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
EVT VT = N->getValueType(0).getVectorElementType();
unsigned NumOpers = N->getNumOperands();
@@ -348,10 +381,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
N->getOperand(1));
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecRes_ExpOp(SDNode *N) {
SDValue Op = GetScalarizedVector(N->getOperand(0));
- return DAG.getNode(ISD::FPOWI, SDLoc(N),
- Op.getValueType(), Op, N->getOperand(1));
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op,
+ N->getOperand(1));
}
SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
@@ -695,6 +728,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
Res = ScalarizeVecOp_VECREDUCE(N);
break;
case ISD::VECREDUCE_SEQ_FADD:
@@ -948,6 +983,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
"operator!\n");
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
+ case ISD::AssertZext: SplitVecRes_AssertZext(N, Lo, Hi); break;
case ISD::VSELECT:
case ISD::SELECT:
case ISD::VP_MERGE:
@@ -959,8 +995,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
- case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
- case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
+ case ISD::FPOWI:
+ case ISD::FLDEXP:
+ case ISD::FCOPYSIGN: SplitVecRes_FPOp_MultiType(N, Lo, Hi); break;
case ISD::IS_FPCLASS: SplitVecRes_IS_FPCLASS(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
case ISD::SPLAT_VECTOR:
@@ -1000,6 +1037,12 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VECTOR_SPLICE:
SplitVecRes_VECTOR_SPLICE(N, Lo, Hi);
break;
+ case ISD::VECTOR_DEINTERLEAVE:
+ SplitVecRes_VECTOR_DEINTERLEAVE(N);
+ return;
+ case ISD::VECTOR_INTERLEAVE:
+ SplitVecRes_VECTOR_INTERLEAVE(N);
+ return;
case ISD::VAARG:
SplitVecRes_VAARG(N, Lo, Hi);
break;
@@ -1069,6 +1112,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FCANONICALIZE:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
+ case ISD::FFREXP:
+ SplitVecRes_FFREXP(N, ResNo, Lo, Hi);
+ break;
case ISD::ANY_EXTEND:
case ISD::SIGN_EXTEND:
@@ -1456,16 +1502,11 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MPI, SmallestAlign);
}
-void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
- SDLoc dl(N);
- GetSplitVector(N->getOperand(0), Lo, Hi);
- Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1));
- Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1));
-}
-
-void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+// Handle splitting an FP operation where the second operand does not match the
+// first operand's type. The second operand may be a scalar, or a vector that
+// has exactly as many elements as the first.
+void DAGTypeLegalizer::SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDLoc DL(N);
@@ -1473,14 +1514,18 @@ void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo,
SDValue RHSLo, RHSHi;
SDValue RHS = N->getOperand(1);
EVT RHSVT = RHS.getValueType();
- if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector)
- GetSplitVector(RHS, RHSLo, RHSHi);
- else
- std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS));
-
+ if (RHSVT.isVector()) {
+ if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(RHS, RHSLo, RHSHi);
+ else
+ std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS));
- Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo);
- Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi);
+ Lo = DAG.getNode(N->getOpcode(), DL, LHSLo.getValueType(), LHSLo, RHSLo);
+ Hi = DAG.getNode(N->getOpcode(), DL, LHSHi.getValueType(), LHSHi, RHSHi);
+ } else {
+ Lo = DAG.getNode(N->getOpcode(), DL, LHSLo.getValueType(), LHSLo, RHS);
+ Hi = DAG.getNode(N->getOpcode(), DL, LHSHi.getValueType(), LHSHi, RHS);
+ }
}
void DAGTypeLegalizer::SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo,
@@ -2284,6 +2329,42 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(Opcode, dl, HiVT, {Hi, MaskHi, EVLHi}, Flags);
}
+void DAGTypeLegalizer::SplitVecRes_FFREXP(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0));
+ auto [LoVT1, HiVT1] = DAG.GetSplitDestVTs(N->getValueType(1));
+
+ // If the input also splits, handle it directly for a compile time speedup.
+ // Otherwise split it by hand.
+ EVT InVT = N->getOperand(0).getValueType();
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ else
+ std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+
+ Lo = DAG.getNode(N->getOpcode(), dl, {LoVT, LoVT1}, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, {HiVT, HiVT1}, Hi);
+ Lo->setFlags(N->getFlags());
+ Hi->setFlags(N->getFlags());
+
+ SDNode *HiNode = Hi.getNode();
+ SDNode *LoNode = Lo.getNode();
+
+ // Replace the other vector result not being explicitly split here.
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeSplitVector) {
+ SetSplitVector(SDValue(N, OtherNo), SDValue(LoNode, OtherNo),
+ SDValue(HiNode, OtherNo));
+ } else {
+ SDValue OtherVal =
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, OtherVT, SDValue(LoNode, OtherNo),
+ SDValue(HiNode, OtherNo));
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+}
+
void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
@@ -2377,7 +2458,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
EVT EltVT = NewVT.getVectorElementType();
SmallVector<SDValue> Ops(NewElts, DAG.getUNDEF(EltVT));
for (unsigned I = 0; I < NewElts; ++I) {
- if (Mask[I] == UndefMaskElem)
+ if (Mask[I] == PoisonMaskElem)
continue;
unsigned Idx = Mask[I];
if (Idx >= NewElts)
@@ -2417,11 +2498,11 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
// Use shuffles operands instead of shuffles themselves.
// 1. Adjust mask.
for (int &Idx : Mask) {
- if (Idx == UndefMaskElem)
+ if (Idx == PoisonMaskElem)
continue;
unsigned SrcRegIdx = Idx / NewElts;
if (Inputs[SrcRegIdx].isUndef()) {
- Idx = UndefMaskElem;
+ Idx = PoisonMaskElem;
continue;
}
auto *Shuffle =
@@ -2429,8 +2510,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
if (!Shuffle || !is_contained(P.second, SrcRegIdx))
continue;
int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
- if (MaskElt == UndefMaskElem) {
- Idx = UndefMaskElem;
+ if (MaskElt == PoisonMaskElem) {
+ Idx = PoisonMaskElem;
continue;
}
Idx = MaskElt % NewElts +
@@ -2449,11 +2530,11 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
// Check if any concat_vectors can be simplified.
SmallBitVector UsedSubVector(2 * std::size(Inputs));
for (int &Idx : Mask) {
- if (Idx == UndefMaskElem)
+ if (Idx == PoisonMaskElem)
continue;
unsigned SrcRegIdx = Idx / NewElts;
if (Inputs[SrcRegIdx].isUndef()) {
- Idx = UndefMaskElem;
+ Idx = PoisonMaskElem;
continue;
}
TargetLowering::LegalizeTypeAction TypeAction =
@@ -2483,7 +2564,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
if (!Pairs.empty() && Pairs.front().size() > 1) {
// Adjust mask.
for (int &Idx : Mask) {
- if (Idx == UndefMaskElem)
+ if (Idx == PoisonMaskElem)
continue;
unsigned SrcRegIdx = Idx / NewElts;
auto *It = find_if(
@@ -2525,14 +2606,14 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
!Shuffle->getOperand(1).isUndef()) {
// Find the only used operand, if possible.
for (int &Idx : Mask) {
- if (Idx == UndefMaskElem)
+ if (Idx == PoisonMaskElem)
continue;
unsigned SrcRegIdx = Idx / NewElts;
if (SrcRegIdx != I)
continue;
int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
- if (MaskElt == UndefMaskElem) {
- Idx = UndefMaskElem;
+ if (MaskElt == PoisonMaskElem) {
+ Idx = PoisonMaskElem;
continue;
}
int OpIdx = MaskElt / NewElts;
@@ -2558,14 +2639,14 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
// Found that operand is used already.
// 1. Fix the mask for the reused operand.
for (int &Idx : Mask) {
- if (Idx == UndefMaskElem)
+ if (Idx == PoisonMaskElem)
continue;
unsigned SrcRegIdx = Idx / NewElts;
if (SrcRegIdx != I)
continue;
int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
- if (MaskElt == UndefMaskElem) {
- Idx = UndefMaskElem;
+ if (MaskElt == PoisonMaskElem) {
+ Idx = PoisonMaskElem;
continue;
}
int MaskIdx = MaskElt / NewElts;
@@ -2582,7 +2663,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
Inputs[I] = Shuffle->getOperand(Op);
// Adjust mask.
for (int &Idx : Mask) {
- if (Idx == UndefMaskElem)
+ if (Idx == PoisonMaskElem)
continue;
unsigned SrcRegIdx = Idx / NewElts;
if (SrcRegIdx != I)
@@ -2616,11 +2697,11 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
auto &&UniqueConstantVec = UniqueConstantInputs.takeVector();
unsigned ConstNum = UniqueConstantVec.size();
for (int &Idx : Mask) {
- if (Idx == UndefMaskElem)
+ if (Idx == PoisonMaskElem)
continue;
unsigned SrcRegIdx = Idx / NewElts;
if (Inputs[SrcRegIdx].isUndef()) {
- Idx = UndefMaskElem;
+ Idx = PoisonMaskElem;
continue;
}
const auto It = find(UniqueConstantVec, Inputs[SrcRegIdx]);
@@ -2649,7 +2730,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
// Build a shuffle mask for the output, discovering on the fly which
// input vectors to use as shuffle operands.
unsigned FirstMaskIdx = High * NewElts;
- SmallVector<int> Mask(NewElts * std::size(Inputs), UndefMaskElem);
+ SmallVector<int> Mask(NewElts * std::size(Inputs), PoisonMaskElem);
copy(ArrayRef(OrigMask).slice(FirstMaskIdx, NewElts), Mask.begin());
assert(!Output && "Expected default initialized initial value.");
TryPeekThroughShufflesInputs(Mask);
@@ -2768,6 +2849,37 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo,
DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
}
+void DAGTypeLegalizer::SplitVecRes_VECTOR_DEINTERLEAVE(SDNode *N) {
+
+ SDValue Op0Lo, Op0Hi, Op1Lo, Op1Hi;
+ GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
+ GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);
+ EVT VT = Op0Lo.getValueType();
+ SDLoc DL(N);
+ SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
+ DAG.getVTList(VT, VT), Op0Lo, Op0Hi);
+ SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
+ DAG.getVTList(VT, VT), Op1Lo, Op1Hi);
+
+ SetSplitVector(SDValue(N, 0), ResLo.getValue(0), ResHi.getValue(0));
+ SetSplitVector(SDValue(N, 1), ResLo.getValue(1), ResHi.getValue(1));
+}
+
+void DAGTypeLegalizer::SplitVecRes_VECTOR_INTERLEAVE(SDNode *N) {
+ SDValue Op0Lo, Op0Hi, Op1Lo, Op1Hi;
+ GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
+ GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);
+ EVT VT = Op0Lo.getValueType();
+ SDLoc DL(N);
+ SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
+ DAG.getVTList(VT, VT), Op0Lo, Op1Lo),
+ DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
+ DAG.getVTList(VT, VT), Op0Hi, Op1Hi)};
+
+ SetSplitVector(SDValue(N, 0), Res[0].getValue(0), Res[0].getValue(1));
+ SetSplitVector(SDValue(N, 1), Res[1].getValue(0), Res[1].getValue(1));
+}
+
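
A standalone sketch, not from the patch, of the index arithmetic behind splitting VECTOR_INTERLEAVE: interleaving the low halves of the operands yields the first half of the full interleaved sequence, and the high halves yield the second half.

#include <array>
#include <cassert>

int main() {
  const std::array<int, 4> A = {0, 1, 2, 3}, B = {10, 11, 12, 13};
  // The two results of a full interleave of A and B.
  const std::array<int, 4> Res0 = {0, 10, 1, 11}, Res1 = {2, 12, 3, 13};
  // Interleave of the low halves and of the high halves.
  const std::array<int, 4> LoPart = {A[0], B[0], A[1], B[1]};
  const std::array<int, 4> HiPart = {A[2], B[2], A[3], B[3]};
  assert(LoPart == Res0 && HiPart == Res1);
}
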
//===----------------------------------------------------------------------===//
// Operand Vector Splitting
//===----------------------------------------------------------------------===//
@@ -2808,7 +2920,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FP_ROUND:
case ISD::VP_FP_ROUND:
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
- case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
+ case ISD::FCOPYSIGN: Res = SplitVecOp_FPOpDifferentTypes(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
@@ -2862,6 +2974,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::FTRUNC:
Res = SplitVecOp_UnaryOp(N);
break;
+ case ISD::FLDEXP:
+ Res = SplitVecOp_FPOpDifferentTypes(N);
+ break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
@@ -2882,6 +2997,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
Res = SplitVecOp_VECREDUCE(N, OpNo);
break;
case ISD::VECREDUCE_SEQ_FADD:
@@ -3807,10 +3924,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
}
-SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
- // The result (and the first input) has a legal vector type, but the second
- // input needs splitting.
-
+// Split a vector type in an FP binary operation where the second operand has a
+// different type from the first.
+//
+// The result (and the first input) has a legal vector type, but the second
+// input needs splitting.
+SDValue DAGTypeLegalizer::SplitVecOp_FPOpDifferentTypes(SDNode *N) {
SDLoc DL(N);
EVT LHSLoVT, LHSHiVT;
@@ -3826,8 +3945,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) {
SDValue RHSLo, RHSHi;
std::tie(RHSLo, RHSHi) = DAG.SplitVector(N->getOperand(1), DL);
- SDValue Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLoVT, LHSLo, RHSLo);
- SDValue Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHiVT, LHSHi, RHSHi);
+ SDValue Lo = DAG.getNode(N->getOpcode(), DL, LHSLoVT, LHSLo, RHSLo);
+ SDValue Hi = DAG.getNode(N->getOpcode(), DL, LHSHiVT, LHSHi, RHSHi);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0), Lo, Hi);
}
@@ -3885,9 +4004,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
N->dump(&DAG);
dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to widen the result of this operator!");
+ report_fatal_error("Do not know how to widen the result of this operator!");
case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break;
+ case ISD::AssertZext: Res = WidenVecRes_AssertZext(N); break;
case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
@@ -4036,8 +4156,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_IS_FPCLASS(N);
break;
+ case ISD::FLDEXP:
case ISD::FPOWI:
- Res = WidenVecRes_POWI(N);
+ if (!unrollExpandedOp())
+ Res = WidenVecRes_ExpOp(N);
break;
case ISD::ANY_EXTEND_VECTOR_INREG:
@@ -4394,10 +4516,18 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
for (unsigned i = 1; i < NumOpers; ++i) {
SDValue Oper = N->getOperand(i);
- if (Oper.getValueType().isVector()) {
- assert(Oper.getValueType() == N->getValueType(0) &&
- "Invalid operand type to widen!");
- Oper = GetWidenedVector(Oper);
+ EVT OpVT = Oper.getValueType();
+ if (OpVT.isVector()) {
+ if (getTypeAction(OpVT) == TargetLowering::TypeWidenVector)
+ Oper = GetWidenedVector(Oper);
+ else {
+ EVT WideOpVT =
+ EVT::getVectorVT(*DAG.getContext(), OpVT.getVectorElementType(),
+ WidenVT.getVectorElementCount());
+ Oper = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
+ DAG.getUNDEF(WideOpVT), Oper,
+ DAG.getVectorIdxConstant(0, dl));
+ }
}
InOps.push_back(Oper);
@@ -4415,9 +4545,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
for (unsigned i = 0; i < NumOpers; ++i) {
SDValue Op = InOps[i];
- if (Op.getValueType().isVector())
- Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Op,
+ EVT OpVT = Op.getValueType();
+ if (OpVT.isVector()) {
+ EVT OpExtractVT =
+ EVT::getVectorVT(*DAG.getContext(), OpVT.getVectorElementType(),
+ VT.getVectorElementCount());
+ Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpExtractVT, Op,
DAG.getVectorIdxConstant(Idx, dl));
+ }
EOps.push_back(Op);
}
@@ -4441,8 +4576,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
for (unsigned i = 0; i < NumOpers; ++i) {
SDValue Op = InOps[i];
- if (Op.getValueType().isVector())
- Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, Op,
+ EVT OpVT = Op.getValueType();
+ if (OpVT.isVector())
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ OpVT.getVectorElementType(), Op,
DAG.getVectorIdxConstant(Idx, dl));
EOps.push_back(Op);
@@ -4751,11 +4888,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) {
N->getFlags());
}
-SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecRes_ExpOp(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
- SDValue ShOp = N->getOperand(1);
- return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp);
+ SDValue RHS = N->getOperand(1);
+ SDValue ExpOp = RHS.getValueType().isVector() ? GetWidenedVector(RHS) : RHS;
+
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ExpOp);
}
SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
@@ -4763,7 +4902,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
if (N->getNumOperands() == 1)
- return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, N->getFlags());
assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
assert(N->isVPOpcode() && "Expected VP opcode");
@@ -4863,7 +5002,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
WidenSize / InEltVT.getSizeInBits());
} else {
- NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumParts);
+ // For big endian systems, using the promoted input scalar type
+ // to produce the scalar_to_vector would put the desired bits into
+ // the least significant byte(s) of the wider element zero. This
+ // will mean that the users of the result vector are using incorrect
+ // bits. Use the original input type instead. Although either input
+ // type can be used on little endian systems, for consistency we
+ // use the original type there as well.
+ EVT OrigInVT = N->getOperand(0).getValueType();
+ NewNumParts = WidenSize / OrigInVT.getSizeInBits();
+ NewInVT = EVT::getVectorVT(*DAG.getContext(), OrigInVT, NewNumParts);
}
if (TLI.isTypeLegal(NewInVT)) {
@@ -5080,6 +5228,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
return DAG.getBuildVector(WidenVT, dl, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecRes_AssertZext(SDNode *N) {
+ SDValue InOp = ModifyToType(
+ N->getOperand(0),
+ TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), true);
+ return DAG.getNode(ISD::AssertZext, SDLoc(N), InOp.getValueType(), InOp,
+ N->getOperand(1));
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
SDValue InOp = GetWidenedVector(N->getOperand(0));
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N),
@@ -5105,30 +5261,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
return SDValue();
}
- SDValue Result;
- SmallVector<SDValue, 16> LdChain; // Chain for the series of load
- if (ExtType != ISD::NON_EXTLOAD)
- Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
- else
- Result = GenWidenVectorLoads(LdChain, LD);
-
- if (Result) {
- // If we generate a single load, we can use that for the chain. Otherwise,
- // build a factor node to remember the multiple loads are independent and
- // chain to that.
- SDValue NewChain;
- if (LdChain.size() == 1)
- NewChain = LdChain[0];
- else
- NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
-
- // Modified the chain - switch anything that used the old chain to use
- // the new one.
- ReplaceValueWith(SDValue(N, 1), NewChain);
-
- return Result;
- }
-
// Generate a vector-predicated load if it is custom/legal on the target. To
// avoid possible recursion, only do this if the widened mask type is legal.
// FIXME: Not all targets may support EVL in VP_LOAD. These will have been
@@ -5138,15 +5270,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT);
EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
WideVT.getVectorElementCount());
- if (ExtType == ISD::NON_EXTLOAD && WideVT.isScalableVector() &&
+ if (ExtType == ISD::NON_EXTLOAD &&
TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) &&
TLI.isTypeLegal(WideMaskVT)) {
SDLoc DL(N);
SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
- MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
- unsigned NumVTElts = LdVT.getVectorMinNumElements();
- SDValue EVL =
- DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
+ SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(),
+ LdVT.getVectorElementCount());
const auto *MMO = LD->getMemOperand();
SDValue NewLoad =
DAG.getLoadVP(WideVT, DL, LD->getChain(), LD->getBasePtr(), Mask, EVL,
@@ -5160,6 +5290,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
return NewLoad;
}
+ SDValue Result;
+ SmallVector<SDValue, 16> LdChain; // Chain for the series of loads
+ if (ExtType != ISD::NON_EXTLOAD)
+ Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
+ else
+ Result = GenWidenVectorLoads(LdChain, LD);
+
+ if (Result) {
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return Result;
+ }
+
report_fatal_error("Unable to widen vector load");
}
@@ -5780,7 +5934,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
N->dump(&DAG);
dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to widen this operator's operand!");
+ report_fatal_error("Do not know how to widen this operator's operand!");
case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
@@ -5800,7 +5954,8 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STRICT_FSETCC:
case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break;
case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break;
- case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
+ case ISD::FLDEXP:
+ case ISD::FCOPYSIGN: Res = WidenVecOp_UnrollVectorOp(N); break;
case ISD::IS_FPCLASS: Res = WidenVecOp_IS_FPCLASS(N); break;
case ISD::ANY_EXTEND:
@@ -5843,6 +5998,8 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::VECREDUCE_UMIN:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
Res = WidenVecOp_VECREDUCE(N);
break;
case ISD::VECREDUCE_SEQ_FADD:
@@ -5947,7 +6104,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
}
}
-SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecOp_UnrollVectorOp(SDNode *N) {
// The result (and first input) is legal, but the second input is illegal.
// We can't do much to fix that, so just unroll and let the extracts off of
// the second input be widened as needed later.
@@ -6192,14 +6349,6 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
if (ST->isTruncatingStore())
return TLI.scalarizeVectorStore(ST, DAG);
- SmallVector<SDValue, 16> StChain;
- if (GenWidenVectorStores(StChain, ST)) {
- if (StChain.size() == 1)
- return StChain[0];
-
- return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
- }
-
// Generate a vector-predicated store if it is custom/legal on the target.
// To avoid possible recursion, only do this if the widened mask type is
// legal.
@@ -6211,23 +6360,29 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT);
EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
WideVT.getVectorElementCount());
- if (WideVT.isScalableVector() &&
- TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
+
+ if (TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
TLI.isTypeLegal(WideMaskVT)) {
// Widen the value.
SDLoc DL(N);
StVal = GetWidenedVector(StVal);
SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
- MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
- unsigned NumVTElts = StVT.getVectorMinNumElements();
- SDValue EVL =
- DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
+ SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(),
+ StVT.getVectorElementCount());
return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(),
DAG.getUNDEF(ST->getBasePtr().getValueType()), Mask,
- EVL, StVal.getValueType(), ST->getMemOperand(),
+ EVL, StVT, ST->getMemOperand(),
ST->getAddressingMode());
}
+ SmallVector<SDValue, 16> StChain;
+ if (GenWidenVectorStores(StChain, ST)) {
+ if (StChain.size() == 1)
+ return StChain[0];
+
+ return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
+ }
+
report_fatal_error("Unable to widen vector store");
}
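
The reordered WidenVecOp_STORE path above now prefers a VP store whose explicit vector length (EVL) equals the original element count, so the widened value can be stored without touching the padding lanes. A minimal standalone sketch of that idea, using plain arrays rather than SelectionDAG nodes (all names below are illustrative, not LLVM API):

#include <array>
#include <cstddef>
#include <cstdio>

// Illustrative sketch: a "predicated" store writes only the first EVL lanes of
// a widened register, leaving the rest of the destination memory untouched.
template <size_t WideN>
void storeWithEVL(const std::array<int, WideN> &WideVal, int *Dst, size_t EVL) {
  for (size_t I = 0; I < EVL && I < WideN; ++I)
    Dst[I] = WideVal[I]; // lanes >= EVL (the widening padding) are never written
}

int main() {
  // A 3-element value widened to 4 lanes; lane 3 holds padding.
  std::array<int, 4> Widened = {10, 20, 30, /*padding*/ -1};
  int Memory[3] = {0, 0, 0};
  storeWithEVL(Widened, Memory, /*EVL=*/3);
  std::printf("%d %d %d\n", Memory[0], Memory[1], Memory[2]); // 10 20 30
}
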
diff --git a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 9fcf692babdc..c31b971e7fc3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -230,7 +230,7 @@ public:
bool isEmitted() const { return Emitted; }
/// clearIsEmitted - Reset Emitted flag, for certain special cases where
- /// dbg.addr is emitted twice.
+ /// SDDbgValue is emitted twice. DBG_INSTR_REF depends on this behaviour.
void clearIsEmitted() { Emitted = false; }
LLVM_DUMP_METHOD void dump() const;
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 2d93adea6b9b..5b01743d23e0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -69,7 +69,7 @@ private:
/// LiveRegDefs - A set of physical registers and their definition
/// that are "live". These nodes must be scheduled before any other nodes that
/// modifies the registers can be scheduled.
- unsigned NumLiveRegs;
+ unsigned NumLiveRegs = 0u;
std::vector<SUnit*> LiveRegDefs;
std::vector<unsigned> LiveRegCycles;
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index c252046ef10b..458f50c54824 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
@@ -45,7 +46,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -156,16 +156,16 @@ private:
unsigned CurCycle = 0;
/// MinAvailableCycle - Cycle of the soonest available instruction.
- unsigned MinAvailableCycle;
+ unsigned MinAvailableCycle = ~0u;
/// IssueCount - Count instructions issued in this cycle
/// Currently valid only for bottom-up scheduling.
- unsigned IssueCount;
+ unsigned IssueCount = 0u;
/// LiveRegDefs - A set of physical registers and their definition
/// that are "live". These nodes must be scheduled before any other nodes that
/// modifies the registers can be scheduled.
- unsigned NumLiveRegs;
+ unsigned NumLiveRegs = 0u;
std::unique_ptr<SUnit*[]> LiveRegDefs;
std::unique_ptr<SUnit*[]> LiveRegGens;
@@ -1744,12 +1744,12 @@ protected:
bool SrcOrder;
// SUnits - The SUnits for the current graph.
- std::vector<SUnit> *SUnits;
+ std::vector<SUnit> *SUnits = nullptr;
MachineFunction &MF;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- const TargetLowering *TLI;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetLowering *TLI = nullptr;
ScheduleDAGRRList *scheduleDAG = nullptr;
// SethiUllmanNumbers - The SethiUllman number for each node.
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 2e1fd1e8a758..0579c1664d5c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -667,7 +667,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
// This copy is a liveout value. It is likely coalesced, so reduce the
// latency so not to penalize the def.
// FIXME: need target specific adjustment here?
- Latency = (Latency > 1) ? Latency - 1 : 1;
+ Latency = Latency - 1;
}
if (Latency >= 0)
dep.setLatency(Latency);
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 99bbaeb19182..439ccfdc3275 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -16,10 +16,10 @@
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/MachineValueType.h"
#include <cassert>
#include <string>
#include <vector>
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9a3609bc183b..5c1b19eba1c1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -17,11 +17,11 @@
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
@@ -35,6 +35,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -61,12 +62,12 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
@@ -200,10 +201,10 @@ bool ISD::isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly) {
SDValue NotZero = N->getOperand(i);
unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) {
- if (CN->getAPIntValue().countTrailingOnes() < EltSize)
+ if (CN->getAPIntValue().countr_one() < EltSize)
return false;
} else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) {
- if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize)
+ if (CFPN->getValueAPF().bitcastToAPInt().countr_one() < EltSize)
return false;
} else
return false;
@@ -244,10 +245,10 @@ bool ISD::isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly) {
// constants are.
unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) {
- if (CN->getAPIntValue().countTrailingZeros() < EltSize)
+ if (CN->getAPIntValue().countr_zero() < EltSize)
return false;
} else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Op)) {
- if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize)
+ if (CFPN->getValueAPF().bitcastToAPInt().countr_zero() < EltSize)
return false;
} else
return false;
@@ -454,6 +455,10 @@ ISD::NodeType ISD::getVecReduceBaseOpcode(unsigned VecReduceOpcode) {
case ISD::VECREDUCE_FMIN:
case ISD::VP_REDUCE_FMIN:
return ISD::FMINNUM;
+ case ISD::VECREDUCE_FMAXIMUM:
+ return ISD::FMAXIMUM;
+ case ISD::VECREDUCE_FMINIMUM:
+ return ISD::FMINIMUM;
}
}
@@ -516,6 +521,31 @@ std::optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) {
}
}
+std::optional<unsigned> ISD::getBaseOpcodeForVP(unsigned VPOpcode,
+ bool hasFPExcept) {
+ // FIXME: Return strict opcodes in case of fp exceptions.
+ switch (VPOpcode) {
+ default:
+ return std::nullopt;
+#define BEGIN_REGISTER_VP_SDNODE(VPOPC, ...) case ISD::VPOPC:
+#define VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) return ISD::SDOPC;
+#define END_REGISTER_VP_SDNODE(VPOPC) break;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+ return std::nullopt;
+}
+
+unsigned ISD::getVPForBaseOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ default:
+ llvm_unreachable("can not translate this Opcode to VP.");
+#define BEGIN_REGISTER_VP_SDNODE(VPOPC, ...) break;
+#define VP_PROPERTY_FUNCTIONAL_SDOPC(SDOPC) case ISD::SDOPC:
+#define END_REGISTER_VP_SDNODE(VPOPC) return ISD::VPOPC;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+}
+
ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
switch (ExtType) {
case ISD::EXTLOAD:
@@ -866,12 +896,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddInteger(AT->getMemOperand()->getFlags());
break;
}
- case ISD::PREFETCH: {
- const MemSDNode *PF = cast<MemSDNode>(N);
- ID.AddInteger(PF->getPointerInfo().getAddrSpace());
- ID.AddInteger(PF->getMemOperand()->getFlags());
- break;
- }
case ISD::VECTOR_SHUFFLE: {
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements();
@@ -890,14 +914,20 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
case ISD::AssertAlign:
ID.AddInteger(cast<AssertAlignSDNode>(N)->getAlign().value());
break;
+ case ISD::PREFETCH:
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN:
+ // Handled by MemIntrinsicSDNode check after the switch.
+ break;
} // end switch (N->getOpcode())
- // Target specific memory nodes could also have address spaces and flags
+ // MemIntrinsic nodes could also have subclass data, address spaces, and flags
// to check.
- if (N->isTargetMemoryOpcode()) {
- const MemSDNode *MN = cast<MemSDNode>(N);
+ if (auto *MN = dyn_cast<MemIntrinsicSDNode>(N)) {
+ ID.AddInteger(MN->getRawSubclassData());
ID.AddInteger(MN->getPointerInfo().getAddrSpace());
ID.AddInteger(MN->getMemOperand()->getFlags());
+ ID.AddInteger(MN->getMemoryVT().getRawBits());
}
}
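
The AddNodeIDCustom change above routes PREFETCH and the intrinsic opcodes through the generic MemIntrinsicSDNode handling and adds the subclass data and memory VT to the CSE key, so nodes that differ only in those fields are no longer unified. A rough standalone analogue of why every distinguishing field must be part of the key (the tuple key and FakeNode type below are illustrative, not the LLVM FoldingSet API):

#include <cstdio>
#include <map>
#include <tuple>

// Illustrative sketch: CSE keyed on a tuple. If a field that distinguishes two
// nodes (for example the memory VT) were left out of the key, the second
// lookup would wrongly return the first node.
struct FakeNode { int Id; };

using Key = std::tuple<unsigned /*Opcode*/, unsigned /*AddrSpace*/,
                       unsigned /*Flags*/, unsigned /*MemVTBits*/>;

FakeNode *getOrCreate(std::map<Key, FakeNode> &CSE, const Key &K, int NewId) {
  // try_emplace only creates a node when the full key is new.
  return &CSE.try_emplace(K, FakeNode{NewId}).first->second;
}

int main() {
  std::map<Key, FakeNode> CSE;
  FakeNode *A = getOrCreate(CSE, {100, 0, 1, /*MemVT=*/32}, 1);
  FakeNode *B = getOrCreate(CSE, {100, 0, 1, /*MemVT=*/64}, 2); // differs only in MemVT
  std::printf("distinct nodes: %s\n", A != B ? "yes" : "no");   // yes
}
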
@@ -1285,8 +1315,8 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
void SelectionDAG::init(MachineFunction &NewMF,
OptimizationRemarkEmitter &NewORE, Pass *PassPtr,
const TargetLibraryInfo *LibraryInfo,
- LegacyDivergenceAnalysis *Divergence,
- ProfileSummaryInfo *PSIin, BlockFrequencyInfo *BFIin,
+ UniformityInfo *NewUA, ProfileSummaryInfo *PSIin,
+ BlockFrequencyInfo *BFIin,
FunctionVarLocs const *VarLocs) {
MF = &NewMF;
SDAGISelPass = PassPtr;
@@ -1295,7 +1325,7 @@ void SelectionDAG::init(MachineFunction &NewMF,
TSI = getSubtarget().getSelectionDAGInfo();
LibInfo = LibraryInfo;
Context = &MF->getFunction().getContext();
- DA = Divergence;
+ UA = NewUA;
PSI = PSIin;
BFI = BFIin;
FnVarLocs = VarLocs;
@@ -1910,6 +1940,34 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
return SDValue(CondCodeNodes[Cond], 0);
}
+SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm,
+ bool ConstantFold) {
+ assert(MulImm.getBitWidth() == VT.getSizeInBits() &&
+ "APInt size does not match type size!");
+
+ if (ConstantFold) {
+ const MachineFunction &MF = getMachineFunction();
+ auto Attr = MF.getFunction().getFnAttribute(Attribute::VScaleRange);
+ if (Attr.isValid()) {
+ unsigned VScaleMin = Attr.getVScaleRangeMin();
+ if (std::optional<unsigned> VScaleMax = Attr.getVScaleRangeMax())
+ if (*VScaleMax == VScaleMin)
+ return getConstant(MulImm * VScaleMin, DL, VT);
+ }
+ }
+
+ return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT));
+}
+
+SDValue SelectionDAG::getElementCount(const SDLoc &DL, EVT VT, ElementCount EC,
+ bool ConstantFold) {
+ if (EC.isScalable())
+ return getVScale(DL, VT,
+ APInt(VT.getSizeInBits(), EC.getKnownMinValue()));
+
+ return getConstant(EC.getKnownMinValue(), DL, VT);
+}
+
SDValue SelectionDAG::getStepVector(const SDLoc &DL, EVT ResVT) {
APInt One(ResVT.getScalarSizeInBits(), 1);
return getStepVector(DL, ResVT, One);
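
getVScale above now folds vscale * MulImm to a plain constant when the function's vscale_range attribute pins vscale to a single value, and the new getElementCount builds on it. A hedged standalone sketch of the same decision (std::optional stands in for the attribute query; nothing here is SelectionDAG API):

#include <cstdint>
#include <cstdio>
#include <optional>

// Illustrative sketch: a scalable element count is MinElts * vscale. When the
// vscale_range attribute gives equal min and max, vscale is a known constant
// and the whole count folds; otherwise it stays symbolic.
std::optional<uint64_t> foldElementCount(uint64_t MinElts, bool Scalable,
                                         unsigned VScaleMin,
                                         std::optional<unsigned> VScaleMax) {
  if (!Scalable)
    return MinElts; // fixed-width vectors are already constant
  if (VScaleMax && *VScaleMax == VScaleMin)
    return MinElts * VScaleMin; // vscale is pinned, fold to a constant
  return std::nullopt;          // keep a runtime vscale * MinElts expression
}

int main() {
  // e.g. <vscale x 4 x i32> in a function with vscale_range(2,2): 8 elements.
  if (auto C = foldElementCount(4, /*Scalable=*/true, 2, 2))
    std::printf("folded to %llu elements\n", (unsigned long long)*C);
}
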
@@ -2128,7 +2186,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
return SDValue(E, 0);
auto *N = newSDNode<RegisterSDNode>(RegNo, VT);
- N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA);
+ N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, UA);
CSEMap.InsertNode(N, IP);
InsertNode(N);
return SDValue(N, 0);
@@ -2381,6 +2439,16 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
ISD::CondCode Cond, const SDLoc &dl) {
EVT OpVT = N1.getValueType();
+ auto GetUndefBooleanConstant = [&]() {
+ if (VT.getScalarType() == MVT::i1 ||
+ TLI->getBooleanContents(OpVT) ==
+ TargetLowering::UndefinedBooleanContent)
+ return getUNDEF(VT);
+ // ZeroOrOne / ZeroOrNegative require specific values for the high bits,
+ // so we cannot use getUNDEF(). Return zero instead.
+ return getConstant(0, dl, VT);
+ };
+
// These setcc operations always fold.
switch (Cond) {
default: break;
@@ -2410,12 +2478,12 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
// icmp eq/ne X, undef -> undef.
if ((N1.isUndef() || N2.isUndef()) &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE))
- return getUNDEF(VT);
+ return GetUndefBooleanConstant();
// If both operands are undef, we can return undef for int comparison.
// icmp undef, undef -> undef.
if (N1.isUndef() && N2.isUndef())
- return getUNDEF(VT);
+ return GetUndefBooleanConstant();
// icmp X, X -> true/false
// icmp X, undef -> true/false because undef could be X.
@@ -2441,34 +2509,34 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
switch (Cond) {
default: break;
case ISD::SETEQ: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
+ return GetUndefBooleanConstant();
[[fallthrough]];
case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT,
OpVT);
case ISD::SETNE: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
+ return GetUndefBooleanConstant();
[[fallthrough]];
case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
R==APFloat::cmpLessThan, dl, VT,
OpVT);
case ISD::SETLT: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
+ return GetUndefBooleanConstant();
[[fallthrough]];
case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT,
OpVT);
case ISD::SETGT: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
+ return GetUndefBooleanConstant();
[[fallthrough]];
case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl,
VT, OpVT);
case ISD::SETLE: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
+ return GetUndefBooleanConstant();
[[fallthrough]];
case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan ||
R==APFloat::cmpEqual, dl, VT,
OpVT);
case ISD::SETGE: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
+ return GetUndefBooleanConstant();
[[fallthrough]];
case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
R==APFloat::cmpEqual, dl, VT, OpVT);
@@ -2513,7 +2581,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
case 1: // Known true.
return getBoolConstant(true, dl, VT, OpVT);
case 2: // Undefined.
- return getUNDEF(VT);
+ return GetUndefBooleanConstant();
}
}
@@ -2567,7 +2635,7 @@ APInt SelectionDAG::computeVectorKnownZeroElements(SDValue Op,
unsigned NumElts = VT.getVectorNumElements();
assert(DemandedElts.getBitWidth() == NumElts && "Unexpected demanded mask.");
- APInt KnownZeroElements = APInt::getNullValue(NumElts);
+ APInt KnownZeroElements = APInt::getZero(NumElts);
for (unsigned EltIdx = 0; EltIdx != NumElts; ++EltIdx) {
if (!DemandedElts[EltIdx])
continue; // Don't query elements that are not demanded.
@@ -2661,8 +2729,8 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
}
case ISD::VECTOR_SHUFFLE: {
// Check if this is a shuffle node doing a splat or a shuffle of a splat.
- APInt DemandedLHS = APInt::getNullValue(NumElts);
- APInt DemandedRHS = APInt::getNullValue(NumElts);
+ APInt DemandedLHS = APInt::getZero(NumElts);
+ APInt DemandedRHS = APInt::getZero(NumElts);
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(V)->getMask();
for (int i = 0; i != (int)NumElts; ++i) {
int M = Mask[i];
@@ -2689,7 +2757,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
// TODO: Handle source ops splats with undefs.
auto CheckSplatSrc = [&](SDValue Src, const APInt &SrcElts) {
APInt SrcUndefs;
- return (SrcElts.countPopulation() == 1) ||
+ return (SrcElts.popcount() == 1) ||
(isSplatValue(Src, SrcElts, SrcUndefs, Depth + 1) &&
(SrcElts & SrcUndefs).isZero());
};
@@ -2808,7 +2876,7 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
SplatIdx = 0;
return getUNDEF(VT);
}
- SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
+ SplatIdx = (UndefElts & DemandedElts).countr_one();
}
return V;
}
@@ -3005,7 +3073,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
// Known bits are the values that are shared by every demanded element.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
// If we don't know any bits, early out.
if (Known.isUnknown())
@@ -3028,7 +3096,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (!!DemandedLHS) {
SDValue LHS = Op.getOperand(0);
Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
// If we don't know any bits, early out.
if (Known.isUnknown())
@@ -3036,10 +3104,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (!!DemandedRHS) {
SDValue RHS = Op.getOperand(1);
Known2 = computeKnownBits(RHS, DemandedRHS, Depth + 1);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
break;
}
+ case ISD::VSCALE: {
+ const Function &F = getMachineFunction().getFunction();
+ const APInt &Multiplier = Op.getConstantOperandAPInt(0);
+ Known = getVScaleRange(&F, BitWidth).multiply(Multiplier).toKnownBits();
+ break;
+ }
case ISD::CONCAT_VECTORS: {
if (Op.getValueType().isScalableVector())
break;
@@ -3054,7 +3128,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (!!DemandedSub) {
SDValue Sub = Op.getOperand(i);
Known2 = computeKnownBits(Sub, DemandedSub, Depth + 1);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
// If we don't know any bits, early out.
if (Known.isUnknown())
@@ -3084,7 +3158,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
if (!!DemandedSrcElts) {
Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
break;
}
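
The commonBits calls rewritten above use the renamed KnownBits helpers: intersectWith keeps only the bits both inputs agree on (the right merge when a value may come from either of two sources), while unionWith assumes both facts hold of the same value and keeps everything either one knows. A small self-contained model of the two operations (a stripped-down stand-in, not the llvm::KnownBits class):

#include <cstdint>
#include <cstdio>

// KnownBits-style facts: Zero has a 1 where the bit is known to be 0, One has
// a 1 where the bit is known to be 1.
struct Bits {
  uint8_t Zero, One;
};

// Value may come from either source: keep only bits both sources agree on.
Bits intersectWith(Bits A, Bits B) {
  return {uint8_t(A.Zero & B.Zero), uint8_t(A.One & B.One)};
}

// Two facts about the same value: a bit known in either fact stays known.
Bits unionWith(Bits A, Bits B) {
  return {uint8_t(A.Zero | B.Zero), uint8_t(A.One | B.One)};
}

int main() {
  Bits FromElt0 = {/*Zero=*/0xF0, /*One=*/0x01}; // 0000???1
  Bits FromElt1 = {/*Zero=*/0xF2, /*One=*/0x01}; // 0000??01
  Bits Common = intersectWith(FromElt0, FromElt1);
  std::printf("common: Zero=%#x One=%#x\n", (unsigned)Common.Zero,
              (unsigned)Common.One); // Zero=0xf0 One=0x1
}
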
@@ -3174,8 +3248,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
if (DemandedElts[i]) {
unsigned Shifts = IsLE ? i : NumElts - 1 - i;
unsigned Offset = (Shifts % SubScale) * BitWidth;
- Known = KnownBits::commonBits(Known,
- Known2.extractBits(BitWidth, Offset));
+ Known = Known.intersectWith(Known2.extractBits(BitWidth, Offset));
// If we don't know any bits, early out.
if (Known.isUnknown())
break;
@@ -3273,7 +3346,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth+1);
// Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
break;
case ISD::SELECT_CC:
Known = computeKnownBits(Op.getOperand(3), DemandedElts, Depth+1);
@@ -3283,7 +3356,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth+1);
// Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
break;
case ISD::SMULO:
case ISD::UMULO:
@@ -3334,7 +3407,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
Known = KnownBits::ashr(Known, Known2);
- // TODO: Add minimum shift high known sign bits.
break;
case ISD::FSHL:
case ISD::FSHR:
@@ -3364,8 +3436,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known2.One.lshrInPlace(Amt);
Known2.Zero.lshrInPlace(Amt);
}
- Known.One |= Known2.One;
- Known.Zero |= Known2.Zero;
+ Known = Known.unionWith(Known2);
}
break;
case ISD::SHL_PARTS:
@@ -3588,9 +3659,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
// All bits are zero except the low bit.
Known.Zero.setBitsFrom(1);
break;
+ case ISD::ADD:
+ case ISD::SUB: {
+ SDNodeFlags Flags = Op.getNode()->getFlags();
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::computeForAddSub(Op.getOpcode() == ISD::ADD,
+ Flags.hasNoSignedWrap(), Known, Known2);
+ break;
+ }
case ISD::USUBO:
case ISD::SSUBO:
- case ISD::SUBCARRY:
+ case ISD::USUBO_CARRY:
case ISD::SSUBO_CARRY:
if (Op.getResNo() == 1) {
// If we know the result of a setcc has the top bits zero, use this info.
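
The new ADD/SUB case above feeds both operands' known bits into KnownBits::computeForAddSub instead of falling through to the carry-opcode handling. One property such reasoning provides, checked by brute force below (a property check, not the LLVM algorithm itself):

#include <cstdint>
#include <cstdio>

// Property check: if the low K bits of both operands are known zero, the low K
// bits of their sum and difference are known zero as well, which is the kind
// of fact computeKnownBits can now report for plain ADD/SUB.
int main() {
  const unsigned K = 3, Mask = (1u << K) - 1;
  bool Holds = true;
  for (uint32_t A = 0; A < 1024; A += (1u << K))
    for (uint32_t B = 0; B < 1024; B += (1u << K))
      if (((A + B) & Mask) != 0 || ((A - B) & Mask) != 0)
        Holds = false;
  std::printf("low %u bits stay zero through add/sub: %s\n", K,
              Holds ? "yes" : "no"); // yes
}
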
@@ -3601,13 +3681,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
[[fallthrough]];
- case ISD::SUB:
case ISD::SUBC: {
assert(Op.getResNo() == 0 &&
"We only compute knownbits for the difference here.");
// TODO: Compute influence of the carry operand.
- if (Opcode == ISD::SUBCARRY || Opcode == ISD::SSUBO_CARRY)
+ if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY)
break;
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
@@ -3618,7 +3697,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::UADDO:
case ISD::SADDO:
- case ISD::ADDCARRY:
+ case ISD::UADDO_CARRY:
case ISD::SADDO_CARRY:
if (Op.getResNo() == 1) {
// If we know the result of a setcc has the top bits zero, use this info.
@@ -3629,17 +3708,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
}
[[fallthrough]];
- case ISD::ADD:
case ISD::ADDC:
case ISD::ADDE: {
assert(Op.getResNo() == 0 && "We only compute knownbits for the sum here.");
- // With ADDE and ADDCARRY, a carry bit may be added in.
+ // With ADDE and UADDO_CARRY, a carry bit may be added in.
KnownBits Carry(1);
if (Opcode == ISD::ADDE)
// Can't track carry from glue, set carry to unknown.
Carry.resetAll();
- else if (Opcode == ISD::ADDCARRY || Opcode == ISD::SADDO_CARRY)
+ else if (Opcode == ISD::UADDO_CARRY || Opcode == ISD::SADDO_CARRY)
// TODO: Compute known bits for the carry operand. Not sure if it is worth
// the trouble (how often will we find a known carry bit). And I haven't
// tested this very much yet, but something like this might work:
@@ -3657,7 +3735,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::UDIV: {
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- Known = KnownBits::udiv(Known, Known2);
+ Known = KnownBits::udiv(Known, Known2, Op->getFlags().hasExact());
+ break;
+ }
+ case ISD::SDIV: {
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::sdiv(Known, Known2, Op->getFlags().hasExact());
break;
}
case ISD::SREM: {
@@ -3735,11 +3819,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.Zero.setAllBits();
if (DemandedVal) {
Known2 = computeKnownBits(InVal, Depth + 1);
- Known = KnownBits::commonBits(Known, Known2.zextOrTrunc(BitWidth));
+ Known = Known.intersectWith(Known2.zextOrTrunc(BitWidth));
}
if (!!DemandedVecElts) {
Known2 = computeKnownBits(InVec, DemandedVecElts, Depth + 1);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
break;
}
@@ -3897,38 +3981,87 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
return Known;
}
-SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
- SDValue N1) const {
+/// Convert ConstantRange OverflowResult into SelectionDAG::OverflowKind.
+static SelectionDAG::OverflowKind mapOverflowResult(ConstantRange::OverflowResult OR) {
+ switch (OR) {
+ case ConstantRange::OverflowResult::MayOverflow:
+ return SelectionDAG::OFK_Sometime;
+ case ConstantRange::OverflowResult::AlwaysOverflowsLow:
+ case ConstantRange::OverflowResult::AlwaysOverflowsHigh:
+ return SelectionDAG::OFK_Always;
+ case ConstantRange::OverflowResult::NeverOverflows:
+ return SelectionDAG::OFK_Never;
+ }
+ llvm_unreachable("Unknown OverflowResult");
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForSignedAdd(SDValue N0, SDValue N1) const {
// X + 0 never overflow
if (isNullConstant(N1))
return OFK_Never;
- KnownBits N1Known = computeKnownBits(N1);
- if (N1Known.Zero.getBoolValue()) {
- KnownBits N0Known = computeKnownBits(N0);
+ // If both operands each have at least two sign bits, the addition
+ // cannot overflow.
+ if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1)
+ return OFK_Never;
- bool overflow;
- (void)N0Known.getMaxValue().uadd_ov(N1Known.getMaxValue(), overflow);
- if (!overflow)
- return OFK_Never;
- }
+ // TODO: Add ConstantRange::signedAddMayOverflow handling.
+ return OFK_Sometime;
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForUnsignedAdd(SDValue N0, SDValue N1) const {
+ // X + 0 never overflow
+ if (isNullConstant(N1))
+ return OFK_Never;
// mulhi + 1 never overflow
+ KnownBits N1Known = computeKnownBits(N1);
if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 &&
- (N1Known.getMaxValue() & 0x01) == N1Known.getMaxValue())
+ N1Known.getMaxValue().ult(2))
return OFK_Never;
- if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) {
- KnownBits N0Known = computeKnownBits(N0);
+ KnownBits N0Known = computeKnownBits(N0);
+ if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1 &&
+ N0Known.getMaxValue().ult(2))
+ return OFK_Never;
- if ((N0Known.getMaxValue() & 0x01) == N0Known.getMaxValue())
- return OFK_Never;
- }
+ // Fallback to ConstantRange::unsignedAddMayOverflow handling.
+ ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false);
+ ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false);
+ return mapOverflowResult(N0Range.unsignedAddMayOverflow(N1Range));
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForSignedSub(SDValue N0, SDValue N1) const {
+ // X - 0 never overflow
+ if (isNullConstant(N1))
+ return OFK_Never;
+
+ // If both operands each have at least two sign bits, the subtraction
+ // cannot overflow.
+ if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1)
+ return OFK_Never;
+
+ // TODO: Add ConstantRange::signedSubMayOverflow handling.
+ return OFK_Sometime;
+}
+
+SelectionDAG::OverflowKind
+SelectionDAG::computeOverflowForUnsignedSub(SDValue N0, SDValue N1) const {
+ // X - 0 never overflow
+ if (isNullConstant(N1))
+ return OFK_Never;
+ // TODO: Add ConstantRange::unsignedSubMayOverflow handling.
return OFK_Sometime;
}
-bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
+bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const {
+ if (Depth >= MaxRecursionDepth)
+ return false; // Limit search depth.
+
EVT OpVT = Val.getValueType();
unsigned BitWidth = OpVT.getScalarSizeInBits();
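
computeOverflowForUnsignedAdd above now falls back to ConstantRange::unsignedAddMayOverflow after the cheap special cases. The underlying classification for simple [lo, hi] ranges can be sketched standalone (this mirrors the idea only; the ConstantRange implementation also has to handle wrapped ranges):

#include <cstdint>
#include <cstdio>

enum class Overflow { Never, Sometime, Always };

// Illustrative sketch for unwrapped unsigned ranges [Lo, Hi]:
//  - if even the maxima add without wrapping, overflow can never happen;
//  - if already the minima wrap, it always happens;
//  - otherwise it depends on the runtime values.
Overflow unsignedAddMayOverflow(uint32_t LoA, uint32_t HiA, uint32_t LoB,
                                uint32_t HiB) {
  if (HiA <= UINT32_MAX - HiB)
    return Overflow::Never;
  if (LoA > UINT32_MAX - LoB)
    return Overflow::Always;
  return Overflow::Sometime;
}

int main() {
  // Known bits saying "both values fit in 16 bits" give ranges [0, 0xffff].
  Overflow R = unsignedAddMayOverflow(0, 0xffff, 0, 0xffff);
  std::printf("%s\n", R == Overflow::Never ? "never overflows" : "may overflow");
}
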
@@ -3970,15 +4103,12 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
// vscale(power-of-two) is a power-of-two for some targets
if (Val.getOpcode() == ISD::VSCALE &&
getTargetLoweringInfo().isVScaleKnownToBeAPowerOfTwo() &&
- isKnownToBeAPowerOfTwo(Val.getOperand(0)))
+ isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1))
return true;
// More could be done here, though the above checks are enough
// to handle some common cases.
-
- // Fall back to computeKnownBits to catch other known cases.
- KnownBits Known = computeKnownBits(Val);
- return (Known.countMaxPopulation() == 1) && (Known.countMinPopulation() == 1);
+ return false;
}
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
@@ -4041,14 +4171,20 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
continue;
SDValue SrcOp = Op.getOperand(i);
- Tmp2 = ComputeNumSignBits(SrcOp, Depth + 1);
+ // BUILD_VECTOR can implicitly truncate sources, we handle this specially
+ // for constant nodes to ensure we only look at the sign bits.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SrcOp)) {
+ APInt T = C->getAPIntValue().trunc(VTBits);
+ Tmp2 = T.getNumSignBits();
+ } else {
+ Tmp2 = ComputeNumSignBits(SrcOp, Depth + 1);
- // BUILD_VECTOR can implicitly truncate sources, we must handle this.
- if (SrcOp.getValueSizeInBits() != VTBits) {
- assert(SrcOp.getValueSizeInBits() > VTBits &&
- "Expected BUILD_VECTOR implicit truncation");
- unsigned ExtraBits = SrcOp.getValueSizeInBits() - VTBits;
- Tmp2 = (Tmp2 > ExtraBits ? Tmp2 - ExtraBits : 1);
+ if (SrcOp.getValueSizeInBits() != VTBits) {
+ assert(SrcOp.getValueSizeInBits() > VTBits &&
+ "Expected BUILD_VECTOR implicit truncation");
+ unsigned ExtraBits = SrcOp.getValueSizeInBits() - VTBits;
+ Tmp2 = (Tmp2 > ExtraBits ? Tmp2 - ExtraBits : 1);
+ }
}
Tmp = std::min(Tmp, Tmp2);
}
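
The BUILD_VECTOR change above counts sign bits of constant operands after truncating them to the element width, instead of counting in the wider source type and subtracting the extra bits. A standalone illustration of why that is more precise (the helper below exists only for this demo):

#include <cstdint>
#include <cstdio>

// Number of leading bits equal to the sign bit (including it) of a Width-bit value.
unsigned numSignBits(uint64_t V, unsigned Width) {
  unsigned SignBit = (V >> (Width - 1)) & 1, N = 0;
  for (unsigned I = Width; I-- > 0;) {
    if (((V >> I) & 1) != SignBit)
      break;
    ++N;
  }
  return N;
}

int main() {
  // An i32 constant 0x0000ffff used as an i16 BUILD_VECTOR element.
  uint64_t C = 0x0000ffff;
  unsigned Wide = numSignBits(C, 32);                 // 16
  unsigned OldEstimate = Wide > 16 ? Wide - 16 : 1;   // subtract ExtraBits: 1
  unsigned NewEstimate = numSignBits(C & 0xffff, 16); // truncate first: 16
  std::printf("old %u vs new %u sign bits\n", OldEstimate, NewEstimate);
}
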
@@ -4225,11 +4361,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::SADDO:
case ISD::UADDO:
case ISD::SADDO_CARRY:
- case ISD::ADDCARRY:
+ case ISD::UADDO_CARRY:
case ISD::SSUBO:
case ISD::USUBO:
case ISD::SSUBO_CARRY:
- case ISD::SUBCARRY:
+ case ISD::USUBO_CARRY:
case ISD::SMULO:
case ISD::UMULO:
if (Op.getResNo() != 1)
@@ -4733,6 +4869,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::AssertSext:
case ISD::AssertZext:
case ISD::FREEZE:
+ case ISD::CONCAT_VECTORS:
case ISD::INSERT_SUBVECTOR:
case ISD::AND:
case ISD::OR:
@@ -4753,6 +4890,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::ZERO_EXTEND_VECTOR_INREG:
case ISD::BITCAST:
case ISD::BUILD_VECTOR:
+ case ISD::BUILD_PAIR:
return false;
case ISD::ADD:
@@ -4771,6 +4909,13 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() ||
Op->getFlags().hasNoUnsignedWrap());
+ case ISD::INSERT_VECTOR_ELT:{
+ // Ensure that the element index is in bounds.
+ EVT VecVT = Op.getOperand(0).getValueType();
+ KnownBits KnownIdx = computeKnownBits(Op.getOperand(2), Depth + 1);
+ return KnownIdx.getMaxValue().uge(VecVT.getVectorMinNumElements());
+ }
+
default:
// Allow the target to implement this method for its nodes.
if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN ||
@@ -4835,7 +4980,8 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
case ISD::FROUND:
case ISD::FROUNDEVEN:
case ISD::FRINT:
- case ISD::FNEARBYINT: {
+ case ISD::FNEARBYINT:
+ case ISD::FLDEXP: {
if (SNaN)
return true;
return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1);
@@ -4918,13 +5064,28 @@ bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
"Floating point type expected");
// If the value is a constant, we can obviously see if it is a zero or not.
- // TODO: Add BuildVector support.
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
return !C->isZero();
+
+ // Return false if we find any zero in a vector.
+ if (Op->getOpcode() == ISD::BUILD_VECTOR ||
+ Op->getOpcode() == ISD::SPLAT_VECTOR) {
+ for (const SDValue &OpVal : Op->op_values()) {
+ if (OpVal.isUndef())
+ return false;
+ if (auto *C = dyn_cast<ConstantFPSDNode>(OpVal))
+ if (C->isZero())
+ return false;
+ }
+ return true;
+ }
return false;
}
-bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
+ if (Depth >= MaxRecursionDepth)
+ return false; // Limit search depth.
+
assert(!Op.getValueType().isFloatingPoint() &&
"Floating point types unsupported - use isKnownNeverZeroFloat");
@@ -4933,24 +5094,105 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
[](ConstantSDNode *C) { return !C->isZero(); }))
return true;
- // TODO: Recognize more cases here.
+ // TODO: Recognize more cases here. Most of the cases are also incomplete to
+ // some degree.
switch (Op.getOpcode()) {
- default: break;
+ default:
+ break;
+
case ISD::OR:
- if (isKnownNeverZero(Op.getOperand(1)) ||
- isKnownNeverZero(Op.getOperand(0)))
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
+ isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ case ISD::VSELECT:
+ case ISD::SELECT:
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
+ isKnownNeverZero(Op.getOperand(2), Depth + 1);
+
+ case ISD::SHL:
+ if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap())
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ // 1 << X is never zero. TODO: This can be expanded if we can bound X.
+ // The expression is really !Known.One[BitWidth-MaxLog2(Known):0].isZero()
+ if (computeKnownBits(Op.getOperand(0), Depth + 1).One[0])
return true;
break;
+
+ case ISD::UADDSAT:
+ case ISD::UMAX:
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
+ isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ case ISD::UMIN:
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
+ isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ case ISD::ROTL:
+ case ISD::ROTR:
+ case ISD::BITREVERSE:
+ case ISD::BSWAP:
+ case ISD::CTPOP:
+ case ISD::ABS:
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+
+ case ISD::SRA:
+ case ISD::SRL:
+ if (Op->getFlags().hasExact())
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ // Signed >> X is never zero. TODO: This can be expanded if we can bound X.
+ // The expression is really
+ // !Known.One[SignBit:SignBit-(BitWidth-MaxLog2(Known))].isZero()
+ if (computeKnownBits(Op.getOperand(0), Depth + 1).isNegative())
+ return true;
+ break;
+
+ case ISD::UDIV:
+ case ISD::SDIV:
+ // div exact can only produce a zero if the dividend is zero.
+ // TODO: For udiv this is also true if Op1 u<= Op0
+ if (Op->getFlags().hasExact())
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ break;
+
+ case ISD::ADD:
+ if (Op->getFlags().hasNoUnsignedWrap())
+ if (isKnownNeverZero(Op.getOperand(1), Depth + 1) ||
+ isKnownNeverZero(Op.getOperand(0), Depth + 1))
+ return true;
+ // TODO: There are a lot more cases we can prove for add.
+ break;
+
+ case ISD::SUB: {
+ if (isNullConstant(Op.getOperand(0)))
+ return isKnownNeverZero(Op.getOperand(1), Depth + 1);
+
+ std::optional<bool> ne =
+ KnownBits::ne(computeKnownBits(Op.getOperand(0), Depth + 1),
+ computeKnownBits(Op.getOperand(1), Depth + 1));
+ return ne && *ne;
}
- return false;
+ case ISD::MUL:
+ if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap())
+ if (isKnownNeverZero(Op.getOperand(1), Depth + 1) &&
+ isKnownNeverZero(Op.getOperand(0), Depth + 1))
+ return true;
+ break;
+
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ return isKnownNeverZero(Op.getOperand(0), Depth + 1);
+ }
+
+ return computeKnownBits(Op, Depth).isNonZero();
}
bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
// Check the obvious case.
if (A == B) return true;
- // For for negative and positive zero.
+ // For negative and positive zero.
if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
if (CA->isZero() && CB->isZero()) return true;
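
The new ISD::SUB case in isKnownNeverZero above proves x - y != 0 whenever the known bits of x and y show the values must differ. The test behind KnownBits::ne reduces to some bit being known 1 on one side and known 0 on the other; a standalone model of that check:

#include <cstdint>
#include <cstdio>

// Known-bits facts passed as masks: *Zero marks bits known 0, *One bits known 1.
// Two values must differ (so X - Y is never zero) if some bit is known 1 in
// one of them and known 0 in the other.
bool mustDiffer(uint32_t XZero, uint32_t XOne, uint32_t YZero, uint32_t YOne) {
  return ((XOne & YZero) | (XZero & YOne)) != 0;
}

int main() {
  // X known odd (bit 0 is 1), Y known even (bit 0 is 0): X - Y cannot be zero.
  std::printf("x - y known nonzero: %s\n",
              mustDiffer(/*XZero=*/0x0, /*XOne=*/0x1,
                         /*YZero=*/0x1, /*YOne=*/0x0) ? "yes" : "no");
}
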
@@ -4986,6 +5228,10 @@ static bool haveNoCommonBitsSetCommutative(SDValue A, SDValue B) {
SDValue Other) {
if (SDValue NotOperand =
getBitwiseNotOperand(Not, Mask, /* AllowUndefs */ true)) {
+ if (NotOperand->getOpcode() == ISD::ZERO_EXTEND ||
+ NotOperand->getOpcode() == ISD::TRUNCATE)
+ NotOperand = NotOperand->getOperand(0);
+
if (Other == NotOperand)
return true;
if (Other->getOpcode() == ISD::AND)
@@ -4994,6 +5240,13 @@ static bool haveNoCommonBitsSetCommutative(SDValue A, SDValue B) {
}
return false;
};
+
+ if (A->getOpcode() == ISD::ZERO_EXTEND || A->getOpcode() == ISD::TRUNCATE)
+ A = A->getOperand(0);
+
+ if (B->getOpcode() == ISD::ZERO_EXTEND || B->getOpcode() == ISD::TRUNCATE)
+ B = B->getOperand(0);
+
if (A->getOpcode() == ISD::AND)
return MatchNoCommonBitsPattern(A->getOperand(0), A->getOperand(1), B) ||
MatchNoCommonBitsPattern(A->getOperand(1), A->getOperand(0), B);
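
haveNoCommonBitsSetCommutative above now looks through ZERO_EXTEND and TRUNCATE before matching the (X & M) / (Y & ~M) pattern. The core fact the pattern relies on is that values masked by complementary masks never share a set bit, which is what lets an ADD be treated as an OR; a brute-force check of that property:

#include <cstdint>
#include <cstdio>

// For any X, Y and mask M, (X & M) and (Y & ~M) have no common set bits, so
// (X & M) + (Y & ~M) == (X & M) | (Y & ~M): no carries are possible.
int main() {
  const uint32_t Masks[] = {0x0f, 0xaa, 0xf0};
  bool Holds = true;
  for (uint32_t M : Masks)
    for (uint32_t X = 0; X < 256; ++X)
      for (uint32_t Y = 0; Y < 256; ++Y) {
        uint32_t A = X & M, B = Y & ~M & 0xff;
        if ((A & B) != 0 || (A + B) != (A | B))
          Holds = false;
      }
  std::printf("add behaves as or on disjoint masks: %s\n", Holds ? "yes" : "no");
}
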
@@ -5159,23 +5412,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
- SDValue Operand) {
+ SDValue N1) {
SDNodeFlags Flags;
if (Inserter)
Flags = Inserter->getFlags();
- return getNode(Opcode, DL, VT, Operand, Flags);
+ return getNode(Opcode, DL, VT, N1, Flags);
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
- SDValue Operand, const SDNodeFlags Flags) {
- assert(Operand.getOpcode() != ISD::DELETED_NODE &&
- "Operand is DELETED_NODE!");
+ SDValue N1, const SDNodeFlags Flags) {
+ assert(N1.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!");
// Constant fold unary operations with an integer constant operand. Even
// opaque constant will be folded, because the folding of unary operations
// doesn't create new constants with different values. Nevertheless, the
// opaque flag is preserved during folding to prevent future folding with
// other constants.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand)) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
const APInt &Val = C->getAPIntValue();
switch (Opcode) {
default: break;
@@ -5191,7 +5443,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
C->isTargetOpcode(), C->isOpaque());
case ISD::ANY_EXTEND:
// Some targets like RISCV prefer to sign extend some types.
- if (TLI->isSExtCheaperThanZExt(Operand.getValueType(), VT))
+ if (TLI->isSExtCheaperThanZExt(N1.getValueType(), VT))
return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT,
C->isTargetOpcode(), C->isOpaque());
return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT,
@@ -5225,15 +5477,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::CTPOP:
- return getConstant(Val.countPopulation(), DL, VT, C->isTargetOpcode(),
+ return getConstant(Val.popcount(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
- return getConstant(Val.countLeadingZeros(), DL, VT, C->isTargetOpcode(),
+ return getConstant(Val.countl_zero(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
- return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(),
+ return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(),
C->isOpaque());
case ISD::FP16_TO_FP:
case ISD::BF16_TO_FP: {
@@ -5249,7 +5501,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getConstantFP(FPV, DL, VT);
}
case ISD::STEP_VECTOR: {
- if (SDValue V = FoldSTEP_VECTOR(DL, VT, Operand, *this))
+ if (SDValue V = FoldSTEP_VECTOR(DL, VT, N1, *this))
return V;
break;
}
@@ -5257,7 +5509,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
// Constant fold unary operations with a floating point constant operand.
- if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand)) {
+ if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N1)) {
APFloat V = C->getValueAPF(); // make copy
switch (Opcode) {
case ISD::FNEG:
@@ -5354,262 +5606,250 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTPOP: {
- SDValue Ops = {Operand};
+ SDValue Ops = {N1};
if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops))
return Fold;
}
}
- unsigned OpOpcode = Operand.getNode()->getOpcode();
+ unsigned OpOpcode = N1.getNode()->getOpcode();
switch (Opcode) {
case ISD::STEP_VECTOR:
assert(VT.isScalableVector() &&
"STEP_VECTOR can only be used with scalable types");
assert(OpOpcode == ISD::TargetConstant &&
- VT.getVectorElementType() == Operand.getValueType() &&
+ VT.getVectorElementType() == N1.getValueType() &&
"Unexpected step operand");
break;
case ISD::FREEZE:
- assert(VT == Operand.getValueType() && "Unexpected VT!");
- if (isGuaranteedNotToBeUndefOrPoison(Operand, /*PoisonOnly*/ false,
+ assert(VT == N1.getValueType() && "Unexpected VT!");
+ if (isGuaranteedNotToBeUndefOrPoison(N1, /*PoisonOnly*/ false,
/*Depth*/ 1))
- return Operand;
+ return N1;
break;
case ISD::TokenFactor:
case ISD::MERGE_VALUES:
case ISD::CONCAT_VECTORS:
- return Operand; // Factor, merge or concat of one node? No need.
+ return N1; // Factor, merge or concat of one node? No need.
case ISD::BUILD_VECTOR: {
// Attempt to simplify BUILD_VECTOR.
- SDValue Ops[] = {Operand};
+ SDValue Ops[] = {N1};
if (SDValue V = FoldBUILD_VECTOR(DL, VT, Ops, *this))
return V;
break;
}
case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node");
case ISD::FP_EXTEND:
- assert(VT.isFloatingPoint() &&
- Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
- if (Operand.getValueType() == VT) return Operand; // noop conversion.
- assert((!VT.isVector() ||
- VT.getVectorElementCount() ==
- Operand.getValueType().getVectorElementCount()) &&
+ assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() &&
+ "Invalid FP cast!");
+ if (N1.getValueType() == VT) return N1; // noop conversion.
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
- assert(Operand.getValueType().bitsLT(VT) &&
- "Invalid fpext node, dst < src!");
- if (Operand.isUndef())
+ assert(N1.getValueType().bitsLT(VT) && "Invalid fpext node, dst < src!");
+ if (N1.isUndef())
return getUNDEF(VT);
break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
- if (Operand.isUndef())
+ if (N1.isUndef())
return getUNDEF(VT);
break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
// [us]itofp(undef) = 0, because the result value is bounded.
- if (Operand.isUndef())
+ if (N1.isUndef())
return getConstantFP(0.0, DL, VT);
break;
case ISD::SIGN_EXTEND:
- assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ assert(VT.isInteger() && N1.getValueType().isInteger() &&
"Invalid SIGN_EXTEND!");
- assert(VT.isVector() == Operand.getValueType().isVector() &&
+ assert(VT.isVector() == N1.getValueType().isVector() &&
"SIGN_EXTEND result type type should be vector iff the operand "
"type is vector!");
- if (Operand.getValueType() == VT) return Operand; // noop extension
- assert((!VT.isVector() ||
- VT.getVectorElementCount() ==
- Operand.getValueType().getVectorElementCount()) &&
+ if (N1.getValueType() == VT) return N1; // noop extension
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
- assert(Operand.getValueType().bitsLT(VT) &&
- "Invalid sext node, dst < src!");
+ assert(N1.getValueType().bitsLT(VT) && "Invalid sext node, dst < src!");
if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
- return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
+ return getNode(OpOpcode, DL, VT, N1.getOperand(0));
if (OpOpcode == ISD::UNDEF)
// sext(undef) = 0, because the top bits will all be the same.
return getConstant(0, DL, VT);
break;
case ISD::ZERO_EXTEND:
- assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ assert(VT.isInteger() && N1.getValueType().isInteger() &&
"Invalid ZERO_EXTEND!");
- assert(VT.isVector() == Operand.getValueType().isVector() &&
+ assert(VT.isVector() == N1.getValueType().isVector() &&
"ZERO_EXTEND result type type should be vector iff the operand "
"type is vector!");
- if (Operand.getValueType() == VT) return Operand; // noop extension
- assert((!VT.isVector() ||
- VT.getVectorElementCount() ==
- Operand.getValueType().getVectorElementCount()) &&
+ if (N1.getValueType() == VT) return N1; // noop extension
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
- assert(Operand.getValueType().bitsLT(VT) &&
- "Invalid zext node, dst < src!");
- if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
- return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getOperand(0));
+ assert(N1.getValueType().bitsLT(VT) && "Invalid zext node, dst < src!");
+ if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
+ return getNode(ISD::ZERO_EXTEND, DL, VT, N1.getOperand(0));
if (OpOpcode == ISD::UNDEF)
// zext(undef) = 0, because the top bits will be zero.
return getConstant(0, DL, VT);
break;
case ISD::ANY_EXTEND:
- assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ assert(VT.isInteger() && N1.getValueType().isInteger() &&
"Invalid ANY_EXTEND!");
- assert(VT.isVector() == Operand.getValueType().isVector() &&
+ assert(VT.isVector() == N1.getValueType().isVector() &&
"ANY_EXTEND result type type should be vector iff the operand "
"type is vector!");
- if (Operand.getValueType() == VT) return Operand; // noop extension
- assert((!VT.isVector() ||
- VT.getVectorElementCount() ==
- Operand.getValueType().getVectorElementCount()) &&
+ if (N1.getValueType() == VT) return N1; // noop extension
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
- assert(Operand.getValueType().bitsLT(VT) &&
- "Invalid anyext node, dst < src!");
+ assert(N1.getValueType().bitsLT(VT) && "Invalid anyext node, dst < src!");
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
OpOpcode == ISD::ANY_EXTEND)
// (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
- return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
+ return getNode(OpOpcode, DL, VT, N1.getOperand(0));
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
// (ext (trunc x)) -> x
if (OpOpcode == ISD::TRUNCATE) {
- SDValue OpOp = Operand.getOperand(0);
+ SDValue OpOp = N1.getOperand(0);
if (OpOp.getValueType() == VT) {
- transferDbgValues(Operand, OpOp);
+ transferDbgValues(N1, OpOp);
return OpOp;
}
}
break;
case ISD::TRUNCATE:
- assert(VT.isInteger() && Operand.getValueType().isInteger() &&
+ assert(VT.isInteger() && N1.getValueType().isInteger() &&
"Invalid TRUNCATE!");
- assert(VT.isVector() == Operand.getValueType().isVector() &&
+ assert(VT.isVector() == N1.getValueType().isVector() &&
"TRUNCATE result type type should be vector iff the operand "
"type is vector!");
- if (Operand.getValueType() == VT) return Operand; // noop truncate
- assert((!VT.isVector() ||
- VT.getVectorElementCount() ==
- Operand.getValueType().getVectorElementCount()) &&
+ if (N1.getValueType() == VT) return N1; // noop truncate
+ assert((!VT.isVector() || VT.getVectorElementCount() ==
+ N1.getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
- assert(Operand.getValueType().bitsGT(VT) &&
- "Invalid truncate node, src < dst!");
+ assert(N1.getValueType().bitsGT(VT) && "Invalid truncate node, src < dst!");
if (OpOpcode == ISD::TRUNCATE)
- return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0));
+ return getNode(ISD::TRUNCATE, DL, VT, N1.getOperand(0));
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
OpOpcode == ISD::ANY_EXTEND) {
// If the source is smaller than the dest, we still need an extend.
- if (Operand.getOperand(0).getValueType().getScalarType()
- .bitsLT(VT.getScalarType()))
- return getNode(OpOpcode, DL, VT, Operand.getOperand(0));
- if (Operand.getOperand(0).getValueType().bitsGT(VT))
- return getNode(ISD::TRUNCATE, DL, VT, Operand.getOperand(0));
- return Operand.getOperand(0);
+ if (N1.getOperand(0).getValueType().getScalarType().bitsLT(
+ VT.getScalarType()))
+ return getNode(OpOpcode, DL, VT, N1.getOperand(0));
+ if (N1.getOperand(0).getValueType().bitsGT(VT))
+ return getNode(ISD::TRUNCATE, DL, VT, N1.getOperand(0));
+ return N1.getOperand(0);
}
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
if (OpOpcode == ISD::VSCALE && !NewNodesMustHaveLegalTypes)
- return getVScale(DL, VT, Operand.getConstantOperandAPInt(0));
+ return getVScale(DL, VT,
+ N1.getConstantOperandAPInt(0).trunc(VT.getSizeInBits()));
break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
assert(VT.isVector() && "This DAG node is restricted to vector types.");
- assert(Operand.getValueType().bitsLE(VT) &&
+ assert(N1.getValueType().bitsLE(VT) &&
"The input must be the same size or smaller than the result.");
assert(VT.getVectorMinNumElements() <
- Operand.getValueType().getVectorMinNumElements() &&
+ N1.getValueType().getVectorMinNumElements() &&
"The destination vector type must have fewer lanes than the input.");
break;
case ISD::ABS:
- assert(VT.isInteger() && VT == Operand.getValueType() &&
- "Invalid ABS!");
+ assert(VT.isInteger() && VT == N1.getValueType() && "Invalid ABS!");
if (OpOpcode == ISD::UNDEF)
return getConstant(0, DL, VT);
break;
case ISD::BSWAP:
- assert(VT.isInteger() && VT == Operand.getValueType() &&
- "Invalid BSWAP!");
+ assert(VT.isInteger() && VT == N1.getValueType() && "Invalid BSWAP!");
assert((VT.getScalarSizeInBits() % 16 == 0) &&
"BSWAP types must be a multiple of 16 bits!");
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
// bswap(bswap(X)) -> X.
if (OpOpcode == ISD::BSWAP)
- return Operand.getOperand(0);
+ return N1.getOperand(0);
break;
case ISD::BITREVERSE:
- assert(VT.isInteger() && VT == Operand.getValueType() &&
- "Invalid BITREVERSE!");
+ assert(VT.isInteger() && VT == N1.getValueType() && "Invalid BITREVERSE!");
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
case ISD::BITCAST:
- assert(VT.getSizeInBits() == Operand.getValueSizeInBits() &&
+ assert(VT.getSizeInBits() == N1.getValueSizeInBits() &&
"Cannot BITCAST between types of different sizes!");
- if (VT == Operand.getValueType()) return Operand; // noop conversion.
- if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
- return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
+ if (VT == N1.getValueType()) return N1; // noop conversion.
+ if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
+ return getNode(ISD::BITCAST, DL, VT, N1.getOperand(0));
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
case ISD::SCALAR_TO_VECTOR:
- assert(VT.isVector() && !Operand.getValueType().isVector() &&
- (VT.getVectorElementType() == Operand.getValueType() ||
+ assert(VT.isVector() && !N1.getValueType().isVector() &&
+ (VT.getVectorElementType() == N1.getValueType() ||
(VT.getVectorElementType().isInteger() &&
- Operand.getValueType().isInteger() &&
- VT.getVectorElementType().bitsLE(Operand.getValueType()))) &&
+ N1.getValueType().isInteger() &&
+ VT.getVectorElementType().bitsLE(N1.getValueType()))) &&
"Illegal SCALAR_TO_VECTOR node!");
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
// scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined.
if (OpOpcode == ISD::EXTRACT_VECTOR_ELT &&
- isa<ConstantSDNode>(Operand.getOperand(1)) &&
- Operand.getConstantOperandVal(1) == 0 &&
- Operand.getOperand(0).getValueType() == VT)
- return Operand.getOperand(0);
+ isa<ConstantSDNode>(N1.getOperand(1)) &&
+ N1.getConstantOperandVal(1) == 0 &&
+ N1.getOperand(0).getValueType() == VT)
+ return N1.getOperand(0);
break;
case ISD::FNEG:
// Negation of an unknown bag of bits is still completely undefined.
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
- if (OpOpcode == ISD::FNEG) // --X -> X
- return Operand.getOperand(0);
+ if (OpOpcode == ISD::FNEG) // --X -> X
+ return N1.getOperand(0);
break;
case ISD::FABS:
- if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
- return getNode(ISD::FABS, DL, VT, Operand.getOperand(0));
+ if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X)
+ return getNode(ISD::FABS, DL, VT, N1.getOperand(0));
break;
case ISD::VSCALE:
- assert(VT == Operand.getValueType() && "Unexpected VT!");
+ assert(VT == N1.getValueType() && "Unexpected VT!");
break;
case ISD::CTPOP:
- if (Operand.getValueType().getScalarType() == MVT::i1)
- return Operand;
+ if (N1.getValueType().getScalarType() == MVT::i1)
+ return N1;
break;
case ISD::CTLZ:
case ISD::CTTZ:
- if (Operand.getValueType().getScalarType() == MVT::i1)
- return getNOT(DL, Operand, Operand.getValueType());
+ if (N1.getValueType().getScalarType() == MVT::i1)
+ return getNOT(DL, N1, N1.getValueType());
break;
case ISD::VECREDUCE_ADD:
- if (Operand.getValueType().getScalarType() == MVT::i1)
- return getNode(ISD::VECREDUCE_XOR, DL, VT, Operand);
+ if (N1.getValueType().getScalarType() == MVT::i1)
+ return getNode(ISD::VECREDUCE_XOR, DL, VT, N1);
break;
case ISD::VECREDUCE_SMIN:
case ISD::VECREDUCE_UMAX:
- if (Operand.getValueType().getScalarType() == MVT::i1)
- return getNode(ISD::VECREDUCE_OR, DL, VT, Operand);
+ if (N1.getValueType().getScalarType() == MVT::i1)
+ return getNode(ISD::VECREDUCE_OR, DL, VT, N1);
break;
case ISD::VECREDUCE_SMAX:
case ISD::VECREDUCE_UMIN:
- if (Operand.getValueType().getScalarType() == MVT::i1)
- return getNode(ISD::VECREDUCE_AND, DL, VT, Operand);
+ if (N1.getValueType().getScalarType() == MVT::i1)
+ return getNode(ISD::VECREDUCE_AND, DL, VT, N1);
break;
}
SDNode *N;
SDVTList VTs = getVTList(VT);
- SDValue Ops[] = {Operand};
+ SDValue Ops[] = {N1};
if (VT != MVT::Glue) { // Don't CSE flag producing nodes
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops);
@@ -5710,6 +5950,10 @@ static std::optional<APInt> FoldValue(unsigned Opcode, const APInt &C1,
APInt C2Ext = C2.zext(FullWidth);
return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1);
}
+ case ISD::ABDS:
+ return APIntOps::smax(C1, C2) - APIntOps::smin(C1, C2);
+ case ISD::ABDU:
+ return APIntOps::umax(C1, C2) - APIntOps::umin(C1, C2);
}
return std::nullopt;
}
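
The new ABDS/ABDU constant-folding cases above compute the absolute difference as max minus min in the appropriate signedness, with the subtraction wrapping like any APInt operation. A tiny standalone equivalent on 8-bit lanes:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Unsigned absolute difference: umax - umin.
uint8_t abdu8(uint8_t A, uint8_t B) {
  return (uint8_t)(std::max(A, B) - std::min(A, B));
}

// Signed absolute difference: smax - smin, truncated back to 8 bits.
// APInt subtraction wraps the same way, so abds8(127, -128) is 0xff.
uint8_t abds8(int8_t A, int8_t B) {
  int Wide = std::max<int>(A, B) - std::min<int>(A, B);
  return (uint8_t)Wide;
}

int main() {
  std::printf("abdu8(3, 250)    = %u\n", (unsigned)abdu8(3, 250));       // 247
  std::printf("abds8(127, -128) = 0x%02x\n", (unsigned)abds8(127, -128)); // 0xff
}
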
@@ -6678,7 +6922,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
case ISD::VECTOR_SPLICE: {
- if (cast<ConstantSDNode>(N3)->isNullValue())
+ if (cast<ConstantSDNode>(N3)->isZero())
return N1;
break;
}
@@ -6745,6 +6989,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (N1.getValueType() == VT)
return N1;
break;
+ case ISD::VP_TRUNCATE:
+ case ISD::VP_SIGN_EXTEND:
+ case ISD::VP_ZERO_EXTEND:
+ // Don't create noop casts.
+ if (N1.getValueType() == VT)
+ return N1;
+ break;
}
// Memoize node if it doesn't produce a flag.
@@ -7042,7 +7293,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
AAMDNodes NewAAInfo = AAInfo;
NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr;
- const Value *SrcVal = SrcPtrInfo.V.dyn_cast<const Value *>();
+ const Value *SrcVal = dyn_cast_if_present<const Value *>(SrcPtrInfo.V);
bool isConstant =
AA && SrcVal &&
AA->pointsToConstantMemory(MemoryLocation(SrcVal, Size, AAInfo));
@@ -7321,8 +7572,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
- bool IsZeroVal =
- isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero();
+ bool IsZeroVal = isNullConstant(Src);
unsigned Limit = AlwaysInline ? ~0 : TLI.getMaxStoresPerMemset(OptSize);
if (!TLI.findOptimalMemOpLowering(
@@ -7870,7 +8120,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
assert((Opcode == ISD::INTRINSIC_VOID ||
Opcode == ISD::INTRINSIC_W_CHAIN ||
Opcode == ISD::PREFETCH ||
- ((int)Opcode <= std::numeric_limits<int>::max() &&
+ (Opcode <= (unsigned)std::numeric_limits<int>::max() &&
(int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
"Opcode is not a memory-accessing opcode!");
@@ -7883,6 +8133,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
Opcode, dl.getIROrder(), VTList, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
ID.AddInteger(MMO->getFlags());
+ ID.AddInteger(MemVT.getRawBits());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
@@ -8307,7 +8558,7 @@ SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM,
SDValue Ops[] = {Chain, Ptr, Offset, Mask, EVL};
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::VP_LOAD, VTs, Ops);
- ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<VPLoadSDNode>(
dl.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
@@ -9051,6 +9302,60 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl,
return V;
}
+SDValue SelectionDAG::getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr,
+ EVT MemVT, MachineMemOperand *MMO) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Ops[] = {Chain, Ptr};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::GET_FPENV_MEM, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<FPStateAccessSDNode>(
+ ISD::GET_FPENV_MEM, dl.getIROrder(), VTs, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<FPStateAccessSDNode>(ISD::GET_FPENV_MEM, dl.getIROrder(),
+ dl.getDebugLoc(), VTs, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
+SDValue SelectionDAG::getSetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr,
+ EVT MemVT, MachineMemOperand *MMO) {
+ assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
+ SDVTList VTs = getVTList(MVT::Other);
+ SDValue Ops[] = {Chain, Ptr};
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, ISD::SET_FPENV_MEM, VTs, Ops);
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(getSyntheticNodeSubclassData<FPStateAccessSDNode>(
+ ISD::SET_FPENV_MEM, dl.getIROrder(), VTs, MemVT, MMO));
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(MMO->getFlags());
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ auto *N = newSDNode<FPStateAccessSDNode>(ISD::SET_FPENV_MEM, dl.getIROrder(),
+ dl.getDebugLoc(), VTs, MemVT, MMO);
+ createOperands(N, Ops);
+
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {
// select undef, T, F --> T (if T is a constant), otherwise F
// select ?, undef, F --> F
@@ -9348,6 +9653,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
"Binary operator types must match!");
break;
}
+ case ISD::FFREXP: {
+ assert(VTList.NumVTs == 2 && Ops.size() == 1 && "Invalid ffrexp op!");
+ assert(VTList.VTs[0].isFloatingPoint() && VTList.VTs[1].isInteger() &&
+ VTList.VTs[0] == Ops[0].getValueType() && "frexp type mismatch");
+
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Ops[0])) {
+ int FrexpExp;
+ APFloat FrexpMant =
+ frexp(C->getValueAPF(), FrexpExp, APFloat::rmNearestTiesToEven);
+ SDValue Result0 = getConstantFP(FrexpMant, DL, VTList.VTs[0]);
+ SDValue Result1 =
+ getConstant(FrexpMant.isFinite() ? FrexpExp : 0, DL, VTList.VTs[1]);
+ return getNode(ISD::MERGE_VALUES, DL, VTList, {Result0, Result1}, Flags);
+ }
+
+ break;
+ }
case ISD::STRICT_FP_EXTEND:
assert(VTList.NumVTs == 2 && Ops.size() == 2 &&
"Invalid STRICT_FP_EXTEND!");
@@ -9357,8 +9679,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
"STRICT_FP_EXTEND result type should be vector iff the operand "
"type is vector!");
assert((!VTList.VTs[0].isVector() ||
- VTList.VTs[0].getVectorNumElements() ==
- Ops[1].getValueType().getVectorNumElements()) &&
+ VTList.VTs[0].getVectorElementCount() ==
+ Ops[1].getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
assert(Ops[1].getValueType().bitsLT(VTList.VTs[0]) &&
"Invalid fpext node, dst <= src!");
@@ -9369,8 +9691,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
"STRICT_FP_ROUND result type should be vector iff the operand "
"type is vector!");
assert((!VTList.VTs[0].isVector() ||
- VTList.VTs[0].getVectorNumElements() ==
- Ops[1].getValueType().getVectorNumElements()) &&
+ VTList.VTs[0].getVectorElementCount() ==
+ Ops[1].getValueType().getVectorElementCount()) &&
"Vector element count mismatch!");
assert(VTList.VTs[0].isFloatingPoint() &&
Ops[1].getValueType().isFloatingPoint() &&
@@ -10247,8 +10569,7 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
case ISD::ADD:
SDValue N0 = N.getOperand(0);
SDValue N1 = N.getOperand(1);
- if (!isConstantIntBuildVectorOrConstantInt(N0) &&
- isConstantIntBuildVectorOrConstantInt(N1)) {
+ if (!isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1)) {
uint64_t Offset = N.getConstantOperandVal(1);
// Rewrite an ADD constant node into a DIExpression. Since we are
@@ -10594,11 +10915,11 @@ public:
bool SelectionDAG::calculateDivergence(SDNode *N) {
if (TLI->isSDNodeAlwaysUniform(N)) {
- assert(!TLI->isSDNodeSourceOfDivergence(N, FLI, DA) &&
+ assert(!TLI->isSDNodeSourceOfDivergence(N, FLI, UA) &&
"Conflicting divergence information!");
return false;
}
- if (TLI->isSDNodeSourceOfDivergence(N, FLI, DA))
+ if (TLI->isSDNodeSourceOfDivergence(N, FLI, UA))
return true;
for (const auto &Op : N->ops()) {
if (Op.Val.getValueType() != MVT::Other && Op.getNode()->isDivergent())
@@ -10975,6 +11296,12 @@ SDValue llvm::peekThroughExtractSubvectors(SDValue V) {
return V;
}
+SDValue llvm::peekThroughTruncates(SDValue V) {
+ while (V.getOpcode() == ISD::TRUNCATE)
+ V = V.getOperand(0);
+ return V;
+}
+
bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) {
if (V.getOpcode() != ISD::XOR)
return false;
@@ -10982,7 +11309,7 @@ bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) {
unsigned NumBits = V.getScalarValueSizeInBits();
ConstantSDNode *C =
isConstOrConstSplat(V, AllowUndefs, /*AllowTruncation*/ true);
- return C && (C->getAPIntValue().countTrailingOnes() >= NumBits);
+ return C && (C->getAPIntValue().countr_one() >= NumBits);
}
ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs,
@@ -11394,16 +11721,11 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
}
SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
- assert(N->getNumValues() == 1 &&
- "Can't unroll a vector with multiple results!");
-
EVT VT = N->getValueType(0);
- unsigned NE = VT.getVectorNumElements();
EVT EltVT = VT.getVectorElementType();
- SDLoc dl(N);
+ unsigned NE = VT.getVectorNumElements();
- SmallVector<SDValue, 8> Scalars;
- SmallVector<SDValue, 4> Operands(N->getNumOperands());
+ SDLoc dl(N);
// If ResNE is 0, fully unroll the vector op.
if (ResNE == 0)
@@ -11411,6 +11733,40 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
else if (NE > ResNE)
NE = ResNE;
+ if (N->getNumValues() == 2) {
+ SmallVector<SDValue, 8> Scalars0, Scalars1;
+ SmallVector<SDValue, 4> Operands(N->getNumOperands());
+ EVT VT1 = N->getValueType(1);
+ EVT EltVT1 = VT1.getVectorElementType();
+
+ unsigned i;
+ for (i = 0; i != NE; ++i) {
+ for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) {
+ SDValue Operand = N->getOperand(j);
+ EVT OperandVT = Operand.getValueType();
+
+ // A vector operand; extract a single element.
+ EVT OperandEltVT = OperandVT.getVectorElementType();
+ Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT,
+ Operand, getVectorIdxConstant(i, dl));
+ }
+
+ SDValue EltOp = getNode(N->getOpcode(), dl, {EltVT, EltVT1}, Operands);
+ Scalars0.push_back(EltOp);
+ Scalars1.push_back(EltOp.getValue(1));
+ }
+
+ SDValue Vec0 = getBuildVector(VT, dl, Scalars0);
+ SDValue Vec1 = getBuildVector(VT1, dl, Scalars1);
+ return getMergeValues({Vec0, Vec1}, dl);
+ }
+
+ assert(N->getNumValues() == 1 &&
+ "Can't unroll a vector with multiple results!");
+
+ SmallVector<SDValue, 8> Scalars;
+ SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
unsigned i;
for (i= 0; i != NE; ++i) {
for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) {
@@ -11533,7 +11889,7 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
int64_t Offset = 0;
if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset))
- return (Dist * Bytes == Offset);
+ return (Dist * (int64_t)Bytes == Offset);
return false;
}
@@ -11573,6 +11929,21 @@ MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const {
return std::nullopt;
}
+/// Split the scalar node with EXTRACT_ELEMENT using the provided
+/// VTs and return the low/high part.
+std::pair<SDValue, SDValue> SelectionDAG::SplitScalar(const SDValue &N,
+ const SDLoc &DL,
+ const EVT &LoVT,
+ const EVT &HiVT) {
+ assert(!LoVT.isVector() && !HiVT.isVector() && !N.getValueType().isVector() &&
+ "Split node must be a scalar type");
+ SDValue Lo =
+ getNode(ISD::EXTRACT_ELEMENT, DL, LoVT, N, getIntPtrConstant(0, DL));
+ SDValue Hi =
+ getNode(ISD::EXTRACT_ELEMENT, DL, HiVT, N, getIntPtrConstant(1, DL));
+ return std::make_pair(Lo, Hi);
+}
+
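SplitScalar returns the low and high halves of a scalar via ISD::EXTRACT_ELEMENT, where index 0 selects the low part and index 1 the high part. A plain-C++ sketch of the arithmetic this models for an i64 split into two i32 halves (illustrative only, not the DAG code itself):

  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t N = 0x0123456789abcdefULL;
    uint32_t Lo = static_cast<uint32_t>(N);       // EXTRACT_ELEMENT index 0
    uint32_t Hi = static_cast<uint32_t>(N >> 32); // EXTRACT_ELEMENT index 1
    assert(Lo == 0x89abcdefu && Hi == 0x01234567u);
    return 0;
  }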
/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type
/// which is split (or expanded) into two not necessarily identical pieces.
std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const {
@@ -11786,7 +12157,7 @@ SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts,
}
if (!Splatted) {
- unsigned FirstDemandedIdx = DemandedElts.countTrailingZeros();
+ unsigned FirstDemandedIdx = DemandedElts.countr_zero();
assert(getOperand(FirstDemandedIdx).isUndef() &&
"Can only have a splat without a constant for all undefs.");
return getOperand(FirstDemandedIdx);
@@ -11908,7 +12279,7 @@ bool BuildVectorSDNode::getConstantRawBits(
// Extract raw src bits.
SmallVector<APInt> SrcBitElements(NumSrcOps,
- APInt::getNullValue(SrcEltSizeInBits));
+ APInt::getZero(SrcEltSizeInBits));
BitVector SrcUndeElements(NumSrcOps, false);
for (unsigned I = 0; I != NumSrcOps; ++I) {
@@ -11946,7 +12317,7 @@ void BuildVectorSDNode::recastRawBits(bool IsLittleEndian,
unsigned NumDstOps = (NumSrcOps * SrcEltSizeInBits) / DstEltSizeInBits;
DstUndefElements.clear();
DstUndefElements.resize(NumDstOps, false);
- DstBitElements.assign(NumDstOps, APInt::getNullValue(DstEltSizeInBits));
+ DstBitElements.assign(NumDstOps, APInt::getZero(DstEltSizeInBits));
// Concatenate src elements constant bits together into dst element.
if (SrcEltSizeInBits <= DstEltSizeInBits) {
@@ -12093,7 +12464,7 @@ void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
Node->NumOperands = Vals.size();
Node->OperandList = Ops;
if (!TLI->isSDNodeAlwaysUniform(Node)) {
- IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA);
+ IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, UA);
Node->SDNodeBits.IsDivergent = IsDivergent;
}
checkForCycles(Node);
@@ -12147,9 +12518,53 @@ SDValue SelectionDAG::getNeutralElement(unsigned Opcode, const SDLoc &DL,
return getConstantFP(NeutralAF, DL, VT);
}
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM: {
+ // Neutral element for fminimum is Inf or FLT_MAX, depending on FMF.
+ const fltSemantics &Semantics = EVTToAPFloatSemantics(VT);
+ APFloat NeutralAF = !Flags.hasNoInfs() ? APFloat::getInf(Semantics)
+ : APFloat::getLargest(Semantics);
+ if (Opcode == ISD::FMAXIMUM)
+ NeutralAF.changeSign();
+
+ return getConstantFP(NeutralAF, DL, VT);
+ }
+
}
}
+/// Helper used to make a call to a library function that has one argument of
+/// pointer type.
+///
+/// Such functions include 'fegetmode', 'fesetenv' and some others, which are
+/// used to get or set floating-point state. They have one argument of pointer
+/// type, which points to the memory region containing bits of the
+/// floating-point state. The value returned by such function is ignored in the
+/// created call.
+///
+/// \param LibFunc Reference to library function (value of RTLIB::Libcall).
+/// \param Ptr Pointer used to save/load state.
+/// \param InChain Incoming token chain.
+/// \returns Outgoing chain token.
+SDValue SelectionDAG::makeStateFunctionCall(unsigned LibFunc, SDValue Ptr,
+ SDValue InChain,
+ const SDLoc &DLoc) {
+ assert(InChain.getValueType() == MVT::Other && "Expected token chain");
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Ptr;
+ Entry.Ty = Ptr.getValueType().getTypeForEVT(*getContext());
+ Args.push_back(Entry);
+ RTLIB::Libcall LC = static_cast<RTLIB::Libcall>(LibFunc);
+ SDValue Callee = getExternalSymbol(TLI->getLibcallName(LC),
+ TLI->getPointerTy(getDataLayout()));
+ TargetLowering::CallLoweringInfo CLI(*this);
+ CLI.setDebugLoc(DLoc).setChain(InChain).setLibCallee(
+ TLI->getLibcallCallingConv(LC), Type::getVoidTy(*getContext()), Callee,
+ std::move(Args));
+ return TLI->LowerCallTo(CLI).second;
+}
+
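makeStateFunctionCall emits a call with the same shape as the C99 <fenv.h> accessors it targets: a single pointer argument and an integer result that is ignored. A small standard-C++ illustration of that calling pattern (plain libc usage, not the DAG lowering itself):

  #include <cfenv>

  int main() {
    std::fenv_t Env;
    std::fegetenv(&Env); // save the current floating-point environment
    std::fesetenv(&Env); // restore it; the int return values are ignored
    return 0;
  }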
void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
assert(From && To && "Invalid SDNode; empty source SDValue?");
auto I = SDEI.find(From);
@@ -12158,8 +12573,90 @@ void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) {
// Use of operator[] on the DenseMap may cause an insertion, which invalidates
// the iterator, hence the need to make a copy to prevent a use-after-free.
- NodeExtraInfo Copy = I->second;
- SDEI[To] = std::move(Copy);
+ NodeExtraInfo NEI = I->second;
+ if (LLVM_LIKELY(!NEI.PCSections)) {
+ // No deep copy required for the types of extra info set.
+ //
+ // FIXME: Investigate if other types of extra info also need deep copy. This
+ // depends on the types of nodes they can be attached to: if some extra info
+ // is only ever attached to nodes where a replacement To node is always the
+ // node where later use and propagation of the extra info has the intended
+ // semantics, no deep copy is required.
+ SDEI[To] = std::move(NEI);
+ return;
+ }
+
+ // We need to copy NodeExtraInfo to all _new_ nodes that are being introduced
+ // through the replacement of From with To. Otherwise, replacements of a node
+ // (From) with more complex nodes (To and its operands) may result in lost
+ // extra info where the root node (To) is insignificant in further propagating
+ // and using extra info when further lowering to MIR.
+ //
+ // In the first step pre-populate the visited set with the nodes reachable
+ // from the old From node. This avoids copying NodeExtraInfo to parts of the
+ // DAG that is not new and should be left untouched.
+ SmallVector<const SDNode *> Leafs{From}; // Leafs reachable with VisitFrom.
+ DenseSet<const SDNode *> FromReach; // The set of nodes reachable from From.
+ auto VisitFrom = [&](auto &&Self, const SDNode *N, int MaxDepth) {
+ if (MaxDepth == 0) {
+ // Remember this node in case we need to increase MaxDepth and continue
+ // populating FromReach from this node.
+ Leafs.emplace_back(N);
+ return;
+ }
+ if (!FromReach.insert(N).second)
+ return;
+ for (const SDValue &Op : N->op_values())
+ Self(Self, Op.getNode(), MaxDepth - 1);
+ };
+
+ // Copy extra info to To and all its transitive operands (that are new).
+ SmallPtrSet<const SDNode *, 8> Visited;
+ auto DeepCopyTo = [&](auto &&Self, const SDNode *N) {
+ if (FromReach.contains(N))
+ return true;
+ if (!Visited.insert(N).second)
+ return true;
+ if (getEntryNode().getNode() == N)
+ return false;
+ for (const SDValue &Op : N->op_values()) {
+ if (!Self(Self, Op.getNode()))
+ return false;
+ }
+ // Copy only if entry node was not reached.
+ SDEI[N] = NEI;
+ return true;
+ };
+
+ // We first try with a lower MaxDepth, assuming that the path to common
+ // operands between From and To is relatively short. This significantly
+ // improves performance in the common case. The initial MaxDepth is big
+ // enough to avoid retry in the common case; the last MaxDepth is large
+ // enough to avoid having to use the fallback below (and protects from
+ // potential stack exhaustion from recursion).
+ for (int PrevDepth = 0, MaxDepth = 16; MaxDepth <= 1024;
+ PrevDepth = MaxDepth, MaxDepth *= 2, Visited.clear()) {
+ // StartFrom is the previous (or initial) set of leafs reachable at the
+ // previous maximum depth.
+ SmallVector<const SDNode *> StartFrom;
+ std::swap(StartFrom, Leafs);
+ for (const SDNode *N : StartFrom)
+ VisitFrom(VisitFrom, N, MaxDepth - PrevDepth);
+ if (LLVM_LIKELY(DeepCopyTo(DeepCopyTo, To)))
+ return;
+ // This should happen very rarely (reached the entry node).
+ LLVM_DEBUG(dbgs() << __func__ << ": MaxDepth=" << MaxDepth << " too low\n");
+ assert(!Leafs.empty());
+ }
+
+ // This should not happen - but if it did, that means the subgraph reachable
+ // from From has depth greater or equal to maximum MaxDepth, and VisitFrom()
+ // could not visit all reachable common operands. Consequently, we were able
+ // to reach the entry node.
+ errs() << "warning: incomplete propagation of SelectionDAG::NodeExtraInfo\n";
+ assert(false && "From subgraph too complex - increase max. MaxDepth?");
+ // Best-effort fallback if assertions disabled.
+ SDEI[To] = std::move(NEI);
}
#ifndef NDEBUG
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 0bdfdac6a65f..9595da9d0d8a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -19,21 +19,21 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -67,6 +67,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InlineAsm.h"
@@ -96,6 +97,7 @@
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cstddef>
#include <iterator>
@@ -416,6 +418,10 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
return Val;
if (PartEVT.isInteger() && ValueVT.isFloatingPoint())
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
+
+ // Vector/Vector bitcast (e.g. <2 x bfloat> -> <2 x half>).
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
}
// Promoted vector extract
@@ -495,7 +501,6 @@ getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
CallConv);
- unsigned PartBits = PartVT.getSizeInBits();
unsigned OrigNumParts = NumParts;
assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
"Copying to an illegal type!");
@@ -511,6 +516,7 @@ getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
return;
}
+ unsigned PartBits = PartVT.getSizeInBits();
if (NumParts * PartBits > ValueVT.getSizeInBits()) {
// If the parts cover more bits than the value has, promote the value.
if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
@@ -621,6 +627,8 @@ static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
return SDValue();
EVT ValueVT = Val.getValueType();
+ EVT PartEVT = PartVT.getVectorElementType();
+ EVT ValueEVT = ValueVT.getVectorElementType();
ElementCount PartNumElts = PartVT.getVectorElementCount();
ElementCount ValueNumElts = ValueVT.getVectorElementCount();
@@ -628,9 +636,18 @@ static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
// fixed/scalable properties. If a target needs to widen a fixed-length type
// to a scalable one, it should be possible to use INSERT_SUBVECTOR below.
if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) ||
- PartNumElts.isScalable() != ValueNumElts.isScalable() ||
- PartVT.getVectorElementType() != ValueVT.getVectorElementType())
+ PartNumElts.isScalable() != ValueNumElts.isScalable())
+ return SDValue();
+
+ // Have a try for bf16 because some targets share its ABI with fp16.
+ if (ValueEVT == MVT::bf16 && PartEVT == MVT::f16) {
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
+ "Cannot widen to illegal type");
+ Val = DAG.getNode(ISD::BITCAST, DL,
+ ValueVT.changeVectorElementType(MVT::f16), Val);
+ } else if (PartEVT != ValueEVT) {
return SDValue();
+ }
// Widening a scalable vector to another scalable vector is done by inserting
// the vector into a larger undef one.
@@ -638,12 +655,11 @@ static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
Val, DAG.getVectorIdxConstant(0, DL));
- EVT ElementVT = PartVT.getVectorElementType();
// Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
// undef elements.
SmallVector<SDValue, 16> Ops;
DAG.ExtractVectorElements(Val, Ops);
- SDValue EltUndef = DAG.getUNDEF(ElementVT);
+ SDValue EltUndef = DAG.getUNDEF(PartEVT);
Ops.append((PartNumElts - ValueNumElts).getFixedValue(), EltUndef);
// FIXME: Use CONCAT for 2x -> 4x.
@@ -833,7 +849,7 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
FunctionLoweringInfo &FuncInfo,
const SDLoc &dl, SDValue &Chain,
- SDValue *Flag, const Value *V) const {
+ SDValue *Glue, const Value *V) const {
// A Value with type {} or [0 x %t] needs no registers.
if (ValueVTs.empty())
return SDValue();
@@ -855,11 +871,11 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
Parts.resize(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
SDValue P;
- if (!Flag) {
+ if (!Glue) {
P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
} else {
- P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
- *Flag = P.getValue(2);
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Glue);
+ *Glue = P.getValue(2);
}
Chain = P.getValue(1);
@@ -918,7 +934,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
}
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
- const SDLoc &dl, SDValue &Chain, SDValue *Flag,
+ const SDLoc &dl, SDValue &Chain, SDValue *Glue,
const Value *V,
ISD::NodeType PreferredExtendType) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -947,18 +963,18 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
SmallVector<SDValue, 8> Chains(NumRegs);
for (unsigned i = 0; i != NumRegs; ++i) {
SDValue Part;
- if (!Flag) {
+ if (!Glue) {
Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
} else {
- Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
- *Flag = Part.getValue(1);
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Glue);
+ *Glue = Part.getValue(1);
}
Chains[i] = Part.getValue(0);
}
- if (NumRegs == 1 || Flag)
- // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
+ if (NumRegs == 1 || Glue)
+ // If NumRegs > 1 && Glue is used then the use of the last CopyToReg is
// flagged to it. That is the CopyToReg nodes and the user are considered
// a single scheduling unit. If we create a TokenFactor and return it as
// chain, then the TokenFactor is both a predecessor (operand) of the
@@ -1050,6 +1066,8 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
Context = DAG.getContext();
LPadToCallSiteMap.clear();
SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
+ AssignmentTrackingEnabled = isAssignmentTrackingEnabled(
+ *DAG.getMachineFunction().getFunction().getParent());
}
void SelectionDAGBuilder::clear() {
@@ -1144,8 +1162,13 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
It != End; ++It) {
auto *Var = FnVarLocs->getDILocalVariable(It->VariableID);
dropDanglingDebugInfo(Var, It->Expr);
- if (!handleDebugValue(It->V, Var, It->Expr, It->DL, SDNodeOrder,
- /*IsVariadic=*/false))
+ if (It->Values.isKillLocation(It->Expr)) {
+ handleKillDebugValue(Var, It->Expr, It->DL, SDNodeOrder);
+ continue;
+ }
+ SmallVector<Value *> Values(It->Values.location_ops());
+ if (!handleDebugValue(Values, Var, It->Expr, It->DL, SDNodeOrder,
+ It->Values.hasArgList()))
addDanglingDebugInfo(It, SDNodeOrder);
}
}
@@ -1205,27 +1228,46 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
}
}
+static bool handleDanglingVariadicDebugInfo(SelectionDAG &DAG,
+ DILocalVariable *Variable,
+ DebugLoc DL, unsigned Order,
+ RawLocationWrapper Values,
+ DIExpression *Expression) {
+ if (!Values.hasArgList())
+ return false;
+ // For variadic dbg_values we will now insert an undef.
+ // FIXME: We can potentially recover these!
+ SmallVector<SDDbgOperand, 2> Locs;
+ for (const Value *V : Values.location_ops()) {
+ auto *Undef = UndefValue::get(V->getType());
+ Locs.push_back(SDDbgOperand::fromConst(Undef));
+ }
+ SDDbgValue *SDV = DAG.getDbgValueList(Variable, Expression, Locs, {},
+ /*IsIndirect=*/false, DL, Order,
+ /*IsVariadic=*/true);
+ DAG.AddDbgValue(SDV, /*isParameter=*/false);
+ return true;
+}
+
void SelectionDAGBuilder::addDanglingDebugInfo(const VarLocInfo *VarLoc,
unsigned Order) {
- DanglingDebugInfoMap[VarLoc->V].emplace_back(VarLoc, Order);
+ if (!handleDanglingVariadicDebugInfo(
+ DAG,
+ const_cast<DILocalVariable *>(DAG.getFunctionVarLocs()
+ ->getVariable(VarLoc->VariableID)
+ .getVariable()),
+ VarLoc->DL, Order, VarLoc->Values, VarLoc->Expr)) {
+ DanglingDebugInfoMap[VarLoc->Values.getVariableLocationOp(0)].emplace_back(
+ VarLoc, Order);
+ }
}
void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI,
unsigned Order) {
// We treat variadic dbg_values differently at this stage.
- if (DI->hasArgList()) {
- // For variadic dbg_values we will now insert an undef.
- // FIXME: We can potentially recover these!
- SmallVector<SDDbgOperand, 2> Locs;
- for (const Value *V : DI->getValues()) {
- auto Undef = UndefValue::get(V->getType());
- Locs.push_back(SDDbgOperand::fromConst(Undef));
- }
- SDDbgValue *SDV = DAG.getDbgValueList(
- DI->getVariable(), DI->getExpression(), Locs, {},
- /*IsIndirect=*/false, DI->getDebugLoc(), Order, /*IsVariadic=*/true);
- DAG.AddDbgValue(SDV, /*isParameter=*/false);
- } else {
+ if (!handleDanglingVariadicDebugInfo(
+ DAG, DI->getVariable(), DI->getDebugLoc(), Order,
+ DI->getWrappedLocation(), DI->getExpression())) {
// TODO: Dangling debug info will eventually either be resolved or produce
// an Undef DBG_VALUE. However in the resolution case, a gap may appear
// between the original dbg.value location and its resolved DBG_VALUE,
@@ -1382,6 +1424,17 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
<< "\n");
}
+void SelectionDAGBuilder::handleKillDebugValue(DILocalVariable *Var,
+ DIExpression *Expr,
+ DebugLoc DbgLoc,
+ unsigned Order) {
+ Value *Poison = PoisonValue::get(Type::getInt1Ty(*Context));
+ DIExpression *NewExpr =
+ const_cast<DIExpression *>(DIExpression::convertToUndefExpression(Expr));
+ handleDebugValue(Poison, Var, NewExpr, DbgLoc, Order,
+ /*IsVariadic*/ false);
+}
+
bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values,
DILocalVariable *Var,
DIExpression *Expr, DebugLoc DbgLoc,
@@ -1569,7 +1622,7 @@ SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
// If we already have an SDValue for this value, use it.
SDValue &N = NodeMap[V];
if (N.getNode()) {
- if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
+ if (isIntOrFPConstant(N)) {
// Remove the debug location from the node as the node is about to be used
// in a location which may differ from the original debug location. This
// is relevant to Constant and ConstantFP nodes because they can appear
@@ -1606,7 +1659,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
TLI.getPointerTy(DAG.getDataLayout(), AS));
}
- if (match(C, m_VScale(DAG.getDataLayout())))
+ if (match(C, m_VScale()))
return DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1));
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
@@ -1976,8 +2029,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
// registers the usual way.
SmallVector<EVT, 1> PtrValueVTs;
ComputeValueVTs(TLI, DL,
- F->getReturnType()->getPointerTo(
- DAG.getDataLayout().getAllocaAddrSpace()),
+ PointerType::get(F->getContext(),
+ DAG.getDataLayout().getAllocaAddrSpace()),
PtrValueVTs);
SDValue RetPtr =
@@ -1987,7 +2040,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
- &Offsets);
+ &Offsets, 0);
unsigned NumValues = ValueVTs.size();
SmallVector<SDValue, 4> Chains(NumValues);
@@ -2123,7 +2176,8 @@ void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
DenseMap<const Value *, Register>::iterator VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
- assert(!V->use_empty() && "Unused value assigned virtual registers!");
+ assert((!V->use_empty() || isa<CallBrInst>(V)) &&
+ "Unused value assigned virtual registers!");
CopyValueToVirtualRegister(V, VMI->second);
}
}
@@ -2424,10 +2478,12 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// If this is not a fall-through branch or optimizations are switched off,
// emit the branch.
- if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
- DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
- MVT::Other, getControlRoot(),
- DAG.getBasicBlock(Succ0MBB)));
+ if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None) {
+ auto Br = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
+ getControlRoot(), DAG.getBasicBlock(Succ0MBB));
+ setValue(&I, Br);
+ DAG.setRoot(Br);
+ }
return;
}
@@ -2901,14 +2957,13 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
// would need to be to shift a 1 bit in that position.
Cmp = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
- ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
+ ShiftOp, DAG.getConstant(llvm::countr_zero(B.Mask), dl, VT),
ISD::SETEQ);
} else if (PopCount == BB.Range) {
// There is only one zero bit in the range, test for it directly.
Cmp = DAG.getSetCC(
dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
- ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
- ISD::SETNE);
+ ShiftOp, DAG.getConstant(llvm::countr_one(B.Mask), dl, VT), ISD::SETNE);
} else {
// Make desired shift
SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
@@ -2950,6 +3005,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
// catchswitch for successors.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
const BasicBlock *EHPadBB = I.getSuccessor(1);
+ MachineBasicBlock *EHPadMBB = FuncInfo.MBBMap[EHPadBB];
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
@@ -2974,6 +3030,10 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
case Intrinsic::seh_scope_begin:
case Intrinsic::seh_try_end:
case Intrinsic::seh_scope_end:
+ if (EHPadMBB)
+ // This block is referenced by the EH table, so the dtor funclet is
+ // not removed by optimizations
+ EHPadMBB->setMachineBlockAddressTaken();
break;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
@@ -3338,6 +3398,9 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
+ Flags.setUnpredictable(
+ cast<SelectInst>(I).getMetadata(LLVMContext::MD_unpredictable));
+
// Min/max matching is only viable if all output VTs are the same.
if (all_equal(ValueVTs)) {
EVT VT = ValueVTs[0];
@@ -3355,6 +3418,9 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
bool UseScalarMinMax = VT.isVector() &&
!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
+ // ValueTracking's select pattern matching does not account for -0.0,
+ // so we can't lower to FMINIMUM/FMAXIMUM because those nodes specify that
+ // -0.0 is less than +0.0.
Value *LHS, *RHS;
auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
ISD::NodeType Opc = ISD::DELETED_NODE;
@@ -3366,34 +3432,26 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
case SPF_FMINNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
- case SPNB_RETURNS_NAN: Opc = ISD::FMINIMUM; break;
+ case SPNB_RETURNS_NAN: break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
- case SPNB_RETURNS_ANY: {
- if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
+ case SPNB_RETURNS_ANY:
+ if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT) ||
+ (UseScalarMinMax &&
+ TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType())))
Opc = ISD::FMINNUM;
- else if (TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT))
- Opc = ISD::FMINIMUM;
- else if (UseScalarMinMax)
- Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
- ISD::FMINNUM : ISD::FMINIMUM;
break;
}
- }
break;
case SPF_FMAXNUM:
switch (SPR.NaNBehavior) {
case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
- case SPNB_RETURNS_NAN: Opc = ISD::FMAXIMUM; break;
+ case SPNB_RETURNS_NAN: break;
case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
case SPNB_RETURNS_ANY:
-
- if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
+ if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT) ||
+ (UseScalarMinMax &&
+ TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType())))
Opc = ISD::FMAXNUM;
- else if (TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT))
- Opc = ISD::FMAXIMUM;
- else if (UseScalarMinMax)
- Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
- ISD::FMAXNUM : ISD::FMAXIMUM;
break;
}
break;
@@ -4123,7 +4181,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Type *Ty = I.getType();
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets, 0);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
@@ -4196,7 +4254,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Chains[ChainI] = L.getValue(1);
if (MemVTs[i] != ValueVTs[i])
- L = DAG.getZExtOrTrunc(L, dl, ValueVTs[i]);
+ L = DAG.getPtrExtOrTrunc(L, dl, ValueVTs[i]);
Values[i] = L;
}
@@ -4222,7 +4280,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
SmallVector<uint64_t, 4> Offsets;
const Value *SrcV = I.getOperand(0);
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
- SrcV->getType(), ValueVTs, &Offsets);
+ SrcV->getType(), ValueVTs, &Offsets, 0);
assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
"expect a single EVT for swifterror");
@@ -4258,7 +4316,7 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
- ValueVTs, &Offsets);
+ ValueVTs, &Offsets, 0);
assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
"expect a single EVT for swifterror");
@@ -4295,7 +4353,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
- SrcV->getType(), ValueVTs, &MemVTs, &Offsets);
+ SrcV->getType(), ValueVTs, &MemVTs, &Offsets, 0);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
@@ -4447,11 +4505,13 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index,
if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy())
return false;
- uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType());
+ TypeSize ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType());
+ if (ScaleVal.isScalable())
+ return false;
// Target may not support the required addressing mode.
if (ScaleVal != 1 &&
- !TLI.isLegalScaleForGatherScatter(ScaleVal, ElemSize))
+ !TLI.isLegalScaleForGatherScatter(ScaleVal.getFixedValue(), ElemSize))
return false;
Base = SDB->getValue(BasePtr);
@@ -4919,8 +4979,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
Result = lowerRangeToAssertZExt(DAG, I, Result);
MaybeAlign Alignment = I.getRetAlign();
- if (!Alignment)
- Alignment = F->getAttributes().getRetAlignment();
+
// Insert `assertalign` node if there's an alignment.
if (InsertAssertAlign && Alignment) {
Result =
@@ -5504,13 +5563,8 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
PromVT = EVT::getVectorVT(Ctx, PromVT, VT.getVectorElementCount());
} else
llvm_unreachable("Wrong VT for DIVFIX?");
- if (Signed) {
- LHS = DAG.getSExtOrTrunc(LHS, DL, PromVT);
- RHS = DAG.getSExtOrTrunc(RHS, DL, PromVT);
- } else {
- LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT);
- RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT);
- }
+ LHS = DAG.getExtOrTrunc(Signed, LHS, DL, PromVT);
+ RHS = DAG.getExtOrTrunc(Signed, RHS, DL, PromVT);
EVT ShiftTy = TLI.getShiftAmountTy(PromVT, DAG.getDataLayout());
// For saturating operations, we need to shift up the LHS to get the
// proper saturation width, and then shift down again afterwards.
@@ -5767,6 +5821,26 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (!Op)
return false;
+ // If the expression refers to the entry value of an Argument, use the
+ // corresponding livein physical register. As per the Verifier, this is only
+ // allowed for swiftasync Arguments.
+ if (Op->isReg() && Expr->isEntryValue()) {
+ assert(Arg->hasAttribute(Attribute::AttrKind::SwiftAsync));
+ auto OpReg = Op->getReg();
+ for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
+ if (OpReg == VirtReg || OpReg == PhysReg) {
+ SDDbgValue *SDV = DAG.getVRegDbgValue(
+ Variable, Expr, PhysReg,
+ Kind != FuncArgumentDbgValueKind::Value /*is indirect*/, DL,
+ SDNodeOrder);
+ DAG.AddDbgValue(SDV, false /*treat as dbg.declare byval parameter*/);
+ return true;
+ }
+ LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but "
+ "couldn't find a physical register\n");
+ return true;
+ }
+
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
MachineInstr *NewMI = nullptr;
@@ -5873,7 +5947,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
visitTargetIntrinsic(I, Intrinsic);
return;
case Intrinsic::vscale: {
- match(&I, m_VScale(DAG.getDataLayout()));
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
setValue(&I, DAG.getVScale(sdl, VT, APInt(VT.getSizeInBits(), 1)));
return;
@@ -6092,14 +6165,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.setRoot(Res.getValue(1));
return;
}
- case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
- // Debug intrinsics are handled seperately in assignment tracking mode.
- if (isAssignmentTrackingEnabled(*I.getFunction()->getParent()))
+ const auto &DI = cast<DbgDeclareInst>(I);
+ // Debug intrinsics are handled separately in assignment tracking mode.
+ // Some intrinsics are handled right after Argument lowering.
+ if (AssignmentTrackingEnabled ||
+ FuncInfo.PreprocessedDbgDeclares.count(&DI))
return;
- // Assume dbg.addr and dbg.declare can not currently use DIArgList, i.e.
- // they are non-variadic.
- const auto &DI = cast<DbgVariableIntrinsic>(I);
+ // Assume dbg.declare can not currently use DIArgList, i.e.
+ // it is non-variadic.
assert(!DI.hasArgList() && "Only dbg.value should currently use DIArgList");
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
@@ -6118,37 +6192,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
bool isParameter = Variable->isParameter() || isa<Argument>(Address);
- // Check if this variable can be described by a frame index, typically
- // either as a static alloca or a byval parameter.
- int FI = std::numeric_limits<int>::max();
- if (const auto *AI =
- dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) {
- if (AI->isStaticAlloca()) {
- auto I = FuncInfo.StaticAllocaMap.find(AI);
- if (I != FuncInfo.StaticAllocaMap.end())
- FI = I->second;
- }
- } else if (const auto *Arg = dyn_cast<Argument>(
- Address->stripInBoundsConstantOffsets())) {
- FI = FuncInfo.getArgumentFrameIndex(Arg);
- }
-
- // llvm.dbg.addr is control dependent and always generates indirect
- // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in
- // the MachineFunction variable table.
- if (FI != std::numeric_limits<int>::max()) {
- if (Intrinsic == Intrinsic::dbg_addr) {
- SDDbgValue *SDV = DAG.getFrameIndexDbgValue(
- Variable, Expression, FI, getRoot().getNode(), /*IsIndirect*/ true,
- dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, isParameter);
- } else {
- LLVM_DEBUG(dbgs() << "Skipping " << DI
- << " (variable info stashed in MF side table)\n");
- }
- return;
- }
-
SDValue &N = NodeMap[Address];
if (!N.getNode() && isa<Argument>(Address))
// Check unused arguments map.
@@ -6198,13 +6241,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
case Intrinsic::dbg_assign: {
// Debug intrinsics are handled separately in assignment tracking mode.
- assert(isAssignmentTrackingEnabled(*I.getFunction()->getParent()) &&
- "expected assignment tracking to be enabled");
- return;
+ if (AssignmentTrackingEnabled)
+ return;
+ // If assignment tracking hasn't been enabled then fall through and treat
+ // the dbg.assign as a dbg.value.
+ [[fallthrough]];
}
case Intrinsic::dbg_value: {
// Debug intrinsics are handled separately in assignment tracking mode.
- if (isAssignmentTrackingEnabled(*I.getFunction()->getParent()))
+ if (AssignmentTrackingEnabled)
return;
const DbgValueInst &DI = cast<DbgValueInst>(I);
assert(DI.getVariable() && "Missing variable");
@@ -6212,11 +6257,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
dropDanglingDebugInfo(Variable, Expression);
- SmallVector<Value *, 4> Values(DI.getValues());
- if (Values.empty())
+
+ if (DI.isKillLocation()) {
+ handleKillDebugValue(Variable, Expression, DI.getDebugLoc(), SDNodeOrder);
return;
+ }
- if (llvm::is_contained(Values, nullptr))
+ SmallVector<Value *, 4> Values(DI.getValues());
+ if (Values.empty())
return;
bool IsVariadic = DI.hasArgList();
@@ -6413,6 +6461,20 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), Flags));
return;
+ case Intrinsic::ldexp:
+ setValue(&I, DAG.getNode(ISD::FLDEXP, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), Flags));
+ return;
+ case Intrinsic::frexp: {
+ SmallVector<EVT, 2> ValueVTs;
+ ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
+ SDVTList VTs = DAG.getVTList(ValueVTs);
+ setValue(&I,
+ DAG.getNode(ISD::FFREXP, sdl, VTs, getValue(I.getArgOperand(0))));
+ return;
+ }
case Intrinsic::arithmetic_fence: {
setValue(&I, DAG.getNode(ISD::ARITH_FENCE, sdl,
getValue(I.getArgOperand(0)).getValueType(),
@@ -6515,7 +6577,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
const DataLayout DLayout = DAG.getDataLayout();
EVT DestVT = TLI.getValueType(DLayout, I.getType());
EVT ArgVT = TLI.getValueType(DLayout, I.getArgOperand(0)->getType());
- unsigned Test = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ FPClassTest Test = static_cast<FPClassTest>(
+ cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
MachineFunction &MF = DAG.getMachineFunction();
const Function &F = MF.getFunction();
SDValue Op = getValue(I.getArgOperand(0));
@@ -6536,6 +6599,64 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, V);
return;
}
+ case Intrinsic::get_fpenv: {
+ const DataLayout DLayout = DAG.getDataLayout();
+ EVT EnvVT = TLI.getValueType(DLayout, I.getType());
+ Align TempAlign = DAG.getEVTAlign(EnvVT);
+ SDValue Chain = getRoot();
+ // Use GET_FPENV if it is legal or custom. Otherwise use memory-based node
+ // and temporary storage in stack.
+ if (TLI.isOperationLegalOrCustom(ISD::GET_FPENV, EnvVT)) {
+ Res = DAG.getNode(
+ ISD::GET_FPENV, sdl,
+ DAG.getVTList(TLI.getValueType(DAG.getDataLayout(), I.getType()),
+ MVT::Other),
+ Chain);
+ } else {
+ SDValue Temp = DAG.CreateStackTemporary(EnvVT, TempAlign.value());
+ int SPFI = cast<FrameIndexSDNode>(Temp.getNode())->getIndex();
+ auto MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize,
+ TempAlign);
+ Chain = DAG.getGetFPEnv(Chain, sdl, Temp, EnvVT, MMO);
+ Res = DAG.getLoad(EnvVT, sdl, Chain, Temp, MPI);
+ }
+ setValue(&I, Res);
+ DAG.setRoot(Res.getValue(1));
+ return;
+ }
+ case Intrinsic::set_fpenv: {
+ const DataLayout DLayout = DAG.getDataLayout();
+ SDValue Env = getValue(I.getArgOperand(0));
+ EVT EnvVT = Env.getValueType();
+ Align TempAlign = DAG.getEVTAlign(EnvVT);
+ SDValue Chain = getRoot();
+ // If SET_FPENV is custom or legal, use it. Otherwise use loading
+ // environment from memory.
+ if (TLI.isOperationLegalOrCustom(ISD::SET_FPENV, EnvVT)) {
+ Chain = DAG.getNode(ISD::SET_FPENV, sdl, MVT::Other, Chain, Env);
+ } else {
+ // Allocate space in stack, copy environment bits into it and use this
+ // memory in SET_FPENV_MEM.
+ SDValue Temp = DAG.CreateStackTemporary(EnvVT, TempAlign.value());
+ int SPFI = cast<FrameIndexSDNode>(Temp.getNode())->getIndex();
+ auto MPI =
+ MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
+ Chain = DAG.getStore(Chain, sdl, Env, Temp, MPI, TempAlign,
+ MachineMemOperand::MOStore);
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize,
+ TempAlign);
+ Chain = DAG.getSetFPEnv(Chain, sdl, Temp, EnvVT, MMO);
+ }
+ DAG.setRoot(Chain);
+ return;
+ }
+ case Intrinsic::reset_fpenv:
+ DAG.setRoot(DAG.getNode(ISD::RESET_FPENV, sdl, MVT::Other, getRoot()));
+ return;
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
@@ -7020,6 +7141,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
llvm_unreachable("instrprof failed to lower a cover");
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
+ case Intrinsic::instrprof_timestamp:
+ llvm_unreachable("instrprof failed to lower a timestamp");
case Intrinsic::instrprof_value_profile:
llvm_unreachable("instrprof failed to lower a value profiling call");
case Intrinsic::localescape: {
@@ -7093,10 +7216,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
case Intrinsic::xray_customevent: {
// Here we want to make sure that the intrinsic behaves as if it has a
- // specific calling convention, and only for x86_64.
- // FIXME: Support other platforms later.
+ // specific calling convention.
const auto &Triple = DAG.getTarget().getTargetTriple();
- if (Triple.getArch() != Triple::x86_64)
+ if (!Triple.isAArch64(64) && Triple.getArch() != Triple::x86_64)
return;
SmallVector<SDValue, 8> Ops;
@@ -7123,10 +7245,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
case Intrinsic::xray_typedevent: {
// Here we want to make sure that the intrinsic behaves as if it has a
- // specific calling convention, and only for x86_64.
- // FIXME: Support other platforms later.
+ // specific calling convention.
const auto &Triple = DAG.getTarget().getTargetTriple();
- if (Triple.getArch() != Triple::x86_64)
+ if (!Triple.isAArch64(64) && Triple.getArch() != Triple::x86_64)
return;
SmallVector<SDValue, 8> Ops;
@@ -7174,6 +7295,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::vector_reduce_umin:
case Intrinsic::vector_reduce_fmax:
case Intrinsic::vector_reduce_fmin:
+ case Intrinsic::vector_reduce_fmaximum:
+ case Intrinsic::vector_reduce_fminimum:
visitVectorReduce(I, Intrinsic);
return;
@@ -7285,6 +7408,40 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, SetCC);
return;
}
+ case Intrinsic::experimental_get_vector_length: {
+ assert(cast<ConstantInt>(I.getOperand(1))->getSExtValue() > 0 &&
+ "Expected positive VF");
+ unsigned VF = cast<ConstantInt>(I.getOperand(1))->getZExtValue();
+ bool IsScalable = cast<ConstantInt>(I.getOperand(2))->isOne();
+
+ SDValue Count = getValue(I.getOperand(0));
+ EVT CountVT = Count.getValueType();
+
+ if (!TLI.shouldExpandGetVectorLength(CountVT, VF, IsScalable)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return;
+ }
+
+ // Expand to a umin between the trip count and the maximum elements the type
+ // can hold.
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ // Extend the trip count to at least the result VT.
+ if (CountVT.bitsLT(VT)) {
+ Count = DAG.getNode(ISD::ZERO_EXTEND, sdl, VT, Count);
+ CountVT = VT;
+ }
+
+ SDValue MaxEVL = DAG.getElementCount(sdl, CountVT,
+ ElementCount::get(VF, IsScalable));
+
+ SDValue UMin = DAG.getNode(ISD::UMIN, sdl, CountVT, Count, MaxEVL);
+ // Clip to the result type if needed.
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, sdl, VT, UMin);
+
+ setValue(&I, Trunc);
+ return;
+ }
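The expansion above clamps the remaining trip count to the number of elements the vector type can hold, i.e. umin(count, VF * vscale). A tiny standard-C++ model of the fixed-width case (VF known at compile time; the scalable case multiplies VF by the runtime vscale):

  #include <algorithm>
  #include <cassert>
  #include <cstdint>

  // Model of llvm.experimental.get.vector.length for a fixed VF.
  static uint32_t getVectorLength(uint64_t Count, uint32_t VF) {
    return static_cast<uint32_t>(std::min<uint64_t>(Count, VF));
  }

  int main() {
    assert(getVectorLength(11, 4) == 4); // more work left than one vector holds
    assert(getVectorLength(3, 4) == 3);  // final (tail) iteration
    return 0;
  }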
case Intrinsic::vector_insert: {
SDValue Vec = getValue(I.getOperand(0));
SDValue SubVec = getValue(I.getOperand(1));
@@ -7324,6 +7481,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::experimental_vector_splice:
visitVectorSplice(I);
return;
+ case Intrinsic::callbr_landingpad:
+ visitCallBrLandingPad(I);
+ return;
+ case Intrinsic::experimental_vector_interleave2:
+ visitVectorInterleave(I);
+ return;
+ case Intrinsic::experimental_vector_deinterleave2:
+ visitVectorDeinterleave(I);
+ return;
}
}
@@ -7442,12 +7608,12 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
std::optional<unsigned> ResOPC;
switch (VPIntrin.getIntrinsicID()) {
case Intrinsic::vp_ctlz: {
- bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(3))->isOne();
+ bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(1))->isOne();
ResOPC = IsZeroUndef ? ISD::VP_CTLZ_ZERO_UNDEF : ISD::VP_CTLZ;
break;
}
case Intrinsic::vp_cttz: {
- bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(3))->isOne();
+ bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(1))->isOne();
ResOPC = IsZeroUndef ? ISD::VP_CTTZ_ZERO_UNDEF : ISD::VP_CTTZ;
break;
}
@@ -7472,21 +7638,21 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) {
return *ResOPC;
}
-void SelectionDAGBuilder::visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
- SmallVector<SDValue, 7> &OpValues) {
+void SelectionDAGBuilder::visitVPLoad(
+ const VPIntrinsic &VPIntrin, EVT VT,
+ const SmallVectorImpl<SDValue> &OpValues) {
SDLoc DL = getCurSDLoc();
Value *PtrOperand = VPIntrin.getArgOperand(0);
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
AAMDNodes AAInfo = VPIntrin.getAAMetadata();
const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
SDValue LD;
- bool AddToChain = true;
// Do not serialize variable-length loads of constant memory with
// anything.
if (!Alignment)
Alignment = DAG.getEVTAlign(VT);
MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
- AddToChain = !AA || !AA->pointsToConstantMemory(ML);
+ bool AddToChain = !AA || !AA->pointsToConstantMemory(ML);
SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
@@ -7498,8 +7664,9 @@ void SelectionDAGBuilder::visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
setValue(&VPIntrin, LD);
}
-void SelectionDAGBuilder::visitVPGather(const VPIntrinsic &VPIntrin, EVT VT,
- SmallVector<SDValue, 7> &OpValues) {
+void SelectionDAGBuilder::visitVPGather(
+ const VPIntrinsic &VPIntrin, EVT VT,
+ const SmallVectorImpl<SDValue> &OpValues) {
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value *PtrOperand = VPIntrin.getArgOperand(0);
@@ -7539,8 +7706,8 @@ void SelectionDAGBuilder::visitVPGather(const VPIntrinsic &VPIntrin, EVT VT,
setValue(&VPIntrin, LD);
}
-void SelectionDAGBuilder::visitVPStore(const VPIntrinsic &VPIntrin,
- SmallVector<SDValue, 7> &OpValues) {
+void SelectionDAGBuilder::visitVPStore(
+ const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues) {
SDLoc DL = getCurSDLoc();
Value *PtrOperand = VPIntrin.getArgOperand(1);
EVT VT = OpValues[0].getValueType();
@@ -7561,8 +7728,8 @@ void SelectionDAGBuilder::visitVPStore(const VPIntrinsic &VPIntrin,
setValue(&VPIntrin, ST);
}
-void SelectionDAGBuilder::visitVPScatter(const VPIntrinsic &VPIntrin,
- SmallVector<SDValue, 7> &OpValues) {
+void SelectionDAGBuilder::visitVPScatter(
+ const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues) {
SDLoc DL = getCurSDLoc();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value *PtrOperand = VPIntrin.getArgOperand(1);
@@ -7604,7 +7771,8 @@ void SelectionDAGBuilder::visitVPScatter(const VPIntrinsic &VPIntrin,
}
void SelectionDAGBuilder::visitVPStridedLoad(
- const VPIntrinsic &VPIntrin, EVT VT, SmallVectorImpl<SDValue> &OpValues) {
+ const VPIntrinsic &VPIntrin, EVT VT,
+ const SmallVectorImpl<SDValue> &OpValues) {
SDLoc DL = getCurSDLoc();
Value *PtrOperand = VPIntrin.getArgOperand(0);
MaybeAlign Alignment = VPIntrin.getPointerAlignment();
@@ -7629,7 +7797,7 @@ void SelectionDAGBuilder::visitVPStridedLoad(
}
void SelectionDAGBuilder::visitVPStridedStore(
- const VPIntrinsic &VPIntrin, SmallVectorImpl<SDValue> &OpValues) {
+ const VPIntrinsic &VPIntrin, const SmallVectorImpl<SDValue> &OpValues) {
SDLoc DL = getCurSDLoc();
Value *PtrOperand = VPIntrin.getArgOperand(1);
EVT VT = OpValues[0].getValueType();
@@ -7790,10 +7958,8 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
case ISD::VP_CTLZ_ZERO_UNDEF:
case ISD::VP_CTTZ:
case ISD::VP_CTTZ_ZERO_UNDEF: {
- // Pop is_zero_poison operand for cp.ctlz/cttz or
- // is_int_min_poison operand for vp.abs.
- OpValues.pop_back();
- SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues);
+ SDValue Result =
+ DAG.getNode(Opcode, DL, VTs, {OpValues[0], OpValues[2], OpValues[3]});
setValue(&VPIntrin, Result);
break;
}
@@ -8068,10 +8234,7 @@ void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I,
bool IsSigned) {
EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType(), true);
- if (IsSigned)
- Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT);
- else
- Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT);
+ Value = DAG.getExtOrTrunc(IsSigned, Value, getCurSDLoc(), VT);
setValue(&I, Value);
}
@@ -8206,14 +8369,13 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
// DAG::getMemcpy needs Alignment to be defined.
Align Alignment = std::min(DstAlign, SrcAlign);
- bool isVol = false;
SDLoc sdl = getCurSDLoc();
// In the mempcpy context we need to pass in a false value for isTailCall
// because the return pointer needs to be adjusted by the size of
// the copied memory.
- SDValue Root = isVol ? getRoot() : getMemoryRoot();
- SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false,
+ SDValue Root = getMemoryRoot();
+ SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, false, false,
/*isTailCall=*/false,
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)),
@@ -8498,6 +8660,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitUnaryFloatCall(I, ISD::FEXP2))
return;
break;
+ case LibFunc_ldexp:
+ case LibFunc_ldexpf:
+ case LibFunc_ldexpl:
+ if (visitBinaryFloatCall(I, ISD::FLDEXP))
+ return;
+ break;
case LibFunc_memcmp:
if (visitMemCmpBCmpCall(I))
return;
@@ -8897,7 +9065,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
// We won't need to flush pending loads if this asm doesn't touch
// memory and is nonvolatile.
- SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
+ SDValue Glue, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
bool EmitEHLabels = isa<InvokeInst>(Call);
if (EmitEHLabels) {
@@ -9124,7 +9292,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
SDLoc dl = getCurSDLoc();
// Use the produced MatchedRegs object to
- MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, &Call);
+ MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Glue, &Call);
MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
true, OpInfo.getMatchedOperand(), dl,
DAG, AsmNodeOperands);
@@ -9202,10 +9370,6 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
}
if (OpInfo.ConstraintType == TargetLowering::C_Address) {
- assert(InOperandVal.getValueType() ==
- TLI.getPointerTy(DAG.getDataLayout()) &&
- "Address operands expect pointer values");
-
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
assert(ConstraintID != InlineAsm::Constraint_Unknown &&
@@ -9258,7 +9422,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
SDLoc dl = getCurSDLoc();
- OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag,
+ OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Glue,
&Call);
OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
@@ -9278,12 +9442,12 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
// Finish up input operands. Set the input chain and add the flag last.
AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
- if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
+ if (Glue.getNode()) AsmNodeOperands.push_back(Glue);
unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM;
Chain = DAG.getNode(ISDOpc, getCurSDLoc(),
DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
- Flag = Chain.getValue(1);
+ Glue = Chain.getValue(1);
// Do additional work to generate outputs.
@@ -9341,11 +9505,11 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call,
case TargetLowering::C_Register:
case TargetLowering::C_RegisterClass:
Val = OpInfo.AssignedRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
- Chain, &Flag, &Call);
+ Chain, &Glue, &Call);
break;
case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
- Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
+ Val = TLI.LowerAsmOutputForConstraint(Chain, Glue, getCurSDLoc(),
OpInfo, DAG);
break;
case TargetLowering::C_Memory:
@@ -9576,7 +9740,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
- SDValue Chain, InFlag, Callee;
+ SDValue Chain, InGlue, Callee;
SmallVector<SDValue, 32> Ops;
SDLoc DL = getCurSDLoc();
@@ -9593,11 +9757,11 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
// chain, flag = CALLSEQ_END(chain, 0, 0, flag)
//
Chain = DAG.getCALLSEQ_START(getRoot(), 0, 0, DL);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
// Add the STACKMAP operands, starting with DAG house-keeping.
Ops.push_back(Chain);
- Ops.push_back(InFlag);
+ Ops.push_back(InGlue);
// Add the <id>, <numShadowBytes> operands.
//
@@ -9621,9 +9785,9 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
// Create the STACKMAP node.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(ISD::STACKMAP, DL, NodeTys, Ops);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
- Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InFlag, DL);
+ Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, DL);
// Stackmaps don't generate values, so nothing goes into the NodeMap.
@@ -9847,6 +10011,12 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
case Intrinsic::vector_reduce_fmin:
Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
break;
+ case Intrinsic::vector_reduce_fmaximum:
+ Res = DAG.getNode(ISD::VECREDUCE_FMAXIMUM, dl, VT, Op1, SDFlags);
+ break;
+ case Intrinsic::vector_reduce_fminimum:
+ Res = DAG.getNode(ISD::VECREDUCE_FMINIMUM, dl, VT, Op1, SDFlags);
+ break;
default:
llvm_unreachable("Unhandled vector reduce intrinsic");
}
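The two new cases map the fmaximum/fminimum vector reductions onto the matching VECREDUCE nodes. A scalar, standalone sketch of why these differ from the existing fmax/fmin reductions: IEEE 754-2019 maximum propagates NaN and orders -0.0 below +0.0, while fmax-style semantics ignore a NaN operand.

#include <cmath>
#include <cstdio>
#include <limits>

// Hand-written "maximum" with NaN propagation, for illustration only.
static double maximum(double a, double b) {
  if (std::isnan(a) || std::isnan(b))
    return std::numeric_limits<double>::quiet_NaN();
  if (a == 0.0 && b == 0.0)
    return std::signbit(a) ? b : a;  // +0.0 beats -0.0
  return a > b ? a : b;
}

int main() {
  std::printf("%g vs %g\n", std::fmax(NAN, 1.0), maximum(NAN, 1.0));  // 1 vs nan
  return 0;
}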
@@ -9880,7 +10050,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
SmallVector<EVT, 4> RetTys;
SmallVector<uint64_t, 4> Offsets;
auto &DL = CLI.DAG.getDataLayout();
- ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
+ ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets, 0);
if (CLI.IsPostTypeLegalization) {
// If we are lowering a libcall after legalization, split the return type.
@@ -10200,7 +10370,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// The instruction result is the result of loading from the
// hidden sret parameter.
SmallVector<EVT, 1> PVTs;
- Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace());
+ Type *PtrRetTy =
+ PointerType::get(OrigRetTy->getContext(), DL.getAllocaAddrSpace());
ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
assert(PVTs.size() == 1 && "Pointers should fit in one register");
@@ -10452,9 +10623,9 @@ static void tryToElideArgumentCopy(
DenseMap<int, int> &ArgCopyElisionFrameIndexMap,
SmallPtrSetImpl<const Instruction *> &ElidedArgCopyInstrs,
ArgCopyElisionMapTy &ArgCopyElisionCandidates, const Argument &Arg,
- SDValue ArgVal, bool &ArgHasUses) {
+ ArrayRef<SDValue> ArgVals, bool &ArgHasUses) {
// Check if this is a load from a fixed stack object.
- auto *LNode = dyn_cast<LoadSDNode>(ArgVal);
+ auto *LNode = dyn_cast<LoadSDNode>(ArgVals[0]);
if (!LNode)
return;
auto *FINode = dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode());
@@ -10497,7 +10668,8 @@ static void tryToElideArgumentCopy(
MFI.setIsImmutableObjectIndex(FixedIndex, false);
AllocaIndex = FixedIndex;
ArgCopyElisionFrameIndexMap.insert({OldIndex, FixedIndex});
- Chains.push_back(ArgVal.getValue(1));
+ for (SDValue ArgVal : ArgVals)
+ Chains.push_back(ArgVal.getValue(1));
// Avoid emitting code for the store implementing the copy.
const StoreInst *SI = ArgCopyIter->second.second;
@@ -10527,8 +10699,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Put in an sret pointer parameter before all the other parameters.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(),
- F.getReturnType()->getPointerTo(
- DAG.getDataLayout().getAllocaAddrSpace()),
+ PointerType::get(F.getContext(),
+ DAG.getDataLayout().getAllocaAddrSpace()),
ValueVTs);
// NOTE: Assuming that a pointer will never break down to more than one VT
@@ -10721,8 +10893,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// from the sret argument into it.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(),
- F.getReturnType()->getPointerTo(
- DAG.getDataLayout().getAllocaAddrSpace()),
+ PointerType::get(F.getContext(),
+ DAG.getDataLayout().getAllocaAddrSpace()),
ValueVTs);
MVT VT = ValueVTs[0].getSimpleVT();
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
@@ -10758,9 +10930,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Elide the copying store if the target loaded this argument from a
// suitable fixed stack object.
if (Ins[i].Flags.isCopyElisionCandidate()) {
+ unsigned NumParts = 0;
+ for (EVT VT : ValueVTs)
+ NumParts += TLI->getNumRegistersForCallingConv(*CurDAG->getContext(),
+ F.getCallingConv(), VT);
+
tryToElideArgumentCopy(*FuncInfo, Chains, ArgCopyElisionFrameIndexMap,
ElidedArgCopyInstrs, ArgCopyElisionCandidates, Arg,
- InVals[i], ArgHasUses);
+ ArrayRef(&InVals[i], NumParts), ArgHasUses);
}
// If this argument is unused then remember its value. It is used to generate
@@ -10872,12 +11049,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// If any argument copy elisions occurred and we have debug info, update the
// stale frame indices used in the dbg.declare variable info table.
- MachineFunction::VariableDbgInfoMapTy &DbgDeclareInfo = MF->getVariableDbgInfo();
- if (!DbgDeclareInfo.empty() && !ArgCopyElisionFrameIndexMap.empty()) {
- for (MachineFunction::VariableDbgInfo &VI : DbgDeclareInfo) {
- auto I = ArgCopyElisionFrameIndexMap.find(VI.Slot);
+ if (!ArgCopyElisionFrameIndexMap.empty()) {
+ for (MachineFunction::VariableDbgInfo &VI :
+ MF->getInStackSlotVariableDbgInfo()) {
+ auto I = ArgCopyElisionFrameIndexMap.find(VI.getStackSlot());
if (I != ArgCopyElisionFrameIndexMap.end())
- VI.Slot = I->second;
+ VI.updateStackSlot(I->second);
}
}
@@ -11554,6 +11731,62 @@ void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
}
+void SelectionDAGBuilder::visitVectorDeinterleave(const CallInst &I) {
+ auto DL = getCurSDLoc();
+ SDValue InVec = getValue(I.getOperand(0));
+ EVT OutVT =
+ InVec.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
+
+ unsigned OutNumElts = OutVT.getVectorMinNumElements();
+
+ // ISD Node needs the input vectors split into two equal parts
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, InVec,
+ DAG.getVectorIdxConstant(0, DL));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT, InVec,
+ DAG.getVectorIdxConstant(OutNumElts, DL));
+
+ // Use VECTOR_SHUFFLE for fixed-length vectors to benefit from existing
+ // legalisation and combines.
+ if (OutVT.isFixedLengthVector()) {
+ SDValue Even = DAG.getVectorShuffle(OutVT, DL, Lo, Hi,
+ createStrideMask(0, 2, OutNumElts));
+ SDValue Odd = DAG.getVectorShuffle(OutVT, DL, Lo, Hi,
+ createStrideMask(1, 2, OutNumElts));
+ SDValue Res = DAG.getMergeValues({Even, Odd}, getCurSDLoc());
+ setValue(&I, Res);
+ return;
+ }
+
+ SDValue Res = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
+ DAG.getVTList(OutVT, OutVT), Lo, Hi);
+ setValue(&I, Res);
+}
+
+void SelectionDAGBuilder::visitVectorInterleave(const CallInst &I) {
+ auto DL = getCurSDLoc();
+ EVT InVT = getValue(I.getOperand(0)).getValueType();
+ SDValue InVec0 = getValue(I.getOperand(0));
+ SDValue InVec1 = getValue(I.getOperand(1));
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT OutVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+
+ // Use VECTOR_SHUFFLE for fixed-length vectors to benefit from existing
+ // legalisation and combines.
+ if (OutVT.isFixedLengthVector()) {
+ unsigned NumElts = InVT.getVectorMinNumElements();
+ SDValue V = DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, InVec0, InVec1);
+ setValue(&I, DAG.getVectorShuffle(OutVT, DL, V, DAG.getUNDEF(OutVT),
+ createInterleaveMask(NumElts, 2)));
+ return;
+ }
+
+ SDValue Res = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
+ DAG.getVTList(InVT, InVT), InVec0, InVec1);
+ Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, Res.getValue(0),
+ Res.getValue(1));
+ setValue(&I, Res);
+}
+
void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
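For the fixed-length paths in the new visitVectorDeinterleave/visitVectorInterleave above, the shuffles are built from strided masks (the in-tree helpers are llvm::createStrideMask and llvm::createInterleaveMask). A standalone sketch of what a stride mask looks like for a 2-way deinterleave of four elements per half:

#include <cstdio>
#include <vector>

// Illustrative re-implementation of a stride mask: Start, Start+Stride, ...
static std::vector<int> strideMask(unsigned Start, unsigned Stride, unsigned VF) {
  std::vector<int> M;
  for (unsigned I = 0; I < VF; ++I)
    M.push_back(Start + I * Stride);
  return M;
}

int main() {
  for (int X : strideMask(0, 2, 4)) std::printf("%d ", X);  // 0 2 4 6: even lanes
  std::printf("\n");
  for (int X : strideMask(1, 2, 4)) std::printf("%d ", X);  // 1 3 5 7: odd lanes
  std::printf("\n");
  return 0;
}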
@@ -11599,3 +11832,113 @@ void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
Mask.push_back(Idx + i);
setValue(&I, DAG.getVectorShuffle(VT, DL, V1, V2, Mask));
}
+
+// Consider the following MIR after SelectionDAG, which produces output in
+// phyregs in the first case or virtregs in the second case.
+//
+// INLINEASM_BR ..., implicit-def $ebx, ..., implicit-def $edx
+// %5:gr32 = COPY $ebx
+// %6:gr32 = COPY $edx
+// %1:gr32 = COPY %6:gr32
+// %0:gr32 = COPY %5:gr32
+//
+// INLINEASM_BR ..., def %5:gr32, ..., def %6:gr32
+// %1:gr32 = COPY %6:gr32
+// %0:gr32 = COPY %5:gr32
+//
+// Given %0, we'd like to return $ebx in the first case and %5 in the second.
+// Given %1, we'd like to return $edx in the first case and %6 in the second.
+//
+// If a callbr has outputs, it will have a single mapping in FuncInfo.ValueMap
+// to a single virtreg (such as %0). The remaining outputs monotonically
+// increase in virtreg number from there. If a callbr has no outputs, then it
+// should not have a corresponding callbr landingpad; in fact, the callbr
+// landingpad would not even be able to refer to such a callbr.
+static Register FollowCopyChain(MachineRegisterInfo &MRI, Register Reg) {
+ MachineInstr *MI = MRI.def_begin(Reg)->getParent();
+ // There is definitely at least one copy.
+ assert(MI->getOpcode() == TargetOpcode::COPY &&
+ "start of copy chain MUST be COPY");
+ Reg = MI->getOperand(1).getReg();
+ MI = MRI.def_begin(Reg)->getParent();
+ // There may be an optional second copy.
+ if (MI->getOpcode() == TargetOpcode::COPY) {
+ assert(Reg.isVirtual() && "expected COPY of virtual register");
+ Reg = MI->getOperand(1).getReg();
+ assert(Reg.isPhysical() && "expected COPY of physical register");
+ MI = MRI.def_begin(Reg)->getParent();
+ }
+ // The start of the chain must be an INLINEASM_BR.
+ assert(MI->getOpcode() == TargetOpcode::INLINEASM_BR &&
+ "end of copy chain MUST be INLINEASM_BR");
+ return Reg;
+}
+
+// We must do this walk rather than the simpler
+// setValue(&I, getCopyFromRegs(CBR, CBR->getType()));
+// otherwise we will end up with copies of virtregs only valid along direct
+// edges.
+void SelectionDAGBuilder::visitCallBrLandingPad(const CallInst &I) {
+ SmallVector<EVT, 8> ResultVTs;
+ SmallVector<SDValue, 8> ResultValues;
+ const auto *CBR =
+ cast<CallBrInst>(I.getParent()->getUniquePredecessor()->getTerminator());
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+
+ unsigned InitialDef = FuncInfo.ValueMap[CBR];
+ SDValue Chain = DAG.getRoot();
+
+ // Re-parse the asm constraints string.
+ TargetLowering::AsmOperandInfoVector TargetConstraints =
+ TLI.ParseConstraints(DAG.getDataLayout(), TRI, *CBR);
+ for (auto &T : TargetConstraints) {
+ SDISelAsmOperandInfo OpInfo(T);
+ if (OpInfo.Type != InlineAsm::isOutput)
+ continue;
+
+ // Pencil in OpInfo.ConstraintType and OpInfo.ConstraintVT based on the
+ // individual constraint.
+ TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
+
+ switch (OpInfo.ConstraintType) {
+ case TargetLowering::C_Register:
+ case TargetLowering::C_RegisterClass: {
+ // Fill in OpInfo.AssignedRegs.Regs.
+ getRegistersForValue(DAG, getCurSDLoc(), OpInfo, OpInfo);
+
+ // getRegistersForValue may produce 1 to many registers based on whether
+ // the OpInfo.ConstraintVT is legal on the target or not.
+ for (size_t i = 0, e = OpInfo.AssignedRegs.Regs.size(); i != e; ++i) {
+ Register OriginalDef = FollowCopyChain(MRI, InitialDef++);
+ if (Register::isPhysicalRegister(OriginalDef))
+ FuncInfo.MBB->addLiveIn(OriginalDef);
+ // Update the assigned registers to use the original defs.
+ OpInfo.AssignedRegs.Regs[i] = OriginalDef;
+ }
+
+ SDValue V = OpInfo.AssignedRegs.getCopyFromRegs(
+ DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, CBR);
+ ResultValues.push_back(V);
+ ResultVTs.push_back(OpInfo.ConstraintVT);
+ break;
+ }
+ case TargetLowering::C_Other: {
+ SDValue Flag;
+ SDValue V = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
+ OpInfo, DAG);
+ ++InitialDef;
+ ResultValues.push_back(V);
+ ResultVTs.push_back(OpInfo.ConstraintVT);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(ResultVTs), ResultValues);
+ setValue(&I, V);
+}
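A source-level sketch of the construct the callbr landing-pad path above services (assuming Clang's asm-goto-with-outputs support, including outputs on indirect edges; the asm body is immaterial here). Using the output on the indirect edge is what requires the defs to be rebuilt in the landing-pad block:

int demo(int x) {
  int y;
  asm goto("" : "=r"(y) : "r"(x) : : err);  // lowered to a callbr with an output
  return y;      // direct edge: plain use of the def
err:
  return y + 1;  // indirect edge: output reconstructed via the landing pad
}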
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index bf2111013461..f2496f24973a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -30,7 +31,6 @@
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -119,25 +119,25 @@ class SelectionDAGBuilder {
: Info(VarLoc), SDNodeOrder(SDNO) {}
DILocalVariable *getVariable(const FunctionVarLocs *Locs) const {
- if (Info.is<VarLocTy>())
- return Locs->getDILocalVariable(Info.get<VarLocTy>()->VariableID);
- return Info.get<DbgValTy>()->getVariable();
+ if (isa<VarLocTy>(Info))
+ return Locs->getDILocalVariable(cast<VarLocTy>(Info)->VariableID);
+ return cast<DbgValTy>(Info)->getVariable();
}
DIExpression *getExpression() const {
- if (Info.is<VarLocTy>())
- return Info.get<VarLocTy>()->Expr;
- return Info.get<DbgValTy>()->getExpression();
+ if (isa<VarLocTy>(Info))
+ return cast<VarLocTy>(Info)->Expr;
+ return cast<DbgValTy>(Info)->getExpression();
}
Value *getVariableLocationOp(unsigned Idx) const {
assert(Idx == 0 && "Dangling variadic debug values not supported yet");
- if (Info.is<VarLocTy>())
- return Info.get<VarLocTy>()->V;
- return Info.get<DbgValTy>()->getVariableLocationOp(Idx);
+ if (isa<VarLocTy>(Info))
+ return cast<VarLocTy>(Info)->Values.getVariableLocationOp(Idx);
+ return cast<DbgValTy>(Info)->getVariableLocationOp(Idx);
}
DebugLoc getDebugLoc() const {
- if (Info.is<VarLocTy>())
- return Info.get<VarLocTy>()->DL;
- return Info.get<DbgValTy>()->getDebugLoc();
+ if (isa<VarLocTy>(Info))
+ return cast<VarLocTy>(Info)->DL;
+ return cast<DbgValTy>(Info)->getDebugLoc();
}
unsigned getSDNodeOrder() const { return SDNodeOrder; }
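The accessors above move from PointerUnion's member is<T>()/get<T>() to the free isa<T>/cast<T> templates. The same migration in miniature (the int*/float* union is a made-up example):

#include "llvm/ADT/PointerUnion.h"

void update(llvm::PointerUnion<int *, float *> PU) {
  if (llvm::isa<int *>(PU))       // was: PU.is<int *>()
    *llvm::cast<int *>(PU) = 42;  // was: PU.get<int *>()
}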
@@ -175,6 +175,10 @@ class SelectionDAGBuilder {
/// We defer handling these until we do see it.
MapVector<const Value*, DanglingDebugInfoVector> DanglingDebugInfoMap;
+ /// Cache the module flag for whether we should use debug-info assignment
+ /// tracking.
+ bool AssignmentTrackingEnabled = false;
+
public:
/// Loads are not emitted to the program immediately. We bunch them up and
/// then emit token factor nodes when possible. This allows us to get simple
@@ -243,7 +247,7 @@ public:
SelectionDAG &DAG;
AAResults *AA = nullptr;
AssumptionCache *AC = nullptr;
- const TargetLibraryInfo *LibInfo;
+ const TargetLibraryInfo *LibInfo = nullptr;
class SDAGSwitchLowering : public SwitchCG::SwitchLowering {
public:
@@ -257,7 +261,7 @@ public:
}
private:
- SelectionDAGBuilder *SDB;
+ SelectionDAGBuilder *SDB = nullptr;
};
// Data related to deferred switch lowerings. Used to construct additional
@@ -279,7 +283,7 @@ public:
SwiftErrorValueTracking &SwiftError;
/// Garbage collection metadata for the function.
- GCFunctionInfo *GFI;
+ GCFunctionInfo *GFI = nullptr;
/// Map a landing pad to the call site indexes.
DenseMap<MachineBasicBlock *, SmallVector<unsigned, 4>> LPadToCallSiteMap;
@@ -288,7 +292,7 @@ public:
/// a tail call. In this case, no subsequent DAG nodes should be created.
bool HasTailCall = false;
- LLVMContext *Context;
+ LLVMContext *Context = nullptr;
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol)
@@ -372,6 +376,10 @@ public:
DIExpression *Expr, DebugLoc DbgLoc, unsigned Order,
bool IsVariadic);
+ /// Create a record for a kill location debug intrinsic.
+ void handleKillDebugValue(DILocalVariable *Var, DIExpression *Expr,
+ DebugLoc DbgLoc, unsigned Order);
+
/// Evict any dangling debug information, attempting to salvage it first.
void resolveOrClearDbgInfo();
@@ -534,6 +542,7 @@ private:
// These all get lowered before this pass.
void visitInvoke(const InvokeInst &I);
void visitCallBr(const CallBrInst &I);
+ void visitCallBrLandingPad(const CallInst &I);
void visitResume(const ResumeInst &I);
void visitUnary(const User &I, unsigned Opcode);
@@ -620,17 +629,17 @@ private:
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
- SmallVector<SDValue, 7> &OpValues);
+ const SmallVectorImpl<SDValue> &OpValues);
void visitVPStore(const VPIntrinsic &VPIntrin,
- SmallVector<SDValue, 7> &OpValues);
+ const SmallVectorImpl<SDValue> &OpValues);
void visitVPGather(const VPIntrinsic &VPIntrin, EVT VT,
- SmallVector<SDValue, 7> &OpValues);
+ const SmallVectorImpl<SDValue> &OpValues);
void visitVPScatter(const VPIntrinsic &VPIntrin,
- SmallVector<SDValue, 7> &OpValues);
+ const SmallVectorImpl<SDValue> &OpValues);
void visitVPStridedLoad(const VPIntrinsic &VPIntrin, EVT VT,
- SmallVectorImpl<SDValue> &OpValues);
+ const SmallVectorImpl<SDValue> &OpValues);
void visitVPStridedStore(const VPIntrinsic &VPIntrin,
- SmallVectorImpl<SDValue> &OpValues);
+ const SmallVectorImpl<SDValue> &OpValues);
void visitVPCmp(const VPCmpIntrinsic &VPIntrin);
void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin);
@@ -648,6 +657,8 @@ private:
void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
void visitVectorReverse(const CallInst &I);
void visitVectorSplice(const CallInst &I);
+ void visitVectorInterleave(const CallInst &I);
+ void visitVectorDeinterleave(const CallInst &I);
void visitStepVector(const CallInst &I);
void visitUserOp1(const Instruction &I) {
@@ -669,7 +680,6 @@ private:
/// EmitFuncArgumentDbgValue.
enum class FuncArgumentDbgValueKind {
Value, // This was originally a llvm.dbg.value.
- Addr, // This was originally a llvm.dbg.addr.
Declare, // This was originally a llvm.dbg.declare.
};
@@ -760,7 +770,7 @@ struct RegsForValue {
/// updates them for the output Chain/Flag. If the Flag pointer is NULL, no
/// flag is used.
SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
- const SDLoc &dl, SDValue &Chain, SDValue *Flag,
+ const SDLoc &dl, SDValue &Chain, SDValue *Glue,
const Value *V = nullptr) const;
/// Emit a series of CopyToReg nodes that copies the specified value into the
@@ -769,7 +779,7 @@ struct RegsForValue {
/// flag is used. If V is not nullptr, then it is used in printing better
/// diagnostic messages on error.
void getCopyToRegs(SDValue Val, SelectionDAG &DAG, const SDLoc &dl,
- SDValue &Chain, SDValue *Flag, const Value *V = nullptr,
+ SDValue &Chain, SDValue *Glue, const Value *V = nullptr,
ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const;
/// Add this value to the specified inlineasm node operand list. This adds the
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index fe4261291fc5..03a1ead5bbb4 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -40,7 +41,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -283,6 +283,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UMIN: return "umin";
case ISD::UMAX: return "umax";
+ case ISD::FLDEXP: return "fldexp";
+ case ISD::STRICT_FLDEXP: return "strict_fldexp";
+ case ISD::FFREXP: return "ffrexp";
case ISD::FPOWI: return "fpowi";
case ISD::STRICT_FPOWI: return "strict_fpowi";
case ISD::SETCC: return "setcc";
@@ -297,6 +300,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CONCAT_VECTORS: return "concat_vectors";
case ISD::INSERT_SUBVECTOR: return "insert_subvector";
case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
+ case ISD::VECTOR_DEINTERLEAVE: return "vector_deinterleave";
+ case ISD::VECTOR_INTERLEAVE: return "vector_interleave";
case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
case ISD::VECTOR_SPLICE: return "vector_splice";
@@ -307,7 +312,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::CARRY_FALSE: return "carry_false";
case ISD::ADDC: return "addc";
case ISD::ADDE: return "adde";
- case ISD::ADDCARRY: return "addcarry";
+ case ISD::UADDO_CARRY: return "uaddo_carry";
case ISD::SADDO_CARRY: return "saddo_carry";
case ISD::SADDO: return "saddo";
case ISD::UADDO: return "uaddo";
@@ -317,7 +322,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UMULO: return "umulo";
case ISD::SUBC: return "subc";
case ISD::SUBE: return "sube";
- case ISD::SUBCARRY: return "subcarry";
+ case ISD::USUBO_CARRY: return "usubo_carry";
case ISD::SSUBO_CARRY: return "ssubo_carry";
case ISD::SHL_PARTS: return "shl_parts";
case ISD::SRA_PARTS: return "sra_parts";
@@ -429,6 +434,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
// Floating point environment manipulation
case ISD::GET_ROUNDING: return "get_rounding";
case ISD::SET_ROUNDING: return "set_rounding";
+ case ISD::GET_FPENV: return "get_fpenv";
+ case ISD::SET_FPENV: return "set_fpenv";
+ case ISD::RESET_FPENV: return "reset_fpenv";
+ case ISD::GET_FPENV_MEM: return "get_fpenv_mem";
+ case ISD::SET_FPENV_MEM: return "set_fpenv_mem";
// Bit manipulation
case ISD::ABS: return "abs";
@@ -491,6 +501,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::VECREDUCE_UMIN: return "vecreduce_umin";
case ISD::VECREDUCE_FMAX: return "vecreduce_fmax";
case ISD::VECREDUCE_FMIN: return "vecreduce_fmin";
+ case ISD::VECREDUCE_FMAXIMUM: return "vecreduce_fmaximum";
+ case ISD::VECREDUCE_FMINIMUM: return "vecreduce_fminimum";
case ISD::STACKMAP:
return "stackmap";
case ISD::PATCHPOINT:
@@ -698,7 +710,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
else
OS << "<null>";
} else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) {
- OS << ":" << N->getVT().getEVTString();
+ OS << ":" << N->getVT();
}
else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) {
OS << "<";
@@ -713,7 +725,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
case ISD::ZEXTLOAD: OS << ", zext"; break;
}
if (doExt)
- OS << " from " << LD->getMemoryVT().getEVTString();
+ OS << " from " << LD->getMemoryVT();
const char *AM = getIndexedModeName(LD->getAddressingMode());
if (*AM)
@@ -725,7 +737,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
printMemOperand(OS, *ST->getMemOperand(), G);
if (ST->isTruncatingStore())
- OS << ", trunc to " << ST->getMemoryVT().getEVTString();
+ OS << ", trunc to " << ST->getMemoryVT();
const char *AM = getIndexedModeName(ST->getAddressingMode());
if (*AM)
@@ -745,7 +757,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
case ISD::ZEXTLOAD: OS << ", zext"; break;
}
if (doExt)
- OS << " from " << MLd->getMemoryVT().getEVTString();
+ OS << " from " << MLd->getMemoryVT();
const char *AM = getIndexedModeName(MLd->getAddressingMode());
if (*AM)
@@ -760,7 +772,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
printMemOperand(OS, *MSt->getMemOperand(), G);
if (MSt->isTruncatingStore())
- OS << ", trunc to " << MSt->getMemoryVT().getEVTString();
+ OS << ", trunc to " << MSt->getMemoryVT();
const char *AM = getIndexedModeName(MSt->getAddressingMode());
if (*AM)
@@ -782,7 +794,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
case ISD::ZEXTLOAD: OS << ", zext"; break;
}
if (doExt)
- OS << " from " << MGather->getMemoryVT().getEVTString();
+ OS << " from " << MGather->getMemoryVT();
auto Signed = MGather->isIndexSigned() ? "signed" : "unsigned";
auto Scaled = MGather->isIndexScaled() ? "scaled" : "unscaled";
@@ -794,7 +806,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
printMemOperand(OS, *MScatter->getMemOperand(), G);
if (MScatter->isTruncatingStore())
- OS << ", trunc to " << MScatter->getMemoryVT().getEVTString();
+ OS << ", trunc to " << MScatter->getMemoryVT();
auto Signed = MScatter->isIndexSigned() ? "signed" : "unsigned";
auto Scaled = MScatter->isIndexScaled() ? "scaled" : "unscaled";
@@ -849,6 +861,12 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
Dbg->print(OS);
} else if (getHasDebugValue())
OS << " [NoOfDbgValues>0]";
+
+ if (const auto *MD = G ? G->getPCSections(this) : nullptr) {
+ OS << " [pcsections ";
+ MD->printAsOperand(OS, G->getMachineFunction().getFunction().getParent());
+ OS << ']';
+ }
}
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 902f46115557..35abd990f968 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -25,13 +25,12 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/CodeGen/CodeGenCommonISel.h"
#include "llvm/CodeGen/FastISel.h"
@@ -49,6 +48,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -60,6 +60,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -67,6 +68,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
@@ -91,7 +93,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -425,9 +426,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
- CurDAG->init(*MF, *ORE, this, LibInfo,
- getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI,
- FnVarLocs);
+ UniformityInfo *UA = nullptr;
+ if (auto *UAPass = getAnalysisIfAvailable<UniformityInfoWrapperPass>())
+ UA = &UAPass->getUniformityInfo();
+ CurDAG->init(*MF, *ORE, this, LibInfo, UA, PSI, BFI, FnVarLocs);
FuncInfo->set(Fn, *MF, CurDAG);
SwiftError->setFunction(*MF);
@@ -1291,6 +1293,43 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
return true;
}
+// Mark and Report IPToState for each Block under IsEHa
+void SelectionDAGISel::reportIPToStateForBlocks(MachineFunction *MF) {
+ MachineModuleInfo &MMI = MF->getMMI();
+ llvm::WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo();
+ if (!EHInfo)
+ return;
+ for (auto MBBI = MF->begin(), E = MF->end(); MBBI != E; ++MBBI) {
+ MachineBasicBlock *MBB = &*MBBI;
+ const BasicBlock *BB = MBB->getBasicBlock();
+ int State = EHInfo->BlockToStateMap[BB];
+ if (BB->getFirstMayFaultInst()) {
+ // Report IP range only for blocks with Faulty inst
+ auto MBBb = MBB->getFirstNonPHI();
+ MachineInstr *MIb = &*MBBb;
+ if (MIb->isTerminator())
+ continue;
+
+ // Insert EH Labels
+ MCSymbol *BeginLabel = MMI.getContext().createTempSymbol();
+ MCSymbol *EndLabel = MMI.getContext().createTempSymbol();
+ EHInfo->addIPToStateRange(State, BeginLabel, EndLabel);
+ BuildMI(*MBB, MBBb, SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::EH_LABEL))
+ .addSym(BeginLabel);
+ auto MBBe = MBB->instr_end();
+ MachineInstr *MIe = &*(--MBBe);
+ // insert before (possible multiple) terminators
+ // Insert before (possibly multiple) terminators.
+ while (MIe->isTerminator())
+ MIe = &*(--MBBe);
+ ++MBBe;
+ BuildMI(*MBB, MBBe, SDB->getCurDebugLoc(),
+ TII->get(TargetOpcode::EH_LABEL))
+ .addSym(EndLabel);
+ }
+ }
+}
+
/// isFoldedOrDeadInstruction - Return true if the specified instruction is
/// side-effect free and is either dead or folded into a generated instruction.
/// Return false if it needs to be emitted.
@@ -1303,9 +1342,42 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
!FuncInfo.isExportedInst(I); // Exported instrs must be computed.
}
-static void processDbgDeclare(FunctionLoweringInfo &FuncInfo,
+static bool processIfEntryValueDbgDeclare(FunctionLoweringInfo &FuncInfo,
+ const Value *Arg, DIExpression *Expr,
+ DILocalVariable *Var,
+ DebugLoc DbgLoc) {
+ if (!Expr->isEntryValue() || !isa<Argument>(Arg))
+ return false;
+
+ auto ArgIt = FuncInfo.ValueMap.find(Arg);
+ if (ArgIt == FuncInfo.ValueMap.end())
+ return false;
+ Register ArgVReg = ArgIt->getSecond();
+
+ // Find the corresponding livein physical register to this argument.
+ for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins())
+ if (VirtReg == ArgVReg) {
+ FuncInfo.MF->setVariableDbgInfo(Var, Expr, PhysReg, DbgLoc);
+ LLVM_DEBUG(dbgs() << "processDbgDeclare: setVariableDbgInfo Var=" << *Var
+ << ", Expr=" << *Expr << ", MCRegister=" << PhysReg
+ << ", DbgLoc=" << DbgLoc << "\n");
+ return true;
+ }
+ return false;
+}
+
+static bool processDbgDeclare(FunctionLoweringInfo &FuncInfo,
const Value *Address, DIExpression *Expr,
DILocalVariable *Var, DebugLoc DbgLoc) {
+ if (!Address) {
+ LLVM_DEBUG(dbgs() << "processDbgDeclares skipping " << *Var
+ << " (bad address)\n");
+ return false;
+ }
+
+ if (processIfEntryValueDbgDeclare(FuncInfo, Address, Expr, Var, DbgLoc))
+ return true;
+
MachineFunction *MF = FuncInfo.MF;
const DataLayout &DL = MF->getDataLayout();
@@ -1329,7 +1401,7 @@ static void processDbgDeclare(FunctionLoweringInfo &FuncInfo,
FI = FuncInfo.getArgumentFrameIndex(Arg);
if (FI == std::numeric_limits<int>::max())
- return;
+ return false;
if (Offset.getBoolValue())
Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset,
@@ -1339,24 +1411,17 @@ static void processDbgDeclare(FunctionLoweringInfo &FuncInfo,
<< ", Expr=" << *Expr << ", FI=" << FI
<< ", DbgLoc=" << DbgLoc << "\n");
MF->setVariableDbgInfo(Var, Expr, FI, DbgLoc);
+ return true;
}
/// Collect llvm.dbg.declare information. This is done after argument lowering
/// in case the declarations refer to arguments.
static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) {
- for (const BasicBlock &BB : *FuncInfo.Fn) {
- for (const Instruction &I : BB) {
- if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I)) {
- Value *Address = DI->getAddress();
- if (!Address) {
- LLVM_DEBUG(dbgs() << "processDbgDeclares skipping " << *DI
- << " (bad address)\n");
- continue;
- }
- processDbgDeclare(FuncInfo, Address, DI->getExpression(),
- DI->getVariable(), DI->getDebugLoc());
- }
- }
+ for (const auto &I : instructions(*FuncInfo.Fn)) {
+ const auto *DI = dyn_cast<DbgDeclareInst>(&I);
+ if (DI && processDbgDeclare(FuncInfo, DI->getAddress(), DI->getExpression(),
+ DI->getVariable(), DI->getDebugLoc()))
+ FuncInfo.PreprocessedDbgDeclares.insert(DI);
}
}
@@ -1367,9 +1432,11 @@ static void processSingleLocVars(FunctionLoweringInfo &FuncInfo,
FunctionVarLocs const *FnVarLocs) {
for (auto It = FnVarLocs->single_locs_begin(),
End = FnVarLocs->single_locs_end();
- It != End; ++It)
- processDbgDeclare(FuncInfo, It->V, It->Expr,
+ It != End; ++It) {
+ assert(!It->Values.hasArgList() && "Single loc variadic ops not supported");
+ processDbgDeclare(FuncInfo, It->Values.getVariableLocationOp(0), It->Expr,
FnVarLocs->getDILocalVariable(It->VariableID), It->DL);
+ }
}
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
@@ -1408,7 +1475,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
Fn.getSubprogram(),
&Fn.getEntryBlock());
R << "FastISel didn't lower all arguments: "
- << ore::NV("Prototype", Fn.getType());
+ << ore::NV("Prototype", Fn.getFunctionType());
reportFastISelFailure(*MF, *ORE, R, EnableFastISelAbort > 1);
// Use SelectionDAG argument lowering
@@ -1646,6 +1713,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
ElidedArgCopyInstrs.clear();
}
+ // AsynchEH: Report Block State under -AsynchEH
+ if (Fn.getParent()->getModuleFlag("eh-asynch"))
+ reportIPToStateForBlocks(MF);
+
SP.copyToMachineFrameInfo(MF->getFrameInfo());
SwiftError->propagateVRegs();
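reportIPToStateForBlocks is gated on the "eh-asynch" module flag checked above. A standalone sketch of setting and querying such a flag through the C++ API (the flag behavior chosen here is an assumption, not necessarily what the frontend emits for /EHa):

#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

int main() {
  llvm::LLVMContext Ctx;
  llvm::Module M("demo", Ctx);
  M.addModuleFlag(llvm::Module::Warning, "eh-asynch", 1);
  return M.getModuleFlag("eh-asynch") ? 0 : 1;  // flag present -> 0
}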
@@ -2273,7 +2344,7 @@ void SelectionDAGISel::Select_STACKMAP(SDNode *N) {
// Stash the chain and glue operands so we can move them to the end.
SDValue Chain = *It++;
- SDValue InFlag = *It++;
+ SDValue InGlue = *It++;
// <id> operand.
SDValue ID = *It++;
@@ -2290,7 +2361,7 @@ void SelectionDAGISel::Select_STACKMAP(SDNode *N) {
pushStackMapLiveVariable(Ops, *It, DL);
Ops.push_back(Chain);
- Ops.push_back(InFlag);
+ Ops.push_back(InGlue);
SDVTList NodeTys = CurDAG->getVTList(MVT::Other, MVT::Glue);
CurDAG->SelectNodeTo(N, TargetOpcode::STACKMAP, NodeTys, Ops);
@@ -3240,7 +3311,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
if (CaseSize == 0) break;
// Otherwise, execute the case we found.
- LLVM_DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
+ LLVM_DEBUG(dbgs() << " TypeSwitch[" << CurNodeVT
<< "] from " << SwitchStart << " to " << MatcherIndex
<< '\n');
continue;
diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 57bfe344dbab..5afd05648772 100644
--- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -42,7 +43,6 @@
#include "llvm/IR/Type.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
@@ -258,8 +258,7 @@ static bool willLowerDirectly(SDValue Incoming) {
if (Incoming.getValueType().getSizeInBits() > 64)
return false;
- return (isa<ConstantSDNode>(Incoming) || isa<ConstantFPSDNode>(Incoming) ||
- Incoming.isUndef());
+ return isIntOrFPConstant(Incoming) || Incoming.isUndef();
}
/// Try to find existing copies of the incoming values in stack slots used for
@@ -490,7 +489,7 @@ lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot,
Ops.push_back(std::get<0>(Res));
if (auto *MMO = std::get<2>(Res))
MemRefs.push_back(MMO);
- Chain = std::get<1>(Res);;
+ Chain = std::get<1>(Res);
Builder.DAG.setRoot(Chain);
}
@@ -1250,7 +1249,7 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
// All the reloads are independent and are reading memory only modified by
// statepoints (i.e. no other aliasing stores); informing SelectionDAG of
- // this this let's CSE kick in for free and allows reordering of
+ // this lets CSE kick in for free and allows reordering of
// instructions if possible. The lowering for statepoint sets the root,
// so this is ordering all reloads with the either
// a) the statepoint node itself, or
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8d4c8802f71c..a84d35a6ea4e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -504,6 +504,11 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
SDLoc DL(Op);
unsigned Opcode = Op.getOpcode();
+ // Early-out if we've ended up calling an undemanded node, leave this to
+ // constant folding.
+ if (DemandedBits.isZero() || DemandedElts.isZero())
+ return false;
+
// Do target-specific constant optimization.
if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return TLO.New.getNode();
@@ -552,18 +557,19 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
- const APInt &Demanded,
+ const APInt &DemandedBits,
TargetLoweringOpt &TLO) const {
assert(Op.getNumOperands() == 2 &&
"ShrinkDemandedOp only supports binary operators!");
assert(Op.getNode()->getNumValues() == 1 &&
"ShrinkDemandedOp only supports nodes with one result!");
+ EVT VT = Op.getValueType();
SelectionDAG &DAG = TLO.DAG;
SDLoc dl(Op);
// Early return, as this function cannot handle vector types.
- if (Op.getValueType().isVector())
+ if (VT.isVector())
return false;
// Don't do this if the node has another user, which may require the
@@ -574,21 +580,18 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
// Search for the smallest integer type with free casts to and from
// Op's type. For expedience, just check power-of-2 integer types.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned DemandedSize = Demanded.getActiveBits();
- unsigned SmallVTBits = DemandedSize;
- if (!isPowerOf2_32(SmallVTBits))
- SmallVTBits = NextPowerOf2(SmallVTBits);
- for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
+ unsigned DemandedSize = DemandedBits.getActiveBits();
+ for (unsigned SmallVTBits = llvm::bit_ceil(DemandedSize);
+ SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
- if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
- TLI.isZExtFree(SmallVT, Op.getValueType())) {
+ if (TLI.isTruncateFree(VT, SmallVT) && TLI.isZExtFree(SmallVT, VT)) {
// We found a type with free casts.
SDValue X = DAG.getNode(
Op.getOpcode(), dl, SmallVT,
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
- SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
+ SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, VT, X);
return TLO.CombineTo(Op, Z);
}
}
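The loop above now seeds SmallVTBits with llvm::bit_ceil instead of the manual isPowerOf2_32/NextPowerOf2 pair. Its behaviour, in a standalone check (values chosen purely for illustration):

#include "llvm/ADT/bit.h"
#include <cassert>

int main() {
  assert(llvm::bit_ceil(17u) == 32u);  // round up to the next power of two
  assert(llvm::bit_ceil(32u) == 32u);  // powers of two are unchanged
  assert(llvm::bit_ceil(1u) == 1u);
  return 0;
}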
@@ -773,7 +776,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
unsigned ShAmt = MaxSA->getZExtValue();
unsigned NumSignBits =
DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
- unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
+ unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
return Op0;
}
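countTrailingZeros/countLeadingZeros appear under their C++20-style names here and in the later hunks. A standalone refresher on what they count:

#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  llvm::APInt X(8, 0b00101000);  // bits 3 and 5 set
  assert(X.countr_zero() == 3);  // formerly countTrailingZeros()
  assert(X.countl_zero() == 2);  // formerly countLeadingZeros()
  return 0;
}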
@@ -805,7 +808,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
unsigned ExBits = ExVT.getScalarSizeInBits();
- if (DemandedBits.getActiveBits() <= ExBits)
+ if (DemandedBits.getActiveBits() <= ExBits &&
+ shouldRemoveRedundantExtend(Op))
return Op0;
// If the input is already sign extended, just drop the extension.
unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
@@ -856,15 +860,6 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
// If we don't demand the inserted subvector, return the base vector.
if (DemandedSubElts == 0)
return Vec;
- // If this simply widens the lowest subvector, see if we can do it earlier.
- // TODO: REMOVE ME - SimplifyMultipleUseDemandedBits shouldn't be creating
- // general nodes like this.
- if (Idx == 0 && Vec.isUndef()) {
- if (SDValue NewSub = SimplifyMultipleUseDemandedBits(
- Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1))
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- Op.getOperand(0), NewSub, Op.getOperand(2));
- }
break;
}
case ISD::VECTOR_SHUFFLE: {
@@ -954,33 +949,30 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
SDValue ExtOpA = Add.getOperand(0);
SDValue ExtOpB = Add.getOperand(1);
- auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3) {
+ SDValue Add2;
+ auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3, SDValue A) {
ConstantSDNode *ConstOp;
- if ((ConstOp = isConstOrConstSplat(Op1, DemandedElts)) &&
- ConstOp->isOne()) {
- ExtOpA = Op2;
- ExtOpB = Op3;
- return true;
- }
if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) &&
ConstOp->isOne()) {
ExtOpA = Op1;
ExtOpB = Op3;
+ Add2 = A;
return true;
}
if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) &&
ConstOp->isOne()) {
ExtOpA = Op1;
ExtOpB = Op2;
+ Add2 = A;
return true;
}
return false;
};
bool IsCeil =
(ExtOpA.getOpcode() == ISD::ADD &&
- MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB)) ||
+ MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB, ExtOpA)) ||
(ExtOpB.getOpcode() == ISD::ADD &&
- MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA));
+ MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA, ExtOpB));
// If the shift is signed (sra):
// - Needs >= 2 sign bit for both operands.
@@ -1040,11 +1032,25 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG,
EVT VT = Op.getValueType();
unsigned MinWidth =
std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8);
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(), PowerOf2Ceil(MinWidth));
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), llvm::bit_ceil(MinWidth));
if (VT.isVector())
NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount());
- if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT))
- return SDValue();
+ if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) {
+ // If we could not transform, and (both) adds are nuw/nsw, we can use the
+ // larger type size to do the transform.
+ if (!TLI.isOperationLegalOrCustom(AVGOpc, VT))
+ return SDValue();
+
+ if (DAG.computeOverflowForAdd(IsSigned, Add.getOperand(0),
+ Add.getOperand(1)) ==
+ SelectionDAG::OFK_Never &&
+ (!Add2 || DAG.computeOverflowForAdd(IsSigned, Add2.getOperand(0),
+ Add2.getOperand(1)) ==
+ SelectionDAG::OFK_Never))
+ NVT = VT;
+ else
+ return SDValue();
+ }
SDLoc DL(Op);
SDValue ResultAVG =
@@ -1198,7 +1204,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
if (!!DemandedVecElts)
- Known = KnownBits::commonBits(Known, KnownVec);
+ Known = Known.intersectWith(KnownVec);
return false;
}
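KnownBits::commonBits(A, B) becomes A.intersectWith(B) here and in the following hunks; the result keeps only the bits known in both inputs. A standalone sketch with made-up values:

#include "llvm/Support/KnownBits.h"
#include <cassert>

int main() {
  llvm::KnownBits A(8), B(8);
  A.Zero = llvm::APInt(8, 0xF0); A.One = llvm::APInt(8, 0x01);
  B.Zero = llvm::APInt(8, 0xC0); B.One = llvm::APInt(8, 0x03);
  llvm::KnownBits C = A.intersectWith(B);  // formerly KnownBits::commonBits(A, B)
  assert(C.Zero == 0xC0 && C.One == 0x01);
  return 0;
}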
@@ -1226,9 +1232,9 @@ bool TargetLowering::SimplifyDemandedBits(
Known.Zero.setAllBits();
Known.One.setAllBits();
if (!!DemandedSubElts)
- Known = KnownBits::commonBits(Known, KnownSub);
+ Known = Known.intersectWith(KnownSub);
if (!!DemandedSrcElts)
- Known = KnownBits::commonBits(Known, KnownSrc);
+ Known = Known.intersectWith(KnownSrc);
// Attempt to avoid multi-use src if we don't need anything from it.
if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
@@ -1290,7 +1296,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Known bits are shared by every demanded subvector element.
if (!!DemandedSubElts)
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
break;
}
@@ -1314,13 +1320,13 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
Depth + 1))
return true;
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
if (!!DemandedRHS) {
if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
Depth + 1))
return true;
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
// Attempt to avoid multi-use ops if we don't need anything from them.
@@ -1622,7 +1628,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
break;
case ISD::VSELECT:
if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts,
@@ -1635,7 +1641,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
break;
case ISD::SELECT_CC:
if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
@@ -1652,7 +1658,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
break;
case ISD::SETCC: {
SDValue Op0 = Op.getOperand(0);
@@ -1724,12 +1730,9 @@ bool TargetLowering::SimplifyDemandedBits(
unsigned InnerBits = InnerVT.getScalarSizeInBits();
if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
- EVT ShTy = getShiftAmountTy(InnerVT, DL);
- if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
- ShTy = InnerVT;
- SDValue NarrowShl =
- TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
- TLO.DAG.getConstant(ShAmt, dl, ShTy));
+ SDValue NarrowShl = TLO.DAG.getNode(
+ ISD::SHL, dl, InnerVT, InnerOp,
+ TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl));
return TLO.CombineTo(
Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
}
@@ -1748,7 +1751,7 @@ bool TargetLowering::SimplifyDemandedBits(
if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
DemandedBits.getActiveBits() <=
(InnerBits - InnerShAmt + ShAmt) &&
- DemandedBits.countTrailingZeros() >= ShAmt) {
+ DemandedBits.countr_zero() >= ShAmt) {
SDValue NewSA =
TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
@@ -1771,7 +1774,7 @@ bool TargetLowering::SimplifyDemandedBits(
Known.Zero.setLowBits(ShAmt);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0) {
@@ -1789,7 +1792,7 @@ bool TargetLowering::SimplifyDemandedBits(
// This is a variable shift, so we can't shift the demand mask by a known
// amount. But if we are not demanding high bits, then we are not
// demanding those bits from the pre-shifted operand either.
- if (unsigned CTLZ = DemandedBits.countLeadingZeros()) {
+ if (unsigned CTLZ = DemandedBits.countl_zero()) {
APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO,
Depth + 1)) {
@@ -1814,7 +1817,7 @@ bool TargetLowering::SimplifyDemandedBits(
unsigned ShAmt = MaxSA->getZExtValue();
unsigned NumSignBits =
TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
- unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
+ unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
return TLO.CombineTo(Op, Op0);
}
@@ -1865,6 +1868,27 @@ bool TargetLowering::SimplifyDemandedBits(
if (Op->getFlags().hasExact())
InDemandedMask.setLowBits(ShAmt);
+ // Narrow shift to lower half - similar to ShrinkDemandedOp.
+ // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K))
+ if ((BitWidth % 2) == 0 && !VT.isVector() &&
+ ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) ||
+ TLO.DAG.MaskedValueIsZero(
+ Op0, APInt::getHighBitsSet(BitWidth, BitWidth / 2)))) {
+ EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2);
+ if (isNarrowingProfitable(VT, HalfVT) &&
+ isTypeDesirableForOp(ISD::SRL, HalfVT) &&
+ isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) &&
+ (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT))) {
+ SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0);
+ SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant(
+ ShAmt, HalfVT, dl, TLO.LegalTypes());
+ SDValue NewShift =
+ TLO.DAG.getNode(ISD::SRL, dl, HalfVT, NewOp, NewShiftAmt);
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift));
+ }
+ }
+
// Compute the new bits that are at the top now.
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
Depth + 1))
@@ -1876,7 +1900,7 @@ bool TargetLowering::SimplifyDemandedBits(
Known.Zero.setHighBits(ShAmt);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0) {
@@ -1884,6 +1908,10 @@ bool TargetLowering::SimplifyDemandedBits(
return TLO.CombineTo(Op, NewOp);
}
}
+ } else {
+ // Use generic knownbits computation as it has support for non-uniform
+ // shift amounts.
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
}
break;
}
@@ -1894,7 +1922,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If we only want bits that already match the signbit then we don't need
// to shift.
- unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
+ unsigned NumHiDemandedBits = BitWidth - DemandedBits.countr_zero();
if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
NumHiDemandedBits)
return TLO.CombineTo(Op, Op0);
@@ -1926,7 +1954,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
- if (DemandedBits.countLeadingZeros() < ShAmt)
+ if (DemandedBits.countl_zero() < ShAmt)
InDemandedMask.setSignBit();
if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
@@ -1939,7 +1967,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
if (Known.Zero[BitWidth - ShAmt - 1] ||
- DemandedBits.countLeadingZeros() >= ShAmt) {
+ DemandedBits.countl_zero() >= ShAmt) {
SDNodeFlags Flags;
Flags.setExact(Op->getFlags().hasExact());
return TLO.CombineTo(
@@ -2003,8 +2031,7 @@ bool TargetLowering::SimplifyDemandedBits(
Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
- Known.One |= Known2.One;
- Known.Zero |= Known2.Zero;
+ Known = Known.unionWith(Known2);
// Attempt to avoid multi-use ops if we don't need anything from them.
if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() ||
@@ -2059,12 +2086,12 @@ bool TargetLowering::SimplifyDemandedBits(
// See if we don't demand either half of the rotated bits.
if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) &&
- DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) {
+ DemandedBits.countr_zero() >= (IsROTL ? Amt : RevAmt)) {
Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType());
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1));
}
if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) &&
- DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) {
+ DemandedBits.countl_zero() >= (IsROTL ? RevAmt : Amt)) {
Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType());
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
}
@@ -2120,8 +2147,8 @@ bool TargetLowering::SimplifyDemandedBits(
// If the only bits demanded come from one byte of the bswap result,
// just shift the input byte into position to eliminate the bswap.
- unsigned NLZ = DemandedBits.countLeadingZeros();
- unsigned NTZ = DemandedBits.countTrailingZeros();
+ unsigned NLZ = DemandedBits.countl_zero();
+ unsigned NTZ = DemandedBits.countr_zero();
// Round NTZ down to the next byte. If we have 11 trailing zeros, then
// we need all the bits down to bit 8. Likewise, round NLZ. If we
@@ -2557,6 +2584,15 @@ bool TargetLowering::SimplifyDemandedBits(
if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
KnownSrcBits, TLO, Depth + 1))
return true;
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
+ if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
+ SDValue NewOp = TLO.DAG.getBitcast(VT, DemandedSrc);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
}
// If this is a bitcast, let computeKnownBits handle it. Only do this on a
@@ -2572,9 +2608,9 @@ bool TargetLowering::SimplifyDemandedBits(
// The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
// If we demand exactly one bit N and we have "X * (C' << N)" where C' is
// odd (has LSB set), then the left-shifted low bit of X is the answer.
- unsigned CTZ = DemandedBits.countTrailingZeros();
+ unsigned CTZ = DemandedBits.countr_zero();
ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts);
- if (C && C->getAPIntValue().countTrailingZeros() == CTZ) {
+ if (C && C->getAPIntValue().countr_zero() == CTZ) {
EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout());
SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy);
SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC);
@@ -2596,11 +2632,12 @@ bool TargetLowering::SimplifyDemandedBits(
// of the highest bit demanded of them.
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
SDNodeFlags Flags = Op.getNode()->getFlags();
- unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
+ unsigned DemandedBitsLZ = DemandedBits.countl_zero();
APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
- if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
+ KnownBits KnownOp0, KnownOp1;
+ if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, KnownOp0, TLO,
Depth + 1) ||
- SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
+ SimplifyDemandedBits(Op1, LoMask, DemandedElts, KnownOp1, TLO,
Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
@@ -2697,7 +2734,14 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
- [[fallthrough]];
+ if (Op.getOpcode() == ISD::MUL) {
+ Known = KnownBits::mul(KnownOp0, KnownOp1);
+ } else { // Op.getOpcode() is either ISD::ADD or ISD::SUB.
+ Known = KnownBits::computeForAddSub(Op.getOpcode() == ISD::ADD,
+ Flags.hasNoSignedWrap(), KnownOp0,
+ KnownOp1);
+ }
+ break;
}
default:
// We also ask the target about intrinsics (which could be specific to it).
@@ -3914,8 +3958,7 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL) const {
- assert(isConstOrConstSplat(N1C) &&
- isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
+ assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
"Should be a comparison with 0.");
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
"Valid only for [in]equality comparisons.");
@@ -4081,8 +4124,12 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
- SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
+ // It's not uncommon for a known-never-zero X to appear in (ctpop X) eq/ne 1,
+ // so check before emitting a potentially unnecessary op.
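+ // e.g. for (ctpop X) == 1 with X known to be non-zero, the (X != 0) clause
+ // is always true, so only the power-of-two test (X & (X - 1)) == 0 remains.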
+ if (DAG.isKnownNeverZero(CTOp))
+ return RHS;
+ SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
}
@@ -4219,12 +4266,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
bool N1ConstOrSplat =
isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
- // Ensure that the constant occurs on the RHS and fold constant comparisons.
+ // Canonicalize toward having the constant on the RHS.
// TODO: Handle non-splat vector constants. All undef causes trouble.
// FIXME: We can't yet fold constant scalable vector splats, so avoid an
// infinite loop here when we encounter one.
ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
- if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) &&
+ if (N0ConstOrSplat && !N1ConstOrSplat &&
(DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
@@ -4275,7 +4322,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// zero.
if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
- isPowerOf2_32(N0.getScalarValueSizeInBits())) {
+ llvm::has_single_bit<uint32_t>(N0.getScalarValueSizeInBits())) {
if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
@@ -4315,7 +4362,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// DAGCombine turns costly ZExts into ANDs
if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
if ((C->getAPIntValue()+1).isPowerOf2()) {
- MinBits = C->getAPIntValue().countTrailingOnes();
+ MinBits = C->getAPIntValue().countr_one();
PreExt = N0->getOperand(0);
}
} else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
@@ -4336,7 +4383,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
// Figure out how many bits we need to preserve this constant.
- unsigned ReqdBits = Signed ? C1.getMinSignedBits() : C1.getActiveBits();
+ unsigned ReqdBits = Signed ? C1.getSignificantBits() : C1.getActiveBits();
// Make sure we're not losing bits from the constant.
if (MinBits > 0 &&
@@ -4510,7 +4557,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If the constant doesn't fit into the number of bits for the source of
// the sign extension, it is impossible for both sides to be equal.
- if (C1.getMinSignedBits() > ExtSrcTyBits)
+ if (C1.getSignificantBits() > ExtSrcTyBits)
return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);
assert(ExtDstTy == N0.getOperand(0).getValueType() &&
@@ -4744,8 +4791,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// For example, when high 32-bits of i64 X are known clear:
// all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
// all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
- bool CmpZero = N1C->getAPIntValue().isZero();
- bool CmpNegOne = N1C->getAPIntValue().isAllOnes();
+ bool CmpZero = N1C->isZero();
+ bool CmpNegOne = N1C->isAllOnes();
if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
// Match or(lo,shl(hi,bw/2)) pattern.
auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
@@ -4866,7 +4913,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
- if (C1.getMinSignedBits() <= 64 &&
+ if (C1.getSignificantBits() <= 64 &&
!isLegalICmpImmediate(C1.getSExtValue())) {
EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
// (X & -256) == 256 -> (X >> 8) == 1
@@ -4875,7 +4922,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
const APInt &AndRHSC = AndRHS->getAPIntValue();
if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
- unsigned ShiftBits = AndRHSC.countTrailingZeros();
+ unsigned ShiftBits = AndRHSC.countr_zero();
if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
SDValue Shift =
DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
@@ -4896,14 +4943,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
APInt NewC = C1;
ISD::CondCode NewCond = Cond;
if (AdjOne) {
- ShiftBits = C1.countTrailingOnes();
+ ShiftBits = C1.countr_one();
NewC = NewC + 1;
NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
} else {
- ShiftBits = C1.countTrailingZeros();
+ ShiftBits = C1.countr_zero();
}
NewC.lshrInPlace(ShiftBits);
- if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
+ if (ShiftBits && NewC.getSignificantBits() <= 64 &&
isLegalICmpImmediate(NewC.getSExtValue()) &&
!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
@@ -4980,6 +5027,23 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, N0, N1, NewCond);
}
+ // ~X > ~Y --> Y > X
+ // ~X < ~Y --> Y < X
+ // ~X < C --> X > ~C
+ // ~X > C --> X < ~C
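+ // Bitwise-not is strictly decreasing in both the signed and unsigned orders
+ // (~X == -1 - X), so comparing two NOTs is the same as comparing the original
+ // operands with their order swapped; for the constant cases, applying NOT to
+ // both sides gives the folded form.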
+ if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
+ N0.getValueType().isInteger()) {
+ if (isBitwiseNot(N0)) {
+ if (isBitwiseNot(N1))
+ return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ SDValue Not = DAG.getNOT(dl, N1, OpVT);
+ return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
+ }
+ }
+ }
+
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
N0.getValueType().isInteger()) {
if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
@@ -5225,7 +5289,7 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
}
SDValue TargetLowering::LowerAsmOutputForConstraint(
- SDValue &Chain, SDValue &Flag, const SDLoc &DL,
+ SDValue &Chain, SDValue &Glue, const SDLoc &DL,
const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
return SDValue();
}
@@ -5308,10 +5372,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
}
-void TargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
- SmallVectorImpl<SDValue> &Ops,
- SelectionDAG &DAG) const {
- return;
+void TargetLowering::CollectTargetIntrinsicOperands(
+ const CallInst &I, SmallVectorImpl<SDValue> &Ops, SelectionDAG &DAG) const {
}
std::pair<unsigned, const TargetRegisterClass *>
@@ -5782,7 +5844,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
if (C->isZero())
return false;
APInt Divisor = C->getAPIntValue();
- unsigned Shift = Divisor.countTrailingZeros();
+ unsigned Shift = Divisor.countr_zero();
if (Shift) {
Divisor.ashrInPlace(Shift);
UseSRA = true;
@@ -5972,6 +6034,20 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
return SDValue(LoHi.getNode(), 1);
}
+ // If a type twice as wide is legal, widen and use a mul plus a shift.
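+ // e.g. for i32 with a legal i64 MUL: sign-extend both operands to i64,
+ // multiply, shift right by 32 and truncate, recovering the same high half
+ // that SMUL_LOHI would have produced.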
+ unsigned Size = VT.getScalarSizeInBits();
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
+ if (VT.isVector())
+ WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
+ VT.getVectorElementCount());
+ if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
+ X = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, X);
+ Y = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, Y);
+ Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
+ Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
+ DAG.getShiftAmountConstant(EltBits, WideVT, dl));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
+ }
return SDValue();
};
@@ -6045,9 +6121,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
// UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in
// the dividend exceeds the leading zeros for the divisor.
- LeadingZeros =
- std::min(LeadingZeros,
- cast<ConstantSDNode>(N1)->getAPIntValue().countLeadingZeros());
+ LeadingZeros = std::min(
+ LeadingZeros, cast<ConstantSDNode>(N1)->getAPIntValue().countl_zero());
}
bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
@@ -6146,6 +6221,20 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
return SDValue(LoHi.getNode(), 1);
}
+ // If a type twice as wide is legal, widen and use a mul plus a shift.
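+ // Same widening trick as in BuildSDIV above, but using zero-extension for
+ // the unsigned case.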
+ unsigned Size = VT.getScalarSizeInBits();
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), Size * 2);
+ if (VT.isVector())
+ WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
+ VT.getVectorElementCount());
+ if (isOperationLegalOrCustom(ISD::MUL, WideVT)) {
+ X = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, X);
+ Y = DAG.getNode(ISD::ZERO_EXTEND, dl, WideVT, Y);
+ Y = DAG.getNode(ISD::MUL, dl, WideVT, X, Y);
+ Y = DAG.getNode(ISD::SRL, dl, WideVT, Y,
+ DAG.getShiftAmountConstant(EltBits, WideVT, dl));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
+ }
return SDValue(); // No mulhu or equivalent
};
@@ -6298,7 +6387,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
// Decompose D into D0 * 2^K
- unsigned K = D.countTrailingZeros();
+ unsigned K = D.countr_zero();
assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
APInt D0 = D.lshr(K);
@@ -6540,7 +6629,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
AllDivisorsAreOnes &= D.isOne();
// Decompose D into D0 * 2^K
- unsigned K = D.countTrailingZeros();
+ unsigned K = D.countr_zero();
assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
APInt D0 = D.lshr(K);
@@ -6696,9 +6785,9 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// NOTE: we avoid letting illegal types through even if we're before legalize
// ops -- legalization has a hard time producing good code for the code that
// follows.
- if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
+ if (!isOperationLegalOrCustom(ISD::SETCC, SETCCVT) ||
!isOperationLegalOrCustom(ISD::AND, VT) ||
- !isOperationLegalOrCustom(Cond, VT) ||
+ !isCondCodeLegalOrCustom(Cond, VT.getSimpleVT()) ||
!isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
return SDValue();
@@ -6748,20 +6837,23 @@ SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
EVT VT = Op.getValueType();
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
- // Testing it with denormal inputs to avoid wrong estimate.
- if (Mode.Input == DenormalMode::IEEE) {
- // This is specifically a check for the handling of denormal inputs,
- // not the result.
- // Test = fabs(X) < SmallestNormal
- const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
- APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
- SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
- SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
- return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
+ // This is specifically a check for the handling of denormal inputs, not the
+ // result.
+ if (Mode.Input == DenormalMode::PreserveSign ||
+ Mode.Input == DenormalMode::PositiveZero) {
+ // Test = X == 0.0
+ return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
}
- // Test = X == 0.0
- return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
+
+ // Test with denormal inputs to avoid a wrong estimate.
+ //
+ // Test = fabs(X) < SmallestNormal
+ const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
+ APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
+ SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
+ SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
+ return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
}
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
@@ -6769,7 +6861,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
NegatibleCost &Cost,
unsigned Depth) const {
// fneg is removable even if it has multiple uses.
- if (Op.getOpcode() == ISD::FNEG) {
+ if (Op.getOpcode() == ISD::FNEG || Op.getOpcode() == ISD::VP_FNEG) {
Cost = NegatibleCost::Cheaper;
return Op.getOperand(0);
}
@@ -7212,7 +7304,7 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
Merge(Lo, Hi));
else
- Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
+ Next = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(VT, BoolType), Next,
Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));
SDValue Carry = Next.getValue(1);
@@ -7226,7 +7318,7 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
Carry);
else
- Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
+ Hi = DAG.getNode(ISD::UADDO_CARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
Zero, Carry);
Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
@@ -7330,7 +7422,7 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
// If the divisor is even, shift it until it becomes odd.
unsigned TrailingZeros = 0;
if (!Divisor[0]) {
- TrailingZeros = Divisor.countTrailingZeros();
+ TrailingZeros = Divisor.countr_zero();
Divisor.lshrInPlace(TrailingZeros);
}
@@ -7342,14 +7434,10 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
// then add in the carry.
// TODO: If we can't split it in half, we might be able to split into 3 or
// more pieces using a smaller bit width.
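  // For example, an i64 urem by 3 expanded via i32 halves works because
  // 2^32 mod 3 == 1, so (Hi * 2^32 + Lo) mod 3 == (Hi + Lo) mod 3.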
- if (HalfMaxPlus1.urem(Divisor).isOneValue()) {
+ if (HalfMaxPlus1.urem(Divisor).isOne()) {
assert(!LL == !LH && "Expected both input halves or no input halves!");
- if (!LL) {
- LL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, N->getOperand(0),
- DAG.getIntPtrConstant(0, dl));
- LH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, N->getOperand(0),
- DAG.getIntPtrConstant(1, dl));
- }
+ if (!LL)
+ std::tie(LL, LH) = DAG.SplitScalar(N->getOperand(0), dl, HiLoVT, HiLoVT);
// Shift the input by the number of TrailingZeros in the divisor. The
// shifted out bits will be added to the remainder later.
@@ -7372,13 +7460,13 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl));
}
- // Use addcarry if we can, otherwise use a compare to detect overflow.
+ // Use uaddo_carry if we can, otherwise use a compare to detect overflow.
EVT SetCCType =
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT);
- if (isOperationLegalOrCustom(ISD::ADDCARRY, HiLoVT)) {
+ if (isOperationLegalOrCustom(ISD::UADDO_CARRY, HiLoVT)) {
SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType);
Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH);
- Sum = DAG.getNode(ISD::ADDCARRY, dl, VTList, Sum,
+ Sum = DAG.getNode(ISD::UADDO_CARRY, dl, VTList, Sum,
DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1));
} else {
Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH);
@@ -7423,10 +7511,8 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N,
DAG.getConstant(MulFactor, dl, VT));
// Split the quotient into low and high parts.
- SDValue QuotL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
- DAG.getIntPtrConstant(0, dl));
- SDValue QuotH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient,
- DAG.getIntPtrConstant(1, dl));
+ SDValue QuotL, QuotH;
+ std::tie(QuotL, QuotH) = DAG.SplitScalar(Quotient, dl, HiLoVT, HiLoVT);
Result.push_back(QuotL);
Result.push_back(QuotH);
}
@@ -7915,7 +8001,7 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
// -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
- BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
+ llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);
@@ -7988,14 +8074,19 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
}
// If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
- // instead if there are no NaNs.
- if (Node->getFlags().hasNoNaNs()) {
+ // instead if there are no NaNs and there can't be an incompatible zero
+ // compare: at least one operand isn't +/-0, or there are no signed-zeros.
+ if ((Node->getFlags().hasNoNaNs() ||
+ (DAG.isKnownNeverNaN(Node->getOperand(0)) &&
+ DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
+ (Node->getFlags().hasNoSignedZeros() ||
+ DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
+ DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {
unsigned IEEE2018Op =
Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
- if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
+ if (isOperationLegalOrCustom(IEEE2018Op, VT))
return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
Node->getOperand(1), Node->getFlags());
- }
}
if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG))
@@ -8004,15 +8095,39 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
return SDValue();
}
+/// Returns a true value if this FPClassTest can be performed with an ordered
+/// fcmp to 0, and a false value if it's an unordered fcmp to 0. Returns
+/// std::nullopt if it cannot be performed as a compare with 0.
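+/// For example, a test for fcZero with IEEE denormal-input handling maps to an
+/// ordered compare against 0.0, fcZero|fcNan maps to the unordered form, and
+/// fcZero|fcSubnormal only qualifies when denormal inputs are treated as zero.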
+static std::optional<bool> isFCmpEqualZero(FPClassTest Test,
+ const fltSemantics &Semantics,
+ const MachineFunction &MF) {
+ FPClassTest OrderedMask = Test & ~fcNan;
+ FPClassTest NanTest = Test & fcNan;
+ bool IsOrdered = NanTest == fcNone;
+ bool IsUnordered = NanTest == fcNan;
+
+ // Skip cases that are testing for only a qnan or snan.
+ if (!IsOrdered && !IsUnordered)
+ return std::nullopt;
+
+ if (OrderedMask == fcZero &&
+ MF.getDenormalMode(Semantics).Input == DenormalMode::IEEE)
+ return IsOrdered;
+ if (OrderedMask == (fcZero | fcSubnormal) &&
+ MF.getDenormalMode(Semantics).inputsAreZero())
+ return IsOrdered;
+ return std::nullopt;
+}
+
SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
- unsigned Test, SDNodeFlags Flags,
+ FPClassTest Test, SDNodeFlags Flags,
const SDLoc &DL,
SelectionDAG &DAG) const {
EVT OperandVT = Op.getValueType();
assert(OperandVT.isFloatingPoint());
// Degenerated cases.
- if (Test == 0)
+ if (Test == fcNone)
return DAG.getBoolConstant(false, DL, ResultVT, OperandVT);
if ((Test & fcAllFlags) == fcAllFlags)
return DAG.getBoolConstant(true, DL, ResultVT, OperandVT);
@@ -8028,7 +8143,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
// Some checks may be represented as inversion of simpler check, for example
// "inf|normal|subnormal|zero" => !"nan".
bool IsInverted = false;
- if (unsigned InvertedCheck = getInvertedFPClassTest(Test)) {
+ if (FPClassTest InvertedCheck = invertFPClassTestIfSimpler(Test)) {
IsInverted = true;
Test = InvertedCheck;
}
@@ -8043,13 +8158,40 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
// exceptions are ignored.
if (Flags.hasNoFPExcept() &&
isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) {
- if (Test == fcZero)
+ ISD::CondCode OrderedCmpOpcode = IsInverted ? ISD::SETUNE : ISD::SETOEQ;
+ ISD::CondCode UnorderedCmpOpcode = IsInverted ? ISD::SETONE : ISD::SETUEQ;
+
+ if (std::optional<bool> IsCmp0 =
+ isFCmpEqualZero(Test, Semantics, DAG.getMachineFunction());
+ IsCmp0 && (isCondCodeLegalOrCustom(
+ *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode,
+ OperandVT.getScalarType().getSimpleVT()))) {
+
+ // If denormals could be implicitly treated as 0, this is not equivalent
+ // to a compare with 0 since it will also be true for denormals.
return DAG.getSetCC(DL, ResultVT, Op,
DAG.getConstantFP(0.0, DL, OperandVT),
- IsInverted ? ISD::SETUNE : ISD::SETOEQ);
- if (Test == fcNan)
+ *IsCmp0 ? OrderedCmpOpcode : UnorderedCmpOpcode);
+ }
+
+ if (Test == fcNan &&
+ isCondCodeLegalOrCustom(IsInverted ? ISD::SETO : ISD::SETUO,
+ OperandVT.getScalarType().getSimpleVT())) {
return DAG.getSetCC(DL, ResultVT, Op, Op,
IsInverted ? ISD::SETO : ISD::SETUO);
+ }
+
+ if (Test == fcInf &&
+ isCondCodeLegalOrCustom(IsInverted ? ISD::SETUNE : ISD::SETOEQ,
+ OperandVT.getScalarType().getSimpleVT()) &&
+ isOperationLegalOrCustom(ISD::FABS, OperandVT.getScalarType())) {
+ // isinf(x) --> fabs(x) == inf
+ SDValue Abs = DAG.getNode(ISD::FABS, DL, OperandVT, Op);
+ SDValue Inf =
+ DAG.getConstantFP(APFloat::getInf(Semantics), DL, OperandVT);
+ return DAG.getSetCC(DL, ResultVT, Abs, Inf,
+ IsInverted ? ISD::SETUNE : ISD::SETOEQ);
+ }
}
// In the general case use integer operations.
@@ -8071,7 +8213,7 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
APInt QNaNBitMask =
APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
- APInt InvertionMask = APInt::getAllOnesValue(ResultVT.getScalarSizeInBits());
+ APInt InvertionMask = APInt::getAllOnes(ResultVT.getScalarSizeInBits());
SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT);
SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT);
@@ -8129,6 +8271,18 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
}
appendResult(PartialRes);
+ if (FPClassTest PartialCheck = Test & (fcZero | fcSubnormal)) {
+ // fcZero | fcSubnormal => test all exponent bits are 0
+ // TODO: Handle sign bit specific cases
+ if (PartialCheck == (fcZero | fcSubnormal)) {
+ SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ExpMaskV);
+ SDValue ExpIsZero =
+ DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ);
+ appendResult(ExpIsZero);
+ Test &= ~PartialCheck & fcAllFlags;
+ }
+ }
+
// Check for individual classes.
if (unsigned PartialCheck = Test & fcZero) {
@@ -8141,6 +8295,19 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
appendResult(PartialRes);
}
+ if (unsigned PartialCheck = Test & fcSubnormal) {
+ // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
+ // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
+ SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
+ SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
+ SDValue VMinusOneV =
+ DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
+ PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
+ if (PartialCheck == fcNegSubnormal)
+ PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
+ appendResult(PartialRes);
+ }
+
if (unsigned PartialCheck = Test & fcInf) {
if (PartialCheck == fcPosInf)
PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ);
@@ -8185,19 +8352,6 @@ SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op,
appendResult(PartialRes);
}
- if (unsigned PartialCheck = Test & fcSubnormal) {
- // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set)
- // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set)
- SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV;
- SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT);
- SDValue VMinusOneV =
- DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT));
- PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT);
- if (PartialCheck == fcNegSubnormal)
- PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV);
- appendResult(PartialRes);
- }
-
if (unsigned PartialCheck = Test & fcNormal) {
// isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1))
APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
@@ -8609,6 +8763,38 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
}
+SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
+ SDLoc dl(N);
+ EVT VT = N->getValueType(0);
+ SDValue LHS = DAG.getFreeze(N->getOperand(0));
+ SDValue RHS = DAG.getFreeze(N->getOperand(1));
+ bool IsSigned = N->getOpcode() == ISD::ABDS;
+
+ // abds(lhs, rhs) -> sub(smax(lhs,rhs), smin(lhs,rhs))
+ // abdu(lhs, rhs) -> sub(umax(lhs,rhs), umin(lhs,rhs))
+ unsigned MaxOpc = IsSigned ? ISD::SMAX : ISD::UMAX;
+ unsigned MinOpc = IsSigned ? ISD::SMIN : ISD::UMIN;
+ if (isOperationLegal(MaxOpc, VT) && isOperationLegal(MinOpc, VT)) {
+ SDValue Max = DAG.getNode(MaxOpc, dl, VT, LHS, RHS);
+ SDValue Min = DAG.getNode(MinOpc, dl, VT, LHS, RHS);
+ return DAG.getNode(ISD::SUB, dl, VT, Max, Min);
+ }
+
+ // abdu(lhs, rhs) -> or(usubsat(lhs,rhs), usubsat(rhs,lhs))
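+ // (one of the two saturating subtractions is always zero, so the OR yields
+ // |lhs - rhs|)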
+ if (!IsSigned && isOperationLegal(ISD::USUBSAT, VT))
+ return DAG.getNode(ISD::OR, dl, VT,
+ DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
+ DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
+
+ // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
+ // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
+ SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
+ return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
+ DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
+}
+
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
@@ -8796,8 +8982,7 @@ SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
Tmp2 =
DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));
- APInt Shift(Sz, 1);
- Shift <<= J;
+ APInt Shift = APInt::getOneBitSet(Sz, J);
Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
}
@@ -9494,10 +9679,21 @@ SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
EVT VT = Op0.getValueType();
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
unsigned Opcode = Node->getOpcode();
SDLoc DL(Node);
+ // umax(x,1) --> sub(x,cmpeq(x,0)) iff cmp result is allbits
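+ // e.g. for x == 0 the setcc yields all-ones (-1), so x - (-1) == 1; for any
+ // other x the setcc yields 0 and x is returned unchanged.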
+ if (Opcode == ISD::UMAX && llvm::isOneOrOneSplat(Op1, true) && BoolVT == VT &&
+ getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+ Op0 = DAG.getFreeze(Op0);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::SUB, DL, VT, Op0,
+ DAG.getSetCC(DL, VT, Op0, Zero, ISD::SETEQ));
+ }
+
// umin(x,y) -> sub(x,usubsat(x,y))
+ // TODO: Missing freeze(Op0)?
if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::USUBSAT, VT)) {
return DAG.getNode(ISD::SUB, DL, VT, Op0,
@@ -9505,30 +9701,59 @@ SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
}
// umax(x,y) -> add(x,usubsat(y,x))
+ // TODO: Missing freeze(Op0)?
if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
isOperationLegal(ISD::USUBSAT, VT)) {
return DAG.getNode(ISD::ADD, DL, VT, Op0,
DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
}
- // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
- ISD::CondCode CC;
- switch (Opcode) {
- default: llvm_unreachable("How did we get here?");
- case ISD::SMAX: CC = ISD::SETGT; break;
- case ISD::SMIN: CC = ISD::SETLT; break;
- case ISD::UMAX: CC = ISD::SETUGT; break;
- case ISD::UMIN: CC = ISD::SETULT; break;
- }
-
// FIXME: Should really try to split the vector in case it's legal on a
// subvector.
if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
return DAG.UnrollVectorOp(Node);
- EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
- SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
- return DAG.getSelect(DL, VT, Cond, Op0, Op1);
+ // Attempt to find an existing SETCC node that we can reuse.
+ // TODO: Do we need a generic doesSETCCNodeExist?
+ // TODO: Missing freeze(Op0)/freeze(Op1)?
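+ // e.g. for smin(x, y), an existing (setcc x, y, setgt) node can be reused by
+ // swapping the select operands: (select (setgt x, y), y, x).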
+ auto buildMinMax = [&](ISD::CondCode PrefCC, ISD::CondCode AltCC,
+ ISD::CondCode PrefCommuteCC,
+ ISD::CondCode AltCommuteCC) {
+ SDVTList BoolVTList = DAG.getVTList(BoolVT);
+ for (ISD::CondCode CC : {PrefCC, AltCC}) {
+ if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
+ {Op0, Op1, DAG.getCondCode(CC)})) {
+ SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
+ return DAG.getSelect(DL, VT, Cond, Op0, Op1);
+ }
+ }
+ for (ISD::CondCode CC : {PrefCommuteCC, AltCommuteCC}) {
+ if (DAG.doesNodeExist(ISD::SETCC, BoolVTList,
+ {Op0, Op1, DAG.getCondCode(CC)})) {
+ SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, CC);
+ return DAG.getSelect(DL, VT, Cond, Op1, Op0);
+ }
+ }
+ SDValue Cond = DAG.getSetCC(DL, BoolVT, Op0, Op1, PrefCC);
+ return DAG.getSelect(DL, VT, Cond, Op0, Op1);
+ };
+
+ // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
+ // -> Y = (A < B) ? B : A
+ // -> Y = (A >= B) ? A : B
+ // -> Y = (A <= B) ? B : A
+ switch (Opcode) {
+ case ISD::SMAX:
+ return buildMinMax(ISD::SETGT, ISD::SETGE, ISD::SETLT, ISD::SETLE);
+ case ISD::SMIN:
+ return buildMinMax(ISD::SETLT, ISD::SETLE, ISD::SETGT, ISD::SETGE);
+ case ISD::UMAX:
+ return buildMinMax(ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE);
+ case ISD::UMIN:
+ return buildMinMax(ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE);
+ }
+
+ llvm_unreachable("How did we get here?");
}
SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
@@ -9607,6 +9832,37 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
}
+ if (Opcode == ISD::SADDSAT || Opcode == ISD::SSUBSAT) {
+ APInt MinVal = APInt::getSignedMinValue(BitWidth);
+ APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
+
+ KnownBits KnownLHS = DAG.computeKnownBits(LHS);
+ KnownBits KnownRHS = DAG.computeKnownBits(RHS);
+
+ // If either of the operand signs are known, then they are guaranteed to
+ // only saturate in one direction. If non-negative they will saturate
+ // towards SIGNED_MAX, if negative they will saturate towards SIGNED_MIN.
+ //
+ // In the case of ISD::SSUBSAT, 'x - y' is equivalent to 'x + (-y)', so the
+ // sign of 'y' has to be flipped.
+
+ bool LHSIsNonNegative = KnownLHS.isNonNegative();
+ bool RHSIsNonNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNonNegative()
+ : KnownRHS.isNegative();
+ if (LHSIsNonNegative || RHSIsNonNegative) {
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ return DAG.getSelect(dl, VT, Overflow, SatMax, SumDiff);
+ }
+
+ bool LHSIsNegative = KnownLHS.isNegative();
+ bool RHSIsNegative = Opcode == ISD::SADDSAT ? KnownRHS.isNegative()
+ : KnownRHS.isNonNegative();
+ if (LHSIsNegative || RHSIsNegative) {
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ return DAG.getSelect(dl, VT, Overflow, SatMin, SumDiff);
+ }
+ }
+
// Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
APInt MinVal = APInt::getSignedMinValue(BitWidth);
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
@@ -9892,8 +10148,8 @@ void TargetLowering::expandUADDSUBO(
SDValue RHS = Node->getOperand(1);
bool IsAdd = Node->getOpcode() == ISD::UADDO;
- // If ADD/SUBCARRY is legal, use that instead.
- unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
+ // If UADDO_CARRY/USUBO_CARRY is legal, use that instead.
+ unsigned OpcCarry = IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
@@ -9919,6 +10175,11 @@ void TargetLowering::expandUADDSUBO(
SetCC =
DAG.getSetCC(dl, SetCCType, Result,
DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ);
+ } else if (IsAdd && isAllOnesConstant(RHS)) {
+ // Special case: uaddo X, -1 overflows if X != 0.
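+ // X + (2^BW - 1) wraps around for every X except 0, so the carry-out is
+ // exactly the condition X != 0.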
+ SetCC =
+ DAG.getSetCC(dl, SetCCType, LHS,
+ DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETNE);
} else {
ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
@@ -10271,8 +10532,10 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
// Otherwise, select 0 if Src is NaN.
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
- return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt,
- ISD::CondCode::SETUO);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+ SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
+ return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, FpToInt);
}
SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
@@ -10286,13 +10549,16 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
SDValue Select = FpToInt;
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+
// If Src ULT MinFloat, select MinInt. In particular, this also selects
// MinInt if Src is NaN.
- Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select,
- ISD::CondCode::SETULT);
+ SDValue ULT = DAG.getSetCC(dl, SetCCVT, Src, MinFloatNode, ISD::SETULT);
+ Select = DAG.getSelect(dl, DstVT, ULT, MinIntNode, Select);
// If Src OGT MaxFloat, select MaxInt.
- Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select,
- ISD::CondCode::SETOGT);
+ SDValue OGT = DAG.getSetCC(dl, SetCCVT, Src, MaxFloatNode, ISD::SETOGT);
+ Select = DAG.getSelect(dl, DstVT, OGT, MaxIntNode, Select);
// In the unsigned case we are done, because we mapped NaN to MinInt, which
// is already zero.
@@ -10301,7 +10567,8 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
// Otherwise, select 0 if Src is NaN.
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
- return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
+ SDValue IsNan = DAG.getSetCC(dl, SetCCVT, Src, Src, ISD::CondCode::SETUO);
+ return DAG.getSelect(dl, DstVT, IsNan, ZeroInt, Select);
}
SDValue TargetLowering::expandVectorSplice(SDNode *Node,
diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp
index 2411b1ad5203..4b1d3637a746 100644
--- a/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -53,6 +53,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -97,6 +98,9 @@ STATISTIC(NumCandidatesDropped,
static cl::opt<cl::boolOrDefault>
EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
cl::desc("enable the shrink-wrapping pass"));
+static cl::opt<bool> EnablePostShrinkWrapOpt(
+ "enable-shrink-wrap-region-split", cl::init(true), cl::Hidden,
+ cl::desc("enable splitting of the restore block if possible"));
namespace {
@@ -110,44 +114,44 @@ namespace {
class ShrinkWrap : public MachineFunctionPass {
/// Hold callee-saved information.
RegisterClassInfo RCI;
- MachineDominatorTree *MDT;
- MachinePostDominatorTree *MPDT;
+ MachineDominatorTree *MDT = nullptr;
+ MachinePostDominatorTree *MPDT = nullptr;
/// Current safe point found for the prologue.
/// The prologue will be inserted before the first instruction
/// in this basic block.
- MachineBasicBlock *Save;
+ MachineBasicBlock *Save = nullptr;
/// Current safe point found for the epilogue.
/// The epilogue will be inserted before the first terminator instruction
/// in this basic block.
- MachineBasicBlock *Restore;
+ MachineBasicBlock *Restore = nullptr;
/// Hold the information of the basic block frequency.
/// Use to check the profitability of the new points.
- MachineBlockFrequencyInfo *MBFI;
+ MachineBlockFrequencyInfo *MBFI = nullptr;
/// Hold the loop information. Used to determine if Save and Restore
/// are in the same loop.
- MachineLoopInfo *MLI;
+ MachineLoopInfo *MLI = nullptr;
// Emit remarks.
MachineOptimizationRemarkEmitter *ORE = nullptr;
/// Frequency of the Entry block.
- uint64_t EntryFreq;
+ uint64_t EntryFreq = 0;
/// Current opcode for frame setup.
- unsigned FrameSetupOpcode;
+ unsigned FrameSetupOpcode = ~0u;
/// Current opcode for frame destroy.
- unsigned FrameDestroyOpcode;
+ unsigned FrameDestroyOpcode = ~0u;
/// Stack pointer register, used by llvm.{stacksave,stackrestore}
Register SP;
/// Entry block.
- const MachineBasicBlock *Entry;
+ const MachineBasicBlock *Entry = nullptr;
using SetOfRegs = SmallSetVector<unsigned, 16>;
@@ -155,12 +159,18 @@ class ShrinkWrap : public MachineFunctionPass {
mutable SetOfRegs CurrentCSRs;
/// Current MachineFunction.
- MachineFunction *MachineFunc;
+ MachineFunction *MachineFunc = nullptr;
+
+ /// Is `true` for block numbers where we must assume that a stack access or
+ /// a computation of a stack-relative address occurs on some CFG path leading
+ /// to and including the block itself.
+ BitVector StackAddressUsedBlockInfo;
/// Check if \p MI uses or defines a callee-saved register or
/// a frame index. If this is the case, this means \p MI must happen
/// after Save and before Restore.
- bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const;
+ bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
+ bool StackAddressUsed) const;
const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const {
if (CurrentCSRs.empty()) {
@@ -184,6 +194,32 @@ class ShrinkWrap : public MachineFunctionPass {
/// this call.
void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS);
+ // Try to find safe point based on dominance and block frequency without
+ // any change in IR.
+ bool performShrinkWrapping(
+ const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
+ RegScavenger *RS);
+
+ /// This function tries to split the restore point if doing so can shrink the
+ /// save point further. \return True if restore point is split.
+ bool postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
+ RegScavenger *RS);
+
+ /// This function analyzes if the restore point can split to create a new
+ /// restore point. This function collects
+ /// 1. Any preds of current restore that are reachable by callee save/FI
+ /// blocks
+ /// - indicated by DirtyPreds
+ /// 2. Any preds of current restore that are not DirtyPreds - indicated by
+ /// CleanPreds
+ /// Both sets should be non-empty for considering restore point split.
+ bool checkIfRestoreSplittable(
+ const MachineBasicBlock *CurRestore,
+ const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
+ SmallVectorImpl<MachineBasicBlock *> &DirtyPreds,
+ SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
+ const TargetInstrInfo *TII, RegScavenger *RS);
+
/// Initialize the pass for \p MF.
void init(MachineFunction &MF) {
RCI.runOnMachineFunction(MF);
@@ -257,15 +293,32 @@ INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
-bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
- RegScavenger *RS) const {
- // This prevents premature stack popping when occurs a indirect stack
- // access. It is overly aggressive for the moment.
- // TODO: - Obvious non-stack loads and store, such as global values,
- // are known to not access the stack.
- // - Further, data dependency and alias analysis can validate
- // that load and stores never derive from the stack pointer.
- if (MI.mayLoadOrStore())
+bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
+ bool StackAddressUsed) const {
+ /// Check if \p Op is known to access an address not on the function's stack.
+ /// At the moment, accesses where the underlying object is a global, function
+ /// argument, or jump table are considered non-stack accesses. Note that the
+ /// caller's stack may get accessed when passing an argument via the stack,
+ /// but not the stack of the current function.
+ ///
+ auto IsKnownNonStackPtr = [](MachineMemOperand *Op) {
+ if (Op->getValue()) {
+ const Value *UO = getUnderlyingObject(Op->getValue());
+ if (!UO)
+ return false;
+ if (auto *Arg = dyn_cast<Argument>(UO))
+ return !Arg->hasPassPointeeByValueCopyAttr();
+ return isa<GlobalValue>(UO);
+ }
+ if (const PseudoSourceValue *PSV = Op->getPseudoValue())
+ return PSV->isJumpTable();
+ return false;
+ };
+ // Load/store operations may access the stack indirectly when we previously
+ // computed the address of a stack location.
+ if (StackAddressUsed && MI.mayLoadOrStore() &&
+ (MI.isCall() || MI.hasUnmodeledSideEffects() || MI.memoperands_empty() ||
+ !all_of(MI.memoperands(), IsKnownNonStackPtr)))
return true;
if (MI.getOpcode() == FrameSetupOpcode ||
@@ -320,18 +373,314 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
/// Helper function to find the immediate (post) dominator.
template <typename ListOfBBs, typename DominanceAnalysis>
static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
- DominanceAnalysis &Dom) {
+ DominanceAnalysis &Dom, bool Strict = true) {
MachineBasicBlock *IDom = &Block;
for (MachineBasicBlock *BB : BBs) {
IDom = Dom.findNearestCommonDominator(IDom, BB);
if (!IDom)
break;
}
- if (IDom == &Block)
+ if (Strict && IDom == &Block)
return nullptr;
return IDom;
}
+static bool isAnalyzableBB(const TargetInstrInfo &TII,
+ MachineBasicBlock &Entry) {
+ // Check if the block is analyzable.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ return !TII.analyzeBranch(Entry, TBB, FBB, Cond);
+}
+
+/// Determines if any predecessor of MBB is on a path from a block that has a
+/// use or def of CSRs/FI to MBB.
+/// ReachableByDirty: All blocks reachable from a block that has a use or def
+/// of CSR/FI.
+static bool
+hasDirtyPred(const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
+ const MachineBasicBlock &MBB) {
+ for (const MachineBasicBlock *PredBB : MBB.predecessors())
+ if (ReachableByDirty.count(PredBB))
+ return true;
+ return false;
+}
+
+/// Derives the list of all the basic blocks reachable from MBB.
+static void markAllReachable(DenseSet<const MachineBasicBlock *> &Visited,
+ const MachineBasicBlock &MBB) {
+ SmallVector<MachineBasicBlock *, 4> Worklist(MBB.succ_begin(),
+ MBB.succ_end());
+ Visited.insert(&MBB);
+ while (!Worklist.empty()) {
+ MachineBasicBlock *SuccMBB = Worklist.pop_back_val();
+ if (!Visited.insert(SuccMBB).second)
+ continue;
+ Worklist.append(SuccMBB->succ_begin(), SuccMBB->succ_end());
+ }
+}
+
+/// Collect blocks reachable by use or def of CSRs/FI.
+static void collectBlocksReachableByDirty(
+ const DenseSet<const MachineBasicBlock *> &DirtyBBs,
+ DenseSet<const MachineBasicBlock *> &ReachableByDirty) {
+ for (const MachineBasicBlock *MBB : DirtyBBs) {
+ if (ReachableByDirty.count(MBB))
+ continue;
+ // Mark all descendants as reachable.
+ markAllReachable(ReachableByDirty, *MBB);
+ }
+}
+
+/// \return true if there is a clean path from SavePoint to the original
+/// Restore.
+static bool
+isSaveReachableThroughClean(const MachineBasicBlock *SavePoint,
+ ArrayRef<MachineBasicBlock *> CleanPreds) {
+ DenseSet<const MachineBasicBlock *> Visited;
+ SmallVector<MachineBasicBlock *, 4> Worklist(CleanPreds.begin(),
+ CleanPreds.end());
+ while (!Worklist.empty()) {
+ MachineBasicBlock *CleanBB = Worklist.pop_back_val();
+ if (CleanBB == SavePoint)
+ return true;
+ if (!Visited.insert(CleanBB).second || !CleanBB->pred_size())
+ continue;
+ Worklist.append(CleanBB->pred_begin(), CleanBB->pred_end());
+ }
+ return false;
+}
+
+/// This function updates the branches after the restore point has been split.
+///
+/// Restore point has been split.
+/// Old restore point: MBB
+/// New restore point: NMBB
+/// Any basic block (say BBToUpdate) that previously had a fallthrough to MBB
+/// should
+/// 1. Fallthrough to NMBB iff NMBB is inserted immediately above MBB in the
+/// block layout OR
+/// 2. Branch unconditionally to NMBB iff NMBB is inserted at any other place.
+static void updateTerminator(MachineBasicBlock *BBToUpdate,
+ MachineBasicBlock *NMBB,
+ const TargetInstrInfo *TII) {
+ DebugLoc DL = BBToUpdate->findBranchDebugLoc();
+ // If NMBB isn't the new layout successor for BBToUpdate, insert an
+ // unconditional branch to it.
+ if (!BBToUpdate->isLayoutSuccessor(NMBB))
+ TII->insertUnconditionalBranch(*BBToUpdate, NMBB, DL);
+}
+
+/// This function splits the restore point and returns the new restore point/BB.
+///
+/// DirtyPreds: Predecessors of \p MBB that are ReachableByDirty
+///
+/// Decision has been made to split the restore point.
+/// old restore point: \p MBB
+/// new restore point: \p NMBB
+/// This function makes the necessary block layout changes so that
+/// 1. \p NMBB points to \p MBB unconditionally
+/// 2. All dirtyPreds that previously pointed to \p MBB point to \p NMBB
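+///
+/// For example, if two dirty preds and one clean pred all branch to \p MBB
+/// before the split, then afterwards the dirty preds branch to \p NMBB,
+/// \p NMBB branches unconditionally to \p MBB, and the clean pred still
+/// branches directly to \p MBB.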
+static MachineBasicBlock *
+tryToSplitRestore(MachineBasicBlock *MBB,
+ ArrayRef<MachineBasicBlock *> DirtyPreds,
+ const TargetInstrInfo *TII) {
+ MachineFunction *MF = MBB->getParent();
+
+ // Get the list of DirtyPreds that have a fallthrough to MBB before the block
+ // layout change. This is just to ensure that if NMBB is inserted after MBB,
+ // we create an unconditional branch from the affected DirtyPred/CleanPred to
+ // NMBB.
+ SmallPtrSet<MachineBasicBlock *, 8> MBBFallthrough;
+ for (MachineBasicBlock *BB : DirtyPreds)
+ if (BB->getFallThrough(false) == MBB)
+ MBBFallthrough.insert(BB);
+
+ MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+ // Insert this block at the end of the function. Inserting in between may
+ // interfere with control flow optimizer decisions.
+ MF->insert(MF->end(), NMBB);
+
+ for (const MachineBasicBlock::RegisterMaskPair &LI : MBB->liveins())
+ NMBB->addLiveIn(LI.PhysReg);
+
+ TII->insertUnconditionalBranch(*NMBB, MBB, DebugLoc());
+
+ // After splitting, all predecessors of the restore point should be dirty
+ // blocks.
+ for (MachineBasicBlock *SuccBB : DirtyPreds)
+ SuccBB->ReplaceUsesOfBlockWith(MBB, NMBB);
+
+ NMBB->addSuccessor(MBB);
+
+ for (MachineBasicBlock *BBToUpdate : MBBFallthrough)
+ updateTerminator(BBToUpdate, NMBB, TII);
+
+ return NMBB;
+}
+
+/// This function undoes the restore point split done earlier.
+///
+/// DirtyPreds: All predecessors of \p NMBB that are ReachableByDirty.
+///
+/// The restore point was split and the change needs to be rolled back. Make
+/// the necessary changes to reset the restore point from \p NMBB to \p MBB.
+static void rollbackRestoreSplit(MachineFunction &MF, MachineBasicBlock *NMBB,
+ MachineBasicBlock *MBB,
+ ArrayRef<MachineBasicBlock *> DirtyPreds,
+ const TargetInstrInfo *TII) {
+ // For a BB, if NMBB is the fallthrough in the current layout, then in the new
+ // layout BB should either (a) fall through to MBB or (b) branch
+ // unconditionally to MBB.
+ SmallPtrSet<MachineBasicBlock *, 8> NMBBFallthrough;
+ for (MachineBasicBlock *BB : DirtyPreds)
+ if (BB->getFallThrough(false) == NMBB)
+ NMBBFallthrough.insert(BB);
+
+ NMBB->removeSuccessor(MBB);
+ for (MachineBasicBlock *SuccBB : DirtyPreds)
+ SuccBB->ReplaceUsesOfBlockWith(NMBB, MBB);
+
+ NMBB->erase(NMBB->begin(), NMBB->end());
+ NMBB->eraseFromParent();
+
+ for (MachineBasicBlock *BBToUpdate : NMBBFallthrough)
+ updateTerminator(BBToUpdate, MBB, TII);
+}
+
+// A block is deemed fit for a restore point split iff there exist both
+// 1. DirtyPreds - preds of CurRestore reachable from a use or def of CSR/FI
+// 2. CleanPreds - preds of CurRestore that aren't DirtyPreds
+bool ShrinkWrap::checkIfRestoreSplittable(
+ const MachineBasicBlock *CurRestore,
+ const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
+ SmallVectorImpl<MachineBasicBlock *> &DirtyPreds,
+ SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
+ const TargetInstrInfo *TII, RegScavenger *RS) {
+ for (const MachineInstr &MI : *CurRestore)
+ if (useOrDefCSROrFI(MI, RS, /*StackAddressUsed=*/true))
+ return false;
+
+ for (MachineBasicBlock *PredBB : CurRestore->predecessors()) {
+ if (!isAnalyzableBB(*TII, *PredBB))
+ return false;
+
+ if (ReachableByDirty.count(PredBB))
+ DirtyPreds.push_back(PredBB);
+ else
+ CleanPreds.push_back(PredBB);
+ }
+
+ return !(CleanPreds.empty() || DirtyPreds.empty());
+}
+
+bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
+ RegScavenger *RS) {
+ if (!EnablePostShrinkWrapOpt)
+ return false;
+
+ MachineBasicBlock *InitSave = nullptr;
+ MachineBasicBlock *InitRestore = nullptr;
+
+ if (HasCandidate) {
+ InitSave = Save;
+ InitRestore = Restore;
+ } else {
+ InitRestore = nullptr;
+ InitSave = &MF.front();
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHFuncletEntry())
+ return false;
+ if (MBB.isReturnBlock()) {
+ // Do not support multiple restore points.
+ if (InitRestore)
+ return false;
+ InitRestore = &MBB;
+ }
+ }
+ }
+
+ if (!InitSave || !InitRestore || InitRestore == InitSave ||
+ !MDT->dominates(InitSave, InitRestore) ||
+ !MPDT->dominates(InitRestore, InitSave))
+ return false;
+
+ // Bail out of the optimization if any of the basic blocks is the target of
+ // an INLINEASM_BR instruction.
+ for (MachineBasicBlock &MBB : MF)
+ if (MBB.isInlineAsmBrIndirectTarget())
+ return false;
+
+ DenseSet<const MachineBasicBlock *> DirtyBBs;
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHPad()) {
+ DirtyBBs.insert(&MBB);
+ continue;
+ }
+ for (const MachineInstr &MI : MBB)
+ if (useOrDefCSROrFI(MI, RS, /*StackAddressUsed=*/true)) {
+ DirtyBBs.insert(&MBB);
+ break;
+ }
+ }
+
+ // Find blocks reachable from the use or def of CSRs/FI.
+ DenseSet<const MachineBasicBlock *> ReachableByDirty;
+ collectBlocksReachableByDirty(DirtyBBs, ReachableByDirty);
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ SmallVector<MachineBasicBlock *, 2> DirtyPreds;
+ SmallVector<MachineBasicBlock *, 2> CleanPreds;
+ if (!checkIfRestoreSplittable(InitRestore, ReachableByDirty, DirtyPreds,
+ CleanPreds, TII, RS))
+ return false;
+
+ // Try to find a new save point that dominates all dirty blocks.
+ MachineBasicBlock *NewSave =
+ FindIDom<>(**DirtyPreds.begin(), DirtyPreds, *MDT, false);
+
+ while (NewSave && (hasDirtyPred(ReachableByDirty, *NewSave) ||
+ EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency() ||
+ /* The entry frequency has been observed to be greater than that
+ of a loop block in some cases. */
+ MLI->getLoopFor(NewSave)))
+ NewSave = FindIDom<>(**NewSave->pred_begin(), NewSave->predecessors(), *MDT,
+ false);
+
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ if (!NewSave || NewSave == InitSave ||
+ isSaveReachableThroughClean(NewSave, CleanPreds) ||
+ !TFI->canUseAsPrologue(*NewSave))
+ return false;
+
+ // Now we know that splitting a restore point can isolate the restore point
+ // from clean blocks and doing so can shrink the save point.
+ MachineBasicBlock *NewRestore =
+ tryToSplitRestore(InitRestore, DirtyPreds, TII);
+
+ // Make sure the new restore point is valid as an epilogue for the target.
+ if (!TFI->canUseAsEpilogue(*NewRestore)) {
+ rollbackRestoreSplit(MF, NewRestore, InitRestore, DirtyPreds, TII);
+ return false;
+ }
+
+ Save = NewSave;
+ Restore = NewRestore;
+
+ MDT->runOnMachineFunction(MF);
+ MPDT->runOnMachineFunction(MF);
+
+ assert((MDT->dominates(Save, Restore) && MPDT->dominates(Restore, Save)) &&
+ "Incorrect save or restore point due to dominance relations");
+ assert((!MLI->getLoopFor(Save) && !MLI->getLoopFor(Restore)) &&
+ "Unexpected save or restore point in a loop");
+ assert((EntryFreq >= MBFI->getBlockFreq(Save).getFrequency() &&
+ EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) &&
+ "Incorrect save or restore point based on block frequency");
+ return true;
+}
+
void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
RegScavenger *RS) {
// Get rid of the easy cases first.
@@ -356,7 +705,7 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
// terminator.
if (Restore == &MBB) {
for (const MachineInstr &Terminator : MBB.terminators()) {
- if (!useOrDefCSROrFI(Terminator, RS))
+ if (!useOrDefCSROrFI(Terminator, RS, /*StackAddressUsed=*/true))
continue;
// One of the terminator needs to happen before the restore point.
if (MBB.succ_empty()) {
@@ -463,47 +812,24 @@ static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE,
return false;
}
-bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
- return false;
-
- LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
-
- init(MF);
-
- ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
- if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {
- // If MF is irreducible, a block may be in a loop without
- // MachineLoopInfo reporting it. I.e., we may use the
- // post-dominance property in loops, which lead to incorrect
- // results. Moreover, we may miss that the prologue and
- // epilogue are not in the same loop, leading to unbalanced
- // construction/deconstruction of the stack frame.
- return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG",
- "Irreducible CFGs are not supported yet.",
- MF.getFunction().getSubprogram(), &MF.front());
- }
-
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- std::unique_ptr<RegScavenger> RS(
- TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
-
- for (MachineBasicBlock &MBB : MF) {
- LLVM_DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' '
- << MBB.getName() << '\n');
+bool ShrinkWrap::performShrinkWrapping(
+ const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
+ RegScavenger *RS) {
+ for (MachineBasicBlock *MBB : RPOT) {
+ LLVM_DEBUG(dbgs() << "Look into: " << printMBBReference(*MBB) << '\n');
- if (MBB.isEHFuncletEntry())
+ if (MBB->isEHFuncletEntry())
return giveUpWithRemarks(ORE, "UnsupportedEHFunclets",
"EH Funclets are not supported yet.",
- MBB.front().getDebugLoc(), &MBB);
+ MBB->front().getDebugLoc(), MBB);
- if (MBB.isEHPad() || MBB.isInlineAsmBrIndirectTarget()) {
+ if (MBB->isEHPad() || MBB->isInlineAsmBrIndirectTarget()) {
// Push the prologue and epilogue outside of the region that may throw (or
// jump out via inlineasm_br), by making sure that all the landing pads
// are at least at the boundary of the save and restore points. The
// problem is that a basic block can jump out from the middle in these
// cases, which we do not handle.
- updateSaveRestorePoints(MBB, RS.get());
+ updateSaveRestorePoints(*MBB, RS);
if (!ArePointsInteresting()) {
LLVM_DEBUG(dbgs() << "EHPad/inlineasm_br prevents shrink-wrapping\n");
return false;
@@ -511,22 +837,37 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- for (const MachineInstr &MI : MBB) {
- if (!useOrDefCSROrFI(MI, RS.get()))
- continue;
- // Save (resp. restore) point must dominate (resp. post dominate)
- // MI. Look for the proper basic block for those.
- updateSaveRestorePoints(MBB, RS.get());
- // If we are at a point where we cannot improve the placement of
- // save/restore instructions, just give up.
- if (!ArePointsInteresting()) {
- LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
- return false;
+ bool StackAddressUsed = false;
+ // Check if we found any stack accesses in the predecessors. We are not
+ // doing a full dataflow analysis here to keep things simple but just
+ // rely on a reverse post-order traversal (RPOT) to guarantee predecessors
+ // are already processed, except for loops (where we accept the
+ // conservative result).
+ for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+ if (StackAddressUsedBlockInfo.test(Pred->getNumber())) {
+ StackAddressUsed = true;
+ break;
}
- // No need to look for other instructions, this basic block
- // will already be part of the handled region.
- break;
}
+
+ for (const MachineInstr &MI : *MBB) {
+ if (useOrDefCSROrFI(MI, RS, StackAddressUsed)) {
+ // Save (resp. restore) point must dominate (resp. post dominate)
+ // MI. Look for the proper basic block for those.
+ updateSaveRestorePoints(*MBB, RS);
+ // If we are at a point where we cannot improve the placement of
+ // save/restore instructions, just give up.
+ if (!ArePointsInteresting()) {
+ LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
+ return false;
+ }
+ // No need to look for other instructions, this basic block
+ // will already be part of the handled region.
+ StackAddressUsed = true;
+ break;
+ }
+ }
+ StackAddressUsedBlockInfo[MBB->getNumber()] = StackAddressUsed;
}
if (!ArePointsInteresting()) {
// If the points are not interesting at this point, then they must be null
@@ -540,13 +881,13 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq
<< '\n');
- const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ const TargetFrameLowering *TFI =
+ MachineFunc->getSubtarget().getFrameLowering();
do {
LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
- << Save->getNumber() << ' ' << Save->getName() << ' '
+ << printMBBReference(*Save) << ' '
<< MBFI->getBlockFreq(Save).getFrequency()
- << "\nRestore: " << Restore->getNumber() << ' '
- << Restore->getName() << ' '
+ << "\nRestore: " << printMBBReference(*Restore) << ' '
<< MBFI->getBlockFreq(Restore).getFrequency() << '\n');
bool IsSaveCheap, TargetCanUseSaveAsPrologue = false;
@@ -570,24 +911,61 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
break;
NewBB = Restore;
}
- updateSaveRestorePoints(*NewBB, RS.get());
+ updateSaveRestorePoints(*NewBB, RS);
} while (Save && Restore);
if (!ArePointsInteresting()) {
++NumCandidatesDropped;
return false;
}
+ return true;
+}
+
+bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
+
+ init(MF);
+
+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+ if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {
+ // If MF is irreducible, a block may be in a loop without
+ // MachineLoopInfo reporting it. I.e., we may use the
+ // post-dominance property in loops, which leads to incorrect
+ // results. Moreover, we may miss that the prologue and
+ // epilogue are not in the same loop, leading to unbalanced
+ // construction/deconstruction of the stack frame.
+ return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG",
+ "Irreducible CFGs are not supported yet.",
+ MF.getFunction().getSubprogram(), &MF.front());
+ }
+
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ std::unique_ptr<RegScavenger> RS(
+ TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
+
+ bool Changed = false;
+
+ StackAddressUsedBlockInfo.resize(MF.getNumBlockIDs(), true);
+ bool HasCandidate = performShrinkWrapping(RPOT, RS.get());
+ StackAddressUsedBlockInfo.clear();
+ Changed = postShrinkWrapping(HasCandidate, MF, RS.get());
+ if (!HasCandidate && !Changed)
+ return false;
+ if (!ArePointsInteresting())
+ return Changed;
LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
- << Save->getNumber() << ' ' << Save->getName()
- << "\nRestore: " << Restore->getNumber() << ' '
- << Restore->getName() << '\n');
+ << printMBBReference(*Save) << ' '
+ << "\nRestore: " << printMBBReference(*Restore) << '\n');
MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setSavePoint(Save);
MFI.setRestorePoint(Restore);
++NumCandidates;
- return false;
+ return Changed;
}
bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) {
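The performShrinkWrapping loop above deliberately avoids a full dataflow analysis and instead propagates the "stack address used" bit along a reverse post-order sweep, inheriting it from already-visited predecessors. A minimal standalone sketch of that propagation over a toy CFG follows (all names are hypothetical; none of this is LLVM API):

#include <bitset>
#include <cstdio>
#include <vector>

// Toy CFG: block i lists its predecessors by index.
struct ToyCFG {
  std::vector<std::vector<int>> Preds;
};

int main() {
  // Edges 0 -> 1, 1 -> 2, 0 -> 2; indices are already in reverse post-order.
  ToyCFG CFG{{{}, {0}, {0, 1}}};
  std::bitset<3> StackAddressUsed;
  // Pretend block 1 touches the stack (e.g. takes the address of a slot).
  std::vector<bool> TouchesStack{false, true, false};

  for (int BB = 0; BB < 3; ++BB) {
    bool Used = TouchesStack[BB];
    // Inherit the property from any already-visited predecessor; back-edge
    // predecessors of a loop are unseen, which yields the conservative result.
    for (int Pred : CFG.Preds[BB])
      Used |= StackAddressUsed.test(Pred);
    StackAddressUsed[BB] = Used;
    std::printf("block %d: stack address %s\n", BB, Used ? "used" : "not used");
  }
}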
diff --git a/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index 3fed707a9eb1..d09953e76a80 100644
--- a/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -38,21 +38,21 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges");
namespace {
class SjLjEHPrepare : public FunctionPass {
- IntegerType *DataTy;
- Type *doubleUnderDataTy;
- Type *doubleUnderJBufTy;
- Type *FunctionContextTy;
+ IntegerType *DataTy = nullptr;
+ Type *doubleUnderDataTy = nullptr;
+ Type *doubleUnderJBufTy = nullptr;
+ Type *FunctionContextTy = nullptr;
FunctionCallee RegisterFn;
FunctionCallee UnregisterFn;
- Function *BuiltinSetupDispatchFn;
- Function *FrameAddrFn;
- Function *StackAddrFn;
- Function *StackRestoreFn;
- Function *LSDAAddrFn;
- Function *CallSiteFn;
- Function *FuncCtxFn;
- AllocaInst *FuncCtx;
- const TargetMachine *TM;
+ Function *BuiltinSetupDispatchFn = nullptr;
+ Function *FrameAddrFn = nullptr;
+ Function *StackAddrFn = nullptr;
+ Function *StackRestoreFn = nullptr;
+ Function *LSDAAddrFn = nullptr;
+ Function *CallSiteFn = nullptr;
+ Function *FuncCtxFn = nullptr;
+ AllocaInst *FuncCtx = nullptr;
+ const TargetMachine *TM = nullptr;
public:
static char ID; // Pass identification, replacement for typeid
diff --git a/llvm/lib/CodeGen/SlotIndexes.cpp b/llvm/lib/CodeGen/SlotIndexes.cpp
index ee3a0164564e..47ee36971d0e 100644
--- a/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -215,7 +215,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
--MBBI;
else
pastStart = true;
- } else if (MI && mi2iMap.find(MI) == mi2iMap.end()) {
+ } else if (MI && !mi2iMap.contains(MI)) {
if (MBBI != Begin)
--MBBI;
else
@@ -232,7 +232,7 @@ void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB,
for (MachineBasicBlock::iterator I = End; I != Begin;) {
--I;
MachineInstr &MI = *I;
- if (!MI.isDebugOrPseudoInstr() && mi2iMap.find(&MI) == mi2iMap.end())
+ if (!MI.isDebugOrPseudoInstr() && !mi2iMap.contains(&MI))
insertMachineInstrInMaps(MI);
}
}
diff --git a/llvm/lib/CodeGen/SpillPlacement.h b/llvm/lib/CodeGen/SpillPlacement.h
index d2273a163025..bd37d85c6c0d 100644
--- a/llvm/lib/CodeGen/SpillPlacement.h
+++ b/llvm/lib/CodeGen/SpillPlacement.h
@@ -42,15 +42,15 @@ class MachineLoopInfo;
class SpillPlacement : public MachineFunctionPass {
struct Node;
- const MachineFunction *MF;
- const EdgeBundles *bundles;
- const MachineLoopInfo *loops;
- const MachineBlockFrequencyInfo *MBFI;
+ const MachineFunction *MF = nullptr;
+ const EdgeBundles *bundles = nullptr;
+ const MachineLoopInfo *loops = nullptr;
+ const MachineBlockFrequencyInfo *MBFI = nullptr;
Node *nodes = nullptr;
// Nodes that are active in the current computation. Owned by the prepare()
// caller.
- BitVector *ActiveNodes;
+ BitVector *ActiveNodes = nullptr;
// Nodes with active links. Populated by scanActiveBundles.
SmallVector<unsigned, 8> Linked;
diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
index 92e820c9d3d8..83964eced597 100644
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -514,10 +514,10 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo &ParentVNI) {
VFP = ValueForcePair(nullptr, true);
}
-SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg,
- MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
- unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def) {
- const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+SlotIndex SplitEditor::buildSingleSubRegCopy(
+ Register FromReg, Register ToReg, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, unsigned SubIdx,
+ LiveInterval &DestLI, bool Late, SlotIndex Def, const MCInstrDesc &Desc) {
bool FirstCopy = !Def.isValid();
MachineInstr *CopyMI = BuildMI(MBB, InsertBefore, DebugLoc(), Desc)
.addReg(ToReg, RegState::Define | getUndefRegState(FirstCopy)
@@ -536,7 +536,8 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(Register FromReg, Register ToReg,
SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
LaneBitmask LaneMask, MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) {
- const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+ const MCInstrDesc &Desc =
+ TII.get(TII.getLiveRangeSplitOpcode(FromReg, *MBB.getParent()));
SlotIndexes &Indexes = *LIS.getSlotIndexes();
if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) {
// The full vreg is copied.
@@ -564,7 +565,7 @@ SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
SlotIndex Def;
for (unsigned BestIdx : SubIndexes) {
Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx,
- DestLI, Late, Def);
+ DestLI, Late, Def, Desc);
}
BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
@@ -1365,7 +1366,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
// The point we want to extend is 0d to 16e not 16r in this case, but if
// we use 16r here we will extend nothing because that already contained
// in [16e, 32d).
- unsigned OpIdx = MI->getOperandNo(&MO);
+ unsigned OpIdx = MO.getOperandNo();
unsigned DefOpIdx = MI->findTiedOperandIdx(OpIdx);
const MachineOperand &DefOp = MI->getOperand(DefOpIdx);
IsEarlyClobber = DefOp.isEarlyClobber();
@@ -1584,7 +1585,9 @@ bool SplitAnalysis::shouldSplitSingleBlock(const BlockInfo &BI,
if (BI.LiveIn && BI.LiveOut)
return true;
// No point in isolating a copy. It has no register class constraints.
- if (LIS.getInstructionFromIndex(BI.FirstInstr)->isCopyLike())
+ MachineInstr *MI = LIS.getInstructionFromIndex(BI.FirstInstr);
+ bool copyLike = TII.isCopyInstr(*MI) || MI->isSubregToReg();
+ if (copyLike)
return false;
// Finally, don't isolate an end point that was created by earlier splits.
return isOriginalEndpoint(BI.FirstInstr);
diff --git a/llvm/lib/CodeGen/SplitKit.h b/llvm/lib/CodeGen/SplitKit.h
index 5a3428a5e91f..1174e392e4e4 100644
--- a/llvm/lib/CodeGen/SplitKit.h
+++ b/llvm/lib/CodeGen/SplitKit.h
@@ -151,13 +151,13 @@ private:
/// NumGapBlocks - Number of duplicate entries in UseBlocks for blocks where
/// the live range has a gap.
- unsigned NumGapBlocks;
+ unsigned NumGapBlocks = 0u;
/// ThroughBlocks - Block numbers where CurLI is live through without uses.
BitVector ThroughBlocks;
/// NumThroughBlocks - Number of live-through blocks.
- unsigned NumThroughBlocks;
+ unsigned NumThroughBlocks = 0u;
  // Summarize statistics by counting instructions using CurLI.
void analyzeUses();
@@ -428,8 +428,11 @@ private:
bool Late, unsigned RegIdx);
SlotIndex buildSingleSubRegCopy(Register FromReg, Register ToReg,
- MachineBasicBlock &MB, MachineBasicBlock::iterator InsertBefore,
- unsigned SubIdx, LiveInterval &DestLI, bool Late, SlotIndex Def);
+ MachineBasicBlock &MB,
+ MachineBasicBlock::iterator InsertBefore,
+ unsigned SubIdx, LiveInterval &DestLI,
+ bool Late, SlotIndex Def,
+ const MCInstrDesc &Desc);
public:
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
diff --git a/llvm/lib/CodeGen/StackColoring.cpp b/llvm/lib/CodeGen/StackColoring.cpp
index 11c6bdc69956..66b9086e1d88 100644
--- a/llvm/lib/CodeGen/StackColoring.cpp
+++ b/llvm/lib/CodeGen/StackColoring.cpp
@@ -407,8 +407,8 @@ namespace {
/// StackColoring - A machine pass for merging disjoint stack allocations,
/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
class StackColoring : public MachineFunctionPass {
- MachineFrameInfo *MFI;
- MachineFunction *MF;
+ MachineFrameInfo *MFI = nullptr;
+ MachineFunction *MF = nullptr;
/// A class representing liveness information for a single basic block.
/// Each bit in the BitVector represents the liveness property
@@ -448,7 +448,7 @@ class StackColoring : public MachineFunctionPass {
VNInfo::Allocator VNInfoAllocator;
/// SlotIndex analysis object.
- SlotIndexes *Indexes;
+ SlotIndexes *Indexes = nullptr;
/// The list of lifetime markers found. These markers are to be removed
/// once the coloring is done.
@@ -935,12 +935,13 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
// Remap debug information that refers to stack slots.
for (auto &VI : MF->getVariableDbgInfo()) {
- if (!VI.Var)
+ if (!VI.Var || !VI.inStackSlot())
continue;
- if (SlotRemap.count(VI.Slot)) {
+ int Slot = VI.getStackSlot();
+ if (SlotRemap.count(Slot)) {
LLVM_DEBUG(dbgs() << "Remapping debug info for ["
<< cast<DILocalVariable>(VI.Var)->getName() << "].\n");
- VI.Slot = SlotRemap[VI.Slot];
+ VI.updateStackSlot(SlotRemap[Slot]);
FixedDbg++;
}
}
diff --git a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
index 3a48dd5b0a03..5d3903ed84ce 100644
--- a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
+++ b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
@@ -210,8 +210,9 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
SlotDbgMap SlotDebugMap;
// add variables to the map
- for (MachineFunction::VariableDbgInfo &DI : MF.getVariableDbgInfo())
- SlotDebugMap[DI.Slot].insert(DI.Var);
+ for (MachineFunction::VariableDbgInfo &DI :
+ MF.getInStackSlotVariableDbgInfo())
+ SlotDebugMap[DI.getStackSlot()].insert(DI.Var);
// Then add all the spills that have debug data
for (MachineBasicBlock &MBB : MF) {
diff --git a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
index b83c56903133..778ac1f5701c 100644
--- a/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -48,7 +48,7 @@ namespace {
/// information provided by this pass is optional and not required by the
/// aforementioned intrinsic to function.
class StackMapLiveness : public MachineFunctionPass {
- const TargetRegisterInfo *TRI;
+ const TargetRegisterInfo *TRI = nullptr;
LivePhysRegs LiveRegs;
public:
diff --git a/llvm/lib/CodeGen/StackMaps.cpp b/llvm/lib/CodeGen/StackMaps.cpp
index bb7a51e49edb..f9115e434878 100644
--- a/llvm/lib/CodeGen/StackMaps.cpp
+++ b/llvm/lib/CodeGen/StackMaps.cpp
@@ -149,7 +149,7 @@ unsigned StatepointOpers::getGCPointerMap(
bool StatepointOpers::isFoldableReg(Register Reg) const {
unsigned FoldableAreaStart = getVarIdx();
for (const MachineOperand &MO : MI->uses()) {
- if (MI->getOperandNo(&MO) >= FoldableAreaStart)
+ if (MO.getOperandNo() >= FoldableAreaStart)
break;
if (MO.isReg() && MO.getReg() == Reg)
return false;
@@ -193,9 +193,12 @@ unsigned StackMaps::getNextMetaArgIdx(const MachineInstr *MI, unsigned CurIdx) {
/// Go up the super-register chain until we hit a valid dwarf register number.
static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) {
- int RegNum = TRI->getDwarfRegNum(Reg, false);
- for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNum < 0; ++SR)
- RegNum = TRI->getDwarfRegNum(*SR, false);
+ int RegNum;
+ for (MCPhysReg SR : TRI->superregs_inclusive(Reg)) {
+ RegNum = TRI->getDwarfRegNum(SR, false);
+ if (RegNum >= 0)
+ break;
+ }
assert(RegNum >= 0 && "Invalid Dwarf register number.");
return (unsigned)RegNum;
@@ -389,7 +392,7 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
break;
}
I->Size = std::max(I->Size, II->Size);
- if (TRI->isSuperRegister(I->Reg, II->Reg))
+ if (I->Reg && TRI->isSuperRegister(I->Reg, II->Reg))
I->Reg = II->Reg;
II->Reg = 0; // mark for deletion.
}
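The rewritten getDwarfRegNum above walks the inclusive super-register list and keeps the first non-negative DWARF number it finds. A small first-match scan with the same shape, using toy data instead of the real register tables (toyDwarfRegNum is invented for illustration):

#include <cassert>
#include <cstdio>
#include <vector>

// Toy mapping: -1 means "no DWARF number for this register".
static int toyDwarfRegNum(int Reg) { return Reg == 2 ? 17 : -1; }

int main() {
  // Pretend these are Reg and its super-registers, innermost first.
  std::vector<int> SuperRegsInclusive{0, 1, 2};
  int RegNum = -1;
  for (int SR : SuperRegsInclusive) {
    RegNum = toyDwarfRegNum(SR);
    if (RegNum >= 0)
      break; // the first valid number wins, exactly like the new loop above
  }
  assert(RegNum >= 0 && "no register in the chain has a DWARF number");
  std::printf("resolved DWARF register %d\n", RegNum);
}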
diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp
index 46685f7b8208..387b653f8815 100644
--- a/llvm/lib/CodeGen/StackProtector.cpp
+++ b/llvm/lib/CodeGen/StackProtector.cpp
@@ -15,9 +15,9 @@
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Passes.h"
@@ -30,6 +30,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
@@ -96,7 +97,7 @@ bool StackProtector::runOnFunction(Function &Fn) {
SSPBufferSize = Fn.getFnAttributeAsParsedInteger(
"stack-protector-buffer-size", DefaultSSPBufferSize);
- if (!RequiresStackProtector())
+ if (!requiresStackProtector(F, &Layout))
return false;
// TODO(etienneb): Functions with funclets are not correctly supported now.
@@ -121,9 +122,9 @@ bool StackProtector::runOnFunction(Function &Fn) {
/// \param [out] IsLarge is set to true if a protectable array is found and
/// it is "large" ( >= ssp-buffer-size). In the case of a structure with
/// multiple arrays, this gets set if any of them is large.
-bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
- bool Strong,
- bool InStruct) const {
+static bool ContainsProtectableArray(Type *Ty, Module *M, unsigned SSPBufferSize,
+ bool &IsLarge, bool Strong,
+ bool InStruct) {
if (!Ty)
return false;
if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
@@ -132,7 +133,7 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
// add stack protectors unless the array is a character array.
// However, in strong mode any array, regardless of type and size,
// triggers a protector.
- if (!Strong && (InStruct || !Trip.isOSDarwin()))
+ if (!Strong && (InStruct || !Triple(M->getTargetTriple()).isOSDarwin()))
return false;
}
@@ -154,7 +155,7 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
bool NeedsProtector = false;
for (Type *ET : ST->elements())
- if (ContainsProtectableArray(ET, IsLarge, Strong, true)) {
+ if (ContainsProtectableArray(ET, M, SSPBufferSize, IsLarge, Strong, true)) {
// If the element is a protectable array and is large (>= SSPBufferSize)
// then we are done. If the protectable array is not large, then
// keep looking in case a subsequent element is a large array.
@@ -166,8 +167,10 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
return NeedsProtector;
}
-bool StackProtector::HasAddressTaken(const Instruction *AI,
- TypeSize AllocSize) {
+/// Check whether a stack allocation has its address taken.
+static bool HasAddressTaken(const Instruction *AI, TypeSize AllocSize,
+ Module *M,
+ SmallPtrSet<const PHINode *, 16> &VisitedPHIs) {
const DataLayout &DL = M->getDataLayout();
for (const User *U : AI->users()) {
const auto *I = cast<Instruction>(U);
@@ -221,14 +224,14 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
// assume the scalable value is of minimum size.
TypeSize NewAllocSize =
TypeSize::Fixed(AllocSize.getKnownMinValue()) - OffsetSize;
- if (HasAddressTaken(I, NewAllocSize))
+ if (HasAddressTaken(I, NewAllocSize, M, VisitedPHIs))
return true;
break;
}
case Instruction::BitCast:
case Instruction::Select:
case Instruction::AddrSpaceCast:
- if (HasAddressTaken(I, AllocSize))
+ if (HasAddressTaken(I, AllocSize, M, VisitedPHIs))
return true;
break;
case Instruction::PHI: {
@@ -236,7 +239,7 @@ bool StackProtector::HasAddressTaken(const Instruction *AI,
// they are only visited once.
const auto *PN = cast<PHINode>(I);
if (VisitedPHIs.insert(PN).second)
- if (HasAddressTaken(PN, AllocSize))
+ if (HasAddressTaken(PN, AllocSize, M, VisitedPHIs))
return true;
break;
}
@@ -282,10 +285,19 @@ static const CallInst *findStackProtectorIntrinsic(Function &F) {
/// functions with aggregates that contain any buffer regardless of type and
/// size, and functions that contain stack-based variables that have had their
/// address taken.
-bool StackProtector::RequiresStackProtector() {
+bool StackProtector::requiresStackProtector(Function *F, SSPLayoutMap *Layout) {
+ Module *M = F->getParent();
bool Strong = false;
bool NeedsProtector = false;
+ // The set of PHI nodes visited when determining if a variable's reference has
+ // been taken. This set is maintained to ensure we don't visit the same PHI
+ // node multiple times.
+ SmallPtrSet<const PHINode *, 16> VisitedPHIs;
+
+ unsigned SSPBufferSize = F->getFnAttributeAsParsedInteger(
+ "stack-protector-buffer-size", DefaultSSPBufferSize);
+
if (F->hasFnAttribute(Attribute::SafeStack))
return false;
@@ -295,6 +307,8 @@ bool StackProtector::RequiresStackProtector() {
OptimizationRemarkEmitter ORE(F);
if (F->hasFnAttribute(Attribute::StackProtectReq)) {
+ if (!Layout)
+ return true;
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "StackProtectorRequested", F)
<< "Stack protection applied to function "
@@ -324,21 +338,27 @@ bool StackProtector::RequiresStackProtector() {
if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) {
// A call to alloca with size >= SSPBufferSize requires
// stack protectors.
- Layout.insert(std::make_pair(AI,
- MachineFrameInfo::SSPLK_LargeArray));
+ if (!Layout)
+ return true;
+ Layout->insert(
+ std::make_pair(AI, MachineFrameInfo::SSPLK_LargeArray));
ORE.emit(RemarkBuilder);
NeedsProtector = true;
} else if (Strong) {
// Require protectors for all alloca calls in strong mode.
- Layout.insert(std::make_pair(AI,
- MachineFrameInfo::SSPLK_SmallArray));
+ if (!Layout)
+ return true;
+ Layout->insert(
+ std::make_pair(AI, MachineFrameInfo::SSPLK_SmallArray));
ORE.emit(RemarkBuilder);
NeedsProtector = true;
}
} else {
// A call to alloca with a variable size requires protectors.
- Layout.insert(std::make_pair(AI,
- MachineFrameInfo::SSPLK_LargeArray));
+ if (!Layout)
+ return true;
+ Layout->insert(
+ std::make_pair(AI, MachineFrameInfo::SSPLK_LargeArray));
ORE.emit(RemarkBuilder);
NeedsProtector = true;
}
@@ -346,10 +366,13 @@ bool StackProtector::RequiresStackProtector() {
}
bool IsLarge = false;
- if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) {
- Layout.insert(std::make_pair(AI, IsLarge
- ? MachineFrameInfo::SSPLK_LargeArray
- : MachineFrameInfo::SSPLK_SmallArray));
+ if (ContainsProtectableArray(AI->getAllocatedType(), M, SSPBufferSize,
+ IsLarge, Strong, false)) {
+ if (!Layout)
+ return true;
+ Layout->insert(std::make_pair(
+ AI, IsLarge ? MachineFrameInfo::SSPLK_LargeArray
+ : MachineFrameInfo::SSPLK_SmallArray));
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "StackProtectorBuffer", &I)
<< "Stack protection applied to function "
@@ -361,10 +384,14 @@ bool StackProtector::RequiresStackProtector() {
continue;
}
- if (Strong && HasAddressTaken(AI, M->getDataLayout().getTypeAllocSize(
- AI->getAllocatedType()))) {
+ if (Strong &&
+ HasAddressTaken(
+ AI, M->getDataLayout().getTypeAllocSize(AI->getAllocatedType()),
+ M, VisitedPHIs)) {
++NumAddrTaken;
- Layout.insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf));
+ if (!Layout)
+ return true;
+ Layout->insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf));
ORE.emit([&]() {
return OptimizationRemark(DEBUG_TYPE, "StackProtectorAddressTaken",
&I)
@@ -455,18 +482,15 @@ bool StackProtector::InsertStackProtectors() {
if (&BB == FailBB)
continue;
Instruction *CheckLoc = dyn_cast<ReturnInst>(BB.getTerminator());
- if (!CheckLoc && !DisableCheckNoReturn) {
- for (auto &Inst : BB) {
- auto *CB = dyn_cast<CallBase>(&Inst);
- if (!CB)
- continue;
- if (!CB->doesNotReturn())
- continue;
- // Do stack check before non-return calls (e.g: __cxa_throw)
- CheckLoc = CB;
- break;
- }
- }
+ if (!CheckLoc && !DisableCheckNoReturn)
+ for (auto &Inst : BB)
+ if (auto *CB = dyn_cast<CallBase>(&Inst))
+ // Do stack check before noreturn calls that aren't nounwind (e.g:
+ // __cxa_throw).
+ if (CB->doesNotReturn() && !CB->doesNotThrow()) {
+ CheckLoc = CB;
+ break;
+ }
if (!CheckLoc)
continue;
@@ -594,18 +618,19 @@ BasicBlock *StackProtector::CreateFailBB() {
if (F->getSubprogram())
B.SetCurrentDebugLocation(
DILocation::get(Context, 0, 0, F->getSubprogram()));
+ FunctionCallee StackChkFail;
+ SmallVector<Value *, 1> Args;
if (Trip.isOSOpenBSD()) {
- FunctionCallee StackChkFail = M->getOrInsertFunction(
- "__stack_smash_handler", Type::getVoidTy(Context),
- Type::getInt8PtrTy(Context));
-
- B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH"));
+ StackChkFail = M->getOrInsertFunction("__stack_smash_handler",
+ Type::getVoidTy(Context),
+ Type::getInt8PtrTy(Context));
+ Args.push_back(B.CreateGlobalStringPtr(F->getName(), "SSH"));
} else {
- FunctionCallee StackChkFail =
+ StackChkFail =
M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context));
-
- B.CreateCall(StackChkFail, {});
}
+ cast<Function>(StackChkFail.getCallee())->addFnAttr(Attribute::NoReturn);
+ B.CreateCall(StackChkFail, Args);
B.CreateUnreachable();
return FailBB;
}
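After the refactor above, requiresStackProtector doubles as a pure query: when no layout map is passed, it returns true at the first protectable allocation instead of recording it. A self-contained sketch of that optional-out-parameter pattern, with hypothetical names rather than the StackProtector types:

#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Hypothetical stand-ins for stack variables and their protection kinds.
enum class Kind { LargeArray };
using Layout = std::map<std::string, Kind>;

// Returns true if any variable needs protection; fills Out only if non-null.
static bool needsProtection(const std::vector<std::string> &Vars, Layout *Out) {
  bool Needs = false;
  for (const std::string &V : Vars) {
    bool IsLarge = V.size() > 8; // toy predicate in place of the real checks
    if (!IsLarge)
      continue;
    if (!Out)
      return true; // caller only wants the answer, stop at the first hit
    Out->emplace(V, Kind::LargeArray);
    Needs = true;
  }
  return Needs;
}

int main() {
  std::vector<std::string> Vars{"buf", "really_long_buffer"};
  std::printf("query only: %d\n", needsProtection(Vars, nullptr));
  Layout L;
  std::printf("with layout: %d, entries: %zu\n", needsProtection(Vars, &L),
              L.size());
}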
diff --git a/llvm/lib/CodeGen/StackSlotColoring.cpp b/llvm/lib/CodeGen/StackSlotColoring.cpp
index b8c750688914..6d933ab12041 100644
--- a/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -58,10 +59,10 @@ STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
namespace {
class StackSlotColoring : public MachineFunctionPass {
- LiveStacks* LS;
- MachineFrameInfo *MFI;
- const TargetInstrInfo *TII;
- const MachineBlockFrequencyInfo *MBFI;
+ LiveStacks *LS = nullptr;
+ MachineFrameInfo *MFI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const MachineBlockFrequencyInfo *MBFI = nullptr;
// SSIntervals - Spill slot intervals.
std::vector<LiveInterval*> SSIntervals;
@@ -90,8 +91,50 @@ namespace {
// UsedColors - "Colors" that have been assigned. This is per stack ID
SmallVector<BitVector, 2> UsedColors;
+ // Join all intervals sharing one color into a single LiveIntervalUnion to
+ // speed up the range overlap test.
+ class ColorAssignmentInfo {
+ // Single liverange (used to avoid creation of LiveIntervalUnion).
+ LiveInterval *SingleLI = nullptr;
+ // LiveIntervalUnion to perform overlap test.
+ LiveIntervalUnion *LIU = nullptr;
+ // LiveIntervalUnion takes an allocator in its constructor, so it is
+ // constructed lazily with placement new into the buffer below.
+ uint8_t LIUPad[sizeof(LiveIntervalUnion)];
+
+ public:
+ ~ColorAssignmentInfo() {
+ if (LIU)
+ LIU->~LiveIntervalUnion(); // Manual destruction matching the placement new.
+ }
+
+ // Return true if LiveInterval overlaps with any
+ // intervals that have already been assigned to this color.
+ bool overlaps(LiveInterval *LI) const {
+ if (LIU)
+ return LiveIntervalUnion::Query(*LI, *LIU).checkInterference();
+ return SingleLI ? SingleLI->overlaps(*LI) : false;
+ }
+
+ // Add new LiveInterval to this color.
+ void add(LiveInterval *LI, LiveIntervalUnion::Allocator &Alloc) {
+ assert(!overlaps(LI));
+ if (LIU) {
+ LIU->unify(*LI, *LI);
+ } else if (SingleLI) {
+ LIU = new (LIUPad) LiveIntervalUnion(Alloc);
+ LIU->unify(*SingleLI, *SingleLI);
+ LIU->unify(*LI, *LI);
+ SingleLI = nullptr;
+ } else
+ SingleLI = LI;
+ }
+ };
+
+ LiveIntervalUnion::Allocator LIUAlloc;
+
// Assignments - Color to intervals mapping.
- SmallVector<SmallVector<LiveInterval*,4>, 16> Assignments;
+ SmallVector<ColorAssignmentInfo, 16> Assignments;
public:
static char ID; // Pass identification
@@ -116,7 +159,6 @@ namespace {
private:
void InitializeSlots();
void ScanForSpillSlotRefs(MachineFunction &MF);
- bool OverlapWithAssignments(LiveInterval *li, int Color) const;
int ColorSlot(LiveInterval *li);
bool ColorSlots(MachineFunction &MF);
void RewriteInstruction(MachineInstr &MI, SmallVectorImpl<int> &SlotMapping,
@@ -247,19 +289,6 @@ void StackSlotColoring::InitializeSlots() {
NextColors[I] = AllColors[I].find_first();
}
-/// OverlapWithAssignments - Return true if LiveInterval overlaps with any
-/// LiveIntervals that have already been assigned to the specified color.
-bool
-StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
- const SmallVectorImpl<LiveInterval *> &OtherLIs = Assignments[Color];
- for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) {
- LiveInterval *OtherLI = OtherLIs[i];
- if (OtherLI->overlaps(*li))
- return true;
- }
- return false;
-}
-
/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot.
int StackSlotColoring::ColorSlot(LiveInterval *li) {
int Color = -1;
@@ -272,7 +301,7 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {
// Check if it's possible to reuse any of the used colors.
Color = UsedColors[StackID].find_first();
while (Color != -1) {
- if (!OverlapWithAssignments(li, Color)) {
+ if (!Assignments[Color].overlaps(li)) {
Share = true;
++NumEliminated;
break;
@@ -298,7 +327,7 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {
assert(MFI->getStackID(Color) == MFI->getStackID(FI));
// Record the assignment.
- Assignments[Color].push_back(li);
+ Assignments[Color].add(li, LIUAlloc);
LLVM_DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
// Change size and alignment of the allocated slot. If there are multiple
@@ -515,8 +544,6 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
OrigSizes.clear();
AllColors.clear();
UsedColors.clear();
- for (unsigned i = 0, e = Assignments.size(); i != e; ++i)
- Assignments[i].clear();
Assignments.clear();
return Changed;
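ColorAssignmentInfo above keeps a lone interval until a second one is assigned to the color, and only then placement-constructs the union object into a raw buffer because its constructor needs an allocator. A standalone sketch of that lazy placement-new pattern with toy types (IntervalUnion here is invented, not LiveIntervalUnion):

#include <cstdio>
#include <new>
#include <set>

struct Allocator {}; // stands in for the constructor argument

// Toy "union of intervals": a set of points with an overlap query.
struct IntervalUnion {
  explicit IntervalUnion(Allocator &) {}
  std::set<int> Points;
  bool overlaps(int P) const { return Points.count(P) != 0; }
  void add(int P) { Points.insert(P); }
};

class ColorInfo {
  int Single = -1;          // single element, avoids building the union
  IntervalUnion *U = nullptr;
  alignas(IntervalUnion) unsigned char Buf[sizeof(IntervalUnion)];

public:
  ~ColorInfo() {
    if (U)
      U->~IntervalUnion(); // manual destruction matches the placement new
  }
  bool overlaps(int P) const {
    if (U)
      return U->overlaps(P);
    return Single == P;
  }
  void add(int P, Allocator &A) {
    if (U) {
      U->add(P);
    } else if (Single != -1) {
      U = new (Buf) IntervalUnion(A); // first growth: build the union lazily
      U->add(Single);
      U->add(P);
      Single = -1;
    } else {
      Single = P;
    }
  }
};

int main() {
  Allocator A;
  ColorInfo C;
  C.add(3, A);
  C.add(7, A);
  std::printf("overlaps 3: %d, overlaps 5: %d\n", C.overlaps(3), C.overlaps(5));
}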
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index 865add28f781..5ed67bd0a121 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -427,7 +427,13 @@ void TailDuplicator::duplicateInstruction(
} else {
// For mapped registers that do not have sub-registers, simply
// restrict their class to match the original one.
- ConstrRC = MRI->constrainRegClass(VI->second.Reg, OrigRC);
+
+ // We don't want debug instructions affecting the resulting code so
+ // if we're cloning a debug instruction then just use MappedRC
+ // rather than constraining the register class further.
+ ConstrRC = NewMI.isDebugInstr()
+ ? MappedRC
+ : MRI->constrainRegClass(VI->second.Reg, OrigRC);
}
if (ConstrRC) {
@@ -436,16 +442,13 @@ void TailDuplicator::duplicateInstruction(
MO.setReg(VI->second.Reg);
// We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a
// sub-register, we need to compose the sub-register indices.
- MO.setSubReg(TRI->composeSubRegIndices(MO.getSubReg(),
- VI->second.SubReg));
+ MO.setSubReg(
+ TRI->composeSubRegIndices(VI->second.SubReg, MO.getSubReg()));
} else {
// The direct replacement is not possible, due to failing register
// class constraints. An explicit COPY is necessary. Create one
- // that can be reused
- auto *NewRC = MI->getRegClassConstraint(i, TII, TRI);
- if (NewRC == nullptr)
- NewRC = OrigRC;
- Register NewReg = MRI->createVirtualRegister(NewRC);
+ // that can be reused.
+ Register NewReg = MRI->createVirtualRegister(OrigRC);
BuildMI(*PredBB, NewMI, NewMI.getDebugLoc(),
TII->get(TargetOpcode::COPY), NewReg)
.addReg(VI->second.Reg, 0, VI->second.SubReg);
@@ -1016,13 +1019,11 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
DenseMap<Register, RegSubRegPair> LocalVRMap;
SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
- MachineBasicBlock::iterator I = TailBB->begin();
// Process PHI instructions first.
- while (I != TailBB->end() && I->isPHI()) {
+ for (MachineInstr &MI : make_early_inc_range(TailBB->phis())) {
// Replace the uses of the def of the PHI with the register coming
// from PredBB.
- MachineInstr *MI = &*I++;
- processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
+ processPHI(&MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
}
appendCopies(PredBB, CopyInfos, Copies);
}
diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 9430e86fe44d..48a2094f5d45 100644
--- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
-#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -131,16 +130,6 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
}
}
-unsigned TargetFrameLowering::getStackAlignmentSkew(
- const MachineFunction &MF) const {
- // When HHVM function is called, the stack is skewed as the return address
- // is removed from the stack before we enter the function.
- if (LLVM_UNLIKELY(MF.getFunction().getCallingConv() == CallingConv::HHVM))
- return MF.getTarget().getAllocaPointerSize();
-
- return 0;
-}
-
bool TargetFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
const MachineFunction &MF) const {
if (!hasFP(MF))
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 0f6cf11ca9d1..09dcddc17b06 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/StackMaps.h"
@@ -439,8 +440,9 @@ MachineInstr &TargetInstrInfo::duplicate(MachineBasicBlock &MBB,
// If the COPY instruction in MI can be folded to a stack operation, return
// the register class to use.
static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
+ const TargetInstrInfo &TII,
unsigned FoldIdx) {
- assert(MI.isCopy() && "MI must be a COPY instruction");
+ assert(TII.isCopyInstr(MI) && "MI must be a COPY instruction");
if (MI.getNumOperands() != 2)
return nullptr;
  assert(FoldIdx < 2 && "FoldIdx refers to a nonexistent operand");
@@ -629,10 +631,10 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
}
// Straight COPY may fold as load/store.
- if (!MI.isCopy() || Ops.size() != 1)
+ if (!isCopyInstr(MI) || Ops.size() != 1)
return nullptr;
- const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]);
+ const TargetRegisterClass *RC = canFoldCopy(MI, *this, Ops[0]);
if (!RC)
return nullptr;
@@ -695,6 +697,61 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
return NewMI;
}
+/// transferImplicitOperands - MI is a pseudo-instruction, and the lowered
+/// replacement instructions immediately precede it. Copy any implicit
+/// operands from MI to the replacement instruction.
+static void transferImplicitOperands(MachineInstr *MI,
+ const TargetRegisterInfo *TRI) {
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+
+ Register DstReg = MI->getOperand(0).getReg();
+ for (const MachineOperand &MO : MI->implicit_operands()) {
+ CopyMI->addOperand(MO);
+
+ // Be conservative about preserving kills when subregister defs are
+ // involved. If there was an implicit kill of a super-register overlapping the
+ // copy result, we would kill the subregisters that previous copies defined.
+
+ if (MO.isKill() && TRI->regsOverlap(DstReg, MO.getReg()))
+ CopyMI->getOperand(CopyMI->getNumOperands() - 1).setIsKill(false);
+ }
+}
+
+void TargetInstrInfo::lowerCopy(MachineInstr *MI,
+ const TargetRegisterInfo *TRI) const {
+ if (MI->allDefsAreDead()) {
+ MI->setDesc(get(TargetOpcode::KILL));
+ return;
+ }
+
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
+
+ bool IdentityCopy = (SrcMO.getReg() == DstMO.getReg());
+ if (IdentityCopy || SrcMO.isUndef()) {
+ // No need to insert an identity copy instruction, but replace with a KILL
+ // if liveness is changed.
+ if (SrcMO.isUndef() || MI->getNumOperands() > 2) {
+ // We must make sure the super-register gets killed. Replace the
+ // instruction with KILL.
+ MI->setDesc(get(TargetOpcode::KILL));
+ return;
+ }
+ // Vanilla identity copy.
+ MI->eraseFromParent();
+ return;
+ }
+
+ copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(), DstMO.getReg(),
+ SrcMO.getReg(), SrcMO.isKill());
+
+ if (MI->getNumOperands() > 2)
+ transferImplicitOperands(MI, TRI);
+ MI->eraseFromParent();
+ return;
+}
+
bool TargetInstrInfo::hasReassociableOperands(
const MachineInstr &Inst, const MachineBasicBlock *MBB) const {
const MachineOperand &Op1 = Inst.getOperand(1);
@@ -1016,6 +1073,17 @@ void TargetInstrInfo::reassociateOps(
InsInstrs.push_back(MIB2);
DelInstrs.push_back(&Prev);
DelInstrs.push_back(&Root);
+
+ // We transformed:
+ // B = A op X (Prev)
+ // C = B op Y (Root)
+ // Into:
+ // B = X op Y (MIB1)
+ // C = A op B (MIB2)
+ // C has the same value as before, B doesn't; as such, keep the debug number
+ // of C but not of B.
+ if (unsigned OldRootNum = Root.peekDebugInstrNum())
+ MIB2.getInstr()->setDebugInstrNum(OldRootNum);
}
void TargetInstrInfo::genAlternativeCodeSequence(
@@ -1037,18 +1105,20 @@ void TargetInstrInfo::genAlternativeCodeSequence(
Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
break;
default:
- break;
+ llvm_unreachable("Unknown pattern for machine combiner");
}
// Don't reassociate if Prev and Root are in different blocks.
if (Prev->getParent() != Root.getParent())
return;
- assert(Prev && "Unknown pattern for machine combiner");
-
reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg);
}
+MachineTraceStrategy TargetInstrInfo::getMachineCombinerTraceStrategy() const {
+ return MachineTraceStrategy::TS_MinInstrCount;
+}
+
bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
const MachineInstr &MI) const {
const MachineFunction &MF = *MI.getMF();
@@ -1329,11 +1399,7 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI,
if (Reg == DestReg)
return ParamLoadedValue(*DestSrc->Source, Expr);
- // Cases where super- or sub-registers needs to be described should
- // be handled by the target's hook implementation.
- assert(!TRI->isSuperOrSubRegisterEq(Reg, DestReg) &&
- "TargetInstrInfo::describeLoadedValue can't describe super- or "
- "sub-regs for copy instructions");
+ // If the target's hook couldn't describe this copy, give up.
return std::nullopt;
} else if (auto RegImm = isAddImmediate(MI, Reg)) {
Register SrcReg = RegImm->Reg;
@@ -1555,15 +1621,107 @@ void TargetInstrInfo::mergeOutliningCandidateAttributes(
F.addFnAttr(Attribute::NoUnwind);
}
+outliner::InstrType TargetInstrInfo::getOutliningType(
+ MachineBasicBlock::iterator &MIT, unsigned Flags) const {
+ MachineInstr &MI = *MIT;
+
+ // NOTE: MI.isMetaInstruction() will match CFI_INSTRUCTION, but some targets
+ // have support for outlining those. Special-case that here.
+ if (MI.isCFIInstruction())
+ // Just go right to the target implementation.
+ return getOutliningTypeImpl(MIT, Flags);
+
+ // Be conservative about inline assembly.
+ if (MI.isInlineAsm())
+ return outliner::InstrType::Illegal;
+
+ // Labels generally can't safely be outlined.
+ if (MI.isLabel())
+ return outliner::InstrType::Illegal;
+
+ // Don't let debug instructions impact analysis.
+ if (MI.isDebugInstr())
+ return outliner::InstrType::Invisible;
+
+ // Some other special cases.
+ switch (MI.getOpcode()) {
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::LIFETIME_START:
+ case TargetOpcode::LIFETIME_END:
+ return outliner::InstrType::Invisible;
+ default:
+ break;
+ }
+
+ // Is this a terminator for a basic block?
+ if (MI.isTerminator()) {
+ // If this is a branch to another block, we can't outline it.
+ if (!MI.getParent()->succ_empty())
+ return outliner::InstrType::Illegal;
+
+ // Don't outline if the branch is not unconditional.
+ if (isPredicated(MI))
+ return outliner::InstrType::Illegal;
+ }
+
+ // Make sure none of the operands of this instruction do anything that
+ // might break if they're moved outside their current function.
+ // This includes MachineBasicBlock references, BlockAddresses,
+ // Constant pool indices and jump table indices.
+ //
+ // A quick note on MO_TargetIndex:
+ // This doesn't seem to be used in any of the architectures that the
+ // MachineOutliner supports, but it was still filtered out in all of them.
+ // There was one exception (RISC-V), but MO_TargetIndex also isn't used there.
+ // As such, this check is removed both here and in the target-specific
+ // implementations. Instead, we assert to make sure this doesn't
+ // catch anyone off-guard somewhere down the line.
+ for (const MachineOperand &MOP : MI.operands()) {
+ // If you hit this assertion, please remove it and adjust
+ // `getOutliningTypeImpl` for your target appropriately if necessary.
+ // Adding the assertion back to other supported architectures
+ // would be nice too :)
+ assert(!MOP.isTargetIndex() && "This isn't used quite yet!");
+
+ // CFI instructions should already have been filtered out at this point.
+ assert(!MOP.isCFIIndex() && "CFI instructions handled elsewhere!");
+
+ // PrologEpilogInserter should've already run at this point.
+ assert(!MOP.isFI() && "FrameIndex instructions should be gone by now!");
+
+ if (MOP.isMBB() || MOP.isBlockAddress() || MOP.isCPI() || MOP.isJTI())
+ return outliner::InstrType::Illegal;
+ }
+
+ // If we don't know, delegate to the target-specific hook.
+ return getOutliningTypeImpl(MIT, Flags);
+}
+
bool TargetInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
unsigned &Flags) const {
// Some instrumentations create special TargetOpcode at the start which
// expands to special code sequences which must be present.
auto First = MBB.getFirstNonDebugInstr();
- if (First != MBB.end() &&
- (First->getOpcode() == TargetOpcode::FENTRY_CALL ||
- First->getOpcode() == TargetOpcode::PATCHABLE_FUNCTION_ENTER))
+ if (First == MBB.end())
+ return true;
+
+ if (First->getOpcode() == TargetOpcode::FENTRY_CALL ||
+ First->getOpcode() == TargetOpcode::PATCHABLE_FUNCTION_ENTER)
+ return false;
+
+ // Some instrumentations create special pseudo-instructions at or just before
+ // the end that must be present.
+ auto Last = MBB.getLastNonDebugInstr();
+ if (Last->getOpcode() == TargetOpcode::PATCHABLE_RET ||
+ Last->getOpcode() == TargetOpcode::PATCHABLE_TAIL_CALL)
return false;
+ if (Last != First && Last->isReturn()) {
+ --Last;
+ if (Last->getOpcode() == TargetOpcode::PATCHABLE_FUNCTION_EXIT ||
+ Last->getOpcode() == TargetOpcode::PATCHABLE_TAIL_CALL)
+ return false;
+ }
return true;
}
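The comment added to reassociateOps relies on associativity: rewriting B = A op X; C = B op Y into B = X op Y; C = A op B leaves C's value unchanged while the intermediate B changes, which is why only C keeps its debug instruction number. A tiny arithmetic check of that identity (plain integers, no LLVM types):

#include <cassert>
#include <cstdio>

int main() {
  int A = 5, X = 7, Y = 11;

  // Before: B = A op X; C = B op Y.
  int B_before = A + X;
  int C_before = B_before + Y;

  // After: B = X op Y; C = A op B.
  int B_after = X + Y;
  int C_after = A + B_after;

  assert(C_before == C_after && "C is preserved by reassociation");
  assert(B_before != B_after && "the intermediate value changes");
  std::printf("C = %d in both orders, B changes from %d to %d\n", C_after,
              B_before, B_after);
}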
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index da8b87babc2d..badb7fe53333 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -29,6 +28,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -49,10 +49,10 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include <algorithm>
#include <cassert>
@@ -209,6 +209,18 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
if (TT.isOSOpenBSD()) {
setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr);
}
+
+ if (TT.isOSWindows() && !TT.isOSCygMing()) {
+ setLibcallName(RTLIB::LDEXP_F32, nullptr);
+ setLibcallName(RTLIB::LDEXP_F80, nullptr);
+ setLibcallName(RTLIB::LDEXP_F128, nullptr);
+ setLibcallName(RTLIB::LDEXP_PPCF128, nullptr);
+
+ setLibcallName(RTLIB::FREXP_F32, nullptr);
+ setLibcallName(RTLIB::FREXP_F80, nullptr);
+ setLibcallName(RTLIB::FREXP_F128, nullptr);
+ setLibcallName(RTLIB::FREXP_PPCF128, nullptr);
+ }
}
/// GetFPLibCall - Helper to return the right libcall for the given floating
@@ -498,6 +510,16 @@ RTLIB::Libcall RTLIB::getPOWI(EVT RetVT) {
POWI_PPCF128);
}
+RTLIB::Libcall RTLIB::getLDEXP(EVT RetVT) {
+ return getFPLibCall(RetVT, LDEXP_F32, LDEXP_F64, LDEXP_F80, LDEXP_F128,
+ LDEXP_PPCF128);
+}
+
+RTLIB::Libcall RTLIB::getFREXP(EVT RetVT) {
+ return getFPLibCall(RetVT, FREXP_F32, FREXP_F64, FREXP_F80, FREXP_F128,
+ FREXP_PPCF128);
+}
+
RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order,
MVT VT) {
unsigned ModeN, ModelN;
@@ -724,7 +746,9 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
// with the Target-specific changes necessary.
MaxAtomicSizeInBitsSupported = 1024;
- MaxDivRemBitWidthSupported = llvm::IntegerType::MAX_INT_BITS;
+ // Assume that even with libcalls, no target supports wider than 128 bit
+ // division.
+ MaxDivRemBitWidthSupported = 128;
MaxLargeFPConvertBitWidthSupported = llvm::IntegerType::MAX_INT_BITS;
@@ -819,8 +843,8 @@ void TargetLoweringBase::initActions() {
ISD::SMULO, ISD::UMULO},
VT, Expand);
- // ADDCARRY operations default to expand
- setOperationAction({ISD::ADDCARRY, ISD::SUBCARRY, ISD::SETCCCARRY,
+ // Carry-using overflow operations default to expand.
+ setOperationAction({ISD::UADDO_CARRY, ISD::USUBO_CARRY, ISD::SETCCCARRY,
ISD::SADDO_CARRY, ISD::SSUBO_CARRY},
VT, Expand);
@@ -843,7 +867,9 @@ void TargetLoweringBase::initActions() {
setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand);
// These library functions default to expand.
- setOperationAction({ISD::FROUND, ISD::FROUNDEVEN, ISD::FPOWI}, VT, Expand);
+ setOperationAction(
+ {ISD::FROUND, ISD::FROUNDEVEN, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP},
+ VT, Expand);
// These operations default to expand for vector types.
if (VT.isVector())
@@ -867,16 +893,22 @@ void TargetLoweringBase::initActions() {
ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_FMAX,
- ISD::VECREDUCE_FMIN, ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_SEQ_FMUL},
+ ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAXIMUM, ISD::VECREDUCE_FMINIMUM,
+ ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_SEQ_FMUL},
VT, Expand);
// Named vector shuffles default to expand.
setOperationAction(ISD::VECTOR_SPLICE, VT, Expand);
- // VP_SREM/UREM default to expand.
- // TODO: Expand all VP intrinsics.
- setOperationAction(ISD::VP_SREM, VT, Expand);
- setOperationAction(ISD::VP_UREM, VT, Expand);
+ // VP operations default to expand.
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) \
+ setOperationAction(ISD::SDOPC, VT, Expand);
+#include "llvm/IR/VPIntrinsics.def"
+
+ // FP environment operations default to expand.
+ setOperationAction(ISD::GET_FPENV, VT, Expand);
+ setOperationAction(ISD::SET_FPENV, VT, Expand);
+ setOperationAction(ISD::RESET_FPENV, VT, Expand);
}
// Most targets ignore the @llvm.prefetch intrinsic.
@@ -907,6 +939,9 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand);
+
+ setOperationAction(ISD::GET_FPENV_MEM, MVT::Other, Expand);
+ setOperationAction(ISD::SET_FPENV_MEM, MVT::Other, Expand);
}
MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
@@ -1137,8 +1172,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
unsigned LaneSizeInBits = NewVT.getScalarSizeInBits();
// Convert sizes such as i33 to i64.
- if (!isPowerOf2_32(LaneSizeInBits))
- LaneSizeInBits = NextPowerOf2(LaneSizeInBits);
+ LaneSizeInBits = llvm::bit_ceil(LaneSizeInBits);
MVT DestVT = TLI->getRegisterType(NewVT);
RegisterVT = DestVT;
@@ -1627,7 +1661,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context,
if (EVT(DestVT).bitsLT(NewVT)) { // Value is expanded, e.g. i64 -> i16.
TypeSize NewVTSize = NewVT.getSizeInBits();
// Convert sizes such as i33 to i64.
- if (!isPowerOf2_32(NewVTSize.getKnownMinValue()))
+ if (!llvm::has_single_bit<uint32_t>(NewVTSize.getKnownMinValue()))
NewVTSize = NewVTSize.coefficientNextPowerOf2();
return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
}
@@ -1691,7 +1725,7 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
// conventions. The frontend should mark functions whose return values
// require promoting with signext or zeroext attributes.
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
- MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+ MVT MinVT = TLI.getRegisterType(MVT::i32);
if (VT.bitsLT(MinVT))
VT = MinVT;
}
@@ -1976,9 +2010,10 @@ void TargetLoweringBase::insertSSPDeclarations(Module &M) const {
"__stack_chk_guard");
// FreeBSD has "__stack_chk_guard" defined externally on libc.so
- if (TM.getRelocationModel() == Reloc::Static &&
+ if (M.getDirectAccessExternalData() &&
!TM.getTargetTriple().isWindowsGNUEnvironment() &&
- !TM.getTargetTriple().isOSFreeBSD())
+ !TM.getTargetTriple().isOSFreeBSD() &&
+ !TM.getTargetTriple().isOSDarwin())
GV->setDSOLocal(true);
}
}
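The VP change above marks every VP SDNode as Expand by defining BEGIN_REGISTER_VP_SDNODE and including VPIntrinsics.def, a conventional X-macro expansion. A self-contained sketch of the same technique using an inline list macro (the opcode names are made up, and no .def file is needed):

#include <cstdio>
#include <map>
#include <string>

enum class Action { Legal, Expand };

// Inline stand-in for a .def file: each entry names one opcode.
#define TOY_VP_OPCODES(X) X(VP_ADD) X(VP_SREM) X(VP_UREM)

int main() {
  std::map<std::string, Action> Actions;

  // Expanding the list sets the action for every opcode in one shot,
  // mirroring the per-SDNode setOperationAction(..., Expand) expansion.
#define HANDLE_OPCODE(NAME) Actions[#NAME] = Action::Expand;
  TOY_VP_OPCODES(HANDLE_OPCODE)
#undef HANDLE_OPCODE

  for (const auto &KV : Actions)
    std::printf("%s -> Expand\n", KV.first.c_str());
}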
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index e760564779c2..3994552884c4 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
@@ -65,12 +64,17 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <string>
using namespace llvm;
using namespace dwarf;
+static cl::opt<bool> JumpTableInFunctionSection(
+ "jumptable-in-function-section", cl::Hidden, cl::init(false),
+ cl::desc("Putting Jump Table in function section"));
+
static void GetObjCImageInfo(Module &M, unsigned &Version, unsigned &Flags,
StringRef &Section) {
SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
@@ -182,26 +186,14 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
// The small model guarantees static code/data size < 4GB, but not where it
// will be in memory. Most of these could end up >2GB away so even a signed
// pc-relative 32-bit address is insufficient, theoretically.
- if (isPositionIndependent()) {
- // ILP32 uses sdata4 instead of sdata8
- if (TgtM.getTargetTriple().getEnvironment() == Triple::GNUILP32) {
- PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
- LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata4;
- } else {
- PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata8;
- LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8;
- TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
- dwarf::DW_EH_PE_sdata8;
- }
- } else {
- PersonalityEncoding = dwarf::DW_EH_PE_absptr;
- LSDAEncoding = dwarf::DW_EH_PE_absptr;
- TTypeEncoding = dwarf::DW_EH_PE_absptr;
- }
+ //
+ // Use DW_EH_PE_indirect even for -fno-pic to avoid copy relocations.
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel |
+ (TgtM.getTargetTriple().getEnvironment() == Triple::GNUILP32
+ ? dwarf::DW_EH_PE_sdata4
+ : dwarf::DW_EH_PE_sdata8);
+ PersonalityEncoding = LSDAEncoding | dwarf::DW_EH_PE_indirect;
+ TTypeEncoding = LSDAEncoding | dwarf::DW_EH_PE_indirect;
break;
case Triple::lanai:
LSDAEncoding = dwarf::DW_EH_PE_absptr;
@@ -591,14 +583,7 @@ static const MCSymbolELF *getLinkedToSymbol(const GlobalObject *GO,
if (!MD)
return nullptr;
- const MDOperand &Op = MD->getOperand(0);
- if (!Op.get())
- return nullptr;
-
- auto *VM = dyn_cast<ValueAsMetadata>(Op);
- if (!VM)
- report_fatal_error("MD_associated operand is not ValueAsMetadata");
-
+ auto *VM = cast<ValueAsMetadata>(MD->getOperand(0).get());
auto *OtherGV = dyn_cast<GlobalValue>(VM->getValue());
return OtherGV ? dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGV)) : nullptr;
}
@@ -629,21 +614,21 @@ static unsigned getEntrySizeForKind(SectionKind Kind) {
/// Return the section prefix name used by options FunctionsSections and
/// DataSections.
-static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
+static StringRef getSectionPrefixForGlobal(SectionKind Kind, bool IsLarge) {
if (Kind.isText())
return ".text";
if (Kind.isReadOnly())
- return ".rodata";
+ return IsLarge ? ".lrodata" : ".rodata";
if (Kind.isBSS())
- return ".bss";
+ return IsLarge ? ".lbss" : ".bss";
if (Kind.isThreadData())
return ".tdata";
if (Kind.isThreadBSS())
return ".tbss";
if (Kind.isData())
- return ".data";
+ return IsLarge ? ".ldata" : ".data";
if (Kind.isReadOnlyWithRel())
- return ".data.rel.ro";
+ return IsLarge ? ".ldata.rel.ro" : ".data.rel.ro";
llvm_unreachable("Unknown section kind");
}
@@ -665,7 +650,10 @@ getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind,
Name = ".rodata.cst";
Name += utostr(EntrySize);
} else {
- Name = getSectionPrefixForGlobal(Kind);
+ bool IsLarge = false;
+ if (isa<GlobalVariable>(GO))
+ IsLarge = TM.isLargeData();
+ Name = getSectionPrefixForGlobal(Kind, IsLarge);
}
bool HasPrefix = false;
@@ -867,6 +855,12 @@ static MCSectionELF *selectELFSectionForGlobal(
Group = C->getName();
IsComdat = C->getSelectionKind() == Comdat::Any;
}
+ if (isa<GlobalVariable>(GO)) {
+ if (TM.isLargeData()) {
+ assert(TM.getTargetTriple().getArch() == Triple::x86_64);
+ Flags |= ELF::SHF_X86_64_LARGE;
+ }
+ }
// Get the section entry size based on the kind.
unsigned EntrySize = getEntrySizeForKind(Kind);
@@ -1217,11 +1211,12 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
MCSection *TargetLoweringObjectFileMachO::getStaticDtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
- // TODO(yln): Remove -lower-global-dtors-via-cxa-atexit fallback flag
- // (LowerGlobalDtorsViaCxaAtExit) and always issue a fatal error here.
- if (TM->Options.LowerGlobalDtorsViaCxaAtExit)
- report_fatal_error("@llvm.global_dtors should have been lowered already");
return StaticDtorSection;
+ // In userspace, we lower global destructors via atexit(), but kernel/kext
+ // environments do not provide this function so we still need to support the
+ // legacy way here.
+ // See the -disable-atexit-based-global-dtor-lowering CodeGen flag for more
+ // context.
}
void TargetLoweringObjectFileMachO::emitModuleMetadata(MCStreamer &Streamer,
@@ -1282,6 +1277,20 @@ MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
StringRef SectionName = GO->getSection();
+ const GlobalVariable *GV = dyn_cast<GlobalVariable>(GO);
+ if (GV && GV->hasImplicitSection()) {
+ auto Attrs = GV->getAttributes();
+ if (Attrs.hasAttribute("bss-section") && Kind.isBSS()) {
+ SectionName = Attrs.getAttribute("bss-section").getValueAsString();
+ } else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) {
+ SectionName = Attrs.getAttribute("rodata-section").getValueAsString();
+ } else if (Attrs.hasAttribute("relro-section") && Kind.isReadOnlyWithRel()) {
+ SectionName = Attrs.getAttribute("relro-section").getValueAsString();
+ } else if (Attrs.hasAttribute("data-section") && Kind.isData()) {
+ SectionName = Attrs.getAttribute("data-section").getValueAsString();
+ }
+ }
+
const Function *F = dyn_cast<Function>(GO);
if (F && F->hasFnAttribute("implicit-section-name")) {
SectionName = F->getFnAttribute("implicit-section-name").getValueAsString();
@@ -1411,6 +1420,11 @@ MCSection *TargetLoweringObjectFileMachO::getSectionForConstant(
return ReadOnlySection; // .const
}
+MCSection *TargetLoweringObjectFileMachO::getSectionForCommandLines() const {
+ return getContext().getMachOSection("__TEXT", "__command_line", 0,
+ SectionKind::getReadOnly());
+}
+
const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference(
const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
MachineModuleInfo *MMI, MCStreamer &Streamer) const {
@@ -1796,6 +1810,19 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID);
}
+bool TargetLoweringObjectFileCOFF::shouldPutJumpTableInFunctionSection(
+ bool UsesLabelDifference, const Function &F) const {
+ if (TM->getTargetTriple().getArch() == Triple::x86_64) {
+ if (!JumpTableInFunctionSection) {
+ // We can always create relative relocations, so use another section
+ // that can be marked non-executable.
+ return false;
+ }
+ }
+ return TargetLoweringObjectFile::shouldPutJumpTableInFunctionSection(
+ UsesLabelDifference, F);
+}
+
void TargetLoweringObjectFileCOFF::emitModuleMetadata(MCStreamer &Streamer,
Module &M) const {
emitLinkerDirectives(Streamer, M);
@@ -2152,7 +2179,7 @@ static MCSectionWasm *selectWasmSectionForGlobal(
}
bool UniqueSectionNames = TM.getUniqueSectionNames();
- SmallString<128> Name = getSectionPrefixForGlobal(Kind);
+ SmallString<128> Name = getSectionPrefixForGlobal(Kind, /*IsLarge=*/false);
if (const auto *F = dyn_cast<Function>(GO)) {
const auto &OptionalPrefix = F->getSectionPrefix();
@@ -2335,8 +2362,11 @@ MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal(
XCOFF::StorageMappingClass MappingClass;
if (Kind.isText())
MappingClass = XCOFF::XMC_PR;
- else if (Kind.isData() || Kind.isReadOnlyWithRel() || Kind.isBSS())
+ else if (Kind.isData() || Kind.isBSS())
MappingClass = XCOFF::XMC_RW;
+ else if (Kind.isReadOnlyWithRel())
+ MappingClass =
+ TM.Options.XCOFFReadOnlyPointers ? XCOFF::XMC_RO : XCOFF::XMC_RW;
else if (Kind.isReadOnly())
MappingClass = XCOFF::XMC_RO;
else
@@ -2421,9 +2451,18 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
return TextSection;
}
- // TODO: We may put Kind.isReadOnlyWithRel() under option control, because
- // user may want to have read-only data with relocations placed into a
- // read-only section by the compiler.
+ if (TM.Options.XCOFFReadOnlyPointers && Kind.isReadOnlyWithRel()) {
+ if (!TM.getDataSections())
+ report_fatal_error(
+ "ReadOnlyPointers is supported only if data sections is turned on");
+
+ SmallString<128> Name;
+ getNameWithPrefix(Name, GO, TM);
+ return getContext().getXCOFFSection(
+ Name, SectionKind::getReadOnly(),
+ XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD));
+ }
+
// For BSS kind, zero initialized data must be emitted to the .data section
// because external linkage control sections that get mapped to the .bss
// section will be linked as tentative definitions, which is only appropriate
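
// Illustrative sketch, not part of the patch: the XCOFF hunks above place
// read-only data with relocations into an XMC_RO csect only when the new
// XCOFFReadOnlyPointers option is on, and require data sections for that.
// The toy function restates the decision; names are illustrative assumptions,
// and the real lowering calls report_fatal_error() instead of asserting.
#include <cassert>

enum class MappingClass { XMC_RO, XMC_RW };

MappingClass classForReadOnlyWithRel(bool ReadOnlyPointers, bool DataSections) {
  if (!ReadOnlyPointers)
    return MappingClass::XMC_RW; // legacy behavior: keep it with writable data
  assert(DataSections &&
         "ReadOnlyPointers is supported only if data sections are turned on");
  return MappingClass::XMC_RO;   // per-global read-only csect
}
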
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 3127328c363e..98ea2f21b3c8 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -42,6 +42,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/Threading.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Scalar.h"
@@ -99,6 +101,9 @@ static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden,
cl::desc("Disable Copy Propagation pass"));
static cl::opt<bool> DisablePartialLibcallInlining("disable-partial-libcall-inlining",
cl::Hidden, cl::desc("Disable Partial Libcall Inlining"));
+static cl::opt<bool> DisableAtExitBasedGlobalDtorLowering(
+ "disable-atexit-based-global-dtor-lowering", cl::Hidden,
+ cl::desc("For MachO, disable atexit()-based global destructor lowering"));
static cl::opt<bool> EnableImplicitNullChecks(
"enable-implicit-null-checks",
cl::desc("Fold null checks into faulting memory operations"),
@@ -168,12 +173,6 @@ static cl::opt<GlobalISelAbortMode> EnableGlobalISelAbort(
clEnumValN(GlobalISelAbortMode::DisableWithDiag, "2",
"Disable the abort but emit a diagnostic on failure")));
-// An option that disables inserting FS-AFDO discriminators before emit.
-// This is mainly for debugging and tuning purpose.
-static cl::opt<bool>
- FSNoFinalDiscrim("fs-no-final-discrim", cl::init(false), cl::Hidden,
- cl::desc("Do not insert FS-AFDO discriminators before "
- "emit."));
// Disable MIRProfileLoader before RegAlloc. This is for debugging and
// tuning purposes.
static cl::opt<bool> DisableRAFSProfileLoader(
@@ -878,7 +877,7 @@ void TargetPassConfig::addIRPasses() {
// For MachO, lower @llvm.global_dtors into @llvm.global_ctors with
// __cxa_atexit() calls to avoid emitting the deprecated __mod_term_func.
if (TM->getTargetTriple().isOSBinFormatMachO() &&
- TM->Options.LowerGlobalDtorsViaCxaAtExit)
+ !DisableAtExitBasedGlobalDtorLowering)
addPass(createLowerGlobalDtorsLegacyPass());
// Make sure that no unreachable blocks are instruction selected.
@@ -977,6 +976,8 @@ void TargetPassConfig::addISelPrepare() {
if (requiresCodeGenSCCOrder())
addPass(new DummyCGSCCPass);
+ addPass(createCallBrPass());
+
// Add both the safe stack and the stack protection passes: each of them will
// only protect functions that have corresponding attributes.
addPass(createSafeStackPass());
@@ -1082,8 +1083,8 @@ bool TargetPassConfig::addISelPasses() {
if (TM->useEmulatedTLS())
addPass(createLowerEmuTLSPass());
- addPass(createPreISelIntrinsicLoweringPass());
PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
+ addPass(createPreISelIntrinsicLoweringPass());
addPass(createExpandLargeDivRemPass());
addPass(createExpandLargeFpConvertPass());
addIRPasses();
@@ -1149,9 +1150,9 @@ void TargetPassConfig::addMachinePasses() {
sampleprof::FSDiscriminatorPass::Pass1));
const std::string ProfileFile = getFSProfileFile(TM);
if (!ProfileFile.empty() && !DisableRAFSProfileLoader)
- addPass(
- createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
- sampleprof::FSDiscriminatorPass::Pass1));
+ addPass(createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
+ sampleprof::FSDiscriminatorPass::Pass1,
+ nullptr));
}
// Run register allocation and passes that are tightly coupled with it,
@@ -1219,14 +1220,6 @@ void TargetPassConfig::addMachinePasses() {
addPass(&XRayInstrumentationID);
addPass(&PatchableFunctionID);
- if (EnableFSDiscriminator && !FSNoFinalDiscrim)
- // Add FS discriminators here so that all the instruction duplicates
- // in different BBs get their own discriminators. With this, we can "sum"
- // the SampleFDO counters instead of using MAX. This will improve the
- // SampleFDO profile quality.
- addPass(createMIRAddFSDiscriminatorsPass(
- sampleprof::FSDiscriminatorPass::PassLast));
-
addPreEmitPass();
if (TM->Options.EnableIPRA)
@@ -1252,6 +1245,10 @@ void TargetPassConfig::addMachinePasses() {
addPass(createMachineOutlinerPass(RunOnAllFunctions));
}
+ if (EnableFSDiscriminator)
+ addPass(createMIRAddFSDiscriminatorsPass(
+ sampleprof::FSDiscriminatorPass::PassLast));
+
// Machine function splitter uses the basic block sections feature. Both
// cannot be enabled at the same time. Basic block sections takes precedence.
// FIXME: In principle, BasicBlockSection::Labels and splitting can be used
@@ -1264,9 +1261,25 @@ void TargetPassConfig::addMachinePasses() {
addPass(llvm::createBasicBlockSectionsPass());
} else if (TM->Options.EnableMachineFunctionSplitter ||
EnableMachineFunctionSplitter) {
+ const std::string ProfileFile = getFSProfileFile(TM);
+ if (!ProfileFile.empty()) {
+ if (EnableFSDiscriminator) {
+ addPass(createMIRProfileLoaderPass(
+ ProfileFile, getFSRemappingFile(TM),
+ sampleprof::FSDiscriminatorPass::PassLast, nullptr));
+ } else {
+ // A sample profile is given, but FSDiscriminator is not
+ // enabled; this may result in a performance regression.
+ WithColor::warning()
+ << "Using AutoFDO without FSDiscriminator for MFS may regress "
+ "performance.";
+ }
+ }
addPass(createMachineFunctionSplitterPass());
}
+ addPostBBSections();
+
if (!DisableCFIFixup && TM->Options.EnableCFIFixup)
addPass(createCFIFixup());
@@ -1525,9 +1538,9 @@ void TargetPassConfig::addBlockPlacement() {
sampleprof::FSDiscriminatorPass::Pass2));
const std::string ProfileFile = getFSProfileFile(TM);
if (!ProfileFile.empty() && !DisableLayoutFSProfileLoader)
- addPass(
- createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
- sampleprof::FSDiscriminatorPass::Pass2));
+ addPass(createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
+ sampleprof::FSDiscriminatorPass::Pass2,
+ nullptr));
}
if (addPass(&MachineBlockPlacementID)) {
// Run a separate pass to collect block placement statistics.
diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index a41d5999d961..77d2dfcf2323 100644
--- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -33,7 +34,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
@@ -79,8 +79,8 @@ bool TargetRegisterInfo::shouldRegionSplitForVirtReg(
void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet,
MCRegister Reg) const {
- for (MCSuperRegIterator AI(Reg, this, true); AI.isValid(); ++AI)
- RegisterSet.set(*AI);
+ for (MCPhysReg SR : superregs_inclusive(Reg))
+ RegisterSet.set(SR);
}
bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
@@ -90,9 +90,9 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
for (unsigned Reg : RegisterSet.set_bits()) {
if (Checked[Reg])
continue;
- for (MCSuperRegIterator SR(Reg, this); SR.isValid(); ++SR) {
- if (!RegisterSet[*SR] && !is_contained(Exceptions, Reg)) {
- dbgs() << "Error: Super register " << printReg(*SR, this)
+ for (MCPhysReg SR : superregs(Reg)) {
+ if (!RegisterSet[SR] && !is_contained(Exceptions, Reg)) {
+ dbgs() << "Error: Super register " << printReg(SR, this)
<< " of reserved register " << printReg(Reg, this)
<< " is not reserved.\n";
return false;
@@ -100,7 +100,7 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
// We transitively check superregs. So we can remember this for later
// to avoid compile-time explosion in deep register hierarchies.
- Checked.set(*SR);
+ Checked.set(SR);
}
}
return true;
@@ -281,7 +281,7 @@ const TargetRegisterClass *firstCommonClass(const uint32_t *A,
const TargetRegisterInfo *TRI) {
for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
if (unsigned Common = *A++ & *B++)
- return TRI->getRegClass(I + countTrailingZeros(Common));
+ return TRI->getRegClass(I + llvm::countr_zero(Common));
return nullptr;
}
@@ -424,8 +424,8 @@ bool TargetRegisterInfo::getRegAllocationHints(
SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo &MRI = MF.getRegInfo();
- const std::pair<Register, SmallVector<Register, 4>> &Hints_MRI =
- MRI.getRegAllocationHints(VirtReg);
+ const std::pair<unsigned, SmallVector<Register, 4>> &Hints_MRI =
+ MRI.getRegAllocationHints(VirtReg);
SmallSet<Register, 32> HintedRegs;
// First hint may be a target hint.
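
// Illustrative sketch, not from the patch: the hunks above replace the old
// MCSuperRegIterator/MCRegUnitIterator loops with the range-based helpers on
// MCRegisterInfo. Typical usage outside TargetRegisterInfo might look like
// this; the function and its arguments are assumptions for illustration.
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
using namespace llvm;

static void markAliases(const TargetRegisterInfo &TRI, MCRegister Reg,
                        BitVector &Set) {
  for (MCPhysReg SR : TRI.superregs_inclusive(Reg)) // Reg plus its super-regs
    Set.set(SR);
  for (MCRegUnit Unit : TRI.regunits(Reg))          // the register's units
    (void)Unit;                                     // e.g. query LiveIntervals here
}
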
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 8cb3667aea28..c3ea76bf8cea 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -87,18 +87,18 @@ static cl::opt<unsigned> MaxDataFlowEdge(
namespace {
class TwoAddressInstructionPass : public MachineFunctionPass {
- MachineFunction *MF;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- const InstrItineraryData *InstrItins;
- MachineRegisterInfo *MRI;
- LiveVariables *LV;
- LiveIntervals *LIS;
- AliasAnalysis *AA;
- CodeGenOpt::Level OptLevel;
+ MachineFunction *MF = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const InstrItineraryData *InstrItins = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ LiveVariables *LV = nullptr;
+ LiveIntervals *LIS = nullptr;
+ AliasAnalysis *AA = nullptr;
+ CodeGenOpt::Level OptLevel = CodeGenOpt::None;
// The current basic block being processed.
- MachineBasicBlock *MBB;
+ MachineBasicBlock *MBB = nullptr;
// Keep track the distance of a MI from the start of the current basic block.
DenseMap<MachineInstr*, unsigned> DistanceMap;
@@ -198,8 +198,6 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE,
"Two-Address instruction pass", false, false)
-static bool isPlainlyKilled(MachineInstr *MI, Register Reg, LiveIntervals *LIS);
-
/// Return the MachineInstr* if it is the single def of the Reg in current BB.
static MachineInstr *getSingleDef(Register Reg, MachineBasicBlock *BB,
const MachineRegisterInfo *MRI) {
@@ -287,7 +285,7 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
/// Test if the given register value, which is used by the
/// given instruction, is killed by the given instruction.
-static bool isPlainlyKilled(MachineInstr *MI, Register Reg,
+static bool isPlainlyKilled(const MachineInstr *MI, Register Reg,
LiveIntervals *LIS) {
if (LIS && Reg.isVirtual() && !LIS->isNotInMIMap(*MI)) {
// FIXME: Sometimes tryInstructionTransform() will add instructions and
@@ -311,6 +309,12 @@ static bool isPlainlyKilled(MachineInstr *MI, Register Reg,
return MI->killsRegister(Reg);
}
+/// Test if the register used by the given operand is killed by the operand's
+/// instruction.
+static bool isPlainlyKilled(const MachineOperand &MO, LiveIntervals *LIS) {
+ return MO.isKill() || isPlainlyKilled(MO.getParent(), MO.getReg(), LIS);
+}
+
/// Test if the given register value, which is used by the given
/// instruction, is killed by the given instruction. This looks through
/// coalescable copies to see if the original value is potentially not killed.
@@ -404,7 +408,7 @@ findOnlyInterestingUse(Register Reg, MachineBasicBlock *MBB,
}
if (UseMI.isCommutable()) {
unsigned Src1 = TargetInstrInfo::CommuteAnyOperandIndex;
- unsigned Src2 = UseMI.getOperandNo(UseOp);
+ unsigned Src2 = UseOp->getOperandNo();
if (TII->findCommutedOpIndices(UseMI, Src1, Src2)) {
MachineOperand &MO = UseMI.getOperand(Src1);
if (MO.isReg() && MO.isUse() &&
@@ -693,10 +697,8 @@ bool TwoAddressInstructionPass::convertInstTo3Addr(
assert(NewMI->getNumExplicitDefs() == 1);
// Find the old and new def location.
- auto OldIt = mi->defs().begin();
- auto NewIt = NewMI->defs().begin();
- unsigned OldIdx = mi->getOperandNo(OldIt);
- unsigned NewIdx = NewMI->getOperandNo(NewIt);
+ unsigned OldIdx = mi->defs().begin()->getOperandNo();
+ unsigned NewIdx = NewMI->defs().begin()->getOperandNo();
// Record that one def has been replaced by the other.
unsigned NewInstrNum = NewMI->getDebugInstrNum();
@@ -863,8 +865,7 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
Defs.push_back(MOReg);
else {
Uses.push_back(MOReg);
- if (MOReg != Reg && (MO.isKill() ||
- (LIS && isPlainlyKilled(MI, MOReg, LIS))))
+ if (MOReg != Reg && isPlainlyKilled(MO, LIS))
Kills.push_back(MOReg);
}
}
@@ -915,8 +916,7 @@ bool TwoAddressInstructionPass::rescheduleMIBelowKill(
} else {
if (regOverlapsSet(Defs, MOReg, TRI))
return false;
- bool isKill =
- MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS));
+ bool isKill = isPlainlyKilled(MO, LIS);
if (MOReg != Reg && ((isKill && regOverlapsSet(Uses, MOReg, TRI)) ||
regOverlapsSet(Kills, MOReg, TRI)))
// Don't want to extend other live ranges and update kills.
@@ -1044,7 +1044,7 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI(
continue;
if (isDefTooClose(MOReg, DI->second, MI))
return false;
- bool isKill = MO.isKill() || (LIS && isPlainlyKilled(KillMI, MOReg, LIS));
+ bool isKill = isPlainlyKilled(MO, LIS);
if (MOReg == Reg && !isKill)
return false;
Uses.push_back(MOReg);
@@ -1086,8 +1086,7 @@ bool TwoAddressInstructionPass::rescheduleKillAboveMI(
if (regOverlapsSet(Kills, MOReg, TRI))
// Don't want to extend other live ranges and update kills.
return false;
- if (&OtherMI != MI && MOReg == Reg &&
- !(MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS))))
+ if (&OtherMI != MI && MOReg == Reg && !isPlainlyKilled(MO, LIS))
// We can't schedule across a use of the register in question.
return false;
} else {
@@ -1533,8 +1532,8 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
S.addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI));
}
} else {
- for (MCRegUnitIterator Unit(RegA, TRI); Unit.isValid(); ++Unit) {
- if (LiveRange *LR = LIS->getCachedRegUnit(*Unit)) {
+ for (MCRegUnit Unit : TRI->regunits(RegA)) {
+ if (LiveRange *LR = LIS->getCachedRegUnit(Unit)) {
VNInfo *VNI =
LR->getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
LR->addSegment(LiveRange::Segment(LastCopyIdx, endIdx, VNI));
@@ -1566,8 +1565,8 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
if (AllUsesCopied) {
LaneBitmask RemainingUses = LaneBitmask::getNone();
// Replace other (un-tied) uses of regB with LastCopiedReg.
- for (MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ for (MachineOperand &MO : MI->all_uses()) {
+ if (MO.getReg() == RegB) {
if (MO.getSubReg() == SubRegB && !IsEarlyClobber) {
if (MO.isKill()) {
MO.setIsKill(false);
@@ -1619,8 +1618,8 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
// regB is still used in this instruction, but a kill flag was
// removed from a different tied use of regB, so now we need to add
// a kill flag to one of the remaining uses of regB.
- for (MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) {
+ for (MachineOperand &MO : MI->all_uses()) {
+ if (MO.getReg() == RegB) {
MO.setIsKill(true);
break;
}
diff --git a/llvm/lib/CodeGen/TypePromotion.cpp b/llvm/lib/CodeGen/TypePromotion.cpp
index e6c0b3242d67..426292345a14 100644
--- a/llvm/lib/CodeGen/TypePromotion.cpp
+++ b/llvm/lib/CodeGen/TypePromotion.cpp
@@ -235,8 +235,6 @@ bool TypePromotionImpl::isSource(Value *V) {
return true;
else if (isa<LoadInst>(V))
return true;
- else if (isa<BitCastInst>(V))
- return true;
else if (auto *Call = dyn_cast<CallInst>(V))
return Call->hasRetAttr(Attribute::AttrKind::ZExt);
else if (auto *Trunc = dyn_cast<TruncInst>(V))
@@ -724,8 +722,9 @@ bool TypePromotionImpl::isSupportedValue(Value *V) {
case Instruction::Ret:
case Instruction::Load:
case Instruction::Trunc:
- case Instruction::BitCast:
return isSupportedType(I);
+ case Instruction::BitCast:
+ return I->getOperand(0)->getType() == I->getType();
case Instruction::ZExt:
return isSupportedType(I->getOperand(0));
case Instruction::ICmp:
@@ -960,8 +959,8 @@ bool TypePromotionImpl::run(Function &F, const TargetMachine *TM,
if (isa<ZExtInst>(&I) && isa<PHINode>(I.getOperand(0)) &&
isa<IntegerType>(I.getType()) && BBIsInLoop(&BB)) {
- LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: " << I.getOperand(0)
- << "\n");
+ LLVM_DEBUG(dbgs() << "IR Promotion: Searching from: "
+ << *I.getOperand(0) << "\n");
EVT ZExtVT = TLI->getValueType(DL, I.getType());
Instruction *Phi = static_cast<Instruction *>(I.getOperand(0));
auto PromoteWidth = ZExtVT.getFixedSizeInBits();
diff --git a/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index 5e8514f525e9..f17450d264ba 100644
--- a/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -120,16 +120,14 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
while (BB.succ_begin() != BB.succ_end()) {
MachineBasicBlock* succ = *BB.succ_begin();
- MachineBasicBlock::iterator start = succ->begin();
- while (start != succ->end() && start->isPHI()) {
- for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2)
- if (start->getOperand(i).isMBB() &&
- start->getOperand(i).getMBB() == &BB) {
- start->removeOperand(i);
- start->removeOperand(i-1);
+ for (MachineInstr &Phi : succ->phis()) {
+ for (unsigned i = Phi.getNumOperands() - 1; i >= 2; i -= 2) {
+ if (Phi.getOperand(i).isMBB() &&
+ Phi.getOperand(i).getMBB() == &BB) {
+ Phi.removeOperand(i);
+ Phi.removeOperand(i - 1);
}
-
- start++;
+ }
}
BB.removeSuccessor(BB.succ_begin());
@@ -152,18 +150,18 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
// Prune unneeded PHI entries.
SmallPtrSet<MachineBasicBlock*, 8> preds(BB.pred_begin(),
BB.pred_end());
- MachineBasicBlock::iterator phi = BB.begin();
- while (phi != BB.end() && phi->isPHI()) {
- for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2)
- if (!preds.count(phi->getOperand(i).getMBB())) {
- phi->removeOperand(i);
- phi->removeOperand(i-1);
+ for (MachineInstr &Phi : make_early_inc_range(BB.phis())) {
+ for (unsigned i = Phi.getNumOperands() - 1; i >= 2; i -= 2) {
+ if (!preds.count(Phi.getOperand(i).getMBB())) {
+ Phi.removeOperand(i);
+ Phi.removeOperand(i - 1);
ModifiedPHI = true;
}
+ }
- if (phi->getNumOperands() == 3) {
- const MachineOperand &Input = phi->getOperand(1);
- const MachineOperand &Output = phi->getOperand(0);
+ if (Phi.getNumOperands() == 3) {
+ const MachineOperand &Input = Phi.getOperand(1);
+ const MachineOperand &Output = Phi.getOperand(0);
Register InputReg = Input.getReg();
Register OutputReg = Output.getReg();
assert(Output.getSubReg() == 0 && "Cannot have output subregister");
@@ -182,16 +180,13 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
// insert a COPY instead of simply replacing the output
// with the input.
const TargetInstrInfo *TII = F.getSubtarget().getInstrInfo();
- BuildMI(BB, BB.getFirstNonPHI(), phi->getDebugLoc(),
+ BuildMI(BB, BB.getFirstNonPHI(), Phi.getDebugLoc(),
TII->get(TargetOpcode::COPY), OutputReg)
.addReg(InputReg, getRegState(Input), InputSub);
}
- phi++->eraseFromParent();
+ Phi.eraseFromParent();
}
- continue;
}
-
- ++phi;
}
}
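
// Illustrative sketch, not from the patch: the rewrite above iterates PHIs with
// MachineBasicBlock::phis() wrapped in make_early_inc_range(), which keeps the
// loop valid while the current instruction may be erased. The idiom in
// isolation (the erase condition is a placeholder; the real pass first rewrites
// or copies the incoming value):
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

static void eraseSingleEntryPhis(MachineBasicBlock &MBB) {
  for (MachineInstr &Phi : make_early_inc_range(MBB.phis())) {
    // Operand 0 is the def; the rest come in (value, predecessor) pairs.
    if (Phi.getNumOperands() == 3)
      Phi.eraseFromParent(); // safe: the range advanced before the erase
  }
}
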
diff --git a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
index 88460971338c..fc1cbfefb0db 100644
--- a/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
+++ b/llvm/lib/CodeGen/VLIWMachineScheduler.cpp
@@ -209,7 +209,7 @@ void VLIWMachineScheduler::schedule() {
Topo.InitDAGTopologicalSorting();
// Postprocess the DAG to add platform-specific artificial dependencies.
- postprocessDAG();
+ postProcessDAG();
SmallVector<SUnit *, 8> TopRoots, BotRoots;
findRootsAndBiasEdges(TopRoots, BotRoots);
diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp
index 608434800bc3..d514e1642e29 100644
--- a/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/llvm/lib/CodeGen/ValueTypes.cpp
@@ -10,6 +10,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Support/WithColor.h"
@@ -173,9 +174,20 @@ std::string EVT::getEVTString() const {
case MVT::Untyped: return "Untyped";
case MVT::funcref: return "funcref";
case MVT::externref: return "externref";
+ case MVT::aarch64svcount:
+ return "aarch64svcount";
+ case MVT::spirvbuiltin:
+ return "spirvbuiltin";
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void EVT::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+#endif
+
/// getTypeForEVT - This method returns an LLVM type corresponding to the
/// specified EVT. For integer types, this returns an unsigned type. Note
/// that this will abort for types that cannot be represented.
@@ -202,14 +214,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::f128: return Type::getFP128Ty(Context);
case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
case MVT::x86mmx: return Type::getX86_MMXTy(Context);
+ case MVT::aarch64svcount:
+ return TargetExtType::get(Context, "aarch64.svcount");
case MVT::x86amx: return Type::getX86_AMXTy(Context);
case MVT::i64x8: return IntegerType::get(Context, 512);
- case MVT::externref:
- // pointer to opaque struct in addrspace(10)
- return PointerType::get(StructType::create(Context), 10);
- case MVT::funcref:
- // pointer to i8 addrspace(20)
- return PointerType::get(Type::getInt8Ty(Context), 20);
+ case MVT::externref: return Type::getWasm_ExternrefTy(Context);
+ case MVT::funcref: return Type::getWasm_FuncrefTy(Context);
case MVT::v1i1:
return FixedVectorType::get(Type::getInt1Ty(Context), 1);
case MVT::v2i1:
@@ -561,6 +571,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
/// pointers as MVT::iPTR. If HandleUnknown is true, unknown types are returned
/// as Other, otherwise they are invalid.
MVT MVT::getVT(Type *Ty, bool HandleUnknown){
+ assert(Ty != nullptr && "Invalid type");
switch (Ty->getTypeID()) {
default:
if (HandleUnknown) return MVT(MVT::Other);
@@ -575,6 +586,16 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){
case Type::DoubleTyID: return MVT(MVT::f64);
case Type::X86_FP80TyID: return MVT(MVT::f80);
case Type::X86_MMXTyID: return MVT(MVT::x86mmx);
+ case Type::TargetExtTyID: {
+ TargetExtType *TargetExtTy = cast<TargetExtType>(Ty);
+ if (TargetExtTy->getName() == "aarch64.svcount")
+ return MVT(MVT::aarch64svcount);
+ else if (TargetExtTy->getName().starts_with("spirv."))
+ return MVT(MVT::spirvbuiltin);
+ if (HandleUnknown)
+ return MVT(MVT::Other);
+ llvm_unreachable("Unknown target ext type!");
+ }
case Type::X86_AMXTyID: return MVT(MVT::x86amx);
case Type::FP128TyID: return MVT(MVT::f128);
case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
@@ -607,3 +628,15 @@ EVT EVT::getEVT(Type *Ty, bool HandleUnknown){
}
}
}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void MVT::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+#endif
+
+void MVT::print(raw_ostream &OS) const {
+ OS << EVT(*this).getEVTString();
+}
+
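
// Illustrative sketch, not from the patch: with the hunk above, MVT::getVT()
// and EVT::getEVT() understand target extension types such as AArch64's
// "aarch64.svcount". A minimal query, assuming the usual LLVM headers:
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

static void queryTargetExtVT() {
  LLVMContext Ctx;
  Type *SVCount = TargetExtType::get(Ctx, "aarch64.svcount");
  MVT VT = MVT::getVT(SVCount);  // expected: MVT::aarch64svcount
  EVT E = EVT::getEVT(SVCount);  // E.getEVTString() should be "aarch64svcount"
  (void)VT;
  (void)E;
}
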
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index f80b06d7e9b7..a816bd5b52de 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -116,10 +116,10 @@ bool VirtRegMap::hasPreferredPhys(Register VirtReg) const {
}
bool VirtRegMap::hasKnownPreference(Register VirtReg) const {
- std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(VirtReg);
- if (Register::isPhysicalRegister(Hint.second))
+ std::pair<unsigned, Register> Hint = MRI->getRegAllocationHint(VirtReg);
+ if (Hint.second.isPhysical())
return true;
- if (Register::isVirtualRegister(Hint.second))
+ if (Hint.second.isVirtual())
return hasPhys(Hint.second);
return false;
}
@@ -181,14 +181,14 @@ LLVM_DUMP_METHOD void VirtRegMap::dump() const {
namespace {
class VirtRegRewriter : public MachineFunctionPass {
- MachineFunction *MF;
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
- MachineRegisterInfo *MRI;
- SlotIndexes *Indexes;
- LiveIntervals *LIS;
- VirtRegMap *VRM;
- LiveDebugVariables *DebugVars;
+ MachineFunction *MF = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
+ SlotIndexes *Indexes = nullptr;
+ LiveIntervals *LIS = nullptr;
+ VirtRegMap *VRM = nullptr;
+ LiveDebugVariables *DebugVars = nullptr;
DenseSet<Register> RewriteRegs;
bool ClearVirtRegs;
@@ -514,8 +514,8 @@ bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
SlotIndex MIIndex = LIS->getInstructionIndex(MI);
SlotIndex BeforeMIUses = MIIndex.getBaseIndex();
SlotIndex AfterMIDefs = MIIndex.getBoundaryIndex();
- for (MCRegUnitIterator Unit(SuperPhysReg, TRI); Unit.isValid(); ++Unit) {
- const LiveRange &UnitRange = LIS->getRegUnit(*Unit);
+ for (MCRegUnit Unit : TRI->regunits(SuperPhysReg)) {
+ const LiveRange &UnitRange = LIS->getRegUnit(Unit);
// If the regunit is live both before and after MI,
// we assume it is live through.
// Generally speaking, this is not true, because something like
@@ -633,9 +633,8 @@ void VirtRegRewriter::rewrite() {
// Don't bother maintaining accurate LiveIntervals for registers which were
// already allocated.
for (Register PhysReg : RewriteRegs) {
- for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid();
- ++Units) {
- LIS->removeRegUnit(*Units);
+ for (MCRegUnit Unit : TRI->regunits(PhysReg)) {
+ LIS->removeRegUnit(Unit);
}
}
}
diff --git a/llvm/lib/CodeGen/WasmEHPrepare.cpp b/llvm/lib/CodeGen/WasmEHPrepare.cpp
index 361f185243b1..cc04807e8455 100644
--- a/llvm/lib/CodeGen/WasmEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WasmEHPrepare.cpp
@@ -80,6 +80,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/WasmEHFuncInfo.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/InitializePasses.h"
@@ -209,6 +210,12 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
if (CatchPads.empty() && CleanupPads.empty())
return false;
+ if (!F.hasPersonalityFn() ||
+ !isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) {
+ report_fatal_error("Function '" + F.getName() +
+ "' does not have a correct Wasm personality function "
+ "'__gxx_wasm_personality_v0'");
+ }
assert(F.hasPersonalityFn() && "Personality function not found");
// __wasm_lpad_context global variable.
diff --git a/llvm/lib/CodeGen/WinEHPrepare.cpp b/llvm/lib/CodeGen/WinEHPrepare.cpp
index dfca2be0a114..11597b119893 100644
--- a/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -18,12 +18,11 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
@@ -31,6 +30,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -216,6 +216,127 @@ static void calculateStateNumbersForInvokes(const Function *Fn,
}
}
+// See comments below for calculateSEHStateForAsynchEH().
+// State - incoming State of normal paths
+struct WorkItem {
+ const BasicBlock *Block;
+ int State;
+ WorkItem(const BasicBlock *BB, int St) {
+ Block = BB;
+ State = St;
+ }
+};
+void llvm::calculateCXXStateForAsynchEH(const BasicBlock *BB, int State,
+ WinEHFuncInfo &EHInfo) {
+ SmallVector<struct WorkItem *, 8> WorkList;
+ struct WorkItem *WI = new WorkItem(BB, State);
+ WorkList.push_back(WI);
+
+ while (!WorkList.empty()) {
+ WI = WorkList.pop_back_val();
+ const BasicBlock *BB = WI->Block;
+ int State = WI->State;
+ delete WI;
+ if (EHInfo.BlockToStateMap.count(BB) && EHInfo.BlockToStateMap[BB] <= State)
+ continue; // skip blocks already visited by lower State
+
+ const llvm::Instruction *I = BB->getFirstNonPHI();
+ const llvm::Instruction *TI = BB->getTerminator();
+ if (I->isEHPad())
+ State = EHInfo.EHPadStateMap[I];
+ EHInfo.BlockToStateMap[BB] = State; // Record state; also marks the block as visited
+
+ if ((isa<CleanupReturnInst>(TI) || isa<CatchReturnInst>(TI)) && State > 0) {
+ // Retrieve the new State
+ State = EHInfo.CxxUnwindMap[State].ToState; // Retrieve next State
+ } else if (isa<InvokeInst>(TI)) {
+ auto *Call = cast<CallBase>(TI);
+ const Function *Fn = Call->getCalledFunction();
+ if (Fn && Fn->isIntrinsic() &&
+ (Fn->getIntrinsicID() == Intrinsic::seh_scope_begin ||
+ Fn->getIntrinsicID() == Intrinsic::seh_try_begin))
+ // Retrieve the new State from seh_scope_begin
+ State = EHInfo.InvokeStateMap[cast<InvokeInst>(TI)];
+ else if (Fn && Fn->isIntrinsic() &&
+ (Fn->getIntrinsicID() == Intrinsic::seh_scope_end ||
+ Fn->getIntrinsicID() == Intrinsic::seh_try_end)) {
+ // In case of conditional ctor, let's retrieve State from Invoke
+ State = EHInfo.InvokeStateMap[cast<InvokeInst>(TI)];
+ // End of the current state; retrieve the new state from the UnwindMap
+ State = EHInfo.CxxUnwindMap[State].ToState;
+ }
+ }
+ // Continue pushing successors onto the worklist
+ for (auto *SuccBB : successors(BB)) {
+ WI = new WorkItem(SuccBB, State);
+ WorkList.push_back(WI);
+ }
+ }
+}
+
+// The central theory of this routine is based on the following:
+// A _try scope is always a SEME (Single Entry Multiple Exits) region,
+// as jumping into a _try is not allowed.
+// The single entry must start with a seh_try_begin() invoke with a
+// correct State number that is the initial state of the SEME.
+// Through control flow, the state number is propagated into all blocks.
+// Side exits marked by seh_try_end() will unwind to the parent state via
+// the existing SEHUnwindMap[].
+// Side exits can ONLY jump into parent scopes (lower state number).
+// Thus, when a block receives different states from its predecessors,
+// the lowest State trumps the others.
+// If some exits flow to unreachable, propagation on those paths terminates
+// without affecting the remaining blocks.
+void llvm::calculateSEHStateForAsynchEH(const BasicBlock *BB, int State,
+ WinEHFuncInfo &EHInfo) {
+ SmallVector<struct WorkItem *, 8> WorkList;
+ struct WorkItem *WI = new WorkItem(BB, State);
+ WorkList.push_back(WI);
+
+ while (!WorkList.empty()) {
+ WI = WorkList.pop_back_val();
+ const BasicBlock *BB = WI->Block;
+ int State = WI->State;
+ delete WI;
+ if (EHInfo.BlockToStateMap.count(BB) && EHInfo.BlockToStateMap[BB] <= State)
+ continue; // skip blocks already visited by lower State
+
+ const llvm::Instruction *I = BB->getFirstNonPHI();
+ const llvm::Instruction *TI = BB->getTerminator();
+ if (I->isEHPad())
+ State = EHInfo.EHPadStateMap[I];
+ EHInfo.BlockToStateMap[BB] = State; // Record state
+
+ if (isa<CatchPadInst>(I) && isa<CatchReturnInst>(TI)) {
+ const Constant *FilterOrNull = cast<Constant>(
+ cast<CatchPadInst>(I)->getArgOperand(0)->stripPointerCasts());
+ const Function *Filter = dyn_cast<Function>(FilterOrNull);
+ if (!Filter || !Filter->getName().startswith("__IsLocalUnwind"))
+ State = EHInfo.SEHUnwindMap[State].ToState; // Retrieve next State
+ } else if ((isa<CleanupReturnInst>(TI) || isa<CatchReturnInst>(TI)) &&
+ State > 0) {
+ // Retrieve the new State.
+ State = EHInfo.SEHUnwindMap[State].ToState; // Retrieve next State
+ } else if (isa<InvokeInst>(TI)) {
+ auto *Call = cast<CallBase>(TI);
+ const Function *Fn = Call->getCalledFunction();
+ if (Fn && Fn->isIntrinsic() &&
+ Fn->getIntrinsicID() == Intrinsic::seh_try_begin)
+ // Retrieve the new State from seh_try_begin
+ State = EHInfo.InvokeStateMap[cast<InvokeInst>(TI)];
+ else if (Fn && Fn->isIntrinsic() &&
+ Fn->getIntrinsicID() == Intrinsic::seh_try_end)
+ // End of the current state; retrieve the new state from the UnwindMap
+ State = EHInfo.SEHUnwindMap[State].ToState;
+ }
+ // Continue pushing successors onto the worklist
+ for (auto *SuccBB : successors(BB)) {
+ WI = new WorkItem(SuccBB, State);
+ WorkList.push_back(WI);
+ }
+ }
+}
+
// Given BB which ends in an unwind edge, return the EHPad that this BB belongs
// to. If the unwind edge came from an invoke, return null.
static const BasicBlock *getEHPadFromPredecessor(const BasicBlock *BB,
@@ -276,6 +397,7 @@ static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo,
for (const auto *CatchPad : Handlers) {
FuncInfo.FuncletBaseStateMap[CatchPad] = CatchLow;
+ FuncInfo.EHPadStateMap[CatchPad] = CatchLow;
for (const User *U : CatchPad->users()) {
const auto *UserI = cast<Instruction>(U);
if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI)) {
@@ -384,6 +506,7 @@ static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo,
// Everything in the __try block uses TryState as its parent state.
FuncInfo.EHPadStateMap[CatchSwitch] = TryState;
+ FuncInfo.EHPadStateMap[CatchPad] = TryState;
LLVM_DEBUG(dbgs() << "Assigning state #" << TryState << " to BB "
<< CatchPadBB->getName() << '\n');
for (const BasicBlock *PredBlock : predecessors(BB))
@@ -464,6 +587,12 @@ void llvm::calculateSEHStateNumbers(const Function *Fn,
}
calculateStateNumbersForInvokes(Fn, FuncInfo);
+
+ bool IsEHa = Fn->getParent()->getModuleFlag("eh-asynch");
+ if (IsEHa) {
+ const BasicBlock *EntryBB = &(Fn->getEntryBlock());
+ calculateSEHStateForAsynchEH(EntryBB, -1, FuncInfo);
+ }
}
void llvm::calculateWinCXXEHStateNumbers(const Function *Fn,
@@ -482,6 +611,12 @@ void llvm::calculateWinCXXEHStateNumbers(const Function *Fn,
}
calculateStateNumbersForInvokes(Fn, FuncInfo);
+
+ bool IsEHa = Fn->getParent()->getModuleFlag("eh-asynch");
+ if (IsEHa) {
+ const BasicBlock *EntryBB = &(Fn->getEntryBlock());
+ calculateCXXStateForAsynchEH(EntryBB, -1, FuncInfo);
+ }
}
static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int HandlerParentState,
@@ -602,7 +737,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
// so visit pads in descendant-most to ancestor-most order.
for (ClrEHUnwindMapEntry &Entry : llvm::reverse(FuncInfo.ClrEHUnwindMap)) {
const Instruction *Pad =
- Entry.Handler.get<const BasicBlock *>()->getFirstNonPHI();
+ cast<const BasicBlock *>(Entry.Handler)->getFirstNonPHI();
// For most pads, the TryParentState is the state associated with the
// unwind dest of exceptional exits from it.
const BasicBlock *UnwindDest;
@@ -638,8 +773,8 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
int UserUnwindState =
FuncInfo.ClrEHUnwindMap[UserState].TryParentState;
if (UserUnwindState != -1)
- UserUnwindDest = FuncInfo.ClrEHUnwindMap[UserUnwindState]
- .Handler.get<const BasicBlock *>();
+ UserUnwindDest = cast<const BasicBlock *>(
+ FuncInfo.ClrEHUnwindMap[UserUnwindState].Handler);
}
// Not having an unwind dest for this user might indicate that it
@@ -1253,4 +1388,9 @@ void WinEHFuncInfo::addIPToStateRange(const InvokeInst *II,
LabelToStateMap[InvokeBegin] = std::make_pair(InvokeStateMap[II], InvokeEnd);
}
+void WinEHFuncInfo::addIPToStateRange(int State, MCSymbol* InvokeBegin,
+ MCSymbol* InvokeEnd) {
+ LabelToStateMap[InvokeBegin] = std::make_pair(State, InvokeEnd);
+}
+
WinEHFuncInfo::WinEHFuncInfo() = default;
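
// Illustrative sketch, not from the patch: both calculate*StateForAsynchEH()
// routines above are worklist propagations in which a block keeps the lowest
// state it has been reached with (inner scopes have higher numbers). The toy
// below shows that core idea on a plain adjacency list; the block/state types
// and the graph representation are assumptions for illustration.
#include <map>
#include <utility>
#include <vector>

using BlockId = int;

std::map<BlockId, int>
propagateLowestState(const std::map<BlockId, std::vector<BlockId>> &Succs,
                     BlockId Entry, int EntryState) {
  std::map<BlockId, int> BlockToState;
  std::vector<std::pair<BlockId, int>> WorkList{{Entry, EntryState}};
  while (!WorkList.empty()) {
    auto [BB, State] = WorkList.back();
    WorkList.pop_back();
    auto It = BlockToState.find(BB);
    if (It != BlockToState.end() && It->second <= State)
      continue;               // already reached with a lower (outer) state
    BlockToState[BB] = State; // record and (re)visit the successors
    auto SIt = Succs.find(BB);
    if (SIt == Succs.end())
      continue;
    for (BlockId Succ : SIt->second)
      WorkList.push_back({Succ, State});
  }
  return BlockToState;
}
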
diff --git a/llvm/lib/CodeGen/XRayInstrumentation.cpp b/llvm/lib/CodeGen/XRayInstrumentation.cpp
index 13f45ae048bb..d40725838c94 100644
--- a/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -15,7 +15,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -29,6 +28,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
@@ -226,6 +226,7 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
case Triple::ArchType::thumb:
case Triple::ArchType::aarch64:
case Triple::ArchType::hexagon:
+ case Triple::ArchType::loongarch64:
case Triple::ArchType::mips:
case Triple::ArchType::mipsel:
case Triple::ArchType::mips64:
diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp
index 9f6e54377ede..e6eccb20114a 100644
--- a/llvm/lib/DWARFLinker/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp
@@ -10,8 +10,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/NonRelocatableStringpool.h"
#include "llvm/DWARFLinker/DWARFLinkerDeclContext.h"
+#include "llvm/DWARFLinker/DWARFStreamer.h"
#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
@@ -419,6 +421,99 @@ void DWARFLinker::cleanupAuxiliarryData(LinkContext &Context) {
DIEAlloc.Reset();
}
+static bool isTlsAddressCode(uint8_t DW_OP_Code) {
+ return DW_OP_Code == dwarf::DW_OP_form_tls_address ||
+ DW_OP_Code == dwarf::DW_OP_GNU_push_tls_address;
+}
+
+std::pair<bool, std::optional<int64_t>>
+DWARFLinker::getVariableRelocAdjustment(AddressesMap &RelocMgr,
+ const DWARFDie &DIE) {
+ assert((DIE.getTag() == dwarf::DW_TAG_variable ||
+ DIE.getTag() == dwarf::DW_TAG_constant) &&
+ "Wrong type of input die");
+
+ const auto *Abbrev = DIE.getAbbreviationDeclarationPtr();
+
+ // Check if DIE has DW_AT_location attribute.
+ DWARFUnit *U = DIE.getDwarfUnit();
+ std::optional<uint32_t> LocationIdx =
+ Abbrev->findAttributeIndex(dwarf::DW_AT_location);
+ if (!LocationIdx)
+ return std::make_pair(false, std::nullopt);
+
+ // Get offset to the DW_AT_location attribute.
+ uint64_t AttrOffset =
+ Abbrev->getAttributeOffsetFromIndex(*LocationIdx, DIE.getOffset(), *U);
+
+ // Get value of the DW_AT_location attribute.
+ std::optional<DWARFFormValue> LocationValue =
+ Abbrev->getAttributeValueFromOffset(*LocationIdx, AttrOffset, *U);
+ if (!LocationValue)
+ return std::make_pair(false, std::nullopt);
+
+ // Check that DW_AT_location attribute is of 'exprloc' class.
+ // Handling value of location expressions for attributes of 'loclist'
+ // class is not implemented yet.
+ std::optional<ArrayRef<uint8_t>> Expr = LocationValue->getAsBlock();
+ if (!Expr)
+ return std::make_pair(false, std::nullopt);
+
+ // Parse 'exprloc' expression.
+ DataExtractor Data(toStringRef(*Expr), U->getContext().isLittleEndian(),
+ U->getAddressByteSize());
+ DWARFExpression Expression(Data, U->getAddressByteSize(),
+ U->getFormParams().Format);
+
+ bool HasLocationAddress = false;
+ uint64_t CurExprOffset = 0;
+ for (DWARFExpression::iterator It = Expression.begin();
+ It != Expression.end(); ++It) {
+ DWARFExpression::iterator NextIt = It;
+ ++NextIt;
+
+ const DWARFExpression::Operation &Op = *It;
+ switch (Op.getCode()) {
+ case dwarf::DW_OP_const4u:
+ case dwarf::DW_OP_const8u:
+ case dwarf::DW_OP_const4s:
+ case dwarf::DW_OP_const8s:
+ if (NextIt == Expression.end() || !isTlsAddressCode(NextIt->getCode()))
+ break;
+ [[fallthrough]];
+ case dwarf::DW_OP_addr: {
+ HasLocationAddress = true;
+ // Check relocation for the address.
+ if (std::optional<int64_t> RelocAdjustment =
+ RelocMgr.getExprOpAddressRelocAdjustment(
+ *U, Op, AttrOffset + CurExprOffset,
+ AttrOffset + Op.getEndOffset()))
+ return std::make_pair(HasLocationAddress, *RelocAdjustment);
+ } break;
+ case dwarf::DW_OP_constx:
+ case dwarf::DW_OP_addrx: {
+ HasLocationAddress = true;
+ if (std::optional<uint64_t> AddressOffset =
+ DIE.getDwarfUnit()->getIndexedAddressOffset(
+ Op.getRawOperand(0))) {
+ // Check relocation for the address.
+ if (std::optional<int64_t> RelocAdjustment =
+ RelocMgr.getExprOpAddressRelocAdjustment(
+ *U, Op, *AddressOffset,
+ *AddressOffset + DIE.getDwarfUnit()->getAddressByteSize()))
+ return std::make_pair(HasLocationAddress, *RelocAdjustment);
+ }
+ } break;
+ default: {
+ // Nothing to do.
+ } break;
+ }
+ CurExprOffset = Op.getEndOffset();
+ }
+
+ return std::make_pair(HasLocationAddress, std::nullopt);
+}
+
/// Check if a variable describing DIE should be kept.
/// \returns updated TraversalFlags.
unsigned DWARFLinker::shouldKeepVariableDIE(AddressesMap &RelocMgr,
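
// Illustrative sketch, not from the patch: getVariableRelocAdjustment() above
// walks a DW_AT_location 'exprloc' with DWARFExpression, looking for operations
// that carry an address. A stripped-down version of that walk might look like
// this; the helper and its arguments are assumptions for illustration.
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Support/DataExtractor.h"
using namespace llvm;

static bool hasAddressOperand(DataExtractor Data, const DWARFUnit &U) {
  DWARFExpression Expr(Data, U.getAddressByteSize(), U.getFormParams().Format);
  for (const DWARFExpression::Operation &Op : Expr) {
    switch (Op.getCode()) {
    case dwarf::DW_OP_addr:
    case dwarf::DW_OP_addrx:
    case dwarf::DW_OP_constx:
      return true; // candidate for a relocation adjustment
    default:
      break;
    }
  }
  return false;
}
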
@@ -439,9 +534,20 @@ unsigned DWARFLinker::shouldKeepVariableDIE(AddressesMap &RelocMgr,
// if the variable has a valid relocation, so that the DIEInfo is filled.
// However, we don't want a static variable in a function to force us to keep
// the enclosing function, unless requested explicitly.
- const bool HasLiveMemoryLocation = RelocMgr.isLiveVariable(DIE, MyInfo);
- if (!HasLiveMemoryLocation || ((Flags & TF_InFunctionScope) &&
- !LLVM_UNLIKELY(Options.KeepFunctionForStatic)))
+ std::pair<bool, std::optional<int64_t>> LocExprAddrAndRelocAdjustment =
+ getVariableRelocAdjustment(RelocMgr, DIE);
+
+ if (LocExprAddrAndRelocAdjustment.first)
+ MyInfo.HasLocationExpressionAddr = true;
+
+ if (!LocExprAddrAndRelocAdjustment.second)
+ return Flags;
+
+ MyInfo.AddrAdjust = *LocExprAddrAndRelocAdjustment.second;
+ MyInfo.InDebugMap = true;
+
+ if (((Flags & TF_InFunctionScope) &&
+ !LLVM_UNLIKELY(Options.KeepFunctionForStatic)))
return Flags;
if (Options.Verbose) {
@@ -458,9 +564,8 @@ unsigned DWARFLinker::shouldKeepVariableDIE(AddressesMap &RelocMgr,
/// Check if a function describing DIE should be kept.
/// \returns updated TraversalFlags.
unsigned DWARFLinker::shouldKeepSubprogramDIE(
- AddressesMap &RelocMgr, RangesTy &Ranges, const DWARFDie &DIE,
- const DWARFFile &File, CompileUnit &Unit, CompileUnit::DIEInfo &MyInfo,
- unsigned Flags) {
+ AddressesMap &RelocMgr, const DWARFDie &DIE, const DWARFFile &File,
+ CompileUnit &Unit, CompileUnit::DIEInfo &MyInfo, unsigned Flags) {
Flags |= TF_InFunctionScope;
auto LowPc = dwarf::toAddress(DIE.find(dwarf::DW_AT_low_pc));
@@ -468,9 +573,14 @@ unsigned DWARFLinker::shouldKeepSubprogramDIE(
return Flags;
assert(LowPc && "low_pc attribute is not an address.");
- if (!RelocMgr.isLiveSubprogram(DIE, MyInfo))
+ std::optional<int64_t> RelocAdjustment =
+ RelocMgr.getSubprogramRelocAdjustment(DIE);
+ if (!RelocAdjustment)
return Flags;
+ MyInfo.AddrAdjust = *RelocAdjustment;
+ MyInfo.InDebugMap = true;
+
if (Options.Verbose) {
outs() << "Keeping subprogram DIE:";
DIDumpOptions DumpOpts;
@@ -510,16 +620,14 @@ unsigned DWARFLinker::shouldKeepSubprogramDIE(
}
// Replace the debug map range with a more accurate one.
- Ranges.insert({*LowPc, *HighPc}, MyInfo.AddrAdjust);
Unit.addFunctionRange(*LowPc, *HighPc, MyInfo.AddrAdjust);
return Flags;
}
/// Check if a DIE should be kept.
/// \returns updated TraversalFlags.
-unsigned DWARFLinker::shouldKeepDIE(AddressesMap &RelocMgr, RangesTy &Ranges,
- const DWARFDie &DIE, const DWARFFile &File,
- CompileUnit &Unit,
+unsigned DWARFLinker::shouldKeepDIE(AddressesMap &RelocMgr, const DWARFDie &DIE,
+ const DWARFFile &File, CompileUnit &Unit,
CompileUnit::DIEInfo &MyInfo,
unsigned Flags) {
switch (DIE.getTag()) {
@@ -528,8 +636,7 @@ unsigned DWARFLinker::shouldKeepDIE(AddressesMap &RelocMgr, RangesTy &Ranges,
return shouldKeepVariableDIE(RelocMgr, DIE, MyInfo, Flags);
case dwarf::DW_TAG_subprogram:
case dwarf::DW_TAG_label:
- return shouldKeepSubprogramDIE(RelocMgr, Ranges, DIE, File, Unit, MyInfo,
- Flags);
+ return shouldKeepSubprogramDIE(RelocMgr, DIE, File, Unit, MyInfo, Flags);
case dwarf::DW_TAG_base_type:
// DWARF Expressions may reference basic types, but scanning them
// is expensive. Basic types are tiny, so just keep all of them.
@@ -749,7 +856,7 @@ void DWARFLinker::lookForParentDIEsToKeep(
///
/// The return value indicates whether the DIE is incomplete.
void DWARFLinker::lookForDIEsToKeep(AddressesMap &AddressesMap,
- RangesTy &Ranges, const UnitListTy &Units,
+ const UnitListTy &Units,
const DWARFDie &Die, const DWARFFile &File,
CompileUnit &Cu, unsigned Flags) {
// LIFO work list.
@@ -803,11 +910,9 @@ void DWARFLinker::lookForDIEsToKeep(AddressesMap &AddressesMap,
if ((Current.Flags & TF_DependencyWalk) && AlreadyKept)
continue;
- // We must not call shouldKeepDIE while called from keepDIEAndDependencies,
- // because it would screw up the relocation finding logic.
if (!(Current.Flags & TF_DependencyWalk))
- Current.Flags = shouldKeepDIE(AddressesMap, Ranges, Current.Die, File,
- Current.CU, MyInfo, Current.Flags);
+ Current.Flags = shouldKeepDIE(AddressesMap, Current.Die, File, Current.CU,
+ MyInfo, Current.Flags);
// We need to mark context for the canonical die in the end of normal
// traversing(not TF_DependencyWalk) or after normal traversing if die
@@ -928,7 +1033,7 @@ void DWARFLinker::assignAbbrev(DIEAbbrev &Abbrev) {
Abbreviations.push_back(
std::make_unique<DIEAbbrev>(Abbrev.getTag(), Abbrev.hasChildren()));
for (const auto &Attr : Abbrev.getData())
- Abbreviations.back()->AddAttribute(Attr.getAttribute(), Attr.getForm());
+ Abbreviations.back()->AddAttribute(Attr);
AbbreviationsSet.InsertNode(Abbreviations.back().get(), InsertToken);
// Assign the unique abbreviation number.
Abbrev.setNumber(Abbreviations.size());
@@ -936,24 +1041,33 @@ void DWARFLinker::assignAbbrev(DIEAbbrev &Abbrev) {
}
}
-unsigned DWARFLinker::DIECloner::cloneStringAttribute(
- DIE &Die, AttributeSpec AttrSpec, const DWARFFormValue &Val,
- const DWARFUnit &, OffsetsStringPool &StringPool, AttributesInfo &Info) {
+unsigned DWARFLinker::DIECloner::cloneStringAttribute(DIE &Die,
+ AttributeSpec AttrSpec,
+ const DWARFFormValue &Val,
+ const DWARFUnit &,
+ AttributesInfo &Info) {
std::optional<const char *> String = dwarf::toString(Val);
if (!String)
return 0;
- // Switch everything to out of line strings.
- auto StringEntry = StringPool.getEntry(*String);
+ DwarfStringPoolEntryRef StringEntry;
+ if (AttrSpec.Form == dwarf::DW_FORM_line_strp) {
+ StringEntry = DebugLineStrPool.getEntry(*String);
+ } else {
+ StringEntry = DebugStrPool.getEntry(*String);
+
+ // Update attributes info.
+ if (AttrSpec.Attr == dwarf::DW_AT_name)
+ Info.Name = StringEntry;
+ else if (AttrSpec.Attr == dwarf::DW_AT_MIPS_linkage_name ||
+ AttrSpec.Attr == dwarf::DW_AT_linkage_name)
+ Info.MangledName = StringEntry;
- // Update attributes info.
- if (AttrSpec.Attr == dwarf::DW_AT_name)
- Info.Name = StringEntry;
- else if (AttrSpec.Attr == dwarf::DW_AT_MIPS_linkage_name ||
- AttrSpec.Attr == dwarf::DW_AT_linkage_name)
- Info.MangledName = StringEntry;
+ // Switch everything to out of line strings.
+ AttrSpec.Form = dwarf::DW_FORM_strp;
+ }
- Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr), dwarf::DW_FORM_strp,
+ Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr), AttrSpec.Form,
DIEInteger(StringEntry.getOffset()));
return 4;
@@ -1032,31 +1146,35 @@ unsigned DWARFLinker::DIECloner::cloneDieReferenceAttribute(
void DWARFLinker::DIECloner::cloneExpression(
DataExtractor &Data, DWARFExpression Expression, const DWARFFile &File,
- CompileUnit &Unit, SmallVectorImpl<uint8_t> &OutputBuffer) {
+ CompileUnit &Unit, SmallVectorImpl<uint8_t> &OutputBuffer,
+ int64_t AddrRelocAdjustment, bool IsLittleEndian) {
using Encoding = DWARFExpression::Operation::Encoding;
+ uint8_t OrigAddressByteSize = Unit.getOrigUnit().getAddressByteSize();
+
uint64_t OpOffset = 0;
for (auto &Op : Expression) {
- auto Description = Op.getDescription();
+ auto Desc = Op.getDescription();
// DW_OP_const_type is variable-length and has 3
- // operands. DWARFExpression thus far only supports 2.
- auto Op0 = Description.Op[0];
- auto Op1 = Description.Op[1];
- if ((Op0 == Encoding::BaseTypeRef && Op1 != Encoding::SizeNA) ||
- (Op1 == Encoding::BaseTypeRef && Op0 != Encoding::Size1))
+ // operands. Thus far we only support 2.
+ if ((Desc.Op.size() == 2 && Desc.Op[0] == Encoding::BaseTypeRef) ||
+ (Desc.Op.size() == 2 && Desc.Op[1] == Encoding::BaseTypeRef &&
+ Desc.Op[0] != Encoding::Size1))
Linker.reportWarning("Unsupported DW_OP encoding.", File);
- if ((Op0 == Encoding::BaseTypeRef && Op1 == Encoding::SizeNA) ||
- (Op1 == Encoding::BaseTypeRef && Op0 == Encoding::Size1)) {
+ if ((Desc.Op.size() == 1 && Desc.Op[0] == Encoding::BaseTypeRef) ||
+ (Desc.Op.size() == 2 && Desc.Op[1] == Encoding::BaseTypeRef &&
+ Desc.Op[0] == Encoding::Size1)) {
// This code assumes that the other non-typeref operand fits into 1 byte.
assert(OpOffset < Op.getEndOffset());
uint32_t ULEBsize = Op.getEndOffset() - OpOffset - 1;
assert(ULEBsize <= 16);
// Copy over the operation.
+ assert(!Op.getSubCode() && "SubOps not yet supported");
OutputBuffer.push_back(Op.getCode());
uint64_t RefOffset;
- if (Op1 == Encoding::SizeNA) {
+ if (Desc.Op.size() == 1) {
RefOffset = Op.getRawOperand(0);
} else {
OutputBuffer.push_back(Op.getRawOperand(0));
@@ -1086,6 +1204,59 @@ void DWARFLinker::DIECloner::cloneExpression(
assert(RealSize == ULEBsize && "padding failed");
ArrayRef<uint8_t> ULEBbytes(ULEB, ULEBsize);
OutputBuffer.append(ULEBbytes.begin(), ULEBbytes.end());
+ } else if (!Linker.Options.Update && Op.getCode() == dwarf::DW_OP_addrx) {
+ if (std::optional<object::SectionedAddress> SA =
+ Unit.getOrigUnit().getAddrOffsetSectionItem(
+ Op.getRawOperand(0))) {
+ // DWARFLinker does not use addrx forms since it generates relocated
+ // addresses. Replace DW_OP_addrx with DW_OP_addr here.
+ // Argument of DW_OP_addrx should be relocated here as it is not
+ // processed by applyValidRelocs.
+ OutputBuffer.push_back(dwarf::DW_OP_addr);
+ uint64_t LinkedAddress = SA->Address + AddrRelocAdjustment;
+ if (IsLittleEndian != sys::IsLittleEndianHost)
+ sys::swapByteOrder(LinkedAddress);
+ ArrayRef<uint8_t> AddressBytes(
+ reinterpret_cast<const uint8_t *>(&LinkedAddress),
+ OrigAddressByteSize);
+ OutputBuffer.append(AddressBytes.begin(), AddressBytes.end());
+ } else
+ Linker.reportWarning("cannot read DW_OP_addrx operand.", File);
+ } else if (!Linker.Options.Update && Op.getCode() == dwarf::DW_OP_constx) {
+ if (std::optional<object::SectionedAddress> SA =
+ Unit.getOrigUnit().getAddrOffsetSectionItem(
+ Op.getRawOperand(0))) {
+ // DWARFLinker does not use constx forms since it generates relocated
+ // addresses. Replace DW_OP_constx with DW_OP_const[*]u here.
+ // Argument of DW_OP_constx should be relocated here as it is not
+ // processed by applyValidRelocs.
+ std::optional<uint8_t> OutOperandKind;
+ switch (OrigAddressByteSize) {
+ case 4:
+ OutOperandKind = dwarf::DW_OP_const4u;
+ break;
+ case 8:
+ OutOperandKind = dwarf::DW_OP_const8u;
+ break;
+ default:
+ Linker.reportWarning(
+ formatv(("unsupported address size: {0}."), OrigAddressByteSize),
+ File);
+ break;
+ }
+
+ if (OutOperandKind) {
+ OutputBuffer.push_back(*OutOperandKind);
+ uint64_t LinkedAddress = SA->Address + AddrRelocAdjustment;
+ if (IsLittleEndian != sys::IsLittleEndianHost)
+ sys::swapByteOrder(LinkedAddress);
+ ArrayRef<uint8_t> AddressBytes(
+ reinterpret_cast<const uint8_t *>(&LinkedAddress),
+ OrigAddressByteSize);
+ OutputBuffer.append(AddressBytes.begin(), AddressBytes.end());
+ }
+ } else
+ Linker.reportWarning("cannot read DW_OP_constx operand.", File);
} else {
// Copy over everything else unmodified.
StringRef Bytes = Data.getData().slice(OpOffset, Op.getEndOffset());
@@ -1096,8 +1267,9 @@ void DWARFLinker::DIECloner::cloneExpression(
}
unsigned DWARFLinker::DIECloner::cloneBlockAttribute(
- DIE &Die, const DWARFFile &File, CompileUnit &Unit, AttributeSpec AttrSpec,
- const DWARFFormValue &Val, unsigned AttrSize, bool IsLittleEndian) {
+ DIE &Die, const DWARFDie &InputDIE, const DWARFFile &File,
+ CompileUnit &Unit, AttributeSpec AttrSpec, const DWARFFormValue &Val,
+ bool IsLittleEndian) {
DIEValueList *Attr;
DIEValue Value;
DIELoc *Loc = nullptr;
@@ -1112,13 +1284,7 @@ unsigned DWARFLinker::DIECloner::cloneBlockAttribute(
Attr = Loc ? static_cast<DIEValueList *>(Loc)
: static_cast<DIEValueList *>(Block);
- if (Loc)
- Value = DIEValue(dwarf::Attribute(AttrSpec.Attr),
- dwarf::Form(AttrSpec.Form), Loc);
- else
- Value = DIEValue(dwarf::Attribute(AttrSpec.Attr),
- dwarf::Form(AttrSpec.Form), Block);
-
+ DWARFUnit &OrigUnit = Unit.getOrigUnit();
// If the block is a DWARF Expression, clone it into the temporary
// buffer using cloneExpression(), otherwise copy the data directly.
SmallVector<uint8_t, 32> Buffer;
@@ -1126,12 +1292,12 @@ unsigned DWARFLinker::DIECloner::cloneBlockAttribute(
if (DWARFAttribute::mayHaveLocationExpr(AttrSpec.Attr) &&
(Val.isFormClass(DWARFFormValue::FC_Block) ||
Val.isFormClass(DWARFFormValue::FC_Exprloc))) {
- DWARFUnit &OrigUnit = Unit.getOrigUnit();
DataExtractor Data(StringRef((const char *)Bytes.data(), Bytes.size()),
IsLittleEndian, OrigUnit.getAddressByteSize());
DWARFExpression Expr(Data, OrigUnit.getAddressByteSize(),
OrigUnit.getFormParams().Format);
- cloneExpression(Data, Expr, File, Unit, Buffer);
+ cloneExpression(Data, Expr, File, Unit, Buffer,
+ Unit.getInfo(InputDIE).AddrAdjust, IsLittleEndian);
Bytes = Buffer;
}
for (auto Byte : Bytes)
@@ -1146,89 +1312,89 @@ unsigned DWARFLinker::DIECloner::cloneBlockAttribute(
else
Block->setSize(Bytes.size());
- Die.addValue(DIEAlloc, Value);
- return AttrSize;
+ if (Loc)
+ Value = DIEValue(dwarf::Attribute(AttrSpec.Attr),
+ dwarf::Form(AttrSpec.Form), Loc);
+ else {
+ // The expression location data might be updated and exceed the original
+ // size. Check whether the new data fits into the original form.
+ if ((AttrSpec.Form == dwarf::DW_FORM_block1 &&
+ (Bytes.size() > UINT8_MAX)) ||
+ (AttrSpec.Form == dwarf::DW_FORM_block2 &&
+ (Bytes.size() > UINT16_MAX)) ||
+ (AttrSpec.Form == dwarf::DW_FORM_block4 && (Bytes.size() > UINT32_MAX)))
+ AttrSpec.Form = dwarf::DW_FORM_block;
+
+ Value = DIEValue(dwarf::Attribute(AttrSpec.Attr),
+ dwarf::Form(AttrSpec.Form), Block);
+ }
+
+ return Die.addValue(DIEAlloc, Value)->sizeOf(OrigUnit.getFormParams());
}
unsigned DWARFLinker::DIECloner::cloneAddressAttribute(
- DIE &Die, AttributeSpec AttrSpec, unsigned AttrSize,
- const DWARFFormValue &Val, const CompileUnit &Unit, AttributesInfo &Info) {
+ DIE &Die, const DWARFDie &InputDIE, AttributeSpec AttrSpec,
+ unsigned AttrSize, const DWARFFormValue &Val, const CompileUnit &Unit,
+ AttributesInfo &Info) {
+ if (AttrSpec.Attr == dwarf::DW_AT_low_pc)
+ Info.HasLowPc = true;
+
if (LLVM_UNLIKELY(Linker.Options.Update)) {
- if (AttrSpec.Attr == dwarf::DW_AT_low_pc)
- Info.HasLowPc = true;
Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
dwarf::Form(AttrSpec.Form), DIEInteger(Val.getRawUValue()));
return AttrSize;
}
- dwarf::Form Form = AttrSpec.Form;
- uint64_t Addr = 0;
- if (Form == dwarf::DW_FORM_addrx) {
- if (std::optional<uint64_t> AddrOffsetSectionBase =
- Unit.getOrigUnit().getAddrOffsetSectionBase()) {
- uint64_t StartOffset =
- *AddrOffsetSectionBase +
- Val.getRawUValue() * Unit.getOrigUnit().getAddressByteSize();
- uint64_t EndOffset =
- StartOffset + Unit.getOrigUnit().getAddressByteSize();
- if (llvm::Expected<uint64_t> RelocAddr =
- ObjFile.Addresses->relocateIndexedAddr(StartOffset, EndOffset))
- Addr = *RelocAddr;
- else
- Linker.reportWarning(toString(RelocAddr.takeError()), ObjFile);
- } else
- Linker.reportWarning("no base offset for address table", ObjFile);
+ // The cloned DIE may have address attributes relocated to a totally
+ // unrelated value. This can happen:
+ // - If high_pc is an address (DWARF version == 2), it might have been
+ // relocated to an unrelated value (because the end address in the
+ // object file might be the start address of another function which got
+ // moved independently by the linker).
+ // - If an address inside an inlined subroutine happens to lie at the
+ // beginning of its inlining function, it might get relocated along with
+ // that function.
+ // To avoid the above cases and to avoid applying the relocation twice
+ // (in applyValidRelocs and here), read the address attribute from
+ // InputDIE and apply Info.PCOffset here.
+
+ std::optional<DWARFFormValue> AddrAttribute = InputDIE.find(AttrSpec.Attr);
+ if (!AddrAttribute)
+ llvm_unreachable("Cann't find attribute.");
+
+ std::optional<uint64_t> Addr = AddrAttribute->getAsAddress();
+ if (!Addr) {
+ Linker.reportWarning("Cann't read address attribute value.", ObjFile);
+ return 0;
+ }
- // Generation of DWARFv5 .debug_addr table is not supported yet.
- // Convert attribute into the dwarf::DW_FORM_addr.
- Form = dwarf::DW_FORM_addr;
- } else
- Addr = *Val.getAsAddress();
-
- if (AttrSpec.Attr == dwarf::DW_AT_low_pc) {
- if (Die.getTag() == dwarf::DW_TAG_inlined_subroutine ||
- Die.getTag() == dwarf::DW_TAG_lexical_block ||
- Die.getTag() == dwarf::DW_TAG_label) {
- // The low_pc of a block or inline subroutine might get
- // relocated because it happens to match the low_pc of the
- // enclosing subprogram. To prevent issues with that, always use
- // the low_pc from the input DIE if relocations have been applied.
- Addr = (Info.OrigLowPc != std::numeric_limits<uint64_t>::max()
- ? Info.OrigLowPc
- : Addr) +
- Info.PCOffset;
- } else if (Die.getTag() == dwarf::DW_TAG_compile_unit) {
- if (std::optional<uint64_t> LowPC = Unit.getLowPc())
- Addr = *LowPC;
- else
- return 0;
- }
- Info.HasLowPc = true;
- } else if (AttrSpec.Attr == dwarf::DW_AT_high_pc) {
- if (Die.getTag() == dwarf::DW_TAG_compile_unit) {
- if (uint64_t HighPc = Unit.getHighPc())
- Addr = HighPc;
- else
- return 0;
- } else
- // If we have a high_pc recorded for the input DIE, use
- // it. Otherwise (when no relocations where applied) just use the
- // one we just decoded.
- Addr = (Info.OrigHighPc ? Info.OrigHighPc : Addr) + Info.PCOffset;
- } else if (AttrSpec.Attr == dwarf::DW_AT_call_return_pc) {
- // Relocate a return PC address within a call site entry.
- if (Die.getTag() == dwarf::DW_TAG_call_site)
- Addr = (Info.OrigCallReturnPc ? Info.OrigCallReturnPc : Addr) +
- Info.PCOffset;
- } else if (AttrSpec.Attr == dwarf::DW_AT_call_pc) {
- // Relocate the address of a branch instruction within a call site entry.
- if (Die.getTag() == dwarf::DW_TAG_call_site)
- Addr = (Info.OrigCallPc ? Info.OrigCallPc : Addr) + Info.PCOffset;
- }
-
- Die.addValue(DIEAlloc, static_cast<dwarf::Attribute>(AttrSpec.Attr),
- static_cast<dwarf::Form>(Form), DIEInteger(Addr));
- return Unit.getOrigUnit().getAddressByteSize();
+ if (InputDIE.getTag() == dwarf::DW_TAG_compile_unit &&
+ AttrSpec.Attr == dwarf::DW_AT_low_pc) {
+ if (std::optional<uint64_t> LowPC = Unit.getLowPc())
+ Addr = *LowPC;
+ else
+ return 0;
+ } else if (InputDIE.getTag() == dwarf::DW_TAG_compile_unit &&
+ AttrSpec.Attr == dwarf::DW_AT_high_pc) {
+ if (uint64_t HighPc = Unit.getHighPc())
+ Addr = HighPc;
+ else
+ return 0;
+ } else {
+ *Addr += Info.PCOffset;
+ }
+
+ if (AttrSpec.Form == dwarf::DW_FORM_addr) {
+ Die.addValue(DIEAlloc, static_cast<dwarf::Attribute>(AttrSpec.Attr),
+ AttrSpec.Form, DIEInteger(*Addr));
+ return Unit.getOrigUnit().getAddressByteSize();
+ }
+
+ auto AddrIndex = AddrPool.getAddrIndex(*Addr);
+
+ return Die
+ .addValue(DIEAlloc, static_cast<dwarf::Attribute>(AttrSpec.Attr),
+ dwarf::Form::DW_FORM_addrx, DIEInteger(AddrIndex))
+ ->sizeOf(Unit.getOrigUnit().getFormParams());
}
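The DW_FORM_addrx path above hands the relocated address to AddrPool.getAddrIndex and emits only the returned index; the pooled addresses are written out later by emitDebugAddrSection. As a rough idea of what such a pool does — this is an illustrative stand-in, not the actual AddrPool type from the DWARFLinker headers — a deduplicating pool can be as small as:

#include <cstdint>
#include <unordered_map>
#include <vector>

// Deduplicating address pool: the first time an address is seen it is
// appended to Addrs and assigned the next index; later requests for the
// same address return the same index.
struct SimpleAddrPool {
  std::vector<uint64_t> Addrs;                  // later emitted into .debug_addr
  std::unordered_map<uint64_t, uint64_t> Index; // address -> index

  uint64_t getAddrIndex(uint64_t Addr) {
    auto It = Index.find(Addr);
    if (It != Index.end())
      return It->second;
    uint64_t NewIndex = Addrs.size();
    Addrs.push_back(Addr);
    Index.emplace(Addr, NewIndex);
    return NewIndex;
  }

  void clear() {
    Addrs.clear();
    Index.clear();
  }
};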
unsigned DWARFLinker::DIECloner::cloneScalarAttribute(
@@ -1270,13 +1436,61 @@ unsigned DWARFLinker::DIECloner::cloneScalarAttribute(
}
if (AttrSpec.Attr == dwarf::DW_AT_declaration && Value)
Info.IsDeclaration = true;
- Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
- dwarf::Form(AttrSpec.Form), DIEInteger(Value));
+
+ if (AttrSpec.Form == dwarf::DW_FORM_loclistx)
+ Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
+ dwarf::Form(AttrSpec.Form), DIELocList(Value));
+ else
+ Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
+ dwarf::Form(AttrSpec.Form), DIEInteger(Value));
return AttrSize;
}
- if (AttrSpec.Attr == dwarf::DW_AT_high_pc &&
- Die.getTag() == dwarf::DW_TAG_compile_unit) {
+ [[maybe_unused]] dwarf::Form OriginalForm = AttrSpec.Form;
+ if (AttrSpec.Form == dwarf::DW_FORM_rnglistx) {
+ // DWARFLinker does not generate a .debug_addr table. Thus we need to
+ // change all "addrx"-related forms to the "addr" version. Change
+ // DW_FORM_rnglistx to DW_FORM_sec_offset here.
+ std::optional<uint64_t> Index = Val.getAsSectionOffset();
+ if (!Index) {
+ Linker.reportWarning("Cannot read the attribute. Dropping.", File,
+ &InputDIE);
+ return 0;
+ }
+ std::optional<uint64_t> Offset =
+ Unit.getOrigUnit().getRnglistOffset(*Index);
+ if (!Offset) {
+ Linker.reportWarning("Cannot read the attribute. Dropping.", File,
+ &InputDIE);
+ return 0;
+ }
+
+ Value = *Offset;
+ AttrSpec.Form = dwarf::DW_FORM_sec_offset;
+ AttrSize = Unit.getOrigUnit().getFormParams().getDwarfOffsetByteSize();
+ } else if (AttrSpec.Form == dwarf::DW_FORM_loclistx) {
+ // DWARFLinker does not generate a .debug_addr table. Thus we need to
+ // change all "addrx"-related forms to the "addr" version. Change
+ // DW_FORM_loclistx to DW_FORM_sec_offset here.
+ std::optional<uint64_t> Index = Val.getAsSectionOffset();
+ if (!Index) {
+ Linker.reportWarning("Cannot read the attribute. Dropping.", File,
+ &InputDIE);
+ return 0;
+ }
+ std::optional<uint64_t> Offset =
+ Unit.getOrigUnit().getLoclistOffset(*Index);
+ if (!Offset) {
+ Linker.reportWarning("Cannot read the attribute. Dropping.", File,
+ &InputDIE);
+ return 0;
+ }
+
+ Value = *Offset;
+ AttrSpec.Form = dwarf::DW_FORM_sec_offset;
+ AttrSize = Unit.getOrigUnit().getFormParams().getDwarfOffsetByteSize();
+ } else if (AttrSpec.Attr == dwarf::DW_AT_high_pc &&
+ Die.getTag() == dwarf::DW_TAG_compile_unit) {
std::optional<uint64_t> LowPC = Unit.getLowPc();
if (!LowPC)
return 0;
@@ -1294,24 +1508,30 @@ unsigned DWARFLinker::DIECloner::cloneScalarAttribute(
&InputDIE);
return 0;
}
- PatchLocation Patch =
+
+ DIE::value_iterator Patch =
Die.addValue(DIEAlloc, dwarf::Attribute(AttrSpec.Attr),
dwarf::Form(AttrSpec.Form), DIEInteger(Value));
- if (AttrSpec.Attr == dwarf::DW_AT_ranges) {
+ if (AttrSpec.Attr == dwarf::DW_AT_ranges ||
+ AttrSpec.Attr == dwarf::DW_AT_start_scope) {
Unit.noteRangeAttribute(Die, Patch);
Info.HasRanges = true;
- }
-
- // A more generic way to check for location attributes would be
- // nice, but it's very unlikely that any other attribute needs a
- // location list.
- // FIXME: use DWARFAttribute::mayHaveLocationDescription().
- else if (AttrSpec.Attr == dwarf::DW_AT_location ||
- AttrSpec.Attr == dwarf::DW_AT_frame_base) {
- Unit.noteLocationAttribute(Patch, Info.PCOffset);
+ } else if (DWARFAttribute::mayHaveLocationList(AttrSpec.Attr) &&
+ dwarf::doesFormBelongToClass(AttrSpec.Form,
+ DWARFFormValue::FC_SectionOffset,
+ Unit.getOrigUnit().getVersion())) {
+
+ CompileUnit::DIEInfo &LocationDieInfo = Unit.getInfo(InputDIE);
+ Unit.noteLocationAttribute({Patch, LocationDieInfo.InDebugMap
+ ? LocationDieInfo.AddrAdjust
+ : Info.PCOffset});
} else if (AttrSpec.Attr == dwarf::DW_AT_declaration && Value)
Info.IsDeclaration = true;
+ // Check that all dwarf::DW_FORM_rnglistx forms were handled above.
+ assert((Info.HasRanges || (OriginalForm != dwarf::DW_FORM_rnglistx)) &&
+ "Unhandled DW_FORM_rnglistx attribute");
+
return AttrSize;
}
@@ -1320,20 +1540,20 @@ unsigned DWARFLinker::DIECloner::cloneScalarAttribute(
/// \returns the size of the cloned attribute.
unsigned DWARFLinker::DIECloner::cloneAttribute(
DIE &Die, const DWARFDie &InputDIE, const DWARFFile &File,
- CompileUnit &Unit, OffsetsStringPool &StringPool, const DWARFFormValue &Val,
- const AttributeSpec AttrSpec, unsigned AttrSize, AttributesInfo &Info,
- bool IsLittleEndian) {
+ CompileUnit &Unit, const DWARFFormValue &Val, const AttributeSpec AttrSpec,
+ unsigned AttrSize, AttributesInfo &Info, bool IsLittleEndian) {
const DWARFUnit &U = Unit.getOrigUnit();
switch (AttrSpec.Form) {
case dwarf::DW_FORM_strp:
+ case dwarf::DW_FORM_line_strp:
case dwarf::DW_FORM_string:
case dwarf::DW_FORM_strx:
case dwarf::DW_FORM_strx1:
case dwarf::DW_FORM_strx2:
case dwarf::DW_FORM_strx3:
case dwarf::DW_FORM_strx4:
- return cloneStringAttribute(Die, AttrSpec, Val, U, StringPool, Info);
+ return cloneStringAttribute(Die, AttrSpec, Val, U, Info);
case dwarf::DW_FORM_ref_addr:
case dwarf::DW_FORM_ref1:
case dwarf::DW_FORM_ref2:
@@ -1346,11 +1566,16 @@ unsigned DWARFLinker::DIECloner::cloneAttribute(
case dwarf::DW_FORM_block2:
case dwarf::DW_FORM_block4:
case dwarf::DW_FORM_exprloc:
- return cloneBlockAttribute(Die, File, Unit, AttrSpec, Val, AttrSize,
+ return cloneBlockAttribute(Die, InputDIE, File, Unit, AttrSpec, Val,
IsLittleEndian);
case dwarf::DW_FORM_addr:
case dwarf::DW_FORM_addrx:
- return cloneAddressAttribute(Die, AttrSpec, AttrSize, Val, Unit, Info);
+ case dwarf::DW_FORM_addrx1:
+ case dwarf::DW_FORM_addrx2:
+ case dwarf::DW_FORM_addrx3:
+ case dwarf::DW_FORM_addrx4:
+ return cloneAddressAttribute(Die, InputDIE, AttrSpec, AttrSize, Val, Unit,
+ Info);
case dwarf::DW_FORM_data1:
case dwarf::DW_FORM_data2:
case dwarf::DW_FORM_data4:
@@ -1360,6 +1585,9 @@ unsigned DWARFLinker::DIECloner::cloneAttribute(
case dwarf::DW_FORM_sec_offset:
case dwarf::DW_FORM_flag:
case dwarf::DW_FORM_flag_present:
+ case dwarf::DW_FORM_rnglistx:
+ case dwarf::DW_FORM_loclistx:
+ case dwarf::DW_FORM_implicit_const:
return cloneScalarAttribute(Die, InputDIE, File, Unit, AttrSpec, Val,
AttrSize, Info);
default:
@@ -1419,9 +1647,10 @@ void DWARFLinker::DIECloner::addObjCAccelerator(CompileUnit &Unit,
}
}
-static bool shouldSkipAttribute(
- bool Update, DWARFAbbreviationDeclaration::AttributeSpec AttrSpec,
- uint16_t Tag, bool InDebugMap, bool SkipPC, bool InFunctionScope) {
+static bool
+shouldSkipAttribute(bool Update,
+ DWARFAbbreviationDeclaration::AttributeSpec AttrSpec,
+ bool SkipPC) {
switch (AttrSpec.Attr) {
default:
return false;
@@ -1429,26 +1658,29 @@ static bool shouldSkipAttribute(
case dwarf::DW_AT_high_pc:
case dwarf::DW_AT_ranges:
return !Update && SkipPC;
+ case dwarf::DW_AT_rnglists_base:
+ // In the !Update case the .debug_addr table is not generated/preserved.
+ // Thus DW_FORM_sec_offset is used instead of DW_FORM_rnglistx.
+ // Since DW_AT_rnglists_base is used only for DW_FORM_rnglistx,
+ // DW_AT_rnglists_base is removed.
+ return !Update;
case dwarf::DW_AT_str_offsets_base:
// FIXME: Use the string offset table with Dwarf 5.
return true;
+ case dwarf::DW_AT_loclists_base:
+ // In the !Update case the .debug_addr table is not generated/preserved.
+ // Thus DW_FORM_sec_offset is used instead of DW_FORM_loclistx.
+ // Since DW_AT_loclists_base is used only for DW_FORM_loclistx,
+ // DW_AT_loclists_base is removed.
+ return !Update;
case dwarf::DW_AT_location:
case dwarf::DW_AT_frame_base:
- // FIXME: for some reason dsymutil-classic keeps the location attributes
- // when they are of block type (i.e. not location lists). This is totally
- // wrong for globals where we will keep a wrong address. It is mostly
- // harmless for locals, but there is no point in keeping these anyway when
- // the function wasn't linked.
- return !Update &&
- (SkipPC || (!InFunctionScope && Tag == dwarf::DW_TAG_variable &&
- !InDebugMap)) &&
- !DWARFFormValue(AttrSpec.Form).isFormClass(DWARFFormValue::FC_Block);
+ return !Update && SkipPC;
}
}
DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
const DWARFFile &File, CompileUnit &Unit,
- OffsetsStringPool &StringPool,
int64_t PCOffset, uint32_t OutOffset,
unsigned Flags, bool IsLittleEndian,
DIE *Die) {
@@ -1500,27 +1732,7 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
DWARFDataExtractor(DIECopy, Data.isLittleEndian(), Data.getAddressSize());
// Modify the copy with relocated addresses.
- if (ObjFile.Addresses->applyValidRelocs(DIECopy, Offset,
- Data.isLittleEndian())) {
- // If we applied relocations, we store the value of high_pc that was
- // potentially stored in the input DIE. If high_pc is an address
- // (Dwarf version == 2), then it might have been relocated to a
- // totally unrelated value (because the end address in the object
- // file might be start address of another function which got moved
- // independently by the linker). The computation of the actual
- // high_pc value is done in cloneAddressAttribute().
- AttrInfo.OrigHighPc =
- dwarf::toAddress(InputDIE.find(dwarf::DW_AT_high_pc), 0);
- // Also store the low_pc. It might get relocated in an
- // inline_subprogram that happens at the beginning of its
- // inlining function.
- AttrInfo.OrigLowPc = dwarf::toAddress(InputDIE.find(dwarf::DW_AT_low_pc),
- std::numeric_limits<uint64_t>::max());
- AttrInfo.OrigCallReturnPc =
- dwarf::toAddress(InputDIE.find(dwarf::DW_AT_call_return_pc), 0);
- AttrInfo.OrigCallPc =
- dwarf::toAddress(InputDIE.find(dwarf::DW_AT_call_pc), 0);
- }
+ ObjFile.Addresses->applyValidRelocs(DIECopy, Offset, Data.isLittleEndian());
// Reset the Offset to 0 as we will be working on the local copy of
// the data.
@@ -1543,23 +1755,27 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
// is not, e.g., inlined functions.
if ((Flags & TF_InFunctionScope) && Info.InDebugMap)
Flags &= ~TF_SkipPC;
+ // Location expressions referencing an address which is not in the
+ // debug map should be deleted.
+ else if (!Info.InDebugMap && Info.HasLocationExpressionAddr &&
+ LLVM_LIKELY(!Update))
+ Flags |= TF_SkipPC;
}
for (const auto &AttrSpec : Abbrev->attributes()) {
- if (shouldSkipAttribute(Update, AttrSpec, Die->getTag(), Info.InDebugMap,
- Flags & TF_SkipPC, Flags & TF_InFunctionScope)) {
+ if (shouldSkipAttribute(Update, AttrSpec, Flags & TF_SkipPC)) {
DWARFFormValue::skipValue(AttrSpec.Form, Data, &Offset,
U.getFormParams());
continue;
}
- DWARFFormValue Val(AttrSpec.Form);
+ DWARFFormValue Val = AttrSpec.getFormValue();
uint64_t AttrSize = Offset;
Val.extractValue(Data, &Offset, U.getFormParams(), &U);
AttrSize = Offset - AttrSize;
- OutOffset += cloneAttribute(*Die, InputDIE, File, Unit, StringPool, Val,
- AttrSpec, AttrSize, AttrInfo, IsLittleEndian);
+ OutOffset += cloneAttribute(*Die, InputDIE, File, Unit, Val, AttrSpec,
+ AttrSize, AttrInfo, IsLittleEndian);
}
// Look for accelerator entries.
@@ -1569,7 +1785,7 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
// accelerator tables too. For now stick with dsymutil's behavior.
if ((Info.InDebugMap || AttrInfo.HasLowPc || AttrInfo.HasRanges) &&
Tag != dwarf::DW_TAG_compile_unit &&
- getDIENames(InputDIE, AttrInfo, StringPool,
+ getDIENames(InputDIE, AttrInfo, DebugStrPool,
Tag != dwarf::DW_TAG_inlined_subroutine)) {
if (AttrInfo.MangledName && AttrInfo.MangledName != AttrInfo.Name)
Unit.addNameAccelerator(Die, AttrInfo.MangledName,
@@ -1582,15 +1798,17 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
Tag == dwarf::DW_TAG_inlined_subroutine);
}
if (AttrInfo.Name && isObjCSelector(AttrInfo.Name.getString()))
- addObjCAccelerator(Unit, Die, AttrInfo.Name, StringPool,
+ addObjCAccelerator(Unit, Die, AttrInfo.Name, DebugStrPool,
/* SkipPubSection =*/true);
} else if (Tag == dwarf::DW_TAG_namespace) {
if (!AttrInfo.Name)
- AttrInfo.Name = StringPool.getEntry("(anonymous namespace)");
+ AttrInfo.Name = DebugStrPool.getEntry("(anonymous namespace)");
+ Unit.addNamespaceAccelerator(Die, AttrInfo.Name);
+ } else if (Tag == dwarf::DW_TAG_imported_declaration && AttrInfo.Name) {
Unit.addNamespaceAccelerator(Die, AttrInfo.Name);
} else if (isTypeTag(Tag) && !AttrInfo.IsDeclaration &&
- getDIENames(InputDIE, AttrInfo, StringPool) && AttrInfo.Name &&
+ getDIENames(InputDIE, AttrInfo, DebugStrPool) && AttrInfo.Name &&
AttrInfo.Name.getString()[0]) {
uint32_t Hash = hashFullyQualifiedName(InputDIE, Unit, File);
uint64_t RuntimeLang =
@@ -1633,8 +1851,8 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
// Recursively clone children.
for (auto Child : InputDIE.children()) {
- if (DIE *Clone = cloneDIE(Child, File, Unit, StringPool, PCOffset,
- OutOffset, Flags, IsLittleEndian)) {
+ if (DIE *Clone = cloneDIE(Child, File, Unit, PCOffset, OutOffset, Flags,
+ IsLittleEndian)) {
Die->addChild(Clone);
OutOffset = Clone->getOffset() + Clone->getSize();
}
@@ -1647,77 +1865,160 @@ DIE *DWARFLinker::DIECloner::cloneDIE(const DWARFDie &InputDIE,
return Die;
}
-/// Patch the input object file relevant debug_ranges entries
-/// and emit them in the output file. Update the relevant attributes
+/// Patch the input object file relevant debug_ranges or debug_rnglists
+/// entries and emit them in the output file. Update the relevant attributes
/// to point at the new entries.
-void DWARFLinker::patchRangesForUnit(const CompileUnit &Unit,
- DWARFContext &OrigDwarf,
+void DWARFLinker::generateUnitRanges(CompileUnit &Unit,
const DWARFFile &File) const {
- DWARFDebugRangeList RangeList;
+ if (LLVM_UNLIKELY(Options.Update))
+ return;
+
const auto &FunctionRanges = Unit.getFunctionRanges();
- unsigned AddressSize = Unit.getOrigUnit().getAddressByteSize();
- DWARFDataExtractor RangeExtractor(OrigDwarf.getDWARFObj(),
- OrigDwarf.getDWARFObj().getRangesSection(),
- OrigDwarf.isLittleEndian(), AddressSize);
- std::optional<std::pair<AddressRange, int64_t>> CachedRange;
- DWARFUnit &OrigUnit = Unit.getOrigUnit();
- auto OrigUnitDie = OrigUnit.getUnitDIE(false);
- uint64_t UnitBaseAddress =
- dwarf::toAddress(OrigUnitDie.find(dwarf::DW_AT_low_pc), 0);
-
- for (const auto &RangeAttribute : Unit.getRangesAttributes()) {
- uint64_t Offset = RangeAttribute.get();
- RangeAttribute.set(TheDwarfEmitter->getRangesSectionSize());
- if (Error E = RangeList.extract(RangeExtractor, &Offset)) {
- llvm::consumeError(std::move(E));
- reportWarning("invalid range list ignored.", File);
- RangeList.clear();
+
+ // Build set of linked address ranges for unit function ranges.
+ AddressRanges LinkedFunctionRanges;
+ for (const AddressRangeValuePair &Range : FunctionRanges)
+ LinkedFunctionRanges.insert(
+ {Range.Range.start() + Range.Value, Range.Range.end() + Range.Value});
+
+ // Emit LinkedFunctionRanges into .debug_aranges
+ if (!LinkedFunctionRanges.empty())
+ TheDwarfEmitter->emitDwarfDebugArangesTable(Unit, LinkedFunctionRanges);
+
+ RngListAttributesTy AllRngListAttributes = Unit.getRangesAttributes();
+ std::optional<PatchLocation> UnitRngListAttribute =
+ Unit.getUnitRangesAttribute();
+
+ if (!AllRngListAttributes.empty() || UnitRngListAttribute) {
+ std::optional<AddressRangeValuePair> CachedRange;
+ MCSymbol *EndLabel = TheDwarfEmitter->emitDwarfDebugRangeListHeader(Unit);
+
+ // Read original address ranges, apply relocation value, emit linked address
+ // ranges.
+ for (PatchLocation &AttributePatch : AllRngListAttributes) {
+ // Get ranges from the source DWARF corresponding to the current
+ // attribute.
+ AddressRanges LinkedRanges;
+ if (Expected<DWARFAddressRangesVector> OriginalRanges =
+ Unit.getOrigUnit().findRnglistFromOffset(AttributePatch.get())) {
+ // Apply relocation adjustment.
+ for (const auto &Range : *OriginalRanges) {
+ if (!CachedRange || !CachedRange->Range.contains(Range.LowPC))
+ CachedRange = FunctionRanges.getRangeThatContains(Range.LowPC);
+
+ // All range entries should lie in the function range.
+ if (!CachedRange) {
+ reportWarning("inconsistent range data.", File);
+ continue;
+ }
+
+ // Store the range for emitting.
+ LinkedRanges.insert({Range.LowPC + CachedRange->Value,
+ Range.HighPC + CachedRange->Value});
+ }
+ } else {
+ llvm::consumeError(OriginalRanges.takeError());
+ reportWarning("invalid range list ignored.", File);
+ }
+
+ // Emit linked ranges.
+ TheDwarfEmitter->emitDwarfDebugRangeListFragment(Unit, LinkedRanges,
+ AttributePatch);
}
- const auto &Entries = RangeList.getEntries();
- uint64_t BaseAddress = UnitBaseAddress;
- AddressRanges LinkedRanges;
+ // Emit ranges for Unit AT_ranges attribute.
+ if (UnitRngListAttribute.has_value())
+ TheDwarfEmitter->emitDwarfDebugRangeListFragment(
+ Unit, LinkedFunctionRanges, *UnitRngListAttribute);
- if (!Entries.empty()) {
- for (const auto &Range : Entries) {
- if (Range.isBaseAddressSelectionEntry(
- Unit.getOrigUnit().getAddressByteSize())) {
- BaseAddress = Range.EndAddress;
- continue;
- }
+ // Emit ranges footer.
+ TheDwarfEmitter->emitDwarfDebugRangeListFooter(Unit, EndLabel);
+ }
+}
- if (!CachedRange ||
- !CachedRange->first.contains(Range.StartAddress + BaseAddress))
- CachedRange = FunctionRanges.getRangeValueThatContains(
- Range.StartAddress + BaseAddress);
+void DWARFLinker::DIECloner::generateUnitLocations(
+ CompileUnit &Unit, const DWARFFile &File,
+ ExpressionHandlerRef ExprHandler) {
+ if (LLVM_UNLIKELY(Linker.Options.Update))
+ return;
- // All range entries should lie in the function range.
- if (!CachedRange) {
- reportWarning("inconsistent range data.", File);
- continue;
- }
+ const LocListAttributesTy &AllLocListAttributes =
+ Unit.getLocationAttributes();
+
+ if (AllLocListAttributes.empty())
+ return;
+
+ // Emit locations list table header.
+ MCSymbol *EndLabel = Emitter->emitDwarfDebugLocListHeader(Unit);
+
+ for (auto &CurLocAttr : AllLocListAttributes) {
+ // Get location expressions vector corresponding to the current attribute
+ // from the source DWARF.
+ Expected<DWARFLocationExpressionsVector> OriginalLocations =
+ Unit.getOrigUnit().findLoclistFromOffset(CurLocAttr.get());
+
+ if (!OriginalLocations) {
+ llvm::consumeError(OriginalLocations.takeError());
+ Linker.reportWarning("Invalid location attribute ignored.", File);
+ continue;
+ }
- LinkedRanges.insert(
- {Range.StartAddress + BaseAddress + CachedRange->second,
- Range.EndAddress + BaseAddress + CachedRange->second});
+ DWARFLocationExpressionsVector LinkedLocationExpressions;
+ for (DWARFLocationExpression &CurExpression : *OriginalLocations) {
+ DWARFLocationExpression LinkedExpression;
+
+ if (CurExpression.Range) {
+ // Relocate address range.
+ LinkedExpression.Range = {
+ CurExpression.Range->LowPC + CurLocAttr.RelocAdjustment,
+ CurExpression.Range->HighPC + CurLocAttr.RelocAdjustment};
}
+
+ // Clone expression.
+ LinkedExpression.Expr.reserve(CurExpression.Expr.size());
+ ExprHandler(CurExpression.Expr, LinkedExpression.Expr,
+ CurLocAttr.RelocAdjustment);
+
+ LinkedLocationExpressions.push_back(LinkedExpression);
}
- TheDwarfEmitter->emitDwarfDebugRangesTableFragment(Unit, LinkedRanges);
+ // Emit locations list table fragment corresponding to the CurLocAttr.
+ Emitter->emitDwarfDebugLocListFragment(Unit, LinkedLocationExpressions,
+ CurLocAttr, AddrPool);
}
+
+ // Emit locations list table footer.
+ Emitter->emitDwarfDebugLocListFooter(Unit, EndLabel);
}
-/// Generate the debug_aranges entries for \p Unit and if the
-/// unit has a DW_AT_ranges attribute, also emit the debug_ranges
-/// contribution for this attribute.
-/// FIXME: this could actually be done right in patchRangesForUnit,
-/// but for the sake of initial bit-for-bit compatibility with legacy
-/// dsymutil, we have to do it in a delayed pass.
-void DWARFLinker::generateUnitRanges(CompileUnit &Unit) const {
- auto Attr = Unit.getUnitRangesAttribute();
- if (Attr)
- Attr->set(TheDwarfEmitter->getRangesSectionSize());
- TheDwarfEmitter->emitUnitRangesEntries(Unit, static_cast<bool>(Attr));
+static void patchAddrBase(DIE &Die, DIEInteger Offset) {
+ for (auto &V : Die.values())
+ if (V.getAttribute() == dwarf::DW_AT_addr_base) {
+ V = DIEValue(V.getAttribute(), V.getForm(), Offset);
+ return;
+ }
+
+ llvm_unreachable("Didn't find a DW_AT_addr_base in cloned DIE!");
+}
+
+void DWARFLinker::DIECloner::emitDebugAddrSection(
+ CompileUnit &Unit,
+ const uint16_t DwarfVersion) const {
+
+ if (LLVM_UNLIKELY(Linker.Options.Update))
+ return;
+
+ if (DwarfVersion < 5)
+ return;
+
+ if (AddrPool.Addrs.empty())
+ return;
+
+ MCSymbol *EndLabel = Emitter->emitDwarfDebugAddrsHeader(Unit);
+ patchAddrBase(*Unit.getOutputUnitDIE(),
+ DIEInteger(Emitter->getDebugAddrSectionSize()));
+ Emitter->emitDwarfDebugAddrs(AddrPool.Addrs, Unit.getOrigUnit().getAddressByteSize());
+ Emitter->emitDwarfDebugAddrsFooter(Unit, EndLabel);
}
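To make the DW_AT_addr_base patching above easier to follow, here is a standalone sketch (little-endian DWARF32 with 8-byte addresses only; not the DwarfStreamer API) of the .debug_addr contribution that ends up holding the pooled addresses. In DWARF v5 the header is 8 bytes and DW_AT_addr_base points at the first address entry right after it, which is presumably why the attribute is patched only after the header has been emitted.

#include <cstdint>
#include <vector>

// Build a single DWARF32, version 5 .debug_addr contribution for 8-byte
// addresses. unit_length does not include its own 4 bytes; DW_AT_addr_base
// points just past the 8-byte header, i.e. at the first address entry.
static std::vector<uint8_t>
buildDebugAddrContribution(const std::vector<uint64_t> &Addrs) {
  std::vector<uint8_t> Out;
  auto EmitLE = [&Out](uint64_t Value, unsigned Size) {
    for (unsigned I = 0; I < Size; ++I)
      Out.push_back(static_cast<uint8_t>(Value >> (8 * I)));
  };
  uint32_t UnitLength = static_cast<uint32_t>(4 + 8 * Addrs.size());
  EmitLE(UnitLength, 4); // unit_length (excludes these 4 bytes)
  EmitLE(5, 2);          // version
  EmitLE(8, 1);          // address_size
  EmitLE(0, 1);          // segment_selector_size
  for (uint64_t A : Addrs) // entries; DW_AT_addr_base points here (offset 8)
    EmitLE(A, 8);
  return Out;
}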
/// Insert the new line info sequence \p Seq into the current
@@ -1739,7 +2040,7 @@ static void insertLineSequence(std::vector<DWARFDebugLine::Row> &Seq,
// FIXME: this only removes the unneeded end_sequence if the
// sequences have been inserted in order. Using a global sort like
- // described in patchLineTableForUnit() and delaying the end_sequene
+ // described in generateLineTableForUnit() and delaying the end_sequence
// elimination to emitLineTableForUnit() we can get rid of all of them.
if (InsertPoint != Rows.end() && InsertPoint->Address == Front &&
InsertPoint->EndSequence) {
@@ -1762,142 +2063,6 @@ static void patchStmtList(DIE &Die, DIEInteger Offset) {
llvm_unreachable("Didn't find DW_AT_stmt_list in cloned DIE!");
}
-/// Extract the line table for \p Unit from \p OrigDwarf, and
-/// recreate a relocated version of these for the address ranges that
-/// are present in the binary.
-void DWARFLinker::patchLineTableForUnit(CompileUnit &Unit,
- DWARFContext &OrigDwarf,
- const DWARFFile &File) {
- DWARFDie CUDie = Unit.getOrigUnit().getUnitDIE();
- auto StmtList = dwarf::toSectionOffset(CUDie.find(dwarf::DW_AT_stmt_list));
- if (!StmtList)
- return;
-
- // Update the cloned DW_AT_stmt_list with the correct debug_line offset.
- if (auto *OutputDIE = Unit.getOutputUnitDIE())
- patchStmtList(*OutputDIE,
- DIEInteger(TheDwarfEmitter->getLineSectionSize()));
-
- RangesTy &Ranges = File.Addresses->getValidAddressRanges();
-
- // Parse the original line info for the unit.
- DWARFDebugLine::LineTable LineTable;
- uint64_t StmtOffset = *StmtList;
- DWARFDataExtractor LineExtractor(
- OrigDwarf.getDWARFObj(), OrigDwarf.getDWARFObj().getLineSection(),
- OrigDwarf.isLittleEndian(), Unit.getOrigUnit().getAddressByteSize());
- if (needToTranslateStrings())
- return TheDwarfEmitter->translateLineTable(LineExtractor, StmtOffset);
-
- if (Error Err =
- LineTable.parse(LineExtractor, &StmtOffset, OrigDwarf,
- &Unit.getOrigUnit(), OrigDwarf.getWarningHandler()))
- OrigDwarf.getWarningHandler()(std::move(Err));
-
- // This vector is the output line table.
- std::vector<DWARFDebugLine::Row> NewRows;
- NewRows.reserve(LineTable.Rows.size());
-
- // Current sequence of rows being extracted, before being inserted
- // in NewRows.
- std::vector<DWARFDebugLine::Row> Seq;
- const auto &FunctionRanges = Unit.getFunctionRanges();
- std::optional<std::pair<AddressRange, int64_t>> CurrRange;
-
- // FIXME: This logic is meant to generate exactly the same output as
- // Darwin's classic dsymutil. There is a nicer way to implement this
- // by simply putting all the relocated line info in NewRows and simply
- // sorting NewRows before passing it to emitLineTableForUnit. This
- // should be correct as sequences for a function should stay
- // together in the sorted output. There are a few corner cases that
- // look suspicious though, and that required to implement the logic
- // this way. Revisit that once initial validation is finished.
-
- // Iterate over the object file line info and extract the sequences
- // that correspond to linked functions.
- for (auto &Row : LineTable.Rows) {
- // Check whether we stepped out of the range. The range is
- // half-open, but consider accept the end address of the range if
- // it is marked as end_sequence in the input (because in that
- // case, the relocation offset is accurate and that entry won't
- // serve as the start of another function).
- if (!CurrRange || !CurrRange->first.contains(Row.Address.Address) ||
- (Row.Address.Address == CurrRange->first.end() && !Row.EndSequence)) {
- // We just stepped out of a known range. Insert a end_sequence
- // corresponding to the end of the range.
- uint64_t StopAddress =
- CurrRange ? CurrRange->first.end() + CurrRange->second : -1ULL;
- CurrRange = FunctionRanges.getRangeValueThatContains(Row.Address.Address);
- if (!CurrRange) {
- if (StopAddress != -1ULL) {
- // Try harder by looking in the Address ranges map.
- // There are corner cases where this finds a
- // valid entry. It's unclear if this is right or wrong, but
- // for now do as dsymutil.
- // FIXME: Understand exactly what cases this addresses and
- // potentially remove it along with the Ranges map.
- if (std::optional<std::pair<AddressRange, int64_t>> Range =
- Ranges.getRangeValueThatContains(Row.Address.Address))
- StopAddress = Row.Address.Address + (*Range).second;
- }
- }
- if (StopAddress != -1ULL && !Seq.empty()) {
- // Insert end sequence row with the computed end address, but
- // the same line as the previous one.
- auto NextLine = Seq.back();
- NextLine.Address.Address = StopAddress;
- NextLine.EndSequence = 1;
- NextLine.PrologueEnd = 0;
- NextLine.BasicBlock = 0;
- NextLine.EpilogueBegin = 0;
- Seq.push_back(NextLine);
- insertLineSequence(Seq, NewRows);
- }
-
- if (!CurrRange)
- continue;
- }
-
- // Ignore empty sequences.
- if (Row.EndSequence && Seq.empty())
- continue;
-
- // Relocate row address and add it to the current sequence.
- Row.Address.Address += CurrRange->second;
- Seq.emplace_back(Row);
-
- if (Row.EndSequence)
- insertLineSequence(Seq, NewRows);
- }
-
- // Finished extracting, now emit the line tables.
- // FIXME: LLVM hard-codes its prologue values. We just copy the
- // prologue over and that works because we act as both producer and
- // consumer. It would be nicer to have a real configurable line
- // table emitter.
- if (LineTable.Prologue.getVersion() < 2 ||
- LineTable.Prologue.getVersion() > 5 ||
- LineTable.Prologue.DefaultIsStmt != DWARF2_LINE_DEFAULT_IS_STMT ||
- LineTable.Prologue.OpcodeBase > 13)
- reportWarning("line table parameters mismatch. Cannot emit.", File);
- else {
- uint32_t PrologueEnd = *StmtList + 10 + LineTable.Prologue.PrologueLength;
- // DWARF v5 has an extra 2 bytes of information before the header_length
- // field.
- if (LineTable.Prologue.getVersion() == 5)
- PrologueEnd += 2;
- StringRef LineData = OrigDwarf.getDWARFObj().getLineSection().Data;
- MCDwarfLineTableParams Params;
- Params.DWARF2LineOpcodeBase = LineTable.Prologue.OpcodeBase;
- Params.DWARF2LineBase = LineTable.Prologue.LineBase;
- Params.DWARF2LineRange = LineTable.Prologue.LineRange;
- TheDwarfEmitter->emitLineTableForUnit(
- Params, LineData.slice(*StmtList + 4, PrologueEnd),
- LineTable.Prologue.MinInstLength, NewRows,
- Unit.getOrigUnit().getAddressByteSize());
- }
-}
-
void DWARFLinker::DIECloner::rememberUnitForMacroOffset(CompileUnit &Unit) {
DWARFUnit &OrigUnit = Unit.getOrigUnit();
DWARFDie OrigUnitDie = OrigUnit.getUnitDIE();
@@ -1915,10 +2080,114 @@ void DWARFLinker::DIECloner::rememberUnitForMacroOffset(CompileUnit &Unit) {
}
}
+void DWARFLinker::DIECloner::generateLineTableForUnit(CompileUnit &Unit) {
+ if (LLVM_UNLIKELY(Emitter == nullptr))
+ return;
+
+ // Check whether the DW_AT_stmt_list attribute is present.
+ DWARFDie CUDie = Unit.getOrigUnit().getUnitDIE();
+ auto StmtList = dwarf::toSectionOffset(CUDie.find(dwarf::DW_AT_stmt_list));
+ if (!StmtList)
+ return;
+
+ // Update the cloned DW_AT_stmt_list with the correct debug_line offset.
+ if (auto *OutputDIE = Unit.getOutputUnitDIE())
+ patchStmtList(*OutputDIE, DIEInteger(Emitter->getLineSectionSize()));
+
+ if (const DWARFDebugLine::LineTable *LT =
+ ObjFile.Dwarf->getLineTableForUnit(&Unit.getOrigUnit())) {
+
+ DWARFDebugLine::LineTable LineTable;
+
+ // Set Line Table header.
+ LineTable.Prologue = LT->Prologue;
+
+ // Set Line Table Rows.
+ if (Linker.Options.Update) {
+ LineTable.Rows = LT->Rows;
+ // If all the line table contains is a DW_LNE_end_sequence, clear the line
+ // table rows, it will be inserted again in the DWARFStreamer.
+ if (LineTable.Rows.size() == 1 && LineTable.Rows[0].EndSequence)
+ LineTable.Rows.clear();
+
+ LineTable.Sequences = LT->Sequences;
+ } else {
+ // This vector is the output line table.
+ std::vector<DWARFDebugLine::Row> NewRows;
+ NewRows.reserve(LT->Rows.size());
+
+ // Current sequence of rows being extracted, before being inserted
+ // in NewRows.
+ std::vector<DWARFDebugLine::Row> Seq;
+
+ const auto &FunctionRanges = Unit.getFunctionRanges();
+ std::optional<AddressRangeValuePair> CurrRange;
+
+ // FIXME: This logic is meant to generate exactly the same output as
+ // Darwin's classic dsymutil. There is a nicer way to implement this
+ // by simply putting all the relocated line info in NewRows and simply
+ // sorting NewRows before passing it to emitLineTableForUnit. This
+ // should be correct as sequences for a function should stay
+ // together in the sorted output. There are a few corner cases that
+ // look suspicious though, and that required to implement the logic
+ // this way. Revisit that once initial validation is finished.
+
+ // Iterate over the object file line info and extract the sequences
+ // that correspond to linked functions.
+ for (DWARFDebugLine::Row Row : LT->Rows) {
+ // Check whether we stepped out of the range. The range is
+ // half-open, but accept the end address of the range if
+ // it is marked as end_sequence in the input (because in that
+ // case, the relocation offset is accurate and that entry won't
+ // serve as the start of another function).
+ if (!CurrRange || !CurrRange->Range.contains(Row.Address.Address)) {
+ // We just stepped out of a known range. Insert an end_sequence
+ // corresponding to the end of the range.
+ uint64_t StopAddress =
+ CurrRange ? CurrRange->Range.end() + CurrRange->Value : -1ULL;
+ CurrRange = FunctionRanges.getRangeThatContains(Row.Address.Address);
+ if (StopAddress != -1ULL && !Seq.empty()) {
+ // Insert end sequence row with the computed end address, but
+ // the same line as the previous one.
+ auto NextLine = Seq.back();
+ NextLine.Address.Address = StopAddress;
+ NextLine.EndSequence = 1;
+ NextLine.PrologueEnd = 0;
+ NextLine.BasicBlock = 0;
+ NextLine.EpilogueBegin = 0;
+ Seq.push_back(NextLine);
+ insertLineSequence(Seq, NewRows);
+ }
+
+ if (!CurrRange)
+ continue;
+ }
+
+ // Ignore empty sequences.
+ if (Row.EndSequence && Seq.empty())
+ continue;
+
+ // Relocate row address and add it to the current sequence.
+ Row.Address.Address += CurrRange->Value;
+ Seq.emplace_back(Row);
+
+ if (Row.EndSequence)
+ insertLineSequence(Seq, NewRows);
+ }
+
+ LineTable.Rows = std::move(NewRows);
+ }
+
+ Emitter->emitLineTableForUnit(LineTable, Unit, DebugStrPool,
+ DebugLineStrPool);
+ } else
+ Linker.reportWarning("Cann't load line table.", ObjFile);
+}
+
void DWARFLinker::emitAcceleratorEntriesForUnit(CompileUnit &Unit) {
- for (DwarfLinkerAccelTableKind AccelTableKind : Options.AccelTables) {
+ for (AccelTableKind AccelTableKind : Options.AccelTables) {
switch (AccelTableKind) {
- case DwarfLinkerAccelTableKind::Apple: {
+ case AccelTableKind::Apple: {
// Add namespaces.
for (const auto &Namespace : Unit.getNamespaces())
AppleNamespaces.addName(Namespace.Name, Namespace.Die->getOffset() +
@@ -1940,11 +2209,11 @@ void DWARFLinker::emitAcceleratorEntriesForUnit(CompileUnit &Unit) {
AppleObjc.addName(ObjC.Name,
ObjC.Die->getOffset() + Unit.getStartOffset());
} break;
- case DwarfLinkerAccelTableKind::Pub: {
+ case AccelTableKind::Pub: {
TheDwarfEmitter->emitPubNamesForUnit(Unit);
TheDwarfEmitter->emitPubTypesForUnit(Unit);
} break;
- case DwarfLinkerAccelTableKind::DebugNames: {
+ case AccelTableKind::DebugNames: {
for (const auto &Namespace : Unit.getNamespaces())
DebugNames.addName(Namespace.Name, Namespace.Die->getOffset(),
Namespace.Die->getTag(), Unit.getUniqueID());
@@ -1965,14 +2234,20 @@ void DWARFLinker::emitAcceleratorEntriesForUnit(CompileUnit &Unit) {
/// This is actually pretty easy as the data of the CIEs and FDEs can
/// be considered as black boxes and moved as is. The only thing to do
/// is to patch the addresses in the headers.
-void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File,
- RangesTy &Ranges,
- DWARFContext &OrigDwarf,
- unsigned AddrSize) {
+void DWARFLinker::patchFrameInfoForObject(LinkContext &Context) {
+ DWARFContext &OrigDwarf = *Context.File.Dwarf;
+ unsigned SrcAddrSize = OrigDwarf.getDWARFObj().getAddressSize();
+
StringRef FrameData = OrigDwarf.getDWARFObj().getFrameSection().Data;
if (FrameData.empty())
return;
+ RangesTy AllUnitsRanges;
+ for (std::unique_ptr<CompileUnit> &Unit : Context.CompileUnits) {
+ for (auto CurRange : Unit->getFunctionRanges())
+ AllUnitsRanges.insert(CurRange.Range, CurRange.Value);
+ }
+
DataExtractor Data(FrameData, OrigDwarf.isLittleEndian(), 0);
uint64_t InputOffset = 0;
@@ -1984,7 +2259,7 @@ void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File,
uint64_t EntryOffset = InputOffset;
uint32_t InitialLength = Data.getU32(&InputOffset);
if (InitialLength == 0xFFFFFFFF)
- return reportWarning("Dwarf64 bits no supported", File);
+ return reportWarning("Dwarf64 bits no supported", Context.File);
uint32_t CIEId = Data.getU32(&InputOffset);
if (CIEId == 0xFFFFFFFF) {
@@ -1996,14 +2271,14 @@ void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File,
continue;
}
- uint64_t Loc = Data.getUnsigned(&InputOffset, AddrSize);
+ uint64_t Loc = Data.getUnsigned(&InputOffset, SrcAddrSize);
// Some compilers seem to emit frame info that doesn't start at
// the function entry point, thus we can't just lookup the address
// in the debug map. Use the AddressInfo's range map to see if the FDE
// describes something that we can relocate.
- std::optional<std::pair<AddressRange, int64_t>> Range =
- Ranges.getRangeValueThatContains(Loc);
+ std::optional<AddressRangeValuePair> Range =
+ AllUnitsRanges.getRangeThatContains(Loc);
if (!Range) {
// The +4 is to account for the size of the InitialLength field itself.
InputOffset = EntryOffset + InitialLength + 4;
@@ -2014,7 +2289,8 @@ void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File,
// Have we already emitted a corresponding CIE?
StringRef CIEData = LocalCIES[CIEId];
if (CIEData.empty())
- return reportWarning("Inconsistent debug_frame content. Dropping.", File);
+ return reportWarning("Inconsistent debug_frame content. Dropping.",
+ Context.File);
// Look if we already emitted a CIE that corresponds to the
// referenced one (the CIE data is the key of that lookup).
@@ -2030,9 +2306,9 @@ void DWARFLinker::patchFrameInfoForObject(const DWARFFile &File,
// Emit the FDE with updated address and CIE pointer.
// (4 + AddrSize) is the size of the CIEId + initial_location
// fields that will get reconstructed by emitFDE().
- unsigned FDERemainingBytes = InitialLength - (4 + AddrSize);
- TheDwarfEmitter->emitFDE(IteratorInserted.first->getValue(), AddrSize,
- Loc + Range->second,
+ unsigned FDERemainingBytes = InitialLength - (4 + SrcAddrSize);
+ TheDwarfEmitter->emitFDE(IteratorInserted.first->getValue(), SrcAddrSize,
+ Loc + Range->Value,
FrameData.substr(InputOffset, FDERemainingBytes));
InputOffset += FDERemainingBytes;
}
@@ -2261,10 +2537,9 @@ Error DWARFLinker::loadClangModule(objFileLoader Loader, const DWARFDie &CUDie,
}
uint64_t DWARFLinker::DIECloner::cloneAllCompileUnits(
- DWARFContext &DwarfContext, const DWARFFile &File,
- OffsetsStringPool &StringPool, bool IsLittleEndian) {
+ DWARFContext &DwarfContext, const DWARFFile &File, bool IsLittleEndian) {
uint64_t OutputDebugInfoSize =
- Linker.Options.NoOutput ? 0 : Emitter->getDebugInfoSectionSize();
+ (Emitter == nullptr) ? 0 : Emitter->getDebugInfoSectionSize();
const uint64_t StartOutputDebugInfoSize = OutputDebugInfoSize;
for (auto &CurrentUnit : CompileUnits) {
@@ -2281,50 +2556,48 @@ uint64_t DWARFLinker::DIECloner::cloneAllCompileUnits(
// already has a DIE inside of it.
CurrentUnit->createOutputDIE();
rememberUnitForMacroOffset(*CurrentUnit);
- cloneDIE(InputDIE, File, *CurrentUnit, StringPool, 0 /* PC offset */,
- UnitHeaderSize, 0, IsLittleEndian,
- CurrentUnit->getOutputUnitDIE());
+ cloneDIE(InputDIE, File, *CurrentUnit, 0 /* PC offset */, UnitHeaderSize,
+ 0, IsLittleEndian, CurrentUnit->getOutputUnitDIE());
}
OutputDebugInfoSize = CurrentUnit->computeNextUnitOffset(DwarfVersion);
- if (!Linker.Options.NoOutput) {
- assert(Emitter);
+ if (Emitter != nullptr) {
- if (LLVM_LIKELY(!Linker.Options.Update) ||
- Linker.needToTranslateStrings())
- Linker.patchLineTableForUnit(*CurrentUnit, DwarfContext, File);
+ generateLineTableForUnit(*CurrentUnit);
Linker.emitAcceleratorEntriesForUnit(*CurrentUnit);
if (LLVM_UNLIKELY(Linker.Options.Update))
continue;
- Linker.patchRangesForUnit(*CurrentUnit, DwarfContext, File);
- auto ProcessExpr = [&](StringRef Bytes,
- SmallVectorImpl<uint8_t> &Buffer) {
+ Linker.generateUnitRanges(*CurrentUnit, File);
+
+ auto ProcessExpr = [&](SmallVectorImpl<uint8_t> &SrcBytes,
+ SmallVectorImpl<uint8_t> &OutBytes,
+ int64_t RelocAdjustment) {
DWARFUnit &OrigUnit = CurrentUnit->getOrigUnit();
- DataExtractor Data(Bytes, IsLittleEndian,
+ DataExtractor Data(SrcBytes, IsLittleEndian,
OrigUnit.getAddressByteSize());
cloneExpression(Data,
DWARFExpression(Data, OrigUnit.getAddressByteSize(),
OrigUnit.getFormParams().Format),
- File, *CurrentUnit, Buffer);
+ File, *CurrentUnit, OutBytes, RelocAdjustment,
+ IsLittleEndian);
};
- Emitter->emitLocationsForUnit(*CurrentUnit, DwarfContext, ProcessExpr);
+ generateUnitLocations(*CurrentUnit, File, ProcessExpr);
+ emitDebugAddrSection(*CurrentUnit, DwarfVersion);
}
+ AddrPool.clear();
}
- if (!Linker.Options.NoOutput) {
+ if (Emitter != nullptr) {
assert(Emitter);
// Emit macro tables.
- Emitter->emitMacroTables(File.Dwarf, UnitMacroMap, StringPool);
+ Emitter->emitMacroTables(File.Dwarf.get(), UnitMacroMap, DebugStrPool);
// Emit all the compile unit's debug information.
for (auto &CurrentUnit : CompileUnits) {
- if (LLVM_LIKELY(!Linker.Options.Update))
- Linker.generateUnitRanges(*CurrentUnit);
-
CurrentUnit->fixupForwardReferences();
if (!CurrentUnit->getOutputUnitDIE())
@@ -2408,9 +2681,6 @@ bool DWARFLinker::emitPaperTrailWarnings(const DWARFFile &File,
}
void DWARFLinker::copyInvariantDebugSection(DWARFContext &Dwarf) {
- if (!needToTranslateStrings())
- TheDwarfEmitter->emitSectionContents(
- Dwarf.getDWARFObj().getLineSection().Data, "debug_line");
TheDwarfEmitter->emitSectionContents(Dwarf.getDWARFObj().getLocSection().Data,
"debug_loc");
TheDwarfEmitter->emitSectionContents(
@@ -2419,6 +2689,12 @@ void DWARFLinker::copyInvariantDebugSection(DWARFContext &Dwarf) {
Dwarf.getDWARFObj().getFrameSection().Data, "debug_frame");
TheDwarfEmitter->emitSectionContents(Dwarf.getDWARFObj().getArangesSection(),
"debug_aranges");
+ TheDwarfEmitter->emitSectionContents(
+ Dwarf.getDWARFObj().getAddrSection().Data, "debug_addr");
+ TheDwarfEmitter->emitSectionContents(
+ Dwarf.getDWARFObj().getRnglistsSection().Data, "debug_rnglists");
+ TheDwarfEmitter->emitSectionContents(
+ Dwarf.getDWARFObj().getLoclistsSection().Data, "debug_loclists");
}
void DWARFLinker::addObjectFile(DWARFFile &File, objFileLoader Loader,
@@ -2443,7 +2719,6 @@ void DWARFLinker::addObjectFile(DWARFFile &File, objFileLoader Loader,
}
Error DWARFLinker::link() {
- assert(Options.NoOutput || TheDwarfEmitter);
assert((Options.TargetDWARFVersion != 0) &&
"TargetDWARFVersion should be set");
@@ -2454,7 +2729,8 @@ Error DWARFLinker::link() {
// This Dwarf string pool which is used for emission. It must be used
// serially as the order of calling getStringOffset matters for
// reproducibility.
- OffsetsStringPool OffsetsStringPool(StringsTranslator, true);
+ OffsetsStringPool DebugStrPool(StringsTranslator, true);
+ OffsetsStringPool DebugLineStrPool(StringsTranslator, false);
// ODR Contexts for the optimize.
DeclContextTree ODRContexts;
@@ -2467,14 +2743,14 @@ Error DWARFLinker::link() {
outs() << "OBJECT FILE: " << OptContext.File.FileName << "\n";
}
- if (emitPaperTrailWarnings(OptContext.File, OffsetsStringPool))
+ if (emitPaperTrailWarnings(OptContext.File, DebugStrPool))
continue;
if (!OptContext.File.Dwarf)
continue;
if (Options.VerifyInputDWARF)
- verify(OptContext.File);
+ verifyInput(OptContext.File);
// Look for relocations that correspond to address map entries.
@@ -2504,30 +2780,6 @@ Error DWARFLinker::link() {
continue;
}
- // Check for unsupported sections. Following sections can be referenced
- // from .debug_info section. Current DWARFLinker implementation does not
- // support or update references to these tables. Thus we report warning
- // and skip corresponding object file.
- if (!OptContext.File.Dwarf->getDWARFObj()
- .getRnglistsSection()
- .Data.empty()) {
- reportWarning("'.debug_rnglists' is not currently supported: file "
- "will be skipped",
- OptContext.File);
- OptContext.Skip = true;
- continue;
- }
-
- if (!OptContext.File.Dwarf->getDWARFObj()
- .getLoclistsSection()
- .Data.empty()) {
- reportWarning("'.debug_loclists' is not currently supported: file "
- "will be skipped",
- OptContext.File);
- OptContext.Skip = true;
- continue;
- }
-
// In a first phase, just read in the debug info and load all clang modules.
OptContext.CompileUnits.reserve(
OptContext.File.Dwarf->getNumCompileUnits());
@@ -2544,8 +2796,8 @@ Error DWARFLinker::link() {
}
for (auto &CU : OptContext.ModuleUnits) {
- if (Error Err =
- cloneModuleUnit(OptContext, CU, ODRContexts, OffsetsStringPool))
+ if (Error Err = cloneModuleUnit(OptContext, CU, ODRContexts, DebugStrPool,
+ DebugLineStrPool))
reportWarning(toString(std::move(Err)), CU.File);
}
}
@@ -2556,7 +2808,8 @@ Error DWARFLinker::link() {
// later. This prevents undeterminism when analyze and clone execute
// concurrently, as clone set the canonical DIE offset and analyze reads it.
const uint64_t ModulesEndOffset =
- Options.NoOutput ? 0 : TheDwarfEmitter->getDebugInfoSectionSize();
+ (TheDwarfEmitter == nullptr) ? 0
+ : TheDwarfEmitter->getDebugInfoSectionSize();
// These variables manage the list of processed object files.
// The mutex and condition variable are to ensure that this is thread safe.
@@ -2622,9 +2875,7 @@ Error DWARFLinker::link() {
copyInvariantDebugSection(*OptContext.File.Dwarf);
} else {
for (auto &CurrentUnit : OptContext.CompileUnits) {
- lookForDIEsToKeep(*OptContext.File.Addresses,
- OptContext.File.Addresses->getValidAddressRanges(),
- OptContext.CompileUnits,
+ lookForDIEsToKeep(*OptContext.File.Addresses, OptContext.CompileUnits,
CurrentUnit->getOrigUnit().getUnitDIE(),
OptContext.File, *CurrentUnit, 0);
#ifndef NDEBUG
@@ -2641,18 +2892,15 @@ Error DWARFLinker::link() {
SizeByObject[OptContext.File.FileName].Input =
getDebugInfoSize(*OptContext.File.Dwarf);
SizeByObject[OptContext.File.FileName].Output =
- DIECloner(*this, TheDwarfEmitter, OptContext.File, DIEAlloc,
- OptContext.CompileUnits, Options.Update)
+ DIECloner(*this, TheDwarfEmitter.get(), OptContext.File, DIEAlloc,
+ OptContext.CompileUnits, Options.Update, DebugStrPool,
+ DebugLineStrPool)
.cloneAllCompileUnits(*OptContext.File.Dwarf, OptContext.File,
- OffsetsStringPool,
OptContext.File.Dwarf->isLittleEndian());
}
- if (!Options.NoOutput && !OptContext.CompileUnits.empty() &&
+ if ((TheDwarfEmitter != nullptr) && !OptContext.CompileUnits.empty() &&
LLVM_LIKELY(!Options.Update))
- patchFrameInfoForObject(
- OptContext.File, OptContext.File.Addresses->getValidAddressRanges(),
- *OptContext.File.Dwarf,
- OptContext.CompileUnits[0]->getOrigUnit().getAddressByteSize());
+ patchFrameInfoForObject(OptContext);
// Clean-up before starting working on the next object.
cleanupAuxiliarryData(OptContext);
@@ -2660,22 +2908,23 @@ Error DWARFLinker::link() {
auto EmitLambda = [&]() {
// Emit everything that's global.
- if (!Options.NoOutput) {
+ if (TheDwarfEmitter != nullptr) {
TheDwarfEmitter->emitAbbrevs(Abbreviations, Options.TargetDWARFVersion);
- TheDwarfEmitter->emitStrings(OffsetsStringPool);
- for (DwarfLinkerAccelTableKind TableKind : Options.AccelTables) {
+ TheDwarfEmitter->emitStrings(DebugStrPool);
+ TheDwarfEmitter->emitLineStrings(DebugLineStrPool);
+ for (AccelTableKind TableKind : Options.AccelTables) {
switch (TableKind) {
- case DwarfLinkerAccelTableKind::Apple:
+ case AccelTableKind::Apple:
TheDwarfEmitter->emitAppleNamespaces(AppleNamespaces);
TheDwarfEmitter->emitAppleNames(AppleNames);
TheDwarfEmitter->emitAppleTypes(AppleTypes);
TheDwarfEmitter->emitAppleObjc(AppleObjc);
break;
- case DwarfLinkerAccelTableKind::Pub:
+ case AccelTableKind::Pub:
// Already emitted by emitAcceleratorEntriesForUnit.
break;
- case DwarfLinkerAccelTableKind::DebugNames:
+ case AccelTableKind::DebugNames:
TheDwarfEmitter->emitDebugNames(DebugNames);
break;
}
@@ -2776,7 +3025,8 @@ Error DWARFLinker::link() {
Error DWARFLinker::cloneModuleUnit(LinkContext &Context, RefModuleUnit &Unit,
DeclContextTree &ODRContexts,
- OffsetsStringPool &OffsetsStringPool,
+ OffsetsStringPool &DebugStrPool,
+ OffsetsStringPool &DebugLineStrPool,
unsigned Indent) {
assert(Unit.Unit.get() != nullptr);
@@ -2802,22 +3052,34 @@ Error DWARFLinker::cloneModuleUnit(LinkContext &Context, RefModuleUnit &Unit,
UnitListTy CompileUnits;
CompileUnits.emplace_back(std::move(Unit.Unit));
assert(TheDwarfEmitter);
- DIECloner(*this, TheDwarfEmitter, Unit.File, DIEAlloc, CompileUnits,
- Options.Update)
- .cloneAllCompileUnits(*Unit.File.Dwarf, Unit.File, OffsetsStringPool,
+ DIECloner(*this, TheDwarfEmitter.get(), Unit.File, DIEAlloc, CompileUnits,
+ Options.Update, DebugStrPool, DebugLineStrPool)
+ .cloneAllCompileUnits(*Unit.File.Dwarf, Unit.File,
Unit.File.Dwarf->isLittleEndian());
return Error::success();
}
-bool DWARFLinker::verify(const DWARFFile &File) {
+void DWARFLinker::verifyInput(const DWARFFile &File) {
assert(File.Dwarf);
+ raw_ostream &os = Options.Verbose ? errs() : nulls();
DIDumpOptions DumpOpts;
- if (!File.Dwarf->verify(llvm::outs(), DumpOpts.noImplicitRecursion())) {
- reportWarning("input verification failed", File);
- return false;
+ if (!File.Dwarf->verify(os, DumpOpts.noImplicitRecursion())) {
+ if (Options.InputVerificationHandler)
+ Options.InputVerificationHandler(File);
}
- return true;
}
+Error DWARFLinker::createEmitter(const Triple &TheTriple,
+ OutputFileType FileType,
+ raw_pwrite_stream &OutFile) {
+
+ TheDwarfEmitter = std::make_unique<DwarfStreamer>(
+ FileType, OutFile, StringsTranslator, WarningHandler);
+
+ return TheDwarfEmitter->init(TheTriple, "__DWARF");
+}
+
+DwarfEmitter *DWARFLinker::getEmitter() { return TheDwarfEmitter.get(); }
+
} // namespace llvm
diff --git a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp b/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
index 6c9e0529154b..add0d94da73f 100644
--- a/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
+++ b/llvm/lib/DWARFLinker/DWARFLinkerCompileUnit.cpp
@@ -7,7 +7,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/DWARFLinker/DWARFLinkerDeclContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/Support/FormatVariadic.h"
namespace llvm {
@@ -63,6 +66,7 @@ void CompileUnit::markEverythingAsKept() {
// Mark everything that wasn't explicit marked for pruning.
I.Keep = !I.Prune;
auto DIE = OrigUnit.getDIEAtIndex(Idx++);
+ DWARFUnit *U = DIE.getDwarfUnit();
// Try to guess which DIEs must go to the accelerator tables. We do that
// just for variables, because functions will be handled depending on
@@ -78,10 +82,39 @@ void CompileUnit::markEverythingAsKept() {
I.InDebugMap = true;
continue;
}
- if (auto Block = Value->getAsBlock()) {
- if (Block->size() > OrigUnit.getAddressByteSize() &&
- (*Block)[0] == dwarf::DW_OP_addr)
- I.InDebugMap = true;
+
+ if (auto ExprLockBlock = Value->getAsBlock()) {
+ // Parse 'exprloc' expression.
+ DataExtractor Data(toStringRef(*ExprLockBlock),
+ U->getContext().isLittleEndian(),
+ U->getAddressByteSize());
+ DWARFExpression Expression(Data, U->getAddressByteSize(),
+ U->getFormParams().Format);
+
+ for (DWARFExpression::iterator It = Expression.begin();
+ (It != Expression.end()) && !I.InDebugMap; ++It) {
+ DWARFExpression::iterator NextIt = It;
+ ++NextIt;
+
+ switch (It->getCode()) {
+ case dwarf::DW_OP_const4u:
+ case dwarf::DW_OP_const8u:
+ case dwarf::DW_OP_const4s:
+ case dwarf::DW_OP_const8s:
+ if (NextIt == Expression.end() ||
+ NextIt->getCode() != dwarf::DW_OP_form_tls_address)
+ break;
+ [[fallthrough]];
+ case dwarf::DW_OP_constx:
+ case dwarf::DW_OP_addr:
+ case dwarf::DW_OP_addrx:
+ I.InDebugMap = true;
+ break;
+ default:
+ // Nothing to do.
+ break;
+ }
+ }
}
}
}
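As an illustration of what the opcode scan above treats as an address reference, here is a standalone sketch (raw DWARF v5 opcode values, operating on an already-decoded list of opcodes rather than the DWARFExpression iterator): direct address operations always qualify, while the const4u/const8u/const4s/const8s forms qualify only when immediately followed by DW_OP_form_tls_address.

#include <cstddef>
#include <cstdint>
#include <vector>

// DWARF v5 operation codes used by the check above.
constexpr uint8_t DW_OP_addr = 0x03;
constexpr uint8_t DW_OP_const4u = 0x0c;
constexpr uint8_t DW_OP_const4s = 0x0d;
constexpr uint8_t DW_OP_const8u = 0x0e;
constexpr uint8_t DW_OP_const8s = 0x0f;
constexpr uint8_t DW_OP_form_tls_address = 0x9b;
constexpr uint8_t DW_OP_addrx = 0xa1;
constexpr uint8_t DW_OP_constx = 0xa2;

// Returns true if the decoded opcode sequence references a relocatable
// address, i.e. the DIE owning the expression should be kept "in debug map".
static bool referencesAddress(const std::vector<uint8_t> &Ops) {
  for (std::size_t I = 0; I < Ops.size(); ++I) {
    switch (Ops[I]) {
    case DW_OP_addr:
    case DW_OP_addrx:
    case DW_OP_constx:
      return true;
    case DW_OP_const4u:
    case DW_OP_const4s:
    case DW_OP_const8u:
    case DW_OP_const8s:
      // Only the TLS pattern counts: a constant immediately followed by
      // DW_OP_form_tls_address.
      if (I + 1 < Ops.size() && Ops[I + 1] == DW_OP_form_tls_address)
        return true;
      break;
    default:
      break;
    }
  }
  return false;
}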
@@ -135,14 +168,16 @@ void CompileUnit::addFunctionRange(uint64_t FuncLowPc, uint64_t FuncHighPc,
}
void CompileUnit::noteRangeAttribute(const DIE &Die, PatchLocation Attr) {
- if (Die.getTag() != dwarf::DW_TAG_compile_unit)
- RangeAttributes.push_back(Attr);
- else
+ if (Die.getTag() == dwarf::DW_TAG_compile_unit) {
UnitRangeAttribute = Attr;
+ return;
+ }
+
+ RangeAttributes.emplace_back(Attr);
}
-void CompileUnit::noteLocationAttribute(PatchLocation Attr, int64_t PcOffset) {
- LocationAttributes.emplace_back(Attr, PcOffset);
+void CompileUnit::noteLocationAttribute(PatchLocation Attr) {
+ LocationAttributes.emplace_back(Attr);
}
void CompileUnit::addNamespaceAccelerator(const DIE *Die,
diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
index 5cad267fd845..fbd89dcf1ca1 100644
--- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp
+++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/DWARFLinker/DWARFStreamer.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/NonRelocatableStringpool.h"
#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
@@ -25,36 +24,41 @@
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
-bool DwarfStreamer::init(Triple TheTriple,
- StringRef Swift5ReflectionSegmentName) {
+Error DwarfStreamer::init(Triple TheTriple,
+ StringRef Swift5ReflectionSegmentName) {
std::string ErrorStr;
std::string TripleName;
- StringRef Context = "dwarf streamer init";
// Get the target.
const Target *TheTarget =
TargetRegistry::lookupTarget(TripleName, TheTriple, ErrorStr);
if (!TheTarget)
- return error(ErrorStr, Context), false;
+ return createStringError(std::errc::invalid_argument, ErrorStr.c_str());
+
TripleName = TheTriple.getTriple();
// Create all the MC Objects.
MRI.reset(TheTarget->createMCRegInfo(TripleName));
if (!MRI)
- return error(Twine("no register info for target ") + TripleName, Context),
- false;
+ return createStringError(std::errc::invalid_argument,
+ "no register info for target %s",
+ TripleName.c_str());
MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags();
MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
if (!MAI)
- return error("no asm info for target " + TripleName, Context), false;
+ return createStringError(std::errc::invalid_argument,
+ "no asm info for target %s", TripleName.c_str());
MSTI.reset(TheTarget->createMCSubtargetInfo(TripleName, "", ""));
if (!MSTI)
- return error("no subtarget info for target " + TripleName, Context), false;
+ return createStringError(std::errc::invalid_argument,
+ "no subtarget info for target %s",
+ TripleName.c_str());
MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get(), nullptr,
nullptr, true, Swift5ReflectionSegmentName));
@@ -63,18 +67,24 @@ bool DwarfStreamer::init(Triple TheTriple,
MAB = TheTarget->createMCAsmBackend(*MSTI, *MRI, MCOptions);
if (!MAB)
- return error("no asm backend for target " + TripleName, Context), false;
+ return createStringError(std::errc::invalid_argument,
+ "no asm backend for target %s",
+ TripleName.c_str());
MII.reset(TheTarget->createMCInstrInfo());
if (!MII)
- return error("no instr info info for target " + TripleName, Context), false;
+ return createStringError(std::errc::invalid_argument,
+                             "no instr info for target %s",
+ TripleName.c_str());
MCE = TheTarget->createMCCodeEmitter(*MII, *MC);
if (!MCE)
- return error("no code emitter for target " + TripleName, Context), false;
+ return createStringError(std::errc::invalid_argument,
+ "no code emitter for target %s",
+ TripleName.c_str());
switch (OutFileType) {
- case OutputFileType::Assembly: {
+ case DWARFLinker::OutputFileType::Assembly: {
MIP = TheTarget->createMCInstPrinter(TheTriple, MAI->getAssemblerDialect(),
*MAI, *MII, *MRI);
MS = TheTarget->createAsmStreamer(
@@ -83,7 +93,7 @@ bool DwarfStreamer::init(Triple TheTriple,
true);
break;
}
- case OutputFileType::Object: {
+ case DWARFLinker::OutputFileType::Object: {
MS = TheTarget->createMCObjectStreamer(
TheTriple, *MC, std::unique_ptr<MCAsmBackend>(MAB),
MAB->createObjectWriter(OutFile), std::unique_ptr<MCCodeEmitter>(MCE),
@@ -94,28 +104,36 @@ bool DwarfStreamer::init(Triple TheTriple,
}
if (!MS)
- return error("no object streamer for target " + TripleName, Context), false;
+ return createStringError(std::errc::invalid_argument,
+ "no object streamer for target %s",
+ TripleName.c_str());
// Finally create the AsmPrinter we'll use to emit the DIEs.
TM.reset(TheTarget->createTargetMachine(TripleName, "", "", TargetOptions(),
std::nullopt));
if (!TM)
- return error("no target machine for target " + TripleName, Context), false;
+ return createStringError(std::errc::invalid_argument,
+ "no target machine for target %s",
+ TripleName.c_str());
Asm.reset(TheTarget->createAsmPrinter(*TM, std::unique_ptr<MCStreamer>(MS)));
if (!Asm)
- return error("no asm printer for target " + TripleName, Context), false;
+ return createStringError(std::errc::invalid_argument,
+ "no asm printer for target %s",
+ TripleName.c_str());
Asm->setDwarfUsesRelocationsAcrossSections(false);
RangesSectionSize = 0;
+ RngListsSectionSize = 0;
LocSectionSize = 0;
+ LocListsSectionSize = 0;
LineSectionSize = 0;
FrameSectionSize = 0;
DebugInfoSectionSize = 0;
MacInfoSectionSize = 0;
MacroSectionSize = 0;
- return true;
+ return Error::success();
}
void DwarfStreamer::finish() { MS->finish(); }
@@ -202,6 +220,11 @@ void DwarfStreamer::emitSectionContents(StringRef SecData, StringRef SecName) {
.Case("debug_frame", MC->getObjectFileInfo()->getDwarfFrameSection())
.Case("debug_aranges",
MC->getObjectFileInfo()->getDwarfARangesSection())
+ .Case("debug_addr", MC->getObjectFileInfo()->getDwarfAddrSection())
+ .Case("debug_rnglists",
+ MC->getObjectFileInfo()->getDwarfRnglistsSection())
+ .Case("debug_loclists",
+ MC->getObjectFileInfo()->getDwarfLoclistsSection())
.Default(nullptr);
if (Section) {
@@ -233,16 +256,18 @@ void DwarfStreamer::emitStrings(const NonRelocatableStringpool &Pool) {
// Emit a null terminator.
Asm->emitInt8(0);
}
+}
-#if 0
- if (DwarfVersion >= 5) {
- // Emit an empty string offset section.
- Asm->OutStreamer->switchSection(MOFI->getDwarfStrOffSection());
- Asm->emitDwarfUnitLength(4, "Length of String Offsets Set");
- Asm->emitInt16(DwarfVersion);
- Asm->emitInt16(0);
+/// Emit the debug_line_str section stored in \p Pool.
+void DwarfStreamer::emitLineStrings(const NonRelocatableStringpool &Pool) {
+ Asm->OutStreamer->switchSection(MOFI->getDwarfLineStrSection());
+ std::vector<DwarfStringPoolEntryRef> Entries = Pool.getEntriesForEmission();
+ for (auto Entry : Entries) {
+ // Emit the string itself.
+ Asm->OutStreamer->emitBytes(Entry.getString());
+ // Emit a null terminator.
+ Asm->emitInt8(0);
}
-#endif
}
void DwarfStreamer::emitDebugNames(
@@ -363,11 +388,13 @@ void DwarfStreamer::emitDwarfDebugArangesTable(
}
void DwarfStreamer::emitDwarfDebugRangesTableFragment(
- const CompileUnit &Unit, const AddressRanges &LinkedRanges) {
- unsigned AddressSize = Unit.getOrigUnit().getAddressByteSize();
+ const CompileUnit &Unit, const AddressRanges &LinkedRanges,
+ PatchLocation Patch) {
+ Patch.set(RangesSectionSize);
// Make .debug_ranges to be current section.
MS->switchSection(MC->getObjectFileInfo()->getDwarfRangesSection());
+ unsigned AddressSize = Unit.getOrigUnit().getAddressByteSize();
// Emit ranges.
uint64_t BaseAddress = 0;
@@ -390,126 +417,525 @@ void DwarfStreamer::emitDwarfDebugRangesTableFragment(
RangesSectionSize += AddressSize;
}
-/// Emit the debug_aranges contribution of a unit and
-/// if \p DoDebugRanges is true the debug_range contents for a
-/// compile_unit level DW_AT_ranges attribute (Which are basically the
-/// same thing with a different base address).
-/// Just aggregate all the ranges gathered inside that unit.
-void DwarfStreamer::emitUnitRangesEntries(CompileUnit &Unit,
- bool DoDebugRanges) {
- const RangesTy &FunctionRanges = Unit.getFunctionRanges();
+MCSymbol *
+DwarfStreamer::emitDwarfDebugRangeListHeader(const CompileUnit &Unit) {
+ if (Unit.getOrigUnit().getVersion() < 5)
+ return nullptr;
- // Linked addresses might end up in a different order.
- // Build linked address ranges.
- AddressRanges LinkedRanges;
- for (size_t Idx = 0; Idx < FunctionRanges.size(); Idx++)
- LinkedRanges.insert(
- {FunctionRanges[Idx].first.start() + FunctionRanges[Idx].second,
- FunctionRanges[Idx].first.end() + FunctionRanges[Idx].second});
+  // Make .debug_rnglists the current section.
+ MS->switchSection(MC->getObjectFileInfo()->getDwarfRnglistsSection());
- if (!FunctionRanges.empty())
- emitDwarfDebugArangesTable(Unit, LinkedRanges);
+ MCSymbol *BeginLabel = Asm->createTempSymbol("Brnglists");
+ MCSymbol *EndLabel = Asm->createTempSymbol("Ernglists");
+ unsigned AddressSize = Unit.getOrigUnit().getAddressByteSize();
+
+ // Length
+ Asm->emitLabelDifference(EndLabel, BeginLabel, sizeof(uint32_t));
+ Asm->OutStreamer->emitLabel(BeginLabel);
+ RngListsSectionSize += sizeof(uint32_t);
+
+ // Version.
+ MS->emitInt16(5);
+ RngListsSectionSize += sizeof(uint16_t);
+
+ // Address size.
+ MS->emitInt8(AddressSize);
+ RngListsSectionSize++;
+
+ // Seg_size
+ MS->emitInt8(0);
+ RngListsSectionSize++;
- if (DoDebugRanges)
- emitDwarfDebugRangesTableFragment(Unit, LinkedRanges);
+ // Offset entry count
+ MS->emitInt32(0);
+ RngListsSectionSize += sizeof(uint32_t);
+
+ return EndLabel;
}
-/// Emit location lists for \p Unit and update attributes to point to the new
-/// entries.
-void DwarfStreamer::emitLocationsForUnit(
- const CompileUnit &Unit, DWARFContext &Dwarf,
- std::function<void(StringRef, SmallVectorImpl<uint8_t> &)> ProcessExpr) {
- const auto &Attributes = Unit.getLocationAttributes();
+void DwarfStreamer::emitDwarfDebugRangeListFragment(
+ const CompileUnit &Unit, const AddressRanges &LinkedRanges,
+ PatchLocation Patch) {
+ if (Unit.getOrigUnit().getVersion() < 5) {
+ emitDwarfDebugRangesTableFragment(Unit, LinkedRanges, Patch);
+ return;
+ }
+
+ emitDwarfDebugRngListsTableFragment(Unit, LinkedRanges, Patch);
+}
- if (Attributes.empty())
+void DwarfStreamer::emitDwarfDebugRangeListFooter(const CompileUnit &Unit,
+ MCSymbol *EndLabel) {
+ if (Unit.getOrigUnit().getVersion() < 5)
return;
- MS->switchSection(MC->getObjectFileInfo()->getDwarfLocSection());
+  // Make .debug_rnglists the current section.
+ MS->switchSection(MC->getObjectFileInfo()->getDwarfRnglistsSection());
+
+ if (EndLabel != nullptr)
+ Asm->OutStreamer->emitLabel(EndLabel);
+}
+
+void DwarfStreamer::emitDwarfDebugRngListsTableFragment(
+ const CompileUnit &Unit, const AddressRanges &LinkedRanges,
+ PatchLocation Patch) {
+ Patch.set(RngListsSectionSize);
+
+  // Make .debug_rnglists the current section.
+ MS->switchSection(MC->getObjectFileInfo()->getDwarfRnglistsSection());
unsigned AddressSize = Unit.getOrigUnit().getAddressByteSize();
- uint64_t BaseAddressMarker = (AddressSize == 8)
- ? std::numeric_limits<uint64_t>::max()
- : std::numeric_limits<uint32_t>::max();
- const DWARFSection &InputSec = Dwarf.getDWARFObj().getLocSection();
- DataExtractor Data(InputSec.Data, Dwarf.isLittleEndian(), AddressSize);
- DWARFUnit &OrigUnit = Unit.getOrigUnit();
- auto OrigUnitDie = OrigUnit.getUnitDIE(false);
- int64_t UnitPcOffset = 0;
- if (auto OrigLowPc =
- dwarf::toAddress(OrigUnitDie.find(dwarf::DW_AT_low_pc))) {
- assert(Unit.getLowPc());
- UnitPcOffset = int64_t(*OrigLowPc) - *Unit.getLowPc();
- }
-
- SmallVector<uint8_t, 32> Buffer;
- for (const auto &Attr : Attributes) {
- uint64_t Offset = Attr.first.get();
- Attr.first.set(LocSectionSize);
- // This is the quantity to add to the old location address to get
- // the correct address for the new one.
- int64_t LocPcOffset = Attr.second + UnitPcOffset;
- while (Data.isValidOffset(Offset)) {
- uint64_t Low = Data.getUnsigned(&Offset, AddressSize);
- uint64_t High = Data.getUnsigned(&Offset, AddressSize);
- LocSectionSize += 2 * AddressSize;
- // End of list entry.
- if (Low == 0 && High == 0) {
- Asm->OutStreamer->emitIntValue(0, AddressSize);
- Asm->OutStreamer->emitIntValue(0, AddressSize);
- break;
- }
- // Base address selection entry.
- if (Low == BaseAddressMarker) {
- Asm->OutStreamer->emitIntValue(BaseAddressMarker, AddressSize);
- Asm->OutStreamer->emitIntValue(High + Attr.second, AddressSize);
- LocPcOffset = 0;
- continue;
+
+ for (const AddressRange &Range : LinkedRanges) {
+ // Emit type of entry.
+ MS->emitInt8(dwarf::DW_RLE_start_length);
+ RngListsSectionSize += 1;
+
+ // Emit start address.
+ MS->emitIntValue(Range.start(), AddressSize);
+ RngListsSectionSize += AddressSize;
+
+ // Emit length of the range.
+ RngListsSectionSize += MS->emitULEB128IntValue(Range.end() - Range.start());
+ }
+
+ // Emit the terminator entry.
+ MS->emitInt8(dwarf::DW_RLE_end_of_list);
+ RngListsSectionSize += 1;
+}
+
+/// Emit debug locations (.debug_loc, .debug_loclists) header.
+MCSymbol *DwarfStreamer::emitDwarfDebugLocListHeader(const CompileUnit &Unit) {
+ if (Unit.getOrigUnit().getVersion() < 5)
+ return nullptr;
+
+ // Make .debug_loclists the current section.
+ MS->switchSection(MC->getObjectFileInfo()->getDwarfLoclistsSection());
+
+ MCSymbol *BeginLabel = Asm->createTempSymbol("Bloclists");
+ MCSymbol *EndLabel = Asm->createTempSymbol("Eloclists");
+ unsigned AddressSize = Unit.getOrigUnit().getAddressByteSize();
+
+ // Length
+ Asm->emitLabelDifference(EndLabel, BeginLabel, sizeof(uint32_t));
+ Asm->OutStreamer->emitLabel(BeginLabel);
+ LocListsSectionSize += sizeof(uint32_t);
+
+ // Version.
+ MS->emitInt16(5);
+ LocListsSectionSize += sizeof(uint16_t);
+
+ // Address size.
+ MS->emitInt8(AddressSize);
+ LocListsSectionSize++;
+
+ // Seg_size
+ MS->emitInt8(0);
+ LocListsSectionSize++;
+
+ // Offset entry count
+ MS->emitInt32(0);
+ LocListsSectionSize += sizeof(uint32_t);
+
+ return EndLabel;
+}
+
+/// Emit debug locations (.debug_loc, .debug_loclists) fragment.
+void DwarfStreamer::emitDwarfDebugLocListFragment(
+ const CompileUnit &Unit,
+ const DWARFLocationExpressionsVector &LinkedLocationExpression,
+ PatchLocation Patch, DebugAddrPool &AddrPool) {
+ if (Unit.getOrigUnit().getVersion() < 5) {
+ emitDwarfDebugLocTableFragment(Unit, LinkedLocationExpression, Patch);
+ return;
+ }
+
+ emitDwarfDebugLocListsTableFragment(Unit, LinkedLocationExpression, Patch,
+ AddrPool);
+}
+
+/// Emit debug locations (.debug_loc, .debug_loclists) footer.
+void DwarfStreamer::emitDwarfDebugLocListFooter(const CompileUnit &Unit,
+ MCSymbol *EndLabel) {
+ if (Unit.getOrigUnit().getVersion() < 5)
+ return;
+
+ // Make .debug_loclists the current section.
+ MS->switchSection(MC->getObjectFileInfo()->getDwarfLoclistsSection());
+
+ if (EndLabel != nullptr)
+ Asm->OutStreamer->emitLabel(EndLabel);
+}
+
+/// Emit piece of .debug_loc for \p LinkedLocationExpression.
+void DwarfStreamer::emitDwarfDebugLocTableFragment(
+ const CompileUnit &Unit,
+ const DWARFLocationExpressionsVector &LinkedLocationExpression,
+ PatchLocation Patch) {
+ Patch.set(LocSectionSize);
+
+  // Make .debug_loc the current section.
+ MS->switchSection(MC->getObjectFileInfo()->getDwarfLocSection());
+ unsigned AddressSize = Unit.getOrigUnit().getAddressByteSize();
+
+ // Emit ranges.
+ uint64_t BaseAddress = 0;
+ if (std::optional<uint64_t> LowPC = Unit.getLowPc())
+ BaseAddress = *LowPC;
+
+ for (const DWARFLocationExpression &LocExpression :
+ LinkedLocationExpression) {
+ if (LocExpression.Range) {
+ MS->emitIntValue(LocExpression.Range->LowPC - BaseAddress, AddressSize);
+ MS->emitIntValue(LocExpression.Range->HighPC - BaseAddress, AddressSize);
+
+ LocSectionSize += AddressSize;
+ LocSectionSize += AddressSize;
+ }
+
+ Asm->OutStreamer->emitIntValue(LocExpression.Expr.size(), 2);
+ Asm->OutStreamer->emitBytes(StringRef(
+ (const char *)LocExpression.Expr.data(), LocExpression.Expr.size()));
+ LocSectionSize += LocExpression.Expr.size() + 2;
+ }
+
+ // Add the terminator entry.
+ MS->emitIntValue(0, AddressSize);
+ MS->emitIntValue(0, AddressSize);
+
+ LocSectionSize += AddressSize;
+ LocSectionSize += AddressSize;
+}
+
+/// Emit .debug_addr header.
+MCSymbol *DwarfStreamer::emitDwarfDebugAddrsHeader(const CompileUnit &Unit) {
+
+ // Make .debug_addr the current section.
+ MS->switchSection(MC->getObjectFileInfo()->getDwarfAddrSection());
+
+ MCSymbol *BeginLabel = Asm->createTempSymbol("Bdebugaddr");
+ MCSymbol *EndLabel = Asm->createTempSymbol("Edebugaddr");
+ unsigned AddrSize = Unit.getOrigUnit().getAddressByteSize();
+
+ // Emit length.
+ Asm->emitLabelDifference(EndLabel, BeginLabel, sizeof(uint32_t));
+ Asm->OutStreamer->emitLabel(BeginLabel);
+ AddrSectionSize += sizeof(uint32_t);
+
+ // Emit version.
+ Asm->emitInt16(5);
+ AddrSectionSize += 2;
+
+ // Emit address size.
+ Asm->emitInt8(AddrSize);
+ AddrSectionSize += 1;
+
+ // Emit segment size.
+ Asm->emitInt8(0);
+ AddrSectionSize += 1;
+
+ return EndLabel;
+}
+
+/// Emit the .debug_addr addresses stored in \p Addrs.
+void DwarfStreamer::emitDwarfDebugAddrs(const SmallVector<uint64_t> &Addrs,
+ uint8_t AddrSize) {
+ Asm->OutStreamer->switchSection(MOFI->getDwarfAddrSection());
+ for (auto Addr : Addrs) {
+ Asm->OutStreamer->emitIntValue(Addr, AddrSize);
+ AddrSectionSize += AddrSize;
+ }
+}
+
+/// Emit .debug_addr footer.
+void DwarfStreamer::emitDwarfDebugAddrsFooter(const CompileUnit &Unit,
+ MCSymbol *EndLabel) {
+
+ // Make .debug_addr the current section.
+ MS->switchSection(MC->getObjectFileInfo()->getDwarfAddrSection());
+
+ if (EndLabel != nullptr)
+ Asm->OutStreamer->emitLabel(EndLabel);
+}
+
+/// Emit piece of .debug_loclists for \p LinkedLocationExpression.
+void DwarfStreamer::emitDwarfDebugLocListsTableFragment(
+ const CompileUnit &Unit,
+ const DWARFLocationExpressionsVector &LinkedLocationExpression,
+ PatchLocation Patch, DebugAddrPool &AddrPool) {
+ Patch.set(LocListsSectionSize);
+
+ // Make .debug_loclists the current section.
+ MS->switchSection(MC->getObjectFileInfo()->getDwarfLoclistsSection());
+ std::optional<uint64_t> BaseAddress;
+
+ for (const DWARFLocationExpression &LocExpression :
+ LinkedLocationExpression) {
+ if (LocExpression.Range) {
+
+ if (!BaseAddress) {
+
+ BaseAddress = LocExpression.Range->LowPC;
+
+ // Emit base address.
+ MS->emitInt8(dwarf::DW_LLE_base_addressx);
+ LocListsSectionSize += 1;
+ LocListsSectionSize +=
+ MS->emitULEB128IntValue(AddrPool.getAddrIndex(*BaseAddress));
}
- // Location list entry.
- Asm->OutStreamer->emitIntValue(Low + LocPcOffset, AddressSize);
- Asm->OutStreamer->emitIntValue(High + LocPcOffset, AddressSize);
- uint64_t Length = Data.getU16(&Offset);
- Asm->OutStreamer->emitIntValue(Length, 2);
- // Copy the bytes into to the buffer, process them, emit them.
- Buffer.reserve(Length);
- Buffer.resize(0);
- StringRef Input = InputSec.Data.substr(Offset, Length);
- ProcessExpr(Input, Buffer);
- Asm->OutStreamer->emitBytes(
- StringRef((const char *)Buffer.data(), Length));
- Offset += Length;
- LocSectionSize += Length + 2;
+
+ // Emit type of entry.
+ MS->emitInt8(dwarf::DW_LLE_offset_pair);
+ LocListsSectionSize += 1;
+
+ // Emit start offset relative to base address.
+ LocListsSectionSize +=
+ MS->emitULEB128IntValue(LocExpression.Range->LowPC - *BaseAddress);
+
+ // Emit end offset relative to base address.
+ LocListsSectionSize +=
+ MS->emitULEB128IntValue(LocExpression.Range->HighPC - *BaseAddress);
+ } else {
+ // Emit type of entry.
+ MS->emitInt8(dwarf::DW_LLE_default_location);
+ LocListsSectionSize += 1;
}
+
+ LocListsSectionSize += MS->emitULEB128IntValue(LocExpression.Expr.size());
+ Asm->OutStreamer->emitBytes(StringRef(
+ (const char *)LocExpression.Expr.data(), LocExpression.Expr.size()));
+ LocListsSectionSize += LocExpression.Expr.size();
}
+
+ // Emit the terminator entry.
+ MS->emitInt8(dwarf::DW_LLE_end_of_list);
+ LocListsSectionSize += 1;
}
-void DwarfStreamer::emitLineTableForUnit(MCDwarfLineTableParams Params,
- StringRef PrologueBytes,
- unsigned MinInstLength,
- std::vector<DWARFDebugLine::Row> &Rows,
- unsigned PointerSize) {
+void DwarfStreamer::emitLineTableForUnit(
+ const DWARFDebugLine::LineTable &LineTable, const CompileUnit &Unit,
+ OffsetsStringPool &DebugStrPool, OffsetsStringPool &DebugLineStrPool) {
// Switch to the section where the table will be emitted into.
MS->switchSection(MC->getObjectFileInfo()->getDwarfLineSection());
+
MCSymbol *LineStartSym = MC->createTempSymbol();
MCSymbol *LineEndSym = MC->createTempSymbol();
- // The first 4 bytes is the total length of the information for this
- // compilation unit (not including these 4 bytes for the length).
- Asm->emitLabelDifference(LineEndSym, LineStartSym, 4);
+ // unit_length.
+ if (LineTable.Prologue.FormParams.Format == dwarf::DwarfFormat::DWARF64) {
+ MS->emitInt32(dwarf::DW_LENGTH_DWARF64);
+ LineSectionSize += 4;
+ }
+ emitLabelDifference(LineEndSym, LineStartSym,
+ LineTable.Prologue.FormParams.Format, LineSectionSize);
Asm->OutStreamer->emitLabel(LineStartSym);
- // Copy Prologue.
- MS->emitBytes(PrologueBytes);
- LineSectionSize += PrologueBytes.size() + 4;
+
+ // Emit prologue.
+ emitLineTablePrologue(LineTable.Prologue, DebugStrPool, DebugLineStrPool);
+
+ // Emit rows.
+ emitLineTableRows(LineTable, LineEndSym,
+ Unit.getOrigUnit().getAddressByteSize());
+}
+
+void DwarfStreamer::emitLineTablePrologue(const DWARFDebugLine::Prologue &P,
+ OffsetsStringPool &DebugStrPool,
+ OffsetsStringPool &DebugLineStrPool) {
+ MCSymbol *PrologueStartSym = MC->createTempSymbol();
+ MCSymbol *PrologueEndSym = MC->createTempSymbol();
+
+ // version (uhalf).
+ MS->emitInt16(P.getVersion());
+ LineSectionSize += 2;
+ if (P.getVersion() == 5) {
+ // address_size (ubyte).
+ MS->emitInt8(P.getAddressSize());
+ LineSectionSize += 1;
+
+ // segment_selector_size (ubyte).
+ MS->emitInt8(P.SegSelectorSize);
+ LineSectionSize += 1;
+ }
+
+ // header_length.
+ emitLabelDifference(PrologueEndSym, PrologueStartSym, P.FormParams.Format,
+ LineSectionSize);
+
+ Asm->OutStreamer->emitLabel(PrologueStartSym);
+ emitLineTableProloguePayload(P, DebugStrPool, DebugLineStrPool);
+ Asm->OutStreamer->emitLabel(PrologueEndSym);
+}
+
+void DwarfStreamer::emitLineTablePrologueV2IncludeAndFileTable(
+ const DWARFDebugLine::Prologue &P, OffsetsStringPool &DebugStrPool,
+ OffsetsStringPool &DebugLineStrPool) {
+ // include_directories (sequence of path names).
+ for (const DWARFFormValue &Include : P.IncludeDirectories)
+ emitLineTableString(P, Include, DebugStrPool, DebugLineStrPool);
+ // The last entry is followed by a single null byte.
+ MS->emitInt8(0);
+ LineSectionSize += 1;
+
+ // file_names (sequence of file entries).
+ for (const DWARFDebugLine::FileNameEntry &File : P.FileNames) {
+ // A null-terminated string containing the full or relative path name of a
+ // source file.
+ emitLineTableString(P, File.Name, DebugStrPool, DebugLineStrPool);
+ // An unsigned LEB128 number representing the directory index of a directory
+ // in the include_directories section.
+ LineSectionSize += MS->emitULEB128IntValue(File.DirIdx);
+ // An unsigned LEB128 number representing the (implementation-defined) time
+ // of last modification for the file, or 0 if not available.
+ LineSectionSize += MS->emitULEB128IntValue(File.ModTime);
+ // An unsigned LEB128 number representing the length in bytes of the file,
+ // or 0 if not available.
+ LineSectionSize += MS->emitULEB128IntValue(File.Length);
+ }
+ // The last entry is followed by a single null byte.
+ MS->emitInt8(0);
+ LineSectionSize += 1;
+}
+
+void DwarfStreamer::emitLineTablePrologueV5IncludeAndFileTable(
+ const DWARFDebugLine::Prologue &P, OffsetsStringPool &DebugStrPool,
+ OffsetsStringPool &DebugLineStrPool) {
+ if (P.IncludeDirectories.empty()) {
+ // directory_entry_format_count(ubyte).
+ MS->emitInt8(0);
+ LineSectionSize += 1;
+ } else {
+ // directory_entry_format_count(ubyte).
+ MS->emitInt8(1);
+ LineSectionSize += 1;
+
+ // directory_entry_format (sequence of ULEB128 pairs).
+ LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_LNCT_path);
+ LineSectionSize +=
+ MS->emitULEB128IntValue(P.IncludeDirectories[0].getForm());
+ }
+
+ // directories_count (ULEB128).
+ LineSectionSize += MS->emitULEB128IntValue(P.IncludeDirectories.size());
+ // directories (sequence of directory names).
+ for (auto Include : P.IncludeDirectories)
+ emitLineTableString(P, Include, DebugStrPool, DebugLineStrPool);
+
+ if (P.FileNames.empty()) {
+ // file_name_entry_format_count (ubyte).
+ MS->emitInt8(0);
+ LineSectionSize += 1;
+ } else {
+ // file_name_entry_format_count (ubyte).
+ MS->emitInt8(2);
+ LineSectionSize += 1;
+
+ // file_name_entry_format (sequence of ULEB128 pairs).
+ LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_LNCT_path);
+ LineSectionSize += MS->emitULEB128IntValue(P.FileNames[0].Name.getForm());
+
+ LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_LNCT_directory_index);
+ LineSectionSize += MS->emitULEB128IntValue(dwarf::DW_FORM_data1);
+ }
+
+ // file_names_count (ULEB128).
+ LineSectionSize += MS->emitULEB128IntValue(P.FileNames.size());
+
+ // file_names (sequence of file name entries).
+ for (auto File : P.FileNames) {
+ emitLineTableString(P, File.Name, DebugStrPool, DebugLineStrPool);
+ MS->emitInt8(File.DirIdx);
+ LineSectionSize += 1;
+ }
+}
+
+void DwarfStreamer::emitLineTableString(const DWARFDebugLine::Prologue &P,
+ const DWARFFormValue &String,
+ OffsetsStringPool &DebugStrPool,
+ OffsetsStringPool &DebugLineStrPool) {
+ std::optional<const char *> StringVal = dwarf::toString(String);
+ if (!StringVal) {
+    warn("Cannot read string from the line table.");
+ return;
+ }
+
+ switch (String.getForm()) {
+ case dwarf::DW_FORM_string: {
+ StringRef TranslatedString =
+ (Translator) ? Translator(*StringVal) : *StringVal;
+ Asm->OutStreamer->emitBytes(TranslatedString.data());
+ Asm->emitInt8(0);
+ LineSectionSize += TranslatedString.size() + 1;
+ } break;
+ case dwarf::DW_FORM_strp:
+ case dwarf::DW_FORM_line_strp: {
+ DwarfStringPoolEntryRef StringRef =
+ String.getForm() == dwarf::DW_FORM_strp
+ ? DebugStrPool.getEntry(*StringVal)
+ : DebugLineStrPool.getEntry(*StringVal);
+
+ emitIntOffset(StringRef.getOffset(), P.FormParams.Format, LineSectionSize);
+ } break;
+ default:
+ warn("Unsupported string form inside line table.");
+ break;
+ };
+}
+
+void DwarfStreamer::emitLineTableProloguePayload(
+ const DWARFDebugLine::Prologue &P, OffsetsStringPool &DebugStrPool,
+ OffsetsStringPool &DebugLineStrPool) {
+ // minimum_instruction_length (ubyte).
+ MS->emitInt8(P.MinInstLength);
+ LineSectionSize += 1;
+ if (P.FormParams.Version >= 4) {
+ // maximum_operations_per_instruction (ubyte).
+ MS->emitInt8(P.MaxOpsPerInst);
+ LineSectionSize += 1;
+ }
+ // default_is_stmt (ubyte).
+ MS->emitInt8(P.DefaultIsStmt);
+ LineSectionSize += 1;
+ // line_base (sbyte).
+ MS->emitInt8(P.LineBase);
+ LineSectionSize += 1;
+ // line_range (ubyte).
+ MS->emitInt8(P.LineRange);
+ LineSectionSize += 1;
+ // opcode_base (ubyte).
+ MS->emitInt8(P.OpcodeBase);
+ LineSectionSize += 1;
+
+ // standard_opcode_lengths (array of ubyte).
+ for (auto Length : P.StandardOpcodeLengths) {
+ MS->emitInt8(Length);
+ LineSectionSize += 1;
+ }
+
+ if (P.FormParams.Version < 5)
+ emitLineTablePrologueV2IncludeAndFileTable(P, DebugStrPool,
+ DebugLineStrPool);
+ else
+ emitLineTablePrologueV5IncludeAndFileTable(P, DebugStrPool,
+ DebugLineStrPool);
+}
+
+void DwarfStreamer::emitLineTableRows(
+ const DWARFDebugLine::LineTable &LineTable, MCSymbol *LineEndSym,
+ unsigned AddressByteSize) {
+
+ MCDwarfLineTableParams Params;
+ Params.DWARF2LineOpcodeBase = LineTable.Prologue.OpcodeBase;
+ Params.DWARF2LineBase = LineTable.Prologue.LineBase;
+ Params.DWARF2LineRange = LineTable.Prologue.LineRange;
SmallString<128> EncodingBuffer;
- raw_svector_ostream EncodingOS(EncodingBuffer);
- if (Rows.empty()) {
+ if (LineTable.Rows.empty()) {
// We only have the dummy entry, dsymutil emits an entry with a 0
// address in that case.
- MCDwarfLineAddr::Encode(*MC, Params, std::numeric_limits<int64_t>::max(), 0,
- EncodingOS);
- MS->emitBytes(EncodingOS.str());
+ MCDwarfLineAddr::encode(*MC, Params, std::numeric_limits<int64_t>::max(), 0,
+ EncodingBuffer);
+ MS->emitBytes(EncodingBuffer);
LineSectionSize += EncodingBuffer.size();
MS->emitLabel(LineEndSym);
return;
@@ -525,17 +951,19 @@ void DwarfStreamer::emitLineTableForUnit(MCDwarfLineTableParams Params,
unsigned RowsSinceLastSequence = 0;
- for (DWARFDebugLine::Row &Row : Rows) {
+ for (const DWARFDebugLine::Row &Row : LineTable.Rows) {
int64_t AddressDelta;
if (Address == -1ULL) {
MS->emitIntValue(dwarf::DW_LNS_extended_op, 1);
- MS->emitULEB128IntValue(PointerSize + 1);
+ MS->emitULEB128IntValue(AddressByteSize + 1);
MS->emitIntValue(dwarf::DW_LNE_set_address, 1);
- MS->emitIntValue(Row.Address.Address, PointerSize);
- LineSectionSize += 2 + PointerSize + getULEB128Size(PointerSize + 1);
+ MS->emitIntValue(Row.Address.Address, AddressByteSize);
+ LineSectionSize +=
+ 2 + AddressByteSize + getULEB128Size(AddressByteSize + 1);
AddressDelta = 0;
} else {
- AddressDelta = (Row.Address.Address - Address) / MinInstLength;
+ AddressDelta =
+ (Row.Address.Address - Address) / LineTable.Prologue.MinInstLength;
}
// FIXME: code copied and transformed from MCDwarf.cpp::EmitDwarfLineTable.
@@ -587,8 +1015,9 @@ void DwarfStreamer::emitLineTableForUnit(MCDwarfLineTableParams Params,
int64_t LineDelta = int64_t(Row.Line) - LastLine;
if (!Row.EndSequence) {
- MCDwarfLineAddr::Encode(*MC, Params, LineDelta, AddressDelta, EncodingOS);
- MS->emitBytes(EncodingOS.str());
+ MCDwarfLineAddr::encode(*MC, Params, LineDelta, AddressDelta,
+ EncodingBuffer);
+ MS->emitBytes(EncodingBuffer);
LineSectionSize += EncodingBuffer.size();
EncodingBuffer.resize(0);
Address = Row.Address.Address;
@@ -605,9 +1034,9 @@ void DwarfStreamer::emitLineTableForUnit(MCDwarfLineTableParams Params,
MS->emitULEB128IntValue(AddressDelta);
LineSectionSize += 1 + getULEB128Size(AddressDelta);
}
- MCDwarfLineAddr::Encode(*MC, Params, std::numeric_limits<int64_t>::max(),
- 0, EncodingOS);
- MS->emitBytes(EncodingOS.str());
+ MCDwarfLineAddr::encode(*MC, Params, std::numeric_limits<int64_t>::max(),
+ 0, EncodingBuffer);
+ MS->emitBytes(EncodingBuffer);
LineSectionSize += EncodingBuffer.size();
EncodingBuffer.resize(0);
Address = -1ULL;
@@ -617,9 +1046,9 @@ void DwarfStreamer::emitLineTableForUnit(MCDwarfLineTableParams Params,
}
if (RowsSinceLastSequence) {
- MCDwarfLineAddr::Encode(*MC, Params, std::numeric_limits<int64_t>::max(), 0,
- EncodingOS);
- MS->emitBytes(EncodingOS.str());
+ MCDwarfLineAddr::encode(*MC, Params, std::numeric_limits<int64_t>::max(), 0,
+ EncodingBuffer);
+ MS->emitBytes(EncodingBuffer);
LineSectionSize += EncodingBuffer.size();
EncodingBuffer.resize(0);
}
@@ -627,86 +1056,19 @@ void DwarfStreamer::emitLineTableForUnit(MCDwarfLineTableParams Params,
MS->emitLabel(LineEndSym);
}
-/// Copy the debug_line over to the updated binary while unobfuscating the file
-/// names and directories.
-void DwarfStreamer::translateLineTable(DataExtractor Data, uint64_t Offset) {
- MS->switchSection(MC->getObjectFileInfo()->getDwarfLineSection());
- StringRef Contents = Data.getData();
-
- // We have to deconstruct the line table header, because it contains to
- // length fields that will need to be updated when we change the length of
- // the files and directories in there.
- unsigned UnitLength = Data.getU32(&Offset);
- uint64_t UnitEnd = Offset + UnitLength;
- MCSymbol *BeginLabel = MC->createTempSymbol();
- MCSymbol *EndLabel = MC->createTempSymbol();
- unsigned Version = Data.getU16(&Offset);
-
- if (Version > 5) {
- warn("Unsupported line table version: dropping contents and not "
- "unobfsucating line table.");
- return;
- }
-
- Asm->emitLabelDifference(EndLabel, BeginLabel, 4);
- Asm->OutStreamer->emitLabel(BeginLabel);
- Asm->emitInt16(Version);
- LineSectionSize += 6;
-
- MCSymbol *HeaderBeginLabel = MC->createTempSymbol();
- MCSymbol *HeaderEndLabel = MC->createTempSymbol();
- Asm->emitLabelDifference(HeaderEndLabel, HeaderBeginLabel, 4);
- Asm->OutStreamer->emitLabel(HeaderBeginLabel);
- Offset += 4;
- LineSectionSize += 4;
-
- uint64_t AfterHeaderLengthOffset = Offset;
- // Skip to the directories.
- Offset += (Version >= 4) ? 5 : 4;
- unsigned OpcodeBase = Data.getU8(&Offset);
- Offset += OpcodeBase - 1;
- Asm->OutStreamer->emitBytes(Contents.slice(AfterHeaderLengthOffset, Offset));
- LineSectionSize += Offset - AfterHeaderLengthOffset;
-
- // Offset points to the first directory.
- while (const char *Dir = Data.getCStr(&Offset)) {
- if (Dir[0] == 0)
- break;
-
- StringRef Translated = Translator(Dir);
- Asm->OutStreamer->emitBytes(Translated);
- Asm->emitInt8(0);
- LineSectionSize += Translated.size() + 1;
- }
- Asm->emitInt8(0);
- LineSectionSize += 1;
-
- while (const char *File = Data.getCStr(&Offset)) {
- if (File[0] == 0)
- break;
-
- StringRef Translated = Translator(File);
- Asm->OutStreamer->emitBytes(Translated);
- Asm->emitInt8(0);
- LineSectionSize += Translated.size() + 1;
-
- uint64_t OffsetBeforeLEBs = Offset;
- Asm->emitULEB128(Data.getULEB128(&Offset));
- Asm->emitULEB128(Data.getULEB128(&Offset));
- Asm->emitULEB128(Data.getULEB128(&Offset));
- LineSectionSize += Offset - OffsetBeforeLEBs;
- }
- Asm->emitInt8(0);
- LineSectionSize += 1;
-
- Asm->OutStreamer->emitLabel(HeaderEndLabel);
-
- // Copy the actual line table program over.
- Asm->OutStreamer->emitBytes(Contents.slice(Offset, UnitEnd));
- LineSectionSize += UnitEnd - Offset;
+void DwarfStreamer::emitIntOffset(uint64_t Offset, dwarf::DwarfFormat Format,
+ uint64_t &SectionSize) {
+ uint8_t Size = dwarf::getDwarfOffsetByteSize(Format);
+ MS->emitIntValue(Offset, Size);
+ SectionSize += Size;
+}
- Asm->OutStreamer->emitLabel(EndLabel);
- Offset = UnitEnd;
+void DwarfStreamer::emitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
+ dwarf::DwarfFormat Format,
+ uint64_t &SectionSize) {
+ uint8_t Size = dwarf::getDwarfOffsetByteSize(Format);
+ Asm->emitLabelDifference(Hi, Lo, Size);
+ SectionSize += Size;
}
/// Emit the pubnames or pubtypes section contribution for \p
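With this patch DwarfStreamer::init() reports failures through llvm::Error built with createStringError() instead of printing a message and returning false, so the caller decides how to surface the text. A minimal caller-side sketch, assuming only the init() signature shown above (the helper function and the empty reflection segment name are illustrative):

#include "llvm/DWARFLinker/DWARFStreamer.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/WithColor.h"
#include "llvm/TargetParser/Triple.h"

// Initialize the streamer and print any failure; return false on error.
static bool initStreamerOrReport(llvm::DwarfStreamer &Streamer,
                                 const llvm::Triple &TheTriple) {
  if (llvm::Error Err =
          Streamer.init(TheTriple, /*Swift5ReflectionSegmentName=*/"")) {
    llvm::WithColor::error() << llvm::toString(std::move(Err)) << '\n';
    return false;
  }
  return true;
}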
diff --git a/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.cpp b/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.cpp
new file mode 100644
index 000000000000..50909c0ba669
--- /dev/null
+++ b/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.cpp
@@ -0,0 +1,131 @@
+//===- DWARFEmitterImpl.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFEmitterImpl.h"
+#include "llvm/DWARFLinker/DWARFLinkerCompileUnit.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/FormattedStream.h"
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+Error DwarfEmitterImpl::init(Triple TheTriple,
+ StringRef Swift5ReflectionSegmentName) {
+ std::string ErrorStr;
+ std::string TripleName;
+
+ // Get the target.
+ const Target *TheTarget =
+ TargetRegistry::lookupTarget(TripleName, TheTriple, ErrorStr);
+ if (!TheTarget)
+ return createStringError(std::errc::invalid_argument, ErrorStr.c_str());
+ TripleName = TheTriple.getTriple();
+
+ // Create all the MC Objects.
+ MRI.reset(TheTarget->createMCRegInfo(TripleName));
+ if (!MRI)
+ return createStringError(std::errc::invalid_argument,
+ "no register info for target %s",
+ TripleName.c_str());
+
+ MCTargetOptions MCOptions = mc::InitMCTargetOptionsFromFlags();
+ MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
+ if (!MAI)
+ return createStringError(std::errc::invalid_argument,
+ "no asm info for target %s", TripleName.c_str());
+
+ MSTI.reset(TheTarget->createMCSubtargetInfo(TripleName, "", ""));
+ if (!MSTI)
+ return createStringError(std::errc::invalid_argument,
+ "no subtarget info for target %s",
+ TripleName.c_str());
+
+ MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get(), nullptr,
+ nullptr, true, Swift5ReflectionSegmentName));
+ MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false, false));
+ MC->setObjectFileInfo(MOFI.get());
+
+ MAB = TheTarget->createMCAsmBackend(*MSTI, *MRI, MCOptions);
+ if (!MAB)
+ return createStringError(std::errc::invalid_argument,
+ "no asm backend for target %s",
+ TripleName.c_str());
+
+ MII.reset(TheTarget->createMCInstrInfo());
+ if (!MII)
+ return createStringError(std::errc::invalid_argument,
+                             "no instr info for target %s",
+ TripleName.c_str());
+
+ MCE = TheTarget->createMCCodeEmitter(*MII, *MC);
+ if (!MCE)
+ return createStringError(std::errc::invalid_argument,
+ "no code emitter for target %s",
+ TripleName.c_str());
+
+ switch (OutFileType) {
+ case DWARFLinker::OutputFileType::Assembly: {
+ MIP = TheTarget->createMCInstPrinter(TheTriple, MAI->getAssemblerDialect(),
+ *MAI, *MII, *MRI);
+ MS = TheTarget->createAsmStreamer(
+ *MC, std::make_unique<formatted_raw_ostream>(OutFile), true, true, MIP,
+ std::unique_ptr<MCCodeEmitter>(MCE), std::unique_ptr<MCAsmBackend>(MAB),
+ true);
+ break;
+ }
+ case DWARFLinker::OutputFileType::Object: {
+ MS = TheTarget->createMCObjectStreamer(
+ TheTriple, *MC, std::unique_ptr<MCAsmBackend>(MAB),
+ MAB->createObjectWriter(OutFile), std::unique_ptr<MCCodeEmitter>(MCE),
+ *MSTI, MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible,
+ /*DWARFMustBeAtTheEnd*/ false);
+ break;
+ }
+ }
+
+ if (!MS)
+ return createStringError(std::errc::invalid_argument,
+ "no object streamer for target %s",
+ TripleName.c_str());
+
+ // Finally create the AsmPrinter we'll use to emit the DIEs.
+ TM.reset(TheTarget->createTargetMachine(TripleName, "", "", TargetOptions(),
+ std::nullopt));
+ if (!TM)
+ return createStringError(std::errc::invalid_argument,
+ "no target machine for target %s",
+ TripleName.c_str());
+
+ Asm.reset(TheTarget->createAsmPrinter(*TM, std::unique_ptr<MCStreamer>(MS)));
+ if (!Asm)
+ return createStringError(std::errc::invalid_argument,
+ "no asm printer for target %s",
+ TripleName.c_str());
+ Asm->setDwarfUsesRelocationsAcrossSections(false);
+
+ RangesSectionSize = 0;
+ RngListsSectionSize = 0;
+ LocSectionSize = 0;
+ LocListsSectionSize = 0;
+ LineSectionSize = 0;
+ FrameSectionSize = 0;
+ DebugInfoSectionSize = 0;
+ MacInfoSectionSize = 0;
+ MacroSectionSize = 0;
+
+ return Error::success();
+}
+
+} // end of namespace dwarflinker_parallel
+} // namespace llvm
diff --git a/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.h b/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.h
new file mode 100644
index 000000000000..d07397a30419
--- /dev/null
+++ b/llvm/lib/DWARFLinkerParallel/DWARFEmitterImpl.h
@@ -0,0 +1,274 @@
+//===- DWARFEmitterImpl.h ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DWARFEMITTERIMPL_H
+#define LLVM_LIB_DWARFLINKERPARALLEL_DWARFEMITTERIMPL_H
+
+#include "DWARFLinkerCompileUnit.h"
+#include "llvm/BinaryFormat/Swift.h"
+#include "llvm/CodeGen/AccelTable.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/DWARFLinkerParallel/DWARFLinker.h"
+#include "llvm/DWARFLinkerParallel/StringTable.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+/// Users of DwarfEmitterImpl should call the initialization code
+/// for AsmPrinter:
+///
+/// InitializeAllTargetInfos();
+/// InitializeAllTargetMCs();
+/// InitializeAllTargets();
+/// InitializeAllAsmPrinters();
+
+template <typename DataT> class AccelTable;
+class MCCodeEmitter;
+class DWARFDebugMacro;
+
+namespace dwarflinker_parallel {
+
+struct UnitStartSymbol {
+ unsigned UnitID = 0;
+  MCSymbol *Symbol = nullptr;
+};
+using UnitStartSymbolsTy = SmallVector<UnitStartSymbol>;
+using Offset2UnitMapTy = DenseMap<uint64_t, CompileUnit *>;
+
+struct RangeAttrPatch;
+struct LocAttrPatch;
+
+/// The Dwarf emission logic.
+///
+/// All interactions with the MC layer that is used to build the debug
+/// information binary representation are handled in this class.
+class DwarfEmitterImpl : public ExtraDwarfEmitter {
+public:
+ DwarfEmitterImpl(DWARFLinker::OutputFileType OutFileType,
+ raw_pwrite_stream &OutFile,
+ std::function<StringRef(StringRef Input)> Translator,
+ DWARFLinker::MessageHandlerTy Warning)
+ : OutFile(OutFile), OutFileType(OutFileType), Translator(Translator),
+ WarningHandler(Warning) {}
+
+ Error init(Triple TheTriple, StringRef Swift5ReflectionSegmentName);
+
+ /// Dump the file to the disk.
+ void finish() override { MS->finish(); }
+
+ AsmPrinter &getAsmPrinter() const override { return *Asm; }
+
+ /// Set the current output section to debug_info and change
+ /// the MC Dwarf version to \p DwarfVersion.
+ void switchToDebugInfoSection(unsigned DwarfVersion) {}
+
+ /// Emit the swift_ast section stored in \p Buffer.
+ void emitSwiftAST(StringRef Buffer) override {}
+
+ /// Emit the swift reflection section stored in \p Buffer.
+ void emitSwiftReflectionSection(
+ llvm::binaryformat::Swift5ReflectionSectionKind ReflSectionKind,
+ StringRef Buffer, uint32_t Alignment, uint32_t Size) override {}
+
+ void emitPaperTrailWarningsDie(DIE &Die) {}
+
+ void emitSectionContents(StringRef SecData, StringRef SecName) override {}
+
+ MCSymbol *emitTempSym(StringRef SecName, StringRef SymName) override {
+ return nullptr;
+ }
+
+ void emitAbbrevs(const SmallVector<std::unique_ptr<DIEAbbrev>> &Abbrevs,
+ unsigned DwarfVersion) {}
+
+ void emitStrings(const StringTable &Strings) {}
+
+ void emitLineStrings(const StringTable &Strings) {}
+
+ void emitDebugNames(AccelTable<DWARF5AccelTableStaticData> &,
+ UnitStartSymbolsTy &UnitOffsets) {}
+
+ void emitAppleNamespaces(AccelTable<AppleAccelTableStaticOffsetData> &) {}
+
+ void emitAppleNames(AccelTable<AppleAccelTableStaticOffsetData> &) {}
+
+ void emitAppleObjc(AccelTable<AppleAccelTableStaticOffsetData> &) {}
+
+ void emitAppleTypes(AccelTable<AppleAccelTableStaticTypeData> &) {}
+
+ MCSymbol *emitDwarfDebugRangeListHeader(const CompileUnit &Unit) {
+ return nullptr;
+ }
+
+ void emitDwarfDebugRangeListFragment(const CompileUnit &Unit,
+ const AddressRanges &LinkedRanges,
+ RangeAttrPatch &Patch) {}
+
+ void emitDwarfDebugRangeListFooter(const CompileUnit &Unit,
+ MCSymbol *EndLabel) {}
+
+ MCSymbol *emitDwarfDebugLocListHeader(const CompileUnit &Unit) {
+ return nullptr;
+ }
+
+ void emitDwarfDebugLocListFragment(
+ const CompileUnit &Unit,
+ const DWARFLocationExpressionsVector &LinkedLocationExpression,
+ LocAttrPatch &Patch) {}
+
+ void emitDwarfDebugLocListFooter(const CompileUnit &Unit,
+ MCSymbol *EndLabel) {}
+
+ void emitDwarfDebugArangesTable(const CompileUnit &Unit,
+ const AddressRanges &LinkedRanges) {}
+
+ void translateLineTable(DataExtractor LineData, uint64_t Offset) {}
+
+ void emitLineTableForUnit(MCDwarfLineTableParams Params,
+ StringRef PrologueBytes, unsigned MinInstLength,
+ std::vector<DWARFDebugLine::Row> &Rows,
+                            unsigned AddressSize) {}
+
+ void emitLineTableForUnit(const DWARFDebugLine::LineTable &LineTable,
+ const CompileUnit &Unit, const StringTable &Strings,
+ const StringTable &LineTableStrings) {}
+
+ void emitPubNamesForUnit(const CompileUnit &Unit) {}
+
+ void emitPubTypesForUnit(const CompileUnit &Unit) {}
+
+ void emitCIE(StringRef CIEBytes) {}
+
+  void emitFDE(uint32_t CIEOffset, uint32_t AddrSize, uint64_t Address,
+ StringRef Bytes) {}
+
+ void emitCompileUnitHeader(CompileUnit &Unit, unsigned DwarfVersion) {}
+
+ void emitDIE(DIE &Die) {}
+
+ void emitMacroTables(DWARFContext *Context,
+ const Offset2UnitMapTy &UnitMacroMap,
+ StringTable &Strings) {}
+
+ /// Returns size of generated .debug_line section.
+ uint64_t getDebugLineSectionSize() const { return LineSectionSize; }
+
+ /// Returns size of generated .debug_frame section.
+ uint64_t getDebugFrameSectionSize() const { return FrameSectionSize; }
+
+ /// Returns size of generated .debug_ranges section.
+ uint64_t getDebugRangesSectionSize() const { return RangesSectionSize; }
+
+ /// Returns size of generated .debug_rnglists section.
+ uint64_t getDebugRngListsSectionSize() const { return RngListsSectionSize; }
+
+ /// Returns size of generated .debug_info section.
+ uint64_t getDebugInfoSectionSize() const { return DebugInfoSectionSize; }
+
+ /// Returns size of generated .debug_macinfo section.
+ uint64_t getDebugMacInfoSectionSize() const { return MacInfoSectionSize; }
+
+ /// Returns size of generated .debug_macro section.
+ uint64_t getDebugMacroSectionSize() const { return MacroSectionSize; }
+
+ /// Returns size of generated .debug_loc section.
+ uint64_t getDebugLocSectionSize() const { return LocSectionSize; }
+
+ /// Returns size of generated .debug_loclists section.
+ uint64_t getDebugLocListsSectionSize() const { return LocListsSectionSize; }
+
+private:
+ inline void warn(const Twine &Warning, StringRef Context = "") {
+ if (WarningHandler)
+ WarningHandler(Warning, Context, nullptr);
+ }
+
+ void emitMacroTableImpl(const DWARFDebugMacro *MacroTable,
+ const Offset2UnitMapTy &UnitMacroMap,
+ StringPool &StringPool, uint64_t &OutOffset) {}
+
+ /// Emit piece of .debug_ranges for \p LinkedRanges.
+ void emitDwarfDebugRangesTableFragment(const CompileUnit &Unit,
+ const AddressRanges &LinkedRanges,
+ RangeAttrPatch &Patch) {}
+
+ /// Emit piece of .debug_rnglists for \p LinkedRanges.
+ void emitDwarfDebugRngListsTableFragment(const CompileUnit &Unit,
+ const AddressRanges &LinkedRanges,
+ RangeAttrPatch &Patch) {}
+
+ /// Emit piece of .debug_loc for \p LinkedRanges.
+ void emitDwarfDebugLocTableFragment(
+ const CompileUnit &Unit,
+ const DWARFLocationExpressionsVector &LinkedLocationExpression,
+ LocAttrPatch &Patch) {}
+
+ /// Emit piece of .debug_loclists for \p LinkedRanges.
+ void emitDwarfDebugLocListsTableFragment(
+ const CompileUnit &Unit,
+ const DWARFLocationExpressionsVector &LinkedLocationExpression,
+ LocAttrPatch &Patch) {}
+
+ /// \defgroup MCObjects MC layer objects constructed by the streamer
+ /// @{
+ std::unique_ptr<MCRegisterInfo> MRI;
+ std::unique_ptr<MCAsmInfo> MAI;
+ std::unique_ptr<MCObjectFileInfo> MOFI;
+ std::unique_ptr<MCContext> MC;
+ MCAsmBackend *MAB; // Owned by MCStreamer
+ std::unique_ptr<MCInstrInfo> MII;
+ std::unique_ptr<MCSubtargetInfo> MSTI;
+ MCInstPrinter *MIP; // Owned by AsmPrinter
+ MCCodeEmitter *MCE; // Owned by MCStreamer
+ MCStreamer *MS; // Owned by AsmPrinter
+ std::unique_ptr<TargetMachine> TM;
+ std::unique_ptr<AsmPrinter> Asm;
+ /// @}
+
+ /// The output file we stream the linked Dwarf to.
+ raw_pwrite_stream &OutFile;
+ DWARFLinker::OutputFileType OutFileType = DWARFLinker::OutputFileType::Object;
+ std::function<StringRef(StringRef Input)> Translator;
+
+ uint64_t RangesSectionSize = 0;
+ uint64_t RngListsSectionSize = 0;
+ uint64_t LocSectionSize = 0;
+ uint64_t LocListsSectionSize = 0;
+ uint64_t LineSectionSize = 0;
+ uint64_t FrameSectionSize = 0;
+ uint64_t DebugInfoSectionSize = 0;
+ uint64_t MacInfoSectionSize = 0;
+ uint64_t MacroSectionSize = 0;
+
+ /// Keep track of emitted CUs and their Unique ID.
+ struct EmittedUnit {
+ unsigned ID;
+ MCSymbol *LabelBegin;
+ };
+ std::vector<EmittedUnit> EmittedUnitsTy;
+
+ /// Emit the pubnames or pubtypes section contribution for \p
+ /// Unit into \p Sec. The data is provided in \p Names.
+ void emitPubSectionForUnit(MCSection *Sec, StringRef Name,
+ const CompileUnit &Unit,
+ const std::vector<CompileUnit::AccelInfo> &Names);
+
+ DWARFLinker::MessageHandlerTy WarningHandler = nullptr;
+};
+
+} // end namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_LIB_DWARFLINKERPARALLEL_DWARFEMITTERIMPL_H
diff --git a/llvm/lib/DWARFLinkerParallel/DWARFLinker.cpp b/llvm/lib/DWARFLinkerParallel/DWARFLinker.cpp
index a54d2e3cc281..f082fd603610 100644
--- a/llvm/lib/DWARFLinkerParallel/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinkerParallel/DWARFLinker.cpp
@@ -6,8 +6,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DWARFLinkerParallel/DWARFLinker.h"
+#include "DWARFLinkerImpl.h"
-namespace llvm {
-namespace dwarflinker_parallel {} // end of namespace dwarflinker_parallel
-} // namespace llvm
+std::unique_ptr<llvm::dwarflinker_parallel::DWARFLinker>
+llvm::dwarflinker_parallel::DWARFLinker::createLinker(
+ MessageHandlerTy ErrorHandler, MessageHandlerTy WarningHandler,
+ TranslatorFuncTy StringsTranslator) {
+ return std::make_unique<DWARFLinkerImpl>(ErrorHandler, WarningHandler,
+ StringsTranslator);
+}
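The previously empty stub library now has a usable entry point: DWARFLinker::createLinker() returns a DWARFLinkerImpl behind the abstract interface. A usage sketch; the handler parameter types are inferred from the reportWarning()/reportError() calls in DWARFLinkerImpl.h further below and may differ, and passing nullptr for the translator assumes TranslatorFuncTy accepts it:

#include "llvm/ADT/Twine.h"
#include "llvm/DWARFLinkerParallel/DWARFLinker.h"
#include "llvm/Support/WithColor.h"

static void runParallelLinkerSketch() {
  using namespace llvm;
  using dwarflinker_parallel::DWARFLinker;

  std::unique_ptr<DWARFLinker> Linker = DWARFLinker::createLinker(
      /*ErrorHandler=*/
      [](const Twine &Msg, StringRef File, const DWARFDie *) {
        WithColor::error() << File << ": " << Msg << '\n';
      },
      /*WarningHandler=*/
      [](const Twine &Msg, StringRef File, const DWARFDie *) {
        WithColor::warning() << File << ": " << Msg << '\n';
      },
      /*StringsTranslator=*/nullptr);

  if (Error Err = Linker->setTargetDWARFVersion(5))
    WithColor::error() << toString(std::move(Err)) << '\n';
  Linker->setNumThreads(1);

  // link() currently just routes the "not implemented yet" warning through
  // the warning handler; real inputs would be added via addObjectFile().
  if (Error Err = Linker->link())
    WithColor::error() << toString(std::move(Err)) << '\n';
}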
diff --git a/llvm/lib/DWARFLinkerParallel/DWARFLinkerCompileUnit.h b/llvm/lib/DWARFLinkerParallel/DWARFLinkerCompileUnit.h
new file mode 100644
index 000000000000..1617a848512d
--- /dev/null
+++ b/llvm/lib/DWARFLinkerParallel/DWARFLinkerCompileUnit.h
@@ -0,0 +1,156 @@
+//===- DWARFLinkerCompileUnit.h ---------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERCOMPILEUNIT_H
+#define LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERCOMPILEUNIT_H
+
+#include "DWARFLinkerUnit.h"
+#include "llvm/DWARFLinkerParallel/DWARFFile.h"
+#include "llvm/DWARFLinkerParallel/DWARFLinker.h"
+#include <optional>
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+struct LinkContext;
+class DWARFFile;
+
+/// Stores all information related to a compile unit, be it in its original
+/// instance of the object file or its brand new cloned and generated DIE tree.
+class CompileUnit : public DwarfUnit {
+public:
+ CompileUnit(LinkContext &, unsigned ID, StringRef ClangModuleName,
+ DWARFFile &File,
+ DWARFLinker::SwiftInterfacesMapTy *,
+ UnitMessageHandlerTy WarningHandler)
+ : DwarfUnit(ID, ClangModuleName, WarningHandler), ContaingFile(File) {
+ FormParams.Version = 4;
+ FormParams.Format = dwarf::DWARF32;
+ FormParams.AddrSize = 4;
+ UnitName = ContaingFile.FileName;
+ }
+
+ CompileUnit(LinkContext &, DWARFUnit &OrigUnit, unsigned ID,
+ StringRef ClangModuleName, DWARFFile &File,
+ UnitMessageHandlerTy WarningHandler)
+ : DwarfUnit(ID, ClangModuleName, WarningHandler),
+ ContaingFile(File), OrigUnit(&OrigUnit) {
+ DWARFDie CUDie = OrigUnit.getUnitDIE();
+ if (!CUDie)
+ return;
+
+ if (File.Dwarf)
+ Endianess = File.Dwarf->isLittleEndian() ? support::endianness::little
+ : support::endianness::big;
+
+ FormParams.Version = OrigUnit.getVersion();
+ FormParams.Format = dwarf::DWARF32;
+ FormParams.AddrSize = OrigUnit.getAddressByteSize();
+
+ Language = dwarf::toUnsigned(CUDie.find(dwarf::DW_AT_language), 0);
+
+ UnitName = ContaingFile.FileName;
+ SysRoot = dwarf::toStringRef(CUDie.find(dwarf::DW_AT_LLVM_sysroot)).str();
+ }
+
+ /// \defgroup Helper methods to access OrigUnit.
+ ///
+ /// @{
+
+ /// Returns paired compile unit from input DWARF.
+ DWARFUnit &getOrigUnit() const {
+ assert(OrigUnit != nullptr);
+ return *OrigUnit;
+ }
+
+ const DWARFDebugInfoEntry *
+ getFirstChildEntry(const DWARFDebugInfoEntry *Die) const {
+ assert(OrigUnit != nullptr);
+ return OrigUnit->getFirstChildEntry(Die);
+ }
+
+ const DWARFDebugInfoEntry *
+ getSiblingEntry(const DWARFDebugInfoEntry *Die) const {
+ assert(OrigUnit != nullptr);
+ return OrigUnit->getSiblingEntry(Die);
+ }
+
+ DWARFDie getParent(const DWARFDebugInfoEntry *Die) {
+ assert(OrigUnit != nullptr);
+ return OrigUnit->getParent(Die);
+ }
+
+ DWARFDie getDIEAtIndex(unsigned Index) {
+ assert(OrigUnit != nullptr);
+ return OrigUnit->getDIEAtIndex(Index);
+ }
+
+ const DWARFDebugInfoEntry *getDebugInfoEntry(unsigned Index) const {
+ assert(OrigUnit != nullptr);
+ return OrigUnit->getDebugInfoEntry(Index);
+ }
+
+ DWARFDie getUnitDIE(bool ExtractUnitDIEOnly = true) {
+ assert(OrigUnit != nullptr);
+ return OrigUnit->getUnitDIE(ExtractUnitDIEOnly);
+ }
+
+ DWARFDie getDIE(const DWARFDebugInfoEntry *Die) {
+ assert(OrigUnit != nullptr);
+ return DWARFDie(OrigUnit, Die);
+ }
+
+ uint32_t getDIEIndex(const DWARFDebugInfoEntry *Die) const {
+ assert(OrigUnit != nullptr);
+ return OrigUnit->getDIEIndex(Die);
+ }
+
+ uint32_t getDIEIndex(const DWARFDie &Die) const {
+ assert(OrigUnit != nullptr);
+ return OrigUnit->getDIEIndex(Die);
+ }
+
+ std::optional<DWARFFormValue> find(uint32_t DieIdx,
+ ArrayRef<dwarf::Attribute> Attrs) const {
+ assert(OrigUnit != nullptr);
+ return find(OrigUnit->getDebugInfoEntry(DieIdx), Attrs);
+ }
+
+ std::optional<DWARFFormValue> find(const DWARFDebugInfoEntry *Die,
+ ArrayRef<dwarf::Attribute> Attrs) const {
+ if (!Die)
+ return std::nullopt;
+ auto AbbrevDecl = Die->getAbbreviationDeclarationPtr();
+ if (AbbrevDecl) {
+ for (auto Attr : Attrs) {
+ if (auto Value = AbbrevDecl->getAttributeValue(Die->getOffset(), Attr,
+ *OrigUnit))
+ return Value;
+ }
+ }
+ return std::nullopt;
+ }
+
+ std::optional<uint32_t> getDIEIndexForOffset(uint64_t Offset) {
+ return OrigUnit->getDIEIndexForOffset(Offset);
+ }
+
+ /// @}
+
+private:
+ /// DWARFFile containing this compile unit.
+ DWARFFile &ContaingFile;
+
+ /// Pointer to the paired compile unit from the input DWARF.
+ DWARFUnit *OrigUnit = nullptr;
+};
+
+} // end of namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERCOMPILEUNIT_H
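Most of the new CompileUnit wrapper simply forwards to the underlying DWARFUnit, and its find() overloads let a caller probe a DIE for the first of several attributes without materializing a DWARFDie. A small sketch of that access pattern (the helper name is illustrative; dwarf::toAddress is the usual DWARFFormValue accessor and DIE index 0 is the unit DIE):

#include "DWARFLinkerCompileUnit.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include <optional>

// Read DW_AT_low_pc (falling back to DW_AT_entry_pc) from the unit DIE.
static std::optional<uint64_t>
getUnitLowPC(llvm::dwarflinker_parallel::CompileUnit &CU) {
  if (std::optional<llvm::DWARFFormValue> Val = CU.find(
          /*DieIdx=*/0,
          {llvm::dwarf::DW_AT_low_pc, llvm::dwarf::DW_AT_entry_pc}))
    return llvm::dwarf::toAddress(Val);
  return std::nullopt;
}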
diff --git a/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.cpp b/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.cpp
new file mode 100644
index 000000000000..dfd77af92f27
--- /dev/null
+++ b/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.cpp
@@ -0,0 +1,46 @@
+//=== DWARFLinkerImpl.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DWARFLinkerImpl.h"
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+/// Similar to DWARFUnitSection::getUnitForOffset(), but returning our
+/// CompileUnit object instead.
+CompileUnit *
+DWARFLinkerImpl::LinkContext::getUnitForOffset(CompileUnit &CurrentCU,
+ uint64_t Offset) const {
+ if (CurrentCU.isClangModule())
+ return &CurrentCU;
+
+ auto CU = llvm::upper_bound(
+ CompileUnits, Offset,
+ [](uint64_t LHS, const std::unique_ptr<CompileUnit> &RHS) {
+ return LHS < RHS->getOrigUnit().getNextUnitOffset();
+ });
+
+ return CU != CompileUnits.end() ? CU->get() : nullptr;
+}
+
+Error DWARFLinkerImpl::createEmitter(const Triple &TheTriple,
+ OutputFileType FileType,
+ raw_pwrite_stream &OutFile) {
+
+ TheDwarfEmitter = std::make_unique<DwarfEmitterImpl>(
+ FileType, OutFile, OutputStrings.getTranslator(), WarningHandler);
+
+ return TheDwarfEmitter->init(TheTriple, "__DWARF");
+}
+
+ExtraDwarfEmitter *DWARFLinkerImpl::getEmitter() {
+ return TheDwarfEmitter.get();
+}
+
+} // end of namespace dwarflinker_parallel
+} // namespace llvm
diff --git a/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.h b/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.h
new file mode 100644
index 000000000000..a8fa9b4b46d8
--- /dev/null
+++ b/llvm/lib/DWARFLinkerParallel/DWARFLinkerImpl.h
@@ -0,0 +1,319 @@
+//===- DWARFLinkerImpl.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERIMPL_H
+#define LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERIMPL_H
+
+#include "DWARFEmitterImpl.h"
+#include "DWARFLinkerCompileUnit.h"
+#include "llvm/ADT/AddressRanges.h"
+#include "llvm/CodeGen/AccelTable.h"
+#include "llvm/DWARFLinkerParallel/DWARFLinker.h"
+#include "llvm/DWARFLinkerParallel/StringPool.h"
+#include "llvm/DWARFLinkerParallel/StringTable.h"
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+using Offset2UnitMapTy = DenseMap<uint64_t, CompileUnit *>;
+
+struct RangeAttrPatch;
+struct LocAttrPatch;
+
+class DWARFLinkerImpl : public DWARFLinker {
+public:
+ DWARFLinkerImpl(MessageHandlerTy ErrorHandler,
+ MessageHandlerTy WarningHandler,
+ TranslatorFuncTy StringsTranslator)
+ : UniqueUnitID(0), ErrorHandler(ErrorHandler),
+ WarningHandler(WarningHandler),
+ OutputStrings(Strings, StringsTranslator) {}
+
+ Error createEmitter(const Triple &TheTriple, OutputFileType FileType,
+ raw_pwrite_stream &OutFile) override;
+
+ ExtraDwarfEmitter *getEmitter() override;
+
+  /// Add an object file to be linked and pre-load its compile unit DIEs.
+  /// \p OnCUDieLoaded is called for each compile unit DIE. If the specified
+  /// \p File references a Clang module, that module is pre-loaded by
+  /// \p Loader in the !Update case.
+ ///
+ /// \pre NoODR, Update options should be set before call to addObjectFile.
+ void addObjectFile(
+ DWARFFile &File, ObjFileLoaderTy Loader = nullptr,
+ CompileUnitHandlerTy OnCUDieLoaded = [](const DWARFUnit &) {}) override {}
+
+ /// Link debug info for added files.
+ Error link() override {
+ reportWarning("LLVM parallel dwarflinker is not implemented yet.", "");
+ return Error::success();
+ }
+
+ /// \defgroup Methods setting various linking options:
+ ///
+ /// @{
+ ///
+
+  /// Allow generating a log of the linking process on the standard output.
+ void setVerbosity(bool Verbose) override { Options.Verbose = Verbose; }
+
+ /// Print statistics to standard output.
+ void setStatistics(bool Statistics) override {
+ Options.Statistics = Statistics;
+ }
+
+ /// Verify the input DWARF.
+ void setVerifyInputDWARF(bool Verify) override {
+ Options.VerifyInputDWARF = Verify;
+ }
+
+ /// Do not unique types according to ODR.
+ void setNoODR(bool NoODR) override { Options.NoODR = NoODR; }
+
+  /// Update index tables only (do not modify the rest of the DWARF).
+ void setUpdateIndexTablesOnly(bool UpdateIndexTablesOnly) override {
+ Options.UpdateIndexTablesOnly = UpdateIndexTablesOnly;
+ }
+
+ /// Allow generating valid, but non-deterministic output.
+ void
+ setAllowNonDeterministicOutput(bool AllowNonDeterministicOutput) override {
+ Options.AllowNonDeterministicOutput = AllowNonDeterministicOutput;
+ }
+
+ /// Set to keep the enclosing function for a static variable.
+ void setKeepFunctionForStatic(bool KeepFunctionForStatic) override {
+ Options.KeepFunctionForStatic = KeepFunctionForStatic;
+ }
+
+ /// Use the specified number of threads for parallel file linking.
+ void setNumThreads(unsigned NumThreads) override {
+ Options.Threads = NumThreads;
+ }
+
+ /// Add a kind of accelerator table to be generated.
+ void addAccelTableKind(AccelTableKind Kind) override {
+ assert(!llvm::is_contained(Options.AccelTables, Kind));
+ Options.AccelTables.emplace_back(Kind);
+ }
+
+ /// Set prepend path for clang modules.
+ void setPrependPath(const std::string &Ppath) override {
+ Options.PrependPath = Ppath;
+ }
+
+ /// Set the estimated number of object files, for preliminary data allocation.
+ void setEstimatedObjfilesAmount(unsigned ObjFilesNum) override {
+ ObjectContexts.reserve(ObjFilesNum);
+ }
+
+ /// Set the verification handler which will be used to report
+ /// verification errors.
+ void
+ setInputVerificationHandler(InputVerificationHandlerTy Handler) override {
+ Options.InputVerificationHandler = Handler;
+ }
+
+ /// Set map for Swift interfaces.
+ void setSwiftInterfacesMap(SwiftInterfacesMapTy *Map) override {
+ Options.ParseableSwiftInterfaces = Map;
+ }
+
+ /// Set prefix map for objects.
+ void setObjectPrefixMap(ObjectPrefixMapTy *Map) override {
+ Options.ObjectPrefixMap = Map;
+ }
+
+ /// Set target DWARF version.
+ Error setTargetDWARFVersion(uint16_t TargetDWARFVersion) override {
+ if ((TargetDWARFVersion < 1) || (TargetDWARFVersion > 5))
+ return createStringError(std::errc::invalid_argument,
+ "unsupported DWARF version: %d",
+ TargetDWARFVersion);
+
+ Options.TargetDWARFVersion = TargetDWARFVersion;
+ return Error::success();
+ }
+ /// @}
+
+protected:
+ /// Reports Warning.
+ void reportWarning(const Twine &Warning, const DWARFFile &File,
+ const DWARFDie *DIE = nullptr) const {
+ if (WarningHandler != nullptr)
+ WarningHandler(Warning, File.FileName, DIE);
+ }
+
+ /// Reports Warning.
+ void reportWarning(const Twine &Warning, StringRef FileName,
+ const DWARFDie *DIE = nullptr) const {
+ if (WarningHandler != nullptr)
+ WarningHandler(Warning, FileName, DIE);
+ }
+
+ /// Reports Error.
+ void reportError(const Twine &Warning, StringRef FileName,
+ const DWARFDie *DIE = nullptr) const {
+ if (ErrorHandler != nullptr)
+ ErrorHandler(Warning, FileName, DIE);
+ }
+
+ /// Returns next available unique Compile Unit ID.
+ unsigned getNextUniqueUnitID() { return UniqueUnitID.fetch_add(1); }
+
+ /// Keeps track of the data associated with one object file during linking:
+ /// the source file descriptor, compile units, and output data for the
+ /// common compile-unit tables.
+ struct LinkContext : public OutputSections {
+ using UnitListTy = SmallVector<std::unique_ptr<CompileUnit>>;
+
+ /// Keep information for referenced clang module: already loaded DWARF info
+ /// of the clang module and a CompileUnit of the module.
+ struct RefModuleUnit {
+ RefModuleUnit(DWARFFile &File, std::unique_ptr<CompileUnit> Unit)
+ : File(File), Unit(std::move(Unit)) {}
+ RefModuleUnit(RefModuleUnit &&Other)
+ : File(Other.File), Unit(std::move(Other.Unit)) {}
+ RefModuleUnit(const RefModuleUnit &) = delete;
+
+ DWARFFile &File;
+ std::unique_ptr<CompileUnit> Unit;
+ };
+ using ModuleUnitListTy = SmallVector<RefModuleUnit>;
+
+ /// Object file descriptor.
+ DWARFFile &File;
+
+ /// Set of compile units (may be accessed asynchronously for reading).
+ UnitListTy CompileUnits;
+
+ /// Set of Compile Units for modules.
+ ModuleUnitListTy ModulesCompileUnits;
+
+ /// Size of Debug info before optimizing.
+ uint64_t OriginalDebugInfoSize = 0;
+
+ /// Output sections, common for all compilation units.
+ OutTablesFileTy OutDebugInfoBytes;
+
+ /// Endianness for the final file.
+ support::endianness Endianess = support::endianness::little;
+
+ LinkContext(DWARFFile &File) : File(File) {
+ if (File.Dwarf) {
+ if (!File.Dwarf->compile_units().empty())
+ CompileUnits.reserve(File.Dwarf->getNumCompileUnits());
+
+ Endianess = File.Dwarf->isLittleEndian() ? support::endianness::little
+ : support::endianness::big;
+ }
+ }
+
+ /// Add Compile Unit corresponding to the module.
+ void addModulesCompileUnit(RefModuleUnit &&Unit) {
+ ModulesCompileUnits.emplace_back(std::move(Unit));
+ }
+
+ /// Return the endianness of the source DWARF information.
+ support::endianness getEndianness() { return Endianess; }
+
+ /// \returns the pointer to the compile unit which corresponds to \p Offset.
+ CompileUnit *getUnitForOffset(CompileUnit &CU, uint64_t Offset) const;
+ };
+
+ /// Linking options.
+ struct DWARFLinkerOptions {
+ /// DWARF version for the output.
+ uint16_t TargetDWARFVersion = 0;
+
+ /// Generate processing log to the standard output.
+ bool Verbose = false;
+
+ /// Print statistics.
+ bool Statistics = false;
+
+ /// Verify the input DWARF.
+ bool VerifyInputDWARF = false;
+
+ /// Do not unique types according to ODR
+ bool NoODR = false;
+
+ /// Update index tables.
+ bool UpdateIndexTablesOnly = false;
+
+ /// Whether we want a static variable to force us to keep its enclosing
+ /// function.
+ bool KeepFunctionForStatic = false;
+
+ /// Allow generating valid, but non-deterministic output.
+ bool AllowNonDeterministicOutput = false;
+
+ /// Number of threads.
+ unsigned Threads = 1;
+
+ /// The accelerator table kinds
+ SmallVector<AccelTableKind, 1> AccelTables;
+
+ /// Prepend path for the clang modules.
+ std::string PrependPath;
+
+ /// Input verification handler (it might be called asynchronously).
+ InputVerificationHandlerTy InputVerificationHandler = nullptr;
+
+ /// A list of all .swiftinterface files referenced by the debug
+ /// info, mapping module names to paths on disk. The entries need to
+ /// be uniqued and sorted, and only a few entries are expected
+ /// per compile unit, which is why this is a std::map.
+ /// This is a dsymutil-specific flag.
+ ///
+ /// (it might be accessed asynchronously).
+ SwiftInterfacesMapTy *ParseableSwiftInterfaces = nullptr;
+
+ /// A list of remappings to apply to file paths.
+ ///
+ /// (it might be accessed asynchronously).
+ ObjectPrefixMapTy *ObjectPrefixMap = nullptr;
+ } Options;
+
+ /// \defgroup Data members accessed asynchronously.
+ ///
+ /// @{
+
+ /// Unique ID for compile unit.
+ std::atomic<unsigned> UniqueUnitID;
+
+ /// Strings pool. Keeps all strings.
+ StringPool Strings;
+
+ /// Error handler (it might be called asynchronously).
+ MessageHandlerTy ErrorHandler = nullptr;
+
+ /// Warning handler (it might be called asynchronously).
+ MessageHandlerTy WarningHandler = nullptr;
+ /// @}
+
+ /// \defgroup Data members accessed sequentially.
+ ///
+ /// @{
+
+ /// Set of strings which should be emitted.
+ StringTable OutputStrings;
+
+ /// Keeps all linking contexts.
+ SmallVector<std::unique_ptr<LinkContext>> ObjectContexts;
+
+ /// The emitter for the final DWARF file.
+ std::unique_ptr<DwarfEmitterImpl> TheDwarfEmitter;
+ /// @}
+};
+
+} // end namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERIMPL_H
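A minimal, hypothetical driver for the setters declared above (only the signatures shown in this file are assumed): the handler lambda is a placeholder, the null translator assumes TranslatorFuncTy is a std::function, and error handling is reduced to logging.

// Hypothetical usage sketch for DWARFLinkerImpl's option API.
auto Handler = [](const llvm::Twine &Msg, llvm::StringRef File,
                  const llvm::DWARFDie *) { llvm::errs() << File << ": " << Msg << "\n"; };
llvm::dwarflinker_parallel::DWARFLinkerImpl Linker(Handler, Handler,
                                                   /*StringsTranslator=*/nullptr);
Linker.setVerbosity(true);
Linker.setNumThreads(4);
if (llvm::Error E = Linker.setTargetDWARFVersion(5))
  llvm::logAllUnhandledErrors(std::move(E), llvm::errs());
if (llvm::Error E = Linker.link()) // currently only emits the "not implemented yet" warning
  llvm::logAllUnhandledErrors(std::move(E), llvm::errs());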
diff --git a/llvm/lib/DWARFLinkerParallel/DWARFLinkerUnit.h b/llvm/lib/DWARFLinkerParallel/DWARFLinkerUnit.h
new file mode 100644
index 000000000000..78e8d82ea061
--- /dev/null
+++ b/llvm/lib/DWARFLinkerParallel/DWARFLinkerUnit.h
@@ -0,0 +1,186 @@
+//===- DWARFLinkerUnit.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERUNIT_H
+#define LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERUNIT_H
+
+#include "OutputSections.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/DWARFLinkerParallel/StringPool.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+#include "llvm/Support/LEB128.h"
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+using UnitMessageHandlerTy = function_ref<void(
+ const Twine &Error, StringRef Context, const DWARFDie *DIE)>;
+
+/// Each unit keeps its output data as a file with the debug tables
+/// corresponding to that unit.
+using OutTablesFileTy = SmallString<0>;
+
+/// Base class for all DWARF units (compile unit / type table unit).
+class DwarfUnit : public OutputSections {
+public:
+ virtual ~DwarfUnit() {}
+ DwarfUnit(unsigned ID, StringRef ClangModuleName,
+ UnitMessageHandlerTy WarningHandler)
+ : ID(ID), ClangModuleName(ClangModuleName),
+ WarningHandler(WarningHandler) {
+ FormParams.Version = 4;
+ FormParams.Format = dwarf::DWARF32;
+ FormParams.AddrSize = 4;
+ }
+
+ /// Endianness of the compile unit.
+ support::endianness getEndianness() const { return Endianess; }
+
+ /// Return DWARF version.
+ uint16_t getVersion() const { return FormParams.Version; }
+
+ /// Return the size of the header of the debug_info table.
+ uint16_t getHeaderSize() const { return FormParams.Version >= 5 ? 12 : 11; }
+
+ /// Return size of address.
+ uint8_t getAddressByteSize() const { return FormParams.AddrSize; }
+
+ /// Return size of reference.
+ uint8_t getRefAddrByteSize() const { return FormParams.getRefAddrByteSize(); }
+
+ /// Return the DWARF format (DWARF32 or DWARF64).
+ /// TODO: DWARF64 is not currently supported.
+ dwarf::DwarfFormat getDwarfFormat() const { return FormParams.Format; }
+
+ /// Unique id of the unit.
+ unsigned getUniqueID() const { return ID; }
+
+ /// Return language of this unit.
+ uint16_t getLanguage() const { return Language; }
+
+ /// Set the size of this (newly generated) compile unit.
+ void setUnitSize(uint64_t UnitSize) { this->UnitSize = UnitSize; }
+
+ /// Returns the size of this (newly generated) compile unit.
+ uint64_t getUnitSize() const { return UnitSize; }
+
+ /// Returns this unit name.
+ StringRef getUnitName() const { return UnitName; }
+
+ /// Return the DW_AT_LLVM_sysroot of the compile unit or an empty StringRef.
+ StringRef getSysRoot() { return SysRoot; }
+
+ /// Set the output DIE for this unit.
+ void setOutputDIE(DIE *UnitDie) { NewUnit = UnitDie; }
+
+ /// Return the output DIE for this compile unit.
+ DIE *getOutputUnitDIE() const { return NewUnit; }
+
+ /// Return true if this compile unit is from a Clang module.
+ bool isClangModule() const { return !ClangModuleName.empty(); }
+
+ /// Return the Clang module name.
+ const std::string &getClangModuleName() const { return ClangModuleName; }
+
+ /// Returns generated file keeping debug tables for this compile unit.
+ OutTablesFileTy &getOutDwarfBits() { return OutDebugInfoBits; }
+
+ /// Erases generated file keeping debug tables for this compile unit.
+ void eraseDwarfBits() { OutDebugInfoBits = OutTablesFileTy(); }
+
+ MCSymbol *getLabelBegin() { return LabelBegin; }
+ void setLabelBegin(MCSymbol *S) { LabelBegin = S; }
+
+ /// Error reporting methods.
+ /// @{
+
+ void reportWarning(const Twine &Warning,
+ const DWARFDie *Die = nullptr) const {
+ if (WarningHandler)
+ WarningHandler(Warning, getUnitName(), Die);
+ }
+ void reportWarning(Error Warning) const {
+ handleAllErrors(std::move(Warning), [&](ErrorInfoBase &Info) {
+ if (WarningHandler)
+ WarningHandler(Info.message(), getUnitName(), nullptr);
+ });
+ }
+ /// @}
+
+ /// This structure keeps the fields which will be used for creating the
+ /// accelerator table.
+ struct AccelInfo {
+ AccelInfo(StringEntry *Name, const DIE *Die, bool SkipPubSection = false);
+ AccelInfo(StringEntry *Name, const DIE *Die, uint32_t QualifiedNameHash,
+ bool ObjCClassIsImplementation);
+
+ /// Name of the entry.
+ StringEntry *Name = nullptr;
+
+ /// Tag of the DIE this entry describes.
+ dwarf::Tag Tag = dwarf::DW_TAG_null;
+
+ /// Output offset of the DIE this entry describes.
+ uint64_t OutOffset = 0;
+
+ /// Hash of the fully qualified name.
+ uint32_t QualifiedNameHash = 0;
+
+ /// Emit this entry only in the apple_* sections.
+ bool SkipPubSection = false;
+
+ /// Is this an ObjC class implementation?
+ bool ObjcClassImplementation = false;
+
+ /// Cloned Die containing acceleration info.
+ const DIE *Die = nullptr;
+ };
+
+protected:
+ /// Unique ID for the unit.
+ unsigned ID = 0;
+
+ /// Properties of the unit.
+ dwarf::FormParams FormParams;
+
+ /// DIE for newly generated compile unit.
+ DIE *NewUnit = nullptr;
+
+ /// The DW_AT_language of this unit.
+ uint16_t Language = 0;
+
+ /// The name of this unit.
+ std::string UnitName;
+
+ /// The DW_AT_LLVM_sysroot of this unit.
+ std::string SysRoot;
+
+ /// If this is a Clang module, this holds the module's name.
+ std::string ClangModuleName;
+
+ uint64_t UnitSize = 0;
+
+ /// ELF file containing the generated debug tables for this compile unit.
+ OutTablesFileTy OutDebugInfoBits;
+
+ /// Endianness of this compile unit.
+ support::endianness Endianess = support::endianness::little;
+
+ MCSymbol *LabelBegin = nullptr;
+
+ /// True if the current unit references or is referenced by another unit.
+ std::atomic<bool> IsInterconnectedCU = {false};
+
+ UnitMessageHandlerTy WarningHandler;
+};
+
+} // end of namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_LIB_DWARFLINKERPARALLEL_DWARFLINKERUNIT_H
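A brief aside on the header-size rule encoded in getHeaderSize() above; the byte counts follow the DWARF32 compile-unit header layout.

// Sketch of the rule behind DwarfUnit::getHeaderSize() for DWARF32 units:
//   v2-v4: unit_length(4) + version(2) + debug_abbrev_offset(4) + address_size(1) = 11
//   v5:    unit_length(4) + version(2) + unit_type(1) + address_size(1) + debug_abbrev_offset(4) = 12
constexpr unsigned cuHeaderSize(unsigned Version) { return Version >= 5 ? 12 : 11; }
static_assert(cuHeaderSize(4) == 11 && cuHeaderSize(5) == 12, "DWARF32 CU header sizes");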
diff --git a/llvm/lib/DWARFLinkerParallel/OutputSections.cpp b/llvm/lib/DWARFLinkerParallel/OutputSections.cpp
new file mode 100644
index 000000000000..69c5bfaa7bdf
--- /dev/null
+++ b/llvm/lib/DWARFLinkerParallel/OutputSections.cpp
@@ -0,0 +1,36 @@
+//=== OutputSections.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "OutputSections.h"
+#include "llvm/ADT/StringSwitch.h"
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+std::optional<OutputSections::DebugSectionKind>
+OutputSections::parseDebugSectionName(llvm::StringRef SecName) {
+ return llvm::StringSwitch<std::optional<OutputSections::DebugSectionKind>>(
+ SecName)
+ .Case("debug_info", DebugSectionKind::DebugInfo)
+ .Case("debug_line", DebugSectionKind::DebugLine)
+ .Case("debug_frame", DebugSectionKind::DebugFrame)
+ .Case("debug_ranges", DebugSectionKind::DebugRange)
+ .Case("debug_rnglists", DebugSectionKind::DebugRngLists)
+ .Case("debug_loc", DebugSectionKind::DebugLoc)
+ .Case("debug_loclists", DebugSectionKind::DebugLocLists)
+ .Case("debug_aranges", DebugSectionKind::DebugARanges)
+ .Case("debug_abbrev", DebugSectionKind::DebugAbbrev)
+ .Case("debug_macinfo", DebugSectionKind::DebugMacinfo)
+ .Case("debug_macro", DebugSectionKind::DebugMacro)
+ .Default(std::nullopt);
+}
+
+} // end of namespace dwarflinker_parallel
+} // end of namespace llvm
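A small usage sketch for the name mapping above; the switch expects the stripped form of the section names (no leading dot), and unknown names fall through to std::nullopt.

// Hypothetical caller of parseDebugSectionName().
std::optional<OutputSections::DebugSectionKind> Kind =
    OutputSections::parseDebugSectionName("debug_info");
// Kind now holds DebugSectionKind::DebugInfo; an unrecognised name would yield std::nullopt.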
diff --git a/llvm/lib/DWARFLinkerParallel/OutputSections.h b/llvm/lib/DWARFLinkerParallel/OutputSections.h
new file mode 100644
index 000000000000..15ab4cc1167a
--- /dev/null
+++ b/llvm/lib/DWARFLinkerParallel/OutputSections.h
@@ -0,0 +1,67 @@
+//===- OutputSections.h -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DWARFLINKERPARALLEL_OUTPUTSECTIONS_H
+#define LLVM_LIB_DWARFLINKERPARALLEL_OUTPUTSECTIONS_H
+
+#include "llvm/ADT/StringRef.h"
+#include <array>
+#include <cstdint>
+#include <optional>
+#include <type_traits>
+
+namespace llvm {
+namespace dwarflinker_parallel {
+
+/// This class keeps offsets to the debug sections. Any object which is
+/// supposed to be emitted into a debug section should use this class to
+/// track debug section offsets.
+class OutputSections {
+public:
+ /// List of tracked debug sections.
+ enum class DebugSectionKind : uint8_t {
+ DebugInfo = 0,
+ DebugLine,
+ DebugFrame,
+ DebugRange,
+ DebugRngLists,
+ DebugLoc,
+ DebugLocLists,
+ DebugARanges,
+ DebugAbbrev,
+ DebugMacinfo,
+ DebugMacro,
+ };
+ constexpr static size_t SectionKindsNum = 11;
+
+ /// Recognise the section name and match it with the DebugSectionKind.
+ static std::optional<DebugSectionKind> parseDebugSectionName(StringRef Name);
+
+ /// When objects (e.g. compile units) are glued into a single file,
+ /// the debug sections corresponding to a concrete object are assigned
+ /// offsets inside the whole file. This method returns the offset of
+ /// the \p SectionKind debug section corresponding to this object.
+ uint64_t getStartOffset(DebugSectionKind SectionKind) const {
+ return Offsets[static_cast<
+ typename std::underlying_type<DebugSectionKind>::type>(SectionKind)];
+ }
+
+ /// Set offset to the start of specified \p SectionKind debug section,
+ /// corresponding to this object.
+ void setStartOffset(DebugSectionKind SectionKind, uint64_t Offset) {
+ Offsets[static_cast<typename std::underlying_type<DebugSectionKind>::type>(
+ SectionKind)] = Offset;
+ }
+
+protected:
+ /// Offsets to the debug sections composing this object.
+ std::array<uint64_t, SectionKindsNum> Offsets = {0};
+};
+
+} // end of namespace dwarflinker_parallel
+} // end namespace llvm
+
+#endif // LLVM_LIB_DWARFLINKERPARALLEL_OUTPUTSECTIONS_H
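A usage sketch for the offset bookkeeping described above, with a placeholder offset value; each object being glued into the output would carry one such record per tracked section.

// Hypothetical per-object offset tracking.
OutputSections ObjOffsets;
ObjOffsets.setStartOffset(OutputSections::DebugSectionKind::DebugInfo, 0x1000);
uint64_t InfoStart =
    ObjOffsets.getStartOffset(OutputSections::DebugSectionKind::DebugInfo); // 0x1000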
diff --git a/llvm/include/llvm/ADT/Triple.h b/llvm/lib/DWARFLinkerParallel/StringPool.cpp
index 1b026f74ed1f..fbff6b05e3a5 100644
--- a/llvm/include/llvm/ADT/Triple.h
+++ b/llvm/lib/DWARFLinkerParallel/StringPool.cpp
@@ -1,15 +1,9 @@
-//===-- llvm/ADT/Triple.h ---------------------------------------*- C++ -*-===//
+//=== StringPool.cpp ------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This header is deprecated in favour of
-/// `llvm/TargetParser/Triple.h`.
-///
-//===----------------------------------------------------------------------===//
-#include "llvm/TargetParser/Triple.h"
+#include "llvm/DWARFLinkerParallel/StringPool.h"
diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp
index 50447042bbb8..89101ca7e573 100644
--- a/llvm/lib/DWP/DWP.cpp
+++ b/llvm/lib/DWP/DWP.cpp
@@ -11,12 +11,14 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/DWP/DWP.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/DWP/DWPError.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
#include "llvm/Object/Decompressor.h"
#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include <limits>
@@ -178,12 +180,28 @@ static StringRef getSubsection(StringRef Section,
return Section.substr(Off->getOffset(), Off->getLength());
}
-static void
-addAllTypesFromDWP(MCStreamer &Out,
- MapVector<uint64_t, UnitIndexEntry> &TypeIndexEntries,
- const DWARFUnitIndex &TUIndex, MCSection *OutputTypes,
- StringRef Types, const UnitIndexEntry &TUEntry,
- uint32_t &TypesOffset, unsigned TypesContributionIndex) {
+static Error sectionOverflowErrorOrWarning(uint32_t PrevOffset,
+ uint32_t OverflowedOffset,
+ StringRef SectionName,
+ bool ContinueOnCuIndexOverflow) {
+ std::string Msg =
+ (SectionName +
+ Twine(" Section Contribution Offset overflow 4G. Previous Offset ") +
+ Twine(PrevOffset) + Twine(", After overflow offset ") +
+ Twine(OverflowedOffset) + Twine("."))
+ .str();
+ if (ContinueOnCuIndexOverflow) {
+ WithColor::defaultWarningHandler(make_error<DWPError>(Msg));
+ return Error::success();
+ }
+ return make_error<DWPError>(Msg);
+}
+
+static Error addAllTypesFromDWP(
+ MCStreamer &Out, MapVector<uint64_t, UnitIndexEntry> &TypeIndexEntries,
+ const DWARFUnitIndex &TUIndex, MCSection *OutputTypes, StringRef Types,
+ const UnitIndexEntry &TUEntry, uint32_t &TypesOffset,
+ unsigned TypesContributionIndex, bool ContinueOnCuIndexOverflow) {
Out.switchSection(OutputTypes);
for (const DWARFUnitIndex::Entry &E : TUIndex.getRows()) {
auto *I = E.getContributions();
@@ -210,14 +228,23 @@ addAllTypesFromDWP(MCStreamer &Out,
TUEntry.Contributions[TypesContributionIndex].getOffset(),
C.getLength()));
C.setOffset(TypesOffset);
+ uint32_t OldOffset = TypesOffset;
+ static_assert(sizeof(OldOffset) == sizeof(TypesOffset));
TypesOffset += C.getLength();
+ if (OldOffset > TypesOffset) {
+ if (Error Err = sectionOverflowErrorOrWarning(
+ OldOffset, TypesOffset, "Types", ContinueOnCuIndexOverflow))
+ return Err;
+ }
}
+ return Error::success();
}
-static void addAllTypesFromTypesSection(
+static Error addAllTypesFromTypesSection(
MCStreamer &Out, MapVector<uint64_t, UnitIndexEntry> &TypeIndexEntries,
MCSection *OutputTypes, const std::vector<StringRef> &TypesSections,
- const UnitIndexEntry &CUEntry, uint32_t &TypesOffset) {
+ const UnitIndexEntry &CUEntry, uint32_t &TypesOffset,
+ bool ContinueOnCuIndexOverflow) {
for (StringRef Types : TypesSections) {
Out.switchSection(OutputTypes);
uint64_t Offset = 0;
@@ -243,9 +270,16 @@ static void addAllTypesFromTypesSection(
continue;
Out.emitBytes(Types.substr(PrevOffset, C.getLength32()));
+ uint32_t OldOffset = TypesOffset;
TypesOffset += C.getLength32();
+ if (OldOffset > TypesOffset) {
+ if (Error Err = sectionOverflowErrorOrWarning(
+ OldOffset, TypesOffset, "types", ContinueOnCuIndexOverflow))
+ return Err;
+ }
}
}
+ return Error::success();
}
static std::string buildDWODescription(StringRef Name, StringRef DWPName,
@@ -548,7 +582,8 @@ Error handleSection(
return Error::success();
}
-Error write(MCStreamer &Out, ArrayRef<std::string> Inputs) {
+Error write(MCStreamer &Out, ArrayRef<std::string> Inputs,
+ bool ContinueOnCuIndexOverflow) {
const auto &MCOFI = *Out.getContext().getObjectFileInfo();
MCSection *const StrSection = MCOFI.getDwarfStrDWOSection();
MCSection *const StrOffsetSection = MCOFI.getDwarfStrOffDWOSection();
@@ -646,7 +681,19 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs) {
auto Index = getContributionIndex(Pair.first, IndexVersion);
CurEntry.Contributions[Index].setOffset(ContributionOffsets[Index]);
CurEntry.Contributions[Index].setLength(Pair.second);
+ uint32_t OldOffset = ContributionOffsets[Index];
ContributionOffsets[Index] += CurEntry.Contributions[Index].getLength32();
+ if (OldOffset > ContributionOffsets[Index]) {
+ uint32_t SectionIndex = 0;
+ for (auto &Section : Obj.sections()) {
+ if (SectionIndex == Index) {
+ return sectionOverflowErrorOrWarning(
+ OldOffset, ContributionOffsets[Index], *Section.getName(),
+ ContinueOnCuIndexOverflow);
+ }
+ ++SectionIndex;
+ }
+ }
}
uint32_t &InfoSectionOffset =
@@ -670,9 +717,12 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs) {
C.setLength(Header.Length + 4);
if (std::numeric_limits<uint32_t>::max() - InfoSectionOffset <
- C.getLength32())
- return make_error<DWPError>(
- "debug information section offset is greater than 4GB");
+ C.getLength32()) {
+ if (Error Err = sectionOverflowErrorOrWarning(
+ InfoSectionOffset, InfoSectionOffset + C.getLength32(),
+ "debug_info", ContinueOnCuIndexOverflow))
+ return Err;
+ }
UnitOffset += C.getLength32();
if (Header.Version < 5 ||
@@ -709,9 +759,11 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs) {
if (IndexVersion == 2) {
// Add types from the .debug_types section from DWARF < 5.
- addAllTypesFromTypesSection(
- Out, TypeIndexEntries, TypesSection, CurTypesSection, CurEntry,
- ContributionOffsets[getContributionIndex(DW_SECT_EXT_TYPES, 2)]);
+ if (Error Err = addAllTypesFromTypesSection(
+ Out, TypeIndexEntries, TypesSection, CurTypesSection, CurEntry,
+ ContributionOffsets[getContributionIndex(DW_SECT_EXT_TYPES, 2)],
+ ContinueOnCuIndexOverflow))
+ return Err;
}
continue;
}
@@ -805,10 +857,11 @@ Error write(MCStreamer &Out, ArrayRef<std::string> Inputs) {
unsigned TypesContributionIndex =
getContributionIndex(TUSectionKind, IndexVersion);
- addAllTypesFromDWP(Out, TypeIndexEntries, TUIndex, OutSection,
- TypeInputSection, CurEntry,
- ContributionOffsets[TypesContributionIndex],
- TypesContributionIndex);
+ if (Error Err = addAllTypesFromDWP(
+ Out, TypeIndexEntries, TUIndex, OutSection, TypeInputSection,
+ CurEntry, ContributionOffsets[TypesContributionIndex],
+ TypesContributionIndex, ContinueOnCuIndexOverflow))
+ return Err;
}
}
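The overflow checks added above all rely on the same wrap-around property of 32-bit arithmetic; a minimal sketch, with Offset and Length standing for any of the 32-bit contribution offsets and lengths updated in this file:

// Unsigned 32-bit addition wraps, so the sum comparing smaller than the old
// offset signals that the contribution crossed the 4 GiB cu-index limit.
uint32_t OldOffset = Offset;
Offset += Length;
if (OldOffset > Offset) {
  // overflow: warn and continue, or fail, per ContinueOnCuIndexOverflow
}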
diff --git a/llvm/lib/DebugInfo/BTF/BTFContext.cpp b/llvm/lib/DebugInfo/BTF/BTFContext.cpp
new file mode 100644
index 000000000000..24898739b824
--- /dev/null
+++ b/llvm/lib/DebugInfo/BTF/BTFContext.cpp
@@ -0,0 +1,69 @@
+//===- BTFContext.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the BTFContext interface; it is used by the
+// llvm-objdump tool to print source code alongside disassembly.
+// Currently it is a simple wrapper around a BTFParser instance.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/BTF/BTFContext.h"
+
+#define DEBUG_TYPE "debug-info-btf-context"
+
+using namespace llvm;
+using object::ObjectFile;
+using object::SectionedAddress;
+
+DILineInfo BTFContext::getLineInfoForAddress(SectionedAddress Address,
+ DILineInfoSpecifier Specifier) {
+ const BTF::BPFLineInfo *LineInfo = BTF.findLineInfo(Address);
+ DILineInfo Result;
+ if (!LineInfo)
+ return Result;
+
+ Result.LineSource = BTF.findString(LineInfo->LineOff);
+ Result.FileName = BTF.findString(LineInfo->FileNameOff);
+ Result.Line = LineInfo->getLine();
+ Result.Column = LineInfo->getCol();
+ return Result;
+}
+
+DILineInfo BTFContext::getLineInfoForDataAddress(SectionedAddress Address) {
+ // BTF does not convey such information.
+ return {};
+}
+
+DILineInfoTable
+BTFContext::getLineInfoForAddressRange(SectionedAddress Address, uint64_t Size,
+ DILineInfoSpecifier Specifier) {
+ // This function is used only from llvm-rtdyld utility and a few
+ // JITEventListener implementations. Ignore it for now.
+ return {};
+}
+
+DIInliningInfo
+BTFContext::getInliningInfoForAddress(SectionedAddress Address,
+ DILineInfoSpecifier Specifier) {
+ // BTF does not convey such information
+ return {};
+}
+
+std::vector<DILocal> BTFContext::getLocalsForAddress(SectionedAddress Address) {
+ // BTF does not convey such information
+ return {};
+}
+
+std::unique_ptr<BTFContext>
+BTFContext::create(const ObjectFile &Obj,
+ std::function<void(Error)> ErrorHandler) {
+ auto Ctx = std::make_unique<BTFContext>();
+ if (Error E = Ctx->BTF.parse(Obj))
+ ErrorHandler(std::move(E));
+ return Ctx;
+}
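A hypothetical lookup through the new BTFContext; Obj is assumed to be an already-loaded object::ObjectFile with .BTF/.BTF.ext sections, and the section index and instruction offset are placeholders.

std::unique_ptr<llvm::BTFContext> Ctx = llvm::BTFContext::create(
    Obj, [](llvm::Error E) { llvm::logAllUnhandledErrors(std::move(E), llvm::errs()); });
llvm::object::SectionedAddress Addr;
Addr.SectionIndex = 1;  // placeholder: index of the section holding the BPF program
Addr.Address = 0x10;    // placeholder: byte offset of the instruction in that section
llvm::DILineInfo Info = Ctx->getLineInfoForAddress(Addr, llvm::DILineInfoSpecifier());
// Info.FileName, Info.Line and Info.LineSource are filled from .BTF.ext line info.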
diff --git a/llvm/lib/DebugInfo/BTF/BTFParser.cpp b/llvm/lib/DebugInfo/BTF/BTFParser.cpp
new file mode 100644
index 000000000000..6151e1b15cbb
--- /dev/null
+++ b/llvm/lib/DebugInfo/BTF/BTFParser.cpp
@@ -0,0 +1,283 @@
+//===- BTFParser.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// BTFParser reads/interprets .BTF and .BTF.ext ELF sections.
+// Refer to BTFParser.h for API description.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/BTF/BTFParser.h"
+#include "llvm/Support/Errc.h"
+
+#define DEBUG_TYPE "debug-info-btf-parser"
+
+using namespace llvm;
+using object::ObjectFile;
+using object::SectionedAddress;
+using object::SectionRef;
+
+const char BTFSectionName[] = ".BTF";
+const char BTFExtSectionName[] = ".BTF.ext";
+
+// Utility class with API similar to raw_ostream but can be cast
+// to Error, e.g.:
+//
+// Error foo(...) {
+// ...
+// if (Error E = bar(...))
+// return Err("error while foo(): ") << E;
+// ...
+// }
+//
+namespace {
+class Err {
+ std::string Buffer;
+ raw_string_ostream Stream;
+
+public:
+ Err(const char *InitialMsg) : Buffer(InitialMsg), Stream(Buffer) {}
+ Err(const char *SectionName, DataExtractor::Cursor &C)
+ : Buffer(), Stream(Buffer) {
+ *this << "error while reading " << SectionName
+ << " section: " << C.takeError();
+ };
+
+ template <typename T> Err &operator<<(T Val) {
+ Stream << Val;
+ return *this;
+ }
+
+ Err &write_hex(unsigned long long Val) {
+ Stream.write_hex(Val);
+ return *this;
+ }
+
+ Err &operator<<(Error Val) {
+ handleAllErrors(std::move(Val),
+ [=](ErrorInfoBase &Info) { Stream << Info.message(); });
+ return *this;
+ }
+
+ operator Error() const {
+ return make_error<StringError>(Buffer, errc::invalid_argument);
+ }
+};
+} // anonymous namespace
+
+// ParseContext wraps information that is only necessary while parsing
+// ObjectFile and can be discarded once parsing is done.
+// Used by BTFParser::parse* auxiliary functions.
+struct BTFParser::ParseContext {
+ const ObjectFile &Obj;
+ // Map from ELF section name to SectionRef
+ DenseMap<StringRef, SectionRef> Sections;
+
+public:
+ ParseContext(const ObjectFile &Obj) : Obj(Obj) {}
+
+ Expected<DataExtractor> makeExtractor(SectionRef Sec) {
+ Expected<StringRef> Contents = Sec.getContents();
+ if (!Contents)
+ return Contents.takeError();
+ return DataExtractor(Contents.get(), Obj.isLittleEndian(),
+ Obj.getBytesInAddress());
+ }
+
+ std::optional<SectionRef> findSection(StringRef Name) const {
+ auto It = Sections.find(Name);
+ if (It != Sections.end())
+ return It->second;
+ return std::nullopt;
+ }
+};
+
+Error BTFParser::parseBTF(ParseContext &Ctx, SectionRef BTF) {
+ Expected<DataExtractor> MaybeExtractor = Ctx.makeExtractor(BTF);
+ if (!MaybeExtractor)
+ return MaybeExtractor.takeError();
+
+ DataExtractor &Extractor = MaybeExtractor.get();
+ DataExtractor::Cursor C = DataExtractor::Cursor(0);
+ uint16_t Magic = Extractor.getU16(C);
+ if (!C)
+ return Err(".BTF", C);
+ if (Magic != BTF::MAGIC)
+ return Err("invalid .BTF magic: ").write_hex(Magic);
+ uint8_t Version = Extractor.getU8(C);
+ if (!C)
+ return Err(".BTF", C);
+ if (Version != 1)
+ return Err("unsupported .BTF version: ") << (unsigned)Version;
+ (void)Extractor.getU8(C); // flags
+ uint32_t HdrLen = Extractor.getU32(C);
+ if (!C)
+ return Err(".BTF", C);
+ if (HdrLen < 8)
+ return Err("unexpected .BTF header length: ") << HdrLen;
+ (void)Extractor.getU32(C); // type_off
+ (void)Extractor.getU32(C); // type_len
+ uint32_t StrOff = Extractor.getU32(C);
+ uint32_t StrLen = Extractor.getU32(C);
+ uint32_t StrStart = HdrLen + StrOff;
+ uint32_t StrEnd = StrStart + StrLen;
+ if (!C)
+ return Err(".BTF", C);
+ if (Extractor.getData().size() < StrEnd)
+ return Err("invalid .BTF section size, expecting at-least ")
+ << StrEnd << " bytes";
+
+ StringsTable = Extractor.getData().substr(StrStart, StrLen);
+ return Error::success();
+}
+
+Error BTFParser::parseBTFExt(ParseContext &Ctx, SectionRef BTFExt) {
+ Expected<DataExtractor> MaybeExtractor = Ctx.makeExtractor(BTFExt);
+ if (!MaybeExtractor)
+ return MaybeExtractor.takeError();
+
+ DataExtractor &Extractor = MaybeExtractor.get();
+ DataExtractor::Cursor C = DataExtractor::Cursor(0);
+ uint16_t Magic = Extractor.getU16(C);
+ if (!C)
+ return Err(".BTF.ext", C);
+ if (Magic != BTF::MAGIC)
+ return Err("invalid .BTF.ext magic: ").write_hex(Magic);
+ uint8_t Version = Extractor.getU8(C);
+ if (!C)
+ return Err(".BTF.ext", C);
+ if (Version != 1)
+ return Err("unsupported .BTF.ext version: ") << (unsigned)Version;
+ (void)Extractor.getU8(C); // flags
+ uint32_t HdrLen = Extractor.getU32(C);
+ if (!C)
+ return Err(".BTF.ext", C);
+ if (HdrLen < 8)
+ return Err("unexpected .BTF.ext header length: ") << HdrLen;
+ (void)Extractor.getU32(C); // func_info_off
+ (void)Extractor.getU32(C); // func_info_len
+ uint32_t LineInfoOff = Extractor.getU32(C);
+ uint32_t LineInfoLen = Extractor.getU32(C);
+ if (!C)
+ return Err(".BTF.ext", C);
+ uint32_t LineInfoStart = HdrLen + LineInfoOff;
+ uint32_t LineInfoEnd = LineInfoStart + LineInfoLen;
+ if (Error E = parseLineInfo(Ctx, Extractor, LineInfoStart, LineInfoEnd))
+ return E;
+
+ return Error::success();
+}
+
+Error BTFParser::parseLineInfo(ParseContext &Ctx, DataExtractor &Extractor,
+ uint64_t LineInfoStart, uint64_t LineInfoEnd) {
+ DataExtractor::Cursor C = DataExtractor::Cursor(LineInfoStart);
+ uint32_t RecSize = Extractor.getU32(C);
+ if (!C)
+ return Err(".BTF.ext", C);
+ if (RecSize < 16)
+ return Err("unexpected .BTF.ext line info record length: ") << RecSize;
+
+ while (C && C.tell() < LineInfoEnd) {
+ uint32_t SecNameOff = Extractor.getU32(C);
+ uint32_t NumInfo = Extractor.getU32(C);
+ StringRef SecName = findString(SecNameOff);
+ std::optional<SectionRef> Sec = Ctx.findSection(SecName);
+ if (!C)
+ return Err(".BTF.ext", C);
+ if (!Sec)
+ return Err("") << "can't find section '" << SecName
+ << "' while parsing .BTF.ext line info";
+ BTFLinesVector &Lines = SectionLines[Sec->getIndex()];
+ for (uint32_t I = 0; C && I < NumInfo; ++I) {
+ uint64_t RecStart = C.tell();
+ uint32_t InsnOff = Extractor.getU32(C);
+ uint32_t FileNameOff = Extractor.getU32(C);
+ uint32_t LineOff = Extractor.getU32(C);
+ uint32_t LineCol = Extractor.getU32(C);
+ if (!C)
+ return Err(".BTF.ext", C);
+ Lines.push_back({InsnOff, FileNameOff, LineOff, LineCol});
+ C.seek(RecStart + RecSize);
+ }
+ llvm::stable_sort(Lines,
+ [](const BTF::BPFLineInfo &L, const BTF::BPFLineInfo &R) {
+ return L.InsnOffset < R.InsnOffset;
+ });
+ }
+ if (!C)
+ return Err(".BTF.ext", C);
+
+ return Error::success();
+}
+
+Error BTFParser::parse(const ObjectFile &Obj) {
+ StringsTable = StringRef();
+ SectionLines.clear();
+
+ ParseContext Ctx(Obj);
+ std::optional<SectionRef> BTF;
+ std::optional<SectionRef> BTFExt;
+ for (SectionRef Sec : Obj.sections()) {
+ Expected<StringRef> MaybeName = Sec.getName();
+ if (!MaybeName)
+ return Err("error while reading section name: ") << MaybeName.takeError();
+ Ctx.Sections[*MaybeName] = Sec;
+ if (*MaybeName == BTFSectionName)
+ BTF = Sec;
+ if (*MaybeName == BTFExtSectionName)
+ BTFExt = Sec;
+ }
+ if (!BTF)
+ return Err("can't find .BTF section");
+ if (!BTFExt)
+ return Err("can't find .BTF.ext section");
+ if (Error E = parseBTF(Ctx, *BTF))
+ return E;
+ if (Error E = parseBTFExt(Ctx, *BTFExt))
+ return E;
+
+ return Error::success();
+}
+
+bool BTFParser::hasBTFSections(const ObjectFile &Obj) {
+ bool HasBTF = false;
+ bool HasBTFExt = false;
+ for (SectionRef Sec : Obj.sections()) {
+ Expected<StringRef> Name = Sec.getName();
+ if (Error E = Name.takeError()) {
+ logAllUnhandledErrors(std::move(E), errs());
+ continue;
+ }
+ HasBTF |= *Name == BTFSectionName;
+ HasBTFExt |= *Name == BTFExtSectionName;
+ if (HasBTF && HasBTFExt)
+ return true;
+ }
+ return false;
+}
+
+StringRef BTFParser::findString(uint32_t Offset) const {
+ return StringsTable.slice(Offset, StringsTable.find(0, Offset));
+}
+
+const BTF::BPFLineInfo *
+BTFParser::findLineInfo(SectionedAddress Address) const {
+ auto MaybeSecInfo = SectionLines.find(Address.SectionIndex);
+ if (MaybeSecInfo == SectionLines.end())
+ return nullptr;
+
+ const BTFLinesVector &SecInfo = MaybeSecInfo->second;
+ const uint64_t TargetOffset = Address.Address;
+ BTFLinesVector::const_iterator LineInfo =
+ llvm::partition_point(SecInfo, [=](const BTF::BPFLineInfo &Line) {
+ return Line.InsnOffset < TargetOffset;
+ });
+ if (LineInfo == SecInfo.end() || LineInfo->InsnOffset != Address.Address)
+ return nullptr;
+
+ return LineInfo;
+}
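The lookup in findLineInfo() above depends on the per-section records staying sorted by instruction offset; a self-contained sketch of the same partition_point pattern:

#include <algorithm>
#include <cstdint>
#include <vector>

struct LineRec { uint32_t InsnOffset; };

// Returns the record whose offset equals Target, or nullptr if there is none.
const LineRec *findExact(const std::vector<LineRec> &Lines, uint64_t Target) {
  auto It = std::partition_point(Lines.begin(), Lines.end(),
                                 [=](const LineRec &L) { return L.InsnOffset < Target; });
  return (It != Lines.end() && It->InsnOffset == Target) ? &*It : nullptr;
}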
diff --git a/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
index aea672976017..0d0a357dce68 100644
--- a/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
+++ b/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/GUID.h"
#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
diff --git a/llvm/lib/DebugInfo/CodeView/EnumTables.cpp b/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
index 78a258600696..b2f0099bd01c 100644
--- a/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
+++ b/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
@@ -105,6 +105,7 @@ static const EnumEntry<codeview::SourceLanguage> SourceLanguages[] = {
CV_ENUM_ENT(SourceLanguage, JScript), CV_ENUM_ENT(SourceLanguage, MSIL),
CV_ENUM_ENT(SourceLanguage, HLSL), CV_ENUM_ENT(SourceLanguage, D),
CV_ENUM_ENT(SourceLanguage, Swift), CV_ENUM_ENT(SourceLanguage, Rust),
+ CV_ENUM_ENT(SourceLanguage, ObjC), CV_ENUM_ENT(SourceLanguage, ObjCpp),
};
static const EnumEntry<uint32_t> CompileSym2FlagNames[] = {
diff --git a/llvm/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp b/llvm/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp
index e44dec6d6396..046b660abfab 100644
--- a/llvm/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp
+++ b/llvm/lib/DebugInfo/CodeView/TypeRecordHelpers.cpp
@@ -144,15 +144,15 @@ uint64_t llvm::codeview::getSizeInBytesForTypeIndex(TypeIndex TI) {
// Complex float.
case SimpleTypeKind::Complex16:
- return 2;
- case SimpleTypeKind::Complex32:
return 4;
- case SimpleTypeKind::Complex64:
+ case SimpleTypeKind::Complex32:
return 8;
+ case SimpleTypeKind::Complex64:
+ return 16;
case SimpleTypeKind::Complex80:
- return 10;
+ return 20;
case SimpleTypeKind::Complex128:
- return 16;
+ return 32;
default:
return 0;
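The size changes above follow from a ComplexN value holding two N-bit floating-point components, so its byte size is 2 * (N / 8); the previous table returned the size of a single component. A one-line check of that rule:

constexpr unsigned complexSizeInBytes(unsigned BitsPerComponent) {
  return 2 * (BitsPerComponent / 8); // Complex16 -> 4, Complex32 -> 8, ..., Complex128 -> 32
}
static_assert(complexSizeInBytes(80) == 20, "Complex80 occupies two 10-byte components");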
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
index 5b5b887e2a50..ecdbd004efad 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
@@ -34,22 +34,32 @@ DWARFAbbreviationDeclaration::DWARFAbbreviationDeclaration() {
clear();
}
-bool
-DWARFAbbreviationDeclaration::extract(DataExtractor Data,
- uint64_t* OffsetPtr) {
+llvm::Expected<DWARFAbbreviationDeclaration::ExtractState>
+DWARFAbbreviationDeclaration::extract(DataExtractor Data, uint64_t *OffsetPtr) {
clear();
const uint64_t Offset = *OffsetPtr;
- Code = Data.getULEB128(OffsetPtr);
- if (Code == 0) {
- return false;
- }
+ Error Err = Error::success();
+ Code = Data.getULEB128(OffsetPtr, &Err);
+ if (Err)
+ return std::move(Err);
+
+ if (Code == 0)
+ return ExtractState::Complete;
+
CodeByteSize = *OffsetPtr - Offset;
- Tag = static_cast<llvm::dwarf::Tag>(Data.getULEB128(OffsetPtr));
+ Tag = static_cast<llvm::dwarf::Tag>(Data.getULEB128(OffsetPtr, &Err));
+ if (Err)
+ return std::move(Err);
+
if (Tag == DW_TAG_null) {
clear();
- return false;
+ return make_error<llvm::object::GenericBinaryError>(
+ "abbreviation declaration requires a non-null tag");
}
- uint8_t ChildrenByte = Data.getU8(OffsetPtr);
+ uint8_t ChildrenByte = Data.getU8(OffsetPtr, &Err);
+ if (Err)
+ return std::move(Err);
+
HasChildren = (ChildrenByte == DW_CHILDREN_yes);
// Assign a value to our optional FixedAttributeSize member variable. If
// this member variable still has a value after the while loop below, then
@@ -57,70 +67,82 @@ DWARFAbbreviationDeclaration::extract(DataExtractor Data,
FixedAttributeSize = FixedSizeInfo();
// Read all of the abbreviation attributes and forms.
- while (true) {
- auto A = static_cast<Attribute>(Data.getULEB128(OffsetPtr));
- auto F = static_cast<Form>(Data.getULEB128(OffsetPtr));
- if (A && F) {
- bool IsImplicitConst = (F == DW_FORM_implicit_const);
- if (IsImplicitConst) {
- int64_t V = Data.getSLEB128(OffsetPtr);
- AttributeSpecs.push_back(AttributeSpec(A, F, V));
- continue;
- }
- std::optional<uint8_t> ByteSize;
- // If this abbrevation still has a fixed byte size, then update the
- // FixedAttributeSize as needed.
- switch (F) {
- case DW_FORM_addr:
- if (FixedAttributeSize)
- ++FixedAttributeSize->NumAddrs;
- break;
+ while (Data.isValidOffset(*OffsetPtr)) {
+ auto A = static_cast<Attribute>(Data.getULEB128(OffsetPtr, &Err));
+ if (Err)
+ return std::move(Err);
+
+ auto F = static_cast<Form>(Data.getULEB128(OffsetPtr, &Err));
+ if (Err)
+ return std::move(Err);
+
+ // We successfully reached the end of this abbreviation declaration
+ // since both attribute and form are zero. There may be more abbreviation
+ // declarations afterwards.
+ if (!A && !F)
+ return ExtractState::MoreItems;
+
+ if (!A || !F) {
+ // Attribute and form pairs must either both be non-zero, in which case
+ // they are added to the abbreviation declaration, or both be zero to
+ // terminate the abbreviation declaration. In this case only one was
+ // zero which is an error.
+ clear();
+ return make_error<llvm::object::GenericBinaryError>(
+ "malformed abbreviation declaration attribute. Either the attribute "
+ "or the form is zero while the other is not");
+ }
- case DW_FORM_ref_addr:
- if (FixedAttributeSize)
- ++FixedAttributeSize->NumRefAddrs;
- break;
+ bool IsImplicitConst = (F == DW_FORM_implicit_const);
+ if (IsImplicitConst) {
+ int64_t V = Data.getSLEB128(OffsetPtr);
+ AttributeSpecs.push_back(AttributeSpec(A, F, V));
+ continue;
+ }
+ std::optional<uint8_t> ByteSize;
+ // If this abbreviation still has a fixed byte size, then update the
+ // FixedAttributeSize as needed.
+ switch (F) {
+ case DW_FORM_addr:
+ if (FixedAttributeSize)
+ ++FixedAttributeSize->NumAddrs;
+ break;
- case DW_FORM_strp:
- case DW_FORM_GNU_ref_alt:
- case DW_FORM_GNU_strp_alt:
- case DW_FORM_line_strp:
- case DW_FORM_sec_offset:
- case DW_FORM_strp_sup:
- if (FixedAttributeSize)
- ++FixedAttributeSize->NumDwarfOffsets;
- break;
+ case DW_FORM_ref_addr:
+ if (FixedAttributeSize)
+ ++FixedAttributeSize->NumRefAddrs;
+ break;
+
+ case DW_FORM_strp:
+ case DW_FORM_GNU_ref_alt:
+ case DW_FORM_GNU_strp_alt:
+ case DW_FORM_line_strp:
+ case DW_FORM_sec_offset:
+ case DW_FORM_strp_sup:
+ if (FixedAttributeSize)
+ ++FixedAttributeSize->NumDwarfOffsets;
+ break;
- default:
- // The form has a byte size that doesn't depend on Params.
- // If it's a fixed size, keep track of it.
- if ((ByteSize = dwarf::getFixedFormByteSize(F, dwarf::FormParams()))) {
- if (FixedAttributeSize)
- FixedAttributeSize->NumBytes += *ByteSize;
- break;
- }
- // Indicate we no longer have a fixed byte size for this
- // abbreviation by clearing the FixedAttributeSize optional value
- // so it doesn't have a value.
- FixedAttributeSize.reset();
+ default:
+ // The form has a byte size that doesn't depend on Params.
+ // If it's a fixed size, keep track of it.
+ if ((ByteSize = dwarf::getFixedFormByteSize(F, dwarf::FormParams()))) {
+ if (FixedAttributeSize)
+ FixedAttributeSize->NumBytes += *ByteSize;
break;
}
- // Record this attribute and its fixed size if it has one.
- AttributeSpecs.push_back(AttributeSpec(A, F, ByteSize));
- } else if (A == 0 && F == 0) {
- // We successfully reached the end of this abbreviation declaration
- // since both attribute and form are zero.
+ // Indicate we no longer have a fixed byte size for this
+ // abbreviation by clearing the FixedAttributeSize optional value
+ // so it doesn't have a value.
+ FixedAttributeSize.reset();
break;
- } else {
- // Attribute and form pairs must either both be non-zero, in which case
- // they are added to the abbreviation declaration, or both be zero to
- // terminate the abbrevation declaration. In this case only one was
- // zero which is an error.
- clear();
- return false;
}
+ // Record this attribute and its fixed size if it has one.
+ AttributeSpecs.push_back(AttributeSpec(A, F, ByteSize));
}
- return true;
+ return make_error<llvm::object::GenericBinaryError>(
+ "abbreviation declaration attribute list was not terminated with a null "
+ "entry");
}
void DWARFAbbreviationDeclaration::dump(raw_ostream &OS) const {
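A sketch of how a caller consumes the new Expected<ExtractState> form of extract() (previously a bool); Data is an assumed in-scope DataExtractor over .debug_abbrev, and the enclosing function is assumed to return llvm::Error.

uint64_t Offset = 0;
while (true) {
  DWARFAbbreviationDeclaration Decl;
  llvm::Expected<DWARFAbbreviationDeclaration::ExtractState> S =
      Decl.extract(Data, &Offset);
  if (!S)
    return S.takeError(); // malformed or truncated declaration
  if (*S == DWARFAbbreviationDeclaration::ExtractState::Complete)
    break;                // null abbreviation code: end of this set
  // ExtractState::MoreItems: Decl is valid, record it and read the next one.
}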
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 889d3f0915b0..14962cd36c23 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -54,13 +54,11 @@ Error AppleAcceleratorTable::extract() {
Hdr.BucketCount = AccelSection.getU32(&Offset);
Hdr.HashCount = AccelSection.getU32(&Offset);
Hdr.HeaderDataLength = AccelSection.getU32(&Offset);
+ FormParams = {Hdr.Version, 0, dwarf::DwarfFormat::DWARF32};
// Check that we can read all the hashes and offsets from the
// section (see SourceLevelDebugging.rst for the structure of the index).
- // We need to substract one because we're checking for an *offset* which is
- // equal to the size for an empty table and hence pointer after the section.
- if (!AccelSection.isValidOffset(sizeof(Hdr) + Hdr.HeaderDataLength +
- Hdr.BucketCount * 4 + Hdr.HashCount * 8 - 1))
+ if (!AccelSection.isValidOffset(getIthBucketBase(Hdr.BucketCount - 1)))
return createStringError(
errc::illegal_byte_sequence,
"Section too small: cannot read buckets and hashes.");
@@ -68,20 +66,35 @@ Error AppleAcceleratorTable::extract() {
HdrData.DIEOffsetBase = AccelSection.getU32(&Offset);
uint32_t NumAtoms = AccelSection.getU32(&Offset);
+ HashDataEntryLength = 0;
+ auto MakeUnsupportedFormError = [](dwarf::Form Form) {
+ return createStringError(errc::not_supported,
+ "Unsupported form:" +
+ dwarf::FormEncodingString(Form));
+ };
+
for (unsigned i = 0; i < NumAtoms; ++i) {
uint16_t AtomType = AccelSection.getU16(&Offset);
auto AtomForm = static_cast<dwarf::Form>(AccelSection.getU16(&Offset));
HdrData.Atoms.push_back(std::make_pair(AtomType, AtomForm));
+
+ std::optional<uint8_t> FormSize =
+ dwarf::getFixedFormByteSize(AtomForm, FormParams);
+ if (!FormSize)
+ return MakeUnsupportedFormError(AtomForm);
+ HashDataEntryLength += *FormSize;
}
IsValid = true;
return Error::success();
}
-uint32_t AppleAcceleratorTable::getNumBuckets() { return Hdr.BucketCount; }
-uint32_t AppleAcceleratorTable::getNumHashes() { return Hdr.HashCount; }
-uint32_t AppleAcceleratorTable::getSizeHdr() { return sizeof(Hdr); }
-uint32_t AppleAcceleratorTable::getHeaderDataLength() {
+uint32_t AppleAcceleratorTable::getNumBuckets() const {
+ return Hdr.BucketCount;
+}
+uint32_t AppleAcceleratorTable::getNumHashes() const { return Hdr.HashCount; }
+uint32_t AppleAcceleratorTable::getSizeHdr() const { return sizeof(Hdr); }
+uint32_t AppleAcceleratorTable::getHeaderDataLength() const {
return Hdr.HeaderDataLength;
}
@@ -114,7 +127,6 @@ std::pair<uint64_t, dwarf::Tag>
AppleAcceleratorTable::readAtoms(uint64_t *HashDataOffset) {
uint64_t DieOffset = dwarf::DW_INVALID_OFFSET;
dwarf::Tag DieTag = dwarf::DW_TAG_null;
- dwarf::FormParams FormParams = {Hdr.Version, 0, dwarf::DwarfFormat::DWARF32};
for (auto Atom : getAtomsDesc()) {
DWARFFormValue FormValue(Atom.second);
@@ -163,7 +175,6 @@ std::optional<uint64_t> AppleAcceleratorTable::HeaderData::extractOffset(
bool AppleAcceleratorTable::dumpName(ScopedPrinter &W,
SmallVectorImpl<DWARFFormValue> &AtomForms,
uint64_t *DataOffset) const {
- dwarf::FormParams FormParams = {Hdr.Version, 0, dwarf::DwarfFormat::DWARF32};
uint64_t NameOffset = *DataOffset;
if (!AccelSection.isValidOffsetForDataOfSize(*DataOffset, 4)) {
W.printString("Incorrectly terminated list.");
@@ -209,6 +220,7 @@ LLVM_DUMP_METHOD void AppleAcceleratorTable::dump(raw_ostream &OS) const {
W.printNumber("DIE offset base", HdrData.DIEOffsetBase);
W.printNumber("Number of atoms", uint64_t(HdrData.Atoms.size()));
+ W.printNumber("Size of each hash data entry", getHashDataEntryLength());
SmallVector<DWARFFormValue, 3> AtomForms;
{
ListScope AtomsScope(W, "Atoms");
@@ -255,41 +267,33 @@ LLVM_DUMP_METHOD void AppleAcceleratorTable::dump(raw_ostream &OS) const {
}
}
-AppleAcceleratorTable::Entry::Entry(
- const AppleAcceleratorTable::HeaderData &HdrData)
- : HdrData(&HdrData) {
- Values.reserve(HdrData.Atoms.size());
- for (const auto &Atom : HdrData.Atoms)
+AppleAcceleratorTable::Entry::Entry(const AppleAcceleratorTable &Table)
+ : Table(Table) {
+ Values.reserve(Table.HdrData.Atoms.size());
+ for (const auto &Atom : Table.HdrData.Atoms)
Values.push_back(DWARFFormValue(Atom.second));
}
-void AppleAcceleratorTable::Entry::extract(
- const AppleAcceleratorTable &AccelTable, uint64_t *Offset) {
-
- dwarf::FormParams FormParams = {AccelTable.Hdr.Version, 0,
- dwarf::DwarfFormat::DWARF32};
- for (auto &Atom : Values)
- Atom.extractValue(AccelTable.AccelSection, Offset, FormParams);
+void AppleAcceleratorTable::Entry::extract(uint64_t *Offset) {
+ for (auto &FormValue : Values)
+ FormValue.extractValue(Table.AccelSection, Offset, Table.FormParams);
}
std::optional<DWARFFormValue>
-AppleAcceleratorTable::Entry::lookup(HeaderData::AtomType Atom) const {
- assert(HdrData && "Dereferencing end iterator?");
- assert(HdrData->Atoms.size() == Values.size());
- for (auto Tuple : zip_first(HdrData->Atoms, Values)) {
- if (std::get<0>(Tuple).first == Atom)
- return std::get<1>(Tuple);
- }
+AppleAcceleratorTable::Entry::lookup(HeaderData::AtomType AtomToFind) const {
+ for (auto [Atom, FormValue] : zip_equal(Table.HdrData.Atoms, Values))
+ if (Atom.first == AtomToFind)
+ return FormValue;
return std::nullopt;
}
std::optional<uint64_t>
AppleAcceleratorTable::Entry::getDIESectionOffset() const {
- return HdrData->extractOffset(lookup(dwarf::DW_ATOM_die_offset));
+ return Table.HdrData.extractOffset(lookup(dwarf::DW_ATOM_die_offset));
}
std::optional<uint64_t> AppleAcceleratorTable::Entry::getCUOffset() const {
- return HdrData->extractOffset(lookup(dwarf::DW_ATOM_cu_offset));
+ return Table.HdrData.extractOffset(lookup(dwarf::DW_ATOM_cu_offset));
}
std::optional<dwarf::Tag> AppleAcceleratorTable::Entry::getTag() const {
@@ -301,65 +305,127 @@ std::optional<dwarf::Tag> AppleAcceleratorTable::Entry::getTag() const {
return std::nullopt;
}
-AppleAcceleratorTable::ValueIterator::ValueIterator(
- const AppleAcceleratorTable &AccelTable, uint64_t Offset)
- : AccelTable(&AccelTable), Current(AccelTable.HdrData), DataOffset(Offset) {
- if (!AccelTable.AccelSection.isValidOffsetForDataOfSize(DataOffset, 4))
+AppleAcceleratorTable::SameNameIterator::SameNameIterator(
+ const AppleAcceleratorTable &AccelTable, uint64_t DataOffset)
+ : Current(AccelTable), Offset(DataOffset) {}
+
+void AppleAcceleratorTable::Iterator::prepareNextEntryOrEnd() {
+ if (NumEntriesToCome == 0)
+ prepareNextStringOrEnd();
+ if (isEnd())
return;
+ uint64_t OffsetCopy = Offset;
+ Current.BaseEntry.extract(&OffsetCopy);
+ NumEntriesToCome--;
+ Offset += getTable().getHashDataEntryLength();
+}
+
+void AppleAcceleratorTable::Iterator::prepareNextStringOrEnd() {
+ std::optional<uint32_t> StrOffset = getTable().readStringOffsetAt(Offset);
+ if (!StrOffset)
+ return setToEnd();
+
+ // A zero denotes the end of the collision list. Read the next string
+ // again.
+ if (*StrOffset == 0)
+ return prepareNextStringOrEnd();
+ Current.StrOffset = *StrOffset;
- // Read the first entry.
- NumData = AccelTable.AccelSection.getU32(&DataOffset);
- Next();
+ std::optional<uint32_t> MaybeNumEntries = getTable().readU32FromAccel(Offset);
+ if (!MaybeNumEntries || *MaybeNumEntries == 0)
+ return setToEnd();
+ NumEntriesToCome = *MaybeNumEntries;
}
-void AppleAcceleratorTable::ValueIterator::Next() {
- assert(NumData > 0 && "attempted to increment iterator past the end");
- auto &AccelSection = AccelTable->AccelSection;
- if (Data >= NumData ||
- !AccelSection.isValidOffsetForDataOfSize(DataOffset, 4)) {
- NumData = 0;
- DataOffset = 0;
- return;
- }
- Current.extract(*AccelTable, &DataOffset);
- ++Data;
+AppleAcceleratorTable::Iterator::Iterator(const AppleAcceleratorTable &Table,
+ bool SetEnd)
+ : Current(Table), Offset(Table.getEntriesBase()), NumEntriesToCome(0) {
+ if (SetEnd)
+ setToEnd();
+ else
+ prepareNextEntryOrEnd();
}
-iterator_range<AppleAcceleratorTable::ValueIterator>
+iterator_range<AppleAcceleratorTable::SameNameIterator>
AppleAcceleratorTable::equal_range(StringRef Key) const {
+ const auto EmptyRange =
+ make_range(SameNameIterator(*this, 0), SameNameIterator(*this, 0));
if (!IsValid)
- return make_range(ValueIterator(), ValueIterator());
+ return EmptyRange;
// Find the bucket.
- unsigned HashValue = djbHash(Key);
- unsigned Bucket = HashValue % Hdr.BucketCount;
- uint64_t BucketBase = sizeof(Hdr) + Hdr.HeaderDataLength;
- uint64_t HashesBase = BucketBase + Hdr.BucketCount * 4;
- uint64_t OffsetsBase = HashesBase + Hdr.HashCount * 4;
-
- uint64_t BucketOffset = BucketBase + Bucket * 4;
- unsigned Index = AccelSection.getU32(&BucketOffset);
+ uint32_t SearchHash = djbHash(Key);
+ uint32_t BucketIdx = hashToBucketIdx(SearchHash);
+ std::optional<uint32_t> HashIdx = idxOfHashInBucket(SearchHash, BucketIdx);
+ if (!HashIdx)
+ return EmptyRange;
+
+ std::optional<uint64_t> MaybeDataOffset = readIthOffset(*HashIdx);
+ if (!MaybeDataOffset)
+ return EmptyRange;
+
+ uint64_t DataOffset = *MaybeDataOffset;
+ if (DataOffset >= AccelSection.size())
+ return EmptyRange;
+
+ std::optional<uint32_t> StrOffset = readStringOffsetAt(DataOffset);
+ // Valid input and still have strings in this hash.
+ while (StrOffset && *StrOffset) {
+ std::optional<StringRef> MaybeStr = readStringFromStrSection(*StrOffset);
+ std::optional<uint32_t> NumEntries = this->readU32FromAccel(DataOffset);
+ if (!MaybeStr || !NumEntries)
+ return EmptyRange;
+ uint64_t EndOffset = DataOffset + *NumEntries * getHashDataEntryLength();
+ if (Key == *MaybeStr)
+ return make_range({*this, DataOffset},
+ SameNameIterator{*this, EndOffset});
+ DataOffset = EndOffset;
+ StrOffset = readStringOffsetAt(DataOffset);
+ }
- // Search through all hashes in the bucket.
- for (unsigned HashIdx = Index; HashIdx < Hdr.HashCount; ++HashIdx) {
- uint64_t HashOffset = HashesBase + HashIdx * 4;
- uint64_t OffsetsOffset = OffsetsBase + HashIdx * 4;
- uint32_t Hash = AccelSection.getU32(&HashOffset);
+ return EmptyRange;
+}
- if (Hash % Hdr.BucketCount != Bucket)
- // We are already in the next bucket.
- break;
+std::optional<uint32_t>
+AppleAcceleratorTable::idxOfHashInBucket(uint32_t HashToFind,
+ uint32_t BucketIdx) const {
+ std::optional<uint32_t> HashStartIdx = readIthBucket(BucketIdx);
+ if (!HashStartIdx)
+ return std::nullopt;
- uint64_t DataOffset = AccelSection.getU32(&OffsetsOffset);
- uint64_t StringOffset = AccelSection.getRelocatedValue(4, &DataOffset);
- if (!StringOffset)
+ for (uint32_t HashIdx = *HashStartIdx; HashIdx < getNumHashes(); HashIdx++) {
+ std::optional<uint32_t> MaybeHash = readIthHash(HashIdx);
+ if (!MaybeHash || !wouldHashBeInBucket(*MaybeHash, BucketIdx))
break;
+ if (*MaybeHash == HashToFind)
+ return HashIdx;
+ }
+ return std::nullopt;
+}
- // Finally, compare the key.
- if (Key == StringSection.getCStr(&StringOffset))
- return make_range({*this, DataOffset}, ValueIterator());
+std::optional<StringRef> AppleAcceleratorTable::readStringFromStrSection(
+ uint64_t StringSectionOffset) const {
+ Error E = Error::success();
+ StringRef Str = StringSection.getCStrRef(&StringSectionOffset, &E);
+ if (E) {
+ consumeError(std::move(E));
+ return std::nullopt;
+ }
+ return Str;
+}
+
+std::optional<uint32_t>
+AppleAcceleratorTable::readU32FromAccel(uint64_t &Offset,
+ bool UseRelocation) const {
+ Error E = Error::success();
+ uint32_t Data = UseRelocation
+ ? AccelSection.getRelocatedValue(4, &Offset, nullptr, &E)
+ : AccelSection.getU32(&Offset, &E);
+ if (E) {
+ consumeError(std::move(E));
+ return std::nullopt;
}
- return make_range(ValueIterator(), ValueIterator());
+ return Data;
}
void DWARFDebugNames::Header::dump(ScopedPrinter &W) const {
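A sketch of iterating all accelerator entries that share a name via the rewritten equal_range(); Table is an already-extracted AppleAcceleratorTable, and the range is assumed to yield Entry values as the old ValueIterator did.

for (const auto &E : Table.equal_range("main"))
  if (std::optional<uint64_t> DieOffset = E.getDIESectionOffset())
    llvm::outs() << "DIE offset: " << *DieOffset << "\n";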
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index dd86144d16e0..33168abbdc38 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -48,6 +48,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
@@ -775,11 +776,13 @@ bool DWARFContext::verify(raw_ostream &OS, DIDumpOptions DumpOpts) {
Success &= verifier.handleDebugInfo();
if (DumpOpts.DumpType & DIDT_DebugLine)
Success &= verifier.handleDebugLine();
+ if (DumpOpts.DumpType & DIDT_DebugStrOffsets)
+ Success &= verifier.handleDebugStrOffsets();
Success &= verifier.handleAccelTables();
return Success;
}
-void fixupIndex(const DWARFObject &DObj, DWARFContext &C,
+void fixupIndexV4(const DWARFObject &DObj, DWARFContext &C,
DWARFUnitIndex &Index) {
using EntryType = DWARFUnitIndex::Entry::SectionContribution;
using EntryMap = DenseMap<uint32_t, EntryType>;
@@ -843,8 +846,55 @@ void fixupIndex(const DWARFObject &DObj, DWARFContext &C,
Twine::utohexstr(CUOff.getOffset())),
errs());
}
+}
+
+void fixupIndexV5(const DWARFObject &DObj, DWARFContext &C,
+ DWARFUnitIndex &Index) {
+ DenseMap<uint64_t, uint64_t> Map;
- return;
+ DObj.forEachInfoDWOSections([&](const DWARFSection &S) {
+ if (!(C.getParseCUTUIndexManually() ||
+ S.Data.size() >= std::numeric_limits<uint32_t>::max()))
+ return;
+ DWARFDataExtractor Data(DObj, S, C.isLittleEndian(), 0);
+ uint64_t Offset = 0;
+ while (Data.isValidOffset(Offset)) {
+ DWARFUnitHeader Header;
+ if (!Header.extract(C, Data, &Offset, DWARFSectionKind::DW_SECT_INFO)) {
+ logAllUnhandledErrors(
+ createError("Failed to parse unit header in DWP file"), errs());
+ break;
+ }
+ bool CU = Header.getUnitType() == DW_UT_split_compile;
+ uint64_t Sig = CU ? *Header.getDWOId() : Header.getTypeHash();
+ Map[Sig] = Header.getOffset();
+ Offset = Header.getNextUnitOffset();
+ }
+ });
+ if (Map.empty())
+ return;
+ for (DWARFUnitIndex::Entry &E : Index.getMutableRows()) {
+ if (!E.isValid())
+ continue;
+ DWARFUnitIndex::Entry::SectionContribution &CUOff = E.getContribution();
+ auto Iter = Map.find(E.getSignature());
+ if (Iter == Map.end()) {
+ logAllUnhandledErrors(
+ createError("Could not find unit with signature 0x" +
+ Twine::utohexstr(E.getSignature()) + " in the Map"),
+ errs());
+ break;
+ }
+ CUOff.setOffset(Iter->second);
+ }
+}
+
+void fixupIndex(const DWARFObject &DObj, DWARFContext &C,
+ DWARFUnitIndex &Index) {
+ if (Index.getVersion() < 5)
+ fixupIndexV4(DObj, C, Index);
+ else
+ fixupIndexV5(DObj, C, Index);
}
const DWARFUnitIndex &DWARFContext::getCUIndex() {
@@ -853,8 +903,9 @@ const DWARFUnitIndex &DWARFContext::getCUIndex() {
DataExtractor CUIndexData(DObj->getCUIndexSection(), isLittleEndian(), 0);
CUIndex = std::make_unique<DWARFUnitIndex>(DW_SECT_INFO);
- CUIndex->parse(CUIndexData);
- fixupIndex(*DObj, *this, *CUIndex.get());
+ bool IsParseSuccessful = CUIndex->parse(CUIndexData);
+ if (IsParseSuccessful)
+ fixupIndex(*DObj, *this, *CUIndex);
return *CUIndex;
}
@@ -868,7 +919,7 @@ const DWARFUnitIndex &DWARFContext::getTUIndex() {
// If we are parsing TU-index and for .debug_types section we don't need
// to do anything.
if (isParseSuccessful && TUIndex->getVersion() != 2)
- fixupIndex(*DObj, *this, *TUIndex.get());
+ fixupIndex(*DObj, *this, *TUIndex);
return *TUIndex;
}
@@ -887,9 +938,7 @@ const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() {
return Abbrev.get();
DataExtractor abbrData(DObj->getAbbrevSection(), isLittleEndian(), 0);
-
- Abbrev.reset(new DWARFDebugAbbrev());
- Abbrev->extract(abbrData);
+ Abbrev = std::make_unique<DWARFDebugAbbrev>(abbrData);
return Abbrev.get();
}
@@ -898,8 +947,7 @@ const DWARFDebugAbbrev *DWARFContext::getDebugAbbrevDWO() {
return AbbrevDWO.get();
DataExtractor abbrData(DObj->getAbbrevDWOSection(), isLittleEndian(), 0);
- AbbrevDWO.reset(new DWARFDebugAbbrev());
- AbbrevDWO->extract(abbrData);
+ AbbrevDWO = std::make_unique<DWARFDebugAbbrev>(abbrData);
return AbbrevDWO.get();
}
@@ -1118,14 +1166,17 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint64_t Offset) {
NormalUnits.getUnitForOffset(Offset));
}
-DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) {
- // First, get the offset of the compile unit.
+DWARFCompileUnit *DWARFContext::getCompileUnitForCodeAddress(uint64_t Address) {
+ uint64_t CUOffset = getDebugAranges()->findAddress(Address);
+ return getCompileUnitForOffset(CUOffset);
+}
+
+DWARFCompileUnit *DWARFContext::getCompileUnitForDataAddress(uint64_t Address) {
uint64_t CUOffset = getDebugAranges()->findAddress(Address);
- // Retrieve the compile unit.
if (DWARFCompileUnit *OffsetCU = getCompileUnitForOffset(CUOffset))
return OffsetCU;
- // Global variables are often not found by the above search, for one of two
+ // Global variables are often missed by the above search, for one of two
// reasons:
// 1. .debug_aranges may not include global variables. On clang, it seems we
// put the globals in the aranges, but this isn't true for gcc.
@@ -1146,7 +1197,7 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForAddress(uint64_t Address) {
DWARFContext::DIEsForAddress DWARFContext::getDIEsForAddress(uint64_t Address) {
DIEsForAddress Result;
- DWARFCompileUnit *CU = getCompileUnitForAddress(Address);
+ DWARFCompileUnit *CU = getCompileUnitForCodeAddress(Address);
if (!CU)
return Result;
@@ -1297,7 +1348,7 @@ void DWARFContext::addLocalsForDie(DWARFCompileUnit *CU, DWARFDie Subprogram,
std::vector<DILocal>
DWARFContext::getLocalsForAddress(object::SectionedAddress Address) {
std::vector<DILocal> Result;
- DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address);
+ DWARFCompileUnit *CU = getCompileUnitForCodeAddress(Address.Address);
if (!CU)
return Result;
@@ -1310,7 +1361,7 @@ DWARFContext::getLocalsForAddress(object::SectionedAddress Address) {
DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address,
DILineInfoSpecifier Spec) {
DILineInfo Result;
- DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address);
+ DWARFCompileUnit *CU = getCompileUnitForCodeAddress(Address.Address);
if (!CU)
return Result;
@@ -1331,7 +1382,7 @@ DILineInfo DWARFContext::getLineInfoForAddress(object::SectionedAddress Address,
DILineInfo
DWARFContext::getLineInfoForDataAddress(object::SectionedAddress Address) {
DILineInfo Result;
- DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address);
+ DWARFCompileUnit *CU = getCompileUnitForDataAddress(Address.Address);
if (!CU)
return Result;
@@ -1346,7 +1397,7 @@ DWARFContext::getLineInfoForDataAddress(object::SectionedAddress Address) {
DILineInfoTable DWARFContext::getLineInfoForAddressRange(
object::SectionedAddress Address, uint64_t Size, DILineInfoSpecifier Spec) {
DILineInfoTable Lines;
- DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address);
+ DWARFCompileUnit *CU = getCompileUnitForCodeAddress(Address.Address);
if (!CU)
return Lines;
@@ -1402,7 +1453,7 @@ DWARFContext::getInliningInfoForAddress(object::SectionedAddress Address,
DILineInfoSpecifier Spec) {
DIInliningInfo InliningInfo;
- DWARFCompileUnit *CU = getCompileUnitForAddress(Address.Address);
+ DWARFCompileUnit *CU = getCompileUnitForCodeAddress(Address.Address);
if (!CU)
return InliningInfo;
@@ -1805,13 +1856,9 @@ public:
continue;
}
- // Compressed sections names in GNU style starts from ".z",
- // at this point section is decompressed and we drop compression prefix.
- Name = Name.substr(
- Name.find_first_not_of("._z")); // Skip ".", "z" and "_" prefixes.
-
// Map platform specific debug section names to DWARF standard section
// names.
+ Name = Name.substr(Name.find_first_not_of("._"));
Name = Obj.mapDebugSectionName(Name);
if (StringRef *SectionData = mapSectionToMember(Name)) {
@@ -1836,10 +1883,6 @@ public:
S.Data = Data;
}
- if (RelocatedSection != Obj.section_end() && Name.contains(".dwo"))
- HandleWarning(
- createError("Unexpected relocations for dwo section " + Name));
-
if (RelocatedSection == Obj.section_end() ||
(RelocAction == DWARFContext::ProcessDebugRelocations::Ignore))
continue;
@@ -1865,11 +1908,15 @@ public:
if (!L && isa<MachOObjectFile>(&Obj))
continue;
- RelSecName = RelSecName.substr(
- RelSecName.find_first_not_of("._z")); // Skip . and _ prefixes.
+ if (!Section.relocations().empty() && Name.ends_with(".dwo") &&
+ RelSecName.startswith(".debug")) {
+ HandleWarning(createError("unexpected relocations for dwo section '" +
+ RelSecName + "'"));
+ }
// TODO: Add support for relocations in other sections as needed.
// Record relocations for the debug_info and debug_line sections.
+ RelSecName = RelSecName.substr(RelSecName.find_first_not_of("._"));
DWARFSectionMap *Sec = mapNameToDWARFSection(RelSecName);
RelocAddrMap *Map = Sec ? &Sec->Relocs : nullptr;
if (!Map) {
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
index 3ea3818e7cc3..3014e61f566a 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugAbbrev.cpp
@@ -25,26 +25,32 @@ void DWARFAbbreviationDeclarationSet::clear() {
Decls.clear();
}
-bool DWARFAbbreviationDeclarationSet::extract(DataExtractor Data,
- uint64_t *OffsetPtr) {
+Error DWARFAbbreviationDeclarationSet::extract(DataExtractor Data,
+ uint64_t *OffsetPtr) {
clear();
const uint64_t BeginOffset = *OffsetPtr;
Offset = BeginOffset;
DWARFAbbreviationDeclaration AbbrDecl;
uint32_t PrevAbbrCode = 0;
- while (AbbrDecl.extract(Data, OffsetPtr)) {
+ while (true) {
+ Expected<DWARFAbbreviationDeclaration::ExtractState> ES =
+ AbbrDecl.extract(Data, OffsetPtr);
+ if (!ES)
+ return ES.takeError();
+
+ if (*ES == DWARFAbbreviationDeclaration::ExtractState::Complete)
+ break;
+
if (FirstAbbrCode == 0) {
FirstAbbrCode = AbbrDecl.getCode();
- } else {
- if (PrevAbbrCode + 1 != AbbrDecl.getCode()) {
- // Codes are not consecutive, can't do O(1) lookups.
- FirstAbbrCode = UINT32_MAX;
- }
+ } else if (PrevAbbrCode + 1 != AbbrDecl.getCode()) {
+ // Codes are not consecutive, can't do O(1) lookups.
+ FirstAbbrCode = UINT32_MAX;
}
PrevAbbrCode = AbbrDecl.getCode();
Decls.push_back(std::move(AbbrDecl));
}
- return BeginOffset != *OffsetPtr;
+ return Error::success();
}
void DWARFAbbreviationDeclarationSet::dump(raw_ostream &OS) const {
@@ -96,17 +102,8 @@ std::string DWARFAbbreviationDeclarationSet::getCodeRange() const {
return Buffer;
}
-DWARFDebugAbbrev::DWARFDebugAbbrev() { clear(); }
-
-void DWARFDebugAbbrev::clear() {
- AbbrDeclSets.clear();
- PrevAbbrOffsetPos = AbbrDeclSets.end();
-}
-
-void DWARFDebugAbbrev::extract(DataExtractor Data) {
- clear();
- this->Data = Data;
-}
+DWARFDebugAbbrev::DWARFDebugAbbrev(DataExtractor Data)
+ : AbbrDeclSets(), PrevAbbrOffsetPos(AbbrDeclSets.end()), Data(Data) {}
void DWARFDebugAbbrev::parse() const {
if (!Data)
@@ -118,8 +115,11 @@ void DWARFDebugAbbrev::parse() const {
++I;
uint64_t CUAbbrOffset = Offset;
DWARFAbbreviationDeclarationSet AbbrDecls;
- if (!AbbrDecls.extract(*Data, &Offset))
+ if (Error Err = AbbrDecls.extract(*Data, &Offset)) {
+ // FIXME: We should propagate the error upwards.
+ consumeError(std::move(Err));
break;
+ }
AbbrDeclSets.insert(I, std::make_pair(CUAbbrOffset, std::move(AbbrDecls)));
}
Data = std::nullopt;
@@ -139,29 +139,30 @@ void DWARFDebugAbbrev::dump(raw_ostream &OS) const {
}
}
-const DWARFAbbreviationDeclarationSet*
+Expected<const DWARFAbbreviationDeclarationSet *>
DWARFDebugAbbrev::getAbbreviationDeclarationSet(uint64_t CUAbbrOffset) const {
const auto End = AbbrDeclSets.end();
if (PrevAbbrOffsetPos != End && PrevAbbrOffsetPos->first == CUAbbrOffset) {
- return &(PrevAbbrOffsetPos->second);
+ return &PrevAbbrOffsetPos->second;
}
const auto Pos = AbbrDeclSets.find(CUAbbrOffset);
if (Pos != End) {
PrevAbbrOffsetPos = Pos;
- return &(Pos->second);
+ return &Pos->second;
}
- if (Data && CUAbbrOffset < Data->getData().size()) {
- uint64_t Offset = CUAbbrOffset;
- DWARFAbbreviationDeclarationSet AbbrDecls;
- if (!AbbrDecls.extract(*Data, &Offset))
- return nullptr;
- PrevAbbrOffsetPos =
- AbbrDeclSets.insert(std::make_pair(CUAbbrOffset, std::move(AbbrDecls)))
- .first;
- return &PrevAbbrOffsetPos->second;
- }
+ if (!Data || CUAbbrOffset >= Data->getData().size())
+ return make_error<llvm::object::GenericBinaryError>(
+ "the abbreviation offset into the .debug_abbrev section is not valid");
+
+ uint64_t Offset = CUAbbrOffset;
+ DWARFAbbreviationDeclarationSet AbbrDecls;
+ if (Error Err = AbbrDecls.extract(*Data, &Offset))
+ return std::move(Err);
- return nullptr;
+ PrevAbbrOffsetPos =
+ AbbrDeclSets.insert(std::make_pair(CUAbbrOffset, std::move(AbbrDecls)))
+ .first;
+ return &PrevAbbrOffsetPos->second;
}
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 0725bd7744ae..6f2afe5d50e9 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -477,6 +477,7 @@ void DWARFDebugLine::Row::reset(bool DefaultIsStmt) {
Isa = 0;
Discriminator = 0;
IsStmt = DefaultIsStmt;
+ OpIndex = 0;
BasicBlock = false;
EndSequence = false;
PrologueEnd = false;
@@ -485,15 +486,16 @@ void DWARFDebugLine::Row::reset(bool DefaultIsStmt) {
void DWARFDebugLine::Row::dumpTableHeader(raw_ostream &OS, unsigned Indent) {
OS.indent(Indent)
- << "Address Line Column File ISA Discriminator Flags\n";
+ << "Address Line Column File ISA Discriminator OpIndex "
+ "Flags\n";
OS.indent(Indent)
- << "------------------ ------ ------ ------ --- ------------- "
+ << "------------------ ------ ------ ------ --- ------------- ------- "
"-------------\n";
}
void DWARFDebugLine::Row::dump(raw_ostream &OS) const {
OS << format("0x%16.16" PRIx64 " %6u %6u", Address.Address, Line, Column)
- << format(" %6u %3u %13u ", File, Isa, Discriminator)
+ << format(" %6u %3u %13u %7u ", File, Isa, Discriminator, OpIndex)
<< (IsStmt ? " is_stmt" : "") << (BasicBlock ? " basic_block" : "")
<< (PrologueEnd ? " prologue_end" : "")
<< (EpilogueBegin ? " epilogue_begin" : "")
@@ -608,21 +610,36 @@ static StringRef getOpcodeName(uint8_t Opcode, uint8_t OpcodeBase) {
return "special";
}
-uint64_t DWARFDebugLine::ParsingState::advanceAddr(uint64_t OperationAdvance,
- uint8_t Opcode,
- uint64_t OpcodeOffset) {
+DWARFDebugLine::ParsingState::AddrOpIndexDelta
+DWARFDebugLine::ParsingState::advanceAddrOpIndex(uint64_t OperationAdvance,
+ uint8_t Opcode,
+ uint64_t OpcodeOffset) {
StringRef OpcodeName = getOpcodeName(Opcode, LineTable->Prologue.OpcodeBase);
// For versions less than 4, the MaxOpsPerInst member is set to 0, as the
// maximum_operations_per_instruction field wasn't introduced until DWARFv4.
// Don't warn about bad values in this situation.
if (ReportAdvanceAddrProblem && LineTable->Prologue.getVersion() >= 4 &&
- LineTable->Prologue.MaxOpsPerInst != 1)
+ LineTable->Prologue.MaxOpsPerInst == 0)
+ ErrorHandler(createStringError(
+ errc::invalid_argument,
+ "line table program at offset 0x%8.8" PRIx64
+ " contains a %s opcode at offset 0x%8.8" PRIx64
+ ", but the prologue maximum_operations_per_instruction value is 0"
+ ", which is invalid. Assuming a value of 1 instead",
+ LineTableOffset, OpcodeName.data(), OpcodeOffset));
+ // Although we are able to correctly parse line number programs with
+ // MaxOpsPerInst > 1, the rest of DWARFDebugLine and its
+ // users have not been updated to handle line information for all operations
+ // in a multi-operation instruction, so warn about potentially incorrect
+ // results.
+ if (ReportAdvanceAddrProblem && LineTable->Prologue.MaxOpsPerInst > 1)
ErrorHandler(createStringError(
errc::not_supported,
"line table program at offset 0x%8.8" PRIx64
" contains a %s opcode at offset 0x%8.8" PRIx64
", but the prologue maximum_operations_per_instruction value is %" PRId8
- ", which is unsupported. Assuming a value of 1 instead",
+ ", which is experimentally supported, so line number information "
+ "may be incorrect",
LineTableOffset, OpcodeName.data(), OpcodeOffset,
LineTable->Prologue.MaxOpsPerInst));
if (ReportAdvanceAddrProblem && LineTable->Prologue.MinInstLength == 0)
@@ -634,14 +651,35 @@ uint64_t DWARFDebugLine::ParsingState::advanceAddr(uint64_t OperationAdvance,
"is 0, which prevents any address advancing",
LineTableOffset, OpcodeName.data(), OpcodeOffset));
ReportAdvanceAddrProblem = false;
- uint64_t AddrOffset = OperationAdvance * LineTable->Prologue.MinInstLength;
+
+ // Advances the address and op_index according to DWARFv5, section 6.2.5.1:
+ //
+ // new address = address +
+ // minimum_instruction_length *
+ // ((op_index + operation advance) / maximum_operations_per_instruction)
+ //
+ // new op_index =
+ // (op_index + operation advance) % maximum_operations_per_instruction
+
+ // For versions less than 4, the MaxOpsPerInst member is set to 0, as the
+ // maximum_operations_per_instruction field wasn't introduced until DWARFv4.
+ uint8_t MaxOpsPerInst =
+ std::max(LineTable->Prologue.MaxOpsPerInst, uint8_t{1});
+
+ uint64_t AddrOffset = ((Row.OpIndex + OperationAdvance) / MaxOpsPerInst) *
+ LineTable->Prologue.MinInstLength;
Row.Address.Address += AddrOffset;
- return AddrOffset;
+
+ uint8_t PrevOpIndex = Row.OpIndex;
+ Row.OpIndex = (Row.OpIndex + OperationAdvance) % MaxOpsPerInst;
+ int16_t OpIndexDelta = static_cast<int16_t>(Row.OpIndex) - PrevOpIndex;
+
+ return {AddrOffset, OpIndexDelta};
}
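
A worked instance of the 6.2.5.1 rule quoted in the comment above, as a minimal standalone sketch in plain C++ rather than the LLVM types: with minimum_instruction_length = 4, maximum_operations_per_instruction = 3, op_index = 2 and an operation advance of 5, the address grows by 4 * ((2 + 5) / 3) = 8 and op_index becomes (2 + 5) % 3 = 1.

#include <cassert>
#include <cstdint>
#include <utility>

// Returns {new address, new op_index} per DWARFv5 section 6.2.5.1.
std::pair<uint64_t, uint8_t> advanceAddrOpIndex(uint64_t Addr, uint8_t OpIndex,
                                                uint64_t OperationAdvance,
                                                uint8_t MinInstLength,
                                                uint8_t MaxOpsPerInst) {
  if (MaxOpsPerInst == 0)
    MaxOpsPerInst = 1; // pre-DWARFv4 tables leave the field as 0
  Addr += MinInstLength * ((OpIndex + OperationAdvance) / MaxOpsPerInst);
  OpIndex = (OpIndex + OperationAdvance) % MaxOpsPerInst;
  return {Addr, OpIndex};
}

int main() {
  auto [Addr, OpIndex] = advanceAddrOpIndex(0x1000, 2, 5, 4, 3);
  assert(Addr == 0x1008 && OpIndex == 1);
}
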
-DWARFDebugLine::ParsingState::AddrAndAdjustedOpcode
-DWARFDebugLine::ParsingState::advanceAddrForOpcode(uint8_t Opcode,
- uint64_t OpcodeOffset) {
+DWARFDebugLine::ParsingState::OpcodeAdvanceResults
+DWARFDebugLine::ParsingState::advanceForOpcode(uint8_t Opcode,
+ uint64_t OpcodeOffset) {
assert(Opcode == DW_LNS_const_add_pc ||
Opcode >= LineTable->Prologue.OpcodeBase);
if (ReportBadLineRange && LineTable->Prologue.LineRange == 0) {
@@ -665,11 +703,12 @@ DWARFDebugLine::ParsingState::advanceAddrForOpcode(uint8_t Opcode,
LineTable->Prologue.LineRange != 0
? AdjustedOpcode / LineTable->Prologue.LineRange
: 0;
- uint64_t AddrOffset = advanceAddr(OperationAdvance, Opcode, OpcodeOffset);
- return {AddrOffset, AdjustedOpcode};
+ AddrOpIndexDelta Advance =
+ advanceAddrOpIndex(OperationAdvance, Opcode, OpcodeOffset);
+ return {Advance.AddrOffset, Advance.OpIndexDelta, AdjustedOpcode};
}
-DWARFDebugLine::ParsingState::AddrAndLineDelta
+DWARFDebugLine::ParsingState::SpecialOpcodeDelta
DWARFDebugLine::ParsingState::handleSpecialOpcode(uint8_t Opcode,
uint64_t OpcodeOffset) {
// A special opcode value is chosen based on the amount that needs
@@ -703,15 +742,16 @@ DWARFDebugLine::ParsingState::handleSpecialOpcode(uint8_t Opcode,
//
// line increment = line_base + (adjusted opcode % line_range)
- DWARFDebugLine::ParsingState::AddrAndAdjustedOpcode AddrAdvanceResult =
- advanceAddrForOpcode(Opcode, OpcodeOffset);
+ DWARFDebugLine::ParsingState::OpcodeAdvanceResults AddrAdvanceResult =
+ advanceForOpcode(Opcode, OpcodeOffset);
int32_t LineOffset = 0;
if (LineTable->Prologue.LineRange != 0)
LineOffset =
LineTable->Prologue.LineBase +
(AddrAdvanceResult.AdjustedOpcode % LineTable->Prologue.LineRange);
Row.Line += LineOffset;
- return {AddrAdvanceResult.AddrDelta, LineOffset};
+ return {AddrAdvanceResult.AddrDelta, LineOffset,
+ AddrAdvanceResult.OpIndexDelta};
}
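
A worked decode of one special opcode under the formulas described in the comments above; the prologue values used here (opcode_base 13, line_base -5, line_range 14, min_inst_length 1) are illustrative defaults, not something this patch prescribes.

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t OpcodeBase = 13, LineRange = 14, MinInstLength = 1;
  const int32_t LineBase = -5;
  const uint8_t Opcode = 0x4B; // any opcode >= OpcodeBase is "special"

  const uint8_t Adjusted = Opcode - OpcodeBase;                      // 62
  const uint64_t AddrDelta = (Adjusted / LineRange) * MinInstLength; // 4
  const int32_t LineDelta = LineBase + (Adjusted % LineRange);       // +1

  assert(AddrDelta == 4 && LineDelta == 1);
}
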
/// Parse a ULEB128 using the specified \p Cursor. \returns the parsed value on
@@ -858,9 +898,10 @@ Error DWARFDebugLine::LineTable::parse(
// Takes a single relocatable address as an operand. The size of the
// operand is the size appropriate to hold an address on the target
// machine. Set the address register to the value given by the
- // relocatable address. All of the other statement program opcodes
- // that affect the address register add a delta to it. This instruction
- // stores a relocatable value into it instead.
+ // relocatable address and set the op_index register to 0. All of the
+ // other statement program opcodes that affect the address register
+ // add a delta to it. This instruction stores a relocatable value into
+ // it instead.
//
// Make sure the extractor knows the address size. If not, infer it
// from the size of the operand.
@@ -891,6 +932,7 @@ Error DWARFDebugLine::LineTable::parse(
TableData.setAddressSize(OpcodeAddressSize);
State.Row.Address.Address = TableData.getRelocatedAddress(
Cursor, &State.Row.Address.SectionIndex);
+ State.Row.OpIndex = 0;
uint64_t Tombstone =
dwarf::computeTombstoneAddress(OpcodeAddressSize);
@@ -1002,15 +1044,16 @@ Error DWARFDebugLine::LineTable::parse(
break;
case DW_LNS_advance_pc:
- // Takes a single unsigned LEB128 operand, multiplies it by the
- // min_inst_length field of the prologue, and adds the
- // result to the address register of the state machine.
+ // Takes a single unsigned LEB128 operand as the operation advance
+ // and modifies the address and op_index registers of the state machine
+ // according to that.
if (std::optional<uint64_t> Operand =
parseULEB128<uint64_t>(TableData, Cursor)) {
- uint64_t AddrOffset =
- State.advanceAddr(*Operand, Opcode, OpcodeOffset);
+ ParsingState::AddrOpIndexDelta Advance =
+ State.advanceAddrOpIndex(*Operand, Opcode, OpcodeOffset);
if (Verbose)
- *OS << " (" << AddrOffset << ")";
+ *OS << " (addr += " << Advance.AddrOffset
+ << ", op-index += " << Advance.OpIndexDelta << ")";
}
break;
@@ -1062,8 +1105,8 @@ Error DWARFDebugLine::LineTable::parse(
break;
case DW_LNS_const_add_pc:
- // Takes no arguments. Add to the address register of the state
- // machine the address increment value corresponding to special
+ // Takes no arguments. Advance the address and op_index registers of
+ // the state machine by the increments corresponding to special
// opcode 255. The motivation for DW_LNS_const_add_pc is this:
// when the statement program needs to advance the address by a
// small amount, it can use a single special opcode, which occupies
@@ -1074,30 +1117,35 @@ Error DWARFDebugLine::LineTable::parse(
// than twice that range will it need to use both DW_LNS_advance_pc
// and a special opcode, requiring three or more bytes.
{
- uint64_t AddrOffset =
- State.advanceAddrForOpcode(Opcode, OpcodeOffset).AddrDelta;
+ ParsingState::OpcodeAdvanceResults Advance =
+ State.advanceForOpcode(Opcode, OpcodeOffset);
if (Verbose)
- *OS << format(" (0x%16.16" PRIx64 ")", AddrOffset);
+ *OS << format(" (addr += 0x%16.16" PRIx64 ", op-index += %" PRIu8
+ ")",
+ Advance.AddrDelta, Advance.OpIndexDelta);
}
break;
case DW_LNS_fixed_advance_pc:
// Takes a single uhalf operand. Add to the address register of
- // the state machine the value of the (unencoded) operand. This
- // is the only extended opcode that takes an argument that is not
- // a variable length number. The motivation for DW_LNS_fixed_advance_pc
- // is this: existing assemblers cannot emit DW_LNS_advance_pc or
- // special opcodes because they cannot encode LEB128 numbers or
- // judge when the computation of a special opcode overflows and
- // requires the use of DW_LNS_advance_pc. Such assemblers, however,
- // can use DW_LNS_fixed_advance_pc instead, sacrificing compression.
+ // the state machine the value of the (unencoded) operand and set
+ // the op_index register to 0. This is the only extended opcode that
+ // takes an argument that is not a variable length number.
+ // The motivation for DW_LNS_fixed_advance_pc is this: existing
+ // assemblers cannot emit DW_LNS_advance_pc or special opcodes because
+ // they cannot encode LEB128 numbers or judge when the computation
+ // of a special opcode overflows and requires the use of
+ // DW_LNS_advance_pc. Such assemblers, however, can use
+ // DW_LNS_fixed_advance_pc instead, sacrificing compression.
{
uint16_t PCOffset =
TableData.getRelocatedValue(Cursor, 2);
if (Cursor) {
State.Row.Address.Address += PCOffset;
+ State.Row.OpIndex = 0;
if (Verbose)
- *OS << format(" (0x%4.4" PRIx16 ")", PCOffset);
+ *OS << format(" (addr += 0x%4.4" PRIx16 ", op-index = 0)",
+ PCOffset);
}
}
break;
@@ -1161,11 +1209,12 @@ Error DWARFDebugLine::LineTable::parse(
*OffsetPtr = Cursor.tell();
} else {
// Special Opcodes.
- ParsingState::AddrAndLineDelta Delta =
+ ParsingState::SpecialOpcodeDelta Delta =
State.handleSpecialOpcode(Opcode, OpcodeOffset);
if (Verbose)
- *OS << "address += " << Delta.Address << ", line += " << Delta.Line;
+ *OS << "address += " << Delta.Address << ", line += " << Delta.Line
+ << ", op-index += " << Delta.OpIndex;
EmitRow();
*OffsetPtr = Cursor.tell();
}
@@ -1226,6 +1275,9 @@ uint32_t DWARFDebugLine::LineTable::findRowInSeq(
//
// In general we want a non-empty range: the last row whose address is less
// than or equal to Address. This can be computed as upper_bound - 1.
+ //
+ // TODO: This function and its users need to be updated to return multiple
+ // rows for bundles with multiple op-indexes.
DWARFDebugLine::Row Row;
Row.Address = Address;
RowIter FirstRow = Rows.begin() + Seq.FirstRowIndex;
@@ -1505,6 +1557,21 @@ DWARFUnit *DWARFDebugLine::SectionParser::prepareToParse(uint64_t Offset) {
return U;
}
+bool DWARFDebugLine::SectionParser::hasValidVersion(uint64_t Offset) {
+ DataExtractor::Cursor Cursor(Offset);
+ auto [TotalLength, _] = DebugLineData.getInitialLength(Cursor);
+ DWARFDataExtractor HeaderData(DebugLineData, Cursor.tell() + TotalLength);
+ uint16_t Version = HeaderData.getU16(Cursor);
+ if (!Cursor) {
+ // Ignore any error here.
+ // If this is not the end of the section, parseNext() will still be
+ // attempted, where this error will occur again (and can be handled).
+ consumeError(Cursor.takeError());
+ return false;
+ }
+ return versionIsSupported(Version);
+}
+
void DWARFDebugLine::SectionParser::moveToNextTable(uint64_t OldOffset,
const Prologue &P) {
// If the length field is not valid, we don't know where the next table is, so
@@ -1518,5 +1585,29 @@ void DWARFDebugLine::SectionParser::moveToNextTable(uint64_t OldOffset,
Offset = OldOffset + P.TotalLength + P.sizeofTotalLength();
if (!DebugLineData.isValidOffset(Offset)) {
Done = true;
+ return;
+ }
+
+ // Heuristic: If the version is valid, then this is probably a line table.
+ // Otherwise, the offset might need alignment (to a 4 or 8 byte boundary).
+ if (hasValidVersion(Offset))
+ return;
+
+ // The ARM C/C++ Compiler aligns each line table to word boundaries and pads
+ // out the .debug_line section to a word multiple. The specification does not
+ // appear to forbid this, since each unit has a DW_AT_stmt_list.
+ for (unsigned Align : {4, 8}) {
+ uint64_t AlignedOffset = alignTo(Offset, Align);
+ if (!DebugLineData.isValidOffset(AlignedOffset)) {
+ // This is almost certainly not another line table but some alignment
+ // padding. This assumes the alignments tested are ordered, and are
+ // smaller than the header size (which is true for 4 and 8).
+ Done = true;
+ return;
+ }
+ if (hasValidVersion(AlignedOffset)) {
+ Offset = AlignedOffset;
+ break;
+ }
}
}
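
The alignment probe above in plain arithmetic, as a minimal sketch rather than the LLVM alignTo helper: round an offset up to the next 4- or 8-byte boundary before retrying the version check.

#include <cstdint>

constexpr uint64_t alignUp(uint64_t Offset, uint64_t Align) {
  return (Offset + Align - 1) / Align * Align;
}

static_assert(alignUp(0x1235, 4) == 0x1238, "pad 3 bytes to a word boundary");
static_assert(alignUp(0x1238, 8) == 0x1238, "already aligned offsets are kept");

int main() { return 0; }
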
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
index 26cef8713df1..7af7ed8be7b4 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -39,7 +39,7 @@ using namespace object;
static void dumpApplePropertyAttribute(raw_ostream &OS, uint64_t Val) {
OS << " (";
do {
- uint64_t Shift = countTrailingZeros(Val);
+ uint64_t Shift = llvm::countr_zero(Val);
assert(Shift < 64 && "undefined behavior");
uint64_t Bit = 1ULL << Shift;
auto PropName = ApplePropertyString(Bit);
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
index 523dee486d2d..87a4fc78ceb1 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Support/Format.h"
#include <cassert>
@@ -18,13 +19,11 @@ using namespace dwarf;
namespace llvm {
-typedef std::vector<DWARFExpression::Operation::Description> DescVector;
-
-static DescVector getDescriptions() {
- DescVector Descriptions;
- typedef DWARFExpression::Operation Op;
- typedef Op::Description Desc;
+typedef DWARFExpression::Operation Op;
+typedef Op::Description Desc;
+static std::vector<Desc> getOpDescriptions() {
+ std::vector<Desc> Descriptions;
Descriptions.resize(0xff);
Descriptions[DW_OP_addr] = Desc(Op::Dwarf2, Op::SizeAddr);
Descriptions[DW_OP_deref] = Desc(Op::Dwarf2);
@@ -94,26 +93,49 @@ static DescVector getDescriptions() {
Descriptions[DW_OP_WASM_location] =
Desc(Op::Dwarf4, Op::SizeLEB, Op::WasmLocationArg);
Descriptions[DW_OP_GNU_push_tls_address] = Desc(Op::Dwarf3);
- Descriptions[DW_OP_addrx] = Desc(Op::Dwarf4, Op::SizeLEB);
Descriptions[DW_OP_GNU_addr_index] = Desc(Op::Dwarf4, Op::SizeLEB);
Descriptions[DW_OP_GNU_const_index] = Desc(Op::Dwarf4, Op::SizeLEB);
Descriptions[DW_OP_GNU_entry_value] = Desc(Op::Dwarf4, Op::SizeLEB);
-
+ Descriptions[DW_OP_addrx] = Desc(Op::Dwarf5, Op::SizeLEB);
+ Descriptions[DW_OP_constx] = Desc(Op::Dwarf5, Op::SizeLEB);
Descriptions[DW_OP_convert] = Desc(Op::Dwarf5, Op::BaseTypeRef);
Descriptions[DW_OP_entry_value] = Desc(Op::Dwarf5, Op::SizeLEB);
Descriptions[DW_OP_regval_type] =
Desc(Op::Dwarf5, Op::SizeLEB, Op::BaseTypeRef);
-
+ // This Description acts as a marker that getSubOpDesc must be called
+ // to fetch the final Description for the operation. Each such final
+ // Description must share the same first SizeSubOpLEB operand.
+ Descriptions[DW_OP_LLVM_user] = Desc(Op::Dwarf5, Op::SizeSubOpLEB);
return Descriptions;
}
-static DWARFExpression::Operation::Description getOpDesc(unsigned OpCode) {
- // FIXME: Make this constexpr once all compilers are smart enough to do it.
- static DescVector Descriptions = getDescriptions();
+static Desc getDescImpl(ArrayRef<Desc> Descriptions, unsigned Opcode) {
// Handle possible corrupted or unsupported operation.
- if (OpCode >= Descriptions.size())
+ if (Opcode >= Descriptions.size())
return {};
- return Descriptions[OpCode];
+ return Descriptions[Opcode];
+}
+
+static Desc getOpDesc(unsigned Opcode) {
+ static std::vector<Desc> Descriptions = getOpDescriptions();
+ return getDescImpl(Descriptions, Opcode);
+}
+
+static std::vector<Desc> getSubOpDescriptions() {
+ static constexpr unsigned LlvmUserDescriptionsSize = 1
+#define HANDLE_DW_OP_LLVM_USEROP(ID, NAME) +1
+#include "llvm/BinaryFormat/Dwarf.def"
+ ;
+ std::vector<Desc> Descriptions;
+ Descriptions.resize(LlvmUserDescriptionsSize);
+ Descriptions[DW_OP_LLVM_nop] = Desc(Op::Dwarf5, Op::SizeSubOpLEB);
+ return Descriptions;
+}
+
+static Desc getSubOpDesc(unsigned Opcode, unsigned SubOpcode) {
+ assert(Opcode == DW_OP_LLVM_user);
+ static std::vector<Desc> Descriptions = getSubOpDescriptions();
+ return getDescImpl(Descriptions, SubOpcode);
}
bool DWARFExpression::Operation::extract(DataExtractor Data,
@@ -126,14 +148,22 @@ bool DWARFExpression::Operation::extract(DataExtractor Data,
if (Desc.Version == Operation::DwarfNA)
return false;
- for (unsigned Operand = 0; Operand < 2; ++Operand) {
+ Operands.resize(Desc.Op.size());
+ OperandEndOffsets.resize(Desc.Op.size());
+ for (unsigned Operand = 0; Operand < Desc.Op.size(); ++Operand) {
unsigned Size = Desc.Op[Operand];
unsigned Signed = Size & Operation::SignBit;
- if (Size == Operation::SizeNA)
- break;
-
switch (Size & ~Operation::SignBit) {
+ case Operation::SizeSubOpLEB:
+ assert(Operand == 0 && "SubOp operand must be the first operand");
+ Operands[Operand] = Data.getULEB128(&Offset);
+ Desc = getSubOpDesc(Opcode, Operands[Operand]);
+ if (Desc.Version == Operation::DwarfNA)
+ return false;
+ assert(Desc.Op[Operand] == Operation::SizeSubOpLEB &&
+ "SizeSubOpLEB Description must begin with SizeSubOpLEB operand");
+ break;
case Operation::Size1:
Operands[Operand] = Data.getU8(&Offset);
if (Signed)
@@ -207,9 +237,9 @@ bool DWARFExpression::Operation::extract(DataExtractor Data,
static void prettyPrintBaseTypeRef(DWARFUnit *U, raw_ostream &OS,
DIDumpOptions DumpOpts,
- const uint64_t Operands[2],
+ ArrayRef<uint64_t> Operands,
unsigned Operand) {
- assert(Operand < 2 && "operand out of bounds");
+ assert(Operand < Operands.size() && "operand out of bounds");
auto Die = U->getDIEForOffset(U->getOffset() + Operands[Operand]);
if (Die && Die.getTag() == dwarf::DW_TAG_base_type) {
OS << " (";
@@ -227,7 +257,7 @@ static void prettyPrintBaseTypeRef(DWARFUnit *U, raw_ostream &OS,
bool DWARFExpression::prettyPrintRegisterOp(DWARFUnit *U, raw_ostream &OS,
DIDumpOptions DumpOpts,
uint8_t Opcode,
- const uint64_t Operands[2]) {
+ ArrayRef<uint64_t> Operands) {
if (!DumpOpts.GetNameForDWARFReg)
return false;
@@ -258,6 +288,12 @@ bool DWARFExpression::prettyPrintRegisterOp(DWARFUnit *U, raw_ostream &OS,
return false;
}
+std::optional<unsigned> DWARFExpression::Operation::getSubCode() const {
+ if (!Desc.Op.size() || Desc.Op[0] != Operation::SizeSubOpLEB)
+ return std::nullopt;
+ return Operands[0];
+}
+
bool DWARFExpression::Operation::print(raw_ostream &OS, DIDumpOptions DumpOpts,
const DWARFExpression *Expr,
DWARFUnit *U) const {
@@ -277,14 +313,15 @@ bool DWARFExpression::Operation::print(raw_ostream &OS, DIDumpOptions DumpOpts,
if (prettyPrintRegisterOp(U, OS, DumpOpts, Opcode, Operands))
return true;
- for (unsigned Operand = 0; Operand < 2; ++Operand) {
+ for (unsigned Operand = 0; Operand < Desc.Op.size(); ++Operand) {
unsigned Size = Desc.Op[Operand];
unsigned Signed = Size & Operation::SignBit;
- if (Size == Operation::SizeNA)
- break;
-
- if (Size == Operation::BaseTypeRef && U) {
+ if (Size == Operation::SizeSubOpLEB) {
+ StringRef SubName = SubOperationEncodingString(Opcode, Operands[Operand]);
+ assert(!SubName.empty() && "DW_OP SubOp has no name!");
+ OS << " " << SubName;
+ } else if (Size == Operation::BaseTypeRef && U) {
// For DW_OP_convert the operand may be 0 to indicate that conversion to
// the generic type should be done. The same holds for DW_OP_reinterpret,
// which is currently not supported.
@@ -355,12 +392,9 @@ void DWARFExpression::print(raw_ostream &OS, DIDumpOptions DumpOpts,
}
bool DWARFExpression::Operation::verify(const Operation &Op, DWARFUnit *U) {
- for (unsigned Operand = 0; Operand < 2; ++Operand) {
+ for (unsigned Operand = 0; Operand < Op.Desc.Op.size(); ++Operand) {
unsigned Size = Op.Desc.Op[Operand];
- if (Size == Operation::SizeNA)
- break;
-
if (Size == Operation::BaseTypeRef) {
// For DW_OP_convert the operand may be 0 to indicate that conversion to
// the generic type should be done, so don't look up a base type in that
@@ -454,6 +488,13 @@ static bool printCompactDWARFExpr(
Stack.back().Kind = PrintedExpr::Value;
break;
}
+ case dwarf::DW_OP_nop: {
+ break;
+ }
+ case dwarf::DW_OP_LLVM_user: {
+ assert(Op.getSubCode() && *Op.getSubCode() == dwarf::DW_OP_LLVM_nop);
+ break;
+ }
default:
if (Opcode >= dwarf::DW_OP_reg0 && Opcode <= dwarf::DW_OP_reg31) {
// DW_OP_reg<N>: A register, with the register num implied by the
@@ -487,7 +528,10 @@ static bool printCompactDWARFExpr(
++I;
}
- assert(Stack.size() == 1 && "expected one value on stack");
+ if (Stack.size() != 1) {
+ OS << "<stack of size " << Stack.size() << ", expected 1>";
+ return false;
+ }
if (Stack.front().Kind == PrintedExpr::Address)
OS << "[" << Stack.front().String << "]";
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 5dd9515aafdb..29949ee02145 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -160,9 +160,11 @@ bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData,
case DW_FORM_ref_sup8:
case DW_FORM_strx1:
case DW_FORM_strx2:
+ case DW_FORM_strx3:
case DW_FORM_strx4:
case DW_FORM_addrx1:
case DW_FORM_addrx2:
+ case DW_FORM_addrx3:
case DW_FORM_addrx4:
case DW_FORM_sec_offset:
case DW_FORM_strp:
@@ -212,35 +214,7 @@ bool DWARFFormValue::skipValue(dwarf::Form Form, DataExtractor DebugInfoData,
}
bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const {
- // First, check DWARF5 form classes.
- if (Form < ArrayRef(DWARF5FormClasses).size() &&
- DWARF5FormClasses[Form] == FC)
- return true;
- // Check more forms from extensions and proposals.
- switch (Form) {
- case DW_FORM_GNU_ref_alt:
- return (FC == FC_Reference);
- case DW_FORM_GNU_addr_index:
- return (FC == FC_Address);
- case DW_FORM_GNU_str_index:
- case DW_FORM_GNU_strp_alt:
- return (FC == FC_String);
- case DW_FORM_LLVM_addrx_offset:
- return (FC == FC_Address);
- default:
- break;
- }
-
- if (FC == FC_SectionOffset) {
- if (Form == DW_FORM_strp || Form == DW_FORM_line_strp)
- return true;
- // In DWARF3 DW_FORM_data4 and DW_FORM_data8 served also as a section
- // offset. If we don't have a DWARFUnit, default to the old behavior.
- if (Form == DW_FORM_data4 || Form == DW_FORM_data8)
- return !U || U->getVersion() <= 3;
- }
-
- return false;
+ return doesFormBelongToClass(Form, FC, U ? U->getVersion() : 3);
}
bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
@@ -300,6 +274,7 @@ bool DWARFFormValue::extractValue(const DWARFDataExtractor &Data,
Value.uval = Data.getU16(OffsetPtr, &Err);
break;
case DW_FORM_strx3:
+ case DW_FORM_addrx3:
Value.uval = Data.getU24(OffsetPtr, &Err);
break;
case DW_FORM_data4:
@@ -420,39 +395,27 @@ void DWARFFormValue::dump(raw_ostream &OS, DIDumpOptions DumpOpts) const {
case DW_FORM_addrx2:
case DW_FORM_addrx3:
case DW_FORM_addrx4:
- case DW_FORM_GNU_addr_index: {
+ case DW_FORM_GNU_addr_index:
+ case DW_FORM_LLVM_addrx_offset: {
if (U == nullptr) {
OS << "<invalid dwarf unit>";
break;
}
- std::optional<object::SectionedAddress> A =
- U->getAddrOffsetSectionItem(UValue);
- if (!A || DumpOpts.Verbose)
- AddrOS << format("indexed (%8.8x) address = ", (uint32_t)UValue);
+ std::optional<object::SectionedAddress> A = getAsSectionedAddress();
+ if (!A || DumpOpts.Verbose) {
+ if (Form == DW_FORM_LLVM_addrx_offset) {
+ uint32_t Index = UValue >> 32;
+ uint32_t Offset = UValue & 0xffffffff;
+ AddrOS << format("indexed (%8.8x) + 0x%x address = ", Index, Offset);
+ } else
+ AddrOS << format("indexed (%8.8x) address = ", (uint32_t)UValue);
+ }
if (A)
dumpSectionedAddress(AddrOS, DumpOpts, *A);
else
OS << "<unresolved>";
break;
}
- case DW_FORM_LLVM_addrx_offset: {
- if (U == nullptr) {
- OS << "<invalid dwarf unit>";
- break;
- }
- uint32_t Index = UValue >> 32;
- uint32_t Offset = UValue & 0xffffffff;
- std::optional<object::SectionedAddress> A =
- U->getAddrOffsetSectionItem(Index);
- if (!A || DumpOpts.Verbose)
- AddrOS << format("indexed (%8.8x) + 0x%x address = ", Index, Offset);
- if (A) {
- A->Address += Offset;
- dumpSectionedAddress(AddrOS, DumpOpts, *A);
- } else
- OS << "<unresolved>";
- break;
- }
case DW_FORM_flag_present:
OS << "true";
break;
@@ -652,16 +615,18 @@ Expected<const char *> DWARFFormValue::getAsCString() const {
}
// Prefer the Unit's string extractor, because for .dwo it will point to
// .debug_str.dwo, while the Context's extractor always uses .debug_str.
- DataExtractor StrData = Form == DW_FORM_line_strp
- ? C->getLineStringExtractor()
- : U ? U->getStringExtractor()
- : C->getStringExtractor();
+ bool IsDebugLineString = Form == DW_FORM_line_strp;
+ DataExtractor StrData =
+ IsDebugLineString ? C->getLineStringExtractor()
+ : U ? U->getStringExtractor() : C->getStringExtractor();
if (const char *Str = StrData.getCStr(&Offset))
return Str;
std::string Msg = FormEncodingString(Form).str();
if (Index)
Msg += (" uses index " + Twine(*Index) + ", but the referenced string").str();
- Msg += (" offset " + Twine(Offset) + " is beyond .debug_str bounds").str();
+ Msg += (" offset " + Twine(Offset) + " is beyond " +
+ (IsDebugLineString ? ".debug_line_str" : ".debug_str") + " bounds")
+ .str();
return make_error<StringError>(Msg,
inconvertibleErrorCode());
}
@@ -672,12 +637,14 @@ std::optional<uint64_t> DWARFFormValue::getAsAddress() const {
return std::nullopt;
}
-std::optional<object::SectionedAddress>
-DWARFFormValue::getAsSectionedAddress() const {
- if (!isFormClass(FC_Address))
+std::optional<object::SectionedAddress> DWARFFormValue::getAsSectionedAddress(
+ const ValueType &Value, const dwarf::Form Form, const DWARFUnit *U) {
+ if (!doesFormBelongToClass(Form, FC_Address, U ? U->getVersion() : 3))
return std::nullopt;
bool AddrOffset = Form == dwarf::DW_FORM_LLVM_addrx_offset;
- if (Form == DW_FORM_GNU_addr_index || Form == DW_FORM_addrx || AddrOffset) {
+ if (Form == DW_FORM_GNU_addr_index || Form == DW_FORM_addrx ||
+ Form == DW_FORM_addrx1 || Form == DW_FORM_addrx2 ||
+ Form == DW_FORM_addrx3 || Form == DW_FORM_addrx4 || AddrOffset) {
uint32_t Index = AddrOffset ? (Value.uval >> 32) : Value.uval;
if (!U)
@@ -693,6 +660,11 @@ DWARFFormValue::getAsSectionedAddress() const {
return {{Value.uval, Value.SectionIndex}};
}
+std::optional<object::SectionedAddress>
+DWARFFormValue::getAsSectionedAddress() const {
+ return getAsSectionedAddress(Value, Form, U);
+}
+
std::optional<uint64_t> DWARFFormValue::getAsReference() const {
if (auto R = getAsRelativeReference())
return R->Unit ? R->Unit->getOffset() + R->Offset : R->Offset;
@@ -785,3 +757,33 @@ DWARFFormValue::getAsFile(DILineInfoSpecifier::FileLineInfoKind Kind) const {
}
return std::nullopt;
}
+
+bool llvm::dwarf::doesFormBelongToClass(dwarf::Form Form, DWARFFormValue::FormClass FC,
+ uint16_t DwarfVersion) {
+ // First, check DWARF5 form classes.
+ if (Form < ArrayRef(DWARF5FormClasses).size() &&
+ DWARF5FormClasses[Form] == FC)
+ return true;
+ // Check more forms from extensions and proposals.
+ switch (Form) {
+ case DW_FORM_GNU_ref_alt:
+ return (FC == DWARFFormValue::FC_Reference);
+ case DW_FORM_GNU_addr_index:
+ return (FC == DWARFFormValue::FC_Address);
+ case DW_FORM_GNU_str_index:
+ case DW_FORM_GNU_strp_alt:
+ return (FC == DWARFFormValue::FC_String);
+ case DW_FORM_LLVM_addrx_offset:
+ return (FC == DWARFFormValue::FC_Address);
+ case DW_FORM_strp:
+ case DW_FORM_line_strp:
+ return (FC == DWARFFormValue::FC_SectionOffset);
+ case DW_FORM_data4:
+ case DW_FORM_data8:
+ // In DWARF3 DW_FORM_data4 and DW_FORM_data8 served also as a section
+ // offset.
+ return (FC == DWARFFormValue::FC_SectionOffset) && (DwarfVersion <= 3);
+ default:
+ return false;
+ }
+}
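
One behavioural point worth noting in the refactor above: DW_FORM_data4 and DW_FORM_data8 only count as section offsets for DWARF v3 and earlier. A hedged, standalone illustration of that branch (the helper name below is invented, not the LLVM API):

#include <cassert>
#include <cstdint>

// Mirrors the DW_FORM_data4 / DW_FORM_data8 case in doesFormBelongToClass:
// in DWARF v3 and earlier these forms doubled as section offsets.
bool dataFormIsSectionOffset(uint16_t DwarfVersion) {
  return DwarfVersion <= 3;
}

int main() {
  assert(dataFormIsSectionOffset(2));
  assert(dataFormIsSectionOffset(3));
  assert(!dataFormIsSectionOffset(4)); // DWARF v4+ uses DW_FORM_sec_offset
}
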
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
index 3f140d21c53c..987e63963a06 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
@@ -16,6 +16,7 @@
#include <cassert>
#include <cinttypes>
#include <cstdint>
+#include <set>
#include <utility>
using namespace llvm;
@@ -114,9 +115,9 @@ void DWARFGdbIndex::dump(raw_ostream &OS) {
bool DWARFGdbIndex::parseImpl(DataExtractor Data) {
uint64_t Offset = 0;
- // Only version 7 is supported at this moment.
+ // Only version 7 and 8 are supported at this moment.
Version = Data.getU32(&Offset);
- if (Version != 7)
+ if (Version != 7 && Version != 8)
return false;
CuListOffset = Data.getU32(&Offset);
@@ -166,25 +167,26 @@ bool DWARFGdbIndex::parseImpl(DataExtractor Data) {
// for both a string and a CU vector.
uint32_t SymTableSize = (ConstantPoolOffset - SymbolTableOffset) / 8;
SymbolTable.reserve(SymTableSize);
- uint32_t CuVectorsTotal = 0;
+ std::set<uint32_t> CUOffsets;
for (uint32_t i = 0; i < SymTableSize; ++i) {
uint32_t NameOffset = Data.getU32(&Offset);
uint32_t CuVecOffset = Data.getU32(&Offset);
SymbolTable.push_back({NameOffset, CuVecOffset});
if (NameOffset || CuVecOffset)
- ++CuVectorsTotal;
+ CUOffsets.insert(CuVecOffset);
}
// The constant pool. CU vectors are stored first, followed by strings.
// The first value is the number of CU indices in the vector. Each subsequent
// value is the index and symbol attributes of a CU in the CU list.
- for (uint32_t i = 0; i < CuVectorsTotal; ++i) {
+ for (auto CUOffset : CUOffsets) {
+ Offset = ConstantPoolOffset + CUOffset;
ConstantPoolVectors.emplace_back(0, SmallVector<uint32_t, 0>());
auto &Vec = ConstantPoolVectors.back();
Vec.first = Offset - ConstantPoolOffset;
uint32_t Num = Data.getU32(&Offset);
- for (uint32_t j = 0; j < Num; ++j)
+ for (uint32_t J = 0; J < Num; ++J)
Vec.second.push_back(Data.getU32(&Offset));
}
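
For context on the loop above: the constant pool stores the CU vectors before the strings, each vector being a u32 count followed by that many u32 entries, and symbol-table rows refer to a vector by its offset into the pool, which is why the new code collects the unique offsets in a set. A minimal sketch of reading one vector, assuming little-endian, in-bounds input rather than using the LLVM extractors:

#include <cstdint>
#include <cstring>
#include <vector>

std::vector<uint32_t> readCuVector(const std::vector<uint8_t> &ConstantPool,
                                   uint32_t CuVecOffset) {
  auto ReadU32 = [&](uint64_t &O) {
    uint32_t V = 0;
    std::memcpy(&V, ConstantPool.data() + O, sizeof(V));
    O += sizeof(V);
    return V;
  };
  uint64_t Offset = CuVecOffset;
  uint32_t Count = ReadU32(Offset);     // number of CU indices in the vector
  std::vector<uint32_t> Entries;
  Entries.reserve(Count);
  for (uint32_t I = 0; I < Count; ++I)
    Entries.push_back(ReadU32(Offset)); // CU index plus symbol attributes
  return Entries;
}
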
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp b/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp
index 6a1423d37d9f..c474de607626 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFTypePrinter.cpp
@@ -424,11 +424,11 @@ bool DWARFTypePrinter::appendTemplateParameters(DWARFDie D,
OS << (char)Val;
OS << "'";
} else if (Val < 256)
- OS << to_string(llvm::format("'\\x%02x'", Val));
+ OS << llvm::format("'\\x%02" PRIx64 "'", Val);
else if (Val <= 0xFFFF)
- OS << to_string(llvm::format("'\\u%04x'", Val));
+ OS << llvm::format("'\\u%04" PRIx64 "'", Val);
else
- OS << to_string(llvm::format("'\\U%08x'", Val));
+ OS << llvm::format("'\\U%08" PRIx64 "'", Val);
}
}
continue;
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index c199e0118a6f..19678f121982 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -176,7 +176,7 @@ DWARFUnitVector::getUnitForIndexEntry(const DWARFUnitIndex::Entry &E) {
auto U = Parser(Offset, DW_SECT_INFO, nullptr, &E);
if (!U)
- U = nullptr;
+ return nullptr;
auto *NewCU = U.get();
this->insert(CU, std::move(U));
@@ -1040,8 +1040,16 @@ DWARFUnit::getLastChildEntry(const DWARFDebugInfoEntry *Die) const {
}
const DWARFAbbreviationDeclarationSet *DWARFUnit::getAbbreviations() const {
- if (!Abbrevs)
- Abbrevs = Abbrev->getAbbreviationDeclarationSet(getAbbreviationsOffset());
+ if (!Abbrevs) {
+ Expected<const DWARFAbbreviationDeclarationSet *> AbbrevsOrError =
+ Abbrev->getAbbreviationDeclarationSet(getAbbreviationsOffset());
+ if (!AbbrevsOrError) {
+ // FIXME: We should propagate this error upwards.
+ consumeError(AbbrevsOrError.takeError());
+ return nullptr;
+ }
+ Abbrevs = *AbbrevsOrError;
+ }
return Abbrevs;
}
@@ -1049,7 +1057,7 @@ std::optional<object::SectionedAddress> DWARFUnit::getBaseAddress() {
if (BaseAddr)
return BaseAddr;
- DWARFDie UnitDie = getUnitDIE();
+ DWARFDie UnitDie = (SU ? SU : this)->getUnitDIE();
std::optional<DWARFFormValue> PC =
UnitDie.find({DW_AT_low_pc, DW_AT_entry_pc});
BaseAddr = toSectionedAddress(PC);
diff --git a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
index c90237d4cb77..58900e1e80cb 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp
@@ -150,8 +150,15 @@ bool DWARFVerifier::verifyUnitHeader(const DWARFDataExtractor DebugInfoData,
AddrSize = DebugInfoData.getU8(Offset);
}
- if (!DCtx.getDebugAbbrev()->getAbbreviationDeclarationSet(AbbrOffset))
+ Expected<const DWARFAbbreviationDeclarationSet *> AbbrevSetOrErr =
+ DCtx.getDebugAbbrev()->getAbbreviationDeclarationSet(AbbrOffset);
+ if (!AbbrevSetOrErr) {
ValidAbbrevOffset = false;
+ // FIXME: A problematic debug_abbrev section is reported below in the form
+ // of a `note:`. We should propagate this error there (or elsewhere) to
+ // avoid losing the specific problem with the debug_abbrev section.
+ consumeError(AbbrevSetOrErr.takeError());
+ }
ValidLength = DebugInfoData.isValidOffset(OffsetStart + Length + 3);
ValidVersion = DWARFContext::isSupportedVersion(Version);
@@ -299,20 +306,27 @@ unsigned DWARFVerifier::verifyDebugInfoCallSite(const DWARFDie &Die) {
}
unsigned DWARFVerifier::verifyAbbrevSection(const DWARFDebugAbbrev *Abbrev) {
+ if (!Abbrev)
+ return 0;
+
+ Expected<const DWARFAbbreviationDeclarationSet *> AbbrDeclsOrErr =
+ Abbrev->getAbbreviationDeclarationSet(0);
+ if (!AbbrDeclsOrErr) {
+ error() << toString(AbbrDeclsOrErr.takeError()) << "\n";
+ return 1;
+ }
+
+ const auto *AbbrDecls = *AbbrDeclsOrErr;
unsigned NumErrors = 0;
- if (Abbrev) {
- const DWARFAbbreviationDeclarationSet *AbbrDecls =
- Abbrev->getAbbreviationDeclarationSet(0);
- for (auto AbbrDecl : *AbbrDecls) {
- SmallDenseSet<uint16_t> AttributeSet;
- for (auto Attribute : AbbrDecl.attributes()) {
- auto Result = AttributeSet.insert(Attribute.Attr);
- if (!Result.second) {
- error() << "Abbreviation declaration contains multiple "
- << AttributeString(Attribute.Attr) << " attributes.\n";
- AbbrDecl.dump(OS);
- ++NumErrors;
- }
+ for (auto AbbrDecl : *AbbrDecls) {
+ SmallDenseSet<uint16_t> AttributeSet;
+ for (auto Attribute : AbbrDecl.attributes()) {
+ auto Result = AttributeSet.insert(Attribute.Attr);
+ if (!Result.second) {
+ error() << "Abbreviation declaration contains multiple "
+ << AttributeString(Attribute.Attr) << " attributes.\n";
+ AbbrDecl.dump(OS);
+ ++NumErrors;
}
}
}
@@ -777,7 +791,8 @@ unsigned DWARFVerifier::verifyDebugInfoForm(const DWARFDie &Die,
case DW_FORM_strx1:
case DW_FORM_strx2:
case DW_FORM_strx3:
- case DW_FORM_strx4: {
+ case DW_FORM_strx4:
+ case DW_FORM_line_strp: {
if (Error E = AttrValue.Value.getAsCString().takeError()) {
++NumErrors;
error() << toString(std::move(E)) << ":\n";
@@ -867,8 +882,10 @@ void DWARFVerifier::verifyDebugLineRows() {
continue;
// Verify prologue.
+ bool isDWARF5 = LineTable->Prologue.getVersion() >= 5;
uint32_t MaxDirIndex = LineTable->Prologue.IncludeDirectories.size();
- uint32_t FileIndex = 1;
+ uint32_t MinFileIndex = isDWARF5 ? 0 : 1;
+ uint32_t FileIndex = MinFileIndex;
StringMap<uint16_t> FullPathMap;
for (const auto &FileName : LineTable->Prologue.FileNames) {
// Verify directory index.
@@ -926,12 +943,11 @@ void DWARFVerifier::verifyDebugLineRows() {
// Verify file index.
if (!LineTable->hasFileAtIndex(Row.File)) {
++NumDebugLineErrors;
- bool isDWARF5 = LineTable->Prologue.getVersion() >= 5;
error() << ".debug_line["
<< format("0x%08" PRIx64,
*toSectionOffset(Die.find(DW_AT_stmt_list)))
<< "][" << RowIndex << "] has invalid file index " << Row.File
- << " (valid values are [" << (isDWARF5 ? "0," : "1,")
+ << " (valid values are [" << MinFileIndex << ','
<< LineTable->Prologue.FileNames.size()
<< (isDWARF5 ? ")" : "]") << "):\n";
DWARFDebugLine::Row::dumpTableHeader(OS, 0);
@@ -1627,6 +1643,116 @@ bool DWARFVerifier::handleAccelTables() {
return NumErrors == 0;
}
+bool DWARFVerifier::handleDebugStrOffsets() {
+ OS << "Verifying .debug_str_offsets...\n";
+ const DWARFObject &DObj = DCtx.getDWARFObj();
+ bool Success = true;
+ Success &= verifyDebugStrOffsets(
+ ".debug_str_offsets.dwo", DObj.getStrOffsetsDWOSection(),
+ DObj.getStrDWOSection(), &DWARFObject::forEachInfoDWOSections);
+ Success &= verifyDebugStrOffsets(
+ ".debug_str_offsets", DObj.getStrOffsetsSection(), DObj.getStrSection(),
+ &DWARFObject::forEachInfoSections);
+ return Success;
+}
+
+bool DWARFVerifier::verifyDebugStrOffsets(
+ StringRef SectionName, const DWARFSection &Section, StringRef StrData,
+ void (DWARFObject::*VisitInfoSections)(
+ function_ref<void(const DWARFSection &)>) const) {
+ const DWARFObject &DObj = DCtx.getDWARFObj();
+ uint16_t InfoVersion = 0;
+ DwarfFormat InfoFormat = DwarfFormat::DWARF32;
+ (DObj.*VisitInfoSections)([&](const DWARFSection &S) {
+ if (InfoVersion)
+ return;
+ DWARFDataExtractor DebugInfoData(DObj, S, DCtx.isLittleEndian(), 0);
+ uint64_t Offset = 0;
+ InfoFormat = DebugInfoData.getInitialLength(&Offset).second;
+ InfoVersion = DebugInfoData.getU16(&Offset);
+ });
+
+ DWARFDataExtractor DA(DObj, Section, DCtx.isLittleEndian(), 0);
+
+ DataExtractor::Cursor C(0);
+ uint64_t NextUnit = 0;
+ bool Success = true;
+ while (C.seek(NextUnit), C.tell() < DA.getData().size()) {
+ DwarfFormat Format;
+ uint64_t Length;
+ uint64_t StartOffset = C.tell();
+ if (InfoVersion == 4) {
+ Format = InfoFormat;
+ Length = DA.getData().size();
+ NextUnit = C.tell() + Length;
+ } else {
+ std::tie(Length, Format) = DA.getInitialLength(C);
+ if (!C)
+ break;
+ if (C.tell() + Length > DA.getData().size()) {
+ error() << formatv(
+ "{0}: contribution {1:X}: length exceeds available space "
+ "(contribution "
+ "offset ({1:X}) + length field space ({2:X}) + length ({3:X}) == "
+ "{4:X} > section size {5:X})\n",
+ SectionName, StartOffset, C.tell() - StartOffset, Length,
+ C.tell() + Length, DA.getData().size());
+ Success = false;
+ // Nothing more to do - no other contributions to try.
+ break;
+ }
+ NextUnit = C.tell() + Length;
+ uint8_t Version = DA.getU16(C);
+ if (C && Version != 5) {
+ error() << formatv("{0}: contribution {1:X}: invalid version {2}\n",
+ SectionName, StartOffset, Version);
+ Success = false;
+ // Can't parse the rest of this contribution, since we don't know the
+ // version, but we can pick up with the next contribution.
+ continue;
+ }
+ (void)DA.getU16(C); // padding
+ }
+ uint64_t OffsetByteSize = getDwarfOffsetByteSize(Format);
+ DA.setAddressSize(OffsetByteSize);
+ uint64_t Remainder = (Length - 4) % OffsetByteSize;
+ if (Remainder != 0) {
+ error() << formatv(
+ "{0}: contribution {1:X}: invalid length ((length ({2:X}) "
+ "- header (0x4)) % offset size {3:X} == {4:X} != 0)\n",
+ SectionName, StartOffset, Length, OffsetByteSize, Remainder);
+ Success = false;
+ }
+ for (uint64_t Index = 0; C && C.tell() + OffsetByteSize <= NextUnit; ++Index) {
+ uint64_t OffOff = C.tell();
+ uint64_t StrOff = DA.getAddress(C);
+ // Check that StrOff refers to the start of a string.
+ if (StrOff == 0)
+ continue;
+ if (StrData.size() <= StrOff) {
+ error() << formatv(
+ "{0}: contribution {1:X}: index {2:X}: invalid string "
+ "offset *{3:X} == {4:X}, is beyond the bounds of the string section of length {5:X}\n",
+ SectionName, StartOffset, Index, OffOff, StrOff, StrData.size());
+ continue;
+ }
+ if (StrData[StrOff - 1] == '\0')
+ continue;
+ error() << formatv("{0}: contribution {1:X}: index {2:X}: invalid string "
+ "offset *{3:X} == {4:X}, is neither zero nor "
+ "immediately following a null character\n",
+ SectionName, StartOffset, Index, OffOff, StrOff);
+ Success = false;
+ }
+ }
+
+ if (Error E = C.takeError()) {
+ error() << SectionName << ": " << toString(std::move(E)) << '\n';
+ return false;
+ }
+ return Success;
+}
+
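
A quick numeric check of the DWARF v5 contribution validation above (DWARF32, so offsets are 4 bytes): a unit length of 0x24 covers the 2-byte version, 2 bytes of padding, and (0x24 - 4) / 4 = 8 string offsets, so the remainder test passes, while an off-by-one length is rejected.

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Length = 0x24, OffsetByteSize = 4; // DWARF32 contribution
  assert((Length - 4) % OffsetByteSize == 0);       // well-formed header
  assert((Length - 4) / OffsetByteSize == 8);       // eight string offsets
  assert((Length + 1 - 4) % OffsetByteSize != 0);   // a bad length is caught
}
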
raw_ostream &DWARFVerifier::error() const { return WithColor::error(OS); }
raw_ostream &DWARFVerifier::warn() const { return WithColor::warning(OS); }
diff --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index 51058fc09cf1..145a43d3b381 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -96,57 +96,83 @@ llvm::Expected<FunctionInfo> FunctionInfo::decode(DataExtractor &Data,
return std::move(FI);
}
-llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &O) const {
+uint64_t FunctionInfo::cacheEncoding() {
+ EncodingCache.clear();
+ if (!isValid())
+ return 0;
+ raw_svector_ostream OutStrm(EncodingCache);
+ FileWriter FW(OutStrm, support::endian::system_endianness());
+ llvm::Expected<uint64_t> Result = encode(FW);
+ if (!Result) {
+ EncodingCache.clear();
+ consumeError(Result.takeError());
+ return 0;
+ }
+ return EncodingCache.size();
+}
+
+llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &Out) const {
if (!isValid())
return createStringError(std::errc::invalid_argument,
"attempted to encode invalid FunctionInfo object");
// Align FunctionInfo data to a 4 byte alignment.
- O.alignTo(4);
- const uint64_t FuncInfoOffset = O.tell();
+ Out.alignTo(4);
+ const uint64_t FuncInfoOffset = Out.tell();
+ // Check if we have already encoded this function info into EncodingCache.
+ // This will be non empty when creating segmented GSYM files as we need to
+ // precompute exactly how big FunctionInfo objects encode into so we can
+ // accurately make segments of a specific size.
+ if (!EncodingCache.empty() &&
+ support::endian::system_endianness() == Out.getByteOrder()) {
+ // We already encoded this object, just write out the bytes.
+ Out.writeData(llvm::ArrayRef<uint8_t>((const uint8_t *)EncodingCache.data(),
+ EncodingCache.size()));
+ return FuncInfoOffset;
+ }
// Write the size in bytes of this function as a uint32_t. This can be zero
// if we just have a symbol from a symbol table and that symbol has no size.
- O.writeU32(size());
+ Out.writeU32(size());
// Write the name of this function as a uint32_t string table offset.
- O.writeU32(Name);
+ Out.writeU32(Name);
if (OptLineTable) {
- O.writeU32(InfoType::LineTableInfo);
+ Out.writeU32(InfoType::LineTableInfo);
// Write a uint32_t length as zero for now, we will fix this up after
// writing the LineTable out with the number of bytes that were written.
- O.writeU32(0);
- const auto StartOffset = O.tell();
- llvm::Error err = OptLineTable->encode(O, Range.start());
+ Out.writeU32(0);
+ const auto StartOffset = Out.tell();
+ llvm::Error err = OptLineTable->encode(Out, Range.start());
if (err)
return std::move(err);
- const auto Length = O.tell() - StartOffset;
+ const auto Length = Out.tell() - StartOffset;
if (Length > UINT32_MAX)
return createStringError(std::errc::invalid_argument,
"LineTable length is greater than UINT32_MAX");
// Fixup the size of the LineTable data with the correct size.
- O.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
+ Out.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
}
// Write out the inline function info if we have any and if it is valid.
if (Inline) {
- O.writeU32(InfoType::InlineInfo);
+ Out.writeU32(InfoType::InlineInfo);
// Write a uint32_t length as zero for now, we will fix this up after
// writing the LineTable out with the number of bytes that were written.
- O.writeU32(0);
- const auto StartOffset = O.tell();
- llvm::Error err = Inline->encode(O, Range.start());
+ Out.writeU32(0);
+ const auto StartOffset = Out.tell();
+ llvm::Error err = Inline->encode(Out, Range.start());
if (err)
return std::move(err);
- const auto Length = O.tell() - StartOffset;
+ const auto Length = Out.tell() - StartOffset;
if (Length > UINT32_MAX)
return createStringError(std::errc::invalid_argument,
"InlineInfo length is greater than UINT32_MAX");
// Fixup the size of the InlineInfo data with the correct size.
- O.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
+ Out.fixup32(static_cast<uint32_t>(Length), StartOffset - 4);
}
  // Terminate the data chunks with an end of list marker with zero size
- O.writeU32(InfoType::EndOfList);
- O.writeU32(0);
+ Out.writeU32(InfoType::EndOfList);
+ Out.writeU32(0);
return FuncInfoOffset;
}
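
The cacheEncoding() hunk above pre-serializes each FunctionInfo into an in-memory buffer so the segmented-GSYM writer can know the exact encoded size before any bytes hit disk. Below is a minimal sketch of that cache-then-write pattern using only standard C++; Record, encodeTo and write are illustrative names, not part of the GSYM API, and the byte format is a toy one.

#include <cstdint>
#include <string>
#include <vector>

// Hypothetical record that caches its own serialized form so callers can
// query the exact encoded size before committing bytes to a file.
struct Record {
  std::string Name;
  std::vector<uint8_t> EncodingCache; // filled by cacheEncoding()

  // Serialize into an arbitrary byte sink (toy format: 1-byte length + bytes).
  void encodeTo(std::vector<uint8_t> &Out) const {
    Out.push_back(static_cast<uint8_t>(Name.size()));
    Out.insert(Out.end(), Name.begin(), Name.end());
  }

  // Pre-encode into the cache and return the byte size.
  uint64_t cacheEncoding() {
    EncodingCache.clear();
    encodeTo(EncodingCache);
    return EncodingCache.size();
  }

  // Write either the cached bytes or a fresh encoding.
  void write(std::vector<uint8_t> &Out) const {
    if (!EncodingCache.empty())
      Out.insert(Out.end(), EncodingCache.begin(), EncodingCache.end());
    else
      encodeTo(Out);
  }
};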
diff --git a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
index 8281938770cf..60b6dbc6a12d 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
@@ -34,8 +34,10 @@ uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
// requirements.
const uint32_t Dir = insertString(directory);
const uint32_t Base = insertString(filename);
- FileEntry FE(Dir, Base);
+ return insertFileEntry(FileEntry(Dir, Base));
+}
+uint32_t GsymCreator::insertFileEntry(FileEntry FE) {
std::lock_guard<std::mutex> Guard(Mutex);
const auto NextIndex = Files.size();
// Find FE in hash map and insert if not present.
@@ -45,8 +47,26 @@ uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {
return R.first->second;
}
+uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) {
+  // File index zero is reserved for a FileEntry with no directory and no
+  // filename. For any other file we need to copy the strings for the
+  // directory and filename.
+ if (FileIdx == 0)
+ return 0;
+ const FileEntry SrcFE = SrcGC.Files[FileIdx];
+ // Copy the strings for the file and then add the newly converted file entry.
+ uint32_t Dir = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second);
+ uint32_t Base = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second);
+ FileEntry DstFE(Dir, Base);
+ return insertFileEntry(DstFE);
+}
+
+
llvm::Error GsymCreator::save(StringRef Path,
- llvm::support::endianness ByteOrder) const {
+ llvm::support::endianness ByteOrder,
+ std::optional<uint64_t> SegmentSize) const {
+ if (SegmentSize)
+ return saveSegments(Path, ByteOrder, *SegmentSize);
std::error_code EC;
raw_fd_ostream OutStrm(Path, EC);
if (EC)
@@ -68,16 +88,17 @@ llvm::Error GsymCreator::encode(FileWriter &O) const {
return createStringError(std::errc::invalid_argument,
"too many FunctionInfos");
- const uint64_t MinAddr =
- BaseAddress ? *BaseAddress : Funcs.front().startAddress();
- const uint64_t MaxAddr = Funcs.back().startAddress();
- const uint64_t AddrDelta = MaxAddr - MinAddr;
+ std::optional<uint64_t> BaseAddress = getBaseAddress();
+ // Base address should be valid if we have any functions.
+ if (!BaseAddress)
+ return createStringError(std::errc::invalid_argument,
+ "invalid base address");
Header Hdr;
Hdr.Magic = GSYM_MAGIC;
Hdr.Version = GSYM_VERSION;
- Hdr.AddrOffSize = 0;
+ Hdr.AddrOffSize = getAddressOffsetSize();
Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
- Hdr.BaseAddress = MinAddr;
+ Hdr.BaseAddress = *BaseAddress;
Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
Hdr.StrtabOffset = 0; // We will fix this up later.
Hdr.StrtabSize = 0; // We will fix this up later.
@@ -85,15 +106,6 @@ llvm::Error GsymCreator::encode(FileWriter &O) const {
if (UUID.size() > sizeof(Hdr.UUID))
return createStringError(std::errc::invalid_argument,
"invalid UUID size %u", (uint32_t)UUID.size());
- // Set the address offset size correctly in the GSYM header.
- if (AddrDelta <= UINT8_MAX)
- Hdr.AddrOffSize = 1;
- else if (AddrDelta <= UINT16_MAX)
- Hdr.AddrOffSize = 2;
- else if (AddrDelta <= UINT32_MAX)
- Hdr.AddrOffSize = 4;
- else
- Hdr.AddrOffSize = 8;
// Copy the UUID value if we have one.
if (UUID.size() > 0)
memcpy(Hdr.UUID, UUID.data(), UUID.size());
@@ -102,10 +114,17 @@ llvm::Error GsymCreator::encode(FileWriter &O) const {
if (Err)
return Err;
+ const uint64_t MaxAddressOffset = getMaxAddressOffset();
// Write out the address offsets.
O.alignTo(Hdr.AddrOffSize);
for (const auto &FuncInfo : Funcs) {
uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
+    // Make sure we calculated the address offset byte size correctly by
+    // verifying the current address offset is within range. Bugs have been
+    // introduced by code changes here before, so it is good to catch this
+    // during testing.
+ assert(AddrOffset <= MaxAddressOffset);
+ (void)MaxAddressOffset;
switch (Hdr.AddrOffSize) {
case 1:
O.writeU8(static_cast<uint8_t>(AddrOffset));
@@ -142,7 +161,7 @@ llvm::Error GsymCreator::encode(FileWriter &O) const {
O.writeU32(File.Base);
}
- // Write out the sting table.
+ // Write out the string table.
const off_t StrtabOffset = O.tell();
StrTab.write(O.get_stream());
const off_t StrtabSize = O.tell() - StrtabOffset;
@@ -300,6 +319,13 @@ llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
return Error::success();
}
+uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) {
+ // String offset at zero is always the empty string, no copying needed.
+ if (StrOff == 0)
+ return 0;
+ return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second);
+}
+
uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
if (S.empty())
return 0;
@@ -318,7 +344,13 @@ uint32_t GsymCreator::insertString(StringRef S, bool Copy) {
CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),
CHStr.hash()};
}
- return StrTab.add(CHStr);
+ const uint32_t StrOff = StrTab.add(CHStr);
+  // Save a mapping of string offsets to the cached string reference in case
+  // we need to segment the GSYM file and copy strings from one string table
+  // to another.
+ if (StringOffsetMap.count(StrOff) == 0)
+ StringOffsetMap.insert(std::make_pair(StrOff, CHStr));
+ return StrOff;
}
void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
@@ -360,3 +392,187 @@ bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const {
std::lock_guard<std::mutex> Guard(Mutex);
return Ranges.contains(Addr);
}
+
+std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const {
+ if (Finalized && !Funcs.empty())
+ return std::optional<uint64_t>(Funcs.front().startAddress());
+  // This code is used when segmenting GSYM files to help determine the size
+  // of the GSYM header while new FunctionInfo objects are still being added,
+  // so this object has not been finalized yet.
+ if (Ranges.empty())
+ return std::nullopt;
+ return std::optional<uint64_t>(Ranges.begin()->start());
+}
+
+std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const {
+ if (Finalized && !Funcs.empty())
+ return std::optional<uint64_t>(Funcs.back().startAddress());
+  // This code is used when segmenting GSYM files to help determine the size
+  // of the GSYM header while new FunctionInfo objects are still being added,
+  // so this object has not been finalized yet.
+ if (Ranges.empty())
+ return std::nullopt;
+ return std::optional<uint64_t>((Ranges.end() - 1)->end());
+}
+
+std::optional<uint64_t> GsymCreator::getBaseAddress() const {
+ if (BaseAddress)
+ return BaseAddress;
+ return getFirstFunctionAddress();
+}
+
+uint64_t GsymCreator::getMaxAddressOffset() const {
+ switch (getAddressOffsetSize()) {
+ case 1: return UINT8_MAX;
+ case 2: return UINT16_MAX;
+ case 4: return UINT32_MAX;
+ case 8: return UINT64_MAX;
+ }
+ llvm_unreachable("invalid address offset");
+}
+
+uint8_t GsymCreator::getAddressOffsetSize() const {
+ const std::optional<uint64_t> BaseAddress = getBaseAddress();
+ const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress();
+ if (BaseAddress && LastFuncAddr) {
+ const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress;
+ if (AddrDelta <= UINT8_MAX)
+ return 1;
+ else if (AddrDelta <= UINT16_MAX)
+ return 2;
+ else if (AddrDelta <= UINT32_MAX)
+ return 4;
+ return 8;
+ }
+ return 1;
+}
+
+uint64_t GsymCreator::calculateHeaderAndTableSize() const {
+ uint64_t Size = sizeof(Header);
+ const size_t NumFuncs = Funcs.size();
+ // Add size of address offset table
+ Size += NumFuncs * getAddressOffsetSize();
+ // Add size of address info offsets which are 32 bit integers in version 1.
+ Size += NumFuncs * sizeof(uint32_t);
+ // Add file table size
+ Size += Files.size() * sizeof(FileEntry);
+ // Add string table size
+ Size += StrTab.getSize();
+
+ return Size;
+}
+
+// This function takes an InlineInfo object that was copy-constructed from an
+// InlineInfo in \a SrcGC and updates all members that point to strings and
+// files to point to strings and files from this GsymCreator.
+void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) {
+ II.Name = copyString(SrcGC, II.Name);
+ II.CallFile = copyFile(SrcGC, II.CallFile);
+ for (auto &ChildII: II.Children)
+ fixupInlineInfo(SrcGC, ChildII);
+}
+
+uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) {
+ // To copy a function info we need to copy any files and strings over into
+ // this GsymCreator and then copy the function info and update the string
+ // table offsets to match the new offsets.
+ const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx];
+ Ranges.insert(SrcFI.Range);
+
+ FunctionInfo DstFI;
+ DstFI.Range = SrcFI.Range;
+ DstFI.Name = copyString(SrcGC, SrcFI.Name);
+ // Copy the line table if there is one.
+ if (SrcFI.OptLineTable) {
+ // Copy the entire line table.
+ DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value());
+    // Fixup all LineEntry::File entries, which are indexes in the file table
+    // from SrcGC and must be converted to file indexes from this GsymCreator.
+ LineTable &DstLT = DstFI.OptLineTable.value();
+ const size_t NumLines = DstLT.size();
+ for (size_t I=0; I<NumLines; ++I) {
+ LineEntry &LE = DstLT.get(I);
+ LE.File = copyFile(SrcGC, LE.File);
+ }
+ }
+ // Copy the inline information if needed.
+ if (SrcFI.Inline) {
+ // Make a copy of the source inline information.
+ DstFI.Inline = SrcFI.Inline.value();
+ // Fixup all strings and files in the copied inline information.
+ fixupInlineInfo(SrcGC, *DstFI.Inline);
+ }
+ std::lock_guard<std::mutex> Guard(Mutex);
+ Funcs.push_back(DstFI);
+ return Funcs.back().cacheEncoding();
+}
+
+llvm::Error GsymCreator::saveSegments(StringRef Path,
+ llvm::support::endianness ByteOrder,
+ uint64_t SegmentSize) const {
+ if (SegmentSize == 0)
+ return createStringError(std::errc::invalid_argument,
+ "invalid segment size zero");
+
+ size_t FuncIdx = 0;
+ const size_t NumFuncs = Funcs.size();
+ while (FuncIdx < NumFuncs) {
+ llvm::Expected<std::unique_ptr<GsymCreator>> ExpectedGC =
+ createSegment(SegmentSize, FuncIdx);
+ if (ExpectedGC) {
+ GsymCreator *GC = ExpectedGC->get();
+ if (GC == NULL)
+        break; // We had no more functions to encode.
+ raw_null_ostream ErrorStrm;
+ llvm::Error Err = GC->finalize(ErrorStrm);
+ if (Err)
+ return Err;
+ std::string SegmentedGsymPath;
+ raw_string_ostream SGP(SegmentedGsymPath);
+ std::optional<uint64_t> FirstFuncAddr = GC->getFirstFunctionAddress();
+ if (FirstFuncAddr) {
+ SGP << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1);
+ SGP.flush();
+ Err = GC->save(SegmentedGsymPath, ByteOrder, std::nullopt);
+ if (Err)
+ return Err;
+ }
+ } else {
+ return ExpectedGC.takeError();
+ }
+ }
+ return Error::success();
+}
+
+llvm::Expected<std::unique_ptr<GsymCreator>>
+GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {
+  // No more function entries to encode, so return an empty unique pointer.
+ if (FuncIdx >= Funcs.size())
+ return std::unique_ptr<GsymCreator>();
+
+ std::unique_ptr<GsymCreator> GC(new GsymCreator(/*Quiet=*/true));
+ // Set the base address if there is one.
+ if (BaseAddress)
+ GC->setBaseAddress(*BaseAddress);
+ // Copy the UUID value from this object into the new creator.
+ GC->setUUID(UUID);
+ const size_t NumFuncs = Funcs.size();
+  // Track how big the function infos are for the current segment so we can
+  // emit segments that are close to the requested size. Computing the current
+  // header and table sizes is quick, so we can do that on each iteration.
+ uint64_t SegmentFuncInfosSize = 0;
+ for (; FuncIdx < NumFuncs; ++FuncIdx) {
+ const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize();
+ if (HeaderAndTableSize + SegmentFuncInfosSize >= SegmentSize) {
+ if (SegmentFuncInfosSize == 0)
+ return createStringError(std::errc::invalid_argument,
+                               "a segment size of %" PRIu64 " is too small to "
+ "fit any function infos, specify a larger value",
+ SegmentSize);
+
+ break;
+ }
+ SegmentFuncInfosSize += alignTo(GC->copyFunctionInfo(*this, FuncIdx), 4);
+ }
+ return std::move(GC);
+}
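
createSegment() above keeps copying FunctionInfo objects into a new creator until the running header/table size plus the 4-byte-aligned, pre-encoded function sizes would reach the requested segment size. The following is a simplified, self-contained sketch of that greedy sizing loop; unlike the real code, which returns an error when a single FunctionInfo cannot fit, this sketch gives such an item its own segment, and HeaderSize is treated as a constant rather than recomputed per segment.

#include <cstdint>
#include <vector>

// Round Value up to the next multiple of Align (Align must be a power of two).
static uint64_t alignUp(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) & ~(Align - 1);
}

// Greedily split pre-encoded item sizes into segments no larger than
// SegmentSize, accounting for a fixed per-segment header/table size.
static std::vector<std::vector<uint64_t>>
makeSegments(const std::vector<uint64_t> &ItemSizes, uint64_t HeaderSize,
             uint64_t SegmentSize) {
  std::vector<std::vector<uint64_t>> Segments;
  std::vector<uint64_t> Current;
  uint64_t CurrentSize = 0;
  for (uint64_t Size : ItemSizes) {
    uint64_t Aligned = alignUp(Size, 4);
    // Close the current segment when adding this item would overflow it.
    if (!Current.empty() && HeaderSize + CurrentSize + Aligned >= SegmentSize) {
      Segments.push_back(Current);
      Current.clear();
      CurrentSize = 0;
    }
    Current.push_back(Size);
    CurrentSize += Aligned;
  }
  if (!Current.empty())
    Segments.push_back(Current);
  return Segments;
}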
diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp
index a320752befc4..cfe304eead51 100644
--- a/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Core/LVElement.cpp
@@ -17,6 +17,7 @@
#include "llvm/DebugInfo/LogicalView/Core/LVType.h"
using namespace llvm;
+using namespace llvm::codeview;
using namespace llvm::logicalview;
#define DEBUG_TYPE "Element"
@@ -103,6 +104,14 @@ void LVElement::setFilename(StringRef Filename) {
FilenameIndex = getStringIndex(Filename);
}
+void LVElement::setInnerComponent(StringRef Name) {
+ if (Name.size()) {
+ StringRef InnerComponent;
+ std::tie(std::ignore, InnerComponent) = getInnerComponent(Name);
+ setName(InnerComponent);
+ }
+}
+
// Return the string representation of a DIE offset.
std::string LVElement::typeOffsetAsString() const {
if (options().getAttributeOffset()) {
@@ -126,6 +135,19 @@ StringRef LVElement::accessibilityString(uint32_t Access) const {
}
}
+std::optional<uint32_t> LVElement::getAccessibilityCode(MemberAccess Access) {
+ switch (Access) {
+ case MemberAccess::Private:
+ return dwarf::DW_ACCESS_private;
+ case MemberAccess::Protected:
+ return dwarf::DW_ACCESS_protected;
+ case MemberAccess::Public:
+ return dwarf::DW_ACCESS_public;
+ default:
+ return std::nullopt;
+ }
+}
+
StringRef LVElement::externalString() const {
return getIsExternal() ? "extern" : StringRef();
}
@@ -160,6 +182,21 @@ StringRef LVElement::virtualityString(uint32_t Virtuality) const {
}
}
+std::optional<uint32_t> LVElement::getVirtualityCode(MethodKind Virtuality) {
+ switch (Virtuality) {
+ case MethodKind::Virtual:
+ return dwarf::DW_VIRTUALITY_virtual;
+ case MethodKind::PureVirtual:
+ return dwarf::DW_VIRTUALITY_pure_virtual;
+ case MethodKind::IntroducingVirtual:
+ case MethodKind::PureIntroducingVirtual:
+ // No direct equivalents in DWARF. Assume Virtual.
+ return dwarf::DW_VIRTUALITY_virtual;
+ default:
+ return std::nullopt;
+ }
+}
+
void LVElement::resolve() {
if (getIsResolved())
return;
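
The new getAccessibilityCode() and getVirtualityCode() helpers above translate CodeView enumerators into the closest DWARF constants, returning std::nullopt when there is no direct equivalent. Here is a hedged sketch of the same mapping shape, written against local stand-in enums rather than the llvm::codeview and llvm::dwarf definitions; the DW_ACCESS_* values shown are the ones defined by the DWARF specification.

#include <cstdint>
#include <optional>

// Stand-ins for codeview::MemberAccess and the DWARF DW_ACCESS_* constants.
enum class MemberAccess { None, Private, Protected, Public };
constexpr uint32_t DW_ACCESS_public = 0x01;
constexpr uint32_t DW_ACCESS_protected = 0x02;
constexpr uint32_t DW_ACCESS_private = 0x03;

// Return the DWARF accessibility code, or std::nullopt when there is no
// direct equivalent (mirrors LVElement::getAccessibilityCode above).
std::optional<uint32_t> getAccessibilityCode(MemberAccess Access) {
  switch (Access) {
  case MemberAccess::Private:
    return DW_ACCESS_private;
  case MemberAccess::Protected:
    return DW_ACCESS_protected;
  case MemberAccess::Public:
    return DW_ACCESS_public;
  default:
    return std::nullopt;
  }
}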
diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVLocation.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVLocation.cpp
index 115b903c6c7f..17b32a5f67b4 100644
--- a/llvm/lib/DebugInfo/LogicalView/Core/LVLocation.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Core/LVLocation.cpp
@@ -352,7 +352,7 @@ std::string LVOperation::getOperandsCodeViewInfo() {
uint16_t OperationCode = getCodeViewOperationCode(Opcode);
switch (OperationCode) {
- // Operands: [Offset, 0].
+ // Operands: [Offset].
case codeview::SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL:
Stream << "frame_pointer_rel " << int(Operands[0]);
break;
@@ -360,7 +360,7 @@ std::string LVOperation::getOperandsCodeViewInfo() {
Stream << "frame_pointer_rel_full_scope " << int(Operands[0]);
break;
- // Operands: [Register, 0].
+ // Operands: [Register].
case codeview::SymbolKind::S_DEFRANGE_REGISTER:
Stream << "register " << getReader().getRegisterName(Opcode, Operands);
break;
@@ -375,7 +375,7 @@ std::string LVOperation::getOperandsCodeViewInfo() {
<< " offset " << int(Operands[1]);
break;
- // Operands: [Program, 0].
+ // Operands: [Program].
case codeview::SymbolKind::S_DEFRANGE:
Stream << "frame " << int(Operands[0]);
break;
@@ -576,11 +576,11 @@ void LVLocationSymbol::addObject(LVAddress LowPC, LVAddress HighPC,
}
// Add a Location Record.
-void LVLocationSymbol::addObject(LVSmall Opcode, LVUnsigned Operand1,
- LVUnsigned Operand2) {
+void LVLocationSymbol::addObject(LVSmall Opcode,
+ ArrayRef<LVUnsigned> Operands) {
if (!Entries)
- Entries = new LVAutoOperations();
- Entries->emplace_back(new LVOperation(Opcode, Operand1, Operand2));
+ Entries = std::make_unique<LVOperations>();
+ Entries->push_back(getReader().createOperation(Opcode, Operands));
}
// Based on the DWARF attribute, define the location kind.
@@ -606,8 +606,7 @@ void LVLocation::setKind() {
void LVLocationSymbol::updateKind() {
// Update the location type for simple ones.
if (Entries && Entries->size() == 1) {
- LVOperation *Operation = Entries->front();
- if (dwarf::DW_OP_fbreg == Operation->getOpcode())
+ if (dwarf::DW_OP_fbreg == Entries->front()->getOpcode())
setIsStackOffset();
}
}
@@ -660,7 +659,7 @@ void LVLocationSymbol::printExtra(raw_ostream &OS, bool Full) const {
if (Full && Entries) {
bool CodeViewLocation = getParentSymbol()->getHasCodeViewLocation();
std::stringstream Stream;
- std::string Leading = "";
+ std::string Leading;
for (LVOperation *Operation : *Entries) {
Stream << Leading
<< (CodeViewLocation ? Operation->getOperandsCodeViewInfo()
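
Several hunks in this patch replace the fixed (Operand1, Operand2) pair with an ArrayRef<LVUnsigned>, so a caller passes exactly as many operands as an opcode needs, including none. A small sketch of the calling convention this enables follows; Location, Operation and the opcode values are illustrative, not the LogicalView API.

#include <cstdint>
#include <vector>
#include <llvm/ADT/ArrayRef.h>

using LVUnsigned = uint64_t;

struct Operation {
  uint8_t Opcode;
  std::vector<LVUnsigned> Operands;
};

struct Location {
  std::vector<Operation> Entries;
  // Accepting ArrayRef lets callers pass {}, {A}, or {A, B} uniformly.
  void addObject(uint8_t Opcode, llvm::ArrayRef<LVUnsigned> Operands) {
    Entries.push_back({Opcode, {Operands.begin(), Operands.end()}});
  }
};

void example(Location &Loc) {
  // Illustrative opcode values only.
  Loc.addObject(0x01, {});     // an opcode with no operands
  Loc.addObject(0x02, {16});   // one operand
  Loc.addObject(0x03, {6, 8}); // two operands
}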
diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVReader.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVReader.cpp
index 88f66cf2093b..613452c0b501 100644
--- a/llvm/lib/DebugInfo/LogicalView/Core/LVReader.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Core/LVReader.cpp
@@ -182,6 +182,9 @@ Error LVReader::createSplitFolder() {
// Get the filename for given object.
StringRef LVReader::getFilename(LVObject *Object, size_t Index) const {
+ // TODO: The current CodeView Reader implementation does not have support
+ // for multiple compile units. Until we have a proper offset calculation,
+ // check only in the current compile unit.
if (CompileUnits.size()) {
// Get Compile Unit for the given object.
LVCompileUnits::const_iterator Iter =
diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp
index fb503f3d3e7e..2f26025d01ec 100644
--- a/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Core/LVScope.cpp
@@ -45,15 +45,6 @@ const char *const KindUnion = "Union";
//===----------------------------------------------------------------------===//
// DWARF lexical block, such as: namespace, function, compile unit, module, etc.
//===----------------------------------------------------------------------===//
-LVScope::~LVScope() {
- delete Types;
- delete Symbols;
- delete Scopes;
- delete Lines;
- delete Ranges;
- delete Children;
-}
-
// Return a string representation for the scope kind.
const char *LVScope::kind() const {
const char *Kind = KindUndefined;
@@ -114,7 +105,7 @@ LVScopeDispatch LVScope::Dispatch = {
void LVScope::addToChildren(LVElement *Element) {
if (!Children)
- Children = new LVElements();
+ Children = std::make_unique<LVElements>();
Children->push_back(Element);
}
@@ -137,7 +128,7 @@ void LVScope::addElement(LVLine *Line) {
assert(Line && "Invalid line.");
assert(!Line->getParent() && "Line already inserted");
if (!Lines)
- Lines = new LVAutoLines();
+ Lines = std::make_unique<LVLines>();
// Add it to parent.
Lines->push_back(Line);
@@ -161,7 +152,7 @@ void LVScope::addObject(LVLocation *Location) {
assert(Location && "Invalid location.");
assert(!Location->getParent() && "Location already inserted");
if (!Ranges)
- Ranges = new LVAutoLocations();
+ Ranges = std::make_unique<LVLocations>();
// Add it to parent.
Location->setParent(this);
@@ -176,7 +167,7 @@ void LVScope::addElement(LVScope *Scope) {
assert(Scope && "Invalid scope.");
assert(!Scope->getParent() && "Scope already inserted");
if (!Scopes)
- Scopes = new LVAutoScopes();
+ Scopes = std::make_unique<LVScopes>();
// Add it to parent.
Scopes->push_back(Scope);
@@ -203,7 +194,7 @@ void LVScope::addElement(LVSymbol *Symbol) {
assert(Symbol && "Invalid symbol.");
assert(!Symbol->getParent() && "Symbol already inserted");
if (!Symbols)
- Symbols = new LVAutoSymbols();
+ Symbols = std::make_unique<LVSymbols>();
// Add it to parent.
Symbols->push_back(Symbol);
@@ -230,7 +221,7 @@ void LVScope::addElement(LVType *Type) {
assert(Type && "Invalid type.");
assert(!Type->getParent() && "Type already inserted");
if (!Types)
- Types = new LVAutoTypes();
+ Types = std::make_unique<LVTypes>();
// Add it to parent.
Types->push_back(Type);
@@ -255,7 +246,7 @@ void LVScope::addElement(LVType *Type) {
// Add a pair of ranges.
void LVScope::addObject(LVAddress LowerAddress, LVAddress UpperAddress) {
// Pack the ranges into a Location object.
- LVLocation *Location = new LVLocation();
+ LVLocation *Location = getReader().createLocation();
Location->setLowerAddress(LowerAddress);
Location->setUpperAddress(UpperAddress);
Location->setIsAddressRange();
@@ -341,7 +332,7 @@ void LVScope::addMissingElements(LVScope *Reference) {
// information that is incorrect for the element to be inserted.
// As the symbol being added does not exist in the debug section,
// use its parent scope offset, to indicate its DIE location.
- LVSymbol *Symbol = new LVSymbol();
+ LVSymbol *Symbol = getReader().createSymbol();
addElement(Symbol);
Symbol->setOffset(getOffset());
Symbol->setIsOptimized();
@@ -598,6 +589,10 @@ Error LVScope::doPrint(bool Split, bool Match, bool Print, raw_ostream &OS,
// split context, then switch to the reader output stream.
raw_ostream *StreamSplit = &OS;
+  // Ignore the CU generated by the VS toolchain when compiling to PDB.
+ if (getIsSystem() && !options().getAttributeSystem())
+ return Error::success();
+
  // If 'Split', we use the scope name (CU name) as the output file; the
// delimiters in the pathname, must be replaced by a normal character.
if (getIsCompileUnit()) {
@@ -690,7 +685,7 @@ void LVScope::sort() {
if (SortFunction) {
std::function<void(LVScope * Parent, LVSortFunction SortFunction)> Sort =
[&](LVScope *Parent, LVSortFunction SortFunction) {
- auto Traverse = [&](auto *Set, LVSortFunction SortFunction) {
+ auto Traverse = [&](auto &Set, LVSortFunction SortFunction) {
if (Set)
std::stable_sort(Set->begin(), Set->end(), SortFunction);
};
@@ -877,7 +872,7 @@ bool LVScope::equalNumberOfChildren(const LVScope *Scope) const {
}
void LVScope::markMissingParents(const LVScope *Target, bool TraverseChildren) {
- auto SetCompareState = [&](auto *Container) {
+ auto SetCompareState = [&](auto &Container) {
if (Container)
for (auto *Entry : *Container)
Entry->setIsInCompare();
@@ -1356,8 +1351,7 @@ void LVScopeCompileUnit::addedElement(LVType *Type) {
 // Record unsupported DWARF tags.
void LVScopeCompileUnit::addDebugTag(dwarf::Tag Target, LVOffset Offset) {
- addItem<LVTagOffsetsMap, LVOffsetList, dwarf::Tag, LVOffset>(&DebugTags,
- Target, Offset);
+ addItem<LVTagOffsetsMap, dwarf::Tag, LVOffset>(&DebugTags, Target, Offset);
}
// Record elements with invalid offsets.
@@ -1390,8 +1384,7 @@ void LVScopeCompileUnit::addLineZero(LVLine *Line) {
LVScope *Scope = Line->getParentScope();
LVOffset Offset = Scope->getOffset();
addInvalidOffset(Offset, Scope);
- addItem<LVOffsetLinesMap, LVLines, LVOffset, LVLine *>(&LinesZero, Offset,
- Line);
+ addItem<LVOffsetLinesMap, LVOffset, LVLine *>(&LinesZero, Offset, Line);
}
void LVScopeCompileUnit::printLocalNames(raw_ostream &OS, bool Full) const {
@@ -1481,7 +1474,7 @@ void LVScopeCompileUnit::printWarnings(raw_ostream &OS, bool Full) const {
PrintHeader(Header);
for (LVOffsetLocationsMap::const_reference Entry : Map) {
PrintElement(WarningOffsets, Entry.first);
- for (const LVLocation *Location : *Entry.second)
+ for (const LVLocation *Location : Entry.second)
OS << hexSquareString(Location->getOffset()) << " "
<< Location->getIntervalInfo() << "\n";
}
@@ -1494,7 +1487,7 @@ void LVScopeCompileUnit::printWarnings(raw_ostream &OS, bool Full) const {
OS << format("\n0x%02x", (unsigned)Entry.first) << ", "
<< dwarf::TagString(Entry.first) << "\n";
unsigned Count = 0;
- for (const LVOffset &Offset : *Entry.second)
+ for (const LVOffset &Offset : Entry.second)
PrintOffset(Count, Offset);
OS << "\n";
}
@@ -1519,7 +1512,7 @@ void LVScopeCompileUnit::printWarnings(raw_ostream &OS, bool Full) const {
for (LVOffsetLinesMap::const_reference Entry : LinesZero) {
PrintElement(WarningOffsets, Entry.first);
unsigned Count = 0;
- for (const LVLine *Line : *Entry.second)
+ for (const LVLine *Line : Entry.second)
PrintOffset(Count, Line->getOffset());
OS << "\n";
}
@@ -1795,6 +1788,8 @@ void LVScopeFunction::resolveReferences() {
// DW_AT_external DW_FORM_flag_present
// 00000070 DW_TAG_subprogram "bar"
// DW_AT_specification DW_FORM_ref4 0x00000048
+ // CodeView does not include any information at the class level to
+ // mark the member function as external.
// If there is a reference linking the declaration and definition, mark
// the definition as extern, to facilitate the logical view comparison.
if (getHasReferenceSpecification()) {
@@ -2030,6 +2025,28 @@ void LVScopeRoot::processRangeInformation() {
}
}
+void LVScopeRoot::transformScopedName() {
+ // Recursively transform all names.
+ std::function<void(LVScope * Parent)> TraverseScope = [&](LVScope *Parent) {
+ auto Traverse = [&](const auto *Set) {
+ if (Set)
+ for (const auto &Entry : *Set)
+ Entry->setInnerComponent();
+ };
+ if (const LVScopes *Scopes = Parent->getScopes())
+ for (LVScope *Scope : *Scopes) {
+ Scope->setInnerComponent();
+ TraverseScope(Scope);
+ }
+ Traverse(Parent->getSymbols());
+ Traverse(Parent->getTypes());
+ Traverse(Parent->getLines());
+ };
+
+ // Start traversing the scopes root and transform the element name.
+ TraverseScope(this);
+}
+
bool LVScopeRoot::equals(const LVScope *Scope) const {
return LVScope::equals(Scope);
}
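
The LVScope and LVSymbol hunks above drop hand-written destructors by moving child containers from raw new/delete to std::unique_ptr members that are allocated lazily on first use. A minimal sketch of that lazy-ownership pattern, with Node and Child as illustrative names:

#include <memory>
#include <vector>

struct Child {};

struct Node {
  // Owned lazily; no user-written destructor is needed.
  std::unique_ptr<std::vector<Child *>> Children;

  void addToChildren(Child *C) {
    if (!Children)
      Children = std::make_unique<std::vector<Child *>>();
    Children->push_back(C);
  }
};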
diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVSupport.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVSupport.cpp
index 9fa1f28eb089..42fb1142eb44 100644
--- a/llvm/lib/DebugInfo/LogicalView/Core/LVSupport.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Core/LVSupport.cpp
@@ -20,6 +20,12 @@ using namespace llvm::logicalview;
#define DEBUG_TYPE "Support"
+namespace {
+// Unique string pool instance used by all logical readers.
+LVStringPool StringPool;
+} // namespace
+LVStringPool &llvm::logicalview::getStringPool() { return StringPool; }
+
// Perform the following transformations to the given 'Path':
// - all characters to lowercase.
// - '\\' into '/' (Platform independent).
@@ -54,3 +60,106 @@ std::string llvm::logicalview::flattenedFilePath(StringRef Path) {
};
return Name;
}
+
+using LexicalEntry = std::pair<size_t, size_t>;
+using LexicalIndexes = SmallVector<LexicalEntry, 10>;
+
+static LexicalIndexes getAllLexicalIndexes(StringRef Name) {
+ if (Name.empty())
+ return {};
+
+ size_t AngleCount = 0;
+ size_t ColonSeen = 0;
+ size_t Current = 0;
+
+ LexicalIndexes Indexes;
+
+#ifndef NDEBUG
+ auto PrintLexicalEntry = [&]() {
+ LexicalEntry Entry = Indexes.back();
+ llvm::dbgs() << formatv(
+ "'{0}:{1}', '{2}'\n", Entry.first, Entry.second,
+ Name.substr(Entry.first, Entry.second - Entry.first + 1));
+ };
+#endif
+
+ size_t Length = Name.size();
+ for (size_t Index = 0; Index < Length; ++Index) {
+ LLVM_DEBUG({
+ llvm::dbgs() << formatv("Index: '{0}', Char: '{1}'\n", Index,
+ Name[Index]);
+ });
+ switch (Name[Index]) {
+ case '<':
+ ++AngleCount;
+ break;
+ case '>':
+ --AngleCount;
+ break;
+ case ':':
+ ++ColonSeen;
+ break;
+ }
+ if (ColonSeen == 2) {
+ if (!AngleCount) {
+ Indexes.push_back(LexicalEntry(Current, Index - 2));
+ Current = Index + 1;
+ LLVM_DEBUG({ PrintLexicalEntry(); });
+ }
+ ColonSeen = 0;
+ continue;
+ }
+ }
+
+ // Store last component.
+ Indexes.push_back(LexicalEntry(Current, Length - 1));
+ LLVM_DEBUG({ PrintLexicalEntry(); });
+ return Indexes;
+}
+
+LVLexicalComponent llvm::logicalview::getInnerComponent(StringRef Name) {
+ if (Name.empty())
+ return {};
+
+ LexicalIndexes Indexes = getAllLexicalIndexes(Name);
+ if (Indexes.size() == 1)
+ return std::make_tuple(StringRef(), Name);
+
+ LexicalEntry BeginEntry = Indexes.front();
+ LexicalEntry EndEntry = Indexes[Indexes.size() - 2];
+ StringRef Outer =
+ Name.substr(BeginEntry.first, EndEntry.second - BeginEntry.first + 1);
+
+ LexicalEntry LastEntry = Indexes.back();
+ StringRef Inner =
+ Name.substr(LastEntry.first, LastEntry.second - LastEntry.first + 1);
+
+ return std::make_tuple(Outer, Inner);
+}
+
+LVStringRefs llvm::logicalview::getAllLexicalComponents(StringRef Name) {
+ if (Name.empty())
+ return {};
+
+ LexicalIndexes Indexes = getAllLexicalIndexes(Name);
+ LVStringRefs Components;
+ for (const LexicalEntry &Entry : Indexes)
+ Components.push_back(
+ Name.substr(Entry.first, Entry.second - Entry.first + 1));
+
+ return Components;
+}
+
+std::string llvm::logicalview::getScopedName(const LVStringRefs &Components,
+ StringRef BaseName) {
+ if (Components.empty())
+ return {};
+ std::string Name(BaseName);
+ raw_string_ostream Stream(Name);
+ if (BaseName.size())
+ Stream << "::";
+ Stream << Components[0];
+ for (LVStringRefs::size_type Index = 1; Index < Components.size(); ++Index)
+ Stream << "::" << Components[Index];
+ return Name;
+}
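
getAllLexicalIndexes() above splits a scoped name on '::' separators that are not nested inside template angle brackets, and getInnerComponent() then returns the last component plus everything before it. The following self-contained sketch reimplements just that splitting rule to show the intended behavior on a templated name; splitScopedName is an illustrative helper, not the LogicalView API.

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Split a scoped name on "::" separators that are not inside '<...>',
// mirroring the splitting rule used by getAllLexicalIndexes() above.
static std::vector<std::string> splitScopedName(const std::string &Name) {
  std::vector<std::string> Parts;
  size_t Angle = 0, Start = 0;
  for (size_t I = 0; I < Name.size(); ++I) {
    if (Name[I] == '<')
      ++Angle;
    else if (Name[I] == '>')
      --Angle;
    else if (Angle == 0 && Name[I] == ':' && I + 1 < Name.size() &&
             Name[I + 1] == ':') {
      Parts.push_back(Name.substr(Start, I - Start));
      Start = I + 2;
      ++I; // Skip the second ':'.
    }
  }
  Parts.push_back(Name.substr(Start));
  return Parts;
}

int main() {
  // Prints "std", "map<int, int>", "iterator"; the inner component is the
  // last part and the outer component is everything before it.
  for (const std::string &P : splitScopedName("std::map<int, int>::iterator"))
    std::cout << P << "\n";
}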
diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVSymbol.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVSymbol.cpp
index 82633fbc6b2e..4608fe20cb6d 100644
--- a/llvm/lib/DebugInfo/LogicalView/Core/LVSymbol.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Core/LVSymbol.cpp
@@ -66,10 +66,10 @@ void LVSymbol::addLocation(dwarf::Attribute Attr, LVAddress LowPC,
LVAddress HighPC, LVUnsigned SectionOffset,
uint64_t LocDescOffset, bool CallSiteLocation) {
if (!Locations)
- Locations = new LVAutoLocations();
+ Locations = std::make_unique<LVLocations>();
// Create the location entry.
- CurrentLocation = new LVLocationSymbol();
+ CurrentLocation = getReader().createLocationSymbol();
CurrentLocation->setParent(this);
CurrentLocation->setAttr(Attr);
if (CallSiteLocation)
@@ -82,10 +82,10 @@ void LVSymbol::addLocation(dwarf::Attribute Attr, LVAddress LowPC,
}
// Add a Location Record.
-void LVSymbol::addLocationOperands(LVSmall Opcode, uint64_t Operand1,
- uint64_t Operand2) {
+void LVSymbol::addLocationOperands(LVSmall Opcode,
+ ArrayRef<uint64_t> Operands) {
if (CurrentLocation)
- CurrentLocation->addObject(Opcode, Operand1, Operand2);
+ CurrentLocation->addObject(Opcode, Operands);
}
// Add a Location Entry.
@@ -97,15 +97,14 @@ void LVSymbol::addLocationConstant(dwarf::Attribute Attr, LVUnsigned Constant,
/*SectionOffset=*/0, LocDescOffset);
// Add records to Location Entry.
- addLocationOperands(/*Opcode=*/LVLocationMemberOffset,
- /*Operand1=*/Constant, /*Operand2=*/0);
+ addLocationOperands(/*Opcode=*/LVLocationMemberOffset, {Constant});
}
LVLocations::iterator LVSymbol::addLocationGap(LVLocations::iterator Pos,
LVAddress LowPC,
LVAddress HighPC) {
// Create a location entry for the gap.
- LVLocation *Gap = new LVLocationSymbol();
+ LVLocation *Gap = getReader().createLocationSymbol();
Gap->setParent(this);
Gap->setAttr(dwarf::DW_AT_location);
Gap->addObject(LowPC, HighPC,
@@ -115,8 +114,7 @@ LVLocations::iterator LVSymbol::addLocationGap(LVLocations::iterator Pos,
LVLocations::iterator Iter = Locations->insert(Pos, Gap);
// Add gap to Location Entry.
- Gap->addObject(/*op=*/dwarf::DW_OP_hi_user,
- /*opd1=*/0, /*opd2=*/0);
+ Gap->addObject(dwarf::DW_OP_hi_user, {});
// Mark the entry as a gap.
Gap->setIsGapEntry();
@@ -190,7 +188,7 @@ void LVSymbol::getLocations(LVLocations &LocationList) const {
// Calculate coverage factor.
void LVSymbol::calculateCoverage() {
- if (!LVLocation::calculateCoverage(Locations, CoverageFactor,
+ if (!LVLocation::calculateCoverage(Locations.get(), CoverageFactor,
CoveragePercentage)) {
LVScope *Parent = getParentScope();
if (Parent->getIsInlinedFunction()) {
@@ -444,6 +442,6 @@ void LVSymbol::printExtra(raw_ostream &OS, bool Full) const {
Reference->printReference(OS, Full, const_cast<LVSymbol *>(this));
// Print location information.
- LVLocation::print(Locations, OS, Full);
+ LVLocation::print(Locations.get(), OS, Full);
}
}
diff --git a/llvm/lib/DebugInfo/LogicalView/Core/LVType.cpp b/llvm/lib/DebugInfo/LogicalView/Core/LVType.cpp
index 3d32c34ee02a..28bccadce598 100644
--- a/llvm/lib/DebugInfo/LogicalView/Core/LVType.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Core/LVType.cpp
@@ -253,16 +253,10 @@ void LVType::getParameters(const LVTypes *Types, LVTypes *TypesParam,
if (!Type->getIsTemplateParam())
continue;
if (options().getAttributeArgument()) {
- LVScope *Scope = nullptr;
if (Type->getIsKindType())
- Type = Type->getTypeAsType();
- else {
- if (Type->getIsKindScope()) {
- Scope = Type->getTypeAsScope();
- Type = nullptr;
- }
- }
- Type ? TypesParam->push_back(Type) : ScopesParam->push_back(Scope);
+ TypesParam->push_back(Type->getTypeAsType());
+ else if (Type->getIsKindScope())
+ ScopesParam->push_back(Type->getTypeAsScope());
} else
TypesParam->push_back(Type);
}
@@ -330,6 +324,13 @@ LVElement *LVTypeDefinition::getUnderlyingType() {
}
void LVTypeDefinition::resolveExtra() {
+  // In the case of CodeView, the MSVC toolset generates a series of typedefs
+  // that refer to internal runtime structures, which we do not process. Those
+  // typedefs are marked as 'system'. They have an associated logical type,
+  // but the underlying type is always null.
+ if (getIsSystem())
+ return;
+
// Set the reference to the typedef type.
if (options().getAttributeUnderlying()) {
setUnderlyingType(getUnderlyingType());
diff --git a/llvm/lib/DebugInfo/LogicalView/LVReaderHandler.cpp b/llvm/lib/DebugInfo/LogicalView/LVReaderHandler.cpp
index 35dc30fd601f..5f82f816dc19 100644
--- a/llvm/lib/DebugInfo/LogicalView/LVReaderHandler.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/LVReaderHandler.cpp
@@ -11,8 +11,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/LogicalView/LVReaderHandler.h"
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
#include "llvm/DebugInfo/LogicalView/Core/LVCompare.h"
+#include "llvm/DebugInfo/LogicalView/Readers/LVCodeViewReader.h"
#include "llvm/DebugInfo/LogicalView/Readers/LVELFReader.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/PDB.h"
+#include "llvm/Object/COFF.h"
using namespace llvm;
using namespace llvm::object;
@@ -32,31 +37,36 @@ Error LVReaderHandler::process() {
return Error::success();
}
-void LVReaderHandler::destroyReaders() {
- LLVM_DEBUG(dbgs() << "destroyReaders\n");
- for (const LVReader *Reader : TheReaders)
- delete Reader;
-}
-
Error LVReaderHandler::createReader(StringRef Filename, LVReaders &Readers,
PdbOrObj &Input, StringRef FileFormatName,
StringRef ExePath) {
- auto CreateOneReader = [&]() -> LVReader * {
- if (Input.is<ObjectFile *>()) {
- ObjectFile &Obj = *Input.get<ObjectFile *>();
+ auto CreateOneReader = [&]() -> std::unique_ptr<LVReader> {
+ if (isa<ObjectFile *>(Input)) {
+ ObjectFile &Obj = *cast<ObjectFile *>(Input);
+ if (Obj.isCOFF()) {
+ COFFObjectFile *COFF = cast<COFFObjectFile>(&Obj);
+ return std::make_unique<LVCodeViewReader>(Filename, FileFormatName,
+ *COFF, W, ExePath);
+ }
if (Obj.isELF() || Obj.isMachO())
- return new LVELFReader(Filename, FileFormatName, Obj, W);
+ return std::make_unique<LVELFReader>(Filename, FileFormatName, Obj, W);
+ }
+ if (isa<PDBFile *>(Input)) {
+ PDBFile &Pdb = *cast<PDBFile *>(Input);
+ return std::make_unique<LVCodeViewReader>(Filename, FileFormatName, Pdb,
+ W, ExePath);
}
return nullptr;
};
- LVReader *Reader = CreateOneReader();
- if (!Reader)
+ std::unique_ptr<LVReader> ReaderObj = CreateOneReader();
+ if (!ReaderObj)
return createStringError(errc::invalid_argument,
"unable to create reader for: '%s'",
Filename.str().c_str());
- Readers.push_back(Reader);
+ LVReader *Reader = ReaderObj.get();
+ Readers.emplace_back(std::move(ReaderObj));
return Reader->doLoad();
}
@@ -81,8 +91,102 @@ Error LVReaderHandler::handleArchive(LVReaders &Readers, StringRef Filename,
return Error::success();
}
+// Search for a matching executable image for the given PDB path.
+static std::string searchForExe(const StringRef Path,
+ const StringRef Extension) {
+ SmallString<128> ExePath(Path);
+ llvm::sys::path::replace_extension(ExePath, Extension);
+
+ std::unique_ptr<IPDBSession> Session;
+ if (Error Err = loadDataForEXE(PDB_ReaderType::Native, ExePath, Session)) {
+ consumeError(std::move(Err));
+ return {};
+ }
+ // We have a candidate for the executable image.
+ Expected<std::string> PdbPathOrErr = NativeSession::searchForPdb({ExePath});
+ if (!PdbPathOrErr) {
+ consumeError(PdbPathOrErr.takeError());
+ return {};
+ }
+ // Convert any Windows backslashes into forward slashes to get the path.
+ std::string ConvertedPath = sys::path::convert_to_slash(
+ PdbPathOrErr.get(), sys::path::Style::windows);
+ if (ConvertedPath == Path)
+ return std::string(ExePath);
+
+ return {};
+}
+
+// Search for a matching object image for the given PDB path.
+static std::string searchForObj(const StringRef Path,
+ const StringRef Extension) {
+ SmallString<128> ObjPath(Path);
+ llvm::sys::path::replace_extension(ObjPath, Extension);
+ if (llvm::sys::fs::exists(ObjPath)) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
+ MemoryBuffer::getFileOrSTDIN(ObjPath);
+ if (!BuffOrErr)
+ return {};
+ return std::string(ObjPath);
+ }
+
+ return {};
+}
+
Error LVReaderHandler::handleBuffer(LVReaders &Readers, StringRef Filename,
MemoryBufferRef Buffer, StringRef ExePath) {
+ // As PDB does not support the Binary interface, at this point we can check
+ // if the buffer corresponds to a PDB or PE file.
+ file_magic FileMagic = identify_magic(Buffer.getBuffer());
+ if (FileMagic == file_magic::pdb) {
+ if (!ExePath.empty())
+ return handleObject(Readers, Filename, Buffer.getBuffer(), ExePath);
+
+ // Search in the directory derived from the given 'Filename' for a
+ // matching object file (.o, .obj, .lib) or a matching executable file
+ // (.exe/.dll) and try to create the reader based on the matched file.
+ // If no matching file is found then we load the original PDB file.
+ std::vector<StringRef> ExecutableExtensions = {"exe", "dll"};
+ for (StringRef Extension : ExecutableExtensions) {
+ std::string ExecutableImage = searchForExe(Filename, Extension);
+ if (ExecutableImage.empty())
+ continue;
+ if (Error Err = handleObject(Readers, Filename, Buffer.getBuffer(),
+ ExecutableImage)) {
+ consumeError(std::move(Err));
+ continue;
+ }
+ return Error::success();
+ }
+
+ std::vector<StringRef> ObjectExtensions = {"o", "obj", "lib"};
+ for (StringRef Extension : ObjectExtensions) {
+ std::string ObjectImage = searchForObj(Filename, Extension);
+ if (ObjectImage.empty())
+ continue;
+ if (Error Err = handleFile(Readers, ObjectImage)) {
+ consumeError(std::move(Err));
+ continue;
+ }
+ return Error::success();
+ }
+
+ // No matching executable/object image was found. Load the given PDB.
+ return handleObject(Readers, Filename, Buffer.getBuffer(), ExePath);
+ }
+ if (FileMagic == file_magic::pecoff_executable) {
+ // If we have a valid executable, try to find a matching PDB file.
+ Expected<std::string> PdbPath = NativeSession::searchForPdb({Filename});
+ if (errorToErrorCode(PdbPath.takeError())) {
+ return createStringError(
+ errc::not_supported,
+ "Binary object format in '%s' does not have debug info.",
+ Filename.str().c_str());
+ }
+ // Process the matching PDB file and pass the executable filename.
+ return handleFile(Readers, PdbPath.get(), Filename);
+ }
+
Expected<std::unique_ptr<Binary>> BinOrErr = createBinary(Buffer);
if (errorToErrorCode(BinOrErr.takeError())) {
return createStringError(errc::not_supported,
@@ -139,7 +243,7 @@ Error LVReaderHandler::handleObject(LVReaders &Readers, StringRef Filename,
Binary &Binary) {
if (PdbOrObj Input = dyn_cast<ObjectFile>(&Binary))
return createReader(Filename, Readers, Input,
- Input.get<ObjectFile *>()->getFileFormatName());
+ cast<ObjectFile *>(Input)->getFileFormatName());
if (MachOUniversalBinary *Fat = dyn_cast<MachOUniversalBinary>(&Binary))
return handleMach(Readers, Filename, *Fat);
@@ -152,13 +256,32 @@ Error LVReaderHandler::handleObject(LVReaders &Readers, StringRef Filename,
Filename.str().c_str());
}
+Error LVReaderHandler::handleObject(LVReaders &Readers, StringRef Filename,
+ StringRef Buffer, StringRef ExePath) {
+ std::unique_ptr<IPDBSession> Session;
+ if (Error Err = loadDataForPDB(PDB_ReaderType::Native, Filename, Session))
+ return createStringError(errorToErrorCode(std::move(Err)), "%s",
+ Filename.str().c_str());
+
+ std::unique_ptr<NativeSession> PdbSession;
+ PdbSession.reset(static_cast<NativeSession *>(Session.release()));
+ PdbOrObj Input = &PdbSession->getPDBFile();
+ StringRef FileFormatName;
+ size_t Pos = Buffer.find_first_of("\r\n");
+ if (Pos)
+ FileFormatName = Buffer.substr(0, Pos - 1);
+ return createReader(Filename, Readers, Input, FileFormatName, ExePath);
+}
+
Error LVReaderHandler::createReaders() {
LLVM_DEBUG(dbgs() << "createReaders\n");
for (std::string &Object : Objects) {
LVReaders Readers;
if (Error Err = createReader(Object, Readers))
return Err;
- TheReaders.insert(TheReaders.end(), Readers.begin(), Readers.end());
+ TheReaders.insert(TheReaders.end(),
+ std::make_move_iterator(Readers.begin()),
+ std::make_move_iterator(Readers.end()));
}
return Error::success();
@@ -167,7 +290,7 @@ Error LVReaderHandler::createReaders() {
Error LVReaderHandler::printReaders() {
LLVM_DEBUG(dbgs() << "printReaders\n");
if (options().getPrintExecute())
- for (LVReader *Reader : TheReaders)
+ for (const std::unique_ptr<LVReader> &Reader : TheReaders)
if (Error Err = Reader->doPrint())
return Err;
@@ -182,7 +305,8 @@ Error LVReaderHandler::compareReaders() {
size_t ViewPairs = ReadersCount / 2;
LVCompare Compare(OS);
for (size_t Pair = 0, Index = 0; Pair < ViewPairs; ++Pair) {
- if (Error Err = Compare.execute(TheReaders[Index], TheReaders[Index + 1]))
+ if (Error Err = Compare.execute(TheReaders[Index].get(),
+ TheReaders[Index + 1].get()))
return Err;
Index += 2;
}
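
handleBuffer() above now inspects the buffer's magic bytes first so PDB and PE/COFF inputs can be routed to the CodeView reader before falling back to the generic Binary path. Below is a hedged sketch of that dispatch shape using llvm::identify_magic; handlePdb, handleCoff and handleGeneric are illustrative placeholders for the reader handler's own overloads.

#include <llvm/BinaryFormat/Magic.h>
#include <llvm/Support/Error.h>
#include <llvm/Support/MemoryBufferRef.h>

using namespace llvm;

// Illustrative callees; the real reader handler dispatches to its own
// handleObject/handleFile overloads instead.
Error handlePdb(MemoryBufferRef Buffer);
Error handleCoff(MemoryBufferRef Buffer);
Error handleGeneric(MemoryBufferRef Buffer);

// Route a buffer by its magic bytes, as the hunk above does for PDB and
// PE/COFF inputs before using the generic Binary path.
Error dispatchByMagic(MemoryBufferRef Buffer) {
  switch (identify_magic(Buffer.getBuffer())) {
  case file_magic::pdb:
    return handlePdb(Buffer);
  case file_magic::pecoff_executable:
    return handleCoff(Buffer);
  default:
    return handleGeneric(Buffer);
  }
}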
diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp
index b654c624f57c..a0cd8b7839cf 100644
--- a/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVBinaryReader.cpp
@@ -190,6 +190,61 @@ void LVBinaryReader::mapVirtualAddress(const object::ObjectFile &Obj) {
});
}
+void LVBinaryReader::mapVirtualAddress(const object::COFFObjectFile &COFFObj) {
+ ErrorOr<uint64_t> ImageBase = COFFObj.getImageBase();
+ if (ImageBase)
+ ImageBaseAddress = ImageBase.get();
+
+ LLVM_DEBUG({
+ dbgs() << "ImageBaseAddress: " << hexValue(ImageBaseAddress) << "\n";
+ });
+
+ uint32_t Flags = COFF::IMAGE_SCN_CNT_CODE | COFF::IMAGE_SCN_LNK_COMDAT;
+
+ for (const object::SectionRef &Section : COFFObj.sections()) {
+ if (!Section.isText() || Section.isVirtual() || !Section.getSize())
+ continue;
+
+ const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section);
+ VirtualAddress = COFFSection->VirtualAddress;
+ bool IsComdat = (COFFSection->Characteristics & Flags) == Flags;
+
+ // Record section information required for symbol resolution.
+    // Note: The section index returned by 'getIndex()' is zero-based.
+ Sections.emplace(Section.getIndex() + 1, Section);
+ addSectionAddress(Section);
+
+ // Additional initialization on the specific object format.
+ mapRangeAddress(COFFObj, Section, IsComdat);
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "\nSections Information:\n";
+ for (LVSections::reference Entry : Sections) {
+ LVSectionIndex SectionIndex = Entry.first;
+ const object::SectionRef Section = Entry.second;
+ const object::coff_section *COFFSection = COFFObj.getCOFFSection(Section);
+ Expected<StringRef> SectionNameOrErr = Section.getName();
+ if (!SectionNameOrErr)
+ consumeError(SectionNameOrErr.takeError());
+ dbgs() << "\nIndex: " << format_decimal(SectionIndex, 3)
+ << " Name: " << *SectionNameOrErr << "\n"
+ << "Size: " << hexValue(Section.getSize()) << "\n"
+ << "VirtualAddress: " << hexValue(VirtualAddress) << "\n"
+ << "SectionAddress: " << hexValue(Section.getAddress()) << "\n"
+ << "PointerToRawData: " << hexValue(COFFSection->PointerToRawData)
+ << "\n"
+ << "SizeOfRawData: " << hexValue(COFFSection->SizeOfRawData)
+ << "\n";
+ }
+ dbgs() << "\nObject Section Information:\n";
+ for (LVSectionAddresses::const_reference Entry : SectionAddresses)
+ dbgs() << "[" << hexValue(Entry.first) << ":"
+ << hexValue(Entry.first + Entry.second.getSize())
+ << "] Size: " << hexValue(Entry.second.getSize()) << "\n";
+ });
+}
+
Error LVBinaryReader::loadGenericTargetInfo(StringRef TheTriple,
StringRef TheFeatures) {
std::string TargetLookupError;
@@ -297,29 +352,16 @@ void LVBinaryReader::addSectionRange(LVSectionIndex SectionIndex,
}
LVRange *LVBinaryReader::getSectionRanges(LVSectionIndex SectionIndex) {
- LVRange *Range = nullptr;
// Check if we already have a mapping for this section index.
LVSectionRanges::iterator IterSection = SectionRanges.find(SectionIndex);
- if (IterSection == SectionRanges.end()) {
- Range = new LVRange();
- SectionRanges.emplace(SectionIndex, Range);
- } else {
- Range = IterSection->second;
- }
+ if (IterSection == SectionRanges.end())
+ IterSection =
+ SectionRanges.emplace(SectionIndex, std::make_unique<LVRange>()).first;
+ LVRange *Range = IterSection->second.get();
assert(Range && "Range is null.");
return Range;
}
-LVBinaryReader::~LVBinaryReader() {
- // Delete the lines created by 'createInstructions'.
- std::vector<LVLines *> AllInstructionLines = ScopeInstructions.find();
- for (LVLines *Entry : AllInstructionLines)
- delete Entry;
- // Delete the ranges created by 'getSectionRanges'.
- for (LVSectionRanges::reference Entry : SectionRanges)
- delete Entry.second;
-}
-
Error LVBinaryReader::createInstructions(LVScope *Scope,
LVSectionIndex SectionIndex,
const LVNameInfo &NameInfo) {
@@ -380,7 +422,9 @@ Error LVBinaryReader::createInstructions(LVScope *Scope,
// Address for first instruction line.
LVAddress FirstAddress = Address;
- LVLines *Instructions = new LVLines();
+ auto InstructionsSP = std::make_unique<LVLines>();
+ LVLines &Instructions = *InstructionsSP;
+ DiscoveredLines.emplace_back(std::move(InstructionsSP));
while (Begin < End) {
MCInst Instruction;
@@ -399,7 +443,7 @@ Error LVBinaryReader::createInstructions(LVScope *Scope,
break;
case MCDisassembler::SoftFail:
LLVM_DEBUG({ dbgs() << "Potentially undefined instruction:"; });
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case MCDisassembler::Success: {
std::string Buffer;
raw_string_ostream Stream(Buffer);
@@ -422,10 +466,10 @@ Error LVBinaryReader::createInstructions(LVScope *Scope,
// the 'processLines()' function will move each created logical line
// to its enclosing logical scope, using the debug ranges information
// and they will be released when its scope parent is deleted.
- LVLineAssembler *Line = new LVLineAssembler();
+ LVLineAssembler *Line = createLineAssembler();
Line->setAddress(Address);
Line->setName(StringRef(Stream.str()).trim());
- Instructions->push_back(Line);
+ Instructions.push_back(Line);
break;
}
}
@@ -439,15 +483,15 @@ Error LVBinaryReader::createInstructions(LVScope *Scope,
<< " Scope DIE: " << hexValue(Scope->getOffset()) << "\n"
<< "Address: " << hexValue(FirstAddress)
<< format(" - Collected instructions lines: %d\n",
- Instructions->size());
- for (const LVLine *Line : *Instructions)
+ Instructions.size());
+ for (const LVLine *Line : Instructions)
dbgs() << format_decimal(++Index, 5) << ": "
<< hexValue(Line->getOffset()) << ", (" << Line->getName()
<< ")\n";
});
// The scope in the assembler names is linked to its own instructions.
- ScopeInstructions.add(SectionIndex, Scope, Instructions);
+ ScopeInstructions.add(SectionIndex, Scope, &Instructions);
AssemblerMappings.add(SectionIndex, FirstAddress, Scope);
return Error::success();
@@ -815,6 +859,80 @@ void LVBinaryReader::processLines(LVLines *DebugLines,
}
}
+// Traverse the scopes for the given 'Function' looking for any inlined
+// scopes with inlined lines, which are found in 'CUInlineeLines'.
+void LVBinaryReader::includeInlineeLines(LVSectionIndex SectionIndex,
+ LVScope *Function) {
+ SmallVector<LVInlineeLine::iterator> InlineeIters;
+ std::function<void(LVScope * Parent)> FindInlinedScopes =
+ [&](LVScope *Parent) {
+ if (const LVScopes *Scopes = Parent->getScopes())
+ for (LVScope *Scope : *Scopes) {
+ LVInlineeLine::iterator Iter = CUInlineeLines.find(Scope);
+ if (Iter != CUInlineeLines.end())
+ InlineeIters.push_back(Iter);
+ FindInlinedScopes(Scope);
+ }
+ };
+
+ // Find all inlined scopes for the given 'Function'.
+ FindInlinedScopes(Function);
+ for (LVInlineeLine::iterator InlineeIter : InlineeIters) {
+ LVScope *Scope = InlineeIter->first;
+ addToSymbolTable(Scope->getLinkageName(), Scope, SectionIndex);
+
+ // TODO: Convert this into a reference.
+ LVLines *InlineeLines = InlineeIter->second.get();
+ LLVM_DEBUG({
+ dbgs() << "Inlined lines for: " << Scope->getName() << "\n";
+ for (const LVLine *Line : *InlineeLines)
+ dbgs() << "[" << hexValue(Line->getAddress()) << "] "
+ << Line->getLineNumber() << "\n";
+ dbgs() << format("Debug lines: %d\n", CULines.size());
+ for (const LVLine *Line : CULines)
+ dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", ("
+ << Line->getLineNumber() << ")\n";
+ ;
+ });
+
+      // The inlined lines must be merged using their addresses, in order to
+      // keep the real order of the instructions. The inlined lines are mixed
+      // with the other non-inlined lines.
+ if (InlineeLines->size()) {
+ // First address of inlinee code.
+ uint64_t InlineeStart = (InlineeLines->front())->getAddress();
+ LVLines::iterator Iter = std::find_if(
+ CULines.begin(), CULines.end(), [&](LVLine *Item) -> bool {
+ return Item->getAddress() == InlineeStart;
+ });
+ if (Iter != CULines.end()) {
+ // 'Iter' points to the line where the inlined function is called.
+ // Emulate the DW_AT_call_line attribute.
+ Scope->setCallLineNumber((*Iter)->getLineNumber());
+ // Mark the referenced line as the start of the inlined function.
+        // Skip the first line during the insertion, as its address and line
+        // number are the same. Otherwise we would have to erase and insert.
+ (*Iter)->setLineNumber((*InlineeLines->begin())->getLineNumber());
+ ++Iter;
+ CULines.insert(Iter, InlineeLines->begin() + 1, InlineeLines->end());
+ }
+ }
+
+    // Remove this set of lines from the container; each inlined function
+    // creates a unique set of lines. Remove only the created container.
+ CUInlineeLines.erase(InlineeIter);
+ InlineeLines->clear();
+ }
+ LLVM_DEBUG({
+ dbgs() << "Merged Inlined lines for: " << Function->getName() << "\n";
+ dbgs() << format("Debug lines: %d\n", CULines.size());
+ for (const LVLine *Line : CULines)
+ dbgs() << "Line address: " << hexValue(Line->getOffset()) << ", ("
+ << Line->getLineNumber() << ")\n";
+ ;
+ });
+}
+
void LVBinaryReader::print(raw_ostream &OS) const {
OS << "LVBinaryReader\n";
LLVM_DEBUG(dbgs() << "PrintReader\n");
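
includeInlineeLines() above splices each inlined function's lines into the compile-unit line list at the call site, matched by address, reusing the matching entry to emulate DW_AT_call_line. A simplified sketch of that merge step over plain structs follows; Line and mergeInlinee are illustrative names.

#include <algorithm>
#include <cstdint>
#include <iterator>
#include <list>

struct Line {
  uint64_t Address;
  uint32_t LineNumber;
};

// Merge InlineeLines into CULines at the entry whose address matches the
// first inlined line, mirroring the splice done by includeInlineeLines().
// Returns the emulated DW_AT_call_line value, or 0 if no match was found.
uint32_t mergeInlinee(std::list<Line> &CULines,
                      const std::list<Line> &InlineeLines) {
  if (InlineeLines.empty())
    return 0;
  uint64_t InlineeStart = InlineeLines.front().Address;
  auto Iter = std::find_if(CULines.begin(), CULines.end(), [&](const Line &L) {
    return L.Address == InlineeStart;
  });
  if (Iter == CULines.end())
    return 0;
  uint32_t CallLine = Iter->LineNumber; // Emulated DW_AT_call_line.
  // Reuse the matching entry for the first inlined line, then splice the rest.
  Iter->LineNumber = InlineeLines.front().LineNumber;
  ++Iter;
  CULines.insert(Iter, std::next(InlineeLines.begin()), InlineeLines.end());
  return CallLine;
}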
diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp
new file mode 100644
index 000000000000..d72fe2683f92
--- /dev/null
+++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewReader.cpp
@@ -0,0 +1,1221 @@
+//===-- LVCodeViewReader.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the LVCodeViewReader class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/LogicalView/Readers/LVCodeViewReader.h"
+#include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
+#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/EnumTables.h"
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h"
+#include "llvm/DebugInfo/LogicalView/Core/LVLine.h"
+#include "llvm/DebugInfo/LogicalView/Core/LVScope.h"
+#include "llvm/DebugInfo/LogicalView/Core/LVSymbol.h"
+#include "llvm/DebugInfo/LogicalView/Core/LVType.h"
+#include "llvm/DebugInfo/PDB/GenericError.h"
+#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
+#include "llvm/DebugInfo/PDB/Native/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Native/LinePrinter.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Native/SymbolStream.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/Demangle/Demangle.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FormatAdapters.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/WithColor.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::logicalview;
+using namespace llvm::msf;
+using namespace llvm::object;
+using namespace llvm::pdb;
+
+#define DEBUG_TYPE "CodeViewReader"
+
+StringRef LVCodeViewReader::getSymbolKindName(SymbolKind Kind) {
+ switch (Kind) {
+#define SYMBOL_RECORD(EnumName, EnumVal, Name) \
+ case EnumName: \
+ return #EnumName;
+#include "llvm/DebugInfo/CodeView/CodeViewSymbols.def"
+ default:
+ return "UnknownSym";
+ }
+ llvm_unreachable("Unknown SymbolKind::Kind");
+}
+
+std::string LVCodeViewReader::formatRegisterId(RegisterId Register,
+ CPUType CPU) {
+#define RETURN_CASE(Enum, X, Ret) \
+ case Enum::X: \
+ return Ret;
+
+ if (CPU == CPUType::ARMNT) {
+ switch (Register) {
+#define CV_REGISTERS_ARM
+#define CV_REGISTER(name, val) RETURN_CASE(RegisterId, name, #name)
+#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
+#undef CV_REGISTER
+#undef CV_REGISTERS_ARM
+
+ default:
+ break;
+ }
+ } else if (CPU == CPUType::ARM64) {
+ switch (Register) {
+#define CV_REGISTERS_ARM64
+#define CV_REGISTER(name, val) RETURN_CASE(RegisterId, name, #name)
+#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
+#undef CV_REGISTER
+#undef CV_REGISTERS_ARM64
+
+ default:
+ break;
+ }
+ } else {
+ switch (Register) {
+#define CV_REGISTERS_X86
+#define CV_REGISTER(name, val) RETURN_CASE(RegisterId, name, #name)
+#include "llvm/DebugInfo/CodeView/CodeViewRegisters.def"
+#undef CV_REGISTER
+#undef CV_REGISTERS_X86
+
+ default:
+ break;
+ }
+ }
+ return "formatUnknownEnum(Id)";
+}
+
+void LVCodeViewReader::printRelocatedField(StringRef Label,
+ const coff_section *CoffSection,
+ uint32_t RelocOffset,
+ uint32_t Offset,
+ StringRef *RelocSym) {
+ StringRef SymStorage;
+ StringRef &Symbol = RelocSym ? *RelocSym : SymStorage;
+ if (!resolveSymbolName(CoffSection, RelocOffset, Symbol))
+ W.printSymbolOffset(Label, Symbol, Offset);
+ else
+ W.printHex(Label, RelocOffset);
+}
+
+void LVCodeViewReader::getLinkageName(const coff_section *CoffSection,
+ uint32_t RelocOffset, uint32_t Offset,
+ StringRef *RelocSym) {
+ StringRef SymStorage;
+ StringRef &Symbol = RelocSym ? *RelocSym : SymStorage;
+ if (resolveSymbolName(CoffSection, RelocOffset, Symbol))
+ Symbol = "";
+}
+
+Expected<StringRef>
+LVCodeViewReader::getFileNameForFileOffset(uint32_t FileOffset,
+ const SymbolGroup *SG) {
+ if (SG) {
+ Expected<StringRef> Filename = SG->getNameFromChecksums(FileOffset);
+ if (!Filename) {
+ consumeError(Filename.takeError());
+ return StringRef("");
+ }
+ return *Filename;
+ }
+
+ // The file checksum subsection should precede all references to it.
+ if (!CVFileChecksumTable.valid() || !CVStringTable.valid())
+ return createStringError(object_error::parse_failed, getFileName());
+
+ VarStreamArray<FileChecksumEntry>::Iterator Iter =
+ CVFileChecksumTable.getArray().at(FileOffset);
+
+ // Check if the file checksum table offset is valid.
+ if (Iter == CVFileChecksumTable.end())
+ return createStringError(object_error::parse_failed, getFileName());
+
+ Expected<StringRef> NameOrErr = CVStringTable.getString(Iter->FileNameOffset);
+ if (!NameOrErr)
+ return createStringError(object_error::parse_failed, getFileName());
+ return *NameOrErr;
+}
+
+Error LVCodeViewReader::printFileNameForOffset(StringRef Label,
+ uint32_t FileOffset,
+ const SymbolGroup *SG) {
+ Expected<StringRef> NameOrErr = getFileNameForFileOffset(FileOffset, SG);
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+ W.printHex(Label, *NameOrErr, FileOffset);
+ return Error::success();
+}
+
+void LVCodeViewReader::cacheRelocations() {
+ for (const SectionRef &Section : getObj().sections()) {
+ const coff_section *CoffSection = getObj().getCOFFSection(Section);
+
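+    // Gather all the relocations that apply to this section, keyed by its
+    // COFF section.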
+    for (const RelocationRef &Relocation : Section.relocations())
+      RelocMap[CoffSection].push_back(Relocation);
+
+ // Sort relocations by address.
+ llvm::sort(RelocMap[CoffSection], [](RelocationRef L, RelocationRef R) {
+ return L.getOffset() < R.getOffset();
+ });
+ }
+}
+
+// Given a section and an offset into this section, the function returns the
+// symbol used for the relocation at the offset.
+Error LVCodeViewReader::resolveSymbol(const coff_section *CoffSection,
+ uint64_t Offset, SymbolRef &Sym) {
+ const auto &Relocations = RelocMap[CoffSection];
+ basic_symbol_iterator SymI = getObj().symbol_end();
+ for (const RelocationRef &Relocation : Relocations) {
+ uint64_t RelocationOffset = Relocation.getOffset();
+
+ if (RelocationOffset == Offset) {
+ SymI = Relocation.getSymbol();
+ break;
+ }
+ }
+ if (SymI == getObj().symbol_end())
+ return make_error<StringError>("Unknown Symbol", inconvertibleErrorCode());
+ Sym = *SymI;
+ return ErrorSuccess();
+}
+
+// Given a section and an offset into this section, the function returns the
+// name of the symbol used for the relocation at the offset.
+Error LVCodeViewReader::resolveSymbolName(const coff_section *CoffSection,
+ uint64_t Offset, StringRef &Name) {
+ SymbolRef Symbol;
+ if (Error E = resolveSymbol(CoffSection, Offset, Symbol))
+ return E;
+ Expected<StringRef> NameOrErr = Symbol.getName();
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+ Name = *NameOrErr;
+ return ErrorSuccess();
+}
+
+// CodeView and DWARF can have references to compiler-generated elements
+// used for initialization. MSVC includes internal compile units in the
+// PDBs, associated with the MS runtime support. We mark them as 'system'
+// and they are printed only if the command line option 'internal=system'
+// is given.
+bool LVCodeViewReader::isSystemEntry(LVElement *Element, StringRef Name) const {
+ Name = Name.empty() ? Element->getName() : Name;
+ auto Find = [=](const char *String) -> bool {
+ return StringRef::npos != Name.find(String);
+ };
+ auto Starts = [=](const char *Pattern) -> bool {
+ return Name.startswith(Pattern);
+ };
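+  // Well-known name patterns for MSVC compiler-generated and runtime
+  // support entries.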
+ auto CheckExclude = [&]() -> bool {
+ if (Starts("__") || Starts("_PMD") || Starts("_PMFN"))
+ return true;
+ if (Find("_s__"))
+ return true;
+ if (Find("_CatchableType") || Find("_TypeDescriptor"))
+ return true;
+ if (Find("Intermediate\\vctools"))
+ return true;
+ if (Find("$initializer$") || Find("dynamic initializer"))
+ return true;
+ if (Find("`vftable'") || Find("_GLOBAL__sub"))
+ return true;
+ return false;
+ };
+ bool Excluded = CheckExclude();
+ if (Excluded)
+ Element->setIsSystem();
+
+ return Excluded;
+}
+
+Error LVCodeViewReader::collectInlineeInfo(
+ DebugInlineeLinesSubsectionRef &Lines, const llvm::pdb::SymbolGroup *SG) {
+ for (const InlineeSourceLine &Line : Lines) {
+ TypeIndex TIInlinee = Line.Header->Inlinee;
+ uint32_t LineNumber = Line.Header->SourceLineNum;
+ uint32_t FileOffset = Line.Header->FileID;
+ LLVM_DEBUG({
+ DictScope S(W, "InlineeSourceLine");
+ LogicalVisitor.printTypeIndex("Inlinee", TIInlinee, StreamTPI);
+ if (Error Err = printFileNameForOffset("FileID", FileOffset, SG))
+ return Err;
+ W.printNumber("SourceLineNum", LineNumber);
+
+ if (Lines.hasExtraFiles()) {
+ W.printNumber("ExtraFileCount", Line.ExtraFiles.size());
+ ListScope ExtraFiles(W, "ExtraFiles");
+ for (const ulittle32_t &FID : Line.ExtraFiles)
+ if (Error Err = printFileNameForOffset("FileID", FID, SG))
+ return Err;
+ }
+ });
+ Expected<StringRef> NameOrErr = getFileNameForFileOffset(FileOffset, SG);
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+ LogicalVisitor.addInlineeInfo(TIInlinee, LineNumber, *NameOrErr);
+ }
+
+ return Error::success();
+}
+
+Error LVCodeViewReader::traverseInlineeLines(StringRef Subsection) {
+ BinaryStreamReader SR(Subsection, llvm::support::little);
+ DebugInlineeLinesSubsectionRef Lines;
+ if (Error E = Lines.initialize(SR))
+ return createStringError(errorToErrorCode(std::move(E)), getFileName());
+
+ return collectInlineeInfo(Lines);
+}
+
+Error LVCodeViewReader::createLines(
+ const FixedStreamArray<LineNumberEntry> &LineNumbers, LVAddress Addendum,
+ uint32_t Segment, uint32_t Begin, uint32_t Size, uint32_t NameIndex,
+ const SymbolGroup *SG) {
+ LLVM_DEBUG({
+ uint32_t End = Begin + Size;
+ W.getOStream() << formatv("{0:x-4}:{1:x-8}-{2:x-8}\n", Segment, Begin, End);
+ });
+
+ for (const LineNumberEntry &Line : LineNumbers) {
+ if (Line.Offset >= Size)
+ return createStringError(object_error::parse_failed, getFileName());
+
+ LineInfo LI(Line.Flags);
+
+ LLVM_DEBUG({
+ W.getOStream() << formatv(
+ "{0} {1:x-8}\n", utostr(LI.getStartLine()),
+ fmt_align(Begin + Line.Offset, AlignStyle::Right, 8, '0'));
+ });
+
+    // The 'processLines()' function will move each created logical line
+    // to its enclosing logical scope, using the debug ranges information;
+    // the lines are released when their parent scope is deleted.
+ LVLineDebug *LineDebug = createLineDebug();
+ CULines.push_back(LineDebug);
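+    // Convert the segment:offset pair into a linear address; 'Addendum' is
+    // the extra displacement supplied by the caller (the symbol table address
+    // for COFF objects, zero for PDBs).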
+ LVAddress Address = linearAddress(Segment, Begin + Line.Offset);
+ LineDebug->setAddress(Address + Addendum);
+
+ if (LI.isAlwaysStepInto())
+ LineDebug->setIsAlwaysStepInto();
+ else if (LI.isNeverStepInto())
+ LineDebug->setIsNeverStepInto();
+ else
+ LineDebug->setLineNumber(LI.getStartLine());
+
+ if (LI.isStatement())
+ LineDebug->setIsNewStatement();
+
+ Expected<StringRef> NameOrErr = getFileNameForFileOffset(NameIndex, SG);
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+ LineDebug->setFilename(*NameOrErr);
+ }
+
+ return Error::success();
+}
+
+Error LVCodeViewReader::initializeFileAndStringTables(
+ BinaryStreamReader &Reader) {
+ while (Reader.bytesRemaining() > 0 &&
+ (!CVFileChecksumTable.valid() || !CVStringTable.valid())) {
+    // The section consists of a number of subsections in the following format:
+ // |SubSectionType|SubSectionSize|Contents...|
+ uint32_t SubType, SubSectionSize;
+
+ if (Error E = Reader.readInteger(SubType))
+ return createStringError(errorToErrorCode(std::move(E)), getFileName());
+ if (Error E = Reader.readInteger(SubSectionSize))
+ return createStringError(errorToErrorCode(std::move(E)), getFileName());
+
+ StringRef Contents;
+ if (Error E = Reader.readFixedString(Contents, SubSectionSize))
+ return createStringError(errorToErrorCode(std::move(E)), getFileName());
+
+ BinaryStreamRef ST(Contents, support::little);
+ switch (DebugSubsectionKind(SubType)) {
+ case DebugSubsectionKind::FileChecksums:
+ if (Error E = CVFileChecksumTable.initialize(ST))
+ return createStringError(errorToErrorCode(std::move(E)), getFileName());
+ break;
+ case DebugSubsectionKind::StringTable:
+ if (Error E = CVStringTable.initialize(ST))
+ return createStringError(errorToErrorCode(std::move(E)), getFileName());
+ break;
+ default:
+ break;
+ }
+
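+    // Subsection contents are padded to a 4-byte boundary; skip the padding.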
+ uint32_t PaddedSize = alignTo(SubSectionSize, 4);
+ if (Error E = Reader.skip(PaddedSize - SubSectionSize))
+ return createStringError(errorToErrorCode(std::move(E)), getFileName());
+ }
+
+ return Error::success();
+}
+
+Error LVCodeViewReader::loadTypeServer(TypeServer2Record &TS) {
+ LLVM_DEBUG({
+ W.printString("Guid", formatv("{0}", TS.getGuid()).str());
+ W.printNumber("Age", TS.getAge());
+ W.printString("Name", TS.getName());
+ });
+
+ SmallString<128> ServerName(TS.getName());
+ BuffOrErr = MemoryBuffer::getFile(ServerName);
+ if (BuffOrErr.getError()) {
+ // The server name does not exist. Try in the same directory as the
+ // input file.
+ ServerName = createAlternativePath(ServerName);
+ BuffOrErr = MemoryBuffer::getFile(ServerName);
+ if (BuffOrErr.getError()) {
+ // For the error message, use the original type server name.
+ return createStringError(errc::bad_file_descriptor,
+ "File '%s' does not exist.",
+ TS.getName().str().c_str());
+ }
+ }
+ MemBuffer = std::move(BuffOrErr.get());
+
+ // Check if the buffer corresponds to a PDB file.
+ assert(identify_magic((*MemBuffer).getBuffer()) == file_magic::pdb &&
+ "Invalid PDB file.");
+
+ if (Error Err = loadDataForPDB(PDB_ReaderType::Native, ServerName, Session))
+ return createStringError(errorToErrorCode(std::move(Err)), "%s",
+ ServerName.c_str());
+
+ PdbSession.reset(static_cast<NativeSession *>(Session.release()));
+ PDBFile &Pdb = PdbSession->getPDBFile();
+
+ // Just because a file with a matching name was found and it was an actual
+  // PDB file doesn't mean it matches. For it to match, the InfoStream's GUID
+ // must match the GUID specified in the TypeServer2 record.
+ Expected<InfoStream &> expectedInfo = Pdb.getPDBInfoStream();
+ if (!expectedInfo || expectedInfo->getGuid() != TS.getGuid())
+ return createStringError(errc::invalid_argument, "signature_out_of_date");
+
+ // The reader needs to switch to a type server, to process the types from
+ // the server. We need to keep the original input source, as reading other
+ // sections will require the input associated with the loaded object file.
+ TypeServer = std::make_shared<InputFile>(&Pdb);
+ LogicalVisitor.setInput(TypeServer);
+
+ LazyRandomTypeCollection &Types = types();
+ LazyRandomTypeCollection &Ids = ids();
+ if (Error Err = traverseTypes(Pdb, Types, Ids))
+ return Err;
+
+ return Error::success();
+}
+
+Error LVCodeViewReader::loadPrecompiledObject(PrecompRecord &Precomp,
+ CVTypeArray &CVTypesObj) {
+ LLVM_DEBUG({
+ W.printHex("Count", Precomp.getTypesCount());
+ W.printHex("Signature", Precomp.getSignature());
+ W.printString("PrecompFile", Precomp.getPrecompFilePath());
+ });
+
+ SmallString<128> ServerName(Precomp.getPrecompFilePath());
+ BuffOrErr = MemoryBuffer::getFile(ServerName);
+ if (BuffOrErr.getError()) {
+    // The server name does not exist. Try in the same directory as the
+    // input file.
+    ServerName = createAlternativePath(ServerName);
+    BuffOrErr = MemoryBuffer::getFile(ServerName);
+    if (BuffOrErr.getError()) {
+ // For the error message, use the original type server name.
+ return createStringError(errc::bad_file_descriptor,
+ "File '%s' does not exist.",
+ Precomp.getPrecompFilePath().str().c_str());
+ }
+ }
+ MemBuffer = std::move(BuffOrErr.get());
+
+ Expected<std::unique_ptr<Binary>> BinOrErr = createBinary(*MemBuffer);
+ if (errorToErrorCode(BinOrErr.takeError()))
+ return createStringError(errc::not_supported,
+ "Binary object format in '%s' is not supported.",
+ ServerName.c_str());
+
+ Binary &BinaryObj = *BinOrErr.get();
+ if (!BinaryObj.isCOFF())
+ return createStringError(errc::not_supported, "'%s' is not a COFF object.",
+ ServerName.c_str());
+
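+  // The builder accumulates the type records from the precompiled header,
+  // followed by the type records from this object file.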
+ Builder = std::make_unique<AppendingTypeTableBuilder>(BuilderAllocator);
+
+  // The MSVC precompiled header object file should contain just a single
+ // ".debug$P" section.
+ COFFObjectFile &Obj = *cast<COFFObjectFile>(&BinaryObj);
+ for (const SectionRef &Section : Obj.sections()) {
+ Expected<StringRef> SectionNameOrErr = Section.getName();
+ if (!SectionNameOrErr)
+ return SectionNameOrErr.takeError();
+ if (*SectionNameOrErr == ".debug$P") {
+ Expected<StringRef> DataOrErr = Section.getContents();
+ if (!DataOrErr)
+ return DataOrErr.takeError();
+ uint32_t Magic;
+ if (Error Err = consume(*DataOrErr, Magic))
+ return Err;
+ if (Magic != COFF::DEBUG_SECTION_MAGIC)
+ return errorCodeToError(object_error::parse_failed);
+
+ ReaderPrecomp =
+ std::make_unique<BinaryStreamReader>(*DataOrErr, support::little);
+ cantFail(
+ ReaderPrecomp->readArray(CVTypesPrecomp, ReaderPrecomp->getLength()));
+
+ // Append all the type records up to the LF_ENDPRECOMP marker and
+ // check if the signatures match.
+ for (const CVType &Type : CVTypesPrecomp) {
+ ArrayRef<uint8_t> TypeData = Type.data();
+ if (Type.kind() == LF_ENDPRECOMP) {
+ EndPrecompRecord EndPrecomp = cantFail(
+ TypeDeserializer::deserializeAs<EndPrecompRecord>(TypeData));
+ if (Precomp.getSignature() != EndPrecomp.getSignature())
+ return createStringError(errc::invalid_argument, "no matching pch");
+ break;
+ }
+ Builder->insertRecordBytes(TypeData);
+ }
+ // Done processing .debug$P, break out of section loop.
+ break;
+ }
+ }
+
+ // Append all the type records, skipping the first record which is the
+ // reference to the precompiled header object information.
+ for (const CVType &Type : CVTypesObj) {
+ ArrayRef<uint8_t> TypeData = Type.data();
+ if (Type.kind() != LF_PRECOMP)
+ Builder->insertRecordBytes(TypeData);
+ }
+
+ // Set up a type stream that refers to the added type records.
+ Builder->ForEachRecord(
+ [&](TypeIndex TI, const CVType &Type) { TypeArray.push_back(Type); });
+
+ ItemStream =
+ std::make_unique<BinaryItemStream<CVType>>(llvm::support::little);
+ ItemStream->setItems(TypeArray);
+ TypeStream.setUnderlyingStream(*ItemStream);
+
+ PrecompHeader =
+ std::make_shared<LazyRandomTypeCollection>(TypeStream, TypeArray.size());
+
+ // Change the original input source to use the collected type records.
+ LogicalVisitor.setInput(PrecompHeader);
+
+ LazyRandomTypeCollection &Types = types();
+ LazyRandomTypeCollection &Ids = ids();
+ LVTypeVisitor TDV(W, &LogicalVisitor, Types, Ids, StreamTPI,
+ LogicalVisitor.getShared());
+ return visitTypeStream(Types, TDV);
+}
+
+Error LVCodeViewReader::traverseTypeSection(StringRef SectionName,
+ const SectionRef &Section) {
+ LLVM_DEBUG({
+ ListScope D(W, "CodeViewTypes");
+ W.printNumber("Section", SectionName, getObj().getSectionID(Section));
+ });
+
+ Expected<StringRef> DataOrErr = Section.getContents();
+ if (!DataOrErr)
+ return DataOrErr.takeError();
+ uint32_t Magic;
+ if (Error Err = consume(*DataOrErr, Magic))
+ return Err;
+ if (Magic != COFF::DEBUG_SECTION_MAGIC)
+ return errorCodeToError(object_error::parse_failed);
+
+ // Get the first type record. It will indicate if this object uses a type
+ // server (/Zi) or a PCH file (/Yu).
+ CVTypeArray CVTypes;
+ BinaryStreamReader Reader(*DataOrErr, support::little);
+ cantFail(Reader.readArray(CVTypes, Reader.getLength()));
+ CVTypeArray::Iterator FirstType = CVTypes.begin();
+
+ // The object was compiled with /Zi. It uses types from a type server PDB.
+ if (FirstType->kind() == LF_TYPESERVER2) {
+ TypeServer2Record TS = cantFail(
+ TypeDeserializer::deserializeAs<TypeServer2Record>(FirstType->data()));
+ return loadTypeServer(TS);
+ }
+
+ // The object was compiled with /Yc or /Yu. It uses types from another
+ // object file with a matching signature.
+ if (FirstType->kind() == LF_PRECOMP) {
+ PrecompRecord Precomp = cantFail(
+ TypeDeserializer::deserializeAs<PrecompRecord>(FirstType->data()));
+ return loadPrecompiledObject(Precomp, CVTypes);
+ }
+
+ LazyRandomTypeCollection &Types = types();
+ LazyRandomTypeCollection &Ids = ids();
+ Types.reset(*DataOrErr, 100);
+ LVTypeVisitor TDV(W, &LogicalVisitor, Types, Ids, StreamTPI,
+ LogicalVisitor.getShared());
+ return visitTypeStream(Types, TDV);
+}
+
+Error LVCodeViewReader::traverseTypes(PDBFile &Pdb,
+ LazyRandomTypeCollection &Types,
+ LazyRandomTypeCollection &Ids) {
+ // Traverse types (TPI and IPI).
+ auto VisitTypes = [&](LazyRandomTypeCollection &Types,
+ LazyRandomTypeCollection &Ids,
+ SpecialStream StreamIdx) -> Error {
+ LVTypeVisitor TDV(W, &LogicalVisitor, Types, Ids, StreamIdx,
+ LogicalVisitor.getShared());
+ return visitTypeStream(Types, TDV);
+ };
+
+ Expected<TpiStream &> StreamTpiOrErr = Pdb.getPDBTpiStream();
+ if (!StreamTpiOrErr)
+ return StreamTpiOrErr.takeError();
+ TpiStream &StreamTpi = *StreamTpiOrErr;
+ StreamTpi.buildHashMap();
+ LLVM_DEBUG({
+ W.getOStream() << formatv("Showing {0:N} TPI records\n",
+ StreamTpi.getNumTypeRecords());
+ });
+ if (Error Err = VisitTypes(Types, Ids, StreamTPI))
+ return Err;
+
+ Expected<TpiStream &> StreamIpiOrErr = Pdb.getPDBIpiStream();
+ if (!StreamIpiOrErr)
+ return StreamIpiOrErr.takeError();
+ TpiStream &StreamIpi = *StreamIpiOrErr;
+ StreamIpi.buildHashMap();
+ LLVM_DEBUG({
+ W.getOStream() << formatv("Showing {0:N} IPI records\n",
+ StreamIpi.getNumTypeRecords());
+ });
+ return VisitTypes(Ids, Ids, StreamIPI);
+}
+
+Error LVCodeViewReader::traverseSymbolsSubsection(StringRef Subsection,
+ const SectionRef &Section,
+ StringRef SectionContents) {
+ ArrayRef<uint8_t> BinaryData(Subsection.bytes_begin(),
+ Subsection.bytes_end());
+ LVSymbolVisitorDelegate VisitorDelegate(this, Section, &getObj(),
+ SectionContents);
+ CVSymbolArray Symbols;
+ BinaryStreamReader Reader(BinaryData, llvm::support::little);
+ if (Error E = Reader.readArray(Symbols, Reader.getLength()))
+ return createStringError(errorToErrorCode(std::move(E)), getFileName());
+
+ LazyRandomTypeCollection &Types = types();
+ LazyRandomTypeCollection &Ids = ids();
+ SymbolVisitorCallbackPipeline Pipeline;
+ SymbolDeserializer Deserializer(&VisitorDelegate,
+ CodeViewContainer::ObjectFile);
+  // As we are processing a COFF object file, the TPI stream is also used as
+  // the IPI stream, so the generic CodeView processing code does not need
+  // any additional checks.
+ LVSymbolVisitor Traverser(this, W, &LogicalVisitor, Types, Ids,
+ &VisitorDelegate, LogicalVisitor.getShared());
+
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(Traverser);
+ CVSymbolVisitor Visitor(Pipeline);
+ return Visitor.visitSymbolStream(Symbols);
+}
+
+Error LVCodeViewReader::traverseSymbolSection(StringRef SectionName,
+ const SectionRef &Section) {
+ LLVM_DEBUG({
+ ListScope D(W, "CodeViewDebugInfo");
+ W.printNumber("Section", SectionName, getObj().getSectionID(Section));
+ });
+
+ Expected<StringRef> SectionOrErr = Section.getContents();
+ if (!SectionOrErr)
+ return SectionOrErr.takeError();
+ StringRef SectionContents = *SectionOrErr;
+ StringRef Data = SectionContents;
+
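+  // Line tables reference data stored in other subsections, so they are
+  // recorded here and processed once all the subsections have been read.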
+ SmallVector<StringRef, 10> SymbolNames;
+ StringMap<StringRef> FunctionLineTables;
+
+ uint32_t Magic;
+ if (Error E = consume(Data, Magic))
+ return createStringError(errorToErrorCode(std::move(E)), getFileName());
+
+ if (Magic != COFF::DEBUG_SECTION_MAGIC)
+ return createStringError(object_error::parse_failed, getFileName());
+
+ BinaryStreamReader FSReader(Data, support::little);
+ if (Error Err = initializeFileAndStringTables(FSReader))
+ return Err;
+
+ while (!Data.empty()) {
+    // The section consists of a number of subsections in the following format:
+ // |SubSectionType|SubSectionSize|Contents...|
+ uint32_t SubType, SubSectionSize;
+ if (Error E = consume(Data, SubType))
+ return createStringError(errorToErrorCode(std::move(E)), getFileName());
+ if (Error E = consume(Data, SubSectionSize))
+ return createStringError(errorToErrorCode(std::move(E)), getFileName());
+
+ // Process the subsection as normal even if the ignore bit is set.
+ SubType &= ~SubsectionIgnoreFlag;
+
+ // Get the contents of the subsection.
+ if (SubSectionSize > Data.size())
+ return createStringError(object_error::parse_failed, getFileName());
+ StringRef Contents = Data.substr(0, SubSectionSize);
+
+ // Add SubSectionSize to the current offset and align that offset
+ // to find the next subsection.
+ size_t SectionOffset = Data.data() - SectionContents.data();
+ size_t NextOffset = SectionOffset + SubSectionSize;
+ NextOffset = alignTo(NextOffset, 4);
+ if (NextOffset > SectionContents.size())
+ return createStringError(object_error::parse_failed, getFileName());
+ Data = SectionContents.drop_front(NextOffset);
+
+ switch (DebugSubsectionKind(SubType)) {
+ case DebugSubsectionKind::Symbols:
+ if (Error Err =
+ traverseSymbolsSubsection(Contents, Section, SectionContents))
+ return Err;
+ break;
+
+ case DebugSubsectionKind::InlineeLines:
+ if (Error Err = traverseInlineeLines(Contents))
+ return Err;
+ break;
+
+ case DebugSubsectionKind::Lines:
+ // Holds a PC to file:line table. Some data to parse this subsection
+ // is stored in the other subsections, so just check sanity and store
+ // the pointers for deferred processing.
+
+ // Collect function and ranges only if we need to print logical lines.
+ if (options().getGeneralCollectRanges()) {
+
+ if (SubSectionSize < 12) {
+ // There should be at least three words to store two function
+ // relocations and size of the code.
+ return createStringError(object_error::parse_failed, getFileName());
+ }
+
+ StringRef SymbolName;
+ if (Error Err = resolveSymbolName(getObj().getCOFFSection(Section),
+ SectionOffset, SymbolName))
+ return createStringError(errorToErrorCode(std::move(Err)),
+ getFileName());
+
+ LLVM_DEBUG({ W.printString("Symbol Name", SymbolName); });
+ if (FunctionLineTables.count(SymbolName) != 0) {
+ // Saw debug info for this function already?
+ return createStringError(object_error::parse_failed, getFileName());
+ }
+
+ FunctionLineTables[SymbolName] = Contents;
+ SymbolNames.push_back(SymbolName);
+ }
+ break;
+
+ // Do nothing for unrecognized subsections.
+ default:
+ break;
+ }
+ W.flush();
+ }
+
+ // Traverse the line tables now that we've read all the subsections and
+ // know all the required information.
+ for (StringRef SymbolName : SymbolNames) {
+ LLVM_DEBUG({
+ ListScope S(W, "FunctionLineTable");
+ W.printString("Symbol Name", SymbolName);
+ });
+
+ BinaryStreamReader Reader(FunctionLineTables[SymbolName], support::little);
+
+ DebugLinesSubsectionRef Lines;
+ if (Error E = Lines.initialize(Reader))
+ return createStringError(errorToErrorCode(std::move(E)), getFileName());
+
+ // Find the associated symbol table information.
+ LVSymbolTableEntry SymbolTableEntry = getSymbolTableEntry(SymbolName);
+ LVScope *Function = SymbolTableEntry.Scope;
+ if (!Function)
+ continue;
+
+ LVAddress Addendum = SymbolTableEntry.Address;
+ LVSectionIndex SectionIndex = SymbolTableEntry.SectionIndex;
+
+ // The given scope represents the function that contains the line numbers.
+ // Collect all generated debug lines associated with the function.
+ CULines.clear();
+
+ // For the given scope, collect all scopes ranges.
+ LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
+ ScopesWithRanges->clear();
+ Function->getRanges(*ScopesWithRanges);
+ ScopesWithRanges->sort();
+
+ uint16_t Segment = Lines.header()->RelocSegment;
+ uint32_t Begin = Lines.header()->RelocOffset;
+ uint32_t Size = Lines.header()->CodeSize;
+ for (const LineColumnEntry &Block : Lines)
+ if (Error Err = createLines(Block.LineNumbers, Addendum, Segment, Begin,
+ Size, Block.NameIndex))
+ return Err;
+
+ // Include lines from any inlined functions within the current function.
+ includeInlineeLines(SectionIndex, Function);
+
+ if (Error Err = createInstructions(Function, SectionIndex))
+ return Err;
+
+ processLines(&CULines, SectionIndex, Function);
+ }
+
+ return Error::success();
+}
+
+void LVCodeViewReader::sortScopes() { Root->sort(); }
+
+void LVCodeViewReader::print(raw_ostream &OS) const {
+ LLVM_DEBUG(dbgs() << "CreateReaders\n");
+}
+
+void LVCodeViewReader::mapRangeAddress(const ObjectFile &Obj,
+ const SectionRef &Section,
+ bool IsComdat) {
+ if (!Obj.isCOFF())
+ return;
+
+ const COFFObjectFile *Object = cast<COFFObjectFile>(&Obj);
+
+ for (const SymbolRef &Sym : Object->symbols()) {
+ if (!Section.containsSymbol(Sym))
+ continue;
+
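+    // Only function symbols are of interest; their linkage name and address
+    // are recorded in the symbol table below.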
+ COFFSymbolRef Symbol = Object->getCOFFSymbol(Sym);
+ if (Symbol.getComplexType() != llvm::COFF::IMAGE_SYM_DTYPE_FUNCTION)
+ continue;
+
+ StringRef SymbolName;
+ Expected<StringRef> SymNameOrErr = Object->getSymbolName(Symbol);
+ if (!SymNameOrErr) {
+ W.startLine() << "Invalid symbol name: " << Symbol.getSectionNumber()
+ << "\n";
+ consumeError(SymNameOrErr.takeError());
+ continue;
+ }
+ SymbolName = *SymNameOrErr;
+
+ LLVM_DEBUG({
+ Expected<const coff_section *> SectionOrErr =
+ Object->getSection(Symbol.getSectionNumber());
+ if (!SectionOrErr) {
+ W.startLine() << "Invalid section number: " << Symbol.getSectionNumber()
+ << "\n";
+ consumeError(SectionOrErr.takeError());
+ return;
+ }
+ W.printNumber("Section #", Symbol.getSectionNumber());
+ W.printString("Name", SymbolName);
+ W.printHex("Value", Symbol.getValue());
+ });
+
+ // Record the symbol name (linkage) and its loading address.
+ addToSymbolTable(SymbolName, Symbol.getValue(), Symbol.getSectionNumber(),
+ IsComdat);
+ }
+}
+
+Error LVCodeViewReader::createScopes(COFFObjectFile &Obj) {
+ if (Error Err = loadTargetInfo(Obj))
+ return Err;
+
+ // Initialization required when processing a COFF file:
+ // Cache the symbols relocations.
+ // Create a mapping for virtual addresses.
+ // Get the functions entry points.
+ cacheRelocations();
+ mapVirtualAddress(Obj);
+
+ for (const SectionRef &Section : Obj.sections()) {
+ Expected<StringRef> SectionNameOrErr = Section.getName();
+ if (!SectionNameOrErr)
+ return SectionNameOrErr.takeError();
+ // .debug$T is a standard CodeView type section, while .debug$P is the
+ // same format but used for MSVC precompiled header object files.
+ if (*SectionNameOrErr == ".debug$T" || *SectionNameOrErr == ".debug$P")
+ if (Error Err = traverseTypeSection(*SectionNameOrErr, Section))
+ return Err;
+ }
+
+ // Process collected namespaces.
+ LogicalVisitor.processNamespaces();
+
+ for (const SectionRef &Section : Obj.sections()) {
+ Expected<StringRef> SectionNameOrErr = Section.getName();
+ if (!SectionNameOrErr)
+ return SectionNameOrErr.takeError();
+ if (*SectionNameOrErr == ".debug$S")
+ if (Error Err = traverseSymbolSection(*SectionNameOrErr, Section))
+ return Err;
+ }
+
+ // Check if we have to close the Compile Unit scope.
+ LogicalVisitor.closeScope();
+
+ // Traverse the strings recorded and transform them into filenames.
+ LogicalVisitor.processFiles();
+
+ // Process collected element lines.
+ LogicalVisitor.processLines();
+
+ // Translate composite names into a single component.
+ Root->transformScopedName();
+ return Error::success();
+}
+
+Error LVCodeViewReader::createScopes(PDBFile &Pdb) {
+ if (Error Err = loadTargetInfo(Pdb))
+ return Err;
+
+ if (!Pdb.hasPDBTpiStream() || !Pdb.hasPDBDbiStream())
+ return Error::success();
+
+ // Open the executable associated with the PDB file and get the section
+ // addresses used to calculate linear addresses for CodeView Symbols.
+ if (!ExePath.empty()) {
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =
+ MemoryBuffer::getFileOrSTDIN(ExePath);
+ if (BuffOrErr.getError()) {
+ return createStringError(errc::bad_file_descriptor,
+ "File '%s' does not exist.", ExePath.c_str());
+ }
+ BinaryBuffer = std::move(BuffOrErr.get());
+
+ // Check if the buffer corresponds to a PECOFF executable.
+ assert(identify_magic(BinaryBuffer->getBuffer()) ==
+ file_magic::pecoff_executable &&
+ "Invalid PECOFF executable file.");
+
+ Expected<std::unique_ptr<Binary>> BinOrErr =
+ createBinary(BinaryBuffer->getMemBufferRef());
+ if (errorToErrorCode(BinOrErr.takeError())) {
+ return createStringError(errc::not_supported,
+ "Binary object format in '%s' is not supported.",
+ ExePath.c_str());
+ }
+ BinaryExecutable = std::move(*BinOrErr);
+ if (COFFObjectFile *COFFObject =
+ dyn_cast<COFFObjectFile>(BinaryExecutable.get()))
+ mapVirtualAddress(*COFFObject);
+ }
+
+ // In order to generate a full logical view, we have to traverse both
+ // streams TPI and IPI if they are present. The following table gives
+ // the stream where a specified type is located. If the IPI stream is
+ // not present, all the types are located in the TPI stream.
+ //
+ // TPI Stream:
+ // LF_POINTER LF_MODIFIER LF_PROCEDURE LF_MFUNCTION
+ // LF_LABEL LF_ARGLIST LF_FIELDLIST LF_ARRAY
+ // LF_CLASS LF_STRUCTURE LF_INTERFACE LF_UNION
+ // LF_ENUM LF_TYPESERVER2 LF_VFTABLE LF_VTSHAPE
+ // LF_BITFIELD LF_METHODLIST LF_PRECOMP LF_ENDPRECOMP
+ //
+ // IPI stream:
+ // LF_FUNC_ID LF_MFUNC_ID LF_BUILDINFO
+ // LF_SUBSTR_LIST LF_STRING_ID LF_UDT_SRC_LINE
+ // LF_UDT_MOD_SRC_LINE
+
+ LazyRandomTypeCollection &Types = types();
+ LazyRandomTypeCollection &Ids = ids();
+ if (Error Err = traverseTypes(Pdb, Types, Ids))
+ return Err;
+
+ // Process collected namespaces.
+ LogicalVisitor.processNamespaces();
+
+ LLVM_DEBUG({ W.getOStream() << "Traversing inlined lines\n"; });
+
+ auto VisitInlineeLines = [&](int32_t Modi, const SymbolGroup &SG,
+ DebugInlineeLinesSubsectionRef &Lines) -> Error {
+ return collectInlineeInfo(Lines, &SG);
+ };
+
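+  // The PDB subsection iteration helpers require a LinePrinter; direct its
+  // output to the null stream, as only the traversal callbacks are needed.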
+ FilterOptions Filters = {};
+ LinePrinter Printer(/*Indent=*/2, false, nulls(), Filters);
+ const PrintScope HeaderScope(Printer, /*IndentLevel=*/2);
+ if (Error Err = iterateModuleSubsections<DebugInlineeLinesSubsectionRef>(
+ Input, HeaderScope, VisitInlineeLines))
+ return Err;
+
+ // Traverse global symbols.
+ LLVM_DEBUG({ W.getOStream() << "Traversing global symbols\n"; });
+ if (Pdb.hasPDBGlobalsStream()) {
+ Expected<GlobalsStream &> GlobalsOrErr = Pdb.getPDBGlobalsStream();
+ if (!GlobalsOrErr)
+ return GlobalsOrErr.takeError();
+ GlobalsStream &Globals = *GlobalsOrErr;
+ const GSIHashTable &Table = Globals.getGlobalsTable();
+ Expected<SymbolStream &> ExpectedSyms = Pdb.getPDBSymbolStream();
+ if (ExpectedSyms) {
+
+ SymbolVisitorCallbackPipeline Pipeline;
+ SymbolDeserializer Deserializer(nullptr, CodeViewContainer::Pdb);
+ LVSymbolVisitor Traverser(this, W, &LogicalVisitor, Types, Ids, nullptr,
+ LogicalVisitor.getShared());
+
+ // As the global symbols do not have an associated Compile Unit, create
+ // one, as the container for all global symbols.
+ RecordPrefix Prefix(SymbolKind::S_COMPILE3);
+ CVSymbol Symbol(&Prefix, sizeof(Prefix));
+ uint32_t Offset = 0;
+ if (Error Err = Traverser.visitSymbolBegin(Symbol, Offset))
+ consumeError(std::move(Err));
+ else {
+ // The CodeView compile unit containing the global symbols does not
+ // have a name; generate one using its parent name (object filename)
+        // followed by the '_global' string.
+ std::string Name(CompileUnit->getParentScope()->getName());
+ CompileUnit->setName(Name.append("_global"));
+
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(Traverser);
+ CVSymbolVisitor Visitor(Pipeline);
+
+ BinaryStreamRef SymStream =
+ ExpectedSyms->getSymbolArray().getUnderlyingStream();
+ for (uint32_t PubSymOff : Table) {
+ Expected<CVSymbol> Sym = readSymbolFromStream(SymStream, PubSymOff);
+ if (Sym) {
+ if (Error Err = Visitor.visitSymbolRecord(*Sym, PubSymOff))
+ return createStringError(errorToErrorCode(std::move(Err)),
+ getFileName());
+ } else {
+ consumeError(Sym.takeError());
+ }
+ }
+ }
+
+ LogicalVisitor.closeScope();
+ } else {
+ consumeError(ExpectedSyms.takeError());
+ }
+ }
+
+ // Traverse symbols (DBI).
+ LLVM_DEBUG({ W.getOStream() << "Traversing symbol groups\n"; });
+
+ auto VisitSymbolGroup = [&](uint32_t Modi, const SymbolGroup &SG) -> Error {
+ Expected<ModuleDebugStreamRef> ExpectedModS =
+ getModuleDebugStream(Pdb, Modi);
+ if (ExpectedModS) {
+ ModuleDebugStreamRef &ModS = *ExpectedModS;
+
+ LLVM_DEBUG({
+ W.getOStream() << formatv("Traversing Group: Mod {0:4}\n", Modi);
+ });
+
+ SymbolVisitorCallbackPipeline Pipeline;
+ SymbolDeserializer Deserializer(nullptr, CodeViewContainer::Pdb);
+ LVSymbolVisitor Traverser(this, W, &LogicalVisitor, Types, Ids, nullptr,
+ LogicalVisitor.getShared());
+
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(Traverser);
+ CVSymbolVisitor Visitor(Pipeline);
+ BinarySubstreamRef SS = ModS.getSymbolsSubstream();
+ if (Error Err =
+ Visitor.visitSymbolStream(ModS.getSymbolArray(), SS.Offset))
+ return createStringError(errorToErrorCode(std::move(Err)),
+ getFileName());
+ } else {
+ // If the module stream does not exist, it is not an error condition.
+ consumeError(ExpectedModS.takeError());
+ }
+
+ return Error::success();
+ };
+
+ if (Error Err = iterateSymbolGroups(Input, HeaderScope, VisitSymbolGroup))
+ return Err;
+
+ // At this stage, the logical view contains all scopes, symbols and types.
+  // For PDBs we can use the module id to access its specific compile unit.
+  // The line record addresses have already been resolved, so we can apply
+  // the same flow as when processing DWARF.
+
+ LLVM_DEBUG({ W.getOStream() << "Traversing lines\n"; });
+
+ // Record all line records for a Compile Unit.
+ CULines.clear();
+
+ auto VisitDebugLines = [this](int32_t Modi, const SymbolGroup &SG,
+ DebugLinesSubsectionRef &Lines) -> Error {
+ if (!options().getPrintLines())
+ return Error::success();
+
+ uint16_t Segment = Lines.header()->RelocSegment;
+ uint32_t Begin = Lines.header()->RelocOffset;
+ uint32_t Size = Lines.header()->CodeSize;
+
+ LLVM_DEBUG({ W.getOStream() << formatv("Modi = {0}\n", Modi); });
+
+ // We have line information for a new module; finish processing the
+ // collected information for the current module. Once it is done, start
+ // recording the line information for the new module.
+ if (CurrentModule != Modi) {
+ if (Error Err = processModule())
+ return Err;
+ CULines.clear();
+ CurrentModule = Modi;
+ }
+
+ for (const LineColumnEntry &Block : Lines)
+ if (Error Err = createLines(Block.LineNumbers, /*Addendum=*/0, Segment,
+ Begin, Size, Block.NameIndex, &SG))
+ return Err;
+
+ return Error::success();
+ };
+
+ if (Error Err = iterateModuleSubsections<DebugLinesSubsectionRef>(
+ Input, HeaderScope, VisitDebugLines))
+ return Err;
+
+ // Check if we have to close the Compile Unit scope.
+ LogicalVisitor.closeScope();
+
+ // Process collected element lines.
+ LogicalVisitor.processLines();
+
+ // Translate composite names into a single component.
+ Root->transformScopedName();
+ return Error::success();
+}
+
+Error LVCodeViewReader::processModule() {
+ if (LVScope *Scope = getScopeForModule(CurrentModule)) {
+ CompileUnit = static_cast<LVScopeCompileUnit *>(Scope);
+
+ LLVM_DEBUG({ dbgs() << "Processing Scope: " << Scope->getName() << "\n"; });
+
+    // For the given compile unit, collect all scope ranges.
+    // For a complete ranges and lines mapping, the logical view support
+    // needs the compile unit to have low and high pc values. We can
+    // traverse the 'Modules' section and get the information for the
+    // specific module. Another option is to take the first and last
+    // values from all the collected ranges.
+ LVSectionIndex SectionIndex = DotTextSectionIndex;
+ LVRange *ScopesWithRanges = getSectionRanges(SectionIndex);
+ ScopesWithRanges->clear();
+ CompileUnit->getRanges(*ScopesWithRanges);
+ if (!ScopesWithRanges->empty())
+ CompileUnit->addObject(ScopesWithRanges->getLower(),
+ ScopesWithRanges->getUpper());
+ ScopesWithRanges->sort();
+
+ if (Error Err = createInstructions())
+ return Err;
+
+ // Include lines from any inlined functions within the current function.
+ includeInlineeLines(SectionIndex, Scope);
+
+ processLines(&CULines, SectionIndex, nullptr);
+ }
+
+ return Error::success();
+}
+
+// In order to create the scopes, the CodeView Reader will:
+// = Traverse the TPI/IPI stream (Type visitor):
+// Collect forward references, scoped names, type indexes that will represent
+// a logical element, strings, line records, linkage names.
+// = Traverse the symbols section (Symbol visitor):
+// Create the scopes tree and create the required logical elements, using
+// the collected indexes from the type visitor.
+Error LVCodeViewReader::createScopes() {
+ LLVM_DEBUG({
+ W.startLine() << "\n";
+ W.printString("File", getFileName().str());
+ W.printString("Exe", ExePath);
+ W.printString("Format", FileFormatName);
+ });
+
+ if (Error Err = LVReader::createScopes())
+ return Err;
+
+ LogicalVisitor.setRoot(Root);
+
+ if (isObj()) {
+ if (Error Err = createScopes(getObj()))
+ return Err;
+ } else {
+ if (Error Err = createScopes(getPdb()))
+ return Err;
+ }
+
+ return Error::success();
+}
+
+Error LVCodeViewReader::loadTargetInfo(const ObjectFile &Obj) {
+  // info to look up a target and create register info.
+ // info to lookup a target and create register info.
+ Triple TT;
+ TT.setArch(Triple::ArchType(Obj.getArch()));
+ TT.setVendor(Triple::UnknownVendor);
+ TT.setOS(Triple::UnknownOS);
+
+ // Features to be passed to target/subtarget
+ Expected<SubtargetFeatures> Features = Obj.getFeatures();
+ SubtargetFeatures FeaturesValue;
+  if (!Features) {
+    consumeError(Features.takeError());
+    FeaturesValue = SubtargetFeatures();
+  } else {
+    FeaturesValue = *Features;
+  }
+ return loadGenericTargetInfo(TT.str(), FeaturesValue.getString());
+}
+
+Error LVCodeViewReader::loadTargetInfo(const PDBFile &Pdb) {
+ Triple TT;
+ TT.setArch(Triple::ArchType::x86_64);
+ TT.setVendor(Triple::UnknownVendor);
+ TT.setOS(Triple::Win32);
+
+ StringRef TheFeature = "";
+
+ return loadGenericTargetInfo(TT.str(), TheFeature);
+}
+
+std::string LVCodeViewReader::getRegisterName(LVSmall Opcode,
+ ArrayRef<uint64_t> Operands) {
+ // Get Compilation Unit CPU Type.
+ CPUType CPU = getCompileUnitCPUType();
+  // For CodeView the register is always in Operands[0].
+ RegisterId Register = (RegisterId(Operands[0]));
+ return formatRegisterId(Register, CPU);
+}
diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp
new file mode 100644
index 000000000000..e4f5f533262b
--- /dev/null
+++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.cpp
@@ -0,0 +1,3525 @@
+//===-- LVCodeViewVisitor.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the LVCodeViewVisitor class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/LogicalView/Readers/LVCodeViewVisitor.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/DebugInfo/CodeView/EnumTables.h"
+#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecordHelpers.h"
+#include "llvm/DebugInfo/CodeView/TypeRecordHelpers.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
+#include "llvm/DebugInfo/LogicalView/Core/LVScope.h"
+#include "llvm/DebugInfo/LogicalView/Core/LVSymbol.h"
+#include "llvm/DebugInfo/LogicalView/Core/LVType.h"
+#include "llvm/DebugInfo/LogicalView/Readers/LVCodeViewReader.h"
+#include "llvm/DebugInfo/PDB/Native/DbiStream.h"
+#include "llvm/DebugInfo/PDB/Native/InputFile.h"
+#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
+#include "llvm/DebugInfo/PDB/Native/RawError.h"
+#include "llvm/DebugInfo/PDB/Native/TpiStream.h"
+#include "llvm/DebugInfo/PDB/PDB.h"
+#include "llvm/Demangle/Demangle.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/FormatAdapters.h"
+#include "llvm/Support/FormatVariadic.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::object;
+using namespace llvm::pdb;
+using namespace llvm::logicalview;
+
+#define DEBUG_TYPE "CodeViewUtilities"
+
+namespace llvm {
+namespace logicalview {
+
+static TypeIndex getTrueType(TypeIndex &TI) {
+  // Dealing with an MSVC-generated PDB, we encountered a type index with the
+ // value of: 0x0280xxxx where xxxx=0000.
+ //
+ // There is some documentation about type indices:
+ // https://llvm.org/docs/PDB/TpiStream.html
+ //
+ // A type index is a 32-bit integer that uniquely identifies a type inside
+ // of an object file’s .debug$T section or a PDB file’s TPI or IPI stream.
+ // The value of the type index for the first type record from the TPI stream
+ // is given by the TypeIndexBegin member of the TPI Stream Header although
+ // in practice this value is always equal to 0x1000 (4096).
+ //
+ // Any type index with a high bit set is considered to come from the IPI
+ // stream, although this appears to be more of a hack, and LLVM does not
+ // generate type indices of this nature. They can, however, be observed in
+ // Microsoft PDBs occasionally, so one should be prepared to handle them.
+ // Note that having the high bit set is not a necessary condition to
+ // determine whether a type index comes from the IPI stream, it is only
+ // sufficient.
+ LLVM_DEBUG(
+ { dbgs() << "Index before: " << HexNumber(TI.getIndex()) << "\n"; });
+ TI.setIndex(TI.getIndex() & 0x0000ffff);
+ LLVM_DEBUG(
+ { dbgs() << "Index after: " << HexNumber(TI.getIndex()) << "\n"; });
+ return TI;
+}
+
+static const EnumEntry<TypeLeafKind> LeafTypeNames[] = {
+#define CV_TYPE(enum, val) {#enum, enum},
+#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
+};
+
+// Return the type name pointed to by the type index. It uses the kind to query
+// the associated name for the record type.
+static StringRef getRecordName(LazyRandomTypeCollection &Types, TypeIndex TI) {
+ if (TI.isSimple())
+ return {};
+
+ StringRef RecordName;
+ CVType CVReference = Types.getType(TI);
+ auto GetName = [&](auto Record) {
+ if (Error Err = TypeDeserializer::deserializeAs(
+ const_cast<CVType &>(CVReference), Record))
+ consumeError(std::move(Err));
+ else
+ RecordName = Record.getName();
+ };
+
+ TypeRecordKind RK = static_cast<TypeRecordKind>(CVReference.kind());
+ if (RK == TypeRecordKind::Class || RK == TypeRecordKind::Struct)
+ GetName(ClassRecord(RK));
+ else if (RK == TypeRecordKind::Union)
+ GetName(UnionRecord(RK));
+ else if (RK == TypeRecordKind::Enum)
+ GetName(EnumRecord(RK));
+
+ return RecordName;
+}
+
+} // namespace logicalview
+} // namespace llvm
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "CodeViewDataVisitor"
+
+namespace llvm {
+namespace logicalview {
+
+// Keeps the type indexes with line information.
+using LVLineRecords = std::vector<TypeIndex>;
+
+namespace {
+
+class LVTypeRecords {
+ LVShared *Shared = nullptr;
+
+ // Logical elements associated to their CodeView Type Index.
+ using RecordEntry = std::pair<TypeLeafKind, LVElement *>;
+ using RecordTable = std::map<TypeIndex, RecordEntry>;
+ RecordTable RecordFromTypes;
+ RecordTable RecordFromIds;
+
+ using NameTable = std::map<StringRef, TypeIndex>;
+ NameTable NameFromTypes;
+ NameTable NameFromIds;
+
+public:
+ LVTypeRecords(LVShared *Shared) : Shared(Shared) {}
+
+ void add(uint32_t StreamIdx, TypeIndex TI, TypeLeafKind Kind,
+ LVElement *Element = nullptr);
+ void add(uint32_t StreamIdx, TypeIndex TI, StringRef Name);
+ LVElement *find(uint32_t StreamIdx, TypeIndex TI, bool Create = true);
+ TypeIndex find(uint32_t StreamIdx, StringRef Name);
+};
+
+class LVForwardReferences {
+ // Forward reference and its definitions (Name as key).
+ using ForwardEntry = std::pair<TypeIndex, TypeIndex>;
+ using ForwardTypeNames = std::map<StringRef, ForwardEntry>;
+ ForwardTypeNames ForwardTypesNames;
+
+ // Forward reference and its definition (TypeIndex as key).
+ using ForwardType = std::map<TypeIndex, TypeIndex>;
+ ForwardType ForwardTypes;
+
+  // Forward types and their references.
+ void add(TypeIndex TIForward, TypeIndex TIReference) {
+ ForwardTypes.emplace(TIForward, TIReference);
+ }
+
+ void add(StringRef Name, TypeIndex TIForward) {
+ if (ForwardTypesNames.find(Name) == ForwardTypesNames.end()) {
+ ForwardTypesNames.emplace(
+ std::piecewise_construct, std::forward_as_tuple(Name),
+ std::forward_as_tuple(TIForward, TypeIndex::None()));
+ } else {
+ // Update a recorded definition with its reference.
+ ForwardTypesNames[Name].first = TIForward;
+ add(TIForward, ForwardTypesNames[Name].second);
+ }
+ }
+
+ // Update a previously recorded forward reference with its definition.
+ void update(StringRef Name, TypeIndex TIReference) {
+ if (ForwardTypesNames.find(Name) != ForwardTypesNames.end()) {
+ // Update the recorded forward reference with its definition.
+ ForwardTypesNames[Name].second = TIReference;
+ add(ForwardTypesNames[Name].first, TIReference);
+ } else {
+ // We have not seen the forward reference. Insert the definition.
+ ForwardTypesNames.emplace(
+ std::piecewise_construct, std::forward_as_tuple(Name),
+ std::forward_as_tuple(TypeIndex::None(), TIReference));
+ }
+ }
+
+public:
+ LVForwardReferences() = default;
+
+ void record(bool IsForwardRef, StringRef Name, TypeIndex TI) {
+    // We expect the forward references to come first, but that is not
+    // always the case. A name must be recorded regardless of the order
+    // in which the forward reference appears.
+ (IsForwardRef) ? add(Name, TI) : update(Name, TI);
+ }
+
+ TypeIndex find(TypeIndex TIForward) {
+ return (ForwardTypes.find(TIForward) != ForwardTypes.end())
+ ? ForwardTypes[TIForward]
+ : TypeIndex::None();
+ }
+
+ TypeIndex find(StringRef Name) {
+ return (ForwardTypesNames.find(Name) != ForwardTypesNames.end())
+ ? ForwardTypesNames[Name].second
+ : TypeIndex::None();
+ }
+
+ // If the given TI corresponds to a reference, return the reference.
+ // Otherwise return the given TI.
+ TypeIndex remap(TypeIndex TI) {
+ TypeIndex Forward = find(TI);
+ return Forward.isNoneType() ? TI : Forward;
+ }
+};
+
+// Namespace deduction.
+class LVNamespaceDeduction {
+ LVShared *Shared = nullptr;
+
+ using Names = std::map<StringRef, LVScope *>;
+ Names NamespaceNames;
+
+ using LookupSet = std::set<StringRef>;
+ LookupSet DeducedScopes;
+ LookupSet UnresolvedScopes;
+ LookupSet IdentifiedNamespaces;
+
+ void add(StringRef Name, LVScope *Namespace) {
+ if (NamespaceNames.find(Name) == NamespaceNames.end())
+ NamespaceNames.emplace(Name, Namespace);
+ }
+
+public:
+ LVNamespaceDeduction(LVShared *Shared) : Shared(Shared) {}
+
+ void init();
+ void add(StringRef String);
+ LVScope *get(LVStringRefs Components);
+ LVScope *get(StringRef Name, bool CheckScope = true);
+
+ // Find the logical namespace for the 'Name' component.
+ LVScope *find(StringRef Name) {
+ LVScope *Namespace = (NamespaceNames.find(Name) != NamespaceNames.end())
+ ? NamespaceNames[Name]
+ : nullptr;
+ return Namespace;
+ }
+
+ // For the given lexical components, return a tuple with the first entry
+ // being the outermost namespace and the second entry being the first
+ // non-namespace.
+ LVLexicalIndex find(LVStringRefs Components) {
+ if (Components.empty())
+ return {};
+
+ LVStringRefs::size_type FirstNamespace = 0;
+ LVStringRefs::size_type FirstNonNamespace;
+ for (LVStringRefs::size_type Index = 0; Index < Components.size();
+ ++Index) {
+ FirstNonNamespace = Index;
+ LookupSet::iterator Iter = IdentifiedNamespaces.find(Components[Index]);
+ if (Iter == IdentifiedNamespaces.end())
+ // The component is not a namespace name.
+ break;
+ }
+ return std::make_tuple(FirstNamespace, FirstNonNamespace);
+ }
+};
+
+// Strings.
+class LVStringRecords {
+ using StringEntry = std::tuple<uint32_t, std::string, LVScopeCompileUnit *>;
+ using StringIds = std::map<TypeIndex, StringEntry>;
+ StringIds Strings;
+
+public:
+ LVStringRecords() = default;
+
+ void add(TypeIndex TI, StringRef String) {
+ static uint32_t Index = 0;
+ if (Strings.find(TI) == Strings.end())
+ Strings.emplace(
+ std::piecewise_construct, std::forward_as_tuple(TI),
+ std::forward_as_tuple(++Index, std::string(String), nullptr));
+ }
+
+ StringRef find(TypeIndex TI) {
+ StringIds::iterator Iter = Strings.find(TI);
+ return Iter != Strings.end() ? std::get<1>(Iter->second) : StringRef{};
+ }
+
+ uint32_t findIndex(TypeIndex TI) {
+ StringIds::iterator Iter = Strings.find(TI);
+ return Iter != Strings.end() ? std::get<0>(Iter->second) : 0;
+ }
+
+ // Move strings representing the filenames to the compile unit.
+ void addFilenames();
+ void addFilenames(LVScopeCompileUnit *Scope);
+};
+} // namespace
+
+using LVTypeKinds = std::set<TypeLeafKind>;
+using LVSymbolKinds = std::set<SymbolKind>;
+
+// The following data keeps forward information, type records, names for
+// namespace deduction, string records, line records.
+// It is shared by the type visitor, symbol visitor and logical visitor and
+// it is independent of the CodeViewReader.
+struct LVShared {
+ LVCodeViewReader *Reader;
+ LVLogicalVisitor *Visitor;
+ LVForwardReferences ForwardReferences;
+ LVLineRecords LineRecords;
+ LVNamespaceDeduction NamespaceDeduction;
+ LVStringRecords StringRecords;
+ LVTypeRecords TypeRecords;
+
+  // In order to determine which type and/or symbol records should be handled
+  // by the reader, we record the record kinds seen by the type and symbol
+  // visitors. At the end of the scopes creation, the '--internal=tag' option
+  // allows printing the unique record ids collected.
+ LVTypeKinds TypeKinds;
+ LVSymbolKinds SymbolKinds;
+
+ LVShared(LVCodeViewReader *Reader, LVLogicalVisitor *Visitor)
+ : Reader(Reader), Visitor(Visitor), NamespaceDeduction(this),
+ TypeRecords(this) {}
+ ~LVShared() = default;
+};
+} // namespace logicalview
+} // namespace llvm
+
+void LVTypeRecords::add(uint32_t StreamIdx, TypeIndex TI, TypeLeafKind Kind,
+ LVElement *Element) {
+ RecordTable &Target =
+ (StreamIdx == StreamTPI) ? RecordFromTypes : RecordFromIds;
+ Target.emplace(std::piecewise_construct, std::forward_as_tuple(TI),
+ std::forward_as_tuple(Kind, Element));
+}
+
+void LVTypeRecords::add(uint32_t StreamIdx, TypeIndex TI, StringRef Name) {
+ NameTable &Target = (StreamIdx == StreamTPI) ? NameFromTypes : NameFromIds;
+ Target.emplace(Name, TI);
+}
+
+LVElement *LVTypeRecords::find(uint32_t StreamIdx, TypeIndex TI, bool Create) {
+ RecordTable &Target =
+ (StreamIdx == StreamTPI) ? RecordFromTypes : RecordFromIds;
+
+ LVElement *Element = nullptr;
+ RecordTable::iterator Iter = Target.find(TI);
+ if (Iter != Target.end()) {
+ Element = Iter->second.second;
+ if (Element || !Create)
+ return Element;
+
+ // Create the logical element if not found.
+ Element = Shared->Visitor->createElement(Iter->second.first);
+ if (Element) {
+ Element->setOffset(TI.getIndex());
+ Element->setOffsetFromTypeIndex();
+ Target[TI].second = Element;
+ }
+ }
+ return Element;
+}
+
+TypeIndex LVTypeRecords::find(uint32_t StreamIdx, StringRef Name) {
+ NameTable &Target = (StreamIdx == StreamTPI) ? NameFromTypes : NameFromIds;
+ NameTable::iterator Iter = Target.find(Name);
+ return Iter != Target.end() ? Iter->second : TypeIndex::None();
+}
+
+void LVStringRecords::addFilenames() {
+ for (StringIds::const_reference Entry : Strings) {
+ StringRef Name = std::get<1>(Entry.second);
+ LVScopeCompileUnit *Scope = std::get<2>(Entry.second);
+ Scope->addFilename(transformPath(Name));
+ }
+ Strings.clear();
+}
+
+void LVStringRecords::addFilenames(LVScopeCompileUnit *Scope) {
+ for (StringIds::reference Entry : Strings)
+ if (!std::get<2>(Entry.second))
+ std::get<2>(Entry.second) = Scope;
+}
+
+void LVNamespaceDeduction::add(StringRef String) {
+ StringRef InnerComponent;
+ StringRef OuterComponent;
+ std::tie(OuterComponent, InnerComponent) = getInnerComponent(String);
+ DeducedScopes.insert(InnerComponent);
+ if (OuterComponent.size())
+ UnresolvedScopes.insert(OuterComponent);
+}
+
+void LVNamespaceDeduction::init() {
+ // We have 2 sets of names:
+ // - deduced scopes (class, structure, union and enum) and
+  // - unresolved scopes, which can represent namespaces or any of the deduced.
+  // Before creating the namespaces, we have to traverse the unresolved set
+  // and remove any references to already deduced scopes.
+ LVStringRefs Components;
+ for (const StringRef &Unresolved : UnresolvedScopes) {
+ Components = getAllLexicalComponents(Unresolved);
+ for (const StringRef &Component : Components) {
+ LookupSet::iterator Iter = DeducedScopes.find(Component);
+ if (Iter == DeducedScopes.end())
+ IdentifiedNamespaces.insert(Component);
+ }
+ }
+
+ LLVM_DEBUG({
+ auto Print = [&](LookupSet &Container, const char *Title) {
+ auto Header = [&]() {
+ dbgs() << formatv("\n{0}\n", fmt_repeat('=', 72));
+ dbgs() << formatv("{0}\n", Title);
+ dbgs() << formatv("{0}\n", fmt_repeat('=', 72));
+ };
+ Header();
+ for (const StringRef &Item : Container)
+ dbgs() << formatv("'{0}'\n", Item.str().c_str());
+ };
+
+    Print(DeducedScopes, "Deduced Scopes");
+ Print(UnresolvedScopes, "Unresolved Scopes");
+ Print(IdentifiedNamespaces, "Namespaces");
+ });
+}
+
+LVScope *LVNamespaceDeduction::get(LVStringRefs Components) {
+ LLVM_DEBUG({
+ for (const StringRef &Component : Components)
+ dbgs() << formatv("'{0}'\n", Component.str().c_str());
+ });
+
+ if (Components.empty())
+ return nullptr;
+
+ // Update the namespaces relationship.
+ LVScope *Namespace = nullptr;
+ LVScope *Parent = Shared->Reader->getCompileUnit();
+ for (const StringRef &Component : Components) {
+ // Check if we have seen the namespace.
+ Namespace = find(Component);
+ if (!Namespace) {
+ // We have identified namespaces that are generated by MSVC. Mark them
+ // as 'system' so they will be excluded from the logical view.
+ Namespace = Shared->Reader->createScopeNamespace();
+ Namespace->setTag(dwarf::DW_TAG_namespace);
+ Namespace->setName(Component);
+ Parent->addElement(Namespace);
+ getReader().isSystemEntry(Namespace);
+ add(Component, Namespace);
+ }
+ Parent = Namespace;
+ }
+ return Parent;
+}
+
+LVScope *LVNamespaceDeduction::get(StringRef ScopedName, bool CheckScope) {
+ LVStringRefs Components = getAllLexicalComponents(ScopedName);
+ if (CheckScope)
+ Components.erase(std::remove_if(Components.begin(), Components.end(),
+ [&](StringRef Component) {
+ LookupSet::iterator Iter =
+ IdentifiedNamespaces.find(Component);
+ return Iter == IdentifiedNamespaces.end();
+ }),
+ Components.end());
+
+ LLVM_DEBUG(
+ { dbgs() << formatv("ScopedName: '{0}'\n", ScopedName.str().c_str()); });
+
+ return get(Components);
+}
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "CodeViewTypeVisitor"
+
+//===----------------------------------------------------------------------===//
+// TypeRecord traversal.
+//===----------------------------------------------------------------------===//
+void LVTypeVisitor::printTypeIndex(StringRef FieldName, TypeIndex TI,
+ uint32_t StreamIdx) const {
+ codeview::printTypeIndex(W, FieldName, TI,
+ StreamIdx == StreamTPI ? Types : Ids);
+}
+
+Error LVTypeVisitor::visitTypeBegin(CVType &Record) {
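+  // No explicit type index is given; derive it from the current number of
+  // records in the type collection.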
+ return visitTypeBegin(Record, TypeIndex::fromArrayIndex(Types.size()));
+}
+
+Error LVTypeVisitor::visitTypeBegin(CVType &Record, TypeIndex TI) {
+ LLVM_DEBUG({
+ W.getOStream() << formatTypeLeafKind(Record.kind());
+ W.getOStream() << " (" << HexNumber(TI.getIndex()) << ")\n";
+ });
+
+ if (options().getInternalTag())
+ Shared->TypeKinds.insert(Record.kind());
+
+  // The collected type records will be used to create the logical elements
+  // during the symbol traversal, when a type is referenced.
+ CurrentTypeIndex = TI;
+ Shared->TypeRecords.add(StreamIdx, TI, Record.kind());
+ return Error::success();
+}
+
+Error LVTypeVisitor::visitUnknownType(CVType &Record) {
+ LLVM_DEBUG({ W.printNumber("Length", uint32_t(Record.content().size())); });
+ return Error::success();
+}
+
+Error LVTypeVisitor::visitMemberBegin(CVMemberRecord &Record) {
+ LLVM_DEBUG({
+ W.startLine() << formatTypeLeafKind(Record.Kind);
+ W.getOStream() << " {\n";
+ W.indent();
+ });
+ return Error::success();
+}
+
+Error LVTypeVisitor::visitMemberEnd(CVMemberRecord &Record) {
+ LLVM_DEBUG({
+ W.unindent();
+ W.startLine() << "}\n";
+ });
+ return Error::success();
+}
+
+Error LVTypeVisitor::visitUnknownMember(CVMemberRecord &Record) {
+ LLVM_DEBUG({ W.printHex("UnknownMember", unsigned(Record.Kind)); });
+ return Error::success();
+}
+
+// LF_BUILDINFO (TPI)/(IPI)
+Error LVTypeVisitor::visitKnownRecord(CVType &Record, BuildInfoRecord &Args) {
+ // All the args are references into the TPI/IPI stream.
+ LLVM_DEBUG({
+ W.printNumber("NumArgs", static_cast<uint32_t>(Args.getArgs().size()));
+ ListScope Arguments(W, "Arguments");
+ for (TypeIndex Arg : Args.getArgs())
+ printTypeIndex("ArgType", Arg, StreamIPI);
+ });
+
+ // Only add the strings that hold information about filenames. They will be
+ // used to complete the line/file information for the logical elements.
+ // There are other strings holding information about namespaces.
+ TypeIndex TI;
+ StringRef String;
+
+ // Absolute CWD path
+ TI = Args.getArgs()[BuildInfoRecord::BuildInfoArg::CurrentDirectory];
+ String = Ids.getTypeName(TI);
+ if (!String.empty())
+ Shared->StringRecords.add(TI, String);
+
+ // Get the compile unit name.
+ TI = Args.getArgs()[BuildInfoRecord::BuildInfoArg::SourceFile];
+ String = Ids.getTypeName(TI);
+ if (!String.empty())
+ Shared->StringRecords.add(TI, String);
+ LogicalVisitor->setCompileUnitName(std::string(String));
+
+ return Error::success();
+}
+
+// LF_CLASS, LF_STRUCTURE, LF_INTERFACE (TPI)
+Error LVTypeVisitor::visitKnownRecord(CVType &Record, ClassRecord &Class) {
+ LLVM_DEBUG({
+ printTypeIndex("TypeIndex", CurrentTypeIndex, StreamTPI);
+ printTypeIndex("FieldListType", Class.getFieldList(), StreamTPI);
+ W.printString("Name", Class.getName());
+ });
+
+ // Collect class name for scope deduction.
+ Shared->NamespaceDeduction.add(Class.getName());
+ Shared->ForwardReferences.record(Class.isForwardRef(), Class.getName(),
+ CurrentTypeIndex);
+
+ // Collect class name for contained scopes deduction.
+ Shared->TypeRecords.add(StreamIdx, CurrentTypeIndex, Class.getName());
+ return Error::success();
+}
+
+// LF_ENUM (TPI)
+Error LVTypeVisitor::visitKnownRecord(CVType &Record, EnumRecord &Enum) {
+ LLVM_DEBUG({
+ printTypeIndex("TypeIndex", CurrentTypeIndex, StreamTPI);
+ printTypeIndex("FieldListType", Enum.getFieldList(), StreamTPI);
+ W.printString("Name", Enum.getName());
+ });
+
+ // Collect enum name for scope deduction.
+ Shared->NamespaceDeduction.add(Enum.getName());
+ return Error::success();
+}
+
+// LF_FUNC_ID (TPI)/(IPI)
+Error LVTypeVisitor::visitKnownRecord(CVType &Record, FuncIdRecord &Func) {
+ LLVM_DEBUG({
+ printTypeIndex("TypeIndex", CurrentTypeIndex, StreamTPI);
+ printTypeIndex("Type", Func.getFunctionType(), StreamTPI);
+ printTypeIndex("Parent", Func.getParentScope(), StreamTPI);
+ W.printString("Name", Func.getName());
+ });
+
+ // Collect function name for scope deduction.
+ Shared->NamespaceDeduction.add(Func.getName());
+ return Error::success();
+}
+
+// LF_PROCEDURE (TPI)
+Error LVTypeVisitor::visitKnownRecord(CVType &Record, ProcedureRecord &Proc) {
+ LLVM_DEBUG({
+ printTypeIndex("TypeIndex", CurrentTypeIndex, StreamTPI);
+ printTypeIndex("ReturnType", Proc.getReturnType(), StreamTPI);
+ W.printNumber("NumParameters", Proc.getParameterCount());
+ printTypeIndex("ArgListType", Proc.getArgumentList(), StreamTPI);
+ });
+
+ // Collect procedure information, as it can be referenced by typedefs.
+ Shared->TypeRecords.add(StreamTPI, CurrentTypeIndex, {});
+ return Error::success();
+}
+
+// LF_STRING_ID (TPI)/(IPI)
+Error LVTypeVisitor::visitKnownRecord(CVType &Record, StringIdRecord &String) {
+ // No additional references are needed.
+ LLVM_DEBUG({
+ printTypeIndex("Id", String.getId(), StreamIPI);
+ W.printString("StringData", String.getString());
+ });
+ return Error::success();
+}
+
+// LF_UDT_SRC_LINE (TPI)/(IPI)
+Error LVTypeVisitor::visitKnownRecord(CVType &Record,
+ UdtSourceLineRecord &Line) {
+ // UDT and SourceFile are references into the TPI/IPI stream.
+ LLVM_DEBUG({
+ printTypeIndex("UDT", Line.getUDT(), StreamIPI);
+ printTypeIndex("SourceFile", Line.getSourceFile(), StreamIPI);
+ W.printNumber("LineNumber", Line.getLineNumber());
+ });
+
+ Shared->LineRecords.push_back(CurrentTypeIndex);
+ return Error::success();
+}
+
+// LF_UNION (TPI)
+Error LVTypeVisitor::visitKnownRecord(CVType &Record, UnionRecord &Union) {
+ LLVM_DEBUG({
+ W.printNumber("MemberCount", Union.getMemberCount());
+ printTypeIndex("FieldList", Union.getFieldList(), StreamTPI);
+ W.printNumber("SizeOf", Union.getSize());
+ W.printString("Name", Union.getName());
+ if (Union.hasUniqueName())
+ W.printString("UniqueName", Union.getUniqueName());
+ });
+
+ // Collect union name for scope deduction.
+ Shared->NamespaceDeduction.add(Union.getName());
+ Shared->ForwardReferences.record(Union.isForwardRef(), Union.getName(),
+ CurrentTypeIndex);
+
+ // Collect union name for contained scopes deduction.
+ Shared->TypeRecords.add(StreamIdx, CurrentTypeIndex, Union.getName());
+ return Error::success();
+}
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "CodeViewSymbolVisitor"
+
+//===----------------------------------------------------------------------===//
+// SymbolRecord traversal.
+//===----------------------------------------------------------------------===//
+void LVSymbolVisitorDelegate::printRelocatedField(StringRef Label,
+ uint32_t RelocOffset,
+ uint32_t Offset,
+ StringRef *RelocSym) {
+ Reader->printRelocatedField(Label, CoffSection, RelocOffset, Offset,
+ RelocSym);
+}
+
+void LVSymbolVisitorDelegate::getLinkageName(uint32_t RelocOffset,
+ uint32_t Offset,
+ StringRef *RelocSym) {
+ Reader->getLinkageName(CoffSection, RelocOffset, Offset, RelocSym);
+}
+
+StringRef
+LVSymbolVisitorDelegate::getFileNameForFileOffset(uint32_t FileOffset) {
+ Expected<StringRef> Name = Reader->getFileNameForFileOffset(FileOffset);
+ if (!Name) {
+ consumeError(Name.takeError());
+ return {};
+ }
+ return *Name;
+}
+
+DebugStringTableSubsectionRef LVSymbolVisitorDelegate::getStringTable() {
+ return Reader->CVStringTable;
+}
+
+void LVSymbolVisitor::printLocalVariableAddrRange(
+ const LocalVariableAddrRange &Range, uint32_t RelocationOffset) {
+ DictScope S(W, "LocalVariableAddrRange");
+ if (ObjDelegate)
+ ObjDelegate->printRelocatedField("OffsetStart", RelocationOffset,
+ Range.OffsetStart);
+ W.printHex("ISectStart", Range.ISectStart);
+ W.printHex("Range", Range.Range);
+}
+
+void LVSymbolVisitor::printLocalVariableAddrGap(
+ ArrayRef<LocalVariableAddrGap> Gaps) {
+ for (const LocalVariableAddrGap &Gap : Gaps) {
+ ListScope S(W, "LocalVariableAddrGap");
+ W.printHex("GapStartOffset", Gap.GapStartOffset);
+ W.printHex("Range", Gap.Range);
+ }
+}
+
+void LVSymbolVisitor::printTypeIndex(StringRef FieldName, TypeIndex TI) const {
+ codeview::printTypeIndex(W, FieldName, TI, Types);
+}
+
+Error LVSymbolVisitor::visitSymbolBegin(CVSymbol &Record) {
+ return visitSymbolBegin(Record, 0);
+}
+
+Error LVSymbolVisitor::visitSymbolBegin(CVSymbol &Record, uint32_t Offset) {
+ SymbolKind Kind = Record.kind();
+ LLVM_DEBUG({
+ W.printNumber("Offset", Offset);
+ W.printEnum("Begin Kind", unsigned(Kind), getSymbolTypeNames());
+ });
+
+ if (options().getInternalTag())
+ Shared->SymbolKinds.insert(Kind);
+
+ LogicalVisitor->CurrentElement = LogicalVisitor->createElement(Kind);
+ if (!LogicalVisitor->CurrentElement) {
+ LLVM_DEBUG({
+ // We have an unsupported Symbol or Type Record.
+ // W.printEnum("Kind ignored", unsigned(Kind), getSymbolTypeNames());
+ });
+ return Error::success();
+ }
+
+ // Offset carried by the traversal routines when dealing with streams.
+ CurrentOffset = Offset;
+ IsCompileUnit = false;
+ if (!LogicalVisitor->CurrentElement->getOffsetFromTypeIndex())
+ LogicalVisitor->CurrentElement->setOffset(Offset);
+ if (symbolOpensScope(Kind) || (IsCompileUnit = symbolIsCompileUnit(Kind))) {
+ assert(LogicalVisitor->CurrentScope && "Invalid scope!");
+ LogicalVisitor->addElement(LogicalVisitor->CurrentScope, IsCompileUnit);
+ } else {
+ if (LogicalVisitor->CurrentSymbol)
+ LogicalVisitor->addElement(LogicalVisitor->CurrentSymbol);
+ if (LogicalVisitor->CurrentType)
+ LogicalVisitor->addElement(LogicalVisitor->CurrentType);
+ }
+
+ return Error::success();
+}
+
+Error LVSymbolVisitor::visitSymbolEnd(CVSymbol &Record) {
+ SymbolKind Kind = Record.kind();
+ LLVM_DEBUG(
+ { W.printEnum("End Kind", unsigned(Kind), getSymbolTypeNames()); });
+
+ if (symbolEndsScope(Kind)) {
+ LogicalVisitor->popScope();
+ }
+
+ return Error::success();
+}
+
+Error LVSymbolVisitor::visitUnknownSymbol(CVSymbol &Record) {
+ LLVM_DEBUG({ W.printNumber("Length", Record.length()); });
+ return Error::success();
+}
+
+// S_BLOCK32
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, BlockSym &Block) {
+ LLVM_DEBUG({
+ W.printHex("CodeSize", Block.CodeSize);
+ W.printHex("Segment", Block.Segment);
+ W.printString("BlockName", Block.Name);
+ });
+
+ if (LVScope *Scope = LogicalVisitor->CurrentScope) {
+ StringRef LinkageName;
+ if (ObjDelegate)
+ ObjDelegate->getLinkageName(Block.getRelocationOffset(), Block.CodeOffset,
+ &LinkageName);
+ Scope->setLinkageName(LinkageName);
+
+ if (options().getGeneralCollectRanges()) {
+ // Record converted segment::offset addressing for this scope.
+ LVAddress Addendum = Reader->getSymbolTableAddress(LinkageName);
+ LVAddress LowPC =
+ Reader->linearAddress(Block.Segment, Block.CodeOffset, Addendum);
+ LVAddress HighPC = LowPC + Block.CodeSize - 1;
+ Scope->addObject(LowPC, HighPC);
+ }
+ }
+
+ return Error::success();
+}
+
+// S_BPREL32
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
+ BPRelativeSym &Local) {
+ LLVM_DEBUG({
+ printTypeIndex("Type", Local.Type);
+ W.printNumber("Offset", Local.Offset);
+ W.printString("VarName", Local.Name);
+ });
+
+ if (LVSymbol *Symbol = LogicalVisitor->CurrentSymbol) {
+ Symbol->setName(Local.Name);
+ // From the MS_Symbol_Type.pdf documentation (S_BPREL32):
+ // This symbol specifies symbols that are allocated on the stack for a
+ // procedure. For C and C++, these include the actual function parameters
+ // and the local non-static variables of functions.
+ // However, the offset for 'this' comes as a negative value.
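+ // Illustrative (typical) frame layout assumed by this sign heuristic:
+ // [FP + n] incoming parameters (positive offsets)
+ // [FP - n] local variables (negative offsets)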
+
+ // Symbol was created as 'variable'; determine its real kind.
+ Symbol->resetIsVariable();
+
+ if (Local.Name.equals("this")) {
+ Symbol->setIsParameter();
+ Symbol->setIsArtificial();
+ } else {
+ // Determine symbol kind.
+ if (Local.Offset > 0)
+ Symbol->setIsParameter();
+ else
+ Symbol->setIsVariable();
+ }
+
+ // Set the correct debug information tag.
+ if (Symbol->getIsParameter())
+ Symbol->setTag(dwarf::DW_TAG_formal_parameter);
+
+ LVElement *Element = LogicalVisitor->getElement(StreamTPI, Local.Type);
+ if (Element && Element->getIsScoped()) {
+ // We have a local type. Find its parent function.
+ LVScope *Parent = Symbol->getFunctionParent();
+ // The element representing the type has already been finalized. If
+ // the type is an aggregate type, its members have already been added.
+ // As the type is local, its level will be changed.
+
+ // FIXME: Currently the algorithm used to scope lambda functions is
+ // incorrect. Before we allocate the type at this scope, check if it is
+ // already allocated in another scope.
+ if (!Element->getParentScope()) {
+ Parent->addElement(Element);
+ Element->updateLevel(Parent);
+ }
+ }
+ Symbol->setType(Element);
+ }
+
+ return Error::success();
+}
+
+// S_REGREL32
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
+ RegRelativeSym &Local) {
+ LLVM_DEBUG({
+ printTypeIndex("Type", Local.Type);
+ W.printNumber("Offset", Local.Offset);
+ W.printString("VarName", Local.Name);
+ });
+
+ if (LVSymbol *Symbol = LogicalVisitor->CurrentSymbol) {
+ Symbol->setName(Local.Name);
+
+ // Symbol was created as 'variable'; determine its real kind.
+ Symbol->resetIsVariable();
+
+ // Check for the 'this' symbol.
+ if (Local.Name.equals("this")) {
+ Symbol->setIsArtificial();
+ Symbol->setIsParameter();
+ } else {
+ // Determine symbol kind.
+ determineSymbolKind(Symbol, Local.Register);
+ }
+
+ // Set the correct debug information tag.
+ if (Symbol->getIsParameter())
+ Symbol->setTag(dwarf::DW_TAG_formal_parameter);
+
+ LVElement *Element = LogicalVisitor->getElement(StreamTPI, Local.Type);
+ if (Element && Element->getIsScoped()) {
+ // We have a local type. Find its parent function.
+ LVScope *Parent = Symbol->getFunctionParent();
+ // The element representing the type has already been finalized. If
+ // the type is an aggregate type, its members have already been added.
+ // As the type is local, its level will be changed.
+
+ // FIXME: Currently the algorithm used to scope lambda functions is
+ // incorrect. Before we allocate the type at this scope, check if it is
+ // already allocated in another scope.
+ if (!Element->getParentScope()) {
+ Parent->addElement(Element);
+ Element->updateLevel(Parent);
+ }
+ }
+ Symbol->setType(Element);
+ }
+
+ return Error::success();
+}
+
+// S_BUILDINFO
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &CVR,
+ BuildInfoSym &BuildInfo) {
+ LLVM_DEBUG({ printTypeIndex("BuildId", BuildInfo.BuildId); });
+
+ CVType CVBuildType = Ids.getType(BuildInfo.BuildId);
+ if (Error Err = LogicalVisitor->finishVisitation(
+ CVBuildType, BuildInfo.BuildId, Reader->getCompileUnit()))
+ return Err;
+
+ return Error::success();
+}
+
+// S_COMPILE2
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
+ Compile2Sym &Compile2) {
+ LLVM_DEBUG({
+ W.printEnum("Language", uint8_t(Compile2.getLanguage()),
+ getSourceLanguageNames());
+ W.printFlags("Flags", uint32_t(Compile2.getFlags()),
+ getCompileSym3FlagNames());
+ W.printEnum("Machine", unsigned(Compile2.Machine), getCPUTypeNames());
+ W.printString("VersionName", Compile2.Version);
+ });
+
+ // MSVC generates the following sequence for a CodeView module:
+ // S_OBJNAME --> Set 'CurrentObjectName'.
+ // S_COMPILE2 --> Set the compile unit name using 'CurrentObjectName'.
+ // ...
+ // S_BUILDINFO --> Extract the source name.
+ //
+ // Clang generates the following sequence for a CodeView module:
+ // S_COMPILE2 --> Set the compile unit name to empty string.
+ // ...
+ // S_BUILDINFO --> Extract the source name.
+ //
+ // For both toolchains, update the compile unit name from S_BUILDINFO.
+ if (LVScope *Scope = LogicalVisitor->CurrentScope) {
+ // The name of the CU was extracted from the 'BuildInfo' subsection.
+ Reader->setCompileUnitCPUType(Compile2.Machine);
+ Scope->setName(CurrentObjectName);
+ if (options().getAttributeProducer())
+ Scope->setProducer(Compile2.Version);
+ getReader().isSystemEntry(Scope, CurrentObjectName);
+
+ // The line records in CodeView are recorded per Module ID. Update
+ // the relationship between the current CU and the Module ID.
+ Reader->addModule(Scope);
+
+ // Update the collected strings with their associated compile unit.
+ Shared->StringRecords.addFilenames(Reader->getCompileUnit());
+ }
+
+ // Clear any previous ObjectName.
+ CurrentObjectName = "";
+ return Error::success();
+}
+
+// S_COMPILE3
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
+ Compile3Sym &Compile3) {
+ LLVM_DEBUG({
+ W.printEnum("Language", uint8_t(Compile3.getLanguage()),
+ getSourceLanguageNames());
+ W.printFlags("Flags", uint32_t(Compile3.getFlags()),
+ getCompileSym3FlagNames());
+ W.printEnum("Machine", unsigned(Compile3.Machine), getCPUTypeNames());
+ W.printString("VersionName", Compile3.Version);
+ });
+
+ // MSVC generates the following sequence for a CodeView module:
+ // S_OBJNAME --> Set 'CurrentObjectName'.
+ // S_COMPILE3 --> Set the compile unit name using 'CurrentObjectName'.
+ // ...
+ // S_BUILDINFO --> Extract the source name.
+ //
+ // Clang generates the following sequence for a CodeView module:
+ // S_COMPILE3 --> Set the compile unit name to empty string.
+ // ...
+ // S_BUILDINFO --> Extract the source name.
+ //
+ // For both toolchains, update the compile unit name from S_BUILDINFO.
+ if (LVScope *Scope = LogicalVisitor->CurrentScope) {
+ // The name of the CU was extracted from the 'BuildInfo' subsection.
+ Reader->setCompileUnitCPUType(Compile3.Machine);
+ Scope->setName(CurrentObjectName);
+ if (options().getAttributeProducer())
+ Scope->setProducer(Compile3.Version);
+ getReader().isSystemEntry(Scope, CurrentObjectName);
+
+ // The line records in CodeView are recorded per Module ID. Update
+ // the relationship between the current CU and the Module ID.
+ Reader->addModule(Scope);
+
+ // Update the collected strings with their associated compile unit.
+ Shared->StringRecords.addFilenames(Reader->getCompileUnit());
+ }
+
+ // Clear any previous ObjectName.
+ CurrentObjectName = "";
+ return Error::success();
+}
+
+// S_CONSTANT, S_MANCONSTANT
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
+ ConstantSym &Constant) {
+ LLVM_DEBUG({
+ printTypeIndex("Type", Constant.Type);
+ W.printNumber("Value", Constant.Value);
+ W.printString("Name", Constant.Name);
+ });
+
+ if (LVSymbol *Symbol = LogicalVisitor->CurrentSymbol) {
+ Symbol->setName(Constant.Name);
+ Symbol->setType(LogicalVisitor->getElement(StreamTPI, Constant.Type));
+ Symbol->resetIncludeInPrint();
+ }
+
+ return Error::success();
+}
+
+// S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE
+Error LVSymbolVisitor::visitKnownRecord(
+ CVSymbol &Record,
+ DefRangeFramePointerRelFullScopeSym &DefRangeFramePointerRelFullScope) {
+ // DefRanges don't have types, just registers and code offsets.
+ LLVM_DEBUG({
+ if (LocalSymbol)
+ W.getOStream() << formatv("Symbol: {0}, ", LocalSymbol->getName());
+
+ W.printNumber("Offset", DefRangeFramePointerRelFullScope.Offset);
+ });
+
+ if (LVSymbol *Symbol = LocalSymbol) {
+ Symbol->setHasCodeViewLocation();
+ LocalSymbol = nullptr;
+
+ // Add the debug location. Operands: [Offset, 0].
+ dwarf::Attribute Attr =
+ dwarf::Attribute(SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE);
+
+ uint64_t Operand1 = DefRangeFramePointerRelFullScope.Offset;
+ Symbol->addLocation(Attr, 0, 0, 0, 0);
+ Symbol->addLocationOperands(LVSmall(Attr), {Operand1});
+ }
+
+ return Error::success();
+}
+
+// S_DEFRANGE_FRAMEPOINTER_REL
+Error LVSymbolVisitor::visitKnownRecord(
+ CVSymbol &Record, DefRangeFramePointerRelSym &DefRangeFramePointerRel) {
+ // DefRanges don't have types, just registers and code offsets.
+ LLVM_DEBUG({
+ if (LocalSymbol)
+ W.getOStream() << formatv("Symbol: {0}, ", LocalSymbol->getName());
+
+ W.printNumber("Offset", DefRangeFramePointerRel.Hdr.Offset);
+ printLocalVariableAddrRange(DefRangeFramePointerRel.Range,
+ DefRangeFramePointerRel.getRelocationOffset());
+ printLocalVariableAddrGap(DefRangeFramePointerRel.Gaps);
+ });
+
+ // We are expecting the following sequence:
+ // 128 | S_LOCAL [size = 20] `ParamBar`
+ // ...
+ // 148 | S_DEFRANGE_FRAMEPOINTER_REL [size = 16]
+ if (LVSymbol *Symbol = LocalSymbol) {
+ Symbol->setHasCodeViewLocation();
+ LocalSymbol = nullptr;
+
+ // Add the debug location. Operands: [Offset, 0].
+ dwarf::Attribute Attr =
+ dwarf::Attribute(SymbolKind::S_DEFRANGE_FRAMEPOINTER_REL);
+ uint64_t Operand1 = DefRangeFramePointerRel.Hdr.Offset;
+
+ LocalVariableAddrRange Range = DefRangeFramePointerRel.Range;
+ LVAddress Address =
+ Reader->linearAddress(Range.ISectStart, Range.OffsetStart);
+
+ Symbol->addLocation(Attr, Address, Address + Range.Range, 0, 0);
+ Symbol->addLocationOperands(LVSmall(Attr), {Operand1});
+ }
+
+ return Error::success();
+}
+
+// S_DEFRANGE_REGISTER_REL
+Error LVSymbolVisitor::visitKnownRecord(
+ CVSymbol &Record, DefRangeRegisterRelSym &DefRangeRegisterRel) {
+ // DefRanges don't have types, just registers and code offsets.
+ LLVM_DEBUG({
+ if (LocalSymbol)
+ W.getOStream() << formatv("Symbol: {0}, ", LocalSymbol->getName());
+
+ W.printBoolean("HasSpilledUDTMember",
+ DefRangeRegisterRel.hasSpilledUDTMember());
+ W.printNumber("OffsetInParent", DefRangeRegisterRel.offsetInParent());
+ W.printNumber("BasePointerOffset",
+ DefRangeRegisterRel.Hdr.BasePointerOffset);
+ printLocalVariableAddrRange(DefRangeRegisterRel.Range,
+ DefRangeRegisterRel.getRelocationOffset());
+ printLocalVariableAddrGap(DefRangeRegisterRel.Gaps);
+ });
+
+ if (LVSymbol *Symbol = LocalSymbol) {
+ Symbol->setHasCodeViewLocation();
+ LocalSymbol = nullptr;
+
+ // Add the debug location. Operands: [Register, Offset].
+ dwarf::Attribute Attr =
+ dwarf::Attribute(SymbolKind::S_DEFRANGE_REGISTER_REL);
+ uint64_t Operand1 = DefRangeRegisterRel.Hdr.Register;
+ uint64_t Operand2 = DefRangeRegisterRel.Hdr.BasePointerOffset;
+
+ LocalVariableAddrRange Range = DefRangeRegisterRel.Range;
+ LVAddress Address =
+ Reader->linearAddress(Range.ISectStart, Range.OffsetStart);
+
+ Symbol->addLocation(Attr, Address, Address + Range.Range, 0, 0);
+ Symbol->addLocationOperands(LVSmall(Attr), {Operand1, Operand2});
+ }
+
+ return Error::success();
+}
+
+// S_DEFRANGE_REGISTER
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
+ DefRangeRegisterSym &DefRangeRegister) {
+ // DefRanges don't have types, just registers and code offsets.
+ LLVM_DEBUG({
+ if (LocalSymbol)
+ W.getOStream() << formatv("Symbol: {0}, ", LocalSymbol->getName());
+
+ W.printEnum("Register", uint16_t(DefRangeRegister.Hdr.Register),
+ getRegisterNames(Reader->getCompileUnitCPUType()));
+ W.printNumber("MayHaveNoName", DefRangeRegister.Hdr.MayHaveNoName);
+ printLocalVariableAddrRange(DefRangeRegister.Range,
+ DefRangeRegister.getRelocationOffset());
+ printLocalVariableAddrGap(DefRangeRegister.Gaps);
+ });
+
+ if (LVSymbol *Symbol = LocalSymbol) {
+ Symbol->setHasCodeViewLocation();
+ LocalSymbol = nullptr;
+
+ // Add the debug location. Operands: [Register, 0].
+ dwarf::Attribute Attr = dwarf::Attribute(SymbolKind::S_DEFRANGE_REGISTER);
+ uint64_t Operand1 = DefRangeRegister.Hdr.Register;
+
+ LocalVariableAddrRange Range = DefRangeRegister.Range;
+ LVAddress Address =
+ Reader->linearAddress(Range.ISectStart, Range.OffsetStart);
+
+ Symbol->addLocation(Attr, Address, Address + Range.Range, 0, 0);
+ Symbol->addLocationOperands(LVSmall(Attr), {Operand1});
+ }
+
+ return Error::success();
+}
+
+// S_DEFRANGE_SUBFIELD_REGISTER
+Error LVSymbolVisitor::visitKnownRecord(
+ CVSymbol &Record, DefRangeSubfieldRegisterSym &DefRangeSubfieldRegister) {
+ // DefRanges don't have types, just registers and code offsets.
+ LLVM_DEBUG({
+ if (LocalSymbol)
+ W.getOStream() << formatv("Symbol: {0}, ", LocalSymbol->getName());
+
+ W.printEnum("Register", uint16_t(DefRangeSubfieldRegister.Hdr.Register),
+ getRegisterNames(Reader->getCompileUnitCPUType()));
+ W.printNumber("MayHaveNoName", DefRangeSubfieldRegister.Hdr.MayHaveNoName);
+ W.printNumber("OffsetInParent",
+ DefRangeSubfieldRegister.Hdr.OffsetInParent);
+ printLocalVariableAddrRange(DefRangeSubfieldRegister.Range,
+ DefRangeSubfieldRegister.getRelocationOffset());
+ printLocalVariableAddrGap(DefRangeSubfieldRegister.Gaps);
+ });
+
+ if (LVSymbol *Symbol = LocalSymbol) {
+ Symbol->setHasCodeViewLocation();
+ LocalSymbol = nullptr;
+
+ // Add the debug location. Operands: [Register, 0].
+ dwarf::Attribute Attr =
+ dwarf::Attribute(SymbolKind::S_DEFRANGE_SUBFIELD_REGISTER);
+ uint64_t Operand1 = DefRangeSubfieldRegister.Hdr.Register;
+
+ LocalVariableAddrRange Range = DefRangeSubfieldRegister.Range;
+ LVAddress Address =
+ Reader->linearAddress(Range.ISectStart, Range.OffsetStart);
+
+ Symbol->addLocation(Attr, Address, Address + Range.Range, 0, 0);
+ Symbol->addLocationOperands(LVSmall(Attr), {Operand1});
+ }
+
+ return Error::success();
+}
+
+// S_DEFRANGE_SUBFIELD
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
+ DefRangeSubfieldSym &DefRangeSubfield) {
+ // DefRanges don't have types, just registers and code offsets.
+ LLVM_DEBUG({
+ if (LocalSymbol)
+ W.getOStream() << formatv("Symbol: {0}, ", LocalSymbol->getName());
+
+ if (ObjDelegate) {
+ DebugStringTableSubsectionRef Strings = ObjDelegate->getStringTable();
+ auto ExpectedProgram = Strings.getString(DefRangeSubfield.Program);
+ if (!ExpectedProgram) {
+ consumeError(ExpectedProgram.takeError());
+ return llvm::make_error<CodeViewError>(
+ "String table offset outside of bounds of String Table!");
+ }
+ W.printString("Program", *ExpectedProgram);
+ }
+ W.printNumber("OffsetInParent", DefRangeSubfield.OffsetInParent);
+ printLocalVariableAddrRange(DefRangeSubfield.Range,
+ DefRangeSubfield.getRelocationOffset());
+ printLocalVariableAddrGap(DefRangeSubfield.Gaps);
+ });
+
+ if (LVSymbol *Symbol = LocalSymbol) {
+ Symbol->setHasCodeViewLocation();
+ LocalSymbol = nullptr;
+
+ // Add the debug location. Operands: [Program, 0].
+ dwarf::Attribute Attr = dwarf::Attribute(SymbolKind::S_DEFRANGE_SUBFIELD);
+ uint64_t Operand1 = DefRangeSubfield.Program;
+
+ LocalVariableAddrRange Range = DefRangeSubfield.Range;
+ LVAddress Address =
+ Reader->linearAddress(Range.ISectStart, Range.OffsetStart);
+
+ Symbol->addLocation(Attr, Address, Address + Range.Range, 0, 0);
+ Symbol->addLocationOperands(LVSmall(Attr), {Operand1, /*Operand2=*/0});
+ }
+
+ return Error::success();
+}
+
+// S_DEFRANGE
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
+ DefRangeSym &DefRange) {
+ // DefRanges don't have types, just registers and code offsets.
+ LLVM_DEBUG({
+ if (LocalSymbol)
+ W.getOStream() << formatv("Symbol: {0}, ", LocalSymbol->getName());
+
+ if (ObjDelegate) {
+ DebugStringTableSubsectionRef Strings = ObjDelegate->getStringTable();
+ auto ExpectedProgram = Strings.getString(DefRange.Program);
+ if (!ExpectedProgram) {
+ consumeError(ExpectedProgram.takeError());
+ return llvm::make_error<CodeViewError>(
+ "String table offset outside of bounds of String Table!");
+ }
+ W.printString("Program", *ExpectedProgram);
+ }
+ printLocalVariableAddrRange(DefRange.Range, DefRange.getRelocationOffset());
+ printLocalVariableAddrGap(DefRange.Gaps);
+ });
+
+ if (LVSymbol *Symbol = LocalSymbol) {
+ Symbol->setHasCodeViewLocation();
+ LocalSymbol = nullptr;
+
+ // Add the debug location. Operands: [Program, 0].
+ dwarf::Attribute Attr = dwarf::Attribute(SymbolKind::S_DEFRANGE);
+ uint64_t Operand1 = DefRange.Program;
+
+ LocalVariableAddrRange Range = DefRange.Range;
+ LVAddress Address =
+ Reader->linearAddress(Range.ISectStart, Range.OffsetStart);
+
+ Symbol->addLocation(Attr, Address, Address + Range.Range, 0, 0);
+ Symbol->addLocationOperands(LVSmall(Attr), {Operand1, /*Operand2=*/0});
+ }
+
+ return Error::success();
+}
+
+// S_FRAMEPROC
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
+ FrameProcSym &FrameProc) {
+ if (LVScope *Function = LogicalVisitor->getReaderScope()) {
+ // S_FRAMEPROC contains extra information for the function described
+ // by any of the previously generated records:
+ // S_GPROC32, S_LPROC32, S_LPROC32_ID, S_GPROC32_ID.
+
+ // The generated sequence is:
+ // S_GPROC32_ID ...
+ // S_FRAMEPROC ...
+
+ // Collect additional inline flags for the current scope function.
+ FrameProcedureOptions Flags = FrameProc.Flags;
+ if (FrameProcedureOptions::MarkedInline ==
+ (Flags & FrameProcedureOptions::MarkedInline))
+ Function->setInlineCode(dwarf::DW_INL_declared_inlined);
+ if (FrameProcedureOptions::Inlined ==
+ (Flags & FrameProcedureOptions::Inlined))
+ Function->setInlineCode(dwarf::DW_INL_inlined);
+
+ // To determine the symbol kind for any symbol declared in that function,
+ // we can access the S_FRAMEPROC for the parent scope function. It contains
+ // the local and parameter frame-pointer registers, which are compared with
+ // the register in each S_REGREL32 record to find a match.
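+ // For example, if S_FRAMEPROC reports the parameter frame pointer as RBP,
+ // an S_REGREL32 whose register is RBP is classified as a parameter, while
+ // a match against the local frame pointer marks it as a local variable.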
+ codeview::CPUType CPU = Reader->getCompileUnitCPUType();
+ LocalFrameRegister = FrameProc.getLocalFramePtrReg(CPU);
+ ParamFrameRegister = FrameProc.getParamFramePtrReg(CPU);
+ }
+
+ return Error::success();
+}
+
+// S_GDATA32, S_LDATA32, S_LMANDATA, S_GMANDATA
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, DataSym &Data) {
+ LLVM_DEBUG({
+ printTypeIndex("Type", Data.Type);
+ W.printString("DisplayName", Data.Name);
+ });
+
+ if (LVSymbol *Symbol = LogicalVisitor->CurrentSymbol) {
+ StringRef LinkageName;
+ if (ObjDelegate)
+ ObjDelegate->getLinkageName(Data.getRelocationOffset(), Data.DataOffset,
+ &LinkageName);
+
+ Symbol->setName(Data.Name);
+ Symbol->setLinkageName(LinkageName);
+
+ // MSVC generates local data as initialization for aggregates. It
+ // contains the address of an initialization function.
+ // These symbols contain the '$initializer$' pattern. Allow them only if
+ // the '--internal=system' option is given.
+ // 0 | S_LDATA32 `Struct$initializer$`
+ // type = 0x1040 (void ()*)
+ if (getReader().isSystemEntry(Symbol) && !options().getAttributeSystem()) {
+ Symbol->resetIncludeInPrint();
+ return Error::success();
+ }
+
+ if (LVScope *Namespace = Shared->NamespaceDeduction.get(Data.Name)) {
+ // The variable is already at a different scope. In order to reflect
+ // the correct parent, move it to the namespace.
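+ // (For example, a definition such as 'int Outer::Inner::Var;' is emitted
+ // with its qualified name; the deduced 'Outer::Inner' namespace scope
+ // becomes its parent.)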
+ if (Symbol->getParentScope()->removeElement(Symbol))
+ Namespace->addElement(Symbol);
+ }
+
+ Symbol->setType(LogicalVisitor->getElement(StreamTPI, Data.Type));
+ if (Record.kind() == SymbolKind::S_GDATA32)
+ Symbol->setIsExternal();
+ }
+
+ return Error::success();
+}
+
+// S_INLINESITE
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
+ InlineSiteSym &InlineSite) {
+ LLVM_DEBUG({ printTypeIndex("Inlinee", InlineSite.Inlinee); });
+
+ if (LVScope *InlinedFunction = LogicalVisitor->CurrentScope) {
+ LVScope *AbstractFunction = Reader->createScopeFunction();
+ AbstractFunction->setIsSubprogram();
+ AbstractFunction->setTag(dwarf::DW_TAG_subprogram);
+ AbstractFunction->setInlineCode(dwarf::DW_INL_inlined);
+ AbstractFunction->setIsInlinedAbstract();
+ InlinedFunction->setReference(AbstractFunction);
+
+ LogicalVisitor->startProcessArgumentList();
+ // 'Inlinee' is a Type ID.
+ CVType CVFunctionType = Ids.getType(InlineSite.Inlinee);
+ if (Error Err = LogicalVisitor->finishVisitation(
+ CVFunctionType, InlineSite.Inlinee, AbstractFunction))
+ return Err;
+ LogicalVisitor->stopProcessArgumentList();
+
+ // For inlined functions, set the linkage name to be the same as
+ // the name. It is used to find their lines and ranges.
+ StringRef Name = AbstractFunction->getName();
+ InlinedFunction->setName(Name);
+ InlinedFunction->setLinkageName(Name);
+
+ // Process annotation bytes to calculate code and line offsets.
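+ // (The annotations are a stream of compressed opcodes, for example
+ // ChangeCodeOffset, ChangeLineOffset and ChangeCodeLength, which advance
+ // the running code address and line number for the inlined ranges.)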
+ if (Error Err = LogicalVisitor->inlineSiteAnnotation(
+ AbstractFunction, InlinedFunction, InlineSite))
+ return Err;
+ }
+
+ return Error::success();
+}
+
+// S_LOCAL
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, LocalSym &Local) {
+ LLVM_DEBUG({
+ printTypeIndex("Type", Local.Type);
+ W.printFlags("Flags", uint16_t(Local.Flags), getLocalFlagNames());
+ W.printString("VarName", Local.Name);
+ });
+
+ if (LVSymbol *Symbol = LogicalVisitor->CurrentSymbol) {
+ Symbol->setName(Local.Name);
+
+ // Symbol was created as 'variable'; determine its real kind.
+ Symbol->resetIsVariable();
+
+ // Be sure the 'this' symbol is marked as 'compiler generated'.
+ if (bool(Local.Flags & LocalSymFlags::IsCompilerGenerated) ||
+ Local.Name.equals("this")) {
+ Symbol->setIsArtificial();
+ Symbol->setIsParameter();
+ } else {
+ if (bool(Local.Flags & LocalSymFlags::IsParameter))
+ Symbol->setIsParameter();
+ else
+ Symbol->setIsVariable();
+ }
+
+ // Set the correct debug information tag.
+ if (Symbol->getIsParameter())
+ Symbol->setTag(dwarf::DW_TAG_formal_parameter);
+
+ LVElement *Element = LogicalVisitor->getElement(StreamTPI, Local.Type);
+ if (Element && Element->getIsScoped()) {
+ // We have a local type. Find its parent function.
+ LVScope *Parent = Symbol->getFunctionParent();
+ // The element representing the type has already been finalized. If
+ // the type is an aggregate type, its members have already been added.
+ // As the type is local, its level will be changed.
+ Parent->addElement(Element);
+ Element->updateLevel(Parent);
+ }
+ Symbol->setType(Element);
+
+ // The CodeView records (S_DEFRANGE_*) describing the debug location for
+ // this symbol do not have any direct reference to it. Those records
+ // are emitted after this symbol. Record the current symbol.
+ LocalSymbol = Symbol;
+ }
+
+ return Error::success();
+}
+
+// S_OBJNAME
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, ObjNameSym &ObjName) {
+ LLVM_DEBUG({
+ W.printHex("Signature", ObjName.Signature);
+ W.printString("ObjectName", ObjName.Name);
+ });
+
+ CurrentObjectName = ObjName.Name;
+ return Error::success();
+}
+
+// S_GPROC32, S_LPROC32, S_LPROC32_ID, S_GPROC32_ID
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, ProcSym &Proc) {
+ if (InFunctionScope)
+ return llvm::make_error<CodeViewError>("Visiting a ProcSym while inside "
+ "function scope!");
+
+ InFunctionScope = true;
+
+ LLVM_DEBUG({
+ printTypeIndex("FunctionType", Proc.FunctionType);
+ W.printHex("Segment", Proc.Segment);
+ W.printFlags("Flags", static_cast<uint8_t>(Proc.Flags),
+ getProcSymFlagNames());
+ W.printString("DisplayName", Proc.Name);
+ });
+
+ // Clang and Microsoft generated different debug information records:
+ // For functions definitions:
+ // Clang: S_GPROC32 -> LF_FUNC_ID -> LF_PROCEDURE
+ // Microsoft: S_GPROC32 -> LF_PROCEDURE
+
+ // For member function definition:
+ // Clang: S_GPROC32 -> LF_MFUNC_ID -> LF_MFUNCTION
+ // Microsoft: S_GPROC32 -> LF_MFUNCTION
+ // In order to support both sequences, if we find an LF_FUNC_ID, just
+ // get the TypeIndex for the LF_PROCEDURE.
+
+ // For the given test case, we have the sequence:
+ // namespace NSP_local {
+ // void foo_local() {
+ // }
+ // }
+ //
+ // 0x1000 | LF_STRING_ID String: NSP_local
+ // 0x1002 | LF_PROCEDURE
+ // return type = 0x0003 (void), # args = 0, param list = 0x1001
+ // calling conv = cdecl, options = None
+ // 0x1003 | LF_FUNC_ID
+ // name = foo_local, type = 0x1002, parent scope = 0x1000
+ // 0 | S_GPROC32_ID `NSP_local::foo_local`
+ // type = `0x1003 (foo_local)`
+ // 0x1004 | LF_STRING_ID String: suite
+ // 0x1005 | LF_STRING_ID String: suite_local.cpp
+ //
+ // The LF_STRING_ID can hold different information:
+ // 0x1000 - The enclosing namespace.
+ // 0x1004 - The compile unit directory name.
+ // 0x1005 - The compile unit name.
+ //
+ // Before deducing its scope, we need to evaluate its type and create any
+ // associated namespaces.
+ if (LVScope *Function = LogicalVisitor->CurrentScope) {
+ StringRef LinkageName;
+ if (ObjDelegate)
+ ObjDelegate->getLinkageName(Proc.getRelocationOffset(), Proc.CodeOffset,
+ &LinkageName);
+
+ // The line table can be accessed using the linkage name.
+ Reader->addToSymbolTable(LinkageName, Function);
+ Function->setName(Proc.Name);
+ Function->setLinkageName(LinkageName);
+
+ if (options().getGeneralCollectRanges()) {
+ // Record converted segment::offset addressing for this scope.
+ LVAddress Addendum = Reader->getSymbolTableAddress(LinkageName);
+ LVAddress LowPC =
+ Reader->linearAddress(Proc.Segment, Proc.CodeOffset, Addendum);
+ LVAddress HighPC = LowPC + Proc.CodeSize - 1;
+ Function->addObject(LowPC, HighPC);
+
+ // If the scope is a function, add it to the public names.
+ if ((options().getAttributePublics() || options().getPrintAnyLine()) &&
+ !Function->getIsInlinedFunction())
+ Reader->getCompileUnit()->addPublicName(Function, LowPC, HighPC);
+ }
+
+ if (Function->getIsSystem() && !options().getAttributeSystem()) {
+ Function->resetIncludeInPrint();
+ return Error::success();
+ }
+
+ TypeIndex TIFunctionType = Proc.FunctionType;
+ if (TIFunctionType.isSimple())
+ Function->setType(LogicalVisitor->getElement(StreamTPI, TIFunctionType));
+ else {
+ // We have to detect the correct stream, using the lexical parent
+ // name, as there is no other obvious way to get the stream.
+ // Normal function: LF_FUNC_ID (TPI)/(IPI)
+ // LF_PROCEDURE (TPI)
+ // Lambda function: LF_MFUNCTION (TPI)
+ // Member function: LF_MFUNC_ID (TPI)/(IPI)
+
+ StringRef OuterComponent;
+ std::tie(OuterComponent, std::ignore) = getInnerComponent(Proc.Name);
+ TypeIndex TI = Shared->ForwardReferences.find(OuterComponent);
+
+ std::optional<CVType> CVFunctionType;
+ auto GetRecordType = [&]() -> bool {
+ CVFunctionType = Ids.tryGetType(TIFunctionType);
+ if (!CVFunctionType)
+ return false;
+
+ if (TI.isNoneType())
+ // Normal function.
+ if (CVFunctionType->kind() == LF_FUNC_ID)
+ return true;
+
+ // Member function.
+ return (CVFunctionType->kind() == LF_MFUNC_ID);
+ };
+
+ // We can have a LF_FUNC_ID, LF_PROCEDURE or LF_MFUNCTION.
+ if (!GetRecordType()) {
+ CVFunctionType = Types.tryGetType(TIFunctionType);
+ if (!CVFunctionType)
+ return llvm::make_error<CodeViewError>("Invalid type index");
+ }
+
+ if (Error Err = LogicalVisitor->finishVisitation(
+ *CVFunctionType, TIFunctionType, Function))
+ return Err;
+ }
+
+ if (Record.kind() == SymbolKind::S_GPROC32 ||
+ Record.kind() == SymbolKind::S_GPROC32_ID)
+ Function->setIsExternal();
+
+ // We don't have a way to see if the symbol is compiler generated. Use
+ // the linkage name to detect `scalar deleting destructor' functions.
+ std::string DemangledSymbol = demangle(LinkageName);
+ if (DemangledSymbol.find("scalar deleting dtor") != std::string::npos) {
+ Function->setIsArtificial();
+ } else {
+ // Clang generates global ctor and dtor names containing the substrings:
+ // 'dynamic initializer for' and 'dynamic atexit destructor for'.
+ if (DemangledSymbol.find("dynamic atexit destructor for") !=
+ std::string::npos)
+ Function->setIsArtificial();
+ }
+ }
+
+ return Error::success();
+}
+
+// S_END
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
+ ScopeEndSym &ScopeEnd) {
+ InFunctionScope = false;
+ return Error::success();
+}
+
+// S_THUNK32
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, Thunk32Sym &Thunk) {
+ if (InFunctionScope)
+ return llvm::make_error<CodeViewError>("Visiting a Thunk32Sym while inside "
+ "function scope!");
+
+ InFunctionScope = true;
+
+ LLVM_DEBUG({
+ W.printHex("Segment", Thunk.Segment);
+ W.printString("Name", Thunk.Name);
+ });
+
+ if (LVScope *Function = LogicalVisitor->CurrentScope)
+ Function->setName(Thunk.Name);
+
+ return Error::success();
+}
+
+// S_UDT, S_COBOLUDT
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record, UDTSym &UDT) {
+ LLVM_DEBUG({
+ printTypeIndex("Type", UDT.Type);
+ W.printString("UDTName", UDT.Name);
+ });
+
+ if (LVType *Type = LogicalVisitor->CurrentType) {
+ if (LVScope *Namespace = Shared->NamespaceDeduction.get(UDT.Name)) {
+ if (Type->getParentScope()->removeElement(Type))
+ Namespace->addElement(Type);
+ }
+
+ Type->setName(UDT.Name);
+
+ // We have to determine if the typedef is a real C/C++ definition or
+ // the S_UDT record that describes all the user-defined types.
+ // 0 | S_UDT `Name` original type = 0x1009
+ // 0x1009 | LF_STRUCTURE `Name`
+ // Ignore type definitions for RTTI types:
+ // _s__RTTIBaseClassArray, _s__RTTIBaseClassDescriptor,
+ // _s__RTTICompleteObjectLocator, _s__RTTIClassHierarchyDescriptor.
+ if (getReader().isSystemEntry(Type))
+ Type->resetIncludeInPrint();
+ else {
+ StringRef RecordName = getRecordName(Types, UDT.Type);
+ if (UDT.Name.equals(RecordName))
+ Type->resetIncludeInPrint();
+ Type->setType(LogicalVisitor->getElement(StreamTPI, UDT.Type));
+ }
+ }
+
+ return Error::success();
+}
+
+// S_UNAMESPACE
+Error LVSymbolVisitor::visitKnownRecord(CVSymbol &Record,
+ UsingNamespaceSym &UN) {
+ LLVM_DEBUG({ W.printString("Namespace", UN.Name); });
+ return Error::success();
+}
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "CodeViewLogicalVisitor"
+
+//===----------------------------------------------------------------------===//
+// Logical visitor.
+//===----------------------------------------------------------------------===//
+LVLogicalVisitor::LVLogicalVisitor(LVCodeViewReader *Reader, ScopedPrinter &W,
+ InputFile &Input)
+ : Reader(Reader), W(W), Input(Input) {
+ // The LogicalVisitor connects the CodeViewReader with the visitors that
+ // traverse the types, symbols, etc. Do any initialization that is needed.
+ Shared = std::make_shared<LVShared>(Reader, this);
+}
+
+void LVLogicalVisitor::printTypeIndex(StringRef FieldName, TypeIndex TI,
+ uint32_t StreamIdx) {
+ codeview::printTypeIndex(W, FieldName, TI,
+ StreamIdx == StreamTPI ? types() : ids());
+}
+
+void LVLogicalVisitor::printTypeBegin(CVType &Record, TypeIndex TI,
+ LVElement *Element, uint32_t StreamIdx) {
+ W.getOStream() << "\n";
+ W.startLine() << formatTypeLeafKind(Record.kind());
+ W.getOStream() << " (" << HexNumber(TI.getIndex()) << ")";
+ W.getOStream() << " {\n";
+ W.indent();
+ W.printEnum("TypeLeafKind", unsigned(Record.kind()), ArrayRef(LeafTypeNames));
+ printTypeIndex("TI", TI, StreamIdx);
+ W.startLine() << "Element: " << HexNumber(Element->getOffset()) << " "
+ << Element->getName() << "\n";
+}
+
+void LVLogicalVisitor::printTypeEnd(CVType &Record) {
+ W.unindent();
+ W.startLine() << "}\n";
+}
+
+void LVLogicalVisitor::printMemberBegin(CVMemberRecord &Record, TypeIndex TI,
+ LVElement *Element,
+ uint32_t StreamIdx) {
+ W.getOStream() << "\n";
+ W.startLine() << formatTypeLeafKind(Record.Kind);
+ W.getOStream() << " (" << HexNumber(TI.getIndex()) << ")";
+ W.getOStream() << " {\n";
+ W.indent();
+ W.printEnum("TypeLeafKind", unsigned(Record.Kind), ArrayRef(LeafTypeNames));
+ printTypeIndex("TI", TI, StreamIdx);
+ W.startLine() << "Element: " << HexNumber(Element->getOffset()) << " "
+ << Element->getName() << "\n";
+}
+
+void LVLogicalVisitor::printMemberEnd(CVMemberRecord &Record) {
+ W.unindent();
+ W.startLine() << "}\n";
+}
+
+Error LVLogicalVisitor::visitUnknownType(CVType &Record, TypeIndex TI) {
+ LLVM_DEBUG({
+ printTypeIndex("\nTI", TI, StreamTPI);
+ W.printNumber("Length", uint32_t(Record.content().size()));
+ });
+ return Error::success();
+}
+
+// LF_ARGLIST (TPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record, ArgListRecord &Args,
+ TypeIndex TI, LVElement *Element) {
+ ArrayRef<TypeIndex> Indices = Args.getIndices();
+ uint32_t Size = Indices.size();
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamTPI);
+ W.printNumber("NumArgs", Size);
+ ListScope Arguments(W, "Arguments");
+ for (uint32_t I = 0; I < Size; ++I)
+ printTypeIndex("ArgType", Indices[I], StreamTPI);
+ printTypeEnd(Record);
+ });
+
+ LVScope *Function = static_cast<LVScope *>(Element);
+ for (uint32_t Index = 0; Index < Size; ++Index) {
+ TypeIndex ParameterType = Indices[Index];
+ createParameter(ParameterType, StringRef(), Function);
+ }
+
+ return Error::success();
+}
+
+// LF_ARRAY (TPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record, ArrayRecord &AT,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamTPI);
+ printTypeIndex("ElementType", AT.getElementType(), StreamTPI);
+ printTypeIndex("IndexType", AT.getIndexType(), StreamTPI);
+ W.printNumber("SizeOf", AT.getSize());
+ W.printString("Name", AT.getName());
+ printTypeEnd(Record);
+ });
+
+ if (Element->getIsFinalized())
+ return Error::success();
+ Element->setIsFinalized();
+
+ LVScopeArray *Array = static_cast<LVScopeArray *>(Element);
+ if (!Array)
+ return Error::success();
+
+ Reader->getCompileUnit()->addElement(Array);
+ TypeIndex TIElementType = AT.getElementType();
+
+ LVType *PrevSubrange = nullptr;
+ LazyRandomTypeCollection &Types = types();
+
+ // As the logical view is modeled on DWARF, for each dimension we have to
+ // create a DW_TAG_subrange_type with the dimension size.
+ // The subrange type can be: unsigned __int32 or unsigned __int64.
+ auto AddSubrangeType = [&](ArrayRecord &AR) {
+ LVType *Subrange = Reader->createTypeSubrange();
+ Subrange->setTag(dwarf::DW_TAG_subrange_type);
+ Subrange->setType(getElement(StreamTPI, AR.getIndexType()));
+ Subrange->setCount(AR.getSize());
+ Subrange->setOffset(
+ TIElementType.isSimple()
+ ? (uint32_t)(TypeLeafKind)TIElementType.getSimpleKind()
+ : TIElementType.getIndex());
+ Array->addElement(Subrange);
+
+ if (PrevSubrange)
+ if (int64_t Count = Subrange->getCount())
+ PrevSubrange->setCount(PrevSubrange->getCount() / Count);
+ PrevSubrange = Subrange;
+ };
+
+ // Preserve the original TypeIndex; it would be updated in the case of:
+ // - The array type contains qualifiers.
+ // - In multidimensional arrays, the last LF_ARRAY entry contains the type.
+ TypeIndex TIArrayType;
+
+ // For each dimension in the array, there is an LF_ARRAY entry. The last
+ // entry contains the array type, which can be an LF_MODIFIER in the case
+ // of the type being modified by a qualifier (const, etc).
+ ArrayRecord AR(AT);
+ CVType CVEntry = Record;
+ while (CVEntry.kind() == LF_ARRAY) {
+ // Create the subrange information, required by the logical view. Once
+ // the array has been processed, the dimension sizes will be updated, as
+ // the sizes are a progression. For instance:
+ // sizeof(int) = 4
+ // int Array[2]; Sizes: 8 Dim: 8 / 4 -> [2]
+ // int Array[2][3]; Sizes: 24, 12 Dim: 24 / 12 -> [2]
+ // Dim: 12 / 4 -> [3]
+ // int Array[2][3][4]; sizes: 96, 48, 16 Dim: 96 / 48 -> [2]
+ // Dim: 48 / 16 -> [3]
+ // Dim: 16 / 4 -> [4]
+ AddSubrangeType(AR);
+ TIArrayType = TIElementType;
+
+ // The current ElementType can be a modifier, in which case we need to
+ // get the type being modified.
+ // If TypeIndex is not a simple type, check if we have a qualified type.
+ if (!TIElementType.isSimple()) {
+ CVType CVElementType = Types.getType(TIElementType);
+ if (CVElementType.kind() == LF_MODIFIER) {
+ LVElement *QualifiedType =
+ Shared->TypeRecords.find(StreamTPI, TIElementType);
+ if (Error Err =
+ finishVisitation(CVElementType, TIElementType, QualifiedType))
+ return Err;
+ // Get the TypeIndex of the type that the LF_MODIFIER modifies.
+ TIElementType = getModifiedType(CVElementType);
+ }
+ }
+ // Ends the traversal, as we have reached a simple type (int, char, etc).
+ if (TIElementType.isSimple())
+ break;
+
+ // Read next dimension linked entry, if any.
+ CVEntry = Types.getType(TIElementType);
+ if (Error Err = TypeDeserializer::deserializeAs(
+ const_cast<CVType &>(CVEntry), AR)) {
+ consumeError(std::move(Err));
+ break;
+ }
+ TIElementType = AR.getElementType();
+ // NOTE: The typeindex has a value of: 0x0280.0000
+ getTrueType(TIElementType);
+ }
+
+ Array->setName(AT.getName());
+ TIArrayType = Shared->ForwardReferences.remap(TIArrayType);
+ Array->setType(getElement(StreamTPI, TIArrayType));
+
+ if (PrevSubrange)
+ // In the case of an aggregate type (class, struct, union, interface),
+ // get the aggregate size. As the original record is pointing to its
+ // reference, we have to update it.
+ if (uint64_t Size =
+ isAggregate(CVEntry)
+ ? getSizeInBytesForTypeRecord(Types.getType(TIArrayType))
+ : getSizeInBytesForTypeIndex(TIElementType))
+ PrevSubrange->setCount(PrevSubrange->getCount() / Size);
+
+ return Error::success();
+}
+
+// LF_BITFIELD (TPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record, BitFieldRecord &BF,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamTPI);
+ printTypeIndex("Type", TI, StreamTPI);
+ W.printNumber("BitSize", BF.getBitSize());
+ W.printNumber("BitOffset", BF.getBitOffset());
+ printTypeEnd(Record);
+ });
+
+ Element->setType(getElement(StreamTPI, BF.getType()));
+ Element->setBitSize(BF.getBitSize());
+ return Error::success();
+}
+
+// LF_BUILDINFO (TPI)/(IPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record, BuildInfoRecord &BI,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamIPI);
+ W.printNumber("NumArgs", static_cast<uint32_t>(BI.getArgs().size()));
+ ListScope Arguments(W, "Arguments");
+ for (TypeIndex Arg : BI.getArgs())
+ printTypeIndex("ArgType", Arg, StreamIPI);
+ printTypeEnd(Record);
+ });
+
+ // The given 'Element' refers to the current compilation unit.
+ // All the args are references into the TPI/IPI stream.
+ TypeIndex TIName = BI.getArgs()[BuildInfoRecord::BuildInfoArg::SourceFile];
+ std::string Name = std::string(ids().getTypeName(TIName));
+
+ // There are cases where LF_BUILDINFO fields are empty.
+ if (!Name.empty())
+ Element->setName(Name);
+
+ return Error::success();
+}
+
+// LF_CLASS, LF_STRUCTURE, LF_INTERFACE (TPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record, ClassRecord &Class,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamTPI);
+ W.printNumber("MemberCount", Class.getMemberCount());
+ printTypeIndex("FieldList", Class.getFieldList(), StreamTPI);
+ printTypeIndex("DerivedFrom", Class.getDerivationList(), StreamTPI);
+ printTypeIndex("VShape", Class.getVTableShape(), StreamTPI);
+ W.printNumber("SizeOf", Class.getSize());
+ W.printString("Name", Class.getName());
+ if (Class.hasUniqueName())
+ W.printString("UniqueName", Class.getUniqueName());
+ printTypeEnd(Record);
+ });
+
+ if (Element->getIsFinalized())
+ return Error::success();
+ Element->setIsFinalized();
+
+ LVScopeAggregate *Scope = static_cast<LVScopeAggregate *>(Element);
+ if (!Scope)
+ return Error::success();
+
+ Scope->setName(Class.getName());
+ if (Class.hasUniqueName())
+ Scope->setLinkageName(Class.getUniqueName());
+
+ if (Class.isNested()) {
+ Scope->setIsNested();
+ createParents(Class.getName(), Scope);
+ }
+
+ if (Class.isScoped())
+ Scope->setIsScoped();
+
+ // Nested types will be added to their parents at creation. The forward
+ // references are only processed to finish the referenced element creation.
+ if (!(Class.isNested() || Class.isScoped())) {
+ if (LVScope *Namespace = Shared->NamespaceDeduction.get(Class.getName()))
+ Namespace->addElement(Scope);
+ else
+ Reader->getCompileUnit()->addElement(Scope);
+ }
+
+ LazyRandomTypeCollection &Types = types();
+ TypeIndex TIFieldList = Class.getFieldList();
+ if (TIFieldList.isNoneType()) {
+ TypeIndex ForwardType = Shared->ForwardReferences.find(Class.getName());
+ if (!ForwardType.isNoneType()) {
+ CVType CVReference = Types.getType(ForwardType);
+ TypeRecordKind RK = static_cast<TypeRecordKind>(CVReference.kind());
+ ClassRecord ReferenceRecord(RK);
+ if (Error Err = TypeDeserializer::deserializeAs(
+ const_cast<CVType &>(CVReference), ReferenceRecord))
+ return Err;
+ TIFieldList = ReferenceRecord.getFieldList();
+ }
+ }
+
+ if (!TIFieldList.isNoneType()) {
+ // Pass down the TypeIndex 'TI' for the aggregate containing the field list.
+ CVType CVFieldList = Types.getType(TIFieldList);
+ if (Error Err = finishVisitation(CVFieldList, TI, Scope))
+ return Err;
+ }
+
+ return Error::success();
+}
+
+// LF_ENUM (TPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record, EnumRecord &Enum,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamTPI);
+ W.printNumber("NumEnumerators", Enum.getMemberCount());
+ printTypeIndex("UnderlyingType", Enum.getUnderlyingType(), StreamTPI);
+ printTypeIndex("FieldListType", Enum.getFieldList(), StreamTPI);
+ W.printString("Name", Enum.getName());
+ printTypeEnd(Record);
+ });
+
+ LVScopeEnumeration *Scope = static_cast<LVScopeEnumeration *>(Element);
+ if (!Scope)
+ return Error::success();
+
+ if (Scope->getIsFinalized())
+ return Error::success();
+ Scope->setIsFinalized();
+
+ // Set the name; in the nested case, it determines the relation to any
+ // potential parent, via the LF_NESTTYPE record.
+ Scope->setName(Enum.getName());
+ if (Enum.hasUniqueName())
+ Scope->setLinkageName(Enum.getUniqueName());
+
+ Scope->setType(getElement(StreamTPI, Enum.getUnderlyingType()));
+
+ if (Enum.isNested()) {
+ Scope->setIsNested();
+ createParents(Enum.getName(), Scope);
+ }
+
+ if (Enum.isScoped()) {
+ Scope->setIsScoped();
+ Scope->setIsEnumClass();
+ }
+
+ // Nested types will be added to their parents at creation.
+ if (!(Enum.isNested() || Enum.isScoped())) {
+ if (LVScope *Namespace = Shared->NamespaceDeduction.get(Enum.getName()))
+ Namespace->addElement(Scope);
+ else
+ Reader->getCompileUnit()->addElement(Scope);
+ }
+
+ TypeIndex TIFieldList = Enum.getFieldList();
+ if (!TIFieldList.isNoneType()) {
+ LazyRandomTypeCollection &Types = types();
+ CVType CVFieldList = Types.getType(TIFieldList);
+ if (Error Err = finishVisitation(CVFieldList, TIFieldList, Scope))
+ return Err;
+ }
+
+ return Error::success();
+}
+
+// LF_FIELDLIST (TPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record,
+ FieldListRecord &FieldList,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamTPI);
+ printTypeEnd(Record);
+ });
+
+ if (Error Err = visitFieldListMemberStream(TI, Element, FieldList.Data))
+ return Err;
+
+ return Error::success();
+}
+
+// LF_FUNC_ID (TPI)/(IPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record, FuncIdRecord &Func,
+ TypeIndex TI, LVElement *Element) {
+ // ParentScope and FunctionType are references into the TPI stream.
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamIPI);
+ printTypeIndex("ParentScope", Func.getParentScope(), StreamTPI);
+ printTypeIndex("FunctionType", Func.getFunctionType(), StreamTPI);
+ W.printString("Name", Func.getName());
+ printTypeEnd(Record);
+ });
+
+ // The TypeIndex (LF_PROCEDURE) returned by 'getFunctionType' is the
+ // function prototype; we need to use the function definition.
+ if (LVScope *FunctionDcl = static_cast<LVScope *>(Element)) {
+ // For inlined functions, the inlined instance has already been processed
+ // (all its information is contained in the Symbols section).
+ // 'Element' points to the created 'abstract' (out-of-line) function.
+ // Use the parent scope information to allocate it to the correct scope.
+ LazyRandomTypeCollection &Types = types();
+ TypeIndex TIParent = Func.getParentScope();
+ if (FunctionDcl->getIsInlinedAbstract()) {
+ FunctionDcl->setName(Func.getName());
+ if (TIParent.isNoneType())
+ Reader->getCompileUnit()->addElement(FunctionDcl);
+ }
+
+ if (!TIParent.isNoneType()) {
+ CVType CVParentScope = ids().getType(TIParent);
+ if (Error Err = finishVisitation(CVParentScope, TIParent, FunctionDcl))
+ return Err;
+ }
+
+ TypeIndex TIFunctionType = Func.getFunctionType();
+ CVType CVFunctionType = Types.getType(TIFunctionType);
+ if (Error Err =
+ finishVisitation(CVFunctionType, TIFunctionType, FunctionDcl))
+ return Err;
+
+ FunctionDcl->setIsFinalized();
+ }
+
+ return Error::success();
+}
+
+// LF_LABEL (TPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record, LabelRecord &LR,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamTPI);
+ printTypeEnd(Record);
+ });
+ return Error::success();
+}
+
+// LF_MFUNC_ID (TPI)/(IPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record, MemberFuncIdRecord &Id,
+ TypeIndex TI, LVElement *Element) {
+ // ClassType and FunctionType are references into the TPI stream.
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamIPI);
+ printTypeIndex("ClassType", Id.getClassType(), StreamTPI);
+ printTypeIndex("FunctionType", Id.getFunctionType(), StreamTPI);
+ W.printString("Name", Id.getName());
+ printTypeEnd(Record);
+ });
+
+ LVScope *FunctionDcl = static_cast<LVScope *>(Element);
+ if (FunctionDcl->getIsInlinedAbstract()) {
+ // For inlined functions, the inlined instance has already been processed
+ // (all its information is contained in the Symbols section).
+ // 'Element' points to the created 'abstract' (out-of-line) function.
+ // Use the parent scope information to allocate it to the correct scope.
+ if (LVScope *Class = static_cast<LVScope *>(
+ Shared->TypeRecords.find(StreamTPI, Id.getClassType())))
+ Class->addElement(FunctionDcl);
+ }
+
+ TypeIndex TIFunctionType = Id.getFunctionType();
+ CVType CVFunction = types().getType(TIFunctionType);
+ if (Error Err = finishVisitation(CVFunction, TIFunctionType, Element))
+ return Err;
+
+ return Error::success();
+}
+
+// LF_MFUNCTION (TPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record,
+ MemberFunctionRecord &MF, TypeIndex TI,
+ LVElement *Element) {
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamTPI);
+ printTypeIndex("ReturnType", MF.getReturnType(), StreamTPI);
+ printTypeIndex("ClassType", MF.getClassType(), StreamTPI);
+ printTypeIndex("ThisType", MF.getThisType(), StreamTPI);
+ W.printNumber("NumParameters", MF.getParameterCount());
+ printTypeIndex("ArgListType", MF.getArgumentList(), StreamTPI);
+ W.printNumber("ThisAdjustment", MF.getThisPointerAdjustment());
+ printTypeEnd(Record);
+ });
+
+ if (LVScope *MemberFunction = static_cast<LVScope *>(Element)) {
+ LVElement *Class = getElement(StreamTPI, MF.getClassType());
+
+ MemberFunction->setIsFinalized();
+ MemberFunction->setType(getElement(StreamTPI, MF.getReturnType()));
+ MemberFunction->setOffset(TI.getIndex());
+ MemberFunction->setOffsetFromTypeIndex();
+
+ if (ProcessArgumentList) {
+ ProcessArgumentList = false;
+
+ if (!MemberFunction->getIsStatic()) {
+ LVElement *ThisPointer = getElement(StreamTPI, MF.getThisType());
+ // When creating the 'this' pointer, check if it points to a reference.
+ ThisPointer->setType(Class);
+ LVSymbol *This =
+ createParameter(ThisPointer, StringRef(), MemberFunction);
+ This->setIsArtificial();
+ }
+
+ // Create formal parameters.
+ LazyRandomTypeCollection &Types = types();
+ CVType CVArguments = Types.getType(MF.getArgumentList());
+ if (Error Err = finishVisitation(CVArguments, MF.getArgumentList(),
+ MemberFunction))
+ return Err;
+ }
+ }
+
+ return Error::success();
+}
+
+// LF_METHODLIST (TPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record,
+ MethodOverloadListRecord &Overloads,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamTPI);
+ printTypeEnd(Record);
+ });
+
+ for (OneMethodRecord &Method : Overloads.Methods) {
+ CVMemberRecord Record;
+ Record.Kind = LF_METHOD;
+ Method.Name = OverloadedMethodName;
+ if (Error Err = visitKnownMember(Record, Method, TI, Element))
+ return Err;
+ }
+
+ return Error::success();
+}
+
+// LF_MODIFIER (TPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record, ModifierRecord &Mod,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamTPI);
+ printTypeIndex("ModifiedType", Mod.getModifiedType(), StreamTPI);
+ printTypeEnd(Record);
+ });
+
+ // Create the modified type, which will be attached to the type(s) that
+ // contain the modifiers.
+ LVElement *ModifiedType = getElement(StreamTPI, Mod.getModifiedType());
+
+ // At this point the types recording the qualifiers do not have a
+ // scope parent. They must be assigned to the current compile unit.
+ LVScopeCompileUnit *CompileUnit = Reader->getCompileUnit();
+
+ // The incoming element does not have a defined kind. Use the given
+ // modifiers to complete its type. A type can have more than one modifier;
+ // in that case, we have to create an extra type to hold the additional
+ // modifier.
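+ // For example, a 'const volatile int' modified type yields the chain:
+ // DW_TAG_const_type -> DW_TAG_volatile_type -> int, where the incoming
+ // element becomes the 'const' node and an extra node is created for
+ // 'volatile'.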
+ LVType *LastLink = static_cast<LVType *>(Element);
+ if (!LastLink->getParentScope())
+ CompileUnit->addElement(LastLink);
+
+ bool SeenModifier = false;
+ uint16_t Mods = static_cast<uint16_t>(Mod.getModifiers());
+ if (Mods & uint16_t(ModifierOptions::Const)) {
+ SeenModifier = true;
+ LastLink->setTag(dwarf::DW_TAG_const_type);
+ LastLink->setIsConst();
+ LastLink->setName("const");
+ }
+ if (Mods & uint16_t(ModifierOptions::Volatile)) {
+ if (SeenModifier) {
+ LVType *Volatile = Reader->createType();
+ Volatile->setIsModifier();
+ LastLink->setType(Volatile);
+ LastLink = Volatile;
+ CompileUnit->addElement(LastLink);
+ }
+ LastLink->setTag(dwarf::DW_TAG_volatile_type);
+ LastLink->setIsVolatile();
+ LastLink->setName("volatile");
+ }
+ if (Mods & uint16_t(ModifierOptions::Unaligned)) {
+ if (SeenModifier) {
+ LVType *Unaligned = Reader->createType();
+ Unaligned->setIsModifier();
+ LastLink->setType(Unaligned);
+ LastLink = Unaligned;
+ CompileUnit->addElement(LastLink);
+ }
+ LastLink->setTag(dwarf::DW_TAG_unaligned);
+ LastLink->setIsUnaligned();
+ LastLink->setName("unaligned");
+ }
+
+ LastLink->setType(ModifiedType);
+ return Error::success();
+}
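As a rough illustration of the chaining above (the declaration is hypothetical, not taken from the patch), a type carrying two qualifiers is split into one logical type per modifier, with the last link pointing at the modified type:

    // Hypothetical source being read:
    const volatile int Counter;

    // LF_MODIFIER (Const | Volatile) over 'int' produces the chain:
    //   LVType "const"        (DW_TAG_const_type, the incoming element)
    //     -> LVType "volatile" (DW_TAG_volatile_type, the extra type)
    //       -> LVType "int"    (the modified type)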
+
+// LF_POINTER (TPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record, PointerRecord &Ptr,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamTPI);
+ printTypeIndex("PointeeType", Ptr.getReferentType(), StreamTPI);
+ W.printNumber("IsFlat", Ptr.isFlat());
+ W.printNumber("IsConst", Ptr.isConst());
+ W.printNumber("IsVolatile", Ptr.isVolatile());
+ W.printNumber("IsUnaligned", Ptr.isUnaligned());
+ W.printNumber("IsRestrict", Ptr.isRestrict());
+ W.printNumber("IsThisPtr&", Ptr.isLValueReferenceThisPtr());
+ W.printNumber("IsThisPtr&&", Ptr.isRValueReferenceThisPtr());
+ W.printNumber("SizeOf", Ptr.getSize());
+
+ if (Ptr.isPointerToMember()) {
+ const MemberPointerInfo &MI = Ptr.getMemberInfo();
+ printTypeIndex("ClassType", MI.getContainingType(), StreamTPI);
+ }
+ printTypeEnd(Record);
+ });
+
+ // Find the pointed-to type.
+ LVType *Pointer = static_cast<LVType *>(Element);
+ LVElement *Pointee = nullptr;
+
+ PointerMode Mode = Ptr.getMode();
+ Pointee = Ptr.isPointerToMember()
+ ? Shared->TypeRecords.find(StreamTPI, Ptr.getReferentType())
+ : getElement(StreamTPI, Ptr.getReferentType());
+
+ // At this point the types recording the qualifiers do not have a
+ // scope parent. They must be assigned to the current compile unit.
+ LVScopeCompileUnit *CompileUnit = Reader->getCompileUnit();
+
+ // Order for the different modifiers:
+ // <restrict> <Pointer, LValueReference, RValueReference> <const, volatile>
+ // Const and volatile have already been processed.
+ bool SeenModifier = false;
+ LVType *LastLink = Pointer;
+ if (!LastLink->getParentScope())
+ CompileUnit->addElement(LastLink);
+
+ if (Ptr.isRestrict()) {
+ SeenModifier = true;
+ LVType *Restrict = Reader->createType();
+ Restrict->setTag(dwarf::DW_TAG_restrict_type);
+ Restrict->setIsRestrict();
+ Restrict->setName("restrict");
+ LastLink->setType(Restrict);
+ LastLink = Restrict;
+ CompileUnit->addElement(LastLink);
+ }
+ if (Mode == PointerMode::LValueReference) {
+ if (SeenModifier) {
+ LVType *LReference = Reader->createType();
+ LReference->setIsModifier();
+ LastLink->setType(LReference);
+ LastLink = LReference;
+ CompileUnit->addElement(LastLink);
+ }
+ LastLink->setTag(dwarf::DW_TAG_reference_type);
+ LastLink->setIsReference();
+ LastLink->setName("&");
+ }
+ if (Mode == PointerMode::RValueReference) {
+ if (SeenModifier) {
+ LVType *RReference = Reader->createType();
+ RReference->setIsModifier();
+ LastLink->setType(RReference);
+ LastLink = RReference;
+ CompileUnit->addElement(LastLink);
+ }
+ LastLink->setTag(dwarf::DW_TAG_rvalue_reference_type);
+ LastLink->setIsRvalueReference();
+ LastLink->setName("&&");
+ }
+
+ // When creating the pointer, check if it points to a reference.
+ LastLink->setType(Pointee);
+ return Error::success();
+}
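The pointer case follows the same pattern; per the code above, restrict is materialized first and the final link points at the pointee. A minimal sketch, assuming a plain restrict-qualified pointer (hypothetical declaration):

    // Hypothetical source being read:
    int *__restrict Buffer;

    // PointerRecord with isRestrict() set produces the chain:
    //   LVType "*"            (DW_TAG_pointer_type, the incoming element)
    //     -> LVType "restrict" (DW_TAG_restrict_type)
    //       -> LVType "int"    (the pointee)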
+
+// LF_PROCEDURE (TPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record, ProcedureRecord &Proc,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamTPI);
+ printTypeIndex("ReturnType", Proc.getReturnType(), StreamTPI);
+ W.printNumber("NumParameters", Proc.getParameterCount());
+ printTypeIndex("ArgListType", Proc.getArgumentList(), StreamTPI);
+ printTypeEnd(Record);
+ });
+
+ // There is no need to traverse the argument list, as the CodeView format
+ // declares the parameters as 'S_LOCAL' symbols tagged as parameters.
+ // Only process the parameters when dealing with inlined functions.
+ if (LVScope *FunctionDcl = static_cast<LVScope *>(Element)) {
+ FunctionDcl->setType(getElement(StreamTPI, Proc.getReturnType()));
+
+ if (ProcessArgumentList) {
+ ProcessArgumentList = false;
+ // Create formal parameters.
+ LazyRandomTypeCollection &Types = types();
+ CVType CVArguments = Types.getType(Proc.getArgumentList());
+ if (Error Err = finishVisitation(CVArguments, Proc.getArgumentList(),
+ FunctionDcl))
+ return Err;
+ }
+ }
+
+ return Error::success();
+}
+
+// LF_UNION (TPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record, UnionRecord &Union,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamTPI);
+ W.printNumber("MemberCount", Union.getMemberCount());
+ printTypeIndex("FieldList", Union.getFieldList(), StreamTPI);
+ W.printNumber("SizeOf", Union.getSize());
+ W.printString("Name", Union.getName());
+ if (Union.hasUniqueName())
+ W.printString("UniqueName", Union.getUniqueName());
+ printTypeEnd(Record);
+ });
+
+ LVScopeAggregate *Scope = static_cast<LVScopeAggregate *>(Element);
+ if (!Scope)
+ return Error::success();
+
+ if (Scope->getIsFinalized())
+ return Error::success();
+ Scope->setIsFinalized();
+
+ Scope->setName(Union.getName());
+ if (Union.hasUniqueName())
+ Scope->setLinkageName(Union.getUniqueName());
+
+ if (Union.isNested()) {
+ Scope->setIsNested();
+ createParents(Union.getName(), Scope);
+ } else {
+ if (LVScope *Namespace = Shared->NamespaceDeduction.get(Union.getName()))
+ Namespace->addElement(Scope);
+ else
+ Reader->getCompileUnit()->addElement(Scope);
+ }
+
+ if (!Union.getFieldList().isNoneType()) {
+ LazyRandomTypeCollection &Types = types();
+ // Pass down the TypeIndex 'TI' for the aggregate containing the field list.
+ CVType CVFieldList = Types.getType(Union.getFieldList());
+ if (Error Err = finishVisitation(CVFieldList, TI, Scope))
+ return Err;
+ }
+
+ return Error::success();
+}
+
+// LF_TYPESERVER2 (TPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record, TypeServer2Record &TS,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamTPI);
+ W.printString("Guid", formatv("{0}", TS.getGuid()).str());
+ W.printNumber("Age", TS.getAge());
+ W.printString("Name", TS.getName());
+ printTypeEnd(Record);
+ });
+ return Error::success();
+}
+
+// LF_VFTABLE (TPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record, VFTableRecord &VFT,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamTPI);
+ printTypeIndex("CompleteClass", VFT.getCompleteClass(), StreamTPI);
+ printTypeIndex("OverriddenVFTable", VFT.getOverriddenVTable(), StreamTPI);
+ W.printHex("VFPtrOffset", VFT.getVFPtrOffset());
+ W.printString("VFTableName", VFT.getName());
+ for (const StringRef &N : VFT.getMethodNames())
+ W.printString("MethodName", N);
+ printTypeEnd(Record);
+ });
+ return Error::success();
+}
+
+// LF_VTSHAPE (TPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record,
+ VFTableShapeRecord &Shape,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamTPI);
+ W.printNumber("VFEntryCount", Shape.getEntryCount());
+ printTypeEnd(Record);
+ });
+ return Error::success();
+}
+
+// LF_SUBSTR_LIST (TPI)/(IPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record,
+ StringListRecord &Strings,
+ TypeIndex TI, LVElement *Element) {
+ // All the indices are references into the TPI/IPI stream.
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamIPI);
+ ArrayRef<TypeIndex> Indices = Strings.getIndices();
+ uint32_t Size = Indices.size();
+ W.printNumber("NumStrings", Size);
+ ListScope Arguments(W, "Strings");
+ for (uint32_t I = 0; I < Size; ++I)
+ printTypeIndex("String", Indices[I], StreamIPI);
+ printTypeEnd(Record);
+ });
+ return Error::success();
+}
+
+// LF_STRING_ID (TPI)/(IPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record, StringIdRecord &String,
+ TypeIndex TI, LVElement *Element) {
+ // All args are references into the TPI/IPI stream.
+ LLVM_DEBUG({
+ printTypeIndex("\nTI", TI, StreamIPI);
+ printTypeIndex("Id", String.getId(), StreamIPI);
+ W.printString("StringData", String.getString());
+ });
+
+ if (LVScope *Namespace = Shared->NamespaceDeduction.get(
+ String.getString(), /*CheckScope=*/false)) {
+ // The function is already at a different scope. In order to reflect
+ // the correct parent, move it to the namespace.
+ if (LVScope *Scope = Element->getParentScope())
+ Scope->removeElement(Element);
+ Namespace->addElement(Element);
+ }
+
+ return Error::success();
+}
+
+// LF_UDT_SRC_LINE (TPI)/(IPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record,
+ UdtSourceLineRecord &SourceLine,
+ TypeIndex TI, LVElement *Element) {
+ // All args are references into the TPI/IPI stream.
+ LLVM_DEBUG({
+ printTypeIndex("\nTI", TI, StreamIPI);
+ printTypeIndex("UDT", SourceLine.getUDT(), StreamIPI);
+ printTypeIndex("SourceFile", SourceLine.getSourceFile(), StreamIPI);
+ W.printNumber("LineNumber", SourceLine.getLineNumber());
+ });
+ return Error::success();
+}
+
+// LF_UDT_MOD_SRC_LINE (TPI)/(IPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record,
+ UdtModSourceLineRecord &ModSourceLine,
+ TypeIndex TI, LVElement *Element) {
+ // All args are references into the TPI/IPI stream.
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamIPI);
+ printTypeIndex("\nTI", TI, StreamIPI);
+ printTypeIndex("UDT", ModSourceLine.getUDT(), StreamIPI);
+ printTypeIndex("SourceFile", ModSourceLine.getSourceFile(), StreamIPI);
+ W.printNumber("LineNumber", ModSourceLine.getLineNumber());
+ W.printNumber("Module", ModSourceLine.getModule());
+ printTypeEnd(Record);
+ });
+ return Error::success();
+}
+
+// LF_PRECOMP (TPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record, PrecompRecord &Precomp,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamTPI);
+ W.printHex("StartIndex", Precomp.getStartTypeIndex());
+ W.printHex("Count", Precomp.getTypesCount());
+ W.printHex("Signature", Precomp.getSignature());
+ W.printString("PrecompFile", Precomp.getPrecompFilePath());
+ printTypeEnd(Record);
+ });
+ return Error::success();
+}
+
+// LF_ENDPRECOMP (TPI)
+Error LVLogicalVisitor::visitKnownRecord(CVType &Record,
+ EndPrecompRecord &EndPrecomp,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printTypeBegin(Record, TI, Element, StreamTPI);
+ W.printHex("Signature", EndPrecomp.getSignature());
+ printTypeEnd(Record);
+ });
+ return Error::success();
+}
+
+Error LVLogicalVisitor::visitUnknownMember(CVMemberRecord &Record,
+ TypeIndex TI) {
+ LLVM_DEBUG({ W.printHex("UnknownMember", unsigned(Record.Kind)); });
+ return Error::success();
+}
+
+// LF_BCLASS, LF_BINTERFACE
+Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record,
+ BaseClassRecord &Base, TypeIndex TI,
+ LVElement *Element) {
+ LLVM_DEBUG({
+ printMemberBegin(Record, TI, Element, StreamTPI);
+ printTypeIndex("BaseType", Base.getBaseType(), StreamTPI);
+ W.printHex("BaseOffset", Base.getBaseOffset());
+ printMemberEnd(Record);
+ });
+
+ createElement(Record.Kind);
+ if (LVSymbol *Symbol = CurrentSymbol) {
+ LVElement *BaseClass = getElement(StreamTPI, Base.getBaseType());
+ Symbol->setName(BaseClass->getName());
+ Symbol->setType(BaseClass);
+ Symbol->setAccessibilityCode(Base.getAccess());
+ static_cast<LVScope *>(Element)->addElement(Symbol);
+ }
+
+ return Error::success();
+}
+
+// LF_MEMBER
+Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record,
+ DataMemberRecord &Field, TypeIndex TI,
+ LVElement *Element) {
+ LLVM_DEBUG({
+ printMemberBegin(Record, TI, Element, StreamTPI);
+ printTypeIndex("Type", Field.getType(), StreamTPI);
+ W.printHex("FieldOffset", Field.getFieldOffset());
+ W.printString("Name", Field.getName());
+ printMemberEnd(Record);
+ });
+
+ // Create the data member.
+ createDataMember(Record, static_cast<LVScope *>(Element), Field.getName(),
+ Field.getType(), Field.getAccess());
+ return Error::success();
+}
+
+// LF_ENUMERATE
+Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record,
+ EnumeratorRecord &Enum, TypeIndex TI,
+ LVElement *Element) {
+ LLVM_DEBUG({
+ printMemberBegin(Record, TI, Element, StreamTPI);
+ W.printNumber("EnumValue", Enum.getValue());
+ W.printString("Name", Enum.getName());
+ printMemberEnd(Record);
+ });
+
+ createElement(Record.Kind);
+ if (LVType *Type = CurrentType) {
+ Type->setName(Enum.getName());
+ SmallString<16> Value;
+ Enum.getValue().toString(Value, 16, true, true);
+ Type->setValue(Value);
+ static_cast<LVScope *>(Element)->addElement(CurrentType);
+ }
+
+ return Error::success();
+}
+
+// LF_INDEX
+Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record,
+ ListContinuationRecord &Cont,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printMemberBegin(Record, TI, Element, StreamTPI);
+ printTypeIndex("ContinuationIndex", Cont.getContinuationIndex(), StreamTPI);
+ printMemberEnd(Record);
+ });
+ return Error::success();
+}
+
+// LF_NESTTYPE
+Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record,
+ NestedTypeRecord &Nested, TypeIndex TI,
+ LVElement *Element) {
+ LLVM_DEBUG({
+ printMemberBegin(Record, TI, Element, StreamTPI);
+ printTypeIndex("Type", Nested.getNestedType(), StreamTPI);
+ W.printString("Name", Nested.getName());
+ printMemberEnd(Record);
+ });
+
+ if (LVElement *Typedef = createElement(SymbolKind::S_UDT)) {
+ Typedef->setName(Nested.getName());
+ LVElement *NestedType = getElement(StreamTPI, Nested.getNestedType());
+ Typedef->setType(NestedType);
+ LVScope *Scope = static_cast<LVScope *>(Element);
+ Scope->addElement(Typedef);
+
+ if (NestedType && NestedType->getIsNested()) {
+ // 'Element' is an aggregate type that may contain this nested type
+ // definition. Use their scoped names to decide on their relationship.
+ StringRef RecordName = getRecordName(types(), TI);
+
+ StringRef NestedTypeName = NestedType->getName();
+ if (NestedTypeName.size() && RecordName.size()) {
+ StringRef OuterComponent;
+ std::tie(OuterComponent, std::ignore) =
+ getInnerComponent(NestedTypeName);
+ // We have an already created nested type. Add it to the current scope
+ // and update all its children if any.
+ if (OuterComponent.size() && OuterComponent.equals(RecordName)) {
+ if (!NestedType->getIsScopedAlready()) {
+ Scope->addElement(NestedType);
+ NestedType->setIsScopedAlready();
+ NestedType->updateLevel(Scope);
+ }
+ Typedef->resetIncludeInPrint();
+ }
+ }
+ }
+ }
+
+ return Error::success();
+}
+
+// LF_ONEMETHOD
+Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record,
+ OneMethodRecord &Method, TypeIndex TI,
+ LVElement *Element) {
+ LLVM_DEBUG({
+ printMemberBegin(Record, TI, Element, StreamTPI);
+ printTypeIndex("Type", Method.getType(), StreamTPI);
+ // If virtual, then read the vftable offset.
+ if (Method.isIntroducingVirtual())
+ W.printHex("VFTableOffset", Method.getVFTableOffset());
+ W.printString("Name", Method.getName());
+ printMemberEnd(Record);
+ });
+
+ // All the LF_ONEMETHOD objects share the same type description.
+ // We have to create a scope object for each one and get the required
+ // information from the LF_MFUNCTION object.
+ ProcessArgumentList = true;
+ if (LVElement *MemberFunction = createElement(TypeLeafKind::LF_ONEMETHOD)) {
+ MemberFunction->setIsFinalized();
+ static_cast<LVScope *>(Element)->addElement(MemberFunction);
+
+ MemberFunction->setName(Method.getName());
+ MemberFunction->setAccessibilityCode(Method.getAccess());
+
+ MethodKind Kind = Method.getMethodKind();
+ if (Kind == MethodKind::Static)
+ MemberFunction->setIsStatic();
+ MemberFunction->setVirtualityCode(Kind);
+
+ MethodOptions Flags = Method.Attrs.getFlags();
+ if (MethodOptions::CompilerGenerated ==
+ (Flags & MethodOptions::CompilerGenerated))
+ MemberFunction->setIsArtificial();
+
+ LazyRandomTypeCollection &Types = types();
+ CVType CVMethodType = Types.getType(Method.getType());
+ if (Error Err =
+ finishVisitation(CVMethodType, Method.getType(), MemberFunction))
+ return Err;
+ }
+ ProcessArgumentList = false;
+
+ return Error::success();
+}
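The shared type description mentioned above comes from TPI deduplication: member functions with identical signatures typically reference a single LF_MFUNCTION record. A hypothetical example:

    struct S {
      void First(int);
      void Second(int); // Both LF_ONEMETHOD entries reference the same
                        // LF_MFUNCTION record, so the reader creates one
                        // LVScope per method and fills each from that
                        // shared record (hence the ProcessArgumentList flag).
    };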
+
+// LF_METHOD
+Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record,
+ OverloadedMethodRecord &Method,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printMemberBegin(Record, TI, Element, StreamTPI);
+ W.printHex("MethodCount", Method.getNumOverloads());
+ printTypeIndex("MethodListIndex", Method.getMethodList(), StreamTPI);
+ W.printString("Name", Method.getName());
+ printMemberEnd(Record);
+ });
+
+ // Record the overloaded method name, which will be used during the
+ // traversal of the method list.
+ LazyRandomTypeCollection &Types = types();
+ OverloadedMethodName = Method.getName();
+ CVType CVMethods = Types.getType(Method.getMethodList());
+ if (Error Err = finishVisitation(CVMethods, Method.getMethodList(), Element))
+ return Err;
+
+ return Error::success();
+}
+
+// LF_STMEMBER
+Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record,
+ StaticDataMemberRecord &Field,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printMemberBegin(Record, TI, Element, StreamTPI);
+ printTypeIndex("Type", Field.getType(), StreamTPI);
+ W.printString("Name", Field.getName());
+ printMemberEnd(Record);
+ });
+
+ // Create the data member.
+ createDataMember(Record, static_cast<LVScope *>(Element), Field.getName(),
+ Field.getType(), Field.getAccess());
+ return Error::success();
+}
+
+// LF_VFUNCTAB
+Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record,
+ VFPtrRecord &VFTable, TypeIndex TI,
+ LVElement *Element) {
+ LLVM_DEBUG({
+ printMemberBegin(Record, TI, Element, StreamTPI);
+ printTypeIndex("Type", VFTable.getType(), StreamTPI);
+ printMemberEnd(Record);
+ });
+ return Error::success();
+}
+
+// LF_VBCLASS, LF_IVBCLASS
+Error LVLogicalVisitor::visitKnownMember(CVMemberRecord &Record,
+ VirtualBaseClassRecord &Base,
+ TypeIndex TI, LVElement *Element) {
+ LLVM_DEBUG({
+ printMemberBegin(Record, TI, Element, StreamTPI);
+ printTypeIndex("BaseType", Base.getBaseType(), StreamTPI);
+ printTypeIndex("VBPtrType", Base.getVBPtrType(), StreamTPI);
+ W.printHex("VBPtrOffset", Base.getVBPtrOffset());
+ W.printHex("VBTableIndex", Base.getVTableIndex());
+ printMemberEnd(Record);
+ });
+
+ createElement(Record.Kind);
+ if (LVSymbol *Symbol = CurrentSymbol) {
+ LVElement *BaseClass = getElement(StreamTPI, Base.getBaseType());
+ Symbol->setName(BaseClass->getName());
+ Symbol->setType(BaseClass);
+ Symbol->setAccessibilityCode(Base.getAccess());
+ Symbol->setVirtualityCode(MethodKind::Virtual);
+ static_cast<LVScope *>(Element)->addElement(Symbol);
+ }
+
+ return Error::success();
+}
+
+Error LVLogicalVisitor::visitMemberRecord(CVMemberRecord &Record,
+ TypeVisitorCallbacks &Callbacks,
+ TypeIndex TI, LVElement *Element) {
+ if (Error Err = Callbacks.visitMemberBegin(Record))
+ return Err;
+
+ switch (Record.Kind) {
+ default:
+ if (Error Err = Callbacks.visitUnknownMember(Record))
+ return Err;
+ break;
+#define MEMBER_RECORD(EnumName, EnumVal, Name) \
+ case EnumName: { \
+ if (Error Err = \
+ visitKnownMember<Name##Record>(Record, Callbacks, TI, Element)) \
+ return Err; \
+ break; \
+ }
+#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) \
+ MEMBER_RECORD(EnumVal, EnumVal, AliasName)
+#define TYPE_RECORD(EnumName, EnumVal, Name)
+#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
+ }
+
+ if (Error Err = Callbacks.visitMemberEnd(Record))
+ return Err;
+
+ return Error::success();
+}
+
+Error LVLogicalVisitor::finishVisitation(CVType &Record, TypeIndex TI,
+ LVElement *Element) {
+ switch (Record.kind()) {
+ default:
+ if (Error Err = visitUnknownType(Record, TI))
+ return Err;
+ break;
+#define TYPE_RECORD(EnumName, EnumVal, Name) \
+ case EnumName: { \
+ if (Error Err = visitKnownRecord<Name##Record>(Record, TI, Element)) \
+ return Err; \
+ break; \
+ }
+#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) \
+ TYPE_RECORD(EnumVal, EnumVal, AliasName)
+#define MEMBER_RECORD(EnumName, EnumVal, Name)
+#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "llvm/DebugInfo/CodeView/CodeViewTypes.def"
+ }
+
+ return Error::success();
+}
+
+// Customized version of 'FieldListVisitHelper'.
+Error LVLogicalVisitor::visitFieldListMemberStream(
+ TypeIndex TI, LVElement *Element, ArrayRef<uint8_t> FieldList) {
+ BinaryByteStream Stream(FieldList, llvm::support::little);
+ BinaryStreamReader Reader(Stream);
+ FieldListDeserializer Deserializer(Reader);
+ TypeVisitorCallbackPipeline Pipeline;
+ Pipeline.addCallbackToPipeline(Deserializer);
+
+ TypeLeafKind Leaf;
+ while (!Reader.empty()) {
+ if (Error Err = Reader.readEnum(Leaf))
+ return Err;
+
+ CVMemberRecord Record;
+ Record.Kind = Leaf;
+ if (Error Err = visitMemberRecord(Record, Pipeline, TI, Element))
+ return Err;
+ }
+
+ return Error::success();
+}
+
+void LVLogicalVisitor::addElement(LVScope *Scope, bool IsCompileUnit) {
+ // The CodeView specification does not treat S_COMPILE2 and S_COMPILE3
+ // as symbols that open a scope. The CodeView reader treats them in a
+ // similar way to DWARF. As there is no S_END symbol to close the
+ // compile unit, we need to check for the next compile unit.
+ if (IsCompileUnit) {
+ if (!ScopeStack.empty())
+ popScope();
+ InCompileUnitScope = true;
+ }
+
+ pushScope(Scope);
+ ReaderParent->addElement(Scope);
+}
+
+void LVLogicalVisitor::addElement(LVSymbol *Symbol) {
+ ReaderScope->addElement(Symbol);
+}
+
+void LVLogicalVisitor::addElement(LVType *Type) {
+ ReaderScope->addElement(Type);
+}
+
+LVElement *LVLogicalVisitor::createElement(TypeLeafKind Kind) {
+ CurrentScope = nullptr;
+ CurrentSymbol = nullptr;
+ CurrentType = nullptr;
+
+ if (Kind < TypeIndex::FirstNonSimpleIndex) {
+ CurrentType = Reader->createType();
+ CurrentType->setIsBase();
+ CurrentType->setTag(dwarf::DW_TAG_base_type);
+ if (options().getAttributeBase())
+ CurrentType->setIncludeInPrint();
+ return CurrentType;
+ }
+
+ switch (Kind) {
+ // Types.
+ case TypeLeafKind::LF_ENUMERATE:
+ CurrentType = Reader->createTypeEnumerator();
+ CurrentType->setTag(dwarf::DW_TAG_enumerator);
+ return CurrentType;
+ case TypeLeafKind::LF_MODIFIER:
+ CurrentType = Reader->createType();
+ CurrentType->setIsModifier();
+ return CurrentType;
+ case TypeLeafKind::LF_POINTER:
+ CurrentType = Reader->createType();
+ CurrentType->setIsPointer();
+ CurrentType->setName("*");
+ CurrentType->setTag(dwarf::DW_TAG_pointer_type);
+ return CurrentType;
+
+ // Symbols.
+ case TypeLeafKind::LF_BCLASS:
+ case TypeLeafKind::LF_IVBCLASS:
+ case TypeLeafKind::LF_VBCLASS:
+ CurrentSymbol = Reader->createSymbol();
+ CurrentSymbol->setTag(dwarf::DW_TAG_inheritance);
+ CurrentSymbol->setIsInheritance();
+ return CurrentSymbol;
+ case TypeLeafKind::LF_MEMBER:
+ case TypeLeafKind::LF_STMEMBER:
+ CurrentSymbol = Reader->createSymbol();
+ CurrentSymbol->setIsMember();
+ CurrentSymbol->setTag(dwarf::DW_TAG_member);
+ return CurrentSymbol;
+
+ // Scopes.
+ case TypeLeafKind::LF_ARRAY:
+ CurrentScope = Reader->createScopeArray();
+ CurrentScope->setTag(dwarf::DW_TAG_array_type);
+ return CurrentScope;
+ case TypeLeafKind::LF_CLASS:
+ CurrentScope = Reader->createScopeAggregate();
+ CurrentScope->setTag(dwarf::DW_TAG_class_type);
+ CurrentScope->setIsClass();
+ return CurrentScope;
+ case TypeLeafKind::LF_ENUM:
+ CurrentScope = Reader->createScopeEnumeration();
+ CurrentScope->setTag(dwarf::DW_TAG_enumeration_type);
+ return CurrentScope;
+ case TypeLeafKind::LF_METHOD:
+ case TypeLeafKind::LF_ONEMETHOD:
+ case TypeLeafKind::LF_PROCEDURE:
+ CurrentScope = Reader->createScopeFunction();
+ CurrentScope->setIsSubprogram();
+ CurrentScope->setTag(dwarf::DW_TAG_subprogram);
+ return CurrentScope;
+ case TypeLeafKind::LF_STRUCTURE:
+ CurrentScope = Reader->createScopeAggregate();
+ CurrentScope->setIsStructure();
+ CurrentScope->setTag(dwarf::DW_TAG_structure_type);
+ return CurrentScope;
+ case TypeLeafKind::LF_UNION:
+ CurrentScope = Reader->createScopeAggregate();
+ CurrentScope->setIsUnion();
+ CurrentScope->setTag(dwarf::DW_TAG_union_type);
+ return CurrentScope;
+ default:
+ // If '--internal=tag' and '--print=warning' are specified on the command
+ // line, we record and print each seen 'TypeLeafKind'.
+ break;
+ }
+ return nullptr;
+}
+
+LVElement *LVLogicalVisitor::createElement(SymbolKind Kind) {
+ CurrentScope = nullptr;
+ CurrentSymbol = nullptr;
+ CurrentType = nullptr;
+ switch (Kind) {
+ // Types.
+ case SymbolKind::S_UDT:
+ CurrentType = Reader->createTypeDefinition();
+ CurrentType->setTag(dwarf::DW_TAG_typedef);
+ return CurrentType;
+
+ // Symbols.
+ case SymbolKind::S_CONSTANT:
+ CurrentSymbol = Reader->createSymbol();
+ CurrentSymbol->setIsConstant();
+ CurrentSymbol->setTag(dwarf::DW_TAG_constant);
+ return CurrentSymbol;
+
+ case SymbolKind::S_BPREL32:
+ case SymbolKind::S_REGREL32:
+ case SymbolKind::S_GDATA32:
+ case SymbolKind::S_LDATA32:
+ case SymbolKind::S_LOCAL:
+ // During the symbol traversal more information is available to
+ // determine if the symbol is a parameter or a variable. At this
+ // stage, mark it as a variable.
+ CurrentSymbol = Reader->createSymbol();
+ CurrentSymbol->setIsVariable();
+ CurrentSymbol->setTag(dwarf::DW_TAG_variable);
+ return CurrentSymbol;
+
+ // Scopes.
+ case SymbolKind::S_BLOCK32:
+ CurrentScope = Reader->createScope();
+ CurrentScope->setIsLexicalBlock();
+ CurrentScope->setTag(dwarf::DW_TAG_lexical_block);
+ return CurrentScope;
+ case SymbolKind::S_COMPILE2:
+ case SymbolKind::S_COMPILE3:
+ CurrentScope = Reader->createScopeCompileUnit();
+ CurrentScope->setTag(dwarf::DW_TAG_compile_unit);
+ Reader->setCompileUnit(static_cast<LVScopeCompileUnit *>(CurrentScope));
+ return CurrentScope;
+ case SymbolKind::S_INLINESITE:
+ case SymbolKind::S_INLINESITE2:
+ CurrentScope = Reader->createScopeFunctionInlined();
+ CurrentScope->setIsInlinedFunction();
+ CurrentScope->setTag(dwarf::DW_TAG_inlined_subroutine);
+ return CurrentScope;
+ case SymbolKind::S_LPROC32:
+ case SymbolKind::S_GPROC32:
+ case SymbolKind::S_LPROC32_ID:
+ case SymbolKind::S_GPROC32_ID:
+ case SymbolKind::S_SEPCODE:
+ case SymbolKind::S_THUNK32:
+ CurrentScope = Reader->createScopeFunction();
+ CurrentScope->setIsSubprogram();
+ CurrentScope->setTag(dwarf::DW_TAG_subprogram);
+ return CurrentScope;
+ default:
+ // If '--internal=tag' and '--print=warning' are specified on the command
+ // line, we record and print each seen 'SymbolKind'.
+ break;
+ }
+ return nullptr;
+}
+
+LVElement *LVLogicalVisitor::createElement(TypeIndex TI, TypeLeafKind Kind) {
+ LVElement *Element = Shared->TypeRecords.find(StreamTPI, TI);
+ if (!Element) {
+ // We are dealing with a base type or pointer to a base type, which are
+ // not included explicitly in the CodeView format.
+ if (Kind < TypeIndex::FirstNonSimpleIndex) {
+ Element = createElement(Kind);
+ Element->setIsFinalized();
+ Shared->TypeRecords.add(StreamTPI, (TypeIndex)Kind, Kind, Element);
+ Element->setOffset(Kind);
+ return Element;
+ }
+ // We are dealing with a pointer to a base type.
+ if (TI.getIndex() < TypeIndex::FirstNonSimpleIndex) {
+ Element = createElement(Kind);
+ Shared->TypeRecords.add(StreamTPI, TI, Kind, Element);
+ Element->setOffset(TI.getIndex());
+ Element->setOffsetFromTypeIndex();
+ return Element;
+ }
+
+ W.printString("** Not implemented. **");
+ printTypeIndex("TypeIndex", TI, StreamTPI);
+ W.printString("TypeLeafKind", formatTypeLeafKind(Kind));
+ return nullptr;
+ }
+
+ Element->setOffset(TI.getIndex());
+ Element->setOffsetFromTypeIndex();
+ return Element;
+}
+
+void LVLogicalVisitor::createDataMember(CVMemberRecord &Record, LVScope *Parent,
+ StringRef Name, TypeIndex TI,
+ MemberAccess Access) {
+ LLVM_DEBUG({
+ printTypeIndex("TypeIndex", TI, StreamTPI);
+ W.printString("TypeName", Name);
+ });
+
+ createElement(Record.Kind);
+ if (LVSymbol *Symbol = CurrentSymbol) {
+ Symbol->setName(Name);
+ if (TI.isNoneType() || TI.isSimple())
+ Symbol->setType(getElement(StreamTPI, TI));
+ else {
+ LazyRandomTypeCollection &Types = types();
+ CVType CVMemberType = Types.getType(TI);
+ if (CVMemberType.kind() == LF_BITFIELD) {
+ if (Error Err = finishVisitation(CVMemberType, TI, Symbol)) {
+ consumeError(std::move(Err));
+ return;
+ }
+ } else
+ Symbol->setType(getElement(StreamTPI, TI));
+ }
+ Symbol->setAccessibilityCode(Access);
+ Parent->addElement(Symbol);
+ }
+}
+
+LVSymbol *LVLogicalVisitor::createParameter(LVElement *Element, StringRef Name,
+ LVScope *Parent) {
+ LVSymbol *Parameter = Reader->createSymbol();
+ Parent->addElement(Parameter);
+ Parameter->setIsParameter();
+ Parameter->setTag(dwarf::DW_TAG_formal_parameter);
+ Parameter->setName(Name);
+ Parameter->setType(Element);
+ return Parameter;
+}
+
+LVSymbol *LVLogicalVisitor::createParameter(TypeIndex TI, StringRef Name,
+ LVScope *Parent) {
+ return createParameter(getElement(StreamTPI, TI), Name, Parent);
+}
+
+LVType *LVLogicalVisitor::createBaseType(TypeIndex TI, StringRef TypeName) {
+ TypeLeafKind SimpleKind = (TypeLeafKind)TI.getSimpleKind();
+ TypeIndex TIR = (TypeIndex)SimpleKind;
+ LLVM_DEBUG({
+ printTypeIndex("TypeIndex", TIR, StreamTPI);
+ W.printString("TypeName", TypeName);
+ });
+
+ if (LVElement *Element = Shared->TypeRecords.find(StreamTPI, TIR))
+ return static_cast<LVType *>(Element);
+
+ if (createElement(TIR, SimpleKind)) {
+ CurrentType->setName(TypeName);
+ Reader->getCompileUnit()->addElement(CurrentType);
+ }
+ return CurrentType;
+}
+
+LVType *LVLogicalVisitor::createPointerType(TypeIndex TI, StringRef TypeName) {
+ LLVM_DEBUG({
+ printTypeIndex("TypeIndex", TI, StreamTPI);
+ W.printString("TypeName", TypeName);
+ });
+
+ if (LVElement *Element = Shared->TypeRecords.find(StreamTPI, TI))
+ return static_cast<LVType *>(Element);
+
+ LVType *Pointee = createBaseType(TI, TypeName.drop_back(1));
+ if (createElement(TI, TypeLeafKind::LF_POINTER)) {
+ CurrentType->setIsFinalized();
+ CurrentType->setType(Pointee);
+ Reader->getCompileUnit()->addElement(CurrentType);
+ }
+ return CurrentType;
+}
+
+void LVLogicalVisitor::createParents(StringRef ScopedName, LVElement *Element) {
+ // For the given test case:
+ //
+ // struct S { enum E { ... }; };
+ // S::E V;
+ //
+ // 0 | S_LOCAL `V`
+ // type=0x1004 (S::E), flags = none
+ // 0x1004 | LF_ENUM `S::E`
+ // options: has unique name | is nested
+ // 0x1009 | LF_STRUCTURE `S`
+ // options: contains nested class
+ //
+ // When the local 'V' is processed, its type 'E' is created, but there is
+ // no direct reference to its parent 'S'. We use the scoped name of 'E'
+ // to create its parents.
+
+ // The input scoped name must have at least parent and nested names.
+ // Drop the last element name, as it corresponds to the nested type.
+ LVStringRefs Components = getAllLexicalComponents(ScopedName);
+ if (Components.size() < 2)
+ return;
+ Components.pop_back();
+
+ LVStringRefs::size_type FirstNamespace;
+ LVStringRefs::size_type FirstAggregate;
+ std::tie(FirstNamespace, FirstAggregate) =
+ Shared->NamespaceDeduction.find(Components);
+
+ LLVM_DEBUG({
+ W.printString("First Namespace", Components[FirstNamespace]);
+ W.printString("First NonNamespace", Components[FirstAggregate]);
+ });
+
+ // Create any referenced namespaces.
+ if (FirstNamespace < FirstAggregate) {
+ Shared->NamespaceDeduction.get(
+ LVStringRefs(Components.begin() + FirstNamespace,
+ Components.begin() + FirstAggregate));
+ }
+
+ // Traverse the enclosing scopes (aggregates) and create them. In the
+ // case of nested empty aggregates, MSVC does not emit a full record
+ // description. It emits only the reference record.
+ LVScope *Aggregate = nullptr;
+ TypeIndex TIAggregate;
+ std::string AggregateName = getScopedName(
+ LVStringRefs(Components.begin(), Components.begin() + FirstAggregate));
+
+ // This traversal is executed at least once.
+ for (LVStringRefs::size_type Index = FirstAggregate;
+ Index < Components.size(); ++Index) {
+ AggregateName = getScopedName(LVStringRefs(Components.begin() + Index,
+ Components.begin() + Index + 1),
+ AggregateName);
+ TIAggregate = Shared->ForwardReferences.remap(
+ Shared->TypeRecords.find(StreamTPI, AggregateName));
+ Aggregate =
+ TIAggregate.isNoneType()
+ ? nullptr
+ : static_cast<LVScope *>(getElement(StreamTPI, TIAggregate));
+ }
+
+ // Workaround for cases where LF_NESTTYPE is missing for nested templates.
+ // If we manage to get parent information from the scoped name, we can add
+ // the nested type without relying on the LF_NESTTYPE.
+ if (Aggregate && !Element->getIsScopedAlready()) {
+ Aggregate->addElement(Element);
+ Element->setIsScopedAlready();
+ }
+}
+
+LVElement *LVLogicalVisitor::getElement(uint32_t StreamIdx, TypeIndex TI,
+ LVScope *Parent) {
+ LLVM_DEBUG({ printTypeIndex("TypeIndex", TI, StreamTPI); });
+ TI = Shared->ForwardReferences.remap(TI);
+ LLVM_DEBUG({ printTypeIndex("TypeIndex Remap", TI, StreamTPI); });
+
+ LVElement *Element = Shared->TypeRecords.find(StreamIdx, TI);
+ if (!Element) {
+ if (TI.isNoneType() || TI.isSimple()) {
+ StringRef TypeName = TypeIndex::simpleTypeName(TI);
+ // If the name ends with "*", create two logical types: a pointer and a
+ // pointee type. TypeIndex is composed of a SimpleTypeMode byte followed
+ // by a SimpleTypeKind byte. The logical pointer will be identified by
+ // the full TypeIndex value and the pointee by the SimpleTypeKind.
+ return (TypeName.back() == '*') ? createPointerType(TI, TypeName)
+ : createBaseType(TI, TypeName);
+ }
+
+ LLVM_DEBUG({ W.printHex("TypeIndex not implemented: ", TI.getIndex()); });
+ return nullptr;
+ }
+
+ // The element has been finalized.
+ if (Element->getIsFinalized())
+ return Element;
+
+ // If a parent was given, add the element to it.
+ if (Parent)
+ Parent->addElement(Element);
+
+ // Check for a composite type.
+ LazyRandomTypeCollection &Types = types();
+ CVType CVRecord = Types.getType(TI);
+ if (Error Err = finishVisitation(CVRecord, TI, Element)) {
+ consumeError(std::move(Err));
+ return nullptr;
+ }
+ Element->setIsFinalized();
+ return Element;
+}
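For simple indices, the split described in the comment can be read off the hex value. A worked example using standard CodeView values (SimpleTypeKind 0x74 = 32-bit int, SimpleTypeMode 6 = 64-bit near pointer):

    TypeIndex TI(0x0674);               // mode byte 0x06, kind byte 0x74
    // simpleTypeName(TI) is "int*", so the name ends in '*':
    //   createPointerType(0x0674, "int*") -> logical pointer keyed by 0x0674
    //     -> createBaseType(...)          -> pointee "int" keyed by 0x0074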
+
+void LVLogicalVisitor::processLines() {
+ // Traverse the collected LF_UDT_SRC_LINE records and add the source line
+ // information to the logical elements.
+ for (const TypeIndex &Entry : Shared->LineRecords) {
+ CVType CVRecord = ids().getType(Entry);
+ UdtSourceLineRecord Line;
+ if (Error Err = TypeDeserializer::deserializeAs(
+ const_cast<CVType &>(CVRecord), Line))
+ consumeError(std::move(Err));
+ else {
+ LLVM_DEBUG({
+ printTypeIndex("UDT", Line.getUDT(), StreamIPI);
+ printTypeIndex("SourceFile", Line.getSourceFile(), StreamIPI);
+ W.printNumber("LineNumber", Line.getLineNumber());
+ });
+
+ // The TypeIndex returned by 'getUDT()' must point to an already
+ // created logical element. If no logical element is found, it means
+ // the LF_UDT_SRC_LINE is associated with a system TypeIndex.
+ if (LVElement *Element = Shared->TypeRecords.find(
+ StreamTPI, Line.getUDT(), /*Create=*/false)) {
+ Element->setLineNumber(Line.getLineNumber());
+ Element->setFilenameIndex(
+ Shared->StringRecords.findIndex(Line.getSourceFile()));
+ }
+ }
+ }
+}
+
+void LVLogicalVisitor::processNamespaces() {
+ // Create namespaces.
+ Shared->NamespaceDeduction.init();
+}
+
+void LVLogicalVisitor::processFiles() { Shared->StringRecords.addFilenames(); }
+
+void LVLogicalVisitor::printRecords(raw_ostream &OS) const {
+ if (!options().getInternalTag())
+ return;
+
+ unsigned Count = 0;
+ auto PrintItem = [&](StringRef Name) {
+ auto NewLine = [&]() {
+ if (++Count == 4) {
+ Count = 0;
+ OS << "\n";
+ }
+ };
+ OS << format("%20s", Name.str().c_str());
+ NewLine();
+ };
+
+ OS << "\nTypes:\n";
+ for (const TypeLeafKind &Kind : Shared->TypeKinds)
+ PrintItem(formatTypeLeafKind(Kind));
+ Shared->TypeKinds.clear();
+
+ Count = 0;
+ OS << "\nSymbols:\n";
+ for (const SymbolKind &Kind : Shared->SymbolKinds)
+ PrintItem(LVCodeViewReader::getSymbolKindName(Kind));
+ Shared->SymbolKinds.clear();
+
+ OS << "\n";
+}
+
+Error LVLogicalVisitor::inlineSiteAnnotation(LVScope *AbstractFunction,
+ LVScope *InlinedFunction,
+ InlineSiteSym &InlineSite) {
+ // Get the parent scope to update the address ranges of the nested
+ // scope representing the inlined function.
+ LVAddress ParentLowPC = 0;
+ LVScope *Parent = InlinedFunction->getParentScope();
+ if (const LVLocations *Locations = Parent->getRanges()) {
+ if (!Locations->empty())
+ ParentLowPC = (*Locations->begin())->getLowerAddress();
+ }
+
+ // For the given inline site, get the initial line number and its
+ // source filename. Update the logical scope representing it.
+ uint32_t LineNumber = 0;
+ StringRef Filename;
+ LVInlineeInfo::iterator Iter = InlineeInfo.find(InlineSite.Inlinee);
+ if (Iter != InlineeInfo.end()) {
+ LineNumber = Iter->second.first;
+ Filename = Iter->second.second;
+ AbstractFunction->setLineNumber(LineNumber);
+ // TODO: This part needs additional work in order to set the correct
+ // filename and to detect changes between filenames.
+ // AbstractFunction->setFilename(Filename);
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "inlineSiteAnnotation\n"
+ << "Abstract: " << AbstractFunction->getName() << "\n"
+ << "Inlined: " << InlinedFunction->getName() << "\n"
+ << "Parent: " << Parent->getName() << "\n"
+ << "Low PC: " << hexValue(ParentLowPC) << "\n";
+ });
+
+ // Get the source lines if requested by command line option.
+ if (!options().getPrintLines())
+ return Error::success();
+
+ // Limitation: Currently we don't track changes in the FileOffset. The
+ // side effect is that the caller is unable to differentiate the source
+ // filename for the inlined code.
+ uint64_t CodeOffset = ParentLowPC;
+ int32_t LineOffset = LineNumber;
+ uint32_t FileOffset = 0;
+
+ auto UpdateClose = [&]() { LLVM_DEBUG({ dbgs() << ("\n"); }); };
+ auto UpdateCodeOffset = [&](uint32_t Delta) {
+ CodeOffset += Delta;
+ LLVM_DEBUG({
+ dbgs() << formatv(" code 0x{0} (+0x{1})", utohexstr(CodeOffset),
+ utohexstr(Delta));
+ });
+ };
+ auto UpdateLineOffset = [&](int32_t Delta) {
+ LineOffset += Delta;
+ LLVM_DEBUG({
+ char Sign = Delta > 0 ? '+' : '-';
+ dbgs() << formatv(" line {0} ({1}{2})", LineOffset, Sign,
+ std::abs(Delta));
+ });
+ };
+ auto UpdateFileOffset = [&](int32_t Offset) {
+ FileOffset = Offset;
+ LLVM_DEBUG({ dbgs() << formatv(" file {0}", FileOffset); });
+ };
+
+ LVLines InlineeLines;
+ auto CreateLine = [&]() {
+ // Create the logical line record.
+ LVLineDebug *Line = Reader->createLineDebug();
+ Line->setAddress(CodeOffset);
+ Line->setLineNumber(LineOffset);
+ // TODO: This part needs additional work in order to set the correct
+ // filename and to detect changes between filenames.
+ // Line->setFilename(Filename);
+ InlineeLines.push_back(Line);
+ };
+
+ bool SeenLowAddress = false;
+ bool SeenHighAddress = false;
+ uint64_t LowPC = 0;
+ uint64_t HighPC = 0;
+
+ for (auto &Annot : InlineSite.annotations()) {
+ LLVM_DEBUG({
+ dbgs() << formatv(" {0}",
+ fmt_align(toHex(Annot.Bytes), AlignStyle::Left, 9));
+ });
+
+ // Use the opcode to interpret the integer values.
+ switch (Annot.OpCode) {
+ case BinaryAnnotationsOpCode::ChangeCodeOffset:
+ case BinaryAnnotationsOpCode::CodeOffset:
+ case BinaryAnnotationsOpCode::ChangeCodeLength:
+ UpdateCodeOffset(Annot.U1);
+ UpdateClose();
+ if (Annot.OpCode == BinaryAnnotationsOpCode::ChangeCodeOffset) {
+ CreateLine();
+ LowPC = CodeOffset;
+ SeenLowAddress = true;
+ break;
+ }
+ if (Annot.OpCode == BinaryAnnotationsOpCode::ChangeCodeLength) {
+ HighPC = CodeOffset - 1;
+ SeenHighAddress = true;
+ }
+ break;
+ case BinaryAnnotationsOpCode::ChangeCodeLengthAndCodeOffset:
+ UpdateCodeOffset(Annot.U2);
+ UpdateClose();
+ break;
+ case BinaryAnnotationsOpCode::ChangeLineOffset:
+ case BinaryAnnotationsOpCode::ChangeCodeOffsetAndLineOffset:
+ UpdateCodeOffset(Annot.U1);
+ UpdateLineOffset(Annot.S1);
+ UpdateClose();
+ if (Annot.OpCode ==
+ BinaryAnnotationsOpCode::ChangeCodeOffsetAndLineOffset)
+ CreateLine();
+ break;
+ case BinaryAnnotationsOpCode::ChangeFile:
+ UpdateFileOffset(Annot.U1);
+ UpdateClose();
+ break;
+ default:
+ break;
+ }
+ if (SeenLowAddress && SeenHighAddress) {
+ SeenLowAddress = false;
+ SeenHighAddress = false;
+ InlinedFunction->addObject(LowPC, HighPC);
+ }
+ }
+
+ Reader->addInlineeLines(InlinedFunction, InlineeLines);
+ UpdateClose();
+
+ return Error::success();
+}
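To make the annotation handling concrete, here is a made-up trace for an inline site whose parent function starts at 0x1000 and whose inlinee line number is 10 (opcodes and deltas chosen for illustration only):

    ChangeCodeOffset              U1=0x10      -> CodeOffset 0x1010, line
                                                  record (0x1010, 10), LowPC
    ChangeCodeOffsetAndLineOffset U1=0x20 S1=2 -> CodeOffset 0x1030, line 12,
                                                  line record (0x1030, 12)
    ChangeCodeLength              U1=0x08      -> CodeOffset 0x1038, HighPC
                                                  0x1037; range [0x1010,
                                                  0x1037] added to the
                                                  inlined function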
diff --git a/llvm/lib/DebugInfo/LogicalView/Readers/LVELFReader.cpp b/llvm/lib/DebugInfo/LogicalView/Readers/LVELFReader.cpp
index 7746bc508b41..ab458341a0bd 100644
--- a/llvm/lib/DebugInfo/LogicalView/Readers/LVELFReader.cpp
+++ b/llvm/lib/DebugInfo/LogicalView/Readers/LVELFReader.cpp
@@ -57,182 +57,182 @@ LVElement *LVELFReader::createElement(dwarf::Tag Tag) {
switch (Tag) {
// Types.
case dwarf::DW_TAG_base_type:
- CurrentType = new LVType();
+ CurrentType = createType();
CurrentType->setIsBase();
if (options().getAttributeBase())
CurrentType->setIncludeInPrint();
return CurrentType;
case dwarf::DW_TAG_const_type:
- CurrentType = new LVType();
+ CurrentType = createType();
CurrentType->setIsConst();
CurrentType->setName("const");
return CurrentType;
case dwarf::DW_TAG_enumerator:
- CurrentType = new LVTypeEnumerator();
+ CurrentType = createTypeEnumerator();
return CurrentType;
case dwarf::DW_TAG_imported_declaration:
- CurrentType = new LVTypeImport();
+ CurrentType = createTypeImport();
CurrentType->setIsImportDeclaration();
return CurrentType;
case dwarf::DW_TAG_imported_module:
- CurrentType = new LVTypeImport();
+ CurrentType = createTypeImport();
CurrentType->setIsImportModule();
return CurrentType;
case dwarf::DW_TAG_pointer_type:
- CurrentType = new LVType();
+ CurrentType = createType();
CurrentType->setIsPointer();
CurrentType->setName("*");
return CurrentType;
case dwarf::DW_TAG_ptr_to_member_type:
- CurrentType = new LVType();
+ CurrentType = createType();
CurrentType->setIsPointerMember();
CurrentType->setName("*");
return CurrentType;
case dwarf::DW_TAG_reference_type:
- CurrentType = new LVType();
+ CurrentType = createType();
CurrentType->setIsReference();
CurrentType->setName("&");
return CurrentType;
case dwarf::DW_TAG_restrict_type:
- CurrentType = new LVType();
+ CurrentType = createType();
CurrentType->setIsRestrict();
CurrentType->setName("restrict");
return CurrentType;
case dwarf::DW_TAG_rvalue_reference_type:
- CurrentType = new LVType();
+ CurrentType = createType();
CurrentType->setIsRvalueReference();
CurrentType->setName("&&");
return CurrentType;
case dwarf::DW_TAG_subrange_type:
- CurrentType = new LVTypeSubrange();
+ CurrentType = createTypeSubrange();
return CurrentType;
case dwarf::DW_TAG_template_value_parameter:
- CurrentType = new LVTypeParam();
+ CurrentType = createTypeParam();
CurrentType->setIsTemplateValueParam();
return CurrentType;
case dwarf::DW_TAG_template_type_parameter:
- CurrentType = new LVTypeParam();
+ CurrentType = createTypeParam();
CurrentType->setIsTemplateTypeParam();
return CurrentType;
case dwarf::DW_TAG_GNU_template_template_param:
- CurrentType = new LVTypeParam();
+ CurrentType = createTypeParam();
CurrentType->setIsTemplateTemplateParam();
return CurrentType;
case dwarf::DW_TAG_typedef:
- CurrentType = new LVTypeDefinition();
+ CurrentType = createTypeDefinition();
return CurrentType;
case dwarf::DW_TAG_unspecified_type:
- CurrentType = new LVType();
+ CurrentType = createType();
CurrentType->setIsUnspecified();
return CurrentType;
case dwarf::DW_TAG_volatile_type:
- CurrentType = new LVType();
+ CurrentType = createType();
CurrentType->setIsVolatile();
CurrentType->setName("volatile");
return CurrentType;
// Symbols.
case dwarf::DW_TAG_formal_parameter:
- CurrentSymbol = new LVSymbol();
+ CurrentSymbol = createSymbol();
CurrentSymbol->setIsParameter();
return CurrentSymbol;
case dwarf::DW_TAG_unspecified_parameters:
- CurrentSymbol = new LVSymbol();
+ CurrentSymbol = createSymbol();
CurrentSymbol->setIsUnspecified();
CurrentSymbol->setName("...");
return CurrentSymbol;
case dwarf::DW_TAG_member:
- CurrentSymbol = new LVSymbol();
+ CurrentSymbol = createSymbol();
CurrentSymbol->setIsMember();
return CurrentSymbol;
case dwarf::DW_TAG_variable:
- CurrentSymbol = new LVSymbol();
+ CurrentSymbol = createSymbol();
CurrentSymbol->setIsVariable();
return CurrentSymbol;
case dwarf::DW_TAG_inheritance:
- CurrentSymbol = new LVSymbol();
+ CurrentSymbol = createSymbol();
CurrentSymbol->setIsInheritance();
return CurrentSymbol;
case dwarf::DW_TAG_call_site_parameter:
case dwarf::DW_TAG_GNU_call_site_parameter:
- CurrentSymbol = new LVSymbol();
+ CurrentSymbol = createSymbol();
CurrentSymbol->setIsCallSiteParameter();
return CurrentSymbol;
case dwarf::DW_TAG_constant:
- CurrentSymbol = new LVSymbol();
+ CurrentSymbol = createSymbol();
CurrentSymbol->setIsConstant();
return CurrentSymbol;
// Scopes.
case dwarf::DW_TAG_catch_block:
- CurrentScope = new LVScope();
+ CurrentScope = createScope();
CurrentScope->setIsCatchBlock();
return CurrentScope;
case dwarf::DW_TAG_lexical_block:
- CurrentScope = new LVScope();
+ CurrentScope = createScope();
CurrentScope->setIsLexicalBlock();
return CurrentScope;
case dwarf::DW_TAG_try_block:
- CurrentScope = new LVScope();
+ CurrentScope = createScope();
CurrentScope->setIsTryBlock();
return CurrentScope;
case dwarf::DW_TAG_compile_unit:
case dwarf::DW_TAG_skeleton_unit:
- CurrentScope = new LVScopeCompileUnit();
+ CurrentScope = createScopeCompileUnit();
CompileUnit = static_cast<LVScopeCompileUnit *>(CurrentScope);
return CurrentScope;
case dwarf::DW_TAG_inlined_subroutine:
- CurrentScope = new LVScopeFunctionInlined();
+ CurrentScope = createScopeFunctionInlined();
return CurrentScope;
case dwarf::DW_TAG_namespace:
- CurrentScope = new LVScopeNamespace();
+ CurrentScope = createScopeNamespace();
return CurrentScope;
case dwarf::DW_TAG_template_alias:
- CurrentScope = new LVScopeAlias();
+ CurrentScope = createScopeAlias();
return CurrentScope;
case dwarf::DW_TAG_array_type:
- CurrentScope = new LVScopeArray();
+ CurrentScope = createScopeArray();
return CurrentScope;
case dwarf::DW_TAG_call_site:
case dwarf::DW_TAG_GNU_call_site:
- CurrentScope = new LVScopeFunction();
+ CurrentScope = createScopeFunction();
CurrentScope->setIsCallSite();
return CurrentScope;
case dwarf::DW_TAG_entry_point:
- CurrentScope = new LVScopeFunction();
+ CurrentScope = createScopeFunction();
CurrentScope->setIsEntryPoint();
return CurrentScope;
case dwarf::DW_TAG_subprogram:
- CurrentScope = new LVScopeFunction();
+ CurrentScope = createScopeFunction();
CurrentScope->setIsSubprogram();
return CurrentScope;
case dwarf::DW_TAG_subroutine_type:
- CurrentScope = new LVScopeFunctionType();
+ CurrentScope = createScopeFunctionType();
return CurrentScope;
case dwarf::DW_TAG_label:
- CurrentScope = new LVScopeFunction();
+ CurrentScope = createScopeFunction();
CurrentScope->setIsLabel();
return CurrentScope;
case dwarf::DW_TAG_class_type:
- CurrentScope = new LVScopeAggregate();
+ CurrentScope = createScopeAggregate();
CurrentScope->setIsClass();
return CurrentScope;
case dwarf::DW_TAG_structure_type:
- CurrentScope = new LVScopeAggregate();
+ CurrentScope = createScopeAggregate();
CurrentScope->setIsStructure();
return CurrentScope;
case dwarf::DW_TAG_union_type:
- CurrentScope = new LVScopeAggregate();
+ CurrentScope = createScopeAggregate();
CurrentScope->setIsUnion();
return CurrentScope;
case dwarf::DW_TAG_enumeration_type:
- CurrentScope = new LVScopeEnumeration();
+ CurrentScope = createScopeEnumeration();
return CurrentScope;
case dwarf::DW_TAG_GNU_formal_parameter_pack:
- CurrentScope = new LVScopeFormalPack();
+ CurrentScope = createScopeFormalPack();
return CurrentScope;
case dwarf::DW_TAG_GNU_template_parameter_pack:
- CurrentScope = new LVScopeTemplatePack();
+ CurrentScope = createScopeTemplatePack();
return CurrentScope;
default:
// Collect TAGs not implemented.
@@ -548,22 +548,22 @@ LVScope *LVELFReader::processOneDie(const DWARFDie &InputDIE, LVScope *Parent,
// referencing this element.
if (ElementTable.find(Offset) == ElementTable.end()) {
// No previous references to this offset.
- ElementTable.emplace(
- std::piecewise_construct, std::forward_as_tuple(Offset),
- std::forward_as_tuple(CurrentElement, LVElementSet()));
+ ElementTable.emplace(std::piecewise_construct,
+ std::forward_as_tuple(Offset),
+ std::forward_as_tuple(CurrentElement));
} else {
// There are previous references to this element. We need to update the
// element and all the references pointing to this element.
LVElementEntry &Reference = ElementTable[Offset];
- Reference.first = CurrentElement;
+ Reference.Element = CurrentElement;
// Traverse the element set and update the elements (backtracking).
- // Using the bit associated with 'type' or 'reference' allows us to set
- // the correct target.
- for (LVElement *Target : Reference.second)
- Target->getHasReference() ? Target->setReference(CurrentElement)
- : Target->setType(CurrentElement);
+ for (LVElement *Target : Reference.References)
+ Target->setReference(CurrentElement);
+ for (LVElement *Target : Reference.Types)
+ Target->setType(CurrentElement);
// Clear the pending elements.
- Reference.second.clear();
+ Reference.References.clear();
+ Reference.Types.clear();
}
// Add the current element to its parent as there are attributes
@@ -733,7 +733,7 @@ void LVELFReader::createLineAndFileRecords(
// the 'processLines()' function will move each created logical line
// to its enclosing logical scope, using the debug ranges information
// and they will be released when its scope parent is deleted.
- LVLineDebug *Line = new LVLineDebug();
+ LVLineDebug *Line = createLineDebug();
CULines.push_back(Line);
Line->setAddress(Row.Address.Address);
Line->setFilename(
@@ -759,7 +759,8 @@ void LVELFReader::createLineAndFileRecords(
}
}
-std::string LVELFReader::getRegisterName(LVSmall Opcode, uint64_t Operands[2]) {
+std::string LVELFReader::getRegisterName(LVSmall Opcode,
+ ArrayRef<uint64_t> Operands) {
// The 'prettyPrintRegisterOp' function uses the DWARFUnit to support
// DW_OP_regval_type. At this point we are operating on a logical view
// item, with no access to the underlying DWARF data used by LLVM.
@@ -973,19 +974,8 @@ void LVELFReader::processLocationList(dwarf::Attribute Attr,
bool CallSiteLocation) {
auto ProcessLocationExpression = [&](const DWARFExpression &Expression) {
- // DW_OP_const_type is variable-length and has 3
- // operands. DWARFExpression thus far only supports 2.
- uint64_t Operands[2] = {0};
- for (const DWARFExpression::Operation &Op : Expression) {
- DWARFExpression::Operation::Description Description = Op.getDescription();
- for (unsigned Operand = 0; Operand < 2; ++Operand) {
- if (Description.Op[Operand] == DWARFExpression::Operation::SizeNA)
- break;
- Operands[Operand] = Op.getRawOperand(Operand);
- }
- CurrentSymbol->addLocationOperands(Op.getCode(), Operands[0],
- Operands[1]);
- }
+ for (const DWARFExpression::Operation &Op : Expression)
+ CurrentSymbol->addLocationOperands(Op.getCode(), Op.getRawOperands());
};
DWARFUnit *U = Die.getDwarfUnit();
@@ -1075,12 +1065,14 @@ void LVELFReader::processLocationMember(dwarf::Attribute Attr,
// Update the current element with the reference.
void LVELFReader::updateReference(dwarf::Attribute Attr,
const DWARFFormValue &FormValue) {
- // We are assuming that DW_AT_specification, DW_AT_abstract_origin,
- // DW_AT_type and DW_AT_extension do not appear at the same time
- // in the same DIE.
+ // FIXME: We are assuming that at most one Reference (DW_AT_specification,
+ // DW_AT_abstract_origin, ...) and at most one Type (DW_AT_import, DW_AT_type)
+ // appear in any single DIE, but this may not be true.
uint64_t Reference = *FormValue.getAsReference();
// Get target for the given reference, if already created.
- LVElement *Target = getElementForOffset(Reference, CurrentElement);
+ LVElement *Target = getElementForOffset(
+ Reference, CurrentElement,
+ /*IsType=*/Attr == dwarf::DW_AT_import || Attr == dwarf::DW_AT_type);
// Check if we are dealing with cross CU references.
if (FormValue.getForm() == dwarf::DW_FORM_ref_addr) {
if (Target) {
@@ -1124,26 +1116,18 @@ void LVELFReader::updateReference(dwarf::Attribute Attr,
}
// Get an element given the DIE offset.
-LVElement *LVELFReader::getElementForOffset(LVOffset Offset,
- LVElement *Element) {
- LVElement *Target = nullptr;
- // Search offset in the cross references.
- LVElementReference::iterator Iter = ElementTable.find(Offset);
- if (Iter == ElementTable.end())
- // Reference to an unseen element.
- ElementTable.emplace(std::piecewise_construct,
- std::forward_as_tuple(Offset),
- std::forward_as_tuple(nullptr, LVElementSet{Element}));
- else {
- // There are previous references to this element. We need to update the
- // element and all the references pointing to this element.
- LVElementEntry &Reference = Iter->second;
- Target = Reference.first;
- if (!Target)
- // Add the element to the set.
- Reference.second.insert(Element);
+LVElement *LVELFReader::getElementForOffset(LVOffset Offset, LVElement *Element,
+ bool IsType) {
+ auto Iter = ElementTable.try_emplace(Offset).first;
+ // Update the element and all the references pointing to this element.
+ LVElementEntry &Entry = Iter->second;
+ if (!Entry.Element) {
+ if (IsType)
+ Entry.Types.insert(Element);
+ else
+ Entry.References.insert(Element);
}
- return Target;
+ return Entry.Element;
}
Error LVELFReader::loadTargetInfo(const ObjectFile &Obj) {
diff --git a/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp b/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
index f9a763d724a8..c26caa647ed9 100644
--- a/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
+++ b/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
@@ -364,6 +364,18 @@ Expected<FileBufferByteStream> MSFBuilder::commit(StringRef Path,
FileSize, Layout.SB->BlockSize));
}
+ uint64_t NumDirectoryBlocks =
+ bytesToBlocks(Layout.SB->NumDirectoryBytes, Layout.SB->BlockSize);
+ uint64_t DirectoryBlockMapSize =
+ NumDirectoryBlocks * sizeof(support::ulittle32_t);
+ if (DirectoryBlockMapSize > Layout.SB->BlockSize) {
+ return make_error<MSFError>(msf_error_code::stream_directory_overflow,
+ formatv("The directory block map ({0} bytes) "
+ "doesn't fit in a block ({1} bytes)",
+ DirectoryBlockMapSize,
+ Layout.SB->BlockSize));
+ }
+
auto OutFileOrError = FileOutputBuffer::create(Path, FileSize);
if (auto EC = OutFileOrError.takeError())
return std::move(EC);
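The added overflow check bounds the stream directory by requiring its block map to fit in a single block; with the common 4096-byte block size the arithmetic works out as:

    NumDirectoryBlocks    = ceil(NumDirectoryBytes / 4096)
    DirectoryBlockMapSize = NumDirectoryBlocks * sizeof(ulittle32_t)  // * 4
    4096 / 4 = 1024 directory blocks at most
            -> stream directory limited to 1024 * 4096 bytes = 4 MiB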
diff --git a/llvm/lib/DebugInfo/MSF/MSFError.cpp b/llvm/lib/DebugInfo/MSF/MSFError.cpp
index fd93c3e726cc..dbd8648c4d41 100644
--- a/llvm/lib/DebugInfo/MSF/MSFError.cpp
+++ b/llvm/lib/DebugInfo/MSF/MSFError.cpp
@@ -43,6 +43,8 @@ public:
return "The data is in an unexpected format.";
case msf_error_code::block_in_use:
return "The block is already in use.";
+ case msf_error_code::stream_directory_overflow:
+ return "PDB stream directory too large.";
}
llvm_unreachable("Unrecognized msf_error_code");
}
diff --git a/llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp b/llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
index 009cd113f652..081cede6d840 100644
--- a/llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/DbiModuleList.cpp
@@ -240,7 +240,9 @@ Error DbiModuleList::initializeFileInfo(BinaryStreamRef FileInfo) {
}
uint32_t DbiModuleList::getModuleCount() const {
- return FileInfoHeader->NumModules;
+ // Workaround to avoid the crash until upstream issue is fixed:
+ // https://github.com/llvm/llvm-project/issues/55214
+ return FileInfoHeader ? FileInfoHeader->NumModules : 0;
}
uint32_t DbiModuleList::getSourceFileCount() const {
diff --git a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
index 169d49f64eb5..b17fbd63e9fd 100644
--- a/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/GSIStreamBuilder.cpp
@@ -76,7 +76,7 @@ struct llvm::pdb::SymbolDenseMapInfo {
return Tombstone;
}
static unsigned getHashValue(const CVSymbol &Val) {
- return xxHash64(Val.RecordData);
+ return xxh3_64bits(Val.RecordData);
}
static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) {
return LHS.RecordData == RHS.RecordData;
diff --git a/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp b/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp
index 495b25077737..85c22483fa90 100644
--- a/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/InputFile.cpp
@@ -8,6 +8,7 @@
#include "llvm/DebugInfo/PDB/Native/InputFile.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
@@ -347,32 +348,32 @@ Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) {
PDBFile &InputFile::pdb() {
assert(isPdb());
- return *PdbOrObj.get<PDBFile *>();
+ return *cast<PDBFile *>(PdbOrObj);
}
const PDBFile &InputFile::pdb() const {
assert(isPdb());
- return *PdbOrObj.get<PDBFile *>();
+ return *cast<PDBFile *>(PdbOrObj);
}
object::COFFObjectFile &InputFile::obj() {
assert(isObj());
- return *PdbOrObj.get<object::COFFObjectFile *>();
+ return *cast<object::COFFObjectFile *>(PdbOrObj);
}
const object::COFFObjectFile &InputFile::obj() const {
assert(isObj());
- return *PdbOrObj.get<object::COFFObjectFile *>();
+ return *cast<object::COFFObjectFile *>(PdbOrObj);
}
MemoryBuffer &InputFile::unknown() {
assert(isUnknown());
- return *PdbOrObj.get<MemoryBuffer *>();
+ return *cast<MemoryBuffer *>(PdbOrObj);
}
const MemoryBuffer &InputFile::unknown() const {
assert(isUnknown());
- return *PdbOrObj.get<MemoryBuffer *>();
+ return *cast<MemoryBuffer *>(PdbOrObj);
}
StringRef InputFile::getFilePath() const {
@@ -402,13 +403,13 @@ bool InputFile::hasIds() const {
return pdb().hasPDBIpiStream();
}
-bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); }
+bool InputFile::isPdb() const { return isa<PDBFile *>(PdbOrObj); }
bool InputFile::isObj() const {
- return PdbOrObj.is<object::COFFObjectFile *>();
+ return isa<object::COFFObjectFile *>(PdbOrObj);
}
-bool InputFile::isUnknown() const { return PdbOrObj.is<MemoryBuffer *>(); }
+bool InputFile::isUnknown() const { return isa<MemoryBuffer *>(PdbOrObj); }
codeview::LazyRandomTypeCollection &
InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
@@ -562,13 +563,13 @@ static bool isMyCode(const SymbolGroup &Group) {
StringRef Name = Group.name();
if (Name.startswith("Import:"))
return false;
- if (Name.endswith_insensitive(".dll"))
+ if (Name.ends_with_insensitive(".dll"))
return false;
if (Name.equals_insensitive("* linker *"))
return false;
- if (Name.startswith_insensitive("f:\\binaries\\Intermediate\\vctools"))
+ if (Name.starts_with_insensitive("f:\\binaries\\Intermediate\\vctools"))
return false;
- if (Name.startswith_insensitive("f:\\dd\\vctools\\crt"))
+ if (Name.starts_with_insensitive("f:\\dd\\vctools\\crt"))
return false;
return true;
}
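
PdbOrObj is a PointerUnion, and these hunks move it from the member-style is<T>()/get<T>() accessors to the free isa<>/cast<> casting functions, which now understand PointerUnion. A minimal sketch with hypothetical payload types (PDBDoc/CoffDoc stand in for PDBFile/COFFObjectFile):

#include "llvm/ADT/PointerUnion.h"
#include "llvm/Support/Casting.h"

// alignas gives the pointees the spare low pointer bits PointerUnion
// needs for its discriminator tag.
struct alignas(8) PDBDoc {};
struct alignas(8) CoffDoc {};
using DocPtr = llvm::PointerUnion<PDBDoc *, CoffDoc *>;

bool isPdbDoc(DocPtr P) {
  return llvm::isa<PDBDoc *>(P); // replaces P.is<PDBDoc *>()
}

PDBDoc &asPdbDoc(DocPtr P) {
  // cast<> asserts the union currently holds a PDBDoc*, like get<>() did.
  return *llvm::cast<PDBDoc *>(P);
}
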
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
index 65e253ed115f..b0aadf861cbc 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeEnumInjectedSources.cpp
@@ -8,6 +8,7 @@
#include "llvm/DebugInfo/PDB/Native/NativeEnumInjectedSources.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/HashTable.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
index cf314c3bede3..91b428afaddb 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeSession.cpp
@@ -8,6 +8,7 @@
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/DebugInfo/MSF/MSFCommon.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
diff --git a/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp b/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp
index 8d6f8ebebf4c..89b1614ba2af 100644
--- a/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/NativeSourceFile.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/NativeSourceFile.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/PDB/Native/NativeSession.h"
#include "llvm/DebugInfo/PDB/Native/PDBFile.h"
#include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index 27df769ee6f2..cd30b56be7cd 100644
--- a/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/llvm/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/Native/PDBFileBuilder.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/GUID.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
@@ -338,7 +340,7 @@ Error PDBFileBuilder::commit(StringRef Filename, codeview::GUID *Guid) {
if (Info->hashPDBContentsToGUID()) {
// Compute a hash of all sections of the output file.
uint64_t Digest =
- xxHash64({Buffer.getBufferStart(), Buffer.getBufferEnd()});
+ xxh3_64bits({Buffer.getBufferStart(), Buffer.getBufferEnd()});
H->Age = 1;
diff --git a/llvm/lib/DebugInfo/PDB/PDBExtras.cpp b/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
index 571510e6bad9..2b318bf1c648 100644
--- a/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBExtras.cpp
@@ -232,6 +232,8 @@ raw_ostream &llvm::pdb::operator<<(raw_ostream &OS, const PDB_Lang &Lang) {
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, D, OS)
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, Swift, OS)
CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, Rust, OS)
+ CASE_OUTPUT_ENUM_CLASS_NAME(PDB_Lang, ObjC, OS)
+ CASE_OUTPUT_ENUM_CLASS_STR(PDB_Lang, ObjCpp, "ObjC++", OS)
}
return OS;
}
diff --git a/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp b/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
index bd60489b6bed..437b96677c0b 100644
--- a/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
+++ b/llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp
@@ -102,6 +102,8 @@ std::string PDBSymbolCompiland::getSourceFileFullPath() const {
.Case(".asm", Lang == PDB_Lang::Masm)
.Case(".swift", Lang == PDB_Lang::Swift)
.Case(".rs", Lang == PDB_Lang::Rust)
+ .Case(".m", Lang == PDB_Lang::ObjC)
+ .Case(".mm", Lang == PDB_Lang::ObjCpp)
.Default(false))
return File->getFileName();
}
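
The new extensions hook into an existing StringSwitch that checks whether a compiland's source-file extension is consistent with its recorded language. A minimal sketch of the pattern (the Lang enum here is a stand-in for PDB_Lang):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

enum class Lang { C, ObjC, ObjCpp, Other };

bool extensionMatchesLang(llvm::StringRef Ext, Lang L) {
  // Unknown extensions fall through to false rather than asserting.
  return llvm::StringSwitch<bool>(Ext)
      .Case(".c", L == Lang::C)
      .Case(".m", L == Lang::ObjC)    // Objective-C
      .Case(".mm", L == Lang::ObjCpp) // Objective-C++
      .Default(false);
}
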
diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
index bfd6f7c02ca3..f9669b554b47 100644
--- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp
@@ -266,11 +266,8 @@ void PlainPrinterBase::printInvalidCommand(const Request &Request,
}
bool PlainPrinterBase::printError(const Request &Request,
- const ErrorInfoBase &ErrorInfo,
- StringRef ErrorBanner) {
- ES << ErrorBanner;
- ErrorInfo.log(ES);
- ES << '\n';
+ const ErrorInfoBase &ErrorInfo) {
+ ErrHandler(ErrorInfo, Request.ModuleName);
// Print an empty struct too.
return true;
}
@@ -288,6 +285,24 @@ static json::Object toJSON(const Request &Request, StringRef ErrorMsg = "") {
return Json;
}
+static json::Object toJSON(const DILineInfo &LineInfo) {
+ return json::Object(
+ {{"FunctionName", LineInfo.FunctionName != DILineInfo::BadString
+ ? LineInfo.FunctionName
+ : ""},
+ {"StartFileName", LineInfo.StartFileName != DILineInfo::BadString
+ ? LineInfo.StartFileName
+ : ""},
+ {"StartLine", LineInfo.StartLine},
+ {"StartAddress",
+ LineInfo.StartAddress ? toHex(*LineInfo.StartAddress) : ""},
+ {"FileName",
+ LineInfo.FileName != DILineInfo::BadString ? LineInfo.FileName : ""},
+ {"Line", LineInfo.Line},
+ {"Column", LineInfo.Column},
+ {"Discriminator", LineInfo.Discriminator}});
+}
+
void JSONPrinter::print(const Request &Request, const DILineInfo &Info) {
DIInliningInfo InliningInfo;
InliningInfo.addFrame(Info);
@@ -298,21 +313,7 @@ void JSONPrinter::print(const Request &Request, const DIInliningInfo &Info) {
json::Array Array;
for (uint32_t I = 0, N = Info.getNumberOfFrames(); I < N; ++I) {
const DILineInfo &LineInfo = Info.getFrame(I);
- json::Object Object(
- {{"FunctionName", LineInfo.FunctionName != DILineInfo::BadString
- ? LineInfo.FunctionName
- : ""},
- {"StartFileName", LineInfo.StartFileName != DILineInfo::BadString
- ? LineInfo.StartFileName
- : ""},
- {"StartLine", LineInfo.StartLine},
- {"StartAddress",
- LineInfo.StartAddress ? toHex(*LineInfo.StartAddress) : ""},
- {"FileName",
- LineInfo.FileName != DILineInfo::BadString ? LineInfo.FileName : ""},
- {"Line", LineInfo.Line},
- {"Column", LineInfo.Column},
- {"Discriminator", LineInfo.Discriminator}});
+ json::Object Object = toJSON(LineInfo);
SourceCode SourceCode(LineInfo.FileName, LineInfo.Line,
Config.SourceContextLines, LineInfo.Source);
std::string FormattedSource;
@@ -370,13 +371,11 @@ void JSONPrinter::printInvalidCommand(const Request &Request,
StringRef Command) {
printError(Request,
StringError("unable to parse arguments: " + Command,
- std::make_error_code(std::errc::invalid_argument)),
- "");
+ std::make_error_code(std::errc::invalid_argument)));
}
bool JSONPrinter::printError(const Request &Request,
- const ErrorInfoBase &ErrorInfo,
- StringRef ErrorBanner) {
+ const ErrorInfoBase &ErrorInfo) {
json::Object Json = toJSON(Request, ErrorInfo.message());
if (ObjectList)
ObjectList->push_back(std::move(Json));
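
The refactoring factors the per-frame JSON serialization into a single toJSON helper so the single-frame and inlining-stack printers share it. A small sketch of the same idea with a hypothetical Frame record in place of DILineInfo:

#include "llvm/Support/JSON.h"
#include <cstdint>
#include <string>

struct Frame {
  std::string FunctionName;
  std::string FileName;
  uint32_t Line = 0;
  uint32_t Column = 0;
};

// Build the object once; every printer that needs a frame reuses this.
llvm::json::Object frameToJSON(const Frame &F) {
  return llvm::json::Object({{"FunctionName", F.FunctionName},
                             {"FileName", F.FileName},
                             {"Line", int64_t(F.Line)},
                             {"Column", int64_t(F.Column)}});
}
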
diff --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
index 5e9d8ac538df..a2bc2577b70a 100644
--- a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
@@ -133,9 +133,8 @@ bool MarkupFilter::tryReset(const MarkupNode &Node,
endAnyModuleInfoLine();
for (const MarkupNode &Node : DeferredNodes)
filterNode(Node);
- highlight();
- OS << "[[[reset]]]" << lineEnding();
- restoreColor();
+ printRawElement(Node);
+ OS << lineEnding();
Modules.clear();
MMaps.clear();
@@ -239,8 +238,7 @@ bool MarkupFilter::tryPC(const MarkupNode &Node) {
return false;
if (!checkNumFieldsAtLeast(Node, 1))
return true;
- if (!checkNumFieldsAtMost(Node, 2))
- return true;
+ warnNumFieldsAtMost(Node, 2);
std::optional<uint64_t> Addr = parseAddr(Node.Fields[0]);
if (!Addr)
@@ -293,8 +291,7 @@ bool MarkupFilter::tryBackTrace(const MarkupNode &Node) {
return false;
if (!checkNumFieldsAtLeast(Node, 2))
return true;
- if (!checkNumFieldsAtMost(Node, 3))
- return true;
+ warnNumFieldsAtMost(Node, 3);
std::optional<uint64_t> FrameNumber = parseFrameNumber(Node.Fields[0]);
if (!FrameNumber)
@@ -513,8 +510,9 @@ MarkupFilter::parseModule(const MarkupNode &Element) const {
}
if (!checkNumFields(Element, 4))
return std::nullopt;
- ASSIGN_OR_RETURN_NONE(SmallVector<uint8_t>, BuildID,
- parseBuildID(Element.Fields[3]));
+ SmallVector<uint8_t> BuildID = parseBuildID(Element.Fields[3]);
+ if (BuildID.empty())
+ return std::nullopt;
return Module{ID, Name.str(), std::move(BuildID)};
}
@@ -597,16 +595,11 @@ std::optional<uint64_t> MarkupFilter::parseFrameNumber(StringRef Str) const {
}
// Parse a build ID (%x in the spec).
-std::optional<SmallVector<uint8_t>>
-MarkupFilter::parseBuildID(StringRef Str) const {
- std::string Bytes;
- if (Str.empty() || Str.size() % 2 || !tryGetFromHex(Str, Bytes)) {
+object::BuildID MarkupFilter::parseBuildID(StringRef Str) const {
+ object::BuildID BID = llvm::object::parseBuildID(Str);
+ if (BID.empty())
reportTypeError(Str, "build ID");
- return std::nullopt;
- }
- ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
- Bytes.size());
- return SmallVector<uint8_t>(BuildID.begin(), BuildID.end());
+ return BID;
}
// Parses the mode string for an mmap element.
@@ -659,10 +652,12 @@ bool MarkupFilter::checkTag(const MarkupNode &Node) const {
bool MarkupFilter::checkNumFields(const MarkupNode &Element,
size_t Size) const {
if (Element.Fields.size() != Size) {
- WithColor::error(errs()) << "expected " << Size << " field(s); found "
- << Element.Fields.size() << "\n";
+ bool Warn = Element.Fields.size() > Size;
+ WithColor(errs(), Warn ? HighlightColor::Warning : HighlightColor::Error)
+ << (Warn ? "warning: " : "error: ") << "expected " << Size
+ << " field(s); found " << Element.Fields.size() << "\n";
reportLocation(Element.Tag.end());
- return false;
+ return Warn;
}
return true;
}
@@ -679,16 +674,14 @@ bool MarkupFilter::checkNumFieldsAtLeast(const MarkupNode &Element,
return true;
}
-bool MarkupFilter::checkNumFieldsAtMost(const MarkupNode &Element,
- size_t Size) const {
- if (Element.Fields.size() > Size) {
- WithColor::error(errs())
- << "expected at most " << Size << " field(s); found "
- << Element.Fields.size() << "\n";
- reportLocation(Element.Tag.end());
- return false;
- }
- return true;
+void MarkupFilter::warnNumFieldsAtMost(const MarkupNode &Element,
+ size_t Size) const {
+ if (Element.Fields.size() <= Size)
+ return;
+ WithColor::warning(errs())
+ << "expected at most " << Size << " field(s); found "
+ << Element.Fields.size() << "\n";
+ reportLocation(Element.Tag.end());
}
void MarkupFilter::reportTypeError(StringRef Str, StringRef TypeName) const {
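
The net effect of these hunks is a more lenient field-count policy: missing fields still fail the element, but extra fields only produce a warning, so output from newer producers keeps flowing through older filters. A standalone sketch of that policy (checkFieldCount is a made-up name):

#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <cstddef>

// Returns true if the element should still be processed.
bool checkFieldCount(size_t Found, size_t Expected) {
  if (Found == Expected)
    return true;
  bool Warn = Found > Expected; // extra fields warn, missing fields error
  llvm::WithColor(llvm::errs(), Warn ? llvm::HighlightColor::Warning
                                     : llvm::HighlightColor::Error)
      << (Warn ? "warning: " : "error: ") << "expected " << Expected
      << " field(s); found " << Found << "\n";
  return Warn;
}
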
diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
index 5c65742a39f5..6b8068a531c0 100644
--- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp
@@ -12,7 +12,6 @@
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Object/COFF.h"
@@ -21,6 +20,7 @@
#include "llvm/Object/SymbolSize.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/DataExtractor.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
using namespace llvm;
diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index 72c008d9835e..517f1e7dc284 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -13,6 +13,7 @@
#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/BTF/BTFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/PDB/PDB.h"
#include "llvm/DebugInfo/PDB/PDBContext.h"
@@ -363,12 +364,10 @@ ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path,
const ELFObjectFileBase *Obj,
const std::string &ArchName) {
auto BuildID = getBuildID(Obj);
- if (!BuildID)
- return nullptr;
- if (BuildID->size() < 2)
+ if (BuildID.size() < 2)
return nullptr;
std::string DebugBinaryPath;
- if (!getOrFindDebugBinary(*BuildID, DebugBinaryPath))
+ if (!getOrFindDebugBinary(BuildID, DebugBinaryPath))
return nullptr;
auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
if (!DbgObjOrErr) {
@@ -617,6 +616,13 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
return ModuleOrErr;
}
+// For BPF programs, the .BTF.ext section contains line number information;
+// use it if regular DWARF is not available (e.g. for a stripped binary).
+static bool useBTFContext(const ObjectFile &Obj) {
+ return Obj.makeTriple().isBPF() && !Obj.hasDebugInfo() &&
+ BTFParser::hasBTFSections(Obj);
+}
+
Expected<SymbolizableModule *>
LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) {
StringRef ObjName = Obj.getFileName();
@@ -624,7 +630,11 @@ LLVMSymbolizer::getOrCreateModuleInfo(const ObjectFile &Obj) {
if (I != Modules.end())
return I->second.get();
- std::unique_ptr<DIContext> Context = DWARFContext::create(Obj);
+ std::unique_ptr<DIContext> Context;
+ if (useBTFContext(Obj))
+ Context = BTFContext::create(Obj);
+ else
+ Context = DWARFContext::create(Obj);
// FIXME: handle COFF object with PDB info to use PDBContext
return createModuleInfo(&Obj, std::move(Context), ObjName);
}
@@ -634,8 +644,7 @@ LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef<uint8_t> BuildID) {
std::string Path;
if (!getOrFindDebugBinary(BuildID, Path)) {
return createStringError(errc::no_such_file_or_directory,
- Twine("could not find build ID '") +
- toHex(BuildID) + "'");
+ "could not find build ID");
}
return getOrCreateModuleInfo(Path);
}
@@ -649,22 +658,29 @@ namespace {
// vectorcall - foo@@12
// These are all different linkage names for 'foo'.
StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
- // Remove any '_' or '@' prefix.
char Front = SymbolName.empty() ? '\0' : SymbolName[0];
- if (Front == '_' || Front == '@')
- SymbolName = SymbolName.drop_front();
// Remove any '@[0-9]+' suffix.
+ bool HasAtNumSuffix = false;
if (Front != '?') {
size_t AtPos = SymbolName.rfind('@');
if (AtPos != StringRef::npos &&
- all_of(drop_begin(SymbolName, AtPos + 1), isDigit))
+ all_of(drop_begin(SymbolName, AtPos + 1), isDigit)) {
SymbolName = SymbolName.substr(0, AtPos);
+ HasAtNumSuffix = true;
+ }
}
// Remove any ending '@' for vectorcall.
- if (SymbolName.endswith("@"))
+ bool IsVectorCall = false;
+ if (HasAtNumSuffix && SymbolName.endswith("@")) {
SymbolName = SymbolName.drop_back();
+ IsVectorCall = true;
+ }
+
+ // If not vectorcall, remove any '_' or '@' prefix.
+ if (!IsVectorCall && (Front == '_' || Front == '@'))
+ SymbolName = SymbolName.drop_front();
return SymbolName;
}
@@ -675,14 +691,14 @@ std::string
LLVMSymbolizer::DemangleName(const std::string &Name,
const SymbolizableModule *DbiModuleDescriptor) {
std::string Result;
- if (nonMicrosoftDemangle(Name.c_str(), Result))
+ if (nonMicrosoftDemangle(Name, Result))
return Result;
if (!Name.empty() && Name.front() == '?') {
// Only do MSVC C++ demangling on symbols starting with '?'.
int status = 0;
char *DemangledName = microsoftDemangle(
- Name.c_str(), nullptr, nullptr, nullptr, &status,
+ Name, nullptr, &status,
MSDemangleFlags(MSDF_NoAccessSpecifier | MSDF_NoCallingConvention |
MSDF_NoMemberType | MSDF_NoReturnType));
if (status != 0)
@@ -692,8 +708,14 @@ LLVMSymbolizer::DemangleName(const std::string &Name,
return Result;
}
- if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module())
- return std::string(demanglePE32ExternCFunc(Name));
+ if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module()) {
+ std::string DemangledCName(demanglePE32ExternCFunc(Name));
+ // On i386 Windows, the C name mangling for different calling conventions
+ // may also be applied on top of the Itanium or Rust name mangling.
+ if (nonMicrosoftDemangle(DemangledCName, Result))
+ return Result;
+ return DemangledCName;
+ }
return Name;
}
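
The BTF hook is self-contained: a BPF object that has no DWARF but does carry .BTF/.BTF.ext sections gets a BTFContext, everything else keeps the DWARF path. A sketch of that selection on its own (header names assumed as of this import):

#include "llvm/DebugInfo/BTF/BTFContext.h"
#include "llvm/DebugInfo/BTF/BTFParser.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Object/ObjectFile.h"
#include <memory>

std::unique_ptr<llvm::DIContext>
makeSymbolizationContext(const llvm::object::ObjectFile &Obj) {
  // Same condition as useBTFContext() above: BPF target, no regular debug
  // info, but BTF line tables are present.
  if (Obj.makeTriple().isBPF() && !Obj.hasDebugInfo() &&
      llvm::BTFParser::hasBTFSections(Obj))
    return llvm::BTFContext::create(Obj);
  return llvm::DWARFContext::create(Obj);
}
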
diff --git a/llvm/lib/Debuginfod/Debuginfod.cpp b/llvm/lib/Debuginfod/Debuginfod.cpp
index 026f118bbf5b..394f2b29aee6 100644
--- a/llvm/lib/Debuginfod/Debuginfod.cpp
+++ b/llvm/lib/Debuginfod/Debuginfod.cpp
@@ -55,7 +55,11 @@ static std::string buildIDToString(BuildIDRef ID) {
return llvm::toHex(ID, /*LowerCase=*/true);
}
-Expected<SmallVector<StringRef>> getDefaultDebuginfodUrls() {
+bool canUseDebuginfod() {
+ return HTTPClient::isAvailable() && !getDefaultDebuginfodUrls().empty();
+}
+
+SmallVector<StringRef> getDefaultDebuginfodUrls() {
const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS");
if (DebuginfodUrlsEnv == nullptr)
return SmallVector<StringRef>();
@@ -126,13 +130,8 @@ Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey,
return CacheDirOrErr.takeError();
CacheDir = *CacheDirOrErr;
- Expected<SmallVector<StringRef>> DebuginfodUrlsOrErr =
- getDefaultDebuginfodUrls();
- if (!DebuginfodUrlsOrErr)
- return DebuginfodUrlsOrErr.takeError();
- SmallVector<StringRef> &DebuginfodUrls = *DebuginfodUrlsOrErr;
return getCachedOrDownloadArtifact(UniqueKey, UrlPath, CacheDir,
- DebuginfodUrls,
+ getDefaultDebuginfodUrls(),
getDefaultDebuginfodTimeout());
}
@@ -159,7 +158,8 @@ public:
Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {
if (!FileStream) {
- if (Client.responseCode() != 200)
+ unsigned Code = Client.responseCode();
+ if (Code && Code != 200)
return Error::success();
Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError =
CreateStream();
@@ -251,16 +251,25 @@ Expected<std::string> getCachedOrDownloadArtifact(
// Perform the HTTP request and if successful, write the response body to
// the cache.
- StreamedHTTPResponseHandler Handler(
- [&]() { return CacheAddStream(Task, ""); }, Client);
- HTTPRequest Request(ArtifactUrl);
- Request.Headers = getHeaders();
- Error Err = Client.perform(Request, Handler);
- if (Err)
- return std::move(Err);
-
- if (Client.responseCode() != 200)
- continue;
+ {
+ StreamedHTTPResponseHandler Handler(
+ [&]() { return CacheAddStream(Task, ""); }, Client);
+ HTTPRequest Request(ArtifactUrl);
+ Request.Headers = getHeaders();
+ Error Err = Client.perform(Request, Handler);
+ if (Err)
+ return std::move(Err);
+
+ unsigned Code = Client.responseCode();
+ if (Code && Code != 200)
+ continue;
+ }
+
+ Expected<CachePruningPolicy> PruningPolicyOrErr =
+ parseCachePruningPolicy(std::getenv("DEBUGINFOD_CACHE_POLICY"));
+ if (!PruningPolicyOrErr)
+ return PruningPolicyOrErr.takeError();
+ pruneCache(CacheDirectoryPath, *PruningPolicyOrErr);
// Return the path to the artifact on disk.
return std::string(AbsCachedArtifactPath);
@@ -403,11 +412,11 @@ Error DebuginfodCollection::findBinaries(StringRef Path) {
if (!Object)
continue;
- std::optional<BuildIDRef> ID = getBuildID(Object);
- if (!ID)
+ BuildIDRef ID = getBuildID(Object);
+ if (ID.empty())
continue;
- std::string IDString = buildIDToString(*ID);
+ std::string IDString = buildIDToString(ID);
if (Object->hasDebugInfo()) {
std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex);
(void)DebugBinaries.try_emplace(IDString, std::move(FilePath));
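
With getDefaultDebuginfodUrls() no longer returning Expected<>, and the new canUseDebuginfod() predicate, callers can gate lookups without unwrapping errors. A small usage sketch (listDebuginfodServers is a made-up caller):

#include "llvm/ADT/StringRef.h"
#include "llvm/Debuginfod/Debuginfod.h"
#include "llvm/Support/raw_ostream.h"

void listDebuginfodServers() {
  // True only if the HTTP client is available and DEBUGINFOD_URLS names at
  // least one server.
  if (!llvm::canUseDebuginfod())
    return;
  for (llvm::StringRef Url : llvm::getDefaultDebuginfodUrls())
    llvm::outs() << "debuginfod server: " << Url << "\n";
}
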
diff --git a/llvm/lib/Debuginfod/HTTPServer.cpp b/llvm/lib/Debuginfod/HTTPServer.cpp
index 2ea923d5a734..a5e992254ead 100644
--- a/llvm/lib/Debuginfod/HTTPServer.cpp
+++ b/llvm/lib/Debuginfod/HTTPServer.cpp
@@ -28,6 +28,12 @@
using namespace llvm;
+char HTTPServerError::ID = 0;
+
+HTTPServerError::HTTPServerError(const Twine &Msg) : Msg(Msg.str()) {}
+
+void HTTPServerError::log(raw_ostream &OS) const { OS << Msg; }
+
bool llvm::streamFile(HTTPServerRequest &Request, StringRef FilePath) {
Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead(FilePath);
if (Error Err = FDOrErr.takeError()) {
@@ -159,31 +165,34 @@ HTTPServer::HTTPServer() = default;
HTTPServer::~HTTPServer() = default;
void HTTPServerRequest::setResponse(HTTPResponse Response) {
- llvm_unreachable("No HTTP server implementation available");
+ llvm_unreachable("no httplib");
}
void HTTPServerRequest::setResponse(StreamingHTTPResponse Response) {
- llvm_unreachable("No HTTP server implementation available");
+ llvm_unreachable("no httplib");
}
Error HTTPServer::get(StringRef UrlPathPattern, HTTPRequestHandler Handler) {
- llvm_unreachable("No HTTP server implementation available");
+ // TODO(https://github.com/llvm/llvm-project/issues/63873) We would ideally
+ // return an error as well but that's going to require refactoring of error
+ // handling in DebuginfodServer.
+ return Error::success();
}
Error HTTPServer::bind(unsigned ListenPort, const char *HostInterface) {
- llvm_unreachable("No HTTP server implementation available");
+ return make_error<HTTPServerError>("no httplib");
}
Expected<unsigned> HTTPServer::bind(const char *HostInterface) {
- llvm_unreachable("No HTTP server implementation available");
+ return make_error<HTTPServerError>("no httplib");
}
Error HTTPServer::listen() {
- llvm_unreachable("No HTTP server implementation available");
+ return make_error<HTTPServerError>("no httplib");
}
void HTTPServer::stop() {
- llvm_unreachable("No HTTP server implementation available");
+ llvm_unreachable("no httplib");
}
#endif // LLVM_ENABLE_HTTPLIB
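
With the no-httplib stubs now returning a real HTTPServerError instead of asserting, a tool can detect the missing feature and degrade gracefully. A hedged usage sketch (tryStartServer is illustrative, not part of the library):

#include "llvm/Debuginfod/HTTPServer.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

void tryStartServer(llvm::HTTPServer &Server, unsigned Port) {
  if (llvm::Error Err = Server.bind(Port, "0.0.0.0")) {
    // Without LLVM_ENABLE_HTTPLIB this reports "no httplib" and returns.
    llvm::logAllUnhandledErrors(std::move(Err), llvm::errs(),
                                "debuginfod server disabled: ");
    return;
  }
  // ... register handlers with Server.get() and call Server.listen() ...
}
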
diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp
index b747b0f9cc67..8856302be6dd 100644
--- a/llvm/lib/Demangle/DLangDemangle.cpp
+++ b/llvm/lib/Demangle/DLangDemangle.cpp
@@ -14,16 +14,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/Demangle/Demangle.h"
-#include "llvm/Demangle/StringView.h"
+#include "llvm/Demangle/StringViewExtras.h"
#include "llvm/Demangle/Utility.h"
#include <cctype>
#include <cstring>
#include <limits>
+#include <string_view>
using namespace llvm;
using llvm::itanium_demangle::OutputBuffer;
-using llvm::itanium_demangle::StringView;
+using llvm::itanium_demangle::starts_with;
namespace {
@@ -32,7 +33,7 @@ struct Demangler {
/// Initialize the information structure we use to pass around information.
///
/// \param Mangled String to demangle.
- Demangler(const char *Mangled);
+ Demangler(std::string_view Mangled);
/// Extract and demangle the mangled symbol and append it to the output
/// string.
@@ -52,46 +53,42 @@ private:
/// \param Demangled output buffer to write the demangled name.
/// \param Mangled mangled symbol to be demangled.
///
- /// \return The remaining string on success or nullptr on failure.
- ///
/// \see https://dlang.org/spec/abi.html#name_mangling .
/// \see https://dlang.org/spec/abi.html#MangledName .
- const char *parseMangle(OutputBuffer *Demangled, const char *Mangled);
+ void parseMangle(OutputBuffer *Demangled, std::string_view &Mangled);
/// Extract the number from a given string.
///
/// \param Mangled string to extract the number.
/// \param Ret assigned result value.
///
- /// \return The remaining string on success or nullptr on failure.
- ///
- /// \note A result larger than UINT_MAX is considered a failure.
+ /// \note Ret larger than UINT_MAX is considered a failure.
///
/// \see https://dlang.org/spec/abi.html#Number .
- const char *decodeNumber(const char *Mangled, unsigned long &Ret);
+ void decodeNumber(std::string_view &Mangled, unsigned long &Ret);
/// Extract the back reference position from a given string.
///
/// \param Mangled string to extract the back reference position.
/// \param Ret assigned result value.
///
- /// \return the remaining string on success or nullptr on failure.
+ /// \return true on success, false on error.
///
/// \note Ret is always >= 0 on success, and unspecified on failure
///
/// \see https://dlang.org/spec/abi.html#back_ref .
/// \see https://dlang.org/spec/abi.html#NumberBackRef .
- const char *decodeBackrefPos(const char *Mangled, long &Ret);
+ bool decodeBackrefPos(std::string_view &Mangled, long &Ret);
   /// Extract the symbol pointed to by the back reference from a given string.
///
/// \param Mangled string to extract the back reference position.
/// \param Ret assigned result value.
///
- /// \return the remaining string on success or nullptr on failure.
+ /// \return true on success, false on error.
///
/// \see https://dlang.org/spec/abi.html#back_ref .
- const char *decodeBackref(const char *Mangled, const char *&Ret);
+ bool decodeBackref(std::string_view &Mangled, std::string_view &Ret);
/// Extract and demangle backreferenced symbol from a given mangled symbol
/// and append it to the output string.
@@ -99,22 +96,18 @@ private:
/// \param Demangled output buffer to write the demangled name.
/// \param Mangled mangled symbol to be demangled.
///
- /// \return the remaining string on success or nullptr on failure.
- ///
/// \see https://dlang.org/spec/abi.html#back_ref .
/// \see https://dlang.org/spec/abi.html#IdentifierBackRef .
- const char *parseSymbolBackref(OutputBuffer *Demangled, const char *Mangled);
+ void parseSymbolBackref(OutputBuffer *Demangled, std::string_view &Mangled);
/// Extract and demangle backreferenced type from a given mangled symbol
/// and append it to the output string.
///
/// \param Mangled mangled symbol to be demangled.
///
- /// \return the remaining string on success or nullptr on failure.
- ///
/// \see https://dlang.org/spec/abi.html#back_ref .
/// \see https://dlang.org/spec/abi.html#TypeBackRef .
- const char *parseTypeBackref(const char *Mangled);
+ void parseTypeBackref(std::string_view &Mangled);
/// Check whether it is the beginning of a symbol name.
///
@@ -123,7 +116,7 @@ private:
/// \return true on success, false otherwise.
///
/// \see https://dlang.org/spec/abi.html#SymbolName .
- bool isSymbolName(const char *Mangled);
+ bool isSymbolName(std::string_view Mangled);
/// Extract and demangle an identifier from a given mangled symbol append it
/// to the output string.
@@ -131,10 +124,8 @@ private:
/// \param Demangled Output buffer to write the demangled name.
/// \param Mangled Mangled symbol to be demangled.
///
- /// \return The remaining string on success or nullptr on failure.
- ///
/// \see https://dlang.org/spec/abi.html#SymbolName .
- const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled);
+ void parseIdentifier(OutputBuffer *Demangled, std::string_view &Mangled);
/// Extract and demangle the plain identifier from a given mangled symbol and
/// prepend/append it to the output string, with a special treatment for some
@@ -144,11 +135,9 @@ private:
/// \param Mangled Mangled symbol to be demangled.
/// \param Len Length of the mangled symbol name.
///
- /// \return The remaining string on success or nullptr on failure.
- ///
/// \see https://dlang.org/spec/abi.html#LName .
- const char *parseLName(OutputBuffer *Demangled, const char *Mangled,
- unsigned long Len);
+ void parseLName(OutputBuffer *Demangled, std::string_view &Mangled,
+ unsigned long Len);
/// Extract and demangle the qualified symbol from a given mangled symbol
/// append it to the output string.
@@ -156,33 +145,38 @@ private:
/// \param Demangled Output buffer to write the demangled name.
/// \param Mangled Mangled symbol to be demangled.
///
- /// \return The remaining string on success or nullptr on failure.
- ///
/// \see https://dlang.org/spec/abi.html#QualifiedName .
- const char *parseQualified(OutputBuffer *Demangled, const char *Mangled);
+ void parseQualified(OutputBuffer *Demangled, std::string_view &Mangled);
/// Extract and demangle a type from a given mangled symbol append it to
/// the output string.
///
/// \param Mangled mangled symbol to be demangled.
///
- /// \return the remaining string on success or nullptr on failure.
+ /// \return true on success, false on error.
///
/// \see https://dlang.org/spec/abi.html#Type .
- const char *parseType(const char *Mangled);
+ bool parseType(std::string_view &Mangled);
- /// The string we are demangling.
- const char *Str;
+ /// An immutable view of the string we are demangling.
+ const std::string_view Str;
/// The index of the last back reference.
int LastBackref;
};
} // namespace
-const char *Demangler::decodeNumber(const char *Mangled, unsigned long &Ret) {
- // Return nullptr if trying to extract something that isn't a digit.
- if (Mangled == nullptr || !std::isdigit(*Mangled))
- return nullptr;
+void Demangler::decodeNumber(std::string_view &Mangled, unsigned long &Ret) {
+ // Clear Mangled if trying to extract something that isn't a digit.
+ if (Mangled.empty()) {
+ Mangled = {};
+ return;
+ }
+
+ if (!std::isdigit(Mangled.front())) {
+ Mangled = {};
+ return;
+ }
unsigned long Val = 0;
@@ -190,25 +184,29 @@ const char *Demangler::decodeNumber(const char *Mangled, unsigned long &Ret) {
unsigned long Digit = Mangled[0] - '0';
// Check for overflow.
- if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10)
- return nullptr;
+ if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10) {
+ Mangled = {};
+ return;
+ }
Val = Val * 10 + Digit;
- ++Mangled;
- } while (std::isdigit(*Mangled));
+ Mangled.remove_prefix(1);
+ } while (!Mangled.empty() && std::isdigit(Mangled.front()));
- if (*Mangled == '\0')
- return nullptr;
+ if (Mangled.empty()) {
+ Mangled = {};
+ return;
+ }
Ret = Val;
- return Mangled;
}
-const char *Demangler::decodeBackrefPos(const char *Mangled, long &Ret) {
+bool Demangler::decodeBackrefPos(std::string_view &Mangled, long &Ret) {
// Return nullptr if trying to extract something that isn't a digit
- if (Mangled == nullptr || !std::isalpha(*Mangled))
- return nullptr;
-
+ if (Mangled.empty()) {
+ Mangled = {};
+ return false;
+ }
// Any identifier or non-basic type that has been emitted to the mangled
// symbol before will not be emitted again, but is referenced by a special
// sequence encoding the relative position of the original occurrence in the
@@ -221,7 +219,7 @@ const char *Demangler::decodeBackrefPos(const char *Mangled, long &Ret) {
// ^
unsigned long Val = 0;
- while (std::isalpha(*Mangled)) {
+ while (!Mangled.empty() && std::isalpha(Mangled.front())) {
// Check for overflow
if (Val > (std::numeric_limits<unsigned long>::max() - 25) / 26)
break;
@@ -233,116 +231,133 @@ const char *Demangler::decodeBackrefPos(const char *Mangled, long &Ret) {
if ((long)Val <= 0)
break;
Ret = Val;
- return Mangled + 1;
+ Mangled.remove_prefix(1);
+ return true;
}
Val += Mangled[0] - 'A';
- ++Mangled;
+ Mangled.remove_prefix(1);
}
- return nullptr;
+ Mangled = {};
+ return false;
}
-const char *Demangler::decodeBackref(const char *Mangled, const char *&Ret) {
- assert(Mangled != nullptr && *Mangled == 'Q' && "Invalid back reference!");
- Ret = nullptr;
+bool Demangler::decodeBackref(std::string_view &Mangled,
+ std::string_view &Ret) {
+ assert(!Mangled.empty() && Mangled.front() == 'Q' &&
+ "Invalid back reference!");
+ Ret = {};
// Position of 'Q'
- const char *Qpos = Mangled;
+ const char *Qpos = Mangled.data();
long RefPos;
- ++Mangled;
+ Mangled.remove_prefix(1);
- Mangled = decodeBackrefPos(Mangled, RefPos);
- if (Mangled == nullptr)
- return nullptr;
+ if (!decodeBackrefPos(Mangled, RefPos)) {
+ Mangled = {};
+ return false;
+ }
- if (RefPos > Qpos - Str)
- return nullptr;
+ if (RefPos > Qpos - Str.data()) {
+ Mangled = {};
+ return false;
+ }
// Set the position of the back reference.
Ret = Qpos - RefPos;
- return Mangled;
+ return true;
}
-const char *Demangler::parseSymbolBackref(OutputBuffer *Demangled,
- const char *Mangled) {
+void Demangler::parseSymbolBackref(OutputBuffer *Demangled,
+ std::string_view &Mangled) {
// An identifier back reference always points to a digit 0 to 9.
// IdentifierBackRef:
// Q NumberBackRef
// ^
- const char *Backref;
unsigned long Len;
// Get position of the back reference
- Mangled = decodeBackref(Mangled, Backref);
+ std::string_view Backref;
+ if (!decodeBackref(Mangled, Backref)) {
+ Mangled = {};
+ return;
+ }
// Must point to a simple identifier
- Backref = decodeNumber(Backref, Len);
- if (Backref == nullptr || strlen(Backref) < Len)
- return nullptr;
-
- Backref = parseLName(Demangled, Backref, Len);
- if (Backref == nullptr)
- return nullptr;
+ decodeNumber(Backref, Len);
+ if (Backref.empty() || Backref.length() < Len) {
+ Mangled = {};
+ return;
+ }
- return Mangled;
+ parseLName(Demangled, Backref, Len);
+ if (Backref.empty())
+ Mangled = {};
}
-const char *Demangler::parseTypeBackref(const char *Mangled) {
+void Demangler::parseTypeBackref(std::string_view &Mangled) {
// A type back reference always points to a letter.
// TypeBackRef:
// Q NumberBackRef
// ^
- const char *Backref;
// If we appear to be moving backwards through the mangle string, then
// bail as this may be a recursive back reference.
- if (Mangled - Str >= LastBackref)
- return nullptr;
+ if (Mangled.data() - Str.data() >= LastBackref) {
+ Mangled = {};
+ return;
+ }
int SaveRefPos = LastBackref;
- LastBackref = Mangled - Str;
+ LastBackref = Mangled.data() - Str.data();
// Get position of the back reference.
- Mangled = decodeBackref(Mangled, Backref);
+ std::string_view Backref;
+ if (!decodeBackref(Mangled, Backref)) {
+ Mangled = {};
+ return;
+ }
// Can't decode back reference.
- if (Backref == nullptr)
- return nullptr;
+ if (Backref.empty()) {
+ Mangled = {};
+ return;
+ }
// TODO: Add support for function type back references.
- Backref = parseType(Backref);
+ if (!parseType(Backref))
+ Mangled = {};
LastBackref = SaveRefPos;
- if (Backref == nullptr)
- return nullptr;
-
- return Mangled;
+ if (Backref.empty())
+ Mangled = {};
}
-bool Demangler::isSymbolName(const char *Mangled) {
+bool Demangler::isSymbolName(std::string_view Mangled) {
long Ret;
- const char *Qref = Mangled;
+ const char *Qref = Mangled.data();
- if (std::isdigit(*Mangled))
+ if (std::isdigit(Mangled.front()))
return true;
// TODO: Handle template instances.
- if (*Mangled != 'Q')
+ if (Mangled.front() != 'Q')
return false;
- Mangled = decodeBackrefPos(Mangled + 1, Ret);
- if (Mangled == nullptr || Ret > Qref - Str)
+ Mangled.remove_prefix(1);
+ bool Valid = decodeBackrefPos(Mangled, Ret);
+ if (!Valid || Ret > Qref - Str.data())
return false;
return std::isdigit(Qref[-Ret]);
}
-const char *Demangler::parseMangle(OutputBuffer *Demangled,
- const char *Mangled) {
+void Demangler::parseMangle(OutputBuffer *Demangled,
+ std::string_view &Mangled) {
// A D mangled symbol is comprised of both scope and type information.
// MangleName:
// _D QualifiedName Type
@@ -352,24 +367,24 @@ const char *Demangler::parseMangle(OutputBuffer *Demangled,
// above location.
// Note that type is never a function type, but only the return type of
// a function or the type of a variable.
- Mangled += 2;
+ Mangled.remove_prefix(2);
- Mangled = parseQualified(Demangled, Mangled);
+ parseQualified(Demangled, Mangled);
- if (Mangled != nullptr) {
- // Artificial symbols end with 'Z' and have no type.
- if (*Mangled == 'Z')
- ++Mangled;
- else {
- Mangled = parseType(Mangled);
- }
+ if (Mangled.empty()) {
+ Mangled = {};
+ return;
}
- return Mangled;
+ // Artificial symbols end with 'Z' and have no type.
+ if (Mangled.front() == 'Z') {
+ Mangled.remove_prefix(1);
+ } else if (!parseType(Mangled))
+ Mangled = {};
}
-const char *Demangler::parseQualified(OutputBuffer *Demangled,
- const char *Mangled) {
+void Demangler::parseQualified(OutputBuffer *Demangled,
+ std::string_view &Mangled) {
// Qualified names are identifiers separated by their encoded length.
// Nested functions also encode their argument types without specifying
// what they return.
@@ -388,10 +403,10 @@ const char *Demangler::parseQualified(OutputBuffer *Demangled,
size_t NotFirst = false;
do {
// Skip over anonymous symbols.
- if (*Mangled == '0') {
+ if (!Mangled.empty() && Mangled.front() == '0') {
do
- ++Mangled;
- while (*Mangled == '0');
+ Mangled.remove_prefix(1);
+ while (!Mangled.empty() && Mangled.front() == '0');
continue;
}
@@ -400,62 +415,63 @@ const char *Demangler::parseQualified(OutputBuffer *Demangled,
*Demangled << '.';
NotFirst = true;
- Mangled = parseIdentifier(Demangled, Mangled);
-
- } while (Mangled && isSymbolName(Mangled));
-
- return Mangled;
+ parseIdentifier(Demangled, Mangled);
+ } while (!Mangled.empty() && isSymbolName(Mangled));
}
-const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
- const char *Mangled) {
- unsigned long Len;
-
- if (Mangled == nullptr || *Mangled == '\0')
- return nullptr;
+void Demangler::parseIdentifier(OutputBuffer *Demangled,
+ std::string_view &Mangled) {
+ if (Mangled.empty()) {
+ Mangled = {};
+ return;
+ }
- if (*Mangled == 'Q')
+ if (Mangled.front() == 'Q')
return parseSymbolBackref(Demangled, Mangled);
// TODO: Parse lengthless template instances.
- const char *Endptr = decodeNumber(Mangled, Len);
-
- if (Endptr == nullptr || Len == 0)
- return nullptr;
-
- if (strlen(Endptr) < Len)
- return nullptr;
+ unsigned long Len;
+ decodeNumber(Mangled, Len);
- Mangled = Endptr;
+ if (Mangled.empty()) {
+ Mangled = {};
+ return;
+ }
+ if (!Len || Mangled.length() < Len) {
+ Mangled = {};
+ return;
+ }
// TODO: Parse template instances with a length prefix.
// There can be multiple different declarations in the same function that
// have the same mangled name. To make the mangled names unique, a fake
// parent in the form `__Sddd' is added to the symbol.
- if (Len >= 4 && Mangled[0] == '_' && Mangled[1] == '_' && Mangled[2] == 'S') {
- const char *NumPtr = Mangled + 3;
- while (NumPtr < (Mangled + Len) && std::isdigit(*NumPtr))
- ++NumPtr;
-
- if (Mangled + Len == NumPtr) {
+ if (Len >= 4 && starts_with(Mangled, "__S")) {
+ const size_t SuffixLen = Mangled.length() - Len;
+ std::string_view P = Mangled.substr(3);
+ while (P.length() > SuffixLen && std::isdigit(P.front()))
+ P.remove_prefix(1);
+ if (P.length() == SuffixLen) {
// Skip over the fake parent.
- Mangled += Len;
+ Mangled.remove_prefix(Len);
return parseIdentifier(Demangled, Mangled);
}
// Else demangle it as a plain identifier.
}
- return parseLName(Demangled, Mangled, Len);
+ parseLName(Demangled, Mangled, Len);
}
-const char *Demangler::parseType(const char *Mangled) {
- if (*Mangled == '\0')
- return nullptr;
+bool Demangler::parseType(std::string_view &Mangled) {
+ if (Mangled.empty()) {
+ Mangled = {};
+ return false;
+ }
- switch (*Mangled) {
+ switch (Mangled.front()) {
// TODO: Parse type qualifiers.
// TODO: Parse function types.
// TODO: Parse compound types.
@@ -464,99 +480,102 @@ const char *Demangler::parseType(const char *Mangled) {
// Basic types.
case 'i':
- ++Mangled;
+ Mangled.remove_prefix(1);
// TODO: Add type name dumping
- return Mangled;
+ return true;
// TODO: Add support for the rest of the basic types.
// Back referenced type.
- case 'Q':
- return parseTypeBackref(Mangled);
+ case 'Q': {
+ parseTypeBackref(Mangled);
+ return true;
+ }
default: // unhandled.
- return nullptr;
+ Mangled = {};
+ return false;
}
}
-const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled,
- unsigned long Len) {
+void Demangler::parseLName(OutputBuffer *Demangled, std::string_view &Mangled,
+ unsigned long Len) {
switch (Len) {
case 6:
- if (strncmp(Mangled, "__initZ", Len + 1) == 0) {
+ if (starts_with(Mangled, "__initZ")) {
// The static initializer for a given symbol.
Demangled->prepend("initializer for ");
Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
- Mangled += Len;
- return Mangled;
+ Mangled.remove_prefix(Len);
+ return;
}
- if (strncmp(Mangled, "__vtblZ", Len + 1) == 0) {
+ if (starts_with(Mangled, "__vtblZ")) {
// The vtable symbol for a given class.
Demangled->prepend("vtable for ");
Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
- Mangled += Len;
- return Mangled;
+ Mangled.remove_prefix(Len);
+ return;
}
break;
case 7:
- if (strncmp(Mangled, "__ClassZ", Len + 1) == 0) {
+ if (starts_with(Mangled, "__ClassZ")) {
// The classinfo symbol for a given class.
Demangled->prepend("ClassInfo for ");
Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
- Mangled += Len;
- return Mangled;
+ Mangled.remove_prefix(Len);
+ return;
}
break;
case 11:
- if (strncmp(Mangled, "__InterfaceZ", Len + 1) == 0) {
+ if (starts_with(Mangled, "__InterfaceZ")) {
// The interface symbol for a given class.
Demangled->prepend("Interface for ");
Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
- Mangled += Len;
- return Mangled;
+ Mangled.remove_prefix(Len);
+ return;
}
break;
case 12:
- if (strncmp(Mangled, "__ModuleInfoZ", Len + 1) == 0) {
+ if (starts_with(Mangled, "__ModuleInfoZ")) {
// The ModuleInfo symbol for a given module.
Demangled->prepend("ModuleInfo for ");
Demangled->setCurrentPosition(Demangled->getCurrentPosition() - 1);
- Mangled += Len;
- return Mangled;
+ Mangled.remove_prefix(Len);
+ return;
}
break;
}
- *Demangled << StringView(Mangled, Len);
- Mangled += Len;
-
- return Mangled;
+ *Demangled << Mangled.substr(0, Len);
+ Mangled.remove_prefix(Len);
}
-Demangler::Demangler(const char *Mangled)
- : Str(Mangled), LastBackref(strlen(Mangled)) {}
+Demangler::Demangler(std::string_view Mangled)
+ : Str(Mangled), LastBackref(Mangled.length()) {}
const char *Demangler::parseMangle(OutputBuffer *Demangled) {
- return parseMangle(Demangled, this->Str);
+ std::string_view M(this->Str);
+ parseMangle(Demangled, M);
+ return M.data();
}
-char *llvm::dlangDemangle(const char *MangledName) {
- if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0)
+char *llvm::dlangDemangle(std::string_view MangledName) {
+ if (MangledName.empty() || !starts_with(MangledName, "_D"))
return nullptr;
OutputBuffer Demangled;
- if (strcmp(MangledName, "_Dmain") == 0) {
+ if (MangledName == "_Dmain") {
Demangled << "D main";
} else {
- Demangler D = Demangler(MangledName);
- MangledName = D.parseMangle(&Demangled);
+ Demangler D(MangledName);
+ const char *M = D.parseMangle(&Demangled);
// Check that the entire symbol was successfully demangled.
- if (MangledName == nullptr || *MangledName != '\0') {
+ if (M == nullptr || *M != '\0') {
std::free(Demangled.getBuffer());
return nullptr;
}
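
The D demangler now parses a std::string_view in place and signals failure by clearing the view, replacing the old convention of returning nullptr from each helper. A minimal sketch of that convention (decodeDigit/parsePair are made-up stand-ins for helpers like decodeNumber above):

#include <string_view>

void decodeDigit(std::string_view &Mangled, unsigned &Ret) {
  if (Mangled.empty() || Mangled.front() < '0' || Mangled.front() > '9') {
    Mangled = {}; // an empty, null view marks the parse error for the caller
    return;
  }
  Ret = Mangled.front() - '0';
  Mangled.remove_prefix(1); // consume the character, as remove_prefix does above
}

bool parsePair(std::string_view Input, unsigned &A, unsigned &B) {
  decodeDigit(Input, A);
  decodeDigit(Input, B);
  // Success: nothing failed and the whole input was consumed, mirroring the
  // *M != '\0' check at the end of dlangDemangle.
  return Input.data() != nullptr && Input.empty();
}
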
diff --git a/llvm/lib/Demangle/Demangle.cpp b/llvm/lib/Demangle/Demangle.cpp
index 9d128424cabf..f2aa571d685f 100644
--- a/llvm/lib/Demangle/Demangle.cpp
+++ b/llvm/lib/Demangle/Demangle.cpp
@@ -11,45 +11,45 @@
//===----------------------------------------------------------------------===//
#include "llvm/Demangle/Demangle.h"
+#include "llvm/Demangle/StringViewExtras.h"
#include <cstdlib>
-#include <cstring>
+#include <string_view>
-static bool isItaniumEncoding(const char *S) {
- // Itanium encoding requires 1 or 3 leading underscores, followed by 'Z'.
- return std::strncmp(S, "_Z", 2) == 0 || std::strncmp(S, "___Z", 4) == 0;
-}
-
-static bool isRustEncoding(const char *S) { return S[0] == '_' && S[1] == 'R'; }
-
-static bool isDLangEncoding(const std::string &MangledName) {
- return MangledName.size() >= 2 && MangledName[0] == '_' &&
- MangledName[1] == 'D';
-}
+using llvm::itanium_demangle::starts_with;
-std::string llvm::demangle(const std::string &MangledName) {
+std::string llvm::demangle(std::string_view MangledName) {
std::string Result;
- const char *S = MangledName.c_str();
- if (nonMicrosoftDemangle(S, Result))
+ if (nonMicrosoftDemangle(MangledName, Result))
return Result;
- if (S[0] == '_' && nonMicrosoftDemangle(S + 1, Result))
+ if (starts_with(MangledName, '_') &&
+ nonMicrosoftDemangle(MangledName.substr(1), Result))
return Result;
- if (char *Demangled =
- microsoftDemangle(S, nullptr, nullptr, nullptr, nullptr)) {
+ if (char *Demangled = microsoftDemangle(MangledName, nullptr, nullptr)) {
Result = Demangled;
std::free(Demangled);
- return Result;
+ } else {
+ Result = MangledName;
}
+ return Result;
+}
- return MangledName;
+static bool isItaniumEncoding(std::string_view S) {
+ // Itanium encoding requires 1 or 3 leading underscores, followed by 'Z'.
+ return starts_with(S, "_Z") || starts_with(S, "___Z");
}
-bool llvm::nonMicrosoftDemangle(const char *MangledName, std::string &Result) {
+static bool isRustEncoding(std::string_view S) { return starts_with(S, "_R"); }
+
+static bool isDLangEncoding(std::string_view S) { return starts_with(S, "_D"); }
+
+bool llvm::nonMicrosoftDemangle(std::string_view MangledName,
+ std::string &Result) {
char *Demangled = nullptr;
if (isItaniumEncoding(MangledName))
- Demangled = itaniumDemangle(MangledName, nullptr, nullptr, nullptr);
+ Demangled = itaniumDemangle(MangledName);
else if (isRustEncoding(MangledName))
Demangled = rustDemangle(MangledName);
else if (isDLangEncoding(MangledName))
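
The demangler front end now classifies a symbol purely by its mangling prefix over std::string_view. A standalone sketch of that dispatch (classify and the local startsWith helper are illustrative, not the LLVM API):

#include <string_view>

enum class Scheme { Itanium, Rust, DLang, Unknown };

static bool startsWith(std::string_view S, std::string_view Prefix) {
  return S.substr(0, Prefix.size()) == Prefix;
}

Scheme classify(std::string_view Mangled) {
  // Itanium allows one or three leading underscores before the 'Z'.
  if (startsWith(Mangled, "_Z") || startsWith(Mangled, "___Z"))
    return Scheme::Itanium;
  if (startsWith(Mangled, "_R"))
    return Scheme::Rust;
  if (startsWith(Mangled, "_D"))
    return Scheme::DLang;
  return Scheme::Unknown; // nonMicrosoftDemangle() reports failure here
}
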
diff --git a/llvm/lib/Demangle/ItaniumDemangle.cpp b/llvm/lib/Demangle/ItaniumDemangle.cpp
index 9b646ea800aa..e3f208f0adf8 100644
--- a/llvm/lib/Demangle/ItaniumDemangle.cpp
+++ b/llvm/lib/Demangle/ItaniumDemangle.cpp
@@ -18,6 +18,7 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
+#include <exception>
#include <functional>
#include <utility>
@@ -78,8 +79,8 @@ struct DumpVisitor {
}
void printStr(const char *S) { fprintf(stderr, "%s", S); }
- void print(StringView SV) {
- fprintf(stderr, "\"%.*s\"", (int)SV.size(), SV.begin());
+ void print(std::string_view SV) {
+ fprintf(stderr, "\"%.*s\"", (int)SV.size(), SV.data());
}
void print(const Node *N) {
if (N)
@@ -365,33 +366,21 @@ public:
using Demangler = itanium_demangle::ManglingParser<DefaultAllocator>;
-char *llvm::itaniumDemangle(const char *MangledName, char *Buf,
- size_t *N, int *Status) {
- if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
- if (Status)
- *Status = demangle_invalid_args;
+char *llvm::itaniumDemangle(std::string_view MangledName) {
+ if (MangledName.empty())
return nullptr;
- }
- int InternalStatus = demangle_success;
- Demangler Parser(MangledName, MangledName + std::strlen(MangledName));
+ Demangler Parser(MangledName.data(),
+ MangledName.data() + MangledName.length());
Node *AST = Parser.parse();
+ if (!AST)
+ return nullptr;
- if (AST == nullptr)
- InternalStatus = demangle_invalid_mangled_name;
- else {
- OutputBuffer OB(Buf, N);
- assert(Parser.ForwardTemplateRefs.empty());
- AST->print(OB);
- OB += '\0';
- if (N != nullptr)
- *N = OB.getCurrentPosition();
- Buf = OB.getBuffer();
- }
-
- if (Status)
- *Status = InternalStatus;
- return InternalStatus == demangle_success ? Buf : nullptr;
+ OutputBuffer OB;
+ assert(Parser.ForwardTemplateRefs.empty());
+ AST->print(OB);
+ OB += '\0';
+ return OB.getBuffer();
}
ItaniumPartialDemangler::ItaniumPartialDemangler()
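
itaniumDemangle() now takes a std::string_view and returns a malloc'd C string (or nullptr); the caller-supplied buffer, length, and status out-parameters are gone. A short usage sketch:

#include "llvm/Demangle/Demangle.h"
#include <cstdio>
#include <cstdlib>

void printDemangled(const char *Mangled) {
  if (char *Demangled = llvm::itaniumDemangle(Mangled)) {
    std::printf("%s\n", Demangled);
    std::free(Demangled); // the result is heap-allocated by the demangler
  } else {
    std::printf("%s\n", Mangled); // not a valid Itanium mangling
  }
}
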
diff --git a/llvm/lib/Demangle/MicrosoftDemangle.cpp b/llvm/lib/Demangle/MicrosoftDemangle.cpp
index c21b0a30105e..cd7ff40d63a4 100644
--- a/llvm/lib/Demangle/MicrosoftDemangle.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangle.cpp
@@ -14,34 +14,50 @@
//===----------------------------------------------------------------------===//
#include "llvm/Demangle/MicrosoftDemangle.h"
-#include "llvm/Demangle/Demangle.h"
-#include "llvm/Demangle/MicrosoftDemangleNodes.h"
+#include "llvm/Demangle/Demangle.h"
#include "llvm/Demangle/DemangleConfig.h"
-#include "llvm/Demangle/StringView.h"
+#include "llvm/Demangle/MicrosoftDemangleNodes.h"
+#include "llvm/Demangle/StringViewExtras.h"
#include "llvm/Demangle/Utility.h"
#include <array>
#include <cctype>
#include <cstdio>
+#include <string_view>
#include <tuple>
using namespace llvm;
using namespace ms_demangle;
-static bool startsWithDigit(StringView S) {
+static bool startsWithDigit(std::string_view S) {
return !S.empty() && std::isdigit(S.front());
}
-
struct NodeList {
Node *N = nullptr;
NodeList *Next = nullptr;
};
-static bool isMemberPointer(StringView MangledName, bool &Error) {
+static bool consumeFront(std::string_view &S, char C) {
+ if (!llvm::itanium_demangle::starts_with(S, C))
+ return false;
+ S.remove_prefix(1);
+ return true;
+}
+
+static bool consumeFront(std::string_view &S, std::string_view C) {
+ if (!llvm::itanium_demangle::starts_with(S, C))
+ return false;
+ S.remove_prefix(C.size());
+ return true;
+}
+
+static bool isMemberPointer(std::string_view MangledName, bool &Error) {
Error = false;
- switch (MangledName.popFront()) {
+ const char F = MangledName.front();
+ MangledName.remove_prefix(1);
+ switch (F) {
case '$':
// This is probably an rvalue reference (e.g. $$Q), and you cannot have an
// rvalue reference to a member.
@@ -75,9 +91,9 @@ static bool isMemberPointer(StringView MangledName, bool &Error) {
// Remove ext qualifiers since those can appear on either type and are
// therefore not indicative.
- MangledName.consumeFront('E'); // 64-bit
- MangledName.consumeFront('I'); // restrict
- MangledName.consumeFront('F'); // unaligned
+ consumeFront(MangledName, 'E'); // 64-bit
+ consumeFront(MangledName, 'I'); // restrict
+ consumeFront(MangledName, 'F'); // unaligned
if (MangledName.empty()) {
Error = true;
@@ -103,50 +119,50 @@ static bool isMemberPointer(StringView MangledName, bool &Error) {
}
static SpecialIntrinsicKind
-consumeSpecialIntrinsicKind(StringView &MangledName) {
- if (MangledName.consumeFront("?_7"))
+consumeSpecialIntrinsicKind(std::string_view &MangledName) {
+ if (consumeFront(MangledName, "?_7"))
return SpecialIntrinsicKind::Vftable;
- if (MangledName.consumeFront("?_8"))
+ if (consumeFront(MangledName, "?_8"))
return SpecialIntrinsicKind::Vbtable;
- if (MangledName.consumeFront("?_9"))
+ if (consumeFront(MangledName, "?_9"))
return SpecialIntrinsicKind::VcallThunk;
- if (MangledName.consumeFront("?_A"))
+ if (consumeFront(MangledName, "?_A"))
return SpecialIntrinsicKind::Typeof;
- if (MangledName.consumeFront("?_B"))
+ if (consumeFront(MangledName, "?_B"))
return SpecialIntrinsicKind::LocalStaticGuard;
- if (MangledName.consumeFront("?_C"))
+ if (consumeFront(MangledName, "?_C"))
return SpecialIntrinsicKind::StringLiteralSymbol;
- if (MangledName.consumeFront("?_P"))
+ if (consumeFront(MangledName, "?_P"))
return SpecialIntrinsicKind::UdtReturning;
- if (MangledName.consumeFront("?_R0"))
+ if (consumeFront(MangledName, "?_R0"))
return SpecialIntrinsicKind::RttiTypeDescriptor;
- if (MangledName.consumeFront("?_R1"))
+ if (consumeFront(MangledName, "?_R1"))
return SpecialIntrinsicKind::RttiBaseClassDescriptor;
- if (MangledName.consumeFront("?_R2"))
+ if (consumeFront(MangledName, "?_R2"))
return SpecialIntrinsicKind::RttiBaseClassArray;
- if (MangledName.consumeFront("?_R3"))
+ if (consumeFront(MangledName, "?_R3"))
return SpecialIntrinsicKind::RttiClassHierarchyDescriptor;
- if (MangledName.consumeFront("?_R4"))
+ if (consumeFront(MangledName, "?_R4"))
return SpecialIntrinsicKind::RttiCompleteObjLocator;
- if (MangledName.consumeFront("?_S"))
+ if (consumeFront(MangledName, "?_S"))
return SpecialIntrinsicKind::LocalVftable;
- if (MangledName.consumeFront("?__E"))
+ if (consumeFront(MangledName, "?__E"))
return SpecialIntrinsicKind::DynamicInitializer;
- if (MangledName.consumeFront("?__F"))
+ if (consumeFront(MangledName, "?__F"))
return SpecialIntrinsicKind::DynamicAtexitDestructor;
- if (MangledName.consumeFront("?__J"))
+ if (consumeFront(MangledName, "?__J"))
return SpecialIntrinsicKind::LocalStaticThreadGuard;
return SpecialIntrinsicKind::None;
}
-static bool startsWithLocalScopePattern(StringView S) {
- if (!S.consumeFront('?'))
+static bool startsWithLocalScopePattern(std::string_view S) {
+ if (!consumeFront(S, '?'))
return false;
size_t End = S.find('?');
- if (End == StringView::npos)
+ if (End == std::string_view::npos)
return false;
- StringView Candidate = S.substr(0, End);
+ std::string_view Candidate = S.substr(0, End);
if (Candidate.empty())
return false;
@@ -158,7 +174,7 @@ static bool startsWithLocalScopePattern(StringView S) {
// If it's not 0-9, then it's an encoded number terminated with an @
if (Candidate.back() != '@')
return false;
- Candidate = Candidate.dropBack();
+ Candidate.remove_suffix(1);
// An encoded number starts with B-P and all subsequent digits are in A-P.
// Note that the reason the first digit cannot be A is two fold. First, it
@@ -168,17 +184,17 @@ static bool startsWithLocalScopePattern(StringView S) {
// ambiguity is also why single digit encoded numbers use 0-9 rather than A-J.
if (Candidate[0] < 'B' || Candidate[0] > 'P')
return false;
- Candidate = Candidate.dropFront();
+ Candidate.remove_prefix(1);
while (!Candidate.empty()) {
if (Candidate[0] < 'A' || Candidate[0] > 'P')
return false;
- Candidate = Candidate.dropFront();
+ Candidate.remove_prefix(1);
}
return true;
}
-static bool isTagType(StringView S) {
+static bool isTagType(std::string_view S) {
switch (S.front()) {
case 'T': // union
case 'U': // struct
@@ -189,10 +205,10 @@ static bool isTagType(StringView S) {
return false;
}
-static bool isCustomType(StringView S) { return S[0] == '?'; }
+static bool isCustomType(std::string_view S) { return S[0] == '?'; }
-static bool isPointerType(StringView S) {
- if (S.startsWith("$$Q")) // foo &&
+static bool isPointerType(std::string_view S) {
+ if (llvm::itanium_demangle::starts_with(S, "$$Q")) // foo &&
return true;
switch (S.front()) {
@@ -206,27 +222,30 @@ static bool isPointerType(StringView S) {
return false;
}
-static bool isArrayType(StringView S) { return S[0] == 'Y'; }
+static bool isArrayType(std::string_view S) { return S[0] == 'Y'; }
-static bool isFunctionType(StringView S) {
- return S.startsWith("$$A8@@") || S.startsWith("$$A6");
+static bool isFunctionType(std::string_view S) {
+ return llvm::itanium_demangle::starts_with(S, "$$A8@@") ||
+ llvm::itanium_demangle::starts_with(S, "$$A6");
}
static FunctionRefQualifier
-demangleFunctionRefQualifier(StringView &MangledName) {
- if (MangledName.consumeFront('G'))
+demangleFunctionRefQualifier(std::string_view &MangledName) {
+ if (consumeFront(MangledName, 'G'))
return FunctionRefQualifier::Reference;
- else if (MangledName.consumeFront('H'))
+ else if (consumeFront(MangledName, 'H'))
return FunctionRefQualifier::RValueReference;
return FunctionRefQualifier::None;
}
static std::pair<Qualifiers, PointerAffinity>
-demanglePointerCVQualifiers(StringView &MangledName) {
- if (MangledName.consumeFront("$$Q"))
+demanglePointerCVQualifiers(std::string_view &MangledName) {
+ if (consumeFront(MangledName, "$$Q"))
return std::make_pair(Q_None, PointerAffinity::RValueReference);
- switch (MangledName.popFront()) {
+ const char F = MangledName.front();
+ MangledName.remove_prefix(1);
+ switch (F) {
case 'A':
return std::make_pair(Q_None, PointerAffinity::Reference);
case 'P':
@@ -244,18 +263,18 @@ demanglePointerCVQualifiers(StringView &MangledName) {
DEMANGLE_UNREACHABLE;
}
-StringView Demangler::copyString(StringView Borrowed) {
+std::string_view Demangler::copyString(std::string_view Borrowed) {
char *Stable = Arena.allocUnalignedBuffer(Borrowed.size());
// This is not a micro-optimization, it avoids UB, should Borrowed be an null
// buffer.
if (Borrowed.size())
- std::memcpy(Stable, Borrowed.begin(), Borrowed.size());
+ std::memcpy(Stable, Borrowed.data(), Borrowed.size());
return {Stable, Borrowed.size()};
}
SpecialTableSymbolNode *
-Demangler::demangleSpecialTableSymbolNode(StringView &MangledName,
+Demangler::demangleSpecialTableSymbolNode(std::string_view &MangledName,
SpecialIntrinsicKind K) {
NamedIdentifierNode *NI = Arena.alloc<NamedIdentifierNode>();
switch (K) {
@@ -282,20 +301,22 @@ Demangler::demangleSpecialTableSymbolNode(StringView &MangledName,
Error = true;
return nullptr;
}
- char Front = MangledName.popFront();
+ char Front = MangledName.front();
+ MangledName.remove_prefix(1);
if (Front != '6' && Front != '7') {
Error = true;
return nullptr;
}
std::tie(STSN->Quals, IsMember) = demangleQualifiers(MangledName);
- if (!MangledName.consumeFront('@'))
+ if (!consumeFront(MangledName, '@'))
STSN->TargetName = demangleFullyQualifiedTypeName(MangledName);
return STSN;
}
LocalStaticGuardVariableNode *
-Demangler::demangleLocalStaticGuard(StringView &MangledName, bool IsThread) {
+Demangler::demangleLocalStaticGuard(std::string_view &MangledName,
+ bool IsThread) {
LocalStaticGuardIdentifierNode *LSGI =
Arena.alloc<LocalStaticGuardIdentifierNode>();
LSGI->IsThread = IsThread;
@@ -304,9 +325,9 @@ Demangler::demangleLocalStaticGuard(StringView &MangledName, bool IsThread) {
Arena.alloc<LocalStaticGuardVariableNode>();
LSGVN->Name = QN;
- if (MangledName.consumeFront("4IA"))
+ if (consumeFront(MangledName, "4IA"))
LSGVN->IsVisible = false;
- else if (MangledName.consumeFront("5"))
+ else if (consumeFront(MangledName, "5"))
LSGVN->IsVisible = true;
else {
Error = true;
@@ -319,7 +340,7 @@ Demangler::demangleLocalStaticGuard(StringView &MangledName, bool IsThread) {
}
static NamedIdentifierNode *synthesizeNamedIdentifier(ArenaAllocator &Arena,
- StringView Name) {
+ std::string_view Name) {
NamedIdentifierNode *Id = Arena.alloc<NamedIdentifierNode>();
Id->Name = Name;
return Id;
@@ -336,27 +357,29 @@ static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena,
}
static QualifiedNameNode *synthesizeQualifiedName(ArenaAllocator &Arena,
- StringView Name) {
+ std::string_view Name) {
NamedIdentifierNode *Id = synthesizeNamedIdentifier(Arena, Name);
return synthesizeQualifiedName(Arena, Id);
}
static VariableSymbolNode *synthesizeVariable(ArenaAllocator &Arena,
TypeNode *Type,
- StringView VariableName) {
+ std::string_view VariableName) {
VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
VSN->Type = Type;
VSN->Name = synthesizeQualifiedName(Arena, VariableName);
return VSN;
}
-VariableSymbolNode *Demangler::demangleUntypedVariable(
- ArenaAllocator &Arena, StringView &MangledName, StringView VariableName) {
+VariableSymbolNode *
+Demangler::demangleUntypedVariable(ArenaAllocator &Arena,
+ std::string_view &MangledName,
+ std::string_view VariableName) {
NamedIdentifierNode *NI = synthesizeNamedIdentifier(Arena, VariableName);
QualifiedNameNode *QN = demangleNameScopeChain(MangledName, NI);
VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
VSN->Name = QN;
- if (MangledName.consumeFront("8"))
+ if (consumeFront(MangledName, "8"))
return VSN;
Error = true;
@@ -365,7 +388,7 @@ VariableSymbolNode *Demangler::demangleUntypedVariable(
VariableSymbolNode *
Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
- StringView &MangledName) {
+ std::string_view &MangledName) {
RttiBaseClassDescriptorNode *RBCDN =
Arena.alloc<RttiBaseClassDescriptorNode>();
RBCDN->NVOffset = demangleUnsigned(MangledName);
@@ -377,18 +400,19 @@ Demangler::demangleRttiBaseClassDescriptorNode(ArenaAllocator &Arena,
VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
VSN->Name = demangleNameScopeChain(MangledName, RBCDN);
- MangledName.consumeFront('8');
+ consumeFront(MangledName, '8');
return VSN;
}
-FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
- bool IsDestructor) {
+FunctionSymbolNode *
+Demangler::demangleInitFiniStub(std::string_view &MangledName,
+ bool IsDestructor) {
DynamicStructorIdentifierNode *DSIN =
Arena.alloc<DynamicStructorIdentifierNode>();
DSIN->IsDestructor = IsDestructor;
bool IsKnownStaticDataMember = false;
- if (MangledName.consumeFront('?'))
+ if (consumeFront(MangledName, '?'))
IsKnownStaticDataMember = true;
SymbolNode *Symbol = demangleDeclarator(MangledName);
@@ -406,7 +430,7 @@ FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
// both cases.
int AtCount = IsKnownStaticDataMember ? 2 : 1;
for (int I = 0; I < AtCount; ++I) {
- if (MangledName.consumeFront('@'))
+ if (consumeFront(MangledName, '@'))
continue;
Error = true;
return nullptr;
@@ -430,7 +454,7 @@ FunctionSymbolNode *Demangler::demangleInitFiniStub(StringView &MangledName,
return FSN;
}
-SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
+SymbolNode *Demangler::demangleSpecialIntrinsic(std::string_view &MangledName) {
SpecialIntrinsicKind SIK = consumeSpecialIntrinsicKind(MangledName);
switch (SIK) {
@@ -453,7 +477,7 @@ SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
if (Error)
break;
- if (!MangledName.consumeFront("@8"))
+ if (!consumeFront(MangledName, "@8"))
break;
if (!MangledName.empty())
break;
@@ -484,18 +508,18 @@ SymbolNode *Demangler::demangleSpecialIntrinsic(StringView &MangledName) {
}
IdentifierNode *
-Demangler::demangleFunctionIdentifierCode(StringView &MangledName) {
- assert(MangledName.startsWith('?'));
- MangledName = MangledName.dropFront();
+Demangler::demangleFunctionIdentifierCode(std::string_view &MangledName) {
+ assert(llvm::itanium_demangle::starts_with(MangledName, '?'));
+ MangledName.remove_prefix(1);
if (MangledName.empty()) {
Error = true;
return nullptr;
}
- if (MangledName.consumeFront("__"))
+ if (consumeFront(MangledName, "__"))
return demangleFunctionIdentifierCode(
MangledName, FunctionIdentifierCodeGroup::DoubleUnder);
- if (MangledName.consumeFront("_"))
+ if (consumeFront(MangledName, "_"))
return demangleFunctionIdentifierCode(MangledName,
FunctionIdentifierCodeGroup::Under);
return demangleFunctionIdentifierCode(MangledName,
@@ -503,7 +527,7 @@ Demangler::demangleFunctionIdentifierCode(StringView &MangledName) {
}
StructorIdentifierNode *
-Demangler::demangleStructorIdentifier(StringView &MangledName,
+Demangler::demangleStructorIdentifier(std::string_view &MangledName,
bool IsDestructor) {
StructorIdentifierNode *N = Arena.alloc<StructorIdentifierNode>();
N->IsDestructor = IsDestructor;
@@ -511,14 +535,14 @@ Demangler::demangleStructorIdentifier(StringView &MangledName,
}
ConversionOperatorIdentifierNode *
-Demangler::demangleConversionOperatorIdentifier(StringView &MangledName) {
+Demangler::demangleConversionOperatorIdentifier(std::string_view &MangledName) {
ConversionOperatorIdentifierNode *N =
Arena.alloc<ConversionOperatorIdentifierNode>();
return N;
}
LiteralOperatorIdentifierNode *
-Demangler::demangleLiteralOperatorIdentifier(StringView &MangledName) {
+Demangler::demangleLiteralOperatorIdentifier(std::string_view &MangledName) {
LiteralOperatorIdentifierNode *N =
Arena.alloc<LiteralOperatorIdentifierNode>();
N->Name = demangleSimpleString(MangledName, /*Memorize=*/false);
@@ -666,15 +690,17 @@ Demangler::translateIntrinsicFunctionCode(char CH,
}
IdentifierNode *
-Demangler::demangleFunctionIdentifierCode(StringView &MangledName,
+Demangler::demangleFunctionIdentifierCode(std::string_view &MangledName,
FunctionIdentifierCodeGroup Group) {
if (MangledName.empty()) {
Error = true;
return nullptr;
}
+ const char CH = MangledName.front();
switch (Group) {
case FunctionIdentifierCodeGroup::Basic:
- switch (char CH = MangledName.popFront()) {
+ MangledName.remove_prefix(1);
+ switch (CH) {
case '0':
case '1':
return demangleStructorIdentifier(MangledName, CH == '1');
@@ -685,10 +711,12 @@ Demangler::demangleFunctionIdentifierCode(StringView &MangledName,
translateIntrinsicFunctionCode(CH, Group));
}
case FunctionIdentifierCodeGroup::Under:
+ MangledName.remove_prefix(1);
return Arena.alloc<IntrinsicFunctionIdentifierNode>(
- translateIntrinsicFunctionCode(MangledName.popFront(), Group));
+ translateIntrinsicFunctionCode(CH, Group));
case FunctionIdentifierCodeGroup::DoubleUnder:
- switch (char CH = MangledName.popFront()) {
+ MangledName.remove_prefix(1);
+ switch (CH) {
case 'K':
return demangleLiteralOperatorIdentifier(MangledName);
default:
@@ -700,7 +728,7 @@ Demangler::demangleFunctionIdentifierCode(StringView &MangledName,
DEMANGLE_UNREACHABLE;
}
-SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName,
+SymbolNode *Demangler::demangleEncodedSymbol(std::string_view &MangledName,
QualifiedNameNode *Name) {
if (MangledName.empty()) {
Error = true;
@@ -730,7 +758,7 @@ SymbolNode *Demangler::demangleEncodedSymbol(StringView &MangledName,
return FSN;
}
-SymbolNode *Demangler::demangleDeclarator(StringView &MangledName) {
+SymbolNode *Demangler::demangleDeclarator(std::string_view &MangledName) {
// What follows is a main symbol name. This may include namespaces or class
// back references.
QualifiedNameNode *QN = demangleFullyQualifiedSymbolName(MangledName);
@@ -754,18 +782,19 @@ SymbolNode *Demangler::demangleDeclarator(StringView &MangledName) {
return Symbol;
}
-SymbolNode *Demangler::demangleMD5Name(StringView &MangledName) {
- assert(MangledName.startsWith("??@"));
+SymbolNode *Demangler::demangleMD5Name(std::string_view &MangledName) {
+ assert(llvm::itanium_demangle::starts_with(MangledName, "??@"));
// This is an MD5 mangled name. We can't demangle it, just return the
// mangled name.
// An MD5 mangled name is ??@ followed by 32 characters and a terminating @.
size_t MD5Last = MangledName.find('@', strlen("??@"));
- if (MD5Last == StringView::npos) {
+ if (MD5Last == std::string_view::npos) {
Error = true;
return nullptr;
}
- const char *Start = MangledName.begin();
- MangledName = MangledName.dropFront(MD5Last + 1);
+ const char *Start = MangledName.data();
+ const size_t StartSize = MangledName.size();
+ MangledName.remove_prefix(MD5Last + 1);
// There are two additional special cases for MD5 names:
// 1. For complete object locators where the object name is long enough
@@ -777,18 +806,20 @@ SymbolNode *Demangler::demangleMD5Name(StringView &MangledName) {
// instead of _CT??@...@8 with just one MD5 name. Since we don't yet
// demangle catchable types anywhere, this isn't handled for MD5 names
// either.
- MangledName.consumeFront("??_R4@");
+ consumeFront(MangledName, "??_R4@");
- StringView MD5(Start, MangledName.begin());
+ assert(MangledName.size() < StartSize);
+ const size_t Count = StartSize - MangledName.size();
+ std::string_view MD5(Start, Count);
SymbolNode *S = Arena.alloc<SymbolNode>(NodeKind::Md5Symbol);
S->Name = synthesizeQualifiedName(Arena, MD5);
return S;
}
-SymbolNode *Demangler::demangleTypeinfoName(StringView &MangledName) {
- assert(MangledName.startsWith('.'));
- MangledName.consumeFront('.');
+SymbolNode *Demangler::demangleTypeinfoName(std::string_view &MangledName) {
+ assert(llvm::itanium_demangle::starts_with(MangledName, '.'));
+ consumeFront(MangledName, '.');
TypeNode *T = demangleType(MangledName, QualifierMangleMode::Result);
if (Error || !MangledName.empty()) {
@@ -799,23 +830,23 @@ SymbolNode *Demangler::demangleTypeinfoName(StringView &MangledName) {
}
// Parser entry point.
-SymbolNode *Demangler::parse(StringView &MangledName) {
+SymbolNode *Demangler::parse(std::string_view &MangledName) {
// Typeinfo names are strings stored in RTTI data. They're not symbol names.
// It's still useful to demangle them. They're the only demangled entity
// that doesn't start with a "?" but a ".".
- if (MangledName.startsWith('.'))
+ if (llvm::itanium_demangle::starts_with(MangledName, '.'))
return demangleTypeinfoName(MangledName);
- if (MangledName.startsWith("??@"))
+ if (llvm::itanium_demangle::starts_with(MangledName, "??@"))
return demangleMD5Name(MangledName);
// MSVC-style mangled symbols must start with '?'.
- if (!MangledName.startsWith('?')) {
+ if (!llvm::itanium_demangle::starts_with(MangledName, '?')) {
Error = true;
return nullptr;
}
- MangledName.consumeFront('?');
+ consumeFront(MangledName, '?');
// ?$ is a template instantiation, but all other names that start with ? are
// operators / special names.
@@ -825,12 +856,12 @@ SymbolNode *Demangler::parse(StringView &MangledName) {
return demangleDeclarator(MangledName);
}
-TagTypeNode *Demangler::parseTagUniqueName(StringView &MangledName) {
- if (!MangledName.consumeFront(".?A")) {
+TagTypeNode *Demangler::parseTagUniqueName(std::string_view &MangledName) {
+ if (!consumeFront(MangledName, ".?A")) {
Error = true;
return nullptr;
}
- MangledName.consumeFront(".?A");
+ consumeFront(MangledName, ".?A");
if (MangledName.empty()) {
Error = true;
return nullptr;
@@ -846,8 +877,9 @@ TagTypeNode *Demangler::parseTagUniqueName(StringView &MangledName) {
// ::= 3 # global
// ::= 4 # static local
-VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName,
- StorageClass SC) {
+VariableSymbolNode *
+Demangler::demangleVariableEncoding(std::string_view &MangledName,
+ StorageClass SC) {
VariableSymbolNode *VSN = Arena.alloc<VariableSymbolNode>();
VSN->Type = demangleType(MangledName, QualifierMangleMode::Drop);
@@ -897,12 +929,13 @@ VariableSymbolNode *Demangler::demangleVariableEncoding(StringView &MangledName,
// ::= <hex digit>+ @ # when Number == 0 or >= 10
//
// <hex-digit> ::= [A-P] # A = 0, B = 1, ...
-std::pair<uint64_t, bool> Demangler::demangleNumber(StringView &MangledName) {
- bool IsNegative = MangledName.consumeFront('?');
+std::pair<uint64_t, bool>
+Demangler::demangleNumber(std::string_view &MangledName) {
+ bool IsNegative = consumeFront(MangledName, '?');
if (startsWithDigit(MangledName)) {
uint64_t Ret = MangledName[0] - '0' + 1;
- MangledName = MangledName.dropFront(1);
+ MangledName.remove_prefix(1);
return {Ret, IsNegative};
}
@@ -910,7 +943,7 @@ std::pair<uint64_t, bool> Demangler::demangleNumber(StringView &MangledName) {
for (size_t i = 0; i < MangledName.size(); ++i) {
char C = MangledName[i];
if (C == '@') {
- MangledName = MangledName.dropFront(i + 1);
+ MangledName.remove_prefix(i + 1);
return {Ret, IsNegative};
}
if ('A' <= C && C <= 'P') {
@@ -924,7 +957,7 @@ std::pair<uint64_t, bool> Demangler::demangleNumber(StringView &MangledName) {
return {0ULL, false};
}
-uint64_t Demangler::demangleUnsigned(StringView &MangledName) {
+uint64_t Demangler::demangleUnsigned(std::string_view &MangledName) {
bool IsNegative = false;
uint64_t Number = 0;
std::tie(Number, IsNegative) = demangleNumber(MangledName);
@@ -933,7 +966,7 @@ uint64_t Demangler::demangleUnsigned(StringView &MangledName) {
return Number;
}
-int64_t Demangler::demangleSigned(StringView &MangledName) {
+int64_t Demangler::demangleSigned(std::string_view &MangledName) {
bool IsNegative = false;
uint64_t Number = 0;
std::tie(Number, IsNegative) = demangleNumber(MangledName);
@@ -945,7 +978,7 @@ int64_t Demangler::demangleSigned(StringView &MangledName) {
// First 10 strings can be referenced by special BackReferences ?0, ?1, ..., ?9.
// Memorize it.
-void Demangler::memorizeString(StringView S) {
+void Demangler::memorizeString(std::string_view S) {
if (Backrefs.NamesCount >= BackrefContext::Max)
return;
for (size_t i = 0; i < Backrefs.NamesCount; ++i)
@@ -956,7 +989,8 @@ void Demangler::memorizeString(StringView S) {
Backrefs.Names[Backrefs.NamesCount++] = N;
}
-NamedIdentifierNode *Demangler::demangleBackRefName(StringView &MangledName) {
+NamedIdentifierNode *
+Demangler::demangleBackRefName(std::string_view &MangledName) {
assert(startsWithDigit(MangledName));
size_t I = MangledName[0] - '0';
@@ -965,7 +999,7 @@ NamedIdentifierNode *Demangler::demangleBackRefName(StringView &MangledName) {
return nullptr;
}
- MangledName = MangledName.dropFront();
+ MangledName.remove_prefix(1);
return Backrefs.Names[I];
}
@@ -974,16 +1008,16 @@ void Demangler::memorizeIdentifier(IdentifierNode *Identifier) {
// memorize it for the purpose of back-referencing.
OutputBuffer OB;
Identifier->output(OB, OF_Default);
- StringView Owned = copyString(OB);
+ std::string_view Owned = copyString(OB);
memorizeString(Owned);
std::free(OB.getBuffer());
}
IdentifierNode *
-Demangler::demangleTemplateInstantiationName(StringView &MangledName,
+Demangler::demangleTemplateInstantiationName(std::string_view &MangledName,
NameBackrefBehavior NBB) {
- assert(MangledName.startsWith("?$"));
- MangledName.consumeFront("?$");
+ assert(llvm::itanium_demangle::starts_with(MangledName, "?$"));
+ consumeFront(MangledName, "?$");
BackrefContext OuterContext;
std::swap(OuterContext, Backrefs);
@@ -1013,9 +1047,9 @@ Demangler::demangleTemplateInstantiationName(StringView &MangledName,
return Identifier;
}
-NamedIdentifierNode *Demangler::demangleSimpleName(StringView &MangledName,
- bool Memorize) {
- StringView S = demangleSimpleString(MangledName, Memorize);
+NamedIdentifierNode *
+Demangler::demangleSimpleName(std::string_view &MangledName, bool Memorize) {
+ std::string_view S = demangleSimpleString(MangledName, Memorize);
if (Error)
return nullptr;
@@ -1031,33 +1065,36 @@ static uint8_t rebasedHexDigitToNumber(char C) {
return (C <= 'J') ? (C - 'A') : (10 + C - 'K');
}
-uint8_t Demangler::demangleCharLiteral(StringView &MangledName) {
+uint8_t Demangler::demangleCharLiteral(std::string_view &MangledName) {
assert(!MangledName.empty());
- if (!MangledName.startsWith('?'))
- return MangledName.popFront();
+ if (!llvm::itanium_demangle::starts_with(MangledName, '?')) {
+ const uint8_t F = MangledName.front();
+ MangledName.remove_prefix(1);
+ return F;
+ }
- MangledName = MangledName.dropFront();
+ MangledName.remove_prefix(1);
if (MangledName.empty())
goto CharLiteralError;
- if (MangledName.consumeFront('$')) {
+ if (consumeFront(MangledName, '$')) {
// Two hex digits
if (MangledName.size() < 2)
goto CharLiteralError;
- StringView Nibbles = MangledName.substr(0, 2);
+ std::string_view Nibbles = MangledName.substr(0, 2);
if (!isRebasedHexDigit(Nibbles[0]) || !isRebasedHexDigit(Nibbles[1]))
goto CharLiteralError;
// Don't append the null terminator.
uint8_t C1 = rebasedHexDigitToNumber(Nibbles[0]);
uint8_t C2 = rebasedHexDigitToNumber(Nibbles[1]);
- MangledName = MangledName.dropFront(2);
+ MangledName.remove_prefix(2);
return (C1 << 4) | C2;
}
if (startsWithDigit(MangledName)) {
const char *Lookup = ",/\\:. \n\t'-";
char C = Lookup[MangledName[0] - '0'];
- MangledName = MangledName.dropFront();
+ MangledName.remove_prefix(1);
return C;
}
@@ -1067,7 +1104,7 @@ uint8_t Demangler::demangleCharLiteral(StringView &MangledName) {
'\xEF', '\xF0', '\xF1', '\xF2', '\xF3', '\xF4', '\xF5',
'\xF6', '\xF7', '\xF8', '\xF9', '\xFA'};
char C = Lookup[MangledName[0] - 'a'];
- MangledName = MangledName.dropFront();
+ MangledName.remove_prefix(1);
return C;
}
@@ -1077,7 +1114,7 @@ uint8_t Demangler::demangleCharLiteral(StringView &MangledName) {
'\xCF', '\xD0', '\xD1', '\xD2', '\xD3', '\xD4', '\xD5',
'\xD6', '\xD7', '\xD8', '\xD9', '\xDA'};
char C = Lookup[MangledName[0] - 'A'];
- MangledName = MangledName.dropFront();
+ MangledName.remove_prefix(1);
return C;
}
@@ -1086,7 +1123,7 @@ CharLiteralError:
return '\0';
}
-wchar_t Demangler::demangleWcharLiteral(StringView &MangledName) {
+wchar_t Demangler::demangleWcharLiteral(std::string_view &MangledName) {
uint8_t C1, C2;
C1 = demangleCharLiteral(MangledName);
@@ -1131,7 +1168,7 @@ static void outputHex(OutputBuffer &OB, unsigned C) {
TempBuffer[Pos--] = 'x';
assert(Pos >= 0);
TempBuffer[Pos--] = '\\';
- OB << StringView(&TempBuffer[Pos + 1]);
+ OB << std::string_view(&TempBuffer[Pos + 1]);
}
static void outputEscapedChar(OutputBuffer &OB, unsigned C) {
@@ -1253,7 +1290,8 @@ static unsigned decodeMultiByteChar(const uint8_t *StringBytes,
return Result;
}
-FunctionSymbolNode *Demangler::demangleVcallThunkNode(StringView &MangledName) {
+FunctionSymbolNode *
+Demangler::demangleVcallThunkNode(std::string_view &MangledName) {
FunctionSymbolNode *FSN = Arena.alloc<FunctionSymbolNode>();
VcallThunkIdentifierNode *VTIN = Arena.alloc<VcallThunkIdentifierNode>();
FSN->Signature = Arena.alloc<ThunkSignatureNode>();
@@ -1261,36 +1299,39 @@ FunctionSymbolNode *Demangler::demangleVcallThunkNode(StringView &MangledName) {
FSN->Name = demangleNameScopeChain(MangledName, VTIN);
if (!Error)
- Error = !MangledName.consumeFront("$B");
+ Error = !consumeFront(MangledName, "$B");
if (!Error)
VTIN->OffsetInVTable = demangleUnsigned(MangledName);
if (!Error)
- Error = !MangledName.consumeFront('A');
+ Error = !consumeFront(MangledName, 'A');
if (!Error)
FSN->Signature->CallConvention = demangleCallingConvention(MangledName);
return (Error) ? nullptr : FSN;
}
EncodedStringLiteralNode *
-Demangler::demangleStringLiteral(StringView &MangledName) {
+Demangler::demangleStringLiteral(std::string_view &MangledName) {
// This function uses goto, so declare all variables up front.
OutputBuffer OB;
- StringView CRC;
+ std::string_view CRC;
uint64_t StringByteSize;
bool IsWcharT = false;
bool IsNegative = false;
size_t CrcEndPos = 0;
+ char F;
EncodedStringLiteralNode *Result = Arena.alloc<EncodedStringLiteralNode>();
// Prefix indicating the beginning of a string literal
- if (!MangledName.consumeFront("@_"))
+ if (!consumeFront(MangledName, "@_"))
goto StringLiteralError;
if (MangledName.empty())
goto StringLiteralError;
// Char Type (regular or wchar_t)
- switch (MangledName.popFront()) {
+ F = MangledName.front();
+ MangledName.remove_prefix(1);
+ switch (F) {
case '1':
IsWcharT = true;
DEMANGLE_FALLTHROUGH;
@@ -1307,10 +1348,10 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
// CRC 32 (always 8 characters plus a terminator)
CrcEndPos = MangledName.find('@');
- if (CrcEndPos == StringView::npos)
+ if (CrcEndPos == std::string_view::npos)
goto StringLiteralError;
CRC = MangledName.substr(0, CrcEndPos);
- MangledName = MangledName.dropFront(CrcEndPos + 1);
+ MangledName.remove_prefix(CrcEndPos + 1);
if (MangledName.empty())
goto StringLiteralError;
@@ -1319,7 +1360,7 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
if (StringByteSize > 64)
Result->IsTruncated = true;
- while (!MangledName.consumeFront('@')) {
+ while (!consumeFront(MangledName, '@')) {
if (MangledName.size() < 2)
goto StringLiteralError;
wchar_t W = demangleWcharLiteral(MangledName);
@@ -1336,7 +1377,7 @@ Demangler::demangleStringLiteral(StringView &MangledName) {
uint8_t StringBytes[MaxStringByteLength];
unsigned BytesDecoded = 0;
- while (!MangledName.consumeFront('@')) {
+ while (!consumeFront(MangledName, '@')) {
if (MangledName.size() < 1 || BytesDecoded >= MaxStringByteLength)
goto StringLiteralError;
StringBytes[BytesDecoded++] = demangleCharLiteral(MangledName);
@@ -1382,16 +1423,16 @@ StringLiteralError:
// Returns MangledName's prefix before the first '@', or an error if
// MangledName contains no '@' or the prefix has length 0.
-StringView Demangler::demangleSimpleString(StringView &MangledName,
- bool Memorize) {
- StringView S;
+std::string_view Demangler::demangleSimpleString(std::string_view &MangledName,
+ bool Memorize) {
+ std::string_view S;
for (size_t i = 0; i < MangledName.size(); ++i) {
if (MangledName[i] != '@')
continue;
if (i == 0)
break;
S = MangledName.substr(0, i);
- MangledName = MangledName.dropFront(i + 1);
+ MangledName.remove_prefix(i + 1);
if (Memorize)
memorizeString(S);
@@ -1403,36 +1444,36 @@ StringView Demangler::demangleSimpleString(StringView &MangledName,
}
NamedIdentifierNode *
-Demangler::demangleAnonymousNamespaceName(StringView &MangledName) {
- assert(MangledName.startsWith("?A"));
- MangledName.consumeFront("?A");
+Demangler::demangleAnonymousNamespaceName(std::string_view &MangledName) {
+ assert(llvm::itanium_demangle::starts_with(MangledName, "?A"));
+ consumeFront(MangledName, "?A");
NamedIdentifierNode *Node = Arena.alloc<NamedIdentifierNode>();
Node->Name = "`anonymous namespace'";
size_t EndPos = MangledName.find('@');
- if (EndPos == StringView::npos) {
+ if (EndPos == std::string_view::npos) {
Error = true;
return nullptr;
}
- StringView NamespaceKey = MangledName.substr(0, EndPos);
+ std::string_view NamespaceKey = MangledName.substr(0, EndPos);
memorizeString(NamespaceKey);
MangledName = MangledName.substr(EndPos + 1);
return Node;
}
NamedIdentifierNode *
-Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
+Demangler::demangleLocallyScopedNamePiece(std::string_view &MangledName) {
assert(startsWithLocalScopePattern(MangledName));
NamedIdentifierNode *Identifier = Arena.alloc<NamedIdentifierNode>();
- MangledName.consumeFront('?');
+ consumeFront(MangledName, '?');
uint64_t Number = 0;
bool IsNegative = false;
std::tie(Number, IsNegative) = demangleNumber(MangledName);
assert(!IsNegative);
// One ? to terminate the number
- MangledName.consumeFront('?');
+ consumeFront(MangledName, '?');
assert(!Error);
Node *Scope = parse(MangledName);
@@ -1453,7 +1494,7 @@ Demangler::demangleLocallyScopedNamePiece(StringView &MangledName) {
// Parses a type name in the form of A@B@C@@ which represents C::B::A.
QualifiedNameNode *
-Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) {
+Demangler::demangleFullyQualifiedTypeName(std::string_view &MangledName) {
IdentifierNode *Identifier =
demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
if (Error)
@@ -1471,7 +1512,7 @@ Demangler::demangleFullyQualifiedTypeName(StringView &MangledName) {
// Symbol names have slightly different rules regarding what can appear
// so we separate out the implementations for flexibility.
QualifiedNameNode *
-Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) {
+Demangler::demangleFullyQualifiedSymbolName(std::string_view &MangledName) {
// This is the final component of a symbol name (i.e. the leftmost component
// of a mangled name. Since the only possible template instantiation that
// can appear in this context is a function template, and since those are
@@ -1500,8 +1541,9 @@ Demangler::demangleFullyQualifiedSymbolName(StringView &MangledName) {
return QN;
}
-IdentifierNode *Demangler::demangleUnqualifiedTypeName(StringView &MangledName,
- bool Memorize) {
+IdentifierNode *
+Demangler::demangleUnqualifiedTypeName(std::string_view &MangledName,
+ bool Memorize) {
// An inner-most name can be a back-reference, because a fully-qualified name
// (e.g. Scope + Inner) can contain other fully qualified names inside of
// them (for example template parameters), and these nested parameters can
@@ -1509,32 +1551,33 @@ IdentifierNode *Demangler::demangleUnqualifiedTypeName(StringView &MangledName,
if (startsWithDigit(MangledName))
return demangleBackRefName(MangledName);
- if (MangledName.startsWith("?$"))
+ if (llvm::itanium_demangle::starts_with(MangledName, "?$"))
return demangleTemplateInstantiationName(MangledName, NBB_Template);
return demangleSimpleName(MangledName, Memorize);
}
IdentifierNode *
-Demangler::demangleUnqualifiedSymbolName(StringView &MangledName,
+Demangler::demangleUnqualifiedSymbolName(std::string_view &MangledName,
NameBackrefBehavior NBB) {
if (startsWithDigit(MangledName))
return demangleBackRefName(MangledName);
- if (MangledName.startsWith("?$"))
+ if (llvm::itanium_demangle::starts_with(MangledName, "?$"))
return demangleTemplateInstantiationName(MangledName, NBB);
- if (MangledName.startsWith('?'))
+ if (llvm::itanium_demangle::starts_with(MangledName, '?'))
return demangleFunctionIdentifierCode(MangledName);
return demangleSimpleName(MangledName, /*Memorize=*/(NBB & NBB_Simple) != 0);
}
-IdentifierNode *Demangler::demangleNameScopePiece(StringView &MangledName) {
+IdentifierNode *
+Demangler::demangleNameScopePiece(std::string_view &MangledName) {
if (startsWithDigit(MangledName))
return demangleBackRefName(MangledName);
- if (MangledName.startsWith("?$"))
+ if (llvm::itanium_demangle::starts_with(MangledName, "?$"))
return demangleTemplateInstantiationName(MangledName, NBB_Template);
- if (MangledName.startsWith("?A"))
+ if (llvm::itanium_demangle::starts_with(MangledName, "?A"))
return demangleAnonymousNamespaceName(MangledName);
if (startsWithLocalScopePattern(MangledName))
@@ -1556,14 +1599,14 @@ static NodeArrayNode *nodeListToNodeArray(ArenaAllocator &Arena, NodeList *Head,
}
QualifiedNameNode *
-Demangler::demangleNameScopeChain(StringView &MangledName,
+Demangler::demangleNameScopeChain(std::string_view &MangledName,
IdentifierNode *UnqualifiedName) {
NodeList *Head = Arena.alloc<NodeList>();
Head->N = UnqualifiedName;
size_t Count = 1;
- while (!MangledName.consumeFront("@")) {
+ while (!consumeFront(MangledName, "@")) {
++Count;
NodeList *NewHead = Arena.alloc<NodeList>();
NewHead->Next = Head;
@@ -1587,8 +1630,10 @@ Demangler::demangleNameScopeChain(StringView &MangledName,
return QN;
}
-FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
- switch (MangledName.popFront()) {
+FuncClass Demangler::demangleFunctionClass(std::string_view &MangledName) {
+ const char F = MangledName.front();
+ MangledName.remove_prefix(1);
+ switch (F) {
case '9':
return FuncClass(FC_ExternC | FC_NoParameterList);
case 'A':
@@ -1645,11 +1690,13 @@ FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
return FuncClass(FC_Global | FC_Far);
case '$': {
FuncClass VFlag = FC_VirtualThisAdjust;
- if (MangledName.consumeFront('R'))
+ if (consumeFront(MangledName, 'R'))
VFlag = FuncClass(VFlag | FC_VirtualThisAdjustEx);
if (MangledName.empty())
break;
- switch (MangledName.popFront()) {
+ const char F = MangledName.front();
+ MangledName.remove_prefix(1);
+ switch (F) {
case '0':
return FuncClass(FC_Private | FC_Virtual | VFlag);
case '1':
@@ -1670,13 +1717,16 @@ FuncClass Demangler::demangleFunctionClass(StringView &MangledName) {
return FC_Public;
}
-CallingConv Demangler::demangleCallingConvention(StringView &MangledName) {
+CallingConv
+Demangler::demangleCallingConvention(std::string_view &MangledName) {
if (MangledName.empty()) {
Error = true;
return CallingConv::None;
}
- switch (MangledName.popFront()) {
+ const char F = MangledName.front();
+ MangledName.remove_prefix(1);
+ switch (F) {
case 'A':
case 'B':
return CallingConv::Cdecl;
@@ -1709,10 +1759,13 @@ CallingConv Demangler::demangleCallingConvention(StringView &MangledName) {
return CallingConv::None;
}
-StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) {
+StorageClass
+Demangler::demangleVariableStorageClass(std::string_view &MangledName) {
assert(MangledName.front() >= '0' && MangledName.front() <= '4');
- switch (MangledName.popFront()) {
+ const char F = MangledName.front();
+ MangledName.remove_prefix(1);
+ switch (F) {
case '0':
return StorageClass::PrivateStatic;
case '1':
@@ -1728,13 +1781,15 @@ StorageClass Demangler::demangleVariableStorageClass(StringView &MangledName) {
}
std::pair<Qualifiers, bool>
-Demangler::demangleQualifiers(StringView &MangledName) {
+Demangler::demangleQualifiers(std::string_view &MangledName) {
if (MangledName.empty()) {
Error = true;
return std::make_pair(Q_None, false);
}
- switch (MangledName.popFront()) {
+ const char F = MangledName.front();
+ MangledName.remove_prefix(1);
+ switch (F) {
// Member qualifiers
case 'Q':
return std::make_pair(Q_None, true);
@@ -1760,14 +1815,14 @@ Demangler::demangleQualifiers(StringView &MangledName) {
// <variable-type> ::= <type> <cvr-qualifiers>
// ::= <type> <pointee-cvr-qualifiers> # pointers, references
-TypeNode *Demangler::demangleType(StringView &MangledName,
+TypeNode *Demangler::demangleType(std::string_view &MangledName,
QualifierMangleMode QMM) {
Qualifiers Quals = Q_None;
bool IsMember = false;
if (QMM == QualifierMangleMode::Mangle) {
std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
} else if (QMM == QualifierMangleMode::Result) {
- if (MangledName.consumeFront('?'))
+ if (consumeFront(MangledName, '?'))
std::tie(Quals, IsMember) = demangleQualifiers(MangledName);
}
@@ -1789,11 +1844,11 @@ TypeNode *Demangler::demangleType(StringView &MangledName,
} else if (isArrayType(MangledName))
Ty = demangleArrayType(MangledName);
else if (isFunctionType(MangledName)) {
- if (MangledName.consumeFront("$$A8@@"))
+ if (consumeFront(MangledName, "$$A8@@"))
Ty = demangleFunctionType(MangledName, true);
else {
- assert(MangledName.startsWith("$$A6"));
- MangledName.consumeFront("$$A6");
+ assert(llvm::itanium_demangle::starts_with(MangledName, "$$A6"));
+ consumeFront(MangledName, "$$A6");
Ty = demangleFunctionType(MangledName, false);
}
} else if (isCustomType(MangledName)) {
@@ -1808,18 +1863,19 @@ TypeNode *Demangler::demangleType(StringView &MangledName,
return Ty;
}
-bool Demangler::demangleThrowSpecification(StringView &MangledName) {
- if (MangledName.consumeFront("_E"))
+bool Demangler::demangleThrowSpecification(std::string_view &MangledName) {
+ if (consumeFront(MangledName, "_E"))
return true;
- if (MangledName.consumeFront('Z'))
+ if (consumeFront(MangledName, 'Z'))
return false;
Error = true;
return false;
}
-FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName,
- bool HasThisQuals) {
+FunctionSignatureNode *
+Demangler::demangleFunctionType(std::string_view &MangledName,
+ bool HasThisQuals) {
FunctionSignatureNode *FTy = Arena.alloc<FunctionSignatureNode>();
if (HasThisQuals) {
@@ -1833,7 +1889,7 @@ FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName,
// <return-type> ::= <type>
// ::= @ # structors (they have no declared return type)
- bool IsStructor = MangledName.consumeFront('@');
+ bool IsStructor = consumeFront(MangledName, '@');
if (!IsStructor)
FTy->ReturnType = demangleType(MangledName, QualifierMangleMode::Result);
@@ -1845,9 +1901,9 @@ FunctionSignatureNode *Demangler::demangleFunctionType(StringView &MangledName,
}
FunctionSymbolNode *
-Demangler::demangleFunctionEncoding(StringView &MangledName) {
+Demangler::demangleFunctionEncoding(std::string_view &MangledName) {
FuncClass ExtraFlags = FC_None;
- if (MangledName.consumeFront("$$J0"))
+ if (consumeFront(MangledName, "$$J0"))
ExtraFlags = FC_ExternC;
if (MangledName.empty()) {
@@ -1897,13 +1953,13 @@ Demangler::demangleFunctionEncoding(StringView &MangledName) {
return Symbol;
}
-CustomTypeNode *Demangler::demangleCustomType(StringView &MangledName) {
- assert(MangledName.startsWith('?'));
- MangledName.popFront();
+CustomTypeNode *Demangler::demangleCustomType(std::string_view &MangledName) {
+ assert(llvm::itanium_demangle::starts_with(MangledName, '?'));
+ MangledName.remove_prefix(1);
CustomTypeNode *CTN = Arena.alloc<CustomTypeNode>();
CTN->Identifier = demangleUnqualifiedTypeName(MangledName, /*Memorize=*/true);
- if (!MangledName.consumeFront('@'))
+ if (!consumeFront(MangledName, '@'))
Error = true;
if (Error)
return nullptr;
@@ -1911,11 +1967,14 @@ CustomTypeNode *Demangler::demangleCustomType(StringView &MangledName) {
}
// Reads a primitive type.
-PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) {
- if (MangledName.consumeFront("$$T"))
+PrimitiveTypeNode *
+Demangler::demanglePrimitiveType(std::string_view &MangledName) {
+ if (consumeFront(MangledName, "$$T"))
return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Nullptr);
- switch (MangledName.popFront()) {
+ const char F = MangledName.front();
+ MangledName.remove_prefix(1);
+ switch (F) {
case 'X':
return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Void);
case 'D':
@@ -1947,7 +2006,9 @@ PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) {
Error = true;
return nullptr;
}
- switch (MangledName.popFront()) {
+ const char F = MangledName.front();
+ MangledName.remove_prefix(1);
+ switch (F) {
case 'N':
return Arena.alloc<PrimitiveTypeNode>(PrimitiveKind::Bool);
case 'J':
@@ -1970,10 +2031,12 @@ PrimitiveTypeNode *Demangler::demanglePrimitiveType(StringView &MangledName) {
return nullptr;
}
-TagTypeNode *Demangler::demangleClassType(StringView &MangledName) {
+TagTypeNode *Demangler::demangleClassType(std::string_view &MangledName) {
TagTypeNode *TT = nullptr;
- switch (MangledName.popFront()) {
+ const char F = MangledName.front();
+ MangledName.remove_prefix(1);
+ switch (F) {
case 'T':
TT = Arena.alloc<TagTypeNode>(TagKind::Union);
break;
@@ -1984,7 +2047,7 @@ TagTypeNode *Demangler::demangleClassType(StringView &MangledName) {
TT = Arena.alloc<TagTypeNode>(TagKind::Class);
break;
case 'W':
- if (!MangledName.consumeFront('4')) {
+ if (!consumeFront(MangledName, '4')) {
Error = true;
return nullptr;
}
@@ -2000,13 +2063,13 @@ TagTypeNode *Demangler::demangleClassType(StringView &MangledName) {
// <pointer-type> ::= E? <pointer-cvr-qualifiers> <ext-qualifiers> <type>
// # the E is required for 64-bit non-static pointers
-PointerTypeNode *Demangler::demanglePointerType(StringView &MangledName) {
+PointerTypeNode *Demangler::demanglePointerType(std::string_view &MangledName) {
PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
std::tie(Pointer->Quals, Pointer->Affinity) =
demanglePointerCVQualifiers(MangledName);
- if (MangledName.consumeFront("6")) {
+ if (consumeFront(MangledName, "6")) {
Pointer->Pointee = demangleFunctionType(MangledName, false);
return Pointer;
}
@@ -2018,7 +2081,8 @@ PointerTypeNode *Demangler::demanglePointerType(StringView &MangledName) {
return Pointer;
}
-PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) {
+PointerTypeNode *
+Demangler::demangleMemberPointerType(std::string_view &MangledName) {
PointerTypeNode *Pointer = Arena.alloc<PointerTypeNode>();
std::tie(Pointer->Quals, Pointer->Affinity) =
@@ -2030,7 +2094,7 @@ PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) {
// isMemberPointer() only returns true if there is at least one character
// after the qualifiers.
- if (MangledName.consumeFront("8")) {
+ if (consumeFront(MangledName, "8")) {
Pointer->ClassParent = demangleFullyQualifiedTypeName(MangledName);
Pointer->Pointee = demangleFunctionType(MangledName, true);
} else {
@@ -2048,21 +2112,22 @@ PointerTypeNode *Demangler::demangleMemberPointerType(StringView &MangledName) {
return Pointer;
}
-Qualifiers Demangler::demanglePointerExtQualifiers(StringView &MangledName) {
+Qualifiers
+Demangler::demanglePointerExtQualifiers(std::string_view &MangledName) {
Qualifiers Quals = Q_None;
- if (MangledName.consumeFront('E'))
+ if (consumeFront(MangledName, 'E'))
Quals = Qualifiers(Quals | Q_Pointer64);
- if (MangledName.consumeFront('I'))
+ if (consumeFront(MangledName, 'I'))
Quals = Qualifiers(Quals | Q_Restrict);
- if (MangledName.consumeFront('F'))
+ if (consumeFront(MangledName, 'F'))
Quals = Qualifiers(Quals | Q_Unaligned);
return Quals;
}
-ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) {
+ArrayTypeNode *Demangler::demangleArrayType(std::string_view &MangledName) {
assert(MangledName.front() == 'Y');
- MangledName.popFront();
+ MangledName.remove_prefix(1);
uint64_t Rank = 0;
bool IsNegative = false;
@@ -2091,7 +2156,7 @@ ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) {
}
ATy->Dimensions = nodeListToNodeArray(Arena, Head, Rank);
- if (MangledName.consumeFront("$$C")) {
+ if (consumeFront(MangledName, "$$C")) {
bool IsMember = false;
std::tie(ATy->Quals, IsMember) = demangleQualifiers(MangledName);
if (IsMember) {
@@ -2105,17 +2170,18 @@ ArrayTypeNode *Demangler::demangleArrayType(StringView &MangledName) {
}
// Reads a function's parameters.
-NodeArrayNode *Demangler::demangleFunctionParameterList(StringView &MangledName,
- bool &IsVariadic) {
+NodeArrayNode *
+Demangler::demangleFunctionParameterList(std::string_view &MangledName,
+ bool &IsVariadic) {
// Empty parameter list.
- if (MangledName.consumeFront('X'))
+ if (consumeFront(MangledName, 'X'))
return nullptr;
NodeList *Head = Arena.alloc<NodeList>();
NodeList **Current = &Head;
size_t Count = 0;
- while (!Error && !MangledName.startsWith('@') &&
- !MangledName.startsWith('Z')) {
+ while (!Error && !llvm::itanium_demangle::starts_with(MangledName, '@') &&
+ !llvm::itanium_demangle::starts_with(MangledName, 'Z')) {
++Count;
if (startsWithDigit(MangledName)) {
@@ -2124,7 +2190,7 @@ NodeArrayNode *Demangler::demangleFunctionParameterList(StringView &MangledName,
Error = true;
return nullptr;
}
- MangledName = MangledName.dropFront();
+ MangledName.remove_prefix(1);
*Current = Arena.alloc<NodeList>();
(*Current)->N = Backrefs.FunctionParams[N];
@@ -2159,10 +2225,10 @@ NodeArrayNode *Demangler::demangleFunctionParameterList(StringView &MangledName,
// A non-empty parameter list is terminated by either 'Z' (variadic) parameter
// list or '@' (non variadic). Careful not to consume "@Z", as in that case
// the following Z could be a throw specifier.
- if (MangledName.consumeFront('@'))
+ if (consumeFront(MangledName, '@'))
return NA;
- if (MangledName.consumeFront('Z')) {
+ if (consumeFront(MangledName, 'Z')) {
IsVariadic = true;
return NA;
}
@@ -2171,14 +2237,14 @@ NodeArrayNode *Demangler::demangleFunctionParameterList(StringView &MangledName,
}
NodeArrayNode *
-Demangler::demangleTemplateParameterList(StringView &MangledName) {
+Demangler::demangleTemplateParameterList(std::string_view &MangledName) {
NodeList *Head = nullptr;
NodeList **Current = &Head;
size_t Count = 0;
- while (!MangledName.startsWith('@')) {
- if (MangledName.consumeFront("$S") || MangledName.consumeFront("$$V") ||
- MangledName.consumeFront("$$$V") || MangledName.consumeFront("$$Z")) {
+ while (!llvm::itanium_demangle::starts_with(MangledName, '@')) {
+ if (consumeFront(MangledName, "$S") || consumeFront(MangledName, "$$V") ||
+ consumeFront(MangledName, "$$$V") || consumeFront(MangledName, "$$Z")) {
// parameter pack separator
continue;
}
@@ -2191,29 +2257,32 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
NodeList &TP = **Current;
TemplateParameterReferenceNode *TPRN = nullptr;
- if (MangledName.consumeFront("$$Y")) {
+ if (consumeFront(MangledName, "$$Y")) {
// Template alias
TP.N = demangleFullyQualifiedTypeName(MangledName);
- } else if (MangledName.consumeFront("$$B")) {
+ } else if (consumeFront(MangledName, "$$B")) {
// Array
TP.N = demangleType(MangledName, QualifierMangleMode::Drop);
- } else if (MangledName.consumeFront("$$C")) {
+ } else if (consumeFront(MangledName, "$$C")) {
// Type has qualifiers.
TP.N = demangleType(MangledName, QualifierMangleMode::Mangle);
- } else if (MangledName.startsWith("$1") || MangledName.startsWith("$H") ||
- MangledName.startsWith("$I") || MangledName.startsWith("$J")) {
+ } else if (llvm::itanium_demangle::starts_with(MangledName, "$1") ||
+ llvm::itanium_demangle::starts_with(MangledName, "$H") ||
+ llvm::itanium_demangle::starts_with(MangledName, "$I") ||
+ llvm::itanium_demangle::starts_with(MangledName, "$J")) {
// Pointer to member
TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
TPRN->IsMemberPointer = true;
- MangledName = MangledName.dropFront();
+ MangledName.remove_prefix(1);
// 1 - single inheritance <name>
// H - multiple inheritance <name> <number>
// I - virtual inheritance <name> <number> <number>
// J - unspecified inheritance <name> <number> <number> <number>
- char InheritanceSpecifier = MangledName.popFront();
+ char InheritanceSpecifier = MangledName.front();
+ MangledName.remove_prefix(1);
SymbolNode *S = nullptr;
- if (MangledName.startsWith('?')) {
+ if (llvm::itanium_demangle::starts_with(MangledName, '?')) {
S = parse(MangledName);
if (Error || !S->Name) {
Error = true;
@@ -2242,18 +2311,20 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
}
TPRN->Affinity = PointerAffinity::Pointer;
TPRN->Symbol = S;
- } else if (MangledName.startsWith("$E?")) {
- MangledName.consumeFront("$E");
+ } else if (llvm::itanium_demangle::starts_with(MangledName, "$E?")) {
+ consumeFront(MangledName, "$E");
// Reference to symbol
TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
TPRN->Symbol = parse(MangledName);
TPRN->Affinity = PointerAffinity::Reference;
- } else if (MangledName.startsWith("$F") || MangledName.startsWith("$G")) {
+ } else if (llvm::itanium_demangle::starts_with(MangledName, "$F") ||
+ llvm::itanium_demangle::starts_with(MangledName, "$G")) {
TP.N = TPRN = Arena.alloc<TemplateParameterReferenceNode>();
// Data member pointer.
- MangledName = MangledName.dropFront();
- char InheritanceSpecifier = MangledName.popFront();
+ MangledName.remove_prefix(1);
+ char InheritanceSpecifier = MangledName.front();
+ MangledName.remove_prefix(1);
switch (InheritanceSpecifier) {
case 'G':
@@ -2271,7 +2342,7 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
}
TPRN->IsMemberPointer = true;
- } else if (MangledName.consumeFront("$0")) {
+ } else if (consumeFront(MangledName, "$0")) {
// Integral non-type template parameter
bool IsNegative = false;
uint64_t Value = 0;
@@ -2292,8 +2363,9 @@ Demangler::demangleTemplateParameterList(StringView &MangledName) {
// Template parameter lists cannot be variadic, so it can only be terminated
// by @ (as opposed to 'Z' in the function parameter case).
- assert(MangledName.startsWith('@')); // The above loop exits only on '@'.
- MangledName.consumeFront('@');
+ assert(llvm::itanium_demangle::starts_with(
+ MangledName, '@')); // The above loop exits only on '@'.
+ consumeFront(MangledName, '@');
return nodeListToNodeArray(Arena, Head, Count);
}
@@ -2309,8 +2381,8 @@ void Demangler::dumpBackReferences() {
TypeNode *T = Backrefs.FunctionParams[I];
T->output(OB, OF_Default);
- StringView B = OB;
- std::printf(" [%d] - %.*s\n", (int)I, (int)B.size(), B.begin());
+ std::string_view B = OB;
+ std::printf(" [%d] - %.*s\n", (int)I, (int)B.size(), B.data());
}
std::free(OB.getBuffer());
@@ -2319,21 +2391,20 @@ void Demangler::dumpBackReferences() {
std::printf("%d name backreferences\n", (int)Backrefs.NamesCount);
for (size_t I = 0; I < Backrefs.NamesCount; ++I) {
std::printf(" [%d] - %.*s\n", (int)I, (int)Backrefs.Names[I]->Name.size(),
- Backrefs.Names[I]->Name.begin());
+ Backrefs.Names[I]->Name.data());
}
if (Backrefs.NamesCount > 0)
std::printf("\n");
}
-char *llvm::microsoftDemangle(const char *MangledName, size_t *NMangled,
- char *Buf, size_t *N,
+char *llvm::microsoftDemangle(std::string_view MangledName, size_t *NMangled,
int *Status, MSDemangleFlags Flags) {
Demangler D;
- StringView Name{MangledName};
+ std::string_view Name{MangledName};
SymbolNode *AST = D.parse(Name);
if (!D.Error && NMangled)
- *NMangled = Name.begin() - MangledName;
+ *NMangled = MangledName.size() - Name.size();
if (Flags & MSDF_DumpBackrefs)
D.dumpBackReferences();
@@ -2351,14 +2422,13 @@ char *llvm::microsoftDemangle(const char *MangledName, size_t *NMangled,
OF = OutputFlags(OF | OF_NoVariableType);
int InternalStatus = demangle_success;
+ char *Buf;
if (D.Error)
InternalStatus = demangle_invalid_mangled_name;
else {
- OutputBuffer OB(Buf, N);
+ OutputBuffer OB;
AST->output(OB, OF);
OB += '\0';
- if (N != nullptr)
- *N = OB.getCurrentPosition();
Buf = OB.getBuffer();
}
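
The MicrosoftDemangle.cpp hunks above replace the private StringView class with std::string_view plus small free helpers: consumeFront(...) and llvm::itanium_demangle::starts_with(...). As a rough sketch of what such helpers amount to (an illustration assuming only the behavior implied by the call sites above, not the actual definitions in StringViewExtras.h or MicrosoftDemangle.cpp):

// Sketch only: mirrors the helper calls used in the hunks above.
#include <string_view>

static bool starts_with(std::string_view Haystack, char Prefix) {
  return !Haystack.empty() && Haystack.front() == Prefix;
}

static bool starts_with(std::string_view Haystack, std::string_view Prefix) {
  return Haystack.substr(0, Prefix.size()) == Prefix;
}

// Drop Prefix from the front of MangledName if present; report whether it was.
static bool consumeFront(std::string_view &MangledName, char Prefix) {
  if (!starts_with(MangledName, Prefix))
    return false;
  MangledName.remove_prefix(1);
  return true;
}

static bool consumeFront(std::string_view &MangledName, std::string_view Prefix) {
  if (!starts_with(MangledName, Prefix))
    return false;
  MangledName.remove_prefix(Prefix.size());
  return true;
}

The rest of the conversion is mechanical: popFront() becomes front() plus remove_prefix(1), dropFront(N) becomes remove_prefix(N), dropBack() becomes remove_suffix(1), and begin() becomes data().
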
diff --git a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
index 975649f28ad2..9a9c34ec6d34 100644
--- a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
@@ -120,7 +120,7 @@ static void outputCallingConvention(OutputBuffer &OB, CallingConv CC) {
std::string Node::toString(OutputFlags Flags) const {
OutputBuffer OB;
this->output(OB, Flags);
- StringView SV = OB;
+ std::string_view SV = OB;
std::string Owned(SV.begin(), SV.end());
std::free(OB.getBuffer());
return Owned;
@@ -158,7 +158,7 @@ void NodeArrayNode::output(OutputBuffer &OB, OutputFlags Flags) const {
}
void NodeArrayNode::output(OutputBuffer &OB, OutputFlags Flags,
- StringView Separator) const {
+ std::string_view Separator) const {
if (Count == 0)
return;
if (Nodes[0])
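
The microsoftDemangle entry point shown above also changes shape: it now takes a std::string_view, reports how many characters were consumed through NMangled, and always allocates the output buffer itself rather than optionally filling a caller-provided Buf/N pair. A hypothetical caller of the new signature (a sketch; "?x@@3HA" is just a sample symbol that demangles to "int x"):

// Sketch of calling the new interface; the caller frees the returned buffer.
#include "llvm/Demangle/Demangle.h"
#include <cstdio>
#include <cstdlib>

int main() {
  size_t NMangled = 0;
  int Status = 0;
  char *Demangled =
      llvm::microsoftDemangle("?x@@3HA", &NMangled, &Status, llvm::MSDF_None);
  if (Demangled)
    std::printf("%s (consumed %zu characters)\n", Demangled, NMangled);
  std::free(Demangled); // freeing a null pointer is harmless
  return 0;
}
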
diff --git a/llvm/lib/Demangle/RustDemangle.cpp b/llvm/lib/Demangle/RustDemangle.cpp
index 8c01155127d8..f0d70de3abb5 100644
--- a/llvm/lib/Demangle/RustDemangle.cpp
+++ b/llvm/lib/Demangle/RustDemangle.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Demangle/Demangle.h"
-#include "llvm/Demangle/StringView.h"
+#include "llvm/Demangle/StringViewExtras.h"
#include "llvm/Demangle/Utility.h"
#include <algorithm>
@@ -20,17 +20,18 @@
#include <cstdint>
#include <cstring>
#include <limits>
+#include <string_view>
using namespace llvm;
using llvm::itanium_demangle::OutputBuffer;
using llvm::itanium_demangle::ScopedOverride;
-using llvm::itanium_demangle::StringView;
+using llvm::itanium_demangle::starts_with;
namespace {
struct Identifier {
- StringView Name;
+ std::string_view Name;
bool Punycode;
bool empty() const { return Name.empty(); }
@@ -77,7 +78,7 @@ class Demangler {
size_t RecursionLevel;
size_t BoundLifetimes;
// Input string that is being demangled with "_R" prefix removed.
- StringView Input;
+ std::string_view Input;
// Position in the input string.
size_t Position;
// When true, print methods append the output to the stream.
@@ -92,7 +93,7 @@ public:
Demangler(size_t MaxRecursionLevel = 500);
- bool demangle(StringView MangledName);
+ bool demangle(std::string_view MangledName);
private:
bool demanglePath(IsInType Type,
@@ -128,10 +129,10 @@ private:
uint64_t parseOptionalBase62Number(char Tag);
uint64_t parseBase62Number();
uint64_t parseDecimalNumber();
- uint64_t parseHexNumber(StringView &HexDigits);
+ uint64_t parseHexNumber(std::string_view &HexDigits);
void print(char C);
- void print(StringView S);
+ void print(std::string_view S);
void printDecimalNumber(uint64_t N);
void printBasicType(BasicType);
void printLifetime(uint64_t Index);
@@ -147,17 +148,13 @@ private:
} // namespace
-char *llvm::rustDemangle(const char *MangledName) {
- if (MangledName == nullptr)
- return nullptr;
-
+char *llvm::rustDemangle(std::string_view MangledName) {
// Return early if mangled name doesn't look like a Rust symbol.
- StringView Mangled(MangledName);
- if (!Mangled.startsWith("_R"))
+ if (MangledName.empty() || !starts_with(MangledName, "_R"))
return nullptr;
Demangler D;
- if (!D.demangle(Mangled)) {
+ if (!D.demangle(MangledName)) {
std::free(D.Output.getBuffer());
return nullptr;
}
@@ -190,20 +187,20 @@ static inline bool isValid(const char C) {
// responsibility of the caller to free the memory behind the output stream.
//
// <symbol-name> = "_R" <path> [<instantiating-crate>]
-bool Demangler::demangle(StringView Mangled) {
+bool Demangler::demangle(std::string_view Mangled) {
Position = 0;
Error = false;
Print = true;
RecursionLevel = 0;
BoundLifetimes = 0;
- if (!Mangled.consumeFront("_R")) {
+ if (!starts_with(Mangled, "_R")) {
Error = true;
return false;
}
+ Mangled.remove_prefix(2);
size_t Dot = Mangled.find('.');
- Input = Mangled.substr(0, Dot);
- StringView Suffix = Mangled.dropFront(Dot);
+ Input = Dot == std::string_view::npos ? Mangled : Mangled.substr(0, Dot);
demanglePath(IsInType::No);
@@ -215,9 +212,9 @@ bool Demangler::demangle(StringView Mangled) {
if (Position != Input.size())
Error = true;
- if (!Suffix.empty()) {
+ if (Dot != std::string_view::npos) {
print(" (");
- print(Suffix);
+ print(Mangled.substr(Dot));
print(")");
}
@@ -775,7 +772,7 @@ void Demangler::demangleConstInt() {
if (consumeIf('n'))
print('-');
- StringView HexDigits;
+ std::string_view HexDigits;
uint64_t Value = parseHexNumber(HexDigits);
if (HexDigits.size() <= 16) {
printDecimalNumber(Value);
@@ -788,7 +785,7 @@ void Demangler::demangleConstInt() {
// <const-data> = "0_" // false
// | "1_" // true
void Demangler::demangleConstBool() {
- StringView HexDigits;
+ std::string_view HexDigits;
parseHexNumber(HexDigits);
if (HexDigits == "0")
print("false");
@@ -805,7 +802,7 @@ static bool isAsciiPrintable(uint64_t CodePoint) {
// <const-data> = <hex-number>
void Demangler::demangleConstChar() {
- StringView HexDigits;
+ std::string_view HexDigits;
uint64_t CodePoint = parseHexNumber(HexDigits);
if (Error || HexDigits.size() > 6) {
Error = true;
@@ -859,7 +856,7 @@ Identifier Demangler::parseIdentifier() {
Error = true;
return {};
}
- StringView S = Input.substr(Position, Bytes);
+ std::string_view S = Input.substr(Position, Bytes);
Position += Bytes;
if (!std::all_of(S.begin(), S.end(), isValid)) {
@@ -967,7 +964,7 @@ uint64_t Demangler::parseDecimalNumber() {
//
// <hex-number> = "0_"
// | <1-9a-f> {<0-9a-f>} "_"
-uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
+uint64_t Demangler::parseHexNumber(std::string_view &HexDigits) {
size_t Start = Position;
uint64_t Value = 0;
@@ -991,7 +988,7 @@ uint64_t Demangler::parseHexNumber(StringView &HexDigits) {
}
if (Error) {
- HexDigits = StringView();
+ HexDigits = std::string_view();
return 0;
}
@@ -1008,7 +1005,7 @@ void Demangler::print(char C) {
Output += C;
}
-void Demangler::print(StringView S) {
+void Demangler::print(std::string_view S) {
if (Error || !Print)
return;
@@ -1105,17 +1102,17 @@ static inline bool encodeUTF8(size_t CodePoint, char *Output) {
// Decodes string encoded using punycode and appends results to Output.
// Returns true if decoding was successful.
-static bool decodePunycode(StringView Input, OutputBuffer &Output) {
+static bool decodePunycode(std::string_view Input, OutputBuffer &Output) {
size_t OutputSize = Output.getCurrentPosition();
size_t InputIdx = 0;
// Rust uses an underscore as a delimiter.
- size_t DelimiterPos = StringView::npos;
+ size_t DelimiterPos = std::string_view::npos;
for (size_t I = 0; I != Input.size(); ++I)
if (Input[I] == '_')
DelimiterPos = I;
- if (DelimiterPos != StringView::npos) {
+ if (DelimiterPos != std::string_view::npos) {
// Copy basic code points before the last delimiter to the output.
for (; InputIdx != DelimiterPos; ++InputIdx) {
char C = Input[InputIdx];
@@ -1123,7 +1120,7 @@ static bool decodePunycode(StringView Input, OutputBuffer &Output) {
return false;
// Code points are padded with zeros while decoding is in progress.
char UTF8[4] = {C};
- Output += StringView(UTF8, UTF8 + 4);
+ Output += std::string_view(UTF8, 4);
}
// Skip over the delimiter.
++InputIdx;
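
RustDemangle.cpp gets the same StringView-to-std::string_view conversion, and rustDemangle now accepts a std::string_view directly instead of a nullable const char *. A hypothetical call against the new overload (a sketch; the input is the usual v0 example symbol for 123foo::bar):

// Sketch of the updated Rust demangler entry point.
#include "llvm/Demangle/Demangle.h"
#include <cstdio>
#include <cstdlib>

int main() {
  char *Demangled = llvm::rustDemangle("_RNvC6_123foo3bar");
  if (Demangled)
    std::puts(Demangled); // expected output: 123foo::bar
  std::free(Demangled);
  return 0;
}
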
diff --git a/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
index 2a90b67bee4b..768d84501337 100644
--- a/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -34,9 +34,9 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Host.h"
#include <cmath>
#include <cstring>
#include <mutex>
@@ -878,6 +878,12 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
report_fatal_error(OS.str());
}
+ if (auto *TETy = dyn_cast<TargetExtType>(C->getType())) {
+ assert(TETy->hasProperty(TargetExtType::HasZeroInit) && C->isNullValue() &&
+ "TargetExtType only supports null constant value");
+ C = Constant::getNullValue(TETy->getLayoutType());
+ }
+
// Otherwise, we have a simple constant.
GenericValue Result;
switch (C->getType()->getTypeID()) {
@@ -1017,6 +1023,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
GenericValue *Ptr, Type *Ty) {
+ // It is safe to treat TargetExtType as its layout type since the underlying
+ // bits are only copied and are not inspected.
+ if (auto *TETy = dyn_cast<TargetExtType>(Ty))
+ Ty = TETy->getLayoutType();
+
const unsigned StoreBytes = getDataLayout().getTypeStoreSize(Ty);
switch (Ty->getTypeID()) {
@@ -1068,6 +1079,9 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val,
void ExecutionEngine::LoadValueFromMemory(GenericValue &Result,
GenericValue *Ptr,
Type *Ty) {
+ if (auto *TETy = dyn_cast<TargetExtType>(Ty))
+ Ty = TETy->getLayoutType();
+
const unsigned LoadBytes = getDataLayout().getTypeStoreSize(Ty);
switch (Ty->getTypeID()) {
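
The ExecutionEngine hunks above lower target extension types to their layout types before copying raw bytes, on the stated grounds that the bits are only copied, never inspected. A minimal sketch of that pattern, assuming only the TargetExtType::getLayoutType() call visible above:

// Sketch: compute a store size, looking through target extension types the
// same way the interpreter paths above do.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"
#include <cstdint>

static uint64_t storeSizeInBytes(const llvm::DataLayout &DL, llvm::Type *Ty) {
  if (auto *TETy = llvm::dyn_cast<llvm::TargetExtType>(Ty))
    Ty = TETy->getLayoutType(); // underlying representation of the opaque type
  return DL.getTypeStoreSize(Ty);
}
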
diff --git a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
index f1eeee3b3599..b5b76130c55e 100644
--- a/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
+++ b/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
@@ -176,7 +176,7 @@ void GDBJITRegistrationListener::notifyObjectLoaded(
size_t Size = DebugObj.getBinary()->getMemoryBufferRef().getBufferSize();
std::lock_guard<llvm::sys::Mutex> locked(JITDebugLock);
- assert(ObjectBufferMap.find(K) == ObjectBufferMap.end() &&
+ assert(!ObjectBufferMap.contains(K) &&
"Second attempt to perform debug registration.");
jit_code_entry* JITCodeEntry = new jit_code_entry();
diff --git a/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 29f481a1e4e8..4f8f883a75f3 100644
--- a/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -344,6 +344,12 @@ static GenericValue lle_X_abort(FunctionType *FT, ArrayRef<GenericValue> Args) {
return GenericValue();
}
+// Silence warnings about sprintf. (See also
+// https://github.com/llvm/llvm-project/issues/58086)
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+#endif
// int sprintf(char *, const char *, ...) - a very rough implementation to make
// output useful.
static GenericValue lle_X_sprintf(FunctionType *FT,
@@ -425,6 +431,9 @@ static GenericValue lle_X_sprintf(FunctionType *FT,
}
return GV;
}
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
// int printf(const char *, ...) - a very rough implementation to make output
// useful.
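The push/ignore/pop pattern added above, shown standalone: it silences one specific clang warning for a bounded region without affecting other warnings or other compilers (the helper below is illustrative only).

#include <cstdio>

#if defined(__clang__)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#endif
static int callSprintf(char *Buf, int V) {
  // sprintf is flagged as deprecated by some SDKs; the pragma suppresses that.
  return std::sprintf(Buf, "%d", V);
}
#if defined(__clang__)
#pragma clang diagnostic pop
#endif

int main() {
  char Buf[16];
  return callSprintf(Buf, 42) > 0 ? 0 : 1;
}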
diff --git a/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h b/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
index 3dfe736dc5be..41a0389442d3 100644
--- a/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -224,8 +224,6 @@ private: // Helper functions
ExecutionContext &SF);
GenericValue executeBitCastInst(Value *SrcVal, Type *DstTy,
ExecutionContext &SF);
- GenericValue executeCastOperation(Instruction::CastOps opcode, Value *SrcVal,
- Type *Ty, ExecutionContext &SF);
void popStackAndReturnValueToCaller(Type *RetTy, GenericValue Result);
};
diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.h b/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.h
index 8d5e0f7314dd..5c953da7581f 100644
--- a/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.h
+++ b/llvm/lib/ExecutionEngine/JITLink/COFFDirectiveParser.h
@@ -13,13 +13,13 @@
#ifndef LLVM_EXECUTIONENGINE_JITLINK_COFFDIRECTIVEPARSER_H
#define LLVM_EXECUTIONENGINE_JITLINK_COFFDIRECTIVEPARSER_H
-#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/StringSaver.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
namespace jitlink {
diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp
index 782928c26084..6668854e1a6a 100644
--- a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.cpp
@@ -24,13 +24,12 @@ static Triple createTripleWithCOFFFormat(Triple T) {
}
COFFLinkGraphBuilder::COFFLinkGraphBuilder(
- const object::COFFObjectFile &Obj, Triple TT,
+ const object::COFFObjectFile &Obj, Triple TT, SubtargetFeatures Features,
LinkGraph::GetEdgeKindNameFunction GetEdgeKindName)
- : Obj(Obj),
- G(std::make_unique<LinkGraph>(Obj.getFileName().str(),
- createTripleWithCOFFFormat(TT),
- getPointerSize(Obj), getEndianness(Obj),
- std::move(GetEdgeKindName))) {
+ : Obj(Obj), G(std::make_unique<LinkGraph>(
+ Obj.getFileName().str(), createTripleWithCOFFFormat(TT),
+ std::move(Features), getPointerSize(Obj),
+ getEndianness(Obj), std::move(GetEdgeKindName))) {
LLVM_DEBUG({
dbgs() << "Created COFFLinkGraphBuilder for \"" << Obj.getFileName()
<< "\"\n";
@@ -135,6 +134,13 @@ Error COFFLinkGraphBuilder::graphifySections() {
SectionName = *SecNameOrErr;
// FIXME: Skip debug info sections
+ if (SectionName == ".voltbl") {
+ LLVM_DEBUG({
+ dbgs() << " "
+ << "Skipping section \"" << SectionName << "\"\n";
+ });
+ continue;
+ }
LLVM_DEBUG({
dbgs() << " "
@@ -152,8 +158,11 @@ Error COFFLinkGraphBuilder::graphifySections() {
// Look for existing sections first.
auto *GraphSec = G->findSectionByName(SectionName);
- if (!GraphSec)
+ if (!GraphSec) {
GraphSec = &G->createSection(SectionName, Prot);
+ if ((*Sec)->Characteristics & COFF::IMAGE_SCN_LNK_REMOVE)
+ GraphSec->setMemLifetimePolicy(orc::MemLifetimePolicy::NoAlloc);
+ }
if (GraphSec->getMemProt() != Prot)
return make_error<JITLinkError>("MemProt should match");
@@ -287,7 +296,7 @@ Error COFFLinkGraphBuilder::handleDirectiveSection(StringRef Str) {
break;
}
case COFF_OPT_incl: {
- auto DataCopy = G->allocateString(S);
+ auto DataCopy = G->allocateContent(S);
StringRef StrCopy(DataCopy.data(), DataCopy.size());
ExternalSymbols[StrCopy] = &G->addExternalSymbol(StrCopy, 0, false);
ExternalSymbols[StrCopy]->setLive(true);
diff --git a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
index 0c0a1a536deb..e64823759540 100644
--- a/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/COFFLinkGraphBuilder.h
@@ -39,6 +39,7 @@ protected:
using COFFSymbolIndex = int32_t;
COFFLinkGraphBuilder(const object::COFFObjectFile &Obj, Triple TT,
+ SubtargetFeatures Features,
LinkGraph::GetEdgeKindNameFunction GetEdgeKindName);
LinkGraph &getGraph() const { return *G; }
@@ -192,6 +193,10 @@ Error COFFLinkGraphBuilder::forEachRelocation(const object::SectionRef &RelSec,
Expected<StringRef> Name = Obj.getSectionName(COFFRelSect);
if (!Name)
return Name.takeError();
+
+ // Skip the unhandled metadata sections.
+ if (*Name == ".voltbl")
+ return Error::success();
LLVM_DEBUG(dbgs() << " " << *Name << ":\n");
// Lookup the link-graph node corresponding to the target section name.
diff --git a/llvm/lib/ExecutionEngine/JITLink/COFF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/COFF_x86_64.cpp
index b09dc769b81c..3257a2ae94f6 100644
--- a/llvm/lib/ExecutionEngine/JITLink/COFF_x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/COFF_x86_64.cpp
@@ -181,8 +181,10 @@ private:
}
public:
- COFFLinkGraphBuilder_x86_64(const object::COFFObjectFile &Obj, const Triple T)
- : COFFLinkGraphBuilder(Obj, std::move(T), getCOFFX86RelocationKindName) {}
+ COFFLinkGraphBuilder_x86_64(const object::COFFObjectFile &Obj, const Triple T,
+ const SubtargetFeatures Features)
+ : COFFLinkGraphBuilder(Obj, std::move(T), std::move(Features),
+ getCOFFX86RelocationKindName) {}
};
class COFFLinkGraphLowering_x86_64 {
@@ -196,7 +198,7 @@ public:
auto ImageBase = getImageBaseAddress(G, Ctx);
if (!ImageBase)
return ImageBase.takeError();
- E.setAddend(E.getAddend() - *ImageBase);
+ E.setAddend(E.getAddend() - ImageBase->getValue());
E.setKind(x86_64::Pointer32);
break;
}
@@ -238,19 +240,19 @@ private:
return SectionStartCache[&Sec];
}
- Expected<JITTargetAddress> getImageBaseAddress(LinkGraph &G,
- JITLinkContext &Ctx) {
+ Expected<orc::ExecutorAddr> getImageBaseAddress(LinkGraph &G,
+ JITLinkContext &Ctx) {
if (this->ImageBase)
return this->ImageBase;
for (auto *S : G.defined_symbols())
if (S->getName() == getImageBaseSymbolName()) {
- this->ImageBase = S->getAddress().getValue();
+ this->ImageBase = S->getAddress();
return this->ImageBase;
}
JITLinkContext::LookupMap Symbols;
Symbols[getImageBaseSymbolName()] = SymbolLookupFlags::RequiredSymbol;
- JITTargetAddress ImageBase;
+ orc::ExecutorAddr ImageBase;
Error Err = Error::success();
Ctx.lookup(Symbols,
createLookupContinuation([&](Expected<AsyncLookupResult> LR) {
@@ -259,8 +261,7 @@ private:
Err = LR.takeError();
return;
}
- auto &ImageBaseSymbol = LR->begin()->second;
- ImageBase = ImageBaseSymbol.getAddress();
+ ImageBase = LR->begin()->second.getAddress();
}));
if (Err)
return std::move(Err);
@@ -269,7 +270,7 @@ private:
}
DenseMap<Section *, orc::ExecutorAddr> SectionStartCache;
- JITTargetAddress ImageBase = 0;
+ orc::ExecutorAddr ImageBase;
};
Error lowerEdges_COFF_x86_64(LinkGraph &G, JITLinkContext *Ctx) {
@@ -315,7 +316,12 @@ createLinkGraphFromCOFFObject_x86_64(MemoryBufferRef ObjectBuffer) {
if (!COFFObj)
return COFFObj.takeError();
- return COFFLinkGraphBuilder_x86_64(**COFFObj, (*COFFObj)->makeTriple())
+ auto Features = (*COFFObj)->getFeatures();
+ if (!Features)
+ return Features.takeError();
+
+ return COFFLinkGraphBuilder_x86_64(**COFFObj, (*COFFObj)->makeTriple(),
+ std::move(*Features))
.buildGraph();
}
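The image-base handling above folds a subtraction of the image base into the edge addend so that a plain 32-bit fixup yields an offset from the COFF image base; a standalone sketch of that arithmetic with made-up addresses (not LLVM API):

#include <cstdint>
#include <cstdio>

static uint32_t relativeToImageBase(uint64_t Target, uint64_t ImageBase) {
  // Same net effect as E.setAddend(E.getAddend() - ImageBase) followed by a
  // 32-bit pointer fixup: the emitted value is Target - ImageBase.
  return static_cast<uint32_t>(Target - ImageBase);
}

int main() {
  std::printf("0x%x\n",
              (unsigned)relativeToImageBase(0x140001000ull, 0x140000000ull));
  return 0; // prints 0x1000
}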
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp
index ef0f19a78571..dd08a23306ff 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp
@@ -13,9 +13,11 @@
#include "llvm/ExecutionEngine/JITLink/ELF.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/ExecutionEngine/JITLink/ELF_aarch32.h"
#include "llvm/ExecutionEngine/JITLink/ELF_aarch64.h"
#include "llvm/ExecutionEngine/JITLink/ELF_i386.h"
#include "llvm/ExecutionEngine/JITLink/ELF_loongarch.h"
+#include "llvm/ExecutionEngine/JITLink/ELF_ppc64.h"
#include "llvm/ExecutionEngine/JITLink/ELF_riscv.h"
#include "llvm/ExecutionEngine/JITLink/ELF_x86_64.h"
#include "llvm/Object/ELF.h"
@@ -56,12 +58,13 @@ Expected<uint16_t> readTargetMachineArch(StringRef Buffer) {
Expected<std::unique_ptr<LinkGraph>>
createLinkGraphFromELFObject(MemoryBufferRef ObjectBuffer) {
StringRef Buffer = ObjectBuffer.getBuffer();
- if (Buffer.size() < ELF::EI_MAG3 + 1)
+ if (Buffer.size() < ELF::EI_NIDENT)
return make_error<JITLinkError>("Truncated ELF buffer");
if (memcmp(Buffer.data(), ELF::ElfMagic, strlen(ELF::ElfMagic)) != 0)
return make_error<JITLinkError>("ELF magic not valid");
+ uint8_t DataEncoding = Buffer.data()[ELF::EI_DATA];
Expected<uint16_t> TargetMachineArch = readTargetMachineArch(Buffer);
if (!TargetMachineArch)
return TargetMachineArch.takeError();
@@ -69,8 +72,16 @@ createLinkGraphFromELFObject(MemoryBufferRef ObjectBuffer) {
switch (*TargetMachineArch) {
case ELF::EM_AARCH64:
return createLinkGraphFromELFObject_aarch64(ObjectBuffer);
+ case ELF::EM_ARM:
+ return createLinkGraphFromELFObject_aarch32(ObjectBuffer);
case ELF::EM_LOONGARCH:
return createLinkGraphFromELFObject_loongarch(ObjectBuffer);
+ case ELF::EM_PPC64: {
+ if (DataEncoding == ELF::ELFDATA2LSB)
+ return createLinkGraphFromELFObject_ppc64le(ObjectBuffer);
+ else
+ return createLinkGraphFromELFObject_ppc64(ObjectBuffer);
+ }
case ELF::EM_RISCV:
return createLinkGraphFromELFObject_riscv(ObjectBuffer);
case ELF::EM_X86_64:
@@ -90,10 +101,22 @@ void link_ELF(std::unique_ptr<LinkGraph> G,
case Triple::aarch64:
link_ELF_aarch64(std::move(G), std::move(Ctx));
return;
+ case Triple::arm:
+ case Triple::armeb:
+ case Triple::thumb:
+ case Triple::thumbeb:
+ link_ELF_aarch32(std::move(G), std::move(Ctx));
+ return;
case Triple::loongarch32:
case Triple::loongarch64:
link_ELF_loongarch(std::move(G), std::move(Ctx));
return;
+ case Triple::ppc64:
+ link_ELF_ppc64(std::move(G), std::move(Ctx));
+ return;
+ case Triple::ppc64le:
+ link_ELF_ppc64le(std::move(G), std::move(Ctx));
+ return;
case Triple::riscv32:
case Triple::riscv64:
link_ELF_riscv(std::move(G), std::move(Ctx));
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
index 953a9f512784..e72645798349 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
@@ -59,9 +59,17 @@ class ELFLinkGraphBuilder : public ELFLinkGraphBuilderBase {
public:
ELFLinkGraphBuilder(const object::ELFFile<ELFT> &Obj, Triple TT,
- StringRef FileName,
+ SubtargetFeatures Features, StringRef FileName,
LinkGraph::GetEdgeKindNameFunction GetEdgeKindName);
+ /// Debug sections are included in the graph by default. Use
+ /// setProcessDebugSections(false) to ignore them if debug info is not
+ /// needed.
+ ELFLinkGraphBuilder &setProcessDebugSections(bool ProcessDebugSections) {
+ this->ProcessDebugSections = ProcessDebugSections;
+ return *this;
+ }
+
/// Attempt to construct and return the LinkGraph.
Expected<std::unique_ptr<LinkGraph>> buildGraph();
@@ -83,10 +91,7 @@ protected:
}
Block *getGraphBlock(ELFSectionIndex SecIndex) {
- auto I = GraphBlocks.find(SecIndex);
- if (I == GraphBlocks.end())
- return nullptr;
- return I->second;
+ return GraphBlocks.lookup(SecIndex);
}
void setGraphSymbol(ELFSymbolIndex SymIndex, Symbol &Sym) {
@@ -95,19 +100,33 @@ protected:
}
Symbol *getGraphSymbol(ELFSymbolIndex SymIndex) {
- auto I = GraphSymbols.find(SymIndex);
- if (I == GraphSymbols.end())
- return nullptr;
- return I->second;
+ return GraphSymbols.lookup(SymIndex);
}
Expected<std::pair<Linkage, Scope>>
getSymbolLinkageAndScope(const typename ELFT::Sym &Sym, StringRef Name);
+ /// Set the target flags on the given Symbol.
+ virtual TargetFlagsType makeTargetFlags(const typename ELFT::Sym &Sym) {
+ return TargetFlagsType{};
+ }
+
+ /// Get the physical offset of the symbol on the target platform.
+ virtual orc::ExecutorAddrDiff getRawOffset(const typename ELFT::Sym &Sym,
+ TargetFlagsType Flags) {
+ return Sym.getValue();
+ }
+
Error prepare();
Error graphifySections();
Error graphifySymbols();
+ /// Override in derived classes to suppress certain sections in the link
+ /// graph.
+ virtual bool excludeSection(const typename ELFT::Shdr &Sect) const {
+ return false;
+ }
+
/// Traverse all matching ELFT::Rela relocation records in the given section.
/// The handler function Func should be callable with this signature:
/// Error(const typename ELFT::Rela &,
@@ -115,8 +134,7 @@ protected:
///
template <typename RelocHandlerMethod>
Error forEachRelaRelocation(const typename ELFT::Shdr &RelSect,
- RelocHandlerMethod &&Func,
- bool ProcessDebugSections = false);
+ RelocHandlerMethod &&Func);
/// Traverse all matching ELFT::Rel relocation records in the given section.
/// The handler function Func should be callable with this signature:
@@ -125,22 +143,19 @@ protected:
///
template <typename RelocHandlerMethod>
Error forEachRelRelocation(const typename ELFT::Shdr &RelSect,
- RelocHandlerMethod &&Func,
- bool ProcessDebugSections = false);
+ RelocHandlerMethod &&Func);
/// Traverse all matching rela relocation records in the given section.
/// Convenience wrapper to allow passing a member function for the handler.
///
template <typename ClassT, typename RelocHandlerMethod>
Error forEachRelaRelocation(const typename ELFT::Shdr &RelSect,
- ClassT *Instance, RelocHandlerMethod &&Method,
- bool ProcessDebugSections = false) {
+ ClassT *Instance, RelocHandlerMethod &&Method) {
return forEachRelaRelocation(
RelSect,
[Instance, Method](const auto &Rel, const auto &Target, auto &GS) {
return (Instance->*Method)(Rel, Target, GS);
- },
- ProcessDebugSections);
+ });
}
/// Traverse all matching rel relocation records in the given section.
@@ -148,14 +163,12 @@ protected:
///
template <typename ClassT, typename RelocHandlerMethod>
Error forEachRelRelocation(const typename ELFT::Shdr &RelSect,
- ClassT *Instance, RelocHandlerMethod &&Method,
- bool ProcessDebugSections = false) {
+ ClassT *Instance, RelocHandlerMethod &&Method) {
return forEachRelRelocation(
RelSect,
[Instance, Method](const auto &Rel, const auto &Target, auto &GS) {
return (Instance->*Method)(Rel, Target, GS);
- },
- ProcessDebugSections);
+ });
}
const ELFFile &Obj;
@@ -163,6 +176,7 @@ protected:
typename ELFFile::Elf_Shdr_Range Sections;
const typename ELFFile::Elf_Shdr *SymTabSec = nullptr;
StringRef SectionStringTab;
+ bool ProcessDebugSections = true;
// Maps ELF section indexes to LinkGraph Blocks.
// Only SHF_ALLOC sections will have graph blocks.
@@ -175,11 +189,11 @@ protected:
template <typename ELFT>
ELFLinkGraphBuilder<ELFT>::ELFLinkGraphBuilder(
- const ELFFile &Obj, Triple TT, StringRef FileName,
- LinkGraph::GetEdgeKindNameFunction GetEdgeKindName)
+ const ELFFile &Obj, Triple TT, SubtargetFeatures Features,
+ StringRef FileName, LinkGraph::GetEdgeKindNameFunction GetEdgeKindName)
: ELFLinkGraphBuilderBase(std::make_unique<LinkGraph>(
- FileName.str(), Triple(std::move(TT)), ELFT::Is64Bits ? 8 : 4,
- support::endianness(ELFT::TargetEndianness),
+ FileName.str(), Triple(std::move(TT)), std::move(Features),
+ ELFT::Is64Bits ? 8 : 4, support::endianness(ELFT::TargetEndianness),
std::move(GetEdgeKindName))),
Obj(Obj) {
LLVM_DEBUG(
@@ -307,23 +321,28 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySections() {
auto Name = Obj.getSectionName(Sec, SectionStringTab);
if (!Name)
return Name.takeError();
+ if (excludeSection(Sec)) {
+ LLVM_DEBUG({
+ dbgs() << " " << SecIndex << ": Skipping section \"" << *Name
+ << "\" explicitly\n";
+ });
+ continue;
+ }
- // If the name indicates that it's a debug section then skip it: We don't
- // support those yet.
- if (isDwarfSection(*Name)) {
+ // Skip null sections.
+ if (Sec.sh_type == ELF::SHT_NULL) {
LLVM_DEBUG({
- dbgs() << " " << SecIndex << ": \"" << *Name
- << "\" is a debug section: "
- "No graph section will be created.\n";
+ dbgs() << " " << SecIndex << ": has type SHT_NULL. Skipping.\n";
});
continue;
}
- // Skip non-SHF_ALLOC sections
- if (!(Sec.sh_flags & ELF::SHF_ALLOC)) {
+ // If the name indicates that it's a debug section then skip it: We don't
+ // support those yet.
+ if (!ProcessDebugSections && isDwarfSection(*Name)) {
LLVM_DEBUG({
dbgs() << " " << SecIndex << ": \"" << *Name
- << "\" is not an SHF_ALLOC section: "
+ << "\" is a debug section: "
"No graph section will be created.\n";
});
continue;
@@ -335,16 +354,26 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySections() {
});
// Get the section's memory protection flags.
- orc::MemProt Prot;
+ orc::MemProt Prot = orc::MemProt::Read;
if (Sec.sh_flags & ELF::SHF_EXECINSTR)
- Prot = orc::MemProt::Read | orc::MemProt::Exec;
- else
- Prot = orc::MemProt::Read | orc::MemProt::Write;
+ Prot |= orc::MemProt::Exec;
+ if (Sec.sh_flags & ELF::SHF_WRITE)
+ Prot |= orc::MemProt::Write;
// Look for existing sections first.
auto *GraphSec = G->findSectionByName(*Name);
- if (!GraphSec)
+ if (!GraphSec) {
GraphSec = &G->createSection(*Name, Prot);
+ // Non-SHF_ALLOC sections get NoAlloc memory lifetimes.
+ if (!(Sec.sh_flags & ELF::SHF_ALLOC)) {
+ GraphSec->setMemLifetimePolicy(orc::MemLifetimePolicy::NoAlloc);
+ LLVM_DEBUG({
+ dbgs() << " " << SecIndex << ": \"" << *Name
+ << "\" is not a SHF_ALLOC section. Using NoAlloc lifetime.\n";
+ });
+ }
+ }
+
assert(GraphSec->getMemProt() == Prot && "MemProt should match");
Block *B = nullptr;
@@ -467,6 +496,9 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySymbols() {
<< "\"\n";
});
+ TargetFlagsType Flags = makeTargetFlags(Sym);
+ orc::ExecutorAddrDiff Offset = getRawOffset(Sym, Flags);
+
// In RISCV, temporary symbols (Used to generate dwarf, eh_frame
// sections...) will appear in object code's symbol table, and LLVM does
// not use names on these temporary symbols (RISCV gnu toolchain uses
@@ -474,10 +506,13 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySymbols() {
// anonymous symbol.
auto &GSym =
Name->empty()
- ? G->addAnonymousSymbol(*B, Sym.getValue(), Sym.st_size,
+ ? G->addAnonymousSymbol(*B, Offset, Sym.st_size,
false, false)
- : G->addDefinedSymbol(*B, Sym.getValue(), *Name, Sym.st_size, L,
- S, Sym.getType() == ELF::STT_FUNC, false);
+ : G->addDefinedSymbol(*B, Offset, *Name, Sym.st_size, L,
+ S, Sym.getType() == ELF::STT_FUNC,
+ false);
+
+ GSym.setTargetFlags(Flags);
setGraphSymbol(SymIndex, GSym);
}
} else if (Sym.isUndefined() && Sym.isExternal()) {
@@ -499,6 +534,21 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySymbols() {
auto &GSym = G->addExternalSymbol(*Name, Sym.st_size,
Sym.getBinding() == ELF::STB_WEAK);
setGraphSymbol(SymIndex, GSym);
+ } else if (Sym.isUndefined() && Sym.st_value == 0 && Sym.st_size == 0 &&
+ Sym.getType() == ELF::STT_NOTYPE &&
+ Sym.getBinding() == ELF::STB_LOCAL && Name->empty()) {
+ // Some relocations (e.g., R_RISCV_ALIGN) don't have a target symbol and
+ // use this kind of null symbol as a placeholder.
+ LLVM_DEBUG({
+ dbgs() << " " << SymIndex << ": Creating null graph symbol\n";
+ });
+
+ auto SymName =
+ G->allocateContent("__jitlink_ELF_SYM_UND_" + Twine(SymIndex));
+ auto SymNameRef = StringRef(SymName.data(), SymName.size());
+ auto &GSym = G->addAbsoluteSymbol(SymNameRef, orc::ExecutorAddr(0), 0,
+ Linkage::Strong, Scope::Local, false);
+ setGraphSymbol(SymIndex, GSym);
} else {
LLVM_DEBUG({
dbgs() << " " << SymIndex
@@ -514,8 +564,7 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySymbols() {
template <typename ELFT>
template <typename RelocHandlerFunction>
Error ELFLinkGraphBuilder<ELFT>::forEachRelaRelocation(
- const typename ELFT::Shdr &RelSect, RelocHandlerFunction &&Func,
- bool ProcessDebugSections) {
+ const typename ELFT::Shdr &RelSect, RelocHandlerFunction &&Func) {
// Only look into sections that store relocation entries.
if (RelSect.sh_type != ELF::SHT_RELA)
return Error::success();
@@ -537,6 +586,10 @@ Error ELFLinkGraphBuilder<ELFT>::forEachRelaRelocation(
LLVM_DEBUG(dbgs() << " skipped (dwarf section)\n\n");
return Error::success();
}
+ if (excludeSection(**FixupSection)) {
+ LLVM_DEBUG(dbgs() << " skipped (fixup section excluded explicitly)\n\n");
+ return Error::success();
+ }
// Lookup the link-graph node corresponding to the target section name.
auto *BlockToFix = getGraphBlock(RelSect.sh_info);
@@ -561,8 +614,7 @@ Error ELFLinkGraphBuilder<ELFT>::forEachRelaRelocation(
template <typename ELFT>
template <typename RelocHandlerFunction>
Error ELFLinkGraphBuilder<ELFT>::forEachRelRelocation(
- const typename ELFT::Shdr &RelSect, RelocHandlerFunction &&Func,
- bool ProcessDebugSections) {
+ const typename ELFT::Shdr &RelSect, RelocHandlerFunction &&Func) {
// Only look into sections that store relocation entries.
if (RelSect.sh_type != ELF::SHT_REL)
return Error::success();
@@ -584,6 +636,10 @@ Error ELFLinkGraphBuilder<ELFT>::forEachRelRelocation(
LLVM_DEBUG(dbgs() << " skipped (dwarf section)\n\n");
return Error::success();
}
+ if (excludeSection(**FixupSection)) {
+ LLVM_DEBUG(dbgs() << " skipped (fixup section excluded explicitly)\n\n");
+ return Error::success();
+ }
// Lookup the link-graph node corresponding to the target section name.
auto *BlockToFix = getGraphBlock(RelSect.sh_info);
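A sketch of the protection mapping introduced in graphifySections() above: every section gets Read, while Exec and Write are added only when the corresponding SHF_* flag is set (previously any non-executable section was made writable). The MemProt enum below is a stand-in, not orc::MemProt.

#include <cstdint>
#include <cstdio>

enum class MemProt : uint8_t { None = 0, Read = 1, Write = 2, Exec = 4 };
constexpr MemProt operator|(MemProt A, MemProt B) {
  return MemProt(uint8_t(A) | uint8_t(B));
}
constexpr uint64_t SHF_WRITE = 0x1, SHF_EXECINSTR = 0x4; // standard ELF values

static MemProt protFromFlags(uint64_t ShFlags) {
  MemProt Prot = MemProt::Read;
  if (ShFlags & SHF_EXECINSTR)
    Prot = Prot | MemProt::Exec;
  if (ShFlags & SHF_WRITE)
    Prot = Prot | MemProt::Write;
  return Prot;
}

int main() {
  std::printf("%u\n", unsigned(protFromFlags(SHF_EXECINSTR))); // 5 = Read|Exec
  return 0;
}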
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp
new file mode 100644
index 000000000000..a1bc4c853323
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp
@@ -0,0 +1,311 @@
+//===----- ELF_aarch32.cpp - JIT linker implementation for arm/thumb ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// ELF/aarch32 jit-link implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/ELF_aarch32.h"
+
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ExecutionEngine/JITLink/aarch32.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TargetParser/ARMTargetParser.h"
+
+#include "ELFLinkGraphBuilder.h"
+#include "JITLinkGeneric.h"
+
+#define DEBUG_TYPE "jitlink"
+
+using namespace llvm::object;
+
+namespace llvm {
+namespace jitlink {
+
+/// Translate from ELF relocation type to JITLink-internal edge kind.
+Expected<aarch32::EdgeKind_aarch32> getJITLinkEdgeKind(uint32_t ELFType) {
+ switch (ELFType) {
+ case ELF::R_ARM_ABS32:
+ return aarch32::Data_Pointer32;
+ case ELF::R_ARM_REL32:
+ return aarch32::Data_Delta32;
+ case ELF::R_ARM_CALL:
+ return aarch32::Arm_Call;
+ case ELF::R_ARM_THM_CALL:
+ return aarch32::Thumb_Call;
+ case ELF::R_ARM_THM_JUMP24:
+ return aarch32::Thumb_Jump24;
+ case ELF::R_ARM_THM_MOVW_ABS_NC:
+ return aarch32::Thumb_MovwAbsNC;
+ case ELF::R_ARM_THM_MOVT_ABS:
+ return aarch32::Thumb_MovtAbs;
+ }
+
+ return make_error<JITLinkError>(
+ "Unsupported aarch32 relocation " + formatv("{0:d}: ", ELFType) +
+ object::getELFRelocationTypeName(ELF::EM_ARM, ELFType));
+}
+
+/// Translate from JITLink-internal edge kind back to ELF relocation type.
+Expected<uint32_t> getELFRelocationType(Edge::Kind Kind) {
+ switch (static_cast<aarch32::EdgeKind_aarch32>(Kind)) {
+ case aarch32::Data_Delta32:
+ return ELF::R_ARM_REL32;
+ case aarch32::Data_Pointer32:
+ return ELF::R_ARM_ABS32;
+ case aarch32::Arm_Call:
+ return ELF::R_ARM_CALL;
+ case aarch32::Thumb_Call:
+ return ELF::R_ARM_THM_CALL;
+ case aarch32::Thumb_Jump24:
+ return ELF::R_ARM_THM_JUMP24;
+ case aarch32::Thumb_MovwAbsNC:
+ return ELF::R_ARM_THM_MOVW_ABS_NC;
+ case aarch32::Thumb_MovtAbs:
+ return ELF::R_ARM_THM_MOVT_ABS;
+ }
+
+ return make_error<JITLinkError>(formatv("Invalid aarch32 edge {0:d}: ",
+ Kind));
+}
+
+/// Get a human-readable name for the given ELF AArch32 edge kind.
+const char *getELFAArch32EdgeKindName(Edge::Kind R) {
+ // No ELF-specific edge kinds yet
+ return aarch32::getEdgeKindName(R);
+}
+
+class ELFJITLinker_aarch32 : public JITLinker<ELFJITLinker_aarch32> {
+ friend class JITLinker<ELFJITLinker_aarch32>;
+
+public:
+ ELFJITLinker_aarch32(std::unique_ptr<JITLinkContext> Ctx,
+ std::unique_ptr<LinkGraph> G, PassConfiguration PassCfg,
+ aarch32::ArmConfig ArmCfg)
+ : JITLinker(std::move(Ctx), std::move(G), std::move(PassCfg)),
+ ArmCfg(std::move(ArmCfg)) {}
+
+private:
+ aarch32::ArmConfig ArmCfg;
+
+ Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const {
+ return aarch32::applyFixup(G, B, E, ArmCfg);
+ }
+};
+
+template <support::endianness DataEndianness>
+class ELFLinkGraphBuilder_aarch32
+ : public ELFLinkGraphBuilder<ELFType<DataEndianness, false>> {
+private:
+ using ELFT = ELFType<DataEndianness, false>;
+ using Base = ELFLinkGraphBuilder<ELFT>;
+
+ bool excludeSection(const typename ELFT::Shdr &Sect) const override {
+ // TODO: An .ARM.exidx (Exception Index table) entry is 8 bytes in size and
+ // consists of 2 words. It might be sufficient to process only relocations
+ // in the second word (offset 4). Please find more details in: Exception
+ // Handling ABI for the Arm® Architecture -> Index table entries
+ if (Sect.sh_type == ELF::SHT_ARM_EXIDX)
+ return true;
+ return false;
+ }
+
+ Error addRelocations() override {
+ LLVM_DEBUG(dbgs() << "Processing relocations:\n");
+ using Self = ELFLinkGraphBuilder_aarch32<DataEndianness>;
+ for (const auto &RelSect : Base::Sections) {
+ if (Error Err = Base::forEachRelRelocation(RelSect, this,
+ &Self::addSingleRelRelocation))
+ return Err;
+ }
+ return Error::success();
+ }
+
+ Error addSingleRelRelocation(const typename ELFT::Rel &Rel,
+ const typename ELFT::Shdr &FixupSect,
+ Block &BlockToFix) {
+ uint32_t SymbolIndex = Rel.getSymbol(false);
+ auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec);
+ if (!ObjSymbol)
+ return ObjSymbol.takeError();
+
+ Symbol *GraphSymbol = Base::getGraphSymbol(SymbolIndex);
+ if (!GraphSymbol)
+ return make_error<StringError>(
+ formatv("Could not find symbol at given index, did you add it to "
+ "JITSymbolTable? index: {0}, shndx: {1} Size of table: {2}",
+ SymbolIndex, (*ObjSymbol)->st_shndx,
+ Base::GraphSymbols.size()),
+ inconvertibleErrorCode());
+
+ uint32_t Type = Rel.getType(false);
+ Expected<aarch32::EdgeKind_aarch32> Kind = getJITLinkEdgeKind(Type);
+ if (!Kind)
+ return Kind.takeError();
+
+ auto FixupAddress = orc::ExecutorAddr(FixupSect.sh_addr) + Rel.r_offset;
+ Edge::OffsetT Offset = FixupAddress - BlockToFix.getAddress();
+ Edge E(*Kind, Offset, *GraphSymbol, 0);
+
+ Expected<int64_t> Addend =
+ aarch32::readAddend(*Base::G, BlockToFix, E, ArmCfg);
+ if (!Addend)
+ return Addend.takeError();
+
+ E.setAddend(*Addend);
+ LLVM_DEBUG({
+ dbgs() << " ";
+ printEdge(dbgs(), BlockToFix, E, getELFAArch32EdgeKindName(*Kind));
+ dbgs() << "\n";
+ });
+
+ BlockToFix.addEdge(std::move(E));
+ return Error::success();
+ }
+
+ aarch32::ArmConfig ArmCfg;
+
+protected:
+ TargetFlagsType makeTargetFlags(const typename ELFT::Sym &Sym) override {
+ if (Sym.getValue() & 0x01)
+ return aarch32::ThumbSymbol;
+ return TargetFlagsType{};
+ }
+
+ orc::ExecutorAddrDiff getRawOffset(const typename ELFT::Sym &Sym,
+ TargetFlagsType Flags) override {
+ assert((makeTargetFlags(Sym) & Flags) == Flags);
+ static constexpr uint64_t ThumbBit = 0x01;
+ return Sym.getValue() & ~ThumbBit;
+ }
+
+public:
+ ELFLinkGraphBuilder_aarch32(StringRef FileName,
+ const llvm::object::ELFFile<ELFT> &Obj, Triple TT,
+ SubtargetFeatures Features,
+ aarch32::ArmConfig ArmCfg)
+ : ELFLinkGraphBuilder<ELFT>(Obj, std::move(TT), std::move(Features),
+ FileName, getELFAArch32EdgeKindName),
+ ArmCfg(std::move(ArmCfg)) {}
+};
+
+template <aarch32::StubsFlavor Flavor>
+Error buildTables_ELF_aarch32(LinkGraph &G) {
+ LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n");
+
+ aarch32::StubsManager<Flavor> PLT;
+ visitExistingEdges(G, PLT);
+ return Error::success();
+}
+
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromELFObject_aarch32(MemoryBufferRef ObjectBuffer) {
+ LLVM_DEBUG({
+ dbgs() << "Building jitlink graph for new input "
+ << ObjectBuffer.getBufferIdentifier() << "...\n";
+ });
+
+ auto ELFObj = ObjectFile::createELFObjectFile(ObjectBuffer);
+ if (!ELFObj)
+ return ELFObj.takeError();
+
+ auto Features = (*ELFObj)->getFeatures();
+ if (!Features)
+ return Features.takeError();
+
+ // Find out what exact AArch32 instruction set and features we target.
+ auto TT = (*ELFObj)->makeTriple();
+ ARM::ArchKind AK = ARM::parseArch(TT.getArchName());
+ if (AK == ARM::ArchKind::INVALID)
+ return make_error<JITLinkError>(
+ "Failed to build ELF link graph: Invalid ARM ArchKind");
+
+ // Resolve our internal configuration for the target. If at some point the
+ // CPUArch alone becomes too imprecise, we can find more details in the
+ // Tag_CPU_arch_profile.
+ aarch32::ArmConfig ArmCfg;
+ using namespace ARMBuildAttrs;
+ auto Arch = static_cast<CPUArch>(ARM::getArchAttr(AK));
+ switch (Arch) {
+ case v7:
+ case v8_A:
+ ArmCfg = aarch32::getArmConfigForCPUArch(Arch);
+ assert(ArmCfg.Stubs != aarch32::Unsupported &&
+ "Provide a config for each supported CPU");
+ break;
+ default:
+ return make_error<JITLinkError>(
+ "Failed to build ELF link graph: Unsupported CPU arch " +
+ StringRef(aarch32::getCPUArchName(Arch)));
+ }
+
+ // Populate the link-graph.
+ switch (TT.getArch()) {
+ case Triple::arm:
+ case Triple::thumb: {
+ auto &ELFFile = cast<ELFObjectFile<ELF32LE>>(**ELFObj).getELFFile();
+ return ELFLinkGraphBuilder_aarch32<support::little>(
+ (*ELFObj)->getFileName(), ELFFile, TT, std::move(*Features),
+ ArmCfg)
+ .buildGraph();
+ }
+ case Triple::armeb:
+ case Triple::thumbeb: {
+ auto &ELFFile = cast<ELFObjectFile<ELF32BE>>(**ELFObj).getELFFile();
+ return ELFLinkGraphBuilder_aarch32<support::big>(
+ (*ELFObj)->getFileName(), ELFFile, TT, std::move(*Features),
+ ArmCfg)
+ .buildGraph();
+ }
+ default:
+ return make_error<JITLinkError>(
+ "Failed to build ELF/aarch32 link graph: Invalid target triple " +
+ TT.getTriple());
+ }
+}
+
+void link_ELF_aarch32(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx) {
+ const Triple &TT = G->getTargetTriple();
+
+ using namespace ARMBuildAttrs;
+ ARM::ArchKind AK = ARM::parseArch(TT.getArchName());
+ auto CPU = static_cast<CPUArch>(ARM::getArchAttr(AK));
+ aarch32::ArmConfig ArmCfg = aarch32::getArmConfigForCPUArch(CPU);
+
+ PassConfiguration PassCfg;
+ if (Ctx->shouldAddDefaultTargetPasses(TT)) {
+ // Add a mark-live pass.
+ if (auto MarkLive = Ctx->getMarkLivePass(TT))
+ PassCfg.PrePrunePasses.push_back(std::move(MarkLive));
+ else
+ PassCfg.PrePrunePasses.push_back(markAllSymbolsLive);
+
+ switch (ArmCfg.Stubs) {
+ case aarch32::Thumbv7:
+ PassCfg.PostPrunePasses.push_back(
+ buildTables_ELF_aarch32<aarch32::Thumbv7>);
+ break;
+ case aarch32::Unsupported:
+ llvm_unreachable("Check before building graph");
+ }
+ }
+
+ if (auto Err = Ctx->modifyPassConfig(*G, PassCfg))
+ return Ctx->notifyFailed(std::move(Err));
+
+ ELFJITLinker_aarch32::link(std::move(Ctx), std::move(G), std::move(PassCfg),
+ std::move(ArmCfg));
+}
+
+} // namespace jitlink
+} // namespace llvm
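A standalone sketch of the Thumb-bit handling in makeTargetFlags()/getRawOffset() above: on AArch32 the low bit of a symbol value marks a Thumb symbol and is masked off to recover the real offset (the helper and struct names below are made up).

#include <cstdint>
#include <cstdio>

static constexpr uint64_t ThumbBit = 0x01;

struct DecodedSym {
  bool IsThumb;
  uint64_t Offset;
};

static DecodedSym decodeARMSymbolValue(uint64_t StValue) {
  return {(StValue & ThumbBit) != 0, StValue & ~ThumbBit};
}

int main() {
  DecodedSym S = decodeARMSymbolValue(0x1001); // Thumb function at 0x1000
  std::printf("thumb=%d offset=0x%llx\n", S.IsThumb,
              (unsigned long long)S.Offset);
  return 0;
}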
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
index 567d5a4dd47a..652eb931190e 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
@@ -47,6 +47,7 @@ class ELFLinkGraphBuilder_aarch64 : public ELFLinkGraphBuilder<ELFT> {
private:
enum ELFAArch64RelocationKind : Edge::Kind {
ELFCall26 = Edge::FirstRelocation,
+ ELFAdrLo21,
ELFAdrPage21,
ELFAddAbs12,
ELFLdSt8Abs12,
@@ -58,6 +59,9 @@ private:
ELFMovwAbsG1,
ELFMovwAbsG2,
ELFMovwAbsG3,
+ ELFTstBr14,
+ ELFCondBr19,
+ ELFAbs32,
ELFAbs64,
ELFPrel32,
ELFPrel64,
@@ -76,6 +80,8 @@ private:
case ELF::R_AARCH64_CALL26:
case ELF::R_AARCH64_JUMP26:
return ELFCall26;
+ case ELF::R_AARCH64_ADR_PREL_LO21:
+ return ELFAdrLo21;
case ELF::R_AARCH64_ADR_PREL_PG_HI21:
return ELFAdrPage21;
case ELF::R_AARCH64_ADD_ABS_LO12_NC:
@@ -98,6 +104,12 @@ private:
return ELFMovwAbsG2;
case ELF::R_AARCH64_MOVW_UABS_G3:
return ELFMovwAbsG3;
+ case ELF::R_AARCH64_TSTBR14:
+ return ELFTstBr14;
+ case ELF::R_AARCH64_CONDBR19:
+ return ELFCondBr19;
+ case ELF::R_AARCH64_ABS32:
+ return ELFAbs32;
case ELF::R_AARCH64_ABS64:
return ELFAbs64;
case ELF::R_AARCH64_PREL32:
@@ -177,6 +189,15 @@ private:
Kind = aarch64::Branch26PCRel;
break;
}
+ case ELFAdrLo21: {
+ uint32_t Instr = *(const ulittle32_t *)FixupContent;
+ if (!aarch64::isADR(Instr))
+ return make_error<JITLinkError>(
+ "R_AARCH64_ADR_PREL_LO21 target is not an ADR instruction");
+
+ Kind = aarch64::ADRLiteral21;
+ break;
+ }
case ELFAdrPage21: {
Kind = aarch64::Page21;
break;
@@ -284,6 +305,29 @@ private:
Kind = aarch64::MoveWide16;
break;
}
+ case ELFTstBr14: {
+ uint32_t Instr = *(const ulittle32_t *)FixupContent;
+ if (!aarch64::isTestAndBranchImm14(Instr))
+ return make_error<JITLinkError>("R_AARCH64_TSTBR14 target is not a "
+ "test and branch instruction");
+
+ Kind = aarch64::TestAndBranch14PCRel;
+ break;
+ }
+ case ELFCondBr19: {
+ uint32_t Instr = *(const ulittle32_t *)FixupContent;
+ if (!aarch64::isCondBranchImm19(Instr) &&
+ !aarch64::isCompAndBranchImm19(Instr))
+ return make_error<JITLinkError>("R_AARCH64_CONDBR19 target is not a "
+ "conditional branch instruction");
+
+ Kind = aarch64::CondBranch19PCRel;
+ break;
+ }
+ case ELFAbs32: {
+ Kind = aarch64::Pointer32;
+ break;
+ }
case ELFAbs64: {
Kind = aarch64::Pointer64;
break;
@@ -357,6 +401,8 @@ private:
return "ELFMovwAbsG2";
case ELFMovwAbsG3:
return "ELFMovwAbsG3";
+ case ELFAbs32:
+ return "ELFAbs32";
case ELFAbs64:
return "ELFAbs64";
case ELFPrel32:
@@ -382,9 +428,10 @@ private:
public:
ELFLinkGraphBuilder_aarch64(StringRef FileName,
- const object::ELFFile<ELFT> &Obj, const Triple T)
- : ELFLinkGraphBuilder<ELFT>(Obj, std::move(T), FileName,
- aarch64::getEdgeKindName) {}
+ const object::ELFFile<ELFT> &Obj, Triple TT,
+ SubtargetFeatures Features)
+ : ELFLinkGraphBuilder<ELFT>(Obj, std::move(TT), std::move(Features),
+ FileName, aarch64::getEdgeKindName) {}
};
// TLS Info Builder.
@@ -532,13 +579,17 @@ createLinkGraphFromELFObject_aarch64(MemoryBufferRef ObjectBuffer) {
if (!ELFObj)
return ELFObj.takeError();
+ auto Features = (*ELFObj)->getFeatures();
+ if (!Features)
+ return Features.takeError();
+
assert((*ELFObj)->getArch() == Triple::aarch64 &&
"Only AArch64 (little endian) is supported for now");
auto &ELFObjFile = cast<object::ELFObjectFile<object::ELF64LE>>(**ELFObj);
- return ELFLinkGraphBuilder_aarch64<object::ELF64LE>((*ELFObj)->getFileName(),
- ELFObjFile.getELFFile(),
- (*ELFObj)->makeTriple())
+ return ELFLinkGraphBuilder_aarch64<object::ELF64LE>(
+ (*ELFObj)->getFileName(), ELFObjFile.getELFFile(),
+ (*ELFObj)->makeTriple(), std::move(*Features))
.buildGraph();
}
@@ -552,6 +603,7 @@ void link_ELF_aarch64(std::unique_ptr<LinkGraph> G,
Config.PrePrunePasses.push_back(EHFrameEdgeFixer(
".eh_frame", 8, aarch64::Pointer32, aarch64::Pointer64,
aarch64::Delta32, aarch64::Delta64, aarch64::NegDelta32));
+ Config.PrePrunePasses.push_back(EHFrameNullTerminator(".eh_frame"));
// Add a mark-live pass.
if (auto MarkLive = Ctx->getMarkLivePass(TT))
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
index 1fee1b24b6bd..860165365a7e 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_i386.cpp
@@ -30,7 +30,8 @@ Error buildTables_ELF_i386(LinkGraph &G) {
LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n");
i386::GOTTableManager GOT;
- visitExistingEdges(G, GOT);
+ i386::PLTTableManager PLT(GOT);
+ visitExistingEdges(G, GOT, PLT);
return Error::success();
}
} // namespace
@@ -130,6 +131,8 @@ private:
return EdgeKind_i386::Delta32;
case ELF::R_386_GOTOFF:
return EdgeKind_i386::Delta32FromGOT;
+ case ELF::R_386_PLT32:
+ return EdgeKind_i386::BranchPCRel32;
}
return make_error<JITLinkError>("Unsupported i386 relocation:" +
@@ -207,9 +210,9 @@ private:
public:
ELFLinkGraphBuilder_i386(StringRef FileName, const object::ELFFile<ELFT> &Obj,
- const Triple T)
- : ELFLinkGraphBuilder<ELFT>(Obj, std::move(T), FileName,
- i386::getEdgeKindName) {}
+ Triple TT, SubtargetFeatures Features)
+ : ELFLinkGraphBuilder<ELFT>(Obj, std::move(TT), std::move(Features),
+ FileName, i386::getEdgeKindName) {}
};
Expected<std::unique_ptr<LinkGraph>>
@@ -223,13 +226,17 @@ createLinkGraphFromELFObject_i386(MemoryBufferRef ObjectBuffer) {
if (!ELFObj)
return ELFObj.takeError();
+ auto Features = (*ELFObj)->getFeatures();
+ if (!Features)
+ return Features.takeError();
+
assert((*ELFObj)->getArch() == Triple::x86 &&
"Only i386 (little endian) is supported for now");
auto &ELFObjFile = cast<object::ELFObjectFile<object::ELF32LE>>(**ELFObj);
- return ELFLinkGraphBuilder_i386<object::ELF32LE>((*ELFObj)->getFileName(),
- ELFObjFile.getELFFile(),
- (*ELFObj)->makeTriple())
+ return ELFLinkGraphBuilder_i386<object::ELF32LE>(
+ (*ELFObj)->getFileName(), ELFObjFile.getELFFile(),
+ (*ELFObj)->makeTriple(), std::move(*Features))
.buildGraph();
}
@@ -243,8 +250,11 @@ void link_ELF_i386(std::unique_ptr<LinkGraph> G,
else
Config.PrePrunePasses.push_back(markAllSymbolsLive);
- // Add an in-place GOT build pass.
+ // Add an in-place GOT and PLT build pass.
Config.PostPrunePasses.push_back(buildTables_ELF_i386);
+
+ // Add GOT/Stubs optimizer pass.
+ Config.PreFixupPasses.push_back(i386::optimizeGOTAndStubAccesses);
}
if (auto Err = Ctx->modifyPassConfig(*G, Config))
return Ctx->notifyFailed(std::move(Err));
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_loongarch.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_loongarch.cpp
index cd70217b4c0a..7f76b45aecbb 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_loongarch.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_loongarch.cpp
@@ -129,10 +129,10 @@ private:
public:
ELFLinkGraphBuilder_loongarch(StringRef FileName,
- const object::ELFFile<ELFT> &Obj,
- const Triple T)
- : ELFLinkGraphBuilder<ELFT>(Obj, std::move(T), FileName,
- loongarch::getEdgeKindName) {}
+ const object::ELFFile<ELFT> &Obj, Triple TT,
+ SubtargetFeatures Features)
+ : ELFLinkGraphBuilder<ELFT>(Obj, std::move(TT), std::move(Features),
+ FileName, loongarch::getEdgeKindName) {}
};
Error buildTables_ELF_loongarch(LinkGraph &G) {
@@ -160,11 +160,15 @@ createLinkGraphFromELFObject_loongarch(MemoryBufferRef ObjectBuffer) {
if (!ELFObj)
return ELFObj.takeError();
+ auto Features = (*ELFObj)->getFeatures();
+ if (!Features)
+ return Features.takeError();
+
if ((*ELFObj)->getArch() == Triple::loongarch64) {
auto &ELFObjFile = cast<object::ELFObjectFile<object::ELF64LE>>(**ELFObj);
return ELFLinkGraphBuilder_loongarch<object::ELF64LE>(
(*ELFObj)->getFileName(), ELFObjFile.getELFFile(),
- (*ELFObj)->makeTriple())
+ (*ELFObj)->makeTriple(), std::move(*Features))
.buildGraph();
}
@@ -173,7 +177,7 @@ createLinkGraphFromELFObject_loongarch(MemoryBufferRef ObjectBuffer) {
auto &ELFObjFile = cast<object::ELFObjectFile<object::ELF32LE>>(**ELFObj);
return ELFLinkGraphBuilder_loongarch<object::ELF32LE>(
(*ELFObj)->getFileName(), ELFObjFile.getELFFile(),
- (*ELFObj)->makeTriple())
+ (*ELFObj)->makeTriple(), std::move(*Features))
.buildGraph();
}
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp
new file mode 100644
index 000000000000..a30b9ce51c84
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_ppc64.cpp
@@ -0,0 +1,396 @@
+//===------- ELF_ppc64.cpp -JIT linker implementation for ELF/ppc64 -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// ELF/ppc64 jit-link implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/ELF_ppc64.h"
+#include "llvm/ExecutionEngine/JITLink/DWARFRecordSectionSplitter.h"
+#include "llvm/ExecutionEngine/JITLink/TableManager.h"
+#include "llvm/ExecutionEngine/JITLink/ppc64.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/Endian.h"
+
+#include "EHFrameSupportImpl.h"
+#include "ELFLinkGraphBuilder.h"
+#include "JITLinkGeneric.h"
+
+#define DEBUG_TYPE "jitlink"
+
+namespace {
+
+using namespace llvm;
+using namespace llvm::jitlink;
+
+constexpr StringRef ELFTOCSymbolName = ".TOC.";
+constexpr StringRef TOCSymbolAliasIdent = "__TOC__";
+constexpr uint64_t ELFTOCBaseOffset = 0x8000;
+
+template <support::endianness Endianness>
+Symbol &createELFGOTHeader(LinkGraph &G,
+ ppc64::TOCTableManager<Endianness> &TOC) {
+ Symbol *TOCSymbol = nullptr;
+
+ for (Symbol *Sym : G.defined_symbols())
+ if (LLVM_UNLIKELY(Sym->getName() == ELFTOCSymbolName)) {
+ TOCSymbol = Sym;
+ break;
+ }
+
+ if (LLVM_LIKELY(TOCSymbol == nullptr)) {
+ for (Symbol *Sym : G.external_symbols())
+ if (Sym->getName() == ELFTOCSymbolName) {
+ TOCSymbol = Sym;
+ break;
+ }
+ }
+
+ if (!TOCSymbol)
+ TOCSymbol = &G.addExternalSymbol(ELFTOCSymbolName, 0, false);
+
+ return TOC.getEntryForTarget(G, *TOCSymbol);
+}
+
+// Register preexisting GOT entries with TOC table manager.
+template <support::endianness Endianness>
+inline void
+registerExistingGOTEntries(LinkGraph &G,
+ ppc64::TOCTableManager<Endianness> &TOC) {
+ auto isGOTEntry = [](const Edge &E) {
+ return E.getKind() == ppc64::Pointer64 && E.getTarget().isExternal();
+ };
+ if (Section *dotTOCSection = G.findSectionByName(".toc")) {
+ for (Block *B : dotTOCSection->blocks())
+ for (Edge &E : B->edges())
+ if (isGOTEntry(E))
+ TOC.registerPreExistingEntry(E.getTarget(),
+ G.addAnonymousSymbol(*B, E.getOffset(),
+ G.getPointerSize(),
+ false, false));
+ }
+}
+
+template <support::endianness Endianness>
+Error buildTables_ELF_ppc64(LinkGraph &G) {
+ LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n");
+ ppc64::TOCTableManager<Endianness> TOC;
+ // Before visiting edges, we create a header containing the address of TOC
+ // base as ELFABIv2 suggests:
+ // > The GOT consists of an 8-byte header that contains the TOC base (the
+ // first TOC base when multiple TOCs are present), followed by an array of
+ // 8-byte addresses.
+ createELFGOTHeader(G, TOC);
+
+ // There might be compiler-generated GOT entries in ELF relocatable file.
+ registerExistingGOTEntries(G, TOC);
+
+ ppc64::PLTTableManager<Endianness> PLT(TOC);
+ visitExistingEdges(G, TOC, PLT);
+ // TODO: Add TLS support.
+
+ // After visiting edges in LinkGraph, we have GOT entries built in the
+ // synthesized section.
+ // Merge sections included in the TOC into the synthesized TOC section so
+ // that the TOC stays compact, reducing the chance of relocation overflow.
+ if (Section *TOCSection = G.findSectionByName(TOC.getSectionName())) {
+ // .got and .plt are not normally present in a relocatable object file
+ // because they are linker generated.
+ if (Section *gotSection = G.findSectionByName(".got"))
+ G.mergeSections(*TOCSection, *gotSection);
+ if (Section *tocSection = G.findSectionByName(".toc"))
+ G.mergeSections(*TOCSection, *tocSection);
+ if (Section *sdataSection = G.findSectionByName(".sdata"))
+ G.mergeSections(*TOCSection, *sdataSection);
+ if (Section *sbssSection = G.findSectionByName(".sbss"))
+ G.mergeSections(*TOCSection, *sbssSection);
+ // .tocbss no longer appears in ELFABIv2. Leave it here to be compatible
+ // with rtdyld.
+ if (Section *tocbssSection = G.findSectionByName(".tocbss"))
+ G.mergeSections(*TOCSection, *tocbssSection);
+ if (Section *pltSection = G.findSectionByName(".plt"))
+ G.mergeSections(*TOCSection, *pltSection);
+ }
+
+ return Error::success();
+}
+
+} // namespace
+
+namespace llvm::jitlink {
+
+template <support::endianness Endianness>
+class ELFLinkGraphBuilder_ppc64
+ : public ELFLinkGraphBuilder<object::ELFType<Endianness, true>> {
+private:
+ using ELFT = object::ELFType<Endianness, true>;
+ using Base = ELFLinkGraphBuilder<ELFT>;
+
+ using Base::G; // Use LinkGraph pointer from base class.
+
+ Error addRelocations() override {
+ LLVM_DEBUG(dbgs() << "Processing relocations:\n");
+
+ using Self = ELFLinkGraphBuilder_ppc64<Endianness>;
+ for (const auto &RelSect : Base::Sections) {
+ // Validate the section to read relocation entries from.
+ if (RelSect.sh_type == ELF::SHT_REL)
+ return make_error<StringError>("No SHT_REL in valid " +
+ G->getTargetTriple().getArchName() +
+ " ELF object files",
+ inconvertibleErrorCode());
+
+ if (Error Err = Base::forEachRelaRelocation(RelSect, this,
+ &Self::addSingleRelocation))
+ return Err;
+ }
+
+ return Error::success();
+ }
+
+ Error addSingleRelocation(const typename ELFT::Rela &Rel,
+ const typename ELFT::Shdr &FixupSection,
+ Block &BlockToFix) {
+ using Base = ELFLinkGraphBuilder<ELFT>;
+ auto ELFReloc = Rel.getType(false);
+
+ // R_PPC64_NONE is a no-op.
+ if (LLVM_UNLIKELY(ELFReloc == ELF::R_PPC64_NONE))
+ return Error::success();
+
+ auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec);
+ if (!ObjSymbol)
+ return ObjSymbol.takeError();
+
+ uint32_t SymbolIndex = Rel.getSymbol(false);
+ Symbol *GraphSymbol = Base::getGraphSymbol(SymbolIndex);
+ if (!GraphSymbol)
+ return make_error<StringError>(
+ formatv("Could not find symbol at given index, did you add it to "
+ "JITSymbolTable? index: {0}, shndx: {1} Size of table: {2}",
+ SymbolIndex, (*ObjSymbol)->st_shndx,
+ Base::GraphSymbols.size()),
+ inconvertibleErrorCode());
+
+ int64_t Addend = Rel.r_addend;
+ orc::ExecutorAddr FixupAddress =
+ orc::ExecutorAddr(FixupSection.sh_addr) + Rel.r_offset;
+ Edge::OffsetT Offset = FixupAddress - BlockToFix.getAddress();
+ Edge::Kind Kind = Edge::Invalid;
+
+ switch (ELFReloc) {
+ default:
+ return make_error<JITLinkError>(
+ "In " + G->getName() + ": Unsupported ppc64 relocation type " +
+ object::getELFRelocationTypeName(ELF::EM_PPC64, ELFReloc));
+ case ELF::R_PPC64_ADDR64:
+ Kind = ppc64::Pointer64;
+ break;
+ case ELF::R_PPC64_TOC16_HA:
+ Kind = ppc64::TOCDelta16HA;
+ break;
+ case ELF::R_PPC64_TOC16_DS:
+ Kind = ppc64::TOCDelta16DS;
+ break;
+ case ELF::R_PPC64_TOC16_LO:
+ Kind = ppc64::TOCDelta16LO;
+ break;
+ case ELF::R_PPC64_TOC16_LO_DS:
+ Kind = ppc64::TOCDelta16LODS;
+ break;
+ case ELF::R_PPC64_REL16:
+ Kind = ppc64::Delta16;
+ break;
+ case ELF::R_PPC64_REL16_HA:
+ Kind = ppc64::Delta16HA;
+ break;
+ case ELF::R_PPC64_REL16_LO:
+ Kind = ppc64::Delta16LO;
+ break;
+ case ELF::R_PPC64_REL32:
+ Kind = ppc64::Delta32;
+ break;
+ case ELF::R_PPC64_REL24_NOTOC:
+ case ELF::R_PPC64_REL24: {
+ bool isLocal = !GraphSymbol->isExternal();
+ if (isLocal) {
+ // TODO: There are cases where a local function call needs a call stub.
+ // 1. Caller uses TOC, the callee doesn't, need a r2 save stub.
+ // 2. Caller doesn't use TOC, the callee does, need a r12 setup stub.
+ // FIXME: For a local call, we might need a thunk if branch target is
+ // out of range.
+ Kind = ppc64::CallBranchDelta;
+ // Branch to local entry.
+ Addend += ELF::decodePPC64LocalEntryOffset((*ObjSymbol)->st_other);
+ } else {
+ Kind = ELFReloc == ELF::R_PPC64_REL24 ? ppc64::RequestPLTCallStubSaveTOC
+ : ppc64::RequestPLTCallStubNoTOC;
+ }
+ break;
+ }
+ case ELF::R_PPC64_REL64:
+ Kind = ppc64::Delta64;
+ break;
+ }
+
+ Edge GE(Kind, Offset, *GraphSymbol, Addend);
+ BlockToFix.addEdge(std::move(GE));
+ return Error::success();
+ }
+
+public:
+ ELFLinkGraphBuilder_ppc64(StringRef FileName,
+ const object::ELFFile<ELFT> &Obj, Triple TT,
+ SubtargetFeatures Features)
+ : ELFLinkGraphBuilder<ELFT>(Obj, std::move(TT), std::move(Features),
+ FileName, ppc64::getEdgeKindName) {}
+};
+
+template <support::endianness Endianness>
+class ELFJITLinker_ppc64 : public JITLinker<ELFJITLinker_ppc64<Endianness>> {
+ using JITLinkerBase = JITLinker<ELFJITLinker_ppc64<Endianness>>;
+ friend JITLinkerBase;
+
+public:
+ ELFJITLinker_ppc64(std::unique_ptr<JITLinkContext> Ctx,
+ std::unique_ptr<LinkGraph> G, PassConfiguration PassConfig)
+ : JITLinkerBase(std::move(Ctx), std::move(G), std::move(PassConfig)) {
+ JITLinkerBase::getPassConfig().PostAllocationPasses.push_back(
+ [this](LinkGraph &G) { return defineTOCBase(G); });
+ }
+
+private:
+ Symbol *TOCSymbol = nullptr;
+
+ Error defineTOCBase(LinkGraph &G) {
+ for (Symbol *Sym : G.defined_symbols()) {
+ if (LLVM_UNLIKELY(Sym->getName() == ELFTOCSymbolName)) {
+ TOCSymbol = Sym;
+ return Error::success();
+ }
+ }
+
+ assert(TOCSymbol == nullptr &&
+ "TOCSymbol should not be defined at this point");
+
+ for (Symbol *Sym : G.external_symbols()) {
+ if (Sym->getName() == ELFTOCSymbolName) {
+ TOCSymbol = Sym;
+ break;
+ }
+ }
+
+ if (Section *TOCSection = G.findSectionByName(
+ ppc64::TOCTableManager<Endianness>::getSectionName())) {
+ assert(!TOCSection->empty() && "TOC section should have reserved an "
+ "entry for containing the TOC base");
+
+ SectionRange SR(*TOCSection);
+ orc::ExecutorAddr TOCBaseAddr(SR.getFirstBlock()->getAddress() +
+ ELFTOCBaseOffset);
+ assert(TOCSymbol && TOCSymbol->isExternal() &&
+ ".TOC. should be a external symbol at this point");
+ G.makeAbsolute(*TOCSymbol, TOCBaseAddr);
+ // Create an alias of .TOC. so that the rtdyld checker can recognize it.
+ G.addAbsoluteSymbol(TOCSymbolAliasIdent, TOCSymbol->getAddress(),
+ TOCSymbol->getSize(), TOCSymbol->getLinkage(),
+ TOCSymbol->getScope(), TOCSymbol->isLive());
+ return Error::success();
+ }
+
+ // If the TOC section doesn't exist, no TOC relocations were found and we
+ // don't need a TOCSymbol.
+ return Error::success();
+ }
+
+ Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const {
+ return ppc64::applyFixup<Endianness>(G, B, E, TOCSymbol);
+ }
+};
+
+template <support::endianness Endianness>
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromELFObject_ppc64(MemoryBufferRef ObjectBuffer) {
+ LLVM_DEBUG({
+ dbgs() << "Building jitlink graph for new input "
+ << ObjectBuffer.getBufferIdentifier() << "...\n";
+ });
+
+ auto ELFObj = object::ObjectFile::createELFObjectFile(ObjectBuffer);
+ if (!ELFObj)
+ return ELFObj.takeError();
+
+ auto Features = (*ELFObj)->getFeatures();
+ if (!Features)
+ return Features.takeError();
+
+ using ELFT = object::ELFType<Endianness, true>;
+ auto &ELFObjFile = cast<object::ELFObjectFile<ELFT>>(**ELFObj);
+ return ELFLinkGraphBuilder_ppc64<Endianness>(
+ (*ELFObj)->getFileName(), ELFObjFile.getELFFile(),
+ (*ELFObj)->makeTriple(), std::move(*Features))
+ .buildGraph();
+}
+
+template <support::endianness Endianness>
+void link_ELF_ppc64(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx) {
+ PassConfiguration Config;
+
+ if (Ctx->shouldAddDefaultTargetPasses(G->getTargetTriple())) {
+ // Construct a JITLinker and run the link function.
+
+ // Add eh-frame passses.
+ Config.PrePrunePasses.push_back(DWARFRecordSectionSplitter(".eh_frame"));
+ Config.PrePrunePasses.push_back(EHFrameEdgeFixer(
+ ".eh_frame", G->getPointerSize(), ppc64::Pointer32, ppc64::Pointer64,
+ ppc64::Delta32, ppc64::Delta64, ppc64::NegDelta32));
+ Config.PrePrunePasses.push_back(EHFrameNullTerminator(".eh_frame"));
+
+ // Add a mark-live pass.
+ if (auto MarkLive = Ctx->getMarkLivePass(G->getTargetTriple()))
+ Config.PrePrunePasses.push_back(std::move(MarkLive));
+ else
+ Config.PrePrunePasses.push_back(markAllSymbolsLive);
+ }
+
+ Config.PostPrunePasses.push_back(buildTables_ELF_ppc64<Endianness>);
+
+ if (auto Err = Ctx->modifyPassConfig(*G, Config))
+ return Ctx->notifyFailed(std::move(Err));
+
+ ELFJITLinker_ppc64<Endianness>::link(std::move(Ctx), std::move(G),
+ std::move(Config));
+}
+
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromELFObject_ppc64(MemoryBufferRef ObjectBuffer) {
+ return createLinkGraphFromELFObject_ppc64<support::big>(
+ std::move(ObjectBuffer));
+}
+
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromELFObject_ppc64le(MemoryBufferRef ObjectBuffer) {
+ return createLinkGraphFromELFObject_ppc64<support::little>(
+ std::move(ObjectBuffer));
+}
+
+/// jit-link the given object buffer, which must be an ELF ppc64 object file.
+void link_ELF_ppc64(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx) {
+ return link_ELF_ppc64<support::big>(std::move(G), std::move(Ctx));
+}
+
+/// jit-link the given object buffer, which must be an ELF ppc64le object file.
+void link_ELF_ppc64le(std::unique_ptr<LinkGraph> G,
+ std::unique_ptr<JITLinkContext> Ctx) {
+ return link_ELF_ppc64<support::little>(std::move(G), std::move(Ctx));
+}
+
+} // end namespace llvm::jitlink
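A small sketch of the TOC base computation in defineTOCBase() above: the .TOC. symbol is placed ELFTOCBaseOffset (0x8000) bytes past the start of the synthesized TOC section, so that signed 16-bit TOC-relative offsets can reach a full 64 KiB window around it (the addresses below are made up).

#include <cstdint>
#include <cstdio>

static constexpr uint64_t ELFTOCBaseOffset = 0x8000;

static uint64_t tocBase(uint64_t TOCSectionStart) {
  return TOCSectionStart + ELFTOCBaseOffset;
}

int main() {
  std::printf("0x%llx\n", (unsigned long long)tocBase(0x10000000ull));
  return 0; // prints 0x10008000
}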
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
index 90d3bbe6a276..410dd7fedad1 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
@@ -70,13 +70,17 @@ public:
}
void fixPLTEdge(Edge &E, Symbol &PLTStubs) {
- assert(E.getKind() == R_RISCV_CALL_PLT && "Not a R_RISCV_CALL_PLT edge?");
+ assert((E.getKind() == R_RISCV_CALL || E.getKind() == R_RISCV_CALL_PLT ||
+ E.getKind() == CallRelaxable) &&
+ "Not a PLT edge?");
E.setKind(R_RISCV_CALL);
E.setTarget(PLTStubs);
}
bool isExternalBranchEdge(Edge &E) const {
- return E.getKind() == R_RISCV_CALL_PLT;
+ return (E.getKind() == R_RISCV_CALL || E.getKind() == R_RISCV_CALL_PLT ||
+ E.getKind() == CallRelaxable) &&
+ !E.getTarget().isDefined();
}
private:
@@ -229,6 +233,9 @@ private:
(RawInstr & 0xFFF) | Imm20 | Imm10_1 | Imm11 | Imm19_12;
break;
}
+ case CallRelaxable:
+ // Treat as R_RISCV_CALL when the relaxation pass did not run
+ case R_RISCV_CALL_PLT:
case R_RISCV_CALL: {
int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
int64_t Hi = Value + 0x800;
@@ -322,63 +329,52 @@ private:
case R_RISCV_ADD8: {
int64_t Value =
(E.getTarget().getAddress() +
- *(reinterpret_cast<const uint8_t *>(FixupAddress.getValue())) +
- E.getAddend())
+ *(reinterpret_cast<const uint8_t *>(FixupPtr)) + E.getAddend())
.getValue();
*FixupPtr = static_cast<uint8_t>(Value);
break;
}
case R_RISCV_ADD16: {
int64_t Value = (E.getTarget().getAddress() +
- support::endian::read16le(reinterpret_cast<const void *>(
- FixupAddress.getValue())) +
- E.getAddend())
+ support::endian::read16le(FixupPtr) + E.getAddend())
.getValue();
*(little16_t *)FixupPtr = static_cast<uint16_t>(Value);
break;
}
case R_RISCV_ADD32: {
int64_t Value = (E.getTarget().getAddress() +
- support::endian::read32le(reinterpret_cast<const void *>(
- FixupAddress.getValue())) +
- E.getAddend())
+ support::endian::read32le(FixupPtr) + E.getAddend())
.getValue();
*(little32_t *)FixupPtr = static_cast<uint32_t>(Value);
break;
}
case R_RISCV_ADD64: {
int64_t Value = (E.getTarget().getAddress() +
- support::endian::read64le(reinterpret_cast<const void *>(
- FixupAddress.getValue())) +
- E.getAddend())
+ support::endian::read64le(FixupPtr) + E.getAddend())
.getValue();
*(little64_t *)FixupPtr = static_cast<uint64_t>(Value);
break;
}
case R_RISCV_SUB8: {
- int64_t Value =
- *(reinterpret_cast<const uint8_t *>(FixupAddress.getValue())) -
- E.getTarget().getAddress().getValue() - E.getAddend();
+ int64_t Value = *(reinterpret_cast<const uint8_t *>(FixupPtr)) -
+ E.getTarget().getAddress().getValue() - E.getAddend();
*FixupPtr = static_cast<uint8_t>(Value);
break;
}
case R_RISCV_SUB16: {
- int64_t Value = support::endian::read16le(reinterpret_cast<const void *>(
- FixupAddress.getValue())) -
+ int64_t Value = support::endian::read16le(FixupPtr) -
E.getTarget().getAddress().getValue() - E.getAddend();
*(little16_t *)FixupPtr = static_cast<uint32_t>(Value);
break;
}
case R_RISCV_SUB32: {
- int64_t Value = support::endian::read32le(reinterpret_cast<const void *>(
- FixupAddress.getValue())) -
+ int64_t Value = support::endian::read32le(FixupPtr) -
E.getTarget().getAddress().getValue() - E.getAddend();
*(little32_t *)FixupPtr = static_cast<uint32_t>(Value);
break;
}
case R_RISCV_SUB64: {
- int64_t Value = support::endian::read64le(reinterpret_cast<const void *>(
- FixupAddress.getValue())) -
+ int64_t Value = support::endian::read64le(FixupPtr) -
E.getTarget().getAddress().getValue() - E.getAddend();
*(little64_t *)FixupPtr = static_cast<uint64_t>(Value);
break;
@@ -419,8 +415,7 @@ private:
break;
}
case R_RISCV_SUB6: {
- int64_t Value =
- *(reinterpret_cast<const uint8_t *>(FixupAddress.getValue())) & 0x3f;
+ int64_t Value = *(reinterpret_cast<const uint8_t *>(FixupPtr)) & 0x3f;
Value -= E.getTarget().getAddress().getValue() - E.getAddend();
*FixupPtr = (*FixupPtr & 0xc0) | (static_cast<uint8_t>(Value) & 0x3f);
break;
@@ -458,11 +453,318 @@ private:
*(little32_t *)FixupPtr = Word32;
break;
}
+ case AlignRelaxable:
+ // Ignore when the relaxation pass did not run
+ break;
}
return Error::success();
}
};
+namespace {
+
+struct SymbolAnchor {
+ uint64_t Offset;
+ Symbol *Sym;
+ bool End; // true for the anchor of getOffset() + getSize()
+};
+
+struct BlockRelaxAux {
+ // This records symbol start and end offsets which will be adjusted according
+ // to the nearest RelocDeltas element.
+ SmallVector<SymbolAnchor, 0> Anchors;
+ // All edges that either 1) are R_RISCV_ALIGN or 2) have an R_RISCV_RELAX edge
+ // at the same offset.
+ SmallVector<Edge *, 0> RelaxEdges;
+ // For RelaxEdges[I], the actual offset is RelaxEdges[I]->getOffset() - (I ?
+ // RelocDeltas[I - 1] : 0).
+ SmallVector<uint32_t, 0> RelocDeltas;
+ // For RelaxEdges[I], the actual type is EdgeKinds[I].
+ SmallVector<Edge::Kind, 0> EdgeKinds;
+ // List of rewritten instructions. Contains one raw encoded instruction per
+ // element in EdgeKinds that isn't Invalid or R_RISCV_ALIGN.
+ SmallVector<uint32_t, 0> Writes;
+};
+
+struct RelaxConfig {
+ bool IsRV32;
+ bool HasRVC;
+};
+
+struct RelaxAux {
+ RelaxConfig Config;
+ DenseMap<Block *, BlockRelaxAux> Blocks;
+};
+
+} // namespace
+
+static bool shouldRelax(const Section &S) {
+ return (S.getMemProt() & orc::MemProt::Exec) != orc::MemProt::None;
+}
+
+static bool isRelaxable(const Edge &E) {
+ switch (E.getKind()) {
+ default:
+ return false;
+ case CallRelaxable:
+ case AlignRelaxable:
+ return true;
+ }
+}
+
+static RelaxAux initRelaxAux(LinkGraph &G) {
+ RelaxAux Aux;
+ Aux.Config.IsRV32 = G.getTargetTriple().isRISCV32();
+ const auto &Features = G.getFeatures().getFeatures();
+ Aux.Config.HasRVC =
+ std::find(Features.begin(), Features.end(), "+c") != Features.end();
+
+ for (auto &S : G.sections()) {
+ if (!shouldRelax(S))
+ continue;
+ for (auto *B : S.blocks()) {
+ auto BlockEmplaceResult = Aux.Blocks.try_emplace(B);
+ assert(BlockEmplaceResult.second && "Block encountered twice");
+ auto &BlockAux = BlockEmplaceResult.first->second;
+
+ for (auto &E : B->edges())
+ if (isRelaxable(E))
+ BlockAux.RelaxEdges.push_back(&E);
+
+ if (BlockAux.RelaxEdges.empty()) {
+ Aux.Blocks.erase(BlockEmplaceResult.first);
+ continue;
+ }
+
+ const auto NumEdges = BlockAux.RelaxEdges.size();
+ BlockAux.RelocDeltas.resize(NumEdges, 0);
+ BlockAux.EdgeKinds.resize_for_overwrite(NumEdges);
+
+ // Store anchors (offset and offset+size) for symbols.
+ for (auto *Sym : S.symbols()) {
+ if (!Sym->isDefined() || &Sym->getBlock() != B)
+ continue;
+
+ BlockAux.Anchors.push_back({Sym->getOffset(), Sym, false});
+ BlockAux.Anchors.push_back(
+ {Sym->getOffset() + Sym->getSize(), Sym, true});
+ }
+ }
+ }
+
+ // Sort anchors by offset so that we can find the closest relocation
+ // efficiently. For a zero size symbol, ensure that its start anchor precedes
+ // its end anchor. For two symbols with anchors at the same offset, their
+ // order does not matter.
+ for (auto &BlockAuxIter : Aux.Blocks) {
+ llvm::sort(BlockAuxIter.second.Anchors, [](auto &A, auto &B) {
+ return std::make_pair(A.Offset, A.End) < std::make_pair(B.Offset, B.End);
+ });
+ }
+
+ return Aux;
+}
+
+static void relaxAlign(orc::ExecutorAddr Loc, const Edge &E, uint32_t &Remove,
+ Edge::Kind &NewEdgeKind) {
+ // E points to the start of the padding bytes.
+ // E + Addend points to the instruction to be aligned by removing padding.
+ // Alignment is the smallest power of 2 strictly greater than Addend.
+ const auto Align = NextPowerOf2(E.getAddend());
+ const auto DestLoc = alignTo(Loc.getValue(), Align);
+ const auto SrcLoc = Loc.getValue() + E.getAddend();
+ Remove = SrcLoc - DestLoc;
+ assert(static_cast<int32_t>(Remove) >= 0 &&
+ "R_RISCV_ALIGN would require expanding the content");
+ NewEdgeKind = AlignRelaxable;
+}
+
+static void relaxCall(const Block &B, BlockRelaxAux &Aux,
+ const RelaxConfig &Config, orc::ExecutorAddr Loc,
+ const Edge &E, uint32_t &Remove,
+ Edge::Kind &NewEdgeKind) {
+ const auto JALR =
+ support::endian::read32le(B.getContent().data() + E.getOffset() + 4);
+ const auto RD = extractBits(JALR, 7, 5);
+ const auto Dest = E.getTarget().getAddress() + E.getAddend();
+ const auto Displace = Dest - Loc;
+
+ if (Config.HasRVC && isInt<12>(Displace) && RD == 0) {
+ NewEdgeKind = R_RISCV_RVC_JUMP;
+ Aux.Writes.push_back(0xa001); // c.j
+ Remove = 6;
+ } else if (Config.HasRVC && Config.IsRV32 && isInt<12>(Displace) && RD == 1) {
+ NewEdgeKind = R_RISCV_RVC_JUMP;
+ Aux.Writes.push_back(0x2001); // c.jal
+ Remove = 6;
+ } else if (isInt<21>(Displace)) {
+ NewEdgeKind = R_RISCV_JAL;
+ Aux.Writes.push_back(0x6f | RD << 7); // jal
+ Remove = 4;
+ } else {
+ // Not relaxable
+ NewEdgeKind = R_RISCV_CALL_PLT;
+ Remove = 0;
+ }
+}
+
+static bool relaxBlock(LinkGraph &G, Block &Block, BlockRelaxAux &Aux,
+ const RelaxConfig &Config) {
+ const auto BlockAddr = Block.getAddress();
+ bool Changed = false;
+ ArrayRef<SymbolAnchor> SA = ArrayRef(Aux.Anchors);
+ uint32_t Delta = 0;
+
+ Aux.EdgeKinds.assign(Aux.EdgeKinds.size(), Edge::Invalid);
+ Aux.Writes.clear();
+
+ for (auto [I, E] : llvm::enumerate(Aux.RelaxEdges)) {
+ const auto Loc = BlockAddr + E->getOffset() - Delta;
+ auto &Cur = Aux.RelocDeltas[I];
+ uint32_t Remove = 0;
+ switch (E->getKind()) {
+ case AlignRelaxable:
+ relaxAlign(Loc, *E, Remove, Aux.EdgeKinds[I]);
+ break;
+ case CallRelaxable:
+ relaxCall(Block, Aux, Config, Loc, *E, Remove, Aux.EdgeKinds[I]);
+ break;
+ default:
+ llvm_unreachable("Unexpected relaxable edge kind");
+ }
+
+ // For all anchors whose offsets are <= E->getOffset(), they are preceded by
+ // the previous relocation whose RelocDeltas value equals Delta.
+ // Decrease their offset and update their size.
+ for (; SA.size() && SA[0].Offset <= E->getOffset(); SA = SA.slice(1)) {
+ if (SA[0].End)
+ SA[0].Sym->setSize(SA[0].Offset - Delta - SA[0].Sym->getOffset());
+ else
+ SA[0].Sym->setOffset(SA[0].Offset - Delta);
+ }
+
+ Delta += Remove;
+ if (Delta != Cur) {
+ Cur = Delta;
+ Changed = true;
+ }
+ }
+
+ for (const SymbolAnchor &A : SA) {
+ if (A.End)
+ A.Sym->setSize(A.Offset - Delta - A.Sym->getOffset());
+ else
+ A.Sym->setOffset(A.Offset - Delta);
+ }
+
+ return Changed;
+}
+
+static bool relaxOnce(LinkGraph &G, RelaxAux &Aux) {
+ bool Changed = false;
+
+ for (auto &[B, BlockAux] : Aux.Blocks)
+ Changed |= relaxBlock(G, *B, BlockAux, Aux.Config);
+
+ return Changed;
+}
+
+static void finalizeBlockRelax(LinkGraph &G, Block &Block, BlockRelaxAux &Aux) {
+ auto Contents = Block.getAlreadyMutableContent();
+ auto *Dest = Contents.data();
+ auto NextWrite = Aux.Writes.begin();
+ uint32_t Offset = 0;
+ uint32_t Delta = 0;
+
+ // Update section content: remove NOPs for R_RISCV_ALIGN and rewrite
+ // instructions for relaxed relocations.
+ for (auto [I, E] : llvm::enumerate(Aux.RelaxEdges)) {
+ uint32_t Remove = Aux.RelocDeltas[I] - Delta;
+ Delta = Aux.RelocDeltas[I];
+ if (Remove == 0 && Aux.EdgeKinds[I] == Edge::Invalid)
+ continue;
+
+ // Copy from last location to the current relocated location.
+ const auto Size = E->getOffset() - Offset;
+ std::memmove(Dest, Contents.data() + Offset, Size);
+ Dest += Size;
+
+ uint32_t Skip = 0;
+ switch (Aux.EdgeKinds[I]) {
+ case Edge::Invalid:
+ break;
+ case AlignRelaxable:
+ // For R_RISCV_ALIGN, we will place Offset in a location (among NOPs) to
+ // satisfy the alignment requirement. If both Remove and E->getAddend()
+ // are multiples of 4, it is as if we have skipped some NOPs. Otherwise we
+ // are in the middle of a 4-byte NOP, and we need to rewrite the NOP
+ // sequence.
+ if (Remove % 4 || E->getAddend() % 4) {
+ Skip = E->getAddend() - Remove;
+ uint32_t J = 0;
+ for (; J + 4 <= Skip; J += 4)
+ support::endian::write32le(Dest + J, 0x00000013); // nop
+ if (J != Skip) {
+ assert(J + 2 == Skip);
+ support::endian::write16le(Dest + J, 0x0001); // c.nop
+ }
+ }
+ break;
+ case R_RISCV_RVC_JUMP:
+ Skip = 2;
+ support::endian::write16le(Dest, *NextWrite++);
+ break;
+ case R_RISCV_JAL:
+ Skip = 4;
+ support::endian::write32le(Dest, *NextWrite++);
+ break;
+ }
+
+ Dest += Skip;
+ Offset = E->getOffset() + Skip + Remove;
+ }
+
+ std::memmove(Dest, Contents.data() + Offset, Contents.size() - Offset);
+
+ // Fixup edge offsets and kinds.
+ Delta = 0;
+ size_t I = 0;
+ for (auto &E : Block.edges()) {
+ E.setOffset(E.getOffset() - Delta);
+
+ if (I < Aux.RelaxEdges.size() && Aux.RelaxEdges[I] == &E) {
+ if (Aux.EdgeKinds[I] != Edge::Invalid)
+ E.setKind(Aux.EdgeKinds[I]);
+
+ Delta = Aux.RelocDeltas[I];
+ ++I;
+ }
+ }
+
+ // Remove AlignRelaxable edges: all other relaxable edges got modified and
+ // will be used later while linking. Alignment is entirely handled here so we
+ // don't need these edges anymore.
+ for (auto IE = Block.edges().begin(); IE != Block.edges().end();) {
+ if (IE->getKind() == AlignRelaxable)
+ IE = Block.removeEdge(IE);
+ else
+ ++IE;
+ }
+}
+
+static void finalizeRelax(LinkGraph &G, RelaxAux &Aux) {
+ for (auto &[B, BlockAux] : Aux.Blocks)
+ finalizeBlockRelax(G, *B, BlockAux);
+}
+
+static Error relax(LinkGraph &G) {
+ auto Aux = initRelaxAux(G);
+ while (relaxOnce(G, Aux)) {
+ }
+ finalizeRelax(G, Aux);
+ return Error::success();
+}
+
template <typename ELFT>
class ELFLinkGraphBuilder_riscv : public ELFLinkGraphBuilder<ELFT> {
private:
@@ -528,6 +830,8 @@ private:
return EdgeKind_riscv::R_RISCV_SET32;
case ELF::R_RISCV_32_PCREL:
return EdgeKind_riscv::R_RISCV_32_PCREL;
+ case ELF::R_RISCV_ALIGN:
+ return EdgeKind_riscv::AlignRelaxable;
}
return make_error<JITLinkError>(
@@ -535,6 +839,17 @@ private:
object::getELFRelocationTypeName(ELF::EM_RISCV, Type));
}
+ EdgeKind_riscv getRelaxableRelocationKind(EdgeKind_riscv Kind) {
+ switch (Kind) {
+ default:
+ // Just ignore unsupported relaxations
+ return Kind;
+ case R_RISCV_CALL:
+ case R_RISCV_CALL_PLT:
+ return CallRelaxable;
+ }
+ }
+
Error addRelocations() override {
LLVM_DEBUG(dbgs() << "Processing relocations:\n");
@@ -554,22 +869,17 @@ private:
using Base = ELFLinkGraphBuilder<ELFT>;
uint32_t Type = Rel.getType(false);
- // We do not implement linker relaxation, except what is required for
- // alignment (see below).
- if (Type == llvm::ELF::R_RISCV_RELAX)
- return Error::success();
-
int64_t Addend = Rel.r_addend;
- if (Type == llvm::ELF::R_RISCV_ALIGN) {
- uint64_t Alignment = PowerOf2Ceil(Addend);
- // FIXME: Implement support for ensuring alignment together with linker
- // relaxation; 2 bytes are guaranteed by the length of compressed
- // instructions, so this does not need any action from our side.
- if (Alignment > 2)
- return make_error<JITLinkError>(
- formatv("Unsupported relocation R_RISCV_ALIGN with alignment {0} "
- "larger than 2 (addend: {1})",
- Alignment, Addend));
+
+ if (Type == ELF::R_RISCV_RELAX) {
+ if (BlockToFix.edges_empty())
+ return make_error<StringError>(
+ "R_RISCV_RELAX without preceding relocation",
+ inconvertibleErrorCode());
+
+ auto &PrevEdge = *std::prev(BlockToFix.edges().end());
+ auto Kind = static_cast<EdgeKind_riscv>(PrevEdge.getKind());
+ PrevEdge.setKind(getRelaxableRelocationKind(Kind));
return Error::success();
}
@@ -606,9 +916,10 @@ private:
public:
ELFLinkGraphBuilder_riscv(StringRef FileName,
- const object::ELFFile<ELFT> &Obj, const Triple T)
- : ELFLinkGraphBuilder<ELFT>(Obj, std::move(T), FileName,
- riscv::getEdgeKindName) {}
+ const object::ELFFile<ELFT> &Obj, Triple TT,
+ SubtargetFeatures Features)
+ : ELFLinkGraphBuilder<ELFT>(Obj, std::move(TT), std::move(Features),
+ FileName, riscv::getEdgeKindName) {}
};
Expected<std::unique_ptr<LinkGraph>>
@@ -622,11 +933,15 @@ createLinkGraphFromELFObject_riscv(MemoryBufferRef ObjectBuffer) {
if (!ELFObj)
return ELFObj.takeError();
+ auto Features = (*ELFObj)->getFeatures();
+ if (!Features)
+ return Features.takeError();
+
if ((*ELFObj)->getArch() == Triple::riscv64) {
auto &ELFObjFile = cast<object::ELFObjectFile<object::ELF64LE>>(**ELFObj);
return ELFLinkGraphBuilder_riscv<object::ELF64LE>(
(*ELFObj)->getFileName(), ELFObjFile.getELFFile(),
- (*ELFObj)->makeTriple())
+ (*ELFObj)->makeTriple(), std::move(*Features))
.buildGraph();
} else {
assert((*ELFObj)->getArch() == Triple::riscv32 &&
@@ -634,7 +949,7 @@ createLinkGraphFromELFObject_riscv(MemoryBufferRef ObjectBuffer) {
auto &ELFObjFile = cast<object::ELFObjectFile<object::ELF32LE>>(**ELFObj);
return ELFLinkGraphBuilder_riscv<object::ELF32LE>(
(*ELFObj)->getFileName(), ELFObjFile.getELFFile(),
- (*ELFObj)->makeTriple())
+ (*ELFObj)->makeTriple(), std::move(*Features))
.buildGraph();
}
}
@@ -650,6 +965,7 @@ void link_ELF_riscv(std::unique_ptr<LinkGraph> G,
Config.PrePrunePasses.push_back(markAllSymbolsLive);
Config.PostPrunePasses.push_back(
PerGraphGOTAndPLTStubsBuilder_ELF_riscv::asPass);
+ Config.PostAllocationPasses.push_back(relax);
}
if (auto Err = Ctx->modifyPassConfig(*G, Config))
return Ctx->notifyFailed(std::move(Err));
@@ -657,5 +973,7 @@ void link_ELF_riscv(std::unique_ptr<LinkGraph> G,
ELFJITLinker_riscv::link(std::move(Ctx), std::move(G), std::move(Config));
}
+LinkGraphPassFunction createRelaxationPass_ELF_riscv() { return relax; }
+
} // namespace jitlink
} // namespace llvm
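
The call relaxation above reduces to a range check on the branch displacement plus the availability of compressed instructions: an eight-byte auipc+jalr pair becomes a two-byte c.j (rd == x0), a two-byte c.jal (RV32 with the C extension, rd == ra), or a four-byte jal, and is left alone otherwise. A minimal sketch of that decision over plain integers (RelaxedCall and pickRelaxedCall are illustrative names, not JITLink API):

#include <cstdint>
#include <optional>

struct RelaxedCall {
  uint32_t Encoding;   // Raw bits of the shorter instruction.
  unsigned BytesSaved; // How much the 8-byte auipc+jalr pair shrinks.
};

// Mirrors the range checks in relaxCall() above; illustrative only.
inline bool fitsSigned(int64_t V, unsigned Bits) {
  return V >= -(int64_t(1) << (Bits - 1)) && V < (int64_t(1) << (Bits - 1));
}

std::optional<RelaxedCall> pickRelaxedCall(int64_t Displace, unsigned RD,
                                           bool HasRVC, bool IsRV32) {
  if (HasRVC && fitsSigned(Displace, 12) && RD == 0)
    return RelaxedCall{0xa001, 6};         // c.j
  if (HasRVC && IsRV32 && fitsSigned(Displace, 12) && RD == 1)
    return RelaxedCall{0x2001, 6};         // c.jal (RV32 only)
  if (fitsSigned(Displace, 21))
    return RelaxedCall{0x6f | RD << 7, 4}; // jal rd
  return std::nullopt;                     // keep the auipc+jalr pair
}
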
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
index c9359522c248..1bdddd4c722b 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp
@@ -100,60 +100,10 @@ Error buildTables_ELF_x86_64(LinkGraph &G) {
namespace llvm {
namespace jitlink {
-// This should become a template as the ELFFile is so a lot of this could become
-// generic
class ELFLinkGraphBuilder_x86_64 : public ELFLinkGraphBuilder<object::ELF64LE> {
private:
using ELFT = object::ELF64LE;
- enum ELFX86RelocationKind : Edge::Kind {
- Branch32 = Edge::FirstRelocation,
- Pointer32Signed,
- Pointer64,
- PCRel32,
- PCRel32GOTLoad,
- PCRel32GOTLoadRelaxable,
- PCRel32REXGOTLoadRelaxable,
- PCRel32TLV,
- PCRel64GOT,
- GOTOFF64,
- GOT64,
- Delta64,
- };
-
- static Expected<ELFX86RelocationKind> getRelocationKind(const uint32_t Type) {
- switch (Type) {
- case ELF::R_X86_64_32S:
- return ELFX86RelocationKind::Pointer32Signed;
- case ELF::R_X86_64_PC32:
- return ELFX86RelocationKind::PCRel32;
- case ELF::R_X86_64_PC64:
- case ELF::R_X86_64_GOTPC64:
- return ELFX86RelocationKind::Delta64;
- case ELF::R_X86_64_64:
- return ELFX86RelocationKind::Pointer64;
- case ELF::R_X86_64_GOTPCREL:
- return ELFX86RelocationKind::PCRel32GOTLoad;
- case ELF::R_X86_64_GOTPCRELX:
- return ELFX86RelocationKind::PCRel32GOTLoadRelaxable;
- case ELF::R_X86_64_REX_GOTPCRELX:
- return ELFX86RelocationKind::PCRel32REXGOTLoadRelaxable;
- case ELF::R_X86_64_GOTPCREL64:
- return ELFX86RelocationKind::PCRel64GOT;
- case ELF::R_X86_64_GOT64:
- return ELFX86RelocationKind::GOT64;
- case ELF::R_X86_64_GOTOFF64:
- return ELFX86RelocationKind::GOTOFF64;
- case ELF::R_X86_64_PLT32:
- return ELFX86RelocationKind::Branch32;
- case ELF::R_X86_64_TLSGD:
- return ELFX86RelocationKind::PCRel32TLV;
- }
- return make_error<JITLinkError>(
- "Unsupported x86-64 relocation type " + formatv("{0:d}: ", Type) +
- object::getELFRelocationTypeName(ELF::EM_X86_64, Type));
- }
-
Error addRelocations() override {
LLVM_DEBUG(dbgs() << "Processing relocations:\n");
@@ -179,6 +129,12 @@ private:
Block &BlockToFix) {
using Base = ELFLinkGraphBuilder<ELFT>;
+ auto ELFReloc = Rel.getType(false);
+
+ // R_X86_64_NONE is a no-op.
+ if (LLVM_UNLIKELY(ELFReloc == ELF::R_X86_64_NONE))
+ return Error::success();
+
uint32_t SymbolIndex = Rel.getSymbol(false);
auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec);
if (!ObjSymbol)
@@ -194,62 +150,66 @@ private:
inconvertibleErrorCode());
// Validate the relocation kind.
- auto ELFRelocKind = getRelocationKind(Rel.getType(false));
- if (!ELFRelocKind)
- return ELFRelocKind.takeError();
-
int64_t Addend = Rel.r_addend;
Edge::Kind Kind = Edge::Invalid;
- switch (*ELFRelocKind) {
- case PCRel32:
+
+ switch (ELFReloc) {
+ case ELF::R_X86_64_PC32:
+ case ELF::R_X86_64_GOTPC32:
Kind = x86_64::Delta32;
break;
- case Delta64:
+ case ELF::R_X86_64_PC64:
+ case ELF::R_X86_64_GOTPC64:
Kind = x86_64::Delta64;
break;
- case Pointer32Signed:
+ case ELF::R_X86_64_32:
+ Kind = x86_64::Pointer32;
+ break;
+ case ELF::R_X86_64_16:
+ Kind = x86_64::Pointer16;
+ break;
+ case ELF::R_X86_64_8:
+ Kind = x86_64::Pointer8;
+ break;
+ case ELF::R_X86_64_32S:
Kind = x86_64::Pointer32Signed;
break;
- case Pointer64:
+ case ELF::R_X86_64_64:
Kind = x86_64::Pointer64;
break;
- case PCRel32GOTLoad: {
+ case ELF::R_X86_64_GOTPCREL:
Kind = x86_64::RequestGOTAndTransformToDelta32;
break;
- }
- case PCRel32REXGOTLoadRelaxable: {
+ case ELF::R_X86_64_REX_GOTPCRELX:
Kind = x86_64::RequestGOTAndTransformToPCRel32GOTLoadREXRelaxable;
Addend = 0;
break;
- }
- case PCRel32TLV: {
+ case ELF::R_X86_64_TLSGD:
Kind = x86_64::RequestTLSDescInGOTAndTransformToDelta32;
break;
- }
- case PCRel32GOTLoadRelaxable: {
+ case ELF::R_X86_64_GOTPCRELX:
Kind = x86_64::RequestGOTAndTransformToPCRel32GOTLoadRelaxable;
Addend = 0;
break;
- }
- case PCRel64GOT: {
+ case ELF::R_X86_64_GOTPCREL64:
Kind = x86_64::RequestGOTAndTransformToDelta64;
break;
- }
- case GOT64: {
+ case ELF::R_X86_64_GOT64:
Kind = x86_64::RequestGOTAndTransformToDelta64FromGOT;
break;
- }
- case GOTOFF64: {
+ case ELF::R_X86_64_GOTOFF64:
Kind = x86_64::Delta64FromGOT;
break;
- }
- case Branch32: {
+ case ELF::R_X86_64_PLT32:
Kind = x86_64::BranchPCRel32;
// BranchPCRel32 implicitly handles the '-4' PC adjustment, so we have to
// adjust the addend by '+4' to compensate.
Addend += 4;
break;
- }
+ default:
+ return make_error<JITLinkError>(
+ "In " + G->getName() + ": Unsupported x86-64 relocation type " +
+ object::getELFRelocationTypeName(ELF::EM_X86_64, ELFReloc));
}
auto FixupAddress = orc::ExecutorAddr(FixupSection.sh_addr) + Rel.r_offset;
@@ -267,8 +227,10 @@ private:
public:
ELFLinkGraphBuilder_x86_64(StringRef FileName,
- const object::ELFFile<object::ELF64LE> &Obj)
- : ELFLinkGraphBuilder(Obj, Triple("x86_64-unknown-linux"), FileName,
+ const object::ELFFile<object::ELF64LE> &Obj,
+ SubtargetFeatures Features)
+ : ELFLinkGraphBuilder(Obj, Triple("x86_64-unknown-linux"),
+ std::move(Features), FileName,
x86_64::getEdgeKindName) {}
};
@@ -334,6 +296,22 @@ private:
Linkage::Strong, Scope::Local, false, true);
}
+ // If we still haven't found a GOT symbol then double check the externals.
+ // We may have a GOT-relative reference but no GOT section, in which case
+ // we just need to point the GOT symbol at some address in this graph.
+ if (!GOTSymbol) {
+ for (auto *Sym : G.external_symbols()) {
+ if (Sym->getName() == ELFGOTSymbolName) {
+ auto Blocks = G.blocks();
+ if (!Blocks.empty()) {
+ G.makeAbsolute(*Sym, (*Blocks.begin())->getAddress());
+ GOTSymbol = Sym;
+ break;
+ }
+ }
+ }
+ }
+
return Error::success();
}
@@ -353,9 +331,14 @@ createLinkGraphFromELFObject_x86_64(MemoryBufferRef ObjectBuffer) {
if (!ELFObj)
return ELFObj.takeError();
+ auto Features = (*ELFObj)->getFeatures();
+ if (!Features)
+ return Features.takeError();
+
auto &ELFObjFile = cast<object::ELFObjectFile<object::ELF64LE>>(**ELFObj);
return ELFLinkGraphBuilder_x86_64((*ELFObj)->getFileName(),
- ELFObjFile.getELFFile())
+ ELFObjFile.getELFFile(),
+ std::move(*Features))
.buildGraph();
}
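
With the private ELFX86RelocationKind enum gone, each ELF relocation type now maps directly onto a generic x86_64 edge kind, and the only subtle part is the addend adjustment for R_X86_64_PLT32. The arithmetic below is a hedged illustration (not JITLink API) of why adding 4 keeps the two conventions in agreement, assuming, as the comment above states, that the edge kind already folds in the -4 PC adjustment:

#include <cassert>
#include <cstdint>

int32_t rel32FromELF(uint64_t Target, uint64_t FixupAddr, int64_t ElfAddend) {
  // ELF convention: S + A - P, with A typically -4 for a call/jmp rel32.
  return static_cast<int32_t>(Target + ElfAddend - FixupAddr);
}

int32_t rel32FromEdge(uint64_t Target, uint64_t FixupAddr, int64_t EdgeAddend) {
  // Edge convention assumed here: the -4 is applied by the fixup itself.
  return static_cast<int32_t>(Target + EdgeAddend - FixupAddr - 4);
}

void checkAddendCompensation() {
  uint64_t Target = 0x2000, Fixup = 0x1005; // displacement field of a call
  assert(rel32FromELF(Target, Fixup, -4) ==
         rel32FromEdge(Target, Fixup, -4 + 4)); // +4 compensation
}
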
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
index bd5b4d585550..4a2755d3696b 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLink.cpp
@@ -8,6 +8,7 @@
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/ExecutionEngine/JITLink/COFF.h"
#include "llvm/ExecutionEngine/JITLink/ELF.h"
@@ -88,6 +89,21 @@ const char *getScopeName(Scope S) {
llvm_unreachable("Unrecognized llvm.jitlink.Scope enum");
}
+bool isCStringBlock(Block &B) {
+ if (B.getSize() == 0) // Empty blocks are not valid C-strings.
+ return false;
+
+ // Zero-fill blocks of size one are valid empty strings.
+ if (B.isZeroFill())
+ return B.getSize() == 1;
+
+ for (size_t I = 0; I != B.getSize() - 1; ++I)
+ if (B.getContent()[I] == '\0')
+ return false;
+
+ return B.getContent()[B.getSize() - 1] == '\0';
+}
+
raw_ostream &operator<<(raw_ostream &OS, const Block &B) {
return OS << B.getAddress() << " -- " << (B.getAddress() + B.getSize())
<< ": "
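
isCStringBlock() is later used by the MachO builder to assert that every block carved out of a C-string section holds exactly one null-terminated string. A standalone restatement of the rule over a raw buffer, with hypothetical names:

#include <cassert>
#include <cstddef>

// Same null-termination rule as isCStringBlock() above, illustrative only.
bool looksLikeSingleCString(const char *Data, size_t Size, bool ZeroFill) {
  if (Size == 0)
    return false;                 // Empty blocks are never C-strings.
  if (ZeroFill)
    return Size == 1;             // A single zero byte is the empty string.
  for (size_t I = 0; I != Size - 1; ++I)
    if (Data[I] == '\0')
      return false;               // Interior terminator: more than one string.
  return Data[Size - 1] == '\0';  // Must end with exactly one terminator.
}

void cStringBlockExamples() {
  assert(looksLikeSingleCString("hi\0", 3, false));
  assert(!looksLikeSingleCString("hi", 2, false));     // no terminator
  assert(!looksLikeSingleCString("a\0b\0", 4, false)); // two strings
}
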
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
index 17de84fa6e11..feaa0fb6a58c 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.cpp
@@ -65,7 +65,7 @@ void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
if (AR)
Alloc = std::move(*AR);
else
- return Ctx->notifyFailed(AR.takeError());
+ return abandonAllocAndBailOut(std::move(Self), AR.takeError());
LLVM_DEBUG({
dbgs() << "Link graph \"" << G->getName()
@@ -75,13 +75,13 @@ void JITLinkerBase::linkPhase2(std::unique_ptr<JITLinkerBase> Self,
// Run post-allocation passes.
if (auto Err = runPasses(Passes.PostAllocationPasses))
- return Ctx->notifyFailed(std::move(Err));
+ return abandonAllocAndBailOut(std::move(Self), std::move(Err));
// Notify client that the defined symbols have been assigned addresses.
LLVM_DEBUG(dbgs() << "Resolving symbols defined in " << G->getName() << "\n");
if (auto Err = Ctx->notifyResolved(*G))
- return Ctx->notifyFailed(std::move(Err));
+ return abandonAllocAndBailOut(std::move(Self), std::move(Err));
auto ExternalSymbols = getExternalSymbolNames();
@@ -218,8 +218,7 @@ void JITLinkerBase::applyLookupResult(AsyncLookupResult Result) {
assert(!Sym->isDefined() && "Symbol being resolved is already defined");
auto ResultI = Result.find(Sym->getName());
if (ResultI != Result.end()) {
- Sym->getAddressable().setAddress(
- orc::ExecutorAddr(ResultI->second.getAddress()));
+ Sym->getAddressable().setAddress(ResultI->second.getAddress());
Sym->setLinkage(ResultI->second.getFlags().isWeak() ? Linkage::Weak
: Linkage::Strong);
Sym->setScope(ResultI->second.getFlags().isExported() ? Scope::Default
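
The error-path changes above enforce a simple discipline: once linkPhase2 owns an in-flight allocation, failures must go through abandonAllocAndBailOut rather than notifyFailed, and the IPInFlightAlloc changes further down assert at destruction that exactly one of finalize or abandon was called. A generic sketch of that discipline, not JITLink code:

#include <cassert>
#include <functional>
#include <utility>

// Once an in-flight allocation exists, every failure path must abandon it
// so the memory is released exactly once; success must finalize it.
class InFlightGuard {
  std::function<void()> Abandon; // empty once finalized or abandoned
public:
  explicit InFlightGuard(std::function<void()> AbandonFn)
      : Abandon(std::move(AbandonFn)) {}
  void finalize() { Abandon = nullptr; }
  void bailOut() {
    if (Abandon) {
      Abandon();
      Abandon = nullptr;
    }
  }
  ~InFlightGuard() { assert(!Abandon && "neither finalized nor abandoned"); }
};
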
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
index 2c9244526536..e69eddd6e119 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkGeneric.h
@@ -123,26 +123,47 @@ private:
Error fixUpBlocks(LinkGraph &G) const override {
LLVM_DEBUG(dbgs() << "Fixing up blocks:\n");
- for (auto *B : G.blocks()) {
- LLVM_DEBUG(dbgs() << " " << *B << ":\n");
-
- // Copy Block data and apply fixups.
- LLVM_DEBUG(dbgs() << " Applying fixups.\n");
- assert((!B->isZeroFill() || all_of(B->edges(),
- [](const Edge &E) {
- return E.getKind() ==
- Edge::KeepAlive;
- })) &&
- "Non-KeepAlive edges in zero-fill block?");
- for (auto &E : B->edges()) {
-
- // Skip non-relocation edges.
- if (!E.isRelocation())
- continue;
-
- // Dispatch to LinkerImpl for fixup.
- if (auto Err = impl().applyFixup(G, *B, E))
- return Err;
+ for (auto &Sec : G.sections()) {
+ bool NoAllocSection =
+ Sec.getMemLifetimePolicy() == orc::MemLifetimePolicy::NoAlloc;
+
+ for (auto *B : Sec.blocks()) {
+ LLVM_DEBUG(dbgs() << " " << *B << ":\n");
+
+ // Copy Block data and apply fixups.
+ LLVM_DEBUG(dbgs() << " Applying fixups.\n");
+ assert((!B->isZeroFill() || all_of(B->edges(),
+ [](const Edge &E) {
+ return E.getKind() ==
+ Edge::KeepAlive;
+ })) &&
+ "Non-KeepAlive edges in zero-fill block?");
+
+ // If this is a no-alloc section then copy the block content into
+ // memory allocated on the Graph's allocator (if it hasn't been
+ // already).
+ if (NoAllocSection)
+ (void)B->getMutableContent(G);
+
+ for (auto &E : B->edges()) {
+
+ // Skip non-relocation edges.
+ if (!E.isRelocation())
+ continue;
+
+ // If B is a block in a Standard or Finalize section then make sure
+ // that no edges point to symbols in NoAlloc sections.
+ assert(
+ (NoAllocSection || !E.getTarget().isDefined() ||
+ E.getTarget().getBlock().getSection().getMemLifetimePolicy() !=
+ orc::MemLifetimePolicy::NoAlloc) &&
+ "Block in allocated section has edge pointing to no-alloc "
+ "section");
+
+ // Dispatch to LinkerImpl for fixup.
+ if (auto Err = impl().applyFixup(G, *B, E))
+ return Err;
+ }
}
}
diff --git a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
index bd44b86f3081..f481504135a5 100644
--- a/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/JITLinkMemoryManager.cpp
@@ -24,11 +24,12 @@ JITLinkMemoryManager::InFlightAlloc::~InFlightAlloc() = default;
BasicLayout::BasicLayout(LinkGraph &G) : G(G) {
for (auto &Sec : G.sections()) {
- // Skip empty sections.
- if (Sec.blocks().empty())
+ // Skip empty sections, and sections with NoAlloc lifetime policies.
+ if (Sec.blocks().empty() ||
+ Sec.getMemLifetimePolicy() == orc::MemLifetimePolicy::NoAlloc)
continue;
- auto &Seg = Segments[{Sec.getMemProt(), Sec.getMemDeallocPolicy()}];
+ auto &Seg = Segments[{Sec.getMemProt(), Sec.getMemLifetimePolicy()}];
for (auto *B : Sec.blocks())
if (LLVM_LIKELY(!B->isZeroFill()))
Seg.ContentBlocks.push_back(B);
@@ -89,7 +90,7 @@ BasicLayout::getContiguousPageBasedLayoutSizes(uint64_t PageSize) {
inconvertibleErrorCode());
uint64_t SegSize = alignTo(Seg.ContentSize + Seg.ZeroFillSize, PageSize);
- if (AG.getMemDeallocPolicy() == orc::MemDeallocPolicy::Standard)
+ if (AG.getMemLifetimePolicy() == orc::MemLifetimePolicy::Standard)
SegsSizes.StandardSegs += SegSize;
else
SegsSizes.FinalizeSegs += SegSize;
@@ -146,7 +147,7 @@ void SimpleSegmentAlloc::Create(JITLinkMemoryManager &MemMgr,
const JITLinkDylib *JD, SegmentMap Segments,
OnCreatedFunction OnCreated) {
- static_assert(orc::AllocGroup::NumGroups == 16,
+ static_assert(orc::AllocGroup::NumGroups == 32,
"AllocGroup has changed. Section names below must be updated");
StringRef AGSectionNames[] = {
"__---.standard", "__R--.standard", "__-W-.standard", "__RW-.standard",
@@ -163,12 +164,15 @@ void SimpleSegmentAlloc::Create(JITLinkMemoryManager &MemMgr,
auto &AG = KV.first;
auto &Seg = KV.second;
+ assert(AG.getMemLifetimePolicy() != orc::MemLifetimePolicy::NoAlloc &&
+ "NoAlloc segments are not supported by SimpleSegmentAlloc");
+
auto AGSectionName =
AGSectionNames[static_cast<unsigned>(AG.getMemProt()) |
- static_cast<bool>(AG.getMemDeallocPolicy()) << 3];
+ static_cast<bool>(AG.getMemLifetimePolicy()) << 3];
auto &Sec = G->createSection(AGSectionName, AG.getMemProt());
- Sec.setMemDeallocPolicy(AG.getMemDeallocPolicy());
+ Sec.setMemLifetimePolicy(AG.getMemLifetimePolicy());
if (Seg.ContentSize != 0) {
NextAddr =
@@ -236,10 +240,14 @@ public:
IPInFlightAlloc(InProcessMemoryManager &MemMgr, LinkGraph &G, BasicLayout BL,
sys::MemoryBlock StandardSegments,
sys::MemoryBlock FinalizationSegments)
- : MemMgr(MemMgr), G(G), BL(std::move(BL)),
+ : MemMgr(MemMgr), G(&G), BL(std::move(BL)),
StandardSegments(std::move(StandardSegments)),
FinalizationSegments(std::move(FinalizationSegments)) {}
+ ~IPInFlightAlloc() {
+ assert(!G && "InFlight alloc neither abandoned nor finalized");
+ }
+
void finalize(OnFinalizedFunction OnFinalized) override {
// Apply memory protections to all segments.
@@ -249,7 +257,7 @@ public:
}
// Run finalization actions.
- auto DeallocActions = runFinalizeActions(G.allocActions());
+ auto DeallocActions = runFinalizeActions(G->allocActions());
if (!DeallocActions) {
OnFinalized(DeallocActions.takeError());
return;
@@ -261,6 +269,13 @@ public:
return;
}
+#ifndef NDEBUG
+ // Set 'G' to null to flag that we've been successfully finalized.
+ // This allows us to assert at destruction time that a call has been made
+ // to either finalize or abandon.
+ G = nullptr;
+#endif
+
// Continue with finalized allocation.
OnFinalized(MemMgr.createFinalizedAlloc(std::move(StandardSegments),
std::move(*DeallocActions)));
@@ -272,6 +287,14 @@ public:
Err = joinErrors(std::move(Err), errorCodeToError(EC));
if (auto EC = sys::Memory::releaseMappedMemory(StandardSegments))
Err = joinErrors(std::move(Err), errorCodeToError(EC));
+
+#ifndef NDEBUG
+ // Set 'G' to null to flag that this allocation has been abandoned. This
+ // allows us to assert at destruction time that a call has been made to
+ // either finalize or abandon.
+ G = nullptr;
+#endif
+
OnAbandoned(std::move(Err));
}
@@ -295,7 +318,7 @@ private:
}
InProcessMemoryManager &MemMgr;
- LinkGraph &G;
+ LinkGraph *G;
BasicLayout BL;
sys::MemoryBlock StandardSegments;
sys::MemoryBlock FinalizationSegments;
@@ -397,7 +420,7 @@ void InProcessMemoryManager::allocate(const JITLinkDylib *JD, LinkGraph &G,
auto &Seg = KV.second;
auto &SegAddr =
- (AG.getMemDeallocPolicy() == orc::MemDeallocPolicy::Standard)
+ (AG.getMemLifetimePolicy() == orc::MemLifetimePolicy::Standard)
? NextStandardSegAddr
: NextFinalizeSegAddr;
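
The switch from MemDeallocPolicy to MemLifetimePolicy adds a NoAlloc state on top of Standard and Finalize, which is why AllocGroup::NumGroups grows from 16 to 32: three protection bits combined with a two-bit lifetime field. A rough sketch of that bookkeeping with local names (the actual orc:: encoding may differ in detail):

#include <cstdint>

// Local sketch only, not the orc:: definitions: a 3-bit R/W/X protection
// mask plus a lifetime field with three states needs 8 * 4 = 32 group slots,
// matching the static_assert(orc::AllocGroup::NumGroups == 32) above.
enum class Lifetime : uint8_t { Standard = 0, Finalize = 1, NoAlloc = 2 };

constexpr unsigned groupIndex(uint8_t ProtBits, Lifetime L) {
  return (ProtBits & 0x7u) | (static_cast<unsigned>(L) << 3);
}

static_assert(groupIndex(0x7, Lifetime::NoAlloc) < 32, "32 groups suffice");

// BasicLayout skips NoAlloc sections entirely: their content (e.g. debug
// info) is kept on the JIT side, so no target memory is reserved for it.
constexpr bool participatesInLayout(Lifetime L) {
  return L != Lifetime::NoAlloc;
}
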
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO.cpp
index e49480c78662..40086ccf2b66 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO.cpp
@@ -54,7 +54,7 @@ createLinkGraphFromMachOObject(MemoryBufferRef ObjectBuffer) {
uint32_t CPUType;
memcpy(&CPUType, Data.data() + 4, sizeof(uint32_t));
if (Magic == MachO::MH_CIGAM_64)
- CPUType = ByteSwap_32(CPUType);
+ CPUType = llvm::byteswap<uint32_t>(CPUType);
LLVM_DEBUG({
dbgs() << "jitLink_MachO: cputype = " << format("0x%08" PRIx32, CPUType)
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
index 987689993397..c40e0f9ffc8d 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.cpp
@@ -47,12 +47,13 @@ Expected<std::unique_ptr<LinkGraph>> MachOLinkGraphBuilder::buildGraph() {
}
MachOLinkGraphBuilder::MachOLinkGraphBuilder(
- const object::MachOObjectFile &Obj, Triple TT,
+ const object::MachOObjectFile &Obj, Triple TT, SubtargetFeatures Features,
LinkGraph::GetEdgeKindNameFunction GetEdgeKindName)
: Obj(Obj),
- G(std::make_unique<LinkGraph>(
- std::string(Obj.getFileName()), std::move(TT), getPointerSize(Obj),
- getEndianness(Obj), std::move(GetEdgeKindName))) {
+ G(std::make_unique<LinkGraph>(std::string(Obj.getFileName()),
+ std::move(TT), std::move(Features),
+ getPointerSize(Obj), getEndianness(Obj),
+ std::move(GetEdgeKindName))) {
auto &MachHeader = Obj.getHeader64();
SubsectionsViaSymbols = MachHeader.flags & MachO::MH_SUBSECTIONS_VIA_SYMBOLS;
}
@@ -185,10 +186,14 @@ Error MachOLinkGraphBuilder::createNormalizedSections() {
Prot = orc::MemProt::Read | orc::MemProt::Write;
auto FullyQualifiedName =
- G->allocateString(StringRef(NSec.SegName) + "," + NSec.SectName);
+ G->allocateContent(StringRef(NSec.SegName) + "," + NSec.SectName);
NSec.GraphSection = &G->createSection(
StringRef(FullyQualifiedName.data(), FullyQualifiedName.size()), Prot);
+ // TODO: Are there any other criteria for NoAlloc lifetime?
+ if (NSec.Flags & MachO::S_ATTR_DEBUG)
+ NSec.GraphSection->setMemLifetimePolicy(orc::MemLifetimePolicy::NoAlloc);
+
IndexToSection.insert(std::make_pair(SecIndex, std::move(NSec)));
}
@@ -267,7 +272,11 @@ Error MachOLinkGraphBuilder::createNormalizedSymbols() {
Name = *NameOrErr;
else
return NameOrErr.takeError();
- }
+ } else if (Type & MachO::N_EXT)
+ return make_error<JITLinkError>("Symbol at index " +
+ formatv("{0}", SymbolIndex) +
+ " has no name (string table index 0), "
+ "but N_EXT bit is set");
LLVM_DEBUG({
dbgs() << " ";
@@ -656,7 +665,7 @@ Error MachOLinkGraphBuilder::graphifyCStringSection(
orc::ExecutorAddrDiff BlockStart = 0;
// Scan section for null characters.
- for (size_t I = 0; I != NSec.Size; ++I)
+ for (size_t I = 0; I != NSec.Size; ++I) {
if (NSec.Data[I] == '\0') {
size_t BlockSize = I + 1 - BlockStart;
// Create a block for this null terminated string.
@@ -723,6 +732,11 @@ Error MachOLinkGraphBuilder::graphifyCStringSection(
BlockStart += BlockSize;
}
+ }
+
+ assert(llvm::all_of(NSec.GraphSection->blocks(),
+ [](Block *B) { return isCStringBlock(*B); }) &&
+ "All blocks in section should hold single c-strings");
return Error::success();
}
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
index ba6cfaf8aa94..2805c2960b9b 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h
@@ -84,6 +84,7 @@ protected:
using SectionParserFunction = std::function<Error(NormalizedSection &S)>;
MachOLinkGraphBuilder(const object::MachOObjectFile &Obj, Triple TT,
+ SubtargetFeatures Features,
LinkGraph::GetEdgeKindNameFunction GetEdgeKindName);
LinkGraph &getGraph() const { return *G; }
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
index 3380bb563140..dd0b5d37d1b7 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp
@@ -25,9 +25,10 @@ namespace {
class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder {
public:
- MachOLinkGraphBuilder_arm64(const object::MachOObjectFile &Obj)
+ MachOLinkGraphBuilder_arm64(const object::MachOObjectFile &Obj,
+ SubtargetFeatures Features)
: MachOLinkGraphBuilder(Obj, Triple("arm64-apple-darwin"),
- aarch64::getEdgeKindName),
+ std::move(Features), aarch64::getEdgeKindName),
NumSymbols(Obj.getSymtabLoadCommand().nsyms) {}
private:
@@ -541,7 +542,13 @@ createLinkGraphFromMachOObject_arm64(MemoryBufferRef ObjectBuffer) {
auto MachOObj = object::ObjectFile::createMachOObjectFile(ObjectBuffer);
if (!MachOObj)
return MachOObj.takeError();
- return MachOLinkGraphBuilder_arm64(**MachOObj).buildGraph();
+
+ auto Features = (*MachOObj)->getFeatures();
+ if (!Features)
+ return Features.takeError();
+
+ return MachOLinkGraphBuilder_arm64(**MachOObj, std::move(*Features))
+ .buildGraph();
}
void link_MachO_arm64(std::unique_ptr<LinkGraph> G,
diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
index be40b740a5a7..4dba27bc61cb 100644
--- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp
@@ -25,9 +25,10 @@ namespace {
class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder {
public:
- MachOLinkGraphBuilder_x86_64(const object::MachOObjectFile &Obj)
+ MachOLinkGraphBuilder_x86_64(const object::MachOObjectFile &Obj,
+ SubtargetFeatures Features)
: MachOLinkGraphBuilder(Obj, Triple("x86_64-apple-darwin"),
- x86_64::getEdgeKindName) {}
+ std::move(Features), x86_64::getEdgeKindName) {}
private:
enum MachONormalizedRelocationType : unsigned {
@@ -466,7 +467,13 @@ createLinkGraphFromMachOObject_x86_64(MemoryBufferRef ObjectBuffer) {
auto MachOObj = object::ObjectFile::createMachOObjectFile(ObjectBuffer);
if (!MachOObj)
return MachOObj.takeError();
- return MachOLinkGraphBuilder_x86_64(**MachOObj).buildGraph();
+
+ auto Features = (*MachOObj)->getFeatures();
+ if (!Features)
+ return Features.takeError();
+
+ return MachOLinkGraphBuilder_x86_64(**MachOObj, std::move(*Features))
+ .buildGraph();
}
void link_MachO_x86_64(std::unique_ptr<LinkGraph> G,
diff --git a/llvm/lib/ExecutionEngine/JITLink/SEHFrameSupport.h b/llvm/lib/ExecutionEngine/JITLink/SEHFrameSupport.h
index 0d95fbf439b5..21bfd36d44a2 100644
--- a/llvm/lib/ExecutionEngine/JITLink/SEHFrameSupport.h
+++ b/llvm/lib/ExecutionEngine/JITLink/SEHFrameSupport.h
@@ -13,10 +13,10 @@
#ifndef LLVM_EXECUTIONENGINE_JITLINK_SEHFRAMESUPPORT_H
#define LLVM_EXECUTIONENGINE_JITLINK_SEHFRAMESUPPORT_H
-#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/JITLink/JITLink.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/Support/Error.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
namespace jitlink {
diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp
new file mode 100644
index 000000000000..ffc3950cdec8
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp
@@ -0,0 +1,519 @@
+//===--------- aarch32.cpp - Generic JITLink arm/thumb utilities ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic utilities for graphs representing arm/thumb objects.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/aarch32.h"
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/ExecutionEngine/JITLink/JITLink.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/MathExtras.h"
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm {
+namespace jitlink {
+namespace aarch32 {
+
+/// Encode 22-bit immediate value for branch instructions without J1J2 range
+/// extension (formats B T4, BL T1 and BLX T2).
+///
+/// 00000:Imm11H:Imm11L:0 -> [ 00000:Imm11H, 00000:Imm11L ]
+/// J1^ ^J2 will always be 1
+///
+HalfWords encodeImmBT4BlT1BlxT2(int64_t Value) {
+ constexpr uint32_t J1J2 = 0x2800;
+ uint32_t Imm11H = (Value >> 12) & 0x07ff;
+ uint32_t Imm11L = (Value >> 1) & 0x07ff;
+ return HalfWords{Imm11H, Imm11L | J1J2};
+}
+
+/// Decode 22-bit immediate value for branch instructions without J1J2 range
+/// extension (formats B T4, BL T1 and BLX T2).
+///
+/// [ 00000:Imm11H, 00000:Imm11L ] -> 00000:Imm11H:Imm11L:0
+/// J1^ ^J2 will always be 1
+///
+int64_t decodeImmBT4BlT1BlxT2(uint32_t Hi, uint32_t Lo) {
+ uint32_t Imm11H = Hi & 0x07ff;
+ uint32_t Imm11L = Lo & 0x07ff;
+ return SignExtend64<22>(Imm11H << 12 | Imm11L << 1);
+}
+
+/// Encode 25-bit immediate value for branch instructions with J1J2 range
+/// extension (formats B T4, BL T1 and BLX T2).
+///
+/// S:I1:I2:Imm10:Imm11:0 -> [ 00000:S:Imm10, 00:J1:0:J2:Imm11 ]
+///
+HalfWords encodeImmBT4BlT1BlxT2_J1J2(int64_t Value) {
+ uint32_t S = (Value >> 14) & 0x0400;
+ uint32_t J1 = (((~(Value >> 10)) ^ (Value >> 11)) & 0x2000);
+ uint32_t J2 = (((~(Value >> 11)) ^ (Value >> 13)) & 0x0800);
+ uint32_t Imm10 = (Value >> 12) & 0x03ff;
+ uint32_t Imm11 = (Value >> 1) & 0x07ff;
+ return HalfWords{S | Imm10, J1 | J2 | Imm11};
+}
+
+/// Decode 25-bit immediate value for branch instructions with J1J2 range
+/// extension (formats B T4, BL T1 and BLX T2).
+///
+/// [ 00000:S:Imm10, 00:J1:0:J2:Imm11] -> S:I1:I2:Imm10:Imm11:0
+///
+int64_t decodeImmBT4BlT1BlxT2_J1J2(uint32_t Hi, uint32_t Lo) {
+ uint32_t S = Hi & 0x0400;
+ uint32_t I1 = ~((Lo ^ (Hi << 3)) << 10) & 0x00800000;
+ uint32_t I2 = ~((Lo ^ (Hi << 1)) << 11) & 0x00400000;
+ uint32_t Imm10 = Hi & 0x03ff;
+ uint32_t Imm11 = Lo & 0x07ff;
+ return SignExtend64<25>(S << 14 | I1 | I2 | Imm10 << 12 | Imm11 << 1);
+}
+
+/// Encode 16-bit immediate value for move instruction formats MOVT T1 and
+/// MOVW T3.
+///
+/// Imm4:Imm1:Imm3:Imm8 -> [ 00000:i:000000:Imm4, 0:Imm3:0000:Imm8 ]
+///
+HalfWords encodeImmMovtT1MovwT3(uint16_t Value) {
+ uint32_t Imm4 = (Value >> 12) & 0x0f;
+ uint32_t Imm1 = (Value >> 11) & 0x01;
+ uint32_t Imm3 = (Value >> 8) & 0x07;
+ uint32_t Imm8 = Value & 0xff;
+ return HalfWords{Imm1 << 10 | Imm4, Imm3 << 12 | Imm8};
+}
+
+/// Decode 16-bit immediate value from move instruction formats MOVT T1 and
+/// MOVW T3.
+///
+/// [ 00000:i:000000:Imm4, 0:Imm3:0000:Imm8 ] -> Imm4:Imm1:Imm3:Imm8
+///
+uint16_t decodeImmMovtT1MovwT3(uint32_t Hi, uint32_t Lo) {
+ uint32_t Imm4 = Hi & 0x0f;
+ uint32_t Imm1 = (Hi >> 10) & 0x01;
+ uint32_t Imm3 = (Lo >> 12) & 0x07;
+ uint32_t Imm8 = Lo & 0xff;
+ uint32_t Imm16 = Imm4 << 12 | Imm1 << 11 | Imm3 << 8 | Imm8;
+ assert(Imm16 <= 0xffff && "Decoded value out-of-range");
+ return Imm16;
+}
+
+/// Encode register ID for instruction formats MOVT T1 and MOVW T3.
+///
+/// Rd4 -> [0000000000000000, 0000:Rd4:00000000]
+///
+HalfWords encodeRegMovtT1MovwT3(int64_t Value) {
+ uint32_t Rd4 = (Value & 0x0f) << 8;
+ return HalfWords{0, Rd4};
+}
+
+/// Decode register ID from instruction formats MOVT T1 and MOVW T3.
+///
+/// [0000000000000000, 0000:Rd4:00000000] -> Rd4
+///
+int64_t decodeRegMovtT1MovwT3(uint32_t Hi, uint32_t Lo) {
+ uint32_t Rd4 = (Lo >> 8) & 0x0f;
+ return Rd4;
+}
+
+/// 32-bit Thumb instructions are stored as two little-endian halfwords.
+/// An instruction at address A encodes bytes A+1, A in the first halfword (Hi),
+/// followed by bytes A+3, A+2 in the second halfword (Lo).
+struct WritableThumbRelocation {
+ /// Create a writable reference to a Thumb32 fixup.
+ WritableThumbRelocation(char *FixupPtr)
+ : Hi{*reinterpret_cast<support::ulittle16_t *>(FixupPtr)},
+ Lo{*reinterpret_cast<support::ulittle16_t *>(FixupPtr + 2)} {}
+
+ support::ulittle16_t &Hi; // First halfword
+ support::ulittle16_t &Lo; // Second halfword
+};
+
+struct ThumbRelocation {
+ /// Create a read-only reference to a Thumb32 fixup.
+ ThumbRelocation(const char *FixupPtr)
+ : Hi{*reinterpret_cast<const support::ulittle16_t *>(FixupPtr)},
+ Lo{*reinterpret_cast<const support::ulittle16_t *>(FixupPtr + 2)} {}
+
+ /// Create a read-only Thumb32 fixup from a writable one.
+ ThumbRelocation(WritableThumbRelocation &Writable)
+ : Hi{Writable.Hi}, Lo(Writable.Lo) {}
+
+ const support::ulittle16_t &Hi; // First halfword
+ const support::ulittle16_t &Lo; // Second halfword
+};
+
+Error makeUnexpectedOpcodeError(const LinkGraph &G, const ThumbRelocation &R,
+ Edge::Kind Kind) {
+ return make_error<JITLinkError>(
+ formatv("Invalid opcode [ 0x{0:x4}, 0x{1:x4} ] for relocation: {2}",
+ static_cast<uint16_t>(R.Hi), static_cast<uint16_t>(R.Lo),
+ G.getEdgeKindName(Kind)));
+}
+
+template <EdgeKind_aarch32 Kind> bool checkOpcode(const ThumbRelocation &R) {
+ uint16_t Hi = R.Hi & FixupInfo<Kind>::OpcodeMask.Hi;
+ uint16_t Lo = R.Lo & FixupInfo<Kind>::OpcodeMask.Lo;
+ return Hi == FixupInfo<Kind>::Opcode.Hi && Lo == FixupInfo<Kind>::Opcode.Lo;
+}
+
+template <EdgeKind_aarch32 Kind>
+bool checkRegister(const ThumbRelocation &R, HalfWords Reg) {
+ uint16_t Hi = R.Hi & FixupInfo<Kind>::RegMask.Hi;
+ uint16_t Lo = R.Lo & FixupInfo<Kind>::RegMask.Lo;
+ return Hi == Reg.Hi && Lo == Reg.Lo;
+}
+
+template <EdgeKind_aarch32 Kind>
+void writeRegister(WritableThumbRelocation &R, HalfWords Reg) {
+ static constexpr HalfWords Mask = FixupInfo<Kind>::RegMask;
+ assert((Mask.Hi & Reg.Hi) == Reg.Hi && (Mask.Lo & Reg.Lo) == Reg.Lo &&
+ "Value bits exceed bit range of given mask");
+ R.Hi = (R.Hi & ~Mask.Hi) | Reg.Hi;
+ R.Lo = (R.Lo & ~Mask.Lo) | Reg.Lo;
+}
+
+template <EdgeKind_aarch32 Kind>
+void writeImmediate(WritableThumbRelocation &R, HalfWords Imm) {
+ static constexpr HalfWords Mask = FixupInfo<Kind>::ImmMask;
+ assert((Mask.Hi & Imm.Hi) == Imm.Hi && (Mask.Lo & Imm.Lo) == Imm.Lo &&
+ "Value bits exceed bit range of given mask");
+ R.Hi = (R.Hi & ~Mask.Hi) | Imm.Hi;
+ R.Lo = (R.Lo & ~Mask.Lo) | Imm.Lo;
+}
+
+Expected<int64_t> readAddendData(LinkGraph &G, Block &B, const Edge &E) {
+ support::endianness Endian = G.getEndianness();
+ assert(Endian != support::native && "Declare as little or big explicitly");
+
+ Edge::Kind Kind = E.getKind();
+ const char *BlockWorkingMem = B.getContent().data();
+ const char *FixupPtr = BlockWorkingMem + E.getOffset();
+
+ switch (Kind) {
+ case Data_Delta32:
+ case Data_Pointer32:
+ return SignExtend64<32>(support::endian::read32(FixupPtr, Endian));
+ default:
+ return make_error<JITLinkError>(
+ "In graph " + G.getName() + ", section " + B.getSection().getName() +
+ " can not read implicit addend for aarch32 edge kind " +
+ G.getEdgeKindName(E.getKind()));
+ }
+}
+
+Expected<int64_t> readAddendArm(LinkGraph &G, Block &B, const Edge &E) {
+ Edge::Kind Kind = E.getKind();
+
+ switch (Kind) {
+ case Arm_Call:
+ return make_error<JITLinkError>(
+ "Addend extraction for relocation type not yet implemented: " +
+ StringRef(G.getEdgeKindName(Kind)));
+ default:
+ return make_error<JITLinkError>(
+ "In graph " + G.getName() + ", section " + B.getSection().getName() +
+ " can not read implicit addend for aarch32 edge kind " +
+ G.getEdgeKindName(E.getKind()));
+ }
+}
+
+Expected<int64_t> readAddendThumb(LinkGraph &G, Block &B, const Edge &E,
+ const ArmConfig &ArmCfg) {
+ ThumbRelocation R(B.getContent().data() + E.getOffset());
+ Edge::Kind Kind = E.getKind();
+
+ switch (Kind) {
+ case Thumb_Call:
+ if (!checkOpcode<Thumb_Call>(R))
+ return makeUnexpectedOpcodeError(G, R, Kind);
+ return LLVM_LIKELY(ArmCfg.J1J2BranchEncoding)
+ ? decodeImmBT4BlT1BlxT2_J1J2(R.Hi, R.Lo)
+ : decodeImmBT4BlT1BlxT2(R.Hi, R.Lo);
+
+ case Thumb_Jump24:
+ if (!checkOpcode<Thumb_Jump24>(R))
+ return makeUnexpectedOpcodeError(G, R, Kind);
+ if (R.Lo & FixupInfo<Thumb_Jump24>::LoBitConditional)
+ return make_error<JITLinkError>("Relocation expects an unconditional "
+ "B.W branch instruction: " +
+ StringRef(G.getEdgeKindName(Kind)));
+ return LLVM_LIKELY(ArmCfg.J1J2BranchEncoding)
+ ? decodeImmBT4BlT1BlxT2_J1J2(R.Hi, R.Lo)
+ : decodeImmBT4BlT1BlxT2(R.Hi, R.Lo);
+
+ case Thumb_MovwAbsNC:
+ if (!checkOpcode<Thumb_MovwAbsNC>(R))
+ return makeUnexpectedOpcodeError(G, R, Kind);
+ // Initial addend is interpreted as a signed value
+ return SignExtend64<16>(decodeImmMovtT1MovwT3(R.Hi, R.Lo));
+
+ case Thumb_MovtAbs:
+ if (!checkOpcode<Thumb_MovtAbs>(R))
+ return makeUnexpectedOpcodeError(G, R, Kind);
+ // Initial addend is interpreted as a signed value
+ return SignExtend64<16>(decodeImmMovtT1MovwT3(R.Hi, R.Lo));
+
+ default:
+ return make_error<JITLinkError>(
+ "In graph " + G.getName() + ", section " + B.getSection().getName() +
+ " can not read implicit addend for aarch32 edge kind " +
+ G.getEdgeKindName(E.getKind()));
+ }
+}
+
+Error applyFixupData(LinkGraph &G, Block &B, const Edge &E) {
+ using namespace support;
+
+ char *BlockWorkingMem = B.getAlreadyMutableContent().data();
+ char *FixupPtr = BlockWorkingMem + E.getOffset();
+
+ auto Write32 = [FixupPtr, Endian = G.getEndianness()](int64_t Value) {
+ assert(Endian != native && "Must be explicit: little or big");
+ assert(isInt<32>(Value) && "Must be in signed 32-bit range");
+ uint32_t Imm = static_cast<int32_t>(Value);
+ if (LLVM_LIKELY(Endian == little))
+ endian::write32<little>(FixupPtr, Imm);
+ else
+ endian::write32<big>(FixupPtr, Imm);
+ };
+
+ Edge::Kind Kind = E.getKind();
+ uint64_t FixupAddress = (B.getAddress() + E.getOffset()).getValue();
+ int64_t Addend = E.getAddend();
+ Symbol &TargetSymbol = E.getTarget();
+ uint64_t TargetAddress = TargetSymbol.getAddress().getValue();
+ assert(!TargetSymbol.hasTargetFlags(ThumbSymbol));
+
+ // Regular data relocations have size 4, alignment 1 and write the full 32-bit
+ // result to the place; no need for overflow checking. There are three
+ // exceptions: R_ARM_ABS8, R_ARM_ABS16, R_ARM_PREL31
+ switch (Kind) {
+ case Data_Delta32: {
+ int64_t Value = TargetAddress - FixupAddress + Addend;
+ if (!isInt<32>(Value))
+ return makeTargetOutOfRangeError(G, B, E);
+ Write32(Value);
+ return Error::success();
+ }
+ case Data_Pointer32: {
+ int64_t Value = TargetAddress + Addend;
+ if (!isInt<32>(Value))
+ return makeTargetOutOfRangeError(G, B, E);
+ Write32(Value);
+ return Error::success();
+ }
+ default:
+ return make_error<JITLinkError>(
+ "In graph " + G.getName() + ", section " + B.getSection().getName() +
+ " encountered unfixable aarch32 edge kind " +
+ G.getEdgeKindName(E.getKind()));
+ }
+}
+
+Error applyFixupArm(LinkGraph &G, Block &B, const Edge &E) {
+ Edge::Kind Kind = E.getKind();
+
+ switch (Kind) {
+ case Arm_Call:
+ return make_error<JITLinkError>(
+ "Fix-up for relocation type not yet implemented: " +
+ StringRef(G.getEdgeKindName(Kind)));
+ default:
+ return make_error<JITLinkError>(
+ "In graph " + G.getName() + ", section " + B.getSection().getName() +
+ " encountered unfixable aarch32 edge kind " +
+ G.getEdgeKindName(E.getKind()));
+ }
+}
+
+Error applyFixupThumb(LinkGraph &G, Block &B, const Edge &E,
+ const ArmConfig &ArmCfg) {
+ WritableThumbRelocation R(B.getAlreadyMutableContent().data() +
+ E.getOffset());
+
+ Edge::Kind Kind = E.getKind();
+ uint64_t FixupAddress = (B.getAddress() + E.getOffset()).getValue();
+ int64_t Addend = E.getAddend();
+ Symbol &TargetSymbol = E.getTarget();
+ uint64_t TargetAddress = TargetSymbol.getAddress().getValue();
+ if (TargetSymbol.hasTargetFlags(ThumbSymbol))
+ TargetAddress |= 0x01;
+
+ switch (Kind) {
+ case Thumb_Jump24: {
+ if (!checkOpcode<Thumb_Jump24>(R))
+ return makeUnexpectedOpcodeError(G, R, Kind);
+ if (R.Lo & FixupInfo<Thumb_Jump24>::LoBitConditional)
+ return make_error<JITLinkError>("Relocation expects an unconditional "
+ "B.W branch instruction: " +
+ StringRef(G.getEdgeKindName(Kind)));
+ if (!(TargetSymbol.hasTargetFlags(ThumbSymbol)))
+ return make_error<JITLinkError>("Branch relocation needs interworking "
+ "stub when bridging to ARM: " +
+ StringRef(G.getEdgeKindName(Kind)));
+
+ int64_t Value = TargetAddress - FixupAddress + Addend;
+ if (LLVM_LIKELY(ArmCfg.J1J2BranchEncoding)) {
+ if (!isInt<25>(Value))
+ return makeTargetOutOfRangeError(G, B, E);
+ writeImmediate<Thumb_Jump24>(R, encodeImmBT4BlT1BlxT2_J1J2(Value));
+ } else {
+ if (!isInt<22>(Value))
+ return makeTargetOutOfRangeError(G, B, E);
+ writeImmediate<Thumb_Jump24>(R, encodeImmBT4BlT1BlxT2(Value));
+ }
+
+ return Error::success();
+ }
+
+ case Thumb_Call: {
+ if (!checkOpcode<Thumb_Call>(R))
+ return makeUnexpectedOpcodeError(G, R, Kind);
+
+ int64_t Value = TargetAddress - FixupAddress + Addend;
+
+ // The call instruction itself is Thumb. The call destination can either be
+ // Thumb or Arm. We use BL to stay in Thumb and BLX to change to Arm.
+ bool TargetIsArm = !TargetSymbol.hasTargetFlags(ThumbSymbol);
+ bool InstrIsBlx = (R.Lo & FixupInfo<Thumb_Call>::LoBitNoBlx) == 0;
+ if (TargetIsArm != InstrIsBlx) {
+ if (LLVM_LIKELY(TargetIsArm)) {
+ // Change opcode BL -> BLX and fix range value (account for 4-byte
+ // aligned destination while instruction may only be 2-byte aligned
+ // and clear Thumb bit).
+ R.Lo = R.Lo & ~FixupInfo<Thumb_Call>::LoBitNoBlx;
+ R.Lo = R.Lo & ~FixupInfo<Thumb_Call>::LoBitH;
+ Value = alignTo(Value, 4);
+ } else {
+ // Change opcode BLX -> BL and set Thumb bit
+ R.Lo = R.Lo & ~FixupInfo<Thumb_Call>::LoBitNoBlx;
+ Value |= 0x01;
+ }
+ }
+
+ if (LLVM_LIKELY(ArmCfg.J1J2BranchEncoding)) {
+ if (!isInt<25>(Value))
+ return makeTargetOutOfRangeError(G, B, E);
+ writeImmediate<Thumb_Call>(R, encodeImmBT4BlT1BlxT2_J1J2(Value));
+ } else {
+ if (!isInt<22>(Value))
+ return makeTargetOutOfRangeError(G, B, E);
+ writeImmediate<Thumb_Call>(R, encodeImmBT4BlT1BlxT2(Value));
+ }
+
+ assert(((R.Lo & FixupInfo<Thumb_Call>::LoBitNoBlx) ||
+ (R.Lo & FixupInfo<Thumb_Call>::LoBitH) == 0) &&
+ "Opcode BLX implies H bit is clear (avoid UB in BLX T2)");
+ return Error::success();
+ }
+
+ case Thumb_MovwAbsNC: {
+ if (!checkOpcode<Thumb_MovwAbsNC>(R))
+ return makeUnexpectedOpcodeError(G, R, Kind);
+ uint16_t Value = (TargetAddress + Addend) & 0xffff;
+ writeImmediate<Thumb_MovwAbsNC>(R, encodeImmMovtT1MovwT3(Value));
+ return Error::success();
+ }
+
+ case Thumb_MovtAbs: {
+ if (!checkOpcode<Thumb_MovtAbs>(R))
+ return makeUnexpectedOpcodeError(G, R, Kind);
+ uint16_t Value = ((TargetAddress + Addend) >> 16) & 0xffff;
+ writeImmediate<Thumb_MovtAbs>(R, encodeImmMovtT1MovwT3(Value));
+ return Error::success();
+ }
+
+ default:
+ return make_error<JITLinkError>(
+ "In graph " + G.getName() + ", section " + B.getSection().getName() +
+ " encountered unfixable aarch32 edge kind " +
+ G.getEdgeKindName(E.getKind()));
+ }
+}
+
+const uint8_t Thumbv7ABS[] = {
+ 0x40, 0xf2, 0x00, 0x0c, // movw r12, #0x0000 ; lower 16-bit
+ 0xc0, 0xf2, 0x00, 0x0c, // movt r12, #0x0000 ; upper 16-bit
+ 0x60, 0x47 // bx r12
+};
+
+template <>
+Symbol &StubsManager<Thumbv7>::createEntry(LinkGraph &G, Symbol &Target) {
+ constexpr uint64_t Alignment = 4;
+ Block &B = addStub(G, Thumbv7ABS, Alignment);
+ LLVM_DEBUG({
+ const char *StubPtr = B.getContent().data();
+ HalfWords Reg12 = encodeRegMovtT1MovwT3(12);
+ assert(checkRegister<Thumb_MovwAbsNC>(StubPtr, Reg12) &&
+ checkRegister<Thumb_MovtAbs>(StubPtr + 4, Reg12) &&
+ "Linker generated stubs may only corrupt register r12 (IP)");
+ });
+ B.addEdge(Thumb_MovwAbsNC, 0, Target, 0);
+ B.addEdge(Thumb_MovtAbs, 4, Target, 0);
+ Symbol &Stub = G.addAnonymousSymbol(B, 0, B.getSize(), true, false);
+ Stub.setTargetFlags(ThumbSymbol);
+ return Stub;
+}
+
+const char *getEdgeKindName(Edge::Kind K) {
+#define KIND_NAME_CASE(K) \
+ case K: \
+ return #K;
+
+ switch (K) {
+ KIND_NAME_CASE(Data_Delta32)
+ KIND_NAME_CASE(Arm_Call)
+ KIND_NAME_CASE(Thumb_Call)
+ KIND_NAME_CASE(Thumb_Jump24)
+ KIND_NAME_CASE(Thumb_MovwAbsNC)
+ KIND_NAME_CASE(Thumb_MovtAbs)
+ default:
+ return getGenericEdgeKindName(K);
+ }
+#undef KIND_NAME_CASE
+}
+
+const char *getCPUArchName(ARMBuildAttrs::CPUArch K) {
+#define CPUARCH_NAME_CASE(K) \
+ case K: \
+ return #K;
+
+ using namespace ARMBuildAttrs;
+ switch (K) {
+ CPUARCH_NAME_CASE(Pre_v4)
+ CPUARCH_NAME_CASE(v4)
+ CPUARCH_NAME_CASE(v4T)
+ CPUARCH_NAME_CASE(v5T)
+ CPUARCH_NAME_CASE(v5TE)
+ CPUARCH_NAME_CASE(v5TEJ)
+ CPUARCH_NAME_CASE(v6)
+ CPUARCH_NAME_CASE(v6KZ)
+ CPUARCH_NAME_CASE(v6T2)
+ CPUARCH_NAME_CASE(v6K)
+ CPUARCH_NAME_CASE(v7)
+ CPUARCH_NAME_CASE(v6_M)
+ CPUARCH_NAME_CASE(v6S_M)
+ CPUARCH_NAME_CASE(v7E_M)
+ CPUARCH_NAME_CASE(v8_A)
+ CPUARCH_NAME_CASE(v8_R)
+ CPUARCH_NAME_CASE(v8_M_Base)
+ CPUARCH_NAME_CASE(v8_M_Main)
+ CPUARCH_NAME_CASE(v8_1_M_Main)
+ CPUARCH_NAME_CASE(v9_A)
+ }
+ llvm_unreachable("Missing CPUArch in switch?");
+#undef CPUARCH_NAME_CASE
+}
+
+} // namespace aarch32
+} // namespace jitlink
+} // namespace llvm
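
The MOVW/MOVT helpers above split a 32-bit absolute address into two 16-bit immediates and scatter each across the Thumb2 halfwords. A round-trip sketch of the same bit layout with local names (packMovImm/unpackMovImm are not JITLink functions):

#include <cassert>
#include <cstdint>

struct HalfWordsSketch { uint32_t Hi, Lo; };

// Same packing as encodeImmMovtT1MovwT3() above.
HalfWordsSketch packMovImm(uint16_t Value) {
  uint32_t Imm4 = (Value >> 12) & 0x0f;
  uint32_t Imm1 = (Value >> 11) & 0x01;
  uint32_t Imm3 = (Value >> 8) & 0x07;
  uint32_t Imm8 = Value & 0xff;
  return {Imm1 << 10 | Imm4, Imm3 << 12 | Imm8};
}

// Same unpacking as decodeImmMovtT1MovwT3() above.
uint16_t unpackMovImm(HalfWordsSketch HW) {
  uint32_t Imm4 = HW.Hi & 0x0f;
  uint32_t Imm1 = (HW.Hi >> 10) & 0x01;
  uint32_t Imm3 = (HW.Lo >> 12) & 0x07;
  uint32_t Imm8 = HW.Lo & 0xff;
  return Imm4 << 12 | Imm1 << 11 | Imm3 << 8 | Imm8;
}

void movwMovtExample() {
  // A 32-bit absolute address is materialized as MOVW(lower) + MOVT(upper).
  uint32_t Addr = 0x20010841;
  assert(unpackMovImm(packMovImm(Addr & 0xffff)) == (Addr & 0xffff));
  assert(unpackMovImm(packMovImm(Addr >> 16)) == (Addr >> 16));
}
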
diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp
index 1011fa81f750..cc58255a338d 100644
--- a/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/aarch64.cpp
@@ -47,6 +47,12 @@ const char *getEdgeKindName(Edge::Kind R) {
return "MoveWide16";
case LDRLiteral19:
return "LDRLiteral19";
+ case TestAndBranch14PCRel:
+ return "TestAndBranch14PCRel";
+ case CondBranch19PCRel:
+ return "CondBranch19PCRel";
+ case ADRLiteral21:
+ return "ADRLiteral21";
case Page21:
return "Page21";
case PageOffset12:
diff --git a/llvm/lib/ExecutionEngine/JITLink/i386.cpp b/llvm/lib/ExecutionEngine/JITLink/i386.cpp
index c2c5761cd272..e984bb10983d 100644
--- a/llvm/lib/ExecutionEngine/JITLink/i386.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/i386.cpp
@@ -34,10 +34,58 @@ const char *getEdgeKindName(Edge::Kind K) {
return "Delta32FromGOT";
case RequestGOTAndTransformToDelta32FromGOT:
return "RequestGOTAndTransformToDelta32FromGOT";
+ case BranchPCRel32:
+ return "BranchPCRel32";
+ case BranchPCRel32ToPtrJumpStub:
+ return "BranchPCRel32ToPtrJumpStub";
+ case BranchPCRel32ToPtrJumpStubBypassable:
+ return "BranchPCRel32ToPtrJumpStubBypassable";
}
return getGenericEdgeKindName(K);
}
const char NullPointerContent[PointerSize] = {0x00, 0x00, 0x00, 0x00};
+
+const char PointerJumpStubContent[6] = {
+ static_cast<char>(0xFFu), 0x25, 0x00, 0x00, 0x00, 0x00};
+
+Error optimizeGOTAndStubAccesses(LinkGraph &G) {
+ LLVM_DEBUG(dbgs() << "Optimizing GOT entries and stubs:\n");
+
+ for (auto *B : G.blocks())
+ for (auto &E : B->edges()) {
+ if (E.getKind() == i386::BranchPCRel32ToPtrJumpStubBypassable) {
+ auto &StubBlock = E.getTarget().getBlock();
+ assert(StubBlock.getSize() == sizeof(PointerJumpStubContent) &&
+ "Stub block should be stub sized");
+ assert(StubBlock.edges_size() == 1 &&
+ "Stub block should only have one outgoing edge");
+
+ auto &GOTBlock = StubBlock.edges().begin()->getTarget().getBlock();
+ assert(GOTBlock.getSize() == G.getPointerSize() &&
+ "GOT block should be pointer sized");
+ assert(GOTBlock.edges_size() == 1 &&
+ "GOT block should only have one outgoing edge");
+
+ auto &GOTTarget = GOTBlock.edges().begin()->getTarget();
+ orc::ExecutorAddr EdgeAddr = B->getAddress() + E.getOffset();
+ orc::ExecutorAddr TargetAddr = GOTTarget.getAddress();
+
+ int64_t Displacement = TargetAddr - EdgeAddr + 4;
+ if (isInt<32>(Displacement)) {
+ E.setKind(i386::BranchPCRel32);
+ E.setTarget(GOTTarget);
+ LLVM_DEBUG({
+ dbgs() << " Replaced stub branch with direct branch:\n ";
+ printEdge(dbgs(), *B, E, getEdgeKindName(E.getKind()));
+ dbgs() << "\n";
+ });
+ }
+ }
+ }
+
+ return Error::success();
+}
+
} // namespace llvm::jitlink::i386
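The new i386::optimizeGOTAndStubAccesses above (like its x86_64 counterpart further down) only rewrites a branch through a pointer jump stub into a direct branch when the PC-relative displacement fits a signed 32-bit immediate. A standalone sketch of that guard, with made-up addresses and a hand-rolled stand-in for llvm::isInt<32>:

#include <cstdint>
#include <limits>

static bool fitsInInt32(int64_t V) {
  return V >= std::numeric_limits<int32_t>::min() &&
         V <= std::numeric_limits<int32_t>::max();
}

// Same shape as the check above: the displacement is measured from the fixup
// address of the branch immediate, with the same +4 adjustment.
static bool canBypassStub(uint64_t FixupAddr, uint64_t TargetAddr) {
  int64_t Displacement =
      static_cast<int64_t>(TargetAddr) - static_cast<int64_t>(FixupAddr) + 4;
  return fitsInInt32(Displacement);
}

int main() {
  // Nearby target: bypass the stub. Target more than 2 GiB away: keep the
  // indirection through the GOT entry.
  return (canBypassStub(0x1000, 0x2000) &&
          !canBypassStub(0x1000, 0x1000 + (1ULL << 40)))
             ? 0
             : 1;
}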
diff --git a/llvm/lib/ExecutionEngine/JITLink/ppc64.cpp b/llvm/lib/ExecutionEngine/JITLink/ppc64.cpp
new file mode 100644
index 000000000000..4e21eace21d0
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/JITLink/ppc64.cpp
@@ -0,0 +1,102 @@
+//===----- ppc64.cpp - Generic JITLink ppc64 edge kinds, utilities ------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Generic utilities for graphs representing 64-bit PowerPC objects.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITLink/ppc64.h"
+
+#define DEBUG_TYPE "jitlink"
+
+namespace llvm::jitlink::ppc64 {
+
+const char NullPointerContent[8] = {0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00};
+
+const char PointerJumpStubContent_little[20] = {
+ 0x18, 0x00, 0x41, (char)0xf8, // std r2, 24(r1)
+ 0x00, 0x00, (char)0x82, 0x3d, // addis r12, r2, OffHa
+ 0x00, 0x00, (char)0x8c, (char)0xe9, // ld r12, OffLo(r12)
+ (char)0xa6, 0x03, (char)0x89, 0x7d, // mtctr r12
+ 0x20, 0x04, (char)0x80, 0x4e, // bctr
+};
+
+const char PointerJumpStubContent_big[20] = {
+ (char)0xf8, 0x41, 0x00, 0x18, // std r2, 24(r1)
+ 0x3d, (char)0x82, 0x00, 0x00, // addis r12, r2, OffHa
+ (char)0xe9, (char)0x8c, 0x00, 0x00, // ld r12, OffLo(r12)
+ 0x7d, (char)0x89, 0x03, (char)0xa6, // mtctr r12
+ 0x4e, (char)0x80, 0x04, 0x20, // bctr
+};
+
+// TODO: We can use prefixed instructions if LLJIT is running on power10.
+const char PointerJumpStubNoTOCContent_little[32] = {
+ (char)0xa6, 0x02, (char)0x88, 0x7d, // mflr 12
+ 0x05, (char)0x00, (char)0x9f, 0x42, // bcl 20,31,.+4
+ (char)0xa6, 0x02, 0x68, 0x7d, // mflr 11
+ (char)0xa6, 0x03, (char)0x88, 0x7d, // mtlr 12
+ 0x00, 0x00, (char)0x8b, 0x3d, // addis 12,11,OffHa
+ 0x00, 0x00, (char)0x8c, (char)0xe9, // ld 12, OffLo(12)
+ (char)0xa6, 0x03, (char)0x89, 0x7d, // mtctr 12
+ 0x20, 0x04, (char)0x80, 0x4e, // bctr
+};
+
+const char PointerJumpStubNoTOCContent_big[32] = {
+ 0x7d, (char)0x88, 0x02, (char)0xa6, // mflr 12
+ 0x42, (char)0x9f, 0x00, 0x05, // bcl 20,31,.+4
+ 0x7d, 0x68, 0x02, (char)0xa6, // mflr 11
+ 0x7d, (char)0x88, 0x03, (char)0xa6, // mtlr 12
+ 0x3d, (char)0x8b, 0x00, 0x00, // addis 12,11,OffHa
+ (char)0xe9, (char)0x8c, 0x00, 0x00, // ld 12, OffLo(12)
+ 0x7d, (char)0x89, 0x03, (char)0xa6, // mtctr 12
+ 0x4e, (char)0x80, 0x04, 0x20, // bctr
+};
+
+const char *getEdgeKindName(Edge::Kind K) {
+ switch (K) {
+ case Pointer64:
+ return "Pointer64";
+ case Pointer32:
+ return "Pointer32";
+ case Delta64:
+ return "Delta64";
+ case Delta32:
+ return "Delta32";
+ case NegDelta32:
+ return "NegDelta32";
+ case Delta16:
+ return "Delta16";
+ case Delta16HA:
+ return "Delta16HA";
+ case Delta16LO:
+ return "Delta16LO";
+ case TOCDelta16HA:
+ return "TOCDelta16HA";
+ case TOCDelta16LO:
+ return "TOCDelta16LO";
+ case TOCDelta16DS:
+ return "TOCDelta16DS";
+ case TOCDelta16LODS:
+ return "TOCDelta16LODS";
+ case CallBranchDelta:
+ return "CallBranchDelta";
+ case CallBranchDeltaRestoreTOC:
+ return "CallBranchDeltaRestoreTOC";
+ case RequestPLTCallStub:
+ return "RequestPLTCallStub";
+ case RequestPLTCallStubSaveTOC:
+ return "RequestPLTCallStubSaveTOC";
+ case RequestPLTCallStubNoTOC:
+ return "RequestPLTCallStubNoTOC";
+ default:
+ return getGenericEdgeKindName(static_cast<Edge::Kind>(K));
+ }
+}
+
+} // end namespace llvm::jitlink::ppc64
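The paired _little/_big stub tables above encode the same instruction sequence; only the byte order of each 32-bit instruction word differs. A standalone sketch of that serialization, using the final bctr word (0x4e800420) from the tables:

#include <cstdint>
#include <cstdio>

static void writeWord(uint8_t *Out, uint32_t Word, bool LittleEndian) {
  for (int I = 0; I < 4; ++I) {
    int Shift = LittleEndian ? 8 * I : 8 * (3 - I);
    Out[I] = static_cast<uint8_t>(Word >> Shift);
  }
}

int main() {
  uint8_t LE[4], BE[4];
  writeWord(LE, 0x4e800420u, /*LittleEndian=*/true);  // 0x20 0x04 0x80 0x4e
  writeWord(BE, 0x4e800420u, /*LittleEndian=*/false); // 0x4e 0x80 0x04 0x20
  std::printf("%02x %02x %02x %02x\n", LE[0], LE[1], LE[2], LE[3]);
  std::printf("%02x %02x %02x %02x\n", BE[0], BE[1], BE[2], BE[3]);
}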
diff --git a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
index 6ee92b065ca1..a78843b16147 100644
--- a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
@@ -78,6 +78,10 @@ const char *getEdgeKindName(Edge::Kind K) {
return "R_RISCV_SET32";
case R_RISCV_32_PCREL:
return "R_RISCV_32_PCREL";
+ case CallRelaxable:
+ return "CallRelaxable";
+ case AlignRelaxable:
+ return "AlignRelaxable";
}
return getGenericEdgeKindName(K);
}
diff --git a/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp
index 097e19e02530..273ac7b372a7 100644
--- a/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/x86_64.cpp
@@ -28,6 +28,8 @@ const char *getEdgeKindName(Edge::Kind K) {
return "Pointer32Signed";
case Pointer16:
return "Pointer16";
+ case Pointer8:
+ return "Pointer8";
case Delta64:
return "Delta64";
case Delta32:
@@ -102,8 +104,8 @@ Error optimizeGOTAndStubAccesses(LinkGraph &G) {
orc::ExecutorAddr TargetAddr = GOTTarget.getAddress();
orc::ExecutorAddr EdgeAddr = B->getFixupAddress(E);
int64_t Displacement = TargetAddr - EdgeAddr + 4;
- bool TargetInRangeForImmU32 = isInRangeForImmU32(TargetAddr.getValue());
- bool DisplacementInRangeForImmS32 = isInRangeForImmS32(Displacement);
+ bool TargetInRangeForImmU32 = isUInt<32>(TargetAddr.getValue());
+ bool DisplacementInRangeForImmS32 = isInt<32>(Displacement);
      // If both the target address and the displacement are out of range,
      // then there is no optimization opportunity.
@@ -173,7 +175,7 @@ Error optimizeGOTAndStubAccesses(LinkGraph &G) {
orc::ExecutorAddr TargetAddr = GOTTarget.getAddress();
int64_t Displacement = TargetAddr - EdgeAddr + 4;
- if (isInRangeForImmS32(Displacement)) {
+ if (isInt<32>(Displacement)) {
E.setKind(x86_64::BranchPCRel32);
E.setTarget(GOTTarget);
LLVM_DEBUG({
diff --git a/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp
index 40716a7f9b61..7c869bead0b0 100644
--- a/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/COFFPlatform.cpp
@@ -10,6 +10,7 @@
#include "llvm/ExecutionEngine/Orc/DebugUtils.h"
#include "llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h"
#include "llvm/ExecutionEngine/Orc/ObjectFileInterface.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ObjectFormats.h"
#include "llvm/Object/COFF.h"
@@ -54,8 +55,7 @@ public:
void materialize(std::unique_ptr<MaterializationResponsibility> R) override {
unsigned PointerSize;
support::endianness Endianness;
- const auto &TT =
- CP.getExecutionSession().getExecutorProcessControl().getTargetTriple();
+ const auto &TT = CP.getExecutionSession().getTargetTriple();
switch (TT.getArch()) {
case Triple::x86_64:
@@ -125,8 +125,8 @@ private:
llvm_unreachable("Unrecognized architecture");
}
- auto HeaderContent = G.allocateString(
- StringRef(reinterpret_cast<const char *>(&Hdr), sizeof(Hdr)));
+ auto HeaderContent = G.allocateContent(
+ ArrayRef<char>(reinterpret_cast<const char *>(&Hdr), sizeof(Hdr)));
return G.createContentBlock(HeaderSection, HeaderContent, ExecutorAddr(), 8,
0);
@@ -159,20 +159,36 @@ private:
namespace llvm {
namespace orc {
-Expected<std::unique_ptr<COFFPlatform>>
-COFFPlatform::Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
- JITDylib &PlatformJD, const char *OrcRuntimePath,
- LoadDynamicLibrary LoadDynLibrary, bool StaticVCRuntime,
- const char *VCRuntimePath,
- std::optional<SymbolAliasMap> RuntimeAliases) {
- auto &EPC = ES.getExecutorProcessControl();
+Expected<std::unique_ptr<COFFPlatform>> COFFPlatform::Create(
+ ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
+ JITDylib &PlatformJD, std::unique_ptr<MemoryBuffer> OrcRuntimeArchiveBuffer,
+ LoadDynamicLibrary LoadDynLibrary, bool StaticVCRuntime,
+ const char *VCRuntimePath, std::optional<SymbolAliasMap> RuntimeAliases) {
// If the target is not supported then bail out immediately.
- if (!supportedTarget(EPC.getTargetTriple()))
+ if (!supportedTarget(ES.getTargetTriple()))
return make_error<StringError>("Unsupported COFFPlatform triple: " +
- EPC.getTargetTriple().str(),
+ ES.getTargetTriple().str(),
inconvertibleErrorCode());
+ auto &EPC = ES.getExecutorProcessControl();
+
+ auto GeneratorArchive =
+ object::Archive::create(OrcRuntimeArchiveBuffer->getMemBufferRef());
+ if (!GeneratorArchive)
+ return GeneratorArchive.takeError();
+
+ auto OrcRuntimeArchiveGenerator = StaticLibraryDefinitionGenerator::Create(
+ ObjLinkingLayer, nullptr, std::move(*GeneratorArchive));
+ if (!OrcRuntimeArchiveGenerator)
+ return OrcRuntimeArchiveGenerator.takeError();
+
+ // We need a second instance of the archive (for now) for the Platform. We
+ // can `cantFail` this call, since if it were going to fail it would have
+ // failed above.
+ auto RuntimeArchive = cantFail(
+ object::Archive::create(OrcRuntimeArchiveBuffer->getMemBufferRef()));
+
// Create default aliases if the caller didn't supply any.
if (!RuntimeAliases)
RuntimeAliases = standardPlatformAliases(ES);
@@ -184,13 +200,13 @@ COFFPlatform::Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
auto &HostFuncJD = ES.createBareJITDylib("$<PlatformRuntimeHostFuncJD>");
// Add JIT-dispatch function support symbols.
- if (auto Err = HostFuncJD.define(absoluteSymbols(
- {{ES.intern("__orc_rt_jit_dispatch"),
- {EPC.getJITDispatchInfo().JITDispatchFunction.getValue(),
- JITSymbolFlags::Exported}},
- {ES.intern("__orc_rt_jit_dispatch_ctx"),
- {EPC.getJITDispatchInfo().JITDispatchContext.getValue(),
- JITSymbolFlags::Exported}}})))
+ if (auto Err = HostFuncJD.define(
+ absoluteSymbols({{ES.intern("__orc_rt_jit_dispatch"),
+ {EPC.getJITDispatchInfo().JITDispatchFunction,
+ JITSymbolFlags::Exported}},
+ {ES.intern("__orc_rt_jit_dispatch_ctx"),
+ {EPC.getJITDispatchInfo().JITDispatchContext,
+ JITSymbolFlags::Exported}}})))
return std::move(Err);
PlatformJD.addToLinkOrder(HostFuncJD);
@@ -198,13 +214,30 @@ COFFPlatform::Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
// Create the instance.
Error Err = Error::success();
auto P = std::unique_ptr<COFFPlatform>(new COFFPlatform(
- ES, ObjLinkingLayer, PlatformJD, OrcRuntimePath,
+ ES, ObjLinkingLayer, PlatformJD, std::move(*OrcRuntimeArchiveGenerator),
+ std::move(OrcRuntimeArchiveBuffer), std::move(RuntimeArchive),
std::move(LoadDynLibrary), StaticVCRuntime, VCRuntimePath, Err));
if (Err)
return std::move(Err);
return std::move(P);
}
+Expected<std::unique_ptr<COFFPlatform>>
+COFFPlatform::Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
+ JITDylib &PlatformJD, const char *OrcRuntimePath,
+ LoadDynamicLibrary LoadDynLibrary, bool StaticVCRuntime,
+ const char *VCRuntimePath,
+ std::optional<SymbolAliasMap> RuntimeAliases) {
+
+ auto ArchiveBuffer = MemoryBuffer::getFile(OrcRuntimePath);
+ if (!ArchiveBuffer)
+ return createFileError(OrcRuntimePath, ArchiveBuffer.getError());
+
+ return Create(ES, ObjLinkingLayer, PlatformJD, std::move(*ArchiveBuffer),
+ std::move(LoadDynLibrary), StaticVCRuntime, VCRuntimePath,
+ std::move(RuntimeAliases));
+}
+
Expected<MemoryBufferRef> COFFPlatform::getPerJDObjectFile() {
auto PerJDObj = OrcRuntimeArchive->findSym("__orc_rt_coff_per_jd_marker");
if (!PerJDObj)
@@ -348,37 +381,22 @@ bool COFFPlatform::supportedTarget(const Triple &TT) {
}
}
-COFFPlatform::COFFPlatform(ExecutionSession &ES,
- ObjectLinkingLayer &ObjLinkingLayer,
- JITDylib &PlatformJD, const char *OrcRuntimePath,
- LoadDynamicLibrary LoadDynLibrary,
- bool StaticVCRuntime, const char *VCRuntimePath,
- Error &Err)
+COFFPlatform::COFFPlatform(
+ ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
+ JITDylib &PlatformJD,
+ std::unique_ptr<StaticLibraryDefinitionGenerator> OrcRuntimeGenerator,
+ std::unique_ptr<MemoryBuffer> OrcRuntimeArchiveBuffer,
+ std::unique_ptr<object::Archive> OrcRuntimeArchive,
+ LoadDynamicLibrary LoadDynLibrary, bool StaticVCRuntime,
+ const char *VCRuntimePath, Error &Err)
: ES(ES), ObjLinkingLayer(ObjLinkingLayer),
LoadDynLibrary(std::move(LoadDynLibrary)),
+ OrcRuntimeArchiveBuffer(std::move(OrcRuntimeArchiveBuffer)),
+ OrcRuntimeArchive(std::move(OrcRuntimeArchive)),
StaticVCRuntime(StaticVCRuntime),
COFFHeaderStartSymbol(ES.intern("__ImageBase")) {
ErrorAsOutParameter _(&Err);
- // Create a generator for the ORC runtime archive.
- auto OrcRuntimeArchiveGenerator =
- StaticLibraryDefinitionGenerator::Load(ObjLinkingLayer, OrcRuntimePath);
- if (!OrcRuntimeArchiveGenerator) {
- Err = OrcRuntimeArchiveGenerator.takeError();
- return;
- }
-
- auto ArchiveBuffer = MemoryBuffer::getFile(OrcRuntimePath);
- if (!ArchiveBuffer) {
- Err = createFileError(OrcRuntimePath, ArchiveBuffer.getError());
- return;
- }
- OrcRuntimeArchiveBuffer = std::move(*ArchiveBuffer);
- OrcRuntimeArchive =
- std::make_unique<object::Archive>(*OrcRuntimeArchiveBuffer, Err);
- if (Err)
- return;
-
Bootstrapping.store(true);
ObjLinkingLayer.addPlugin(std::make_unique<COFFPlatformPlugin>(*this));
@@ -391,7 +409,7 @@ COFFPlatform::COFFPlatform(ExecutionSession &ES,
}
VCRuntimeBootstrap = std::move(*VCRT);
- for (auto &Lib : (*OrcRuntimeArchiveGenerator)->getImportedDynamicLibraries())
+ for (auto &Lib : OrcRuntimeGenerator->getImportedDynamicLibraries())
DylibsToPreload.insert(Lib);
auto ImportedLibs =
@@ -405,7 +423,7 @@ COFFPlatform::COFFPlatform(ExecutionSession &ES,
for (auto &Lib : *ImportedLibs)
DylibsToPreload.insert(Lib);
- PlatformJD.addGenerator(std::move(*OrcRuntimeArchiveGenerator));
+ PlatformJD.addGenerator(std::move(OrcRuntimeGenerator));
// PlatformJD hasn't been set up by the platform yet (since we're creating
// the platform now), so set it up.
@@ -415,10 +433,10 @@ COFFPlatform::COFFPlatform(ExecutionSession &ES,
}
for (auto& Lib : DylibsToPreload)
- if (auto E2 = LoadDynLibrary(PlatformJD, Lib)) {
- Err = std::move(E2);
- return;
- }
+ if (auto E2 = this->LoadDynLibrary(PlatformJD, Lib)) {
+ Err = std::move(E2);
+ return;
+ }
if (StaticVCRuntime)
if (auto E2 = VCRuntimeBootstrap->initializeStaticVCRuntime(PlatformJD)) {
@@ -561,10 +579,9 @@ void COFFPlatform::rt_pushInitializers(PushInitializersSendResultFn SendResult,
});
if (!JD) {
- SendResult(
- make_error<StringError>("No JITDylib with header addr " +
- formatv("{0:x}", JDHeaderAddr.getValue()),
- inconvertibleErrorCode()));
+ SendResult(make_error<StringError>("No JITDylib with header addr " +
+ formatv("{0:x}", JDHeaderAddr),
+ inconvertibleErrorCode()));
return;
}
@@ -579,10 +596,7 @@ void COFFPlatform::rt_pushInitializers(PushInitializersSendResultFn SendResult,
void COFFPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
ExecutorAddr Handle, StringRef SymbolName) {
- LLVM_DEBUG({
- dbgs() << "COFFPlatform::rt_lookupSymbol(\""
- << formatv("{0:x}", Handle.getValue()) << "\")\n";
- });
+ LLVM_DEBUG(dbgs() << "COFFPlatform::rt_lookupSymbol(\"" << Handle << "\")\n");
JITDylib *JD = nullptr;
@@ -594,12 +608,9 @@ void COFFPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
}
if (!JD) {
- LLVM_DEBUG({
- dbgs() << " No JITDylib for handle "
- << formatv("{0:x}", Handle.getValue()) << "\n";
- });
+ LLVM_DEBUG(dbgs() << " No JITDylib for handle " << Handle << "\n");
SendResult(make_error<StringError>("No JITDylib associated with handle " +
- formatv("{0:x}", Handle.getValue()),
+ formatv("{0:x}", Handle),
inconvertibleErrorCode()));
return;
}
@@ -612,7 +623,7 @@ void COFFPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
void operator()(Expected<SymbolMap> Result) {
if (Result) {
assert(Result->size() == 1 && "Unexpected result map count");
- SendResult(ExecutorAddr(Result->begin()->second.getAddress()));
+ SendResult(Result->begin()->second.getAddress());
} else {
SendResult(Result.takeError());
}
@@ -850,7 +861,7 @@ Error COFFPlatform::COFFPlatformPlugin::preserveInitializerSections(
jitlink::LinkGraph &G, MaterializationResponsibility &MR) {
JITLinkSymbolSet InitSectionSymbols;
for (auto &Sec : G.sections())
- if (COFFPlatform::isInitializerSection(Sec.getName()))
+ if (isCOFFInitializerSection(Sec.getName()))
for (auto *B : Sec.blocks())
if (!B->edges_empty())
InitSectionSymbols.insert(
@@ -885,14 +896,13 @@ Error COFFPlatform::COFFPlatformPlugin::
// Collect static initializers
for (auto &S : G.sections())
- if (COFFPlatform::isInitializerSection(S.getName()))
+ if (isCOFFInitializerSection(S.getName()))
for (auto *B : S.blocks()) {
if (B->edges_empty())
continue;
for (auto &E : B->edges())
BState.Initializers.push_back(std::make_pair(
- S.getName().str(),
- ExecutorAddr(E.getTarget().getAddress() + E.getAddend())));
+ S.getName().str(), E.getTarget().getAddress() + E.getAddend()));
}
return Error::success();
diff --git a/llvm/lib/ExecutionEngine/Orc/COFFVCRuntimeSupport.cpp b/llvm/lib/ExecutionEngine/Orc/COFFVCRuntimeSupport.cpp
index d9316fab2de3..94f696fa2086 100644
--- a/llvm/lib/ExecutionEngine/Orc/COFFVCRuntimeSupport.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/COFFVCRuntimeSupport.cpp
@@ -160,7 +160,7 @@ COFFVCRuntimeBootstrapper::getMSVCToolchainPath() {
if (!findVCToolChainViaCommandLine(*VFS, std::nullopt, std::nullopt,
std::nullopt, VCToolChainPath, VSLayout) &&
!findVCToolChainViaEnvironment(*VFS, VCToolChainPath, VSLayout) &&
- !findVCToolChainViaSetupConfig(*VFS, VCToolChainPath, VSLayout) &&
+ !findVCToolChainViaSetupConfig(*VFS, {}, VCToolChainPath, VSLayout) &&
!findVCToolChainViaRegistry(VCToolChainPath, VSLayout))
return make_error<StringError>("Couldn't find msvc toolchain.",
inconvertibleErrorCode());
diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp
index 4a9d0d470a8e..0c23f2b25219 100644
--- a/llvm/lib/ExecutionEngine/Orc/Core.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp
@@ -167,15 +167,16 @@ AsynchronousSymbolQuery::AsynchronousSymbolQuery(
OutstandingSymbolsCount = Symbols.size();
for (auto &KV : Symbols)
- ResolvedSymbols[KV.first] = nullptr;
+ ResolvedSymbols[KV.first] = ExecutorSymbolDef();
}
void AsynchronousSymbolQuery::notifySymbolMetRequiredState(
- const SymbolStringPtr &Name, JITEvaluatedSymbol Sym) {
+ const SymbolStringPtr &Name, ExecutorSymbolDef Sym) {
auto I = ResolvedSymbols.find(Name);
assert(I != ResolvedSymbols.end() &&
"Resolving symbol outside the requested set");
- assert(I->second.getAddress() == 0 && "Redundantly resolving symbol Name");
+ assert(I->second == ExecutorSymbolDef() &&
+ "Redundantly resolving symbol Name");
// If this is a materialization-side-effects-only symbol then drop it,
// otherwise update its map entry with its resolved address.
@@ -447,8 +448,8 @@ void ReExportsMaterializationUnit::materialize(
if (KV.second.AliasFlags.hasMaterializationSideEffectsOnly())
continue;
- ResolutionMap[KV.first] = JITEvaluatedSymbol(
- (*Result)[KV.second.Aliasee].getAddress(), KV.second.AliasFlags);
+ ResolutionMap[KV.first] = {(*Result)[KV.second.Aliasee].getAddress(),
+ KV.second.AliasFlags};
}
if (auto Err = QueryInfo->R->notifyResolved(ResolutionMap)) {
ES.reportError(std::move(Err));
@@ -688,11 +689,15 @@ void JITDylib::removeGenerator(DefinitionGenerator &G) {
}
Expected<SymbolFlagsMap>
-JITDylib::defineMaterializing(SymbolFlagsMap SymbolFlags) {
+JITDylib::defineMaterializing(MaterializationResponsibility &FromMR,
+ SymbolFlagsMap SymbolFlags) {
return ES.runSessionLocked([&]() -> Expected<SymbolFlagsMap> {
- std::vector<SymbolTable::iterator> AddedSyms;
- std::vector<SymbolFlagsMap::iterator> RejectedWeakDefs;
+ if (FromMR.RT->isDefunct())
+ return make_error<ResourceTrackerDefunct>(FromMR.RT);
+
+ std::vector<NonOwningSymbolStringPtr> AddedSyms;
+ std::vector<NonOwningSymbolStringPtr> RejectedWeakDefs;
for (auto SFItr = SymbolFlags.begin(), SFEnd = SymbolFlags.end();
SFItr != SFEnd; ++SFItr) {
@@ -708,27 +713,27 @@ JITDylib::defineMaterializing(SymbolFlagsMap SymbolFlags) {
// If this is a strong definition then error out.
if (!Flags.isWeak()) {
// Remove any symbols already added.
- for (auto &SI : AddedSyms)
- Symbols.erase(SI);
+ for (auto &S : AddedSyms)
+ Symbols.erase(Symbols.find_as(S));
// FIXME: Return all duplicates.
return make_error<DuplicateDefinition>(std::string(*Name));
}
// Otherwise just make a note to discard this symbol after the loop.
- RejectedWeakDefs.push_back(SFItr);
+ RejectedWeakDefs.push_back(NonOwningSymbolStringPtr(Name));
continue;
} else
EntryItr =
Symbols.insert(std::make_pair(Name, SymbolTableEntry(Flags))).first;
- AddedSyms.push_back(EntryItr);
+ AddedSyms.push_back(NonOwningSymbolStringPtr(Name));
EntryItr->second.setState(SymbolState::Materializing);
}
// Remove any rejected weak definitions from the SymbolFlags map.
while (!RejectedWeakDefs.empty()) {
- SymbolFlags.erase(RejectedWeakDefs.back());
+ SymbolFlags.erase(SymbolFlags.find_as(RejectedWeakDefs.back()));
RejectedWeakDefs.pop_back();
}
@@ -944,7 +949,7 @@ Error JITDylib::resolve(MaterializationResponsibility &MR,
struct WorklistEntry {
SymbolTable::iterator SymI;
- JITEvaluatedSymbol ResolvedSym;
+ ExecutorSymbolDef ResolvedSym;
};
SymbolNameSet SymbolsInErrorState;
@@ -964,7 +969,7 @@ Error JITDylib::resolve(MaterializationResponsibility &MR,
"Resolving symbol with materializer attached?");
assert(SymI->second.getState() == SymbolState::Materializing &&
"Symbol should be materializing");
- assert(SymI->second.getAddress() == 0 &&
+ assert(SymI->second.getAddress() == ExecutorAddr() &&
"Symbol has already been resolved");
if (SymI->second.getFlags().hasError())
@@ -976,8 +981,7 @@ Error JITDylib::resolve(MaterializationResponsibility &MR,
(SymI->second.getFlags() & ~JITSymbolFlags::Common) &&
"Resolved flags should match the declared flags");
- Worklist.push_back(
- {SymI, JITEvaluatedSymbol(KV.second.getAddress(), Flags)});
+ Worklist.push_back({SymI, {KV.second.getAddress(), Flags}});
}
}
@@ -1328,6 +1332,18 @@ void JITDylib::setLinkOrder(JITDylibSearchOrder NewLinkOrder,
});
}
+void JITDylib::addToLinkOrder(const JITDylibSearchOrder &NewLinks) {
+ ES.runSessionLocked([&]() {
+ for (auto &KV : NewLinks) {
+ // Skip elements of NewLinks that are already in the link order.
+ if (llvm::find(LinkOrder, KV) != LinkOrder.end())
+ continue;
+
+ LinkOrder.push_back(std::move(KV));
+ }
+ });
+}
+
void JITDylib::addToLinkOrder(JITDylib &JD, JITDylibLookupFlags JDLookupFlags) {
ES.runSessionLocked([&]() { LinkOrder.push_back({&JD, JDLookupFlags}); });
}
@@ -1437,16 +1453,23 @@ void JITDylib::dump(raw_ostream &OS) {
OS << "Link order: " << LinkOrder << "\n"
<< "Symbol table:\n";
- for (auto &KV : Symbols) {
+ // Sort symbols so we get a deterministic order and can check them in tests.
+ std::vector<std::pair<SymbolStringPtr, SymbolTableEntry *>> SymbolsSorted;
+ for (auto &KV : Symbols)
+ SymbolsSorted.emplace_back(KV.first, &KV.second);
+ std::sort(SymbolsSorted.begin(), SymbolsSorted.end(),
+ [](const auto &L, const auto &R) { return *L.first < *R.first; });
+
+ for (auto &KV : SymbolsSorted) {
OS << " \"" << *KV.first << "\": ";
- if (auto Addr = KV.second.getAddress())
- OS << format("0x%016" PRIx64, Addr);
+ if (auto Addr = KV.second->getAddress())
+ OS << Addr;
else
OS << "<not resolved> ";
- OS << " " << KV.second.getFlags() << " " << KV.second.getState();
+ OS << " " << KV.second->getFlags() << " " << KV.second->getState();
- if (KV.second.hasMaterializerAttached()) {
+ if (KV.second->hasMaterializerAttached()) {
OS << " (Materializer ";
auto I = UnmaterializedInfos.find(KV.first);
assert(I != UnmaterializedInfos.end() &&
@@ -1940,6 +1963,7 @@ JITDylib *ExecutionSession::getJITDylibByName(StringRef Name) {
JITDylib &ExecutionSession::createBareJITDylib(std::string Name) {
assert(!getJITDylibByName(Name) && "JITDylib with that name already exists");
return runSessionLocked([&, this]() -> JITDylib & {
+ assert(SessionOpen && "Cannot create JITDylib after session is closed");
JDs.push_back(new JITDylib(*this, std::move(Name)));
return *JDs.back();
});
@@ -2156,7 +2180,7 @@ ExecutionSession::lookup(const JITDylibSearchOrder &SearchOrder,
#endif
}
-Expected<JITEvaluatedSymbol>
+Expected<ExecutorSymbolDef>
ExecutionSession::lookup(const JITDylibSearchOrder &SearchOrder,
SymbolStringPtr Name, SymbolState RequiredState) {
SymbolLookupSet Names({Name});
@@ -2170,13 +2194,13 @@ ExecutionSession::lookup(const JITDylibSearchOrder &SearchOrder,
return ResultMap.takeError();
}
-Expected<JITEvaluatedSymbol>
+Expected<ExecutorSymbolDef>
ExecutionSession::lookup(ArrayRef<JITDylib *> SearchOrder, SymbolStringPtr Name,
SymbolState RequiredState) {
return lookup(makeJITDylibSearchOrder(SearchOrder), Name, RequiredState);
}
-Expected<JITEvaluatedSymbol>
+Expected<ExecutorSymbolDef>
ExecutionSession::lookup(ArrayRef<JITDylib *> SearchOrder, StringRef Name,
SymbolState RequiredState) {
return lookup(SearchOrder, intern(Name), RequiredState);
@@ -2213,9 +2237,9 @@ Error ExecutionSession::registerJITDispatchHandlers(
return Error::success();
}
-void ExecutionSession::runJITDispatchHandler(
- SendResultFunction SendResult, JITTargetAddress HandlerFnTagAddr,
- ArrayRef<char> ArgBuffer) {
+void ExecutionSession::runJITDispatchHandler(SendResultFunction SendResult,
+ ExecutorAddr HandlerFnTagAddr,
+ ArrayRef<char> ArgBuffer) {
std::shared_ptr<JITDispatchHandlerFunction> F;
{
@@ -2666,7 +2690,7 @@ void ExecutionSession::OL_completeLookup(
// whether it has a materializer attached, and if so prepare to run
// it.
if (SymI->second.hasMaterializerAttached()) {
- assert(SymI->second.getAddress() == 0 &&
+ assert(SymI->second.getAddress() == ExecutorAddr() &&
"Symbol not resolved but already has address?");
auto UMII = JD.UnmaterializedInfos.find(Name);
assert(UMII != JD.UnmaterializedInfos.end() &&
@@ -2946,7 +2970,7 @@ Error ExecutionSession::OL_defineMaterializing(
<< NewSymbolFlags << "\n";
});
if (auto AcceptedDefs =
- MR.JD.defineMaterializing(std::move(NewSymbolFlags))) {
+ MR.JD.defineMaterializing(MR, std::move(NewSymbolFlags))) {
// Add all newly accepted symbols to this responsibility object.
for (auto &KV : *AcceptedDefs)
MR.SymbolFlags.insert(KV);
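Much of the Core.cpp churn above is the migration from raw JITTargetAddress/JITEvaluatedSymbol values to the typed ExecutorAddr/ExecutorSymbolDef pair. A simplified standalone sketch of the shape of those types (stand-ins, not the real ORC classes), showing why a default-constructed def works as the "not yet resolved" sentinel in the asserts above:

#include <cstdint>

struct ExecutorAddr {
  uint64_t Addr = 0;
  uint64_t getValue() const { return Addr; }
  friend bool operator==(ExecutorAddr L, ExecutorAddr R) {
    return L.Addr == R.Addr;
  }
};

enum class SymFlags : uint8_t { None = 0, Exported = 1 };

struct ExecutorSymbolDef {
  ExecutorAddr Addr;
  SymFlags Flags = SymFlags::None;
  friend bool operator==(const ExecutorSymbolDef &L,
                         const ExecutorSymbolDef &R) {
    return L.Addr == R.Addr && L.Flags == R.Flags;
  }
};

int main() {
  ExecutorSymbolDef Unresolved; // address 0, no flags
  ExecutorSymbolDef Resolved{ExecutorAddr{0x1000}, SymFlags::Exported};
  return (Unresolved == ExecutorSymbolDef() && !(Resolved == Unresolved)) ? 0 : 1;
}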
diff --git a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp
index 02c3e617df68..acbf33888ade 100644
--- a/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/DebugObjectManagerPlugin.cpp
@@ -60,26 +60,13 @@ public:
private:
typename ELFT::Shdr *Header;
-
- bool isTextOrDataSection() const;
};
template <typename ELFT>
void ELFDebugObjectSection<ELFT>::setTargetMemoryRange(SectionRange Range) {
- // Only patch load-addresses for executable and data sections.
- if (isTextOrDataSection())
- Header->sh_addr =
- static_cast<typename ELFT::uint>(Range.getStart().getValue());
-}
-
-template <typename ELFT>
-bool ELFDebugObjectSection<ELFT>::isTextOrDataSection() const {
- switch (Header->sh_type) {
- case ELF::SHT_PROGBITS:
- case ELF::SHT_X86_64_UNWIND:
- return Header->sh_flags & (ELF::SHF_EXECINSTR | ELF::SHF_ALLOC);
- }
- return false;
+ // All recorded sections are candidates for load-address patching.
+ Header->sh_addr =
+ static_cast<typename ELFT::uint>(Range.getStart().getValue());
}
template <typename ELFT>
@@ -106,16 +93,19 @@ Error ELFDebugObjectSection<ELFT>::validateInBounds(StringRef Buffer,
template <typename ELFT>
void ELFDebugObjectSection<ELFT>::dump(raw_ostream &OS, StringRef Name) {
- if (auto Addr = static_cast<JITTargetAddress>(Header->sh_addr)) {
+ if (uint64_t Addr = Header->sh_addr) {
OS << formatv(" {0:x16} {1}\n", Addr, Name);
} else {
OS << formatv(" {0}\n", Name);
}
}
-enum class Requirement {
+enum DebugObjectFlags : int {
// Request final target memory load-addresses for all sections.
- ReportFinalSectionLoadAddresses,
+ ReportFinalSectionLoadAddresses = 1 << 0,
+
+ // We found sections with debug information when processing the input object.
+ HasDebugSections = 1 << 1,
};
/// The plugin creates a debug object from when JITLink starts processing the
@@ -127,10 +117,15 @@ class DebugObject {
public:
DebugObject(JITLinkMemoryManager &MemMgr, const JITLinkDylib *JD,
ExecutionSession &ES)
- : MemMgr(MemMgr), JD(JD), ES(ES) {}
+ : MemMgr(MemMgr), JD(JD), ES(ES), Flags(DebugObjectFlags{}) {}
- void set(Requirement Req) { Reqs.insert(Req); }
- bool has(Requirement Req) const { return Reqs.count(Req) > 0; }
+ bool hasFlags(DebugObjectFlags F) const { return Flags & F; }
+ void setFlags(DebugObjectFlags F) {
+ Flags = static_cast<DebugObjectFlags>(Flags | F);
+ }
+ void clearFlags(DebugObjectFlags F) {
+ Flags = static_cast<DebugObjectFlags>(Flags & ~F);
+ }
using FinalizeContinuation = std::function<void(Expected<ExecutorAddrRange>)>;
@@ -159,7 +154,7 @@ protected:
private:
ExecutionSession &ES;
- std::set<Requirement> Reqs;
+ DebugObjectFlags Flags;
FinalizedAlloc Alloc;
};
@@ -171,8 +166,7 @@ void DebugObject::finalizeAsync(FinalizeContinuation OnFinalize) {
if (auto SimpleSegAlloc = finalizeWorkingMemory()) {
auto ROSeg = SimpleSegAlloc->getSegInfo(MemProt::Read);
- ExecutorAddrRange DebugObjRange(ExecutorAddr(ROSeg.Addr),
- ExecutorAddrDiff(ROSeg.WorkingMem.size()));
+ ExecutorAddrRange DebugObjRange(ROSeg.Addr, ROSeg.WorkingMem.size());
SimpleSegAlloc->finalize(
[this, DebugObjRange,
OnFinalize = std::move(OnFinalize)](Expected<FinalizedAlloc> FA) {
@@ -222,7 +216,7 @@ private:
JITLinkMemoryManager &MemMgr, const JITLinkDylib *JD,
ExecutionSession &ES)
: DebugObject(MemMgr, JD, ES), Buffer(std::move(Buffer)) {
- set(Requirement::ReportFinalSectionLoadAddresses);
+ setFlags(ReportFinalSectionLoadAddresses);
}
std::unique_ptr<WritableMemoryBuffer> Buffer;
@@ -271,24 +265,23 @@ ELFDebugObject::CreateArchType(MemoryBufferRef Buffer,
if (!ObjRef)
return ObjRef.takeError();
- // TODO: Add support for other architectures.
- uint16_t TargetMachineArch = ObjRef->getHeader().e_machine;
- if (TargetMachineArch != ELF::EM_X86_64)
- return nullptr;
-
Expected<ArrayRef<SectionHeader>> Sections = ObjRef->sections();
if (!Sections)
return Sections.takeError();
- bool HasDwarfSection = false;
for (const SectionHeader &Header : *Sections) {
Expected<StringRef> Name = ObjRef->getSectionName(Header);
if (!Name)
return Name.takeError();
if (Name->empty())
continue;
- HasDwarfSection |= isDwarfSection(*Name);
+ if (isDwarfSection(*Name))
+ DebugObj->setFlags(HasDebugSections);
+ // Only record text and data sections (i.e. no bss, comments, rel, etc.)
+ if (Header.sh_type != ELF::SHT_PROGBITS &&
+ Header.sh_type != ELF::SHT_X86_64_UNWIND)
+ continue;
if (!(Header.sh_flags & ELF::SHF_ALLOC))
continue;
@@ -297,13 +290,6 @@ ELFDebugObject::CreateArchType(MemoryBufferRef Buffer,
return std::move(Err);
}
- if (!HasDwarfSection) {
- LLVM_DEBUG(dbgs() << "Aborting debug registration for LinkGraph \""
- << DebugObj->Buffer->getBufferIdentifier()
- << "\": input object contains no debug info\n");
- return nullptr;
- }
-
return std::move(DebugObj);
}
@@ -371,12 +357,11 @@ Error ELFDebugObject::recordSection(
StringRef Name, std::unique_ptr<ELFDebugObjectSection<ELFT>> Section) {
if (Error Err = Section->validateInBounds(this->getBuffer(), Name.data()))
return Err;
- auto ItInserted = Sections.try_emplace(Name, std::move(Section));
- if (!ItInserted.second)
- return make_error<StringError>("In " + Buffer->getBufferIdentifier() +
- ", encountered duplicate section \"" +
- Name + "\" while building debug object",
- inconvertibleErrorCode());
+ bool Inserted = Sections.try_emplace(Name, std::move(Section)).second;
+ if (!Inserted)
+ LLVM_DEBUG(dbgs() << "Skipping debug registration for section '" << Name
+ << "' in object " << Buffer->getBufferIdentifier()
+ << " (duplicate name)\n");
return Error::success();
}
@@ -403,8 +388,15 @@ createDebugObjectFromBuffer(ExecutionSession &ES, LinkGraph &G,
}
DebugObjectManagerPlugin::DebugObjectManagerPlugin(
+ ExecutionSession &ES, std::unique_ptr<DebugObjectRegistrar> Target,
+ bool RequireDebugSections, bool AutoRegisterCode)
+ : ES(ES), Target(std::move(Target)),
+ RequireDebugSections(RequireDebugSections),
+ AutoRegisterCode(AutoRegisterCode) {}
+
+DebugObjectManagerPlugin::DebugObjectManagerPlugin(
ExecutionSession &ES, std::unique_ptr<DebugObjectRegistrar> Target)
- : ES(ES), Target(std::move(Target)) {}
+ : DebugObjectManagerPlugin(ES, std::move(Target), true, true) {}
DebugObjectManagerPlugin::~DebugObjectManagerPlugin() = default;
@@ -418,8 +410,14 @@ void DebugObjectManagerPlugin::notifyMaterializing(
if (auto DebugObj = createDebugObjectFromBuffer(ES, G, Ctx, ObjBuffer)) {
// Not all link artifacts allow debugging.
- if (*DebugObj != nullptr)
- PendingObjs[&MR] = std::move(*DebugObj);
+ if (*DebugObj == nullptr)
+ return;
+ if (RequireDebugSections && !(**DebugObj).hasFlags(HasDebugSections)) {
+ LLVM_DEBUG(dbgs() << "Skipping debug registration for LinkGraph '"
+ << G.getName() << "': no debug info\n");
+ return;
+ }
+ PendingObjs[&MR] = std::move(*DebugObj);
} else {
ES.reportError(DebugObj.takeError());
}
@@ -435,7 +433,7 @@ void DebugObjectManagerPlugin::modifyPassConfig(
return;
DebugObject &DebugObj = *It->second;
- if (DebugObj.has(Requirement::ReportFinalSectionLoadAddresses)) {
+ if (DebugObj.hasFlags(ReportFinalSectionLoadAddresses)) {
PassConfig.PostAllocationPasses.push_back(
[&DebugObj](LinkGraph &Graph) -> Error {
for (const Section &GraphSection : Graph.sections())
@@ -467,7 +465,8 @@ Error DebugObjectManagerPlugin::notifyEmitted(
FinalizePromise.set_value(TargetMem.takeError());
return;
}
- if (Error Err = Target->registerDebugObject(*TargetMem)) {
+ if (Error Err =
+ Target->registerDebugObject(*TargetMem, AutoRegisterCode)) {
FinalizePromise.set_value(std::move(Err));
return;
}
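The DebugObjectFlags change above replaces a std::set<Requirement> with a plain bitmask enum plus hasFlags/setFlags/clearFlags helpers. A standalone sketch of that pattern, reusing the two flag values the patch introduces:

#include <cstdio>

enum DebugObjectFlags : int {
  ReportFinalSectionLoadAddresses = 1 << 0,
  HasDebugSections = 1 << 1,
};

struct FlagSet {
  DebugObjectFlags Flags = DebugObjectFlags{};
  bool hasFlags(DebugObjectFlags F) const { return Flags & F; }
  void setFlags(DebugObjectFlags F) {
    Flags = static_cast<DebugObjectFlags>(Flags | F);
  }
  void clearFlags(DebugObjectFlags F) {
    Flags = static_cast<DebugObjectFlags>(Flags & ~F);
  }
};

int main() {
  FlagSet Obj;
  Obj.setFlags(ReportFinalSectionLoadAddresses);
  Obj.setFlags(HasDebugSections);
  Obj.clearFlags(HasDebugSections);
  std::printf("report=%d debug=%d\n",
              Obj.hasFlags(ReportFinalSectionLoadAddresses),
              Obj.hasFlags(HasDebugSections)); // report=1 debug=0
}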
diff --git a/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp b/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp
index 028bd245fb55..aca457642212 100644
--- a/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/DebugUtils.cpp
@@ -172,9 +172,8 @@ raw_ostream &operator<<(raw_ostream &OS, const JITSymbolFlags &Flags) {
return OS;
}
-raw_ostream &operator<<(raw_ostream &OS, const JITEvaluatedSymbol &Sym) {
- return OS << format("0x%016" PRIx64, Sym.getAddress()) << " "
- << Sym.getFlags();
+raw_ostream &operator<<(raw_ostream &OS, const ExecutorSymbolDef &Sym) {
+ return OS << Sym.getAddress() << " " << Sym.getFlags();
}
raw_ostream &operator<<(raw_ostream &OS, const SymbolFlagsMap::value_type &KV) {
@@ -299,8 +298,12 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolState &S) {
raw_ostream &operator<<(raw_ostream &OS, const SymbolStringPool &SSP) {
std::lock_guard<std::mutex> Lock(SSP.PoolMutex);
+ SmallVector<std::pair<StringRef, int>, 0> Vec;
for (auto &KV : SSP.Pool)
- OS << KV.first() << ": " << KV.second << "\n";
+ Vec.emplace_back(KV.first(), KV.second);
+ llvm::sort(Vec, less_first());
+ for (auto &[K, V] : Vec)
+ OS << K << ": " << V << "\n";
return OS;
}
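Both the JITDylib::dump change and the SymbolStringPool printer above use the same idiom: copy the map entries into a vector, sort by key, then print, so the output is deterministic across runs and usable in tests. A standalone sketch with plain std containers instead of the LLVM ones:

#include <algorithm>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

int main() {
  std::unordered_map<std::string, int> Pool{{"b", 2}, {"a", 1}, {"c", 3}};
  std::vector<std::pair<std::string, int>> Vec(Pool.begin(), Pool.end());
  std::sort(Vec.begin(), Vec.end(),
            [](const auto &L, const auto &R) { return L.first < R.first; });
  for (const auto &[K, V] : Vec)
    std::printf("%s: %d\n", K.c_str(), V); // a: 1, b: 2, c: 3
}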
diff --git a/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp b/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp
index 15e7ffb2f75a..830582bb3649 100644
--- a/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/DebuggerSupportPlugin.cpp
@@ -348,11 +348,12 @@ public:
Writer.write(SecCmd);
}
+ static constexpr bool AutoRegisterCode = true;
SectionRange R(MachOContainerBlock->getSection());
G.allocActions().push_back(
{cantFail(shared::WrapperFunctionCall::Create<
- shared::SPSArgList<shared::SPSExecutorAddrRange>>(
- RegisterActionAddr, R.getRange())),
+ shared::SPSArgList<shared::SPSExecutorAddrRange, bool>>(
+ RegisterActionAddr, R.getRange(), AutoRegisterCode)),
{}});
return Error::success();
}
@@ -377,11 +378,11 @@ GDBJITDebugInfoRegistrationPlugin::Create(ExecutionSession &ES,
? ES.intern("_llvm_orc_registerJITLoaderGDBAllocAction")
: ES.intern("llvm_orc_registerJITLoaderGDBAllocAction");
- if (auto Addr = ES.lookup({&ProcessJD}, RegisterActionAddr))
+ if (auto RegisterSym = ES.lookup({&ProcessJD}, RegisterActionAddr))
return std::make_unique<GDBJITDebugInfoRegistrationPlugin>(
- ExecutorAddr(Addr->getAddress()));
+ RegisterSym->getAddress());
else
- return Addr.takeError();
+ return RegisterSym.takeError();
}
Error GDBJITDebugInfoRegistrationPlugin::notifyFailed(
diff --git a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
index 00032e4dca3f..1bb4ecdff299 100644
--- a/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ELFNixPlatform.cpp
@@ -14,6 +14,7 @@
#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "llvm/ExecutionEngine/Orc/DebugUtils.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ObjectFormats.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/Debug.h"
#include <optional>
@@ -40,8 +41,7 @@ public:
unsigned PointerSize;
support::endianness Endianness;
jitlink::Edge::Kind EdgeKind;
- const auto &TT =
- ENP.getExecutionSession().getExecutorProcessControl().getTargetTriple();
+ const auto &TT = ENP.getExecutionSession().getTargetTriple();
switch (TT.getArch()) {
case Triple::x86_64:
@@ -96,31 +96,24 @@ private:
ELFNixPlatform &ENP;
};
-StringRef EHFrameSectionName = ".eh_frame";
-StringRef InitArrayFuncSectionName = ".init_array";
-
-StringRef ThreadBSSSectionName = ".tbss";
-StringRef ThreadDataSectionName = ".tdata";
-
} // end anonymous namespace
namespace llvm {
namespace orc {
-Expected<std::unique_ptr<ELFNixPlatform>>
-ELFNixPlatform::Create(ExecutionSession &ES,
- ObjectLinkingLayer &ObjLinkingLayer,
- JITDylib &PlatformJD, const char *OrcRuntimePath,
- std::optional<SymbolAliasMap> RuntimeAliases) {
-
- auto &EPC = ES.getExecutorProcessControl();
+Expected<std::unique_ptr<ELFNixPlatform>> ELFNixPlatform::Create(
+ ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
+ JITDylib &PlatformJD, std::unique_ptr<DefinitionGenerator> OrcRuntime,
+ std::optional<SymbolAliasMap> RuntimeAliases) {
// If the target is not supported then bail out immediately.
- if (!supportedTarget(EPC.getTargetTriple()))
+ if (!supportedTarget(ES.getTargetTriple()))
return make_error<StringError>("Unsupported ELFNixPlatform triple: " +
- EPC.getTargetTriple().str(),
+ ES.getTargetTriple().str(),
inconvertibleErrorCode());
+ auto &EPC = ES.getExecutorProcessControl();
+
// Create default aliases if the caller didn't supply any.
if (!RuntimeAliases) {
auto StandardRuntimeAliases = standardPlatformAliases(ES, PlatformJD);
@@ -134,31 +127,41 @@ ELFNixPlatform::Create(ExecutionSession &ES,
return std::move(Err);
// Add JIT-dispatch function support symbols.
- if (auto Err = PlatformJD.define(absoluteSymbols(
- {{ES.intern("__orc_rt_jit_dispatch"),
- {EPC.getJITDispatchInfo().JITDispatchFunction.getValue(),
- JITSymbolFlags::Exported}},
- {ES.intern("__orc_rt_jit_dispatch_ctx"),
- {EPC.getJITDispatchInfo().JITDispatchContext.getValue(),
- JITSymbolFlags::Exported}}})))
+ if (auto Err = PlatformJD.define(
+ absoluteSymbols({{ES.intern("__orc_rt_jit_dispatch"),
+ {EPC.getJITDispatchInfo().JITDispatchFunction,
+ JITSymbolFlags::Exported}},
+ {ES.intern("__orc_rt_jit_dispatch_ctx"),
+ {EPC.getJITDispatchInfo().JITDispatchContext,
+ JITSymbolFlags::Exported}}})))
return std::move(Err);
- // Create a generator for the ORC runtime archive.
- auto OrcRuntimeArchiveGenerator = StaticLibraryDefinitionGenerator::Load(
- ObjLinkingLayer, OrcRuntimePath, EPC.getTargetTriple());
- if (!OrcRuntimeArchiveGenerator)
- return OrcRuntimeArchiveGenerator.takeError();
-
// Create the instance.
Error Err = Error::success();
- auto P = std::unique_ptr<ELFNixPlatform>(
- new ELFNixPlatform(ES, ObjLinkingLayer, PlatformJD,
- std::move(*OrcRuntimeArchiveGenerator), Err));
+ auto P = std::unique_ptr<ELFNixPlatform>(new ELFNixPlatform(
+ ES, ObjLinkingLayer, PlatformJD, std::move(OrcRuntime), Err));
if (Err)
return std::move(Err);
return std::move(P);
}
+Expected<std::unique_ptr<ELFNixPlatform>>
+ELFNixPlatform::Create(ExecutionSession &ES,
+ ObjectLinkingLayer &ObjLinkingLayer,
+ JITDylib &PlatformJD, const char *OrcRuntimePath,
+ std::optional<SymbolAliasMap> RuntimeAliases) {
+
+ // Create a generator for the ORC runtime archive.
+ auto OrcRuntimeArchiveGenerator =
+ StaticLibraryDefinitionGenerator::Load(ObjLinkingLayer, OrcRuntimePath);
+ if (!OrcRuntimeArchiveGenerator)
+ return OrcRuntimeArchiveGenerator.takeError();
+
+ return Create(ES, ObjLinkingLayer, PlatformJD,
+ std::move(*OrcRuntimeArchiveGenerator),
+ std::move(RuntimeAliases));
+}
+
Error ELFNixPlatform::setupJITDylib(JITDylib &JD) {
return JD.define(
std::make_unique<DSOHandleMaterializationUnit>(*this, DSOHandleSymbol));
@@ -204,47 +207,6 @@ ELFNixPlatform::standardPlatformAliases(ExecutionSession &ES,
SymbolAliasMap Aliases;
addAliases(ES, Aliases, requiredCXXAliases());
addAliases(ES, Aliases, standardRuntimeUtilityAliases());
-
- // Determine whether or not the libunwind extended-API function for
- // dynamically registering an entire .eh_frame section is available.
- // If it is not, we assume that libgcc_s is being used, and alias to
- // its __register_frame with the same functionality.
- auto RTRegisterFrame = ES.intern("__orc_rt_register_eh_frame_section");
- auto LibUnwindRegisterFrame = ES.intern("__unw_add_dynamic_eh_frame_section");
- auto RTDeregisterFrame = ES.intern("__orc_rt_deregister_eh_frame_section");
- auto LibUnwindDeregisterFrame =
- ES.intern("__unw_remove_dynamic_eh_frame_section");
- auto SM = ES.lookup(makeJITDylibSearchOrder(&PlatformJD),
- SymbolLookupSet()
- .add(LibUnwindRegisterFrame,
- SymbolLookupFlags::WeaklyReferencedSymbol)
- .add(LibUnwindDeregisterFrame,
- SymbolLookupFlags::WeaklyReferencedSymbol));
- if (!SM) { // Weak-ref means no "missing symbol" errors, so this must be
- // something more serious that we should report.
- return SM.takeError();
- } else if (SM->size() == 2) {
- LLVM_DEBUG({
- dbgs() << "Using libunwind " << LibUnwindRegisterFrame
- << " for unwind info registration\n";
- });
- Aliases[std::move(RTRegisterFrame)] = {LibUnwindRegisterFrame,
- JITSymbolFlags::Exported};
- Aliases[std::move(RTDeregisterFrame)] = {LibUnwindDeregisterFrame,
- JITSymbolFlags::Exported};
- } else {
- // Since LLVM libunwind is not present, we assume that unwinding
- // is provided by libgcc
- LLVM_DEBUG({
- dbgs() << "Using libgcc __register_frame"
- << " for unwind info registration\n";
- });
- Aliases[std::move(RTRegisterFrame)] = {ES.intern("__register_frame"),
- JITSymbolFlags::Exported};
- Aliases[std::move(RTDeregisterFrame)] = {ES.intern("__deregister_frame"),
- JITSymbolFlags::Exported};
- }
-
return Aliases;
}
@@ -272,13 +234,6 @@ ELFNixPlatform::standardRuntimeUtilityAliases() {
StandardRuntimeUtilityAliases);
}
-bool ELFNixPlatform::isInitializerSection(StringRef SecName) {
- if (SecName.consume_front(InitArrayFuncSectionName) &&
- (SecName.empty() || SecName[0] == '.'))
- return true;
- return false;
-}
-
bool ELFNixPlatform::supportedTarget(const Triple &TT) {
switch (TT.getArch()) {
case Triple::x86_64:
@@ -433,8 +388,7 @@ void ELFNixPlatform::rt_getInitializers(SendInitializerSequenceFn SendResult,
void ELFNixPlatform::rt_getDeinitializers(
SendDeinitializerSequenceFn SendResult, ExecutorAddr Handle) {
LLVM_DEBUG({
- dbgs() << "ELFNixPlatform::rt_getDeinitializers(\""
- << formatv("{0:x}", Handle.getValue()) << "\")\n";
+ dbgs() << "ELFNixPlatform::rt_getDeinitializers(\"" << Handle << "\")\n";
});
JITDylib *JD = nullptr;
@@ -447,12 +401,9 @@ void ELFNixPlatform::rt_getDeinitializers(
}
if (!JD) {
- LLVM_DEBUG({
- dbgs() << " No JITDylib for handle "
- << formatv("{0:x}", Handle.getValue()) << "\n";
- });
+ LLVM_DEBUG(dbgs() << " No JITDylib for handle " << Handle << "\n");
SendResult(make_error<StringError>("No JITDylib associated with handle " +
- formatv("{0:x}", Handle.getValue()),
+ formatv("{0:x}", Handle),
inconvertibleErrorCode()));
return;
}
@@ -464,8 +415,7 @@ void ELFNixPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
ExecutorAddr Handle,
StringRef SymbolName) {
LLVM_DEBUG({
- dbgs() << "ELFNixPlatform::rt_lookupSymbol(\""
- << formatv("{0:x}", Handle.getValue()) << "\")\n";
+ dbgs() << "ELFNixPlatform::rt_lookupSymbol(\"" << Handle << "\")\n";
});
JITDylib *JD = nullptr;
@@ -478,12 +428,9 @@ void ELFNixPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
}
if (!JD) {
- LLVM_DEBUG({
- dbgs() << " No JITDylib for handle "
- << formatv("{0:x}", Handle.getValue()) << "\n";
- });
+ LLVM_DEBUG(dbgs() << " No JITDylib for handle " << Handle << "\n");
SendResult(make_error<StringError>("No JITDylib associated with handle " +
- formatv("{0:x}", Handle.getValue()),
+ formatv("{0:x}", Handle),
inconvertibleErrorCode()));
return;
}
@@ -496,7 +443,7 @@ void ELFNixPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
void operator()(Expected<SymbolMap> Result) {
if (Result) {
assert(Result->size() == 1 && "Unexpected result map count");
- SendResult(ExecutorAddr(Result->begin()->second.getAddress()));
+ SendResult(Result->begin()->second.getAddress());
} else {
SendResult(Result.takeError());
}
@@ -538,7 +485,7 @@ Error ELFNixPlatform::bootstrapELFNixRuntime(JITDylib &PlatformJD) {
for (const auto &KV : AddrsToRecord) {
auto &Name = KV.first;
assert(RuntimeSymbolAddrs->count(Name) && "Missing runtime symbol?");
- KV.second->setValue((*RuntimeSymbolAddrs)[Name].getAddress());
+ *KV.second = (*RuntimeSymbolAddrs)[Name].getAddress();
}
auto PJDDSOHandle = ES.lookup(
@@ -547,7 +494,8 @@ Error ELFNixPlatform::bootstrapELFNixRuntime(JITDylib &PlatformJD) {
return PJDDSOHandle.takeError();
if (auto Err = ES.callSPSWrapper<void(uint64_t)>(
- orc_rt_elfnix_platform_bootstrap, PJDDSOHandle->getAddress()))
+ orc_rt_elfnix_platform_bootstrap,
+ PJDDSOHandle->getAddress().getValue()))
return Err;
// FIXME: Ordering is fuzzy here. We're probably best off saying
@@ -596,8 +544,7 @@ Error ELFNixPlatform::registerInitInfo(
for (auto *Sec : InitSections) {
// FIXME: Avoid copy here.
jitlink::SectionRange R(*Sec);
- InitSeq->InitSections[Sec->getName()].push_back(
- {ExecutorAddr(R.getStart()), ExecutorAddr(R.getEnd())});
+ InitSeq->InitSections[Sec->getName()].push_back(R.getRange());
}
return Error::success();
@@ -724,20 +671,19 @@ void ELFNixPlatform::ELFNixPlatformPlugin::addEHAndTLVSupportPasses(
Config.PostFixupPasses.push_back([this](jitlink::LinkGraph &G) -> Error {
ELFPerObjectSectionsToRegister POSR;
- if (auto *EHFrameSection = G.findSectionByName(EHFrameSectionName)) {
+ if (auto *EHFrameSection = G.findSectionByName(ELFEHFrameSectionName)) {
jitlink::SectionRange R(*EHFrameSection);
if (!R.empty())
- POSR.EHFrameSection = {ExecutorAddr(R.getStart()),
- ExecutorAddr(R.getEnd())};
+ POSR.EHFrameSection = R.getRange();
}
// Get a pointer to the thread data section if there is one. It will be used
// below.
jitlink::Section *ThreadDataSection =
- G.findSectionByName(ThreadDataSectionName);
+ G.findSectionByName(ELFThreadDataSectionName);
// Handle thread BSS section if there is one.
- if (auto *ThreadBSSSection = G.findSectionByName(ThreadBSSSectionName)) {
+ if (auto *ThreadBSSSection = G.findSectionByName(ELFThreadBSSSectionName)) {
// If there's already a thread data section in this graph then merge the
// thread BSS section content into it, otherwise just treat the thread
// BSS section as the thread data section.
@@ -752,8 +698,7 @@ void ELFNixPlatform::ELFNixPlatformPlugin::addEHAndTLVSupportPasses(
if (ThreadDataSection) {
jitlink::SectionRange R(*ThreadDataSection);
if (!R.empty())
- POSR.ThreadDataSection = {ExecutorAddr(R.getStart()),
- ExecutorAddr(R.getEnd())};
+ POSR.ThreadDataSection = R.getRange();
}
if (POSR.EHFrameSection.Start || POSR.ThreadDataSection.Start) {
@@ -781,7 +726,7 @@ Error ELFNixPlatform::ELFNixPlatformPlugin::preserveInitSections(
JITLinkSymbolSet InitSectionSymbols;
for (auto &InitSection : G.sections()) {
// Skip non-init sections.
- if (!isInitializerSection(InitSection.getName()))
+ if (!isELFInitializerSection(InitSection.getName()))
continue;
// Make a pass over live symbols in the section: those blocks are already
@@ -816,10 +761,10 @@ Error ELFNixPlatform::ELFNixPlatformPlugin::registerInitSections(
SmallVector<jitlink::Section *> InitSections;
- LLVM_DEBUG({ dbgs() << "ELFNixPlatform::registerInitSections\n"; });
+ LLVM_DEBUG(dbgs() << "ELFNixPlatform::registerInitSections\n");
for (auto &Sec : G.sections()) {
- if (isInitializerSection(Sec.getName())) {
+ if (isELFInitializerSection(Sec.getName())) {
InitSections.push_back(&Sec);
}
}
@@ -829,8 +774,7 @@ Error ELFNixPlatform::ELFNixPlatformPlugin::registerInitSections(
dbgs() << "ELFNixPlatform: Scraped " << G.getName() << " init sections:\n";
for (auto *Sec : InitSections) {
jitlink::SectionRange R(*Sec);
- dbgs() << " " << Sec->getName() << ": "
- << formatv("[ {0:x} -- {1:x} ]", R.getStart(), R.getEnd()) << "\n";
+ dbgs() << " " << Sec->getName() << ": " << R.getRange() << "\n";
}
});
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp b/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp
index 30d641ee00cf..b8969de54936 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCDebugObjectRegistrar.cpp
@@ -45,14 +45,13 @@ Expected<std::unique_ptr<EPCDebugObjectRegistrar>> createJITLoaderGDBRegistrar(
assert((*Result)[0].size() == 1 &&
"Unexpected number of addresses in result");
- return std::make_unique<EPCDebugObjectRegistrar>(
- ES, ExecutorAddr((*Result)[0][0]));
+ return std::make_unique<EPCDebugObjectRegistrar>(ES, (*Result)[0][0]);
}
-Error EPCDebugObjectRegistrar::registerDebugObject(
- ExecutorAddrRange TargetMem) {
- return ES.callSPSWrapper<void(shared::SPSExecutorAddrRange)>(RegisterFn,
- TargetMem);
+Error EPCDebugObjectRegistrar::registerDebugObject(ExecutorAddrRange TargetMem,
+ bool AutoRegisterCode) {
+ return ES.callSPSWrapper<void(shared::SPSExecutorAddrRange, bool)>(
+ RegisterFn, TargetMem, AutoRegisterCode);
}
} // namespace orc
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.cpp b/llvm/lib/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.cpp
index 1adcc9156957..46e16a55c7e1 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.cpp
@@ -53,8 +53,7 @@ Error EPCDynamicLibrarySearchGenerator::tryToGenerate(
auto ResultI = Result->front().begin();
for (auto &KV : LookupSymbols) {
if (*ResultI)
- NewSymbols[KV.first] =
- JITEvaluatedSymbol(ResultI->getValue(), JITSymbolFlags::Exported);
+ NewSymbols[KV.first] = {*ResultI, JITSymbolFlags::Exported};
++ResultI;
}
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp b/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp
index 3aa94a7f43e2..56cd982cd5e1 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCEHFrameRegistrar.cpp
@@ -57,9 +57,8 @@ Expected<std::unique_ptr<EPCEHFrameRegistrar>> EPCEHFrameRegistrar::Create(
auto RegisterEHFrameWrapperFnAddr = (*Result)[0][0];
auto DeregisterEHFrameWrapperFnAddr = (*Result)[0][1];
- return std::make_unique<EPCEHFrameRegistrar>(
- ES, ExecutorAddr(RegisterEHFrameWrapperFnAddr),
- ExecutorAddr(DeregisterEHFrameWrapperFnAddr));
+ return std::make_unique<EPCEHFrameRegistrar>(ES, RegisterEHFrameWrapperFnAddr,
+ DeregisterEHFrameWrapperFnAddr);
}
Error EPCEHFrameRegistrar::registerEHFrames(ExecutorAddrRange EHFrameSection) {
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
index a3d857c3bfc4..b05f08fd7cdf 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericJITLinkMemoryManager.cpp
@@ -158,7 +158,7 @@ void EPCGenericJITLinkMemoryManager::completeAllocation(
auto &SegInfo = SegInfos[AG];
SegInfo.ContentSize = Seg.ContentSize;
SegInfo.ZeroFillSize = Seg.ZeroFillSize;
- SegInfo.Addr = ExecutorAddr(Seg.Addr);
+ SegInfo.Addr = Seg.Addr;
SegInfo.WorkingMem = Seg.WorkingMem;
}
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
index ec82081937e2..fbe25d70c38a 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.cpp
@@ -235,7 +235,7 @@ bool EPCGenericRTDyldMemoryManager::finalizeMemory(std::string *ErrMsg) {
for (unsigned I = 0; I != 3; ++I) {
FR.Segments.push_back({});
auto &Seg = FR.Segments.back();
- Seg.AG = SegMemProts[I];
+ Seg.RAG = SegMemProts[I];
Seg.Addr = RemoteAddrs[I]->Start;
for (auto &SecAlloc : *SegSections[I]) {
Seg.Size = alignTo(Seg.Size, SecAlloc.Align);
diff --git a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
index ddfb30500c7b..833be826f8ae 100644
--- a/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/EPCIndirectionUtils.cpp
@@ -58,16 +58,16 @@ public:
Error deallocateStubs();
- Error createStub(StringRef StubName, JITTargetAddress StubAddr,
+ Error createStub(StringRef StubName, ExecutorAddr StubAddr,
JITSymbolFlags StubFlags) override;
Error createStubs(const StubInitsMap &StubInits) override;
- JITEvaluatedSymbol findStub(StringRef Name, bool ExportedStubsOnly) override;
+ ExecutorSymbolDef findStub(StringRef Name, bool ExportedStubsOnly) override;
- JITEvaluatedSymbol findPointer(StringRef Name) override;
+ ExecutorSymbolDef findPointer(StringRef Name) override;
- Error updatePointer(StringRef Name, JITTargetAddress NewAddr) override;
+ Error updatePointer(StringRef Name, ExecutorAddr NewAddr) override;
private:
using StubInfo = std::pair<IndirectStubInfo, JITSymbolFlags>;
@@ -118,12 +118,10 @@ Error EPCTrampolinePool::grow() {
unsigned NumTrampolines = TrampolinesPerPage;
auto SegInfo = Alloc->getSegInfo(MemProt::Read | MemProt::Exec);
- EPCIU.getABISupport().writeTrampolines(SegInfo.WorkingMem.data(),
- SegInfo.Addr.getValue(),
- ResolverAddress, NumTrampolines);
+ EPCIU.getABISupport().writeTrampolines(
+ SegInfo.WorkingMem.data(), SegInfo.Addr, ResolverAddress, NumTrampolines);
for (unsigned I = 0; I < NumTrampolines; ++I)
- AvailableTrampolines.push_back(SegInfo.Addr.getValue() +
- (I * TrampolineSize));
+ AvailableTrampolines.push_back(SegInfo.Addr + (I * TrampolineSize));
auto FA = Alloc->finalize();
if (!FA)
@@ -135,7 +133,7 @@ Error EPCTrampolinePool::grow() {
}
Error EPCIndirectStubsManager::createStub(StringRef StubName,
- JITTargetAddress StubAddr,
+ ExecutorAddr StubAddr,
JITSymbolFlags StubFlags) {
StubInitsMap SIM;
SIM[StubName] = std::make_pair(StubAddr, StubFlags);
@@ -162,18 +160,16 @@ Error EPCIndirectStubsManager::createStubs(const StubInitsMap &StubInits) {
unsigned ASIdx = 0;
std::vector<tpctypes::UInt32Write> PtrUpdates;
for (auto &SI : StubInits)
- PtrUpdates.push_back(
- {ExecutorAddr((*AvailableStubInfos)[ASIdx++].PointerAddress),
- static_cast<uint32_t>(SI.second.first)});
+ PtrUpdates.push_back({(*AvailableStubInfos)[ASIdx++].PointerAddress,
+ static_cast<uint32_t>(SI.second.first.getValue())});
return MemAccess.writeUInt32s(PtrUpdates);
}
case 8: {
unsigned ASIdx = 0;
std::vector<tpctypes::UInt64Write> PtrUpdates;
for (auto &SI : StubInits)
- PtrUpdates.push_back(
- {ExecutorAddr((*AvailableStubInfos)[ASIdx++].PointerAddress),
- static_cast<uint64_t>(SI.second.first)});
+ PtrUpdates.push_back({(*AvailableStubInfos)[ASIdx++].PointerAddress,
+ static_cast<uint64_t>(SI.second.first.getValue())});
return MemAccess.writeUInt64s(PtrUpdates);
}
default:
@@ -182,27 +178,27 @@ Error EPCIndirectStubsManager::createStubs(const StubInitsMap &StubInits) {
}
}
-JITEvaluatedSymbol EPCIndirectStubsManager::findStub(StringRef Name,
- bool ExportedStubsOnly) {
+ExecutorSymbolDef EPCIndirectStubsManager::findStub(StringRef Name,
+ bool ExportedStubsOnly) {
std::lock_guard<std::mutex> Lock(ISMMutex);
auto I = StubInfos.find(Name);
if (I == StubInfos.end())
- return nullptr;
+ return ExecutorSymbolDef();
return {I->second.first.StubAddress, I->second.second};
}
-JITEvaluatedSymbol EPCIndirectStubsManager::findPointer(StringRef Name) {
+ExecutorSymbolDef EPCIndirectStubsManager::findPointer(StringRef Name) {
std::lock_guard<std::mutex> Lock(ISMMutex);
auto I = StubInfos.find(Name);
if (I == StubInfos.end())
- return nullptr;
+ return ExecutorSymbolDef();
return {I->second.first.PointerAddress, I->second.second};
}
Error EPCIndirectStubsManager::updatePointer(StringRef Name,
- JITTargetAddress NewAddr) {
+ ExecutorAddr NewAddr) {
- JITTargetAddress PtrAddr = 0;
+ ExecutorAddr PtrAddr;
{
std::lock_guard<std::mutex> Lock(ISMMutex);
auto I = StubInfos.find(Name);
@@ -215,11 +211,11 @@ Error EPCIndirectStubsManager::updatePointer(StringRef Name,
auto &MemAccess = EPCIU.getExecutorProcessControl().getMemoryAccess();
switch (EPCIU.getABISupport().getPointerSize()) {
case 4: {
- tpctypes::UInt32Write PUpdate(ExecutorAddr(PtrAddr), NewAddr);
+ tpctypes::UInt32Write PUpdate(PtrAddr, NewAddr.getValue());
return MemAccess.writeUInt32s(PUpdate);
}
case 8: {
- tpctypes::UInt64Write PUpdate(ExecutorAddr(PtrAddr), NewAddr);
+ tpctypes::UInt64Write PUpdate(PtrAddr, NewAddr.getValue());
return MemAccess.writeUInt64s(PUpdate);
}
default:
@@ -290,9 +286,9 @@ Error EPCIndirectionUtils::cleanup() {
return Err;
}
-Expected<JITTargetAddress>
-EPCIndirectionUtils::writeResolverBlock(JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr) {
+Expected<ExecutorAddr>
+EPCIndirectionUtils::writeResolverBlock(ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr) {
using namespace jitlink;
assert(ABI && "ABI can not be null");
@@ -307,7 +303,7 @@ EPCIndirectionUtils::writeResolverBlock(JITTargetAddress ReentryFnAddr,
return Alloc.takeError();
auto SegInfo = Alloc->getSegInfo(MemProt::Read | MemProt::Exec);
- ResolverBlockAddr = SegInfo.Addr.getValue();
+ ResolverBlockAddr = SegInfo.Addr;
ABI->writeResolverCode(SegInfo.WorkingMem.data(), ResolverBlockAddr,
ReentryFnAddr, ReentryCtxAddr);
@@ -331,7 +327,7 @@ TrampolinePool &EPCIndirectionUtils::getTrampolinePool() {
}
LazyCallThroughManager &EPCIndirectionUtils::createLazyCallThroughManager(
- ExecutionSession &ES, JITTargetAddress ErrorHandlerAddr) {
+ ExecutionSession &ES, ExecutorAddr ErrorHandlerAddr) {
assert(!LCTM &&
"createLazyCallThroughManager can not have been called before");
LCTM = std::make_unique<LazyCallThroughManager>(ES, ErrorHandlerAddr,
@@ -377,9 +373,8 @@ EPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) {
auto StubSeg = Alloc->getSegInfo(StubProt);
auto PtrSeg = Alloc->getSegInfo(PtrProt);
- ABI->writeIndirectStubsBlock(StubSeg.WorkingMem.data(),
- StubSeg.Addr.getValue(),
- PtrSeg.Addr.getValue(), NumStubsToAllocate);
+ ABI->writeIndirectStubsBlock(StubSeg.WorkingMem.data(), StubSeg.Addr,
+ PtrSeg.Addr, NumStubsToAllocate);
auto FA = Alloc->finalize();
if (!FA)
@@ -390,8 +385,8 @@ EPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) {
auto StubExecutorAddr = StubSeg.Addr;
auto PtrExecutorAddr = PtrSeg.Addr;
for (unsigned I = 0; I != NumStubsToAllocate; ++I) {
- AvailableIndirectStubs.push_back(IndirectStubInfo(
- StubExecutorAddr.getValue(), PtrExecutorAddr.getValue()));
+ AvailableIndirectStubs.push_back(
+ IndirectStubInfo(StubExecutorAddr, PtrExecutorAddr));
StubExecutorAddr += ABI->getStubSize();
PtrExecutorAddr += ABI->getPointerSize();
}
@@ -412,19 +407,19 @@ EPCIndirectionUtils::getIndirectStubs(unsigned NumStubs) {
static JITTargetAddress reentry(JITTargetAddress LCTMAddr,
JITTargetAddress TrampolineAddr) {
auto &LCTM = *jitTargetAddressToPointer<LazyCallThroughManager *>(LCTMAddr);
- std::promise<JITTargetAddress> LandingAddrP;
+ std::promise<ExecutorAddr> LandingAddrP;
auto LandingAddrF = LandingAddrP.get_future();
LCTM.resolveTrampolineLandingAddress(
- TrampolineAddr,
- [&](JITTargetAddress Addr) { LandingAddrP.set_value(Addr); });
- return LandingAddrF.get();
+ ExecutorAddr(TrampolineAddr),
+ [&](ExecutorAddr Addr) { LandingAddrP.set_value(Addr); });
+ return LandingAddrF.get().getValue();
}
Error setUpInProcessLCTMReentryViaEPCIU(EPCIndirectionUtils &EPCIU) {
auto &LCTM = EPCIU.getLazyCallThroughManager();
return EPCIU
- .writeResolverBlock(pointerToJITTargetAddress(&reentry),
- pointerToJITTargetAddress(&LCTM))
+ .writeResolverBlock(ExecutorAddr::fromPtr(&reentry),
+ ExecutorAddr::fromPtr(&LCTM))
.takeError();
}
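The hunks above migrate EPCIndirectionUtils from raw JITTargetAddress values to the ExecutorAddr wrapper. As a rough stand-alone sketch (not part of this patch; the example function is hypothetical), the conversions used throughout look like:

// Minimal sketch of the ExecutorAddr conversions that replace the old
// pointerToJITTargetAddress / jitTargetAddressToPointer helpers.
#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h"
#include <cassert>
#include <cstdint>

static int exampleFn() { return 42; } // hypothetical in-process function

int main() {
  using llvm::orc::ExecutorAddr;
  ExecutorAddr Addr = ExecutorAddr::fromPtr(&exampleFn); // pointer -> address
  auto *Fn = Addr.toPtr<int (*)()>();                    // address -> pointer
  assert(Fn() == 42);
  uint64_t Raw = Addr.getValue(); // underlying integer, where one is still needed
  (void)Raw;
  return 0;
}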
diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
index 377a59993eb0..fb685e6c3727 100644
--- a/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ExecutionUtils.cpp
@@ -156,8 +156,7 @@ Error CtorDtorRunner::run() {
for (auto &KV : CtorDtorsByPriority) {
for (auto &Name : KV.second) {
assert(CtorDtorMap->count(Name) && "No entry for Name");
- auto CtorDtor = reinterpret_cast<CtorDtorTy>(
- static_cast<uintptr_t>((*CtorDtorMap)[Name].getAddress()));
+ auto CtorDtor = (*CtorDtorMap)[Name].getAddress().toPtr<CtorDtorTy>();
CtorDtor();
}
}
@@ -186,12 +185,10 @@ int LocalCXXRuntimeOverridesBase::CXAAtExitOverride(DestructorPtr Destructor,
Error LocalCXXRuntimeOverrides::enable(JITDylib &JD,
MangleAndInterner &Mangle) {
SymbolMap RuntimeInterposes;
- RuntimeInterposes[Mangle("__dso_handle")] =
- JITEvaluatedSymbol(toTargetAddress(&DSOHandleOverride),
- JITSymbolFlags::Exported);
- RuntimeInterposes[Mangle("__cxa_atexit")] =
- JITEvaluatedSymbol(toTargetAddress(&CXAAtExitOverride),
- JITSymbolFlags::Exported);
+ RuntimeInterposes[Mangle("__dso_handle")] = {
+ ExecutorAddr::fromPtr(&DSOHandleOverride), JITSymbolFlags::Exported};
+ RuntimeInterposes[Mangle("__cxa_atexit")] = {
+ ExecutorAddr::fromPtr(&CXAAtExitOverride), JITSymbolFlags::Exported};
return JD.define(absoluteSymbols(std::move(RuntimeInterposes)));
}
@@ -257,11 +254,8 @@ Error DynamicLibrarySearchGenerator::tryToGenerate(
std::string Tmp((*Name).data() + HasGlobalPrefix,
(*Name).size() - HasGlobalPrefix);
- if (void *Addr = Dylib.getAddressOfSymbol(Tmp.c_str())) {
- NewSymbols[Name] = JITEvaluatedSymbol(
- static_cast<JITTargetAddress>(reinterpret_cast<uintptr_t>(Addr)),
- JITSymbolFlags::Exported);
- }
+ if (void *P = Dylib.getAddressOfSymbol(Tmp.c_str()))
+ NewSymbols[Name] = {ExecutorAddr::fromPtr(P), JITSymbolFlags::Exported};
}
if (NewSymbols.empty())
@@ -274,57 +268,41 @@ Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>>
StaticLibraryDefinitionGenerator::Load(
ObjectLayer &L, const char *FileName,
GetObjectFileInterface GetObjFileInterface) {
- auto ArchiveBuffer = MemoryBuffer::getFile(FileName);
-
- if (!ArchiveBuffer)
- return createFileError(FileName, ArchiveBuffer.getError());
-
- return Create(L, std::move(*ArchiveBuffer), std::move(GetObjFileInterface));
-}
-
-Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>>
-StaticLibraryDefinitionGenerator::Load(
- ObjectLayer &L, const char *FileName, const Triple &TT,
- GetObjectFileInterface GetObjFileInterface) {
auto B = object::createBinary(FileName);
if (!B)
return createFileError(FileName, B.takeError());
// If this is a regular archive then create an instance from it.
- if (isa<object::Archive>(B->getBinary()))
- return Create(L, std::move(B->takeBinary().second),
+ if (isa<object::Archive>(B->getBinary())) {
+ auto [Archive, ArchiveBuffer] = B->takeBinary();
+ return Create(L, std::move(ArchiveBuffer),
+ std::unique_ptr<object::Archive>(
+ static_cast<object::Archive *>(Archive.release())),
std::move(GetObjFileInterface));
+ }
// If this is a universal binary then search for a slice matching the given
// Triple.
if (auto *UB = cast<object::MachOUniversalBinary>(B->getBinary())) {
- for (const auto &Obj : UB->objects()) {
- auto ObjTT = Obj.getTriple();
- if (ObjTT.getArch() == TT.getArch() &&
- ObjTT.getSubArch() == TT.getSubArch() &&
- (TT.getVendor() == Triple::UnknownVendor ||
- ObjTT.getVendor() == TT.getVendor())) {
- // We found a match. Create an instance from a buffer covering this
- // slice.
- auto SliceBuffer = MemoryBuffer::getFileSlice(FileName, Obj.getSize(),
- Obj.getOffset());
- if (!SliceBuffer)
- return make_error<StringError>(
- Twine("Could not create buffer for ") + TT.str() + " slice of " +
- FileName + ": [ " + formatv("{0:x}", Obj.getOffset()) +
- " .. " + formatv("{0:x}", Obj.getOffset() + Obj.getSize()) +
- ": " + SliceBuffer.getError().message(),
- SliceBuffer.getError());
- return Create(L, std::move(*SliceBuffer),
- std::move(GetObjFileInterface));
- }
- }
- return make_error<StringError>(Twine("Universal binary ") + FileName +
- " does not contain a slice for " +
- TT.str(),
- inconvertibleErrorCode());
+ const auto &TT = L.getExecutionSession().getTargetTriple();
+
+ auto SliceRange = getSliceRangeForArch(*UB, TT);
+ if (!SliceRange)
+ return SliceRange.takeError();
+
+ auto SliceBuffer = MemoryBuffer::getFileSlice(FileName, SliceRange->second,
+ SliceRange->first);
+ if (!SliceBuffer)
+ return make_error<StringError>(
+ Twine("Could not create buffer for ") + TT.str() + " slice of " +
+ FileName + ": [ " + formatv("{0:x}", SliceRange->first) + " .. " +
+ formatv("{0:x}", SliceRange->first + SliceRange->second) + ": " +
+ SliceBuffer.getError().message(),
+ SliceBuffer.getError());
+
+ return Create(L, std::move(*SliceBuffer), std::move(GetObjFileInterface));
}
return make_error<StringError>(Twine("Unrecognized file type for ") +
@@ -335,12 +313,15 @@ StaticLibraryDefinitionGenerator::Load(
Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>>
StaticLibraryDefinitionGenerator::Create(
ObjectLayer &L, std::unique_ptr<MemoryBuffer> ArchiveBuffer,
+ std::unique_ptr<object::Archive> Archive,
GetObjectFileInterface GetObjFileInterface) {
+
Error Err = Error::success();
std::unique_ptr<StaticLibraryDefinitionGenerator> ADG(
new StaticLibraryDefinitionGenerator(
- L, std::move(ArchiveBuffer), std::move(GetObjFileInterface), Err));
+ L, std::move(ArchiveBuffer), std::move(Archive),
+ std::move(GetObjFileInterface), Err));
if (Err)
return std::move(Err);
@@ -348,6 +329,50 @@ StaticLibraryDefinitionGenerator::Create(
return std::move(ADG);
}
+Expected<std::unique_ptr<StaticLibraryDefinitionGenerator>>
+StaticLibraryDefinitionGenerator::Create(
+ ObjectLayer &L, std::unique_ptr<MemoryBuffer> ArchiveBuffer,
+ GetObjectFileInterface GetObjFileInterface) {
+
+ auto B = object::createBinary(ArchiveBuffer->getMemBufferRef());
+ if (!B)
+ return B.takeError();
+
+ // If this is a regular archive then create an instance from it.
+ if (isa<object::Archive>(*B))
+ return Create(L, std::move(ArchiveBuffer),
+ std::unique_ptr<object::Archive>(
+ static_cast<object::Archive *>(B->release())),
+ std::move(GetObjFileInterface));
+
+ // If this is a universal binary then search for a slice matching the given
+ // Triple.
+ if (auto *UB = cast<object::MachOUniversalBinary>(B->get())) {
+
+ const auto &TT = L.getExecutionSession().getTargetTriple();
+
+ auto SliceRange = getSliceRangeForArch(*UB, TT);
+ if (!SliceRange)
+ return SliceRange.takeError();
+
+ MemoryBufferRef SliceRef(
+ StringRef(ArchiveBuffer->getBufferStart() + SliceRange->first,
+ SliceRange->second),
+ ArchiveBuffer->getBufferIdentifier());
+
+ auto Archive = object::Archive::create(SliceRef);
+ if (!Archive)
+ return Archive.takeError();
+
+ return Create(L, std::move(ArchiveBuffer), std::move(*Archive),
+ std::move(GetObjFileInterface));
+ }
+
+ return make_error<StringError>(Twine("Unrecognized file type for ") +
+ ArchiveBuffer->getBufferIdentifier(),
+ inconvertibleErrorCode());
+}
+
Error StaticLibraryDefinitionGenerator::tryToGenerate(
LookupState &LS, LookupKind K, JITDylib &JD,
JITDylibLookupFlags JDLookupFlags, const SymbolLookupSet &Symbols) {
@@ -417,12 +442,33 @@ Error StaticLibraryDefinitionGenerator::buildObjectFilesMap() {
return Error::success();
}
+Expected<std::pair<size_t, size_t>>
+StaticLibraryDefinitionGenerator::getSliceRangeForArch(
+ object::MachOUniversalBinary &UB, const Triple &TT) {
+
+ for (const auto &Obj : UB.objects()) {
+ auto ObjTT = Obj.getTriple();
+ if (ObjTT.getArch() == TT.getArch() &&
+ ObjTT.getSubArch() == TT.getSubArch() &&
+ (TT.getVendor() == Triple::UnknownVendor ||
+ ObjTT.getVendor() == TT.getVendor())) {
+ // We found a match. Return the range for the slice.
+ return std::make_pair(Obj.getOffset(), Obj.getSize());
+ }
+ }
+
+ return make_error<StringError>(Twine("Universal binary ") + UB.getFileName() +
+ " does not contain a slice for " +
+ TT.str(),
+ inconvertibleErrorCode());
+}
+
StaticLibraryDefinitionGenerator::StaticLibraryDefinitionGenerator(
ObjectLayer &L, std::unique_ptr<MemoryBuffer> ArchiveBuffer,
+ std::unique_ptr<object::Archive> Archive,
GetObjectFileInterface GetObjFileInterface, Error &Err)
: L(L), GetObjFileInterface(std::move(GetObjFileInterface)),
- ArchiveBuffer(std::move(ArchiveBuffer)),
- Archive(std::make_unique<object::Archive>(*this->ArchiveBuffer, Err)) {
+ ArchiveBuffer(std::move(ArchiveBuffer)), Archive(std::move(Archive)) {
ErrorAsOutParameter _(&Err);
if (!this->GetObjFileInterface)
this->GetObjFileInterface = getObjectFileInterface;
@@ -506,7 +552,7 @@ DLLImportDefinitionGenerator::getTargetEndianness(const Triple &TT) {
Expected<std::unique_ptr<jitlink::LinkGraph>>
DLLImportDefinitionGenerator::createStubsGraph(const SymbolMap &Resolved) {
- Triple TT = ES.getExecutorProcessControl().getTargetTriple();
+ Triple TT = ES.getTargetTriple();
auto PointerSize = getTargetEndianness(TT);
if (!PointerSize)
return PointerSize.takeError();
@@ -522,13 +568,13 @@ DLLImportDefinitionGenerator::createStubsGraph(const SymbolMap &Resolved) {
for (auto &KV : Resolved) {
jitlink::Symbol &Target = G->addAbsoluteSymbol(
- *KV.first, ExecutorAddr(KV.second.getAddress()), *PointerSize,
+ *KV.first, KV.second.getAddress(), *PointerSize,
jitlink::Linkage::Strong, jitlink::Scope::Local, false);
// Create __imp_ symbol
jitlink::Symbol &Ptr =
jitlink::x86_64::createAnonymousPointer(*G, Sec, &Target);
- auto NameCopy = G->allocateString(Twine(getImpPrefix()) + *KV.first);
+ auto NameCopy = G->allocateContent(Twine(getImpPrefix()) + *KV.first);
StringRef NameCopyRef = StringRef(NameCopy.data(), NameCopy.size());
Ptr.setName(NameCopyRef);
Ptr.setLinkage(jitlink::Linkage::Strong);
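The ExecutionUtils changes above replace JITEvaluatedSymbol with brace-initialized ExecutorSymbolDef entries in SymbolMap. A small sketch of defining an in-process helper in that style (the session, JITDylib, and symbol name are assumptions, not part of this patch):

// Sketch: define an absolute symbol using the {ExecutorAddr, JITSymbolFlags}
// form used throughout this patch.
#include "llvm/ExecutionEngine/Orc/Core.h"

using namespace llvm;
using namespace llvm::orc;

static void myHelper() {} // hypothetical in-process helper

Error defineHelper(ExecutionSession &ES, JITDylib &JD) {
  SymbolMap Syms;
  Syms[ES.intern("my_helper")] = {ExecutorAddr::fromPtr(&myHelper),
                                  JITSymbolFlags::Exported};
  return JD.define(absoluteSymbols(std::move(Syms)));
}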
diff --git a/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp
index 361fcd4a2e9c..b8b013f8a7a9 100644
--- a/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ExecutorProcessControl.cpp
@@ -11,8 +11,8 @@
#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/TargetProcess/TargetExecutionUtils.h"
#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/Process.h"
+#include "llvm/TargetParser/Host.h"
#define DEBUG_TYPE "orc"
@@ -192,7 +192,7 @@ SelfExecutorProcessControl::jitDispatchViaWrapperFunctionManager(
shared::WrapperFunctionResult Result) mutable {
ResultP.set_value(std::move(Result));
},
- pointerToJITTargetAddress(FnTag), {Data, Size});
+ ExecutorAddr::fromPtr(FnTag), {Data, Size});
return ResultF.get().release();
}
diff --git a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index 989bb094cc25..a0d81cdf2086 100644
--- a/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -8,13 +8,13 @@
#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/Support/Format.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <sstream>
@@ -40,7 +40,7 @@ public:
private:
void materialize(std::unique_ptr<MaterializationResponsibility> R) override {
SymbolMap Result;
- Result[Name] = JITEvaluatedSymbol(Compile(), JITSymbolFlags::Exported);
+ Result[Name] = {Compile(), JITSymbolFlags::Exported};
// No dependencies, so these calls cannot fail.
cantFail(R->notifyResolved(Result));
cantFail(R->notifyEmitted());
@@ -62,7 +62,7 @@ namespace orc {
TrampolinePool::~TrampolinePool() = default;
void IndirectStubsManager::anchor() {}
-Expected<JITTargetAddress>
+Expected<ExecutorAddr>
JITCompileCallbackManager::getCompileCallback(CompileFunction Compile) {
if (auto TrampolineAddr = TP->getTrampoline()) {
auto CallbackName =
@@ -78,8 +78,8 @@ JITCompileCallbackManager::getCompileCallback(CompileFunction Compile) {
return TrampolineAddr.takeError();
}
-JITTargetAddress JITCompileCallbackManager::executeCompileCallback(
- JITTargetAddress TrampolineAddr) {
+ExecutorAddr
+JITCompileCallbackManager::executeCompileCallback(ExecutorAddr TrampolineAddr) {
SymbolStringPtr Name;
{
@@ -91,14 +91,10 @@ JITTargetAddress JITCompileCallbackManager::executeCompileCallback(
// callee.
if (I == AddrToSymbol.end()) {
Lock.unlock();
- std::string ErrMsg;
- {
- raw_string_ostream ErrMsgStream(ErrMsg);
- ErrMsgStream << "No compile callback for trampoline at "
- << format("0x%016" PRIx64, TrampolineAddr);
- }
ES.reportError(
- make_error<StringError>(std::move(ErrMsg), inconvertibleErrorCode()));
+ make_error<StringError>("No compile callback for trampoline at " +
+ formatv("{0:x}", TrampolineAddr),
+ inconvertibleErrorCode()));
return ErrorHandlerAddress;
} else
Name = I->second;
@@ -120,7 +116,7 @@ JITTargetAddress JITCompileCallbackManager::executeCompileCallback(
Expected<std::unique_ptr<JITCompileCallbackManager>>
createLocalCompileCallbackManager(const Triple &T, ExecutionSession &ES,
- JITTargetAddress ErrorHandlerAddress) {
+ ExecutorAddr ErrorHandlerAddress) {
switch (T.getArch()) {
default:
return make_error<StringError>(
@@ -244,9 +240,9 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) {
}
}
-Constant* createIRTypedAddress(FunctionType &FT, JITTargetAddress Addr) {
+Constant* createIRTypedAddress(FunctionType &FT, ExecutorAddr Addr) {
Constant *AddrIntVal =
- ConstantInt::get(Type::getInt64Ty(FT.getContext()), Addr);
+ ConstantInt::get(Type::getInt64Ty(FT.getContext()), Addr.getValue());
Constant *AddrPtrVal =
ConstantExpr::getCast(Instruction::IntToPtr, AddrIntVal,
PointerType::get(&FT, 0));
@@ -329,26 +325,6 @@ Function* cloneFunctionDecl(Module &Dst, const Function &F,
return NewF;
}
-void moveFunctionBody(Function &OrigF, ValueToValueMapTy &VMap,
- ValueMaterializer *Materializer,
- Function *NewF) {
- assert(!OrigF.isDeclaration() && "Nothing to move");
- if (!NewF)
- NewF = cast<Function>(VMap[&OrigF]);
- else
- assert(VMap[&OrigF] == NewF && "Incorrect function mapping in VMap.");
- assert(NewF && "Function mapping missing from VMap.");
- assert(NewF->getParent() != OrigF.getParent() &&
- "moveFunctionBody should only be used to move bodies between "
- "modules.");
-
- SmallVector<ReturnInst *, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(NewF, &OrigF, VMap,
- CloneFunctionChangeType::DifferentModule, Returns, "",
- nullptr, nullptr, Materializer);
- OrigF.deleteBody();
-}
-
GlobalVariable* cloneGlobalVariableDecl(Module &Dst, const GlobalVariable &GV,
ValueToValueMapTy *VMap) {
GlobalVariable *NewGV = new GlobalVariable(
@@ -361,24 +337,6 @@ GlobalVariable* cloneGlobalVariableDecl(Module &Dst, const GlobalVariable &GV,
return NewGV;
}
-void moveGlobalVariableInitializer(GlobalVariable &OrigGV,
- ValueToValueMapTy &VMap,
- ValueMaterializer *Materializer,
- GlobalVariable *NewGV) {
- assert(OrigGV.hasInitializer() && "Nothing to move");
- if (!NewGV)
- NewGV = cast<GlobalVariable>(VMap[&OrigGV]);
- else
- assert(VMap[&OrigGV] == NewGV &&
- "Incorrect global variable mapping in VMap.");
- assert(NewGV->getParent() != OrigGV.getParent() &&
- "moveGlobalVariableInitializer should only be used to move "
- "initializers between modules");
-
- NewGV->setInitializer(MapValue(OrigGV.getInitializer(), VMap, RF_None,
- nullptr, Materializer));
-}
-
GlobalAlias* cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA,
ValueToValueMapTy &VMap) {
assert(OrigA.getAliasee() && "Original alias doesn't have an aliasee?");
@@ -390,15 +348,6 @@ GlobalAlias* cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA,
return NewA;
}
-void cloneModuleFlagsMetadata(Module &Dst, const Module &Src,
- ValueToValueMapTy &VMap) {
- auto *MFs = Src.getModuleFlagsMetadata();
- if (!MFs)
- return;
- for (auto *MF : MFs->operands())
- Dst.addModuleFlag(MapMetadata(MF, VMap));
-}
-
Error addFunctionPointerRelocationsToCurrentSymbol(jitlink::Symbol &Sym,
jitlink::LinkGraph &G,
MCDisassembler &Disassembler,
diff --git a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
index 70a3c404d836..b66f52f1ec5d 100644
--- a/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/JITTargetMachineBuilder.cpp
@@ -9,8 +9,8 @@
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/MC/TargetRegistry.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Host.h"
namespace llvm {
namespace orc {
@@ -18,13 +18,10 @@ namespace orc {
JITTargetMachineBuilder::JITTargetMachineBuilder(Triple TT)
: TT(std::move(TT)) {
Options.EmulatedTLS = true;
- Options.ExplicitEmulatedTLS = true;
Options.UseInitArray = true;
}
Expected<JITTargetMachineBuilder> JITTargetMachineBuilder::detectHost() {
- // FIXME: getProcessTriple is bogus. It returns the host LLVM was compiled on,
- // rather than a valid triple for the current process.
JITTargetMachineBuilder TMBuilder((Triple(sys::getProcessTriple())));
// Retrieve host CPU name and sub-target features and add them to builder.
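For reference, a client-side sketch of how JITTargetMachineBuilder is typically driven (assuming the native target has already been initialized; not part of this patch):

// Sketch: detect the host configuration and build a TargetMachine from it.
// InitializeNativeTarget() / InitializeNativeTargetAsmPrinter() must have
// been called earlier for createTargetMachine() to succeed.
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/Support/Error.h"
#include "llvm/Target/TargetMachine.h"
#include <memory>

llvm::Expected<std::unique_ptr<llvm::TargetMachine>> makeHostTargetMachine() {
  auto JTMB = llvm::orc::JITTargetMachineBuilder::detectHost();
  if (!JTMB)
    return JTMB.takeError();
  return JTMB->createTargetMachine();
}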
diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
index bc84988e3254..7c7c2f000368 100644
--- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -9,6 +9,11 @@
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
#include "llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h"
+#include "llvm/ExecutionEngine/Orc/COFFPlatform.h"
+#include "llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h"
+#include "llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h"
+#include "llvm/ExecutionEngine/Orc/ELFNixPlatform.h"
+#include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h"
#include "llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h"
#include "llvm/ExecutionEngine/Orc/ExecutorProcessControl.h"
#include "llvm/ExecutionEngine/Orc/MachOPlatform.h"
@@ -98,10 +103,16 @@ public:
ORC_RT_RTLD_GLOBAL = 0x8
};
- if (auto WrapperAddr = J.lookup("__orc_rt_jit_dlopen_wrapper")) {
- return J.getExecutionSession().callSPSWrapper<SPSDLOpenSig>(
- *WrapperAddr, DSOHandles[&JD], JD.getName(),
- int32_t(ORC_RT_RTLD_LAZY));
+ auto &ES = J.getExecutionSession();
+ auto MainSearchOrder = J.getMainJITDylib().withLinkOrderDo(
+ [](const JITDylibSearchOrder &SO) { return SO; });
+
+ if (auto WrapperAddr =
+ ES.lookup(MainSearchOrder,
+ J.mangleAndIntern("__orc_rt_jit_dlopen_wrapper"))) {
+ return ES.callSPSWrapper<SPSDLOpenSig>(WrapperAddr->getAddress(),
+ DSOHandles[&JD], JD.getName(),
+ int32_t(ORC_RT_RTLD_LAZY));
} else
return WrapperAddr.takeError();
}
@@ -110,10 +121,16 @@ public:
using llvm::orc::shared::SPSExecutorAddr;
using SPSDLCloseSig = int32_t(SPSExecutorAddr);
- if (auto WrapperAddr = J.lookup("__orc_rt_jit_dlclose_wrapper")) {
+ auto &ES = J.getExecutionSession();
+ auto MainSearchOrder = J.getMainJITDylib().withLinkOrderDo(
+ [](const JITDylibSearchOrder &SO) { return SO; });
+
+ if (auto WrapperAddr =
+ ES.lookup(MainSearchOrder,
+ J.mangleAndIntern("__orc_rt_jit_dlclose_wrapper"))) {
int32_t result;
auto E = J.getExecutionSession().callSPSWrapper<SPSDLCloseSig>(
- *WrapperAddr, result, DSOHandles[&JD]);
+ WrapperAddr->getAddress(), result, DSOHandles[&JD]);
if (E)
return E;
else if (result)
@@ -176,7 +193,7 @@ private:
/// some runtime API, including __cxa_atexit, dlopen, and dlclose.
class GenericLLVMIRPlatformSupport : public LLJIT::PlatformSupport {
public:
- GenericLLVMIRPlatformSupport(LLJIT &J)
+ GenericLLVMIRPlatformSupport(LLJIT &J, JITDylib &PlatformJD)
: J(J), InitFunctionPrefix(J.mangle("__orc_init_func.")),
DeInitFunctionPrefix(J.mangle("__orc_deinit_func.")) {
@@ -188,17 +205,14 @@ public:
SymbolMap StdInterposes;
- StdInterposes[J.mangleAndIntern("__lljit.platform_support_instance")] =
- JITEvaluatedSymbol(pointerToJITTargetAddress(this),
- JITSymbolFlags::Exported);
- StdInterposes[J.mangleAndIntern("__lljit.cxa_atexit_helper")] =
- JITEvaluatedSymbol(pointerToJITTargetAddress(registerCxaAtExitHelper),
- JITSymbolFlags());
-
- cantFail(
- J.getMainJITDylib().define(absoluteSymbols(std::move(StdInterposes))));
- cantFail(setupJITDylib(J.getMainJITDylib()));
- cantFail(J.addIRModule(J.getMainJITDylib(), createPlatformRuntimeModule()));
+ StdInterposes[J.mangleAndIntern("__lljit.platform_support_instance")] = {
+ ExecutorAddr::fromPtr(this), JITSymbolFlags::Exported};
+ StdInterposes[J.mangleAndIntern("__lljit.cxa_atexit_helper")] = {
+ ExecutorAddr::fromPtr(registerCxaAtExitHelper), JITSymbolFlags()};
+
+ cantFail(PlatformJD.define(absoluteSymbols(std::move(StdInterposes))));
+ cantFail(setupJITDylib(PlatformJD));
+ cantFail(J.addIRModule(PlatformJD, createPlatformRuntimeModule()));
}
ExecutionSession &getExecutionSession() { return J.getExecutionSession(); }
@@ -208,12 +222,10 @@ public:
// Add per-jitdylib standard interposes.
SymbolMap PerJDInterposes;
- PerJDInterposes[J.mangleAndIntern("__lljit.run_atexits_helper")] =
- JITEvaluatedSymbol(pointerToJITTargetAddress(runAtExitsHelper),
- JITSymbolFlags());
- PerJDInterposes[J.mangleAndIntern("__lljit.atexit_helper")] =
- JITEvaluatedSymbol(pointerToJITTargetAddress(registerAtExitHelper),
- JITSymbolFlags());
+ PerJDInterposes[J.mangleAndIntern("__lljit.run_atexits_helper")] = {
+ ExecutorAddr::fromPtr(runAtExitsHelper), JITSymbolFlags()};
+ PerJDInterposes[J.mangleAndIntern("__lljit.atexit_helper")] = {
+ ExecutorAddr::fromPtr(registerAtExitHelper), JITSymbolFlags()};
cantFail(JD.define(absoluteSymbols(std::move(PerJDInterposes))));
auto Ctx = std::make_unique<LLVMContext>();
@@ -227,7 +239,7 @@ public:
"__dso_handle");
DSOHandle->setVisibility(GlobalValue::DefaultVisibility);
DSOHandle->setInitializer(
- ConstantInt::get(Int64Ty, pointerToJITTargetAddress(&JD)));
+ ConstantInt::get(Int64Ty, ExecutorAddr::fromPtr(&JD).getValue()));
auto *GenericIRPlatformSupportTy =
StructType::create(*Ctx, "lljit.GenericLLJITIRPlatformSupport");
@@ -287,7 +299,7 @@ public:
dbgs() << " Running init " << formatv("{0:x16}", InitFnAddr)
<< "...\n";
});
- auto *InitFn = jitTargetAddressToFunction<void (*)()>(InitFnAddr);
+ auto *InitFn = InitFnAddr.toPtr<void (*)()>();
InitFn();
}
} else
@@ -308,7 +320,7 @@ public:
dbgs() << " Running deinit " << formatv("{0:x16}", DeinitFnAddr)
<< "...\n";
});
- auto *DeinitFn = jitTargetAddressToFunction<void (*)()>(DeinitFnAddr);
+ auto *DeinitFn = DeinitFnAddr.toPtr<void (*)()>();
DeinitFn();
}
} else
@@ -329,8 +341,7 @@ public:
}
private:
-
- Expected<std::vector<JITTargetAddress>> getInitializers(JITDylib &JD) {
+ Expected<std::vector<ExecutorAddr>> getInitializers(JITDylib &JD) {
if (auto Err = issueInitLookups(JD))
return std::move(Err);
@@ -370,7 +381,7 @@ private:
if (!LookupResult)
return LookupResult.takeError();
- std::vector<JITTargetAddress> Initializers;
+ std::vector<ExecutorAddr> Initializers;
while (!DFSLinkOrder.empty()) {
auto &NextJD = *DFSLinkOrder.back();
DFSLinkOrder.pop_back();
@@ -384,7 +395,7 @@ private:
return Initializers;
}
- Expected<std::vector<JITTargetAddress>> getDeinitializers(JITDylib &JD) {
+ Expected<std::vector<ExecutorAddr>> getDeinitializers(JITDylib &JD) {
auto &ES = getExecutionSession();
auto LLJITRunAtExits = J.mangleAndIntern("__lljit_run_atexits");
@@ -427,7 +438,7 @@ private:
if (!LookupResult)
return LookupResult.takeError();
- std::vector<JITTargetAddress> DeInitializers;
+ std::vector<ExecutorAddr> DeInitializers;
for (auto &NextJD : DFSLinkOrder) {
auto DeInitsItr = LookupResult->find(NextJD.get());
assert(DeInitsItr != LookupResult->end() &&
@@ -695,6 +706,14 @@ Error LLJITBuilderState::prepareForConstruction() {
dbgs() << "\n";
});
+ // Create DL if not specified.
+ if (!DL) {
+ if (auto DLOrErr = JTMB->getDefaultDataLayoutForTarget())
+ DL = std::move(*DLOrErr);
+ else
+ return DLOrErr.takeError();
+ }
+
// If neither ES nor EPC has been set then create an EPC instance.
if (!ES && !EPC) {
LLVM_DEBUG({
@@ -705,21 +724,38 @@ Error LLJITBuilderState::prepareForConstruction() {
EPC = std::move(*EPCOrErr);
else
return EPCOrErr.takeError();
- } else
+ } else if (EPC) {
LLVM_DEBUG({
dbgs() << "Using explicitly specified ExecutorProcessControl instance "
<< EPC.get() << "\n";
});
+ } else {
+ LLVM_DEBUG({
+ dbgs() << "Using explicitly specified ExecutionSession instance "
+ << ES.get() << "\n";
+ });
+ }
// If the client didn't configure any linker options then auto-configure the
// JIT linker.
if (!CreateObjectLinkingLayer) {
auto &TT = JTMB->getTargetTriple();
- if (TT.getArch() == Triple::riscv64 ||
- TT.getArch() == Triple::loongarch64 ||
- (TT.isOSBinFormatMachO() &&
- (TT.getArch() == Triple::aarch64 || TT.getArch() == Triple::x86_64))) {
-
+ bool UseJITLink = false;
+ switch (TT.getArch()) {
+ case Triple::riscv64:
+ case Triple::loongarch64:
+ UseJITLink = true;
+ break;
+ case Triple::aarch64:
+ UseJITLink = !TT.isOSBinFormatCOFF();
+ break;
+ case Triple::x86_64:
+ UseJITLink = !TT.isOSBinFormatCOFF();
+ break;
+ default:
+ break;
+ }
+ if (UseJITLink) {
JTMB->setRelocationModel(Reloc::PIC_);
JTMB->setCodeModel(CodeModel::Small);
CreateObjectLinkingLayer =
@@ -737,6 +773,30 @@ Error LLJITBuilderState::prepareForConstruction() {
}
}
+ // If we need a process JITDylib but no setup function has been given then
+ // create a default one.
+ if (!SetupProcessSymbolsJITDylib &&
+ (LinkProcessSymbolsByDefault || EnableDebuggerSupport)) {
+
+ LLVM_DEBUG({
+ dbgs() << "Creating default Process JD setup function (neeeded for";
+ if (LinkProcessSymbolsByDefault)
+ dbgs() << " <link-process-syms-by-default>";
+ if (EnableDebuggerSupport)
+ dbgs() << " <debugger-support>";
+ dbgs() << ")\n";
+ });
+
+ SetupProcessSymbolsJITDylib = [this](JITDylib &JD) -> Error {
+ auto G = orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
+ DL->getGlobalPrefix());
+ if (!G)
+ return G.takeError();
+ JD.addGenerator(std::move(*G));
+ return Error::success();
+ };
+ }
+
return Error::success();
}
@@ -747,6 +807,54 @@ LLJIT::~LLJIT() {
ES->reportError(std::move(Err));
}
+JITDylibSP LLJIT::getProcessSymbolsJITDylib() { return ProcessSymbols; }
+
+JITDylibSP LLJIT::getPlatformJITDylib() { return Platform; }
+
+Expected<JITDylib &> LLJIT::createJITDylib(std::string Name) {
+ auto JD = ES->createJITDylib(std::move(Name));
+ if (!JD)
+ return JD.takeError();
+
+ JD->addToLinkOrder(DefaultLinks);
+ return JD;
+}
+
+Expected<JITDylib &> LLJIT::loadPlatformDynamicLibrary(const char *Path) {
+ auto G = EPCDynamicLibrarySearchGenerator::Load(*ES, Path);
+ if (!G)
+ return G.takeError();
+
+ if (auto *ExistingJD = ES->getJITDylibByName(Path))
+ return *ExistingJD;
+
+ auto &JD = ES->createBareJITDylib(Path);
+ JD.addGenerator(std::move(*G));
+ return JD;
+}
+
+Error LLJIT::linkStaticLibraryInto(JITDylib &JD,
+ std::unique_ptr<MemoryBuffer> LibBuffer) {
+ auto G = StaticLibraryDefinitionGenerator::Create(*ObjLinkingLayer,
+ std::move(LibBuffer));
+ if (!G)
+ return G.takeError();
+
+ JD.addGenerator(std::move(*G));
+
+ return Error::success();
+}
+
+Error LLJIT::linkStaticLibraryInto(JITDylib &JD, const char *Path) {
+ auto G = StaticLibraryDefinitionGenerator::Load(*ObjLinkingLayer, Path);
+ if (!G)
+ return G.takeError();
+
+ JD.addGenerator(std::move(*G));
+
+ return Error::success();
+}
+
Error LLJIT::addIRModule(ResourceTrackerSP RT, ThreadSafeModule TSM) {
assert(TSM && "Can not add null module");
@@ -777,7 +885,7 @@ Expected<ExecutorAddr> LLJIT::lookupLinkerMangled(JITDylib &JD,
if (auto Sym = ES->lookup(
makeJITDylibSearchOrder(&JD, JITDylibLookupFlags::MatchAllSymbols),
Name))
- return ExecutorAddr(Sym->getAddress());
+ return Sym->getAddress();
else
return Sym.takeError();
}
@@ -832,7 +940,7 @@ LLJIT::createCompileFunction(LLJITBuilderState &S,
}
LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
- : DL(""), TT(S.JTMB->getTargetTriple()) {
+ : DL(std::move(*S.DL)), TT(S.JTMB->getTargetTriple()) {
ErrorAsOutParameter _(&Err);
@@ -851,22 +959,6 @@ LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
}
}
- if (auto MainOrErr = this->ES->createJITDylib("main"))
- Main = &*MainOrErr;
- else {
- Err = MainOrErr.takeError();
- return;
- }
-
- if (S.DL)
- DL = std::move(*S.DL);
- else if (auto DLOrErr = S.JTMB->getDefaultDataLayoutForTarget())
- DL = std::move(*DLOrErr);
- else {
- Err = DLOrErr.takeError();
- return;
- }
-
auto ObjLayer = createObjectLinkingLayer(S, *ES);
if (!ObjLayer) {
Err = ObjLayer.takeError();
@@ -905,10 +997,77 @@ LLJIT::LLJIT(LLJITBuilderState &S, Error &Err)
});
}
- if (S.SetUpPlatform)
- Err = S.SetUpPlatform(*this);
- else
- setUpGenericLLVMIRPlatform(*this);
+ if (S.SetupProcessSymbolsJITDylib) {
+ ProcessSymbols = &ES->createBareJITDylib("<Process Symbols>");
+ if (auto Err2 = S.SetupProcessSymbolsJITDylib(*ProcessSymbols)) {
+ Err = std::move(Err2);
+ return;
+ }
+ }
+
+ if (S.EnableDebuggerSupport) {
+ if (auto *OLL = dyn_cast<ObjectLinkingLayer>(ObjLinkingLayer.get())) {
+ switch (TT.getObjectFormat()) {
+ case Triple::ELF: {
+ auto Registrar = createJITLoaderGDBRegistrar(*ES);
+ if (!Registrar) {
+ Err = Registrar.takeError();
+ return;
+ }
+ OLL->addPlugin(std::make_unique<DebugObjectManagerPlugin>(
+ *ES, std::move(*Registrar), true, true));
+ break;
+ }
+ case Triple::MachO: {
+ assert(ProcessSymbols && "ProcessSymbols JD should be available when "
+ "EnableDebuggerSupport is set");
+ auto DS =
+ GDBJITDebugInfoRegistrationPlugin::Create(*ES, *ProcessSymbols, TT);
+ if (!DS) {
+ Err = DS.takeError();
+ return;
+ }
+ OLL->addPlugin(std::move(*DS));
+ break;
+ }
+ default:
+ LLVM_DEBUG({
+ dbgs() << "Cannot enable LLJIT debugger support: "
+ << Triple::getObjectFormatTypeName(TT.getObjectFormat())
+ << " not supported.\n";
+ });
+ }
+ } else {
+ LLVM_DEBUG({
+ dbgs() << "Cannot enable LLJIT debugger support: "
+ " debugger support is only available when using JITLink.\n";
+ });
+ }
+ }
+
+ if (!S.SetUpPlatform)
+ S.SetUpPlatform = setUpGenericLLVMIRPlatform;
+
+ if (auto PlatformJDOrErr = S.SetUpPlatform(*this)) {
+ Platform = PlatformJDOrErr->get();
+ if (Platform)
+ DefaultLinks.push_back(
+ {Platform, JITDylibLookupFlags::MatchExportedSymbolsOnly});
+ } else {
+ Err = PlatformJDOrErr.takeError();
+ return;
+ }
+
+ if (S.LinkProcessSymbolsByDefault)
+ DefaultLinks.push_back(
+ {ProcessSymbols, JITDylibLookupFlags::MatchExportedSymbolsOnly});
+
+ if (auto MainOrErr = createJITDylib("main"))
+ Main = &*MainOrErr;
+ else {
+ Err = MainOrErr.takeError();
+ return;
+ }
}
std::string LLJIT::mangle(StringRef UnmangledName) const {
@@ -934,24 +1093,136 @@ Error LLJIT::applyDataLayout(Module &M) {
return Error::success();
}
-Error setUpOrcPlatform(LLJIT& J) {
- LLVM_DEBUG(
- { dbgs() << "Setting up orc platform support for LLJIT\n"; });
- J.setPlatformSupport(std::make_unique<ORCPlatformSupport>(J));
+Error setUpOrcPlatformManually(LLJIT &J) {
+ LLVM_DEBUG({ dbgs() << "Setting up orc platform support for LLJIT\n"; });
+ J.setPlatformSupport(std::make_unique<ORCPlatformSupport>(J));
+ return Error::success();
+}
+
+class LoadAndLinkDynLibrary {
+public:
+ LoadAndLinkDynLibrary(LLJIT &J) : J(J) {}
+ Error operator()(JITDylib &JD, StringRef DLLName) {
+ if (!DLLName.ends_with_insensitive(".dll"))
+ return make_error<StringError>("DLLName not ending with .dll",
+ inconvertibleErrorCode());
+ auto DLLNameStr = DLLName.str(); // Guarantees null-termination.
+ auto DLLJD = J.loadPlatformDynamicLibrary(DLLNameStr.c_str());
+ if (!DLLJD)
+ return DLLJD.takeError();
+ JD.addToLinkOrder(*DLLJD);
return Error::success();
+ }
+
+private:
+ LLJIT &J;
+};
+
+Expected<JITDylibSP> ExecutorNativePlatform::operator()(LLJIT &J) {
+ auto ProcessSymbolsJD = J.getProcessSymbolsJITDylib();
+ if (!ProcessSymbolsJD)
+ return make_error<StringError>(
+ "Native platforms require a process symbols JITDylib",
+ inconvertibleErrorCode());
+
+ const Triple &TT = J.getTargetTriple();
+ ObjectLinkingLayer *ObjLinkingLayer =
+ dyn_cast<ObjectLinkingLayer>(&J.getObjLinkingLayer());
+
+ if (!ObjLinkingLayer)
+ return make_error<StringError>(
+ "SetUpTargetPlatform requires ObjectLinkingLayer",
+ inconvertibleErrorCode());
+
+ std::unique_ptr<MemoryBuffer> RuntimeArchiveBuffer;
+ if (OrcRuntime.index() == 0) {
+ auto A = errorOrToExpected(MemoryBuffer::getFile(std::get<0>(OrcRuntime)));
+ if (!A)
+ return A.takeError();
+ RuntimeArchiveBuffer = std::move(*A);
+ } else
+ RuntimeArchiveBuffer = std::move(std::get<1>(OrcRuntime));
+
+ auto &ES = J.getExecutionSession();
+ auto &PlatformJD = ES.createBareJITDylib("<Platform>");
+ PlatformJD.addToLinkOrder(*ProcessSymbolsJD);
+
+ J.setPlatformSupport(std::make_unique<ORCPlatformSupport>(J));
+
+ switch (TT.getObjectFormat()) {
+ case Triple::COFF: {
+ const char *VCRuntimePath = nullptr;
+ bool StaticVCRuntime = false;
+ if (VCRuntime) {
+ VCRuntimePath = VCRuntime->first.c_str();
+ StaticVCRuntime = VCRuntime->second;
+ }
+ if (auto P = COFFPlatform::Create(
+ ES, *ObjLinkingLayer, PlatformJD, std::move(RuntimeArchiveBuffer),
+ LoadAndLinkDynLibrary(J), StaticVCRuntime, VCRuntimePath))
+ J.getExecutionSession().setPlatform(std::move(*P));
+ else
+ return P.takeError();
+ break;
+ }
+ case Triple::ELF: {
+ auto G = StaticLibraryDefinitionGenerator::Create(
+ *ObjLinkingLayer, std::move(RuntimeArchiveBuffer));
+ if (!G)
+ return G.takeError();
+
+ if (auto P = ELFNixPlatform::Create(ES, *ObjLinkingLayer, PlatformJD,
+ std::move(*G)))
+ J.getExecutionSession().setPlatform(std::move(*P));
+ else
+ return P.takeError();
+ break;
+ }
+ case Triple::MachO: {
+ auto G = StaticLibraryDefinitionGenerator::Create(
+ *ObjLinkingLayer, std::move(RuntimeArchiveBuffer));
+ if (!G)
+ return G.takeError();
+
+ if (auto P = MachOPlatform::Create(ES, *ObjLinkingLayer, PlatformJD,
+ std::move(*G)))
+ ES.setPlatform(std::move(*P));
+ else
+ return P.takeError();
+ break;
+ }
+ default:
+ return make_error<StringError>("Unsupported object format in triple " +
+ TT.str(),
+ inconvertibleErrorCode());
+ }
+
+ return &PlatformJD;
}
-void setUpGenericLLVMIRPlatform(LLJIT &J) {
+Expected<JITDylibSP> setUpGenericLLVMIRPlatform(LLJIT &J) {
LLVM_DEBUG(
{ dbgs() << "Setting up GenericLLVMIRPlatform support for LLJIT\n"; });
- J.setPlatformSupport(std::make_unique<GenericLLVMIRPlatformSupport>(J));
+ auto ProcessSymbolsJD = J.getProcessSymbolsJITDylib();
+ if (!ProcessSymbolsJD)
+ return make_error<StringError>(
+ "Native platforms require a process symbols JITDylib",
+ inconvertibleErrorCode());
+
+ auto &PlatformJD = J.getExecutionSession().createBareJITDylib("<Platform>");
+ PlatformJD.addToLinkOrder(*ProcessSymbolsJD);
+
+ J.setPlatformSupport(
+ std::make_unique<GenericLLVMIRPlatformSupport>(J, PlatformJD));
+
+ return &PlatformJD;
}
-Error setUpInactivePlatform(LLJIT &J) {
+Expected<JITDylibSP> setUpInactivePlatform(LLJIT &J) {
LLVM_DEBUG(
{ dbgs() << "Explicitly deactivated platform support for LLJIT\n"; });
J.setPlatformSupport(std::make_unique<InactivePlatformSupport>());
- return Error::success();
+ return nullptr;
}
Error LLLazyJITBuilderState::prepareForConstruction() {
@@ -984,7 +1255,7 @@ LLLazyJIT::LLLazyJIT(LLLazyJITBuilderState &S, Error &Err) : LLJIT(S, Err) {
LCTMgr = std::move(S.LCTMgr);
else {
if (auto LCTMgrOrErr = createLocalLazyCallThroughManager(
- S.TT, *ES, S.LazyCompileFailureAddr.getValue()))
+ S.TT, *ES, S.LazyCompileFailureAddr))
LCTMgr = std::move(*LCTMgrOrErr);
else {
Err = LCTMgrOrErr.takeError();
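The LLJIT changes above move DataLayout selection into prepareForConstruction and have lookup() hand back an ExecutorAddr directly. A rough client-side sketch under those assumptions (module construction elided; not part of this patch):

// Sketch: build an LLJIT instance, add a module, and call its "main".
// Assumes the native target is initialized and TSM wraps a module defining
// "int main()".
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
#include "llvm/Support/Error.h"

using namespace llvm;
using namespace llvm::orc;

Expected<int> runMain(ThreadSafeModule TSM) {
  auto J = LLJITBuilder().create(); // DataLayout is now defaulted in the builder
  if (!J)
    return J.takeError();
  if (Error Err = (*J)->addIRModule(std::move(TSM)))
    return std::move(Err);
  auto MainAddr = (*J)->lookup("main"); // Expected<ExecutorAddr>
  if (!MainAddr)
    return MainAddr.takeError();
  return MainAddr->toPtr<int (*)()>()(); // run the JIT'd entry point
}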
diff --git a/llvm/lib/ExecutionEngine/Orc/Layer.cpp b/llvm/lib/ExecutionEngine/Orc/Layer.cpp
index 95380d912392..3368d3276cb3 100644
--- a/llvm/lib/ExecutionEngine/Orc/Layer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Layer.cpp
@@ -125,6 +125,10 @@ void IRMaterializationUnit::discard(const JITDylib &JD,
assert(!I->second->isDeclaration() &&
"Discard should only apply to definitions");
I->second->setLinkage(GlobalValue::AvailableExternallyLinkage);
+ // According to the IR verifier, "Declaration[s] may not be in a Comdat!"
+ // Remove it, if this is a GlobalObject.
+ if (auto *GO = dyn_cast<GlobalObject>(I->second))
+ GO->setComdat(nullptr);
SymbolToDefinition.erase(I);
}
diff --git a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
index c0a740d42dbd..d95a642934f1 100644
--- a/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LazyReexports.cpp
@@ -8,19 +8,20 @@
#include "llvm/ExecutionEngine/Orc/LazyReexports.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
+#include "llvm/TargetParser/Triple.h"
#define DEBUG_TYPE "orc"
namespace llvm {
namespace orc {
-LazyCallThroughManager::LazyCallThroughManager(
- ExecutionSession &ES, JITTargetAddress ErrorHandlerAddr, TrampolinePool *TP)
+LazyCallThroughManager::LazyCallThroughManager(ExecutionSession &ES,
+ ExecutorAddr ErrorHandlerAddr,
+ TrampolinePool *TP)
: ES(ES), ErrorHandlerAddr(ErrorHandlerAddr), TP(TP) {}
-Expected<JITTargetAddress> LazyCallThroughManager::getCallThroughTrampoline(
+Expected<ExecutorAddr> LazyCallThroughManager::getCallThroughTrampoline(
JITDylib &SourceJD, SymbolStringPtr SymbolName,
NotifyResolvedFunction NotifyResolved) {
assert(TP && "TrampolinePool not set");
@@ -36,24 +37,24 @@ Expected<JITTargetAddress> LazyCallThroughManager::getCallThroughTrampoline(
return *Trampoline;
}
-JITTargetAddress LazyCallThroughManager::reportCallThroughError(Error Err) {
+ExecutorAddr LazyCallThroughManager::reportCallThroughError(Error Err) {
ES.reportError(std::move(Err));
return ErrorHandlerAddr;
}
Expected<LazyCallThroughManager::ReexportsEntry>
-LazyCallThroughManager::findReexport(JITTargetAddress TrampolineAddr) {
+LazyCallThroughManager::findReexport(ExecutorAddr TrampolineAddr) {
std::lock_guard<std::mutex> Lock(LCTMMutex);
auto I = Reexports.find(TrampolineAddr);
if (I == Reexports.end())
return createStringError(inconvertibleErrorCode(),
- "Missing reexport for trampoline address %p",
- TrampolineAddr);
+ "Missing reexport for trampoline address %p" +
+ formatv("{0:x}", TrampolineAddr));
return I->second;
}
-Error LazyCallThroughManager::notifyResolved(JITTargetAddress TrampolineAddr,
- JITTargetAddress ResolvedAddr) {
+Error LazyCallThroughManager::notifyResolved(ExecutorAddr TrampolineAddr,
+ ExecutorAddr ResolvedAddr) {
NotifyResolvedFunction NotifyResolved;
{
std::lock_guard<std::mutex> Lock(LCTMMutex);
@@ -68,7 +69,7 @@ Error LazyCallThroughManager::notifyResolved(JITTargetAddress TrampolineAddr,
}
void LazyCallThroughManager::resolveTrampolineLandingAddress(
- JITTargetAddress TrampolineAddr,
+ ExecutorAddr TrampolineAddr,
NotifyLandingResolvedFunction NotifyLandingResolved) {
auto Entry = findReexport(TrampolineAddr);
@@ -84,7 +85,7 @@ void LazyCallThroughManager::resolveTrampolineLandingAddress(
if (Result) {
assert(Result->size() == 1 && "Unexpected result size");
assert(Result->count(SymbolName) && "Unexpected result value");
- JITTargetAddress LandingAddr = (*Result)[SymbolName].getAddress();
+ ExecutorAddr LandingAddr = (*Result)[SymbolName].getAddress();
if (auto Err = notifyResolved(TrampolineAddr, LandingAddr))
NotifyLandingResolved(reportCallThroughError(std::move(Err)));
@@ -104,7 +105,7 @@ void LazyCallThroughManager::resolveTrampolineLandingAddress(
Expected<std::unique_ptr<LazyCallThroughManager>>
createLocalLazyCallThroughManager(const Triple &T, ExecutionSession &ES,
- JITTargetAddress ErrorHandlerAddr) {
+ ExecutorAddr ErrorHandlerAddr) {
switch (T.getArch()) {
default:
return make_error<StringError>(
@@ -187,7 +188,7 @@ void LazyReexportsMaterializationUnit::materialize(
auto CallThroughTrampoline = LCTManager.getCallThroughTrampoline(
SourceJD, Alias.second.Aliasee,
[&ISManager = this->ISManager,
- StubSym = Alias.first](JITTargetAddress ResolvedAddr) -> Error {
+ StubSym = Alias.first](ExecutorAddr ResolvedAddr) -> Error {
return ISManager.updatePointer(*StubSym, ResolvedAddr);
});
diff --git a/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp b/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp
index 59c63d38458b..75075c5c2a22 100644
--- a/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LookupAndRecordAddrs.cpp
@@ -31,8 +31,8 @@ void lookupAndRecordAddrs(
return OnRec(Result.takeError());
for (auto &KV : Pairs) {
auto I = Result->find(KV.first);
- KV.second->setValue((I != Result->end()) ? I->second.getAddress()
- : 0);
+ *KV.second =
+ I != Result->end() ? I->second.getAddress() : orc::ExecutorAddr();
}
OnRec(Error::success());
},
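The LookupAndRecordAddrs change above assigns the looked-up addresses straight into ExecutorAddr out-parameters. A hedged sketch of the blocking form of that API (the symbol names and surrounding session are assumptions, not part of this patch):

// Sketch: look up two runtime symbols and record their addresses directly
// into ExecutorAddr fields.
#include "llvm/ExecutionEngine/Orc/Core.h"
#include "llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h"

using namespace llvm;
using namespace llvm::orc;

Error recordWrapperAddrs(ExecutionSession &ES, JITDylib &PlatformJD,
                         ExecutorAddr &DlOpen, ExecutorAddr &DlClose) {
  return lookupAndRecordAddrs(
      ES, LookupKind::Static, makeJITDylibSearchOrder(&PlatformJD),
      {{ES.intern("__orc_rt_jit_dlopen_wrapper"), &DlOpen},
       {ES.intern("__orc_rt_jit_dlclose_wrapper"), &DlClose}});
}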
diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
index 914a1b5afc71..a3a766d602c1 100644
--- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp
@@ -9,10 +9,13 @@
#include "llvm/ExecutionEngine/Orc/MachOPlatform.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/ExecutionEngine/JITLink/MachO.h"
+#include "llvm/ExecutionEngine/JITLink/aarch64.h"
#include "llvm/ExecutionEngine/JITLink/x86_64.h"
#include "llvm/ExecutionEngine/Orc/DebugUtils.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/LookupAndRecordAddrs.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ObjectFormats.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/Debug.h"
#include <optional>
@@ -62,8 +65,7 @@ std::unique_ptr<jitlink::LinkGraph> createPlatformGraph(MachOPlatform &MOP,
std::string Name) {
unsigned PointerSize;
support::endianness Endianness;
- const auto &TT =
- MOP.getExecutionSession().getExecutorProcessControl().getTargetTriple();
+ const auto &TT = MOP.getExecutionSession().getTargetTriple();
switch (TT.getArch()) {
case Triple::aarch64:
@@ -147,8 +149,8 @@ private:
if (G.getEndianness() != support::endian::system_endianness())
MachO::swapStruct(Hdr);
- auto HeaderContent = G.allocateString(
- StringRef(reinterpret_cast<const char *>(&Hdr), sizeof(Hdr)));
+ auto HeaderContent = G.allocateContent(
+ ArrayRef<char>(reinterpret_cast<const char *>(&Hdr), sizeof(Hdr)));
return G.createContentBlock(HeaderSection, HeaderContent, ExecutorAddr(), 8,
0);
@@ -246,24 +248,23 @@ private:
ExecutorAddr MachOHeaderAddr;
};
-StringRef DataCommonSectionName = "__DATA,__common";
-StringRef DataDataSectionName = "__DATA,__data";
-StringRef EHFrameSectionName = "__TEXT,__eh_frame";
-StringRef CompactUnwindInfoSectionName = "__TEXT,__unwind_info";
-StringRef ModInitFuncSectionName = "__DATA,__mod_init_func";
-StringRef ObjCClassListSectionName = "__DATA,__objc_classlist";
-StringRef ObjCImageInfoSectionName = "__DATA,__objc_image_info";
-StringRef ObjCSelRefsSectionName = "__DATA,__objc_selrefs";
-StringRef Swift5ProtoSectionName = "__TEXT,__swift5_proto";
-StringRef Swift5ProtosSectionName = "__TEXT,__swift5_protos";
-StringRef Swift5TypesSectionName = "__TEXT,__swift5_types";
-StringRef ThreadBSSSectionName = "__DATA,__thread_bss";
-StringRef ThreadDataSectionName = "__DATA,__thread_data";
-StringRef ThreadVarsSectionName = "__DATA,__thread_vars";
-
-StringRef InitSectionNames[] = {
- ModInitFuncSectionName, ObjCSelRefsSectionName, ObjCClassListSectionName,
- Swift5ProtosSectionName, Swift5ProtoSectionName, Swift5TypesSectionName};
+static StringRef ObjCRuntimeObjectSectionsData[] = {
+ MachOObjCCatListSectionName, MachOObjCClassListSectionName,
+ MachOObjCClassRefsSectionName, MachOObjCConstSectionName,
+ MachOObjCDataSectionName, MachOObjCSelRefsSectionName};
+
+static StringRef ObjCRuntimeObjectSectionsText[] = {
+ MachOObjCClassNameSectionName, MachOObjCMethNameSectionName,
+ MachOObjCMethTypeSectionName, MachOSwift5TypesSectionName,
+ MachOSwift5TypeRefSectionName, MachOSwift5FieldMetadataSectionName,
+ MachOSwift5EntrySectionName, MachOSwift5ProtoSectionName,
+ MachOSwift5ProtosSectionName};
+
+static StringRef ObjCRuntimeObjectSectionName =
+ "__llvm_jitlink_ObjCRuntimeRegistrationObject";
+
+static StringRef ObjCImageInfoSymbolName =
+ "__llvm_jitlink_macho_objc_imageinfo";
} // end anonymous namespace
@@ -272,17 +273,18 @@ namespace orc {
Expected<std::unique_ptr<MachOPlatform>>
MachOPlatform::Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
- JITDylib &PlatformJD, const char *OrcRuntimePath,
+ JITDylib &PlatformJD,
+ std::unique_ptr<DefinitionGenerator> OrcRuntime,
std::optional<SymbolAliasMap> RuntimeAliases) {
- auto &EPC = ES.getExecutorProcessControl();
-
// If the target is not supported then bail out immediately.
- if (!supportedTarget(EPC.getTargetTriple()))
+ if (!supportedTarget(ES.getTargetTriple()))
return make_error<StringError>("Unsupported MachOPlatform triple: " +
- EPC.getTargetTriple().str(),
+ ES.getTargetTriple().str(),
inconvertibleErrorCode());
+ auto &EPC = ES.getExecutorProcessControl();
+
// Create default aliases if the caller didn't supply any.
if (!RuntimeAliases)
RuntimeAliases = standardPlatformAliases(ES);
@@ -292,31 +294,40 @@ MachOPlatform::Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
return std::move(Err);
// Add JIT-dispatch function support symbols.
- if (auto Err = PlatformJD.define(absoluteSymbols(
- {{ES.intern("___orc_rt_jit_dispatch"),
- {EPC.getJITDispatchInfo().JITDispatchFunction.getValue(),
- JITSymbolFlags::Exported}},
- {ES.intern("___orc_rt_jit_dispatch_ctx"),
- {EPC.getJITDispatchInfo().JITDispatchContext.getValue(),
- JITSymbolFlags::Exported}}})))
+ if (auto Err = PlatformJD.define(
+ absoluteSymbols({{ES.intern("___orc_rt_jit_dispatch"),
+ {EPC.getJITDispatchInfo().JITDispatchFunction,
+ JITSymbolFlags::Exported}},
+ {ES.intern("___orc_rt_jit_dispatch_ctx"),
+ {EPC.getJITDispatchInfo().JITDispatchContext,
+ JITSymbolFlags::Exported}}})))
return std::move(Err);
- // Create a generator for the ORC runtime archive.
- auto OrcRuntimeArchiveGenerator = StaticLibraryDefinitionGenerator::Load(
- ObjLinkingLayer, OrcRuntimePath, EPC.getTargetTriple());
- if (!OrcRuntimeArchiveGenerator)
- return OrcRuntimeArchiveGenerator.takeError();
-
// Create the instance.
Error Err = Error::success();
- auto P = std::unique_ptr<MachOPlatform>(
- new MachOPlatform(ES, ObjLinkingLayer, PlatformJD,
- std::move(*OrcRuntimeArchiveGenerator), Err));
+ auto P = std::unique_ptr<MachOPlatform>(new MachOPlatform(
+ ES, ObjLinkingLayer, PlatformJD, std::move(OrcRuntime), Err));
if (Err)
return std::move(Err);
return std::move(P);
}
+Expected<std::unique_ptr<MachOPlatform>>
+MachOPlatform::Create(ExecutionSession &ES, ObjectLinkingLayer &ObjLinkingLayer,
+ JITDylib &PlatformJD, const char *OrcRuntimePath,
+ std::optional<SymbolAliasMap> RuntimeAliases) {
+
+ // Create a generator for the ORC runtime archive.
+ auto OrcRuntimeArchiveGenerator =
+ StaticLibraryDefinitionGenerator::Load(ObjLinkingLayer, OrcRuntimePath);
+ if (!OrcRuntimeArchiveGenerator)
+ return OrcRuntimeArchiveGenerator.takeError();
+
+ return Create(ES, ObjLinkingLayer, PlatformJD,
+ std::move(*OrcRuntimeArchiveGenerator),
+ std::move(RuntimeAliases));
+}
+
Error MachOPlatform::setupJITDylib(JITDylib &JD) {
if (auto Err = JD.define(std::make_unique<MachOHeaderMaterializationUnit>(
*this, MachOHeaderStartSymbol)))
@@ -398,15 +409,6 @@ MachOPlatform::standardRuntimeUtilityAliases() {
StandardRuntimeUtilityAliases);
}
-bool MachOPlatform::isInitializerSection(StringRef SegName,
- StringRef SectName) {
- for (auto &Name : InitSectionNames) {
- if (Name.startswith(SegName) && Name.substr(7) == SectName)
- return true;
- }
- return false;
-}
-
bool MachOPlatform::supportedTarget(const Triple &TT) {
switch (TT.getArch()) {
case Triple::aarch64:
@@ -654,10 +656,9 @@ void MachOPlatform::rt_pushInitializers(PushInitializersSendResultFn SendResult,
});
if (!JD) {
- SendResult(
- make_error<StringError>("No JITDylib with header addr " +
- formatv("{0:x}", JDHeaderAddr.getValue()),
- inconvertibleErrorCode()));
+ SendResult(make_error<StringError>("No JITDylib with header addr " +
+ formatv("{0:x}", JDHeaderAddr),
+ inconvertibleErrorCode()));
return;
}
@@ -667,8 +668,7 @@ void MachOPlatform::rt_pushInitializers(PushInitializersSendResultFn SendResult,
void MachOPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
ExecutorAddr Handle, StringRef SymbolName) {
LLVM_DEBUG({
- dbgs() << "MachOPlatform::rt_lookupSymbol(\""
- << formatv("{0:x}", Handle.getValue()) << "\")\n";
+ dbgs() << "MachOPlatform::rt_lookupSymbol(\"" << Handle << "\")\n";
});
JITDylib *JD = nullptr;
@@ -681,12 +681,9 @@ void MachOPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
}
if (!JD) {
- LLVM_DEBUG({
- dbgs() << " No JITDylib for handle "
- << formatv("{0:x}", Handle.getValue()) << "\n";
- });
+ LLVM_DEBUG(dbgs() << " No JITDylib for handle " << Handle << "\n");
SendResult(make_error<StringError>("No JITDylib associated with handle " +
- formatv("{0:x}", Handle.getValue()),
+ formatv("{0:x}", Handle),
inconvertibleErrorCode()));
return;
}
@@ -699,7 +696,7 @@ void MachOPlatform::rt_lookupSymbol(SendSymbolAddressFn SendResult,
void operator()(Expected<SymbolMap> Result) {
if (Result) {
assert(Result->size() == 1 && "Unexpected result map count");
- SendResult(ExecutorAddr(Result->begin()->second.getAddress()));
+ SendResult(Result->begin()->second.getAddress());
} else {
SendResult(Result.takeError());
}
@@ -766,10 +763,14 @@ void MachOPlatform::MachOPlatformPlugin::modifyPassConfig(
// then add passes to preserve, process and register the init
// sections/symbols.
Config.PrePrunePasses.push_back([this, &MR](LinkGraph &G) {
- if (auto Err = preserveInitSections(G, MR))
+ if (auto Err = preserveImportantSections(G, MR))
return Err;
return processObjCImageInfo(G, MR);
});
+ Config.PostPrunePasses.push_back(
+ [this](LinkGraph &G) { return createObjCRuntimeObject(G); });
+ Config.PostAllocationPasses.push_back(
+ [this, &MR](LinkGraph &G) { return populateObjCRuntimeObject(G, MR); });
}
// Insert TLV lowering at the start of the PostPrunePasses, since we want
@@ -829,7 +830,10 @@ Error MachOPlatform::MachOPlatformPlugin::
&MP.RegisterObjectPlatformSections.Addr},
{*MP.DeregisterObjectPlatformSections.Name,
&MP.DeregisterObjectPlatformSections.Addr},
- {*MP.CreatePThreadKey.Name, &MP.CreatePThreadKey.Addr}};
+ {*MP.CreatePThreadKey.Name, &MP.CreatePThreadKey.Addr},
+ {*MP.RegisterObjCRuntimeObject.Name, &MP.RegisterObjCRuntimeObject.Addr},
+ {*MP.DeregisterObjCRuntimeObject.Name,
+ &MP.DeregisterObjCRuntimeObject.Addr}};
bool RegisterMachOHeader = false;
@@ -898,11 +902,40 @@ Error MachOPlatform::MachOPlatformPlugin::associateJITDylibHeaderSymbol(
return Error::success();
}
-Error MachOPlatform::MachOPlatformPlugin::preserveInitSections(
+Error MachOPlatform::MachOPlatformPlugin::preserveImportantSections(
jitlink::LinkGraph &G, MaterializationResponsibility &MR) {
+ // __objc_imageinfo is "important": we want to preserve it and record its
+ // address in the first graph that it appears in, then verify and discard it
+ // in all subsequent graphs. In this pass we preserve unconditionally -- we'll
+ // manually throw it away in the processObjCImageInfo pass.
+ if (auto *ObjCImageInfoSec =
+ G.findSectionByName(MachOObjCImageInfoSectionName)) {
+ if (ObjCImageInfoSec->blocks_size() != 1)
+ return make_error<StringError>(
+ "In " + G.getName() +
+ "__DATA,__objc_imageinfo contains multiple blocks",
+ inconvertibleErrorCode());
+ G.addAnonymousSymbol(**ObjCImageInfoSec->blocks().begin(), 0, 0, false,
+ true);
+
+ for (auto *B : ObjCImageInfoSec->blocks())
+ if (!B->edges_empty())
+ return make_error<StringError>("In " + G.getName() + ", " +
+ MachOObjCImageInfoSectionName +
+ " contains references to symbols",
+ inconvertibleErrorCode());
+ }
+ // Init sections are important: We need to preserve them so that their
+ // addresses can be captured and reported to the ORC runtime in
+ // registerObjectPlatformSections.
JITLinkSymbolSet InitSectionSymbols;
- for (auto &InitSectionName : InitSectionNames) {
+ for (auto &InitSectionName : MachOInitSectionNames) {
+ // Skip ObjCImageInfo -- this shouldn't have any dependencies, and we may
+ // remove it later.
+ if (InitSectionName == MachOObjCImageInfoSectionName)
+ continue;
+
// Skip non-init sections.
auto *InitSection = G.findSectionByName(InitSectionName);
if (!InitSection)
@@ -944,7 +977,7 @@ Error MachOPlatform::MachOPlatformPlugin::processObjCImageInfo(
// OR
// (2) We already have a recorded __objc_imageinfo for this JITDylib,
// in which case we just verify it.
- auto *ObjCImageInfo = G.findSectionByName(ObjCImageInfoSectionName);
+ auto *ObjCImageInfo = G.findSectionByName(MachOObjCImageInfoSectionName);
if (!ObjCImageInfo)
return Error::success();
@@ -952,14 +985,14 @@ Error MachOPlatform::MachOPlatformPlugin::processObjCImageInfo(
// Check that the section is not empty if present.
if (ObjCImageInfoBlocks.empty())
- return make_error<StringError>("Empty " + ObjCImageInfoSectionName +
+ return make_error<StringError>("Empty " + MachOObjCImageInfoSectionName +
" section in " + G.getName(),
inconvertibleErrorCode());
// Check that there's only one block in the section.
if (std::next(ObjCImageInfoBlocks.begin()) != ObjCImageInfoBlocks.end())
return make_error<StringError>("Multiple blocks in " +
- ObjCImageInfoSectionName +
+ MachOObjCImageInfoSectionName +
" section in " + G.getName(),
inconvertibleErrorCode());
@@ -971,7 +1004,7 @@ Error MachOPlatform::MachOPlatformPlugin::processObjCImageInfo(
for (auto &E : B->edges())
if (E.getTarget().isDefined() &&
&E.getTarget().getBlock().getSection() == ObjCImageInfo)
- return make_error<StringError>(ObjCImageInfoSectionName +
+ return make_error<StringError>(MachOObjCImageInfoSectionName +
" is referenced within file " +
G.getName(),
inconvertibleErrorCode());
@@ -990,12 +1023,12 @@ Error MachOPlatform::MachOPlatformPlugin::processObjCImageInfo(
if (ObjCImageInfoItr != ObjCImageInfos.end()) {
// We've already registered an __objc_imageinfo section. Verify the
// content of this new section matches, then delete it.
- if (ObjCImageInfoItr->second.first != Version)
+ if (ObjCImageInfoItr->second.Version != Version)
return make_error<StringError>(
"ObjC version in " + G.getName() +
" does not match first registered version",
inconvertibleErrorCode());
- if (ObjCImageInfoItr->second.second != Flags)
+ if (ObjCImageInfoItr->second.Flags != Flags)
return make_error<StringError>("ObjC flags in " + G.getName() +
" do not match first registered flags",
inconvertibleErrorCode());
@@ -1007,7 +1040,14 @@ Error MachOPlatform::MachOPlatformPlugin::processObjCImageInfo(
} else {
// We haven't registered an __objc_imageinfo section yet. Register and
// move on. The section should already be marked no-dead-strip.
- ObjCImageInfos[&MR.getTargetJITDylib()] = std::make_pair(Version, Flags);
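+ // Also define a hidden __objc_imageinfo symbol covering this block so that
+ // later graphs (e.g. the synthesized ObjC runtime object) can refer to the
+ // recorded imageinfo by name.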
+ G.addDefinedSymbol(ObjCImageInfoBlock, 0, ObjCImageInfoSymbolName,
+ ObjCImageInfoBlock.getSize(), jitlink::Linkage::Strong,
+ jitlink::Scope::Hidden, false, true);
+ if (auto Err = MR.defineMaterializing(
+ {{MR.getExecutionSession().intern(ObjCImageInfoSymbolName),
+ JITSymbolFlags()}}))
+ return Err;
+ ObjCImageInfos[&MR.getTargetJITDylib()] = {Version, Flags};
}
return Error::success();
@@ -1024,7 +1064,7 @@ Error MachOPlatform::MachOPlatformPlugin::fixTLVSectionsAndEdges(
}
// Store key in __thread_vars struct fields.
- if (auto *ThreadDataSec = G.findSectionByName(ThreadVarsSectionName)) {
+ if (auto *ThreadDataSec = G.findSectionByName(MachOThreadVarsSectionName)) {
std::optional<uint64_t> Key;
{
std::lock_guard<std::mutex> Lock(MP.PlatformMutex);
@@ -1098,10 +1138,11 @@ MachOPlatform::MachOPlatformPlugin::findUnwindSectionInfo(
}
};
- if (Section *EHFrameSec = G.findSectionByName(EHFrameSectionName))
+ if (Section *EHFrameSec = G.findSectionByName(MachOEHFrameSectionName))
ScanUnwindInfoSection(*EHFrameSec, US.DwarfSection);
- if (Section *CUInfoSec = G.findSectionByName(CompactUnwindInfoSectionName))
+ if (Section *CUInfoSec =
+ G.findSectionByName(MachOCompactUnwindInfoSectionName))
ScanUnwindInfoSection(*CUInfoSec, US.CompactUnwindSection);
// If we didn't find any pointed-to code-blocks then there's no need to
@@ -1150,10 +1191,10 @@ Error MachOPlatform::MachOPlatformPlugin::registerObjectPlatformSections(
// Get a pointer to the thread data section if there is one. It will be used
// below.
jitlink::Section *ThreadDataSection =
- G.findSectionByName(ThreadDataSectionName);
+ G.findSectionByName(MachOThreadDataSectionName);
// Handle thread BSS section if there is one.
- if (auto *ThreadBSSSection = G.findSectionByName(ThreadBSSSectionName)) {
+ if (auto *ThreadBSSSection = G.findSectionByName(MachOThreadBSSSectionName)) {
// If there's already a thread data section in this graph then merge the
// thread BSS section content into it, otherwise just treat the thread
// BSS section as the thread data section.
@@ -1166,8 +1207,9 @@ Error MachOPlatform::MachOPlatformPlugin::registerObjectPlatformSections(
SmallVector<std::pair<StringRef, ExecutorAddrRange>, 8> MachOPlatformSecs;
// Collect data sections to register.
- StringRef DataSections[] = {DataDataSectionName, DataCommonSectionName,
- EHFrameSectionName};
+ StringRef DataSections[] = {MachODataDataSectionName,
+ MachODataCommonSectionName,
+ MachOEHFrameSectionName};
for (auto &SecName : DataSections) {
if (auto *Sec = G.findSectionByName(SecName)) {
jitlink::SectionRange R(*Sec);
@@ -1181,17 +1223,13 @@ Error MachOPlatform::MachOPlatformPlugin::registerObjectPlatformSections(
if (ThreadDataSection) {
jitlink::SectionRange R(*ThreadDataSection);
if (!R.empty())
- MachOPlatformSecs.push_back({ThreadDataSectionName, R.getRange()});
+ MachOPlatformSecs.push_back({MachOThreadDataSectionName, R.getRange()});
}
// If any platform sections were found then add an allocation action to call
// the registration function.
- StringRef PlatformSections[] = {
- ModInitFuncSectionName, ObjCClassListSectionName,
- ObjCImageInfoSectionName, ObjCSelRefsSectionName,
- Swift5ProtoSectionName, Swift5ProtosSectionName,
- Swift5TypesSectionName,
- };
+ StringRef PlatformSections[] = {MachOModInitFuncSectionName,
+ ObjCRuntimeObjectSectionName};
for (auto &SecName : PlatformSections) {
auto *Sec = G.findSectionByName(SecName);
@@ -1252,5 +1290,207 @@ Error MachOPlatform::MachOPlatformPlugin::registerObjectPlatformSections(
return Error::success();
}
+Error MachOPlatform::MachOPlatformPlugin::createObjCRuntimeObject(
+ jitlink::LinkGraph &G) {
+
+ bool NeedTextSegment = false;
+ size_t NumRuntimeSections = 0;
+
+ for (auto ObjCRuntimeSectionName : ObjCRuntimeObjectSectionsData)
+ if (G.findSectionByName(ObjCRuntimeSectionName))
+ ++NumRuntimeSections;
+
+ for (auto ObjCRuntimeSectionName : ObjCRuntimeObjectSectionsText) {
+ if (G.findSectionByName(ObjCRuntimeSectionName)) {
+ ++NumRuntimeSections;
+ NeedTextSegment = true;
+ }
+ }
+
+ // Early out for no runtime sections.
+ if (NumRuntimeSections == 0)
+ return Error::success();
+
+ // If there were any runtime sections then we need to add an __objc_imageinfo
+ // section.
+ ++NumRuntimeSections;
+
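+ // Reserve room for a mach_header_64, one segment_command_64 per segment
+ // (__DATA always, __TEXT only if text sections are present), and one
+ // section_64 record per runtime section (including the __objc_imageinfo
+ // section counted just above).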
+ size_t MachOSize = sizeof(MachO::mach_header_64) +
+ (NeedTextSegment + 1) * sizeof(MachO::segment_command_64) +
+ NumRuntimeSections * sizeof(MachO::section_64);
+
+ auto &Sec = G.createSection(ObjCRuntimeObjectSectionName,
+ MemProt::Read | MemProt::Write);
+ G.createMutableContentBlock(Sec, MachOSize, ExecutorAddr(), 16, 0, true);
+
+ return Error::success();
+}
+
+Error MachOPlatform::MachOPlatformPlugin::populateObjCRuntimeObject(
+ jitlink::LinkGraph &G, MaterializationResponsibility &MR) {
+
+ auto *ObjCRuntimeObjectSec =
+ G.findSectionByName(ObjCRuntimeObjectSectionName);
+
+ if (!ObjCRuntimeObjectSec)
+ return Error::success();
+
+ switch (G.getTargetTriple().getArch()) {
+ case Triple::aarch64:
+ case Triple::x86_64:
+ // Supported.
+ break;
+ default:
+ return make_error<StringError>("Unrecognized MachO arch in triple " +
+ G.getTargetTriple().str(),
+ inconvertibleErrorCode());
+ }
+
+ auto &SecBlock = **ObjCRuntimeObjectSec->blocks().begin();
+
+ struct SecDesc {
+ MachO::section_64 Sec;
+ unique_function<void(size_t RecordOffset)> AddFixups;
+ };
+
+ std::vector<SecDesc> TextSections, DataSections;
+ auto AddSection = [&](SecDesc &SD, jitlink::Section &GraphSec) {
+ jitlink::SectionRange SR(GraphSec);
+ StringRef FQName = GraphSec.getName();
+ memset(&SD.Sec, 0, sizeof(MachO::section_64));
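+ // Graph section names are fully qualified ("<segment>,<section>") with a
+ // six-character segment name: the first six bytes fill segname and the
+ // characters after the comma fill sectname.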
+ memcpy(SD.Sec.sectname, FQName.drop_front(7).data(), FQName.size() - 7);
+ memcpy(SD.Sec.segname, FQName.data(), 6);
+ SD.Sec.addr = SR.getStart() - SecBlock.getAddress();
+ SD.Sec.size = SR.getSize();
+ SD.Sec.flags = MachO::S_REGULAR;
+ };
+
+ // Add the __objc_imageinfo section.
+ {
+ DataSections.push_back({});
+ auto &SD = DataSections.back();
+ memset(&SD.Sec, 0, sizeof(SD.Sec));
+ memcpy(SD.Sec.sectname, "__objc_imageinfo", 16);
+ strcpy(SD.Sec.segname, "__DATA");
+ SD.Sec.size = 8;
+ SD.AddFixups = [&](size_t RecordOffset) {
+ jitlink::Edge::Kind PointerEdge = jitlink::Edge::Invalid;
+ switch (G.getTargetTriple().getArch()) {
+ case Triple::aarch64:
+ PointerEdge = jitlink::aarch64::Pointer64;
+ break;
+ case Triple::x86_64:
+ PointerEdge = jitlink::x86_64::Pointer64;
+ break;
+ default:
+ llvm_unreachable("Unsupported architecture");
+ }
+
+ // Look for an existing __objc_imageinfo symbol.
+ jitlink::Symbol *ObjCImageInfoSym = nullptr;
+ for (auto *Sym : G.external_symbols())
+ if (Sym->getName() == ObjCImageInfoSymbolName) {
+ ObjCImageInfoSym = Sym;
+ break;
+ }
+ if (!ObjCImageInfoSym)
+ for (auto *Sym : G.absolute_symbols())
+ if (Sym->getName() == ObjCImageInfoSymbolName) {
+ ObjCImageInfoSym = Sym;
+ break;
+ }
+ if (!ObjCImageInfoSym)
+ for (auto *Sym : G.defined_symbols())
+ if (Sym->hasName() && Sym->getName() == ObjCImageInfoSymbolName) {
+ ObjCImageInfoSym = Sym;
+ break;
+ }
+ if (!ObjCImageInfoSym)
+ ObjCImageInfoSym =
+ &G.addExternalSymbol(ObjCImageInfoSymbolName, 8, false);
+
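+ // The imageinfo contents may have been recorded by an earlier graph, so
+ // point at them via an edge: the -SecBlock address addend makes the fixup
+ // store the imageinfo address relative to this header block, matching the
+ // addr values written by AddSection.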
+ SecBlock.addEdge(PointerEdge,
+ RecordOffset + ((char *)&SD.Sec.addr - (char *)&SD.Sec),
+ *ObjCImageInfoSym, -SecBlock.getAddress().getValue());
+ };
+ }
+
+ for (auto ObjCRuntimeSectionName : ObjCRuntimeObjectSectionsData) {
+ if (auto *GraphSec = G.findSectionByName(ObjCRuntimeSectionName)) {
+ DataSections.push_back({});
+ AddSection(DataSections.back(), *GraphSec);
+ }
+ }
+
+ for (auto ObjCRuntimeSectionName : ObjCRuntimeObjectSectionsText) {
+ if (auto *GraphSec = G.findSectionByName(ObjCRuntimeSectionName)) {
+ TextSections.push_back({});
+ AddSection(TextSections.back(), *GraphSec);
+ }
+ }
+
+ assert(ObjCRuntimeObjectSec->blocks_size() == 1 &&
+ "Unexpected number of blocks in runtime sections object");
+
+ // Build the header struct up-front. This also gives us a chance to check
+ // that the triple is supported, which we'll assume below.
+ MachO::mach_header_64 Hdr;
+ Hdr.magic = MachO::MH_MAGIC_64;
+ switch (G.getTargetTriple().getArch()) {
+ case Triple::aarch64:
+ Hdr.cputype = MachO::CPU_TYPE_ARM64;
+ Hdr.cpusubtype = MachO::CPU_SUBTYPE_ARM64_ALL;
+ break;
+ case Triple::x86_64:
+ Hdr.cputype = MachO::CPU_TYPE_X86_64;
+ Hdr.cpusubtype = MachO::CPU_SUBTYPE_X86_64_ALL;
+ break;
+ default:
+ llvm_unreachable("Unsupported architecture");
+ }
+
+ Hdr.filetype = MachO::MH_DYLIB;
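+ // One LC_SEGMENT_64 for __DATA plus one for __TEXT when text sections are
+ // present; sizeofcmds also covers the section_64 records that trail each
+ // segment command.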
+ Hdr.ncmds = 1 + !TextSections.empty();
+ Hdr.sizeofcmds =
+ Hdr.ncmds * sizeof(MachO::segment_command_64) +
+ (TextSections.size() + DataSections.size()) * sizeof(MachO::section_64);
+ Hdr.flags = 0;
+ Hdr.reserved = 0;
+
+ auto SecContent = SecBlock.getAlreadyMutableContent();
+ char *P = SecContent.data();
+ auto WriteMachOStruct = [&](auto S) {
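+ // Structs are built in host byte order; swap them if the graph's
+ // endianness differs so the emitted object is in target byte order.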
+ if (G.getEndianness() != support::endian::system_endianness())
+ MachO::swapStruct(S);
+ memcpy(P, &S, sizeof(S));
+ P += sizeof(S);
+ };
+
+ auto WriteSegment = [&](StringRef Name, std::vector<SecDesc> &Secs) {
+ MachO::segment_command_64 SegLC;
+ memset(&SegLC, 0, sizeof(SegLC));
+ memcpy(SegLC.segname, Name.data(), Name.size());
+ SegLC.cmd = MachO::LC_SEGMENT_64;
+ SegLC.cmdsize = sizeof(MachO::segment_command_64) +
+ Secs.size() * sizeof(MachO::section_64);
+ SegLC.nsects = Secs.size();
+ WriteMachOStruct(SegLC);
+ for (auto &SD : Secs) {
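+ // P - SecContent.data() is the offset of this section_64 record within the
+ // block; AddFixups uses it to attach edges to fields of the record.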
+ if (SD.AddFixups)
+ SD.AddFixups(P - SecContent.data());
+ WriteMachOStruct(SD.Sec);
+ }
+ };
+
+ WriteMachOStruct(Hdr);
+ if (!TextSections.empty())
+ WriteSegment("__TEXT", TextSections);
+ if (!DataSections.empty())
+ WriteSegment("__DATA", DataSections);
+
+ assert(P == SecContent.end() && "Underflow writing ObjC runtime object");
+ return Error::success();
+}
+
} // End namespace orc.
} // End namespace llvm.
diff --git a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
index b457c7297bed..ca4950077ffe 100644
--- a/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/MemoryMapper.cpp
@@ -322,7 +322,8 @@ void SharedMemoryMapper::initialize(MemoryMapper::AllocInfo &AI,
std::memset(Base + Segment.ContentSize, 0, Segment.ZeroFillSize);
tpctypes::SharedMemorySegFinalizeRequest SegReq;
- SegReq.AG = Segment.AG;
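+ // Carry the segment's memory protections forward and record whether it
+ // uses finalize lifetime.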
+ SegReq.RAG = {Segment.AG.getMemProt(), Segment.AG.getMemLifetimePolicy() ==
+ MemLifetimePolicy::Finalize};
SegReq.Addr = AI.MappingBase + Segment.Offset;
SegReq.Size = Segment.ContentSize + Segment.ZeroFillSize;
diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp
index 0c3beba43a35..7c8fa63477d0 100644
--- a/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ObjectFileInterface.cpp
@@ -10,6 +10,7 @@
#include "llvm/ExecutionEngine/Orc/COFFPlatform.h"
#include "llvm/ExecutionEngine/Orc/ELFNixPlatform.h"
#include "llvm/ExecutionEngine/Orc/MachOPlatform.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ObjectFormats.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/MachO.h"
@@ -85,7 +86,7 @@ getMachOObjectFileSymbolInfo(ExecutionSession &ES,
}
auto SegName = Obj.getSectionFinalSegmentName(Sec.getRawDataRefImpl());
auto SecName = cantFail(Obj.getSectionName(Sec.getRawDataRefImpl()));
- if (MachOPlatform::isInitializerSection(SegName, SecName)) {
+ if (isMachOInitializerSection(SegName, SecName)) {
addInitSymbol(I, ES, Obj.getFileName());
break;
}
@@ -138,7 +139,7 @@ getELFObjectFileSymbolInfo(ExecutionSession &ES,
SymbolStringPtr InitSymbol;
for (auto &Sec : Obj.sections()) {
if (auto SecName = Sec.getName()) {
- if (ELFNixPlatform::isInitializerSection(*SecName)) {
+ if (isELFInitializerSection(*SecName)) {
addInitSymbol(I, ES, Obj.getFileName());
break;
}
@@ -219,7 +220,7 @@ getCOFFObjectFileSymbolInfo(ExecutionSession &ES,
SymbolStringPtr InitSymbol;
for (auto &Sec : Obj.sections()) {
if (auto SecName = Sec.getName()) {
- if (COFFPlatform::isInitializerSection(*SecName)) {
+ if (isCOFFInitializerSection(*SecName)) {
addInitSymbol(I, ES, Obj.getFileName());
break;
}
@@ -287,22 +288,5 @@ getObjectFileInterface(ExecutionSession &ES, MemoryBufferRef ObjBuffer) {
return getGenericObjectFileSymbolInfo(ES, **Obj);
}
-bool hasInitializerSection(jitlink::LinkGraph &G) {
- bool IsMachO = G.getTargetTriple().isOSBinFormatMachO();
- bool IsElf = G.getTargetTriple().isOSBinFormatELF();
- if (!IsMachO && !IsElf)
- return false;
-
- for (auto &Sec : G.sections()) {
- if (IsMachO && std::apply(MachOPlatform::isInitializerSection,
- Sec.getName().split(",")))
- return true;
- if (IsElf && ELFNixPlatform::isInitializerSection(Sec.getName()))
- return true;
- }
-
- return false;
-}
-
} // End namespace orc.
} // End namespace llvm.
diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
index 2b11c472e812..a29f3d1c3aec 100644
--- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp
@@ -8,8 +8,10 @@
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h"
+#include "llvm/ExecutionEngine/JITLink/aarch32.h"
#include "llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h"
#include "llvm/ExecutionEngine/Orc/ObjectFileInterface.h"
+#include "llvm/ExecutionEngine/Orc/Shared/ObjectFormats.h"
#include "llvm/Support/MemoryBuffer.h"
#include <string>
#include <vector>
@@ -22,6 +24,55 @@ using namespace llvm::orc;
namespace {
+bool hasInitializerSection(jitlink::LinkGraph &G) {
+ bool IsMachO = G.getTargetTriple().isOSBinFormatMachO();
+ bool IsElf = G.getTargetTriple().isOSBinFormatELF();
+ if (!IsMachO && !IsElf)
+ return false;
+
+ for (auto &Sec : G.sections()) {
+ if (IsMachO && isMachOInitializerSection(Sec.getName()))
+ return true;
+ if (IsElf && isELFInitializerSection(Sec.getName()))
+ return true;
+ }
+
+ return false;
+}
+
+ExecutorAddr getJITSymbolPtrForSymbol(Symbol &Sym, const Triple &TT) {
+ switch (TT.getArch()) {
+ case Triple::arm:
+ case Triple::armeb:
+ case Triple::thumb:
+ case Triple::thumbeb:
+ if (Sym.hasTargetFlags(aarch32::ThumbSymbol)) {
+ // Set LSB to indicate thumb target
+ assert(Sym.isCallable() && "Only callable symbols can have thumb flag");
+ assert((Sym.getAddress().getValue() & 0x01) == 0 && "LSB is clear");
+ return Sym.getAddress() + 0x01;
+ }
+ return Sym.getAddress();
+ default:
+ return Sym.getAddress();
+ }
+}
+
+JITSymbolFlags getJITSymbolFlagsForSymbol(Symbol &Sym) {
+ JITSymbolFlags Flags;
+
+ if (Sym.getLinkage() == Linkage::Weak)
+ Flags |= JITSymbolFlags::Weak;
+
+ if (Sym.getScope() == Scope::Default)
+ Flags |= JITSymbolFlags::Exported;
+
+ if (Sym.isCallable())
+ Flags |= JITSymbolFlags::Callable;
+
+ return Flags;
+}
+
class LinkGraphMaterializationUnit : public MaterializationUnit {
public:
static std::unique_ptr<LinkGraphMaterializationUnit>
@@ -48,14 +99,8 @@ private:
continue;
assert(Sym->hasName() && "Anonymous non-local symbol?");
- JITSymbolFlags Flags;
- if (Sym->getScope() == Scope::Default)
- Flags |= JITSymbolFlags::Exported;
-
- if (Sym->isCallable())
- Flags |= JITSymbolFlags::Callable;
-
- LGI.SymbolFlags[ES.intern(Sym->getName())] = Flags;
+ LGI.SymbolFlags[ES.intern(Sym->getName())] =
+ getJITSymbolFlagsForSymbol(*Sym);
}
if (hasInitializerSection(G))
@@ -189,17 +234,9 @@ public:
for (auto *Sym : G.defined_symbols())
if (Sym->hasName() && Sym->getScope() != Scope::Local) {
auto InternedName = ES.intern(Sym->getName());
- JITSymbolFlags Flags;
-
- if (Sym->isCallable())
- Flags |= JITSymbolFlags::Callable;
- if (Sym->getScope() == Scope::Default)
- Flags |= JITSymbolFlags::Exported;
- if (Sym->getLinkage() == Linkage::Weak)
- Flags |= JITSymbolFlags::Weak;
-
- InternedResult[InternedName] =
- JITEvaluatedSymbol(Sym->getAddress().getValue(), Flags);
+ auto Ptr = getJITSymbolPtrForSymbol(*Sym, G.getTargetTriple());
+ auto Flags = getJITSymbolFlagsForSymbol(*Sym);
+ InternedResult[InternedName] = {Ptr, Flags};
if (AutoClaim && !MR->getSymbols().count(InternedName)) {
assert(!ExtraSymbolsToClaim.count(InternedName) &&
"Duplicate symbol to claim?");
@@ -210,15 +247,9 @@ public:
for (auto *Sym : G.absolute_symbols())
if (Sym->hasName() && Sym->getScope() != Scope::Local) {
auto InternedName = ES.intern(Sym->getName());
- JITSymbolFlags Flags;
- if (Sym->isCallable())
- Flags |= JITSymbolFlags::Callable;
- if (Sym->getScope() == Scope::Default)
- Flags |= JITSymbolFlags::Exported;
- if (Sym->getLinkage() == Linkage::Weak)
- Flags |= JITSymbolFlags::Weak;
- InternedResult[InternedName] =
- JITEvaluatedSymbol(Sym->getAddress().getValue(), Flags);
+ auto Ptr = getJITSymbolPtrForSymbol(*Sym, G.getTargetTriple());
+ auto Flags = getJITSymbolFlagsForSymbol(*Sym);
+ InternedResult[InternedName] = {Ptr, Flags};
if (AutoClaim && !MR->getSymbols().count(InternedName)) {
assert(!ExtraSymbolsToClaim.count(InternedName) &&
"Duplicate symbol to claim?");
@@ -407,10 +438,8 @@ private:
Sym->getScope() != Scope::Local) {
auto Name = ES.intern(Sym->getName());
if (!MR->getSymbols().count(ES.intern(Sym->getName()))) {
- JITSymbolFlags SF = JITSymbolFlags::Weak;
- if (Sym->getScope() == Scope::Default)
- SF |= JITSymbolFlags::Exported;
- NewSymbolsToClaim[Name] = SF;
+ NewSymbolsToClaim[Name] =
+ getJITSymbolFlagsForSymbol(*Sym) | JITSymbolFlags::Weak;
NameToSym.push_back(std::make_pair(std::move(Name), Sym));
}
}
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
index 48dd0df80415..6d568199378a 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp
@@ -14,17 +14,17 @@
#define DEBUG_TYPE "orc"
using namespace llvm;
+using namespace llvm::orc;
template <typename ORCABI>
-bool stubAndPointerRangesOk(JITTargetAddress StubBlockAddr,
- JITTargetAddress PointerBlockAddr,
- unsigned NumStubs) {
+static bool stubAndPointerRangesOk(ExecutorAddr StubBlockAddr,
+ ExecutorAddr PointerBlockAddr,
+ unsigned NumStubs) {
constexpr unsigned MaxDisp = ORCABI::StubToPointerMaxDisplacement;
- JITTargetAddress FirstStub = StubBlockAddr;
- JITTargetAddress LastStub = FirstStub + ((NumStubs - 1) * ORCABI::StubSize);
- JITTargetAddress FirstPointer = PointerBlockAddr;
- JITTargetAddress LastPointer =
- FirstPointer + ((NumStubs - 1) * ORCABI::StubSize);
+ ExecutorAddr FirstStub = StubBlockAddr;
+ ExecutorAddr LastStub = FirstStub + ((NumStubs - 1) * ORCABI::StubSize);
+ ExecutorAddr FirstPointer = PointerBlockAddr;
+ ExecutorAddr LastPointer = FirstPointer + ((NumStubs - 1) * ORCABI::StubSize);
if (FirstStub < FirstPointer) {
if (LastStub >= FirstPointer)
@@ -44,9 +44,9 @@ namespace llvm {
namespace orc {
void OrcAArch64::writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddress,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr) {
+ ExecutorAddr ResolverTargetAddress,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr) {
const uint32_t ResolverCode[] = {
// resolver_entry:
@@ -135,8 +135,8 @@ void OrcAArch64::writeResolverCode(char *ResolverWorkingMem,
}
void OrcAArch64::writeTrampolines(char *TrampolineBlockWorkingMem,
- JITTargetAddress TrampolineBlockTargetAddress,
- JITTargetAddress ResolverAddr,
+ ExecutorAddr TrampolineBlockTargetAddress,
+ ExecutorAddr ResolverAddr,
unsigned NumTrampolines) {
unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8);
@@ -159,17 +159,17 @@ void OrcAArch64::writeTrampolines(char *TrampolineBlockWorkingMem,
}
void OrcAArch64::writeIndirectStubsBlock(
- char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
- JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) {
+ char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
+ ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
// Stub format is:
//
// .section __orc_stubs
// stub1:
- // ldr x0, ptr1 ; PC-rel load of ptr1
- // br x0 ; Jump to resolver
+ // ldr x16, ptr1 ; PC-rel load of ptr1
+ // br x16 ; Jump to resolver
// stub2:
- // ldr x0, ptr2 ; PC-rel load of ptr2
- // br x0 ; Jump to resolver
+ // ldr x16, ptr2 ; PC-rel load of ptr2
+ // br x16 ; Jump to resolver
//
// ...
//
@@ -188,17 +188,19 @@ void OrcAArch64::writeIndirectStubsBlock(
"PointersBlock is out of range");
uint64_t PtrDisplacement =
PointersBlockTargetAddress - StubsBlockTargetAddress;
+ assert((PtrDisplacement % 8 == 0) &&
+ "Displacement to pointer is not a multiple of 8");
uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem);
- uint64_t PtrOffsetField = PtrDisplacement << 3;
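+ // The stub's first instruction is "ldr x16, <literal>"; its imm19 field at
+ // bits [23:5] encodes the word-scaled displacement to the pointer, hence
+ // the ">> 2" and "<< 5".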
+ uint64_t PtrOffsetField = ((PtrDisplacement >> 2) & 0x7ffff) << 5;
for (unsigned I = 0; I < NumStubs; ++I)
Stub[I] = 0xd61f020058000010 | PtrOffsetField;
}
-void OrcX86_64_Base::writeTrampolines(
- char *TrampolineBlockWorkingMem,
- JITTargetAddress TrampolineBlockTargetAddress,
- JITTargetAddress ResolverAddr, unsigned NumTrampolines) {
+void OrcX86_64_Base::writeTrampolines(char *TrampolineBlockWorkingMem,
+ ExecutorAddr TrampolineBlockTargetAddress,
+ ExecutorAddr ResolverAddr,
+ unsigned NumTrampolines) {
unsigned OffsetToPtr = NumTrampolines * TrampolineSize;
@@ -214,8 +216,8 @@ void OrcX86_64_Base::writeTrampolines(
}
void OrcX86_64_Base::writeIndirectStubsBlock(
- char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
- JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) {
+ char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
+ ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
// Stub format is:
//
// .section __orc_stubs
@@ -250,9 +252,9 @@ void OrcX86_64_Base::writeIndirectStubsBlock(
}
void OrcX86_64_SysV::writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddress,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr) {
+ ExecutorAddr ResolverTargetAddress,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr) {
LLVM_DEBUG({
dbgs() << "Writing resolver code to "
@@ -324,9 +326,9 @@ void OrcX86_64_SysV::writeResolverCode(char *ResolverWorkingMem,
}
void OrcX86_64_Win32::writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddress,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr) {
+ ExecutorAddr ResolverTargetAddress,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr) {
// resolverCode is similar to OrcX86_64 with differences specific to windows
// x64 calling convention: arguments go into rcx, rdx and come in reverse
@@ -402,12 +404,13 @@ void OrcX86_64_Win32::writeResolverCode(char *ResolverWorkingMem,
}
void OrcI386::writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddress,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr) {
+ ExecutorAddr ResolverTargetAddress,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr) {
- assert((ReentryFnAddr >> 32) == 0 && "ReentryFnAddr out of range");
- assert((ReentryCtxAddr >> 32) == 0 && "ReentryCtxAddr out of range");
+ assert((ReentryFnAddr.getValue() >> 32) == 0 && "ReentryFnAddr out of range");
+ assert((ReentryCtxAddr.getValue() >> 32) == 0 &&
+ "ReentryCtxAddr out of range");
const uint8_t ResolverCode[] = {
// resolver_entry:
@@ -455,10 +458,10 @@ void OrcI386::writeResolverCode(char *ResolverWorkingMem,
}
void OrcI386::writeTrampolines(char *TrampolineWorkingMem,
- JITTargetAddress TrampolineBlockTargetAddress,
- JITTargetAddress ResolverAddr,
+ ExecutorAddr TrampolineBlockTargetAddress,
+ ExecutorAddr ResolverAddr,
unsigned NumTrampolines) {
- assert((ResolverAddr >> 32) == 0 && "ResolverAddr out of range");
+ assert((ResolverAddr.getValue() >> 32) == 0 && "ResolverAddr out of range");
uint64_t CallRelImm = 0xF1C4C400000000e8;
uint64_t ResolverRel = ResolverAddr - TrampolineBlockTargetAddress - 5;
@@ -468,12 +471,13 @@ void OrcI386::writeTrampolines(char *TrampolineWorkingMem,
Trampolines[I] = CallRelImm | (ResolverRel << 8);
}
-void OrcI386::writeIndirectStubsBlock(
- char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
- JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) {
- assert((StubsBlockTargetAddress >> 32) == 0 &&
+void OrcI386::writeIndirectStubsBlock(char *StubsBlockWorkingMem,
+ ExecutorAddr StubsBlockTargetAddress,
+ ExecutorAddr PointersBlockTargetAddress,
+ unsigned NumStubs) {
+ assert((StubsBlockTargetAddress.getValue() >> 32) == 0 &&
"StubsBlockTargetAddress is out of range");
- assert((PointersBlockTargetAddress >> 32) == 0 &&
+ assert((PointersBlockTargetAddress.getValue() >> 32) == 0 &&
"PointersBlockTargetAddress is out of range");
// Stub format is:
@@ -501,15 +505,15 @@ void OrcI386::writeIndirectStubsBlock(
"PointersBlock is out of range");
uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem);
- uint64_t PtrAddr = PointersBlockTargetAddress;
+ uint64_t PtrAddr = PointersBlockTargetAddress.getValue();
for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 4)
Stub[I] = 0xF1C40000000025ff | (PtrAddr << 16);
}
void OrcMips32_Base::writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddress,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr,
+ ExecutorAddr ResolverTargetAddress,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr,
bool isBigEndian) {
const uint32_t ResolverCode[] = {
@@ -596,32 +600,32 @@ void OrcMips32_Base::writeResolverCode(char *ResolverWorkingMem,
memcpy(ResolverWorkingMem + Offsett, &MoveVxT9, sizeof(MoveVxT9));
uint32_t ReentryCtxLUi =
- 0x3c040000 | (((ReentryCtxAddr + 0x8000) >> 16) & 0xFFFF);
- uint32_t ReentryCtxADDiu = 0x24840000 | ((ReentryCtxAddr)&0xFFFF);
+ 0x3c040000 | (((ReentryCtxAddr.getValue() + 0x8000) >> 16) & 0xFFFF);
+ uint32_t ReentryCtxADDiu = 0x24840000 | (ReentryCtxAddr.getValue() & 0xFFFF);
memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLUi,
sizeof(ReentryCtxLUi));
memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 4, &ReentryCtxADDiu,
sizeof(ReentryCtxADDiu));
uint32_t ReentryFnLUi =
- 0x3c190000 | (((ReentryFnAddr + 0x8000) >> 16) & 0xFFFF);
- uint32_t ReentryFnADDiu = 0x27390000 | ((ReentryFnAddr)&0xFFFF);
+ 0x3c190000 | (((ReentryFnAddr.getValue() + 0x8000) >> 16) & 0xFFFF);
+ uint32_t ReentryFnADDiu = 0x27390000 | (ReentryFnAddr.getValue() & 0xFFFF);
memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnLUi,
sizeof(ReentryFnLUi));
memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 4, &ReentryFnADDiu,
sizeof(ReentryFnADDiu));
}
-void OrcMips32_Base::writeTrampolines(
- char *TrampolineBlockWorkingMem,
- JITTargetAddress TrampolineBlockTargetAddress,
- JITTargetAddress ResolverAddr, unsigned NumTrampolines) {
+void OrcMips32_Base::writeTrampolines(char *TrampolineBlockWorkingMem,
+ ExecutorAddr TrampolineBlockTargetAddress,
+ ExecutorAddr ResolverAddr,
+ unsigned NumTrampolines) {
- assert((ResolverAddr >> 32) == 0 && "ResolverAddr out of range");
+ assert((ResolverAddr.getValue() >> 32) == 0 && "ResolverAddr out of range");
uint32_t *Trampolines =
reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
- uint32_t RHiAddr = ((ResolverAddr + 0x8000) >> 16);
+ uint32_t RHiAddr = ((ResolverAddr.getValue() + 0x8000) >> 16);
for (unsigned I = 0; I < NumTrampolines; ++I) {
// move $t8,$ra
@@ -631,16 +635,16 @@ void OrcMips32_Base::writeTrampolines(
// nop
Trampolines[5 * I + 0] = 0x03e0c025;
Trampolines[5 * I + 1] = 0x3c190000 | (RHiAddr & 0xFFFF);
- Trampolines[5 * I + 2] = 0x27390000 | (ResolverAddr & 0xFFFF);
+ Trampolines[5 * I + 2] = 0x27390000 | (ResolverAddr.getValue() & 0xFFFF);
Trampolines[5 * I + 3] = 0x0320f809;
Trampolines[5 * I + 4] = 0x00000000;
}
}
void OrcMips32_Base::writeIndirectStubsBlock(
- char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
- JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) {
- assert((StubsBlockTargetAddress >> 32) == 0 &&
+ char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
+ ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
+ assert((StubsBlockTargetAddress.getValue() >> 32) == 0 &&
"InitialPtrVal is out of range");
// Stub format is:
@@ -671,7 +675,7 @@ void OrcMips32_Base::writeIndirectStubsBlock(
// Populate the stubs page stubs and mark it executable.
uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
- uint64_t PtrAddr = PointersBlockTargetAddress;
+ uint64_t PtrAddr = PointersBlockTargetAddress.getValue();
for (unsigned I = 0; I < NumStubs; ++I) {
uint32_t HiAddr = ((PtrAddr + 0x8000) >> 16);
@@ -684,9 +688,9 @@ void OrcMips32_Base::writeIndirectStubsBlock(
}
void OrcMips64::writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddress,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr) {
+ ExecutorAddr ResolverTargetAddress,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr) {
const uint32_t ResolverCode[] = {
//resolver_entry:
@@ -775,14 +779,16 @@ void OrcMips64::writeResolverCode(char *ResolverWorkingMem,
memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
uint32_t ReentryCtxLUi =
- 0x3c040000 | (((ReentryCtxAddr + 0x800080008000) >> 48) & 0xFFFF);
+ 0x3c040000 |
+ (((ReentryCtxAddr.getValue() + 0x800080008000) >> 48) & 0xFFFF);
uint32_t ReentryCtxDADDiu =
- 0x64840000 | (((ReentryCtxAddr + 0x80008000) >> 32) & 0xFFFF);
+ 0x64840000 | (((ReentryCtxAddr.getValue() + 0x80008000) >> 32) & 0xFFFF);
uint32_t ReentryCtxDSLL = 0x00042438;
uint32_t ReentryCtxDADDiu2 =
- 0x64840000 | ((((ReentryCtxAddr + 0x8000) >> 16) & 0xFFFF));
+ 0x64840000 | ((((ReentryCtxAddr.getValue() + 0x8000) >> 16) & 0xFFFF));
uint32_t ReentryCtxDSLL2 = 0x00042438;
- uint32_t ReentryCtxDADDiu3 = 0x64840000 | ((ReentryCtxAddr)&0xFFFF);
+ uint32_t ReentryCtxDADDiu3 =
+ 0x64840000 | (ReentryCtxAddr.getValue() & 0xFFFF);
memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLUi,
sizeof(ReentryCtxLUi));
@@ -798,19 +804,20 @@ void OrcMips64::writeResolverCode(char *ResolverWorkingMem,
sizeof(ReentryCtxDADDiu3));
uint32_t ReentryFnLUi =
- 0x3c190000 | (((ReentryFnAddr + 0x800080008000) >> 48) & 0xFFFF);
+ 0x3c190000 |
+ (((ReentryFnAddr.getValue() + 0x800080008000) >> 48) & 0xFFFF);
uint32_t ReentryFnDADDiu =
- 0x67390000 | (((ReentryFnAddr + 0x80008000) >> 32) & 0xFFFF);
+ 0x67390000 | (((ReentryFnAddr.getValue() + 0x80008000) >> 32) & 0xFFFF);
uint32_t ReentryFnDSLL = 0x0019cc38;
uint32_t ReentryFnDADDiu2 =
- 0x67390000 | (((ReentryFnAddr + 0x8000) >> 16) & 0xFFFF);
+ 0x67390000 | (((ReentryFnAddr.getValue() + 0x8000) >> 16) & 0xFFFF);
uint32_t ReentryFnDSLL2 = 0x0019cc38;
- uint32_t ReentryFnDADDiu3 = 0x67390000 | ((ReentryFnAddr)&0xFFFF);
+ uint32_t ReentryFnDADDiu3 = 0x67390000 | (ReentryFnAddr.getValue() & 0xFFFF);
memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnLUi,
sizeof(ReentryFnLUi));
@@ -827,16 +834,16 @@ void OrcMips64::writeResolverCode(char *ResolverWorkingMem,
}
void OrcMips64::writeTrampolines(char *TrampolineBlockWorkingMem,
- JITTargetAddress TrampolineBlockTargetAddress,
- JITTargetAddress ResolverAddr,
+ ExecutorAddr TrampolineBlockTargetAddress,
+ ExecutorAddr ResolverAddr,
unsigned NumTrampolines) {
uint32_t *Trampolines =
reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
- uint64_t HeighestAddr = ((ResolverAddr + 0x800080008000) >> 48);
- uint64_t HeigherAddr = ((ResolverAddr + 0x80008000) >> 32);
- uint64_t HiAddr = ((ResolverAddr + 0x8000) >> 16);
+ uint64_t HeighestAddr = ((ResolverAddr.getValue() + 0x800080008000) >> 48);
+ uint64_t HeigherAddr = ((ResolverAddr.getValue() + 0x80008000) >> 32);
+ uint64_t HiAddr = ((ResolverAddr.getValue() + 0x8000) >> 16);
for (unsigned I = 0; I < NumTrampolines; ++I) {
Trampolines[10 * I + 0] = 0x03e0c025; // move $t8,$ra
@@ -845,17 +852,18 @@ void OrcMips64::writeTrampolines(char *TrampolineBlockWorkingMem,
Trampolines[10 * I + 3] = 0x0019cc38; // dsll $t9,$t9,16
Trampolines[10 * I + 4] = 0x67390000 | (HiAddr & 0xFFFF); // daddiu $t9,$t9,%hi(ptr)
Trampolines[10 * I + 5] = 0x0019cc38; // dsll $t9,$t9,16
- Trampolines[10 * I + 6] =
- 0x67390000 | (ResolverAddr & 0xFFFF); // daddiu $t9,$t9,%lo(ptr)
+ Trampolines[10 * I + 6] = 0x67390000 | (ResolverAddr.getValue() &
+ 0xFFFF); // daddiu $t9,$t9,%lo(ptr)
Trampolines[10 * I + 7] = 0x0320f809; // jalr $t9
Trampolines[10 * I + 8] = 0x00000000; // nop
Trampolines[10 * I + 9] = 0x00000000; // nop
}
}
-void OrcMips64::writeIndirectStubsBlock(
- char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
- JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) {
+void OrcMips64::writeIndirectStubsBlock(char *StubsBlockWorkingMem,
+ ExecutorAddr StubsBlockTargetAddress,
+ ExecutorAddr PointersBlockTargetAddress,
+ unsigned NumStubs) {
// Stub format is:
//
// .section __orc_stubs
@@ -890,7 +898,7 @@ void OrcMips64::writeIndirectStubsBlock(
// Populate the stubs page stubs and mark it executable.
uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
- uint64_t PtrAddr = PointersBlockTargetAddress;
+ uint64_t PtrAddr = PointersBlockTargetAddress.getValue();
for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 8) {
uint64_t HeighestAddr = ((PtrAddr + 0x800080008000) >> 48);
@@ -908,9 +916,9 @@ void OrcMips64::writeIndirectStubsBlock(
}
void OrcRiscv64::writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddress,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr) {
+ ExecutorAddr ResolverTargetAddress,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr) {
const uint32_t ResolverCode[] = {
0xef810113, // 0x00: addi sp,sp,-264
@@ -1008,8 +1016,8 @@ void OrcRiscv64::writeResolverCode(char *ResolverWorkingMem,
}
void OrcRiscv64::writeTrampolines(char *TrampolineBlockWorkingMem,
- JITTargetAddress TrampolineBlockTargetAddress,
- JITTargetAddress ResolverAddr,
+ ExecutorAddr TrampolineBlockTargetAddress,
+ ExecutorAddr ResolverAddr,
unsigned NumTrampolines) {
unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8);
@@ -1031,8 +1039,8 @@ void OrcRiscv64::writeTrampolines(char *TrampolineBlockWorkingMem,
}
void OrcRiscv64::writeIndirectStubsBlock(
- char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
- JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) {
+ char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
+ ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
// Stub format is:
//
// .section __orc_stubs
@@ -1078,9 +1086,9 @@ void OrcRiscv64::writeIndirectStubsBlock(
}
void OrcLoongArch64::writeResolverCode(char *ResolverWorkingMem,
- JITTargetAddress ResolverTargetAddress,
- JITTargetAddress ReentryFnAddr,
- JITTargetAddress ReentryCtxAddr) {
+ ExecutorAddr ResolverTargetAddress,
+ ExecutorAddr ReentryFnAddr,
+ ExecutorAddr ReentryCtxAddr) {
LLVM_DEBUG({
dbgs() << "Writing resolver code to "
@@ -1150,10 +1158,10 @@ void OrcLoongArch64::writeResolverCode(char *ResolverWorkingMem,
sizeof(uint64_t));
}
-void OrcLoongArch64::writeTrampolines(
- char *TrampolineBlockWorkingMem,
- JITTargetAddress TrampolineBlockTargetAddress,
- JITTargetAddress ResolverAddr, unsigned NumTrampolines) {
+void OrcLoongArch64::writeTrampolines(char *TrampolineBlockWorkingMem,
+ ExecutorAddr TrampolineBlockTargetAddress,
+ ExecutorAddr ResolverAddr,
+ unsigned NumTrampolines) {
LLVM_DEBUG({
dbgs() << "Writing trampoline code to "
@@ -1181,8 +1189,8 @@ void OrcLoongArch64::writeTrampolines(
}
void OrcLoongArch64::writeIndirectStubsBlock(
- char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress,
- JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) {
+ char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
+ ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
// Stub format is:
//
// .section __orc_stubs
diff --git a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
index b823197b404f..a73aec6d98c6 100644
--- a/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/OrcV2CBindings.cpp
@@ -184,8 +184,8 @@ static SymbolMap toSymbolMap(LLVMOrcCSymbolMapPairs Syms, size_t NumPairs) {
SymbolMap SM;
for (size_t I = 0; I != NumPairs; ++I) {
JITSymbolFlags Flags = toJITSymbolFlags(Syms[I].Sym.Flags);
- SM[OrcV2CAPIHelper::moveToSymbolStringPtr(unwrap(Syms[I].Name))] =
- JITEvaluatedSymbol(Syms[I].Sym.Address, Flags);
+ SM[OrcV2CAPIHelper::moveToSymbolStringPtr(unwrap(Syms[I].Name))] = {
+ ExecutorAddr(Syms[I].Sym.Address), Flags};
}
return SM;
}
@@ -269,8 +269,8 @@ static LLVMOrcSymbolLookupFlags fromSymbolLookupFlags(SymbolLookupFlags SLF) {
}
static LLVMJITEvaluatedSymbol
-fromJITEvaluatedSymbol(const JITEvaluatedSymbol &S) {
- return {S.getAddress(), fromJITSymbolFlags(S.getFlags())};
+fromExecutorSymbolDef(const ExecutorSymbolDef &S) {
+ return {S.getAddress().getValue(), fromJITSymbolFlags(S.getFlags())};
}
} // end anonymous namespace
@@ -385,7 +385,7 @@ void LLVMOrcExecutionSessionLookup(
for (auto &KV : *Result)
CResult.push_back(LLVMOrcCSymbolMapPair{
wrap(OrcV2CAPIHelper::getRawPoolEntryPtr(KV.first)),
- fromJITEvaluatedSymbol(KV.second)});
+ fromExecutorSymbolDef(KV.second)});
HandleResult(LLVMErrorSuccess, CResult.data(), CResult.size(), Ctx);
} else
HandleResult(wrap(Result.takeError()), nullptr, 0, Ctx);
@@ -741,31 +741,19 @@ LLVMErrorRef LLVMOrcCreateDynamicLibrarySearchGeneratorForPath(
LLVMErrorRef LLVMOrcCreateStaticLibrarySearchGeneratorForPath(
LLVMOrcDefinitionGeneratorRef *Result, LLVMOrcObjectLayerRef ObjLayer,
- const char *FileName, const char *TargetTriple) {
+ const char *FileName) {
assert(Result && "Result can not be null");
assert(FileName && "Filename can not be null");
assert(ObjLayer && "ObjectLayer can not be null");
- if (TargetTriple) {
- auto TT = Triple(TargetTriple);
- auto LibrarySymsGenerator =
- StaticLibraryDefinitionGenerator::Load(*unwrap(ObjLayer), FileName, TT);
- if (!LibrarySymsGenerator) {
- *Result = nullptr;
- return wrap(LibrarySymsGenerator.takeError());
- }
- *Result = wrap(LibrarySymsGenerator->release());
- return LLVMErrorSuccess;
- } else {
- auto LibrarySymsGenerator =
- StaticLibraryDefinitionGenerator::Load(*unwrap(ObjLayer), FileName);
- if (!LibrarySymsGenerator) {
- *Result = nullptr;
- return wrap(LibrarySymsGenerator.takeError());
- }
- *Result = wrap(LibrarySymsGenerator->release());
- return LLVMErrorSuccess;
+ auto LibrarySymsGenerator =
+ StaticLibraryDefinitionGenerator::Load(*unwrap(ObjLayer), FileName);
+ if (!LibrarySymsGenerator) {
+ *Result = nullptr;
+ return wrap(LibrarySymsGenerator.takeError());
}
+ *Result = wrap(LibrarySymsGenerator->release());
+ return LLVMErrorSuccess;
}
LLVMOrcThreadSafeContextRef LLVMOrcCreateNewThreadSafeContext(void) {
@@ -859,9 +847,9 @@ LLVMErrorRef LLVMOrcObjectLayerAddObjectFile(LLVMOrcObjectLayerRef ObjLayer,
*unwrap(JD), std::unique_ptr<MemoryBuffer>(unwrap(ObjBuffer))));
}
-LLVMErrorRef LLVMOrcLLJITAddObjectFileWithRT(LLVMOrcObjectLayerRef ObjLayer,
- LLVMOrcResourceTrackerRef RT,
- LLVMMemoryBufferRef ObjBuffer) {
+LLVMErrorRef LLVMOrcObjectLayerAddObjectFileWithRT(LLVMOrcObjectLayerRef ObjLayer,
+ LLVMOrcResourceTrackerRef RT,
+ LLVMMemoryBufferRef ObjBuffer) {
return wrap(
unwrap(ObjLayer)->add(ResourceTrackerSP(unwrap(RT)),
std::unique_ptr<MemoryBuffer>(unwrap(ObjBuffer))));
@@ -1210,8 +1198,8 @@ LLVMErrorRef LLVMOrcCreateLocalLazyCallThroughManager(
const char *TargetTriple, LLVMOrcExecutionSessionRef ES,
LLVMOrcJITTargetAddress ErrorHandlerAddr,
LLVMOrcLazyCallThroughManagerRef *Result) {
- auto LCTM = createLocalLazyCallThroughManager(Triple(TargetTriple),
- *unwrap(ES), ErrorHandlerAddr);
+ auto LCTM = createLocalLazyCallThroughManager(
+ Triple(TargetTriple), *unwrap(ES), ExecutorAddr(ErrorHandlerAddr));
if (!LCTM)
return wrap(LCTM.takeError());
diff --git a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
index 07b19b2e54f1..9ef333222028 100644
--- a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp
@@ -38,7 +38,8 @@ public:
LookupResult Result;
for (auto &KV : *InternedResult)
- Result[*KV.first] = std::move(KV.second);
+ Result[*KV.first] = {KV.second.getAddress().getValue(),
+ KV.second.getFlags()};
OnResolved(Result);
};
@@ -326,7 +327,7 @@ Error RTDyldObjectLinkingLayer::onObjLoad(
} else if (AutoClaimObjectSymbols)
ExtraSymbolsToClaim[InternedName] = Flags;
- Symbols[InternedName] = JITEvaluatedSymbol(KV.second.getAddress(), Flags);
+ Symbols[InternedName] = {ExecutorAddr(KV.second.getAddress()), Flags};
}
if (!ExtraSymbolsToClaim.empty()) {
diff --git a/llvm/lib/ExecutionEngine/Orc/Shared/ObjectFormats.cpp b/llvm/lib/ExecutionEngine/Orc/Shared/ObjectFormats.cpp
new file mode 100644
index 000000000000..ecf5e2915773
--- /dev/null
+++ b/llvm/lib/ExecutionEngine/Orc/Shared/ObjectFormats.cpp
@@ -0,0 +1,94 @@
+//===---------- ObjectFormats.cpp - Object format details for ORC ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// ORC-specific object format details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/Shared/ObjectFormats.h"
+
+namespace llvm {
+namespace orc {
+
+StringRef MachODataCommonSectionName = "__DATA,__common";
+StringRef MachODataDataSectionName = "__DATA,__data";
+StringRef MachOEHFrameSectionName = "__TEXT,__eh_frame";
+StringRef MachOCompactUnwindInfoSectionName = "__TEXT,__unwind_info";
+StringRef MachOModInitFuncSectionName = "__DATA,__mod_init_func";
+StringRef MachOObjCCatListSectionName = "__DATA,__objc_catlist";
+StringRef MachOObjCCatList2SectionName = "__DATA,__objc_catlist2";
+StringRef MachOObjCClassListSectionName = "__DATA,__objc_classlist";
+StringRef MachOObjCClassNameSectionName = "__TEXT,__objc_classname";
+StringRef MachOObjCClassRefsSectionName = "__DATA,__objc_classrefs";
+StringRef MachOObjCConstSectionName = "__DATA,__objc_const";
+StringRef MachOObjCDataSectionName = "__DATA,__objc_data";
+StringRef MachOObjCImageInfoSectionName = "__DATA,__objc_imageinfo";
+StringRef MachOObjCMethNameSectionName = "__TEXT,__objc_methname";
+StringRef MachOObjCMethTypeSectionName = "__TEXT,__objc_methtype";
+StringRef MachOObjCNLCatListSectionName = "__DATA,__objc_nlcatlist";
+StringRef MachOObjCSelRefsSectionName = "__DATA,__objc_selrefs";
+StringRef MachOSwift5ProtoSectionName = "__TEXT,__swift5_proto";
+StringRef MachOSwift5ProtosSectionName = "__TEXT,__swift5_protos";
+StringRef MachOSwift5TypesSectionName = "__TEXT,__swift5_types";
+StringRef MachOSwift5TypeRefSectionName = "__TEXT,__swift5_typeref";
+StringRef MachOSwift5FieldMetadataSectionName = "__TEXT,__swift5_fieldmd";
+StringRef MachOSwift5EntrySectionName = "__TEXT,__swift5_entry";
+StringRef MachOThreadBSSSectionName = "__DATA,__thread_bss";
+StringRef MachOThreadDataSectionName = "__DATA,__thread_data";
+StringRef MachOThreadVarsSectionName = "__DATA,__thread_vars";
+
+StringRef MachOInitSectionNames[19] = {
+ MachOModInitFuncSectionName, MachOObjCCatListSectionName,
+ MachOObjCCatList2SectionName, MachOObjCClassListSectionName,
+ MachOObjCClassNameSectionName, MachOObjCClassRefsSectionName,
+ MachOObjCConstSectionName, MachOObjCDataSectionName,
+ MachOObjCImageInfoSectionName, MachOObjCMethNameSectionName,
+ MachOObjCMethTypeSectionName, MachOObjCNLCatListSectionName,
+ MachOObjCSelRefsSectionName, MachOSwift5ProtoSectionName,
+ MachOSwift5ProtosSectionName, MachOSwift5TypesSectionName,
+ MachOSwift5TypeRefSectionName, MachOSwift5FieldMetadataSectionName,
+ MachOSwift5EntrySectionName,
+};
+
+StringRef ELFEHFrameSectionName = ".eh_frame";
+StringRef ELFInitArrayFuncSectionName = ".init_array";
+
+StringRef ELFThreadBSSSectionName = ".tbss";
+StringRef ELFThreadDataSectionName = ".tdata";
+
+bool isMachOInitializerSection(StringRef SegName, StringRef SecName) {
+ for (auto &InitSection : MachOInitSectionNames) {
+ // The check below assumes that all MachO init sections have a
+ // length-6 segment name.
+ assert(InitSection[6] == ',' && "Init section seg name has length != 6");
+ if (InitSection.starts_with(SegName) && InitSection.substr(7) == SecName)
+ return true;
+ }
+ return false;
+}
+
+bool isMachOInitializerSection(StringRef QualifiedName) {
+ for (auto &InitSection : MachOInitSectionNames)
+ if (InitSection == QualifiedName)
+ return true;
+ return false;
+}
+
+bool isELFInitializerSection(StringRef SecName) {
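+ // Matches ".init_array" itself as well as priority-suffixed variants such
+ // as ".init_array.0".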
+ if (SecName.consume_front(ELFInitArrayFuncSectionName) &&
+ (SecName.empty() || SecName[0] == '.'))
+ return true;
+ return false;
+}
+
+bool isCOFFInitializerSection(StringRef SecName) {
+ return SecName.startswith(".CRT");
+}
+
+} // namespace orc
+} // namespace llvm
diff --git a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
index 1bd10c9c6c0e..3d3ca891d881 100644
--- a/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/SimpleRemoteEPC.cpp
@@ -49,7 +49,7 @@ Expected<int32_t> SimpleRemoteEPC::runAsMain(ExecutorAddr MainFnAddr,
ArrayRef<std::string> Args) {
int64_t Result = 0;
if (auto Err = callSPSWrapper<rt::SPSRunAsMainSignature>(
- RunAsMainAddr, Result, ExecutorAddr(MainFnAddr), Args))
+ RunAsMainAddr, Result, MainFnAddr, Args))
return std::move(Err);
return Result;
}
@@ -57,7 +57,7 @@ Expected<int32_t> SimpleRemoteEPC::runAsMain(ExecutorAddr MainFnAddr,
Expected<int32_t> SimpleRemoteEPC::runAsVoidFunction(ExecutorAddr VoidFnAddr) {
int32_t Result = 0;
if (auto Err = callSPSWrapper<rt::SPSRunAsVoidFunctionSignature>(
- RunAsVoidFunctionAddr, Result, ExecutorAddr(VoidFnAddr)))
+ RunAsVoidFunctionAddr, Result, VoidFnAddr))
return std::move(Err);
return Result;
}
@@ -66,7 +66,7 @@ Expected<int32_t> SimpleRemoteEPC::runAsIntFunction(ExecutorAddr IntFnAddr,
int Arg) {
int32_t Result = 0;
if (auto Err = callSPSWrapper<rt::SPSRunAsIntFunctionSignature>(
- RunAsIntFunctionAddr, Result, ExecutorAddr(IntFnAddr), Arg))
+ RunAsIntFunctionAddr, Result, IntFnAddr, Arg))
return std::move(Err);
return Result;
}
@@ -126,23 +126,22 @@ SimpleRemoteEPC::handleMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
case SimpleRemoteEPCOpcode::Setup:
dbgs() << "Setup";
assert(SeqNo == 0 && "Non-zero SeqNo for Setup?");
- assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Setup?");
+ assert(!TagAddr && "Non-zero TagAddr for Setup?");
break;
case SimpleRemoteEPCOpcode::Hangup:
dbgs() << "Hangup";
assert(SeqNo == 0 && "Non-zero SeqNo for Hangup?");
- assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Hangup?");
+ assert(!TagAddr && "Non-zero TagAddr for Hangup?");
break;
case SimpleRemoteEPCOpcode::Result:
dbgs() << "Result";
- assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Result?");
+ assert(!TagAddr && "Non-zero TagAddr for Result?");
break;
case SimpleRemoteEPCOpcode::CallWrapper:
dbgs() << "CallWrapper";
break;
}
- dbgs() << ", seqno = " << SeqNo
- << ", tag-addr = " << formatv("{0:x}", TagAddr.getValue())
+ dbgs() << ", seqno = " << SeqNo << ", tag-addr = " << TagAddr
<< ", arg-buffer = " << formatv("{0:x}", ArgBytes.size())
<< " bytes\n";
});
@@ -227,11 +226,11 @@ Error SimpleRemoteEPC::sendMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
case SimpleRemoteEPCOpcode::Hangup:
dbgs() << "Hangup";
assert(SeqNo == 0 && "Non-zero SeqNo for Hangup?");
- assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Hangup?");
+ assert(!TagAddr && "Non-zero TagAddr for Hangup?");
break;
case SimpleRemoteEPCOpcode::Result:
dbgs() << "Result";
- assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Result?");
+ assert(!TagAddr && "Non-zero TagAddr for Result?");
break;
case SimpleRemoteEPCOpcode::CallWrapper:
dbgs() << "CallWrapper";
@@ -239,8 +238,7 @@ Error SimpleRemoteEPC::sendMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
default:
llvm_unreachable("Invalid opcode");
}
- dbgs() << ", seqno = " << SeqNo
- << ", tag-addr = " << formatv("{0:x}", TagAddr.getValue())
+ dbgs() << ", seqno = " << SeqNo << ", tag-addr = " << TagAddr
<< ", arg-buffer = " << formatv("{0:x}", ArgBytes.size())
<< " bytes\n";
});
@@ -317,13 +315,19 @@ Error SimpleRemoteEPC::setup(Setup S) {
dbgs() << "SimpleRemoteEPC received setup message:\n"
<< " Triple: " << EI->TargetTriple << "\n"
<< " Page size: " << EI->PageSize << "\n"
- << " Bootstrap symbols:\n";
+ << " Bootstrap map" << (EI->BootstrapMap.empty() ? " empty" : ":")
+ << "\n";
+ for (const auto &KV : EI->BootstrapMap)
+ dbgs() << " " << KV.first() << ": " << KV.second.size()
+ << "-byte SPS encoded buffer\n";
+ dbgs() << " Bootstrap symbols"
+ << (EI->BootstrapSymbols.empty() ? " empty" : ":") << "\n";
for (const auto &KV : EI->BootstrapSymbols)
- dbgs() << " " << KV.first() << ": "
- << formatv("{0:x16}", KV.second.getValue()) << "\n";
+ dbgs() << " " << KV.first() << ": " << KV.second << "\n";
});
TargetTriple = Triple(EI->TargetTriple);
PageSize = EI->PageSize;
+ BootstrapMap = std::move(EI->BootstrapMap);
BootstrapSymbols = std::move(EI->BootstrapSymbols);
if (auto Err = getBootstrapSymbols(
@@ -402,7 +406,7 @@ void SimpleRemoteEPC::handleCallWrapper(
ExecutorAddr(), {WFR.data(), WFR.size()}))
getExecutionSession().reportError(std::move(Err));
},
- TagAddr.getValue(), ArgBytes);
+ TagAddr, ArgBytes);
},
"callWrapper task"));
}
diff --git a/llvm/lib/ExecutionEngine/Orc/Speculation.cpp b/llvm/lib/ExecutionEngine/Orc/Speculation.cpp
index b52d01318c0d..d4cbd1970d8f 100644
--- a/llvm/lib/ExecutionEngine/Orc/Speculation.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/Speculation.cpp
@@ -36,16 +36,15 @@ void ImplSymbolMap::trackImpls(SymbolAliasMap ImplMaps, JITDylib *SrcJD) {
// Trigger Speculative Compiles.
void Speculator::speculateForEntryPoint(Speculator *Ptr, uint64_t StubId) {
assert(Ptr && " Null Address Received in orc_speculate_for ");
- Ptr->speculateFor(StubId);
+ Ptr->speculateFor(ExecutorAddr(StubId));
}
Error Speculator::addSpeculationRuntime(JITDylib &JD,
MangleAndInterner &Mangle) {
- JITEvaluatedSymbol ThisPtr(pointerToJITTargetAddress(this),
- JITSymbolFlags::Exported);
- JITEvaluatedSymbol SpeculateForEntryPtr(
- pointerToJITTargetAddress(&speculateForEntryPoint),
- JITSymbolFlags::Exported);
+ ExecutorSymbolDef ThisPtr(ExecutorAddr::fromPtr(this),
+ JITSymbolFlags::Exported);
+ ExecutorSymbolDef SpeculateForEntryPtr(
+ ExecutorAddr::fromPtr(&speculateForEntryPoint), JITSymbolFlags::Exported);
return JD.define(absoluteSymbols({
{Mangle("__orc_speculator"), ThisPtr}, // Data Symbol
{Mangle("__orc_speculate_for"), SpeculateForEntryPtr} // Callable Symbol
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
index 147f915f61d6..3f70dbf60437 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/ExecutorSharedMemoryMapperService.cpp
@@ -132,11 +132,11 @@ Expected<ExecutorAddr> ExecutorSharedMemoryMapperService::initialize(
#if defined(LLVM_ON_UNIX)
int NativeProt = 0;
- if ((Segment.AG.getMemProt() & MemProt::Read) == MemProt::Read)
+ if ((Segment.RAG.Prot & MemProt::Read) == MemProt::Read)
NativeProt |= PROT_READ;
- if ((Segment.AG.getMemProt() & MemProt::Write) == MemProt::Write)
+ if ((Segment.RAG.Prot & MemProt::Write) == MemProt::Write)
NativeProt |= PROT_WRITE;
- if ((Segment.AG.getMemProt() & MemProt::Exec) == MemProt::Exec)
+ if ((Segment.RAG.Prot & MemProt::Exec) == MemProt::Exec)
NativeProt |= PROT_EXEC;
if (mprotect(Segment.Addr.toPtr<void *>(), Segment.Size, NativeProt))
@@ -144,8 +144,7 @@ Expected<ExecutorAddr> ExecutorSharedMemoryMapperService::initialize(
#elif defined(_WIN32)
- DWORD NativeProt =
- getWindowsProtectionFlags(Segment.AG.getMemProt());
+ DWORD NativeProt = getWindowsProtectionFlags(Segment.RAG.Prot);
if (!VirtualProtect(Segment.Addr.toPtr<void *>(), Segment.Size, NativeProt,
&NativeProt))
@@ -153,7 +152,7 @@ Expected<ExecutorAddr> ExecutorSharedMemoryMapperService::initialize(
#endif
- if ((Segment.AG.getMemProt() & MemProt::Exec) == MemProt::Exec)
+ if ((Segment.RAG.Prot & MemProt::Exec) == MemProt::Exec)
sys::Memory::InvalidateInstructionCache(Segment.Addr.toPtr<void *>(),
Segment.Size);
}
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp
index 8296b03398a0..8eca874c48b8 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/JITLoaderGDB.cpp
@@ -67,9 +67,9 @@ using namespace llvm;
using namespace llvm::orc;
// Register debug object, return error message or null for success.
-static void registerJITLoaderGDBImpl(const char *ObjAddr, size_t Size) {
+static void appendJITDebugDescriptor(const char *ObjAddr, size_t Size) {
LLVM_DEBUG({
- dbgs() << "Registering debug object with GDB JIT interface "
+ dbgs() << "Adding debug object to GDB JIT interface "
<< formatv("([{0:x16} -- {1:x16}])",
reinterpret_cast<uintptr_t>(ObjAddr),
reinterpret_cast<uintptr_t>(ObjAddr + Size))
@@ -94,20 +94,20 @@ static void registerJITLoaderGDBImpl(const char *ObjAddr, size_t Size) {
__jit_debug_descriptor.first_entry = E;
__jit_debug_descriptor.relevant_entry = E;
-
- // Run into the rendezvous breakpoint.
__jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
- __jit_debug_register_code();
}
extern "C" orc::shared::CWrapperFunctionResult
llvm_orc_registerJITLoaderGDBAllocAction(const char *Data, size_t Size) {
using namespace orc::shared;
- return WrapperFunction<SPSError(SPSExecutorAddrRange)>::handle(
+ return WrapperFunction<SPSError(SPSExecutorAddrRange, bool)>::handle(
Data, Size,
- [](ExecutorAddrRange R) {
- registerJITLoaderGDBImpl(R.Start.toPtr<const char *>(),
+ [](ExecutorAddrRange R, bool AutoRegisterCode) {
+ appendJITDebugDescriptor(R.Start.toPtr<const char *>(),
R.size());
+ // Run into the rendezvous breakpoint.
+ if (AutoRegisterCode)
+ __jit_debug_register_code();
return Error::success();
})
.release();
@@ -116,11 +116,14 @@ llvm_orc_registerJITLoaderGDBAllocAction(const char *Data, size_t Size) {
extern "C" orc::shared::CWrapperFunctionResult
llvm_orc_registerJITLoaderGDBWrapper(const char *Data, uint64_t Size) {
using namespace orc::shared;
- return WrapperFunction<SPSError(SPSExecutorAddrRange)>::handle(
+ return WrapperFunction<SPSError(SPSExecutorAddrRange, bool)>::handle(
Data, Size,
- [](ExecutorAddrRange R) {
- registerJITLoaderGDBImpl(R.Start.toPtr<const char *>(),
+ [](ExecutorAddrRange R, bool AutoRegisterCode) {
+ appendJITDebugDescriptor(R.Start.toPtr<const char *>(),
R.size());
+ // Run into the rendezvous breakpoint.
+ if (AutoRegisterCode)
+ __jit_debug_register_code();
return Error::success();
})
.release();
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
index ce94bf1e039a..4da031716e32 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleExecutorMemoryManager.cpp
@@ -132,9 +132,9 @@ Error SimpleExecutorMemoryManager::finalize(tpctypes::FinalizeRequest &FR) {
assert(Seg.Size <= std::numeric_limits<size_t>::max());
if (auto EC = sys::Memory::protectMappedMemory(
{Mem, static_cast<size_t>(Seg.Size)},
- toSysMemoryProtectionFlags(Seg.AG.getMemProt())))
+ toSysMemoryProtectionFlags(Seg.RAG.Prot)))
return BailOut(errorCodeToError(EC));
- if ((Seg.AG.getMemProt() & MemProt::Exec) == MemProt::Exec)
+ if ((Seg.RAG.Prot & MemProt::Exec) == MemProt::Exec)
sys::Memory::InvalidateInstructionCache(Mem, Seg.Size);
}
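
The (Seg.RAG.Prot & MemProt::Exec) == MemProt::Exec test above (and its twin in the shared-memory mapper earlier) is the usual containment check for a scoped bitmask enum. A minimal sketch of the idiom with a stand-in MemProt type, not the real ORC definition:

  #include <cstdint>

  enum class MemProt : uint8_t { None = 0, Read = 1, Write = 2, Exec = 4 };

  constexpr MemProt operator&(MemProt A, MemProt B) {
    return static_cast<MemProt>(static_cast<uint8_t>(A) & static_cast<uint8_t>(B));
  }
  constexpr MemProt operator|(MemProt A, MemProt B) {
    return static_cast<MemProt>(static_cast<uint8_t>(A) | static_cast<uint8_t>(B));
  }

  // True iff every bit of Flag is set in Prot.
  constexpr bool contains(MemProt Prot, MemProt Flag) {
    return (Prot & Flag) == Flag;
  }

  static_assert(contains(MemProt::Read | MemProt::Exec, MemProt::Exec), "exec present");
  static_assert(!contains(MemProt::Read | MemProt::Write, MemProt::Exec), "exec absent");

Comparing against Flag rather than testing for non-zero matters once Flag can carry more than one bit: the check then requires all of them to be present.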
diff --git a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp
index 8ab0af3eab6e..67bc379f9821 100644
--- a/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.cpp
@@ -10,8 +10,8 @@
#include "llvm/ExecutionEngine/Orc/Shared/TargetProcessControlTypes.h"
#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/Process.h"
+#include "llvm/TargetParser/Host.h"
#include "OrcRTBootstrap.h"
@@ -68,23 +68,22 @@ SimpleRemoteEPCServer::handleMessage(SimpleRemoteEPCOpcode OpC, uint64_t SeqNo,
case SimpleRemoteEPCOpcode::Setup:
dbgs() << "Setup";
assert(SeqNo == 0 && "Non-zero SeqNo for Setup?");
- assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Setup?");
+ assert(!TagAddr && "Non-zero TagAddr for Setup?");
break;
case SimpleRemoteEPCOpcode::Hangup:
dbgs() << "Hangup";
assert(SeqNo == 0 && "Non-zero SeqNo for Hangup?");
- assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Hangup?");
+ assert(!TagAddr && "Non-zero TagAddr for Hangup?");
break;
case SimpleRemoteEPCOpcode::Result:
dbgs() << "Result";
- assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Result?");
+ assert(!TagAddr && "Non-zero TagAddr for Result?");
break;
case SimpleRemoteEPCOpcode::CallWrapper:
dbgs() << "CallWrapper";
break;
}
- dbgs() << ", seqno = " << SeqNo
- << ", tag-addr = " << formatv("{0:x}", TagAddr.getValue())
+ dbgs() << ", seqno = " << SeqNo << ", tag-addr = " << TagAddr
<< ", arg-buffer = " << formatv("{0:x}", ArgBytes.size())
<< " bytes\n";
});
@@ -158,23 +157,22 @@ Error SimpleRemoteEPCServer::sendMessage(SimpleRemoteEPCOpcode OpC,
case SimpleRemoteEPCOpcode::Setup:
dbgs() << "Setup";
assert(SeqNo == 0 && "Non-zero SeqNo for Setup?");
- assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Setup?");
+ assert(!TagAddr && "Non-zero TagAddr for Setup?");
break;
case SimpleRemoteEPCOpcode::Hangup:
dbgs() << "Hangup";
assert(SeqNo == 0 && "Non-zero SeqNo for Hangup?");
- assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Hangup?");
+ assert(!TagAddr && "Non-zero TagAddr for Hangup?");
break;
case SimpleRemoteEPCOpcode::Result:
dbgs() << "Result";
- assert(TagAddr.getValue() == 0 && "Non-zero TagAddr for Result?");
+ assert(!TagAddr && "Non-zero TagAddr for Result?");
break;
case SimpleRemoteEPCOpcode::CallWrapper:
dbgs() << "CallWrapper";
break;
}
- dbgs() << ", seqno = " << SeqNo
- << ", tag-addr = " << formatv("{0:x}", TagAddr.getValue())
+ dbgs() << ", seqno = " << SeqNo << ", tag-addr = " << TagAddr
<< ", arg-buffer = " << formatv("{0:x}", ArgBytes.size())
<< " bytes\n";
});
@@ -187,6 +185,7 @@ Error SimpleRemoteEPCServer::sendMessage(SimpleRemoteEPCOpcode OpC,
}
Error SimpleRemoteEPCServer::sendSetupMessage(
+ StringMap<std::vector<char>> BootstrapMap,
StringMap<ExecutorAddr> BootstrapSymbols) {
using namespace SimpleRemoteEPCDefaultBootstrapSymbolNames;
@@ -198,6 +197,7 @@ Error SimpleRemoteEPCServer::sendSetupMessage(
EI.PageSize = *PageSize;
else
return PageSize.takeError();
+ EI.BootstrapMap = std::move(BootstrapMap);
EI.BootstrapSymbols = std::move(BootstrapSymbols);
assert(!EI.BootstrapSymbols.count(ExecutorSessionObjectName) &&
diff --git a/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
index b425eec5f6d6..62cab22a1c45 100644
--- a/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
+++ b/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp
@@ -417,7 +417,7 @@ void PerfJITEventListener::NotifyCode(Expected<llvm::StringRef> &Symbol,
rec.Prefix.Timestamp = perf_get_timestamp();
rec.CodeSize = CodeSize;
- rec.Vma = 0;
+ rec.Vma = CodeAddr;
rec.CodeAddr = CodeAddr;
rec.Pid = Pid;
rec.Tid = get_threadid();
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
index 1d8f1ac8ac8a..9255311f992d 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
@@ -16,9 +16,9 @@
#include "Targets/RuntimeDyldCOFFThumb.h"
#include "Targets/RuntimeDyldCOFFX86_64.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
using namespace llvm::object;
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 2fe49fefae2d..d439b1b4ebfb 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -15,12 +15,12 @@
#include "Targets/RuntimeDyldELFMips.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
using namespace llvm::object;
@@ -426,13 +426,15 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
break;
case ELF::R_AARCH64_ABS16: {
uint64_t Result = Value + Addend;
- assert(static_cast<int64_t>(Result) >= INT16_MIN && Result < UINT16_MAX);
+ assert(Result == static_cast<uint64_t>(llvm::SignExtend64(Result, 16)) ||
+ (Result >> 16) == 0);
write(isBE, TargetPtr, static_cast<uint16_t>(Result & 0xffffU));
break;
}
case ELF::R_AARCH64_ABS32: {
uint64_t Result = Value + Addend;
- assert(static_cast<int64_t>(Result) >= INT32_MIN && Result < UINT32_MAX);
+ assert(Result == static_cast<uint64_t>(llvm::SignExtend64(Result, 32)) ||
+ (Result >> 32) == 0);
write(isBE, TargetPtr, static_cast<uint32_t>(Result & 0xffffffffU));
break;
}
@@ -477,7 +479,9 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
assert(isInt<16>(BranchImm));
- *TargetPtr &= 0xfff8001fU;
+ uint32_t RawInstr = *(support::little32_t *)TargetPtr;
+ *(support::little32_t *)TargetPtr = RawInstr & 0xfff8001fU;
+
// Immediate:15:2 goes in bits 18:5 of TBZ, TBNZ
or32le(TargetPtr, (BranchImm & 0x0000FFFC) << 3);
break;
@@ -1282,6 +1286,7 @@ RuntimeDyldELF::processRelocationRef(
}
case SymbolRef::ST_Data:
case SymbolRef::ST_Function:
+ case SymbolRef::ST_Other:
case SymbolRef::ST_Unknown: {
Value.SymbolName = TargetName.data();
Value.Addend = Addend;
@@ -2405,6 +2410,7 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj,
}
}
+ GOTOffsetMap.clear();
GOTSectionID = 0;
CurrentGOTIndex = 0;
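
The relaxed R_AARCH64_ABS16/ABS32 assertions earlier in this file accept a value when it is either a correct N-bit sign-extension or fits in N bits zero-extended. A standalone sketch of that predicate; the helper names here are invented, and llvm::SignExtend64 from llvm/Support/MathExtras.h performs the same sign-extension as the local signExtend64 below:

  #include <cstdint>

  // Sign-extend the low Bits bits of V to 64 bits (two's-complement arithmetic
  // shift assumed, as in llvm::SignExtend64).
  static constexpr uint64_t signExtend64(uint64_t V, unsigned Bits) {
    return uint64_t(int64_t(V << (64 - Bits)) >> (64 - Bits));
  }

  // Accept values that fit in Bits bits either as signed or as unsigned,
  // mirroring the relaxed R_AARCH64_ABS16/ABS32 range checks.
  static constexpr bool fitsSignedOrUnsigned(uint64_t V, unsigned Bits) {
    return V == signExtend64(V, Bits) || (V >> Bits) == 0;
  }

  static_assert(fitsSignedOrUnsigned(0xFFFFull, 16), "65535 fits unsigned");
  static_assert(fitsSignedOrUnsigned(uint64_t(-32768), 16), "-32768 fits signed");
  static_assert(!fitsSignedOrUnsigned(0x10000ull, 16), "65536 does not fit");

Unlike the old INT16_MIN/UINT16_MAX comparison, this form also accepts negative results, which wrap to large values once stored in a uint64_t.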
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index bf33a2dec18a..501417db421a 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -15,7 +15,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/ExecutionEngine/RuntimeDyldChecker.h"
@@ -23,9 +22,10 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/Triple.h"
#include <deque>
#include <map>
#include <system_error>
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFAArch64.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFAArch64.h
index 342c4221ff0c..da381986e9de 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFAArch64.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFAArch64.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFAARCH64_H
#include "../RuntimeDyldCOFF.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/Endian.h"
diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
index 3859f36ac4bd..22f1cf33158c 100644
--- a/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
+++ b/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
@@ -14,6 +14,7 @@
#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDCOFFTHUMB_H
#include "../RuntimeDyldCOFF.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Object/COFF.h"
diff --git a/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp b/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
index b23e33039c35..436888730bfb 100644
--- a/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
+++ b/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
@@ -101,7 +101,7 @@ uint8_t *SectionMemoryManager::allocateSection(
// FIXME: Initialize the Near member for each memory group to avoid
// interleaving.
std::error_code ec;
- sys::MemoryBlock MB = MMapper.allocateMappedMemory(
+ sys::MemoryBlock MB = MMapper->allocateMappedMemory(
Purpose, RequiredSize, &MemGroup.Near,
sys::Memory::MF_READ | sys::Memory::MF_WRITE, ec);
if (ec) {
@@ -204,7 +204,7 @@ std::error_code
SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup,
unsigned Permissions) {
for (sys::MemoryBlock &MB : MemGroup.PendingMem)
- if (std::error_code EC = MMapper.protectMappedMemory(MB, Permissions))
+ if (std::error_code EC = MMapper->protectMappedMemory(MB, Permissions))
return EC;
MemGroup.PendingMem.clear();
@@ -234,7 +234,7 @@ void SectionMemoryManager::invalidateInstructionCache() {
SectionMemoryManager::~SectionMemoryManager() {
for (MemoryGroup *Group : {&CodeMem, &RWDataMem, &RODataMem}) {
for (sys::MemoryBlock &Block : Group->AllocatedMem)
- MMapper.releaseMappedMemory(Block);
+ MMapper->releaseMappedMemory(Block);
}
}
@@ -263,11 +263,14 @@ public:
return sys::Memory::releaseMappedMemory(M);
}
};
-
-DefaultMMapper DefaultMMapperInstance;
} // namespace
-SectionMemoryManager::SectionMemoryManager(MemoryMapper *MM)
- : MMapper(MM ? *MM : DefaultMMapperInstance) {}
+SectionMemoryManager::SectionMemoryManager(MemoryMapper *UnownedMM)
+ : MMapper(UnownedMM), OwnedMMapper(nullptr) {
+ if (!MMapper) {
+ OwnedMMapper = std::make_unique<DefaultMMapper>();
+ MMapper = OwnedMMapper.get();
+ }
+}
} // namespace llvm
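
The constructor above replaces the file-scope DefaultMMapperInstance with a borrow-or-own scheme: keep a non-owning pointer when a mapper is injected, otherwise create a default one and own it. A generic sketch of that pattern with stand-in type names:

  #include <memory>

  struct Mapper {
    virtual ~Mapper() = default;
    virtual void map() = 0;
  };

  struct DefaultMapper final : Mapper {
    void map() override {}
  };

  class Manager {
    Mapper *MMapper;                      // always valid after construction
    std::unique_ptr<Mapper> OwnedMMapper; // set only when we created the default
  public:
    explicit Manager(Mapper *UnownedMM = nullptr)
        : MMapper(UnownedMM), OwnedMMapper(nullptr) {
      if (!MMapper) {
        OwnedMMapper = std::make_unique<DefaultMapper>();
        MMapper = OwnedMMapper.get();
      }
    }
    void run() { MMapper->map(); }
  };

MMapper stays a plain pointer, so each call site above only changes from MMapper. to MMapper->, and the default mapper's lifetime is tied to the manager instead of a global.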
diff --git a/llvm/lib/ExecutionEngine/TargetSelect.cpp b/llvm/lib/ExecutionEngine/TargetSelect.cpp
index c67a1a7661d6..72fb16fbf203 100644
--- a/llvm/lib/ExecutionEngine/TargetSelect.cpp
+++ b/llvm/lib/ExecutionEngine/TargetSelect.cpp
@@ -13,13 +13,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/IR/Module.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
-#include "llvm/Support/Host.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
@@ -89,7 +89,6 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple,
Options, RelocModel, CMModel, OptLevel,
/*JIT*/ true);
Target->Options.EmulatedTLS = EmulatedTLS;
- Target->Options.ExplicitEmulatedTLS = true;
assert(Target && "Could not allocate target machine!");
return Target;
diff --git a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp
index ec963c2de45b..3e4514f2545b 100644
--- a/llvm/lib/FileCheck/FileCheck.cpp
+++ b/llvm/lib/FileCheck/FileCheck.cpp
@@ -16,6 +16,7 @@
#include "llvm/FileCheck/FileCheck.h"
#include "FileCheckImpl.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/CheckedArithmetic.h"
@@ -78,38 +79,42 @@ Expected<std::string> ExpressionFormat::getWildcardRegex() const {
Expected<std::string>
ExpressionFormat::getMatchingString(ExpressionValue IntegerValue) const {
- uint64_t AbsoluteValue;
- StringRef SignPrefix = IntegerValue.isNegative() ? "-" : "";
-
+ APInt IntValue = IntegerValue.getAPIntValue();
+ // Error out for values that cannot be represented by the appropriate 64-bit
+ // integer (e.g. int64_t for a signed format) to keep the getter of
+ // ExpressionValue as an APInt an NFC.
if (Value == Kind::Signed) {
- Expected<int64_t> SignedValue = IntegerValue.getSignedValue();
- if (!SignedValue)
- return SignedValue.takeError();
- if (*SignedValue < 0)
- AbsoluteValue = cantFail(IntegerValue.getAbsolute().getUnsignedValue());
- else
- AbsoluteValue = *SignedValue;
+ if (!IntValue.isSignedIntN(64))
+ return make_error<OverflowError>();
} else {
- Expected<uint64_t> UnsignedValue = IntegerValue.getUnsignedValue();
- if (!UnsignedValue)
- return UnsignedValue.takeError();
- AbsoluteValue = *UnsignedValue;
+ if (!IntValue.isIntN(64))
+ return make_error<OverflowError>();
}
- std::string AbsoluteValueStr;
+ unsigned Radix;
+ bool UpperCase = false;
+ SmallString<8> AbsoluteValueStr;
+ StringRef SignPrefix = IntValue.isNegative() ? "-" : "";
switch (Value) {
case Kind::Unsigned:
case Kind::Signed:
- AbsoluteValueStr = utostr(AbsoluteValue);
+ Radix = 10;
break;
case Kind::HexUpper:
+ UpperCase = true;
+ Radix = 16;
+ break;
case Kind::HexLower:
- AbsoluteValueStr = utohexstr(AbsoluteValue, Value == Kind::HexLower);
+ Radix = 16;
+ UpperCase = false;
break;
default:
return createStringError(std::errc::invalid_argument,
"trying to match value with invalid format");
}
+ IntValue.abs().toString(AbsoluteValueStr, Radix, /*Signed=*/false,
+ /*formatAsCLiteral=*/false,
+ /*UpperCase=*/UpperCase);
StringRef AlternateFormPrefix = AlternateForm ? StringRef("0x") : StringRef();
@@ -146,217 +151,89 @@ ExpressionFormat::valueFromStringRepr(StringRef StrVal,
bool Hex = Value == Kind::HexUpper || Value == Kind::HexLower;
uint64_t UnsignedValue;
bool MissingFormPrefix = AlternateForm && !StrVal.consume_front("0x");
+ (void)MissingFormPrefix;
+ assert(!MissingFormPrefix && "missing alternate form prefix");
if (StrVal.getAsInteger(Hex ? 16 : 10, UnsignedValue))
return ErrorDiagnostic::get(SM, StrVal, IntegerParseErrorStr);
- // Error out for a missing prefix only now that we know we have an otherwise
- // valid integer. For example, "-0x18" is reported above instead.
- if (MissingFormPrefix)
- return ErrorDiagnostic::get(SM, StrVal, "missing alternate form prefix");
-
return ExpressionValue(UnsignedValue);
}
-static int64_t getAsSigned(uint64_t UnsignedValue) {
- // Use memcpy to reinterpret the bitpattern in Value since casting to
- // signed is implementation-defined if the unsigned value is too big to be
- // represented in the signed type and using an union violates type aliasing
- // rules.
- int64_t SignedValue;
- memcpy(&SignedValue, &UnsignedValue, sizeof(SignedValue));
- return SignedValue;
-}
-
-Expected<int64_t> ExpressionValue::getSignedValue() const {
- if (Negative)
- return getAsSigned(Value);
-
- if (Value > (uint64_t)std::numeric_limits<int64_t>::max())
- return make_error<OverflowError>();
-
- // Value is in the representable range of int64_t so we can use cast.
- return static_cast<int64_t>(Value);
-}
-
-Expected<uint64_t> ExpressionValue::getUnsignedValue() const {
- if (Negative)
- return make_error<OverflowError>();
-
- return Value;
-}
-
-ExpressionValue ExpressionValue::getAbsolute() const {
- if (!Negative)
- return *this;
-
- int64_t SignedValue = getAsSigned(Value);
- int64_t MaxInt64 = std::numeric_limits<int64_t>::max();
- // Absolute value can be represented as int64_t.
- if (SignedValue >= -MaxInt64)
- return ExpressionValue(-getAsSigned(Value));
-
- // -X == -(max int64_t + Rem), negate each component independently.
- SignedValue += MaxInt64;
- uint64_t RemainingValueAbsolute = -SignedValue;
- return ExpressionValue(MaxInt64 + RemainingValueAbsolute);
-}
-
Expected<ExpressionValue> llvm::operator+(const ExpressionValue &LeftOperand,
const ExpressionValue &RightOperand) {
- if (LeftOperand.isNegative() && RightOperand.isNegative()) {
- int64_t LeftValue = cantFail(LeftOperand.getSignedValue());
- int64_t RightValue = cantFail(RightOperand.getSignedValue());
- std::optional<int64_t> Result = checkedAdd<int64_t>(LeftValue, RightValue);
- if (!Result)
- return make_error<OverflowError>();
-
- return ExpressionValue(*Result);
- }
-
- // (-A) + B == B - A.
- if (LeftOperand.isNegative())
- return RightOperand - LeftOperand.getAbsolute();
-
- // A + (-B) == A - B.
- if (RightOperand.isNegative())
- return LeftOperand - RightOperand.getAbsolute();
-
- // Both values are positive at this point.
- uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue());
- uint64_t RightValue = cantFail(RightOperand.getUnsignedValue());
- std::optional<uint64_t> Result =
- checkedAddUnsigned<uint64_t>(LeftValue, RightValue);
- if (!Result)
+ bool Overflow;
+ APInt Result = LeftOperand.getAPIntValue().sadd_ov(
+ RightOperand.getAPIntValue(), Overflow);
+ if (Overflow ||
+ (Result.isNegative() && !Result.isSignedIntN(Result.getBitWidth() - 1)))
return make_error<OverflowError>();
- return ExpressionValue(*Result);
+ if (Result.isNegative())
+ return ExpressionValue(Result.getSExtValue());
+ else
+ return ExpressionValue(Result.getZExtValue());
}
Expected<ExpressionValue> llvm::operator-(const ExpressionValue &LeftOperand,
const ExpressionValue &RightOperand) {
- // Result will be negative and thus might underflow.
- if (LeftOperand.isNegative() && !RightOperand.isNegative()) {
- int64_t LeftValue = cantFail(LeftOperand.getSignedValue());
- uint64_t RightValue = cantFail(RightOperand.getUnsignedValue());
- // Result <= -1 - (max int64_t) which overflows on 1- and 2-complement.
- if (RightValue > (uint64_t)std::numeric_limits<int64_t>::max())
- return make_error<OverflowError>();
- std::optional<int64_t> Result =
- checkedSub(LeftValue, static_cast<int64_t>(RightValue));
- if (!Result)
- return make_error<OverflowError>();
-
- return ExpressionValue(*Result);
- }
-
- // (-A) - (-B) == B - A.
- if (LeftOperand.isNegative())
- return RightOperand.getAbsolute() - LeftOperand.getAbsolute();
-
- // A - (-B) == A + B.
- if (RightOperand.isNegative())
- return LeftOperand + RightOperand.getAbsolute();
-
- // Both values are positive at this point.
- uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue());
- uint64_t RightValue = cantFail(RightOperand.getUnsignedValue());
- if (LeftValue >= RightValue)
- return ExpressionValue(LeftValue - RightValue);
- else {
- uint64_t AbsoluteDifference = RightValue - LeftValue;
- uint64_t MaxInt64 = std::numeric_limits<int64_t>::max();
- // Value might underflow.
- if (AbsoluteDifference > MaxInt64) {
- AbsoluteDifference -= MaxInt64;
- int64_t Result = -MaxInt64;
- int64_t MinInt64 = std::numeric_limits<int64_t>::min();
- // Underflow, tested by:
- // abs(Result + (max int64_t)) > abs((min int64_t) + (max int64_t))
- if (AbsoluteDifference > static_cast<uint64_t>(-(MinInt64 - Result)))
- return make_error<OverflowError>();
- Result -= static_cast<int64_t>(AbsoluteDifference);
- return ExpressionValue(Result);
- }
+ bool Overflow;
+ APInt Result = LeftOperand.getAPIntValue().ssub_ov(
+ RightOperand.getAPIntValue(), Overflow);
+ if (Overflow ||
+ (Result.isNegative() && !Result.isSignedIntN(Result.getBitWidth() - 1)))
+ return make_error<OverflowError>();
- return ExpressionValue(-static_cast<int64_t>(AbsoluteDifference));
- }
+ if (Result.isNegative())
+ return ExpressionValue(Result.getSExtValue());
+ else
+ return ExpressionValue(Result.getZExtValue());
}
Expected<ExpressionValue> llvm::operator*(const ExpressionValue &LeftOperand,
const ExpressionValue &RightOperand) {
- // -A * -B == A * B
- if (LeftOperand.isNegative() && RightOperand.isNegative())
- return LeftOperand.getAbsolute() * RightOperand.getAbsolute();
-
- // A * -B == -B * A
- if (RightOperand.isNegative())
- return RightOperand * LeftOperand;
-
- assert(!RightOperand.isNegative() && "Unexpected negative operand!");
-
- // Result will be negative and can underflow.
- if (LeftOperand.isNegative()) {
- auto Result = LeftOperand.getAbsolute() * RightOperand.getAbsolute();
- if (!Result)
- return Result;
-
- return ExpressionValue(0) - *Result;
- }
-
- // Result will be positive and can overflow.
- uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue());
- uint64_t RightValue = cantFail(RightOperand.getUnsignedValue());
- std::optional<uint64_t> Result =
- checkedMulUnsigned<uint64_t>(LeftValue, RightValue);
- if (!Result)
+ bool Overflow;
+ APInt Result = LeftOperand.getAPIntValue().smul_ov(
+ RightOperand.getAPIntValue(), Overflow);
+ if (Overflow ||
+ (Result.isNegative() && !Result.isSignedIntN(Result.getBitWidth() - 1)))
return make_error<OverflowError>();
- return ExpressionValue(*Result);
+ if (Result.isNegative())
+ return ExpressionValue(Result.getSExtValue());
+ else
+ return ExpressionValue(Result.getZExtValue());
}
Expected<ExpressionValue> llvm::operator/(const ExpressionValue &LeftOperand,
const ExpressionValue &RightOperand) {
- // -A / -B == A / B
- if (LeftOperand.isNegative() && RightOperand.isNegative())
- return LeftOperand.getAbsolute() / RightOperand.getAbsolute();
-
- // Check for divide by zero.
- if (RightOperand == ExpressionValue(0))
+ // Check for division by zero.
+ if (RightOperand.getAPIntValue().isZero())
return make_error<OverflowError>();
- // Result will be negative and can underflow.
- if (LeftOperand.isNegative() || RightOperand.isNegative())
- return ExpressionValue(0) -
- cantFail(LeftOperand.getAbsolute() / RightOperand.getAbsolute());
+ bool Overflow;
+ APInt Result = LeftOperand.getAPIntValue().sdiv_ov(
+ RightOperand.getAPIntValue(), Overflow);
+ if (Overflow ||
+ (Result.isNegative() && !Result.isSignedIntN(Result.getBitWidth() - 1)))
+ return make_error<OverflowError>();
- uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue());
- uint64_t RightValue = cantFail(RightOperand.getUnsignedValue());
- return ExpressionValue(LeftValue / RightValue);
+ if (Result.isNegative())
+ return ExpressionValue(Result.getSExtValue());
+ else
+ return ExpressionValue(Result.getZExtValue());
}
Expected<ExpressionValue> llvm::max(const ExpressionValue &LeftOperand,
const ExpressionValue &RightOperand) {
- if (LeftOperand.isNegative() && RightOperand.isNegative()) {
- int64_t LeftValue = cantFail(LeftOperand.getSignedValue());
- int64_t RightValue = cantFail(RightOperand.getSignedValue());
- return ExpressionValue(std::max(LeftValue, RightValue));
- }
-
- if (!LeftOperand.isNegative() && !RightOperand.isNegative()) {
- uint64_t LeftValue = cantFail(LeftOperand.getUnsignedValue());
- uint64_t RightValue = cantFail(RightOperand.getUnsignedValue());
- return ExpressionValue(std::max(LeftValue, RightValue));
- }
-
- if (LeftOperand.isNegative())
- return RightOperand;
-
- return LeftOperand;
+ return LeftOperand.getAPIntValue().slt(RightOperand.getAPIntValue())
+ ? RightOperand
+ : LeftOperand;
}
Expected<ExpressionValue> llvm::min(const ExpressionValue &LeftOperand,
const ExpressionValue &RightOperand) {
- if (cantFail(max(LeftOperand, RightOperand)) == LeftOperand)
+ if (cantFail(max(LeftOperand, RightOperand)).getAPIntValue() ==
+ LeftOperand.getAPIntValue())
return RightOperand;
return LeftOperand;
@@ -493,8 +370,7 @@ Expected<NumericVariable *> Pattern::parseNumericVariableDefinition(
// Detect collisions between string and numeric variables when the latter
// is created later than the former.
- if (Context->DefinedVariableTable.find(Name) !=
- Context->DefinedVariableTable.end())
+ if (Context->DefinedVariableTable.contains(Name))
return ErrorDiagnostic::get(
SM, Name, "string variable with name '" + Name + "' already exists");
@@ -1072,8 +948,7 @@ bool Pattern::parsePattern(StringRef PatternStr, StringRef Prefix,
// Detect collisions between string and numeric variables when the
// former is created later than the latter.
- if (Context->GlobalNumericVariableTable.find(Name) !=
- Context->GlobalNumericVariableTable.end()) {
+ if (Context->GlobalNumericVariableTable.contains(Name)) {
SM.PrintMessage(
SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error,
"numeric variable with name '" + Name + "' already exists");
@@ -2753,8 +2628,7 @@ Error FileCheckPatternContext::defineCmdlineVariables(
// Detect collisions between string and numeric variables when the former
// is created later than the latter.
- if (GlobalNumericVariableTable.find(Name) !=
- GlobalNumericVariableTable.end()) {
+ if (GlobalNumericVariableTable.contains(Name)) {
Errs = joinErrors(std::move(Errs),
ErrorDiagnostic::get(SM, Name,
"numeric variable with name '" +
diff --git a/llvm/lib/FileCheck/FileCheckImpl.h b/llvm/lib/FileCheck/FileCheckImpl.h
index fd3568e7a5b0..10fe8d46ffac 100644
--- a/llvm/lib/FileCheck/FileCheckImpl.h
+++ b/llvm/lib/FileCheck/FileCheckImpl.h
@@ -15,6 +15,7 @@
#ifndef LLVM_LIB_FILECHECK_FILECHECKIMPL_H
#define LLVM_LIB_FILECHECK_FILECHECKIMPL_H
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/FileCheck/FileCheck.h"
@@ -120,38 +121,14 @@ public:
/// Class representing a numeric value.
class ExpressionValue {
private:
- uint64_t Value;
- bool Negative;
+ APInt Value;
public:
+ // Store signed and unsigned 64-bit integers in a signed 65-bit APInt.
template <class T>
- explicit ExpressionValue(T Val) : Value(Val), Negative(Val < 0) {}
+ explicit ExpressionValue(T Val) : Value(65, Val, /*isSigned=*/Val < 0) {}
- bool operator==(const ExpressionValue &Other) const {
- return Value == Other.Value && isNegative() == Other.isNegative();
- }
-
- bool operator!=(const ExpressionValue &Other) const {
- return !(*this == Other);
- }
-
- /// Returns true if value is signed and negative, false otherwise.
- bool isNegative() const {
- assert((Value != 0 || !Negative) && "Unexpected negative zero!");
- return Negative;
- }
-
- /// \returns the value as a signed integer or an error if the value is out of
- /// range.
- Expected<int64_t> getSignedValue() const;
-
- /// \returns the value as an unsigned integer or an error if the value is out
- /// of range.
- Expected<uint64_t> getUnsignedValue() const;
-
- /// \returns an unsigned ExpressionValue instance whose value is the absolute
- /// value to this object's value.
- ExpressionValue getAbsolute() const;
+ APInt getAPIntValue() const { return Value; }
};
/// Performs operation and \returns its result or an error in case of failure,
@@ -269,7 +246,7 @@ private:
std::optional<ExpressionValue> Value;
/// The input buffer's string from which Value was parsed, or std::nullopt.
- /// See comments on getStringValue for a discussion of the None case.
+ /// See comments on getStringValue for a discussion of the std::nullopt case.
std::optional<StringRef> StrValue;
/// Line number where this variable is defined, or std::nullopt if defined
@@ -280,7 +257,7 @@ private:
public:
/// Constructor for a variable \p Name with implicit format \p ImplicitFormat
/// defined at line \p DefLineNumber or defined before input is parsed if
- /// \p DefLineNumber is None.
+ /// \p DefLineNumber is std::nullopt.
explicit NumericVariable(StringRef Name, ExpressionFormat ImplicitFormat,
std::optional<size_t> DefLineNumber = std::nullopt)
: Name(Name), ImplicitFormat(ImplicitFormat),
@@ -304,7 +281,7 @@ public:
/// Sets value of this numeric variable to \p NewValue, and sets the input
/// buffer string from which it was parsed to \p NewStrValue. See comments on
- /// getStringValue for a discussion of when the latter can be None.
+ /// getStringValue for a discussion of when the latter can be std::nullopt.
void setValue(ExpressionValue NewValue,
std::optional<StringRef> NewStrValue = std::nullopt) {
Value = NewValue;
diff --git a/llvm/lib/Frontend/OpenMP/OMPContext.cpp b/llvm/lib/Frontend/OpenMP/OMPContext.cpp
index 50ca01d34e20..e870c5aa2ba6 100644
--- a/llvm/lib/Frontend/OpenMP/OMPContext.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPContext.cpp
@@ -15,9 +15,9 @@
#include "llvm/Frontend/OpenMP/OMPContext.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#define DEBUG_TYPE "openmp-ir-builder"
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 8a4ed30628dc..4c3696f9c342 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -21,6 +21,8 @@
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -32,6 +34,7 @@
#include "llvm/IR/Value.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -328,6 +331,35 @@ BasicBlock *llvm::splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch,
return splitBB(Builder, CreateBranch, Old->getName() + Suffix);
}
+void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,
+ IRBuilderBase &Builder,
+ SmallVector<Value *> &ArgsVector) {
+ Value *Version = Builder.getInt32(OMP_KERNEL_ARG_VERSION);
+ Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems);
+ auto Int32Ty = Type::getInt32Ty(Builder.getContext());
+ Value *ZeroArray = Constant::getNullValue(ArrayType::get(Int32Ty, 3));
+ Value *Flags = Builder.getInt64(KernelArgs.HasNoWait);
+
+ Value *NumTeams3D =
+ Builder.CreateInsertValue(ZeroArray, KernelArgs.NumTeams, {0});
+ Value *NumThreads3D =
+ Builder.CreateInsertValue(ZeroArray, KernelArgs.NumThreads, {0});
+
+ ArgsVector = {Version,
+ PointerNum,
+ KernelArgs.RTArgs.BasePointersArray,
+ KernelArgs.RTArgs.PointersArray,
+ KernelArgs.RTArgs.SizesArray,
+ KernelArgs.RTArgs.MapTypesArray,
+ KernelArgs.RTArgs.MapNamesArray,
+ KernelArgs.RTArgs.MappersArray,
+ KernelArgs.NumIterations,
+ Flags,
+ NumTeams3D,
+ NumThreads3D,
+ KernelArgs.DynCGGroupMem};
+}
+
void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
LLVMContext &Ctx = Fn.getContext();
Triple T(M.getTargetTriple());
@@ -433,9 +465,7 @@ OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, RuntimeFunction FnID) {
assert(Fn && "Failed to create OpenMP runtime function");
- // Cast the function to the expected type if necessary
- Constant *C = ConstantExpr::getBitCast(Fn, FnTy->getPointerTo());
- return {FnTy, C};
+ return {FnTy, Fn};
}
Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
@@ -445,7 +475,31 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
return Fn;
}
-void OpenMPIRBuilder::initialize() { initializeTypes(M); }
+void OpenMPIRBuilder::initialize(StringRef HostFilePath) {
+ initializeTypes(M);
+
+ if (HostFilePath.empty())
+ return;
+
+ auto Buf = MemoryBuffer::getFile(HostFilePath);
+ if (std::error_code Err = Buf.getError()) {
+ report_fatal_error(("error opening host file from host file path inside of "
+ "OpenMPIRBuilder: " +
+ Err.message())
+ .c_str());
+ }
+
+ LLVMContext Ctx;
+ auto M = expectedToErrorOrAndEmitErrors(
+ Ctx, parseBitcodeFile(Buf.get()->getMemBufferRef(), Ctx));
+ if (std::error_code Err = M.getError()) {
+ report_fatal_error(
+ ("error parsing host file inside of OpenMPIRBuilder: " + Err.message())
+ .c_str());
+ }
+
+ loadOffloadInfoMetadata(*M.get());
+}
void OpenMPIRBuilder::finalize(Function *Fn) {
SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
@@ -534,6 +588,17 @@ void OpenMPIRBuilder::finalize(Function *Fn) {
// Remove work items that have been completed.
OutlineInfos = std::move(DeferredOutlines);
+
+ EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
+ [](EmitMetadataErrorKind Kind,
+ const TargetRegionEntryInfo &EntryInfo) -> void {
+ errs() << "Error of kind: " << Kind
+ << " when emitting offload entries and metadata during "
+ "OMPIRBuilder finalization \n";
+ };
+
+ if (!OffloadInfoManager.empty())
+ createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}
OpenMPIRBuilder::~OpenMPIRBuilder() {
@@ -571,7 +636,7 @@ Constant *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
// Look for existing encoding of the location + flags, not needed but
// minimizes the difference to the existing solution while we transition.
- for (GlobalVariable &GV : M.getGlobalList())
+ for (GlobalVariable &GV : M.globals())
if (GV.getValueType() == OpenMPIRBuilder::Ident && GV.hasInitializer())
if (GV.getInitializer() == Initializer)
Ident = &GV;
@@ -601,7 +666,7 @@ Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr,
// Look for existing encoding of the location, not needed but minimizes the
// difference to the existing solution while we transition.
- for (GlobalVariable &GV : M.getGlobalList())
+ for (GlobalVariable &GV : M.globals())
if (GV.isConstant() && GV.hasInitializer() &&
GV.getInitializer() == Initializer)
return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);
@@ -813,14 +878,17 @@ void OpenMPIRBuilder::emitOffloadingEntry(Constant *Addr, StringRef Name,
}
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
- const LocationDescription &Loc, Value *&Return, Value *Ident,
- Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr,
- ArrayRef<Value *> KernelArgs) {
+ const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return,
+ Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads,
+ Value *HostPtr, ArrayRef<Value *> KernelArgs) {
if (!updateToLocation(Loc))
return Loc.IP;
+ Builder.restoreIP(AllocaIP);
auto *KernelArgsPtr =
Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs, nullptr, "kernel_args");
+ Builder.restoreIP(Loc.IP);
+
for (unsigned I = 0, Size = KernelArgs.size(); I != Size; ++I) {
llvm::Value *Arg =
Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr, I);
@@ -839,6 +907,67 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
return Builder.saveIP();
}
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitKernelLaunch(
+ const LocationDescription &Loc, Function *OutlinedFn, Value *OutlinedFnID,
+ EmitFallbackCallbackTy emitTargetCallFallbackCB, TargetKernelArgs &Args,
+ Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP) {
+
+ if (!updateToLocation(Loc))
+ return Loc.IP;
+
+ Builder.restoreIP(Loc.IP);
+ // On top of the arrays that were filled up, the target offloading call
+ // takes as arguments the device id as well as the host pointer. The host
+ // pointer is used by the runtime library to identify the current target
+ // region, so it only has to be unique and not necessarily point to
+ // anything. It could be the pointer to the outlined function that
+ // implements the target region, but we aren't using that so that the
+ // compiler doesn't need to keep that, and could therefore inline the host
+ // function if proven worthwhile during optimization.
+
+ // From this point on, we need to have an ID of the target region defined.
+ assert(OutlinedFnID && "Invalid outlined function ID!");
+ (void)OutlinedFnID;
+
+ // Return value of the runtime offloading call.
+ Value *Return;
+
+ // Arguments for the target kernel.
+ SmallVector<Value *> ArgsVector;
+ getKernelArgsVector(Args, Builder, ArgsVector);
+
+ // The target region is an outlined function launched by the runtime
+ // via calls to __tgt_target_kernel().
+ //
+ // Note that on the host and CPU targets, the runtime implementation of
+ // these calls simply call the outlined function without forking threads.
+ // The outlined functions themselves have runtime calls to
+ // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
+ // the compiler in emitTeamsCall() and emitParallelCall().
+ //
+ // In contrast, on the NVPTX target, the implementation of
+ // __tgt_target_teams() launches a GPU kernel with the requested number
+ // of teams and threads so no additional calls to the runtime are required.
+ // Check the error code and execute the host version if required.
+ Builder.restoreIP(emitTargetKernel(Builder, AllocaIP, Return, RTLoc, DeviceID,
+ Args.NumTeams, Args.NumThreads,
+ OutlinedFnID, ArgsVector));
+
+ BasicBlock *OffloadFailedBlock =
+ BasicBlock::Create(Builder.getContext(), "omp_offload.failed");
+ BasicBlock *OffloadContBlock =
+ BasicBlock::Create(Builder.getContext(), "omp_offload.cont");
+ Value *Failed = Builder.CreateIsNotNull(Return);
+ Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);
+
+ auto CurFn = Builder.GetInsertBlock()->getParent();
+ emitBlock(OffloadFailedBlock, CurFn);
+ Builder.restoreIP(emitTargetCallFallbackCB(Builder.saveIP()));
+ emitBranch(OffloadContBlock);
+ emitBlock(OffloadContBlock, CurFn, /*IsFinished=*/true);
+ return Builder.saveIP();
+}
+
void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag,
omp::Directive CanceledDirective,
FinalizeCallbackTy ExitCB) {
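
The comment block in the emitKernelLaunch hunk above describes the call-and-fallback shape of the code the builder emits. A rough host-side sketch of that control flow; tgtLaunchKernel is a placeholder standing in for the __tgt_target_kernel entry point named in the comments, with an illustrative parameter list rather than the real libomptarget prototype:

  // Placeholder for the runtime launch call: returns nonzero when the offload
  // cannot run on the device.
  static int tgtLaunchKernel(void * /*Ident*/, long /*DeviceID*/, void * /*Args*/) {
    return 1; // pretend the device launch failed
  }

  static void launchOrFallback(void *Ident, long DeviceID, void *Args,
                               void (*HostFallback)(void *)) {
    int Failed = tgtLaunchKernel(Ident, DeviceID, Args); // runtime offload call
    if (Failed)            // "omp_offload.failed"
      HostFallback(Args);  // body produced by emitTargetCallFallbackCB
    // fall through to "omp_offload.cont"
  }

emitKernelLaunch expresses the same shape in IR: a call whose result feeds CreateIsNotNull, a conditional branch to omp_offload.failed where the fallback callback's code is emitted, and a join at omp_offload.cont.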
@@ -1402,12 +1531,6 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
(Twine(OutlinedFn.getName()) + ".wrapper").str(),
FunctionType::get(Builder.getInt32Ty(), WrapperArgTys, false));
Function *WrapperFunc = dyn_cast<Function>(WrapperFuncVal.getCallee());
- PointerType *WrapperFuncBitcastType =
- FunctionType::get(Builder.getInt32Ty(),
- {Builder.getInt32Ty(), Builder.getInt8PtrTy()}, false)
- ->getPointerTo();
- Value *WrapperFuncBitcast =
- ConstantExpr::getBitCast(WrapperFunc, WrapperFuncBitcastType);
// Emit the @__kmpc_omp_task_alloc runtime call
// The runtime call returns a pointer to an area where the task captured
@@ -1416,7 +1539,7 @@ OpenMPIRBuilder::createTask(const LocationDescription &Loc,
TaskAllocFn,
{/*loc_ref=*/Ident, /*gtid=*/ThreadID, /*flags=*/Flags,
/*sizeof_task=*/TaskSize, /*sizeof_shared=*/Builder.getInt64(0),
- /*task_func=*/WrapperFuncBitcast});
+ /*task_func=*/WrapperFunc});
// Copy the arguments for outlined function
if (HasTaskData) {
@@ -1851,10 +1974,9 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createReductions(
BasicBlock *ReductionFuncBlock =
BasicBlock::Create(Module->getContext(), "", ReductionFunc);
Builder.SetInsertPoint(ReductionFuncBlock);
- Value *LHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(0),
- RedArrayTy->getPointerTo());
- Value *RHSArrayPtr = Builder.CreateBitCast(ReductionFunc->getArg(1),
- RedArrayTy->getPointerTo());
+ Value *LHSArrayPtr = ReductionFunc->getArg(0);
+ Value *RHSArrayPtr = ReductionFunc->getArg(1);
+
for (auto En : enumerate(ReductionInfos)) {
const ReductionInfo &RI = En.value();
Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
@@ -2081,8 +2203,7 @@ CanonicalLoopInfo *OpenMPIRBuilder::createCanonicalLoop(
// Avoid incrementing past stop since it could overflow.
Value *CountIfTwo = Builder.CreateAdd(
Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
- Value *OneCmp = Builder.CreateICmp(
- InclusiveStop ? CmpInst::ICMP_ULT : CmpInst::ICMP_ULE, Span, Incr);
+ Value *OneCmp = Builder.CreateICmp(CmpInst::ICMP_ULE, Span, Incr);
CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
}
Value *TripCount = Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
@@ -2381,7 +2502,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoop(
case OMPScheduleType::BaseRuntimeSimd:
assert(!ChunkSize &&
"schedule type does not support user-defined chunk sizes");
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case OMPScheduleType::BaseDynamicChunked:
case OMPScheduleType::BaseGuidedChunked:
case OMPScheduleType::BaseGuidedIterativeChunked:
@@ -3053,6 +3174,23 @@ void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop,
Builder.CreateBr(NewBlocks.front());
}
+unsigned
+OpenMPIRBuilder::getOpenMPDefaultSimdAlign(const Triple &TargetTriple,
+ const StringMap<bool> &Features) {
+ if (TargetTriple.isX86()) {
+ if (Features.lookup("avx512f"))
+ return 512;
+ else if (Features.lookup("avx"))
+ return 256;
+ return 128;
+ }
+ if (TargetTriple.isPPC())
+ return 128;
+ if (TargetTriple.isWasm())
+ return 128;
+ return 0;
+}
+
void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
MapVector<Value *, Value *> AlignedVars,
Value *IfCond, OrderKind Order,
@@ -3778,7 +3916,7 @@ CallInst *OpenMPIRBuilder::createOMPInteropInit(
Device = ConstantInt::get(Int32, -1);
Constant *InteropTypeVal = ConstantInt::get(Int32, (int)InteropType);
if (NumDependences == nullptr) {
- NumDependences = ConstantInt::get(Int64, 0);
+ NumDependences = ConstantInt::get(Int32, 0);
PointerType *PointerTypeVar = Type::getInt8PtrTy(M.getContext());
DependenceAddress = ConstantPointerNull::get(PointerTypeVar);
}
@@ -3938,7 +4076,7 @@ void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc,
void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
Function *OutlinedFn, int32_t NumTeams, int32_t NumThreads) {
- if (Config.isEmbedded()) {
+ if (Config.isTargetDevice()) {
OutlinedFn->setLinkage(GlobalValue::WeakODRLinkage);
// TODO: Determine if DSO local can be set to true.
OutlinedFn->setDSOLocal(false);
@@ -3956,7 +4094,7 @@ void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
Constant *OpenMPIRBuilder::createOutlinedFunctionID(Function *OutlinedFn,
StringRef EntryFnIDName) {
- if (Config.isEmbedded()) {
+ if (Config.isTargetDevice()) {
assert(OutlinedFn && "The outlined function must exist if embedded");
return ConstantExpr::getBitCast(OutlinedFn, Builder.getInt8PtrTy());
}
@@ -3979,15 +4117,15 @@ Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
}
void OpenMPIRBuilder::emitTargetRegionFunction(
- OffloadEntriesInfoManager &InfoManager, TargetRegionEntryInfo &EntryInfo,
+ TargetRegionEntryInfo &EntryInfo,
FunctionGenCallback &GenerateFunctionCallback, int32_t NumTeams,
int32_t NumThreads, bool IsOffloadEntry, Function *&OutlinedFn,
Constant *&OutlinedFnID) {
SmallString<64> EntryFnName;
- InfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);
+ OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);
- OutlinedFn = Config.isEmbedded() || !Config.openMPOffloadMandatory()
+ OutlinedFn = Config.isTargetDevice() || !Config.openMPOffloadMandatory()
? GenerateFunctionCallback(EntryFnName)
: nullptr;
@@ -3998,29 +4136,256 @@ void OpenMPIRBuilder::emitTargetRegionFunction(
return;
std::string EntryFnIDName =
- Config.isEmbedded()
+ Config.isTargetDevice()
? std::string(EntryFnName)
: createPlatformSpecificName({EntryFnName, "region_id"});
OutlinedFnID = registerTargetRegionFunction(
- InfoManager, EntryInfo, OutlinedFn, EntryFnName, EntryFnIDName, NumTeams,
- NumThreads);
+ EntryInfo, OutlinedFn, EntryFnName, EntryFnIDName, NumTeams, NumThreads);
}
Constant *OpenMPIRBuilder::registerTargetRegionFunction(
- OffloadEntriesInfoManager &InfoManager, TargetRegionEntryInfo &EntryInfo,
- Function *OutlinedFn, StringRef EntryFnName, StringRef EntryFnIDName,
- int32_t NumTeams, int32_t NumThreads) {
+ TargetRegionEntryInfo &EntryInfo, Function *OutlinedFn,
+ StringRef EntryFnName, StringRef EntryFnIDName, int32_t NumTeams,
+ int32_t NumThreads) {
if (OutlinedFn)
setOutlinedTargetRegionFunctionAttributes(OutlinedFn, NumTeams, NumThreads);
auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
- InfoManager.registerTargetRegionEntryInfo(
+ OffloadInfoManager.registerTargetRegionEntryInfo(
EntryInfo, EntryAddr, OutlinedFnID,
OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
return OutlinedFnID;
}
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData(
+ const LocationDescription &Loc, InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond,
+ TargetDataInfo &Info,
+ function_ref<MapInfosTy &(InsertPointTy CodeGenIP)> GenMapInfoCB,
+ omp::RuntimeFunction *MapperFunc,
+ function_ref<InsertPointTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)>
+ BodyGenCB,
+ function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+ function_ref<Value *(unsigned int)> CustomMapperCB, Value *SrcLocInfo) {
+ if (!updateToLocation(Loc))
+ return InsertPointTy();
+
+ Builder.restoreIP(CodeGenIP);
+ bool IsStandAlone = !BodyGenCB;
+ MapInfosTy *MapInfo;
+ // Generate the code for the opening of the data environment. Capture all the
+ // arguments of the runtime call by reference because they are used in the
+ // closing of the region.
+ auto BeginThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ MapInfo = &GenMapInfoCB(Builder.saveIP());
+ emitOffloadingArrays(AllocaIP, Builder.saveIP(), *MapInfo, Info,
+ /*IsNonContiguous=*/true, DeviceAddrCB,
+ CustomMapperCB);
+
+ TargetDataRTArgs RTArgs;
+ emitOffloadingArraysArgument(Builder, RTArgs, Info,
+ !MapInfo->Names.empty());
+
+ // Emit the number of elements in the offloading arrays.
+ Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
+
+ // Source location for the ident struct
+ if (!SrcLocInfo) {
+ uint32_t SrcLocStrSize;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+ SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ }
+
+ Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
+ PointerNum, RTArgs.BasePointersArray,
+ RTArgs.PointersArray, RTArgs.SizesArray,
+ RTArgs.MapTypesArray, RTArgs.MapNamesArray,
+ RTArgs.MappersArray};
+
+ if (IsStandAlone) {
+ assert(MapperFunc && "MapperFunc missing for standalone target data");
+ Builder.CreateCall(getOrCreateRuntimeFunctionPtr(*MapperFunc),
+ OffloadingArgs);
+ } else {
+ Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr(
+ omp::OMPRTL___tgt_target_data_begin_mapper);
+
+ Builder.CreateCall(BeginMapperFunc, OffloadingArgs);
+
+ for (auto DeviceMap : Info.DevicePtrInfoMap) {
+ if (isa<AllocaInst>(DeviceMap.second.second)) {
+ auto *LI =
+ Builder.CreateLoad(Builder.getPtrTy(), DeviceMap.second.first);
+ Builder.CreateStore(LI, DeviceMap.second.second);
+ }
+ }
+
+ // If device pointer privatization is required, emit the body of the
+ // region here. It will have to be duplicated: with and without
+ // privatization.
+ Builder.restoreIP(BodyGenCB(Builder.saveIP(), BodyGenTy::Priv));
+ }
+ };
+
+ // If we need device pointer privatization, we need to emit the body of the
+ // region with no privatization in the 'else' branch of the conditional.
+ // Otherwise, we don't have to do anything.
+ auto BeginElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ Builder.restoreIP(BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv));
+ };
+
+ // Generate code for the closing of the data region.
+ auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
+ TargetDataRTArgs RTArgs;
+ emitOffloadingArraysArgument(Builder, RTArgs, Info, !MapInfo->Names.empty(),
+ /*ForEndCall=*/true);
+
+ // Emit the number of elements in the offloading arrays.
+ Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
+
+ // Source location for the ident struct
+ if (!SrcLocInfo) {
+ uint32_t SrcLocStrSize;
+ Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+ SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+ }
+
+ Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
+ PointerNum, RTArgs.BasePointersArray,
+ RTArgs.PointersArray, RTArgs.SizesArray,
+ RTArgs.MapTypesArray, RTArgs.MapNamesArray,
+ RTArgs.MappersArray};
+ Function *EndMapperFunc =
+ getOrCreateRuntimeFunctionPtr(omp::OMPRTL___tgt_target_data_end_mapper);
+
+ Builder.CreateCall(EndMapperFunc, OffloadingArgs);
+ };
+
+ // We don't have to do anything to close the region if the if clause evaluates
+ // to false.
+ auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {};
+
+ if (BodyGenCB) {
+ if (IfCond) {
+ emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
+ } else {
+ BeginThenGen(AllocaIP, Builder.saveIP());
+ }
+
+ // If we don't require privatization of device pointers, we emit the body in
+ // between the runtime calls. This avoids duplicating the body code.
+ Builder.restoreIP(BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv));
+
+ if (IfCond) {
+ emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
+ } else {
+ EndThenGen(AllocaIP, Builder.saveIP());
+ }
+ } else {
+ if (IfCond) {
+ emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
+ } else {
+ BeginThenGen(AllocaIP, Builder.saveIP());
+ }
+ }
+
+ return Builder.saveIP();
+}
+
+static Function *
+createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
+ StringRef FuncName, SmallVectorImpl<Value *> &Inputs,
+ OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc) {
+ SmallVector<Type *> ParameterTypes;
+ for (auto &Arg : Inputs)
+ ParameterTypes.push_back(Arg->getType());
+
+ auto FuncType = FunctionType::get(Builder.getVoidTy(), ParameterTypes,
+ /*isVarArg*/ false);
+ auto Func = Function::Create(FuncType, GlobalValue::InternalLinkage, FuncName,
+ Builder.GetInsertBlock()->getModule());
+
+ // Save insert point.
+ auto OldInsertPoint = Builder.saveIP();
+
+ // Generate the region into the function.
+ BasicBlock *EntryBB = BasicBlock::Create(Builder.getContext(), "entry", Func);
+ Builder.SetInsertPoint(EntryBB);
+
+ // Insert target init call in the device compilation pass.
+ if (OMPBuilder.Config.isTargetDevice())
+ Builder.restoreIP(OMPBuilder.createTargetInit(Builder, /*IsSPMD*/ false));
+
+ Builder.restoreIP(CBFunc(Builder.saveIP(), Builder.saveIP()));
+
+ // Insert target deinit call in the device compilation pass.
+ if (OMPBuilder.Config.isTargetDevice())
+ OMPBuilder.createTargetDeinit(Builder, /*IsSPMD*/ false);
+
+ // Insert return instruction.
+ Builder.CreateRetVoid();
+
+  // Rewrite uses of input values to parameters.
+ for (auto InArg : zip(Inputs, Func->args())) {
+ Value *Input = std::get<0>(InArg);
+ Argument &Arg = std::get<1>(InArg);
+
+ // Collect all the instructions
+ for (User *User : make_early_inc_range(Input->users()))
+ if (auto Instr = dyn_cast<Instruction>(User))
+ if (Instr->getFunction() == Func)
+ Instr->replaceUsesOfWith(Input, &Arg);
+ }
+
+ // Restore insert point.
+ Builder.restoreIP(OldInsertPoint);
+
+ return Func;
+}
+
+static void
+emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder,
+ TargetRegionEntryInfo &EntryInfo,
+ Function *&OutlinedFn, int32_t NumTeams,
+ int32_t NumThreads, SmallVectorImpl<Value *> &Inputs,
+ OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc) {
+
+ OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
+ [&OMPBuilder, &Builder, &Inputs, &CBFunc](StringRef EntryFnName) {
+ return createOutlinedFunction(OMPBuilder, Builder, EntryFnName, Inputs,
+ CBFunc);
+ };
+
+ Constant *OutlinedFnID;
+ OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
+ NumTeams, NumThreads, true, OutlinedFn,
+ OutlinedFnID);
+}
+
+static void emitTargetCall(IRBuilderBase &Builder, Function *OutlinedFn,
+ SmallVectorImpl<Value *> &Args) {
+ // TODO: Add kernel launch call
+ Builder.CreateCall(OutlinedFn, Args);
+}
+
+OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTarget(
+ const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy CodeGenIP,
+ TargetRegionEntryInfo &EntryInfo, int32_t NumTeams, int32_t NumThreads,
+ SmallVectorImpl<Value *> &Args, TargetBodyGenCallbackTy CBFunc) {
+ if (!updateToLocation(Loc))
+ return InsertPointTy();
+
+ Builder.restoreIP(CodeGenIP);
+
+ Function *OutlinedFn;
+ emitTargetOutlinedFunction(*this, Builder, EntryInfo, OutlinedFn, NumTeams,
+ NumThreads, Args, CBFunc);
+ if (!Config.isTargetDevice())
+ emitTargetCall(Builder, OutlinedFn, Args);
+ return Builder.saveIP();
+}
+
std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef<StringRef> Parts,
StringRef FirstSeparator,
StringRef Separator) {
@@ -4045,21 +4410,22 @@ OpenMPIRBuilder::getOrCreateInternalVariable(Type *Ty, const StringRef &Name,
unsigned AddressSpace) {
auto &Elem = *InternalVars.try_emplace(Name, nullptr).first;
if (Elem.second) {
- assert(cast<PointerType>(Elem.second->getType())
- ->isOpaqueOrPointeeTypeMatches(Ty) &&
+ assert(Elem.second->getValueType() == Ty &&
"OMP internal variable has different type than requested");
} else {
// TODO: investigate the appropriate linkage type used for the global
// variable for possibly changing that to internal or private, or maybe
// create different versions of the function for different OMP internal
// variables.
- Elem.second = new GlobalVariable(
+ auto *GV = new GlobalVariable(
M, Ty, /*IsConstant=*/false, GlobalValue::CommonLinkage,
Constant::getNullValue(Ty), Elem.first(),
/*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal, AddressSpace);
+ GV->setAlignment(M.getDataLayout().getABITypeAlign(Ty));
+ Elem.second = GV;
}
- return cast<GlobalVariable>(&*Elem.second);
+ return Elem.second;
}
Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
@@ -4068,6 +4434,16 @@ Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}
+Value *OpenMPIRBuilder::getSizeInBytes(Value *BasePtr) {
+ LLVMContext &Ctx = Builder.getContext();
+ Value *Null =
+ Constant::getNullValue(PointerType::getUnqual(BasePtr->getContext()));
+ Value *SizeGep =
+ Builder.CreateGEP(BasePtr->getType(), Null, Builder.getInt32(1));
+ Value *SizePtrToInt = Builder.CreatePtrToInt(SizeGep, Type::getInt64Ty(Ctx));
+ return SizePtrToInt;
+}
+
GlobalVariable *
OpenMPIRBuilder::createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
std::string VarName) {
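
getSizeInBytes above is an instance of the "sizeof via GEP from null" idiom: index one element past a null pointer and convert the resulting address to an integer. A hedged sketch of the idiom in isolation, computing sizeof(Ty) for an arbitrary element type; the helper name and include list are assumptions, but the IRBuilderBase calls mirror the ones used above:

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/IRBuilder.h"

  // Emit a Value* equal to sizeof(Ty) without consulting the DataLayout:
  //   %gep  = getelementptr Ty, ptr null, i32 1
  //   %size = ptrtoint ptr %gep to i64
  static llvm::Value *emitSizeOf(llvm::IRBuilderBase &B, llvm::Type *Ty) {
    llvm::Value *Null =
        llvm::Constant::getNullValue(llvm::PointerType::getUnqual(B.getContext()));
    llvm::Value *Gep = B.CreateGEP(Ty, Null, B.getInt32(1)); // &((Ty *)nullptr)[1]
    return B.CreatePtrToInt(Gep, B.getInt64Ty());
  }

In getSizeInBytes the element type handed to CreateGEP is BasePtr's own type, so the computed size is that type's allocation size; the shape of the computation is the same.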
@@ -4091,9 +4467,12 @@ void OpenMPIRBuilder::createMapperAllocas(const LocationDescription &Loc,
auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
Builder.restoreIP(AllocaIP);
- AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI8PtrTy);
- AllocaInst *Args = Builder.CreateAlloca(ArrI8PtrTy);
- AllocaInst *ArgSizes = Builder.CreateAlloca(ArrI64Ty);
+ AllocaInst *ArgsBase = Builder.CreateAlloca(
+ ArrI8PtrTy, /* ArraySize = */ nullptr, ".offload_baseptrs");
+ AllocaInst *Args = Builder.CreateAlloca(ArrI8PtrTy, /* ArraySize = */ nullptr,
+ ".offload_ptrs");
+ AllocaInst *ArgSizes = Builder.CreateAlloca(
+ ArrI64Ty, /* ArraySize = */ nullptr, ".offload_sizes");
Builder.restoreIP(Loc.IP);
MapperAllocas.ArgsBase = ArgsBase;
MapperAllocas.Args = Args;
@@ -4119,7 +4498,8 @@ void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc,
Value *ArgSizesGEP =
Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
{Builder.getInt32(0), Builder.getInt32(0)});
- Value *NullPtr = Constant::getNullValue(Int8Ptr->getPointerTo());
+ Value *NullPtr =
+ Constant::getNullValue(PointerType::getUnqual(Int8Ptr->getContext()));
Builder.CreateCall(MapperFunc,
{SrcLocInfo, Builder.getInt64(DeviceID),
Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
@@ -4184,6 +4564,342 @@ void OpenMPIRBuilder::emitOffloadingArraysArgument(IRBuilderBase &Builder,
Builder.CreatePointerCast(Info.RTArgs.MappersArray, VoidPtrPtrTy);
}
+void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
+ InsertPointTy CodeGenIP,
+ MapInfosTy &CombinedInfo,
+ TargetDataInfo &Info) {
+ MapInfosTy::StructNonContiguousInfo &NonContigInfo =
+ CombinedInfo.NonContigInfo;
+
+ // Build an array of struct descriptor_dim and then assign it to
+ // offload_args.
+ //
+ // struct descriptor_dim {
+ // uint64_t offset;
+ // uint64_t count;
+ // uint64_t stride
+ // };
+ Type *Int64Ty = Builder.getInt64Ty();
+ StructType *DimTy = StructType::create(
+ M.getContext(), ArrayRef<Type *>({Int64Ty, Int64Ty, Int64Ty}),
+ "struct.descriptor_dim");
+
+ enum { OffsetFD = 0, CountFD, StrideFD };
+ // We need two index variable here since the size of "Dims" is the same as
+ // the size of Components, however, the size of offset, count, and stride is
+ // equal to the size of base declaration that is non-contiguous.
+ for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
+ // Skip emitting ir if dimension size is 1 since it cannot be
+ // non-contiguous.
+ if (NonContigInfo.Dims[I] == 1)
+ continue;
+ Builder.restoreIP(AllocaIP);
+ ArrayType *ArrayTy = ArrayType::get(DimTy, NonContigInfo.Dims[I]);
+ AllocaInst *DimsAddr =
+ Builder.CreateAlloca(ArrayTy, /* ArraySize = */ nullptr, "dims");
+ Builder.restoreIP(CodeGenIP);
+ for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
+ unsigned RevIdx = EE - II - 1;
+ Value *DimsLVal = Builder.CreateInBoundsGEP(
+ DimsAddr->getAllocatedType(), DimsAddr,
+ {Builder.getInt64(0), Builder.getInt64(II)});
+ // Offset
+ Value *OffsetLVal = Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
+ Builder.CreateAlignedStore(
+ NonContigInfo.Offsets[L][RevIdx], OffsetLVal,
+ M.getDataLayout().getPrefTypeAlign(OffsetLVal->getType()));
+ // Count
+ Value *CountLVal = Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
+ Builder.CreateAlignedStore(
+ NonContigInfo.Counts[L][RevIdx], CountLVal,
+ M.getDataLayout().getPrefTypeAlign(CountLVal->getType()));
+ // Stride
+ Value *StrideLVal = Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
+ Builder.CreateAlignedStore(
+ NonContigInfo.Strides[L][RevIdx], StrideLVal,
+ M.getDataLayout().getPrefTypeAlign(CountLVal->getType()));
+ }
+ // args[I] = &dims
+ Builder.restoreIP(CodeGenIP);
+ Value *DAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
+ DimsAddr, Builder.getInt8PtrTy());
+ Value *P = Builder.CreateConstInBoundsGEP2_32(
+ ArrayType::get(Builder.getInt8PtrTy(), Info.NumberOfPtrs),
+ Info.RTArgs.PointersArray, 0, I);
+ Builder.CreateAlignedStore(
+ DAddr, P, M.getDataLayout().getPrefTypeAlign(Builder.getInt8PtrTy()));
+ ++L;
+ }
+}
+
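// [Editorial note; illustrative sketch, not part of this import] The descriptor
// emitted above corresponds to the runtime-side layout spelled out in the
// comment, roughly this C shape per non-contiguous base:
//
//   struct descriptor_dim { uint64_t offset; uint64_t count; uint64_t stride; };
//   struct descriptor_dim dims[NumDims];   // dimensions stored in reverse order
//
// and a pointer to dims[0] is then stored into the matching .offload_ptrs slot.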
+void OpenMPIRBuilder::emitOffloadingArrays(
+ InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
+ TargetDataInfo &Info, bool IsNonContiguous,
+ function_ref<void(unsigned int, Value *)> DeviceAddrCB,
+ function_ref<Value *(unsigned int)> CustomMapperCB) {
+
+ // Reset the array information.
+ Info.clearArrayInfo();
+ Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
+
+ if (Info.NumberOfPtrs == 0)
+ return;
+
+ Builder.restoreIP(AllocaIP);
+ // Detect if we have any capture size requiring runtime evaluation of the
+  // size so that a constant array can eventually be used.
+ ArrayType *PointerArrayType =
+ ArrayType::get(Builder.getInt8PtrTy(), Info.NumberOfPtrs);
+
+ Info.RTArgs.BasePointersArray = Builder.CreateAlloca(
+ PointerArrayType, /* ArraySize = */ nullptr, ".offload_baseptrs");
+
+ Info.RTArgs.PointersArray = Builder.CreateAlloca(
+ PointerArrayType, /* ArraySize = */ nullptr, ".offload_ptrs");
+ AllocaInst *MappersArray = Builder.CreateAlloca(
+ PointerArrayType, /* ArraySize = */ nullptr, ".offload_mappers");
+ Info.RTArgs.MappersArray = MappersArray;
+
+ // If we don't have any VLA types or other types that require runtime
+ // evaluation, we can use a constant array for the map sizes, otherwise we
+ // need to fill up the arrays as we do for the pointers.
+ Type *Int64Ty = Builder.getInt64Ty();
+ SmallVector<Constant *> ConstSizes(CombinedInfo.Sizes.size(),
+ ConstantInt::get(Builder.getInt64Ty(), 0));
+ SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
+ for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
+ if (auto *CI = dyn_cast<Constant>(CombinedInfo.Sizes[I])) {
+ if (!isa<ConstantExpr>(CI) && !isa<GlobalValue>(CI)) {
+ if (IsNonContiguous &&
+ static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ CombinedInfo.Types[I] &
+ OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
+ ConstSizes[I] = ConstantInt::get(Builder.getInt64Ty(),
+ CombinedInfo.NonContigInfo.Dims[I]);
+ else
+ ConstSizes[I] = CI;
+ continue;
+ }
+ }
+ RuntimeSizes.set(I);
+ }
+
+ if (RuntimeSizes.all()) {
+ ArrayType *SizeArrayType = ArrayType::get(Int64Ty, Info.NumberOfPtrs);
+ Info.RTArgs.SizesArray = Builder.CreateAlloca(
+ SizeArrayType, /* ArraySize = */ nullptr, ".offload_sizes");
+ Builder.restoreIP(CodeGenIP);
+ } else {
+ auto *SizesArrayInit = ConstantArray::get(
+ ArrayType::get(Int64Ty, ConstSizes.size()), ConstSizes);
+ std::string Name = createPlatformSpecificName({"offload_sizes"});
+ auto *SizesArrayGbl =
+ new GlobalVariable(M, SizesArrayInit->getType(), /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, SizesArrayInit, Name);
+ SizesArrayGbl->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
+
+ if (!RuntimeSizes.any()) {
+ Info.RTArgs.SizesArray = SizesArrayGbl;
+ } else {
+ unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
+ Align OffloadSizeAlign = M.getDataLayout().getABIIntegerTypeAlignment(64);
+ ArrayType *SizeArrayType = ArrayType::get(Int64Ty, Info.NumberOfPtrs);
+ AllocaInst *Buffer = Builder.CreateAlloca(
+ SizeArrayType, /* ArraySize = */ nullptr, ".offload_sizes");
+ Buffer->setAlignment(OffloadSizeAlign);
+ Builder.restoreIP(CodeGenIP);
+ Value *GblConstPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
+ SizesArrayGbl, Int64Ty->getPointerTo());
+ Builder.CreateMemCpy(
+ Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->getType()),
+ GblConstPtr, OffloadSizeAlign,
+ Builder.getIntN(
+ IndexSize,
+ Buffer->getAllocationSize(M.getDataLayout())->getFixedValue()));
+
+ Info.RTArgs.SizesArray = Buffer;
+ }
+ Builder.restoreIP(CodeGenIP);
+ }
+
+ // The map types are always constant so we don't need to generate code to
+ // fill arrays. Instead, we create an array constant.
+ SmallVector<uint64_t, 4> Mapping;
+ for (auto mapFlag : CombinedInfo.Types)
+ Mapping.push_back(
+ static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ mapFlag));
+ std::string MaptypesName = createPlatformSpecificName({"offload_maptypes"});
+ auto *MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
+ Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
+
+ // The information types are only built if provided.
+ if (!CombinedInfo.Names.empty()) {
+ std::string MapnamesName = createPlatformSpecificName({"offload_mapnames"});
+ auto *MapNamesArrayGbl =
+ createOffloadMapnames(CombinedInfo.Names, MapnamesName);
+ Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
+ } else {
+ Info.RTArgs.MapNamesArray = Constant::getNullValue(
+ Type::getInt8Ty(Builder.getContext())->getPointerTo());
+ }
+
+ // If there's a present map type modifier, it must not be applied to the end
+ // of a region, so generate a separate map type array in that case.
+ if (Info.separateBeginEndCalls()) {
+ bool EndMapTypesDiffer = false;
+ for (uint64_t &Type : Mapping) {
+ if (Type & static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
+ Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
+ OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
+ EndMapTypesDiffer = true;
+ }
+ }
+ if (EndMapTypesDiffer) {
+ MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
+ Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
+ }
+ }
+
+ for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
+ Value *BPVal = CombinedInfo.BasePointers[I];
+ Value *BP = Builder.CreateConstInBoundsGEP2_32(
+ ArrayType::get(Builder.getInt8PtrTy(), Info.NumberOfPtrs),
+ Info.RTArgs.BasePointersArray, 0, I);
+ BP = Builder.CreatePointerBitCastOrAddrSpaceCast(
+ BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
+ Builder.CreateAlignedStore(
+ BPVal, BP, M.getDataLayout().getPrefTypeAlign(Builder.getInt8PtrTy()));
+
+ if (Info.requiresDevicePointerInfo()) {
+ if (CombinedInfo.DevicePointers[I] == DeviceInfoTy::Pointer) {
+ CodeGenIP = Builder.saveIP();
+ Builder.restoreIP(AllocaIP);
+ Info.DevicePtrInfoMap[BPVal] = {
+ BP, Builder.CreateAlloca(Builder.getPtrTy())};
+ Builder.restoreIP(CodeGenIP);
+ assert(DeviceAddrCB &&
+ "DeviceAddrCB missing for DevicePtr code generation");
+ DeviceAddrCB(I, Info.DevicePtrInfoMap[BPVal].second);
+ } else if (CombinedInfo.DevicePointers[I] == DeviceInfoTy::Address) {
+ Info.DevicePtrInfoMap[BPVal] = {BP, BP};
+ assert(DeviceAddrCB &&
+ "DeviceAddrCB missing for DevicePtr code generation");
+ DeviceAddrCB(I, BP);
+ }
+ }
+
+ Value *PVal = CombinedInfo.Pointers[I];
+ Value *P = Builder.CreateConstInBoundsGEP2_32(
+ ArrayType::get(Builder.getInt8PtrTy(), Info.NumberOfPtrs),
+ Info.RTArgs.PointersArray, 0, I);
+ P = Builder.CreatePointerBitCastOrAddrSpaceCast(
+ P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
+    // TODO: Check that the alignment is correct.
+ Builder.CreateAlignedStore(
+ PVal, P, M.getDataLayout().getPrefTypeAlign(Builder.getInt8PtrTy()));
+
+ if (RuntimeSizes.test(I)) {
+ Value *S = Builder.CreateConstInBoundsGEP2_32(
+ ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
+ /*Idx0=*/0,
+ /*Idx1=*/I);
+ Builder.CreateAlignedStore(
+ Builder.CreateIntCast(CombinedInfo.Sizes[I], Int64Ty,
+ /*isSigned=*/true),
+ S, M.getDataLayout().getPrefTypeAlign(Builder.getInt8PtrTy()));
+ }
+ // Fill up the mapper array.
+ unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
+ Value *MFunc = ConstantPointerNull::get(Builder.getInt8PtrTy());
+ if (CustomMapperCB)
+ if (Value *CustomMFunc = CustomMapperCB(I))
+ MFunc = Builder.CreatePointerCast(CustomMFunc, Builder.getInt8PtrTy());
+ Value *MAddr = Builder.CreateInBoundsGEP(
+ MappersArray->getAllocatedType(), MappersArray,
+ {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
+ Builder.CreateAlignedStore(
+ MFunc, MAddr, M.getDataLayout().getPrefTypeAlign(MAddr->getType()));
+ }
+
+ if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
+ Info.NumberOfPtrs == 0)
+ return;
+ emitNonContiguousDescriptor(AllocaIP, CodeGenIP, CombinedInfo, Info);
+}
+
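// [Editorial note; illustrative sketch, not part of this import] The
// .offload_sizes array above is materialised in one of three ways, keyed off
// the RuntimeSizes bitvector: all-runtime sizes use a plain alloca filled per
// element in the loop; all-constant sizes use a private constant global
// directly; the mixed case copies the constant global into a stack buffer and
// overwrites only the runtime slots, roughly:
//
//   Builder.CreateMemCpy(Buffer, BufAlign, SizesArrayGbl, GblAlign,
//                        Builder.getInt64(NumElts * sizeof(uint64_t)));
//
// (BufAlign, GblAlign, and NumElts are stand-ins for the values computed above.)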
+void OpenMPIRBuilder::emitBranch(BasicBlock *Target) {
+ BasicBlock *CurBB = Builder.GetInsertBlock();
+
+ if (!CurBB || CurBB->getTerminator()) {
+ // If there is no insert point or the previous block is already
+ // terminated, don't touch it.
+ } else {
+ // Otherwise, create a fall-through branch.
+ Builder.CreateBr(Target);
+ }
+
+ Builder.ClearInsertionPoint();
+}
+
+void OpenMPIRBuilder::emitBlock(BasicBlock *BB, Function *CurFn,
+ bool IsFinished) {
+ BasicBlock *CurBB = Builder.GetInsertBlock();
+
+ // Fall out of the current block (if necessary).
+ emitBranch(BB);
+
+ if (IsFinished && BB->use_empty()) {
+ BB->eraseFromParent();
+ return;
+ }
+
+ // Place the block after the current block, if possible, or else at
+ // the end of the function.
+ if (CurBB && CurBB->getParent())
+ CurFn->insert(std::next(CurBB->getIterator()), BB);
+ else
+ CurFn->insert(CurFn->end(), BB);
+ Builder.SetInsertPoint(BB);
+}
+
+void OpenMPIRBuilder::emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen,
+ BodyGenCallbackTy ElseGen,
+ InsertPointTy AllocaIP) {
+ // If the condition constant folds and can be elided, try to avoid emitting
+ // the condition and the dead arm of the if/else.
+ if (auto *CI = dyn_cast<ConstantInt>(Cond)) {
+ auto CondConstant = CI->getSExtValue();
+ if (CondConstant)
+ ThenGen(AllocaIP, Builder.saveIP());
+ else
+ ElseGen(AllocaIP, Builder.saveIP());
+ return;
+ }
+
+ Function *CurFn = Builder.GetInsertBlock()->getParent();
+
+ // Otherwise, the condition did not fold, or we couldn't elide it. Just
+ // emit the conditional branch.
+ BasicBlock *ThenBlock = BasicBlock::Create(M.getContext(), "omp_if.then");
+ BasicBlock *ElseBlock = BasicBlock::Create(M.getContext(), "omp_if.else");
+ BasicBlock *ContBlock = BasicBlock::Create(M.getContext(), "omp_if.end");
+ Builder.CreateCondBr(Cond, ThenBlock, ElseBlock);
+ // Emit the 'then' code.
+ emitBlock(ThenBlock, CurFn);
+ ThenGen(AllocaIP, Builder.saveIP());
+ emitBranch(ContBlock);
+ // Emit the 'else' code if present.
+ // There is no need to emit line number for unconditional branch.
+ emitBlock(ElseBlock, CurFn);
+ ElseGen(AllocaIP, Builder.saveIP());
+ // There is no need to emit line number for unconditional branch.
+ emitBranch(ContBlock);
+ // Emit the continuation block for code after the if.
+ emitBlock(ContBlock, CurFn, /*IsFinished=*/true);
+}
+
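// [Editorial note; illustrative sketch, not part of this import] For a
// non-constant condition emitIfClause() emits the usual diamond; the generated
// IR is roughly:
//
//     br i1 %cond, label %omp_if.then, label %omp_if.else
//   omp_if.then:                                   ; ThenGen body
//     br label %omp_if.end
//   omp_if.else:                                   ; ElseGen body
//     br label %omp_if.end
//   omp_if.end:                                    ; code after the construct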
bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) {
assert(!(AO == AtomicOrdering::NotAtomic ||
@@ -4252,8 +4968,8 @@ OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
if (!updateToLocation(Loc))
return Loc.IP;
- Type *XTy = X.Var->getType();
- assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
+ assert(X.Var->getType()->isPointerTy() &&
+ "OMP Atomic expects a pointer to target memory");
Type *XElemTy = X.ElemTy;
assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
XElemTy->isPointerTy()) &&
@@ -4267,14 +4983,11 @@ OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
XLD->setAtomic(AO);
XRead = cast<Value>(XLD);
} else {
- // We need to bitcast and perform atomic op as integer
- unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
+ // We need to perform atomic op as integer
IntegerType *IntCastTy =
IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
- Value *XBCast = Builder.CreateBitCast(
- X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.src.int.cast");
LoadInst *XLoad =
- Builder.CreateLoad(IntCastTy, XBCast, X.IsVolatile, "omp.atomic.load");
+ Builder.CreateLoad(IntCastTy, X.Var, X.IsVolatile, "omp.atomic.load");
XLoad->setAtomic(AO);
if (XElemTy->isFloatingPointTy()) {
XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
@@ -4416,13 +5129,10 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
else
Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
} else {
- unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace();
IntegerType *IntCastTy =
IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
- Value *XBCast =
- Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
LoadInst *OldVal =
- Builder.CreateLoad(IntCastTy, XBCast, X->getName() + ".atomic.load");
+ Builder.CreateLoad(IntCastTy, X, X->getName() + ".atomic.load");
OldVal->setAtomic(AO);
// CurBB
// | /---\
@@ -4443,14 +5153,7 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
Builder.SetInsertPoint(ContBB);
llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
PHI->addIncoming(OldVal, CurBB);
- IntegerType *NewAtomicCastTy =
- IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
bool IsIntTy = XElemTy->isIntegerTy();
- Value *NewAtomicIntAddr =
- (IsIntTy)
- ? NewAtomicAddr
- : Builder.CreateBitCast(NewAtomicAddr,
- NewAtomicCastTy->getPointerTo(Addrspace));
Value *OldExprVal = PHI;
if (!IsIntTy) {
if (XElemTy->isFloatingPointTy()) {
@@ -4464,15 +5167,11 @@ std::pair<Value *, Value *> OpenMPIRBuilder::emitAtomicUpdate(
Value *Upd = UpdateOp(OldExprVal, Builder);
Builder.CreateStore(Upd, NewAtomicAddr);
- LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicIntAddr);
- Value *XAddr =
- (IsIntTy)
- ? X
- : Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
+ LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr);
AtomicOrdering Failure =
llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg(
- XAddr, PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
+ X, PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
Result->setVolatile(VolatileX);
Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0);
Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
@@ -4552,15 +5251,11 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
AtomicOrdering Failure = AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
AtomicCmpXchgInst *Result = nullptr;
if (!IsInteger) {
- unsigned Addrspace =
- cast<PointerType>(X.Var->getType())->getAddressSpace();
IntegerType *IntCastTy =
IntegerType::get(M.getContext(), X.ElemTy->getScalarSizeInBits());
- Value *XBCast =
- Builder.CreateBitCast(X.Var, IntCastTy->getPointerTo(Addrspace));
Value *EBCast = Builder.CreateBitCast(E, IntCastTy);
Value *DBCast = Builder.CreateBitCast(D, IntCastTy);
- Result = Builder.CreateAtomicCmpXchg(XBCast, EBCast, DBCast, MaybeAlign(),
+ Result = Builder.CreateAtomicCmpXchg(X.Var, EBCast, DBCast, MaybeAlign(),
AO, Failure);
} else {
Result =
@@ -4766,7 +5461,7 @@ void OpenMPIRBuilder::OutlineInfo::collectBlocks(
void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr,
uint64_t Size, int32_t Flags,
GlobalValue::LinkageTypes) {
- if (!Config.isTargetCodegen()) {
+ if (!Config.isGPU()) {
emitOffloadingEntry(ID, Addr->getName(), Size, Flags);
return;
}
@@ -4790,22 +5485,24 @@ void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr,
// Add a function attribute for the kernel.
Fn->addFnAttr(Attribute::get(Ctx, "kernel"));
+ if (Triple(M.getTargetTriple()).isAMDGCN())
+ Fn->addFnAttr("uniform-work-group-size", "true");
+ Fn->addFnAttr(Attribute::MustProgress);
}
// We only generate metadata for functions that contain target regions.
void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
- OffloadEntriesInfoManager &OffloadEntriesInfoManager,
EmitMetadataErrorReportFunctionTy &ErrorFn) {
// If there are no entries, we don't need to do anything.
- if (OffloadEntriesInfoManager.empty())
+ if (OffloadInfoManager.empty())
return;
LLVMContext &C = M.getContext();
SmallVector<std::pair<const OffloadEntriesInfoManager::OffloadEntryInfo *,
TargetRegionEntryInfo>,
16>
- OrderedEntries(OffloadEntriesInfoManager.size());
+ OrderedEntries(OffloadInfoManager.size());
// Auxiliary methods to create metadata values and strings.
auto &&GetMDInt = [this](unsigned V) {
@@ -4844,8 +5541,7 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
MD->addOperand(MDNode::get(C, Ops));
};
- OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo(
- TargetRegionMetadataEmitter);
+ OffloadInfoManager.actOnTargetRegionEntriesInfo(TargetRegionMetadataEmitter);
// Create a function that emits metadata for each device global variable entry.
auto &&DeviceGlobalVarMetadataEmitter =
@@ -4870,7 +5566,7 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
MD->addOperand(MDNode::get(C, Ops));
};
- OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo(
+ OffloadInfoManager.actOnDeviceGlobalVarEntriesInfo(
DeviceGlobalVarMetadataEmitter);
for (const auto &E : OrderedEntries) {
@@ -4897,8 +5593,9 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind>(
CE->getFlags());
switch (Flags) {
- case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo: {
- if (Config.isEmbedded() && Config.hasRequiresUnifiedSharedMemory())
+ case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter:
+ case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo:
+ if (Config.isTargetDevice() && Config.hasRequiresUnifiedSharedMemory())
continue;
if (!CE->getAddress()) {
ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR, E.second);
@@ -4908,18 +5605,19 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
if (CE->getVarSize() == 0)
continue;
break;
- }
case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink:
- assert(((Config.isEmbedded() && !CE->getAddress()) ||
- (!Config.isEmbedded() && CE->getAddress())) &&
+ assert(((Config.isTargetDevice() && !CE->getAddress()) ||
+ (!Config.isTargetDevice() && CE->getAddress())) &&
"Declaret target link address is set.");
- if (Config.isEmbedded())
+ if (Config.isTargetDevice())
continue;
if (!CE->getAddress()) {
ErrorFn(EMIT_MD_GLOBAL_VAR_LINK_ERROR, TargetRegionEntryInfo());
continue;
}
break;
+ default:
+ break;
}
// Hidden or internal symbols on the device are not externally visible.
@@ -4956,10 +5654,160 @@ void OffloadEntriesInfoManager::getTargetRegionEntryFnName(
EntryInfo.Line, NewCount);
}
+TargetRegionEntryInfo
+OpenMPIRBuilder::getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
+ StringRef ParentName) {
+ sys::fs::UniqueID ID;
+ auto FileIDInfo = CallBack();
+ if (auto EC = sys::fs::getUniqueID(std::get<0>(FileIDInfo), ID)) {
+ report_fatal_error(("Unable to get unique ID for file, during "
+ "getTargetEntryUniqueInfo, error message: " +
+ EC.message())
+ .c_str());
+ }
+
+ return TargetRegionEntryInfo(ParentName, ID.getDevice(), ID.getFile(),
+ std::get<1>(FileIDInfo));
+}
+
+Constant *OpenMPIRBuilder::getAddrOfDeclareTargetVar(
+ OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
+ OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
+ bool IsDeclaration, bool IsExternallyVisible,
+ TargetRegionEntryInfo EntryInfo, StringRef MangledName,
+ std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
+ std::vector<Triple> TargetTriple, Type *LlvmPtrTy,
+ std::function<Constant *()> GlobalInitializer,
+ std::function<GlobalValue::LinkageTypes()> VariableLinkage) {
+ // TODO: convert this to utilise the IRBuilder Config rather than
+ // a passed down argument.
+ if (OpenMPSIMD)
+ return nullptr;
+
+ if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink ||
+ ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
+ CaptureClause ==
+ OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
+ Config.hasRequiresUnifiedSharedMemory())) {
+ SmallString<64> PtrName;
+ {
+ raw_svector_ostream OS(PtrName);
+ OS << MangledName;
+ if (!IsExternallyVisible)
+ OS << format("_%x", EntryInfo.FileID);
+ OS << "_decl_tgt_ref_ptr";
+ }
+
+ Value *Ptr = M.getNamedValue(PtrName);
+
+ if (!Ptr) {
+ GlobalValue *GlobalValue = M.getNamedValue(MangledName);
+ Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
+
+ auto *GV = cast<GlobalVariable>(Ptr);
+ GV->setLinkage(GlobalValue::WeakAnyLinkage);
+
+ if (!Config.isTargetDevice()) {
+ if (GlobalInitializer)
+ GV->setInitializer(GlobalInitializer());
+ else
+ GV->setInitializer(GlobalValue);
+ }
+
+ registerTargetGlobalVariable(
+ CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
+ EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
+ GlobalInitializer, VariableLinkage, LlvmPtrTy, cast<Constant>(Ptr));
+ }
+
+ return cast<Constant>(Ptr);
+ }
+
+ return nullptr;
+}
+
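// [Editorial note; illustrative sketch, not part of this import] For a `link`
// clause (or `to`/`enter` under requires unified_shared_memory) the code above
// hands out an indirection global rather than the variable itself, roughly:
//
//   @<mangled>_decl_tgt_ref_ptr = weak global ptr @<mangled>   ; host side
//
// with an extra _<fileid> infix when the symbol is not externally visible; the
// reference pointer is what then gets registered as the offload entry below.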
+void OpenMPIRBuilder::registerTargetGlobalVariable(
+ OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
+ OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
+ bool IsDeclaration, bool IsExternallyVisible,
+ TargetRegionEntryInfo EntryInfo, StringRef MangledName,
+ std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
+ std::vector<Triple> TargetTriple,
+ std::function<Constant *()> GlobalInitializer,
+ std::function<GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy,
+ Constant *Addr) {
+ if (DeviceClause != OffloadEntriesInfoManager::OMPTargetDeviceClauseAny ||
+ (TargetTriple.empty() && !Config.isTargetDevice()))
+ return;
+
+ OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
+ StringRef VarName;
+ int64_t VarSize;
+ GlobalValue::LinkageTypes Linkage;
+
+ if ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
+ CaptureClause ==
+ OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
+ !Config.hasRequiresUnifiedSharedMemory()) {
+ Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
+ VarName = MangledName;
+ GlobalValue *LlvmVal = M.getNamedValue(VarName);
+
+ if (!IsDeclaration)
+ VarSize = divideCeil(
+ M.getDataLayout().getTypeSizeInBits(LlvmVal->getValueType()), 8);
+ else
+ VarSize = 0;
+ Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->getLinkage();
+
+ // This is a workaround carried over from Clang which prevents undesired
+ // optimisation of internal variables.
+ if (Config.isTargetDevice() &&
+ (!IsExternallyVisible || Linkage == GlobalValue::LinkOnceODRLinkage)) {
+ // Do not create a "ref-variable" if the original is not also available
+ // on the host.
+ if (!OffloadInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
+ return;
+
+ std::string RefName = createPlatformSpecificName({VarName, "ref"});
+
+ if (!M.getNamedValue(RefName)) {
+ Constant *AddrRef =
+ getOrCreateInternalVariable(Addr->getType(), RefName);
+ auto *GvAddrRef = cast<GlobalVariable>(AddrRef);
+ GvAddrRef->setConstant(true);
+ GvAddrRef->setLinkage(GlobalValue::InternalLinkage);
+ GvAddrRef->setInitializer(Addr);
+ GeneratedRefs.push_back(GvAddrRef);
+ }
+ }
+ } else {
+ if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink)
+ Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
+ else
+ Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
+
+ if (Config.isTargetDevice()) {
+ VarName = (Addr) ? Addr->getName() : "";
+ Addr = nullptr;
+ } else {
+ Addr = getAddrOfDeclareTargetVar(
+ CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
+ EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
+ LlvmPtrTy, GlobalInitializer, VariableLinkage);
+ VarName = (Addr) ? Addr->getName() : "";
+ }
+ VarSize = M.getDataLayout().getPointerSize();
+ Linkage = GlobalValue::WeakAnyLinkage;
+ }
+
+ OffloadInfoManager.registerDeviceGlobalVarEntryInfo(VarName, Addr, VarSize,
+ Flags, Linkage);
+}
+
/// Loads all the offload entries information from the host IR
/// metadata.
-void OpenMPIRBuilder::loadOffloadInfoMetadata(
- Module &M, OffloadEntriesInfoManager &OffloadEntriesInfoManager) {
+void OpenMPIRBuilder::loadOffloadInfoMetadata(Module &M) {
// If we are in target mode, load the metadata from the host IR. This code has
// to match the metadata creation in createOffloadEntriesAndInfoMetadata().
@@ -4989,13 +5837,13 @@ void OpenMPIRBuilder::loadOffloadInfoMetadata(
/*FileID=*/GetMDInt(2),
/*Line=*/GetMDInt(4),
/*Count=*/GetMDInt(5));
- OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
- EntryInfo, /*Order=*/GetMDInt(6));
+ OffloadInfoManager.initializeTargetRegionEntryInfo(EntryInfo,
+ /*Order=*/GetMDInt(6));
break;
}
case OffloadEntriesInfoManager::OffloadEntryInfo::
OffloadingEntryInfoDeviceGlobalVar:
- OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
+ OffloadInfoManager.initializeDeviceGlobalVarEntryInfo(
/*MangledName=*/GetMDString(1),
static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind>(
/*Flags=*/GetMDInt(2)),
@@ -5044,7 +5892,7 @@ void OffloadEntriesInfoManager::registerTargetRegionEntryInfo(
// If we are emitting code for a target, the entry is already initialized,
// only has to be registered.
- if (Config.isEmbedded()) {
+ if (OMPBuilder->Config.isTargetDevice()) {
// This could happen if the device compilation is invoked standalone.
if (!hasTargetRegionEntryInfo(EntryInfo)) {
return;
@@ -5099,7 +5947,7 @@ void OffloadEntriesInfoManager::initializeDeviceGlobalVarEntryInfo(
void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
StringRef VarName, Constant *Addr, int64_t VarSize,
OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage) {
- if (Config.isEmbedded()) {
+ if (OMPBuilder->Config.isTargetDevice()) {
// This could happen if the device compilation is invoked standalone.
if (!hasDeviceGlobalVarEntryInfo(VarName))
return;
diff --git a/llvm/lib/FuzzMutate/FuzzerCLI.cpp b/llvm/lib/FuzzMutate/FuzzerCLI.cpp
index 90a1a35e2e3e..0e47e3cc3af2 100644
--- a/llvm/lib/FuzzMutate/FuzzerCLI.cpp
+++ b/llvm/lib/FuzzMutate/FuzzerCLI.cpp
@@ -8,10 +8,10 @@
#include "llvm/FuzzMutate/FuzzerCLI.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/lib/FuzzMutate/IRMutator.cpp b/llvm/lib/FuzzMutate/IRMutator.cpp
index 1e07acb5ae4d..ea630c4602ba 100644
--- a/llvm/lib/FuzzMutate/IRMutator.cpp
+++ b/llvm/lib/FuzzMutate/IRMutator.cpp
@@ -27,51 +27,52 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Transforms/Scalar/DCE.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <map>
#include <optional>
using namespace llvm;
-static void createEmptyFunction(Module &M) {
- // TODO: Some arguments and a return value would probably be more interesting.
- LLVMContext &Context = M.getContext();
- Function *F = Function::Create(FunctionType::get(Type::getVoidTy(Context), {},
- /*isVarArg=*/false),
- GlobalValue::ExternalLinkage, "f", &M);
- BasicBlock *BB = BasicBlock::Create(Context, "BB", F);
- ReturnInst::Create(Context, BB);
-}
-
void IRMutationStrategy::mutate(Module &M, RandomIRBuilder &IB) {
auto RS = makeSampler<Function *>(IB.Rand);
for (Function &F : M)
if (!F.isDeclaration())
RS.sample(&F, /*Weight=*/1);
- if (RS.isEmpty())
- createEmptyFunction(M);
- else
- mutate(*RS.getSelection(), IB);
+ while (RS.totalWeight() < IB.MinFunctionNum) {
+ Function *F = IB.createFunctionDefinition(M);
+ RS.sample(F, /*Weight=*/1);
+ }
+ mutate(*RS.getSelection(), IB);
}
void IRMutationStrategy::mutate(Function &F, RandomIRBuilder &IB) {
- mutate(*makeSampler(IB.Rand, make_pointer_range(F)).getSelection(), IB);
+ auto Range = make_filter_range(make_pointer_range(F),
+ [](BasicBlock *BB) { return !BB->isEHPad(); });
+
+ mutate(*makeSampler(IB.Rand, Range).getSelection(), IB);
}
void IRMutationStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
mutate(*makeSampler(IB.Rand, make_pointer_range(BB)).getSelection(), IB);
}
-void IRMutator::mutateModule(Module &M, int Seed, size_t CurSize,
- size_t MaxSize) {
+size_t llvm::IRMutator::getModuleSize(const Module &M) {
+ return M.getInstructionCount() + M.size() + M.global_size() + M.alias_size();
+}
+
+void IRMutator::mutateModule(Module &M, int Seed, size_t MaxSize) {
std::vector<Type *> Types;
for (const auto &Getter : AllowedTypes)
Types.push_back(Getter(M.getContext()));
RandomIRBuilder IB(Seed, Types);
+ size_t CurSize = IRMutator::getModuleSize(M);
auto RS = makeSampler<IRMutationStrategy *>(IB.Rand);
for (const auto &Strategy : Strategies)
RS.sample(Strategy.get(),
Strategy->getWeight(CurSize, MaxSize, RS.totalWeight()));
+ if (RS.totalWeight() == 0)
+ return;
auto Strategy = RS.getSelection();
Strategy->mutate(M, IB);
@@ -113,10 +114,16 @@ InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) {
return *RS;
}
+static inline iterator_range<BasicBlock::iterator>
+getInsertionRange(BasicBlock &BB) {
+ auto End = BB.getTerminatingMustTailCall() ? std::prev(BB.end()) : BB.end();
+ return make_range(BB.getFirstInsertionPt(), End);
+}
+
void InjectorIRStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
SmallVector<Instruction *, 32> Insts;
- for (auto I = BB.getFirstInsertionPt(), E = BB.end(); I != E; ++I)
- Insts.push_back(&*I);
+ for (Instruction &I : getInsertionRange(BB))
+ Insts.push_back(&I);
if (Insts.size() < 1)
return;
@@ -252,7 +259,7 @@ void InstModificationIRStrategy::mutate(Instruction &Inst,
break;
case Instruction::FCmp:
- CI = cast<ICmpInst>(&Inst);
+ CI = cast<FCmpInst>(&Inst);
for (unsigned p = CmpInst::FIRST_FCMP_PREDICATE;
p <= CmpInst::LAST_FCMP_PREDICATE; p++) {
Modifications.push_back(
@@ -349,10 +356,73 @@ static uint64_t getUniqueCaseValue(SmallSet<uint64_t, 4> &CasesTaken,
return tmp;
}
+void InsertFunctionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
+ Module *M = BB.getParent()->getParent();
+ // If nullptr is selected, we will create a new function declaration.
+ SmallVector<Function *, 32> Functions({nullptr});
+ for (Function &F : M->functions()) {
+ Functions.push_back(&F);
+ }
+
+ auto RS = makeSampler(IB.Rand, Functions);
+ Function *F = RS.getSelection();
+ // Some functions accept metadata type or token type as arguments.
+ // We don't call those functions for now.
+ // For example, `@llvm.dbg.declare(metadata, metadata, metadata)`
+ // https://llvm.org/docs/SourceLevelDebugging.html#llvm-dbg-declare
+ auto IsUnsupportedTy = [](Type *T) {
+ return T->isMetadataTy() || T->isTokenTy();
+ };
+ if (!F || IsUnsupportedTy(F->getReturnType()) ||
+ any_of(F->getFunctionType()->params(), IsUnsupportedTy)) {
+ F = IB.createFunctionDeclaration(*M);
+ }
+
+ FunctionType *FTy = F->getFunctionType();
+ SmallVector<fuzzerop::SourcePred, 2> SourcePreds;
+ if (!F->arg_empty()) {
+ for (Type *ArgTy : FTy->params()) {
+ SourcePreds.push_back(fuzzerop::onlyType(ArgTy));
+ }
+ }
+ bool isRetVoid = (F->getReturnType() == Type::getVoidTy(M->getContext()));
+ auto BuilderFunc = [FTy, F, isRetVoid](ArrayRef<Value *> Srcs,
+ Instruction *Inst) {
+ StringRef Name = isRetVoid ? nullptr : "C";
+ CallInst *Call = CallInst::Create(FTy, F, Srcs, Name, Inst);
+    // Don't return this call inst if it returns void, as it can't be sunk.
+ return isRetVoid ? nullptr : Call;
+ };
+
+ SmallVector<Instruction *, 32> Insts;
+ for (Instruction &I : getInsertionRange(BB))
+ Insts.push_back(&I);
+ if (Insts.size() < 1)
+ return;
+
+ // Choose an insertion point for our new call instruction.
+ uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
+
+ auto InstsBefore = ArrayRef(Insts).slice(0, IP);
+ auto InstsAfter = ArrayRef(Insts).slice(IP);
+
+ // Choose a source, which will be used to constrain the operation selection.
+ SmallVector<Value *, 2> Srcs;
+
+ for (const auto &Pred : ArrayRef(SourcePreds)) {
+ Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
+ }
+
+ if (Value *Op = BuilderFunc(Srcs, Insts[IP])) {
+ // Find a sink and wire up the results of the operation.
+ IB.connectToSink(BB, InstsAfter, Op);
+ }
+}
+
void InsertCFGStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
SmallVector<Instruction *, 32> Insts;
- for (auto I = BB.getFirstInsertionPt(), E = BB.end(); I != E; ++I)
- Insts.push_back(&*I);
+ for (Instruction &I : getInsertionRange(BB))
+ Insts.push_back(&I);
if (Insts.size() < 1)
return;
@@ -491,8 +561,8 @@ void InsertPHIStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
PHI->addIncoming(Src, Pred);
}
SmallVector<Instruction *, 32> InstsAfter;
- for (auto I = BB.getFirstInsertionPt(), E = BB.end(); I != E; ++I)
- InstsAfter.push_back(&*I);
+ for (Instruction &I : getInsertionRange(BB))
+ InstsAfter.push_back(&I);
IB.connectToSink(BB, InstsAfter, PHI);
}
@@ -503,8 +573,8 @@ void SinkInstructionStrategy::mutate(Function &F, RandomIRBuilder &IB) {
}
void SinkInstructionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
SmallVector<Instruction *, 32> Insts;
- for (auto I = BB.getFirstInsertionPt(), E = BB.end(); I != E; ++I)
- Insts.push_back(&*I);
+ for (Instruction &I : getInsertionRange(BB))
+ Insts.push_back(&I);
if (Insts.size() < 1)
return;
// Choose an Instruction to mutate.
@@ -512,64 +582,74 @@ void SinkInstructionStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
Instruction *Inst = Insts[Idx];
// `Idx + 1` so we don't sink to ourselves.
auto InstsAfter = ArrayRef(Insts).slice(Idx + 1);
- LLVMContext &C = BB.getParent()->getParent()->getContext();
- // Don't sink terminators, void function calls, etc.
- if (Inst->getType() != Type::getVoidTy(C))
+ Type *Ty = Inst->getType();
+ // Don't sink terminators, void function calls, token, etc.
+ if (!Ty->isVoidTy() && !Ty->isTokenTy())
// Find a new sink and wire up the results of the operation.
IB.connectToSink(BB, InstsAfter, Inst);
}
void ShuffleBlockStrategy::mutate(BasicBlock &BB, RandomIRBuilder &IB) {
-
- SmallPtrSet<Instruction *, 8> AliveInsts;
+ // A deterministic alternative to SmallPtrSet with the same lookup
+ // performance.
+ std::map<size_t, Instruction *> AliveInsts;
+ std::map<Instruction *, size_t> AliveInstsLookup;
+ size_t InsertIdx = 0;
for (auto &I : make_early_inc_range(make_range(
BB.getFirstInsertionPt(), BB.getTerminator()->getIterator()))) {
// First gather all instructions that can be shuffled. Don't take
// terminator.
- AliveInsts.insert(&I);
+ AliveInsts.insert({InsertIdx, &I});
+ AliveInstsLookup.insert({&I, InsertIdx++});
// Then remove these instructions from the block
I.removeFromParent();
}
// Shuffle these instructions using topological sort.
- // Returns true if all current instruction's dependencies in this block have
+ // Returns false if all current instruction's dependencies in this block have
// been shuffled. If so, this instruction can be shuffled too.
- auto hasAliveParent = [&AliveInsts](Instruction *I) {
- for (Value *O : I->operands()) {
+ auto hasAliveParent = [&AliveInsts, &AliveInstsLookup](size_t Index) {
+ for (Value *O : AliveInsts[Index]->operands()) {
Instruction *P = dyn_cast<Instruction>(O);
- if (P && AliveInsts.count(P))
+ if (P && AliveInstsLookup.count(P))
return true;
}
return false;
};
// Get all alive instructions that depend on the current instruction.
- auto getAliveChildren = [&AliveInsts](Instruction *I) {
- SmallPtrSet<Instruction *, 4> Children;
+ // Takes Instruction* instead of index because the instruction is already
+ // shuffled.
+ auto getAliveChildren = [&AliveInstsLookup](Instruction *I) {
+ SmallSetVector<size_t, 8> Children;
for (Value *U : I->users()) {
Instruction *P = dyn_cast<Instruction>(U);
- if (P && AliveInsts.count(P))
- Children.insert(P);
+ if (P && AliveInstsLookup.count(P))
+ Children.insert(AliveInstsLookup[P]);
}
return Children;
};
- SmallPtrSet<Instruction *, 8> Roots;
+ SmallSet<size_t, 8> RootIndices;
SmallVector<Instruction *, 8> Insts;
- for (Instruction *I : AliveInsts) {
- if (!hasAliveParent(I))
- Roots.insert(I);
+ for (const auto &[Index, Inst] : AliveInsts) {
+ if (!hasAliveParent(Index))
+ RootIndices.insert(Index);
}
// Topological sort by randomly selecting a node without a parent, or root.
- while (!Roots.empty()) {
- auto RS = makeSampler<Instruction *>(IB.Rand);
- for (Instruction *Root : Roots)
- RS.sample(Root, 1);
- Instruction *Root = RS.getSelection();
- Roots.erase(Root);
- AliveInsts.erase(Root);
+ while (!RootIndices.empty()) {
+ auto RS = makeSampler<size_t>(IB.Rand);
+ for (size_t RootIdx : RootIndices)
+ RS.sample(RootIdx, 1);
+ size_t RootIdx = RS.getSelection();
+
+ RootIndices.erase(RootIdx);
+ Instruction *Root = AliveInsts[RootIdx];
+ AliveInsts.erase(RootIdx);
+ AliveInstsLookup.erase(Root);
Insts.push_back(Root);
- for (Instruction *Child : getAliveChildren(Root)) {
+
+ for (size_t Child : getAliveChildren(Root)) {
if (!hasAliveParent(Child)) {
- Roots.insert(Child);
+ RootIndices.insert(Child);
}
}
}
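// [Editorial note; illustrative sketch, not part of this import] The shuffle
// above is a randomised Kahn-style topological sort over the detached
// instructions, keyed by insertion index so the result is deterministic for a
// given seed:
//
//   ready = { indices whose instruction has no still-detached operand }
//   while ready is not empty:
//     pick a random index R from ready; append its instruction to the new order
//     for each user U of that instruction still in the detached set:
//       if U now has no still-detached operand, add U's index to ready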
diff --git a/llvm/lib/FuzzMutate/OpDescriptor.cpp b/llvm/lib/FuzzMutate/OpDescriptor.cpp
index 67d44be8b699..4baf45284de1 100644
--- a/llvm/lib/FuzzMutate/OpDescriptor.cpp
+++ b/llvm/lib/FuzzMutate/OpDescriptor.cpp
@@ -15,6 +15,9 @@ using namespace fuzzerop;
void fuzzerop::makeConstantsWithType(Type *T, std::vector<Constant *> &Cs) {
if (auto *IntTy = dyn_cast<IntegerType>(T)) {
uint64_t W = IntTy->getBitWidth();
+ Cs.push_back(ConstantInt::get(IntTy, 0));
+ Cs.push_back(ConstantInt::get(IntTy, 1));
+ Cs.push_back(ConstantInt::get(IntTy, 42));
Cs.push_back(ConstantInt::get(IntTy, APInt::getMaxValue(W)));
Cs.push_back(ConstantInt::get(IntTy, APInt::getMinValue(W)));
Cs.push_back(ConstantInt::get(IntTy, APInt::getSignedMaxValue(W)));
@@ -24,10 +27,24 @@ void fuzzerop::makeConstantsWithType(Type *T, std::vector<Constant *> &Cs) {
auto &Ctx = T->getContext();
auto &Sem = T->getFltSemantics();
Cs.push_back(ConstantFP::get(Ctx, APFloat::getZero(Sem)));
+ Cs.push_back(ConstantFP::get(Ctx, APFloat(Sem, 1)));
+ Cs.push_back(ConstantFP::get(Ctx, APFloat(Sem, 42)));
Cs.push_back(ConstantFP::get(Ctx, APFloat::getLargest(Sem)));
Cs.push_back(ConstantFP::get(Ctx, APFloat::getSmallest(Sem)));
- } else
+ Cs.push_back(ConstantFP::get(Ctx, APFloat::getInf(Sem)));
+ Cs.push_back(ConstantFP::get(Ctx, APFloat::getNaN(Sem)));
+ } else if (VectorType *VecTy = dyn_cast<VectorType>(T)) {
+ std::vector<Constant *> EleCs;
+ Type *EltTy = VecTy->getElementType();
+ makeConstantsWithType(EltTy, EleCs);
+ ElementCount EC = VecTy->getElementCount();
+ for (Constant *Elt : EleCs) {
+ Cs.push_back(ConstantVector::getSplat(EC, Elt));
+ }
+ } else {
Cs.push_back(UndefValue::get(T));
+ Cs.push_back(PoisonValue::get(T));
+ }
}
std::vector<Constant *> fuzzerop::makeConstantsWithType(Type *T) {
diff --git a/llvm/lib/FuzzMutate/Operations.cpp b/llvm/lib/FuzzMutate/Operations.cpp
index 46ebbef4d9d2..408f35879acd 100644
--- a/llvm/lib/FuzzMutate/Operations.cpp
+++ b/llvm/lib/FuzzMutate/Operations.cpp
@@ -67,11 +67,20 @@ void llvm::describeFuzzerFloatOps(std::vector<fuzzerop::OpDescriptor> &Ops) {
Ops.push_back(cmpOpDescriptor(1, Instruction::FCmp, CmpInst::FCMP_TRUE));
}
+void llvm::describeFuzzerUnaryOperations(
+ std::vector<fuzzerop::OpDescriptor> &Ops) {
+ Ops.push_back(fnegDescriptor(1));
+}
+
void llvm::describeFuzzerControlFlowOps(
std::vector<fuzzerop::OpDescriptor> &Ops) {
Ops.push_back(splitBlockDescriptor(1));
}
+void llvm::describeFuzzerOtherOps(std::vector<fuzzerop::OpDescriptor> &Ops) {
+ Ops.push_back(selectDescriptor(1));
+}
+
void llvm::describeFuzzerPointerOps(std::vector<fuzzerop::OpDescriptor> &Ops) {
Ops.push_back(gepDescriptor(1));
}
@@ -88,6 +97,22 @@ void llvm::describeFuzzerVectorOps(std::vector<fuzzerop::OpDescriptor> &Ops) {
Ops.push_back(shuffleVectorDescriptor(1));
}
+OpDescriptor llvm::fuzzerop::selectDescriptor(unsigned Weight) {
+ auto buildOp = [](ArrayRef<Value *> Srcs, Instruction *Inst) {
+ return SelectInst::Create(Srcs[0], Srcs[1], Srcs[2], "S", Inst);
+ };
+ return {Weight,
+ {boolOrVecBoolType(), matchFirstLengthWAnyType(), matchSecondType()},
+ buildOp};
+}
+
+OpDescriptor llvm::fuzzerop::fnegDescriptor(unsigned Weight) {
+ auto buildOp = [](ArrayRef<Value *> Srcs, Instruction *Inst) {
+ return UnaryOperator::Create(Instruction::FNeg, Srcs[0], "F", Inst);
+ };
+ return {Weight, {anyFloatOrVecFloatType()}, buildOp};
+}
+
OpDescriptor llvm::fuzzerop::binOpDescriptor(unsigned Weight,
Instruction::BinaryOps Op) {
auto buildOp = [Op](ArrayRef<Value *> Srcs, Instruction *Inst) {
@@ -107,13 +132,13 @@ OpDescriptor llvm::fuzzerop::binOpDescriptor(unsigned Weight,
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
- return {Weight, {anyIntType(), matchFirstType()}, buildOp};
+ return {Weight, {anyIntOrVecIntType(), matchFirstType()}, buildOp};
case Instruction::FAdd:
case Instruction::FSub:
case Instruction::FMul:
case Instruction::FDiv:
case Instruction::FRem:
- return {Weight, {anyFloatType(), matchFirstType()}, buildOp};
+ return {Weight, {anyFloatOrVecFloatType(), matchFirstType()}, buildOp};
case Instruction::BinaryOpsEnd:
llvm_unreachable("Value out of range of enum");
}
@@ -129,9 +154,9 @@ OpDescriptor llvm::fuzzerop::cmpOpDescriptor(unsigned Weight,
switch (CmpOp) {
case Instruction::ICmp:
- return {Weight, {anyIntType(), matchFirstType()}, buildOp};
+ return {Weight, {anyIntOrVecIntType(), matchFirstType()}, buildOp};
case Instruction::FCmp:
- return {Weight, {anyFloatType(), matchFirstType()}, buildOp};
+ return {Weight, {anyFloatOrVecFloatType(), matchFirstType()}, buildOp};
default:
llvm_unreachable("CmpOp must be ICmp or FCmp");
}
@@ -171,9 +196,7 @@ OpDescriptor llvm::fuzzerop::gepDescriptor(unsigned Weight) {
auto buildGEP = [](ArrayRef<Value *> Srcs, Instruction *Inst) {
// TODO: It would be better to generate a random type here, rather than
// generating a random value and picking its type.
- Type *Ty = Srcs[0]->getType()->isOpaquePointerTy()
- ? Srcs[1]->getType()
- : Srcs[0]->getType()->getNonOpaquePointerElementType();
+ Type *Ty = Srcs[1]->getType();
auto Indices = ArrayRef(Srcs).drop_front(2);
return GetElementPtrInst::Create(Ty, Srcs[0], Indices, "G", Inst);
};
diff --git a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
index bb9f91d0bb37..5569888e5b28 100644
--- a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
+++ b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
@@ -13,12 +13,99 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
using namespace llvm;
using namespace fuzzerop;
+/// Return a vector of blocks that dominate this block, excluding the current
+/// block.
+static std::vector<BasicBlock *> getDominators(BasicBlock *BB) {
+ std::vector<BasicBlock *> ret;
+ DominatorTree DT(*BB->getParent());
+ DomTreeNode *Node = DT.getNode(BB);
+ // It's possible that an orphan block is not in the dom tree. In that case we
+ // just return nothing.
+ if (!Node)
+ return ret;
+ Node = Node->getIDom();
+ while (Node && Node->getBlock()) {
+ ret.push_back(Node->getBlock());
+ // Get parent block.
+ Node = Node->getIDom();
+ }
+ return ret;
+}
+
+/// Return a vector of blocks that are dominated by this block, excluding the
+/// current block.
+static std::vector<BasicBlock *> getDominatees(BasicBlock *BB) {
+ DominatorTree DT(*BB->getParent());
+ std::vector<BasicBlock *> ret;
+ DomTreeNode *Parent = DT.getNode(BB);
+ // It's possible that an orphan block is not in the dom tree. In that case we
+ // just return nothing.
+ if (!Parent)
+ return ret;
+ for (DomTreeNode *Child : Parent->children())
+ ret.push_back(Child->getBlock());
+ uint64_t Idx = 0;
+ while (Idx < ret.size()) {
+ DomTreeNode *Node = DT[ret[Idx]];
+ Idx++;
+ for (DomTreeNode *Child : Node->children())
+ ret.push_back(Child->getBlock());
+ }
+ return ret;
+}
+
+AllocaInst *RandomIRBuilder::createStackMemory(Function *F, Type *Ty,
+ Value *Init) {
+ /// TODO: For all Allocas, maybe allocate an array.
+ BasicBlock *EntryBB = &F->getEntryBlock();
+ DataLayout DL(F->getParent());
+ AllocaInst *Alloca = new AllocaInst(Ty, DL.getAllocaAddrSpace(), "A",
+ &*EntryBB->getFirstInsertionPt());
+ if (Init)
+ new StoreInst(Init, Alloca, Alloca->getNextNode());
+ return Alloca;
+}
+
+std::pair<GlobalVariable *, bool>
+RandomIRBuilder::findOrCreateGlobalVariable(Module *M, ArrayRef<Value *> Srcs,
+ fuzzerop::SourcePred Pred) {
+ auto MatchesPred = [&Srcs, &Pred](GlobalVariable *GV) {
+ // Can't directly compare GV's type, as it would be a pointer to the actual
+ // type.
+ return Pred.matches(Srcs, UndefValue::get(GV->getValueType()));
+ };
+ bool DidCreate = false;
+ SmallVector<GlobalVariable *, 4> GlobalVars;
+ for (GlobalVariable &GV : M->globals()) {
+ GlobalVars.push_back(&GV);
+ }
+ auto RS = makeSampler(Rand, make_filter_range(GlobalVars, MatchesPred));
+ RS.sample(nullptr, 1);
+ GlobalVariable *GV = RS.getSelection();
+ if (!GV) {
+ DidCreate = true;
+ using LinkageTypes = GlobalVariable::LinkageTypes;
+ auto TRS = makeSampler<Constant *>(Rand);
+ TRS.sample(Pred.generate(Srcs, KnownTypes));
+ Constant *Init = TRS.getSelection();
+ Type *Ty = Init->getType();
+ GV = new GlobalVariable(*M, Ty, false, LinkageTypes::ExternalLinkage, Init,
+ "G", nullptr,
+ GlobalValue::ThreadLocalMode::NotThreadLocal,
+ M->getDataLayout().getDefaultGlobalsAddressSpace());
+ }
+ return {GV, DidCreate};
+}
+
Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
ArrayRef<Instruction *> Insts) {
return findOrCreateSource(BB, Insts, {}, anyType());
@@ -29,15 +116,83 @@ Value *RandomIRBuilder::findOrCreateSource(BasicBlock &BB,
ArrayRef<Value *> Srcs,
SourcePred Pred,
bool allowConstant) {
- auto MatchesPred = [&Srcs, &Pred](Instruction *Inst) {
- return Pred.matches(Srcs, Inst);
- };
- auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred));
- // Also consider choosing no source, meaning we want a new one.
- RS.sample(nullptr, /*Weight=*/1);
- if (Instruction *Src = RS.getSelection())
- return Src;
- return newSource(BB, Insts, Srcs, Pred, allowConstant);
+ auto MatchesPred = [&Srcs, &Pred](Value *V) { return Pred.matches(Srcs, V); };
+ SmallVector<uint64_t, 8> SrcTys;
+ for (uint64_t i = 0; i < EndOfValueSource; i++)
+ SrcTys.push_back(i);
+ std::shuffle(SrcTys.begin(), SrcTys.end(), Rand);
+ for (uint64_t SrcTy : SrcTys) {
+ switch (SrcTy) {
+ case SrcFromInstInCurBlock: {
+ auto RS = makeSampler(Rand, make_filter_range(Insts, MatchesPred));
+ if (!RS.isEmpty()) {
+ return RS.getSelection();
+ }
+ break;
+ }
+ case FunctionArgument: {
+ Function *F = BB.getParent();
+ SmallVector<Argument *, 8> Args;
+ for (uint64_t i = 0; i < F->arg_size(); i++) {
+ Args.push_back(F->getArg(i));
+ }
+ auto RS = makeSampler(Rand, make_filter_range(Args, MatchesPred));
+ if (!RS.isEmpty()) {
+ return RS.getSelection();
+ }
+ break;
+ }
+ case InstInDominator: {
+ auto Dominators = getDominators(&BB);
+ std::shuffle(Dominators.begin(), Dominators.end(), Rand);
+ for (BasicBlock *Dom : Dominators) {
+ SmallVector<Instruction *, 16> Instructions;
+ for (Instruction &I : *Dom) {
+ Instructions.push_back(&I);
+ }
+ auto RS =
+ makeSampler(Rand, make_filter_range(Instructions, MatchesPred));
+ // Also consider choosing no source, meaning we want a new one.
+ if (!RS.isEmpty()) {
+ return RS.getSelection();
+ }
+ }
+ break;
+ }
+ case SrcFromGlobalVariable: {
+ Module *M = BB.getParent()->getParent();
+ auto [GV, DidCreate] = findOrCreateGlobalVariable(M, Srcs, Pred);
+ Type *Ty = GV->getValueType();
+ LoadInst *LoadGV = nullptr;
+ if (BB.getTerminator()) {
+ LoadGV = new LoadInst(Ty, GV, "LGV", &*BB.getFirstInsertionPt());
+ } else {
+ LoadGV = new LoadInst(Ty, GV, "LGV", &BB);
+ }
+ // Because we might be generating new values, we have to check if it
+ // matches again.
+ if (DidCreate) {
+ if (Pred.matches(Srcs, LoadGV)) {
+ return LoadGV;
+ }
+ LoadGV->eraseFromParent();
+ // If no one is using this GlobalVariable, delete it too.
+ if (GV->use_empty()) {
+ GV->eraseFromParent();
+ }
+ }
+ break;
+ }
+ case NewConstOrStack: {
+ return newSource(BB, Insts, Srcs, Pred, allowConstant);
+ }
+ default:
+ case EndOfValueSource: {
+ llvm_unreachable("EndOfValueSource executed");
+ }
+ }
+ }
+ llvm_unreachable("Can't find a source");
}
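// [Editorial note; illustrative sketch, not part of this import]
// findOrCreateSource() now shuffles the candidate source kinds and takes the
// first one that yields a matching value, conceptually:
//
//   kinds = {SrcFromInstInCurBlock, FunctionArgument, InstInDominator,
//            SrcFromGlobalVariable, NewConstOrStack};
//   std::shuffle(kinds.begin(), kinds.end(), Rand);
//   for (k : kinds)
//     if (Value *V = tryKind(k, Srcs, Pred))   // tryKind is a stand-in name
//       return V;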
Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
@@ -48,7 +203,7 @@ Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
RS.sample(Pred.generate(Srcs, KnownTypes));
// If we can find a pointer to load from, use it half the time.
- Value *Ptr = findPointer(BB, Insts, Srcs, Pred);
+ Value *Ptr = findPointer(BB, Insts);
if (Ptr) {
// Create load from the chosen pointer
auto IP = BB.getFirstInsertionPt();
@@ -56,10 +211,8 @@ Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
IP = ++I->getIterator();
assert(IP != BB.end() && "guaranteed by the findPointer");
}
- // For opaque pointers, pick the type independently.
- Type *AccessTy = Ptr->getType()->isOpaquePointerTy()
- ? RS.getSelection()->getType()
- : Ptr->getType()->getNonOpaquePointerElementType();
+ // Pick the type independently.
+ Type *AccessTy = RS.getSelection()->getType();
auto *NewLoad = new LoadInst(AccessTy, Ptr, "L", &*IP);
// Only sample this load if it really matches the descriptor
@@ -76,12 +229,7 @@ Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
if (!allowConstant && isa<Constant>(newSrc)) {
Type *Ty = newSrc->getType();
Function *F = BB.getParent();
- BasicBlock *EntryBB = &F->getEntryBlock();
- /// TODO: For all Allocas, maybe allocate an array.
- DataLayout DL(BB.getParent()->getParent());
- AllocaInst *Alloca = new AllocaInst(Ty, DL.getProgramAddressSpace(), "A",
- EntryBB->getTerminator());
- new StoreInst(newSrc, Alloca, EntryBB->getTerminator());
+ AllocaInst *Alloca = createStackMemory(F, Ty, newSrc);
if (BB.getTerminator()) {
newSrc = new LoadInst(Ty, Alloca, /*ArrLen,*/ "L", BB.getTerminator());
} else {
@@ -119,72 +267,124 @@ static bool isCompatibleReplacement(const Instruction *I, const Use &Operand,
if (OperandNo >= 1)
return false;
break;
+ case Instruction::Call:
+ case Instruction::Invoke:
+ case Instruction::CallBr: {
+ const Function *Callee = cast<CallBase>(I)->getCalledFunction();
+ // If it's an indirect call, give up.
+ if (!Callee)
+ return false;
+ // If callee is not an intrinsic, operand 0 is the function to be called.
+ // Since we cannot assume that the replacement is a function pointer,
+ // we give up.
+ if (!Callee->getIntrinsicID() && OperandNo == 0)
+ return false;
+ return !Callee->hasParamAttribute(OperandNo, Attribute::ImmArg);
+ }
default:
break;
}
return true;
}
-void RandomIRBuilder::connectToSink(BasicBlock &BB,
- ArrayRef<Instruction *> Insts, Value *V) {
- auto RS = makeSampler<Use *>(Rand);
- for (auto &I : Insts) {
- if (isa<IntrinsicInst>(I))
- // TODO: Replacing operands of intrinsics would be interesting, but
- // there's no easy way to verify that a given replacement is valid given
- // that intrinsics can impose arbitrary constraints.
- continue;
- for (Use &U : I->operands())
- if (isCompatibleReplacement(I, U, V))
- RS.sample(&U, 1);
- }
- // Also consider choosing no sink, meaning we want a new one.
- RS.sample(nullptr, /*Weight=*/1);
-
- if (Use *Sink = RS.getSelection()) {
- User *U = Sink->getUser();
- unsigned OpNo = Sink->getOperandNo();
- U->setOperand(OpNo, V);
- return;
+Instruction *RandomIRBuilder::connectToSink(BasicBlock &BB,
+ ArrayRef<Instruction *> Insts,
+ Value *V) {
+ SmallVector<uint64_t, 8> SinkTys;
+ for (uint64_t i = 0; i < EndOfValueSink; i++)
+ SinkTys.push_back(i);
+ std::shuffle(SinkTys.begin(), SinkTys.end(), Rand);
+ auto findSinkAndConnect =
+ [this, V](ArrayRef<Instruction *> Instructions) -> Instruction * {
+ auto RS = makeSampler<Use *>(Rand);
+ for (auto &I : Instructions) {
+ for (Use &U : I->operands())
+ if (isCompatibleReplacement(I, U, V))
+ RS.sample(&U, 1);
+ }
+ if (!RS.isEmpty()) {
+ Use *Sink = RS.getSelection();
+ User *U = Sink->getUser();
+ unsigned OpNo = Sink->getOperandNo();
+ U->setOperand(OpNo, V);
+ return cast<Instruction>(U);
+ }
+ return nullptr;
+ };
+ Instruction *Sink = nullptr;
+ for (uint64_t SinkTy : SinkTys) {
+ switch (SinkTy) {
+ case SinkToInstInCurBlock:
+ Sink = findSinkAndConnect(Insts);
+ if (Sink)
+ return Sink;
+ break;
+ case PointersInDominator: {
+ auto Dominators = getDominators(&BB);
+ std::shuffle(Dominators.begin(), Dominators.end(), Rand);
+ for (BasicBlock *Dom : Dominators) {
+ for (Instruction &I : *Dom) {
+ if (isa<PointerType>(I.getType()))
+ return new StoreInst(V, &I, Insts.back());
+ }
+ }
+ break;
+ }
+ case InstInDominatee: {
+ auto Dominatees = getDominatees(&BB);
+ std::shuffle(Dominatees.begin(), Dominatees.end(), Rand);
+ for (BasicBlock *Dominee : Dominatees) {
+ std::vector<Instruction *> Instructions;
+ for (Instruction &I : *Dominee)
+ Instructions.push_back(&I);
+ Sink = findSinkAndConnect(Instructions);
+ if (Sink) {
+ return Sink;
+ }
+ }
+ break;
+ }
+ case NewStore:
+      /// TODO: allocate new stack memory.
+ return newSink(BB, Insts, V);
+ case SinkToGlobalVariable: {
+ Module *M = BB.getParent()->getParent();
+ auto [GV, DidCreate] =
+ findOrCreateGlobalVariable(M, {}, fuzzerop::onlyType(V->getType()));
+ return new StoreInst(V, GV, Insts.back());
+ }
+ case EndOfValueSink:
+ default:
+ llvm_unreachable("EndOfValueSink executed");
+ }
}
- newSink(BB, Insts, V);
+ llvm_unreachable("Can't find a sink");
}
-void RandomIRBuilder::newSink(BasicBlock &BB, ArrayRef<Instruction *> Insts,
- Value *V) {
- Value *Ptr = findPointer(BB, Insts, {V}, matchFirstType());
+Instruction *RandomIRBuilder::newSink(BasicBlock &BB,
+ ArrayRef<Instruction *> Insts, Value *V) {
+ Value *Ptr = findPointer(BB, Insts);
if (!Ptr) {
- if (uniform(Rand, 0, 1))
- Ptr = new AllocaInst(V->getType(), 0, "A", &*BB.getFirstInsertionPt());
- else
+ if (uniform(Rand, 0, 1)) {
+ Type *Ty = V->getType();
+ Ptr = createStackMemory(BB.getParent(), Ty, UndefValue::get(Ty));
+ } else {
Ptr = UndefValue::get(PointerType::get(V->getType(), 0));
+ }
}
- new StoreInst(V, Ptr, Insts.back());
+ return new StoreInst(V, Ptr, Insts.back());
}
Value *RandomIRBuilder::findPointer(BasicBlock &BB,
- ArrayRef<Instruction *> Insts,
- ArrayRef<Value *> Srcs, SourcePred Pred) {
- auto IsMatchingPtr = [&Srcs, &Pred](Instruction *Inst) {
+ ArrayRef<Instruction *> Insts) {
+ auto IsMatchingPtr = [](Instruction *Inst) {
// Invoke instructions sometimes produce valid pointers but currently
// we can't insert loads or stores from them
if (Inst->isTerminator())
return false;
- if (auto *PtrTy = dyn_cast<PointerType>(Inst->getType())) {
- if (PtrTy->isOpaque())
- return true;
-
- // We can never generate loads from non first class or non sized types
- Type *ElemTy = PtrTy->getNonOpaquePointerElementType();
- if (!ElemTy->isSized() || !ElemTy->isFirstClassType())
- return false;
-
- // TODO: Check if this is horribly expensive.
- return Pred.matches(Srcs, UndefValue::get(ElemTy));
- }
- return false;
+ return Inst->getType()->isPointerTy();
};
if (auto RS = makeSampler(Rand, make_filter_range(Insts, IsMatchingPtr)))
return RS.getSelection();
@@ -195,3 +395,48 @@ Type *RandomIRBuilder::randomType() {
uint64_t TyIdx = uniform<uint64_t>(Rand, 0, KnownTypes.size() - 1);
return KnownTypes[TyIdx];
}
+
+Function *RandomIRBuilder::createFunctionDeclaration(Module &M,
+ uint64_t ArgNum) {
+ Type *RetType = randomType();
+
+ SmallVector<Type *, 2> Args;
+ for (uint64_t i = 0; i < ArgNum; i++) {
+ Args.push_back(randomType());
+ }
+
+ Function *F = Function::Create(FunctionType::get(RetType, Args,
+ /*isVarArg=*/false),
+ GlobalValue::ExternalLinkage, "f", &M);
+ return F;
+}
+Function *RandomIRBuilder::createFunctionDeclaration(Module &M) {
+ return createFunctionDeclaration(
+ M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum));
+}
+
+Function *RandomIRBuilder::createFunctionDefinition(Module &M,
+ uint64_t ArgNum) {
+ Function *F = this->createFunctionDeclaration(M, ArgNum);
+
+ // TODO: Some arguments and a return value would probably be more
+ // interesting.
+ LLVMContext &Context = M.getContext();
+ DataLayout DL(&M);
+ BasicBlock *BB = BasicBlock::Create(Context, "BB", F);
+ Type *RetTy = F->getReturnType();
+ if (RetTy != Type::getVoidTy(Context)) {
+ Instruction *RetAlloca =
+ new AllocaInst(RetTy, DL.getAllocaAddrSpace(), "RP", BB);
+ Instruction *RetLoad = new LoadInst(RetTy, RetAlloca, "", BB);
+ ReturnInst::Create(Context, RetLoad, BB);
+ } else {
+ ReturnInst::Create(Context, BB);
+ }
+
+ return F;
+}
+Function *RandomIRBuilder::createFunctionDefinition(Module &M) {
+ return createFunctionDefinition(
+ M, uniform<uint64_t>(Rand, MinArgNum, MaxArgNum));
+}
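
An illustrative sketch (not part of the diff) of how a fuzzer driver might drive the new createFunctionDeclaration/createFunctionDefinition entry points; the RandomIRBuilder(Seed, AllowedTypes) constructor and the header path are assumed from the existing FuzzMutate API:

#include "llvm/FuzzMutate/RandomIRBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"

using namespace llvm;

void buildFuzzModule() {
  LLVMContext Ctx;
  Module M("fuzz-demo", Ctx);
  Type *AllowedTypes[] = {Type::getInt32Ty(Ctx), Type::getFloatTy(Ctx)};
  RandomIRBuilder IB(/*Seed=*/42, AllowedTypes);

  // Declaration only: a random signature drawn from AllowedTypes, with an
  // argument count between MinArgNum and MaxArgNum.
  Function *Decl = IB.createFunctionDeclaration(M);

  // Declaration plus the stub body built above (alloca/load/ret for
  // non-void return types, a bare ret otherwise).
  Function *Def = IB.createFunctionDefinition(M);
  (void)Decl;
  (void)Def;
}

The definition intentionally comes back with only the stub block; per the TODO in the patch, making the body use its arguments is left for later mutations.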
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 6108ce09c289..be4a3ed79d88 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -329,8 +329,12 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::Swift: Out << "swiftcc"; break;
case CallingConv::SwiftTail: Out << "swifttailcc"; break;
case CallingConv::X86_INTR: Out << "x86_intrcc"; break;
- case CallingConv::HHVM: Out << "hhvmcc"; break;
- case CallingConv::HHVM_C: Out << "hhvm_ccc"; break;
+ case CallingConv::DUMMY_HHVM:
+ Out << "hhvmcc";
+ break;
+ case CallingConv::DUMMY_HHVM_C:
+ Out << "hhvm_ccc";
+ break;
case CallingConv::AMDGPU_VS: Out << "amdgpu_vs"; break;
case CallingConv::AMDGPU_LS: Out << "amdgpu_ls"; break;
case CallingConv::AMDGPU_HS: Out << "amdgpu_hs"; break;
@@ -338,6 +342,12 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::AMDGPU_GS: Out << "amdgpu_gs"; break;
case CallingConv::AMDGPU_PS: Out << "amdgpu_ps"; break;
case CallingConv::AMDGPU_CS: Out << "amdgpu_cs"; break;
+ case CallingConv::AMDGPU_CS_Chain:
+ Out << "amdgpu_cs_chain";
+ break;
+ case CallingConv::AMDGPU_CS_ChainPreserve:
+ Out << "amdgpu_cs_chain_preserve";
+ break;
case CallingConv::AMDGPU_KERNEL: Out << "amdgpu_kernel"; break;
case CallingConv::AMDGPU_Gfx: Out << "amdgpu_gfx"; break;
}
@@ -421,8 +431,8 @@ static void PrintShuffleMask(raw_ostream &Out, Type *Ty, ArrayRef<int> Mask) {
bool FirstElt = true;
if (all_of(Mask, [](int Elt) { return Elt == 0; })) {
Out << "zeroinitializer";
- } else if (all_of(Mask, [](int Elt) { return Elt == UndefMaskElem; })) {
- Out << "undef";
+ } else if (all_of(Mask, [](int Elt) { return Elt == PoisonMaskElem; })) {
+ Out << "poison";
} else {
Out << "<";
for (int Elt : Mask) {
@@ -431,8 +441,8 @@ static void PrintShuffleMask(raw_ostream &Out, Type *Ty, ArrayRef<int> Mask) {
else
Out << ", ";
Out << "i32 ";
- if (Elt == UndefMaskElem)
- Out << "undef";
+ if (Elt == PoisonMaskElem)
+ Out << "poison";
else
Out << Elt;
}
@@ -585,16 +595,9 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
}
case Type::PointerTyID: {
PointerType *PTy = cast<PointerType>(Ty);
- if (PTy->isOpaque()) {
- OS << "ptr";
- if (unsigned AddressSpace = PTy->getAddressSpace())
- OS << " addrspace(" << AddressSpace << ')';
- return;
- }
- print(PTy->getNonOpaquePointerElementType(), OS);
+ OS << "ptr";
if (unsigned AddressSpace = PTy->getAddressSpace())
OS << " addrspace(" << AddressSpace << ')';
- OS << '*';
return;
}
case Type::ArrayTyID: {
@@ -1585,8 +1588,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
Out << CE->getOpcodeName();
WriteOptimizationInfo(Out, CE);
if (CE->isCompare())
- Out << ' ' << CmpInst::getPredicateName(
- static_cast<CmpInst::Predicate>(CE->getPredicate()));
+ Out << ' ' << static_cast<CmpInst::Predicate>(CE->getPredicate());
Out << " (";
std::optional<unsigned> InRangeOp;
@@ -3207,10 +3209,7 @@ void AssemblyWriter::printFunctionSummary(const FunctionSummary *FS) {
printTypeIdInfo(*TIdInfo);
// The AllocationType identifiers capture the profiled context behavior
- // reaching a specific static allocation site (possibly cloned). Thus
- // "notcoldandcold" implies there are multiple contexts which reach this site,
- // some of which are cold and some of which are not, and that need to
- // disambiguate via cloning or other context identification.
+ // reaching a specific static allocation site (possibly cloned).
auto AllocTypeName = [](uint8_t Type) -> const char * {
switch (Type) {
case (uint8_t)AllocationType::None:
@@ -3219,8 +3218,8 @@ void AssemblyWriter::printFunctionSummary(const FunctionSummary *FS) {
return "notcold";
case (uint8_t)AllocationType::Cold:
return "cold";
- case (uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold:
- return "notcoldandcold";
+ case (uint8_t)AllocationType::Hot:
+ return "hot";
}
llvm_unreachable("Unexpected alloc type");
};
@@ -4082,7 +4081,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
// Print out the compare instruction predicates
if (const CmpInst *CI = dyn_cast<CmpInst>(&I))
- Out << ' ' << CmpInst::getPredicateName(CI->getPredicate());
+ Out << ' ' << CI->getPredicate();
// Print out the atomicrmw operation
if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I))
diff --git a/llvm/lib/IR/AttributeImpl.h b/llvm/lib/IR/AttributeImpl.h
index 071c75e69377..78496786b0ae 100644
--- a/llvm/lib/IR/AttributeImpl.h
+++ b/llvm/lib/IR/AttributeImpl.h
@@ -266,6 +266,7 @@ public:
UWTableKind getUWTableKind() const;
AllocFnKind getAllocKind() const;
MemoryEffects getMemoryEffects() const;
+ FPClassTest getNoFPClass() const;
std::string getAsString(bool InAttrGrp) const;
Type *getAttributeType(Attribute::AttrKind Kind) const;
diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index 8c989c464551..3d89d18e5822 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
@@ -216,6 +217,11 @@ Attribute Attribute::getWithMemoryEffects(LLVMContext &Context,
return get(Context, Memory, ME.toIntValue());
}
+Attribute Attribute::getWithNoFPClass(LLVMContext &Context,
+ FPClassTest ClassMask) {
+ return get(Context, NoFPClass, ClassMask);
+}
+
Attribute
Attribute::getWithAllocSizeArgs(LLVMContext &Context, unsigned ElemSizeArg,
const std::optional<unsigned> &NumElemsArg) {
@@ -396,6 +402,12 @@ MemoryEffects Attribute::getMemoryEffects() const {
return MemoryEffects::createFromIntValue(pImpl->getValueAsInt());
}
+FPClassTest Attribute::getNoFPClass() const {
+ assert(hasAttribute(Attribute::NoFPClass) &&
+ "Can only call getNoFPClass() on nofpclass attribute");
+ return static_cast<FPClassTest>(pImpl->getValueAsInt());
+}
+
static const char *getModRefStr(ModRefInfo MR) {
switch (MR) {
case ModRefInfo::NoModRef:
@@ -511,7 +523,7 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
// Print access kind for "other" as the default access kind. This way it
// will apply to any new location kinds that get split out of "other".
- ModRefInfo OtherMR = ME.getModRef(MemoryEffects::Other);
+ ModRefInfo OtherMR = ME.getModRef(IRMemLocation::Other);
if (OtherMR != ModRefInfo::NoModRef || ME.getModRef() == OtherMR) {
First = false;
OS << getModRefStr(OtherMR);
@@ -527,13 +539,13 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
First = false;
switch (Loc) {
- case MemoryEffects::ArgMem:
+ case IRMemLocation::ArgMem:
OS << "argmem: ";
break;
- case MemoryEffects::InaccessibleMem:
+ case IRMemLocation::InaccessibleMem:
OS << "inaccessiblemem: ";
break;
- case MemoryEffects::Other:
+ case IRMemLocation::Other:
llvm_unreachable("This is represented as the default access kind");
}
OS << getModRefStr(MR);
@@ -543,6 +555,13 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
return Result;
}
+ if (hasAttribute(Attribute::NoFPClass)) {
+ std::string Result = "nofpclass";
+ raw_string_ostream OS(Result);
+ OS << getNoFPClass();
+ return Result;
+ }
+
// Convert target-dependent attributes to strings of the form:
//
// "kind"
@@ -840,6 +859,10 @@ MemoryEffects AttributeSet::getMemoryEffects() const {
return SetNode ? SetNode->getMemoryEffects() : MemoryEffects::unknown();
}
+FPClassTest AttributeSet::getNoFPClass() const {
+ return SetNode ? SetNode->getNoFPClass() : fcNone;
+}
+
std::string AttributeSet::getAsString(bool InAttrGrp) const {
return SetNode ? SetNode->getAsString(InAttrGrp) : "";
}
@@ -1024,6 +1047,12 @@ MemoryEffects AttributeSetNode::getMemoryEffects() const {
return MemoryEffects::unknown();
}
+FPClassTest AttributeSetNode::getNoFPClass() const {
+ if (auto A = findEnumAttribute(Attribute::NoFPClass))
+ return A->getNoFPClass();
+ return fcNone;
+}
+
std::string AttributeSetNode::getAsString(bool InAttrGrp) const {
std::string Str;
for (iterator I = begin(), E = end(); I != E; ++I) {
@@ -1560,6 +1589,14 @@ AttributeList::getParamDereferenceableOrNullBytes(unsigned Index) const {
return getParamAttrs(Index).getDereferenceableOrNullBytes();
}
+FPClassTest AttributeList::getRetNoFPClass() const {
+ return getRetAttrs().getNoFPClass();
+}
+
+FPClassTest AttributeList::getParamNoFPClass(unsigned Index) const {
+ return getParamAttrs(Index).getNoFPClass();
+}
+
UWTableKind AttributeList::getUWTableKind() const {
return getFnAttrs().getUWTableKind();
}
@@ -1803,6 +1840,13 @@ AttrBuilder &AttrBuilder::addMemoryAttr(MemoryEffects ME) {
return addRawIntAttr(Attribute::Memory, ME.toIntValue());
}
+AttrBuilder &AttrBuilder::addNoFPClassAttr(FPClassTest Mask) {
+ if (Mask == fcNone)
+ return *this;
+
+ return addRawIntAttr(Attribute::NoFPClass, Mask);
+}
+
AttrBuilder &AttrBuilder::addAllocKindAttr(AllocFnKind Kind) {
return addRawIntAttr(Attribute::AllocKind, static_cast<uint64_t>(Kind));
}
@@ -1885,6 +1929,16 @@ bool AttrBuilder::operator==(const AttrBuilder &B) const {
// AttributeFuncs Function Definitions
//===----------------------------------------------------------------------===//
+/// Returns true if this is a type legal for the 'nofpclass' attribute. This
+/// follows the same type rules as FPMathOperator.
+///
+/// TODO: Consider relaxing this to also allow structs with FP-typed fields.
+bool AttributeFuncs::isNoFPClassCompatibleType(Type *Ty) {
+ while (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty))
+ Ty = ArrTy->getElementType();
+ return Ty->isFPOrFPVectorTy();
+}
+
/// Which attributes cannot be applied to a type.
AttributeMask AttributeFuncs::typeIncompatible(Type *Ty,
AttributeSafetyKind ASK) {
@@ -1926,6 +1980,11 @@ AttributeMask AttributeFuncs::typeIncompatible(Type *Ty,
Incompatible.addAttribute(Attribute::Alignment);
}
+ if (ASK & ASK_SAFE_TO_DROP) {
+ if (!isNoFPClassCompatibleType(Ty))
+ Incompatible.addAttribute(Attribute::NoFPClass);
+ }
+
// Some attributes can apply to all "values" but there are no `void` values.
if (Ty->isVoidTy()) {
if (ASK & ASK_SAFE_TO_DROP)
@@ -1943,6 +2002,41 @@ AttributeMask AttributeFuncs::getUBImplyingAttributes() {
return AM;
}
+/// Callees with dynamic denormal modes are compatible with any caller mode.
+static bool denormModeCompatible(DenormalMode CallerMode,
+ DenormalMode CalleeMode) {
+ if (CallerMode == CalleeMode || CalleeMode == DenormalMode::getDynamic())
+ return true;
+
+ // If they don't exactly match, it's OK if the mismatched component is
+ // dynamic.
+ if (CalleeMode.Input == CallerMode.Input &&
+ CalleeMode.Output == DenormalMode::Dynamic)
+ return true;
+
+ if (CalleeMode.Output == CallerMode.Output &&
+ CalleeMode.Input == DenormalMode::Dynamic)
+ return true;
+ return false;
+}
+
+static bool checkDenormMode(const Function &Caller, const Function &Callee) {
+ DenormalMode CallerMode = Caller.getDenormalModeRaw();
+ DenormalMode CalleeMode = Callee.getDenormalModeRaw();
+
+ if (denormModeCompatible(CallerMode, CalleeMode)) {
+ DenormalMode CallerModeF32 = Caller.getDenormalModeF32Raw();
+ DenormalMode CalleeModeF32 = Callee.getDenormalModeF32Raw();
+ if (CallerModeF32 == DenormalMode::getInvalid())
+ CallerModeF32 = CallerMode;
+ if (CalleeModeF32 == DenormalMode::getInvalid())
+ CalleeModeF32 = CalleeMode;
+ return denormModeCompatible(CallerModeF32, CalleeModeF32);
+ }
+
+ return false;
+}
+
template<typename AttrClass>
static bool isEqual(const Function &Caller, const Function &Callee) {
return Caller.getFnAttribute(AttrClass::getKind()) ==
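
A minimal sketch of the new nofpclass plumbing from the client side, assuming the FPClassTest bitmask constants (fcNan, fcInf) from llvm/ADT/FloatingPointMode.h and the usual AttrBuilder/AttributeSet helpers:

#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

void addNoFPClassExample(LLVMContext &Ctx) {
  AttrBuilder B(Ctx);
  // nofpclass(nan inf): users of the value may assume it is neither NaN
  // nor an infinity.
  B.addNoFPClassAttr(fcNan | fcInf);
  AttributeSet AS = AttributeSet::get(Ctx, B);
  FPClassTest Mask = AS.getNoFPClass(); // == (fcNan | fcInf)
  (void)Mask;
}

A mask of fcNone is silently dropped by addNoFPClassAttr, matching the builder code above.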
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 7b9c55ff30a5..71b5722925a1 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -13,10 +13,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/AutoUpgrade.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -26,15 +29,26 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsARM.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/IR/IntrinsicsRISCV.h"
+#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
+#include "llvm/TargetParser/Triple.h"
#include <cstring>
+
using namespace llvm;
+static cl::opt<bool>
+ DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
+ cl::desc("Disable autoupgrade of debug info"));
+
static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
@@ -578,6 +592,71 @@ static bool UpgradeX86IntrinsicFunction(Function *F, StringRef Name,
return false;
}
+static Intrinsic::ID ShouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
+ return StringSwitch<Intrinsic::ID>(Name)
+ .Case("abs.bf16", Intrinsic::nvvm_abs_bf16)
+ .Case("abs.bf16x2", Intrinsic::nvvm_abs_bf16x2)
+ .Case("fma.rn.bf16", Intrinsic::nvvm_fma_rn_bf16)
+ .Case("fma.rn.bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
+ .Case("fma.rn.ftz_bf16", Intrinsic::nvvm_fma_rn_ftz_bf16)
+ .Case("fma.rn.ftz.bf16x2", Intrinsic::nvvm_fma_rn_ftz_bf16x2)
+ .Case("fma.rn.ftz.relu.bf16", Intrinsic::nvvm_fma_rn_ftz_relu_bf16)
+ .Case("fma.rn.ftz.relu.bf16x2", Intrinsic::nvvm_fma_rn_ftz_relu_bf16x2)
+ .Case("fma.rn.ftz_sat.bf16", Intrinsic::nvvm_fma_rn_ftz_sat_bf16)
+ .Case("fma.rn.ftz_sat.bf16x2", Intrinsic::nvvm_fma_rn_ftz_sat_bf16x2)
+ .Case("fma.rn.relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
+ .Case("fma.rn.relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
+ .Case("fma.rn.sat.bf16", Intrinsic::nvvm_fma_rn_sat_bf16)
+ .Case("fma.rn.sat.bf16x2", Intrinsic::nvvm_fma_rn_sat_bf16x2)
+ .Case("fmax.bf16", Intrinsic::nvvm_fmax_bf16)
+ .Case("fmax.bf16x2", Intrinsic::nvvm_fmax_bf16x2)
+ .Case("fmax.ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
+ .Case("fmax.ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
+ .Case("fmax.ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
+ .Case("fmax.ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
+ .Case("fmax.ftz.nan.xorsign.abs.bf16",
+ Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
+ .Case("fmax.ftz.nan.xorsign.abs.bf16x2",
+ Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
+ .Case("fmax.ftz.xorsign.abs.bf16",
+ Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
+ .Case("fmax.ftz.xorsign.abs.bf16x2",
+ Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
+ .Case("fmax.nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
+ .Case("fmax.nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
+ .Case("fmax.nan.xorsign.abs.bf16",
+ Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
+ .Case("fmax.nan.xorsign.abs.bf16x2",
+ Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
+ .Case("fmax.xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
+ .Case("fmax.xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
+ .Case("fmin.bf16", Intrinsic::nvvm_fmin_bf16)
+ .Case("fmin.bf16x2", Intrinsic::nvvm_fmin_bf16x2)
+ .Case("fmin.ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
+ .Case("fmin.ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
+ .Case("fmin.ftz.nan_bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
+ .Case("fmin.ftz.nan_bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
+ .Case("fmin.ftz.nan.xorsign.abs.bf16",
+ Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
+ .Case("fmin.ftz.nan.xorsign.abs.bf16x2",
+ Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
+ .Case("fmin.ftz.xorsign.abs.bf16",
+ Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
+ .Case("fmin.ftz.xorsign.abs.bf16x2",
+ Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
+ .Case("fmin.nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
+ .Case("fmin.nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
+ .Case("fmin.nan.xorsign.abs.bf16",
+ Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
+ .Case("fmin.nan.xorsign.abs.bf16x2",
+ Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
+ .Case("fmin.xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
+ .Case("fmin.xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
+ .Case("neg.bf16", Intrinsic::nvvm_neg_bf16)
+ .Case("neg.bf16x2", Intrinsic::nvvm_neg_bf16x2)
+ .Default(Intrinsic::not_intrinsic);
+}
+
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
assert(F && "Illegal to upgrade a non-existent Function.");
@@ -802,10 +881,14 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "arm.mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
Name == "arm.mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
Name == "arm.mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
- Name == "arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
+ Name ==
+ "arm.mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
+ Name == "arm.mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
Name == "arm.mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
Name == "arm.mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
- Name == "arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
+ Name ==
+ "arm.mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
+ Name == "arm.mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
Name == "arm.cde.vcx1q.predicated.v2i64.v4i1" ||
Name == "arm.cde.vcx1qa.predicated.v2i64.v4i1" ||
Name == "arm.cde.vcx2q.predicated.v2i64.v4i1" ||
@@ -814,16 +897,25 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
Name == "arm.cde.vcx3qa.predicated.v2i64.v4i1")
return true;
- if (Name == "amdgcn.alignbit") {
+ if (Name.startswith("amdgcn."))
+ Name = Name.substr(7); // Strip off "amdgcn."
+
+ if (Name == "alignbit") {
// Target specific intrinsic became redundant
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::fshr,
{F->getReturnType()});
return true;
}
+ if (Name.startswith("atomic.inc") || Name.startswith("atomic.dec")) {
+ // This was replaced with atomicrmw uinc_wrap and udec_wrap, so there's no
+ // new declaration.
+ NewFn = nullptr;
+ return true;
+ }
+
break;
}
-
case 'c': {
if (Name.startswith("ctlz.") && F->arg_size() == 1) {
rename(F);
@@ -840,6 +932,11 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
break;
}
case 'd': {
+ if (Name == "dbg.addr") {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
+ return true;
+ }
if (Name == "dbg.value" && F->arg_size() == 4) {
rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::dbg_value);
@@ -1051,7 +1148,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
{F->getReturnType()});
return true;
}
-
+ IID = ShouldUpgradeNVPTXBF16Intrinsic(Name);
+ if (IID != Intrinsic::not_intrinsic &&
+ !F->getReturnType()->getScalarType()->isBFloatTy()) {
+ NewFn = nullptr;
+ return true;
+ }
// The following nvvm intrinsics correspond exactly to an LLVM idiom, but
// not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
//
@@ -1107,6 +1209,87 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
break;
+ case 'r':
+ if (Name == "riscv.aes32dsi" &&
+ !F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_aes32dsi);
+ return true;
+ }
+ if (Name == "riscv.aes32dsmi" &&
+ !F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_aes32dsmi);
+ return true;
+ }
+ if (Name == "riscv.aes32esi" &&
+ !F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_aes32esi);
+ return true;
+ }
+ if (Name == "riscv.aes32esmi" &&
+ !F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_aes32esmi);
+ return true;
+ }
+ if (Name.startswith("riscv.sm4ks") &&
+ (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
+ F->getFunctionType()->getReturnType()->isIntegerTy(64))) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_sm4ks);
+ return true;
+ }
+ if (Name.startswith("riscv.sm4ed") &&
+ (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
+ F->getFunctionType()->getReturnType()->isIntegerTy(64))) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_sm4ed);
+ return true;
+ }
+ if (Name.startswith("riscv.sha256sig0") &&
+ F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::riscv_sha256sig0);
+ return true;
+ }
+ if (Name.startswith("riscv.sha256sig1") &&
+ F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::riscv_sha256sig1);
+ return true;
+ }
+ if (Name.startswith("riscv.sha256sum0") &&
+ F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::riscv_sha256sum0);
+ return true;
+ }
+ if (Name.startswith("riscv.sha256sum1") &&
+ F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::riscv_sha256sum1);
+ return true;
+ }
+ if (Name.startswith("riscv.sm3p0") &&
+ F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_sm3p0);
+ return true;
+ }
+ if (Name.startswith("riscv.sm3p1") &&
+ F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::riscv_sm3p1);
+ return true;
+ }
+ break;
+
case 's':
if (Name == "stackprotectorcheck") {
NewFn = nullptr;
@@ -1125,6 +1308,40 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
break;
}
+ case 'w':
+ if (Name.startswith("wasm.fma.")) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(
+ F->getParent(), Intrinsic::wasm_relaxed_madd, F->getReturnType());
+ return true;
+ }
+ if (Name.startswith("wasm.fms.")) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(
+ F->getParent(), Intrinsic::wasm_relaxed_nmadd, F->getReturnType());
+ return true;
+ }
+ if (Name.startswith("wasm.laneselect.")) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(
+ F->getParent(), Intrinsic::wasm_relaxed_laneselect,
+ F->getReturnType());
+ return true;
+ }
+ if (Name == "wasm.dot.i8x16.i7x16.signed") {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(
+ F->getParent(), Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed);
+ return true;
+ }
+ if (Name == "wasm.dot.i8x16.i7x16.add.signed") {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(
+ F->getParent(), Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed);
+ return true;
+ }
+ break;
+
case 'x':
if (UpgradeX86IntrinsicFunction(F, Name, NewFn))
return true;
@@ -1994,10 +2211,14 @@ static Value *UpgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
- Name == "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
+ Name ==
+ "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
+ Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
- Name == "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
+ Name ==
+ "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
+ Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
@@ -2062,6 +2283,38 @@ static Value *UpgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
llvm_unreachable("Unknown function for ARM CallBase upgrade.");
}
+static Value *UpgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
+ Function *F, IRBuilder<> &Builder) {
+ const bool IsInc = Name.startswith("atomic.inc.");
+ if (IsInc || Name.startswith("atomic.dec.")) {
+ if (CI->getNumOperands() != 6) // Malformed bitcode.
+ return nullptr;
+
+ AtomicRMWInst::BinOp RMWOp =
+ IsInc ? AtomicRMWInst::UIncWrap : AtomicRMWInst::UDecWrap;
+
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Val = CI->getArgOperand(1);
+ ConstantInt *OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
+ ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
+
+ AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
+ if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
+ Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
+ if (Order == AtomicOrdering::NotAtomic ||
+ Order == AtomicOrdering::Unordered)
+ Order = AtomicOrdering::SequentiallyConsistent;
+
+ AtomicRMWInst *RMW = Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order);
+
+ if (!VolatileArg || !VolatileArg->isZero())
+ RMW->setVolatile(true);
+ return RMW;
+ }
+
+ llvm_unreachable("Unknown function for AMDGPU intrinsic upgrade.");
+}
+
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
@@ -2092,9 +2345,11 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
bool IsARM = Name.startswith("arm.");
if (IsARM)
Name = Name.substr(4);
+ bool IsAMDGCN = Name.startswith("amdgcn.");
+ if (IsAMDGCN)
+ Name = Name.substr(7);
if (IsX86 && Name.startswith("sse4a.movnt.")) {
- Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;
Elts.push_back(
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
@@ -2112,7 +2367,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
StoreInst *SI = Builder.CreateAlignedStore(Extract, Addr, Align(1));
- SI->setMetadata(M->getMDKindID("nontemporal"), Node);
+ SI->setMetadata(LLVMContext::MD_nontemporal, Node);
// Remove intrinsic.
CI->eraseFromParent();
@@ -2121,7 +2376,6 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
if (IsX86 && (Name.startswith("avx.movnt.") ||
Name.startswith("avx512.storent."))) {
- Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;
Elts.push_back(
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
@@ -2137,7 +2391,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
StoreInst *SI = Builder.CreateAlignedStore(
Arg1, BC,
Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
- SI->setMetadata(M->getMDKindID("nontemporal"), Node);
+ SI->setMetadata(LLVMContext::MD_nontemporal, Node);
// Remove intrinsic.
CI->eraseFromParent();
@@ -3465,7 +3719,6 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
} else if (IsX86 && Name.startswith("avx512.cvtmask2")) {
Rep = UpgradeMaskToInt(Builder, *CI);
} else if (IsX86 && Name.endswith(".movntdqa")) {
- Module *M = F->getParent();
MDNode *Node = MDNode::get(
C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
@@ -3477,7 +3730,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
LoadInst *LI = Builder.CreateAlignedLoad(
CI->getType(), BC,
Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
- LI->setMetadata(M->getMDKindID("nontemporal"), Node);
+ LI->setMetadata(LLVMContext::MD_nontemporal, Node);
Rep = LI;
} else if (IsX86 && (Name.startswith("fma.vfmadd.") ||
Name.startswith("fma.vfmsub.") ||
@@ -3907,13 +4160,38 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
{Arg->getType()}),
Arg, "ctpop");
Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
- } else if (IsNVVM && Name == "h2f") {
- Rep = Builder.CreateCall(Intrinsic::getDeclaration(
+ } else if (IsNVVM) {
+ if (Name == "h2f") {
+ Rep =
+ Builder.CreateCall(Intrinsic::getDeclaration(
F->getParent(), Intrinsic::convert_from_fp16,
{Builder.getFloatTy()}),
CI->getArgOperand(0), "h2f");
+ } else {
+ Intrinsic::ID IID = ShouldUpgradeNVPTXBF16Intrinsic(Name);
+ if (IID != Intrinsic::not_intrinsic &&
+ !F->getReturnType()->getScalarType()->isBFloatTy()) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
+ SmallVector<Value *, 2> Args;
+ for (size_t I = 0; I < NewFn->arg_size(); ++I) {
+ Value *Arg = CI->getArgOperand(I);
+ Type *OldType = Arg->getType();
+ Type *NewType = NewFn->getArg(I)->getType();
+ Args.push_back((OldType->isIntegerTy() &&
+ NewType->getScalarType()->isBFloatTy())
+ ? Builder.CreateBitCast(Arg, NewType)
+ : Arg);
+ }
+ Rep = Builder.CreateCall(NewFn, Args);
+ if (F->getReturnType()->isIntegerTy())
+ Rep = Builder.CreateBitCast(Rep, F->getReturnType());
+ }
+ }
} else if (IsARM) {
Rep = UpgradeARMIntrinsicCall(Name, CI, F, Builder);
+ } else if (IsAMDGCN) {
+ Rep = UpgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
} else {
llvm_unreachable("Unknown function for CallBase upgrade.");
}
@@ -4120,7 +4398,20 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
break;
- case Intrinsic::dbg_value:
+ case Intrinsic::dbg_value: {
+ StringRef Name = F->getName();
+ Name = Name.substr(5); // Strip llvm.
+ // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
+ if (Name.startswith("dbg.addr")) {
+ DIExpression *Expr = cast<DIExpression>(
+ cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
+ Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
+ NewCall =
+ Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
+ MetadataAsValue::get(C, Expr)});
+ break;
+ }
+
// Upgrade from the old version that had an extra offset argument.
assert(CI->arg_size() == 4);
// Drop nonzero offsets instead of attempting to upgrade them.
@@ -4133,6 +4424,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
}
CI->eraseFromParent();
return;
+ }
case Intrinsic::ptr_annotation:
// Upgrade from versions that lacked the annotation attribute argument.
@@ -4167,6 +4459,60 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
CI->eraseFromParent();
return;
+ case Intrinsic::riscv_aes32dsi:
+ case Intrinsic::riscv_aes32dsmi:
+ case Intrinsic::riscv_aes32esi:
+ case Intrinsic::riscv_aes32esmi:
+ case Intrinsic::riscv_sm4ks:
+ case Intrinsic::riscv_sm4ed: {
+ // The last argument to these intrinsics used to be i8 and changed to i32.
+ // The type overload for sm4ks and sm4ed was removed.
+ Value *Arg2 = CI->getArgOperand(2);
+ if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
+ return;
+
+ Value *Arg0 = CI->getArgOperand(0);
+ Value *Arg1 = CI->getArgOperand(1);
+ if (CI->getType()->isIntegerTy(64)) {
+ Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
+ Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
+ }
+
+ Arg2 = ConstantInt::get(Type::getInt32Ty(C),
+ cast<ConstantInt>(Arg2)->getZExtValue());
+
+ NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
+ Value *Res = NewCall;
+ if (Res->getType() != CI->getType())
+ Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
+ NewCall->takeName(CI);
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+ return;
+ }
+ case Intrinsic::riscv_sha256sig0:
+ case Intrinsic::riscv_sha256sig1:
+ case Intrinsic::riscv_sha256sum0:
+ case Intrinsic::riscv_sha256sum1:
+ case Intrinsic::riscv_sm3p0:
+ case Intrinsic::riscv_sm3p1: {
+ // The type overload for these intrinsics was removed; they now always
+ // operate on i32.
+ if (!CI->getType()->isIntegerTy(64))
+ return;
+
+ Value *Arg =
+ Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
+
+ NewCall = Builder.CreateCall(NewFn, Arg);
+ Value *Res =
+ Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
+ NewCall->takeName(CI);
+ CI->replaceAllUsesWith(Res);
+ CI->eraseFromParent();
+ return;
+ }
+
case Intrinsic::x86_xop_vfrcz_ss:
case Intrinsic::x86_xop_vfrcz_sd:
NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
@@ -4384,12 +4730,16 @@ void llvm::UpgradeCallsToIntrinsic(Function *F) {
}
MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
+ const unsigned NumOperands = MD.getNumOperands();
+ if (NumOperands == 0)
+ return &MD; // Invalid, punt to a verifier error.
+
// Check if the tag uses struct-path aware TBAA format.
- if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
+ if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
return &MD;
auto &Context = MD.getContext();
- if (MD.getNumOperands() == 3) {
+ if (NumOperands == 3) {
Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
MDNode *ScalarType = MDNode::get(Context, Elts);
// Create a MDNode <ScalarType, ScalarType, offset 0, const>
@@ -4450,6 +4800,9 @@ Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
/// Check the debug info version number, if it is out-dated, drop the debug
/// info. Return true if module is modified.
bool llvm::UpgradeDebugInfo(Module &M) {
+ if (DisableAutoUpgradeDebugInfo)
+ return false;
+
unsigned Version = getDebugMetadataVersionFromModule(M);
if (Version == DEBUG_METADATA_VERSION) {
bool BrokenDebugInfo = false;
@@ -4889,9 +5242,10 @@ MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
Triple T(TT);
- // For AMDGPU we uprgrade older DataLayouts to include the default globals
- // address space of 1.
- if (T.isAMDGPU() && !DL.contains("-G") && !DL.startswith("G")) {
+ // The only data layout upgrade needed for pre-GCN targets is setting the
+ // default globals address space to 1.
+ if (T.isAMDGPU() && !T.isAMDGCN() && !DL.contains("-G") &&
+ !DL.startswith("G")) {
return DL.empty() ? std::string("G1") : (DL + "-G1").str();
}
@@ -4904,6 +5258,31 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
}
std::string Res = DL.str();
+ // AMDGCN data layout upgrades.
+ if (T.isAMDGCN()) {
+ // Add the default globals address space (G1) if it is missing.
+ if (!DL.contains("-G") && !DL.starts_with("G"))
+ Res.append(Res.empty() ? "G1" : "-G1");
+
+ // Add missing non-integral declarations.
+ // This goes before adding new address spaces to prevent incoherent string
+ // values.
+ if (!DL.contains("-ni") && !DL.startswith("ni"))
+ Res.append("-ni:7:8");
+ // Update ni:7 to ni:7:8.
+ if (DL.ends_with("ni:7"))
+ Res.append(":8");
+
+ // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
+ // resources). An empty data layout has already been upgraded to G1 by now.
+ if (!DL.contains("-p7") && !DL.startswith("p7"))
+ Res.append("-p7:160:256:256:32");
+ if (!DL.contains("-p8") && !DL.startswith("p8"))
+ Res.append("-p8:128:128");
+
+ return Res;
+ }
+
if (!T.isX86())
return Res;
@@ -4958,7 +5337,6 @@ void llvm::UpgradeAttributes(AttrBuilder &B) {
}
void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
-
// clang.arc.attachedcall bundles are now required to have an operand.
// If they don't, it's okay to drop them entirely: when there is an operand,
// the "attachedcall" is meaningful and required, but without an operand,
diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp
index 63d363e2d082..14e1787c2b14 100644
--- a/llvm/lib/IR/BasicBlock.cpp
+++ b/llvm/lib/IR/BasicBlock.cpp
@@ -133,9 +133,8 @@ iplist<BasicBlock>::iterator BasicBlock::eraseFromParent() {
return getParent()->getBasicBlockList().erase(getIterator());
}
-void BasicBlock::moveBefore(BasicBlock *MovePos) {
- MovePos->getParent()->splice(MovePos->getIterator(), getParent(),
- getIterator());
+void BasicBlock::moveBefore(SymbolTableList<BasicBlock>::iterator MovePos) {
+ getParent()->splice(MovePos, getParent(), getIterator());
}
void BasicBlock::moveAfter(BasicBlock *MovePos) {
@@ -205,6 +204,15 @@ const CallInst *BasicBlock::getPostdominatingDeoptimizeCall() const {
return BB->getTerminatingDeoptimizeCall();
}
+const Instruction *BasicBlock::getFirstMayFaultInst() const {
+ if (InstList.empty())
+ return nullptr;
+ for (const Instruction &I : *this)
+ if (isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallBase>(I))
+ return &I;
+ return nullptr;
+}
+
const Instruction* BasicBlock::getFirstNonPHI() const {
for (const Instruction &I : *this)
if (!isa<PHINode>(I))
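
A trivial sketch of the new accessor; it assumes only the matching declaration added to BasicBlock.h alongside this definition:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

const Instruction *firstFaultingOrNull(const BasicBlock &BB) {
  // Returns the first load, store, or call in BB, or nullptr if none exists.
  return BB.getFirstMayFaultInst();
}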
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index f84fe79b21be..4c3325063c09 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -111,29 +111,6 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
if (SrcTy == DestTy)
return V; // no-op cast
- // Check to see if we are casting a pointer to an aggregate to a pointer to
- // the first element. If so, return the appropriate GEP instruction.
- if (PointerType *PTy = dyn_cast<PointerType>(V->getType()))
- if (PointerType *DPTy = dyn_cast<PointerType>(DestTy))
- if (PTy->getAddressSpace() == DPTy->getAddressSpace() &&
- !PTy->isOpaque() && !DPTy->isOpaque() &&
- PTy->getNonOpaquePointerElementType()->isSized()) {
- SmallVector<Value*, 8> IdxList;
- Value *Zero =
- Constant::getNullValue(Type::getInt32Ty(DPTy->getContext()));
- IdxList.push_back(Zero);
- Type *ElTy = PTy->getNonOpaquePointerElementType();
- while (ElTy && ElTy != DPTy->getNonOpaquePointerElementType()) {
- ElTy = GetElementPtrInst::getTypeAtIndex(ElTy, (uint64_t)0);
- IdxList.push_back(Zero);
- }
-
- if (ElTy == DPTy->getNonOpaquePointerElementType())
- // This GEP is inbounds because all indices are zero.
- return ConstantExpr::getInBoundsGetElementPtr(
- PTy->getNonOpaquePointerElementType(), V, IdxList);
- }
-
// Handle casts from one vector constant to another. We know that the src
// and dest type have the same size (otherwise its an illegal cast).
if (VectorType *DestPTy = dyn_cast<VectorType>(DestTy)) {
@@ -593,17 +570,6 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
if (isa<UndefValue>(V1) && NotPoison(V2)) return V2;
if (isa<UndefValue>(V2) && NotPoison(V1)) return V1;
- if (ConstantExpr *TrueVal = dyn_cast<ConstantExpr>(V1)) {
- if (TrueVal->getOpcode() == Instruction::Select)
- if (TrueVal->getOperand(0) == Cond)
- return ConstantExpr::getSelect(Cond, TrueVal->getOperand(1), V2);
- }
- if (ConstantExpr *FalseVal = dyn_cast<ConstantExpr>(V2)) {
- if (FalseVal->getOpcode() == Instruction::Select)
- if (FalseVal->getOperand(0) == Cond)
- return ConstantExpr::getSelect(Cond, V1, FalseVal->getOperand(2));
- }
-
return nullptr;
}
@@ -721,9 +687,9 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2,
ElementCount::get(MaskNumElts, isa<ScalableVectorType>(V1VTy));
Type *EltTy = V1VTy->getElementType();
- // Undefined shuffle mask -> undefined value.
- if (all_of(Mask, [](int Elt) { return Elt == UndefMaskElem; })) {
- return UndefValue::get(VectorType::get(EltTy, MaskEltCount));
+ // Poison shuffle mask -> poison value.
+ if (all_of(Mask, [](int Elt) { return Elt == PoisonMaskElem; })) {
+ return PoisonValue::get(VectorType::get(EltTy, MaskEltCount));
}
// If the mask is all zeros this is a splat, no need to go through all
@@ -1053,7 +1019,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
isa<GlobalValue>(CE1->getOperand(0))) {
GlobalValue *GV = cast<GlobalValue>(CE1->getOperand(0));
- MaybeAlign GVAlign;
+ Align GVAlign; // defaults to 1
if (Module *TheModule = GV->getParent()) {
const DataLayout &DL = TheModule->getDataLayout();
@@ -1070,17 +1036,13 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, Constant *C1,
// appropriate defaults
if (isa<Function>(GV) && !DL.getFunctionPtrAlign())
GVAlign = Align(4);
- } else if (isa<Function>(GV)) {
- // Without a datalayout we have to assume the worst case: that the
- // function pointer isn't aligned at all.
- GVAlign = std::nullopt;
} else if (isa<GlobalVariable>(GV)) {
- GVAlign = cast<GlobalVariable>(GV)->getAlign();
+ GVAlign = cast<GlobalVariable>(GV)->getAlign().valueOrOne();
}
- if (GVAlign && *GVAlign > 1) {
+ if (GVAlign > 1) {
unsigned DstWidth = CI2->getType()->getBitWidth();
- unsigned SrcWidth = std::min(DstWidth, Log2(*GVAlign));
+ unsigned SrcWidth = std::min(DstWidth, Log2(GVAlign));
APInt BitsNotSet(APInt::getLowBitsSet(DstWidth, SrcWidth));
// If checking bits we know are clear, return zero.
@@ -1945,13 +1907,13 @@ static bool isInBoundsIndices(ArrayRef<IndexTy> Idxs) {
static bool isIndexInRangeOfArrayType(uint64_t NumElements,
const ConstantInt *CI) {
// We cannot bounds check the index if it doesn't fit in an int64_t.
- if (CI->getValue().getMinSignedBits() > 64)
+ if (CI->getValue().getSignificantBits() > 64)
return false;
// A negative index or an index past the end of our sequential type is
// considered out-of-range.
int64_t IndexVal = CI->getSExtValue();
- if (IndexVal < 0 || (NumElements > 0 && (uint64_t)IndexVal >= NumElements))
+ if (IndexVal < 0 || (IndexVal != 0 && (uint64_t)IndexVal >= NumElements))
return false;
// Otherwise, it is in-range.
@@ -2038,7 +2000,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
if (Idxs.empty()) return C;
Type *GEPTy = GetElementPtrInst::getGEPReturnType(
- PointeeTy, C, ArrayRef((Value *const *)Idxs.data(), Idxs.size()));
+ C, ArrayRef((Value *const *)Idxs.data(), Idxs.size()));
if (isa<PoisonValue>(C))
return PoisonValue::get(GEPTy);
@@ -2048,11 +2010,6 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
return InBounds ? PoisonValue::get(GEPTy) : UndefValue::get(GEPTy);
auto IsNoOp = [&]() {
- // For non-opaque pointers having multiple indices will change the result
- // type of the GEP.
- if (!C->getType()->getScalarType()->isOpaquePointerTy() && Idxs.size() != 1)
- return false;
-
// Avoid losing inrange information.
if (InRangeIndex)
return false;
@@ -2101,41 +2058,11 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
}
}
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
if (auto *GEP = dyn_cast<GEPOperator>(CE))
if (Constant *C = foldGEPOfGEP(GEP, PointeeTy, InBounds, Idxs))
return C;
- // Attempt to fold casts to the same type away. For example, folding:
- //
- // i32* getelementptr ([2 x i32]* bitcast ([3 x i32]* %X to [2 x i32]*),
- // i64 0, i64 0)
- // into:
- //
- // i32* getelementptr ([3 x i32]* %X, i64 0, i64 0)
- //
- // Don't fold if the cast is changing address spaces.
- Constant *Idx0 = cast<Constant>(Idxs[0]);
- if (CE->isCast() && Idxs.size() > 1 && Idx0->isNullValue()) {
- PointerType *SrcPtrTy =
- dyn_cast<PointerType>(CE->getOperand(0)->getType());
- PointerType *DstPtrTy = dyn_cast<PointerType>(CE->getType());
- if (SrcPtrTy && DstPtrTy && !SrcPtrTy->isOpaque() &&
- !DstPtrTy->isOpaque()) {
- ArrayType *SrcArrayTy =
- dyn_cast<ArrayType>(SrcPtrTy->getNonOpaquePointerElementType());
- ArrayType *DstArrayTy =
- dyn_cast<ArrayType>(DstPtrTy->getNonOpaquePointerElementType());
- if (SrcArrayTy && DstArrayTy
- && SrcArrayTy->getElementType() == DstArrayTy->getElementType()
- && SrcPtrTy->getAddressSpace() == DstPtrTy->getAddressSpace())
- return ConstantExpr::getGetElementPtr(SrcArrayTy,
- (Constant *)CE->getOperand(0),
- Idxs, InBounds, InRangeIndex);
- }
- }
- }
-
// Check to see if any array indices are not within the corresponding
// notional array or vector bounds. If so, try to determine if they can be
// factored out into preceding dimensions.
@@ -2202,11 +2129,17 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
Unknown = true;
continue;
}
+
+ // Determine the number of elements in our sequential type.
+ uint64_t NumElements = STy->getArrayNumElements();
+ if (!NumElements) {
+ Unknown = true;
+ continue;
+ }
+
// It's out of range, but we can factor it into the prior
// dimension.
NewIdxs.resize(Idxs.size());
- // Determine the number of elements in our sequential type.
- uint64_t NumElements = STy->getArrayNumElements();
// Expand the current index or the previous index to a vector from a scalar
// if necessary.
@@ -2280,7 +2213,8 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
// check for the "inbounds" property.
if (!Unknown && !InBounds)
if (auto *GV = dyn_cast<GlobalVariable>(C))
- if (!GV->hasExternalWeakLinkage() && isInBoundsIndices(Idxs))
+ if (!GV->hasExternalWeakLinkage() && GV->getValueType() == PointeeTy &&
+ isInBoundsIndices(Idxs))
return ConstantExpr::getGetElementPtr(PointeeTy, C, Idxs,
/*InBounds=*/true, InRangeIndex);
diff --git a/llvm/lib/IR/ConstantRange.cpp b/llvm/lib/IR/ConstantRange.cpp
index 0dbccaa1a66a..e9344a8815c0 100644
--- a/llvm/lib/IR/ConstantRange.cpp
+++ b/llvm/lib/IR/ConstantRange.cpp
@@ -481,8 +481,8 @@ unsigned ConstantRange::getMinSignedBits() const {
if (isEmptySet())
return 0;
- return std::max(getSignedMin().getMinSignedBits(),
- getSignedMax().getMinSignedBits());
+ return std::max(getSignedMin().getSignificantBits(),
+ getSignedMax().getSignificantBits());
}
ConstantRange ConstantRange::subtract(const APInt &Val) const {
@@ -816,8 +816,7 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
if (isUpperWrapped()) {
// If Upper is greater than or equal to MaxValue(DstTy), it covers the whole
// truncated range.
- if (Upper.getActiveBits() > DstTySize ||
- Upper.countTrailingOnes() == DstTySize)
+ if (Upper.getActiveBits() > DstTySize || Upper.countr_one() == DstTySize)
return getFull(DstTySize);
Union = ConstantRange(APInt::getMaxValue(DstTySize),Upper.trunc(DstTySize));
@@ -945,6 +944,7 @@ bool ConstantRange::isIntrinsicSupported(Intrinsic::ID IntrinsicID) {
case Intrinsic::smin:
case Intrinsic::smax:
case Intrinsic::abs:
+ case Intrinsic::ctlz:
return true;
default:
return false;
@@ -976,6 +976,12 @@ ConstantRange ConstantRange::intrinsic(Intrinsic::ID IntrinsicID,
assert(IntMinIsPoison->getBitWidth() == 1 && "Must be boolean");
return Ops[0].abs(IntMinIsPoison->getBoolValue());
}
+ case Intrinsic::ctlz: {
+ const APInt *ZeroIsPoison = Ops[1].getSingleElement();
+ assert(ZeroIsPoison && "Must be known (immarg)");
+ assert(ZeroIsPoison->getBitWidth() == 1 && "Must be boolean");
+ return Ops[0].ctlz(ZeroIsPoison->getBoolValue());
+ }
default:
assert(!isIntrinsicSupported(IntrinsicID) && "Shouldn't be supported");
llvm_unreachable("Unsupported intrinsic");
@@ -1089,6 +1095,20 @@ ConstantRange::multiply(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
return getEmpty();
+ if (const APInt *C = getSingleElement()) {
+ if (C->isOne())
+ return Other;
+ if (C->isAllOnes())
+ return ConstantRange(APInt::getZero(getBitWidth())).sub(Other);
+ }
+
+ if (const APInt *C = Other.getSingleElement()) {
+ if (C->isOne())
+ return *this;
+ if (C->isAllOnes())
+ return ConstantRange(APInt::getZero(getBitWidth())).sub(*this);
+ }
+
// Multiplication is signedness-independent. However different ranges can be
// obtained depending on how the input ranges are treated. These different
// ranges are all conservatively correct, but one might be better than the
@@ -1448,7 +1468,7 @@ ConstantRange::shl(const ConstantRange &Other) const {
if (RHS->uge(BW))
return getEmpty();
- unsigned EqualLeadingBits = (Min ^ Max).countLeadingZeros();
+ unsigned EqualLeadingBits = (Min ^ Max).countl_zero();
if (RHS->ule(EqualLeadingBits))
return getNonEmpty(Min << *RHS, (Max << *RHS) + 1);
@@ -1459,7 +1479,7 @@ ConstantRange::shl(const ConstantRange &Other) const {
APInt OtherMax = Other.getUnsignedMax();
// There's overflow!
- if (OtherMax.ugt(Max.countLeadingZeros()))
+ if (OtherMax.ugt(Max.countl_zero()))
return getFull();
// FIXME: implement the other tricky cases
@@ -1667,6 +1687,44 @@ ConstantRange ConstantRange::abs(bool IntMinIsPoison) const {
APIntOps::umax(-SMin, SMax) + 1);
}
+ConstantRange ConstantRange::ctlz(bool ZeroIsPoison) const {
+ if (isEmptySet())
+ return getEmpty();
+
+ APInt Zero = APInt::getZero(getBitWidth());
+ if (ZeroIsPoison && contains(Zero)) {
+ // ZeroIsPoison is set and zero is contained in the range. We distinguish
+ // three cases in which a zero can appear:
+ // 1) Lower is zero, handling cases of kind [0, 1), [0, 2), etc.
+ // 2) Upper is zero, wrapped set, handling cases of kind [3, 0], etc.
+ // 3) Zero contained in a wrapped set, e.g., [3, 2), [3, 1), etc.
+
+ if (getLower().isZero()) {
+ if ((getUpper() - 1).isZero()) {
+ // The input interval is [0, 1), i.e. the single value zero; all we can
+ // do is return the empty set.
+ return getEmpty();
+ }
+
+ // Compute the resulting range by excluding zero from Lower.
+ return ConstantRange(
+ APInt(getBitWidth(), (getUpper() - 1).countl_zero()),
+ APInt(getBitWidth(), (getLower() + 1).countl_zero() + 1));
+ } else if ((getUpper() - 1).isZero()) {
+ // Compute the resulting range by excluding zero from Upper.
+ return ConstantRange(Zero,
+ APInt(getBitWidth(), getLower().countl_zero() + 1));
+ } else {
+ return ConstantRange(Zero, APInt(getBitWidth(), getBitWidth()));
+ }
+ }
+
+ // Zero is either safe or not in the range. The output range is computed
+ // from countl_zero of the two extremes.
+ return getNonEmpty(APInt(getBitWidth(), getUnsignedMax().countl_zero()),
+ APInt(getBitWidth(), getUnsignedMin().countl_zero() + 1));
+}
+
ConstantRange::OverflowResult ConstantRange::unsignedAddMayOverflow(
const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
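
A worked example for the new ConstantRange::ctlz in the non-wrapping, zero-free case; the numbers follow directly from the getNonEmpty computation at the end of the function:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"

using namespace llvm;

void ctlzRangeExample() {
  // [4, 16) over i8: unsigned min is 4, unsigned max is 15.
  ConstantRange CR(APInt(8, 4), APInt(8, 16));
  ConstantRange LZ = CR.ctlz(/*ZeroIsPoison=*/false);
  // countl_zero(15) == 4 and countl_zero(4) == 5, so LZ is [4, 6):
  // every element of CR has either 4 or 5 leading zeros.
  (void)LZ;
}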
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index a53671183f77..c69c7c095f78 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -547,8 +547,6 @@ void llvm::deleteConstant(Constant *C) {
delete static_cast<CastConstantExpr *>(C);
else if (isa<BinaryConstantExpr>(C))
delete static_cast<BinaryConstantExpr *>(C);
- else if (isa<SelectConstantExpr>(C))
- delete static_cast<SelectConstantExpr *>(C);
else if (isa<ExtractElementConstantExpr>(C))
delete static_cast<ExtractElementConstantExpr *>(C);
else if (isa<InsertElementConstantExpr>(C))
@@ -874,7 +872,10 @@ Constant *ConstantInt::getBool(Type *Ty, bool V) {
ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt &V) {
// get an existing value or the insertion position
LLVMContextImpl *pImpl = Context.pImpl;
- std::unique_ptr<ConstantInt> &Slot = pImpl->IntConstants[V];
+ std::unique_ptr<ConstantInt> &Slot =
+ V.isZero() ? pImpl->IntZeroConstants[V.getBitWidth()]
+ : V.isOne() ? pImpl->IntOneConstants[V.getBitWidth()]
+ : pImpl->IntConstants[V];
if (!Slot) {
// Get the corresponding integer type for the bit width of the value.
IntegerType *ITy = IntegerType::get(Context, V.getBitWidth());
@@ -898,14 +899,6 @@ ConstantInt *ConstantInt::get(IntegerType *Ty, uint64_t V, bool isSigned) {
return get(Ty->getContext(), APInt(Ty->getBitWidth(), V, isSigned));
}
-ConstantInt *ConstantInt::getSigned(IntegerType *Ty, int64_t V) {
- return get(Ty, V, true);
-}
-
-Constant *ConstantInt::getSigned(Type *Ty, int64_t V) {
- return get(Ty, V, true);
-}
-
Constant *ConstantInt::get(Type *Ty, const APInt& V) {
ConstantInt *C = get(Ty->getContext(), V);
assert(C->getType() == Ty->getScalarType() &&
@@ -1016,13 +1009,6 @@ Constant *ConstantFP::getZero(Type *Ty, bool Negative) {
return C;
}
-Constant *ConstantFP::getZeroValueForNegation(Type *Ty) {
- if (Ty->isFPOrFPVectorTy())
- return getNegativeZero(Ty);
-
- return Constant::getNullValue(Ty);
-}
-
// ConstantFP accessors.
ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) {
@@ -1485,8 +1471,6 @@ Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty,
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
return ConstantExpr::getCast(getOpcode(), Ops[0], Ty, OnlyIfReduced);
- case Instruction::Select:
- return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2], OnlyIfReducedTy);
case Instruction::InsertElement:
return ConstantExpr::getInsertElement(Ops[0], Ops[1], Ops[2],
OnlyIfReducedTy);
@@ -2242,21 +2226,6 @@ Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy,
bool OnlyIfReduced) {
assert(CastInst::castIsValid(Instruction::AddrSpaceCast, C, DstTy) &&
"Invalid constantexpr addrspacecast!");
-
- // Canonicalize addrspacecasts between different pointer types by first
- // bitcasting the pointer type and then converting the address space.
- PointerType *SrcScalarTy = cast<PointerType>(C->getType()->getScalarType());
- PointerType *DstScalarTy = cast<PointerType>(DstTy->getScalarType());
- if (!SrcScalarTy->hasSameElementTypeAs(DstScalarTy)) {
- Type *MidTy = PointerType::getWithSamePointeeType(
- DstScalarTy, SrcScalarTy->getAddressSpace());
- if (VectorType *VT = dyn_cast<VectorType>(DstTy)) {
- // Handle vectors of pointers.
- MidTy = FixedVectorType::get(MidTy,
- cast<FixedVectorType>(VT)->getNumElements());
- }
- C = getBitCast(C, MidTy);
- }
return getFoldedCast(Instruction::AddrSpaceCast, C, DstTy, OnlyIfReduced);
}
@@ -2275,22 +2244,9 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::URem:
- case Instruction::SRem:
assert(C1->getType()->isIntOrIntVectorTy() &&
"Tried to create an integer operation on a non-integer type!");
break;
- case Instruction::FAdd:
- case Instruction::FSub:
- case Instruction::FMul:
- case Instruction::FDiv:
- case Instruction::FRem:
- assert(C1->getType()->isFPOrFPVectorTy() &&
- "Tried to create a floating-point operation on a "
- "non-floating-point type!");
- break;
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
@@ -2398,24 +2354,6 @@ Constant *ConstantExpr::getAlignOf(Type* Ty) {
Type::getInt64Ty(Ty->getContext()));
}
-Constant *ConstantExpr::getOffsetOf(StructType* STy, unsigned FieldNo) {
- return getOffsetOf(STy, ConstantInt::get(Type::getInt32Ty(STy->getContext()),
- FieldNo));
-}
-
-Constant *ConstantExpr::getOffsetOf(Type* Ty, Constant *FieldNo) {
- // offsetof is implemented as: (i64) gep (Ty*)null, 0, FieldNo
- // Note that a non-inbounds gep is used, as null isn't within any object.
- Constant *GEPIdx[] = {
- ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0),
- FieldNo
- };
- Constant *GEP = getGetElementPtr(
- Ty, Constant::getNullValue(PointerType::getUnqual(Ty)), GEPIdx);
- return getPtrToInt(GEP,
- Type::getInt64Ty(Ty->getContext()));
-}
-
Constant *ConstantExpr::getCompare(unsigned short Predicate, Constant *C1,
Constant *C2, bool OnlyIfReduced) {
assert(C1->getType() == C2->getType() && "Op types should be identical!");
@@ -2438,56 +2376,28 @@ Constant *ConstantExpr::getCompare(unsigned short Predicate, Constant *C1,
}
}
-Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2,
- Type *OnlyIfReducedTy) {
- assert(!SelectInst::areInvalidOperands(C, V1, V2)&&"Invalid select operands");
-
- if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2))
- return SC; // Fold common cases
-
- if (OnlyIfReducedTy == V1->getType())
- return nullptr;
-
- Constant *ArgVec[] = { C, V1, V2 };
- ConstantExprKeyType Key(Instruction::Select, ArgVec);
-
- LLVMContextImpl *pImpl = C->getContext().pImpl;
- return pImpl->ExprConstants.getOrCreate(V1->getType(), Key);
-}
-
Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Value *> Idxs, bool InBounds,
std::optional<unsigned> InRangeIndex,
Type *OnlyIfReducedTy) {
- PointerType *OrigPtrTy = cast<PointerType>(C->getType()->getScalarType());
assert(Ty && "Must specify element type");
- assert(OrigPtrTy->isOpaqueOrPointeeTypeMatches(Ty));
+ assert(isSupportedGetElementPtr(Ty) && "Element type is unsupported!");
if (Constant *FC =
ConstantFoldGetElementPtr(Ty, C, InBounds, InRangeIndex, Idxs))
return FC; // Fold a few common cases.
+ assert(GetElementPtrInst::getIndexedType(Ty, Idxs) &&
+ "GEP indices invalid!");;
+
// Get the result type of the getelementptr!
- Type *DestTy = GetElementPtrInst::getIndexedType(Ty, Idxs);
- assert(DestTy && "GEP indices invalid!");
- unsigned AS = OrigPtrTy->getAddressSpace();
- Type *ReqTy = OrigPtrTy->isOpaque()
- ? PointerType::get(OrigPtrTy->getContext(), AS)
- : DestTy->getPointerTo(AS);
+ Type *ReqTy = GetElementPtrInst::getGEPReturnType(C, Idxs);
+ if (OnlyIfReducedTy == ReqTy)
+ return nullptr;
auto EltCount = ElementCount::getFixed(0);
- if (VectorType *VecTy = dyn_cast<VectorType>(C->getType()))
+ if (VectorType *VecTy = dyn_cast<VectorType>(ReqTy))
EltCount = VecTy->getElementCount();
- else
- for (auto *Idx : Idxs)
- if (VectorType *VecTy = dyn_cast<VectorType>(Idx->getType()))
- EltCount = VecTy->getElementCount();
-
- if (EltCount.isNonZero())
- ReqTy = VectorType::get(ReqTy, EltCount);
-
- if (OnlyIfReducedTy == ReqTy)
- return nullptr;
// Look up the constant in the table first to ensure uniqueness
std::vector<Constant*> ArgVec;
@@ -2644,8 +2554,7 @@ Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
Constant *ConstantExpr::getNeg(Constant *C, bool HasNUW, bool HasNSW) {
assert(C->getType()->isIntOrIntVectorTy() &&
"Cannot NEG a nonintegral value!");
- return getSub(ConstantFP::getZeroValueForNegation(C->getType()),
- C, HasNUW, HasNSW);
+ return getSub(ConstantInt::get(C->getType(), 0), C, HasNUW, HasNSW);
}
Constant *ConstantExpr::getNot(Constant *C) {
@@ -2687,11 +2596,6 @@ Constant *ConstantExpr::getXor(Constant *C1, Constant *C2) {
return get(Instruction::Xor, C1, C2);
}
-Constant *ConstantExpr::getUMin(Constant *C1, Constant *C2) {
- Constant *Cmp = ConstantExpr::getICmp(CmpInst::ICMP_ULT, C1, C2);
- return getSelect(Cmp, C1, C2);
-}
-
Constant *ConstantExpr::getShl(Constant *C1, Constant *C2,
bool HasNUW, bool HasNSW) {
unsigned Flags = (HasNUW ? OverflowingBinaryOperator::NoUnsignedWrap : 0) |
@@ -3440,8 +3344,6 @@ Instruction *ConstantExpr::getAsInstruction(Instruction *InsertBefore) const {
case Instruction::AddrSpaceCast:
return CastInst::Create((Instruction::CastOps)getOpcode(), Ops[0],
getType(), "", InsertBefore);
- case Instruction::Select:
- return SelectInst::Create(Ops[0], Ops[1], Ops[2], "", InsertBefore);
case Instruction::InsertElement:
return InsertElementInst::Create(Ops[0], Ops[1], Ops[2], "", InsertBefore);
case Instruction::ExtractElement:
diff --git a/llvm/lib/IR/ConstantsContext.h b/llvm/lib/IR/ConstantsContext.h
index fbda443de7b2..6023216a5070 100644
--- a/llvm/lib/IR/ConstantsContext.h
+++ b/llvm/lib/IR/ConstantsContext.h
@@ -90,32 +90,6 @@ public:
}
};
-/// SelectConstantExpr - This class is private to Constants.cpp, and is used
-/// behind the scenes to implement select constant exprs.
-class SelectConstantExpr final : public ConstantExpr {
-public:
- SelectConstantExpr(Constant *C1, Constant *C2, Constant *C3)
- : ConstantExpr(C2->getType(), Instruction::Select, &Op<0>(), 3) {
- Op<0>() = C1;
- Op<1>() = C2;
- Op<2>() = C3;
- }
-
- // allocate space for exactly three operands
- void *operator new(size_t S) { return User::operator new(S, 3); }
- void operator delete(void *Ptr) { User::operator delete(Ptr); }
-
- /// Transparently provide more efficient getOperand methods.
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- static bool classof(const ConstantExpr *CE) {
- return CE->getOpcode() == Instruction::Select;
- }
- static bool classof(const Value *V) {
- return isa<ConstantExpr>(V) && classof(cast<ConstantExpr>(V));
- }
-};
-
/// ExtractElementConstantExpr - This class is private to
/// Constants.cpp, and is used behind the scenes to implement
/// extractelement constant exprs.
@@ -280,11 +254,6 @@ struct OperandTraits<BinaryConstantExpr>
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BinaryConstantExpr, Value)
template <>
-struct OperandTraits<SelectConstantExpr>
- : public FixedNumOperandTraits<SelectConstantExpr, 3> {};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectConstantExpr, Value)
-
-template <>
struct OperandTraits<ExtractElementConstantExpr>
: public FixedNumOperandTraits<ExtractElementConstantExpr, 2> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementConstantExpr, Value)
@@ -523,8 +492,6 @@ public:
return new BinaryConstantExpr(Opcode, Ops[0], Ops[1],
SubclassOptionalData);
llvm_unreachable("Invalid ConstantExpr!");
- case Instruction::Select:
- return new SelectConstantExpr(Ops[0], Ops[1], Ops[2]);
case Instruction::ExtractElement:
return new ExtractElementConstantExpr(Ops[0], Ops[1]);
case Instruction::InsertElement:
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index ea7ee4f97f69..f7b6d54013de 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -53,10 +53,6 @@ void llvm::initializeCore(PassRegistry &Registry) {
initializeVerifierLegacyPassPass(Registry);
}
-void LLVMInitializeCore(LLVMPassRegistryRef R) {
- initializeCore(*unwrap(R));
-}
-
void LLVMShutdown() {
llvm_shutdown();
}
@@ -129,10 +125,6 @@ void LLVMContextSetDiscardValueNames(LLVMContextRef C, LLVMBool Discard) {
unwrap(C)->setDiscardValueNames(Discard);
}
-void LLVMContextSetOpaquePointers(LLVMContextRef C, LLVMBool OpaquePointers) {
- unwrap(C)->setOpaquePointers(OpaquePointers);
-}
-
void LLVMContextDispose(LLVMContextRef C) {
delete unwrap(C);
}
@@ -792,12 +784,16 @@ LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) {
return wrap(ArrayType::get(unwrap(ElementType), ElementCount));
}
+LLVMTypeRef LLVMArrayType2(LLVMTypeRef ElementType, uint64_t ElementCount) {
+ return wrap(ArrayType::get(unwrap(ElementType), ElementCount));
+}
+
LLVMTypeRef LLVMPointerType(LLVMTypeRef ElementType, unsigned AddressSpace) {
return wrap(PointerType::get(unwrap(ElementType), AddressSpace));
}
LLVMBool LLVMPointerTypeIsOpaque(LLVMTypeRef Ty) {
- return unwrap(Ty)->isOpaquePointerTy();
+ return true;
}
LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount) {
@@ -811,8 +807,6 @@ LLVMTypeRef LLVMScalableVectorType(LLVMTypeRef ElementType,
LLVMTypeRef LLVMGetElementType(LLVMTypeRef WrappedTy) {
auto *Ty = unwrap(WrappedTy);
- if (auto *PTy = dyn_cast<PointerType>(Ty))
- return wrap(PTy->getNonOpaquePointerElementType());
if (auto *ATy = dyn_cast<ArrayType>(Ty))
return wrap(ATy->getElementType());
return wrap(cast<VectorType>(Ty)->getElementType());
@@ -826,6 +820,10 @@ unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy) {
return unwrap<ArrayType>(ArrayTy)->getNumElements();
}
+uint64_t LLVMGetArrayLength2(LLVMTypeRef ArrayTy) {
+ return unwrap<ArrayType>(ArrayTy)->getNumElements();
+}
+
unsigned LLVMGetPointerAddressSpace(LLVMTypeRef PointerTy) {
return unwrap<PointerType>(PointerTy)->getAddressSpace();
}
@@ -1013,6 +1011,13 @@ LLVMValueRef LLVMIsAMDNode(LLVMValueRef Val) {
return nullptr;
}
+LLVMValueRef LLVMIsAValueAsMetadata(LLVMValueRef Val) {
+ if (auto *MD = dyn_cast_or_null<MetadataAsValue>(unwrap(Val)))
+ if (isa<ValueAsMetadata>(MD->getMetadata()))
+ return Val;
+ return nullptr;
+}
+
LLVMValueRef LLVMIsAMDString(LLVMValueRef Val) {
if (auto *MD = dyn_cast_or_null<MetadataAsValue>(unwrap(Val)))
if (isa<MDString>(MD->getMetadata()))
@@ -1272,6 +1277,13 @@ void LLVMGetMDNodeOperands(LLVMValueRef V, LLVMValueRef *Dest) {
Dest[i] = getMDNodeOperandImpl(Context, N, i);
}
+void LLVMReplaceMDNodeOperandWith(LLVMValueRef V, unsigned Index,
+ LLVMMetadataRef Replacement) {
+ auto *MD = cast<MetadataAsValue>(unwrap(V));
+ auto *N = cast<MDNode>(MD->getMetadata());
+ N->replaceOperandWith(Index, unwrap<Metadata>(Replacement));
+}
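A minimal sketch of how the new LLVMReplaceMDNodeOperandWith can be driven from client code, assuming an existing LLVMContextRef; the helper name and string operands are illustrative only:

#include "llvm-c/Core.h"

// Build a two-operand MDNode, then replace its first operand in place with
// the newly added LLVMReplaceMDNodeOperandWith (which takes the node wrapped
// as a value).
static void replaceOperandSketch(LLVMContextRef Ctx) {
  LLVMMetadataRef Ops[2] = {LLVMMDStringInContext2(Ctx, "old", 3),
                            LLVMMDStringInContext2(Ctx, "other", 5)};
  LLVMMetadataRef Node = LLVMMDNodeInContext2(Ctx, Ops, 2);
  LLVMValueRef NodeVal = LLVMMetadataAsValue(Ctx, Node);
  LLVMMetadataRef Repl = LLVMMDStringInContext2(Ctx, "new", 3);
  LLVMReplaceMDNodeOperandWith(NodeVal, 0, Repl); // operand 0 now reads "new"
}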
+
unsigned LLVMGetNamedMetadataNumOperands(LLVMModuleRef M, const char *Name) {
if (NamedMDNode *N = unwrap(M)->getNamedMetadata(Name)) {
return N->getNumOperands();
@@ -1483,6 +1495,12 @@ LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length), V));
}
+LLVMValueRef LLVMConstArray2(LLVMTypeRef ElementTy, LLVMValueRef *ConstantVals,
+ uint64_t Length) {
+ ArrayRef<Constant *> V(unwrap<Constant>(ConstantVals, Length), Length);
+ return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length), V));
+}
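A minimal sketch of the new 64-bit-clean array entry points (LLVMArrayType2, LLVMGetArrayLength2, LLVMConstArray2) added in this change, assuming an existing LLVMContextRef:

#include "llvm-c/Core.h"
#include <stdint.h>

// The *2 variants take and return uint64_t counts, so element counts above
// UINT_MAX no longer truncate.
static LLVMValueRef arraySketch(LLVMContextRef Ctx) {
  LLVMTypeRef I8 = LLVMInt8TypeInContext(Ctx);
  LLVMTypeRef BigTy = LLVMArrayType2(I8, 1ULL << 32);
  uint64_t Len = LLVMGetArrayLength2(BigTy); // 4294967296, not 0
  (void)Len;
  LLVMValueRef Elts[2] = {LLVMConstInt(I8, 1, 0), LLVMConstInt(I8, 2, 0)};
  return LLVMConstArray2(I8, Elts, 2); // length parameter is uint64_t
}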
+
LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
LLVMValueRef *ConstantVals,
unsigned Count, LLVMBool Packed) {
@@ -1777,14 +1795,6 @@ LLVMValueRef LLVMConstFPCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) {
unwrap(ToType)));
}
-LLVMValueRef LLVMConstSelect(LLVMValueRef ConstantCondition,
- LLVMValueRef ConstantIfTrue,
- LLVMValueRef ConstantIfFalse) {
- return wrap(ConstantExpr::getSelect(unwrap<Constant>(ConstantCondition),
- unwrap<Constant>(ConstantIfTrue),
- unwrap<Constant>(ConstantIfFalse)));
-}
-
LLVMValueRef LLVMConstExtractElement(LLVMValueRef VectorConstant,
LLVMValueRef IndexConstant) {
return wrap(ConstantExpr::getExtractElement(unwrap<Constant>(VectorConstant),
@@ -3434,6 +3444,36 @@ LLVMValueRef LLVMBuildNot(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
return wrap(unwrap(B)->CreateNot(unwrap(V), Name));
}
+LLVMBool LLVMGetNUW(LLVMValueRef ArithInst) {
+ Value *P = unwrap<Value>(ArithInst);
+ return cast<Instruction>(P)->hasNoUnsignedWrap();
+}
+
+void LLVMSetNUW(LLVMValueRef ArithInst, LLVMBool HasNUW) {
+ Value *P = unwrap<Value>(ArithInst);
+ cast<Instruction>(P)->setHasNoUnsignedWrap(HasNUW);
+}
+
+LLVMBool LLVMGetNSW(LLVMValueRef ArithInst) {
+ Value *P = unwrap<Value>(ArithInst);
+ return cast<Instruction>(P)->hasNoSignedWrap();
+}
+
+void LLVMSetNSW(LLVMValueRef ArithInst, LLVMBool HasNSW) {
+ Value *P = unwrap<Value>(ArithInst);
+ cast<Instruction>(P)->setHasNoSignedWrap(HasNSW);
+}
+
+LLVMBool LLVMGetExact(LLVMValueRef DivOrShrInst) {
+ Value *P = unwrap<Value>(DivOrShrInst);
+ return cast<Instruction>(P)->isExact();
+}
+
+void LLVMSetExact(LLVMValueRef DivOrShrInst, LLVMBool IsExact) {
+ Value *P = unwrap<Value>(DivOrShrInst);
+ cast<Instruction>(P)->setIsExact(IsExact);
+}
+
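A minimal sketch of the new flag accessors, assuming an existing LLVMBuilderRef positioned in a block and two non-constant i32 values (so the builder emits real instructions rather than folded constants):

#include "llvm-c/Core.h"

static void wrapFlagSketch(LLVMBuilderRef B, LLVMValueRef L, LLVMValueRef R) {
  LLVMValueRef Add = LLVMBuildAdd(B, L, R, "add");
  LLVMSetNUW(Add, 1);
  LLVMSetNSW(Add, 1);
  LLVMBool HasNUW = LLVMGetNUW(Add); // 1
  LLVMBool HasNSW = LLVMGetNSW(Add); // 1
  LLVMValueRef Div = LLVMBuildUDiv(B, L, R, "div");
  LLVMSetExact(Div, 1);
  LLVMBool IsExact = LLVMGetExact(Div); // 1
  (void)HasNUW; (void)HasNSW; (void)IsExact;
}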
/*--.. Memory ..............................................................--*/
LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
@@ -3939,7 +3979,7 @@ int LLVMGetMaskValue(LLVMValueRef SVInst, unsigned Elt) {
return I->getMaskValue(Elt);
}
-int LLVMGetUndefMaskElem(void) { return UndefMaskElem; }
+int LLVMGetUndefMaskElem(void) { return PoisonMaskElem; }
LLVMBool LLVMIsAtomicSingleThread(LLVMValueRef AtomicInst) {
Value *P = unwrap(AtomicInst);
@@ -4057,12 +4097,6 @@ void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
delete unwrap(MemBuf);
}
-/*===-- Pass Registry -----------------------------------------------------===*/
-
-LLVMPassRegistryRef LLVMGetGlobalPassRegistry(void) {
- return wrap(PassRegistry::getPassRegistry());
-}
-
/*===-- Pass Manager ------------------------------------------------------===*/
LLVMPassManagerRef LLVMCreatePassManager() {
diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/lib/IR/CycleInfo.cpp
index 37b1b04404ce..a9b9129f24f0 100644
--- a/llvm/include/llvm/Support/TargetParser.h
+++ b/llvm/lib/IR/CycleInfo.cpp
@@ -1,14 +1,16 @@
-//===-- llvm/Support/TargetParser.h -----------------------------*- C++ -*-===//
+//===- CycleInfo.cpp - IR Cycle Info ----------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-///
-/// \file
-/// This header is deprecated in favour of `llvm/TargetParser/TargetParser.h`.
-///
-//===----------------------------------------------------------------------===//
-#include "llvm/TargetParser/TargetParser.h"
+#include "llvm/IR/CycleInfo.h"
+#include "llvm/ADT/GenericCycleImpl.h"
+#include "llvm/IR/CFG.h"
+
+using namespace llvm;
+
+template class llvm::GenericCycleInfo<SSAContext>;
+template class llvm::GenericCycle<SSAContext>;
diff --git a/llvm/lib/IR/DIBuilder.cpp b/llvm/lib/IR/DIBuilder.cpp
index 6c873c3c6644..1ce8c17f8a88 100644
--- a/llvm/lib/IR/DIBuilder.cpp
+++ b/llvm/lib/IR/DIBuilder.cpp
@@ -12,6 +12,8 @@
#include "llvm/IR/DIBuilder.h"
#include "LLVMContextImpl.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
@@ -23,14 +25,9 @@
using namespace llvm;
using namespace llvm::dwarf;
-static cl::opt<bool>
- UseDbgAddr("use-dbg-addr",
- llvm::cl::desc("Use llvm.dbg.addr for all local variables"),
- cl::init(false), cl::Hidden);
-
DIBuilder::DIBuilder(Module &m, bool AllowUnresolvedNodes, DICompileUnit *CU)
: M(m), VMContext(M.getContext()), CUNode(CU), DeclareFn(nullptr),
- ValueFn(nullptr), LabelFn(nullptr), AddrFn(nullptr), AssignFn(nullptr),
+ ValueFn(nullptr), LabelFn(nullptr), AssignFn(nullptr),
AllowUnresolvedNodes(AllowUnresolvedNodes) {
if (CUNode) {
if (const auto &ETs = CUNode->getEnumTypes())
@@ -40,7 +37,7 @@ DIBuilder::DIBuilder(Module &m, bool AllowUnresolvedNodes, DICompileUnit *CU)
if (const auto &GVs = CUNode->getGlobalVariables())
AllGVs.assign(GVs.begin(), GVs.end());
if (const auto &IMs = CUNode->getImportedEntities())
- AllImportedModules.assign(IMs.begin(), IMs.end());
+ ImportedModules.assign(IMs.begin(), IMs.end());
if (const auto &MNs = CUNode->getMacros())
AllMacrosPerParent.insert({nullptr, {MNs.begin(), MNs.end()}});
}
@@ -57,23 +54,11 @@ void DIBuilder::trackIfUnresolved(MDNode *N) {
}
void DIBuilder::finalizeSubprogram(DISubprogram *SP) {
- MDTuple *Temp = SP->getRetainedNodes().get();
- if (!Temp || !Temp->isTemporary())
- return;
-
- SmallVector<Metadata *, 16> RetainedNodes;
-
- auto PV = PreservedVariables.find(SP);
- if (PV != PreservedVariables.end())
- RetainedNodes.append(PV->second.begin(), PV->second.end());
-
- auto PL = PreservedLabels.find(SP);
- if (PL != PreservedLabels.end())
- RetainedNodes.append(PL->second.begin(), PL->second.end());
-
- DINodeArray Node = getOrCreateArray(RetainedNodes);
-
- TempMDTuple(Temp)->replaceAllUsesWith(Node.get());
+ auto PN = SubprogramTrackedNodes.find(SP);
+ if (PN != SubprogramTrackedNodes.end())
+ SP->replaceRetainedNodes(
+ MDTuple::get(VMContext, SmallVector<Metadata *, 16>(PN->second.begin(),
+ PN->second.end())));
}
void DIBuilder::finalize() {
@@ -101,8 +86,7 @@ void DIBuilder::finalize() {
if (!RetainValues.empty())
CUNode->replaceRetainedTypes(MDTuple::get(VMContext, RetainValues));
- DISubprogramArray SPs = MDTuple::get(VMContext, AllSubprograms);
- for (auto *SP : SPs)
+ for (auto *SP : AllSubprograms)
finalizeSubprogram(SP);
for (auto *N : RetainValues)
if (auto *SP = dyn_cast<DISubprogram>(N))
@@ -111,10 +95,10 @@ void DIBuilder::finalize() {
if (!AllGVs.empty())
CUNode->replaceGlobalVariables(MDTuple::get(VMContext, AllGVs));
- if (!AllImportedModules.empty())
+ if (!ImportedModules.empty())
CUNode->replaceImportedEntities(MDTuple::get(
- VMContext, SmallVector<Metadata *, 16>(AllImportedModules.begin(),
- AllImportedModules.end())));
+ VMContext, SmallVector<Metadata *, 16>(ImportedModules.begin(),
+ ImportedModules.end())));
for (const auto &I : AllMacrosPerParent) {
// DIMacroNode's with nullptr parent are DICompileUnit direct children.
@@ -156,7 +140,7 @@ DICompileUnit *DIBuilder::createCompileUnit(
DICompileUnit::DebugNameTableKind NameTableKind, bool RangesBaseAddress,
StringRef SysRoot, StringRef SDK) {
- assert(((Lang <= dwarf::DW_LANG_Ada2012 && Lang >= dwarf::DW_LANG_C89) ||
+ assert(((Lang <= dwarf::DW_LANG_Mojo && Lang >= dwarf::DW_LANG_C89) ||
(Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) &&
"Invalid Language tag");
@@ -178,7 +162,7 @@ static DIImportedEntity *
createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope *Context,
Metadata *NS, DIFile *File, unsigned Line, StringRef Name,
DINodeArray Elements,
- SmallVectorImpl<TrackingMDNodeRef> &AllImportedModules) {
+ SmallVectorImpl<TrackingMDNodeRef> &ImportedModules) {
if (Line)
assert(File && "Source location has line number but no file");
unsigned EntitiesCount = C.pImpl->DIImportedEntitys.size();
@@ -187,7 +171,7 @@ createImportedModule(LLVMContext &C, dwarf::Tag Tag, DIScope *Context,
if (EntitiesCount < C.pImpl->DIImportedEntitys.size())
// A new Imported Entity was just added to the context.
// Add it to the Imported Modules list.
- AllImportedModules.emplace_back(M);
+ ImportedModules.emplace_back(M);
return M;
}
@@ -197,7 +181,7 @@ DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context,
DINodeArray Elements) {
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
Context, NS, File, Line, StringRef(), Elements,
- AllImportedModules);
+ getImportTrackingVector(Context));
}
DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context,
@@ -206,7 +190,7 @@ DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context,
DINodeArray Elements) {
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
Context, NS, File, Line, StringRef(), Elements,
- AllImportedModules);
+ getImportTrackingVector(Context));
}
DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context, DIModule *M,
@@ -214,7 +198,7 @@ DIImportedEntity *DIBuilder::createImportedModule(DIScope *Context, DIModule *M,
DINodeArray Elements) {
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_module,
Context, M, File, Line, StringRef(), Elements,
- AllImportedModules);
+ getImportTrackingVector(Context));
}
DIImportedEntity *
@@ -225,7 +209,7 @@ DIBuilder::createImportedDeclaration(DIScope *Context, DINode *Decl,
// types that have one.
return ::createImportedModule(VMContext, dwarf::DW_TAG_imported_declaration,
Context, Decl, File, Line, Name, Elements,
- AllImportedModules);
+ getImportTrackingVector(Context));
}
DIFile *DIBuilder::createFile(StringRef Filename, StringRef Directory,
@@ -588,14 +572,14 @@ DIBuilder::createArrayType(uint64_t Size, uint32_t AlignInBits, DIType *Ty,
VMContext, dwarf::DW_TAG_array_type, "", nullptr, 0, nullptr, Ty, Size,
AlignInBits, 0, DINode::FlagZero, Subscripts, 0, nullptr, nullptr, "",
nullptr,
- DL.is<DIExpression *>() ? (Metadata *)DL.get<DIExpression *>()
- : (Metadata *)DL.get<DIVariable *>(),
- AS.is<DIExpression *>() ? (Metadata *)AS.get<DIExpression *>()
- : (Metadata *)AS.get<DIVariable *>(),
- AL.is<DIExpression *>() ? (Metadata *)AL.get<DIExpression *>()
- : (Metadata *)AL.get<DIVariable *>(),
- RK.is<DIExpression *>() ? (Metadata *)RK.get<DIExpression *>()
- : (Metadata *)RK.get<DIVariable *>());
+ isa<DIExpression *>(DL) ? (Metadata *)cast<DIExpression *>(DL)
+ : (Metadata *)cast<DIVariable *>(DL),
+ isa<DIExpression *>(AS) ? (Metadata *)cast<DIExpression *>(AS)
+ : (Metadata *)cast<DIVariable *>(AS),
+ isa<DIExpression *>(AL) ? (Metadata *)cast<DIExpression *>(AL)
+ : (Metadata *)cast<DIVariable *>(AL),
+ isa<DIExpression *>(RK) ? (Metadata *)cast<DIExpression *>(RK)
+ : (Metadata *)cast<DIVariable *>(RK));
trackIfUnresolved(R);
return R;
}
@@ -720,8 +704,8 @@ DIGenericSubrange *DIBuilder::getOrCreateGenericSubrange(
DIGenericSubrange::BoundType CountNode, DIGenericSubrange::BoundType LB,
DIGenericSubrange::BoundType UB, DIGenericSubrange::BoundType Stride) {
auto ConvToMetadata = [&](DIGenericSubrange::BoundType Bound) -> Metadata * {
- return Bound.is<DIExpression *>() ? (Metadata *)Bound.get<DIExpression *>()
- : (Metadata *)Bound.get<DIVariable *>();
+ return isa<DIExpression *>(Bound) ? (Metadata *)cast<DIExpression *>(Bound)
+ : (Metadata *)cast<DIVariable *>(Bound);
};
return DIGenericSubrange::get(VMContext, ConvToMetadata(CountNode),
ConvToMetadata(LB), ConvToMetadata(UB),
@@ -772,26 +756,20 @@ DIGlobalVariable *DIBuilder::createTempGlobalVariableFwdDecl(
static DILocalVariable *createLocalVariable(
LLVMContext &VMContext,
- DenseMap<MDNode *, SmallVector<TrackingMDNodeRef, 1>> &PreservedVariables,
- DIScope *Scope, StringRef Name, unsigned ArgNo, DIFile *File,
+ SmallVectorImpl<TrackingMDNodeRef> &PreservedNodes,
+ DIScope *Context, StringRef Name, unsigned ArgNo, DIFile *File,
unsigned LineNo, DIType *Ty, bool AlwaysPreserve, DINode::DIFlags Flags,
uint32_t AlignInBits, DINodeArray Annotations = nullptr) {
- // FIXME: Why getNonCompileUnitScope()?
- // FIXME: Why is "!Context" okay here?
// FIXME: Why doesn't this check for a subprogram or lexical block (AFAICT
// the only valid scopes)?
- DIScope *Context = getNonCompileUnitScope(Scope);
-
- auto *Node = DILocalVariable::get(
- VMContext, cast_or_null<DILocalScope>(Context), Name, File, LineNo, Ty,
- ArgNo, Flags, AlignInBits, Annotations);
+ auto *Scope = cast<DILocalScope>(Context);
+ auto *Node = DILocalVariable::get(VMContext, Scope, Name, File, LineNo, Ty,
+ ArgNo, Flags, AlignInBits, Annotations);
if (AlwaysPreserve) {
// The optimizer may remove local variables. If there is an interest
// to preserve variable info in such situation then stash it in a
// named mdnode.
- DISubprogram *Fn = getDISubprogram(Scope);
- assert(Fn && "Missing subprogram for local variable");
- PreservedVariables[Fn].emplace_back(Node);
+ PreservedNodes.emplace_back(Node);
}
return Node;
}
@@ -801,9 +779,11 @@ DILocalVariable *DIBuilder::createAutoVariable(DIScope *Scope, StringRef Name,
DIType *Ty, bool AlwaysPreserve,
DINode::DIFlags Flags,
uint32_t AlignInBits) {
- return createLocalVariable(VMContext, PreservedVariables, Scope, Name,
- /* ArgNo */ 0, File, LineNo, Ty, AlwaysPreserve,
- Flags, AlignInBits);
+ assert(Scope && isa<DILocalScope>(Scope) &&
+ "Unexpected scope for a local variable.");
+ return createLocalVariable(
+ VMContext, getSubprogramNodesTrackingVector(Scope), Scope, Name,
+ /* ArgNo */ 0, File, LineNo, Ty, AlwaysPreserve, Flags, AlignInBits);
}
DILocalVariable *DIBuilder::createParameterVariable(
@@ -811,25 +791,23 @@ DILocalVariable *DIBuilder::createParameterVariable(
unsigned LineNo, DIType *Ty, bool AlwaysPreserve, DINode::DIFlags Flags,
DINodeArray Annotations) {
assert(ArgNo && "Expected non-zero argument number for parameter");
- return createLocalVariable(VMContext, PreservedVariables, Scope, Name, ArgNo,
- File, LineNo, Ty, AlwaysPreserve, Flags,
- /*AlignInBits=*/0, Annotations);
+ assert(Scope && isa<DILocalScope>(Scope) &&
+ "Unexpected scope for a local variable.");
+ return createLocalVariable(
+ VMContext, getSubprogramNodesTrackingVector(Scope), Scope, Name, ArgNo,
+ File, LineNo, Ty, AlwaysPreserve, Flags, /*AlignInBits=*/0, Annotations);
}
-DILabel *DIBuilder::createLabel(DIScope *Scope, StringRef Name, DIFile *File,
- unsigned LineNo, bool AlwaysPreserve) {
- DIScope *Context = getNonCompileUnitScope(Scope);
-
- auto *Node = DILabel::get(VMContext, cast_or_null<DILocalScope>(Context),
- Name, File, LineNo);
+DILabel *DIBuilder::createLabel(DIScope *Context, StringRef Name, DIFile *File,
+ unsigned LineNo, bool AlwaysPreserve) {
+ auto *Scope = cast<DILocalScope>(Context);
+ auto *Node = DILabel::get(VMContext, Scope, Name, File, LineNo);
if (AlwaysPreserve) {
/// The optimizer may remove labels. If there is an interest
/// to preserve label info in such situation then append it to
/// the list of retained nodes of the DISubprogram.
- DISubprogram *Fn = getDISubprogram(Scope);
- assert(Fn && "Missing subprogram for label");
- PreservedLabels[Fn].emplace_back(Node);
+ getSubprogramNodesTrackingVector(Scope).emplace_back(Node);
}
return Node;
}
@@ -856,9 +834,8 @@ DISubprogram *DIBuilder::createFunction(
auto *Node = getSubprogram(
/*IsDistinct=*/IsDefinition, VMContext, getNonCompileUnitScope(Context),
Name, LinkageName, File, LineNo, Ty, ScopeLine, nullptr, 0, 0, Flags,
- SPFlags, IsDefinition ? CUNode : nullptr, TParams, Decl,
- MDTuple::getTemporary(VMContext, std::nullopt).release(), ThrownTypes,
- Annotations, TargetFuncName);
+ SPFlags, IsDefinition ? CUNode : nullptr, TParams, Decl, nullptr,
+ ThrownTypes, Annotations, TargetFuncName);
if (IsDefinition)
AllSubprograms.push_back(Node);
@@ -1022,24 +999,6 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V,
return insertDbgValueIntrinsic(V, VarInfo, Expr, DL, InsertAtEnd, nullptr);
}
-Instruction *DIBuilder::insertDbgAddrIntrinsic(Value *V,
- DILocalVariable *VarInfo,
- DIExpression *Expr,
- const DILocation *DL,
- Instruction *InsertBefore) {
- return insertDbgAddrIntrinsic(
- V, VarInfo, Expr, DL, InsertBefore ? InsertBefore->getParent() : nullptr,
- InsertBefore);
-}
-
-Instruction *DIBuilder::insertDbgAddrIntrinsic(Value *V,
- DILocalVariable *VarInfo,
- DIExpression *Expr,
- const DILocation *DL,
- BasicBlock *InsertAtEnd) {
- return insertDbgAddrIntrinsic(V, VarInfo, Expr, DL, InsertAtEnd, nullptr);
-}
-
/// Initialize IRBuilder for inserting dbg.declare and dbg.value intrinsics.
/// This abstracts over the various ways to specify an insert position.
static void initIRBuilder(IRBuilder<> &Builder, const DILocation *DL,
@@ -1057,8 +1016,7 @@ static Value *getDbgIntrinsicValueImpl(LLVMContext &VMContext, Value *V) {
}
static Function *getDeclareIntrin(Module &M) {
- return Intrinsic::getDeclaration(&M, UseDbgAddr ? Intrinsic::dbg_addr
- : Intrinsic::dbg_declare);
+ return Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
}
Instruction *DIBuilder::insertDbgValueIntrinsic(
@@ -1070,15 +1028,6 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(
InsertBefore);
}
-Instruction *DIBuilder::insertDbgAddrIntrinsic(
- llvm::Value *Val, DILocalVariable *VarInfo, DIExpression *Expr,
- const DILocation *DL, BasicBlock *InsertBB, Instruction *InsertBefore) {
- if (!AddrFn)
- AddrFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_addr);
- return insertDbgIntrinsic(AddrFn, Val, VarInfo, Expr, DL, InsertBB,
- InsertBefore);
-}
-
Instruction *DIBuilder::insertDeclare(Value *Storage, DILocalVariable *VarInfo,
DIExpression *Expr, const DILocation *DL,
BasicBlock *InsertBB,
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index 379c6d577b4e..53842b184ed6 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -18,7 +18,6 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -32,6 +31,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemAlloc.h"
#include "llvm/Support/TypeSize.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -45,21 +45,30 @@ using namespace llvm;
// Support for StructLayout
//===----------------------------------------------------------------------===//
-StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {
+StructLayout::StructLayout(StructType *ST, const DataLayout &DL)
+ : StructSize(TypeSize::Fixed(0)) {
assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
- StructSize = 0;
IsPadded = false;
NumElements = ST->getNumElements();
// Loop over each of the elements, placing them in memory.
for (unsigned i = 0, e = NumElements; i != e; ++i) {
Type *Ty = ST->getElementType(i);
+ if (i == 0 && Ty->isScalableTy())
+ StructSize = TypeSize::Scalable(0);
+
const Align TyAlign = ST->isPacked() ? Align(1) : DL.getABITypeAlign(Ty);
// Add padding if necessary to align the data element properly.
- if (!isAligned(TyAlign, StructSize)) {
+ // Currently the only structures with a scalable size are homogeneous
+ // scalable vector types. Their members all have the same data type, so no
+ // alignment issue can arise. The condition here assumes this and needs to
+ // be adjusted if the assumption changes (e.g. we start supporting
+ // structures with arbitrary scalable member types, or structures that mix
+ // fixed-size and scalable-size members).
+ if (!StructSize.isScalable() && !isAligned(TyAlign, StructSize)) {
IsPadded = true;
- StructSize = alignTo(StructSize, TyAlign);
+ StructSize = TypeSize::Fixed(alignTo(StructSize, TyAlign));
}
// Keep track of maximum alignment constraint.
@@ -67,28 +76,39 @@ StructLayout::StructLayout(StructType *ST, const DataLayout &DL) {
getMemberOffsets()[i] = StructSize;
// Consume space for this data item
- StructSize += DL.getTypeAllocSize(Ty).getFixedValue();
+ StructSize += DL.getTypeAllocSize(Ty);
}
// Add padding to the end of the struct so that it could be put in an array
// and all array elements would be aligned correctly.
- if (!isAligned(StructAlignment, StructSize)) {
+ if (!StructSize.isScalable() && !isAligned(StructAlignment, StructSize)) {
IsPadded = true;
- StructSize = alignTo(StructSize, StructAlignment);
+ StructSize = TypeSize::Fixed(alignTo(StructSize, StructAlignment));
}
}
/// getElementContainingOffset - Given a valid offset into the structure,
/// return the structure index that contains it.
-unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
- ArrayRef<uint64_t> MemberOffsets = getMemberOffsets();
- auto SI = llvm::upper_bound(MemberOffsets, Offset);
+unsigned StructLayout::getElementContainingOffset(uint64_t FixedOffset) const {
+ assert(!StructSize.isScalable() &&
+ "Cannot get element at offset for structure containing scalable "
+ "vector types");
+ TypeSize Offset = TypeSize::Fixed(FixedOffset);
+ ArrayRef<TypeSize> MemberOffsets = getMemberOffsets();
+
+ const auto *SI =
+ std::upper_bound(MemberOffsets.begin(), MemberOffsets.end(), Offset,
+ [](TypeSize LHS, TypeSize RHS) -> bool {
+ return TypeSize::isKnownLT(LHS, RHS);
+ });
assert(SI != MemberOffsets.begin() && "Offset not in structure type!");
--SI;
- assert(*SI <= Offset && "upper_bound didn't work");
- assert((SI == MemberOffsets.begin() || *(SI - 1) <= Offset) &&
- (SI + 1 == MemberOffsets.end() || *(SI + 1) > Offset) &&
- "Upper bound didn't work!");
+ assert(TypeSize::isKnownLE(*SI, Offset) && "upper_bound didn't work");
+ assert(
+ (SI == MemberOffsets.begin() || TypeSize::isKnownLE(*(SI - 1), Offset)) &&
+ (SI + 1 == MemberOffsets.end() ||
+ TypeSize::isKnownGT(*(SI + 1), Offset)) &&
+ "Upper bound didn't work!");
// Multiple fields can have the same offset if any of them are zero sized.
// For example, in { i32, [0 x i32], i32 }, searching for offset 4 will stop
@@ -102,23 +122,19 @@ unsigned StructLayout::getElementContainingOffset(uint64_t Offset) const {
// LayoutAlignElem, LayoutAlign support
//===----------------------------------------------------------------------===//
-LayoutAlignElem LayoutAlignElem::get(AlignTypeEnum align_type, Align abi_align,
- Align pref_align, uint32_t bit_width) {
- assert(abi_align <= pref_align && "Preferred alignment worse than ABI!");
+LayoutAlignElem LayoutAlignElem::get(Align ABIAlign, Align PrefAlign,
+ uint32_t BitWidth) {
+ assert(ABIAlign <= PrefAlign && "Preferred alignment worse than ABI!");
LayoutAlignElem retval;
- retval.AlignType = align_type;
- retval.ABIAlign = abi_align;
- retval.PrefAlign = pref_align;
- retval.TypeBitWidth = bit_width;
+ retval.ABIAlign = ABIAlign;
+ retval.PrefAlign = PrefAlign;
+ retval.TypeBitWidth = BitWidth;
return retval;
}
-bool
-LayoutAlignElem::operator==(const LayoutAlignElem &rhs) const {
- return (AlignType == rhs.AlignType
- && ABIAlign == rhs.ABIAlign
- && PrefAlign == rhs.PrefAlign
- && TypeBitWidth == rhs.TypeBitWidth);
+bool LayoutAlignElem::operator==(const LayoutAlignElem &rhs) const {
+ return ABIAlign == rhs.ABIAlign && PrefAlign == rhs.PrefAlign &&
+ TypeBitWidth == rhs.TypeBitWidth;
}
//===----------------------------------------------------------------------===//
@@ -162,19 +178,18 @@ const char *DataLayout::getManglingComponent(const Triple &T) {
return "-m:e";
}
-static const LayoutAlignElem DefaultAlignments[] = {
- {INTEGER_ALIGN, 1, Align(1), Align(1)}, // i1
- {INTEGER_ALIGN, 8, Align(1), Align(1)}, // i8
- {INTEGER_ALIGN, 16, Align(2), Align(2)}, // i16
- {INTEGER_ALIGN, 32, Align(4), Align(4)}, // i32
- {INTEGER_ALIGN, 64, Align(4), Align(8)}, // i64
- {FLOAT_ALIGN, 16, Align(2), Align(2)}, // half, bfloat
- {FLOAT_ALIGN, 32, Align(4), Align(4)}, // float
- {FLOAT_ALIGN, 64, Align(8), Align(8)}, // double
- {FLOAT_ALIGN, 128, Align(16), Align(16)}, // ppcf128, quad, ...
- {VECTOR_ALIGN, 64, Align(8), Align(8)}, // v2i32, v1i64, ...
- {VECTOR_ALIGN, 128, Align(16), Align(16)}, // v16i8, v8i16, v4i32, ...
- {AGGREGATE_ALIGN, 0, Align(1), Align(8)} // struct
+static const std::pair<AlignTypeEnum, LayoutAlignElem> DefaultAlignments[] = {
+ {INTEGER_ALIGN, {1, Align(1), Align(1)}}, // i1
+ {INTEGER_ALIGN, {8, Align(1), Align(1)}}, // i8
+ {INTEGER_ALIGN, {16, Align(2), Align(2)}}, // i16
+ {INTEGER_ALIGN, {32, Align(4), Align(4)}}, // i32
+ {INTEGER_ALIGN, {64, Align(4), Align(8)}}, // i64
+ {FLOAT_ALIGN, {16, Align(2), Align(2)}}, // half, bfloat
+ {FLOAT_ALIGN, {32, Align(4), Align(4)}}, // float
+ {FLOAT_ALIGN, {64, Align(8), Align(8)}}, // double
+ {FLOAT_ALIGN, {128, Align(16), Align(16)}}, // ppcf128, quad, ...
+ {VECTOR_ALIGN, {64, Align(8), Align(8)}}, // v2i32, v1i64, ...
+ {VECTOR_ALIGN, {128, Align(16), Align(16)}}, // v16i8, v8i16, v4i32, ...
};
void DataLayout::reset(StringRef Desc) {
@@ -190,11 +205,12 @@ void DataLayout::reset(StringRef Desc) {
TheFunctionPtrAlignType = FunctionPtrAlignType::Independent;
ManglingMode = MM_None;
NonIntegralAddressSpaces.clear();
+ StructAlignment = LayoutAlignElem::get(Align(1), Align(8), 0);
// Default alignments
- for (const LayoutAlignElem &E : DefaultAlignments) {
- if (Error Err = setAlignment((AlignTypeEnum)E.AlignType, E.ABIAlign,
- E.PrefAlign, E.TypeBitWidth))
+ for (const auto &[Kind, Layout] : DefaultAlignments) {
+ if (Error Err = setAlignment(Kind, Layout.ABIAlign, Layout.PrefAlign,
+ Layout.TypeBitWidth))
return report_fatal_error(std::move(Err));
}
if (Error Err = setPointerAlignmentInBits(0, Align(8), Align(8), 64, 64))
@@ -309,7 +325,7 @@ Error DataLayout::parseSpecifier(StringRef Desc) {
if (Error Err = getInt(Tok, AddrSpace))
return Err;
if (!isUInt<24>(AddrSpace))
- return reportError("Invalid address space, must be a 24bit integer");
+ return reportError("Invalid address space, must be a 24-bit integer");
// Size.
if (Rest.empty())
@@ -550,43 +566,63 @@ bool DataLayout::operator==(const DataLayout &Other) const {
TheFunctionPtrAlignType == Other.TheFunctionPtrAlignType &&
ManglingMode == Other.ManglingMode &&
LegalIntWidths == Other.LegalIntWidths &&
- Alignments == Other.Alignments && Pointers == Other.Pointers;
+ IntAlignments == Other.IntAlignments &&
+ FloatAlignments == Other.FloatAlignments &&
+ VectorAlignments == Other.VectorAlignments &&
+ StructAlignment == Other.StructAlignment &&
+ Pointers == Other.Pointers;
// Note: getStringRepresentation() might differ; it is not canonicalized
return Ret;
}
-DataLayout::AlignmentsTy::iterator
-DataLayout::findAlignmentLowerBound(AlignTypeEnum AlignType,
- uint32_t BitWidth) {
- auto Pair = std::make_pair((unsigned)AlignType, BitWidth);
- return partition_point(Alignments, [=](const LayoutAlignElem &E) {
- return std::make_pair(E.AlignType, E.TypeBitWidth) < Pair;
+static SmallVectorImpl<LayoutAlignElem>::const_iterator
+findAlignmentLowerBound(const SmallVectorImpl<LayoutAlignElem> &Alignments,
+ uint32_t BitWidth) {
+ return partition_point(Alignments, [BitWidth](const LayoutAlignElem &E) {
+ return E.TypeBitWidth < BitWidth;
});
}
-Error DataLayout::setAlignment(AlignTypeEnum align_type, Align abi_align,
- Align pref_align, uint32_t bit_width) {
+Error DataLayout::setAlignment(AlignTypeEnum AlignType, Align ABIAlign,
+ Align PrefAlign, uint32_t BitWidth) {
// AlignmentsTy::ABIAlign and AlignmentsTy::PrefAlign were once stored as
// uint16_t, it is unclear if there are requirements for alignment to be less
// than 2^16 other than storage. In the meantime we leave the restriction as
// an assert. See D67400 for context.
- assert(Log2(abi_align) < 16 && Log2(pref_align) < 16 && "Alignment too big");
- if (!isUInt<24>(bit_width))
- return reportError("Invalid bit width, must be a 24bit integer");
- if (pref_align < abi_align)
+ assert(Log2(ABIAlign) < 16 && Log2(PrefAlign) < 16 && "Alignment too big");
+ if (!isUInt<24>(BitWidth))
+ return reportError("Invalid bit width, must be a 24-bit integer");
+ if (PrefAlign < ABIAlign)
return reportError(
"Preferred alignment cannot be less than the ABI alignment");
- AlignmentsTy::iterator I = findAlignmentLowerBound(align_type, bit_width);
- if (I != Alignments.end() &&
- I->AlignType == (unsigned)align_type && I->TypeBitWidth == bit_width) {
+ SmallVectorImpl<LayoutAlignElem> *Alignments;
+ switch (AlignType) {
+ case AGGREGATE_ALIGN:
+ StructAlignment.ABIAlign = ABIAlign;
+ StructAlignment.PrefAlign = PrefAlign;
+ return Error::success();
+ case INTEGER_ALIGN:
+ Alignments = &IntAlignments;
+ break;
+ case FLOAT_ALIGN:
+ Alignments = &FloatAlignments;
+ break;
+ case VECTOR_ALIGN:
+ Alignments = &VectorAlignments;
+ break;
+ }
+
+ auto I = partition_point(*Alignments, [BitWidth](const LayoutAlignElem &E) {
+ return E.TypeBitWidth < BitWidth;
+ });
+ if (I != Alignments->end() && I->TypeBitWidth == BitWidth) {
// Update the abi, preferred alignments.
- I->ABIAlign = abi_align;
- I->PrefAlign = pref_align;
+ I->ABIAlign = ABIAlign;
+ I->PrefAlign = PrefAlign;
} else {
// Insert before I to keep the vector sorted.
- Alignments.insert(I, LayoutAlignElem::get(align_type, abi_align,
- pref_align, bit_width));
+ Alignments->insert(I, LayoutAlignElem::get(ABIAlign, PrefAlign, BitWidth));
}
return Error::success();
}
@@ -633,13 +669,12 @@ Error DataLayout::setPointerAlignmentInBits(uint32_t AddrSpace, Align ABIAlign,
Align DataLayout::getIntegerAlignment(uint32_t BitWidth,
bool abi_or_pref) const {
- auto I = findAlignmentLowerBound(INTEGER_ALIGN, BitWidth);
+ auto I = findAlignmentLowerBound(IntAlignments, BitWidth);
// If we don't have an exact match, use alignment of next larger integer
// type. If there is none, use alignment of largest integer type by going
// back one element.
- if (I == Alignments.end() || I->AlignType != INTEGER_ALIGN)
+ if (I == IntAlignments.end())
--I;
- assert(I->AlignType == INTEGER_ALIGN && "Must be integer alignment");
return abi_or_pref ? I->ABIAlign : I->PrefAlign;
}
@@ -668,7 +703,9 @@ public:
void DataLayout::clear() {
LegalIntWidths.clear();
- Alignments.clear();
+ IntAlignments.clear();
+ FloatAlignments.clear();
+ VectorAlignments.clear();
Pointers.clear();
delete static_cast<StructLayoutMap *>(LayoutMap);
LayoutMap = nullptr;
@@ -689,7 +726,7 @@ const StructLayout *DataLayout::getStructLayout(StructType *Ty) const {
// Otherwise, create the struct layout. Because it is variable length, we
// malloc it, then use placement new.
StructLayout *L = (StructLayout *)safe_malloc(
- StructLayout::totalSizeToAlloc<uint64_t>(Ty->getNumElements()));
+ StructLayout::totalSizeToAlloc<TypeSize>(Ty->getNumElements()));
// Set SL before calling StructLayout's ctor. The ctor could cause other
// entries to be added to TheMap, invalidating our reference.
@@ -768,11 +805,8 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
// Get the layout annotation... which is lazily created on demand.
const StructLayout *Layout = getStructLayout(cast<StructType>(Ty));
- const LayoutAlignElem &AggregateAlign = Alignments[0];
- assert(AggregateAlign.AlignType == AGGREGATE_ALIGN &&
- "Aggregate alignment must be first alignment entry");
const Align Align =
- abi_or_pref ? AggregateAlign.ABIAlign : AggregateAlign.PrefAlign;
+ abi_or_pref ? StructAlignment.ABIAlign : StructAlignment.PrefAlign;
return std::max(Align, Layout->getAlignment());
}
case Type::IntegerTyID:
@@ -787,9 +821,8 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
case Type::FP128TyID:
case Type::X86_FP80TyID: {
unsigned BitWidth = getTypeSizeInBits(Ty).getFixedValue();
- auto I = findAlignmentLowerBound(FLOAT_ALIGN, BitWidth);
- if (I != Alignments.end() && I->AlignType == FLOAT_ALIGN &&
- I->TypeBitWidth == BitWidth)
+ auto I = findAlignmentLowerBound(FloatAlignments, BitWidth);
+ if (I != FloatAlignments.end() && I->TypeBitWidth == BitWidth)
return abi_or_pref ? I->ABIAlign : I->PrefAlign;
// If we still couldn't find a reasonable default alignment, fall back
@@ -804,9 +837,8 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
case Type::FixedVectorTyID:
case Type::ScalableVectorTyID: {
unsigned BitWidth = getTypeSizeInBits(Ty).getKnownMinValue();
- auto I = findAlignmentLowerBound(VECTOR_ALIGN, BitWidth);
- if (I != Alignments.end() && I->AlignType == VECTOR_ALIGN &&
- I->TypeBitWidth == BitWidth)
+ auto I = findAlignmentLowerBound(VectorAlignments, BitWidth);
+ if (I != VectorAlignments.end() && I->TypeBitWidth == BitWidth)
return abi_or_pref ? I->ABIAlign : I->PrefAlign;
// By default, use natural alignment for vector types. This is consistent
@@ -828,11 +860,6 @@ Align DataLayout::getAlignment(Type *Ty, bool abi_or_pref) const {
}
}
-/// TODO: Remove this function once the transition to Align is over.
-uint64_t DataLayout::getABITypeAlignment(Type *Ty) const {
- return getABITypeAlign(Ty).value();
-}
-
Align DataLayout::getABITypeAlign(Type *Ty) const {
return getAlignment(Ty, true);
}
@@ -873,6 +900,11 @@ unsigned DataLayout::getLargestLegalIntTypeSizeInBits() const {
return Max != LegalIntWidths.end() ? *Max : 0;
}
+IntegerType *DataLayout::getIndexType(LLVMContext &C,
+ unsigned AddressSpace) const {
+ return IntegerType::get(C, getIndexSizeInBits(AddressSpace));
+}
+
Type *DataLayout::getIndexType(Type *Ty) const {
assert(Ty->isPtrOrPtrVectorTy() &&
"Expected a pointer or pointer vector type.");
diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
index 9198179674bd..48b5501c55ba 100644
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -43,10 +43,7 @@ using namespace llvm;
using namespace llvm::at;
using namespace llvm::dwarf;
-/// Finds all intrinsics declaring local variables as living in the memory that
-/// 'V' points to. This may include a mix of dbg.declare and
-/// dbg.addr intrinsics.
-TinyPtrVector<DbgVariableIntrinsic *> llvm::FindDbgAddrUses(Value *V) {
+TinyPtrVector<DbgDeclareInst *> llvm::FindDbgDeclareUses(Value *V) {
// This function is hot. Check whether the value has any metadata to avoid a
// DenseMap lookup.
if (!V->isUsedByMetadata())
@@ -58,75 +55,54 @@ TinyPtrVector<DbgVariableIntrinsic *> llvm::FindDbgAddrUses(Value *V) {
if (!MDV)
return {};
- TinyPtrVector<DbgVariableIntrinsic *> Declares;
+ TinyPtrVector<DbgDeclareInst *> Declares;
for (User *U : MDV->users()) {
- if (auto *DII = dyn_cast<DbgVariableIntrinsic>(U))
- if (DII->isAddressOfVariable())
- Declares.push_back(DII);
+ if (auto *DDI = dyn_cast<DbgDeclareInst>(U))
+ Declares.push_back(DDI);
}
return Declares;
}
-TinyPtrVector<DbgDeclareInst *> llvm::FindDbgDeclareUses(Value *V) {
- TinyPtrVector<DbgDeclareInst *> DDIs;
- for (DbgVariableIntrinsic *DVI : FindDbgAddrUses(V))
- if (auto *DDI = dyn_cast<DbgDeclareInst>(DVI))
- DDIs.push_back(DDI);
- return DDIs;
-}
-
-void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
+template <typename IntrinsicT>
+static void findDbgIntrinsics(SmallVectorImpl<IntrinsicT *> &Result, Value *V) {
// This function is hot. Check whether the value has any metadata to avoid a
// DenseMap lookup.
if (!V->isUsedByMetadata())
return;
+
+ LLVMContext &Ctx = V->getContext();
// TODO: If this value appears multiple times in a DIArgList, we should still
// only add the owning DbgValueInst once; use this set to track ArgListUsers.
// This behaviour can be removed when we can automatically remove duplicates.
- SmallPtrSet<DbgValueInst *, 4> EncounteredDbgValues;
- if (auto *L = LocalAsMetadata::getIfExists(V)) {
- if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L)) {
+ // V will also appear twice in a dbg.assign if its used in the both the value
+ // and address components.
+ SmallPtrSet<IntrinsicT *, 4> EncounteredIntrinsics;
+
+ /// Append IntrinsicT users of MetadataAsValue(MD).
+ auto AppendUsers = [&Ctx, &EncounteredIntrinsics, &Result](Metadata *MD) {
+ if (auto *MDV = MetadataAsValue::getIfExists(Ctx, MD)) {
for (User *U : MDV->users())
- if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
- DbgValues.push_back(DVI);
- }
- for (Metadata *AL : L->getAllArgListUsers()) {
- if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), AL)) {
- for (User *U : MDV->users())
- if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
- if (EncounteredDbgValues.insert(DVI).second)
- DbgValues.push_back(DVI);
- }
+ if (IntrinsicT *DVI = dyn_cast<IntrinsicT>(U))
+ if (EncounteredIntrinsics.insert(DVI).second)
+ Result.push_back(DVI);
}
+ };
+
+ if (auto *L = LocalAsMetadata::getIfExists(V)) {
+ AppendUsers(L);
+ for (Metadata *AL : L->getAllArgListUsers())
+ AppendUsers(AL);
}
}
+void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {
+ findDbgIntrinsics<DbgValueInst>(DbgValues, V);
+}
+
void llvm::findDbgUsers(SmallVectorImpl<DbgVariableIntrinsic *> &DbgUsers,
Value *V) {
- // This function is hot. Check whether the value has any metadata to avoid a
- // DenseMap lookup.
- if (!V->isUsedByMetadata())
- return;
- // TODO: If this value appears multiple times in a DIArgList, we should still
- // only add the owning DbgValueInst once; use this set to track ArgListUsers.
- // This behaviour can be removed when we can automatically remove duplicates.
- SmallPtrSet<DbgVariableIntrinsic *, 4> EncounteredDbgValues;
- if (auto *L = LocalAsMetadata::getIfExists(V)) {
- if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L)) {
- for (User *U : MDV->users())
- if (DbgVariableIntrinsic *DII = dyn_cast<DbgVariableIntrinsic>(U))
- DbgUsers.push_back(DII);
- }
- for (Metadata *AL : L->getAllArgListUsers()) {
- if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), AL)) {
- for (User *U : MDV->users())
- if (DbgVariableIntrinsic *DII = dyn_cast<DbgVariableIntrinsic>(U))
- if (EncounteredDbgValues.insert(DII).second)
- DbgUsers.push_back(DII);
- }
- }
- }
+ findDbgIntrinsics<DbgVariableIntrinsic>(DbgUsers, V);
}
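A caller-side sketch of the two entry points that now share the templated findDbgIntrinsics helper above, assuming an existing llvm::Value *V:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IntrinsicInst.h"

static void collectDebugUsesSketch(llvm::Value *V) {
  llvm::SmallVector<llvm::DbgValueInst *, 4> Values;
  llvm::findDbgValues(Values, V); // dbg.value users of V
  llvm::SmallVector<llvm::DbgVariableIntrinsic *, 4> Users;
  llvm::findDbgUsers(Users, V);   // all debug variable intrinsic users of V
}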
DISubprogram *llvm::getDISubprogram(const MDNode *Scope) {
@@ -410,16 +386,80 @@ static bool isDILocationReachable(SmallPtrSetImpl<Metadata *> &Visited,
for (auto &OpIt : N->operands()) {
Metadata *Op = OpIt.get();
if (isDILocationReachable(Visited, Reachable, Op)) {
+ // Don't return just yet as we want to visit all MD's children to
+ // initialize DILocationReachable in stripDebugLocFromLoopID
Reachable.insert(N);
- return true;
}
}
- return false;
+ return Reachable.count(N);
+}
+
+static bool isAllDILocation(SmallPtrSetImpl<Metadata *> &Visited,
+ SmallPtrSetImpl<Metadata *> &AllDILocation,
+ const SmallPtrSetImpl<Metadata *> &DIReachable,
+ Metadata *MD) {
+ MDNode *N = dyn_cast_or_null<MDNode>(MD);
+ if (!N)
+ return false;
+ if (isa<DILocation>(N) || AllDILocation.count(N))
+ return true;
+ if (!DIReachable.count(N))
+ return false;
+ if (!Visited.insert(N).second)
+ return false;
+ for (auto &OpIt : N->operands()) {
+ Metadata *Op = OpIt.get();
+ if (Op == MD)
+ continue;
+ if (!isAllDILocation(Visited, AllDILocation, DIReachable, Op)) {
+ return false;
+ }
+ }
+ AllDILocation.insert(N);
+ return true;
+}
+
+static Metadata *
+stripLoopMDLoc(const SmallPtrSetImpl<Metadata *> &AllDILocation,
+ const SmallPtrSetImpl<Metadata *> &DIReachable, Metadata *MD) {
+ if (isa<DILocation>(MD) || AllDILocation.count(MD))
+ return nullptr;
+
+ if (!DIReachable.count(MD))
+ return MD;
+
+ MDNode *N = dyn_cast_or_null<MDNode>(MD);
+ if (!N)
+ return MD;
+
+ SmallVector<Metadata *, 4> Args;
+ bool HasSelfRef = false;
+ for (unsigned i = 0; i < N->getNumOperands(); ++i) {
+ Metadata *A = N->getOperand(i);
+ if (!A) {
+ Args.push_back(nullptr);
+ } else if (A == MD) {
+ assert(i == 0 && "expected i==0 for self-reference");
+ HasSelfRef = true;
+ Args.push_back(nullptr);
+ } else if (Metadata *NewArg =
+ stripLoopMDLoc(AllDILocation, DIReachable, A)) {
+ Args.push_back(NewArg);
+ }
+ }
+ if (Args.empty() || (HasSelfRef && Args.size() == 1))
+ return nullptr;
+
+ MDNode *NewMD = N->isDistinct() ? MDNode::getDistinct(N->getContext(), Args)
+ : MDNode::get(N->getContext(), Args);
+ if (HasSelfRef)
+ NewMD->replaceOperandWith(0, NewMD);
+ return NewMD;
}
static MDNode *stripDebugLocFromLoopID(MDNode *N) {
assert(!N->operands().empty() && "Missing self reference?");
- SmallPtrSet<Metadata *, 8> Visited, DILocationReachable;
+ SmallPtrSet<Metadata *, 8> Visited, DILocationReachable, AllDILocation;
// If we already visited N, there is nothing to do.
if (!Visited.insert(N).second)
return N;
@@ -428,27 +468,27 @@ static MDNode *stripDebugLocFromLoopID(MDNode *N) {
// MDNode. This loop also initializes DILocationReachable, later
// needed by updateLoopMetadataDebugLocationsImpl; the use of
// count_if avoids an early exit.
- if (!std::count_if(N->op_begin() + 1, N->op_end(),
+ if (!llvm::count_if(llvm::drop_begin(N->operands()),
[&Visited, &DILocationReachable](const MDOperand &Op) {
return isDILocationReachable(
Visited, DILocationReachable, Op.get());
}))
return N;
+ Visited.clear();
// If there is only the debug location without any actual loop metadata, we
// can remove the metadata.
if (llvm::all_of(llvm::drop_begin(N->operands()),
- [&Visited, &DILocationReachable](const MDOperand &Op) {
- return isDILocationReachable(Visited, DILocationReachable,
- Op.get());
+ [&Visited, &AllDILocation,
+ &DILocationReachable](const MDOperand &Op) {
+ return isAllDILocation(Visited, AllDILocation,
+ DILocationReachable, Op.get());
}))
return nullptr;
return updateLoopMetadataDebugLocationsImpl(
- N, [&DILocationReachable](Metadata *MD) -> Metadata * {
- if (isa<DILocation>(MD) || DILocationReachable.count(MD))
- return nullptr;
- return MD;
+ N, [&AllDILocation, &DILocationReachable](Metadata *MD) -> Metadata * {
+ return stripLoopMDLoc(AllDILocation, DILocationReachable, MD);
});
}
@@ -737,7 +777,6 @@ bool llvm::stripNonLineTableDebugInfo(Module &M) {
Changed = true;
}
};
- RemoveUses("llvm.dbg.addr");
RemoveUses("llvm.dbg.declare");
RemoveUses("llvm.dbg.label");
RemoveUses("llvm.dbg.value");
@@ -806,7 +845,7 @@ bool llvm::stripNonLineTableDebugInfo(Module &M) {
// Create a new llvm.dbg.cu, which is equivalent to the one
// -gline-tables-only would have created.
- for (auto &NMD : M.getNamedMDList()) {
+ for (auto &NMD : M.named_metadata()) {
SmallVector<MDNode *, 8> Ops;
for (MDNode *Op : NMD.operands())
Ops.push_back(remap(Op));
@@ -829,8 +868,7 @@ unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) {
return 0;
}
-void Instruction::applyMergedLocation(const DILocation *LocA,
- const DILocation *LocB) {
+void Instruction::applyMergedLocation(DILocation *LocA, DILocation *LocB) {
setDebugLoc(DILocation::getMergedLocation(LocA, LocB));
}
@@ -1444,8 +1482,12 @@ LLVMDIBuilderCreateArtificialType(LLVMDIBuilderRef Builder,
return wrap(unwrap(Builder)->createArtificialType(unwrapDI<DIType>(Type)));
}
+uint16_t LLVMGetDINodeTag(LLVMMetadataRef MD) {
+ return unwrapDI<DINode>(MD)->getTag();
+}
+
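A minimal sketch of the new LLVMGetDINodeTag accessor, assuming an existing LLVMDIBuilderRef; 0x05 (DW_ATE_signed) and 0x24 (DW_TAG_base_type) are the standard DWARF constants:

#include "llvm-c/DebugInfo.h"
#include <stdint.h>

static uint16_t basicTypeTagSketch(LLVMDIBuilderRef DIB) {
  LLVMMetadataRef IntTy = LLVMDIBuilderCreateBasicType(
      DIB, "int", 3, /*SizeInBits=*/32, /*Encoding=*/0x05, LLVMDIFlagZero);
  return LLVMGetDINodeTag(IntTy); // 0x24, DW_TAG_base_type
}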
const char *LLVMDITypeGetName(LLVMMetadataRef DType, size_t *Length) {
- StringRef Str = unwrap<DIType>(DType)->getName();
+ StringRef Str = unwrapDI<DIType>(DType)->getName();
*Length = Str.size();
return Str.data();
}
@@ -1738,14 +1780,155 @@ void at::deleteAll(Function *F) {
DAI->eraseFromParent();
}
+bool at::calculateFragmentIntersect(
+ const DataLayout &DL, const Value *Dest, uint64_t SliceOffsetInBits,
+ uint64_t SliceSizeInBits, const DbgAssignIntrinsic *DAI,
+ std::optional<DIExpression::FragmentInfo> &Result) {
+ // There are multiple offsets at play in this function, so let's break it
+ // down. Starting with how variables may be stored in allocas:
+ //
+ // 1 Simplest case: variable is alloca sized and starts at offset 0.
+ // 2 Variable is larger than the alloca: the alloca holds just a part of it.
+ // 3 Variable is smaller than the alloca: the alloca may hold multiple
+ // variables.
+ //
+ // Imagine we have a store to the entire alloca. In case (3) the store
+ // affects bits outside of the bounds of each variable. In case (2), where
+ // the alloca holds the Xth bit to the Yth bit of a variable, the
+ // zero-offset store doesn't represent an assignment at offset zero to the
+ // variable. It is an assignment to offset X.
+ //
+ // # Example 1
+ // Obviously, not all stores are alloca-sized and have zero offset. Imagine
+ // the lower 32 bits of this store are dead and are going to be DSEd:
+ //
+ // store i64 %v, ptr %dest, !DIAssignID !1
+ // dbg.assign(..., !DIExpression(fragment, 128, 32), !1, %dest,
+ // !DIExpression(DW_OP_plus_uconst, 4))
+ //
+ // Goal: Given our dead bits at offset:0 size:32 for the store, determine
+ // which part of the variable, restricted to the fragment expressed by the
+ // dbg.assign, has been killed, if any.
+ //
+ // calculateFragmentIntersect(..., SliceOffsetInBits=0,
+ // SliceSizeInBits=32, Dest=%dest, DAI=dbg.assign)
+ //
+ // Drawing the store (s) in memory followed by the shortened version ($),
+ // then the dbg.assign (d), with the fragment information on a separate scale
+ // underneath:
+ //
+ // Memory
+ // offset
+ // from
+ // dest 0 63
+ // | |
+ // s[######] - Original stores 64 bits to Dest.
+ // $----[##] - DSE says the lower 32 bits are dead, to be removed.
+ // d [##] - DAI's address-modifying expression adds 4 bytes to dest.
+ // Variable | |
+ // Fragment 128|
+ // Offsets 159
+ //
+ // The answer is achieved in a few steps:
+ // 1. Add the fragment offset to the store offset:
+ // SliceOffsetInBits:0 + VarFrag.OffsetInBits:128 = 128
+ //
+ // 2. Subtract the address-modifying expression offset plus difference
+ // between d.address and dest:
+ // 128 - (expression_offset:32 + (d.address - dest):0) = 96
+ //
+ // 3. That offset along with the store size (32) represents the bits of the
+ // variable that'd be affected by the store. Call it SliceOfVariable.
+ // Intersect that with DAI's fragment info:
+ // SliceOfVariable ∩ DAI_fragment = none
+ //
+ // In this case: none of the dead bits of the store affect DAI.
+ //
+ // # Example 2
+ // Similar example with the same goal. This time the upper 16 bits
+ // of the store are going to be DSE'd.
+ //
+ // store i64 %v, ptr %dest, !DIAssignID !1
+ // dbg.assign(..., !DIExpression(fragment, 128, 32), !1, %dest,
+ // !DIExpression(DW_OP_plus_uconst, 4))
+ //
+ // calculateFragmentIntersect(..., SliceOffsetInBits=48,
+ // SliceSizeInBits=16, Dest=%dest, DAI=dbg.assign)
+ //
+ // Memory
+ // offset
+ // from
+ // dest 0 63
+ // | |
+ // s[######] - Original stores 64 bits to Dest.
+ // $[####]-- - DSE says the upper 16 bits are dead, to be removed.
+ // d [##] - DAI's address-modifying expression adds 4 bytes to dest.
+ // Variable | |
+ // Fragment 128|
+ // Offsets 159
+ //
+ // Using the same steps in the first example:
+ // 1. SliceOffsetInBits:48 + VarFrag.OffsetInBits:128 = 176
+ // 2. 176 - (expression_offset:32 + (d.address - dest):0) = 144
+ // 3. SliceOfVariable offset = 144, size = 16:
+ // SliceOfVariable ∩ DAI_fragment = (offset: 144, size: 16)
+ // SliceOfVariable tells us the bits of the variable described by DAI that are
+ // affected by the DSE.
+ if (DAI->isKillAddress())
+ return false;
+
+ DIExpression::FragmentInfo VarFrag = DAI->getFragmentOrEntireVariable();
+ if (VarFrag.SizeInBits == 0)
+ return false; // Variable size is unknown.
+
+ // Calculate the difference between Dest and the dbg.assign address +
+ // address-modifying expression.
+ int64_t PointerOffsetInBits;
+ {
+ auto DestOffsetInBytes = DAI->getAddress()->getPointerOffsetFrom(Dest, DL);
+ if (!DestOffsetInBytes)
+ return false; // Can't calculate difference in addresses.
+
+ int64_t ExprOffsetInBytes;
+ if (!DAI->getAddressExpression()->extractIfOffset(ExprOffsetInBytes))
+ return false;
+
+ int64_t PointerOffsetInBytes = *DestOffsetInBytes + ExprOffsetInBytes;
+ PointerOffsetInBits = PointerOffsetInBytes * 8;
+ }
+
+ // Adjust the slice offset so that we go from describing a slice
+ // of memory to a slice of the variable.
+ int64_t NewOffsetInBits =
+ SliceOffsetInBits + VarFrag.OffsetInBits - PointerOffsetInBits;
+ if (NewOffsetInBits < 0)
+ return false; // Fragment offsets can only be positive.
+ DIExpression::FragmentInfo SliceOfVariable(SliceSizeInBits, NewOffsetInBits);
+ // Intersect the variable slice with DAI's fragment to trim it down to size.
+ DIExpression::FragmentInfo TrimmedSliceOfVariable =
+ DIExpression::FragmentInfo::intersect(SliceOfVariable, VarFrag);
+ if (TrimmedSliceOfVariable == VarFrag)
+ Result = std::nullopt;
+ else
+ Result = TrimmedSliceOfVariable;
+ return true;
+}
+
/// Collect constant properties (base, size, offset) of \p StoreDest.
-/// Return std::nullopt if any properties are not constants.
+/// Return std::nullopt if any properties are not constants or the
+/// offset from the base pointer is negative.
static std::optional<AssignmentInfo>
getAssignmentInfoImpl(const DataLayout &DL, const Value *StoreDest,
- uint64_t SizeInBits) {
+ TypeSize SizeInBits) {
+ if (SizeInBits.isScalable())
+ return std::nullopt;
APInt GEPOffset(DL.getIndexTypeSizeInBits(StoreDest->getType()), 0);
const Value *Base = StoreDest->stripAndAccumulateConstantOffsets(
DL, GEPOffset, /*AllowNonInbounds*/ true);
+
+ if (GEPOffset.isNegative())
+ return std::nullopt;
+
uint64_t OffsetInBytes = GEPOffset.getLimitedValue();
// Check for overflow.
if (OffsetInBytes == UINT64_MAX)
@@ -1764,22 +1947,22 @@ std::optional<AssignmentInfo> at::getAssignmentInfo(const DataLayout &DL,
// We can't use a non-const size, bail.
return std::nullopt;
uint64_t SizeInBits = 8 * ConstLengthInBytes->getZExtValue();
- return getAssignmentInfoImpl(DL, StoreDest, SizeInBits);
+ return getAssignmentInfoImpl(DL, StoreDest, TypeSize::getFixed(SizeInBits));
}
std::optional<AssignmentInfo> at::getAssignmentInfo(const DataLayout &DL,
const StoreInst *SI) {
- const Value *StoreDest = SI->getPointerOperand();
- uint64_t SizeInBits = DL.getTypeSizeInBits(SI->getValueOperand()->getType());
- return getAssignmentInfoImpl(DL, StoreDest, SizeInBits);
+ TypeSize SizeInBits = DL.getTypeSizeInBits(SI->getValueOperand()->getType());
+ return getAssignmentInfoImpl(DL, SI->getPointerOperand(), SizeInBits);
}
std::optional<AssignmentInfo> at::getAssignmentInfo(const DataLayout &DL,
const AllocaInst *AI) {
- uint64_t SizeInBits = DL.getTypeSizeInBits(AI->getAllocatedType());
+ TypeSize SizeInBits = DL.getTypeSizeInBits(AI->getAllocatedType());
return getAssignmentInfoImpl(DL, AI, SizeInBits);
}
+/// Returns nullptr if the assignment shouldn't be attributed to this variable.
static CallInst *emitDbgAssign(AssignmentInfo Info, Value *Val, Value *Dest,
Instruction &StoreLikeInst,
const VarRecord &VarRec, DIBuilder &DIB) {
@@ -1787,11 +1970,35 @@ static CallInst *emitDbgAssign(AssignmentInfo Info, Value *Val, Value *Dest,
assert(ID && "Store instruction must have DIAssignID metadata");
(void)ID;
+ const uint64_t StoreStartBit = Info.OffsetInBits;
+ const uint64_t StoreEndBit = Info.OffsetInBits + Info.SizeInBits;
+
+ uint64_t FragStartBit = StoreStartBit;
+ uint64_t FragEndBit = StoreEndBit;
+
+ bool StoreToWholeVariable = Info.StoreToWholeAlloca;
+ if (auto Size = VarRec.Var->getSizeInBits()) {
+ // NOTE: trackAssignments doesn't understand base expressions yet, so all
+ // variables that reach here are guaranteed to start at offset 0 in the
+ // alloca.
+ const uint64_t VarStartBit = 0;
+ const uint64_t VarEndBit = *Size;
+
+ // FIXME: trim FragStartBit when nonzero VarStartBit is supported.
+ FragEndBit = std::min(FragEndBit, VarEndBit);
+
+ // Discard stores to bits outside this variable.
+ if (FragStartBit >= FragEndBit)
+ return nullptr;
+
+ StoreToWholeVariable = FragStartBit <= VarStartBit && FragEndBit >= *Size;
+ }
+
DIExpression *Expr =
DIExpression::get(StoreLikeInst.getContext(), std::nullopt);
- if (!Info.StoreToWholeAlloca) {
- auto R = DIExpression::createFragmentExpression(Expr, Info.OffsetInBits,
- Info.SizeInBits);
+ if (!StoreToWholeVariable) {
+ auto R = DIExpression::createFragmentExpression(Expr, FragStartBit,
+ FragEndBit - FragStartBit);
assert(R.has_value() && "failed to create fragment expression");
Expr = *R;
}
@@ -1889,13 +2096,19 @@ void at::trackAssignments(Function::iterator Start, Function::iterator End,
auto *Assign =
emitDbgAssign(*Info, ValueComponent, DestComponent, I, R, DIB);
(void)Assign;
- LLVM_DEBUG(errs() << " > INSERT: " << *Assign << "\n");
+ LLVM_DEBUG(if (Assign) errs() << " > INSERT: " << *Assign << "\n");
}
}
}
}
-void AssignmentTrackingPass::runOnFunction(Function &F) {
+bool AssignmentTrackingPass::runOnFunction(Function &F) {
+ // No value in assignment tracking without optimisations.
+ if (F.hasFnAttribute(Attribute::OptimizeNone))
+ return /*Changed*/ false;
+
+ bool Changed = false;
+ auto *DL = &F.getParent()->getDataLayout();
// Collect a map of {backing storage : dbg.declares} (currently "backing
// storage" is limited to Allocas). We'll use this to find dbg.declares to
// delete after running `trackAssignments`.
@@ -1913,15 +2126,22 @@ void AssignmentTrackingPass::runOnFunction(Function &F) {
// leave dbg.declares with non-empty expressions in place.
if (DDI->getExpression()->getNumElements() != 0)
continue;
+ if (!DDI->getAddress())
+ continue;
if (AllocaInst *Alloca =
dyn_cast<AllocaInst>(DDI->getAddress()->stripPointerCasts())) {
+ // FIXME: Skip VLAs for now (let these variables use dbg.declares).
+ if (!Alloca->isStaticAlloca())
+ continue;
+ // Similarly, skip scalable vectors (use dbg.declares instead).
+ if (auto Sz = Alloca->getAllocationSize(*DL); Sz && Sz->isScalable())
+ continue;
DbgDeclares[Alloca].insert(DDI);
Vars[Alloca].insert(VarRecord(DDI));
}
}
}
- auto DL = std::make_unique<DataLayout>(F.getParent());
// FIXME: Locals can be backed by caller allocas (sret, byval).
// Note: trackAssignments doesn't respect dbg.declare's IR positions (as it
// doesn't "understand" dbg.declares). However, this doesn't appear to break
@@ -1940,16 +2160,22 @@ void AssignmentTrackingPass::runOnFunction(Function &F) {
(void)Markers;
for (DbgDeclareInst *DDI : P.second) {
// Assert that the alloca that DDI uses is now linked to a dbg.assign
- // describing the same variable (i.e. check that this dbg.declare
- // has been replaced by a dbg.assign).
+ // describing the same variable (i.e. check that this dbg.declare has
+      // been replaced by a dbg.assign). Use DebugVariableAggregate to discard
+      // the fragment part because trackAssignments may alter the
+      // fragment, e.g. if the alloca is smaller than the variable, then
+ // trackAssignments will create an alloca-sized fragment for the
+ // dbg.assign.
assert(llvm::any_of(Markers, [DDI](DbgAssignIntrinsic *DAI) {
- return DebugVariable(DAI) == DebugVariable(DDI);
+ return DebugVariableAggregate(DAI) == DebugVariableAggregate(DDI);
}));
// Delete DDI because the variable location is now tracked using
// assignment tracking.
DDI->eraseFromParent();
+ Changed = true;
}
}
+ return Changed;
}
static const char *AssignmentTrackingModuleFlag =
@@ -1972,7 +2198,8 @@ bool llvm::isAssignmentTrackingEnabled(const Module &M) {
PreservedAnalyses AssignmentTrackingPass::run(Function &F,
FunctionAnalysisManager &AM) {
- runOnFunction(F);
+ if (!runOnFunction(F))
+ return PreservedAnalyses::all();
// Record that this module uses assignment tracking. It doesn't matter that
  // some functions in the module may not use it - the debug info in those
@@ -1988,8 +2215,12 @@ PreservedAnalyses AssignmentTrackingPass::run(Function &F,
PreservedAnalyses AssignmentTrackingPass::run(Module &M,
ModuleAnalysisManager &AM) {
+ bool Changed = false;
for (auto &F : M)
- runOnFunction(F);
+ Changed |= runOnFunction(F);
+
+ if (!Changed)
+ return PreservedAnalyses::all();
// Record that this module uses assignment tracking.
setAssignmentTrackingModuleFlag(M);
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index fb9a7b882220..4933b6032688 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -13,6 +13,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "LLVMContextImpl.h"
#include "MetadataImpl.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/Dwarf.h"
@@ -41,6 +42,10 @@ DebugVariable::DebugVariable(const DbgVariableIntrinsic *DII)
Fragment(DII->getExpression()->getFragmentInfo()),
InlinedAt(DII->getDebugLoc().getInlinedAt()) {}
+DebugVariableAggregate::DebugVariableAggregate(const DbgVariableIntrinsic *DVI)
+ : DebugVariable(DVI->getVariable(), std::nullopt,
+ DVI->getDebugLoc()->getInlinedAt()) {}
+
DILocation::DILocation(LLVMContext &C, StorageType Storage, unsigned Line,
unsigned Column, ArrayRef<Metadata *> MDs,
bool ImplicitCode)
@@ -90,14 +95,13 @@ DILocation *DILocation::getImpl(LLVMContext &Context, unsigned Line,
Storage, Context.pImpl->DILocations);
}
-const DILocation *
-DILocation::getMergedLocations(ArrayRef<const DILocation *> Locs) {
+DILocation *DILocation::getMergedLocations(ArrayRef<DILocation *> Locs) {
if (Locs.empty())
return nullptr;
if (Locs.size() == 1)
return Locs[0];
auto *Merged = Locs[0];
- for (const DILocation *L : llvm::drop_begin(Locs)) {
+ for (DILocation *L : llvm::drop_begin(Locs)) {
Merged = getMergedLocation(Merged, L);
if (Merged == nullptr)
break;
@@ -105,8 +109,7 @@ DILocation::getMergedLocations(ArrayRef<const DILocation *> Locs) {
return Merged;
}
-const DILocation *DILocation::getMergedLocation(const DILocation *LocA,
- const DILocation *LocB) {
+DILocation *DILocation::getMergedLocation(DILocation *LocA, DILocation *LocB) {
if (!LocA || !LocB)
return nullptr;
@@ -114,63 +117,122 @@ const DILocation *DILocation::getMergedLocation(const DILocation *LocA,
return LocA;
LLVMContext &C = LocA->getContext();
- SmallDenseMap<std::pair<DILocalScope *, DILocation *>,
- std::pair<unsigned, unsigned>, 4>
- Locations;
-
- DIScope *S = LocA->getScope();
- DILocation *L = LocA->getInlinedAt();
- unsigned Line = LocA->getLine();
- unsigned Col = LocA->getColumn();
-
- // Walk from the current source locaiton until the file scope;
- // then, do the same for the inlined-at locations.
- auto AdvanceToParentLoc = [&S, &L, &Line, &Col]() {
- S = S->getScope();
- if (!S && L) {
- Line = L->getLine();
- Col = L->getColumn();
- S = L->getScope();
- L = L->getInlinedAt();
- }
- };
- while (S) {
- if (auto *LS = dyn_cast<DILocalScope>(S))
- Locations.try_emplace(std::make_pair(LS, L), std::make_pair(Line, Col));
- AdvanceToParentLoc();
+ using LocVec = SmallVector<const DILocation *>;
+ LocVec ALocs;
+ LocVec BLocs;
+ SmallDenseMap<std::pair<const DISubprogram *, const DILocation *>, unsigned,
+ 4>
+ ALookup;
+
+  // Walk through LocA and its inlined-at locations, populating ALocs and
+  // saving the index for each subprogram and inlined-at pair, which we use to
+  // find a matching starting location in LocB's chain.
+ for (auto [L, I] = std::make_pair(LocA, 0U); L; L = L->getInlinedAt(), I++) {
+ ALocs.push_back(L);
+ auto Res = ALookup.try_emplace(
+ {L->getScope()->getSubprogram(), L->getInlinedAt()}, I);
+ assert(Res.second && "Multiple <SP, InlinedAt> pairs in a location chain?");
+ (void)Res;
}
- // Walk the source locations of LocB until a match with LocA is found.
- S = LocB->getScope();
- L = LocB->getInlinedAt();
- Line = LocB->getLine();
- Col = LocB->getColumn();
- while (S) {
- if (auto *LS = dyn_cast<DILocalScope>(S)) {
- auto MatchLoc = Locations.find(std::make_pair(LS, L));
- if (MatchLoc != Locations.end()) {
- // If the lines match, keep the line, but set the column to '0'
- // If the lines don't match, pick a "line 0" location but keep
- // the current scope and inlined-at.
- bool SameLine = Line == MatchLoc->second.first;
- bool SameCol = Col == MatchLoc->second.second;
- Line = SameLine ? Line : 0;
- Col = SameLine && SameCol ? Col : 0;
- break;
- }
- }
- AdvanceToParentLoc();
+ LocVec::reverse_iterator ARIt = ALocs.rend();
+ LocVec::reverse_iterator BRIt = BLocs.rend();
+
+ // Populate BLocs and look for a matching starting location, the first
+ // location with the same subprogram and inlined-at location as in LocA's
+ // chain. Since the two locations have the same inlined-at location we do
+ // not need to look at those parts of the chains.
+ for (auto [L, I] = std::make_pair(LocB, 0U); L; L = L->getInlinedAt(), I++) {
+ BLocs.push_back(L);
+
+ if (ARIt != ALocs.rend())
+ // We have already found a matching starting location.
+ continue;
+
+ auto IT = ALookup.find({L->getScope()->getSubprogram(), L->getInlinedAt()});
+ if (IT == ALookup.end())
+ continue;
+
+ // The + 1 is to account for the &*rev_it = &(it - 1) relationship.
+ ARIt = LocVec::reverse_iterator(ALocs.begin() + IT->second + 1);
+ BRIt = LocVec::reverse_iterator(BLocs.begin() + I + 1);
+
+ // If we have found a matching starting location we do not need to add more
+ // locations to BLocs, since we will only look at location pairs preceding
+ // the matching starting location, and adding more elements to BLocs could
+ // invalidate the iterator that we initialized here.
+ break;
}
- if (!S) {
- // If the two locations are irreconsilable, pick any scope,
- // and return a "line 0" location.
- Line = Col = 0;
- S = LocA->getScope();
+ // Merge the two locations if possible, using the supplied
+ // inlined-at location for the created location.
+ auto MergeLocPair = [&C](const DILocation *L1, const DILocation *L2,
+ DILocation *InlinedAt) -> DILocation * {
+ if (L1 == L2)
+ return DILocation::get(C, L1->getLine(), L1->getColumn(), L1->getScope(),
+ InlinedAt);
+
+ // If the locations originate from different subprograms we can't produce
+ // a common location.
+ if (L1->getScope()->getSubprogram() != L2->getScope()->getSubprogram())
+ return nullptr;
+
+ // Return the nearest common scope inside a subprogram.
+ auto GetNearestCommonScope = [](DIScope *S1, DIScope *S2) -> DIScope * {
+ SmallPtrSet<DIScope *, 8> Scopes;
+ for (; S1; S1 = S1->getScope()) {
+ Scopes.insert(S1);
+ if (isa<DISubprogram>(S1))
+ break;
+ }
+
+ for (; S2; S2 = S2->getScope()) {
+ if (Scopes.count(S2))
+ return S2;
+ if (isa<DISubprogram>(S2))
+ break;
+ }
+
+ return nullptr;
+ };
+
+ auto Scope = GetNearestCommonScope(L1->getScope(), L2->getScope());
+ assert(Scope && "No common scope in the same subprogram?");
+
+ bool SameLine = L1->getLine() == L2->getLine();
+ bool SameCol = L1->getColumn() == L2->getColumn();
+ unsigned Line = SameLine ? L1->getLine() : 0;
+ unsigned Col = SameLine && SameCol ? L1->getColumn() : 0;
+
+ return DILocation::get(C, Line, Col, Scope, InlinedAt);
+ };
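+  // As an illustrative example: two locations from the same subprogram but in
+  // sibling lexical blocks merge to a location in their nearest common
+  // ancestor scope; the line is kept only when both lines match (otherwise 0),
+  // and the column only when both the lines and columns match.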
+
+ DILocation *Result = ARIt != ALocs.rend() ? (*ARIt)->getInlinedAt() : nullptr;
+
+ // If we have found a common starting location, walk up the inlined-at chains
+ // and try to produce common locations.
+ for (; ARIt != ALocs.rend() && BRIt != BLocs.rend(); ++ARIt, ++BRIt) {
+ DILocation *Tmp = MergeLocPair(*ARIt, *BRIt, Result);
+
+ if (!Tmp)
+ // We have walked up to a point in the chains where the two locations
+      // are irreconcilable. At this point Result contains the nearest common
+ // location in the inlined-at chains of LocA and LocB, so we break here.
+ break;
+
+ Result = Tmp;
}
- return DILocation::get(C, Line, Col, S, L);
+ if (Result)
+ return Result;
+
+  // We ended up with LocA and LocB as irreconcilable locations. Produce a
+  // location at 0:0 with one of the locations' scope. The function has
+  // historically picked A's scope and a nullptr inlined-at location, so that
+  // behavior is mimicked here, though it is not clear that this is always the
+  // correct way to handle this.
+ return DILocation::get(C, 0, 0, LocA->getScope(), nullptr);
}
std::optional<unsigned>
@@ -908,6 +970,7 @@ DICompileUnit::getNameTableKind(StringRef Str) {
return StringSwitch<std::optional<DebugNameTableKind>>(Str)
.Case("Default", DebugNameTableKind::Default)
.Case("GNU", DebugNameTableKind::GNU)
+ .Case("Apple", DebugNameTableKind::Apple)
.Case("None", DebugNameTableKind::None)
.Default(std::nullopt);
}
@@ -932,6 +995,8 @@ const char *DICompileUnit::nameTableKindString(DebugNameTableKind NTK) {
return nullptr;
case DebugNameTableKind::GNU:
return "GNU";
+ case DebugNameTableKind::Apple:
+ return "Apple";
case DebugNameTableKind::None:
return "None";
}
@@ -1285,6 +1350,9 @@ bool DIExpression::isEntryValue() const {
bool DIExpression::startsWithDeref() const {
return getNumElements() > 0 && getElement(0) == dwarf::DW_OP_deref;
}
+bool DIExpression::isDeref() const {
+ return getNumElements() == 1 && startsWithDeref();
+}
DIAssignID *DIAssignID::getImpl(LLVMContext &Context, StorageType Storage,
bool ShouldCreate) {
@@ -1396,6 +1464,12 @@ bool DIExpression::isValid() const {
case dwarf::DW_OP_push_object_address:
case dwarf::DW_OP_over:
case dwarf::DW_OP_consts:
+ case dwarf::DW_OP_eq:
+ case dwarf::DW_OP_ne:
+ case dwarf::DW_OP_gt:
+ case dwarf::DW_OP_ge:
+ case dwarf::DW_OP_lt:
+ case dwarf::DW_OP_le:
break;
}
}
@@ -1604,7 +1678,7 @@ bool DIExpression::hasAllLocationOps(unsigned N) const {
if (ExprOp.getOp() == dwarf::DW_OP_LLVM_arg)
SeenOps.insert(ExprOp.getArg(0));
for (uint64_t Idx = 0; Idx < N; ++Idx)
- if (!is_contained(SeenOps, Idx))
+ if (!SeenOps.contains(Idx))
return false;
return true;
}
@@ -2026,7 +2100,7 @@ void DIArgList::handleChangedOperand(void *Ref, Metadata *New) {
if (NewVM)
VM = NewVM;
else
- VM = ValueAsMetadata::get(UndefValue::get(VM->getValue()->getType()));
+ VM = ValueAsMetadata::get(PoisonValue::get(VM->getValue()->getType()));
}
}
if (Uniq) {
diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp
index fb238e2aac59..342c4cbbc39d 100644
--- a/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/llvm/lib/IR/DiagnosticInfo.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Demangle/Demangle.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -440,7 +441,7 @@ void llvm::diagnoseDontCall(const CallInst &CI) {
}
void DiagnosticInfoDontCall::print(DiagnosticPrinter &DP) const {
- DP << "call to " << getFunctionName() << " marked \"dontcall-";
+ DP << "call to " << demangle(getFunctionName()) << " marked \"dontcall-";
if (getSeverity() == DiagnosticSeverity::DS_Error)
DP << "error\"";
else
diff --git a/llvm/lib/IR/Dominators.cpp b/llvm/lib/IR/Dominators.cpp
index 7c620c3a9331..24cc9f46ff79 100644
--- a/llvm/lib/IR/Dominators.cpp
+++ b/llvm/lib/IR/Dominators.cpp
@@ -194,13 +194,6 @@ bool DominatorTree::dominates(const Instruction *Def,
return dominates(E, UseBB);
}
- // Callbr results are similarly only usable in the default destination.
- if (const auto *CBI = dyn_cast<CallBrInst>(Def)) {
- BasicBlock *NormalDest = CBI->getDefaultDest();
- BasicBlockEdge E(DefBB, NormalDest);
- return dominates(E, UseBB);
- }
-
return dominates(DefBB, UseBB);
}
@@ -311,13 +304,6 @@ bool DominatorTree::dominates(const Value *DefV, const Use &U) const {
return dominates(E, U);
}
- // Callbr results are similarly only usable in the default destination.
- if (const auto *CBI = dyn_cast<CallBrInst>(Def)) {
- BasicBlock *NormalDest = CBI->getDefaultDest();
- BasicBlockEdge E(DefBB, NormalDest);
- return dominates(E, U);
- }
-
// If the def and use are in different blocks, do a simple CFG dominator
// tree query.
if (DefBB != UseBB)
diff --git a/llvm/lib/Analysis/EHPersonalities.cpp b/llvm/lib/IR/EHPersonalities.cpp
index 277ff6ba735f..afbb2bb8275d 100644
--- a/llvm/lib/Analysis/EHPersonalities.cpp
+++ b/llvm/lib/IR/EHPersonalities.cpp
@@ -6,15 +6,16 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
/// See if the given exception handling personality function is one that we
@@ -46,22 +47,34 @@ EHPersonality llvm::classifyEHPersonality(const Value *Pers) {
StringRef llvm::getEHPersonalityName(EHPersonality Pers) {
switch (Pers) {
- case EHPersonality::GNU_Ada: return "__gnat_eh_personality";
- case EHPersonality::GNU_CXX: return "__gxx_personality_v0";
- case EHPersonality::GNU_CXX_SjLj: return "__gxx_personality_sj0";
- case EHPersonality::GNU_C: return "__gcc_personality_v0";
- case EHPersonality::GNU_C_SjLj: return "__gcc_personality_sj0";
- case EHPersonality::GNU_ObjC: return "__objc_personality_v0";
- case EHPersonality::MSVC_X86SEH: return "_except_handler3";
+ case EHPersonality::GNU_Ada:
+ return "__gnat_eh_personality";
+ case EHPersonality::GNU_CXX:
+ return "__gxx_personality_v0";
+ case EHPersonality::GNU_CXX_SjLj:
+ return "__gxx_personality_sj0";
+ case EHPersonality::GNU_C:
+ return "__gcc_personality_v0";
+ case EHPersonality::GNU_C_SjLj:
+ return "__gcc_personality_sj0";
+ case EHPersonality::GNU_ObjC:
+ return "__objc_personality_v0";
+ case EHPersonality::MSVC_X86SEH:
+ return "_except_handler3";
case EHPersonality::MSVC_TableSEH:
return "__C_specific_handler";
- case EHPersonality::MSVC_CXX: return "__CxxFrameHandler3";
- case EHPersonality::CoreCLR: return "ProcessCLRException";
- case EHPersonality::Rust: return "rust_eh_personality";
- case EHPersonality::Wasm_CXX: return "__gxx_wasm_personality_v0";
+ case EHPersonality::MSVC_CXX:
+ return "__CxxFrameHandler3";
+ case EHPersonality::CoreCLR:
+ return "ProcessCLRException";
+ case EHPersonality::Rust:
+ return "rust_eh_personality";
+ case EHPersonality::Wasm_CXX:
+ return "__gxx_wasm_personality_v0";
case EHPersonality::XL_CXX:
return "__xlcxx_personality_v1";
- case EHPersonality::Unknown: llvm_unreachable("Unknown EHPersonality!");
+ case EHPersonality::Unknown:
+ llvm_unreachable("Unknown EHPersonality!");
}
llvm_unreachable("Invalid EHPersonality!");
@@ -79,7 +92,11 @@ bool llvm::canSimplifyInvokeNoUnwind(const Function *F) {
// We can't simplify any invokes to nounwind functions if the personality
// function wants to catch asynch exceptions. The nounwind attribute only
// implies that the function does not throw synchronous exceptions.
- return !isAsynchronousEHPersonality(Personality);
+
+ // Cannot simplify CXX Personality under AsynchEH
+ const llvm::Module *M = (const llvm::Module *)F->getParent();
+ bool EHa = M->getModuleFlag("eh-asynch");
+ return !EHa && !isAsynchronousEHPersonality(Personality);
}
DenseMap<BasicBlock *, ColorVector> llvm::colorEHFunclets(Function &F) {
@@ -97,8 +114,8 @@ DenseMap<BasicBlock *, ColorVector> llvm::colorEHFunclets(Function &F) {
// Note: Despite not being a funclet in the truest sense, a catchswitch is
// considered to belong to its own funclet for the purposes of coloring.
- DEBUG_WITH_TYPE("winehprepare-coloring", dbgs() << "\nColoring funclets for "
- << F.getName() << "\n");
+ DEBUG_WITH_TYPE("winehprepare-coloring",
+ dbgs() << "\nColoring funclets for " << F.getName() << "\n");
Worklist.push_back({EntryBlock, EntryBlock});
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 677db46124e4..27219e89dc5f 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -229,6 +229,10 @@ uint64_t Argument::getDereferenceableOrNullBytes() const {
return getParent()->getParamDereferenceableOrNullBytes(getArgNo());
}
+FPClassTest Argument::getNoFPClass() const {
+ return getParent()->getParamNoFPClass(getArgNo());
+}
+
bool Argument::hasNestAttr() const {
if (!getType()->isPointerTy()) return false;
return hasAttribute(Attribute::Nest);
@@ -698,17 +702,30 @@ void Function::addDereferenceableOrNullParamAttr(unsigned ArgNo,
DenormalMode Function::getDenormalMode(const fltSemantics &FPType) const {
if (&FPType == &APFloat::IEEEsingle()) {
- Attribute Attr = getFnAttribute("denormal-fp-math-f32");
- StringRef Val = Attr.getValueAsString();
- if (!Val.empty())
- return parseDenormalFPAttribute(Val);
-
+ DenormalMode Mode = getDenormalModeF32Raw();
// If the f32 variant of the attribute isn't specified, try to use the
// generic one.
+ if (Mode.isValid())
+ return Mode;
}
+ return getDenormalModeRaw();
+}
+
+DenormalMode Function::getDenormalModeRaw() const {
Attribute Attr = getFnAttribute("denormal-fp-math");
- return parseDenormalFPAttribute(Attr.getValueAsString());
+ StringRef Val = Attr.getValueAsString();
+ return parseDenormalFPAttribute(Val);
+}
+
+DenormalMode Function::getDenormalModeF32Raw() const {
+ Attribute Attr = getFnAttribute("denormal-fp-math-f32");
+ if (Attr.isValid()) {
+ StringRef Val = Attr.getValueAsString();
+ return parseDenormalFPAttribute(Val);
+ }
+
+ return DenormalMode::getInvalid();
}
const std::string &Function::getGC() const {
@@ -900,11 +917,6 @@ static std::string getMangledTypeStr(Type *Ty, bool &HasUnnamedType) {
std::string Result;
if (PointerType *PTyp = dyn_cast<PointerType>(Ty)) {
Result += "p" + utostr(PTyp->getAddressSpace());
- // Opaque pointer doesn't have pointee type information, so we just mangle
- // address space for opaque pointer.
- if (!PTyp->isOpaque())
- Result += getMangledTypeStr(PTyp->getNonOpaquePointerElementType(),
- HasUnnamedType);
} else if (ArrayType *ATyp = dyn_cast<ArrayType>(Ty)) {
Result += "a" + utostr(ATyp->getNumElements()) +
getMangledTypeStr(ATyp->getElementType(), HasUnnamedType);
@@ -1019,70 +1031,11 @@ std::string Intrinsic::getNameNoUnnamedTypes(ID Id, ArrayRef<Type *> Tys) {
/// IIT_Info - These are enumerators that describe the entries returned by the
/// getIntrinsicInfoTableEntries function.
///
-/// NOTE: This must be kept in synch with the copy in TblGen/IntrinsicEmitter!
+/// Defined in Intrinsics.td.
enum IIT_Info {
- // Common values should be encoded with 0-15.
- IIT_Done = 0,
- IIT_I1 = 1,
- IIT_I8 = 2,
- IIT_I16 = 3,
- IIT_I32 = 4,
- IIT_I64 = 5,
- IIT_F16 = 6,
- IIT_F32 = 7,
- IIT_F64 = 8,
- IIT_V2 = 9,
- IIT_V4 = 10,
- IIT_V8 = 11,
- IIT_V16 = 12,
- IIT_V32 = 13,
- IIT_PTR = 14,
- IIT_ARG = 15,
-
- // Values from 16+ are only encodable with the inefficient encoding.
- IIT_V64 = 16,
- IIT_MMX = 17,
- IIT_TOKEN = 18,
- IIT_METADATA = 19,
- IIT_EMPTYSTRUCT = 20,
- IIT_STRUCT2 = 21,
- IIT_STRUCT3 = 22,
- IIT_STRUCT4 = 23,
- IIT_STRUCT5 = 24,
- IIT_EXTEND_ARG = 25,
- IIT_TRUNC_ARG = 26,
- IIT_ANYPTR = 27,
- IIT_V1 = 28,
- IIT_VARARG = 29,
- IIT_HALF_VEC_ARG = 30,
- IIT_SAME_VEC_WIDTH_ARG = 31,
- IIT_PTR_TO_ARG = 32,
- IIT_PTR_TO_ELT = 33,
- IIT_VEC_OF_ANYPTRS_TO_ELT = 34,
- IIT_I128 = 35,
- IIT_V512 = 36,
- IIT_V1024 = 37,
- IIT_STRUCT6 = 38,
- IIT_STRUCT7 = 39,
- IIT_STRUCT8 = 40,
- IIT_F128 = 41,
- IIT_VEC_ELEMENT = 42,
- IIT_SCALABLE_VEC = 43,
- IIT_SUBDIVIDE2_ARG = 44,
- IIT_SUBDIVIDE4_ARG = 45,
- IIT_VEC_OF_BITCASTS_TO_INT = 46,
- IIT_V128 = 47,
- IIT_BF16 = 48,
- IIT_STRUCT9 = 49,
- IIT_V256 = 50,
- IIT_AMX = 51,
- IIT_PPCF128 = 52,
- IIT_V3 = 53,
- IIT_EXTERNREF = 54,
- IIT_FUNCREF = 55,
- IIT_ANYPTR_TO_ELT = 56,
- IIT_I2 = 57,
- IIT_I4 = 58,
+#define GET_INTRINSIC_IITINFO
+#include "llvm/IR/IntrinsicImpl.inc"
+#undef GET_INTRINSIC_IITINFO
};
static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
@@ -1141,6 +1094,9 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
case IIT_I4:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 4));
return;
+ case IIT_AARCH64_SVCOUNT:
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::AArch64Svcount, 0));
+ return;
case IIT_I8:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 8));
return;
@@ -1206,22 +1162,17 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
return;
case IIT_EXTERNREF:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 10));
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, 0));
return;
case IIT_FUNCREF:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 20));
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 8));
return;
case IIT_PTR:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 0));
- DecodeIITType(NextElt, Infos, Info, OutputTable);
return;
- case IIT_ANYPTR: { // [ANYPTR addrspace, subtype]
+ case IIT_ANYPTR: // [ANYPTR addrspace]
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer,
Infos[NextElt++]));
- DecodeIITType(NextElt, Infos, Info, OutputTable);
return;
- }
case IIT_ARG: {
unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Argument, ArgInfo));
@@ -1251,24 +1202,6 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
ArgInfo));
return;
}
- case IIT_PTR_TO_ARG: {
- unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::PtrToArgument,
- ArgInfo));
- return;
- }
- case IIT_PTR_TO_ELT: {
- unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
- OutputTable.push_back(IITDescriptor::get(IITDescriptor::PtrToElt, ArgInfo));
- return;
- }
- case IIT_ANYPTR_TO_ELT: {
- unsigned short ArgNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
- unsigned short RefNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
- OutputTable.push_back(
- IITDescriptor::get(IITDescriptor::AnyPtrToElt, ArgNo, RefNo));
- return;
- }
case IIT_VEC_OF_ANYPTRS_TO_ELT: {
unsigned short ArgNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
unsigned short RefNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
@@ -1382,6 +1315,8 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
case IITDescriptor::Double: return Type::getDoubleTy(Context);
case IITDescriptor::Quad: return Type::getFP128Ty(Context);
case IITDescriptor::PPCQuad: return Type::getPPC_FP128Ty(Context);
+ case IITDescriptor::AArch64Svcount:
+ return TargetExtType::get(Context, "aarch64.svcount");
case IITDescriptor::Integer:
return IntegerType::get(Context, D.Integer_Width);
@@ -1389,8 +1324,7 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
return VectorType::get(DecodeFixedType(Infos, Tys, Context),
D.Vector_Width);
case IITDescriptor::Pointer:
- return PointerType::get(DecodeFixedType(Infos, Tys, Context),
- D.Pointer_AddressSpace);
+ return PointerType::get(Context, D.Pointer_AddressSpace);
case IITDescriptor::Struct: {
SmallVector<Type *, 8> Elts;
for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i)
@@ -1433,18 +1367,6 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
return VectorType::get(EltTy, VTy->getElementCount());
return EltTy;
}
- case IITDescriptor::PtrToArgument: {
- Type *Ty = Tys[D.getArgumentNumber()];
- return PointerType::getUnqual(Ty);
- }
- case IITDescriptor::PtrToElt: {
- Type *Ty = Tys[D.getArgumentNumber()];
- VectorType *VTy = dyn_cast<VectorType>(Ty);
- if (!VTy)
- llvm_unreachable("Expected an argument of Vector Type");
- Type *EltTy = VTy->getElementType();
- return PointerType::getUnqual(EltTy);
- }
case IITDescriptor::VecElementArgument: {
Type *Ty = Tys[D.getArgumentNumber()];
if (VectorType *VTy = dyn_cast<VectorType>(Ty))
@@ -1460,9 +1382,6 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
case IITDescriptor::VecOfAnyPtrsToElt:
    // Return the overloaded type (which determines the pointer's address space)
return Tys[D.getOverloadArgNumber()];
- case IITDescriptor::AnyPtrToElt:
- // Return the overloaded type (which determines the pointers address space)
- return Tys[D.getOverloadArgNumber()];
}
llvm_unreachable("unhandled");
}
@@ -1556,6 +1475,9 @@ static bool matchIntrinsicType(
case IITDescriptor::Quad: return !Ty->isFP128Ty();
case IITDescriptor::PPCQuad: return !Ty->isPPC_FP128Ty();
case IITDescriptor::Integer: return !Ty->isIntegerTy(D.Integer_Width);
+ case IITDescriptor::AArch64Svcount:
+ return !isa<TargetExtType>(Ty) ||
+ cast<TargetExtType>(Ty)->getName() != "aarch64.svcount";
case IITDescriptor::Vector: {
VectorType *VT = dyn_cast<VectorType>(Ty);
return !VT || VT->getElementCount() != D.Vector_Width ||
@@ -1564,33 +1486,7 @@ static bool matchIntrinsicType(
}
case IITDescriptor::Pointer: {
PointerType *PT = dyn_cast<PointerType>(Ty);
- if (!PT || PT->getAddressSpace() != D.Pointer_AddressSpace)
- return true;
- if (!PT->isOpaque()) {
- /* Manually consume a pointer to empty struct descriptor, which is
- * used for externref. We don't want to enforce that the struct is
- * anonymous in this case. (This renders externref intrinsics
- * non-unique, but this will go away with opaque pointers anyway.) */
- if (Infos.front().Kind == IITDescriptor::Struct &&
- Infos.front().Struct_NumElements == 0) {
- Infos = Infos.slice(1);
- return false;
- }
- return matchIntrinsicType(PT->getNonOpaquePointerElementType(), Infos,
- ArgTys, DeferredChecks, IsDeferredCheck);
- }
- // Consume IIT descriptors relating to the pointer element type.
- // FIXME: Intrinsic type matching of nested single value types or even
- // aggregates doesn't work properly with opaque pointers but hopefully
- // doesn't happen in practice.
- while (Infos.front().Kind == IITDescriptor::Pointer ||
- Infos.front().Kind == IITDescriptor::Vector)
- Infos = Infos.slice(1);
- assert((Infos.front().Kind != IITDescriptor::Argument ||
- Infos.front().getArgumentKind() == IITDescriptor::AK_MatchType) &&
- "Unsupported polymorphic pointer type with opaque pointer");
- Infos = Infos.slice(1);
- return false;
+ return !PT || PT->getAddressSpace() != D.Pointer_AddressSpace;
}
case IITDescriptor::Struct: {
@@ -1688,50 +1584,6 @@ static bool matchIntrinsicType(
return matchIntrinsicType(EltTy, Infos, ArgTys, DeferredChecks,
IsDeferredCheck);
}
- case IITDescriptor::PtrToArgument: {
- if (D.getArgumentNumber() >= ArgTys.size())
- return IsDeferredCheck || DeferCheck(Ty);
- Type * ReferenceType = ArgTys[D.getArgumentNumber()];
- PointerType *ThisArgType = dyn_cast<PointerType>(Ty);
- return (!ThisArgType ||
- !ThisArgType->isOpaqueOrPointeeTypeMatches(ReferenceType));
- }
- case IITDescriptor::PtrToElt: {
- if (D.getArgumentNumber() >= ArgTys.size())
- return IsDeferredCheck || DeferCheck(Ty);
- VectorType * ReferenceType =
- dyn_cast<VectorType> (ArgTys[D.getArgumentNumber()]);
- PointerType *ThisArgType = dyn_cast<PointerType>(Ty);
-
- if (!ThisArgType || !ReferenceType)
- return true;
- return !ThisArgType->isOpaqueOrPointeeTypeMatches(
- ReferenceType->getElementType());
- }
- case IITDescriptor::AnyPtrToElt: {
- unsigned RefArgNumber = D.getRefArgNumber();
- if (RefArgNumber >= ArgTys.size()) {
- if (IsDeferredCheck)
- return true;
- // If forward referencing, already add the pointer type and
- // defer the checks for later.
- ArgTys.push_back(Ty);
- return DeferCheck(Ty);
- }
-
- if (!IsDeferredCheck) {
- assert(D.getOverloadArgNumber() == ArgTys.size() &&
- "Table consistency error");
- ArgTys.push_back(Ty);
- }
-
- auto *ReferenceType = dyn_cast<VectorType>(ArgTys[RefArgNumber]);
- auto *ThisArgType = dyn_cast<PointerType>(Ty);
- if (!ThisArgType || !ReferenceType)
- return true;
- return !ThisArgType->isOpaqueOrPointeeTypeMatches(
- ReferenceType->getElementType());
- }
case IITDescriptor::VecOfAnyPtrsToElt: {
unsigned RefArgNumber = D.getRefArgNumber();
if (RefArgNumber >= ArgTys.size()) {
@@ -1757,12 +1609,7 @@ static bool matchIntrinsicType(
if (!ThisArgVecTy || !ReferenceType ||
(ReferenceType->getElementCount() != ThisArgVecTy->getElementCount()))
return true;
- PointerType *ThisArgEltTy =
- dyn_cast<PointerType>(ThisArgVecTy->getElementType());
- if (!ThisArgEltTy)
- return true;
- return !ThisArgEltTy->isOpaqueOrPointeeTypeMatches(
- ReferenceType->getElementType());
+ return !ThisArgVecTy->getElementType()->isPointerTy();
}
case IITDescriptor::VecElementArgument: {
if (D.getArgumentNumber() >= ArgTys.size())
diff --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index c208ab0f3d6b..7bd4503a689e 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "LLVMContextImpl.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -22,6 +21,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -44,9 +44,7 @@ bool GlobalValue::isMaterializable() const {
return F->isMaterializable();
return false;
}
-Error GlobalValue::materialize() {
- return getParent()->materialize(this);
-}
+Error GlobalValue::materialize() { return getParent()->materialize(this); }
/// Override destroyConstantImpl to make sure it doesn't get called on
/// GlobalValue's because they shouldn't be treated like other constants.
@@ -127,6 +125,16 @@ void GlobalObject::setAlignment(MaybeAlign Align) {
assert(getAlign() == Align && "Alignment representation error!");
}
+void GlobalObject::setAlignment(Align Align) {
+ assert(Align <= MaximumAlignment &&
+ "Alignment is greater than MaximumAlignment!");
+ unsigned AlignmentData = encode(Align);
+ unsigned OldData = getGlobalValueSubClassData();
+ setGlobalValueSubClassData((OldData & ~AlignmentMask) | AlignmentData);
+ assert(getAlign() && *getAlign() == Align &&
+ "Alignment representation error!");
+}
+
void GlobalObject::copyAttributesFrom(const GlobalObject *Src) {
GlobalValue::copyAttributesFrom(Src);
setAlignment(Src->getAlign());
@@ -428,35 +436,23 @@ GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant,
ThreadLocalMode TLMode,
std::optional<unsigned> AddressSpace,
bool isExternallyInitialized)
- : GlobalObject(Ty, Value::GlobalVariableVal,
- OperandTraits<GlobalVariable>::op_begin(this),
- InitVal != nullptr, Link, Name,
- AddressSpace
- ? *AddressSpace
- : M.getDataLayout().getDefaultGlobalsAddressSpace()),
- isConstantGlobal(constant),
- isExternallyInitializedConstant(isExternallyInitialized) {
- assert(!Ty->isFunctionTy() && PointerType::isValidElementType(Ty) &&
- "invalid type for global variable");
- setThreadLocalMode(TLMode);
- if (InitVal) {
- assert(InitVal->getType() == Ty &&
- "Initializer should be the same type as the GlobalVariable!");
- Op<0>() = InitVal;
- }
-
+ : GlobalVariable(Ty, constant, Link, InitVal, Name, TLMode,
+ AddressSpace
+ ? *AddressSpace
+ : M.getDataLayout().getDefaultGlobalsAddressSpace(),
+ isExternallyInitialized) {
if (Before)
- Before->getParent()->getGlobalList().insert(Before->getIterator(), this);
+ Before->getParent()->insertGlobalVariable(Before->getIterator(), this);
else
- M.getGlobalList().push_back(this);
+ M.insertGlobalVariable(this);
}
void GlobalVariable::removeFromParent() {
- getParent()->getGlobalList().remove(getIterator());
+ getParent()->removeGlobalVariable(this);
}
void GlobalVariable::eraseFromParent() {
- getParent()->getGlobalList().erase(getIterator());
+ getParent()->eraseGlobalVariable(this);
}
void GlobalVariable::setInitializer(Constant *InitVal) {
@@ -504,7 +500,7 @@ GlobalAlias::GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Link,
AddressSpace) {
setAliasee(Aliasee);
if (ParentModule)
- ParentModule->getAliasList().push_back(this);
+ ParentModule->insertAlias(this);
}
GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace,
@@ -535,13 +531,9 @@ GlobalAlias *GlobalAlias::create(const Twine &Name, GlobalValue *Aliasee) {
return create(Aliasee->getLinkage(), Name, Aliasee);
}
-void GlobalAlias::removeFromParent() {
- getParent()->getAliasList().remove(getIterator());
-}
+void GlobalAlias::removeFromParent() { getParent()->removeAlias(this); }
-void GlobalAlias::eraseFromParent() {
- getParent()->getAliasList().erase(getIterator());
-}
+void GlobalAlias::eraseFromParent() { getParent()->eraseAlias(this); }
void GlobalAlias::setAliasee(Constant *Aliasee) {
assert((!Aliasee || Aliasee->getType() == getType()) &&
@@ -565,7 +557,7 @@ GlobalIFunc::GlobalIFunc(Type *Ty, unsigned AddressSpace, LinkageTypes Link,
AddressSpace) {
setResolver(Resolver);
if (ParentModule)
- ParentModule->getIFuncList().push_back(this);
+ ParentModule->insertIFunc(this);
}
GlobalIFunc *GlobalIFunc::create(Type *Ty, unsigned AddressSpace,
@@ -574,13 +566,9 @@ GlobalIFunc *GlobalIFunc::create(Type *Ty, unsigned AddressSpace,
return new GlobalIFunc(Ty, AddressSpace, Link, Name, Resolver, ParentModule);
}
-void GlobalIFunc::removeFromParent() {
- getParent()->getIFuncList().remove(getIterator());
-}
+void GlobalIFunc::removeFromParent() { getParent()->removeIFunc(this); }
-void GlobalIFunc::eraseFromParent() {
- getParent()->getIFuncList().erase(getIterator());
-}
+void GlobalIFunc::eraseFromParent() { getParent()->eraseIFunc(this); }
const Function *GlobalIFunc::getResolverFunction() const {
return dyn_cast<Function>(getResolver()->stripPointerCastsAndAliases());
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index f871205843a7..094819dc39b5 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -60,15 +60,6 @@ Type *IRBuilderBase::getCurrentFunctionReturnType() const {
return BB->getParent()->getReturnType();
}
-Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) {
- auto *PT = cast<PointerType>(Ptr->getType());
- if (PT->isOpaqueOrPointeeTypeMatches(getInt8Ty()))
- return Ptr;
-
- // Otherwise, we need to insert a bitcast.
- return CreateBitCast(Ptr, getInt8PtrTy(PT->getAddressSpace()));
-}
-
DebugLoc IRBuilderBase::getCurrentDebugLocation() const {
for (auto &KV : MetadataToCopy)
if (KV.first == LLVMContext::MD_dbg)
@@ -102,9 +93,17 @@ Value *IRBuilderBase::CreateVScale(Constant *Scaling, const Twine &Name) {
Function *TheFn =
Intrinsic::getDeclaration(M, Intrinsic::vscale, {Scaling->getType()});
CallInst *CI = CreateCall(TheFn, {}, {}, Name);
- return cast<ConstantInt>(Scaling)->getSExtValue() == 1
- ? CI
- : CreateMul(CI, Scaling);
+ return cast<ConstantInt>(Scaling)->isOne() ? CI : CreateMul(CI, Scaling);
+}
+
+Value *IRBuilderBase::CreateElementCount(Type *DstType, ElementCount EC) {
+ Constant *MinEC = ConstantInt::get(DstType, EC.getKnownMinValue());
+ return EC.isScalable() ? CreateVScale(MinEC) : MinEC;
+}
+
+Value *IRBuilderBase::CreateTypeSize(Type *DstType, TypeSize Size) {
+ Constant *MinSize = ConstantInt::get(DstType, Size.getKnownMinValue());
+ return Size.isScalable() ? CreateVScale(MinSize) : MinSize;
}
Value *IRBuilderBase::CreateStepVector(Type *DstType, const Twine &Name) {
@@ -139,7 +138,6 @@ CallInst *IRBuilderBase::CreateMemSet(Value *Ptr, Value *Val, Value *Size,
MaybeAlign Align, bool isVolatile,
MDNode *TBAATag, MDNode *ScopeTag,
MDNode *NoAliasTag) {
- Ptr = getCastedInt8PtrValue(Ptr);
Value *Ops[] = {Ptr, Val, Size, getInt1(isVolatile)};
Type *Tys[] = { Ptr->getType(), Size->getType() };
Module *M = BB->getParent()->getParent();
@@ -168,7 +166,6 @@ CallInst *IRBuilderBase::CreateMemSetInline(Value *Dst, MaybeAlign DstAlign,
bool IsVolatile, MDNode *TBAATag,
MDNode *ScopeTag,
MDNode *NoAliasTag) {
- Dst = getCastedInt8PtrValue(Dst);
Value *Ops[] = {Dst, Val, Size, getInt1(IsVolatile)};
Type *Tys[] = {Dst->getType(), Size->getType()};
Module *M = BB->getParent()->getParent();
@@ -196,7 +193,6 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemSet(
Value *Ptr, Value *Val, Value *Size, Align Alignment, uint32_t ElementSize,
MDNode *TBAATag, MDNode *ScopeTag, MDNode *NoAliasTag) {
- Ptr = getCastedInt8PtrValue(Ptr);
Value *Ops[] = {Ptr, Val, Size, getInt32(ElementSize)};
Type *Tys[] = {Ptr->getType(), Size->getType()};
Module *M = BB->getParent()->getParent();
@@ -224,9 +220,6 @@ CallInst *IRBuilderBase::CreateMemTransferInst(
Intrinsic::ID IntrID, Value *Dst, MaybeAlign DstAlign, Value *Src,
MaybeAlign SrcAlign, Value *Size, bool isVolatile, MDNode *TBAATag,
MDNode *TBAAStructTag, MDNode *ScopeTag, MDNode *NoAliasTag) {
- Dst = getCastedInt8PtrValue(Dst);
- Src = getCastedInt8PtrValue(Src);
-
Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
Module *M = BB->getParent()->getParent();
@@ -261,9 +254,6 @@ CallInst *IRBuilderBase::CreateMemCpyInline(
Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign,
Value *Size, bool IsVolatile, MDNode *TBAATag, MDNode *TBAAStructTag,
MDNode *ScopeTag, MDNode *NoAliasTag) {
- Dst = getCastedInt8PtrValue(Dst);
- Src = getCastedInt8PtrValue(Src);
-
Value *Ops[] = {Dst, Src, Size, getInt1(IsVolatile)};
Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()};
Function *F = BB->getParent();
@@ -303,9 +293,6 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy(
"Pointer alignment must be at least element size");
assert(SrcAlign >= ElementSize &&
"Pointer alignment must be at least element size");
- Dst = getCastedInt8PtrValue(Dst);
- Src = getCastedInt8PtrValue(Src);
-
Value *Ops[] = {Dst, Src, Size, getInt32(ElementSize)};
Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()};
Module *M = BB->getParent()->getParent();
@@ -341,9 +328,6 @@ CallInst *IRBuilderBase::CreateMemMove(Value *Dst, MaybeAlign DstAlign,
Value *Size, bool isVolatile,
MDNode *TBAATag, MDNode *ScopeTag,
MDNode *NoAliasTag) {
- Dst = getCastedInt8PtrValue(Dst);
- Src = getCastedInt8PtrValue(Src);
-
Value *Ops[] = {Dst, Src, Size, getInt1(isVolatile)};
Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
Module *M = BB->getParent()->getParent();
@@ -378,9 +362,6 @@ CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemMove(
"Pointer alignment must be at least element size");
assert(SrcAlign >= ElementSize &&
"Pointer alignment must be at least element size");
- Dst = getCastedInt8PtrValue(Dst);
- Src = getCastedInt8PtrValue(Src);
-
Value *Ops[] = {Dst, Src, Size, getInt32(ElementSize)};
Type *Tys[] = {Dst->getType(), Src->getType(), Size->getType()};
Module *M = BB->getParent()->getParent();
@@ -474,10 +455,17 @@ CallInst *IRBuilderBase::CreateFPMinReduce(Value *Src) {
return getReductionIntrinsic(Intrinsic::vector_reduce_fmin, Src);
}
+CallInst *IRBuilderBase::CreateFPMaximumReduce(Value *Src) {
+ return getReductionIntrinsic(Intrinsic::vector_reduce_fmaximum, Src);
+}
+
+CallInst *IRBuilderBase::CreateFPMinimumReduce(Value *Src) {
+ return getReductionIntrinsic(Intrinsic::vector_reduce_fminimum, Src);
+}
+
CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) {
assert(isa<PointerType>(Ptr->getType()) &&
"lifetime.start only applies to pointers.");
- Ptr = getCastedInt8PtrValue(Ptr);
if (!Size)
Size = getInt64(-1);
else
@@ -493,7 +481,6 @@ CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) {
CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) {
assert(isa<PointerType>(Ptr->getType()) &&
"lifetime.end only applies to pointers.");
- Ptr = getCastedInt8PtrValue(Ptr);
if (!Size)
Size = getInt64(-1);
else
@@ -510,7 +497,6 @@ CallInst *IRBuilderBase::CreateInvariantStart(Value *Ptr, ConstantInt *Size) {
assert(isa<PointerType>(Ptr->getType()) &&
"invariant.start only applies to pointers.");
- Ptr = getCastedInt8PtrValue(Ptr);
if (!Size)
Size = getInt64(-1);
else
@@ -590,7 +576,6 @@ CallInst *IRBuilderBase::CreateMaskedLoad(Type *Ty, Value *Ptr, Align Alignment,
const Twine &Name) {
auto *PtrTy = cast<PointerType>(Ptr->getType());
assert(Ty->isVectorTy() && "Type should be vector");
- assert(PtrTy->isOpaqueOrPointeeTypeMatches(Ty) && "Wrong element type");
assert(Mask && "Mask should not be all-ones (null)");
if (!PassThru)
PassThru = PoisonValue::get(Ty);
@@ -611,7 +596,6 @@ CallInst *IRBuilderBase::CreateMaskedStore(Value *Val, Value *Ptr,
auto *PtrTy = cast<PointerType>(Ptr->getType());
Type *DataTy = Val->getType();
assert(DataTy->isVectorTy() && "Val should be a vector");
- assert(PtrTy->isOpaqueOrPointeeTypeMatches(DataTy) && "Wrong element type");
assert(Mask && "Mask should not be all-ones (null)");
Type *OverloadedTypes[] = { DataTy, PtrTy };
Value *Ops[] = {Val, Ptr, getInt32(Alignment.value()), Mask};
@@ -646,15 +630,10 @@ CallInst *IRBuilderBase::CreateMaskedGather(Type *Ty, Value *Ptrs,
auto *VecTy = cast<VectorType>(Ty);
ElementCount NumElts = VecTy->getElementCount();
auto *PtrsTy = cast<VectorType>(Ptrs->getType());
- assert(cast<PointerType>(PtrsTy->getElementType())
- ->isOpaqueOrPointeeTypeMatches(
- cast<VectorType>(Ty)->getElementType()) &&
- "Element type mismatch");
assert(NumElts == PtrsTy->getElementCount() && "Element count mismatch");
if (!Mask)
- Mask = Constant::getAllOnesValue(
- VectorType::get(Type::getInt1Ty(Context), NumElts));
+ Mask = getAllOnesMask(NumElts);
if (!PassThru)
PassThru = PoisonValue::get(Ty);
@@ -681,16 +660,8 @@ CallInst *IRBuilderBase::CreateMaskedScatter(Value *Data, Value *Ptrs,
auto *DataTy = cast<VectorType>(Data->getType());
ElementCount NumElts = PtrsTy->getElementCount();
-#ifndef NDEBUG
- auto *PtrTy = cast<PointerType>(PtrsTy->getElementType());
- assert(NumElts == DataTy->getElementCount() &&
- PtrTy->isOpaqueOrPointeeTypeMatches(DataTy->getElementType()) &&
- "Incompatible pointer and data types");
-#endif
-
if (!Mask)
- Mask = Constant::getAllOnesValue(
- VectorType::get(Type::getInt1Ty(Context), NumElts));
+ Mask = getAllOnesMask(NumElts);
Type *OverloadedTypes[] = {DataTy, PtrsTy};
Value *Ops[] = {Data, Ptrs, getInt32(Alignment.value()), Mask};
@@ -711,12 +682,7 @@ CallInst *IRBuilderBase::CreateMaskedScatter(Value *Data, Value *Ptrs,
CallInst *IRBuilderBase::CreateMaskedExpandLoad(Type *Ty, Value *Ptr,
Value *Mask, Value *PassThru,
const Twine &Name) {
- auto *PtrTy = cast<PointerType>(Ptr->getType());
assert(Ty->isVectorTy() && "Type should be vector");
- assert(PtrTy->isOpaqueOrPointeeTypeMatches(
- cast<FixedVectorType>(Ty)->getElementType()) &&
- "Wrong element type");
- (void)PtrTy;
assert(Mask && "Mask should not be all-ones (null)");
if (!PassThru)
PassThru = PoisonValue::get(Ty);
@@ -733,13 +699,8 @@ CallInst *IRBuilderBase::CreateMaskedExpandLoad(Type *Ty, Value *Ptr,
/// be accessed in memory
CallInst *IRBuilderBase::CreateMaskedCompressStore(Value *Val, Value *Ptr,
Value *Mask) {
- auto *PtrTy = cast<PointerType>(Ptr->getType());
Type *DataTy = Val->getType();
assert(DataTy->isVectorTy() && "Val should be a vector");
- assert(PtrTy->isOpaqueOrPointeeTypeMatches(
- cast<FixedVectorType>(DataTy)->getElementType()) &&
- "Wrong element type");
- (void)PtrTy;
assert(Mask && "Mask should not be all-ones (null)");
Type *OverloadedTypes[] = {DataTy};
Value *Ops[] = {Val, Ptr, Mask};
@@ -1018,6 +979,23 @@ CallInst *IRBuilderBase::CreateConstrainedFPBinOp(
return C;
}
+CallInst *IRBuilderBase::CreateConstrainedFPUnroundedBinOp(
+ Intrinsic::ID ID, Value *L, Value *R, Instruction *FMFSource,
+ const Twine &Name, MDNode *FPMathTag,
+ std::optional<fp::ExceptionBehavior> Except) {
+ Value *ExceptV = getConstrainedFPExcept(Except);
+
+ FastMathFlags UseFMF = FMF;
+ if (FMFSource)
+ UseFMF = FMFSource->getFastMathFlags();
+
+ CallInst *C =
+ CreateIntrinsic(ID, {L->getType()}, {L, R, ExceptV}, nullptr, Name);
+ setConstrainedFPCallAttr(C);
+ setFPAttrs(C, FPMathTag, UseFMF);
+ return C;
+}
+
Value *IRBuilderBase::CreateNAryOp(unsigned Opc, ArrayRef<Value *> Ops,
const Twine &Name, MDNode *FPMathTag) {
if (Instruction::isBinaryOp(Opc)) {
@@ -1143,9 +1121,6 @@ Value *IRBuilderBase::CreatePtrDiff(Type *ElemTy, Value *LHS, Value *RHS,
const Twine &Name) {
assert(LHS->getType() == RHS->getType() &&
"Pointer subtraction operand types must match!");
- assert(cast<PointerType>(LHS->getType())
- ->isOpaqueOrPointeeTypeMatches(ElemTy) &&
- "Pointer type must match element type");
Value *LHS_int = CreatePtrToInt(LHS, Type::getInt64Ty(Context));
Value *RHS_int = CreatePtrToInt(RHS, Type::getInt64Ty(Context));
Value *Difference = CreateSub(LHS_int, RHS_int);
@@ -1156,50 +1131,34 @@ Value *IRBuilderBase::CreatePtrDiff(Type *ElemTy, Value *LHS, Value *RHS,
Value *IRBuilderBase::CreateLaunderInvariantGroup(Value *Ptr) {
assert(isa<PointerType>(Ptr->getType()) &&
"launder.invariant.group only applies to pointers.");
- // FIXME: we could potentially avoid casts to/from i8*.
auto *PtrType = Ptr->getType();
- auto *Int8PtrTy = getInt8PtrTy(PtrType->getPointerAddressSpace());
- if (PtrType != Int8PtrTy)
- Ptr = CreateBitCast(Ptr, Int8PtrTy);
Module *M = BB->getParent()->getParent();
Function *FnLaunderInvariantGroup = Intrinsic::getDeclaration(
- M, Intrinsic::launder_invariant_group, {Int8PtrTy});
+ M, Intrinsic::launder_invariant_group, {PtrType});
- assert(FnLaunderInvariantGroup->getReturnType() == Int8PtrTy &&
+ assert(FnLaunderInvariantGroup->getReturnType() == PtrType &&
FnLaunderInvariantGroup->getFunctionType()->getParamType(0) ==
- Int8PtrTy &&
+ PtrType &&
"LaunderInvariantGroup should take and return the same type");
- CallInst *Fn = CreateCall(FnLaunderInvariantGroup, {Ptr});
-
- if (PtrType != Int8PtrTy)
- return CreateBitCast(Fn, PtrType);
- return Fn;
+ return CreateCall(FnLaunderInvariantGroup, {Ptr});
}
Value *IRBuilderBase::CreateStripInvariantGroup(Value *Ptr) {
assert(isa<PointerType>(Ptr->getType()) &&
"strip.invariant.group only applies to pointers.");
- // FIXME: we could potentially avoid casts to/from i8*.
auto *PtrType = Ptr->getType();
- auto *Int8PtrTy = getInt8PtrTy(PtrType->getPointerAddressSpace());
- if (PtrType != Int8PtrTy)
- Ptr = CreateBitCast(Ptr, Int8PtrTy);
Module *M = BB->getParent()->getParent();
Function *FnStripInvariantGroup = Intrinsic::getDeclaration(
- M, Intrinsic::strip_invariant_group, {Int8PtrTy});
+ M, Intrinsic::strip_invariant_group, {PtrType});
- assert(FnStripInvariantGroup->getReturnType() == Int8PtrTy &&
+ assert(FnStripInvariantGroup->getReturnType() == PtrType &&
FnStripInvariantGroup->getFunctionType()->getParamType(0) ==
- Int8PtrTy &&
+ PtrType &&
"StripInvariantGroup should take and return the same type");
- CallInst *Fn = CreateCall(FnStripInvariantGroup, {Ptr});
-
- if (PtrType != Int8PtrTy)
- return CreateBitCast(Fn, PtrType);
- return Fn;
+ return CreateCall(FnStripInvariantGroup, {Ptr});
}
Value *IRBuilderBase::CreateVectorReverse(Value *V, const Twine &Name) {
@@ -1295,16 +1254,13 @@ Value *IRBuilderBase::CreatePreserveArrayAccessIndex(
auto *BaseType = Base->getType();
assert(isa<PointerType>(BaseType) &&
"Invalid Base ptr type for preserve.array.access.index.");
- assert(cast<PointerType>(BaseType)->isOpaqueOrPointeeTypeMatches(ElTy) &&
- "Pointer element type mismatch");
Value *LastIndexV = getInt32(LastIndex);
Constant *Zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
SmallVector<Value *, 4> IdxList(Dimension, Zero);
IdxList.push_back(LastIndexV);
- Type *ResultType =
- GetElementPtrInst::getGEPReturnType(ElTy, Base, IdxList);
+ Type *ResultType = GetElementPtrInst::getGEPReturnType(Base, IdxList);
Module *M = BB->getParent()->getParent();
Function *FnPreserveArrayAccessIndex = Intrinsic::getDeclaration(
@@ -1346,13 +1302,11 @@ Value *IRBuilderBase::CreatePreserveStructAccessIndex(
auto *BaseType = Base->getType();
assert(isa<PointerType>(BaseType) &&
"Invalid Base ptr type for preserve.struct.access.index.");
- assert(cast<PointerType>(BaseType)->isOpaqueOrPointeeTypeMatches(ElTy) &&
- "Pointer element type mismatch");
Value *GEPIndex = getInt32(Index);
Constant *Zero = ConstantInt::get(Type::getInt32Ty(Context), 0);
Type *ResultType =
- GetElementPtrInst::getGEPReturnType(ElTy, Base, {Zero, GEPIndex});
+ GetElementPtrInst::getGEPReturnType(Base, {Zero, GEPIndex});
Module *M = BB->getParent()->getParent();
Function *FnPreserveStructAccessIndex = Intrinsic::getDeclaration(
@@ -1369,6 +1323,14 @@ Value *IRBuilderBase::CreatePreserveStructAccessIndex(
return Fn;
}
+Value *IRBuilderBase::createIsFPClass(Value *FPNum, unsigned Test) {
+ ConstantInt *TestV = getInt32(Test);
+ Module *M = BB->getParent()->getParent();
+ Function *FnIsFPClass =
+ Intrinsic::getDeclaration(M, Intrinsic::is_fpclass, {FPNum->getType()});
+ return CreateCall(FnIsFPClass, {FPNum, TestV});
+}
+
CallInst *IRBuilderBase::CreateAlignmentAssumptionHelper(const DataLayout &DL,
Value *PtrValue,
Value *AlignValue,
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 9c88ca17ebde..0dcf0ac6a78a 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -12,6 +12,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -138,9 +139,10 @@ Instruction *Instruction::getInsertionPointAfterDef() {
} else if (auto *II = dyn_cast<InvokeInst>(this)) {
InsertBB = II->getNormalDest();
InsertPt = InsertBB->getFirstInsertionPt();
- } else if (auto *CB = dyn_cast<CallBrInst>(this)) {
- InsertBB = CB->getDefaultDest();
- InsertPt = InsertBB->getFirstInsertionPt();
+ } else if (isa<CallBrInst>(this)) {
+    // Def is available in multiple successors, so there's no single dominating
+ // insertion point.
+ return nullptr;
} else {
assert(!isTerminator() && "Only invoke/callbr terminators return value");
InsertBB = getParent();
@@ -223,7 +225,7 @@ void Instruction::dropPoisonGeneratingMetadata() {
eraseMetadata(LLVMContext::MD_align);
}
-void Instruction::dropUndefImplyingAttrsAndUnknownMetadata(
+void Instruction::dropUBImplyingAttrsAndUnknownMetadata(
ArrayRef<unsigned> KnownIDs) {
dropUnknownNonDebugMetadata(KnownIDs);
auto *CB = dyn_cast<CallBase>(this);
@@ -242,6 +244,16 @@ void Instruction::dropUndefImplyingAttrsAndUnknownMetadata(
CB->removeRetAttrs(UBImplyingAttributes);
}
+void Instruction::dropUBImplyingAttrsAndMetadata() {
+ // !annotation metadata does not impact semantics.
+ // !range, !nonnull and !align produce poison, so they are safe to speculate.
+  // !noundef and various AA metadata must be dropped, as they generally
+  // produce immediate undefined behavior.
+ unsigned KnownIDs[] = {LLVMContext::MD_annotation, LLVMContext::MD_range,
+ LLVMContext::MD_nonnull, LLVMContext::MD_align};
+ dropUBImplyingAttrsAndUnknownMetadata(KnownIDs);
+}
+
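// Hypothetical caller, shown only to illustrate when the new
// dropUBImplyingAttrsAndMetadata helper is intended to run; speculateBefore
// and its parameters are assumptions, not upstream code.
#include "llvm/IR/Instruction.h"
using namespace llvm;

static void speculateBefore(Instruction *I, Instruction *InsertPt) {
  // Once I no longer executes under its original control dependence, the
  // attributes/metadata that trigger immediate UB (e.g. !noundef, AA info)
  // must go, while the poison-producing kinds listed above may be kept.
  I->dropUBImplyingAttrsAndMetadata();
  I->moveBefore(InsertPt);
}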
bool Instruction::isExact() const {
return cast<PossiblyExactOperator>(this)->isExact();
}
@@ -479,11 +491,11 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
}
}
-/// Return true if both instructions have the same special state. This must be
-/// kept in sync with FunctionComparator::cmpOperations in
+/// This must be kept in sync with FunctionComparator::cmpOperations in
/// lib/Transforms/IPO/MergeFunctions.cpp.
-static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2,
- bool IgnoreAlignment = false) {
+bool Instruction::hasSameSpecialState(const Instruction *I2,
+ bool IgnoreAlignment) const {
+ auto I1 = this;
assert(I1->getOpcode() == I2->getOpcode() &&
"Can not compare special state of different instructions");
@@ -562,7 +574,7 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
// If both instructions have no operands, they are identical.
if (getNumOperands() == 0 && I->getNumOperands() == 0)
- return haveSameSpecialState(this, I);
+ return this->hasSameSpecialState(I);
// We have two instructions of identical opcode and #operands. Check to see
// if all operands are the same.
@@ -576,7 +588,7 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
otherPHI->block_begin());
}
- return haveSameSpecialState(this, I);
+ return this->hasSameSpecialState(I);
}
// Keep this in sync with FunctionComparator::cmpOperations in
@@ -602,7 +614,7 @@ bool Instruction::isSameOperationAs(const Instruction *I,
getOperand(i)->getType() != I->getOperand(i)->getType())
return false;
- return haveSameSpecialState(this, I, IgnoreAlignment);
+ return this->hasSameSpecialState(I, IgnoreAlignment);
}
bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const {
@@ -732,14 +744,89 @@ bool Instruction::isVolatile() const {
}
}
-bool Instruction::mayThrow() const {
- if (const CallInst *CI = dyn_cast<CallInst>(this))
- return !CI->doesNotThrow();
- if (const auto *CRI = dyn_cast<CleanupReturnInst>(this))
- return CRI->unwindsToCaller();
- if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(this))
- return CatchSwitch->unwindsToCaller();
- return isa<ResumeInst>(this);
+Type *Instruction::getAccessType() const {
+ switch (getOpcode()) {
+ case Instruction::Store:
+ return cast<StoreInst>(this)->getValueOperand()->getType();
+ case Instruction::Load:
+ case Instruction::AtomicRMW:
+ return getType();
+ case Instruction::AtomicCmpXchg:
+ return cast<AtomicCmpXchgInst>(this)->getNewValOperand()->getType();
+ case Instruction::Call:
+ case Instruction::Invoke:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(this)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::masked_load:
+ case Intrinsic::masked_gather:
+ case Intrinsic::masked_expandload:
+ case Intrinsic::vp_load:
+ case Intrinsic::vp_gather:
+ case Intrinsic::experimental_vp_strided_load:
+ return II->getType();
+ case Intrinsic::masked_store:
+ case Intrinsic::masked_scatter:
+ case Intrinsic::masked_compressstore:
+ case Intrinsic::vp_store:
+ case Intrinsic::vp_scatter:
+ case Intrinsic::experimental_vp_strided_store:
+ return II->getOperand(0)->getType();
+ default:
+ break;
+ }
+ }
+ }
+
+ return nullptr;
+}
+
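// Illustrative sketch of querying the new getAccessType accessor;
// accessesScalableType is an assumed helper, not part of this change.
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Type.h"
using namespace llvm;

static bool accessesScalableType(const Instruction &I) {
  // Non-null only for loads, stores, atomics and the handled masked/VP
  // memory intrinsics; every other instruction reports no access type.
  if (Type *AccessTy = I.getAccessType())
    return AccessTy->isScalableTy();
  return false;
}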
+static bool canUnwindPastLandingPad(const LandingPadInst *LP,
+ bool IncludePhaseOneUnwind) {
+ // Because phase one unwinding skips cleanup landingpads, we effectively
+ // unwind past this frame, and callers need to have valid unwind info.
+ if (LP->isCleanup())
+ return IncludePhaseOneUnwind;
+
+ for (unsigned I = 0; I < LP->getNumClauses(); ++I) {
+ Constant *Clause = LP->getClause(I);
+ // catch ptr null catches all exceptions.
+ if (LP->isCatch(I) && isa<ConstantPointerNull>(Clause))
+ return false;
+ // filter [0 x ptr] catches all exceptions.
+ if (LP->isFilter(I) && Clause->getType()->getArrayNumElements() == 0)
+ return false;
+ }
+
+ // May catch only some subset of exceptions, in which case other exceptions
+ // will continue unwinding.
+ return true;
+}
+
+bool Instruction::mayThrow(bool IncludePhaseOneUnwind) const {
+ switch (getOpcode()) {
+ case Instruction::Call:
+ return !cast<CallInst>(this)->doesNotThrow();
+ case Instruction::CleanupRet:
+ return cast<CleanupReturnInst>(this)->unwindsToCaller();
+ case Instruction::CatchSwitch:
+ return cast<CatchSwitchInst>(this)->unwindsToCaller();
+ case Instruction::Resume:
+ return true;
+ case Instruction::Invoke: {
+ // Landingpads themselves don't unwind -- however, an invoke of a skipped
+ // landingpad may continue unwinding.
+ BasicBlock *UnwindDest = cast<InvokeInst>(this)->getUnwindDest();
+ Instruction *Pad = UnwindDest->getFirstNonPHI();
+ if (auto *LP = dyn_cast<LandingPadInst>(Pad))
+ return canUnwindPastLandingPad(LP, IncludePhaseOneUnwind);
+ return false;
+ }
+ case Instruction::CleanupPad:
+ // Treat the same as cleanup landingpad.
+ return IncludePhaseOneUnwind;
+ default:
+ return false;
+ }
}
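// A small, assumed example of how the IncludePhaseOneUnwind flag changes the
// answer; nothing below is part of the patch itself.
#include "llvm/IR/Instruction.h"
using namespace llvm;

static bool mayUnwindPastFrame(const Instruction &I) {
  // With the flag set, an invoke whose landing pad is a cleanup (or catches
  // only some exception types) still counts as throwing, because phase-one
  // unwinding can walk through this frame into the caller.
  return I.mayThrow(/*IncludePhaseOneUnwind=*/true);
}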
bool Instruction::mayHaveSideEffects() const {
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 7c343a0ff00a..cb0ac0f8eae6 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -325,6 +325,22 @@ Intrinsic::ID CallBase::getIntrinsicID() const {
return Intrinsic::not_intrinsic;
}
+FPClassTest CallBase::getRetNoFPClass() const {
+ FPClassTest Mask = Attrs.getRetNoFPClass();
+
+ if (const Function *F = getCalledFunction())
+ Mask |= F->getAttributes().getRetNoFPClass();
+ return Mask;
+}
+
+FPClassTest CallBase::getParamNoFPClass(unsigned i) const {
+ FPClassTest Mask = Attrs.getParamNoFPClass(i);
+
+ if (const Function *F = getCalledFunction())
+ Mask |= F->getAttributes().getParamNoFPClass(i);
+ return Mask;
+}
+
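// Minimal sketch of consuming the merged nofpclass masks; the helper below is
// an assumption for illustration only.
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

static bool retCannotBeNan(const CallBase &CB) {
  // The returned mask is the union of the call-site attribute and the
  // attribute on the resolved callee, so either side suffices.
  return (CB.getRetNoFPClass() & fcNan) == fcNan;
}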
bool CallBase::isReturnNonNull() const {
if (hasRetAttr(Attribute::NonNull))
return true;
@@ -1577,7 +1593,6 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
Align Align, AtomicOrdering Order, SyncScope::ID SSID,
Instruction *InsertBef)
: UnaryInstruction(Ty, Load, Ptr, InsertBef) {
- assert(cast<PointerType>(Ptr->getType())->isOpaqueOrPointeeTypeMatches(Ty));
setVolatile(isVolatile);
setAlignment(Align);
setAtomic(Order, SSID);
@@ -1589,7 +1604,6 @@ LoadInst::LoadInst(Type *Ty, Value *Ptr, const Twine &Name, bool isVolatile,
Align Align, AtomicOrdering Order, SyncScope::ID SSID,
BasicBlock *InsertAE)
: UnaryInstruction(Ty, Load, Ptr, InsertAE) {
- assert(cast<PointerType>(Ptr->getType())->isOpaqueOrPointeeTypeMatches(Ty));
setVolatile(isVolatile);
setAlignment(Align);
setAtomic(Order, SSID);
@@ -1605,9 +1619,6 @@ void StoreInst::AssertOK() {
assert(getOperand(0) && getOperand(1) && "Both operands must be non-null!");
assert(getOperand(1)->getType()->isPointerTy() &&
"Ptr must have pointer type!");
- assert(cast<PointerType>(getOperand(1)->getType())
- ->isOpaqueOrPointeeTypeMatches(getOperand(0)->getType()) &&
- "Ptr must be a pointer to Val type!");
}
StoreInst::StoreInst(Value *val, Value *addr, Instruction *InsertBefore)
@@ -1687,12 +1698,6 @@ void AtomicCmpXchgInst::Init(Value *Ptr, Value *Cmp, Value *NewVal,
"All operands must be non-null!");
assert(getOperand(0)->getType()->isPointerTy() &&
"Ptr must have pointer type!");
- assert(cast<PointerType>(getOperand(0)->getType())
- ->isOpaqueOrPointeeTypeMatches(getOperand(1)->getType()) &&
- "Ptr must be a pointer to Cmp type!");
- assert(cast<PointerType>(getOperand(0)->getType())
- ->isOpaqueOrPointeeTypeMatches(getOperand(2)->getType()) &&
- "Ptr must be a pointer to NewVal type!");
assert(getOperand(1)->getType() == getOperand(2)->getType() &&
"Cmp type and NewVal type must be same!");
}
@@ -1745,9 +1750,6 @@ void AtomicRMWInst::Init(BinOp Operation, Value *Ptr, Value *Val,
"All operands must be non-null!");
assert(getOperand(0)->getType()->isPointerTy() &&
"Ptr must have pointer type!");
- assert(cast<PointerType>(getOperand(0)->getType())
- ->isOpaqueOrPointeeTypeMatches(getOperand(1)->getType()) &&
- "Ptr must be a pointer to Val type!");
assert(Ordering != AtomicOrdering::NotAtomic &&
"AtomicRMW instructions must be atomic!");
}
@@ -2148,8 +2150,8 @@ void ShuffleVectorInst::commute() {
SmallVector<int, 16> NewMask(NumMaskElts);
for (int i = 0; i != NumMaskElts; ++i) {
int MaskElt = getMaskValue(i);
- if (MaskElt == UndefMaskElem) {
- NewMask[i] = UndefMaskElem;
+ if (MaskElt == PoisonMaskElem) {
+ NewMask[i] = PoisonMaskElem;
continue;
}
assert(MaskElt >= 0 && MaskElt < 2 * NumOpElts && "Out-of-range mask");
@@ -2170,11 +2172,11 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
int V1Size =
cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue();
for (int Elem : Mask)
- if (Elem != UndefMaskElem && Elem >= V1Size * 2)
+ if (Elem != PoisonMaskElem && Elem >= V1Size * 2)
return false;
if (isa<ScalableVectorType>(V1->getType()))
- if ((Mask[0] != 0 && Mask[0] != UndefMaskElem) || !all_equal(Mask))
+ if ((Mask[0] != 0 && Mask[0] != PoisonMaskElem) || !all_equal(Mask))
return false;
return true;
@@ -2273,8 +2275,8 @@ Constant *ShuffleVectorInst::convertShuffleMaskForBitcode(ArrayRef<int> Mask,
}
SmallVector<Constant *, 16> MaskConst;
for (int Elem : Mask) {
- if (Elem == UndefMaskElem)
- MaskConst.push_back(UndefValue::get(Int32Ty));
+ if (Elem == PoisonMaskElem)
+ MaskConst.push_back(PoisonValue::get(Int32Ty));
else
MaskConst.push_back(ConstantInt::get(Int32Ty, Elem));
}
@@ -2501,10 +2503,10 @@ bool ShuffleVectorInst::isInsertSubvectorMask(ArrayRef<int> Mask,
// Determine lo/hi span ranges.
// TODO: How should we handle undefs at the start of subvector insertions?
- int Src0Lo = Src0Elts.countTrailingZeros();
- int Src1Lo = Src1Elts.countTrailingZeros();
- int Src0Hi = NumMaskElts - Src0Elts.countLeadingZeros();
- int Src1Hi = NumMaskElts - Src1Elts.countLeadingZeros();
+ int Src0Lo = Src0Elts.countr_zero();
+ int Src1Lo = Src1Elts.countr_zero();
+ int Src0Hi = NumMaskElts - Src0Elts.countl_zero();
+ int Src1Hi = NumMaskElts - Src1Elts.countl_zero();
   // If src0 is in place, see if the src1 elements are in place within their own
// span.
@@ -2611,7 +2613,7 @@ static bool isReplicationMaskWithParams(ArrayRef<int> Mask,
"Run out of mask?");
Mask = Mask.drop_front(ReplicationFactor);
if (!all_of(CurrSubMask, [CurrElt](int MaskElt) {
- return MaskElt == UndefMaskElem || MaskElt == CurrElt;
+ return MaskElt == PoisonMaskElem || MaskElt == CurrElt;
}))
return false;
}
@@ -2623,7 +2625,7 @@ static bool isReplicationMaskWithParams(ArrayRef<int> Mask,
bool ShuffleVectorInst::isReplicationMask(ArrayRef<int> Mask,
int &ReplicationFactor, int &VF) {
// undef-less case is trivial.
- if (!llvm::is_contained(Mask, UndefMaskElem)) {
+ if (!llvm::is_contained(Mask, PoisonMaskElem)) {
ReplicationFactor =
Mask.take_while([](int MaskElt) { return MaskElt == 0; }).size();
if (ReplicationFactor == 0 || Mask.size() % ReplicationFactor != 0)
@@ -2641,7 +2643,7 @@ bool ShuffleVectorInst::isReplicationMask(ArrayRef<int> Mask,
// Before doing that, let's perform basic correctness checking first.
int Largest = -1;
for (int MaskElt : Mask) {
- if (MaskElt == UndefMaskElem)
+ if (MaskElt == PoisonMaskElem)
continue;
// Elements must be in non-decreasing order.
if (MaskElt < Largest)
@@ -2687,11 +2689,11 @@ bool ShuffleVectorInst::isOneUseSingleSourceMask(ArrayRef<int> Mask, int VF) {
return false;
for (unsigned K = 0, Sz = Mask.size(); K < Sz; K += VF) {
ArrayRef<int> SubMask = Mask.slice(K, VF);
- if (all_of(SubMask, [](int Idx) { return Idx == UndefMaskElem; }))
+ if (all_of(SubMask, [](int Idx) { return Idx == PoisonMaskElem; }))
continue;
SmallBitVector Used(VF, false);
for_each(SubMask, [&Used, VF](int Idx) {
- if (Idx != UndefMaskElem && Idx < VF)
+ if (Idx != PoisonMaskElem && Idx < VF)
Used.set(Idx);
});
if (!Used.all())
@@ -2712,6 +2714,98 @@ bool ShuffleVectorInst::isOneUseSingleSourceMask(int VF) const {
return isOneUseSingleSourceMask(ShuffleMask, VF);
}
+bool ShuffleVectorInst::isInterleave(unsigned Factor) {
+ FixedVectorType *OpTy = dyn_cast<FixedVectorType>(getOperand(0)->getType());
+ // shuffle_vector can only interleave fixed length vectors - for scalable
+  // shuffle_vector can only interleave fixed-length vectors; for scalable
+  // vectors, see the @llvm.experimental.vector.interleave2 intrinsic.
+ return false;
+ unsigned OpNumElts = OpTy->getNumElements();
+
+ return isInterleaveMask(ShuffleMask, Factor, OpNumElts * 2);
+}
+
+bool ShuffleVectorInst::isInterleaveMask(
+ ArrayRef<int> Mask, unsigned Factor, unsigned NumInputElts,
+ SmallVectorImpl<unsigned> &StartIndexes) {
+ unsigned NumElts = Mask.size();
+ if (NumElts % Factor)
+ return false;
+
+ unsigned LaneLen = NumElts / Factor;
+ if (!isPowerOf2_32(LaneLen))
+ return false;
+
+ StartIndexes.resize(Factor);
+
+ // Check whether each element matches the general interleaved rule.
+ // Ignore undef elements, as long as the defined elements match the rule.
+ // Outer loop processes all factors (x, y, z in the above example)
+ unsigned I = 0, J;
+ for (; I < Factor; I++) {
+ unsigned SavedLaneValue;
+ unsigned SavedNoUndefs = 0;
+
+ // Inner loop processes consecutive accesses (x, x+1... in the example)
+ for (J = 0; J < LaneLen - 1; J++) {
+ // Lane computes x's position in the Mask
+ unsigned Lane = J * Factor + I;
+ unsigned NextLane = Lane + Factor;
+ int LaneValue = Mask[Lane];
+ int NextLaneValue = Mask[NextLane];
+
+ // If both are defined, values must be sequential
+ if (LaneValue >= 0 && NextLaneValue >= 0 &&
+ LaneValue + 1 != NextLaneValue)
+ break;
+
+ // If the next value is undef, save the current one as reference
+ if (LaneValue >= 0 && NextLaneValue < 0) {
+ SavedLaneValue = LaneValue;
+ SavedNoUndefs = 1;
+ }
+
+ // Undefs are allowed, but defined elements must still be consecutive:
+      // e.g., x, ..., undef, ..., x + 2, ..., undef, ..., undef, ..., x + 5, ...
+      // Verify this by remembering the last non-undef value seen before a run
+      // of undefs, and checking that the next non-undef value is incremented
+      // by the corresponding distance.
+ if (SavedNoUndefs > 0 && LaneValue < 0) {
+ SavedNoUndefs++;
+ if (NextLaneValue >= 0 &&
+ SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue)
+ break;
+ }
+ }
+
+ if (J < LaneLen - 1)
+ return false;
+
+ int StartMask = 0;
+ if (Mask[I] >= 0) {
+ // Check that the start of the I range (J=0) is greater than 0
+ StartMask = Mask[I];
+ } else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
+ // StartMask defined by the last value in lane
+ StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
+ } else if (SavedNoUndefs > 0) {
+ // StartMask defined by some non-zero value in the j loop
+ StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
+ }
+ // else StartMask remains set to 0, i.e. all elements are undefs
+
+ if (StartMask < 0)
+ return false;
+    // We must stay within the vectors; this case can happen with undefs.
+ if (StartMask + LaneLen > NumInputElts)
+ return false;
+
+ StartIndexes[I] = StartMask;
+ }
+
+ return true;
+}
+
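// A worked example of the mask shape accepted above; the demo function is an
// assumption added for illustration.
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool demoInterleaveMask() {
  // <0,4,1,5,2,6,3,7> interleaves two 4-element operands with Factor == 2;
  // the recovered per-factor start indexes are {0, 4}.
  int Mask[] = {0, 4, 1, 5, 2, 6, 3, 7};
  SmallVector<unsigned> StartIndexes;
  return ShuffleVectorInst::isInterleaveMask(Mask, /*Factor=*/2,
                                             /*NumInputElts=*/8, StartIndexes);
}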
//===----------------------------------------------------------------------===//
// InsertValueInst Class
//===----------------------------------------------------------------------===//
@@ -2965,42 +3059,42 @@ BinaryOperator *BinaryOperator::Create(BinaryOps Op, Value *S1, Value *S2,
BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const Twine &Name,
Instruction *InsertBefore) {
- Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ Value *Zero = ConstantInt::get(Op->getType(), 0);
return new BinaryOperator(Instruction::Sub,
- zero, Op,
+ Zero, Op,
Op->getType(), Name, InsertBefore);
}
BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const Twine &Name,
BasicBlock *InsertAtEnd) {
- Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
+ Value *Zero = ConstantInt::get(Op->getType(), 0);
return new BinaryOperator(Instruction::Sub,
- zero, Op,
+ Zero, Op,
Op->getType(), Name, InsertAtEnd);
}
BinaryOperator *BinaryOperator::CreateNSWNeg(Value *Op, const Twine &Name,
Instruction *InsertBefore) {
- Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
- return BinaryOperator::CreateNSWSub(zero, Op, Name, InsertBefore);
+ Value *Zero = ConstantInt::get(Op->getType(), 0);
+ return BinaryOperator::CreateNSWSub(Zero, Op, Name, InsertBefore);
}
BinaryOperator *BinaryOperator::CreateNSWNeg(Value *Op, const Twine &Name,
BasicBlock *InsertAtEnd) {
- Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
- return BinaryOperator::CreateNSWSub(zero, Op, Name, InsertAtEnd);
+ Value *Zero = ConstantInt::get(Op->getType(), 0);
+ return BinaryOperator::CreateNSWSub(Zero, Op, Name, InsertAtEnd);
}
BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name,
Instruction *InsertBefore) {
- Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
- return BinaryOperator::CreateNUWSub(zero, Op, Name, InsertBefore);
+ Value *Zero = ConstantInt::get(Op->getType(), 0);
+ return BinaryOperator::CreateNUWSub(Zero, Op, Name, InsertBefore);
}
BinaryOperator *BinaryOperator::CreateNUWNeg(Value *Op, const Twine &Name,
BasicBlock *InsertAtEnd) {
- Value *zero = ConstantFP::getZeroValueForNegation(Op->getType());
- return BinaryOperator::CreateNUWSub(zero, Op, Name, InsertAtEnd);
+ Value *Zero = ConstantInt::get(Op->getType(), 0);
+ return BinaryOperator::CreateNUWSub(Zero, Op, Name, InsertAtEnd);
}
BinaryOperator *BinaryOperator::CreateNot(Value *Op, const Twine &Name,
@@ -3059,23 +3153,6 @@ bool CastInst::isIntegerCast() const {
}
}
-bool CastInst::isLosslessCast() const {
- // Only BitCast can be lossless, exit fast if we're not BitCast
- if (getOpcode() != Instruction::BitCast)
- return false;
-
- // Identity cast is always lossless
- Type *SrcTy = getOperand(0)->getType();
- Type *DstTy = getType();
- if (SrcTy == DstTy)
- return true;
-
- // Pointer to pointer is always lossless.
- if (SrcTy->isPointerTy())
- return DstTy->isPointerTy();
- return false; // Other types have no identity values
-}
-
/// This function determines if the CastInst does not require any bits to be
/// changed in order to effect the cast. Essentially, it identifies cases where
/// no code gen is necessary for the cast, hence the name no-op cast. For
@@ -3306,15 +3383,9 @@ unsigned CastInst::isEliminableCastPair(
"Illegal addrspacecast, bitcast sequence!");
// Allowed, use first cast's opcode
return firstOp;
- case 14: {
- // bitcast, addrspacecast -> addrspacecast if the element type of
- // bitcast's source is the same as that of addrspacecast's destination.
- PointerType *SrcPtrTy = cast<PointerType>(SrcTy->getScalarType());
- PointerType *DstPtrTy = cast<PointerType>(DstTy->getScalarType());
- if (SrcPtrTy->hasSameElementTypeAs(DstPtrTy))
- return Instruction::AddrSpaceCast;
- return 0;
- }
+ case 14:
+ // bitcast, addrspacecast -> addrspacecast
+ return Instruction::AddrSpaceCast;
case 15:
// FIXME: this state can be merged with (1), but the following assert
    // is useful to check the correctness of the sequence due to semantic
@@ -4138,6 +4209,11 @@ StringRef CmpInst::getPredicateName(Predicate Pred) {
}
}
+raw_ostream &llvm::operator<<(raw_ostream &OS, CmpInst::Predicate Pred) {
+ OS << CmpInst::getPredicateName(Pred);
+ return OS;
+}
+
ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
switch (pred) {
default: llvm_unreachable("Unknown icmp predicate!");
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index b258e7bd3154..36d56699c64e 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -71,11 +71,9 @@ bool IntrinsicInst::mayLowerToFunctionCall(Intrinsic::ID IID) {
/// intrinsics for variables.
///
-iterator_range<DbgVariableIntrinsic::location_op_iterator>
-DbgVariableIntrinsic::location_ops() const {
- auto *MD = getRawLocation();
+iterator_range<location_op_iterator> RawLocationWrapper::location_ops() const {
+ Metadata *MD = getRawLocation();
assert(MD && "First operand of DbgVariableIntrinsic should be non-null.");
-
// If operand is ValueAsMetadata, return a range over just that operand.
if (auto *VAM = dyn_cast<ValueAsMetadata>(MD)) {
return {location_op_iterator(VAM), location_op_iterator(VAM + 1)};
@@ -89,8 +87,17 @@ DbgVariableIntrinsic::location_ops() const {
location_op_iterator(static_cast<ValueAsMetadata *>(nullptr))};
}
+iterator_range<location_op_iterator>
+DbgVariableIntrinsic::location_ops() const {
+ return getWrappedLocation().location_ops();
+}
+
Value *DbgVariableIntrinsic::getVariableLocationOp(unsigned OpIdx) const {
- auto *MD = getRawLocation();
+ return getWrappedLocation().getVariableLocationOp(OpIdx);
+}
+
+Value *RawLocationWrapper::getVariableLocationOp(unsigned OpIdx) const {
+ Metadata *MD = getRawLocation();
assert(MD && "First operand of DbgVariableIntrinsic should be non-null.");
if (auto *AL = dyn_cast<DIArgList>(MD))
return AL->getArgs()[OpIdx]->getValue();
@@ -128,14 +135,14 @@ void DbgVariableIntrinsic::replaceVariableLocationOp(Value *OldValue,
assert(NewValue && "Values must be non-null");
auto Locations = location_ops();
auto OldIt = find(Locations, OldValue);
- assert((OldIt != Locations.end() || DbgAssignAddrReplaced) &&
- "OldValue must be a current location");
+ if (OldIt == Locations.end()) {
+ assert(DbgAssignAddrReplaced &&
+ "OldValue must be dbg.assign addr if unused in DIArgList");
+ return;
+ }
+
+ assert(OldIt != Locations.end() && "OldValue must be a current location");
if (!hasArgList()) {
- // Additional check necessary to avoid unconditionally replacing this
- // operand when a dbg.assign address is replaced (DbgAssignAddrReplaced is
- // true).
- if (OldValue != getVariableLocationOp(0))
- return;
Value *NewOperand = isa<MetadataAsValue>(NewValue)
? NewValue
: MetadataAsValue::get(
@@ -206,8 +213,6 @@ void DbgAssignIntrinsic::setAssignId(DIAssignID *New) {
}
void DbgAssignIntrinsic::setAddress(Value *V) {
- assert(V->getType()->isPointerTy() &&
- "Destination Component must be a pointer type");
setOperand(OpAddress,
MetadataAsValue::get(getContext(), ValueAsMetadata::get(V)));
}
@@ -524,6 +529,20 @@ VPIntrinsic::getFunctionalOpcodeForVP(Intrinsic::ID ID) {
return std::nullopt;
}
+// Equivalent non-predicated constrained intrinsic
+std::optional<unsigned>
+VPIntrinsic::getConstrainedIntrinsicIDForVP(Intrinsic::ID ID) {
+ switch (ID) {
+ default:
+ break;
+#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID:
+#define VP_PROPERTY_CONSTRAINEDFP(HASRND, HASEXCEPT, CID) return Intrinsic::CID;
+#define END_REGISTER_VP_INTRINSIC(VPID) break;
+#include "llvm/IR/VPIntrinsics.def"
+ }
+ return std::nullopt;
+}
+
Intrinsic::ID VPIntrinsic::getForOpcode(unsigned IROPC) {
switch (IROPC) {
default:
@@ -554,17 +573,11 @@ bool VPIntrinsic::canIgnoreVectorLengthParam() const {
// Check whether "W == vscale * EC.getKnownMinValue()"
if (EC.isScalable()) {
- // Undig the DL
- const auto *ParMod = this->getModule();
- if (!ParMod)
- return false;
- const auto &DL = ParMod->getDataLayout();
-
// Compare vscale patterns
uint64_t VScaleFactor;
- if (match(VLParam, m_c_Mul(m_ConstantInt(VScaleFactor), m_VScale(DL))))
+ if (match(VLParam, m_c_Mul(m_ConstantInt(VScaleFactor), m_VScale())))
return VScaleFactor >= EC.getKnownMinValue();
- return (EC.getKnownMinValue() == 1) && match(VLParam, m_VScale(DL));
+ return (EC.getKnownMinValue() == 1) && match(VLParam, m_VScale());
}
// standard SIMD operation
diff --git a/llvm/lib/IR/LLVMContext.cpp b/llvm/lib/IR/LLVMContext.cpp
index 7911705776e3..8ddf51537ec1 100644
--- a/llvm/lib/IR/LLVMContext.cpp
+++ b/llvm/lib/IR/LLVMContext.cpp
@@ -92,6 +92,11 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
"kcfi operand bundle id drifted!");
(void)KCFIEntry;
+ auto *ConvergenceCtrlEntry = pImpl->getOrInsertBundleTag("convergencectrl");
+ assert(ConvergenceCtrlEntry->second == LLVMContext::OB_convergencectrl &&
+ "convergencectrl operand bundle id drifted!");
+ (void)ConvergenceCtrlEntry;
+
SyncScope::ID SingleThreadSSID =
pImpl->getOrInsertSyncScopeID("singlethread");
assert(SingleThreadSSID == SyncScope::SingleThread &&
@@ -369,9 +374,9 @@ std::unique_ptr<DiagnosticHandler> LLVMContext::getDiagnosticHandler() {
}
void LLVMContext::setOpaquePointers(bool Enable) const {
- pImpl->setOpaquePointers(Enable);
+ assert(Enable && "Cannot disable opaque pointers");
}
bool LLVMContext::supportsTypedPointers() const {
- return !pImpl->getOpaquePointers();
+ return false;
}
diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp
index 9acb1f654899..2076eeed9417 100644
--- a/llvm/lib/IR/LLVMContextImpl.cpp
+++ b/llvm/lib/IR/LLVMContextImpl.cpp
@@ -33,10 +33,6 @@
using namespace llvm;
-static cl::opt<bool>
- OpaquePointersCL("opaque-pointers", cl::desc("Use opaque pointers"),
- cl::init(true));
-
LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
: DiagHandler(std::make_unique<DiagnosticHandler>()),
VoidTy(C, Type::VoidTyID), LabelTy(C, Type::LabelTyID),
@@ -46,11 +42,7 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
X86_FP80Ty(C, Type::X86_FP80TyID), FP128Ty(C, Type::FP128TyID),
PPC_FP128Ty(C, Type::PPC_FP128TyID), X86_MMXTy(C, Type::X86_MMXTyID),
X86_AMXTy(C, Type::X86_AMXTyID), Int1Ty(C, 1), Int8Ty(C, 8),
- Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64), Int128Ty(C, 128) {
- if (OpaquePointersCL.getNumOccurrences()) {
- OpaquePointers = OpaquePointersCL;
- }
-}
+ Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64), Int128Ty(C, 128) {}
LLVMContextImpl::~LLVMContextImpl() {
// NOTE: We need to delete the contents of OwnedModules, but Module's dtor
@@ -116,6 +108,8 @@ LLVMContextImpl::~LLVMContextImpl() {
CTNConstants.clear();
UVConstants.clear();
PVConstants.clear();
+ IntZeroConstants.clear();
+ IntOneConstants.clear();
IntConstants.clear();
FPConstants.clear();
CDSConstants.clear();
@@ -248,15 +242,3 @@ OptPassGate &LLVMContextImpl::getOptPassGate() const {
void LLVMContextImpl::setOptPassGate(OptPassGate& OPG) {
this->OPG = &OPG;
}
-
-bool LLVMContextImpl::getOpaquePointers() {
- if (LLVM_UNLIKELY(!OpaquePointers))
- OpaquePointers = OpaquePointersCL;
- return *OpaquePointers;
-}
-
-void LLVMContextImpl::setOpaquePointers(bool OP) {
- assert((!OpaquePointers || *OpaquePointers == OP) &&
- "Cannot change opaque pointers mode once set");
- OpaquePointers = OP;
-}
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index d2651a6ec72e..4cc3f8da6b75 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -513,11 +513,20 @@ template <> struct MDNodeKeyImpl<DIStringType> {
bool isKeyOf(const DIStringType *RHS) const {
return Tag == RHS->getTag() && Name == RHS->getRawName() &&
+ StringLength == RHS->getRawStringLength() &&
+ StringLengthExp == RHS->getRawStringLengthExp() &&
+ StringLocationExp == RHS->getRawStringLocationExp() &&
SizeInBits == RHS->getSizeInBits() &&
AlignInBits == RHS->getAlignInBits() &&
Encoding == RHS->getEncoding();
}
- unsigned getHashValue() const { return hash_combine(Tag, Name, Encoding); }
+ unsigned getHashValue() const {
+ // Intentionally computes the hash on a subset of the operands for
+    // performance reasons. The subset has to be significant enough to avoid
+ // collision "most of the time". There is no correctness issue in case of
+ // collision because of the full check above.
+ return hash_combine(Tag, Name, StringLength, Encoding);
+ }
};
template <> struct MDNodeKeyImpl<DIDerivedType> {
@@ -1446,13 +1455,13 @@ public:
DenseMap<const Value *, ValueName *> ValueNames;
- using IntMapTy =
- DenseMap<APInt, std::unique_ptr<ConstantInt>, DenseMapAPIntKeyInfo>;
- IntMapTy IntConstants;
+ DenseMap<unsigned, std::unique_ptr<ConstantInt>> IntZeroConstants;
+ DenseMap<unsigned, std::unique_ptr<ConstantInt>> IntOneConstants;
+ DenseMap<APInt, std::unique_ptr<ConstantInt>, DenseMapAPIntKeyInfo>
+ IntConstants;
- using FPMapTy =
- DenseMap<APFloat, std::unique_ptr<ConstantFP>, DenseMapAPFloatKeyInfo>;
- FPMapTy FPConstants;
+ DenseMap<APFloat, std::unique_ptr<ConstantFP>, DenseMapAPFloatKeyInfo>
+ FPConstants;
FoldingSet<AttributeImpl> AttrsSet;
FoldingSet<AttributeListImpl> AttrsLists;
@@ -1535,8 +1544,9 @@ public:
DenseMap<std::pair<Type *, uint64_t>, ArrayType *> ArrayTypes;
DenseMap<std::pair<Type *, ElementCount>, VectorType *> VectorTypes;
- DenseMap<Type *, PointerType *> PointerTypes; // Pointers in AddrSpace = 0
- DenseMap<std::pair<Type *, unsigned>, PointerType *> ASPointerTypes;
+ PointerType *AS0PointerType = nullptr; // AddrSpace = 0
+ DenseMap<unsigned, PointerType *> PointerTypes;
+ DenseMap<std::pair<Type *, unsigned>, PointerType *> LegacyPointerTypes;
DenseMap<std::pair<Type *, unsigned>, TypedPointerType *> ASTypedPointerTypes;
/// ValueHandles - This map keeps track of all of the value handles that are
@@ -1623,14 +1633,6 @@ public:
/// The lifetime of the object must be guaranteed to extend as long as the
/// LLVMContext is used by compilation.
void setOptPassGate(OptPassGate &);
-
- // TODO: clean up the following after we no longer support non-opaque pointer
- // types.
- bool getOpaquePointers();
- void setOpaquePointers(bool OP);
-
-private:
- std::optional<bool> OpaquePointers;
};
} // end namespace llvm
diff --git a/llvm/lib/IR/LLVMRemarkStreamer.cpp b/llvm/lib/IR/LLVMRemarkStreamer.cpp
index 8fbc33328de8..71f8d4a4b1c7 100644
--- a/llvm/lib/IR/LLVMRemarkStreamer.cpp
+++ b/llvm/lib/IR/LLVMRemarkStreamer.cpp
@@ -96,8 +96,8 @@ Expected<std::unique_ptr<ToolOutputFile>> llvm::setupLLVMOptimizationRemarks(
LLVMContext &Context, StringRef RemarksFilename, StringRef RemarksPasses,
StringRef RemarksFormat, bool RemarksWithHotness,
std::optional<uint64_t> RemarksHotnessThreshold) {
- if (RemarksWithHotness)
- Context.setDiagnosticsHotnessRequested(true);
+ if (RemarksWithHotness || RemarksHotnessThreshold.value_or(1))
+ Context.setDiagnosticsHotnessRequested(true);
Context.setDiagnosticsHotnessThreshold(RemarksHotnessThreshold);
@@ -143,7 +143,7 @@ Error llvm::setupLLVMOptimizationRemarks(
LLVMContext &Context, raw_ostream &OS, StringRef RemarksPasses,
StringRef RemarksFormat, bool RemarksWithHotness,
std::optional<uint64_t> RemarksHotnessThreshold) {
- if (RemarksWithHotness)
+ if (RemarksWithHotness || RemarksHotnessThreshold.value_or(1))
Context.setDiagnosticsHotnessRequested(true);
Context.setDiagnosticsHotnessThreshold(RemarksHotnessThreshold);
diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp
index ef3465177647..6c223d4ec381 100644
--- a/llvm/lib/IR/LegacyPassManager.cpp
+++ b/llvm/lib/IR/LegacyPassManager.cpp
@@ -1408,15 +1408,20 @@ bool FPPassManager::runOnFunction(Function &F) {
FunctionSize = F.getInstructionCount();
}
- llvm::TimeTraceScope FunctionScope("OptFunction", F.getName());
+  // Store the name outside of the loop to avoid redundant calls.
+ const StringRef Name = F.getName();
+ llvm::TimeTraceScope FunctionScope("OptFunction", Name);
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
FunctionPass *FP = getContainedPass(Index);
bool LocalChanged = false;
- llvm::TimeTraceScope PassScope("RunPass", FP->getPassName());
+    // Call getPassName only when required. The call itself is fairly cheap,
+    // but it is still virtual, and repeated calls add unnecessary overhead.
+ llvm::TimeTraceScope PassScope(
+ "RunPass", [FP]() { return std::string(FP->getPassName()); });
- dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName());
+ dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, Name);
dumpRequiredSet(FP);
initializeAnalysisImpl(FP);
@@ -1455,7 +1460,7 @@ bool FPPassManager::runOnFunction(Function &F) {
Changed |= LocalChanged;
if (LocalChanged)
- dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getName());
+ dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, Name);
dumpPreservedSet(FP);
dumpUsedSet(FP);
@@ -1463,7 +1468,7 @@ bool FPPassManager::runOnFunction(Function &F) {
if (LocalChanged)
removeNotPreservedAnalysis(FP);
recordAvailableAnalysis(FP);
- removeDeadPasses(FP, F.getName(), ON_FUNCTION_MSG);
+ removeDeadPasses(FP, Name, ON_FUNCTION_MSG);
}
return Changed;
diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp
index 38ab1d3d1024..2490b3012bdc 100644
--- a/llvm/lib/IR/MDBuilder.cpp
+++ b/llvm/lib/IR/MDBuilder.cpp
@@ -336,12 +336,12 @@ MDNode *MDBuilder::createIrrLoopHeaderWeight(uint64_t Weight) {
}
MDNode *MDBuilder::createPseudoProbeDesc(uint64_t GUID, uint64_t Hash,
- Function *F) {
+ StringRef FName) {
auto *Int64Ty = Type::getInt64Ty(Context);
SmallVector<Metadata *, 3> Ops(3);
Ops[0] = createConstant(ConstantInt::get(Int64Ty, GUID));
Ops[1] = createConstant(ConstantInt::get(Int64Ty, Hash));
- Ops[2] = createString(F->getName());
+ Ops[2] = createString(FName);
return MDNode::get(Context, Ops);
}
diff --git a/llvm/lib/IR/Mangler.cpp b/llvm/lib/IR/Mangler.cpp
index 9011f5db6a40..8d9880ecba58 100644
--- a/llvm/lib/IR/Mangler.cpp
+++ b/llvm/lib/IR/Mangler.cpp
@@ -13,13 +13,13 @@
#include "llvm/IR/Mangler.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
namespace {
@@ -119,6 +119,7 @@ static void addByteCountSuffix(raw_ostream &OS, const Function *F,
void Mangler::getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV,
bool CannotUsePrivateLabel) const {
ManglerPrefixTy PrefixTy = Default;
+ assert(GV != nullptr && "Invalid Global Value");
if (GV->hasPrivateLinkage()) {
if (CannotUsePrivateLabel)
PrefixTy = LinkerPrivate;
diff --git a/llvm/lib/IR/Metadata.cpp b/llvm/lib/IR/Metadata.cpp
index bb757269e55f..c153ffb71a73 100644
--- a/llvm/lib/IR/Metadata.cpp
+++ b/llvm/lib/IR/Metadata.cpp
@@ -195,9 +195,9 @@ SmallVector<Metadata *> ReplaceableMetadataImpl::getAllArgListUsers() {
SmallVector<std::pair<OwnerTy, uint64_t> *> MDUsersWithID;
for (auto Pair : UseMap) {
OwnerTy Owner = Pair.second.first;
- if (!Owner.is<Metadata *>())
+ if (!isa<Metadata *>(Owner))
continue;
- Metadata *OwnerMD = Owner.get<Metadata *>();
+ Metadata *OwnerMD = cast<Metadata *>(Owner);
if (OwnerMD->getMetadataID() == Metadata::DIArgListKind)
MDUsersWithID.push_back(&UseMap[Pair.first]);
}
@@ -206,7 +206,7 @@ SmallVector<Metadata *> ReplaceableMetadataImpl::getAllArgListUsers() {
});
SmallVector<Metadata *> MDUsers;
for (auto *UserWithID : MDUsersWithID)
- MDUsers.push_back(UserWithID->first.get<Metadata *>());
+ MDUsers.push_back(cast<Metadata *>(UserWithID->first));
return MDUsers;
}
@@ -263,9 +263,9 @@ void ReplaceableMetadataImpl::SalvageDebugInfo(const Constant &C) {
MetadataTracking::OwnerTy Owner = Pair.second.first;
if (!Owner)
continue;
- if (!Owner.is<Metadata *>())
+ if (!isa<Metadata *>(Owner))
continue;
- auto *OwnerMD = dyn_cast<MDNode>(Owner.get<Metadata *>());
+ auto *OwnerMD = dyn_cast_if_present<MDNode>(cast<Metadata *>(Owner));
if (!OwnerMD)
continue;
if (isa<DINode>(OwnerMD)) {
@@ -282,7 +282,9 @@ void ReplaceableMetadataImpl::replaceAllUsesWith(Metadata *MD) {
// Copy out uses since UseMap will get touched below.
using UseTy = std::pair<void *, std::pair<OwnerTy, uint64_t>>;
SmallVector<UseTy, 8> Uses(UseMap.begin(), UseMap.end());
- llvm::sort(Uses, llvm::less_second());
+ llvm::sort(Uses, [](const UseTy &L, const UseTy &R) {
+ return L.second.second < R.second.second;
+ });
for (const auto &Pair : Uses) {
// Check that this Ref hasn't disappeared after RAUW (when updating a
// previous Ref).
@@ -301,13 +303,13 @@ void ReplaceableMetadataImpl::replaceAllUsesWith(Metadata *MD) {
}
// Check for MetadataAsValue.
- if (Owner.is<MetadataAsValue *>()) {
- Owner.get<MetadataAsValue *>()->handleChangedMetadata(MD);
+ if (isa<MetadataAsValue *>(Owner)) {
+ cast<MetadataAsValue *>(Owner)->handleChangedMetadata(MD);
continue;
}
// There's a Metadata owner -- dispatch.
- Metadata *OwnerMD = Owner.get<Metadata *>();
+ Metadata *OwnerMD = cast<Metadata *>(Owner);
switch (OwnerMD->getMetadataID()) {
#define HANDLE_METADATA_LEAF(CLASS) \
case Metadata::CLASS##Kind: \
@@ -341,11 +343,11 @@ void ReplaceableMetadataImpl::resolveAllUses(bool ResolveUsers) {
auto Owner = Pair.second.first;
if (!Owner)
continue;
- if (Owner.is<MetadataAsValue *>())
+ if (isa<MetadataAsValue *>(Owner))
continue;
// Resolve MDNodes that point at this.
- auto *OwnerMD = dyn_cast<MDNode>(Owner.get<Metadata *>());
+ auto *OwnerMD = dyn_cast_if_present<MDNode>(cast<Metadata *>(Owner));
if (!OwnerMD)
continue;
if (OwnerMD->isResolved())
@@ -1072,6 +1074,70 @@ MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) {
return B;
}
+// Call instructions with branch weights are only used in SamplePGO, as
+// documented in
+// https://llvm.org/docs/BranchWeightMetadata.html#callinst.
+MDNode *MDNode::mergeDirectCallProfMetadata(MDNode *A, MDNode *B,
+ const Instruction *AInstr,
+ const Instruction *BInstr) {
+ assert(A && B && AInstr && BInstr && "Caller should guarantee");
+ auto &Ctx = AInstr->getContext();
+ MDBuilder MDHelper(Ctx);
+
+ // LLVM IR verifier verifies !prof metadata has at least 2 operands.
+ assert(A->getNumOperands() >= 2 && B->getNumOperands() >= 2 &&
+ "!prof annotations should have no less than 2 operands");
+ MDString *AMDS = dyn_cast<MDString>(A->getOperand(0));
+ MDString *BMDS = dyn_cast<MDString>(B->getOperand(0));
+  // The LLVM IR verifier checks that the first operand is an MDString.
+ assert(AMDS != nullptr && BMDS != nullptr &&
+ "first operand should be a non-null MDString");
+ StringRef AProfName = AMDS->getString();
+ StringRef BProfName = BMDS->getString();
+ if (AProfName.equals("branch_weights") &&
+ BProfName.equals("branch_weights")) {
+ ConstantInt *AInstrWeight =
+ mdconst::dyn_extract<ConstantInt>(A->getOperand(1));
+ ConstantInt *BInstrWeight =
+ mdconst::dyn_extract<ConstantInt>(B->getOperand(1));
+ assert(AInstrWeight && BInstrWeight && "verified by LLVM verifier");
+ return MDNode::get(Ctx,
+ {MDHelper.createString("branch_weights"),
+ MDHelper.createConstant(ConstantInt::get(
+ Type::getInt64Ty(Ctx),
+ SaturatingAdd(AInstrWeight->getZExtValue(),
+ BInstrWeight->getZExtValue())))});
+ }
+ return nullptr;
+}
+
+// Pass in both instructions and nodes. Instruction information (e.g.,
+// instruction type) helps interpret profiles and make implementation clearer.
+MDNode *MDNode::getMergedProfMetadata(MDNode *A, MDNode *B,
+ const Instruction *AInstr,
+ const Instruction *BInstr) {
+ if (!(A && B)) {
+ return A ? A : B;
+ }
+
+ assert(AInstr->getMetadata(LLVMContext::MD_prof) == A &&
+ "Caller should guarantee");
+ assert(BInstr->getMetadata(LLVMContext::MD_prof) == B &&
+ "Caller should guarantee");
+
+ const CallInst *ACall = dyn_cast<CallInst>(AInstr);
+ const CallInst *BCall = dyn_cast<CallInst>(BInstr);
+
+ // Both ACall and BCall are direct callsites.
+ if (ACall && BCall && ACall->getCalledFunction() &&
+ BCall->getCalledFunction())
+ return mergeDirectCallProfMetadata(A, B, AInstr, BInstr);
+
+ // The rest of the cases are not implemented but could be added
+ // when there are use cases.
+ return nullptr;
+}
+
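// Hypothetical call pattern for the new helper when two identical direct
// calls are folded into one; the function and variable names are assumptions.
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

static void mergeCallProf(CallInst *Kept, CallInst *Erased) {
  MDNode *A = Kept->getMetadata(LLVMContext::MD_prof);
  MDNode *B = Erased->getMetadata(LLVMContext::MD_prof);
  // For two direct calls both carrying "branch_weights", the merged node
  // holds the saturating sum of the two counts; otherwise nullptr comes back
  // and the existing metadata is left alone.
  if (MDNode *Merged = MDNode::getMergedProfMetadata(A, B, Kept, Erased))
    Kept->setMetadata(LLVMContext::MD_prof, Merged);
}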
static bool isContiguous(const ConstantRange &A, const ConstantRange &B) {
return A.getUpper() == B.getLower() || A.getLower() == B.getUpper();
}
@@ -1475,23 +1541,54 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
Value::setMetadata(KindID, Node);
}
+void Instruction::addAnnotationMetadata(SmallVector<StringRef> Annotations) {
+ SmallSetVector<StringRef, 2> AnnotationsSet(Annotations.begin(),
+ Annotations.end());
+ MDBuilder MDB(getContext());
+
+ auto *Existing = getMetadata(LLVMContext::MD_annotation);
+ SmallVector<Metadata *, 4> Names;
+ if (Existing) {
+ auto *Tuple = cast<MDTuple>(Existing);
+ for (auto &N : Tuple->operands()) {
+ if (isa<MDString>(N.get())) {
+ Names.push_back(N);
+ continue;
+ }
+ auto *MDAnnotationTuple = cast<MDTuple>(N);
+ if (any_of(MDAnnotationTuple->operands(), [&AnnotationsSet](auto &Op) {
+ return AnnotationsSet.contains(cast<MDString>(Op)->getString());
+ }))
+ return;
+ Names.push_back(N);
+ }
+ }
+
+ SmallVector<Metadata *> MDAnnotationStrings;
+ for (StringRef Annotation : Annotations)
+ MDAnnotationStrings.push_back(MDB.createString(Annotation));
+ MDNode *InfoTuple = MDTuple::get(getContext(), MDAnnotationStrings);
+ Names.push_back(InfoTuple);
+ MDNode *MD = MDTuple::get(getContext(), Names);
+ setMetadata(LLVMContext::MD_annotation, MD);
+}
+
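// Assumed client code, shown to make the de-duplication behaviour of the new
// overload concrete; tagInstruction and the strings are illustrative only.
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static void tagInstruction(Instruction *I) {
  // Appends one !annotation tuple ("auto-init", "stack") unless an existing
  // tuple on I already contains either string, in which case nothing changes.
  I->addAnnotationMetadata(SmallVector<StringRef>{"auto-init", "stack"});
}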
void Instruction::addAnnotationMetadata(StringRef Name) {
MDBuilder MDB(getContext());
auto *Existing = getMetadata(LLVMContext::MD_annotation);
SmallVector<Metadata *, 4> Names;
- bool AppendName = true;
if (Existing) {
auto *Tuple = cast<MDTuple>(Existing);
for (auto &N : Tuple->operands()) {
- if (cast<MDString>(N.get())->getString() == Name)
- AppendName = false;
+ if (isa<MDString>(N.get()) &&
+ cast<MDString>(N.get())->getString() == Name)
+ return;
Names.push_back(N.get());
}
}
- if (AppendName)
- Names.push_back(MDB.createString(Name));
+ Names.push_back(MDB.createString(Name));
MDNode *MD = MDTuple::get(getContext(), Names);
setMetadata(LLVMContext::MD_annotation, MD);
}
@@ -1517,6 +1614,11 @@ void Instruction::setAAMetadata(const AAMDNodes &N) {
setMetadata(LLVMContext::MD_noalias, N.NoAlias);
}
+void Instruction::setNoSanitizeMetadata() {
+ setMetadata(llvm::LLVMContext::MD_nosanitize,
+ llvm::MDNode::get(getContext(), std::nullopt));
+}
+
MDNode *Instruction::getMetadataImpl(unsigned KindID) const {
// Handle 'dbg' as a special case since it is not stored in the hash table.
if (KindID == LLVMContext::MD_dbg)
diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp
index 49fadc9ed7e6..73354a8f36d2 100644
--- a/llvm/lib/IR/Module.cpp
+++ b/llvm/lib/IR/Module.cpp
@@ -262,7 +262,7 @@ NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) {
if (!NMD) {
NMD = new NamedMDNode(Name);
NMD->setParent(this);
- NamedMDList.push_back(NMD);
+ insertNamedMDNode(NMD);
}
return NMD;
}
@@ -271,7 +271,7 @@ NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) {
/// delete it.
void Module::eraseNamedMetadata(NamedMDNode *NMD) {
NamedMDSymTab.erase(NMD->getName());
- NamedMDList.erase(NMD->getIterator());
+ eraseNamedMDNode(NMD);
}
bool Module::isValidModFlagBehavior(Metadata *MD, ModFlagBehavior &MFB) {
@@ -672,6 +672,18 @@ void Module::setRtLibUseGOT() {
addModuleFlag(ModFlagBehavior::Max, "RtLibUseGOT", 1);
}
+bool Module::getDirectAccessExternalData() const {
+ auto *Val = cast_or_null<ConstantAsMetadata>(
+ getModuleFlag("direct-access-external-data"));
+ if (Val)
+ return cast<ConstantInt>(Val->getValue())->getZExtValue() > 0;
+ return getPICLevel() == PICLevel::NotPIC;
+}
+
+void Module::setDirectAccessExternalData(bool Value) {
+ addModuleFlag(ModFlagBehavior::Max, "direct-access-external-data", Value);
+}
+
UWTableKind Module::getUwtable() const {
if (auto *Val = cast_or_null<ConstantAsMetadata>(getModuleFlag("uwtable")))
return UWTableKind(cast<ConstantInt>(Val->getValue())->getZExtValue());
@@ -746,6 +758,13 @@ unsigned Module::getOverrideStackAlignment() const {
return 0;
}
+unsigned Module::getMaxTLSAlignment() const {
+ Metadata *MD = getModuleFlag("MaxTLSAlign");
+ if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(MD))
+ return CI->getZExtValue();
+ return 0;
+}
+
void Module::setOverrideStackAlignment(unsigned Align) {
addModuleFlag(ModFlagBehavior::Error, "override-stack-alignment", Align);
}
diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp
index 2d1440756a95..15fe342969d6 100644
--- a/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -107,11 +107,15 @@ uint64_t ModuleSummaryIndex::getFlags() const {
Flags |= 0x40;
if (withWholeProgramVisibility())
Flags |= 0x80;
+ if (withSupportsHotColdNew())
+ Flags |= 0x100;
+ if (hasUnifiedLTO())
+ Flags |= 0x200;
return Flags;
}
void ModuleSummaryIndex::setFlags(uint64_t Flags) {
- assert(Flags <= 0xff && "Unexpected bits in flag");
+ assert(Flags <= 0x2ff && "Unexpected bits in flag");
// 1 bit: WithGlobalValueDeadStripping flag.
// Set on combined index only.
if (Flags & 0x1)
@@ -145,6 +149,14 @@ void ModuleSummaryIndex::setFlags(uint64_t Flags) {
// Set on combined index only.
if (Flags & 0x80)
setWithWholeProgramVisibility();
+ // 1 bit: WithSupportsHotColdNew flag.
+ // Set on combined index only.
+ if (Flags & 0x100)
+ setWithSupportsHotColdNew();
+ // 1 bit: WithUnifiedLTO flag.
+ // Set on combined index only.
+ if (Flags & 0x200)
+ setUnifiedLTO();
}
// Collect for the given module the list of function it defines
@@ -317,7 +329,7 @@ void ModuleSummaryIndex::propagateAttributes(
}
}
-bool ModuleSummaryIndex::canImportGlobalVar(GlobalValueSummary *S,
+bool ModuleSummaryIndex::canImportGlobalVar(const GlobalValueSummary *S,
bool AnalyzeRefs) const {
auto HasRefsPreventingImport = [this](const GlobalVarSummary *GVS) {
// We don't analyze GV references during attribute propagation, so
diff --git a/llvm/lib/IR/PassManager.cpp b/llvm/lib/IR/PassManager.cpp
index ef850b8235b9..92b729c44d21 100644
--- a/llvm/lib/IR/PassManager.cpp
+++ b/llvm/lib/IR/PassManager.cpp
@@ -96,9 +96,9 @@ void ModuleToFunctionPassAdaptor::printPipeline(
OS << "function";
if (EagerlyInvalidate)
OS << "<eager-inv>";
- OS << "(";
+ OS << '(';
Pass->printPipeline(OS, MapClassName2PassName);
- OS << ")";
+ OS << ')';
}
PreservedAnalyses ModuleToFunctionPassAdaptor::run(Module &M,
@@ -122,13 +122,14 @@ PreservedAnalyses ModuleToFunctionPassAdaptor::run(Module &M,
continue;
PreservedAnalyses PassPA = Pass->run(F, FAM);
- PI.runAfterPass(*Pass, F, PassPA);
// We know that the function pass couldn't have invalidated any other
// function's analyses (that's the contract of a function pass), so
// directly handle the function analysis manager's invalidation here.
FAM.invalidate(F, EagerlyInvalidate ? PreservedAnalyses::none() : PassPA);
+ PI.runAfterPass(*Pass, F, PassPA);
+
// Then intersect the preserved set so that invalidation of module
// analyses will eventually occur when the module pass completes.
PA.intersect(std::move(PassPA));
diff --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp
index f3802af26a61..df5f78c51182 100644
--- a/llvm/lib/IR/PseudoProbe.cpp
+++ b/llvm/lib/IR/PseudoProbe.cpp
@@ -22,12 +22,8 @@ using namespace llvm;
namespace llvm {
std::optional<PseudoProbe>
-extractProbeFromDiscriminator(const Instruction &Inst) {
- assert(isa<CallBase>(&Inst) && !isa<IntrinsicInst>(&Inst) &&
- "Only call instructions should have pseudo probe encodes as their "
- "Dwarf discriminators");
- if (const DebugLoc &DLoc = Inst.getDebugLoc()) {
- const DILocation *DIL = DLoc;
+extractProbeFromDiscriminator(const DILocation *DIL) {
+ if (DIL) {
auto Discriminator = DIL->getDiscriminator();
if (DILocation::isPseudoProbeDiscriminator(Discriminator)) {
PseudoProbe Probe;
@@ -40,12 +36,23 @@ extractProbeFromDiscriminator(const Instruction &Inst) {
Probe.Factor =
PseudoProbeDwarfDiscriminator::extractProbeFactor(Discriminator) /
(float)PseudoProbeDwarfDiscriminator::FullDistributionFactor;
+ Probe.Discriminator = 0;
return Probe;
}
}
return std::nullopt;
}
+std::optional<PseudoProbe>
+extractProbeFromDiscriminator(const Instruction &Inst) {
+ assert(isa<CallBase>(&Inst) && !isa<IntrinsicInst>(&Inst) &&
+ "Only call instructions should have pseudo probe encodes as their "
+ "Dwarf discriminators");
+ if (const DebugLoc &DLoc = Inst.getDebugLoc())
+ return extractProbeFromDiscriminator(DLoc);
+ return std::nullopt;
+}
+
std::optional<PseudoProbe> extractProbe(const Instruction &Inst) {
if (const auto *II = dyn_cast<PseudoProbeInst>(&Inst)) {
PseudoProbe Probe;
@@ -54,6 +61,9 @@ std::optional<PseudoProbe> extractProbe(const Instruction &Inst) {
Probe.Attr = II->getAttributes()->getZExtValue();
Probe.Factor = II->getFactor()->getZExtValue() /
(float)PseudoProbeFullDistributionFactor;
+ Probe.Discriminator = 0;
+ if (const DebugLoc &DLoc = Inst.getDebugLoc())
+ Probe.Discriminator = DLoc->getDiscriminator();
return Probe;
}
diff --git a/llvm/lib/IR/ReplaceConstant.cpp b/llvm/lib/IR/ReplaceConstant.cpp
index 069da26e63b1..58aa040eb032 100644
--- a/llvm/lib/IR/ReplaceConstant.cpp
+++ b/llvm/lib/IR/ReplaceConstant.cpp
@@ -12,125 +12,91 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/ReplaceConstant.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/ValueMap.h"
namespace llvm {
-void convertConstantExprsToInstructions(Instruction *I, ConstantExpr *CE,
- SmallPtrSetImpl<Instruction *> *Insts) {
- // Collect all reachable paths to CE from constant exprssion operands of I.
- std::map<Use *, std::vector<std::vector<ConstantExpr *>>> CEPaths;
- collectConstantExprPaths(I, CE, CEPaths);
-
- // Convert all constant expressions to instructions which are collected at
- // CEPaths.
- convertConstantExprsToInstructions(I, CEPaths, Insts);
+static bool isExpandableUser(User *U) {
+ return isa<ConstantExpr>(U) || isa<ConstantAggregate>(U);
}
-void convertConstantExprsToInstructions(
- Instruction *I,
- std::map<Use *, std::vector<std::vector<ConstantExpr *>>> &CEPaths,
- SmallPtrSetImpl<Instruction *> *Insts) {
- ValueMap<ConstantExpr *, Instruction *> Visited;
-
- for (Use &U : I->operands()) {
- // The operand U is either not a constant expression operand or the
- // constant expression paths do not belong to U, ignore U.
- if (!CEPaths.count(&U))
- continue;
-
- // If the instruction I is a PHI instruction, then fix the instruction
- // insertion point to the entry of the incoming basic block for operand U.
- auto *BI = I;
- if (auto *Phi = dyn_cast<PHINode>(I)) {
- BasicBlock *BB = Phi->getIncomingBlock(U);
- BI = &(*(BB->getFirstInsertionPt()));
- }
-
- // Go through all the paths associated with operand U, and convert all the
- // constant expressions along all the paths to corresponding instructions.
- auto *II = I;
- auto &Paths = CEPaths[&U];
- for (auto &Path : Paths) {
- for (auto *CE : Path) {
- // Instruction which is equivalent to CE.
- Instruction *NI = nullptr;
-
- if (!Visited.count(CE)) {
- // CE is encountered first time, convert it into a corresponding
- // instruction NI, and appropriately insert NI before the parent
- // instruction.
- NI = CE->getAsInstruction(BI);
-
- // Mark CE as visited by mapping CE to NI.
- Visited[CE] = NI;
-
- // If required collect NI.
- if (Insts)
- Insts->insert(NI);
- } else {
- // We had already encountered CE, the correponding instruction already
- // exist, use it to replace CE.
- NI = Visited[CE];
- }
-
- assert(NI && "Expected an instruction corresponding to constant "
- "expression.");
-
- // Replace all uses of constant expression CE by the corresponding
- // instruction NI within the current parent instruction.
- II->replaceUsesOfWith(CE, NI);
- BI = II = NI;
- }
- }
+static Instruction *expandUser(Instruction *InsertPt, Constant *C) {
+ if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+ return CE->getAsInstruction(InsertPt);
+ } else if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
+ Value *V = PoisonValue::get(C->getType());
+ for (auto [Idx, Op] : enumerate(C->operands()))
+ V = InsertValueInst::Create(V, Op, Idx, "", InsertPt);
+ return cast<Instruction>(V);
+ } else if (isa<ConstantVector>(C)) {
+ Type *IdxTy = Type::getInt32Ty(C->getContext());
+ Value *V = PoisonValue::get(C->getType());
+ for (auto [Idx, Op] : enumerate(C->operands()))
+ V = InsertElementInst::Create(V, Op, ConstantInt::get(IdxTy, Idx), "",
+ InsertPt);
+ return cast<Instruction>(V);
+ } else {
+ llvm_unreachable("Not an expandable user");
}
-
- // Remove all converted constant expressions which are dead by now.
- for (auto Item : Visited)
- Item.first->removeDeadConstantUsers();
}
-void collectConstantExprPaths(
- Instruction *I, ConstantExpr *CE,
- std::map<Use *, std::vector<std::vector<ConstantExpr *>>> &CEPaths) {
- for (Use &U : I->operands()) {
- // If the operand U is not a constant expression operand, then ignore it.
- auto *CE2 = dyn_cast<ConstantExpr>(U.get());
- if (!CE2)
+bool convertUsersOfConstantsToInstructions(ArrayRef<Constant *> Consts) {
+ // Find all expandable direct users of Consts.
+ SmallVector<Constant *> Stack;
+ for (Constant *C : Consts)
+ for (User *U : C->users())
+ if (isExpandableUser(U))
+ Stack.push_back(cast<Constant>(U));
+
+ // Include transitive users.
+ SetVector<Constant *> ExpandableUsers;
+ while (!Stack.empty()) {
+ Constant *C = Stack.pop_back_val();
+ if (!ExpandableUsers.insert(C))
continue;
- // Holds all reachable paths from CE2 to CE.
- std::vector<std::vector<ConstantExpr *>> Paths;
-
- // Collect all reachable paths from CE2 to CE.
- std::vector<ConstantExpr *> Path{CE2};
- std::vector<std::vector<ConstantExpr *>> Stack{Path};
- while (!Stack.empty()) {
- std::vector<ConstantExpr *> TPath = Stack.back();
- Stack.pop_back();
- auto *CE3 = TPath.back();
+ for (auto *Nested : C->users())
+ if (isExpandableUser(Nested))
+ Stack.push_back(cast<Constant>(Nested));
+ }
- if (CE3 == CE) {
- Paths.push_back(TPath);
- continue;
+ // Find all instructions that use any of the expandable users
+ SetVector<Instruction *> InstructionWorklist;
+ for (Constant *C : ExpandableUsers)
+ for (User *U : C->users())
+ if (auto *I = dyn_cast<Instruction>(U))
+ InstructionWorklist.insert(I);
+
+ // Replace those expandable operands with instructions
+ bool Changed = false;
+ while (!InstructionWorklist.empty()) {
+ Instruction *I = InstructionWorklist.pop_back_val();
+ for (Use &U : I->operands()) {
+ auto *BI = I;
+ if (auto *Phi = dyn_cast<PHINode>(I)) {
+ BasicBlock *BB = Phi->getIncomingBlock(U);
+ BasicBlock::iterator It = BB->getFirstInsertionPt();
+ assert(It != BB->end() && "Unexpected empty basic block");
+ BI = &*It;
}
- for (auto &UU : CE3->operands()) {
- if (auto *CE4 = dyn_cast<ConstantExpr>(UU.get())) {
- std::vector<ConstantExpr *> NPath(TPath.begin(), TPath.end());
- NPath.push_back(CE4);
- Stack.push_back(NPath);
+ if (auto *C = dyn_cast<Constant>(U.get())) {
+ if (ExpandableUsers.contains(C)) {
+ Changed = true;
+ Instruction *NI = expandUser(BI, C);
+ InstructionWorklist.insert(NI);
+ U.set(NI);
}
}
}
-
- // Associate all the collected paths with U, and save it.
- if (!Paths.empty())
- CEPaths[&U] = Paths;
}
+
+ for (Constant *C : Consts)
+ C->removeDeadConstantUsers();
+
+ return Changed;
}
} // namespace llvm
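// Sketch of the typical call pattern for the rewritten API; the wrapper and
// its argument are assumptions for illustration, not upstream code.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/ReplaceConstant.h"
using namespace llvm;

static bool expandConstantUsesOf(GlobalVariable *GV) {
  // Every instruction reaching GV through constant expressions or constant
  // aggregates gets those constants materialized as instructions; returns
  // true if anything was rewritten.
  return convertUsersOfConstantsToInstructions({GV});
}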
diff --git a/llvm/lib/IR/SSAContext.cpp b/llvm/lib/IR/SSAContext.cpp
index e758a3fbeac9..4790d19b74b5 100644
--- a/llvm/lib/IR/SSAContext.cpp
+++ b/llvm/lib/IR/SSAContext.cpp
@@ -22,8 +22,6 @@
using namespace llvm;
-Value *SSAContext::ValueRefNull = nullptr;
-
void SSAContext::setFunction(Function &Fn) { F = &Fn; }
BasicBlock *SSAContext::getEntryBlock(Function &F) {
@@ -75,9 +73,9 @@ bool SSAContext::comesBefore(const Instruction *lhs, const Instruction *rhs) {
return lhs->comesBefore(rhs);
}
-bool SSAContext::isConstantValuePhi(const Instruction &Instr) {
+bool SSAContext::isConstantOrUndefValuePhi(const Instruction &Instr) {
if (auto *Phi = dyn_cast<PHINode>(&Instr))
- return Phi->hasConstantValue();
+ return Phi->hasConstantOrUndefValue();
return false;
}
diff --git a/llvm/lib/IR/SafepointIRVerifier.cpp b/llvm/lib/IR/SafepointIRVerifier.cpp
index 5d3fa28f7d0a..ed99d05975c2 100644
--- a/llvm/lib/IR/SafepointIRVerifier.cpp
+++ b/llvm/lib/IR/SafepointIRVerifier.cpp
@@ -485,9 +485,7 @@ public:
InstructionVerifier &Verifier);
/// Returns true for reachable and live blocks.
- bool isMapped(const BasicBlock *BB) const {
- return BlockMap.find(BB) != BlockMap.end();
- }
+ bool isMapped(const BasicBlock *BB) const { return BlockMap.contains(BB); }
private:
/// Returns true if the instruction may be safely skipped during verification.
diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp
index b6b9fe72cc35..6ea108d831a1 100644
--- a/llvm/lib/IR/StructuralHash.cpp
+++ b/llvm/lib/IR/StructuralHash.cpp
@@ -1,40 +1,41 @@
-//===-- StructuralHash.cpp - IR Hash for expensive checks -------*- C++ -*-===//
+//===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-//
-
-#ifdef EXPENSIVE_CHECKS
#include "llvm/IR/StructuralHash.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
using namespace llvm;
-namespace details {
+namespace {
// Basic hashing mechanism to detect structural change to the IR, used to verify
// pass return status consistency with actual change. Loosely copied from
// llvm/lib/Transforms/Utils/FunctionComparator.cpp
-class StructuralHash {
- uint64_t Hash = 0x6acaa36bef8325c5ULL;
+class StructuralHashImpl {
+ hash_code Hash;
- void update(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }
+ template <typename T> void hash(const T &V) { Hash = hash_combine(Hash, V); }
public:
- StructuralHash() = default;
+ StructuralHashImpl() : Hash(4) {}
void update(const Function &F) {
- if (F.empty())
+ // Declarations don't affect analyses.
+ if (F.isDeclaration())
return;
- update(F.isVarArg());
- update(F.arg_size());
+ hash(12345); // Function header
+
+ hash(F.isVarArg());
+ hash(F.arg_size());
SmallVector<const BasicBlock *, 8> BBs;
SmallPtrSet<const BasicBlock *, 16> VisitedBBs;
@@ -43,9 +44,9 @@ public:
VisitedBBs.insert(BBs[0]);
while (!BBs.empty()) {
const BasicBlock *BB = BBs.pop_back_val();
- update(45798); // Block header
+ hash(45798); // Block header
for (auto &Inst : *BB)
- update(Inst.getOpcode());
+ hash(Inst.getOpcode());
const Instruction *Term = BB->getTerminator();
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
@@ -56,7 +57,19 @@ public:
}
}
+ void update(const GlobalVariable &GV) {
+ // Declarations and used/compiler.used don't affect analyses.
+ // Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
+ // we ignore anything with the `llvm.` prefix
+ if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
+ return;
+ hash(23456); // Global header
+ hash(GV.getValueType()->getTypeID());
+ }
+
void update(const Module &M) {
+ for (const GlobalVariable &GV : M.globals())
+ update(GV);
for (const Function &F : M)
update(F);
}
@@ -64,18 +77,16 @@ public:
uint64_t getHash() const { return Hash; }
};
-} // namespace details
+} // namespace
uint64_t llvm::StructuralHash(const Function &F) {
- ::details::StructuralHash H;
+ StructuralHashImpl H;
H.update(F);
return H.getHash();
}
uint64_t llvm::StructuralHash(const Module &M) {
- ::details::StructuralHash H;
+ StructuralHashImpl H;
H.update(M);
return H.getHash();
}
-
-#endif
diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp
index 8bb8c9d29a62..ba4d0f5dc18d 100644
--- a/llvm/lib/IR/Type.cpp
+++ b/llvm/lib/IR/Type.cpp
@@ -57,10 +57,12 @@ bool Type::isIntegerTy(unsigned Bitwidth) const {
return isIntegerTy() && cast<IntegerType>(this)->getBitWidth() == Bitwidth;
}
-bool Type::isOpaquePointerTy() const {
- if (auto *PTy = dyn_cast<PointerType>(this))
- return PTy->isOpaque();
- return false;
+bool Type::isScalableTy() const {
+ if (const auto *STy = dyn_cast<StructType>(this)) {
+ SmallPtrSet<Type *, 4> Visited;
+ return STy->containsScalableVectorType(&Visited);
+ }
+ return getTypeID() == ScalableVectorTyID || isScalableTargetExtTy();
}
const fltSemantics &Type::getFltSemantics() const {
@@ -80,6 +82,12 @@ bool Type::isIEEE() const {
return APFloat::getZero(getFltSemantics()).isIEEE();
}
+bool Type::isScalableTargetExtTy() const {
+ if (auto *TT = dyn_cast<TargetExtType>(this))
+ return isa<ScalableVectorType>(TT->getLayoutType());
+ return false;
+}
+
Type *Type::getFloatingPointTy(LLVMContext &C, const fltSemantics &S) {
Type *Ty;
if (&S == &APFloat::IEEEhalf())
@@ -306,6 +314,18 @@ PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) {
return getInt64Ty(C)->getPointerTo(AS);
}
+Type *Type::getWasm_ExternrefTy(LLVMContext &C) {
+ // opaque pointer in addrspace(10)
+ static PointerType *Ty = PointerType::get(C, 10);
+ return Ty;
+}
+
+Type *Type::getWasm_FuncrefTy(LLVMContext &C) {
+ // opaque pointer in addrspace(20)
+ static PointerType *Ty = PointerType::get(C, 20);
+ return Ty;
+}
+
//===----------------------------------------------------------------------===//
// IntegerType Implementation
//===----------------------------------------------------------------------===//
@@ -432,18 +452,51 @@ StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
return ST;
}
-bool StructType::containsScalableVectorType() const {
+bool StructType::containsScalableVectorType(
+ SmallPtrSetImpl<Type *> *Visited) const {
+ if ((getSubclassData() & SCDB_ContainsScalableVector) != 0)
+ return true;
+
+ if ((getSubclassData() & SCDB_NotContainsScalableVector) != 0)
+ return false;
+
+ if (Visited && !Visited->insert(const_cast<StructType *>(this)).second)
+ return false;
+
for (Type *Ty : elements()) {
- if (isa<ScalableVectorType>(Ty))
+ if (isa<ScalableVectorType>(Ty)) {
+ const_cast<StructType *>(this)->setSubclassData(
+ getSubclassData() | SCDB_ContainsScalableVector);
return true;
- if (auto *STy = dyn_cast<StructType>(Ty))
- if (STy->containsScalableVectorType())
+ }
+ if (auto *STy = dyn_cast<StructType>(Ty)) {
+ if (STy->containsScalableVectorType(Visited)) {
+ const_cast<StructType *>(this)->setSubclassData(
+ getSubclassData() | SCDB_ContainsScalableVector);
return true;
+ }
+ }
}
+ // For structures that are opaque, return false but do not set the
+ // SCDB_NotContainsScalableVector flag since it may gain scalable vector type
+ // when it becomes non-opaque.
+ if (!isOpaque())
+ const_cast<StructType *>(this)->setSubclassData(
+ getSubclassData() | SCDB_NotContainsScalableVector);
return false;
}
+bool StructType::containsHomogeneousScalableVectorTypes() const {
+ Type *FirstTy = getNumElements() > 0 ? elements()[0] : nullptr;
+ if (!FirstTy || !isa<ScalableVectorType>(FirstTy))
+ return false;
+ for (Type *Ty : elements())
+ if (Ty != FirstTy)
+ return false;
+ return true;
+}
+
void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
assert(isOpaque() && "Struct body already set!");
@@ -563,10 +616,19 @@ bool StructType::isSized(SmallPtrSetImpl<Type*> *Visited) const {
// Okay, our struct is sized if all of the elements are, but if one of the
// elements is opaque, the struct isn't sized *yet*, but may become sized in
// the future, so just bail out without caching.
+ // The ONLY special case inside a struct that is considered sized is when the
+ // elements are homogeneous of a scalable vector type.
+ if (containsHomogeneousScalableVectorTypes()) {
+ const_cast<StructType *>(this)->setSubclassData(getSubclassData() |
+ SCDB_IsSized);
+ return true;
+ }
for (Type *Ty : elements()) {
// If the struct contains a scalable vector type, don't consider it sized.
- // This prevents it from being used in loads/stores/allocas/GEPs.
- if (isa<ScalableVectorType>(Ty))
+ // This prevents it from being used in loads/stores/allocas/GEPs. The ONLY
+ // special case right now is a structure of homogeneous scalable vector
+ // types and is handled by the if-statement before this for-loop.
+ if (Ty->isScalableTy())
return false;
if (!Ty->isSized(Visited))
return false;
@@ -730,46 +792,24 @@ PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) {
assert(EltTy && "Can't get a pointer to <null> type!");
assert(isValidElementType(EltTy) && "Invalid type for pointer element!");
- LLVMContextImpl *CImpl = EltTy->getContext().pImpl;
-
// Automatically convert typed pointers to opaque pointers.
- if (CImpl->getOpaquePointers())
- return get(EltTy->getContext(), AddressSpace);
-
- // Since AddressSpace #0 is the common case, we special case it.
- PointerType *&Entry = AddressSpace == 0 ? CImpl->PointerTypes[EltTy]
- : CImpl->ASPointerTypes[std::make_pair(EltTy, AddressSpace)];
-
- if (!Entry)
- Entry = new (CImpl->Alloc) PointerType(EltTy, AddressSpace);
- return Entry;
+ return get(EltTy->getContext(), AddressSpace);
}
PointerType *PointerType::get(LLVMContext &C, unsigned AddressSpace) {
LLVMContextImpl *CImpl = C.pImpl;
- assert(CImpl->getOpaquePointers() &&
- "Can only create opaque pointers in opaque pointer mode");
// Since AddressSpace #0 is the common case, we special case it.
- PointerType *&Entry =
- AddressSpace == 0
- ? CImpl->PointerTypes[nullptr]
- : CImpl->ASPointerTypes[std::make_pair(nullptr, AddressSpace)];
+ PointerType *&Entry = AddressSpace == 0 ? CImpl->AS0PointerType
+ : CImpl->PointerTypes[AddressSpace];
if (!Entry)
Entry = new (CImpl->Alloc) PointerType(C, AddressSpace);
return Entry;
}
-PointerType::PointerType(Type *E, unsigned AddrSpace)
- : Type(E->getContext(), PointerTyID), PointeeTy(E) {
- ContainedTys = &PointeeTy;
- NumContainedTys = 1;
- setSubclassData(AddrSpace);
-}
-
PointerType::PointerType(LLVMContext &C, unsigned AddrSpace)
- : Type(C, PointerTyID), PointeeTy(nullptr) {
+ : Type(C, PointerTyID) {
setSubclassData(AddrSpace);
}
@@ -850,10 +890,14 @@ struct TargetTypeInfo {
static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) {
LLVMContext &C = Ty->getContext();
StringRef Name = Ty->getName();
- if (Name.startswith("spirv.")) {
+ if (Name.startswith("spirv."))
return TargetTypeInfo(Type::getInt8PtrTy(C, 0), TargetExtType::HasZeroInit,
TargetExtType::CanBeGlobal);
- }
+
+ // Opaque types in the AArch64 name space.
+ if (Name == "aarch64.svcount")
+ return TargetTypeInfo(ScalableVectorType::get(Type::getInt1Ty(C), 16));
+
return TargetTypeInfo(Type::getVoidTy(C));
}
diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp
index fa22065dcf36..41260a98e3ce 100644
--- a/llvm/lib/IR/Value.cpp
+++ b/llvm/lib/IR/Value.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DerivedUser.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -315,8 +316,12 @@ StringRef Value::getName() const {
}
void Value::setNameImpl(const Twine &NewName) {
+ bool NeedNewName =
+ !getContext().shouldDiscardValueNames() || isa<GlobalValue>(this);
+
// Fast-path: LLVMContext can be set to strip out non-GlobalValue names
- if (getContext().shouldDiscardValueNames() && !isa<GlobalValue>(this))
+ // and there is no need to delete the old name.
+ if (!NeedNewName && !hasName())
return;
// Fast path for common IRBuilder case of setName("") when there is no name.
@@ -324,7 +329,7 @@ void Value::setNameImpl(const Twine &NewName) {
return;
SmallString<256> NameData;
- StringRef NameRef = NewName.toStringRef(NameData);
+ StringRef NameRef = NeedNewName ? NewName.toStringRef(NameData) : "";
assert(NameRef.find_first_of(0) == StringRef::npos &&
"Null bytes are not allowed in names");
@@ -340,20 +345,17 @@ void Value::setNameImpl(const Twine &NewName) {
return; // Cannot set a name on this value (e.g. constant).
if (!ST) { // No symbol table to update? Just do the change.
- if (NameRef.empty()) {
- // Free the name for this value.
- destroyValueName();
- return;
- }
-
// NOTE: Could optimize for the case the name is shrinking to not deallocate
// then reallocated.
destroyValueName();
- // Create the new name.
- MallocAllocator Allocator;
- setValueName(ValueName::create(NameRef, Allocator));
- getValueName()->setValue(this);
+ if (!NameRef.empty()) {
+ // Create the new name.
+ assert(NeedNewName);
+ MallocAllocator Allocator;
+ setValueName(ValueName::create(NameRef, Allocator));
+ getValueName()->setValue(this);
+ }
return;
}
@@ -369,6 +371,7 @@ void Value::setNameImpl(const Twine &NewName) {
}
// Name is changing to something new.
+ assert(NeedNewName);
setValueName(ST->createValueName(NameRef, this));
}
@@ -737,7 +740,7 @@ const Value *Value::stripAndAccumulateConstantOffsets(
// Stop traversal if the pointer offset wouldn't fit in the bit-width
// provided by the Offset argument. This can happen due to AddrSpaceCast
// stripping.
- if (GEPOffset.getMinSignedBits() > BitWidth)
+ if (GEPOffset.getSignificantBits() > BitWidth)
return V;
// External Analysis can return a result higher/lower than the value
@@ -972,7 +975,7 @@ Align Value::getPointerAlignment(const DataLayout &DL) const {
if (auto *CstInt = dyn_cast_or_null<ConstantInt>(ConstantExpr::getPtrToInt(
const_cast<Constant *>(CstPtr), DL.getIntPtrType(getType()),
/*OnlyIfReduced=*/true))) {
- size_t TrailingZeros = CstInt->getValue().countTrailingZeros();
+ size_t TrailingZeros = CstInt->getValue().countr_zero();
// While the actual alignment may be large, elsewhere we have
// an arbitrary upper alignment limit, so let's clamp to it.
return Align(TrailingZeros < Value::MaxAlignmentExponent
@@ -983,6 +986,78 @@ Align Value::getPointerAlignment(const DataLayout &DL) const {
return Align(1);
}
+static std::optional<int64_t>
+getOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, const DataLayout &DL) {
+ // Skip over the first indices.
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (unsigned i = 1; i != Idx; ++i, ++GTI)
+ /*skip along*/;
+
+ // Compute the offset implied by the rest of the indices.
+ int64_t Offset = 0;
+ for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
+ ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!OpC)
+ return std::nullopt;
+ if (OpC->isZero())
+ continue; // No offset.
+
+ // Handle struct indices, which add their field offset to the pointer.
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
+ Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ continue;
+ }
+
+ // Otherwise, we have a sequential type like an array or fixed-length
+ // vector. Multiply the index by the ElementSize.
+ TypeSize Size = DL.getTypeAllocSize(GTI.getIndexedType());
+ if (Size.isScalable())
+ return std::nullopt;
+ Offset += Size.getFixedValue() * OpC->getSExtValue();
+ }
+
+ return Offset;
+}
+
+std::optional<int64_t> Value::getPointerOffsetFrom(const Value *Other,
+ const DataLayout &DL) const {
+ const Value *Ptr1 = Other;
+ const Value *Ptr2 = this;
+ APInt Offset1(DL.getIndexTypeSizeInBits(Ptr1->getType()), 0);
+ APInt Offset2(DL.getIndexTypeSizeInBits(Ptr2->getType()), 0);
+ Ptr1 = Ptr1->stripAndAccumulateConstantOffsets(DL, Offset1, true);
+ Ptr2 = Ptr2->stripAndAccumulateConstantOffsets(DL, Offset2, true);
+
+ // Handle the trivial case first.
+ if (Ptr1 == Ptr2)
+ return Offset2.getSExtValue() - Offset1.getSExtValue();
+
+ const GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1);
+ const GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2);
+
+ // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
+ // base. After that base, they may have some number of common (and
+ // potentially variable) indices. After that they handle some constant
+ // offset, which determines their offset from each other. At this point, we
+ // handle no other case.
+ if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0) ||
+ GEP1->getSourceElementType() != GEP2->getSourceElementType())
+ return std::nullopt;
+
+ // Skip any common indices and track the GEP types.
+ unsigned Idx = 1;
+ for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
+ if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
+ break;
+
+ auto IOffset1 = getOffsetFromIndex(GEP1, Idx, DL);
+ auto IOffset2 = getOffsetFromIndex(GEP2, Idx, DL);
+ if (!IOffset1 || !IOffset2)
+ return std::nullopt;
+ return *IOffset2 - *IOffset1 + Offset2.getSExtValue() -
+ Offset1.getSExtValue();
+}
+
const Value *Value::DoPHITranslation(const BasicBlock *CurBB,
const BasicBlock *PredBB) const {
auto *PN = dyn_cast<PHINode>(this);
diff --git a/llvm/lib/IR/ValueSymbolTable.cpp b/llvm/lib/IR/ValueSymbolTable.cpp
index cf85a571f9a0..52f7ddcdc65a 100644
--- a/llvm/lib/IR/ValueSymbolTable.cpp
+++ b/llvm/lib/IR/ValueSymbolTable.cpp
@@ -12,7 +12,6 @@
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
@@ -22,6 +21,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <utility>
diff --git a/llvm/lib/IR/VectorBuilder.cpp b/llvm/lib/IR/VectorBuilder.cpp
index e7be7a98a593..c07bc0561fba 100644
--- a/llvm/lib/IR/VectorBuilder.cpp
+++ b/llvm/lib/IR/VectorBuilder.cpp
@@ -32,9 +32,7 @@ Module &VectorBuilder::getModule() const {
}
Value *VectorBuilder::getAllTrueMask() {
- auto *BoolTy = Builder.getInt1Ty();
- auto *MaskTy = VectorType::get(BoolTy, StaticVectorLength);
- return ConstantInt::getAllOnesValue(MaskTy);
+ return Builder.getAllOnesMask(StaticVectorLength);
}
Value &VectorBuilder::requestMask() {
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 83e42bc184ff..1408ce293ca6 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -38,6 +38,11 @@
// * A landingpad instruction must be the first non-PHI instruction in the
// block.
// * Landingpad instructions must be in a function with a personality function.
+// * Convergence control intrinsics are introduced in ConvergentOperations.rst.
+// The applied restrictions are too numerous to list here.
+// * The convergence entry intrinsic and the loop heart must be the first
+// non-PHI instruction in their respective block. This does not conflict with
+// the landing pads, since these two kinds cannot occur in the same block.
// * All other things that are tested by asserts spread about the code...
//
//===----------------------------------------------------------------------===//
@@ -48,6 +53,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -58,6 +64,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Argument.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -66,12 +73,14 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/CycleInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GCStrategy.h"
#include "llvm/IR/GlobalAlias.h"
@@ -85,6 +94,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/LLVMContext.h"
@@ -220,6 +230,8 @@ private:
AL->print(*OS);
}
+ void Write(Printable P) { *OS << P << '\n'; }
+
template <typename T> void Write(ArrayRef<T> Vs) {
for (const T &V : Vs)
Write(V);
@@ -317,6 +329,13 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
/// The current source language.
dwarf::SourceLanguage CurrentSourceLang = dwarf::DW_LANG_lo_user;
+ /// Whether the current function has convergencectrl operand bundles.
+ enum {
+ ControlledConvergence,
+ UncontrolledConvergence,
+ NoConvergence
+ } ConvergenceKind = NoConvergence;
+
/// Whether source was present on the first DIFile encountered in each CU.
DenseMap<const DICompileUnit *, bool> HasSourceDebugInfo;
@@ -328,6 +347,10 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
// terminators that indicate the unwind, used to detect cycles therein.
MapVector<Instruction *, Instruction *> SiblingFuncletInfo;
+ /// Cache which blocks are in which funclet, if an EH funclet personality is
+ /// in use. Otherwise empty.
+ DenseMap<BasicBlock *, ColorVector> BlockEHFuncletColors;
+
/// Cache of constants visited in search of ConstantExprs.
SmallPtrSet<const Constant *, 32> ConstantExprVisited;
@@ -392,6 +415,8 @@ public:
// FIXME: We strip const here because the inst visitor strips const.
visit(const_cast<Function &>(F));
verifySiblingFuncletUnwinds();
+ if (ConvergenceKind == ControlledConvergence)
+ verifyConvergenceControl(const_cast<Function &>(F));
InstsInThisBlock.clear();
DebugFnArgs.clear();
LandingPadResultTy = nullptr;
@@ -399,6 +424,7 @@ public:
SiblingFuncletInfo.clear();
verifyNoAliasScopeDecl();
NoAliasScopeDecls.clear();
+ ConvergenceKind = NoConvergence;
return !Broken;
}
@@ -467,6 +493,8 @@ private:
void visitModuleFlagCGProfileEntry(const MDOperand &MDO);
void visitFunction(const Function &F);
void visitBasicBlock(BasicBlock &BB);
+ void verifyRangeMetadata(const Value &V, const MDNode *Range, Type *Ty,
+ bool IsAbsoluteSymbol);
void visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty);
void visitDereferenceableMetadata(Instruction &I, MDNode *MD);
void visitProfMetadata(Instruction &I, MDNode *MD);
@@ -572,6 +600,7 @@ private:
void verifyStatepoint(const CallBase &Call);
void verifyFrameRecoverIndices();
void verifySiblingFuncletUnwinds();
+ void verifyConvergenceControl(Function &F);
void verifyFragmentExpression(const DbgVariableIntrinsic &I);
template <typename ValueOrMetadata>
@@ -653,7 +682,37 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) {
Check(A->value() <= Value::MaximumAlignment,
"huge alignment values are unsupported", GO);
}
+
+ if (const MDNode *Associated =
+ GO->getMetadata(LLVMContext::MD_associated)) {
+ Check(Associated->getNumOperands() == 1,
+ "associated metadata must have one operand", &GV, Associated);
+ const Metadata *Op = Associated->getOperand(0).get();
+ Check(Op, "associated metadata must have a global value", GO, Associated);
+
+ const auto *VM = dyn_cast_or_null<ValueAsMetadata>(Op);
+ Check(VM, "associated metadata must be ValueAsMetadata", GO, Associated);
+ if (VM) {
+ Check(isa<PointerType>(VM->getValue()->getType()),
+ "associated value must be pointer typed", GV, Associated);
+
+ const Value *Stripped = VM->getValue()->stripPointerCastsAndAliases();
+ Check(isa<GlobalObject>(Stripped) || isa<Constant>(Stripped),
+ "associated metadata must point to a GlobalObject", GO, Stripped);
+ Check(Stripped != GO,
+ "global values should not associate to themselves", GO,
+ Associated);
+ }
+ }
+
+ // FIXME: Why is getMetadata on GlobalValue protected?
+ if (const MDNode *AbsoluteSymbol =
+ GO->getMetadata(LLVMContext::MD_absolute_symbol)) {
+ verifyRangeMetadata(*GO, AbsoluteSymbol, DL.getIntPtrType(GO->getType()),
+ true);
+ }
}
+
Check(!GV.hasAppendingLinkage() || isa<GlobalVariable>(GV),
"Only global variables can have appending linkage!", &GV);
@@ -748,10 +807,8 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
"the third field of the element type is mandatory, "
"specify ptr null to migrate from the obsoleted 2-field form");
Type *ETy = STy->getTypeAtIndex(2);
- Type *Int8Ty = Type::getInt8Ty(ETy->getContext());
- Check(ETy->isPointerTy() &&
- cast<PointerType>(ETy)->isOpaqueOrPointeeTypeMatches(Int8Ty),
- "wrong type for intrinsic global variable", &GV);
+ Check(ETy->isPointerTy(), "wrong type for intrinsic global variable",
+ &GV);
}
}
@@ -801,9 +858,11 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
Check(!isa<ScalableVectorType>(GV.getValueType()),
"Globals cannot contain scalable vectors", &GV);
- if (auto *STy = dyn_cast<StructType>(GV.getValueType()))
- Check(!STy->containsScalableVectorType(),
+ if (auto *STy = dyn_cast<StructType>(GV.getValueType())) {
+ SmallPtrSet<Type *, 4> Visited;
+ Check(!STy->containsScalableVectorType(&Visited),
"Globals cannot contain scalable vectors", &GV);
+ }
// Check if it's a target extension type that disallows being used as a
// global.
@@ -1048,8 +1107,8 @@ void Verifier::visitDISubrange(const DISubrange &N) {
isa<DIVariable>(CBound) || isa<DIExpression>(CBound),
"Count must be signed constant or DIVariable or DIExpression", &N);
auto Count = N.getCount();
- CheckDI(!Count || !Count.is<ConstantInt *>() ||
- Count.get<ConstantInt *>()->getSExtValue() >= -1,
+ CheckDI(!Count || !isa<ConstantInt *>(Count) ||
+ cast<ConstantInt *>(Count)->getSExtValue() >= -1,
"invalid subrange count", &N);
auto *LBound = N.getRawLowerBound();
CheckDI(!LBound || isa<ConstantAsMetadata>(LBound) ||
@@ -1354,9 +1413,11 @@ void Verifier::visitDISubprogram(const DISubprogram &N) {
auto *Node = dyn_cast<MDTuple>(RawNode);
CheckDI(Node, "invalid retained nodes list", &N, RawNode);
for (Metadata *Op : Node->operands()) {
- CheckDI(Op && (isa<DILocalVariable>(Op) || isa<DILabel>(Op)),
- "invalid retained nodes, expected DILocalVariable or DILabel", &N,
- Node, Op);
+ CheckDI(Op && (isa<DILocalVariable>(Op) || isa<DILabel>(Op) ||
+ isa<DIImportedEntity>(Op)),
+ "invalid retained nodes, expected DILocalVariable, DILabel or "
+ "DIImportedEntity",
+ &N, Node, Op);
}
}
CheckDI(!hasConflictingReferenceFlags(N.getFlags()),
@@ -1373,6 +1434,8 @@ void Verifier::visitDISubprogram(const DISubprogram &N) {
} else {
// Subprogram declarations (part of the type hierarchy).
CheckDI(!Unit, "subprogram declarations must not have a compile unit", &N);
+ CheckDI(!N.getRawDeclaration(),
+ "subprogram declaration must not have a declaration field");
}
if (auto *RawThrownTypes = N.getRawThrownTypes()) {
@@ -1875,7 +1938,7 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
}
}
- if (PointerType *PTy = dyn_cast<PointerType>(Ty)) {
+ if (isa<PointerType>(Ty)) {
if (Attrs.hasAttribute(Attribute::ByVal)) {
if (Attrs.hasAttribute(Attribute::Alignment)) {
Align AttrAlign = Attrs.getAlignment().valueOrOne();
@@ -1902,38 +1965,14 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
Check(Attrs.getPreallocatedType()->isSized(&Visited),
"Attribute 'preallocated' does not support unsized types!", V);
}
- if (!PTy->isOpaque()) {
- if (!isa<PointerType>(PTy->getNonOpaquePointerElementType()))
- Check(!Attrs.hasAttribute(Attribute::SwiftError),
- "Attribute 'swifterror' only applies to parameters "
- "with pointer to pointer type!",
- V);
- if (Attrs.hasAttribute(Attribute::ByRef)) {
- Check(Attrs.getByRefType() == PTy->getNonOpaquePointerElementType(),
- "Attribute 'byref' type does not match parameter!", V);
- }
-
- if (Attrs.hasAttribute(Attribute::ByVal) && Attrs.getByValType()) {
- Check(Attrs.getByValType() == PTy->getNonOpaquePointerElementType(),
- "Attribute 'byval' type does not match parameter!", V);
- }
-
- if (Attrs.hasAttribute(Attribute::Preallocated)) {
- Check(Attrs.getPreallocatedType() ==
- PTy->getNonOpaquePointerElementType(),
- "Attribute 'preallocated' type does not match parameter!", V);
- }
-
- if (Attrs.hasAttribute(Attribute::InAlloca)) {
- Check(Attrs.getInAllocaType() == PTy->getNonOpaquePointerElementType(),
- "Attribute 'inalloca' type does not match parameter!", V);
- }
+ }
- if (Attrs.hasAttribute(Attribute::ElementType)) {
- Check(Attrs.getElementType() == PTy->getNonOpaquePointerElementType(),
- "Attribute 'elementtype' type does not match parameter!", V);
- }
- }
+ if (Attrs.hasAttribute(Attribute::NoFPClass)) {
+ uint64_t Val = Attrs.getAttribute(Attribute::NoFPClass).getValueAsInt();
+ Check(Val != 0, "Attribute 'nofpclass' must have at least one test bit set",
+ V);
+ Check((Val & ~static_cast<unsigned>(fcAllFlags)) == 0,
+ "Invalid value for 'nofpclass' test mask", V);
}
}
@@ -2142,10 +2181,13 @@ void Verifier::verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs,
unsigned VScaleMin = Attrs.getFnAttrs().getVScaleRangeMin();
if (VScaleMin == 0)
CheckFailed("'vscale_range' minimum must be greater than 0", V);
-
+ else if (!isPowerOf2_32(VScaleMin))
+ CheckFailed("'vscale_range' minimum must be power-of-two value", V);
std::optional<unsigned> VScaleMax = Attrs.getFnAttrs().getVScaleRangeMax();
if (VScaleMax && VScaleMin > VScaleMax)
CheckFailed("'vscale_range' minimum cannot be greater than maximum", V);
+ else if (VScaleMax && !isPowerOf2_32(*VScaleMax))
+ CheckFailed("'vscale_range' maximum must be power-of-two value", V);
}
if (Attrs.hasFnAttr("frame-pointer")) {
@@ -2484,6 +2526,118 @@ void Verifier::verifySiblingFuncletUnwinds() {
}
}
+void Verifier::verifyConvergenceControl(Function &F) {
+ DenseMap<BasicBlock *, SmallVector<CallBase *, 8>> LiveTokenMap;
+ DenseMap<const Cycle *, const CallBase *> CycleHearts;
+
+ // Just like the DominatorTree, compute the CycleInfo locally so that we
+ // can run the verifier outside of a pass manager and we don't rely on
+ // potentially out-dated analysis results.
+ CycleInfo CI;
+ CI.compute(F);
+
+ auto checkBundle = [&](OperandBundleUse &Bundle, CallBase *CB,
+ SmallVectorImpl<CallBase *> &LiveTokens) {
+ Check(Bundle.Inputs.size() == 1 && Bundle.Inputs[0]->getType()->isTokenTy(),
+ "The 'convergencectrl' bundle requires exactly one token use.", CB);
+
+ Value *Token = Bundle.Inputs[0].get();
+ auto *Def = dyn_cast<CallBase>(Token);
+ Check(Def != nullptr,
+ "Convergence control tokens can only be produced by call "
+ "instructions.",
+ Token);
+
+ Check(llvm::is_contained(LiveTokens, Token),
+ "Convergence region is not well-nested.", Token, CB);
+
+ while (LiveTokens.back() != Token)
+ LiveTokens.pop_back();
+
+ // Check static rules about cycles.
+ auto *BB = CB->getParent();
+ auto *BBCycle = CI.getCycle(BB);
+ if (!BBCycle)
+ return;
+
+ BasicBlock *DefBB = Def->getParent();
+ if (DefBB == BB || BBCycle->contains(DefBB)) {
+ // degenerate occurrence of a loop intrinsic
+ return;
+ }
+
+ auto *II = dyn_cast<IntrinsicInst>(CB);
+ Check(II &&
+ II->getIntrinsicID() == Intrinsic::experimental_convergence_loop,
+ "Convergence token used by an instruction other than "
+ "llvm.experimental.convergence.loop in a cycle that does "
+ "not contain the token's definition.",
+ CB, CI.print(BBCycle));
+
+ while (true) {
+ auto *Parent = BBCycle->getParentCycle();
+ if (!Parent || Parent->contains(DefBB))
+ break;
+ BBCycle = Parent;
+ };
+
+ Check(BBCycle->isReducible() && BB == BBCycle->getHeader(),
+ "Cycle heart must dominate all blocks in the cycle.", CB, BB,
+ CI.print(BBCycle));
+ Check(!CycleHearts.count(BBCycle),
+ "Two static convergence token uses in a cycle that does "
+ "not contain either token's definition.",
+ CB, CycleHearts[BBCycle], CI.print(BBCycle));
+ CycleHearts[BBCycle] = CB;
+ };
+
+ ReversePostOrderTraversal<Function *> RPOT(&F);
+ SmallVector<CallBase *, 8> LiveTokens;
+ for (BasicBlock *BB : RPOT) {
+ LiveTokens.clear();
+ auto LTIt = LiveTokenMap.find(BB);
+ if (LTIt != LiveTokenMap.end()) {
+ LiveTokens = std::move(LTIt->second);
+ LiveTokenMap.erase(LTIt);
+ }
+
+ for (Instruction &I : *BB) {
+ CallBase *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
+ continue;
+
+ auto Bundle = CB->getOperandBundle(LLVMContext::OB_convergencectrl);
+ if (Bundle)
+ checkBundle(*Bundle, CB, LiveTokens);
+
+ if (CB->getType()->isTokenTy())
+ LiveTokens.push_back(CB);
+ }
+
+ // Propagate token liveness
+ for (BasicBlock *Succ : successors(BB)) {
+ DomTreeNode *SuccNode = DT.getNode(Succ);
+ LTIt = LiveTokenMap.find(Succ);
+ if (LTIt == LiveTokenMap.end()) {
+ // We're the first predecessor: all tokens which dominate the
+ // successor are live for now.
+ LTIt = LiveTokenMap.try_emplace(Succ).first;
+ for (CallBase *LiveToken : LiveTokens) {
+ if (!DT.dominates(DT.getNode(LiveToken->getParent()), SuccNode))
+ break;
+ LTIt->second.push_back(LiveToken);
+ }
+ } else {
+ // Compute the intersection of live tokens.
+ auto It = llvm::partition(LTIt->second, [&LiveTokens](CallBase *Token) {
+ return llvm::is_contained(LiveTokens, Token);
+ });
+ LTIt->second.erase(It, LTIt->second.end());
+ }
+ }
+ }
+}
+
// visitFunction - Verify that a function is ok.
//
void Verifier::visitFunction(const Function &F) {
@@ -2540,6 +2694,8 @@ void Verifier::visitFunction(const Function &F) {
}
case CallingConv::AMDGPU_KERNEL:
case CallingConv::SPIR_KERNEL:
+ case CallingConv::AMDGPU_CS_Chain:
+ case CallingConv::AMDGPU_CS_ChainPreserve:
Check(F.getReturnType()->isVoidTy(),
"Calling convention requires void return type", &F);
[[fallthrough]];
@@ -2630,6 +2786,9 @@ void Verifier::visitFunction(const Function &F) {
F.getParent(), Per, Per->getParent());
}
+ // EH funclet coloring can be expensive, recompute on-demand
+ BlockEHFuncletColors.clear();
+
if (F.isMaterializable()) {
// Function has a body somewhere we can't see.
Check(MDs.empty(), "unmaterialized function cannot have metadata", &F,
@@ -3207,14 +3366,23 @@ void Verifier::visitPHINode(PHINode &PN) {
visitInstruction(PN);
}
+static bool isControlledConvergent(const CallBase &Call) {
+ if (Call.getOperandBundle(LLVMContext::OB_convergencectrl))
+ return true;
+ if (const auto *F = dyn_cast<Function>(Call.getCalledOperand())) {
+ switch (F->getIntrinsicID()) {
+ case Intrinsic::experimental_convergence_anchor:
+ case Intrinsic::experimental_convergence_entry:
+ case Intrinsic::experimental_convergence_loop:
+ return true;
+ }
+ }
+ return false;
+}
+
void Verifier::visitCallBase(CallBase &Call) {
Check(Call.getCalledOperand()->getType()->isPointerTy(),
"Called function must be a pointer!", Call);
- PointerType *FPTy = cast<PointerType>(Call.getCalledOperand()->getType());
-
- Check(FPTy->isOpaqueOrPointeeTypeMatches(Call.getFunctionType()),
- "Called function is not the same type as the call!", Call);
-
FunctionType *FTy = Call.getFunctionType();
// Verify that the correct number of arguments are being passed
@@ -3243,6 +3411,15 @@ void Verifier::visitCallBase(CallBase &Call) {
Check(Callee->getValueType() == FTy,
"Intrinsic called with incompatible signature", Call);
+ // Disallow calls to functions with the amdgpu_cs_chain[_preserve] calling
+ // convention.
+ auto CC = Call.getCallingConv();
+ Check(CC != CallingConv::AMDGPU_CS_Chain &&
+ CC != CallingConv::AMDGPU_CS_ChainPreserve,
+ "Direct calls to amdgpu_cs_chain/amdgpu_cs_chain_preserve functions "
+ "not allowed. Please use the @llvm.amdgpu.cs.chain intrinsic instead.",
+ Call);
+
auto VerifyTypeAlign = [&](Type *Ty, const Twine &Message) {
if (!Ty->isSized())
return;
@@ -3496,6 +3673,23 @@ void Verifier::visitCallBase(CallBase &Call) {
if (Call.isInlineAsm())
verifyInlineAsmCall(Call);
+ if (isControlledConvergent(Call)) {
+ Check(Call.isConvergent(),
+ "Expected convergent attribute on a controlled convergent call.",
+ Call);
+ Check(ConvergenceKind != UncontrolledConvergence,
+ "Cannot mix controlled and uncontrolled convergence in the same "
+ "function.",
+ Call);
+ ConvergenceKind = ControlledConvergence;
+ } else if (Call.isConvergent()) {
+ Check(ConvergenceKind != ControlledConvergence,
+ "Cannot mix controlled and uncontrolled convergence in the same "
+ "function.",
+ Call);
+ ConvergenceKind = UncontrolledConvergence;
+ }
+
visitInstruction(Call);
}
@@ -3796,6 +3990,14 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
"GEP base pointer is not a vector or a vector of pointers", &GEP);
Check(GEP.getSourceElementType()->isSized(), "GEP into unsized type!", &GEP);
+ if (auto *STy = dyn_cast<StructType>(GEP.getSourceElementType())) {
+ SmallPtrSet<Type *, 4> Visited;
+ Check(!STy->containsScalableVectorType(&Visited),
+ "getelementptr cannot target structure that contains scalable vector"
+ "type",
+ &GEP);
+ }
+
SmallVector<Value *, 16> Idxs(GEP.indices());
Check(
all_of(Idxs, [](Value *V) { return V->getType()->isIntOrIntVectorTy(); }),
@@ -3839,10 +4041,10 @@ static bool isContiguous(const ConstantRange &A, const ConstantRange &B) {
return A.getUpper() == B.getLower() || A.getLower() == B.getUpper();
}
-void Verifier::visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty) {
- assert(Range && Range == I.getMetadata(LLVMContext::MD_range) &&
- "precondition violation");
-
+/// Verify !range and !absolute_symbol metadata. These have the same
+/// restrictions, except !absolute_symbol allows the full set.
+void Verifier::verifyRangeMetadata(const Value &I, const MDNode *Range,
+ Type *Ty, bool IsAbsoluteSymbol) {
unsigned NumOperands = Range->getNumOperands();
Check(NumOperands % 2 == 0, "Unfinished range!", Range);
unsigned NumRanges = NumOperands / 2;
@@ -3856,13 +4058,20 @@ void Verifier::visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty) {
ConstantInt *High =
mdconst::dyn_extract<ConstantInt>(Range->getOperand(2 * i + 1));
Check(High, "The upper limit must be an integer!", High);
- Check(High->getType() == Low->getType() && High->getType() == Ty,
+ Check(High->getType() == Low->getType() &&
+ High->getType() == Ty->getScalarType(),
"Range types must match instruction type!", &I);
APInt HighV = High->getValue();
APInt LowV = Low->getValue();
+
+ // ConstantRange asserts if the ranges are the same except for the min/max
+ // value. Leave the cases it tolerates for the empty range error below.
+ Check(LowV != HighV || LowV.isMaxValue() || LowV.isMinValue(),
+ "The upper and lower limits cannot be the same value", &I);
+
ConstantRange CurRange(LowV, HighV);
- Check(!CurRange.isEmptySet() && !CurRange.isFullSet(),
+ Check(!CurRange.isEmptySet() && (IsAbsoluteSymbol || !CurRange.isFullSet()),
"Range must not be empty!", Range);
if (i != 0) {
Check(CurRange.intersectWith(LastRange).isEmptySet(),
@@ -3887,6 +4096,12 @@ void Verifier::visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty) {
}
}
+void Verifier::visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty) {
+ assert(Range && Range == I.getMetadata(LLVMContext::MD_range) &&
+ "precondition violation");
+ verifyRangeMetadata(I, Range, Ty, false);
+}
+
void Verifier::checkAtomicMemAccessSize(Type *Ty, const Instruction *I) {
unsigned Size = DL.getTypeSizeInBits(Ty);
Check(Size >= 8, "atomic memory access' size must be byte-sized", Ty, I);
@@ -3924,8 +4139,6 @@ void Verifier::visitStoreInst(StoreInst &SI) {
PointerType *PTy = dyn_cast<PointerType>(SI.getOperand(1)->getType());
Check(PTy, "Store operand must be a pointer.", &SI);
Type *ElTy = SI.getOperand(0)->getType();
- Check(PTy->isOpaqueOrPointeeTypeMatches(ElTy),
- "Stored value type does not match pointer operand type!", &SI, ElTy);
if (MaybeAlign A = SI.getAlign()) {
Check(A->value() <= Value::MaximumAlignment,
"huge alignment values are unsupported", &SI);
@@ -4637,8 +4850,15 @@ void Verifier::visitAnnotationMetadata(MDNode *Annotation) {
Check(isa<MDTuple>(Annotation), "annotation must be a tuple");
Check(Annotation->getNumOperands() >= 1,
"annotation must have at least one operand");
- for (const MDOperand &Op : Annotation->operands())
- Check(isa<MDString>(Op.get()), "operands must be strings");
+ for (const MDOperand &Op : Annotation->operands()) {
+ bool TupleOfStrings =
+ isa<MDTuple>(Op.get()) &&
+ all_of(cast<MDTuple>(Op)->operands(), [](auto &Annotation) {
+ return isa<MDString>(Annotation.get());
+ });
+ Check(isa<MDString>(Op.get()) || TupleOfStrings,
+ "operands must be a string or a tuple of strings");
+ }
}
void Verifier::visitAliasScopeMetadata(const MDNode *MD) {
@@ -5038,7 +5258,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
}
case Intrinsic::is_fpclass: {
const ConstantInt *TestMask = cast<ConstantInt>(Call.getOperand(1));
- Check((TestMask->getZExtValue() & ~fcAllFlags) == 0,
+ Check((TestMask->getZExtValue() & ~static_cast<unsigned>(fcAllFlags)) == 0,
"unsupported bits for llvm.is.fpclass test mask");
break;
}
@@ -5076,9 +5296,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"invalid llvm.dbg.declare intrinsic call 1", Call);
visitDbgIntrinsic("declare", cast<DbgVariableIntrinsic>(Call));
break;
- case Intrinsic::dbg_addr: // llvm.dbg.addr
- visitDbgIntrinsic("addr", cast<DbgVariableIntrinsic>(Call));
- break;
case Intrinsic::dbg_value: // llvm.dbg.value
visitDbgIntrinsic("value", cast<DbgVariableIntrinsic>(Call));
break;
@@ -5414,11 +5631,16 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Call);
break;
}
+ case Intrinsic::experimental_get_vector_length: {
+ ConstantInt *VF = cast<ConstantInt>(Call.getArgOperand(1));
+ Check(!VF->isNegative() && !VF->isZero(),
+ "get_vector_length: VF must be positive", Call);
+ break;
+ }
case Intrinsic::masked_load: {
Check(Call.getType()->isVectorTy(), "masked_load: must return a vector",
Call);
- Value *Ptr = Call.getArgOperand(0);
ConstantInt *Alignment = cast<ConstantInt>(Call.getArgOperand(1));
Value *Mask = Call.getArgOperand(2);
Value *PassThru = Call.getArgOperand(3);
@@ -5426,10 +5648,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Call);
Check(Alignment->getValue().isPowerOf2(),
"masked_load: alignment must be a power of 2", Call);
-
- PointerType *PtrTy = cast<PointerType>(Ptr->getType());
- Check(PtrTy->isOpaqueOrPointeeTypeMatches(Call.getType()),
- "masked_load: return must match pointer type", Call);
Check(PassThru->getType() == Call.getType(),
"masked_load: pass through and return type must match", Call);
Check(cast<VectorType>(Mask->getType())->getElementCount() ==
@@ -5439,17 +5657,12 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
}
case Intrinsic::masked_store: {
Value *Val = Call.getArgOperand(0);
- Value *Ptr = Call.getArgOperand(1);
ConstantInt *Alignment = cast<ConstantInt>(Call.getArgOperand(2));
Value *Mask = Call.getArgOperand(3);
Check(Mask->getType()->isVectorTy(), "masked_store: mask must be vector",
Call);
Check(Alignment->getValue().isPowerOf2(),
"masked_store: alignment must be a power of 2", Call);
-
- PointerType *PtrTy = cast<PointerType>(Ptr->getType());
- Check(PtrTy->isOpaqueOrPointeeTypeMatches(Val->getType()),
- "masked_store: storee must match pointer type", Call);
Check(cast<VectorType>(Mask->getType())->getElementCount() ==
cast<VectorType>(Val->getType())->getElementCount(),
"masked_store: vector mask must be same length as value", Call);
@@ -5600,15 +5813,28 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
Type *Op0ElemTy = nullptr;
Type *Op1ElemTy = nullptr;
switch (ID) {
- case Intrinsic::matrix_multiply:
+ case Intrinsic::matrix_multiply: {
NumRows = cast<ConstantInt>(Call.getArgOperand(2));
+ ConstantInt *N = cast<ConstantInt>(Call.getArgOperand(3));
NumColumns = cast<ConstantInt>(Call.getArgOperand(4));
+ Check(cast<FixedVectorType>(Call.getArgOperand(0)->getType())
+ ->getNumElements() ==
+ NumRows->getZExtValue() * N->getZExtValue(),
+ "First argument of a matrix operation does not match specified "
+ "shape!");
+ Check(cast<FixedVectorType>(Call.getArgOperand(1)->getType())
+ ->getNumElements() ==
+ N->getZExtValue() * NumColumns->getZExtValue(),
+ "Second argument of a matrix operation does not match specified "
+ "shape!");
+
ResultTy = cast<VectorType>(Call.getType());
Op0ElemTy =
cast<VectorType>(Call.getArgOperand(0)->getType())->getElementType();
Op1ElemTy =
cast<VectorType>(Call.getArgOperand(1)->getType())->getElementType();
break;
+ }
case Intrinsic::matrix_transpose:
NumRows = cast<ConstantInt>(Call.getArgOperand(1));
NumColumns = cast<ConstantInt>(Call.getArgOperand(2));
@@ -5621,11 +5847,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
NumRows = cast<ConstantInt>(Call.getArgOperand(3));
NumColumns = cast<ConstantInt>(Call.getArgOperand(4));
ResultTy = cast<VectorType>(Call.getType());
-
- PointerType *Op0PtrTy =
- cast<PointerType>(Call.getArgOperand(0)->getType());
- if (!Op0PtrTy->isOpaque())
- Op0ElemTy = Op0PtrTy->getNonOpaquePointerElementType();
break;
}
case Intrinsic::matrix_column_major_store: {
@@ -5635,11 +5856,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
ResultTy = cast<VectorType>(Call.getArgOperand(0)->getType());
Op0ElemTy =
cast<VectorType>(Call.getArgOperand(0)->getType())->getElementType();
-
- PointerType *Op1PtrTy =
- cast<PointerType>(Call.getArgOperand(1)->getType());
- if (!Op1PtrTy->isOpaque())
- Op1ElemTy = Op1PtrTy->getNonOpaquePointerElementType();
break;
}
default:
@@ -5794,7 +6010,102 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"isdata argument to llvm.aarch64.prefetch must be 0 or 1", Call);
break;
}
+ case Intrinsic::callbr_landingpad: {
+ const auto *CBR = dyn_cast<CallBrInst>(Call.getOperand(0));
+ Check(CBR, "intrinstic requires callbr operand", &Call);
+ if (!CBR)
+ break;
+
+ const BasicBlock *LandingPadBB = Call.getParent();
+ const BasicBlock *PredBB = LandingPadBB->getUniquePredecessor();
+ if (!PredBB) {
+ CheckFailed("Intrinsic in block must have 1 unique predecessor", &Call);
+ break;
+ }
+ if (!isa<CallBrInst>(PredBB->getTerminator())) {
+ CheckFailed("Intrinsic must have corresponding callbr in predecessor",
+ &Call);
+ break;
+ }
+ Check(llvm::any_of(CBR->getIndirectDests(),
+ [LandingPadBB](const BasicBlock *IndDest) {
+ return IndDest == LandingPadBB;
+ }),
+ "Intrinsic's corresponding callbr must have intrinsic's parent basic "
+ "block in indirect destination list",
+ &Call);
+ const Instruction &First = *LandingPadBB->begin();
+ Check(&First == &Call, "No other instructions may precede intrinsic",
+ &Call);
+ break;
+ }
+ case Intrinsic::amdgcn_cs_chain: {
+ auto CallerCC = Call.getCaller()->getCallingConv();
+ switch (CallerCC) {
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_CS_Chain:
+ case CallingConv::AMDGPU_CS_ChainPreserve:
+ break;
+ default:
+ CheckFailed("Intrinsic can only be used from functions with the "
+ "amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve "
+ "calling conventions",
+ &Call);
+ break;
+ }
+ break;
+ }
+ case Intrinsic::experimental_convergence_entry:
+ Check(Call.getFunction()->isConvergent(),
+ "Entry intrinsic can occur only in a convergent function.", &Call);
+ Check(Call.getParent()->isEntryBlock(),
+ "Entry intrinsic must occur in the entry block.", &Call);
+ Check(Call.getParent()->getFirstNonPHI() == &Call,
+ "Entry intrinsic must occur at the start of the basic block.", &Call);
+ LLVM_FALLTHROUGH;
+ case Intrinsic::experimental_convergence_anchor:
+ Check(!Call.getOperandBundle(LLVMContext::OB_convergencectrl),
+ "Entry or anchor intrinsic must not have a convergencectrl bundle.",
+ &Call);
+ break;
+ case Intrinsic::experimental_convergence_loop:
+ Check(Call.getOperandBundle(LLVMContext::OB_convergencectrl),
+ "Loop intrinsic must have a convergencectrl bundle.", &Call);
+ Check(Call.getParent()->getFirstNonPHI() == &Call,
+ "Loop intrinsic must occur at the start of the basic block.", &Call);
+ break;
};
+
+ // Verify that there aren't any unmediated control transfers between funclets.
+ if (IntrinsicInst::mayLowerToFunctionCall(ID)) {
+ Function *F = Call.getParent()->getParent();
+ if (F->hasPersonalityFn() &&
+ isScopedEHPersonality(classifyEHPersonality(F->getPersonalityFn()))) {
+ // Run EH funclet coloring on-demand and cache results for other intrinsic
+ // calls in this function
+ if (BlockEHFuncletColors.empty())
+ BlockEHFuncletColors = colorEHFunclets(*F);
+
+ // Check for catch-/cleanup-pad in first funclet block
+ bool InEHFunclet = false;
+ BasicBlock *CallBB = Call.getParent();
+ const ColorVector &CV = BlockEHFuncletColors.find(CallBB)->second;
+ assert(CV.size() > 0 && "Uncolored block");
+ for (BasicBlock *ColorFirstBB : CV)
+ if (dyn_cast_or_null<FuncletPadInst>(ColorFirstBB->getFirstNonPHI()))
+ InEHFunclet = true;
+
+ // Check for funclet operand bundle
+ bool HasToken = false;
+ for (unsigned I = 0, E = Call.getNumOperandBundles(); I != E; ++I)
+ if (Call.getOperandBundleAt(I).getTagID() == LLVMContext::OB_funclet)
+ HasToken = true;
+
+ // This would cause silent code truncation in WinEHPrepare
+ if (InEHFunclet)
+ Check(HasToken, "Missing funclet token on intrinsic call", &Call);
+ }
+ }
}
/// Carefully grab the subprogram from a local scope.
@@ -5961,20 +6272,20 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
case Intrinsic::experimental_constrained_fptosi:
case Intrinsic::experimental_constrained_fptoui: {
Value *Operand = FPI.getArgOperand(0);
- uint64_t NumSrcElem = 0;
+ ElementCount SrcEC;
Check(Operand->getType()->isFPOrFPVectorTy(),
"Intrinsic first argument must be floating point", &FPI);
if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
- NumSrcElem = cast<FixedVectorType>(OperandT)->getNumElements();
+ SrcEC = cast<VectorType>(OperandT)->getElementCount();
}
Operand = &FPI;
- Check((NumSrcElem > 0) == Operand->getType()->isVectorTy(),
+ Check(SrcEC.isNonZero() == Operand->getType()->isVectorTy(),
"Intrinsic first argument and result disagree on vector use", &FPI);
Check(Operand->getType()->isIntOrIntVectorTy(),
"Intrinsic result must be an integer", &FPI);
if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
- Check(NumSrcElem == cast<FixedVectorType>(OperandT)->getNumElements(),
+ Check(SrcEC == cast<VectorType>(OperandT)->getElementCount(),
"Intrinsic first argument and result vector lengths must be equal",
&FPI);
}
@@ -5984,20 +6295,20 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
case Intrinsic::experimental_constrained_sitofp:
case Intrinsic::experimental_constrained_uitofp: {
Value *Operand = FPI.getArgOperand(0);
- uint64_t NumSrcElem = 0;
+ ElementCount SrcEC;
Check(Operand->getType()->isIntOrIntVectorTy(),
"Intrinsic first argument must be integer", &FPI);
if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
- NumSrcElem = cast<FixedVectorType>(OperandT)->getNumElements();
+ SrcEC = cast<VectorType>(OperandT)->getElementCount();
}
Operand = &FPI;
- Check((NumSrcElem > 0) == Operand->getType()->isVectorTy(),
+ Check(SrcEC.isNonZero() == Operand->getType()->isVectorTy(),
"Intrinsic first argument and result disagree on vector use", &FPI);
Check(Operand->getType()->isFPOrFPVectorTy(),
"Intrinsic result must be a floating point", &FPI);
if (auto *OperandT = dyn_cast<VectorType>(Operand->getType())) {
- Check(NumSrcElem == cast<FixedVectorType>(OperandT)->getNumElements(),
+ Check(SrcEC == cast<VectorType>(OperandT)->getElementCount(),
"Intrinsic first argument and result vector lengths must be equal",
&FPI);
}
@@ -6016,8 +6327,8 @@ void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
Check(OperandTy->isVectorTy() == ResultTy->isVectorTy(),
"Intrinsic first argument and result disagree on vector use", &FPI);
if (OperandTy->isVectorTy()) {
- Check(cast<FixedVectorType>(OperandTy)->getNumElements() ==
- cast<FixedVectorType>(ResultTy)->getNumElements(),
+ Check(cast<VectorType>(OperandTy)->getElementCount() ==
+ cast<VectorType>(ResultTy)->getElementCount(),
"Intrinsic first argument and result vector lengths must be equal",
&FPI);
}
@@ -6221,7 +6532,17 @@ void Verifier::verifyNotEntryValue(const DbgVariableIntrinsic &I) {
if (!E || !E->isValid())
return;
- CheckDI(!E->isEntryValue(), "Entry values are only allowed in MIR", &I);
+ // We allow EntryValues for swift async arguments, as they have an
+ // ABI-guarantee to be turned into a specific register.
+ if (isa<ValueAsMetadata>(I.getRawLocation()))
+ if (auto *ArgLoc = dyn_cast_or_null<Argument>(I.getVariableLocationOp(0));
+ ArgLoc && ArgLoc->hasAttribute(Attribute::SwiftAsync))
+ return;
+
+ CheckDI(!E->isEntryValue(),
+ "Entry values are only allowed in MIR unless they target a "
+ "swiftasync Argument",
+ &I);
}
void Verifier::verifyCompileUnits() {
@@ -6680,6 +7001,9 @@ static bool isNewFormatTBAATypeNode(llvm::MDNode *Type) {
}
bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
+ CheckTBAA(MD->getNumOperands() > 0, "TBAA metadata cannot have 0 operands",
+ &I, MD);
+
CheckTBAA(isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
isa<VAArgInst>(I) || isa<AtomicRMWInst>(I) ||
isa<AtomicCmpXchgInst>(I),
diff --git a/llvm/lib/InterfaceStub/IFSHandler.cpp b/llvm/lib/InterfaceStub/IFSHandler.cpp
index 8bb01836fccb..aa5817dceed5 100644
--- a/llvm/lib/InterfaceStub/IFSHandler.cpp
+++ b/llvm/lib/InterfaceStub/IFSHandler.cpp
@@ -10,13 +10,13 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/InterfaceStub/IFSStub.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/YAMLTraits.h"
+#include "llvm/TargetParser/Triple.h"
#include <functional>
#include <optional>
@@ -193,8 +193,13 @@ Expected<std::unique_ptr<IFSStub>> ifs::readIFSFromBuffer(StringRef Buf) {
"IFS version " + Stub->IfsVersion.getAsString() + " is unsupported.",
std::make_error_code(std::errc::invalid_argument));
if (Stub->Target.ArchString) {
- Stub->Target.Arch =
+ uint16_t eMachine =
ELF::convertArchNameToEMachine(*Stub->Target.ArchString);
+ if (eMachine == ELF::EM_NONE)
+ return createStringError(
+ std::make_error_code(std::errc::invalid_argument),
+ "IFS arch '" + *Stub->Target.ArchString + "' is unsupported");
+ Stub->Target.Arch = eMachine;
}
return std::move(Stub);
}
@@ -301,6 +306,9 @@ IFSTarget ifs::parseTriple(StringRef TripleStr) {
case Triple::ArchType::x86_64:
RetTarget.Arch = (IFSArch)ELF::EM_X86_64;
break;
+ case Triple::ArchType::riscv64:
+ RetTarget.Arch = (IFSArch)ELF::EM_RISCV;
+ break;
default:
RetTarget.Arch = (IFSArch)ELF::EM_NONE;
}
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 1cd48adac3f0..6803d6ab1285 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -51,6 +51,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Transforms/Utils/SplitModule.h"
@@ -75,6 +76,13 @@ cl::opt<bool> EnableLTOInternalization(
cl::desc("Enable global value internalization in LTO"));
}
+/// Indicate we are linking with an allocator that supports hot/cold operator
+/// new interfaces.
+extern cl::opt<bool> SupportsHotColdNew;
+
+/// Enable MemProf context disambiguation for thin link.
+extern cl::opt<bool> EnableMemProfContextDisambiguation;
+
// Computes a unique hash for the Module considering the current list of
// export/import and other global analysis results.
// The hash is produced in \p Key.
@@ -166,22 +174,38 @@ void llvm::computeLTOCacheKey(
// imported symbols for each module may affect code generation and is
// sensitive to link order, so include that as well.
using ImportMapIteratorTy = FunctionImporter::ImportMapTy::const_iterator;
- std::vector<ImportMapIteratorTy> ImportModulesVector;
+ struct ImportModule {
+ ImportMapIteratorTy ModIt;
+ const ModuleSummaryIndex::ModuleInfo *ModInfo;
+
+ StringRef getIdentifier() const { return ModIt->getKey(); }
+ const FunctionImporter::FunctionsToImportTy &getFunctions() const {
+ return ModIt->second;
+ }
+
+ const ModuleHash &getHash() const { return ModInfo->second.second; }
+ uint64_t getId() const { return ModInfo->second.first; }
+ };
+
+ std::vector<ImportModule> ImportModulesVector;
ImportModulesVector.reserve(ImportList.size());
for (ImportMapIteratorTy It = ImportList.begin(); It != ImportList.end();
++It) {
- ImportModulesVector.push_back(It);
+ ImportModulesVector.push_back({It, Index.getModule(It->getKey())});
}
+ // Order using moduleId integer which is based on the order the module was
+ // added.
llvm::sort(ImportModulesVector,
- [](const ImportMapIteratorTy &Lhs, const ImportMapIteratorTy &Rhs)
- -> bool { return Lhs->getKey() < Rhs->getKey(); });
- for (const ImportMapIteratorTy &EntryIt : ImportModulesVector) {
- auto ModHash = Index.getModuleHash(EntryIt->first());
+ [](const ImportModule &Lhs, const ImportModule &Rhs) -> bool {
+ return Lhs.getId() < Rhs.getId();
+ });
+ for (const ImportModule &Entry : ImportModulesVector) {
+ auto ModHash = Entry.getHash();
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
- AddUint64(EntryIt->second.size());
- for (auto &Fn : EntryIt->second)
+ AddUint64(Entry.getFunctions().size());
+ for (auto &Fn : Entry.getFunctions())
AddUint64(Fn);
}
@@ -251,9 +275,10 @@ void llvm::computeLTOCacheKey(
// Imported functions may introduce new uses of type identifier resolutions,
// so we need to collect their used resolutions as well.
- for (auto &ImpM : ImportList)
- for (auto &ImpF : ImpM.second) {
- GlobalValueSummary *S = Index.findSummaryInModule(ImpF, ImpM.first());
+ for (const ImportModule &ImpM : ImportModulesVector)
+ for (auto &ImpF : ImpM.getFunctions()) {
+ GlobalValueSummary *S =
+ Index.findSummaryInModule(ImpF, ImpM.getIdentifier());
AddUsedThings(S);
// If this is an alias, we also care about any types/etc. that the aliasee
// may reference.
@@ -421,39 +446,93 @@ void llvm::thinLTOResolvePrevailingInIndex(
recordNewLinkage, GUIDPreservedSymbols);
}
-static bool isWeakObjectWithRWAccess(GlobalValueSummary *GVS) {
- if (auto *VarSummary = dyn_cast<GlobalVarSummary>(GVS->getBaseObject()))
- return !VarSummary->maybeReadOnly() && !VarSummary->maybeWriteOnly() &&
- (VarSummary->linkage() == GlobalValue::WeakODRLinkage ||
- VarSummary->linkage() == GlobalValue::LinkOnceODRLinkage);
- return false;
-}
-
static void thinLTOInternalizeAndPromoteGUID(
ValueInfo VI, function_ref<bool(StringRef, ValueInfo)> isExported,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing) {
+ auto ExternallyVisibleCopies =
+ llvm::count_if(VI.getSummaryList(),
+ [](const std::unique_ptr<GlobalValueSummary> &Summary) {
+ return !GlobalValue::isLocalLinkage(Summary->linkage());
+ });
+
for (auto &S : VI.getSummaryList()) {
+ // First see if we need to promote an internal value because it is not
+ // exported.
if (isExported(S->modulePath(), VI)) {
if (GlobalValue::isLocalLinkage(S->linkage()))
S->setLinkage(GlobalValue::ExternalLinkage);
- } else if (EnableLTOInternalization &&
- // Ignore local and appending linkage values since the linker
- // doesn't resolve them.
- !GlobalValue::isLocalLinkage(S->linkage()) &&
- (!GlobalValue::isInterposableLinkage(S->linkage()) ||
- isPrevailing(VI.getGUID(), S.get())) &&
- S->linkage() != GlobalValue::AppendingLinkage &&
- // We can't internalize available_externally globals because this
- // can break function pointer equality.
- S->linkage() != GlobalValue::AvailableExternallyLinkage &&
- // Functions and read-only variables with linkonce_odr and
- // weak_odr linkage can be internalized. We can't internalize
- // linkonce_odr and weak_odr variables which are both modified
- // and read somewhere in the program because reads and writes
- // will become inconsistent.
- !isWeakObjectWithRWAccess(S.get()))
- S->setLinkage(GlobalValue::InternalLinkage);
+ continue;
+ }
+
+ // Otherwise, see if we can internalize.
+ if (!EnableLTOInternalization)
+ continue;
+
+ // Ignore local and appending linkage values since the linker
+ // doesn't resolve them (and there is no need to internalize if this is
+ // already internal).
+ if (GlobalValue::isLocalLinkage(S->linkage()) ||
+ S->linkage() == GlobalValue::AppendingLinkage)
+ continue;
+
+ // We can't internalize available_externally globals because this
+ // can break function pointer equality.
+ if (S->linkage() == GlobalValue::AvailableExternallyLinkage)
+ continue;
+
+ bool IsPrevailing = isPrevailing(VI.getGUID(), S.get());
+
+ if (GlobalValue::isInterposableLinkage(S->linkage()) && !IsPrevailing)
+ continue;
+
+ // Non-exported functions and variables with linkonce_odr or weak_odr
+ // linkage can be internalized in certain cases. The minimum legality
+ // requirements would be that they are not address taken to ensure that we
+ // don't break pointer equality checks, and that variables are either read-
+ // or write-only. For functions, this is the case if either all copies are
+ // [local_]unnamed_addr, or we can propagate reference edge attributes
+ // (which is how this is guaranteed for variables, when analyzing whether
+ // they are read or write-only).
+ //
+ // However, we only get to this code for weak/linkonce ODR values in one of
+ // two cases:
+ // 1) The prevailing copy is not in IR (it is in native code).
+ // 2) The prevailing copy in IR is not exported from its module.
+ // Additionally, at least for the new LTO API, case 2 will only happen if
+ // there is exactly one definition of the value (i.e. in exactly one
+ // module), as duplicate defs result in the value being marked exported.
+ // Likely, users of the legacy LTO API are similar; however, currently there
+ // are llvm-lto based tests of the legacy LTO API that do not mark
+ // duplicate linkonce_odr copies as exported via the tool, so we need
+ // to handle that case below by checking the number of copies.
+ //
+ // Generally, we only want to internalize a linkonce/weak ODR value in case
+ // 2, because in case 1 we cannot see how the value is used to know if it
+ // is read or write-only. We also don't want to bloat the binary with
+ // multiple internalized copies of non-prevailing linkonce_odr functions.
+ // Note if we don't internalize, we will convert non-prevailing copies to
+ // available_externally anyway, so that we drop them after inlining. The
+ // only reason to internalize such a function is if we indeed have a single
+ // copy, because internalizing it won't increase binary size, and enables
+ // use of inliner heuristics that are more aggressive in the face of a
+ // single call to a static (local). For variables, internalizing a read or
+ // write only variable can enable more aggressive optimization. However, we
+ // already perform this elsewhere in the ThinLTO backend handling for
+ // read or write-only variables (processGlobalForThinLTO).
+ //
+ // Therefore, only internalize linkonce/weak ODR if there is a single copy,
+ // that is prevailing in this IR module. We can do so aggressively, without
+ // requiring the address to be insignificant, or that a variable be read or
+ // write-only.
+ if ((S->linkage() == GlobalValue::WeakODRLinkage ||
+ S->linkage() == GlobalValue::LinkOnceODRLinkage) &&
+ // We can have only one copy in ThinLTO that isn't prevailing, if the
+ // prevailing copy is in a native object.
+ (!IsPrevailing || ExternallyVisibleCopies > 1))
+ continue;
+
+ S->setLinkage(GlobalValue::InternalLinkage);
}
}
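As a reading aid, the early-continue chain above can be summarized as a predicate. This is a hedged sketch only; LinkageInfo is a made-up flattening of GlobalValueSummary, not an LLVM type:

struct LinkageInfo {
  bool IsLocal, IsAppending, IsAvailableExternally;
  bool IsInterposable, IsWeakOrLinkOnceODR;
};

bool canInternalize(const LinkageInfo &L, bool IsExported, bool IsPrevailing,
                    unsigned ExternallyVisibleCopies,
                    bool InternalizationEnabled) {
  if (IsExported || !InternalizationEnabled)
    return false;
  if (L.IsLocal || L.IsAppending) // the linker doesn't resolve these
    return false;
  if (L.IsAvailableExternally)    // would break function pointer equality
    return false;
  if (L.IsInterposable && !IsPrevailing)
    return false;
  // linkonce_odr/weak_odr: only when this is the single externally visible,
  // prevailing copy, per the long comment above.
  if (L.IsWeakOrLinkOnceODR &&
      (!IsPrevailing || ExternallyVisibleCopies > 1))
    return false;
  return true;
}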
@@ -524,10 +603,10 @@ LTO::ThinLTOState::ThinLTOState(ThinBackend Backend)
}
LTO::LTO(Config Conf, ThinBackend Backend,
- unsigned ParallelCodeGenParallelismLevel)
+ unsigned ParallelCodeGenParallelismLevel, LTOKind LTOMode)
: Conf(std::move(Conf)),
RegularLTO(ParallelCodeGenParallelismLevel, this->Conf),
- ThinLTO(std::move(Backend)) {}
+ ThinLTO(std::move(Backend)), LTOMode(LTOMode) {}
// Requires a destructor for MapVector<BitcodeModule>.
LTO::~LTO() = default;
@@ -668,12 +747,25 @@ Error LTO::addModule(InputFile &Input, unsigned ModI,
EnableSplitLTOUnit = LTOInfo->EnableSplitLTOUnit;
BitcodeModule BM = Input.Mods[ModI];
+
+ if ((LTOMode == LTOK_UnifiedRegular || LTOMode == LTOK_UnifiedThin) &&
+ !LTOInfo->UnifiedLTO)
+ return make_error<StringError>(
+ "unified LTO compilation must use "
+ "compatible bitcode modules (use -funified-lto)",
+ inconvertibleErrorCode());
+
+ if (LTOInfo->UnifiedLTO && LTOMode == LTOK_Default)
+ LTOMode = LTOK_UnifiedThin;
+
+ bool IsThinLTO = LTOInfo->IsThinLTO && (LTOMode != LTOK_UnifiedRegular);
+
auto ModSyms = Input.module_symbols(ModI);
addModuleToGlobalRes(ModSyms, {ResI, ResE},
- LTOInfo->IsThinLTO ? ThinLTO.ModuleMap.size() + 1 : 0,
+ IsThinLTO ? ThinLTO.ModuleMap.size() + 1 : 0,
LTOInfo->HasSummary);
- if (LTOInfo->IsThinLTO)
+ if (IsThinLTO)
return addThinLTO(BM, ModSyms, ResI, ResE);
RegularLTO.EmptyCombinedModule = false;
@@ -741,6 +833,15 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
if (Error Err = M.materializeMetadata())
return std::move(Err);
+
+ // If cfi.functions is present and we are in regular LTO mode, LowerTypeTests
+ // will rename local functions in the merged module as "<function name>.1".
+ // This causes linking errors, since other parts of the module expect the
+ // original function name.
+ if (LTOMode == LTOK_UnifiedRegular)
+ if (NamedMDNode *CfiFunctionsMD = M.getNamedMetadata("cfi.functions"))
+ M.eraseNamedMetadata(CfiFunctionsMD);
+
UpgradeDebugInfo(M);
ModuleSymbolTable SymTab;
@@ -784,7 +885,7 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
ModuleSymbolTable::Symbol Msym = *MsymI++;
Skip();
- if (GlobalValue *GV = Msym.dyn_cast<GlobalValue *>()) {
+ if (GlobalValue *GV = dyn_cast_if_present<GlobalValue *>(Msym)) {
if (Res.Prevailing) {
if (Sym.isUndefined())
continue;
@@ -822,7 +923,8 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
GV->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::
DefaultStorageClass);
}
- } else if (auto *AS = Msym.dyn_cast<ModuleSymbolTable::AsmSymbol *>()) {
+ } else if (auto *AS =
+ dyn_cast_if_present<ModuleSymbolTable::AsmSymbol *>(Msym)) {
// Collect non-prevailing symbols.
if (!Res.Prevailing)
NonPrevailingAsmSymbols.insert(AS->first);
@@ -839,8 +941,8 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
auto &CommonRes = RegularLTO.Commons[std::string(Sym.getIRName())];
CommonRes.Size = std::max(CommonRes.Size, Sym.getCommonSize());
if (uint32_t SymAlignValue = Sym.getCommonAlignment()) {
- const Align SymAlign(SymAlignValue);
- CommonRes.Align = std::max(SymAlign, CommonRes.Align.valueOrOne());
+ CommonRes.Alignment =
+ std::max(Align(SymAlignValue), CommonRes.Alignment);
}
CommonRes.Prevailing |= Res.Prevailing;
}
@@ -925,13 +1027,16 @@ Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
}
}
+ uint64_t ModuleId = ThinLTO.ModuleMap.size();
if (Error Err =
BM.readSummary(ThinLTO.CombinedIndex, BM.getModuleIdentifier(),
- ThinLTO.ModuleMap.size(), [&](GlobalValue::GUID GUID) {
+ ModuleId, [&](GlobalValue::GUID GUID) {
return ThinLTO.PrevailingModuleForGUID[GUID] ==
BM.getModuleIdentifier();
}))
return Err;
+ LLVM_DEBUG(dbgs() << "Module " << ModuleId << ": " << BM.getModuleIdentifier()
+ << "\n");
for (const InputFile::Symbol &Sym : Syms) {
assert(ResI != ResE);
@@ -1004,11 +1109,16 @@ Error LTO::checkPartiallySplit() {
Intrinsic::getName(Intrinsic::type_test));
Function *TypeCheckedLoadFunc = RegularLTO.CombinedModule->getFunction(
Intrinsic::getName(Intrinsic::type_checked_load));
+ Function *TypeCheckedLoadRelativeFunc =
+ RegularLTO.CombinedModule->getFunction(
+ Intrinsic::getName(Intrinsic::type_checked_load_relative));
// First check if there are type tests / type checked loads in the
// merged regular LTO module IR.
if ((TypeTestFunc && !TypeTestFunc->use_empty()) ||
- (TypeCheckedLoadFunc && !TypeCheckedLoadFunc->use_empty()))
+ (TypeCheckedLoadFunc && !TypeCheckedLoadFunc->use_empty()) ||
+ (TypeCheckedLoadRelativeFunc &&
+ !TypeCheckedLoadRelativeFunc->use_empty()))
return make_error<StringError>(
"inconsistent LTO Unit splitting (recompile with -fsplit-lto-unit)",
inconvertibleErrorCode());
@@ -1071,6 +1181,14 @@ Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
return StatsFileOrErr.takeError();
std::unique_ptr<ToolOutputFile> StatsFile = std::move(StatsFileOrErr.get());
+ // TODO: Ideally this would be controlled automatically by detecting that we
+ // are linking with an allocator that supports these interfaces, rather than
+ // an internal option (which would still be needed for tests, however). For
+ // example, if the library exported a symbol like __malloc_hot_cold, the linker
+ // could recognize that and set a flag in the lto::Config.
+ if (SupportsHotColdNew)
+ ThinLTO.CombinedIndex.setWithSupportsHotColdNew();
+
Error Result = runRegularLTO(AddStream);
if (!Result)
Result = runThinLTO(AddStream, Cache, GUIDPreservedSymbols);
@@ -1081,12 +1199,44 @@ Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
return Result;
}
+void lto::updateMemProfAttributes(Module &Mod,
+ const ModuleSummaryIndex &Index) {
+ if (Index.withSupportsHotColdNew())
+ return;
+
+ // The profile matcher applies hotness attributes directly for allocations,
+ // and those will cause us to generate calls to the hot/cold interfaces
+ // unconditionally. If supports-hot-cold-new was not enabled in the LTO
+ // link then assume we don't want these calls (e.g. not linking with
+ // the appropriate library, or otherwise trying to disable this behavior).
+ for (auto &F : Mod) {
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ auto *CI = dyn_cast<CallBase>(&I);
+ if (!CI)
+ continue;
+ if (CI->hasFnAttr("memprof"))
+ CI->removeFnAttr("memprof");
+ // Strip off all memprof metadata as it is no longer needed.
+ // Importantly, this avoids the addition of new memprof attributes
+ // after inlining propagation.
+ // TODO: If we support additional types of MemProf metadata beyond hot
+ // and cold, we will need to update the metadata based on the allocator
+ // APIs supported instead of completely stripping it all.
+ CI->setMetadata(LLVMContext::MD_memprof, nullptr);
+ CI->setMetadata(LLVMContext::MD_callsite, nullptr);
+ }
+ }
+ }
+}
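A hedged usage sketch: after updateMemProfAttributes runs on a module whose index does not advertise hot/cold operator new support, no call site should still carry the "memprof" attribute or !memprof/!callsite metadata. hasAnyMemProfHints is a hypothetical checker built from the same IR APIs used above:

#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

static bool hasAnyMemProfHints(const llvm::Module &M) {
  for (const llvm::Function &F : M)
    for (const llvm::BasicBlock &BB : F)
      for (const llvm::Instruction &I : BB)
        if (const auto *CB = llvm::dyn_cast<llvm::CallBase>(&I))
          if (CB->hasFnAttr("memprof") ||
              CB->getMetadata(llvm::LLVMContext::MD_memprof) ||
              CB->getMetadata(llvm::LLVMContext::MD_callsite))
            return true; // a hint survived stripping
  return false;
}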
+
Error LTO::runRegularLTO(AddStreamFn AddStream) {
// Setup optimization remarks.
auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
RegularLTO.CombinedModule->getContext(), Conf.RemarksFilename,
Conf.RemarksPasses, Conf.RemarksFormat, Conf.RemarksWithHotness,
Conf.RemarksHotnessThreshold);
+ LLVM_DEBUG(dbgs() << "Running regular LTO\n");
if (!DiagFileOrErr)
return DiagFileOrErr.takeError();
DiagnosticOutputFile = std::move(*DiagFileOrErr);
@@ -1116,7 +1266,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
if (OldGV && DL.getTypeAllocSize(OldGV->getValueType()) == I.second.Size) {
// Don't create a new global if the type is already correct, just make
// sure the alignment is correct.
- OldGV->setAlignment(I.second.Align);
+ OldGV->setAlignment(I.second.Alignment);
continue;
}
ArrayType *Ty =
@@ -1124,7 +1274,7 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
auto *GV = new GlobalVariable(*RegularLTO.CombinedModule, Ty, false,
GlobalValue::CommonLinkage,
ConstantAggregateZero::get(Ty), "");
- GV->setAlignment(I.second.Align);
+ GV->setAlignment(I.second.Alignment);
if (OldGV) {
OldGV->replaceAllUsesWith(ConstantExpr::getBitCast(GV, OldGV->getType()));
GV->takeName(OldGV);
@@ -1134,6 +1284,8 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
}
}
+ updateMemProfAttributes(*RegularLTO.CombinedModule, ThinLTO.CombinedIndex);
+
// If allowed, upgrade public vcall visibility metadata to linkage unit
// visibility before whole program devirtualization in the optimizer.
updateVCallVisibilityInModule(*RegularLTO.CombinedModule,
@@ -1148,26 +1300,39 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
if (!Conf.CodeGenOnly) {
for (const auto &R : GlobalResolutions) {
+ GlobalValue *GV =
+ RegularLTO.CombinedModule->getNamedValue(R.second.IRName);
if (!R.second.isPrevailingIRSymbol())
continue;
if (R.second.Partition != 0 &&
R.second.Partition != GlobalResolution::External)
continue;
- GlobalValue *GV =
- RegularLTO.CombinedModule->getNamedValue(R.second.IRName);
// Ignore symbols defined in other partitions.
// Also skip declarations, which are not allowed to have internal linkage.
if (!GV || GV->hasLocalLinkage() || GV->isDeclaration())
continue;
+
+ // Symbols that are marked DLLImport or DLLExport should not be
+ // internalized, as they are either externally visible or referencing
+ // external symbols. Symbols that have AvailableExternally or Appending
+ // linkage might be used by future passes and should be kept as is.
+ // These linkages are seen in Unified regular LTO, because the process
+ // of creating split LTO units introduces symbols with that linkage into
+ // one of the created modules. Normally, only the ThinLTO backend would
+ // compile this module, but Unified Regular LTO processes both
+ // modules created by the splitting process as regular LTO modules.
+ if ((LTOMode == LTOKind::LTOK_UnifiedRegular) &&
+ ((GV->getDLLStorageClass() != GlobalValue::DefaultStorageClass) ||
+ GV->hasAvailableExternallyLinkage() || GV->hasAppendingLinkage()))
+ continue;
+
GV->setUnnamedAddr(R.second.UnnamedAddr ? GlobalValue::UnnamedAddr::Global
: GlobalValue::UnnamedAddr::None);
if (EnableLTOInternalization && R.second.Partition == 0)
GV->setLinkage(GlobalValue::InternalLinkage);
}
- RegularLTO.CombinedModule->addModuleFlag(Module::Error, "LTOPostLink", 1);
-
if (Conf.PostInternalizeModuleHook &&
!Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule))
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
@@ -1396,11 +1561,10 @@ ThinBackend lto::createInProcessThinBackend(ThreadPoolStrategy Parallelism,
// Given the original \p Path to an output file, replace any path
// prefix matching \p OldPrefix with \p NewPrefix. Also, create the
// resulting directory if it does not yet exist.
-std::string lto::getThinLTOOutputFile(const std::string &Path,
- const std::string &OldPrefix,
- const std::string &NewPrefix) {
+std::string lto::getThinLTOOutputFile(StringRef Path, StringRef OldPrefix,
+ StringRef NewPrefix) {
if (OldPrefix.empty() && NewPrefix.empty())
- return Path;
+ return std::string(Path);
SmallString<128> NewPath(Path);
llvm::sys::path::replace_path_prefix(NewPath, OldPrefix, NewPrefix);
StringRef ParentPath = llvm::sys::path::parent_path(NewPath.str());
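The prefix rewrite above is llvm::sys::path::replace_path_prefix plus directory creation. A worked sketch with made-up paths (remapOutputPath is a hypothetical helper, directory creation omitted):

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Path.h"
#include <string>

// remapOutputPath("obj/foo/bar.o", "obj", "lto-out") == "lto-out/foo/bar.o";
// with both prefixes empty the input path is returned unchanged.
std::string remapOutputPath(llvm::StringRef Path, llvm::StringRef OldPrefix,
                            llvm::StringRef NewPrefix) {
  if (OldPrefix.empty() && NewPrefix.empty())
    return std::string(Path);
  llvm::SmallString<128> NewPath(Path);
  llvm::sys::path::replace_path_prefix(NewPath, OldPrefix, NewPrefix);
  return std::string(NewPath.str());
}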
@@ -1415,18 +1579,20 @@ std::string lto::getThinLTOOutputFile(const std::string &Path,
namespace {
class WriteIndexesThinBackend : public ThinBackendProc {
- std::string OldPrefix, NewPrefix;
+ std::string OldPrefix, NewPrefix, NativeObjectPrefix;
raw_fd_ostream *LinkedObjectsFile;
public:
WriteIndexesThinBackend(
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- std::string OldPrefix, std::string NewPrefix, bool ShouldEmitImportsFiles,
+ std::string OldPrefix, std::string NewPrefix,
+ std::string NativeObjectPrefix, bool ShouldEmitImportsFiles,
raw_fd_ostream *LinkedObjectsFile, lto::IndexWriteCallback OnWrite)
: ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
OnWrite, ShouldEmitImportsFiles),
OldPrefix(OldPrefix), NewPrefix(NewPrefix),
+ NativeObjectPrefix(NativeObjectPrefix),
LinkedObjectsFile(LinkedObjectsFile) {}
Error start(
@@ -1437,10 +1603,15 @@ public:
MapVector<StringRef, BitcodeModule> &ModuleMap) override {
StringRef ModulePath = BM.getModuleIdentifier();
std::string NewModulePath =
- getThinLTOOutputFile(std::string(ModulePath), OldPrefix, NewPrefix);
-
- if (LinkedObjectsFile)
- *LinkedObjectsFile << NewModulePath << '\n';
+ getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix);
+
+ if (LinkedObjectsFile) {
+ std::string ObjectPrefix =
+ NativeObjectPrefix.empty() ? NewPrefix : NativeObjectPrefix;
+ std::string LinkedObjectsFilePath =
+ getThinLTOOutputFile(ModulePath, OldPrefix, ObjectPrefix);
+ *LinkedObjectsFile << LinkedObjectsFilePath << '\n';
+ }
if (auto E = emitFiles(ImportList, ModulePath, NewModulePath))
return E;
@@ -1459,19 +1630,21 @@ public:
} // end anonymous namespace
ThinBackend lto::createWriteIndexesThinBackend(
- std::string OldPrefix, std::string NewPrefix, bool ShouldEmitImportsFiles,
+ std::string OldPrefix, std::string NewPrefix,
+ std::string NativeObjectPrefix, bool ShouldEmitImportsFiles,
raw_fd_ostream *LinkedObjectsFile, IndexWriteCallback OnWrite) {
return [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
AddStreamFn AddStream, FileCache Cache) {
return std::make_unique<WriteIndexesThinBackend>(
Conf, CombinedIndex, ModuleToDefinedGVSummaries, OldPrefix, NewPrefix,
- ShouldEmitImportsFiles, LinkedObjectsFile, OnWrite);
+ NativeObjectPrefix, ShouldEmitImportsFiles, LinkedObjectsFile, OnWrite);
};
}
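A hedged sketch of how a linker driver might call the widened factory; the prefixes and stream name are invented, only the parameter list comes from this patch:

#include "llvm/LTO/LTO.h"
#include "llvm/Support/raw_ostream.h"

// Distinct prefixes: index/imports files are written under "thinlto-index",
// while the list of native objects for the final link uses "native-obj".
llvm::lto::ThinBackend
makeIndexWritingBackend(llvm::raw_fd_ostream &LinkedObjects) {
  return llvm::lto::createWriteIndexesThinBackend(
      /*OldPrefix=*/"obj", /*NewPrefix=*/"thinlto-index",
      /*NativeObjectPrefix=*/"native-obj",
      /*ShouldEmitImportsFiles=*/true, &LinkedObjects, /*OnWrite=*/nullptr);
}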
Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+ LLVM_DEBUG(dbgs() << "Running ThinLTO\n");
ThinLTO.CombinedIndex.releaseTemporaryMemory();
timeTraceProfilerBegin("ThinLink", StringRef(""));
auto TimeTraceScopeExit = llvm::make_scope_exit([]() {
@@ -1536,9 +1709,17 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
runWholeProgramDevirtOnIndex(ThinLTO.CombinedIndex, ExportedGUIDs,
LocalWPDTargetsMap);
+ auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
+ return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath();
+ };
+ if (EnableMemProfContextDisambiguation) {
+ MemProfContextDisambiguation ContextDisambiguation;
+ ContextDisambiguation.run(ThinLTO.CombinedIndex, isPrevailing);
+ }
+
if (Conf.OptLevel > 0)
ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
- ImportLists, ExportLists);
+ isPrevailing, ImportLists, ExportLists);
// Figure out which symbols need to be internalized. This also needs to happen
// at -O0 because summary-based DCE is implemented using internalization, and
@@ -1577,10 +1758,6 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
updateIndexWPDForExports(ThinLTO.CombinedIndex, isExported,
LocalWPDTargetsMap);
- auto isPrevailing = [&](GlobalValue::GUID GUID,
- const GlobalValueSummary *S) {
- return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath();
- };
thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported,
isPrevailing);
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 1c2ca253af35..29e288767608 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -25,7 +25,6 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/LTO/LTO.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Passes/PassBuilder.h"
@@ -38,8 +37,10 @@
#include "llvm/Support/Program.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
@@ -232,22 +233,24 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
unsigned OptLevel, bool IsThinLTO,
ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary) {
+ auto FS = vfs::getRealFileSystem();
std::optional<PGOOptions> PGOOpt;
if (!Conf.SampleProfile.empty())
PGOOpt = PGOOptions(Conf.SampleProfile, "", Conf.ProfileRemapping,
- PGOOptions::SampleUse, PGOOptions::NoCSAction, true);
+ /*MemoryProfile=*/"", FS, PGOOptions::SampleUse,
+ PGOOptions::NoCSAction, true);
else if (Conf.RunCSIRInstr) {
PGOOpt = PGOOptions("", Conf.CSIRProfile, Conf.ProfileRemapping,
- PGOOptions::IRUse, PGOOptions::CSIRInstr,
- Conf.AddFSDiscriminator);
+ /*MemoryProfile=*/"", FS, PGOOptions::IRUse,
+ PGOOptions::CSIRInstr, Conf.AddFSDiscriminator);
} else if (!Conf.CSIRProfile.empty()) {
PGOOpt = PGOOptions(Conf.CSIRProfile, "", Conf.ProfileRemapping,
- PGOOptions::IRUse, PGOOptions::CSIRUse,
- Conf.AddFSDiscriminator);
+ /*MemoryProfile=*/"", FS, PGOOptions::IRUse,
+ PGOOptions::CSIRUse, Conf.AddFSDiscriminator);
NoPGOWarnMismatch = !Conf.PGOWarnMismatch;
} else if (Conf.AddFSDiscriminator) {
- PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction,
- PGOOptions::NoCSAction, true);
+ PGOOpt = PGOOptions("", "", "", /*MemoryProfile=*/"", nullptr,
+ PGOOptions::NoAction, PGOOptions::NoCSAction, true);
}
TM->setPGOOption(PGOOpt);
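The PGOOptions calls above gain two arguments: a MemoryProfile path (empty here) and a vfs::FileSystem used to read profile files. A hedged sketch that mirrors the sample-profile call shape from this hunk; makeSampleUsePGOOptions is hypothetical and the trailing bool simply follows that call:

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/PGOOptions.h"
#include "llvm/Support/VirtualFileSystem.h"
#include <string>

llvm::PGOOptions makeSampleUsePGOOptions(llvm::StringRef SampleProfile) {
  auto FS = llvm::vfs::getRealFileSystem();
  return llvm::PGOOptions(std::string(SampleProfile), "",
                          /*ProfileRemappingFile=*/"", /*MemoryProfile=*/"",
                          FS, llvm::PGOOptions::SampleUse,
                          llvm::PGOOptions::NoCSAction, true);
}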
@@ -257,8 +260,9 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
ModuleAnalysisManager MAM;
PassInstrumentationCallbacks PIC;
- StandardInstrumentations SI(Mod.getContext(), Conf.DebugPassManager);
- SI.registerCallbacks(PIC, &FAM);
+ StandardInstrumentations SI(Mod.getContext(), Conf.DebugPassManager,
+ Conf.VerifyEach);
+ SI.registerCallbacks(PIC, &MAM);
PassBuilder PB(TM, Conf.PTO, PGOOpt, &PIC);
RegisterPassPlugins(Conf.PassPlugins, PB);
@@ -501,6 +505,7 @@ Error lto::backend(const Config &C, AddStreamFn AddStream,
std::unique_ptr<TargetMachine> TM = createTargetMachine(C, *TOrErr, Mod);
+ LLVM_DEBUG(dbgs() << "Running regular LTO\n");
if (!C.CodeGenOnly) {
if (!opt(C, TM.get(), 0, Mod, /*IsThinLTO=*/false,
/*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr,
@@ -563,8 +568,7 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
// the module, if applicable.
Mod.setPartialSampleProfileRatio(CombinedIndex);
- updatePublicTypeTestCalls(Mod, CombinedIndex.withWholeProgramVisibility());
-
+ LLVM_DEBUG(dbgs() << "Running ThinLTO\n");
if (Conf.CodeGenOnly) {
codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex);
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
@@ -649,6 +653,10 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
if (Error Err = Importer.importFunctions(Mod, ImportList).takeError())
return Err;
+ // Do this after any importing so that imported code is updated.
+ updateMemProfAttributes(Mod, CombinedIndex);
+ updatePublicTypeTestCalls(Mod, CombinedIndex.withWholeProgramVisibility());
+
if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod))
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp
index ae7b7e4b5481..1402da7fbbd2 100644
--- a/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -43,12 +43,10 @@
#include "llvm/Linker/Linker.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
@@ -57,6 +55,8 @@
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -244,7 +244,7 @@ bool LTOCodeGenerator::writeMergedModules(StringRef Path) {
bool LTOCodeGenerator::useAIXSystemAssembler() {
const auto &Triple = TargetMach->getTargetTriple();
- return Triple.isOSAIX();
+ return Triple.isOSAIX() && Config.Options.DisableIntegratedAS;
}
bool LTOCodeGenerator::runAIXSystemAssembler(SmallString<128> &AssemblyFile) {
@@ -617,9 +617,6 @@ bool LTOCodeGenerator::optimize() {
// Mark which symbols can not be internalized
this->applyScopeRestrictions();
- // Write LTOPostLink flag for passes that require all the modules.
- MergedModule->addModuleFlag(Module::Error, "LTOPostLink", 1);
-
// Add an appropriate DataLayout instance for this module...
MergedModule->setDataLayout(TargetMach->createDataLayout());
diff --git a/llvm/lib/LTO/LTOModule.cpp b/llvm/lib/LTO/LTOModule.cpp
index 39f0f9842ec3..868169e78225 100644
--- a/llvm/lib/LTO/LTOModule.cpp
+++ b/llvm/lib/LTO/LTOModule.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/LTO/legacy/LTOModule.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
@@ -26,18 +25,19 @@
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
#include <system_error>
using namespace llvm;
@@ -348,7 +348,7 @@ void LTOModule::addDefinedDataSymbol(ModuleSymbolTable::Symbol Sym) {
Buffer.c_str();
}
- const GlobalValue *V = Sym.get<GlobalValue *>();
+ const GlobalValue *V = cast<GlobalValue *>(Sym);
addDefinedDataSymbol(Buffer, V);
}
@@ -406,7 +406,7 @@ void LTOModule::addDefinedFunctionSymbol(ModuleSymbolTable::Symbol Sym) {
Buffer.c_str();
}
- const Function *F = cast<Function>(Sym.get<GlobalValue *>());
+ const Function *F = cast<Function>(cast<GlobalValue *>(Sym));
addDefinedFunctionSymbol(Buffer, F);
}
@@ -556,7 +556,7 @@ void LTOModule::addPotentialUndefinedSymbol(ModuleSymbolTable::Symbol Sym,
info.name = IterBool.first->first();
- const GlobalValue *decl = Sym.dyn_cast<GlobalValue *>();
+ const GlobalValue *decl = dyn_cast_if_present<GlobalValue *>(Sym);
if (decl->hasExternalWeakLinkage())
info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF;
@@ -569,7 +569,7 @@ void LTOModule::addPotentialUndefinedSymbol(ModuleSymbolTable::Symbol Sym,
void LTOModule::parseSymbols() {
for (auto Sym : SymTab.symbols()) {
- auto *GV = Sym.dyn_cast<GlobalValue *>();
+ auto *GV = dyn_cast_if_present<GlobalValue *>(Sym);
uint32_t Flags = SymTab.getSymbolFlags(Sym);
if (Flags & object::BasicSymbolRef::SF_FormatSpecific)
continue;
@@ -691,7 +691,7 @@ Expected<uint32_t> LTOModule::getMachOCPUSubType() const {
bool LTOModule::hasCtorDtor() const {
for (auto Sym : SymTab.symbols()) {
- if (auto *GV = Sym.dyn_cast<GlobalValue *>()) {
+ if (auto *GV = dyn_cast_if_present<GlobalValue *>(Sym)) {
StringRef Name = GV->getName();
if (Name.consume_front("llvm.global_")) {
if (Name.equals("ctors") || Name.equals("dtors"))
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index 5b137a8f8cb3..24cd6e1a0b41 100644
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -36,7 +36,6 @@
#include "llvm/IRReader/IRReader.h"
#include "llvm/LTO/LTO.h"
#include "llvm/LTO/SummaryBasedOptimizations.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Passes/PassBuilder.h"
@@ -45,14 +44,16 @@
#include "llvm/Support/CachePruning.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/SmallVectorMemoryBuffer.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
#include "llvm/Transforms/IPO/Internalize.h"
@@ -150,7 +151,7 @@ static StringMap<lto::InputFile *>
generateModuleMap(std::vector<std::unique_ptr<lto::InputFile>> &Modules) {
StringMap<lto::InputFile *> ModuleMap;
for (auto &M : Modules) {
- assert(ModuleMap.find(M->getName()) == ModuleMap.end() &&
+ assert(!ModuleMap.contains(M->getName()) &&
"Expect unique Buffer Identifier");
ModuleMap[M->getName()] = M.get();
}
@@ -245,7 +246,7 @@ static void optimizeModule(Module &TheModule, TargetMachine &TM,
PassInstrumentationCallbacks PIC;
StandardInstrumentations SI(TheModule.getContext(), DebugPassManager);
- SI.registerCallbacks(PIC, &FAM);
+ SI.registerCallbacks(PIC, &MAM);
PipelineTuningOptions PTO;
PTO.LoopVectorization = true;
PTO.SLPVectorization = true;
@@ -415,29 +416,14 @@ public:
if (EntryPath.empty())
return;
- // Write to a temporary to avoid race condition
- SmallString<128> TempFilename;
- SmallString<128> CachePath(EntryPath);
- llvm::sys::path::remove_filename(CachePath);
- sys::path::append(TempFilename, CachePath, "Thin-%%%%%%.tmp.o");
-
- if (auto Err = handleErrors(
- llvm::writeFileAtomically(TempFilename, EntryPath,
- OutputBuffer.getBuffer()),
- [](const llvm::AtomicFileWriteError &E) {
- std::string ErrorMsgBuffer;
- llvm::raw_string_ostream S(ErrorMsgBuffer);
- E.log(S);
-
- if (E.Error ==
- llvm::atomic_write_error::failed_to_create_uniq_file) {
- errs() << "Error: " << ErrorMsgBuffer << "\n";
- report_fatal_error("ThinLTO: Can't get a temporary file");
- }
- })) {
- // FIXME
- consumeError(std::move(Err));
- }
+ if (auto Err = llvm::writeToOutput(
+ EntryPath, [&OutputBuffer](llvm::raw_ostream &OS) -> llvm::Error {
+ OS << OutputBuffer.getBuffer();
+ return llvm::Error::success();
+ }))
+ report_fatal_error(llvm::formatv("ThinLTO: Can't write file {0}: {1}",
+ EntryPath,
+ toString(std::move(Err)).c_str()));
}
};
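The replacement relies on llvm::writeToOutput, which centralizes the temporary-file handling the removed code did by hand. A minimal sketch of the call shape (writeCacheEntry is a hypothetical wrapper):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

llvm::Error writeCacheEntry(llvm::StringRef EntryPath, llvm::StringRef Bytes) {
  return llvm::writeToOutput(EntryPath,
                             [&](llvm::raw_ostream &OS) -> llvm::Error {
                               OS << Bytes;
                               return llvm::Error::success();
                             });
}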
@@ -452,11 +438,6 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
bool DisableCodeGen, StringRef SaveTempsDir,
bool Freestanding, unsigned OptLevel, unsigned count,
bool DebugPassManager) {
- // See comment at call to updateVCallVisibilityInIndex() for why
- // WholeProgramVisibilityEnabledInLTO is false.
- updatePublicTypeTestCalls(TheModule,
- /* WholeProgramVisibilityEnabledInLTO */ false);
-
// "Benchmark"-like optimization: single-source case
bool SingleModule = (ModuleMap.size() == 1);
@@ -487,13 +468,18 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
// Save internalized bitcode
saveTempBitcode(TheModule, SaveTempsDir, count, ".2.internalized.bc");
- if (!SingleModule) {
+ if (!SingleModule)
crossImportIntoModule(TheModule, Index, ModuleMap, ImportList,
ClearDSOLocalOnDeclarations);
- // Save temps: after cross-module import.
- saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
- }
+ // Do this after any importing so that imported code is updated.
+ // See comment at call to updateVCallVisibilityInIndex() for why
+ // WholeProgramVisibilityEnabledInLTO is false.
+ updatePublicTypeTestCalls(TheModule,
+ /* WholeProgramVisibilityEnabledInLTO */ false);
+
+ // Save temps: after cross-module import.
+ saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
optimizeModule(TheModule, TM, OptLevel, Freestanding, DebugPassManager,
&Index);
@@ -714,15 +700,17 @@ void ThinLTOCodeGenerator::promote(Module &TheModule, ModuleSummaryIndex &Index,
// Compute "dead" symbols, we don't want to import/export these!
computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
+ // Compute prevailing symbols
+ DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
+ computePrevailingCopies(Index, PrevailingCopy);
+
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
- ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
+ ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries,
+ IsPrevailing(PrevailingCopy), ImportLists,
ExportLists);
- DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
- computePrevailingCopies(Index, PrevailingCopy);
-
// Resolve prevailing symbols
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols,
@@ -764,10 +752,15 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
// Compute "dead" symbols, we don't want to import/export these!
computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
+ // Compute prevailing symbols
+ DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
+ computePrevailingCopies(Index, PrevailingCopy);
+
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
- ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
+ ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries,
+ IsPrevailing(PrevailingCopy), ImportLists,
ExportLists);
auto &ImportList = ImportLists[TheModule.getModuleIdentifier()];
@@ -799,10 +792,15 @@ void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
// Compute "dead" symbols, we don't want to import/export these!
computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
+ // Compute prevailing symbols
+ DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
+ computePrevailingCopies(Index, PrevailingCopy);
+
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
- ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
+ ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries,
+ IsPrevailing(PrevailingCopy), ImportLists,
ExportLists);
llvm::gatherImportedSummariesForModule(
@@ -832,10 +830,15 @@ void ThinLTOCodeGenerator::emitImports(Module &TheModule, StringRef OutputName,
// Compute "dead" symbols, we don't want to import/export these!
computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
+ // Compute prevailing symbols
+ DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
+ computePrevailingCopies(Index, PrevailingCopy);
+
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
- ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
+ ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries,
+ IsPrevailing(PrevailingCopy), ImportLists,
ExportLists);
std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
@@ -874,10 +877,15 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
// Compute "dead" symbols, we don't want to import/export these!
computeDeadSymbolsInIndex(Index, GUIDPreservedSymbols);
+ // Compute prevailing symbols
+ DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
+ computePrevailingCopies(Index, PrevailingCopy);
+
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
- ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
+ ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries,
+ IsPrevailing(PrevailingCopy), ImportLists,
ExportLists);
auto &ExportList = ExportLists[ModuleIdentifier];
@@ -886,9 +894,6 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
if (ExportList.empty() && GUIDPreservedSymbols.empty())
return;
- DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
- computePrevailingCopies(Index, PrevailingCopy);
-
// Resolve prevailing symbols
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
resolvePrevailingInIndex(Index, ResolvedODR, GUIDPreservedSymbols,
@@ -1068,11 +1073,16 @@ void ThinLTOCodeGenerator::run() {
for (auto GUID : ExportedGUIDs)
GUIDPreservedSymbols.insert(GUID);
+ // Compute prevailing symbols
+ DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
+ computePrevailingCopies(*Index, PrevailingCopy);
+
// Collect the import/export lists for all modules from the call-graph in the
// combined index.
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
- ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists,
+ ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries,
+ IsPrevailing(PrevailingCopy), ImportLists,
ExportLists);
// We use a std::map here to be able to have a defined ordering when
@@ -1081,9 +1091,6 @@ void ThinLTOCodeGenerator::run() {
// on the index, and nuke this map.
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
- DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
- computePrevailingCopies(*Index, PrevailingCopy);
-
// Resolve prevailing symbols, this has to be computed early because it
// impacts the caching.
resolvePrevailingInIndex(*Index, ResolvedODR, GUIDPreservedSymbols,
diff --git a/llvm/lib/LTO/UpdateCompilerUsed.cpp b/llvm/lib/LTO/UpdateCompilerUsed.cpp
index 040e1106523c..8dff5418dedb 100644
--- a/llvm/lib/LTO/UpdateCompilerUsed.cpp
+++ b/llvm/lib/LTO/UpdateCompilerUsed.cpp
@@ -14,7 +14,6 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Mangler.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Target/TargetMachine.h"
diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp
index 517e2dc8ebe0..df090c5990e6 100644
--- a/llvm/lib/Linker/IRMover.cpp
+++ b/llvm/lib/Linker/IRMover.cpp
@@ -11,7 +11,6 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -28,6 +27,7 @@
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Path.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <optional>
#include <utility>
@@ -409,6 +409,10 @@ class IRLinker {
std::vector<GlobalValue *> Worklist;
std::vector<std::pair<GlobalValue *, Value*>> RAUWWorklist;
+ /// Set of globals with eagerly copied metadata that may require remapping.
+ /// This remapping is performed after metadata linking.
+ DenseSet<GlobalObject *> UnmappedMetadata;
+
void maybeAdd(GlobalValue *GV) {
if (ValuesToLink.insert(GV).second)
Worklist.push_back(GV);
@@ -750,8 +754,11 @@ GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV,
if (auto *NewGO = dyn_cast<GlobalObject>(NewGV)) {
// Metadata for global variables and function declarations is copied eagerly.
- if (isa<GlobalVariable>(SGV) || SGV->isDeclaration())
+ if (isa<GlobalVariable>(SGV) || SGV->isDeclaration()) {
NewGO->copyMetadata(cast<GlobalObject>(SGV), 0);
+ if (SGV->isDeclaration() && NewGO->hasMetadata())
+ UnmappedMetadata.insert(NewGO);
+ }
}
// Remove these copied constants in case this stays a declaration, since
@@ -1056,6 +1063,10 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
// as well.
if (Function *F = dyn_cast<Function>(NewGV))
if (auto Remangled = Intrinsic::remangleIntrinsicFunction(F)) {
+ // Note: remangleIntrinsicFunction does not copy metadata and as such
+ // F should not occur in the set of objects with unmapped metadata.
+ // If this assertion fails then remangleIntrinsicFunction needs updating.
+ assert(!UnmappedMetadata.count(F) && "intrinsic has unmapped metadata");
NewGV->eraseFromParent();
NewGV = *Remangled;
NeedsRenaming = false;
@@ -1200,39 +1211,7 @@ void IRLinker::prepareCompileUnitsForImport() {
// size inefficient.
CU->replaceGlobalVariables(nullptr);
- // Imported entities only need to be mapped in if they have local
- // scope, as those might correspond to an imported entity inside a
- // function being imported (any locally scoped imported entities that
- // don't end up referenced by an imported function will not be emitted
- // into the object). Imported entities not in a local scope
- // (e.g. on the namespace) only need to be emitted by the originating
- // module. Create a list of the locally scoped imported entities, and
- // replace the source CUs imported entity list with the new list, so
- // only those are mapped in.
- // FIXME: Locally-scoped imported entities could be moved to the
- // functions they are local to instead of listing them on the CU, and
- // we would naturally only link in those needed by function importing.
- SmallVector<TrackingMDNodeRef, 4> AllImportedModules;
- bool ReplaceImportedEntities = false;
- for (auto *IE : CU->getImportedEntities()) {
- DIScope *Scope = IE->getScope();
- assert(Scope && "Invalid Scope encoding!");
- if (isa<DILocalScope>(Scope))
- AllImportedModules.emplace_back(IE);
- else
- ReplaceImportedEntities = true;
- }
- if (ReplaceImportedEntities) {
- if (!AllImportedModules.empty())
- CU->replaceImportedEntities(MDTuple::get(
- CU->getContext(),
- SmallVector<Metadata *, 16>(AllImportedModules.begin(),
- AllImportedModules.end())));
- else
- // If there were no local scope imported entities, we can map
- // the whole list to nullptr.
- CU->replaceImportedEntities(nullptr);
- }
+ CU->replaceImportedEntities(nullptr);
}
}
@@ -1651,6 +1630,13 @@ Error IRLinker::run() {
// are properly remapped.
linkNamedMDNodes();
+ // Clean up any global objects with potentially unmapped metadata.
+ // Specifically declarations which did not become definitions.
+ for (GlobalObject *NGO : UnmappedMetadata) {
+ if (NGO->isDeclaration())
+ Mapper.remapGlobalObjectMetadata(*NGO);
+ }
+
if (!IsPerformingImport && !SrcM->getModuleInlineAsm().empty()) {
// Append the module inline asm string.
DstM.appendModuleInlineAsm(adjustInlineAsm(SrcM->getModuleInlineAsm(),
@@ -1671,15 +1657,16 @@ Error IRLinker::run() {
// Reorder the globals just added to the destination module to match their
// original order in the source module.
- Module::GlobalListType &Globals = DstM.getGlobalList();
for (GlobalVariable &GV : SrcM->globals()) {
if (GV.hasAppendingLinkage())
continue;
Value *NewValue = Mapper.mapValue(GV);
if (NewValue) {
auto *NewGV = dyn_cast<GlobalVariable>(NewValue->stripPointerCasts());
- if (NewGV)
- Globals.splice(Globals.end(), Globals, NewGV->getIterator());
+ if (NewGV) {
+ NewGV->removeFromParent();
+ DstM.insertGlobalVariable(NewGV);
+ }
}
}
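The reordering loop no longer touches Module::getGlobalList; it detaches the variable and re-appends it through the Module API. The same idiom in isolation (moveGlobalToEnd is a hypothetical helper):

#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"

void moveGlobalToEnd(llvm::Module &M, llvm::GlobalVariable &GV) {
  GV.removeFromParent();       // detach from its current position
  M.insertGlobalVariable(&GV); // re-insert at the end of M's global list
}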
diff --git a/llvm/lib/MC/DXContainerPSVInfo.cpp b/llvm/lib/MC/DXContainerPSVInfo.cpp
new file mode 100644
index 000000000000..148e56c6b5bc
--- /dev/null
+++ b/llvm/lib/MC/DXContainerPSVInfo.cpp
@@ -0,0 +1,54 @@
+//===- llvm/MC/DXContainerPSVInfo.cpp - DXContainer PSVInfo -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/DXContainerPSVInfo.h"
+#include "llvm/BinaryFormat/DXContainer.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::mcdxbc;
+using namespace llvm::dxbc::PSV;
+
+void PSVRuntimeInfo::write(raw_ostream &OS, uint32_t Version) const {
+ uint32_t InfoSize;
+ uint32_t BindingSize;
+ switch (Version) {
+ case 0:
+ InfoSize = sizeof(dxbc::PSV::v0::RuntimeInfo);
+ BindingSize = sizeof(dxbc::PSV::v0::ResourceBindInfo);
+ break;
+ case 1:
+ InfoSize = sizeof(dxbc::PSV::v1::RuntimeInfo);
+ BindingSize = sizeof(dxbc::PSV::v0::ResourceBindInfo);
+ break;
+ case 2:
+ default:
+ InfoSize = sizeof(dxbc::PSV::v2::RuntimeInfo);
+ BindingSize = sizeof(dxbc::PSV::v2::ResourceBindInfo);
+ }
+ uint32_t InfoSizeSwapped = InfoSize;
+ if (sys::IsBigEndianHost)
+ sys::swapByteOrder(InfoSizeSwapped);
+ // Write the size of the info.
+ OS.write(reinterpret_cast<const char *>(&InfoSizeSwapped), sizeof(uint32_t));
+ // Write the info itself.
+ OS.write(reinterpret_cast<const char *>(&BaseData), InfoSize);
+
+ uint32_t ResourceCount = static_cast<uint32_t>(Resources.size());
+ uint32_t BindingSizeSwapped = BindingSize;
+ if (sys::IsBigEndianHost) {
+ sys::swapByteOrder(ResourceCount);
+ sys::swapByteOrder(BindingSizeSwapped);
+ }
+
+ OS.write(reinterpret_cast<const char *>(&ResourceCount), sizeof(uint32_t));
+ OS.write(reinterpret_cast<const char *>(&BindingSizeSwapped), sizeof(uint32_t));
+
+ for (const auto &Res : Resources)
+ OS.write(reinterpret_cast<const char *>(&Res), BindingSize);
+}
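The writer emits its size fields in little-endian order regardless of host, swapping on big-endian hosts before calling OS.write. The same idiom for a single 32-bit field (writeU32LE is a hypothetical helper):

#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

void writeU32LE(llvm::raw_ostream &OS, uint32_t Value) {
  if (llvm::sys::IsBigEndianHost)
    llvm::sys::swapByteOrder(Value); // flip to little-endian representation
  OS.write(reinterpret_cast<const char *>(&Value), sizeof(uint32_t));
}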
diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp
index 07ed3409707b..6a6befdd3054 100644
--- a/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/llvm/lib/MC/ELFObjectWriter.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator.h"
@@ -43,11 +44,11 @@
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Host.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -128,11 +129,11 @@ struct ELFWriter {
/// @}
// This holds the symbol table index of the last local symbol.
- unsigned LastLocalSymbolIndex;
+ unsigned LastLocalSymbolIndex = ~0u;
// This holds the .strtab section index.
- unsigned StringTableIndex;
+ unsigned StringTableIndex = ~0u;
// This holds the .symtab section index.
- unsigned SymbolTableIndex;
+ unsigned SymbolTableIndex = ~0u;
// Sections in the order they are to be output in the section table.
std::vector<const MCSectionELF *> SectionTable;
diff --git a/llvm/lib/MC/MCAsmBackend.cpp b/llvm/lib/MC/MCAsmBackend.cpp
index c4e505146d44..64bbc63719c7 100644
--- a/llvm/lib/MC/MCAsmBackend.cpp
+++ b/llvm/lib/MC/MCAsmBackend.cpp
@@ -22,7 +22,8 @@
using namespace llvm;
-MCAsmBackend::MCAsmBackend(support::endianness Endian) : Endian(Endian) {}
+MCAsmBackend::MCAsmBackend(support::endianness Endian, unsigned RelaxFixupKind)
+ : Endian(Endian), RelaxFixupKind(RelaxFixupKind) {}
MCAsmBackend::~MCAsmBackend() = default;
@@ -61,6 +62,9 @@ MCAsmBackend::createDwoObjectWriter(raw_pwrite_stream &OS,
raw_pwrite_stream &DwoOS) const {
auto TW = createObjectTargetWriter();
switch (TW->getFormat()) {
+ case Triple::COFF:
+ return createWinCOFFDwoObjectWriter(
+ cast<MCWinCOFFObjectTargetWriter>(std::move(TW)), OS, DwoOS);
case Triple::ELF:
return createELFDwoObjectWriter(
cast<MCELFObjectTargetWriter>(std::move(TW)), OS, DwoOS,
@@ -69,7 +73,7 @@ MCAsmBackend::createDwoObjectWriter(raw_pwrite_stream &OS,
return createWasmDwoObjectWriter(
cast<MCWasmObjectTargetWriter>(std::move(TW)), OS, DwoOS);
default:
- report_fatal_error("dwo only supported with ELF and Wasm");
+ report_fatal_error("dwo only supported with COFF, ELF, and Wasm");
}
}
@@ -115,3 +119,19 @@ bool MCAsmBackend::fixupNeedsRelaxationAdvanced(
return true;
return fixupNeedsRelaxation(Fixup, Value, DF, Layout);
}
+
+bool MCAsmBackend::isDarwinCanonicalPersonality(const MCSymbol *Sym) const {
+ // Consider a NULL personality (ie., no personality encoding) to be canonical
+ // because it's always at 0.
+ if (!Sym)
+ return true;
+
+ if (!Sym->isMachO())
+ llvm_unreachable("Expected MachO symbols only");
+
+ StringRef name = Sym->getName();
+ // XXX: We intentionally leave out "___gcc_personality_v0" because, despite
+ // being system-defined like these two, it is not very commonly used.
+ // Reserving an empty slot for it seems silly.
+ return name == "___gxx_personality_v0" || name == "___objc_personality_v0";
+}
diff --git a/llvm/lib/MC/MCAsmInfo.cpp b/llvm/lib/MC/MCAsmInfo.cpp
index b8d0021ed432..71564ba9d5a1 100644
--- a/llvm/lib/MC/MCAsmInfo.cpp
+++ b/llvm/lib/MC/MCAsmInfo.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
diff --git a/llvm/lib/MC/MCAsmInfoXCOFF.cpp b/llvm/lib/MC/MCAsmInfoXCOFF.cpp
index ae7afeb30099..b07e95e45d55 100644
--- a/llvm/lib/MC/MCAsmInfoXCOFF.cpp
+++ b/llvm/lib/MC/MCAsmInfoXCOFF.cpp
@@ -52,7 +52,6 @@ MCAsmInfoXCOFF::MCAsmInfoXCOFF() {
COMMDirectiveAlignmentIsInBytes = false;
LCOMMDirectiveAlignmentType = LCOMM::Log2Alignment;
HasDotTypeDotSizeDirective = false;
- UseIntegratedAssembler = false;
ParseInlineAsmUsingAsmParser = true;
NeedsFunctionDescriptors = true;
diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp
index 282bdb95acac..06de70ad2f39 100644
--- a/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/llvm/lib/MC/MCAsmStreamer.cpp
@@ -36,6 +36,7 @@
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
+#include <algorithm>
#include <optional>
using namespace llvm;
@@ -194,12 +195,13 @@ public:
void emitXCOFFRenameDirective(const MCSymbol *Name,
StringRef Rename) override;
- void emitXCOFFRefDirective(StringRef Name) override;
+ void emitXCOFFRefDirective(const MCSymbol *Symbol) override;
- void emitXCOFFExceptDirective(const MCSymbol *Symbol,
+ void emitXCOFFExceptDirective(const MCSymbol *Symbol,
const MCSymbol *Trap,
unsigned Lang, unsigned Reason,
unsigned FunctionSize, bool hasDebug) override;
+ void emitXCOFFCInfoSym(StringRef Name, StringRef Metadata) override;
void emitELFSize(MCSymbol *Symbol, const MCExpr *Value) override;
void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
@@ -328,27 +330,28 @@ public:
void emitCFIBKeyFrame() override;
void emitCFIMTETaggedFrame() override;
void emitCFISections(bool EH, bool Debug) override;
- void emitCFIDefCfa(int64_t Register, int64_t Offset) override;
- void emitCFIDefCfaOffset(int64_t Offset) override;
- void emitCFIDefCfaRegister(int64_t Register) override;
+ void emitCFIDefCfa(int64_t Register, int64_t Offset, SMLoc Loc) override;
+ void emitCFIDefCfaOffset(int64_t Offset, SMLoc Loc) override;
+ void emitCFIDefCfaRegister(int64_t Register, SMLoc Loc) override;
void emitCFILLVMDefAspaceCfa(int64_t Register, int64_t Offset,
- int64_t AddressSpace) override;
- void emitCFIOffset(int64_t Register, int64_t Offset) override;
+ int64_t AddressSpace, SMLoc Loc) override;
+ void emitCFIOffset(int64_t Register, int64_t Offset, SMLoc Loc) override;
void emitCFIPersonality(const MCSymbol *Sym, unsigned Encoding) override;
void emitCFILsda(const MCSymbol *Sym, unsigned Encoding) override;
- void emitCFIRememberState() override;
- void emitCFIRestoreState() override;
- void emitCFIRestore(int64_t Register) override;
- void emitCFISameValue(int64_t Register) override;
- void emitCFIRelOffset(int64_t Register, int64_t Offset) override;
- void emitCFIAdjustCfaOffset(int64_t Adjustment) override;
- void emitCFIEscape(StringRef Values) override;
- void emitCFIGnuArgsSize(int64_t Size) override;
+ void emitCFIRememberState(SMLoc Loc) override;
+ void emitCFIRestoreState(SMLoc Loc) override;
+ void emitCFIRestore(int64_t Register, SMLoc Loc) override;
+ void emitCFISameValue(int64_t Register, SMLoc Loc) override;
+ void emitCFIRelOffset(int64_t Register, int64_t Offset, SMLoc Loc) override;
+ void emitCFIAdjustCfaOffset(int64_t Adjustment, SMLoc Loc) override;
+ void emitCFIEscape(StringRef Values, SMLoc Loc) override;
+ void emitCFIGnuArgsSize(int64_t Size, SMLoc Loc) override;
void emitCFISignalFrame() override;
- void emitCFIUndefined(int64_t Register) override;
- void emitCFIRegister(int64_t Register1, int64_t Register2) override;
- void emitCFIWindowSave() override;
- void emitCFINegateRAState() override;
+ void emitCFIUndefined(int64_t Register, SMLoc Loc) override;
+ void emitCFIRegister(int64_t Register1, int64_t Register2,
+ SMLoc Loc) override;
+ void emitCFIWindowSave(SMLoc Loc) override;
+ void emitCFINegateRAState(SMLoc Loc) override;
void emitCFIReturnColumn(int64_t Register) override;
void emitWinCFIStartProc(const MCSymbol *Symbol, SMLoc Loc) override;
@@ -377,8 +380,9 @@ public:
void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
void emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
- uint64_t Attr,
- const MCPseudoProbeInlineStack &InlineStack, MCSymbol *FnSym) override;
+ uint64_t Attr, uint64_t Discriminator,
+ const MCPseudoProbeInlineStack &InlineStack,
+ MCSymbol *FnSym) override;
void emitBundleAlignMode(Align Alignment) override;
void emitBundleLock(bool AlignToEnd) override;
@@ -772,6 +776,9 @@ bool MCAsmStreamer::emitSymbolAttribute(MCSymbol *Symbol,
case MCSA_Memtag:
OS << "\t.memtag\t";
break;
+ case MCSA_WeakAntiDep:
+ OS << "\t.weak_anti_dep\t";
+ break;
}
Symbol->print(OS, MAI);
@@ -943,13 +950,14 @@ void MCAsmStreamer::emitXCOFFRenameDirective(const MCSymbol *Name,
EmitEOL();
}
-void MCAsmStreamer::emitXCOFFRefDirective(StringRef Name) {
- OS << "\t.ref " << Name;
+void MCAsmStreamer::emitXCOFFRefDirective(const MCSymbol *Symbol) {
+ OS << "\t.ref ";
+ Symbol->print(OS, MAI);
EmitEOL();
}
void MCAsmStreamer::emitXCOFFExceptDirective(const MCSymbol *Symbol,
- const MCSymbol *Trap,
+ const MCSymbol *Trap,
unsigned Lang,
unsigned Reason,
unsigned FunctionSize,
@@ -960,6 +968,70 @@ void MCAsmStreamer::emitXCOFFExceptDirective(const MCSymbol *Symbol,
EmitEOL();
}
+void MCAsmStreamer::emitXCOFFCInfoSym(StringRef Name, StringRef Metadata) {
+ const char InfoDirective[] = "\t.info ";
+ const char *Separator = ", ";
+ constexpr int WordSize = sizeof(uint32_t);
+
+ // Start by emitting the .info pseudo-op and C_INFO symbol name.
+ OS << InfoDirective;
+ PrintQuotedString(Name, OS);
+ OS << Separator;
+
+ size_t MetadataSize = Metadata.size();
+
+ // Emit the 4-byte length of the metadata.
+ OS << format_hex(MetadataSize, 10) << Separator;
+
+ // Nothing left to do if there's no metadata.
+ if (MetadataSize == 0) {
+ EmitEOL();
+ return;
+ }
+
+ // Metadata needs to be padded out to an even word size when generating
+ // assembly because the .info pseudo-op can only generate words of data. We
+ // apply the same restriction to the object case for consistency; however, the
+ // linker doesn't require padding, so it will only save bytes specified by the
+ // length and discard any padding.
+ uint32_t PaddedSize = alignTo(MetadataSize, WordSize);
+ uint32_t PaddingSize = PaddedSize - MetadataSize;
+
+ // Write out the payload a word at a time.
+ //
+ // The assembler has a limit on the number of operands in an expression,
+ // so we need multiple .info pseudo-ops. We choose a small number of words
+ // per pseudo-op to keep the assembly readable.
+ constexpr int WordsPerDirective = 5;
+ // Force emitting a new directive to keep the first directive purely about the
+ // name and size of the note.
+ int WordsBeforeNextDirective = 0;
+ auto PrintWord = [&](const uint8_t *WordPtr) {
+ if (WordsBeforeNextDirective-- == 0) {
+ EmitEOL();
+ OS << InfoDirective;
+ WordsBeforeNextDirective = WordsPerDirective;
+ }
+ OS << Separator;
+ uint32_t Word = llvm::support::endian::read32be(WordPtr);
+ OS << format_hex(Word, 10);
+ };
+
+ size_t Index = 0;
+ for (; Index + WordSize <= MetadataSize; Index += WordSize)
+ PrintWord(reinterpret_cast<const uint8_t *>(Metadata.data()) + Index);
+
+ // If there is padding, then we have at least one byte of payload left
+ // to emit.
+ if (PaddingSize) {
+ assert(PaddedSize - Index == WordSize);
+ std::array<uint8_t, WordSize> LastWord = {0};
+ ::memcpy(LastWord.data(), Metadata.data() + Index, MetadataSize - Index);
+ PrintWord(LastWord.data());
+ }
+ EmitEOL();
+}
+
void MCAsmStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
assert(MAI->hasDotTypeDotSizeDirective());
OS << "\t.size\t";
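As an illustration of the word-padding scheme described in the emitXCOFFCInfoSym comments above: the metadata is zero-padded to a 4-byte boundary and printed as big-endian words spread across several .info directives. The sketch below is a hedged standalone approximation, not the LLVM implementation; the exact words-per-directive grouping and separator placement differ from what MCAsmStreamer prints.

#include <algorithm>
#include <array>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>

// Read a 4-byte big-endian word from a buffer.
static uint32_t readBE32(const uint8_t *P) {
  return (uint32_t(P[0]) << 24) | (uint32_t(P[1]) << 16) |
         (uint32_t(P[2]) << 8) | uint32_t(P[3]);
}

static void printCInfoSym(const std::string &Name, const std::string &Meta) {
  // First directive carries the quoted name and the unpadded metadata length.
  std::printf("\t.info \"%s\", 0x%08zx", Name.c_str(), Meta.size());
  if (Meta.empty()) {
    std::printf("\n");
    return;
  }
  const size_t WordSize = 4, WordsPerDirective = 5;
  size_t WordsOnLine = WordsPerDirective; // forces a fresh .info for word one
  for (size_t I = 0; I < Meta.size(); I += WordSize) {
    std::array<uint8_t, 4> Word = {0, 0, 0, 0}; // zero padding for the tail
    std::memcpy(Word.data(), Meta.data() + I,
                std::min(WordSize, Meta.size() - I));
    if (WordsOnLine == WordsPerDirective) {
      std::printf("\n\t.info ");
      WordsOnLine = 0;
    } else {
      std::printf(", ");
    }
    std::printf("0x%08x", unsigned(readBE32(Word.data())));
    ++WordsOnLine;
  }
  std::printf("\n");
}

int main() { printCInfoSym("note", "hello world"); }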
@@ -1277,7 +1349,7 @@ void MCAsmStreamer::emitValueImpl(const MCExpr *Value, unsigned Size,
unsigned Remaining = Size - Emitted;
// The size of our partial emission must be a power of two less than
// Size.
- unsigned EmissionSize = PowerOf2Floor(std::min(Remaining, Size - 1));
+ unsigned EmissionSize = llvm::bit_floor(std::min(Remaining, Size - 1));
// Calculate the byte offset of our partial emission taking into account
// the endianness of the target.
unsigned ByteOffset =
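The partial-emission hunk above splits an oversized value into power-of-two pieces, each strictly smaller than the full size. A minimal sketch of that splitting rule, assuming C++20 std::bit_floor as a stand-in for llvm::bit_floor:

#include <algorithm>
#include <bit>
#include <cstdio>

int main() {
  unsigned Size = 7; // e.g. a value the target cannot emit in one piece
  for (unsigned Emitted = 0; Emitted != Size;) {
    unsigned Remaining = Size - Emitted;
    // Largest power of two not exceeding min(Remaining, Size - 1).
    unsigned EmissionSize = std::bit_floor(std::min(Remaining, Size - 1u));
    std::printf("emit %u byte(s)\n", EmissionSize); // prints 4, then 2, then 1
    Emitted += EmissionSize;
  }
}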
@@ -1892,23 +1964,23 @@ void MCAsmStreamer::EmitRegisterName(int64_t Register) {
OS << Register;
}
-void MCAsmStreamer::emitCFIDefCfa(int64_t Register, int64_t Offset) {
- MCStreamer::emitCFIDefCfa(Register, Offset);
+void MCAsmStreamer::emitCFIDefCfa(int64_t Register, int64_t Offset, SMLoc Loc) {
+ MCStreamer::emitCFIDefCfa(Register, Offset, Loc);
OS << "\t.cfi_def_cfa ";
EmitRegisterName(Register);
OS << ", " << Offset;
EmitEOL();
}
-void MCAsmStreamer::emitCFIDefCfaOffset(int64_t Offset) {
- MCStreamer::emitCFIDefCfaOffset(Offset);
+void MCAsmStreamer::emitCFIDefCfaOffset(int64_t Offset, SMLoc Loc) {
+ MCStreamer::emitCFIDefCfaOffset(Offset, Loc);
OS << "\t.cfi_def_cfa_offset " << Offset;
EmitEOL();
}
void MCAsmStreamer::emitCFILLVMDefAspaceCfa(int64_t Register, int64_t Offset,
- int64_t AddressSpace) {
- MCStreamer::emitCFILLVMDefAspaceCfa(Register, Offset, AddressSpace);
+ int64_t AddressSpace, SMLoc Loc) {
+ MCStreamer::emitCFILLVMDefAspaceCfa(Register, Offset, AddressSpace, Loc);
OS << "\t.cfi_llvm_def_aspace_cfa ";
EmitRegisterName(Register);
OS << ", " << Offset;
@@ -1926,14 +1998,14 @@ static void PrintCFIEscape(llvm::formatted_raw_ostream &OS, StringRef Values) {
}
}
-void MCAsmStreamer::emitCFIEscape(StringRef Values) {
- MCStreamer::emitCFIEscape(Values);
+void MCAsmStreamer::emitCFIEscape(StringRef Values, SMLoc Loc) {
+ MCStreamer::emitCFIEscape(Values, Loc);
PrintCFIEscape(OS, Values);
EmitEOL();
}
-void MCAsmStreamer::emitCFIGnuArgsSize(int64_t Size) {
- MCStreamer::emitCFIGnuArgsSize(Size);
+void MCAsmStreamer::emitCFIGnuArgsSize(int64_t Size, SMLoc Loc) {
+ MCStreamer::emitCFIGnuArgsSize(Size, Loc);
uint8_t Buffer[16] = { dwarf::DW_CFA_GNU_args_size };
unsigned Len = encodeULEB128(Size, Buffer + 1) + 1;
@@ -1942,15 +2014,15 @@ void MCAsmStreamer::emitCFIGnuArgsSize(int64_t Size) {
EmitEOL();
}
-void MCAsmStreamer::emitCFIDefCfaRegister(int64_t Register) {
- MCStreamer::emitCFIDefCfaRegister(Register);
+void MCAsmStreamer::emitCFIDefCfaRegister(int64_t Register, SMLoc Loc) {
+ MCStreamer::emitCFIDefCfaRegister(Register, Loc);
OS << "\t.cfi_def_cfa_register ";
EmitRegisterName(Register);
EmitEOL();
}
-void MCAsmStreamer::emitCFIOffset(int64_t Register, int64_t Offset) {
- this->MCStreamer::emitCFIOffset(Register, Offset);
+void MCAsmStreamer::emitCFIOffset(int64_t Register, int64_t Offset, SMLoc Loc) {
+ MCStreamer::emitCFIOffset(Register, Offset, Loc);
OS << "\t.cfi_offset ";
EmitRegisterName(Register);
OS << ", " << Offset;
@@ -1972,42 +2044,43 @@ void MCAsmStreamer::emitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
EmitEOL();
}
-void MCAsmStreamer::emitCFIRememberState() {
- MCStreamer::emitCFIRememberState();
+void MCAsmStreamer::emitCFIRememberState(SMLoc Loc) {
+ MCStreamer::emitCFIRememberState(Loc);
OS << "\t.cfi_remember_state";
EmitEOL();
}
-void MCAsmStreamer::emitCFIRestoreState() {
- MCStreamer::emitCFIRestoreState();
+void MCAsmStreamer::emitCFIRestoreState(SMLoc Loc) {
+ MCStreamer::emitCFIRestoreState(Loc);
OS << "\t.cfi_restore_state";
EmitEOL();
}
-void MCAsmStreamer::emitCFIRestore(int64_t Register) {
- MCStreamer::emitCFIRestore(Register);
+void MCAsmStreamer::emitCFIRestore(int64_t Register, SMLoc Loc) {
+ MCStreamer::emitCFIRestore(Register, Loc);
OS << "\t.cfi_restore ";
EmitRegisterName(Register);
EmitEOL();
}
-void MCAsmStreamer::emitCFISameValue(int64_t Register) {
- MCStreamer::emitCFISameValue(Register);
+void MCAsmStreamer::emitCFISameValue(int64_t Register, SMLoc Loc) {
+ MCStreamer::emitCFISameValue(Register, Loc);
OS << "\t.cfi_same_value ";
EmitRegisterName(Register);
EmitEOL();
}
-void MCAsmStreamer::emitCFIRelOffset(int64_t Register, int64_t Offset) {
- MCStreamer::emitCFIRelOffset(Register, Offset);
+void MCAsmStreamer::emitCFIRelOffset(int64_t Register, int64_t Offset,
+ SMLoc Loc) {
+ MCStreamer::emitCFIRelOffset(Register, Offset, Loc);
OS << "\t.cfi_rel_offset ";
EmitRegisterName(Register);
OS << ", " << Offset;
EmitEOL();
}
-void MCAsmStreamer::emitCFIAdjustCfaOffset(int64_t Adjustment) {
- MCStreamer::emitCFIAdjustCfaOffset(Adjustment);
+void MCAsmStreamer::emitCFIAdjustCfaOffset(int64_t Adjustment, SMLoc Loc) {
+ MCStreamer::emitCFIAdjustCfaOffset(Adjustment, Loc);
OS << "\t.cfi_adjust_cfa_offset " << Adjustment;
EmitEOL();
}
@@ -2018,15 +2091,16 @@ void MCAsmStreamer::emitCFISignalFrame() {
EmitEOL();
}
-void MCAsmStreamer::emitCFIUndefined(int64_t Register) {
- MCStreamer::emitCFIUndefined(Register);
+void MCAsmStreamer::emitCFIUndefined(int64_t Register, SMLoc Loc) {
+ MCStreamer::emitCFIUndefined(Register, Loc);
OS << "\t.cfi_undefined ";
EmitRegisterName(Register);
EmitEOL();
}
-void MCAsmStreamer::emitCFIRegister(int64_t Register1, int64_t Register2) {
- MCStreamer::emitCFIRegister(Register1, Register2);
+void MCAsmStreamer::emitCFIRegister(int64_t Register1, int64_t Register2,
+ SMLoc Loc) {
+ MCStreamer::emitCFIRegister(Register1, Register2, Loc);
OS << "\t.cfi_register ";
EmitRegisterName(Register1);
OS << ", ";
@@ -2034,14 +2108,14 @@ void MCAsmStreamer::emitCFIRegister(int64_t Register1, int64_t Register2) {
EmitEOL();
}
-void MCAsmStreamer::emitCFIWindowSave() {
- MCStreamer::emitCFIWindowSave();
+void MCAsmStreamer::emitCFIWindowSave(SMLoc Loc) {
+ MCStreamer::emitCFIWindowSave(Loc);
OS << "\t.cfi_window_save";
EmitEOL();
}
-void MCAsmStreamer::emitCFINegateRAState() {
- MCStreamer::emitCFINegateRAState();
+void MCAsmStreamer::emitCFINegateRAState(SMLoc Loc) {
+ MCStreamer::emitCFINegateRAState(Loc);
OS << "\t.cfi_negate_ra_state";
EmitEOL();
}
@@ -2217,13 +2291,12 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
raw_ostream &OS = getCommentOS();
SmallString<256> Code;
SmallVector<MCFixup, 4> Fixups;
- raw_svector_ostream VecOS(Code);
// If we have no code emitter, don't emit code.
if (!getAssembler().getEmitterPtr())
return;
- getAssembler().getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
+ getAssembler().getEmitter().encodeInstruction(Inst, Code, Fixups, STI);
// If we are showing fixups, create symbolic markers in the encoded
// representation. We do this by making a per-bit map to the fixup item index,
@@ -2336,11 +2409,14 @@ void MCAsmStreamer::emitInstruction(const MCInst &Inst,
EmitEOL();
}
-void MCAsmStreamer::emitPseudoProbe(
- uint64_t Guid, uint64_t Index, uint64_t Type, uint64_t Attr,
- const MCPseudoProbeInlineStack &InlineStack, MCSymbol *FnSym) {
- OS << "\t.pseudoprobe\t" << Guid << " " << Index << " " << Type << " "
- << Attr;
+void MCAsmStreamer::emitPseudoProbe(uint64_t Guid, uint64_t Index,
+ uint64_t Type, uint64_t Attr,
+ uint64_t Discriminator,
+ const MCPseudoProbeInlineStack &InlineStack,
+ MCSymbol *FnSym) {
+ OS << "\t.pseudoprobe\t" << Guid << " " << Index << " " << Type << " " << Attr;
+ if (Discriminator)
+ OS << " " << Discriminator;
// Emit inline stack like
// @ GUIDmain:3 @ GUIDCaller:1 @ GUIDDirectCaller:11
for (const auto &Site : InlineStack)
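For reference, a hedged sketch of the textual record produced by the emitPseudoProbe change above: GUID, index, type and attribute, the discriminator only when non-zero, then one GUID:probe pair per inlined frame. The helper name and sample values are illustrative, not LLVM API:

#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

void printPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
                      uint64_t Attr, uint64_t Discriminator,
                      const std::vector<std::pair<uint64_t, uint64_t>> &Sites) {
  std::printf("\t.pseudoprobe\t%" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64,
              Guid, Index, Type, Attr);
  if (Discriminator) // only printed when non-zero, as in the diff above
    std::printf(" %" PRIu64, Discriminator);
  // One "@ GUID:probe" pair per inlined frame, outermost first.
  for (const auto &Site : Sites)
    std::printf(" @ %" PRIu64 ":%" PRIu64, Site.first, Site.second);
  std::printf("\n");
}

int main() { printPseudoProbe(1234, 1, 0, 0, 3, {{5678, 8}}); }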
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index a33d7ea9ebfe..55ed1a285cd7 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -273,7 +273,7 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
"FKF_IsAlignedDownTo32Bits is only allowed on PC-relative fixups!");
if (IsPCRel) {
- uint32_t Offset = Layout.getFragmentOffset(DF) + Fixup.getOffset();
+ uint64_t Offset = Layout.getFragmentOffset(DF) + Fixup.getOffset();
// A number of ARM fixups in Thumb mode require that the effective PC
// address be determined as the 32-bit aligned version of the actual offset.
@@ -287,6 +287,13 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout,
WasForced = true;
}
+ // A linker relaxation target may emit ADD/SUB relocations for A-B+C. Let
+ // recordRelocation handle non-VK_None cases like A@plt-B+C.
+ if (!IsResolved && Target.getSymA() && Target.getSymB() &&
+ Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None &&
+ getBackend().handleAddSubRelocations(Layout, *DF, Fixup, Target, Value))
+ return true;
+
return IsResolved;
}
@@ -303,7 +310,7 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
case MCFragment::FT_Fill: {
auto &FF = cast<MCFillFragment>(F);
int64_t NumValues = 0;
- if (!FF.getNumValues().evaluateAsAbsolute(NumValues, Layout)) {
+ if (!FF.getNumValues().evaluateKnownAbsolute(NumValues, Layout)) {
getContext().reportError(FF.getLoc(),
"expected assembly-time absolute expression");
return 0;
@@ -464,14 +471,13 @@ void MCAsmLayout::layoutFragment(MCFragment *F) {
}
}
-void MCAssembler::registerSymbol(const MCSymbol &Symbol, bool *Created) {
- bool New = !Symbol.isRegistered();
- if (Created)
- *Created = New;
- if (New) {
+bool MCAssembler::registerSymbol(const MCSymbol &Symbol) {
+ bool Changed = !Symbol.isRegistered();
+ if (Changed) {
Symbol.setIsRegistered(true);
Symbols.push_back(&Symbol);
}
+ return Changed;
}
void MCAssembler::writeFragmentPadding(raw_ostream &OS,
@@ -991,19 +997,11 @@ bool MCAssembler::relaxInstruction(MCAsmLayout &Layout,
getBackend().relaxInstruction(Relaxed, *F.getSubtargetInfo());
// Encode the new instruction.
- //
- // FIXME-PERF: If it matters, we could let the target do this. It can
- // probably do so more efficiently in many cases.
- SmallVector<MCFixup, 4> Fixups;
- SmallString<256> Code;
- raw_svector_ostream VecOS(Code);
- getEmitter().encodeInstruction(Relaxed, VecOS, Fixups, *F.getSubtargetInfo());
-
- // Update the fragment.
F.setInst(Relaxed);
- F.getContents() = Code;
- F.getFixups() = Fixups;
-
+ F.getFixups().clear();
+ F.getContents().clear();
+ getEmitter().encodeInstruction(Relaxed, F.getContents(), F.getFixups(),
+ *F.getSubtargetInfo());
return true;
}
@@ -1105,11 +1103,10 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
LineDelta = DF.getLineDelta();
SmallVectorImpl<char> &Data = DF.getContents();
Data.clear();
- raw_svector_ostream OSE(Data);
DF.getFixups().clear();
- MCDwarfLineAddr::Encode(Context, getDWARFLinetableParams(), LineDelta,
- AddrDelta, OSE);
+ MCDwarfLineAddr::encode(Context, getDWARFLinetableParams(), LineDelta,
+ AddrDelta, Data);
return OldSize != Data.size();
}
@@ -1120,17 +1117,21 @@ bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
return WasRelaxed;
MCContext &Context = Layout.getAssembler().getContext();
- uint64_t OldSize = DF.getContents().size();
- int64_t AddrDelta;
- bool Abs = DF.getAddrDelta().evaluateKnownAbsolute(AddrDelta, Layout);
- assert(Abs && "We created call frame with an invalid expression");
- (void) Abs;
+ int64_t Value;
+ bool Abs = DF.getAddrDelta().evaluateAsAbsolute(Value, Layout);
+ if (!Abs) {
+ getContext().reportError(DF.getAddrDelta().getLoc(),
+ "invalid CFI advance_loc expression");
+ DF.setAddrDelta(MCConstantExpr::create(0, Context));
+ return false;
+ }
+
SmallVectorImpl<char> &Data = DF.getContents();
+ uint64_t OldSize = Data.size();
Data.clear();
- raw_svector_ostream OSE(Data);
DF.getFixups().clear();
- MCDwarfFrameEmitter::EncodeAdvanceLoc(Context, AddrDelta, OSE);
+ MCDwarfFrameEmitter::encodeAdvanceLoc(Context, Value, Data);
return OldSize != Data.size();
}
diff --git a/llvm/lib/MC/MCCodeEmitter.cpp b/llvm/lib/MC/MCCodeEmitter.cpp
index 0d114f12d58c..afbe31e0070c 100644
--- a/llvm/lib/MC/MCCodeEmitter.cpp
+++ b/llvm/lib/MC/MCCodeEmitter.cpp
@@ -7,9 +7,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
MCCodeEmitter::MCCodeEmitter() = default;
MCCodeEmitter::~MCCodeEmitter() = default;
+
+void MCCodeEmitter::encodeInstruction(const MCInst &Inst,
+ SmallVectorImpl<char> &CB,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ raw_svector_ostream OS(CB);
+ encodeInstruction(Inst, OS, Fixups, STI);
+}
diff --git a/llvm/lib/MC/MCCodeView.cpp b/llvm/lib/MC/MCCodeView.cpp
index aec8fc89acb2..a27ef64bec0f 100644
--- a/llvm/lib/MC/MCCodeView.cpp
+++ b/llvm/lib/MC/MCCodeView.cpp
@@ -649,8 +649,6 @@ void CodeViewContext::encodeDefRange(MCAsmLayout &Layout,
const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(RangeBegin, Ctx);
const MCBinaryExpr *BE =
MCBinaryExpr::createAdd(SRE, MCConstantExpr::create(Bias, Ctx), Ctx);
- MCValue Res;
- BE->evaluateAsRelocatable(Res, &Layout, /*Fixup=*/nullptr);
// Each record begins with a 2-byte number indicating how large the record
// is.
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index 40e5e0f2ef24..c443f46e0242 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -85,7 +85,7 @@ MCContext::MCContext(const Triple &TheTriple, const MCAsmInfo *mai,
Env = IsMachO;
break;
case Triple::COFF:
- if (!TheTriple.isOSWindows())
+ if (!TheTriple.isOSWindows() && !TheTriple.isUEFI())
report_fatal_error(
"Cannot initialize MC for non-Windows COFF object files.");
@@ -211,19 +211,19 @@ MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
return Sym;
}
-MCSymbol *MCContext::getOrCreateFrameAllocSymbol(StringRef FuncName,
+MCSymbol *MCContext::getOrCreateFrameAllocSymbol(const Twine &FuncName,
unsigned Idx) {
- return getOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + FuncName +
+ return getOrCreateSymbol(MAI->getPrivateGlobalPrefix() + FuncName +
"$frame_escape_" + Twine(Idx));
}
-MCSymbol *MCContext::getOrCreateParentFrameOffsetSymbol(StringRef FuncName) {
- return getOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + FuncName +
+MCSymbol *MCContext::getOrCreateParentFrameOffsetSymbol(const Twine &FuncName) {
+ return getOrCreateSymbol(MAI->getPrivateGlobalPrefix() + FuncName +
"$parent_frame_offset");
}
-MCSymbol *MCContext::getOrCreateLSDASymbol(StringRef FuncName) {
- return getOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + "__ehtable$" +
+MCSymbol *MCContext::getOrCreateLSDASymbol(const Twine &FuncName) {
+ return getOrCreateSymbol(MAI->getPrivateGlobalPrefix() + "__ehtable$" +
FuncName);
}
@@ -259,8 +259,8 @@ MCSymbol *MCContext::createSymbolImpl(const StringMapEntry<bool> *Name,
return new (Name, *this)
MCSymbol(MCSymbol::SymbolKindUnset, Name, IsTemporary);
}
- return new (Name, *this) MCSymbol(MCSymbol::SymbolKindUnset, Name,
- IsTemporary);
+ return new (Name, *this)
+ MCSymbol(MCSymbol::SymbolKindUnset, Name, IsTemporary);
}
MCSymbol *MCContext::createSymbol(StringRef Name, bool AlwaysAddSuffix,
@@ -310,8 +310,12 @@ MCSymbol *MCContext::createNamedTempSymbol(const Twine &Name) {
}
MCSymbol *MCContext::createLinkerPrivateTempSymbol() {
+ return createLinkerPrivateSymbol("tmp");
+}
+
+MCSymbol *MCContext::createLinkerPrivateSymbol(const Twine &Name) {
SmallString<128> NameSV;
- raw_svector_ostream(NameSV) << MAI->getLinkerPrivateGlobalPrefix() << "tmp";
+ raw_svector_ostream(NameSV) << MAI->getLinkerPrivateGlobalPrefix() << Name;
return createSymbol(NameSV, true, false);
}
@@ -362,9 +366,8 @@ MCSymbol *MCContext::lookupSymbol(const Twine &Name) const {
return Symbols.lookup(NameRef);
}
-void MCContext::setSymbolValue(MCStreamer &Streamer,
- StringRef Sym,
- uint64_t Val) {
+void MCContext::setSymbolValue(MCStreamer &Streamer, const Twine &Sym,
+ uint64_t Val) {
auto Symbol = getOrCreateSymbol(Sym);
Streamer.emitAssignment(Symbol, MCConstantExpr::create(Val, *this));
}
@@ -498,14 +501,13 @@ MCSectionELF *MCContext::createELFSectionImpl(StringRef Section, unsigned Type,
return Ret;
}
-MCSectionELF *MCContext::createELFRelSection(const Twine &Name, unsigned Type,
- unsigned Flags, unsigned EntrySize,
- const MCSymbolELF *Group,
- const MCSectionELF *RelInfoSection) {
+MCSectionELF *
+MCContext::createELFRelSection(const Twine &Name, unsigned Type, unsigned Flags,
+ unsigned EntrySize, const MCSymbolELF *Group,
+ const MCSectionELF *RelInfoSection) {
StringMap<bool>::iterator I;
bool Inserted;
- std::tie(I, Inserted) =
- RelSecNames.insert(std::make_pair(Name.str(), true));
+ std::tie(I, Inserted) = RelSecNames.insert(std::make_pair(Name.str(), true));
return createELFSectionImpl(
I->getKey(), Type, Flags, SectionKind::getReadOnly(), EntrySize, Group,
@@ -669,7 +671,6 @@ MCSectionCOFF *MCContext::getCOFFSection(StringRef Section,
COMDATSymName = COMDATSymbol->getName();
}
-
// Do the lookup, if we have a hit, return it.
COFFSectionKey T{Section, COMDATSymName, Selection, UniqueID};
auto IterBool = COFFUniquingMap.insert(std::make_pair(T, nullptr));
@@ -849,9 +850,6 @@ MCSectionSPIRV *MCContext::getSPIRVSection() {
Result->getFragmentList().insert(Result->begin(), F);
F->setParent(Result);
- if (Begin)
- Begin->setFragment(F);
-
return Result;
}
@@ -884,11 +882,11 @@ MCSubtargetInfo &MCContext::getSubtargetCopy(const MCSubtargetInfo &STI) {
void MCContext::addDebugPrefixMapEntry(const std::string &From,
const std::string &To) {
- DebugPrefixMap.insert(std::make_pair(From, To));
+ DebugPrefixMap.emplace_back(From, To);
}
void MCContext::remapDebugPath(SmallVectorImpl<char> &Path) {
- for (const auto &[From, To] : DebugPrefixMap)
+ for (const auto &[From, To] : llvm::reverse(DebugPrefixMap))
if (llvm::sys::path::replace_path_prefix(Path, From, To))
break;
}
@@ -928,6 +926,12 @@ EmitDwarfUnwindType MCContext::emitDwarfUnwindInfo() const {
return TargetOptions->EmitDwarfUnwind;
}
+bool MCContext::emitCompactUnwindNonCanonical() const {
+ if (TargetOptions)
+ return TargetOptions->EmitCompactUnwindNonCanonical;
+ return false;
+}
+
void MCContext::setGenDwarfRootFile(StringRef InputFileName, StringRef Buffer) {
// MCDwarf needs the root file as well as the compilation directory.
// If we find a '.file 0' directive that will supersede these values.
diff --git a/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/llvm/lib/MC/MCDisassembler/Disassembler.cpp
index f0c61840e413..067b951fbfcc 100644
--- a/llvm/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/llvm/lib/MC/MCDisassembler/Disassembler.cpp
@@ -10,7 +10,6 @@
#include "llvm-c/Disassembler.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -29,6 +28,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <cstring>
diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp
index 30b8aadbf63c..55632f2fe76a 100644
--- a/llvm/lib/MC/MCDwarf.cpp
+++ b/llvm/lib/MC/MCDwarf.cpp
@@ -78,9 +78,12 @@ static inline uint64_t ScaleAddrDelta(MCContext &Context, uint64_t AddrDelta) {
MCDwarfLineStr::MCDwarfLineStr(MCContext &Ctx) {
UseRelocs = Ctx.getAsmInfo()->doesDwarfUseRelocationsAcrossSections();
- if (UseRelocs)
- LineStrLabel =
- Ctx.getObjectFileInfo()->getDwarfLineStrSection()->getBeginSymbol();
+ if (UseRelocs) {
+ MCSection *DwarfLineStrSection =
+ Ctx.getObjectFileInfo()->getDwarfLineStrSection();
+ assert(DwarfLineStrSection && "DwarfLineStrSection must not be NULL");
+ LineStrLabel = DwarfLineStrSection->getBeginSymbol();
+ }
}
//
@@ -347,10 +350,14 @@ SmallString<0> MCDwarfLineStr::getFinalizedData() {
return Data;
}
+size_t MCDwarfLineStr::addString(StringRef Path) {
+ return LineStrings.add(Path);
+}
+
void MCDwarfLineStr::emitRef(MCStreamer *MCOS, StringRef Path) {
int RefSize =
dwarf::getDwarfOffsetByteSize(MCOS->getContext().getDwarfFormat());
- size_t Offset = LineStrings.add(Path);
+ size_t Offset = addString(Path);
if (UseRelocs) {
MCContext &Ctx = MCOS->getContext();
MCOS->emitValue(makeStartPlusIntExpr(Ctx, *LineStrLabel, Offset), RefSize);
@@ -667,9 +674,8 @@ void MCDwarfLineAddr::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
int64_t LineDelta, uint64_t AddrDelta) {
MCContext &Context = MCOS->getContext();
SmallString<256> Tmp;
- raw_svector_ostream OS(Tmp);
- MCDwarfLineAddr::Encode(Context, Params, LineDelta, AddrDelta, OS);
- MCOS->emitBytes(OS.str());
+ MCDwarfLineAddr::encode(Context, Params, LineDelta, AddrDelta, Tmp);
+ MCOS->emitBytes(Tmp);
}
/// Given a special op, return the address skip amount (in units of
@@ -679,9 +685,10 @@ static uint64_t SpecialAddr(MCDwarfLineTableParams Params, uint64_t op) {
}
/// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas.
-void MCDwarfLineAddr::Encode(MCContext &Context, MCDwarfLineTableParams Params,
+void MCDwarfLineAddr::encode(MCContext &Context, MCDwarfLineTableParams Params,
int64_t LineDelta, uint64_t AddrDelta,
- raw_ostream &OS) {
+ SmallVectorImpl<char> &Out) {
+ uint8_t Buf[16];
uint64_t Temp, Opcode;
bool NeedCopy = false;
@@ -696,14 +703,14 @@ void MCDwarfLineAddr::Encode(MCContext &Context, MCDwarfLineTableParams Params,
// end_sequence to emit the matrix entry.
if (LineDelta == INT64_MAX) {
if (AddrDelta == MaxSpecialAddrDelta)
- OS << char(dwarf::DW_LNS_const_add_pc);
+ Out.push_back(dwarf::DW_LNS_const_add_pc);
else if (AddrDelta) {
- OS << char(dwarf::DW_LNS_advance_pc);
- encodeULEB128(AddrDelta, OS);
+ Out.push_back(dwarf::DW_LNS_advance_pc);
+ Out.append(Buf, Buf + encodeULEB128(AddrDelta, Buf));
}
- OS << char(dwarf::DW_LNS_extended_op);
- OS << char(1);
- OS << char(dwarf::DW_LNE_end_sequence);
+ Out.push_back(dwarf::DW_LNS_extended_op);
+ Out.push_back(1);
+ Out.push_back(dwarf::DW_LNE_end_sequence);
return;
}
@@ -714,8 +721,8 @@ void MCDwarfLineAddr::Encode(MCContext &Context, MCDwarfLineTableParams Params,
// it with DW_LNS_advance_line.
if (Temp >= Params.DWARF2LineRange ||
Temp + Params.DWARF2LineOpcodeBase > 255) {
- OS << char(dwarf::DW_LNS_advance_line);
- encodeSLEB128(LineDelta, OS);
+ Out.push_back(dwarf::DW_LNS_advance_line);
+ Out.append(Buf, Buf + encodeSLEB128(LineDelta, Buf));
LineDelta = 0;
Temp = 0 - Params.DWARF2LineBase;
@@ -724,7 +731,7 @@ void MCDwarfLineAddr::Encode(MCContext &Context, MCDwarfLineTableParams Params,
// Use DW_LNS_copy instead of a "line +0, addr +0" special opcode.
if (LineDelta == 0 && AddrDelta == 0) {
- OS << char(dwarf::DW_LNS_copy);
+ Out.push_back(dwarf::DW_LNS_copy);
return;
}
@@ -736,28 +743,28 @@ void MCDwarfLineAddr::Encode(MCContext &Context, MCDwarfLineTableParams Params,
// Try using a special opcode.
Opcode = Temp + AddrDelta * Params.DWARF2LineRange;
if (Opcode <= 255) {
- OS << char(Opcode);
+ Out.push_back(Opcode);
return;
}
// Try using DW_LNS_const_add_pc followed by special op.
Opcode = Temp + (AddrDelta - MaxSpecialAddrDelta) * Params.DWARF2LineRange;
if (Opcode <= 255) {
- OS << char(dwarf::DW_LNS_const_add_pc);
- OS << char(Opcode);
+ Out.push_back(dwarf::DW_LNS_const_add_pc);
+ Out.push_back(Opcode);
return;
}
}
// Otherwise use DW_LNS_advance_pc.
- OS << char(dwarf::DW_LNS_advance_pc);
- encodeULEB128(AddrDelta, OS);
+ Out.push_back(dwarf::DW_LNS_advance_pc);
+ Out.append(Buf, Buf + encodeULEB128(AddrDelta, Buf));
if (NeedCopy)
- OS << char(dwarf::DW_LNS_copy);
+ Out.push_back(dwarf::DW_LNS_copy);
else {
assert(Temp <= 255 && "Buggy special opcode encoding.");
- OS << char(Temp);
+ Out.push_back(Temp);
}
}
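The encoder above tries to fold a line delta and an address advance into a single special opcode. Below is a self-contained sketch of the standard DWARF formula it is built around; the constants are the common defaults, while LLVM takes them from MCDwarfLineTableParams and they can differ per target.

#include <cstdint>
#include <cstdio>
#include <optional>

// Returns the single-byte special opcode for (LineDelta, AddrDelta) if one
// exists, using typical default line-table parameters.
std::optional<uint8_t> specialOpcode(int64_t LineDelta, uint64_t AddrDelta) {
  const int64_t LineBase = -5, LineRange = 14, OpcodeBase = 13;
  if (LineDelta < LineBase || LineDelta >= LineBase + LineRange)
    return std::nullopt; // line delta outside the special-opcode window
  uint64_t Op = uint64_t(LineDelta - LineBase) +
                uint64_t(LineRange) * AddrDelta + uint64_t(OpcodeBase);
  if (Op > 255)
    return std::nullopt; // does not fit in one byte
  return uint8_t(Op);
}

int main() {
  if (auto Op = specialOpcode(/*LineDelta=*/2, /*AddrDelta=*/3))
    std::printf("special opcode 0x%02x\n", unsigned(*Op)); // 0x3e here
}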
@@ -1473,7 +1480,7 @@ void FrameEmitterImpl::emitCFIInstructions(ArrayRef<MCCFIInstruction> Instrs,
if (BaseLabel && Label) {
MCSymbol *ThisSym = Label;
if (ThisSym != BaseLabel) {
- Streamer.emitDwarfAdvanceFrameAddr(BaseLabel, ThisSym);
+ Streamer.emitDwarfAdvanceFrameAddr(BaseLabel, ThisSym, Instr.getLoc());
BaseLabel = ThisSym;
}
}
@@ -1876,7 +1883,11 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
}
}
- if (!NeedsEHFrameSection) return;
+ // Compact unwind information can be emitted in the eh_frame section or the
+ // debug_frame section. Skip emitting FDEs and CIEs when the compact unwind
+ // doesn't need an eh_frame section and the emission location is the eh_frame
+ // section.
+ if (!NeedsEHFrameSection && IsEH) return;
MCSection &Section =
IsEH ? *const_cast<MCObjectFileInfo *>(MOFI)->getEHFrameSection()
@@ -1903,9 +1914,13 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
const MCDwarfFrameInfo &Frame = *I;
++I;
if (CanOmitDwarf && Frame.CompactUnwindEncoding !=
- MOFI->getCompactUnwindDwarfEHFrameOnly())
- // Don't generate an EH frame if we don't need one. I.e., it's taken care
- // of by the compact unwind encoding.
+ MOFI->getCompactUnwindDwarfEHFrameOnly() && IsEH)
+ // CIEs and FDEs can be emitted in either the eh_frame section or the
+  // debug_frame section; on some platforms (e.g. AArch64) the target object
+ // file supports emitting a compact_unwind section without an associated
+ // eh_frame section. If the eh_frame section is not needed, and the
+ // location where the CIEs and FDEs are to be emitted is the eh_frame
+ // section, do not emit anything.
continue;
CIEKey Key(Frame);
@@ -1917,18 +1932,9 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB,
}
}
-void MCDwarfFrameEmitter::EmitAdvanceLoc(MCObjectStreamer &Streamer,
- uint64_t AddrDelta) {
- MCContext &Context = Streamer.getContext();
- SmallString<256> Tmp;
- raw_svector_ostream OS(Tmp);
- MCDwarfFrameEmitter::EncodeAdvanceLoc(Context, AddrDelta, OS);
- Streamer.emitBytes(OS.str());
-}
-
-void MCDwarfFrameEmitter::EncodeAdvanceLoc(MCContext &Context,
+void MCDwarfFrameEmitter::encodeAdvanceLoc(MCContext &Context,
uint64_t AddrDelta,
- raw_ostream &OS) {
+ SmallVectorImpl<char> &Out) {
// Scale the address delta by the minimum instruction length.
AddrDelta = ScaleAddrDelta(Context, AddrDelta);
if (AddrDelta == 0)
@@ -1939,16 +1945,16 @@ void MCDwarfFrameEmitter::EncodeAdvanceLoc(MCContext &Context,
if (isUIntN(6, AddrDelta)) {
uint8_t Opcode = dwarf::DW_CFA_advance_loc | AddrDelta;
- OS << Opcode;
+ Out.push_back(Opcode);
} else if (isUInt<8>(AddrDelta)) {
- OS << uint8_t(dwarf::DW_CFA_advance_loc1);
- OS << uint8_t(AddrDelta);
+ Out.push_back(dwarf::DW_CFA_advance_loc1);
+ Out.push_back(AddrDelta);
} else if (isUInt<16>(AddrDelta)) {
- OS << uint8_t(dwarf::DW_CFA_advance_loc2);
- support::endian::write<uint16_t>(OS, AddrDelta, E);
+ Out.push_back(dwarf::DW_CFA_advance_loc2);
+ support::endian::write<uint16_t>(Out, AddrDelta, E);
} else {
assert(isUInt<32>(AddrDelta));
- OS << uint8_t(dwarf::DW_CFA_advance_loc4);
- support::endian::write<uint32_t>(OS, AddrDelta, E);
+ Out.push_back(dwarf::DW_CFA_advance_loc4);
+ support::endian::write<uint32_t>(Out, AddrDelta, E);
}
}
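encodeAdvanceLoc above picks the smallest DW_CFA_advance_loc form that holds the scaled delta. A minimal sketch of that width selection (illustrative only; the multi-byte operands are written in the target's endianness, omitted here):

#include <cstdint>
#include <cstdio>

// Returns which DW_CFA_advance_loc form fits a (scaled) address delta.
const char *advanceLocForm(uint64_t Delta) {
  if (Delta == 0)
    return "nothing to emit";
  if (Delta < (1u << 6))
    return "DW_CFA_advance_loc (delta packed into the low 6 opcode bits)";
  if (Delta <= UINT8_MAX)
    return "DW_CFA_advance_loc1 (one-byte operand)";
  if (Delta <= UINT16_MAX)
    return "DW_CFA_advance_loc2 (two-byte operand)";
  return "DW_CFA_advance_loc4 (four-byte operand)";
}

int main() { std::printf("%s\n", advanceLocForm(300)); } // advance_loc2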
diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp
index 380576f4b78b..653ff4e9435a 100644
--- a/llvm/lib/MC/MCELFStreamer.cpp
+++ b/llvm/lib/MC/MCELFStreamer.cpp
@@ -216,6 +216,7 @@ bool MCELFStreamer::emitSymbolAttribute(MCSymbol *S, MCSymbolAttr Attribute) {
case MCSA_Invalid:
case MCSA_IndirectSymbol:
case MCSA_Exported:
+ case MCSA_WeakAntiDep:
return false;
case MCSA_NoDeadStrip:
@@ -551,8 +552,7 @@ void MCELFStreamer::emitInstToData(const MCInst &Inst,
MCAssembler &Assembler = getAssembler();
SmallVector<MCFixup, 4> Fixups;
SmallString<256> Code;
- raw_svector_ostream VecOS(Code);
- Assembler.getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
+ Assembler.getEmitter().encodeInstruction(Inst, Code, Fixups, STI);
for (auto &Fixup : Fixups)
fixSymbolsInTLSFixups(Fixup.getValue());
@@ -628,6 +628,9 @@ void MCELFStreamer::emitInstToData(const MCInst &Inst,
}
DF->setHasInstructions(STI);
+ if (!Fixups.empty() && Fixups.back().getTargetKind() ==
+ getAssembler().getBackend().RelaxFixupKind)
+ DF->setLinkerRelaxable();
DF->getContents().append(Code.begin(), Code.end());
if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) {
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index 45a3d938257a..a7b980553af0 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -327,6 +327,8 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
return "gd";
case VK_PPC_AIX_TLSGDM:
return "m";
+ case VK_PPC_AIX_TLSLE:
+ return "le";
case VK_PPC_GOT_TLSLD: return "got@tlsld";
case VK_PPC_GOT_TLSLD_LO: return "got@tlsld@l";
case VK_PPC_GOT_TLSLD_HI: return "got@tlsld@h";
@@ -360,6 +362,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_WASM_TLSREL: return "TLSREL";
case VK_WASM_TBREL: return "TBREL";
case VK_WASM_GOT_TLS: return "GOT@TLS";
+ case VK_WASM_FUNCINDEX: return "FUNCINDEX";
case VK_AMDGPU_GOTPCREL32_LO: return "gotpcrel32@lo";
case VK_AMDGPU_GOTPCREL32_HI: return "gotpcrel32@hi";
case VK_AMDGPU_REL32_LO: return "rel32@lo";
@@ -503,6 +506,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("mbrel", VK_WASM_MBREL)
.Case("tlsrel", VK_WASM_TLSREL)
.Case("got@tls", VK_WASM_GOT_TLS)
+ .Case("funcindex", VK_WASM_FUNCINDEX)
.Case("gotpcrel32@lo", VK_AMDGPU_GOTPCREL32_LO)
.Case("gotpcrel32@hi", VK_AMDGPU_GOTPCREL32_HI)
.Case("rel32@lo", VK_AMDGPU_REL32_LO)
@@ -619,21 +623,25 @@ static void AttemptToFoldSymbolOffsetDifference(
const MCFragment *FA = SA.getFragment();
const MCFragment *FB = SB.getFragment();
- // If both symbols are in the same fragment, return the difference of their
- // offsets
- if (FA == FB && !SA.isVariable() && !SA.isUnset() && !SB.isVariable() &&
- !SB.isUnset()) {
- Addend += SA.getOffset() - SB.getOffset();
- return FinalizeFolding();
- }
-
const MCSection &SecA = *FA->getParent();
const MCSection &SecB = *FB->getParent();
-
if ((&SecA != &SecB) && !Addrs)
return;
- if (Layout) {
+ // When layout is available, we can generally compute the difference using the
+ // getSymbolOffset path, which also avoids the possible slow fragment walk.
+  // However, linker relaxation may cause an incorrect fold of A-B if A and B
+  // are separated by a linker-relaxable instruction. If the section contains
+  // instructions and InSet is false (i.e. not expressions in directives like
+  // .size/.fill), disable the fast path.
+ if (Layout && (InSet || !SecA.hasInstructions() ||
+ !Asm->getContext().getTargetTriple().isRISCV())) {
+ // If both symbols are in the same fragment, return the difference of their
+ // offsets. canGetFragmentOffset(FA) may be false.
+ if (FA == FB && !SA.isVariable() && !SB.isVariable()) {
+ Addend += SA.getOffset() - SB.getOffset();
+ return FinalizeFolding();
+ }
// One of the symbol involved is part of a fragment being laid out. Quit now
// to avoid a self loop.
if (!Layout->canGetFragmentOffset(FA) || !Layout->canGetFragmentOffset(FB))
@@ -654,24 +662,65 @@ static void AttemptToFoldSymbolOffsetDifference(
// this is important when the Subtarget is changed and a new MCDataFragment
// is created in the case of foo: instr; .arch_extension ext; instr .if . -
// foo.
- if (SA.isVariable() || SA.isUnset() || SB.isVariable() || SB.isUnset() ||
- FA->getKind() != MCFragment::FT_Data ||
- FB->getKind() != MCFragment::FT_Data ||
+ if (SA.isVariable() || SB.isVariable() ||
FA->getSubsectionNumber() != FB->getSubsectionNumber())
return;
+
// Try to find a constant displacement from FA to FB, add the displacement
// between the offset in FA of SA and the offset in FB of SB.
+ bool Reverse = false;
+ if (FA == FB) {
+ Reverse = SA.getOffset() < SB.getOffset();
+ } else if (!isa<MCDummyFragment>(FA)) {
+ Reverse = std::find_if(std::next(FA->getIterator()), SecA.end(),
+ [&](auto &I) { return &I == FB; }) != SecA.end();
+ }
+
+ uint64_t SAOffset = SA.getOffset(), SBOffset = SB.getOffset();
int64_t Displacement = SA.getOffset() - SB.getOffset();
+ if (Reverse) {
+ std::swap(FA, FB);
+ std::swap(SAOffset, SBOffset);
+ Displacement *= -1;
+ }
+
+ [[maybe_unused]] bool Found = false;
+ // Track whether B is before a relaxable instruction and whether A is after
+ // a relaxable instruction. If SA and SB are separated by a linker-relaxable
+ // instruction, the difference cannot be resolved as it may be changed by
+ // the linker.
+ bool BBeforeRelax = false, AAfterRelax = false;
for (auto FI = FB->getIterator(), FE = SecA.end(); FI != FE; ++FI) {
+ auto DF = dyn_cast<MCDataFragment>(FI);
+ if (DF && DF->isLinkerRelaxable()) {
+ if (&*FI != FB || SBOffset != DF->getContents().size())
+ BBeforeRelax = true;
+ if (&*FI != FA || SAOffset == DF->getContents().size())
+ AAfterRelax = true;
+ if (BBeforeRelax && AAfterRelax)
+ return;
+ }
if (&*FI == FA) {
- Addend += Displacement;
- return FinalizeFolding();
+ Found = true;
+ break;
}
- if (FI->getKind() != MCFragment::FT_Data)
+ int64_t Num;
+ if (DF) {
+ Displacement += DF->getContents().size();
+ } else if (auto *FF = dyn_cast<MCFillFragment>(FI);
+ FF && FF->getNumValues().evaluateAsAbsolute(Num)) {
+ Displacement += Num * FF->getValueSize();
+ } else {
return;
- Displacement += cast<MCDataFragment>(FI)->getContents().size();
+ }
}
+ // If the previous loop does not find FA, FA must be a dummy fragment not in
+ // the fragment list (which means SA is a pending label (see
+ // flushPendingLabels)). In either case, we can resolve the difference.
+ assert(Found || isa<MCDummyFragment>(FA));
+ Addend += Reverse ? -Displacement : Displacement;
+ FinalizeFolding();
}
}
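The fragment walk above resolves A-B by summing fixed fragment sizes and gives up when a linker-relaxable instruction separates the symbols. The sketch below is a greatly simplified model with made-up data structures; the real code also handles the reversed symbol order and allows some boundary cases next to a relaxable fragment.

#include <cstdint>
#include <cstdio>
#include <optional>
#include <vector>

struct Frag {
  uint64_t Size;
  bool LinkerRelaxable;
};

// A lives at (FragA, OffA), B at (FragB, OffB), with FragB laid out at or
// before FragA. Returns A-B when it is known at assembly time.
std::optional<int64_t> foldAMinusB(const std::vector<Frag> &Frags,
                                   size_t FragA, uint64_t OffA,
                                   size_t FragB, uint64_t OffB) {
  int64_t Displacement = int64_t(OffA) - int64_t(OffB);
  for (size_t I = FragB; I != FragA; ++I) {
    if (Frags[I].LinkerRelaxable)
      return std::nullopt; // the linker may change this distance
    Displacement += int64_t(Frags[I].Size); // fixed-size data: add its size
  }
  return Displacement;
}

int main() {
  // A relaxable fragment sits between the two symbols, so the difference is
  // left for relocations to resolve.
  std::vector<Frag> Frags = {{8, false}, {4, true}, {8, false}};
  if (auto D = foldAMinusB(Frags, /*FragA=*/2, /*OffA=*/0, /*FragB=*/0,
                           /*OffB=*/4))
    std::printf("A-B = %lld\n", (long long)*D);
  else
    std::printf("not foldable at assembly time\n");
}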
@@ -761,6 +810,9 @@ bool MCExpr::evaluateAsValue(MCValue &Res, const MCAsmLayout &Layout) const {
}
static bool canExpand(const MCSymbol &Sym, bool InSet) {
+ if (Sym.isWeakExternal())
+ return false;
+
const MCExpr *Expr = Sym.getVariableValue();
const auto *Inner = dyn_cast<MCSymbolRefExpr>(Expr);
if (Inner) {
@@ -885,18 +937,19 @@ bool MCExpr::evaluateAsRelocatableImpl(MCValue &Res, const MCAssembler *Asm,
!ABE->getRHS()->evaluateAsRelocatableImpl(RHSValue, Asm, Layout, Fixup,
Addrs, InSet)) {
// Check if both are Target Expressions, see if we can compare them.
- if (const MCTargetExpr *L = dyn_cast<MCTargetExpr>(ABE->getLHS()))
- if (const MCTargetExpr *R = cast<MCTargetExpr>(ABE->getRHS())) {
- switch (ABE->getOpcode()) {
- case MCBinaryExpr::EQ:
- Res = MCValue::get((L->isEqualTo(R)) ? -1 : 0);
- return true;
- case MCBinaryExpr::NE:
- Res = MCValue::get((R->isEqualTo(R)) ? 0 : -1);
- return true;
- default: break;
- }
+ if (const MCTargetExpr *L = dyn_cast<MCTargetExpr>(ABE->getLHS())) {
+ const MCTargetExpr *R = cast<MCTargetExpr>(ABE->getRHS());
+ switch (ABE->getOpcode()) {
+ case MCBinaryExpr::EQ:
+ Res = MCValue::get(L->isEqualTo(R) ? -1 : 0);
+ return true;
+ case MCBinaryExpr::NE:
+ Res = MCValue::get(L->isEqualTo(R) ? 0 : -1);
+ return true;
+ default:
+ break;
}
+ }
return false;
}
diff --git a/llvm/lib/MC/MCInstrDesc.cpp b/llvm/lib/MC/MCInstrDesc.cpp
index b800456edc68..45c5ea73f7f6 100644
--- a/llvm/lib/MC/MCInstrDesc.cpp
+++ b/llvm/lib/MC/MCInstrDesc.cpp
@@ -40,7 +40,7 @@ bool MCInstrDesc::hasImplicitDefOfPhysReg(unsigned Reg,
bool MCInstrDesc::hasDefOfPhysReg(const MCInst &MI, unsigned Reg,
const MCRegisterInfo &RI) const {
for (int i = 0, e = NumDefs; i != e; ++i)
- if (MI.getOperand(i).isReg() &&
+ if (MI.getOperand(i).isReg() && MI.getOperand(i).getReg() &&
RI.isSubRegisterEq(Reg, MI.getOperand(i).getReg()))
return true;
if (variadicOpsAreDefs())
diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp
index 699742f96db8..d7d343f15eaa 100644
--- a/llvm/lib/MC/MCMachOStreamer.cpp
+++ b/llvm/lib/MC/MCMachOStreamer.cpp
@@ -31,7 +31,6 @@
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <vector>
@@ -359,6 +358,7 @@ bool MCMachOStreamer::emitSymbolAttribute(MCSymbol *Sym,
case MCSA_LGlobal:
case MCSA_Exported:
case MCSA_Memtag:
+ case MCSA_WeakAntiDep:
return false;
case MCSA_Global:
@@ -486,8 +486,7 @@ void MCMachOStreamer::emitInstToData(const MCInst &Inst,
SmallVector<MCFixup, 4> Fixups;
SmallString<256> Code;
- raw_svector_ostream VecOS(Code);
- getAssembler().getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
+ getAssembler().getEmitter().encodeInstruction(Inst, Code, Fixups, STI);
// Add the fixups and data.
for (MCFixup &Fixup : Fixups) {
@@ -536,9 +535,7 @@ void MCMachOStreamer::finishImpl() {
void MCMachOStreamer::finalizeCGProfileEntry(const MCSymbolRefExpr *&SRE) {
const MCSymbol *S = &SRE->getSymbol();
- bool Created;
- getAssembler().registerSymbol(*S, &Created);
- if (Created)
+ if (getAssembler().registerSymbol(*S))
S->setExternal(true);
}
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index 7af00b71677c..0b5109e41e71 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -8,7 +8,6 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/Wasm.h"
@@ -24,6 +23,7 @@
#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/Support/Casting.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
@@ -48,6 +48,10 @@ static bool useCompactUnwind(const Triple &T) {
if (T.isiOS() && T.isX86())
return true;
+ // The rest of the simulators always have it.
+ if (T.isSimulatorEnvironment())
+ return true;
+
return false;
}
@@ -62,7 +66,8 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
SectionKind::getReadOnly());
if (T.isOSDarwin() &&
- (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32))
+ (T.getArch() == Triple::aarch64 || T.getArch() == Triple::aarch64_32 ||
+ T.isSimulatorEnvironment()))
SupportsCompactUnwindWithoutEHFrame = true;
switch (Ctx->emitDwarfUnwindInfo()) {
@@ -249,7 +254,7 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
SectionKind::getMetadata(), "section_line_str");
DwarfFrameSection =
Ctx->getMachOSection("__DWARF", "__debug_frame", MachO::S_ATTR_DEBUG,
- SectionKind::getMetadata());
+ SectionKind::getMetadata(), "section_frame");
DwarfPubNamesSection =
Ctx->getMachOSection("__DWARF", "__debug_pubnames", MachO::S_ATTR_DEBUG,
SectionKind::getMetadata());
@@ -542,6 +547,8 @@ void MCObjectFileInfo::initGOFFMCObjectFileInfo(const Triple &T) {
PPA1Section =
Ctx->getGOFFSection(".ppa1", SectionKind::getMetadata(), TextSection,
MCConstantExpr::create(GOFF::SK_PPA1, *Ctx));
+ ADASection =
+ Ctx->getGOFFSection(".ada", SectionKind::getData(), nullptr, nullptr);
}
void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
@@ -918,10 +925,10 @@ void MCObjectFileInfo::initWasmMCObjectFileInfo(const Triple &T) {
void MCObjectFileInfo::initXCOFFMCObjectFileInfo(const Triple &T) {
// The default csect for program code. Functions without a specified section
// get placed into this csect. The choice of csect name is not a property of
- // the ABI or object file format. For example, the XL compiler uses an unnamed
- // csect for program code.
+ // the ABI or object file format, but various tools rely on the section
+ // name being empty (considering named symbols to be "user symbol names").
TextSection = Ctx->getXCOFFSection(
- ".text", SectionKind::getText(),
+ "", SectionKind::getText(),
XCOFF::CsectProperties(XCOFF::StorageMappingClass::XMC_PR, XCOFF::XTY_SD),
/* MultiSymbolsAllowed*/ true);
@@ -1165,18 +1172,20 @@ MCObjectFileInfo::getKCFITrapSection(const MCSection &TextSec) const {
MCSection *
MCObjectFileInfo::getPseudoProbeSection(const MCSection &TextSec) const {
- if (Ctx->getObjectFileType() == MCContext::IsELF) {
- const auto &ElfSec = static_cast<const MCSectionELF &>(TextSec);
- // Create a separate section for probes that comes with a comdat function.
- if (const MCSymbol *Group = ElfSec.getGroup()) {
- auto *S = static_cast<MCSectionELF *>(PseudoProbeSection);
- auto Flags = S->getFlags() | ELF::SHF_GROUP;
- return Ctx->getELFSection(S->getName(), S->getType(), Flags,
- S->getEntrySize(), Group->getName(),
- /*IsComdat=*/true);
- }
+ if (Ctx->getObjectFileType() != MCContext::IsELF)
+ return PseudoProbeSection;
+
+ const auto &ElfSec = static_cast<const MCSectionELF &>(TextSec);
+ unsigned Flags = ELF::SHF_LINK_ORDER;
+ StringRef GroupName;
+ if (const MCSymbol *Group = ElfSec.getGroup()) {
+ GroupName = Group->getName();
+ Flags |= ELF::SHF_GROUP;
}
- return PseudoProbeSection;
+
+ return Ctx->getELFSection(PseudoProbeSection->getName(), ELF::SHT_PROGBITS,
+ Flags, 0, GroupName, true, ElfSec.getUniqueID(),
+ cast<MCSymbolELF>(TextSec.getBeginSymbol()));
}
MCSection *
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index ac90325bcc52..3cf7b4359cab 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -68,6 +68,7 @@ void MCObjectStreamer::addPendingLabel(MCSymbol* S) {
}
void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) {
+ assert(F);
MCSection *CurSection = getCurrentSectionOnly();
if (!CurSection) {
assert(PendingLabels.empty());
@@ -80,12 +81,8 @@ void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) {
PendingLabels.clear();
}
- // Associate a fragment with this label, either the supplied fragment
- // or an empty data fragment.
- if (F)
- CurSection->flushPendingLabels(F, FOffset, CurSubsectionIdx);
- else
- CurSection->flushPendingLabels(nullptr, 0, CurSubsectionIdx);
+ // Associate the labels with F.
+ CurSection->flushPendingLabels(F, FOffset, CurSubsectionIdx);
}
void MCObjectStreamer::flushPendingLabels() {
@@ -214,6 +211,11 @@ static bool canReuseDataFragment(const MCDataFragment &F,
const MCSubtargetInfo *STI) {
if (!F.hasInstructions())
return true;
+ // Do not add data after a linker-relaxable instruction. The difference
+ // between a new label and a label at or before the linker-relaxable
+ // instruction cannot be resolved at assemble-time.
+ if (F.isLinkerRelaxable())
+ return false;
// When bundling is enabled, we don't want to add data to a fragment that
// already has instructions (see MCELFStreamer::emitInstToData for details)
if (Assembler.isBundlingEnabled())
@@ -376,10 +378,16 @@ bool MCObjectStreamer::changeSectionImpl(MCSection *Section,
int64_t IntSubsection = 0;
if (Subsection &&
- !Subsection->evaluateAsAbsolute(IntSubsection, getAssemblerPtr()))
- report_fatal_error("Cannot evaluate subsection number");
- if (IntSubsection < 0 || IntSubsection > 8192)
- report_fatal_error("Subsection number out of range");
+ !Subsection->evaluateAsAbsolute(IntSubsection, getAssemblerPtr())) {
+ getContext().reportError(Subsection->getLoc(),
+ "cannot evaluate subsection number");
+ }
+ if (!isUInt<31>(IntSubsection)) {
+ getContext().reportError(Subsection->getLoc(),
+ "subsection number " + Twine(IntSubsection) +
+ " is not within [0,2147483647]");
+ }
+
CurSubsectionIdx = unsigned(IntSubsection);
CurInsertionPoint =
Section->getSubsectionInsertionPoint(CurSubsectionIdx);
@@ -471,8 +479,7 @@ void MCObjectStreamer::emitInstToFragment(const MCInst &Inst,
insert(IF);
SmallString<128> Code;
- raw_svector_ostream VecOS(Code);
- getAssembler().getEmitter().encodeInstruction(Inst, VecOS, IF->getFixups(),
+ getAssembler().getEmitter().encodeInstruction(Inst, Code, IF->getFixups(),
STI);
IF->getContents().append(Code.begin(), Code.end());
}
@@ -508,13 +515,13 @@ void MCObjectStreamer::emitDwarfLocDirective(unsigned FileNo, unsigned Line,
}
static const MCExpr *buildSymbolDiff(MCObjectStreamer &OS, const MCSymbol *A,
- const MCSymbol *B) {
+ const MCSymbol *B, SMLoc Loc) {
MCContext &Context = OS.getContext();
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
const MCExpr *ARef = MCSymbolRefExpr::create(A, Variant, Context);
const MCExpr *BRef = MCSymbolRefExpr::create(B, Variant, Context);
const MCExpr *AddrDelta =
- MCBinaryExpr::create(MCBinaryExpr::Sub, ARef, BRef, Context);
+ MCBinaryExpr::create(MCBinaryExpr::Sub, ARef, BRef, Context, Loc);
return AddrDelta;
}
@@ -541,13 +548,7 @@ void MCObjectStreamer::emitDwarfAdvanceLineAddr(int64_t LineDelta,
Label, PointerSize);
return;
}
- const MCExpr *AddrDelta = buildSymbolDiff(*this, Label, LastLabel);
- int64_t Res;
- if (AddrDelta->evaluateAsAbsolute(Res, getAssemblerPtr())) {
- MCDwarfLineAddr::Emit(this, Assembler->getDWARFLinetableParams(), LineDelta,
- Res);
- return;
- }
+ const MCExpr *AddrDelta = buildSymbolDiff(*this, Label, LastLabel, SMLoc());
insert(new MCDwarfLineAddrFragment(LineDelta, *AddrDelta));
}
@@ -570,14 +571,10 @@ void MCObjectStreamer::emitDwarfLineEndEntry(MCSection *Section,
}
void MCObjectStreamer::emitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
- const MCSymbol *Label) {
- const MCExpr *AddrDelta = buildSymbolDiff(*this, Label, LastLabel);
- int64_t Res;
- if (AddrDelta->evaluateAsAbsolute(Res, getAssemblerPtr())) {
- MCDwarfFrameEmitter::EmitAdvanceLoc(*this, Res);
- return;
- }
- insert(new MCDwarfCallFrameFragment(*AddrDelta));
+ const MCSymbol *Label,
+ SMLoc Loc) {
+ const MCExpr *AddrDelta = buildSymbolDiff(*this, Label, LastLabel, Loc);
+ insert(new MCDwarfCallFrameFragment(*AddrDelta, nullptr));
}
void MCObjectStreamer::emitCVLocDirective(unsigned FunctionId, unsigned FileNo,
diff --git a/llvm/lib/MC/MCObjectWriter.cpp b/llvm/lib/MC/MCObjectWriter.cpp
index 89ff5800da5b..559aff130d88 100644
--- a/llvm/lib/MC/MCObjectWriter.cpp
+++ b/llvm/lib/MC/MCObjectWriter.cpp
@@ -28,9 +28,7 @@ bool MCObjectWriter::isSymbolRefDifferenceFullyResolved(
const MCSymbol &SA = A->getSymbol();
const MCSymbol &SB = B->getSymbol();
- if (SA.isUndefined() || SB.isUndefined())
- return false;
-
+ assert(!SA.isUndefined() && !SB.isUndefined());
if (!SA.getFragment() || !SB.getFragment())
return false;
diff --git a/llvm/lib/MC/MCParser/AsmLexer.cpp b/llvm/lib/MC/MCParser/AsmLexer.cpp
index 19300e3885bb..f13549b24e2d 100644
--- a/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -578,7 +578,7 @@ AsmToken AsmLexer::LexSingleQuote() {
} else if (peekNextChar() == '\'') {
// In MASM single-quote strings, doubled single-quotes mean an escaped
// single quote, so should be lexed in.
- getNextChar();
+ (void)getNextChar();
CurChar = getNextChar();
} else {
break;
@@ -635,7 +635,7 @@ AsmToken AsmLexer::LexQuote() {
} else if (peekNextChar() == '"') {
// In MASM double-quoted strings, doubled double-quotes mean an escaped
// double quote, so should be lexed in.
- getNextChar();
+ (void)getNextChar();
CurChar = getNextChar();
} else {
break;
@@ -776,9 +776,11 @@ AsmToken AsmLexer::LexToken() {
IsAtStartOfStatement = false;
switch (CurChar) {
default:
- // Handle identifier: [a-zA-Z_.?][a-zA-Z0-9_$.@#?]*
- if (isalpha(CurChar) || CurChar == '_' || CurChar == '.' ||
- (MAI.doesAllowQuestionAtStartOfIdentifier() && CurChar == '?'))
+ // Handle identifier: [a-zA-Z_.$@#?][a-zA-Z0-9_.$@#?]*
+ // Whether or not the lexer accepts '$', '@', '#' and '?' at the start of
+ // an identifier is target-dependent. These characters are handled in the
+ // respective switch cases.
+ if (isalpha(CurChar) || CurChar == '_' || CurChar == '.')
return LexIdentifier();
// Unknown character, emit an error.
@@ -830,11 +832,18 @@ AsmToken AsmLexer::LexToken() {
return LexIdentifier();
return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
}
- case '@': {
+ case '@':
if (MAI.doesAllowAtAtStartOfIdentifier())
return LexIdentifier();
return AsmToken(AsmToken::At, StringRef(TokStart, 1));
- }
+ case '#':
+ if (MAI.doesAllowHashAtStartOfIdentifier())
+ return LexIdentifier();
+ return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
+ case '?':
+ if (MAI.doesAllowQuestionAtStartOfIdentifier())
+ return LexIdentifier();
+ return AsmToken(AsmToken::Question, StringRef(TokStart, 1));
case '\\': return AsmToken(AsmToken::BackSlash, StringRef(TokStart, 1));
case '=':
if (*CurPtr == '=') {
@@ -914,11 +923,6 @@ AsmToken AsmLexer::LexToken() {
case '/':
IsAtStartOfStatement = OldIsAtStartOfStatement;
return LexSlash();
- case '#': {
- if (MAI.doesAllowHashAtStartOfIdentifier())
- return LexIdentifier();
- return AsmToken(AsmToken::Hash, StringRef(TokStart, 1));
- }
case '\'': return LexSingleQuote();
case '"': return LexQuote();
case '0': case '1': case '2': case '3': case '4':
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index bd4da1b282dd..04590ed57a9f 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -601,25 +601,25 @@ private:
// .cfi directives
bool parseDirectiveCFIRegister(SMLoc DirectiveLoc);
- bool parseDirectiveCFIWindowSave();
+ bool parseDirectiveCFIWindowSave(SMLoc DirectiveLoc);
bool parseDirectiveCFISections();
bool parseDirectiveCFIStartProc();
bool parseDirectiveCFIEndProc();
- bool parseDirectiveCFIDefCfaOffset();
+ bool parseDirectiveCFIDefCfaOffset(SMLoc DirectiveLoc);
bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
- bool parseDirectiveCFIAdjustCfaOffset();
+ bool parseDirectiveCFIAdjustCfaOffset(SMLoc DirectiveLoc);
bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
bool parseDirectiveCFILLVMDefAspaceCfa(SMLoc DirectiveLoc);
bool parseDirectiveCFIOffset(SMLoc DirectiveLoc);
bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
- bool parseDirectiveCFIRememberState();
- bool parseDirectiveCFIRestoreState();
+ bool parseDirectiveCFIRememberState(SMLoc DirectiveLoc);
+ bool parseDirectiveCFIRestoreState(SMLoc DirectiveLoc);
bool parseDirectiveCFISameValue(SMLoc DirectiveLoc);
bool parseDirectiveCFIRestore(SMLoc DirectiveLoc);
- bool parseDirectiveCFIEscape();
+ bool parseDirectiveCFIEscape(SMLoc DirectiveLoc);
bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc);
- bool parseDirectiveCFISignalFrame();
+ bool parseDirectiveCFISignalFrame(SMLoc DirectiveLoc);
bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc);
// macro directives
@@ -1999,20 +1999,12 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
getTargetParser().flushPendingInstructions(getStreamer());
- SMLoc StartTokLoc = getTok().getLoc();
- bool TPDirectiveReturn = getTargetParser().ParseDirective(ID);
-
- if (hasPendingError())
- return true;
- // Currently the return value should be true if we are
- // uninterested but as this is at odds with the standard parsing
- // convention (return true = error) we have instances of a parsed
- // directive that fails returning true as an error. Catch these
- // cases as best as possible errors here.
- if (TPDirectiveReturn && StartTokLoc != getTok().getLoc())
+ ParseStatus TPDirectiveReturn = getTargetParser().parseDirective(ID);
+ assert(TPDirectiveReturn.isFailure() == hasPendingError() &&
+ "Should only return Failure iff there was an error");
+ if (TPDirectiveReturn.isFailure())
return true;
- // Return if we did some parsing or believe we succeeded.
- if (!TPDirectiveReturn || StartTokLoc != getTok().getLoc())
+ if (TPDirectiveReturn.isSuccess())
return false;
// Next, check the extension directive map to see if any extension has
@@ -2195,9 +2187,9 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
case DK_CFI_DEF_CFA:
return parseDirectiveCFIDefCfa(IDLoc);
case DK_CFI_DEF_CFA_OFFSET:
- return parseDirectiveCFIDefCfaOffset();
+ return parseDirectiveCFIDefCfaOffset(IDLoc);
case DK_CFI_ADJUST_CFA_OFFSET:
- return parseDirectiveCFIAdjustCfaOffset();
+ return parseDirectiveCFIAdjustCfaOffset(IDLoc);
case DK_CFI_DEF_CFA_REGISTER:
return parseDirectiveCFIDefCfaRegister(IDLoc);
case DK_CFI_LLVM_DEF_ASPACE_CFA:
@@ -2211,25 +2203,25 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
case DK_CFI_LSDA:
return parseDirectiveCFIPersonalityOrLsda(false);
case DK_CFI_REMEMBER_STATE:
- return parseDirectiveCFIRememberState();
+ return parseDirectiveCFIRememberState(IDLoc);
case DK_CFI_RESTORE_STATE:
- return parseDirectiveCFIRestoreState();
+ return parseDirectiveCFIRestoreState(IDLoc);
case DK_CFI_SAME_VALUE:
return parseDirectiveCFISameValue(IDLoc);
case DK_CFI_RESTORE:
return parseDirectiveCFIRestore(IDLoc);
case DK_CFI_ESCAPE:
- return parseDirectiveCFIEscape();
+ return parseDirectiveCFIEscape(IDLoc);
case DK_CFI_RETURN_COLUMN:
return parseDirectiveCFIReturnColumn(IDLoc);
case DK_CFI_SIGNAL_FRAME:
- return parseDirectiveCFISignalFrame();
+ return parseDirectiveCFISignalFrame(IDLoc);
case DK_CFI_UNDEFINED:
return parseDirectiveCFIUndefined(IDLoc);
case DK_CFI_REGISTER:
return parseDirectiveCFIRegister(IDLoc);
case DK_CFI_WINDOW_SAVE:
- return parseDirectiveCFIWindowSave();
+ return parseDirectiveCFIWindowSave(IDLoc);
case DK_MACROS_ON:
case DK_MACROS_OFF:
return parseDirectiveMacrosOnOff(IDVal);
@@ -3451,7 +3443,7 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
Alignment = 1;
else if (!isPowerOf2_64(Alignment)) {
ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2");
- Alignment = PowerOf2Floor(Alignment);
+ Alignment = llvm::bit_floor<uint64_t>(Alignment);
}
if (!isUInt<32>(Alignment)) {
ReturnVal |= Error(AlignmentLoc, "alignment must be smaller than 2**32");
@@ -4238,18 +4230,18 @@ bool AsmParser::parseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
parseAbsoluteExpression(Offset) || parseEOL())
return true;
- getStreamer().emitCFIDefCfa(Register, Offset);
+ getStreamer().emitCFIDefCfa(Register, Offset, DirectiveLoc);
return false;
}
/// parseDirectiveCFIDefCfaOffset
/// ::= .cfi_def_cfa_offset offset
-bool AsmParser::parseDirectiveCFIDefCfaOffset() {
+bool AsmParser::parseDirectiveCFIDefCfaOffset(SMLoc DirectiveLoc) {
int64_t Offset = 0;
if (parseAbsoluteExpression(Offset) || parseEOL())
return true;
- getStreamer().emitCFIDefCfaOffset(Offset);
+ getStreamer().emitCFIDefCfaOffset(Offset, DirectiveLoc);
return false;
}
@@ -4261,27 +4253,27 @@ bool AsmParser::parseDirectiveCFIRegister(SMLoc DirectiveLoc) {
parseRegisterOrRegisterNumber(Register2, DirectiveLoc) || parseEOL())
return true;
- getStreamer().emitCFIRegister(Register1, Register2);
+ getStreamer().emitCFIRegister(Register1, Register2, DirectiveLoc);
return false;
}
/// parseDirectiveCFIWindowSave
/// ::= .cfi_window_save
-bool AsmParser::parseDirectiveCFIWindowSave() {
+bool AsmParser::parseDirectiveCFIWindowSave(SMLoc DirectiveLoc) {
if (parseEOL())
return true;
- getStreamer().emitCFIWindowSave();
+ getStreamer().emitCFIWindowSave(DirectiveLoc);
return false;
}
/// parseDirectiveCFIAdjustCfaOffset
/// ::= .cfi_adjust_cfa_offset adjustment
-bool AsmParser::parseDirectiveCFIAdjustCfaOffset() {
+bool AsmParser::parseDirectiveCFIAdjustCfaOffset(SMLoc DirectiveLoc) {
int64_t Adjustment = 0;
if (parseAbsoluteExpression(Adjustment) || parseEOL())
return true;
- getStreamer().emitCFIAdjustCfaOffset(Adjustment);
+ getStreamer().emitCFIAdjustCfaOffset(Adjustment, DirectiveLoc);
return false;
}
@@ -4292,7 +4284,7 @@ bool AsmParser::parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) {
if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) || parseEOL())
return true;
- getStreamer().emitCFIDefCfaRegister(Register);
+ getStreamer().emitCFIDefCfaRegister(Register, DirectiveLoc);
return false;
}
@@ -4305,7 +4297,8 @@ bool AsmParser::parseDirectiveCFILLVMDefAspaceCfa(SMLoc DirectiveLoc) {
parseAbsoluteExpression(AddressSpace) || parseEOL())
return true;
- getStreamer().emitCFILLVMDefAspaceCfa(Register, Offset, AddressSpace);
+ getStreamer().emitCFILLVMDefAspaceCfa(Register, Offset, AddressSpace,
+ DirectiveLoc);
return false;
}
@@ -4319,7 +4312,7 @@ bool AsmParser::parseDirectiveCFIOffset(SMLoc DirectiveLoc) {
parseAbsoluteExpression(Offset) || parseEOL())
return true;
- getStreamer().emitCFIOffset(Register, Offset);
+ getStreamer().emitCFIOffset(Register, Offset, DirectiveLoc);
return false;
}
@@ -4332,7 +4325,7 @@ bool AsmParser::parseDirectiveCFIRelOffset(SMLoc DirectiveLoc) {
parseAbsoluteExpression(Offset) || parseEOL())
return true;
- getStreamer().emitCFIRelOffset(Register, Offset);
+ getStreamer().emitCFIRelOffset(Register, Offset, DirectiveLoc);
return false;
}
@@ -4387,19 +4380,19 @@ bool AsmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
/// parseDirectiveCFIRememberState
/// ::= .cfi_remember_state
-bool AsmParser::parseDirectiveCFIRememberState() {
+bool AsmParser::parseDirectiveCFIRememberState(SMLoc DirectiveLoc) {
if (parseEOL())
return true;
- getStreamer().emitCFIRememberState();
+ getStreamer().emitCFIRememberState(DirectiveLoc);
return false;
}
/// parseDirectiveCFIRestoreState
/// ::= .cfi_restore_state
-bool AsmParser::parseDirectiveCFIRestoreState() {
+bool AsmParser::parseDirectiveCFIRestoreState(SMLoc DirectiveLoc) {
if (parseEOL())
return true;
- getStreamer().emitCFIRestoreState();
+ getStreamer().emitCFIRestoreState(DirectiveLoc);
return false;
}
@@ -4411,7 +4404,7 @@ bool AsmParser::parseDirectiveCFISameValue(SMLoc DirectiveLoc) {
if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) || parseEOL())
return true;
- getStreamer().emitCFISameValue(Register);
+ getStreamer().emitCFISameValue(Register, DirectiveLoc);
return false;
}
@@ -4422,13 +4415,13 @@ bool AsmParser::parseDirectiveCFIRestore(SMLoc DirectiveLoc) {
if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) || parseEOL())
return true;
- getStreamer().emitCFIRestore(Register);
+ getStreamer().emitCFIRestore(Register, DirectiveLoc);
return false;
}
/// parseDirectiveCFIEscape
/// ::= .cfi_escape expression[,...]
-bool AsmParser::parseDirectiveCFIEscape() {
+bool AsmParser::parseDirectiveCFIEscape(SMLoc DirectiveLoc) {
std::string Values;
int64_t CurrValue;
if (parseAbsoluteExpression(CurrValue))
@@ -4445,7 +4438,7 @@ bool AsmParser::parseDirectiveCFIEscape() {
Values.push_back((uint8_t)CurrValue);
}
- getStreamer().emitCFIEscape(Values);
+ getStreamer().emitCFIEscape(Values, DirectiveLoc);
return false;
}
@@ -4461,7 +4454,7 @@ bool AsmParser::parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc) {
/// parseDirectiveCFISignalFrame
/// ::= .cfi_signal_frame
-bool AsmParser::parseDirectiveCFISignalFrame() {
+bool AsmParser::parseDirectiveCFISignalFrame(SMLoc DirectiveLoc) {
if (parseEOL())
return true;
@@ -4477,7 +4470,7 @@ bool AsmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) {
if (parseRegisterOrRegisterNumber(Register, DirectiveLoc) || parseEOL())
return true;
- getStreamer().emitCFIUndefined(Register);
+ getStreamer().emitCFIUndefined(Register, DirectiveLoc);
return false;
}
@@ -5853,24 +5846,23 @@ bool AsmParser::parseDirectivePseudoProbe() {
int64_t Index;
int64_t Type;
int64_t Attr;
+ int64_t Discriminator = 0;
- if (getLexer().is(AsmToken::Integer)) {
- if (parseIntToken(Guid, "unexpected token in '.pseudoprobe' directive"))
- return true;
- }
+ if (parseIntToken(Guid, "unexpected token in '.pseudoprobe' directive"))
+ return true;
- if (getLexer().is(AsmToken::Integer)) {
- if (parseIntToken(Index, "unexpected token in '.pseudoprobe' directive"))
- return true;
- }
+ if (parseIntToken(Index, "unexpected token in '.pseudoprobe' directive"))
+ return true;
- if (getLexer().is(AsmToken::Integer)) {
- if (parseIntToken(Type, "unexpected token in '.pseudoprobe' directive"))
- return true;
- }
+ if (parseIntToken(Type, "unexpected token in '.pseudoprobe' directive"))
+ return true;
- if (getLexer().is(AsmToken::Integer)) {
- if (parseIntToken(Attr, "unexpected token in '.pseudoprobe' directive"))
+ if (parseIntToken(Attr, "unexpected token in '.pseudoprobe' directive"))
+ return true;
+
+ if (hasDiscriminator(Attr)) {
+ if (parseIntToken(Discriminator,
+ "unexpected token in '.pseudoprobe' directive"))
return true;
}
@@ -5912,7 +5904,8 @@ bool AsmParser::parseDirectivePseudoProbe() {
if (parseEOL())
return true;
- getStreamer().emitPseudoProbe(Guid, Index, Type, Attr, InlineStack, FnSym);
+ getStreamer().emitPseudoProbe(Guid, Index, Type, Attr, Discriminator,
+ InlineStack, FnSym);
return false;
}
@@ -6371,7 +6364,7 @@ static bool isSymbolUsedInExpression(const MCSymbol *Sym, const MCExpr *Value) {
case MCExpr::SymbolRef: {
const MCSymbol &S =
static_cast<const MCSymbolRefExpr *>(Value)->getSymbol();
- if (S.isVariable())
+ if (S.isVariable() && !S.isWeakExternal())
return isSymbolUsedInExpression(Sym, S.getVariableValue());
return &S == Sym;
}
diff --git a/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/llvm/lib/MC/MCParser/COFFAsmParser.cpp
index ea123f43536f..bfded36da7ab 100644
--- a/llvm/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/COFFAsmParser.cpp
@@ -8,7 +8,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/MC/MCContext.h"
@@ -19,6 +18,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/SMLoc.h"
+#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <cstdint>
#include <limits>
@@ -56,6 +56,10 @@ class COFFAsmParser : public MCAsmParserExtension {
addDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveData>(".data");
addDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveBSS>(".bss");
addDirectiveHandler<&COFFAsmParser::ParseDirectiveSection>(".section");
+ addDirectiveHandler<&COFFAsmParser::ParseDirectivePushSection>(
+ ".pushsection");
+ addDirectiveHandler<&COFFAsmParser::ParseDirectivePopSection>(
+ ".popsection");
addDirectiveHandler<&COFFAsmParser::ParseDirectiveDef>(".def");
addDirectiveHandler<&COFFAsmParser::ParseDirectiveScl>(".scl");
addDirectiveHandler<&COFFAsmParser::ParseDirectiveType>(".type");
@@ -67,6 +71,7 @@ class COFFAsmParser : public MCAsmParserExtension {
addDirectiveHandler<&COFFAsmParser::ParseDirectiveLinkOnce>(".linkonce");
addDirectiveHandler<&COFFAsmParser::ParseDirectiveRVA>(".rva");
addDirectiveHandler<&COFFAsmParser::ParseDirectiveSymbolAttribute>(".weak");
+ addDirectiveHandler<&COFFAsmParser::ParseDirectiveSymbolAttribute>(".weak_anti_dep");
addDirectiveHandler<&COFFAsmParser::ParseDirectiveCGProfile>(".cg_profile");
// Win64 EH directives.
@@ -114,6 +119,9 @@ class COFFAsmParser : public MCAsmParserExtension {
}
bool ParseDirectiveSection(StringRef, SMLoc);
+ bool parseSectionArguments(StringRef, SMLoc);
+ bool ParseDirectivePushSection(StringRef, SMLoc);
+ bool ParseDirectivePopSection(StringRef, SMLoc);
bool ParseDirectiveDef(StringRef, SMLoc);
bool ParseDirectiveScl(StringRef, SMLoc);
bool ParseDirectiveType(StringRef, SMLoc);
@@ -281,6 +289,7 @@ bool COFFAsmParser::ParseSectionFlags(StringRef SectionName,
bool COFFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Directive)
.Case(".weak", MCSA_Weak)
+ .Case(".weak_anti_dep", MCSA_WeakAntiDep)
.Default(MCSA_Invalid);
assert(Attr != MCSA_Invalid && "unexpected symbol attribute directive!");
if (getLexer().isNot(AsmToken::EndOfStatement)) {
@@ -341,7 +350,12 @@ bool COFFAsmParser::ParseSectionName(StringRef &SectionName) {
return false;
}
+bool COFFAsmParser::ParseDirectiveSection(StringRef directive, SMLoc loc) {
+ return parseSectionArguments(directive, loc);
+}
+
// .section name [, "flags"] [, identifier [ identifier ], identifier]
+// .pushsection <same as above>
//
// Supported flags:
// a: Ignored.
@@ -356,7 +370,7 @@ bool COFFAsmParser::ParseSectionName(StringRef &SectionName) {
// y: Not-readable section (clears 'r')
//
// Subsections are not supported.
-bool COFFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
+bool COFFAsmParser::parseSectionArguments(StringRef, SMLoc) {
StringRef SectionName;
if (ParseSectionName(SectionName))
@@ -415,6 +429,23 @@ bool COFFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
return false;
}
+bool COFFAsmParser::ParseDirectivePushSection(StringRef directive, SMLoc loc) {
+ getStreamer().pushSection();
+
+ if (parseSectionArguments(directive, loc)) {
+ getStreamer().popSection();
+ return true;
+ }
+
+ return false;
+}
+
+bool COFFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) {
+ if (!getStreamer().popSection())
+ return TokError(".popsection without corresponding .pushsection");
+ return false;
+}
+
bool COFFAsmParser::ParseDirectiveDef(StringRef, SMLoc) {
StringRef SymbolName;
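The new .pushsection/.popsection handlers are thin wrappers around the streamer's section stack. A minimal sketch of that behaviour at the MCStreamer level, not taken from the patch, assuming Sec is an already-created section and emitWordInPushedSection is a made-up helper:

#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

// Hypothetical helper: emit one word into Sec without disturbing whatever
// section the caller was emitting into, mirroring .pushsection/.popsection.
static void emitWordInPushedSection(MCStreamer &S, MCSection *Sec) {
  S.pushSection();        // save the current section, as .pushsection does
  S.switchSection(Sec);   // parseSectionArguments() ends up switching here
  S.emitInt32(1);
  if (!S.popSection())    // fails when the section stack is empty
    report_fatal_error(".popsection without corresponding .pushsection");
}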
diff --git a/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
index 4be081ded3cf..7c390041b369 100644
--- a/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -9,7 +9,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCContext.h"
@@ -27,6 +26,7 @@
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include <cstddef>
#include <cstdint>
#include <string>
@@ -1130,7 +1130,7 @@ bool DarwinAsmParser::parseVersionMin(StringRef Directive, SMLoc Loc,
if (isSDKVersionToken(getLexer().getTok()) && parseSDKVersion(SDKVersion))
return true;
- if (parseToken(AsmToken::EndOfStatement))
+ if (parseEOL())
return addErrorSuffix(Twine(" in '") + Directive + "' directive");
Triple::OSType ExpectedOS = getOSTypeFromMCVM(Type);
@@ -1191,7 +1191,7 @@ bool DarwinAsmParser::parseBuildVersion(StringRef Directive, SMLoc Loc) {
if (isSDKVersionToken(getLexer().getTok()) && parseSDKVersion(SDKVersion))
return true;
- if (parseToken(AsmToken::EndOfStatement))
+ if (parseEOL())
return addErrorSuffix(" in '.build_version' directive");
Triple::OSType ExpectedOS
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index a5981d15013f..dbfe0d83e1b2 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"
@@ -317,20 +318,33 @@ static unsigned parseSectionFlags(const Triple &TT, StringRef flagsStr,
flags |= ELF::SHF_TLS;
break;
case 'c':
+ if (TT.getArch() != Triple::xcore)
+ return -1U;
flags |= ELF::XCORE_SHF_CP_SECTION;
break;
case 'd':
+ if (TT.getArch() != Triple::xcore)
+ return -1U;
flags |= ELF::XCORE_SHF_DP_SECTION;
break;
case 'y':
+ if (!(TT.isARM() || TT.isThumb()))
+ return -1U;
flags |= ELF::SHF_ARM_PURECODE;
break;
case 's':
+ if (TT.getArch() != Triple::hexagon)
+ return -1U;
flags |= ELF::SHF_HEX_GPREL;
break;
case 'G':
flags |= ELF::SHF_GROUP;
break;
+ case 'l':
+ if (TT.getArch() != Triple::x86_64)
+ return -1U;
+ flags |= ELF::SHF_X86_64_LARGE;
+ break;
case 'R':
if (TT.isOSSolaris())
flags |= ELF::SHF_SUNW_NODISCARD;
@@ -661,6 +675,8 @@ EndStmt:
Type = ELF::SHT_LLVM_BB_ADDR_MAP;
else if (TypeName == "llvm_offloading")
Type = ELF::SHT_LLVM_OFFLOADING;
+ else if (TypeName == "llvm_lto")
+ Type = ELF::SHT_LLVM_LTO;
else if (TypeName.getAsInteger(0, Type))
return TokError("unknown section type");
}
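As an API-side illustration of the two additions above, a hedged sketch (getLLVMLTOSection is a made-up helper): the assembly flag 'l' maps to ELF::SHF_X86_64_LARGE on x86-64 only, and the type name "llvm_lto" maps to ELF::SHT_LLVM_LTO.

#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"

using namespace llvm;

// Hypothetical helper creating a section of the new "llvm_lto" type; the
// SHF_EXCLUDE flag simply keeps the section out of the final link output.
static MCSectionELF *getLLVMLTOSection(MCContext &Ctx) {
  return Ctx.getELFSection(".llvm.lto", ELF::SHT_LLVM_LTO, ELF::SHF_EXCLUDE);
}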
diff --git a/llvm/lib/MC/MCParser/MCAsmLexer.cpp b/llvm/lib/MC/MCParser/MCAsmLexer.cpp
index 632c52479d70..f202b53732fc 100644
--- a/llvm/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/llvm/lib/MC/MCParser/MCAsmLexer.cpp
@@ -88,6 +88,7 @@ void AsmToken::dump(raw_ostream &OS) const {
case AsmToken::Pipe: OS << "Pipe"; break;
case AsmToken::PipePipe: OS << "PipePipe"; break;
case AsmToken::Plus: OS << "Plus"; break;
+ case AsmToken::Question: OS << "Question"; break;
case AsmToken::RBrac: OS << "RBrac"; break;
case AsmToken::RCurly: OS << "RCurly"; break;
case AsmToken::RParen: OS << "RParen"; break;
diff --git a/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp b/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp
index 940f26d4750b..0db5fb36f795 100644
--- a/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/MCTargetAsmParser.cpp
@@ -27,3 +27,24 @@ MCSubtargetInfo &MCTargetAsmParser::copySTI() {
const MCSubtargetInfo &MCTargetAsmParser::getSTI() const {
return *STI;
}
+
+ParseStatus MCTargetAsmParser::parseDirective(AsmToken DirectiveID) {
+ SMLoc StartTokLoc = getTok().getLoc();
+ // Delegate to ParseDirective by default for the transition period. Once the
+ // transition is over, this method should just return NoMatch.
+ bool Res = ParseDirective(DirectiveID);
+
+ // Some targets erroneously report success after emitting an error.
+ if (getParser().hasPendingError())
+ return ParseStatus::Failure;
+
+ // ParseDirective returns true if there was an error or if the directive is
+ // not target-specific. Disambiguate the two cases by comparing position of
+ // the lexer before and after calling the method: if no tokens were consumed,
+ // there was no match, otherwise there was a failure.
+ if (!Res)
+ return ParseStatus::Success;
+ if (getTok().getLoc() != StartTokLoc)
+ return ParseStatus::Failure;
+ return ParseStatus::NoMatch;
+}
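The decision table implemented by this compatibility shim can be restated without any LLVM types. A self-contained illustration of the logic only, not code from the patch:

// How the legacy bool result of ParseDirective() plus "did the lexer move?"
// maps onto the tri-state result of the new parseDirective() hook.
enum class Status { Success, NoMatch, Failure };

static Status classifyLegacyResult(bool LegacyReturnedTrue,
                                   bool HasPendingError,
                                   bool ConsumedTokens) {
  if (HasPendingError)
    return Status::Failure;   // an error has already been reported
  if (!LegacyReturnedTrue)
    return Status::Success;   // the legacy hook handled the directive
  if (ConsumedTokens)
    return Status::Failure;   // partial parse followed by an error return
  return Status::NoMatch;     // input untouched: not a target directive
}

Callers such as MasmParser below treat Failure as "stop with an error", Success as "directive fully handled", and NoMatch as "fall through to the generic directive table".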
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index f38dad417aed..307256ffaf45 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -151,14 +151,14 @@ struct IntFieldInfo {
IntFieldInfo() = default;
IntFieldInfo(const SmallVector<const MCExpr *, 1> &V) { Values = V; }
- IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = V; }
+ IntFieldInfo(SmallVector<const MCExpr *, 1> &&V) { Values = std::move(V); }
};
struct RealFieldInfo {
SmallVector<APInt, 1> AsIntValues;
RealFieldInfo() = default;
RealFieldInfo(const SmallVector<APInt, 1> &V) { AsIntValues = V; }
- RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = V; }
+ RealFieldInfo(SmallVector<APInt, 1> &&V) { AsIntValues = std::move(V); }
};
struct StructFieldInfo {
std::vector<StructInitializer> Initializers;
@@ -269,12 +269,12 @@ FieldInitializer::FieldInitializer(FieldType FT) : FT(FT) {
FieldInitializer::FieldInitializer(SmallVector<const MCExpr *, 1> &&Values)
: FT(FT_INTEGRAL) {
- new (&IntInfo) IntFieldInfo(Values);
+ new (&IntInfo) IntFieldInfo(std::move(Values));
}
FieldInitializer::FieldInitializer(SmallVector<APInt, 1> &&AsIntValues)
: FT(FT_REAL) {
- new (&RealInfo) RealFieldInfo(AsIntValues);
+ new (&RealInfo) RealFieldInfo(std::move(AsIntValues));
}
FieldInitializer::FieldInitializer(
@@ -479,7 +479,7 @@ public:
void addDirectiveHandler(StringRef Directive,
ExtensionDirectiveHandler Handler) override {
ExtensionDirectiveMap[Directive] = Handler;
- if (DirectiveKindMap.find(Directive) == DirectiveKindMap.end()) {
+ if (!DirectiveKindMap.contains(Directive)) {
DirectiveKindMap[Directive] = DK_HANDLER_DIRECTIVE;
}
}
@@ -962,22 +962,22 @@ private:
// .cfi directives
bool parseDirectiveCFIRegister(SMLoc DirectiveLoc);
- bool parseDirectiveCFIWindowSave();
+ bool parseDirectiveCFIWindowSave(SMLoc DirectiveLoc);
bool parseDirectiveCFISections();
bool parseDirectiveCFIStartProc();
bool parseDirectiveCFIEndProc();
- bool parseDirectiveCFIDefCfaOffset();
+ bool parseDirectiveCFIDefCfaOffset(SMLoc DirectiveLoc);
bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc);
- bool parseDirectiveCFIAdjustCfaOffset();
+ bool parseDirectiveCFIAdjustCfaOffset(SMLoc DirectiveLoc);
bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc);
bool parseDirectiveCFIOffset(SMLoc DirectiveLoc);
bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc);
bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality);
- bool parseDirectiveCFIRememberState();
- bool parseDirectiveCFIRestoreState();
+ bool parseDirectiveCFIRememberState(SMLoc DirectiveLoc);
+ bool parseDirectiveCFIRestoreState(SMLoc DirectiveLoc);
bool parseDirectiveCFISameValue(SMLoc DirectiveLoc);
bool parseDirectiveCFIRestore(SMLoc DirectiveLoc);
- bool parseDirectiveCFIEscape();
+ bool parseDirectiveCFIEscape(SMLoc DirectiveLoc);
bool parseDirectiveCFIReturnColumn(SMLoc DirectiveLoc);
bool parseDirectiveCFISignalFrame();
bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc);
@@ -1618,19 +1618,7 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
// Parse symbol variant.
std::pair<StringRef, StringRef> Split;
if (!MAI.useParensForSymbolVariant()) {
- if (FirstTokenKind == AsmToken::String) {
- if (Lexer.is(AsmToken::At)) {
- Lex(); // eat @
- SMLoc AtLoc = getLexer().getLoc();
- StringRef VName;
- if (parseIdentifier(VName))
- return Error(AtLoc, "expected symbol variant after '@'");
-
- Split = std::make_pair(Identifier, VName);
- }
- } else {
- Split = Identifier.split('@');
- }
+ Split = Identifier.split('@');
} else if (Lexer.is(AsmToken::LParen)) {
Lex(); // eat '('.
StringRef VName;
@@ -2125,20 +2113,6 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
// Treat '.' as a valid identifier in this context.
Lex();
IDVal = ".";
- } else if (Lexer.is(AsmToken::LCurly)) {
- // Treat '{' as a valid identifier in this context.
- Lex();
- IDVal = "{";
-
- } else if (Lexer.is(AsmToken::RCurly)) {
- // Treat '}' as a valid identifier in this context.
- Lex();
- IDVal = "}";
- } else if (Lexer.is(AsmToken::Star) &&
- getTargetParser().starIsStartOfStatement()) {
- // Accept '*' as a valid start of statement.
- Lex();
- IDVal = "*";
} else if (Lexer.is(AsmToken::Real)) {
// Treat ".<number>" as a valid identifier in this context.
IDVal = getTok().getString();
@@ -2330,21 +2304,15 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
return (*Handler.second)(Handler.first, IDVal, IDLoc);
// Next, let the target-specific assembly parser try.
- SMLoc StartTokLoc = getTok().getLoc();
- bool TPDirectiveReturn =
- ID.is(AsmToken::Identifier) && getTargetParser().ParseDirective(ID);
+ if (ID.isNot(AsmToken::Identifier))
+ return false;
- if (hasPendingError())
+ ParseStatus TPDirectiveReturn = getTargetParser().parseDirective(ID);
+ assert(TPDirectiveReturn.isFailure() == hasPendingError() &&
+ "Should only return Failure iff there was an error");
+ if (TPDirectiveReturn.isFailure())
return true;
- // Currently the return value should be true if we are
- // uninterested but as this is at odds with the standard parsing
- // convention (return true = error) we have instances of a parsed
- // directive that fails returning true as an error. Catch these
- // cases as best as possible errors here.
- if (TPDirectiveReturn && StartTokLoc != getTok().getLoc())
- return true;
- // Return if we did some parsing or believe we succeeded.
- if (!TPDirectiveReturn || StartTokLoc != getTok().getLoc())
+ if (TPDirectiveReturn.isSuccess())
return false;
// Finally, if no one else is interested in this directive, it must be
@@ -2452,9 +2420,9 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
case DK_CFI_DEF_CFA:
return parseDirectiveCFIDefCfa(IDLoc);
case DK_CFI_DEF_CFA_OFFSET:
- return parseDirectiveCFIDefCfaOffset();
+ return parseDirectiveCFIDefCfaOffset(IDLoc);
case DK_CFI_ADJUST_CFA_OFFSET:
- return parseDirectiveCFIAdjustCfaOffset();
+ return parseDirectiveCFIAdjustCfaOffset(IDLoc);
case DK_CFI_DEF_CFA_REGISTER:
return parseDirectiveCFIDefCfaRegister(IDLoc);
case DK_CFI_OFFSET:
@@ -2466,15 +2434,15 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
case DK_CFI_LSDA:
return parseDirectiveCFIPersonalityOrLsda(false);
case DK_CFI_REMEMBER_STATE:
- return parseDirectiveCFIRememberState();
+ return parseDirectiveCFIRememberState(IDLoc);
case DK_CFI_RESTORE_STATE:
- return parseDirectiveCFIRestoreState();
+ return parseDirectiveCFIRestoreState(IDLoc);
case DK_CFI_SAME_VALUE:
return parseDirectiveCFISameValue(IDLoc);
case DK_CFI_RESTORE:
return parseDirectiveCFIRestore(IDLoc);
case DK_CFI_ESCAPE:
- return parseDirectiveCFIEscape();
+ return parseDirectiveCFIEscape(IDLoc);
case DK_CFI_RETURN_COLUMN:
return parseDirectiveCFIReturnColumn(IDLoc);
case DK_CFI_SIGNAL_FRAME:
@@ -2484,7 +2452,7 @@ bool MasmParser::parseStatement(ParseStatementInfo &Info,
case DK_CFI_REGISTER:
return parseDirectiveCFIRegister(IDLoc);
case DK_CFI_WINDOW_SAVE:
- return parseDirectiveCFIWindowSave();
+ return parseDirectiveCFIWindowSave(IDLoc);
case DK_EXITM:
Info.ExitValue = "";
return parseDirectiveExitMacro(IDLoc, IDVal, *Info.ExitValue);
@@ -4563,7 +4531,7 @@ bool MasmParser::parseDirectiveStruct(StringRef Directive,
"' directive; expected none or NONUNIQUE");
}
- if (parseToken(AsmToken::EndOfStatement))
+ if (parseEOL())
return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
StructInProgress.emplace_back(Name, DirKind == DK_UNION, AlignmentValue);
@@ -4585,7 +4553,7 @@ bool MasmParser::parseDirectiveNestedStruct(StringRef Directive,
Name = getTok().getIdentifier();
parseToken(AsmToken::Identifier);
}
- if (parseToken(AsmToken::EndOfStatement))
+ if (parseEOL())
return addErrorSuffix(" in '" + Twine(Directive) + "' directive");
// Reserve space to ensure Alignment doesn't get invalidated when
@@ -4611,7 +4579,7 @@ bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) {
Structure.Size, std::min(Structure.Alignment, Structure.AlignmentSize));
Structs[Name.lower()] = Structure;
- if (parseToken(AsmToken::EndOfStatement))
+ if (parseEOL())
return addErrorSuffix(" in ENDS directive");
return false;
@@ -4623,7 +4591,7 @@ bool MasmParser::parseDirectiveNestedEnds() {
if (StructInProgress.size() == 1)
return TokError("missing name in top-level ENDS directive");
- if (parseToken(AsmToken::EndOfStatement))
+ if (parseEOL())
return addErrorSuffix(" in nested ENDS directive");
StructInfo Structure = StructInProgress.pop_back_val();
@@ -4695,7 +4663,7 @@ bool MasmParser::parseDirectiveOrg() {
SMLoc OffsetLoc = Lexer.getLoc();
if (checkForValidSection() || parseExpression(Offset))
return true;
- if (parseToken(AsmToken::EndOfStatement))
+ if (parseEOL())
return addErrorSuffix(" in 'org' directive");
if (StructInProgress.empty()) {
@@ -4764,10 +4732,9 @@ bool MasmParser::parseDirectiveAlign() {
if (getTok().is(AsmToken::EndOfStatement)) {
return Warning(AlignmentLoc,
"align directive with no operand is ignored") &&
- parseToken(AsmToken::EndOfStatement);
+ parseEOL();
}
- if (parseAbsoluteExpression(Alignment) ||
- parseToken(AsmToken::EndOfStatement))
+ if (parseAbsoluteExpression(Alignment) || parseEOL())
return addErrorSuffix(" in align directive");
// Always emit an alignment here even if we throw an error.
@@ -4790,7 +4757,7 @@ bool MasmParser::parseDirectiveAlign() {
/// parseDirectiveEven
/// ::= even
bool MasmParser::parseDirectiveEven() {
- if (parseToken(AsmToken::EndOfStatement) || emitAlignTo(2))
+ if (parseEOL() || emitAlignTo(2))
return addErrorSuffix(" in even directive");
return false;
@@ -5508,7 +5475,7 @@ bool MasmParser::parseDirectiveCFIStartProc() {
if (!parseOptionalToken(AsmToken::EndOfStatement)) {
if (check(parseIdentifier(Simple) || Simple != "simple",
"unexpected token") ||
- parseToken(AsmToken::EndOfStatement))
+ parseEOL())
return addErrorSuffix(" in '.cfi_startproc' directive");
}
@@ -5558,12 +5525,12 @@ bool MasmParser::parseDirectiveCFIDefCfa(SMLoc DirectiveLoc) {
/// parseDirectiveCFIDefCfaOffset
/// ::= .cfi_def_cfa_offset offset
-bool MasmParser::parseDirectiveCFIDefCfaOffset() {
+bool MasmParser::parseDirectiveCFIDefCfaOffset(SMLoc DirectiveLoc) {
int64_t Offset = 0;
if (parseAbsoluteExpression(Offset))
return true;
- getStreamer().emitCFIDefCfaOffset(Offset);
+ getStreamer().emitCFIDefCfaOffset(Offset, DirectiveLoc);
return false;
}
@@ -5576,25 +5543,25 @@ bool MasmParser::parseDirectiveCFIRegister(SMLoc DirectiveLoc) {
parseRegisterOrRegisterNumber(Register2, DirectiveLoc))
return true;
- getStreamer().emitCFIRegister(Register1, Register2);
+ getStreamer().emitCFIRegister(Register1, Register2, DirectiveLoc);
return false;
}
/// parseDirectiveCFIWindowSave
/// ::= .cfi_window_save
-bool MasmParser::parseDirectiveCFIWindowSave() {
- getStreamer().emitCFIWindowSave();
+bool MasmParser::parseDirectiveCFIWindowSave(SMLoc DirectiveLoc) {
+ getStreamer().emitCFIWindowSave(DirectiveLoc);
return false;
}
/// parseDirectiveCFIAdjustCfaOffset
/// ::= .cfi_adjust_cfa_offset adjustment
-bool MasmParser::parseDirectiveCFIAdjustCfaOffset() {
+bool MasmParser::parseDirectiveCFIAdjustCfaOffset(SMLoc DirectiveLoc) {
int64_t Adjustment = 0;
if (parseAbsoluteExpression(Adjustment))
return true;
- getStreamer().emitCFIAdjustCfaOffset(Adjustment);
+ getStreamer().emitCFIAdjustCfaOffset(Adjustment, DirectiveLoc);
return false;
}
@@ -5634,7 +5601,7 @@ bool MasmParser::parseDirectiveCFIRelOffset(SMLoc DirectiveLoc) {
parseAbsoluteExpression(Offset))
return true;
- getStreamer().emitCFIRelOffset(Register, Offset);
+ getStreamer().emitCFIRelOffset(Register, Offset, DirectiveLoc);
return false;
}
@@ -5688,15 +5655,15 @@ bool MasmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) {
/// parseDirectiveCFIRememberState
/// ::= .cfi_remember_state
-bool MasmParser::parseDirectiveCFIRememberState() {
- getStreamer().emitCFIRememberState();
+bool MasmParser::parseDirectiveCFIRememberState(SMLoc DirectiveLoc) {
+ getStreamer().emitCFIRememberState(DirectiveLoc);
return false;
}
/// parseDirectiveCFIRestoreState
/// ::= .cfi_restore_state
-bool MasmParser::parseDirectiveCFIRestoreState() {
- getStreamer().emitCFIRestoreState();
+bool MasmParser::parseDirectiveCFIRestoreState(SMLoc DirectiveLoc) {
+ getStreamer().emitCFIRestoreState(DirectiveLoc);
return false;
}
@@ -5708,7 +5675,7 @@ bool MasmParser::parseDirectiveCFISameValue(SMLoc DirectiveLoc) {
if (parseRegisterOrRegisterNumber(Register, DirectiveLoc))
return true;
- getStreamer().emitCFISameValue(Register);
+ getStreamer().emitCFISameValue(Register, DirectiveLoc);
return false;
}
@@ -5725,7 +5692,7 @@ bool MasmParser::parseDirectiveCFIRestore(SMLoc DirectiveLoc) {
/// parseDirectiveCFIEscape
/// ::= .cfi_escape expression[,...]
-bool MasmParser::parseDirectiveCFIEscape() {
+bool MasmParser::parseDirectiveCFIEscape(SMLoc DirectiveLoc) {
std::string Values;
int64_t CurrValue;
if (parseAbsoluteExpression(CurrValue))
@@ -5742,7 +5709,7 @@ bool MasmParser::parseDirectiveCFIEscape() {
Values.push_back((uint8_t)CurrValue);
}
- getStreamer().emitCFIEscape(Values);
+ getStreamer().emitCFIEscape(Values, DirectiveLoc);
return false;
}
@@ -6129,6 +6096,7 @@ bool MasmParser::parseDirectiveComm(bool IsLocal) {
bool MasmParser::parseDirectiveComment(SMLoc DirectiveLoc) {
std::string FirstLine = parseStringTo(AsmToken::EndOfStatement);
size_t DelimiterEnd = FirstLine.find_first_of("\b\t\v\f\r\x1A ");
+ assert(DelimiterEnd != std::string::npos);
StringRef Delimiter = StringRef(FirstLine).take_front(DelimiterEnd);
if (Delimiter.empty())
return Error(DirectiveLoc, "no delimiter in 'comment' directive");
@@ -6274,9 +6242,9 @@ bool MasmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
parseEOL())
return true;
- if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) {
+ if (BuiltinSymbolMap.contains(Name.lower())) {
is_defined = true;
- } else if (Variables.find(Name.lower()) != Variables.end()) {
+ } else if (Variables.contains(Name.lower())) {
is_defined = true;
} else {
MCSymbol *Sym = getContext().lookupSymbol(Name.lower());
@@ -6395,9 +6363,9 @@ bool MasmParser::parseDirectiveElseIfdef(SMLoc DirectiveLoc,
parseEOL())
return true;
- if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) {
+ if (BuiltinSymbolMap.contains(Name.lower())) {
is_defined = true;
- } else if (Variables.find(Name.lower()) != Variables.end()) {
+ } else if (Variables.contains(Name.lower())) {
is_defined = true;
} else {
MCSymbol *Sym = getContext().lookupSymbol(Name);
@@ -6565,9 +6533,9 @@ bool MasmParser::parseDirectiveErrorIfdef(SMLoc DirectiveLoc,
if (check(parseIdentifier(Name), "expected identifier after '.errdef'"))
return true;
- if (BuiltinSymbolMap.find(Name.lower()) != BuiltinSymbolMap.end()) {
+ if (BuiltinSymbolMap.contains(Name.lower())) {
IsDefined = true;
- } else if (Variables.find(Name.lower()) != Variables.end()) {
+ } else if (Variables.contains(Name.lower())) {
IsDefined = true;
} else {
MCSymbol *Sym = getContext().lookupSymbol(Name);
diff --git a/llvm/lib/MC/MCParser/WasmAsmParser.cpp b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
index 75b69ee4c8ca..97045495a60d 100644
--- a/llvm/lib/MC/MCParser/WasmAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/WasmAsmParser.cpp
@@ -16,12 +16,13 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
-#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbolWasm.h"
diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp
index a9460b86d22a..caec98e9ea6a 100644
--- a/llvm/lib/MC/MCPseudoProbe.cpp
+++ b/llvm/lib/MC/MCPseudoProbe.cpp
@@ -8,6 +8,7 @@
#include "llvm/MC/MCPseudoProbe.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/PseudoProbe.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -58,10 +59,14 @@ void MCPseudoProbe::emit(MCObjectStreamer *MCOS,
// Type (bit 0 to 3), with bit 4 to 6 for attributes.
// Flag (bit 7, 0 - code address, 1 - address delta). This indicates whether
// the following field is a symbolic code address or an address delta.
+ // Emit FS discriminator
assert(Type <= 0xF && "Probe type too big to encode, exceeding 15");
- assert(Attributes <= 0x7 &&
+ auto NewAttributes = Attributes;
+ if (Discriminator)
+ NewAttributes |= (uint32_t)PseudoProbeAttributes::HasDiscriminator;
+ assert(NewAttributes <= 0x7 &&
"Probe attributes too big to encode, exceeding 7");
- uint8_t PackedType = Type | (Attributes << 4);
+ uint8_t PackedType = Type | (NewAttributes << 4);
uint8_t Flag =
!IsSentinel ? ((int8_t)MCPseudoProbeFlag::AddressDelta << 7) : 0;
MCOS->emitInt8(Flag | PackedType);
@@ -81,6 +86,9 @@ void MCPseudoProbe::emit(MCObjectStreamer *MCOS,
MCOS->emitInt64(Guid);
}
+ if (Discriminator)
+ MCOS->emitULEB128IntValue(Discriminator);
+
LLVM_DEBUG({
dbgs().indent(MCPseudoProbeTable::DdgPrintIndent);
dbgs() << "Probe: " << Index << "\n";
@@ -222,11 +230,11 @@ void MCPseudoProbeSections::emit(MCObjectStreamer *MCOS) {
for (const auto &Inlinee : Inlinees) {
// Emit the group guarded by a sentinel probe.
- MCPseudoProbe SentinelProbe(const_cast<MCSymbol *>(FuncSym),
- MD5Hash(FuncSym->getName()),
- (uint32_t)PseudoProbeReservedId::Invalid,
- (uint32_t)PseudoProbeType::Block,
- (uint32_t)PseudoProbeAttributes::Sentinel);
+ MCPseudoProbe SentinelProbe(
+ const_cast<MCSymbol *>(FuncSym), MD5Hash(FuncSym->getName()),
+ (uint32_t)PseudoProbeReservedId::Invalid,
+ (uint32_t)PseudoProbeType::Block,
+ (uint32_t)PseudoProbeAttributes::Sentinel, 0);
const MCPseudoProbe *Probe = &SentinelProbe;
Inlinee.second->emit(MCOS, Probe);
}
@@ -310,6 +318,8 @@ void MCDecodedPseudoProbe::print(raw_ostream &OS,
OS << Guid << " ";
}
OS << "Index: " << Index << " ";
+ if (Discriminator)
+ OS << "Discriminator: " << Discriminator << " ";
OS << "Type: " << PseudoProbeTypeStr[static_cast<uint8_t>(Type)] << " ";
std::string InlineContextStr = getInlineContextStr(GUID2FuncMAP);
if (InlineContextStr.size()) {
@@ -491,11 +501,19 @@ bool MCPseudoProbeDecoder::buildAddress2ProbeMap(
}
}
+ uint32_t Discriminator = 0;
+ if (hasDiscriminator(Attr)) {
+ auto ErrorOrDiscriminator = readUnsignedNumber<uint32_t>();
+ if (!ErrorOrDiscriminator)
+ return false;
+ Discriminator = std::move(*ErrorOrDiscriminator);
+ }
+
if (Cur && !isSentinelProbe(Attr)) {
// Populate Address2ProbesMap
auto &Probes = Address2ProbesMap[Addr];
Probes.emplace_back(Addr, Cur->Guid, Index, PseudoProbeType(Kind), Attr,
- Cur);
+ Discriminator, Cur);
Cur->addProbes(&Probes.back());
}
LastAddr = Addr;
@@ -566,10 +584,20 @@ MCPseudoProbeDecoder::getCallProbeForAddr(uint64_t Address) const {
const MCDecodedPseudoProbe *CallProbe = nullptr;
for (const auto &Probe : Probes) {
if (Probe.isCall()) {
- assert(!CallProbe &&
- "There should be only one call probe corresponding to address "
- "which is a callsite.");
+ // Disabling the assert and returning first call probe seen so far.
+ // Subsequent call probes, if any, are ignored. Due to the way the
+ // .pseudo_probe section is decoded, probes of the same-named independent
+ // static functions are merged, thus multiple call probes may be seen for a
+ // callsite. This should only happen to compiler-generated statics, with
+ // -funique-internal-linkage-names where user statics get unique names.
+ //
+ // TODO: re-enable or narrow down the assert to static functions only.
+ //
+ // assert(!CallProbe &&
+ // "There should be only one call probe corresponding to address "
+ // "which is a callsite.");
CallProbe = &Probe;
+ break;
}
}
return CallProbe;
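For reference, the packed probe header written by MCPseudoProbe::emit() and decoded in buildAddress2ProbeMap() can be sketched standalone; packProbeHeader is a made-up name and the snippet is illustrative only:

#include <cassert>
#include <cstdint>

// Bits 0-3: probe type; bits 4-6: attributes, including the HasDiscriminator
// bit that announces a trailing ULEB128 discriminator; bit 7: set when the
// following field is an address delta rather than a symbolic code address.
static uint8_t packProbeHeader(uint8_t Type, uint8_t Attributes,
                               bool IsAddressDelta) {
  assert(Type <= 0xF && "probe type must fit in 4 bits");
  assert(Attributes <= 0x7 && "probe attributes must fit in 3 bits");
  uint8_t Packed = uint8_t(Type | (Attributes << 4));
  return uint8_t(Packed | (IsAddressDelta ? (1u << 7) : 0u));
}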
diff --git a/llvm/lib/MC/MCRegisterInfo.cpp b/llvm/lib/MC/MCRegisterInfo.cpp
index 9c88e3be97df..a2c1737e2964 100644
--- a/llvm/lib/MC/MCRegisterInfo.cpp
+++ b/llvm/lib/MC/MCRegisterInfo.cpp
@@ -23,9 +23,9 @@ using namespace llvm;
MCRegister
MCRegisterInfo::getMatchingSuperReg(MCRegister Reg, unsigned SubIdx,
const MCRegisterClass *RC) const {
- for (MCSuperRegIterator Supers(Reg, this); Supers.isValid(); ++Supers)
- if (RC->contains(*Supers) && Reg == getSubReg(*Supers, SubIdx))
- return *Supers;
+ for (MCPhysReg Super : superregs(Reg))
+ if (RC->contains(Super) && Reg == getSubReg(Super, SubIdx))
+ return Super;
return 0;
}
@@ -35,9 +35,11 @@ MCRegister MCRegisterInfo::getSubReg(MCRegister Reg, unsigned Idx) const {
// Get a pointer to the corresponding SubRegIndices list. This list has the
// name of each sub-register in the same order as MCSubRegIterator.
const uint16_t *SRI = SubRegIndices + get(Reg).SubRegIndices;
- for (MCSubRegIterator Subs(Reg, this); Subs.isValid(); ++Subs, ++SRI)
+ for (MCPhysReg Sub : subregs(Reg)) {
if (*SRI == Idx)
- return *Subs;
+ return Sub;
+ ++SRI;
+ }
return 0;
}
@@ -47,9 +49,11 @@ unsigned MCRegisterInfo::getSubRegIndex(MCRegister Reg,
// Get a pointer to the corresponding SubRegIndices list. This list has the
// name of each sub-register in the same order as MCSubRegIterator.
const uint16_t *SRI = SubRegIndices + get(Reg).SubRegIndices;
- for (MCSubRegIterator Subs(Reg, this); Subs.isValid(); ++Subs, ++SRI)
- if (*Subs == SubReg)
+ for (MCPhysReg Sub : subregs(Reg)) {
+ if (Sub == SubReg)
return *SRI;
+ ++SRI;
+ }
return 0;
}
@@ -101,8 +105,13 @@ int MCRegisterInfo::getDwarfRegNumFromDwarfEHRegNum(unsigned RegNum) const {
// a corresponding LLVM register number at all. So if we can't map the
// EH register number to an LLVM register number, assume it's just a
// valid DWARF register number as is.
- if (std::optional<unsigned> LRegNum = getLLVMRegNum(RegNum, true))
- return getDwarfRegNum(*LRegNum, false);
+ if (std::optional<unsigned> LRegNum = getLLVMRegNum(RegNum, true)) {
+ int DwarfRegNum = getDwarfRegNum(*LRegNum, false);
+ if (DwarfRegNum == -1)
+ return RegNum;
+ else
+ return DwarfRegNum;
+ }
return RegNum;
}
@@ -125,11 +134,13 @@ int MCRegisterInfo::getCodeViewRegNum(MCRegister RegNum) const {
bool MCRegisterInfo::regsOverlap(MCRegister RegA, MCRegister RegB) const {
// Regunits are numerically ordered. Find a common unit.
- MCRegUnitIterator RUA(RegA, this);
- MCRegUnitIterator RUB(RegB, this);
+ auto RangeA = regunits(RegA);
+ MCRegUnitIterator IA = RangeA.begin(), EA = RangeA.end();
+ auto RangeB = regunits(RegB);
+ MCRegUnitIterator IB = RangeB.begin(), EB = RangeB.end();
do {
- if (*RUA == *RUB)
+ if (*IA == *IB)
return true;
- } while (*RUA < *RUB ? (++RUA).isValid() : (++RUB).isValid());
+ } while (*IA < *IB ? ++IA != EA : ++IB != EB);
return false;
}
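The rewritten regsOverlap() is the classic two-pointer walk over two ascending regunit lists; a standalone restatement of just that algorithm, not code from the patch:

#include <vector>

// Two sorted, ascending lists intersect iff this walk finds a common element
// before either iterator runs off the end.
static bool sortedListsIntersect(const std::vector<unsigned> &A,
                                 const std::vector<unsigned> &B) {
  auto IA = A.begin(), IB = B.begin();
  while (IA != A.end() && IB != B.end()) {
    if (*IA == *IB)
      return true;
    if (*IA < *IB)
      ++IA;
    else
      ++IB;
  }
  return false;
}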
diff --git a/llvm/lib/MC/MCSPIRVStreamer.cpp b/llvm/lib/MC/MCSPIRVStreamer.cpp
index 863db7f36f29..0bb73c7ff7ee 100644
--- a/llvm/lib/MC/MCSPIRVStreamer.cpp
+++ b/llvm/lib/MC/MCSPIRVStreamer.cpp
@@ -21,8 +21,7 @@ void MCSPIRVStreamer::emitInstToData(const MCInst &Inst,
MCAssembler &Assembler = getAssembler();
SmallVector<MCFixup, 0> Fixups;
SmallString<256> Code;
- raw_svector_ostream VecOS(Code);
- Assembler.getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
+ Assembler.getEmitter().encodeInstruction(Inst, Code, Fixups, STI);
// Append the encoded instruction to the current data fragment (or create a
// new such fragment if the current fragment is not a data fragment).
diff --git a/llvm/lib/MC/MCSchedule.cpp b/llvm/lib/MC/MCSchedule.cpp
index 9f00064f501b..5a893b803fd0 100644
--- a/llvm/lib/MC/MCSchedule.cpp
+++ b/llvm/lib/MC/MCSchedule.cpp
@@ -30,6 +30,7 @@ const MCSchedModel MCSchedModel::Default = {DefaultIssueWidth,
DefaultMispredictPenalty,
false,
true,
+ false /*EnableIntervals*/,
0,
nullptr,
nullptr,
diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp
index 7547558fe6e2..0fb9e8e13910 100644
--- a/llvm/lib/MC/MCSection.cpp
+++ b/llvm/lib/MC/MCSection.cpp
@@ -94,9 +94,6 @@ void MCSection::addPendingLabel(MCSymbol *label, unsigned Subsection) {
void MCSection::flushPendingLabels(MCFragment *F, uint64_t FOffset,
unsigned Subsection) {
- if (PendingLabels.empty())
- return;
-
// Set the fragment and fragment offset for all pending symbols in the
// specified Subsection, and remove those symbols from the pending list.
for (auto It = PendingLabels.begin(); It != PendingLabels.end(); ++It) {
@@ -116,9 +113,13 @@ void MCSection::flushPendingLabels() {
PendingLabel& Label = PendingLabels[0];
iterator CurInsertionPoint =
this->getSubsectionInsertionPoint(Label.Subsection);
+ const MCSymbol *Atom = nullptr;
+ if (CurInsertionPoint != begin())
+ Atom = std::prev(CurInsertionPoint)->getAtom();
MCFragment *F = new MCDataFragment();
getFragmentList().insert(CurInsertionPoint, F);
F->setParent(this);
+ F->setAtom(Atom);
flushPendingLabels(F, 0, Label.Subsection);
}
}
diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp
index 077cee132338..666252ffcb74 100644
--- a/llvm/lib/MC/MCSectionELF.cpp
+++ b/llvm/lib/MC/MCSectionELF.cpp
@@ -7,12 +7,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCSectionELF.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include <cassert>
using namespace llvm;
@@ -169,6 +169,8 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
OS << "llvm_bb_addr_map_v0";
else if (Type == ELF::SHT_LLVM_OFFLOADING)
OS << "llvm_offloading";
+ else if (Type == ELF::SHT_LLVM_LTO)
+ OS << "llvm_lto";
else
report_fatal_error("unsupported type 0x" + Twine::utohexstr(Type) +
" for section " + getName());
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
index 56711079245d..7f9c0c3b0b8d 100644
--- a/llvm/lib/MC/MCStreamer.cpp
+++ b/llvm/lib/MC/MCStreamer.cpp
@@ -126,7 +126,7 @@ void MCStreamer::emitExplicitComments() {}
void MCStreamer::generateCompactUnwindEncodings(MCAsmBackend *MAB) {
for (auto &FI : DwarfFrameInfos)
FI.CompactUnwindEncoding =
- (MAB ? MAB->generateCompactUnwindEncoding(FI.Instructions) : 0);
+ (MAB ? MAB->generateCompactUnwindEncoding(&FI, &Context) : 0);
}
/// EmitIntValue - Special case of EmitValue that avoids the client having to
@@ -219,7 +219,8 @@ void MCStreamer::emitGPRel32Value(const MCExpr *Value) {
/// Emit NumBytes bytes worth of the value specified by FillValue.
/// This implements directives such as '.space'.
void MCStreamer::emitFill(uint64_t NumBytes, uint8_t FillValue) {
- emitFill(*MCConstantExpr::create(NumBytes, getContext()), FillValue);
+ if (NumBytes)
+ emitFill(*MCConstantExpr::create(NumBytes, getContext()), FillValue);
}
void llvm::MCStreamer::emitNops(int64_t NumBytes, int64_t ControlledNopLen,
@@ -278,7 +279,7 @@ MCSymbol *MCStreamer::getDwarfLineTableSymbol(unsigned CUID) {
}
bool MCStreamer::hasUnfinishedDwarfFrameInfo() {
- return !DwarfFrameInfos.empty() && !DwarfFrameInfos.back().End;
+ return !FrameInfoStack.empty();
}
MCDwarfFrameInfo *MCStreamer::getCurrentDwarfFrameInfo() {
@@ -288,7 +289,7 @@ MCDwarfFrameInfo *MCStreamer::getCurrentDwarfFrameInfo() {
".cfi_startproc and .cfi_endproc directives");
return nullptr;
}
- return &DwarfFrameInfos.back();
+ return &DwarfFrameInfos[FrameInfoStack.back().first];
}
bool MCStreamer::emitCVFileDirective(unsigned FileNo, StringRef Filename,
@@ -445,7 +446,8 @@ void MCStreamer::emitConditionalAssignment(MCSymbol *Symbol,
void MCStreamer::emitCFISections(bool EH, bool Debug) {}
void MCStreamer::emitCFIStartProc(bool IsSimple, SMLoc Loc) {
- if (hasUnfinishedDwarfFrameInfo())
+ if (!FrameInfoStack.empty() &&
+ getCurrentSectionOnly() == FrameInfoStack.back().second)
return getContext().reportError(
Loc, "starting new .cfi frame before finishing the previous one");
@@ -464,6 +466,7 @@ void MCStreamer::emitCFIStartProc(bool IsSimple, SMLoc Loc) {
}
}
+ FrameInfoStack.emplace_back(DwarfFrameInfos.size(), getCurrentSectionOnly());
DwarfFrameInfos.push_back(Frame);
}
@@ -475,6 +478,7 @@ void MCStreamer::emitCFIEndProc() {
if (!CurFrame)
return;
emitCFIEndProcImpl(*CurFrame);
+ FrameInfoStack.pop_back();
}
void MCStreamer::emitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
@@ -489,10 +493,10 @@ MCSymbol *MCStreamer::emitCFILabel() {
return (MCSymbol *)1;
}
-void MCStreamer::emitCFIDefCfa(int64_t Register, int64_t Offset) {
+void MCStreamer::emitCFIDefCfa(int64_t Register, int64_t Offset, SMLoc Loc) {
MCSymbol *Label = emitCFILabel();
MCCFIInstruction Instruction =
- MCCFIInstruction::cfiDefCfa(Label, Register, Offset);
+ MCCFIInstruction::cfiDefCfa(Label, Register, Offset, Loc);
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
return;
@@ -500,7 +504,7 @@ void MCStreamer::emitCFIDefCfa(int64_t Register, int64_t Offset) {
CurFrame->CurrentCfaRegister = static_cast<unsigned>(Register);
}
-void MCStreamer::emitCFIDefCfaOffset(int64_t Offset) {
+void MCStreamer::emitCFIDefCfaOffset(int64_t Offset, SMLoc Loc) {
MCSymbol *Label = emitCFILabel();
MCCFIInstruction Instruction =
MCCFIInstruction::cfiDefCfaOffset(Label, Offset);
@@ -510,20 +514,20 @@ void MCStreamer::emitCFIDefCfaOffset(int64_t Offset) {
CurFrame->Instructions.push_back(Instruction);
}
-void MCStreamer::emitCFIAdjustCfaOffset(int64_t Adjustment) {
+void MCStreamer::emitCFIAdjustCfaOffset(int64_t Adjustment, SMLoc Loc) {
MCSymbol *Label = emitCFILabel();
MCCFIInstruction Instruction =
- MCCFIInstruction::createAdjustCfaOffset(Label, Adjustment);
+ MCCFIInstruction::createAdjustCfaOffset(Label, Adjustment, Loc);
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
return;
CurFrame->Instructions.push_back(Instruction);
}
-void MCStreamer::emitCFIDefCfaRegister(int64_t Register) {
+void MCStreamer::emitCFIDefCfaRegister(int64_t Register, SMLoc Loc) {
MCSymbol *Label = emitCFILabel();
MCCFIInstruction Instruction =
- MCCFIInstruction::createDefCfaRegister(Label, Register);
+ MCCFIInstruction::createDefCfaRegister(Label, Register, Loc);
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
return;
@@ -532,10 +536,10 @@ void MCStreamer::emitCFIDefCfaRegister(int64_t Register) {
}
void MCStreamer::emitCFILLVMDefAspaceCfa(int64_t Register, int64_t Offset,
- int64_t AddressSpace) {
+ int64_t AddressSpace, SMLoc Loc) {
MCSymbol *Label = emitCFILabel();
MCCFIInstruction Instruction = MCCFIInstruction::createLLVMDefAspaceCfa(
- Label, Register, Offset, AddressSpace);
+ Label, Register, Offset, AddressSpace, Loc);
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
return;
@@ -543,20 +547,20 @@ void MCStreamer::emitCFILLVMDefAspaceCfa(int64_t Register, int64_t Offset,
CurFrame->CurrentCfaRegister = static_cast<unsigned>(Register);
}
-void MCStreamer::emitCFIOffset(int64_t Register, int64_t Offset) {
+void MCStreamer::emitCFIOffset(int64_t Register, int64_t Offset, SMLoc Loc) {
MCSymbol *Label = emitCFILabel();
MCCFIInstruction Instruction =
- MCCFIInstruction::createOffset(Label, Register, Offset);
+ MCCFIInstruction::createOffset(Label, Register, Offset, Loc);
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
return;
CurFrame->Instructions.push_back(Instruction);
}
-void MCStreamer::emitCFIRelOffset(int64_t Register, int64_t Offset) {
+void MCStreamer::emitCFIRelOffset(int64_t Register, int64_t Offset, SMLoc Loc) {
MCSymbol *Label = emitCFILabel();
MCCFIInstruction Instruction =
- MCCFIInstruction::createRelOffset(Label, Register, Offset);
+ MCCFIInstruction::createRelOffset(Label, Register, Offset, Loc);
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
return;
@@ -580,58 +584,61 @@ void MCStreamer::emitCFILsda(const MCSymbol *Sym, unsigned Encoding) {
CurFrame->LsdaEncoding = Encoding;
}
-void MCStreamer::emitCFIRememberState() {
+void MCStreamer::emitCFIRememberState(SMLoc Loc) {
MCSymbol *Label = emitCFILabel();
- MCCFIInstruction Instruction = MCCFIInstruction::createRememberState(Label);
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createRememberState(Label, Loc);
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
return;
CurFrame->Instructions.push_back(Instruction);
}
-void MCStreamer::emitCFIRestoreState() {
+void MCStreamer::emitCFIRestoreState(SMLoc Loc) {
// FIXME: Error if there is no matching cfi_remember_state.
MCSymbol *Label = emitCFILabel();
- MCCFIInstruction Instruction = MCCFIInstruction::createRestoreState(Label);
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createRestoreState(Label, Loc);
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
return;
CurFrame->Instructions.push_back(Instruction);
}
-void MCStreamer::emitCFISameValue(int64_t Register) {
+void MCStreamer::emitCFISameValue(int64_t Register, SMLoc Loc) {
MCSymbol *Label = emitCFILabel();
MCCFIInstruction Instruction =
- MCCFIInstruction::createSameValue(Label, Register);
+ MCCFIInstruction::createSameValue(Label, Register, Loc);
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
return;
CurFrame->Instructions.push_back(Instruction);
}
-void MCStreamer::emitCFIRestore(int64_t Register) {
+void MCStreamer::emitCFIRestore(int64_t Register, SMLoc Loc) {
MCSymbol *Label = emitCFILabel();
MCCFIInstruction Instruction =
- MCCFIInstruction::createRestore(Label, Register);
+ MCCFIInstruction::createRestore(Label, Register, Loc);
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
return;
CurFrame->Instructions.push_back(Instruction);
}
-void MCStreamer::emitCFIEscape(StringRef Values) {
+void MCStreamer::emitCFIEscape(StringRef Values, SMLoc Loc) {
MCSymbol *Label = emitCFILabel();
- MCCFIInstruction Instruction = MCCFIInstruction::createEscape(Label, Values);
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createEscape(Label, Values, Loc, "");
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
return;
CurFrame->Instructions.push_back(Instruction);
}
-void MCStreamer::emitCFIGnuArgsSize(int64_t Size) {
+void MCStreamer::emitCFIGnuArgsSize(int64_t Size, SMLoc Loc) {
MCSymbol *Label = emitCFILabel();
MCCFIInstruction Instruction =
- MCCFIInstruction::createGnuArgsSize(Label, Size);
+ MCCFIInstruction::createGnuArgsSize(Label, Size, Loc);
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
return;
@@ -645,39 +652,40 @@ void MCStreamer::emitCFISignalFrame() {
CurFrame->IsSignalFrame = true;
}
-void MCStreamer::emitCFIUndefined(int64_t Register) {
+void MCStreamer::emitCFIUndefined(int64_t Register, SMLoc Loc) {
MCSymbol *Label = emitCFILabel();
MCCFIInstruction Instruction =
- MCCFIInstruction::createUndefined(Label, Register);
+ MCCFIInstruction::createUndefined(Label, Register, Loc);
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
return;
CurFrame->Instructions.push_back(Instruction);
}
-void MCStreamer::emitCFIRegister(int64_t Register1, int64_t Register2) {
+void MCStreamer::emitCFIRegister(int64_t Register1, int64_t Register2,
+ SMLoc Loc) {
MCSymbol *Label = emitCFILabel();
MCCFIInstruction Instruction =
- MCCFIInstruction::createRegister(Label, Register1, Register2);
+ MCCFIInstruction::createRegister(Label, Register1, Register2, Loc);
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
return;
CurFrame->Instructions.push_back(Instruction);
}
-void MCStreamer::emitCFIWindowSave() {
+void MCStreamer::emitCFIWindowSave(SMLoc Loc) {
MCSymbol *Label = emitCFILabel();
- MCCFIInstruction Instruction =
- MCCFIInstruction::createWindowSave(Label);
+ MCCFIInstruction Instruction = MCCFIInstruction::createWindowSave(Label, Loc);
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
return;
CurFrame->Instructions.push_back(Instruction);
}
-void MCStreamer::emitCFINegateRAState() {
+void MCStreamer::emitCFINegateRAState(SMLoc Loc) {
MCSymbol *Label = emitCFILabel();
- MCCFIInstruction Instruction = MCCFIInstruction::createNegateRAState(Label);
+ MCCFIInstruction Instruction =
+ MCCFIInstruction::createNegateRAState(Label, Loc);
MCDwarfFrameInfo *CurFrame = getCurrentDwarfFrameInfo();
if (!CurFrame)
return;
@@ -1101,7 +1109,7 @@ void MCStreamer::emitInstruction(const MCInst &Inst, const MCSubtargetInfo &) {
}
void MCStreamer::emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
- uint64_t Attr,
+ uint64_t Attr, uint64_t Discriminator,
const MCPseudoProbeInlineStack &InlineStack,
MCSymbol *FnSym) {
auto &Context = getContext();
@@ -1113,7 +1121,7 @@ void MCStreamer::emitPseudoProbe(uint64_t Guid, uint64_t Index, uint64_t Type,
emitLabel(ProbeSym);
// Create a (local) probe entry with the symbol.
- MCPseudoProbe Probe(ProbeSym, Guid, Index, Type, Attr);
+ MCPseudoProbe Probe(ProbeSym, Guid, Index, Type, Attr, Discriminator);
// Add the probe entry to this section's entries.
Context.getMCPseudoProbeTable().getProbeSections().addPseudoProbe(
@@ -1187,12 +1195,12 @@ void MCStreamer::emitXCOFFRenameDirective(const MCSymbol *Name,
"XCOFF targets");
}
-void MCStreamer::emitXCOFFRefDirective(StringRef Name) {
+void MCStreamer::emitXCOFFRefDirective(const MCSymbol *Symbol) {
llvm_unreachable("emitXCOFFRefDirective is only supported on XCOFF targets");
}
void MCStreamer::emitXCOFFExceptDirective(const MCSymbol *Symbol,
- const MCSymbol *Trap,
+ const MCSymbol *Trap,
unsigned Lang, unsigned Reason,
unsigned FunctionSize,
bool hasDebug) {
@@ -1200,6 +1208,11 @@ void MCStreamer::emitXCOFFExceptDirective(const MCSymbol *Symbol,
"XCOFF targets");
}
+void MCStreamer::emitXCOFFCInfoSym(StringRef Name, StringRef Metadata) {
+ llvm_unreachable("emitXCOFFCInfoSym is only supported on"
+ "XCOFF targets");
+}
+
void MCStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
void MCStreamer::emitELFSymverDirective(const MCSymbol *OriginalSym,
StringRef Name, bool KeepOriginalSym) {}
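The SMLoc threaded through the emitCFI* methods above is stored in each MCCFIInstruction, so later consumers can point diagnostics at the original .cfi_* directive. A minimal sketch under that assumption (reportUnsupportedCFI is a hypothetical helper, and MCCFIInstruction::getLoc() is assumed to return the recorded location):

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"

using namespace llvm;

// Hypothetical consumer-side helper: report against the directive that
// produced the CFI instruction instead of an unrelated location.
static void reportUnsupportedCFI(MCContext &Ctx, const MCCFIInstruction &Inst) {
  Ctx.reportError(Inst.getLoc(), "CFI instruction is not supported here");
}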
diff --git a/llvm/lib/MC/MCSubtargetInfo.cpp b/llvm/lib/MC/MCSubtargetInfo.cpp
index fc9826cf2b2e..8ee823e0377b 100644
--- a/llvm/lib/MC/MCSubtargetInfo.cpp
+++ b/llvm/lib/MC/MCSubtargetInfo.cpp
@@ -11,9 +11,9 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCSchedule.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include <algorithm>
#include <cassert>
#include <cstring>
@@ -142,7 +142,7 @@ static void cpuHelp(ArrayRef<SubtargetSubTypeKV> CPUTable) {
errs() << '\n';
errs() << "Use -mcpu or -mtune to specify the target's processor.\n"
- "For example, clang --target=aarch64-unknown-linux-gui "
+ "For example, clang --target=aarch64-unknown-linux-gnu "
"-mcpu=cortex-a35\n";
PrintOnce = true;
diff --git a/llvm/lib/MC/MCTargetOptions.cpp b/llvm/lib/MC/MCTargetOptions.cpp
index c2946da3ee66..8fea8c7715bd 100644
--- a/llvm/lib/MC/MCTargetOptions.cpp
+++ b/llvm/lib/MC/MCTargetOptions.cpp
@@ -18,7 +18,8 @@ MCTargetOptions::MCTargetOptions()
ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false),
PreserveAsmComments(true), Dwarf64(false),
EmitDwarfUnwind(EmitDwarfUnwindType::Default),
- MCUseDwarfDirectory(DefaultDwarfDirectory) {}
+ MCUseDwarfDirectory(DefaultDwarfDirectory),
+ EmitCompactUnwindNonCanonical(false) {}
StringRef MCTargetOptions::getABIName() const {
return ABIName;
diff --git a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp
index 0667ca59830c..8a4923e4792f 100644
--- a/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp
+++ b/llvm/lib/MC/MCTargetOptionsCommandFlags.cpp
@@ -39,6 +39,7 @@ MCOPT(bool, IncrementalLinkerCompatible)
MCOPT(int, DwarfVersion)
MCOPT(bool, Dwarf64)
MCOPT(EmitDwarfUnwindType, EmitDwarfUnwind)
+MCOPT(bool, EmitCompactUnwindNonCanonical)
MCOPT(bool, ShowMCInst)
MCOPT(bool, FatalWarnings)
MCOPT(bool, NoWarn)
@@ -87,6 +88,14 @@ llvm::mc::RegisterMCTargetOptionsFlags::RegisterMCTargetOptionsFlags() {
"Use target platform default")));
MCBINDOPT(EmitDwarfUnwind);
+ static cl::opt<bool> EmitCompactUnwindNonCanonical(
+ "emit-compact-unwind-non-canonical",
+ cl::desc(
+ "Whether to try to emit Compact Unwind for non canonical entries."),
+ cl::init(
+ false)); // By default, use DWARF for non-canonical personalities.
+ MCBINDOPT(EmitCompactUnwindNonCanonical);
+
static cl::opt<bool> ShowMCInst(
"asm-show-inst",
cl::desc("Emit internal instruction representation to assembly file"));
@@ -135,6 +144,7 @@ MCTargetOptions llvm::mc::InitMCTargetOptionsFromFlags() {
Options.MCNoDeprecatedWarn = getNoDeprecatedWarn();
Options.MCNoTypeCheck = getNoTypeCheck();
Options.EmitDwarfUnwind = getEmitDwarfUnwind();
+ Options.EmitCompactUnwindNonCanonical = getEmitCompactUnwindNonCanonical();
Options.AsSecureLogFile = getAsSecureLogFile();
return Options;
diff --git a/llvm/lib/MC/MCWasmStreamer.cpp b/llvm/lib/MC/MCWasmStreamer.cpp
index 823e98a871bd..fbab72fb5f3d 100644
--- a/llvm/lib/MC/MCWasmStreamer.cpp
+++ b/llvm/lib/MC/MCWasmStreamer.cpp
@@ -196,8 +196,7 @@ void MCWasmStreamer::emitInstToData(const MCInst &Inst,
MCAssembler &Assembler = getAssembler();
SmallVector<MCFixup, 4> Fixups;
SmallString<256> Code;
- raw_svector_ostream VecOS(Code);
- Assembler.getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
+ Assembler.getEmitter().encodeInstruction(Inst, Code, Fixups, STI);
for (auto &Fixup : Fixups)
fixSymbolsInTLSFixups(Fixup.getValue());
diff --git a/llvm/lib/MC/MCWin64EH.cpp b/llvm/lib/MC/MCWin64EH.cpp
index 1a55722133cc..a2d61da722af 100644
--- a/llvm/lib/MC/MCWin64EH.cpp
+++ b/llvm/lib/MC/MCWin64EH.cpp
@@ -1089,7 +1089,7 @@ static void ARM64ProcessEpilogs(WinEH::FrameInfo *info,
FindMatchingEpilog(EpilogInstrs, AddedEpilogs, info);
int PrologOffset;
if (MatchingEpilog) {
- assert(EpilogInfo.find(MatchingEpilog) != EpilogInfo.end() &&
+ assert(EpilogInfo.contains(MatchingEpilog) &&
"Duplicate epilog not found");
EpilogInfo[EpilogStart] = EpilogInfo.lookup(MatchingEpilog);
// Clear the unwind codes in the EpilogMap, so that they don't get output
@@ -2369,7 +2369,7 @@ static void ARMEmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info,
FindMatchingEpilog(EpilogInstrs, AddedEpilogs, info);
int PrologOffset;
if (MatchingEpilog) {
- assert(EpilogInfo.find(MatchingEpilog) != EpilogInfo.end() &&
+ assert(EpilogInfo.contains(MatchingEpilog) &&
"Duplicate epilog not found");
EpilogInfo[EpilogStart] = EpilogInfo.lookup(MatchingEpilog);
// Clear the unwind codes in the EpilogMap, so that they don't get output
@@ -2449,7 +2449,7 @@ static void ARMEmitUnwindInfo(MCStreamer &streamer, WinEH::FrameInfo *info,
else
OffsetExpr = GetSubDivExpr(streamer, EpilogStart, info->Begin, 2);
- assert(info->EpilogMap.find(EpilogStart) != info->EpilogMap.end());
+ assert(info->EpilogMap.contains(EpilogStart));
unsigned Condition = info->EpilogMap[EpilogStart].Condition;
assert(Condition <= 0xf);
diff --git a/llvm/lib/MC/MCWinCOFFStreamer.cpp b/llvm/lib/MC/MCWinCOFFStreamer.cpp
index 198c71571d9d..e510e1e4031c 100644
--- a/llvm/lib/MC/MCWinCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCWinCOFFStreamer.cpp
@@ -13,7 +13,6 @@
#include "llvm/MC/MCWinCOFFStreamer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -33,6 +32,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cstdint>
@@ -53,8 +53,7 @@ void MCWinCOFFStreamer::emitInstToData(const MCInst &Inst,
SmallVector<MCFixup, 4> Fixups;
SmallString<256> Code;
- raw_svector_ostream VecOS(Code);
- getAssembler().getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
+ getAssembler().getEmitter().encodeInstruction(Inst, Code, Fixups, STI);
// Add the fixups and data.
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
@@ -116,9 +115,14 @@ bool MCWinCOFFStreamer::emitSymbolAttribute(MCSymbol *S,
default: return false;
case MCSA_WeakReference:
case MCSA_Weak:
- Symbol->setIsWeakExternal();
+ Symbol->setWeakExternalCharacteristics(COFF::IMAGE_WEAK_EXTERN_SEARCH_ALIAS);
Symbol->setExternal(true);
break;
+ case MCSA_WeakAntiDep:
+ Symbol->setWeakExternalCharacteristics(COFF::IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY);
+ Symbol->setExternal(true);
+ Symbol->setIsWeakExternal(true);
+ break;
case MCSA_Global:
Symbol->setExternal(true);
break;
@@ -345,9 +349,7 @@ void MCWinCOFFStreamer::emitCGProfileEntry(const MCSymbolRefExpr *From,
void MCWinCOFFStreamer::finalizeCGProfileEntry(const MCSymbolRefExpr *&SRE) {
const MCSymbol *S = &SRE->getSymbol();
- bool Created;
- getAssembler().registerSymbol(*S, &Created);
- if (Created)
+ if (getAssembler().registerSymbol(*S))
cast<MCSymbolCOFF>(S)->setExternal(true);
}
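
MCSA_WeakAntiDep is the attribute a target or frontend hands to the streamer to get the new anti-dependency weak externals on COFF. A hedged sketch of such a call site; the helper name is invented for illustration and is not part of the import:

#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"

// Sketch: request anti-dependency weak-external semantics for Sym. Per the
// streamer change above, COFF maps this to IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY;
// other object formats may ignore or reject the attribute.
void markWeakAntiDep(llvm::MCStreamer &Streamer, llvm::MCSymbol *Sym) {
  Streamer.emitSymbolAttribute(Sym, llvm::MCSA_WeakAntiDep);
}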
diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp
index 25a678c68416..8585416cd081 100644
--- a/llvm/lib/MC/MCXCOFFStreamer.cpp
+++ b/llvm/lib/MC/MCXCOFFStreamer.cpp
@@ -81,6 +81,21 @@ void MCXCOFFStreamer::emitXCOFFSymbolLinkageWithVisibility(
emitSymbolAttribute(Symbol, Visibility);
}
+void MCXCOFFStreamer::emitXCOFFRefDirective(const MCSymbol *Symbol) {
+ // Add a Fixup here to later record a relocation of type R_REF to prevent the
+ // ref symbol from being garbage collected (by the binder).
+ MCDataFragment *DF = getOrCreateDataFragment();
+ const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(Symbol, getContext());
+ std::optional<MCFixupKind> MaybeKind =
+ getAssembler().getBackend().getFixupKind("R_REF");
+ if (!MaybeKind)
+ report_fatal_error("failed to get fixup kind for R_REF relocation");
+
+ MCFixupKind Kind = *MaybeKind;
+ MCFixup Fixup = MCFixup::create(DF->getContents().size(), SRE, Kind);
+ DF->getFixups().push_back(Fixup);
+}
+
void MCXCOFFStreamer::emitXCOFFExceptDirective(const MCSymbol *Symbol,
const MCSymbol *Trap,
unsigned Lang, unsigned Reason,
@@ -90,6 +105,10 @@ void MCXCOFFStreamer::emitXCOFFExceptDirective(const MCSymbol *Symbol,
FunctionSize, hasDebug);
}
+void MCXCOFFStreamer::emitXCOFFCInfoSym(StringRef Name, StringRef Metadata) {
+ getAssembler().getWriter().addCInfoSymEntry(Name, Metadata);
+}
+
void MCXCOFFStreamer::emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
Align ByteAlignment) {
getAssembler().registerSymbol(*Symbol);
@@ -118,8 +137,7 @@ void MCXCOFFStreamer::emitInstToData(const MCInst &Inst,
MCAssembler &Assembler = getAssembler();
SmallVector<MCFixup, 4> Fixups;
SmallString<256> Code;
- raw_svector_ostream VecOS(Code);
- Assembler.getEmitter().encodeInstruction(Inst, VecOS, Fixups, STI);
+ Assembler.getEmitter().encodeInstruction(Inst, Code, Fixups, STI);
// Add the fixups and data.
MCDataFragment *DF = getOrCreateDataFragment(&STI);
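
The new .ref handling leans on MCAsmBackend::getFixupKind returning std::optional, so a missing target mapping is caught before an invalid fixup is created. A condensed sketch of that pattern, assuming the backend, fragment, and expression are already available (the helper below is illustrative, not part of the import):

#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCFragment.h"
#include "llvm/Support/ErrorHandling.h"
#include <optional>

// Sketch: attach a target-named fixup at the current end of a data fragment,
// mirroring the R_REF handling above. The name is only meaningful to backends
// that register such a fixup kind.
void addNamedFixup(const llvm::MCAsmBackend &Backend, llvm::MCDataFragment &DF,
                   const llvm::MCExpr *Value, llvm::StringRef KindName) {
  std::optional<llvm::MCFixupKind> Kind = Backend.getFixupKind(KindName);
  if (!Kind)
    llvm::report_fatal_error("no fixup kind with that name for this target");
  DF.getFixups().push_back(
      llvm::MCFixup::create(DF.getContents().size(), Value, *Kind));
}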
diff --git a/llvm/lib/MC/MachObjectWriter.cpp b/llvm/lib/MC/MachObjectWriter.cpp
index 446d1372fa66..6b263df92cbe 100644
--- a/llvm/lib/MC/MachObjectWriter.cpp
+++ b/llvm/lib/MC/MachObjectWriter.cpp
@@ -531,9 +531,7 @@ void MachObjectWriter::bindIndirectSymbols(MCAssembler &Asm) {
// Set the symbol type to undefined lazy, but only on construction.
//
// FIXME: Do not hardcode.
- bool Created;
- Asm.registerSymbol(*it->Symbol, &Created);
- if (Created)
+ if (Asm.registerSymbol(*it->Symbol))
cast<MCSymbolMachO>(it->Symbol)->setReferenceTypeUndefinedLazy(true);
}
}
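
registerSymbol now reports first-time registration through its return value rather than an out-parameter. A minimal sketch of the new call shape (helper name is illustrative):

#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCSymbol.h"

// Sketch: act only on the first registration, mirroring the call sites above.
void registerOnce(llvm::MCAssembler &Asm, const llvm::MCSymbol &Sym) {
  if (Asm.registerSymbol(Sym)) {
    // First time the assembler has seen Sym; one-time setup goes here.
  }
}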
diff --git a/llvm/lib/MC/TargetRegistry.cpp b/llvm/lib/MC/TargetRegistry.cpp
index b54853a6e0d7..fa7aaccabcd6 100644
--- a/llvm/lib/MC/TargetRegistry.cpp
+++ b/llvm/lib/MC/TargetRegistry.cpp
@@ -21,7 +21,7 @@ iterator_range<TargetRegistry::iterator> TargetRegistry::targets() {
return make_range(iterator(FirstTarget), iterator());
}
-const Target *TargetRegistry::lookupTarget(const std::string &ArchName,
+const Target *TargetRegistry::lookupTarget(StringRef ArchName,
Triple &TheTriple,
std::string &Error) {
// Allocate target machine. First, check whether the user has explicitly
@@ -33,7 +33,7 @@ const Target *TargetRegistry::lookupTarget(const std::string &ArchName,
[&](const Target &T) { return ArchName == T.getName(); });
if (I == targets().end()) {
- Error = "invalid target '" + ArchName + "'.\n";
+ Error = ("invalid target '" + ArchName + "'.\n").str();
return nullptr;
}
@@ -59,8 +59,7 @@ const Target *TargetRegistry::lookupTarget(const std::string &ArchName,
return TheTarget;
}
-const Target *TargetRegistry::lookupTarget(const std::string &TT,
- std::string &Error) {
+const Target *TargetRegistry::lookupTarget(StringRef TT, std::string &Error) {
// Provide special warning when no targets are initialized.
if (targets().begin() == targets().end()) {
Error = "Unable to find target for this triple (no targets are registered)";
@@ -71,7 +70,8 @@ const Target *TargetRegistry::lookupTarget(const std::string &TT,
auto I = find_if(targets(), ArchMatch);
if (I == targets().end()) {
- Error = "No available targets are compatible with triple \"" + TT + "\"";
+ Error = ("No available targets are compatible with triple \"" + TT + "\"")
+ .str();
return nullptr;
}
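
With lookupTarget taking StringRef, callers no longer need to materialize a std::string for the triple or arch name. A short usage sketch (illustrative only, not part of the import):

#include "llvm/ADT/StringRef.h"
#include "llvm/MC/TargetRegistry.h"
#include <string>

// Sketch: look up a registered target by triple name.
const llvm::Target *findTarget(llvm::StringRef TripleName) {
  std::string Error;
  const llvm::Target *T = llvm::TargetRegistry::lookupTarget(TripleName, Error);
  // On failure T is null and Error holds the diagnostic built above.
  return T;
}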
diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index a4cb4149f036..2b886449f052 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -197,7 +197,7 @@ bool isDwoSection(const MCSection &Sec) {
}
class WasmObjectWriter : public MCObjectWriter {
- support::endian::Writer *W;
+ support::endian::Writer *W = nullptr;
/// The target specific Wasm writer instance.
std::unique_ptr<MCWasmObjectTargetWriter> TargetObjectWriter;
@@ -671,6 +671,7 @@ WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry,
// Provisional value is same as the index
return getRelocationIndexValue(RelEntry);
case wasm::R_WASM_FUNCTION_INDEX_LEB:
+ case wasm::R_WASM_FUNCTION_INDEX_I32:
case wasm::R_WASM_GLOBAL_INDEX_LEB:
case wasm::R_WASM_GLOBAL_INDEX_I32:
case wasm::R_WASM_TAG_INDEX_LEB:
@@ -791,6 +792,7 @@ void WasmObjectWriter::applyRelocations(
case wasm::R_WASM_TABLE_INDEX_I32:
case wasm::R_WASM_MEMORY_ADDR_I32:
case wasm::R_WASM_FUNCTION_OFFSET_I32:
+ case wasm::R_WASM_FUNCTION_INDEX_I32:
case wasm::R_WASM_SECTION_OFFSET_I32:
case wasm::R_WASM_GLOBAL_INDEX_I32:
case wasm::R_WASM_MEMORY_ADDR_LOCREL_I32:
diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp
index c0b5e8bdc503..c203280d2c10 100644
--- a/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -59,11 +59,7 @@ constexpr int OffsetLabelIntervalBits = 20;
using name = SmallString<COFF::NameSize>;
-enum AuxiliaryType {
- ATWeakExternal,
- ATFile,
- ATSectionDefinition
-};
+enum AuxiliaryType { ATWeakExternal, ATFile, ATSectionDefinition };
struct AuxSymbol {
AuxiliaryType AuxType;
@@ -79,7 +75,7 @@ public:
using AuxiliarySymbols = SmallVector<AuxSymbol, 1>;
name Name;
- int Index;
+ int Index = 0;
AuxiliarySymbols Aux;
COFFSymbol *Other = nullptr;
COFFSection *Section = nullptr;
@@ -115,7 +111,7 @@ public:
COFF::section Header = {};
std::string Name;
- int Number;
+ int Number = 0;
MCSectionCOFF const *MCSection = nullptr;
COFFSymbol *Symbol = nullptr;
relocations Relocations;
@@ -125,8 +121,10 @@ public:
SmallVector<COFFSymbol *, 1> OffsetSymbols;
};
-class WinCOFFObjectWriter : public MCObjectWriter {
-public:
+class WinCOFFObjectWriter;
+
+class WinCOFFWriter {
+ WinCOFFObjectWriter &OWriter;
support::endian::Writer W;
using symbols = std::vector<std::unique_ptr<COFFSymbol>>;
@@ -137,8 +135,6 @@ public:
using symbol_list = DenseSet<COFFSymbol *>;
- std::unique_ptr<MCWinCOFFObjectTargetWriter> TargetObjectWriter;
-
// Root level file contents.
COFF::header Header = {};
sections Sections;
@@ -154,25 +150,27 @@ public:
bool UseBigObj;
bool UseOffsetLabels = false;
- MCSectionCOFF *AddrsigSection;
-
+public:
+ MCSectionCOFF *AddrsigSection = nullptr;
MCSectionCOFF *CGProfileSection = nullptr;
- WinCOFFObjectWriter(std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW,
- raw_pwrite_stream &OS);
-
- void reset() override {
- memset(&Header, 0, sizeof(Header));
- Header.Machine = TargetObjectWriter->getMachine();
- Sections.clear();
- Symbols.clear();
- Strings.clear();
- SectionMap.clear();
- SymbolMap.clear();
- WeakDefaults.clear();
- MCObjectWriter::reset();
- }
+ enum DwoMode {
+ AllSections,
+ NonDwoOnly,
+ DwoOnly,
+ } Mode;
+
+ WinCOFFWriter(WinCOFFObjectWriter &OWriter, raw_pwrite_stream &OS,
+ DwoMode Mode);
+
+ void reset();
+ void executePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout);
+ void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+ uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+private:
COFFSymbol *createSymbol(StringRef Name);
COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol *Symbol);
COFFSection *createSection(StringRef Name);
@@ -189,7 +187,6 @@ public:
bool IsPhysicalSection(COFFSection *S);
// Entity writing methods.
-
void WriteFileHeader(const COFF::header &Header);
void WriteSymbol(const COFFSymbol &S);
void WriteAuxiliarySymbols(const COFFSymbol::AuxiliarySymbols &S);
@@ -198,32 +195,55 @@ public:
uint32_t writeSectionContents(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCSection &MCSec);
void writeSection(MCAssembler &Asm, const MCAsmLayout &Layout,
- const COFFSection &Sec, const MCSection &MCSec);
+ const COFFSection &Sec);
- // MCObjectWriter interface implementation.
+ void createFileSymbols(MCAssembler &Asm);
+ void setWeakDefaultNames();
+ void assignSectionNumbers();
+ void assignFileOffsets(MCAssembler &Asm, const MCAsmLayout &Layout);
+};
+class WinCOFFObjectWriter : public MCObjectWriter {
+ friend class WinCOFFWriter;
+
+ std::unique_ptr<MCWinCOFFObjectTargetWriter> TargetObjectWriter;
+ std::unique_ptr<WinCOFFWriter> ObjWriter, DwoWriter;
+
+public:
+ WinCOFFObjectWriter(std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW,
+ raw_pwrite_stream &OS)
+ : TargetObjectWriter(std::move(MOTW)),
+ ObjWriter(std::make_unique<WinCOFFWriter>(*this, OS,
+ WinCOFFWriter::AllSections)) {
+ }
+ WinCOFFObjectWriter(std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW,
+ raw_pwrite_stream &OS, raw_pwrite_stream &DwoOS)
+ : TargetObjectWriter(std::move(MOTW)),
+ ObjWriter(std::make_unique<WinCOFFWriter>(*this, OS,
+ WinCOFFWriter::NonDwoOnly)),
+ DwoWriter(std::make_unique<WinCOFFWriter>(*this, DwoOS,
+ WinCOFFWriter::DwoOnly)) {}
+
+ // MCObjectWriter interface implementation.
+ void reset() override;
void executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) override;
-
bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
const MCSymbol &SymA,
const MCFragment &FB, bool InSet,
bool IsPCRel) const override;
-
void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, uint64_t &FixedValue) override;
-
- void createFileSymbols(MCAssembler &Asm);
- void setWeakDefaultNames();
- void assignSectionNumbers();
- void assignFileOffsets(MCAssembler &Asm, const MCAsmLayout &Layout);
-
uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
};
} // end anonymous namespace
+static bool isDwoSection(const MCSection &Sec) {
+ return Sec.getName().endswith(".dwo");
+}
+
//------------------------------------------------------------------------------
// Symbol class implementation
@@ -236,12 +256,12 @@ void COFFSymbol::set_name_offset(uint32_t Offset) {
}
//------------------------------------------------------------------------------
-// WinCOFFObjectWriter class implementation
+// WinCOFFWriter class implementation
-WinCOFFObjectWriter::WinCOFFObjectWriter(
- std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS)
- : W(OS, support::little), TargetObjectWriter(std::move(MOTW)) {
- Header.Machine = TargetObjectWriter->getMachine();
+WinCOFFWriter::WinCOFFWriter(WinCOFFObjectWriter &OWriter,
+ raw_pwrite_stream &OS, DwoMode Mode)
+ : OWriter(OWriter), W(OS, support::little), Mode(Mode) {
+ Header.Machine = OWriter.TargetObjectWriter->getMachine();
// Some relocations on ARM64 (the 21 bit ADRP relocations) have a slightly
// limited range for the immediate offset (+/- 1 MB); create extra offset
// label symbols with regular intervals to allow referencing a
@@ -249,19 +269,19 @@ WinCOFFObjectWriter::WinCOFFObjectWriter(
UseOffsetLabels = Header.Machine == COFF::IMAGE_FILE_MACHINE_ARM64;
}
-COFFSymbol *WinCOFFObjectWriter::createSymbol(StringRef Name) {
+COFFSymbol *WinCOFFWriter::createSymbol(StringRef Name) {
Symbols.push_back(std::make_unique<COFFSymbol>(Name));
return Symbols.back().get();
}
-COFFSymbol *WinCOFFObjectWriter::GetOrCreateCOFFSymbol(const MCSymbol *Symbol) {
+COFFSymbol *WinCOFFWriter::GetOrCreateCOFFSymbol(const MCSymbol *Symbol) {
COFFSymbol *&Ret = SymbolMap[Symbol];
if (!Ret)
Ret = createSymbol(Symbol->getName());
return Ret;
}
-COFFSection *WinCOFFObjectWriter::createSection(StringRef Name) {
+COFFSection *WinCOFFWriter::createSection(StringRef Name) {
Sections.emplace_back(std::make_unique<COFFSection>(Name));
return Sections.back().get();
}
@@ -302,8 +322,8 @@ static uint32_t getAlignment(const MCSectionCOFF &Sec) {
/// This function takes a section data object from the assembler
/// and creates the associated COFF section staging object.
-void WinCOFFObjectWriter::defineSection(const MCSectionCOFF &MCSec,
- const MCAsmLayout &Layout) {
+void WinCOFFWriter::defineSection(const MCSectionCOFF &MCSec,
+ const MCAsmLayout &Layout) {
COFFSection *Section = createSection(MCSec.getName());
COFFSymbol *Symbol = createSymbol(MCSec.getName());
Section->Symbol = Symbol;
@@ -361,7 +381,7 @@ static uint64_t getSymbolValue(const MCSymbol &Symbol,
return Res;
}
-COFFSymbol *WinCOFFObjectWriter::getLinkedSymbol(const MCSymbol &Symbol) {
+COFFSymbol *WinCOFFWriter::getLinkedSymbol(const MCSymbol &Symbol) {
if (!Symbol.isVariable())
return nullptr;
@@ -379,9 +399,8 @@ COFFSymbol *WinCOFFObjectWriter::getLinkedSymbol(const MCSymbol &Symbol) {
/// This function takes a symbol data object from the assembler
/// and creates the associated COFF symbol staging object.
-void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &MCSym,
- MCAssembler &Assembler,
- const MCAsmLayout &Layout) {
+void WinCOFFWriter::DefineSymbol(const MCSymbol &MCSym, MCAssembler &Assembler,
+ const MCAsmLayout &Layout) {
COFFSymbol *Sym = GetOrCreateCOFFSymbol(&MCSym);
const MCSymbol *Base = Layout.getBaseSymbol(MCSym);
COFFSection *Sec = nullptr;
@@ -392,7 +411,7 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &MCSym,
}
COFFSymbol *Local = nullptr;
- if (cast<MCSymbolCOFF>(MCSym).isWeakExternal()) {
+ if (cast<MCSymbolCOFF>(MCSym).getWeakExternalCharacteristics()) {
Sym->Data.StorageClass = COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL;
Sym->Section = nullptr;
@@ -414,9 +433,9 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &MCSym,
Sym->Aux.resize(1);
memset(&Sym->Aux[0], 0, sizeof(Sym->Aux[0]));
Sym->Aux[0].AuxType = ATWeakExternal;
- Sym->Aux[0].Aux.WeakExternal.TagIndex = 0;
+ Sym->Aux[0].Aux.WeakExternal.TagIndex = 0; // Filled in later
Sym->Aux[0].Aux.WeakExternal.Characteristics =
- COFF::IMAGE_WEAK_EXTERN_SEARCH_ALIAS;
+ cast<MCSymbolCOFF>(MCSym).getWeakExternalCharacteristics();
} else {
if (!Base)
Sym->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE;
@@ -434,8 +453,8 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &MCSym,
// If no storage class was specified in the streamer, define it here.
if (Local->Data.StorageClass == COFF::IMAGE_SYM_CLASS_NULL) {
- bool IsExternal = MCSym.isExternal() ||
- (!MCSym.getFragment() && !MCSym.isVariable());
+ bool IsExternal =
+ MCSym.isExternal() || (!MCSym.getFragment() && !MCSym.isVariable());
Local->Data.StorageClass = IsExternal ? COFF::IMAGE_SYM_CLASS_EXTERNAL
: COFF::IMAGE_SYM_CLASS_STATIC;
@@ -445,7 +464,7 @@ void WinCOFFObjectWriter::DefineSymbol(const MCSymbol &MCSym,
Sym->MC = &MCSym;
}
-void WinCOFFObjectWriter::SetSectionName(COFFSection &S) {
+void WinCOFFWriter::SetSectionName(COFFSection &S) {
if (S.Name.size() <= COFF::NameSize) {
std::memcpy(S.Header.Name, S.Name.c_str(), S.Name.size());
return;
@@ -456,14 +475,14 @@ void WinCOFFObjectWriter::SetSectionName(COFFSection &S) {
report_fatal_error("COFF string table is greater than 64 GB.");
}
-void WinCOFFObjectWriter::SetSymbolName(COFFSymbol &S) {
+void WinCOFFWriter::SetSymbolName(COFFSymbol &S) {
if (S.Name.size() > COFF::NameSize)
S.set_name_offset(Strings.getOffset(S.Name));
else
std::memcpy(S.Data.Name, S.Name.c_str(), S.Name.size());
}
-bool WinCOFFObjectWriter::IsPhysicalSection(COFFSection *S) {
+bool WinCOFFWriter::IsPhysicalSection(COFFSection *S) {
return (S->Header.Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) ==
0;
}
@@ -471,7 +490,7 @@ bool WinCOFFObjectWriter::IsPhysicalSection(COFFSection *S) {
//------------------------------------------------------------------------------
// entity writing methods
-void WinCOFFObjectWriter::WriteFileHeader(const COFF::header &Header) {
+void WinCOFFWriter::WriteFileHeader(const COFF::header &Header) {
if (UseBigObj) {
W.write<uint16_t>(COFF::IMAGE_FILE_MACHINE_UNKNOWN);
W.write<uint16_t>(0xFFFF);
@@ -497,7 +516,7 @@ void WinCOFFObjectWriter::WriteFileHeader(const COFF::header &Header) {
}
}
-void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol &S) {
+void WinCOFFWriter::WriteSymbol(const COFFSymbol &S) {
W.OS.write(S.Data.Name, COFF::NameSize);
W.write<uint32_t>(S.Data.Value);
if (UseBigObj)
@@ -510,7 +529,7 @@ void WinCOFFObjectWriter::WriteSymbol(const COFFSymbol &S) {
WriteAuxiliarySymbols(S.Aux);
}
-void WinCOFFObjectWriter::WriteAuxiliarySymbols(
+void WinCOFFWriter::WriteAuxiliarySymbols(
const COFFSymbol::AuxiliarySymbols &S) {
for (const AuxSymbol &i : S) {
switch (i.AuxType) {
@@ -523,7 +542,7 @@ void WinCOFFObjectWriter::WriteAuxiliarySymbols(
break;
case ATFile:
W.OS.write(reinterpret_cast<const char *>(&i.Aux),
- UseBigObj ? COFF::Symbol32Size : COFF::Symbol16Size);
+ UseBigObj ? COFF::Symbol32Size : COFF::Symbol16Size);
break;
case ATSectionDefinition:
W.write<uint32_t>(i.Aux.SectionDefinition.Length);
@@ -533,7 +552,8 @@ void WinCOFFObjectWriter::WriteAuxiliarySymbols(
W.write<uint16_t>(static_cast<int16_t>(i.Aux.SectionDefinition.Number));
W.OS << char(i.Aux.SectionDefinition.Selection);
W.OS.write_zeros(sizeof(i.Aux.SectionDefinition.unused));
- W.write<uint16_t>(static_cast<int16_t>(i.Aux.SectionDefinition.Number >> 16));
+ W.write<uint16_t>(
+ static_cast<int16_t>(i.Aux.SectionDefinition.Number >> 16));
if (UseBigObj)
W.OS.write_zeros(COFF::Symbol32Size - COFF::Symbol16Size);
break;
@@ -542,7 +562,7 @@ void WinCOFFObjectWriter::WriteAuxiliarySymbols(
}
// Write the section header.
-void WinCOFFObjectWriter::writeSectionHeaders() {
+void WinCOFFWriter::writeSectionHeaders() {
// Section numbers must be monotonically increasing in the section
// header, but our Sections array is not sorted by section number,
// so make a copy of Sections and sort it.
@@ -573,7 +593,7 @@ void WinCOFFObjectWriter::writeSectionHeaders() {
}
}
-void WinCOFFObjectWriter::WriteRelocation(const COFF::relocation &R) {
+void WinCOFFWriter::WriteRelocation(const COFF::relocation &R) {
W.write<uint32_t>(R.VirtualAddress);
W.write<uint32_t>(R.SymbolTableIndex);
W.write<uint16_t>(R.Type);
@@ -582,9 +602,9 @@ void WinCOFFObjectWriter::WriteRelocation(const COFF::relocation &R) {
// Write MCSec's contents. What this function does is essentially
// "Asm.writeSectionData(&MCSec, Layout)", but it's a bit complicated
// because it needs to compute a CRC.
-uint32_t WinCOFFObjectWriter::writeSectionContents(MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCSection &MCSec) {
+uint32_t WinCOFFWriter::writeSectionContents(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCSection &MCSec) {
// Save the contents of the section to a temporary buffer, we need this
// to CRC the data before we dump it into the object file.
SmallVector<char, 128> Buf;
@@ -601,10 +621,8 @@ uint32_t WinCOFFObjectWriter::writeSectionContents(MCAssembler &Asm,
return JC.getCRC();
}
-void WinCOFFObjectWriter::writeSection(MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const COFFSection &Sec,
- const MCSection &MCSec) {
+void WinCOFFWriter::writeSection(MCAssembler &Asm, const MCAsmLayout &Layout,
+ const COFFSection &Sec) {
if (Sec.Number == -1)
return;
@@ -613,11 +631,10 @@ void WinCOFFObjectWriter::writeSection(MCAssembler &Asm,
assert(W.OS.tell() == Sec.Header.PointerToRawData &&
"Section::PointerToRawData is insane!");
- uint32_t CRC = writeSectionContents(Asm, Layout, MCSec);
+ uint32_t CRC = writeSectionContents(Asm, Layout, *Sec.MCSection);
// Update the section definition auxiliary symbol to record the CRC.
- COFFSection *Sec = SectionMap[&MCSec];
- COFFSymbol::AuxiliarySymbols &AuxSyms = Sec->Symbol->Aux;
+ COFFSymbol::AuxiliarySymbols &AuxSyms = Sec.Symbol->Aux;
assert(AuxSyms.size() == 1 && AuxSyms[0].AuxType == ATSectionDefinition);
AuxSymbol &SecDef = AuxSyms[0];
SecDef.Aux.SectionDefinition.CheckSum = CRC;
@@ -647,211 +664,8 @@ void WinCOFFObjectWriter::writeSection(MCAssembler &Asm,
WriteRelocation(Relocation.Data);
}
-////////////////////////////////////////////////////////////////////////////////
-// MCObjectWriter interface implementations
-
-void WinCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
- const MCAsmLayout &Layout) {
- if (EmitAddrsigSection) {
- AddrsigSection = Asm.getContext().getCOFFSection(
- ".llvm_addrsig", COFF::IMAGE_SCN_LNK_REMOVE,
- SectionKind::getMetadata());
- Asm.registerSection(*AddrsigSection);
- }
-
- if (!Asm.CGProfile.empty()) {
- CGProfileSection = Asm.getContext().getCOFFSection(
- ".llvm.call-graph-profile", COFF::IMAGE_SCN_LNK_REMOVE,
- SectionKind::getMetadata());
- Asm.registerSection(*CGProfileSection);
- }
-
- // "Define" each section & symbol. This creates section & symbol
- // entries in the staging area.
- for (const auto &Section : Asm)
- defineSection(static_cast<const MCSectionCOFF &>(Section), Layout);
-
- for (const MCSymbol &Symbol : Asm.symbols())
- if (!Symbol.isTemporary())
- DefineSymbol(Symbol, Asm, Layout);
-}
-
-bool WinCOFFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
- const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
- bool InSet, bool IsPCRel) const {
- // Don't drop relocations between functions, even if they are in the same text
- // section. Multiple Visual C++ linker features depend on having the
- // relocations present. The /INCREMENTAL flag will cause these relocations to
- // point to thunks, and the /GUARD:CF flag assumes that it can use relocations
- // to approximate the set of all address taken functions. LLD's implementation
- // of /GUARD:CF also relies on the existance of these relocations.
- uint16_t Type = cast<MCSymbolCOFF>(SymA).getType();
- if ((Type >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION)
- return false;
- return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB,
- InSet, IsPCRel);
-}
-
-void WinCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue) {
- assert(Target.getSymA() && "Relocation must reference a symbol!");
-
- const MCSymbol &A = Target.getSymA()->getSymbol();
- if (!A.isRegistered()) {
- Asm.getContext().reportError(Fixup.getLoc(),
- Twine("symbol '") + A.getName() +
- "' can not be undefined");
- return;
- }
- if (A.isTemporary() && A.isUndefined()) {
- Asm.getContext().reportError(Fixup.getLoc(),
- Twine("assembler label '") + A.getName() +
- "' can not be undefined");
- return;
- }
-
- MCSection *MCSec = Fragment->getParent();
-
- // Mark this symbol as requiring an entry in the symbol table.
- assert(SectionMap.find(MCSec) != SectionMap.end() &&
- "Section must already have been defined in executePostLayoutBinding!");
-
- COFFSection *Sec = SectionMap[MCSec];
- const MCSymbolRefExpr *SymB = Target.getSymB();
-
- if (SymB) {
- const MCSymbol *B = &SymB->getSymbol();
- if (!B->getFragment()) {
- Asm.getContext().reportError(
- Fixup.getLoc(),
- Twine("symbol '") + B->getName() +
- "' can not be undefined in a subtraction expression");
- return;
- }
-
- // Offset of the symbol in the section
- int64_t OffsetOfB = Layout.getSymbolOffset(*B);
-
- // Offset of the relocation in the section
- int64_t OffsetOfRelocation =
- Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
-
- FixedValue = (OffsetOfRelocation - OffsetOfB) + Target.getConstant();
- } else {
- FixedValue = Target.getConstant();
- }
-
- COFFRelocation Reloc;
-
- Reloc.Data.SymbolTableIndex = 0;
- Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment);
-
- // Turn relocations for temporary symbols into section relocations.
- if (A.isTemporary()) {
- MCSection *TargetSection = &A.getSection();
- assert(
- SectionMap.find(TargetSection) != SectionMap.end() &&
- "Section must already have been defined in executePostLayoutBinding!");
- COFFSection *Section = SectionMap[TargetSection];
- Reloc.Symb = Section->Symbol;
- FixedValue += Layout.getSymbolOffset(A);
- // Technically, we should do the final adjustments of FixedValue (below)
- // before picking an offset symbol, otherwise we might choose one which
- // is slightly too far away. The relocations where it really matters
- // (arm64 adrp relocations) don't get any offset though.
- if (UseOffsetLabels && !Section->OffsetSymbols.empty()) {
- uint64_t LabelIndex = FixedValue >> OffsetLabelIntervalBits;
- if (LabelIndex > 0) {
- if (LabelIndex <= Section->OffsetSymbols.size())
- Reloc.Symb = Section->OffsetSymbols[LabelIndex - 1];
- else
- Reloc.Symb = Section->OffsetSymbols.back();
- FixedValue -= Reloc.Symb->Data.Value;
- }
- }
- } else {
- assert(
- SymbolMap.find(&A) != SymbolMap.end() &&
- "Symbol must already have been defined in executePostLayoutBinding!");
- Reloc.Symb = SymbolMap[&A];
- }
-
- ++Reloc.Symb->Relocations;
-
- Reloc.Data.VirtualAddress += Fixup.getOffset();
- Reloc.Data.Type = TargetObjectWriter->getRelocType(
- Asm.getContext(), Target, Fixup, SymB, Asm.getBackend());
-
- // The *_REL32 relocations are relative to the end of the relocation,
- // not to the start.
- if ((Header.Machine == COFF::IMAGE_FILE_MACHINE_AMD64 &&
- Reloc.Data.Type == COFF::IMAGE_REL_AMD64_REL32) ||
- (Header.Machine == COFF::IMAGE_FILE_MACHINE_I386 &&
- Reloc.Data.Type == COFF::IMAGE_REL_I386_REL32) ||
- (Header.Machine == COFF::IMAGE_FILE_MACHINE_ARMNT &&
- Reloc.Data.Type == COFF::IMAGE_REL_ARM_REL32) ||
- (Header.Machine == COFF::IMAGE_FILE_MACHINE_ARM64 &&
- Reloc.Data.Type == COFF::IMAGE_REL_ARM64_REL32))
- FixedValue += 4;
-
- if (Header.Machine == COFF::IMAGE_FILE_MACHINE_ARMNT) {
- switch (Reloc.Data.Type) {
- case COFF::IMAGE_REL_ARM_ABSOLUTE:
- case COFF::IMAGE_REL_ARM_ADDR32:
- case COFF::IMAGE_REL_ARM_ADDR32NB:
- case COFF::IMAGE_REL_ARM_TOKEN:
- case COFF::IMAGE_REL_ARM_SECTION:
- case COFF::IMAGE_REL_ARM_SECREL:
- break;
- case COFF::IMAGE_REL_ARM_BRANCH11:
- case COFF::IMAGE_REL_ARM_BLX11:
- // IMAGE_REL_ARM_BRANCH11 and IMAGE_REL_ARM_BLX11 are only used for
- // pre-ARMv7, which implicitly rules it out of ARMNT (it would be valid
- // for Windows CE).
- case COFF::IMAGE_REL_ARM_BRANCH24:
- case COFF::IMAGE_REL_ARM_BLX24:
- case COFF::IMAGE_REL_ARM_MOV32A:
- // IMAGE_REL_ARM_BRANCH24, IMAGE_REL_ARM_BLX24, IMAGE_REL_ARM_MOV32A are
- // only used for ARM mode code, which is documented as being unsupported
- // by Windows on ARM. Empirical proof indicates that masm is able to
- // generate the relocations however the rest of the MSVC toolchain is
- // unable to handle it.
- llvm_unreachable("unsupported relocation");
- break;
- case COFF::IMAGE_REL_ARM_MOV32T:
- break;
- case COFF::IMAGE_REL_ARM_BRANCH20T:
- case COFF::IMAGE_REL_ARM_BRANCH24T:
- case COFF::IMAGE_REL_ARM_BLX23T:
- // IMAGE_REL_BRANCH20T, IMAGE_REL_ARM_BRANCH24T, IMAGE_REL_ARM_BLX23T all
- // perform a 4 byte adjustment to the relocation. Relative branches are
- // offset by 4 on ARM, however, because there is no RELA relocations, all
- // branches are offset by 4.
- FixedValue = FixedValue + 4;
- break;
- }
- }
-
- // The fixed value never makes sense for section indices, ignore it.
- if (Fixup.getKind() == FK_SecRel_2)
- FixedValue = 0;
-
- if (TargetObjectWriter->recordRelocation(Fixup))
- Sec->Relocations.push_back(Reloc);
-}
-
-static std::time_t getTime() {
- std::time_t Now = time(nullptr);
- if (Now < 0 || !isUInt<32>(Now))
- return UINT32_MAX;
- return Now;
-}
-
// Create .file symbols.
-void WinCOFFObjectWriter::createFileSymbols(MCAssembler &Asm) {
+void WinCOFFWriter::createFileSymbols(MCAssembler &Asm) {
for (const std::pair<std::string, size_t> &It : Asm.getFileNames()) {
// round up to calculate the number of auxiliary symbols required
const std::string &Name = It.first;
@@ -882,7 +696,7 @@ void WinCOFFObjectWriter::createFileSymbols(MCAssembler &Asm) {
}
}
-void WinCOFFObjectWriter::setWeakDefaultNames() {
+void WinCOFFWriter::setWeakDefaultNames() {
if (WeakDefaults.empty())
return;
@@ -928,7 +742,7 @@ static bool isAssociative(const COFFSection &Section) {
COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE;
}
-void WinCOFFObjectWriter::assignSectionNumbers() {
+void WinCOFFWriter::assignSectionNumbers() {
size_t I = 1;
auto Assign = [&](COFFSection &Section) {
Section.Number = I;
@@ -949,8 +763,8 @@ void WinCOFFObjectWriter::assignSectionNumbers() {
}
// Assign file offsets to COFF object file structures.
-void WinCOFFObjectWriter::assignFileOffsets(MCAssembler &Asm,
- const MCAsmLayout &Layout) {
+void WinCOFFWriter::assignFileOffsets(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
unsigned Offset = W.OS.tell();
Offset += UseBigObj ? COFF::Header32Size : COFF::Header16Size;
@@ -1009,8 +823,194 @@ void WinCOFFObjectWriter::assignFileOffsets(MCAssembler &Asm,
Header.PointerToSymbolTable = Offset;
}
-uint64_t WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
- const MCAsmLayout &Layout) {
+void WinCOFFWriter::reset() {
+ memset(&Header, 0, sizeof(Header));
+ Header.Machine = OWriter.TargetObjectWriter->getMachine();
+ Sections.clear();
+ Symbols.clear();
+ Strings.clear();
+ SectionMap.clear();
+ SymbolMap.clear();
+ WeakDefaults.clear();
+}
+
+void WinCOFFWriter::executePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ // "Define" each section & symbol. This creates section & symbol
+ // entries in the staging area.
+ for (const auto &Section : Asm) {
+ if ((Mode == NonDwoOnly && isDwoSection(Section)) ||
+ (Mode == DwoOnly && !isDwoSection(Section)))
+ continue;
+ defineSection(static_cast<const MCSectionCOFF &>(Section), Layout);
+ }
+
+ if (Mode != DwoOnly)
+ for (const MCSymbol &Symbol : Asm.symbols())
+ if (!Symbol.isTemporary())
+ DefineSymbol(Symbol, Asm, Layout);
+}
+
+void WinCOFFWriter::recordRelocation(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) {
+ assert(Target.getSymA() && "Relocation must reference a symbol!");
+
+ const MCSymbol &A = Target.getSymA()->getSymbol();
+ if (!A.isRegistered()) {
+ Asm.getContext().reportError(Fixup.getLoc(), Twine("symbol '") +
+ A.getName() +
+ "' can not be undefined");
+ return;
+ }
+ if (A.isTemporary() && A.isUndefined()) {
+ Asm.getContext().reportError(Fixup.getLoc(), Twine("assembler label '") +
+ A.getName() +
+ "' can not be undefined");
+ return;
+ }
+
+ MCSection *MCSec = Fragment->getParent();
+
+ // Mark this symbol as requiring an entry in the symbol table.
+ assert(SectionMap.contains(MCSec) &&
+ "Section must already have been defined in executePostLayoutBinding!");
+
+ COFFSection *Sec = SectionMap[MCSec];
+ const MCSymbolRefExpr *SymB = Target.getSymB();
+
+ if (SymB) {
+ const MCSymbol *B = &SymB->getSymbol();
+ if (!B->getFragment()) {
+ Asm.getContext().reportError(
+ Fixup.getLoc(),
+ Twine("symbol '") + B->getName() +
+ "' can not be undefined in a subtraction expression");
+ return;
+ }
+
+ // Offset of the symbol in the section
+ int64_t OffsetOfB = Layout.getSymbolOffset(*B);
+
+ // Offset of the relocation in the section
+ int64_t OffsetOfRelocation =
+ Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+
+ FixedValue = (OffsetOfRelocation - OffsetOfB) + Target.getConstant();
+ } else {
+ FixedValue = Target.getConstant();
+ }
+
+ COFFRelocation Reloc;
+
+ Reloc.Data.SymbolTableIndex = 0;
+ Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment);
+
+ // Turn relocations for temporary symbols into section relocations.
+ if (A.isTemporary()) {
+ MCSection *TargetSection = &A.getSection();
+ assert(
+ SectionMap.contains(TargetSection) &&
+ "Section must already have been defined in executePostLayoutBinding!");
+ COFFSection *Section = SectionMap[TargetSection];
+ Reloc.Symb = Section->Symbol;
+ FixedValue += Layout.getSymbolOffset(A);
+ // Technically, we should do the final adjustments of FixedValue (below)
+ // before picking an offset symbol, otherwise we might choose one which
+ // is slightly too far away. The relocations where it really matters
+ // (arm64 adrp relocations) don't get any offset though.
+ if (UseOffsetLabels && !Section->OffsetSymbols.empty()) {
+ uint64_t LabelIndex = FixedValue >> OffsetLabelIntervalBits;
+ if (LabelIndex > 0) {
+ if (LabelIndex <= Section->OffsetSymbols.size())
+ Reloc.Symb = Section->OffsetSymbols[LabelIndex - 1];
+ else
+ Reloc.Symb = Section->OffsetSymbols.back();
+ FixedValue -= Reloc.Symb->Data.Value;
+ }
+ }
+ } else {
+ assert(
+ SymbolMap.contains(&A) &&
+ "Symbol must already have been defined in executePostLayoutBinding!");
+ Reloc.Symb = SymbolMap[&A];
+ }
+
+ ++Reloc.Symb->Relocations;
+
+ Reloc.Data.VirtualAddress += Fixup.getOffset();
+ Reloc.Data.Type = OWriter.TargetObjectWriter->getRelocType(
+ Asm.getContext(), Target, Fixup, SymB, Asm.getBackend());
+
+ // The *_REL32 relocations are relative to the end of the relocation,
+ // not to the start.
+ if ((Header.Machine == COFF::IMAGE_FILE_MACHINE_AMD64 &&
+ Reloc.Data.Type == COFF::IMAGE_REL_AMD64_REL32) ||
+ (Header.Machine == COFF::IMAGE_FILE_MACHINE_I386 &&
+ Reloc.Data.Type == COFF::IMAGE_REL_I386_REL32) ||
+ (Header.Machine == COFF::IMAGE_FILE_MACHINE_ARMNT &&
+ Reloc.Data.Type == COFF::IMAGE_REL_ARM_REL32) ||
+ (Header.Machine == COFF::IMAGE_FILE_MACHINE_ARM64 &&
+ Reloc.Data.Type == COFF::IMAGE_REL_ARM64_REL32))
+ FixedValue += 4;
+
+ if (Header.Machine == COFF::IMAGE_FILE_MACHINE_ARMNT) {
+ switch (Reloc.Data.Type) {
+ case COFF::IMAGE_REL_ARM_ABSOLUTE:
+ case COFF::IMAGE_REL_ARM_ADDR32:
+ case COFF::IMAGE_REL_ARM_ADDR32NB:
+ case COFF::IMAGE_REL_ARM_TOKEN:
+ case COFF::IMAGE_REL_ARM_SECTION:
+ case COFF::IMAGE_REL_ARM_SECREL:
+ break;
+ case COFF::IMAGE_REL_ARM_BRANCH11:
+ case COFF::IMAGE_REL_ARM_BLX11:
+ // IMAGE_REL_ARM_BRANCH11 and IMAGE_REL_ARM_BLX11 are only used for
+ // pre-ARMv7, which implicitly rules it out of ARMNT (it would be valid
+ // for Windows CE).
+ case COFF::IMAGE_REL_ARM_BRANCH24:
+ case COFF::IMAGE_REL_ARM_BLX24:
+ case COFF::IMAGE_REL_ARM_MOV32A:
+ // IMAGE_REL_ARM_BRANCH24, IMAGE_REL_ARM_BLX24, IMAGE_REL_ARM_MOV32A are
+ // only used for ARM mode code, which is documented as being unsupported
+ // by Windows on ARM. Empirical proof indicates that masm is able to
+ // generate the relocations however the rest of the MSVC toolchain is
+ // unable to handle it.
+ llvm_unreachable("unsupported relocation");
+ break;
+ case COFF::IMAGE_REL_ARM_MOV32T:
+ break;
+ case COFF::IMAGE_REL_ARM_BRANCH20T:
+ case COFF::IMAGE_REL_ARM_BRANCH24T:
+ case COFF::IMAGE_REL_ARM_BLX23T:
+ // IMAGE_REL_BRANCH20T, IMAGE_REL_ARM_BRANCH24T, IMAGE_REL_ARM_BLX23T all
+ // perform a 4 byte adjustment to the relocation. Relative branches are
+ // offset by 4 on ARM, however, because there are no RELA relocations, all
+ // branches are offset by 4.
+ FixedValue = FixedValue + 4;
+ break;
+ }
+ }
+
+ // The fixed value never makes sense for section indices, ignore it.
+ if (Fixup.getKind() == FK_SecRel_2)
+ FixedValue = 0;
+
+ if (OWriter.TargetObjectWriter->recordRelocation(Fixup))
+ Sec->Relocations.push_back(Reloc);
+}
+
+static std::time_t getTime() {
+ std::time_t Now = time(nullptr);
+ if (Now < 0 || !isUInt<32>(Now))
+ return UINT32_MAX;
+ return Now;
+}
+
+uint64_t WinCOFFWriter::writeObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
uint64_t StartOffset = W.OS.tell();
if (Sections.size() > INT32_MAX)
@@ -1023,7 +1023,8 @@ uint64_t WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
setWeakDefaultNames();
assignSectionNumbers();
- createFileSymbols(Asm);
+ if (Mode != DwoOnly)
+ createFileSymbols(Asm);
for (auto &Symbol : Symbols) {
// Update section number & offset for symbols that have them.
@@ -1093,11 +1094,11 @@ uint64_t WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
}
// Create the contents of the .llvm_addrsig section.
- if (EmitAddrsigSection) {
+ if (Mode != DwoOnly && OWriter.EmitAddrsigSection) {
auto Frag = new MCDataFragment(AddrsigSection);
Frag->setLayoutOrder(0);
raw_svector_ostream OS(Frag->getContents());
- for (const MCSymbol *S : AddrsigSyms) {
+ for (const MCSymbol *S : OWriter.AddrsigSyms) {
if (!S->isRegistered())
continue;
if (!S->isTemporary()) {
@@ -1106,7 +1107,7 @@ uint64_t WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
}
MCSection *TargetSection = &S->getSection();
- assert(SectionMap.find(TargetSection) != SectionMap.end() &&
+ assert(SectionMap.contains(TargetSection) &&
"Section must already have been defined in "
"executePostLayoutBinding!");
encodeULEB128(SectionMap[TargetSection]->Symbol->getIndex(), OS);
@@ -1114,7 +1115,7 @@ uint64_t WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
}
// Create the contents of the .llvm.call-graph-profile section.
- if (CGProfileSection) {
+ if (Mode != DwoOnly && CGProfileSection) {
auto *Frag = new MCDataFragment(CGProfileSection);
Frag->setLayoutOrder(0);
raw_svector_ostream OS(Frag->getContents());
@@ -1142,13 +1143,22 @@ uint64_t WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
WriteFileHeader(Header);
writeSectionHeaders();
- // Write section contents.
+#ifndef NDEBUG
sections::iterator I = Sections.begin();
sections::iterator IE = Sections.end();
MCAssembler::iterator J = Asm.begin();
MCAssembler::iterator JE = Asm.end();
- for (; I != IE && J != JE; ++I, ++J)
- writeSection(Asm, Layout, **I, *J);
+ for (; I != IE && J != JE; ++I, ++J) {
+ while (J != JE && ((Mode == NonDwoOnly && isDwoSection(*J)) ||
+ (Mode == DwoOnly && !isDwoSection(*J))))
+ ++J;
+ assert(J != JE && (**I).MCSection == &*J && "Wrong bound MCSection");
+ }
+#endif
+
+ // Write section contents.
+ for (std::unique_ptr<COFFSection> &Sec : Sections)
+ writeSection(Asm, Layout, *Sec);
assert(W.OS.tell() == Header.PointerToSymbolTable &&
"Header::PointerToSymbolTable is insane!");
@@ -1164,6 +1174,74 @@ uint64_t WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
return W.OS.tell() - StartOffset;
}
+//------------------------------------------------------------------------------
+// WinCOFFObjectWriter class implementation
+
+////////////////////////////////////////////////////////////////////////////////
+// MCObjectWriter interface implementations
+
+void WinCOFFObjectWriter::reset() {
+ ObjWriter->reset();
+ if (DwoWriter)
+ DwoWriter->reset();
+ MCObjectWriter::reset();
+}
+
+bool WinCOFFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
+ const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
+ bool InSet, bool IsPCRel) const {
+ // Don't drop relocations between functions, even if they are in the same text
+ // section. Multiple Visual C++ linker features depend on having the
+ // relocations present. The /INCREMENTAL flag will cause these relocations to
+ // point to thunks, and the /GUARD:CF flag assumes that it can use relocations
+ // to approximate the set of all address taken functions. LLD's implementation
+ // of /GUARD:CF also relies on the existence of these relocations.
+ uint16_t Type = cast<MCSymbolCOFF>(SymA).getType();
+ if ((Type >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION)
+ return false;
+ return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB,
+ InSet, IsPCRel);
+}
+
+void WinCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ if (EmitAddrsigSection) {
+ ObjWriter->AddrsigSection = Asm.getContext().getCOFFSection(
+ ".llvm_addrsig", COFF::IMAGE_SCN_LNK_REMOVE,
+ SectionKind::getMetadata());
+ Asm.registerSection(*ObjWriter->AddrsigSection);
+ }
+
+ if (!Asm.CGProfile.empty()) {
+ ObjWriter->CGProfileSection = Asm.getContext().getCOFFSection(
+ ".llvm.call-graph-profile", COFF::IMAGE_SCN_LNK_REMOVE,
+ SectionKind::getMetadata());
+ Asm.registerSection(*ObjWriter->CGProfileSection);
+ }
+
+ ObjWriter->executePostLayoutBinding(Asm, Layout);
+ if (DwoWriter)
+ DwoWriter->executePostLayoutBinding(Asm, Layout);
+}
+
+void WinCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) {
+ assert(!isDwoSection(*Fragment->getParent()) &&
+ "No relocation in Dwo sections");
+ ObjWriter->recordRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
+}
+
+uint64_t WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ uint64_t TotalSize = ObjWriter->writeObject(Asm, Layout);
+ if (DwoWriter)
+ TotalSize += DwoWriter->writeObject(Asm, Layout);
+ return TotalSize;
+}
+
MCWinCOFFObjectTargetWriter::MCWinCOFFObjectTargetWriter(unsigned Machine_)
: Machine(Machine_) {}
@@ -1177,3 +1255,9 @@ std::unique_ptr<MCObjectWriter> llvm::createWinCOFFObjectWriter(
std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS) {
return std::make_unique<WinCOFFObjectWriter>(std::move(MOTW), OS);
}
+
+std::unique_ptr<MCObjectWriter> llvm::createWinCOFFDwoObjectWriter(
+ std::unique_ptr<MCWinCOFFObjectTargetWriter> MOTW, raw_pwrite_stream &OS,
+ raw_pwrite_stream &DwoOS) {
+ return std::make_unique<WinCOFFObjectWriter>(std::move(MOTW), OS, DwoOS);
+}
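
The refactor above moves the per-object state into WinCOFFWriter so that one WinCOFFObjectWriter can drive two of them, one for the .o and one for the .dwo output. A hedged sketch of how the new entry point composes; the streams and helper name are placeholders, and the header is assumed to be the one that already declares createWinCOFFObjectWriter:

#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCWinCOFFObjectWriter.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
#include <utility>

// Sketch: build the split-DWARF COFF writer added by this change. MOTW would
// come from the target backend in real code.
std::unique_ptr<llvm::MCObjectWriter>
makeDwoCOFFWriter(std::unique_ptr<llvm::MCWinCOFFObjectTargetWriter> MOTW,
                  llvm::raw_pwrite_stream &ObjOS,
                  llvm::raw_pwrite_stream &DwoOS) {
  // Non-.dwo sections are written to ObjOS and .dwo sections to DwoOS;
  // writeObject() reports the combined size of both outputs.
  return llvm::createWinCOFFDwoObjectWriter(std::move(MOTW), ObjOS, DwoOS);
}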
diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp
index ab6acf085e7b..036210d6b0ef 100644
--- a/llvm/lib/MC/XCOFFObjectWriter.cpp
+++ b/llvm/lib/MC/XCOFFObjectWriter.cpp
@@ -122,6 +122,15 @@ struct SectionEntry {
int16_t Index;
+ virtual uint64_t advanceFileOffset(const uint64_t MaxRawDataSize,
+ const uint64_t RawPointer) {
+ FileOffsetToData = RawPointer;
+ uint64_t NewPointer = RawPointer + Size;
+ if (NewPointer > MaxRawDataSize)
+ report_fatal_error("Section raw data overflowed this object file.");
+ return NewPointer;
+ }
+
// XCOFF has special section numbers for symbols:
// -2 Specifies N_DEBUG, a special symbolic debugging symbol.
// -1 Specifies N_ABS, an absolute symbol. The symbol has a value but is not
@@ -189,6 +198,19 @@ struct DwarfSectionEntry : public SectionEntry {
// is for the size the DWARF section occupies including paddings.
uint32_t MemorySize;
+ // TODO: Remove this override. Loadable sections (e.g., .text, .data) may need
+ // to be aligned. Other sections generally don't need any alignment, but if
+ // they're aligned, the RawPointer should be adjusted before writing the
+ // section. Then a dwarf-specific function wouldn't be needed.
+ uint64_t advanceFileOffset(const uint64_t MaxRawDataSize,
+ const uint64_t RawPointer) override {
+ FileOffsetToData = RawPointer;
+ uint64_t NewPointer = RawPointer + MemorySize;
+ assert(NewPointer <= MaxRawDataSize &&
+ "Section raw data overflowed this object file.");
+ return NewPointer;
+ }
+
DwarfSectionEntry(StringRef N, int32_t Flags,
std::unique_ptr<XCOFFSection> Sect)
: SectionEntry(N, Flags | XCOFF::STYP_DWARF), DwarfSect(std::move(Sect)),
@@ -206,7 +228,7 @@ struct DwarfSectionEntry : public SectionEntry {
struct ExceptionTableEntry {
const MCSymbol *Trap;
- uint64_t TrapAddress;
+ uint64_t TrapAddress = ~0ul;
unsigned Lang;
unsigned Reason;
@@ -233,6 +255,42 @@ struct ExceptionSectionEntry : public SectionEntry {
virtual ~ExceptionSectionEntry() = default;
};
+struct CInfoSymInfo {
+ // Name of the C_INFO symbol associated with the section
+ std::string Name;
+ std::string Metadata;
+ // Offset into the start of the metadata in the section
+ uint64_t Offset;
+
+ CInfoSymInfo(std::string Name, std::string Metadata)
+ : Name(Name), Metadata(Metadata) {}
+ // Metadata needs to be padded out to an even word size.
+ uint32_t paddingSize() const {
+ return alignTo(Metadata.size(), sizeof(uint32_t)) - Metadata.size();
+ };
+
+ // Total size of the entry, including the 4 byte length
+ uint32_t size() const {
+ return Metadata.size() + paddingSize() + sizeof(uint32_t);
+ };
+};
+
+struct CInfoSymSectionEntry : public SectionEntry {
+ std::unique_ptr<CInfoSymInfo> Entry;
+
+ CInfoSymSectionEntry(StringRef N, int32_t Flags) : SectionEntry(N, Flags) {}
+ virtual ~CInfoSymSectionEntry() = default;
+ void addEntry(std::unique_ptr<CInfoSymInfo> NewEntry) {
+ Entry = std::move(NewEntry);
+ Entry->Offset = sizeof(uint32_t);
+ Size += Entry->size();
+ }
+ void reset() override {
+ SectionEntry::reset();
+ Entry.reset();
+ }
+};
+
class XCOFFObjectWriter : public MCObjectWriter {
uint32_t SymbolTableEntryCount = 0;
@@ -287,6 +345,7 @@ class XCOFFObjectWriter : public MCObjectWriter {
std::vector<SectionEntry> OverflowSections;
ExceptionSectionEntry ExceptionSection;
+ CInfoSymSectionEntry CInfoSymSection;
CsectGroup &getCsectGroup(const MCSectionXCOFF *MCSec);
@@ -328,6 +387,10 @@ class XCOFFObjectWriter : public MCObjectWriter {
void writeSectionForExceptionSectionEntry(
const MCAssembler &Asm, const MCAsmLayout &Layout,
ExceptionSectionEntry &ExceptionEntry, uint64_t &CurrentAddressLocation);
+ void writeSectionForCInfoSymSectionEntry(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ CInfoSymSectionEntry &CInfoSymEntry,
+ uint64_t &CurrentAddressLocation);
void writeSymbolTable(const MCAsmLayout &Layout);
void writeSymbolAuxDwarfEntry(uint64_t LengthOfSectionPortion,
uint64_t NumberOfRelocEnt = 0);
@@ -368,6 +431,7 @@ class XCOFFObjectWriter : public MCObjectWriter {
unsigned getExceptionSectionSize();
unsigned getExceptionOffset(const MCSymbol *Symbol);
+ void addCInfoSymEntry(StringRef Name, StringRef Metadata) override;
size_t auxiliaryHeaderSize() const {
// 64-bit object files have no auxiliary header.
return HasVisibility && !is64Bit() ? XCOFF::AuxFileHeaderSizeShort : 0;
@@ -396,7 +460,8 @@ XCOFFObjectWriter::XCOFFObjectWriter(
CsectGroups{&TDataCsects}),
TBSS(".tbss", XCOFF::STYP_TBSS, /* IsVirtual */ true,
CsectGroups{&TBSSCsects}),
- ExceptionSection(".except", XCOFF::STYP_EXCEPT) {}
+ ExceptionSection(".except", XCOFF::STYP_EXCEPT),
+ CInfoSymSection(".info", XCOFF::STYP_INFO) {}
void XCOFFObjectWriter::reset() {
// Clear the mappings we created.
@@ -412,6 +477,7 @@ void XCOFFObjectWriter::reset() {
for (auto &OverflowSec : OverflowSections)
OverflowSec.reset();
ExceptionSection.reset();
+ CInfoSymSection.reset();
// Reset states in XCOFFObjectWriter.
SymbolTableEntryCount = 0;
@@ -488,8 +554,7 @@ void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
const MCAsmLayout &Layout) {
for (const auto &S : Asm) {
const auto *MCSec = cast<const MCSectionXCOFF>(&S);
- assert(SectionMap.find(MCSec) == SectionMap.end() &&
- "Cannot add a section twice.");
+ assert(!SectionMap.contains(MCSec) && "Cannot add a section twice.");
// If the name does not fit in the storage provided in the symbol table
// entry, add it to the string table.
@@ -547,7 +612,7 @@ void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
if (!XSym->isExternal())
continue;
- assert(SectionMap.find(ContainingCsect) != SectionMap.end() &&
+ assert(SectionMap.contains(ContainingCsect) &&
"Expected containing csect to exist in map");
XCOFFSection *Csect = SectionMap[ContainingCsect];
// Lookup the containing csect and add the symbol to it.
@@ -560,6 +625,10 @@ void XCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
Strings.add(XSym->getSymbolTableName());
}
+ std::unique_ptr<CInfoSymInfo> &CISI = CInfoSymSection.Entry;
+ if (CISI && nameShouldBeInStringTable(CISI->Name))
+ Strings.add(CISI->Name);
+
FileNames = Asm.getFileNames();
// Emit ".file" as the source file name when there is no file name.
if (FileNames.empty())
@@ -583,7 +652,7 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
// If we could not find the symbol directly in SymbolIndexMap, this symbol
// could either be a temporary symbol or an undefined symbol. In this case,
// we would need to have the relocation reference its csect instead.
- return SymbolIndexMap.find(Sym) != SymbolIndexMap.end()
+ return SymbolIndexMap.contains(Sym)
? SymbolIndexMap[Sym]
: SymbolIndexMap[ContainingCsect->getQualNameSymbol()];
};
@@ -616,12 +685,19 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
TargetObjectWriter->getRelocTypeAndSignSize(Target, Fixup, IsPCRel);
const MCSectionXCOFF *SymASec = getContainingCsect(cast<MCSymbolXCOFF>(SymA));
- assert(SectionMap.find(SymASec) != SectionMap.end() &&
+ assert(SectionMap.contains(SymASec) &&
"Expected containing csect to exist in map.");
+ assert((Fixup.getOffset() <=
+ MaxRawDataSize - Layout.getFragmentOffset(Fragment)) &&
+ "Fragment offset + fixup offset is overflowed.");
+ uint32_t FixupOffsetInCsect =
+ Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+
const uint32_t Index = getIndex(SymA, SymASec);
if (Type == XCOFF::RelocationType::R_POS ||
- Type == XCOFF::RelocationType::R_TLS)
+ Type == XCOFF::RelocationType::R_TLS ||
+ Type == XCOFF::RelocationType::R_TLS_LE)
// The FixedValue should be symbol's virtual address in this object file
// plus any constant value that we might get.
FixedValue = getVirtualAddress(SymA, SymASec) + Target.getConstant();
@@ -657,24 +733,22 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
// The address of the branch instruction should be the sum of section
// address, fragment offset and Fixup offset.
- uint64_t BRInstrAddress = SectionMap[ParentSec]->Address +
- Layout.getFragmentOffset(Fragment) +
- Fixup.getOffset();
- // The FixedValue should be the difference between SymA csect address and BR
- // instr address plus any constant value.
- FixedValue =
- SectionMap[SymASec]->Address - BRInstrAddress + Target.getConstant();
+ uint64_t BRInstrAddress =
+ SectionMap[ParentSec]->Address + FixupOffsetInCsect;
+ // The FixedValue should be the difference between symbol's virtual address
+ // and BR instr address plus any constant value.
+ FixedValue = getVirtualAddress(SymA, SymASec) - BRInstrAddress +
+ Target.getConstant();
+ } else if (Type == XCOFF::RelocationType::R_REF) {
+ // The FixedValue and FixupOffsetInCsect should always be 0 since it
+ // specifies a nonrelocating reference.
+ FixedValue = 0;
+ FixupOffsetInCsect = 0;
}
- assert((Fixup.getOffset() <=
- MaxRawDataSize - Layout.getFragmentOffset(Fragment)) &&
- "Fragment offset + fixup offset is overflowed.");
- uint32_t FixupOffsetInCsect =
- Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
-
XCOFFRelocation Reloc = {Index, FixupOffsetInCsect, SignAndSize, Type};
MCSectionXCOFF *RelocationSec = cast<MCSectionXCOFF>(Fragment->getParent());
- assert(SectionMap.find(RelocationSec) != SectionMap.end() &&
+ assert(SectionMap.contains(RelocationSec) &&
"Expected containing csect to exist in map.");
SectionMap[RelocationSec]->Relocations.push_back(Reloc);
@@ -686,7 +760,7 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm,
report_fatal_error("relocation for opposite term is not yet supported");
const MCSectionXCOFF *SymBSec = getContainingCsect(cast<MCSymbolXCOFF>(SymB));
- assert(SectionMap.find(SymBSec) != SectionMap.end() &&
+ assert(SectionMap.contains(SymBSec) &&
"Expected containing csect to exist in map.");
if (SymASec == SymBSec)
report_fatal_error(
@@ -717,6 +791,8 @@ void XCOFFObjectWriter::writeSections(const MCAssembler &Asm,
CurrentAddressLocation);
writeSectionForExceptionSectionEntry(Asm, Layout, ExceptionSection,
CurrentAddressLocation);
+ writeSectionForCInfoSymSectionEntry(Asm, Layout, CInfoSymSection,
+ CurrentAddressLocation);
}
uint64_t XCOFFObjectWriter::writeObject(MCAssembler &Asm,
@@ -986,6 +1062,8 @@ void XCOFFObjectWriter::writeSectionHeaderTable() {
writeSectionHeader(&OverflowSec);
if (hasExceptionSection())
writeSectionHeader(&ExceptionSection);
+ if (CInfoSymSection.Entry)
+ writeSectionHeader(&CInfoSymSection);
}
void XCOFFObjectWriter::writeRelocation(XCOFFRelocation Reloc,
@@ -1026,14 +1104,45 @@ void XCOFFObjectWriter::writeRelocations() {
void XCOFFObjectWriter::writeSymbolTable(const MCAsmLayout &Layout) {
// Write C_FILE symbols.
- // The n_name of a C_FILE symbol is the source file's name when no auxiliary
- // entries are present.
for (const std::pair<std::string, size_t> &F : FileNames) {
- writeSymbolEntry(F.first, /*Value=*/0, XCOFF::ReservedSectionNum::N_DEBUG,
- /*SymbolType=*/0, XCOFF::C_FILE,
+ // The n_name of a C_FILE symbol is the source file's name when no auxiliary
+ // entries are present.
+ StringRef FileName = F.first;
+
+ // For C_FILE symbols, the Source Language ID overlays the high-order byte
+ // of the SymbolType field, and the CPU Version ID is defined as the
+ // low-order byte.
+ // AIX's system assembler determines the source language ID based on the
+ // source file's name suffix, and the behavior here is consistent with it.
+ uint8_t LangID;
+ if (FileName.ends_with(".c"))
+ LangID = XCOFF::TB_C;
+ else if (FileName.ends_with_insensitive(".f") ||
+ FileName.ends_with_insensitive(".f77") ||
+ FileName.ends_with_insensitive(".f90") ||
+ FileName.ends_with_insensitive(".f95") ||
+ FileName.ends_with_insensitive(".f03") ||
+ FileName.ends_with_insensitive(".f08"))
+ LangID = XCOFF::TB_Fortran;
+ else
+ LangID = XCOFF::TB_CPLUSPLUS;
+ uint8_t CpuID;
+ if (is64Bit())
+ CpuID = XCOFF::TCPU_PPC64;
+ else
+ CpuID = XCOFF::TCPU_COM;
+
+ writeSymbolEntry(FileName, /*Value=*/0, XCOFF::ReservedSectionNum::N_DEBUG,
+ /*SymbolType=*/(LangID << 8) | CpuID, XCOFF::C_FILE,
/*NumberOfAuxEntries=*/0);
}
+ if (CInfoSymSection.Entry)
+ writeSymbolEntry(CInfoSymSection.Entry->Name, CInfoSymSection.Entry->Offset,
+ CInfoSymSection.Index,
+ /*SymbolType=*/0, XCOFF::C_INFO,
+ /*NumberOfAuxEntries=*/0);
+
for (const auto &Csect : UndefinedCsects) {
writeSymbolEntryForControlSection(Csect, XCOFF::ReservedSectionNum::N_UNDEF,
Csect.MCSec->getStorageClass());
@@ -1158,29 +1267,21 @@ void XCOFFObjectWriter::finalizeSectionInfo() {
if (Sec->Index == SectionEntry::UninitializedIndex || Sec->IsVirtual)
continue;
- Sec->FileOffsetToData = RawPointer;
- RawPointer += Sec->Size;
- if (RawPointer > MaxRawDataSize)
- report_fatal_error("Section raw data overflowed this object file.");
+ RawPointer = Sec->advanceFileOffset(MaxRawDataSize, RawPointer);
}
if (!DwarfSections.empty()) {
RawPointer += PaddingsBeforeDwarf;
for (auto &DwarfSection : DwarfSections) {
- DwarfSection.FileOffsetToData = RawPointer;
- RawPointer += DwarfSection.MemorySize;
- if (RawPointer > MaxRawDataSize)
- report_fatal_error("Section raw data overflowed this object file.");
+ RawPointer = DwarfSection.advanceFileOffset(MaxRawDataSize, RawPointer);
}
}
- if (hasExceptionSection()) {
- ExceptionSection.FileOffsetToData = RawPointer;
- RawPointer += ExceptionSection.Size;
+ if (hasExceptionSection())
+ RawPointer = ExceptionSection.advanceFileOffset(MaxRawDataSize, RawPointer);
- assert(RawPointer <= MaxRawDataSize &&
- "Section raw data overflowed this object file.");
- }
+ if (CInfoSymSection.Entry)
+ RawPointer = CInfoSymSection.advanceFileOffset(MaxRawDataSize, RawPointer);
for (auto *Sec : Sections) {
if (Sec->Index != SectionEntry::UninitializedIndex)
@@ -1243,10 +1344,19 @@ unsigned XCOFFObjectWriter::getExceptionOffset(const MCSymbol *Symbol) {
: XCOFF::ExceptionSectionEntrySize32);
}
+void XCOFFObjectWriter::addCInfoSymEntry(StringRef Name, StringRef Metadata) {
+ assert(!CInfoSymSection.Entry && "Multiple entries are not supported");
+ CInfoSymSection.addEntry(
+ std::make_unique<CInfoSymInfo>(Name.str(), Metadata.str()));
+}
+
void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
// The symbol table starts with all the C_FILE symbols.
uint32_t SymbolTableIndex = FileNames.size();
+ if (CInfoSymSection.Entry)
+ SymbolTableIndex++;
+
// Calculate indices for undefined symbols.
for (auto &Csect : UndefinedCsects) {
Csect.Size = 0;
@@ -1403,6 +1513,14 @@ void XCOFFObjectWriter::assignAddressesAndIndices(const MCAsmLayout &Layout) {
Address = alignTo(Address, DefaultSectionAlign);
}
+ if (CInfoSymSection.Entry) {
+ CInfoSymSection.Index = SectionIndex++;
+ SectionCount++;
+ CInfoSymSection.Address = 0;
+ Address += CInfoSymSection.Size;
+ Address = alignTo(Address, DefaultSectionAlign);
+ }
+
SymbolTableEntryCount = SymbolTableIndex;
}
@@ -1504,6 +1622,41 @@ void XCOFFObjectWriter::writeSectionForExceptionSectionEntry(
CurrentAddressLocation += getExceptionSectionSize();
}
+void XCOFFObjectWriter::writeSectionForCInfoSymSectionEntry(
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ CInfoSymSectionEntry &CInfoSymEntry, uint64_t &CurrentAddressLocation) {
+ if (!CInfoSymSection.Entry)
+ return;
+
+ constexpr int WordSize = sizeof(uint32_t);
+ std::unique_ptr<CInfoSymInfo> &CISI = CInfoSymEntry.Entry;
+ const std::string &Metadata = CISI->Metadata;
+
+ // Emit the 4-byte length of the metadata.
+ W.write<uint32_t>(Metadata.size());
+
+ if (Metadata.size() == 0)
+ return;
+
+ // Write out the payload one word at a time.
+ size_t Index = 0;
+ while (Index + WordSize <= Metadata.size()) {
+ uint32_t NextWord =
+ llvm::support::endian::read32be(Metadata.data() + Index);
+ W.write<uint32_t>(NextWord);
+ Index += WordSize;
+ }
+
+ // If there is padding, we have at least one byte of payload left to emit.
+ if (CISI->paddingSize()) {
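+ // Zero-fill a whole word, copy in the remaining tail bytes, and emit the
+ // word big-endian so the payload is padded out to a 4-byte boundary.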
+ std::array<uint8_t, WordSize> LastWord = {0};
+ ::memcpy(LastWord.data(), Metadata.data() + Index, Metadata.size() - Index);
+ W.write<uint32_t>(llvm::support::endian::read32be(LastWord.data()));
+ }
+
+ CurrentAddressLocation += CISI->size();
+}
+
// Takes the log base 2 of the alignment and shifts the result into the 5 most
// significant bits of a byte, then ORs the csect type into the 3 least
// significant bits.
diff --git a/llvm/lib/MCA/CodeEmitter.cpp b/llvm/lib/MCA/CodeEmitter.cpp
index 0ce17bd84cf3..b3936a95c06a 100644
--- a/llvm/lib/MCA/CodeEmitter.cpp
+++ b/llvm/lib/MCA/CodeEmitter.cpp
@@ -27,7 +27,7 @@ CodeEmitter::EncodingInfo CodeEmitter::getOrCreateEncodingInfo(unsigned MCID) {
MAB.relaxInstruction(Relaxed, STI);
EI.first = Code.size();
- MCE.encodeInstruction(Relaxed, VecOS, Fixups, STI);
+ MCE.encodeInstruction(Relaxed, Code, Fixups, STI);
EI.second = Code.size() - EI.first;
return EI;
}
diff --git a/llvm/lib/MCA/CustomBehaviour.cpp b/llvm/lib/MCA/CustomBehaviour.cpp
index b593e96d1512..1aa266e0a1e4 100644
--- a/llvm/lib/MCA/CustomBehaviour.cpp
+++ b/llvm/lib/MCA/CustomBehaviour.cpp
@@ -42,14 +42,19 @@ CustomBehaviour::getEndViews(llvm::MCInstPrinter &IP,
return std::vector<std::unique_ptr<View>>();
}
-SharedInstrument InstrumentManager::createInstrument(llvm::StringRef Desc,
+UniqueInstrument InstrumentManager::createInstrument(llvm::StringRef Desc,
llvm::StringRef Data) {
- return std::make_shared<Instrument>(Desc, Data);
+ return std::make_unique<Instrument>(Desc, Data);
+}
+
+SmallVector<UniqueInstrument>
+InstrumentManager::createInstruments(const MCInst &Inst) {
+ return SmallVector<UniqueInstrument>();
}
unsigned InstrumentManager::getSchedClassID(
const MCInstrInfo &MCII, const MCInst &MCI,
- const llvm::SmallVector<SharedInstrument> &IVec) const {
+ const llvm::SmallVector<Instrument *> &IVec) const {
return MCII.get(MCI.getOpcode()).getSchedClass();
}
diff --git a/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp b/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
index 474bf84cf891..53663a10e8a7 100644
--- a/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
+++ b/llvm/lib/MCA/HardwareUnits/RegisterFile.cpp
@@ -127,8 +127,8 @@ void RegisterFile::onInstructionExecuted(Instruction *IS) {
if (WR.getWriteState() == &WS)
WR.notifyExecuted(CurrentCycle);
- for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
- WriteRef &OtherWR = RegisterMappings[*I].first;
+ for (MCPhysReg I : MRI.subregs(RegID)) {
+ WriteRef &OtherWR = RegisterMappings[I].first;
if (OtherWR.getWriteState() == &WS)
OtherWR.notifyExecuted(CurrentCycle);
}
@@ -136,8 +136,8 @@ void RegisterFile::onInstructionExecuted(Instruction *IS) {
if (!WS.clearsSuperRegisters())
continue;
- for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) {
- WriteRef &OtherWR = RegisterMappings[*I].first;
+ for (MCPhysReg I : MRI.superregs(RegID)) {
+ WriteRef &OtherWR = RegisterMappings[I].first;
if (OtherWR.getWriteState() == &WS)
OtherWR.notifyExecuted(CurrentCycle);
}
@@ -182,11 +182,11 @@ void RegisterFile::addRegisterFile(const MCRegisterFileDesc &RF,
Entry.AllowMoveElimination = RCE.AllowMoveElimination;
// Assume the same cost for each sub-register.
- for (MCSubRegIterator I(Reg, &MRI); I.isValid(); ++I) {
- RegisterRenamingInfo &OtherEntry = RegisterMappings[*I].second;
+ for (MCPhysReg I : MRI.subregs(Reg)) {
+ RegisterRenamingInfo &OtherEntry = RegisterMappings[I].second;
if (!OtherEntry.IndexPlusCost.first &&
(!OtherEntry.RenameAs ||
- MRI.isSuperRegister(*I, OtherEntry.RenameAs))) {
+ MRI.isSuperRegister(I, OtherEntry.RenameAs))) {
OtherEntry.IndexPlusCost = IPC;
OtherEntry.RenameAs = Reg;
}
@@ -282,8 +282,8 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
MCPhysReg ZeroRegisterID =
WS.clearsSuperRegisters() ? RegID : WS.getRegisterID();
ZeroRegisters.setBitVal(ZeroRegisterID, IsWriteZero);
- for (MCSubRegIterator I(ZeroRegisterID, &MRI); I.isValid(); ++I)
- ZeroRegisters.setBitVal(*I, IsWriteZero);
+ for (MCPhysReg I : MRI.subregs(ZeroRegisterID))
+ ZeroRegisters.setBitVal(I, IsWriteZero);
// If this move has been eliminated, then method tryEliminateMoveOrSwap should
// have already updated all the register mappings.
@@ -304,9 +304,9 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
// Update the mapping for register RegID including its sub-registers.
RegisterMappings[RegID].first = Write;
RegisterMappings[RegID].second.AliasRegID = 0U;
- for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
- RegisterMappings[*I].first = Write;
- RegisterMappings[*I].second.AliasRegID = 0U;
+ for (MCPhysReg I : MRI.subregs(RegID)) {
+ RegisterMappings[I].first = Write;
+ RegisterMappings[I].second.AliasRegID = 0U;
}
// No physical registers are allocated for instructions that are optimized
@@ -319,13 +319,13 @@ void RegisterFile::addRegisterWrite(WriteRef Write,
if (!WS.clearsSuperRegisters())
return;
- for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) {
+ for (MCPhysReg I : MRI.superregs(RegID)) {
if (!IsEliminated) {
- RegisterMappings[*I].first = Write;
- RegisterMappings[*I].second.AliasRegID = 0U;
+ RegisterMappings[I].first = Write;
+ RegisterMappings[I].second.AliasRegID = 0U;
}
- ZeroRegisters.setBitVal(*I, IsWriteZero);
+ ZeroRegisters.setBitVal(I, IsWriteZero);
}
}
@@ -365,8 +365,8 @@ void RegisterFile::removeRegisterWrite(
if (WR.getWriteState() == &WS)
WR.commit();
- for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
- WriteRef &OtherWR = RegisterMappings[*I].first;
+ for (MCPhysReg I : MRI.subregs(RegID)) {
+ WriteRef &OtherWR = RegisterMappings[I].first;
if (OtherWR.getWriteState() == &WS)
OtherWR.commit();
}
@@ -374,8 +374,8 @@ void RegisterFile::removeRegisterWrite(
if (!WS.clearsSuperRegisters())
return;
- for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) {
- WriteRef &OtherWR = RegisterMappings[*I].first;
+ for (MCPhysReg I : MRI.superregs(RegID)) {
+ WriteRef &OtherWR = RegisterMappings[I].first;
if (OtherWR.getWriteState() == &WS)
OtherWR.commit();
}
@@ -472,8 +472,8 @@ bool RegisterFile::tryEliminateMoveOrSwap(MutableArrayRef<WriteState> Writes,
AliasedReg = RMAlias.AliasRegID;
RegisterMappings[AliasReg].second.AliasRegID = AliasedReg;
- for (MCSubRegIterator I(AliasReg, &MRI); I.isValid(); ++I)
- RegisterMappings[*I].second.AliasRegID = AliasedReg;
+ for (MCPhysReg I : MRI.subregs(AliasReg))
+ RegisterMappings[I].second.AliasRegID = AliasedReg;
if (ZeroRegisters[RS.getRegisterID()]) {
WS.setWriteZero();
@@ -530,8 +530,8 @@ void RegisterFile::collectWrites(
}
// Handle potential partial register updates.
- for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) {
- const WriteRef &WR = RegisterMappings[*I].first;
+ for (MCPhysReg I : MRI.subregs(RegID)) {
+ const WriteRef &WR = RegisterMappings[I].first;
if (WR.getWriteState()) {
Writes.push_back(WR);
} else if (WR.hasKnownWriteBackCycle()) {
diff --git a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
index a2e6a9e0e0f1..393548dd5bd3 100644
--- a/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
+++ b/llvm/lib/MCA/HardwareUnits/ResourceManager.cpp
@@ -320,7 +320,7 @@ uint64_t ResourceManager::checkAvailability(const InstrDesc &Desc) const {
continue;
}
- uint64_t ResourceMask = PowerOf2Floor(ReadyMask);
+ uint64_t ResourceMask = llvm::bit_floor(ReadyMask);
auto it = AvailableUnits.find(ResourceMask);
if (it == AvailableUnits.end()) {
diff --git a/llvm/lib/MCA/InstrBuilder.cpp b/llvm/lib/MCA/InstrBuilder.cpp
index 24729bd4f034..bddd370ea448 100644
--- a/llvm/lib/MCA/InstrBuilder.cpp
+++ b/llvm/lib/MCA/InstrBuilder.cpp
@@ -123,7 +123,7 @@ static void initializeUsedResources(InstrDesc &ID,
ResourcePlusCycles &A = Worklist[I];
if (!A.second.size()) {
assert(llvm::popcount(A.first) > 1 && "Expected a group!");
- UsedResourceGroups |= PowerOf2Floor(A.first);
+ UsedResourceGroups |= llvm::bit_floor(A.first);
continue;
}
@@ -134,7 +134,7 @@ static void initializeUsedResources(InstrDesc &ID,
UsedResourceUnits |= A.first;
} else {
// Remove the leading 1 from the resource group mask.
- NormalizedMask ^= PowerOf2Floor(NormalizedMask);
+ NormalizedMask ^= llvm::bit_floor(NormalizedMask);
if (UnitsFromResourceGroups & NormalizedMask)
ID.HasPartiallyOverlappingGroups = true;
@@ -172,7 +172,7 @@ static void initializeUsedResources(InstrDesc &ID,
for (ResourcePlusCycles &RPC : ID.Resources) {
if (llvm::popcount(RPC.first) > 1 && !RPC.second.isReserved()) {
// Remove the leading 1 from the resource group mask.
- uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
+ uint64_t Mask = RPC.first ^ llvm::bit_floor(RPC.first);
uint64_t MaxResourceUnits = llvm::popcount(Mask);
if (RPC.second.NumUnits > (unsigned)llvm::popcount(Mask)) {
RPC.second.setReserved();
@@ -511,7 +511,7 @@ Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI,
- const SmallVector<SharedInstrument> &IVec) {
+ const SmallVector<Instrument *> &IVec) {
assert(STI.getSchedModel().hasInstrSchedModel() &&
"Itineraries are not yet supported!");
@@ -601,7 +601,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI,
Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI,
- const SmallVector<SharedInstrument> &IVec) {
+ const SmallVector<Instrument *> &IVec) {
// Cache lookup using SchedClassID from Instrumentation
unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);
@@ -612,7 +612,7 @@ InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI,
unsigned CPUID = STI.getSchedModel().getProcessorID();
SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);
auto VDKey = std::make_pair(&MCI, SchedClassID);
- if (VariantDescriptors.find(VDKey) != VariantDescriptors.end())
+ if (VariantDescriptors.contains(VDKey))
return *VariantDescriptors[VDKey];
return createInstrDescImpl(MCI, IVec);
@@ -622,7 +622,7 @@ STATISTIC(NumVariantInst, "Number of MCInsts that doesn't have static Desc");
Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI,
- const SmallVector<SharedInstrument> &IVec) {
+ const SmallVector<Instrument *> &IVec) {
Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI, IVec);
if (!DescOrErr)
return DescOrErr.takeError();
diff --git a/llvm/lib/MCA/Stages/EntryStage.cpp b/llvm/lib/MCA/Stages/EntryStage.cpp
index 6b3fbb8c6236..5c82ce780478 100644
--- a/llvm/lib/MCA/Stages/EntryStage.cpp
+++ b/llvm/lib/MCA/Stages/EntryStage.cpp
@@ -67,7 +67,8 @@ llvm::Error EntryStage::cycleResume() {
llvm::Error EntryStage::cycleEnd() {
// Find the first instruction which hasn't been retired.
- auto Range = make_range(&Instructions[NumRetired], Instructions.end());
+ auto Range =
+ make_range(Instructions.begin() + NumRetired, Instructions.end());
auto It = find_if(Range, [](const std::unique_ptr<Instruction> &I) {
return !I->isRetired();
});
diff --git a/llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp b/llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp
index 37fb22740dca..622726be8ce5 100644
--- a/llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp
@@ -13,6 +13,7 @@
#include "llvm/ObjCopy/COFF/COFFConfig.h"
#include "llvm/ObjCopy/CommonConfig.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
#include "llvm/Support/CRC.h"
@@ -130,8 +131,37 @@ static uint32_t flagsToCharacteristics(SectionFlag AllFlags, uint32_t OldChar) {
return NewCharacteristics;
}
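+// Write the raw contents of the section named SectionName to the file at
+// FileName; this backs llvm-objcopy's --dump-section support for COFF.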
+static Error dumpSection(Object &O, StringRef SectionName, StringRef FileName) {
+ for (const coff::Section &Section : O.getSections()) {
+ if (Section.Name != SectionName)
+ continue;
+
+ ArrayRef<uint8_t> Contents = Section.getContents();
+
+ std::unique_ptr<FileOutputBuffer> Buffer;
+ if (auto B = FileOutputBuffer::create(FileName, Contents.size()))
+ Buffer = std::move(*B);
+ else
+ return B.takeError();
+
+ llvm::copy(Contents, Buffer->getBufferStart());
+ if (Error E = Buffer->commit())
+ return E;
+
+ return Error::success();
+ }
+ return createStringError(object_error::parse_failed, "section '%s' not found",
+ SectionName.str().c_str());
+}
+
static Error handleArgs(const CommonConfig &Config,
const COFFConfig &COFFConfig, Object &Obj) {
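+ // Each DumpSection entry has the form <section>=<file>.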
+ for (StringRef Op : Config.DumpSection) {
+ auto [Section, File] = Op.split('=');
+ if (Error E = dumpSection(Obj, Section, File))
+ return E;
+ }
+
// Perform the actual section removals.
Obj.removeSections([&Config](const Section &Sec) {
// Contrary to --only-keep-debug, --only-section fully removes sections that
diff --git a/llvm/lib/ObjCopy/ConfigManager.cpp b/llvm/lib/ObjCopy/ConfigManager.cpp
index 77321829e614..5b8e2f5dc200 100644
--- a/llvm/lib/ObjCopy/ConfigManager.cpp
+++ b/llvm/lib/ObjCopy/ConfigManager.cpp
@@ -15,14 +15,14 @@ namespace objcopy {
Expected<const COFFConfig &> ConfigManager::getCOFFConfig() const {
if (!Common.SplitDWO.empty() || !Common.SymbolsPrefix.empty() ||
- !Common.AllocSectionsPrefix.empty() || !Common.DumpSection.empty() ||
- !Common.KeepSection.empty() || !Common.SymbolsToGlobalize.empty() ||
- !Common.SymbolsToKeep.empty() || !Common.SymbolsToLocalize.empty() ||
- !Common.SymbolsToWeaken.empty() || !Common.SymbolsToKeepGlobal.empty() ||
- !Common.SectionsToRename.empty() || !Common.SetSectionAlignment.empty() ||
- !Common.SetSectionType.empty() || Common.ExtractDWO ||
- Common.PreserveDates || Common.StripDWO || Common.StripNonAlloc ||
- Common.StripSections || Common.Weaken || Common.DecompressDebugSections ||
+ !Common.AllocSectionsPrefix.empty() || !Common.KeepSection.empty() ||
+ !Common.SymbolsToGlobalize.empty() || !Common.SymbolsToKeep.empty() ||
+ !Common.SymbolsToLocalize.empty() || !Common.SymbolsToWeaken.empty() ||
+ !Common.SymbolsToKeepGlobal.empty() || !Common.SectionsToRename.empty() ||
+ !Common.SetSectionAlignment.empty() || !Common.SetSectionType.empty() ||
+ Common.ExtractDWO || Common.PreserveDates || Common.StripDWO ||
+ Common.StripNonAlloc || Common.StripSections || Common.Weaken ||
+ Common.DecompressDebugSections ||
Common.DiscardMode == DiscardType::Locals || !Common.SymbolsToAdd.empty())
return createStringError(llvm::errc::invalid_argument,
"option is not supported for COFF");
diff --git a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
index 689c9152c7dd..dfe843e1d4b7 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp
@@ -97,6 +97,14 @@ static uint64_t getSectionFlagsPreserveMask(uint64_t OldFlags,
return (OldFlags & PreserveMask) | (NewFlags & ~PreserveMask);
}
+static void setSectionType(SectionBase &Sec, uint64_t Type) {
+ // If Sec's type is changed from SHT_NOBITS due to --set-section-flags,
+ // Offset may not be aligned. Align it to max(Align, 1).
+ if (Sec.Type == ELF::SHT_NOBITS && Type != ELF::SHT_NOBITS)
+ Sec.Offset = alignTo(Sec.Offset, std::max(Sec.Align, uint64_t(1)));
+ Sec.Type = Type;
+}
+
static void setSectionFlagsAndType(SectionBase &Sec, SectionFlag Flags) {
Sec.Flags = getSectionFlagsPreserveMask(Sec.Flags, getNewShfFlags(Flags));
@@ -106,7 +114,7 @@ static void setSectionFlagsAndType(SectionBase &Sec, SectionFlag Flags) {
if (Sec.Type == SHT_NOBITS &&
(!(Sec.Flags & ELF::SHF_ALLOC) ||
Flags & (SectionFlag::SecContents | SectionFlag::SecLoad)))
- Sec.Type = SHT_PROGBITS;
+ setSectionType(Sec, ELF::SHT_PROGBITS);
}
static ElfType getOutputElfType(const Binary &Bin) {
@@ -164,13 +172,6 @@ static std::unique_ptr<Writer> createWriter(const CommonConfig &Config,
}
}
-template <class... Ts>
-static Error makeStringError(std::error_code EC, const Twine &Msg,
- Ts &&...Args) {
- std::string FullMsg = (EC.message() + ": " + Msg).str();
- return createStringError(EC, FullMsg.c_str(), std::forward<Ts>(Args)...);
-}
-
static Error dumpSectionToFile(StringRef SecName, StringRef Filename,
Object &Obj) {
for (auto &Sec : Obj.sections()) {
@@ -684,7 +685,7 @@ static Error handleArgs(const CommonConfig &Config, const ELFConfig &ELFConfig,
}
auto It2 = Config.SetSectionType.find(Sec.Name);
if (It2 != Config.SetSectionType.end())
- Sec.Type = It2->second;
+ setSectionType(Sec, It2->second);
}
}
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.cpp b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
index ea6dadabace6..697afab2a617 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.cpp
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.cpp
@@ -429,6 +429,13 @@ Error Section::accept(MutableSectionVisitor &Visitor) {
return Visitor.visit(*this);
}
+void Section::restoreSymTabLink(SymbolTableSection &SymTab) {
+ if (HasSymTabLink) {
+ assert(LinkSection == nullptr);
+ LinkSection = &SymTab;
+ }
+}
+
Error SectionWriter::visit(const OwnedDataSection &Sec) {
llvm::copy(Sec.Data, Out.getBufferStart() + Sec.Offset);
return Error::success();
@@ -680,8 +687,11 @@ bool Symbol::isCommon() const { return getShndx() == SHN_COMMON; }
void SymbolTableSection::assignIndices() {
uint32_t Index = 0;
- for (auto &Sym : Symbols)
+ for (auto &Sym : Symbols) {
+ if (Sym->Index != Index)
+ IndicesChanged = true;
Sym->Index = Index++;
+ }
}
void SymbolTableSection::addSymbol(Twine Name, uint8_t Bind, uint8_t Type,
@@ -741,7 +751,10 @@ Error SymbolTableSection::removeSymbols(
std::remove_if(std::begin(Symbols) + 1, std::end(Symbols),
[ToRemove](const SymPtr &Sym) { return ToRemove(*Sym); }),
std::end(Symbols));
+ auto PrevSize = Size;
Size = Symbols.size() * EntrySize;
+ if (Size < PrevSize)
+ IndicesChanged = true;
assignIndices();
return Error::success();
}
@@ -1106,8 +1119,10 @@ Error Section::initialize(SectionTableRef SecTable) {
LinkSection = *Sec;
- if (LinkSection->Type == ELF::SHT_SYMTAB)
+ if (LinkSection->Type == ELF::SHT_SYMTAB) {
+ HasSymTabLink = true;
LinkSection = nullptr;
+ }
return Error::success();
}
@@ -1704,6 +1719,10 @@ Expected<SectionBase &> ELFBuilder<ELFT>::makeSection(const Elf_Shdr &Shdr) {
else
return Data.takeError();
case SHT_SYMTAB: {
+ // Multiple SHT_SYMTAB sections are forbidden by the ELF gABI.
+ if (Obj.SymbolTable != nullptr)
+ return createStringError(llvm::errc::invalid_argument,
+ "found multiple SHT_SYMTAB sections");
auto &SymTab = Obj.addSection<SymbolTableSection>();
Obj.SymbolTable = &SymTab;
return SymTab;
@@ -2298,7 +2317,7 @@ static uint64_t layoutSections(Range Sections, uint64_t Offset) {
for (auto &Sec : Sections) {
Sec.Index = Index++;
if (Sec.ParentSegment != nullptr) {
- auto Segment = *Sec.ParentSegment;
+ const Segment &Segment = *Sec.ParentSegment;
Sec.Offset =
Segment.Offset + (Sec.OriginalOffset - Segment.OriginalOffset);
} else
@@ -2511,6 +2530,12 @@ template <class ELFT> Error ELFWriter<ELFT>::finalize() {
if (Error E = removeUnneededSections(Obj))
return E;
+ // If the .symtab indices have not been changed, restore the sh_link to
+ // .symtab for sections that were linked to .symtab.
+ if (Obj.SymbolTable && !Obj.SymbolTable->indicesChanged())
+ for (SectionBase &Sec : Obj.sections())
+ Sec.restoreSymTabLink(*Obj.SymbolTable);
+
// We need to assign indexes before we perform layout because we need to know
// if we need large indexes or not. We can assign indexes first and check as
// we go to see if we will actually need large indexes.
@@ -2627,12 +2652,9 @@ Error BinaryWriter::finalize() {
// MinAddr will be skipped.
uint64_t MinAddr = UINT64_MAX;
for (SectionBase &Sec : Obj.allocSections()) {
- // If Sec's type is changed from SHT_NOBITS due to --set-section-flags,
- // Offset may not be aligned. Align it to max(Align, 1).
if (Sec.ParentSegment != nullptr)
- Sec.Addr = alignTo(Sec.Offset - Sec.ParentSegment->Offset +
- Sec.ParentSegment->PAddr,
- std::max(Sec.Align, uint64_t(1)));
+ Sec.Addr =
+ Sec.Offset - Sec.ParentSegment->Offset + Sec.ParentSegment->PAddr;
if (Sec.Type != SHT_NOBITS && Sec.Size > 0)
MinAddr = std::min(MinAddr, Sec.Addr);
}
diff --git a/llvm/lib/ObjCopy/ELF/ELFObject.h b/llvm/lib/ObjCopy/ELF/ELFObject.h
index 94b5afe7df89..89a03b3fe0ee 100644
--- a/llvm/lib/ObjCopy/ELF/ELFObject.h
+++ b/llvm/lib/ObjCopy/ELF/ELFObject.h
@@ -432,6 +432,8 @@ public:
virtual bool hasContents() const { return false; }
// Notify the section that it is subject to removal.
virtual void onRemove();
+
+ virtual void restoreSymTabLink(SymbolTableSection &) {}
};
class Segment {
@@ -483,6 +485,7 @@ class Section : public SectionBase {
ArrayRef<uint8_t> Contents;
SectionBase *LinkSection = nullptr;
+ bool HasSymTabLink = false;
public:
explicit Section(ArrayRef<uint8_t> Data) : Contents(Data) {}
@@ -497,6 +500,7 @@ public:
bool hasContents() const override {
return Type != ELF::SHT_NOBITS && Type != ELF::SHT_NULL;
}
+ void restoreSymTabLink(SymbolTableSection &SymTab) override;
};
class OwnedDataSection : public SectionBase {
@@ -691,6 +695,7 @@ protected:
std::vector<std::unique_ptr<Symbol>> Symbols;
StringTableSection *SymbolNames = nullptr;
SectionIndexSection *SectionIndexTable = nullptr;
+ bool IndicesChanged = false;
using SymPtr = std::unique_ptr<Symbol>;
@@ -703,6 +708,7 @@ public:
void prepareForLayout();
// An 'empty' symbol table still contains a null symbol.
bool empty() const { return Symbols.size() == 1; }
+ bool indicesChanged() const { return IndicesChanged; }
void setShndxTable(SectionIndexSection *ShndxTable) {
SectionIndexTable = ShndxTable;
}
diff --git a/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp b/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp
index d37241682efe..e26b363df21c 100644
--- a/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp
+++ b/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp
@@ -112,6 +112,9 @@ static void updateAndRemoveSymbols(const CommonConfig &Config,
if (Config.DiscardMode == DiscardType::All && !(N->n_type & MachO::N_EXT))
return true;
// This behavior is consistent with cctools' strip.
+ if (Config.StripDebug && (N->n_type & MachO::N_STAB))
+ return true;
+ // This behavior is consistent with cctools' strip.
if (MachOConfig.StripSwiftSymbols &&
(Obj.Header.Flags & MachO::MH_DYLDLINK) && Obj.SwiftVersion &&
*Obj.SwiftVersion && N->isSwiftSymbol())
diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp
index 2cf924123888..9920145a2f3c 100644
--- a/llvm/lib/Object/Archive.cpp
+++ b/llvm/lib/Object/Archive.cpp
@@ -18,14 +18,15 @@
#include "llvm/Object/Error.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Host.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -135,6 +136,13 @@ BigArchiveMemberHeader::BigArchiveMemberHeader(const Archive *Parent,
return;
ErrorAsOutParameter ErrAsOutParam(Err);
+ if (RawHeaderPtr + getSizeOf() >= Parent->getData().end()) {
+ if (Err)
+ *Err = malformedError("malformed AIX big archive: remaining buffer is "
+ "unable to contain next archive member");
+ return;
+ }
+
if (Size < getSizeOf()) {
Error SubErr = createMemberHeaderParseError(this, RawHeaderPtr, Size);
if (Err)
@@ -461,6 +469,7 @@ Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
: Parent(Parent) {
if (!Start) {
Header = nullptr;
+ StartOfFile = -1;
return;
}
@@ -926,6 +935,34 @@ Archive::Archive(MemoryBufferRef Source, Error &Err)
StringTable = BufOrErr.get();
if (Increment())
return;
+
+ if (I == E) {
+ setFirstRegular(*C);
+ Err = Error::success();
+ return;
+ }
+
+ NameOrErr = C->getRawName();
+ if (!NameOrErr) {
+ Err = NameOrErr.takeError();
+ return;
+ }
+ Name = NameOrErr.get();
+ }
+
+ if (Name == "/<ECSYMBOLS>/") {
+ // ARM64EC-aware libraries contain an additional special member with
+ // an EC symbol map after the string table. Its format is similar to a
+ // regular symbol map, except it doesn't contain member offsets. Its indexes
+ // refer to member offsets from the regular symbol table instead.
+ Expected<StringRef> BufOrErr = C->getBuffer();
+ if (!BufOrErr) {
+ Err = BufOrErr.takeError();
+ return;
+ }
+ ECSymbolTable = BufOrErr.get();
+ if (Increment())
+ return;
}
setFirstRegular(*C);
@@ -960,7 +997,17 @@ Archive::child_iterator Archive::child_end() const {
return child_iterator::end(Child(nullptr, nullptr, nullptr));
}
+bool Archive::Symbol::isECSymbol() const {
+ // Symbols use SymbolCount..SymbolCount+getNumberOfECSymbols() for EC symbol
+ // indexes.
+ uint32_t SymbolCount = Parent->getNumberOfSymbols();
+ return SymbolCount <= SymbolIndex &&
+ SymbolIndex < SymbolCount + Parent->getNumberOfECSymbols();
+}
+
StringRef Archive::Symbol::getName() const {
+ if (isECSymbol())
+ return Parent->ECSymbolTable.begin() + StringIndex;
return Parent->getSymbolTable().begin() + StringIndex;
}
@@ -999,15 +1046,24 @@ Expected<Archive::Child> Archive::Symbol::getMember() const {
Buf += MemberCount * 4 + 4;
uint32_t SymbolCount = read32le(Buf);
- if (SymbolIndex >= SymbolCount)
+ uint16_t OffsetIndex;
+ if (SymbolIndex < SymbolCount) {
+ // Skip SymbolCount to get to the indices table.
+ const char *Indices = Buf + 4;
+
+ // Get the index of the offset in the file member offset table for this
+ // symbol.
+ OffsetIndex = read16le(Indices + SymbolIndex * 2);
+ } else if (isECSymbol()) {
+ // Skip SymbolCount to get to the indices table.
+ const char *Indices = Parent->ECSymbolTable.begin() + 4;
+
+ // Get the index of the offset in the file member offset table for this
+ // symbol.
+ OffsetIndex = read16le(Indices + (SymbolIndex - SymbolCount) * 2);
+ } else {
return errorCodeToError(object_error::parse_failed);
-
- // Skip SymbolCount to get to the indices table.
- const char *Indices = Buf + 4;
-
- // Get the index of the offset in the file member offset table for this
- // symbol.
- uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
+ }
// Subtract 1 since OffsetIndex is 1 based.
--OffsetIndex;
@@ -1056,6 +1112,9 @@ Archive::Symbol Archive::Symbol::getNext() const {
t.StringIndex -= CurRanStrx;
t.StringIndex += NextRanStrx;
}
+ } else if (t.isECSymbol()) {
+ // Go to one past next null.
+ t.StringIndex = Parent->ECSymbolTable.find('\0', t.StringIndex) + 1;
} else {
// Go to one past next null.
t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1;
@@ -1126,6 +1185,51 @@ Archive::symbol_iterator Archive::symbol_end() const {
return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
}
+Expected<iterator_range<Archive::symbol_iterator>> Archive::ec_symbols() const {
+ uint32_t Count = 0;
+
+ // Validate EC symbol table.
+ if (!ECSymbolTable.empty()) {
+ if (ECSymbolTable.size() < sizeof(uint32_t))
+ return malformedError("invalid EC symbols size (" +
+ Twine(ECSymbolTable.size()) + ")");
+ if (SymbolTable.size() < sizeof(uint32_t))
+ return malformedError("invalid symbols size (" +
+ Twine(SymbolTable.size()) + ")");
+
+ Count = read32le(ECSymbolTable.begin());
+ size_t StringIndex = sizeof(uint32_t) + Count * sizeof(uint16_t);
+ if (ECSymbolTable.size() < StringIndex)
+ return malformedError("invalid EC symbols size. Size was " +
+ Twine(ECSymbolTable.size()) + ", but expected " +
+ Twine(StringIndex));
+
+ uint32_t MemberCount = read32le(SymbolTable.begin());
+ const char *Indexes = ECSymbolTable.begin() + sizeof(uint32_t);
+
+ for (uint32_t i = 0; i < Count; ++i) {
+ uint16_t Index = read16le(Indexes + i * sizeof(uint16_t));
+ if (!Index)
+ return malformedError("invalid EC symbol index 0");
+ if (Index > MemberCount)
+ return malformedError("invalid EC symbol index " + Twine(Index) +
+ " is larger than member count " +
+ Twine(MemberCount));
+
+ StringIndex = ECSymbolTable.find('\0', StringIndex);
+ if (StringIndex == StringRef::npos)
+ return malformedError("malformed EC symbol names: not null-terminated");
+ ++StringIndex;
+ }
+ }
+
+ uint32_t SymbolCount = getNumberOfSymbols();
+ return make_range(
+ symbol_iterator(Symbol(this, SymbolCount,
+ sizeof(uint32_t) + Count * sizeof(uint16_t))),
+ symbol_iterator(Symbol(this, SymbolCount + Count, 0)));
+}
+
uint32_t Archive::getNumberOfSymbols() const {
if (!hasSymbolTable())
return 0;
@@ -1144,6 +1248,12 @@ uint32_t Archive::getNumberOfSymbols() const {
return read32le(buf);
}
+uint32_t Archive::getNumberOfECSymbols() const {
+ if (ECSymbolTable.size() < sizeof(uint32_t))
+ return 0;
+ return read32le(ECSymbolTable.begin());
+}
+
Expected<std::optional<Archive::Child>> Archive::findSym(StringRef name) const {
Archive::symbol_iterator bs = symbol_begin();
Archive::symbol_iterator es = symbol_end();
@@ -1167,11 +1277,78 @@ bool Archive::isEmpty() const {
bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); }
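+// Locate the global symbol table member at GlobalSymtabOffset and verify that
+// both its header and its contents fit inside the archive buffer.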
+static Error getGlobalSymtabLocAndSize(const MemoryBufferRef &Data,
+ uint64_t GlobalSymtabOffset,
+ const char *&GlobalSymtabLoc,
+ uint64_t &Size, const char *BitMessage) {
+ uint64_t BufferSize = Data.getBufferSize();
+ uint64_t GlobalSymtabContentOffset =
+ GlobalSymtabOffset + sizeof(BigArMemHdrType);
+ if (GlobalSymtabContentOffset > BufferSize)
+ return malformedError(
+ Twine(BitMessage) + " global symbol table header at offset 0x" +
+ Twine::utohexstr(GlobalSymtabOffset) + " and size 0x" +
+ Twine::utohexstr(sizeof(BigArMemHdrType)) +
+ " goes past the end of file");
+
+ GlobalSymtabLoc = Data.getBufferStart() + GlobalSymtabOffset;
+ const BigArMemHdrType *GlobalSymHdr =
+ reinterpret_cast<const BigArMemHdrType *>(GlobalSymtabLoc);
+ StringRef RawOffset = getFieldRawString(GlobalSymHdr->Size);
+ if (RawOffset.getAsInteger(10, Size))
+ return malformedError(Twine(BitMessage) + " global symbol table size \"" +
+ RawOffset + "\" is not a number");
+
+ if (GlobalSymtabContentOffset + Size > BufferSize)
+ return malformedError(
+ Twine(BitMessage) + " global symbol table content at offset 0x" +
+ Twine::utohexstr(GlobalSymtabContentOffset) + " and size 0x" +
+ Twine::utohexstr(Size) + " goes past the end of file");
+
+ return Error::success();
+}
+
+struct GlobalSymtabInfo {
+ uint64_t SymNum;
+ StringRef SymbolTable;
+ StringRef SymbolOffsetTable;
+ StringRef StringTable;
+};
+
+static void
+appendGlobalSymbolTableInfo(SmallVector<GlobalSymtabInfo> &SymtabInfos,
+ const char *GlobalSymtabLoc, uint64_t Size) {
+ // In a big archive, a global symbol table contains the following information:
+ // - The number of symbols.
+ // - The array of offsets into the archive file. The length is eight
+ // times the number of symbols.
+ // - The name-string table. The size is:
+ // Size-(8*(the number of symbols + 1)).
+
+ StringRef SymbolTable =
+ StringRef(GlobalSymtabLoc + sizeof(BigArMemHdrType), Size);
+ uint64_t SymNum = read64be(GlobalSymtabLoc + sizeof(BigArMemHdrType));
+ StringRef SymbolOffsetTable = StringRef(SymbolTable.data() + 8, 8 * SymNum);
+ unsigned SymOffsetsSize = 8 * (SymNum + 1);
+ uint64_t SymbolTableStringSize = Size - SymOffsetsSize;
+ StringRef StringTable =
+ StringRef(SymbolTable.data() + SymOffsetsSize, SymbolTableStringSize);
+ SymtabInfos.push_back({SymNum, SymbolTable, SymbolOffsetTable, StringTable});
+}
+
BigArchive::BigArchive(MemoryBufferRef Source, Error &Err)
: Archive(Source, Err) {
ErrorAsOutParameter ErrAsOutParam(&Err);
StringRef Buffer = Data.getBuffer();
ArFixLenHdr = reinterpret_cast<const FixLenHdr *>(Buffer.data());
+ uint64_t BufferSize = Data.getBufferSize();
+
+ if (BufferSize < sizeof(FixLenHdr)) {
+ Err = malformedError("malformed AIX big archive: incomplete fixed length "
+ "header, the archive is only" +
+ Twine(BufferSize) + " byte(s)");
+ return;
+ }
StringRef RawOffset = getFieldRawString(ArFixLenHdr->FirstChildOffset);
if (RawOffset.getAsInteger(10, FirstChildOffset))
@@ -1185,56 +1362,73 @@ BigArchive::BigArchive(MemoryBufferRef Source, Error &Err)
Err = malformedError("malformed AIX big archive: last member offset \"" +
RawOffset + "\" is not a number");
- // Calculate the global symbol table.
- uint64_t GlobSymOffset = 0;
+ uint64_t GlobSymtab32Offset = 0;
RawOffset = getFieldRawString(ArFixLenHdr->GlobSymOffset);
- if (RawOffset.getAsInteger(10, GlobSymOffset))
- // TODO: add test case.
- Err = malformedError(
- "malformed AIX big archive: global symbol table offset \"" + RawOffset +
- "\" is not a number");
+ if (RawOffset.getAsInteger(10, GlobSymtab32Offset)) {
+ Err = malformedError("global symbol table "
+ "offset of 32-bit members \"" +
+ RawOffset + "\" is not a number");
+ return;
+ }
- if (Err)
+ uint64_t GlobSymtab64Offset = 0;
+ RawOffset = getFieldRawString(ArFixLenHdr->GlobSym64Offset);
+ if (RawOffset.getAsInteger(10, GlobSymtab64Offset)) {
+ Err = malformedError("global symbol table "
+ "offset of 64-bit members\"" +
+ RawOffset + "\" is not a number");
return;
+ }
- if (GlobSymOffset > 0) {
- uint64_t BufferSize = Data.getBufferSize();
- uint64_t GlobalSymTblContentOffset =
- GlobSymOffset + sizeof(BigArMemHdrType);
- if (GlobalSymTblContentOffset > BufferSize) {
- Err = malformedError("global symbol table header at offset 0x" +
- Twine::utohexstr(GlobSymOffset) + " and size 0x" +
- Twine::utohexstr(sizeof(BigArMemHdrType)) +
- " goes past the end of file");
- return;
- }
+ const char *GlobSymtab32Loc = nullptr;
+ const char *GlobSymtab64Loc = nullptr;
+ uint64_t GlobSymtab32Size = 0;
+ uint64_t GlobSymtab64Size = 0;
+ const MemoryBufferRef &MemBuffRef = getMemoryBufferRef();
- const char *GlobSymTblLoc = Data.getBufferStart() + GlobSymOffset;
- const BigArMemHdrType *GlobalSymHdr =
- reinterpret_cast<const BigArMemHdrType *>(GlobSymTblLoc);
- RawOffset = getFieldRawString(GlobalSymHdr->Size);
- uint64_t Size;
- if (RawOffset.getAsInteger(10, Size)) {
- // TODO: add test case.
- Err = malformedError(
- "malformed AIX big archive: global symbol table size \"" + RawOffset +
- "\" is not a number");
+ if (GlobSymtab32Offset) {
+ Err =
+ getGlobalSymtabLocAndSize(MemBuffRef, GlobSymtab32Offset,
+ GlobSymtab32Loc, GlobSymtab32Size, "32-bit");
+ if (Err)
return;
- }
- if (GlobalSymTblContentOffset + Size > BufferSize) {
- Err = malformedError("global symbol table content at offset 0x" +
- Twine::utohexstr(GlobalSymTblContentOffset) +
- " and size 0x" + Twine::utohexstr(Size) +
- " goes past the end of file");
+ }
+
+ if (GlobSymtab64Offset) {
+ Err =
+ getGlobalSymtabLocAndSize(MemBuffRef, GlobSymtab64Offset,
+ GlobSymtab64Loc, GlobSymtab64Size, "64-bit");
+ if (Err)
return;
- }
- SymbolTable = StringRef(GlobSymTblLoc + sizeof(BigArMemHdrType), Size);
- unsigned SymNum = getNumberOfSymbols();
- unsigned SymOffsetsSize = 8 * (SymNum + 1);
- uint64_t SymbolTableStringSize = Size - SymOffsetsSize;
- StringTable =
- StringRef(GlobSymTblLoc + sizeof(BigArMemHdrType) + SymOffsetsSize,
- SymbolTableStringSize);
+ }
+
+ SmallVector<GlobalSymtabInfo> SymtabInfos;
+
+ if (GlobSymtab32Offset)
+ appendGlobalSymbolTableInfo(SymtabInfos, GlobSymtab32Loc, GlobSymtab32Size);
+ if (GlobSymtab64Offset)
+ appendGlobalSymbolTableInfo(SymtabInfos, GlobSymtab64Loc, GlobSymtab64Size);
+
+ if (SymtabInfos.size() == 1) {
+ SymbolTable = SymtabInfos[0].SymbolTable;
+ StringTable = SymtabInfos[0].StringTable;
+ } else if (SymtabInfos.size() == 2) {
+ // To let Archive::Symbol::getNext() work across both the 32-bit and 64-bit
+ // global symbol tables, merge them into a single table.
+ raw_string_ostream Out(MergedGlobalSymtabBuf);
+ uint64_t SymNum = SymtabInfos[0].SymNum + SymtabInfos[1].SymNum;
+ write(Out, SymNum, support::big);
+ // Merge the symbol offset tables.
+ Out << SymtabInfos[0].SymbolOffsetTable;
+ Out << SymtabInfos[1].SymbolOffsetTable;
+ // Merge string table.
+ Out << SymtabInfos[0].StringTable;
+ Out << SymtabInfos[1].StringTable;
+ SymbolTable = MergedGlobalSymtabBuf;
+ // Each symbol offset entry is 8 bytes; together with the leading 8-byte
+ // symbol count, the merged string table starts at offset (SymNum + 1) * 8.
+ StringTable = StringRef(SymbolTable.begin() + (SymNum + 1) * 8,
+ SymtabInfos[0].StringTable.size() +
+ SymtabInfos[1].StringTable.size());
}
child_iterator I = child_begin(Err, false);
diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp
index 0d3aad658fe4..d79a5c6bef30 100644
--- a/llvm/lib/Object/ArchiveWriter.cpp
+++ b/llvm/lib/Object/ArchiveWriter.cpp
@@ -17,6 +17,7 @@
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Object/Archive.h"
+#include "llvm/Object/COFF.h"
#include "llvm/Object/Error.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/MachO.h"
@@ -33,6 +34,7 @@
#include "llvm/Support/SmallVectorMemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include <cerrno>
#include <map>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
@@ -42,6 +44,13 @@
#endif
using namespace llvm;
+using namespace llvm::object;
+
+struct SymMap {
+ bool UseECMap;
+ std::map<std::string, uint16_t> Map;
+ std::map<std::string, uint16_t> ECMap;
+};
NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef)
: Buf(MemoryBuffer::getMemBuffer(BufRef, false)),
@@ -69,9 +78,11 @@ object::Archive::Kind NewArchiveMember::detectKindFromObject() const {
if (auto ObjOrErr = object::SymbolicFile::createSymbolicFile(
MemBufferRef, file_magic::bitcode, &Context)) {
auto &IRObject = cast<object::IRObjectFile>(**ObjOrErr);
- return Triple(IRObject.getTargetTriple()).isOSDarwin()
+ auto TargetTriple = Triple(IRObject.getTargetTriple());
+ return TargetTriple.isOSDarwin()
? object::Archive::K_DARWIN
- : object::Archive::K_GNU;
+ : (TargetTriple.isOSAIX() ? object::Archive::K_AIXBIG
+ : object::Archive::K_GNU);
} else {
// Squelch the error in case this was not a SymbolicFile.
consumeError(ObjOrErr.takeError());
@@ -169,18 +180,21 @@ static bool isAIXBigArchive(object::Archive::Kind Kind) {
return Kind == object::Archive::K_AIXBIG;
}
+static bool isCOFFArchive(object::Archive::Kind Kind) {
+ return Kind == object::Archive::K_COFF;
+}
+
static bool isBSDLike(object::Archive::Kind Kind) {
switch (Kind) {
case object::Archive::K_GNU:
case object::Archive::K_GNU64:
case object::Archive::K_AIXBIG:
+ case object::Archive::K_COFF:
return false;
case object::Archive::K_BSD:
case object::Archive::K_DARWIN:
case object::Archive::K_DARWIN64:
return true;
- case object::Archive::K_COFF:
- break;
}
llvm_unreachable("not supported for writting");
}
@@ -191,6 +205,10 @@ static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) {
isBSDLike(Kind) ? support::little : support::big);
}
+template <class T> static void printLE(raw_ostream &Out, T Val) {
+ support::endian::write(Out, Val, support::little);
+}
+
static void printRestOfMemberHeader(
raw_ostream &Out, const sys::TimePoint<std::chrono::seconds> &ModTime,
unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) {
@@ -235,8 +253,8 @@ static void
printBigArchiveMemberHeader(raw_ostream &Out, StringRef Name,
const sys::TimePoint<std::chrono::seconds> &ModTime,
unsigned UID, unsigned GID, unsigned Perms,
- uint64_t Size, unsigned PrevOffset,
- unsigned NextOffset) {
+ uint64_t Size, uint64_t PrevOffset,
+ uint64_t NextOffset) {
unsigned NameLen = Name.size();
printWithSpacePadding(Out, Size, 20); // File member size
@@ -295,7 +313,11 @@ printMemberHeader(raw_ostream &Out, uint64_t Pos, raw_ostream &StringTable,
auto Insertion = MemberNames.insert({M.MemberName, uint64_t(0)});
if (Insertion.second) {
Insertion.first->second = StringTable.tell();
- StringTable << M.MemberName << "/\n";
+ StringTable << M.MemberName;
+ if (isCOFFArchive(Kind))
+ StringTable << '\0';
+ else
+ StringTable << "/\n";
}
NamePos = Insertion.first->second;
}
@@ -356,7 +378,7 @@ static void printNBits(raw_ostream &Out, object::Archive::Kind Kind,
static uint64_t computeSymbolTableSize(object::Archive::Kind Kind,
uint64_t NumSyms, uint64_t OffsetSize,
- StringRef StringTable,
+ uint64_t StringTableSize,
uint32_t *Padding = nullptr) {
assert((OffsetSize == 4 || OffsetSize == 8) && "Unsupported OffsetSize");
uint64_t Size = OffsetSize; // Number of entries
@@ -366,7 +388,7 @@ static uint64_t computeSymbolTableSize(object::Archive::Kind Kind,
Size += NumSyms * OffsetSize; // Table
if (isBSDLike(Kind))
Size += OffsetSize; // byte count
- Size += StringTable.size();
+ Size += StringTableSize;
// ld64 expects the members to be 8-byte aligned for 64-bit content and at
// least 4-byte aligned for 32-bit content. Opt for the larger encoding
// uniformly.
@@ -376,6 +398,36 @@ static uint64_t computeSymbolTableSize(object::Archive::Kind Kind,
uint32_t Pad = isAIXBigArchive(Kind)
? 0
: offsetToAlignment(Size, Align(isBSDLike(Kind) ? 8 : 2));
+
+ Size += Pad;
+ if (Padding)
+ *Padding = Pad;
+ return Size;
+}
+
+static uint64_t computeSymbolMapSize(uint64_t NumObj, SymMap &SymMap,
+ uint32_t *Padding = nullptr) {
+ uint64_t Size = sizeof(uint32_t) * 2; // Member count and symbol count fields
+ Size += NumObj * sizeof(uint32_t); // Offset table
+
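+ // Each symbol costs a 16-bit member index plus its null-terminated name.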
+ for (auto S : SymMap.Map)
+ Size += sizeof(uint16_t) + S.first.length() + 1;
+
+ uint32_t Pad = offsetToAlignment(Size, Align(2));
+ Size += Pad;
+ if (Padding)
+ *Padding = Pad;
+ return Size;
+}
+
+static uint64_t computeECSymbolsSize(SymMap &SymMap,
+ uint32_t *Padding = nullptr) {
+ uint64_t Size = sizeof(uint32_t); // Number of symbols
+
+ for (auto S : SymMap.ECMap)
+ Size += sizeof(uint16_t) + S.first.length() + 1;
+
+ uint32_t Pad = offsetToAlignment(Size, Align(2));
Size += Pad;
if (Padding)
*Padding = Pad;
@@ -384,47 +436,121 @@ static uint64_t computeSymbolTableSize(object::Archive::Kind Kind,
static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind,
bool Deterministic, uint64_t Size,
- uint64_t PrevMemberOffset = 0) {
+ uint64_t PrevMemberOffset = 0,
+ uint64_t NextMemberOffset = 0) {
if (isBSDLike(Kind)) {
const char *Name = is64BitKind(Kind) ? "__.SYMDEF_64" : "__.SYMDEF";
printBSDMemberHeader(Out, Out.tell(), Name, now(Deterministic), 0, 0, 0,
Size);
} else if (isAIXBigArchive(Kind)) {
- printBigArchiveMemberHeader(Out, "", now(Deterministic), 0, 0,
- 0, Size, PrevMemberOffset, 0);
+ printBigArchiveMemberHeader(Out, "", now(Deterministic), 0, 0, 0, Size,
+ PrevMemberOffset, NextMemberOffset);
} else {
const char *Name = is64BitKind(Kind) ? "/SYM64" : "";
printGNUSmallMemberHeader(Out, Name, now(Deterministic), 0, 0, 0, Size);
}
}
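+// Everything that precedes the first regular member: the archive magic, the
+// symbol table (plus, for COFF, the symbol map and any EC symbol map), and the
+// long-name string table member.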
+static uint64_t computeHeadersSize(object::Archive::Kind Kind,
+ uint64_t NumMembers,
+ uint64_t StringMemberSize, uint64_t NumSyms,
+ uint64_t SymNamesSize, SymMap *SymMap) {
+ uint32_t OffsetSize = is64BitKind(Kind) ? 8 : 4;
+ uint64_t SymtabSize =
+ computeSymbolTableSize(Kind, NumSyms, OffsetSize, SymNamesSize);
+ auto computeSymbolTableHeaderSize = [=] {
+ SmallString<0> TmpBuf;
+ raw_svector_ostream Tmp(TmpBuf);
+ writeSymbolTableHeader(Tmp, Kind, true, SymtabSize);
+ return TmpBuf.size();
+ };
+ uint32_t HeaderSize = computeSymbolTableHeaderSize();
+ uint64_t Size = strlen("!<arch>\n") + HeaderSize + SymtabSize;
+
+ if (SymMap) {
+ Size += HeaderSize + computeSymbolMapSize(NumMembers, *SymMap);
+ if (SymMap->ECMap.size())
+ Size += HeaderSize + computeECSymbolsSize(*SymMap);
+ }
+
+ return Size + StringMemberSize;
+}
+
+static Expected<std::unique_ptr<SymbolicFile>>
+getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context) {
+ const file_magic Type = identify_magic(Buf.getBuffer());
+ // Don't attempt to read non-symbolic file types.
+ if (!object::SymbolicFile::isSymbolicFile(Type, &Context))
+ return nullptr;
+ if (Type == file_magic::bitcode) {
+ auto ObjOrErr = object::SymbolicFile::createSymbolicFile(
+ Buf, file_magic::bitcode, &Context);
+ if (!ObjOrErr)
+ return ObjOrErr.takeError();
+ return std::move(*ObjOrErr);
+ } else {
+ auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf);
+ if (!ObjOrErr)
+ return ObjOrErr.takeError();
+ return std::move(*ObjOrErr);
+ }
+}
+
+static Expected<bool> is64BitSymbolicFile(const StringRef &ObjStringRef) {
+ MemoryBufferRef ObjMbf(ObjStringRef, "");
+ // In the scenario when LLVMContext is populated SymbolicFile will contain a
+ // reference to it, thus SymbolicFile should be destroyed first.
+ LLVMContext Context;
+ Expected<std::unique_ptr<SymbolicFile>> ObjOrErr =
+ getSymbolicFile(ObjMbf, Context);
+ if (!ObjOrErr)
+ return ObjOrErr.takeError();
+
+ // Treat non-symbolic file types as not 64-bit.
+ if (!*ObjOrErr)
+ return false;
+
+ return (*ObjOrErr)->is64Bit();
+}
+
static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
bool Deterministic, ArrayRef<MemberData> Members,
- StringRef StringTable,
- uint64_t PrevMemberOffset = 0) {
+ StringRef StringTable, uint64_t MembersOffset,
+ unsigned NumSyms, uint64_t PrevMemberOffset = 0,
+ uint64_t NextMemberOffset = 0,
+ bool Is64Bit = false) {
// We don't write a symbol table on an archive with no members -- except on
// Darwin, where the linker will abort unless the archive has a symbol table.
- if (StringTable.empty() && !isDarwin(Kind))
+ if (StringTable.empty() && !isDarwin(Kind) && !isCOFFArchive(Kind))
return;
- unsigned NumSyms = 0;
- for (const MemberData &M : Members)
- NumSyms += M.Symbols.size();
-
uint64_t OffsetSize = is64BitKind(Kind) ? 8 : 4;
uint32_t Pad;
- uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize, StringTable, &Pad);
- writeSymbolTableHeader(Out, Kind, Deterministic, Size, PrevMemberOffset);
-
- uint64_t Pos = isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr)
- : Out.tell() + Size;
+ uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize,
+ StringTable.size(), &Pad);
+ writeSymbolTableHeader(Out, Kind, Deterministic, Size, PrevMemberOffset,
+ NextMemberOffset);
if (isBSDLike(Kind))
printNBits(Out, Kind, NumSyms * 2 * OffsetSize);
else
printNBits(Out, Kind, NumSyms);
+ uint64_t Pos = MembersOffset;
for (const MemberData &M : Members) {
+ if (isAIXBigArchive(Kind)) {
+ Expected<bool> Is64BitOrErr = is64BitSymbolicFile(M.Data);
+ // If there is an error, it will already have been reported when
+ // 'computeMemberData' called 'getSymbols', so we don't need to handle it
+ // here.
+ if (!Is64BitOrErr)
+ cantFail(Is64BitOrErr.takeError());
+ if (*Is64BitOrErr != Is64Bit) {
+ Pos += M.Header.size() + M.Data.size() + M.Padding.size();
+ continue;
+ }
+ }
+
for (unsigned StringOffset : M.Symbols) {
if (isBSDLike(Kind))
printNBits(Out, Kind, StringOffset);
@@ -442,40 +568,111 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind,
Out.write(uint8_t(0));
}
-static Expected<std::vector<unsigned>>
-getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
- std::vector<unsigned> Ret;
+static void writeSymbolMap(raw_ostream &Out, object::Archive::Kind Kind,
+ bool Deterministic, ArrayRef<MemberData> Members,
+ SymMap &SymMap, uint64_t MembersOffset) {
+ uint32_t Pad;
+ uint64_t Size = computeSymbolMapSize(Members.size(), SymMap, &Pad);
+ writeSymbolTableHeader(Out, Kind, Deterministic, Size, 0);
+
+ uint32_t Pos = MembersOffset;
+
+ printLE<uint32_t>(Out, Members.size());
+ for (const MemberData &M : Members) {
+ printLE(Out, Pos); // member offset
+ Pos += M.Header.size() + M.Data.size() + M.Padding.size();
+ }
+
+ printLE<uint32_t>(Out, SymMap.Map.size());
+
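+ // Write each symbol's member index, then the null-terminated symbol names.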
+ for (auto S : SymMap.Map)
+ printLE(Out, S.second);
+ for (auto S : SymMap.Map)
+ Out << S.first << '\0';
+
+ while (Pad--)
+ Out.write(uint8_t(0));
+}
+
+static void writeECSymbols(raw_ostream &Out, object::Archive::Kind Kind,
+ bool Deterministic, ArrayRef<MemberData> Members,
+ SymMap &SymMap) {
+ uint32_t Pad;
+ uint64_t Size = computeECSymbolsSize(SymMap, &Pad);
+ printGNUSmallMemberHeader(Out, "/<ECSYMBOLS>", now(Deterministic), 0, 0, 0,
+ Size);
+ printLE<uint32_t>(Out, SymMap.ECMap.size());
+
+ for (auto S : SymMap.ECMap)
+ printLE(Out, S.second);
+ for (auto S : SymMap.ECMap)
+ Out << S.first << '\0';
+ while (Pad--)
+ Out.write(uint8_t(0));
+}
+
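+// An object belongs in the EC (ARM64EC-compatible) symbol map when it is not
+// native ARM64 code, e.g. x86_64 or ARM64EC COFF objects and bitcode.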
+static bool isECObject(object::SymbolicFile &Obj) {
+ if (Obj.isCOFF())
+ return cast<llvm::object::COFFObjectFile>(&Obj)->getMachine() !=
+ COFF::IMAGE_FILE_MACHINE_ARM64;
+
+ if (Obj.isIR()) {
+ Expected<std::string> TripleStr =
+ getBitcodeTargetTriple(Obj.getMemoryBufferRef());
+ if (!TripleStr)
+ return false;
+ Triple T(*TripleStr);
+ return T.isWindowsArm64EC() || T.getArch() == Triple::x86_64;
+ }
+
+ return false;
+}
+
+static Expected<std::vector<unsigned>>
+getSymbols(MemoryBufferRef Buf, uint16_t Index, raw_ostream &SymNames,
+ SymMap *SymMap, bool &HasObject) {
// In the scenario when LLVMContext is populated SymbolicFile will contain a
// reference to it, thus SymbolicFile should be destroyed first.
LLVMContext Context;
- std::unique_ptr<object::SymbolicFile> Obj;
- const file_magic Type = identify_magic(Buf.getBuffer());
- // Treat unsupported file types as having no symbols.
- if (!object::SymbolicFile::isSymbolicFile(Type, &Context))
+ std::vector<unsigned> Ret;
+ Expected<std::unique_ptr<SymbolicFile>> ObjOrErr =
+ getSymbolicFile(Buf, Context);
+ if (!ObjOrErr)
+ return ObjOrErr.takeError();
+
+ // If the member is a non-symbolic file, treat it as having no symbols.
+ if (!*ObjOrErr)
return Ret;
- if (Type == file_magic::bitcode) {
- auto ObjOrErr = object::SymbolicFile::createSymbolicFile(
- Buf, file_magic::bitcode, &Context);
- if (!ObjOrErr)
- return ObjOrErr.takeError();
- Obj = std::move(*ObjOrErr);
- } else {
- auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf);
- if (!ObjOrErr)
- return ObjOrErr.takeError();
- Obj = std::move(*ObjOrErr);
- }
+ std::unique_ptr<object::SymbolicFile> Obj = std::move(*ObjOrErr);
+
+ std::map<std::string, uint16_t> *Map = nullptr;
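+ // Symbols from EC objects are routed into the EC map when it is enabled;
+ // everything else goes into the regular symbol map.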
+ if (SymMap)
+ Map = SymMap->UseECMap && isECObject(*Obj) ? &SymMap->ECMap : &SymMap->Map;
HasObject = true;
for (const object::BasicSymbolRef &S : Obj->symbols()) {
if (!isArchiveSymbol(S))
continue;
- Ret.push_back(SymNames.tell());
- if (Error E = S.printName(SymNames))
- return std::move(E);
- SymNames << '\0';
+ if (Map) {
+ std::string Name;
+ raw_string_ostream NameStream(Name);
+ if (Error E = S.printName(NameStream))
+ return std::move(E);
+ if (Map->find(Name) != Map->end())
+ continue; // ignore duplicated symbol
+ (*Map)[Name] = Index;
+ if (Map == &SymMap->Map) {
+ Ret.push_back(SymNames.tell());
+ SymNames << Name << '\0';
+ }
+ } else {
+ Ret.push_back(SymNames.tell());
+ if (Error E = S.printName(SymNames))
+ return std::move(E);
+ SymNames << '\0';
+ }
}
return Ret;
}
@@ -483,7 +680,8 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) {
static Expected<std::vector<MemberData>>
computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
object::Archive::Kind Kind, bool Thin, bool Deterministic,
- bool NeedSymbols, ArrayRef<NewArchiveMember> NewMembers) {
+ bool NeedSymbols, SymMap *SymMap,
+ ArrayRef<NewArchiveMember> NewMembers) {
static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'};
uint64_t Pos =
@@ -549,7 +747,8 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
// The big archive format needs to know the offset of the previous member
// header.
- unsigned PrevOffset = 0;
+ uint64_t PrevOffset = 0;
+ uint16_t Index = 0;
for (const NewArchiveMember &M : NewMembers) {
std::string Header;
raw_string_ostream Out(Header);
@@ -557,6 +756,8 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
MemoryBufferRef Buf = M.Buf->getMemBufferRef();
StringRef Data = Thin ? "" : Buf.getBuffer();
+ Index++;
+
// ld64 expects the members to be 8-byte aligned for 64-bit content and at
// least 4-byte aligned for 32-bit content. Opt for the larger encoding
// uniformly. This matches the behaviour with cctools and ensures that ld64
@@ -583,7 +784,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
}
if (isAIXBigArchive(Kind)) {
- unsigned NextOffset = Pos + sizeof(object::BigArMemHdrType) +
+ uint64_t NextOffset = Pos + sizeof(object::BigArMemHdrType) +
alignTo(M.MemberName.size(), 2) + alignTo(Size, 2);
printBigArchiveMemberHeader(Out, M.MemberName, ModTime, M.UID, M.GID,
M.Perms, Size, PrevOffset, NextOffset);
@@ -597,7 +798,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
std::vector<unsigned> Symbols;
if (NeedSymbols) {
Expected<std::vector<unsigned>> SymbolsOrErr =
- getSymbols(Buf, SymNames, HasObject);
+ getSymbols(Buf, Index, SymNames, SymMap, HasObject);
if (!SymbolsOrErr)
return createFileError(M.MemberName, SymbolsOrErr.takeError());
Symbols = std::move(*SymbolsOrErr);
@@ -609,7 +810,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames,
// If there are no symbols, emit an empty symbol table, to satisfy Solaris
// tools, older versions of which expect a symbol table in a non-empty
// archive, regardless of whether there are any symbols in it.
- if (HasObject && SymNames.tell() == 0)
+ if (HasObject && SymNames.tell() == 0 && !isCOFFArchive(Kind))
SymNames << '\0' << '\0' << '\0';
return Ret;
}
@@ -660,50 +861,74 @@ Expected<std::string> computeArchiveRelativePath(StringRef From, StringRef To) {
static Error writeArchiveToStream(raw_ostream &Out,
ArrayRef<NewArchiveMember> NewMembers,
bool WriteSymtab, object::Archive::Kind Kind,
- bool Deterministic, bool Thin) {
+ bool Deterministic, bool Thin, bool IsEC) {
assert((!Thin || !isBSDLike(Kind)) && "Only the gnu format has a thin mode");
SmallString<0> SymNamesBuf;
raw_svector_ostream SymNames(SymNamesBuf);
SmallString<0> StringTableBuf;
raw_svector_ostream StringTable(StringTableBuf);
+ SymMap SymMap;
+
+ // COFF symbol map uses 16-bit indexes, so we can't use it if there are too
+ // many members.
+ if (isCOFFArchive(Kind) && NewMembers.size() > 0xfffe)
+ Kind = object::Archive::K_GNU;
- Expected<std::vector<MemberData>> DataOrErr =
- computeMemberData(StringTable, SymNames, Kind, Thin, Deterministic,
- WriteSymtab, NewMembers);
+ SymMap.UseECMap = IsEC;
+ Expected<std::vector<MemberData>> DataOrErr = computeMemberData(
+ StringTable, SymNames, Kind, Thin, Deterministic, WriteSymtab,
+ isCOFFArchive(Kind) ? &SymMap : nullptr, NewMembers);
if (Error E = DataOrErr.takeError())
return E;
std::vector<MemberData> &Data = *DataOrErr;
- if (!StringTableBuf.empty() && !isAIXBigArchive(Kind))
- Data.insert(Data.begin(), computeStringTable(StringTableBuf));
+ uint64_t StringTableSize = 0;
+ MemberData StringTableMember;
+ if (!StringTableBuf.empty() && !isAIXBigArchive(Kind)) {
+ StringTableMember = computeStringTable(StringTableBuf);
+ StringTableSize = StringTableMember.Header.size() +
+ StringTableMember.Data.size() +
+ StringTableMember.Padding.size();
+ }
// We would like to detect if we need to switch to a 64-bit symbol table.
- uint64_t LastMemberEndOffset =
- isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr) : 8;
- uint64_t LastMemberHeaderOffset = LastMemberEndOffset;
+ uint64_t LastMemberEndOffset = 0;
+ uint64_t LastMemberHeaderOffset = 0;
uint64_t NumSyms = 0;
+ uint64_t NumSyms32 = 0; // Store symbol number of 32-bit member files.
+
for (const auto &M : Data) {
// Record the start of the member's offset
LastMemberHeaderOffset = LastMemberEndOffset;
// Account for the size of each part associated with the member.
LastMemberEndOffset += M.Header.size() + M.Data.size() + M.Padding.size();
NumSyms += M.Symbols.size();
+
+ // AIX big archive files may contain two global symbol tables. The
+ // first global symbol table locates 32-bit file members that define global
+ // symbols; the second global symbol table does the same for 64-bit file
+ // members. As a big archive can have both 32-bit and 64-bit file members,
+ // we need to know the number of symbols in each symbol table individually.
+ if (isAIXBigArchive(Kind) && WriteSymtab) {
+ Expected<bool> Is64BitOrErr = is64BitSymbolicFile(M.Data);
+ if (Error E = Is64BitOrErr.takeError())
+ return E;
+
+ if (!*Is64BitOrErr)
+ NumSyms32 += M.Symbols.size();
+ }
}
+ std::optional<uint64_t> HeadersSize;
+
// The symbol table is put at the end of the big archive file. The symbol
// table is at the start of the archive file for other archive formats.
- if (WriteSymtab && !isAIXBigArchive(Kind)) {
+ if (WriteSymtab && !is64BitKind(Kind)) {
// We assume 32-bit offsets to see if 32-bit symbols are possible or not.
- uint64_t SymtabSize = computeSymbolTableSize(Kind, NumSyms, 4, SymNamesBuf);
- auto computeSymbolTableHeaderSize =
- [=] {
- SmallString<0> TmpBuf;
- raw_svector_ostream Tmp(TmpBuf);
- writeSymbolTableHeader(Tmp, Kind, Deterministic, SymtabSize);
- return TmpBuf.size();
- };
- LastMemberHeaderOffset += computeSymbolTableHeaderSize() + SymtabSize;
+ HeadersSize = computeHeadersSize(Kind, Data.size(), StringTableSize,
+ NumSyms, SymNamesBuf.size(),
+ isCOFFArchive(Kind) ? &SymMap : nullptr);
// The SYM64 format is used when an archive's member offsets are larger than
// 32-bits can hold. The need for this shift in format is detected by
@@ -720,11 +945,12 @@ static Error writeArchiveToStream(raw_ostream &Out,
    // If LastMemberHeaderOffset isn't going to fit in a 32-bit variable we need
// to switch to 64-bit. Note that the file can be larger than 4GB as long as
// the last member starts before the 4GB offset.
- if (LastMemberHeaderOffset >= Sym64Threshold) {
+ if (*HeadersSize + LastMemberHeaderOffset >= Sym64Threshold) {
if (Kind == object::Archive::K_DARWIN)
Kind = object::Archive::K_DARWIN64;
else
Kind = object::Archive::K_GNU64;
+ HeadersSize.reset();
}
}
@@ -736,11 +962,32 @@ static Error writeArchiveToStream(raw_ostream &Out,
Out << "!<arch>\n";
if (!isAIXBigArchive(Kind)) {
- if (WriteSymtab)
- writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf);
+ if (WriteSymtab) {
+ if (!HeadersSize)
+ HeadersSize = computeHeadersSize(
+ Kind, Data.size(), StringTableSize, NumSyms, SymNamesBuf.size(),
+ isCOFFArchive(Kind) ? &SymMap : nullptr);
+ writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf,
+ *HeadersSize, NumSyms);
+
+ if (isCOFFArchive(Kind))
+ writeSymbolMap(Out, Kind, Deterministic, Data, SymMap, *HeadersSize);
+ }
+
+ if (StringTableSize)
+ Out << StringTableMember.Header << StringTableMember.Data
+ << StringTableMember.Padding;
+
+ if (WriteSymtab && SymMap.ECMap.size())
+ writeECSymbols(Out, Kind, Deterministic, Data, SymMap);
+
for (const MemberData &M : Data)
Out << M.Header << M.Data << M.Padding;
} else {
+ HeadersSize = sizeof(object::BigArchive::FixLenHdr);
+ LastMemberEndOffset += *HeadersSize;
+ LastMemberHeaderOffset += *HeadersSize;
+
// For the big archive (AIX) format, compute a table of member names and
// offsets, used in the member table.
uint64_t MemberTableNameStrTblSize = 0;
@@ -761,25 +1008,61 @@ static Error writeArchiveToStream(raw_ostream &Out,
}
// AIX member table size.
- unsigned MemberTableSize = 20 + // Number of members field
+ uint64_t MemberTableSize = 20 + // Number of members field
20 * MemberOffsets.size() +
MemberTableNameStrTblSize;
- unsigned GlobalSymbolOffset =
- (WriteSymtab && NumSyms > 0)
- ? LastMemberEndOffset +
- alignTo(sizeof(object::BigArMemHdrType) + MemberTableSize, 2)
- : 0;
+ SmallString<0> SymNamesBuf32;
+ SmallString<0> SymNamesBuf64;
+ raw_svector_ostream SymNames32(SymNamesBuf32);
+ raw_svector_ostream SymNames64(SymNamesBuf64);
+
+ if (WriteSymtab && NumSyms)
+ // Generate the symbol names for the members.
+ for (const NewArchiveMember &M : NewMembers) {
+ MemoryBufferRef Buf = M.Buf->getMemBufferRef();
+ Expected<bool> Is64BitOrErr = is64BitSymbolicFile(Buf.getBuffer());
+ if (!Is64BitOrErr)
+ return Is64BitOrErr.takeError();
+
+ bool HasObject;
+ Expected<std::vector<unsigned>> SymbolsOrErr =
+ getSymbols(Buf, 0, *Is64BitOrErr ? SymNames64 : SymNames32, nullptr,
+ HasObject);
+ if (!SymbolsOrErr)
+ return SymbolsOrErr.takeError();
+ }
+
+ uint64_t MemberTableEndOffset =
+ LastMemberEndOffset +
+ alignTo(sizeof(object::BigArMemHdrType) + MemberTableSize, 2);
+
+    // In AIX OS, the 'GlobSymOffset' field in the fixed-length header contains
+ // the offset to the 32-bit global symbol table, and the 'GlobSym64Offset'
+ // contains the offset to the 64-bit global symbol table.
+ uint64_t GlobalSymbolOffset =
+ (WriteSymtab && NumSyms32 > 0) ? MemberTableEndOffset : 0;
+
+ uint64_t GlobalSymbolOffset64 = 0;
+ uint64_t NumSyms64 = NumSyms - NumSyms32;
+ if (WriteSymtab && NumSyms64 > 0) {
+ if (GlobalSymbolOffset == 0)
+ GlobalSymbolOffset64 = MemberTableEndOffset;
+ else
+ // If there is a global symbol table for 32-bit members,
+ // the 64-bit global symbol table is after the 32-bit one.
+ GlobalSymbolOffset64 =
+ GlobalSymbolOffset + sizeof(object::BigArMemHdrType) +
+ (NumSyms32 + 1) * 8 + alignTo(SymNamesBuf32.size(), 2);
+ }
// Fixed Sized Header.
printWithSpacePadding(Out, NewMembers.size() ? LastMemberEndOffset : 0,
20); // Offset to member table
// If there are no file members in the archive, there will be no global
// symbol table.
- printWithSpacePadding(Out, NewMembers.size() ? GlobalSymbolOffset : 0, 20);
- printWithSpacePadding(
- Out, 0,
- 20); // Offset to 64 bits global symbol table - Not supported yet
+ printWithSpacePadding(Out, GlobalSymbolOffset, 20);
+ printWithSpacePadding(Out, GlobalSymbolOffset64, 20);
printWithSpacePadding(
Out, NewMembers.size() ? sizeof(object::BigArchive::FixLenHdr) : 0,
20); // Offset to first archive member
@@ -799,7 +1082,8 @@ static Error writeArchiveToStream(raw_ostream &Out,
// Member table.
printBigArchiveMemberHeader(Out, "", sys::toTimePoint(0), 0, 0, 0,
MemberTableSize, LastMemberHeaderOffset,
- GlobalSymbolOffset);
+ GlobalSymbolOffset ? GlobalSymbolOffset
+ : GlobalSymbolOffset64);
printWithSpacePadding(Out, MemberOffsets.size(), 20); // Number of members
for (uint64_t MemberOffset : MemberOffsets)
printWithSpacePadding(Out, MemberOffset,
@@ -811,9 +1095,25 @@ static Error writeArchiveToStream(raw_ostream &Out,
Out << '\0'; // Name table must be tail padded to an even number of
// bytes.
- if (WriteSymtab && NumSyms > 0)
- writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf,
- LastMemberEndOffset);
+ if (WriteSymtab) {
+ // Write global symbol table for 32-bit file members.
+ if (GlobalSymbolOffset) {
+ writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf32,
+ *HeadersSize, NumSyms32, LastMemberEndOffset,
+ GlobalSymbolOffset64);
+ // Add padding between the symbol tables, if needed.
+ if (GlobalSymbolOffset64 && (SymNamesBuf32.size() % 2))
+ Out << '\0';
+ }
+
+ // Write global symbol table for 64-bit file members.
+ if (GlobalSymbolOffset64)
+ writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf64,
+ *HeadersSize, NumSyms64,
+ GlobalSymbolOffset ? GlobalSymbolOffset
+ : LastMemberEndOffset,
+ 0, true);
+ }
}
}
Out.flush();
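The 64-bit table offset computed in the hunk above falls straight out of the on-disk layout of the 32-bit table: its member header, a count word plus one 8-byte offset per symbol, and its name string table padded to an even length. A hedged arithmetic sketch, where MemberHeaderSize stands in for sizeof(object::BigArMemHdrType):

    #include <cstdint>

    // Pad Value up to the next even byte boundary.
    static uint64_t padToEven(uint64_t Value) { return (Value + 1) & ~uint64_t(1); }

    // Offset of the 64-bit global symbol table, given the 32-bit table's offset
    // and contents. Illustrative only; the real header size comes from LLVM.
    static uint64_t sym64TableOffset(uint64_t Sym32TableOffset,
                                     uint64_t MemberHeaderSize,
                                     uint64_t NumSyms32,
                                     uint64_t SymNames32Size) {
      // 32-bit table: member header, a count word plus NumSyms32 offset words
      // (8 bytes each), then the name table padded to an even length.
      return Sym32TableOffset + MemberHeaderSize + (NumSyms32 + 1) * 8 +
             padToEven(SymNames32Size);
    }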
@@ -823,7 +1123,7 @@ static Error writeArchiveToStream(raw_ostream &Out,
Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
bool WriteSymtab, object::Archive::Kind Kind,
bool Deterministic, bool Thin,
- std::unique_ptr<MemoryBuffer> OldArchiveBuf) {
+ std::unique_ptr<MemoryBuffer> OldArchiveBuf, bool IsEC) {
Expected<sys::fs::TempFile> Temp =
sys::fs::TempFile::create(ArcName + ".temp-archive-%%%%%%%.a");
if (!Temp)
@@ -831,7 +1131,7 @@ Error writeArchive(StringRef ArcName, ArrayRef<NewArchiveMember> NewMembers,
raw_fd_ostream Out(Temp->FD, false);
if (Error E = writeArchiveToStream(Out, NewMembers, WriteSymtab, Kind,
- Deterministic, Thin)) {
+ Deterministic, Thin, IsEC)) {
if (Error DiscardError = Temp->discard())
return joinErrors(std::move(E), std::move(DiscardError));
return E;
@@ -860,7 +1160,7 @@ writeArchiveToBuffer(ArrayRef<NewArchiveMember> NewMembers, bool WriteSymtab,
raw_svector_ostream ArchiveStream(ArchiveBufferVector);
if (Error E = writeArchiveToStream(ArchiveStream, NewMembers, WriteSymtab,
- Kind, Deterministic, Thin))
+ Kind, Deterministic, Thin, false))
return std::move(E);
return std::make_unique<SmallVectorMemoryBuffer>(
diff --git a/llvm/lib/Object/BuildID.cpp b/llvm/lib/Object/BuildID.cpp
index 795c22e769aa..ef21458060ab 100644
--- a/llvm/lib/Object/BuildID.cpp
+++ b/llvm/lib/Object/BuildID.cpp
@@ -18,13 +18,12 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
-namespace llvm {
-namespace object {
+using namespace llvm;
+using namespace llvm::object;
namespace {
-template <typename ELFT>
-std::optional<BuildIDRef> getBuildID(const ELFFile<ELFT> &Obj) {
+template <typename ELFT> BuildIDRef getBuildID(const ELFFile<ELFT> &Obj) {
auto PhdrsOrErr = Obj.program_headers();
if (!PhdrsOrErr) {
consumeError(PhdrsOrErr.takeError());
@@ -37,7 +36,7 @@ std::optional<BuildIDRef> getBuildID(const ELFFile<ELFT> &Obj) {
for (auto N : Obj.notes(P, Err))
if (N.getType() == ELF::NT_GNU_BUILD_ID &&
N.getName() == ELF::ELF_NOTE_GNU)
- return N.getDesc();
+ return N.getDesc(P.p_align);
consumeError(std::move(Err));
}
return {};
@@ -45,15 +44,24 @@ std::optional<BuildIDRef> getBuildID(const ELFFile<ELFT> &Obj) {
} // namespace
-std::optional<BuildIDRef> getBuildID(const ObjectFile *Obj) {
+BuildID llvm::object::parseBuildID(StringRef Str) {
+ std::string Bytes;
+ if (!tryGetFromHex(Str, Bytes))
+ return {};
+ ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
+ Bytes.size());
+ return SmallVector<uint8_t>(BuildID.begin(), BuildID.end());
+}
+
+BuildIDRef llvm::object::getBuildID(const ObjectFile *Obj) {
if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(Obj))
- return getBuildID(O->getELFFile());
+ return ::getBuildID(O->getELFFile());
if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(Obj))
- return getBuildID(O->getELFFile());
+ return ::getBuildID(O->getELFFile());
if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(Obj))
- return getBuildID(O->getELFFile());
+ return ::getBuildID(O->getELFFile());
if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(Obj))
- return getBuildID(O->getELFFile());
+ return ::getBuildID(O->getELFFile());
return std::nullopt;
}
@@ -88,6 +96,3 @@ std::optional<std::string> BuildIDFetcher::fetch(BuildIDRef BuildID) const {
}
return std::nullopt;
}
-
-} // namespace object
-} // namespace llvm
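What the new parseBuildID above does — turning a hex string such as "a1b2c3" into raw bytes, or returning an empty ID on malformed input — can be sketched without any LLVM types; this is an independent illustration, not the library's tryGetFromHex:

    #include <cstdint>
    #include <string>
    #include <vector>

    // Decode a hex string into bytes; return an empty vector for odd-length or
    // non-hex input, mirroring the "return {}" failure path above.
    static std::vector<uint8_t> decodeHexID(const std::string &Str) {
      if (Str.size() % 2 != 0)
        return {};
      auto Nibble = [](char C) -> int {
        if (C >= '0' && C <= '9') return C - '0';
        if (C >= 'a' && C <= 'f') return C - 'a' + 10;
        if (C >= 'A' && C <= 'F') return C - 'A' + 10;
        return -1;
      };
      std::vector<uint8_t> Bytes;
      for (size_t I = 0; I < Str.size(); I += 2) {
        int Hi = Nibble(Str[I]), Lo = Nibble(Str[I + 1]);
        if (Hi < 0 || Lo < 0)
          return {};
        Bytes.push_back(static_cast<uint8_t>((Hi << 4) | Lo));
      }
      return Bytes;
    }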
diff --git a/llvm/lib/Object/COFFImportFile.cpp b/llvm/lib/Object/COFFImportFile.cpp
index 7090d3ca5618..765c12cc076c 100644
--- a/llvm/lib/Object/COFFImportFile.cpp
+++ b/llvm/lib/Object/COFFImportFile.cpp
@@ -39,6 +39,7 @@ static bool is32bit(MachineTypes Machine) {
llvm_unreachable("unsupported machine");
case IMAGE_FILE_MACHINE_ARM64:
case IMAGE_FILE_MACHINE_ARM64EC:
+ case IMAGE_FILE_MACHINE_ARM64X:
case IMAGE_FILE_MACHINE_AMD64:
return false;
case IMAGE_FILE_MACHINE_ARMNT:
@@ -57,6 +58,7 @@ static uint16_t getImgRelRelocation(MachineTypes Machine) {
return IMAGE_REL_ARM_ADDR32NB;
case IMAGE_FILE_MACHINE_ARM64:
case IMAGE_FILE_MACHINE_ARM64EC:
+ case IMAGE_FILE_MACHINE_ARM64X:
return IMAGE_REL_ARM64_ADDR32NB;
case IMAGE_FILE_MACHINE_I386:
return IMAGE_REL_I386_DIR32NB;
@@ -86,7 +88,8 @@ static void writeStringTable(std::vector<uint8_t> &B,
for (const auto &S : Strings) {
B.resize(Pos + S.length() + 1);
- strcpy(reinterpret_cast<char *>(&B[Pos]), S.c_str());
+ std::copy(S.begin(), S.end(), std::next(B.begin(), Pos));
+ B[Pos + S.length()] = 0;
Pos += S.length() + 1;
}
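The writeStringTable change above swaps strcpy for std::copy plus an explicit terminator, since the destination is a freshly resized byte vector rather than a C string buffer. The same pattern in isolation, as a small sketch:

    #include <algorithm>
    #include <cstdint>
    #include <string>
    #include <vector>

    // Append S to B as a NUL-terminated entry and return the entry's offset.
    static size_t appendStringTableEntry(std::vector<uint8_t> &B,
                                         const std::string &S) {
      size_t Pos = B.size();
      B.resize(Pos + S.length() + 1);      // room for the bytes plus the NUL
      std::copy(S.begin(), S.end(), B.begin() + Pos);
      B[Pos + S.length()] = 0;             // explicit terminator, no strcpy
      return Pos;
    }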
diff --git a/llvm/lib/Object/COFFModuleDefinition.cpp b/llvm/lib/Object/COFFModuleDefinition.cpp
index 0666970d5c60..a33949733c8e 100644
--- a/llvm/lib/Object/COFFModuleDefinition.cpp
+++ b/llvm/lib/Object/COFFModuleDefinition.cpp
@@ -138,8 +138,11 @@ private:
class Parser {
public:
- explicit Parser(StringRef S, MachineTypes M, bool B)
- : Lex(S), Machine(M), MingwDef(B) {}
+ explicit Parser(StringRef S, MachineTypes M, bool B, bool AU)
+ : Lex(S), Machine(M), MingwDef(B), AddUnderscores(AU) {
+ if (Machine != IMAGE_FILE_MACHINE_I386)
+ AddUnderscores = false;
+ }
Expected<COFFModuleDefinition> parse() {
do {
@@ -234,7 +237,7 @@ private:
unget();
}
- if (Machine == IMAGE_FILE_MACHINE_I386) {
+ if (AddUnderscores) {
if (!isDecorated(E.Name, MingwDef))
E.Name = (std::string("_").append(E.Name));
if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
@@ -279,7 +282,7 @@ private:
if (Tok.K == EqualEqual) {
read();
E.AliasTarget = std::string(Tok.Value);
- if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef))
+ if (AddUnderscores && !isDecorated(E.AliasTarget, MingwDef))
E.AliasTarget = std::string("_").append(E.AliasTarget);
continue;
}
@@ -349,12 +352,14 @@ private:
MachineTypes Machine;
COFFModuleDefinition Info;
bool MingwDef;
+ bool AddUnderscores;
};
Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
MachineTypes Machine,
- bool MingwDef) {
- return Parser(MB.getBuffer(), Machine, MingwDef).parse();
+ bool MingwDef,
+ bool AddUnderscores) {
+ return Parser(MB.getBuffer(), Machine, MingwDef, AddUnderscores).parse();
}
} // namespace object
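The decoration rule the parser now keys off AddUnderscores (forced off for non-i386 machines) is: prefix a name with '_' only when decoration is requested and the name is not already decorated. A hedged sketch, where isDecoratedName is only a stand-in for the parser's real isDecorated check:

    #include <string>

    // Stand-in for isDecorated(): treat names that already start with '_', '@',
    // or '?', or that carry an '@' suffix, as decorated. Not the exact rule.
    static bool isDecoratedName(const std::string &Name) {
      return !Name.empty() &&
             (Name[0] == '_' || Name[0] == '@' || Name[0] == '?' ||
              Name.find('@') != std::string::npos);
    }

    static std::string decorate(const std::string &Name, bool AddUnderscores) {
      if (AddUnderscores && !isDecoratedName(Name))
        return "_" + Name;
      return Name;
    }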
diff --git a/llvm/lib/Object/COFFObjectFile.cpp b/llvm/lib/Object/COFFObjectFile.cpp
index b159ae1bba14..08eb0d034c53 100644
--- a/llvm/lib/Object/COFFObjectFile.cpp
+++ b/llvm/lib/Object/COFFObjectFile.cpp
@@ -13,7 +13,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Object/Binary.h"
@@ -26,6 +25,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cinttypes>
@@ -753,6 +753,54 @@ Error COFFObjectFile::initLoadConfigPtr() {
return E;
LoadConfig = (const void *)IntPtr;
+
+ if (is64()) {
+ auto Config = getLoadConfig64();
+ if (Config->Size >=
+ offsetof(coff_load_configuration64, CHPEMetadataPointer) +
+ sizeof(Config->CHPEMetadataPointer) &&
+ Config->CHPEMetadataPointer) {
+ uint64_t ChpeOff = Config->CHPEMetadataPointer;
+ if (Error E =
+ getRvaPtr(ChpeOff - getImageBase(), IntPtr, "CHPE metadata"))
+ return E;
+ if (Error E = checkOffset(Data, IntPtr, sizeof(CHPEMetadata)))
+ return E;
+
+ CHPEMetadata = reinterpret_cast<const chpe_metadata *>(IntPtr);
+
+ // Validate CHPE metadata
+ if (CHPEMetadata->CodeMapCount) {
+ if (Error E = getRvaPtr(CHPEMetadata->CodeMap, IntPtr, "CHPE code map"))
+ return E;
+ if (Error E = checkOffset(Data, IntPtr,
+ CHPEMetadata->CodeMapCount *
+ sizeof(chpe_range_entry)))
+ return E;
+ }
+
+ if (CHPEMetadata->CodeRangesToEntryPointsCount) {
+ if (Error E = getRvaPtr(CHPEMetadata->CodeRangesToEntryPoints, IntPtr,
+ "CHPE entry point ranges"))
+ return E;
+ if (Error E = checkOffset(Data, IntPtr,
+ CHPEMetadata->CodeRangesToEntryPointsCount *
+ sizeof(chpe_code_range_entry)))
+ return E;
+ }
+
+ if (CHPEMetadata->RedirectionMetadataCount) {
+ if (Error E = getRvaPtr(CHPEMetadata->RedirectionMetadata, IntPtr,
+ "CHPE redirection metadata"))
+ return E;
+ if (Error E = checkOffset(Data, IntPtr,
+ CHPEMetadata->RedirectionMetadataCount *
+ sizeof(chpe_redirection_entry)))
+ return E;
+ }
+ }
+ }
+
return Error::success();
}
@@ -1016,6 +1064,8 @@ StringRef COFFObjectFile::getFileFormatName() const {
return "COFF-ARM64";
case COFF::IMAGE_FILE_MACHINE_ARM64EC:
return "COFF-ARM64EC";
+ case COFF::IMAGE_FILE_MACHINE_ARM64X:
+ return "COFF-ARM64X";
default:
return "COFF-<unknown arch>";
}
@@ -1031,6 +1081,7 @@ Triple::ArchType COFFObjectFile::getArch() const {
return Triple::thumb;
case COFF::IMAGE_FILE_MACHINE_ARM64:
case COFF::IMAGE_FILE_MACHINE_ARM64EC:
+ case COFF::IMAGE_FILE_MACHINE_ARM64X:
return Triple::aarch64;
default:
return Triple::UnknownArch;
@@ -1318,6 +1369,7 @@ StringRef COFFObjectFile::getRelocationTypeName(uint16_t Type) const {
break;
case COFF::IMAGE_FILE_MACHINE_ARM64:
case COFF::IMAGE_FILE_MACHINE_ARM64EC:
+ case COFF::IMAGE_FILE_MACHINE_ARM64X:
switch (Type) {
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ABSOLUTE);
LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR32);
@@ -1901,6 +1953,7 @@ ResourceSectionRef::getContents(const coff_resource_data_entry &Entry) {
break;
case COFF::IMAGE_FILE_MACHINE_ARM64:
case COFF::IMAGE_FILE_MACHINE_ARM64EC:
+ case COFF::IMAGE_FILE_MACHINE_ARM64X:
RVAReloc = COFF::IMAGE_REL_ARM64_ADDR32NB;
break;
default:
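Each CHPE table in the new load-config code above is validated the same way: resolve the table's RVA, then check that count times entry-size bytes fit inside the mapped data. A generic form of that bounds check (the RVA resolution itself is LLVM-specific and omitted here):

    #include <cstdint>

    // Returns true if a table of Count fixed-size entries starting at Offset
    // lies entirely within a buffer of BufferSize bytes.
    static bool tableFits(uint64_t BufferSize, uint64_t Offset, uint64_t Count,
                          uint64_t EntrySize) {
      // Guard the multiplication against overflow before comparing.
      if (EntrySize != 0 && Count > (UINT64_MAX - Offset) / EntrySize)
        return false;
      return Offset + Count * EntrySize <= BufferSize;
    }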
diff --git a/llvm/lib/Object/DXContainer.cpp b/llvm/lib/Object/DXContainer.cpp
index 4d8f261fe4cc..48932afea84b 100644
--- a/llvm/lib/Object/DXContainer.cpp
+++ b/llvm/lib/Object/DXContainer.cpp
@@ -91,6 +91,15 @@ Error DXContainer::parseHash(StringRef Part) {
return Error::success();
}
+Error DXContainer::parsePSVInfo(StringRef Part) {
+ if (PSVInfo)
+ return parseFailed("More than one PSV0 part is present in the file");
+ PSVInfo = DirectX::PSVRuntimeInfo(Part);
+ // Parsing the PSVRuntime info occurs late because we need to read data from
+ // other parts first.
+ return Error::success();
+}
+
Error DXContainer::parsePartOffsets() {
uint32_t LastOffset =
sizeof(dxbc::Header) + (Header.PartCount * sizeof(uint32_t));
@@ -140,10 +149,24 @@ Error DXContainer::parsePartOffsets() {
if (Error Err = parseHash(PartData))
return Err;
break;
+ case dxbc::PartType::PSV0:
+ if (Error Err = parsePSVInfo(PartData))
+ return Err;
+ break;
case dxbc::PartType::Unknown:
break;
}
}
+
+ // Fully parsing the PSVInfo requires knowing the shader kind which we read
+ // out of the program header in the DXIL part.
+ if (PSVInfo) {
+ if (!DXIL)
+ return parseFailed("Cannot fully parse pipeline state validation "
+ "information without DXIL part.");
+ if (Error Err = PSVInfo->parse(DXIL->first.ShaderKind))
+ return Err;
+ }
return Error::success();
}
@@ -166,3 +189,69 @@ void DXContainer::PartIterator::updateIteratorImpl(const uint32_t Offset) {
StringRef(Current + sizeof(dxbc::PartHeader), IteratorState.Part.Size);
IteratorState.Offset = Offset;
}
+
+Error DirectX::PSVRuntimeInfo::parse(uint16_t ShaderKind) {
+ Triple::EnvironmentType ShaderStage = dxbc::getShaderStage(ShaderKind);
+
+ const char *Current = Data.begin();
+ if (Error Err = readInteger(Data, Current, Size))
+ return Err;
+ Current += sizeof(uint32_t);
+
+ StringRef PSVInfoData = Data.substr(sizeof(uint32_t), Size);
+
+ if (PSVInfoData.size() < Size)
+ return parseFailed(
+ "Pipeline state data extends beyond the bounds of the part");
+
+ using namespace dxbc::PSV;
+
+ const uint32_t PSVVersion = getVersion();
+
+ // Detect the PSVVersion by looking at the size field.
+ if (PSVVersion == 2) {
+ v2::RuntimeInfo Info;
+ if (Error Err = readStruct(PSVInfoData, Current, Info))
+ return Err;
+ if (sys::IsBigEndianHost)
+ Info.swapBytes(ShaderStage);
+ BasicInfo = Info;
+ } else if (PSVVersion == 1) {
+ v1::RuntimeInfo Info;
+ if (Error Err = readStruct(PSVInfoData, Current, Info))
+ return Err;
+ if (sys::IsBigEndianHost)
+ Info.swapBytes(ShaderStage);
+ BasicInfo = Info;
+ } else {
+ v0::RuntimeInfo Info;
+ if (Error Err = readStruct(PSVInfoData, Current, Info))
+ return Err;
+ if (sys::IsBigEndianHost)
+ Info.swapBytes(ShaderStage);
+ BasicInfo = Info;
+ }
+ Current += Size;
+
+ uint32_t ResourceCount = 0;
+ if (Error Err = readInteger(Data, Current, ResourceCount))
+ return Err;
+ Current += sizeof(uint32_t);
+
+ if (ResourceCount > 0) {
+ if (Error Err = readInteger(Data, Current, Resources.Stride))
+ return Err;
+ Current += sizeof(uint32_t);
+
+ size_t BindingDataSize = Resources.Stride * ResourceCount;
+ Resources.Data = Data.substr(Current - Data.begin(), BindingDataSize);
+
+ if (Resources.Data.size() < BindingDataSize)
+ return parseFailed(
+ "Resource binding data extends beyond the bounds of the part");
+
+ Current += BindingDataSize;
+ }
+
+ return Error::success();
+}
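The PSV0 parser above reads a 32-bit size prefix, checks that the described region stays inside the part, and then selects a RuntimeInfo layout by version, where the version is inferred from that size. A simplified shape of the size check and version choice, with made-up sizes instead of the dxbc::PSV struct definitions:

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Made-up layout sizes standing in for dxbc::PSV::v0/v1/v2::RuntimeInfo.
    enum : uint32_t { V0Size = 24, V1Size = 36, V2Size = 52 };

    // Read the 32-bit size prefix and decide which versioned struct the blob
    // holds. Returns -1 on a malformed blob.
    static int detectPSVVersion(const std::vector<uint8_t> &Part) {
      if (Part.size() < sizeof(uint32_t))
        return -1;
      uint32_t Size;
      std::memcpy(&Size, Part.data(), sizeof(Size));
      if (Part.size() < sizeof(uint32_t) + Size)
        return -1; // info extends beyond the bounds of the part
      if (Size >= V2Size) return 2;
      if (Size >= V1Size) return 1;
      return 0;
    }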
diff --git a/llvm/lib/Object/Decompressor.cpp b/llvm/lib/Object/Decompressor.cpp
index f38c0e69e850..39baf2f0cb0f 100644
--- a/llvm/lib/Object/Decompressor.cpp
+++ b/llvm/lib/Object/Decompressor.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/Decompressor.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Compression.h"
diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp
index 81c9a097170d..0d1862e57371 100644
--- a/llvm/lib/Object/ELF.cpp
+++ b/llvm/lib/Object/ELF.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/ELF.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/DataExtractor.h"
@@ -270,6 +271,11 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
case ELF::EM_RISCV:
switch (Type) { STRINGIFY_ENUM_CASE(ELF, SHT_RISCV_ATTRIBUTES); }
break;
+ case ELF::EM_AARCH64:
+ switch (Type) {
+ STRINGIFY_ENUM_CASE(ELF, SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC);
+ STRINGIFY_ENUM_CASE(ELF, SHT_AARCH64_MEMTAG_GLOBALS_STATIC);
+ }
default:
break;
}
@@ -307,6 +313,7 @@ StringRef llvm::object::getELFSectionTypeName(uint32_t Machine, unsigned Type) {
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_BB_ADDR_MAP_V0);
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_BB_ADDR_MAP);
STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_OFFLOADING);
+ STRINGIFY_ENUM_CASE(ELF, SHT_LLVM_LTO);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_ATTRIBUTES);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_HASH);
STRINGIFY_ENUM_CASE(ELF, SHT_GNU_verdef);
@@ -640,7 +647,26 @@ ELFFile<ELFT>::toMappedAddr(uint64_t VAddr, WarningHandler WarnHandler) const {
template <class ELFT>
Expected<std::vector<BBAddrMap>>
-ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec) const {
+ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec,
+ const Elf_Shdr *RelaSec) const {
+ bool IsRelocatable = getHeader().e_type == ELF::ET_REL;
+
+ // This DenseMap maps the offset of each function (the location of the
+ // reference to the function in the SHT_LLVM_BB_ADDR_MAP section) to the
+ // addend (the location of the function in the text section).
+ llvm::DenseMap<uint64_t, uint64_t> FunctionOffsetTranslations;
+ if (IsRelocatable && RelaSec) {
+ assert(RelaSec &&
+ "Can't read a SHT_LLVM_BB_ADDR_MAP section in a relocatable "
+ "object file without providing a relocation section.");
+ Expected<Elf_Rela_Range> Relas = this->relas(*RelaSec);
+ if (!Relas)
+ return createError("unable to read relocations for section " +
+ describe(*this, Sec) + ": " +
+ toString(Relas.takeError()));
+ for (Elf_Rela Rela : *Relas)
+ FunctionOffsetTranslations[Rela.r_offset] = Rela.r_addend;
+ }
Expected<ArrayRef<uint8_t>> ContentsOrErr = getSectionContents(Sec);
if (!ContentsOrErr)
return ContentsOrErr.takeError();
@@ -650,6 +676,7 @@ ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec) const {
DataExtractor::Cursor Cur(0);
Error ULEBSizeErr = Error::success();
+ Error MetadataDecodeErr = Error::success();
// Helper to extract and decode the next ULEB128 value as uint32_t.
// Returns zero and sets ULEBSizeErr if the ULEB128 value exceeds the uint32_t
// limit.
@@ -670,7 +697,8 @@ ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec) const {
};
uint8_t Version = 0;
- while (!ULEBSizeErr && Cur && Cur.tell() < Content.size()) {
+ while (!ULEBSizeErr && !MetadataDecodeErr && Cur &&
+ Cur.tell() < Content.size()) {
if (Sec.sh_type == ELF::SHT_LLVM_BB_ADDR_MAP) {
Version = Data.getU8(Cur);
if (!Cur)
@@ -680,32 +708,97 @@ ELFFile<ELFT>::decodeBBAddrMap(const Elf_Shdr &Sec) const {
Twine(static_cast<int>(Version)));
Data.getU8(Cur); // Feature byte
}
+ uint64_t SectionOffset = Cur.tell();
uintX_t Address = static_cast<uintX_t>(Data.getAddress(Cur));
+ if (!Cur)
+ return Cur.takeError();
+ if (IsRelocatable) {
+ assert(Address == 0);
+ auto FOTIterator = FunctionOffsetTranslations.find(SectionOffset);
+ if (FOTIterator == FunctionOffsetTranslations.end()) {
+ return createError("failed to get relocation data for offset: " +
+ Twine::utohexstr(SectionOffset) + " in section " +
+ describe(*this, Sec));
+ }
+ Address = FOTIterator->second;
+ }
uint32_t NumBlocks = ReadULEB128AsUInt32();
std::vector<BBAddrMap::BBEntry> BBEntries;
uint32_t PrevBBEndOffset = 0;
for (uint32_t BlockIndex = 0;
- !ULEBSizeErr && Cur && (BlockIndex < NumBlocks); ++BlockIndex) {
+ !MetadataDecodeErr && !ULEBSizeErr && Cur && (BlockIndex < NumBlocks);
+ ++BlockIndex) {
uint32_t ID = Version >= 2 ? ReadULEB128AsUInt32() : BlockIndex;
uint32_t Offset = ReadULEB128AsUInt32();
uint32_t Size = ReadULEB128AsUInt32();
- uint32_t Metadata = ReadULEB128AsUInt32();
+ uint32_t MD = ReadULEB128AsUInt32();
if (Version >= 1) {
// Offset is calculated relative to the end of the previous BB.
Offset += PrevBBEndOffset;
PrevBBEndOffset = Offset + Size;
}
- BBEntries.push_back({ID, Offset, Size, Metadata});
+ Expected<BBAddrMap::BBEntry::Metadata> MetadataOrErr =
+ BBAddrMap::BBEntry::Metadata::decode(MD);
+ if (!MetadataOrErr) {
+ MetadataDecodeErr = MetadataOrErr.takeError();
+ break;
+ }
+ BBEntries.push_back({ID, Offset, Size, *MetadataOrErr});
}
FunctionEntries.push_back({Address, std::move(BBEntries)});
}
- // Either Cur is in the error state, or ULEBSizeError is set (not both), but
- // we join the two errors here to be safe.
- if (!Cur || ULEBSizeErr)
- return joinErrors(Cur.takeError(), std::move(ULEBSizeErr));
+ // Either Cur is in the error state, or we have an error in ULEBSizeErr or
+ // MetadataDecodeErr (but not both), but we join all errors here to be safe.
+ if (!Cur || ULEBSizeErr || MetadataDecodeErr)
+ return joinErrors(joinErrors(Cur.takeError(), std::move(ULEBSizeErr)),
+ std::move(MetadataDecodeErr));
return FunctionEntries;
}
+template <class ELFT>
+Expected<
+ MapVector<const typename ELFT::Shdr *, const typename ELFT::Shdr *>>
+ELFFile<ELFT>::getSectionAndRelocations(
+ std::function<Expected<bool>(const Elf_Shdr &)> IsMatch) const {
+ MapVector<const Elf_Shdr *, const Elf_Shdr *> SecToRelocMap;
+ Error Errors = Error::success();
+ for (const Elf_Shdr &Sec : cantFail(this->sections())) {
+ Expected<bool> DoesSectionMatch = IsMatch(Sec);
+ if (!DoesSectionMatch) {
+ Errors = joinErrors(std::move(Errors), DoesSectionMatch.takeError());
+ continue;
+ }
+ if (*DoesSectionMatch) {
+ if (SecToRelocMap.insert(std::make_pair(&Sec, (const Elf_Shdr *)nullptr))
+ .second)
+ continue;
+ }
+
+ if (Sec.sh_type != ELF::SHT_RELA && Sec.sh_type != ELF::SHT_REL)
+ continue;
+
+ Expected<const Elf_Shdr *> RelSecOrErr = this->getSection(Sec.sh_info);
+ if (!RelSecOrErr) {
+ Errors = joinErrors(std::move(Errors),
+ createError(describe(*this, Sec) +
+ ": failed to get a relocated section: " +
+ toString(RelSecOrErr.takeError())));
+ continue;
+ }
+ const Elf_Shdr *ContentsSec = *RelSecOrErr;
+ Expected<bool> DoesRelTargetMatch = IsMatch(*ContentsSec);
+ if (!DoesRelTargetMatch) {
+ Errors = joinErrors(std::move(Errors), DoesRelTargetMatch.takeError());
+ continue;
+ }
+ if (*DoesRelTargetMatch)
+ SecToRelocMap[ContentsSec] = &Sec;
+ }
+  if (Errors)
+ return std::move(Errors);
+ return SecToRelocMap;
+}
+
template class llvm::object::ELFFile<ELF32LE>;
template class llvm::object::ELFFile<ELF32BE>;
template class llvm::object::ELFFile<ELF64LE>;
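In a relocatable object the address field in SHT_LLVM_BB_ADDR_MAP is zero, so decodeBBAddrMap above builds an offset-to-addend map from the paired SHT_RELA section and substitutes the addend for each function. The lookup step in isolation, with plain standard-library types:

    #include <cstdint>
    #include <map>
    #include <optional>

    // Maps the offset of the address field inside the BB-addr-map section to
    // the relocation addend (the function's location in its text section).
    using OffsetToAddend = std::map<uint64_t, uint64_t>;

    static std::optional<uint64_t>
    translateFunctionAddress(const OffsetToAddend &Relocs,
                             uint64_t SectionOffset) {
      auto It = Relocs.find(SectionOffset);
      if (It == Relocs.end())
        return std::nullopt; // no relocation data for this offset -> error above
      return It->second;
    }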
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index ebc57bd04be7..143f9d37849d 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -11,10 +11,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCInstrAnalysis.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ELF.h"
#include "llvm/Object/ELFTypes.h"
@@ -26,6 +24,8 @@
#include "llvm/Support/RISCVAttributeParser.h"
#include "llvm/Support/RISCVAttributes.h"
#include "llvm/Support/RISCVISAInfo.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
@@ -73,7 +73,7 @@ ObjectFile::createELFObjectFile(MemoryBufferRef Obj, bool InitContent) {
std::pair<unsigned char, unsigned char> Ident =
getElfArchType(Obj.getBuffer());
std::size_t MaxAlignment =
- 1ULL << countTrailingZeros(
+ 1ULL << llvm::countr_zero(
reinterpret_cast<uintptr_t>(Obj.getBufferStart()));
if (MaxAlignment < 2)
@@ -303,12 +303,7 @@ Expected<SubtargetFeatures> ELFObjectFileBase::getRISCVFeatures() const {
std::optional<StringRef> Attr =
Attributes.getAttributeString(RISCVAttrs::ARCH);
if (Attr) {
- // Suppress version checking for experimental extensions to prevent erroring
- // when getting any unknown version of experimental extension.
- auto ParseResult = RISCVISAInfo::parseArchString(
- *Attr, /*EnableExperimentalExtension=*/true,
- /*ExperimentalExtensionVersionCheck=*/false,
- /*IgnoreUnknown=*/true);
+ auto ParseResult = RISCVISAInfo::parseNormalizedArchString(*Attr);
if (!ParseResult)
return ParseResult.takeError();
auto &ISAInfo = *ParseResult;
@@ -363,6 +358,7 @@ std::optional<StringRef> ELFObjectFileBase::tryGetCPUName() const {
switch (getEMachine()) {
case ELF::EM_AMDGPU:
return getAMDGPUCPUName();
+ case ELF::EM_PPC:
case ELF::EM_PPC64:
return StringRef("future");
default:
@@ -468,6 +464,10 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx90c";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940:
return "gfx940";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941:
+ return "gfx941";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942:
+ return "gfx942";
// AMDGCN GFX10.
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010:
@@ -502,6 +502,10 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx1102";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103:
return "gfx1103";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150:
+ return "gfx1150";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151:
+ return "gfx1151";
default:
llvm_unreachable("Unknown EF_AMDGPU_MACH value");
}
@@ -602,20 +606,21 @@ void ELFObjectFileBase::setARMSubArch(Triple &TheTriple) const {
TheTriple.setArchName(Triple);
}
-std::vector<std::pair<std::optional<DataRefImpl>, uint64_t>>
-ELFObjectFileBase::getPltAddresses() const {
+std::vector<ELFPltEntry> ELFObjectFileBase::getPltEntries() const {
std::string Err;
const auto Triple = makeTriple();
const auto *T = TargetRegistry::lookupTarget(Triple.str(), Err);
if (!T)
return {};
- uint64_t JumpSlotReloc = 0;
+ uint32_t JumpSlotReloc = 0, GlobDatReloc = 0;
switch (Triple.getArch()) {
case Triple::x86:
JumpSlotReloc = ELF::R_386_JUMP_SLOT;
+ GlobDatReloc = ELF::R_386_GLOB_DAT;
break;
case Triple::x86_64:
JumpSlotReloc = ELF::R_X86_64_JUMP_SLOT;
+ GlobDatReloc = ELF::R_X86_64_GLOB_DAT;
break;
case Triple::aarch64:
case Triple::aarch64_be:
@@ -629,7 +634,9 @@ ELFObjectFileBase::getPltAddresses() const {
T->createMCInstrAnalysis(MII.get()));
if (!MIA)
return {};
- std::optional<SectionRef> Plt, RelaPlt, GotPlt;
+ std::vector<std::pair<uint64_t, uint64_t>> PltEntries;
+ std::optional<SectionRef> RelaPlt, RelaDyn;
+ uint64_t GotBaseVA = 0;
for (const SectionRef &Section : sections()) {
Expected<StringRef> NameOrErr = Section.getName();
if (!NameOrErr) {
@@ -638,42 +645,66 @@ ELFObjectFileBase::getPltAddresses() const {
}
StringRef Name = *NameOrErr;
- if (Name == ".plt")
- Plt = Section;
- else if (Name == ".rela.plt" || Name == ".rel.plt")
+ if (Name == ".rela.plt" || Name == ".rel.plt") {
RelaPlt = Section;
- else if (Name == ".got.plt")
- GotPlt = Section;
- }
- if (!Plt || !RelaPlt || !GotPlt)
- return {};
- Expected<StringRef> PltContents = Plt->getContents();
- if (!PltContents) {
- consumeError(PltContents.takeError());
- return {};
+ } else if (Name == ".rela.dyn" || Name == ".rel.dyn") {
+ RelaDyn = Section;
+ } else if (Name == ".got.plt") {
+ GotBaseVA = Section.getAddress();
+ } else if (Name == ".plt" || Name == ".plt.got") {
+ Expected<StringRef> PltContents = Section.getContents();
+ if (!PltContents) {
+ consumeError(PltContents.takeError());
+ return {};
+ }
+ llvm::append_range(
+ PltEntries,
+ MIA->findPltEntries(Section.getAddress(),
+ arrayRefFromStringRef(*PltContents), Triple));
+ }
}
- auto PltEntries = MIA->findPltEntries(Plt->getAddress(),
- arrayRefFromStringRef(*PltContents),
- GotPlt->getAddress(), Triple);
+
// Build a map from GOT entry virtual address to PLT entry virtual address.
DenseMap<uint64_t, uint64_t> GotToPlt;
- for (const auto &Entry : PltEntries)
- GotToPlt.insert(std::make_pair(Entry.second, Entry.first));
+ for (auto [Plt, GotPlt] : PltEntries) {
+ uint64_t GotPltEntry = GotPlt;
+ // An x86-32 PIC PLT uses jmp DWORD PTR [ebx-offset]. Add
+ // _GLOBAL_OFFSET_TABLE_ (EBX) to get the .got.plt (or .got) entry address.
+ // See X86MCTargetDesc.cpp:findPltEntries for the 1 << 32 bit.
+ if (GotPltEntry & (uint64_t(1) << 32) && getEMachine() == ELF::EM_386)
+ GotPltEntry = static_cast<int32_t>(GotPltEntry) + GotBaseVA;
+ GotToPlt.insert(std::make_pair(GotPltEntry, Plt));
+ }
+
// Find the relocations in the dynamic relocation table that point to
// locations in the GOT for which we know the corresponding PLT entry.
- std::vector<std::pair<std::optional<DataRefImpl>, uint64_t>> Result;
- for (const auto &Relocation : RelaPlt->relocations()) {
- if (Relocation.getType() != JumpSlotReloc)
- continue;
- auto PltEntryIter = GotToPlt.find(Relocation.getOffset());
- if (PltEntryIter != GotToPlt.end()) {
- symbol_iterator Sym = Relocation.getSymbol();
- if (Sym == symbol_end())
- Result.emplace_back(std::nullopt, PltEntryIter->second);
- else
- Result.emplace_back(Sym->getRawDataRefImpl(), PltEntryIter->second);
+ std::vector<ELFPltEntry> Result;
+ auto handleRels = [&](iterator_range<relocation_iterator> Rels,
+ uint32_t RelType, StringRef PltSec) {
+ for (const auto &R : Rels) {
+ if (R.getType() != RelType)
+ continue;
+ auto PltEntryIter = GotToPlt.find(R.getOffset());
+ if (PltEntryIter != GotToPlt.end()) {
+ symbol_iterator Sym = R.getSymbol();
+ if (Sym == symbol_end())
+ Result.push_back(
+ ELFPltEntry{PltSec, std::nullopt, PltEntryIter->second});
+ else
+ Result.push_back(ELFPltEntry{PltSec, Sym->getRawDataRefImpl(),
+ PltEntryIter->second});
+ }
}
- }
+ };
+
+ if (RelaPlt)
+ handleRels(RelaPlt->relocations(), JumpSlotReloc, ".plt");
+
+ // If a symbol needing a PLT entry also needs a GLOB_DAT relocation, GNU ld's
+ // x86 port places the PLT entry in the .plt.got section.
+ if (RelaDyn)
+ handleRels(RelaDyn->relocations(), GlobDatReloc, ".plt.got");
+
return Result;
}
@@ -681,24 +712,39 @@ template <class ELFT>
Expected<std::vector<BBAddrMap>> static readBBAddrMapImpl(
const ELFFile<ELFT> &EF, std::optional<unsigned> TextSectionIndex) {
using Elf_Shdr = typename ELFT::Shdr;
+ bool IsRelocatable = EF.getHeader().e_type == ELF::ET_REL;
std::vector<BBAddrMap> BBAddrMaps;
+
const auto &Sections = cantFail(EF.sections());
- for (const Elf_Shdr &Sec : Sections) {
+ auto IsMatch = [&](const Elf_Shdr &Sec) -> Expected<bool> {
if (Sec.sh_type != ELF::SHT_LLVM_BB_ADDR_MAP &&
Sec.sh_type != ELF::SHT_LLVM_BB_ADDR_MAP_V0)
- continue;
- if (TextSectionIndex) {
- Expected<const Elf_Shdr *> TextSecOrErr = EF.getSection(Sec.sh_link);
- if (!TextSecOrErr)
- return createError("unable to get the linked-to section for " +
- describe(EF, Sec) + ": " +
- toString(TextSecOrErr.takeError()));
- if (*TextSectionIndex != std::distance(Sections.begin(), *TextSecOrErr))
- continue;
- }
- Expected<std::vector<BBAddrMap>> BBAddrMapOrErr = EF.decodeBBAddrMap(Sec);
+ return false;
+ if (!TextSectionIndex)
+ return true;
+ Expected<const Elf_Shdr *> TextSecOrErr = EF.getSection(Sec.sh_link);
+ if (!TextSecOrErr)
+ return createError("unable to get the linked-to section for " +
+ describe(EF, Sec) + ": " +
+ toString(TextSecOrErr.takeError()));
+ if (*TextSectionIndex != std::distance(Sections.begin(), *TextSecOrErr))
+ return false;
+ return true;
+ };
+
+ Expected<MapVector<const Elf_Shdr *, const Elf_Shdr *>> SectionRelocMapOrErr =
+ EF.getSectionAndRelocations(IsMatch);
+ if (!SectionRelocMapOrErr)
+ return SectionRelocMapOrErr.takeError();
+
+ for (auto const &[Sec, RelocSec] : *SectionRelocMapOrErr) {
+ if (IsRelocatable && !RelocSec)
+ return createError("unable to get relocation section for " +
+ describe(EF, *Sec));
+ Expected<std::vector<BBAddrMap>> BBAddrMapOrErr =
+ EF.decodeBBAddrMap(*Sec, RelocSec);
if (!BBAddrMapOrErr)
- return createError("unable to read " + describe(EF, Sec) + ": " +
+ return createError("unable to read " + describe(EF, *Sec) + ": " +
toString(BBAddrMapOrErr.takeError()));
std::move(BBAddrMapOrErr->begin(), BBAddrMapOrErr->end(),
std::back_inserter(BBAddrMaps));
@@ -783,8 +829,6 @@ Expected<std::vector<BBAddrMap>> ELFObjectFileBase::readBBAddrMap(
return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex);
if (const auto *Obj = dyn_cast<ELF32BEObjectFile>(this))
return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex);
- if (const auto *Obj = cast<ELF64BEObjectFile>(this))
- return readBBAddrMapImpl(Obj->getELFFile(), TextSectionIndex);
- else
- llvm_unreachable("Unsupported binary format");
+ return readBBAddrMapImpl(cast<ELF64BEObjectFile>(this)->getELFFile(),
+ TextSectionIndex);
}
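getPltEntries above works in two passes: collect (PLT address, GOT entry address) pairs from every .plt/.plt.got section, then walk the JUMP_SLOT (.rela.plt) and GLOB_DAT (.rela.dyn) relocations and match each relocation's GOT offset against that map. A stripped-down version of the matching pass, with plain integers in place of SectionRef and relocation iterators:

    #include <cstdint>
    #include <map>
    #include <vector>

    struct Reloc {
      uint32_t Type;      // e.g. R_X86_64_JUMP_SLOT or R_X86_64_GLOB_DAT
      uint64_t GotOffset; // virtual address of the GOT slot it fills
    };

    // Return the PLT entry addresses whose GOT slots are written by relocations
    // of the requested type.
    static std::vector<uint64_t>
    matchPltEntries(const std::map<uint64_t, uint64_t> &GotToPlt,
                    const std::vector<Reloc> &Rels, uint32_t WantedType) {
      std::vector<uint64_t> PltAddrs;
      for (const Reloc &R : Rels) {
        if (R.Type != WantedType)
          continue;
        auto It = GotToPlt.find(R.GotOffset);
        if (It != GotToPlt.end())
          PltAddrs.push_back(It->second);
      }
      return PltAddrs;
    }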
diff --git a/llvm/lib/Object/GOFFObjectFile.cpp b/llvm/lib/Object/GOFFObjectFile.cpp
new file mode 100644
index 000000000000..76a13559ebfe
--- /dev/null
+++ b/llvm/lib/Object/GOFFObjectFile.cpp
@@ -0,0 +1,483 @@
+//===- GOFFObjectFile.cpp - GOFF object file implementation -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the GOFFObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/GOFFObjectFile.h"
+#include "llvm/BinaryFormat/GOFF.h"
+#include "llvm/Object/GOFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/raw_ostream.h"
+
+#ifndef DEBUG_TYPE
+#define DEBUG_TYPE "goff"
+#endif
+
+using namespace llvm::object;
+using namespace llvm;
+
+Expected<std::unique_ptr<ObjectFile>>
+ObjectFile::createGOFFObjectFile(MemoryBufferRef Object) {
+ Error Err = Error::success();
+ std::unique_ptr<GOFFObjectFile> Ret(new GOFFObjectFile(Object, Err));
+ if (Err)
+ return std::move(Err);
+ return std::move(Ret);
+}
+
+GOFFObjectFile::GOFFObjectFile(MemoryBufferRef Object, Error &Err)
+ : ObjectFile(Binary::ID_GOFF, Object) {
+ ErrorAsOutParameter ErrAsOutParam(&Err);
+ // Object file isn't the right size, bail out early.
+ if ((Object.getBufferSize() % GOFF::RecordLength) != 0) {
+ Err = createStringError(
+ object_error::unexpected_eof,
+ "object file is not the right size. Must be a multiple "
+ "of 80 bytes, but is " +
+ std::to_string(Object.getBufferSize()) + " bytes");
+ return;
+ }
+ // Object file doesn't start/end with HDR/END records.
+ // Bail out early.
+ if (Object.getBufferSize() != 0) {
+ if ((base()[1] & 0xF0) >> 4 != GOFF::RT_HDR) {
+ Err = createStringError(object_error::parse_failed,
+ "object file must start with HDR record");
+ return;
+ }
+ if ((base()[Object.getBufferSize() - GOFF::RecordLength + 1] & 0xF0) >> 4 !=
+ GOFF::RT_END) {
+ Err = createStringError(object_error::parse_failed,
+ "object file must end with END record");
+ return;
+ }
+ }
+
+ SectionEntryImpl DummySection;
+ SectionList.emplace_back(DummySection); // Dummy entry at index 0.
+
+ uint8_t PrevRecordType = 0;
+ uint8_t PrevContinuationBits = 0;
+ const uint8_t *End = reinterpret_cast<const uint8_t *>(Data.getBufferEnd());
+ for (const uint8_t *I = base(); I < End; I += GOFF::RecordLength) {
+ uint8_t RecordType = (I[1] & 0xF0) >> 4;
+ bool IsContinuation = I[1] & 0x02;
+ bool PrevWasContinued = PrevContinuationBits & 0x01;
+ size_t RecordNum = (I - base()) / GOFF::RecordLength;
+
+ // If the previous record was continued, the current record should be a
+ // continuation.
+ if (PrevWasContinued && !IsContinuation) {
+ if (PrevRecordType == RecordType) {
+ Err = createStringError(object_error::parse_failed,
+ "record " + std::to_string(RecordNum) +
+ " is not a continuation record but the "
+ "preceding record is continued");
+ return;
+ }
+ }
+    // Don't parse continuation records; only parse the initial record.
+ if (IsContinuation) {
+ if (RecordType != PrevRecordType) {
+ Err = createStringError(object_error::parse_failed,
+ "record " + std::to_string(RecordNum) +
+ " is a continuation record that does not "
+ "match the type of the previous record");
+ return;
+ }
+ if (!PrevWasContinued) {
+ Err = createStringError(object_error::parse_failed,
+ "record " + std::to_string(RecordNum) +
+ " is a continuation record that is not "
+ "preceded by a continued record");
+ return;
+ }
+ PrevRecordType = RecordType;
+ PrevContinuationBits = I[1] & 0x03;
+ continue;
+ }
+
+#ifndef NDEBUG
+ for (size_t J = 0; J < GOFF::RecordLength; ++J) {
+ const uint8_t *P = I + J;
+ if (J % 8 == 0)
+ dbgs() << " ";
+
+ dbgs() << format("%02hhX", *P);
+ }
+#endif
+ switch (RecordType) {
+ case GOFF::RT_ESD: {
+ // Save ESD record.
+ uint32_t EsdId;
+ ESDRecord::getEsdId(I, EsdId);
+ EsdPtrs.grow(EsdId);
+ EsdPtrs[EsdId] = I;
+
+ // Determine and save the "sections" in GOFF.
+ // A section is saved as a tuple of the form
+ // case (1): (ED,child PR)
+ // - where the PR must have non-zero length.
+ // case (2a) (ED,0)
+ // - where the ED is of non-zero length.
+ // case (2b) (ED,0)
+ // - where the ED is zero length but
+ // contains a label (LD).
+ GOFF::ESDSymbolType SymbolType;
+ ESDRecord::getSymbolType(I, SymbolType);
+ SectionEntryImpl Section;
+ uint32_t Length;
+ ESDRecord::getLength(I, Length);
+ if (SymbolType == GOFF::ESD_ST_ElementDefinition) {
+ // case (2a)
+ if (Length != 0) {
+ Section.d.a = EsdId;
+ SectionList.emplace_back(Section);
+ }
+ } else if (SymbolType == GOFF::ESD_ST_PartReference) {
+ // case (1)
+ if (Length != 0) {
+ uint32_t SymEdId;
+ ESDRecord::getParentEsdId(I, SymEdId);
+ Section.d.a = SymEdId;
+ Section.d.b = EsdId;
+ SectionList.emplace_back(Section);
+ }
+ } else if (SymbolType == GOFF::ESD_ST_LabelDefinition) {
+ // case (2b)
+ uint32_t SymEdId;
+ ESDRecord::getParentEsdId(I, SymEdId);
+ const uint8_t *SymEdRecord = EsdPtrs[SymEdId];
+ uint32_t EdLength;
+ ESDRecord::getLength(SymEdRecord, EdLength);
+ if (!EdLength) { // [ EDID, PRID ]
+ // LD child of a zero length parent ED.
+ // Add the section ED which was previously ignored.
+ Section.d.a = SymEdId;
+ SectionList.emplace_back(Section);
+ }
+ }
+ LLVM_DEBUG(dbgs() << " -- ESD " << EsdId << "\n");
+ break;
+ }
+ case GOFF::RT_END:
+ LLVM_DEBUG(dbgs() << " -- END (GOFF record type) unhandled\n");
+ break;
+ case GOFF::RT_HDR:
+ LLVM_DEBUG(dbgs() << " -- HDR (GOFF record type) unhandled\n");
+ break;
+ default:
+ llvm_unreachable("Unknown record type");
+ }
+ PrevRecordType = RecordType;
+ PrevContinuationBits = I[1] & 0x03;
+ }
+}
+
+const uint8_t *GOFFObjectFile::getSymbolEsdRecord(DataRefImpl Symb) const {
+ const uint8_t *EsdRecord = EsdPtrs[Symb.d.a];
+ return EsdRecord;
+}
+
+Expected<StringRef> GOFFObjectFile::getSymbolName(DataRefImpl Symb) const {
+ if (EsdNamesCache.count(Symb.d.a)) {
+ auto &StrPtr = EsdNamesCache[Symb.d.a];
+ return StringRef(StrPtr.second.get(), StrPtr.first);
+ }
+
+ SmallString<256> SymbolName;
+ if (auto Err = ESDRecord::getData(getSymbolEsdRecord(Symb), SymbolName))
+ return std::move(Err);
+
+ SmallString<256> SymbolNameConverted;
+ ConverterEBCDIC::convertToUTF8(SymbolName, SymbolNameConverted);
+
+ size_t Size = SymbolNameConverted.size();
+ auto StrPtr = std::make_pair(Size, std::make_unique<char[]>(Size));
+ char *Buf = StrPtr.second.get();
+ memcpy(Buf, SymbolNameConverted.data(), Size);
+ EsdNamesCache[Symb.d.a] = std::move(StrPtr);
+ return StringRef(Buf, Size);
+}
+
+Expected<StringRef> GOFFObjectFile::getSymbolName(SymbolRef Symbol) const {
+ return getSymbolName(Symbol.getRawDataRefImpl());
+}
+
+Expected<uint64_t> GOFFObjectFile::getSymbolAddress(DataRefImpl Symb) const {
+ uint32_t Offset;
+ const uint8_t *EsdRecord = getSymbolEsdRecord(Symb);
+ ESDRecord::getOffset(EsdRecord, Offset);
+ return static_cast<uint64_t>(Offset);
+}
+
+uint64_t GOFFObjectFile::getSymbolValueImpl(DataRefImpl Symb) const {
+ uint32_t Offset;
+ const uint8_t *EsdRecord = getSymbolEsdRecord(Symb);
+ ESDRecord::getOffset(EsdRecord, Offset);
+ return static_cast<uint64_t>(Offset);
+}
+
+uint64_t GOFFObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
+ return 0;
+}
+
+bool GOFFObjectFile::isSymbolUnresolved(DataRefImpl Symb) const {
+ const uint8_t *Record = getSymbolEsdRecord(Symb);
+ GOFF::ESDSymbolType SymbolType;
+ ESDRecord::getSymbolType(Record, SymbolType);
+
+ if (SymbolType == GOFF::ESD_ST_ExternalReference)
+ return true;
+ if (SymbolType == GOFF::ESD_ST_PartReference) {
+ uint32_t Length;
+ ESDRecord::getLength(Record, Length);
+ if (Length == 0)
+ return true;
+ }
+ return false;
+}
+
+bool GOFFObjectFile::isSymbolIndirect(DataRefImpl Symb) const {
+ const uint8_t *Record = getSymbolEsdRecord(Symb);
+ bool Indirect;
+ ESDRecord::getIndirectReference(Record, Indirect);
+ return Indirect;
+}
+
+Expected<uint32_t> GOFFObjectFile::getSymbolFlags(DataRefImpl Symb) const {
+ uint32_t Flags = 0;
+ if (isSymbolUnresolved(Symb))
+ Flags |= SymbolRef::SF_Undefined;
+
+ const uint8_t *Record = getSymbolEsdRecord(Symb);
+
+ GOFF::ESDBindingStrength BindingStrength;
+ ESDRecord::getBindingStrength(Record, BindingStrength);
+ if (BindingStrength == GOFF::ESD_BST_Weak)
+ Flags |= SymbolRef::SF_Weak;
+
+ GOFF::ESDBindingScope BindingScope;
+ ESDRecord::getBindingScope(Record, BindingScope);
+
+ if (BindingScope != GOFF::ESD_BSC_Section) {
+ Expected<StringRef> Name = getSymbolName(Symb);
+ if (Name && *Name != " ") { // Blank name is local.
+ Flags |= SymbolRef::SF_Global;
+ if (BindingScope == GOFF::ESD_BSC_ImportExport)
+ Flags |= SymbolRef::SF_Exported;
+ else if (!(Flags & SymbolRef::SF_Undefined))
+ Flags |= SymbolRef::SF_Hidden;
+ }
+ }
+
+ return Flags;
+}
+
+Expected<SymbolRef::Type>
+GOFFObjectFile::getSymbolType(DataRefImpl Symb) const {
+ const uint8_t *Record = getSymbolEsdRecord(Symb);
+ GOFF::ESDSymbolType SymbolType;
+ ESDRecord::getSymbolType(Record, SymbolType);
+ GOFF::ESDExecutable Executable;
+ ESDRecord::getExecutable(Record, Executable);
+
+ if (SymbolType != GOFF::ESD_ST_SectionDefinition &&
+ SymbolType != GOFF::ESD_ST_ElementDefinition &&
+ SymbolType != GOFF::ESD_ST_LabelDefinition &&
+ SymbolType != GOFF::ESD_ST_PartReference &&
+ SymbolType != GOFF::ESD_ST_ExternalReference) {
+ uint32_t EsdId;
+ ESDRecord::getEsdId(Record, EsdId);
+ return createStringError(llvm::errc::invalid_argument,
+ "ESD record %" PRIu32
+ " has invalid symbol type 0x%02" PRIX8,
+ EsdId, SymbolType);
+ }
+ switch (SymbolType) {
+ case GOFF::ESD_ST_SectionDefinition:
+ case GOFF::ESD_ST_ElementDefinition:
+ return SymbolRef::ST_Other;
+ case GOFF::ESD_ST_LabelDefinition:
+ case GOFF::ESD_ST_PartReference:
+ case GOFF::ESD_ST_ExternalReference:
+ if (Executable != GOFF::ESD_EXE_CODE && Executable != GOFF::ESD_EXE_DATA &&
+ Executable != GOFF::ESD_EXE_Unspecified) {
+ uint32_t EsdId;
+ ESDRecord::getEsdId(Record, EsdId);
+ return createStringError(llvm::errc::invalid_argument,
+ "ESD record %" PRIu32
+ " has unknown Executable type 0x%02X",
+ EsdId, Executable);
+ }
+ switch (Executable) {
+ case GOFF::ESD_EXE_CODE:
+ return SymbolRef::ST_Function;
+ case GOFF::ESD_EXE_DATA:
+ return SymbolRef::ST_Data;
+ case GOFF::ESD_EXE_Unspecified:
+ return SymbolRef::ST_Unknown;
+ }
+ llvm_unreachable("Unhandled ESDExecutable");
+ }
+ llvm_unreachable("Unhandled ESDSymbolType");
+}
+
+Expected<section_iterator>
+GOFFObjectFile::getSymbolSection(DataRefImpl Symb) const {
+ DataRefImpl Sec;
+
+ if (isSymbolUnresolved(Symb))
+ return section_iterator(SectionRef(Sec, this));
+
+ const uint8_t *SymEsdRecord = EsdPtrs[Symb.d.a];
+ uint32_t SymEdId;
+ ESDRecord::getParentEsdId(SymEsdRecord, SymEdId);
+ const uint8_t *SymEdRecord = EsdPtrs[SymEdId];
+
+ for (size_t I = 0, E = SectionList.size(); I < E; ++I) {
+ bool Found;
+ const uint8_t *SectionPrRecord = getSectionPrEsdRecord(I);
+ if (SectionPrRecord) {
+ Found = SymEsdRecord == SectionPrRecord;
+ } else {
+ const uint8_t *SectionEdRecord = getSectionEdEsdRecord(I);
+ Found = SymEdRecord == SectionEdRecord;
+ }
+
+ if (Found) {
+ Sec.d.a = I;
+ return section_iterator(SectionRef(Sec, this));
+ }
+ }
+ return createStringError(llvm::errc::invalid_argument,
+ "symbol with ESD id " + std::to_string(Symb.d.a) +
+ " refers to invalid section with ESD id " +
+ std::to_string(SymEdId));
+}
+
+const uint8_t *GOFFObjectFile::getSectionEdEsdRecord(DataRefImpl &Sec) const {
+ SectionEntryImpl EsdIds = SectionList[Sec.d.a];
+ const uint8_t *EsdRecord = EsdPtrs[EsdIds.d.a];
+ return EsdRecord;
+}
+
+const uint8_t *GOFFObjectFile::getSectionPrEsdRecord(DataRefImpl &Sec) const {
+ SectionEntryImpl EsdIds = SectionList[Sec.d.a];
+ const uint8_t *EsdRecord = nullptr;
+ if (EsdIds.d.b)
+ EsdRecord = EsdPtrs[EsdIds.d.b];
+ return EsdRecord;
+}
+
+const uint8_t *
+GOFFObjectFile::getSectionEdEsdRecord(uint32_t SectionIndex) const {
+ DataRefImpl Sec;
+ Sec.d.a = SectionIndex;
+ const uint8_t *EsdRecord = getSectionEdEsdRecord(Sec);
+ return EsdRecord;
+}
+
+const uint8_t *
+GOFFObjectFile::getSectionPrEsdRecord(uint32_t SectionIndex) const {
+ DataRefImpl Sec;
+ Sec.d.a = SectionIndex;
+ const uint8_t *EsdRecord = getSectionPrEsdRecord(Sec);
+ return EsdRecord;
+}
+
+section_iterator GOFFObjectFile::section_begin() const {
+ DataRefImpl Sec;
+ moveSectionNext(Sec);
+ return section_iterator(SectionRef(Sec, this));
+}
+
+section_iterator GOFFObjectFile::section_end() const {
+ DataRefImpl Sec;
+ return section_iterator(SectionRef(Sec, this));
+}
+
+void GOFFObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
+ for (uint32_t I = Symb.d.a + 1, E = EsdPtrs.size(); I < E; ++I) {
+ if (EsdPtrs[I]) {
+ const uint8_t *EsdRecord = EsdPtrs[I];
+ GOFF::ESDSymbolType SymbolType;
+ ESDRecord::getSymbolType(EsdRecord, SymbolType);
+ // Skip EDs - i.e. section symbols.
+ bool IgnoreSpecialGOFFSymbols = true;
+ bool SkipSymbol = ((SymbolType == GOFF::ESD_ST_ElementDefinition) ||
+ (SymbolType == GOFF::ESD_ST_SectionDefinition)) &&
+ IgnoreSpecialGOFFSymbols;
+ if (!SkipSymbol) {
+ Symb.d.a = I;
+ return;
+ }
+ }
+ }
+ Symb.d.a = 0;
+}
+
+basic_symbol_iterator GOFFObjectFile::symbol_begin() const {
+ DataRefImpl Symb;
+ moveSymbolNext(Symb);
+ return basic_symbol_iterator(SymbolRef(Symb, this));
+}
+
+basic_symbol_iterator GOFFObjectFile::symbol_end() const {
+ DataRefImpl Symb;
+ return basic_symbol_iterator(SymbolRef(Symb, this));
+}
+
+Error Record::getContinuousData(const uint8_t *Record, uint16_t DataLength,
+ int DataIndex, SmallString<256> &CompleteData) {
+ // First record.
+ const uint8_t *Slice = Record + DataIndex;
+ size_t SliceLength =
+ std::min(DataLength, (uint16_t)(GOFF::RecordLength - DataIndex));
+ CompleteData.append(Slice, Slice + SliceLength);
+ DataLength -= SliceLength;
+ Slice += SliceLength;
+
+ // Continuation records.
+ for (; DataLength > 0;
+ DataLength -= SliceLength, Slice += GOFF::PayloadLength) {
+ // Slice points to the start of the new record.
+ // Check that this block is a Continuation.
+ assert(Record::isContinuation(Slice) && "Continuation bit must be set");
+ // Check that the last Continuation is terminated correctly.
+ if (DataLength <= 77 && Record::isContinued(Slice))
+ return createStringError(object_error::parse_failed,
+ "continued bit should not be set");
+
+ SliceLength = std::min(DataLength, (uint16_t)GOFF::PayloadLength);
+ Slice += GOFF::RecordPrefixLength;
+ CompleteData.append(Slice, Slice + SliceLength);
+ }
+ return Error::success();
+}
+
+Error HDRRecord::getData(const uint8_t *Record,
+ SmallString<256> &CompleteData) {
+ uint16_t Length = getPropertyModuleLength(Record);
+ return getContinuousData(Record, Length, 60, CompleteData);
+}
+
+Error ESDRecord::getData(const uint8_t *Record,
+ SmallString<256> &CompleteData) {
+ uint16_t DataSize = getNameLength(Record);
+ return getContinuousData(Record, DataSize, 72, CompleteData);
+}
+
+Error ENDRecord::getData(const uint8_t *Record,
+ SmallString<256> &CompleteData) {
+ uint16_t Length = getNameLength(Record);
+ return getContinuousData(Record, Length, 26, CompleteData);
+}
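A GOFF file is a sequence of fixed 80-byte records; byte 1 carries the record type in its high nibble and the continued/continuation flags in its low two bits, which is exactly what the constructor above checks before dispatching on RT_HDR, RT_ESD, and RT_END. A tiny decoder for that prefix, assuming the same bit layout as the code above:

    #include <cstddef>
    #include <cstdint>

    // Every GOFF record is a fixed 80 bytes (GOFF::RecordLength in LLVM).
    constexpr std::size_t kRecordLength = 80;

    struct RecordPrefix {
      uint8_t Type;        // high nibble of byte 1: HDR, ESD, TXT, END, ...
      bool IsContinued;    // low bit 0x01: a continuation record follows
      bool IsContinuation; // low bit 0x02: this record continues the previous one
    };

    static RecordPrefix decodePrefix(const uint8_t *Record) {
      RecordPrefix P;
      P.Type = (Record[1] & 0xF0) >> 4;
      P.IsContinuation = (Record[1] & 0x02) != 0;
      P.IsContinued = (Record[1] & 0x01) != 0;
      return P;
    }

    // Number of whole records in a buffer; the object reader additionally
    // requires the size to be an exact multiple of kRecordLength.
    static std::size_t recordCount(std::size_t BufferSize) {
      return BufferSize / kRecordLength;
    }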
diff --git a/llvm/lib/Object/IRSymtab.cpp b/llvm/lib/Object/IRSymtab.cpp
index 54ee000b302f..14db7a10f310 100644
--- a/llvm/lib/Object/IRSymtab.cpp
+++ b/llvm/lib/Object/IRSymtab.cpp
@@ -13,7 +13,6 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Comdat.h"
@@ -33,6 +32,7 @@
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <string>
#include <utility>
@@ -41,8 +41,8 @@
using namespace llvm;
using namespace irsymtab;
-cl::opt<bool> DisableBitcodeVersionUpgrade(
- "disable-bitcode-version-upgrade", cl::init(false), cl::Hidden,
+static cl::opt<bool> DisableBitcodeVersionUpgrade(
+ "disable-bitcode-version-upgrade", cl::Hidden,
cl::desc("Disable automatic bitcode upgrade for version mismatch"));
static const char *PreservedSymbols[] = {
@@ -259,7 +259,7 @@ Error Builder::addSymbol(const ModuleSymbolTable &Msymtab,
Sym.Flags |= 1 << storage::Symbol::FB_executable;
Sym.ComdatIndex = -1;
- auto *GV = Msym.dyn_cast<GlobalValue *>();
+ auto *GV = dyn_cast_if_present<GlobalValue *>(Msym);
if (!GV) {
// Undefined module asm symbols act as GC roots and are implicitly used.
if (Flags & object::BasicSymbolRef::SF_Undefined)
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index 9c0b85cf7416..6ca83a955d5a 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/bit.h"
#include "llvm/BinaryFormat/MachO.h"
@@ -32,12 +31,13 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
diff --git a/llvm/lib/Object/MachOUniversalWriter.cpp b/llvm/lib/Object/MachOUniversalWriter.cpp
index 333706baf8c1..909a10b2c072 100644
--- a/llvm/lib/Object/MachOUniversalWriter.cpp
+++ b/llvm/lib/Object/MachOUniversalWriter.cpp
@@ -14,7 +14,6 @@
#include "llvm/Object/MachOUniversalWriter.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/IRObjectFile.h"
@@ -27,6 +26,7 @@
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
using namespace object;
@@ -54,8 +54,8 @@ static uint32_t calculateFileAlignment(const MachOObjectFile &O) {
}
} else {
P2CurrentAlignment =
- countTrailingZeros(Is64Bit ? O.getSegment64LoadCommand(LC).vmaddr
- : O.getSegmentLoadCommand(LC).vmaddr);
+ llvm::countr_zero(Is64Bit ? O.getSegment64LoadCommand(LC).vmaddr
+ : O.getSegmentLoadCommand(LC).vmaddr);
}
P2MinAlignment = std::min(P2MinAlignment, P2CurrentAlignment);
}
diff --git a/llvm/lib/Object/ModuleSymbolTable.cpp b/llvm/lib/Object/ModuleSymbolTable.cpp
index 11274a7fcc16..0290a819e5de 100644
--- a/llvm/lib/Object/ModuleSymbolTable.cpp
+++ b/llvm/lib/Object/ModuleSymbolTable.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
@@ -42,6 +41,7 @@
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -174,12 +174,12 @@ void ModuleSymbolTable::CollectAsmSymvers(
}
void ModuleSymbolTable::printSymbolName(raw_ostream &OS, Symbol S) const {
- if (S.is<AsmSymbol *>()) {
- OS << S.get<AsmSymbol *>()->first;
+ if (isa<AsmSymbol *>(S)) {
+ OS << cast<AsmSymbol *>(S)->first;
return;
}
- auto *GV = S.get<GlobalValue *>();
+ auto *GV = cast<GlobalValue *>(S);
if (GV->hasDLLImportStorageClass())
OS << "__imp_";
@@ -187,10 +187,10 @@ void ModuleSymbolTable::printSymbolName(raw_ostream &OS, Symbol S) const {
}
uint32_t ModuleSymbolTable::getSymbolFlags(Symbol S) const {
- if (S.is<AsmSymbol *>())
- return S.get<AsmSymbol *>()->second;
+ if (isa<AsmSymbol *>(S))
+ return cast<AsmSymbol *>(S)->second;
- auto *GV = S.get<GlobalValue *>();
+ auto *GV = cast<GlobalValue *>(S);
uint32_t Res = BasicSymbolRef::SF_None;
if (GV->isDeclarationForLinker())
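
The ModuleSymbolTable hunks above move from PointerUnion::is()/get() to the free isa<>/cast<> functions. A hedged sketch of that usage pattern against llvm/ADT/PointerUnion.h; Foo, Bar and valueOf are invented stand-ins for the AsmSymbol*/GlobalValue* union, not ModuleSymbolTable code:

#include "llvm/ADT/PointerUnion.h"
#include "llvm/Support/Casting.h"
#include <cstdio>

struct Foo { int Value = 1; };
struct Bar { int Value = 2; };
using FooOrBar = llvm::PointerUnion<Foo *, Bar *>;

static int valueOf(FooOrBar U) {
  // The new-style casts operate directly on the PointerUnion.
  if (llvm::isa<Foo *>(U))
    return llvm::cast<Foo *>(U)->Value;
  return llvm::cast<Bar *>(U)->Value;
}

int main() {
  Foo F;
  FooOrBar U = &F;
  std::printf("%d\n", valueOf(U)); // 1
}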
diff --git a/llvm/lib/Object/ObjectFile.cpp b/llvm/lib/Object/ObjectFile.cpp
index 56a1d09097d4..0820187f32e1 100644
--- a/llvm/lib/Object/ObjectFile.cpp
+++ b/llvm/lib/Object/ObjectFile.cpp
@@ -79,7 +79,7 @@ uint32_t ObjectFile::getSymbolAlignment(DataRefImpl DRI) const { return 0; }
bool ObjectFile::isSectionBitcode(DataRefImpl Sec) const {
Expected<StringRef> NameOrErr = getSectionName(Sec);
if (NameOrErr)
- return *NameOrErr == ".llvmbc";
+ return *NameOrErr == ".llvmbc" || *NameOrErr == ".llvm.lto";
consumeError(NameOrErr.takeError());
return false;
}
@@ -130,6 +130,10 @@ Triple ObjectFile::makeTriple() const {
TheTriple.setOS(Triple::AIX);
TheTriple.setObjectFormat(Triple::XCOFF);
}
+ else if (isGOFF()) {
+ TheTriple.setOS(Triple::ZOS);
+ TheTriple.setObjectFormat(Triple::GOFF);
+ }
return TheTriple;
}
diff --git a/llvm/lib/Object/OffloadBinary.cpp b/llvm/lib/Object/OffloadBinary.cpp
index d8cdcdc21d39..342327daf7e4 100644
--- a/llvm/lib/Object/OffloadBinary.cpp
+++ b/llvm/lib/Object/OffloadBinary.cpp
@@ -209,8 +209,8 @@ OffloadBinary::write(const OffloadingImage &OffloadingData) {
// Create a null-terminated string table with all the used strings.
StringTableBuilder StrTab(StringTableBuilder::ELF);
for (auto &KeyAndValue : OffloadingData.StringData) {
- StrTab.add(KeyAndValue.getKey());
- StrTab.add(KeyAndValue.getValue());
+ StrTab.add(KeyAndValue.first);
+ StrTab.add(KeyAndValue.second);
}
StrTab.finalize();
@@ -250,8 +250,8 @@ OffloadBinary::write(const OffloadingImage &OffloadingData) {
OS << StringRef(reinterpret_cast<char *>(&TheEntry), sizeof(Entry));
for (auto &KeyAndValue : OffloadingData.StringData) {
uint64_t Offset = sizeof(Header) + sizeof(Entry) + StringEntrySize;
- StringEntry Map{Offset + StrTab.getOffset(KeyAndValue.getKey()),
- Offset + StrTab.getOffset(KeyAndValue.getValue())};
+ StringEntry Map{Offset + StrTab.getOffset(KeyAndValue.first),
+ Offset + StrTab.getOffset(KeyAndValue.second)};
OS << StringRef(reinterpret_cast<char *>(&Map), sizeof(StringEntry));
}
StrTab.write(OS);
diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp
index 13a7a9851137..03ac59289528 100644
--- a/llvm/lib/Object/RelocationResolver.cpp
+++ b/llvm/lib/Object/RelocationResolver.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/RelocationResolver.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/ELF.h"
@@ -24,6 +23,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <vector>
@@ -252,6 +252,19 @@ static uint64_t resolveSparc64(uint64_t Type, uint64_t Offset, uint64_t S,
}
}
+/// Returns true if \c Obj is an AMDGPU code object based solely on the value
+/// of e_machine.
+///
+/// AMDGPU code objects with an e_machine of EF_AMDGPU_MACH_NONE do not
+/// identify their arch as either r600 or amdgcn, but we can still handle
+/// their relocations. When we identify an ELF object with an UnknownArch,
+/// we use isAMDGPU to check for this case.
+static bool isAMDGPU(const ObjectFile &Obj) {
+ if (const auto *ELFObj = dyn_cast<ELFObjectFileBase>(&Obj))
+ return ELFObj->getEMachine() == ELF::EM_AMDGPU;
+ return false;
+}
+
static bool supportsAmdgpu(uint64_t Type) {
switch (Type) {
case ELF::R_AMDGPU_ABS32:
@@ -789,6 +802,8 @@ getRelocationResolver(const ObjectFile &Obj) {
case Triple::riscv64:
return {supportsRISCV, resolveRISCV};
default:
+ if (isAMDGPU(Obj))
+ return {supportsAmdgpu, resolveAmdgpu};
return {nullptr, nullptr};
}
}
@@ -821,11 +836,15 @@ getRelocationResolver(const ObjectFile &Obj) {
return {supportsSparc32, resolveSparc32};
case Triple::hexagon:
return {supportsHexagon, resolveHexagon};
+ case Triple::r600:
+ return {supportsAmdgpu, resolveAmdgpu};
case Triple::riscv32:
return {supportsRISCV, resolveRISCV};
case Triple::csky:
return {supportsCSKY, resolveCSKY};
default:
+ if (isAMDGPU(Obj))
+ return {supportsAmdgpu, resolveAmdgpu};
return {nullptr, nullptr};
}
} else if (Obj.isMachO()) {
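
The RelocationResolver change above consults the ELF header's e_machine when the triple reports an unknown arch, so AMDGPU objects carrying EF_AMDGPU_MACH_NONE still get a resolver. A standalone sketch of that try-the-triple-then-the-header selection; every type below is invented for illustration and is not LLVM's API:

#include <cstdint>
#include <utility>

enum class Arch { Unknown, AMDGCN };      // invented stand-in for Triple::ArchType
constexpr uint16_t EM_AMDGPU = 224;       // e_machine value for AMDGPU

struct FakeObject {
  Arch TripleArch;    // what the triple claims
  uint16_t EMachine;  // what the ELF header says
};

using SupportsFn = bool (*)(uint64_t Type);
using ResolveFn = uint64_t (*)(uint64_t Type, uint64_t S, int64_t Addend);

static bool supportsAmdgpu(uint64_t) { return true; }
static uint64_t resolveAmdgpu(uint64_t, uint64_t S, int64_t A) { return S + A; }

static std::pair<SupportsFn, ResolveFn> getResolver(const FakeObject &Obj) {
  switch (Obj.TripleArch) {
  case Arch::AMDGCN:
    return {supportsAmdgpu, resolveAmdgpu};
  default:
    // The triple did not identify the target; consult the raw ELF header
    // before giving up, mirroring the isAMDGPU() fallback in the hunk above.
    if (Obj.EMachine == EM_AMDGPU)
      return {supportsAmdgpu, resolveAmdgpu};
    return {nullptr, nullptr};
  }
}

int main() {
  FakeObject Obj{Arch::Unknown, EM_AMDGPU};
  auto [Supports, Resolve] = getResolver(Obj);
  return (Supports && Resolve) ? 0 : 1;
}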
diff --git a/llvm/lib/Object/TapiFile.cpp b/llvm/lib/Object/TapiFile.cpp
index 596445a09e85..b5f4d277bbfe 100644
--- a/llvm/lib/Object/TapiFile.cpp
+++ b/llvm/lib/Object/TapiFile.cpp
@@ -37,35 +37,46 @@ static uint32_t getFlags(const Symbol *Sym) {
return Flags;
}
-TapiFile::TapiFile(MemoryBufferRef Source, const InterfaceFile &interface,
+static SymbolRef::Type getType(const Symbol *Sym) {
+ SymbolRef::Type Type = SymbolRef::ST_Unknown;
+ if (Sym->isData())
+ Type = SymbolRef::ST_Data;
+ else if (Sym->isText())
+ Type = SymbolRef::ST_Function;
+
+ return Type;
+}
+
+TapiFile::TapiFile(MemoryBufferRef Source, const InterfaceFile &Interface,
Architecture Arch)
: SymbolicFile(ID_TapiFile, Source), Arch(Arch) {
- for (const auto *Symbol : interface.symbols()) {
+ for (const auto *Symbol : Interface.symbols()) {
if (!Symbol->getArchitectures().has(Arch))
continue;
switch (Symbol->getKind()) {
case SymbolKind::GlobalSymbol:
- Symbols.emplace_back(StringRef(), Symbol->getName(), getFlags(Symbol));
+ Symbols.emplace_back(StringRef(), Symbol->getName(), getFlags(Symbol),
+ ::getType(Symbol));
break;
case SymbolKind::ObjectiveCClass:
- if (interface.getPlatforms().count(PLATFORM_MACOS) && Arch == AK_i386) {
+ if (Interface.getPlatforms().count(PLATFORM_MACOS) && Arch == AK_i386) {
Symbols.emplace_back(ObjC1ClassNamePrefix, Symbol->getName(),
- getFlags(Symbol));
+ getFlags(Symbol), ::getType(Symbol));
} else {
Symbols.emplace_back(ObjC2ClassNamePrefix, Symbol->getName(),
- getFlags(Symbol));
+ getFlags(Symbol), ::getType(Symbol));
Symbols.emplace_back(ObjC2MetaClassNamePrefix, Symbol->getName(),
- getFlags(Symbol));
+ getFlags(Symbol), ::getType(Symbol));
}
break;
case SymbolKind::ObjectiveCClassEHType:
Symbols.emplace_back(ObjC2EHTypePrefix, Symbol->getName(),
- getFlags(Symbol));
+ getFlags(Symbol), ::getType(Symbol));
break;
case SymbolKind::ObjectiveCInstanceVariable:
- Symbols.emplace_back(ObjC2IVarPrefix, Symbol->getName(),
- getFlags(Symbol));
+ Symbols.emplace_back(ObjC2IVarPrefix, Symbol->getName(), getFlags(Symbol),
+ ::getType(Symbol));
break;
}
}
@@ -82,6 +93,11 @@ Error TapiFile::printSymbolName(raw_ostream &OS, DataRefImpl DRI) const {
return Error::success();
}
+Expected<SymbolRef::Type> TapiFile::getSymbolType(DataRefImpl DRI) const {
+ assert(DRI.d.a < Symbols.size() && "Attempt to access symbol out of bounds");
+ return Symbols[DRI.d.a].Type;
+}
+
Expected<uint32_t> TapiFile::getSymbolFlags(DataRefImpl DRI) const {
assert(DRI.d.a < Symbols.size() && "Attempt to access symbol out of bounds");
return Symbols[DRI.d.a].Flags;
diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp
index 1e98de7cd42e..a72242bc4ac2 100644
--- a/llvm/lib/Object/WasmObjectFile.cpp
+++ b/llvm/lib/Object/WasmObjectFile.cpp
@@ -12,9 +12,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/Wasm.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/Error.h"
#include "llvm/Object/ObjectFile.h"
@@ -25,6 +23,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -38,7 +38,18 @@ using namespace object;
void WasmSymbol::print(raw_ostream &Out) const {
Out << "Name=" << Info.Name
<< ", Kind=" << toString(wasm::WasmSymbolType(Info.Kind)) << ", Flags=0x"
- << Twine::utohexstr(Info.Flags);
+ << Twine::utohexstr(Info.Flags) << " [";
+ switch (getBinding()) {
+ case wasm::WASM_SYMBOL_BINDING_GLOBAL: Out << "global"; break;
+ case wasm::WASM_SYMBOL_BINDING_LOCAL: Out << "local"; break;
+ case wasm::WASM_SYMBOL_BINDING_WEAK: Out << "weak"; break;
+ }
+ if (isHidden()) {
+ Out << ", hidden";
+ } else {
+ Out << ", default";
+ }
+ Out << "]";
if (!isTypeData()) {
Out << ", ElemIndex=" << Info.ElementIndex;
} else if (isDefined()) {
@@ -937,6 +948,7 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
Reloc.Index = readVaruint32(Ctx);
switch (type) {
case wasm::R_WASM_FUNCTION_INDEX_LEB:
+ case wasm::R_WASM_FUNCTION_INDEX_I32:
case wasm::R_WASM_TABLE_INDEX_SLEB:
case wasm::R_WASM_TABLE_INDEX_SLEB64:
case wasm::R_WASM_TABLE_INDEX_I32:
@@ -1034,6 +1046,7 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) {
Reloc.Type == wasm::R_WASM_MEMORY_ADDR_LOCREL_I32 ||
Reloc.Type == wasm::R_WASM_SECTION_OFFSET_I32 ||
Reloc.Type == wasm::R_WASM_FUNCTION_OFFSET_I32 ||
+ Reloc.Type == wasm::R_WASM_FUNCTION_INDEX_I32 ||
Reloc.Type == wasm::R_WASM_GLOBAL_INDEX_I32)
Size = 4;
if (Reloc.Type == wasm::R_WASM_TABLE_INDEX_I64 ||
diff --git a/llvm/lib/Object/WindowsMachineFlag.cpp b/llvm/lib/Object/WindowsMachineFlag.cpp
index 8335ea745548..b9f818775768 100644
--- a/llvm/lib/Object/WindowsMachineFlag.cpp
+++ b/llvm/lib/Object/WindowsMachineFlag.cpp
@@ -27,6 +27,7 @@ COFF::MachineTypes llvm::getMachineType(StringRef S) {
.Case("arm", COFF::IMAGE_FILE_MACHINE_ARMNT)
.Case("arm64", COFF::IMAGE_FILE_MACHINE_ARM64)
.Case("arm64ec", COFF::IMAGE_FILE_MACHINE_ARM64EC)
+ .Case("arm64x", COFF::IMAGE_FILE_MACHINE_ARM64X)
.Default(COFF::IMAGE_FILE_MACHINE_UNKNOWN);
}
@@ -38,6 +39,8 @@ StringRef llvm::machineToStr(COFF::MachineTypes MT) {
return "arm64";
case COFF::IMAGE_FILE_MACHINE_ARM64EC:
return "arm64ec";
+ case COFF::IMAGE_FILE_MACHINE_ARM64X:
+ return "arm64x";
case COFF::IMAGE_FILE_MACHINE_AMD64:
return "x64";
case COFF::IMAGE_FILE_MACHINE_I386:
diff --git a/llvm/lib/Object/WindowsResource.cpp b/llvm/lib/Object/WindowsResource.cpp
index 089a3fa0f91f..0764dc8f7523 100644
--- a/llvm/lib/Object/WindowsResource.cpp
+++ b/llvm/lib/Object/WindowsResource.cpp
@@ -990,6 +990,7 @@ void WindowsResourceCOFFWriter::writeFirstSectionRelocations() {
break;
case COFF::IMAGE_FILE_MACHINE_ARM64:
case COFF::IMAGE_FILE_MACHINE_ARM64EC:
+ case COFF::IMAGE_FILE_MACHINE_ARM64X:
Reloc->Type = COFF::IMAGE_REL_ARM64_ADDR32NB;
break;
default:
diff --git a/llvm/lib/Object/XCOFFObjectFile.cpp b/llvm/lib/Object/XCOFFObjectFile.cpp
index 68baefcd6eaa..fa4917e354e9 100644
--- a/llvm/lib/Object/XCOFFObjectFile.cpp
+++ b/llvm/lib/Object/XCOFFObjectFile.cpp
@@ -12,8 +12,8 @@
#include "llvm/Object/XCOFFObjectFile.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/DataExtractor.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include <cstddef>
#include <cstring>
@@ -1217,6 +1217,10 @@ ObjectFile::createXCOFFObjectFile(MemoryBufferRef MemBufRef,
return XCOFFObjectFile::create(FileType, MemBufRef);
}
+std::optional<StringRef> XCOFFObjectFile::tryGetCPUName() const {
+ return StringRef("future");
+}
+
bool XCOFFSymbolRef::isFunction() const {
if (!isCsectSymbol())
return false;
@@ -1394,18 +1398,18 @@ bool TBVectorExt::hasVMXInstruction() const {
#undef GETVALUEWITHMASK
#undef GETVALUEWITHMASKSHIFT
-Expected<XCOFFTracebackTable> XCOFFTracebackTable::create(const uint8_t *Ptr,
- uint64_t &Size) {
+Expected<XCOFFTracebackTable>
+XCOFFTracebackTable::create(const uint8_t *Ptr, uint64_t &Size, bool Is64Bit) {
Error Err = Error::success();
- XCOFFTracebackTable TBT(Ptr, Size, Err);
+ XCOFFTracebackTable TBT(Ptr, Size, Err, Is64Bit);
if (Err)
return std::move(Err);
return TBT;
}
XCOFFTracebackTable::XCOFFTracebackTable(const uint8_t *Ptr, uint64_t &Size,
- Error &Err)
- : TBPtr(Ptr) {
+ Error &Err, bool Is64Bit)
+ : TBPtr(Ptr), Is64BitObj(Is64Bit) {
ErrorAsOutParameter EAO(&Err);
DataExtractor DE(ArrayRef<uint8_t>(Ptr, Size), /*IsLittleEndian=*/false,
/*AddressSize=*/0);
@@ -1460,6 +1464,8 @@ XCOFFTracebackTable::XCOFFTracebackTable(const uint8_t *Ptr, uint64_t &Size,
}
VecExt = TBVecExtOrErr.get();
VectorParmsNum = VecExt->getNumberOfVectorParms();
+ // Skip two bytes of padding after vector info.
+ DE.skip(Cur, 2);
}
}
@@ -1480,9 +1486,15 @@ XCOFFTracebackTable::XCOFFTracebackTable(const uint8_t *Ptr, uint64_t &Size,
ParmsType = ParmsTypeOrError.get();
}
- if (Cur && hasExtensionTable())
+ if (Cur && hasExtensionTable()) {
ExtensionTable = DE.getU8(Cur);
+ if (*ExtensionTable & ExtendedTBTableFlag::TB_EH_INFO) {
+ // eh_info displacement must be 4-byte aligned.
+ Cur.seek(alignTo(Cur.tell(), 4));
+ EhInfoDisp = Is64BitObj ? DE.getU64(Cur) : DE.getU32(Cur);
+ }
+ }
if (!Cur)
Err = Cur.takeError();
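
The new extension-table code above seeks to the next 4-byte boundary before reading the eh_info displacement. A minimal sketch of that round-up for a power-of-two alignment (llvm::alignTo performs the same computation):

#include <cassert>
#include <cstdint>
#include <cstdio>

// Round Offset up to the next multiple of Align (Align must be a power of two).
static uint64_t alignUp(uint64_t Offset, uint64_t Align) {
  assert(Align && (Align & (Align - 1)) == 0 && "Align must be a power of two");
  return (Offset + Align - 1) & ~(Align - 1);
}

int main() {
  std::printf("%llu\n", (unsigned long long)alignUp(13, 4)); // 16
  std::printf("%llu\n", (unsigned long long)alignUp(16, 4)); // 16 (already aligned)
}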
diff --git a/llvm/lib/ObjectYAML/COFFEmitter.cpp b/llvm/lib/ObjectYAML/COFFEmitter.cpp
index a57da9b3287d..2e72e4fa7f49 100644
--- a/llvm/lib/ObjectYAML/COFFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/COFFEmitter.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
-#include "llvm/Object/COFF.h"
#include "llvm/ObjectYAML/ObjectYAML.h"
#include "llvm/ObjectYAML/yaml2obj.h"
#include "llvm/Support/BinaryStreamWriter.h"
@@ -48,11 +47,7 @@ struct COFFParser {
}
bool isPE() const { return Obj.OptionalHeader.has_value(); }
- bool is64Bit() const {
- return Obj.Header.Machine == COFF::IMAGE_FILE_MACHINE_AMD64 ||
- Obj.Header.Machine == COFF::IMAGE_FILE_MACHINE_ARM64 ||
- Obj.Header.Machine == COFF::IMAGE_FILE_MACHINE_ARM64EC;
- }
+ bool is64Bit() const { return COFF::is64Bit(Obj.Header.Machine); }
uint32_t getFileAlignment() const {
return Obj.OptionalHeader->Header.FileAlignment;
@@ -243,10 +238,13 @@ static bool layoutCOFF(COFFParser &CP) {
S.SectionData = CodeViewYAML::toDebugH(*S.DebugH, CP.Allocator);
}
- if (S.SectionData.binary_size() > 0) {
+ size_t DataSize = S.SectionData.binary_size();
+ for (auto E : S.StructuredData)
+ DataSize += E.size();
+ if (DataSize > 0) {
CurrentSectionDataOffset = alignTo(CurrentSectionDataOffset,
CP.isPE() ? CP.getFileAlignment() : 4);
- S.Header.SizeOfRawData = S.SectionData.binary_size();
+ S.Header.SizeOfRawData = DataSize;
if (CP.isPE())
S.Header.SizeOfRawData =
alignTo(S.Header.SizeOfRawData, CP.getFileAlignment());
@@ -497,9 +495,12 @@ static bool writeCOFF(COFFParser &CP, raw_ostream &OS) {
continue;
assert(S.Header.PointerToRawData >= OS.tell());
OS.write_zeros(S.Header.PointerToRawData - OS.tell());
+ for (auto E : S.StructuredData)
+ E.writeAsBinary(OS);
S.SectionData.writeAsBinary(OS);
assert(S.Header.SizeOfRawData >= S.SectionData.binary_size());
- OS.write_zeros(S.Header.SizeOfRawData - S.SectionData.binary_size());
+ OS.write_zeros(S.Header.PointerToRawData + S.Header.SizeOfRawData -
+ OS.tell());
if (S.Header.Characteristics & COFF::IMAGE_SCN_LNK_NRELOC_OVFL)
OS << binary_le<uint32_t>(/*VirtualAddress=*/ S.Relocations.size() + 1)
<< binary_le<uint32_t>(/*SymbolTableIndex=*/ 0)
@@ -589,6 +590,34 @@ static bool writeCOFF(COFFParser &CP, raw_ostream &OS) {
return true;
}
+size_t COFFYAML::SectionDataEntry::size() const {
+ size_t Size = Binary.binary_size();
+ if (UInt32)
+ Size += sizeof(*UInt32);
+ if (LoadConfig32)
+ Size += LoadConfig32->Size;
+ if (LoadConfig64)
+ Size += LoadConfig64->Size;
+ return Size;
+}
+
+template <typename T> static void writeLoadConfig(T &S, raw_ostream &OS) {
+ OS.write(reinterpret_cast<const char *>(&S),
+ std::min(sizeof(S), static_cast<size_t>(S.Size)));
+ if (sizeof(S) < S.Size)
+ OS.write_zeros(S.Size - sizeof(S));
+}
+
+void COFFYAML::SectionDataEntry::writeAsBinary(raw_ostream &OS) const {
+ if (UInt32)
+ OS << binary_le(*UInt32);
+ Binary.writeAsBinary(OS);
+ if (LoadConfig32)
+ writeLoadConfig(*LoadConfig32, OS);
+ if (LoadConfig64)
+ writeLoadConfig(*LoadConfig64, OS);
+}
+
namespace llvm {
namespace yaml {
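
writeLoadConfig above emits exactly Size bytes: the in-memory struct is truncated when the declared size is smaller and zero-padded when it is larger. A standalone sketch of that pattern, assuming an invented SizedRecord and std::ostream in place of raw_ostream:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <ostream>

// Invented example record: a leading Size field says how many bytes of the
// structure are considered valid on disk.
struct SizedRecord {
  uint32_t Size = sizeof(SizedRecord);
  uint32_t FieldA = 0;
  uint64_t FieldB = 0;
};

// Write min(sizeof(R), R.Size) bytes of the struct, then zero-pad up to R.Size.
static void writeSized(const SizedRecord &R, std::ostream &OS) {
  size_t ToCopy = std::min(sizeof(R), static_cast<size_t>(R.Size));
  OS.write(reinterpret_cast<const char *>(&R), ToCopy);
  for (size_t I = ToCopy; I < R.Size; ++I)
    OS.put('\0');
}

int main() {
  SizedRecord R;
  R.Size = 24; // pretend the on-disk format is larger than our struct
  writeSized(R, std::cout);
}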
diff --git a/llvm/lib/ObjectYAML/COFFYAML.cpp b/llvm/lib/ObjectYAML/COFFYAML.cpp
index 2fa0433a24f8..3fe2ea5af08f 100644
--- a/llvm/lib/ObjectYAML/COFFYAML.cpp
+++ b/llvm/lib/ObjectYAML/COFFYAML.cpp
@@ -66,6 +66,7 @@ void ScalarEnumerationTraits<COFF::MachineTypes>::enumeration(
ECase(IMAGE_FILE_MACHINE_ARMNT);
ECase(IMAGE_FILE_MACHINE_ARM64);
ECase(IMAGE_FILE_MACHINE_ARM64EC);
+ ECase(IMAGE_FILE_MACHINE_ARM64X);
ECase(IMAGE_FILE_MACHINE_EBC);
ECase(IMAGE_FILE_MACHINE_I386);
ECase(IMAGE_FILE_MACHINE_IA64);
@@ -430,8 +431,7 @@ void MappingTraits<COFFYAML::Relocation>::mapping(IO &IO,
MappingNormalization<NType<COFF::RelocationTypesARM>, uint16_t> NT(
IO, Rel.Type);
IO.mapRequired("Type", NT->Type);
- } else if (H.Machine == COFF::IMAGE_FILE_MACHINE_ARM64 ||
- H.Machine == COFF::IMAGE_FILE_MACHINE_ARM64EC) {
+ } else if (COFF::isAnyArm64(H.Machine)) {
MappingNormalization<NType<COFF::RelocationTypesARM64>, uint16_t> NT(
IO, Rel.Type);
IO.mapRequired("Type", NT->Type);
@@ -547,6 +547,102 @@ void MappingTraits<COFF::AuxiliaryCLRToken>::mapping(
IO.mapRequired("SymbolTableIndex", ACT.SymbolTableIndex);
}
+void MappingTraits<object::coff_load_config_code_integrity>::mapping(
+ IO &IO, object::coff_load_config_code_integrity &S) {
+ IO.mapOptional("Flags", S.Flags);
+ IO.mapOptional("Catalog", S.Catalog);
+ IO.mapOptional("CatalogOffset", S.CatalogOffset);
+}
+
+template <typename T, typename M>
+void mapLoadConfigMember(IO &IO, T &LoadConfig, const char *Name, M &Member) {
+ // Map only members that match a specified size.
+ if (reinterpret_cast<char *>(&Member) -
+ reinterpret_cast<char *>(&LoadConfig) <
+ LoadConfig.Size)
+ IO.mapOptional(Name, Member);
+}
+
+template <typename T> void mapLoadConfig(IO &IO, T &LoadConfig) {
+ IO.mapOptional("Size", LoadConfig.Size,
+ support::ulittle32_t(sizeof(LoadConfig)));
+ // The size must be large enough to fit at least the size member itself.
+ if (LoadConfig.Size < sizeof(LoadConfig.Size)) {
+ IO.setError("Size must be at least " + Twine(sizeof(LoadConfig.Size)));
+ return;
+ }
+
+#define MCase(X) mapLoadConfigMember(IO, LoadConfig, #X, LoadConfig.X)
+ MCase(TimeDateStamp);
+ MCase(MajorVersion);
+ MCase(MinorVersion);
+ MCase(GlobalFlagsClear);
+ MCase(GlobalFlagsSet);
+ MCase(CriticalSectionDefaultTimeout);
+ MCase(DeCommitFreeBlockThreshold);
+ MCase(DeCommitTotalFreeThreshold);
+ MCase(LockPrefixTable);
+ MCase(MaximumAllocationSize);
+ MCase(VirtualMemoryThreshold);
+ MCase(ProcessAffinityMask);
+ MCase(ProcessHeapFlags);
+ MCase(CSDVersion);
+ MCase(DependentLoadFlags);
+ MCase(EditList);
+ MCase(SecurityCookie);
+ MCase(SEHandlerTable);
+ MCase(SEHandlerCount);
+ MCase(GuardCFCheckFunction);
+ MCase(GuardCFCheckDispatch);
+ MCase(GuardCFFunctionTable);
+ MCase(GuardCFFunctionCount);
+ MCase(GuardFlags);
+ MCase(CodeIntegrity);
+ MCase(GuardAddressTakenIatEntryTable);
+ MCase(GuardAddressTakenIatEntryCount);
+ MCase(GuardLongJumpTargetTable);
+ MCase(GuardLongJumpTargetCount);
+ MCase(DynamicValueRelocTable);
+ MCase(CHPEMetadataPointer);
+ MCase(GuardRFFailureRoutine);
+ MCase(GuardRFFailureRoutineFunctionPointer);
+ MCase(DynamicValueRelocTableOffset);
+ MCase(DynamicValueRelocTableSection);
+ MCase(GuardRFVerifyStackPointerFunctionPointer);
+ MCase(HotPatchTableOffset);
+ MCase(EnclaveConfigurationPointer);
+ MCase(VolatileMetadataPointer);
+ MCase(GuardEHContinuationTable);
+ MCase(GuardEHContinuationCount);
+ MCase(GuardXFGCheckFunctionPointer);
+ MCase(GuardXFGDispatchFunctionPointer);
+ MCase(GuardXFGTableDispatchFunctionPointer);
+ MCase(CastGuardOsDeterminedFailureMode);
+#undef MCase
+}
+
+void MappingTraits<object::coff_load_configuration32>::mapping(
+ IO &IO, object::coff_load_configuration32 &S) {
+ mapLoadConfig(IO, S);
+}
+
+void MappingTraits<object::coff_load_configuration64>::mapping(
+ IO &IO, object::coff_load_configuration64 &S) {
+ mapLoadConfig(IO, S);
+}
+
+void MappingTraits<COFFYAML::SectionDataEntry>::mapping(
+ IO &IO, COFFYAML::SectionDataEntry &E) {
+ IO.mapOptional("UInt32", E.UInt32);
+ IO.mapOptional("Binary", E.Binary);
+
+ COFF::header &H = *static_cast<COFF::header *>(IO.getContext());
+ if (COFF::is64Bit(H.Machine))
+ IO.mapOptional("LoadConfig", E.LoadConfig64);
+ else
+ IO.mapOptional("LoadConfig", E.LoadConfig32);
+}
+
void MappingTraits<COFFYAML::Symbol>::mapping(IO &IO, COFFYAML::Symbol &S) {
MappingNormalization<NStorageClass, uint8_t> NS(IO, S.Header.StorageClass);
@@ -586,9 +682,16 @@ void MappingTraits<COFFYAML::Section>::mapping(IO &IO, COFFYAML::Section &Sec) {
else if (Sec.Name == ".debug$H")
IO.mapOptional("GlobalHashes", Sec.DebugH);
+ IO.mapOptional("StructuredData", Sec.StructuredData);
+
+ if (!Sec.StructuredData.empty() && Sec.SectionData.binary_size()) {
+ IO.setError("StructuredData and SectionData can't be used together");
+ return;
+ }
+
// Uninitialized sections, such as .bss, typically have no data, but the size
// is carried in SizeOfRawData, even though PointerToRawData is zero.
- if (Sec.SectionData.binary_size() == 0 &&
+ if (Sec.SectionData.binary_size() == 0 && Sec.StructuredData.empty() &&
NC->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA)
IO.mapOptional("SizeOfRawData", Sec.Header.SizeOfRawData);
diff --git a/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index 3f758fdc6879..8d2028abfe9b 100644
--- a/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/llvm/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -167,6 +167,7 @@ void ScalarEnumerationTraits<RegisterId>::enumeration(IO &io, RegisterId &Reg) {
break;
case COFF::IMAGE_FILE_MACHINE_ARM64:
case COFF::IMAGE_FILE_MACHINE_ARM64EC:
+ case COFF::IMAGE_FILE_MACHINE_ARM64X:
CpuType = CPUType::ARM64;
break;
}
diff --git a/llvm/lib/ObjectYAML/DWARFEmitter.cpp b/llvm/lib/ObjectYAML/DWARFEmitter.cpp
index f25c016e9aa3..a26e93f65ed7 100644
--- a/llvm/lib/ObjectYAML/DWARFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/DWARFEmitter.cpp
@@ -20,7 +20,6 @@
#include "llvm/ObjectYAML/DWARFYAML.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -28,6 +27,7 @@
#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Host.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
diff --git a/llvm/lib/ObjectYAML/DWARFYAML.cpp b/llvm/lib/ObjectYAML/DWARFYAML.cpp
index 37116ada9901..2bddeed46413 100644
--- a/llvm/lib/ObjectYAML/DWARFYAML.cpp
+++ b/llvm/lib/ObjectYAML/DWARFYAML.cpp
@@ -59,22 +59,20 @@ Expected<DWARFYAML::Data::AbbrevTableInfo>
DWARFYAML::Data::getAbbrevTableInfoByID(uint64_t ID) const {
if (AbbrevTableInfoMap.empty()) {
uint64_t AbbrevTableOffset = 0;
- for (auto &AbbrevTable : enumerate(DebugAbbrev)) {
+ for (const auto &[Index, AbbrevTable] : enumerate(DebugAbbrev)) {
// If the abbrev table's ID isn't specified, we use the index as its ID.
- uint64_t AbbrevTableID =
- AbbrevTable.value().ID.value_or(AbbrevTable.index());
+ uint64_t AbbrevTableID = AbbrevTable.ID.value_or(Index);
auto It = AbbrevTableInfoMap.insert(
- {AbbrevTableID, AbbrevTableInfo{/*Index=*/AbbrevTable.index(),
+ {AbbrevTableID, AbbrevTableInfo{/*Index=*/Index,
/*Offset=*/AbbrevTableOffset}});
if (!It.second)
return createStringError(
errc::invalid_argument,
"the ID (%" PRIu64 ") of abbrev table with index %zu has been used "
"by abbrev table with index %" PRIu64,
- AbbrevTableID, AbbrevTable.index(), It.first->second.Index);
+ AbbrevTableID, Index, It.first->second.Index);
- AbbrevTableOffset +=
- getAbbrevTableContentByIndex(AbbrevTable.index()).size();
+ AbbrevTableOffset += getAbbrevTableContentByIndex(Index).size();
}
}
diff --git a/llvm/lib/ObjectYAML/DXContainerEmitter.cpp b/llvm/lib/ObjectYAML/DXContainerEmitter.cpp
index a5c60a6dc9cc..64b13fc0ccde 100644
--- a/llvm/lib/ObjectYAML/DXContainerEmitter.cpp
+++ b/llvm/lib/ObjectYAML/DXContainerEmitter.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/BinaryFormat/DXContainer.h"
+#include "llvm/MC/DXContainerPSVInfo.h"
#include "llvm/ObjectYAML/ObjectYAML.h"
#include "llvm/ObjectYAML/yaml2obj.h"
#include "llvm/Support/Errc.h"
@@ -193,6 +194,19 @@ void DXContainerWriter::writeParts(raw_ostream &OS) {
OS.write(reinterpret_cast<char *>(&Hash), sizeof(dxbc::ShaderHash));
break;
}
+ case dxbc::PartType::PSV0: {
+ if (!P.Info.has_value())
+ continue;
+ mcdxbc::PSVRuntimeInfo PSV;
+ memcpy(&PSV.BaseData, &P.Info->Info, sizeof(dxbc::PSV::v2::RuntimeInfo));
+ PSV.Resources = P.Info->Resources;
+
+ if (sys::IsBigEndianHost)
+ PSV.swapBytes(static_cast<Triple::EnvironmentType>(
+ Triple::Pixel + P.Info->Info.ShaderStage));
+ PSV.write(OS, P.Info->Version);
+ break;
+ }
case dxbc::PartType::Unknown:
break; // Skip any handling for unrecognized parts.
}
diff --git a/llvm/lib/ObjectYAML/DXContainerYAML.cpp b/llvm/lib/ObjectYAML/DXContainerYAML.cpp
index 1d1dd42d93f1..ed9f39954111 100644
--- a/llvm/lib/ObjectYAML/DXContainerYAML.cpp
+++ b/llvm/lib/ObjectYAML/DXContainerYAML.cpp
@@ -43,6 +43,35 @@ DXContainerYAML::ShaderHash::ShaderHash(const dxbc::ShaderHash &Data)
memcpy(Digest.data(), &Data.Digest[0], 16);
}
+DXContainerYAML::PSVInfo::PSVInfo() : Version(0) {
+ memset(&Info, 0, sizeof(Info));
+}
+
+DXContainerYAML::PSVInfo::PSVInfo(const dxbc::PSV::v0::RuntimeInfo *P,
+ uint16_t Stage)
+ : Version(0) {
+ memset(&Info, 0, sizeof(Info));
+ memcpy(&Info, P, sizeof(dxbc::PSV::v0::RuntimeInfo));
+
+ assert(Stage < std::numeric_limits<uint8_t>::max() &&
+ "Stage should be a very small number");
+ // We need to bring the stage in separately since it isn't part of the v1 data
+ // structure.
+ Info.ShaderStage = static_cast<uint8_t>(Stage);
+}
+
+DXContainerYAML::PSVInfo::PSVInfo(const dxbc::PSV::v1::RuntimeInfo *P)
+ : Version(1) {
+ memset(&Info, 0, sizeof(Info));
+ memcpy(&Info, P, sizeof(dxbc::PSV::v1::RuntimeInfo));
+}
+
+DXContainerYAML::PSVInfo::PSVInfo(const dxbc::PSV::v2::RuntimeInfo *P)
+ : Version(2) {
+ memset(&Info, 0, sizeof(Info));
+ memcpy(&Info, P, sizeof(dxbc::PSV::v2::RuntimeInfo));
+}
+
namespace yaml {
void MappingTraits<DXContainerYAML::VersionTuple>::mapping(
@@ -84,6 +113,27 @@ void MappingTraits<DXContainerYAML::ShaderHash>::mapping(
IO.mapRequired("Digest", Hash.Digest);
}
+void MappingTraits<DXContainerYAML::PSVInfo>::mapping(
+ IO &IO, DXContainerYAML::PSVInfo &PSV) {
+ IO.mapRequired("Version", PSV.Version);
+
+ // Store the PSV version in the YAML context.
+ void *OldContext = IO.getContext();
+ uint32_t Version = PSV.Version;
+ IO.setContext(&Version);
+
+ // Shader stage is only included in binaries for v1 and later, but we always
+ // include it since it simplifies parsing and file construction.
+ IO.mapRequired("ShaderStage", PSV.Info.ShaderStage);
+ PSV.mapInfoForVersion(IO);
+
+ IO.mapRequired("ResourceStride", PSV.ResourceStride);
+ IO.mapRequired("Resources", PSV.Resources);
+
+ // Restore the YAML context.
+ IO.setContext(OldContext);
+}
+
void MappingTraits<DXContainerYAML::Part>::mapping(IO &IO,
DXContainerYAML::Part &P) {
IO.mapRequired("Name", P.Name);
@@ -91,6 +141,7 @@ void MappingTraits<DXContainerYAML::Part>::mapping(IO &IO,
IO.mapOptional("Program", P.Program);
IO.mapOptional("Flags", P.Flags);
IO.mapOptional("Hash", P.Hash);
+ IO.mapOptional("PSVInfo", P.Info);
}
void MappingTraits<DXContainerYAML::Object>::mapping(
@@ -100,5 +151,111 @@ void MappingTraits<DXContainerYAML::Object>::mapping(
IO.mapRequired("Parts", Obj.Parts);
}
+void MappingTraits<DXContainerYAML::ResourceBindInfo>::mapping(
+ IO &IO, DXContainerYAML::ResourceBindInfo &Res) {
+ IO.mapRequired("Type", Res.Type);
+ IO.mapRequired("Space", Res.Space);
+ IO.mapRequired("LowerBound", Res.LowerBound);
+ IO.mapRequired("UpperBound", Res.UpperBound);
+
+ const uint32_t *PSVVersion = static_cast<uint32_t *>(IO.getContext());
+ if (*PSVVersion < 2)
+ return;
+
+ IO.mapRequired("Kind", Res.Kind);
+ IO.mapRequired("Flags", Res.Flags);
+}
+
} // namespace yaml
+
+void DXContainerYAML::PSVInfo::mapInfoForVersion(yaml::IO &IO) {
+ dxbc::PipelinePSVInfo &StageInfo = Info.StageInfo;
+ Triple::EnvironmentType Stage = dxbc::getShaderStage(Info.ShaderStage);
+
+ switch (Stage) {
+ case Triple::EnvironmentType::Pixel:
+ IO.mapRequired("DepthOutput", StageInfo.PS.DepthOutput);
+ IO.mapRequired("SampleFrequency", StageInfo.PS.SampleFrequency);
+ break;
+ case Triple::EnvironmentType::Vertex:
+ IO.mapRequired("OutputPositionPresent", StageInfo.VS.OutputPositionPresent);
+ break;
+ case Triple::EnvironmentType::Geometry:
+ IO.mapRequired("InputPrimitive", StageInfo.GS.InputPrimitive);
+ IO.mapRequired("OutputTopology", StageInfo.GS.OutputTopology);
+ IO.mapRequired("OutputStreamMask", StageInfo.GS.OutputStreamMask);
+ IO.mapRequired("OutputPositionPresent", StageInfo.GS.OutputPositionPresent);
+ break;
+ case Triple::EnvironmentType::Hull:
+ IO.mapRequired("InputControlPointCount",
+ StageInfo.HS.InputControlPointCount);
+ IO.mapRequired("OutputControlPointCount",
+ StageInfo.HS.OutputControlPointCount);
+ IO.mapRequired("TessellatorDomain", StageInfo.HS.TessellatorDomain);
+ IO.mapRequired("TessellatorOutputPrimitive",
+ StageInfo.HS.TessellatorOutputPrimitive);
+ break;
+ case Triple::EnvironmentType::Domain:
+ IO.mapRequired("InputControlPointCount",
+ StageInfo.DS.InputControlPointCount);
+ IO.mapRequired("OutputPositionPresent", StageInfo.DS.OutputPositionPresent);
+ IO.mapRequired("TessellatorDomain", StageInfo.DS.TessellatorDomain);
+ break;
+ case Triple::EnvironmentType::Mesh:
+ IO.mapRequired("GroupSharedBytesUsed", StageInfo.MS.GroupSharedBytesUsed);
+ IO.mapRequired("GroupSharedBytesDependentOnViewID",
+ StageInfo.MS.GroupSharedBytesDependentOnViewID);
+ IO.mapRequired("PayloadSizeInBytes", StageInfo.MS.PayloadSizeInBytes);
+ IO.mapRequired("MaxOutputVertices", StageInfo.MS.MaxOutputVertices);
+ IO.mapRequired("MaxOutputPrimitives", StageInfo.MS.MaxOutputPrimitives);
+ break;
+ case Triple::EnvironmentType::Amplification:
+ IO.mapRequired("PayloadSizeInBytes", StageInfo.AS.PayloadSizeInBytes);
+ break;
+ default:
+ break;
+ }
+
+ IO.mapRequired("MinimumWaveLaneCount", Info.MinimumWaveLaneCount);
+ IO.mapRequired("MaximumWaveLaneCount", Info.MaximumWaveLaneCount);
+
+ if (Version == 0)
+ return;
+
+ IO.mapRequired("UsesViewID", Info.UsesViewID);
+
+ switch (Stage) {
+ case Triple::EnvironmentType::Geometry:
+ IO.mapRequired("MaxVertexCount", Info.GeomData.MaxVertexCount);
+ break;
+ case Triple::EnvironmentType::Hull:
+ case Triple::EnvironmentType::Domain:
+ IO.mapRequired("SigPatchConstOrPrimVectors",
+ Info.GeomData.SigPatchConstOrPrimVectors);
+ break;
+ case Triple::EnvironmentType::Mesh:
+ IO.mapRequired("SigPrimVectors", Info.GeomData.MeshInfo.SigPrimVectors);
+ IO.mapRequired("MeshOutputTopology",
+ Info.GeomData.MeshInfo.MeshOutputTopology);
+ break;
+ default:
+ break;
+ }
+
+ IO.mapRequired("SigInputElements", Info.SigInputElements);
+ IO.mapRequired("SigOutputElements", Info.SigOutputElements);
+ IO.mapRequired("SigPatchConstOrPrimElements",
+ Info.SigPatchConstOrPrimElements);
+ IO.mapRequired("SigInputVectors", Info.SigInputVectors);
+ MutableArrayRef<uint8_t> Vec(Info.SigOutputVectors);
+ IO.mapRequired("SigOutputVectors", Vec);
+
+ if (Version == 1)
+ return;
+
+ IO.mapRequired("NumThreadsX", Info.NumThreadsX);
+ IO.mapRequired("NumThreadsY", Info.NumThreadsY);
+ IO.mapRequired("NumThreadsZ", Info.NumThreadsZ);
+}
+
} // namespace llvm
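
The PSVInfo mapping above stashes the version in the YAML IO context so the nested resource mapping can decide which fields exist. A simplified standalone sketch of that save-set-restore context pattern; FakeIO and the field names are invented, not llvm::yaml::IO:

#include <cstdint>
#include <cstdio>

// Invented stand-in for the YAML IO object: it only carries a context pointer.
struct FakeIO {
  void *Ctx = nullptr;
  void *getContext() const { return Ctx; }
  void setContext(void *C) { Ctx = C; }
};

struct Resource { uint32_t Space; uint32_t Kind; };

// The nested mapping reads the version the outer mapping stored in the context.
static void mapResource(FakeIO &IO, Resource &R) {
  std::printf("Space=%u\n", R.Space);
  const uint32_t *Version = static_cast<uint32_t *>(IO.getContext());
  if (*Version < 2)
    return;                       // Kind (and Flags) only exist from v2 onwards.
  std::printf("Kind=%u\n", R.Kind);
}

static void mapInfo(FakeIO &IO, uint32_t Version, Resource &R) {
  void *Old = IO.getContext();    // save whatever context was active
  IO.setContext(&Version);        // expose the version to nested mappings
  mapResource(IO, R);
  IO.setContext(Old);             // restore the previous context
}

int main() {
  FakeIO IO;
  Resource R{1, 5};
  mapInfo(IO, /*Version=*/1, R);  // prints only Space
  mapInfo(IO, /*Version=*/2, R);  // prints Space and Kind
}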
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 0ca0348f36ed..e92c61d81055 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -131,6 +131,9 @@ void ScalarEnumerationTraits<ELFYAML::ELF_NT>::enumeration(
ECase(NT_ARM_HW_WATCH);
ECase(NT_ARM_SVE);
ECase(NT_ARM_PAC_MASK);
+ ECase(NT_ARM_SSVE);
+ ECase(NT_ARM_ZA);
+ ECase(NT_ARM_ZT);
ECase(NT_FILE);
ECase(NT_PRXFPREG);
ECase(NT_SIGINFO);
@@ -587,6 +590,8 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90C, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX941, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX942, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);
@@ -602,6 +607,8 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1101, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1102, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1103, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1150, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1151, EF_AMDGPU_MACH);
switch (Object->Header.ABIVersion) {
default:
// ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags.
@@ -676,6 +683,7 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
ECase(SHT_LLVM_BB_ADDR_MAP_V0);
ECase(SHT_LLVM_BB_ADDR_MAP);
ECase(SHT_LLVM_OFFLOADING);
+ ECase(SHT_LLVM_LTO);
ECase(SHT_GNU_ATTRIBUTES);
ECase(SHT_GNU_HASH);
ECase(SHT_GNU_verdef);
@@ -707,6 +715,10 @@ void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration(
case ELF::EM_MSP430:
ECase(SHT_MSP430_ATTRIBUTES);
break;
+ case ELF::EM_AARCH64:
+ ECase(SHT_AARCH64_MEMTAG_GLOBALS_STATIC);
+ ECase(SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC);
+ break;
default:
// Nothing to do.
break;
diff --git a/llvm/lib/ObjectYAML/MachOEmitter.cpp b/llvm/lib/ObjectYAML/MachOEmitter.cpp
index 54983f0c260d..0de9112a4ac4 100644
--- a/llvm/lib/ObjectYAML/MachOEmitter.cpp
+++ b/llvm/lib/ObjectYAML/MachOEmitter.cpp
@@ -105,7 +105,7 @@ void MachOWriter::writeHeader(raw_ostream &OS) {
}
template <typename SectionType>
-SectionType constructSection(MachOYAML::Section Sec) {
+SectionType constructSection(const MachOYAML::Section &Sec) {
SectionType TempSec;
memcpy(reinterpret_cast<void *>(&TempSec.sectname[0]), &Sec.sectname[0], 16);
memcpy(reinterpret_cast<void *>(&TempSec.segname[0]), &Sec.segname[0], 16);
@@ -426,7 +426,7 @@ void MachOWriter::writeRelocations(raw_ostream &OS) {
void MachOWriter::writeBindOpcodes(
raw_ostream &OS, std::vector<MachOYAML::BindOpcode> &BindOpcodes) {
- for (auto Opcode : BindOpcodes) {
+ for (auto &Opcode : BindOpcodes) {
uint8_t OpByte = Opcode.Opcode | Opcode.Imm;
OS.write(reinterpret_cast<char *>(&OpByte), 1);
for (auto Data : Opcode.ULEBExtraData) {
diff --git a/llvm/lib/ObjectYAML/MachOYAML.cpp b/llvm/lib/ObjectYAML/MachOYAML.cpp
index 8c8b4532dcde..56120901be23 100644
--- a/llvm/lib/ObjectYAML/MachOYAML.cpp
+++ b/llvm/lib/ObjectYAML/MachOYAML.cpp
@@ -14,9 +14,9 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Host.h"
#include <cinttypes>
#include <cstdint>
#include <cstring>
diff --git a/llvm/lib/ObjectYAML/MinidumpEmitter.cpp b/llvm/lib/ObjectYAML/MinidumpEmitter.cpp
index 1bda6f364b1b..24b521a9925c 100644
--- a/llvm/lib/ObjectYAML/MinidumpEmitter.cpp
+++ b/llvm/lib/ObjectYAML/MinidumpEmitter.cpp
@@ -236,8 +236,8 @@ bool yaml2minidump(MinidumpYAML::Object &Obj, raw_ostream &Out,
Obj.Header.StreamDirectoryRVA = File.allocateArray(ArrayRef(StreamDirectory));
Obj.Header.NumberOfStreams = StreamDirectory.size();
- for (auto &Stream : enumerate(Obj.Streams))
- StreamDirectory[Stream.index()] = layout(File, *Stream.value());
+ for (const auto &[Index, Stream] : enumerate(Obj.Streams))
+ StreamDirectory[Index] = layout(File, *Stream);
File.writeTo(Out);
return true;
diff --git a/llvm/lib/ObjectYAML/OffloadEmitter.cpp b/llvm/lib/ObjectYAML/OffloadEmitter.cpp
index 3ffbc4ff0e11..dfb572531660 100644
--- a/llvm/lib/ObjectYAML/OffloadEmitter.cpp
+++ b/llvm/lib/ObjectYAML/OffloadEmitter.cpp
@@ -28,12 +28,9 @@ bool yaml2offload(Binary &Doc, raw_ostream &Out, ErrorHandler EH) {
if (Member.Flags)
Image.Flags = *Member.Flags;
- StringMap<StringRef> &StringData = Image.StringData;
- if (Member.StringEntries) {
- for (const auto &Entry : *Member.StringEntries) {
- StringData[Entry.Key] = Entry.Value;
- }
- }
+ if (Member.StringEntries)
+ for (const auto &Entry : *Member.StringEntries)
+ Image.StringData[Entry.Key] = Entry.Value;
SmallVector<char, 1024> Data;
raw_svector_ostream OS(Data);
diff --git a/llvm/lib/ObjectYAML/XCOFFEmitter.cpp b/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
index 1ceac6c05893..7ad878f04c88 100644
--- a/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
+++ b/llvm/lib/ObjectYAML/XCOFFEmitter.cpp
@@ -70,7 +70,7 @@ private:
support::endian::Writer W;
yaml::ErrorHandler ErrHandler;
StringTableBuilder StrTblBuilder;
- uint64_t StartOffset;
+ uint64_t StartOffset = 0u;
// Map the section name to its corrresponding section index.
DenseMap<StringRef, int16_t> SectionIndexMap = {
{StringRef("N_DEBUG"), XCOFF::N_DEBUG},
diff --git a/llvm/lib/Option/Arg.cpp b/llvm/lib/Option/Arg.cpp
index 2da32bfacf30..48d173accdac 100644
--- a/llvm/lib/Option/Arg.cpp
+++ b/llvm/lib/Option/Arg.cpp
@@ -20,19 +20,19 @@ using namespace llvm::opt;
Arg::Arg(const Option Opt, StringRef S, unsigned Index, const Arg *BaseArg)
: Opt(Opt), BaseArg(BaseArg), Spelling(S), Index(Index), Claimed(false),
- OwnsValues(false) {}
+ IgnoredTargetSpecific(false), OwnsValues(false) {}
Arg::Arg(const Option Opt, StringRef S, unsigned Index, const char *Value0,
const Arg *BaseArg)
: Opt(Opt), BaseArg(BaseArg), Spelling(S), Index(Index), Claimed(false),
- OwnsValues(false) {
+ IgnoredTargetSpecific(false), OwnsValues(false) {
Values.push_back(Value0);
}
Arg::Arg(const Option Opt, StringRef S, unsigned Index, const char *Value0,
const char *Value1, const Arg *BaseArg)
: Opt(Opt), BaseArg(BaseArg), Spelling(S), Index(Index), Claimed(false),
- OwnsValues(false) {
+ IgnoredTargetSpecific(false), OwnsValues(false) {
Values.push_back(Value0);
Values.push_back(Value1);
}
diff --git a/llvm/lib/Option/OptTable.cpp b/llvm/lib/Option/OptTable.cpp
index 2e289c58b45a..3f53ac119c69 100644
--- a/llvm/lib/Option/OptTable.cpp
+++ b/llvm/lib/Option/OptTable.cpp
@@ -163,7 +163,7 @@ static unsigned matchOption(const OptTable::Info *I, StringRef Str,
for (auto Prefix : I->Prefixes) {
if (Str.startswith(Prefix)) {
StringRef Rest = Str.substr(Prefix.size());
- bool Matched = IgnoreCase ? Rest.startswith_insensitive(I->Name)
+ bool Matched = IgnoreCase ? Rest.starts_with_insensitive(I->Name)
: Rest.startswith(I->Name);
if (Matched)
return Prefix.size() + StringRef(I->Name).size();
@@ -468,6 +468,16 @@ InputArgList OptTable::ParseArgs(ArrayRef<const char *> ArgArr,
continue;
}
+ // In DashDashParsing mode, the first "--" stops option scanning and treats
+ // all subsequent arguments as positional.
+ if (DashDashParsing && Str == "--") {
+ while (++Index < End) {
+ Args.append(new Arg(getOption(InputOptionID), Str, Index,
+ Args.getArgString(Index)));
+ }
+ break;
+ }
+
unsigned Prev = Index;
std::unique_ptr<Arg> A = GroupedShortOptions
? parseOneArgGrouped(Args, Index)
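
The OptTable change above makes the first bare "--" end option scanning, with every later argument treated as positional input. A small standalone sketch of the same convention over argv; the types here are illustrative, not LLVM's Arg/InputArgList:

#include <cstdio>
#include <string>
#include <vector>

struct ParsedArgs {
  std::vector<std::string> Options; // arguments that still look like options
  std::vector<std::string> Inputs;  // positional arguments
};

static ParsedArgs parse(int argc, const char **argv) {
  ParsedArgs Result;
  bool OptionsEnded = false;
  for (int I = 1; I < argc; ++I) {
    std::string Arg = argv[I];
    if (!OptionsEnded && Arg == "--") {
      // First "--": stop option scanning; everything that follows is input.
      OptionsEnded = true;
      continue;
    }
    if (!OptionsEnded && Arg.size() > 1 && Arg[0] == '-')
      Result.Options.push_back(Arg);
    else
      Result.Inputs.push_back(Arg);
  }
  return Result;
}

int main(int argc, const char **argv) {
  ParsedArgs P = parse(argc, argv);
  std::printf("%zu options, %zu inputs\n", P.Options.size(), P.Inputs.size());
}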
diff --git a/llvm/lib/Option/Option.cpp b/llvm/lib/Option/Option.cpp
index 1f1eb93bcca0..c570b02b08ce 100644
--- a/llvm/lib/Option/Option.cpp
+++ b/llvm/lib/Option/Option.cpp
@@ -108,20 +108,21 @@ bool Option::matches(OptSpecifier Opt) const {
std::unique_ptr<Arg> Option::acceptInternal(const ArgList &Args,
StringRef Spelling,
unsigned &Index) const {
- size_t ArgSize = Spelling.size();
+ const size_t SpellingSize = Spelling.size();
+ const size_t ArgStringSize = StringRef(Args.getArgString(Index)).size();
switch (getKind()) {
case FlagClass: {
- if (ArgSize != strlen(Args.getArgString(Index)))
+ if (SpellingSize != ArgStringSize)
return nullptr;
return std::make_unique<Arg>(*this, Spelling, Index++);
}
case JoinedClass: {
- const char *Value = Args.getArgString(Index) + ArgSize;
+ const char *Value = Args.getArgString(Index) + SpellingSize;
return std::make_unique<Arg>(*this, Spelling, Index++, Value);
}
case CommaJoinedClass: {
// Always matches.
- const char *Str = Args.getArgString(Index) + ArgSize;
+ const char *Str = Args.getArgString(Index) + SpellingSize;
auto A = std::make_unique<Arg>(*this, Spelling, Index++);
// Parse out the comma separated values.
@@ -149,8 +150,7 @@ std::unique_ptr<Arg> Option::acceptInternal(const ArgList &Args,
}
case SeparateClass:
// Matches iff this is an exact match.
- // FIXME: Avoid strlen.
- if (ArgSize != strlen(Args.getArgString(Index)))
+ if (SpellingSize != ArgStringSize)
return nullptr;
Index += 2;
@@ -162,8 +162,7 @@ std::unique_ptr<Arg> Option::acceptInternal(const ArgList &Args,
Args.getArgString(Index - 1));
case MultiArgClass: {
// Matches iff this is an exact match.
- // FIXME: Avoid strlen.
- if (ArgSize != strlen(Args.getArgString(Index)))
+ if (SpellingSize != ArgStringSize)
return nullptr;
Index += 1 + getNumArgs();
@@ -178,9 +177,8 @@ std::unique_ptr<Arg> Option::acceptInternal(const ArgList &Args,
}
case JoinedOrSeparateClass: {
// If this is not an exact match, it is a joined arg.
- // FIXME: Avoid strlen.
- if (ArgSize != strlen(Args.getArgString(Index))) {
- const char *Value = Args.getArgString(Index) + ArgSize;
+ if (SpellingSize != ArgStringSize) {
+ const char *Value = Args.getArgString(Index) + SpellingSize;
return std::make_unique<Arg>(*this, Spelling, Index++, Value);
}
@@ -201,12 +199,11 @@ std::unique_ptr<Arg> Option::acceptInternal(const ArgList &Args,
return nullptr;
return std::make_unique<Arg>(*this, Spelling, Index - 2,
- Args.getArgString(Index - 2) + ArgSize,
+ Args.getArgString(Index - 2) + SpellingSize,
Args.getArgString(Index - 1));
case RemainingArgsClass: {
// Matches iff this is an exact match.
- // FIXME: Avoid strlen.
- if (ArgSize != strlen(Args.getArgString(Index)))
+ if (SpellingSize != ArgStringSize)
return nullptr;
auto A = std::make_unique<Arg>(*this, Spelling, Index++);
while (Index < Args.getNumInputArgStrings() &&
@@ -216,9 +213,9 @@ std::unique_ptr<Arg> Option::acceptInternal(const ArgList &Args,
}
case RemainingArgsJoinedClass: {
auto A = std::make_unique<Arg>(*this, Spelling, Index);
- if (ArgSize != strlen(Args.getArgString(Index))) {
+ if (SpellingSize != ArgStringSize) {
// An inexact match means there is a joined arg.
- A->getValues().push_back(Args.getArgString(Index) + ArgSize);
+ A->getValues().push_back(Args.getArgString(Index) + SpellingSize);
}
Index++;
while (Index < Args.getNumInputArgStrings() &&
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index e251d56463a3..d0cbbcc0e310 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -34,7 +34,6 @@
#include "llvm/Analysis/Delinearization.h"
#include "llvm/Analysis/DemandedBits.h"
#include "llvm/Analysis/DependenceAnalysis.h"
-#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/DomPrinter.h"
#include "llvm/Analysis/DominanceFrontier.h"
#include "llvm/Analysis/FunctionPropertiesAnalysis.h"
@@ -46,7 +45,6 @@
#include "llvm/Analysis/InstCount.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LazyValueInfo.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/Lint.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopCacheAnalysis.h"
@@ -73,6 +71,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/Analysis/UniformityAnalysis.h"
+#include "llvm/CodeGen/HardwareLoops.h"
#include "llvm/CodeGen/TypePromotion.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
@@ -81,6 +80,7 @@
#include "llvm/IR/SafepointIRVerifier.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IRPrinter/IRPrintingPasses.h"
+#include "llvm/Passes/OptimizationLevel.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -103,6 +103,7 @@
#include "llvm/Transforms/IPO/CrossDSOCFI.h"
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
+#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
@@ -116,6 +117,7 @@
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/LoopExtractor.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
+#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
#include "llvm/Transforms/IPO/MergeFunctions.h"
#include "llvm/Transforms/IPO/ModuleInliner.h"
#include "llvm/Transforms/IPO/OpenMPOpt.h"
@@ -205,6 +207,7 @@
#include "llvm/Transforms/Scalar/NaryReassociate.h"
#include "llvm/Transforms/Scalar/NewGVN.h"
#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
+#include "llvm/Transforms/Scalar/PlaceSafepoints.h"
#include "llvm/Transforms/Scalar/Reassociate.h"
#include "llvm/Transforms/Scalar/Reg2Mem.h"
#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h"
@@ -227,6 +230,7 @@
#include "llvm/Transforms/Utils/BreakCriticalEdges.h"
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/Transforms/Utils/CanonicalizeFreezeInLoops.h"
+#include "llvm/Transforms/Utils/CountVisits.h"
#include "llvm/Transforms/Utils/Debugify.h"
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
#include "llvm/Transforms/Utils/FixIrreducible.h"
@@ -243,6 +247,7 @@
#include "llvm/Transforms/Utils/LowerSwitch.h"
#include "llvm/Transforms/Utils/Mem2Reg.h"
#include "llvm/Transforms/Utils/MetaRenamer.h"
+#include "llvm/Transforms/Utils/MoveAutoInit.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/PredicateInfo.h"
#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
@@ -483,6 +488,28 @@ static std::optional<int> parseRepeatPassName(StringRef Name) {
return Count;
}
+static std::optional<std::pair<bool, bool>>
+parseFunctionPipelineName(StringRef Name) {
+ std::pair<bool, bool> Params;
+ if (!Name.consume_front("function"))
+ return std::nullopt;
+ if (Name.empty())
+ return Params;
+ if (!Name.consume_front("<") || !Name.consume_back(">"))
+ return std::nullopt;
+ while (!Name.empty()) {
+ auto [Front, Back] = Name.split(';');
+ Name = Back;
+ if (Front == "eager-inv")
+ Params.first = true;
+ else if (Front == "no-rerun")
+ Params.second = true;
+ else
+ return std::nullopt;
+ }
+ return Params;
+}
+
static std::optional<int> parseDevirtPassName(StringRef Name) {
if (!Name.consume_front("devirt<") || !Name.consume_back(">"))
return std::nullopt;
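
parseFunctionPipelineName above accepts "function", "function<eager-inv>", "function<no-rerun>" and "function<eager-inv;no-rerun>" and returns the two flags as a pair. A simplified standalone re-implementation on std::string_view, sketched here for illustration rather than copied from PassBuilder:

#include <cstdio>
#include <optional>
#include <string_view>
#include <utility>

// Returns {EagerlyInvalidate, NoRerun} for "function[<p1;p2;...>]" spellings,
// or std::nullopt if the name is not a valid function-adaptor spelling.
static std::optional<std::pair<bool, bool>>
parseFunctionPipelineName(std::string_view Name) {
  std::pair<bool, bool> Params{false, false};
  if (Name.substr(0, 8) != "function")
    return std::nullopt;
  Name.remove_prefix(8);
  if (Name.empty())
    return Params;                           // plain "function"
  if (Name.front() != '<' || Name.back() != '>')
    return std::nullopt;
  Name.remove_prefix(1);
  Name.remove_suffix(1);
  while (!Name.empty()) {
    size_t Semi = Name.find(';');
    std::string_view Front = Name.substr(0, Semi);
    Name = Semi == std::string_view::npos ? std::string_view()
                                          : Name.substr(Semi + 1);
    if (Front == "eager-inv")
      Params.first = true;
    else if (Front == "no-rerun")
      Params.second = true;
    else
      return std::nullopt;                   // unknown parameter
  }
  return Params;
}

int main() {
  auto P = parseFunctionPipelineName("function<eager-inv;no-rerun>");
  std::printf("%d %d\n", P->first, P->second); // 1 1
}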
@@ -501,6 +528,17 @@ static bool checkParametrizedPassName(StringRef Name, StringRef PassName) {
return Name.startswith("<") && Name.endswith(">");
}
+static std::optional<OptimizationLevel> parseOptLevel(StringRef S) {
+ return StringSwitch<std::optional<OptimizationLevel>>(S)
+ .Case("O0", OptimizationLevel::O0)
+ .Case("O1", OptimizationLevel::O1)
+ .Case("O2", OptimizationLevel::O2)
+ .Case("O3", OptimizationLevel::O3)
+ .Case("Os", OptimizationLevel::Os)
+ .Case("Oz", OptimizationLevel::Oz)
+ .Default(std::nullopt);
+}
+
namespace {
/// This performs customized parsing of pass name with parameters.
@@ -539,20 +577,58 @@ auto parsePassParameters(ParametersParseCallableT &&Parser, StringRef Name,
return Result;
}
+/// Parser of parameters for HardwareLoops pass.
+Expected<HardwareLoopOptions> parseHardwareLoopOptions(StringRef Params) {
+ HardwareLoopOptions HardwareLoopOpts;
+
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+ if (ParamName.consume_front("hardware-loop-decrement=")) {
+ int Count;
+ if (ParamName.getAsInteger(0, Count))
+ return make_error<StringError>(
+ formatv("invalid HardwareLoopPass parameter '{0}' ", ParamName).str(),
+ inconvertibleErrorCode());
+ HardwareLoopOpts.setDecrement(Count);
+ continue;
+ }
+ if (ParamName.consume_front("hardware-loop-counter-bitwidth=")) {
+ int Count;
+ if (ParamName.getAsInteger(0, Count))
+ return make_error<StringError>(
+ formatv("invalid HardwareLoopPass parameter '{0}' ", ParamName).str(),
+ inconvertibleErrorCode());
+ HardwareLoopOpts.setCounterBitwidth(Count);
+ continue;
+ }
+ if (ParamName == "force-hardware-loops") {
+ HardwareLoopOpts.setForce(true);
+ } else if (ParamName == "force-hardware-loop-phi") {
+ HardwareLoopOpts.setForcePhi(true);
+ } else if (ParamName == "force-nested-hardware-loop") {
+ HardwareLoopOpts.setForceNested(true);
+ } else if (ParamName == "force-hardware-loop-guard") {
+ HardwareLoopOpts.setForceGuard(true);
+ } else {
+ return make_error<StringError>(
+ formatv("invalid HardwarePass parameter '{0}' ", ParamName).str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return HardwareLoopOpts;
+}
+
/// Parser of parameters for LoopUnroll pass.
Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) {
LoopUnrollOptions UnrollOpts;
while (!Params.empty()) {
StringRef ParamName;
std::tie(ParamName, Params) = Params.split(';');
- int OptLevel = StringSwitch<int>(ParamName)
- .Case("O0", 0)
- .Case("O1", 1)
- .Case("O2", 2)
- .Case("O3", 3)
- .Default(-1);
- if (OptLevel >= 0) {
- UnrollOpts.setOptLevel(OptLevel);
+ std::optional<OptimizationLevel> OptLevel = parseOptLevel(ParamName);
+ // Don't accept -Os/-Oz.
+ if (OptLevel && !OptLevel->isOptimizingForSize()) {
+ UnrollOpts.setOptLevel(OptLevel->getSpeedupLevel());
continue;
}
if (ParamName.consume_front("full-unroll-max=")) {
@@ -604,6 +680,10 @@ Expected<bool> parseSinglePassOption(StringRef Params, StringRef OptionName,
return Result;
}
+Expected<bool> parseGlobalDCEPassOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "vfe-linkage-unit-visibility", "GlobalDCE");
+}
+
Expected<bool> parseInlinerPassOptions(StringRef Params) {
return parseSinglePassOption(Params, "only-mandatory", "InlinerPass");
}
@@ -612,6 +692,11 @@ Expected<bool> parseCoroSplitPassOptions(StringRef Params) {
return parseSinglePassOption(Params, "reuse-storage", "CoroSplitPass");
}
+Expected<bool> parsePostOrderFunctionAttrsPassOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "skip-non-recursive",
+ "PostOrderFunctionAttrs");
+}
+
Expected<bool> parseEarlyCSEPassOptions(StringRef Params) {
return parseSinglePassOption(Params, "memssa", "EarlyCSE");
}
@@ -666,6 +751,26 @@ Expected<HWAddressSanitizerOptions> parseHWASanPassOptions(StringRef Params) {
return Result;
}
+Expected<EmbedBitcodeOptions> parseEmbedBitcodePassOptions(StringRef Params) {
+ EmbedBitcodeOptions Result;
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+
+ if (ParamName == "thinlto") {
+ Result.IsThinLTO = true;
+ } else if (ParamName == "emit-summary") {
+ Result.EmitLTOSummary = true;
+ } else {
+ return make_error<StringError>(
+ formatv("invalid EmbedBitcode pass parameter '{0}' ", ParamName)
+ .str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return Result;
+}
+
Expected<MemorySanitizerOptions> parseMSanPassOptions(StringRef Params) {
MemorySanitizerOptions Result;
while (!Params.empty()) {
@@ -704,7 +809,11 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
std::tie(ParamName, Params) = Params.split(';');
bool Enable = !ParamName.consume_front("no-");
- if (ParamName == "forward-switch-cond") {
+ if (ParamName == "speculate-blocks") {
+ Result.speculateBlocks(Enable);
+ } else if (ParamName == "simplify-cond-branch") {
+ Result.setSimplifyCondBranch(Enable);
+ } else if (ParamName == "forward-switch-cond") {
Result.forwardSwitchCondToPhi(Enable);
} else if (ParamName == "switch-range-to-icmp") {
Result.convertSwitchRangeToICmp(Enable);
@@ -734,6 +843,33 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
return Result;
}
+Expected<InstCombineOptions> parseInstCombineOptions(StringRef Params) {
+ InstCombineOptions Result;
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+
+ bool Enable = !ParamName.consume_front("no-");
+ if (ParamName == "use-loop-info") {
+ Result.setUseLoopInfo(Enable);
+ } else if (Enable && ParamName.consume_front("max-iterations=")) {
+ APInt MaxIterations;
+ if (ParamName.getAsInteger(0, MaxIterations))
+ return make_error<StringError>(
+ formatv("invalid argument to InstCombine pass max-iterations "
+ "parameter: '{0}' ",
+ ParamName).str(),
+ inconvertibleErrorCode());
+ Result.setMaxIterations((unsigned)MaxIterations.getZExtValue());
+ } else {
+ return make_error<StringError>(
+ formatv("invalid InstCombine pass parameter '{0}' ", ParamName).str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return Result;
+}
+
/// Parser of parameters for LoopVectorize pass.
Expected<LoopVectorizeOptions> parseLoopVectorizeOptions(StringRef Params) {
LoopVectorizeOptions Opts;
@@ -794,6 +930,26 @@ Expected<LICMOptions> parseLICMOptions(StringRef Params) {
return Result;
}
+Expected<std::pair<bool, bool>> parseLoopRotateOptions(StringRef Params) {
+ std::pair<bool, bool> Result = {true, false};
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+
+ bool Enable = !ParamName.consume_front("no-");
+ if (ParamName == "header-duplication") {
+ Result.first = Enable;
+ } else if (ParamName == "prepare-for-lto") {
+ Result.second = Enable;
+ } else {
+ return make_error<StringError>(
+ formatv("invalid LoopRotate pass parameter '{0}' ", ParamName).str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return Result;
+}
+
Expected<bool> parseMergedLoadStoreMotionOptions(StringRef Params) {
bool Result = false;
while (!Params.empty()) {
@@ -893,6 +1049,45 @@ Expected<bool> parseDependenceAnalysisPrinterOptions(StringRef Params) {
"DependenceAnalysisPrinter");
}
+Expected<bool> parseSeparateConstOffsetFromGEPPassOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "lower-gep",
+ "SeparateConstOffsetFromGEP");
+}
+
+Expected<OptimizationLevel>
+parseFunctionSimplificationPipelineOptions(StringRef Params) {
+ std::optional<OptimizationLevel> L = parseOptLevel(Params);
+ if (!L || *L == OptimizationLevel::O0) {
+ return make_error<StringError>(
+ formatv("invalid function-simplification parameter '{0}' ", Params)
+ .str(),
+ inconvertibleErrorCode());
+ };
+ return *L;
+}
+
+Expected<bool> parseMemorySSAPrinterPassOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "no-ensure-optimized-uses",
+ "MemorySSAPrinterPass");
+}
+
+Expected<std::string> parseMemProfUsePassOptions(StringRef Params) {
+ std::string Result;
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+
+ if (ParamName.consume_front("profile-filename=")) {
+ Result = ParamName.str();
+ } else {
+ return make_error<StringError>(
+ formatv("invalid MemProfUse pass parameter '{0}' ", ParamName).str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return Result;
+}
+
} // namespace
/// Tests whether a pass name starts with a valid prefix for a default pipeline
@@ -927,12 +1122,14 @@ static bool isModulePassName(StringRef Name, CallbacksT &Callbacks) {
if (startsWithDefaultPipelineAliasPrefix(Name))
return DefaultAliasRegex.match(Name);
+ StringRef NameNoBracket = Name.take_until([](char C) { return C == '<'; });
+
// Explicitly handle pass manager names.
if (Name == "module")
return true;
if (Name == "cgscc")
return true;
- if (Name == "function" || Name == "function<eager-inv>")
+ if (NameNoBracket == "function")
return true;
if (Name == "coro-cond")
return true;
@@ -958,9 +1155,10 @@ static bool isModulePassName(StringRef Name, CallbacksT &Callbacks) {
template <typename CallbacksT>
static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) {
// Explicitly handle pass manager names.
+ StringRef NameNoBracket = Name.take_until([](char C) { return C == '<'; });
if (Name == "cgscc")
return true;
- if (Name == "function" || Name == "function<eager-inv>")
+ if (NameNoBracket == "function")
return true;
// Explicitly handle custom-parsed pass names.
@@ -986,7 +1184,8 @@ static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) {
template <typename CallbacksT>
static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {
// Explicitly handle pass manager names.
- if (Name == "function" || Name == "function<eager-inv>")
+ StringRef NameNoBracket = Name.take_until([](char C) { return C == '<'; });
+ if (NameNoBracket == "function")
return true;
if (Name == "loop" || Name == "loop-mssa")
return true;
@@ -1144,12 +1343,16 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
return Error::success();
}
- if (Name == "function" || Name == "function<eager-inv>") {
+ if (auto Params = parseFunctionPipelineName(Name)) {
+ if (Params->second)
+ return make_error<StringError>(
+ "cannot have a no-rerun module to function adaptor",
+ inconvertibleErrorCode());
FunctionPassManager FPM;
if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline))
return Err;
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM),
- Name != "function"));
+ MPM.addPass(
+ createModuleToFunctionPassAdaptor(std::move(FPM), Params->first));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
@@ -1181,19 +1384,7 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
assert(Matches.size() == 3 && "Must capture two matched strings!");
- OptimizationLevel L = StringSwitch<OptimizationLevel>(Matches[2])
- .Case("O0", OptimizationLevel::O0)
- .Case("O1", OptimizationLevel::O1)
- .Case("O2", OptimizationLevel::O2)
- .Case("O3", OptimizationLevel::O3)
- .Case("Os", OptimizationLevel::Os)
- .Case("Oz", OptimizationLevel::Oz);
- if (L == OptimizationLevel::O0 && Matches[1] != "thinlto" &&
- Matches[1] != "lto") {
- MPM.addPass(buildO0DefaultPipeline(L, Matches[1] == "thinlto-pre-link" ||
- Matches[1] == "lto-pre-link"));
- return Error::success();
- }
+ OptimizationLevel L = *parseOptLevel(Matches[2]);
// This is consistent with old pass manager invoked via opt, but
// inconsistent with clang. Clang doesn't enable loop vectorization
@@ -1210,7 +1401,13 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM,
} else if (Matches[1] == "thinlto") {
MPM.addPass(buildThinLTODefaultPipeline(L, nullptr));
} else if (Matches[1] == "lto-pre-link") {
- MPM.addPass(buildLTOPreLinkDefaultPipeline(L));
+ if (PTO.UnifiedLTO)
+ // When UnifiedLTO is enabled, use the ThinLTO pre-link pipeline. This
+ // avoids compile-time performance regressions and keeps the pre-link
+ // LTO pipeline "unified" for both LTO modes.
+ MPM.addPass(buildThinLTOPreLinkDefaultPipeline(L));
+ else
+ MPM.addPass(buildLTOPreLinkDefaultPipeline(L));
} else {
assert(Matches[1] == "lto" && "Not one of the matched options!");
MPM.addPass(buildLTODefaultPipeline(L, nullptr));
@@ -1318,13 +1515,13 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
CGPM.addPass(std::move(NestedCGPM));
return Error::success();
}
- if (Name == "function" || Name == "function<eager-inv>") {
+ if (auto Params = parseFunctionPipelineName(Name)) {
FunctionPassManager FPM;
if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline))
return Err;
// Add the nested pass manager with the appropriate adaptor.
- CGPM.addPass(
- createCGSCCToFunctionPassAdaptor(std::move(FPM), Name != "function"));
+ CGPM.addPass(createCGSCCToFunctionPassAdaptor(
+ std::move(FPM), Params->first, Params->second));
return Error::success();
}
if (auto Count = parseRepeatPassName(Name)) {
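
With parseFunctionPipelineName in place, the module and CGSCC parsers above accept a parameterized function adaptor rather than only the literal "function" / "function<eager-inv>" spellings. Assuming the parameter tokens are "eager-inv" and "no-rerun" (the latter accepted only under a cgscc walk, as the error path above indicates), a textual pipeline can be handed to the public parser roughly like this sketch:

// Sketch: feeding the extended function-adaptor syntax through the public
// parser. Parameter spellings "eager-inv" and "no-rerun" are assumed from
// the parsing code above; "no-rerun" is rejected at module scope.
#include "llvm/Passes/PassBuilder.h"

llvm::Error buildExamplePipeline(llvm::PassBuilder &PB,
                                 llvm::ModulePassManager &MPM) {
  return PB.parsePassPipeline(
      MPM, "cgscc(function<eager-inv;no-rerun>(instcombine,simplifycfg))");
}
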
diff --git a/llvm/lib/Passes/PassBuilderBindings.cpp b/llvm/lib/Passes/PassBuilderBindings.cpp
index a87c0e6dc0a3..0d3a3d7d0223 100644
--- a/llvm/lib/Passes/PassBuilderBindings.cpp
+++ b/llvm/lib/Passes/PassBuilderBindings.cpp
@@ -66,7 +66,7 @@ LLVMErrorRef LLVMRunPasses(LLVMModuleRef M, const char *Passes,
PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
StandardInstrumentations SI(Mod->getContext(), Debug, VerifyEach);
- SI.registerCallbacks(PIC, &FAM);
+ SI.registerCallbacks(PIC, &MAM);
ModulePassManager MPM;
if (VerifyEach) {
MPM.addPass(VerifierPass());
@@ -139,6 +139,11 @@ void LLVMPassBuilderOptionsSetMergeFunctions(LLVMPassBuilderOptionsRef Options,
unwrap(Options)->PTO.MergeFunctions = MergeFunctions;
}
+void LLVMPassBuilderOptionsSetInlinerThreshold(
+ LLVMPassBuilderOptionsRef Options, int Threshold) {
+ unwrap(Options)->PTO.InlinerThreshold = Threshold;
+}
+
void LLVMDisposePassBuilderOptions(LLVMPassBuilderOptionsRef Options) {
delete unwrap(Options);
}
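
The new binding exposes PTO.InlinerThreshold to clients of the C API. A minimal sketch of how a client might combine it with LLVMRunPasses; the threshold value 225 is only an illustrative inline budget, not a documented default of this API:

// Sketch: driving the new-PM default pipeline through the C API, including
// the inliner-threshold knob added above.
#include "llvm-c/Core.h"
#include "llvm-c/Error.h"
#include "llvm-c/TargetMachine.h"
#include "llvm-c/Transforms/PassBuilder.h"

static void runDefaultO2(LLVMModuleRef M, LLVMTargetMachineRef TM) {
  LLVMPassBuilderOptionsRef Opts = LLVMCreatePassBuilderOptions();
  LLVMPassBuilderOptionsSetInlinerThreshold(Opts, 225);
  LLVMErrorRef Err = LLVMRunPasses(M, "default<O2>", TM, Opts);
  if (Err) {
    char *Msg = LLVMGetErrorMessage(Err);
    // Report Msg through the host application's diagnostics, then free it.
    LLVMDisposeErrorMessage(Msg);
  }
  LLVMDisposePassBuilderOptions(Opts);
}
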
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 0762c535f7f5..660cb2e974d7 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -14,12 +14,12 @@
///
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InlineAdvisor.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
@@ -29,6 +29,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/PGOOptions.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
#include "llvm/Transforms/Coroutines/CoroCleanup.h"
@@ -45,6 +46,7 @@
#include "llvm/Transforms/IPO/CrossDSOCFI.h"
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
+#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
@@ -55,6 +57,7 @@
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
+#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
#include "llvm/Transforms/IPO/MergeFunctions.h"
#include "llvm/Transforms/IPO/ModuleInliner.h"
#include "llvm/Transforms/IPO/OpenMPOpt.h"
@@ -118,9 +121,11 @@
#include "llvm/Transforms/Utils/AddDiscriminators.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
+#include "llvm/Transforms/Utils/CountVisits.h"
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
#include "llvm/Transforms/Utils/Mem2Reg.h"
+#include "llvm/Transforms/Utils/MoveAutoInit.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
@@ -151,9 +156,6 @@ static cl::opt<bool>
cl::Hidden,
cl::desc("Enable inline deferral during PGO"));
-static cl::opt<bool> EnableMemProfiler("enable-mem-prof", cl::Hidden,
- cl::desc("Enable memory profiler"));
-
static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
cl::init(false), cl::Hidden,
cl::desc("Enable module inliner"));
@@ -163,21 +165,10 @@ static cl::opt<bool> PerformMandatoryInliningsFirst(
cl::desc("Perform mandatory inlinings module-wide, before performing "
"inlining"));
-static cl::opt<bool> EnableO3NonTrivialUnswitching(
- "enable-npm-O3-nontrivial-unswitch", cl::init(true), cl::Hidden,
- cl::desc("Enable non-trivial loop unswitching for -O3"));
-
static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
"eagerly-invalidate-analyses", cl::init(true), cl::Hidden,
cl::desc("Eagerly invalidate more analyses in default pipelines"));
-static cl::opt<bool> EnableNoRerunSimplificationPipeline(
- "enable-no-rerun-simplification-pipeline", cl::init(true), cl::Hidden,
- cl::desc(
- "Prevent running the simplification pipeline on a function more "
- "than once in the case that SCC mutations cause a function to be "
- "visited multiple times as long as the function has not been changed"));
-
static cl::opt<bool> EnableMergeFunctions(
"enable-merge-functions", cl::init(false), cl::Hidden,
cl::desc("Enable function merging as part of the optimization pipeline"));
@@ -263,7 +254,7 @@ static cl::opt<bool>
cl::desc("Enable lowering of the matrix intrinsics"));
static cl::opt<bool> EnableConstraintElimination(
- "enable-constraint-elimination", cl::init(false), cl::Hidden,
+ "enable-constraint-elimination", cl::init(true), cl::Hidden,
cl::desc(
"Enable pass to eliminate conditions based on linear constraints"));
@@ -279,6 +270,10 @@ static cl::opt<AttributorRunOption> AttributorRun(
clEnumValN(AttributorRunOption::NONE, "none",
"disable attributor runs")));
+cl::opt<bool> EnableMemProfContextDisambiguation(
+ "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation"));
+
PipelineTuningOptions::PipelineTuningOptions() {
LoopInterleaving = true;
LoopVectorization = true;
@@ -288,6 +283,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
LicmMssaOptCap = SetLicmMssaOptCap;
LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
CallGraphProfile = true;
+ UnifiedLTO = false;
MergeFunctions = EnableMergeFunctions;
InlinerThreshold = -1;
EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
@@ -303,6 +299,61 @@ void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
for (auto &C : PeepholeEPCallbacks)
C(FPM, Level);
}
+void PassBuilder::invokeLateLoopOptimizationsEPCallbacks(
+ LoopPassManager &LPM, OptimizationLevel Level) {
+ for (auto &C : LateLoopOptimizationsEPCallbacks)
+ C(LPM, Level);
+}
+void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM,
+ OptimizationLevel Level) {
+ for (auto &C : LoopOptimizerEndEPCallbacks)
+ C(LPM, Level);
+}
+void PassBuilder::invokeScalarOptimizerLateEPCallbacks(
+ FunctionPassManager &FPM, OptimizationLevel Level) {
+ for (auto &C : ScalarOptimizerLateEPCallbacks)
+ C(FPM, Level);
+}
+void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM,
+ OptimizationLevel Level) {
+ for (auto &C : CGSCCOptimizerLateEPCallbacks)
+ C(CGPM, Level);
+}
+void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
+ OptimizationLevel Level) {
+ for (auto &C : VectorizerStartEPCallbacks)
+ C(FPM, Level);
+}
+void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
+ OptimizationLevel Level) {
+ for (auto &C : OptimizerEarlyEPCallbacks)
+ C(MPM, Level);
+}
+void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
+ OptimizationLevel Level) {
+ for (auto &C : OptimizerLastEPCallbacks)
+ C(MPM, Level);
+}
+void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(
+ ModulePassManager &MPM, OptimizationLevel Level) {
+ for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
+ C(MPM, Level);
+}
+void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks(
+ ModulePassManager &MPM, OptimizationLevel Level) {
+ for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
+ C(MPM, Level);
+}
+void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM,
+ OptimizationLevel Level) {
+ for (auto &C : PipelineStartEPCallbacks)
+ C(MPM, Level);
+}
+void PassBuilder::invokePipelineEarlySimplificationEPCallbacks(
+ ModulePassManager &MPM, OptimizationLevel Level) {
+ for (auto &C : PipelineEarlySimplificationEPCallbacks)
+ C(MPM, Level);
+}
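
These invoke* helpers centralize the loops over the extension-point callback lists; the callbacks themselves are still registered through PassBuilder's registration API. A short sketch of the registration side that these helpers end up invoking; the passes added in the lambdas are arbitrary examples, not a recommendation:

// Sketch: registering extension-point callbacks that the invoke* helpers
// above will run at the corresponding points of the default pipelines.
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
using namespace llvm;

void registerExampleCallbacks(PassBuilder &PB) {
  PB.registerPipelineStartEPCallback(
      [](ModulePassManager &MPM, OptimizationLevel Level) {
        // Runs via invokePipelineStartEPCallbacks at the start of the
        // default pipelines; add module passes here if desired.
      });
  PB.registerVectorizerStartEPCallback(
      [](FunctionPassManager &FPM, OptimizationLevel Level) {
        // Runs via invokeVectorizerStartEPCallbacks just before the
        // vectorization passes are scheduled.
        FPM.addPass(InstCombinePass());
      });
}
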
// Helper to add AnnotationRemarksPass.
static void addAnnotationRemarksPass(ModulePassManager &MPM) {
@@ -322,6 +373,9 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
FunctionPassManager FPM;
+ if (AreStatisticsEnabled())
+ FPM.addPass(CountVisitsPass());
+
// Form SSA out of local memory accesses after breaking apart aggregates into
// scalars.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
@@ -384,8 +438,7 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
LPM2.addPass(LoopIdiomRecognizePass());
LPM2.addPass(IndVarSimplifyPass());
- for (auto &C : LateLoopOptimizationsEPCallbacks)
- C(LPM2, Level);
+ invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);
LPM2.addPass(LoopDeletionPass());
@@ -403,13 +456,8 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
/* OnlyWhenForced= */ !PTO.LoopUnrolling,
PTO.ForgetAllSCEVInLoopUnroll));
- for (auto &C : LoopOptimizerEndEPCallbacks)
- C(LPM2, Level);
+ invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
- // We provide the opt remark emitter pass for LICM to use. We only need to do
- // this once as it is immutable.
- FPM.addPass(
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
/*UseMemorySSA=*/true,
/*UseBlockFrequencyInfo=*/true));
@@ -445,8 +493,7 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(CoroElidePass());
- for (auto &C : ScalarOptimizerLateEPCallbacks)
- C(FPM, Level);
+ invokeScalarOptimizerLateEPCallbacks(FPM, Level);
// Finally, do an expensive DCE pass to catch all the dead code exposed by
// the simplifications and basic cleanup after all the simplifications.
@@ -472,6 +519,9 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FunctionPassManager FPM;
+ if (AreStatisticsEnabled())
+ FPM.addPass(CountVisitsPass());
+
// Form SSA out of local memory accesses after breaking apart aggregates into
// scalars.
FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
@@ -502,8 +552,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
FPM.addPass(InstCombinePass());
- if (Level == OptimizationLevel::O3)
- FPM.addPass(AggressiveInstCombinePass());
+ FPM.addPass(AggressiveInstCombinePass());
if (EnableConstraintElimination)
FPM.addPass(ConstraintEliminationPass());
@@ -561,16 +610,14 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true));
LPM1.addPass(
- SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
- EnableO3NonTrivialUnswitching));
+ SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
if (EnableLoopFlatten)
LPM1.addPass(LoopFlattenPass());
LPM2.addPass(LoopIdiomRecognizePass());
LPM2.addPass(IndVarSimplifyPass());
- for (auto &C : LateLoopOptimizationsEPCallbacks)
- C(LPM2, Level);
+ invokeLateLoopOptimizationsEPCallbacks(LPM2, Level);
LPM2.addPass(LoopDeletionPass());
@@ -588,13 +635,8 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
/* OnlyWhenForced= */ !PTO.LoopUnrolling,
PTO.ForgetAllSCEVInLoopUnroll));
- for (auto &C : LoopOptimizerEndEPCallbacks)
- C(LPM2, Level);
+ invokeLoopOptimizerEndEPCallbacks(LPM2, Level);
- // We provide the opt remark emitter pass for LICM to use. We only need to do
- // this once as it is immutable.
- FPM.addPass(
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1),
/*UseMemorySSA=*/true,
/*UseBlockFrequencyInfo=*/true));
@@ -654,15 +696,16 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(MemCpyOptPass());
FPM.addPass(DSEPass());
+ FPM.addPass(MoveAutoInitPass());
+
FPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
- /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
+ /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
FPM.addPass(CoroElidePass());
- for (auto &C : ScalarOptimizerLateEPCallbacks)
- C(FPM, Level);
+ invokeScalarOptimizerLateEPCallbacks(FPM, Level);
FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
.convertSwitchRangeToICmp(true)
@@ -671,15 +714,6 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
FPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(FPM, Level);
- // Don't add CHR pass for CSIRInstr build in PostLink as the profile
- // is still the same as the PreLink compilation.
- if (EnableCHR && Level == OptimizationLevel::O3 && PGOOpt &&
- ((PGOOpt->Action == PGOOptions::IRUse &&
- (Phase != ThinOrFullLTOPhase::ThinLTOPostLink ||
- PGOOpt->CSAction != PGOOptions::CSIRInstr)) ||
- PGOOpt->Action == PGOOptions::SampleUse))
- FPM.addPass(ControlHeightReductionPass());
-
return FPM;
}
@@ -692,7 +726,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
OptimizationLevel Level, bool RunProfileGen,
bool IsCS, std::string ProfileFile,
std::string ProfileRemappingFile,
- ThinOrFullLTOPhase LTOPhase) {
+ ThinOrFullLTOPhase LTOPhase,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS) {
assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
if (!IsCS && !DisablePreInliner) {
InlineParams IP;
@@ -730,7 +765,8 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
if (!RunProfileGen) {
assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
- MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
+ MPM.addPass(
+ PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
// RequireAnalysisPass for PSI before subsequent non-module passes.
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
@@ -760,13 +796,14 @@ void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
MPM.addPass(InstrProfiling(Options, IsCS));
}
-void PassBuilder::addPGOInstrPassesForO0(ModulePassManager &MPM,
- bool RunProfileGen, bool IsCS,
- std::string ProfileFile,
- std::string ProfileRemappingFile) {
+void PassBuilder::addPGOInstrPassesForO0(
+ ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
+ std::string ProfileFile, std::string ProfileRemappingFile,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS) {
if (!RunProfileGen) {
assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
- MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
+ MPM.addPass(
+ PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
// Cache ProfileSummaryAnalysis once to avoid the potential need to insert
// RequireAnalysisPass for PSI before subsequent non-module passes.
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
@@ -840,8 +877,11 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
if (AttributorRun & AttributorRunOption::CGSCC)
MainCGPipeline.addPass(AttributorCGSCCPass());
- // Now deduce any function attributes based in the current code.
- MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
+ // Deduce function attributes. We do another run of this after the function
+ // simplification pipeline, so this only needs to run when it could affect the
+ // function simplification pipeline, which is only the case with recursive
+ // functions.
+ MainCGPipeline.addPass(PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
// When at O3 add argument promotion to the pass pipeline.
// FIXME: It isn't at all clear why this should be limited to O3.
@@ -853,20 +893,29 @@ PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
MainCGPipeline.addPass(OpenMPOptCGSCCPass());
- for (auto &C : CGSCCOptimizerLateEPCallbacks)
- C(MainCGPipeline, Level);
+ invokeCGSCCOptimizerLateEPCallbacks(MainCGPipeline, Level);
- // Lastly, add the core function simplification pipeline nested inside the
+ // Add the core function simplification pipeline nested inside the
// CGSCC walk.
MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
buildFunctionSimplificationPipeline(Level, Phase),
- PTO.EagerlyInvalidateAnalyses, EnableNoRerunSimplificationPipeline));
+ PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
+
+ // Finally, deduce any function attributes based on the fully simplified
+ // function.
+ MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
+
+ // Mark that the function is fully simplified and that it shouldn't be
+ // simplified again if we somehow revisit it due to CGSCC mutations unless
+ // it's been modified since.
+ MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
+ RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>()));
MainCGPipeline.addPass(CoroSplitPass(Level != OptimizationLevel::O0));
- if (EnableNoRerunSimplificationPipeline)
- MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
- InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));
+ // Make sure we don't affect potential future NoRerun CGSCC adaptors.
+ MIWP.addLateModulePass(createModuleToFunctionPassAdaptor(
+ InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));
return MIWP;
}
@@ -913,6 +962,12 @@ PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
ModulePassManager
PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
ThinOrFullLTOPhase Phase) {
+ assert(Level != OptimizationLevel::O0 &&
+ "Should not be used for O0 pipeline");
+
+ assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink &&
+ "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
+
ModulePassManager MPM;
// Place pseudo probe instrumentation as the first pass of the pipeline to
@@ -947,33 +1002,28 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
- // Do basic inference of function attributes from known properties of system
- // libraries and other oracles.
- MPM.addPass(InferFunctionAttrsPass());
- MPM.addPass(CoroEarlyPass());
-
// Create an early function pass manager to cleanup the output of the
- // frontend.
- FunctionPassManager EarlyFPM;
- // Lower llvm.expect to metadata before attempting transforms.
- // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
- EarlyFPM.addPass(LowerExpectIntrinsicPass());
- EarlyFPM.addPass(SimplifyCFGPass());
- EarlyFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
- EarlyFPM.addPass(EarlyCSEPass());
- if (Level == OptimizationLevel::O3)
- EarlyFPM.addPass(CallSiteSplittingPass());
-
- // In SamplePGO ThinLTO backend, we need instcombine before profile annotation
- // to convert bitcast to direct calls so that they can be inlined during the
- // profile annotation prepration step.
- // More details about SamplePGO design can be found in:
- // https://research.google.com/pubs/pub45290.html
- // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
- if (LoadSampleProfile)
- EarlyFPM.addPass(InstCombinePass());
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM),
- PTO.EagerlyInvalidateAnalyses));
+ // frontend. Not necessary with LTO post link pipelines since the pre link
+ // pipeline already cleaned up the frontend output.
+ if (Phase != ThinOrFullLTOPhase::ThinLTOPostLink) {
+ // Do basic inference of function attributes from known properties of system
+ // libraries and other oracles.
+ MPM.addPass(InferFunctionAttrsPass());
+ MPM.addPass(CoroEarlyPass());
+
+ FunctionPassManager EarlyFPM;
+ // Lower llvm.expect to metadata before attempting transforms.
+ // Compare/branch metadata may alter the behavior of passes like
+ // SimplifyCFG.
+ EarlyFPM.addPass(LowerExpectIntrinsicPass());
+ EarlyFPM.addPass(SimplifyCFGPass());
+ EarlyFPM.addPass(SROAPass(SROAOptions::ModifyCFG));
+ EarlyFPM.addPass(EarlyCSEPass());
+ if (Level == OptimizationLevel::O3)
+ EarlyFPM.addPass(CallSiteSplittingPass());
+ MPM.addPass(createModuleToFunctionPassAdaptor(
+ std::move(EarlyFPM), PTO.EagerlyInvalidateAnalyses));
+ }
if (LoadSampleProfile) {
// Annotate sample profile right after early FPM to ensure freshness of
@@ -985,8 +1035,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
// Do not invoke ICP in the LTOPrelink phase as it makes it hard
// for the profile annotation to be accurate in the LTO backend.
- if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink &&
- Phase != ThinOrFullLTOPhase::FullLTOPreLink)
+ if (!isLTOPreLink(Phase))
// We perform early indirect call promotion here, before globalopt.
// This is important for the ThinLTO backend phase because otherwise
// imported available_externally functions look unreferenced and are
@@ -997,8 +1046,7 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// Try to perform OpenMP specific optimizations on the module. This is a
// (quick!) no-op if there are no OpenMP runtime calls present in the module.
- if (Level != OptimizationLevel::O0)
- MPM.addPass(OpenMPOptPass());
+ MPM.addPass(OpenMPOptPass());
if (AttributorRun & AttributorRunOption::MODULE)
MPM.addPass(AttributorPass());
@@ -1009,16 +1057,17 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
- for (auto &C : PipelineEarlySimplificationEPCallbacks)
- C(MPM, Level);
+ invokePipelineEarlySimplificationEPCallbacks(MPM, Level);
// Interprocedural constant propagation now that basic cleanup has occurred
// and prior to optimizing globals.
// FIXME: This position in the pipeline hasn't been carefully considered in
// years, it should be re-analyzed.
- MPM.addPass(IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
- Level != OptimizationLevel::Os &&
- Level != OptimizationLevel::Oz)));
+ MPM.addPass(IPSCCPPass(
+ IPSCCPOptions(/*AllowFuncSpec=*/
+ Level != OptimizationLevel::Os &&
+ Level != OptimizationLevel::Oz &&
+ !isLTOPreLink(Phase))));
// Attach metadata to indirect call sites indicating the set of functions
// they may target at run-time. This should follow IPSCCP.
@@ -1027,19 +1076,13 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
// Optimize globals to try and fold them into constants.
MPM.addPass(GlobalOptPass());
- // Promote any localized globals to SSA registers.
- // FIXME: Should this instead by a run of SROA?
- // FIXME: We should probably run instcombine and simplifycfg afterward to
- // delete control flows that are dead once globals have been folded to
- // constants.
- MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
-
// Create a small function pass pipeline to cleanup after all the global
// optimizations.
FunctionPassManager GlobalCleanupPM;
+  // FIXME: Should this instead be a run of SROA?
+ GlobalCleanupPM.addPass(PromotePass());
GlobalCleanupPM.addPass(InstCombinePass());
invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
-
GlobalCleanupPM.addPass(
SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM),
@@ -1052,13 +1095,17 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
addPGOInstrPasses(MPM, Level,
/* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
/* IsCS */ false, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile, Phase);
+ PGOOpt->ProfileRemappingFile, Phase, PGOOpt->FS);
MPM.addPass(PGOIndirectCallPromotion(false, false));
}
if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
PGOOpt->CSAction == PGOOptions::CSIRInstr)
MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
+ if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
+ !PGOOpt->MemoryProfile.empty())
+ MPM.addPass(MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
+
// Synthesize function entry counts for non-PGO compilation.
if (EnableSyntheticCounts && !PGOOpt)
MPM.addPass(SyntheticCountsPropagation());
@@ -1074,10 +1121,9 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
MPM.addPass(CoroCleanupPass());
- if (EnableMemProfiler && Phase != ThinOrFullLTOPhase::ThinLTOPreLink) {
- MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
- MPM.addPass(ModuleMemProfilerPass());
- }
+ // Optimize globals now that functions are fully simplified.
+ MPM.addPass(GlobalOptPass());
+ MPM.addPass(GlobalDCEPass());
return MPM;
}
@@ -1138,8 +1184,6 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
OptimizationLevel::O3));
ExtraPasses.addPass(
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- ExtraPasses.addPass(
createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/true,
/*UseBlockFrequencyInfo=*/true));
ExtraPasses.addPass(
@@ -1205,34 +1249,32 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level,
// or SimplifyCFG passes scheduled after us, that would cleanup
// the CFG mess this may created if allowed to modify CFG, so forbid that.
FPM.addPass(SROAPass(SROAOptions::PreserveCFG));
- FPM.addPass(InstCombinePass());
- FPM.addPass(
- RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
- FPM.addPass(createFunctionToLoopPassAdaptor(
- LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
- /*AllowSpeculation=*/true),
- /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
}
+ FPM.addPass(InstCombinePass());
+
+ // This is needed for two reasons:
+ // 1. It works around problems that instcombine introduces, such as sinking
+ // expensive FP divides into loops containing multiplications using the
+ // divide result.
+ // 2. It helps to clean up some loop-invariant code created by the loop
+ // unroll pass when IsFullLTO=false.
+ FPM.addPass(createFunctionToLoopPassAdaptor(
+ LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+ /*AllowSpeculation=*/true),
+ /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
+
// Now that we've vectorized and unrolled loops, we may have more refined
// alignment information, try to re-derive it here.
FPM.addPass(AlignmentFromAssumptionsPass());
-
- if (IsFullLTO)
- FPM.addPass(InstCombinePass());
}
ModulePassManager
PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
ThinOrFullLTOPhase LTOPhase) {
- const bool LTOPreLink = (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink ||
- LTOPhase == ThinOrFullLTOPhase::FullLTOPreLink);
+ const bool LTOPreLink = isLTOPreLink(LTOPhase);
ModulePassManager MPM;
- // Optimize globals now that the module is fully simplified.
- MPM.addPass(GlobalOptPass());
- MPM.addPass(GlobalDCEPass());
-
// Run partial inlining pass to partially inline functions that have
// large bodies.
if (RunPartialInlining)
@@ -1266,11 +1308,11 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
/* IsCS */ true, PGOOpt->CSProfileGenFile,
- PGOOpt->ProfileRemappingFile, LTOPhase);
+ PGOOpt->ProfileRemappingFile, LTOPhase, PGOOpt->FS);
else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
/* IsCS */ true, PGOOpt->ProfileFile,
- PGOOpt->ProfileRemappingFile, LTOPhase);
+ PGOOpt->ProfileRemappingFile, LTOPhase, PGOOpt->FS);
}
// Re-compute GlobalsAA here prior to function passes. This is particularly
@@ -1282,8 +1324,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// memory operations.
MPM.addPass(RecomputeGlobalsAAPass());
- for (auto &C : OptimizerEarlyEPCallbacks)
- C(MPM, Level);
+ invokeOptimizerEarlyEPCallbacks(MPM, Level);
FunctionPassManager OptimizePM;
OptimizePM.addPass(Float2IntPass());
@@ -1294,6 +1335,11 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
OptimizePM.addPass(EarlyCSEPass());
}
+ // CHR pass should only be applied with the profile information.
+ // The check is to check the profile summary information in CHR.
+ if (EnableCHR && Level == OptimizationLevel::O3)
+ OptimizePM.addPass(ControlHeightReductionPass());
+
// FIXME: We need to run some loop optimizations to re-rotate loops after
// simplifycfg and others undo their rotation.
@@ -1301,8 +1347,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// rather than on each loop in an inside-out manner, and so they are actually
// function passes.
- for (auto &C : VectorizerStartEPCallbacks)
- C(OptimizePM, Level);
+ invokeVectorizerStartEPCallbacks(OptimizePM, Level);
LoopPassManager LPM;
// First rotate loops that may have been un-rotated by prior passes.
@@ -1354,8 +1399,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
PTO.EagerlyInvalidateAnalyses));
- for (auto &C : OptimizerLastEPCallbacks)
- C(MPM, Level);
+ invokeOptimizerLastEPCallbacks(MPM, Level);
// Split out cold code. Splitting is done late to avoid hiding context from
// other optimizations and inadvertently regressing performance. The tradeoff
@@ -1396,8 +1440,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
ModulePassManager
PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
bool LTOPreLink) {
- assert(Level != OptimizationLevel::O0 &&
- "Must request optimizations for the default pipeline!");
+ if (Level == OptimizationLevel::O0)
+ return buildO0DefaultPipeline(Level, LTOPreLink);
ModulePassManager MPM;
@@ -1407,13 +1451,12 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
// Force any function attributes we want the rest of the pipeline to observe.
MPM.addPass(ForceFunctionAttrsPass());
- // Apply module pipeline start EP callback.
- for (auto &C : PipelineStartEPCallbacks)
- C(MPM, Level);
-
if (PGOOpt && PGOOpt->DebugInfoForProfiling)
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
+ // Apply module pipeline start EP callback.
+ invokePipelineStartEPCallbacks(MPM, Level);
+
const ThinOrFullLTOPhase LTOPhase = LTOPreLink
? ThinOrFullLTOPhase::FullLTOPreLink
: ThinOrFullLTOPhase::None;
@@ -1432,14 +1475,25 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
if (LTOPreLink)
addRequiredLTOPreLinkPasses(MPM);
+ return MPM;
+}
+ModulePassManager
+PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
+ bool EmitSummary) {
+ ModulePassManager MPM;
+ MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary,
+ ThinLTO
+ ? buildThinLTOPreLinkDefaultPipeline(Level)
+ : buildLTOPreLinkDefaultPipeline(Level)));
+ MPM.addPass(buildPerModuleDefaultPipeline(Level));
return MPM;
}
ModulePassManager
PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
- assert(Level != OptimizationLevel::O0 &&
- "Must request optimizations for the default pipeline!");
+ if (Level == OptimizationLevel::O0)
+ return buildO0DefaultPipeline(Level, /*LTOPreLink*/true);
ModulePassManager MPM;
@@ -1453,8 +1507,7 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
// Apply module pipeline start EP callback.
- for (auto &C : PipelineStartEPCallbacks)
- C(MPM, Level);
+ invokePipelineStartEPCallbacks(MPM, Level);
// If we are planning to perform ThinLTO later, we don't bloat the code with
// unrolling/vectorization/... now. Just simplify the module as much as we
@@ -1472,9 +1525,6 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
if (RunPartialInlining)
MPM.addPass(PartialInlinerPass());
- // Reduce the size of the IR as much as possible.
- MPM.addPass(GlobalOptPass());
-
if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
PGOOpt->Action == PGOOptions::SampleUse)
MPM.addPass(PseudoProbeUpdatePass());
@@ -1482,10 +1532,8 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
// Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
// optimization is going to be done in PostLink stage, but clang can't add
// callbacks there in case of in-process ThinLTO called by linker.
- for (auto &C : OptimizerEarlyEPCallbacks)
- C(MPM, Level);
- for (auto &C : OptimizerLastEPCallbacks)
- C(MPM, Level);
+ invokeOptimizerEarlyEPCallbacks(MPM, Level);
+ invokeOptimizerLastEPCallbacks(MPM, Level);
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
@@ -1499,10 +1547,12 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
ModulePassManager MPM;
- // Convert @llvm.global.annotations to !annotation metadata.
- MPM.addPass(Annotation2MetadataPass());
-
if (ImportSummary) {
+ // For ThinLTO we must apply the context disambiguation decisions early, to
+ // ensure we can correctly match the callsites to summary data.
+ if (EnableMemProfContextDisambiguation)
+ MPM.addPass(MemProfContextDisambiguation(ImportSummary));
+
// These passes import type identifier resolutions for whole-program
// devirtualization and CFI. They must run early because other passes may
// disturb the specific instruction patterns that these passes look for,
@@ -1534,9 +1584,6 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
return MPM;
}
- // Force any function attributes we want the rest of the pipeline to observe.
- MPM.addPass(ForceFunctionAttrsPass());
-
// Add the core simplification pipeline.
MPM.addPass(buildModuleSimplificationPipeline(
Level, ThinOrFullLTOPhase::ThinLTOPostLink));
@@ -1553,8 +1600,6 @@ ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
ModulePassManager
PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
- assert(Level != OptimizationLevel::O0 &&
- "Must request optimizations for the default pipeline!");
// FIXME: We should use a customized pre-link pipeline!
return buildPerModuleDefaultPipeline(Level,
/* LTOPreLink */ true);
@@ -1565,11 +1610,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
ModuleSummaryIndex *ExportSummary) {
ModulePassManager MPM;
- // Convert @llvm.global.annotations to !annotation metadata.
- MPM.addPass(Annotation2MetadataPass());
-
- for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
- C(MPM, Level);
+ invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level);
// Create a function that performs CFI checks for cross-DSO calls with targets
// in the current module.
@@ -1584,8 +1625,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// in ICP.
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
- for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
- C(MPM, Level);
+ invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
@@ -1604,14 +1644,11 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
}
// Try to run OpenMP optimizations, quick no-op if no OpenMP metadata present.
- MPM.addPass(OpenMPOptPass());
+ MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
// Remove unused virtual tables to improve the quality of code generated by
// whole-program devirtualization and bitset lowering.
- MPM.addPass(GlobalDCEPass());
-
- // Force any function attributes we want the rest of the pipeline to observe.
- MPM.addPass(ForceFunctionAttrsPass());
+ MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
// Do basic inference of function attributes from known properties of system
// libraries and other oracles.
@@ -1666,8 +1703,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// pipeline).
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr, true));
- for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
- C(MPM, Level);
+ invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
@@ -1685,13 +1721,16 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// keep one copy of each constant.
MPM.addPass(ConstantMergePass());
+ // Remove unused arguments from functions.
+ MPM.addPass(DeadArgumentEliminationPass());
+
// Reduce the code after globalopt and ipsccp. Both can open up significant
// simplification opportunities, and both can propagate functions through
// function pointers. When this happens, we often have to resolve varargs
// calls, etc, so let instcombine do this.
FunctionPassManager PeepholeFPM;
PeepholeFPM.addPass(InstCombinePass());
- if (Level == OptimizationLevel::O3)
+ if (Level.getSpeedupLevel() > 1)
PeepholeFPM.addPass(AggressiveInstCombinePass());
invokePeepholeEPCallbacks(PeepholeFPM, Level);
@@ -1703,25 +1742,37 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// valuable as the inliner doesn't currently care whether it is inlining an
// invoke or a call.
// Run the inliner now.
- MPM.addPass(ModuleInlinerWrapperPass(
- getInlineParamsFromOptLevel(Level),
- /* MandatoryFirst */ true,
- InlineContext{ThinOrFullLTOPhase::FullLTOPostLink,
- InlinePass::CGSCCInliner}));
+ if (EnableModuleInliner) {
+ MPM.addPass(ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
+ UseInlineAdvisor,
+ ThinOrFullLTOPhase::FullLTOPostLink));
+ } else {
+ MPM.addPass(ModuleInlinerWrapperPass(
+ getInlineParamsFromOptLevel(Level),
+ /* MandatoryFirst */ true,
+ InlineContext{ThinOrFullLTOPhase::FullLTOPostLink,
+ InlinePass::CGSCCInliner}));
+ }
+
+ // Perform context disambiguation after inlining, since that would reduce the
+ // amount of additional cloning required to distinguish the allocation
+ // contexts.
+ if (EnableMemProfContextDisambiguation)
+ MPM.addPass(MemProfContextDisambiguation());
// Optimize globals again after we ran the inliner.
MPM.addPass(GlobalOptPass());
+ // Run the OpenMPOpt pass again after global optimizations.
+ MPM.addPass(OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
+
// Garbage collect dead functions.
- MPM.addPass(GlobalDCEPass());
+ MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
// If we didn't decide to inline a function, check to see if we can
// transform it to pass arguments by value instead of by reference.
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(ArgumentPromotionPass()));
- // Remove unused arguments from functions.
- MPM.addPass(DeadArgumentEliminationPass());
-
FunctionPassManager FPM;
// The IPO Passes may leave cruft around. Clean up after them.
FPM.addPass(InstCombinePass());
@@ -1739,12 +1790,12 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
addPGOInstrPasses(MPM, Level, /* RunProfileGen */ true,
/* IsCS */ true, PGOOpt->CSProfileGenFile,
PGOOpt->ProfileRemappingFile,
- ThinOrFullLTOPhase::FullLTOPostLink);
+ ThinOrFullLTOPhase::FullLTOPostLink, PGOOpt->FS);
else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
addPGOInstrPasses(MPM, Level, /* RunProfileGen */ false,
/* IsCS */ true, PGOOpt->ProfileFile,
PGOOpt->ProfileRemappingFile,
- ThinOrFullLTOPhase::FullLTOPostLink);
+ ThinOrFullLTOPhase::FullLTOPostLink, PGOOpt->FS);
}
// Break up allocas
@@ -1773,7 +1824,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
MainFPM.addPass(createFunctionToLoopPassAdaptor(
LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
/*AllowSpeculation=*/true),
- /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
+ /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
if (RunNewGVN)
MainFPM.addPass(NewGVNPass());
@@ -1785,6 +1836,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
// Nuke dead stores.
MainFPM.addPass(DSEPass());
+ MainFPM.addPass(MoveAutoInitPass());
MainFPM.addPass(MergedLoadStoreMotionPass());
LoopPassManager LPM;
@@ -1808,7 +1860,8 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
addVectorPasses(Level, MainFPM, /* IsFullLTO */ true);
// Run the OpenMPOpt CGSCC pass again late.
- MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(OpenMPOptCGSCCPass()));
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+ OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));
invokePeepholeEPCallbacks(MainFPM, Level);
MainFPM.addPass(JumpThreadingPass());
@@ -1829,16 +1882,30 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
MPM.addPass(HotColdSplittingPass());
// Add late LTO optimization passes.
+ FunctionPassManager LateFPM;
+
+ // LoopSink pass sinks instructions hoisted by LICM, which serves as a
+ // canonicalization pass that enables other optimizations. As a result,
+ // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
+ // result too early.
+ LateFPM.addPass(LoopSinkPass());
+
+ // This hoists/decomposes div/rem ops. It should run after other sink/hoist
+ // passes to avoid re-sinking, but before SimplifyCFG because it can allow
+ // flattening of blocks.
+ LateFPM.addPass(DivRemPairsPass());
+
// Delete basic blocks, which optimization passes may have killed.
- MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass(
+ LateFPM.addPass(SimplifyCFGPass(
SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
- true))));
+ true)));
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
  // Drop bodies of available externally objects to improve GlobalDCE.
MPM.addPass(EliminateAvailableExternallyPass());
// Now that we have optimized the program, discard unreachable functions.
- MPM.addPass(GlobalDCEPass());
+ MPM.addPass(GlobalDCEPass(/*InLTOPostLink=*/true));
if (PTO.MergeFunctions)
MPM.addPass(MergeFunctionsPass());
@@ -1846,8 +1913,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
if (PTO.CallGraphProfile)
MPM.addPass(CGProfilePass());
- for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
- C(MPM, Level);
+ invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
@@ -1874,16 +1940,15 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
addPGOInstrPassesForO0(
MPM,
/* RunProfileGen */ (PGOOpt->Action == PGOOptions::IRInstr),
- /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile);
+ /* IsCS */ false, PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
+ PGOOpt->FS);
- for (auto &C : PipelineStartEPCallbacks)
- C(MPM, Level);
+ invokePipelineStartEPCallbacks(MPM, Level);
if (PGOOpt && PGOOpt->DebugInfoForProfiling)
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
- for (auto &C : PipelineEarlySimplificationEPCallbacks)
- C(MPM, Level);
+ invokePipelineEarlySimplificationEPCallbacks(MPM, Level);
// Build a minimal pipeline based on the semantics required by LLVM,
// which is just that always inlining occurs. Further, disable generating
@@ -1901,15 +1966,13 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
if (!CGSCCOptimizerLateEPCallbacks.empty()) {
CGSCCPassManager CGPM;
- for (auto &C : CGSCCOptimizerLateEPCallbacks)
- C(CGPM, Level);
+ invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);
if (!CGPM.isEmpty())
MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
}
if (!LateLoopOptimizationsEPCallbacks.empty()) {
LoopPassManager LPM;
- for (auto &C : LateLoopOptimizationsEPCallbacks)
- C(LPM, Level);
+ invokeLateLoopOptimizationsEPCallbacks(LPM, Level);
if (!LPM.isEmpty()) {
MPM.addPass(createModuleToFunctionPassAdaptor(
createFunctionToLoopPassAdaptor(std::move(LPM))));
@@ -1917,8 +1980,7 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
}
if (!LoopOptimizerEndEPCallbacks.empty()) {
LoopPassManager LPM;
- for (auto &C : LoopOptimizerEndEPCallbacks)
- C(LPM, Level);
+ invokeLoopOptimizerEndEPCallbacks(LPM, Level);
if (!LPM.isEmpty()) {
MPM.addPass(createModuleToFunctionPassAdaptor(
createFunctionToLoopPassAdaptor(std::move(LPM))));
@@ -1926,19 +1988,16 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
}
if (!ScalarOptimizerLateEPCallbacks.empty()) {
FunctionPassManager FPM;
- for (auto &C : ScalarOptimizerLateEPCallbacks)
- C(FPM, Level);
+ invokeScalarOptimizerLateEPCallbacks(FPM, Level);
if (!FPM.isEmpty())
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
}
- for (auto &C : OptimizerEarlyEPCallbacks)
- C(MPM, Level);
+ invokeOptimizerEarlyEPCallbacks(MPM, Level);
if (!VectorizerStartEPCallbacks.empty()) {
FunctionPassManager FPM;
- for (auto &C : VectorizerStartEPCallbacks)
- C(FPM, Level);
+ invokeVectorizerStartEPCallbacks(FPM, Level);
if (!FPM.isEmpty())
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
}
@@ -1952,8 +2011,7 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
CoroPM.addPass(GlobalDCEPass());
MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
- for (auto &C : OptimizerLastEPCallbacks)
- C(MPM, Level);
+ invokeOptimizerLastEPCallbacks(MPM, Level);
if (LTOPreLink)
addRequiredLTOPreLinkPasses(MPM);
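
Because buildPerModuleDefaultPipeline and buildThinLTOPreLinkDefaultPipeline now fall back to buildO0DefaultPipeline instead of asserting, a driver can request the default pipeline uniformly across optimization levels. A sketch of the usual setup around these entry points:

// Sketch: standard driver setup around the default pipelines. With the O0
// fallbacks above, the same call works for every optimization level.
#include "llvm/IR/Module.h"
#include "llvm/Passes/PassBuilder.h"
using namespace llvm;

void optimizeModule(Module &M, OptimizationLevel Level) {
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  PassBuilder PB;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  // Previously this asserted at O0; now it dispatches to the O0 pipeline.
  ModulePassManager MPM = PB.buildPerModuleDefaultPipeline(Level);
  MPM.run(M, MAM);
}
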
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index ad44d86ea1a7..e10dc995c493 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -44,6 +44,7 @@ MODULE_PASS("always-inline", AlwaysInlinerPass())
MODULE_PASS("attributor", AttributorPass())
MODULE_PASS("annotation2metadata", Annotation2MetadataPass())
MODULE_PASS("openmp-opt", OpenMPOptPass())
+MODULE_PASS("openmp-opt-postlink", OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink))
MODULE_PASS("called-value-propagation", CalledValuePropagationPass())
MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass())
MODULE_PASS("cg-profile", CGProfilePass())
@@ -59,7 +60,6 @@ MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
MODULE_PASS("extract-blocks", BlockExtractorPass({}, false))
MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
MODULE_PASS("function-import", FunctionImportPass())
-MODULE_PASS("globaldce", GlobalDCEPass())
MODULE_PASS("globalopt", GlobalOptPass())
MODULE_PASS("globalsplit", GlobalSplitPass())
MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
@@ -86,6 +86,7 @@ MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
MODULE_PASS("no-op-module", NoOpModulePass())
MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass())
MODULE_PASS("partial-inliner", PartialInlinerPass())
+MODULE_PASS("memprof-context-disambiguation", MemProfContextDisambiguation())
MODULE_PASS("pgo-icall-prom", PGOIndirectCallPromotion())
MODULE_PASS("pgo-instr-gen", PGOInstrumentationGen())
MODULE_PASS("pgo-instr-use", PGOInstrumentationUse())
@@ -140,6 +141,13 @@ MODULE_PASS_WITH_PARAMS("loop-extract",
},
parseLoopExtractorPassOptions,
"single")
+MODULE_PASS_WITH_PARAMS("globaldce",
+ "GlobalDCEPass",
+ [](bool InLTOPostLink) {
+ return GlobalDCEPass(InLTOPostLink);
+ },
+ parseGlobalDCEPassOptions,
+ "in-lto-post-link")
MODULE_PASS_WITH_PARAMS("hwasan",
"HWAddressSanitizerPass",
[](HWAddressSanitizerOptions Opts) {
@@ -168,6 +176,20 @@ MODULE_PASS_WITH_PARAMS("ipsccp",
},
parseIPSCCPOptions,
"no-func-spec;func-spec")
+MODULE_PASS_WITH_PARAMS("embed-bitcode",
+ "EmbedBitcodePass",
+ [](EmbedBitcodeOptions Opts) {
+ return EmbedBitcodePass(Opts);
+ },
+ parseEmbedBitcodePassOptions,
+ "thinlto;emit-summary")
+MODULE_PASS_WITH_PARAMS("memprof-use",
+ "MemProfUsePass",
+ [](std::string Opts) {
+ return MemProfUsePass(Opts);
+ },
+ parseMemProfUsePassOptions,
+ "profile-filename=S")
#undef MODULE_PASS_WITH_PARAMS
#ifndef CGSCC_ANALYSIS
@@ -183,7 +205,6 @@ CGSCC_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
#endif
CGSCC_PASS("argpromotion", ArgumentPromotionPass())
CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
-CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass())
CGSCC_PASS("attributor-cgscc", AttributorCGSCCPass())
CGSCC_PASS("openmp-opt-cgscc", OpenMPOptCGSCCPass())
CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
@@ -206,6 +227,13 @@ CGSCC_PASS_WITH_PARAMS("coro-split",
},
parseCoroSplitPassOptions,
"reuse-storage")
+CGSCC_PASS_WITH_PARAMS("function-attrs",
+ "PostOrderFunctionAttrsPass",
+ [](bool SkipNonRecursive) {
+ return PostOrderFunctionAttrsPass(SkipNonRecursive);
+ },
+ parsePostOrderFunctionAttrsPassOptions,
+ "skip-non-recursive")
#undef CGSCC_PASS_WITH_PARAMS
#ifndef FUNCTION_ANALYSIS
@@ -241,7 +269,6 @@ FUNCTION_ANALYSIS("targetir",
TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis())
FUNCTION_ANALYSIS("verify", VerifierAnalysis())
FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
-FUNCTION_ANALYSIS("divergence", DivergenceAnalysis())
FUNCTION_ANALYSIS("uniformity", UniformityInfoAnalysis())
#ifndef FUNCTION_ALIAS_ANALYSIS
@@ -272,6 +299,7 @@ FUNCTION_PASS("bounds-checking", BoundsCheckingPass())
FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass())
FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass())
FUNCTION_PASS("consthoist", ConstantHoistingPass())
+FUNCTION_PASS("count-visits", CountVisitsPass())
FUNCTION_PASS("constraint-elimination", ConstraintEliminationPass())
FUNCTION_PASS("chr", ControlHeightReductionPass())
FUNCTION_PASS("coro-elide", CoroElidePass())
@@ -308,7 +336,6 @@ FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass())
FUNCTION_PASS("lint", LintPass())
FUNCTION_PASS("inject-tli-mappings", InjectTLIMappings())
FUNCTION_PASS("instnamer", InstructionNamerPass())
-FUNCTION_PASS("legacy-divergence-analysis", LegacyDivergenceAnalysisPass())
FUNCTION_PASS("loweratomic", LowerAtomicPass())
FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
@@ -324,6 +351,7 @@ FUNCTION_PASS("mem2reg", PromotePass())
FUNCTION_PASS("memcpyopt", MemCpyOptPass())
FUNCTION_PASS("mergeicmps", MergeICmpsPass())
FUNCTION_PASS("mergereturn", UnifyFunctionExitNodesPass())
+FUNCTION_PASS("move-auto-init", MoveAutoInitPass())
FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
FUNCTION_PASS("newgvn", NewGVNPass())
FUNCTION_PASS("jump-threading", JumpThreadingPass())
@@ -340,6 +368,7 @@ FUNCTION_PASS("objc-arc-contract", ObjCARCContractPass())
FUNCTION_PASS("objc-arc-expand", ObjCARCExpandPass())
FUNCTION_PASS("pa-eval", PAEvalPass())
FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt())
+FUNCTION_PASS("place-safepoints", PlaceSafepointsPass())
FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
FUNCTION_PASS("print<block-freq>", BlockFrequencyPrinterPass(dbgs()))
@@ -347,7 +376,6 @@ FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(dbgs()))
FUNCTION_PASS("print<cost-model>", CostModelPrinterPass(dbgs()))
FUNCTION_PASS("print<cycles>", CycleInfoPrinterPass(dbgs()))
FUNCTION_PASS("print<da>", DependenceAnalysisPrinterPass(dbgs()))
-FUNCTION_PASS("print<divergence>", DivergenceAnalysisPrinterPass(dbgs()))
FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<postdomtree>", PostDominatorTreePrinterPass(dbgs()))
FUNCTION_PASS("print<delinearization>", DelinearizationPrinterPass(dbgs()))
@@ -358,7 +386,6 @@ FUNCTION_PASS("print<inline-cost>", InlineCostAnnotationPrinterPass(dbgs()))
FUNCTION_PASS("print<inliner-size-estimator>",
InlineSizeEstimatorAnalysisPrinterPass(dbgs()))
FUNCTION_PASS("print<loops>", LoopPrinterPass(dbgs()))
-FUNCTION_PASS("print<memoryssa>", MemorySSAPrinterPass(dbgs()))
FUNCTION_PASS("print<memoryssa-walker>", MemorySSAWalkerPrinterPass(dbgs()))
FUNCTION_PASS("print<phi-values>", PhiValuesPrinterPass(dbgs()))
FUNCTION_PASS("print<regions>", RegionInfoPrinterPass(dbgs()))
@@ -422,6 +449,18 @@ FUNCTION_PASS_WITH_PARAMS("ee-instrument",
},
parseEntryExitInstrumenterPassOptions,
"post-inline")
+FUNCTION_PASS_WITH_PARAMS("hardware-loops",
+ "HardwareLoopsPass",
+ [](HardwareLoopOptions Opts) {
+ return HardwareLoopsPass(Opts);
+ },
+ parseHardwareLoopOptions,
+ "force-hardware-loops;"
+ "force-hardware-loop-phi;"
+ "force-nested-hardware-loop;"
+ "force-hardware-loop-guard;"
+ "hardware-loop-decrement=N;"
+ "hardware-loop-counter-bitwidth=N")
FUNCTION_PASS_WITH_PARAMS("lower-matrix-intrinsics",
"LowerMatrixIntrinsicsPass",
[](bool Minimal) {
@@ -463,6 +502,15 @@ FUNCTION_PASS_WITH_PARAMS("loop-vectorize",
parseLoopVectorizeOptions,
"no-interleave-forced-only;interleave-forced-only;"
"no-vectorize-forced-only;vectorize-forced-only")
+FUNCTION_PASS_WITH_PARAMS("instcombine",
+ "InstCombinePass",
+ [](InstCombineOptions Opts) {
+ return InstCombinePass(Opts);
+ },
+ parseInstCombineOptions,
+ "no-use-loop-info;use-loop-info;"
+ "max-iterations=N"
+ )
FUNCTION_PASS_WITH_PARAMS("mldst-motion",
"MergedLoadStoreMotionPass",
[](MergedLoadStoreMotionOptions Opts) {
@@ -501,6 +549,27 @@ FUNCTION_PASS_WITH_PARAMS("print<da>",
},
parseDependenceAnalysisPrinterOptions,
"normalized-results")
+FUNCTION_PASS_WITH_PARAMS("separate-const-offset-from-gep",
+ "SeparateConstOffsetFromGEPPass",
+ [](bool LowerGEP) {
+ return SeparateConstOffsetFromGEPPass(LowerGEP);
+ },
+ parseSeparateConstOffsetFromGEPPassOptions,
+ "lower-gep")
+FUNCTION_PASS_WITH_PARAMS("function-simplification",
+ "",
+ [this](OptimizationLevel OL) {
+ return buildFunctionSimplificationPipeline(OL, ThinOrFullLTOPhase::None);
+ },
+ parseFunctionSimplificationPipelineOptions,
+ "O1;O2;O3;Os;Oz")
+FUNCTION_PASS_WITH_PARAMS("print<memoryssa>",
+ "MemorySSAPrinterPass",
+ [](bool NoEnsureOptimizedUses) {
+ return MemorySSAPrinterPass(dbgs(), !NoEnsureOptimizedUses);
+ },
+ parseMemorySSAPrinterPassOptions,
+ "no-ensure-optimized-uses")
#undef FUNCTION_PASS_WITH_PARAMS
#ifndef LOOPNEST_PASS
@@ -529,7 +598,6 @@ LOOP_PASS("dot-ddg", DDGDotPrinterPass())
LOOP_PASS("invalidate<all>", InvalidateAllAnalysesPass())
LOOP_PASS("loop-idiom", LoopIdiomRecognizePass())
LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass())
-LOOP_PASS("loop-rotate", LoopRotatePass())
LOOP_PASS("no-op-loop", NoOpLoopPass())
LOOP_PASS("print", PrintLoopPass(dbgs()))
LOOP_PASS("loop-deletion", LoopDeletionPass())
@@ -572,4 +640,12 @@ LOOP_PASS_WITH_PARAMS("lnicm", "LNICMPass",
},
parseLICMOptions,
"allowspeculation");
+
+LOOP_PASS_WITH_PARAMS("loop-rotate",
+ "LoopRotatePass",
+ [](std::pair<bool, bool> Params) {
+ return LoopRotatePass(Params.first, Params.second);
+ },
+ parseLoopRotateOptions,
+ "no-header-duplication;header-duplication;no-prepare-for-lto;prepare-for-lto")
#undef LOOP_PASS_WITH_PARAMS
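The parameterized registrations added above (hardware-loops, instcombine, separate-const-offset-from-gep, loop-rotate, and so on) become reachable through the textual pipeline syntax, e.g. instcombine<max-iterations=2>. A minimal sketch of driving them programmatically, assuming only the standard new-pass-manager boilerplate; the helper name and the chosen parameter values are illustrative, not part of this change:

#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Error.h"

using namespace llvm;

// Parse a pipeline string that uses the parameter spellings registered in
// PassRegistry.def above, then run it over a module.
static Error runExamplePipeline(Module &M) {
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  PassBuilder PB;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  ModulePassManager MPM;
  if (Error E = PB.parsePassPipeline(
          MPM, "function(hardware-loops<hardware-loop-counter-bitwidth=32>,"
               "instcombine<max-iterations=2>)"))
    return E;
  MPM.run(M, MAM);
  return Error::success();
}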
diff --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index ad2504eca2fb..7eef511928ec 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -20,11 +20,11 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassInstrumentation.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PrintPasses.h"
+#include "llvm/IR/StructuralHash.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/CrashRecoveryContext.h"
@@ -44,14 +44,14 @@
using namespace llvm;
-cl::opt<bool> PreservedCFGCheckerInstrumentation::VerifyPreservedCFG(
- "verify-cfg-preserved", cl::Hidden,
-#ifdef NDEBUG
- cl::init(false)
+static cl::opt<bool> VerifyAnalysisInvalidation("verify-analysis-invalidation",
+ cl::Hidden,
+#ifdef EXPENSIVE_CHECKS
+ cl::init(true)
#else
- cl::init(true)
+ cl::init(false)
#endif
- );
+);
// An option that supports the -print-changed option. See
// the description for -print-changed for an explanation of the use
@@ -96,16 +96,30 @@ static cl::opt<std::string> DotCfgDir(
cl::desc("Generate dot files into specified directory for changed IRs"),
cl::Hidden, cl::init("./"));
-// An option to print the IR that was being processed when a pass crashes.
-static cl::opt<bool>
- PrintCrashIR("print-on-crash",
- cl::desc("Print the last form of the IR before crash"),
- cl::Hidden);
+// Options to print the IR that was being processed when a pass crashes.
+static cl::opt<std::string> PrintOnCrashPath(
+ "print-on-crash-path",
+ cl::desc("Print the last form of the IR before crash to a file"),
+ cl::Hidden);
+
+static cl::opt<bool> PrintOnCrash(
+ "print-on-crash",
+ cl::desc("Print the last form of the IR before crash (use -print-on-crash-path to dump to a file)"),
+ cl::Hidden);
static cl::opt<std::string> OptBisectPrintIRPath(
"opt-bisect-print-ir-path",
cl::desc("Print IR to path when opt-bisect-limit is reached"), cl::Hidden);
+static cl::opt<bool> PrintPassNumbers(
+ "print-pass-numbers", cl::init(false), cl::Hidden,
+ cl::desc("Print pass names and their ordinals"));
+
+static cl::opt<unsigned>
+ PrintAtPassNumber("print-at-pass-number", cl::init(0), cl::Hidden,
+ cl::desc("Print IR at pass with this number as "
+                               "reported by print-pass-numbers"));
+
namespace {
// An option for specifying an executable that will be called with the IR
@@ -691,13 +705,19 @@ void PrintIRInstrumentation::printBeforePass(StringRef PassID, Any IR) {
// Note: here we rely on the fact that we do not change modules while
// traversing the pipeline, so the latest captured module is good
// for all print operations that have not happened yet.
- if (shouldPrintAfterPass(PassID))
+ if (shouldPrintPassNumbers() || shouldPrintAtPassNumber() ||
+ shouldPrintAfterPass(PassID))
pushModuleDesc(PassID, IR);
- if (!shouldPrintBeforePass(PassID))
+ if (!shouldPrintIR(IR))
return;
- if (!shouldPrintIR(IR))
+ ++CurrentPassNumber;
+
+ if (shouldPrintPassNumbers())
+ dbgs() << " Running pass " << CurrentPassNumber << " " << PassID << "\n";
+
+ if (!shouldPrintBeforePass(PassID))
return;
dbgs() << "*** IR Dump Before " << PassID << " on " << getIRName(IR)
@@ -709,7 +729,8 @@ void PrintIRInstrumentation::printAfterPass(StringRef PassID, Any IR) {
if (isIgnored(PassID))
return;
- if (!shouldPrintAfterPass(PassID))
+ if (!shouldPrintAfterPass(PassID) && !shouldPrintPassNumbers() &&
+ !shouldPrintAtPassNumber())
return;
const Module *M;
@@ -718,19 +739,23 @@ void PrintIRInstrumentation::printAfterPass(StringRef PassID, Any IR) {
std::tie(M, IRName, StoredPassID) = popModuleDesc(PassID);
assert(StoredPassID == PassID && "mismatched PassID");
- if (!shouldPrintIR(IR))
+ if (!shouldPrintIR(IR) || !shouldPrintAfterPass(PassID))
return;
- dbgs() << "*** IR Dump After " << PassID << " on " << IRName << " ***\n";
+ dbgs() << "*** IR Dump "
+ << (shouldPrintAtPassNumber()
+ ? StringRef(formatv("At {0}-{1}", CurrentPassNumber, PassID))
+ : StringRef(formatv("After {0}", PassID)))
+ << " on " << IRName << " ***\n";
unwrapAndPrint(dbgs(), IR);
}
void PrintIRInstrumentation::printAfterPassInvalidated(StringRef PassID) {
- StringRef PassName = PIC->getPassNameForClassName(PassID);
- if (!shouldPrintAfterPass(PassName))
+ if (isIgnored(PassID))
return;
- if (isIgnored(PassID))
+ if (!shouldPrintAfterPass(PassID) && !shouldPrintPassNumbers() &&
+ !shouldPrintAtPassNumber())
return;
const Module *M;
@@ -740,11 +765,16 @@ void PrintIRInstrumentation::printAfterPassInvalidated(StringRef PassID) {
assert(StoredPassID == PassID && "mismatched PassID");
// Additional filtering (e.g. -filter-print-func) can lead to module
// printing being skipped.
- if (!M)
+ if (!M || !shouldPrintAfterPass(PassID))
return;
- SmallString<20> Banner =
- formatv("*** IR Dump After {0} on {1} (invalidated) ***", PassID, IRName);
+ SmallString<20> Banner;
+ if (shouldPrintAtPassNumber())
+ Banner = formatv("*** IR Dump At {0}-{1} on {2} (invalidated) ***",
+ CurrentPassNumber, PassID, IRName);
+ else
+ Banner = formatv("*** IR Dump After {0} on {1} (invalidated) ***",
+ PassID, IRName);
dbgs() << Banner << "\n";
printIR(dbgs(), M);
}
@@ -761,21 +791,34 @@ bool PrintIRInstrumentation::shouldPrintAfterPass(StringRef PassID) {
if (shouldPrintAfterAll())
return true;
+ if (shouldPrintAtPassNumber() && CurrentPassNumber == PrintAtPassNumber)
+ return true;
+
StringRef PassName = PIC->getPassNameForClassName(PassID);
return is_contained(printAfterPasses(), PassName);
}
+bool PrintIRInstrumentation::shouldPrintPassNumbers() {
+ return PrintPassNumbers;
+}
+
+bool PrintIRInstrumentation::shouldPrintAtPassNumber() {
+ return PrintAtPassNumber > 0;
+}
+
void PrintIRInstrumentation::registerCallbacks(
PassInstrumentationCallbacks &PIC) {
this->PIC = &PIC;
// BeforePass callback is not just for printing, it also saves a Module
// for later use in AfterPassInvalidated.
- if (shouldPrintBeforeSomePass() || shouldPrintAfterSomePass())
+ if (shouldPrintPassNumbers() || shouldPrintAtPassNumber() ||
+ shouldPrintBeforeSomePass() || shouldPrintAfterSomePass())
PIC.registerBeforeNonSkippedPassCallback(
[this](StringRef P, Any IR) { this->printBeforePass(P, IR); });
- if (shouldPrintAfterSomePass()) {
+ if (shouldPrintPassNumbers() || shouldPrintAtPassNumber() ||
+ shouldPrintAfterSomePass()) {
PIC.registerAfterPassCallback(
[this](StringRef P, Any IR, const PreservedAnalyses &) {
this->printAfterPass(P, IR);
@@ -1050,6 +1093,40 @@ public:
AnalysisKey PreservedCFGCheckerAnalysis::Key;
+struct PreservedFunctionHashAnalysis
+ : public AnalysisInfoMixin<PreservedFunctionHashAnalysis> {
+ static AnalysisKey Key;
+
+ struct FunctionHash {
+ uint64_t Hash;
+ };
+
+ using Result = FunctionHash;
+
+ Result run(Function &F, FunctionAnalysisManager &FAM) {
+ return Result{StructuralHash(F)};
+ }
+};
+
+AnalysisKey PreservedFunctionHashAnalysis::Key;
+
+struct PreservedModuleHashAnalysis
+ : public AnalysisInfoMixin<PreservedModuleHashAnalysis> {
+ static AnalysisKey Key;
+
+ struct ModuleHash {
+ uint64_t Hash;
+ };
+
+ using Result = ModuleHash;
+
+ Result run(Module &F, ModuleAnalysisManager &FAM) {
+ return Result{StructuralHash(F)};
+ }
+};
+
+AnalysisKey PreservedModuleHashAnalysis::Key;
+
bool PreservedCFGCheckerInstrumentation::CFG::invalidate(
Function &F, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &) {
@@ -1058,39 +1135,52 @@ bool PreservedCFGCheckerInstrumentation::CFG::invalidate(
PAC.preservedSet<CFGAnalyses>());
}
-void PreservedCFGCheckerInstrumentation::registerCallbacks(
- PassInstrumentationCallbacks &PIC, FunctionAnalysisManager &FAM) {
- if (!VerifyPreservedCFG)
- return;
-
- FAM.registerPass([&] { return PreservedCFGCheckerAnalysis(); });
+static SmallVector<Function *, 1> GetFunctions(Any IR) {
+ SmallVector<Function *, 1> Functions;
- auto checkCFG = [](StringRef Pass, StringRef FuncName, const CFG &GraphBefore,
- const CFG &GraphAfter) {
- if (GraphAfter == GraphBefore)
- return;
+ if (const auto **MaybeF = any_cast<const Function *>(&IR)) {
+ Functions.push_back(*const_cast<Function **>(MaybeF));
+ } else if (const auto **MaybeM = any_cast<const Module *>(&IR)) {
+ for (Function &F : **const_cast<Module **>(MaybeM))
+ Functions.push_back(&F);
+ }
+ return Functions;
+}
- dbgs() << "Error: " << Pass
- << " does not invalidate CFG analyses but CFG changes detected in "
- "function @"
- << FuncName << ":\n";
- CFG::printDiff(dbgs(), GraphBefore, GraphAfter);
- report_fatal_error(Twine("CFG unexpectedly changed by ", Pass));
- };
+void PreservedCFGCheckerInstrumentation::registerCallbacks(
+ PassInstrumentationCallbacks &PIC, ModuleAnalysisManager &MAM) {
+ if (!VerifyAnalysisInvalidation)
+ return;
- PIC.registerBeforeNonSkippedPassCallback(
- [this, &FAM](StringRef P, Any IR) {
+ bool Registered = false;
+ PIC.registerBeforeNonSkippedPassCallback([this, &MAM, Registered](
+ StringRef P, Any IR) mutable {
#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS
- assert(&PassStack.emplace_back(P));
+ assert(&PassStack.emplace_back(P));
#endif
- (void)this;
- const auto **F = any_cast<const Function *>(&IR);
- if (!F)
- return;
+ (void)this;
- // Make sure a fresh CFG snapshot is available before the pass.
- FAM.getResult<PreservedCFGCheckerAnalysis>(*const_cast<Function *>(*F));
- });
+ auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(
+ *const_cast<Module *>(unwrapModule(IR, /*Force=*/true)))
+ .getManager();
+ if (!Registered) {
+ FAM.registerPass([&] { return PreservedCFGCheckerAnalysis(); });
+ FAM.registerPass([&] { return PreservedFunctionHashAnalysis(); });
+ MAM.registerPass([&] { return PreservedModuleHashAnalysis(); });
+ Registered = true;
+ }
+
+ for (Function *F : GetFunctions(IR)) {
+ // Make sure a fresh CFG snapshot is available before the pass.
+ FAM.getResult<PreservedCFGCheckerAnalysis>(*F);
+ FAM.getResult<PreservedFunctionHashAnalysis>(*F);
+ }
+
+ if (auto *MaybeM = any_cast<const Module *>(&IR)) {
+ Module &M = **const_cast<Module **>(MaybeM);
+ MAM.getResult<PreservedModuleHashAnalysis>(M);
+ }
+ });
PIC.registerAfterPassInvalidatedCallback(
[this](StringRef P, const PreservedAnalyses &PassPA) {
@@ -1101,27 +1191,60 @@ void PreservedCFGCheckerInstrumentation::registerCallbacks(
(void)this;
});
- PIC.registerAfterPassCallback([this, &FAM,
- checkCFG](StringRef P, Any IR,
- const PreservedAnalyses &PassPA) {
+ PIC.registerAfterPassCallback([this, &MAM](StringRef P, Any IR,
+ const PreservedAnalyses &PassPA) {
#ifdef LLVM_ENABLE_ABI_BREAKING_CHECKS
assert(PassStack.pop_back_val() == P &&
"Before and After callbacks must correspond");
#endif
(void)this;
- const auto **F = any_cast<const Function *>(&IR);
- if (!F)
- return;
+ // We have to get the FAM via the MAM, rather than directly use a passed in
+ // FAM because if MAM has not cached the FAM, it won't invalidate function
+ // analyses in FAM.
+ auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(
+ *const_cast<Module *>(unwrapModule(IR, /*Force=*/true)))
+ .getManager();
+
+ for (Function *F : GetFunctions(IR)) {
+ if (auto *HashBefore =
+ FAM.getCachedResult<PreservedFunctionHashAnalysis>(*F)) {
+ if (HashBefore->Hash != StructuralHash(*F)) {
+ report_fatal_error(formatv(
+ "Function @{0} changed by {1} without invalidating analyses",
+ F->getName(), P));
+ }
+ }
- if (!PassPA.allAnalysesInSetPreserved<CFGAnalyses>() &&
- !PassPA.allAnalysesInSetPreserved<AllAnalysesOn<Function>>())
- return;
+ auto CheckCFG = [](StringRef Pass, StringRef FuncName,
+ const CFG &GraphBefore, const CFG &GraphAfter) {
+ if (GraphAfter == GraphBefore)
+ return;
- if (auto *GraphBefore = FAM.getCachedResult<PreservedCFGCheckerAnalysis>(
- *const_cast<Function *>(*F)))
- checkCFG(P, (*F)->getName(), *GraphBefore,
- CFG(*F, /* TrackBBLifetime */ false));
+ dbgs()
+ << "Error: " << Pass
+ << " does not invalidate CFG analyses but CFG changes detected in "
+ "function @"
+ << FuncName << ":\n";
+ CFG::printDiff(dbgs(), GraphBefore, GraphAfter);
+ report_fatal_error(Twine("CFG unexpectedly changed by ", Pass));
+ };
+
+ if (auto *GraphBefore =
+ FAM.getCachedResult<PreservedCFGCheckerAnalysis>(*F))
+ CheckCFG(P, F->getName(), *GraphBefore,
+ CFG(F, /* TrackBBLifetime */ false));
+ }
+ if (auto *MaybeM = any_cast<const Module *>(&IR)) {
+ Module &M = **const_cast<Module **>(MaybeM);
+ if (auto *HashBefore =
+ MAM.getCachedResult<PreservedModuleHashAnalysis>(M)) {
+ if (HashBefore->Hash != StructuralHash(M)) {
+ report_fatal_error(formatv(
+ "Module changed by {0} without invalidating analyses", P));
+ }
+ }
+ }
});
}
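In short, the reworked checker snapshots a structural hash of every function (and of the module) in the before-pass callback and re-hashes in the after-pass callback, aborting when a pass claims to have preserved analyses while the IR actually changed. A simplified sketch of that invariant for a single function pass; checkHashInvariant is a hypothetical helper, not part of this patch:

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/StructuralHash.h"
#include "llvm/Support/ErrorHandling.h"
#include <cstdint>

using namespace llvm;

template <typename PassT>
static void checkHashInvariant(PassT &P, Function &F,
                               FunctionAnalysisManager &FAM) {
  uint64_t Before = StructuralHash(F);  // snapshot, as in the before-pass callback
  PreservedAnalyses PA = P.run(F, FAM); // the pass under test
  // The real instrumentation only fails when the corresponding hash analysis
  // is still cached; checking "everything preserved" is the simplest form.
  if (PA.areAllPreserved() && Before != StructuralHash(F))
    report_fatal_error("pass changed the IR without invalidating analyses");
}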
@@ -2107,7 +2230,17 @@ StandardInstrumentations::StandardInstrumentations(
PrintCrashIRInstrumentation *PrintCrashIRInstrumentation::CrashReporter =
nullptr;
-void PrintCrashIRInstrumentation::reportCrashIR() { dbgs() << SavedIR; }
+void PrintCrashIRInstrumentation::reportCrashIR() {
+ if (!PrintOnCrashPath.empty()) {
+ std::error_code EC;
+ raw_fd_ostream Out(PrintOnCrashPath, EC);
+ if (EC)
+ report_fatal_error(errorCodeToError(EC));
+ Out << SavedIR;
+ } else {
+ dbgs() << SavedIR;
+ }
+}
void PrintCrashIRInstrumentation::SignalHandler(void *) {
// Called by signal handlers so do not lock here
@@ -2115,7 +2248,8 @@ void PrintCrashIRInstrumentation::SignalHandler(void *) {
if (!CrashReporter)
return;
- assert(PrintCrashIR && "Did not expect to get here without option set.");
+ assert((PrintOnCrash || !PrintOnCrashPath.empty()) &&
+ "Did not expect to get here without option set.");
CrashReporter->reportCrashIR();
}
@@ -2123,52 +2257,52 @@ PrintCrashIRInstrumentation::~PrintCrashIRInstrumentation() {
if (!CrashReporter)
return;
- assert(PrintCrashIR && "Did not expect to get here without option set.");
+ assert((PrintOnCrash || !PrintOnCrashPath.empty()) &&
+ "Did not expect to get here without option set.");
CrashReporter = nullptr;
}
void PrintCrashIRInstrumentation::registerCallbacks(
PassInstrumentationCallbacks &PIC) {
- if (!PrintCrashIR || CrashReporter)
+ if ((!PrintOnCrash && PrintOnCrashPath.empty()) || CrashReporter)
return;
sys::AddSignalHandler(SignalHandler, nullptr);
CrashReporter = this;
- PIC.registerBeforeNonSkippedPassCallback([&PIC, this](StringRef PassID,
- Any IR) {
- SavedIR.clear();
- raw_string_ostream OS(SavedIR);
- OS << formatv("*** Dump of {0}IR Before Last Pass {1}",
- llvm::forcePrintModuleIR() ? "Module " : "", PassID);
- if (!isInteresting(IR, PassID, PIC.getPassNameForClassName(PassID))) {
- OS << " Filtered Out ***\n";
- return;
- }
- OS << " Started ***\n";
- unwrapAndPrint(OS, IR);
- });
+ PIC.registerBeforeNonSkippedPassCallback(
+ [&PIC, this](StringRef PassID, Any IR) {
+ SavedIR.clear();
+ raw_string_ostream OS(SavedIR);
+ OS << formatv("*** Dump of {0}IR Before Last Pass {1}",
+ llvm::forcePrintModuleIR() ? "Module " : "", PassID);
+ if (!isInteresting(IR, PassID, PIC.getPassNameForClassName(PassID))) {
+ OS << " Filtered Out ***\n";
+ return;
+ }
+ OS << " Started ***\n";
+ unwrapAndPrint(OS, IR);
+ });
}
void StandardInstrumentations::registerCallbacks(
- PassInstrumentationCallbacks &PIC, FunctionAnalysisManager *FAM) {
+ PassInstrumentationCallbacks &PIC, ModuleAnalysisManager *MAM) {
PrintIR.registerCallbacks(PIC);
PrintPass.registerCallbacks(PIC);
TimePasses.registerCallbacks(PIC);
OptNone.registerCallbacks(PIC);
OptPassGate.registerCallbacks(PIC);
- if (FAM)
- PreservedCFGChecker.registerCallbacks(PIC, *FAM);
PrintChangedIR.registerCallbacks(PIC);
PseudoProbeVerification.registerCallbacks(PIC);
if (VerifyEach)
Verify.registerCallbacks(PIC);
PrintChangedDiff.registerCallbacks(PIC);
WebsiteChangeReporter.registerCallbacks(PIC);
-
ChangeTester.registerCallbacks(PIC);
-
PrintCrashIR.registerCallbacks(PIC);
+ if (MAM)
+ PreservedCFGChecker.registerCallbacks(PIC, *MAM);
+
// TimeProfiling records the pass running time cost.
// Its 'BeforePassCallback' can be appended at the tail of all the
// BeforeCallbacks by calling `registerCallbacks` in the end.
diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
index 6113f78aeb4e..849ee80bfaa3 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -15,8 +15,11 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Object/BuildID.h"
#include "llvm/ProfileData/Coverage/CoverageMappingReader.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/Debug.h"
@@ -24,6 +27,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -245,7 +249,7 @@ Error CoverageMapping::loadFunctionRecord(
std::vector<uint64_t> Counts;
if (Error E = ProfileReader.getFunctionCounts(Record.FunctionName,
Record.FunctionHash, Counts)) {
- instrprof_error IPE = InstrProfError::take(std::move(E));
+ instrprof_error IPE = std::get<0>(InstrProfError::take(std::move(E)));
if (IPE == instrprof_error::hash_mismatch) {
FuncHashMismatches.emplace_back(std::string(Record.FunctionName),
Record.FunctionHash);
@@ -342,46 +346,108 @@ static Error handleMaybeNoDataFoundError(Error E) {
});
}
-Expected<std::unique_ptr<CoverageMapping>>
-CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames,
- StringRef ProfileFilename, ArrayRef<StringRef> Arches,
- StringRef CompilationDir) {
- auto ProfileReaderOrErr = IndexedInstrProfReader::create(ProfileFilename);
+Error CoverageMapping::loadFromFile(
+ StringRef Filename, StringRef Arch, StringRef CompilationDir,
+ IndexedInstrProfReader &ProfileReader, CoverageMapping &Coverage,
+ bool &DataFound, SmallVectorImpl<object::BuildID> *FoundBinaryIDs) {
+ auto CovMappingBufOrErr = MemoryBuffer::getFileOrSTDIN(
+ Filename, /*IsText=*/false, /*RequiresNullTerminator=*/false);
+ if (std::error_code EC = CovMappingBufOrErr.getError())
+ return createFileError(Filename, errorCodeToError(EC));
+ MemoryBufferRef CovMappingBufRef =
+ CovMappingBufOrErr.get()->getMemBufferRef();
+ SmallVector<std::unique_ptr<MemoryBuffer>, 4> Buffers;
+
+ SmallVector<object::BuildIDRef> BinaryIDs;
+ auto CoverageReadersOrErr = BinaryCoverageReader::create(
+ CovMappingBufRef, Arch, Buffers, CompilationDir,
+ FoundBinaryIDs ? &BinaryIDs : nullptr);
+ if (Error E = CoverageReadersOrErr.takeError()) {
+ E = handleMaybeNoDataFoundError(std::move(E));
+ if (E)
+ return createFileError(Filename, std::move(E));
+ return E;
+ }
+
+ SmallVector<std::unique_ptr<CoverageMappingReader>, 4> Readers;
+ for (auto &Reader : CoverageReadersOrErr.get())
+ Readers.push_back(std::move(Reader));
+ if (FoundBinaryIDs && !Readers.empty()) {
+ llvm::append_range(*FoundBinaryIDs,
+ llvm::map_range(BinaryIDs, [](object::BuildIDRef BID) {
+ return object::BuildID(BID);
+ }));
+ }
+ DataFound |= !Readers.empty();
+ if (Error E = loadFromReaders(Readers, ProfileReader, Coverage))
+ return createFileError(Filename, std::move(E));
+ return Error::success();
+}
+
+Expected<std::unique_ptr<CoverageMapping>> CoverageMapping::load(
+ ArrayRef<StringRef> ObjectFilenames, StringRef ProfileFilename,
+ vfs::FileSystem &FS, ArrayRef<StringRef> Arches, StringRef CompilationDir,
+ const object::BuildIDFetcher *BIDFetcher, bool CheckBinaryIDs) {
+ auto ProfileReaderOrErr = IndexedInstrProfReader::create(ProfileFilename, FS);
if (Error E = ProfileReaderOrErr.takeError())
return createFileError(ProfileFilename, std::move(E));
auto ProfileReader = std::move(ProfileReaderOrErr.get());
auto Coverage = std::unique_ptr<CoverageMapping>(new CoverageMapping());
bool DataFound = false;
+ auto GetArch = [&](size_t Idx) {
+ if (Arches.empty())
+ return StringRef();
+ if (Arches.size() == 1)
+ return Arches.front();
+ return Arches[Idx];
+ };
+
+ SmallVector<object::BuildID> FoundBinaryIDs;
for (const auto &File : llvm::enumerate(ObjectFilenames)) {
- auto CovMappingBufOrErr = MemoryBuffer::getFileOrSTDIN(
- File.value(), /*IsText=*/false, /*RequiresNullTerminator=*/false);
- if (std::error_code EC = CovMappingBufOrErr.getError())
- return createFileError(File.value(), errorCodeToError(EC));
- StringRef Arch = Arches.empty() ? StringRef() : Arches[File.index()];
- MemoryBufferRef CovMappingBufRef =
- CovMappingBufOrErr.get()->getMemBufferRef();
- SmallVector<std::unique_ptr<MemoryBuffer>, 4> Buffers;
- auto CoverageReadersOrErr = BinaryCoverageReader::create(
- CovMappingBufRef, Arch, Buffers, CompilationDir);
- if (Error E = CoverageReadersOrErr.takeError()) {
- E = handleMaybeNoDataFoundError(std::move(E));
- if (E)
- return createFileError(File.value(), std::move(E));
- // E == success (originally a no_data_found error).
- continue;
+ if (Error E =
+ loadFromFile(File.value(), GetArch(File.index()), CompilationDir,
+ *ProfileReader, *Coverage, DataFound, &FoundBinaryIDs))
+ return std::move(E);
+ }
+
+ if (BIDFetcher) {
+ std::vector<object::BuildID> ProfileBinaryIDs;
+ if (Error E = ProfileReader->readBinaryIds(ProfileBinaryIDs))
+ return createFileError(ProfileFilename, std::move(E));
+
+ SmallVector<object::BuildIDRef> BinaryIDsToFetch;
+ if (!ProfileBinaryIDs.empty()) {
+ const auto &Compare = [](object::BuildIDRef A, object::BuildIDRef B) {
+ return std::lexicographical_compare(A.begin(), A.end(), B.begin(),
+ B.end());
+ };
+ llvm::sort(FoundBinaryIDs, Compare);
+ std::set_difference(
+ ProfileBinaryIDs.begin(), ProfileBinaryIDs.end(),
+ FoundBinaryIDs.begin(), FoundBinaryIDs.end(),
+ std::inserter(BinaryIDsToFetch, BinaryIDsToFetch.end()), Compare);
}
- SmallVector<std::unique_ptr<CoverageMappingReader>, 4> Readers;
- for (auto &Reader : CoverageReadersOrErr.get())
- Readers.push_back(std::move(Reader));
- DataFound |= !Readers.empty();
- if (Error E = loadFromReaders(Readers, *ProfileReader, *Coverage))
- return createFileError(File.value(), std::move(E));
+ for (object::BuildIDRef BinaryID : BinaryIDsToFetch) {
+ std::optional<std::string> PathOpt = BIDFetcher->fetch(BinaryID);
+ if (PathOpt) {
+ std::string Path = std::move(*PathOpt);
+ StringRef Arch = Arches.size() == 1 ? Arches.front() : StringRef();
+ if (Error E = loadFromFile(Path, Arch, CompilationDir, *ProfileReader,
+ *Coverage, DataFound))
+ return std::move(E);
+ } else if (CheckBinaryIDs) {
+ return createFileError(
+ ProfileFilename,
+ createStringError(errc::no_such_file_or_directory,
+ "Missing binary ID: " +
+ llvm::toHex(BinaryID, /*LowerCase=*/true)));
+ }
+ }
}
- // If no readers were created, either no objects were provided or none of them
- // had coverage data. Return an error in the latter case.
- if (!DataFound && !ObjectFilenames.empty())
+
+ if (!DataFound)
return createFileError(
join(ObjectFilenames.begin(), ObjectFilenames.end(), ", "),
make_error<CoverageMapError>(coveragemap_error::no_data_found));
diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index 41962ab24ff9..05737323314a 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -18,7 +18,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/COFF.h"
@@ -36,6 +35,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include <vector>
using namespace llvm;
@@ -954,7 +954,8 @@ static Expected<std::vector<SectionRef>> lookupSections(ObjectFile &OF,
static Expected<std::unique_ptr<BinaryCoverageReader>>
loadBinaryFormat(std::unique_ptr<Binary> Bin, StringRef Arch,
- StringRef CompilationDir = "") {
+ StringRef CompilationDir = "",
+ object::BuildIDRef *BinaryID = nullptr) {
std::unique_ptr<ObjectFile> OF;
if (auto *Universal = dyn_cast<MachOUniversalBinary>(Bin.get())) {
// If we have a universal binary, try to look up the object for the
@@ -1052,6 +1053,9 @@ loadBinaryFormat(std::unique_ptr<Binary> Bin, StringRef Arch,
FuncRecords = std::move(WritableBuffer);
}
+ if (BinaryID)
+ *BinaryID = getBuildID(OF.get());
+
return BinaryCoverageReader::createCoverageReaderFromBuffer(
CoverageMapping, std::move(FuncRecords), std::move(ProfileNames),
BytesInAddress, Endian, CompilationDir);
@@ -1074,7 +1078,7 @@ Expected<std::vector<std::unique_ptr<BinaryCoverageReader>>>
BinaryCoverageReader::create(
MemoryBufferRef ObjectBuffer, StringRef Arch,
SmallVectorImpl<std::unique_ptr<MemoryBuffer>> &ObjectFileBuffers,
- StringRef CompilationDir) {
+ StringRef CompilationDir, SmallVectorImpl<object::BuildIDRef> *BinaryIDs) {
std::vector<std::unique_ptr<BinaryCoverageReader>> Readers;
if (ObjectBuffer.getBuffer().startswith(TestingFormatMagic)) {
@@ -1114,7 +1118,7 @@ BinaryCoverageReader::create(
return BinaryCoverageReader::create(
ArchiveOrErr.get()->getMemoryBufferRef(), Arch, ObjectFileBuffers,
- CompilationDir);
+ CompilationDir, BinaryIDs);
}
}
@@ -1127,7 +1131,8 @@ BinaryCoverageReader::create(
return ChildBufOrErr.takeError();
auto ChildReadersOrErr = BinaryCoverageReader::create(
- ChildBufOrErr.get(), Arch, ObjectFileBuffers, CompilationDir);
+ ChildBufOrErr.get(), Arch, ObjectFileBuffers, CompilationDir,
+ BinaryIDs);
if (!ChildReadersOrErr)
return ChildReadersOrErr.takeError();
for (auto &Reader : ChildReadersOrErr.get())
@@ -1146,10 +1151,14 @@ BinaryCoverageReader::create(
return std::move(Readers);
}
- auto ReaderOrErr = loadBinaryFormat(std::move(Bin), Arch, CompilationDir);
+ object::BuildIDRef BinaryID;
+ auto ReaderOrErr = loadBinaryFormat(std::move(Bin), Arch, CompilationDir,
+ BinaryIDs ? &BinaryID : nullptr);
if (!ReaderOrErr)
return ReaderOrErr.takeError();
Readers.push_back(std::move(ReaderOrErr.get()));
+ if (!BinaryID.empty())
+ BinaryIDs->push_back(BinaryID);
return std::move(Readers);
}
diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
index db9be34d5248..df65032da517 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
@@ -11,10 +11,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/Coverage/CoverageMappingWriter.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Compression.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp
index 5af92799e05e..1e70431a1fae 100644
--- a/llvm/lib/ProfileData/GCOV.cpp
+++ b/llvm/lib/ProfileData/GCOV.cpp
@@ -140,10 +140,7 @@ bool GCOVFile::readGCNO(GCOVBuffer &buf) {
if (version >= GCOV::V900)
fn->endColumn = buf.getWord();
}
- auto r = filenameToIdx.try_emplace(filename, filenameToIdx.size());
- if (r.second)
- filenames.emplace_back(filename);
- fn->srcIdx = r.first->second;
+ fn->srcIdx = addNormalizedPathToMap(filename);
identToFunction[fn->ident] = fn;
} else if (tag == GCOV_TAG_BLOCKS && fn) {
if (version < GCOV::V800) {
@@ -326,6 +323,19 @@ void GCOVFile::print(raw_ostream &OS) const {
LLVM_DUMP_METHOD void GCOVFile::dump() const { print(dbgs()); }
#endif
+unsigned GCOVFile::addNormalizedPathToMap(StringRef filename) {
+  // Unify the filename, as the same path can have different forms.
+ SmallString<256> P(filename);
+ sys::path::remove_dots(P, true);
+ filename = P.str();
+
+ auto r = filenameToIdx.try_emplace(filename, filenameToIdx.size());
+ if (r.second)
+ filenames.emplace_back(filename);
+
+ return r.first->second;
+}
+
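The normalization above means that different spellings of the same path collapse to a single filename index. For example (POSIX-style separators, remove_dot_dot enabled):

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"

// "a/./b/../c.gcda" and "a/c.gcda" normalize to the same key, so coverage
// for one source file is no longer split across two entries.
static void normalizeExample() {
  llvm::SmallString<64> P("a/./b/../c.gcda");
  llvm::sys::path::remove_dots(P, /*remove_dot_dot=*/true);
  // P == "a/c.gcda"
}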
bool GCOVArc::onTree() const { return flags & GCOV_ARC_ON_TREE; }
//===----------------------------------------------------------------------===//
@@ -337,10 +347,8 @@ StringRef GCOVFunction::getName(bool demangle) const {
if (demangled.empty()) {
do {
if (Name.startswith("_Z")) {
- int status = 0;
// Name is guaranteed to be NUL-terminated.
- char *res = itaniumDemangle(Name.data(), nullptr, nullptr, &status);
- if (status == 0) {
+ if (char *res = itaniumDemangle(Name.data())) {
demangled = res;
free(res);
break;
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index aee104310a1d..0f9c33de3f52 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -13,11 +13,11 @@
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Config/config.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -42,6 +42,8 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SwapByteOrder.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -152,6 +154,9 @@ static std::string getInstrProfErrString(instrprof_error Err,
OS << "profile uses zlib compression but the profile reader was built "
"without zlib support";
break;
+ case instrprof_error::raw_profile_version_mismatch:
+ OS << "raw profile version mismatch";
+ break;
}
// If optional error message is not empty, append it to the message.
@@ -228,31 +233,6 @@ std::string getInstrProfSectionName(InstrProfSectKind IPSK,
return SectName;
}
-void SoftInstrProfErrors::addError(instrprof_error IE) {
- if (IE == instrprof_error::success)
- return;
-
- if (FirstError == instrprof_error::success)
- FirstError = IE;
-
- switch (IE) {
- case instrprof_error::hash_mismatch:
- ++NumHashMismatches;
- break;
- case instrprof_error::count_mismatch:
- ++NumCountMismatches;
- break;
- case instrprof_error::counter_overflow:
- ++NumCounterOverflows;
- break;
- case instrprof_error::value_site_count_mismatch:
- ++NumValueSiteCountMismatches;
- break;
- default:
- llvm_unreachable("Not a soft error");
- }
-}
-
std::string InstrProfError::message() const {
return getInstrProfErrString(Err, Msg);
}
@@ -435,6 +415,13 @@ uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) {
return 0;
}
+void InstrProfSymtab::dumpNames(raw_ostream &OS) const {
+ SmallVector<StringRef, 0> Sorted(NameTab.keys());
+ llvm::sort(Sorted);
+ for (StringRef S : Sorted)
+ OS << S << '\n';
+}
+
Error collectPGOFuncNameStrings(ArrayRef<std::string> NameStrs,
bool doCompression, std::string &Result) {
assert(!NameStrs.empty() && "No name data to emit");
@@ -799,6 +786,48 @@ void InstrProfRecord::addValueData(uint32_t ValueKind, uint32_t Site,
ValueSites.emplace_back(VData, VData + N);
}
+std::vector<BPFunctionNode> TemporalProfTraceTy::createBPFunctionNodes(
+ ArrayRef<TemporalProfTraceTy> Traces) {
+ using IDT = BPFunctionNode::IDT;
+ using UtilityNodeT = BPFunctionNode::UtilityNodeT;
+ // Collect all function IDs ordered by their smallest timestamp. This will be
+ // used as the initial FunctionNode order.
+ SetVector<IDT> FunctionIds;
+ size_t LargestTraceSize = 0;
+ for (auto &Trace : Traces)
+ LargestTraceSize =
+ std::max(LargestTraceSize, Trace.FunctionNameRefs.size());
+ for (size_t Timestamp = 0; Timestamp < LargestTraceSize; Timestamp++)
+ for (auto &Trace : Traces)
+ if (Timestamp < Trace.FunctionNameRefs.size())
+ FunctionIds.insert(Trace.FunctionNameRefs[Timestamp]);
+
+ int N = std::ceil(std::log2(LargestTraceSize));
+
+ // TODO: We need to use the Trace.Weight field to give more weight to more
+ // important utilities
+ DenseMap<IDT, SmallVector<UtilityNodeT, 4>> FuncGroups;
+ for (size_t TraceIdx = 0; TraceIdx < Traces.size(); TraceIdx++) {
+ auto &Trace = Traces[TraceIdx].FunctionNameRefs;
+ for (size_t Timestamp = 0; Timestamp < Trace.size(); Timestamp++) {
+ for (int I = std::floor(std::log2(Timestamp + 1)); I < N; I++) {
+ auto &FunctionId = Trace[Timestamp];
+ UtilityNodeT GroupId = TraceIdx * N + I;
+ FuncGroups[FunctionId].push_back(GroupId);
+ }
+ }
+ }
+
+ std::vector<BPFunctionNode> Nodes;
+ for (auto &Id : FunctionIds) {
+ auto &UNs = FuncGroups[Id];
+ llvm::sort(UNs);
+ UNs.erase(std::unique(UNs.begin(), UNs.end()), UNs.end());
+ Nodes.emplace_back(Id, UNs);
+ }
+ return Nodes;
+}
+
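A worked instance of the grouping above: a single trace of four functions f0..f3 at timestamps 0..3 gives LargestTraceSize = 4 and N = ceil(log2 4) = 2, so

  t = 0: I runs from floor(log2 1) = 0 up to N-1 = 1  -> utility nodes {0, 1}
  t = 1: I runs from floor(log2 2) = 1 up to 1        -> utility node {1}
  t = 2: I runs from floor(log2 3) = 1 up to 1        -> utility node {1}
  t = 3: floor(log2 4) = 2 exceeds N-1                -> no utility nodes

Functions that appear earlier in a trace therefore share more utility nodes, which is the signal the balanced-partitioning layout works from.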
#define INSTR_PROF_COMMON_API_IMPL
#include "llvm/ProfileData/InstrProfData.inc"
@@ -1224,7 +1253,10 @@ Error OverlapStats::accumulateCounts(const std::string &BaseFilename,
bool IsCS) {
auto getProfileSum = [IsCS](const std::string &Filename,
CountSumOrPercent &Sum) -> Error {
- auto ReaderOrErr = InstrProfReader::create(Filename);
+  // This function is only used from llvm-profdata, which doesn't use any
+  // kind of VFS. Just create a default RealFileSystem to read profiles.
+ auto FS = vfs::getRealFileSystem();
+ auto ReaderOrErr = InstrProfReader::create(Filename, *FS);
if (Error E = ReaderOrErr.takeError()) {
return E;
}
@@ -1372,9 +1404,13 @@ Expected<Header> Header::readFromBuffer(const unsigned char *Buffer) {
// When a new field is added in the header add a case statement here to
// populate it.
static_assert(
- IndexedInstrProf::ProfVersion::CurrentVersion == Version9,
+ IndexedInstrProf::ProfVersion::CurrentVersion == Version10,
"Please update the reading code below if a new field has been added, "
"if not add a case statement to fall through to the latest version.");
+ case 10ull:
+ H.TemporalProfTracesOffset =
+ read(Buffer, offsetOf(&Header::TemporalProfTracesOffset));
+ [[fallthrough]];
case 9ull:
H.BinaryIdOffset = read(Buffer, offsetOf(&Header::BinaryIdOffset));
[[fallthrough]];
@@ -1394,10 +1430,13 @@ size_t Header::size() const {
// When a new field is added to the header add a case statement here to
// compute the size as offset of the new field + size of the new field. This
// relies on the field being added to the end of the list.
- static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version9,
+ static_assert(IndexedInstrProf::ProfVersion::CurrentVersion == Version10,
"Please update the size computation below if a new field has "
"been added to the header, if not add a case statement to "
"fall through to the latest version.");
+ case 10ull:
+ return offsetOf(&Header::TemporalProfTracesOffset) +
+ sizeof(Header::TemporalProfTracesOffset);
case 9ull:
return offsetOf(&Header::BinaryIdOffset) + sizeof(Header::BinaryIdOffset);
case 8ull:
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index d0714c9b4665..4160f7e6dfd5 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -20,12 +20,13 @@
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/ProfileCommon.h"
+#include "llvm/ProfileData/SymbolRemappingReader.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SwapByteOrder.h"
-#include "llvm/Support/SymbolRemappingReader.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
@@ -59,13 +60,16 @@ static InstrProfKind getProfileKindFromVersion(uint64_t Version) {
if (Version & VARIANT_MASK_MEMPROF) {
ProfileKind |= InstrProfKind::MemProf;
}
+ if (Version & VARIANT_MASK_TEMPORAL_PROF) {
+ ProfileKind |= InstrProfKind::TemporalProfile;
+ }
return ProfileKind;
}
static Expected<std::unique_ptr<MemoryBuffer>>
-setupMemoryBuffer(const Twine &Path) {
- ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
- MemoryBuffer::getFileOrSTDIN(Path, /*IsText=*/true);
+setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
+ auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
+ : FS.getBufferForFile(Filename);
if (std::error_code EC = BufferOrErr.getError())
return errorCodeToError(EC);
return std::move(BufferOrErr.get());
@@ -161,10 +165,10 @@ static Error printBinaryIdsInternal(raw_ostream &OS,
}
Expected<std::unique_ptr<InstrProfReader>>
-InstrProfReader::create(const Twine &Path,
+InstrProfReader::create(const Twine &Path, vfs::FileSystem &FS,
const InstrProfCorrelator *Correlator) {
// Set up the buffer to read.
- auto BufferOrError = setupMemoryBuffer(Path);
+ auto BufferOrError = setupMemoryBuffer(Path, FS);
if (Error E = BufferOrError.takeError())
return std::move(E);
return InstrProfReader::create(std::move(BufferOrError.get()), Correlator);
@@ -173,10 +177,6 @@ InstrProfReader::create(const Twine &Path,
Expected<std::unique_ptr<InstrProfReader>>
InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
const InstrProfCorrelator *Correlator) {
- // Sanity check the buffer.
- if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
- return make_error<InstrProfError>(instrprof_error::too_large);
-
if (Buffer->getBufferSize() == 0)
return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
@@ -201,9 +201,10 @@ InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
}
Expected<std::unique_ptr<IndexedInstrProfReader>>
-IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) {
+IndexedInstrProfReader::create(const Twine &Path, vfs::FileSystem &FS,
+ const Twine &RemappingPath) {
// Set up the buffer to read.
- auto BufferOrError = setupMemoryBuffer(Path);
+ auto BufferOrError = setupMemoryBuffer(Path, FS);
if (Error E = BufferOrError.takeError())
return std::move(E);
@@ -211,7 +212,7 @@ IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) {
std::unique_ptr<MemoryBuffer> RemappingBuffer;
std::string RemappingPathStr = RemappingPath.str();
if (!RemappingPathStr.empty()) {
- auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr);
+ auto RemappingBufferOrError = setupMemoryBuffer(RemappingPathStr, FS);
if (Error E = RemappingBufferOrError.takeError())
return std::move(E);
RemappingBuffer = std::move(RemappingBufferOrError.get());
@@ -224,9 +225,6 @@ IndexedInstrProfReader::create(const Twine &Path, const Twine &RemappingPath) {
Expected<std::unique_ptr<IndexedInstrProfReader>>
IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
std::unique_ptr<MemoryBuffer> RemappingBuffer) {
- if (uint64_t(Buffer->getBufferSize()) > std::numeric_limits<uint64_t>::max())
- return make_error<InstrProfError>(instrprof_error::too_large);
-
// Create the reader.
if (!IndexedInstrProfReader::hasFormat(*Buffer))
return make_error<InstrProfError>(instrprof_error::bad_magic);
@@ -269,13 +267,57 @@ Error TextInstrProfReader::readHeader() {
ProfileKind |= InstrProfKind::FunctionEntryInstrumentation;
else if (Str.equals_insensitive("not_entry_first"))
ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation;
- else
+ else if (Str.equals_insensitive("temporal_prof_traces")) {
+ ProfileKind |= InstrProfKind::TemporalProfile;
+ if (auto Err = readTemporalProfTraceData())
+ return error(std::move(Err));
+ } else
return error(instrprof_error::bad_header);
++Line;
}
return success();
}
+/// Temporal profile trace data is stored in the header immediately after
+/// ":temporal_prof_traces". The first integer is the number of traces, the
+/// second integer is the stream size, then the following lines are the actual
+/// traces which consist of a weight and a comma separated list of function
+/// names.
+Error TextInstrProfReader::readTemporalProfTraceData() {
+ if ((++Line).is_at_end())
+ return error(instrprof_error::eof);
+
+ uint32_t NumTraces;
+ if (Line->getAsInteger(0, NumTraces))
+ return error(instrprof_error::malformed);
+
+ if ((++Line).is_at_end())
+ return error(instrprof_error::eof);
+
+ if (Line->getAsInteger(0, TemporalProfTraceStreamSize))
+ return error(instrprof_error::malformed);
+
+ for (uint32_t i = 0; i < NumTraces; i++) {
+ if ((++Line).is_at_end())
+ return error(instrprof_error::eof);
+
+ TemporalProfTraceTy Trace;
+ if (Line->getAsInteger(0, Trace.Weight))
+ return error(instrprof_error::malformed);
+
+ if ((++Line).is_at_end())
+ return error(instrprof_error::eof);
+
+ SmallVector<StringRef> FuncNames;
+ Line->split(FuncNames, ",", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
+ for (auto &FuncName : FuncNames)
+ Trace.FunctionNameRefs.push_back(
+ IndexedInstrProf::ComputeHash(FuncName.trim()));
+ TemporalProfTraces.push_back(std::move(Trace));
+ }
+ return success();
+}
+
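For reference, a fragment that this parser accepts in the text profile header (values illustrative; lines starting with '#' are comments and are skipped by the reader's line iterator):

:temporal_prof_traces
2
2
1
foo,bar
3
bar,baz

i.e. two traces and a stream size of two, the first trace with weight 1 covering foo and bar, the second with weight 3 covering bar and baz.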
Error
TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
@@ -404,6 +446,25 @@ InstrProfKind RawInstrProfReader<IntPtrT>::getProfileKind() const {
}
template <class IntPtrT>
+SmallVector<TemporalProfTraceTy> &
+RawInstrProfReader<IntPtrT>::getTemporalProfTraces(
+ std::optional<uint64_t> Weight) {
+ if (TemporalProfTimestamps.empty()) {
+ assert(TemporalProfTraces.empty());
+ return TemporalProfTraces;
+ }
+ // Sort functions by their timestamps to build the trace.
+ std::sort(TemporalProfTimestamps.begin(), TemporalProfTimestamps.end());
+ TemporalProfTraceTy Trace;
+ if (Weight)
+ Trace.Weight = *Weight;
+ for (auto &[TimestampValue, NameRef] : TemporalProfTimestamps)
+ Trace.FunctionNameRefs.push_back(NameRef);
+ TemporalProfTraces = {std::move(Trace)};
+ return TemporalProfTraces;
+}
+
+template <class IntPtrT>
bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
if (DataBuffer.getBufferSize() < sizeof(uint64_t))
return false;
@@ -471,7 +532,13 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
const RawInstrProf::Header &Header) {
Version = swap(Header.Version);
if (GET_VERSION(Version) != RawInstrProf::Version)
- return error(instrprof_error::unsupported_version);
+ return error(instrprof_error::raw_profile_version_mismatch,
+ ("Profile uses raw profile format version = " +
+ Twine(GET_VERSION(Version)) +
+ "; expected version = " + Twine(RawInstrProf::Version) +
+                 "\nPLEASE update this tool to the version in the raw profile, or "
+                 "regenerate the raw profile with the expected version.")
+ .str());
if (useDebugInfoCorrelate() && !Correlator)
return error(instrprof_error::missing_debug_info_for_correlation);
if (!useDebugInfoCorrelate() && Correlator)
@@ -587,6 +654,23 @@ Error RawInstrProfReader<IntPtrT>::readRawCounts(
for (uint32_t I = 0; I < NumCounters; I++) {
const char *Ptr =
CountersStart + CounterBaseOffset + I * getCounterTypeSize();
+ if (I == 0 && hasTemporalProfile()) {
+ uint64_t TimestampValue = swap(*reinterpret_cast<const uint64_t *>(Ptr));
+ if (TimestampValue != 0 &&
+ TimestampValue != std::numeric_limits<uint64_t>::max()) {
+ TemporalProfTimestamps.emplace_back(TimestampValue,
+ swap(Data->NameRef));
+ TemporalProfTraceStreamSize = 1;
+ }
+ if (hasSingleByteCoverage()) {
+ // In coverage mode, getCounterTypeSize() returns 1 byte but our
+ // timestamp field has size uint64_t. Increment I so that the next
+ // iteration of this for loop points to the byte after the timestamp
+ // field, i.e., I += 8.
+ I += 7;
+ }
+ continue;
+ }
if (hasSingleByteCoverage()) {
// A value of zero signifies the block is covered.
Record.Counts.push_back(*Ptr == 0 ? 1 : 0);
@@ -637,7 +721,7 @@ Error RawInstrProfReader<IntPtrT>::readNextRecord(NamedInstrProfRecord &Record)
if (Error E = readNextHeader(getNextHeaderPos()))
return error(std::move(E));
- // Read name ad set it in Record.
+ // Read name and set it in Record.
if (Error E = readName(Record))
return error(std::move(E));
@@ -1066,6 +1150,40 @@ Error IndexedInstrProfReader::readHeader() {
"corrupted binary ids");
}
+ if (GET_VERSION(Header->formatVersion()) >= 10 &&
+ Header->formatVersion() & VARIANT_MASK_TEMPORAL_PROF) {
+ uint64_t TemporalProfTracesOffset =
+ endian::byte_swap<uint64_t, little>(Header->TemporalProfTracesOffset);
+ const unsigned char *Ptr = Start + TemporalProfTracesOffset;
+ const auto *PtrEnd = (const unsigned char *)DataBuffer->getBufferEnd();
+ // Expect at least two 64 bit fields: NumTraces, and TraceStreamSize
+ if (Ptr + 2 * sizeof(uint64_t) > PtrEnd)
+ return error(instrprof_error::truncated);
+ const uint64_t NumTraces =
+ support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+ TemporalProfTraceStreamSize =
+ support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+ for (unsigned i = 0; i < NumTraces; i++) {
+ // Expect at least two 64 bit fields: Weight and NumFunctions
+ if (Ptr + 2 * sizeof(uint64_t) > PtrEnd)
+ return error(instrprof_error::truncated);
+ TemporalProfTraceTy Trace;
+ Trace.Weight =
+ support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+ const uint64_t NumFunctions =
+ support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+ // Expect at least NumFunctions 64 bit fields
+ if (Ptr + NumFunctions * sizeof(uint64_t) > PtrEnd)
+ return error(instrprof_error::truncated);
+ for (unsigned j = 0; j < NumFunctions; j++) {
+ const uint64_t NameRef =
+ support::endian::readNext<uint64_t, little, unaligned>(Ptr);
+ Trace.FunctionNameRefs.push_back(NameRef);
+ }
+ TemporalProfTraces.push_back(std::move(Trace));
+ }
+ }
+
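The section decoded by the loop above is a flat stream of little-endian uint64_t words; as a sketch of the field order only (the reader consumes individual words, it does not memcpy a struct):

// [NumTraces][TraceStreamSize]
// then, NumTraces times:
//   [Weight][NumFunctions][NameRef_0] ... [NameRef_{NumFunctions-1}]
// where each NameRef is the hash of a PGO function name, mirroring what
// InstrProfWriter emits for this section.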
// Load the remapping table now if requested.
if (RemappingBuffer) {
Remapper =
@@ -1087,7 +1205,8 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
std::unique_ptr<InstrProfSymtab> NewSymtab = std::make_unique<InstrProfSymtab>();
if (Error E = Index->populateSymtab(*NewSymtab)) {
- consumeError(error(InstrProfError::take(std::move(E))));
+ auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
+ consumeError(error(ErrCode, Msg));
}
Symtab = std::move(NewSymtab);
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index af3c27ebac76..b74d5c3862d8 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -13,6 +13,7 @@
#include "llvm/ProfileData/InstrProfWriter.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ProfileSummary.h"
#include "llvm/ProfileData/InstrProf.h"
@@ -171,8 +172,12 @@ public:
} // end namespace llvm
-InstrProfWriter::InstrProfWriter(bool Sparse)
- : Sparse(Sparse), InfoObj(new InstrProfRecordWriterTrait()) {}
+InstrProfWriter::InstrProfWriter(bool Sparse,
+ uint64_t TemporalProfTraceReservoirSize,
+ uint64_t MaxTemporalProfTraceLength)
+ : Sparse(Sparse), MaxTemporalProfTraceLength(MaxTemporalProfTraceLength),
+ TemporalProfTraceReservoirSize(TemporalProfTraceReservoirSize),
+ InfoObj(new InstrProfRecordWriterTrait()) {}
InstrProfWriter::~InstrProfWriter() { delete InfoObj; }
@@ -200,7 +205,7 @@ void InstrProfWriter::overlapRecord(NamedInstrProfRecord &&Other,
auto Name = Other.Name;
auto Hash = Other.Hash;
Other.accumulateCounts(FuncLevelOverlap.Test);
- if (FunctionData.find(Name) == FunctionData.end()) {
+ if (!FunctionData.contains(Name)) {
Overlap.addOneUnique(FuncLevelOverlap.Test);
return;
}
@@ -285,6 +290,62 @@ void InstrProfWriter::addBinaryIds(ArrayRef<llvm::object::BuildID> BIs) {
llvm::append_range(BinaryIds, BIs);
}
+void InstrProfWriter::addTemporalProfileTrace(TemporalProfTraceTy Trace) {
+ if (Trace.FunctionNameRefs.size() > MaxTemporalProfTraceLength)
+ Trace.FunctionNameRefs.resize(MaxTemporalProfTraceLength);
+ if (Trace.FunctionNameRefs.empty())
+ return;
+
+ if (TemporalProfTraceStreamSize < TemporalProfTraceReservoirSize) {
+ // Simply append the trace if we have not yet hit our reservoir size limit.
+ TemporalProfTraces.push_back(std::move(Trace));
+ } else {
+ // Otherwise, replace a random trace in the stream.
+ std::uniform_int_distribution<uint64_t> Distribution(
+ 0, TemporalProfTraceStreamSize);
+ uint64_t RandomIndex = Distribution(RNG);
+ if (RandomIndex < TemporalProfTraces.size())
+ TemporalProfTraces[RandomIndex] = std::move(Trace);
+ }
+ ++TemporalProfTraceStreamSize;
+}
+
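The update above is reservoir sampling (Algorithm R) applied to whole traces: once the reservoir is full, the new element replaces a random slot with probability ReservoirSize / (StreamSize + 1), so every trace in the stream ends up retained with equal probability. A generic sketch with placeholder names, not LLVM API:

#include <cstdint>
#include <random>
#include <utility>
#include <vector>

template <typename T>
void reservoirAdd(std::vector<T> &Reservoir, uint64_t &StreamSize, T Item,
                  uint64_t ReservoirSize, std::mt19937_64 &RNG) {
  if (StreamSize < ReservoirSize) {
    Reservoir.push_back(std::move(Item)); // still filling the reservoir
  } else {
    // Keep the new element with probability ReservoirSize / (StreamSize + 1).
    std::uniform_int_distribution<uint64_t> Dist(0, StreamSize);
    uint64_t Idx = Dist(RNG);
    if (Idx < Reservoir.size())
      Reservoir[Idx] = std::move(Item);
  }
  ++StreamSize;
}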
+void InstrProfWriter::addTemporalProfileTraces(
+ SmallVectorImpl<TemporalProfTraceTy> &SrcTraces, uint64_t SrcStreamSize) {
+ // Assume that the source has the same reservoir size as the destination to
+ // avoid needing to record it in the indexed profile format.
+ bool IsDestSampled =
+ (TemporalProfTraceStreamSize > TemporalProfTraceReservoirSize);
+ bool IsSrcSampled = (SrcStreamSize > TemporalProfTraceReservoirSize);
+ if (!IsDestSampled && IsSrcSampled) {
+    // If only the source stream is sampled, swap so that the sampled stream
+    // becomes the destination.
+ std::swap(TemporalProfTraces, SrcTraces);
+ std::swap(TemporalProfTraceStreamSize, SrcStreamSize);
+ std::swap(IsDestSampled, IsSrcSampled);
+ }
+ if (!IsSrcSampled) {
+ // If the source stream is not sampled, we add each source trace normally.
+ for (auto &Trace : SrcTraces)
+ addTemporalProfileTrace(std::move(Trace));
+ return;
+ }
+ // Otherwise, we find the traces that would have been removed if we added
+ // the whole source stream.
+ SmallSetVector<uint64_t, 8> IndicesToReplace;
+ for (uint64_t I = 0; I < SrcStreamSize; I++) {
+ std::uniform_int_distribution<uint64_t> Distribution(
+ 0, TemporalProfTraceStreamSize);
+ uint64_t RandomIndex = Distribution(RNG);
+ if (RandomIndex < TemporalProfTraces.size())
+ IndicesToReplace.insert(RandomIndex);
+ ++TemporalProfTraceStreamSize;
+ }
+ // Then we insert a random sample of the source traces.
+ llvm::shuffle(SrcTraces.begin(), SrcTraces.end(), RNG);
+ for (const auto &[Index, Trace] : llvm::zip(IndicesToReplace, SrcTraces))
+ TemporalProfTraces[Index] = std::move(Trace);
+}
+
void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
function_ref<void(Error)> Warn) {
for (auto &I : IPW.FunctionData)
@@ -295,6 +356,9 @@ void InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW,
for (auto &I : IPW.BinaryIds)
addBinaryIds(I);
+ addTemporalProfileTraces(IPW.TemporalProfTraces,
+ IPW.TemporalProfTraceStreamSize);
+
MemProfFrameData.reserve(IPW.MemProfFrameData.size());
for (auto &I : IPW.MemProfFrameData) {
// If we weren't able to add the frame mappings then it doesn't make sense
@@ -349,9 +413,13 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
InfoObj->CSSummaryBuilder = &CSISB;
// Populate the hash table generator.
+ SmallVector<std::pair<StringRef, const ProfilingData *>, 0> OrderedData;
for (const auto &I : FunctionData)
if (shouldEncodeData(I.getValue()))
- Generator.insert(I.getKey(), &I.getValue());
+ OrderedData.emplace_back((I.getKey()), &I.getValue());
+ llvm::sort(OrderedData, less_first());
+ for (const auto &I : OrderedData)
+ Generator.insert(I.first, I.second);
// Write the header.
IndexedInstrProf::Header Header;
@@ -370,18 +438,21 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
Header.Version |= VARIANT_MASK_FUNCTION_ENTRY_ONLY;
if (static_cast<bool>(ProfileKind & InstrProfKind::MemProf))
Header.Version |= VARIANT_MASK_MEMPROF;
+ if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))
+ Header.Version |= VARIANT_MASK_TEMPORAL_PROF;
Header.Unused = 0;
Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType);
Header.HashOffset = 0;
Header.MemProfOffset = 0;
Header.BinaryIdOffset = 0;
+ Header.TemporalProfTracesOffset = 0;
int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t);
- // Only write out all the fields except 'HashOffset', 'MemProfOffset' and
- // 'BinaryIdOffset'. We need to remember the offset of these fields to allow
- // back patching later.
- for (int I = 0; I < N - 3; I++)
+ // Only write out all the fields except 'HashOffset', 'MemProfOffset',
+ // 'BinaryIdOffset' and `TemporalProfTracesOffset`. We need to remember the
+ // offset of these fields to allow back patching later.
+ for (int I = 0; I < N - 4; I++)
OS.write(reinterpret_cast<uint64_t *>(&Header)[I]);
// Save the location of Header.HashOffset field in \c OS.
@@ -402,6 +473,9 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
// profile contains binary ids.
OS.write(0);
+ uint64_t TemporalProfTracesOffset = OS.tell();
+ OS.write(0);
+
// Reserve space to write profile summary data.
uint32_t NumEntries = ProfileSummaryBuilder::DefaultCutoffs.size();
uint32_t SummarySize = Summary::getSize(Summary::NumKinds, NumEntries);
@@ -515,6 +589,19 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
OS.writeByte(0);
}
+ uint64_t TemporalProfTracesSectionStart = 0;
+ if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile)) {
+ TemporalProfTracesSectionStart = OS.tell();
+ OS.write(TemporalProfTraces.size());
+ OS.write(TemporalProfTraceStreamSize);
+ for (auto &Trace : TemporalProfTraces) {
+ OS.write(Trace.Weight);
+ OS.write(Trace.FunctionNameRefs.size());
+ for (auto &NameRef : Trace.FunctionNameRefs)
+ OS.write(NameRef);
+ }
+ }
+
// Allocate space for data to be serialized out.
std::unique_ptr<IndexedInstrProf::Summary> TheSummary =
IndexedInstrProf::allocSummary(SummarySize);
@@ -542,6 +629,9 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
{MemProfSectionOffset, &MemProfSectionStart, 1},
// Patch the Header.BinaryIdSectionOffset.
{BinaryIdSectionOffset, &BinaryIdSectionStart, 1},
+ // Patch the Header.TemporalProfTracesOffset (=0 for profiles without
+ // traces).
+ {TemporalProfTracesOffset, &TemporalProfTracesSectionStart, 1},
// Patch the summary data.
{SummaryOffset, reinterpret_cast<uint64_t *>(TheSummary.get()),
(int)(SummarySize / sizeof(uint64_t))},
@@ -564,12 +654,16 @@ Error InstrProfWriter::write(raw_fd_ostream &OS) {
return writeImpl(POS);
}
+Error InstrProfWriter::write(raw_string_ostream &OS) {
+ ProfOStream POS(OS);
+ return writeImpl(POS);
+}
+
std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() {
std::string Data;
raw_string_ostream OS(Data);
- ProfOStream POS(OS);
// Write the hash table.
- if (Error E = writeImpl(POS))
+ if (Error E = write(OS))
return nullptr;
// Return this in an aligned memory buffer.
return MemoryBuffer::getMemBufferCopy(Data);
@@ -664,6 +758,9 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
}
}
+ if (static_cast<bool>(ProfileKind & InstrProfKind::TemporalProfile))
+ writeTextTemporalProfTraceData(OS, Symtab);
+
llvm::sort(OrderedFuncData, [](const RecordType &A, const RecordType &B) {
return std::tie(A.first, A.second.first) <
std::tie(B.first, B.second.first);
@@ -683,3 +780,18 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
return Error::success();
}
+
+void InstrProfWriter::writeTextTemporalProfTraceData(raw_fd_ostream &OS,
+ InstrProfSymtab &Symtab) {
+ OS << ":temporal_prof_traces\n";
+ OS << "# Num Temporal Profile Traces:\n" << TemporalProfTraces.size() << "\n";
+ OS << "# Temporal Profile Trace Stream Size:\n"
+ << TemporalProfTraceStreamSize << "\n";
+ for (auto &Trace : TemporalProfTraces) {
+ OS << "# Weight:\n" << Trace.Weight << "\n";
+ for (auto &NameRef : Trace.FunctionNameRefs)
+ OS << Symtab.getFuncName(NameRef) << ",";
+ OS << "\n";
+ }
+ OS << "\n";
+}
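With a single recorded trace of weight 5 over main, foo and bar (names illustrative), the section emitted above looks like:

:temporal_prof_traces
# Num Temporal Profile Traces:
1
# Temporal Profile Trace Stream Size:
1
# Weight:
5
main,foo,bar,

Note the trailing comma after the last name; the text reader tolerates it because empty splits are dropped (KeepEmpty=false).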
diff --git a/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp b/llvm/lib/ProfileData/ItaniumManglingCanonicalizer.cpp
index d95d84f7837e..afbb09ed35fc 100644
--- a/llvm/lib/Support/ItaniumManglingCanonicalizer.cpp
+++ b/llvm/lib/ProfileData/ItaniumManglingCanonicalizer.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/ItaniumManglingCanonicalizer.h"
+#include "llvm/ProfileData/ItaniumManglingCanonicalizer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/StringRef.h"
@@ -17,14 +17,16 @@ using namespace llvm;
using llvm::itanium_demangle::ForwardTemplateReference;
using llvm::itanium_demangle::Node;
using llvm::itanium_demangle::NodeKind;
-using llvm::itanium_demangle::StringView;
namespace {
struct FoldingSetNodeIDBuilder {
llvm::FoldingSetNodeID &ID;
void operator()(const Node *P) { ID.AddPointer(P); }
- void operator()(StringView Str) {
- ID.AddString(llvm::StringRef(Str.begin(), Str.size()));
+ void operator()(std::string_view Str) {
+ if (Str.empty())
+ ID.AddString({});
+ else
+ ID.AddString(llvm::StringRef(&*Str.begin(), Str.size()));
}
template <typename T>
std::enable_if_t<std::is_integral_v<T> || std::is_enum_v<T>> operator()(T V) {
@@ -292,7 +294,7 @@ parseMaybeMangledName(CanonicalizingDemangler &Demangler, StringRef Mangling,
N = Demangler.parse();
else
N = Demangler.make<itanium_demangle::NameType>(
- StringView(Mangling.data(), Mangling.size()));
+ std::string_view(Mangling.data(), Mangling.size()));
return reinterpret_cast<ItaniumManglingCanonicalizer::Key>(N);
}
diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
index 3081a04f2686..bccb205fb243 100644
--- a/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -17,20 +17,27 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/Object/Binary.h"
+#include "llvm/Object/BuildID.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/RawMemProfReader.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#define DEBUG_TYPE "memprof"
@@ -149,20 +156,22 @@ Error report(Error E, const StringRef Context) {
}
bool isRuntimePath(const StringRef Path) {
- return StringRef(llvm::sys::path::convert_to_slash(Path))
- .contains("memprof/memprof_");
+ const StringRef Filename = llvm::sys::path::filename(Path);
+ // This list should be updated in case new files with additional interceptors
+ // are added to the memprof runtime.
+ return Filename.equals("memprof_malloc_linux.cpp") ||
+ Filename.equals("memprof_interceptors.cpp") ||
+ Filename.equals("memprof_new_delete.cpp");
}
std::string getBuildIdString(const SegmentEntry &Entry) {
- constexpr size_t Size = sizeof(Entry.BuildId) / sizeof(uint8_t);
- constexpr uint8_t Zeros[Size] = {0};
// If the build id is unset print a helpful string instead of all zeros.
- if (memcmp(Entry.BuildId, Zeros, Size) == 0)
+ if (Entry.BuildIdSize == 0)
return "<None>";
std::string Str;
raw_string_ostream OS(Str);
- for (size_t I = 0; I < Size; I++) {
+ for (size_t I = 0; I < Entry.BuildIdSize; I++) {
OS << format_hex_no_prefix(Entry.BuildId[I], 2);
}
return OS.str();
@@ -177,13 +186,29 @@ RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
return report(errorCodeToError(EC), Path.getSingleStringRef());
std::unique_ptr<MemoryBuffer> Buffer(BufferOr.get().release());
- if (Error E = checkBuffer(*Buffer))
- return report(std::move(E), Path.getSingleStringRef());
+ return create(std::move(Buffer), ProfiledBinary, KeepName);
+}
- if (ProfiledBinary.empty())
+Expected<std::unique_ptr<RawMemProfReader>>
+RawMemProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
+ const StringRef ProfiledBinary, bool KeepName) {
+ if (Error E = checkBuffer(*Buffer))
+ return report(std::move(E), Buffer->getBufferIdentifier());
+
+ if (ProfiledBinary.empty()) {
+ // Peek the build ids to print a helpful error message.
+ const std::vector<std::string> BuildIds = peekBuildIds(Buffer.get());
+ std::string ErrorMessage(
+ R"(Path to profiled binary is empty, expected binary with one of the following build ids:
+)");
+ for (const auto &Id : BuildIds) {
+ ErrorMessage += "\n BuildId: ";
+ ErrorMessage += Id;
+ }
return report(
- errorCodeToError(make_error_code(std::errc::invalid_argument)),
- "Path to profiled binary is empty!");
+ make_error<StringError>(ErrorMessage, inconvertibleErrorCode()),
+ /*Context=*/"");
+ }
auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
if (!BinaryOr) {
@@ -263,22 +288,42 @@ Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
}
// Check whether the profiled binary was built with position independent code
- // (PIC). For now we provide a error message until symbolization support
- // is added for pic.
+ // (PIC). Perform sanity checks for assumptions we rely on to simplify
+ // symbolization.
auto* Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject);
const llvm::object::ELF64LEFile& ElfFile = Elf64LEObject->getELFFile();
auto PHdrsOr = ElfFile.program_headers();
- if(!PHdrsOr)
- return report(make_error<StringError>(Twine("Could not read program headers: "),
- inconvertibleErrorCode()),
- FileName);
- auto FirstLoadHeader = PHdrsOr->begin();
- while (FirstLoadHeader->p_type != llvm::ELF::PT_LOAD)
- ++FirstLoadHeader;
- if(FirstLoadHeader->p_vaddr == 0)
- return report(make_error<StringError>(Twine("Unsupported position independent code"),
- inconvertibleErrorCode()),
- FileName);
+ if (!PHdrsOr)
+ return report(
+ make_error<StringError>(Twine("Could not read program headers: "),
+ inconvertibleErrorCode()),
+ FileName);
+
+ int NumExecutableSegments = 0;
+ for (const auto &Phdr : *PHdrsOr) {
+ if (Phdr.p_type == ELF::PT_LOAD) {
+ if (Phdr.p_flags & ELF::PF_X) {
+ // We assume only one text segment in the main binary for simplicity and to
+ // reduce the overhead of checking multiple ranges during symbolization.
+ if (++NumExecutableSegments > 1) {
+ return report(
+ make_error<StringError>(
+ "Expect only one executable load segment in the binary",
+ inconvertibleErrorCode()),
+ FileName);
+ }
+ // The segment will always be loaded at a page boundary, so expect it to be
+ // aligned already. Assume a 4K page size for the machine from which the
+ // profile was collected. This should be fine for now; if we want to support
+ // other page sizes, they can be recorded in the raw profile during
+ // collection.
+ PreferredTextSegmentAddress = Phdr.p_vaddr;
+ assert(Phdr.p_vaddr == (Phdr.p_vaddr & ~(0x1000 - 1U)) &&
+ "Expect p_vaddr to always be page aligned");
+ assert(Phdr.p_offset == 0 && "Expect p_offset = 0 for symbolization.");
+ }
+ }
+ }
auto Triple = ElfObject->makeTriple();
if (!Triple.isX86())
@@ -297,15 +342,50 @@ Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
return report(SOFOr.takeError(), FileName);
Symbolizer = std::move(SOFOr.get());
+ // Process the raw profile.
if (Error E = readRawProfile(std::move(DataBuffer)))
return E;
+ if (Error E = setupForSymbolization())
+ return E;
+
if (Error E = symbolizeAndFilterStackFrames())
return E;
return mapRawProfileToRecords();
}
+Error RawMemProfReader::setupForSymbolization() {
+ auto *Object = cast<object::ObjectFile>(Binary.getBinary());
+ object::BuildIDRef BinaryId = object::getBuildID(Object);
+ if (BinaryId.empty())
+ return make_error<StringError>(Twine("No build id found in binary ") +
+ Binary.getBinary()->getFileName(),
+ inconvertibleErrorCode());
+
+ int NumMatched = 0;
+ for (const auto &Entry : SegmentInfo) {
+ llvm::ArrayRef<uint8_t> SegmentId(Entry.BuildId, Entry.BuildIdSize);
+ if (BinaryId == SegmentId) {
+ // We assume only one text segment in the main binary for simplicity and to
+ // reduce the overhead of checking multiple ranges during symbolization.
+ if (++NumMatched > 1) {
+ return make_error<StringError>(
+ "We expect only one executable segment in the profiled binary",
+ inconvertibleErrorCode());
+ }
+ ProfiledTextSegmentStart = Entry.Start;
+ ProfiledTextSegmentEnd = Entry.End;
+ }
+ }
+ assert(NumMatched != 0 && "No matching executable segments in segment info.");
+ assert((PreferredTextSegmentAddress == 0 ||
+ (PreferredTextSegmentAddress == ProfiledTextSegmentStart)) &&
+ "Expect text segment address to be 0 or equal to profiled text "
+ "segment start.");
+ return Error::success();
+}
+
Error RawMemProfReader::mapRawProfileToRecords() {
// Hold a mapping from function to each callsite location we encounter within
// it that is part of some dynamic allocation context. The location is stored
@@ -462,6 +542,36 @@ Error RawMemProfReader::symbolizeAndFilterStackFrames() {
return Error::success();
}
+std::vector<std::string>
+RawMemProfReader::peekBuildIds(MemoryBuffer *DataBuffer) {
+ const char *Next = DataBuffer->getBufferStart();
+ // Use a set + vector since a profile file may contain multiple raw profile
+ // dumps, each with segment information. We want them unique and in the order they
+ // were stored in the profile; the profiled binary should be the first entry.
+ // The runtime uses dl_iterate_phdr and the "... first object visited by
+ // callback is the main program."
+ // https://man7.org/linux/man-pages/man3/dl_iterate_phdr.3.html
+ std::vector<std::string> BuildIds;
+ llvm::SmallSet<std::string, 10> BuildIdsSet;
+ while (Next < DataBuffer->getBufferEnd()) {
+ auto *Header = reinterpret_cast<const memprof::Header *>(Next);
+
+ const llvm::SmallVector<SegmentEntry> Entries =
+ readSegmentEntries(Next + Header->SegmentOffset);
+
+ for (const auto &Entry : Entries) {
+ const std::string Id = getBuildIdString(Entry);
+ if (BuildIdsSet.contains(Id))
+ continue;
+ BuildIds.push_back(Id);
+ BuildIdsSet.insert(Id);
+ }
+
+ Next += Header->TotalSize;
+ }
+ return BuildIds;
+}
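A minimal caller-side sketch, assuming peekBuildIds stays publicly visible as declared in RawMemProfReader.h and that the raw profile buffer was loaded elsewhere (printRawProfileBuildIds is a hypothetical helper):

#include "llvm/ProfileData/RawMemProfReader.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

static void printRawProfileBuildIds(llvm::MemoryBuffer &Buf) {
  // Unique build ids, in the order their segments were recorded; the profiled
  // binary is expected to be the first entry (see the comment above).
  for (const std::string &Id :
       llvm::memprof::RawMemProfReader::peekBuildIds(&Buf))
    llvm::errs() << "BuildId: " << Id << "\n";
}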
+
Error RawMemProfReader::readRawProfile(
std::unique_ptr<MemoryBuffer> DataBuffer) {
const char *Next = DataBuffer->getBufferStart();
@@ -514,20 +624,19 @@ Error RawMemProfReader::readRawProfile(
object::SectionedAddress
RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
- LLVM_DEBUG({
- SegmentEntry *ContainingSegment = nullptr;
- for (auto &SE : SegmentInfo) {
- if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) {
- ContainingSegment = &SE;
- }
+ if (VirtualAddress > ProfiledTextSegmentStart &&
+ VirtualAddress <= ProfiledTextSegmentEnd) {
+ // For PIE binaries, the preferred address is zero and we adjust the virtual
+ // address by the start of the profiled segment, assuming that the offset of
+ // the segment in the binary is zero. For non-PIE binaries the preferred and
+ // profiled segment addresses should be equal and this is a no-op.
+ const uint64_t AdjustedAddress =
+ VirtualAddress + PreferredTextSegmentAddress - ProfiledTextSegmentStart;
+ return object::SectionedAddress{AdjustedAddress};
}
-
- // Ensure that the virtual address is valid.
- assert(ContainingSegment && "Could not find a segment entry");
- });
-
- // TODO: Compute the file offset based on the maps and program headers. For
- // now this only works for non PIE binaries.
+ // Addresses which do not originate from the profiled text segment in the
+ // binary are not adjusted. These will fail symbolization and be filtered out
+ // during processing.
return object::SectionedAddress{VirtualAddress};
}
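A worked example of the rebasing above, with illustrative addresses: suppose the profiled PIE run mapped the text segment at 0x555555554000 and the binary's PT_LOAD prefers address 0 (as the comment notes for PIE). A sampled PC of 0x555555560123 then becomes the in-segment offset 0xc123:

#include <cstdint>

// Illustrative values only; in the reader these come from the raw profile's
// segment entries and from the binary's program headers.
const uint64_t ProfiledTextSegmentStart = 0x555555554000;  // runtime mapping
const uint64_t PreferredTextSegmentAddress = 0x0;          // PIE: p_vaddr == 0
const uint64_t VirtualAddress = 0x555555560123;            // sampled PC
// 0x555555560123 + 0x0 - 0x555555554000 == 0xc123, the address the
// symbolizer expects for a segment with p_offset == 0.
const uint64_t AdjustedAddress =
    VirtualAddress + PreferredTextSegmentAddress - ProfiledTextSegmentStart;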
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index b4d5550a1721..fdae8a011e71 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -32,7 +32,7 @@ static cl::opt<uint64_t> ProfileSymbolListCutOff(
cl::desc("Cutoff value about how many symbols in profile symbol list "
"will be used. This is very useful for performance debugging"));
-cl::opt<bool> GenerateMergedBaseProfiles(
+static cl::opt<bool> GenerateMergedBaseProfiles(
"generate-merged-base-profiles",
cl::desc("When generating nested context-sensitive profiles, always "
"generate extra base profile for function with all its context "
@@ -291,7 +291,7 @@ const FunctionSamples *FunctionSamples::findFunctionSamplesAt(
std::string CalleeGUID;
CalleeName = getRepInFormat(CalleeName, UseMD5, CalleeGUID);
- auto iter = CallsiteSamples.find(Loc);
+ auto iter = CallsiteSamples.find(mapIRLocToProfileLoc(Loc));
if (iter == CallsiteSamples.end())
return nullptr;
auto FS = iter->second.find(CalleeName);
@@ -461,9 +461,9 @@ void ProfileSymbolList::dump(raw_ostream &OS) const {
OS << Sym << "\n";
}
-CSProfileConverter::FrameNode *
-CSProfileConverter::FrameNode::getOrCreateChildFrame(
- const LineLocation &CallSite, StringRef CalleeName) {
+ProfileConverter::FrameNode *
+ProfileConverter::FrameNode::getOrCreateChildFrame(const LineLocation &CallSite,
+ StringRef CalleeName) {
uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite);
auto It = AllChildFrames.find(Hash);
if (It != AllChildFrames.end()) {
@@ -476,7 +476,7 @@ CSProfileConverter::FrameNode::getOrCreateChildFrame(
return &AllChildFrames[Hash];
}
-CSProfileConverter::CSProfileConverter(SampleProfileMap &Profiles)
+ProfileConverter::ProfileConverter(SampleProfileMap &Profiles)
: ProfileMap(Profiles) {
for (auto &FuncSample : Profiles) {
FunctionSamples *FSamples = &FuncSample.second;
@@ -486,8 +486,8 @@ CSProfileConverter::CSProfileConverter(SampleProfileMap &Profiles)
}
}
-CSProfileConverter::FrameNode *
-CSProfileConverter::getOrCreateContextPath(const SampleContext &Context) {
+ProfileConverter::FrameNode *
+ProfileConverter::getOrCreateContextPath(const SampleContext &Context) {
auto Node = &RootFrame;
LineLocation CallSiteLoc(0, 0);
for (auto &Callsite : Context.getContextFrames()) {
@@ -497,14 +497,14 @@ CSProfileConverter::getOrCreateContextPath(const SampleContext &Context) {
return Node;
}
-void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) {
+void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) {
// Process each child profile. Add each child profile to callsite profile map
// of the current node `Node` if `Node` comes with a profile. Otherwise
// promote the child profile to a standalone profile.
auto *NodeProfile = Node.FuncSamples;
for (auto &It : Node.AllChildFrames) {
auto &ChildNode = It.second;
- convertProfiles(ChildNode);
+ convertCSProfiles(ChildNode);
auto *ChildProfile = ChildNode.FuncSamples;
if (!ChildProfile)
continue;
@@ -544,4 +544,4 @@ void CSProfileConverter::convertProfiles(CSProfileConverter::FrameNode &Node) {
}
}
-void CSProfileConverter::convertProfiles() { convertProfiles(RootFrame); }
+void ProfileConverter::convertCSProfiles() { convertCSProfiles(RootFrame); }
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index d3753d1e8a99..fbdd9a307321 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -35,6 +35,7 @@
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstddef>
@@ -50,7 +51,7 @@ using namespace sampleprof;
#define DEBUG_TYPE "samplepgo-reader"
// This internal option specifies if the profile uses FS discriminators.
-// It only applies to text, binary and compact binary format profiles.
+// It only applies to text and binary format profiles.
// For ext-binary format profiles, the flag is set in the summary.
static cl::opt<bool> ProfileIsFSDisciminator(
"profile-isfs", cl::Hidden, cl::init(false),
@@ -327,7 +328,8 @@ std::error_code SampleProfileReaderText::readImpl() {
ProfileIsFS = ProfileIsFSDisciminator;
FunctionSamples::ProfileIsFS = ProfileIsFS;
for (; !LineIt.is_at_eof(); ++LineIt) {
- if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#')
+ size_t pos = LineIt->find_first_not_of(' ');
+ if (pos == LineIt->npos || (*LineIt)[pos] == '#')
continue;
// Read the header of each function.
//
@@ -513,9 +515,9 @@ ErrorOr<T> SampleProfileReaderBinary::readUnencodedNumber() {
}
template <typename T>
-inline ErrorOr<uint32_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
+inline ErrorOr<size_t> SampleProfileReaderBinary::readStringIndex(T &Table) {
std::error_code EC;
- auto Idx = readNumber<uint32_t>();
+ auto Idx = readNumber<size_t>();
if (std::error_code EC = Idx.getError())
return EC;
if (*Idx >= Table.size())
@@ -528,50 +530,43 @@ ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() {
if (std::error_code EC = Idx.getError())
return EC;
- return NameTable[*Idx];
-}
-
-ErrorOr<SampleContext> SampleProfileReaderBinary::readSampleContextFromTable() {
- auto FName(readStringFromTable());
- if (std::error_code EC = FName.getError())
- return EC;
- return SampleContext(*FName);
-}
-
-ErrorOr<StringRef> SampleProfileReaderExtBinaryBase::readStringFromTable() {
- if (!FixedLengthMD5)
- return SampleProfileReaderBinary::readStringFromTable();
-
- // read NameTable index.
- auto Idx = readStringIndex(NameTable);
- if (std::error_code EC = Idx.getError())
- return EC;
-
- // Check whether the name to be accessed has been accessed before,
- // if not, read it from memory directly.
+ // Lazy loading: if the string has not been materialized from the memory
+ // storing MD5 values, it is default-initialized with a null pointer. This
+ // can only happen when using fixed-length MD5; a bounds check is performed
+ // while parsing the name table to ensure MD5NameMemStart points to an array
+ // with enough MD5 entries.
StringRef &SR = NameTable[*Idx];
- if (SR.empty()) {
- const uint8_t *SavedData = Data;
- Data = MD5NameMemStart + ((*Idx) * sizeof(uint64_t));
- auto FID = readUnencodedNumber<uint64_t>();
- if (std::error_code EC = FID.getError())
- return EC;
- // Save the string converted from uint64_t in MD5StringBuf. All the
- // references to the name are all StringRefs refering to the string
- // in MD5StringBuf.
- MD5StringBuf->push_back(std::to_string(*FID));
- SR = MD5StringBuf->back();
- Data = SavedData;
+ if (!SR.data()) {
+ assert(MD5NameMemStart);
+ using namespace support;
+ uint64_t FID = endian::read<uint64_t, little, unaligned>(
+ MD5NameMemStart + (*Idx) * sizeof(uint64_t));
+ SR = MD5StringBuf.emplace_back(std::to_string(FID));
}
return SR;
}
-ErrorOr<StringRef> SampleProfileReaderCompactBinary::readStringFromTable() {
- auto Idx = readStringIndex(NameTable);
- if (std::error_code EC = Idx.getError())
+ErrorOr<SampleContextFrames> SampleProfileReaderBinary::readContextFromTable() {
+ auto ContextIdx = readNumber<size_t>();
+ if (std::error_code EC = ContextIdx.getError())
return EC;
+ if (*ContextIdx >= CSNameTable.size())
+ return sampleprof_error::truncated_name_table;
+ return CSNameTable[*ContextIdx];
+}
- return StringRef(NameTable[*Idx]);
+ErrorOr<SampleContext> SampleProfileReaderBinary::readSampleContextFromTable() {
+ if (ProfileIsCS) {
+ auto FContext(readContextFromTable());
+ if (std::error_code EC = FContext.getError())
+ return EC;
+ return SampleContext(*FContext);
+ } else {
+ auto FName(readStringFromTable());
+ if (std::error_code EC = FName.getError())
+ return EC;
+ return SampleContext(*FName);
+ }
}
std::error_code
@@ -684,7 +679,7 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
std::error_code SampleProfileReaderBinary::readImpl() {
ProfileIsFS = ProfileIsFSDisciminator;
FunctionSamples::ProfileIsFS = ProfileIsFS;
- while (!at_eof()) {
+ while (Data < End) {
if (std::error_code EC = readFuncProfile(Data))
return EC;
}
@@ -692,31 +687,6 @@ std::error_code SampleProfileReaderBinary::readImpl() {
return sampleprof_error::success;
}
-ErrorOr<SampleContextFrames>
-SampleProfileReaderExtBinaryBase::readContextFromTable() {
- auto ContextIdx = readNumber<uint32_t>();
- if (std::error_code EC = ContextIdx.getError())
- return EC;
- if (*ContextIdx >= CSNameTable->size())
- return sampleprof_error::truncated_name_table;
- return (*CSNameTable)[*ContextIdx];
-}
-
-ErrorOr<SampleContext>
-SampleProfileReaderExtBinaryBase::readSampleContextFromTable() {
- if (ProfileIsCS) {
- auto FContext(readContextFromTable());
- if (std::error_code EC = FContext.getError())
- return EC;
- return SampleContext(*FContext);
- } else {
- auto FName(readStringFromTable());
- if (std::error_code EC = FName.getError())
- return EC;
- return SampleContext(*FName);
- }
-}
-
std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
const uint8_t *Start, uint64_t Size, const SecHdrTableEntry &Entry) {
Data = Start;
@@ -735,14 +705,15 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
FunctionSamples::ProfileIsFS = ProfileIsFS = true;
break;
case SecNameTable: {
- FixedLengthMD5 =
+ bool FixedLengthMD5 =
hasSecFlag(Entry, SecNameTableFlags::SecFlagFixedLengthMD5);
bool UseMD5 = hasSecFlag(Entry, SecNameTableFlags::SecFlagMD5Name);
- assert((!FixedLengthMD5 || UseMD5) &&
- "If FixedLengthMD5 is true, UseMD5 has to be true");
+ // UseMD5 means if THIS section uses MD5, ProfileIsMD5 means if the entire
+ // profile uses MD5 for function name matching in IPO passes.
+ ProfileIsMD5 = ProfileIsMD5 || UseMD5;
FunctionSamples::HasUniqSuffix =
hasSecFlag(Entry, SecNameTableFlags::SecFlagUniqSuffix);
- if (std::error_code EC = readNameTableSec(UseMD5))
+ if (std::error_code EC = readNameTableSec(UseMD5, FixedLengthMD5))
return EC;
break;
}
@@ -756,9 +727,17 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
return EC;
break;
case SecFuncOffsetTable:
- FuncOffsetsOrdered = hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered);
- if (std::error_code EC = readFuncOffsetTable())
- return EC;
+ // If the module is absent, we are using LLVM tools and need to read all
+ // profiles, so skip reading the function offset table.
+ if (!M) {
+ Data = End;
+ } else {
+ assert((!ProfileIsCS ||
+ hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered)) &&
+ "func offset table should always be sorted in CS profile");
+ if (std::error_code EC = readFuncOffsetTable())
+ return EC;
+ }
break;
case SecFuncMetadata: {
ProfileIsProbeBased =
@@ -782,6 +761,35 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
return sampleprof_error::success;
}
+bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const {
+ // If the profile is CS, the function offset section is expected to consist of
+ // sequences of contexts in pre-order layout
+ // (e.g. [A, A:1 @ B, A:1 @ B:2.3 @ C] [D, D:1 @ E]), so that when a matched
+ // context in the module is found, the profiles of all its callees are
+ // recursively loaded. A list is needed since the order of profiles matters.
+ if (ProfileIsCS)
+ return true;
+
+ // If the profile is MD5, use the map container to look up functions in
+ // the module. A remapper has no use on MD5 names.
+ if (useMD5())
+ return false;
+
+ // If the profile is not MD5 and a remapper is present, the remapped name of
+ // every function needs to be matched against the module, so use the list
+ // container since each entry is accessed.
+ if (Remapper)
+ return true;
+
+ // Otherwise use the map container for faster lookup.
+ // TODO: If the cardinality of the function offset section is much smaller
+ // than the number of functions in the module, using the list container can
+ // be always faster, but we need to figure out the constant factor to
+ // determine the cutoff.
+ return false;
+}
+
+
bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
if (!M)
return false;
@@ -792,22 +800,20 @@ bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
}
std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
- // If there are more than one FuncOffsetTable, the profile read associated
- // with previous FuncOffsetTable has to be done before next FuncOffsetTable
- // is read.
+ // If there is more than one function offset section, the profiles associated
+ // with the previous section have to be fully read before the next one is read.
FuncOffsetTable.clear();
+ FuncOffsetList.clear();
auto Size = readNumber<uint64_t>();
if (std::error_code EC = Size.getError())
return EC;
- FuncOffsetTable.reserve(*Size);
-
- if (FuncOffsetsOrdered) {
- OrderedFuncOffsets =
- std::make_unique<std::vector<std::pair<SampleContext, uint64_t>>>();
- OrderedFuncOffsets->reserve(*Size);
- }
+ bool UseFuncOffsetList = useFuncOffsetList();
+ if (UseFuncOffsetList)
+ FuncOffsetList.reserve(*Size);
+ else
+ FuncOffsetTable.reserve(*Size);
for (uint64_t I = 0; I < *Size; ++I) {
auto FContext(readSampleContextFromTable());
@@ -818,12 +824,13 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
if (std::error_code EC = Offset.getError())
return EC;
- FuncOffsetTable[*FContext] = *Offset;
- if (FuncOffsetsOrdered)
- OrderedFuncOffsets->emplace_back(*FContext, *Offset);
- }
+ if (UseFuncOffsetList)
+ FuncOffsetList.emplace_back(*FContext, *Offset);
+ else
+ FuncOffsetTable[*FContext] = *Offset;
+ }
- return sampleprof_error::success;
+ return sampleprof_error::success;
}
std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
@@ -835,7 +842,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
// NameTable section is read.
bool LoadFuncsToBeUsed = collectFuncsFromModule();
- // When LoadFuncsToBeUsed is false, load all the function profiles.
+ // When LoadFuncsToBeUsed is false, we are using an LLVM tool and need to
+ // read all profiles.
const uint8_t *Start = Data;
if (!LoadFuncsToBeUsed) {
while (Data < End) {
@@ -852,6 +860,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
}
if (ProfileIsCS) {
+ assert(useFuncOffsetList());
DenseSet<uint64_t> FuncGuidsToUse;
if (useMD5()) {
for (auto Name : FuncsToUse)
@@ -865,10 +874,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
// as if they were walked in preorder of a context trie. While
// traversing the trie, a link to the highest common ancestor node is
// kept so that all of its descendants will be loaded.
- assert(OrderedFuncOffsets.get() &&
- "func offset table should always be sorted in CS profile");
const SampleContext *CommonContext = nullptr;
- for (const auto &NameOffset : *OrderedFuncOffsets) {
+ for (const auto &NameOffset : FuncOffsetList) {
const auto &FContext = NameOffset.first;
auto FName = FContext.getName();
// For function in the current module, keep its farthest ancestor
@@ -886,35 +893,41 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
// Load profile for the current context which originated from
// the common ancestor.
const uint8_t *FuncProfileAddr = Start + NameOffset.second;
- assert(FuncProfileAddr < End && "out of LBRProfile section");
if (std::error_code EC = readFuncProfile(FuncProfileAddr))
return EC;
}
}
+ } else if (useMD5()) {
+ assert(!useFuncOffsetList());
+ for (auto Name : FuncsToUse) {
+ auto GUID = std::to_string(MD5Hash(Name));
+ auto iter = FuncOffsetTable.find(StringRef(GUID));
+ if (iter == FuncOffsetTable.end())
+ continue;
+ const uint8_t *FuncProfileAddr = Start + iter->second;
+ if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+ return EC;
+ }
+ } else if (Remapper) {
+ assert(useFuncOffsetList());
+ for (auto NameOffset : FuncOffsetList) {
+ SampleContext FContext(NameOffset.first);
+ auto FuncName = FContext.getName();
+ if (!FuncsToUse.count(FuncName) && !Remapper->exist(FuncName))
+ continue;
+ const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+ if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+ return EC;
+ }
} else {
- if (useMD5()) {
- for (auto Name : FuncsToUse) {
- auto GUID = std::to_string(MD5Hash(Name));
- auto iter = FuncOffsetTable.find(StringRef(GUID));
- if (iter == FuncOffsetTable.end())
- continue;
- const uint8_t *FuncProfileAddr = Start + iter->second;
- assert(FuncProfileAddr < End && "out of LBRProfile section");
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
- }
- } else {
- for (auto NameOffset : FuncOffsetTable) {
- SampleContext FContext(NameOffset.first);
- auto FuncName = FContext.getName();
- if (!FuncsToUse.count(FuncName) &&
- (!Remapper || !Remapper->exist(FuncName)))
- continue;
- const uint8_t *FuncProfileAddr = Start + NameOffset.second;
- assert(FuncProfileAddr < End && "out of LBRProfile section");
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
- }
+ assert(!useFuncOffsetList());
+ for (auto Name : FuncsToUse) {
+ auto iter = FuncOffsetTable.find(Name);
+ if (iter == FuncOffsetTable.end())
+ continue;
+ const uint8_t *FuncProfileAddr = Start + iter->second;
+ if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+ return EC;
}
}
Data = End;
@@ -1010,40 +1023,6 @@ std::error_code SampleProfileReaderExtBinaryBase::readImpl() {
return sampleprof_error::success;
}
-std::error_code SampleProfileReaderCompactBinary::readImpl() {
- // Collect functions used by current module if the Reader has been
- // given a module.
- bool LoadFuncsToBeUsed = collectFuncsFromModule();
- ProfileIsFS = ProfileIsFSDisciminator;
- FunctionSamples::ProfileIsFS = ProfileIsFS;
- std::vector<uint64_t> OffsetsToUse;
- if (!LoadFuncsToBeUsed) {
- // load all the function profiles.
- for (auto FuncEntry : FuncOffsetTable) {
- OffsetsToUse.push_back(FuncEntry.second);
- }
- } else {
- // load function profiles on demand.
- for (auto Name : FuncsToUse) {
- auto GUID = std::to_string(MD5Hash(Name));
- auto iter = FuncOffsetTable.find(StringRef(GUID));
- if (iter == FuncOffsetTable.end())
- continue;
- OffsetsToUse.push_back(iter->second);
- }
- }
-
- for (auto Offset : OffsetsToUse) {
- const uint8_t *SavedData = Data;
- if (std::error_code EC = readFuncProfile(
- reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
- Offset))
- return EC;
- Data = SavedData;
- }
- return sampleprof_error::success;
-}
-
std::error_code SampleProfileReaderRawBinary::verifySPMagic(uint64_t Magic) {
if (Magic == SPMagic())
return sampleprof_error::success;
@@ -1056,61 +1035,81 @@ std::error_code SampleProfileReaderExtBinary::verifySPMagic(uint64_t Magic) {
return sampleprof_error::bad_magic;
}
-std::error_code
-SampleProfileReaderCompactBinary::verifySPMagic(uint64_t Magic) {
- if (Magic == SPMagic(SPF_Compact_Binary))
- return sampleprof_error::success;
- return sampleprof_error::bad_magic;
-}
-
std::error_code SampleProfileReaderBinary::readNameTable() {
- auto Size = readNumber<uint32_t>();
+ auto Size = readNumber<size_t>();
if (std::error_code EC = Size.getError())
return EC;
- NameTable.reserve(*Size + NameTable.size());
- for (uint32_t I = 0; I < *Size; ++I) {
+
+ // Normally, if useMD5 is true, the name table should contain MD5 values, not
+ // strings. However, if an ExtBinary profile has multiple name tables mixing
+ // strings and MD5 values, all of them have to be normalized to use MD5,
+ // because optimization passes can only handle one form, not a mix.
+ bool UseMD5 = useMD5();
+ if (UseMD5)
+ MD5StringBuf.reserve(MD5StringBuf.size() + *Size);
+
+ NameTable.clear();
+ NameTable.reserve(*Size);
+ for (size_t I = 0; I < *Size; ++I) {
auto Name(readString());
if (std::error_code EC = Name.getError())
return EC;
- NameTable.push_back(*Name);
+ if (UseMD5) {
+ uint64_t FID = MD5Hash(*Name);
+ NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(FID)));
+ } else
+ NameTable.push_back(*Name);
}
return sampleprof_error::success;
}
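A small sketch of the matching this normalization enables on the consumer side, assuming lookups hash the module's canonical function name the same way (toProfileKey is a hypothetical helper; see the MD5 branch of readFuncProfiles below for the real usage):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MD5.h"
#include <string>

// With UseMD5, every name table entry is std::to_string(MD5Hash(Name)), so a
// module-side lookup produces the same decimal key from the function name.
static std::string toProfileKey(llvm::StringRef FuncName) {
  return std::to_string(llvm::MD5Hash(FuncName));
}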
-std::error_code SampleProfileReaderExtBinaryBase::readMD5NameTable() {
- auto Size = readNumber<uint64_t>();
- if (std::error_code EC = Size.getError())
- return EC;
- MD5StringBuf = std::make_unique<std::vector<std::string>>();
- MD5StringBuf->reserve(*Size);
+std::error_code
+SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5,
+ bool FixedLengthMD5) {
if (FixedLengthMD5) {
+ if (!IsMD5)
+ errs() << "If FixedLengthMD5 is true, UseMD5 has to be true";
+ auto Size = readNumber<size_t>();
+ if (std::error_code EC = Size.getError())
+ return EC;
+
+ assert(Data + (*Size) * sizeof(uint64_t) == End &&
+ "Fixed length MD5 name table does not contain specified number of "
+ "entries");
+ if (Data + (*Size) * sizeof(uint64_t) > End)
+ return sampleprof_error::truncated;
+
// Preallocate and initialize NameTable so we can check whether a name
// index has been read before by checking whether the element in the
// NameTable is empty, meanwhile readStringIndex can do the boundary
// check using the size of NameTable.
- NameTable.resize(*Size + NameTable.size());
-
+ MD5StringBuf.reserve(MD5StringBuf.size() + *Size);
+ NameTable.clear();
+ NameTable.resize(*Size);
MD5NameMemStart = Data;
Data = Data + (*Size) * sizeof(uint64_t);
return sampleprof_error::success;
}
- NameTable.reserve(*Size);
- for (uint64_t I = 0; I < *Size; ++I) {
- auto FID = readNumber<uint64_t>();
- if (std::error_code EC = FID.getError())
+
+ if (IsMD5) {
+ assert(!FixedLengthMD5 && "FixedLengthMD5 should be unreachable here");
+ auto Size = readNumber<size_t>();
+ if (std::error_code EC = Size.getError())
return EC;
- MD5StringBuf->push_back(std::to_string(*FID));
- // NameTable is a vector of StringRef. Here it is pushing back a
- // StringRef initialized with the last string in MD5stringBuf.
- NameTable.push_back(MD5StringBuf->back());
+
+ MD5StringBuf.reserve(MD5StringBuf.size() + *Size);
+ NameTable.clear();
+ NameTable.reserve(*Size);
+ for (size_t I = 0; I < *Size; ++I) {
+ auto FID = readNumber<uint64_t>();
+ if (std::error_code EC = FID.getError())
+ return EC;
+ NameTable.emplace_back(MD5StringBuf.emplace_back(std::to_string(*FID)));
+ }
+ return sampleprof_error::success;
}
- return sampleprof_error::success;
-}
-std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
- if (IsMD5)
- return readMD5NameTable();
return SampleProfileReaderBinary::readNameTable();
}
@@ -1119,15 +1118,14 @@ std::error_code SampleProfileReaderExtBinaryBase::readNameTableSec(bool IsMD5) {
// underlying raw function names that are stored in the name table, as well as
// a callsite identifier that only makes sense for non-leaf frames.
std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
- auto Size = readNumber<uint32_t>();
+ auto Size = readNumber<size_t>();
if (std::error_code EC = Size.getError())
return EC;
- std::vector<SampleContextFrameVector> *PNameVec =
- new std::vector<SampleContextFrameVector>();
- PNameVec->reserve(*Size);
- for (uint32_t I = 0; I < *Size; ++I) {
- PNameVec->emplace_back(SampleContextFrameVector());
+ CSNameTable.clear();
+ CSNameTable.reserve(*Size);
+ for (size_t I = 0; I < *Size; ++I) {
+ CSNameTable.emplace_back(SampleContextFrameVector());
auto ContextSize = readNumber<uint32_t>();
if (std::error_code EC = ContextSize.getError())
return EC;
@@ -1146,18 +1144,15 @@ std::error_code SampleProfileReaderExtBinaryBase::readCSNameTableSec() {
if (std::error_code EC = Discriminator.getError())
return EC;
- PNameVec->back().emplace_back(
+ CSNameTable.back().emplace_back(
FName.get(), LineLocation(LineOffset.get(), Discriminator.get()));
}
}
- // From this point the underlying object of CSNameTable should be immutable.
- CSNameTable.reset(PNameVec);
return sampleprof_error::success;
}
std::error_code
-
SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
FunctionSamples *FProfile) {
if (Data < End) {
@@ -1232,22 +1227,8 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
return sampleprof_error::success;
}
-std::error_code SampleProfileReaderCompactBinary::readNameTable() {
- auto Size = readNumber<uint64_t>();
- if (std::error_code EC = Size.getError())
- return EC;
- NameTable.reserve(*Size);
- for (uint64_t I = 0; I < *Size; ++I) {
- auto FID = readNumber<uint64_t>();
- if (std::error_code EC = FID.getError())
- return EC;
- NameTable.push_back(std::to_string(*FID));
- }
- return sampleprof_error::success;
-}
-
std::error_code
-SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint32_t Idx) {
+SampleProfileReaderExtBinaryBase::readSecHdrTableEntry(uint64_t Idx) {
SecHdrTableEntry Entry;
auto Type = readUnencodedNumber<uint64_t>();
if (std::error_code EC = Type.getError())
@@ -1425,54 +1406,6 @@ std::error_code SampleProfileReaderBinary::readHeader() {
return sampleprof_error::success;
}
-std::error_code SampleProfileReaderCompactBinary::readHeader() {
- SampleProfileReaderBinary::readHeader();
- if (std::error_code EC = readFuncOffsetTable())
- return EC;
- return sampleprof_error::success;
-}
-
-std::error_code SampleProfileReaderCompactBinary::readFuncOffsetTable() {
- auto TableOffset = readUnencodedNumber<uint64_t>();
- if (std::error_code EC = TableOffset.getError())
- return EC;
-
- const uint8_t *SavedData = Data;
- const uint8_t *TableStart =
- reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()) +
- *TableOffset;
- Data = TableStart;
-
- auto Size = readNumber<uint64_t>();
- if (std::error_code EC = Size.getError())
- return EC;
-
- FuncOffsetTable.reserve(*Size);
- for (uint64_t I = 0; I < *Size; ++I) {
- auto FName(readStringFromTable());
- if (std::error_code EC = FName.getError())
- return EC;
-
- auto Offset = readNumber<uint64_t>();
- if (std::error_code EC = Offset.getError())
- return EC;
-
- FuncOffsetTable[*FName] = *Offset;
- }
- End = TableStart;
- Data = SavedData;
- return sampleprof_error::success;
-}
-
-bool SampleProfileReaderCompactBinary::collectFuncsFromModule() {
- if (!M)
- return false;
- FuncsToUse.clear();
- for (auto &F : *M)
- FuncsToUse.insert(FunctionSamples::getCanonicalFnName(F));
- return true;
-}
-
std::error_code SampleProfileReaderBinary::readSummaryEntry(
std::vector<ProfileSummaryEntry> &Entries) {
auto Cutoff = readNumber<uint64_t>();
@@ -1543,13 +1476,6 @@ bool SampleProfileReaderExtBinary::hasFormat(const MemoryBuffer &Buffer) {
return Magic == SPMagic(SPF_Ext_Binary);
}
-bool SampleProfileReaderCompactBinary::hasFormat(const MemoryBuffer &Buffer) {
- const uint8_t *Data =
- reinterpret_cast<const uint8_t *>(Buffer.getBufferStart());
- uint64_t Magic = decodeULEB128(Data);
- return Magic == SPMagic(SPF_Compact_Binary);
-}
-
std::error_code SampleProfileReaderGCC::skipNextWord() {
uint32_t dummy;
if (!GcovBuffer.readInt(dummy))
@@ -1801,7 +1727,7 @@ void SampleProfileReaderItaniumRemapper::applyRemapping(LLVMContext &Ctx) {
Ctx.diagnose(DiagnosticInfoSampleProfile(
Reader.getBuffer()->getBufferIdentifier(),
"Profile data remapping cannot be applied to profile data "
- "in compact format (original mangled names are not available).",
+ "using MD5 names (original mangled names are not available).",
DS_Warning));
return;
}
@@ -1831,8 +1757,9 @@ SampleProfileReaderItaniumRemapper::lookUpNameInProfile(StringRef Fname) {
///
/// \returns an error code indicating the status of the buffer.
static ErrorOr<std::unique_ptr<MemoryBuffer>>
-setupMemoryBuffer(const Twine &Filename) {
- auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename, /*IsText=*/true);
+setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
+ auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
+ : FS.getBufferForFile(Filename);
if (std::error_code EC = BufferOrErr.getError())
return EC;
auto Buffer = std::move(BufferOrErr.get());
@@ -1853,12 +1780,12 @@ setupMemoryBuffer(const Twine &Filename) {
/// \returns an error code indicating the status of the created reader.
ErrorOr<std::unique_ptr<SampleProfileReader>>
SampleProfileReader::create(const std::string Filename, LLVMContext &C,
- FSDiscriminatorPass P,
+ vfs::FileSystem &FS, FSDiscriminatorPass P,
const std::string RemapFilename) {
- auto BufferOrError = setupMemoryBuffer(Filename);
+ auto BufferOrError = setupMemoryBuffer(Filename, FS);
if (std::error_code EC = BufferOrError.getError())
return EC;
- return create(BufferOrError.get(), C, P, RemapFilename);
+ return create(BufferOrError.get(), C, FS, P, RemapFilename);
}
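A minimal usage sketch of the new vfs-aware entry point, assuming the real file system and that the header keeps default values for the discriminator-pass and remap-file parameters; "sample.prof" is a placeholder path:

#include "llvm/IR/LLVMContext.h"
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/Support/VirtualFileSystem.h"
#include <memory>

static std::unique_ptr<llvm::sampleprof::SampleProfileReader>
openSampleProfile(llvm::LLVMContext &Ctx) {
  // Any vfs::FileSystem works here, e.g. an InMemoryFileSystem in unit tests.
  auto FS = llvm::vfs::getRealFileSystem();
  auto ReaderOrErr =
      llvm::sampleprof::SampleProfileReader::create("sample.prof", Ctx, *FS);
  if (ReaderOrErr.getError())
    return nullptr;
  return std::move(ReaderOrErr.get());
}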
/// Create a sample profile remapper from the given input, to remap the
@@ -1873,9 +1800,10 @@ SampleProfileReader::create(const std::string Filename, LLVMContext &C,
/// \returns an error code indicating the status of the created reader.
ErrorOr<std::unique_ptr<SampleProfileReaderItaniumRemapper>>
SampleProfileReaderItaniumRemapper::create(const std::string Filename,
+ vfs::FileSystem &FS,
SampleProfileReader &Reader,
LLVMContext &C) {
- auto BufferOrError = setupMemoryBuffer(Filename);
+ auto BufferOrError = setupMemoryBuffer(Filename, FS);
if (std::error_code EC = BufferOrError.getError())
return EC;
return create(BufferOrError.get(), Reader, C);
@@ -1923,15 +1851,13 @@ SampleProfileReaderItaniumRemapper::create(std::unique_ptr<MemoryBuffer> &B,
/// \returns an error code indicating the status of the created reader.
ErrorOr<std::unique_ptr<SampleProfileReader>>
SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
- FSDiscriminatorPass P,
+ vfs::FileSystem &FS, FSDiscriminatorPass P,
const std::string RemapFilename) {
std::unique_ptr<SampleProfileReader> Reader;
if (SampleProfileReaderRawBinary::hasFormat(*B))
Reader.reset(new SampleProfileReaderRawBinary(std::move(B), C));
else if (SampleProfileReaderExtBinary::hasFormat(*B))
Reader.reset(new SampleProfileReaderExtBinary(std::move(B), C));
- else if (SampleProfileReaderCompactBinary::hasFormat(*B))
- Reader.reset(new SampleProfileReaderCompactBinary(std::move(B), C));
else if (SampleProfileReaderGCC::hasFormat(*B))
Reader.reset(new SampleProfileReaderGCC(std::move(B), C));
else if (SampleProfileReaderText::hasFormat(*B))
@@ -1940,8 +1866,8 @@ SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C,
return sampleprof_error::unrecognized_format;
if (!RemapFilename.empty()) {
- auto ReaderOrErr =
- SampleProfileReaderItaniumRemapper::create(RemapFilename, *Reader, C);
+ auto ReaderOrErr = SampleProfileReaderItaniumRemapper::create(
+ RemapFilename, FS, *Reader, C);
if (std::error_code EC = ReaderOrErr.getError()) {
std::string Msg = "Could not create remapper: " + EC.message();
C.diagnose(DiagnosticInfoSampleProfile(RemapFilename, Msg));
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 093790afe2d6..0873093ad426 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -30,6 +30,7 @@
#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
+#include <cmath>
#include <cstdint>
#include <memory>
#include <set>
@@ -37,9 +38,109 @@
#include <utility>
#include <vector>
+#define DEBUG_TYPE "llvm-profdata"
+
using namespace llvm;
using namespace sampleprof;
+namespace llvm {
+namespace support {
+namespace endian {
+namespace {
+
+// Adapter class to llvm::support::endian::Writer for pwrite().
+struct SeekableWriter {
+ raw_pwrite_stream &OS;
+ endianness Endian;
+ SeekableWriter(raw_pwrite_stream &OS, endianness Endian)
+ : OS(OS), Endian(Endian) {}
+
+ template <typename ValueType>
+ void pwrite(ValueType Val, size_t Offset) {
+ std::string StringBuf;
+ raw_string_ostream SStream(StringBuf);
+ Writer(SStream, Endian).write(Val);
+ OS.pwrite(StringBuf.data(), StringBuf.size(), Offset);
+ }
+};
+
+} // namespace
+} // namespace endian
+} // namespace support
+} // namespace llvm
+
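A short usage sketch of the adapter, mirroring the call sites in writeSecHdrTable further down; it only makes sense inside this translation unit (the type lives in an anonymous namespace), and SectionSize/HeaderSlotOffset are placeholder names:

// Patch a previously reserved uint64_t slot at an absolute file offset without
// moving the stream's current write position.
support::endian::SeekableWriter PatchWriter(
    static_cast<raw_pwrite_stream &>(*OutputStream), support::little);
PatchWriter.pwrite(static_cast<uint64_t>(SectionSize), HeaderSlotOffset);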
+DefaultFunctionPruningStrategy::DefaultFunctionPruningStrategy(
+ SampleProfileMap &ProfileMap, size_t OutputSizeLimit)
+ : FunctionPruningStrategy(ProfileMap, OutputSizeLimit) {
+ sortFuncProfiles(ProfileMap, SortedFunctions);
+}
+
+void DefaultFunctionPruningStrategy::Erase(size_t CurrentOutputSize) {
+ double D = (double)OutputSizeLimit / CurrentOutputSize;
+ size_t NewSize = (size_t)round(ProfileMap.size() * D * D);
+ size_t NumToRemove = ProfileMap.size() - NewSize;
+ if (NumToRemove < 1)
+ NumToRemove = 1;
+
+ assert(NumToRemove <= SortedFunctions.size());
+ llvm::for_each(
+ llvm::make_range(SortedFunctions.begin() + SortedFunctions.size() -
+ NumToRemove,
+ SortedFunctions.end()),
+ [&](const NameFunctionSamples &E) { ProfileMap.erase(E.first); });
+ SortedFunctions.resize(SortedFunctions.size() - NumToRemove);
+}
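As a worked example of the shrink factor (numbers are illustrative): with OutputSizeLimit = 100 MB and a current output of 150 MB,

  D       = 100 MB / 150 MB ≈ 0.667
  NewSize ≈ round(|ProfileMap| * D * D) ≈ 0.44 * |ProfileMap|

so roughly the coldest 56% of functions (the tail of SortedFunctions) are erased before the next write attempt; the quadratic factor prunes more aggressively than a proportional cut, presumably to keep the retry loop in writeWithSizeLimitInternal short.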
+
+std::error_code SampleProfileWriter::writeWithSizeLimitInternal(
+ SampleProfileMap &ProfileMap, size_t OutputSizeLimit,
+ FunctionPruningStrategy *Strategy) {
+ if (OutputSizeLimit == 0)
+ return write(ProfileMap);
+
+ size_t OriginalFunctionCount = ProfileMap.size();
+
+ std::unique_ptr<raw_ostream> OriginalOutputStream;
+ OutputStream.swap(OriginalOutputStream);
+
+ size_t IterationCount = 0;
+ size_t TotalSize;
+
+ SmallVector<char> StringBuffer;
+ do {
+ StringBuffer.clear();
+ OutputStream.reset(new raw_svector_ostream(StringBuffer));
+ if (std::error_code EC = write(ProfileMap))
+ return EC;
+
+ TotalSize = StringBuffer.size();
+ // On Windows every "\n" is actually written as "\r\n" to disk but not to the
+ // memory buffer; this difference should be added when computing the total
+ // output size.
+#ifdef _WIN32
+ if (Format == SPF_Text)
+ TotalSize += LineCount;
+#endif
+ if (TotalSize <= OutputSizeLimit)
+ break;
+
+ Strategy->Erase(TotalSize);
+ IterationCount++;
+ } while (ProfileMap.size() != 0);
+
+ if (ProfileMap.size() == 0)
+ return sampleprof_error::too_large;
+
+ OutputStream.swap(OriginalOutputStream);
+ OutputStream->write(StringBuffer.data(), StringBuffer.size());
+ LLVM_DEBUG(dbgs() << "Profile originally has " << OriginalFunctionCount
+ << " functions, reduced to " << ProfileMap.size() << " in "
+ << IterationCount << " iterations\n");
+ // Silence warning on Release build.
+ (void)OriginalFunctionCount;
+ (void)IterationCount;
+ return sampleprof_error::success;
+}
+
std::error_code
SampleProfileWriter::writeFuncProfiles(const SampleProfileMap &ProfileMap) {
std::vector<NameFunctionSamples> V;
@@ -116,6 +217,12 @@ std::error_code SampleProfileWriterExtBinaryBase::addNewSection(
std::error_code
SampleProfileWriterExtBinaryBase::write(const SampleProfileMap &ProfileMap) {
+ // When calling write on a different profile map, existing state should be
+ // cleared.
+ NameTable.clear();
+ CSNameTable.clear();
+ SecHdrTable.clear();
+
if (std::error_code EC = writeHeader(ProfileMap))
return EC;
@@ -450,15 +557,6 @@ std::error_code SampleProfileWriterExtBinary::writeSections(
return EC;
}
-std::error_code
-SampleProfileWriterCompactBinary::write(const SampleProfileMap &ProfileMap) {
- if (std::error_code EC = SampleProfileWriter::write(ProfileMap))
- return EC;
- if (std::error_code EC = writeFuncOffsetTable())
- return EC;
- return sampleprof_error::success;
-}
-
/// Write samples to a text file.
///
/// Note: it may be tempting to implement this in terms of
@@ -477,6 +575,7 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
if (Indent == 0)
OS << ":" << S.getHeadSamples();
OS << "\n";
+ LineCount++;
SampleSorter<LineLocation, SampleRecord> SortedSamples(S.getBodySamples());
for (const auto &I : SortedSamples.get()) {
@@ -493,6 +592,7 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
for (const auto &J : Sample.getSortedCallTargets())
OS << " " << J.first << ":" << J.second;
OS << "\n";
+ LineCount++;
}
SampleSorter<LineLocation, FunctionSamplesMap> SortedCallsiteSamples(
@@ -515,11 +615,13 @@ std::error_code SampleProfileWriterText::writeSample(const FunctionSamples &S) {
if (FunctionSamples::ProfileIsProbeBased) {
OS.indent(Indent + 1);
OS << "!CFGChecksum: " << S.getFunctionHash() << "\n";
+ LineCount++;
}
if (S.getContext().getAllAttributes()) {
OS.indent(Indent + 1);
OS << "!Attributes: " << S.getContext().getAllAttributes() << "\n";
+ LineCount++;
}
return sampleprof_error::success;
@@ -601,44 +703,6 @@ std::error_code SampleProfileWriterBinary::writeNameTable() {
return sampleprof_error::success;
}
-std::error_code SampleProfileWriterCompactBinary::writeFuncOffsetTable() {
- auto &OS = *OutputStream;
-
- // Fill the slot remembered by TableOffset with the offset of FuncOffsetTable.
- auto &OFS = static_cast<raw_fd_ostream &>(OS);
- uint64_t FuncOffsetTableStart = OS.tell();
- if (OFS.seek(TableOffset) == (uint64_t)-1)
- return sampleprof_error::ostream_seek_unsupported;
- support::endian::Writer Writer(*OutputStream, support::little);
- Writer.write(FuncOffsetTableStart);
- if (OFS.seek(FuncOffsetTableStart) == (uint64_t)-1)
- return sampleprof_error::ostream_seek_unsupported;
-
- // Write out the table size.
- encodeULEB128(FuncOffsetTable.size(), OS);
-
- // Write out FuncOffsetTable.
- for (auto Entry : FuncOffsetTable) {
- if (std::error_code EC = writeNameIdx(Entry.first))
- return EC;
- encodeULEB128(Entry.second, OS);
- }
- return sampleprof_error::success;
-}
-
-std::error_code SampleProfileWriterCompactBinary::writeNameTable() {
- auto &OS = *OutputStream;
- std::set<StringRef> V;
- stablizeNameTable(NameTable, V);
-
- // Write out the name table.
- encodeULEB128(NameTable.size(), OS);
- for (auto N : V) {
- encodeULEB128(MD5Hash(N), OS);
- }
- return sampleprof_error::success;
-}
-
std::error_code
SampleProfileWriterBinary::writeMagicIdent(SampleProfileFormat Format) {
auto &OS = *OutputStream;
@@ -650,6 +714,10 @@ SampleProfileWriterBinary::writeMagicIdent(SampleProfileFormat Format) {
std::error_code
SampleProfileWriterBinary::writeHeader(const SampleProfileMap &ProfileMap) {
+ // When calling write on a different profile map, existing names should be
+ // cleared.
+ NameTable.clear();
+
writeMagicIdent(Format);
computeSummary(ProfileMap);
@@ -690,14 +758,6 @@ void SampleProfileWriterExtBinaryBase::allocSecHdrTable() {
}
std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() {
- auto &OFS = static_cast<raw_fd_ostream &>(*OutputStream);
- uint64_t Saved = OutputStream->tell();
-
- // Set OutputStream to the location saved in SecHdrTableOffset.
- if (OFS.seek(SecHdrTableOffset) == (uint64_t)-1)
- return sampleprof_error::ostream_seek_unsupported;
- support::endian::Writer Writer(*OutputStream, support::little);
-
assert(SecHdrTable.size() == SectionHdrLayout.size() &&
"SecHdrTable entries doesn't match SectionHdrLayout");
SmallVector<uint32_t, 16> IndexMap(SecHdrTable.size(), -1);
@@ -714,21 +774,23 @@ std::error_code SampleProfileWriterExtBinaryBase::writeSecHdrTable() {
// needs to be computed after SecLBRProfile (the order in SecHdrTable),
// but it needs to be read before SecLBRProfile (the order in
// SectionHdrLayout). So we use IndexMap above to switch the order.
+ support::endian::SeekableWriter Writer(
+ static_cast<raw_pwrite_stream &>(*OutputStream), support::little);
for (uint32_t LayoutIdx = 0; LayoutIdx < SectionHdrLayout.size();
LayoutIdx++) {
assert(IndexMap[LayoutIdx] < SecHdrTable.size() &&
"Incorrect LayoutIdx in SecHdrTable");
auto Entry = SecHdrTable[IndexMap[LayoutIdx]];
- Writer.write(static_cast<uint64_t>(Entry.Type));
- Writer.write(static_cast<uint64_t>(Entry.Flags));
- Writer.write(static_cast<uint64_t>(Entry.Offset));
- Writer.write(static_cast<uint64_t>(Entry.Size));
+ Writer.pwrite(static_cast<uint64_t>(Entry.Type),
+ SecHdrTableOffset + 4 * LayoutIdx * sizeof(uint64_t));
+ Writer.pwrite(static_cast<uint64_t>(Entry.Flags),
+ SecHdrTableOffset + (4 * LayoutIdx + 1) * sizeof(uint64_t));
+ Writer.pwrite(static_cast<uint64_t>(Entry.Offset),
+ SecHdrTableOffset + (4 * LayoutIdx + 2) * sizeof(uint64_t));
+ Writer.pwrite(static_cast<uint64_t>(Entry.Size),
+ SecHdrTableOffset + (4 * LayoutIdx + 3) * sizeof(uint64_t));
}
- // Reset OutputStream.
- if (OFS.seek(Saved) == (uint64_t)-1)
- return sampleprof_error::ostream_seek_unsupported;
-
return sampleprof_error::success;
}
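To make the patch offsets above concrete: each header entry is four uint64_t fields, so entry LayoutIdx starts at SecHdrTableOffset + 32 * LayoutIdx bytes and its fields land at fixed offsets within that slot:

  SecHdrTableOffset + 32*LayoutIdx +  0 : Type
  SecHdrTableOffset + 32*LayoutIdx +  8 : Flags
  SecHdrTableOffset + 32*LayoutIdx + 16 : Offset
  SecHdrTableOffset + 32*LayoutIdx + 24 : Size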
@@ -742,19 +804,6 @@ std::error_code SampleProfileWriterExtBinaryBase::writeHeader(
return sampleprof_error::success;
}
-std::error_code SampleProfileWriterCompactBinary::writeHeader(
- const SampleProfileMap &ProfileMap) {
- support::endian::Writer Writer(*OutputStream, support::little);
- if (auto EC = SampleProfileWriterBinary::writeHeader(ProfileMap))
- return EC;
-
- // Reserve a slot for the offset of function offset table. The slot will
- // be populated with the offset of FuncOffsetTable later.
- TableOffset = OutputStream->tell();
- Writer.write(static_cast<uint64_t>(-2));
- return sampleprof_error::success;
-}
-
std::error_code SampleProfileWriterBinary::writeSummary() {
auto &OS = *OutputStream;
encodeULEB128(Summary->getTotalCount(), OS);
@@ -824,15 +873,6 @@ SampleProfileWriterBinary::writeSample(const FunctionSamples &S) {
return writeBody(S);
}
-std::error_code
-SampleProfileWriterCompactBinary::writeSample(const FunctionSamples &S) {
- uint64_t Offset = OutputStream->tell();
- StringRef Name = S.getName();
- FuncOffsetTable[Name] = Offset;
- encodeULEB128(S.getHeadSamples(), *OutputStream);
- return writeBody(S);
-}
-
/// Create a sample profile file writer based on the specified format.
///
/// \param Filename The file to create.
@@ -844,8 +884,7 @@ ErrorOr<std::unique_ptr<SampleProfileWriter>>
SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) {
std::error_code EC;
std::unique_ptr<raw_ostream> OS;
- if (Format == SPF_Binary || Format == SPF_Ext_Binary ||
- Format == SPF_Compact_Binary)
+ if (Format == SPF_Binary || Format == SPF_Ext_Binary)
OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::OF_None));
else
OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::OF_TextWithCRLF));
@@ -870,15 +909,13 @@ SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS,
// Currently only Text and Extended Binary format are supported for CSSPGO.
if ((FunctionSamples::ProfileIsCS || FunctionSamples::ProfileIsProbeBased) &&
- (Format == SPF_Binary || Format == SPF_Compact_Binary))
+ Format == SPF_Binary)
return sampleprof_error::unsupported_writing_format;
if (Format == SPF_Binary)
Writer.reset(new SampleProfileWriterRawBinary(OS));
else if (Format == SPF_Ext_Binary)
Writer.reset(new SampleProfileWriterExtBinary(OS));
- else if (Format == SPF_Compact_Binary)
- Writer.reset(new SampleProfileWriterCompactBinary(OS));
else if (Format == SPF_Text)
Writer.reset(new SampleProfileWriterText(OS));
else if (Format == SPF_GCC)
diff --git a/llvm/lib/Support/SymbolRemappingReader.cpp b/llvm/lib/ProfileData/SymbolRemappingReader.cpp
index 0082696038e3..78457beb3e49 100644
--- a/llvm/lib/Support/SymbolRemappingReader.cpp
+++ b/llvm/lib/ProfileData/SymbolRemappingReader.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/SymbolRemappingReader.h"
+#include "llvm/ProfileData/SymbolRemappingReader.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/LineIterator.h"
diff --git a/llvm/lib/Remarks/Remark.cpp b/llvm/lib/Remarks/Remark.cpp
index a038f81874d1..1b248db41747 100644
--- a/llvm/lib/Remarks/Remark.cpp
+++ b/llvm/lib/Remarks/Remark.cpp
@@ -12,7 +12,6 @@
#include "llvm/Remarks/Remark.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/raw_ostream.h"
#include <optional>
using namespace llvm;
@@ -26,6 +25,33 @@ std::string Remark::getArgsAsMsg() const {
return OS.str();
}
+void RemarkLocation::print(raw_ostream &OS) const {
+ OS << "{ "
+ << "File: " << SourceFilePath << ", Line: " << SourceLine
+ << " Column:" << SourceColumn << " }\n";
+}
+
+void Argument::print(raw_ostream &OS) const {
+ OS << Key << ": " << Val << "\n";
+}
+
+void Remark::print(raw_ostream &OS) const {
+ OS << "Name: ";
+ OS << RemarkName << "\n";
+ OS << "Type: " << typeToStr(RemarkType) << "\n";
+ OS << "FunctionName: " << FunctionName << "\n";
+ OS << "PassName: " << PassName << "\n";
+ if (Loc)
+ OS << "Loc: " << Loc.value();
+ if (Hotness)
+ OS << "Hotness: " << Hotness;
+ if (!Args.empty()) {
+ OS << "Args:\n";
+ for (auto Arg : Args)
+ OS << "\t" << Arg;
+ }
+}
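A trivial usage sketch of the new printers, assuming only the interface visible above (dumpRemark is a hypothetical helper):

#include "llvm/Remarks/Remark.h"
#include "llvm/Support/raw_ostream.h"

static void dumpRemark(const llvm::remarks::Remark &R) {
  // Emits Name/Type/FunctionName/PassName, then the optional location,
  // hotness and arguments, exactly as implemented above.
  R.print(llvm::errs());
}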
+
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(StringRef, LLVMRemarkStringRef)
diff --git a/llvm/lib/Remarks/RemarkLinker.cpp b/llvm/lib/Remarks/RemarkLinker.cpp
index 74acb0835ff8..b70b06d706bd 100644
--- a/llvm/lib/Remarks/RemarkLinker.cpp
+++ b/llvm/lib/Remarks/RemarkLinker.cpp
@@ -66,9 +66,6 @@ void RemarkLinker::setExternalFilePrependPath(StringRef PrependPathIn) {
PrependPath = std::string(PrependPathIn);
}
-// Discard remarks with no source location.
-static bool shouldKeepRemark(const Remark &R) { return R.Loc.has_value(); }
-
Error RemarkLinker::link(StringRef Buffer, std::optional<Format> RemarkFormat) {
if (!RemarkFormat) {
Expected<Format> ParserFormat = magicToFormat(Buffer);
diff --git a/llvm/lib/Remarks/YAMLRemarkParser.cpp b/llvm/lib/Remarks/YAMLRemarkParser.cpp
index 4996ab6b08b9..f5123b0f64ce 100644
--- a/llvm/lib/Remarks/YAMLRemarkParser.cpp
+++ b/llvm/lib/Remarks/YAMLRemarkParser.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "YAMLRemarkParser.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Path.h"
@@ -292,9 +293,16 @@ Expected<StringRef> YAMLRemarkParser::parseKey(yaml::KeyValueNode &Node) {
Expected<StringRef> YAMLRemarkParser::parseStr(yaml::KeyValueNode &Node) {
auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
- if (!Value)
- return error("expected a value of scalar type.", Node);
- StringRef Result = Value->getRawValue();
+ yaml::BlockScalarNode *ValueBlock;
+ StringRef Result;
+ if (!Value) {
+ // Try to parse the value as a block node.
+ ValueBlock = dyn_cast<yaml::BlockScalarNode>(Node.getValue());
+ if (!ValueBlock)
+ return error("expected a value of scalar type.", Node);
+ Result = ValueBlock->getValue();
+ } else
+ Result = Value->getRawValue();
if (Result.front() == '\'')
Result = Result.drop_front();
@@ -428,9 +436,16 @@ Expected<std::unique_ptr<Remark>> YAMLRemarkParser::next() {
Expected<StringRef> YAMLStrTabRemarkParser::parseStr(yaml::KeyValueNode &Node) {
auto *Value = dyn_cast<yaml::ScalarNode>(Node.getValue());
- if (!Value)
- return error("expected a value of scalar type.", Node);
+ yaml::BlockScalarNode *ValueBlock;
StringRef Result;
+ if (!Value) {
+ // Try to parse the value as a block node.
+ ValueBlock = dyn_cast<yaml::BlockScalarNode>(Node.getValue());
+ if (!ValueBlock)
+ return error("expected a value of scalar type.", Node);
+ Result = ValueBlock->getValue();
+ } else
+ Result = Value->getRawValue();
// If we have a string table, parse it as an unsigned.
unsigned StrID = 0;
if (Expected<unsigned> MaybeStrID = parseUnsigned(Node))
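The change above lets parseStr() fall back to yaml::BlockScalarNode, so string values written as YAML block scalars no longer hit the "expected a value of scalar type." error. A minimal driver sketch using the public parser entry point (hypothetical caller, not from the patch; only createRemarkParser/next/print are real API):

#include "llvm/Remarks/Remark.h"
#include "llvm/Remarks/RemarkParser.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::remarks;

// Parse a buffer of YAML remarks; string fields may now be block scalars.
void parseYAMLRemarks(StringRef Buf) {
  Expected<std::unique_ptr<RemarkParser>> MaybeParser =
      createRemarkParser(Format::YAML, Buf);
  if (!MaybeParser) {
    consumeError(MaybeParser.takeError());
    return;
  }
  while (true) {
    Expected<std::unique_ptr<Remark>> MaybeRemark = (*MaybeParser)->next();
    if (!MaybeRemark) {
      // End of file or a parse error; a real caller would inspect the error.
      consumeError(MaybeRemark.takeError());
      break;
    }
    (*MaybeRemark)->print(errs());
  }
}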
diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp
index eae4fdb6c3d0..4a73739b5282 100644
--- a/llvm/lib/Support/APFloat.cpp
+++ b/llvm/lib/Support/APFloat.cpp
@@ -14,8 +14,10 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Config/llvm-config.h"
@@ -51,209 +53,303 @@ static_assert(APFloatBase::integerPartWidth % 4 == 0, "Part width must be divisi
namespace llvm {
- // How the nonfinite values Inf and NaN are represented.
- enum class fltNonfiniteBehavior {
- // Represents standard IEEE 754 behavior. A value is nonfinite if the
- // exponent field is all 1s. In such cases, a value is Inf if the
- // significand bits are all zero, and NaN otherwise
- IEEE754,
-
- // Only the Float8E5M2 has this behavior. There is no Inf representation. A
- // value is NaN if the exponent field and the mantissa field are all 1s.
- // This behavior matches the FP8 E4M3 type described in
- // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
- // as non-signalling, although the paper does not state whether the NaN
- // values are signalling or not.
- NanOnly,
- };
+// How the nonfinite values Inf and NaN are represented.
+enum class fltNonfiniteBehavior {
+ // Represents standard IEEE 754 behavior. A value is nonfinite if the
+ // exponent field is all 1s. In such cases, a value is Inf if the
+ // significand bits are all zero, and NaN otherwise
+ IEEE754,
+
+ // This behavior is present in the Float8ExMyFN* types (Float8E4M3FN,
+ // Float8E5M2FNUZ, Float8E4M3FNUZ, and Float8E4M3B11FNUZ). There is no
+ // representation for Inf, and operations that would ordinarily produce Inf
+ // produce NaN instead.
+ // The details of the NaN representation(s) in this form are determined by the
+ // `fltNanEncoding` enum. We treat all NaNs as quiet, as the available
+ // encodings do not distinguish between signalling and quiet NaN.
+ NanOnly,
+};
- /* Represents floating point arithmetic semantics. */
- struct fltSemantics {
- /* The largest E such that 2^E is representable; this matches the
- definition of IEEE 754. */
- APFloatBase::ExponentType maxExponent;
+// How NaN values are represented. This is currently only used in combination
+// with fltNonfiniteBehavior::NanOnly, and using a variant other than IEEE
+// while having IEEE non-finite behavior is liable to lead to unexpected
+// results.
+enum class fltNanEncoding {
+ // Represents the standard IEEE behavior where a value is NaN if its
+ // exponent is all 1s and the significand is non-zero.
+ IEEE,
+
+ // Represents the behavior in the Float8E4M3 floating point type where NaN is
+ // represented by having the exponent and mantissa set to all 1s.
+ // This behavior matches the FP8 E4M3 type described in
+ // https://arxiv.org/abs/2209.05433. We treat both signed and unsigned NaNs
+ // as non-signalling, although the paper does not state whether the NaN
+ // values are signalling or not.
+ AllOnes,
+
+  // Represents the behavior in Float8E{5,4}M{2,3}FNUZ floating point types
+  // where NaN is represented by a sign bit of 1 and all 0s in the exponent
+  // and mantissa (i.e. the negative zero encoding in an IEEE float). Since
+ // there is only one NaN value, it is treated as quiet NaN. This matches the
+ // behavior described in https://arxiv.org/abs/2206.02915 .
+ NegativeZero,
+};
- /* The smallest E such that 2^E is a normalized number; this
- matches the definition of IEEE 754. */
- APFloatBase::ExponentType minExponent;
+/* Represents floating point arithmetic semantics. */
+struct fltSemantics {
+ /* The largest E such that 2^E is representable; this matches the
+ definition of IEEE 754. */
+ APFloatBase::ExponentType maxExponent;
- /* Number of bits in the significand. This includes the integer
- bit. */
- unsigned int precision;
+ /* The smallest E such that 2^E is a normalized number; this
+ matches the definition of IEEE 754. */
+ APFloatBase::ExponentType minExponent;
- /* Number of bits actually used in the semantics. */
- unsigned int sizeInBits;
+ /* Number of bits in the significand. This includes the integer
+ bit. */
+ unsigned int precision;
- fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
+ /* Number of bits actually used in the semantics. */
+ unsigned int sizeInBits;
- // Returns true if any number described by this semantics can be precisely
- // represented by the specified semantics. Does not take into account
- // the value of fltNonfiniteBehavior.
- bool isRepresentableBy(const fltSemantics &S) const {
- return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
- precision <= S.precision;
- }
- };
+ fltNonfiniteBehavior nonFiniteBehavior = fltNonfiniteBehavior::IEEE754;
- static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
- static const fltSemantics semBFloat = {127, -126, 8, 16};
- static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
- static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
- static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
- static const fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
- static const fltSemantics semFloat8E4M3FN = {8, -6, 4, 8,
- fltNonfiniteBehavior::NanOnly};
- static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
- static const fltSemantics semBogus = {0, 0, 0, 0};
-
- /* The IBM double-double semantics. Such a number consists of a pair of IEEE
- 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
- (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
- Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
- to each other, and two 11-bit exponents.
-
- Note: we need to make the value different from semBogus as otherwise
- an unsafe optimization may collapse both values to a single address,
- and we heavily rely on them having distinct addresses. */
- static const fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
-
- /* These are legacy semantics for the fallback, inaccrurate implementation of
- IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
- operation. It's equivalent to having an IEEE number with consecutive 106
- bits of mantissa and 11 bits of exponent.
-
- It's not equivalent to IBM double-double. For example, a legit IBM
- double-double, 1 + epsilon:
-
- 1 + epsilon = 1 + (1 >> 1076)
-
- is not representable by a consecutive 106 bits of mantissa.
-
- Currently, these semantics are used in the following way:
-
- semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
- (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
- semPPCDoubleDoubleLegacy -> IEEE operations
-
- We use bitcastToAPInt() to get the bit representation (in APInt) of the
- underlying IEEEdouble, then use the APInt constructor to construct the
- legacy IEEE float.
-
- TODO: Implement all operations in semPPCDoubleDouble, and delete these
- semantics. */
- static const fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
- 53 + 53, 128};
-
- const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
- switch (S) {
- case S_IEEEhalf:
- return IEEEhalf();
- case S_BFloat:
- return BFloat();
- case S_IEEEsingle:
- return IEEEsingle();
- case S_IEEEdouble:
- return IEEEdouble();
- case S_IEEEquad:
- return IEEEquad();
- case S_PPCDoubleDouble:
- return PPCDoubleDouble();
- case S_Float8E5M2:
- return Float8E5M2();
- case S_Float8E4M3FN:
- return Float8E4M3FN();
- case S_x87DoubleExtended:
- return x87DoubleExtended();
- }
- llvm_unreachable("Unrecognised floating semantics");
- }
-
- APFloatBase::Semantics
- APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
- if (&Sem == &llvm::APFloat::IEEEhalf())
- return S_IEEEhalf;
- else if (&Sem == &llvm::APFloat::BFloat())
- return S_BFloat;
- else if (&Sem == &llvm::APFloat::IEEEsingle())
- return S_IEEEsingle;
- else if (&Sem == &llvm::APFloat::IEEEdouble())
- return S_IEEEdouble;
- else if (&Sem == &llvm::APFloat::IEEEquad())
- return S_IEEEquad;
- else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
- return S_PPCDoubleDouble;
- else if (&Sem == &llvm::APFloat::Float8E5M2())
- return S_Float8E5M2;
- else if (&Sem == &llvm::APFloat::Float8E4M3FN())
- return S_Float8E4M3FN;
- else if (&Sem == &llvm::APFloat::x87DoubleExtended())
- return S_x87DoubleExtended;
- else
- llvm_unreachable("Unknown floating semantics");
+ fltNanEncoding nanEncoding = fltNanEncoding::IEEE;
+ // Returns true if any number described by this semantics can be precisely
+ // represented by the specified semantics. Does not take into account
+ // the value of fltNonfiniteBehavior.
+ bool isRepresentableBy(const fltSemantics &S) const {
+ return maxExponent <= S.maxExponent && minExponent >= S.minExponent &&
+ precision <= S.precision;
}
+};
- const fltSemantics &APFloatBase::IEEEhalf() {
- return semIEEEhalf;
- }
- const fltSemantics &APFloatBase::BFloat() {
- return semBFloat;
- }
- const fltSemantics &APFloatBase::IEEEsingle() {
- return semIEEEsingle;
- }
- const fltSemantics &APFloatBase::IEEEdouble() {
- return semIEEEdouble;
- }
- const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
- const fltSemantics &APFloatBase::PPCDoubleDouble() {
- return semPPCDoubleDouble;
- }
- const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
- const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
- const fltSemantics &APFloatBase::x87DoubleExtended() {
- return semX87DoubleExtended;
- }
- const fltSemantics &APFloatBase::Bogus() { return semBogus; }
+static constexpr fltSemantics semIEEEhalf = {15, -14, 11, 16};
+static constexpr fltSemantics semBFloat = {127, -126, 8, 16};
+static constexpr fltSemantics semIEEEsingle = {127, -126, 24, 32};
+static constexpr fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
+static constexpr fltSemantics semIEEEquad = {16383, -16382, 113, 128};
+static constexpr fltSemantics semFloat8E5M2 = {15, -14, 3, 8};
+static constexpr fltSemantics semFloat8E5M2FNUZ = {
+ 15, -15, 3, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
+static constexpr fltSemantics semFloat8E4M3FN = {
+ 8, -6, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::AllOnes};
+static constexpr fltSemantics semFloat8E4M3FNUZ = {
+ 7, -7, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
+static constexpr fltSemantics semFloat8E4M3B11FNUZ = {
+ 4, -10, 4, 8, fltNonfiniteBehavior::NanOnly, fltNanEncoding::NegativeZero};
+static constexpr fltSemantics semFloatTF32 = {127, -126, 11, 19};
+static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
+static constexpr fltSemantics semBogus = {0, 0, 0, 0};
+
+/* The IBM double-double semantics. Such a number consists of a pair of IEEE
+ 64-bit doubles (Hi, Lo), where |Hi| > |Lo|, and if normal,
+ (double)(Hi + Lo) == Hi. The numeric value it's modeling is Hi + Lo.
+ Therefore it has two 53-bit mantissa parts that aren't necessarily adjacent
+ to each other, and two 11-bit exponents.
+
+ Note: we need to make the value different from semBogus as otherwise
+ an unsafe optimization may collapse both values to a single address,
+ and we heavily rely on them having distinct addresses. */
+static constexpr fltSemantics semPPCDoubleDouble = {-1, 0, 0, 128};
+
+/* These are legacy semantics for the fallback, inaccurate implementation of
+ IBM double-double, if the accurate semPPCDoubleDouble doesn't handle the
+ operation. It's equivalent to having an IEEE number with consecutive 106
+ bits of mantissa and 11 bits of exponent.
+
+ It's not equivalent to IBM double-double. For example, a legit IBM
+ double-double, 1 + epsilon:
+
+ 1 + epsilon = 1 + (1 >> 1076)
+
+ is not representable by a consecutive 106 bits of mantissa.
+
+ Currently, these semantics are used in the following way:
+
+ semPPCDoubleDouble -> (IEEEdouble, IEEEdouble) ->
+ (64-bit APInt, 64-bit APInt) -> (128-bit APInt) ->
+ semPPCDoubleDoubleLegacy -> IEEE operations
+
+ We use bitcastToAPInt() to get the bit representation (in APInt) of the
+ underlying IEEEdouble, then use the APInt constructor to construct the
+ legacy IEEE float.
+
+ TODO: Implement all operations in semPPCDoubleDouble, and delete these
+ semantics. */
+static constexpr fltSemantics semPPCDoubleDoubleLegacy = {1023, -1022 + 53,
+ 53 + 53, 128};
+
+const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) {
+ switch (S) {
+ case S_IEEEhalf:
+ return IEEEhalf();
+ case S_BFloat:
+ return BFloat();
+ case S_IEEEsingle:
+ return IEEEsingle();
+ case S_IEEEdouble:
+ return IEEEdouble();
+ case S_IEEEquad:
+ return IEEEquad();
+ case S_PPCDoubleDouble:
+ return PPCDoubleDouble();
+ case S_Float8E5M2:
+ return Float8E5M2();
+ case S_Float8E5M2FNUZ:
+ return Float8E5M2FNUZ();
+ case S_Float8E4M3FN:
+ return Float8E4M3FN();
+ case S_Float8E4M3FNUZ:
+ return Float8E4M3FNUZ();
+ case S_Float8E4M3B11FNUZ:
+ return Float8E4M3B11FNUZ();
+ case S_FloatTF32:
+ return FloatTF32();
+ case S_x87DoubleExtended:
+ return x87DoubleExtended();
+ }
+ llvm_unreachable("Unrecognised floating semantics");
+}
+
+APFloatBase::Semantics
+APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) {
+ if (&Sem == &llvm::APFloat::IEEEhalf())
+ return S_IEEEhalf;
+ else if (&Sem == &llvm::APFloat::BFloat())
+ return S_BFloat;
+ else if (&Sem == &llvm::APFloat::IEEEsingle())
+ return S_IEEEsingle;
+ else if (&Sem == &llvm::APFloat::IEEEdouble())
+ return S_IEEEdouble;
+ else if (&Sem == &llvm::APFloat::IEEEquad())
+ return S_IEEEquad;
+ else if (&Sem == &llvm::APFloat::PPCDoubleDouble())
+ return S_PPCDoubleDouble;
+ else if (&Sem == &llvm::APFloat::Float8E5M2())
+ return S_Float8E5M2;
+ else if (&Sem == &llvm::APFloat::Float8E5M2FNUZ())
+ return S_Float8E5M2FNUZ;
+ else if (&Sem == &llvm::APFloat::Float8E4M3FN())
+ return S_Float8E4M3FN;
+ else if (&Sem == &llvm::APFloat::Float8E4M3FNUZ())
+ return S_Float8E4M3FNUZ;
+ else if (&Sem == &llvm::APFloat::Float8E4M3B11FNUZ())
+ return S_Float8E4M3B11FNUZ;
+ else if (&Sem == &llvm::APFloat::FloatTF32())
+ return S_FloatTF32;
+ else if (&Sem == &llvm::APFloat::x87DoubleExtended())
+ return S_x87DoubleExtended;
+ else
+ llvm_unreachable("Unknown floating semantics");
+}
+
+const fltSemantics &APFloatBase::IEEEhalf() { return semIEEEhalf; }
+const fltSemantics &APFloatBase::BFloat() { return semBFloat; }
+const fltSemantics &APFloatBase::IEEEsingle() { return semIEEEsingle; }
+const fltSemantics &APFloatBase::IEEEdouble() { return semIEEEdouble; }
+const fltSemantics &APFloatBase::IEEEquad() { return semIEEEquad; }
+const fltSemantics &APFloatBase::PPCDoubleDouble() {
+ return semPPCDoubleDouble;
+}
+const fltSemantics &APFloatBase::Float8E5M2() { return semFloat8E5M2; }
+const fltSemantics &APFloatBase::Float8E5M2FNUZ() { return semFloat8E5M2FNUZ; }
+const fltSemantics &APFloatBase::Float8E4M3FN() { return semFloat8E4M3FN; }
+const fltSemantics &APFloatBase::Float8E4M3FNUZ() { return semFloat8E4M3FNUZ; }
+const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() {
+ return semFloat8E4M3B11FNUZ;
+}
+const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; }
+const fltSemantics &APFloatBase::x87DoubleExtended() {
+ return semX87DoubleExtended;
+}
+const fltSemantics &APFloatBase::Bogus() { return semBogus; }
+
+constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
+constexpr RoundingMode APFloatBase::rmTowardPositive;
+constexpr RoundingMode APFloatBase::rmTowardNegative;
+constexpr RoundingMode APFloatBase::rmTowardZero;
+constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
+
+/* A tight upper bound on number of parts required to hold the value
+ pow(5, power) is
+
+ power * 815 / (351 * integerPartWidth) + 1
+
+ However, whilst the result may require only this many parts,
+ because we are multiplying two values to get it, the
+ multiplication may require an extra part with the excess part
+ being zero (consider the trivial case of 1 * 1, tcFullMultiply
+ requires two parts to hold the single-part result). So we add an
+ extra one to guarantee enough space whilst multiplying. */
+const unsigned int maxExponent = 16383;
+const unsigned int maxPrecision = 113;
+const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
+const unsigned int maxPowerOfFiveParts =
+ 2 +
+ ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
+
+unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
+ return semantics.precision;
+}
+APFloatBase::ExponentType
+APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
+ return semantics.maxExponent;
+}
+APFloatBase::ExponentType
+APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
+ return semantics.minExponent;
+}
+unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
+ return semantics.sizeInBits;
+}
+unsigned int APFloatBase::semanticsIntSizeInBits(const fltSemantics &semantics,
+ bool isSigned) {
+ // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
+ // at least one more bit than the MaxExponent to hold the max FP value.
+ unsigned int MinBitWidth = semanticsMaxExponent(semantics) + 1;
+ // Extra sign bit needed.
+ if (isSigned)
+ ++MinBitWidth;
+ return MinBitWidth;
+}
+
+bool APFloatBase::isRepresentableAsNormalIn(const fltSemantics &Src,
+ const fltSemantics &Dst) {
+ // Exponent range must be larger.
+ if (Src.maxExponent >= Dst.maxExponent || Src.minExponent <= Dst.minExponent)
+ return false;
- constexpr RoundingMode APFloatBase::rmNearestTiesToEven;
- constexpr RoundingMode APFloatBase::rmTowardPositive;
- constexpr RoundingMode APFloatBase::rmTowardNegative;
- constexpr RoundingMode APFloatBase::rmTowardZero;
- constexpr RoundingMode APFloatBase::rmNearestTiesToAway;
+ // If the mantissa is long enough, the result value could still be denormal
+ // with a larger exponent range.
+ //
+ // FIXME: This condition is probably not accurate but also shouldn't be a
+ // practical concern with existing types.
+ return Dst.precision >= Src.precision;
+}
- /* A tight upper bound on number of parts required to hold the value
- pow(5, power) is
+unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
+ return Sem.sizeInBits;
+}
- power * 815 / (351 * integerPartWidth) + 1
+static constexpr APFloatBase::ExponentType
+exponentZero(const fltSemantics &semantics) {
+ return semantics.minExponent - 1;
+}
- However, whilst the result may require only this many parts,
- because we are multiplying two values to get it, the
- multiplication may require an extra part with the excess part
- being zero (consider the trivial case of 1 * 1, tcFullMultiply
- requires two parts to hold the single-part result). So we add an
- extra one to guarantee enough space whilst multiplying. */
- const unsigned int maxExponent = 16383;
- const unsigned int maxPrecision = 113;
- const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
- const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815) / (351 * APFloatBase::integerPartWidth));
+static constexpr APFloatBase::ExponentType
+exponentInf(const fltSemantics &semantics) {
+ return semantics.maxExponent + 1;
+}
- unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
- return semantics.precision;
- }
- APFloatBase::ExponentType
- APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
+static constexpr APFloatBase::ExponentType
+exponentNaN(const fltSemantics &semantics) {
+ if (semantics.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
+ if (semantics.nanEncoding == fltNanEncoding::NegativeZero)
+ return exponentZero(semantics);
return semantics.maxExponent;
}
- APFloatBase::ExponentType
- APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
- return semantics.minExponent;
- }
- unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
- return semantics.sizeInBits;
- }
-
- unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
- return Sem.sizeInBits;
+ return semantics.maxExponent + 1;
}
/* A bunch of private, handy routines. */
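Among the additions above are two new queries on APFloatBase: semanticsIntSizeInBits(), which returns the smallest integer bit width that can hold any value of the given semantics, and isRepresentableAsNormalIn(), which checks whether every value of one semantics is a normal value of another. A small sketch (hypothetical caller, not from the patch):

#include "llvm/ADT/APFloat.h"
using namespace llvm;

void semanticsQueries() {
  // Signed integer width needed to hold the largest IEEE half value
  // (one bit more than maxExponent, plus a sign bit).
  unsigned Bits = APFloatBase::semanticsIntSizeInBits(APFloat::IEEEhalf(),
                                                      /*isSigned=*/true);
  // Every IEEE single value (including subnormals) is a normal IEEE double,
  // so this query returns true for that pair.
  bool Normal = APFloatBase::isRepresentableAsNormalIn(APFloat::IEEEsingle(),
                                                       APFloat::IEEEdouble());
  (void)Bits;
  (void)Normal;
}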
@@ -262,9 +358,7 @@ static inline Error createError(const Twine &Err) {
return make_error<StringError>(Err, inconvertibleErrorCode());
}
-static inline unsigned int
-partCountForBits(unsigned int bits)
-{
+static constexpr inline unsigned int partCountForBits(unsigned int bits) {
return ((bits) + APFloatBase::integerPartWidth - 1) / APFloatBase::integerPartWidth;
}
@@ -509,7 +603,7 @@ trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
/* If we ran off the end it is exactly zero or one-half, otherwise
a little more. */
- if (hexDigit == -1U)
+ if (hexDigit == UINT_MAX)
return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
else
return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
@@ -526,7 +620,7 @@ lostFractionThroughTruncation(const APFloatBase::integerPart *parts,
lsb = APInt::tcLSB(parts, partCount);
- /* Note this is guaranteed true if bits == 0, or LSB == -1U. */
+ /* Note this is guaranteed true if bits == 0, or LSB == UINT_MAX. */
if (bits <= lsb)
return lfExactlyZero;
if (bits == lsb + 1)
@@ -798,10 +892,15 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
APInt fill_storage;
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
- // The only NaN representation is where the mantissa is all 1s, which is
- // non-signalling.
+    // Finite-only types do not distinguish signalling and quiet NaN, so
+    // treat them all as quiet (non-signalling).
SNaN = false;
- fill_storage = APInt::getAllOnes(semantics->precision - 1);
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
+ sign = true;
+ fill_storage = APInt::getZero(semantics->precision - 1);
+ } else {
+ fill_storage = APInt::getAllOnes(semantics->precision - 1);
+ }
fill = &fill_storage;
}
@@ -832,6 +931,9 @@ void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
// conventionally, this is the next bit down from the QNaN bit.
if (APInt::tcIsZero(significand, numParts))
APInt::tcSetBit(significand, QNaNBit - 1);
+ } else if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
+    // The only NaN is a quiet NaN, and it has no bits set in the significand.
+ // Do nothing.
} else {
// We always have to set the QNaN bit to make it a QNaN.
APInt::tcSetBit(significand, QNaNBit);
@@ -976,7 +1078,8 @@ bool IEEEFloat::isSignificandAllZerosExceptMSB() const {
}
bool IEEEFloat::isLargest() const {
- if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
+ if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+ semantics->nanEncoding == fltNanEncoding::AllOnes) {
// The largest number by magnitude in our format will be the floating point
// number with maximum exponent and with significand that is all ones except
// the LSB.
@@ -1418,7 +1521,8 @@ IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
exponent = semantics->maxExponent;
tcSetLeastSignificantBits(significandParts(), partCount(),
semantics->precision);
- if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
+ if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+ semantics->nanEncoding == fltNanEncoding::AllOnes)
APInt::tcClearBit(significandParts(), 0);
return opInexact;
@@ -1519,7 +1623,10 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
}
}
+  // The all-ones value is an overflow if NaN is all ones. If NaN is
+ // represented by negative zero, then it is a valid finite value.
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+ semantics->nanEncoding == fltNanEncoding::AllOnes &&
exponent == semantics->maxExponent && isSignificandAllOnes())
return handleOverflow(rounding_mode);
@@ -1530,8 +1637,11 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
underflow for exact results. */
if (lost_fraction == lfExactlyZero) {
/* Canonicalize zeroes. */
- if (omsb == 0)
+ if (omsb == 0) {
category = fcZero;
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
+ }
return opOK;
}
@@ -1549,18 +1659,22 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
/* Renormalize by incrementing the exponent and shifting our
significand right one. However if we already have the
maximum exponent we overflow to infinity. */
- if (exponent == semantics->maxExponent) {
- category = fcInfinity;
-
- return (opStatus) (opOverflow | opInexact);
- }
+ if (exponent == semantics->maxExponent)
+ // Invoke overflow handling with a rounding mode that will guarantee
+ // that the result gets turned into the correct infinity representation.
+ // This is needed instead of just setting the category to infinity to
+ // account for 8-bit floating point types that have no inf, only NaN.
+ return handleOverflow(sign ? rmTowardNegative : rmTowardPositive);
shiftSignificandRight(1);
return opInexact;
}
+  // The all-ones value is an overflow if NaN is all ones. If NaN is
+ // represented by negative zero, then it is a valid finite value.
if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+ semantics->nanEncoding == fltNanEncoding::AllOnes &&
exponent == semantics->maxExponent && isSignificandAllOnes())
return handleOverflow(rounding_mode);
}
@@ -1574,8 +1688,11 @@ IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
assert(omsb < semantics->precision);
/* Canonicalize zeroes. */
- if (omsb == 0)
+ if (omsb == 0) {
category = fcZero;
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
+ }
/* The fcZero case is a denormal that underflowed to zero. */
return (opStatus) (opUnderflow | opInexact);
@@ -1877,6 +1994,11 @@ IEEEFloat::opStatus IEEEFloat::remainderSpecials(const IEEEFloat &rhs) {
/* Change sign. */
void IEEEFloat::changeSign() {
+  // With NaN-as-negative-zero, neither NaN nor negative zero can change
+  // its sign.
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero &&
+ (isZero() || isNaN()))
+ return;
/* Look mummy, this one's easy. */
sign = !sign;
}
@@ -1906,6 +2028,9 @@ IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
if (category == fcZero) {
if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
sign = (rounding_mode == rmTowardNegative);
+      // NaN-as-negative-zero means zeros need to be normalized to +0.
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
}
return fs;
@@ -1931,6 +2056,8 @@ IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
sign ^= rhs.sign;
fs = multiplySpecials(rhs);
+ if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
if (isFiniteNonZero()) {
lostFraction lost_fraction = multiplySignificand(rhs);
fs = normalize(rounding_mode, lost_fraction);
@@ -1949,6 +2076,8 @@ IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
sign ^= rhs.sign;
fs = divideSpecials(rhs);
+ if (isZero() && semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
if (isFiniteNonZero()) {
lostFraction lost_fraction = divideSignificand(rhs);
fs = normalize(rounding_mode, lost_fraction);
@@ -2057,8 +2186,13 @@ IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
}
}
- if (isZero())
+ if (isZero()) {
sign = origSign; // IEEE754 requires this
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ // But some 8-bit floats only have positive 0.
+ sign = false;
+ }
+
else
sign ^= origSign;
return fs;
@@ -2083,8 +2217,11 @@ IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
fs = subtract(V, rmNearestTiesToEven);
assert(fs==opOK);
}
- if (isZero())
+ if (isZero()) {
sign = origSign; // fmod requires this
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
+ }
return fs;
}
@@ -2112,8 +2249,11 @@ IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
/* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
positive zero unless rounding to minus infinity, except that
adding two like-signed zeroes gives that zero. */
- if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign)
+ if (category == fcZero && !(fs & opUnderflow) && sign != addend.sign) {
sign = (rounding_mode == rmTowardNegative);
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
+ }
} else {
fs = multiplySpecials(multiplicand);
@@ -2389,6 +2529,12 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
return is_signaling ? opInvalidOp : opOK;
}
+ // If NaN is negative zero, we need to create a new NaN to avoid converting
+ // NaN to -Inf.
+ if (fromSemantics.nanEncoding == fltNanEncoding::NegativeZero &&
+ semantics->nanEncoding != fltNanEncoding::NegativeZero)
+ makeNaN(false, false);
+
*losesInfo = lostFraction != lfExactlyZero || X86SpecialNan;
// For x87 extended precision, we want to make a NaN, not a special NaN if
@@ -2410,6 +2556,14 @@ IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
makeNaN(false, sign);
*losesInfo = true;
fs = opInexact;
+ } else if (category == fcZero &&
+ semantics->nanEncoding == fltNanEncoding::NegativeZero) {
+ // Negative zero loses info, but positive zero doesn't.
+ *losesInfo =
+ fromSemantics.nanEncoding != fltNanEncoding::NegativeZero && sign;
+ fs = *losesInfo ? opInexact : opOK;
+    // NaN encoded as negative zero means -0 -> +0, which can lose information
+ sign = false;
} else {
*losesInfo = false;
fs = opOK;
@@ -2696,7 +2850,7 @@ IEEEFloat::convertFromHexadecimalString(StringRef s,
}
hex_value = hexDigitValue(*p);
- if (hex_value == -1U)
+ if (hex_value == UINT_MAX)
break;
p++;
@@ -2877,9 +3031,11 @@ IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) {
category = fcZero;
fs = opOK;
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
- /* Check whether the normalized exponent is high enough to overflow
- max during the log-rebasing in the max-exponent check below. */
+ /* Check whether the normalized exponent is high enough to overflow
+ max during the log-rebasing in the max-exponent check below. */
} else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
fs = handleOverflow(rounding_mode);
@@ -3337,201 +3493,121 @@ APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
return APInt(128, words);
}
-APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
- assert(partCount()==2);
+template <const fltSemantics &S>
+APInt IEEEFloat::convertIEEEFloatToAPInt() const {
+ assert(semantics == &S);
+
+ constexpr int bias = -(S.minExponent - 1);
+ constexpr unsigned int trailing_significand_bits = S.precision - 1;
+ constexpr int integer_bit_part = trailing_significand_bits / integerPartWidth;
+ constexpr integerPart integer_bit =
+ integerPart{1} << (trailing_significand_bits % integerPartWidth);
+ constexpr uint64_t significand_mask = integer_bit - 1;
+ constexpr unsigned int exponent_bits =
+ S.sizeInBits - 1 - trailing_significand_bits;
+ static_assert(exponent_bits < 64);
+ constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
- uint64_t myexponent, mysignificand, mysignificand2;
+ uint64_t myexponent;
+ std::array<integerPart, partCountForBits(trailing_significand_bits)>
+ mysignificand;
if (isFiniteNonZero()) {
- myexponent = exponent+16383; //bias
- mysignificand = significandParts()[0];
- mysignificand2 = significandParts()[1];
- if (myexponent==1 && !(mysignificand2 & 0x1000000000000LL))
- myexponent = 0; // denormal
- } else if (category==fcZero) {
- myexponent = 0;
- mysignificand = mysignificand2 = 0;
- } else if (category==fcInfinity) {
- myexponent = 0x7fff;
- mysignificand = mysignificand2 = 0;
+ myexponent = exponent + bias;
+ std::copy_n(significandParts(), mysignificand.size(),
+ mysignificand.begin());
+ if (myexponent == 1 &&
+ !(significandParts()[integer_bit_part] & integer_bit))
+ myexponent = 0; // denormal
+ } else if (category == fcZero) {
+ myexponent = ::exponentZero(S) + bias;
+ mysignificand.fill(0);
+ } else if (category == fcInfinity) {
+ if (S.nonFiniteBehavior == fltNonfiniteBehavior::NanOnly) {
+ llvm_unreachable("semantics don't support inf!");
+ }
+ myexponent = ::exponentInf(S) + bias;
+ mysignificand.fill(0);
} else {
assert(category == fcNaN && "Unknown category!");
- myexponent = 0x7fff;
- mysignificand = significandParts()[0];
- mysignificand2 = significandParts()[1];
- }
-
- uint64_t words[2];
- words[0] = mysignificand;
- words[1] = ((uint64_t)(sign & 1) << 63) |
- ((myexponent & 0x7fff) << 48) |
- (mysignificand2 & 0xffffffffffffLL);
+ myexponent = ::exponentNaN(S) + bias;
+ std::copy_n(significandParts(), mysignificand.size(),
+ mysignificand.begin());
+ }
+ std::array<uint64_t, (S.sizeInBits + 63) / 64> words;
+ auto words_iter =
+ std::copy_n(mysignificand.begin(), mysignificand.size(), words.begin());
+ if constexpr (significand_mask != 0) {
+ // Clear the integer bit.
+ words[mysignificand.size() - 1] &= significand_mask;
+ }
+ std::fill(words_iter, words.end(), uint64_t{0});
+ constexpr size_t last_word = words.size() - 1;
+ uint64_t shifted_sign = static_cast<uint64_t>(sign & 1)
+ << ((S.sizeInBits - 1) % 64);
+ words[last_word] |= shifted_sign;
+ uint64_t shifted_exponent = (myexponent & exponent_mask)
+ << (trailing_significand_bits % 64);
+ words[last_word] |= shifted_exponent;
+ if constexpr (last_word == 0) {
+ return APInt(S.sizeInBits, words[0]);
+ }
+ return APInt(S.sizeInBits, words);
+}
- return APInt(128, words);
+APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
+ assert(partCount() == 2);
+ return convertIEEEFloatToAPInt<semIEEEquad>();
}
APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
assert(partCount()==1);
-
- uint64_t myexponent, mysignificand;
-
- if (isFiniteNonZero()) {
- myexponent = exponent+1023; //bias
- mysignificand = *significandParts();
- if (myexponent==1 && !(mysignificand & 0x10000000000000LL))
- myexponent = 0; // denormal
- } else if (category==fcZero) {
- myexponent = 0;
- mysignificand = 0;
- } else if (category==fcInfinity) {
- myexponent = 0x7ff;
- mysignificand = 0;
- } else {
- assert(category == fcNaN && "Unknown category!");
- myexponent = 0x7ff;
- mysignificand = *significandParts();
- }
-
- return APInt(64, ((((uint64_t)(sign & 1) << 63) |
- ((myexponent & 0x7ff) << 52) |
- (mysignificand & 0xfffffffffffffLL))));
+ return convertIEEEFloatToAPInt<semIEEEdouble>();
}
APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
assert(partCount()==1);
-
- uint32_t myexponent, mysignificand;
-
- if (isFiniteNonZero()) {
- myexponent = exponent+127; //bias
- mysignificand = (uint32_t)*significandParts();
- if (myexponent == 1 && !(mysignificand & 0x800000))
- myexponent = 0; // denormal
- } else if (category==fcZero) {
- myexponent = 0;
- mysignificand = 0;
- } else if (category==fcInfinity) {
- myexponent = 0xff;
- mysignificand = 0;
- } else {
- assert(category == fcNaN && "Unknown category!");
- myexponent = 0xff;
- mysignificand = (uint32_t)*significandParts();
- }
-
- return APInt(32, (((sign&1) << 31) | ((myexponent&0xff) << 23) |
- (mysignificand & 0x7fffff)));
+ return convertIEEEFloatToAPInt<semIEEEsingle>();
}
APInt IEEEFloat::convertBFloatAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics *)&semBFloat);
assert(partCount() == 1);
-
- uint32_t myexponent, mysignificand;
-
- if (isFiniteNonZero()) {
- myexponent = exponent + 127; // bias
- mysignificand = (uint32_t)*significandParts();
- if (myexponent == 1 && !(mysignificand & 0x80))
- myexponent = 0; // denormal
- } else if (category == fcZero) {
- myexponent = 0;
- mysignificand = 0;
- } else if (category == fcInfinity) {
- myexponent = 0xff;
- mysignificand = 0;
- } else {
- assert(category == fcNaN && "Unknown category!");
- myexponent = 0xff;
- mysignificand = (uint32_t)*significandParts();
- }
-
- return APInt(16, (((sign & 1) << 15) | ((myexponent & 0xff) << 7) |
- (mysignificand & 0x7f)));
+ return convertIEEEFloatToAPInt<semBFloat>();
}
APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
assert(partCount()==1);
-
- uint32_t myexponent, mysignificand;
-
- if (isFiniteNonZero()) {
- myexponent = exponent+15; //bias
- mysignificand = (uint32_t)*significandParts();
- if (myexponent == 1 && !(mysignificand & 0x400))
- myexponent = 0; // denormal
- } else if (category==fcZero) {
- myexponent = 0;
- mysignificand = 0;
- } else if (category==fcInfinity) {
- myexponent = 0x1f;
- mysignificand = 0;
- } else {
- assert(category == fcNaN && "Unknown category!");
- myexponent = 0x1f;
- mysignificand = (uint32_t)*significandParts();
- }
-
- return APInt(16, (((sign&1) << 15) | ((myexponent&0x1f) << 10) |
- (mysignificand & 0x3ff)));
+ return convertIEEEFloatToAPInt<semIEEEhalf>();
}
APInt IEEEFloat::convertFloat8E5M2APFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics *)&semFloat8E5M2);
assert(partCount() == 1);
+ return convertIEEEFloatToAPInt<semFloat8E5M2>();
+}
- uint32_t myexponent, mysignificand;
-
- if (isFiniteNonZero()) {
- myexponent = exponent + 15; // bias
- mysignificand = (uint32_t)*significandParts();
- if (myexponent == 1 && !(mysignificand & 0x4))
- myexponent = 0; // denormal
- } else if (category == fcZero) {
- myexponent = 0;
- mysignificand = 0;
- } else if (category == fcInfinity) {
- myexponent = 0x1f;
- mysignificand = 0;
- } else {
- assert(category == fcNaN && "Unknown category!");
- myexponent = 0x1f;
- mysignificand = (uint32_t)*significandParts();
- }
-
- return APInt(8, (((sign & 1) << 7) | ((myexponent & 0x1f) << 2) |
- (mysignificand & 0x3)));
+APInt IEEEFloat::convertFloat8E5M2FNUZAPFloatToAPInt() const {
+ assert(partCount() == 1);
+ return convertIEEEFloatToAPInt<semFloat8E5M2FNUZ>();
}
APInt IEEEFloat::convertFloat8E4M3FNAPFloatToAPInt() const {
- assert(semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN);
assert(partCount() == 1);
+ return convertIEEEFloatToAPInt<semFloat8E4M3FN>();
+}
- uint32_t myexponent, mysignificand;
+APInt IEEEFloat::convertFloat8E4M3FNUZAPFloatToAPInt() const {
+ assert(partCount() == 1);
+ return convertIEEEFloatToAPInt<semFloat8E4M3FNUZ>();
+}
- if (isFiniteNonZero()) {
- myexponent = exponent + 7; // bias
- mysignificand = (uint32_t)*significandParts();
- if (myexponent == 1 && !(mysignificand & 0x8))
- myexponent = 0; // denormal
- } else if (category == fcZero) {
- myexponent = 0;
- mysignificand = 0;
- } else if (category == fcInfinity) {
- myexponent = 0xf;
- mysignificand = 0;
- } else {
- assert(category == fcNaN && "Unknown category!");
- myexponent = 0xf;
- mysignificand = (uint32_t)*significandParts();
- }
+APInt IEEEFloat::convertFloat8E4M3B11FNUZAPFloatToAPInt() const {
+ assert(partCount() == 1);
+ return convertIEEEFloatToAPInt<semFloat8E4M3B11FNUZ>();
+}
- return APInt(8, (((sign & 1) << 7) | ((myexponent & 0xf) << 3) |
- (mysignificand & 0x7)));
+APInt IEEEFloat::convertFloatTF32APFloatToAPInt() const {
+ assert(partCount() == 1);
+ return convertIEEEFloatToAPInt<semFloatTF32>();
}
// This function creates an APInt that is just a bit map of the floating
@@ -3560,9 +3636,21 @@ APInt IEEEFloat::bitcastToAPInt() const {
if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2)
return convertFloat8E5M2APFloatToAPInt();
+ if (semantics == (const llvm::fltSemantics *)&semFloat8E5M2FNUZ)
+ return convertFloat8E5M2FNUZAPFloatToAPInt();
+
if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FN)
return convertFloat8E4M3FNAPFloatToAPInt();
+ if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3FNUZ)
+ return convertFloat8E4M3FNUZAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics *)&semFloat8E4M3B11FNUZ)
+ return convertFloat8E4M3B11FNUZAPFloatToAPInt();
+
+ if (semantics == (const llvm::fltSemantics *)&semFloatTF32)
+ return convertFloatTF32APFloatToAPInt();
+
assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
"unknown format!");
return convertF80LongDoubleAPFloatToAPInt();
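With the per-format converters collapsed into the convertIEEEFloatToAPInt<S>() template above, bitcastToAPInt() now simply dispatches on the semantics, including the new 8-bit and TF32 formats. A round-trip sketch (hypothetical helper, not from the patch):

#include <cassert>
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
using namespace llvm;

// Reinterpret 8 raw bits as a Float8E5M2FNUZ value and convert back to bits.
APInt roundTripE5M2FNUZ(const APInt &Bits8) {
  assert(Bits8.getBitWidth() == 8 && "expected an 8-bit payload");
  APFloat F(APFloat::Float8E5M2FNUZ(), Bits8);
  return F.bitcastToAPInt();
}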
@@ -3643,205 +3731,131 @@ void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
}
}
-void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
- uint64_t i1 = api.getRawData()[0];
- uint64_t i2 = api.getRawData()[1];
- uint64_t myexponent = (i2 >> 48) & 0x7fff;
- uint64_t mysignificand = i1;
- uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
+template <const fltSemantics &S>
+void IEEEFloat::initFromIEEEAPInt(const APInt &api) {
+ assert(api.getBitWidth() == S.sizeInBits);
+ constexpr integerPart integer_bit = integerPart{1}
+ << ((S.precision - 1) % integerPartWidth);
+ constexpr uint64_t significand_mask = integer_bit - 1;
+ constexpr unsigned int trailing_significand_bits = S.precision - 1;
+ constexpr unsigned int stored_significand_parts =
+ partCountForBits(trailing_significand_bits);
+ constexpr unsigned int exponent_bits =
+ S.sizeInBits - 1 - trailing_significand_bits;
+ static_assert(exponent_bits < 64);
+ constexpr uint64_t exponent_mask = (uint64_t{1} << exponent_bits) - 1;
+ constexpr int bias = -(S.minExponent - 1);
- initialize(&semIEEEquad);
- assert(partCount()==2);
-
- sign = static_cast<unsigned int>(i2>>63);
- if (myexponent==0 &&
- (mysignificand==0 && mysignificand2==0)) {
- makeZero(sign);
- } else if (myexponent==0x7fff &&
- (mysignificand==0 && mysignificand2==0)) {
- makeInf(sign);
- } else if (myexponent==0x7fff &&
- (mysignificand!=0 || mysignificand2 !=0)) {
- category = fcNaN;
- exponent = exponentNaN();
- significandParts()[0] = mysignificand;
- significandParts()[1] = mysignificand2;
- } else {
- category = fcNormal;
- exponent = myexponent - 16383;
- significandParts()[0] = mysignificand;
- significandParts()[1] = mysignificand2;
- if (myexponent==0) // denormal
- exponent = -16382;
- else
- significandParts()[1] |= 0x1000000000000LL; // integer bit
+ // Copy the bits of the significand. We need to clear out the exponent and
+ // sign bit in the last word.
+ std::array<integerPart, stored_significand_parts> mysignificand;
+ std::copy_n(api.getRawData(), mysignificand.size(), mysignificand.begin());
+ if constexpr (significand_mask != 0) {
+ mysignificand[mysignificand.size() - 1] &= significand_mask;
}
-}
-void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
- uint64_t i = *api.getRawData();
- uint64_t myexponent = (i >> 52) & 0x7ff;
- uint64_t mysignificand = i & 0xfffffffffffffLL;
+ // We assume the last word holds the sign bit, the exponent, and potentially
+ // some of the trailing significand field.
+ uint64_t last_word = api.getRawData()[api.getNumWords() - 1];
+ uint64_t myexponent =
+ (last_word >> (trailing_significand_bits % 64)) & exponent_mask;
- initialize(&semIEEEdouble);
- assert(partCount()==1);
+ initialize(&S);
+ assert(partCount() == mysignificand.size());
- sign = static_cast<unsigned int>(i>>63);
- if (myexponent==0 && mysignificand==0) {
- makeZero(sign);
- } else if (myexponent==0x7ff && mysignificand==0) {
- makeInf(sign);
- } else if (myexponent==0x7ff && mysignificand!=0) {
- category = fcNaN;
- exponent = exponentNaN();
- *significandParts() = mysignificand;
- } else {
- category = fcNormal;
- exponent = myexponent - 1023;
- *significandParts() = mysignificand;
- if (myexponent==0) // denormal
- exponent = -1022;
- else
- *significandParts() |= 0x10000000000000LL; // integer bit
- }
-}
+ sign = static_cast<unsigned int>(last_word >> ((S.sizeInBits - 1) % 64));
-void IEEEFloat::initFromFloatAPInt(const APInt &api) {
- uint32_t i = (uint32_t)*api.getRawData();
- uint32_t myexponent = (i >> 23) & 0xff;
- uint32_t mysignificand = i & 0x7fffff;
+ bool all_zero_significand =
+ llvm::all_of(mysignificand, [](integerPart bits) { return bits == 0; });
- initialize(&semIEEEsingle);
- assert(partCount()==1);
+ bool is_zero = myexponent == 0 && all_zero_significand;
- sign = i >> 31;
- if (myexponent==0 && mysignificand==0) {
- makeZero(sign);
- } else if (myexponent==0xff && mysignificand==0) {
- makeInf(sign);
- } else if (myexponent==0xff && mysignificand!=0) {
- category = fcNaN;
- exponent = exponentNaN();
- *significandParts() = mysignificand;
- } else {
- category = fcNormal;
- exponent = myexponent - 127; //bias
- *significandParts() = mysignificand;
- if (myexponent==0) // denormal
- exponent = -126;
- else
- *significandParts() |= 0x800000; // integer bit
+ if constexpr (S.nonFiniteBehavior == fltNonfiniteBehavior::IEEE754) {
+ if (myexponent - bias == ::exponentInf(S) && all_zero_significand) {
+ makeInf(sign);
+ return;
+ }
}
-}
-void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
- uint32_t i = (uint32_t)*api.getRawData();
- uint32_t myexponent = (i >> 7) & 0xff;
- uint32_t mysignificand = i & 0x7f;
+ bool is_nan = false;
- initialize(&semBFloat);
- assert(partCount() == 1);
+ if constexpr (S.nanEncoding == fltNanEncoding::IEEE) {
+ is_nan = myexponent - bias == ::exponentNaN(S) && !all_zero_significand;
+ } else if constexpr (S.nanEncoding == fltNanEncoding::AllOnes) {
+ bool all_ones_significand =
+ std::all_of(mysignificand.begin(), mysignificand.end() - 1,
+ [](integerPart bits) { return bits == ~integerPart{0}; }) &&
+ (!significand_mask ||
+ mysignificand[mysignificand.size() - 1] == significand_mask);
+ is_nan = myexponent - bias == ::exponentNaN(S) && all_ones_significand;
+ } else if constexpr (S.nanEncoding == fltNanEncoding::NegativeZero) {
+ is_nan = is_zero && sign;
+ }
- sign = i >> 15;
- if (myexponent == 0 && mysignificand == 0) {
- makeZero(sign);
- } else if (myexponent == 0xff && mysignificand == 0) {
- makeInf(sign);
- } else if (myexponent == 0xff && mysignificand != 0) {
+ if (is_nan) {
category = fcNaN;
- exponent = exponentNaN();
- *significandParts() = mysignificand;
- } else {
- category = fcNormal;
- exponent = myexponent - 127; // bias
- *significandParts() = mysignificand;
- if (myexponent == 0) // denormal
- exponent = -126;
- else
- *significandParts() |= 0x80; // integer bit
+ exponent = ::exponentNaN(S);
+ std::copy_n(mysignificand.begin(), mysignificand.size(),
+ significandParts());
+ return;
+ }
+
+ if (is_zero) {
+ makeZero(sign);
+ return;
}
+
+ category = fcNormal;
+ exponent = myexponent - bias;
+ std::copy_n(mysignificand.begin(), mysignificand.size(), significandParts());
+ if (myexponent == 0) // denormal
+ exponent = S.minExponent;
+ else
+ significandParts()[mysignificand.size()-1] |= integer_bit; // integer bit
}
-void IEEEFloat::initFromHalfAPInt(const APInt &api) {
- uint32_t i = (uint32_t)*api.getRawData();
- uint32_t myexponent = (i >> 10) & 0x1f;
- uint32_t mysignificand = i & 0x3ff;
+void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
+ initFromIEEEAPInt<semIEEEquad>(api);
+}
- initialize(&semIEEEhalf);
- assert(partCount()==1);
+void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
+ initFromIEEEAPInt<semIEEEdouble>(api);
+}
- sign = i >> 15;
- if (myexponent==0 && mysignificand==0) {
- makeZero(sign);
- } else if (myexponent==0x1f && mysignificand==0) {
- makeInf(sign);
- } else if (myexponent==0x1f && mysignificand!=0) {
- category = fcNaN;
- exponent = exponentNaN();
- *significandParts() = mysignificand;
- } else {
- category = fcNormal;
- exponent = myexponent - 15; //bias
- *significandParts() = mysignificand;
- if (myexponent==0) // denormal
- exponent = -14;
- else
- *significandParts() |= 0x400; // integer bit
- }
+void IEEEFloat::initFromFloatAPInt(const APInt &api) {
+ initFromIEEEAPInt<semIEEEsingle>(api);
}
-void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
- uint32_t i = (uint32_t)*api.getRawData();
- uint32_t myexponent = (i >> 2) & 0x1f;
- uint32_t mysignificand = i & 0x3;
+void IEEEFloat::initFromBFloatAPInt(const APInt &api) {
+ initFromIEEEAPInt<semBFloat>(api);
+}
- initialize(&semFloat8E5M2);
- assert(partCount() == 1);
+void IEEEFloat::initFromHalfAPInt(const APInt &api) {
+ initFromIEEEAPInt<semIEEEhalf>(api);
+}
- sign = i >> 7;
- if (myexponent == 0 && mysignificand == 0) {
- makeZero(sign);
- } else if (myexponent == 0x1f && mysignificand == 0) {
- makeInf(sign);
- } else if (myexponent == 0x1f && mysignificand != 0) {
- category = fcNaN;
- exponent = exponentNaN();
- *significandParts() = mysignificand;
- } else {
- category = fcNormal;
- exponent = myexponent - 15; // bias
- *significandParts() = mysignificand;
- if (myexponent == 0) // denormal
- exponent = -14;
- else
- *significandParts() |= 0x4; // integer bit
- }
+void IEEEFloat::initFromFloat8E5M2APInt(const APInt &api) {
+ initFromIEEEAPInt<semFloat8E5M2>(api);
+}
+
+void IEEEFloat::initFromFloat8E5M2FNUZAPInt(const APInt &api) {
+ initFromIEEEAPInt<semFloat8E5M2FNUZ>(api);
}
void IEEEFloat::initFromFloat8E4M3FNAPInt(const APInt &api) {
- uint32_t i = (uint32_t)*api.getRawData();
- uint32_t myexponent = (i >> 3) & 0xf;
- uint32_t mysignificand = i & 0x7;
+ initFromIEEEAPInt<semFloat8E4M3FN>(api);
+}
- initialize(&semFloat8E4M3FN);
- assert(partCount() == 1);
+void IEEEFloat::initFromFloat8E4M3FNUZAPInt(const APInt &api) {
+ initFromIEEEAPInt<semFloat8E4M3FNUZ>(api);
+}
- sign = i >> 7;
- if (myexponent == 0 && mysignificand == 0) {
- makeZero(sign);
- } else if (myexponent == 0xf && mysignificand == 7) {
- category = fcNaN;
- exponent = exponentNaN();
- *significandParts() = mysignificand;
- } else {
- category = fcNormal;
- exponent = myexponent - 7; // bias
- *significandParts() = mysignificand;
- if (myexponent == 0) // denormal
- exponent = -6;
- else
- *significandParts() |= 0x8; // integer bit
- }
+void IEEEFloat::initFromFloat8E4M3B11FNUZAPInt(const APInt &api) {
+ initFromIEEEAPInt<semFloat8E4M3B11FNUZ>(api);
+}
+
+void IEEEFloat::initFromFloatTF32APInt(const APInt &api) {
+ initFromIEEEAPInt<semFloatTF32>(api);
}
/// Treat api as containing the bits of a floating point number.
@@ -3863,8 +3877,16 @@ void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
return initFromPPCDoubleDoubleAPInt(api);
if (Sem == &semFloat8E5M2)
return initFromFloat8E5M2APInt(api);
+ if (Sem == &semFloat8E5M2FNUZ)
+ return initFromFloat8E5M2FNUZAPInt(api);
if (Sem == &semFloat8E4M3FN)
return initFromFloat8E4M3FNAPInt(api);
+ if (Sem == &semFloat8E4M3FNUZ)
+ return initFromFloat8E4M3FNUZAPInt(api);
+ if (Sem == &semFloat8E4M3B11FNUZ)
+ return initFromFloat8E4M3B11FNUZAPInt(api);
+ if (Sem == &semFloatTF32)
+ return initFromFloatTF32APInt(api);
llvm_unreachable(nullptr);
}
@@ -3893,7 +3915,8 @@ void IEEEFloat::makeLargest(bool Negative) {
? (~integerPart(0) >> NumUnusedHighBits)
: 0;
- if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
+ if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly &&
+ semantics->nanEncoding == fltNanEncoding::AllOnes)
significand[0] &= ~integerPart(1);
}
@@ -4074,7 +4097,7 @@ void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
}
// Ignore trailing binary zeros.
- int trailingZeros = significand.countTrailingZeros();
+ int trailingZeros = significand.countr_zero();
exp += trailingZeros;
significand.lshrInPlace(trailingZeros);
@@ -4321,6 +4344,8 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
APInt::tcSet(significandParts(), 0, partCount());
category = fcZero;
exponent = 0;
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero)
+ sign = false;
break;
}
@@ -4407,17 +4432,15 @@ IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
}
APFloatBase::ExponentType IEEEFloat::exponentNaN() const {
- if (semantics->nonFiniteBehavior == fltNonfiniteBehavior::NanOnly)
- return semantics->maxExponent;
- return semantics->maxExponent + 1;
+ return ::exponentNaN(*semantics);
}
APFloatBase::ExponentType IEEEFloat::exponentInf() const {
- return semantics->maxExponent + 1;
+ return ::exponentInf(*semantics);
}
APFloatBase::ExponentType IEEEFloat::exponentZero() const {
- return semantics->minExponent - 1;
+ return ::exponentZero(*semantics);
}
void IEEEFloat::makeInf(bool Negative) {
@@ -4435,6 +4458,10 @@ void IEEEFloat::makeInf(bool Negative) {
void IEEEFloat::makeZero(bool Negative) {
category = fcZero;
sign = Negative;
+ if (semantics->nanEncoding == fltNanEncoding::NegativeZero) {
+ // Merge negative zero to positive because 0b10000...000 is used for NaN
+ sign = false;
+ }
exponent = exponentZero();
APInt::tcSet(significandParts(), 0, partCount());
}
@@ -4477,7 +4504,7 @@ IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
int MaxIncrement = MaxExp - (MinExp - SignificandBits) + 1;
// Clamp to one past the range ends to let normalize handle overflow.
- X.exponent += std::min(std::max(Exp, -MaxIncrement - 1), MaxIncrement);
+ X.exponent += std::clamp(Exp, -MaxIncrement - 1, MaxIncrement);
X.normalize(RoundingMode, lfExactlyZero);
if (X.isNaN())
X.makeQuiet();
@@ -5114,6 +5141,19 @@ APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
consumeError(StatusOrErr.takeError());
}
+FPClassTest APFloat::classify() const {
+ if (isZero())
+ return isNegative() ? fcNegZero : fcPosZero;
+ if (isNormal())
+ return isNegative() ? fcNegNormal : fcPosNormal;
+ if (isDenormal())
+ return isNegative() ? fcNegSubnormal : fcPosSubnormal;
+ if (isInfinity())
+ return isNegative() ? fcNegInf : fcPosInf;
+ assert(isNaN() && "Other class of FP constant");
+ return isSignaling() ? fcSNan : fcQNan;
+}
+
APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
roundingMode RM, bool *losesInfo) {
if (&getSemantics() == &ToSemantics) {
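Taken together, the FNUZ-related changes above give these formats no infinity, a single quiet NaN stored as the "negative zero" bit pattern, and only a positive zero. A sketch exercising that behavior together with the new APFloat::classify() (hypothetical function, not from the patch):

#include "llvm/ADT/APFloat.h"
using namespace llvm;

void fp8FNUZDemo() {
  const fltSemantics &Sem = APFloat::Float8E4M3FNUZ();
  // Requesting a negative zero canonicalizes to +0, since the sign-bit-only
  // encoding is reserved for NaN (see makeZero above).
  APFloat Zero = APFloat::getZero(Sem, /*Negative=*/true);
  bool Neg = Zero.isNegative();      // expected: false
  // The single NaN classifies as a quiet NaN.
  APFloat NaN = APFloat::getQNaN(Sem);
  FPClassTest Cls = NaN.classify();  // expected: fcQNan
  (void)Neg;
  (void)Cls;
}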
diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
index afe7478a8b2a..05b1526da95f 100644
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/bit.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -68,7 +69,7 @@ inline static unsigned getDigit(char cdigit, uint8_t radix) {
if (r < radix)
return r;
- return -1U;
+ return UINT_MAX;
}
@@ -164,6 +165,14 @@ void APInt::Profile(FoldingSetNodeID& ID) const {
ID.AddInteger(U.pVal[i]);
}
+bool APInt::isAligned(Align A) const {
+ if (isZero())
+ return true;
+ const unsigned TrailingZeroes = countr_zero();
+ const unsigned MinimumTrailingZeroes = Log2(A);
+ return TrailingZeroes >= MinimumTrailingZeroes;
+}
+
/// Prefix increment operator. Increments the APInt by one.
APInt& APInt::operator++() {
if (isSingleWord())
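The new APInt::isAligned() added above answers whether a value has at least Log2(A) trailing zero bits, with zero always counted as aligned. A small sketch (hypothetical helper, not from the patch):

#include "llvm/ADT/APInt.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

// An offset of 128 is 32-byte aligned: countr_zero(128) = 7 >= Log2(32) = 5.
bool offsetIsAligned() {
  APInt Offset(/*numBits=*/64, /*val=*/128);
  return Offset.isAligned(Align(32)); // true
}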
@@ -479,7 +488,6 @@ APInt APInt::extractBits(unsigned numBits, unsigned bitPosition) const {
uint64_t APInt::extractBitsAsZExtValue(unsigned numBits,
unsigned bitPosition) const {
- assert(numBits > 0 && "Can't extract zero bits");
assert(bitPosition < BitWidth && (numBits + bitPosition) <= BitWidth &&
"Illegal bit extraction");
assert(numBits <= 64 && "Illegal bit extraction");
@@ -626,7 +634,7 @@ unsigned APInt::countLeadingZerosSlowCase() const {
if (V == 0)
Count += APINT_BITS_PER_WORD;
else {
- Count += llvm::countLeadingZeros(V);
+ Count += llvm::countl_zero(V);
break;
}
}
@@ -646,13 +654,13 @@ unsigned APInt::countLeadingOnesSlowCase() const {
shift = APINT_BITS_PER_WORD - highWordBits;
}
int i = getNumWords() - 1;
- unsigned Count = llvm::countLeadingOnes(U.pVal[i] << shift);
+ unsigned Count = llvm::countl_one(U.pVal[i] << shift);
if (Count == highWordBits) {
for (i--; i >= 0; --i) {
if (U.pVal[i] == WORDTYPE_MAX)
Count += APINT_BITS_PER_WORD;
else {
- Count += llvm::countLeadingOnes(U.pVal[i]);
+ Count += llvm::countl_one(U.pVal[i]);
break;
}
}
@@ -666,7 +674,7 @@ unsigned APInt::countTrailingZerosSlowCase() const {
for (; i < getNumWords() && U.pVal[i] == 0; ++i)
Count += APINT_BITS_PER_WORD;
if (i < getNumWords())
- Count += llvm::countTrailingZeros(U.pVal[i]);
+ Count += llvm::countr_zero(U.pVal[i]);
return std::min(Count, BitWidth);
}
@@ -676,7 +684,7 @@ unsigned APInt::countTrailingOnesSlowCase() const {
for (; i < getNumWords() && U.pVal[i] == WORDTYPE_MAX; ++i)
Count += APINT_BITS_PER_WORD;
if (i < getNumWords())
- Count += llvm::countTrailingOnes(U.pVal[i]);
+ Count += llvm::countr_one(U.pVal[i]);
assert(Count <= BitWidth);
return Count;
}
@@ -707,18 +715,18 @@ bool APInt::isSubsetOfSlowCase(const APInt &RHS) const {
APInt APInt::byteSwap() const {
assert(BitWidth >= 16 && BitWidth % 8 == 0 && "Cannot byteswap!");
if (BitWidth == 16)
- return APInt(BitWidth, ByteSwap_16(uint16_t(U.VAL)));
+ return APInt(BitWidth, llvm::byteswap<uint16_t>(U.VAL));
if (BitWidth == 32)
- return APInt(BitWidth, ByteSwap_32(unsigned(U.VAL)));
+ return APInt(BitWidth, llvm::byteswap<uint32_t>(U.VAL));
if (BitWidth <= 64) {
- uint64_t Tmp1 = ByteSwap_64(U.VAL);
+ uint64_t Tmp1 = llvm::byteswap<uint64_t>(U.VAL);
Tmp1 >>= (64 - BitWidth);
return APInt(BitWidth, Tmp1);
}
APInt Result(getNumWords() * APINT_BITS_PER_WORD, 0);
for (unsigned I = 0, N = getNumWords(); I != N; ++I)
- Result.U.pVal[I] = ByteSwap_64(U.pVal[N - I - 1]);
+ Result.U.pVal[I] = llvm::byteswap<uint64_t>(U.pVal[N - I - 1]);
if (Result.BitWidth != BitWidth) {
Result.lshrInPlace(Result.BitWidth - BitWidth);
Result.BitWidth = BitWidth;
@@ -767,8 +775,8 @@ APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
// Count common powers of 2 and remove all other powers of 2.
unsigned Pow2;
{
- unsigned Pow2_A = A.countTrailingZeros();
- unsigned Pow2_B = B.countTrailingZeros();
+ unsigned Pow2_A = A.countr_zero();
+ unsigned Pow2_B = B.countr_zero();
if (Pow2_A > Pow2_B) {
A.lshrInPlace(Pow2_A - Pow2_B);
Pow2 = Pow2_B;
@@ -789,10 +797,10 @@ APInt llvm::APIntOps::GreatestCommonDivisor(APInt A, APInt B) {
while (A != B) {
if (A.ugt(B)) {
A -= B;
- A.lshrInPlace(A.countTrailingZeros() - Pow2);
+ A.lshrInPlace(A.countr_zero() - Pow2);
} else {
B -= A;
- B.lshrInPlace(B.countTrailingZeros() - Pow2);
+ B.lshrInPlace(B.countr_zero() - Pow2);
}
}
@@ -1318,7 +1326,7 @@ static void KnuthDiv(uint32_t *u, uint32_t *v, uint32_t *q, uint32_t* r,
// and v so that its high bits are shifted to the top of v's range without
// overflow. Note that this can require an extra word in u so that u must
// be of length m+n+1.
- unsigned shift = countLeadingZeros(v[n-1]);
+ unsigned shift = llvm::countl_zero(v[n - 1]);
uint32_t v_carry = 0;
uint32_t u_carry = 0;
if (shift) {
@@ -1967,7 +1975,7 @@ APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const {
}
APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
- if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) {
+ if (countl_zero() + RHS.countl_zero() + 2 <= BitWidth) {
Overflow = true;
return *this * RHS;
}
@@ -1984,24 +1992,32 @@ APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const {
}
APInt APInt::sshl_ov(const APInt &ShAmt, bool &Overflow) const {
- Overflow = ShAmt.uge(getBitWidth());
+ return sshl_ov(ShAmt.getLimitedValue(getBitWidth()), Overflow);
+}
+
+APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const {
+ Overflow = ShAmt >= getBitWidth();
if (Overflow)
return APInt(BitWidth, 0);
if (isNonNegative()) // Don't allow sign change.
- Overflow = ShAmt.uge(countLeadingZeros());
+ Overflow = ShAmt >= countl_zero();
else
- Overflow = ShAmt.uge(countLeadingOnes());
+ Overflow = ShAmt >= countl_one();
return *this << ShAmt;
}
APInt APInt::ushl_ov(const APInt &ShAmt, bool &Overflow) const {
- Overflow = ShAmt.uge(getBitWidth());
+ return ushl_ov(ShAmt.getLimitedValue(getBitWidth()), Overflow);
+}
+
+APInt APInt::ushl_ov(unsigned ShAmt, bool &Overflow) const {
+ Overflow = ShAmt >= getBitWidth();
if (Overflow)
return APInt(BitWidth, 0);
- Overflow = ShAmt.ugt(countLeadingZeros());
+ Overflow = ShAmt > countl_zero();
return *this << ShAmt;
}
@@ -2067,6 +2083,10 @@ APInt APInt::umul_sat(const APInt &RHS) const {
}
APInt APInt::sshl_sat(const APInt &RHS) const {
+ return sshl_sat(RHS.getLimitedValue(getBitWidth()));
+}
+
+APInt APInt::sshl_sat(unsigned RHS) const {
bool Overflow;
APInt Res = sshl_ov(RHS, Overflow);
if (!Overflow)
@@ -2077,6 +2097,10 @@ APInt APInt::sshl_sat(const APInt &RHS) const {
}
APInt APInt::ushl_sat(const APInt &RHS) const {
+ return ushl_sat(RHS.getLimitedValue(getBitWidth()));
+}
+
+APInt APInt::ushl_sat(unsigned RHS) const {
bool Overflow;
APInt Res = ushl_ov(RHS, Overflow);
if (!Overflow)
@@ -2136,8 +2160,8 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
this->negate();
}
-void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
- bool Signed, bool formatAsCLiteral) const {
+void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
+ bool formatAsCLiteral, bool UpperCase) const {
assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2 ||
Radix == 36) &&
"Radix should be 2, 8, 10, 16, or 36!");
@@ -2173,7 +2197,9 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
return;
}
- static const char Digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ static const char BothDigits[] = "0123456789abcdefghijklmnopqrstuvwxyz"
+ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+ const char *Digits = BothDigits + (UpperCase ? 36 : 0);
if (isSingleWord()) {
char Buffer[65];
@@ -2290,14 +2316,6 @@ static inline APInt::WordType highHalf(APInt::WordType part) {
return part >> (APInt::APINT_BITS_PER_WORD / 2);
}
-/// Returns the bit number of the most significant set bit of a part.
-/// If the input number has no bits set -1U is returned.
-static unsigned partMSB(APInt::WordType value) { return findLastSet(value); }
-
-/// Returns the bit number of the least significant set bit of a part. If the
-/// input number has no bits set -1U is returned.
-static unsigned partLSB(APInt::WordType value) { return findFirstSet(value); }
-
/// Sets the least significant part of a bignum to the input value, and zeroes
/// out higher parts.
void APInt::tcSet(WordType *dst, WordType part, unsigned parts) {
@@ -2338,32 +2356,33 @@ void APInt::tcClearBit(WordType *parts, unsigned bit) {
}
/// Returns the bit number of the least significant set bit of a number. If the
-/// input number has no bits set -1U is returned.
+/// input number has no bits set UINT_MAX is returned.
unsigned APInt::tcLSB(const WordType *parts, unsigned n) {
for (unsigned i = 0; i < n; i++) {
if (parts[i] != 0) {
- unsigned lsb = partLSB(parts[i]);
+ unsigned lsb = llvm::countr_zero(parts[i]);
return lsb + i * APINT_BITS_PER_WORD;
}
}
- return -1U;
+ return UINT_MAX;
}
/// Returns the bit number of the most significant set bit of a number.
-/// If the input number has no bits set -1U is returned.
+/// If the input number has no bits set UINT_MAX is returned.
unsigned APInt::tcMSB(const WordType *parts, unsigned n) {
do {
--n;
if (parts[n] != 0) {
- unsigned msb = partMSB(parts[n]);
+ static_assert(sizeof(parts[n]) <= sizeof(uint64_t));
+ unsigned msb = llvm::Log2_64(parts[n]);
return msb + n * APINT_BITS_PER_WORD;
}
} while (n);
- return -1U;
+ return UINT_MAX;
}
/// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
@@ -2961,7 +2980,7 @@ llvm::APIntOps::GetMostSignificantDifferentBit(const APInt &A, const APInt &B) {
assert(A.getBitWidth() == B.getBitWidth() && "Must have the same bitwidth");
if (A == B)
return std::nullopt;
- return A.getBitWidth() - ((A ^ B).countLeadingZeros() + 1);
+ return A.getBitWidth() - ((A ^ B).countl_zero() + 1);
}
APInt llvm::APIntOps::ScaleBitMask(const APInt &A, unsigned NewBitWidth,
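
Two of the APInt additions above are easy to demonstrate in isolation: isAligned() reduces to comparing trailing zeros against Log2 of the alignment, and toString() gains an UpperCase flag selecting the second half of the digit table. A hedged sketch with illustrative values:

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Alignment.h"
#include <cassert>

using namespace llvm;

static void apintExamples() {
  // 0x1000 has 12 trailing zero bits, so it is 16-byte aligned (Log2(16) == 4).
  APInt V(/*numBits=*/64, /*val=*/0x1000);
  assert(V.isAligned(Align(16)));

  // The new UpperCase flag controls the case of hex/alpha digits.
  SmallString<32> Buf;
  APInt(32, 0xcafe).toString(Buf, /*Radix=*/16, /*Signed=*/false,
                             /*formatAsCLiteral=*/false, /*UpperCase=*/true);
  assert(Buf.str() == "CAFE");
}
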
diff --git a/llvm/lib/Support/APSInt.cpp b/llvm/lib/Support/APSInt.cpp
index b65b6824eaf8..5a9f44f304a2 100644
--- a/llvm/lib/Support/APSInt.cpp
+++ b/llvm/lib/Support/APSInt.cpp
@@ -25,7 +25,7 @@ APSInt::APSInt(StringRef Str) {
unsigned NumBits = ((Str.size() * 64) / 19) + 2;
APInt Tmp(NumBits, Str, /*radix=*/10);
if (Str[0] == '-') {
- unsigned MinBits = Tmp.getMinSignedBits();
+ unsigned MinBits = Tmp.getSignificantBits();
if (MinBits < NumBits)
Tmp = Tmp.trunc(std::max<unsigned>(1, MinBits));
*this = APSInt(Tmp, /*isUnsigned=*/false);
diff --git a/llvm/lib/Support/AddressRanges.cpp b/llvm/lib/Support/AddressRanges.cpp
deleted file mode 100644
index 187d5be00dae..000000000000
--- a/llvm/lib/Support/AddressRanges.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-//===- AddressRanges.cpp ----------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/AddressRanges.h"
-#include "llvm/ADT/STLExtras.h"
-#include <inttypes.h>
-
-using namespace llvm;
-
-AddressRanges::Collection::const_iterator
-AddressRanges::insert(AddressRange Range) {
- if (Range.size() == 0)
- return Ranges.end();
-
- auto It = llvm::upper_bound(Ranges, Range);
- auto It2 = It;
- while (It2 != Ranges.end() && It2->start() <= Range.end())
- ++It2;
- if (It != It2) {
- Range = {Range.start(), std::max(Range.end(), std::prev(It2)->end())};
- It = Ranges.erase(It, It2);
- }
- if (It != Ranges.begin() && Range.start() <= std::prev(It)->end()) {
- --It;
- *It = {It->start(), std::max(It->end(), Range.end())};
- return It;
- }
-
- return Ranges.insert(It, Range);
-}
-
-AddressRanges::Collection::const_iterator
-AddressRanges::find(uint64_t Addr) const {
- auto It = std::partition_point(
- Ranges.begin(), Ranges.end(),
- [=](const AddressRange &R) { return R.start() <= Addr; });
-
- if (It == Ranges.begin())
- return Ranges.end();
-
- --It;
- if (Addr >= It->end())
- return Ranges.end();
-
- return It;
-}
-
-AddressRanges::Collection::const_iterator
-AddressRanges::find(AddressRange Range) const {
- if (Range.size() == 0)
- return Ranges.end();
-
- auto It = std::partition_point(
- Ranges.begin(), Ranges.end(),
- [=](const AddressRange &R) { return R.start() <= Range.start(); });
-
- if (It == Ranges.begin())
- return Ranges.end();
-
- --It;
- if (Range.end() > It->end())
- return Ranges.end();
-
- return It;
-}
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S
index 449e07492832..69fc0936d73c 100644
--- a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S
+++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S
@@ -1,5 +1,7 @@
#if defined(__x86_64__)
+#include "llvm_blake3_prefix.h"
+
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",%progbits
#endif
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S
index bb58d2ae64b1..5ad1c641a7fc 100644
--- a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S
+++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S
@@ -1,3 +1,5 @@
+#include "llvm_blake3_prefix.h"
+
.intel_syntax noprefix
.global _blake3_hash_many_avx2
.global blake3_hash_many_avx2
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm
index 352298edd2e8..46bad1d98f38 100644
--- a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm
+++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_msvc.asm
@@ -1,11 +1,11 @@
-public _blake3_hash_many_avx2
-public blake3_hash_many_avx2
+public _llvm_blake3_hash_many_avx2
+public llvm_blake3_hash_many_avx2
_TEXT SEGMENT ALIGN(16) 'CODE'
ALIGN 16
-blake3_hash_many_avx2 PROC
-_blake3_hash_many_avx2 PROC
+llvm_blake3_hash_many_avx2 PROC
+_llvm_blake3_hash_many_avx2 PROC
push r15
push r14
push r13
@@ -1785,8 +1785,8 @@ endroundloop1:
vmovdqu xmmword ptr [rbx+10H], xmm1
jmp unwind
-_blake3_hash_many_avx2 ENDP
-blake3_hash_many_avx2 ENDP
+_llvm_blake3_hash_many_avx2 ENDP
+llvm_blake3_hash_many_avx2 ENDP
_TEXT ENDS
_RDATA SEGMENT READONLY PAGE ALIAS(".rdata") 'CONST'
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S
index 3afc0e2250e2..f04a135dd1bc 100644
--- a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S
+++ b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S
@@ -1,5 +1,7 @@
#if defined(__x86_64__)
+#include "llvm_blake3_prefix.h"
+
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",%progbits
#endif
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S
index e10b9f36cbcc..53c586141fbe 100644
--- a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S
+++ b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S
@@ -1,3 +1,5 @@
+#include "llvm_blake3_prefix.h"
+
.intel_syntax noprefix
.global _blake3_hash_many_avx512
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm
index b19efbaaeb36..f13d1b260ab8 100644
--- a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm
+++ b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_msvc.asm
@@ -1,15 +1,15 @@
-public _blake3_hash_many_avx512
-public blake3_hash_many_avx512
-public blake3_compress_in_place_avx512
-public _blake3_compress_in_place_avx512
-public blake3_compress_xof_avx512
-public _blake3_compress_xof_avx512
+public _llvm_blake3_hash_many_avx512
+public llvm_blake3_hash_many_avx512
+public llvm_blake3_compress_in_place_avx512
+public _llvm_blake3_compress_in_place_avx512
+public llvm_blake3_compress_xof_avx512
+public _llvm_blake3_compress_xof_avx512
_TEXT SEGMENT ALIGN(16) 'CODE'
ALIGN 16
-blake3_hash_many_avx512 PROC
-_blake3_hash_many_avx512 PROC
+llvm_blake3_hash_many_avx512 PROC
+_llvm_blake3_hash_many_avx512 PROC
push r15
push r14
push r13
@@ -2404,12 +2404,12 @@ endroundloop1:
vmovdqu xmmword ptr [rbx+10H], xmm1
jmp unwind
-_blake3_hash_many_avx512 ENDP
-blake3_hash_many_avx512 ENDP
+_llvm_blake3_hash_many_avx512 ENDP
+llvm_blake3_hash_many_avx512 ENDP
ALIGN 16
-blake3_compress_in_place_avx512 PROC
-_blake3_compress_in_place_avx512 PROC
+llvm_blake3_compress_in_place_avx512 PROC
+_llvm_blake3_compress_in_place_avx512 PROC
sub rsp, 72
vmovdqa xmmword ptr [rsp], xmm6
vmovdqa xmmword ptr [rsp+10H], xmm7
@@ -2498,12 +2498,12 @@ _blake3_compress_in_place_avx512 PROC
vmovdqa xmm9, xmmword ptr [rsp+30H]
add rsp, 72
ret
-_blake3_compress_in_place_avx512 ENDP
-blake3_compress_in_place_avx512 ENDP
+_llvm_blake3_compress_in_place_avx512 ENDP
+llvm_blake3_compress_in_place_avx512 ENDP
ALIGN 16
-blake3_compress_xof_avx512 PROC
-_blake3_compress_xof_avx512 PROC
+llvm_blake3_compress_xof_avx512 PROC
+_llvm_blake3_compress_xof_avx512 PROC
sub rsp, 72
vmovdqa xmmword ptr [rsp], xmm6
vmovdqa xmmword ptr [rsp+10H], xmm7
@@ -2597,8 +2597,8 @@ _blake3_compress_xof_avx512 PROC
vmovdqa xmm9, xmmword ptr [rsp+30H]
add rsp, 72
ret
-_blake3_compress_xof_avx512 ENDP
-blake3_compress_xof_avx512 ENDP
+_llvm_blake3_compress_xof_avx512 ENDP
+llvm_blake3_compress_xof_avx512 ENDP
_TEXT ENDS
diff --git a/llvm/lib/Support/BLAKE3/blake3_impl.h b/llvm/lib/Support/BLAKE3/blake3_impl.h
index 180d0a6eeda8..8e5456d745cd 100644
--- a/llvm/lib/Support/BLAKE3/blake3_impl.h
+++ b/llvm/lib/Support/BLAKE3/blake3_impl.h
@@ -11,15 +11,7 @@
// For \p LLVM_LIBRARY_VISIBILITY
#include "llvm/Support/Compiler.h"
-// Remove the 'llvm_' prefix for the rest of the internal implementation.
-#define BLAKE3_VERSION_STRING LLVM_BLAKE3_VERSION_STRING
-#define BLAKE3_KEY_LEN LLVM_BLAKE3_KEY_LEN
-#define BLAKE3_OUT_LEN LLVM_BLAKE3_OUT_LEN
-#define BLAKE3_BLOCK_LEN LLVM_BLAKE3_BLOCK_LEN
-#define BLAKE3_CHUNK_LEN LLVM_BLAKE3_CHUNK_LEN
-#define BLAKE3_MAX_DEPTH LLVM_BLAKE3_MAX_DEPTH
-#define blake3_hasher llvm_blake3_hasher
-#define blake3_chunk_state llvm_blake3_chunk_state
+#include "llvm_blake3_prefix.h"
// internal flags
enum blake3_flags {
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S
index 0106b13ba851..9a4f5eb7318b 100644
--- a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S
+++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S
@@ -1,5 +1,7 @@
#if defined(__x86_64__)
+#include "llvm_blake3_prefix.h"
+
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",%progbits
#endif
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S
index 8852ba5976e1..bf3b4523a9f1 100644
--- a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S
+++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S
@@ -1,3 +1,5 @@
+#include "llvm_blake3_prefix.h"
+
.intel_syntax noprefix
.global blake3_hash_many_sse2
.global _blake3_hash_many_sse2
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm
index 507502f11a80..1069c8df4ed6 100644
--- a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm
+++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_msvc.asm
@@ -1,15 +1,15 @@
-public _blake3_hash_many_sse2
-public blake3_hash_many_sse2
-public blake3_compress_in_place_sse2
-public _blake3_compress_in_place_sse2
-public blake3_compress_xof_sse2
-public _blake3_compress_xof_sse2
+public _llvm_blake3_hash_many_sse2
+public llvm_blake3_hash_many_sse2
+public llvm_blake3_compress_in_place_sse2
+public _llvm_blake3_compress_in_place_sse2
+public llvm_blake3_compress_xof_sse2
+public _llvm_blake3_compress_xof_sse2
_TEXT SEGMENT ALIGN(16) 'CODE'
ALIGN 16
-blake3_hash_many_sse2 PROC
-_blake3_hash_many_sse2 PROC
+llvm_blake3_hash_many_sse2 PROC
+_llvm_blake3_hash_many_sse2 PROC
push r15
push r14
push r13
@@ -2034,11 +2034,11 @@ endroundloop1:
movups xmmword ptr [rbx], xmm0
movups xmmword ptr [rbx+10H], xmm1
jmp unwind
-_blake3_hash_many_sse2 ENDP
-blake3_hash_many_sse2 ENDP
+_llvm_blake3_hash_many_sse2 ENDP
+llvm_blake3_hash_many_sse2 ENDP
-blake3_compress_in_place_sse2 PROC
-_blake3_compress_in_place_sse2 PROC
+llvm_blake3_compress_in_place_sse2 PROC
+_llvm_blake3_compress_in_place_sse2 PROC
sub rsp, 120
movdqa xmmword ptr [rsp], xmm6
movdqa xmmword ptr [rsp+10H], xmm7
@@ -2164,12 +2164,12 @@ _blake3_compress_in_place_sse2 PROC
movdqa xmm15, xmmword ptr [rsp+60H]
add rsp, 120
ret
-_blake3_compress_in_place_sse2 ENDP
-blake3_compress_in_place_sse2 ENDP
+_llvm_blake3_compress_in_place_sse2 ENDP
+llvm_blake3_compress_in_place_sse2 ENDP
ALIGN 16
-blake3_compress_xof_sse2 PROC
-_blake3_compress_xof_sse2 PROC
+llvm_blake3_compress_xof_sse2 PROC
+_llvm_blake3_compress_xof_sse2 PROC
sub rsp, 120
movdqa xmmword ptr [rsp], xmm6
movdqa xmmword ptr [rsp+10H], xmm7
@@ -2302,8 +2302,8 @@ _blake3_compress_xof_sse2 PROC
movdqa xmm15, xmmword ptr [rsp+60H]
add rsp, 120
ret
-_blake3_compress_xof_sse2 ENDP
-blake3_compress_xof_sse2 ENDP
+_llvm_blake3_compress_xof_sse2 ENDP
+llvm_blake3_compress_xof_sse2 ENDP
_TEXT ENDS
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S
index 4e918c5bb2cc..1be4ed744426 100644
--- a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S
+++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S
@@ -1,5 +1,7 @@
#if defined(__x86_64__)
+#include "llvm_blake3_prefix.h"
+
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",%progbits
#endif
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S
index 60d0a4042e71..28bdf3890a29 100644
--- a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S
+++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S
@@ -1,3 +1,5 @@
+#include "llvm_blake3_prefix.h"
+
.intel_syntax noprefix
.global blake3_hash_many_sse41
.global _blake3_hash_many_sse41
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm
index 8966c7b84406..770935372cd9 100644
--- a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm
+++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_msvc.asm
@@ -1,15 +1,15 @@
-public _blake3_hash_many_sse41
-public blake3_hash_many_sse41
-public blake3_compress_in_place_sse41
-public _blake3_compress_in_place_sse41
-public blake3_compress_xof_sse41
-public _blake3_compress_xof_sse41
+public _llvm_blake3_hash_many_sse41
+public llvm_blake3_hash_many_sse41
+public llvm_blake3_compress_in_place_sse41
+public _llvm_blake3_compress_in_place_sse41
+public llvm_blake3_compress_xof_sse41
+public _llvm_blake3_compress_xof_sse41
_TEXT SEGMENT ALIGN(16) 'CODE'
ALIGN 16
-blake3_hash_many_sse41 PROC
-_blake3_hash_many_sse41 PROC
+llvm_blake3_hash_many_sse41 PROC
+_llvm_blake3_hash_many_sse41 PROC
push r15
push r14
push r13
@@ -1797,11 +1797,11 @@ endroundloop1:
movups xmmword ptr [rbx], xmm0
movups xmmword ptr [rbx+10H], xmm1
jmp unwind
-_blake3_hash_many_sse41 ENDP
-blake3_hash_many_sse41 ENDP
+_llvm_blake3_hash_many_sse41 ENDP
+llvm_blake3_hash_many_sse41 ENDP
-blake3_compress_in_place_sse41 PROC
-_blake3_compress_in_place_sse41 PROC
+llvm_blake3_compress_in_place_sse41 PROC
+_llvm_blake3_compress_in_place_sse41 PROC
sub rsp, 120
movdqa xmmword ptr [rsp], xmm6
movdqa xmmword ptr [rsp+10H], xmm7
@@ -1916,12 +1916,12 @@ _blake3_compress_in_place_sse41 PROC
movdqa xmm15, xmmword ptr [rsp+60H]
add rsp, 120
ret
-_blake3_compress_in_place_sse41 ENDP
-blake3_compress_in_place_sse41 ENDP
+_llvm_blake3_compress_in_place_sse41 ENDP
+llvm_blake3_compress_in_place_sse41 ENDP
ALIGN 16
-blake3_compress_xof_sse41 PROC
-_blake3_compress_xof_sse41 PROC
+llvm_blake3_compress_xof_sse41 PROC
+_llvm_blake3_compress_xof_sse41 PROC
sub rsp, 120
movdqa xmmword ptr [rsp], xmm6
movdqa xmmword ptr [rsp+10H], xmm7
@@ -2043,8 +2043,8 @@ _blake3_compress_xof_sse41 PROC
movdqa xmm15, xmmword ptr [rsp+60H]
add rsp, 120
ret
-_blake3_compress_xof_sse41 ENDP
-blake3_compress_xof_sse41 ENDP
+_llvm_blake3_compress_xof_sse41 ENDP
+llvm_blake3_compress_xof_sse41 ENDP
_TEXT ENDS
diff --git a/llvm/lib/Support/BLAKE3/llvm_blake3_prefix.h b/llvm/lib/Support/BLAKE3/llvm_blake3_prefix.h
new file mode 100644
index 000000000000..3cee3691e4cf
--- /dev/null
+++ b/llvm/lib/Support/BLAKE3/llvm_blake3_prefix.h
@@ -0,0 +1,41 @@
+#ifndef LLVM_BLAKE3_PREFIX_H
+#define LLVM_BLAKE3_PREFIX_H
+
+#define BLAKE3_VERSION_STRING LLVM_BLAKE3_VERSION_STRING
+#define BLAKE3_KEY_LEN LLVM_BLAKE3_KEY_LEN
+#define BLAKE3_OUT_LEN LLVM_BLAKE3_OUT_LEN
+#define BLAKE3_BLOCK_LEN LLVM_BLAKE3_BLOCK_LEN
+#define BLAKE3_CHUNK_LEN LLVM_BLAKE3_CHUNK_LEN
+#define BLAKE3_MAX_DEPTH LLVM_BLAKE3_MAX_DEPTH
+#define blake3_hasher llvm_blake3_hasher
+#define blake3_chunk_state llvm_blake3_chunk_state
+#define blake3_compress_in_place llvm_blake3_compress_in_place
+#define blake3_compress_xof llvm_blake3_compress_xof
+#define blake3_hash_many llvm_blake3_hash_many
+#define blake3_simd_degree llvm_blake3_simd_degree
+#define blake3_compress_in_place_portable llvm_blake3_compress_in_place_portable
+#define blake3_compress_xof_portable llvm_blake3_compress_xof_portable
+#define blake3_hash_many_portable llvm_blake3_hash_many_portable
+#define blake3_compress_in_place_sse2 llvm_blake3_compress_in_place_sse2
+#define _blake3_compress_in_place_sse2 _llvm_blake3_compress_in_place_sse2
+#define blake3_compress_xof_sse2 llvm_blake3_compress_xof_sse2
+#define _blake3_compress_xof_sse2 _llvm_blake3_compress_xof_sse2
+#define blake3_hash_many_sse2 llvm_blake3_hash_many_sse2
+#define _blake3_hash_many_sse2 _llvm_blake3_hash_many_sse2
+#define blake3_compress_in_place_sse41 llvm_blake3_compress_in_place_sse41
+#define _blake3_compress_in_place_sse41 _llvm_blake3_compress_in_place_sse41
+#define blake3_compress_xof_sse41 llvm_blake3_compress_xof_sse41
+#define _blake3_compress_xof_sse41 _llvm_blake3_compress_xof_sse41
+#define blake3_hash_many_sse41 llvm_blake3_hash_many_sse41
+#define _blake3_hash_many_sse41 _llvm_blake3_hash_many_sse41
+#define blake3_hash_many_avx2 llvm_blake3_hash_many_avx2
+#define _blake3_hash_many_avx2 _llvm_blake3_hash_many_avx2
+#define blake3_compress_in_place_avx512 llvm_blake3_compress_in_place_avx512
+#define _blake3_compress_in_place_avx512 _llvm_blake3_compress_in_place_avx512
+#define blake3_compress_xof_avx512 llvm_blake3_compress_xof_avx512
+#define _blake3_compress_xof_avx512 _llvm_blake3_compress_xof_avx512
+#define blake3_hash_many_avx512 llvm_blake3_hash_many_avx512
+#define _blake3_hash_many_avx512 _llvm_blake3_hash_many_avx512
+#define blake3_hash_many_neon llvm_blake3_hash_many_neon
+
+#endif /* LLVM_BLAKE3_PREFIX_H */
diff --git a/llvm/lib/Support/BalancedPartitioning.cpp b/llvm/lib/Support/BalancedPartitioning.cpp
new file mode 100644
index 000000000000..113e9484f528
--- /dev/null
+++ b/llvm/lib/Support/BalancedPartitioning.cpp
@@ -0,0 +1,337 @@
+//===- BalancedPartitioning.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements BalancedPartitioning, a recursive balanced graph
+// partitioning algorithm.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/BalancedPartitioning.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/ThreadPool.h"
+
+using namespace llvm;
+#define DEBUG_TYPE "balanced-partitioning"
+
+void BPFunctionNode::dump(raw_ostream &OS) const {
+ OS << formatv("{{ID={0} Utilities={{{1:$[,]}} Bucket={2}}", Id,
+ make_range(UtilityNodes.begin(), UtilityNodes.end()), Bucket);
+}
+
+template <typename Func>
+void BalancedPartitioning::BPThreadPool::async(Func &&F) {
+#if LLVM_ENABLE_THREADS
+ // This new thread could spawn more threads, so mark it as active
+ ++NumActiveThreads;
+ TheThreadPool.async([=]() {
+ // Run the task
+ F();
+
+ // This thread will no longer spawn new threads, so mark it as inactive
+ if (--NumActiveThreads == 0) {
+ // There are no more active threads, so mark as finished and notify
+ {
+ std::unique_lock<std::mutex> lock(mtx);
+ assert(!IsFinishedSpawning);
+ IsFinishedSpawning = true;
+ }
+ cv.notify_one();
+ }
+ });
+#else
+ llvm_unreachable("threads are disabled");
+#endif
+}
+
+void BalancedPartitioning::BPThreadPool::wait() {
+#if LLVM_ENABLE_THREADS
+ // TODO: We could remove the mutex and condition variable and use
+ // std::atomic::wait() instead, but that isn't available until C++20
+ {
+ std::unique_lock<std::mutex> lock(mtx);
+ cv.wait(lock, [&]() { return IsFinishedSpawning; });
+ assert(IsFinishedSpawning && NumActiveThreads == 0);
+ }
+ // Now we can call ThreadPool::wait() since all tasks have been submitted
+ TheThreadPool.wait();
+#else
+ llvm_unreachable("threads are disabled");
+#endif
+}
+
+BalancedPartitioning::BalancedPartitioning(
+ const BalancedPartitioningConfig &Config)
+ : Config(Config) {
+ // Pre-computing log2 values
+ Log2Cache[0] = 0.0;
+ for (unsigned I = 1; I < LOG_CACHE_SIZE; I++)
+ Log2Cache[I] = std::log2(I);
+}
+
+void BalancedPartitioning::run(std::vector<BPFunctionNode> &Nodes) const {
+ LLVM_DEBUG(
+ dbgs() << format(
+ "Partitioning %d nodes using depth %d and %d iterations per split\n",
+ Nodes.size(), Config.SplitDepth, Config.IterationsPerSplit));
+ std::optional<BPThreadPool> TP;
+#if LLVM_ENABLE_THREADS
+ ThreadPool TheThreadPool;
+ if (Config.TaskSplitDepth > 1)
+ TP.emplace(TheThreadPool);
+#endif
+
+ // Record the input order
+ for (unsigned I = 0; I < Nodes.size(); I++)
+ Nodes[I].InputOrderIndex = I;
+
+ auto NodesRange = llvm::make_range(Nodes.begin(), Nodes.end());
+ auto BisectTask = [=, &TP]() {
+ bisect(NodesRange, /*RecDepth=*/0, /*RootBucket=*/1, /*Offset=*/0, TP);
+ };
+ if (TP) {
+ TP->async(std::move(BisectTask));
+ TP->wait();
+ } else {
+ BisectTask();
+ }
+
+ llvm::stable_sort(NodesRange, [](const auto &L, const auto &R) {
+ return L.Bucket < R.Bucket;
+ });
+
+ LLVM_DEBUG(dbgs() << "Balanced partitioning completed\n");
+}
+
+void BalancedPartitioning::bisect(const FunctionNodeRange Nodes,
+ unsigned RecDepth, unsigned RootBucket,
+ unsigned Offset,
+ std::optional<BPThreadPool> &TP) const {
+ unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
+ if (NumNodes <= 1 || RecDepth >= Config.SplitDepth) {
+ // We've reached the lowest level of the recursion tree. Fall back to the
+ // original order and assign to buckets.
+ llvm::stable_sort(Nodes, [](const auto &L, const auto &R) {
+ return L.InputOrderIndex < R.InputOrderIndex;
+ });
+ for (auto &N : Nodes)
+ N.Bucket = Offset++;
+ return;
+ }
+
+ LLVM_DEBUG(dbgs() << format("Bisect with %d nodes and root bucket %d\n",
+ NumNodes, RootBucket));
+
+ std::mt19937 RNG(RootBucket);
+
+ unsigned LeftBucket = 2 * RootBucket;
+ unsigned RightBucket = 2 * RootBucket + 1;
+
+ // Split into two and assign to the left and right buckets
+ split(Nodes, LeftBucket);
+
+ runIterations(Nodes, RecDepth, LeftBucket, RightBucket, RNG);
+
+ // Split nodes wrt the resulting buckets
+ auto NodesMid =
+ llvm::partition(Nodes, [&](auto &N) { return N.Bucket == LeftBucket; });
+ unsigned MidOffset = Offset + std::distance(Nodes.begin(), NodesMid);
+
+ auto LeftNodes = llvm::make_range(Nodes.begin(), NodesMid);
+ auto RightNodes = llvm::make_range(NodesMid, Nodes.end());
+
+ auto LeftRecTask = [=, &TP]() {
+ bisect(LeftNodes, RecDepth + 1, LeftBucket, Offset, TP);
+ };
+ auto RightRecTask = [=, &TP]() {
+ bisect(RightNodes, RecDepth + 1, RightBucket, MidOffset, TP);
+ };
+
+ if (TP && RecDepth < Config.TaskSplitDepth && NumNodes >= 4) {
+ TP->async(std::move(LeftRecTask));
+ TP->async(std::move(RightRecTask));
+ } else {
+ LeftRecTask();
+ RightRecTask();
+ }
+}
+
+void BalancedPartitioning::runIterations(const FunctionNodeRange Nodes,
+ unsigned RecDepth, unsigned LeftBucket,
+ unsigned RightBucket,
+ std::mt19937 &RNG) const {
+ unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
+ DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeDegree;
+ for (auto &N : Nodes)
+ for (auto &UN : N.UtilityNodes)
+ ++UtilityNodeDegree[UN];
+ // Remove utility nodes if they have just one edge or are connected to all
+ // functions
+ for (auto &N : Nodes)
+ llvm::erase_if(N.UtilityNodes, [&](auto &UN) {
+ return UtilityNodeDegree[UN] <= 1 || UtilityNodeDegree[UN] >= NumNodes;
+ });
+
+ // Renumber utility nodes so they can be used to index into Signatures
+ DenseMap<BPFunctionNode::UtilityNodeT, unsigned> UtilityNodeIndex;
+ for (auto &N : Nodes)
+ for (auto &UN : N.UtilityNodes)
+ if (!UtilityNodeIndex.count(UN))
+ UtilityNodeIndex[UN] = UtilityNodeIndex.size();
+ for (auto &N : Nodes)
+ for (auto &UN : N.UtilityNodes)
+ UN = UtilityNodeIndex[UN];
+
+ // Initialize signatures
+ SignaturesT Signatures(/*Size=*/UtilityNodeIndex.size());
+ for (auto &N : Nodes) {
+ for (auto &UN : N.UtilityNodes) {
+ assert(UN < Signatures.size());
+ if (N.Bucket == LeftBucket) {
+ Signatures[UN].LeftCount++;
+ } else {
+ Signatures[UN].RightCount++;
+ }
+ }
+ }
+
+ for (unsigned I = 0; I < Config.IterationsPerSplit; I++) {
+ unsigned NumMovedNodes =
+ runIteration(Nodes, LeftBucket, RightBucket, Signatures, RNG);
+ if (NumMovedNodes == 0)
+ break;
+ }
+}
+
+unsigned BalancedPartitioning::runIteration(const FunctionNodeRange Nodes,
+ unsigned LeftBucket,
+ unsigned RightBucket,
+ SignaturesT &Signatures,
+ std::mt19937 &RNG) const {
+ // Init signature cost caches
+ for (auto &Signature : Signatures) {
+ if (Signature.CachedGainIsValid)
+ continue;
+ unsigned L = Signature.LeftCount;
+ unsigned R = Signature.RightCount;
+ assert((L > 0 || R > 0) && "incorrect signature");
+ float Cost = logCost(L, R);
+ Signature.CachedGainLR = 0.f;
+ Signature.CachedGainRL = 0.f;
+ if (L > 0)
+ Signature.CachedGainLR = Cost - logCost(L - 1, R + 1);
+ if (R > 0)
+ Signature.CachedGainRL = Cost - logCost(L + 1, R - 1);
+ Signature.CachedGainIsValid = true;
+ }
+
+ // Compute move gains
+ typedef std::pair<float, BPFunctionNode *> GainPair;
+ std::vector<GainPair> Gains;
+ for (auto &N : Nodes) {
+ bool FromLeftToRight = (N.Bucket == LeftBucket);
+ float Gain = moveGain(N, FromLeftToRight, Signatures);
+ Gains.push_back(std::make_pair(Gain, &N));
+ }
+
+ // Collect left and right gains
+ auto LeftEnd = llvm::partition(
+ Gains, [&](const auto &GP) { return GP.second->Bucket == LeftBucket; });
+ auto LeftRange = llvm::make_range(Gains.begin(), LeftEnd);
+ auto RightRange = llvm::make_range(LeftEnd, Gains.end());
+
+ // Sort gains in descending order
+ auto LargerGain = [](const auto &L, const auto &R) {
+ return L.first > R.first;
+ };
+ llvm::stable_sort(LeftRange, LargerGain);
+ llvm::stable_sort(RightRange, LargerGain);
+
+ unsigned NumMovedDataVertices = 0;
+ for (auto [LeftPair, RightPair] : llvm::zip(LeftRange, RightRange)) {
+ auto &[LeftGain, LeftNode] = LeftPair;
+ auto &[RightGain, RightNode] = RightPair;
+ // Stop when the gain is no longer beneficial
+ if (LeftGain + RightGain <= 0.f)
+ break;
+ // Try to exchange the nodes between buckets
+ if (moveFunctionNode(*LeftNode, LeftBucket, RightBucket, Signatures, RNG))
+ ++NumMovedDataVertices;
+ if (moveFunctionNode(*RightNode, LeftBucket, RightBucket, Signatures, RNG))
+ ++NumMovedDataVertices;
+ }
+ return NumMovedDataVertices;
+}
+
+bool BalancedPartitioning::moveFunctionNode(BPFunctionNode &N,
+ unsigned LeftBucket,
+ unsigned RightBucket,
+ SignaturesT &Signatures,
+ std::mt19937 &RNG) const {
+ // Sometimes we skip the move. This helps to escape local optima
+ if (std::uniform_real_distribution<float>(0.f, 1.f)(RNG) <=
+ Config.SkipProbability)
+ return false;
+
+ bool FromLeftToRight = (N.Bucket == LeftBucket);
+ // Update the current bucket
+ N.Bucket = (FromLeftToRight ? RightBucket : LeftBucket);
+
+ // Update signatures and invalidate gain cache
+ if (FromLeftToRight) {
+ for (auto &UN : N.UtilityNodes) {
+ auto &Signature = Signatures[UN];
+ Signature.LeftCount--;
+ Signature.RightCount++;
+ Signature.CachedGainIsValid = false;
+ }
+ } else {
+ for (auto &UN : N.UtilityNodes) {
+ auto &Signature = Signatures[UN];
+ Signature.LeftCount++;
+ Signature.RightCount--;
+ Signature.CachedGainIsValid = false;
+ }
+ }
+ return true;
+}
+
+void BalancedPartitioning::split(const FunctionNodeRange Nodes,
+ unsigned StartBucket) const {
+ unsigned NumNodes = std::distance(Nodes.begin(), Nodes.end());
+ auto NodesMid = Nodes.begin() + (NumNodes + 1) / 2;
+
+ std::nth_element(Nodes.begin(), NodesMid, Nodes.end(), [](auto &L, auto &R) {
+ return L.InputOrderIndex < R.InputOrderIndex;
+ });
+
+ for (auto &N : llvm::make_range(Nodes.begin(), NodesMid))
+ N.Bucket = StartBucket;
+ for (auto &N : llvm::make_range(NodesMid, Nodes.end()))
+ N.Bucket = StartBucket + 1;
+}
+
+float BalancedPartitioning::moveGain(const BPFunctionNode &N,
+ bool FromLeftToRight,
+ const SignaturesT &Signatures) {
+ float Gain = 0.f;
+ for (auto &UN : N.UtilityNodes)
+ Gain += (FromLeftToRight ? Signatures[UN].CachedGainLR
+ : Signatures[UN].CachedGainRL);
+ return Gain;
+}
+
+float BalancedPartitioning::logCost(unsigned X, unsigned Y) const {
+ return -(X * log2Cached(X + 1) + Y * log2Cached(Y + 1));
+}
+
+float BalancedPartitioning::log2Cached(unsigned i) const {
+ return (i < LOG_CACHE_SIZE) ? Log2Cache[i] : std::log2(i);
+}
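
The new BalancedPartitioning.cpp above recursively bisects the node set and then runs a local search that moves nodes between the two halves, driven by the cached per-utility-node cost logCost(L, R) = -(L*log2(L+1) + R*log2(R+1)). A hedged usage sketch; the BPFunctionNode constructor shape is assumed from the accompanying header, and the IDs and utility nodes are illustrative:

#include "llvm/Support/BalancedPartitioning.h"
#include "llvm/ADT/SmallVector.h"
#include <vector>

using namespace llvm;

static void partitionExample() {
  // Nodes 0/1 share utility node 7 and nodes 2/3 share utility node 9, so a
  // good bisection keeps each pair on the same side.
  SmallVector<BPFunctionNode::UtilityNodeT, 4> U07 = {7};
  SmallVector<BPFunctionNode::UtilityNodeT, 4> U09 = {9};
  std::vector<BPFunctionNode> Nodes;
  Nodes.emplace_back(/*Id=*/0, U07);
  Nodes.emplace_back(/*Id=*/1, U07);
  Nodes.emplace_back(/*Id=*/2, U09);
  Nodes.emplace_back(/*Id=*/3, U09);

  BalancedPartitioningConfig Config; // default SplitDepth/IterationsPerSplit
  BalancedPartitioning BP(Config);
  BP.run(Nodes); // on return, Nodes is stably sorted by the assigned Bucket
}
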
diff --git a/llvm/lib/Support/BinaryStreamWriter.cpp b/llvm/lib/Support/BinaryStreamWriter.cpp
index dc4ea200c7be..3d87a30a86a1 100644
--- a/llvm/lib/Support/BinaryStreamWriter.cpp
+++ b/llvm/lib/Support/BinaryStreamWriter.cpp
@@ -8,6 +8,7 @@
#include "llvm/Support/BinaryStreamWriter.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/BinaryStreamReader.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/LEB128.h"
diff --git a/llvm/lib/Support/BlockFrequency.cpp b/llvm/lib/Support/BlockFrequency.cpp
index 702165ac480b..a4a1e477d940 100644
--- a/llvm/lib/Support/BlockFrequency.cpp
+++ b/llvm/lib/Support/BlockFrequency.cpp
@@ -12,7 +12,6 @@
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
-#include <cassert>
using namespace llvm;
@@ -37,47 +36,3 @@ BlockFrequency BlockFrequency::operator/(BranchProbability Prob) const {
Freq /= Prob;
return Freq;
}
-
-BlockFrequency &BlockFrequency::operator+=(BlockFrequency Freq) {
- uint64_t Before = Freq.Frequency;
- Frequency += Freq.Frequency;
-
- // If overflow, set frequency to the maximum value.
- if (Frequency < Before)
- Frequency = UINT64_MAX;
-
- return *this;
-}
-
-BlockFrequency BlockFrequency::operator+(BlockFrequency Freq) const {
- BlockFrequency NewFreq(Frequency);
- NewFreq += Freq;
- return NewFreq;
-}
-
-BlockFrequency &BlockFrequency::operator-=(BlockFrequency Freq) {
- // If underflow, set frequency to 0.
- if (Frequency <= Freq.Frequency)
- Frequency = 0;
- else
- Frequency -= Freq.Frequency;
- return *this;
-}
-
-BlockFrequency BlockFrequency::operator-(BlockFrequency Freq) const {
- BlockFrequency NewFreq(Frequency);
- NewFreq -= Freq;
- return NewFreq;
-}
-
-BlockFrequency &BlockFrequency::operator>>=(const unsigned count) {
- // Frequency can never be 0 by design.
- assert(Frequency != 0);
-
- // Shift right by count.
- Frequency >>= count;
-
- // Saturate to 1 if we are 0.
- Frequency |= Frequency == 0;
- return *this;
-}
diff --git a/llvm/lib/Support/Chrono.cpp b/llvm/lib/Support/Chrono.cpp
index 8c28d45d8822..859ece8f5500 100644
--- a/llvm/lib/Support/Chrono.cpp
+++ b/llvm/lib/Support/Chrono.cpp
@@ -74,7 +74,7 @@ void format_provider<TimePoint<>>::format(const TimePoint<> &T, raw_ostream &OS,
continue;
case 'N': // Nanoseconds, from date(1).
FStream << llvm::format(
- "%.6lu", (long)duration_cast<nanoseconds>(Fractional).count());
+ "%.9lu", (long)duration_cast<nanoseconds>(Fractional).count());
++I;
continue;
case '%': // Consume %%, so %%f parses as (%%)f not %(%f)
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index 66632504d6fb..d3efb8b67be5 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -208,8 +208,7 @@ public:
bool HadErrors = false;
if (O->hasArgStr()) {
// If it's a DefaultOption, check to make sure it isn't already there.
- if (O->isDefaultOption() &&
- SC->OptionsMap.find(O->ArgStr) != SC->OptionsMap.end())
+ if (O->isDefaultOption() && SC->OptionsMap.contains(O->ArgStr))
return;
// Add argument to the argument map!
@@ -2758,7 +2757,7 @@ StringMap<Option *> &cl::getRegisteredOptions(SubCommand &Sub) {
initCommonOptions();
auto &Subs = GlobalParser->RegisteredSubCommands;
(void)Subs;
- assert(is_contained(Subs, &Sub));
+ assert(Subs.contains(&Sub));
return Sub.OptionsMap;
}
diff --git a/llvm/lib/Support/ConvertEBCDIC.cpp b/llvm/lib/Support/ConvertEBCDIC.cpp
new file mode 100644
index 000000000000..08eeaa52a6c9
--- /dev/null
+++ b/llvm/lib/Support/ConvertEBCDIC.cpp
@@ -0,0 +1,123 @@
+//===--- ConvertEBCDIC.cpp - UTF8/EBCDIC CharSet Conversion -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file provides utility functions for converting between EBCDIC-1047 and
+/// UTF-8.
+///
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/ConvertEBCDIC.h"
+
+using namespace llvm;
+
+static const unsigned char ISO88591ToIBM1047[256] = {
+ 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x15, 0x0b,
+ 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26,
+ 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, 0x40, 0x5a, 0x7f, 0x7b,
+ 0x5b, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e,
+ 0x4c, 0x7e, 0x6e, 0x6f, 0x7c, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+ 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xe2,
+ 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xad, 0xe0, 0xbd, 0x5f, 0x6d,
+ 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92,
+ 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6,
+ 0xa7, 0xa8, 0xa9, 0xc0, 0x4f, 0xd0, 0xa1, 0x07, 0x20, 0x21, 0x22, 0x23,
+ 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x09, 0x0a, 0x1b,
+ 0x30, 0x31, 0x1a, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3a, 0x3b,
+ 0x04, 0x14, 0x3e, 0xff, 0x41, 0xaa, 0x4a, 0xb1, 0x9f, 0xb2, 0x6a, 0xb5,
+ 0xbb, 0xb4, 0x9a, 0x8a, 0xb0, 0xca, 0xaf, 0xbc, 0x90, 0x8f, 0xea, 0xfa,
+ 0xbe, 0xa0, 0xb6, 0xb3, 0x9d, 0xda, 0x9b, 0x8b, 0xb7, 0xb8, 0xb9, 0xab,
+ 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9e, 0x68, 0x74, 0x71, 0x72, 0x73,
+ 0x78, 0x75, 0x76, 0x77, 0xac, 0x69, 0xed, 0xee, 0xeb, 0xef, 0xec, 0xbf,
+ 0x80, 0xfd, 0xfe, 0xfb, 0xfc, 0xba, 0xae, 0x59, 0x44, 0x45, 0x42, 0x46,
+ 0x43, 0x47, 0x9c, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
+ 0x8c, 0x49, 0xcd, 0xce, 0xcb, 0xcf, 0xcc, 0xe1, 0x70, 0xdd, 0xde, 0xdb,
+ 0xdc, 0x8d, 0x8e, 0xdf};
+
+static const unsigned char IBM1047ToISO88591[256] = {
+ 0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f, 0x97, 0x8d, 0x8e, 0x0b,
+ 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x9d, 0x0a, 0x08, 0x87,
+ 0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d, 0x1e, 0x1f, 0x80, 0x81, 0x82, 0x83,
+ 0x84, 0x85, 0x17, 0x1b, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07,
+ 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9a, 0x9b,
+ 0x14, 0x15, 0x9e, 0x1a, 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1, 0xe3, 0xe5,
+ 0xe7, 0xf1, 0xa2, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, 0x26, 0xe9, 0xea, 0xeb,
+ 0xe8, 0xed, 0xee, 0xef, 0xec, 0xdf, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
+ 0x2d, 0x2f, 0xc2, 0xc4, 0xc0, 0xc1, 0xc3, 0xc5, 0xc7, 0xd1, 0xa6, 0x2c,
+ 0x25, 0x5f, 0x3e, 0x3f, 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf,
+ 0xcc, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, 0xd8, 0x61, 0x62, 0x63,
+ 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1,
+ 0xb0, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0xaa, 0xba,
+ 0xe6, 0xb8, 0xc6, 0xa4, 0xb5, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+ 0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0x5b, 0xde, 0xae, 0xac, 0xa3, 0xa5, 0xb7,
+ 0xa9, 0xa7, 0xb6, 0xbc, 0xbd, 0xbe, 0xdd, 0xa8, 0xaf, 0x5d, 0xb4, 0xd7,
+ 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0xad, 0xf4,
+ 0xf6, 0xf2, 0xf3, 0xf5, 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
+ 0x51, 0x52, 0xb9, 0xfb, 0xfc, 0xf9, 0xfa, 0xff, 0x5c, 0xf7, 0x53, 0x54,
+ 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2, 0xd3, 0xd5,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xb3, 0xdb,
+ 0xdc, 0xd9, 0xda, 0x9f};
+
+std::error_code
+ConverterEBCDIC::convertToEBCDIC(StringRef Source,
+ SmallVectorImpl<char> &Result) {
+ assert(Result.empty() && "Result must be empty!");
+ const unsigned char *Table = ISO88591ToIBM1047;
+ const unsigned char *Ptr =
+ reinterpret_cast<const unsigned char *>(Source.data());
+ size_t Length = Source.size();
+ Result.reserve(Length);
+ while (Length--) {
+ unsigned char Ch = *Ptr++;
+ // Handle UTF-8 2-byte-sequences in input.
+ if (Ch >= 128) {
+ // Only two-byte sequences can be decoded.
+ if (Ch != 0xc2 && Ch != 0xc3)
+ return std::make_error_code(std::errc::illegal_byte_sequence);
+ // Is buffer truncated?
+ if (!Length)
+ return std::make_error_code(std::errc::invalid_argument);
+ unsigned char Ch2 = *Ptr++;
+ // Is second byte well-formed?
+ if ((Ch2 & 0xc0) != 0x80)
+ return std::make_error_code(std::errc::illegal_byte_sequence);
+ Ch = Ch2 | (Ch << 6);
+ Length--;
+ }
+ // Translate the character.
+ Ch = Table[Ch];
+ Result.push_back(static_cast<char>(Ch));
+ }
+ return std::error_code();
+}
+
+void ConverterEBCDIC::convertToUTF8(StringRef Source,
+ SmallVectorImpl<char> &Result) {
+ assert(Result.empty() && "Result must be empty!");
+
+ const unsigned char *Table = IBM1047ToISO88591;
+ const unsigned char *Ptr =
+ reinterpret_cast<const unsigned char *>(Source.data());
+ size_t Length = Source.size();
+ Result.reserve(Length);
+ while (Length--) {
+ unsigned char Ch = *Ptr++;
+ // Translate the character.
+ Ch = Table[Ch];
+ // Handle UTF-8 2-byte-sequences in output.
+ if (Ch >= 128) {
+ // First byte prefixed with either 0xc2 or 0xc3.
+ Result.push_back(static_cast<char>(0xc0 | (Ch >> 6)));
+ // Second byte is either the same as the ASCII byte or ASCII byte -64.
+ Ch = Ch & 0xbf;
+ }
+ Result.push_back(static_cast<char>(Ch));
+ }
+}
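
The two-byte UTF-8 handling in convertToEBCDIC() above is compact, so a worked example may help: for U+00E9, encoded in UTF-8 as 0xC3 0xA9, the expression Ch2 | (Ch << 6) in 8-bit arithmetic recovers the Latin-1 code point 0xE9, which is then mapped through ISO88591ToIBM1047. A standalone sketch of just that decode step (not the LLVM API itself):

#include <cstdint>

// Mirror of the decode arithmetic in convertToEBCDIC(): the continuation
// byte supplies the low six bits and the lead byte (0xC2 or 0xC3) supplies
// bits 6-7; the 8-bit truncation drops everything else.
constexpr uint8_t decodeTwoByteUTF8(uint8_t Lead, uint8_t Cont) {
  return static_cast<uint8_t>(Cont | (Lead << 6));
}

static_assert(decodeTwoByteUTF8(0xC3, 0xA9) == 0xE9, "U+00E9 -> Latin-1 0xE9");
static_assert(decodeTwoByteUTF8(0xC2, 0xA2) == 0xA2, "U+00A2 -> Latin-1 0xA2");
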
diff --git a/llvm/lib/Support/ConvertUTFWrapper.cpp b/llvm/lib/Support/ConvertUTFWrapper.cpp
index 9bf3f8f8b897..3fa7365e72d3 100644
--- a/llvm/lib/Support/ConvertUTFWrapper.cpp
+++ b/llvm/lib/Support/ConvertUTFWrapper.cpp
@@ -102,7 +102,7 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {
if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) {
ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);
for (UTF16 &I : ByteSwapped)
- I = llvm::ByteSwap_16(I);
+ I = llvm::byteswap<uint16_t>(I);
Src = &ByteSwapped[0];
SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1;
}
@@ -160,7 +160,7 @@ bool convertUTF32ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out) {
if (Src[0] == UNI_UTF32_BYTE_ORDER_MARK_SWAPPED) {
ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd);
for (UTF32 &I : ByteSwapped)
- I = llvm::ByteSwap_32(I);
+ I = llvm::byteswap<uint32_t>(I);
Src = &ByteSwapped[0];
SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1;
}
diff --git a/llvm/lib/Support/CrashRecoveryContext.cpp b/llvm/lib/Support/CrashRecoveryContext.cpp
index e96a9b59d834..f53aea177d61 100644
--- a/llvm/lib/Support/CrashRecoveryContext.cpp
+++ b/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -431,7 +431,10 @@ bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
[[noreturn]] void CrashRecoveryContext::HandleExit(int RetCode) {
#if defined(_WIN32)
- // SEH and VEH
+ // Since the exception code is actually of NTSTATUS type, we use the
+ // Microsoft-recommended 0xE prefix, to signify that this is a user error.
+ // This value is a combination of the customer field (bit 29) and severity
+ // field (bits 30-31) in the NTSTATUS specification.
::RaiseException(0xE0000000 | RetCode, 0, 0, NULL);
#else
// On Unix we don't need to raise an exception, we go directly to
@@ -445,10 +448,10 @@ bool CrashRecoveryContext::RunSafely(function_ref<void()> Fn) {
bool CrashRecoveryContext::isCrash(int RetCode) {
#if defined(_WIN32)
- // On Windows, the high bits are reserved for kernel return codes. Values
- // starting with 0x80000000 are reserved for "warnings"; values of 0xC0000000
- // and up are for "errors". In practice, both are interpreted as a
- // non-continuable signal.
+ // On Windows, the code is interpreted as NTSTATUS. The two high bits
+ // represent the severity. Values starting with 0x80000000 are reserved for
+ // "warnings"; values of 0xC0000000 and up are for "errors". In practice, both
+ // are interpreted as a non-continuable signal.
unsigned Code = ((unsigned)RetCode & 0xF0000000) >> 28;
if (Code != 0xC && Code != 8)
return false;
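
The NTSTATUS comments above can be made concrete with a small hedged sketch; looksLikeCrash() mirrors the severity check in isCrash() but is illustrative, not the real function:

#include <cstdint>

// HandleExit() raises 0xE0000000 | RetCode: severity "error" (bits 30-31 set)
// plus the customer bit (bit 29), so the status is clearly user-generated.
constexpr uint32_t makeUserExitStatus(uint32_t RetCode) {
  return 0xE0000000u | RetCode;
}

// Same test as isCrash(): only the kernel severities 0xC (error) and 0x8
// (warning) in the top nibble are treated as real crashes.
constexpr bool looksLikeCrash(uint32_t Status) {
  unsigned Code = (Status & 0xF0000000u) >> 28;
  return Code == 0xC || Code == 0x8;
}

static_assert(!looksLikeCrash(makeUserExitStatus(1)), "user exit, not a crash");
static_assert(looksLikeCrash(0xC0000005u), "access violation is a crash");
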
diff --git a/llvm/lib/Support/DataExtractor.cpp b/llvm/lib/Support/DataExtractor.cpp
index 8cf312191153..59a44f4071b5 100644
--- a/llvm/lib/Support/DataExtractor.cpp
+++ b/llvm/lib/Support/DataExtractor.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/DataExtractor.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/LEB128.h"
diff --git a/llvm/lib/Support/DebugOptions.h b/llvm/lib/Support/DebugOptions.h
index 75e557d7d8d7..db727d5a584c 100644
--- a/llvm/lib/Support/DebugOptions.h
+++ b/llvm/lib/Support/DebugOptions.h
@@ -11,6 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_SUPPORT_DEBUGOPTIONS_H
+#define LLVM_SUPPORT_DEBUGOPTIONS_H
+
namespace llvm {
// These are invoked internally before parsing command line options.
@@ -27,3 +30,5 @@ void initDebugOptions();
void initRandomSeedOptions();
} // namespace llvm
+
+#endif // LLVM_SUPPORT_DEBUGOPTIONS_H
diff --git a/llvm/lib/Support/DivisionByConstantInfo.cpp b/llvm/lib/Support/DivisionByConstantInfo.cpp
index e7072d94e49c..8150bd83c79f 100644
--- a/llvm/lib/Support/DivisionByConstantInfo.cpp
+++ b/llvm/lib/Support/DivisionByConstantInfo.cpp
@@ -132,7 +132,7 @@ UnsignedDivisionByConstantInfo::get(const APInt &D, unsigned LeadingZeros,
(Q1.ult(Delta) || (Q1 == Delta && R1.isZero())));
if (Retval.IsAdd && !D[0] && AllowEvenDivisorOptimization) {
- unsigned PreShift = D.countTrailingZeros();
+ unsigned PreShift = D.countr_zero();
APInt ShiftedD = D.lshr(PreShift);
Retval =
UnsignedDivisionByConstantInfo::get(ShiftedD, LeadingZeros + PreShift);
diff --git a/llvm/lib/Support/ELFAttributeParser.cpp b/llvm/lib/Support/ELFAttributeParser.cpp
index a5a0676b1077..2e90b70dc83f 100644
--- a/llvm/lib/Support/ELFAttributeParser.cpp
+++ b/llvm/lib/Support/ELFAttributeParser.cpp
@@ -127,10 +127,14 @@ Error ELFAttributeParser::parseSubsection(uint32_t length) {
sw->printString("Vendor", vendorName);
}
- // Ignore unrecognized vendor-name.
- if (vendorName.lower() != vendor)
- return createStringError(errc::invalid_argument,
- "unrecognized vendor-name: " + vendorName);
+ // Handle a subsection with an unrecognized vendor-name by skipping
+ // over it to the next subsection. ADDENDA32 in the Arm ABI defines
+ // that vendor attribute sections must not affect compatibility, so
+ // this should always be safe.
+ if (vendorName.lower() != vendor) {
+ cursor.seek(end);
+ return Error::success();
+ }
while (cursor.tell() < end) {
/// Tag_File | Tag_Section | Tag_Symbol uleb128:byte-size
diff --git a/llvm/lib/Support/Errno.cpp b/llvm/lib/Support/Errno.cpp
index 7f665be8db6c..60a7e536b6c5 100644
--- a/llvm/lib/Support/Errno.cpp
+++ b/llvm/lib/Support/Errno.cpp
@@ -55,17 +55,11 @@ std::string StrError(int errnum) {
#elif HAVE_DECL_STRERROR_S // "Windows Secure API"
strerror_s(buffer, MaxErrStrLen - 1, errnum);
str = buffer;
-#elif defined(HAVE_STRERROR)
+#else
// Copy the thread un-safe result of strerror into
// the buffer as fast as possible to minimize impact
// of collision of strerror in multiple threads.
str = strerror(errnum);
-#else
- // Strange that this system doesn't even have strerror
- // but, oh well, just use a generic message
- raw_string_ostream stream(str);
- stream << "Error #" << errnum;
- stream.flush();
#endif
return str;
}
diff --git a/llvm/lib/Support/Error.cpp b/llvm/lib/Support/Error.cpp
index fbe86f2b59e1..21d591530b41 100644
--- a/llvm/lib/Support/Error.cpp
+++ b/llvm/lib/Support/Error.cpp
@@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Error.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include <system_error>
@@ -70,6 +72,15 @@ void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner) {
});
}
+/// Write all error messages (if any) in E to a string. The newline character
+/// is used to separate error messages.
+std::string toString(Error E) {
+ SmallVector<std::string, 2> Errors;
+ handleAllErrors(std::move(E), [&Errors](const ErrorInfoBase &EI) {
+ Errors.push_back(EI.message());
+ });
+ return join(Errors.begin(), Errors.end(), "\n");
+}
std::error_code ErrorList::convertToErrorCode() const {
return std::error_code(static_cast<int>(ErrorErrorCode::MultipleErrors),
@@ -149,7 +160,7 @@ void report_fatal_error(Error Err, bool GenCrashDiag) {
raw_string_ostream ErrStream(ErrMsg);
logAllUnhandledErrors(std::move(Err), ErrStream);
}
- report_fatal_error(Twine(ErrMsg));
+ report_fatal_error(Twine(ErrMsg), GenCrashDiag);
}
} // end namespace llvm
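
A hedged usage sketch of the new toString(Error) overload above; the error contents are illustrative:

#include "llvm/Support/Error.h"
#include <string>
#include <utility>

using namespace llvm;

static std::string describeFailure() {
  // joinErrors() builds an ErrorList; toString() consumes the Error and joins
  // the individual messages with '\n', yielding "first\nsecond" here.
  Error E = joinErrors(createStringError(inconvertibleErrorCode(), "first"),
                       createStringError(inconvertibleErrorCode(), "second"));
  return toString(std::move(E));
}
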
diff --git a/llvm/lib/Support/FileUtilities.cpp b/llvm/lib/Support/FileUtilities.cpp
index d01a41a46489..dbd6c324cf4d 100644
--- a/llvm/lib/Support/FileUtilities.cpp
+++ b/llvm/lib/Support/FileUtilities.cpp
@@ -169,7 +169,7 @@ static bool CompareNumbers(const char *&F1P, const char *&F2P,
/// DiffFilesWithTolerance - Compare the two files specified, returning 0 if the
/// files match, 1 if they are different, and 2 if there is a file error. This
-/// function differs from DiffFiles in that you can specify an absolete and
+/// function differs from DiffFiles in that you can specify an absolute and
/// relative FP error that is allowed to exist. If you specify a string to fill
/// in for the error option, it will set the string to an error message if an
/// error occurs, allowing the caller to distinguish between a failed diff and a
@@ -267,64 +267,6 @@ int llvm::DiffFilesWithTolerance(StringRef NameA,
return CompareFailed;
}
-void llvm::AtomicFileWriteError::log(raw_ostream &OS) const {
- OS << "atomic_write_error: ";
- switch (Error) {
- case atomic_write_error::failed_to_create_uniq_file:
- OS << "failed_to_create_uniq_file";
- return;
- case atomic_write_error::output_stream_error:
- OS << "output_stream_error";
- return;
- case atomic_write_error::failed_to_rename_temp_file:
- OS << "failed_to_rename_temp_file";
- return;
- }
- llvm_unreachable("unknown atomic_write_error value in "
- "failed_to_rename_temp_file::log()");
-}
-
-llvm::Error llvm::writeFileAtomically(StringRef TempPathModel,
- StringRef FinalPath, StringRef Buffer) {
- return writeFileAtomically(TempPathModel, FinalPath,
- [&Buffer](llvm::raw_ostream &OS) {
- OS.write(Buffer.data(), Buffer.size());
- return llvm::Error::success();
- });
-}
-
-llvm::Error llvm::writeFileAtomically(
- StringRef TempPathModel, StringRef FinalPath,
- std::function<llvm::Error(llvm::raw_ostream &)> Writer) {
- SmallString<128> GeneratedUniqPath;
- int TempFD;
- if (sys::fs::createUniqueFile(TempPathModel, TempFD, GeneratedUniqPath)) {
- return llvm::make_error<AtomicFileWriteError>(
- atomic_write_error::failed_to_create_uniq_file);
- }
- llvm::FileRemover RemoveTmpFileOnFail(GeneratedUniqPath);
-
- raw_fd_ostream OS(TempFD, /*shouldClose=*/true);
- if (llvm::Error Err = Writer(OS)) {
- return Err;
- }
-
- OS.close();
- if (OS.has_error()) {
- OS.clear_error();
- return llvm::make_error<AtomicFileWriteError>(
- atomic_write_error::output_stream_error);
- }
-
- if (sys::fs::rename(/*from=*/GeneratedUniqPath, /*to=*/FinalPath)) {
- return llvm::make_error<AtomicFileWriteError>(
- atomic_write_error::failed_to_rename_temp_file);
- }
-
- RemoveTmpFileOnFail.releaseFile();
- return Error::success();
-}
-
Expected<FilePermissionsApplier>
FilePermissionsApplier::create(StringRef InputFilename) {
sys::fs::file_status Status;
@@ -389,5 +331,3 @@ Error FilePermissionsApplier::apply(
return Error::success();
}
-
-char llvm::AtomicFileWriteError::ID;
diff --git a/llvm/lib/Support/FloatingPointMode.cpp b/llvm/lib/Support/FloatingPointMode.cpp
new file mode 100644
index 000000000000..9543884ff46e
--- /dev/null
+++ b/llvm/lib/Support/FloatingPointMode.cpp
@@ -0,0 +1,95 @@
+//===- FloatingPointMode.cpp ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/FloatingPointMode.h"
+#include "llvm/ADT/StringExtras.h"
+
+using namespace llvm;
+
+FPClassTest llvm::fneg(FPClassTest Mask) {
+ FPClassTest NewMask = Mask & fcNan;
+ if (Mask & fcNegInf)
+ NewMask |= fcPosInf;
+ if (Mask & fcNegNormal)
+ NewMask |= fcPosNormal;
+ if (Mask & fcNegSubnormal)
+ NewMask |= fcPosSubnormal;
+ if (Mask & fcNegZero)
+ NewMask |= fcPosZero;
+ if (Mask & fcPosZero)
+ NewMask |= fcNegZero;
+ if (Mask & fcPosSubnormal)
+ NewMask |= fcNegSubnormal;
+ if (Mask & fcPosNormal)
+ NewMask |= fcNegNormal;
+ if (Mask & fcPosInf)
+ NewMask |= fcNegInf;
+ return NewMask;
+}
+
+FPClassTest llvm::fabs(FPClassTest Mask) {
+ FPClassTest NewMask = Mask & fcNan;
+ if (Mask & fcPosZero)
+ NewMask |= fcZero;
+ if (Mask & fcPosSubnormal)
+ NewMask |= fcSubnormal;
+ if (Mask & fcPosNormal)
+ NewMask |= fcNormal;
+ if (Mask & fcPosInf)
+ NewMask |= fcInf;
+ return NewMask;
+}
+
+// Every bitfield has a unique name and one or more aliasing names that cover
+// multiple bits. Names should be listed in order of preference, with higher
+// popcounts listed first.
+//
+// Bits are consumed as printed. Each field should only be represented in one
+// printed field.
+static constexpr std::pair<FPClassTest, StringLiteral> NoFPClassName[] = {
+ {fcAllFlags, "all"},
+ {fcNan, "nan"},
+ {fcSNan, "snan"},
+ {fcQNan, "qnan"},
+ {fcInf, "inf"},
+ {fcNegInf, "ninf"},
+ {fcPosInf, "pinf"},
+ {fcZero, "zero"},
+ {fcNegZero, "nzero"},
+ {fcPosZero, "pzero"},
+ {fcSubnormal, "sub"},
+ {fcNegSubnormal, "nsub"},
+ {fcPosSubnormal, "psub"},
+ {fcNormal, "norm"},
+ {fcNegNormal, "nnorm"},
+ {fcPosNormal, "pnorm"}
+};
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, FPClassTest Mask) {
+ OS << '(';
+
+ if (Mask == fcNone) {
+ OS << "none)";
+ return OS;
+ }
+
+ ListSeparator LS(" ");
+ for (auto [BitTest, Name] : NoFPClassName) {
+ if ((Mask & BitTest) == BitTest) {
+ OS << LS << Name;
+
+ // Clear the bits so we don't print any aliased names later.
+ Mask &= ~BitTest;
+ }
+ }
+
+ assert(Mask == 0 && "didn't print some mask bits");
+
+ OS << ')';
+ return OS;
+}
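For context, a minimal sketch (illustrative only, not part of the patch) of how the alias-aware printer above behaves; it assumes the FPClassTest enumerators and the fneg/operator<< declarations are visible from llvm/ADT/FloatingPointMode.h, as the include above suggests:

#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/Support/raw_ostream.h"

void printFPClassExamples() {
  using namespace llvm;
  // Aliased names are preferred: fcSNan|fcQNan prints as the single name "nan".
  errs() << static_cast<FPClassTest>(fcNan | fcInf) << '\n'; // expected: (nan inf)
  errs() << fcNegZero << '\n';                               // expected: (nzero)
  errs() << fneg(fcPosInf) << '\n';                          // expected: (ninf)
}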
diff --git a/llvm/lib/Support/FoldingSet.cpp b/llvm/lib/Support/FoldingSet.cpp
index ece31b971c1c..419bf6740768 100644
--- a/llvm/lib/Support/FoldingSet.cpp
+++ b/llvm/lib/Support/FoldingSet.cpp
@@ -269,7 +269,7 @@ void FoldingSetBase::reserve(unsigned EltCount, const FoldingSetInfo &Info) {
// range of 1.0 - 2.0.
if(EltCount < capacity())
return;
- GrowBucketCount(PowerOf2Floor(EltCount), Info);
+ GrowBucketCount(llvm::bit_floor(EltCount), Info);
}
/// FindNodeOrInsertPos - Look up the node specified by ID. If it exists,
diff --git a/llvm/lib/Support/JSON.cpp b/llvm/lib/Support/JSON.cpp
index 0e7f7bf1d999..c672a43b033e 100644
--- a/llvm/lib/Support/JSON.cpp
+++ b/llvm/lib/Support/JSON.cpp
@@ -8,12 +8,14 @@
#include "llvm/Support/JSON.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/NativeFormatting.h"
+#include "llvm/Support/raw_ostream.h"
#include <cctype>
+#include <cerrno>
#include <optional>
namespace llvm {
diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp
index 745c46fb6ffb..097c22d33dd1 100644
--- a/llvm/lib/Support/KnownBits.cpp
+++ b/llvm/lib/Support/KnownBits.cpp
@@ -105,7 +105,7 @@ KnownBits KnownBits::sextInReg(unsigned SrcBitWidth) const {
KnownBits KnownBits::makeGE(const APInt &Val) const {
// Count the number of leading bit positions where our underlying value is
// known to be less than or equal to Val.
- unsigned N = (Zero | Val).countLeadingOnes();
+ unsigned N = (Zero | Val).countl_one();
// For each of those bit positions, if Val has a 1 in that bit then our
// underlying value must also have a 1.
@@ -129,7 +129,7 @@ KnownBits KnownBits::umax(const KnownBits &LHS, const KnownBits &RHS) {
// are common to these two values are also known in the result.
KnownBits L = LHS.makeGE(RHS.getMinValue());
KnownBits R = RHS.makeGE(LHS.getMinValue());
- return KnownBits::commonBits(L, R);
+ return L.intersectWith(R);
}
KnownBits KnownBits::umin(const KnownBits &LHS, const KnownBits &RHS) {
@@ -164,169 +164,189 @@ KnownBits KnownBits::smin(const KnownBits &LHS, const KnownBits &RHS) {
return Flip(umax(Flip(LHS), Flip(RHS)));
}
-KnownBits KnownBits::shl(const KnownBits &LHS, const KnownBits &RHS) {
+static unsigned getMaxShiftAmount(const APInt &MaxValue, unsigned BitWidth) {
+ if (isPowerOf2_32(BitWidth))
+ return MaxValue.extractBitsAsZExtValue(Log2_32(BitWidth), 0);
+ // This is only an approximate upper bound.
+ return MaxValue.getLimitedValue(BitWidth - 1);
+}
+
+KnownBits KnownBits::shl(const KnownBits &LHS, const KnownBits &RHS, bool NUW,
+ bool NSW, bool ShAmtNonZero) {
unsigned BitWidth = LHS.getBitWidth();
- KnownBits Known(BitWidth);
+ auto ShiftByConst = [&](const KnownBits &LHS, unsigned ShiftAmt) {
+ KnownBits Known;
+ bool ShiftedOutZero, ShiftedOutOne;
+ Known.Zero = LHS.Zero.ushl_ov(ShiftAmt, ShiftedOutZero);
+ Known.Zero.setLowBits(ShiftAmt);
+ Known.One = LHS.One.ushl_ov(ShiftAmt, ShiftedOutOne);
+
+ // All cases returning poison have been handled by MaxShiftAmount already.
+ if (NSW) {
+ if (NUW && ShiftAmt != 0)
+ // NUW means we can assume anything shifted out was a zero.
+ ShiftedOutZero = true;
+
+ if (ShiftedOutZero)
+ Known.makeNonNegative();
+ else if (ShiftedOutOne)
+ Known.makeNegative();
+ }
+ return Known;
+ };
- // If the shift amount is a valid constant then transform LHS directly.
- if (RHS.isConstant() && RHS.getConstant().ult(BitWidth)) {
- unsigned Shift = RHS.getConstant().getZExtValue();
- Known = LHS;
- Known.Zero <<= Shift;
- Known.One <<= Shift;
- // Low bits are known zero.
- Known.Zero.setLowBits(Shift);
+ // Fast path for a common case when LHS is completely unknown.
+ KnownBits Known(BitWidth);
+ unsigned MinShiftAmount = RHS.getMinValue().getLimitedValue(BitWidth);
+ if (MinShiftAmount == 0 && ShAmtNonZero)
+ MinShiftAmount = 1;
+ if (LHS.isUnknown()) {
+ Known.Zero.setLowBits(MinShiftAmount);
+ if (NUW && NSW && MinShiftAmount != 0)
+ Known.makeNonNegative();
return Known;
}
- // No matter the shift amount, the trailing zeros will stay zero.
- unsigned MinTrailingZeros = LHS.countMinTrailingZeros();
-
- // Minimum shift amount low bits are known zero.
- APInt MinShiftAmount = RHS.getMinValue();
- if (MinShiftAmount.ult(BitWidth)) {
- MinTrailingZeros += MinShiftAmount.getZExtValue();
- MinTrailingZeros = std::min(MinTrailingZeros, BitWidth);
+ // Determine maximum shift amount, taking NUW/NSW flags into account.
+ APInt MaxValue = RHS.getMaxValue();
+ unsigned MaxShiftAmount = getMaxShiftAmount(MaxValue, BitWidth);
+ if (NUW && NSW)
+ MaxShiftAmount = std::min(MaxShiftAmount, LHS.countMaxLeadingZeros() - 1);
+ if (NUW)
+ MaxShiftAmount = std::min(MaxShiftAmount, LHS.countMaxLeadingZeros());
+ if (NSW)
+ MaxShiftAmount = std::min(
+ MaxShiftAmount,
+ std::max(LHS.countMaxLeadingZeros(), LHS.countMaxLeadingOnes()) - 1);
+
+ // Fast path for common case where the shift amount is unknown.
+ if (MinShiftAmount == 0 && MaxShiftAmount == BitWidth - 1 &&
+ isPowerOf2_32(BitWidth)) {
+ Known.Zero.setLowBits(LHS.countMinTrailingZeros());
+ if (LHS.isAllOnes())
+ Known.One.setSignBit();
+ if (NSW) {
+ if (LHS.isNonNegative())
+ Known.makeNonNegative();
+ if (LHS.isNegative())
+ Known.makeNegative();
+ }
+ return Known;
}
- // If the maximum shift is in range, then find the common bits from all
- // possible shifts.
- APInt MaxShiftAmount = RHS.getMaxValue();
- if (MaxShiftAmount.ult(BitWidth) && !LHS.isUnknown()) {
- uint64_t ShiftAmtZeroMask = (~RHS.Zero).getZExtValue();
- uint64_t ShiftAmtOneMask = RHS.One.getZExtValue();
- assert(MinShiftAmount.ult(MaxShiftAmount) && "Illegal shift range");
- Known.Zero.setAllBits();
- Known.One.setAllBits();
- for (uint64_t ShiftAmt = MinShiftAmount.getZExtValue(),
- MaxShiftAmt = MaxShiftAmount.getZExtValue();
- ShiftAmt <= MaxShiftAmt; ++ShiftAmt) {
- // Skip if the shift amount is impossible.
- if ((ShiftAmtZeroMask & ShiftAmt) != ShiftAmt ||
- (ShiftAmtOneMask | ShiftAmt) != ShiftAmt)
- continue;
- KnownBits SpecificShift;
- SpecificShift.Zero = LHS.Zero << ShiftAmt;
- SpecificShift.One = LHS.One << ShiftAmt;
- Known = KnownBits::commonBits(Known, SpecificShift);
- if (Known.isUnknown())
- break;
- }
+ // Find the common bits from all possible shifts.
+ unsigned ShiftAmtZeroMask = RHS.Zero.zextOrTrunc(32).getZExtValue();
+ unsigned ShiftAmtOneMask = RHS.One.zextOrTrunc(32).getZExtValue();
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ for (unsigned ShiftAmt = MinShiftAmount; ShiftAmt <= MaxShiftAmount;
+ ++ShiftAmt) {
+ // Skip if the shift amount is impossible.
+ if ((ShiftAmtZeroMask & ShiftAmt) != 0 ||
+ (ShiftAmtOneMask | ShiftAmt) != ShiftAmt)
+ continue;
+ Known = Known.intersectWith(ShiftByConst(LHS, ShiftAmt));
+ if (Known.isUnknown())
+ break;
}
- Known.Zero.setLowBits(MinTrailingZeros);
+ // All shift amounts may result in poison.
+ if (Known.hasConflict())
+ Known.setAllZero();
return Known;
}
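A small usage sketch (illustrative only, not part of the patch) of the reworked shl; the new NUW/NSW/ShAmtNonZero parameters are passed explicitly here rather than relying on whatever defaults the header declares:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

KnownBits exampleShl() {
  // LHS is the 4-bit pattern ??10: bit 0 known zero, bit 1 known one.
  KnownBits LHS(4);
  LHS.Zero = APInt(4, 0b0001);
  LHS.One = APInt(4, 0b0010);
  // The shift amount is the constant 1.
  KnownBits Amt = KnownBits::makeConstant(APInt(4, 1));
  // Result is ?100: Zero = 0b0011, One = 0b0100.
  return KnownBits::shl(LHS, Amt, /*NUW=*/false, /*NSW=*/false,
                        /*ShAmtNonZero=*/false);
}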
-KnownBits KnownBits::lshr(const KnownBits &LHS, const KnownBits &RHS) {
+KnownBits KnownBits::lshr(const KnownBits &LHS, const KnownBits &RHS,
+ bool ShAmtNonZero) {
unsigned BitWidth = LHS.getBitWidth();
- KnownBits Known(BitWidth);
-
- if (RHS.isConstant() && RHS.getConstant().ult(BitWidth)) {
- unsigned Shift = RHS.getConstant().getZExtValue();
- Known = LHS;
- Known.Zero.lshrInPlace(Shift);
- Known.One.lshrInPlace(Shift);
+ auto ShiftByConst = [&](const KnownBits &LHS, unsigned ShiftAmt) {
+ KnownBits Known = LHS;
+ Known.Zero.lshrInPlace(ShiftAmt);
+ Known.One.lshrInPlace(ShiftAmt);
// High bits are known zero.
- Known.Zero.setHighBits(Shift);
+ Known.Zero.setHighBits(ShiftAmt);
return Known;
- }
-
- // No matter the shift amount, the leading zeros will stay zero.
- unsigned MinLeadingZeros = LHS.countMinLeadingZeros();
+ };
- // Minimum shift amount high bits are known zero.
- APInt MinShiftAmount = RHS.getMinValue();
- if (MinShiftAmount.ult(BitWidth)) {
- MinLeadingZeros += MinShiftAmount.getZExtValue();
- MinLeadingZeros = std::min(MinLeadingZeros, BitWidth);
+ // Fast path for a common case when LHS is completely unknown.
+ KnownBits Known(BitWidth);
+ unsigned MinShiftAmount = RHS.getMinValue().getLimitedValue(BitWidth);
+ if (MinShiftAmount == 0 && ShAmtNonZero)
+ MinShiftAmount = 1;
+ if (LHS.isUnknown()) {
+ Known.Zero.setHighBits(MinShiftAmount);
+ return Known;
}
- // If the maximum shift is in range, then find the common bits from all
- // possible shifts.
- APInt MaxShiftAmount = RHS.getMaxValue();
- if (MaxShiftAmount.ult(BitWidth) && !LHS.isUnknown()) {
- uint64_t ShiftAmtZeroMask = (~RHS.Zero).getZExtValue();
- uint64_t ShiftAmtOneMask = RHS.One.getZExtValue();
- assert(MinShiftAmount.ult(MaxShiftAmount) && "Illegal shift range");
- Known.Zero.setAllBits();
- Known.One.setAllBits();
- for (uint64_t ShiftAmt = MinShiftAmount.getZExtValue(),
- MaxShiftAmt = MaxShiftAmount.getZExtValue();
- ShiftAmt <= MaxShiftAmt; ++ShiftAmt) {
- // Skip if the shift amount is impossible.
- if ((ShiftAmtZeroMask & ShiftAmt) != ShiftAmt ||
- (ShiftAmtOneMask | ShiftAmt) != ShiftAmt)
- continue;
- KnownBits SpecificShift = LHS;
- SpecificShift.Zero.lshrInPlace(ShiftAmt);
- SpecificShift.One.lshrInPlace(ShiftAmt);
- Known = KnownBits::commonBits(Known, SpecificShift);
- if (Known.isUnknown())
- break;
- }
+ // Find the common bits from all possible shifts.
+ APInt MaxValue = RHS.getMaxValue();
+ unsigned MaxShiftAmount = getMaxShiftAmount(MaxValue, BitWidth);
+ unsigned ShiftAmtZeroMask = RHS.Zero.zextOrTrunc(32).getZExtValue();
+ unsigned ShiftAmtOneMask = RHS.One.zextOrTrunc(32).getZExtValue();
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ for (unsigned ShiftAmt = MinShiftAmount; ShiftAmt <= MaxShiftAmount;
+ ++ShiftAmt) {
+ // Skip if the shift amount is impossible.
+ if ((ShiftAmtZeroMask & ShiftAmt) != 0 ||
+ (ShiftAmtOneMask | ShiftAmt) != ShiftAmt)
+ continue;
+ Known = Known.intersectWith(ShiftByConst(LHS, ShiftAmt));
+ if (Known.isUnknown())
+ break;
}
- Known.Zero.setHighBits(MinLeadingZeros);
+ // All shift amounts may result in poison.
+ if (Known.hasConflict())
+ Known.setAllZero();
return Known;
}
-KnownBits KnownBits::ashr(const KnownBits &LHS, const KnownBits &RHS) {
+KnownBits KnownBits::ashr(const KnownBits &LHS, const KnownBits &RHS,
+ bool ShAmtNonZero) {
unsigned BitWidth = LHS.getBitWidth();
- KnownBits Known(BitWidth);
-
- if (RHS.isConstant() && RHS.getConstant().ult(BitWidth)) {
- unsigned Shift = RHS.getConstant().getZExtValue();
- Known = LHS;
- Known.Zero.ashrInPlace(Shift);
- Known.One.ashrInPlace(Shift);
+ auto ShiftByConst = [&](const KnownBits &LHS, unsigned ShiftAmt) {
+ KnownBits Known = LHS;
+ Known.Zero.ashrInPlace(ShiftAmt);
+ Known.One.ashrInPlace(ShiftAmt);
return Known;
- }
-
- // No matter the shift amount, the leading sign bits will stay.
- unsigned MinLeadingZeros = LHS.countMinLeadingZeros();
- unsigned MinLeadingOnes = LHS.countMinLeadingOnes();
+ };
- // Minimum shift amount high bits are known sign bits.
- APInt MinShiftAmount = RHS.getMinValue();
- if (MinShiftAmount.ult(BitWidth)) {
- if (MinLeadingZeros) {
- MinLeadingZeros += MinShiftAmount.getZExtValue();
- MinLeadingZeros = std::min(MinLeadingZeros, BitWidth);
- }
- if (MinLeadingOnes) {
- MinLeadingOnes += MinShiftAmount.getZExtValue();
- MinLeadingOnes = std::min(MinLeadingOnes, BitWidth);
+ // Fast path for a common case when LHS is completely unknown.
+ KnownBits Known(BitWidth);
+ unsigned MinShiftAmount = RHS.getMinValue().getLimitedValue(BitWidth);
+ if (MinShiftAmount == 0 && ShAmtNonZero)
+ MinShiftAmount = 1;
+ if (LHS.isUnknown()) {
+ if (MinShiftAmount == BitWidth) {
+ // Always poison. Return zero because we don't like returning conflict.
+ Known.setAllZero();
+ return Known;
}
+ return Known;
}
- // If the maximum shift is in range, then find the common bits from all
- // possible shifts.
- APInt MaxShiftAmount = RHS.getMaxValue();
- if (MaxShiftAmount.ult(BitWidth) && !LHS.isUnknown()) {
- uint64_t ShiftAmtZeroMask = (~RHS.Zero).getZExtValue();
- uint64_t ShiftAmtOneMask = RHS.One.getZExtValue();
- assert(MinShiftAmount.ult(MaxShiftAmount) && "Illegal shift range");
- Known.Zero.setAllBits();
- Known.One.setAllBits();
- for (uint64_t ShiftAmt = MinShiftAmount.getZExtValue(),
- MaxShiftAmt = MaxShiftAmount.getZExtValue();
- ShiftAmt <= MaxShiftAmt; ++ShiftAmt) {
- // Skip if the shift amount is impossible.
- if ((ShiftAmtZeroMask & ShiftAmt) != ShiftAmt ||
- (ShiftAmtOneMask | ShiftAmt) != ShiftAmt)
- continue;
- KnownBits SpecificShift = LHS;
- SpecificShift.Zero.ashrInPlace(ShiftAmt);
- SpecificShift.One.ashrInPlace(ShiftAmt);
- Known = KnownBits::commonBits(Known, SpecificShift);
- if (Known.isUnknown())
- break;
- }
+ // Find the common bits from all possible shifts.
+ APInt MaxValue = RHS.getMaxValue();
+ unsigned MaxShiftAmount = getMaxShiftAmount(MaxValue, BitWidth);
+ unsigned ShiftAmtZeroMask = RHS.Zero.zextOrTrunc(32).getZExtValue();
+ unsigned ShiftAmtOneMask = RHS.One.zextOrTrunc(32).getZExtValue();
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ for (unsigned ShiftAmt = MinShiftAmount; ShiftAmt <= MaxShiftAmount;
+ ++ShiftAmt) {
+ // Skip if the shift amount is impossible.
+ if ((ShiftAmtZeroMask & ShiftAmt) != 0 ||
+ (ShiftAmtOneMask | ShiftAmt) != ShiftAmt)
+ continue;
+ Known = Known.intersectWith(ShiftByConst(LHS, ShiftAmt));
+ if (Known.isUnknown())
+ break;
}
- Known.Zero.setHighBits(MinLeadingZeros);
- Known.One.setHighBits(MinLeadingOnes);
+ // All shift amounts may result in poison.
+ if (Known.hasConflict())
+ Known.setAllZero();
return Known;
}
@@ -399,19 +419,219 @@ KnownBits KnownBits::abs(bool IntMinIsPoison) const {
// Absolute value preserves trailing zero count.
KnownBits KnownAbs(getBitWidth());
- KnownAbs.Zero.setLowBits(countMinTrailingZeros());
- // We only know that the absolute values's MSB will be zero if INT_MIN is
- // poison, or there is a set bit that isn't the sign bit (otherwise it could
- // be INT_MIN).
- if (IntMinIsPoison || (!One.isZero() && !One.isMinSignedValue()))
- KnownAbs.Zero.setSignBit();
+ // If the input is negative, then abs(x) == -x.
+ if (isNegative()) {
+ KnownBits Tmp = *this;
+ // Special case for IntMinIsPoison. We know the sign bit is set and we know
+ // all the rest of the bits except one to be zero. Since we have
+ // IntMinIsPoison, that final bit MUST be a one, as otherwise the input is
+ // INT_MIN.
+ if (IntMinIsPoison && (Zero.popcount() + 2) == getBitWidth())
+ Tmp.One.setBit(countMinTrailingZeros());
+
+ KnownAbs = computeForAddSub(
+ /*Add*/ false, IntMinIsPoison,
+ KnownBits::makeConstant(APInt(getBitWidth(), 0)), Tmp);
+
+ // One more special case for IntMinIsPoison. If the sign bit is the only
+ // known one, the remaining unknown bits cannot all be zero, since that
+ // would make the input INT_MIN. So any known-zero high bits of the input
+ // become known ones in the result: in -x = ~x + 1 they flip to one, and the
+ // +1 cannot carry up into them because the low bits are not all zero. If
+ // the input is known to be INT_MIN, skip this; the result is poison anyway.
+ if (IntMinIsPoison && Tmp.countMinPopulation() == 1 &&
+ Tmp.countMaxPopulation() != 1) {
+ Tmp.One.clearSignBit();
+ Tmp.Zero.setSignBit();
+ KnownAbs.One.setBits(getBitWidth() - Tmp.countMinLeadingZeros(),
+ getBitWidth() - 1);
+ }
+
+ } else {
+ unsigned MaxTZ = countMaxTrailingZeros();
+ unsigned MinTZ = countMinTrailingZeros();
+
+ KnownAbs.Zero.setLowBits(MinTZ);
+ // If we know the lowest set 1, then preserve it.
+ if (MaxTZ == MinTZ && MaxTZ < getBitWidth())
+ KnownAbs.One.setBit(MaxTZ);
+
+ // We only know that the absolute value's MSB will be zero if INT_MIN is
+ // poison, or there is a set bit that isn't the sign bit (otherwise it could
+ // be INT_MIN).
+ if (IntMinIsPoison || (!One.isZero() && !One.isMinSignedValue())) {
+ KnownAbs.One.clearSignBit();
+ KnownAbs.Zero.setSignBit();
+ }
+ }
- // FIXME: Handle known negative input?
- // FIXME: Calculate the negated Known bits and combine them?
+ assert(!KnownAbs.hasConflict() && "Bad Output");
return KnownAbs;
}
+static KnownBits computeForSatAddSub(bool Add, bool Signed,
+ const KnownBits &LHS,
+ const KnownBits &RHS) {
+ assert(!LHS.hasConflict() && !RHS.hasConflict() && "Bad inputs");
+ // We don't see NSW even for sadd/ssub as we want to check if the result has
+ // signed overflow.
+ KnownBits Res = KnownBits::computeForAddSub(Add, /*NSW*/ false, LHS, RHS);
+ unsigned BitWidth = Res.getBitWidth();
+ auto SignBitKnown = [&](const KnownBits &K) {
+ return K.Zero[BitWidth - 1] || K.One[BitWidth - 1];
+ };
+ std::optional<bool> Overflow;
+
+ if (Signed) {
+ // If we can actually detect overflow do so. Otherwise leave Overflow as
+ // nullopt (we assume it may have happened).
+ if (SignBitKnown(LHS) && SignBitKnown(RHS) && SignBitKnown(Res)) {
+ if (Add) {
+ // sadd.sat
+ Overflow = (LHS.isNonNegative() == RHS.isNonNegative() &&
+ Res.isNonNegative() != LHS.isNonNegative());
+ } else {
+ // ssub.sat
+ Overflow = (LHS.isNonNegative() != RHS.isNonNegative() &&
+ Res.isNonNegative() != LHS.isNonNegative());
+ }
+ }
+ } else if (Add) {
+ // uadd.sat
+ bool Of;
+ (void)LHS.getMaxValue().uadd_ov(RHS.getMaxValue(), Of);
+ if (!Of) {
+ Overflow = false;
+ } else {
+ (void)LHS.getMinValue().uadd_ov(RHS.getMinValue(), Of);
+ if (Of)
+ Overflow = true;
+ }
+ } else {
+ // usub.sat
+ bool Of;
+ (void)LHS.getMinValue().usub_ov(RHS.getMaxValue(), Of);
+ if (!Of) {
+ Overflow = false;
+ } else {
+ (void)LHS.getMaxValue().usub_ov(RHS.getMinValue(), Of);
+ if (Of)
+ Overflow = true;
+ }
+ }
+
+ if (Signed) {
+ if (Add) {
+ if (LHS.isNonNegative() && RHS.isNonNegative()) {
+ // Pos + Pos -> Pos
+ Res.One.clearSignBit();
+ Res.Zero.setSignBit();
+ }
+ if (LHS.isNegative() && RHS.isNegative()) {
+ // Neg + Neg -> Neg
+ Res.One.setSignBit();
+ Res.Zero.clearSignBit();
+ }
+ } else {
+ if (LHS.isNegative() && RHS.isNonNegative()) {
+ // Neg - Pos -> Neg
+ Res.One.setSignBit();
+ Res.Zero.clearSignBit();
+ } else if (LHS.isNonNegative() && RHS.isNegative()) {
+ // Pos - Neg -> Pos
+ Res.One.clearSignBit();
+ Res.Zero.setSignBit();
+ }
+ }
+ } else {
+ // Add: Leading ones of either operand are preserved.
+ // Sub: Leading zeros of LHS and leading ones of RHS are preserved
+ // as leading zeros in the result.
+ unsigned LeadingKnown;
+ if (Add)
+ LeadingKnown =
+ std::max(LHS.countMinLeadingOnes(), RHS.countMinLeadingOnes());
+ else
+ LeadingKnown =
+ std::max(LHS.countMinLeadingZeros(), RHS.countMinLeadingOnes());
+
+ // We select between the operation result and all-ones/zero
+ // respectively, so we can preserve known ones/zeros.
+ APInt Mask = APInt::getHighBitsSet(BitWidth, LeadingKnown);
+ if (Add) {
+ Res.One |= Mask;
+ Res.Zero &= ~Mask;
+ } else {
+ Res.Zero |= Mask;
+ Res.One &= ~Mask;
+ }
+ }
+
+ if (Overflow) {
+ // We know whether or not we overflowed.
+ if (!(*Overflow)) {
+ // No overflow.
+ assert(!Res.hasConflict() && "Bad Output");
+ return Res;
+ }
+
+ // We overflowed
+ APInt C;
+ if (Signed) {
+ // sadd.sat / ssub.sat
+ assert(SignBitKnown(LHS) &&
+ "We somehow know overflow without knowing input sign");
+ C = LHS.isNegative() ? APInt::getSignedMinValue(BitWidth)
+ : APInt::getSignedMaxValue(BitWidth);
+ } else if (Add) {
+ // uadd.sat
+ C = APInt::getMaxValue(BitWidth);
+ } else {
+ // usub.sat
+ C = APInt::getMinValue(BitWidth);
+ }
+
+ Res.One = C;
+ Res.Zero = ~C;
+ assert(!Res.hasConflict() && "Bad Output");
+ return Res;
+ }
+
+ // We don't know if we overflowed.
+ if (Signed) {
+ // sadd.sat/ssub.sat
+ // We can keep our information about the sign bits.
+ Res.Zero.clearLowBits(BitWidth - 1);
+ Res.One.clearLowBits(BitWidth - 1);
+ } else if (Add) {
+ // uadd.sat
+ // We need to clear all the known zeros as we can only use the leading ones.
+ Res.Zero.clearAllBits();
+ } else {
+ // usub.sat
+ // We need to clear all the known ones as we can only use the leading zeros.
+ Res.One.clearAllBits();
+ }
+
+ assert(!Res.hasConflict() && "Bad Output");
+ return Res;
+}
+
+KnownBits KnownBits::sadd_sat(const KnownBits &LHS, const KnownBits &RHS) {
+ return computeForSatAddSub(/*Add*/ true, /*Signed*/ true, LHS, RHS);
+}
+KnownBits KnownBits::ssub_sat(const KnownBits &LHS, const KnownBits &RHS) {
+ return computeForSatAddSub(/*Add*/ false, /*Signed*/ true, LHS, RHS);
+}
+KnownBits KnownBits::uadd_sat(const KnownBits &LHS, const KnownBits &RHS) {
+ return computeForSatAddSub(/*Add*/ true, /*Signed*/ false, LHS, RHS);
+}
+KnownBits KnownBits::usub_sat(const KnownBits &LHS, const KnownBits &RHS) {
+ return computeForSatAddSub(/*Add*/ false, /*Signed*/ false, LHS, RHS);
+}
+
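A hedged sketch (not part of the patch) of what the new saturating helpers can conclude; it relies only on the logic shown above:

#include "llvm/Support/KnownBits.h"
using namespace llvm;

KnownBits exampleSAddSat() {
  // Both 8-bit operands are known non-negative (sign bit known zero).
  KnownBits L(8), R(8);
  L.Zero.setSignBit();
  R.Zero.setSignBit();
  // Even though overflow is unknown, sadd.sat of two non-negative values
  // saturates at SMAX, so the result's sign bit is reported as known zero.
  return KnownBits::sadd_sat(L, R);
}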
KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS,
bool NoUndefSelfMultiply) {
unsigned BitWidth = LHS.getBitWidth();
@@ -432,7 +652,7 @@ KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS,
// fit in the bitwidth (it must not overflow).
bool HasOverflow;
APInt UMaxResult = UMaxLHS.umul_ov(UMaxRHS, HasOverflow);
- unsigned LeadZ = HasOverflow ? 0 : UMaxResult.countLeadingZeros();
+ unsigned LeadZ = HasOverflow ? 0 : UMaxResult.countl_zero();
// The result of the bottom bits of an integer multiply can be
// inferred by looking at the bottom bits of both operands and
@@ -481,8 +701,8 @@ KnownBits KnownBits::mul(const KnownBits &LHS, const KnownBits &RHS,
// How many times we'd be able to divide each argument by 2 (shr by 1).
// This gives us the number of trailing zeros on the multiplication result.
- unsigned TrailBitsKnown0 = (LHS.Zero | LHS.One).countTrailingOnes();
- unsigned TrailBitsKnown1 = (RHS.Zero | RHS.One).countTrailingOnes();
+ unsigned TrailBitsKnown0 = (LHS.Zero | LHS.One).countr_one();
+ unsigned TrailBitsKnown1 = (RHS.Zero | RHS.One).countr_one();
unsigned TrailZero0 = LHS.countMinTrailingZeros();
unsigned TrailZero1 = RHS.countMinTrailingZeros();
unsigned TrailZ = TrailZero0 + TrailZero1;
@@ -528,34 +748,151 @@ KnownBits KnownBits::mulhu(const KnownBits &LHS, const KnownBits &RHS) {
return mul(WideLHS, WideRHS).extractBits(BitWidth, BitWidth);
}
-KnownBits KnownBits::udiv(const KnownBits &LHS, const KnownBits &RHS) {
+static KnownBits divComputeLowBit(KnownBits Known, const KnownBits &LHS,
+ const KnownBits &RHS, bool Exact) {
+
+ if (!Exact)
+ return Known;
+
+ // If LHS is Odd, the result is Odd no matter what.
+ // Odd / Odd -> Odd
+ // Odd / Even -> Impossible (because it's an exact division)
+ if (LHS.One[0])
+ Known.One.setBit(0);
+
+ int MinTZ =
+ (int)LHS.countMinTrailingZeros() - (int)RHS.countMaxTrailingZeros();
+ int MaxTZ =
+ (int)LHS.countMaxTrailingZeros() - (int)RHS.countMinTrailingZeros();
+ if (MinTZ >= 0) {
+ // Result has at least MinTZ trailing zeros.
+ Known.Zero.setLowBits(MinTZ);
+ if (MinTZ == MaxTZ) {
+ // Result has exactly MinTZ trailing zeros.
+ Known.One.setBit(MinTZ);
+ }
+ } else if (MaxTZ < 0) {
+ // Poison Result
+ Known.setAllZero();
+ }
+
+ // In the KnownBits exhaustive tests, we have poison inputs for exact values
+ // a LOT. If we have a conflict, just return all zeros.
+ if (Known.hasConflict())
+ Known.setAllZero();
+
+ return Known;
+}
+
+KnownBits KnownBits::sdiv(const KnownBits &LHS, const KnownBits &RHS,
+ bool Exact) {
+ // Equivalent of `udiv`. We must have caught this before it was folded.
+ if (LHS.isNonNegative() && RHS.isNonNegative())
+ return udiv(LHS, RHS, Exact);
+
+ unsigned BitWidth = LHS.getBitWidth();
+ assert(!LHS.hasConflict() && !RHS.hasConflict() && "Bad inputs");
+ KnownBits Known(BitWidth);
+
+ if (LHS.isZero() || RHS.isZero()) {
+ // Result is either known Zero or UB. Return Zero either way.
+ // Checking this earlier saves us a lot of special cases later on.
+ Known.setAllZero();
+ return Known;
+ }
+
+ std::optional<APInt> Res;
+ if (LHS.isNegative() && RHS.isNegative()) {
+ // Result non-negative.
+ APInt Denom = RHS.getSignedMaxValue();
+ APInt Num = LHS.getSignedMinValue();
+ // INT_MIN/-1 would be a poison result (impossible). Estimate the division
+ // as signed max (we will only set sign bit in the result).
+ Res = (Num.isMinSignedValue() && Denom.isAllOnes())
+ ? APInt::getSignedMaxValue(BitWidth)
+ : Num.sdiv(Denom);
+ } else if (LHS.isNegative() && RHS.isNonNegative()) {
+ // Result is negative if Exact OR -LHS u>= RHS.
+ if (Exact || (-LHS.getSignedMaxValue()).uge(RHS.getSignedMaxValue())) {
+ APInt Denom = RHS.getSignedMinValue();
+ APInt Num = LHS.getSignedMinValue();
+ Res = Denom.isZero() ? Num : Num.sdiv(Denom);
+ }
+ } else if (LHS.isStrictlyPositive() && RHS.isNegative()) {
+ // Result is negative if Exact OR LHS u>= -RHS.
+ if (Exact || LHS.getSignedMinValue().uge(-RHS.getSignedMinValue())) {
+ APInt Denom = RHS.getSignedMaxValue();
+ APInt Num = LHS.getSignedMaxValue();
+ Res = Num.sdiv(Denom);
+ }
+ }
+
+ if (Res) {
+ if (Res->isNonNegative()) {
+ unsigned LeadZ = Res->countLeadingZeros();
+ Known.Zero.setHighBits(LeadZ);
+ } else {
+ unsigned LeadO = Res->countLeadingOnes();
+ Known.One.setHighBits(LeadO);
+ }
+ }
+
+ Known = divComputeLowBit(Known, LHS, RHS, Exact);
+
+ assert(!Known.hasConflict() && "Bad Output");
+ return Known;
+}
+
+KnownBits KnownBits::udiv(const KnownBits &LHS, const KnownBits &RHS,
+ bool Exact) {
unsigned BitWidth = LHS.getBitWidth();
assert(!LHS.hasConflict() && !RHS.hasConflict());
KnownBits Known(BitWidth);
- // For the purposes of computing leading zeros we can conservatively
- // treat a udiv as a logical right shift by the power of 2 known to
- // be less than the denominator.
- unsigned LeadZ = LHS.countMinLeadingZeros();
- unsigned RHSMaxLeadingZeros = RHS.countMaxLeadingZeros();
+ if (LHS.isZero() || RHS.isZero()) {
+ // Result is either known Zero or UB. Return Zero either way.
+ // Checking this earlier saves us a lot of special cases later on.
+ Known.setAllZero();
+ return Known;
+ }
- if (RHSMaxLeadingZeros != BitWidth)
- LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSMaxLeadingZeros - 1);
+ // We can figure out the minimum number of upper zero bits by doing
+ // MaxNumerator / MinDenominator. If the Numerator gets smaller or Denominator
+ // gets larger, the number of upper zero bits increases.
+ APInt MinDenom = RHS.getMinValue();
+ APInt MaxNum = LHS.getMaxValue();
+ APInt MaxRes = MinDenom.isZero() ? MaxNum : MaxNum.udiv(MinDenom);
+
+ unsigned LeadZ = MaxRes.countLeadingZeros();
Known.Zero.setHighBits(LeadZ);
+ Known = divComputeLowBit(Known, LHS, RHS, Exact);
+
+ assert(!Known.hasConflict() && "Bad Output");
return Known;
}
-KnownBits KnownBits::urem(const KnownBits &LHS, const KnownBits &RHS) {
+KnownBits KnownBits::remGetLowBits(const KnownBits &LHS, const KnownBits &RHS) {
unsigned BitWidth = LHS.getBitWidth();
+ if (!RHS.isZero() && RHS.Zero[0]) {
+ // rem X, Y where Y[0:N] is zero will preserve X[0:N] in the result.
+ unsigned RHSZeros = RHS.countMinTrailingZeros();
+ APInt Mask = APInt::getLowBitsSet(BitWidth, RHSZeros);
+ APInt OnesMask = LHS.One & Mask;
+ APInt ZerosMask = LHS.Zero & Mask;
+ return KnownBits(ZerosMask, OnesMask);
+ }
+ return KnownBits(BitWidth);
+}
+
+KnownBits KnownBits::urem(const KnownBits &LHS, const KnownBits &RHS) {
assert(!LHS.hasConflict() && !RHS.hasConflict());
- KnownBits Known(BitWidth);
+ KnownBits Known = remGetLowBits(LHS, RHS);
if (RHS.isConstant() && RHS.getConstant().isPowerOf2()) {
- // The upper bits are all zero, the lower ones are unchanged.
- APInt LowBits = RHS.getConstant() - 1;
- Known.Zero = LHS.Zero | ~LowBits;
- Known.One = LHS.One & LowBits;
+ // NB: Low bits set in `remGetLowBits`.
+ APInt HighBits = ~(RHS.getConstant() - 1);
+ Known.Zero |= HighBits;
return Known;
}
@@ -568,16 +905,12 @@ KnownBits KnownBits::urem(const KnownBits &LHS, const KnownBits &RHS) {
}
KnownBits KnownBits::srem(const KnownBits &LHS, const KnownBits &RHS) {
- unsigned BitWidth = LHS.getBitWidth();
assert(!LHS.hasConflict() && !RHS.hasConflict());
- KnownBits Known(BitWidth);
+ KnownBits Known = remGetLowBits(LHS, RHS);
if (RHS.isConstant() && RHS.getConstant().isPowerOf2()) {
- // The low bits of the first operand are unchanged by the srem.
+ // NB: Low bits are set in `remGetLowBits`.
APInt LowBits = RHS.getConstant() - 1;
- Known.Zero = LHS.Zero & LowBits;
- Known.One = LHS.One & LowBits;
-
// If the first operand is non-negative or has all low bits zero, then
// the upper bits are all zero.
if (LHS.isNonNegative() || LowBits.isSubsetOf(LHS.Zero))
@@ -623,8 +956,40 @@ KnownBits &KnownBits::operator^=(const KnownBits &RHS) {
return *this;
}
+KnownBits KnownBits::blsi() const {
+ unsigned BitWidth = getBitWidth();
+ KnownBits Known(Zero, APInt(BitWidth, 0));
+ unsigned Max = countMaxTrailingZeros();
+ Known.Zero.setBitsFrom(std::min(Max + 1, BitWidth));
+ unsigned Min = countMinTrailingZeros();
+ if (Max == Min && Max < BitWidth)
+ Known.One.setBit(Max);
+ return Known;
+}
+
+KnownBits KnownBits::blsmsk() const {
+ unsigned BitWidth = getBitWidth();
+ KnownBits Known(BitWidth);
+ unsigned Max = countMaxTrailingZeros();
+ Known.Zero.setBitsFrom(std::min(Max + 1, BitWidth));
+ unsigned Min = countMinTrailingZeros();
+ Known.One.setLowBits(std::min(Min + 1, BitWidth));
+ return Known;
+}
+
void KnownBits::print(raw_ostream &OS) const {
- OS << "{Zero=" << Zero << ", One=" << One << "}";
+ unsigned BitWidth = getBitWidth();
+ for (unsigned I = 0; I < BitWidth; ++I) {
+ unsigned N = BitWidth - I - 1;
+ if (Zero[N] && One[N])
+ OS << "!";
+ else if (Zero[N])
+ OS << "0";
+ else if (One[N])
+ OS << "1";
+ else
+ OS << "?";
+ }
}
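Illustrative only: with the new bit-string formatting, a value prints most-significant bit first, using '?' for unknown and '!' for conflicting bits.

#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"

void examplePrint() {
  llvm::KnownBits K(4);
  K.One.setBit(1);  // bit 1 known one
  K.Zero.setBit(0); // bit 0 known zero
  K.print(llvm::errs()); // prints "??10" (was "{Zero=..., One=...}" before)
}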
void KnownBits::dump() const {
print(dbgs());
diff --git a/llvm/lib/Support/LowLevelType.cpp b/llvm/lib/Support/LowLevelType.cpp
deleted file mode 100644
index 0282cd9bd79e..000000000000
--- a/llvm/lib/Support/LowLevelType.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-//===-- llvm/Support/LowLevelType.cpp -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file This file implements the more header-heavy bits of the LLT class to
-/// avoid polluting users' namespaces.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/LowLevelTypeImpl.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-LLT::LLT(MVT VT) {
- if (VT.isVector()) {
- bool asVector = VT.getVectorMinNumElements() > 1;
- init(/*IsPointer=*/false, asVector, /*IsScalar=*/!asVector,
- VT.getVectorElementCount(), VT.getVectorElementType().getSizeInBits(),
- /*AddressSpace=*/0);
- } else if (VT.isValid()) {
- // Aggregates are no different from real scalars as far as GlobalISel is
- // concerned.
- init(/*IsPointer=*/false, /*IsVector=*/false, /*IsScalar=*/true,
- ElementCount::getFixed(0), VT.getSizeInBits(), /*AddressSpace=*/0);
- } else {
- IsScalar = false;
- IsPointer = false;
- IsVector = false;
- RawData = 0;
- }
-}
-
-void LLT::print(raw_ostream &OS) const {
- if (isVector()) {
- OS << "<";
- OS << getElementCount() << " x " << getElementType() << ">";
- } else if (isPointer())
- OS << "p" << getAddressSpace();
- else if (isValid()) {
- assert(isScalar() && "unexpected type");
- OS << "s" << getScalarSizeInBits();
- } else
- OS << "LLT_invalid";
-}
-
-const constexpr LLT::BitFieldInfo LLT::ScalarSizeFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::PointerSizeFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::PointerAddressSpaceFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::VectorElementsFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::VectorScalableFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::VectorSizeFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::PointerVectorElementsFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::PointerVectorScalableFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::PointerVectorSizeFieldInfo;
-const constexpr LLT::BitFieldInfo LLT::PointerVectorAddressSpaceFieldInfo;
diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp
index 0bb11725d2fc..4cc4fe019b75 100644
--- a/llvm/lib/Support/MemoryBuffer.cpp
+++ b/llvm/lib/Support/MemoryBuffer.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Alignment.h"
@@ -22,6 +23,7 @@
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/SmallVectorMemoryBuffer.h"
+#include <algorithm>
#include <cassert>
#include <cstring>
#include <new>
@@ -132,10 +134,13 @@ MemoryBuffer::getMemBuffer(MemoryBufferRef Ref, bool RequiresNullTerminator) {
static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
getMemBufferCopyImpl(StringRef InputData, const Twine &BufferName) {
- auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(InputData.size(), BufferName);
+ auto Buf =
+ WritableMemoryBuffer::getNewUninitMemBuffer(InputData.size(), BufferName);
if (!Buf)
return make_error_code(errc::not_enough_memory);
- memcpy(Buf->getBufferStart(), InputData.data(), InputData.size());
+ // Calling memcpy with null src/dst is UB, and an empty StringRef is
+ // represented with {nullptr, 0}.
+ llvm::copy(InputData, Buf->getBufferStart());
return std::move(Buf);
}
diff --git a/llvm/lib/Support/NativeFormatting.cpp b/llvm/lib/Support/NativeFormatting.cpp
index 6e8137c405b8..3b9273e1eaad 100644
--- a/llvm/lib/Support/NativeFormatting.cpp
+++ b/llvm/lib/Support/NativeFormatting.cpp
@@ -58,10 +58,7 @@ static void write_unsigned_impl(raw_ostream &S, T N, size_t MinDigits,
static_assert(std::is_unsigned_v<T>, "Value is not unsigned!");
char NumberBuffer[128];
- std::memset(NumberBuffer, '0', sizeof(NumberBuffer));
-
- size_t Len = 0;
- Len = format_to_buffer(N, NumberBuffer);
+ size_t Len = format_to_buffer(N, NumberBuffer);
if (IsNegative)
S << '-';
diff --git a/llvm/lib/Support/PGOOptions.cpp b/llvm/lib/Support/PGOOptions.cpp
new file mode 100644
index 000000000000..04d50cc70d91
--- /dev/null
+++ b/llvm/lib/Support/PGOOptions.cpp
@@ -0,0 +1,58 @@
+//===------ PGOOptions.cpp -- PGO option tunables --------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/PGOOptions.h"
+#include "llvm/Support/VirtualFileSystem.h"
+
+using namespace llvm;
+
+PGOOptions::PGOOptions(std::string ProfileFile, std::string CSProfileGenFile,
+ std::string ProfileRemappingFile,
+ std::string MemoryProfile,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS, PGOAction Action,
+ CSPGOAction CSAction, bool DebugInfoForProfiling,
+ bool PseudoProbeForProfiling)
+ : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile),
+ ProfileRemappingFile(ProfileRemappingFile), MemoryProfile(MemoryProfile),
+ Action(Action), CSAction(CSAction),
+ DebugInfoForProfiling(DebugInfoForProfiling ||
+ (Action == SampleUse && !PseudoProbeForProfiling)),
+ PseudoProbeForProfiling(PseudoProbeForProfiling), FS(std::move(FS)) {
+ // Note: we do allow ProfileFile.empty() for Action=IRUse; LTO can call
+ // back with the IRUse action without a ProfileFile.
+
+ // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse.
+ assert(this->CSAction == NoCSAction ||
+ (this->Action != IRInstr && this->Action != SampleUse));
+
+ // For CSIRInstr, CSProfileGenFile also needs to be nonempty.
+ assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty());
+
+ // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share
+ // a profile.
+ assert(this->CSAction != CSIRUse || this->Action == IRUse);
+
+ // Cannot optimize with MemProf profile during IR instrumentation.
+ assert(this->MemoryProfile.empty() || this->Action != PGOOptions::IRInstr);
+
+ // If neither Action nor CSAction nor MemoryProfile are set,
+ // DebugInfoForProfiling or PseudoProbeForProfiling needs to be true.
+ assert(this->Action != NoAction || this->CSAction != NoCSAction ||
+ !this->MemoryProfile.empty() || this->DebugInfoForProfiling ||
+ this->PseudoProbeForProfiling);
+
+ // If we need to use the profile, the VFS cannot be nullptr.
+ assert(this->FS || !(this->Action == IRUse || this->CSAction == CSIRUse ||
+ !this->MemoryProfile.empty()));
+}
+
+PGOOptions::PGOOptions(const PGOOptions &) = default;
+
+PGOOptions &PGOOptions::operator=(const PGOOptions &O) = default;
+
+PGOOptions::~PGOOptions() = default;
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
index 23ed9d813548..9b14b05b5211 100644
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -12,8 +12,8 @@
#include "llvm/Support/Threading.h"
#include <atomic>
+#include <deque>
#include <future>
-#include <stack>
#include <thread>
#include <vector>
@@ -24,11 +24,11 @@ namespace parallel {
#if LLVM_ENABLE_THREADS
#ifdef _WIN32
-static thread_local unsigned threadIndex;
+static thread_local unsigned threadIndex = UINT_MAX;
-unsigned getThreadIndex() { return threadIndex; }
+unsigned getThreadIndex() { GET_THREAD_INDEX_IMPL; }
#else
-thread_local unsigned threadIndex;
+thread_local unsigned threadIndex = UINT_MAX;
#endif
namespace detail {
@@ -39,7 +39,8 @@ namespace {
class Executor {
public:
virtual ~Executor() = default;
- virtual void add(std::function<void()> func) = 0;
+ virtual void add(std::function<void()> func, bool Sequential = false) = 0;
+ virtual size_t getThreadCount() const = 0;
static Executor *getDefaultExecutor();
};
@@ -49,13 +50,16 @@ public:
class ThreadPoolExecutor : public Executor {
public:
explicit ThreadPoolExecutor(ThreadPoolStrategy S = hardware_concurrency()) {
- unsigned ThreadCount = S.compute_thread_count();
+ ThreadCount = S.compute_thread_count();
// Spawn all but one of the threads in another thread as spawning threads
// can take a while.
Threads.reserve(ThreadCount);
Threads.resize(1);
std::lock_guard<std::mutex> Lock(Mutex);
- Threads[0] = std::thread([this, ThreadCount, S] {
+ // Use operator[] before creating the thread to avoid a data race in .size()
+ // in "safe libc++" mode.
+ auto &Thread0 = Threads[0];
+ Thread0 = std::thread([this, S] {
for (unsigned I = 1; I < ThreadCount; ++I) {
Threads.emplace_back([=] { work(S, I); });
if (Stop)
@@ -94,36 +98,61 @@ public:
static void call(void *Ptr) { ((ThreadPoolExecutor *)Ptr)->stop(); }
};
- void add(std::function<void()> F) override {
+ void add(std::function<void()> F, bool Sequential = false) override {
{
std::lock_guard<std::mutex> Lock(Mutex);
- WorkStack.push(std::move(F));
+ if (Sequential)
+ WorkQueueSequential.emplace_front(std::move(F));
+ else
+ WorkQueue.emplace_back(std::move(F));
}
Cond.notify_one();
}
+ size_t getThreadCount() const override { return ThreadCount; }
+
private:
+ bool hasSequentialTasks() const {
+ return !WorkQueueSequential.empty() && !SequentialQueueIsLocked;
+ }
+
+ bool hasGeneralTasks() const { return !WorkQueue.empty(); }
+
void work(ThreadPoolStrategy S, unsigned ThreadID) {
threadIndex = ThreadID;
S.apply_thread_strategy(ThreadID);
while (true) {
std::unique_lock<std::mutex> Lock(Mutex);
- Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); });
+ Cond.wait(Lock, [&] {
+ return Stop || hasGeneralTasks() || hasSequentialTasks();
+ });
if (Stop)
break;
- auto Task = std::move(WorkStack.top());
- WorkStack.pop();
+ bool Sequential = hasSequentialTasks();
+ if (Sequential)
+ SequentialQueueIsLocked = true;
+ else
+ assert(hasGeneralTasks());
+
+ auto &Queue = Sequential ? WorkQueueSequential : WorkQueue;
+ auto Task = std::move(Queue.back());
+ Queue.pop_back();
Lock.unlock();
Task();
+ if (Sequential)
+ SequentialQueueIsLocked = false;
}
}
std::atomic<bool> Stop{false};
- std::stack<std::function<void()>> WorkStack;
+ std::atomic<bool> SequentialQueueIsLocked{false};
+ std::deque<std::function<void()>> WorkQueue;
+ std::deque<std::function<void()>> WorkQueueSequential;
std::mutex Mutex;
std::condition_variable Cond;
std::promise<void> ThreadsCreated;
std::vector<std::thread> Threads;
+ unsigned ThreadCount;
};
Executor *Executor::getDefaultExecutor() {
@@ -153,54 +182,53 @@ Executor *Executor::getDefaultExecutor() {
}
} // namespace
} // namespace detail
-#endif
-static std::atomic<int> TaskGroupInstances;
+size_t getThreadCount() {
+ return detail::Executor::getDefaultExecutor()->getThreadCount();
+}
+#endif
// Latch::sync() called by the dtor may cause one thread to block. It is a dead
// lock if all threads in the default executor are blocked. To prevent the dead
-// lock, only allow the first TaskGroup to run tasks parallelly. In the scenario
+// lock, only allow the root TaskGroup to run tasks parallelly. In the scenario
// of nested parallel_for_each(), only the outermost one runs parallelly.
-TaskGroup::TaskGroup() : Parallel(TaskGroupInstances++ == 0) {}
+TaskGroup::TaskGroup()
+#if LLVM_ENABLE_THREADS
+ : Parallel((parallel::strategy.ThreadsRequested != 1) &&
+ (threadIndex == UINT_MAX)) {}
+#else
+ : Parallel(false) {}
+#endif
TaskGroup::~TaskGroup() {
// We must ensure that all the workloads have finished before decrementing the
// instances count.
L.sync();
- --TaskGroupInstances;
}
-void TaskGroup::spawn(std::function<void()> F) {
+void TaskGroup::spawn(std::function<void()> F, bool Sequential) {
#if LLVM_ENABLE_THREADS
if (Parallel) {
L.inc();
- detail::Executor::getDefaultExecutor()->add([&, F = std::move(F)] {
- F();
- L.dec();
- });
+ detail::Executor::getDefaultExecutor()->add(
+ [&, F = std::move(F)] {
+ F();
+ L.dec();
+ },
+ Sequential);
return;
}
#endif
F();
}
-void TaskGroup::execute(std::function<void()> F) {
- if (parallel::strategy.ThreadsRequested == 1)
- F();
- else
- spawn(F);
-}
} // namespace parallel
} // namespace llvm
void llvm::parallelFor(size_t Begin, size_t End,
llvm::function_ref<void(size_t)> Fn) {
- // If we have zero or one items, then do not incur the overhead of spinning up
- // a task group. They are surprisingly expensive, and because they do not
- // support nested parallelism, a single entry task group can block parallel
- // execution underneath them.
#if LLVM_ENABLE_THREADS
- auto NumItems = End - Begin;
- if (NumItems > 1 && parallel::strategy.ThreadsRequested != 1) {
+ if (parallel::strategy.ThreadsRequested != 1) {
+ auto NumItems = End - Begin;
// Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
// overhead on large inputs.
auto TaskSize = NumItems / parallel::detail::MaxTasksPerGroup;
@@ -214,8 +242,12 @@ void llvm::parallelFor(size_t Begin, size_t End,
Fn(I);
});
}
- for (; Begin != End; ++Begin)
- Fn(Begin);
+ if (Begin != End) {
+ TG.spawn([=, &Fn] {
+ for (size_t I = Begin; I != End; ++I)
+ Fn(I);
+ });
+ }
return;
}
#endif
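A brief usage sketch (not part of the patch): parallelFor chunks the index range onto the default executor, and with this change the tail chunk is also spawned as a task instead of running inline.

#include "llvm/Support/Parallel.h"
#include <vector>

void squareAll(std::vector<int> &V) {
  llvm::parallelFor(0, V.size(), [&V](size_t I) {
    V[I] = static_cast<int>(I) * static_cast<int>(I);
  });
}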
diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp
index 152d902f52e6..7a57c104ef10 100644
--- a/llvm/lib/Support/Path.cpp
+++ b/llvm/lib/Support/Path.cpp
@@ -13,6 +13,7 @@
#include "llvm/Support/Path.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Endian.h"
@@ -22,6 +23,7 @@
#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
#include <cctype>
+#include <cerrno>
#if !defined(_MSC_VER) && !defined(__MINGW32__)
#include <unistd.h>
@@ -1202,18 +1204,10 @@ Error readNativeFileToEOF(file_t FileHandle, SmallVectorImpl<char> &Buffer,
#include "Windows/Path.inc"
#endif
-bool IsLLVMDriver = false;
-
namespace llvm {
namespace sys {
namespace fs {
-std::string getMainExecutable(const char *Argv0, void *MainAddr) {
- if (IsLLVMDriver)
- return sys::path::stem(Argv0).str();
- return getMainExecutableImpl(Argv0, MainAddr);
-}
-
TempFile::TempFile(StringRef Name, int FD)
: TmpName(std::string(Name)), FD(FD) {}
TempFile::TempFile(TempFile &&Other) { *this = std::move(Other); }
diff --git a/llvm/lib/Support/PrettyStackTrace.cpp b/llvm/lib/Support/PrettyStackTrace.cpp
index fa91405fee10..f9f1b8a419b8 100644
--- a/llvm/lib/Support/PrettyStackTrace.cpp
+++ b/llvm/lib/Support/PrettyStackTrace.cpp
@@ -64,8 +64,7 @@ static LLVM_THREAD_LOCAL PrettyStackTraceEntry *PrettyStackTraceHead = nullptr;
// the current thread". If the user happens to overflow an 'unsigned' with
// SIGINFO requests, it's possible that some threads will stop responding to it,
// but the program won't crash.
-static volatile std::atomic<unsigned> GlobalSigInfoGenerationCounter =
- ATOMIC_VAR_INIT(1);
+static volatile std::atomic<unsigned> GlobalSigInfoGenerationCounter = 1;
static LLVM_THREAD_LOCAL unsigned ThreadLocalSigInfoGenerationCounter = 0;
namespace llvm {
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 1b1bff023d2f..70fab8010831 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVISAInfo.cpp - RISCV Arch String Parser -------------*- C++ -*-===//
+//===-- RISCVISAInfo.cpp - RISC-V Arch String Parser ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -16,6 +16,7 @@
#include "llvm/Support/raw_ostream.h"
#include <array>
+#include <atomic>
#include <optional>
#include <string>
#include <vector>
@@ -33,107 +34,189 @@ struct RISCVSupportedExtension {
const char *Name;
/// Supported version.
RISCVExtensionVersion Version;
+
+ bool operator<(const RISCVSupportedExtension &RHS) const {
+ return StringRef(Name) < StringRef(RHS.Name);
+ }
};
} // end anonymous namespace
static constexpr StringLiteral AllStdExts = "mafdqlcbkjtpvnh";
+static const char *RISCVGImplications[] = {
+ "i", "m", "a", "f", "d", "zicsr", "zifencei"
+};
+
+// NOTE: This table should be sorted alphabetically by extension name.
static const RISCVSupportedExtension SupportedExtensions[] = {
- {"i", RISCVExtensionVersion{2, 0}},
- {"e", RISCVExtensionVersion{1, 9}},
- {"m", RISCVExtensionVersion{2, 0}},
- {"a", RISCVExtensionVersion{2, 0}},
- {"f", RISCVExtensionVersion{2, 0}},
- {"d", RISCVExtensionVersion{2, 0}},
+ {"a", RISCVExtensionVersion{2, 1}},
{"c", RISCVExtensionVersion{2, 0}},
-
+ {"d", RISCVExtensionVersion{2, 2}},
+ {"e", RISCVExtensionVersion{2, 0}},
+ {"f", RISCVExtensionVersion{2, 2}},
{"h", RISCVExtensionVersion{1, 0}},
+ {"i", RISCVExtensionVersion{2, 1}},
+ {"m", RISCVExtensionVersion{2, 0}},
- {"zihintpause", RISCVExtensionVersion{2, 0}},
+ {"svinval", RISCVExtensionVersion{1, 0}},
+ {"svnapot", RISCVExtensionVersion{1, 0}},
+ {"svpbmt", RISCVExtensionVersion{1, 0}},
- {"zfhmin", RISCVExtensionVersion{1, 0}},
- {"zfh", RISCVExtensionVersion{1, 0}},
+ {"v", RISCVExtensionVersion{1, 0}},
- {"zfinx", RISCVExtensionVersion{1, 0}},
- {"zdinx", RISCVExtensionVersion{1, 0}},
- {"zhinxmin", RISCVExtensionVersion{1, 0}},
- {"zhinx", RISCVExtensionVersion{1, 0}},
+ // vendor-defined ('X') extensions
+ {"xcvbitmanip", RISCVExtensionVersion{1, 0}},
+ {"xcvmac", RISCVExtensionVersion{1, 0}},
+ {"xsfcie", RISCVExtensionVersion{1, 0}},
+ {"xsfvcp", RISCVExtensionVersion{1, 0}},
+ {"xtheadba", RISCVExtensionVersion{1, 0}},
+ {"xtheadbb", RISCVExtensionVersion{1, 0}},
+ {"xtheadbs", RISCVExtensionVersion{1, 0}},
+ {"xtheadcmo", RISCVExtensionVersion{1, 0}},
+ {"xtheadcondmov", RISCVExtensionVersion{1, 0}},
+ {"xtheadfmemidx", RISCVExtensionVersion{1, 0}},
+ {"xtheadmac", RISCVExtensionVersion{1, 0}},
+ {"xtheadmemidx", RISCVExtensionVersion{1, 0}},
+ {"xtheadmempair", RISCVExtensionVersion{1, 0}},
+ {"xtheadsync", RISCVExtensionVersion{1, 0}},
+ {"xtheadvdot", RISCVExtensionVersion{1, 0}},
+ {"xventanacondops", RISCVExtensionVersion{1, 0}},
+
+ {"zawrs", RISCVExtensionVersion{1, 0}},
{"zba", RISCVExtensionVersion{1, 0}},
{"zbb", RISCVExtensionVersion{1, 0}},
{"zbc", RISCVExtensionVersion{1, 0}},
- {"zbs", RISCVExtensionVersion{1, 0}},
-
{"zbkb", RISCVExtensionVersion{1, 0}},
{"zbkc", RISCVExtensionVersion{1, 0}},
{"zbkx", RISCVExtensionVersion{1, 0}},
+ {"zbs", RISCVExtensionVersion{1, 0}},
+
+ {"zca", RISCVExtensionVersion{1, 0}},
+ {"zcb", RISCVExtensionVersion{1, 0}},
+ {"zcd", RISCVExtensionVersion{1, 0}},
+ {"zce", RISCVExtensionVersion{1, 0}},
+ {"zcf", RISCVExtensionVersion{1, 0}},
+ {"zcmp", RISCVExtensionVersion{1, 0}},
+ {"zcmt", RISCVExtensionVersion{1, 0}},
+
+ {"zdinx", RISCVExtensionVersion{1, 0}},
+
+ {"zfh", RISCVExtensionVersion{1, 0}},
+ {"zfhmin", RISCVExtensionVersion{1, 0}},
+ {"zfinx", RISCVExtensionVersion{1, 0}},
+
+ {"zhinx", RISCVExtensionVersion{1, 0}},
+ {"zhinxmin", RISCVExtensionVersion{1, 0}},
+
+ {"zicbom", RISCVExtensionVersion{1, 0}},
+ {"zicbop", RISCVExtensionVersion{1, 0}},
+ {"zicboz", RISCVExtensionVersion{1, 0}},
+ {"zicntr", RISCVExtensionVersion{1, 0}},
+ {"zicsr", RISCVExtensionVersion{2, 0}},
+ {"zifencei", RISCVExtensionVersion{2, 0}},
+ {"zihintpause", RISCVExtensionVersion{2, 0}},
+ {"zihpm", RISCVExtensionVersion{1, 0}},
+
+ {"zk", RISCVExtensionVersion{1, 0}},
+ {"zkn", RISCVExtensionVersion{1, 0}},
{"zknd", RISCVExtensionVersion{1, 0}},
{"zkne", RISCVExtensionVersion{1, 0}},
{"zknh", RISCVExtensionVersion{1, 0}},
- {"zksed", RISCVExtensionVersion{1, 0}},
- {"zksh", RISCVExtensionVersion{1, 0}},
{"zkr", RISCVExtensionVersion{1, 0}},
- {"zkn", RISCVExtensionVersion{1, 0}},
{"zks", RISCVExtensionVersion{1, 0}},
+ {"zksed", RISCVExtensionVersion{1, 0}},
+ {"zksh", RISCVExtensionVersion{1, 0}},
{"zkt", RISCVExtensionVersion{1, 0}},
- {"zk", RISCVExtensionVersion{1, 0}},
{"zmmul", RISCVExtensionVersion{1, 0}},
- {"v", RISCVExtensionVersion{1, 0}},
- {"zvl32b", RISCVExtensionVersion{1, 0}},
- {"zvl64b", RISCVExtensionVersion{1, 0}},
- {"zvl128b", RISCVExtensionVersion{1, 0}},
- {"zvl256b", RISCVExtensionVersion{1, 0}},
- {"zvl512b", RISCVExtensionVersion{1, 0}},
- {"zvl1024b", RISCVExtensionVersion{1, 0}},
- {"zvl2048b", RISCVExtensionVersion{1, 0}},
- {"zvl4096b", RISCVExtensionVersion{1, 0}},
- {"zvl8192b", RISCVExtensionVersion{1, 0}},
- {"zvl16384b", RISCVExtensionVersion{1, 0}},
- {"zvl32768b", RISCVExtensionVersion{1, 0}},
- {"zvl65536b", RISCVExtensionVersion{1, 0}},
- {"zve32x", RISCVExtensionVersion{1, 0}},
{"zve32f", RISCVExtensionVersion{1, 0}},
- {"zve64x", RISCVExtensionVersion{1, 0}},
- {"zve64f", RISCVExtensionVersion{1, 0}},
+ {"zve32x", RISCVExtensionVersion{1, 0}},
{"zve64d", RISCVExtensionVersion{1, 0}},
+ {"zve64f", RISCVExtensionVersion{1, 0}},
+ {"zve64x", RISCVExtensionVersion{1, 0}},
- {"zicbom", RISCVExtensionVersion{1, 0}},
- {"zicboz", RISCVExtensionVersion{1, 0}},
- {"zicbop", RISCVExtensionVersion{1, 0}},
+ {"zvfh", RISCVExtensionVersion{1, 0}},
- {"svnapot", RISCVExtensionVersion{1, 0}},
- {"svpbmt", RISCVExtensionVersion{1, 0}},
- {"svinval", RISCVExtensionVersion{1, 0}},
- {"xventanacondops", RISCVExtensionVersion{1, 0}},
- {"xtheadvdot", RISCVExtensionVersion{1, 0}},
+ {"zvl1024b", RISCVExtensionVersion{1, 0}},
+ {"zvl128b", RISCVExtensionVersion{1, 0}},
+ {"zvl16384b", RISCVExtensionVersion{1, 0}},
+ {"zvl2048b", RISCVExtensionVersion{1, 0}},
+ {"zvl256b", RISCVExtensionVersion{1, 0}},
+ {"zvl32768b", RISCVExtensionVersion{1, 0}},
+ {"zvl32b", RISCVExtensionVersion{1, 0}},
+ {"zvl4096b", RISCVExtensionVersion{1, 0}},
+ {"zvl512b", RISCVExtensionVersion{1, 0}},
+ {"zvl64b", RISCVExtensionVersion{1, 0}},
+ {"zvl65536b", RISCVExtensionVersion{1, 0}},
+ {"zvl8192b", RISCVExtensionVersion{1, 0}},
};
+// NOTE: This table should be sorted alphabetically by extension name.
static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
+ {"smaia", RISCVExtensionVersion{1, 0}},
+ {"ssaia", RISCVExtensionVersion{1, 0}},
+
+ {"zacas", RISCVExtensionVersion{1, 0}},
+
+ {"zfa", RISCVExtensionVersion{0, 2}},
+ {"zfbfmin", RISCVExtensionVersion{0, 6}},
+
+ {"zicond", RISCVExtensionVersion{1, 0}},
+
{"zihintntl", RISCVExtensionVersion{0, 2}},
- {"zca", RISCVExtensionVersion{0, 70}},
- {"zcd", RISCVExtensionVersion{0, 70}},
- {"zcf", RISCVExtensionVersion{0, 70}},
- {"zvfh", RISCVExtensionVersion{0, 1}},
- {"zawrs", RISCVExtensionVersion{1, 0}},
{"ztso", RISCVExtensionVersion{0, 1}},
+
+ {"zvbb", RISCVExtensionVersion{1, 0}},
+ {"zvbc", RISCVExtensionVersion{1, 0}},
+
+ {"zvfbfmin", RISCVExtensionVersion{0, 6}},
+ {"zvfbfwma", RISCVExtensionVersion{0, 6}},
+
+ // vector crypto
+ {"zvkg", RISCVExtensionVersion{1, 0}},
+ {"zvkn", RISCVExtensionVersion{1, 0}},
+ {"zvknc", RISCVExtensionVersion{1, 0}},
+ {"zvkned", RISCVExtensionVersion{1, 0}},
+ {"zvkng", RISCVExtensionVersion{1, 0}},
+ {"zvknha", RISCVExtensionVersion{1, 0}},
+ {"zvknhb", RISCVExtensionVersion{1, 0}},
+ {"zvks", RISCVExtensionVersion{1, 0}},
+ {"zvksc", RISCVExtensionVersion{1, 0}},
+ {"zvksed", RISCVExtensionVersion{1, 0}},
+ {"zvksg", RISCVExtensionVersion{1, 0}},
+ {"zvksh", RISCVExtensionVersion{1, 0}},
+ {"zvkt", RISCVExtensionVersion{1, 0}},
};
+static void verifyTables() {
+#ifndef NDEBUG
+ static std::atomic<bool> TableChecked(false);
+ if (!TableChecked.load(std::memory_order_relaxed)) {
+ assert(llvm::is_sorted(SupportedExtensions) &&
+ "Extensions are not sorted by name");
+ assert(llvm::is_sorted(SupportedExperimentalExtensions) &&
+ "Experimental extensions are not sorted by name");
+ TableChecked.store(true, std::memory_order_relaxed);
+ }
+#endif
+}
+
static bool stripExperimentalPrefix(StringRef &Ext) {
return Ext.consume_front("experimental-");
}
-// This function finds the first character that doesn't belong to a version
+// This function finds the last character that doesn't belong to a version
// (e.g. zba1p0 is extension 'zba' of version '1p0'). So the function will
// consume [0-9]*p[0-9]* starting from the back. An extension name will not
// end with a digit or the letter 'p', so this function will parse correctly.
// NOTE: This function is NOT able to take empty strings or strings that only
// have version numbers and no extension name. It assumes the extension name
// will be more than one character long.
-static size_t findFirstNonVersionCharacter(StringRef Ext) {
+static size_t findLastNonVersionCharacter(StringRef Ext) {
assert(!Ext.empty() &&
"Already guarded by if-statement in ::parseArchString");
@@ -149,11 +232,12 @@ static size_t findFirstNonVersionCharacter(StringRef Ext) {
}
namespace {
-struct FindByName {
- FindByName(StringRef Ext) : Ext(Ext){};
- StringRef Ext;
- bool operator()(const RISCVSupportedExtension &ExtInfo) {
- return ExtInfo.Name == Ext;
+struct LessExtName {
+ bool operator()(const RISCVSupportedExtension &LHS, StringRef RHS) {
+ return StringRef(LHS.Name) < RHS;
+ }
+ bool operator()(StringRef LHS, const RISCVSupportedExtension &RHS) {
+ return LHS < StringRef(RHS.Name);
}
};
} // namespace
@@ -164,12 +248,12 @@ findDefaultVersion(StringRef ExtName) {
// TODO: We might set default version based on profile or ISA spec.
for (auto &ExtInfo : {ArrayRef(SupportedExtensions),
ArrayRef(SupportedExperimentalExtensions)}) {
- auto ExtensionInfoIterator = llvm::find_if(ExtInfo, FindByName(ExtName));
+ auto I = llvm::lower_bound(ExtInfo, ExtName, LessExtName());
- if (ExtensionInfoIterator == ExtInfo.end()) {
+ if (I == ExtInfo.end() || I->Name != ExtName)
continue;
- }
- return ExtensionInfoIterator->Version;
+
+ return I->Version;
}
return std::nullopt;
}
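For context, a generic sketch (not taken from the patch; the struct and function names here are made up) of the sorted-table lookup pattern that LessExtName and llvm::lower_bound replace the linear find_if scan with:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"

struct NameVersion {
  const char *Name;
  unsigned Major, Minor;
};

// Precondition: Table is sorted by Name, as verifyTables() asserts above.
static const NameVersion *lookupSorted(llvm::ArrayRef<NameVersion> Table,
                                       llvm::StringRef Ext) {
  auto I = llvm::lower_bound(
      Table, Ext, [](const NameVersion &LHS, llvm::StringRef RHS) {
        return llvm::StringRef(LHS.Name) < RHS;
      });
  if (I == Table.end() || llvm::StringRef(I->Name) != Ext)
    return nullptr;
  return &*I;
}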
@@ -177,15 +261,12 @@ findDefaultVersion(StringRef ExtName) {
void RISCVISAInfo::addExtension(StringRef ExtName, unsigned MajorVersion,
unsigned MinorVersion) {
RISCVExtensionInfo Ext;
- Ext.ExtName = ExtName.str();
Ext.MajorVersion = MajorVersion;
Ext.MinorVersion = MinorVersion;
Exts[ExtName.str()] = Ext;
}
static StringRef getExtensionTypeDesc(StringRef Ext) {
- if (Ext.startswith("sx"))
- return "non-standard supervisor-level extension";
if (Ext.startswith("s"))
return "standard supervisor-level extension";
if (Ext.startswith("x"))
@@ -196,8 +277,6 @@ static StringRef getExtensionTypeDesc(StringRef Ext) {
}
static StringRef getExtensionType(StringRef Ext) {
- if (Ext.startswith("sx"))
- return "sx";
if (Ext.startswith("s"))
return "s";
if (Ext.startswith("x"))
@@ -209,36 +288,50 @@ static StringRef getExtensionType(StringRef Ext) {
static std::optional<RISCVExtensionVersion>
isExperimentalExtension(StringRef Ext) {
- auto ExtIterator =
- llvm::find_if(SupportedExperimentalExtensions, FindByName(Ext));
- if (ExtIterator == std::end(SupportedExperimentalExtensions))
+ auto I =
+ llvm::lower_bound(SupportedExperimentalExtensions, Ext, LessExtName());
+ if (I == std::end(SupportedExperimentalExtensions) || I->Name != Ext)
return std::nullopt;
- return ExtIterator->Version;
+ return I->Version;
}
bool RISCVISAInfo::isSupportedExtensionFeature(StringRef Ext) {
bool IsExperimental = stripExperimentalPrefix(Ext);
- if (IsExperimental)
- return llvm::any_of(SupportedExperimentalExtensions, FindByName(Ext));
- else
- return llvm::any_of(SupportedExtensions, FindByName(Ext));
+ ArrayRef<RISCVSupportedExtension> ExtInfo =
+ IsExperimental ? ArrayRef(SupportedExperimentalExtensions)
+ : ArrayRef(SupportedExtensions);
+
+ auto I = llvm::lower_bound(ExtInfo, Ext, LessExtName());
+ return I != ExtInfo.end() && I->Name == Ext;
}
bool RISCVISAInfo::isSupportedExtension(StringRef Ext) {
- return llvm::any_of(SupportedExtensions, FindByName(Ext)) ||
- llvm::any_of(SupportedExperimentalExtensions, FindByName(Ext));
+ verifyTables();
+
+ for (auto ExtInfo : {ArrayRef(SupportedExtensions),
+ ArrayRef(SupportedExperimentalExtensions)}) {
+ auto I = llvm::lower_bound(ExtInfo, Ext, LessExtName());
+ if (I != ExtInfo.end() && I->Name == Ext)
+ return true;
+ }
+
+ return false;
}
bool RISCVISAInfo::isSupportedExtension(StringRef Ext, unsigned MajorVersion,
unsigned MinorVersion) {
- auto FindByNameAndVersion = [=](const RISCVSupportedExtension &ExtInfo) {
- return ExtInfo.Name == Ext && (MajorVersion == ExtInfo.Version.Major) &&
- (MinorVersion == ExtInfo.Version.Minor);
- };
- return llvm::any_of(SupportedExtensions, FindByNameAndVersion) ||
- llvm::any_of(SupportedExperimentalExtensions, FindByNameAndVersion);
+ for (auto ExtInfo : {ArrayRef(SupportedExtensions),
+ ArrayRef(SupportedExperimentalExtensions)}) {
+ auto Range =
+ std::equal_range(ExtInfo.begin(), ExtInfo.end(), Ext, LessExtName());
+ for (auto I = Range.first, E = Range.second; I != E; ++I)
+ if (I->Version.Major == MajorVersion && I->Version.Minor == MinorVersion)
+ return true;
+ }
+
+ return false;
}
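The version-aware overload uses equal_range rather than a single lower_bound so that, if a name were ever listed with more than one supported version, every candidate would be checked against the requested (major, minor) pair. Roughly, under the same sorted-table assumption (hypothetical data):

    #include <algorithm>
    #include <cstdio>
    #include <iterator>
    #include <string_view>

    struct Ext {
      const char *Name;
      unsigned Major, Minor;
    };

    struct ByName {
      bool operator()(const Ext &L, std::string_view R) const {
        return std::string_view(L.Name) < R;
      }
      bool operator()(std::string_view L, const Ext &R) const {
        return L < std::string_view(R.Name);
      }
    };

    // Sorted by name; "zfh" is listed twice here purely to exercise the range.
    static const Ext Table[] = {{"zfh", 0, 1}, {"zfh", 1, 0}, {"zvl32b", 1, 0}};

    static bool hasVersion(std::string_view Name, unsigned Major, unsigned Minor) {
      auto R = std::equal_range(std::begin(Table), std::end(Table), Name, ByName());
      for (auto I = R.first; I != R.second; ++I)
        if (I->Major == Major && I->Minor == Minor)
          return true;
      return false;
    }

    int main() {
      std::printf("%d %d\n", hasVersion("zfh", 1, 0), hasVersion("zfh", 2, 0)); // 1 0
    }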
bool RISCVISAInfo::hasExtension(StringRef Ext) const {
@@ -250,78 +343,71 @@ bool RISCVISAInfo::hasExtension(StringRef Ext) const {
return Exts.count(Ext.str()) != 0;
}
+// We rank extensions in the following order:
+// -Single letter extensions in canonical order.
+// -Unknown single letter extensions in alphabetical order.
+// -Multi-letter extensions starting with 'z' sorted by canonical order of
+// the second letter then sorted alphabetically.
+// -Multi-letter extensions starting with 's' in alphabetical order.
+// -(TODO) Multi-letter extensions starting with 'zxm' in alphabetical order.
+// -X extensions in alphabetical order.
+// These flags are used to indicate the category. The first 6 bits store the
+// single letter extension rank for single letter and multi-letter extensions
+// starting with 'z'.
+enum RankFlags {
+ RF_Z_EXTENSION = 1 << 6,
+ RF_S_EXTENSION = 1 << 7,
+ RF_X_EXTENSION = 1 << 8,
+};
+
// Get the rank for single-letter extension, lower value meaning higher
// priority.
-static int singleLetterExtensionRank(char Ext) {
+static unsigned singleLetterExtensionRank(char Ext) {
+ assert(Ext >= 'a' && Ext <= 'z');
switch (Ext) {
case 'i':
- return -2;
+ return 0;
case 'e':
- return -1;
- default:
- break;
+ return 1;
}
size_t Pos = AllStdExts.find(Ext);
- int Rank;
- if (Pos == StringRef::npos)
- // If we got an unknown extension letter, then give it an alphabetical
- // order, but after all known standard extensions.
- Rank = AllStdExts.size() + (Ext - 'a');
- else
- Rank = Pos;
+ if (Pos != StringRef::npos)
+ return Pos + 2; // Skip 'e' and 'i' from above.
- return Rank;
+ // If we got an unknown extension letter, then give it an alphabetical
+ // order, but after all known standard extensions.
+ return 2 + AllStdExts.size() + (Ext - 'a');
}
// Get the rank for multi-letter extension, lower value meaning higher
// priority/order in canonical order.
-static int multiLetterExtensionRank(const std::string &ExtName) {
- assert(ExtName.length() >= 2);
- int HighOrder;
- int LowOrder = 0;
- // The order between multi-char extensions: s -> h -> z -> x.
- char ExtClass = ExtName[0];
- switch (ExtClass) {
+static unsigned getExtensionRank(const std::string &ExtName) {
+ assert(ExtName.size() >= 1);
+ switch (ExtName[0]) {
case 's':
- HighOrder = 0;
- break;
+ return RF_S_EXTENSION;
case 'z':
- HighOrder = 1;
+ assert(ExtName.size() >= 2);
// `z` extension must be sorted by canonical order of second letter.
// e.g. zmx has higher rank than zax.
- LowOrder = singleLetterExtensionRank(ExtName[1]);
- break;
+ return RF_Z_EXTENSION | singleLetterExtensionRank(ExtName[1]);
case 'x':
- HighOrder = 2;
- break;
+ return RF_X_EXTENSION;
default:
- llvm_unreachable("Unknown prefix for multi-char extension");
- return -1;
+ assert(ExtName.size() == 1);
+ return singleLetterExtensionRank(ExtName[0]);
}
-
- return (HighOrder << 8) + LowOrder;
}
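The flag layout is what lets a single unsigned comparison implement the layered ordering described in the comment above: the high bits select the category (single-letter, then 'z', then 's', then 'x'), and for 'z' extensions the low six bits break ties by the canonical rank of the second letter. A toy illustration with an assumed, abbreviated canonical order (not the real AllStdExts string):

    #include <algorithm>
    #include <cstdio>
    #include <string>
    #include <vector>

    // Assumed, abbreviated canonical order of single-letter extensions.
    static const std::string StdOrder = "mafdqcbv";

    enum RankFlags { RF_Z = 1 << 6, RF_S = 1 << 7, RF_X = 1 << 8 };

    static unsigned letterRank(char C) {
      if (C == 'i')
        return 0;
      if (C == 'e')
        return 1;
      size_t Pos = StdOrder.find(C);
      // Unknown letters sort alphabetically after all known ones.
      return Pos != std::string::npos ? unsigned(Pos) + 2
                                      : unsigned(2 + StdOrder.size() + (C - 'a'));
    }

    static unsigned extensionRank(const std::string &Name) {
      switch (Name[0]) {
      case 's':
        return RF_S;
      case 'z':
        return RF_Z | letterRank(Name[1]); // second letter orders 'z' extensions
      case 'x':
        return RF_X;
      default:
        return letterRank(Name[0]); // single-letter extension
      }
    }

    int main() {
      std::vector<std::string> Exts = {"xfoo", "zba", "sstc", "c", "i", "zmx", "m"};
      std::sort(Exts.begin(), Exts.end(),
                [](const std::string &L, const std::string &R) {
                  unsigned LR = extensionRank(L), RR = extensionRank(R);
                  return LR != RR ? LR < RR : L < R;
                });
      for (const std::string &E : Exts)
        std::printf("%s ", E.c_str()); // i m c zmx zba sstc xfoo
    }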
// Compare function for extension.
// Only compare the extension name, ignore version comparison.
bool RISCVISAInfo::compareExtension(const std::string &LHS,
const std::string &RHS) {
- size_t LHSLen = LHS.length();
- size_t RHSLen = RHS.length();
- if (LHSLen == 1 && RHSLen != 1)
- return true;
+ unsigned LHSRank = getExtensionRank(LHS);
+ unsigned RHSRank = getExtensionRank(RHS);
- if (LHSLen != 1 && RHSLen == 1)
- return false;
-
- if (LHSLen == 1 && RHSLen == 1)
- return singleLetterExtensionRank(LHS[0]) <
- singleLetterExtensionRank(RHS[0]);
-
- // Both are multi-char ext here.
- int LHSRank = multiLetterExtensionRank(LHS);
- int RHSRank = multiLetterExtensionRank(RHS);
+ // If the ranks differ, pick the lower rank.
if (LHSRank != RHSRank)
return LHSRank < RHSRank;
@@ -485,11 +571,12 @@ RISCVISAInfo::parseFeatures(unsigned XLen,
? ArrayRef(SupportedExperimentalExtensions)
: ArrayRef(SupportedExtensions);
auto ExtensionInfoIterator =
- llvm::find_if(ExtensionInfos, FindByName(ExtName));
+ llvm::lower_bound(ExtensionInfos, ExtName, LessExtName());
// Not all features are related to ISA extensions, like `relax` or
// `save-restore`; skip those features.
- if (ExtensionInfoIterator == ExtensionInfos.end())
+ if (ExtensionInfoIterator == ExtensionInfos.end() ||
+ ExtensionInfoIterator->Name != ExtName)
continue;
if (Add)
@@ -503,6 +590,67 @@ RISCVISAInfo::parseFeatures(unsigned XLen,
}
llvm::Expected<std::unique_ptr<RISCVISAInfo>>
+RISCVISAInfo::parseNormalizedArchString(StringRef Arch) {
+ if (llvm::any_of(Arch, isupper)) {
+ return createStringError(errc::invalid_argument,
+ "string must be lowercase");
+ }
+ // Must start with a valid base ISA name.
+ unsigned XLen;
+ if (Arch.startswith("rv32i") || Arch.startswith("rv32e"))
+ XLen = 32;
+ else if (Arch.startswith("rv64i") || Arch.startswith("rv64e"))
+ XLen = 64;
+ else
+ return createStringError(errc::invalid_argument,
+ "arch string must begin with valid base ISA");
+ std::unique_ptr<RISCVISAInfo> ISAInfo(new RISCVISAInfo(XLen));
+ // Discard rv32/rv64 prefix.
+ Arch = Arch.substr(4);
+
+ // Each extension is of the form ${name}${major_version}p${minor_version}
+ // with extensions separated by '_'. Split on '_', then extract the name and version
+ // information for each extension.
+ SmallVector<StringRef, 8> Split;
+ Arch.split(Split, '_');
+ for (StringRef Ext : Split) {
+ StringRef Prefix, MinorVersionStr;
+ std::tie(Prefix, MinorVersionStr) = Ext.rsplit('p');
+ if (MinorVersionStr.empty())
+ return createStringError(errc::invalid_argument,
+ "extension lacks version in expected format");
+ unsigned MajorVersion, MinorVersion;
+ if (MinorVersionStr.getAsInteger(10, MinorVersion))
+ return createStringError(errc::invalid_argument,
+ "failed to parse minor version number");
+
+ // Split Prefix into the extension name and the major version number
+ // (the trailing digits of Prefix).
+ int TrailingDigits = 0;
+ StringRef ExtName = Prefix;
+ while (!ExtName.empty()) {
+ if (!isDigit(ExtName.back()))
+ break;
+ ExtName = ExtName.drop_back(1);
+ TrailingDigits++;
+ }
+ if (!TrailingDigits)
+ return createStringError(errc::invalid_argument,
+ "extension lacks version in expected format");
+
+ StringRef MajorVersionStr = Prefix.take_back(TrailingDigits);
+ if (MajorVersionStr.getAsInteger(10, MajorVersion))
+ return createStringError(errc::invalid_argument,
+ "failed to parse major version number");
+ ISAInfo->addExtension(ExtName, MajorVersion, MinorVersion);
+ }
+ ISAInfo->updateFLen();
+ ISAInfo->updateMinVLen();
+ ISAInfo->updateMaxELen();
+ return std::move(ISAInfo);
+}
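Each component of a normalized arch string carries an explicit <major>p<minor> suffix, so the parse is: split at the last 'p', then peel trailing digits off the remaining prefix to separate the name from the major version. A standalone sketch of that split with simplified error handling (illustrative only, not the function above):

    #include <cctype>
    #include <cstdio>
    #include <string_view>

    // Split e.g. "zvl128b1p0" into name "zvl128b", major 1, minor 0.
    // Returns false on malformed input.
    static bool splitExt(std::string_view Ext, std::string_view &Name,
                         unsigned &Major, unsigned &Minor) {
      size_t P = Ext.rfind('p');
      if (P == std::string_view::npos || P + 1 == Ext.size())
        return false;
      std::string_view Prefix = Ext.substr(0, P), MinorStr = Ext.substr(P + 1);

      Minor = 0;
      for (char C : MinorStr) {
        if (!std::isdigit((unsigned char)C))
          return false;
        Minor = Minor * 10 + unsigned(C - '0');
      }

      // The major version is the run of trailing digits on the prefix.
      size_t End = Prefix.size();
      while (End && std::isdigit((unsigned char)Prefix[End - 1]))
        --End;
      if (End == Prefix.size() || End == 0)
        return false; // no major version digits, or no name at all
      Major = 0;
      for (char C : Prefix.substr(End))
        Major = Major * 10 + unsigned(C - '0');
      Name = Prefix.substr(0, End);
      return true;
    }

    int main() {
      std::string_view Name;
      unsigned Major, Minor;
      if (splitExt("zvl128b1p0", Name, Major, Minor))
        std::printf("%.*s %u.%u\n", int(Name.size()), Name.data(), Major, Minor);
    }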
+
+llvm::Expected<std::unique_ptr<RISCVISAInfo>>
RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
bool ExperimentalExtensionVersionCheck,
bool IgnoreUnknown) {
@@ -515,8 +663,9 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
bool HasRV64 = Arch.startswith("rv64");
// ISA string must begin with rv32 or rv64.
if (!(Arch.startswith("rv32") || HasRV64) || (Arch.size() < 5)) {
- return createStringError(errc::invalid_argument,
- "string must begin with rv32{i,e,g} or rv64{i,g}");
+ return createStringError(
+ errc::invalid_argument,
+ "string must begin with rv32{i,e,g} or rv64{i,e,g}");
}
unsigned XLen = HasRV64 ? 64 : 32;
@@ -532,27 +681,27 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
default:
return createStringError(errc::invalid_argument,
"first letter should be 'e', 'i' or 'g'");
- case 'e': {
- // Extension 'e' is not allowed in rv64.
- if (HasRV64)
- return createStringError(
- errc::invalid_argument,
- "standard user-level extension 'e' requires 'rv32'");
- break;
- }
+ case 'e':
case 'i':
break;
case 'g':
- // g = imafd
+ // g expands to extensions in RISCVGImplications.
+ if (Arch.size() > 5 && isDigit(Arch[5]))
+ return createStringError(errc::invalid_argument,
+ "version not supported for 'g'");
StdExts = StdExts.drop_front(4);
break;
}
+ if (Arch.back() == '_')
+ return createStringError(errc::invalid_argument,
+ "extension name missing after separator '_'");
+
// Skip rvxxx
StringRef Exts = Arch.substr(5);
// Remove multi-letter standard extensions, non-standard extensions and
- // supervisor-level extensions. They have 'z', 'x', 's', 'sx' prefixes.
+ // supervisor-level extensions. They have 'z', 'x', 's' prefixes.
// Parse them at the end.
// Find the very first occurrence of 's', 'x' or 'z'.
StringRef OtherExts;
@@ -563,36 +712,48 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
}
unsigned Major, Minor, ConsumeLength;
- if (auto E = getExtensionVersion(std::string(1, Baseline), Exts, Major, Minor,
- ConsumeLength, EnableExperimentalExtension,
- ExperimentalExtensionVersionCheck))
- return std::move(E);
-
if (Baseline == 'g') {
+ // Versions for g are disallowed, and this was checked for previously.
+ ConsumeLength = 0;
+
// No matter which version is given to `g`, we always set imafd to default
// version since we don't have a clear version scheme for that in the
// ISA spec.
- for (const auto *Ext : {"i", "m", "a", "f", "d"})
+ for (const auto *Ext : RISCVGImplications) {
if (auto Version = findDefaultVersion(Ext))
ISAInfo->addExtension(Ext, Version->Major, Version->Minor);
else
llvm_unreachable("Default extension version not found?");
- } else
+ }
+ } else {
// Baseline is `i` or `e`
- ISAInfo->addExtension(std::string(1, Baseline), Major, Minor);
+ if (auto E = getExtensionVersion(
+ StringRef(&Baseline, 1), Exts, Major, Minor, ConsumeLength,
+ EnableExperimentalExtension, ExperimentalExtensionVersionCheck)) {
+ if (!IgnoreUnknown)
+ return std::move(E);
+ // If IgnoreUnknown, then ignore an unrecognised version of the baseline
+ // ISA and just use the default supported version.
+ consumeError(std::move(E));
+ auto Version = findDefaultVersion(StringRef(&Baseline, 1));
+ Major = Version->Major;
+ Minor = Version->Minor;
+ }
+
+ ISAInfo->addExtension(StringRef(&Baseline, 1), Major, Minor);
+ }
// Consume the base ISA version number and any '_' between rvxxx and the
// first extension
Exts = Exts.drop_front(ConsumeLength);
Exts.consume_front("_");
- // TODO: Use version number when setting target features
-
auto StdExtsItr = StdExts.begin();
auto StdExtsEnd = StdExts.end();
- auto GoToNextExt = [](StringRef::iterator &I, unsigned ConsumeLength) {
+ auto GoToNextExt = [](StringRef::iterator &I, unsigned ConsumeLength,
+ StringRef::iterator E) {
I += 1 + ConsumeLength;
- if (*I == '_')
+ if (I != E && *I == '_')
++I;
};
for (auto I = Exts.begin(), E = Exts.end(); I != E;) {
@@ -619,38 +780,37 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
// Move to next char to prevent repeated letter.
++StdExtsItr;
- std::string Next;
+ StringRef Next;
unsigned Major, Minor, ConsumeLength;
if (std::next(I) != E)
- Next = std::string(std::next(I), E);
- if (auto E = getExtensionVersion(std::string(1, C), Next, Major, Minor,
+ Next = StringRef(std::next(I), E - std::next(I));
+ if (auto E = getExtensionVersion(StringRef(&C, 1), Next, Major, Minor,
ConsumeLength, EnableExperimentalExtension,
ExperimentalExtensionVersionCheck)) {
if (IgnoreUnknown) {
consumeError(std::move(E));
- GoToNextExt(I, ConsumeLength);
+ GoToNextExt(I, ConsumeLength, Exts.end());
continue;
}
return std::move(E);
}
// The order is OK, then push it into features.
- // TODO: Use version number when setting target features
// Currently LLVM supports only "mafdcvh".
if (!isSupportedExtension(StringRef(&C, 1))) {
if (IgnoreUnknown) {
- GoToNextExt(I, ConsumeLength);
+ GoToNextExt(I, ConsumeLength, Exts.end());
continue;
}
return createStringError(errc::invalid_argument,
"unsupported standard user-level extension '%c'",
C);
}
- ISAInfo->addExtension(std::string(1, C), Major, Minor);
+ ISAInfo->addExtension(StringRef(&C, 1), Major, Minor);
// Consume full extension name and version, including any optional '_'
// between this extension and the next
- GoToNextExt(I, ConsumeLength);
+ GoToNextExt(I, ConsumeLength, Exts.end());
}
// Handle other types of extensions other than the standard
@@ -658,9 +818,9 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
// Parse the ISA string containing non-standard user-level
// extensions, standard supervisor-level extensions and
// non-standard supervisor-level extensions.
- // These extensions start with 'z', 'x', 's', 'sx' prefixes, follow a
- // canonical order, might have a version number (major, minor)
- // and are separated by a single underscore '_'.
+ // These extensions start with 'z', 's', 'x' prefixes, might have a version
+ // number (major, minor) and are separated by a single underscore '_'. We do
+ // not enforce a canonical order for them.
// Set the hardware features for the extensions that are supported.
// Multi-letter extensions are separated by a single underscore
@@ -669,9 +829,6 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
OtherExts.split(Split, '_');
SmallVector<StringRef, 8> AllExts;
- std::array<StringRef, 4> Prefix{"z", "x", "s", "sx"};
- auto I = Prefix.begin();
- auto E = Prefix.end();
if (Split.size() > 1 || Split[0] != "") {
for (StringRef Ext : Split) {
if (Ext.empty())
@@ -680,7 +837,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
StringRef Type = getExtensionType(Ext);
StringRef Desc = getExtensionTypeDesc(Ext);
- auto Pos = findFirstNonVersionCharacter(Ext) + 1;
+ auto Pos = findLastNonVersionCharacter(Ext) + 1;
StringRef Name(Ext.substr(0, Pos));
StringRef Vers(Ext.substr(Pos));
@@ -691,18 +848,6 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
"invalid extension prefix '" + Ext + "'");
}
- // Check ISA extensions are specified in the canonical order.
- while (I != E && *I != Type)
- ++I;
-
- if (I == E) {
- if (IgnoreUnknown)
- continue;
- return createStringError(errc::invalid_argument,
- "%s not given in canonical order '%s'",
- Desc.str().c_str(), Ext.str().c_str());
- }
-
if (!IgnoreUnknown && Name.size() == Type.size()) {
return createStringError(errc::invalid_argument,
"%s name missing after '%s'",
@@ -726,6 +871,9 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
Desc.str().c_str(), Name.str().c_str());
}
+ if (IgnoreUnknown && !isSupportedExtension(Name))
+ continue;
+
ISAInfo->addExtension(Name, Major, Minor);
// Extension format is correct, keep parsing the extensions.
// TODO: Save Type, Name, Major, Minor to avoid parsing them later.
@@ -745,51 +893,55 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
}
Error RISCVISAInfo::checkDependency() {
- bool IsRv32 = XLen == 32;
- bool HasE = Exts.count("e") != 0;
- bool HasD = Exts.count("d") != 0;
+ bool HasC = Exts.count("c") != 0;
bool HasF = Exts.count("f") != 0;
bool HasZfinx = Exts.count("zfinx") != 0;
- bool HasZdinx = Exts.count("zdinx") != 0;
bool HasVector = Exts.count("zve32x") != 0;
- bool HasZve32f = Exts.count("zve32f") != 0;
- bool HasZve64d = Exts.count("zve64d") != 0;
bool HasZvl = MinVLen != 0;
+ bool HasZcmt = Exts.count("zcmt") != 0;
- if (HasE && !IsRv32)
+ if (HasF && HasZfinx)
+ return createStringError(errc::invalid_argument,
+ "'f' and 'zfinx' extensions are incompatible");
+
+ if (HasZvl && !HasVector)
return createStringError(
errc::invalid_argument,
- "standard user-level extension 'e' requires 'rv32'");
+ "'zvl*b' requires 'v' or 'zve*' extension to also be specified");
- // It's illegal to specify the 'd' (double-precision floating point)
- // extension without also specifying the 'f' (single precision
- // floating-point) extension.
- // TODO: This has been removed in later specs, which specify that D implies F
- if (HasD && !HasF)
- return createStringError(errc::invalid_argument,
- "d requires f extension to also be specified");
+ if (Exts.count("zvbb") && !HasVector)
+ return createStringError(
+ errc::invalid_argument,
+ "'zvbb' requires 'v' or 'zve*' extension to also be specified");
- if (HasZve32f && !HasF && !HasZfinx)
+ if (Exts.count("zvbc") && !Exts.count("zve64x"))
return createStringError(
errc::invalid_argument,
- "zve32f requires f or zfinx extension to also be specified");
+ "'zvbc' requires 'v' or 'zve64*' extension to also be specified");
- if (HasZve64d && !HasD && !HasZdinx)
+ if ((Exts.count("zvkg") || Exts.count("zvkned") || Exts.count("zvknha") ||
+ Exts.count("zvksed") || Exts.count("zvksh")) &&
+ !HasVector)
return createStringError(
errc::invalid_argument,
- "zve64d requires d or zdinx extension to also be specified");
+ "'zvk*' requires 'v' or 'zve*' extension to also be specified");
- if (Exts.count("zvfh") && !Exts.count("zfh") && !Exts.count("zfhmin") &&
- !Exts.count("zhinx") && !Exts.count("zhinxmin"))
+ if (Exts.count("zvknhb") && !Exts.count("zve64x"))
return createStringError(
errc::invalid_argument,
- "zvfh requires zfh, zfhmin, zhinx or zhinxmin extension to also be "
- "specified");
+ "'zvknhb' requires 'v' or 'zve64*' extension to also be specified");
- if (HasZvl && !HasVector)
+ if ((HasZcmt || Exts.count("zcmp")) && Exts.count("d") &&
+ (HasC || Exts.count("zcd")))
return createStringError(
errc::invalid_argument,
- "zvl*b requires v or zve* extension to also be specified");
+ Twine("'") + (HasZcmt ? "zcmt" : "zcmp") +
+ "' extension is incompatible with '" + (HasC ? "c" : "zcd") +
+ "' extension when 'd' extension is enabled");
+
+ if (XLen != 32 && Exts.count("zcf"))
+ return createStringError(errc::invalid_argument,
+ "'zcf' is only supported for 'rv32'");
// Additional dependency checks.
// TODO: The 'q' extension requires rv64.
@@ -798,34 +950,58 @@ Error RISCVISAInfo::checkDependency() {
return Error::success();
}
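Most of these dependency rules have the shape "if extension A is enabled, at least one extension from set B must be enabled too" (plus a few pairwise incompatibilities such as 'f' with 'zfinx'). A compact sketch of that rule shape over a plain set of enabled names, with an illustrative rule list and simplified diagnostics:

    #include <cstdio>
    #include <set>
    #include <string>
    #include <vector>

    struct Requires {
      std::string Ext;                // if this extension is enabled...
      std::vector<std::string> AnyOf; // ...at least one of these must be too
      const char *Msg;
    };

    // Illustrative rules only; XLen is encoded as a pseudo-entry "rv32" here.
    static const std::vector<Requires> Rules = {
        {"zvl128b", {"zve32x"}, "'zvl*b' requires 'v' or 'zve*'"},
        {"zvbc", {"zve64x"}, "'zvbc' requires 'v' or 'zve64*'"},
        {"zcf", {"rv32"}, "'zcf' is only supported for 'rv32'"},
    };

    static bool check(const std::set<std::string> &Enabled) {
      bool OK = true;
      for (const Requires &R : Rules) {
        if (!Enabled.count(R.Ext))
          continue;
        bool Satisfied = false;
        for (const std::string &Need : R.AnyOf)
          Satisfied |= Enabled.count(Need) != 0;
        if (!Satisfied) {
          std::printf("error: %s\n", R.Msg);
          OK = false;
        }
      }
      return OK;
    }

    int main() {
      check({"zvbc", "zve32x"}); // reports the zvbc rule
    }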
-static const char *ImpliedExtsV[] = {"zvl128b", "zve64d", "f", "d"};
-static const char *ImpliedExtsZfhmin[] = {"f"};
-static const char *ImpliedExtsZfh[] = {"f"};
+static const char *ImpliedExtsD[] = {"f"};
+static const char *ImpliedExtsF[] = {"zicsr"};
+static const char *ImpliedExtsV[] = {"zvl128b", "zve64d"};
+static const char *ImpliedExtsXTHeadVdot[] = {"v"};
+static const char *ImpliedExtsXsfvcp[] = {"zve32x"};
+static const char *ImpliedExtsZacas[] = {"a"};
+static const char *ImpliedExtsZcb[] = {"zca"};
+static const char *ImpliedExtsZcd[] = {"zca"};
+static const char *ImpliedExtsZce[] = {"zcb", "zcmp", "zcmt"};
+static const char *ImpliedExtsZcf[] = {"zca"};
+static const char *ImpliedExtsZcmp[] = {"zca"};
+static const char *ImpliedExtsZcmt[] = {"zca"};
static const char *ImpliedExtsZdinx[] = {"zfinx"};
-static const char *ImpliedExtsZhinxmin[] = {"zfinx"};
+static const char *ImpliedExtsZfa[] = {"f"};
+static const char *ImpliedExtsZfbfmin[] = {"f"};
+static const char *ImpliedExtsZfh[] = {"f"};
+static const char *ImpliedExtsZfhmin[] = {"f"};
+static const char *ImpliedExtsZfinx[] = {"zicsr"};
static const char *ImpliedExtsZhinx[] = {"zfinx"};
-static const char *ImpliedExtsZve64d[] = {"zve64f"};
+static const char *ImpliedExtsZhinxmin[] = {"zfinx"};
+static const char *ImpliedExtsZicntr[] = {"zicsr"};
+static const char *ImpliedExtsZihpm[] = {"zicsr"};
+static const char *ImpliedExtsZk[] = {"zkn", "zkt", "zkr"};
+static const char *ImpliedExtsZkn[] = {"zbkb", "zbkc", "zbkx",
+ "zkne", "zknd", "zknh"};
+static const char *ImpliedExtsZks[] = {"zbkb", "zbkc", "zbkx", "zksed", "zksh"};
+static const char *ImpliedExtsZve32f[] = {"zve32x", "f"};
+static const char *ImpliedExtsZve32x[] = {"zvl32b", "zicsr"};
+static const char *ImpliedExtsZve64d[] = {"zve64f", "d"};
static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"};
static const char *ImpliedExtsZve64x[] = {"zve32x", "zvl64b"};
-static const char *ImpliedExtsZve32f[] = {"zve32x"};
-static const char *ImpliedExtsZve32x[] = {"zvl32b"};
-static const char *ImpliedExtsZvl65536b[] = {"zvl32768b"};
-static const char *ImpliedExtsZvl32768b[] = {"zvl16384b"};
+static const char *ImpliedExtsZvfbfmin[] = {"zve32f"};
+static const char *ImpliedExtsZvfbfwma[] = {"zve32f"};
+static const char *ImpliedExtsZvfh[] = {"zve32f", "zfhmin"};
+static const char *ImpliedExtsZvkn[] = {"zvbb", "zvkned", "zvknhb", "zvkt"};
+static const char *ImpliedExtsZvknc[] = {"zvbc", "zvkn"};
+static const char *ImpliedExtsZvkng[] = {"zvkg", "zvkn"};
+static const char *ImpliedExtsZvknhb[] = {"zvknha"};
+static const char *ImpliedExtsZvks[] = {"zvbb", "zvksed", "zvksh", "zvkt"};
+static const char *ImpliedExtsZvksc[] = {"zvbc", "zvks"};
+static const char *ImpliedExtsZvksg[] = {"zvkg", "zvks"};
+static const char *ImpliedExtsZvl1024b[] = {"zvl512b"};
+static const char *ImpliedExtsZvl128b[] = {"zvl64b"};
static const char *ImpliedExtsZvl16384b[] = {"zvl8192b"};
-static const char *ImpliedExtsZvl8192b[] = {"zvl4096b"};
-static const char *ImpliedExtsZvl4096b[] = {"zvl2048b"};
static const char *ImpliedExtsZvl2048b[] = {"zvl1024b"};
-static const char *ImpliedExtsZvl1024b[] = {"zvl512b"};
-static const char *ImpliedExtsZvl512b[] = {"zvl256b"};
static const char *ImpliedExtsZvl256b[] = {"zvl128b"};
-static const char *ImpliedExtsZvl128b[] = {"zvl64b"};
+static const char *ImpliedExtsZvl32768b[] = {"zvl16384b"};
+static const char *ImpliedExtsZvl4096b[] = {"zvl2048b"};
+static const char *ImpliedExtsZvl512b[] = {"zvl256b"};
static const char *ImpliedExtsZvl64b[] = {"zvl32b"};
-static const char *ImpliedExtsZk[] = {"zkn", "zkt", "zkr"};
-static const char *ImpliedExtsZkn[] = {"zbkb", "zbkc", "zbkx",
- "zkne", "zknd", "zknh"};
-static const char *ImpliedExtsZks[] = {"zbkb", "zbkc", "zbkx", "zksed", "zksh"};
-static const char *ImpliedExtsZvfh[] = {"zve32f"};
-static const char *ImpliedExtsXTHeadVdot[] = {"v"};
+static const char *ImpliedExtsZvl65536b[] = {"zvl32768b"};
+static const char *ImpliedExtsZvl8192b[] = {"zvl4096b"};
struct ImpliedExtsEntry {
StringLiteral Name;
@@ -840,13 +1016,28 @@ struct ImpliedExtsEntry {
// Note: The table needs to be sorted by name.
static constexpr ImpliedExtsEntry ImpliedExts[] = {
+ {{"d"}, {ImpliedExtsD}},
+ {{"f"}, {ImpliedExtsF}},
{{"v"}, {ImpliedExtsV}},
+ {{"xsfvcp"}, {ImpliedExtsXsfvcp}},
{{"xtheadvdot"}, {ImpliedExtsXTHeadVdot}},
+ {{"zacas"}, {ImpliedExtsZacas}},
+ {{"zcb"}, {ImpliedExtsZcb}},
+ {{"zcd"}, {ImpliedExtsZcd}},
+ {{"zce"}, {ImpliedExtsZce}},
+ {{"zcf"}, {ImpliedExtsZcf}},
+ {{"zcmp"}, {ImpliedExtsZcmp}},
+ {{"zcmt"}, {ImpliedExtsZcmt}},
{{"zdinx"}, {ImpliedExtsZdinx}},
+ {{"zfa"}, {ImpliedExtsZfa}},
+ {{"zfbfmin"}, {ImpliedExtsZfbfmin}},
{{"zfh"}, {ImpliedExtsZfh}},
{{"zfhmin"}, {ImpliedExtsZfhmin}},
+ {{"zfinx"}, {ImpliedExtsZfinx}},
{{"zhinx"}, {ImpliedExtsZhinx}},
{{"zhinxmin"}, {ImpliedExtsZhinxmin}},
+ {{"zicntr"}, {ImpliedExtsZicntr}},
+ {{"zihpm"}, {ImpliedExtsZihpm}},
{{"zk"}, {ImpliedExtsZk}},
{{"zkn"}, {ImpliedExtsZkn}},
{{"zks"}, {ImpliedExtsZks}},
@@ -855,7 +1046,16 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = {
{{"zve64d"}, {ImpliedExtsZve64d}},
{{"zve64f"}, {ImpliedExtsZve64f}},
{{"zve64x"}, {ImpliedExtsZve64x}},
+ {{"zvfbfmin"}, {ImpliedExtsZvfbfmin}},
+ {{"zvfbfwma"}, {ImpliedExtsZvfbfwma}},
{{"zvfh"}, {ImpliedExtsZvfh}},
+ {{"zvkn"}, {ImpliedExtsZvkn}},
+ {{"zvknc"}, {ImpliedExtsZvknc}},
+ {{"zvkng"}, {ImpliedExtsZvkng}},
+ {{"zvknhb"}, {ImpliedExtsZvknhb}},
+ {{"zvks"}, {ImpliedExtsZvks}},
+ {{"zvksc"}, {ImpliedExtsZvksc}},
+ {{"zvksg"}, {ImpliedExtsZvksg}},
{{"zvl1024b"}, {ImpliedExtsZvl1024b}},
{{"zvl128b"}, {ImpliedExtsZvl128b}},
{{"zvl16384b"}, {ImpliedExtsZvl16384b}},
@@ -903,6 +1103,13 @@ void RISCVISAInfo::updateImplication() {
}
}
}
+
+ // Add Zcf if Zce and F are enabled on RV32.
+ if (XLen == 32 && Exts.count("zce") && Exts.count("f") &&
+ !Exts.count("zcf")) {
+ auto Version = findDefaultVersion("zcf");
+ addExtension("zcf", Version->Major, Version->Minor);
+ }
}
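updateImplication has to apply the implication tables transitively: enabling 'v' pulls in zvl128b and zve64d, which pull in their own implications, and so on. The closure is a straightforward worklist over a name-to-implications map; a standalone sketch using a fragment of the tables listed above (versions omitted):

    #include <cstdio>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    // A small slice of the implication tables above, just to show the closure.
    static const std::map<std::string, std::vector<std::string>> Implies = {
        {"v", {"zvl128b", "zve64d"}},
        {"zve64d", {"zve64f", "d"}},
        {"zve64f", {"zve64x", "zve32f"}},
        {"zve64x", {"zve32x", "zvl64b"}},
        {"zve32f", {"zve32x", "f"}},
        {"zve32x", {"zvl32b", "zicsr"}},
        {"zvl128b", {"zvl64b"}},
        {"zvl64b", {"zvl32b"}},
        {"d", {"f"}},
        {"f", {"zicsr"}},
    };

    static void expandImplications(std::set<std::string> &Exts) {
      std::vector<std::string> Worklist(Exts.begin(), Exts.end());
      while (!Worklist.empty()) {
        std::string E = Worklist.back();
        Worklist.pop_back();
        auto It = Implies.find(E);
        if (It == Implies.end())
          continue;
        for (const std::string &Implied : It->second)
          if (Exts.insert(Implied).second) // newly added, so expand it too
            Worklist.push_back(Implied);
      }
    }

    int main() {
      std::set<std::string> Exts = {"v"};
      expandImplications(Exts);
      for (const std::string &E : Exts) // prints the transitive closure of "v"
        std::printf("%s ", E.c_str());
    }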
struct CombinedExtsEntry {
@@ -914,6 +1121,12 @@ static constexpr CombinedExtsEntry CombineIntoExts[] = {
{{"zk"}, {ImpliedExtsZk}},
{{"zkn"}, {ImpliedExtsZkn}},
{{"zks"}, {ImpliedExtsZks}},
+ {{"zvkn"}, {ImpliedExtsZvkn}},
+ {{"zvknc"}, {ImpliedExtsZvknc}},
+ {{"zvkng"}, {ImpliedExtsZvkng}},
+ {{"zvks"}, {ImpliedExtsZvks}},
+ {{"zvksc"}, {ImpliedExtsZvksc}},
+ {{"zvksg"}, {ImpliedExtsZvksg}},
};
void RISCVISAInfo::updateCombination() {
@@ -999,6 +1212,8 @@ std::vector<std::string> RISCVISAInfo::toFeatureVector() const {
std::string ExtName = Ext.first;
if (ExtName == "i") // i is not recognized in clang -cc1
continue;
+ if (!isSupportedExtension(ExtName))
+ continue;
std::string Feature = isExperimentalExtension(ExtName)
? "+experimental-" + ExtName
: "+" + ExtName;
@@ -1030,6 +1245,8 @@ StringRef RISCVISAInfo::computeDefaultABI() const {
} else if (XLen == 64) {
if (hasExtension("d"))
return "lp64d";
+ if (hasExtension("e"))
+ return "lp64e";
return "lp64";
}
llvm_unreachable("Invalid XLEN");
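The default-ABI computation is a priority chain over XLen and the enabled extensions; the hunk above adds the lp64e case. Mirroring just the 64-bit branch that is visible here as a tiny standalone function (the 32-bit branch is analogous but not shown in this hunk):

    #include <cstdio>
    #include <set>
    #include <string>

    // Mirror of the 64-bit branch shown above.
    static std::string defaultABI64(const std::set<std::string> &Exts) {
      if (Exts.count("d"))
        return "lp64d";
      if (Exts.count("e"))
        return "lp64e";
      return "lp64";
    }

    int main() {
      std::printf("%s\n", defaultABI64({"i", "m", "a", "c"}).c_str()); // lp64
    }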
diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp
index 7a804a1a2297..dfbd373e4a98 100644
--- a/llvm/lib/Support/Regex.cpp
+++ b/llvm/lib/Support/Regex.cpp
@@ -14,14 +14,11 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "regex_impl.h"
+
#include <cassert>
#include <string>
-// Important this comes last because it defines "_REGEX_H_". At least on
-// Darwin, if included before any header that (transitively) includes
-// xlocale.h, this will cause trouble, because of missing regex-related types.
-#include "regex_impl.h"
-
using namespace llvm;
Regex::Regex() : preg(nullptr), error(REG_BADPAT) {}
diff --git a/llvm/lib/Support/ScaledNumber.cpp b/llvm/lib/Support/ScaledNumber.cpp
index 54d4cc33410b..85d7afbea5c6 100644
--- a/llvm/lib/Support/ScaledNumber.cpp
+++ b/llvm/lib/Support/ScaledNumber.cpp
@@ -44,7 +44,7 @@ std::pair<uint64_t, int16_t> ScaledNumbers::multiply64(uint64_t LHS,
return std::make_pair(Lower, 0);
// Shift as little as possible to maximize precision.
- unsigned LeadingZeros = countLeadingZeros(Upper);
+ unsigned LeadingZeros = llvm::countl_zero(Upper);
int Shift = 64 - LeadingZeros;
if (LeadingZeros)
Upper = Upper << LeadingZeros | Lower >> Shift;
@@ -62,7 +62,7 @@ std::pair<uint32_t, int16_t> ScaledNumbers::divide32(uint32_t Dividend,
// Use 64-bit math and canonicalize the dividend to gain precision.
uint64_t Dividend64 = Dividend;
int Shift = 0;
- if (int Zeros = countLeadingZeros(Dividend64)) {
+ if (int Zeros = llvm::countl_zero(Dividend64)) {
Shift -= Zeros;
Dividend64 <<= Zeros;
}
@@ -84,7 +84,7 @@ std::pair<uint64_t, int16_t> ScaledNumbers::divide64(uint64_t Dividend,
// Minimize size of divisor.
int Shift = 0;
- if (int Zeros = countTrailingZeros(Divisor)) {
+ if (int Zeros = llvm::countr_zero(Divisor)) {
Shift -= Zeros;
Divisor >>= Zeros;
}
@@ -94,7 +94,7 @@ std::pair<uint64_t, int16_t> ScaledNumbers::divide64(uint64_t Dividend,
return std::make_pair(Dividend, Shift);
// Maximize size of dividend.
- if (int Zeros = countLeadingZeros(Dividend)) {
+ if (int Zeros = llvm::countl_zero(Dividend)) {
Shift -= Zeros;
Dividend <<= Zeros;
}
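The countLeadingZeros/countTrailingZeros calls move to llvm::countl_zero and llvm::countr_zero, which follow the naming of the C++20 <bit> facilities. The normalization trick itself (strip trailing zeros from the divisor, shift the dividend fully left, and account for both shifts in the exponent) can be sketched with the standard functions; this is only the scaling step, not the full divide64 algorithm:

    #include <bit>
    #include <cstdint>
    #include <cstdio>
    #include <utility>

    // Requires Dividend != 0 and Divisor != 0.
    // Returns {Digits, Exp} such that Dividend / Divisor ~= Digits * 2^Exp.
    static std::pair<uint64_t, int16_t> normalizedDivide(uint64_t Dividend,
                                                         uint64_t Divisor) {
      int Shift = 0;
      if (int Zeros = std::countr_zero(Divisor)) {
        Shift -= Zeros; // divisor shrank by 2^Zeros; compensate in the exponent
        Divisor >>= Zeros;
      }
      if (int Zeros = std::countl_zero(Dividend)) {
        Shift -= Zeros; // dividend grew by 2^Zeros; compensate in the exponent
        Dividend <<= Zeros;
      }
      return {Dividend / Divisor, static_cast<int16_t>(Shift)};
    }

    int main() {
      auto [Digits, Exp] = normalizedDivide(6, 4); // 1.5 == Digits * 2^Exp
      std::printf("%llu * 2^%d\n", (unsigned long long)Digits, Exp);
    }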
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 0fb65accbf1d..64f66e0f8179 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -37,7 +37,6 @@ bool SpecialCaseList::Matcher::insert(std::string Regexp,
Strings[Regexp] = LineNumber;
return true;
}
- Trigrams.insert(Regexp);
// Replace * with .*
for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
@@ -61,8 +60,6 @@ unsigned SpecialCaseList::Matcher::match(StringRef Query) const {
auto It = Strings.find(Query);
if (It != Strings.end())
return It->second;
- if (Trigrams.isDefinitelyOut(Query))
- return false;
for (const auto &RegExKV : RegExes)
if (RegExKV.first->match(Query))
return RegExKV.second;
@@ -175,7 +172,7 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB,
StringRef Category = SplitRegexp.second;
// Create this section if it has not been seen before.
- if (SectionsMap.find(Section) == SectionsMap.end()) {
+ if (!SectionsMap.contains(Section)) {
std::unique_ptr<Matcher> M = std::make_unique<Matcher>();
std::string REError;
if (!M->insert(std::string(Section), LineNo, REError)) {
diff --git a/llvm/lib/Support/StringMap.cpp b/llvm/lib/Support/StringMap.cpp
index 9b2f96fca2cd..67c05a87959c 100644
--- a/llvm/lib/Support/StringMap.cpp
+++ b/llvm/lib/Support/StringMap.cpp
@@ -11,8 +11,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringMap.h"
-#include "llvm/Support/DJB.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ReverseIteration.h"
+#include "llvm/Support/xxhash.h"
using namespace llvm;
@@ -84,7 +85,9 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
// Hash table unallocated so far?
if (NumBuckets == 0)
init(16);
- unsigned FullHashValue = djbHash(Name, 0);
+ unsigned FullHashValue = xxh3_64bits(Name);
+ if (shouldReverseIterate())
+ FullHashValue = ~FullHashValue;
unsigned BucketNo = FullHashValue & (NumBuckets - 1);
unsigned *HashTable = getHashTable(TheTable, NumBuckets);
@@ -139,7 +142,9 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
int StringMapImpl::FindKey(StringRef Key) const {
if (NumBuckets == 0)
return -1; // Really empty table?
- unsigned FullHashValue = djbHash(Key, 0);
+ unsigned FullHashValue = xxh3_64bits(Key);
+ if (shouldReverseIterate())
+ FullHashValue = ~FullHashValue;
unsigned BucketNo = FullHashValue & (NumBuckets - 1);
unsigned *HashTable = getHashTable(TheTable, NumBuckets);
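Two details of this hash swap are worth spelling out: the bucket count stays a power of two, so the bucket index is simply FullHashValue & (NumBuckets - 1), and the reverse-iteration configuration flips the hash bits so that code accidentally depending on a particular iteration order is more likely to be caught. The indexing idiom with an arbitrary stand-in hash (std::hash here, not xxh3_64bits):

    #include <cstdio>
    #include <functional>
    #include <string_view>

    // NumBuckets must be a power of two for the mask to be equivalent to modulo.
    static unsigned bucketFor(std::string_view Key, unsigned NumBuckets,
                              bool ReverseIterate = false) {
      unsigned FullHashValue =
          static_cast<unsigned>(std::hash<std::string_view>{}(Key));
      if (ReverseIterate)
        FullHashValue = ~FullHashValue; // perturb bucket choice to expose order bugs
      return FullHashValue & (NumBuckets - 1);
    }

    int main() {
      std::printf("%u %u\n", bucketFor("hello", 16), bucketFor("hello", 16, true));
    }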
diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp
index fb93940592c7..3cce83a982c4 100644
--- a/llvm/lib/Support/StringRef.cpp
+++ b/llvm/lib/Support/StringRef.cpp
@@ -191,7 +191,7 @@ size_t StringRef::find(StringRef Str, size_t From) const {
size_t StringRef::find_insensitive(StringRef Str, size_t From) const {
StringRef This = substr(From);
while (This.size() >= Str.size()) {
- if (This.startswith_insensitive(Str))
+ if (This.starts_with_insensitive(Str))
return From;
This = This.drop_front();
++From;
@@ -509,7 +509,7 @@ bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
return !Str.empty();
}
-bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
+bool StringRef::consumeInteger(unsigned Radix, APInt &Result) {
StringRef Str = *this;
// Autosense radix if not specified.
@@ -529,6 +529,7 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
// If it was nothing but zeroes....
if (Str.empty()) {
Result = APInt(64, 0);
+ *this = Str;
return false;
}
@@ -561,12 +562,12 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
else if (Str[0] >= 'A' && Str[0] <= 'Z')
CharVal = Str[0]-'A'+10;
else
- return true;
+ break;
// If the parsed value is larger than the integer radix, the string is
// invalid.
if (CharVal >= Radix)
- return true;
+ break;
// Add in this character.
if (IsPowerOf2Radix) {
@@ -581,9 +582,25 @@ bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
Str = Str.substr(1);
}
+ // We consider the operation a failure if no characters were consumed
+ // successfully.
+ if (size() == Str.size())
+ return true;
+
+ *this = Str;
return false;
}
+bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
+ StringRef Str = *this;
+ if (Str.consumeInteger(Radix, Result))
+ return true;
+
+ // For getAsInteger, we require the whole string to be consumed or else we
+ // consider it a failure.
+ return !Str.empty();
+}
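The refactor turns getAsInteger into "consume as much of a leading integer as possible, then require that nothing is left over". Standard C++ draws the same line in std::from_chars, which reports how far it got; a sketch of the two layered behaviors (decimal, unsigned only, with the same bool-means-failure convention):

    #include <cassert>
    #include <charconv>
    #include <cstdint>
    #include <string_view>
    #include <system_error>

    // Consume-style: parse a leading integer, advance Str past it, and fail
    // (return true) only if nothing valid was consumed.
    static bool consumeInteger(std::string_view &Str, uint64_t &Result) {
      auto [Ptr, Ec] = std::from_chars(Str.data(), Str.data() + Str.size(), Result);
      if (Ec != std::errc())
        return true; // no digits at all, or overflow
      Str.remove_prefix(static_cast<size_t>(Ptr - Str.data()));
      return false;
    }

    // Whole-string style: additionally require that no characters remain.
    static bool getAsInteger(std::string_view Str, uint64_t &Result) {
      if (consumeInteger(Str, Result))
        return true;
      return !Str.empty();
    }

    int main() {
      uint64_t V = 0;
      std::string_view S = "123abc";
      assert(!consumeInteger(S, V) && V == 123 && S == "abc");
      assert(getAsInteger("123abc", V)); // trailing junk makes the strict form fail
      assert(!getAsInteger("456", V) && V == 456);
    }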
+
bool StringRef::getAsDouble(double &Result, bool AllowInexact) const {
APFloat F(0.0);
auto StatusOrErr = F.convertFromString(*this, APFloat::rmNearestTiesToEven);
diff --git a/llvm/lib/Support/SuffixTree.cpp b/llvm/lib/Support/SuffixTree.cpp
index 0d419f12cd1d..eaa653078e09 100644
--- a/llvm/lib/Support/SuffixTree.cpp
+++ b/llvm/lib/Support/SuffixTree.cpp
@@ -12,12 +12,22 @@
#include "llvm/Support/SuffixTree.h"
#include "llvm/Support/Allocator.h"
-#include <vector>
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/SuffixTreeNode.h"
using namespace llvm;
-SuffixTree::SuffixTree(const std::vector<unsigned> &Str) : Str(Str) {
- Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0);
+/// \returns the number of elements in the substring associated with \p N.
+static size_t numElementsInSubstring(const SuffixTreeNode *N) {
+ assert(N && "Got a null node?");
+ if (auto *Internal = dyn_cast<SuffixTreeInternalNode>(N))
+ if (Internal->isRoot())
+ return 0;
+ return N->getEndIdx() - N->getStartIdx() + 1;
+}
+
+SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str) : Str(Str) {
+ Root = insertRoot();
Active.Node = Root;
// Keep track of the number of suffixes we have to add of the current
@@ -38,39 +48,38 @@ SuffixTree::SuffixTree(const std::vector<unsigned> &Str) : Str(Str) {
setSuffixIndices();
}
-SuffixTreeNode *SuffixTree::insertLeaf(SuffixTreeNode &Parent,
+SuffixTreeNode *SuffixTree::insertLeaf(SuffixTreeInternalNode &Parent,
unsigned StartIdx, unsigned Edge) {
-
assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
-
- SuffixTreeNode *N = new (NodeAllocator.Allocate())
- SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr);
+ auto *N = new (LeafNodeAllocator.Allocate())
+ SuffixTreeLeafNode(StartIdx, &LeafEndIdx);
Parent.Children[Edge] = N;
-
return N;
}
-SuffixTreeNode *SuffixTree::insertInternalNode(SuffixTreeNode *Parent,
- unsigned StartIdx,
- unsigned EndIdx, unsigned Edge) {
-
+SuffixTreeInternalNode *
+SuffixTree::insertInternalNode(SuffixTreeInternalNode *Parent,
+ unsigned StartIdx, unsigned EndIdx,
+ unsigned Edge) {
assert(StartIdx <= EndIdx && "String can't start after it ends!");
- assert(!(!Parent && StartIdx != EmptyIdx) &&
+ assert(!(!Parent && StartIdx != SuffixTreeNode::EmptyIdx) &&
"Non-root internal nodes must have parents!");
-
- unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx);
- SuffixTreeNode *N =
- new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx, E, Root);
+ auto *N = new (InternalNodeAllocator.Allocate())
+ SuffixTreeInternalNode(StartIdx, EndIdx, Root);
if (Parent)
Parent->Children[Edge] = N;
-
return N;
}
+SuffixTreeInternalNode *SuffixTree::insertRoot() {
+ return insertInternalNode(/*Parent = */ nullptr, SuffixTreeNode::EmptyIdx,
+ SuffixTreeNode::EmptyIdx, /*Edge = */ 0);
+}
+
void SuffixTree::setSuffixIndices() {
// List of nodes we need to visit along with the current length of the
// string.
- std::vector<std::pair<SuffixTreeNode *, unsigned>> ToVisit;
+ SmallVector<std::pair<SuffixTreeNode *, unsigned>> ToVisit;
// Current node being visited.
SuffixTreeNode *CurrNode = Root;
@@ -81,21 +90,23 @@ void SuffixTree::setSuffixIndices() {
while (!ToVisit.empty()) {
std::tie(CurrNode, CurrNodeLen) = ToVisit.back();
ToVisit.pop_back();
- CurrNode->ConcatLen = CurrNodeLen;
- for (auto &ChildPair : CurrNode->Children) {
- assert(ChildPair.second && "Node had a null child!");
- ToVisit.push_back(
- {ChildPair.second, CurrNodeLen + ChildPair.second->size()});
- }
-
+ // Length of the current node from the root down to here.
+ CurrNode->setConcatLen(CurrNodeLen);
+ if (auto *InternalNode = dyn_cast<SuffixTreeInternalNode>(CurrNode))
+ for (auto &ChildPair : InternalNode->Children) {
+ assert(ChildPair.second && "Node had a null child!");
+ ToVisit.push_back(
+ {ChildPair.second,
+ CurrNodeLen + numElementsInSubstring(ChildPair.second)});
+ }
// No children, so we are at the end of the string.
- if (CurrNode->Children.size() == 0 && !CurrNode->isRoot())
- CurrNode->SuffixIdx = Str.size() - CurrNodeLen;
+ if (auto *LeafNode = dyn_cast<SuffixTreeLeafNode>(CurrNode))
+ LeafNode->setSuffixIdx(Str.size() - CurrNodeLen);
}
}
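setSuffixIndices replaces recursion with an explicit stack that carries the accumulated string length alongside each node, which is the usual way to thread a running value through an iterative DFS. The same pattern over a generic tree (standalone sketch, not the suffix tree classes):

    #include <cstdio>
    #include <utility>
    #include <vector>

    struct Node {
      int EdgeLen = 0; // length contributed by the edge into this node
      std::vector<Node *> Children;
      int DepthFromRoot = 0; // filled in by the traversal below
    };

    // Iterative DFS carrying the accumulated depth next to each node, the way
    // setSuffixIndices carries the concatenated substring length.
    static void setDepths(Node &Root) {
      std::vector<std::pair<Node *, int>> ToVisit{{&Root, 0}};
      while (!ToVisit.empty()) {
        auto [Curr, Depth] = ToVisit.back();
        ToVisit.pop_back();
        Curr->DepthFromRoot = Depth;
        for (Node *Child : Curr->Children)
          ToVisit.push_back({Child, Depth + Child->EdgeLen});
      }
    }

    int main() {
      Node Leaf{3, {}, 0}, Mid{2, {&Leaf}, 0}, Root{0, {&Mid}, 0};
      setDepths(Root);
      std::printf("%d %d %d\n", Root.DepthFromRoot, Mid.DepthFromRoot,
                  Leaf.DepthFromRoot); // 0 2 5
    }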
unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
- SuffixTreeNode *NeedsLink = nullptr;
+ SuffixTreeInternalNode *NeedsLink = nullptr;
while (SuffixesToAdd > 0) {
@@ -118,7 +129,7 @@ unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
// The active node is an internal node, and we visited it, so it must
// need a link if it doesn't have one.
if (NeedsLink) {
- NeedsLink->Link = Active.Node;
+ NeedsLink->setLink(Active.Node);
NeedsLink = nullptr;
}
} else {
@@ -126,16 +137,18 @@ unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
// insert a new node.
SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
- unsigned SubstringLen = NextNode->size();
+ unsigned SubstringLen = numElementsInSubstring(NextNode);
// Is the current suffix we're trying to insert longer than the size of
// the child we want to move to?
if (Active.Len >= SubstringLen) {
// If yes, then consume the characters we've seen and move to the next
// node.
+ assert(isa<SuffixTreeInternalNode>(NextNode) &&
+ "Expected an internal node?");
Active.Idx += SubstringLen;
Active.Len -= SubstringLen;
- Active.Node = NextNode;
+ Active.Node = cast<SuffixTreeInternalNode>(NextNode);
continue;
}
@@ -144,12 +157,12 @@ unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
unsigned LastChar = Str[EndIdx];
// Is the string we're trying to insert a substring of the next node?
- if (Str[NextNode->StartIdx + Active.Len] == LastChar) {
+ if (Str[NextNode->getStartIdx() + Active.Len] == LastChar) {
// If yes, then we're done for this step. Remember our insertion point
// and move to the next end index. At this point, we have an implicit
// suffix tree.
if (NeedsLink && !Active.Node->isRoot()) {
- NeedsLink->Link = Active.Node;
+ NeedsLink->setLink(Active.Node);
NeedsLink = nullptr;
}
@@ -171,9 +184,9 @@ unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
// n l
// The node s from the diagram
- SuffixTreeNode *SplitNode =
- insertInternalNode(Active.Node, NextNode->StartIdx,
- NextNode->StartIdx + Active.Len - 1, FirstChar);
+ SuffixTreeInternalNode *SplitNode = insertInternalNode(
+ Active.Node, NextNode->getStartIdx(),
+ NextNode->getStartIdx() + Active.Len - 1, FirstChar);
// Insert the new node representing the new substring into the tree as
// a child of the split node. This is the node l from the diagram.
@@ -181,12 +194,12 @@ unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
// Make the old node a child of the split node and update its start
// index. This is the node n from the diagram.
- NextNode->StartIdx += Active.Len;
- SplitNode->Children[Str[NextNode->StartIdx]] = NextNode;
+ NextNode->incrementStartIdx(Active.Len);
+ SplitNode->Children[Str[NextNode->getStartIdx()]] = NextNode;
// SplitNode is an internal node, update the suffix link.
if (NeedsLink)
- NeedsLink->Link = SplitNode;
+ NeedsLink->setLink(SplitNode);
NeedsLink = SplitNode;
}
@@ -202,9 +215,68 @@ unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
}
} else {
// Start the next phase at the next smallest suffix.
- Active.Node = Active.Node->Link;
+ Active.Node = Active.Node->getLink();
}
}
return SuffixesToAdd;
}
+
+void SuffixTree::RepeatedSubstringIterator::advance() {
+ // Clear the current state. If we're at the end of the range, then this
+ // is the state we want to be in.
+ RS = RepeatedSubstring();
+ N = nullptr;
+
+ // Each leaf node represents a repeat of a string.
+ SmallVector<unsigned> RepeatedSubstringStarts;
+
+ // Continue visiting nodes until we find one which repeats more than once.
+ while (!InternalNodesToVisit.empty()) {
+ RepeatedSubstringStarts.clear();
+ auto *Curr = InternalNodesToVisit.back();
+ InternalNodesToVisit.pop_back();
+
+ // Keep track of the length of the string associated with the node. If
+ // it's too short, we'll quit.
+ unsigned Length = Curr->getConcatLen();
+
+ // Iterate over each child, saving internal nodes for visiting and
+ // recording the suffix index of each leaf child. Each leaf child is one
+ // occurrence of a potentially repeated string.
+ for (auto &ChildPair : Curr->Children) {
+ // Save all of this node's children for processing.
+ if (auto *InternalChild =
+ dyn_cast<SuffixTreeInternalNode>(ChildPair.second)) {
+ InternalNodesToVisit.push_back(InternalChild);
+ continue;
+ }
+
+ if (Length < MinLength)
+ continue;
+
+ // Have an occurrence of a potentially repeated string. Save it.
+ auto *Leaf = cast<SuffixTreeLeafNode>(ChildPair.second);
+ RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
+ }
+
+ // The root never represents a repeated substring. If we're looking at
+ // that, then skip it.
+ if (Curr->isRoot())
+ continue;
+
+ // Do we have any repeated substrings?
+ if (RepeatedSubstringStarts.size() < 2)
+ continue;
+
+ // Yes. Update the state to reflect this, and then bail out.
+ N = Curr;
+ RS.Length = Length;
+ for (unsigned StartIdx : RepeatedSubstringStarts)
+ RS.StartIndices.push_back(StartIdx);
+ break;
+ }
+ // At this point, either RS is an empty RepeatedSubstring, or it was
+ // set in the above loop. Similarly, N is either nullptr, or the node
+ // associated with RS.
+}
diff --git a/llvm/lib/Support/SuffixTreeNode.cpp b/llvm/lib/Support/SuffixTreeNode.cpp
new file mode 100644
index 000000000000..113b990fd352
--- /dev/null
+++ b/llvm/lib/Support/SuffixTreeNode.cpp
@@ -0,0 +1,40 @@
+//===- llvm/ADT/SuffixTreeNode.cpp - Nodes for SuffixTrees -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines nodes for use within a SuffixTree.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/SuffixTreeNode.h"
+#include "llvm/Support/Casting.h"
+
+using namespace llvm;
+
+unsigned SuffixTreeNode::getStartIdx() const { return StartIdx; }
+void SuffixTreeNode::incrementStartIdx(unsigned Inc) { StartIdx += Inc; }
+void SuffixTreeNode::setConcatLen(unsigned Len) { ConcatLen = Len; }
+unsigned SuffixTreeNode::getConcatLen() const { return ConcatLen; }
+
+bool SuffixTreeInternalNode::isRoot() const {
+ return getStartIdx() == EmptyIdx;
+}
+unsigned SuffixTreeInternalNode::getEndIdx() const { return EndIdx; }
+void SuffixTreeInternalNode::setLink(SuffixTreeInternalNode *L) {
+ assert(L && "Cannot set a null link?");
+ Link = L;
+}
+SuffixTreeInternalNode *SuffixTreeInternalNode::getLink() const { return Link; }
+
+unsigned SuffixTreeLeafNode::getEndIdx() const {
+ assert(EndIdx && "EndIdx is empty?");
+ return *EndIdx;
+}
+
+unsigned SuffixTreeLeafNode::getSuffixIdx() const { return SuffixIdx; }
+void SuffixTreeLeafNode::setSuffixIdx(unsigned Idx) { SuffixIdx = Idx; }
diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp
index 31461e31c65c..4eef339000e1 100644
--- a/llvm/lib/Support/ThreadPool.cpp
+++ b/llvm/lib/Support/ThreadPool.cpp
@@ -15,6 +15,7 @@
#include "llvm/Config/llvm-config.h"
#if LLVM_ENABLE_THREADS
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Threading.h"
#else
#include "llvm/Support/raw_ostream.h"
@@ -43,6 +44,7 @@ void ThreadPool::grow(int requested) {
while (static_cast<int>(Threads.size()) < newThreadCount) {
int ThreadID = Threads.size();
Threads.emplace_back([this, ThreadID] {
+ set_thread_name(formatv("llvm-worker-{0}", ThreadID));
Strategy.apply_thread_strategy(ThreadID);
processTasks(nullptr);
});
diff --git a/llvm/lib/Support/Threading.cpp b/llvm/lib/Support/Threading.cpp
index 923935bbca10..7cc7ba44cc72 100644
--- a/llvm/lib/Support/Threading.cpp
+++ b/llvm/lib/Support/Threading.cpp
@@ -83,6 +83,11 @@ unsigned llvm::ThreadPoolStrategy::compute_thread_count() const {
// the same interface as std::thread but requests the same stack size as the
// main thread (8MB) before creation.
const std::optional<unsigned> llvm::thread::DefaultStackSize = 8 * 1024 * 1024;
+#elif defined(_AIX)
+ // On AIX, the default pthread stack size limit is ~192k for 64-bit programs.
+ // This limit is easily reached when doing link-time thinLTO. AIX library
+ // developers have used 4MB, so we'll do the same.
+const std::optional<unsigned> llvm::thread::DefaultStackSize = 4 * 1024 * 1024;
#else
const std::optional<unsigned> llvm::thread::DefaultStackSize;
#endif
diff --git a/llvm/lib/Support/TrigramIndex.cpp b/llvm/lib/Support/TrigramIndex.cpp
deleted file mode 100644
index 40a20ccc6583..000000000000
--- a/llvm/lib/Support/TrigramIndex.cpp
+++ /dev/null
@@ -1,107 +0,0 @@
-//===-- TrigramIndex.cpp - a heuristic for SpecialCaseList ----------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// TrigramIndex implements a heuristic for SpecialCaseList that allows to
-// filter out ~99% incoming queries when all regular expressions in the
-// SpecialCaseList are simple wildcards with '*' and '.'. If rules are more
-// complicated, the check is defeated and it will always pass the queries to a
-// full regex.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/TrigramIndex.h"
-#include "llvm/ADT/StringRef.h"
-#include <set>
-
-using namespace llvm;
-
-static const char RegexAdvancedMetachars[] = "()^$|+?[]\\{}";
-
-static bool isAdvancedMetachar(unsigned Char) {
- return strchr(RegexAdvancedMetachars, Char) != nullptr;
-}
-
-void TrigramIndex::insert(const std::string &Regex) {
- if (Defeated) return;
- std::set<unsigned> Was;
- unsigned Cnt = 0;
- unsigned Tri = 0;
- unsigned Len = 0;
- bool Escaped = false;
- for (unsigned Char : Regex) {
- if (!Escaped) {
- // Regular expressions allow escaping symbols by preceding it with '\'.
- if (Char == '\\') {
- Escaped = true;
- continue;
- }
- if (isAdvancedMetachar(Char)) {
- // This is a more complicated regex than we can handle here.
- Defeated = true;
- return;
- }
- if (Char == '.' || Char == '*') {
- Tri = 0;
- Len = 0;
- continue;
- }
- }
- if (Escaped && Char >= '1' && Char <= '9') {
- Defeated = true;
- return;
- }
- // We have already handled escaping and can reset the flag.
- Escaped = false;
- Tri = ((Tri << 8) + Char) & 0xFFFFFF;
- Len++;
- if (Len < 3)
- continue;
- // We don't want the index to grow too much for the popular trigrams,
- // as they are weak signals. It's ok to still require them for the
- // rules we have already processed. It's just a small additional
- // computational cost.
- if (Index[Tri].size() >= 4)
- continue;
- Cnt++;
- if (!Was.count(Tri)) {
- // Adding the current rule to the index.
- Index[Tri].push_back(Counts.size());
- Was.insert(Tri);
- }
- }
- if (!Cnt) {
- // This rule does not have remarkable trigrams to rely on.
- // We have to always call the full regex chain.
- Defeated = true;
- return;
- }
- Counts.push_back(Cnt);
-}
-
-bool TrigramIndex::isDefinitelyOut(StringRef Query) const {
- if (Defeated)
- return false;
- std::vector<unsigned> CurCounts(Counts.size());
- unsigned Tri = 0;
- for (size_t I = 0; I < Query.size(); I++) {
- Tri = ((Tri << 8) + Query[I]) & 0xFFFFFF;
- if (I < 2)
- continue;
- const auto &II = Index.find(Tri);
- if (II == Index.end())
- continue;
- for (size_t J : II->second) {
- CurCounts[J]++;
- // If we have reached a desired limit, we have to look at the query
- // more closely by running a full regex.
- if (CurCounts[J] >= Counts[J])
- return false;
- }
- }
- return true;
-}
diff --git a/llvm/lib/Support/Unix/Path.inc b/llvm/lib/Support/Unix/Path.inc
index 3efcad4f2bed..e2aece49cbc5 100644
--- a/llvm/lib/Support/Unix/Path.inc
+++ b/llvm/lib/Support/Unix/Path.inc
@@ -190,7 +190,7 @@ static char *getprogpath(char ret[PATH_MAX], const char *bin) {
/// GetMainExecutable - Return the path to the main executable, given the
/// value of argv[0] from program startup.
-std::string getMainExecutableImpl(const char *argv0, void *MainAddr) {
+std::string getMainExecutable(const char *argv0, void *MainAddr) {
#if defined(__APPLE__)
// On OS X the executable path is saved to the stack by dyld. Reading it
// from there is much faster than calling dladdr, especially for large
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 05a7335216f4..fcf5701afcfd 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -62,6 +62,9 @@
#if HAVE_MACH_MACH_H
#include <mach/mach.h>
#endif
+#ifdef __APPLE__
+#include <mach-o/dyld.h>
+#endif
#if HAVE_LINK_H
#include <link.h>
#endif
@@ -84,13 +87,11 @@ static void InfoSignalHandler(int Sig); // defined below.
using SignalHandlerFunctionType = void (*)();
/// The function to call if ctrl-c is pressed.
-static std::atomic<SignalHandlerFunctionType> InterruptFunction =
- ATOMIC_VAR_INIT(nullptr);
-static std::atomic<SignalHandlerFunctionType> InfoSignalFunction =
- ATOMIC_VAR_INIT(nullptr);
+static std::atomic<SignalHandlerFunctionType> InterruptFunction = nullptr;
+static std::atomic<SignalHandlerFunctionType> InfoSignalFunction = nullptr;
/// The function to call on SIGPIPE (one-time use only).
static std::atomic<SignalHandlerFunctionType> OneShotPipeSignalFunction =
- ATOMIC_VAR_INIT(nullptr);
+ nullptr;
namespace {
/// Signal-safe removal of files.
@@ -98,8 +99,8 @@ namespace {
/// themselves is signal-safe. Memory is freed when the head is freed, deletion
/// is therefore not signal-safe either.
class FileToRemoveList {
- std::atomic<char *> Filename = ATOMIC_VAR_INIT(nullptr);
- std::atomic<FileToRemoveList *> Next = ATOMIC_VAR_INIT(nullptr);
+ std::atomic<char *> Filename = nullptr;
+ std::atomic<FileToRemoveList *> Next = nullptr;
FileToRemoveList() = default;
// Not signal-safe.
@@ -188,7 +189,7 @@ public:
Head.exchange(OldHead);
}
};
-static std::atomic<FileToRemoveList *> FilesToRemove = ATOMIC_VAR_INIT(nullptr);
+static std::atomic<FileToRemoveList *> FilesToRemove = nullptr;
/// Clean up the list in a signal-friendly manner.
/// Recall that signals can fire during llvm_shutdown. If this occurs we should
@@ -248,7 +249,7 @@ static const int InfoSigs[] = {SIGUSR1
static const size_t NumSigs = std::size(IntSigs) + std::size(KillSigs) +
std::size(InfoSigs) + 1 /* SIGPIPE */;
-static std::atomic<unsigned> NumRegisteredSignals = ATOMIC_VAR_INIT(0);
+static std::atomic<unsigned> NumRegisteredSignals = 0;
static struct {
struct sigaction SA;
int SigNo;
@@ -463,7 +464,7 @@ void llvm::sys::AddSignalHandler(sys::SignalHandlerCallback FnPtr,
RegisterHandlers();
}
-#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES && HAVE_LINK_H && \
+#if ENABLE_BACKTRACES && defined(HAVE_BACKTRACE) && HAVE_LINK_H && \
(defined(__linux__) || defined(__FreeBSD__) || \
defined(__FreeBSD_kernel__) || defined(__NetBSD__))
struct DlIteratePhdrData {
@@ -509,16 +510,50 @@ static bool findModulesAndOffsets(void **StackTrace, int Depth,
dl_iterate_phdr(dl_iterate_phdr_cb, &data);
return true;
}
+#elif ENABLE_BACKTRACES && defined(__APPLE__) && defined(__LP64__)
+static bool findModulesAndOffsets(void **StackTrace, int Depth,
+ const char **Modules, intptr_t *Offsets,
+ const char *MainExecutableName,
+ StringSaver &StrPool) {
+ uint32_t NumImgs = _dyld_image_count();
+ for (uint32_t ImageIndex = 0; ImageIndex < NumImgs; ImageIndex++) {
+ const char *Name = _dyld_get_image_name(ImageIndex);
+ intptr_t Slide = _dyld_get_image_vmaddr_slide(ImageIndex);
+ auto *Header =
+ (const struct mach_header_64 *)_dyld_get_image_header(ImageIndex);
+ if (Header == NULL)
+ continue;
+ auto Cmd = (const struct load_command *)(&Header[1]);
+ for (uint32_t CmdNum = 0; CmdNum < Header->ncmds; ++CmdNum) {
+ uint32_t BaseCmd = Cmd->cmd & ~LC_REQ_DYLD;
+ if (BaseCmd == LC_SEGMENT_64) {
+ auto CmdSeg64 = (const struct segment_command_64 *)Cmd;
+ for (int j = 0; j < Depth; j++) {
+ if (Modules[j])
+ continue;
+ intptr_t Addr = (intptr_t)StackTrace[j];
+ if ((intptr_t)CmdSeg64->vmaddr + Slide <= Addr &&
+ Addr < intptr_t(CmdSeg64->vmaddr + CmdSeg64->vmsize + Slide)) {
+ Modules[j] = Name;
+ Offsets[j] = Addr - Slide;
+ }
+ }
+ }
+ Cmd = (const load_command *)(((const char *)Cmd) + (Cmd->cmdsize));
+ }
+ }
+ return true;
+}
#else
-/// This platform does not have dl_iterate_phdr, so we do not yet know how to
-/// find all loaded DSOs.
+/// Backtraces are not enabled or we don't yet know how to find all loaded DSOs
+/// on this platform.
static bool findModulesAndOffsets(void **StackTrace, int Depth,
const char **Modules, intptr_t *Offsets,
const char *MainExecutableName,
StringSaver &StrPool) {
return false;
}
-#endif // defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES && ...
+#endif // ENABLE_BACKTRACES && ... (findModulesAndOffsets variants)
#if ENABLE_BACKTRACES && defined(HAVE__UNWIND_BACKTRACE)
static int unwindBacktrace(void **StackTrace, int MaxEntries) {
@@ -617,13 +652,12 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) {
if (dlinfo.dli_sname != nullptr) {
OS << ' ';
- int res;
- char *d = itaniumDemangle(dlinfo.dli_sname, nullptr, nullptr, &res);
- if (!d)
- OS << dlinfo.dli_sname;
- else
+ if (char *d = itaniumDemangle(dlinfo.dli_sname)) {
OS << d;
- free(d);
+ free(d);
+ } else {
+ OS << dlinfo.dli_sname;
+ }
OS << format(" + %tu", (static_cast<const char *>(StackTrace[i]) -
static_cast<const char *>(dlinfo.dli_saddr)));
diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index a167e0a76795..d381d79fba96 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -43,6 +43,7 @@
#include <cstdint>
#include <iterator>
#include <limits>
+#include <map>
#include <memory>
#include <optional>
#include <string>
@@ -257,12 +258,12 @@ public:
explicit RealFileSystem(bool LinkCWDToProcess) {
if (!LinkCWDToProcess) {
SmallString<128> PWD, RealPWD;
- if (llvm::sys::fs::current_path(PWD))
- return; // Awful, but nothing to do here.
- if (llvm::sys::fs::real_path(PWD, RealPWD))
- WD = {PWD, PWD};
+ if (std::error_code EC = llvm::sys::fs::current_path(PWD))
+ WD = EC;
+ else if (llvm::sys::fs::real_path(PWD, RealPWD))
+ WD = WorkingDirectory{PWD, PWD};
else
- WD = {PWD, RealPWD};
+ WD = WorkingDirectory{PWD, RealPWD};
}
}
@@ -284,10 +285,10 @@ private:
// If this FS has its own working dir, use it to make Path absolute.
// The returned twine is safe to use as long as both Storage and Path live.
Twine adjustPath(const Twine &Path, SmallVectorImpl<char> &Storage) const {
- if (!WD)
+ if (!WD || !*WD)
return Path;
Path.toVector(Storage);
- sys::fs::make_absolute(WD->Resolved, Storage);
+ sys::fs::make_absolute(WD->get().Resolved, Storage);
return Storage;
}
@@ -297,7 +298,7 @@ private:
// The current working directory, with links resolved. (readlink .).
SmallString<128> Resolved;
};
- std::optional<WorkingDirectory> WD;
+ std::optional<llvm::ErrorOr<WorkingDirectory>> WD;
};
} // namespace
@@ -323,8 +324,10 @@ RealFileSystem::openFileForRead(const Twine &Name) {
}
llvm::ErrorOr<std::string> RealFileSystem::getCurrentWorkingDirectory() const {
+ if (WD && *WD)
+ return std::string(WD->get().Specified.str());
if (WD)
- return std::string(WD->Specified.str());
+ return WD->getError();
SmallString<128> Dir;
if (std::error_code EC = llvm::sys::fs::current_path(Dir))
@@ -345,7 +348,7 @@ std::error_code RealFileSystem::setCurrentWorkingDirectory(const Twine &Path) {
return std::make_error_code(std::errc::not_a_directory);
if (auto Err = llvm::sys::fs::real_path(Absolute, Resolved))
return Err;
- WD = {Absolute, Resolved};
+ WD = WorkingDirectory{Absolute, Resolved};
return std::error_code();
}
@@ -723,7 +726,7 @@ public:
class InMemoryDirectory : public InMemoryNode {
Status Stat;
- llvm::StringMap<std::unique_ptr<InMemoryNode>> Entries;
+ std::map<std::string, std::unique_ptr<InMemoryNode>> Entries;
public:
InMemoryDirectory(Status Stat)
@@ -739,15 +742,14 @@ public:
UniqueID getUniqueID() const { return Stat.getUniqueID(); }
InMemoryNode *getChild(StringRef Name) const {
- auto I = Entries.find(Name);
+ auto I = Entries.find(Name.str());
if (I != Entries.end())
return I->second.get();
return nullptr;
}
InMemoryNode *addChild(StringRef Name, std::unique_ptr<InMemoryNode> Child) {
- return Entries.insert(make_pair(Name, std::move(Child)))
- .first->second.get();
+ return Entries.emplace(Name, std::move(Child)).first->second.get();
}
using const_iterator = decltype(Entries)::const_iterator;
@@ -2237,6 +2239,14 @@ RedirectingFileSystem::LookupResult::LookupResult(
}
}
+void RedirectingFileSystem::LookupResult::getPath(
+ llvm::SmallVectorImpl<char> &Result) const {
+ Result.clear();
+ for (Entry *Parent : Parents)
+ llvm::sys::path::append(Result, Parent->getName());
+ llvm::sys::path::append(Result, E->getName());
+}
+
std::error_code
RedirectingFileSystem::makeCanonical(SmallVectorImpl<char> &Path) const {
if (std::error_code EC = makeAbsolute(Path))
@@ -2255,11 +2265,14 @@ ErrorOr<RedirectingFileSystem::LookupResult>
RedirectingFileSystem::lookupPath(StringRef Path) const {
sys::path::const_iterator Start = sys::path::begin(Path);
sys::path::const_iterator End = sys::path::end(Path);
+ llvm::SmallVector<Entry *, 32> Entries;
for (const auto &Root : Roots) {
ErrorOr<RedirectingFileSystem::LookupResult> Result =
- lookupPathImpl(Start, End, Root.get());
- if (Result || Result.getError() != llvm::errc::no_such_file_or_directory)
+ lookupPathImpl(Start, End, Root.get(), Entries);
+ if (Result || Result.getError() != llvm::errc::no_such_file_or_directory) {
+ Result->Parents = std::move(Entries);
return Result;
+ }
}
return make_error_code(llvm::errc::no_such_file_or_directory);
}
@@ -2267,7 +2280,8 @@ RedirectingFileSystem::lookupPath(StringRef Path) const {
ErrorOr<RedirectingFileSystem::LookupResult>
RedirectingFileSystem::lookupPathImpl(
sys::path::const_iterator Start, sys::path::const_iterator End,
- RedirectingFileSystem::Entry *From) const {
+ RedirectingFileSystem::Entry *From,
+ llvm::SmallVectorImpl<Entry *> &Entries) const {
assert(!isTraversalComponent(*Start) &&
!isTraversalComponent(From->getName()) &&
"Paths should not contain traversal components");
@@ -2296,10 +2310,12 @@ RedirectingFileSystem::lookupPathImpl(
auto *DE = cast<RedirectingFileSystem::DirectoryEntry>(From);
for (const std::unique_ptr<RedirectingFileSystem::Entry> &DirEntry :
llvm::make_range(DE->contents_begin(), DE->contents_end())) {
+ Entries.push_back(From);
ErrorOr<RedirectingFileSystem::LookupResult> Result =
- lookupPathImpl(Start, End, DirEntry.get());
+ lookupPathImpl(Start, End, DirEntry.get(), Entries);
if (Result || Result.getError() != llvm::errc::no_such_file_or_directory)
return Result;
+ Entries.pop_back();
}
return make_error_code(llvm::errc::no_such_file_or_directory);
@@ -2541,10 +2557,12 @@ RedirectingFileSystem::getRealPath(const Twine &OriginalPath,
return P;
}
- // If we found a DirectoryEntry, still fallthrough to the original path if
- // allowed, because directories don't have a single external contents path.
- if (Redirection == RedirectKind::Fallthrough)
- return ExternalFS->getRealPath(CanonicalPath, Output);
+ // We found a DirectoryEntry, which does not have a single external contents
+ // path. Use the canonical virtual path.
+ if (Redirection == RedirectKind::Fallthrough) {
+ Result->getPath(Output);
+ return {};
+ }
return llvm::errc::invalid_argument;
}
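Note: lookupPath() now threads an Entries vector through the recursion and the new LookupResult::getPath() rebuilds the canonical virtual path from those recorded parents. Below is a minimal sketch of that reconstruction with plain std::string standing in for SmallVectorImpl<char> and llvm::sys::path::append; joinVirtualPath is an illustrative name.

// Rebuild a virtual path from the directory entries visited on the way down.
#include <string>
#include <vector>
#include <cassert>

static std::string joinVirtualPath(const std::vector<std::string> &Parents,
                                   const std::string &Leaf) {
  std::string Result;
  for (const std::string &P : Parents) {
    if (!Result.empty() && Result.back() != '/')
      Result += '/';
    Result += P;
  }
  if (!Result.empty() && Result.back() != '/')
    Result += '/';
  Result += Leaf;
  return Result;
}

int main() {
  // Parents are pushed on the way down and popped when a branch fails,
  // mirroring the Entries vector threaded through lookupPathImpl().
  assert(joinVirtualPath({"/", "usr", "include"}, "stdio.h") ==
         "/usr/include/stdio.h");
}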
diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc
index 92cf4fcda5a6..b949b724509f 100644
--- a/llvm/lib/Support/Windows/Path.inc
+++ b/llvm/lib/Support/Windows/Path.inc
@@ -130,7 +130,7 @@ namespace fs {
const file_t kInvalidFile = INVALID_HANDLE_VALUE;
-std::string getMainExecutableImpl(const char *argv0, void *MainExecAddr) {
+std::string getMainExecutable(const char *argv0, void *MainExecAddr) {
SmallVector<wchar_t, MAX_PATH> PathName;
PathName.resize_for_overwrite(PathName.capacity());
DWORD Size = ::GetModuleFileNameW(NULL, PathName.data(), PathName.size());
@@ -650,8 +650,6 @@ bool equivalent(file_status A, file_status B) {
return A.FileIndexHigh == B.FileIndexHigh &&
A.FileIndexLow == B.FileIndexLow && A.FileSizeHigh == B.FileSizeHigh &&
A.FileSizeLow == B.FileSizeLow &&
- A.LastAccessedTimeHigh == B.LastAccessedTimeHigh &&
- A.LastAccessedTimeLow == B.LastAccessedTimeLow &&
A.LastWriteTimeHigh == B.LastWriteTimeHigh &&
A.LastWriteTimeLow == B.LastWriteTimeLow &&
A.VolumeSerialNumber == B.VolumeSerialNumber;
diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc
index ba93afe0803b..cb82f55fc38b 100644
--- a/llvm/lib/Support/Windows/Signals.inc
+++ b/llvm/lib/Support/Windows/Signals.inc
@@ -10,6 +10,7 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/ExitCodes.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
@@ -204,6 +205,9 @@ static bool RegisteredUnhandledExceptionFilter = false;
static bool CleanupExecuted = false;
static PTOP_LEVEL_EXCEPTION_FILTER OldFilter = NULL;
+/// The function to call on "SIGPIPE" (one-time use only).
+static std::atomic<void (*)()> OneShotPipeSignalFunction(nullptr);
+
// Windows creates a new thread to execute the console handler when an event
// (such as CTRL/C) occurs. This causes concurrency issues with the above
// globals which this critical section addresses.
@@ -575,11 +579,16 @@ void llvm::sys::SetInfoSignalFunction(void (*Handler)()) {
}
void llvm::sys::SetOneShotPipeSignalFunction(void (*Handler)()) {
- // Unimplemented.
+ OneShotPipeSignalFunction.exchange(Handler);
}
void llvm::sys::DefaultOneShotPipeSignalHandler() {
- // Unimplemented.
+ llvm::sys::Process::Exit(EX_IOERR, /*NoCleanup=*/true);
+}
+
+void llvm::sys::CallOneShotPipeSignalHandler() {
+ if (auto OldOneShotPipeFunction = OneShotPipeSignalFunction.exchange(nullptr))
+ OldOneShotPipeFunction();
}
/// Add a function to be called when a signal is delivered to the process. The
@@ -816,7 +825,15 @@ WriteWindowsDumpFile(PMINIDUMP_EXCEPTION_INFORMATION ExceptionInfo) {
}
void sys::CleanupOnSignal(uintptr_t Context) {
- LLVMUnhandledExceptionFilter((LPEXCEPTION_POINTERS)Context);
+ LPEXCEPTION_POINTERS EP = (LPEXCEPTION_POINTERS)Context;
+ // Broken pipe is not a crash.
+ //
+ // 0xE0000000 is combined with the return code in the exception raised in
+ // CrashRecoveryContext::HandleExit().
+ unsigned RetCode = EP->ExceptionRecord->ExceptionCode;
+ if (RetCode == (0xE0000000 | EX_IOERR))
+ return;
+ LLVMUnhandledExceptionFilter(EP);
}
static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
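Note: the SIGPIPE emulation above stores the callback in a std::atomic function pointer and exchanges it with nullptr when invoked, so it fires at most once even if several threads hit a broken pipe concurrently. A small sketch of that one-shot pattern follows; the names are illustrative, not the LLVM entry points.

#include <atomic>
#include <cstdio>

static std::atomic<void (*)()> OneShotHandler(nullptr);

void setOneShotHandler(void (*Fn)()) { OneShotHandler.exchange(Fn); }

void callOneShotHandler() {
  if (auto *Fn = OneShotHandler.exchange(nullptr))
    Fn(); // later callers see nullptr and do nothing
}

int main() {
  setOneShotHandler([] { std::puts("broken pipe handled once"); });
  callOneShotHandler(); // prints
  callOneShotHandler(); // no-op
}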
diff --git a/llvm/lib/Support/Windows/Threading.inc b/llvm/lib/Support/Windows/Threading.inc
index aa47484cb5ce..4baf8b8cb82a 100644
--- a/llvm/lib/Support/Windows/Threading.inc
+++ b/llvm/lib/Support/Windows/Threading.inc
@@ -233,7 +233,7 @@ static ArrayRef<ProcessorGroup> getProcessorGroups() {
unsigned CurrentGroupID = (*ActiveGroups)[0];
ProcessorGroup NewG{Groups[CurrentGroupID]};
NewG.Affinity = ProcessAffinityMask;
- NewG.UsableThreads = countPopulation(ProcessAffinityMask);
+ NewG.UsableThreads = llvm::popcount(ProcessAffinityMask);
Groups.clear();
Groups.push_back(NewG);
}
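Note: countPopulation() was retired in favour of llvm::popcount, which mirrors C++20 std::popcount. A tiny sketch of the same computation with the standard facility, assuming a C++20 compiler; the mask value here is only a stand-in.

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  std::uint64_t ProcessAffinityMask = 0x00FF; // 8 usable logical processors
  unsigned UsableThreads = std::popcount(ProcessAffinityMask);
  std::printf("UsableThreads = %u\n", UsableThreads); // prints 8
}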
diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp
index b85b1eb83ef8..6ac2c6aeeb46 100644
--- a/llvm/lib/Support/YAMLParser.cpp
+++ b/llvm/lib/Support/YAMLParser.cpp
@@ -2041,8 +2041,11 @@ StringRef ScalarNode::getValue(SmallVectorImpl<char> &Storage) const {
}
return UnquotedValue;
}
- // Plain or block.
- return Value.rtrim(' ');
+ // Plain.
+ // Trim whitespace ('b-char' and 's-white').
+ // NOTE: Alternatively we could change the scanner to not include whitespace
+ // here in the first place.
+ return Value.rtrim("\x0A\x0D\x20\x09");
}
StringRef ScalarNode::unescapeDoubleQuoted( StringRef UnquotedValue
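Note: plain scalars are now right-trimmed over the YAML 'b-char' and 's-white' set (\n, \r, space, tab) rather than spaces only. A small sketch of the same trim with std::string_view; rtrimPlainScalar is an illustrative name.

#include <string_view>
#include <cassert>

static std::string_view rtrimPlainScalar(std::string_view Value) {
  const std::string_view Trim = "\x0A\x0D\x20\x09"; // \n \r space \t
  std::string_view::size_type End = Value.find_last_not_of(Trim);
  return End == std::string_view::npos ? std::string_view()
                                       : Value.substr(0, End + 1);
}

int main() {
  assert(rtrimPlainScalar("plain value \t\r\n") == "plain value");
  assert(rtrimPlainScalar(" \n\t").empty());
}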
diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp
index 4eb0b3afd563..f21b7a0ca699 100644
--- a/llvm/lib/Support/YAMLTraits.cpp
+++ b/llvm/lib/Support/YAMLTraits.cpp
@@ -397,17 +397,23 @@ void Input::reportWarning(const SMRange &range, const Twine &message) {
std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
SmallString<128> StringStorage;
- if (ScalarNode *SN = dyn_cast<ScalarNode>(N)) {
+ switch (N->getType()) {
+ case Node::NK_Scalar: {
+ ScalarNode *SN = dyn_cast<ScalarNode>(N);
StringRef KeyStr = SN->getValue(StringStorage);
if (!StringStorage.empty()) {
// Copy string to permanent storage
KeyStr = StringStorage.str().copy(StringAllocator);
}
return std::make_unique<ScalarHNode>(N, KeyStr);
- } else if (BlockScalarNode *BSN = dyn_cast<BlockScalarNode>(N)) {
+ }
+ case Node::NK_BlockScalar: {
+ BlockScalarNode *BSN = dyn_cast<BlockScalarNode>(N);
StringRef ValueCopy = BSN->getValue().copy(StringAllocator);
return std::make_unique<ScalarHNode>(N, ValueCopy);
- } else if (SequenceNode *SQ = dyn_cast<SequenceNode>(N)) {
+ }
+ case Node::NK_Sequence: {
+ SequenceNode *SQ = dyn_cast<SequenceNode>(N);
auto SQHNode = std::make_unique<SequenceHNode>(N);
for (Node &SN : *SQ) {
auto Entry = createHNodes(&SN);
@@ -416,7 +422,9 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
SQHNode->Entries.push_back(std::move(Entry));
}
return std::move(SQHNode);
- } else if (MappingNode *Map = dyn_cast<MappingNode>(N)) {
+ }
+ case Node::NK_Mapping: {
+ MappingNode *Map = dyn_cast<MappingNode>(N);
auto mapHNode = std::make_unique<MapHNode>(N);
for (KeyValueNode &KVN : *Map) {
Node *KeyNode = KVN.getKey();
@@ -435,6 +443,11 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
// Copy string to permanent storage
KeyStr = StringStorage.str().copy(StringAllocator);
}
+ if (mapHNode->Mapping.count(KeyStr))
+ // From YAML spec: "The content of a mapping node is an unordered set of
+ // key/value node pairs, with the restriction that each of the keys is
+ // unique."
+ setError(KeyNode, Twine("duplicated mapping key '") + KeyStr + "'");
auto ValueHNode = createHNodes(Value);
if (EC)
break;
@@ -442,9 +455,10 @@ std::unique_ptr<Input::HNode> Input::createHNodes(Node *N) {
std::make_pair(std::move(ValueHNode), KeyNode->getSourceRange());
}
return std::move(mapHNode);
- } else if (isa<NullNode>(N)) {
+ }
+ case Node::NK_Null:
return std::make_unique<EmptyHNode>(N);
- } else {
+ default:
setError(N, "unknown node kind");
return nullptr;
}
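Note: createHNodes() now reports a duplicated mapping key instead of silently overwriting the earlier value, per the YAML requirement that mapping keys be unique. Below is a minimal sketch of the check using std::map; reportError is a hypothetical callback standing in for setError().

#include <map>
#include <string>
#include <iostream>

static void reportError(const std::string &Msg) { std::cerr << Msg << '\n'; }

int main() {
  std::map<std::string, int> Mapping;
  std::pair<std::string, int> Input[] = {{"name", 1}, {"size", 2}, {"name", 3}};
  for (const auto &[Key, Value] : Input) {
    if (Mapping.count(Key)) {
      // YAML spec: the keys of a mapping node must be unique.
      reportError("duplicated mapping key '" + Key + "'");
      continue;
    }
    Mapping.emplace(Key, Value);
  }
}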
diff --git a/llvm/lib/Support/Z3Solver.cpp b/llvm/lib/Support/Z3Solver.cpp
index a49bedcfd2b0..eb671fe2596d 100644
--- a/llvm/lib/Support/Z3Solver.cpp
+++ b/llvm/lib/Support/Z3Solver.cpp
@@ -729,7 +729,7 @@ public:
const Z3_sort Z3Sort = toZ3Sort(*getBitvectorSort(BitWidth)).Sort;
// Slow path, when 64 bits are not enough.
- if (LLVM_UNLIKELY(Int.getBitWidth() > 64u)) {
+ if (LLVM_UNLIKELY(!Int.isRepresentableByInt64())) {
SmallString<40> Buffer;
Int.toString(Buffer, 10);
return newExprRef(Z3Expr(
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
index 92b15f14c62f..a4fc605019c2 100644
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -56,6 +56,7 @@
#ifdef _WIN32
#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Signals.h"
#include "llvm/Support/Windows/WindowsSupport.h"
#endif
@@ -83,8 +84,15 @@ raw_ostream::~raw_ostream() {
}
size_t raw_ostream::preferred_buffer_size() const {
+#ifdef _WIN32
+ // On Windows BUFSIZ is only 512, which results in more calls to write. This
+ // overhead can cause significant performance degradation. Therefore use a
+ // better default.
+ return (16 * 1024);
+#else
// BUFSIZ is intended to be a reasonable default.
return BUFSIZ;
+#endif
}
void raw_ostream::SetBuffered() {
@@ -775,6 +783,15 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
)
continue;
+#ifdef _WIN32
+ // Windows equivalents of SIGPIPE/EPIPE.
+ DWORD WinLastError = GetLastError();
+ if (WinLastError == ERROR_BROKEN_PIPE ||
+ (WinLastError == ERROR_NO_DATA && errno == EINVAL)) {
+ llvm::sys::CallOneShotPipeSignalHandler();
+ errno = EPIPE;
+ }
+#endif
// Otherwise it's a non-recoverable error. Note it and quit.
error_detected(std::error_code(errno, std::generic_category()));
break;
@@ -802,8 +819,6 @@ uint64_t raw_fd_ostream::seek(uint64_t off) {
flush();
#ifdef _WIN32
pos = ::_lseeki64(FD, off, SEEK_SET);
-#elif defined(HAVE_LSEEK64)
- pos = ::lseek64(FD, off, SEEK_SET);
#else
pos = ::lseek(FD, off, SEEK_SET);
#endif
@@ -992,7 +1007,7 @@ Error llvm::writeToOutput(StringRef OutputFileName,
return Write(Out);
}
- unsigned Mode = sys::fs::all_read | sys::fs::all_write | sys::fs::all_exe;
+ unsigned Mode = sys::fs::all_read | sys::fs::all_write;
Expected<sys::fs::TempFile> Temp =
sys::fs::TempFile::create(OutputFileName + ".temp-stream-%%%%%%", Mode);
if (!Temp)
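Note: write_impl() above retries interrupted writes and, on Windows, maps ERROR_BROKEN_PIPE onto EPIPE before invoking the one-shot pipe handler. Below is a POSIX-only sketch of the same retry/error shape; writeAll and onBrokenPipe are illustrative names, not LLVM APIs.

#include <unistd.h>
#include <cerrno>
#include <cstddef>

static void onBrokenPipe() { /* e.g. exit quietly instead of crashing */ }

static bool writeAll(int FD, const char *Ptr, size_t Size) {
  while (Size > 0) {
    ssize_t Ret = ::write(FD, Ptr, Size);
    if (Ret < 0) {
      if (errno == EINTR || errno == EAGAIN)
        continue; // transient: retry the same chunk
      if (errno == EPIPE)
        onBrokenPipe(); // the reader went away; not a crash
      return false; // non-recoverable: record the error and stop
    }
    Ptr += Ret;
    Size -= static_cast<size_t>(Ret);
  }
  return true;
}

int main() { return writeAll(1, "hello\n", 6) ? 0 : 1; }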
diff --git a/llvm/lib/Support/regcomp.c b/llvm/lib/Support/regcomp.c
index 9d484195a6d6..4e9082cec456 100644
--- a/llvm/lib/Support/regcomp.c
+++ b/llvm/lib/Support/regcomp.c
@@ -329,7 +329,15 @@ llvm_regcomp(llvm_regex_t *preg, const char *pattern, int cflags)
/* set things up */
p->g = g;
+ /* suppress warning from the following explicit cast. */
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif /* __GNUC__ */
p->next = (char *)pattern; /* convenience; we do not modify it */
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif /* __GNUC__ */
p->end = p->next + len;
p->error = 0;
p->ncsalloc = 0;
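Note: the hunk above silences -Wcast-qual only around the one intentional const-removing cast rather than for the whole file. A compact sketch of the same push/ignore/pop pattern, which GCC and Clang both honour; firstByte is an illustrative name.

char *firstByte(const char *Pattern) {
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-qual"
#endif
  char *P = (char *)Pattern; // deliberate: the caller promises not to write
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
  return P;
}

int main() { return firstByte("abc") != nullptr ? 0 : 1; }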
diff --git a/llvm/lib/Support/regex_impl.h b/llvm/lib/Support/regex_impl.h
index 8ddac7dcf998..8f0c532205ed 100644
--- a/llvm/lib/Support/regex_impl.h
+++ b/llvm/lib/Support/regex_impl.h
@@ -35,8 +35,8 @@
* @(#)regex.h 8.1 (Berkeley) 6/2/93
*/
-#ifndef _REGEX_H_
-#define _REGEX_H_
+#ifndef LLVM_SUPPORT_REGEX_IMPL_H
+#define LLVM_SUPPORT_REGEX_IMPL_H
#include <sys/types.h>
typedef off_t llvm_regoff_t;
@@ -105,4 +105,4 @@ size_t llvm_strlcpy(char *dst, const char *src, size_t siz);
}
#endif
-#endif /* !_REGEX_H_ */
+#endif /* LLVM_SUPPORT_REGEX_IMPL_H */
diff --git a/llvm/lib/Support/xxhash.cpp b/llvm/lib/Support/xxhash.cpp
index 9a3f5faa336b..577f14189caf 100644
--- a/llvm/lib/Support/xxhash.cpp
+++ b/llvm/lib/Support/xxhash.cpp
@@ -1,6 +1,6 @@
/*
* xxHash - Fast Hash algorithm
-* Copyright (C) 2012-2016, Yann Collet
+* Copyright (C) 2012-2021, Yann Collet
*
* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
*
@@ -32,10 +32,14 @@
* - xxHash source repository : https://github.com/Cyan4973/xxHash
*/
-/* based on revision d2df04efcbef7d7f6886d345861e5dfda4edacc1 Removed
- * everything but a simple interface for computing XXh64. */
+// xxhash64 is based on commit d2df04efcbef7d7f6886d345861e5dfda4edacc1. Removed
+// everything but a simple interface for computing xxh64.
+
+// xxh3_64bits is based on commit d5891596637d21366b9b1dcf2c0007a3edb26a9e (July
+// 2023).
#include "llvm/Support/xxhash.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Endian.h"
#include <stdlib.h>
@@ -47,6 +51,10 @@ static uint64_t rotl64(uint64_t X, size_t R) {
return (X << R) | (X >> (64 - R));
}
+constexpr uint32_t PRIME32_1 = 0x9E3779B1;
+constexpr uint32_t PRIME32_2 = 0x85EBCA77;
+constexpr uint32_t PRIME32_3 = 0xC2B2AE3D;
+
static const uint64_t PRIME64_1 = 11400714785074694791ULL;
static const uint64_t PRIME64_2 = 14029467366897019727ULL;
static const uint64_t PRIME64_3 = 1609587929392839161ULL;
@@ -67,6 +75,15 @@ static uint64_t mergeRound(uint64_t Acc, uint64_t Val) {
return Acc;
}
+static uint64_t XXH64_avalanche(uint64_t hash) {
+ hash ^= hash >> 33;
+ hash *= PRIME64_2;
+ hash ^= hash >> 29;
+ hash *= PRIME64_3;
+ hash ^= hash >> 32;
+ return hash;
+}
+
uint64_t llvm::xxHash64(StringRef Data) {
size_t Len = Data.size();
uint64_t Seed = 0;
@@ -104,14 +121,15 @@ uint64_t llvm::xxHash64(StringRef Data) {
H64 += (uint64_t)Len;
- while (P + 8 <= BEnd) {
+ while (reinterpret_cast<uintptr_t>(P) + 8 <=
+ reinterpret_cast<uintptr_t>(BEnd)) {
uint64_t const K1 = round(0, endian::read64le(P));
H64 ^= K1;
H64 = rotl64(H64, 27) * PRIME64_1 + PRIME64_4;
P += 8;
}
- if (P + 4 <= BEnd) {
+ if (reinterpret_cast<uintptr_t>(P) + 4 <= reinterpret_cast<uintptr_t>(BEnd)) {
H64 ^= (uint64_t)(endian::read32le(P)) * PRIME64_1;
H64 = rotl64(H64, 23) * PRIME64_2 + PRIME64_3;
P += 4;
@@ -123,15 +141,267 @@ uint64_t llvm::xxHash64(StringRef Data) {
P++;
}
- H64 ^= H64 >> 33;
- H64 *= PRIME64_2;
- H64 ^= H64 >> 29;
- H64 *= PRIME64_3;
- H64 ^= H64 >> 32;
-
- return H64;
+ return XXH64_avalanche(H64);
}
uint64_t llvm::xxHash64(ArrayRef<uint8_t> Data) {
return xxHash64({(const char *)Data.data(), Data.size()});
}
+
+constexpr size_t XXH3_SECRETSIZE_MIN = 136;
+constexpr size_t XXH_SECRET_DEFAULT_SIZE = 192;
+
+/* Pseudorandom data taken directly from FARSH */
+// clang-format off
+constexpr uint8_t kSecret[XXH_SECRET_DEFAULT_SIZE] = {
+ 0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
+ 0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
+ 0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
+ 0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
+ 0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
+ 0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
+ 0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
+ 0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
+ 0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
+ 0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
+ 0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
+ 0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
+};
+// clang-format on
+
+constexpr uint64_t PRIME_MX1 = 0x165667919E3779F9;
+constexpr uint64_t PRIME_MX2 = 0x9FB21C651E98DF25;
+
+// Calculates a 64-bit to 128-bit multiply, then XOR folds it.
+static uint64_t XXH3_mul128_fold64(uint64_t lhs, uint64_t rhs) {
+#if defined(__SIZEOF_INT128__) || \
+ (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
+ __uint128_t product = (__uint128_t)lhs * (__uint128_t)rhs;
+ return uint64_t(product) ^ uint64_t(product >> 64);
+
+#else
+ /* First calculate all of the cross products. */
+ const uint64_t lo_lo = (lhs & 0xFFFFFFFF) * (rhs & 0xFFFFFFFF);
+ const uint64_t hi_lo = (lhs >> 32) * (rhs & 0xFFFFFFFF);
+ const uint64_t lo_hi = (lhs & 0xFFFFFFFF) * (rhs >> 32);
+ const uint64_t hi_hi = (lhs >> 32) * (rhs >> 32);
+
+ /* Now add the products together. These will never overflow. */
+ const uint64_t cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
+ const uint64_t upper = (hi_lo >> 32) + (cross >> 32) + hi_hi;
+ const uint64_t lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
+
+ return upper ^ lower;
+#endif
+}
+
+constexpr size_t XXH_STRIPE_LEN = 64;
+constexpr size_t XXH_SECRET_CONSUME_RATE = 8;
+constexpr size_t XXH_ACC_NB = XXH_STRIPE_LEN / sizeof(uint64_t);
+
+static uint64_t XXH3_avalanche(uint64_t hash) {
+ hash ^= hash >> 37;
+ hash *= PRIME_MX1;
+ hash ^= hash >> 32;
+ return hash;
+}
+
+static uint64_t XXH3_len_1to3_64b(const uint8_t *input, size_t len,
+ const uint8_t *secret, uint64_t seed) {
+ const uint8_t c1 = input[0];
+ const uint8_t c2 = input[len >> 1];
+ const uint8_t c3 = input[len - 1];
+ uint32_t combined = ((uint32_t)c1 << 16) | ((uint32_t)c2 << 24) |
+ ((uint32_t)c3 << 0) | ((uint32_t)len << 8);
+ uint64_t bitflip =
+ (uint64_t)(endian::read32le(secret) ^ endian::read32le(secret + 4)) +
+ seed;
+ return XXH64_avalanche(uint64_t(combined) ^ bitflip);
+}
+
+static uint64_t XXH3_len_4to8_64b(const uint8_t *input, size_t len,
+ const uint8_t *secret, uint64_t seed) {
+ seed ^= (uint64_t)byteswap(uint32_t(seed)) << 32;
+ const uint32_t input1 = endian::read32le(input);
+ const uint32_t input2 = endian::read32le(input + len - 4);
+ uint64_t acc =
+ (endian::read64le(secret + 8) ^ endian::read64le(secret + 16)) - seed;
+ const uint64_t input64 = (uint64_t)input2 | ((uint64_t)input1 << 32);
+ acc ^= input64;
+ // XXH3_rrmxmx(acc, len)
+ acc ^= rotl64(acc, 49) ^ rotl64(acc, 24);
+ acc *= PRIME_MX2;
+ acc ^= (acc >> 35) + (uint64_t)len;
+ acc *= PRIME_MX2;
+ return acc ^ (acc >> 28);
+}
+
+static uint64_t XXH3_len_9to16_64b(const uint8_t *input, size_t len,
+ const uint8_t *secret, uint64_t const seed) {
+ uint64_t input_lo =
+ (endian::read64le(secret + 24) ^ endian::read64le(secret + 32)) + seed;
+ uint64_t input_hi =
+ (endian::read64le(secret + 40) ^ endian::read64le(secret + 48)) - seed;
+ input_lo ^= endian::read64le(input);
+ input_hi ^= endian::read64le(input + len - 8);
+ uint64_t acc = uint64_t(len) + byteswap(input_lo) + input_hi +
+ XXH3_mul128_fold64(input_lo, input_hi);
+ return XXH3_avalanche(acc);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE
+static uint64_t XXH3_len_0to16_64b(const uint8_t *input, size_t len,
+ const uint8_t *secret, uint64_t const seed) {
+ if (LLVM_LIKELY(len > 8))
+ return XXH3_len_9to16_64b(input, len, secret, seed);
+ if (LLVM_LIKELY(len >= 4))
+ return XXH3_len_4to8_64b(input, len, secret, seed);
+ if (len != 0)
+ return XXH3_len_1to3_64b(input, len, secret, seed);
+ return XXH64_avalanche(seed ^ endian::read64le(secret + 56) ^
+ endian::read64le(secret + 64));
+}
+
+static uint64_t XXH3_mix16B(const uint8_t *input, uint8_t const *secret,
+ uint64_t seed) {
+ uint64_t lhs = seed;
+ uint64_t rhs = 0U - seed;
+ lhs += endian::read64le(secret);
+ rhs += endian::read64le(secret + 8);
+ lhs ^= endian::read64le(input);
+ rhs ^= endian::read64le(input + 8);
+ return XXH3_mul128_fold64(lhs, rhs);
+}
+
+/* For mid range keys, XXH3 uses a Mum-hash variant. */
+LLVM_ATTRIBUTE_ALWAYS_INLINE
+static uint64_t XXH3_len_17to128_64b(const uint8_t *input, size_t len,
+ const uint8_t *secret,
+ uint64_t const seed) {
+ uint64_t acc = len * PRIME64_1, acc_end;
+ acc += XXH3_mix16B(input + 0, secret + 0, seed);
+ acc_end = XXH3_mix16B(input + len - 16, secret + 16, seed);
+ if (len > 32) {
+ acc += XXH3_mix16B(input + 16, secret + 32, seed);
+ acc_end += XXH3_mix16B(input + len - 32, secret + 48, seed);
+ if (len > 64) {
+ acc += XXH3_mix16B(input + 32, secret + 64, seed);
+ acc_end += XXH3_mix16B(input + len - 48, secret + 80, seed);
+ if (len > 96) {
+ acc += XXH3_mix16B(input + 48, secret + 96, seed);
+ acc_end += XXH3_mix16B(input + len - 64, secret + 112, seed);
+ }
+ }
+ }
+ return XXH3_avalanche(acc + acc_end);
+}
+
+constexpr size_t XXH3_MIDSIZE_MAX = 240;
+
+LLVM_ATTRIBUTE_NOINLINE
+static uint64_t XXH3_len_129to240_64b(const uint8_t *input, size_t len,
+ const uint8_t *secret, uint64_t seed) {
+ constexpr size_t XXH3_MIDSIZE_STARTOFFSET = 3;
+ constexpr size_t XXH3_MIDSIZE_LASTOFFSET = 17;
+ uint64_t acc = (uint64_t)len * PRIME64_1;
+ const unsigned nbRounds = len / 16;
+ for (unsigned i = 0; i < 8; ++i)
+ acc += XXH3_mix16B(input + 16 * i, secret + 16 * i, seed);
+ acc = XXH3_avalanche(acc);
+
+ for (unsigned i = 8; i < nbRounds; ++i) {
+ acc += XXH3_mix16B(input + 16 * i,
+ secret + 16 * (i - 8) + XXH3_MIDSIZE_STARTOFFSET, seed);
+ }
+ /* last bytes */
+ acc +=
+ XXH3_mix16B(input + len - 16,
+ secret + XXH3_SECRETSIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
+ return XXH3_avalanche(acc);
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE
+static void XXH3_accumulate_512_scalar(uint64_t *acc, const uint8_t *input,
+ const uint8_t *secret) {
+ for (size_t i = 0; i < XXH_ACC_NB; ++i) {
+ uint64_t data_val = endian::read64le(input + 8 * i);
+ uint64_t data_key = data_val ^ endian::read64le(secret + 8 * i);
+ acc[i ^ 1] += data_val;
+ acc[i] += uint32_t(data_key) * (data_key >> 32);
+ }
+}
+
+LLVM_ATTRIBUTE_ALWAYS_INLINE
+static void XXH3_accumulate_scalar(uint64_t *acc, const uint8_t *input,
+ const uint8_t *secret, size_t nbStripes) {
+ for (size_t n = 0; n < nbStripes; ++n)
+ XXH3_accumulate_512_scalar(acc, input + n * XXH_STRIPE_LEN,
+ secret + n * XXH_SECRET_CONSUME_RATE);
+}
+
+static void XXH3_scrambleAcc(uint64_t *acc, const uint8_t *secret) {
+ for (size_t i = 0; i < XXH_ACC_NB; ++i) {
+ acc[i] ^= acc[i] >> 47;
+ acc[i] ^= endian::read64le(secret + 8 * i);
+ acc[i] *= PRIME32_1;
+ }
+}
+
+static uint64_t XXH3_mix2Accs(const uint64_t *acc, const uint8_t *secret) {
+ return XXH3_mul128_fold64(acc[0] ^ endian::read64le(secret),
+ acc[1] ^ endian::read64le(secret + 8));
+}
+
+static uint64_t XXH3_mergeAccs(const uint64_t *acc, const uint8_t *key,
+ uint64_t start) {
+ uint64_t result64 = start;
+ for (size_t i = 0; i < 4; ++i)
+ result64 += XXH3_mix2Accs(acc + 2 * i, key + 16 * i);
+ return XXH3_avalanche(result64);
+}
+
+LLVM_ATTRIBUTE_NOINLINE
+static uint64_t XXH3_hashLong_64b(const uint8_t *input, size_t len,
+ const uint8_t *secret, size_t secretSize) {
+ const size_t nbStripesPerBlock =
+ (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
+ const size_t block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
+ const size_t nb_blocks = (len - 1) / block_len;
+ alignas(16) uint64_t acc[XXH_ACC_NB] = {
+ PRIME32_3, PRIME64_1, PRIME64_2, PRIME64_3,
+ PRIME64_4, PRIME32_2, PRIME64_5, PRIME32_1,
+ };
+ for (size_t n = 0; n < nb_blocks; ++n) {
+ XXH3_accumulate_scalar(acc, input + n * block_len, secret,
+ nbStripesPerBlock);
+ XXH3_scrambleAcc(acc, secret + secretSize - XXH_STRIPE_LEN);
+ }
+
+ /* last partial block */
+ const size_t nbStripes = (len - 1 - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
+ assert(nbStripes <= secretSize / XXH_SECRET_CONSUME_RATE);
+ XXH3_accumulate_scalar(acc, input + nb_blocks * block_len, secret, nbStripes);
+
+ /* last stripe */
+ constexpr size_t XXH_SECRET_LASTACC_START = 7;
+ XXH3_accumulate_512_scalar(acc, input + len - XXH_STRIPE_LEN,
+ secret + secretSize - XXH_STRIPE_LEN -
+ XXH_SECRET_LASTACC_START);
+
+ /* converge into final hash */
+ constexpr size_t XXH_SECRET_MERGEACCS_START = 11;
+ return XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START,
+ (uint64_t)len * PRIME64_1);
+}
+
+uint64_t llvm::xxh3_64bits(ArrayRef<uint8_t> data) {
+ auto *in = data.data();
+ size_t len = data.size();
+ if (len <= 16)
+ return XXH3_len_0to16_64b(in, len, kSecret, 0);
+ if (len <= 128)
+ return XXH3_len_17to128_64b(in, len, kSecret, 0);
+ if (len <= XXH3_MIDSIZE_MAX)
+ return XXH3_len_129to240_64b(in, len, kSecret, 0);
+ return XXH3_hashLong_64b(in, len, kSecret, sizeof(kSecret));
+}
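Note: the new llvm::xxh3_64bits() entry point dispatches on length (0-16, 17-128, 129-240, long input), and every path leans on the same primitive: a 64x64->128-bit multiply whose halves are XOR-folded. Below is a standalone sketch of that primitive using the same portable fallback as the hunk when __uint128_t is unavailable; mul128Fold64 is an illustrative name.

#include <cstdint>
#include <cstdio>

static uint64_t mul128Fold64(uint64_t LHS, uint64_t RHS) {
#if defined(__SIZEOF_INT128__)
  __uint128_t Product = (__uint128_t)LHS * (__uint128_t)RHS;
  return (uint64_t)Product ^ (uint64_t)(Product >> 64);
#else
  // 32x32->64 cross products; the additions below cannot overflow.
  uint64_t LoLo = (LHS & 0xFFFFFFFF) * (RHS & 0xFFFFFFFF);
  uint64_t HiLo = (LHS >> 32) * (RHS & 0xFFFFFFFF);
  uint64_t LoHi = (LHS & 0xFFFFFFFF) * (RHS >> 32);
  uint64_t HiHi = (LHS >> 32) * (RHS >> 32);
  uint64_t Cross = (LoLo >> 32) + (HiLo & 0xFFFFFFFF) + LoHi;
  uint64_t Upper = (HiLo >> 32) + (Cross >> 32) + HiHi;
  uint64_t Lower = (Cross << 32) | (LoLo & 0xFFFFFFFF);
  return Upper ^ Lower;
#endif
}

int main() {
  std::printf("%016llx\n",
              (unsigned long long)mul128Fold64(0x9E3779B185EBCA87ULL,
                                               0xC2B2AE3D27D4EB4FULL));
}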
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 1d5f130737ee..9aee1f8fecd2 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -15,15 +15,26 @@
//===----------------------------------------------------------------------===//
#include "llvm/TableGen/Main.h"
+#include "TGLexer.h"
#include "TGParser.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-#include <algorithm>
+#include "llvm/TableGen/TableGenBackend.h"
+#include <memory>
+#include <string>
#include <system_error>
+#include <utility>
+#include <vector>
using namespace llvm;
static cl::opt<std::string>
@@ -85,7 +96,8 @@ static int createDependencyFile(const TGParser &Parser, const char *argv0) {
return 0;
}
-int llvm::TableGenMain(const char *argv0, TableGenMainFn *MainFn) {
+int llvm::TableGenMain(const char *argv0,
+ std::function<TableGenMainFn> MainFn) {
RecordKeeper Records;
if (TimePhases)
@@ -119,7 +131,14 @@ int llvm::TableGenMain(const char *argv0, TableGenMainFn *MainFn) {
Records.startBackendTimer("Backend overall");
std::string OutString;
raw_string_ostream Out(OutString);
- unsigned status = MainFn(Out, Records);
+ unsigned status = 0;
+ TableGen::Emitter::FnT ActionFn = TableGen::Emitter::Action->getValue();
+ if (ActionFn)
+ ActionFn(Records, Out);
+ else if (MainFn)
+ status = MainFn(Out, Records);
+ else
+ return 1;
Records.stopBackendTimer();
if (status)
return 1;
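Note: TableGenMain() now prefers an emitter registered via the command line (TableGen::Emitter::Action) and only falls back to the std::function supplied by the caller. A minimal sketch of that precedence; runBackend, RegisteredAction, and the bool-returning signature are illustrative simplifications, not the LLVM API.

#include <functional>
#include <iostream>

using BackendFn = std::function<bool(std::ostream &)>; // true == error

static BackendFn RegisteredAction; // set by a -gen-* style option, if any

int runBackend(std::ostream &OS, BackendFn MainFn) {
  bool Failed = false;
  if (RegisteredAction)
    Failed = RegisteredAction(OS); // command-line selected emitter wins
  else if (MainFn)
    Failed = MainFn(OS);           // legacy caller-provided entry point
  else
    return 1;                      // nothing to run
  return Failed ? 1 : 0;
}

int main() {
  return runBackend(std::cout, [](std::ostream &OS) {
    OS << "fallback backend\n";
    return false;
  });
}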
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index 9ea68e2eca51..20db470855a1 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -70,6 +70,7 @@ struct RecordKeeperImpl {
BitInit TrueBitInit;
BitInit FalseBitInit;
+ FoldingSet<ArgumentInit> TheArgumentInitPool;
FoldingSet<BitsInit> TheBitsInitPool;
std::map<int64_t, IntInit *> TheIntInitPool;
StringMap<StringInit *, BumpPtrAllocator &> StringInitStringPool;
@@ -83,8 +84,6 @@ struct RecordKeeperImpl {
FoldingSet<ExistsOpInit> TheExistsOpInitPool;
DenseMap<std::pair<RecTy *, Init *>, VarInit *> TheVarInitPool;
DenseMap<std::pair<TypedInit *, unsigned>, VarBitInit *> TheVarBitInitPool;
- DenseMap<std::pair<TypedInit *, unsigned>, VarListElementInit *>
- TheVarListElementInitPool;
FoldingSet<VarDefInit> TheVarDefInitPool;
DenseMap<std::pair<Init *, StringInit *>, FieldInit *> TheFieldInitPool;
FoldingSet<CondOpInit> TheCondOpInitPool;
@@ -151,12 +150,6 @@ bool BitsRecTy::typeIsConvertibleTo(const RecTy *RHS) const {
return (kind == BitRecTyKind && Size == 1) || (kind == IntRecTyKind);
}
-bool BitsRecTy::typeIsA(const RecTy *RHS) const {
- if (const BitsRecTy *RHSb = dyn_cast<BitsRecTy>(RHS))
- return RHSb->Size == Size;
- return false;
-}
-
IntRecTy *IntRecTy::get(RecordKeeper &RK) {
return &RK.getImpl().SharedIntRecTy;
}
@@ -324,8 +317,11 @@ RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) {
return resolveRecordTypes(RecTy1, RecTy2);
}
+ assert(T1 != nullptr && "Invalid record type");
if (T1->typeIsConvertibleTo(T2))
return T2;
+
+ assert(T2 != nullptr && "Invalid record type");
if (T2->typeIsConvertibleTo(T1))
return T1;
@@ -354,6 +350,8 @@ LLVM_DUMP_METHOD void Init::dump() const { return print(errs()); }
RecordKeeper &Init::getRecordKeeper() const {
if (auto *TyInit = dyn_cast<TypedInit>(this))
return TyInit->getType()->getRecordKeeper();
+ if (auto *ArgInit = dyn_cast<ArgumentInit>(this))
+ return ArgInit->getRecordKeeper();
return cast<UnsetInit>(this)->getRecordKeeper();
}
@@ -369,6 +367,44 @@ Init *UnsetInit::convertInitializerTo(RecTy *Ty) const {
return const_cast<UnsetInit *>(this);
}
+static void ProfileArgumentInit(FoldingSetNodeID &ID, Init *Value,
+ ArgAuxType Aux) {
+ auto I = Aux.index();
+ ID.AddInteger(I);
+ if (I == ArgumentInit::Positional)
+ ID.AddInteger(std::get<ArgumentInit::Positional>(Aux));
+ if (I == ArgumentInit::Named)
+ ID.AddPointer(std::get<ArgumentInit::Named>(Aux));
+ ID.AddPointer(Value);
+}
+
+void ArgumentInit::Profile(FoldingSetNodeID &ID) const {
+ ProfileArgumentInit(ID, Value, Aux);
+}
+
+ArgumentInit *ArgumentInit::get(Init *Value, ArgAuxType Aux) {
+ FoldingSetNodeID ID;
+ ProfileArgumentInit(ID, Value, Aux);
+
+ RecordKeeper &RK = Value->getRecordKeeper();
+ detail::RecordKeeperImpl &RKImpl = RK.getImpl();
+ void *IP = nullptr;
+ if (ArgumentInit *I = RKImpl.TheArgumentInitPool.FindNodeOrInsertPos(ID, IP))
+ return I;
+
+ ArgumentInit *I = new (RKImpl.Allocator) ArgumentInit(Value, Aux);
+ RKImpl.TheArgumentInitPool.InsertNode(I, IP);
+ return I;
+}
+
+Init *ArgumentInit::resolveReferences(Resolver &R) const {
+ Init *NewValue = Value->resolveReferences(R);
+ if (NewValue != Value)
+ return cloneWithValue(NewValue);
+
+ return const_cast<ArgumentInit *>(this);
+}
+
BitInit *BitInit::get(RecordKeeper &RK, bool V) {
return V ? &RK.getImpl().TrueBitInit : &RK.getImpl().FalseBitInit;
}
@@ -676,23 +712,6 @@ Init *ListInit::convertInitializerTo(RecTy *Ty) const {
return nullptr;
}
-Init *ListInit::convertInitListSlice(ArrayRef<unsigned> Elements) const {
- if (Elements.size() == 1) {
- if (Elements[0] >= size())
- return nullptr;
- return getElement(Elements[0]);
- }
-
- SmallVector<Init*, 8> Vals;
- Vals.reserve(Elements.size());
- for (unsigned Element : Elements) {
- if (Element >= size())
- return nullptr;
- Vals.push_back(getElement(Element));
- }
- return ListInit::get(Vals, getElementType());
-}
-
Record *ListInit::getElementAsRecord(unsigned i) const {
assert(i < NumValues && "List element index out of range!");
DefInit *DI = dyn_cast<DefInit>(getElement(i));
@@ -778,6 +797,14 @@ void UnOpInit::Profile(FoldingSetNodeID &ID) const {
Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const {
RecordKeeper &RK = getRecordKeeper();
switch (getOpcode()) {
+ case TOLOWER:
+ if (StringInit *LHSs = dyn_cast<StringInit>(LHS))
+ return StringInit::get(RK, LHSs->getValue().lower());
+ break;
+ case TOUPPER:
+ if (StringInit *LHSs = dyn_cast<StringInit>(LHS))
+ return StringInit::get(RK, LHSs->getValue().upper());
+ break;
case CAST:
if (isa<StringRecTy>(getType())) {
if (StringInit *LHSs = dyn_cast<StringInit>(LHS))
@@ -792,37 +819,40 @@ Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const {
} else if (isa<RecordRecTy>(getType())) {
if (StringInit *Name = dyn_cast<StringInit>(LHS)) {
- if (!CurRec && !IsFinal)
- break;
- assert(CurRec && "NULL pointer");
- Record *D;
-
- // Self-references are allowed, but their resolution is delayed until
- // the final resolve to ensure that we get the correct type for them.
- auto *Anonymous = dyn_cast<AnonymousNameInit>(CurRec->getNameInit());
- if (Name == CurRec->getNameInit() ||
- (Anonymous && Name == Anonymous->getNameInit())) {
- if (!IsFinal)
- break;
- D = CurRec;
- } else {
- D = CurRec->getRecords().getDef(Name->getValue());
- if (!D) {
- if (IsFinal)
- PrintFatalError(CurRec->getLoc(),
- Twine("Undefined reference to record: '") +
- Name->getValue() + "'\n");
- break;
+ Record *D = RK.getDef(Name->getValue());
+ if (!D && CurRec) {
+ // Self-references are allowed, but their resolution is delayed until
+ // the final resolve to ensure that we get the correct type for them.
+ auto *Anonymous = dyn_cast<AnonymousNameInit>(CurRec->getNameInit());
+ if (Name == CurRec->getNameInit() ||
+ (Anonymous && Name == Anonymous->getNameInit())) {
+ if (!IsFinal)
+ break;
+ D = CurRec;
+ }
+ }
+
+ auto PrintFatalErrorHelper = [CurRec](const Twine &T) {
+ if (CurRec)
+ PrintFatalError(CurRec->getLoc(), T);
+ else
+ PrintFatalError(T);
+ };
+
+ if (!D) {
+ if (IsFinal) {
+ PrintFatalErrorHelper(Twine("Undefined reference to record: '") +
+ Name->getValue() + "'\n");
}
+ break;
}
DefInit *DI = DefInit::get(D);
if (!DI->getType()->typeIsA(getType())) {
- PrintFatalError(CurRec->getLoc(),
- Twine("Expected type '") +
- getType()->getAsString() + "', got '" +
- DI->getType()->getAsString() + "' in: " +
- getAsString() + "\n");
+ PrintFatalErrorHelper(Twine("Expected type '") +
+ getType()->getAsString() + "', got '" +
+ DI->getType()->getAsString() + "' in: " +
+ getAsString() + "\n");
}
return DI;
}
@@ -927,6 +957,12 @@ std::string UnOpInit::getAsString() const {
case EMPTY: Result = "!empty"; break;
case GETDAGOP: Result = "!getdagop"; break;
case LOG2 : Result = "!logtwo"; break;
+ case TOLOWER:
+ Result = "!tolower";
+ break;
+ case TOUPPER:
+ Result = "!toupper";
+ break;
}
return Result + "(" + LHS->getAsString() + ")";
}
@@ -1031,89 +1067,123 @@ Init *BinOpInit::getListConcat(TypedInit *LHS, Init *RHS) {
assert(isa<ListRecTy>(LHS->getType()) && "First arg must be a list");
// Shortcut for the common case of concatenating two lists.
- if (const ListInit *LHSList = dyn_cast<ListInit>(LHS))
- if (const ListInit *RHSList = dyn_cast<ListInit>(RHS))
- return ConcatListInits(LHSList, RHSList);
- return BinOpInit::get(BinOpInit::LISTCONCAT, LHS, RHS, LHS->getType());
-}
-
-std::optional<bool> BinOpInit::CompareInit(unsigned Opc, Init *LHS, Init *RHS) const {
- // First see if we have two bit, bits, or int.
- IntInit *LHSi = dyn_cast_or_null<IntInit>(
- LHS->convertInitializerTo(IntRecTy::get(getRecordKeeper())));
- IntInit *RHSi = dyn_cast_or_null<IntInit>(
- RHS->convertInitializerTo(IntRecTy::get(getRecordKeeper())));
-
- if (LHSi && RHSi) {
- bool Result;
- switch (Opc) {
- case EQ:
- Result = LHSi->getValue() == RHSi->getValue();
- break;
- case NE:
- Result = LHSi->getValue() != RHSi->getValue();
- break;
- case LE:
- Result = LHSi->getValue() <= RHSi->getValue();
- break;
- case LT:
- Result = LHSi->getValue() < RHSi->getValue();
- break;
- case GE:
- Result = LHSi->getValue() >= RHSi->getValue();
- break;
- case GT:
- Result = LHSi->getValue() > RHSi->getValue();
- break;
- default:
- llvm_unreachable("unhandled comparison");
- }
- return Result;
- }
-
- // Next try strings.
- StringInit *LHSs = dyn_cast<StringInit>(LHS);
- StringInit *RHSs = dyn_cast<StringInit>(RHS);
-
- if (LHSs && RHSs) {
- bool Result;
- switch (Opc) {
- case EQ:
- Result = LHSs->getValue() == RHSs->getValue();
- break;
- case NE:
- Result = LHSs->getValue() != RHSs->getValue();
- break;
- case LE:
- Result = LHSs->getValue() <= RHSs->getValue();
- break;
- case LT:
- Result = LHSs->getValue() < RHSs->getValue();
- break;
- case GE:
- Result = LHSs->getValue() >= RHSs->getValue();
- break;
- case GT:
- Result = LHSs->getValue() > RHSs->getValue();
- break;
- default:
- llvm_unreachable("unhandled comparison");
- }
- return Result;
- }
-
- // Finally, !eq and !ne can be used with records.
- if (Opc == EQ || Opc == NE) {
- DefInit *LHSd = dyn_cast<DefInit>(LHS);
- DefInit *RHSd = dyn_cast<DefInit>(RHS);
- if (LHSd && RHSd)
- return (Opc == EQ) ? LHSd == RHSd : LHSd != RHSd;
- }
-
- return std::nullopt;
-}
-
- Init *BinOpInit::Fold(Record *CurRec) const {
+ if (const ListInit *LHSList = dyn_cast<ListInit>(LHS))
+ if (const ListInit *RHSList = dyn_cast<ListInit>(RHS))
+ return ConcatListInits(LHSList, RHSList);
+ return BinOpInit::get(BinOpInit::LISTCONCAT, LHS, RHS, LHS->getType());
+}
+
+std::optional<bool> BinOpInit::CompareInit(unsigned Opc, Init *LHS,
+ Init *RHS) const {
+ // First see if we have two bit, bits, or int.
+ IntInit *LHSi = dyn_cast_or_null<IntInit>(
+ LHS->convertInitializerTo(IntRecTy::get(getRecordKeeper())));
+ IntInit *RHSi = dyn_cast_or_null<IntInit>(
+ RHS->convertInitializerTo(IntRecTy::get(getRecordKeeper())));
+
+ if (LHSi && RHSi) {
+ bool Result;
+ switch (Opc) {
+ case EQ:
+ Result = LHSi->getValue() == RHSi->getValue();
+ break;
+ case NE:
+ Result = LHSi->getValue() != RHSi->getValue();
+ break;
+ case LE:
+ Result = LHSi->getValue() <= RHSi->getValue();
+ break;
+ case LT:
+ Result = LHSi->getValue() < RHSi->getValue();
+ break;
+ case GE:
+ Result = LHSi->getValue() >= RHSi->getValue();
+ break;
+ case GT:
+ Result = LHSi->getValue() > RHSi->getValue();
+ break;
+ default:
+ llvm_unreachable("unhandled comparison");
+ }
+ return Result;
+ }
+
+ // Next try strings.
+ StringInit *LHSs = dyn_cast<StringInit>(LHS);
+ StringInit *RHSs = dyn_cast<StringInit>(RHS);
+
+ if (LHSs && RHSs) {
+ bool Result;
+ switch (Opc) {
+ case EQ:
+ Result = LHSs->getValue() == RHSs->getValue();
+ break;
+ case NE:
+ Result = LHSs->getValue() != RHSs->getValue();
+ break;
+ case LE:
+ Result = LHSs->getValue() <= RHSs->getValue();
+ break;
+ case LT:
+ Result = LHSs->getValue() < RHSs->getValue();
+ break;
+ case GE:
+ Result = LHSs->getValue() >= RHSs->getValue();
+ break;
+ case GT:
+ Result = LHSs->getValue() > RHSs->getValue();
+ break;
+ default:
+ llvm_unreachable("unhandled comparison");
+ }
+ return Result;
+ }
+
+ // Finally, !eq and !ne can be used with records.
+ if (Opc == EQ || Opc == NE) {
+ DefInit *LHSd = dyn_cast<DefInit>(LHS);
+ DefInit *RHSd = dyn_cast<DefInit>(RHS);
+ if (LHSd && RHSd)
+ return (Opc == EQ) ? LHSd == RHSd : LHSd != RHSd;
+ }
+
+ return std::nullopt;
+}
+
+static std::optional<unsigned> getDagArgNoByKey(DagInit *Dag, Init *Key,
+ std::string &Error) {
+ // Accessor by index
+ if (IntInit *Idx = dyn_cast<IntInit>(Key)) {
+ int64_t Pos = Idx->getValue();
+ if (Pos < 0) {
+ // The index is negative.
+ Error =
+ (Twine("index ") + std::to_string(Pos) + Twine(" is negative")).str();
+ return std::nullopt;
+ }
+ if (Pos >= Dag->getNumArgs()) {
+ // The index is out-of-range.
+ Error = (Twine("index ") + std::to_string(Pos) +
+ " is out of range (dag has " +
+ std::to_string(Dag->getNumArgs()) + " arguments)")
+ .str();
+ return std::nullopt;
+ }
+ return Pos;
+ }
+ assert(isa<StringInit>(Key));
+ // Accessor by name
+ StringInit *Name = dyn_cast<StringInit>(Key);
+ auto ArgNo = Dag->getArgNo(Name->getValue());
+ if (!ArgNo) {
+ // The key is not found.
+ Error = (Twine("key '") + Name->getValue() + Twine("' is not found")).str();
+ return std::nullopt;
+ }
+ return *ArgNo;
+}
+
+Init *BinOpInit::Fold(Record *CurRec) const {
switch (getOpcode()) {
case CONCAT: {
DagInit *LHSs = dyn_cast<DagInit>(LHS);
@@ -1189,6 +1259,67 @@ std::optional<bool> BinOpInit::CompareInit(unsigned Opc, Init *LHS, Init *RHS) c
}
break;
}
+ case LISTELEM: {
+ auto *TheList = dyn_cast<ListInit>(LHS);
+ auto *Idx = dyn_cast<IntInit>(RHS);
+ if (!TheList || !Idx)
+ break;
+ auto i = Idx->getValue();
+ if (i < 0 || i >= (ssize_t)TheList->size())
+ break;
+ return TheList->getElement(i);
+ }
+ case LISTSLICE: {
+ auto *TheList = dyn_cast<ListInit>(LHS);
+ auto *SliceIdxs = dyn_cast<ListInit>(RHS);
+ if (!TheList || !SliceIdxs)
+ break;
+ SmallVector<Init *, 8> Args;
+ Args.reserve(SliceIdxs->size());
+ for (auto *I : *SliceIdxs) {
+ auto *II = dyn_cast<IntInit>(I);
+ if (!II)
+ goto unresolved;
+ auto i = II->getValue();
+ if (i < 0 || i >= (ssize_t)TheList->size())
+ goto unresolved;
+ Args.push_back(TheList->getElement(i));
+ }
+ return ListInit::get(Args, TheList->getElementType());
+ }
+ case RANGE:
+ case RANGEC: {
+ auto *LHSi = dyn_cast<IntInit>(LHS);
+ auto *RHSi = dyn_cast<IntInit>(RHS);
+ if (!LHSi || !RHSi)
+ break;
+
+ auto Start = LHSi->getValue();
+ auto End = RHSi->getValue();
+ SmallVector<Init *, 8> Args;
+ if (getOpcode() == RANGEC) {
+ // Closed interval
+ if (Start <= End) {
+ // Ascending order
+ Args.reserve(End - Start + 1);
+ for (auto i = Start; i <= End; ++i)
+ Args.push_back(IntInit::get(getRecordKeeper(), i));
+ } else {
+ // Descending order
+ Args.reserve(Start - End + 1);
+ for (auto i = Start; i >= End; --i)
+ Args.push_back(IntInit::get(getRecordKeeper(), i));
+ }
+ } else if (Start < End) {
+ // Half-open interval (excludes `End`)
+ Args.reserve(End - Start);
+ for (auto i = Start; i < End; ++i)
+ Args.push_back(IntInit::get(getRecordKeeper(), i));
+ } else {
+ // Empty set
+ }
+ return ListInit::get(Args, LHSi->getType());
+ }
case STRCONCAT: {
StringInit *LHSs = dyn_cast<StringInit>(LHS);
StringInit *RHSs = dyn_cast<StringInit>(RHS);
@@ -1220,6 +1351,43 @@ std::optional<bool> BinOpInit::CompareInit(unsigned Opc, Init *LHS, Init *RHS) c
return BitInit::get(getRecordKeeper(), *Result);
break;
}
+ case GETDAGARG: {
+ DagInit *Dag = dyn_cast<DagInit>(LHS);
+ if (Dag && isa<IntInit, StringInit>(RHS)) {
+ std::string Error;
+ auto ArgNo = getDagArgNoByKey(Dag, RHS, Error);
+ if (!ArgNo)
+ PrintFatalError(CurRec->getLoc(), "!getdagarg " + Error);
+
+ assert(*ArgNo < Dag->getNumArgs());
+
+ Init *Arg = Dag->getArg(*ArgNo);
+ if (auto *TI = dyn_cast<TypedInit>(Arg))
+ if (!TI->getType()->typeIsConvertibleTo(getType()))
+ return UnsetInit::get(Dag->getRecordKeeper());
+ return Arg;
+ }
+ break;
+ }
+ case GETDAGNAME: {
+ DagInit *Dag = dyn_cast<DagInit>(LHS);
+ IntInit *Idx = dyn_cast<IntInit>(RHS);
+ if (Dag && Idx) {
+ int64_t Pos = Idx->getValue();
+ if (Pos < 0 || Pos >= Dag->getNumArgs()) {
+ // The index is out-of-range.
+ PrintError(CurRec->getLoc(),
+ Twine("!getdagname index is out of range 0...") +
+ std::to_string(Dag->getNumArgs() - 1) + ": " +
+ std::to_string(Pos));
+ }
+ Init *ArgName = Dag->getArgName(Pos);
+ if (!ArgName)
+ return UnsetInit::get(getRecordKeeper());
+ return ArgName;
+ }
+ break;
+ }
case SETDAGOP: {
DagInit *Dag = dyn_cast<DagInit>(LHS);
DefInit *Op = dyn_cast<DefInit>(RHS);
@@ -1278,6 +1446,7 @@ std::optional<bool> BinOpInit::CompareInit(unsigned Opc, Init *LHS, Init *RHS) c
break;
}
}
+unresolved:
return const_cast<BinOpInit *>(this);
}
@@ -1294,6 +1463,11 @@ Init *BinOpInit::resolveReferences(Resolver &R) const {
std::string BinOpInit::getAsString() const {
std::string Result;
switch (getOpcode()) {
+ case LISTELEM:
+ case LISTSLICE:
+ return LHS->getAsString() + "[" + RHS->getAsString() + "]";
+ case RANGEC:
+ return LHS->getAsString() + "..." + RHS->getAsString();
case CONCAT: Result = "!con"; break;
case ADD: Result = "!add"; break;
case SUB: Result = "!sub"; break;
@@ -1314,9 +1488,16 @@ std::string BinOpInit::getAsString() const {
case LISTCONCAT: Result = "!listconcat"; break;
case LISTSPLAT: Result = "!listsplat"; break;
case LISTREMOVE: Result = "!listremove"; break;
+ case RANGE: Result = "!range"; break;
case STRCONCAT: Result = "!strconcat"; break;
case INTERLEAVE: Result = "!interleave"; break;
case SETDAGOP: Result = "!setdagop"; break;
+ case GETDAGARG:
+ Result = "!getdagarg<" + getType()->getAsString() + ">";
+ break;
+ case GETDAGNAME:
+ Result = "!getdagname";
+ break;
}
return Result + "(" + LHS->getAsString() + ", " + RHS->getAsString() + ")";
}
@@ -1563,6 +1744,42 @@ Init *TernOpInit::Fold(Record *CurRec) const {
}
break;
}
+
+ case SETDAGARG: {
+ DagInit *Dag = dyn_cast<DagInit>(LHS);
+ if (Dag && isa<IntInit, StringInit>(MHS)) {
+ std::string Error;
+ auto ArgNo = getDagArgNoByKey(Dag, MHS, Error);
+ if (!ArgNo)
+ PrintFatalError(CurRec->getLoc(), "!setdagarg " + Error);
+
+ assert(*ArgNo < Dag->getNumArgs());
+
+ SmallVector<Init *, 8> Args(Dag->getArgs());
+ SmallVector<StringInit *, 8> Names(Dag->getArgNames());
+ Args[*ArgNo] = RHS;
+ return DagInit::get(Dag->getOperator(), Dag->getName(), Args, Names);
+ }
+ break;
+ }
+
+ case SETDAGNAME: {
+ DagInit *Dag = dyn_cast<DagInit>(LHS);
+ if (Dag && isa<IntInit, StringInit>(MHS)) {
+ std::string Error;
+ auto ArgNo = getDagArgNoByKey(Dag, MHS, Error);
+ if (!ArgNo)
+ PrintFatalError(CurRec->getLoc(), "!setdagname " + Error);
+
+ assert(*ArgNo < Dag->getNumArgs());
+
+ SmallVector<Init *, 8> Args(Dag->getArgs());
+ SmallVector<StringInit *, 8> Names(Dag->getArgNames());
+ Names[*ArgNo] = dyn_cast<StringInit>(RHS);
+ return DagInit::get(Dag->getOperator(), Dag->getName(), Args, Names);
+ }
+ break;
+ }
}
return const_cast<TernOpInit *>(this);
@@ -1609,6 +1826,12 @@ std::string TernOpInit::getAsString() const {
case SUBST: Result = "!subst"; break;
case SUBSTR: Result = "!substr"; break;
case FIND: Result = "!find"; break;
+ case SETDAGARG:
+ Result = "!setdagarg";
+ break;
+ case SETDAGNAME:
+ Result = "!setdagname";
+ break;
}
return (Result + "(" +
(UnquotedLHS ? LHS->getAsUnquotedString() : LHS->getAsString()) +
@@ -1772,34 +1995,34 @@ void ExistsOpInit::Profile(FoldingSetNodeID &ID) const {
Init *ExistsOpInit::Fold(Record *CurRec, bool IsFinal) const {
if (StringInit *Name = dyn_cast<StringInit>(Expr)) {
- if (!CurRec && !IsFinal)
- return const_cast<ExistsOpInit *>(this);
-
- // Self-references are allowed, but their resolution is delayed until
- // the final resolve to ensure that we get the correct type for them.
- auto *Anonymous = dyn_cast<AnonymousNameInit>(CurRec->getNameInit());
- if (Name == CurRec->getNameInit() ||
- (Anonymous && Name == Anonymous->getNameInit())) {
- if (!IsFinal)
- return const_cast<ExistsOpInit *>(this);
-
- // No doubt that there exists a record, so we should check if types are
- // compatiable.
- return IntInit::get(getRecordKeeper(),
- CurRec->getType()->typeIsA(CheckType));
- }
// Look up all defined records to see if we can find one.
Record *D = CheckType->getRecordKeeper().getDef(Name->getValue());
- if (!D) {
- if (IsFinal)
- return IntInit::get(getRecordKeeper(), 0);
- return const_cast<ExistsOpInit *>(this);
+ if (D) {
+ // Check if types are compatible.
+ return IntInit::get(getRecordKeeper(),
+ DefInit::get(D)->getType()->typeIsA(CheckType));
}
- // Check if types are compatiable.
- return IntInit::get(getRecordKeeper(),
- DefInit::get(D)->getType()->typeIsA(CheckType));
+ if (CurRec) {
+ // Self-references are allowed, but their resolution is delayed until
+ // the final resolve to ensure that we get the correct type for them.
+ auto *Anonymous = dyn_cast<AnonymousNameInit>(CurRec->getNameInit());
+ if (Name == CurRec->getNameInit() ||
+ (Anonymous && Name == Anonymous->getNameInit())) {
+ if (!IsFinal)
+ return const_cast<ExistsOpInit *>(this);
+
+ // No doubt that there exists a record, so we should check if types are
+ // compatible.
+ return IntInit::get(getRecordKeeper(),
+ CurRec->getType()->typeIsA(CheckType));
+ }
+ }
+
+ if (IsFinal)
+ return IntInit::get(getRecordKeeper(), 0);
+ return const_cast<ExistsOpInit *>(this);
}
return const_cast<ExistsOpInit *>(this);
}
@@ -1877,22 +2100,6 @@ Init *TypedInit::getCastTo(RecTy *Ty) const {
->Fold(nullptr);
}
-Init *TypedInit::convertInitListSlice(ArrayRef<unsigned> Elements) const {
- ListRecTy *T = dyn_cast<ListRecTy>(getType());
- if (!T) return nullptr; // Cannot subscript a non-list variable.
-
- if (Elements.size() == 1)
- return VarListElementInit::get(const_cast<TypedInit *>(this), Elements[0]);
-
- SmallVector<Init*, 8> ListInits;
- ListInits.reserve(Elements.size());
- for (unsigned Element : Elements)
- ListInits.push_back(VarListElementInit::get(const_cast<TypedInit *>(this),
- Element));
- return ListInit::get(ListInits, T->getElementType());
-}
-
-
VarInit *VarInit::get(StringRef VN, RecTy *T) {
Init *Value = StringInit::get(T->getRecordKeeper(), VN);
return VarInit::get(Value, T);
@@ -1943,37 +2150,6 @@ Init *VarBitInit::resolveReferences(Resolver &R) const {
return const_cast<VarBitInit*>(this);
}
-VarListElementInit *VarListElementInit::get(TypedInit *T, unsigned E) {
- detail::RecordKeeperImpl &RK = T->getRecordKeeper().getImpl();
- VarListElementInit *&I = RK.TheVarListElementInitPool[std::make_pair(T, E)];
- if (!I)
- I = new (RK.Allocator) VarListElementInit(T, E);
- return I;
-}
-
-std::string VarListElementInit::getAsString() const {
- return TI->getAsString() + "[" + utostr(Element) + "]";
-}
-
-Init *VarListElementInit::resolveReferences(Resolver &R) const {
- Init *NewTI = TI->resolveReferences(R);
- if (ListInit *List = dyn_cast<ListInit>(NewTI)) {
- // Leave out-of-bounds array references as-is. This can happen without
- // being an error, e.g. in the untaken "branch" of an !if expression.
- if (getElementNum() < List->size())
- return List->getElement(getElementNum());
- }
- if (NewTI != TI && isa<TypedInit>(NewTI))
- return VarListElementInit::get(cast<TypedInit>(NewTI), getElementNum());
- return const_cast<VarListElementInit *>(this);
-}
-
-Init *VarListElementInit::getBit(unsigned Bit) const {
- if (getType() == BitRecTy::get(getRecordKeeper()))
- return const_cast<VarListElementInit*>(this);
- return VarBitInit::get(const_cast<VarListElementInit*>(this), Bit);
-}
-
DefInit::DefInit(Record *D)
: TypedInit(IK_DefInit, D->getType()), Def(D) {}
@@ -1996,9 +2172,8 @@ RecTy *DefInit::getFieldType(StringInit *FieldName) const {
std::string DefInit::getAsString() const { return std::string(Def->getName()); }
-static void ProfileVarDefInit(FoldingSetNodeID &ID,
- Record *Class,
- ArrayRef<Init *> Args) {
+static void ProfileVarDefInit(FoldingSetNodeID &ID, Record *Class,
+ ArrayRef<ArgumentInit *> Args) {
ID.AddInteger(Args.size());
ID.AddPointer(Class);
@@ -2010,7 +2185,7 @@ VarDefInit::VarDefInit(Record *Class, unsigned N)
: TypedInit(IK_VarDefInit, RecordRecTy::get(Class)), Class(Class),
NumArgs(N) {}
-VarDefInit *VarDefInit::get(Record *Class, ArrayRef<Init *> Args) {
+VarDefInit *VarDefInit::get(Record *Class, ArrayRef<ArgumentInit *> Args) {
FoldingSetNodeID ID;
ProfileVarDefInit(ID, Class, Args);
@@ -2019,11 +2194,11 @@ VarDefInit *VarDefInit::get(Record *Class, ArrayRef<Init *> Args) {
if (VarDefInit *I = RK.TheVarDefInitPool.FindNodeOrInsertPos(ID, IP))
return I;
- void *Mem = RK.Allocator.Allocate(totalSizeToAlloc<Init *>(Args.size()),
- alignof(VarDefInit));
+ void *Mem = RK.Allocator.Allocate(
+ totalSizeToAlloc<ArgumentInit *>(Args.size()), alignof(VarDefInit));
VarDefInit *I = new (Mem) VarDefInit(Class, Args.size());
std::uninitialized_copy(Args.begin(), Args.end(),
- I->getTrailingObjects<Init *>());
+ I->getTrailingObjects<ArgumentInit *>());
RK.TheVarDefInitPool.InsertNode(I, IP);
return I;
}
@@ -2051,13 +2226,16 @@ DefInit *VarDefInit::instantiate() {
ArrayRef<Init *> TArgs = Class->getTemplateArgs();
MapResolver R(NewRec);
- for (unsigned i = 0, e = TArgs.size(); i != e; ++i) {
- if (i < args_size())
- R.set(TArgs[i], getArg(i));
- else
- R.set(TArgs[i], NewRec->getValue(TArgs[i])->getValue());
+ for (unsigned I = 0, E = TArgs.size(); I != E; ++I) {
+ R.set(TArgs[I], NewRec->getValue(TArgs[I])->getValue());
+ NewRec->removeValue(TArgs[I]);
+ }
- NewRec->removeValue(TArgs[i]);
+ for (auto *Arg : args()) {
+ if (Arg->isPositional())
+ R.set(TArgs[Arg->getIndex()], Arg->getValue());
+ if (Arg->isNamed())
+ R.set(Arg->getName(), Arg->getValue());
}
NewRec->resolveReferences(R);
@@ -2087,11 +2265,11 @@ DefInit *VarDefInit::instantiate() {
Init *VarDefInit::resolveReferences(Resolver &R) const {
TrackUnresolvedResolver UR(&R);
bool Changed = false;
- SmallVector<Init *, 8> NewArgs;
+ SmallVector<ArgumentInit *, 8> NewArgs;
NewArgs.reserve(args_size());
- for (Init *Arg : args()) {
- Init *NewArg = Arg->resolveReferences(UR);
+ for (ArgumentInit *Arg : args()) {
+ auto *NewArg = cast<ArgumentInit>(Arg->resolveReferences(UR));
NewArgs.push_back(NewArg);
Changed |= NewArg != Arg;
}
@@ -2259,7 +2437,7 @@ Init *CondOpInit::Fold(Record *CurRec) const {
}
PrintFatalError(CurRec->getLoc(),
- CurRec->getName() +
+ CurRec->getNameInitAsString() +
" does not have any true condition in:" +
this->getAsString());
return nullptr;
@@ -2370,6 +2548,15 @@ Record *DagInit::getOperatorAsDef(ArrayRef<SMLoc> Loc) const {
return nullptr;
}
+std::optional<unsigned> DagInit::getArgNo(StringRef Name) const {
+ for (unsigned i = 0, e = getNumArgs(); i < e; ++i) {
+ StringInit *ArgName = getArgName(i);
+ if (ArgName && ArgName->getValue() == Name)
+ return i;
+ }
+ return std::nullopt;
+}
+
Init *DagInit::resolveReferences(Resolver &R) const {
SmallVector<Init*, 8> NewArgs;
NewArgs.reserve(arg_size());
@@ -2989,6 +3176,10 @@ std::vector<Record *> RecordKeeper::getAllDerivedDefinitions(
Defs.push_back(OneDef.second.get());
}
+ llvm::sort(Defs, [](Record *LHS, Record *RHS) {
+ return LHS->getName().compare_numeric(RHS->getName()) < 0;
+ });
+
return Defs;
}
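Note: the new !getdagarg/!setdagarg operators accept either an integer index or an argument name, resolved by getDagArgNoByKey() above. Below is a small sketch of that dual lookup with std::variant standing in for the IntInit/StringInit distinction; Dag here is just a vector of (name, value) pairs and findDagArg is an illustrative name.

#include <optional>
#include <string>
#include <variant>
#include <vector>
#include <cassert>

using Key = std::variant<int64_t, std::string>;
using Dag = std::vector<std::pair<std::string, int>>; // (arg name, arg value)

static std::optional<size_t> findDagArg(const Dag &D, const Key &K,
                                        std::string &Error) {
  if (const int64_t *Idx = std::get_if<int64_t>(&K)) {
    if (*Idx < 0 || *Idx >= (int64_t)D.size()) {
      Error = "index " + std::to_string(*Idx) + " is out of range";
      return std::nullopt;
    }
    return (size_t)*Idx;
  }
  const std::string &Name = std::get<std::string>(K);
  for (size_t I = 0; I < D.size(); ++I)
    if (D[I].first == Name)
      return I;
  Error = "key '" + Name + "' is not found";
  return std::nullopt;
}

int main() {
  Dag D = {{"lhs", 1}, {"rhs", 2}};
  std::string Err;
  assert(findDagArg(D, Key(int64_t(1)), Err) == 1);
  assert(findDagArg(D, Key(std::string("lhs")), Err) == 0);
  assert(!findDagArg(D, Key(std::string("op")), Err) && !Err.empty());
}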
diff --git a/llvm/lib/TableGen/TGLexer.cpp b/llvm/lib/TableGen/TGLexer.cpp
index f2148b40a1b5..98f0e8c1149c 100644
--- a/llvm/lib/TableGen/TGLexer.cpp
+++ b/llvm/lib/TableGen/TGLexer.cpp
@@ -549,50 +549,57 @@ tgtok::TokKind TGLexer::LexExclaim() {
// Check to see which operator this is.
tgtok::TokKind Kind =
- StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start))
- .Case("eq", tgtok::XEq)
- .Case("ne", tgtok::XNe)
- .Case("le", tgtok::XLe)
- .Case("lt", tgtok::XLt)
- .Case("ge", tgtok::XGe)
- .Case("gt", tgtok::XGt)
- .Case("if", tgtok::XIf)
- .Case("cond", tgtok::XCond)
- .Case("isa", tgtok::XIsA)
- .Case("head", tgtok::XHead)
- .Case("tail", tgtok::XTail)
- .Case("size", tgtok::XSize)
- .Case("con", tgtok::XConcat)
- .Case("dag", tgtok::XDag)
- .Case("add", tgtok::XADD)
- .Case("sub", tgtok::XSUB)
- .Case("mul", tgtok::XMUL)
- .Case("div", tgtok::XDIV)
- .Case("not", tgtok::XNOT)
- .Case("logtwo", tgtok::XLOG2)
- .Case("and", tgtok::XAND)
- .Case("or", tgtok::XOR)
- .Case("xor", tgtok::XXOR)
- .Case("shl", tgtok::XSHL)
- .Case("sra", tgtok::XSRA)
- .Case("srl", tgtok::XSRL)
- .Case("cast", tgtok::XCast)
- .Case("empty", tgtok::XEmpty)
- .Case("subst", tgtok::XSubst)
- .Case("foldl", tgtok::XFoldl)
- .Case("foreach", tgtok::XForEach)
- .Case("filter", tgtok::XFilter)
- .Case("listconcat", tgtok::XListConcat)
- .Case("listsplat", tgtok::XListSplat)
- .Case("listremove", tgtok::XListRemove)
- .Case("strconcat", tgtok::XStrConcat)
- .Case("interleave", tgtok::XInterleave)
- .Case("substr", tgtok::XSubstr)
- .Case("find", tgtok::XFind)
- .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated.
- .Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated.
- .Case("exists", tgtok::XExists)
- .Default(tgtok::Error);
+ StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start))
+ .Case("eq", tgtok::XEq)
+ .Case("ne", tgtok::XNe)
+ .Case("le", tgtok::XLe)
+ .Case("lt", tgtok::XLt)
+ .Case("ge", tgtok::XGe)
+ .Case("gt", tgtok::XGt)
+ .Case("if", tgtok::XIf)
+ .Case("cond", tgtok::XCond)
+ .Case("isa", tgtok::XIsA)
+ .Case("head", tgtok::XHead)
+ .Case("tail", tgtok::XTail)
+ .Case("size", tgtok::XSize)
+ .Case("con", tgtok::XConcat)
+ .Case("dag", tgtok::XDag)
+ .Case("add", tgtok::XADD)
+ .Case("sub", tgtok::XSUB)
+ .Case("mul", tgtok::XMUL)
+ .Case("div", tgtok::XDIV)
+ .Case("not", tgtok::XNOT)
+ .Case("logtwo", tgtok::XLOG2)
+ .Case("and", tgtok::XAND)
+ .Case("or", tgtok::XOR)
+ .Case("xor", tgtok::XXOR)
+ .Case("shl", tgtok::XSHL)
+ .Case("sra", tgtok::XSRA)
+ .Case("srl", tgtok::XSRL)
+ .Case("cast", tgtok::XCast)
+ .Case("empty", tgtok::XEmpty)
+ .Case("subst", tgtok::XSubst)
+ .Case("foldl", tgtok::XFoldl)
+ .Case("foreach", tgtok::XForEach)
+ .Case("filter", tgtok::XFilter)
+ .Case("listconcat", tgtok::XListConcat)
+ .Case("listsplat", tgtok::XListSplat)
+ .Case("listremove", tgtok::XListRemove)
+ .Case("range", tgtok::XRange)
+ .Case("strconcat", tgtok::XStrConcat)
+ .Case("interleave", tgtok::XInterleave)
+ .Case("substr", tgtok::XSubstr)
+ .Case("find", tgtok::XFind)
+ .Cases("setdagop", "setop", tgtok::XSetDagOp) // !setop is deprecated.
+ .Cases("getdagop", "getop", tgtok::XGetDagOp) // !getop is deprecated.
+ .Case("getdagarg", tgtok::XGetDagArg)
+ .Case("getdagname", tgtok::XGetDagName)
+ .Case("setdagarg", tgtok::XSetDagArg)
+ .Case("setdagname", tgtok::XSetDagName)
+ .Case("exists", tgtok::XExists)
+ .Case("tolower", tgtok::XToLower)
+ .Case("toupper", tgtok::XToUpper)
+ .Default(tgtok::Error);
return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
}
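
The lexer now recognizes several new bang operators: !range, !tolower, !toupper, and the dag accessors !getdagarg, !getdagname, !setdagarg and !setdagname. A short sketch of the string and range forms, following the semantics given by the parser changes later in this patch (the defvar names are illustrative only):

  // Illustrative top-level defvars, not part of this patch.
  defvar Upper = !toupper("sram");     // "SRAM"
  defvar Lower = !tolower("SRAM");     // "sram"
  defvar Idx0  = !range(3);            // [0, 1, 2]
  defvar Idx1  = !range(1, 3);         // [1, 2]
  defvar Idx2  = !range([10, 20, 30]); // [0, 1, 2], i.e. !range(!size(list))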
diff --git a/llvm/lib/TableGen/TGLexer.h b/llvm/lib/TableGen/TGLexer.h
index 284f1bade9de..c9bba98971d0 100644
--- a/llvm/lib/TableGen/TGLexer.h
+++ b/llvm/lib/TableGen/TGLexer.h
@@ -29,52 +29,134 @@ class SourceMgr;
class Twine;
namespace tgtok {
- enum TokKind {
- // Markers
- Eof, Error,
-
- // Tokens with no info.
- minus, plus, // - +
- l_square, r_square, // [ ]
- l_brace, r_brace, // { }
- l_paren, r_paren, // ( )
- less, greater, // < >
- colon, semi, // : ;
- comma, dot, // , .
- equal, question, // = ?
- paste, // #
- dotdotdot, // ...
-
- // Reserved keywords. ('ElseKW' is named to distinguish it from the
- // existing 'Else' that means the preprocessor #else.)
- Assert, Bit, Bits, Class, Code, Dag, Def, Defm, Defset, Defvar, ElseKW,
- FalseKW, Field, Foreach, If, In, Include, Int, Let, List, MultiClass,
- String, Then, TrueKW,
-
- // Bang operators.
- XConcat, XADD, XSUB, XMUL, XDIV, XNOT, XLOG2, XAND, XOR, XXOR, XSRA, XSRL,
- XSHL, XListConcat, XListSplat, XStrConcat, XInterleave, XSubstr, XFind,
- XCast, XSubst, XForEach, XFilter, XFoldl, XHead, XTail, XSize, XEmpty, XIf,
- XCond, XEq, XIsA, XDag, XNe, XLe, XLt, XGe, XGt, XSetDagOp, XGetDagOp,
- XExists, XListRemove,
-
- // Boolean literals.
- TrueVal, FalseVal,
-
- // Integer value.
- IntVal,
-
- // Binary constant. Note that these are sized according to the number of
- // bits given.
- BinaryIntVal,
-
- // String valued tokens.
- Id, StrVal, VarName, CodeFragment,
-
- // Preprocessing tokens for internal usage by the lexer.
- // They are never returned as a result of Lex().
- Ifdef, Ifndef, Else, Endif, Define
- };
+enum TokKind {
+ // Markers
+ Eof,
+ Error,
+
+ // Tokens with no info.
+ minus, // -
+ plus, // +
+ l_square, // [
+ r_square, // ]
+ l_brace, // {
+ r_brace, // }
+ l_paren, // (
+ r_paren, // )
+ less, // <
+ greater, // >
+ colon, // :
+ semi, // ;
+ comma, // ,
+ dot, // .
+ equal, // =
+ question, // ?
+ paste, // #
+ dotdotdot, // ...
+
+ // Reserved keywords. ('ElseKW' is named to distinguish it from the
+ // existing 'Else' that means the preprocessor #else.)
+ Assert,
+ Bit,
+ Bits,
+ Class,
+ Code,
+ Dag,
+ Def,
+ Defm,
+ Defset,
+ Defvar,
+ ElseKW,
+ FalseKW,
+ Field,
+ Foreach,
+ If,
+ In,
+ Include,
+ Int,
+ Let,
+ List,
+ MultiClass,
+ String,
+ Then,
+ TrueKW,
+
+ // Bang operators.
+ XConcat,
+ XADD,
+ XSUB,
+ XMUL,
+ XDIV,
+ XNOT,
+ XLOG2,
+ XAND,
+ XOR,
+ XXOR,
+ XSRA,
+ XSRL,
+ XSHL,
+ XListConcat,
+ XListSplat,
+ XStrConcat,
+ XInterleave,
+ XSubstr,
+ XFind,
+ XCast,
+ XSubst,
+ XForEach,
+ XFilter,
+ XFoldl,
+ XHead,
+ XTail,
+ XSize,
+ XEmpty,
+ XIf,
+ XCond,
+ XEq,
+ XIsA,
+ XDag,
+ XNe,
+ XLe,
+ XLt,
+ XGe,
+ XGt,
+ XSetDagOp,
+ XGetDagOp,
+ XExists,
+ XListRemove,
+ XToLower,
+ XToUpper,
+ XRange,
+ XGetDagArg,
+ XGetDagName,
+ XSetDagArg,
+ XSetDagName,
+
+ // Boolean literals.
+ TrueVal,
+ FalseVal,
+
+ // Integer value.
+ IntVal,
+
+ // Binary constant. Note that these are sized according to the number of
+ // bits given.
+ BinaryIntVal,
+
+ // String valued tokens.
+ Id,
+ StrVal,
+ VarName,
+ CodeFragment,
+
+ // Preprocessing tokens for internal usage by the lexer.
+ // They are never returned as a result of Lex().
+ Ifdef,
+ Ifndef,
+ Else,
+ Endif,
+ Define
+};
}
/// TGLexer - TableGen Lexer class.
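
The new token kinds XGetDagArg, XGetDagName, XSetDagArg and XSetDagName back the dag accessors handled in TGParser.cpp below. An illustrative use, with a made-up dag whose operator and operand names are not taken from this patch:

  def ops;                                 // hypothetical dag operator record
  defvar d  = (ops 1:$dst, 42:$imm);
  defvar a0 = !getdagarg<int>(d, 0);       // 1, selected by index
  defvar a1 = !getdagarg<int>(d, "imm");   // 42, selected by operand name
  defvar n1 = !getdagname(d, 1);           // "imm"
  defvar d2 = !setdagarg(d, "imm", 7);     // (ops 1:$dst, 7:$imm)
  defvar d3 = !setdagname(d, 0, "res");    // (ops 1:$res, 42:$imm)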
diff --git a/llvm/lib/TableGen/TGParser.cpp b/llvm/lib/TableGen/TGParser.cpp
index 7fc46a8b4a87..759e15f4c443 100644
--- a/llvm/lib/TableGen/TGParser.cpp
+++ b/llvm/lib/TableGen/TGParser.cpp
@@ -36,7 +36,7 @@ namespace llvm {
struct SubClassReference {
SMRange RefRange;
Record *Rec;
- SmallVector<Init*, 4> TemplateArgs;
+ SmallVector<ArgumentInit *, 4> TemplateArgs;
SubClassReference() : Rec(nullptr) {}
@@ -46,7 +46,7 @@ struct SubClassReference {
struct SubMultiClassReference {
SMRange RefRange;
MultiClass *MC;
- SmallVector<Init*, 4> TemplateArgs;
+ SmallVector<ArgumentInit *, 4> TemplateArgs;
SubMultiClassReference() : MC(nullptr) {}
@@ -117,11 +117,6 @@ static Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass, Init *Name,
Init *NewName = BinOpInit::getStrConcat(CurRec.getNameInit(),
StringInit::get(RK, Scoper));
NewName = BinOpInit::getStrConcat(NewName, Name);
- if (CurMultiClass && Scoper != "::") {
- Init *Prefix = BinOpInit::getStrConcat(CurMultiClass->Rec.getNameInit(),
- StringInit::get(RK, "::"));
- NewName = BinOpInit::getStrConcat(Prefix, NewName);
- }
if (BinOpInit *BinOp = dyn_cast<BinOpInit>(NewName))
NewName = BinOp->Fold(&CurRec);
@@ -139,6 +134,79 @@ static Init *QualifiedNameOfImplicitName(MultiClass *MC) {
return QualifiedNameOfImplicitName(MC->Rec, MC);
}
+Init *TGVarScope::getVar(RecordKeeper &Records, MultiClass* ParsingMultiClass,
+ StringInit *Name, SMRange NameLoc,
+ bool TrackReferenceLocs) const {
+ // First, we search in local variables.
+ auto It = Vars.find(Name->getValue());
+ if (It != Vars.end())
+ return It->second;
+
+ std::function<Init *(Record *, StringInit *, StringRef)> FindValueInArgs =
+ [&](Record *Rec, StringInit *Name, StringRef Scoper) -> Init * {
+ if (!Rec)
+ return nullptr;
+ Init *ArgName = QualifyName(*Rec, ParsingMultiClass, Name, Scoper);
+ if (Rec->isTemplateArg(ArgName)) {
+ RecordVal *RV = Rec->getValue(ArgName);
+ assert(RV && "Template arg doesn't exist??");
+ RV->setUsed(true);
+ if (TrackReferenceLocs)
+ RV->addReferenceLoc(NameLoc);
+ return VarInit::get(ArgName, RV->getType());
+ }
+ return Name->getValue() == "NAME"
+ ? VarInit::get(ArgName, StringRecTy::get(Records))
+ : nullptr;
+ };
+
+  // If not found, we try to find the variable among additional variables,
+  // such as template arguments, loop iterators, etc.
+ switch (Kind) {
+ case SK_Local:
+ break; /* do nothing. */
+ case SK_Record: {
+ if (CurRec) {
+ // The variable is a record field?
+ if (RecordVal *RV = CurRec->getValue(Name)) {
+ if (TrackReferenceLocs)
+ RV->addReferenceLoc(NameLoc);
+ return VarInit::get(Name, RV->getType());
+ }
+
+ // The variable is a class template argument?
+ if (CurRec->isClass())
+ if (auto *V = FindValueInArgs(CurRec, Name, ":"))
+ return V;
+ }
+ break;
+ }
+ case SK_ForeachLoop: {
+ // The variable is a loop iterator?
+ if (CurLoop->IterVar) {
+ VarInit *IterVar = dyn_cast<VarInit>(CurLoop->IterVar);
+ if (IterVar && IterVar->getNameInit() == Name)
+ return IterVar;
+ }
+ break;
+ }
+ case SK_MultiClass: {
+ // The variable is a multiclass template argument?
+ if (CurMultiClass)
+ if (auto *V = FindValueInArgs(&CurMultiClass->Rec, Name, "::"))
+ return V;
+ break;
+ }
+ }
+
+ // Then, we try to find the name in parent scope.
+ if (Parent)
+ return Parent->getVar(Records, ParsingMultiClass, Name, NameLoc,
+ TrackReferenceLocs);
+
+ return nullptr;
+}
+
bool TGParser::AddValue(Record *CurRec, SMLoc Loc, const RecordVal &RV) {
if (!CurRec)
CurRec = &CurMultiClass->Rec;
@@ -231,32 +299,16 @@ bool TGParser::AddSubClass(Record *CurRec, SubClassReference &SubClass) {
Record *SC = SubClass.Rec;
MapResolver R(CurRec);
- // Loop over all the subclass record's fields. Add template arguments
- // to the resolver map. Add regular fields to the new record.
- for (const RecordVal &Field : SC->getValues()) {
- if (Field.isTemplateArg()) {
- R.set(Field.getNameInit(), Field.getValue());
- } else {
+ // Loop over all the subclass record's fields. Add regular fields to the new
+ // record.
+ for (const RecordVal &Field : SC->getValues())
+ if (!Field.isTemplateArg())
if (AddValue(CurRec, SubClass.RefRange.Start, Field))
return true;
- }
- }
-
- ArrayRef<Init *> TArgs = SC->getTemplateArgs();
- assert(SubClass.TemplateArgs.size() <= TArgs.size() &&
- "Too many template arguments allowed");
- // Loop over the template argument names. If a value was specified,
- // reset the map value. If not and there was no default, complain.
- for (unsigned I = 0, E = TArgs.size(); I != E; ++I) {
- if (I < SubClass.TemplateArgs.size())
- R.set(TArgs[I], SubClass.TemplateArgs[I]);
- else if (!R.isComplete(TArgs[I]))
- return Error(SubClass.RefRange.Start,
- "Value not specified for template argument '" +
- TArgs[I]->getAsUnquotedString() + "' (#" + Twine(I) +
- ") of parent class '" + SC->getNameInitAsString() + "'");
- }
+ if (resolveArgumentsOfClass(R, SC, SubClass.TemplateArgs,
+ SubClass.RefRange.Start))
+ return true;
// Copy the subclass record's assertions to the new record.
CurRec->appendAssertions(SC);
@@ -310,36 +362,16 @@ bool TGParser::AddSubMultiClass(MultiClass *CurMC,
SubMultiClassReference &SubMultiClass) {
MultiClass *SMC = SubMultiClass.MC;
- ArrayRef<Init *> SMCTArgs = SMC->Rec.getTemplateArgs();
- if (SMCTArgs.size() < SubMultiClass.TemplateArgs.size())
- return Error(SubMultiClass.RefRange.Start,
- "More template args specified than expected");
-
- // Prepare the mapping of template argument name to value, filling in default
- // values if necessary.
- SubstStack TemplateArgs;
- for (unsigned i = 0, e = SMCTArgs.size(); i != e; ++i) {
- if (i < SubMultiClass.TemplateArgs.size()) {
- TemplateArgs.emplace_back(SMCTArgs[i], SubMultiClass.TemplateArgs[i]);
- } else {
- Init *Default = SMC->Rec.getValue(SMCTArgs[i])->getValue();
- if (!Default->isComplete()) {
- return Error(SubMultiClass.RefRange.Start,
- "value not specified for template argument #" + Twine(i) +
- " (" + SMCTArgs[i]->getAsUnquotedString() +
- ") of multiclass '" + SMC->Rec.getNameInitAsString() +
- "'");
- }
- TemplateArgs.emplace_back(SMCTArgs[i], Default);
- }
- }
-
- TemplateArgs.emplace_back(QualifiedNameOfImplicitName(SMC),
- VarInit::get(QualifiedNameOfImplicitName(CurMC),
- StringRecTy::get(Records)));
+ SubstStack Substs;
+ if (resolveArgumentsOfMultiClass(
+ Substs, SMC, SubMultiClass.TemplateArgs,
+ VarInit::get(QualifiedNameOfImplicitName(CurMC),
+ StringRecTy::get(Records)),
+ SubMultiClass.RefRange.Start))
+ return true;
// Add all of the defs in the subclass into the current multiclass.
- return resolve(SMC->Entries, TemplateArgs, false, &CurMC->Entries);
+ return resolve(SMC->Entries, Substs, false, &CurMC->Entries);
}
/// Add a record, foreach loop, or assertion to the current context.
@@ -384,10 +416,35 @@ bool TGParser::addEntry(RecordsEntry E) {
bool TGParser::resolve(const ForeachLoop &Loop, SubstStack &Substs,
bool Final, std::vector<RecordsEntry> *Dest,
SMLoc *Loc) {
+
MapResolver R;
for (const auto &S : Substs)
R.set(S.first, S.second);
Init *List = Loop.ListValue->resolveReferences(R);
+
+ // For if-then-else blocks, we lower to a foreach loop whose list is a
+ // ternary selection between lists of different length. Since we don't
+ // have a means to track variable length record lists, we *must* resolve
+ // the condition here. We want to defer final resolution of the arms
+ // until the resulting records are finalized.
+ // e.g. !if(!exists<SchedWrite>("__does_not_exist__"), [1], [])
+ if (auto *TI = dyn_cast<TernOpInit>(List);
+ TI && TI->getOpcode() == TernOpInit::IF && Final) {
+ Init *OldLHS = TI->getLHS();
+ R.setFinal(true);
+ Init *LHS = OldLHS->resolveReferences(R);
+ if (LHS == OldLHS) {
+ PrintError(Loop.Loc,
+ Twine("unable to resolve if condition '") +
+ LHS->getAsString() + "' at end of containing scope");
+ return true;
+ }
+ Init *MHS = TI->getMHS();
+ Init *RHS = TI->getRHS();
+ List = TernOpInit::get(TernOpInit::IF, LHS, MHS, RHS, TI->getType())
+ ->Fold(nullptr);
+ }
+
auto LI = dyn_cast<ListInit>(List);
if (!LI) {
if (!Final) {
@@ -507,6 +564,67 @@ bool TGParser::addDefOne(std::unique_ptr<Record> Rec) {
return false;
}
+bool TGParser::resolveArguments(Record *Rec, ArrayRef<ArgumentInit *> ArgValues,
+ SMLoc Loc, ArgValueHandler ArgValueHandler) {
+ ArrayRef<Init *> ArgNames = Rec->getTemplateArgs();
+ assert(ArgValues.size() <= ArgNames.size() &&
+ "Too many template arguments allowed");
+
+ // Loop over the template arguments and handle the (name, value) pair.
+ SmallVector<Init *, 2> UnsolvedArgNames(ArgNames);
+ for (auto *Arg : ArgValues) {
+ Init *ArgName = nullptr;
+ Init *ArgValue = Arg->getValue();
+ if (Arg->isPositional())
+ ArgName = ArgNames[Arg->getIndex()];
+ if (Arg->isNamed())
+ ArgName = Arg->getName();
+
+ // We can only specify the template argument once.
+ if (!is_contained(UnsolvedArgNames, ArgName))
+ return Error(Loc, "We can only specify the template argument '" +
+ ArgName->getAsUnquotedString() + "' once");
+
+ ArgValueHandler(ArgName, ArgValue);
+ llvm::erase_value(UnsolvedArgNames, ArgName);
+ }
+
+ // For unsolved arguments, if there is no default value, complain.
+ for (auto *UnsolvedArgName : UnsolvedArgNames) {
+ Init *Default = Rec->getValue(UnsolvedArgName)->getValue();
+ if (!Default->isComplete()) {
+ return Error(Loc, "value not specified for template argument (" +
+ UnsolvedArgName->getAsUnquotedString() +
+ ") of multiclass '" + Rec->getNameInitAsString() +
+ "'");
+ }
+ ArgValueHandler(UnsolvedArgName, Default);
+ }
+
+ return false;
+}
+
+/// Resolve the arguments of class and set them to MapResolver.
+/// Returns true if failed.
+bool TGParser::resolveArgumentsOfClass(MapResolver &R, Record *Rec,
+ ArrayRef<ArgumentInit *> ArgValues,
+ SMLoc Loc) {
+ return resolveArguments(Rec, ArgValues, Loc,
+ [&](Init *Name, Init *Value) { R.set(Name, Value); });
+}
+
+/// Resolve the arguments of multiclass and store them into SubstStack.
+/// Returns true if failed.
+bool TGParser::resolveArgumentsOfMultiClass(SubstStack &Substs, MultiClass *MC,
+ ArrayRef<ArgumentInit *> ArgValues,
+ Init *DefmName, SMLoc Loc) {
+ // Add an implicit argument NAME.
+ Substs.emplace_back(QualifiedNameOfImplicitName(MC), DefmName);
+ return resolveArguments(
+ &MC->Rec, ArgValues, Loc,
+ [&](Init *Name, Init *Value) { Substs.emplace_back(Name, Value); });
+}
+
//===----------------------------------------------------------------------===//
// Parser Code
//===----------------------------------------------------------------------===//
@@ -616,7 +734,7 @@ MultiClass *TGParser::ParseMultiClassID() {
/// multiclass. This returns a SubClassRefTy with a null Record* on error.
///
/// SubClassRef ::= ClassID
-/// SubClassRef ::= ClassID '<' ValueList '>'
+/// SubClassRef ::= ClassID '<' ArgValueList '>'
///
SubClassReference TGParser::
ParseSubClassReference(Record *CurRec, bool isDefm) {
@@ -637,7 +755,8 @@ ParseSubClassReference(Record *CurRec, bool isDefm) {
return Result;
}
- if (ParseTemplateArgValueList(Result.TemplateArgs, CurRec, Result.Rec)) {
+ if (ParseTemplateArgValueList(Result.TemplateArgs, CurRec, Result.Rec,
+ isDefm)) {
Result.Rec = nullptr; // Error parsing value list.
return Result;
}
@@ -657,7 +776,7 @@ ParseSubClassReference(Record *CurRec, bool isDefm) {
/// Record* on error.
///
/// SubMultiClassRef ::= MultiClassID
-/// SubMultiClassRef ::= MultiClassID '<' ValueList '>'
+/// SubMultiClassRef ::= MultiClassID '<' ArgValueList '>'
///
SubMultiClassReference TGParser::
ParseSubMultiClassReference(MultiClass *CurMC) {
@@ -674,7 +793,7 @@ ParseSubMultiClassReference(MultiClass *CurMC) {
}
if (ParseTemplateArgValueList(Result.TemplateArgs, &CurMC->Rec,
- &Result.MC->Rec)) {
+ &Result.MC->Rec, true)) {
Result.MC = nullptr; // Error parsing value list.
return Result;
}
@@ -684,6 +803,148 @@ ParseSubMultiClassReference(MultiClass *CurMC) {
return Result;
}
+/// ParseSliceElement - Parse subscript or range
+///
+/// SliceElement ::= Value<list<int>>
+/// SliceElement ::= Value<int>
+/// SliceElement ::= Value<int> '...' Value<int>
+/// SliceElement ::= Value<int> '-' Value<int> (deprecated)
+/// SliceElement ::= Value<int> INTVAL(Negative; deprecated)
+///
+/// SliceElement is either IntRecTy, ListRecTy, or nullptr
+///
+TypedInit *TGParser::ParseSliceElement(Record *CurRec) {
+ auto LHSLoc = Lex.getLoc();
+ auto *CurVal = ParseValue(CurRec);
+ if (!CurVal)
+ return nullptr;
+ auto *LHS = cast<TypedInit>(CurVal);
+
+ TypedInit *RHS = nullptr;
+ switch (Lex.getCode()) {
+ case tgtok::dotdotdot:
+ case tgtok::minus: { // Deprecated
+ Lex.Lex(); // eat
+ auto RHSLoc = Lex.getLoc();
+ CurVal = ParseValue(CurRec);
+ if (!CurVal)
+ return nullptr;
+ RHS = cast<TypedInit>(CurVal);
+ if (!isa<IntRecTy>(RHS->getType())) {
+ Error(RHSLoc,
+ "expected int...int, got " + Twine(RHS->getType()->getAsString()));
+ return nullptr;
+ }
+ break;
+ }
+ case tgtok::IntVal: { // Deprecated "-num"
+ auto i = -Lex.getCurIntVal();
+ if (i < 0) {
+ TokError("invalid range, cannot be negative");
+ return nullptr;
+ }
+ RHS = IntInit::get(Records, i);
+ Lex.Lex(); // eat IntVal
+ break;
+ }
+ default: // Single value (IntRecTy or ListRecTy)
+ return LHS;
+ }
+
+ assert(RHS);
+ assert(isa<IntRecTy>(RHS->getType()));
+
+ // Closed-interval range <LHS:IntRecTy>...<RHS:IntRecTy>
+ if (!isa<IntRecTy>(LHS->getType())) {
+ Error(LHSLoc,
+ "expected int...int, got " + Twine(LHS->getType()->getAsString()));
+ return nullptr;
+ }
+
+ return cast<TypedInit>(BinOpInit::get(BinOpInit::RANGEC, LHS, RHS,
+ IntRecTy::get(Records)->getListTy())
+ ->Fold(CurRec));
+}
+
+/// ParseSliceElements - Parse subscripts in square brackets.
+///
+/// SliceElements ::= ( SliceElement ',' )* SliceElement ','?
+///
+/// SliceElement is either IntRecTy, ListRecTy, or nullptr
+///
+/// Returns ListRecTy by default.
+/// Returns IntRecTy if:
+/// - Single=true
+/// - SliceElements is Value<int> w/o trailing comma
+///
+TypedInit *TGParser::ParseSliceElements(Record *CurRec, bool Single) {
+ TypedInit *CurVal;
+ SmallVector<Init *, 2> Elems; // int
+ SmallVector<TypedInit *, 2> Slices; // list<int>
+
+ auto FlushElems = [&] {
+ if (!Elems.empty()) {
+ Slices.push_back(ListInit::get(Elems, IntRecTy::get(Records)));
+ Elems.clear();
+ }
+ };
+
+ do {
+ auto LHSLoc = Lex.getLoc();
+ CurVal = ParseSliceElement(CurRec);
+ if (!CurVal)
+ return nullptr;
+ auto *CurValTy = CurVal->getType();
+
+ if (auto *ListValTy = dyn_cast<ListRecTy>(CurValTy)) {
+ if (!isa<IntRecTy>(ListValTy->getElementType())) {
+ Error(LHSLoc,
+ "expected list<int>, got " + Twine(ListValTy->getAsString()));
+ return nullptr;
+ }
+
+ FlushElems();
+ Slices.push_back(CurVal);
+ Single = false;
+ CurVal = nullptr;
+ } else if (!isa<IntRecTy>(CurValTy)) {
+ Error(LHSLoc,
+ "unhandled type " + Twine(CurValTy->getAsString()) + " in range");
+ return nullptr;
+ }
+
+ if (Lex.getCode() != tgtok::comma)
+ break;
+
+ Lex.Lex(); // eat comma
+
+ // `[i,]` is not LISTELEM but LISTSLICE
+ Single = false;
+ if (CurVal)
+ Elems.push_back(CurVal);
+ CurVal = nullptr;
+ } while (Lex.getCode() != tgtok::r_square);
+
+ if (CurVal) {
+ // LISTELEM
+ if (Single)
+ return CurVal;
+
+ Elems.push_back(CurVal);
+ }
+
+ FlushElems();
+
+ // Concatenate lists in Slices
+ TypedInit *Result = nullptr;
+ for (auto *Slice : Slices) {
+ Result = (Result ? cast<TypedInit>(BinOpInit::getListConcat(Result, Slice))
+ : Slice);
+ }
+
+ return Result;
+}
+
/// ParseRangePiece - Parse a bit/value range.
/// RangePiece ::= INTVAL
/// RangePiece ::= INTVAL '...' INTVAL
@@ -870,47 +1131,9 @@ RecTy *TGParser::ParseType() {
/// ParseIDValue
Init *TGParser::ParseIDValue(Record *CurRec, StringInit *Name, SMRange NameLoc,
IDParseMode Mode) {
- if (CurRec) {
- if (RecordVal *RV = CurRec->getValue(Name)) {
- if (TrackReferenceLocs)
- RV->addReferenceLoc(NameLoc);
- return VarInit::get(Name, RV->getType());
- }
- }
-
- if ((CurRec && CurRec->isClass()) || CurMultiClass) {
- Init *TemplateArgName;
- if (CurMultiClass) {
- TemplateArgName =
- QualifyName(CurMultiClass->Rec, CurMultiClass, Name, "::");
- } else
- TemplateArgName = QualifyName(*CurRec, CurMultiClass, Name, ":");
-
- Record *TemplateRec = CurMultiClass ? &CurMultiClass->Rec : CurRec;
- if (TemplateRec->isTemplateArg(TemplateArgName)) {
- RecordVal *RV = TemplateRec->getValue(TemplateArgName);
- assert(RV && "Template arg doesn't exist??");
- RV->setUsed(true);
- if (TrackReferenceLocs)
- RV->addReferenceLoc(NameLoc);
- return VarInit::get(TemplateArgName, RV->getType());
- } else if (Name->getValue() == "NAME") {
- return VarInit::get(TemplateArgName, StringRecTy::get(Records));
- }
- }
-
- if (CurLocalScope)
- if (Init *I = CurLocalScope->getVar(Name->getValue()))
- return I;
-
- // If this is in a foreach loop, make sure it's not a loop iterator
- for (const auto &L : Loops) {
- if (L->IterVar) {
- VarInit *IterVar = dyn_cast<VarInit>(L->IterVar);
- if (IterVar && IterVar->getNameInit() == Name)
- return IterVar;
- }
- }
+ if (Init *I = CurScope->getVar(Records, CurMultiClass, Name, NameLoc,
+ TrackReferenceLocs))
+ return I;
if (Mode == ParseNameMode)
return Name;
@@ -944,6 +1167,8 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
TokError("unknown bang operator");
return nullptr;
case tgtok::XNOT:
+ case tgtok::XToLower:
+ case tgtok::XToUpper:
case tgtok::XLOG2:
case tgtok::XHead:
case tgtok::XTail:
@@ -968,6 +1193,16 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
}
break;
+ case tgtok::XToLower:
+ Lex.Lex(); // eat the operation
+ Code = UnOpInit::TOLOWER;
+ Type = StringRecTy::get(Records);
+ break;
+ case tgtok::XToUpper:
+ Lex.Lex(); // eat the operation
+ Code = UnOpInit::TOUPPER;
+ Type = StringRecTy::get(Records);
+ break;
case tgtok::XNOT:
Lex.Lex(); // eat the operation
Code = UnOpInit::NOT;
@@ -1180,8 +1415,11 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XListConcat:
case tgtok::XListSplat:
case tgtok::XListRemove:
+ case tgtok::XRange:
case tgtok::XStrConcat:
case tgtok::XInterleave:
+ case tgtok::XGetDagArg:
+ case tgtok::XGetDagName:
case tgtok::XSetDagOp: { // Value ::= !binop '(' Value ',' Value ')'
tgtok::TokKind OpTok = Lex.getCode();
SMLoc OpLoc = Lex.getLoc();
@@ -1210,9 +1448,16 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XListConcat: Code = BinOpInit::LISTCONCAT; break;
case tgtok::XListSplat: Code = BinOpInit::LISTSPLAT; break;
case tgtok::XListRemove: Code = BinOpInit::LISTREMOVE; break;
+ case tgtok::XRange: Code = BinOpInit::RANGE; break;
case tgtok::XStrConcat: Code = BinOpInit::STRCONCAT; break;
case tgtok::XInterleave: Code = BinOpInit::INTERLEAVE; break;
case tgtok::XSetDagOp: Code = BinOpInit::SETDAGOP; break;
+ case tgtok::XGetDagArg:
+ Code = BinOpInit::GETDAGARG;
+ break;
+ case tgtok::XGetDagName:
+ Code = BinOpInit::GETDAGNAME;
+ break;
}
RecTy *Type = nullptr;
@@ -1225,6 +1470,18 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
Type = DagRecTy::get(Records);
ArgType = DagRecTy::get(Records);
break;
+ case tgtok::XGetDagArg:
+ Type = ParseOperatorType();
+ if (!Type) {
+ TokError("did not get type for !getdagarg operator");
+ return nullptr;
+ }
+ ArgType = DagRecTy::get(Records);
+ break;
+ case tgtok::XGetDagName:
+ Type = StringRecTy::get(Records);
+ ArgType = DagRecTy::get(Records);
+ break;
case tgtok::XAND:
case tgtok::XOR:
case tgtok::XXOR:
@@ -1258,6 +1515,10 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
// We don't know the list type until we parse the first argument.
ArgType = ItemType;
break;
+ case tgtok::XRange:
+ Type = IntRecTy::get(Records)->getListTy();
+ // ArgType may be either Int or List.
+ break;
case tgtok::XStrConcat:
Type = StringRecTy::get(Records);
ArgType = StringRecTy::get(Records);
@@ -1342,6 +1603,27 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
return nullptr;
}
break;
+ case BinOpInit::RANGE:
+ if (InitList.size() == 1) {
+ if (isa<ListRecTy>(ArgType)) {
+ ArgType = nullptr; // Detect error if 2nd arg were present.
+ } else if (isa<IntRecTy>(ArgType)) {
+ // Assume 2nd arg should be IntRecTy
+ } else {
+ Error(InitLoc,
+ Twine("expected list or int, got value of type '") +
+ ArgType->getAsString() + "'");
+ return nullptr;
+ }
+ } else {
+ // Don't come here unless 1st arg is ListRecTy.
+ assert(isa<ListRecTy>(cast<TypedInit>(InitList[0])->getType()));
+ Error(InitLoc,
+ Twine("expected one list, got extra value of type '") +
+ ArgType->getAsString() + "'");
+ return nullptr;
+ }
+ break;
case BinOpInit::EQ:
case BinOpInit::NE:
if (!ArgType->typeIsConvertibleTo(IntRecTy::get(Records)) &&
@@ -1353,6 +1635,8 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
return nullptr;
}
break;
+ case BinOpInit::GETDAGARG: // The 2nd argument of !getdagarg could be
+ // index or name.
case BinOpInit::LE:
case BinOpInit::LT:
case BinOpInit::GE:
@@ -1417,6 +1701,15 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
// a record, with no restriction on its superclasses.
ArgType = RecordRecTy::get(Records, {});
break;
+ case BinOpInit::GETDAGARG:
+ // After parsing the first dag argument, expect an index integer or a
+ // name string.
+ ArgType = nullptr;
+ break;
+ case BinOpInit::GETDAGNAME:
+ // After parsing the first dag argument, expect an index integer.
+ ArgType = IntRecTy::get(Records);
+ break;
default:
break;
}
@@ -1440,6 +1733,37 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
if (Code == BinOpInit::LISTREMOVE)
Type = ArgType;
+ if (Code == BinOpInit::RANGE) {
+ Init *LHS, *RHS;
+ auto ArgCount = InitList.size();
+ assert(ArgCount >= 1);
+ auto *Arg0 = cast<TypedInit>(InitList[0]);
+ auto *Arg0Ty = Arg0->getType();
+ if (ArgCount == 1) {
+ if (isa<ListRecTy>(Arg0Ty)) {
+ // (0, !size(arg))
+ LHS = IntInit::get(Records, 0);
+ RHS = UnOpInit::get(UnOpInit::SIZE, Arg0, IntRecTy::get(Records))
+ ->Fold(CurRec);
+ } else {
+ assert(isa<IntRecTy>(Arg0Ty));
+ // (0, arg)
+ LHS = IntInit::get(Records, 0);
+ RHS = Arg0;
+ }
+ } else if (ArgCount == 2) {
+ assert(isa<IntRecTy>(Arg0Ty));
+ auto *Arg1 = cast<TypedInit>(InitList[1]);
+ assert(isa<IntRecTy>(Arg1->getType()));
+ LHS = Arg0;
+ RHS = Arg1;
+ } else {
+ Error(OpLoc, "expected at most two values of integer");
+ return nullptr;
+ }
+ return BinOpInit::get(Code, LHS, RHS, Type)->Fold(CurRec);
+ }
+
// We allow multiple operands to associative operators like !strconcat as
// shorthand for nesting them.
if (Code == BinOpInit::STRCONCAT || Code == BinOpInit::LISTCONCAT ||
@@ -1466,6 +1790,8 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
return ParseOperationForEachFilter(CurRec, ItemType);
}
+ case tgtok::XSetDagArg:
+ case tgtok::XSetDagName:
case tgtok::XDag:
case tgtok::XIf:
case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')'
@@ -1487,6 +1813,16 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XSubst:
Code = TernOpInit::SUBST;
break;
+ case tgtok::XSetDagArg:
+ Code = TernOpInit::SETDAGARG;
+ Type = DagRecTy::get(Records);
+ ItemType = nullptr;
+ break;
+ case tgtok::XSetDagName:
+ Code = TernOpInit::SETDAGNAME;
+ Type = DagRecTy::get(Records);
+ ItemType = nullptr;
+ break;
}
if (!consume(tgtok::l_paren)) {
TokError("expected '(' after ternary operator");
@@ -1599,6 +1935,35 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
Type = RHSt->getType();
break;
}
+ case tgtok::XSetDagArg: {
+ TypedInit *MHSt = dyn_cast<TypedInit>(MHS);
+ if (!MHSt || !isa<IntRecTy, StringRecTy>(MHSt->getType())) {
+ Error(MHSLoc, Twine("expected integer index or string name, got ") +
+ (MHSt ? ("type '" + MHSt->getType()->getAsString())
+ : ("'" + MHS->getAsString())) +
+ "'");
+ return nullptr;
+ }
+ break;
+ }
+ case tgtok::XSetDagName: {
+ TypedInit *MHSt = dyn_cast<TypedInit>(MHS);
+ if (!MHSt || !isa<IntRecTy, StringRecTy>(MHSt->getType())) {
+ Error(MHSLoc, Twine("expected integer index or string name, got ") +
+ (MHSt ? ("type '" + MHSt->getType()->getAsString())
+ : ("'" + MHS->getAsString())) +
+ "'");
+ return nullptr;
+ }
+ TypedInit *RHSt = dyn_cast<TypedInit>(RHS);
+ // The name could be a string or unset.
+ if (RHSt && !isa<StringRecTy>(RHSt->getType())) {
+ Error(RHSLoc, Twine("expected string or unset name, got type '") +
+ RHSt->getType()->getAsString() + "'");
+ return nullptr;
+ }
+ break;
+ }
}
return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec);
}
@@ -1705,12 +2070,14 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
ParseRec = ParseRecTmp.get();
}
+ TGVarScope *FoldScope = PushScope(ParseRec);
ParseRec->addValue(RecordVal(A, Start->getType(), RecordVal::FK_Normal));
- ParseRec->addValue(RecordVal(B, ListType->getElementType(),
- RecordVal::FK_Normal));
+ ParseRec->addValue(
+ RecordVal(B, ListType->getElementType(), RecordVal::FK_Normal));
Init *ExprUntyped = ParseValue(ParseRec);
ParseRec->removeValue(A);
ParseRec->removeValue(B);
+ PopScope(FoldScope);
if (!ExprUntyped)
return nullptr;
@@ -2042,10 +2409,11 @@ Init *TGParser::ParseOperationForEachFilter(Record *CurRec, RecTy *ItemType) {
std::make_unique<Record>(".parse", ArrayRef<SMLoc>{}, Records);
ParseRec = ParseRecTmp.get();
}
-
+ TGVarScope *TempScope = PushScope(ParseRec);
ParseRec->addValue(RecordVal(LHS, InEltType, RecordVal::FK_Normal));
Init *RHS = ParseValue(ParseRec, ExprEltType);
ParseRec->removeValue(LHS);
+ PopScope(TempScope);
if (!RHS)
return nullptr;
@@ -2171,6 +2539,8 @@ Init *TGParser::ParseOperationCond(Record *CurRec, RecTy *ItemType) {
/// SimpleValue ::= LISTCONCATTOK '(' Value ',' Value ')'
/// SimpleValue ::= LISTSPLATTOK '(' Value ',' Value ')'
/// SimpleValue ::= LISTREMOVETOK '(' Value ',' Value ')'
+/// SimpleValue ::= RANGE '(' Value ')'
+/// SimpleValue ::= RANGE '(' Value ',' Value ')'
/// SimpleValue ::= STRCONCATTOK '(' Value ',' Value ')'
/// SimpleValue ::= COND '(' [Value ':' Value,]+ ')'
///
@@ -2225,10 +2595,13 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
case tgtok::Id: {
SMRange NameLoc = Lex.getLocRange();
StringInit *Name = StringInit::get(Records, Lex.getCurStrVal());
- if (Lex.Lex() != tgtok::less) // consume the Id.
- return ParseIDValue(CurRec, Name, NameLoc, Mode); // Value ::= IDValue
+ tgtok::TokKind Next = Lex.Lex();
+ if (Next == tgtok::equal) // Named argument.
+ return Name;
+ if (Next != tgtok::less) // consume the Id.
+ return ParseIDValue(CurRec, Name, NameLoc, Mode); // Value ::= IDValue
- // Value ::= CLASSID '<' ValueListNE '>' (CLASSID has been consumed)
+ // Value ::= CLASSID '<' ArgValueList '>' (CLASSID has been consumed)
// This is supposed to synthesize a new anonymous definition, deriving
// from the class with the template arguments, but no body.
Record *Class = Records.getClass(Name->getValue());
@@ -2238,7 +2611,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
return nullptr;
}
- SmallVector<Init *, 8> Args;
+ SmallVector<ArgumentInit *, 8> Args;
Lex.Lex(); // consume the <
if (ParseTemplateArgValueList(Args, CurRec, Class))
return nullptr; // Error parsing value list.
@@ -2246,18 +2619,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
if (CheckTemplateArgValues(Args, NameLoc.Start, Class))
return nullptr; // Error checking template argument values.
- // Loop through the arguments that were not specified and make sure
- // they have a complete value.
- ArrayRef<Init *> TArgs = Class->getTemplateArgs();
- for (unsigned I = Args.size(), E = TArgs.size(); I < E; ++I) {
- RecordVal *Arg = Class->getValue(TArgs[I]);
- if (!Arg->getValue()->isComplete()) {
- Error(NameLoc.Start, "Value not specified for template argument '" +
- TArgs[I]->getAsUnquotedString() + "' (#" +
- Twine(I) + ") of parent class '" +
- Class->getNameInitAsString() + "'");
- }
- }
+ if (resolveArguments(Class, Args, NameLoc.Start))
+ return nullptr;
if (TrackReferenceLocs)
Class->appendReferenceLoc(NameLoc);
@@ -2445,6 +2808,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
case tgtok::XSize:
case tgtok::XEmpty:
case tgtok::XCast:
+ case tgtok::XToLower:
+ case tgtok::XToUpper:
case tgtok::XGetDagOp: // Value ::= !unop '(' Value ')'
case tgtok::XExists:
case tgtok::XIsA:
@@ -2471,9 +2836,14 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
case tgtok::XListConcat:
case tgtok::XListSplat:
case tgtok::XListRemove:
+ case tgtok::XRange:
case tgtok::XStrConcat:
case tgtok::XInterleave:
+ case tgtok::XGetDagArg:
+ case tgtok::XGetDagName:
case tgtok::XSetDagOp: // Value ::= !binop '(' Value ',' Value ')'
+ case tgtok::XSetDagArg:
+ case tgtok::XSetDagName:
case tgtok::XIf:
case tgtok::XCond:
case tgtok::XFoldl:
@@ -2493,10 +2863,11 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
///
/// Value ::= SimpleValue ValueSuffix*
/// ValueSuffix ::= '{' BitList '}'
-/// ValueSuffix ::= '[' BitList ']'
+/// ValueSuffix ::= '[' SliceElements ']'
/// ValueSuffix ::= '.' ID
///
Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
+ SMLoc LHSLoc = Lex.getLoc();
Init *Result = ParseSimpleValue(CurRec, ItemType, Mode);
if (!Result) return nullptr;
@@ -2531,18 +2902,35 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
break;
}
case tgtok::l_square: {
- SMLoc SquareLoc = Lex.getLoc();
- Lex.Lex(); // eat the '['
- SmallVector<unsigned, 16> Ranges;
- ParseRangeList(Ranges);
- if (Ranges.empty()) return nullptr;
+ auto *LHS = dyn_cast<TypedInit>(Result);
+ if (!LHS) {
+ Error(LHSLoc, "Invalid value, list expected");
+ return nullptr;
+ }
- Result = Result->convertInitListSlice(Ranges);
- if (!Result) {
- Error(SquareLoc, "Invalid range for list slice");
+ auto *LHSTy = dyn_cast<ListRecTy>(LHS->getType());
+ if (!LHSTy) {
+ Error(LHSLoc, "Type '" + Twine(LHS->getType()->getAsString()) +
+ "' is invalid, list expected");
return nullptr;
}
+ Lex.Lex(); // eat the '['
+ TypedInit *RHS = ParseSliceElements(CurRec, /*Single=*/true);
+ if (!RHS)
+ return nullptr;
+
+ if (isa<ListRecTy>(RHS->getType())) {
+ Result =
+ BinOpInit::get(BinOpInit::LISTSLICE, LHS, RHS, LHSTy)->Fold(CurRec);
+ } else {
+ Result = BinOpInit::get(BinOpInit::LISTELEM, LHS, RHS,
+ LHSTy->getElementType())
+ ->Fold(CurRec);
+ }
+
+ assert(Result);
+
// Eat the ']'.
if (!consume(tgtok::r_square)) {
TokError("expected ']' at end of list slice");
@@ -2744,34 +3132,72 @@ void TGParser::ParseValueList(SmallVectorImpl<Init *> &Result, Record *CurRec,
// ParseTemplateArgValueList - Parse a template argument list with the syntax
// shown, filling in the Result vector. The open angle has been consumed.
-// An empty argument list is allowed. Return false if okay, true if an
+// An empty argument list is allowed. Return false if okay, true if an
// error was detected.
//
-// TemplateArgList ::= '<' [Value {',' Value}*] '>'
-bool TGParser::ParseTemplateArgValueList(SmallVectorImpl<Init *> &Result,
- Record *CurRec, Record *ArgsRec) {
-
+// ArgValueList ::= '<' PostionalArgValueList [','] NamedArgValueList '>'
+// PostionalArgValueList ::= [Value {',' Value}*]
+// NamedArgValueList ::= [NameValue '=' Value {',' NameValue '=' Value}*]
+bool TGParser::ParseTemplateArgValueList(
+ SmallVectorImpl<ArgumentInit *> &Result, Record *CurRec, Record *ArgsRec,
+ bool IsDefm) {
assert(Result.empty() && "Result vector is not empty");
ArrayRef<Init *> TArgs = ArgsRec->getTemplateArgs();
- unsigned ArgIndex = 0;
- RecTy *ItemType;
if (consume(tgtok::greater)) // empty value list
return false;
+ bool HasNamedArg = false;
+ unsigned ArgIndex = 0;
while (true) {
if (ArgIndex >= TArgs.size()) {
TokError("Too many template arguments: " + utostr(ArgIndex + 1));
return true;
}
- const RecordVal *Arg = ArgsRec->getValue(TArgs[ArgIndex]);
- assert(Arg && "Template argument record not found");
- ItemType = Arg->getType();
- Init *Value = ParseValue(CurRec, ItemType);
+ SMLoc ValueLoc = Lex.getLoc();
+ // If we are parsing named argument, we don't need to know the argument name
+ // and argument type will be resolved after we know the name.
+ Init *Value = ParseValue(
+ CurRec,
+ HasNamedArg ? nullptr : ArgsRec->getValue(TArgs[ArgIndex])->getType());
if (!Value)
return true;
- Result.push_back(Value);
+
+ // If we meet '=', then we are parsing named arguments.
+ if (Lex.getCode() == tgtok::equal) {
+ if (!isa<StringInit>(Value))
+ return Error(ValueLoc,
+ "The name of named argument should be a valid identifier");
+
+ auto *Name = cast<StringInit>(Value);
+ Init *QualifiedName =
+ QualifyName(*ArgsRec, CurMultiClass, Name, IsDefm ? "::" : ":");
+ auto *NamedArg = ArgsRec->getValue(QualifiedName);
+ if (!NamedArg)
+ return Error(ValueLoc,
+ "Argument " + Name->getAsString() + " doesn't exist");
+
+ Lex.Lex(); // eat the '='.
+ ValueLoc = Lex.getLoc();
+ Value = ParseValue(CurRec, NamedArg->getType());
+ // Named value can't be uninitialized.
+ if (isa<UnsetInit>(Value))
+ return Error(ValueLoc,
+ "The value of named argument should be initialized, "
+ "but we got '" +
+ Value->getAsString() + "'");
+
+ Result.push_back(ArgumentInit::get(Value, QualifiedName));
+ HasNamedArg = true;
+ } else {
+ // Positional arguments should be put before named arguments.
+ if (HasNamedArg)
+ return Error(ValueLoc,
+ "Positional argument should be put before named argument");
+
+ Result.push_back(ArgumentInit::get(Value, ArgIndex));
+ }
if (consume(tgtok::greater)) // end of argument list?
return false;
@@ -2810,6 +3236,11 @@ Init *TGParser::ParseDeclaration(Record *CurRec,
return nullptr;
}
+ if (!ParsingTemplateArgs && CurScope->varAlreadyDefined(Str)) {
+ TokError("local variable of this name already exists");
+ return nullptr;
+ }
+
SMLoc IdLoc = Lex.getLoc();
Init *DeclName = StringInit::get(Records, Str);
Lex.Lex();
@@ -2984,7 +3415,7 @@ bool TGParser::ParseBodyItem(Record *CurRec) {
return ParseAssert(nullptr, CurRec);
if (Lex.getCode() == tgtok::Defvar)
- return ParseDefvar();
+ return ParseDefvar(CurRec);
if (Lex.getCode() != tgtok::Let) {
if (!ParseDeclaration(CurRec, false))
@@ -3046,15 +3477,10 @@ bool TGParser::ParseBody(Record *CurRec) {
if (!consume(tgtok::l_brace))
return TokError("Expected '{' to start body or ';' for declaration only");
- // An object body introduces a new scope for local variables.
- TGLocalVarScope *BodyScope = PushLocalScope();
-
while (Lex.getCode() != tgtok::r_brace)
if (ParseBodyItem(CurRec))
return true;
- PopLocalScope(BodyScope);
-
// Eat the '}'.
Lex.Lex();
@@ -3105,6 +3531,8 @@ bool TGParser::ApplyLetStack(RecordsEntry &Entry) {
/// BaseClassListNE ::= SubClassRef (',' SubClassRef)*
///
bool TGParser::ParseObjectBody(Record *CurRec) {
+ // An object body introduces a new scope for local variables.
+ TGVarScope *ObjectScope = PushScope(CurRec);
// If there is a baseclass list, read it.
if (consume(tgtok::colon)) {
@@ -3127,7 +3555,9 @@ bool TGParser::ParseObjectBody(Record *CurRec) {
if (ApplyLetStack(CurRec))
return true;
- return ParseBody(CurRec);
+ bool Result = ParseBody(CurRec);
+ PopScope(ObjectScope);
+ return Result;
}
/// ParseDef - Parse and return a top level or multiclass record definition.
@@ -3215,34 +3645,41 @@ bool TGParser::ParseDefset() {
///
/// Defvar ::= DEFVAR Id '=' Value ';'
///
-bool TGParser::ParseDefvar() {
+bool TGParser::ParseDefvar(Record *CurRec) {
assert(Lex.getCode() == tgtok::Defvar);
Lex.Lex(); // Eat the 'defvar' token
if (Lex.getCode() != tgtok::Id)
return TokError("expected identifier");
StringInit *DeclName = StringInit::get(Records, Lex.getCurStrVal());
- if (CurLocalScope) {
- if (CurLocalScope->varAlreadyDefined(DeclName->getValue()))
- return TokError("local variable of this name already exists");
- } else {
- if (Records.getGlobal(DeclName->getValue()))
- return TokError("def or global variable of this name already exists");
+ if (CurScope->varAlreadyDefined(DeclName->getValue()))
+ return TokError("local variable of this name already exists");
+
+  // The name should not conflict with existing field names.
+ if (CurRec) {
+ auto *V = CurRec->getValue(DeclName->getValue());
+ if (V && !V->isTemplateArg())
+ return TokError("field of this name already exists");
}
+  // If this defvar is at the top level, the name should not conflict with
+  // existing global names.
+ if (CurScope->isOutermost() && Records.getGlobal(DeclName->getValue()))
+ return TokError("def or global variable of this name already exists");
+
Lex.Lex();
if (!consume(tgtok::equal))
return TokError("expected '='");
- Init *Value = ParseValue(nullptr);
+ Init *Value = ParseValue(CurRec);
if (!Value)
return true;
if (!consume(tgtok::semi))
return TokError("expected ';'");
- if (CurLocalScope)
- CurLocalScope->addVar(DeclName->getValue(), Value);
+ if (!CurScope->isOutermost())
+ CurScope->addVar(DeclName->getValue(), Value);
else
Records.addExtraGlobal(DeclName->getValue(), Value);
@@ -3271,10 +3708,10 @@ bool TGParser::ParseForeach(MultiClass *CurMultiClass) {
return TokError("Unknown tok");
// Create a loop object and remember it.
- Loops.push_back(std::make_unique<ForeachLoop>(Loc, IterName, ListValue));
-
+ auto TheLoop = std::make_unique<ForeachLoop>(Loc, IterName, ListValue);
// A foreach loop introduces a new scope for local variables.
- TGLocalVarScope *ForeachScope = PushLocalScope();
+ TGVarScope *ForeachScope = PushScope(TheLoop.get());
+ Loops.push_back(std::move(TheLoop));
if (Lex.getCode() != tgtok::l_brace) {
// FOREACH Declaration IN Object
@@ -3295,7 +3732,7 @@ bool TGParser::ParseForeach(MultiClass *CurMultiClass) {
}
}
- PopLocalScope(ForeachScope);
+ PopScope(ForeachScope);
// Resolve the loop or store it for later resolution.
std::unique_ptr<ForeachLoop> Loop = std::move(Loops.back());
@@ -3384,7 +3821,8 @@ bool TGParser::ParseIf(MultiClass *CurMultiClass) {
/// IfBody ::= '{' ObjectList '}'
///
bool TGParser::ParseIfBody(MultiClass *CurMultiClass, StringRef Kind) {
- TGLocalVarScope *BodyScope = PushLocalScope();
+ // An if-statement introduces a new scope for local variables.
+ TGVarScope *BodyScope = PushScope();
if (Lex.getCode() != tgtok::l_brace) {
// A single object.
@@ -3405,7 +3843,7 @@ bool TGParser::ParseIfBody(MultiClass *CurMultiClass, StringRef Kind) {
}
}
- PopLocalScope(BodyScope);
+ PopScope(BodyScope);
return false;
}
@@ -3472,6 +3910,8 @@ bool TGParser::ParseClass() {
}
Lex.Lex(); // eat the name.
+ // A class definition introduces a new scope.
+ TGVarScope *ClassScope = PushScope(CurRec);
// If there are template args, parse them.
if (Lex.getCode() == tgtok::less)
if (ParseTemplateArgList(CurRec))
@@ -3482,6 +3922,8 @@ bool TGParser::ParseClass() {
if (!NoWarnOnUnusedTemplateArgs)
CurRec->checkUnusedTemplateArgs();
+
+ PopScope(ClassScope);
return false;
}
@@ -3547,8 +3989,6 @@ bool TGParser::ParseTopLevelLet(MultiClass *CurMultiClass) {
if (!consume(tgtok::In))
return TokError("expected 'in' at end of top-level 'let'");
- TGLocalVarScope *LetScope = PushLocalScope();
-
// If this is a scalar let, just handle it now
if (Lex.getCode() != tgtok::l_brace) {
// LET LetList IN Object
@@ -3559,6 +3999,9 @@ bool TGParser::ParseTopLevelLet(MultiClass *CurMultiClass) {
// Otherwise, this is a group let.
Lex.Lex(); // eat the '{'.
+ // A group let introduces a new scope for local variables.
+ TGVarScope *LetScope = PushScope();
+
// Parse the object list.
if (ParseObjectList(CurMultiClass))
return true;
@@ -3567,9 +4010,9 @@ bool TGParser::ParseTopLevelLet(MultiClass *CurMultiClass) {
TokError("expected '}' at end of top level let command");
return Error(BraceLoc, "to match this '{'");
}
- }
- PopLocalScope(LetScope);
+ PopScope(LetScope);
+ }
// Outside this let scope, this let block is not active.
LetStack.pop_back();
@@ -3607,6 +4050,9 @@ bool TGParser::ParseMultiClass() {
CurMultiClass = Result.first->second.get();
Lex.Lex(); // Eat the identifier.
+ // A multiclass body introduces a new scope for local variables.
+ TGVarScope *MulticlassScope = PushScope(CurMultiClass);
+
// If there are template args, parse them.
if (Lex.getCode() == tgtok::less)
if (ParseTemplateArgList(nullptr))
@@ -3644,9 +4090,6 @@ bool TGParser::ParseMultiClass() {
if (Lex.Lex() == tgtok::r_brace) // eat the '{'.
return TokError("multiclass must contain at least one def");
- // A multiclass body introduces a new scope for local variables.
- TGLocalVarScope *MulticlassScope = PushLocalScope();
-
while (Lex.getCode() != tgtok::r_brace) {
switch (Lex.getCode()) {
default:
@@ -3673,13 +4116,12 @@ bool TGParser::ParseMultiClass() {
PrintError(SemiLoc, "A multiclass body should not end with a semicolon");
PrintNote("Semicolon ignored; remove to eliminate this error");
}
-
- PopLocalScope(MulticlassScope);
}
if (!NoWarnOnUnusedTemplateArgs)
CurMultiClass->Rec.checkUnusedTemplateArgs();
+ PopScope(MulticlassScope);
CurMultiClass = nullptr;
return false;
}
@@ -3729,26 +4171,10 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
MultiClass *MC = MultiClasses[std::string(Ref.Rec->getName())].get();
assert(MC && "Didn't lookup multiclass correctly?");
- ArrayRef<Init *> TemplateVals = Ref.TemplateArgs;
- ArrayRef<Init *> TArgs = MC->Rec.getTemplateArgs();
SubstStack Substs;
-
- for (unsigned i = 0, e = TArgs.size(); i != e; ++i) {
- if (i < TemplateVals.size()) {
- Substs.emplace_back(TArgs[i], TemplateVals[i]);
- } else {
- Init *Default = MC->Rec.getValue(TArgs[i])->getValue();
- if (!Default->isComplete())
- return Error(SubClassLoc,
- "value not specified for template argument '" +
- TArgs[i]->getAsUnquotedString() + "' (#" +
- Twine(i) + ") of multiclass '" +
- MC->Rec.getNameInitAsString() + "'");
- Substs.emplace_back(TArgs[i], Default);
- }
- }
-
- Substs.emplace_back(QualifiedNameOfImplicitName(MC), DefmName);
+ if (resolveArgumentsOfMultiClass(Substs, MC, Ref.TemplateArgs, DefmName,
+ SubClassLoc))
+ return true;
if (resolve(MC->Entries, Substs, !CurMultiClass && Loops.empty(),
&NewEntries, &SubClassLoc))
@@ -3858,7 +4284,10 @@ bool TGParser::ParseObjectList(MultiClass *MC) {
bool TGParser::ParseFile() {
Lex.Lex(); // Prime the lexer.
- if (ParseObjectList()) return true;
+ TGVarScope *GlobalScope = PushScope();
+ if (ParseObjectList())
+ return true;
+ PopScope(GlobalScope);
// If we have unread input at the end of the file, report it.
if (Lex.getCode() == tgtok::Eof)
@@ -3871,30 +4300,34 @@ bool TGParser::ParseFile() {
// inheritance, multiclass invocation, or anonymous class invocation.
// If necessary, replace an argument with a cast to the required type.
// The argument count has already been checked.
-bool TGParser::CheckTemplateArgValues(SmallVectorImpl<llvm::Init *> &Values,
- SMLoc Loc, Record *ArgsRec) {
-
+bool TGParser::CheckTemplateArgValues(
+ SmallVectorImpl<llvm::ArgumentInit *> &Values, SMLoc Loc, Record *ArgsRec) {
ArrayRef<Init *> TArgs = ArgsRec->getTemplateArgs();
for (unsigned I = 0, E = Values.size(); I < E; ++I) {
- RecordVal *Arg = ArgsRec->getValue(TArgs[I]);
- RecTy *ArgType = Arg->getType();
auto *Value = Values[I];
+ Init *ArgName = nullptr;
+ if (Value->isPositional())
+ ArgName = TArgs[Value->getIndex()];
+ if (Value->isNamed())
+ ArgName = Value->getName();
+
+ RecordVal *Arg = ArgsRec->getValue(ArgName);
+ RecTy *ArgType = Arg->getType();
- if (TypedInit *ArgValue = dyn_cast<TypedInit>(Value)) {
+ if (TypedInit *ArgValue = dyn_cast<TypedInit>(Value->getValue())) {
auto *CastValue = ArgValue->getCastTo(ArgType);
if (CastValue) {
assert((!isa<TypedInit>(CastValue) ||
cast<TypedInit>(CastValue)->getType()->typeIsA(ArgType)) &&
"result of template arg value cast has wrong type");
- Values[I] = CastValue;
+ Values[I] = Value->cloneWithValue(CastValue);
} else {
- PrintFatalError(Loc,
- "Value specified for template argument '" +
- Arg->getNameInitAsString() + "' (#" + Twine(I) +
- ") is of type " + ArgValue->getType()->getAsString() +
- "; expected type " + ArgType->getAsString() + ": " +
- ArgValue->getAsString());
+ PrintFatalError(Loc, "Value specified for template argument '" +
+ Arg->getNameInitAsString() + "' is of type " +
+ ArgValue->getType()->getAsString() +
+ "; expected type " + ArgType->getAsString() +
+ ": " + ArgValue->getAsString());
}
}
}
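
ParseValue now parses '[' SliceElements ']' in place of the old bit-style range list, lowering single-element access to LISTELEM and slices to LISTSLICE. A small sketch of the accepted forms (the list itself is invented for illustration):

  // Illustrative list, not part of this patch.
  defvar L    = [10, 20, 30, 40, 50];
  defvar one  = L[2];          // 30            (single element, int)
  defvar some = L[1...3];      // [20, 30, 40]  (closed-interval slice)
  defvar pick = L[0, 2, 4];    // [10, 30, 50]  (explicit index list)
  defvar nest = L[!range(2)];  // [10, 20]      (any list<int> works as a slice)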
diff --git a/llvm/lib/TableGen/TGParser.h b/llvm/lib/TableGen/TGParser.h
index f7271a5e79c3..d42cdad88a84 100644
--- a/llvm/lib/TableGen/TGParser.h
+++ b/llvm/lib/TableGen/TGParser.h
@@ -19,112 +19,121 @@
#include <map>
namespace llvm {
- class SourceMgr;
- class Twine;
- struct ForeachLoop;
- struct MultiClass;
- struct SubClassReference;
- struct SubMultiClassReference;
-
- struct LetRecord {
- StringInit *Name;
- std::vector<unsigned> Bits;
- Init *Value;
- SMLoc Loc;
- LetRecord(StringInit *N, ArrayRef<unsigned> B, Init *V, SMLoc L)
- : Name(N), Bits(B), Value(V), Loc(L) {
- }
- };
+class SourceMgr;
+class Twine;
+struct ForeachLoop;
+struct MultiClass;
+struct SubClassReference;
+struct SubMultiClassReference;
+
+struct LetRecord {
+ StringInit *Name;
+ std::vector<unsigned> Bits;
+ Init *Value;
+ SMLoc Loc;
+ LetRecord(StringInit *N, ArrayRef<unsigned> B, Init *V, SMLoc L)
+ : Name(N), Bits(B), Value(V), Loc(L) {}
+};
- /// RecordsEntry - Holds exactly one of a Record, ForeachLoop, or
- /// AssertionInfo.
- struct RecordsEntry {
- std::unique_ptr<Record> Rec;
- std::unique_ptr<ForeachLoop> Loop;
- std::unique_ptr<Record::AssertionInfo> Assertion;
-
- void dump() const;
-
- RecordsEntry() = default;
- RecordsEntry(std::unique_ptr<Record> Rec) : Rec(std::move(Rec)) {}
- RecordsEntry(std::unique_ptr<ForeachLoop> Loop)
- : Loop(std::move(Loop)) {}
- RecordsEntry(std::unique_ptr<Record::AssertionInfo> Assertion)
- : Assertion(std::move(Assertion)) {}
- };
+/// RecordsEntry - Holds exactly one of a Record, ForeachLoop, or
+/// AssertionInfo.
+struct RecordsEntry {
+ std::unique_ptr<Record> Rec;
+ std::unique_ptr<ForeachLoop> Loop;
+ std::unique_ptr<Record::AssertionInfo> Assertion;
- /// ForeachLoop - Record the iteration state associated with a for loop.
- /// This is used to instantiate items in the loop body.
- ///
- /// IterVar is allowed to be null, in which case no iteration variable is
- /// defined in the loop at all. (This happens when a ForeachLoop is
- /// constructed by desugaring an if statement.)
- struct ForeachLoop {
- SMLoc Loc;
- VarInit *IterVar;
- Init *ListValue;
- std::vector<RecordsEntry> Entries;
-
- void dump() const;
-
- ForeachLoop(SMLoc Loc, VarInit *IVar, Init *LValue)
+ void dump() const;
+
+ RecordsEntry() = default;
+ RecordsEntry(std::unique_ptr<Record> Rec) : Rec(std::move(Rec)) {}
+ RecordsEntry(std::unique_ptr<ForeachLoop> Loop) : Loop(std::move(Loop)) {}
+ RecordsEntry(std::unique_ptr<Record::AssertionInfo> Assertion)
+ : Assertion(std::move(Assertion)) {}
+};
+
+/// ForeachLoop - Record the iteration state associated with a for loop.
+/// This is used to instantiate items in the loop body.
+///
+/// IterVar is allowed to be null, in which case no iteration variable is
+/// defined in the loop at all. (This happens when a ForeachLoop is
+/// constructed by desugaring an if statement.)
+struct ForeachLoop {
+ SMLoc Loc;
+ VarInit *IterVar;
+ Init *ListValue;
+ std::vector<RecordsEntry> Entries;
+
+ void dump() const;
+
+ ForeachLoop(SMLoc Loc, VarInit *IVar, Init *LValue)
: Loc(Loc), IterVar(IVar), ListValue(LValue) {}
- };
+};
- struct DefsetRecord {
- SMLoc Loc;
- RecTy *EltTy = nullptr;
- SmallVector<Init *, 16> Elements;
- };
+struct DefsetRecord {
+ SMLoc Loc;
+ RecTy *EltTy = nullptr;
+ SmallVector<Init *, 16> Elements;
+};
-class TGLocalVarScope {
- // A scope to hold local variable definitions from defvar.
- std::map<std::string, Init *, std::less<>> vars;
- std::unique_ptr<TGLocalVarScope> parent;
+struct MultiClass {
+ Record Rec; // Placeholder for template args and Name.
+ std::vector<RecordsEntry> Entries;
+ void dump() const;
+
+ MultiClass(StringRef Name, SMLoc Loc, RecordKeeper &Records)
+ : Rec(Name, Loc, Records) {}
+};
+
+class TGVarScope {
public:
- TGLocalVarScope() = default;
- TGLocalVarScope(std::unique_ptr<TGLocalVarScope> parent)
- : parent(std::move(parent)) {}
+ enum ScopeKind { SK_Local, SK_Record, SK_ForeachLoop, SK_MultiClass };
+
+private:
+ ScopeKind Kind;
+ std::unique_ptr<TGVarScope> Parent;
+ // A scope to hold variable definitions from defvar.
+ std::map<std::string, Init *, std::less<>> Vars;
+ Record *CurRec = nullptr;
+ ForeachLoop *CurLoop = nullptr;
+ MultiClass *CurMultiClass = nullptr;
- std::unique_ptr<TGLocalVarScope> extractParent() {
+public:
+ TGVarScope(std::unique_ptr<TGVarScope> Parent)
+ : Kind(SK_Local), Parent(std::move(Parent)) {}
+ TGVarScope(std::unique_ptr<TGVarScope> Parent, Record *Rec)
+ : Kind(SK_Record), Parent(std::move(Parent)), CurRec(Rec) {}
+ TGVarScope(std::unique_ptr<TGVarScope> Parent, ForeachLoop *Loop)
+ : Kind(SK_ForeachLoop), Parent(std::move(Parent)), CurLoop(Loop) {}
+ TGVarScope(std::unique_ptr<TGVarScope> Parent, MultiClass *Multiclass)
+ : Kind(SK_MultiClass), Parent(std::move(Parent)),
+ CurMultiClass(Multiclass) {}
+
+ std::unique_ptr<TGVarScope> extractParent() {
// This is expected to be called just before we are destructed, so
// it doesn't much matter what state we leave 'parent' in.
- return std::move(parent);
+ return std::move(Parent);
}
- Init *getVar(StringRef Name) const {
- auto It = vars.find(Name);
- if (It != vars.end())
- return It->second;
- if (parent)
- return parent->getVar(Name);
- return nullptr;
- }
+ Init *getVar(RecordKeeper &Records, MultiClass *ParsingMultiClass,
+ StringInit *Name, SMRange NameLoc,
+ bool TrackReferenceLocs) const;
bool varAlreadyDefined(StringRef Name) const {
// When we check whether a variable is already defined, for the purpose of
// reporting an error on redefinition, we don't look up to the parent
// scope, because it's all right to shadow an outer definition with an
// inner one.
- return vars.find(Name) != vars.end();
+ return Vars.find(Name) != Vars.end();
}
void addVar(StringRef Name, Init *I) {
- bool Ins = vars.insert(std::make_pair(std::string(Name), I)).second;
+ bool Ins = Vars.insert(std::make_pair(std::string(Name), I)).second;
(void)Ins;
assert(Ins && "Local variable already exists");
}
-};
-struct MultiClass {
- Record Rec; // Placeholder for template args and Name.
- std::vector<RecordsEntry> Entries;
-
- void dump() const;
-
- MultiClass(StringRef Name, SMLoc Loc, RecordKeeper &Records) :
- Rec(Name, Loc, Records) {}
+ bool isOutermost() const { return Parent == nullptr; }
};
class TGParser {
@@ -142,9 +151,8 @@ class TGParser {
/// current value.
MultiClass *CurMultiClass;
- /// CurLocalScope - Innermost of the current nested scopes for 'defvar' local
- /// variables.
- std::unique_ptr<TGLocalVarScope> CurLocalScope;
+ /// CurScope - Innermost of the current nested scopes for 'defvar' variables.
+ std::unique_ptr<TGVarScope> CurScope;
// Record tracker
RecordKeeper &Records;
@@ -186,17 +194,29 @@ public:
return Lex.getDependencies();
}
- TGLocalVarScope *PushLocalScope() {
- CurLocalScope = std::make_unique<TGLocalVarScope>(std::move(CurLocalScope));
+ TGVarScope *PushScope() {
+ CurScope = std::make_unique<TGVarScope>(std::move(CurScope));
// Returns a pointer to the new scope, so that the caller can pass it back
- // to PopLocalScope which will check by assertion that the pushes and pops
+ // to PopScope which will check by assertion that the pushes and pops
// match up properly.
- return CurLocalScope.get();
+ return CurScope.get();
}
- void PopLocalScope(TGLocalVarScope *ExpectedStackTop) {
- assert(ExpectedStackTop == CurLocalScope.get() &&
+ TGVarScope *PushScope(Record *Rec) {
+ CurScope = std::make_unique<TGVarScope>(std::move(CurScope), Rec);
+ return CurScope.get();
+ }
+ TGVarScope *PushScope(ForeachLoop *Loop) {
+ CurScope = std::make_unique<TGVarScope>(std::move(CurScope), Loop);
+ return CurScope.get();
+ }
+ TGVarScope *PushScope(MultiClass *Multiclass) {
+ CurScope = std::make_unique<TGVarScope>(std::move(CurScope), Multiclass);
+ return CurScope.get();
+ }
+ void PopScope(TGVarScope *ExpectedStackTop) {
+ assert(ExpectedStackTop == CurScope.get() &&
"Mismatched pushes and pops of local variable scopes");
- CurLocalScope = CurLocalScope->extractParent();
+ CurScope = CurScope->extractParent();
}
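+
+  // Illustrative usage only (a sketch of the intended pattern, mirroring the
+  // parser methods that call these helpers):
+  //   TGVarScope *BodyScope = PushScope(CurMultiClass);
+  //   // ... parse statements; any 'defvar' is added to BodyScope ...
+  //   PopScope(BodyScope);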
private: // Semantic analysis methods.
@@ -222,6 +242,16 @@ private: // Semantic analysis methods.
SMLoc *Loc = nullptr);
bool addDefOne(std::unique_ptr<Record> Rec);
+ using ArgValueHandler = std::function<void(Init *, Init *)>;
+ bool resolveArguments(
+ Record *Rec, ArrayRef<ArgumentInit *> ArgValues, SMLoc Loc,
+ ArgValueHandler ArgValueHandler = [](Init *, Init *) {});
+ bool resolveArgumentsOfClass(MapResolver &R, Record *Rec,
+ ArrayRef<ArgumentInit *> ArgValues, SMLoc Loc);
+ bool resolveArgumentsOfMultiClass(SubstStack &Substs, MultiClass *MC,
+ ArrayRef<ArgumentInit *> ArgValues,
+ Init *DefmName, SMLoc Loc);
+
private: // Parser methods.
bool consume(tgtok::TokKind K);
bool ParseObjectList(MultiClass *MC = nullptr);
@@ -231,7 +261,7 @@ private: // Parser methods.
bool ParseDefm(MultiClass *CurMultiClass);
bool ParseDef(MultiClass *CurMultiClass);
bool ParseDefset();
- bool ParseDefvar();
+ bool ParseDefvar(Record *CurRec = nullptr);
bool ParseForeach(MultiClass *CurMultiClass);
bool ParseIf(MultiClass *CurMultiClass);
bool ParseIfBody(MultiClass *CurMultiClass, StringRef Kind);
@@ -258,13 +288,16 @@ private: // Parser methods.
IDParseMode Mode = ParseValueMode);
void ParseValueList(SmallVectorImpl<llvm::Init*> &Result,
Record *CurRec, RecTy *ItemType = nullptr);
- bool ParseTemplateArgValueList(SmallVectorImpl<llvm::Init *> &Result,
- Record *CurRec, Record *ArgsRec);
+ bool ParseTemplateArgValueList(SmallVectorImpl<llvm::ArgumentInit *> &Result,
+ Record *CurRec, Record *ArgsRec,
+ bool IsDefm = false);
void ParseDagArgList(
SmallVectorImpl<std::pair<llvm::Init*, StringInit*>> &Result,
Record *CurRec);
bool ParseOptionalRangeList(SmallVectorImpl<unsigned> &Ranges);
bool ParseOptionalBitList(SmallVectorImpl<unsigned> &Ranges);
+ TypedInit *ParseSliceElement(Record *CurRec);
+ TypedInit *ParseSliceElements(Record *CurRec, bool Single = false);
void ParseRangeList(SmallVectorImpl<unsigned> &Result);
bool ParseRangePiece(SmallVectorImpl<unsigned> &Ranges,
TypedInit *FirstItem = nullptr);
@@ -280,7 +313,7 @@ private: // Parser methods.
MultiClass *ParseMultiClassID();
bool ApplyLetStack(Record *CurRec);
bool ApplyLetStack(RecordsEntry &Entry);
- bool CheckTemplateArgValues(SmallVectorImpl<llvm::Init *> &Values,
+ bool CheckTemplateArgValues(SmallVectorImpl<llvm::ArgumentInit *> &Values,
SMLoc Loc, Record *ArgsRec);
};
diff --git a/llvm/lib/TableGen/TableGenBackend.cpp b/llvm/lib/TableGen/TableGenBackend.cpp
index 252f126d2d00..135ec643bc3a 100644
--- a/llvm/lib/TableGen/TableGenBackend.cpp
+++ b/llvm/lib/TableGen/TableGenBackend.cpp
@@ -13,12 +13,21 @@
#include "llvm/TableGen/TableGenBackend.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
#include <cassert>
+#include <cstddef>
using namespace llvm;
const size_t MAX_LINE_LEN = 80U;
+namespace llvm::TableGen::Emitter {
+ManagedStatic<cl::opt<FnT>, OptCreatorT> Action;
+void *OptCreatorT::call() {
+ return new cl::opt<FnT>(cl::desc("Action to perform:"));
+}
+} // namespace llvm::TableGen::Emitter
+
static void printLine(raw_ostream &OS, const Twine &Prefix, char Fill,
StringRef Suffix) {
size_t Pos = (size_t)OS.tell();
diff --git a/llvm/lib/TableGen/TableGenBackendSkeleton.cpp b/llvm/lib/TableGen/TableGenBackendSkeleton.cpp
index 0ba00c8d8ab1..2fde4a66727b 100644
--- a/llvm/lib/TableGen/TableGenBackendSkeleton.cpp
+++ b/llvm/lib/TableGen/TableGenBackendSkeleton.cpp
@@ -46,14 +46,20 @@ void SkeletonEmitter::run(raw_ostream &OS) {
(void)Records; // To suppress unused variable warning; remove on use.
}
-namespace llvm {
+// Choose either option A or B.
-// The only thing that should be in the llvm namespace is the
-// emitter entry point function.
+//===----------------------------------------------------------------------===//
+// Option A: Register the backend as class <SkeletonEmitter>
+static TableGen::Emitter::OptClass<SkeletonEmitter>
+ X("gen-skeleton-class", "Generate example skeleton class");
-void EmitSkeleton(RecordKeeper &RK, raw_ostream &OS) {
+//===----------------------------------------------------------------------===//
+// Option B: Register "EmitSkeleton" directly
+// The emitter entry function may have private (file-static) scope.
+static void EmitSkeleton(RecordKeeper &RK, raw_ostream &OS) {
// Instantiate the emitter class and invoke run().
SkeletonEmitter(RK).run(OS);
}
-} // namespace llvm
+static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton,
+ "Generate example skeleton entry");
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
index 6ef0c804ede3..76f55666e743 100644
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -42,7 +42,6 @@ FunctionPass *createAArch64ExpandPseudoPass();
FunctionPass *createAArch64SLSHardeningPass();
FunctionPass *createAArch64IndirectThunks();
FunctionPass *createAArch64SpeculationHardeningPass();
-FunctionPass *createAArch64KCFIPass();
FunctionPass *createAArch64LoadStoreOptimizationPass();
ModulePass *createAArch64LowerHomogeneousPrologEpilogPass();
FunctionPass *createAArch64SIMDInstrOptPass();
@@ -70,6 +69,7 @@ FunctionPass *createAArch64PostLegalizerLowering();
FunctionPass *createAArch64PostSelectOptimize();
FunctionPass *createAArch64StackTaggingPass(bool IsOptNone);
FunctionPass *createAArch64StackTaggingPreRAPass();
+ModulePass *createAArch64GlobalsTaggingPass();
void initializeAArch64A53Fix835769Pass(PassRegistry&);
void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
@@ -84,7 +84,7 @@ void initializeAArch64ConditionalComparesPass(PassRegistry &);
void initializeAArch64DAGToDAGISelPass(PassRegistry &);
void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
void initializeAArch64ExpandPseudoPass(PassRegistry &);
-void initializeAArch64KCFIPass(PassRegistry &);
+void initializeAArch64GlobalsTaggingPass(PassRegistry &);
void initializeAArch64LoadStoreOptPass(PassRegistry&);
void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &);
void initializeAArch64MIPeepholeOptPass(PassRegistry &);
diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 4bf53792d677..05adbe27c948 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -50,7 +50,7 @@ def FeatureAES : SubtargetFeature<
// Crypto has been split up and any combination is now valid (see the
// crypto definitions above). Also, crypto is now context sensitive:
// it has a different meaning for e.g. Armv8.4 than it has for Armv8.2.
-// Therefore, we rely on Clang, the user interacing tool, to pass on the
+// Therefore, we rely on Clang, the user interfacing tool, to pass on the
// appropriate crypto options. But here in the backend, crypto has very little
// meaning anymore. We kept the Crypto definition here for backward
// compatibility, and now imply features SHA2 and AES, which was the
@@ -289,6 +289,10 @@ def FeatureFuseLiterals : SubtargetFeature<
"fuse-literals", "HasFuseLiterals", "true",
"CPU fuses literal generation operations">;
+def FeatureFuseAddSub2RegAndConstOne : SubtargetFeature<
+ "fuse-addsub-2reg-const1", "HasFuseAddSub2RegAndConstOne", "true",
+ "CPU fuses (a + b + 1) and (a - b - 1)">;
+
def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
@@ -518,6 +522,12 @@ def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice",
"Don't place a BTI instruction "
"after a return-twice">;
+def FeatureCHK : SubtargetFeature<"chk", "HasCHK",
+ "true", "Enable Armv8.0-A Check Feature Status Extension (FEAT_CHK)">;
+
+def FeatureGCS : SubtargetFeature<"gcs", "HasGCS",
+ "true", "Enable Armv9.4-A Guarded Call Stack Extension", [FeatureCHK]>;
+
def FeatureCLRBHB : SubtargetFeature<"clrbhb", "HasCLRBHB",
"true", "Enable Clear BHB instruction (FEAT_CLRBHB)">;
@@ -599,7 +609,7 @@ def HasV8_8aOps : SubtargetFeature<
def HasV8_9aOps : SubtargetFeature<
"v8.9a", "HasV8_9aOps", "true", "Support ARM v8.9a instructions",
[HasV8_8aOps, FeatureCLRBHB, FeaturePRFM_SLC, FeatureSPECRES2,
- FeatureCSSC, FeatureRASv2]>;
+ FeatureCSSC, FeatureRASv2, FeatureCHK]>;
def HasV9_0aOps : SubtargetFeature<
"v9a", "HasV9_0aOps", "true", "Support ARM v9a instructions",
@@ -661,7 +671,7 @@ include "AArch64Schedule.td"
include "AArch64InstrInfo.td"
include "AArch64SchedPredicates.td"
include "AArch64SchedPredExynos.td"
-include "AArch64SchedPredAmpere.td"
+include "AArch64SchedPredNeoverse.td"
include "AArch64Combine.td"
def AArch64InstrInfo : InstrInfo;
@@ -679,6 +689,8 @@ include "AArch64SystemOperands.td"
foreach i = 1-3 in
def FeatureUseEL#i#ForTP : SubtargetFeature<"tpidr-el"#i, "UseEL"#i#"ForTP",
"true", "Permit use of TPIDR_EL"#i#" for the TLS base">;
+def FeatureUseROEL0ForTP : SubtargetFeature<"tpidrro-el0", "UseROEL0ForTP",
+ "true", "Permit use of TPIDRRO_EL0 for the TLS base">;
//===----------------------------------------------------------------------===//
// Control codegen mitigation against Straight Line Speculation vulnerability.
@@ -704,22 +716,39 @@ def FeatureHardenSlsNoComdat : SubtargetFeature<"harden-sls-nocomdat",
class AArch64Unsupported { list<Predicate> F; }
+let F = [HasSVE2p1, HasSVE2p1_or_HasSME2, HasSVE2p1_or_HasSME2p1] in
+def SVE2p1Unsupported : AArch64Unsupported;
+
+def SVE2Unsupported : AArch64Unsupported {
+ let F = !listconcat([HasSVE2, HasSVE2orSME,
+ HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm],
+ SVE2p1Unsupported.F);
+}
+
def SVEUnsupported : AArch64Unsupported {
- let F = [HasSVE, HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3,
- HasSVE2BitPerm, HasSVEorSME, HasSVE2p1, HasSVE2orSME, HasSVE2p1_or_HasSME2p1];
+ let F = !listconcat([HasSVE, HasSVEorSME],
+ SVE2Unsupported.F);
}
-def PAUnsupported : AArch64Unsupported {
- let F = [HasPAuth];
+let F = [HasSME2p1, HasSVE2p1_or_HasSME2p1] in
+def SME2p1Unsupported : AArch64Unsupported;
+
+def SME2Unsupported : AArch64Unsupported {
+ let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2],
+ SME2p1Unsupported.F);
}
def SMEUnsupported : AArch64Unsupported {
- let F = [HasSME, HasSMEF64F64, HasSMEI16I64, HasSME2, HasSVE2p1_or_HasSME2,
- HasSVE2p1_or_HasSME2p1, HasSME2p1, HasSMEF16F16];
+ let F = !listconcat([HasSME, HasSMEI16I64, HasSMEF16F16, HasSMEF64F64],
+ SME2Unsupported.F);
}
+let F = [HasPAuth] in
+def PAUnsupported : AArch64Unsupported;
+
include "AArch64SchedA53.td"
include "AArch64SchedA55.td"
+include "AArch64SchedA510.td"
include "AArch64SchedA57.td"
include "AArch64SchedCyclone.td"
include "AArch64SchedFalkor.td"
@@ -733,7 +762,10 @@ include "AArch64SchedA64FX.td"
include "AArch64SchedThunderX3T110.td"
include "AArch64SchedTSV110.td"
include "AArch64SchedAmpere1.td"
+include "AArch64SchedNeoverseN1.td"
include "AArch64SchedNeoverseN2.td"
+include "AArch64SchedNeoverseV1.td"
+include "AArch64SchedNeoverseV2.td"
def TuneA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
"Cortex-A35 ARM processors">;
@@ -777,33 +809,38 @@ def TuneA65 : SubtargetFeature<"a65", "ARMProcFamily", "CortexA65",
FeatureFuseAddress,
FeatureFuseAdrpAdd,
FeatureFuseLiterals,
- FeatureEnableSelectOptimize]>;
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72",
"Cortex-A72 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
FeatureFuseLiterals,
- FeatureEnableSelectOptimize]>;
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73",
"Cortex-A73 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureEnableSelectOptimize]>;
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
def TuneA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75",
"Cortex-A75 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureEnableSelectOptimize]>;
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
"Cortex-A76 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
FeatureLSLFast,
- FeatureEnableSelectOptimize]>;
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
"Cortex-A77 ARM processors", [
@@ -811,7 +848,8 @@ def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
FeatureFuseAES,
FeatureFuseAdrpAdd,
FeatureLSLFast,
- FeatureEnableSelectOptimize]>;
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78",
"Cortex-A78 ARM processors", [
@@ -820,7 +858,8 @@ def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78",
FeatureFuseAdrpAdd,
FeatureLSLFast,
FeaturePostRAScheduler,
- FeatureEnableSelectOptimize]>;
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily",
"CortexA78C",
@@ -830,7 +869,8 @@ def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily",
FeatureFuseAdrpAdd,
FeatureLSLFast,
FeaturePostRAScheduler,
- FeatureEnableSelectOptimize]>;
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710",
"Cortex-A710 ARM processors", [
@@ -839,7 +879,8 @@ def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710",
FeatureFuseAdrpAdd,
FeatureLSLFast,
FeaturePostRAScheduler,
- FeatureEnableSelectOptimize]>;
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715",
"Cortex-A715 ARM processors", [
@@ -848,7 +889,8 @@ def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715",
FeatureCmpBccFusion,
FeatureLSLFast,
FeatureFuseAdrpAdd,
- FeatureEnableSelectOptimize]>;
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
def TuneR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily",
"CortexR82",
@@ -862,7 +904,8 @@ def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
FeatureFuseAdrpAdd,
FeatureLSLFast,
FeaturePostRAScheduler,
- FeatureEnableSelectOptimize]>;
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2",
"Cortex-X2 ARM processors", [
@@ -871,7 +914,8 @@ def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2",
FeatureFuseAdrpAdd,
FeatureLSLFast,
FeaturePostRAScheduler,
- FeatureEnableSelectOptimize]>;
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3",
"Cortex-X3 ARM processors", [
@@ -879,7 +923,8 @@ def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3",
FeatureFuseAdrpAdd,
FeatureFuseAES,
FeaturePostRAScheduler,
- FeatureEnableSelectOptimize]>;
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX",
"Fujitsu A64FX processors", [
@@ -1064,7 +1109,8 @@ def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1
FeatureFuseAdrpAdd,
FeatureLSLFast,
FeaturePostRAScheduler,
- FeatureEnableSelectOptimize]>;
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2",
"Neoverse N2 ARM processors", [
@@ -1072,7 +1118,8 @@ def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2
FeatureFuseAdrpAdd,
FeatureLSLFast,
FeaturePostRAScheduler,
- FeatureEnableSelectOptimize]>;
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Neoverse512TVB",
"Neoverse 512-TVB ARM processors", [
@@ -1080,7 +1127,8 @@ def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Ne
FeatureFuseAdrpAdd,
FeatureLSLFast,
FeaturePostRAScheduler,
- FeatureEnableSelectOptimize]>;
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1",
"Neoverse V1 ARM processors", [
@@ -1088,14 +1136,16 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1
FeatureFuseAdrpAdd,
FeatureLSLFast,
FeaturePostRAScheduler,
- FeatureEnableSelectOptimize]>;
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2",
"Neoverse V2 ARM processors", [
FeatureFuseAES,
FeatureLSLFast,
FeaturePostRAScheduler,
- FeatureEnableSelectOptimize]>;
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
"Qualcomm Saphira processors", [
@@ -1333,7 +1383,7 @@ def : ProcessorModel<"cortex-a53", CortexA53Model, ProcessorFeatures.A53,
[TuneA53]>;
def : ProcessorModel<"cortex-a55", CortexA55Model, ProcessorFeatures.A55,
[TuneA55]>;
-def : ProcessorModel<"cortex-a510", CortexA55Model, ProcessorFeatures.A510,
+def : ProcessorModel<"cortex-a510", CortexA510Model, ProcessorFeatures.A510,
[TuneA510]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, ProcessorFeatures.A53,
[TuneA57]>;
@@ -1373,15 +1423,15 @@ def : ProcessorModel<"cortex-x3", NeoverseN2Model, ProcessorFeatures.X3,
[TuneX3]>;
def : ProcessorModel<"neoverse-e1", CortexA53Model,
ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>;
-def : ProcessorModel<"neoverse-n1", CortexA57Model,
+def : ProcessorModel<"neoverse-n1", NeoverseN1Model,
ProcessorFeatures.NeoverseN1, [TuneNeoverseN1]>;
def : ProcessorModel<"neoverse-n2", NeoverseN2Model,
ProcessorFeatures.NeoverseN2, [TuneNeoverseN2]>;
-def : ProcessorModel<"neoverse-512tvb", NeoverseN2Model,
+def : ProcessorModel<"neoverse-512tvb", NeoverseV1Model,
ProcessorFeatures.Neoverse512TVB, [TuneNeoverse512TVB]>;
-def : ProcessorModel<"neoverse-v1", NeoverseN2Model,
+def : ProcessorModel<"neoverse-v1", NeoverseV1Model,
ProcessorFeatures.NeoverseV1, [TuneNeoverseV1]>;
-def : ProcessorModel<"neoverse-v2", NeoverseN2Model,
+def : ProcessorModel<"neoverse-v2", NeoverseV2Model,
ProcessorFeatures.NeoverseV2, [TuneNeoverseV2]>;
def : ProcessorModel<"exynos-m3", ExynosM3Model, ProcessorFeatures.ExynosM3,
[TuneExynosM3]>;
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index b8b6ddc33f66..76f1cc782b24 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -27,7 +27,6 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/ELF.h"
@@ -55,6 +54,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include <algorithm>
#include <cassert>
@@ -107,6 +107,7 @@ public:
void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
+ void LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, bool Typed);
typedef std::tuple<unsigned, bool, uint32_t> HwasanMemaccessTuple;
std::map<HwasanMemaccessTuple, MCSymbol *> HwasanMemaccessSymbols;
@@ -298,7 +299,7 @@ void AArch64AsmPrinter::emitSled(const MachineInstr &MI, SledKind Kind) {
// over the full 32 bytes (8 instructions) with the following pattern:
//
// STP X0, X30, [SP, #-16]! ; push X0 and the link register to the stack
- // LDR W0, #12 ; W0 := function ID
+ // LDR W17, #12 ; W17 := function ID
// LDR X16,#12 ; X16 := addr of __xray_FunctionEntry or __xray_FunctionExit
// BLR X16 ; call the tracing trampoline
// ;DATA: 32 bits of function ID
@@ -323,6 +324,100 @@ void AArch64AsmPrinter::emitSled(const MachineInstr &MI, SledKind Kind) {
recordSled(CurSled, MI, Kind, 2);
}
+// Emit the following code for Intrinsic::{xray_customevent,xray_typedevent}
+// (built-in functions __xray_customevent/__xray_typedevent).
+//
+// .Lxray_event_sled_N:
+// b 1f
+// save x0 and x1 (and also x2 for TYPED_EVENT_CALL)
+// set up x0 and x1 (and also x2 for TYPED_EVENT_CALL)
+// bl __xray_CustomEvent or __xray_TypedEvent
+// restore x0 and x1 (and also x2 for TYPED_EVENT_CALL)
+// 1:
+//
+// There are 6 instructions for EVENT_CALL and 9 for TYPED_EVENT_CALL.
+//
+// Then record a sled of kind CUSTOM_EVENT or TYPED_EVENT.
+// After patching, b .+N will become a nop.
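+//
+// For the custom-event case the unpatched sled is expected to look roughly
+// like this (an illustrative sketch; the mov operands come from the
+// PATCHABLE_EVENT_CALL machine instruction):
+//   b    1f                     ; skip the 6-instruction sled
+//   stp  x0, x1, [sp, #-16]!
+//   mov  x0, <event buffer>
+//   mov  x1, <event size>
+//   bl   __xray_CustomEvent
+//   ldp  x0, x1, [sp], #16
+// 1: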
+void AArch64AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
+ bool Typed) {
+ auto &O = *OutStreamer;
+ MCSymbol *CurSled = OutContext.createTempSymbol("xray_sled_", true);
+ O.emitLabel(CurSled);
+ MCInst MovX0Op0 = MCInstBuilder(AArch64::ORRXrs)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::XZR)
+ .addReg(MI.getOperand(0).getReg())
+ .addImm(0);
+ MCInst MovX1Op1 = MCInstBuilder(AArch64::ORRXrs)
+ .addReg(AArch64::X1)
+ .addReg(AArch64::XZR)
+ .addReg(MI.getOperand(1).getReg())
+ .addImm(0);
+ bool MachO = TM.getTargetTriple().isOSBinFormatMachO();
+ auto *Sym = MCSymbolRefExpr::create(
+ OutContext.getOrCreateSymbol(
+ Twine(MachO ? "_" : "") +
+ (Typed ? "__xray_TypedEvent" : "__xray_CustomEvent")),
+ OutContext);
+ if (Typed) {
+ O.AddComment("Begin XRay typed event");
+ EmitToStreamer(O, MCInstBuilder(AArch64::B).addImm(9));
+ EmitToStreamer(O, MCInstBuilder(AArch64::STPXpre)
+ .addReg(AArch64::SP)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::X1)
+ .addReg(AArch64::SP)
+ .addImm(-4));
+ EmitToStreamer(O, MCInstBuilder(AArch64::STRXui)
+ .addReg(AArch64::X2)
+ .addReg(AArch64::SP)
+ .addImm(2));
+ EmitToStreamer(O, MovX0Op0);
+ EmitToStreamer(O, MovX1Op1);
+ EmitToStreamer(O, MCInstBuilder(AArch64::ORRXrs)
+ .addReg(AArch64::X2)
+ .addReg(AArch64::XZR)
+ .addReg(MI.getOperand(2).getReg())
+ .addImm(0));
+ EmitToStreamer(O, MCInstBuilder(AArch64::BL).addExpr(Sym));
+ EmitToStreamer(O, MCInstBuilder(AArch64::LDRXui)
+ .addReg(AArch64::X2)
+ .addReg(AArch64::SP)
+ .addImm(2));
+ O.AddComment("End XRay typed event");
+ EmitToStreamer(O, MCInstBuilder(AArch64::LDPXpost)
+ .addReg(AArch64::SP)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::X1)
+ .addReg(AArch64::SP)
+ .addImm(4));
+
+ recordSled(CurSled, MI, SledKind::TYPED_EVENT, 2);
+ } else {
+ O.AddComment("Begin XRay custom event");
+ EmitToStreamer(O, MCInstBuilder(AArch64::B).addImm(6));
+ EmitToStreamer(O, MCInstBuilder(AArch64::STPXpre)
+ .addReg(AArch64::SP)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::X1)
+ .addReg(AArch64::SP)
+ .addImm(-2));
+ EmitToStreamer(O, MovX0Op0);
+ EmitToStreamer(O, MovX1Op1);
+ EmitToStreamer(O, MCInstBuilder(AArch64::BL).addExpr(Sym));
+ O.AddComment("End XRay custom event");
+ EmitToStreamer(O, MCInstBuilder(AArch64::LDPXpost)
+ .addReg(AArch64::SP)
+ .addReg(AArch64::X0)
+ .addReg(AArch64::X1)
+ .addReg(AArch64::SP)
+ .addImm(2));
+
+ recordSled(CurSled, MI, SledKind::CUSTOM_EVENT, 2);
+ }
+}
+
void AArch64AsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) {
Register AddrReg = MI.getOperand(0).getReg();
assert(std::next(MI.getIterator())->isCall() &&
@@ -531,14 +626,14 @@ void AArch64AsmPrinter::emitHwasanMemaccessSymbols(Module &M) {
if (HasMatchAllTag) {
OutStreamer->emitInstruction(MCInstBuilder(AArch64::UBFMXri)
- .addReg(AArch64::X16)
+ .addReg(AArch64::X17)
.addReg(Reg)
.addImm(56)
.addImm(63),
*STI);
OutStreamer->emitInstruction(MCInstBuilder(AArch64::SUBSXri)
.addReg(AArch64::XZR)
- .addReg(AArch64::X16)
+ .addReg(AArch64::X17)
.addImm(MatchAllTag)
.addImm(0),
*STI);
@@ -1235,7 +1330,7 @@ void AArch64AsmPrinter::LowerFAULTING_OP(const MachineInstr &FaultingMI) {
void AArch64AsmPrinter::emitFMov0(const MachineInstr &MI) {
Register DestReg = MI.getOperand(0).getReg();
if (STI->hasZeroCycleZeroingFP() && !STI->hasZeroCycleZeroingFPWorkaround() &&
- STI->hasNEON()) {
+ STI->isNeonAvailable()) {
// Convert H/S register to corresponding D register
if (AArch64::H0 <= DestReg && DestReg <= AArch64::H31)
DestReg = AArch64::D0 + (DestReg - AArch64::H0);
@@ -1254,7 +1349,9 @@ void AArch64AsmPrinter::emitFMov0(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default: llvm_unreachable("Unexpected opcode");
case AArch64::FMOVH0:
- FMov.setOpcode(AArch64::FMOVWHr);
+ FMov.setOpcode(STI->hasFullFP16() ? AArch64::FMOVWHr : AArch64::FMOVWSr);
+ if (!STI->hasFullFP16())
+ DestReg = (AArch64::S0 + (DestReg - AArch64::H0));
FMov.addOperand(MCOperand::createReg(DestReg));
FMov.addOperand(MCOperand::createReg(AArch64::WZR));
break;
@@ -1550,6 +1647,10 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
case TargetOpcode::PATCHABLE_TAIL_CALL:
LowerPATCHABLE_TAIL_CALL(*MI);
return;
+ case TargetOpcode::PATCHABLE_EVENT_CALL:
+ return LowerPATCHABLE_EVENT_CALL(*MI, false);
+ case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
+ return LowerPATCHABLE_EVENT_CALL(*MI, true);
case AArch64::KCFI_CHECK:
LowerKCFI_CHECK(*MI);
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index e53f573de66c..37976a222783 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -82,9 +82,9 @@ def CC_AArch64_AAPCS : CallingConv<[
nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
CCPassIndirect<i64>>,
- CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+ CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1, aarch64svcount],
CCAssignToReg<[P0, P1, P2, P3]>>,
- CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+ CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1, aarch64svcount],
CCPassIndirect<i64>>,
// Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
@@ -149,7 +149,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
- CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+ CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1, aarch64svcount],
CCAssignToReg<[P0, P1, P2, P3]>>
]>;
@@ -416,6 +416,12 @@ def CSR_Win_AArch64_AAPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
D8, D9, D10, D11,
D12, D13, D14, D15)>;
+def CSR_Win_AArch64_AAPCS_SwiftError
+ : CalleeSavedRegs<(sub CSR_Win_AArch64_AAPCS, X21)>;
+
+def CSR_Win_AArch64_AAPCS_SwiftTail
+ : CalleeSavedRegs<(sub CSR_Win_AArch64_AAPCS, X20, X22)>;
+
// The Control Flow Guard check call uses a custom calling convention that also
// preserves X0-X8 and Q0-Q7.
def CSR_Win_AArch64_CFGuard_Check : CalleeSavedRegs<(add CSR_Win_AArch64_AAPCS,
@@ -489,6 +495,9 @@ def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>;
def CSR_AArch64_RT_MostRegs : CalleeSavedRegs<(add CSR_AArch64_AAPCS,
(sequence "X%u", 9, 15))>;
+def CSR_AArch64_RT_AllRegs : CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs,
+ (sequence "Q%u", 8, 31))>;
+
def CSR_AArch64_StackProbe_Windows
: CalleeSavedRegs<(add (sequence "X%u", 0, 15),
(sequence "X%u", 18, 28), FP, SP,
@@ -551,6 +560,9 @@ def CSR_Darwin_AArch64_CXX_TLS_ViaCopy
def CSR_Darwin_AArch64_RT_MostRegs
: CalleeSavedRegs<(add CSR_Darwin_AArch64_AAPCS, (sequence "X%u", 9, 15))>;
+def CSR_Darwin_AArch64_RT_AllRegs
+ : CalleeSavedRegs<(add CSR_Darwin_AArch64_RT_MostRegs, (sequence "Q%u", 8, 31))>;
+
// Variants of the standard calling conventions for shadow call stack.
// These all preserve x18 in addition to any other registers.
def CSR_AArch64_NoRegs_SCS
@@ -561,6 +573,8 @@ def CSR_AArch64_AAPCS_SwiftError_SCS
: CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>;
def CSR_AArch64_RT_MostRegs_SCS
: CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>;
+def CSR_AArch64_RT_AllRegs_SCS
+ : CalleeSavedRegs<(add CSR_AArch64_RT_AllRegs, X18)>;
def CSR_AArch64_AAVPCS_SCS
: CalleeSavedRegs<(add CSR_AArch64_AAVPCS, X18)>;
def CSR_AArch64_SVE_AAPCS_SCS
diff --git a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
index d12689970dc5..c73b33a58408 100644
--- a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -212,7 +212,7 @@ static bool isCandidateStore(const MachineInstr &MI, const MachineOperand &MO) {
// In case we have str xA, [xA, #imm], this is two different uses
// of xA and we cannot fold, otherwise the xA stored may be wrong,
// even if #imm == 0.
- return MI.getOperandNo(&MO) == 1 &&
+ return MO.getOperandNo() == 1 &&
MI.getOperand(0).getReg() != MI.getOperand(1).getReg();
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 89b6de810b10..96fd28650504 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -30,24 +30,18 @@ def fold_global_offset : GICombineRule<
(defs root:$root, fold_global_offset_matchdata:$matchinfo),
(match (wip_match_opcode G_GLOBAL_VALUE):$root,
[{ return matchFoldGlobalOffset(*${root}, MRI, ${matchinfo}); }]),
- (apply [{ return applyFoldGlobalOffset(*${root}, MRI, B, Observer, ${matchinfo});}])
+ (apply [{ applyFoldGlobalOffset(*${root}, MRI, B, Observer, ${matchinfo});}])
>;
-def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
- "AArch64GenPreLegalizerCombinerHelper", [all_combines,
- fconstant_to_constant,
- icmp_redundant_trunc,
- fold_global_offset]> {
- let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
- let StateClass = "AArch64PreLegalizerCombinerHelperState";
- let AdditionalArguments = [];
+def AArch64PreLegalizerCombiner: GICombinerHelper<
+ "AArch64PreLegalizerCombinerImpl", [all_combines,
+ fconstant_to_constant,
+ icmp_redundant_trunc,
+ fold_global_offset]> {
}
-def AArch64O0PreLegalizerCombinerHelper: GICombinerHelper<
- "AArch64GenO0PreLegalizerCombinerHelper", [optnone_combines]> {
- let DisableRuleOption = "aarch64O0prelegalizercombiner-disable-rule";
- let StateClass = "AArch64O0PreLegalizerCombinerHelperState";
- let AdditionalArguments = [];
+def AArch64O0PreLegalizerCombiner: GICombinerHelper<
+ "AArch64O0PreLegalizerCombinerImpl", [optnone_combines]> {
}
// Matchdata for combines which replace a G_SHUFFLE_VECTOR with a
@@ -101,7 +95,7 @@ def shuf_to_ins: GICombineRule <
(defs root:$root, shuf_to_ins_matchdata:$matchinfo),
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
[{ return matchINS(*${root}, MRI, ${matchinfo}); }]),
- (apply [{ return applyINS(*${root}, MRI, B, ${matchinfo}); }])
+ (apply [{ applyINS(*${root}, MRI, B, ${matchinfo}); }])
>;
def vashr_vlshr_imm_matchdata : GIDefMatchData<"int64_t">;
@@ -163,7 +157,7 @@ def build_vector_to_dup : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_BUILD_VECTOR):$root,
[{ return matchBuildVectorToDup(*${root}, MRI); }]),
- (apply [{ return applyBuildVectorToDup(*${root}, MRI, B); }])
+ (apply [{ applyBuildVectorToDup(*${root}, MRI, B); }])
>;
def build_vector_lowering : GICombineGroup<[build_vector_to_dup]>;
@@ -171,8 +165,8 @@ def build_vector_lowering : GICombineGroup<[build_vector_to_dup]>;
def lower_vector_fcmp : GICombineRule<
(defs root:$root),
(match (wip_match_opcode G_FCMP):$root,
- [{ return lowerVectorFCMP(*${root}, MRI, B); }]),
- (apply [{}])>;
+ [{ return matchLowerVectorFCMP(*${root}, MRI, B); }]),
+ (apply [{ applyLowerVectorFCMP(*${root}, MRI, B); }])>;
def form_truncstore_matchdata : GIDefMatchData<"Register">;
def form_truncstore : GICombineRule<
@@ -213,18 +207,17 @@ def vector_sext_inreg_to_shift : GICombineRule<
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
-def AArch64PostLegalizerLoweringHelper
- : GICombinerHelper<"AArch64GenPostLegalizerLoweringHelper",
+def AArch64PostLegalizerLowering
+ : GICombinerHelper<"AArch64PostLegalizerLoweringImpl",
[shuffle_vector_lowering, vashr_vlshr_imm,
icmp_lowering, build_vector_lowering,
lower_vector_fcmp, form_truncstore,
vector_sext_inreg_to_shift]> {
- let DisableRuleOption = "aarch64postlegalizerlowering-disable-rule";
}
// Post-legalization combines which are primarily optimizations.
-def AArch64PostLegalizerCombinerHelper
- : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
+def AArch64PostLegalizerCombiner
+ : GICombinerHelper<"AArch64PostLegalizerCombinerImpl",
[copy_prop, combines_for_extload,
sext_trunc_sextload, mutate_anyext_to_zext,
hoist_logic_op_with_same_opcode_hands,
@@ -238,5 +231,4 @@ def AArch64PostLegalizerCombinerHelper
ptr_add_immed_chain, overlapping_and,
split_store_zero_128, undef_combines,
select_to_minmax]> {
- let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
}
diff --git a/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp b/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
index 75abe9c53e3b..7d14d2d20bad 100644
--- a/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
+++ b/llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp
@@ -37,7 +37,7 @@ class AArch64CompressJumpTables : public MachineFunctionPass {
MachineFunction *MF;
SmallVector<int, 8> BlockInfo;
- /// Returns the size in instructions of the block \p MBB, or std::nullopt if
+ /// Returns the size of the instructions in the block \p MBB, or std::nullopt if
/// we couldn't get a safe upper bound.
std::optional<int> computeBlockSize(MachineBasicBlock &MBB);
@@ -88,19 +88,20 @@ bool AArch64CompressJumpTables::scanFunction() {
BlockInfo.clear();
BlockInfo.resize(MF->getNumBlockIDs());
+ // NOTE: BlockSize, Offset, OffsetAfterAlignment are all upper bounds.
+
unsigned Offset = 0;
for (MachineBasicBlock &MBB : *MF) {
const Align Alignment = MBB.getAlignment();
- unsigned AlignedOffset;
- if (Alignment == Align(1))
- AlignedOffset = Offset;
- else
- AlignedOffset = alignTo(Offset, Alignment);
- BlockInfo[MBB.getNumber()] = AlignedOffset;
+ unsigned OffsetAfterAlignment = Offset;
+ // We don't know the exact size of MBB so assume worst-case padding.
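+ // (Each AArch64 instruction is 4 bytes, so e.g. an Align(16) block may need
+ // up to 12 bytes of padding.)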
+ if (Alignment > Align(4))
+ OffsetAfterAlignment += Alignment.value() - 4;
+ BlockInfo[MBB.getNumber()] = OffsetAfterAlignment;
auto BlockSize = computeBlockSize(MBB);
if (!BlockSize)
return false;
- Offset = AlignedOffset + *BlockSize;
+ Offset = OffsetAfterAlignment + *BlockSize;
}
return true;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index b3e816df0f46..4c8c03a4c693 100644
--- a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -855,7 +855,7 @@ bool AArch64ConditionalCompares::shouldConvert() {
if (Stress)
return true;
if (!MinInstr)
- MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+ MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
// Head dominates CmpBB, so it is always included in its trace.
MachineTraceMetrics::Trace Trace = MinInstr->getTrace(CmpConv.CmpBB);
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
index 4f324198f3dc..731972a039ba 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp
@@ -239,6 +239,129 @@ static bool trySequenceOfOnes(uint64_t UImm,
return true;
}
+static uint64_t GetRunOfOnesStartingAt(uint64_t V, uint64_t StartPosition) {
+ uint64_t NumOnes = llvm::countr_one(V >> StartPosition);
+
+ uint64_t UnshiftedOnes;
+ if (NumOnes == 64) {
+ UnshiftedOnes = ~0ULL;
+ } else {
+ UnshiftedOnes = (1ULL << NumOnes) - 1;
+ }
+ return UnshiftedOnes << StartPosition;
+}
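+// Example for GetRunOfOnesStartingAt (illustrative): with V = 0b11100110 and
+// StartPosition = 1, the run of ones at bits [1,2] is returned, i.e.
+// 0b00000110.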
+
+static uint64_t MaximallyReplicateSubImmediate(uint64_t V, uint64_t Subset) {
+ uint64_t Result = Subset;
+
+ // Rotation amounts tried below: 64, 32, 16, 8, 4 and 2 bits.
+ for (uint64_t i = 0; i < 6; ++i) {
+ uint64_t Rotation = 1ULL << (6 - i);
+ uint64_t Closure = Result | llvm::rotl<uint64_t>(Result, Rotation);
+ if (Closure != (Closure & V)) {
+ break;
+ }
+ Result = Closure;
+ }
+
+ return Result;
+}
+
+// Find the logical immediate that covers the most bits in RemainingBits,
+// allowing for additional bits to be set that were set in OriginalBits.
+static uint64_t maximalLogicalImmWithin(uint64_t RemainingBits,
+ uint64_t OriginalBits) {
+ // Find the first set bit.
+ uint32_t Position = llvm::countr_zero(RemainingBits);
+
+ // Get the first run of set bits.
+ uint64_t FirstRun = GetRunOfOnesStartingAt(OriginalBits, Position);
+
+ // Replicate the run as many times as possible, as long as the replicated
+ // bits are all set in OriginalBits.
+ uint64_t MaximalImm = MaximallyReplicateSubImmediate(OriginalBits, FirstRun);
+
+ return MaximalImm;
+}
+
+static std::optional<std::pair<uint64_t, uint64_t>>
+decomposeIntoOrrOfLogicalImmediates(uint64_t UImm) {
+ if (UImm == 0 || ~UImm == 0)
+ return std::nullopt;
+
+ // Make sure we don't have a run of ones split around the rotation boundary.
+ uint32_t InitialTrailingOnes = llvm::countr_one(UImm);
+ uint64_t RotatedBits = llvm::rotr<uint64_t>(UImm, InitialTrailingOnes);
+
+ // Find the largest logical immediate that fits within the full immediate.
+ uint64_t MaximalImm1 = maximalLogicalImmWithin(RotatedBits, RotatedBits);
+
+ // Remove all bits that are set by this mask.
+ uint64_t RemainingBits = RotatedBits & ~MaximalImm1;
+
+ // Find the largest logical immediate covering the remaining bits, allowing
+ // for additional bits to be set that were also set in the original immediate.
+ uint64_t MaximalImm2 = maximalLogicalImmWithin(RemainingBits, RotatedBits);
+
+ // If any bits still haven't been covered, then give up.
+ if (RemainingBits & ~MaximalImm2)
+ return std::nullopt;
+
+ // Make sure to un-rotate the immediates.
+ return std::make_pair(rotl(MaximalImm1, InitialTrailingOnes),
+ rotl(MaximalImm2, InitialTrailingOnes));
+}
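+
+// Worked example (illustrative): UImm = 0xFFFF00FF00FFFFFF has 24 trailing
+// ones, so RotatedBits = rotr(UImm, 24) = 0xFFFFFFFFFF00FF00. The first
+// maximal sub-immediate replicates the low run of ones into
+// 0xFF00FF00FF00FF00; the second covers the remaining bits with
+// 0xFFFFFFFF00000000. Un-rotating by 24 yields the pair
+// (0x00FF00FF00FF00FF, 0xFF00000000FFFFFF), both valid logical immediates,
+// whose OR reproduces the original value.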
+
+// Attempt to expand an immediate as the ORR of a pair of logical immediates.
+static bool tryOrrOfLogicalImmediates(uint64_t UImm,
+ SmallVectorImpl<ImmInsnModel> &Insn) {
+ auto MaybeDecomposition = decomposeIntoOrrOfLogicalImmediates(UImm);
+ if (MaybeDecomposition == std::nullopt)
+ return false;
+ uint64_t Imm1 = MaybeDecomposition->first;
+ uint64_t Imm2 = MaybeDecomposition->second;
+
+ uint64_t Encoding1, Encoding2;
+ bool Imm1Success = AArch64_AM::processLogicalImmediate(Imm1, 64, Encoding1);
+ bool Imm2Success = AArch64_AM::processLogicalImmediate(Imm2, 64, Encoding2);
+
+ if (Imm1Success && Imm2Success) {
+ // Create the ORR-immediate instructions.
+ Insn.push_back({AArch64::ORRXri, 0, Encoding1});
+ Insn.push_back({AArch64::ORRXri, 1, Encoding2});
+ return true;
+ }
+
+ return false;
+}
+
+// Attempt to expand an immediate as the AND of a pair of logical immediates.
+// This is done by applying De Morgan's law, exploiting the fact that the set
+// of logical immediates is closed under bitwise complement.
+static bool tryAndOfLogicalImmediates(uint64_t UImm,
+ SmallVectorImpl<ImmInsnModel> &Insn) {
+ // Apply DeMorgan's law to turn this into an ORR problem.
+ auto MaybeDecomposition = decomposeIntoOrrOfLogicalImmediates(~UImm);
+ if (MaybeDecomposition == std::nullopt)
+ return false;
+ uint64_t Imm1 = MaybeDecomposition->first;
+ uint64_t Imm2 = MaybeDecomposition->second;
+
+ uint64_t Encoding1, Encoding2;
+ bool Imm1Success = AArch64_AM::processLogicalImmediate(~Imm1, 64, Encoding1);
+ bool Imm2Success = AArch64_AM::processLogicalImmediate(~Imm2, 64, Encoding2);
+
+ if (Imm1Success && Imm2Success) {
+ // Materialize Imm1, the LHS of the AND
+ Insn.push_back({AArch64::ORRXri, 0, Encoding1});
+ // AND Imm1 with Imm2
+ Insn.push_back({AArch64::ANDXri, 1, Encoding2});
+ return true;
+ }
+
+ return false;
+}
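+
+// A sketch of the identity used above: if ~UImm == A | B for logical
+// immediates A and B, then UImm == ~A & ~B, and ~A and ~B are themselves
+// logical immediates, so the value can be built as
+//   orr xD, xzr, #~A
+//   and xD, xD,  #~B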
+
/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to a
/// MOVZ or MOVN of width BitSize followed by up to 3 MOVK instructions.
static inline void expandMOVImmSimple(uint64_t Imm, unsigned BitSize,
@@ -268,8 +391,8 @@ static inline void expandMOVImmSimple(uint64_t Imm, unsigned BitSize,
unsigned Shift = 0; // LSL amount for high bits with MOVZ/MOVN
unsigned LastShift = 0; // LSL amount for last MOVK
if (Imm != 0) {
- unsigned LZ = countLeadingZeros(Imm);
- unsigned TZ = countTrailingZeros(Imm);
+ unsigned LZ = llvm::countl_zero(Imm);
+ unsigned TZ = llvm::countr_zero(Imm);
Shift = (TZ / 16) * 16;
LastShift = ((63 - LZ) / 16) * 16;
}
@@ -372,6 +495,14 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
}
}
+ // Attempt to use a sequence of two ORR-immediate instructions.
+ if (tryOrrOfLogicalImmediates(Imm, Insn))
+ return;
+
+ // Attempt to use a sequence of ORR-immediate followed by AND-immediate.
+ if (tryAndOfLogicalImmediates(Imm, Insn))
+ return;
+
// FIXME: Add more two-instruction sequences.
// Three instruction sequences.
diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 3370da479e3c..dcb73ae2dce2 100644
--- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -20,7 +20,6 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -36,6 +35,7 @@
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -148,10 +148,40 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
case AArch64::ORRWri:
case AArch64::ORRXri:
- MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
- .add(MI.getOperand(0))
- .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
- .addImm(I->Op2));
+ if (I->Op1 == 0) {
+ MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+ .add(MI.getOperand(0))
+ .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
+ .addImm(I->Op2));
+ } else {
+ Register DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ MIBS.push_back(
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+ .addReg(DstReg, RegState::Define |
+ getDeadRegState(DstIsDead && LastItem) |
+ RenamableState)
+ .addReg(DstReg)
+ .addImm(I->Op2));
+ }
+ break;
+ case AArch64::ANDXri:
+ if (I->Op1 == 0) {
+ MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+ .add(MI.getOperand(0))
+ .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
+ .addImm(I->Op2));
+ } else {
+ Register DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ MIBS.push_back(
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
+ .addReg(DstReg, RegState::Define |
+ getDeadRegState(DstIsDead && LastItem) |
+ RenamableState)
+ .addReg(DstReg)
+ .addImm(I->Op2));
+ }
break;
case AArch64::MOVNWi:
case AArch64::MOVNXi:
@@ -559,7 +589,8 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
// If we cannot prefix the requested instruction we'll instead emit a
// prefixed_zeroing_mov for DestructiveBinary.
assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
- DType == AArch64::DestructiveBinaryComm) &&
+ DType == AArch64::DestructiveBinaryComm ||
+ DType == AArch64::DestructiveBinaryCommWithRev) &&
"The destructive operand should be unique");
assert(ElementSize != AArch64::ElementSizeNone &&
"This instruction is unpredicated");
@@ -577,7 +608,8 @@ bool AArch64ExpandPseudo::expand_DestructiveOp(
// unique. Zeros the lanes in z0 that aren't active in p0 with sequence
// movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
if ((DType == AArch64::DestructiveBinary ||
- DType == AArch64::DestructiveBinaryComm) &&
+ DType == AArch64::DestructiveBinaryComm ||
+ DType == AArch64::DestructiveBinaryCommWithRev) &&
!DOPRegIsUnique) {
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
.addReg(DstReg, RegState::Define)
@@ -674,12 +706,15 @@ bool AArch64ExpandPseudo::expandSetTagLoop(
.addImm(2)
.cloneMemRefs(MI)
.setMIFlags(MI.getFlags());
- BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
+ BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))
.addDef(SizeReg)
.addReg(SizeReg)
.addImm(16 * 2)
.addImm(0);
- BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);
+ BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))
+ .addImm(AArch64CC::NE)
+ .addMBB(LoopBB)
+ .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
LoopBB->addSuccessor(LoopBB);
LoopBB->addSuccessor(DoneBB);
@@ -1015,8 +1050,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
if (OrigInstr != -1) {
auto &Orig = TII->get(OrigInstr);
- if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask)
- != AArch64::NotDestructive) {
+ if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
+ AArch64::NotDestructive) {
return expand_DestructiveOp(MI, MBB, MBBI);
}
}
@@ -1145,6 +1180,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
.add(MI.getOperand(2))
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
transferImpOps(MI, MIB1, MIB1);
+ if (auto DebugNumber = MI.peekDebugInstrNum())
+ NewMI->setDebugInstrNum(DebugNumber);
MI.eraseFromParent();
return true;
}
@@ -1303,6 +1340,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
SysReg = AArch64SysReg::TPIDR_EL2;
else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
SysReg = AArch64SysReg::TPIDR_EL1;
+ else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
+ SysReg = AArch64SysReg::TPIDRRO_EL0;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
.addImm(SysReg);
MI.eraseFromParent();
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 41f9303da1bf..1ae3709e9588 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -35,6 +35,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
@@ -53,6 +54,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
@@ -65,7 +67,6 @@
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
@@ -272,7 +273,7 @@ private:
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
unsigned &NumBytes);
- bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
+ bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
public:
// Backend specific FastISel code.
@@ -3021,7 +3022,7 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
// Get a count of how many bytes are to be pushed on the stack.
- NumBytes = CCInfo.getNextStackOffset();
+ NumBytes = CCInfo.getStackSize();
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
@@ -3102,8 +3103,7 @@ bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
return true;
}
-bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
- unsigned NumBytes) {
+bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
CallingConv::ID CC = CLI.CallConv;
// Issue CALLSEQ_END
@@ -3111,33 +3111,31 @@ bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
.addImm(NumBytes).addImm(0);
- // Now the return value.
- if (RetVT != MVT::isVoid) {
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
- CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
+ // Now the return values.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
- // Only handle a single return value.
- if (RVLocs.size() != 1)
- return false;
-
- // Copy all of the result registers out of their specified physreg.
- MVT CopyVT = RVLocs[0].getValVT();
+ Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ MVT CopyVT = VA.getValVT();
+ unsigned CopyReg = ResultReg + i;
// TODO: Handle big-endian results
if (CopyVT.isVector() && !Subtarget->isLittleEndian())
return false;
- Register ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
- TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(RVLocs[0].getLocReg());
- CLI.InRegs.push_back(RVLocs[0].getLocReg());
-
- CLI.ResultReg = ResultReg;
- CLI.NumResultRegs = 1;
+ // Copy each result out of its specified physreg.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
+ CopyReg)
+ .addReg(VA.getLocReg());
+ CLI.InRegs.push_back(VA.getLocReg());
}
+ CLI.ResultReg = ResultReg;
+ CLI.NumResultRegs = RVLocs.size();
+
return true;
}
@@ -3185,13 +3183,6 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (IsVarArg)
return false;
- // FIXME: Only handle *simple* calls for now.
- MVT RetVT;
- if (CLI.RetTy->isVoidTy())
- RetVT = MVT::isVoid;
- else if (!isTypeLegal(CLI.RetTy, RetVT))
- return false;
-
for (auto Flag : CLI.OutFlags)
if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
@@ -3287,7 +3278,7 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
CLI.Call = MIB;
// Finish off the call including any return values.
- return finishCall(CLI, RetVT, NumBytes);
+ return finishCall(CLI, NumBytes);
}
bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
@@ -3786,6 +3777,57 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
updateValueMap(II, ResultReg1, 2);
return true;
}
+ case Intrinsic::aarch64_crc32b:
+ case Intrinsic::aarch64_crc32h:
+ case Intrinsic::aarch64_crc32w:
+ case Intrinsic::aarch64_crc32x:
+ case Intrinsic::aarch64_crc32cb:
+ case Intrinsic::aarch64_crc32ch:
+ case Intrinsic::aarch64_crc32cw:
+ case Intrinsic::aarch64_crc32cx: {
+ if (!Subtarget->hasCRC())
+ return false;
+
+ unsigned Opc;
+ switch (II->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unexpected intrinsic!");
+ case Intrinsic::aarch64_crc32b:
+ Opc = AArch64::CRC32Brr;
+ break;
+ case Intrinsic::aarch64_crc32h:
+ Opc = AArch64::CRC32Hrr;
+ break;
+ case Intrinsic::aarch64_crc32w:
+ Opc = AArch64::CRC32Wrr;
+ break;
+ case Intrinsic::aarch64_crc32x:
+ Opc = AArch64::CRC32Xrr;
+ break;
+ case Intrinsic::aarch64_crc32cb:
+ Opc = AArch64::CRC32CBrr;
+ break;
+ case Intrinsic::aarch64_crc32ch:
+ Opc = AArch64::CRC32CHrr;
+ break;
+ case Intrinsic::aarch64_crc32cw:
+ Opc = AArch64::CRC32CWrr;
+ break;
+ case Intrinsic::aarch64_crc32cx:
+ Opc = AArch64::CRC32CXrr;
+ break;
+ }
+
+ Register LHSReg = getRegForValue(II->getArgOperand(0));
+ Register RHSReg = getRegForValue(II->getArgOperand(1));
+ if (!LHSReg || !RHSReg)
+ return false;
+
+ Register ResultReg =
+ fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
+ updateValueMap(II, ResultReg);
+ return true;
+ }
}
return false;
}
@@ -4848,7 +4890,7 @@ bool AArch64FastISel::selectSDiv(const Instruction *I) {
!(C.isPowerOf2() || C.isNegatedPowerOf2()))
return selectBinaryOp(I, ISD::SDIV);
- unsigned Lg2 = C.countTrailingZeros();
+ unsigned Lg2 = C.countr_zero();
Register Src0Reg = getRegForValue(I->getOperand(0));
if (!Src0Reg)
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 9f3c14aede7f..d66800664c0c 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -954,10 +954,10 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
switch (LastI->getOpcode()) {
case AArch64::STGloop:
case AArch64::STZGloop:
- case AArch64::STGOffset:
- case AArch64::STZGOffset:
- case AArch64::ST2GOffset:
- case AArch64::STZ2GOffset:
+ case AArch64::STGi:
+ case AArch64::STZGi:
+ case AArch64::ST2Gi:
+ case AArch64::STZ2Gi:
return false;
default:
return true;
@@ -1364,6 +1364,27 @@ static void emitShadowCallStackEpilogue(const TargetInstrInfo &TII,
}
}
+// Define the current CFA rule to use the provided FP.
+static void emitDefineCFAWithFP(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, unsigned FixedObject) {
+ const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
+ const AArch64RegisterInfo *TRI = STI.getRegisterInfo();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+
+ const int OffsetToFirstCalleeSaveFromFP =
+ AFI->getCalleeSaveBaseToFrameRecordOffset() -
+ AFI->getCalleeSavedStackSize();
+ Register FramePtr = TRI->getFrameRegister(MF);
+ unsigned Reg = TRI->getDwarfRegNum(FramePtr, true);
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
+ nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+}
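+
+// As a rough illustration: for the common layout where only FP/LR form the
+// frame record (OffsetToFirstCalleeSaveFromFP == -16) and FixedObject is 0,
+// this emits the familiar ".cfi_def_cfa w29, 16", i.e. CFA = FP + 16. The
+// exact offset depends on the frame being lowered.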
+
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -1375,6 +1396,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineModuleInfo &MMI = MF.getMMI();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
bool EmitCFI = AFI->needsDwarfUnwindInfo(MF);
+ bool EmitAsyncCFI = AFI->needsAsyncDwarfUnwindInfo(MF);
bool HasFP = hasFP(MF);
bool NeedsWinCFI = needsWinCFI(MF);
bool HasWinCFI = false;
@@ -1535,7 +1557,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII,
MachineInstr::FrameSetup, false, NeedsWinCFI, &HasWinCFI,
- EmitCFI);
+ EmitAsyncCFI);
NumBytes = 0;
} else if (HomPrologEpilog) {
// Stack has been already adjusted.
@@ -1543,7 +1565,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
} else if (PrologueSaveSize != 0) {
MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI,
- EmitCFI);
+ EmitAsyncCFI);
NumBytes -= PrologueSaveSize;
}
assert(NumBytes >= 0 && "Negative stack allocation size!?");
@@ -1604,25 +1626,14 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
NeedsWinCFI = false;
}
}
- if (EmitCFI) {
- // Define the current CFA rule to use the provided FP.
- const int OffsetToFirstCalleeSaveFromFP =
- AFI->getCalleeSaveBaseToFrameRecordOffset() -
- AFI->getCalleeSavedStackSize();
- Register FramePtr = RegInfo->getFrameRegister(MF);
- unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
- unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
- nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
- BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
- .addCFIIndex(CFIIndex)
- .setMIFlags(MachineInstr::FrameSetup);
- }
+ if (EmitAsyncCFI)
+ emitDefineCFAWithFP(MF, MBB, MBBI, DL, FixedObject);
}
// Now emit the moves for whatever callee saved regs we have (including FP,
// LR if those are saved). Frame instructions for SVE register are emitted
// later, after the instruction which actually save SVE regs.
- if (EmitCFI)
+ if (EmitAsyncCFI)
emitCalleeSavedGPRLocations(MBB, MBBI);
// Alignment is required for the parent frame, not the funclet
@@ -1728,10 +1739,23 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
NumBytes = 0;
if (RealignmentPadding > 0) {
- BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
- .addReg(AArch64::SP)
- .addImm(RealignmentPadding)
- .addImm(0);
+ if (RealignmentPadding >= 4096) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm))
+ .addReg(AArch64::X16, RegState::Define)
+ .addImm(RealignmentPadding)
+ .setMIFlags(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXrx64), AArch64::X15)
+ .addReg(AArch64::SP)
+ .addReg(AArch64::X16, RegState::Kill)
+ .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
+ .setMIFlag(MachineInstr::FrameSetup);
+ } else {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X15)
+ .addReg(AArch64::SP)
+ .addImm(RealignmentPadding)
+ .addImm(0)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
uint64_t AndMask = ~(MFI.getMaxAlign().value() - 1);
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
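
A small sketch of the realignment arithmetic this hunk changes: the AND mask comes from the maximum alignment, and padding that does not fit ADD's 12-bit unsigned immediate is first materialised into a scratch register (X16 in the patch). The helper names below are illustrative.

    #include <cstdint>
    #include <iostream>

    // Mask applied to SP to round down to the requested alignment.
    uint64_t realignMask(uint64_t MaxAlign) { return ~(MaxAlign - 1); }

    // ADDXri encodes only a 12-bit unsigned immediate, so larger padding is
    // moved into a register and added with an extended-register ADD.
    bool needsScratchMove(uint64_t RealignmentPadding) {
      return RealignmentPadding >= 4096;
    }

    int main() {
      std::cout << std::hex << realignMask(64) << "\n"; // ffffffffffffffc0
      std::cout << needsScratchMove(4096) << "\n";      // 1
    }
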
@@ -1766,16 +1790,16 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
emitFrameOffset(
MBB, CalleeSavesBegin, DL, AArch64::SP, AArch64::SP, -AllocateBefore, TII,
MachineInstr::FrameSetup, false, false, nullptr,
- EmitCFI && !HasFP && AllocateBefore,
+ EmitAsyncCFI && !HasFP && AllocateBefore,
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
- if (EmitCFI)
+ if (EmitAsyncCFI)
emitCalleeSavedSVELocations(MBB, CalleeSavesEnd);
// Finally allocate remaining SVE stack space.
emitFrameOffset(MBB, CalleeSavesEnd, DL, AArch64::SP, AArch64::SP,
-AllocateAfter, TII, MachineInstr::FrameSetup, false, false,
- nullptr, EmitCFI && !HasFP && AllocateAfter,
+ nullptr, EmitAsyncCFI && !HasFP && AllocateAfter,
AllocateBefore + StackOffset::getFixed(
(int64_t)MFI.getStackSize() - NumBytes));
@@ -1796,7 +1820,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
emitFrameOffset(
MBB, MBBI, DL, scratchSPReg, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII, MachineInstr::FrameSetup,
- false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
+ false, NeedsWinCFI, &HasWinCFI, EmitAsyncCFI && !HasFP,
SVEStackSize +
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes));
}
@@ -1858,6 +1882,23 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MBB.addLiveIn(AArch64::X1);
}
}
+
+ if (EmitCFI && !EmitAsyncCFI) {
+ if (HasFP) {
+ emitDefineCFAWithFP(MF, MBB, MBBI, DL, FixedObject);
+ } else {
+ StackOffset TotalSize =
+ SVEStackSize + StackOffset::getFixed((int64_t)MFI.getStackSize());
+ unsigned CFIIndex = MF.addFrameInst(createDefCFA(
+ *RegInfo, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP, TotalSize,
+ /*LastAdjustmentWasScalable=*/false));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ }
+ emitCalleeSavedGPRLocations(MBB, MBBI);
+ emitCalleeSavedSVELocations(MBB, MBBI);
+ }
}
static void InsertReturnAddressAuth(MachineFunction &MF, MachineBasicBlock &MBB,
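
The EmitCFI/EmitAsyncCFI split in the prologue hunks above boils down to a placement policy for unwind directives; a trivial sketch of that policy, with made-up names, is:

    // Async unwind info: CFI directives are interleaved with the prologue so
    // the frame can be unwound at any instruction. Sync-only unwind info: one
    // batch of directives is emitted once the prologue is complete.
    enum class UnwindInfoKind { None, Sync, Async };

    bool emitCFIInline(UnwindInfoKind K) { return K == UnwindInfoKind::Async; }
    bool emitCFIBatchAfterPrologue(UnwindInfoKind K) {
      return K == UnwindInfoKind::Sync;
    }

    int main() { return emitCFIInline(UnwindInfoKind::Async) ? 0 : 1; }
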
@@ -2229,6 +2270,11 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
}
}
+bool AArch64FrameLowering::enableCFIFixup(MachineFunction &MF) const {
+ return TargetFrameLowering::enableCFIFixup(MF) &&
+ MF.getInfo<AArch64FunctionInfo>()->needsAsyncDwarfUnwindInfo(MF);
+}
+
/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
@@ -2541,8 +2587,8 @@ static void computeCalleeSaveRegisterPairs(
// MachO's compact unwind format relies on all registers being stored in
// pairs.
assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost ||
- CC == CallingConv::CXX_FAST_TLS || CC == CallingConv::Win64 ||
- (Count & 1) == 0) &&
+ CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS ||
+ CC == CallingConv::Win64 || (Count & 1) == 0) &&
"Odd number of callee-saved regs to spill!");
int ByteOffset = AFI->getCalleeSavedStackSize();
int StackFillDir = -1;
@@ -2628,7 +2674,8 @@ static void computeCalleeSaveRegisterPairs(
// MachO's compact unwind format relies on all registers being stored in
// adjacent register pairs.
assert((!produceCompactUnwindFrame(MF) || CC == CallingConv::PreserveMost ||
- CC == CallingConv::CXX_FAST_TLS || CC == CallingConv::Win64 ||
+ CC == CallingConv::PreserveAll || CC == CallingConv::CXX_FAST_TLS ||
+ CC == CallingConv::Win64 ||
(RPI.isPaired() &&
((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
RPI.Reg1 + 1 == RPI.Reg2))) &&
@@ -3074,9 +3121,18 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// won't include them.
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
+ // We may address some of the stack above the canonical frame address, either
+ // for our own arguments or during a call. Include that in calculating whether
+ // we have complicated addressing concerns.
+ int64_t CalleeStackUsed = 0;
+ for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) {
+ int64_t FixedOff = MFI.getObjectOffset(I);
+ if (FixedOff > CalleeStackUsed) CalleeStackUsed = FixedOff;
+ }
+
// Conservatively always assume BigStack when there are SVE spills.
- bool BigStack = SVEStackSize ||
- (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
+ bool BigStack = SVEStackSize || (EstimatedStackSize + CSStackSize +
+ CalleeStackUsed) > EstimatedStackSizeLimit;
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
AFI->setHasStackFrame(true);
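
A sketch of the new CalleeStackUsed term: the largest positive fixed-object offset approximates how far above the canonical frame address the function addresses its own or outgoing arguments. The offsets below are made up.

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    int64_t calleeStackUsed(const std::vector<int64_t> &FixedObjectOffsets) {
      int64_t Used = 0;
      for (int64_t Off : FixedObjectOffsets)
        Used = std::max(Used, Off); // only offsets above the CFA matter
      return Used;
    }

    int main() { return calleeStackUsed({-16, 0, 24, 8}) == 24 ? 0 : 1; }
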
@@ -3385,7 +3441,8 @@ class TagStoreEdit {
Register FrameReg;
StackOffset FrameRegOffset;
int64_t Size;
- // If not None, move FrameReg to (FrameReg + FrameRegUpdate) at the end.
+ // If not std::nullopt, move FrameReg to (FrameReg + FrameRegUpdate) at the
+ // end.
std::optional<int64_t> FrameRegUpdate;
// MIFlags for any FrameReg updating instructions.
unsigned FrameRegUpdateFlags;
@@ -3429,7 +3486,11 @@ void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
Register BaseReg = FrameReg;
int64_t BaseRegOffsetBytes = FrameRegOffset.getFixed();
if (BaseRegOffsetBytes < kMinOffset ||
- BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset) {
+ BaseRegOffsetBytes + (Size - Size % 32) > kMaxOffset ||
+ // BaseReg can be FP, which is not necessarily aligned to 16-bytes. In
+ // that case, BaseRegOffsetBytes will not be aligned to 16 bytes, which
+ // is required for the offset of ST2G.
+ BaseRegOffsetBytes % 16 != 0) {
Register ScratchReg = MRI->createVirtualRegister(&AArch64::GPR64RegClass);
emitFrameOffset(*MBB, InsertI, DL, ScratchReg, BaseReg,
StackOffset::getFixed(BaseRegOffsetBytes), TII);
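
A sketch of the encodability check this hunk tightens, under the usual assumption that STG/ST2G take a signed 9-bit offset scaled by 16; the extra condition is the new 16-byte-alignment requirement on the base offset, which FP-relative offsets do not guarantee.

    #include <cstdint>

    bool isEncodableTagStoreOffset(int64_t OffsetBytes) {
      constexpr int64_t kMin = -256 * 16; // assumed minimum, -4096
      constexpr int64_t kMax = 255 * 16;  // assumed maximum, 4080
      return OffsetBytes % 16 == 0 && OffsetBytes >= kMin && OffsetBytes <= kMax;
    }

    int main() {
      // 4080 is representable; an FP-relative offset of 8 is not.
      return isEncodableTagStoreOffset(4080) && !isEncodableTagStoreOffset(8) ? 0 : 1;
    }
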
@@ -3442,8 +3503,9 @@ void TagStoreEdit::emitUnrolled(MachineBasicBlock::iterator InsertI) {
int64_t InstrSize = (Size > 16) ? 32 : 16;
unsigned Opcode =
InstrSize == 16
- ? (ZeroData ? AArch64::STZGOffset : AArch64::STGOffset)
- : (ZeroData ? AArch64::STZ2GOffset : AArch64::ST2GOffset);
+ ? (ZeroData ? AArch64::STZGi : AArch64::STGi)
+ : (ZeroData ? AArch64::STZ2Gi : AArch64::ST2Gi);
+ assert(BaseRegOffsetBytes % 16 == 0);
MachineInstr *I = BuildMI(*MBB, InsertI, DL, TII->get(Opcode))
.addReg(AArch64::SP)
.addReg(BaseReg)
@@ -3624,8 +3686,8 @@ bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset,
const MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Opcode = MI.getOpcode();
- ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGOffset ||
- Opcode == AArch64::STZ2GOffset);
+ ZeroData = (Opcode == AArch64::STZGloop || Opcode == AArch64::STZGi ||
+ Opcode == AArch64::STZ2Gi);
if (Opcode == AArch64::STGloop || Opcode == AArch64::STZGloop) {
if (!MI.getOperand(0).isDead() || !MI.getOperand(1).isDead())
@@ -3637,9 +3699,9 @@ bool isMergeableStackTaggingInstruction(MachineInstr &MI, int64_t &Offset,
return true;
}
- if (Opcode == AArch64::STGOffset || Opcode == AArch64::STZGOffset)
+ if (Opcode == AArch64::STGi || Opcode == AArch64::STZGi)
Size = 16;
- else if (Opcode == AArch64::ST2GOffset || Opcode == AArch64::STZ2GOffset)
+ else if (Opcode == AArch64::ST2Gi || Opcode == AArch64::STZ2Gi)
Size = 32;
else
return false;
@@ -3889,10 +3951,10 @@ void AArch64FrameLowering::orderFrameObjects(
case AArch64::STZGloop:
OpIndex = 3;
break;
- case AArch64::STGOffset:
- case AArch64::STZGOffset:
- case AArch64::ST2GOffset:
- case AArch64::STZ2GOffset:
+ case AArch64::STGi:
+ case AArch64::STZGi:
+ case AArch64::ST2Gi:
+ case AArch64::STZ2Gi:
OpIndex = 1;
break;
default:
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index e83366519552..147b5c181be5 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -35,6 +35,8 @@ public:
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;
+ bool enableCFIFixup(MachineFunction &MF) const override;
+
bool canUseAsPrologue(const MachineBasicBlock &MBB) const override;
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
diff --git a/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp b/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp
new file mode 100644
index 000000000000..2ed668712897
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64GlobalsTagging.cpp
@@ -0,0 +1,142 @@
+//===- AArch64GlobalsTagging.cpp - Global tagging in IR -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <set>
+
+using namespace llvm;
+
+static const Align kTagGranuleSize = Align(16);
+
+static bool shouldTagGlobal(GlobalVariable &G) {
+ if (!G.isTagged())
+ return false;
+
+ assert(G.hasSanitizerMetadata() &&
+ "Missing sanitizer metadata, but symbol is apparently tagged.");
+ GlobalValue::SanitizerMetadata Meta = G.getSanitizerMetadata();
+
+ // For now, don't instrument constant data, as it'll be in .rodata anyway. It
+ // may be worth instrumenting these in future to stop them from being used as
+ // gadgets.
+ if (G.getName().startswith("llvm.") || G.isThreadLocal() || G.isConstant()) {
+ Meta.Memtag = false;
+ G.setSanitizerMetadata(Meta);
+ return false;
+ }
+
+ return true;
+}
+
+// Technically, due to ELF symbol interposition semantics, we can't change the
+// alignment or size of symbols. If we increase the alignment or size of a
+// symbol, the compiler may make optimisations based on this new alignment or
+// size. If the symbol is interposed, this optimisation could lead to
+// alignment-related or OOB read/write crashes.
+//
+// This is handled in the linker. When the linker sees multiple declarations of
+// a global variable, and some are tagged, and some are untagged, it resolves it
+// to be an untagged definition - but preserves the tag-granule-rounded size and
+// tag-granule-alignment. This should prevent these kind of crashes intra-DSO.
+// For cross-DSO, it's been a reasonable contract that if you're interposing a
+// sanitizer-instrumented global, then the interposer also needs to be
+// sanitizer-instrumented.
+//
+// FIXME: In theory, this can be fixed by splitting the size/alignment of
+// globals into two uses: an "output alignment" that's emitted to the ELF file,
+// and an "optimisation alignment" that's used for optimisation. Thus, we could
+// adjust the output alignment only, and still optimise based on the pessimistic
+// pre-tagging size/alignment.
+static void tagGlobalDefinition(Module &M, GlobalVariable *G) {
+ Constant *Initializer = G->getInitializer();
+ uint64_t SizeInBytes =
+ M.getDataLayout().getTypeAllocSize(Initializer->getType());
+
+ uint64_t NewSize = alignTo(SizeInBytes, kTagGranuleSize);
+ if (SizeInBytes != NewSize) {
+ // Pad the initializer out to the next multiple of 16 bytes.
+ llvm::SmallVector<uint8_t> Init(NewSize - SizeInBytes, 0);
+ Constant *Padding = ConstantDataArray::get(M.getContext(), Init);
+ Initializer = ConstantStruct::getAnon({Initializer, Padding});
+ auto *NewGV = new GlobalVariable(
+ M, Initializer->getType(), G->isConstant(), G->getLinkage(),
+ Initializer, "", G, G->getThreadLocalMode(), G->getAddressSpace());
+ NewGV->copyAttributesFrom(G);
+ NewGV->setComdat(G->getComdat());
+ NewGV->copyMetadata(G, 0);
+
+ NewGV->takeName(G);
+ G->replaceAllUsesWith(NewGV);
+ G->eraseFromParent();
+ G = NewGV;
+ }
+
+ G->setAlignment(std::max(G->getAlign().valueOrOne(), kTagGranuleSize));
+
+ // Ensure that tagged globals don't get merged by ICF - as they should have
+ // different tags at runtime.
+ G->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
+}
+
+namespace {
+class AArch64GlobalsTagging : public ModulePass {
+public:
+ static char ID;
+
+ explicit AArch64GlobalsTagging() : ModulePass(ID) {
+ initializeAArch64GlobalsTaggingPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+
+ StringRef getPassName() const override { return "AArch64 Globals Tagging"; }
+
+private:
+ std::set<GlobalVariable *> GlobalsToTag;
+};
+} // anonymous namespace
+
+char AArch64GlobalsTagging::ID = 0;
+
+bool AArch64GlobalsTagging::runOnModule(Module &M) {
+ // Don't mutate the globals in place, or iterator invalidation occurs.
+ std::vector<GlobalVariable *> GlobalsToTag;
+ for (GlobalVariable &G : M.globals()) {
+ if (G.isDeclaration() || !shouldTagGlobal(G))
+ continue;
+ GlobalsToTag.push_back(&G);
+ }
+
+ for (GlobalVariable *G : GlobalsToTag) {
+ tagGlobalDefinition(M, G);
+ }
+
+ return true;
+}
+
+INITIALIZE_PASS_BEGIN(AArch64GlobalsTagging, "aarch64-globals-tagging",
+ "AArch64 Globals Tagging Pass", false, false)
+INITIALIZE_PASS_END(AArch64GlobalsTagging, "aarch64-globals-tagging",
+ "AArch64 Globals Tagging Pass", false, false)
+
+ModulePass *llvm::createAArch64GlobalsTaggingPass() {
+ return new AArch64GlobalsTagging();
+}
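
A standalone sketch of what tagGlobalDefinition above does to a global's layout: the initializer is padded to the next 16-byte tag granule and the alignment is raised to at least 16. The 20-byte global in main() is hypothetical.

    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    constexpr uint64_t kTagGranule = 16;

    uint64_t paddedSize(uint64_t SizeInBytes) {
      return (SizeInBytes + kTagGranule - 1) / kTagGranule * kTagGranule;
    }

    uint64_t taggedAlignment(uint64_t CurrentAlign) {
      return std::max<uint64_t>(CurrentAlign, kTagGranule);
    }

    int main() {
      // A 20-byte, 4-byte-aligned global becomes a 32-byte, 16-byte-aligned
      // object once tagged.
      std::cout << paddedSize(20) << " " << taggedAlignment(4) << "\n"; // 32 16
    }
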
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 0397e894ef4c..f79d4d1934aa 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -68,8 +68,6 @@ public:
template <signed Low, signed High, signed Scale>
bool SelectRDVLImm(SDValue N, SDValue &Imm);
- bool tryMLAV64LaneV128(SDNode *N);
- bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
@@ -243,6 +241,18 @@ public:
return false;
}
+ bool SelectDupNegativeZero(SDValue N) {
+ switch(N->getOpcode()) {
+ case AArch64ISD::DUP:
+ case ISD::SPLAT_VECTOR: {
+ ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
+ return Const && Const->isZero() && Const->isNegative();
+ }
+ }
+
+ return false;
+ }
+
template<MVT::SimpleValueType VT>
bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
return SelectSVEAddSubImm(N, VT, Imm, Shift);
@@ -335,6 +345,10 @@ public:
// e.g. structured loads and stores (ldN, stN).
SDValue createZTuple(ArrayRef<SDValue> Vecs);
+ // Similar to above, except the register must start at a multiple of the
+ // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
+ SDValue createZMulTuple(ArrayRef<SDValue> Regs);
+
/// Generic helper for the createDTuple/createQTuple
/// functions. Those should almost always be called instead.
SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
@@ -356,8 +370,23 @@ public:
void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
unsigned Opc_rr, unsigned Opc_ri,
bool IsIntr = false);
+ void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
+ unsigned Scale, unsigned Opc_ri,
+ unsigned Opc_rr);
+ void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
+ bool IsZmMulti, unsigned Opcode,
+ bool HasPred = false);
+ void SelectPExtPair(SDNode *N, unsigned Opc);
void SelectWhilePair(SDNode *N, unsigned Opc);
void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
+ void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
+ void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
+ bool IsTupleInput, unsigned Opc);
+ void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);
+
+ template <unsigned MaxIdx, unsigned Scale>
+ void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
+ unsigned Op);
bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
/// SVE Reg+Imm addressing mode.
@@ -391,11 +420,13 @@ public:
bool tryBitfieldInsertOp(SDNode *N);
bool tryBitfieldInsertInZeroOp(SDNode *N);
bool tryShiftAmountMod(SDNode *N);
- bool tryHighFPExt(SDNode *N);
bool tryReadRegister(SDNode *N);
bool tryWriteRegister(SDNode *N);
+ bool trySelectCastFixedLengthToScalableVector(SDNode *N);
+ bool trySelectCastScalableToFixedLengthVector(SDNode *N);
+
// Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"
@@ -448,6 +479,7 @@ private:
SDValue &Offset, unsigned Scale = 1);
bool SelectAllActivePredicate(SDValue N);
+ bool SelectAnyPredicate(SDValue N);
};
} // end anonymous namespace
@@ -805,135 +837,6 @@ getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
return AArch64_AM::InvalidShiftExtend;
}
-// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
-static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
- if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
- DL->getOpcode() != AArch64ISD::DUPLANE32)
- return false;
-
- SDValue SV = DL->getOperand(0);
- if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
- return false;
-
- SDValue EV = SV.getOperand(1);
- if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
- return false;
-
- ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
- ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
- LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
- LaneOp = EV.getOperand(0);
-
- return true;
-}
-
-// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
-// high lane extract.
-static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
- SDValue &LaneOp, int &LaneIdx) {
-
- if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
- std::swap(Op0, Op1);
- if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
- return false;
- }
- StdOp = Op1;
- return true;
-}
-
-/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
-/// is a lane in the upper half of a 128-bit vector. Recognize and select this
-/// so that we don't emit unnecessary lane extracts.
-bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
- SDLoc dl(N);
- SDValue Op0 = N->getOperand(0);
- SDValue Op1 = N->getOperand(1);
- SDValue MLAOp1; // Will hold ordinary multiplicand for MLA.
- SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA.
- int LaneIdx = -1; // Will hold the lane index.
-
- if (Op1.getOpcode() != ISD::MUL ||
- !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
- LaneIdx)) {
- std::swap(Op0, Op1);
- if (Op1.getOpcode() != ISD::MUL ||
- !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
- LaneIdx))
- return false;
- }
-
- SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
-
- SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
-
- unsigned MLAOpc = ~0U;
-
- switch (N->getSimpleValueType(0).SimpleTy) {
- default:
- llvm_unreachable("Unrecognized MLA.");
- case MVT::v4i16:
- MLAOpc = AArch64::MLAv4i16_indexed;
- break;
- case MVT::v8i16:
- MLAOpc = AArch64::MLAv8i16_indexed;
- break;
- case MVT::v2i32:
- MLAOpc = AArch64::MLAv2i32_indexed;
- break;
- case MVT::v4i32:
- MLAOpc = AArch64::MLAv4i32_indexed;
- break;
- }
-
- ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
- return true;
-}
-
-bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
- SDLoc dl(N);
- SDValue SMULLOp0;
- SDValue SMULLOp1;
- int LaneIdx;
-
- if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
- LaneIdx))
- return false;
-
- SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
-
- SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
-
- unsigned SMULLOpc = ~0U;
-
- if (IntNo == Intrinsic::aarch64_neon_smull) {
- switch (N->getSimpleValueType(0).SimpleTy) {
- default:
- llvm_unreachable("Unrecognized SMULL.");
- case MVT::v4i32:
- SMULLOpc = AArch64::SMULLv4i16_indexed;
- break;
- case MVT::v2i64:
- SMULLOpc = AArch64::SMULLv2i32_indexed;
- break;
- }
- } else if (IntNo == Intrinsic::aarch64_neon_umull) {
- switch (N->getSimpleValueType(0).SimpleTy) {
- default:
- llvm_unreachable("Unrecognized SMULL.");
- case MVT::v4i32:
- SMULLOpc = AArch64::UMULLv4i16_indexed;
- break;
- case MVT::v2i64:
- SMULLOpc = AArch64::UMULLv2i32_indexed;
- break;
- }
- } else
- llvm_unreachable("Unrecognized intrinsic.");
-
- ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
- return true;
-}
-
/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32, but the incoming DAG might be acting on a
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
@@ -943,10 +846,7 @@ static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
return N;
SDLoc dl(N);
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
- MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
- dl, MVT::i32, N, SubReg);
- return SDValue(Node, 0);
+ return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
}
// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
@@ -1219,12 +1119,10 @@ bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
SDLoc dl(N);
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
SDValue ImpDef = SDValue(
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
- MachineSDNode *Node = CurDAG->getMachineNode(
- TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
- return SDValue(Node, 0);
+ return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,
+ N);
}
/// Check if the given SHL node (\p N), can be used to form an
@@ -1454,6 +1352,18 @@ SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
return createTuple(Regs, RegClassIDs, SubRegs);
}
+SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
+ assert(Regs.size() == 2 || Regs.size() == 4);
+
+ // The createTuple interface requires 3 RegClassIDs for each possible
+ // tuple type even though we only have them for ZPR2 and ZPR4.
+ static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
+ AArch64::ZPR4Mul4RegClassID};
+ static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
+ AArch64::zsub2, AArch64::zsub3};
+ return createTuple(Regs, RegClassIDs, SubRegs);
+}
+
SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
const unsigned RegClassIDs[],
const unsigned SubRegs[]) {
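
A sketch of the register constraint noted above createZMulTuple: multi-vector operands selected through ZPR2Mul2/ZPR4Mul4 must start at a Z register whose number is a multiple of the tuple size. The helper name is illustrative.

    bool isValidMulTupleStart(unsigned ZRegNum, unsigned TupleSize) {
      // Only 2- and 4-register tuples exist; z0/z2/... for 2-tuples,
      // z0/z4/... for 4-tuples.
      return (TupleSize == 2 || TupleSize == 4) && ZRegNum % TupleSize == 0;
    }

    int main() {
      return isValidMulTupleStart(8, 4) && !isValidMulTupleStart(3, 2) ? 0 : 1;
    }
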
@@ -1692,6 +1602,9 @@ AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
enum class SelectTypeKind {
Int1 = 0,
+ Int = 1,
+ FP = 2,
+ AnyType = 3,
};
/// This function selects an opcode from a list of opcodes, which is
@@ -1705,10 +1618,21 @@ static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
EVT EltVT = VT.getVectorElementType();
switch (Kind) {
+ case SelectTypeKind::AnyType:
+ break;
+ case SelectTypeKind::Int:
+ if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
+ EltVT != MVT::i64)
+ return 0;
+ break;
case SelectTypeKind::Int1:
if (EltVT != MVT::i1)
return 0;
break;
+ case SelectTypeKind::FP:
+ if (EltVT != MVT::f16 && EltVT != MVT::f32 && EltVT != MVT::f64)
+ return 0;
+ break;
}
unsigned Offset;
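
A sketch of the filtering that the extended SelectOpcodeFromVT performs: opcode lists are ordered {B, H, S, D} by element size, a 0 entry means "unsupported", and a kind/element mismatch selects nothing. The enum values below are stand-ins for MVT types.

    #include <array>

    enum class Elt { I1, I8, I16, I32, I64, F16, F32, F64 };
    enum class Kind { Int1, Int, FP, AnyType };

    unsigned selectOpcode(Elt E, Kind K, const std::array<unsigned, 4> &Opcodes) {
      bool IsInt = E == Elt::I8 || E == Elt::I16 || E == Elt::I32 || E == Elt::I64;
      bool IsFP = E == Elt::F16 || E == Elt::F32 || E == Elt::F64;
      if ((K == Kind::Int1 && E != Elt::I1) || (K == Kind::Int && !IsInt) ||
          (K == Kind::FP && !IsFP))
        return 0;
      // Index by element size: 0 = 8-bit, 1 = 16-bit, 2 = 32-bit, 3 = 64-bit.
      unsigned Offset;
      switch (E) {
      default:        Offset = 0; break;
      case Elt::I16:
      case Elt::F16:  Offset = 1; break;
      case Elt::I32:
      case Elt::F32:  Offset = 2; break;
      case Elt::I64:
      case Elt::F64:  Offset = 3; break;
      }
      return Opcodes[Offset];
    }

    int main() {
      // e.g. an FP opcode list has a 0 in the byte slot, so only H/S/D select.
      return selectOpcode(Elt::F32, Kind::FP, {0, 11, 12, 13}) == 12 ? 0 : 1;
    }
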
@@ -1732,6 +1656,28 @@ static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
}
+// This function is almost identical to SelectWhilePair, but has an
+// extra check on the range of the immediate operand.
+// TODO: Merge these two functions together at some point?
+void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
+ // Immediate can be either 0 or 1.
+ if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ if (Imm->getZExtValue() > 1)
+ return;
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
+ SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
+ SDValue SuperReg = SDValue(WhilePair, 0);
+
+ for (unsigned I = 0; I < 2; ++I)
+ ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
+ AArch64::psub0 + I, DL, VT, SuperReg));
+
+ CurDAG->RemoveDeadNode(N);
+}
+
void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
@@ -1761,7 +1707,45 @@ void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
AArch64::zsub0 + i, DL, VT, SuperReg));
CurDAG->RemoveDeadNode(N);
- return;
+}
+
+void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
+ unsigned NumVecs,
+ bool IsZmMulti,
+ unsigned Opcode,
+ bool HasPred) {
+ assert(Opcode != 0 && "Unexpected opcode");
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ unsigned FirstVecIdx = HasPred ? 2 : 1;
+
+ auto GetMultiVecOperand = [=](unsigned StartIdx) {
+ SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx,
+ N->op_begin() + StartIdx + NumVecs);
+ return createZMulTuple(Regs);
+ };
+
+ SDValue Zdn = GetMultiVecOperand(FirstVecIdx);
+
+ SDValue Zm;
+ if (IsZmMulti)
+ Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
+ else
+ Zm = N->getOperand(NumVecs + FirstVecIdx);
+
+ SDNode *Intrinsic;
+ if (HasPred)
+ Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
+ N->getOperand(1), Zdn, Zm);
+ else
+ Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
+ SDValue SuperReg = SDValue(Intrinsic, 0);
+ for (unsigned i = 0; i < NumVecs; ++i)
+ ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
+ AArch64::zsub0 + i, DL, VT, SuperReg));
+
+ CurDAG->RemoveDeadNode(N);
}
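
A sketch of the operand layout SelectDestructiveMultiIntrinsic assumes when it slices out its register tuples: operand 0 is the intrinsic ID, an optional predicate follows, then the NumVecs destination registers, then either a single Zm or another NumVecs-wide tuple. The struct and names are illustrative.

    struct MultiIntrinsicLayout {
      unsigned FirstVecIdx;  // first Zdn operand
      unsigned ZmIdx;        // first Zm operand
      unsigned NumZmOperands;
    };

    MultiIntrinsicLayout layout(unsigned NumVecs, bool HasPred, bool IsZmMulti) {
      MultiIntrinsicLayout L;
      L.FirstVecIdx = HasPred ? 2 : 1;   // skip intrinsic ID (and predicate)
      L.ZmIdx = L.FirstVecIdx + NumVecs; // Zm follows the Zdn tuple
      L.NumZmOperands = IsZmMulti ? NumVecs : 1;
      return L;
    }

    int main() {
      MultiIntrinsicLayout L = layout(/*NumVecs=*/2, /*HasPred=*/false,
                                      /*IsZmMulti=*/true);
      return (L.FirstVecIdx == 1 && L.ZmIdx == 3 && L.NumZmOperands == 2) ? 0 : 1;
    }
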
void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
@@ -1797,6 +1781,161 @@ void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
CurDAG->RemoveDeadNode(N);
}
+void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
+ unsigned NumVecs,
+ unsigned Scale,
+ unsigned Opc_ri,
+ unsigned Opc_rr) {
+ assert(Scale < 4 && "Invalid scaling value.");
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue Chain = N->getOperand(0);
+
+ SDValue PNg = N->getOperand(2);
+ SDValue Base = N->getOperand(3);
+ SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
+ unsigned Opc;
+ std::tie(Opc, Base, Offset) =
+ findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);
+
+ SDValue Ops[] = {PNg, // Predicate-as-counter
+ Base, // Memory operand
+ Offset, Chain};
+
+ const EVT ResTys[] = {MVT::Untyped, MVT::Other};
+
+ SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
+ SDValue SuperReg = SDValue(Load, 0);
+ for (unsigned i = 0; i < NumVecs; ++i)
+ ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
+ AArch64::zsub0 + i, DL, VT, SuperReg));
+
+ // Copy chain
+ unsigned ChainIdx = NumVecs;
+ ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
+ CurDAG->RemoveDeadNode(N);
+}
+
+void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
+ unsigned Opcode) {
+ if (N->getValueType(0) != MVT::nxv4f32)
+ return;
+ SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
+}
+
+void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
+ unsigned Op) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
+ SDValue Zd = createZMulTuple(Regs);
+ SDValue Zn = N->getOperand(1 + NumVecs);
+ SDValue Zm = N->getOperand(2 + NumVecs);
+
+ SDValue Ops[] = {Zd, Zn, Zm};
+
+ SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
+ SDValue SuperReg = SDValue(Intrinsic, 0);
+ for (unsigned i = 0; i < NumVecs; ++i)
+ ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
+ AArch64::zsub0 + i, DL, VT, SuperReg));
+
+ CurDAG->RemoveDeadNode(N);
+}
+
+bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
+ switch (BaseReg) {
+ default:
+ return false;
+ case AArch64::ZA:
+ case AArch64::ZAB0:
+ if (TileNum == 0)
+ break;
+ return false;
+ case AArch64::ZAH0:
+ if (TileNum <= 1)
+ break;
+ return false;
+ case AArch64::ZAS0:
+ if (TileNum <= 3)
+ break;
+ return false;
+ case AArch64::ZAD0:
+ if (TileNum <= 7)
+ break;
+ return false;
+ }
+
+ BaseReg += TileNum;
+ return true;
+}
+
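
A sketch of the ZA tile arithmetic in SelectSMETile above: each element size exposes a different number of tiles (1 x ZAB, 2 x ZAH, 4 x ZAS, 8 x ZAD), the tile number is range-checked, and the concrete tile register is the base register plus the tile number. Names are illustrative.

    #include <optional>

    enum class ZABase { ZA, ZAB0, ZAH0, ZAS0, ZAD0 };

    std::optional<unsigned> resolveTileOffset(ZABase Base, unsigned TileNum) {
      unsigned NumTiles = 1;
      switch (Base) {
      case ZABase::ZA:
      case ZABase::ZAB0: NumTiles = 1; break;
      case ZABase::ZAH0: NumTiles = 2; break;
      case ZABase::ZAS0: NumTiles = 4; break;
      case ZABase::ZAD0: NumTiles = 8; break;
      }
      if (TileNum >= NumTiles)
        return std::nullopt;
      return TileNum; // added to the base register number in the real code
    }

    int main() { return resolveTileOffset(ZABase::ZAD0, 7) ? 0 : 1; }
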
+template <unsigned MaxIdx, unsigned Scale>
+void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
+ unsigned BaseReg, unsigned Op) {
+ unsigned TileNum = 0;
+ if (BaseReg != AArch64::ZA)
+ TileNum = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+
+ if (!SelectSMETile(BaseReg, TileNum))
+ return;
+
+ SDValue SliceBase, Base, Offset;
+ if (BaseReg == AArch64::ZA)
+ SliceBase = N->getOperand(2);
+ else
+ SliceBase = N->getOperand(3);
+
+ if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
+ return;
+
+ SDLoc DL(N);
+ SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
+ SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
+ SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);
+
+ EVT VT = N->getValueType(0);
+ for (unsigned I = 0; I < NumVecs; ++I)
+ ReplaceUses(SDValue(N, I),
+ CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
+ SDValue(Mov, 0)));
+ // Copy chain
+ unsigned ChainIdx = NumVecs;
+ ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
+ CurDAG->RemoveDeadNode(N);
+}
+
+void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
+ unsigned NumOutVecs,
+ bool IsTupleInput,
+ unsigned Opc) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ unsigned NumInVecs = N->getNumOperands() - 1;
+
+ SmallVector<SDValue, 6> Ops;
+ if (IsTupleInput) {
+ assert((NumInVecs == 2 || NumInVecs == 4) &&
+ "Don't know how to handle multi-register input!");
+ SmallVector<SDValue, 4> Regs(N->op_begin() + 1,
+ N->op_begin() + 1 + NumInVecs);
+ Ops.push_back(createZMulTuple(Regs));
+ } else {
+ // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
+ for (unsigned I = 0; I < NumInVecs; I++)
+ Ops.push_back(N->getOperand(1 + I));
+ }
+
+ SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
+ SDValue SuperReg = SDValue(Res, 0);
+
+ for (unsigned I = 0; I < NumOutVecs; I++)
+ ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
+ AArch64::zsub0 + I, DL, VT, SuperReg));
+ CurDAG->RemoveDeadNode(N);
+}
+
void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
unsigned Opc) {
SDLoc dl(N);
@@ -2151,8 +2290,9 @@ static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
}
LSB = SrlImm;
- MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
- : countTrailingOnes<uint64_t>(AndImm)) -
+ MSB = SrlImm +
+ (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
+ : llvm::countr_one<uint64_t>(AndImm)) -
1;
if (ClampMSB)
// Since we're moving the extend before the right shift operation, we need
@@ -2212,7 +2352,7 @@ static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
//
// This gets selected into a single UBFM:
//
- // UBFM Value, ShiftImm, findLastSet(MaskImm)
+ // UBFM Value, ShiftImm, Log2_64(MaskImm)
//
if (N->getOpcode() != ISD::SRL)
@@ -2234,7 +2374,7 @@ static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
LSB = SrlImm;
- MSB = findLastSet(AndMask, ZB_Undefined);
+ MSB = llvm::Log2_64(AndMask);
return true;
}
@@ -2329,35 +2469,6 @@ bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
return true;
}
-/// Try to form fcvtl2 instructions from a floating-point extend of a high-half
-/// extract of a subvector.
-bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) {
- assert(N->getOpcode() == ISD::FP_EXTEND);
-
- // There are 2 forms of fcvtl2 - extend to double or extend to float.
- SDValue Extract = N->getOperand(0);
- EVT VT = N->getValueType(0);
- EVT NarrowVT = Extract.getValueType();
- if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) &&
- (VT != MVT::v4f32 || NarrowVT != MVT::v4f16))
- return false;
-
- // Optionally look past a bitcast.
- Extract = peekThroughBitcasts(Extract);
- if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
- return false;
-
- // Match extract from start of high half index.
- // Example: v8i16 -> v4i16 means the extract must begin at index 4.
- unsigned ExtractIndex = Extract.getConstantOperandVal(1);
- if (ExtractIndex != Extract.getValueType().getVectorNumElements())
- return false;
-
- auto Opcode = VT == MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16;
- CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0));
- return true;
-}
-
static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
SDValue &Opd0, unsigned &Immr, unsigned &Imms,
unsigned NumberOfIgnoredLowBits = 0,
@@ -2415,9 +2526,9 @@ bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
- SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
- ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
- MVT::i32, SDValue(BFM, 0), SubReg));
+ SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
+ MVT::i32, SDValue(BFM, 0));
+ ReplaceNode(N, Inner.getNode());
return true;
}
@@ -2794,8 +2905,8 @@ static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
if (!BiggerPattern && !AndOp0.hasOneUse())
return false;
- DstLSB = countTrailingZeros(NonZeroBits);
- Width = countTrailingOnes(NonZeroBits >> DstLSB);
+ DstLSB = llvm::countr_zero(NonZeroBits);
+ Width = llvm::countr_one(NonZeroBits >> DstLSB);
// Bail out on large Width. This happens when no proper combining / constant
// folding was performed.
@@ -2853,7 +2964,7 @@ static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
// For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
// the AND result corresponding to those bits are shifted out, so it's fine
// to not extract them.
- Width = countTrailingOnes(ShiftedAndImm);
+ Width = llvm::countr_one(ShiftedAndImm);
DstLSB = ShlImm;
Src = Op0.getOperand(0);
return true;
@@ -2883,8 +2994,8 @@ static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
return true;
- DstLSB = countTrailingZeros(NonZeroBits);
- Width = countTrailingOnes(NonZeroBits >> DstLSB);
+ DstLSB = llvm::countr_zero(NonZeroBits);
+ Width = llvm::countr_one(NonZeroBits >> DstLSB);
if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
return false;
@@ -2948,8 +3059,8 @@ static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
}
// BFI/BFXIL dst, src, #lsb, #width.
- int LSB = countTrailingOnes(NotKnownZero);
- int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
+ int LSB = llvm::countr_one(NotKnownZero);
+ int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();
// BFI/BFXIL is an alias of BFM, so translate to BFM operands.
unsigned ImmR = (BitWidth - LSB) % BitWidth;
@@ -3026,10 +3137,10 @@ static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
// the dependency chain is improved after the transformation.
uint64_t SrlImm;
if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
- uint64_t NumTrailingZeroInShiftedMask = countTrailingZeros(AndImm);
+ uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
unsigned MaskWidth =
- countTrailingOnes(AndImm >> NumTrailingZeroInShiftedMask);
+ llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
unsigned UBFMOpc =
(VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
SDNode *UBFMNode = CurDAG->getMachineNode(
@@ -3178,8 +3289,8 @@ static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
// Because of simplify-demanded-bits in DAGCombine, involved masks may not
// have the expected shape. Try to undo that.
- unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
- unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
+ unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
+ unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();
// Given a OR operation, check if we have the following pattern
// ubfm c, b, imm, imm2 (or something that does the same jobs, see
@@ -3304,8 +3415,8 @@ static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
SDValue Src = And1->getOperand(0);
SDValue Dst = And0->getOperand(0);
- unsigned LSB = countTrailingZeros(Mask1Imm);
- int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
+ unsigned LSB = llvm::countr_zero(Mask1Imm);
+ int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();
// The BFXIL inserts the low-order bits from a source register, so right
// shift the needed bits into place.
@@ -3492,7 +3603,7 @@ bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
!isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
return false;
- if (countTrailingOnes(MaskImm) < Bits)
+ if ((unsigned)llvm::countr_one(MaskImm) < Bits)
return false;
NewShiftAmt = ShiftAmt->getOperand(0);
@@ -4011,61 +4122,64 @@ void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
ReplaceNode(N, N3);
}
-// NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length
-// vector types larger than NEON don't have a matching SubRegIndex.
-static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
- assert(V.getValueType().isScalableVector() &&
- V.getValueType().getSizeInBits().getKnownMinValue() ==
- AArch64::SVEBitsPerBlock &&
- "Expected to extract from a packed scalable vector!");
- assert(VT.isFixedLengthVector() &&
- "Expected to extract a fixed length vector!");
+bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
+ assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");
- SDLoc DL(V);
- switch (VT.getSizeInBits()) {
- case 64: {
- auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
- return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
- }
- case 128: {
- auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
- return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
- }
- default: {
- auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
- return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
- }
- }
-}
+ // Bail when not a "cast" like insert_subvector.
+ if (cast<ConstantSDNode>(N->getOperand(2))->getZExtValue() != 0)
+ return false;
+ if (!N->getOperand(0).isUndef())
+ return false;
+
+ // Bail when normal isel should do the job.
+ EVT VT = N->getValueType(0);
+ EVT InVT = N->getOperand(1).getValueType();
+ if (VT.isFixedLengthVector() || InVT.isScalableVector())
+ return false;
+ if (InVT.getSizeInBits() <= 128)
+ return false;
-// NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length
-// vector types larger than NEON don't have a matching SubRegIndex.
-static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
- assert(VT.isScalableVector() &&
- VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
+ // NOTE: We can only get here when doing fixed length SVE code generation.
+ // We do manual selection because the types involved are not linked to real
+ // registers (despite being legal) and must be coerced into SVE registers.
+
+ assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
"Expected to insert into a packed scalable vector!");
- assert(V.getValueType().isFixedLengthVector() &&
- "Expected to insert a fixed length vector!");
- SDLoc DL(V);
- switch (V.getValueType().getSizeInBits()) {
- case 64: {
- auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
- auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
- return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
- SDValue(Container, 0), V, SubReg);
- }
- case 128: {
- auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
- auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
- return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
- SDValue(Container, 0), V, SubReg);
- }
- default: {
- auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
- return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
- }
- }
+ SDLoc DL(N);
+ auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
+ ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
+ N->getOperand(1), RC));
+ return true;
+}
+
+bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
+ assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");
+
+ // Bail when not a "cast" like extract_subvector.
+ if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 0)
+ return false;
+
+ // Bail when normal isel can do the job.
+ EVT VT = N->getValueType(0);
+ EVT InVT = N->getOperand(0).getValueType();
+ if (VT.isScalableVector() || InVT.isFixedLengthVector())
+ return false;
+ if (VT.getSizeInBits() <= 128)
+ return false;
+
+ // NOTE: We can only get here when doing fixed length SVE code generation.
+ // We do manual selection because the types involved are not linked to real
+ // registers (despite being legal) and must be coerced into SVE registers.
+
+ assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
+ "Expected to extract from a packed scalable vector!");
+
+ SDLoc DL(N);
+ auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
+ ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
+ N->getOperand(0), RC));
+ return true;
}
void AArch64DAGToDAGISel::Select(SDNode *Node) {
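
A sketch of the "cast-like" test shared by the two trySelectCast* helpers above: a subvector insert/extract at index 0, between a fixed-length vector wider than 128 bits and a packed scalable vector (with an undef base in the insert case), is treated as a register-class coercion and lowered to COPY_TO_REGCLASS. Field names are illustrative.

    struct SubvectorCast {
      bool IsInsert;       // insert_subvector vs extract_subvector
      bool BaseIsUndef;    // operand 0 of the insert is undef
      unsigned long Index; // constant start index
      unsigned FixedBits;  // width of the fixed-length side in bits
    };

    bool isCastLike(const SubvectorCast &C) {
      if (C.Index != 0)
        return false;
      if (C.IsInsert && !C.BaseIsUndef)
        return false;
      // NEON-sized (<= 128-bit) fixed vectors are left to normal isel.
      return C.FixedBits > 128;
    }

    int main() {
      SubvectorCast C{/*IsInsert=*/true, /*BaseIsUndef=*/true, /*Index=*/0,
                      /*FixedBits=*/256};
      return isCastLike(C) ? 0 : 1;
    }
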
@@ -4100,11 +4214,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
return;
break;
- case ISD::ADD:
- if (tryMLAV64LaneV128(Node))
- return;
- break;
-
case ISD::LOAD: {
// Try to select as an indexed load. Fall through to normal processing
// if we can't.
@@ -4133,60 +4242,21 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
return;
break;
- case ISD::FP_EXTEND:
- if (tryHighFPExt(Node))
- return;
- break;
-
case ISD::OR:
if (tryBitfieldInsertOp(Node))
return;
break;
case ISD::EXTRACT_SUBVECTOR: {
- // Bail when not a "cast" like extract_subvector.
- if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
- break;
-
- // Bail when normal isel can do the job.
- EVT InVT = Node->getOperand(0).getValueType();
- if (VT.isScalableVector() || InVT.isFixedLengthVector())
- break;
-
- // NOTE: We can only get here when doing fixed length SVE code generation.
- // We do manual selection because the types involved are not linked to real
- // registers (despite being legal) and must be coerced into SVE registers.
- //
- // NOTE: If the above changes, be aware that selection will still not work
- // because the td definition of extract_vector does not support extracting
- // a fixed length vector from a scalable vector.
-
- ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0)));
- return;
+ if (trySelectCastScalableToFixedLengthVector(Node))
+ return;
+ break;
}
case ISD::INSERT_SUBVECTOR: {
- // Bail when not a "cast" like insert_subvector.
- if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0)
- break;
- if (!Node->getOperand(0).isUndef())
- break;
-
- // Bail when normal isel should do the job.
- EVT InVT = Node->getOperand(1).getValueType();
- if (VT.isFixedLengthVector() || InVT.isScalableVector())
- break;
-
- // NOTE: We can only get here when doing fixed length SVE code generation.
- // We do manual selection because the types involved are not linked to real
- // registers (despite being legal) and must be coerced into SVE registers.
- //
- // NOTE: If the above changes, be aware that selection will still not work
- // because the td definition of insert_vector does not support inserting a
- // fixed length vector into a scalable vector.
-
- ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1)));
- return;
+ if (trySelectCastFixedLengthToScalableVector(Node))
+ return;
+ break;
}
case ISD::Constant: {
@@ -4588,6 +4658,74 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
+ case Intrinsic::aarch64_sve_ld1_pn_x2: {
+ if (VT == MVT::nxv16i8) {
+ SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ VT == MVT::nxv8bf16) {
+ SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z);
+ return;
+ }
+ break;
+ }
+ case Intrinsic::aarch64_sve_ld1_pn_x4: {
+ if (VT == MVT::nxv16i8) {
+ SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ VT == MVT::nxv8bf16) {
+ SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z);
+ return;
+ }
+ break;
+ }
+ case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
+ if (VT == MVT::nxv16i8) {
+ SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ VT == MVT::nxv8bf16) {
+ SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z);
+ return;
+ }
+ break;
+ }
+ case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
+ if (VT == MVT::nxv16i8) {
+ SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ VT == MVT::nxv8bf16) {
+ SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z);
+ return;
+ }
+ break;
+ }
case Intrinsic::aarch64_sve_ld3_sret: {
if (VT == MVT::nxv16i8) {
SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B,
@@ -4630,6 +4768,100 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
}
break;
}
+ case Intrinsic::aarch64_sme_read_hor_vg2: {
+ if (VT == MVT::nxv16i8) {
+ SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
+ AArch64::MOVA_2ZMXI_H_B);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ VT == MVT::nxv8bf16) {
+ SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
+ AArch64::MOVA_2ZMXI_H_H);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
+ AArch64::MOVA_2ZMXI_H_S);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
+ AArch64::MOVA_2ZMXI_H_D);
+ return;
+ }
+ break;
+ }
+ case Intrinsic::aarch64_sme_read_ver_vg2: {
+ if (VT == MVT::nxv16i8) {
+ SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
+ AArch64::MOVA_2ZMXI_V_B);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ VT == MVT::nxv8bf16) {
+ SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
+ AArch64::MOVA_2ZMXI_V_H);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
+ AArch64::MOVA_2ZMXI_V_S);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
+ AArch64::MOVA_2ZMXI_V_D);
+ return;
+ }
+ break;
+ }
+ case Intrinsic::aarch64_sme_read_hor_vg4: {
+ if (VT == MVT::nxv16i8) {
+ SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
+ AArch64::MOVA_4ZMXI_H_B);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ VT == MVT::nxv8bf16) {
+ SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
+ AArch64::MOVA_4ZMXI_H_H);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
+ AArch64::MOVA_4ZMXI_H_S);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
+ AArch64::MOVA_4ZMXI_H_D);
+ return;
+ }
+ break;
+ }
+ case Intrinsic::aarch64_sme_read_ver_vg4: {
+ if (VT == MVT::nxv16i8) {
+ SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
+ AArch64::MOVA_4ZMXI_V_B);
+ return;
+ } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
+ VT == MVT::nxv8bf16) {
+ SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
+ AArch64::MOVA_4ZMXI_V_H);
+ return;
+ } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
+ SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
+ AArch64::MOVA_4ZMXI_V_S);
+ return;
+ } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
+ SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
+ AArch64::MOVA_4ZMXI_V_D);
+ return;
+ }
+ break;
+ }
+ case Intrinsic::aarch64_sme_read_vg1x2: {
+ SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
+ AArch64::MOVA_VG2_2ZMXI);
+ return;
+ }
+ case Intrinsic::aarch64_sme_read_vg1x4: {
+ SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
+ AArch64::MOVA_VG4_4ZMXI);
+ return;
+ }
case Intrinsic::swift_async_context_addr: {
SDLoc DL(Node);
SDValue Chain = Node->getOperand(0);
@@ -4688,11 +4920,90 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
: AArch64::TBXv16i8Four,
true);
return;
- case Intrinsic::aarch64_neon_smull:
- case Intrinsic::aarch64_neon_umull:
- if (tryMULLV64LaneV128(IntNo, Node))
- return;
- break;
+ case Intrinsic::aarch64_sve_srshl_single_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
+ AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_srshl_single_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
+ AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_urshl_single_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
+ AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_urshl_single_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
+ AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_srshl_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
+ AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_srshl_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
+ AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_urshl_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
+ AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_urshl_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
+ AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
+ AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
+ AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_sqdmulh_vgx2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
+ AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_sqdmulh_vgx4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
+ AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
+ return;
case Intrinsic::aarch64_sve_whilege_x2:
if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
Node->getValueType(0),
@@ -4749,6 +5060,230 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
SelectWhilePair(Node, Op);
return;
+ case Intrinsic::aarch64_sve_smax_single_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
+ AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_umax_single_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
+ AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_fmax_single_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+ Node->getValueType(0),
+ {0, AArch64::FMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_S,
+ AArch64::FMAX_VG2_2ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_smax_single_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
+ AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_umax_single_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
+ AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_fmax_single_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+ Node->getValueType(0),
+ {0, AArch64::FMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_S,
+ AArch64::FMAX_VG4_4ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_smin_single_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
+ AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_umin_single_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
+ AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_fmin_single_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+ Node->getValueType(0),
+ {0, AArch64::FMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_S,
+ AArch64::FMIN_VG2_2ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_smin_single_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
+ AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_umin_single_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
+ AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_fmin_single_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+ Node->getValueType(0),
+ {0, AArch64::FMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_S,
+ AArch64::FMIN_VG4_4ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_smax_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
+ AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_umax_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
+ AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_fmax_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+ Node->getValueType(0),
+ {0, AArch64::FMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_S,
+ AArch64::FMAX_VG2_2Z2Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_smax_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
+ AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_umax_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
+ AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_fmax_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+ Node->getValueType(0),
+ {0, AArch64::FMAX_VG4_4Z4Z_H, AArch64::FMAX_VG4_4Z4Z_S,
+ AArch64::FMAX_VG4_4Z4Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_smin_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
+ AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_umin_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
+ AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_fmin_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+ Node->getValueType(0),
+ {0, AArch64::FMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_S,
+ AArch64::FMIN_VG2_2Z2Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_smin_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
+ AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_umin_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
+ AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_fmin_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+ Node->getValueType(0),
+ {0, AArch64::FMIN_VG4_4Z4Z_H, AArch64::FMIN_VG4_4Z4Z_S,
+ AArch64::FMIN_VG4_4Z4Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
+ return;
+  case Intrinsic::aarch64_sve_fmaxnm_single_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+ Node->getValueType(0),
+ {0, AArch64::FMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_S,
+ AArch64::FMAXNM_VG2_2ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
+ return;
+  case Intrinsic::aarch64_sve_fmaxnm_single_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+ Node->getValueType(0),
+ {0, AArch64::FMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_S,
+ AArch64::FMAXNM_VG4_4ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_fminnm_single_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+ Node->getValueType(0),
+ {0, AArch64::FMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_S,
+ AArch64::FMINNM_VG2_2ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_fminnm_single_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+ Node->getValueType(0),
+ {0, AArch64::FMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_S,
+ AArch64::FMINNM_VG4_4ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_fmaxnm_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+ Node->getValueType(0),
+ {0, AArch64::FMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_S,
+ AArch64::FMAXNM_VG2_2Z2Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_fmaxnm_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+ Node->getValueType(0),
+ {0, AArch64::FMAXNM_VG4_4Z4Z_H, AArch64::FMAXNM_VG4_4Z4Z_S,
+ AArch64::FMAXNM_VG4_4Z4Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_fminnm_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+ Node->getValueType(0),
+ {0, AArch64::FMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_S,
+ AArch64::FMINNM_VG2_2Z2Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
+ return;
+ case Intrinsic::aarch64_sve_fminnm_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+ Node->getValueType(0),
+ {0, AArch64::FMINNM_VG4_4Z4Z_H, AArch64::FMINNM_VG4_4Z4Z_S,
+ AArch64::FMINNM_VG4_4Z4Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
+ return;
case Intrinsic::aarch64_sve_fcvts_x2:
SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
return;
@@ -4773,6 +5308,180 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
case Intrinsic::aarch64_sve_ucvtf_x4:
SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
return;
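+  // Multi-vector clamp intrinsics: the signed, unsigned and floating-point
+  // forms all go through SelectClamp; only the per-element-type opcode
+  // differs.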
+ case Intrinsic::aarch64_sve_sclamp_single_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
+ AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
+ SelectClamp(Node, 2, Op);
+ return;
+ case Intrinsic::aarch64_sve_uclamp_single_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
+ AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
+ SelectClamp(Node, 2, Op);
+ return;
+ case Intrinsic::aarch64_sve_fclamp_single_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+ Node->getValueType(0),
+ {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
+ AArch64::FCLAMP_VG2_2Z2Z_D}))
+ SelectClamp(Node, 2, Op);
+ return;
+ case Intrinsic::aarch64_sve_sclamp_single_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
+ AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
+ SelectClamp(Node, 4, Op);
+ return;
+ case Intrinsic::aarch64_sve_uclamp_single_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
+ AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
+ SelectClamp(Node, 4, Op);
+ return;
+ case Intrinsic::aarch64_sve_fclamp_single_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
+ Node->getValueType(0),
+ {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
+ AArch64::FCLAMP_VG4_4Z4Z_D}))
+ SelectClamp(Node, 4, Op);
+ return;
+ case Intrinsic::aarch64_sve_add_single_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
+ AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
+ return;
+ case Intrinsic::aarch64_sve_add_single_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
+ AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
+ return;
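+  // Multi-vector zip/unzip intrinsics dispatch on element type, except the
+  // zipq/uzpq forms, which always use the fixed 128-bit (_Q) opcode.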
+ case Intrinsic::aarch64_sve_zip_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
+ Node->getValueType(0),
+ {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
+ AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
+ SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
+ return;
+ case Intrinsic::aarch64_sve_zipq_x2:
+ SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
+ AArch64::ZIP_VG2_2ZZZ_Q);
+ return;
+ case Intrinsic::aarch64_sve_zip_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
+ Node->getValueType(0),
+ {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
+ AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
+ SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
+ return;
+ case Intrinsic::aarch64_sve_zipq_x4:
+ SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
+ AArch64::ZIP_VG4_4Z4Z_Q);
+ return;
+ case Intrinsic::aarch64_sve_uzp_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
+ Node->getValueType(0),
+ {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
+ AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
+ SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
+ return;
+ case Intrinsic::aarch64_sve_uzpq_x2:
+ SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
+ AArch64::UZP_VG2_2ZZZ_Q);
+ return;
+ case Intrinsic::aarch64_sve_uzp_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
+ Node->getValueType(0),
+ {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
+ AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
+ SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
+ return;
+ case Intrinsic::aarch64_sve_uzpq_x4:
+ SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
+ AArch64::UZP_VG4_4Z4Z_Q);
+ return;
+ case Intrinsic::aarch64_sve_sel_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
+ Node->getValueType(0),
+ {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
+ AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
+ return;
+ case Intrinsic::aarch64_sve_sel_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
+ Node->getValueType(0),
+ {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
+ AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
+ SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
+ return;
+ case Intrinsic::aarch64_sve_frinta_x2:
+ SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frinta_x4:
+ SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frintm_x2:
+ SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frintm_x4:
+ SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frintn_x2:
+ SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frintn_x4:
+ SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frintp_x2:
+ SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
+ return;
+ case Intrinsic::aarch64_sve_frintp_x4:
+ SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
+ return;
+ case Intrinsic::aarch64_sve_sunpk_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
+ AArch64::SUNPK_VG2_2ZZ_D}))
+ SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
+ return;
+ case Intrinsic::aarch64_sve_uunpk_x2:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
+ AArch64::UUNPK_VG2_2ZZ_D}))
+ SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
+ return;
+ case Intrinsic::aarch64_sve_sunpk_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
+ AArch64::SUNPK_VG4_4Z2Z_D}))
+ SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
+ return;
+ case Intrinsic::aarch64_sve_uunpk_x4:
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
+ Node->getValueType(0),
+ {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
+ AArch64::UUNPK_VG4_4Z2Z_D}))
+ SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
+ return;
+ case Intrinsic::aarch64_sve_pext_x2: {
+ if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
+ Node->getValueType(0),
+ {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
+ AArch64::PEXT_2PCI_D}))
+ SelectPExtPair(Node, Op);
+ return;
+ }
}
break;
}
@@ -5827,7 +6536,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
int FI = cast<FrameIndexSDNode>(N)->getIndex();
// We can only encode VL scaled offsets, so only fold in frame indexes
// referencing SVE objects.
- if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+ if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
return true;
@@ -5862,7 +6571,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
// We can only encode VL scaled offsets, so only fold in frame indexes
// referencing SVE objects.
- if (FI == 0 || MFI.getStackID(FI) == TargetStackID::ScalableVector)
+ if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
}
@@ -5930,29 +6639,27 @@ bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
return TLI->isAllActivePredicate(*CurDAG, N);
}
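+// Accept any value whose type is a scalable vector of i1, i.e. any SVE
+// predicate, regardless of how it was produced.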
+bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
+ EVT VT = N.getValueType();
+ return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
+}
+
bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
SDValue &Base, SDValue &Offset,
unsigned Scale) {
- if (N.getOpcode() != ISD::ADD) {
- Base = N;
- Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
- return true;
- }
-
- // Process an ADD node.
- const SDValue LHS = N.getOperand(0);
- const SDValue RHS = N.getOperand(1);
-
- if (auto C = dyn_cast<ConstantSDNode>(RHS)) {
- int64_t ImmOff = C->getSExtValue();
-
- if ((ImmOff < 0 || ImmOff > MaxSize) || (ImmOff % Scale != 0))
- return false;
-
- Base = LHS;
- Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
- return true;
- }
+ // Try to untangle an ADD node into a 'reg + offset'
+ if (N.getOpcode() == ISD::ADD)
+ if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int64_t ImmOff = C->getSExtValue();
+      if (ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale == 0)) {
+ Base = N.getOperand(0);
+ Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
+ return true;
+ }
+ }
- return false;
+ // By default, just match reg + 0.
+ Base = N;
+ Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
+ return true;
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6f2058c72157..13df87af6c7b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -27,7 +27,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
@@ -45,6 +44,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -72,6 +72,7 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
@@ -80,11 +81,11 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <bitset>
#include <cassert>
@@ -140,6 +141,17 @@ static cl::opt<unsigned> MaxXors("aarch64-max-xors", cl::init(16), cl::Hidden,
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
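+// Integer and FP/SIMD argument registers (X0-X7 and Q0-Q7), exposed through
+// the accessors below.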
+static const MCPhysReg GPRArgRegs[] = {AArch64::X0, AArch64::X1, AArch64::X2,
+ AArch64::X3, AArch64::X4, AArch64::X5,
+ AArch64::X6, AArch64::X7};
+static const MCPhysReg FPRArgRegs[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2,
+ AArch64::Q3, AArch64::Q4, AArch64::Q5,
+ AArch64::Q6, AArch64::Q7};
+
+const ArrayRef<MCPhysReg> llvm::AArch64::getGPRArgRegs() { return GPRArgRegs; }
+
+const ArrayRef<MCPhysReg> llvm::AArch64::getFPRArgRegs() { return FPRArgRegs; }
+
static inline EVT getPackedSVEVectorVT(EVT VT) {
switch (VT.getSimpleVT().SimpleTy) {
default:
@@ -403,6 +415,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
}
}
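+  // aarch64svcount values are held in SVE predicate registers; promote their
+  // loads and stores to nxv16i1 and custom-lower SELECT.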
+ if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) {
+ addRegisterClass(MVT::aarch64svcount, &AArch64::PPRRegClass);
+ setOperationPromotedToType(ISD::LOAD, MVT::aarch64svcount, MVT::nxv16i1);
+ setOperationPromotedToType(ISD::STORE, MVT::aarch64svcount, MVT::nxv16i1);
+
+ setOperationAction(ISD::SELECT, MVT::aarch64svcount, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::aarch64svcount, Expand);
+ }
+
// Compute derived properties from the register classes
computeRegisterProperties(Subtarget->getRegisterInfo());
@@ -559,6 +580,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MULHS, MVT::i32, Expand);
// AArch64 doesn't have {U|S}MUL_LOHI.
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
@@ -624,10 +647,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMULO, MVT::i32, Custom);
setOperationAction(ISD::UMULO, MVT::i64, Custom);
- setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
- setOperationAction(ISD::ADDCARRY, MVT::i64, Custom);
- setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
- setOperationAction(ISD::SUBCARRY, MVT::i64, Custom);
+ setOperationAction(ISD::UADDO_CARRY, MVT::i32, Custom);
+ setOperationAction(ISD::UADDO_CARRY, MVT::i64, Custom);
+ setOperationAction(ISD::USUBO_CARRY, MVT::i32, Custom);
+ setOperationAction(ISD::USUBO_CARRY, MVT::i64, Custom);
setOperationAction(ISD::SADDO_CARRY, MVT::i32, Custom);
setOperationAction(ISD::SADDO_CARRY, MVT::i64, Custom);
setOperationAction(ISD::SSUBO_CARRY, MVT::i32, Custom);
@@ -693,7 +716,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
setOperationAction(ISD::FMA, MVT::v4f16, Expand);
- setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
+ setOperationAction(ISD::SETCC, MVT::v4f16, Custom);
setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
@@ -821,12 +844,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
#undef LCALLNAME5
}
+ if (Subtarget->hasLSE128()) {
+ // Custom lowering because i128 is not legal. Must be replaced by 2x64
+ // values. ATOMIC_LOAD_AND also needs op legalisation to emit LDCLRP.
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i128, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i128, Custom);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i128, Custom);
+ }
+
// 128-bit loads and stores can be done without expanding
setOperationAction(ISD::LOAD, MVT::i128, Custom);
setOperationAction(ISD::STORE, MVT::i128, Custom);
// Aligned 128-bit loads and stores are single-copy atomic according to the
- // v8.4a spec.
+ // v8.4a spec. LRCPC3 introduces 128-bit STILP/LDIAPP but still requires LSE2.
if (Subtarget->hasLSE2()) {
setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
@@ -940,7 +971,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
ISD::UINT_TO_FP});
setTargetDAGCombine({ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_TO_SINT_SAT,
- ISD::FP_TO_UINT_SAT, ISD::FDIV});
+ ISD::FP_TO_UINT_SAT, ISD::FADD, ISD::FDIV});
// Try and combine setcc with csel
setTargetDAGCombine(ISD::SETCC);
@@ -972,6 +1003,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::CTLZ);
+ setTargetDAGCombine(ISD::VECREDUCE_AND);
+ setTargetDAGCombine(ISD::VECREDUCE_OR);
+ setTargetDAGCombine(ISD::VECREDUCE_XOR);
+
// In case of strict alignment, avoid an excessive number of byte wide stores.
MaxStoresPerMemsetOptSize = 8;
MaxStoresPerMemset =
@@ -998,9 +1033,9 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// Set required alignment.
setMinFunctionAlignment(Align(4));
// Set preferred alignments.
- setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
+ setPrefLoopAlignment(STI.getPrefLoopAlignment());
setMaxBytesForAlignment(STI.getMaxBytesForLoopAlignment());
- setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
+ setPrefFunctionAlignment(STI.getPrefFunctionAlignment());
// Only change the limit for entries in a jump table if specified by
// the sub target, but not at the command line.
@@ -1018,7 +1053,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
// silliness like this:
for (auto Op :
- {ISD::SELECT, ISD::SELECT_CC, ISD::SETCC,
+ {ISD::SELECT, ISD::SELECT_CC,
ISD::BR_CC, ISD::FADD, ISD::FSUB,
ISD::FMUL, ISD::FDIV, ISD::FMA,
ISD::FNEG, ISD::FABS, ISD::FCEIL,
@@ -1093,12 +1128,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SMIN, VT, Custom);
}
- // AArch64 doesn't have MUL.2d:
- setOperationAction(ISD::MUL, MVT::v2i64, Expand);
// Custom handling for some quad-vector types to detect MULL.
setOperationAction(ISD::MUL, MVT::v8i16, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
+ setOperationAction(ISD::MUL, MVT::v4i16, Custom);
+ setOperationAction(ISD::MUL, MVT::v2i32, Custom);
+ setOperationAction(ISD::MUL, MVT::v1i64, Custom);
// Saturates
for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
@@ -1123,8 +1159,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
for (MVT VT : { MVT::v4f16, MVT::v2f32,
MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
- setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMAX, VT, Legal);
+ setOperationAction(ISD::VECREDUCE_FMIN, VT, Legal);
+ setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Legal);
+ setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Legal);
setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
}
@@ -1136,8 +1174,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
}
setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_AND, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_OR, MVT::v2i64, Custom);
+ setOperationAction(ISD::VECREDUCE_XOR, MVT::v2i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
@@ -1182,6 +1226,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i2, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i4, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i8, Custom);
+ setOperationAction(ISD::BITCAST, MVT::i16, Custom);
+
+ setOperationAction(ISD::BITCAST, MVT::v2i8, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v2i16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::v4i8, Custom);
+
setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
@@ -1208,6 +1261,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
{MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1, MVT::nxv1i1}) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
+ setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
}
}
@@ -1253,6 +1308,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+ setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
+ setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
setOperationAction(ISD::UMUL_LOHI, VT, Expand);
setOperationAction(ISD::SMUL_LOHI, VT, Expand);
@@ -1269,12 +1326,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);
- if (Subtarget->hasSVE2()) {
- setOperationAction(ISD::AVGFLOORS, VT, Custom);
- setOperationAction(ISD::AVGFLOORU, VT, Custom);
- setOperationAction(ISD::AVGCEILS, VT, Custom);
- setOperationAction(ISD::AVGCEILU, VT, Custom);
- }
+ setOperationAction(ISD::AVGFLOORS, VT, Custom);
+ setOperationAction(ISD::AVGFLOORU, VT, Custom);
+ setOperationAction(ISD::AVGCEILS, VT, Custom);
+ setOperationAction(ISD::AVGCEILU, VT, Custom);
}
// Illegal unpacked integer vector types.
@@ -1367,6 +1422,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::MLOAD, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::FADD, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::FDIV, VT, Custom);
@@ -1392,8 +1448,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
+ setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
+ setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
@@ -1448,7 +1508,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
- if (Subtarget->forceStreamingCompatibleSVE()) {
+ if (!Subtarget->isNeonAvailable()) {
setTruncStoreAction(MVT::v2f32, MVT::v2f16, Custom);
setTruncStoreAction(MVT::v4f32, MVT::v4f16, Custom);
setTruncStoreAction(MVT::v8f32, MVT::v8f16, Custom);
@@ -1460,11 +1520,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
for (MVT VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
MVT::v4i32, MVT::v1i64, MVT::v2i64})
- addTypeForStreamingSVE(VT);
+ addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/ true);
for (MVT VT :
{MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
- addTypeForStreamingSVE(VT);
+ addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/ true);
}
// NOTE: Currently this has to happen after computeRegisterProperties rather
@@ -1472,10 +1532,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
- addTypeForFixedLengthSVE(VT);
+ addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/ false);
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
- addTypeForFixedLengthSVE(VT);
+ addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/ false);
// 64bit results can mean a bigger than NEON input.
for (auto VT : {MVT::v8i8, MVT::v4i16})
@@ -1518,6 +1578,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+ setOperationAction(ISD::MULHS, VT, Custom);
+ setOperationAction(ISD::MULHU, VT, Custom);
}
@@ -1681,128 +1743,8 @@ bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
return false;
}
-void AArch64TargetLowering::addTypeForStreamingSVE(MVT VT) {
- // By default set all operations to Expand,
- // then change to Legal/Custom if needed.
- for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
- setOperationAction(Op, VT, Expand);
-
- assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
-
- if (VT.isFloatingPoint()) {
- setCondCodeAction(ISD::SETO, VT, Expand);
- setCondCodeAction(ISD::SETOLT, VT, Expand);
- setCondCodeAction(ISD::SETOLE, VT, Expand);
- setCondCodeAction(ISD::SETULT, VT, Expand);
- setCondCodeAction(ISD::SETULE, VT, Expand);
- setCondCodeAction(ISD::SETUGE, VT, Expand);
- setCondCodeAction(ISD::SETUGT, VT, Expand);
- setCondCodeAction(ISD::SETUEQ, VT, Expand);
- setCondCodeAction(ISD::SETONE, VT, Expand);
- }
-
- // STORE, LOAD, SCALAR_TO_VECTOR and BITCAST are natively supported,
- // so no need to Custom/Expand them.
- setOperationAction(ISD::STORE, VT, Legal);
- setOperationAction(ISD::LOAD, VT, Legal);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
- setOperationAction(ISD::BITCAST, VT, Legal);
-
- // Mark integer truncating stores/extending loads as having custom lowering
- if (VT.isInteger()) {
- MVT InnerVT = VT.changeVectorElementType(MVT::i8);
- while (InnerVT != VT) {
- setTruncStoreAction(VT, InnerVT, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Custom);
- InnerVT = InnerVT.changeVectorElementType(
- MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
- }
- }
-
- // Mark floating-point truncating stores/extending loads as having custom
- // lowering
- if (VT.isFloatingPoint()) {
- MVT InnerVT = VT.changeVectorElementType(MVT::f16);
- while (InnerVT != VT) {
- setTruncStoreAction(VT, InnerVT, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Custom);
- InnerVT = InnerVT.changeVectorElementType(
- MVT::getFloatingPointVT(2 * InnerVT.getScalarSizeInBits()));
- }
- }
-
- setOperationAction(ISD::ABS, VT, Custom);
- setOperationAction(ISD::ADD, VT, Custom);
- setOperationAction(ISD::AND, VT, Custom);
- setOperationAction(ISD::ANY_EXTEND, VT, Custom);
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::CTLZ, VT, Custom);
- setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::FABS, VT, Custom);
- setOperationAction(ISD::FADD, VT, Custom);
- setOperationAction(ISD::FCEIL, VT, Custom);
- setOperationAction(ISD::FCOPYSIGN, VT, Custom);
- setOperationAction(ISD::FDIV, VT, Custom);
- setOperationAction(ISD::FFLOOR, VT, Custom);
- setOperationAction(ISD::FMA, VT, Custom);
- setOperationAction(ISD::FMAXIMUM, VT, Custom);
- setOperationAction(ISD::FMAXNUM, VT, Custom);
- setOperationAction(ISD::FMINIMUM, VT, Custom);
- setOperationAction(ISD::FMINNUM, VT, Custom);
- setOperationAction(ISD::FMUL, VT, Custom);
- setOperationAction(ISD::FNEARBYINT, VT, Custom);
- setOperationAction(ISD::FNEG, VT, Custom);
- setOperationAction(ISD::FP_ROUND, VT, Custom);
- setOperationAction(ISD::FP_TO_SINT, VT, Custom);
- setOperationAction(ISD::FP_TO_UINT, VT, Custom);
- setOperationAction(ISD::FRINT, VT, Custom);
- setOperationAction(ISD::FROUND, VT, Custom);
- setOperationAction(ISD::FROUNDEVEN, VT, Custom);
- setOperationAction(ISD::FSQRT, VT, Custom);
- setOperationAction(ISD::FSUB, VT, Custom);
- setOperationAction(ISD::FTRUNC, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::MLOAD, VT, Custom);
- setOperationAction(ISD::MSTORE, VT, Custom);
- setOperationAction(ISD::MUL, VT, Custom);
- setOperationAction(ISD::MULHS, VT, Custom);
- setOperationAction(ISD::MULHU, VT, Custom);
- setOperationAction(ISD::OR, VT, Custom);
- setOperationAction(ISD::SDIV, VT, Custom);
- setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
- setOperationAction(ISD::SINT_TO_FP, VT, Custom);
- setOperationAction(ISD::SMAX, VT, Custom);
- setOperationAction(ISD::SMIN, VT, Custom);
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
- setOperationAction(ISD::SRA, VT, Custom);
- setOperationAction(ISD::SRL, VT, Custom);
- setOperationAction(ISD::SUB, VT, Custom);
- setOperationAction(ISD::TRUNCATE, VT, Custom);
- setOperationAction(ISD::UDIV, VT, Custom);
- setOperationAction(ISD::UINT_TO_FP, VT, Custom);
- setOperationAction(ISD::UMAX, VT, Custom);
- setOperationAction(ISD::UMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::XOR, VT, Custom);
- setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
-}
-
-void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
+void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT,
+ bool StreamingSVE) {
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
// By default everything must be expanded.
@@ -1850,7 +1792,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::ADD, VT, Custom);
setOperationAction(ISD::AND, VT, Custom);
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
- setOperationAction(ISD::BITCAST, VT, Custom);
+ setOperationAction(ISD::BITCAST, VT, StreamingSVE ? Legal : Custom);
setOperationAction(ISD::BITREVERSE, VT, Custom);
setOperationAction(ISD::BSWAP, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
@@ -1885,15 +1827,16 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::FSUB, VT, Custom);
setOperationAction(ISD::FTRUNC, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::MGATHER, VT, Custom);
+ setOperationAction(ISD::LOAD, VT, StreamingSVE ? Legal : Custom);
+ setOperationAction(ISD::MGATHER, VT, StreamingSVE ? Expand : Custom);
setOperationAction(ISD::MLOAD, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
+ setOperationAction(ISD::MSCATTER, VT, StreamingSVE ? Expand : Custom);
setOperationAction(ISD::MSTORE, VT, Custom);
setOperationAction(ISD::MUL, VT, Custom);
setOperationAction(ISD::MULHS, VT, Custom);
setOperationAction(ISD::MULHU, VT, Custom);
setOperationAction(ISD::OR, VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, StreamingSVE ? Legal : Expand);
setOperationAction(ISD::SDIV, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
@@ -1906,7 +1849,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction(ISD::STORE, VT, StreamingSVE ? Legal : Custom);
setOperationAction(ISD::SUB, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
setOperationAction(ISD::UDIV, VT, Custom);
@@ -1918,6 +1861,8 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);
setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
@@ -2084,7 +2029,7 @@ bool AArch64TargetLowering::targetShrinkDemandedConstant(
"i32 or i64 is expected after legalization.");
// Exit early if we demand all bits.
- if (DemandedBits.countPopulation() == Size)
+ if (DemandedBits.popcount() == Size)
return false;
unsigned NewOpc;
@@ -2130,7 +2075,7 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
break;
}
case AArch64ISD::BICi: {
@@ -2220,6 +2165,38 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
}
}
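+// All of the compare nodes below produce 0 or all-ones per lane, so every
+// result bit is a sign bit; anything else falls back to the default of 1.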
+unsigned AArch64TargetLowering::ComputeNumSignBitsForTargetNode(
+ SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+ unsigned Depth) const {
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getScalarSizeInBits();
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ case AArch64ISD::CMEQ:
+ case AArch64ISD::CMGE:
+ case AArch64ISD::CMGT:
+ case AArch64ISD::CMHI:
+ case AArch64ISD::CMHS:
+ case AArch64ISD::FCMEQ:
+ case AArch64ISD::FCMGE:
+ case AArch64ISD::FCMGT:
+ case AArch64ISD::CMEQz:
+ case AArch64ISD::CMGEz:
+ case AArch64ISD::CMGTz:
+ case AArch64ISD::CMLEz:
+ case AArch64ISD::CMLTz:
+ case AArch64ISD::FCMEQz:
+ case AArch64ISD::FCMGEz:
+ case AArch64ISD::FCMGTz:
+ case AArch64ISD::FCMLEz:
+ case AArch64ISD::FCMLTz:
+ // Compares return either 0 or all-ones
+ return VTBits;
+ }
+
+ return 1;
+}
+
MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
EVT) const {
return MVT::i64;
@@ -2297,7 +2274,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::ADR)
MAKE_CASE(AArch64ISD::ADDlow)
MAKE_CASE(AArch64ISD::LOADgot)
- MAKE_CASE(AArch64ISD::RET_FLAG)
+ MAKE_CASE(AArch64ISD::RET_GLUE)
MAKE_CASE(AArch64ISD::BRCOND)
MAKE_CASE(AArch64ISD::CSEL)
MAKE_CASE(AArch64ISD::CSINV)
@@ -2564,8 +2541,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::SSTNT1_PRED)
MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
MAKE_CASE(AArch64ISD::LDP)
+ MAKE_CASE(AArch64ISD::LDIAPP)
MAKE_CASE(AArch64ISD::LDNP)
MAKE_CASE(AArch64ISD::STP)
+ MAKE_CASE(AArch64ISD::STILP)
MAKE_CASE(AArch64ISD::STNP)
MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
@@ -2783,6 +2762,10 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
+ case TargetOpcode::PATCHABLE_EVENT_CALL:
+ case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
+ return BB;
+
case AArch64::CATCHRET:
return EmitLoweredCatchRet(MI, BB);
case AArch64::LD1_MXIPXX_H_PSEUDO_B:
@@ -3640,7 +3623,7 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType(),
- Subtarget->forceStreamingCompatibleSVE()))
+ !Subtarget->isNeonAvailable()))
return LowerToScalableOp(Op, DAG);
SDValue Sel = Op.getOperand(0);
@@ -3737,31 +3720,31 @@ static SDValue valueToCarryFlag(SDValue Value, SelectionDAG &DAG, bool Invert) {
// If Invert is false, value is 1 if 'C' bit of NZCV is 1, else 0.
// If Invert is true, value is 0 if 'C' bit of NZCV is 1, else 1.
-static SDValue carryFlagToValue(SDValue Flag, EVT VT, SelectionDAG &DAG,
+static SDValue carryFlagToValue(SDValue Glue, EVT VT, SelectionDAG &DAG,
bool Invert) {
- assert(Flag.getResNo() == 1);
- SDLoc DL(Flag);
+ assert(Glue.getResNo() == 1);
+ SDLoc DL(Glue);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
unsigned Cond = Invert ? AArch64CC::LO : AArch64CC::HS;
SDValue CC = DAG.getConstant(Cond, DL, MVT::i32);
- return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Flag);
+ return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Glue);
}
// Value is 1 if 'V' bit of NZCV is 1, else 0
-static SDValue overflowFlagToValue(SDValue Flag, EVT VT, SelectionDAG &DAG) {
- assert(Flag.getResNo() == 1);
- SDLoc DL(Flag);
+static SDValue overflowFlagToValue(SDValue Glue, EVT VT, SelectionDAG &DAG) {
+ assert(Glue.getResNo() == 1);
+ SDLoc DL(Glue);
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue One = DAG.getConstant(1, DL, VT);
SDValue CC = DAG.getConstant(AArch64CC::VS, DL, MVT::i32);
- return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Flag);
+ return DAG.getNode(AArch64ISD::CSEL, DL, VT, One, Zero, CC, Glue);
}
// This lowering is inefficient, but it will get cleaned up by
// `foldOverflowCheck`
-static SDValue lowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG, unsigned Opcode,
- bool IsSigned) {
+static SDValue lowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG,
+ unsigned Opcode, bool IsSigned) {
EVT VT0 = Op.getValue(0).getValueType();
EVT VT1 = Op.getValue(1).getValueType();
@@ -3850,7 +3833,7 @@ SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
if (VT.isScalableVector())
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
- if (useSVEForFixedLengthVectorVT(VT))
+ if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
return LowerFixedLengthFPExtendToSVE(Op, DAG);
assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
@@ -3866,8 +3849,7 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
EVT SrcVT = SrcVal.getValueType();
- if (useSVEForFixedLengthVectorVT(SrcVT,
- Subtarget->forceStreamingCompatibleSVE()))
+ if (useSVEForFixedLengthVectorVT(SrcVT, !Subtarget->isNeonAvailable()))
return LowerFixedLengthFPRoundToSVE(Op, DAG);
if (SrcVT != MVT::f128) {
@@ -3898,10 +3880,8 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
return LowerToPredicatedOp(Op, DAG, Opcode);
}
- if (useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE()) ||
- useSVEForFixedLengthVectorVT(InVT,
- Subtarget->forceStreamingCompatibleSVE()))
+ if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()) ||
+ useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable()))
return LowerFixedLengthFPToIntToSVE(Op, DAG);
unsigned NumElts = InVT.getVectorNumElements();
@@ -4065,7 +4045,7 @@ AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
Sat = DAG.getNode(ISD::SMAX, DL, IntVT, Min, MaxC);
} else {
SDValue MinC = DAG.getConstant(
- APInt::getAllOnesValue(SatWidth).zext(SrcElementWidth), DL, IntVT);
+ APInt::getAllOnes(SatWidth).zext(SrcElementWidth), DL, IntVT);
Sat = DAG.getNode(ISD::UMIN, DL, IntVT, NativeCvt, MinC);
}
@@ -4121,7 +4101,7 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
Sat = DAG.getNode(ISD::SMAX, DL, DstVT, Min, MaxC);
} else {
SDValue MinC = DAG.getConstant(
- APInt::getAllOnesValue(SatWidth).zext(DstWidth), DL, DstVT);
+ APInt::getAllOnes(SatWidth).zext(DstWidth), DL, DstVT);
Sat = DAG.getNode(ISD::UMIN, DL, DstVT, NativeCvt, MinC);
}
@@ -4155,10 +4135,8 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
return LowerToPredicatedOp(Op, DAG, Opcode);
}
- if (useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE()) ||
- useSVEForFixedLengthVectorVT(InVT,
- Subtarget->forceStreamingCompatibleSVE()))
+ if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()) ||
+ useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable()))
return LowerFixedLengthIntToFPToSVE(Op, DAG);
uint64_t VTSize = VT.getFixedSizeInBits();
@@ -4315,10 +4293,7 @@ SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
- return SDValue(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
- DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
- 0);
+ return DAG.getTargetExtractSubreg(AArch64::hsub, DL, OpVT, Op);
}
static EVT getExtensionTo64Bits(const EVT &OrigVT) {
@@ -4398,8 +4373,7 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
}
static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
- if (N->getOpcode() == ISD::SIGN_EXTEND ||
- N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
+ if (ISD::isExtOpcode(N->getOpcode()))
return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
N->getOperand(0)->getValueType(0),
N->getValueType(0),
@@ -4612,28 +4586,50 @@ static unsigned selectUmullSmull(SDNode *&N0, SDNode *&N1, SelectionDAG &DAG,
SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- // If SVE is available then i64 vector multiplications can also be made legal.
- bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64 ||
- Subtarget->forceStreamingCompatibleSVE();
-
+ bool OverrideNEON = !Subtarget->isNeonAvailable();
if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);
- // Multiplications are only custom-lowered for 128-bit vectors so that
- // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
- assert(VT.is128BitVector() && VT.isInteger() &&
+ // Multiplications are only custom-lowered for 128-bit and 64-bit vectors so
+ // that VMULL can be detected. Otherwise v2i64 multiplications are not legal.
+ assert((VT.is128BitVector() || VT.is64BitVector()) && VT.isInteger() &&
"unexpected type for custom-lowering ISD::MUL");
SDNode *N0 = Op.getOperand(0).getNode();
SDNode *N1 = Op.getOperand(1).getNode();
bool isMLA = false;
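+  // Remember the original result type: 64-bit multiplies may be widened below
+  // so that a [US]MULL can be formed, and the result is then narrowed back
+  // with an EXTRACT_SUBVECTOR.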
+ EVT OVT = VT;
+ if (VT.is64BitVector()) {
+ if (N0->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ isNullConstant(N0->getOperand(1)) &&
+ N1->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ isNullConstant(N1->getOperand(1))) {
+ N0 = N0->getOperand(0).getNode();
+ N1 = N1->getOperand(0).getNode();
+ VT = N0->getValueType(0);
+ } else {
+ if (VT == MVT::v1i64) {
+ if (Subtarget->hasSVE())
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);
+ // Fall through to expand this. It is not legal.
+ return SDValue();
+ } else
+ // Other vector multiplications are legal.
+ return Op;
+ }
+ }
+
SDLoc DL(Op);
unsigned NewOpc = selectUmullSmull(N0, N1, DAG, DL, isMLA);
if (!NewOpc) {
- if (VT == MVT::v2i64)
+ if (VT.getVectorElementType() == MVT::i64) {
+ // If SVE is available then i64 vector multiplications can also be made
+ // legal.
+ if (Subtarget->hasSVE())
+ return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED);
// Fall through to expand this. It is not legal.
return SDValue();
- else
+ } else
// Other vector multiplications are legal.
return Op;
}
@@ -4646,7 +4642,9 @@ SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
assert(Op0.getValueType().is64BitVector() &&
Op1.getValueType().is64BitVector() &&
"unexpected types for extended operands to VMULL");
- return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OVT,
+ DAG.getNode(NewOpc, DL, VT, Op0, Op1),
+ DAG.getConstant(0, DL, MVT::i64));
}
// Optimizing (zext A + zext B) * C, to (S/UMULL A, C) + (S/UMULL B, C) during
// isel lowering to take advantage of no-stall back to back s/umul + s/umla.
@@ -4654,11 +4652,14 @@ SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
EVT Op1VT = Op1.getValueType();
- return DAG.getNode(N0->getOpcode(), DL, VT,
- DAG.getNode(NewOpc, DL, VT,
- DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
- DAG.getNode(NewOpc, DL, VT,
- DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, OVT,
+ DAG.getNode(N0->getOpcode(), DL, VT,
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
+ DAG.getNode(NewOpc, DL, VT,
+ DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)),
+ DAG.getConstant(0, DL, MVT::i64));
}
static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
@@ -5072,8 +5073,12 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::aarch64_sve_dupq_lane:
return LowerDUPQLane(Op, DAG);
case Intrinsic::aarch64_sve_convert_from_svbool:
+ if (Op.getValueType() == MVT::aarch64svcount)
+ return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Op.getOperand(1));
return getSVEPredicateBitCast(Op.getValueType(), Op.getOperand(1), DAG);
case Intrinsic::aarch64_sve_convert_to_svbool:
+ if (Op.getOperand(1).getValueType() == MVT::aarch64svcount)
+ return DAG.getNode(ISD::BITCAST, dl, MVT::nxv16i1, Op.getOperand(1));
return getSVEPredicateBitCast(MVT::nxv16i1, Op.getOperand(1), DAG);
case Intrinsic::aarch64_sve_fneg:
return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
@@ -5254,13 +5259,6 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
}
- case Intrinsic::aarch64_neon_sabd:
- case Intrinsic::aarch64_neon_uabd: {
- unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU
- : ISD::ABDS;
- return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
- Op.getOperand(2));
- }
case Intrinsic::aarch64_neon_saddlp:
case Intrinsic::aarch64_neon_uaddlp: {
unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp
@@ -5314,9 +5312,7 @@ bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
return ExtVal.getValueType().isScalableVector() ||
- useSVEForFixedLengthVectorVT(
- ExtVal.getValueType(),
- /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors());
+ Subtarget->useSVEForFixedLengthVectors();
}
unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
@@ -5690,21 +5686,25 @@ SDValue AArch64TargetLowering::LowerStore128(SDValue Op,
MemSDNode *StoreNode = cast<MemSDNode>(Op);
assert(StoreNode->getMemoryVT() == MVT::i128);
assert(StoreNode->isVolatile() || StoreNode->isAtomic());
- assert(!StoreNode->isAtomic() ||
- StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
- StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic);
+
+ bool IsStoreRelease =
+ StoreNode->getMergedOrdering() == AtomicOrdering::Release;
+ if (StoreNode->isAtomic())
+ assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
+ Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
+ StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
+ StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic);
SDValue Value = StoreNode->getOpcode() == ISD::STORE
? StoreNode->getOperand(1)
: StoreNode->getOperand(2);
SDLoc DL(Op);
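+  // Split the 128-bit value into a lo/hi i64 pair and emit either a
+  // store-release pair (STILP) or a plain store pair (STP).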
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Value,
- DAG.getConstant(0, DL, MVT::i64));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Value,
- DAG.getConstant(1, DL, MVT::i64));
+ auto StoreValue = DAG.SplitScalar(Value, DL, MVT::i64, MVT::i64);
+ unsigned Opcode = IsStoreRelease ? AArch64ISD::STILP : AArch64ISD::STP;
SDValue Result = DAG.getMemIntrinsicNode(
- AArch64ISD::STP, DL, DAG.getVTList(MVT::Other),
- {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
+ Opcode, DL, DAG.getVTList(MVT::Other),
+ {StoreNode->getChain(), StoreValue.first, StoreValue.second,
+ StoreNode->getBasePtr()},
StoreNode->getMemoryVT(), StoreNode->getMemOperand());
return Result;
}
@@ -5840,14 +5840,14 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerVACOPY(Op, DAG);
case ISD::VAARG:
return LowerVAARG(Op, DAG);
- case ISD::ADDCARRY:
- return lowerADDSUBCARRY(Op, DAG, AArch64ISD::ADCS, false /*unsigned*/);
- case ISD::SUBCARRY:
- return lowerADDSUBCARRY(Op, DAG, AArch64ISD::SBCS, false /*unsigned*/);
+ case ISD::UADDO_CARRY:
+ return lowerADDSUBO_CARRY(Op, DAG, AArch64ISD::ADCS, false /*unsigned*/);
+ case ISD::USUBO_CARRY:
+ return lowerADDSUBO_CARRY(Op, DAG, AArch64ISD::SBCS, false /*unsigned*/);
case ISD::SADDO_CARRY:
- return lowerADDSUBCARRY(Op, DAG, AArch64ISD::ADCS, true /*signed*/);
+ return lowerADDSUBO_CARRY(Op, DAG, AArch64ISD::ADCS, true /*signed*/);
case ISD::SSUBO_CARRY:
- return lowerADDSUBCARRY(Op, DAG, AArch64ISD::SBCS, true /*signed*/);
+ return lowerADDSUBO_CARRY(Op, DAG, AArch64ISD::SBCS, true /*signed*/);
case ISD::SADDO:
case ISD::UADDO:
case ISD::SSUBO:
@@ -5976,7 +5976,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerINTRINSIC_VOID(Op, DAG);
case ISD::ATOMIC_STORE:
if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
- assert(Subtarget->hasLSE2());
+ assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
return LowerStore128(Op, DAG);
}
return SDValue();
@@ -6001,6 +6001,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::VECREDUCE_FADD:
case ISD::VECREDUCE_FMAX:
case ISD::VECREDUCE_FMIN:
+ case ISD::VECREDUCE_FMAXIMUM:
+ case ISD::VECREDUCE_FMINIMUM:
return LowerVECREDUCE(Op, DAG);
case ISD::ATOMIC_LOAD_SUB:
return LowerATOMIC_LOAD_SUB(Op, DAG);
@@ -6031,7 +6033,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerMLOAD(Op, DAG);
case ISD::LOAD:
if (useSVEForFixedLengthVectorVT(Op.getValueType(),
- Subtarget->forceStreamingCompatibleSVE()))
+ !Subtarget->isNeonAvailable()))
return LowerFixedLengthVectorLoadToSVE(Op, DAG);
return LowerLOAD(Op, DAG);
case ISD::ADD:
@@ -6055,13 +6057,13 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::ABDU:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
case ISD::AVGFLOORS:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::HADDS_PRED);
+ return LowerAVG(Op, DAG, AArch64ISD::HADDS_PRED);
case ISD::AVGFLOORU:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::HADDU_PRED);
+ return LowerAVG(Op, DAG, AArch64ISD::HADDU_PRED);
case ISD::AVGCEILS:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::RHADDS_PRED);
+ return LowerAVG(Op, DAG, AArch64ISD::RHADDS_PRED);
case ISD::AVGCEILU:
- return LowerToPredicatedOp(Op, DAG, AArch64ISD::RHADDU_PRED);
+ return LowerAVG(Op, DAG, AArch64ISD::RHADDU_PRED);
case ISD::BITREVERSE:
return LowerBitreverse(Op, DAG);
case ISD::BSWAP:
@@ -6072,6 +6074,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerCTTZ(Op, DAG);
case ISD::VECTOR_SPLICE:
return LowerVECTOR_SPLICE(Op, DAG);
+ case ISD::VECTOR_DEINTERLEAVE:
+ return LowerVECTOR_DEINTERLEAVE(Op, DAG);
+ case ISD::VECTOR_INTERLEAVE:
+ return LowerVECTOR_INTERLEAVE(Op, DAG);
case ISD::STRICT_LROUND:
case ISD::STRICT_LLROUND:
case ISD::STRICT_LRINT:
@@ -6091,16 +6097,12 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
SDValue Chain = Op.getOperand(0);
SDValue SysRegName = Op.getOperand(1);
- SDValue Pair = Op.getOperand(2);
-
- SDValue PairLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Pair,
- DAG.getConstant(0, DL, MVT::i32));
- SDValue PairHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Pair,
- DAG.getConstant(1, DL, MVT::i32));
+ std::pair<SDValue, SDValue> Pair =
+ DAG.SplitScalar(Op.getOperand(2), DL, MVT::i64, MVT::i64);
// chain = MSRR(chain, sysregname, lo, hi)
SDValue Result = DAG.getNode(AArch64ISD::MSRR, DL, MVT::Other, Chain,
- SysRegName, PairLo, PairHi);
+ SysRegName, Pair.first, Pair.second);
return Result;
}
@@ -6111,10 +6113,6 @@ bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
return !Subtarget->useSVEForFixedLengthVectors();
}
-bool AArch64TargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
- return true;
-}
-
bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
EVT VT, bool OverrideNEON) const {
if (!VT.isFixedLengthVector() || !VT.isSimple())
@@ -6208,6 +6206,7 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::PreserveMost:
+ case CallingConv::PreserveAll:
case CallingConv::CXX_FAST_TLS:
case CallingConv::Swift:
case CallingConv::SwiftTail:
@@ -6385,6 +6384,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
RegVT.getVectorElementType() == MVT::i1) {
FuncInfo->setIsSVECC(true);
RC = &AArch64::PPRRegClass;
+ } else if (RegVT == MVT::aarch64svcount) {
+ FuncInfo->setIsSVECC(true);
+ RC = &AArch64::PPRRegClass;
} else if (RegVT.isScalableVector()) {
FuncInfo->setIsSVECC(true);
RC = &AArch64::ZPRRegClass;
@@ -6419,9 +6421,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
case CCValAssign::Full:
break;
case CCValAssign::Indirect:
- assert((VA.getValVT().isScalableVector() ||
- Subtarget->isWindowsArm64EC()) &&
- "Indirect arguments should be scalable on most subtargets");
+ assert(
+ (VA.getValVT().isScalableVT() || Subtarget->isWindowsArm64EC()) &&
+ "Indirect arguments should be scalable on most subtargets");
break;
case CCValAssign::BCvt:
ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
@@ -6500,9 +6502,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
}
if (VA.getLocInfo() == CCValAssign::Indirect) {
- assert(
- (VA.getValVT().isScalableVector() || Subtarget->isWindowsArm64EC()) &&
- "Indirect arguments should be scalable on most subtargets");
+ assert((VA.getValVT().isScalableVT() ||
+ Subtarget->isWindowsArm64EC()) &&
+ "Indirect arguments should be scalable on most subtargets");
uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinValue();
unsigned NumParts = 1;
@@ -6595,11 +6597,12 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
}
// This will point to the next argument passed via stack.
- unsigned StackOffset = CCInfo.getNextStackOffset();
+ unsigned VarArgsOffset = CCInfo.getStackSize();
// We currently pass all varargs at 8-byte alignment, or 4 for ILP32
- StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
- FuncInfo->setVarArgsStackOffset(StackOffset);
- FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
+ VarArgsOffset = alignTo(VarArgsOffset, Subtarget->isTargetILP32() ? 4 : 8);
+ FuncInfo->setVarArgsStackOffset(VarArgsOffset);
+ FuncInfo->setVarArgsStackIndex(
+ MFI.CreateFixedObject(4, VarArgsOffset, true));
if (MFI.hasMustTailInVarArgFunc()) {
SmallVector<MVT, 2> RegParmTypes;
@@ -6639,7 +6642,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
}
}
- unsigned StackArgSize = CCInfo.getNextStackOffset();
+ unsigned StackArgSize = CCInfo.getStackSize();
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
// This is a non-standard ABI so by fiat I say we're allowed to make full
@@ -6683,10 +6686,8 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
SmallVector<SDValue, 8> MemOps;
- static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
- AArch64::X3, AArch64::X4, AArch64::X5,
- AArch64::X6, AArch64::X7 };
- unsigned NumGPRArgRegs = std::size(GPRArgRegs);
+ auto GPRArgRegs = AArch64::getGPRArgRegs();
+ unsigned NumGPRArgRegs = GPRArgRegs.size();
if (Subtarget->isWindowsArm64EC()) {
// In the ARM64EC ABI, only x0-x3 are used to pass arguments to varargs
// functions.
@@ -6736,10 +6737,8 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
FuncInfo->setVarArgsGPRSize(GPRSaveSize);
if (Subtarget->hasFPARMv8() && !IsWin64) {
- static const MCPhysReg FPRArgRegs[] = {
- AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
- AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
- static const unsigned NumFPRArgRegs = std::size(FPRArgRegs);
+ auto FPRArgRegs = AArch64::getFPRArgRegs();
+ const unsigned NumFPRArgRegs = FPRArgRegs.size();
unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
@@ -6772,7 +6771,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue AArch64TargetLowering::LowerCallResult(
- SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
+ SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<CCValAssign> &RVLocs, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
SDValue ThisVal) const {
@@ -6795,9 +6794,9 @@ SDValue AArch64TargetLowering::LowerCallResult(
SDValue Val = CopiedRegs.lookup(VA.getLocReg());
if (!Val) {
Val =
- DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
+ DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InGlue);
Chain = Val.getValue(1);
- InFlag = Val.getValue(2);
+ InGlue = Val.getValue(2);
CopiedRegs[VA.getLocReg()] = Val;
}
@@ -6838,6 +6837,7 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
case CallingConv::C:
case CallingConv::AArch64_SVE_VectorCall:
case CallingConv::PreserveMost:
+ case CallingConv::PreserveAll:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
@@ -7039,7 +7039,7 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
// If the stack arguments for this call do not fit into our own save area then
// the call cannot be made tail.
- if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
+ if (CCInfo.getStackSize() > FuncInfo->getBytesInStackArgArea())
return false;
const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -7100,7 +7100,7 @@ static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) {
SDValue AArch64TargetLowering::changeStreamingMode(
SelectionDAG &DAG, SDLoc DL, bool Enable,
- SDValue Chain, SDValue InFlag, SDValue PStateSM, bool Entry) const {
+ SDValue Chain, SDValue InGlue, SDValue PStateSM, bool Entry) const {
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
SDValue RegMask = DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask());
SDValue MSROp =
@@ -7110,8 +7110,8 @@ SDValue AArch64TargetLowering::changeStreamingMode(
DAG.getTargetConstant(Entry ? Enable : !Enable, DL, MVT::i64);
SmallVector<SDValue> Ops = {Chain, MSROp, PStateSM, ExpectedSMVal, RegMask};
- if (InFlag)
- Ops.push_back(InFlag);
+ if (InGlue)
+ Ops.push_back(InGlue);
unsigned Opcode = Enable ? AArch64ISD::SMSTART : AArch64ISD::SMSTOP;
return DAG.getNode(Opcode, DL, DAG.getVTList(MVT::Other, MVT::Glue), Ops);
@@ -7143,7 +7143,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
bool IsSibCall = false;
bool GuardWithBTI = false;
- if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
+ if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
!Subtarget->noBTIAtReturnTwice()) {
GuardWithBTI = FuncInfo->branchTargetEnforcement();
}
@@ -7203,7 +7203,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
"site marked musttail");
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
+ unsigned NumBytes = CCInfo.getStackSize();
if (IsSibCall) {
// Since we're not changing the ABI to make this a tail call, the memory
@@ -7359,7 +7359,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::Indirect:
- bool isScalable = VA.getValVT().isScalableVector();
+ bool isScalable = VA.getValVT().isScalableVT();
assert((isScalable || Subtarget->isWindowsArm64EC()) &&
"Indirect arguments should be scalable on most subtargets");
@@ -7536,20 +7536,20 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
- SDValue InFlag;
+ SDValue InGlue;
if (RequiresSMChange) {
SDValue NewChain = changeStreamingMode(DAG, DL, *RequiresSMChange, Chain,
- InFlag, PStateSM, true);
+ InGlue, PStateSM, true);
Chain = NewChain.getValue(0);
- InFlag = NewChain.getValue(1);
+ InGlue = NewChain.getValue(1);
}
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
for (auto &RegToPass : RegsToPass) {
Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
- RegToPass.second, InFlag);
- InFlag = Chain.getValue(1);
+ RegToPass.second, InGlue);
+ InGlue = Chain.getValue(1);
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
@@ -7583,8 +7583,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// we've carefully laid out the parameters so that when sp is reset they'll be
// in the correct location.
if (IsTailCall && !IsSibCall) {
- Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InFlag, DL);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, DL);
+ InGlue = Chain.getValue(1);
}
std::vector<SDValue> Ops;
@@ -7626,8 +7626,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ if (InGlue.getNode())
+ Ops.push_back(InGlue);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -7640,6 +7640,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
if (IsCFICall)
Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
+ DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
return Ret;
}
@@ -7668,27 +7669,27 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
uint64_t CalleePopBytes =
DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
- Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InFlag, DL);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, DL);
+ InGlue = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
- SDValue Result = LowerCallResult(Chain, InFlag, CallConv, IsVarArg, RVLocs,
+ SDValue Result = LowerCallResult(Chain, InGlue, CallConv, IsVarArg, RVLocs,
DL, DAG, InVals, IsThisReturn,
IsThisReturn ? OutVals[0] : SDValue());
if (!Ins.empty())
- InFlag = Result.getValue(Result->getNumValues() - 1);
+ InGlue = Result.getValue(Result->getNumValues() - 1);
if (RequiresSMChange) {
assert(PStateSM && "Expected a PStateSM to be set");
- Result = changeStreamingMode(DAG, DL, !*RequiresSMChange, Result, InFlag,
+ Result = changeStreamingMode(DAG, DL, !*RequiresSMChange, Result, InGlue,
PStateSM, false);
}
@@ -7771,7 +7772,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
CCInfo.AnalyzeReturn(Outs, RetCC);
// Copy the result values into the output registers.
- SDValue Flag;
+ SDValue Glue;
SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
SmallSet<unsigned, 4> RegsUsed;
for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
@@ -7829,13 +7830,13 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
DAG.getTargetConstant((int32_t)AArch64SVCR::SVCRSM, DL, MVT::i32),
DAG.getConstant(1, DL, MVT::i64), DAG.getConstant(0, DL, MVT::i64),
DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask()));
- Flag = Chain.getValue(1);
+ Glue = Chain.getValue(1);
}
SmallVector<SDValue, 4> RetOps(1, Chain);
for (auto &RetVal : RetVals) {
- Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
- Flag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Glue);
+ Glue = Chain.getValue(1);
RetOps.push_back(
DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
}
@@ -7849,8 +7850,8 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
getPointerTy(MF.getDataLayout()));
unsigned RetValReg = AArch64::X0;
- Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
- Flag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Glue);
+ Glue = Chain.getValue(1);
RetOps.push_back(
DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
@@ -7870,11 +7871,11 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
RetOps[0] = Chain; // Update chain.
- // Add the flag if we have it.
- if (Flag.getNode())
- RetOps.push_back(Flag);
+ // Add the glue if we have it.
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
- return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
+ return DAG.getNode(AArch64ISD::RET_GLUE, DL, MVT::Other, RetOps);
}
//===----------------------------------------------------------------------===//
@@ -8532,7 +8533,7 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
getPackedSVEVectorVT(VT.getVectorElementType().changeTypeToInteger());
if (VT.isFixedLengthVector() &&
- useSVEForFixedLengthVectorVT(VT, Subtarget->forceStreamingCompatibleSVE())) {
+ useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) {
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
In1 = convertToScalableVector(DAG, ContainerVT, In1);
@@ -8665,8 +8666,7 @@ SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
assert(!IsParity && "ISD::PARITY of vector types not supported");
if (VT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE()))
+ useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
@@ -9026,6 +9026,24 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT));
}
+ // Check for SMAX(lhs, 0) and SMIN(lhs, 0) patterns.
+ // (SELECT_CC setgt, lhs, 0, lhs, 0) -> (BIC lhs, (SRA lhs, typesize-1))
+ // (SELECT_CC setlt, lhs, 0, lhs, 0) -> (AND lhs, (SRA lhs, typesize-1))
+ // Both require fewer instructions than a compare and conditional select.
+ if ((CC == ISD::SETGT || CC == ISD::SETLT) && LHS == TVal &&
+ RHSC && RHSC->isZero() && CFVal && CFVal->isZero() &&
+ LHS.getValueType() == RHS.getValueType()) {
+ EVT VT = LHS.getValueType();
+ SDValue Shift =
+ DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
+
+ if (CC == ISD::SETGT)
+ Shift = DAG.getNOT(dl, Shift, VT);
+
+ return DAG.getNode(ISD::AND, dl, VT, LHS, Shift);
+ }
+
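// A minimal scalar sketch of the transform above (illustrative helpers, not
// part of the lowering code), assuming the arithmetic right shift that
// ISD::SRA models for signed values:
#include <cstdint>
static inline int32_t smax_with_zero(int32_t x) {
  int32_t Sign = x >> 31; // all ones when x < 0, zero otherwise (the SRA node)
  return x & ~Sign;       // BIC x, Sign  ==  smax(x, 0)
}
static inline int32_t smin_with_zero(int32_t x) {
  int32_t Sign = x >> 31;
  return x & Sign;        // AND x, Sign  ==  smin(x, 0)
}
// e.g. x = -5: Sign is -1, so smax_with_zero(-5) == 0 and smin_with_zero(-5) == -5;
// x = 7: Sign is 0, so smax_with_zero(7) == 7 and smin_with_zero(7) == 0.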
unsigned Opcode = AArch64ISD::CSEL;
// If both the TVal and the FVal are constants, see if we can swap them in
@@ -9248,14 +9266,21 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
SDLoc DL(Op);
EVT Ty = Op.getValueType();
+ if (Ty == MVT::aarch64svcount) {
+ TVal = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i1, TVal);
+ FVal = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i1, FVal);
+ SDValue Sel =
+ DAG.getNode(ISD::SELECT, DL, MVT::nxv16i1, CCVal, TVal, FVal);
+ return DAG.getNode(ISD::BITCAST, DL, Ty, Sel);
+ }
+
if (Ty.isScalableVector()) {
- SDValue TruncCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, CCVal);
MVT PredVT = MVT::getVectorVT(MVT::i1, Ty.getVectorElementCount());
- SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, TruncCC);
+ SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, CCVal);
return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
}
- if (useSVEForFixedLengthVectorVT(Ty)) {
+ if (useSVEForFixedLengthVectorVT(Ty, !Subtarget->isNeonAvailable())) {
// FIXME: Ideally this would be the same as above using i1 types, however
// for the moment we can't deal with fixed i1 vector types properly, so
// instead extend the predicate to a result type sized integer vector.
@@ -9298,25 +9323,16 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
// If we are lowering an f16 and we do not have FullFP16, convert to an f32 in
// order to use FCSELSrrr
if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
- TVal = SDValue(
- DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
- DAG.getUNDEF(MVT::f32), TVal,
- DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
- 0);
- FVal = SDValue(
- DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
- DAG.getUNDEF(MVT::f32), FVal,
- DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
- 0);
+ TVal = DAG.getTargetInsertSubreg(AArch64::hsub, DL, MVT::f32,
+ DAG.getUNDEF(MVT::f32), TVal);
+ FVal = DAG.getTargetInsertSubreg(AArch64::hsub, DL, MVT::f32,
+ DAG.getUNDEF(MVT::f32), FVal);
}
SDValue Res = LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
- Res = SDValue(
- DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, Ty, Res,
- DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
- 0);
+ return DAG.getTargetExtractSubreg(AArch64::hsub, DL, Ty, Res);
}
return Res;
@@ -9741,14 +9757,16 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
else if (VT == MVT::f32)
IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
- else if (VT == MVT::f16 && Subtarget->hasFullFP16())
- IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();
- // TODO: fmov h0, w0 is also legal, however on't have an isel pattern to
- // generate that fmov.
+ else if (VT == MVT::f16)
+ IsLegal =
+ (Subtarget->hasFullFP16() && AArch64_AM::getFP16Imm(ImmInt) != -1) ||
+ Imm.isPosZero();
// If we can not materialize in immediate field for fmov, check if the
// value can be encoded as the immediate operand of a logical instruction.
// The immediate value will be created with either MOVZ, MOVN, or ORR.
+ // TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to
+ // generate that fmov.
if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
// The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
// however the mov+fmov sequence is always better because of the reduced
@@ -9756,13 +9774,12 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
// movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
// movw+movk is fused). So we limit it to at most 2 instructions.
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
- AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(),
- Insn);
+ AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(), Insn);
unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
IsLegal = Insn.size() <= Limit;
}
- LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
+ LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT
<< " imm value: "; Imm.dump(););
return IsLegal;
}
@@ -9935,6 +9952,72 @@ static PredicateConstraint parsePredicateConstraint(StringRef Constraint) {
return P;
}
+// The set of condition codes supported is taken from
+// https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html#Flag-Output-Operands
+static AArch64CC::CondCode parseConstraintCode(llvm::StringRef Constraint) {
+ AArch64CC::CondCode Cond = StringSwitch<AArch64CC::CondCode>(Constraint)
+ .Case("{@cchi}", AArch64CC::HI)
+ .Case("{@cccs}", AArch64CC::HS)
+ .Case("{@cclo}", AArch64CC::LO)
+ .Case("{@ccls}", AArch64CC::LS)
+ .Case("{@cccc}", AArch64CC::LO)
+ .Case("{@cceq}", AArch64CC::EQ)
+ .Case("{@ccgt}", AArch64CC::GT)
+ .Case("{@ccge}", AArch64CC::GE)
+ .Case("{@cclt}", AArch64CC::LT)
+ .Case("{@ccle}", AArch64CC::LE)
+ .Case("{@cchs}", AArch64CC::HS)
+ .Case("{@ccne}", AArch64CC::NE)
+ .Case("{@ccvc}", AArch64CC::VC)
+ .Case("{@ccpl}", AArch64CC::PL)
+ .Case("{@ccvs}", AArch64CC::VS)
+ .Case("{@ccmi}", AArch64CC::MI)
+ .Default(AArch64CC::Invalid);
+ return Cond;
+}
+
+/// Helper function to create 'CSET', which is equivalent to 'CSINC <Wd>, WZR,
+/// WZR, invert(<cond>)'.
+static SDValue getSETCC(AArch64CC::CondCode CC, SDValue NZCV, const SDLoc &DL,
+ SelectionDAG &DAG) {
+ return DAG.getNode(
+ AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(0, DL, MVT::i32),
+ DAG.getConstant(getInvertedCondCode(CC), DL, MVT::i32), NZCV);
+}
+
+// Lower @cc flag output via getSETCC.
+SDValue AArch64TargetLowering::LowerAsmOutputForConstraint(
+ SDValue &Chain, SDValue &Glue, const SDLoc &DL,
+ const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
+ AArch64CC::CondCode Cond = parseConstraintCode(OpInfo.ConstraintCode);
+ if (Cond == AArch64CC::Invalid)
+ return SDValue();
+ // The output variable should be a scalar integer.
+ if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
+ OpInfo.ConstraintVT.getSizeInBits() < 8)
+ report_fatal_error("Flag output operand is of invalid type");
+
+ // Get NZCV register. Only update chain when copyfrom is glued.
+ if (Glue.getNode()) {
+ Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, MVT::i32, Glue);
+ Chain = Glue.getValue(1);
+ } else
+ Glue = DAG.getCopyFromReg(Chain, DL, AArch64::NZCV, MVT::i32);
+ // Extract CC code.
+ SDValue CC = getSETCC(Cond, Glue, DL, DAG);
+
+ SDValue Result;
+
+ // Truncate or ZERO_EXTEND based on value types.
+ if (OpInfo.ConstraintVT.getSizeInBits() <= 32)
+ Result = DAG.getNode(ISD::TRUNCATE, DL, OpInfo.ConstraintVT, CC);
+ else
+ Result = DAG.getNode(ISD::ZERO_EXTEND, DL, OpInfo.ConstraintVT, CC);
+
+ return Result;
+}
+
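// A small usage sketch of the "=@cc<cond>" flag-output constraint handled
// above (GCC/Clang extended asm; the function and operands are assumed
// examples, not code from this file). The lowering reads NZCV and materialises
// the boolean with CSINC, i.e. a CSET on the inverted condition.
#include <cstdint>
static inline int same_value(uint64_t a, uint64_t b) {
  int Eq;
  asm("cmp %1, %2" : "=@cceq"(Eq) : "r"(a), "r"(b));
  return Eq;
}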
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
AArch64TargetLowering::ConstraintType
@@ -9967,6 +10050,8 @@ AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
} else if (parsePredicateConstraint(Constraint) !=
PredicateConstraint::Invalid)
return C_RegisterClass;
+ else if (parseConstraintCode(Constraint) != AArch64CC::Invalid)
+ return C_Other;
return TargetLowering::getConstraintType(Constraint);
}
@@ -10064,7 +10149,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
: std::make_pair(0U, &AArch64::PPRRegClass);
}
}
- if (StringRef("{cc}").equals_insensitive(Constraint))
+ if (StringRef("{cc}").equals_insensitive(Constraint) ||
+ parseConstraintCode(Constraint) != AArch64CC::Invalid)
return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
// Use the default implementation in TargetLowering to convert the register
@@ -10285,16 +10371,96 @@ static unsigned getExtFactor(SDValue &V) {
return EltType.getSizeInBits() / 8;
}
-/// NarrowVector - Given a value in the V128 register class, produce the
-/// equivalent value in the V64 register class.
-static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
- EVT VT = V128Reg.getValueType();
- unsigned WideSize = VT.getVectorNumElements();
- MVT EltTy = VT.getVectorElementType().getSimpleVT();
- MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
- SDLoc DL(V128Reg);
+// Check if a vector is built by extracting elements of one vector at indices
+// taken from a second vector, optionally narrowed by a constant AND mask so
+// that all indices stay in range. This can be reconstructed using AND and
+// NEON's TBL1.
+SDValue ReconstructShuffleWithRuntimeMask(SDValue Op, SelectionDAG &DAG) {
+ assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
+ SDLoc dl(Op);
+ EVT VT = Op.getValueType();
+ assert(!VT.isScalableVector() &&
+ "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
+
+ // Can only recreate a shuffle with 16xi8 or 8xi8 elements, as they map
+ // directly to TBL1.
+ if (VT != MVT::v16i8 && VT != MVT::v8i8)
+ return SDValue();
+
+ unsigned NumElts = VT.getVectorNumElements();
+ assert((NumElts == 8 || NumElts == 16) &&
+ "Need to have exactly 8 or 16 elements in vector.");
+
+ SDValue SourceVec;
+ SDValue MaskSourceVec;
+ SmallVector<SDValue, 16> AndMaskConstants;
+
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ SDValue OperandSourceVec = V.getOperand(0);
+ if (!SourceVec)
+ SourceVec = OperandSourceVec;
+ else if (SourceVec != OperandSourceVec)
+ return SDValue();
+
+ // This only looks at shuffles with elements that are
+ // a) truncated by a constant AND mask extracted from a mask vector, or
+ // b) extracted directly from a mask vector.
+ SDValue MaskSource = V.getOperand(1);
+ if (MaskSource.getOpcode() == ISD::AND) {
+ if (!isa<ConstantSDNode>(MaskSource.getOperand(1)))
+ return SDValue();
+
+ AndMaskConstants.push_back(MaskSource.getOperand(1));
+ MaskSource = MaskSource->getOperand(0);
+ } else if (!AndMaskConstants.empty()) {
+ // Either all or no operands should have an AND mask.
+ return SDValue();
+ }
+
+ // An ANY_EXTEND may be inserted between the AND and the source vector
+ // extraction. We don't care about that, so we can just skip it.
+ if (MaskSource.getOpcode() == ISD::ANY_EXTEND)
+ MaskSource = MaskSource.getOperand(0);
+
+ if (MaskSource.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+
+ SDValue MaskIdx = MaskSource.getOperand(1);
+ if (!isa<ConstantSDNode>(MaskIdx) ||
+ !cast<ConstantSDNode>(MaskIdx)->getConstantIntValue()->equalsInt(i))
+ return SDValue();
- return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
+ // We only apply this if all elements come from the same vector with the
+ // same vector type.
+ if (!MaskSourceVec) {
+ MaskSourceVec = MaskSource->getOperand(0);
+ if (MaskSourceVec.getValueType() != VT)
+ return SDValue();
+ } else if (MaskSourceVec != MaskSource->getOperand(0)) {
+ return SDValue();
+ }
+ }
+
+ // We need a v16i8 for TBL, so we extend the source with a placeholder vector
+ // for v8i8 to get a v16i8. As the pattern we are replacing is extract +
+ // insert, we know that the index in the mask must be smaller than the number
+ // of elements in the source, or we would have an out-of-bounds access.
+ if (NumElts == 8)
+ SourceVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, SourceVec,
+ DAG.getUNDEF(VT));
+
+ // Preconditions met, so we can use a vector (AND +) TBL to build this vector.
+ if (!AndMaskConstants.empty())
+ MaskSourceVec = DAG.getNode(ISD::AND, dl, VT, MaskSourceVec,
+ DAG.getBuildVector(VT, dl, AndMaskConstants));
+
+ return DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(Intrinsic::aarch64_neon_tbl1, dl, MVT::i32), SourceVec,
+ MaskSourceVec);
}
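// Roughly what the reconstructed DAG computes, written with NEON intrinsics
// (an assumed, illustrative equivalent rather than code from this lowering):
// the runtime indices are clamped by the constant AND mask and then used as a
// single-register table lookup.
#include <arm_neon.h>
static inline uint8x16_t masked_permute(uint8x16_t Src, uint8x16_t MaskIdx) {
  uint8x16_t Idx = vandq_u8(MaskIdx, vdupq_n_u8(0x0f)); // the constant AND mask
  return vqtbl1q_u8(Src, Idx);                          // NEON TBL1
}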
// Gather data to see if the operation can be modelled as a
@@ -11146,23 +11312,17 @@ static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1,
DAG.getConstant(Imm, dl, MVT::i32));
}
case OP_VUZPL:
- return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
- OpRHS);
+ return DAG.getNode(AArch64ISD::UZP1, dl, VT, OpLHS, OpRHS);
case OP_VUZPR:
- return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
- OpRHS);
+ return DAG.getNode(AArch64ISD::UZP2, dl, VT, OpLHS, OpRHS);
case OP_VZIPL:
- return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
- OpRHS);
+ return DAG.getNode(AArch64ISD::ZIP1, dl, VT, OpLHS, OpRHS);
case OP_VZIPR:
- return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
- OpRHS);
+ return DAG.getNode(AArch64ISD::ZIP2, dl, VT, OpLHS, OpRHS);
case OP_VTRNL:
- return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
- OpRHS);
+ return DAG.getNode(AArch64ISD::TRN1, dl, VT, OpLHS, OpRHS);
case OP_VTRNR:
- return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
- OpRHS);
+ return DAG.getNode(AArch64ISD::TRN2, dl, VT, OpLHS, OpRHS);
}
}
@@ -11465,8 +11625,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
- if (useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE()))
+ if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
return LowerFixedLengthVECTOR_SHUFFLEToSVE(Op, DAG);
// Convert shuffles that are directly supported on NEON to target-specific
@@ -11639,8 +11798,7 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- if (useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE()))
+ if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
return LowerToScalableOp(Op, DAG);
assert(VT.isScalableVector() && VT.getVectorElementType() == MVT::i1 &&
@@ -11762,7 +11920,7 @@ static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const SDValue *LHS = nullptr) {
EVT VT = Op.getValueType();
if (VT.isFixedLengthVector() &&
- DAG.getSubtarget<AArch64Subtarget>().forceStreamingCompatibleSVE())
+ !DAG.getSubtarget<AArch64Subtarget>().isNeonAvailable())
return SDValue();
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
@@ -11793,7 +11951,8 @@ static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
SDValue Mov;
if (LHS)
- Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
+ Mov = DAG.getNode(NewOp, dl, MovTy,
+ DAG.getNode(AArch64ISD::NVCAST, dl, MovTy, *LHS),
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
else
@@ -11814,7 +11973,7 @@ static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
const SDValue *LHS = nullptr) {
EVT VT = Op.getValueType();
if (VT.isFixedLengthVector() &&
- DAG.getSubtarget<AArch64Subtarget>().forceStreamingCompatibleSVE())
+ !DAG.getSubtarget<AArch64Subtarget>().isNeonAvailable())
return SDValue();
if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
@@ -11837,7 +11996,8 @@ static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
SDValue Mov;
if (LHS)
- Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
+ Mov = DAG.getNode(NewOp, dl, MovTy,
+ DAG.getNode(AArch64ISD::NVCAST, dl, MovTy, *LHS),
DAG.getConstant(Value, dl, MVT::i32),
DAG.getConstant(Shift, dl, MVT::i32));
else
@@ -12046,7 +12206,7 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType(),
- Subtarget->forceStreamingCompatibleSVE()))
+ !Subtarget->isNeonAvailable()))
return LowerToScalableOp(Op, DAG);
// Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
@@ -12166,8 +12326,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- if (useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE())) {
+ if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) {
if (auto SeqInfo = cast<BuildVectorSDNode>(Op)->isConstantSequence()) {
SDLoc DL(Op);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
@@ -12188,20 +12347,22 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (Op.getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
- if (VT.isInteger()) {
- // Certain vector constants, used to express things like logical NOT and
- // arithmetic NEG, are passed through unmodified. This allows special
- // patterns for these operations to match, which will lower these constants
- // to whatever is proven necessary.
- BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
- if (BVN->isConstant())
- if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
- unsigned BitSize = VT.getVectorElementType().getSizeInBits();
- APInt Val(BitSize,
- Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
- if (Val.isZero() || Val.isAllOnes())
- return Op;
- }
+ // Certain vector constants, used to express things like logical NOT and
+ // arithmetic NEG, are passed through unmodified. This allows special
+ // patterns for these operations to match, which will lower these constants
+ // to whatever is proven necessary.
+ BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
+ if (BVN->isConstant()) {
+ if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
+ unsigned BitSize = VT.getVectorElementType().getSizeInBits();
+ APInt Val(BitSize,
+ Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
+ if (Val.isZero() || (VT.isInteger() && Val.isAllOnes()))
+ return Op;
+ }
+ if (ConstantFPSDNode *Const = BVN->getConstantFPSplatNode())
+ if (Const->isZero() && !Const->isNegative())
+ return Op;
}
if (SDValue V = ConstantBuildVector(Op, DAG))
@@ -12231,6 +12392,9 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
unsigned NumUndefLanes = 0;
SDValue Value;
SDValue ConstantValue;
+ SmallMapVector<SDValue, unsigned, 16> DifferentValueMap;
+ unsigned ConsecutiveValCount = 0;
+ SDValue PrevVal;
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
@@ -12258,6 +12422,24 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
usesOnlyOneValue = false;
++NumDifferentLanes;
}
+
+ if (PrevVal != V) {
+ ConsecutiveValCount = 0;
+ PrevVal = V;
+ }
+
+ // Keep the different values and their last consecutive counts. For example,
+ //
+ // t22: v16i8 = build_vector t23, t23, t23, t23, t23, t23, t23, t23,
+ // t24, t24, t24, t24, t24, t24, t24, t24
+ // t23 = consecutive count 8
+ // t24 = consecutive count 8
+ // ------------------------------------------------------------------
+ // t22: v16i8 = build_vector t24, t24, t23, t23, t23, t23, t23, t24,
+ // t24, t24, t24, t24, t24, t24, t24, t24
+ // t23 = consecutive count 5
+ // t24 = consecutive count 9
+ DifferentValueMap[V] = ++ConsecutiveValCount;
}
if (!Value.getNode()) {
@@ -12284,8 +12466,11 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
for (unsigned i = 0; i < NumElts; ++i) {
SDValue V = Op.getOperand(i);
const SDNode *N = V.getNode();
- if (!isa<ConstantSDNode>(N->getOperand(1)))
+ if (!isa<ConstantSDNode>(N->getOperand(1))) {
+ Even = false;
+ Odd = false;
break;
+ }
SDValue N0 = N->getOperand(0);
// All elements are extracted from the same vector.
@@ -12398,12 +12583,17 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
// for each lane.
if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
// Firstly, try to materialize the splat constant.
- SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
- Val = ConstantBuildVector(Vec, DAG);
- if (!Val) {
- // Otherwise, materialize the constant and splat it.
- Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
- DAG.ReplaceAllUsesWith(Vec.getNode(), &Val);
+ SDValue Val = DAG.getSplatBuildVector(VT, dl, ConstantValue);
+ unsigned BitSize = VT.getScalarSizeInBits();
+ APInt ConstantValueAPInt(1, 0);
+ if (auto *C = dyn_cast<ConstantSDNode>(ConstantValue))
+ ConstantValueAPInt = C->getAPIntValue().zextOrTrunc(BitSize);
+ if (!isNullConstant(ConstantValue) && !isNullFPConstant(ConstantValue) &&
+ !ConstantValueAPInt.isAllOnes()) {
+ Val = ConstantBuildVector(Val, DAG);
+ if (!Val)
+ // Otherwise, materialize the constant and splat it.
+ Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
}
// Now insert the non-constant lanes.
@@ -12434,8 +12624,11 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
// Empirical tests suggest this is rarely worth it for vectors of length <= 2.
if (NumElts >= 4) {
- if (SDValue shuffle = ReconstructShuffle(Op, DAG))
- return shuffle;
+ if (SDValue Shuffle = ReconstructShuffle(Op, DAG))
+ return Shuffle;
+
+ if (SDValue Shuffle = ReconstructShuffleWithRuntimeMask(Op, DAG))
+ return Shuffle;
}
if (PreferDUPAndInsert) {
@@ -12452,6 +12645,82 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
return NewVector;
}
+ // If vector consists of two different values, try to generate two DUPs and
+ // (CONCAT_VECTORS or VECTOR_SHUFFLE).
+ if (DifferentValueMap.size() == 2 && NumUndefLanes == 0) {
+ SmallVector<SDValue, 2> Vals;
+ // Check the consecutive count of the value is the half number of vector
+ // elements. In this case, we can use CONCAT_VECTORS. For example,
+ //
+ // canUseVECTOR_CONCAT = true;
+ // t22: v16i8 = build_vector t23, t23, t23, t23, t23, t23, t23, t23,
+ // t24, t24, t24, t24, t24, t24, t24, t24
+ //
+ // canUseVECTOR_CONCAT = false;
+ // t22: v16i8 = build_vector t23, t23, t23, t23, t23, t24, t24, t24,
+ // t24, t24, t24, t24, t24, t24, t24, t24
+ bool canUseVECTOR_CONCAT = true;
+ for (auto Pair : DifferentValueMap) {
+ // Check that each different value has a consecutive count of NumElts / 2.
+ if (Pair.second != NumElts / 2)
+ canUseVECTOR_CONCAT = false;
+ Vals.push_back(Pair.first);
+ }
+
+ // If canUseVECTOR_CONCAT is true, we can generate two DUPs and
+ // CONCAT_VECTORs. For example,
+ //
+ // t22: v16i8 = BUILD_VECTOR t23, t23, t23, t23, t23, t23, t23, t23,
+ // t24, t24, t24, t24, t24, t24, t24, t24
+ // ==>
+ // t26: v8i8 = AArch64ISD::DUP t23
+ // t28: v8i8 = AArch64ISD::DUP t24
+ // t29: v16i8 = concat_vectors t26, t28
+ if (canUseVECTOR_CONCAT) {
+ EVT SubVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
+ if (isTypeLegal(SubVT) && SubVT.isVector() &&
+ SubVT.getVectorNumElements() >= 2) {
+ SmallVector<SDValue, 8> Ops1(NumElts / 2, Vals[0]);
+ SmallVector<SDValue, 8> Ops2(NumElts / 2, Vals[1]);
+ SDValue DUP1 =
+ LowerBUILD_VECTOR(DAG.getBuildVector(SubVT, dl, Ops1), DAG);
+ SDValue DUP2 =
+ LowerBUILD_VECTOR(DAG.getBuildVector(SubVT, dl, Ops2), DAG);
+ SDValue CONCAT_VECTORS =
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, DUP1, DUP2);
+ return CONCAT_VECTORS;
+ }
+ }
+
+ // Let's try to generate VECTOR_SHUFFLE. For example,
+ //
+ // t24: v8i8 = BUILD_VECTOR t25, t25, t25, t25, t26, t26, t26, t26
+ // ==>
+ // t27: v8i8 = BUILD_VECTOR t26, t26, t26, t26, t26, t26, t26, t26
+ // t28: v8i8 = BUILD_VECTOR t25, t25, t25, t25, t25, t25, t25, t25
+ // t29: v8i8 = vector_shuffle<0,1,2,3,12,13,14,15> t27, t28
+ if (NumElts >= 8) {
+ SmallVector<int, 16> MaskVec;
+ // Build the mask for VECTOR_SHUFFLE.
+ SDValue FirstLaneVal = Op.getOperand(0);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Val = Op.getOperand(i);
+ if (FirstLaneVal == Val)
+ MaskVec.push_back(i);
+ else
+ MaskVec.push_back(i + NumElts);
+ }
+
+ SmallVector<SDValue, 8> Ops1(NumElts, Vals[0]);
+ SmallVector<SDValue, 8> Ops2(NumElts, Vals[1]);
+ SDValue VEC1 = DAG.getBuildVector(VT, dl, Ops1);
+ SDValue VEC2 = DAG.getBuildVector(VT, dl, Ops2);
+ SDValue VECTOR_SHUFFLE =
+ DAG.getVectorShuffle(VT, dl, VEC1, VEC2, MaskVec);
+ return VECTOR_SHUFFLE;
+ }
+ }
+
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
// know the default expansion would otherwise fall back on something even
// worse. For a vector with one or two non-undef values, that's
@@ -12503,7 +12772,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType(),
- Subtarget->forceStreamingCompatibleSVE()))
+ !Subtarget->isNeonAvailable()))
return LowerFixedLengthConcatVectorsToSVE(Op, DAG);
assert(Op.getValueType().isScalableVector() &&
@@ -12542,10 +12811,9 @@ SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
if (useSVEForFixedLengthVectorVT(Op.getValueType(),
- Subtarget->forceStreamingCompatibleSVE()))
+ !Subtarget->isNeonAvailable()))
return LowerFixedLengthInsertVectorElt(Op, DAG);
- // Check for non-constant or out of range lane.
EVT VT = Op.getOperand(0).getValueType();
if (VT.getScalarType() == MVT::i1) {
@@ -12564,31 +12832,12 @@ SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
return DAG.getAnyExtOrTrunc(ExtendedVector, DL, VT);
}
+ // Check for non-constant or out of range lane.
ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
return SDValue();
- // Insertion/extraction are legal for V128 types.
- if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
- VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
- VT == MVT::v8f16 || VT == MVT::v8bf16)
- return Op;
-
- if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
- VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
- VT != MVT::v4bf16)
- return SDValue();
-
- // For V64 types, we perform insertion by expanding the value
- // to a V128 type and perform the insertion on that.
- SDLoc DL(Op);
- SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
- EVT WideTy = WideVec.getValueType();
-
- SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
- Op.getOperand(1), Op.getOperand(2));
- // Re-narrow the resultant vector.
- return NarrowVector(Node, DAG);
+ return Op;
}
SDValue
@@ -12610,8 +12859,7 @@ AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
return DAG.getAnyExtOrTrunc(Extract, DL, Op.getValueType());
}
- if (useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE()))
+ if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
return LowerFixedLengthExtractVectorElt(Op, DAG);
// Check for non-constant or out of range lane.
@@ -12673,11 +12921,10 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
// If this is extracting the upper 64-bits of a 128-bit vector, we match
// that directly.
if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
- InVT.getSizeInBits() == 128 && !Subtarget->forceStreamingCompatibleSVE())
+ InVT.getSizeInBits() == 128 && Subtarget->isNeonAvailable())
return Op;
- if (useSVEForFixedLengthVectorVT(InVT,
- Subtarget->forceStreamingCompatibleSVE())) {
+ if (useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable())) {
SDLoc DL(Op);
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
@@ -12865,8 +13112,7 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
// Currently no fixed length shuffles that require SVE are legal.
- if (useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE()))
+ if (useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
return false;
if (VT.getVectorNumElements() == 4 &&
@@ -12959,7 +13205,7 @@ SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
return SDValue();
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType(),
- Subtarget->forceStreamingCompatibleSVE()))
+ !Subtarget->isNeonAvailable()))
return LowerFixedLengthVectorTruncateToSVE(Op, DAG);
return SDValue();
@@ -12978,8 +13224,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
switch (Op.getOpcode()) {
case ISD::SHL:
if (VT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(VT,
- Subtarget->forceStreamingCompatibleSVE()))
+ useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
@@ -12992,8 +13237,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
case ISD::SRA:
case ISD::SRL:
if (VT.isScalableVector() ||
- useSVEForFixedLengthVectorVT(
- VT, Subtarget->forceStreamingCompatibleSVE())) {
+ useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable())) {
unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
: AArch64ISD::SRL_PRED;
return LowerToPredicatedOp(Op, DAG, Opc);
@@ -13131,7 +13375,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType(),
- Subtarget->forceStreamingCompatibleSVE()))
+ !Subtarget->isNeonAvailable()))
return LowerFixedLengthVectorSetccToSVE(Op, DAG);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
@@ -13203,13 +13447,113 @@ static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
DAG.getConstant(0, DL, MVT::i64));
}
+static SDValue getVectorBitwiseReduce(unsigned Opcode, SDValue Vec, EVT VT,
+ SDLoc DL, SelectionDAG &DAG) {
+ unsigned ScalarOpcode;
+ switch (Opcode) {
+ case ISD::VECREDUCE_AND:
+ ScalarOpcode = ISD::AND;
+ break;
+ case ISD::VECREDUCE_OR:
+ ScalarOpcode = ISD::OR;
+ break;
+ case ISD::VECREDUCE_XOR:
+ ScalarOpcode = ISD::XOR;
+ break;
+ default:
+ llvm_unreachable("Expected bitwise vector reduction");
+ return SDValue();
+ }
+
+ EVT VecVT = Vec.getValueType();
+ assert(VecVT.isFixedLengthVector() && VecVT.isPow2VectorType() &&
+ "Expected power-of-2 length vector");
+
+ EVT ElemVT = VecVT.getVectorElementType();
+
+ SDValue Result;
+ unsigned NumElems = VecVT.getVectorNumElements();
+
+ // Special case for boolean reductions
+ if (ElemVT == MVT::i1) {
+ // Split large vectors into smaller ones
+ if (NumElems > 16) {
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL);
+ EVT HalfVT = Lo.getValueType();
+ SDValue HalfVec = DAG.getNode(ScalarOpcode, DL, HalfVT, Lo, Hi);
+ return getVectorBitwiseReduce(Opcode, HalfVec, VT, DL, DAG);
+ }
+
+ // Vectors that are less than 64 bits get widened to neatly fit a 64 bit
+ // register, so e.g. <4 x i1> gets lowered to <4 x i16>. Sign extending to
+ // this element size leads to the best codegen, since e.g. setcc results
+ // might need to be truncated otherwise.
+ EVT ExtendedVT = MVT::getIntegerVT(std::max(64u / NumElems, 8u));
+
+ // any_ext doesn't work with umin/umax, so only use it for uadd.
+ unsigned ExtendOp =
+ ScalarOpcode == ISD::XOR ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
+ SDValue Extended = DAG.getNode(
+ ExtendOp, DL, VecVT.changeVectorElementType(ExtendedVT), Vec);
+ switch (ScalarOpcode) {
+ case ISD::AND:
+ Result = DAG.getNode(ISD::VECREDUCE_UMIN, DL, ExtendedVT, Extended);
+ break;
+ case ISD::OR:
+ Result = DAG.getNode(ISD::VECREDUCE_UMAX, DL, ExtendedVT, Extended);
+ break;
+ case ISD::XOR:
+ Result = DAG.getNode(ISD::VECREDUCE_ADD, DL, ExtendedVT, Extended);
+ break;
+ default:
+ llvm_unreachable("Unexpected Opcode");
+ }
+
+ Result = DAG.getAnyExtOrTrunc(Result, DL, MVT::i1);
+ } else {
+ // Iteratively split the vector in half and combine using the bitwise
+ // operation until it fits in a 64 bit register.
+ while (VecVT.getSizeInBits() > 64) {
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL);
+ VecVT = Lo.getValueType();
+ NumElems = VecVT.getVectorNumElements();
+ Vec = DAG.getNode(ScalarOpcode, DL, VecVT, Lo, Hi);
+ }
+
+ EVT ScalarVT = EVT::getIntegerVT(*DAG.getContext(), VecVT.getSizeInBits());
+
+ // Do the remaining work on a scalar since it allows the code generator to
+ // combine the shift and bitwise operation into one instruction and since
+ // integer instructions can have higher throughput than vector instructions.
+ SDValue Scalar = DAG.getBitcast(ScalarVT, Vec);
+
+ // Iteratively combine the lower and upper halves of the scalar using the
+ // bitwise operation, halving the relevant region of the scalar in each
+ // iteration, until the relevant region is just one element of the original
+ // vector.
+ for (unsigned Shift = NumElems / 2; Shift > 0; Shift /= 2) {
+ SDValue ShiftAmount =
+ DAG.getConstant(Shift * ElemVT.getSizeInBits(), DL, MVT::i64);
+ SDValue Shifted =
+ DAG.getNode(ISD::SRL, DL, ScalarVT, Scalar, ShiftAmount);
+ Scalar = DAG.getNode(ScalarOpcode, DL, ScalarVT, Scalar, Shifted);
+ }
+
+ Result = DAG.getAnyExtOrTrunc(Scalar, DL, ElemVT);
+ }
+
+ return DAG.getAnyExtOrTrunc(Result, DL, VT);
+}
+
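// A scalar model of the non-boolean path above (illustrative helper, assuming
// eight i8 lanes bitcast into one 64-bit integer): each step folds the upper
// half of the remaining region onto the lower half with the reduction op,
// mirroring the SRL + op loop, until lane 0 holds the result.
#include <cstdint>
static inline uint8_t xor_reduce_v8i8(uint64_t Packed) {
  for (unsigned Lanes = 4; Lanes > 0; Lanes /= 2) // fold 4, then 2, then 1 lane
    Packed ^= Packed >> (Lanes * 8);              // shift by Lanes * element bits
  return static_cast<uint8_t>(Packed);            // low byte == XOR of all lanes
}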
SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
SelectionDAG &DAG) const {
SDValue Src = Op.getOperand(0);
// Try to lower fixed length reductions to SVE.
EVT SrcVT = Src.getValueType();
- bool OverrideNEON = Subtarget->forceStreamingCompatibleSVE() ||
+ bool OverrideNEON = !Subtarget->isNeonAvailable() ||
Op.getOpcode() == ISD::VECREDUCE_AND ||
Op.getOpcode() == ISD::VECREDUCE_OR ||
Op.getOpcode() == ISD::VECREDUCE_XOR ||
@@ -13246,6 +13590,10 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
case ISD::VECREDUCE_FMIN:
return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
+ case ISD::VECREDUCE_FMAXIMUM:
+ return LowerReductionToSVE(AArch64ISD::FMAXV_PRED, Op, DAG);
+ case ISD::VECREDUCE_FMINIMUM:
+ return LowerReductionToSVE(AArch64ISD::FMINV_PRED, Op, DAG);
default:
llvm_unreachable("Unhandled fixed length reduction");
}
@@ -13254,6 +13602,11 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
// Lower NEON reductions.
SDLoc dl(Op);
switch (Op.getOpcode()) {
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ return getVectorBitwiseReduce(Op.getOpcode(), Op.getOperand(0),
+ Op.getValueType(), dl, DAG);
case ISD::VECREDUCE_ADD:
return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
case ISD::VECREDUCE_SMAX:
@@ -13264,18 +13617,6 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
case ISD::VECREDUCE_UMIN:
return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
- case ISD::VECREDUCE_FMAX: {
- return DAG.getNode(
- ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
- DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
- Src);
- }
- case ISD::VECREDUCE_FMIN: {
- return DAG.getNode(
- ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
- DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
- Src);
- }
default:
llvm_unreachable("Unhandled reduction");
}
@@ -13301,12 +13642,14 @@ SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
+ // No point replacing if we don't have the relevant instruction/libcall anyway
if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
return SDValue();
// LSE has an atomic load-clear instruction, but not a load-and.
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
+ assert(VT != MVT::i128 && "Handled elsewhere, code replicated.");
SDValue RHS = Op.getOperand(2);
AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS);
@@ -13344,6 +13687,57 @@ SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
return Chain;
}
+// When x and y are extended, lower:
+// avgfloor(x, y) -> (x + y) >> 1
+// avgceil(x, y) -> (x + y + 1) >> 1
+
+// Otherwise, lower to:
+// avgfloor(x, y) -> (x >> 1) + (y >> 1) + (x & y & 1)
+// avgceil(x, y) -> (x >> 1) + (y >> 1) + ((x | y) & 1)
+SDValue AArch64TargetLowering::LowerAVG(SDValue Op, SelectionDAG &DAG,
+ unsigned NewOp) const {
+ if (Subtarget->hasSVE2())
+ return LowerToPredicatedOp(Op, DAG, NewOp);
+
+ SDLoc dl(Op);
+ SDValue OpA = Op->getOperand(0);
+ SDValue OpB = Op->getOperand(1);
+ EVT VT = Op.getValueType();
+ bool IsCeil =
+ (Op->getOpcode() == ISD::AVGCEILS || Op->getOpcode() == ISD::AVGCEILU);
+ bool IsSigned =
+ (Op->getOpcode() == ISD::AVGFLOORS || Op->getOpcode() == ISD::AVGCEILS);
+ unsigned ShiftOpc = IsSigned ? ISD::SRA : ISD::SRL;
+
+ assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");
+
+ auto IsZeroExtended = [&DAG](SDValue &Node) {
+ KnownBits Known = DAG.computeKnownBits(Node, 0);
+ return Known.Zero.isSignBitSet();
+ };
+
+ auto IsSignExtended = [&DAG](SDValue &Node) {
+ return (DAG.ComputeNumSignBits(Node, 0) > 1);
+ };
+
+ SDValue ConstantOne = DAG.getConstant(1, dl, VT);
+ if ((!IsSigned && IsZeroExtended(OpA) && IsZeroExtended(OpB)) ||
+ (IsSigned && IsSignExtended(OpA) && IsSignExtended(OpB))) {
+ SDValue Add = DAG.getNode(ISD::ADD, dl, VT, OpA, OpB);
+ if (IsCeil)
+ Add = DAG.getNode(ISD::ADD, dl, VT, Add, ConstantOne);
+ return DAG.getNode(ShiftOpc, dl, VT, Add, ConstantOne);
+ }
+
+ SDValue ShiftOpA = DAG.getNode(ShiftOpc, dl, VT, OpA, ConstantOne);
+ SDValue ShiftOpB = DAG.getNode(ShiftOpc, dl, VT, OpB, ConstantOne);
+
+ SDValue tmp = DAG.getNode(IsCeil ? ISD::OR : ISD::AND, dl, VT, OpA, OpB);
+ tmp = DAG.getNode(ISD::AND, dl, VT, tmp, ConstantOne);
+ SDValue Add = DAG.getNode(ISD::ADD, dl, VT, ShiftOpA, ShiftOpB);
+ return DAG.getNode(ISD::ADD, dl, VT, Add, tmp);
+}
+
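// Scalar sketch of the fallback formulas above (illustrative, per-lane view of
// the vector op; uint8_t stands in for one unsigned lane). The point is that
// neither form needs a wider intermediate add:
#include <cstdint>
static inline uint8_t avgfloor_u8(uint8_t x, uint8_t y) {
  return (x >> 1) + (y >> 1) + (x & y & 1);
}
static inline uint8_t avgceil_u8(uint8_t x, uint8_t y) {
  return (x >> 1) + (y >> 1) + ((x | y) & 1);
}
// e.g. avgfloor_u8(255, 254) == 254 and avgceil_u8(255, 254) == 255, even
// though 255 + 254 does not fit in an 8-bit lane; when both inputs are known
// to be extended, the cheaper (x + y [+ 1]) >> 1 form is safe and used instead.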
SDValue
AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
@@ -13604,6 +13998,22 @@ bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
return true;
}
+// Treat a sext_inreg(extract(..)) as free if it has multiple uses.
+bool AArch64TargetLowering::shouldRemoveRedundantExtend(SDValue Extend) const {
+ EVT VT = Extend.getValueType();
+ if ((VT == MVT::i64 || VT == MVT::i32) && Extend->use_size()) {
+ SDValue Extract = Extend.getOperand(0);
+ if (Extract.getOpcode() == ISD::ANY_EXTEND && Extract.hasOneUse())
+ Extract = Extract.getOperand(0);
+ if (Extract.getOpcode() == ISD::EXTRACT_VECTOR_ELT && Extract.hasOneUse()) {
+ EVT VecVT = Extract.getOperand(0).getValueType();
+ if (VecVT.getScalarType() == MVT::i8 || VecVT.getScalarType() == MVT::i16)
+ return false;
+ }
+ }
+ return true;
+}
+
// Truncations from 64-bit GPR to 32-bit GPR is free.
bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
@@ -13709,8 +14119,10 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
// 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
// Get the shift amount based on the scaling factor:
// log2(sizeof(IdxTy)) - log2(8).
+ if (IdxTy->isScalableTy())
+ return false;
uint64_t ShiftAmt =
- countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedValue()) -
+ llvm::countr_zero(DL.getTypeStoreSizeInBits(IdxTy).getFixedValue()) -
3;
// Is the constant foldable in the shift of the addressing mode?
// I.e., shift amount is between 1 and 4 inclusive.
@@ -13948,6 +14360,43 @@ bool AArch64TargetLowering::shouldSinkOperands(
return true;
}
+ case Instruction::Or: {
+ // Pattern: Or(And(MaskValue, A), And(Not(MaskValue), B)) ->
+ // bitselect(MaskValue, A, B) where Not(MaskValue) = Xor(MaskValue, -1)
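+    // For illustration, the matched IR looks roughly like this (hypothetical
+    // names, shown for <16 x i8>):
+    //   %not = xor <16 x i8> %mask, <i8 -1, i8 -1, ...>
+    //   %ma  = and <16 x i8> %mask, %a
+    //   %mb  = and <16 x i8> %not, %b
+    //   %r   = or <16 x i8> %ma, %mb  ; takes %a where %mask is set, else %b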
+ if (Subtarget->hasNEON()) {
+ Instruction *OtherAnd, *IA, *IB;
+ Value *MaskValue;
+      // MainAnd refers to the And instruction that has 'Not' as one of its operands
+ if (match(I, m_c_Or(m_OneUse(m_Instruction(OtherAnd)),
+ m_OneUse(m_c_And(m_OneUse(m_Not(m_Value(MaskValue))),
+ m_Instruction(IA)))))) {
+ if (match(OtherAnd,
+ m_c_And(m_Specific(MaskValue), m_Instruction(IB)))) {
+ Instruction *MainAnd = I->getOperand(0) == OtherAnd
+ ? cast<Instruction>(I->getOperand(1))
+ : cast<Instruction>(I->getOperand(0));
+
+          // Both Ands should be in the same basic block as the Or
+ if (I->getParent() != MainAnd->getParent() ||
+ I->getParent() != OtherAnd->getParent())
+ return false;
+
+          // Non-mask operands of both Ands should also be in the same basic block
+ if (I->getParent() != IA->getParent() ||
+ I->getParent() != IB->getParent())
+ return false;
+
+ Ops.push_back(&MainAnd->getOperandUse(MainAnd->getOperand(0) == IA ? 1 : 0));
+ Ops.push_back(&I->getOperandUse(0));
+ Ops.push_back(&I->getOperandUse(1));
+
+ return true;
+ }
+ }
+ }
+
+ return false;
+ }
case Instruction::Mul: {
int NumZExts = 0, NumSExts = 0;
for (auto &Op : I->operands()) {
@@ -13994,7 +14443,7 @@ bool AArch64TargetLowering::shouldSinkOperands(
ConstantInt *ElementConstant =
dyn_cast<ConstantInt>(Insert->getOperand(2));
// Check that the insertelement is inserting into element 0
- if (!ElementConstant || ElementConstant->getZExtValue() != 0)
+ if (!ElementConstant || !ElementConstant->isZero())
continue;
unsigned Opcode = OperandInstr->getOpcode();
@@ -14026,12 +14475,15 @@ bool AArch64TargetLowering::shouldSinkOperands(
return false;
}
-static void createTblShuffleForZExt(ZExtInst *ZExt, bool IsLittleEndian) {
+static bool createTblShuffleForZExt(ZExtInst *ZExt, FixedVectorType *DstTy,
+ bool IsLittleEndian) {
Value *Op = ZExt->getOperand(0);
auto *SrcTy = cast<FixedVectorType>(Op->getType());
- auto *DstTy = cast<FixedVectorType>(ZExt->getType());
auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
+ if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth >= 64)
+ return false;
+
assert(DstWidth % SrcWidth == 0 &&
"TBL lowering is not supported for a ZExt instruction with this "
"source & destination element type.");
@@ -14060,8 +14512,11 @@ static void createTblShuffleForZExt(ZExtInst *ZExt, bool IsLittleEndian) {
PoisonValue::get(SrcTy), Builder.getInt8(0), uint64_t(0));
Value *Result = Builder.CreateShuffleVector(Op, FirstEltZero, Mask);
Result = Builder.CreateBitCast(Result, DstTy);
+ if (DstTy != ZExt->getType())
+ Result = Builder.CreateZExt(Result, ZExt->getType());
ZExt->replaceAllUsesWith(Result);
ZExt->eraseFromParent();
+ return true;
}
static void createTblForTrunc(TruncInst *TI, bool IsLittleEndian) {
@@ -14183,8 +14638,8 @@ static void createTblForTrunc(TruncInst *TI, bool IsLittleEndian) {
TI->eraseFromParent();
}
-bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(Instruction *I,
- Loop *L) const {
+bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
+ Instruction *I, Loop *L, const TargetTransformInfo &TTI) const {
// shuffle_vector instructions are serialized when targeting SVE,
// see LowerSPLAT_VECTOR. This peephole is not beneficial.
if (Subtarget->useSVEForFixedLengthVectors())
@@ -14209,11 +14664,25 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(Instruction *I,
// into i8x lanes. This is enabled for cases where it is beneficial.
auto *ZExt = dyn_cast<ZExtInst>(I);
if (ZExt && SrcTy->getElementType()->isIntegerTy(8)) {
- auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
- if (DstWidth % 8 == 0 && DstWidth > 16 && DstWidth < 64) {
- createTblShuffleForZExt(ZExt, Subtarget->isLittleEndian());
- return true;
+ auto DstWidth = DstTy->getElementType()->getScalarSizeInBits();
+ if (DstWidth % 8 != 0)
+ return false;
+
+ auto *TruncDstType =
+ cast<FixedVectorType>(VectorType::getTruncatedElementVectorType(DstTy));
+ // If the ZExt can be lowered to a single ZExt to the next power-of-2 and
+ // the remaining ZExt folded into the user, don't use tbl lowering.
+ auto SrcWidth = SrcTy->getElementType()->getScalarSizeInBits();
+ if (TTI.getCastInstrCost(I->getOpcode(), DstTy, TruncDstType,
+ TargetTransformInfo::getCastContextHint(I),
+ TTI::TCK_SizeAndLatency, I) == TTI::TCC_Free) {
+ if (SrcWidth * 2 >= TruncDstType->getElementType()->getScalarSizeInBits())
+ return false;
+
+ DstTy = TruncDstType;
}
+
+ return createTblShuffleForZExt(ZExt, DstTy, Subtarget->isLittleEndian());
}
auto *UIToFP = dyn_cast<UIToFPInst>(I);
@@ -14225,8 +14694,8 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(Instruction *I,
auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
I->replaceAllUsesWith(UI);
I->eraseFromParent();
- createTblShuffleForZExt(ZExt, Subtarget->isLittleEndian());
- return true;
+ return createTblShuffleForZExt(ZExt, cast<FixedVectorType>(ZExt->getType()),
+ Subtarget->isLittleEndian());
}
// Convert 'fptoui <(8|16) x float> to <(8|16) x i8>' to a wide fptoui
@@ -14278,9 +14747,11 @@ bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
unsigned AArch64TargetLowering::getNumInterleavedAccesses(
VectorType *VecTy, const DataLayout &DL, bool UseScalable) const {
unsigned VecSize = 128;
+ unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
+ unsigned MinElts = VecTy->getElementCount().getKnownMinValue();
if (UseScalable)
VecSize = std::max(Subtarget->getMinSVEVectorSizeInBits(), 128u);
- return std::max<unsigned>(1, (DL.getTypeSizeInBits(VecTy) + 127) / VecSize);
+ return std::max<unsigned>(1, (MinElts * ElSize + 127) / VecSize);
}
MachineMemOperand::Flags
@@ -14293,30 +14764,41 @@ AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
bool AArch64TargetLowering::isLegalInterleavedAccessType(
VectorType *VecTy, const DataLayout &DL, bool &UseScalable) const {
-
- unsigned VecSize = DL.getTypeSizeInBits(VecTy);
unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
- unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
+ auto EC = VecTy->getElementCount();
+ unsigned MinElts = EC.getKnownMinValue();
UseScalable = false;
+ if (!VecTy->isScalableTy() && !Subtarget->hasNEON())
+ return false;
+
+ if (VecTy->isScalableTy() && !Subtarget->hasSVEorSME())
+ return false;
+
// Ensure that the predicate for this number of elements is available.
- if (Subtarget->hasSVE() && !getSVEPredPatternFromNumElements(NumElements))
+ if (Subtarget->hasSVE() && !getSVEPredPatternFromNumElements(MinElts))
return false;
// Ensure the number of vector elements is greater than 1.
- if (NumElements < 2)
+ if (MinElts < 2)
return false;
// Ensure the element type is legal.
if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
return false;
- if (Subtarget->forceStreamingCompatibleSVE() ||
+ if (EC.isScalable()) {
+ UseScalable = true;
+ return isPowerOf2_32(MinElts) && (MinElts * ElSize) % 128 == 0;
+ }
+
+ unsigned VecSize = DL.getTypeSizeInBits(VecTy);
+ if (!Subtarget->isNeonAvailable() ||
(Subtarget->useSVEForFixedLengthVectors() &&
(VecSize % Subtarget->getMinSVEVectorSizeInBits() == 0 ||
(VecSize < Subtarget->getMinSVEVectorSizeInBits() &&
- isPowerOf2_32(NumElements) && VecSize > 128)))) {
+ isPowerOf2_32(MinElts) && VecSize > 128)))) {
UseScalable = true;
return true;
}
@@ -14354,6 +14836,38 @@ static ScalableVectorType *getSVEContainerIRType(FixedVectorType *VTy) {
llvm_unreachable("Cannot handle input vector type");
}
+static Function *getStructuredLoadFunction(Module *M, unsigned Factor,
+ bool Scalable, Type *LDVTy,
+ Type *PtrTy) {
+ assert(Factor >= 2 && Factor <= 4 && "Invalid interleave factor");
+ static const Intrinsic::ID SVELoads[3] = {Intrinsic::aarch64_sve_ld2_sret,
+ Intrinsic::aarch64_sve_ld3_sret,
+ Intrinsic::aarch64_sve_ld4_sret};
+ static const Intrinsic::ID NEONLoads[3] = {Intrinsic::aarch64_neon_ld2,
+ Intrinsic::aarch64_neon_ld3,
+ Intrinsic::aarch64_neon_ld4};
+ if (Scalable)
+ return Intrinsic::getDeclaration(M, SVELoads[Factor - 2], {LDVTy});
+
+ return Intrinsic::getDeclaration(M, NEONLoads[Factor - 2], {LDVTy, PtrTy});
+}
+
+static Function *getStructuredStoreFunction(Module *M, unsigned Factor,
+ bool Scalable, Type *STVTy,
+ Type *PtrTy) {
+ assert(Factor >= 2 && Factor <= 4 && "Invalid interleave factor");
+ static const Intrinsic::ID SVEStores[3] = {Intrinsic::aarch64_sve_st2,
+ Intrinsic::aarch64_sve_st3,
+ Intrinsic::aarch64_sve_st4};
+ static const Intrinsic::ID NEONStores[3] = {Intrinsic::aarch64_neon_st2,
+ Intrinsic::aarch64_neon_st3,
+ Intrinsic::aarch64_neon_st4};
+ if (Scalable)
+ return Intrinsic::getDeclaration(M, SVEStores[Factor - 2], {STVTy});
+
+ return Intrinsic::getDeclaration(M, NEONStores[Factor - 2], {STVTy, PtrTy});
+}
+
/// Lower an interleaved load into a ldN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
@@ -14419,26 +14933,12 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
LDVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
}
- Type *PtrTy =
- UseScalable
- ? LDVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace())
- : LDVTy->getPointerTo(LI->getPointerAddressSpace());
+ Type *PtrTy = LI->getPointerOperandType();
Type *PredTy = VectorType::get(Type::getInt1Ty(LDVTy->getContext()),
LDVTy->getElementCount());
- static const Intrinsic::ID SVELoadIntrs[3] = {
- Intrinsic::aarch64_sve_ld2_sret, Intrinsic::aarch64_sve_ld3_sret,
- Intrinsic::aarch64_sve_ld4_sret};
- static const Intrinsic::ID NEONLoadIntrs[3] = {Intrinsic::aarch64_neon_ld2,
- Intrinsic::aarch64_neon_ld3,
- Intrinsic::aarch64_neon_ld4};
- Function *LdNFunc;
- if (UseScalable)
- LdNFunc = Intrinsic::getDeclaration(LI->getModule(),
- SVELoadIntrs[Factor - 2], {LDVTy});
- else
- LdNFunc = Intrinsic::getDeclaration(
- LI->getModule(), NEONLoadIntrs[Factor - 2], {LDVTy, PtrTy});
+ Function *LdNFunc = getStructuredLoadFunction(LI->getModule(), Factor,
+ UseScalable, LDVTy, PtrTy);
// Holds sub-vectors extracted from the load intrinsic return values. The
// sub-vectors are associated with the shufflevector instructions they will
@@ -14541,10 +15041,6 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
ShuffleVectorInst *SVI,
unsigned Factor) const {
- // Skip if streaming compatible SVE is enabled, because it generates invalid
- // code in streaming mode when SVE length is not specified.
- if (Subtarget->forceStreamingCompatibleSVE())
- return false;
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
@@ -14610,32 +15106,22 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
auto Mask = SVI->getShuffleMask();
// Sanity check if all the indices are NOT in range.
- // If mask is `undef` or `poison`, `Mask` may be a vector of -1s.
- // If all of them are `undef`, OOB read will happen later.
- if (llvm::all_of(Mask, [](int Idx) { return Idx == UndefMaskElem; })) {
+ // If mask is `poison`, `Mask` may be a vector of -1s.
+ // If all of them are `poison`, OOB read will happen later.
+ if (llvm::all_of(Mask, [](int Idx) { return Idx == PoisonMaskElem; })) {
return false;
}
+  // A 64-bit st2 which does not start at element 0 will involve adding extra
+  // ext elements, making the st2 unprofitable.
+ if (Factor == 2 && SubVecTy->getPrimitiveSizeInBits() == 64 && Mask[0] != 0)
+ return false;
- Type *PtrTy =
- UseScalable
- ? STVTy->getElementType()->getPointerTo(SI->getPointerAddressSpace())
- : STVTy->getPointerTo(SI->getPointerAddressSpace());
+ Type *PtrTy = SI->getPointerOperandType();
Type *PredTy = VectorType::get(Type::getInt1Ty(STVTy->getContext()),
STVTy->getElementCount());
- static const Intrinsic::ID SVEStoreIntrs[3] = {Intrinsic::aarch64_sve_st2,
- Intrinsic::aarch64_sve_st3,
- Intrinsic::aarch64_sve_st4};
- static const Intrinsic::ID NEONStoreIntrs[3] = {Intrinsic::aarch64_neon_st2,
- Intrinsic::aarch64_neon_st3,
- Intrinsic::aarch64_neon_st4};
- Function *StNFunc;
- if (UseScalable)
- StNFunc = Intrinsic::getDeclaration(SI->getModule(),
- SVEStoreIntrs[Factor - 2], {STVTy});
- else
- StNFunc = Intrinsic::getDeclaration(
- SI->getModule(), NEONStoreIntrs[Factor - 2], {STVTy, PtrTy});
+ Function *StNFunc = getStructuredStoreFunction(SI->getModule(), Factor,
+ UseScalable, STVTy, PtrTy);
Value *PTrue = nullptr;
if (UseScalable) {
@@ -14705,6 +15191,144 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
return true;
}
+bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
+ IntrinsicInst *DI, LoadInst *LI) const {
+ // Only deinterleave2 supported at present.
+ if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
+ return false;
+
+ // Only a factor of 2 supported at present.
+ const unsigned Factor = 2;
+
+ VectorType *VTy = cast<VectorType>(DI->getType()->getContainedType(0));
+ const DataLayout &DL = DI->getModule()->getDataLayout();
+ bool UseScalable;
+ if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
+ return false;
+
+ // TODO: Add support for using SVE instructions with fixed types later, using
+ // the code from lowerInterleavedLoad to obtain the correct container type.
+ if (UseScalable && !VTy->isScalableTy())
+ return false;
+
+ unsigned NumLoads = getNumInterleavedAccesses(VTy, DL, UseScalable);
+
+ VectorType *LdTy =
+ VectorType::get(VTy->getElementType(),
+ VTy->getElementCount().divideCoefficientBy(NumLoads));
+
+ Type *PtrTy = LI->getPointerOperandType();
+ Function *LdNFunc = getStructuredLoadFunction(DI->getModule(), Factor,
+ UseScalable, LdTy, PtrTy);
+
+ IRBuilder<> Builder(LI);
+
+ Value *Pred = nullptr;
+ if (UseScalable)
+ Pred =
+ Builder.CreateVectorSplat(LdTy->getElementCount(), Builder.getTrue());
+
+ Value *BaseAddr = LI->getPointerOperand();
+ Value *Result;
+ if (NumLoads > 1) {
+ Value *Left = PoisonValue::get(VTy);
+ Value *Right = PoisonValue::get(VTy);
+
+ for (unsigned I = 0; I < NumLoads; ++I) {
+ Value *Offset = Builder.getInt64(I * Factor);
+
+ Value *Address = Builder.CreateGEP(LdTy, BaseAddr, {Offset});
+ Value *LdN = nullptr;
+ if (UseScalable)
+ LdN = Builder.CreateCall(LdNFunc, {Pred, Address}, "ldN");
+ else
+ LdN = Builder.CreateCall(LdNFunc, Address, "ldN");
+
+ Value *Idx =
+ Builder.getInt64(I * LdTy->getElementCount().getKnownMinValue());
+ Left = Builder.CreateInsertVector(
+ VTy, Left, Builder.CreateExtractValue(LdN, 0), Idx);
+ Right = Builder.CreateInsertVector(
+ VTy, Right, Builder.CreateExtractValue(LdN, 1), Idx);
+ }
+
+ Result = PoisonValue::get(DI->getType());
+ Result = Builder.CreateInsertValue(Result, Left, 0);
+ Result = Builder.CreateInsertValue(Result, Right, 1);
+ } else {
+ if (UseScalable)
+ Result = Builder.CreateCall(LdNFunc, {Pred, BaseAddr}, "ldN");
+ else
+ Result = Builder.CreateCall(LdNFunc, BaseAddr, "ldN");
+ }
+
+ DI->replaceAllUsesWith(Result);
+ return true;
+}
+
+bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
+ IntrinsicInst *II, StoreInst *SI) const {
+ // Only interleave2 supported at present.
+ if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
+ return false;
+
+ // Only a factor of 2 supported at present.
+ const unsigned Factor = 2;
+
+ VectorType *VTy = cast<VectorType>(II->getOperand(0)->getType());
+ const DataLayout &DL = II->getModule()->getDataLayout();
+ bool UseScalable;
+ if (!isLegalInterleavedAccessType(VTy, DL, UseScalable))
+ return false;
+
+ // TODO: Add support for using SVE instructions with fixed types later, using
+ // the code from lowerInterleavedStore to obtain the correct container type.
+ if (UseScalable && !VTy->isScalableTy())
+ return false;
+
+ unsigned NumStores = getNumInterleavedAccesses(VTy, DL, UseScalable);
+
+ VectorType *StTy =
+ VectorType::get(VTy->getElementType(),
+ VTy->getElementCount().divideCoefficientBy(NumStores));
+
+ Type *PtrTy = SI->getPointerOperandType();
+ Function *StNFunc = getStructuredStoreFunction(SI->getModule(), Factor,
+ UseScalable, StTy, PtrTy);
+
+ IRBuilder<> Builder(SI);
+
+ Value *BaseAddr = SI->getPointerOperand();
+ Value *Pred = nullptr;
+
+ if (UseScalable)
+ Pred =
+ Builder.CreateVectorSplat(StTy->getElementCount(), Builder.getTrue());
+
+ Value *L = II->getOperand(0);
+ Value *R = II->getOperand(1);
+
+ for (unsigned I = 0; I < NumStores; ++I) {
+ Value *Address = BaseAddr;
+ if (NumStores > 1) {
+ Value *Offset = Builder.getInt64(I * Factor);
+ Address = Builder.CreateGEP(StTy, BaseAddr, {Offset});
+
+ Value *Idx =
+ Builder.getInt64(I * StTy->getElementCount().getKnownMinValue());
+ L = Builder.CreateExtractVector(StTy, II->getOperand(0), Idx);
+ R = Builder.CreateExtractVector(StTy, II->getOperand(1), Idx);
+ }
+
+ if (UseScalable)
+ Builder.CreateCall(StNFunc, {L, R, Pred, Address});
+ else
+ Builder.CreateCall(StNFunc, {L, R, Address});
+ }
+
+ return true;
+}
+
EVT AArch64TargetLowering::getOptimalMemOpType(
const MemOp &Op, const AttributeList &FuncAttributes) const {
bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
@@ -14800,7 +15424,9 @@ bool AArch64TargetLowering::isMulAddWithConstProfitable(
if (!isLegalAddImmediate(C1) || isLegalAddImmediate(C1C2.getSExtValue()))
return true;
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
- AArch64_IMM::expandMOVImm(C1C2.getZExtValue(), VT.getSizeInBits(), Insn);
+ // Adapt to the width of a register.
+ unsigned BitSize = VT.getSizeInBits() <= 32 ? 32 : 64;
+ AArch64_IMM::expandMOVImm(C1C2.getZExtValue(), BitSize, Insn);
if (Insn.size() > 1)
return false;
@@ -14817,7 +15443,7 @@ bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
- const AddrMode &AM, Type *Ty,
+ const AddrMode &AMode, Type *Ty,
unsigned AS, Instruction *I) const {
// AArch64 has five basic addressing modes:
// reg
@@ -14827,14 +15453,36 @@ bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
// reg + SIZE_IN_BYTES * reg
// No global is ever allowed as a base.
- if (AM.BaseGV)
+ if (AMode.BaseGV)
return false;
// No reg+reg+imm addressing.
- if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
+ if (AMode.HasBaseReg && AMode.BaseOffs && AMode.Scale)
+ return false;
+
+ // Canonicalise `1*ScaledReg + imm` into `BaseReg + imm` and
+ // `2*ScaledReg` into `BaseReg + ScaledReg`
+ AddrMode AM = AMode;
+ if (AM.Scale && !AM.HasBaseReg) {
+ if (AM.Scale == 1) {
+ AM.HasBaseReg = true;
+ AM.Scale = 0;
+ } else if (AM.Scale == 2) {
+ AM.HasBaseReg = true;
+ AM.Scale = 1;
+ } else {
+ return false;
+ }
+ }
+
+ // A base register is required in all addressing modes.
+ if (!AM.HasBaseReg)
return false;
// FIXME: Update this method to support scalable addressing modes.
+ if (Ty->isScalableTargetExtTy())
+ return AM.HasBaseReg && !AM.BaseOffs && !AM.Scale;
+
if (isa<ScalableVectorType>(Ty)) {
uint64_t VecElemNumBytes =
DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
@@ -14926,6 +15574,11 @@ AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
return ScratchRegs;
}
+ArrayRef<MCPhysReg> AArch64TargetLowering::getRoundingControlRegisters() const {
+ static const MCPhysReg RCRegs[] = {AArch64::FPCR};
+ return RCRegs;
+}
+
bool
AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const {
@@ -15003,6 +15656,11 @@ bool AArch64TargetLowering::shouldFoldConstantShiftPairToMask(
return true;
}
+bool AArch64TargetLowering::shouldFoldSelectWithIdentityConstant(
+ unsigned BinOpcode, EVT VT) const {
+ return VT.isScalableVector() && isTypeLegal(VT);
+}
+
bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
assert(Ty->isIntegerTy());
@@ -15020,8 +15678,7 @@ bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
if (BitSize == 32)
Val &= (1LL << 32) - 1;
- unsigned LZ = countLeadingZeros((uint64_t)Val);
- unsigned Shift = (63 - LZ) / 16;
+ unsigned Shift = llvm::Log2_64((uint64_t)Val) / 16;
// MOVZ is free so return true for one or fewer MOVK.
return Shift < 3;
}
@@ -15154,6 +15811,9 @@ static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N,
// Turn a v8i8/v16i8 extended vecreduce into a udot/sdot and vecreduce
// vecreduce.add(ext(A)) to vecreduce.add(DOT(zero, A, one))
// vecreduce.add(mul(ext(A), ext(B))) to vecreduce.add(DOT(zero, A, B))
+// If we have vectors larger than v16i8 we extract v16i8 sub-vectors, follow
+// the same steps above to get a DOT instruction for each of them, then
+// concatenate the DOTs and generate vecreduce.add(concat_vector(DOT, DOT2, ..)).
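+//
+// For example, a v32i8 input is split into two v16i8 chunks, each feeding a
+// dot product with a v4i32 accumulator; the two v4i32 results are then
+// concatenated into a v8i32 vector and reduced with a single vecreduce.add.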
static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
const AArch64Subtarget *ST) {
if (!ST->hasDotProd())
@@ -15179,7 +15839,9 @@ static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
EVT Op0VT = A.getOperand(0).getValueType();
- if (Op0VT != MVT::v8i8 && Op0VT != MVT::v16i8)
+ bool IsValidElementCount = Op0VT.getVectorNumElements() % 8 == 0;
+ bool IsValidSize = Op0VT.getScalarSizeInBits() == 8;
+ if (!IsValidElementCount || !IsValidSize)
return SDValue();
SDLoc DL(Op0);
@@ -15190,20 +15852,72 @@ static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
else
B = B.getOperand(0);
- SDValue Zeros =
- DAG.getConstant(0, DL, Op0VT == MVT::v8i8 ? MVT::v2i32 : MVT::v4i32);
+  bool IsMultipleOf16 = Op0VT.getVectorNumElements() % 16 == 0;
+ unsigned NumOfVecReduce;
+ EVT TargetType;
+ if (IsMultipleOf16) {
+ NumOfVecReduce = Op0VT.getVectorNumElements() / 16;
+ TargetType = MVT::v4i32;
+ } else {
+ NumOfVecReduce = Op0VT.getVectorNumElements() / 8;
+ TargetType = MVT::v2i32;
+ }
auto DotOpcode =
(ExtOpcode == ISD::ZERO_EXTEND) ? AArch64ISD::UDOT : AArch64ISD::SDOT;
- SDValue Dot = DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros,
- A.getOperand(0), B);
- return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
+ // Handle the case where we need to generate only one Dot operation.
+ if (NumOfVecReduce == 1) {
+ SDValue Zeros = DAG.getConstant(0, DL, TargetType);
+ SDValue Dot = DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros,
+ A.getOperand(0), B);
+ return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
+ }
+  // Generate Dot instructions for the v16i8-sized chunks.
+ unsigned VecReduce16Num = Op0VT.getVectorNumElements() / 16;
+ SmallVector<SDValue, 4> SDotVec16;
+ unsigned I = 0;
+ for (; I < VecReduce16Num; I += 1) {
+ SDValue Zeros = DAG.getConstant(0, DL, MVT::v4i32);
+ SDValue Op0 =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, A.getOperand(0),
+ DAG.getConstant(I * 16, DL, MVT::i64));
+ SDValue Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, B,
+ DAG.getConstant(I * 16, DL, MVT::i64));
+ SDValue Dot =
+ DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros, Op0, Op1);
+ SDotVec16.push_back(Dot);
+ }
+ // Concatenate dot operations.
+ EVT SDot16EVT =
+ EVT::getVectorVT(*DAG.getContext(), MVT::i32, 4 * VecReduce16Num);
+ SDValue ConcatSDot16 =
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, SDot16EVT, SDotVec16);
+ SDValue VecReduceAdd16 =
+ DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), ConcatSDot16);
+ unsigned VecReduce8Num = (Op0VT.getVectorNumElements() % 16) / 8;
+ if (VecReduce8Num == 0)
+ return VecReduceAdd16;
+
+  // Generate the remaining Dot operation for the v8i8-sized tail.
+ SmallVector<SDValue, 4> SDotVec8;
+ SDValue Zeros = DAG.getConstant(0, DL, MVT::v2i32);
+ SDValue Vec8Op0 =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, A.getOperand(0),
+ DAG.getConstant(I * 16, DL, MVT::i64));
+ SDValue Vec8Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, B,
+ DAG.getConstant(I * 16, DL, MVT::i64));
+ SDValue Dot =
+ DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros, Vec8Op0, Vec8Op1);
+  SDValue VecReduceAdd8 =
+      DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
+  return DAG.getNode(ISD::ADD, DL, N->getValueType(0), VecReduceAdd16,
+                     VecReduceAdd8);
}
// Given an (integer) vecreduce, we know the order of the inputs does not
// matter. We can convert UADDV(add(zext(extract_lo(x)), zext(extract_hi(x))))
// into UADDV(UADDLP(x)). This can also happen through an extra add, where we
// transform UADDV(add(y, add(zext(extract_lo(x)), zext(extract_hi(x))))).
-static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performUADDVAddCombine(SDValue A, SelectionDAG &DAG) {
auto DetectAddExtract = [&](SDValue A) {
// Look for add(zext(extract_lo(x)), zext(extract_hi(x))), returning
// UADDLP(x) if found.
@@ -15237,22 +15951,27 @@ static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(Opcode, SDLoc(A), VT, Ext0.getOperand(0));
};
- SDValue A = N->getOperand(0);
if (SDValue R = DetectAddExtract(A))
- return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), R);
- if (A.getOpcode() == ISD::ADD) {
- if (SDValue R = DetectAddExtract(A.getOperand(0)))
- return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
- DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
- A.getOperand(1)));
- if (SDValue R = DetectAddExtract(A.getOperand(1)))
- return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
- DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
- A.getOperand(0)));
- }
+ return R;
+
+ if (A.getOperand(0).getOpcode() == ISD::ADD && A.getOperand(0).hasOneUse())
+ if (SDValue R = performUADDVAddCombine(A.getOperand(0), DAG))
+ return DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
+ A.getOperand(1));
+ if (A.getOperand(1).getOpcode() == ISD::ADD && A.getOperand(1).hasOneUse())
+ if (SDValue R = performUADDVAddCombine(A.getOperand(1), DAG))
+ return DAG.getNode(ISD::ADD, SDLoc(A), A.getValueType(), R,
+ A.getOperand(0));
return SDValue();
}
+static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue A = N->getOperand(0);
+ if (A.getOpcode() == ISD::ADD)
+ if (SDValue R = performUADDVAddCombine(A, DAG))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), R);
+ return SDValue();
+}
static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -15285,7 +16004,7 @@ AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
- unsigned Lg2 = Divisor.countTrailingZeros();
+ unsigned Lg2 = Divisor.countr_zero();
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
@@ -15332,7 +16051,7 @@ AArch64TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
!(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
return SDValue();
- unsigned Lg2 = Divisor.countTrailingZeros();
+ unsigned Lg2 = Divisor.countr_zero();
if (Lg2 == 0)
return SDValue();
@@ -15620,7 +16339,7 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
// TrailingZeroes is used to test if the mul can be lowered to
// shift+add+shift.
- unsigned TrailingZeroes = ConstValue.countTrailingZeros();
+ unsigned TrailingZeroes = ConstValue.countr_zero();
if (TrailingZeroes) {
// Conservatively do not lower to shift+add+shift if the mul might be
// folded into smul or umul.
@@ -15814,7 +16533,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
- if (!Subtarget->hasNEON() || Subtarget->forceStreamingCompatibleSVE())
+ if (!Subtarget->isNeonAvailable())
return SDValue();
if (!N->getValueType(0).isSimple())
@@ -16027,8 +16746,7 @@ static SDValue tryCombineToBSL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
// It also doesn't work for streaming mode because it causes generating
// bsl instructions that are invalid in streaming mode.
if (TLI.useSVEForFixedLengthVectorVT(
- VT,
- DAG.getSubtarget<AArch64Subtarget>().forceStreamingCompatibleSVE()))
+ VT, !DAG.getSubtarget<AArch64Subtarget>().isNeonAvailable()))
return SDValue();
SDValue N0 = N->getOperand(0);
@@ -16316,14 +17034,28 @@ static SDValue performSVEAndCombine(SDNode *N,
uint64_t ExtVal = C->getZExtValue();
+ auto MaskAndTypeMatch = [ExtVal](EVT VT) -> bool {
+ return ((ExtVal == 0xFF && VT == MVT::i8) ||
+ (ExtVal == 0xFFFF && VT == MVT::i16) ||
+ (ExtVal == 0xFFFFFFFF && VT == MVT::i32));
+ };
+
// If the mask is fully covered by the unpack, we don't need to push
// a new AND onto the operand
EVT EltTy = UnpkOp->getValueType(0).getVectorElementType();
- if ((ExtVal == 0xFF && EltTy == MVT::i8) ||
- (ExtVal == 0xFFFF && EltTy == MVT::i16) ||
- (ExtVal == 0xFFFFFFFF && EltTy == MVT::i32))
+ if (MaskAndTypeMatch(EltTy))
return Src;
+ // If this is 'and (uunpklo/hi (extload MemTy -> ExtTy)), mask', then check
+ // to see if the mask is all-ones of size MemTy.
+ auto MaskedLoadOp = dyn_cast<MaskedLoadSDNode>(UnpkOp);
+ if (MaskedLoadOp && (MaskedLoadOp->getExtensionType() == ISD::ZEXTLOAD ||
+ MaskedLoadOp->getExtensionType() == ISD::EXTLOAD)) {
+ EVT EltTy = MaskedLoadOp->getMemoryVT().getVectorElementType();
+ if (MaskAndTypeMatch(EltTy))
+ return Src;
+ }
+
// Truncate to prevent a DUP with an over wide constant
APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits());
@@ -16424,14 +17156,22 @@ static SDValue performANDCombine(SDNode *N,
if (resolveBuildVector(BVN, DefBits, UndefBits)) {
SDValue NewOp;
- DefBits = ~DefBits;
+ // Any bits known to already be 0 need not be cleared again, which can help
+ // reduce the size of the immediate to one supported by the instruction.
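+    // For example (illustrative values), for a per-element i32 AND with
+    // 0x000000ff where bits 31..16 of the LHS are already known zero, only
+    // bits 15..8 still need clearing, so a BIC immediate of 0x0000ff00 can be
+    // used instead of 0xffffff00, which does not fit the modified-immediate
+    // encoding.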
+ KnownBits Known = DAG.computeKnownBits(LHS);
+ APInt ZeroSplat(VT.getSizeInBits(), 0);
+ for (unsigned I = 0; I < VT.getSizeInBits() / Known.Zero.getBitWidth(); I++)
+ ZeroSplat |= Known.Zero.zext(VT.getSizeInBits())
+ << (Known.Zero.getBitWidth() * I);
+
+ DefBits = ~(DefBits | ZeroSplat);
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
DefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
DefBits, &LHS)))
return NewOp;
- UndefBits = ~UndefBits;
+ UndefBits = ~(UndefBits | ZeroSplat);
if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
UndefBits, &LHS)) ||
(NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
@@ -16442,6 +17182,42 @@ static SDValue performANDCombine(SDNode *N,
return SDValue();
}
+static SDValue performFADDCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ if (!N->getFlags().hasAllowReassociation())
+ return SDValue();
+
+  // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
+ auto ReassocComplex = [&](SDValue A, SDValue B) {
+ if (A.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
+ return SDValue();
+ unsigned Opc = A.getConstantOperandVal(0);
+ if (Opc != Intrinsic::aarch64_neon_vcmla_rot0 &&
+ Opc != Intrinsic::aarch64_neon_vcmla_rot90 &&
+ Opc != Intrinsic::aarch64_neon_vcmla_rot180 &&
+ Opc != Intrinsic::aarch64_neon_vcmla_rot270)
+ return SDValue();
+ SDValue VCMLA = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, VT, A.getOperand(0),
+ DAG.getNode(ISD::FADD, DL, VT, A.getOperand(1), B, N->getFlags()),
+ A.getOperand(2), A.getOperand(3));
+ VCMLA->setFlags(A->getFlags());
+ return VCMLA;
+ };
+ if (SDValue R = ReassocComplex(LHS, RHS))
+ return R;
+ if (SDValue R = ReassocComplex(RHS, LHS))
+ return R;
+
+ return SDValue();
+}
+
static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
switch (Opcode) {
case ISD::STRICT_FADD:
@@ -16552,7 +17328,6 @@ performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
- ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
@@ -16560,7 +17335,8 @@ performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
// extract(dup x) -> x
if (N0.getOpcode() == AArch64ISD::DUP)
- return DAG.getZExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
+ return VT.isInteger() ? DAG.getZExtOrTrunc(N0.getOperand(0), SDLoc(N), VT)
+ : N0.getOperand(0);
// Rewrite for pairwise fadd pattern
// (f32 (extract_vector_elt
@@ -16571,8 +17347,7 @@ performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
// (extract_vector_elt (vXf32 Other) 1))
// For strict_fadd we need to make sure the old strict_fadd can be deleted, so
// we can only do this when it's used only by the extract_vector_elt.
- if (ConstantN1 && ConstantN1->getZExtValue() == 0 &&
- hasPairwiseAdd(N0->getOpcode(), VT, FullFP16) &&
+ if (isNullConstant(N1) && hasPairwiseAdd(N0->getOpcode(), VT, FullFP16) &&
(!IsStrict || N0.hasOneUse())) {
SDLoc DL(N0);
SDValue N00 = N0->getOperand(IsStrict ? 1 : 0);
@@ -16772,6 +17547,62 @@ static SDValue performConcatVectorsCombine(SDNode *N,
}
}
+ auto IsRSHRN = [](SDValue Shr) {
+ if (Shr.getOpcode() != AArch64ISD::VLSHR)
+ return false;
+ SDValue Op = Shr.getOperand(0);
+ EVT VT = Op.getValueType();
+ unsigned ShtAmt = Shr.getConstantOperandVal(1);
+ if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
+ return false;
+
+ APInt Imm;
+ if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
+ Imm = APInt(VT.getScalarSizeInBits(),
+ Op.getOperand(1).getConstantOperandVal(0)
+ << Op.getOperand(1).getConstantOperandVal(1));
+ else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
+ isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
+ Imm = APInt(VT.getScalarSizeInBits(),
+ Op.getOperand(1).getConstantOperandVal(0));
+ else
+ return false;
+
+ if (Imm != 1ULL << (ShtAmt - 1))
+ return false;
+ return true;
+ };
+
+ // concat(rshrn(x), rshrn(y)) -> rshrn(concat(x, y))
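+  // For example, with a shift amount of 3 each rshrn adds the rounding
+  // constant 1 << 2 == 4 before shifting; after the transform the rounding add
+  // and the VLSHR by 3 are performed once on the wider concat(x, y) rather
+  // than once per half.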
+ if (N->getNumOperands() == 2 && IsRSHRN(N0) &&
+ ((IsRSHRN(N1) &&
+ N0.getConstantOperandVal(1) == N1.getConstantOperandVal(1)) ||
+ N1.isUndef())) {
+ SDValue X = N0.getOperand(0).getOperand(0);
+ SDValue Y = N1.isUndef() ? DAG.getUNDEF(X.getValueType())
+ : N1.getOperand(0).getOperand(0);
+ EVT BVT =
+ X.getValueType().getDoubleNumVectorElementsVT(*DCI.DAG.getContext());
+ SDValue CC = DAG.getNode(ISD::CONCAT_VECTORS, dl, BVT, X, Y);
+ SDValue Add = DAG.getNode(
+ ISD::ADD, dl, BVT, CC,
+ DAG.getConstant(1ULL << (N0.getConstantOperandVal(1) - 1), dl, BVT));
+ SDValue Shr =
+ DAG.getNode(AArch64ISD::VLSHR, dl, BVT, Add, N0.getOperand(1));
+ return Shr;
+ }
+
+ // concat(zip1(a, b), zip2(a, b)) is zip1(a, b)
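+  // For example, with a = [a0 a1 a2 a3] and b = [b0 b1 b2 b3]:
+  //   concat(zip1(a, b), zip2(a, b)) = [a0 b0 a1 b1 a2 b2 a3 b3]
+  // which is exactly zip1 of the two operands widened with undef upper halves.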
+ if (N->getNumOperands() == 2 && N0Opc == AArch64ISD::ZIP1 &&
+ N1Opc == AArch64ISD::ZIP2 && N0.getOperand(0) == N1.getOperand(0) &&
+ N0.getOperand(1) == N1.getOperand(1)) {
+ SDValue E0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, N0.getOperand(0),
+ DAG.getUNDEF(N0.getValueType()));
+ SDValue E1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, N0.getOperand(1),
+ DAG.getUNDEF(N0.getValueType()));
+ return DAG.getNode(AArch64ISD::ZIP1, dl, VT, E0, E1);
+ }
+
// If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
// splat. The indexed instructions are going to be expecting a DUPLANE64, so
// canonicalise to that.
@@ -17314,8 +18145,8 @@ static SDValue performNegCSelCombine(SDNode *N, SelectionDAG &DAG) {
// instruction can still be used profitably. This function puts the DAG into a
// more appropriate form for those patterns to trigger.
static SDValue performAddSubLongCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG) {
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
if (DCI.isBeforeLegalizeOps())
return SDValue();
@@ -17521,8 +18352,7 @@ static SDValue performTruncateCombine(SDNode *N,
// Check whether a node is an extend or shift operand
static bool isExtendOrShiftOperand(SDValue N) {
unsigned Opcode = N.getOpcode();
- if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_INREG ||
- Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ANY_EXTEND) {
+ if (ISD::isExtOpcode(Opcode) || Opcode == ISD::SIGN_EXTEND_INREG) {
EVT SrcVT;
if (Opcode == ISD::SIGN_EXTEND_INREG)
SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
@@ -17612,24 +18442,396 @@ static SDValue performAddCombineForShiftedOperands(SDNode *N,
return SDValue();
}
+// The mid end will reassociate sub(sub(x, m1), m2) to sub(x, add(m1, m2)).
+// This reassociates it back to allow the creation of more mls instructions.
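+//
+// For example, sub(x, add(mul(a, b), mul(c, d))) is rewritten to
+// sub(sub(x, mul(a, b)), mul(c, d)), which can then be selected as two mls
+// instructions.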
+static SDValue performSubAddMULCombine(SDNode *N, SelectionDAG &DAG) {
+ if (N->getOpcode() != ISD::SUB)
+ return SDValue();
+
+ SDValue Add = N->getOperand(1);
+ SDValue X = N->getOperand(0);
+ if (Add.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ if (!Add.hasOneUse())
+ return SDValue();
+ if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(X)))
+ return SDValue();
+
+ SDValue M1 = Add.getOperand(0);
+ SDValue M2 = Add.getOperand(1);
+ if (M1.getOpcode() != ISD::MUL && M1.getOpcode() != AArch64ISD::SMULL &&
+ M1.getOpcode() != AArch64ISD::UMULL)
+ return SDValue();
+ if (M2.getOpcode() != ISD::MUL && M2.getOpcode() != AArch64ISD::SMULL &&
+ M2.getOpcode() != AArch64ISD::UMULL)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, X, M1);
+ return DAG.getNode(ISD::SUB, SDLoc(N), VT, Sub, M2);
+}
+
+// Combine into mla/mls.
+// This works on the patterns of:
+// add v1, (mul v2, v3)
+// sub v1, (mul v2, v3)
+// for vectors of type <1 x i64> and <2 x i64> when SVE is available.
+// It will transform the add/sub to a scalable version, so that we can
+// make use of SVE's MLA/MLS that will be generated for that pattern.
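+//
+// For example, add(v2i64 x, extract_subvector(MUL_PRED(pg, a, b), 0)) is, in
+// outline, rewritten so that the add is performed on the scalable MUL_PRED
+// result directly and the fixed-length value is extracted afterwards, letting
+// the multiply and add be selected together as an SVE MLA.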
+static SDValue
+performSVEMulAddSubCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ // Make sure that the types are legal
+ if (!DCI.isAfterLegalizeDAG())
+ return SDValue();
+ // Before using SVE's features, check first if it's available.
+ if (!DAG.getSubtarget<AArch64Subtarget>().hasSVE())
+ return SDValue();
+
+ if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::SUB)
+ return SDValue();
+
+ if (!N->getValueType(0).isFixedLengthVector())
+ return SDValue();
+
+ auto performOpt = [&DAG, &N](SDValue Op0, SDValue Op1) -> SDValue {
+ if (Op1.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return SDValue();
+
+ if (!cast<ConstantSDNode>(Op1->getOperand(1))->isZero())
+ return SDValue();
+
+ SDValue MulValue = Op1->getOperand(0);
+ if (MulValue.getOpcode() != AArch64ISD::MUL_PRED)
+ return SDValue();
+
+ if (!Op1.hasOneUse() || !MulValue.hasOneUse())
+ return SDValue();
+
+ EVT ScalableVT = MulValue.getValueType();
+ if (!ScalableVT.isScalableVector())
+ return SDValue();
+
+ SDValue ScaledOp = convertToScalableVector(DAG, ScalableVT, Op0);
+ SDValue NewValue =
+ DAG.getNode(N->getOpcode(), SDLoc(N), ScalableVT, {ScaledOp, MulValue});
+ return convertFromScalableVector(DAG, N->getValueType(0), NewValue);
+ };
+
+ if (SDValue res = performOpt(N->getOperand(0), N->getOperand(1)))
+ return res;
+ else if (N->getOpcode() == ISD::ADD)
+ return performOpt(N->getOperand(1), N->getOperand(0));
+
+ return SDValue();
+}
+
+// Given an i64 add from a v1i64 extract, convert to a neon v1i64 add. This can
+// help, for example, to produce ssra from sshr+add.
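+//
+// For example, add(extract_vector_elt(v1i64 x, 0), i64 y) where y is a load
+// becomes extract_vector_elt(add(v1i64 x, scalar_to_vector(y)), 0), keeping
+// the arithmetic on the vector side where patterns such as ssra can match.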
+static SDValue performAddSubIntoVectorOp(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64)
+ return SDValue();
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // At least one of the operands should be an extract, and the other should be
+ // something that is easy to convert to v1i64 type (in this case a load).
+ if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOpcode() != ISD::LOAD)
+ return SDValue();
+ if (Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
+ Op1.getOpcode() != ISD::LOAD)
+ return SDValue();
+
+ SDLoc DL(N);
+ if (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0).getValueType() == MVT::v1i64) {
+ Op0 = Op0.getOperand(0);
+ Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i64, Op1);
+ } else if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op1.getOperand(0).getValueType() == MVT::v1i64) {
+ Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i64, Op0);
+ Op1 = Op1.getOperand(0);
+ } else
+ return SDValue();
+
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64,
+ DAG.getNode(N->getOpcode(), DL, MVT::v1i64, Op0, Op1),
+ DAG.getConstant(0, DL, MVT::i64));
+}
+
+static bool isLoadOrMultipleLoads(SDValue B, SmallVector<LoadSDNode *> &Loads) {
+ SDValue BV = peekThroughOneUseBitcasts(B);
+ if (!BV->hasOneUse())
+ return false;
+ if (auto *Ld = dyn_cast<LoadSDNode>(BV)) {
+ if (!Ld || !Ld->isSimple())
+ return false;
+ Loads.push_back(Ld);
+ return true;
+ } else if (BV.getOpcode() == ISD::BUILD_VECTOR ||
+ BV.getOpcode() == ISD::CONCAT_VECTORS) {
+ for (unsigned Op = 0; Op < BV.getNumOperands(); Op++) {
+ auto *Ld = dyn_cast<LoadSDNode>(BV.getOperand(Op));
+ if (!Ld || !Ld->isSimple() || !BV.getOperand(Op).hasOneUse())
+ return false;
+ Loads.push_back(Ld);
+ }
+ return true;
+ } else if (B.getOpcode() == ISD::VECTOR_SHUFFLE) {
+ // Try to find a tree of shuffles and concats from how IR shuffles of loads
+ // are lowered. Note that this only comes up because we do not always visit
+ // operands before uses. After that is fixed this can be removed and in the
+ // meantime this is fairly specific to the lowering we expect from IR.
+ // t46: v16i8 = vector_shuffle<0,1,2,3,4,5,6,7,8,9,10,11,16,17,18,19> t44, t45
+ // t44: v16i8 = vector_shuffle<0,1,2,3,4,5,6,7,16,17,18,19,u,u,u,u> t42, t43
+ // t42: v16i8 = concat_vectors t40, t36, undef:v4i8, undef:v4i8
+ // t40: v4i8,ch = load<(load (s32) from %ir.17)> t0, t22, undef:i64
+ // t36: v4i8,ch = load<(load (s32) from %ir.13)> t0, t18, undef:i64
+ // t43: v16i8 = concat_vectors t32, undef:v4i8, undef:v4i8, undef:v4i8
+ // t32: v4i8,ch = load<(load (s32) from %ir.9)> t0, t14, undef:i64
+ // t45: v16i8 = concat_vectors t28, undef:v4i8, undef:v4i8, undef:v4i8
+ // t28: v4i8,ch = load<(load (s32) from %ir.0)> t0, t2, undef:i64
+ if (B.getOperand(0).getOpcode() != ISD::VECTOR_SHUFFLE ||
+ B.getOperand(0).getOperand(0).getOpcode() != ISD::CONCAT_VECTORS ||
+ B.getOperand(0).getOperand(1).getOpcode() != ISD::CONCAT_VECTORS ||
+ B.getOperand(1).getOpcode() != ISD::CONCAT_VECTORS ||
+ B.getOperand(1).getNumOperands() != 4)
+ return false;
+ auto SV1 = cast<ShuffleVectorSDNode>(B);
+ auto SV2 = cast<ShuffleVectorSDNode>(B.getOperand(0));
+ int NumElts = B.getValueType().getVectorNumElements();
+ int NumSubElts = NumElts / 4;
+ for (int I = 0; I < NumSubElts; I++) {
+ // <0,1,2,3,4,5,6,7,8,9,10,11,16,17,18,19>
+ if (SV1->getMaskElt(I) != I ||
+ SV1->getMaskElt(I + NumSubElts) != I + NumSubElts ||
+ SV1->getMaskElt(I + NumSubElts * 2) != I + NumSubElts * 2 ||
+ SV1->getMaskElt(I + NumSubElts * 3) != I + NumElts)
+ return false;
+ // <0,1,2,3,4,5,6,7,16,17,18,19,u,u,u,u>
+ if (SV2->getMaskElt(I) != I ||
+ SV2->getMaskElt(I + NumSubElts) != I + NumSubElts ||
+ SV2->getMaskElt(I + NumSubElts * 2) != I + NumElts)
+ return false;
+ }
+ auto *Ld0 = dyn_cast<LoadSDNode>(SV2->getOperand(0).getOperand(0));
+ auto *Ld1 = dyn_cast<LoadSDNode>(SV2->getOperand(0).getOperand(1));
+ auto *Ld2 = dyn_cast<LoadSDNode>(SV2->getOperand(1).getOperand(0));
+ auto *Ld3 = dyn_cast<LoadSDNode>(B.getOperand(1).getOperand(0));
+ if (!Ld0 || !Ld1 || !Ld2 || !Ld3 || !Ld0->isSimple() || !Ld1->isSimple() ||
+ !Ld2->isSimple() || !Ld3->isSimple())
+ return false;
+ Loads.push_back(Ld0);
+ Loads.push_back(Ld1);
+ Loads.push_back(Ld2);
+ Loads.push_back(Ld3);
+ return true;
+ }
+ return false;
+}
+
+static bool areLoadedOffsetButOtherwiseSame(SDValue Op0, SDValue Op1,
+ SelectionDAG &DAG,
+ unsigned &NumSubLoads) {
+ if (!Op0.hasOneUse() || !Op1.hasOneUse())
+ return false;
+
+ SmallVector<LoadSDNode *> Loads0, Loads1;
+ if (isLoadOrMultipleLoads(Op0, Loads0) &&
+ isLoadOrMultipleLoads(Op1, Loads1)) {
+ if (NumSubLoads && Loads0.size() != NumSubLoads)
+ return false;
+ NumSubLoads = Loads0.size();
+ return Loads0.size() == Loads1.size() &&
+ all_of(zip(Loads0, Loads1), [&DAG](auto L) {
+ unsigned Size = get<0>(L)->getValueType(0).getSizeInBits();
+ return Size == get<1>(L)->getValueType(0).getSizeInBits() &&
+ DAG.areNonVolatileConsecutiveLoads(get<1>(L), get<0>(L),
+ Size / 8, 1);
+ });
+ }
+
+ if (Op0.getOpcode() != Op1.getOpcode())
+ return false;
+
+ switch (Op0.getOpcode()) {
+ case ISD::ADD:
+ case ISD::SUB:
+ return areLoadedOffsetButOtherwiseSame(Op0.getOperand(0), Op1.getOperand(0),
+ DAG, NumSubLoads) &&
+ areLoadedOffsetButOtherwiseSame(Op0.getOperand(1), Op1.getOperand(1),
+ DAG, NumSubLoads);
+ case ISD::SIGN_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::ZERO_EXTEND:
+ EVT XVT = Op0.getOperand(0).getValueType();
+ if (XVT.getScalarSizeInBits() != 8 && XVT.getScalarSizeInBits() != 16 &&
+ XVT.getScalarSizeInBits() != 32)
+ return false;
+ return areLoadedOffsetButOtherwiseSame(Op0.getOperand(0), Op1.getOperand(0),
+ DAG, NumSubLoads);
+ }
+ return false;
+}
+
+// This method attempts to fold trees of add(ext(load p), shl(ext(load p+4)))
+// into a single load of twice the size, from which we extract the bottom and
+// top parts so that the shl can use a shll2 instruction. The two loads in that
+// example can also be larger trees of instructions, which are identical except
+// for the leaves, which are all loads offset from the LHS, including
+// buildvectors of multiple loads. For example the RHS tree could be
+// sub(zext(buildvec(load p+4, load q+4)), zext(buildvec(load r+4, load s+4))).
+// Whilst it can be common for the larger loads to replace LDP instructions
+// (which doesn't gain anything on its own), the larger loads can help create
+// more efficient code, and in buildvectors prevent the need for ld1 lane
+// inserts which can be slower than normal loads.
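+//
+// For example, add(zext(v8i8 load from p), shl(zext(v8i8 load from p+8), 1))
+// can, roughly, be rewritten to load a single v16i8 from p, extend it and
+// split it back into the two halves, so that the extend of the high half can
+// use ushll2 and the two narrower loads are merged into one wider load.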
+static SDValue performExtBinopLoadFold(SDNode *N, SelectionDAG &DAG) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isFixedLengthVector() ||
+ (VT.getScalarSizeInBits() != 16 && VT.getScalarSizeInBits() != 32 &&
+ VT.getScalarSizeInBits() != 64))
+ return SDValue();
+
+ SDValue Other = N->getOperand(0);
+ SDValue Shift = N->getOperand(1);
+ if (Shift.getOpcode() != ISD::SHL && N->getOpcode() != ISD::SUB)
+ std::swap(Shift, Other);
+ APInt ShiftAmt;
+ if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse() ||
+ !ISD::isConstantSplatVector(Shift.getOperand(1).getNode(), ShiftAmt))
+ return SDValue();
+
+ if (!ISD::isExtOpcode(Shift.getOperand(0).getOpcode()) ||
+ !ISD::isExtOpcode(Other.getOpcode()) ||
+ Shift.getOperand(0).getOperand(0).getValueType() !=
+ Other.getOperand(0).getValueType() ||
+ !Other.hasOneUse() || !Shift.getOperand(0).hasOneUse())
+ return SDValue();
+
+ SDValue Op0 = Other.getOperand(0);
+ SDValue Op1 = Shift.getOperand(0).getOperand(0);
+
+ unsigned NumSubLoads = 0;
+ if (!areLoadedOffsetButOtherwiseSame(Op0, Op1, DAG, NumSubLoads))
+ return SDValue();
+
+  // Attempt to rule out some unprofitable cases using heuristics (some working
+  // around suboptimal code generation), notably if the extend would not be
+  // able to use ushll2 instructions as the types are not large enough.
+  // Otherwise zips will need to be created, which can increase the instruction
+  // count.
+ unsigned NumElts = Op0.getValueType().getVectorNumElements();
+ unsigned NumSubElts = NumElts / NumSubLoads;
+ if (NumSubElts * VT.getScalarSizeInBits() < 128 ||
+ (Other.getOpcode() != Shift.getOperand(0).getOpcode() &&
+ Op0.getValueType().getSizeInBits() < 128 &&
+ !DAG.getTargetLoweringInfo().isTypeLegal(Op0.getValueType())))
+ return SDValue();
+
+ // Recreate the tree with the new combined loads.
+ std::function<SDValue(SDValue, SDValue, SelectionDAG &)> GenCombinedTree =
+ [&GenCombinedTree](SDValue Op0, SDValue Op1, SelectionDAG &DAG) {
+ EVT DVT =
+ Op0.getValueType().getDoubleNumVectorElementsVT(*DAG.getContext());
+
+ SmallVector<LoadSDNode *> Loads0, Loads1;
+ if (isLoadOrMultipleLoads(Op0, Loads0) &&
+ isLoadOrMultipleLoads(Op1, Loads1)) {
+ EVT LoadVT = EVT::getVectorVT(
+ *DAG.getContext(), Op0.getValueType().getScalarType(),
+ Op0.getValueType().getVectorNumElements() / Loads0.size());
+ EVT DLoadVT = LoadVT.getDoubleNumVectorElementsVT(*DAG.getContext());
+
+ SmallVector<SDValue> NewLoads;
+ for (const auto &[L0, L1] : zip(Loads0, Loads1)) {
+ SDValue Load = DAG.getLoad(DLoadVT, SDLoc(L0), L0->getChain(),
+ L0->getBasePtr(), L0->getPointerInfo(),
+ L0->getOriginalAlign());
+ DAG.makeEquivalentMemoryOrdering(L0, Load.getValue(1));
+ DAG.makeEquivalentMemoryOrdering(L1, Load.getValue(1));
+ NewLoads.push_back(Load);
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op0), DVT, NewLoads);
+ }
+
+ SmallVector<SDValue> Ops;
+ for (const auto &[O0, O1] : zip(Op0->op_values(), Op1->op_values()))
+ Ops.push_back(GenCombinedTree(O0, O1, DAG));
+ return DAG.getNode(Op0.getOpcode(), SDLoc(Op0), DVT, Ops);
+ };
+ SDValue NewOp = GenCombinedTree(Op0, Op1, DAG);
+
+ SmallVector<int> LowMask(NumElts, 0), HighMask(NumElts, 0);
+ int Hi = NumSubElts, Lo = 0;
+ for (unsigned i = 0; i < NumSubLoads; i++) {
+ for (unsigned j = 0; j < NumSubElts; j++) {
+ LowMask[i * NumSubElts + j] = Lo++;
+ HighMask[i * NumSubElts + j] = Hi++;
+ }
+ Lo += NumSubElts;
+ Hi += NumSubElts;
+ }
+ SDLoc DL(N);
+ SDValue Ext0, Ext1;
+  // Extract the top and bottom lanes, then extend the result. Alternatively,
+  // extend the result and then extract the lanes if the two operands match, as
+  // that produces slightly smaller code.
+ if (Other.getOpcode() != Shift.getOperand(0).getOpcode()) {
+ SDValue SubL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, Op0.getValueType(),
+ NewOp, DAG.getConstant(0, DL, MVT::i64));
+ SDValue SubH =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, Op0.getValueType(), NewOp,
+ DAG.getConstant(NumSubElts * NumSubLoads, DL, MVT::i64));
+ SDValue Extr0 =
+ DAG.getVectorShuffle(Op0.getValueType(), DL, SubL, SubH, LowMask);
+ SDValue Extr1 =
+ DAG.getVectorShuffle(Op0.getValueType(), DL, SubL, SubH, HighMask);
+ Ext0 = DAG.getNode(Other.getOpcode(), DL, VT, Extr0);
+ Ext1 = DAG.getNode(Shift.getOperand(0).getOpcode(), DL, VT, Extr1);
+ } else {
+ EVT DVT = VT.getDoubleNumVectorElementsVT(*DAG.getContext());
+ SDValue Ext = DAG.getNode(Other.getOpcode(), DL, DVT, NewOp);
+ SDValue SubL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Ext,
+ DAG.getConstant(0, DL, MVT::i64));
+ SDValue SubH =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Ext,
+ DAG.getConstant(NumSubElts * NumSubLoads, DL, MVT::i64));
+ Ext0 = DAG.getVectorShuffle(VT, DL, SubL, SubH, LowMask);
+ Ext1 = DAG.getVectorShuffle(VT, DL, SubL, SubH, HighMask);
+ }
+ SDValue NShift =
+ DAG.getNode(Shift.getOpcode(), DL, VT, Ext1, Shift.getOperand(1));
+ return DAG.getNode(N->getOpcode(), DL, VT, Ext0, NShift);
+}
+
static SDValue performAddSubCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG) {
+ TargetLowering::DAGCombinerInfo &DCI) {
// Try to change sum of two reductions.
- if (SDValue Val = performAddUADDVCombine(N, DAG))
+ if (SDValue Val = performAddUADDVCombine(N, DCI.DAG))
+ return Val;
+ if (SDValue Val = performAddDotCombine(N, DCI.DAG))
return Val;
- if (SDValue Val = performAddDotCombine(N, DAG))
+ if (SDValue Val = performAddCSelIntoCSinc(N, DCI.DAG))
return Val;
- if (SDValue Val = performAddCSelIntoCSinc(N, DAG))
+ if (SDValue Val = performNegCSelCombine(N, DCI.DAG))
return Val;
- if (SDValue Val = performNegCSelCombine(N, DAG))
+ if (SDValue Val = performVectorAddSubExtCombine(N, DCI.DAG))
return Val;
- if (SDValue Val = performVectorAddSubExtCombine(N, DAG))
+ if (SDValue Val = performAddCombineForShiftedOperands(N, DCI.DAG))
return Val;
- if (SDValue Val = performAddCombineForShiftedOperands(N, DAG))
+ if (SDValue Val = performSubAddMULCombine(N, DCI.DAG))
+ return Val;
+ if (SDValue Val = performSVEMulAddSubCombine(N, DCI))
+ return Val;
+ if (SDValue Val = performAddSubIntoVectorOp(N, DCI.DAG))
+ return Val;
+
+ if (SDValue Val = performExtBinopLoadFold(N, DCI.DAG))
return Val;
- return performAddSubLongCombine(N, DCI, DAG);
+ return performAddSubLongCombine(N, DCI);
}
// Massage DAGs which we can use the high-half "long" operations on into
@@ -17662,7 +18864,8 @@ static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
LHS = tryExtendDUPToExtractHigh(LHS, DAG);
if (!LHS.getNode())
return SDValue();
- }
+ } else
+ return SDValue();
if (IID == Intrinsic::not_intrinsic)
return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);
@@ -17691,6 +18894,10 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
} else
return SDValue();
+ // If the shift amount is zero, remove the shift intrinsic.
+ if (ShiftAmount == 0 && IID != Intrinsic::aarch64_neon_sqshlu)
+ return N->getOperand(1);
+
unsigned Opcode;
bool IsRightShift;
switch (IID) {
@@ -17726,14 +18933,28 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
break;
}
+ EVT VT = N->getValueType(0);
+ SDValue Op = N->getOperand(1);
+ SDLoc dl(N);
+ if (VT == MVT::i64) {
+ Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op);
+ VT = MVT::v1i64;
+ }
+
if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
- SDLoc dl(N);
- return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
- DAG.getConstant(-ShiftAmount, dl, MVT::i32));
+ Op = DAG.getNode(Opcode, dl, VT, Op,
+ DAG.getConstant(-ShiftAmount, dl, MVT::i32));
+ if (N->getValueType(0) == MVT::i64)
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
+ DAG.getConstant(0, dl, MVT::i64));
+ return Op;
} else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
- SDLoc dl(N);
- return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
- DAG.getConstant(ShiftAmount, dl, MVT::i32));
+ Op = DAG.getNode(Opcode, dl, VT, Op,
+ DAG.getConstant(ShiftAmount, dl, MVT::i32));
+ if (N->getValueType(0) == MVT::i64)
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
+ DAG.getConstant(0, dl, MVT::i64));
+ return Op;
}
return SDValue();
@@ -18104,6 +19325,12 @@ static SDValue performIntrinsicCombine(SDNode *N,
DAG.getConstant(N->getConstantOperandVal(2), DL, VT));
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Sht);
}
+ case Intrinsic::aarch64_neon_sabd:
+ return DAG.getNode(ISD::ABDS, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
+ case Intrinsic::aarch64_neon_uabd:
+ return DAG.getNode(ISD::ABDU, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2));
case Intrinsic::aarch64_crc32b:
case Intrinsic::aarch64_crc32cb:
return tryCombineCRC32(0xff, N, DAG);
@@ -18141,89 +19368,87 @@ static SDValue performIntrinsicCombine(SDNode *N,
N->getOperand(1));
case Intrinsic::aarch64_sve_ext:
return LowerSVEIntrinsicEXT(N, DAG);
- case Intrinsic::aarch64_sve_mul:
- return convertMergedOpToPredOp(N, AArch64ISD::MUL_PRED, DAG);
case Intrinsic::aarch64_sve_mul_u:
return DAG.getNode(AArch64ISD::MUL_PRED, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
- case Intrinsic::aarch64_sve_smulh:
- return convertMergedOpToPredOp(N, AArch64ISD::MULHS_PRED, DAG);
case Intrinsic::aarch64_sve_smulh_u:
return DAG.getNode(AArch64ISD::MULHS_PRED, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
- case Intrinsic::aarch64_sve_umulh:
- return convertMergedOpToPredOp(N, AArch64ISD::MULHU_PRED, DAG);
case Intrinsic::aarch64_sve_umulh_u:
return DAG.getNode(AArch64ISD::MULHU_PRED, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
- case Intrinsic::aarch64_sve_smin:
- return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
case Intrinsic::aarch64_sve_smin_u:
return DAG.getNode(AArch64ISD::SMIN_PRED, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
- case Intrinsic::aarch64_sve_umin:
- return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
case Intrinsic::aarch64_sve_umin_u:
return DAG.getNode(AArch64ISD::UMIN_PRED, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
- case Intrinsic::aarch64_sve_smax:
- return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
case Intrinsic::aarch64_sve_smax_u:
return DAG.getNode(AArch64ISD::SMAX_PRED, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
- case Intrinsic::aarch64_sve_umax:
- return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
case Intrinsic::aarch64_sve_umax_u:
return DAG.getNode(AArch64ISD::UMAX_PRED, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
- case Intrinsic::aarch64_sve_lsl:
- return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
case Intrinsic::aarch64_sve_lsl_u:
return DAG.getNode(AArch64ISD::SHL_PRED, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
- case Intrinsic::aarch64_sve_lsr:
- return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
case Intrinsic::aarch64_sve_lsr_u:
return DAG.getNode(AArch64ISD::SRL_PRED, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
- case Intrinsic::aarch64_sve_asr:
- return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
case Intrinsic::aarch64_sve_asr_u:
return DAG.getNode(AArch64ISD::SRA_PRED, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2), N->getOperand(3));
- case Intrinsic::aarch64_sve_fadd:
- return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG);
- case Intrinsic::aarch64_sve_fsub:
- return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG);
- case Intrinsic::aarch64_sve_fmul:
- return convertMergedOpToPredOp(N, AArch64ISD::FMUL_PRED, DAG);
- case Intrinsic::aarch64_sve_add:
- return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
+ case Intrinsic::aarch64_sve_fadd_u:
+ return DAG.getNode(AArch64ISD::FADD_PRED, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ case Intrinsic::aarch64_sve_fdiv_u:
+ return DAG.getNode(AArch64ISD::FDIV_PRED, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ case Intrinsic::aarch64_sve_fmax_u:
+ return DAG.getNode(AArch64ISD::FMAX_PRED, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ case Intrinsic::aarch64_sve_fmaxnm_u:
+ return DAG.getNode(AArch64ISD::FMAXNM_PRED, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ case Intrinsic::aarch64_sve_fmla_u:
+ return DAG.getNode(AArch64ISD::FMA_PRED, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(3), N->getOperand(4),
+ N->getOperand(2));
+ case Intrinsic::aarch64_sve_fmin_u:
+ return DAG.getNode(AArch64ISD::FMIN_PRED, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ case Intrinsic::aarch64_sve_fminnm_u:
+ return DAG.getNode(AArch64ISD::FMINNM_PRED, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ case Intrinsic::aarch64_sve_fmul_u:
+ return DAG.getNode(AArch64ISD::FMUL_PRED, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ case Intrinsic::aarch64_sve_fsub_u:
+ return DAG.getNode(AArch64ISD::FSUB_PRED, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_add_u:
return DAG.getNode(ISD::ADD, SDLoc(N), N->getValueType(0), N->getOperand(2),
N->getOperand(3));
- case Intrinsic::aarch64_sve_sub:
- return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
case Intrinsic::aarch64_sve_sub_u:
return DAG.getNode(ISD::SUB, SDLoc(N), N->getValueType(0), N->getOperand(2),
N->getOperand(3));
case Intrinsic::aarch64_sve_subr:
return convertMergedOpToPredOp(N, ISD::SUB, DAG, true, true);
- case Intrinsic::aarch64_sve_and:
- return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
- case Intrinsic::aarch64_sve_bic:
- return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true);
- case Intrinsic::aarch64_sve_eor:
- return convertMergedOpToPredOp(N, ISD::XOR, DAG, true);
- case Intrinsic::aarch64_sve_orr:
- return convertMergedOpToPredOp(N, ISD::OR, DAG, true);
- case Intrinsic::aarch64_sve_sabd:
- return convertMergedOpToPredOp(N, ISD::ABDS, DAG, true);
+ case Intrinsic::aarch64_sve_and_u:
+ return DAG.getNode(ISD::AND, SDLoc(N), N->getValueType(0), N->getOperand(2),
+ N->getOperand(3));
+ case Intrinsic::aarch64_sve_bic_u:
+ return DAG.getNode(AArch64ISD::BIC, SDLoc(N), N->getValueType(0),
+ N->getOperand(2), N->getOperand(3));
+ case Intrinsic::aarch64_sve_eor_u:
+ return DAG.getNode(ISD::XOR, SDLoc(N), N->getValueType(0), N->getOperand(2),
+ N->getOperand(3));
+ case Intrinsic::aarch64_sve_orr_u:
+ return DAG.getNode(ISD::OR, SDLoc(N), N->getValueType(0), N->getOperand(2),
+ N->getOperand(3));
case Intrinsic::aarch64_sve_sabd_u:
return DAG.getNode(ISD::ABDS, SDLoc(N), N->getValueType(0),
N->getOperand(2), N->getOperand(3));
- case Intrinsic::aarch64_sve_uabd:
- return convertMergedOpToPredOp(N, ISD::ABDU, DAG, true);
case Intrinsic::aarch64_sve_uabd_u:
return DAG.getNode(ISD::ABDU, SDLoc(N), N->getValueType(0),
N->getOperand(2), N->getOperand(3));
@@ -18235,12 +19460,14 @@ static SDValue performIntrinsicCombine(SDNode *N,
N->getOperand(1), N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_sqadd:
return convertMergedOpToPredOp(N, ISD::SADDSAT, DAG, true);
- case Intrinsic::aarch64_sve_sqsub:
- return convertMergedOpToPredOp(N, ISD::SSUBSAT, DAG, true);
+ case Intrinsic::aarch64_sve_sqsub_u:
+ return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
+ N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_uqadd:
return convertMergedOpToPredOp(N, ISD::UADDSAT, DAG, true);
- case Intrinsic::aarch64_sve_uqsub:
- return convertMergedOpToPredOp(N, ISD::USUBSAT, DAG, true);
+ case Intrinsic::aarch64_sve_uqsub_u:
+ return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
+ N->getOperand(2), N->getOperand(3));
case Intrinsic::aarch64_sve_sqadd_x:
return DAG.getNode(ISD::SADDSAT, SDLoc(N), N->getValueType(0),
N->getOperand(1), N->getOperand(2));
@@ -19061,6 +20288,13 @@ static SDValue performVectorShiftCombine(SDNode *N,
unsigned ShiftImm = N->getConstantOperandVal(1);
assert(OpScalarSize > ShiftImm && "Invalid shift imm");
+ // Remove sign_extend_inreg (ashr(shl(x))) based on the number of sign bits.
+ if (N->getOpcode() == AArch64ISD::VASHR &&
+ Op.getOpcode() == AArch64ISD::VSHL &&
+ N->getOperand(1) == Op.getOperand(1))
+ if (DCI.DAG.ComputeNumSignBits(Op.getOperand(0)) > ShiftImm)
+ return Op.getOperand(0);
+
APInt ShiftedOutBits = APInt::getLowBitsSet(OpScalarSize, ShiftImm);
APInt DemandedMask = ~ShiftedOutBits;
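The combine above drops the ashr(shl(x, imm), imm) pair when x already has more than imm known sign bits, because the sign_extend_inreg it implements is then a no-op. A minimal scalar sketch of that identity (an illustrative aside, not part of the patch; the 32-bit lane width and imm = 16 are arbitrary choices):

#include <cassert>
#include <cstdint>

int main() {
  const int imm = 16;                                  // shift amount, less than the lane width
  int32_t lanes[] = {1234, -1234, 0, 32767, -32768};   // each value has more than 16 sign bits
  for (int32_t x : lanes) {
    // shl then ashr by the same amount, i.e. sign_extend_inreg to i16
    int32_t roundTripped = (int32_t)((uint32_t)x << imm) >> imm;
    assert(roundTripped == x);                         // redundant when x already fits in i16
  }
  return 0;
}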
@@ -19132,6 +20366,15 @@ static SDValue performPostLD1Combine(SDNode *N,
return SDValue();
}
+ // If there is one use and it can splat the value, prefer that operation.
+ // TODO: This could be expanded to more operations if they reliably use the
+ // index variants.
+ if (N->hasOneUse()) {
+ unsigned UseOpc = N->use_begin()->getOpcode();
+ if (UseOpc == ISD::FMUL || UseOpc == ISD::FMA)
+ return SDValue();
+ }
+
SDValue Addr = LD->getOperand(1);
SDValue Vector = N->getOperand(0);
// Search for a use of the address operand that is an increment.
@@ -19316,6 +20559,151 @@ static SDValue performLOADCombine(SDNode *N,
return DAG.getMergeValues({ExtractSubVector, TokenFactor}, DL);
}
+static EVT tryGetOriginalBoolVectorType(SDValue Op, int Depth = 0) {
+ EVT VecVT = Op.getValueType();
+ assert(VecVT.isVector() && VecVT.getVectorElementType() == MVT::i1 &&
+ "Need boolean vector type.");
+
+ if (Depth > 3)
+ return MVT::INVALID_SIMPLE_VALUE_TYPE;
+
+ // We can get the base type from a vector compare or truncate.
+ if (Op.getOpcode() == ISD::SETCC || Op.getOpcode() == ISD::TRUNCATE)
+ return Op.getOperand(0).getValueType();
+
+ // If an operand is a bool vector, continue looking.
+ EVT BaseVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
+ for (SDValue Operand : Op->op_values()) {
+ if (Operand.getValueType() != VecVT)
+ continue;
+
+ EVT OperandVT = tryGetOriginalBoolVectorType(Operand, Depth + 1);
+ if (!BaseVT.isSimple())
+ BaseVT = OperandVT;
+ else if (OperandVT != BaseVT)
+ return MVT::INVALID_SIMPLE_VALUE_TYPE;
+ }
+
+ return BaseVT;
+}
+
+// When converting a <N x iX> vector to <N x i1> to store or use as a scalar
+// iN, we can use a trick that extracts the i^th bit from the i^th element and
+// then performs a vector add to get a scalar bitmask. This requires that each
+// element's bits are either all 1 or all 0.
+static SDValue vectorToScalarBitmask(SDNode *N, SelectionDAG &DAG) {
+ SDLoc DL(N);
+ SDValue ComparisonResult(N, 0);
+ EVT VecVT = ComparisonResult.getValueType();
+ assert(VecVT.isVector() && "Must be a vector type");
+
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ return SDValue();
+
+ if (VecVT.getVectorElementType() != MVT::i1 &&
+ !DAG.getTargetLoweringInfo().isTypeLegal(VecVT))
+ return SDValue();
+
+ // If we can find the original types to work on instead of a vector of i1,
+ // we can avoid extend/extract conversion instructions.
+ if (VecVT.getVectorElementType() == MVT::i1) {
+ VecVT = tryGetOriginalBoolVectorType(ComparisonResult);
+ if (!VecVT.isSimple()) {
+ unsigned BitsPerElement = std::max(64 / NumElts, 8u); // >= 64-bit vector
+ VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), NumElts);
+ }
+ }
+ VecVT = VecVT.changeVectorElementTypeToInteger();
+
+ // Large vectors don't map directly to this conversion, so to avoid too many
+ // edge cases, we don't apply it here. The conversion will likely still be
+ // applied later via multiple smaller vectors, whose results are concatenated.
+ if (VecVT.getSizeInBits() > 128)
+ return SDValue();
+
+ // Ensure that all elements' bits are either 0s or 1s.
+ ComparisonResult = DAG.getSExtOrTrunc(ComparisonResult, DL, VecVT);
+
+ SmallVector<SDValue, 16> MaskConstants;
+ if (VecVT == MVT::v16i8) {
+ // v16i8 is a special case: we need to split it into two halves, perform the
+ // mask+addition on each half, and then combine the results.
+ for (unsigned Half = 0; Half < 2; ++Half) {
+ for (unsigned MaskBit = 1; MaskBit <= 128; MaskBit *= 2) {
+ MaskConstants.push_back(DAG.getConstant(MaskBit, DL, MVT::i32));
+ }
+ }
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, MaskConstants);
+ SDValue RepresentativeBits =
+ DAG.getNode(ISD::AND, DL, VecVT, ComparisonResult, Mask);
+
+ EVT HalfVT = VecVT.getHalfNumVectorElementsVT(*DAG.getContext());
+ unsigned NumElementsInHalf = HalfVT.getVectorNumElements();
+
+ SDValue LowHalf =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, RepresentativeBits,
+ DAG.getConstant(0, DL, MVT::i64));
+ SDValue HighHalf =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, RepresentativeBits,
+ DAG.getConstant(NumElementsInHalf, DL, MVT::i64));
+
+ SDValue ReducedLowBits =
+ DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i16, LowHalf);
+ SDValue ReducedHighBits =
+ DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i16, HighHalf);
+
+ SDValue ShiftedHighBits =
+ DAG.getNode(ISD::SHL, DL, MVT::i16, ReducedHighBits,
+ DAG.getConstant(NumElementsInHalf, DL, MVT::i32));
+ return DAG.getNode(ISD::OR, DL, MVT::i16, ShiftedHighBits, ReducedLowBits);
+ }
+
+ // All other vector sizes.
+ unsigned MaxBitMask = 1u << (VecVT.getVectorNumElements() - 1);
+ for (unsigned MaskBit = 1; MaskBit <= MaxBitMask; MaskBit *= 2) {
+ MaskConstants.push_back(DAG.getConstant(MaskBit, DL, MVT::i64));
+ }
+
+ SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, MaskConstants);
+ SDValue RepresentativeBits =
+ DAG.getNode(ISD::AND, DL, VecVT, ComparisonResult, Mask);
+ EVT ResultVT = MVT::getIntegerVT(std::max<unsigned>(
+ NumElts, VecVT.getVectorElementType().getSizeInBits()));
+ return DAG.getNode(ISD::VECREDUCE_ADD, DL, ResultVT, RepresentativeBits);
+}
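The helper above depends on every lane being all-ones or all-zeros: AND-ing lane i with (1 << i) isolates bit i, and a reduce-add of the lanes packs those bits into a scalar. A rough scalar model of the 8-lane case (illustrative only; the lane values are made up):

#include <cassert>
#include <cstdint>

int main() {
  // Each lane is 0x00 or 0xFF, so (lane & (1 << i)) extracts bit i,
  // and adding the lanes produces the packed i8 bitmask.
  uint8_t lanes[8] = {0xFF, 0x00, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0xFF};
  unsigned bitmask = 0;
  for (unsigned i = 0; i < 8; ++i)
    bitmask += lanes[i] & (1u << i);   // AND with per-lane mask, then reduce-add
  assert(bitmask == 0b10001101);       // lanes 0, 2, 3 and 7 were true
  return 0;
}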
+
+static SDValue combineBoolVectorAndTruncateStore(SelectionDAG &DAG,
+ StoreSDNode *Store) {
+ if (!Store->isTruncatingStore())
+ return SDValue();
+
+ SDLoc DL(Store);
+ SDValue VecOp = Store->getValue();
+ EVT VT = VecOp.getValueType();
+ EVT MemVT = Store->getMemoryVT();
+
+ if (!MemVT.isVector() || !VT.isVector() ||
+ MemVT.getVectorElementType() != MVT::i1)
+ return SDValue();
+
+ // If we are storing a vector that we are currently building, let
+ // `scalarizeVectorStore()` handle this more efficiently.
+ if (VecOp.getOpcode() == ISD::BUILD_VECTOR)
+ return SDValue();
+
+ VecOp = DAG.getNode(ISD::TRUNCATE, DL, MemVT, VecOp);
+ SDValue VectorBits = vectorToScalarBitmask(VecOp.getNode(), DAG);
+ if (!VectorBits)
+ return SDValue();
+
+ EVT StoreVT =
+ EVT::getIntegerVT(*DAG.getContext(), MemVT.getStoreSizeInBits());
+ SDValue ExtendedBits = DAG.getZExtOrTrunc(VectorBits, DL, StoreVT);
+ return DAG.getStore(Store->getChain(), DL, ExtendedBits, Store->getBasePtr(),
+ Store->getMemOperand());
+}
+
static SDValue performSTORECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
@@ -19354,6 +20742,9 @@ static SDValue performSTORECombine(SDNode *N,
if (SDValue Store = foldTruncStoreOfExt(DAG, N))
return Store;
+ if (SDValue Store = combineBoolVectorAndTruncateStore(DAG, ST))
+ return Store;
+
return SDValue();
}
@@ -20234,6 +21625,22 @@ static SDValue tryToWidenSetCCOperands(SDNode *Op, SelectionDAG &DAG) {
Op0ExtV, Op1ExtV, Op->getOperand(2));
}
+static SDValue
+performVecReduceBitwiseCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ SDValue Vec = N->getOperand(0);
+ if (DCI.isBeforeLegalize() &&
+ Vec.getValueType().getVectorElementType() == MVT::i1 &&
+ Vec.getValueType().isFixedLengthVector() &&
+ Vec.getValueType().isPow2VectorType()) {
+ SDLoc DL(N);
+ return getVectorBitwiseReduce(N->getOpcode(), Vec, N->getValueType(0), DL,
+ DAG);
+ }
+
+ return SDValue();
+}
+
static SDValue performSETCCCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
@@ -20268,6 +21675,7 @@ static SDValue performSETCCCombine(SDNode *N,
// setcc (srl x, imm), 0, ne ==> setcc (and x, (-1 << imm)), 0, ne
if (Cond == ISD::SETNE && isNullConstant(RHS) &&
LHS->getOpcode() == ISD::SRL && isa<ConstantSDNode>(LHS->getOperand(1)) &&
+ LHS->getConstantOperandVal(1) < VT.getScalarSizeInBits() &&
LHS->hasOneUse()) {
EVT TstVT = LHS->getValueType(0);
if (TstVT.isScalarInteger() && TstVT.getFixedSizeInBits() <= 64) {
@@ -20281,15 +21689,21 @@ static SDValue performSETCCCombine(SDNode *N,
// setcc (iN (bitcast (vNi1 X))), 0, (eq|ne)
// ==> setcc (iN (zext (i1 (vecreduce_or (vNi1 X))))), 0, (eq|ne)
+ // setcc (iN (bitcast (vNi1 X))), -1, (eq|ne)
+ // ==> setcc (iN (sext (i1 (vecreduce_and (vNi1 X))))), -1, (eq|ne)
if (DCI.isBeforeLegalize() && VT.isScalarInteger() &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE) && isNullConstant(RHS) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ (isNullConstant(RHS) || isAllOnesConstant(RHS)) &&
LHS->getOpcode() == ISD::BITCAST) {
EVT ToVT = LHS->getValueType(0);
EVT FromVT = LHS->getOperand(0).getValueType();
if (FromVT.isFixedLengthVector() &&
FromVT.getVectorElementType() == MVT::i1) {
- LHS = DAG.getNode(ISD::VECREDUCE_OR, DL, MVT::i1, LHS->getOperand(0));
- LHS = DAG.getNode(ISD::ZERO_EXTEND, DL, ToVT, LHS);
+ bool IsNull = isNullConstant(RHS);
+ LHS = DAG.getNode(IsNull ? ISD::VECREDUCE_OR : ISD::VECREDUCE_AND,
+ DL, MVT::i1, LHS->getOperand(0));
+ LHS = DAG.getNode(IsNull ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND, DL, ToVT,
+ LHS);
return DAG.getSetCC(DL, VT, LHS, RHS, Cond);
}
}
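The newly added all-ones case mirrors the existing all-zeros case: the bitcast scalar equals 0 exactly when no lane of the <N x i1> vector is set (vecreduce_or is false), and equals -1 exactly when every lane is set (vecreduce_and is true). A small bitmask model of that equivalence (illustrative sketch, modelling <4 x i1> as the low 4 bits of an integer):

#include <cassert>
#include <cstdint>

int main() {
  for (uint8_t v = 0; v < 16; ++v) {          // every possible 4-lane boolean vector
    bool anyLane = false, allLanes = true;
    for (unsigned i = 0; i < 4; ++i) {
      bool lane = (v >> i) & 1;
      anyLane = anyLane || lane;              // vecreduce_or
      allLanes = allLanes && lane;            // vecreduce_and
    }
    assert((v == 0x0) == !anyLane);           // compare-with-0 case
    assert((v == 0xF) == allLanes);           // 0xF is "-1" in the i4 domain
  }
  return 0;
}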
@@ -20632,7 +22046,7 @@ static SDValue performSelectCombine(SDNode *N,
if (N0.getOpcode() != ISD::SETCC)
return SDValue();
- if (ResVT.isScalableVector())
+ if (ResVT.isScalableVT())
return SDValue();
// Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
@@ -20693,15 +22107,19 @@ static SDValue performDUPCombine(SDNode *N,
// 128bit vector version.
if (VT.is64BitVector() && DCI.isAfterLegalizeDAG()) {
EVT LVT = VT.getDoubleNumVectorElementsVT(*DCI.DAG.getContext());
- if (SDNode *LN = DCI.DAG.getNodeIfExists(
- N->getOpcode(), DCI.DAG.getVTList(LVT), {N->getOperand(0)})) {
+ SmallVector<SDValue> Ops(N->ops());
+ if (SDNode *LN = DCI.DAG.getNodeIfExists(N->getOpcode(),
+ DCI.DAG.getVTList(LVT), Ops)) {
SDLoc DL(N);
return DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SDValue(LN, 0),
DCI.DAG.getConstant(0, DL, MVT::i64));
}
}
- return performPostLD1Combine(N, DCI, false);
+ if (N->getOpcode() == AArch64ISD::DUP)
+ return performPostLD1Combine(N, DCI, false);
+
+ return SDValue();
}
/// Get rid of unnecessary NVCASTs (that don't change the type).
@@ -21388,6 +22806,152 @@ static SDValue performDupLane128Combine(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::BITCAST, DL, VT, NewDuplane128);
}
+// Try to combine mull with uzp1.
+static SDValue tryCombineMULLWithUZP1(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ SDValue ExtractHigh;
+ SDValue ExtractLow;
+ SDValue TruncHigh;
+ SDValue TruncLow;
+ SDLoc DL(N);
+
+ // Check the operands are trunc and extract_high.
+ if (isEssentiallyExtractHighSubvector(LHS) &&
+ RHS.getOpcode() == ISD::TRUNCATE) {
+ TruncHigh = RHS;
+ if (LHS.getOpcode() == ISD::BITCAST)
+ ExtractHigh = LHS.getOperand(0);
+ else
+ ExtractHigh = LHS;
+ } else if (isEssentiallyExtractHighSubvector(RHS) &&
+ LHS.getOpcode() == ISD::TRUNCATE) {
+ TruncHigh = LHS;
+ if (LHS.getOpcode() == ISD::BITCAST)
+ ExtractHigh = RHS.getOperand(0);
+ else
+ ExtractHigh = RHS;
+ } else
+ return SDValue();
+
+ // If the truncate's operand is a DUP or another splat value, do not combine
+ // the op with uzp1; doing so causes regressions in
+ // test/CodeGen/AArch64/aarch64-smull.ll.
+ SDValue TruncHighOp = TruncHigh.getOperand(0);
+ EVT TruncHighOpVT = TruncHighOp.getValueType();
+ if (TruncHighOp.getOpcode() == AArch64ISD::DUP ||
+ DAG.isSplatValue(TruncHighOp, false))
+ return SDValue();
+
+ // Check that there is another extract_high with the same source vector.
+ // For example,
+ //
+ // t18: v4i16 = extract_subvector t2, Constant:i64<0>
+ // t12: v4i16 = truncate t11
+ // t31: v4i32 = AArch64ISD::SMULL t18, t12
+ // t23: v4i16 = extract_subvector t2, Constant:i64<4>
+ // t16: v4i16 = truncate t15
+ // t30: v4i32 = AArch64ISD::SMULL t23, t16
+ //
+ // This DAG combine assumes the two extract_high nodes use the same source
+ // vector in order to detect the pair of mulls. If they use different source
+ // vectors, this code will not work.
+ bool HasFoundMULLow = true;
+ SDValue ExtractHighSrcVec = ExtractHigh.getOperand(0);
+ if (ExtractHighSrcVec->use_size() != 2)
+ HasFoundMULLow = false;
+
+ // Find ExtractLow.
+ for (SDNode *User : ExtractHighSrcVec.getNode()->uses()) {
+ if (User == ExtractHigh.getNode())
+ continue;
+
+ if (User->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ !isNullConstant(User->getOperand(1))) {
+ HasFoundMULLow = false;
+ break;
+ }
+
+ ExtractLow.setNode(User);
+ }
+
+ if (!ExtractLow || !ExtractLow->hasOneUse())
+ HasFoundMULLow = false;
+
+ // Check ExtractLow's user.
+ if (HasFoundMULLow) {
+ SDNode *ExtractLowUser = *ExtractLow.getNode()->use_begin();
+ if (ExtractLowUser->getOpcode() != N->getOpcode())
+ HasFoundMULLow = false;
+
+ if (ExtractLowUser->getOperand(0) == ExtractLow) {
+ if (ExtractLowUser->getOperand(1).getOpcode() == ISD::TRUNCATE)
+ TruncLow = ExtractLowUser->getOperand(1);
+ else
+ HasFoundMULLow = false;
+ } else {
+ if (ExtractLowUser->getOperand(0).getOpcode() == ISD::TRUNCATE)
+ TruncLow = ExtractLowUser->getOperand(0);
+ else
+ HasFoundMULLow = false;
+ }
+ }
+
+ // If the truncate's operand is a DUP or another splat value, do not combine
+ // the op with uzp1; doing so causes regressions in
+ // test/CodeGen/AArch64/aarch64-smull.ll.
+ EVT TruncHighVT = TruncHigh.getValueType();
+ EVT UZP1VT = TruncHighVT.getDoubleNumVectorElementsVT(*DAG.getContext());
+ SDValue TruncLowOp =
+ HasFoundMULLow ? TruncLow.getOperand(0) : DAG.getUNDEF(UZP1VT);
+ EVT TruncLowOpVT = TruncLowOp.getValueType();
+ if (HasFoundMULLow && (TruncLowOp.getOpcode() == AArch64ISD::DUP ||
+ DAG.isSplatValue(TruncLowOp, false)))
+ return SDValue();
+
+ // Create uzp1, extract_high and extract_low.
+ if (TruncHighOpVT != UZP1VT)
+ TruncHighOp = DAG.getNode(ISD::BITCAST, DL, UZP1VT, TruncHighOp);
+ if (TruncLowOpVT != UZP1VT)
+ TruncLowOp = DAG.getNode(ISD::BITCAST, DL, UZP1VT, TruncLowOp);
+
+ SDValue UZP1 =
+ DAG.getNode(AArch64ISD::UZP1, DL, UZP1VT, TruncLowOp, TruncHighOp);
+ SDValue HighIdxCst =
+ DAG.getConstant(TruncHighVT.getVectorNumElements(), DL, MVT::i64);
+ SDValue NewTruncHigh =
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, TruncHighVT, UZP1, HighIdxCst);
+ DAG.ReplaceAllUsesWith(TruncHigh, NewTruncHigh);
+
+ if (HasFoundMULLow) {
+ EVT TruncLowVT = TruncLow.getValueType();
+ SDValue NewTruncLow = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, TruncLowVT,
+ UZP1, ExtractLow.getOperand(1));
+ DAG.ReplaceAllUsesWith(TruncLow, NewTruncLow);
+ }
+
+ return SDValue(N, 0);
+}
+
+static SDValue performMULLCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ if (SDValue Val =
+ tryCombineLongOpWithDup(Intrinsic::not_intrinsic, N, DCI, DAG))
+ return Val;
+
+ if (SDValue Val = tryCombineMULLWithUZP1(N, DCI, DAG))
+ return Val;
+
+ return SDValue();
+}
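tryCombineMULLWithUZP1 above replaces the two truncates feeding the low/high mull pair with a single UZP1 of the untruncated sources; on a little-endian target, truncating each 32-bit lane to 16 bits picks exactly the even 16-bit elements that UZP1 selects from the reinterpreted vector. A scalar model of that equivalence (illustrative only; the little-endian layout and the v4i32 element values are assumptions):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // trunc(<4 x i32>) == even 16-bit elements of the same bytes, i.e. the
  // lanes UZP1 would pick from the reinterpreted vector (little-endian).
  uint32_t wide[4] = {0x11112222u, 0x33334444u, 0x55556666u, 0x77778888u};
  uint16_t asHalves[8];
  std::memcpy(asHalves, wide, sizeof(wide));     // reinterpret v4i32 as v8i16
  for (unsigned i = 0; i < 4; ++i) {
    uint16_t truncated = (uint16_t)wide[i];      // ISD::TRUNCATE per lane
    assert(truncated == asHalves[2 * i]);        // UZP1 takes the even elements
  }
  return 0;
}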
+
SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -21395,9 +22959,13 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
default:
LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
break;
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ return performVecReduceBitwiseCombine(N, DCI, DAG);
case ISD::ADD:
case ISD::SUB:
- return performAddSubCombine(N, DCI, DAG);
+ return performAddSubCombine(N, DCI);
case ISD::BUILD_VECTOR:
return performBuildVectorCombine(N, DCI, DAG);
case ISD::TRUNCATE:
@@ -21436,6 +23004,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
return performORCombine(N, DCI, Subtarget, *this);
case ISD::AND:
return performANDCombine(N, DCI);
+ case ISD::FADD:
+ return performFADDCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
return performIntrinsicCombine(N, DCI, Subtarget);
case ISD::ANY_EXTEND:
@@ -21477,6 +23047,10 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case AArch64ISD::CSEL:
return performCSELCombine(N, DCI, DAG);
case AArch64ISD::DUP:
+ case AArch64ISD::DUPLANE8:
+ case AArch64ISD::DUPLANE16:
+ case AArch64ISD::DUPLANE32:
+ case AArch64ISD::DUPLANE64:
return performDUPCombine(N, DCI);
case AArch64ISD::DUPLANE128:
return performDupLane128Combine(N, DAG);
@@ -21526,7 +23100,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case AArch64ISD::SMULL:
case AArch64ISD::UMULL:
case AArch64ISD::PMULL:
- return tryCombineLongOpWithDup(Intrinsic::not_intrinsic, N, DCI, DAG);
+ return performMULLCombine(N, DCI, DAG);
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
@@ -21722,7 +23296,7 @@ bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
bool HasRet = false;
for (SDNode *Node : Copy->uses()) {
- if (Node->getOpcode() != AArch64ISD::RET_FLAG)
+ if (Node->getOpcode() != AArch64ISD::RET_GLUE)
return false;
HasRet = true;
}
@@ -21742,9 +23316,10 @@ bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}
-bool AArch64TargetLowering::getIndexedAddressParts(
- SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset,
- ISD::MemIndexedMode &AM, bool &IsInc, SelectionDAG &DAG) const {
+bool AArch64TargetLowering::getIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ SelectionDAG &DAG) const {
if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
return false;
@@ -21783,8 +23358,9 @@ bool AArch64TargetLowering::getIndexedAddressParts(
RHSC = -(uint64_t)RHSC;
if (!isInt<9>(RHSC))
return false;
- IsInc = (Op->getOpcode() == ISD::ADD);
- Offset = Op->getOperand(1);
+ // Always emit pre-inc/post-inc addressing mode. Use negated constant offset
+ // when dealing with subtraction.
+ Offset = DAG.getConstant(RHSC, SDLoc(N), RHS->getValueType(0));
return true;
}
return false;
@@ -21805,10 +23381,9 @@ bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
} else
return false;
- bool IsInc;
- if (!getIndexedAddressParts(N, Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
+ if (!getIndexedAddressParts(N, Ptr.getNode(), Base, Offset, DAG))
return false;
- AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
+ AM = ISD::PRE_INC;
return true;
}
@@ -21826,17 +23401,60 @@ bool AArch64TargetLowering::getPostIndexedAddressParts(
} else
return false;
- bool IsInc;
- if (!getIndexedAddressParts(N, Op, Base, Offset, AM, IsInc, DAG))
+ if (!getIndexedAddressParts(N, Op, Base, Offset, DAG))
return false;
// Post-indexing updates the base, so it's not a valid transform
// if that's not the same as the load's pointer.
if (Ptr != Base)
return false;
- AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
+ AM = ISD::POST_INC;
return true;
}
+static void replaceBoolVectorBitcast(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) {
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ [[maybe_unused]] EVT SrcVT = Op.getValueType();
+ assert(SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 &&
+ "Must be bool vector.");
+
+ // Special handling for Clang's __builtin_convertvector. For vectors with <8
+ // elements, it adds a vector concatenation with undef(s). If we encounter
+ // this here, we can skip the concat.
+ if (Op.getOpcode() == ISD::CONCAT_VECTORS && !Op.getOperand(0).isUndef()) {
+ bool AllUndef = true;
+ for (unsigned I = 1; I < Op.getNumOperands(); ++I)
+ AllUndef &= Op.getOperand(I).isUndef();
+
+ if (AllUndef)
+ Op = Op.getOperand(0);
+ }
+
+ SDValue VectorBits = vectorToScalarBitmask(Op.getNode(), DAG);
+ if (VectorBits)
+ Results.push_back(DAG.getZExtOrTrunc(VectorBits, DL, VT));
+}
+
+static void CustomNonLegalBITCASTResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG, EVT ExtendVT,
+ EVT CastVT) {
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // Use SCALAR_TO_VECTOR for lane zero
+ SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtendVT, Op);
+ SDValue CastVal = DAG.getNode(ISD::BITCAST, DL, CastVT, Vec);
+ SDValue IdxZero = DAG.getVectorIdxConstant(0, DL);
+ Results.push_back(
+ DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, CastVal, IdxZero));
+ return;
+}
+
void AArch64TargetLowering::ReplaceBITCASTResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
SDLoc DL(N);
@@ -21844,6 +23462,21 @@ void AArch64TargetLowering::ReplaceBITCASTResults(
EVT VT = N->getValueType(0);
EVT SrcVT = Op.getValueType();
+ if (VT == MVT::v2i16 && SrcVT == MVT::i32) {
+ CustomNonLegalBITCASTResults(N, Results, DAG, MVT::v2i32, MVT::v4i16);
+ return;
+ }
+
+ if (VT == MVT::v4i8 && SrcVT == MVT::i32) {
+ CustomNonLegalBITCASTResults(N, Results, DAG, MVT::v2i32, MVT::v8i8);
+ return;
+ }
+
+ if (VT == MVT::v2i8 && SrcVT == MVT::i16) {
+ CustomNonLegalBITCASTResults(N, Results, DAG, MVT::v4i16, MVT::v8i8);
+ return;
+ }
+
if (VT.isScalableVector() && !isTypeLegal(VT) && isTypeLegal(SrcVT)) {
assert(!VT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
"Expected fp->int bitcast!");
@@ -21861,14 +23494,14 @@ void AArch64TargetLowering::ReplaceBITCASTResults(
return;
}
+ if (SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1)
+ return replaceBoolVectorBitcast(N, Results, DAG);
+
if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16))
return;
- Op = SDValue(
- DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
- DAG.getUNDEF(MVT::i32), Op,
- DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
- 0);
+ Op = DAG.getTargetInsertSubreg(AArch64::hsub, DL, MVT::f32,
+ DAG.getUNDEF(MVT::i32), Op);
Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
}
@@ -21934,15 +23567,6 @@ static void ReplaceReductionResults(SDNode *N,
Results.push_back(SplitVal);
}
-static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) {
- SDLoc DL(N);
- SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, N);
- SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
- DAG.getNode(ISD::SRL, DL, MVT::i128, N,
- DAG.getConstant(64, DL, MVT::i64)));
- return std::make_pair(Lo, Hi);
-}
-
void AArch64TargetLowering::ReplaceExtractSubVectorResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
SDValue In = N->getOperand(0);
@@ -22068,8 +23692,9 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
llvm_unreachable("Unexpected ordering!");
}
- auto Desired = splitInt128(N->getOperand(2), DAG);
- auto New = splitInt128(N->getOperand(3), DAG);
+ SDLoc DL(N);
+ auto Desired = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64);
+ auto New = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64);
SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
New.first, New.second, N->getOperand(0)};
SDNode *CmpSwap = DAG.getMachineNode(
@@ -22077,11 +23702,143 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
Ops);
DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
- Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128,
SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
Results.push_back(SDValue(CmpSwap, 3));
}
+static unsigned getAtomicLoad128Opcode(unsigned ISDOpcode,
+ AtomicOrdering Ordering) {
+ // ATOMIC_LOAD_CLR only appears when lowering ATOMIC_LOAD_AND (see
+ // LowerATOMIC_LOAD_AND). We can't take that approach with 128-bit, because
+ // the type is not legal. Therefore we shouldn't expect to see a 128-bit
+ // ATOMIC_LOAD_CLR at any point.
+ assert(ISDOpcode != ISD::ATOMIC_LOAD_CLR &&
+ "ATOMIC_LOAD_AND should be lowered to LDCLRP directly");
+ assert(ISDOpcode != ISD::ATOMIC_LOAD_ADD && "There is no 128 bit LDADD");
+ assert(ISDOpcode != ISD::ATOMIC_LOAD_SUB && "There is no 128 bit LDSUB");
+
+ if (ISDOpcode == ISD::ATOMIC_LOAD_AND) {
+ // The operand will need to be XORed in a separate step.
+ switch (Ordering) {
+ case AtomicOrdering::Monotonic:
+ return AArch64::LDCLRP;
+ break;
+ case AtomicOrdering::Acquire:
+ return AArch64::LDCLRPA;
+ break;
+ case AtomicOrdering::Release:
+ return AArch64::LDCLRPL;
+ break;
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ return AArch64::LDCLRPAL;
+ break;
+ default:
+ llvm_unreachable("Unexpected ordering!");
+ }
+ }
+
+ if (ISDOpcode == ISD::ATOMIC_LOAD_OR) {
+ switch (Ordering) {
+ case AtomicOrdering::Monotonic:
+ return AArch64::LDSETP;
+ break;
+ case AtomicOrdering::Acquire:
+ return AArch64::LDSETPA;
+ break;
+ case AtomicOrdering::Release:
+ return AArch64::LDSETPL;
+ break;
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ return AArch64::LDSETPAL;
+ break;
+ default:
+ llvm_unreachable("Unexpected ordering!");
+ }
+ }
+
+ if (ISDOpcode == ISD::ATOMIC_SWAP) {
+ switch (Ordering) {
+ case AtomicOrdering::Monotonic:
+ return AArch64::SWPP;
+ break;
+ case AtomicOrdering::Acquire:
+ return AArch64::SWPPA;
+ break;
+ case AtomicOrdering::Release:
+ return AArch64::SWPPL;
+ break;
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ return AArch64::SWPPAL;
+ break;
+ default:
+ llvm_unreachable("Unexpected ordering!");
+ }
+ }
+
+ llvm_unreachable("Unexpected ISDOpcode!");
+}
+
+static void ReplaceATOMIC_LOAD_128Results(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+ // LSE128 has 128-bit RMW ops, but i128 is not a legal type, so lower it
+ // here. This follows the approach of the CMP_SWAP_XXX pseudo instructions
+ // rather than the CASP instructions, because CASP has register classes for
+ // the pairs of registers and therefore uses REG_SEQUENCE and EXTRACT_SUBREG
+ // to present them as single operands. LSE128 instructions use the GPR64
+ // register class (because the pair does not have to be sequential), like
+ // CMP_SWAP_XXX, and therefore we use TRUNCATE and BUILD_PAIR.
+
+ assert(N->getValueType(0) == MVT::i128 &&
+ "AtomicLoadXXX on types less than 128 should be legal");
+
+ if (!Subtarget->hasLSE128())
+ return;
+
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ const SDValue &Chain = N->getOperand(0);
+ const SDValue &Ptr = N->getOperand(1);
+ const SDValue &Val128 = N->getOperand(2);
+ std::pair<SDValue, SDValue> Val2x64 =
+ DAG.SplitScalar(Val128, SDLoc(Val128), MVT::i64, MVT::i64);
+
+ const unsigned ISDOpcode = N->getOpcode();
+ const unsigned MachineOpcode =
+ getAtomicLoad128Opcode(ISDOpcode, MemOp->getMergedOrdering());
+
+ if (ISDOpcode == ISD::ATOMIC_LOAD_AND) {
+ SDLoc dl(Val128);
+ Val2x64.first =
+ DAG.getNode(ISD::XOR, dl, MVT::i64,
+ DAG.getConstant(-1ULL, dl, MVT::i64), Val2x64.first);
+ Val2x64.second =
+ DAG.getNode(ISD::XOR, dl, MVT::i64,
+ DAG.getConstant(-1ULL, dl, MVT::i64), Val2x64.second);
+ }
+
+ SDValue Ops[] = {Val2x64.first, Val2x64.second, Ptr, Chain};
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Ops[0], Ops[1]);
+
+ MachineSDNode *AtomicInst =
+ DAG.getMachineNode(MachineOpcode, SDLoc(N),
+ DAG.getVTList(MVT::i64, MVT::i64, MVT::Other), Ops);
+
+ DAG.setNodeMemRefs(AtomicInst, {MemOp});
+
+ SDValue Lo = SDValue(AtomicInst, 0), Hi = SDValue(AtomicInst, 1);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, Lo, Hi));
+ Results.push_back(SDValue(AtomicInst, 2)); // Chain out
+}
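LSE128 has no 128-bit LDAND, so the code above complements both halves and issues LDCLRP, which computes old & ~operand; feeding it ~v therefore performs old & v. A scalar sanity check of that identity (illustrative sketch; ldclr_model is a made-up helper, not an LLVM or LSE API):

#include <cassert>
#include <cstdint>

// LDCLR-style "bit clear": returns the old value, stores old & ~operand.
static uint64_t ldclr_model(uint64_t &mem, uint64_t operand) {
  uint64_t old = mem;
  mem = old & ~operand;
  return old;
}

int main() {
  uint64_t mem = 0xF0F0F0F0F0F0F0F0ULL;
  uint64_t v = 0x00FF00FF00FF00FFULL;            // value we want to AND in
  uint64_t expected = mem & v;
  ldclr_model(mem, ~v);                          // atomicrmw 'and' == LDCLR of ~v
  assert(mem == expected);
  return 0;
}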
+
void AArch64TargetLowering::ReplaceNodeResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
switch (N->getOpcode()) {
@@ -22125,6 +23882,16 @@ void AArch64TargetLowering::ReplaceNodeResults(
case AArch64ISD::UMAXV:
ReplaceReductionResults(N, Results, DAG, ISD::UMAX, AArch64ISD::UMAXV);
return;
+ case ISD::MULHS:
+ if (useSVEForFixedLengthVectorVT(SDValue(N, 0).getValueType()))
+ Results.push_back(
+ LowerToPredicatedOp(SDValue(N, 0), DAG, AArch64ISD::MULHS_PRED));
+ return;
+ case ISD::MULHU:
+ if (useSVEForFixedLengthVectorVT(SDValue(N, 0).getValueType()))
+ Results.push_back(
+ LowerToPredicatedOp(SDValue(N, 0), DAG, AArch64ISD::MULHU_PRED));
+ return;
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::STRICT_FP_TO_SINT:
@@ -22135,6 +23902,19 @@ void AArch64TargetLowering::ReplaceNodeResults(
case ISD::ATOMIC_CMP_SWAP:
ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
return;
+ case ISD::ATOMIC_LOAD_CLR:
+ assert(N->getValueType(0) != MVT::i128 &&
+ "128-bit ATOMIC_LOAD_AND should be lowered directly to LDCLRP");
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_SWAP: {
+ assert(cast<AtomicSDNode>(N)->getVal().getValueType() == MVT::i128 &&
+ "Expected 128-bit atomicrmw.");
+ // These need custom type legalisation, so we go directly to the machine instruction.
+ ReplaceATOMIC_LOAD_128Results(N, Results, DAG, Subtarget);
+ return;
+ }
case ISD::ATOMIC_LOAD:
case ISD::LOAD: {
MemSDNode *LoadNode = cast<MemSDNode>(N);
@@ -22170,9 +23950,16 @@ void AArch64TargetLowering::ReplaceNodeResults(
}
if (SDValue(N, 0).getValueType() == MVT::i128) {
+ auto *AN = dyn_cast<AtomicSDNode>(LoadNode);
+ bool isLoadAcquire =
+ AN && AN->getSuccessOrdering() == AtomicOrdering::Acquire;
+ unsigned Opcode = isLoadAcquire ? AArch64ISD::LDIAPP : AArch64ISD::LDP;
+
+ if (isLoadAcquire)
+ assert(Subtarget->hasFeature(AArch64::FeatureRCPC3));
+
SDValue Result = DAG.getMemIntrinsicNode(
- AArch64ISD::LDP, SDLoc(N),
- DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
+ Opcode, SDLoc(N), DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
{LoadNode->getChain(), LoadNode->getBasePtr()},
LoadNode->getMemoryVT(), LoadNode->getMemOperand());
@@ -22295,9 +24082,54 @@ bool AArch64TargetLowering::isOpSuitableForLDPSTP(const Instruction *I) const {
return false;
}
+bool AArch64TargetLowering::isOpSuitableForLSE128(const Instruction *I) const {
+ if (!Subtarget->hasLSE128())
+ return false;
+
+ // Only use SWPP for stores where LSE2 would require a fence. Unlike STP, SWPP
+ // will clobber the two registers.
+ if (const auto *SI = dyn_cast<StoreInst>(I))
+ return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
+ SI->getAlign() >= Align(16) &&
+ (SI->getOrdering() == AtomicOrdering::Release ||
+ SI->getOrdering() == AtomicOrdering::SequentiallyConsistent);
+
+ if (const auto *RMW = dyn_cast<AtomicRMWInst>(I))
+ return RMW->getValOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
+ RMW->getAlign() >= Align(16) &&
+ (RMW->getOperation() == AtomicRMWInst::Xchg ||
+ RMW->getOperation() == AtomicRMWInst::And ||
+ RMW->getOperation() == AtomicRMWInst::Or);
+
+ return false;
+}
+
+bool AArch64TargetLowering::isOpSuitableForRCPC3(const Instruction *I) const {
+ if (!Subtarget->hasLSE2() || !Subtarget->hasRCPC3())
+ return false;
+
+ if (auto LI = dyn_cast<LoadInst>(I))
+ return LI->getType()->getPrimitiveSizeInBits() == 128 &&
+ LI->getAlign() >= Align(16) &&
+ LI->getOrdering() == AtomicOrdering::Acquire;
+
+ if (auto SI = dyn_cast<StoreInst>(I))
+ return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
+ SI->getAlign() >= Align(16) &&
+ SI->getOrdering() == AtomicOrdering::Release;
+
+ return false;
+}
+
bool AArch64TargetLowering::shouldInsertFencesForAtomic(
const Instruction *I) const {
- return isOpSuitableForLDPSTP(I);
+ if (isOpSuitableForRCPC3(I))
+ return false;
+ if (isOpSuitableForLSE128(I))
+ return false;
+ if (isOpSuitableForLDPSTP(I))
+ return true;
+ return false;
}
bool AArch64TargetLowering::shouldInsertTrailingFenceForAtomicStore(
@@ -22330,7 +24162,13 @@ bool AArch64TargetLowering::shouldInsertTrailingFenceForAtomicStore(
TargetLoweringBase::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
- if (Size != 128 || isOpSuitableForLDPSTP(SI))
+ if (Size != 128)
+ return AtomicExpansionKind::None;
+ if (isOpSuitableForRCPC3(SI))
+ return AtomicExpansionKind::None;
+ if (isOpSuitableForLSE128(SI))
+ return AtomicExpansionKind::Expand;
+ if (isOpSuitableForLDPSTP(SI))
return AtomicExpansionKind::None;
return AtomicExpansionKind::Expand;
}
@@ -22342,7 +24180,12 @@ TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
- if (Size != 128 || isOpSuitableForLDPSTP(LI))
+ if (Size != 128)
+ return AtomicExpansionKind::None;
+ if (isOpSuitableForRCPC3(LI))
+ return AtomicExpansionKind::None;
+ // No LSE128 loads
+ if (isOpSuitableForLDPSTP(LI))
return AtomicExpansionKind::None;
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
@@ -22368,6 +24211,13 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (Size > 128) return AtomicExpansionKind::None;
+ bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
+ (AI->getOperation() == AtomicRMWInst::Xchg ||
+ AI->getOperation() == AtomicRMWInst::Or ||
+ AI->getOperation() == AtomicRMWInst::And);
+ if (CanUseLSE128)
+ return AtomicExpansionKind::None;
+
// Nand is not supported in LSE.
// Leave 128 bits to LLSC or CmpXChg.
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
@@ -22713,6 +24563,33 @@ bool AArch64TargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
return TargetLowering::shouldConvertFpToSat(Op, FPVT, VT);
}
+MachineInstr *
+AArch64TargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator &MBBI,
+ const TargetInstrInfo *TII) const {
+ assert(MBBI->isCall() && MBBI->getCFIType() &&
+ "Invalid call instruction for a KCFI check");
+
+ switch (MBBI->getOpcode()) {
+ case AArch64::BLR:
+ case AArch64::BLRNoIP:
+ case AArch64::TCRETURNri:
+ case AArch64::TCRETURNriBTI:
+ break;
+ default:
+ llvm_unreachable("Unexpected CFI call opcode");
+ }
+
+ MachineOperand &Target = MBBI->getOperand(0);
+ assert(Target.isReg() && "Invalid target operand for an indirect call");
+ Target.setIsRenamable(false);
+
+ return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(AArch64::KCFI_CHECK))
+ .addReg(Target.getReg())
+ .addImm(MBBI->getCFIType())
+ .getInstr();
+}
+
bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
}
@@ -22778,7 +24655,7 @@ bool AArch64TargetLowering::shouldLocalize(
const GlobalValue &GV = *MI.getOperand(1).getGlobal();
if (GV.isThreadLocal() && Subtarget->isTargetMachO())
return false;
- break;
+ return true; // Always localize G_GLOBAL_VALUE to avoid high reg pressure.
}
case TargetOpcode::G_CONSTANT: {
auto *CI = MI.getOperand(1).getCImm();
@@ -22799,6 +24676,8 @@ bool AArch64TargetLowering::shouldLocalize(
// localizable.
case AArch64::ADRP:
case AArch64::G_ADD_LOW:
+ // Need to localize G_PTR_ADD so that G_GLOBAL_VALUE can be localized too.
+ case TargetOpcode::G_PTR_ADD:
return true;
default:
break;
@@ -22807,15 +24686,15 @@ bool AArch64TargetLowering::shouldLocalize(
}
bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
- if (isa<ScalableVectorType>(Inst.getType()))
+ if (Inst.getType()->isScalableTy())
return true;
for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
- if (isa<ScalableVectorType>(Inst.getOperand(i)->getType()))
+ if (Inst.getOperand(i)->getType()->isScalableTy())
return true;
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
- if (isa<ScalableVectorType>(AI->getAllocatedType()))
+ if (AI->getAllocatedType()->isScalableTy())
return true;
}
@@ -23642,6 +25521,34 @@ AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
}
SDValue
+AArch64TargetLowering::LowerVECTOR_DEINTERLEAVE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT OpVT = Op.getValueType();
+ assert(OpVT.isScalableVector() &&
+ "Expected scalable vector in LowerVECTOR_DEINTERLEAVE.");
+ SDValue Even = DAG.getNode(AArch64ISD::UZP1, DL, OpVT, Op.getOperand(0),
+ Op.getOperand(1));
+ SDValue Odd = DAG.getNode(AArch64ISD::UZP2, DL, OpVT, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getMergeValues({Even, Odd}, DL);
+}
+
+SDValue AArch64TargetLowering::LowerVECTOR_INTERLEAVE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT OpVT = Op.getValueType();
+ assert(OpVT.isScalableVector() &&
+ "Expected scalable vector in LowerVECTOR_INTERLEAVE.");
+
+ SDValue Lo = DAG.getNode(AArch64ISD::ZIP1, DL, OpVT, Op.getOperand(0),
+ Op.getOperand(1));
+ SDValue Hi = DAG.getNode(AArch64ISD::ZIP2, DL, OpVT, Op.getOperand(0),
+ Op.getOperand(1));
+ return DAG.getMergeValues({Lo, Hi}, DL);
+}
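vector_deinterleave maps onto UZP1/UZP2 (even and odd elements of the operand pair) and vector_interleave onto ZIP1/ZIP2. An index-level sketch of that correspondence, using fixed 4-lane halves purely for readability (illustrative, not part of the patch):

#include <cassert>

int main() {
  int a[4] = {0, 1, 2, 3}, b[4] = {10, 11, 12, 13};

  // Interleave: ZIP1 forms the low half, ZIP2 the high half.
  int zip1[4], zip2[4];
  for (int i = 0; i < 2; ++i) {
    zip1[2 * i] = a[i];     zip1[2 * i + 1] = b[i];
    zip2[2 * i] = a[i + 2]; zip2[2 * i + 1] = b[i + 2];
  }

  // Deinterleave the concatenation: UZP1 takes even lanes, UZP2 odd lanes.
  int concat[8], even[4], odd[4];
  for (int i = 0; i < 4; ++i) { concat[i] = zip1[i]; concat[i + 4] = zip2[i]; }
  for (int i = 0; i < 4; ++i) { even[i] = concat[2 * i]; odd[i] = concat[2 * i + 1]; }

  for (int i = 0; i < 4; ++i) {
    assert(even[i] == a[i]);            // UZP1 recovers the first input
    assert(odd[i] == b[i]);             // UZP2 recovers the second input
  }
  return 0;
}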
+
+SDValue
AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -23959,81 +25866,125 @@ bool AArch64TargetLowering::isConstantUnsignedBitfieldExtractLegal(
}
bool AArch64TargetLowering::isComplexDeinterleavingSupported() const {
- return Subtarget->hasComplxNum();
+ return Subtarget->hasSVE() || Subtarget->hasSVE2() ||
+ Subtarget->hasComplxNum();
}
bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported(
ComplexDeinterleavingOperation Operation, Type *Ty) const {
- auto *VTy = dyn_cast<FixedVectorType>(Ty);
+ auto *VTy = dyn_cast<VectorType>(Ty);
if (!VTy)
return false;
+ // If the vector is scalable, SVE is enabled, implying support for complex
+ // numbers. Otherwise, we need to ensure complex number support is available.
+ if (!VTy->isScalableTy() && !Subtarget->hasComplxNum())
+ return false;
+
auto *ScalarTy = VTy->getScalarType();
- unsigned NumElements = VTy->getNumElements();
+ unsigned NumElements = VTy->getElementCount().getKnownMinValue();
+ // We can only process vectors with a bit size of 128 or higher (or, for
+ // NEON, additionally exactly 64 bits). These vectors must also have a
+ // power-of-2 size, as we later split them into the smallest supported size
+ // and merge them back together after applying the complex operation.
unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
- if ((VTyWidth < 128 && VTyWidth != 64) || !llvm::isPowerOf2_32(VTyWidth))
+ if ((VTyWidth < 128 && (VTy->isScalableTy() || VTyWidth != 64)) ||
+ !llvm::isPowerOf2_32(VTyWidth))
return false;
+ if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2()) {
+ unsigned ScalarWidth = ScalarTy->getScalarSizeInBits();
+ return 8 <= ScalarWidth && ScalarWidth <= 64;
+ }
+
return (ScalarTy->isHalfTy() && Subtarget->hasFullFP16()) ||
ScalarTy->isFloatTy() || ScalarTy->isDoubleTy();
}
Value *AArch64TargetLowering::createComplexDeinterleavingIR(
- Instruction *I, ComplexDeinterleavingOperation OperationType,
+ IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
Value *Accumulator) const {
- FixedVectorType *Ty = cast<FixedVectorType>(InputA->getType());
+ VectorType *Ty = cast<VectorType>(InputA->getType());
+ bool IsScalable = Ty->isScalableTy();
+ bool IsInt = Ty->getElementType()->isIntegerTy();
- IRBuilder<> B(I);
-
- unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();
+ unsigned TyWidth =
+ Ty->getScalarSizeInBits() * Ty->getElementCount().getKnownMinValue();
assert(((TyWidth >= 128 && llvm::isPowerOf2_32(TyWidth)) || TyWidth == 64) &&
"Vector type must be either 64 or a power of 2 that is at least 128");
if (TyWidth > 128) {
- int Stride = Ty->getNumElements() / 2;
- auto SplitSeq = llvm::seq<int>(0, Ty->getNumElements());
- auto SplitSeqVec = llvm::to_vector(SplitSeq);
- ArrayRef<int> LowerSplitMask(&SplitSeqVec[0], Stride);
- ArrayRef<int> UpperSplitMask(&SplitSeqVec[Stride], Stride);
-
- auto *LowerSplitA = B.CreateShuffleVector(InputA, LowerSplitMask);
- auto *LowerSplitB = B.CreateShuffleVector(InputB, LowerSplitMask);
- auto *UpperSplitA = B.CreateShuffleVector(InputA, UpperSplitMask);
- auto *UpperSplitB = B.CreateShuffleVector(InputB, UpperSplitMask);
+ int Stride = Ty->getElementCount().getKnownMinValue() / 2;
+ auto *HalfTy = VectorType::getHalfElementsVectorType(Ty);
+ auto *LowerSplitA = B.CreateExtractVector(HalfTy, InputA, B.getInt64(0));
+ auto *LowerSplitB = B.CreateExtractVector(HalfTy, InputB, B.getInt64(0));
+ auto *UpperSplitA =
+ B.CreateExtractVector(HalfTy, InputA, B.getInt64(Stride));
+ auto *UpperSplitB =
+ B.CreateExtractVector(HalfTy, InputB, B.getInt64(Stride));
Value *LowerSplitAcc = nullptr;
Value *UpperSplitAcc = nullptr;
-
if (Accumulator) {
- LowerSplitAcc = B.CreateShuffleVector(Accumulator, LowerSplitMask);
- UpperSplitAcc = B.CreateShuffleVector(Accumulator, UpperSplitMask);
+ LowerSplitAcc = B.CreateExtractVector(HalfTy, Accumulator, B.getInt64(0));
+ UpperSplitAcc =
+ B.CreateExtractVector(HalfTy, Accumulator, B.getInt64(Stride));
}
-
auto *LowerSplitInt = createComplexDeinterleavingIR(
- I, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
+ B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
auto *UpperSplitInt = createComplexDeinterleavingIR(
- I, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
+ B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
- ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());
- return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);
+ auto *Result = B.CreateInsertVector(Ty, PoisonValue::get(Ty), LowerSplitInt,
+ B.getInt64(0));
+ return B.CreateInsertVector(Ty, Result, UpperSplitInt, B.getInt64(Stride));
}
if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
+ if (Accumulator == nullptr)
+ Accumulator = Constant::getNullValue(Ty);
+
+ if (IsScalable) {
+ if (IsInt)
+ return B.CreateIntrinsic(
+ Intrinsic::aarch64_sve_cmla_x, Ty,
+ {Accumulator, InputA, InputB, B.getInt32((int)Rotation * 90)});
+
+ auto *Mask = B.getAllOnesMask(Ty->getElementCount());
+ return B.CreateIntrinsic(
+ Intrinsic::aarch64_sve_fcmla, Ty,
+ {Mask, Accumulator, InputA, InputB, B.getInt32((int)Rotation * 90)});
+ }
+
Intrinsic::ID IdMap[4] = {Intrinsic::aarch64_neon_vcmla_rot0,
Intrinsic::aarch64_neon_vcmla_rot90,
Intrinsic::aarch64_neon_vcmla_rot180,
Intrinsic::aarch64_neon_vcmla_rot270};
- if (Accumulator == nullptr)
- Accumulator = ConstantFP::get(Ty, 0);
return B.CreateIntrinsic(IdMap[(int)Rotation], Ty,
{Accumulator, InputB, InputA});
}
if (OperationType == ComplexDeinterleavingOperation::CAdd) {
+ if (IsScalable) {
+ if (Rotation == ComplexDeinterleavingRotation::Rotation_90 ||
+ Rotation == ComplexDeinterleavingRotation::Rotation_270) {
+ if (IsInt)
+ return B.CreateIntrinsic(
+ Intrinsic::aarch64_sve_cadd_x, Ty,
+ {InputA, InputB, B.getInt32((int)Rotation * 90)});
+
+ auto *Mask = B.getAllOnesMask(Ty->getElementCount());
+ return B.CreateIntrinsic(
+ Intrinsic::aarch64_sve_fcadd, Ty,
+ {Mask, InputA, InputB, B.getInt32((int)Rotation * 90)});
+ }
+ return nullptr;
+ }
+
Intrinsic::ID IntId = Intrinsic::not_intrinsic;
if (Rotation == ComplexDeinterleavingRotation::Rotation_90)
IntId = Intrinsic::aarch64_neon_vcadd_rot90;
@@ -24048,3 +25999,13 @@ Value *AArch64TargetLowering::createComplexDeinterleavingIR(
return nullptr;
}
+
+bool AArch64TargetLowering::preferScalarizeSplat(SDNode *N) const {
+ unsigned Opc = N->getOpcode();
+ if (ISD::isExtOpcode(Opc)) {
+ if (any_of(N->uses(),
+ [&](SDNode *Use) { return Use->getOpcode() == ISD::MUL; }))
+ return false;
+ }
+ return true;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 0edec721ed87..aca45f113e73 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -77,7 +77,7 @@ enum NodeType : unsigned {
ADDlow, // Add the low 12 bits of a TargetGlobalAddress operand.
LOADgot, // Load from automatically generated descriptor (e.g. Global
// Offset Table, TLS record).
- RET_FLAG, // Return with a flag operand. Operand 0 is the chain operand.
+ RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand.
BRCOND, // Conditional branch instruction; "b.cond".
CSEL,
CSINV, // Conditional select invert.
@@ -475,8 +475,10 @@ enum NodeType : unsigned {
STZ2G,
LDP,
+ LDIAPP,
LDNP,
STP,
+ STILP,
STNP,
// Memory Operations
@@ -501,6 +503,11 @@ enum Rounding {
// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
+
+// Registers used to pass function arguments.
+const ArrayRef<MCPhysReg> getGPRArgRegs();
+const ArrayRef<MCPhysReg> getFPRArgRegs();
+
} // namespace AArch64
class AArch64Subtarget;
@@ -528,6 +535,11 @@ public:
const SelectionDAG &DAG,
unsigned Depth = 0) const override;
+ unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth) const override;
+
MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
// Returning i64 unconditionally here (i.e. even for ILP32) means that the
// *DAG* representation of pointers will always be 64-bits. They will be
@@ -611,6 +623,8 @@ public:
bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
EVT NewVT) const override;
+ bool shouldRemoveRedundantExtend(SDValue Op) const override;
+
bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
bool isTruncateFree(EVT VT1, EVT VT2) const override;
@@ -623,8 +637,8 @@ public:
bool shouldSinkOperands(Instruction *I,
SmallVectorImpl<Use *> &Ops) const override;
- bool optimizeExtendOrTruncateConversion(Instruction *I,
- Loop *L) const override;
+ bool optimizeExtendOrTruncateConversion(
+ Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override;
bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;
@@ -637,6 +651,12 @@ public:
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
unsigned Factor) const override;
+ bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
+ LoadInst *LI) const override;
+
+ bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
+ StoreInst *SI) const override;
+
bool isLegalAddImmediate(int64_t) const override;
bool isLegalICmpImmediate(int64_t) const override;
@@ -668,6 +688,7 @@ public:
CodeGenOpt::Level OptLevel) const override;
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
+ ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
/// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
bool isDesirableToCommuteWithShift(const SDNode *N,
@@ -680,6 +701,9 @@ public:
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
+ bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
+ EVT VT) const override;
+
/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
@@ -705,6 +729,8 @@ public:
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;
bool isOpSuitableForLDPSTP(const Instruction *I) const;
+ bool isOpSuitableForLSE128(const Instruction *I) const;
+ bool isOpSuitableForRCPC3(const Instruction *I) const;
bool shouldInsertFencesForAtomic(const Instruction *I) const override;
bool
shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;
@@ -826,15 +852,10 @@ public:
ComplexDeinterleavingOperation Operation, Type *Ty) const override;
Value *createComplexDeinterleavingIR(
- Instruction *I, ComplexDeinterleavingOperation OperationType,
+ IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
Value *Accumulator = nullptr) const override;
- bool hasBitPreservingFPLogic(EVT VT) const override {
- // FIXME: Is this always true? It should be true for vectors at least.
- return VT == MVT::f32 || VT == MVT::f64;
- }
-
bool supportSplitCSR(MachineFunction *MF) const override {
return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
@@ -850,6 +871,10 @@ public:
bool supportKCFIBundles() const override { return true; }
+ MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator &MBBI,
+ const TargetInstrInfo *TII) const override;
+
/// Enable aggressive FMA fusion on targets that want it.
bool enableAggressiveFMAFusion(EVT VT) const override;
@@ -907,10 +932,10 @@ public:
/// \p Entry tells whether this is before/after the Call, which is necessary
/// because PSTATE.SM is only queried once.
SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
- SDValue Chain, SDValue InFlag,
+ SDValue Chain, SDValue InGlue,
SDValue PStateSM, bool Entry) const;
- bool isVScaleKnownToBeAPowerOfTwo() const override;
+ bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }
// Normally SVE is only used for byte size vectors that do not fit within a
// NEON vector. This changes when OverrideNEON is true, allowing SVE to be
@@ -925,8 +950,7 @@ private:
bool isExtFreeImpl(const Instruction *Ext) const override;
void addTypeForNEON(MVT VT);
- void addTypeForStreamingSVE(MVT VT);
- void addTypeForFixedLengthSVE(MVT VT);
+ void addTypeForFixedLengthSVE(MVT VT, bool StreamingSVE);
void addDRTypeForNEON(MVT VT);
void addQRTypeForNEON(MVT VT);
@@ -942,7 +966,7 @@ private:
SDValue LowerCall(CallLoweringInfo & /*CLI*/,
SmallVectorImpl<SDValue> &InVals) const override;
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<CCValAssign> &RVLocs,
const SDLoc &DL, SelectionDAG &DAG,
@@ -1038,7 +1062,6 @@ private:
SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
@@ -1050,6 +1073,8 @@ private:
SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
@@ -1082,6 +1107,7 @@ private:
SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
SDValue &Size,
SelectionDAG &DAG) const;
+ SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;
SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
SelectionDAG &DAG) const;
@@ -1156,14 +1182,19 @@ private:
return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
+ /// Handle Lowering flag assembly outputs.
+ SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
+ const SDLoc &DL,
+ const AsmOperandInfo &Constraint,
+ SelectionDAG &DAG) const override;
+
bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
- SDValue &Offset, ISD::MemIndexedMode &AM,
- bool &IsInc, SelectionDAG &DAG) const;
+ SDValue &Offset, SelectionDAG &DAG) const;
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
ISD::MemIndexedMode &AM,
SelectionDAG &DAG) const override;
@@ -1214,6 +1245,8 @@ private:
bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
LLT Ty2) const override;
+
+ bool preferScalarizeSplat(SDNode *N) const override;
};
namespace AArch64 {
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 64629eec2289..1427886d71c0 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -542,3 +542,34 @@ let Predicates = [HasLSE] in {
defm : LDOPregister_patterns_mod<"LDADD", "atomic_load_sub", "SUB">;
defm : LDOPregister_patterns_mod<"LDCLR", "atomic_load_and", "ORN">;
}
+
+// v8.9a/v9.4a FEAT_LRCPC patterns
+let Predicates = [HasRCPC3, HasNEON] in {
+ // LDAP1 loads
+ def : Pat<(vector_insert (v2i64 VecListOne128:$Rd),
+ (i64 (acquiring_load<atomic_load_64> GPR64sp:$Rn)), VectorIndexD:$idx),
+ (LDAP1 VecListOne128:$Rd, VectorIndexD:$idx, GPR64sp:$Rn)>;
+ def : Pat<(vector_insert (v2f64 VecListOne128:$Rd),
+ (f64 (bitconvert (i64 (acquiring_load<atomic_load_64> GPR64sp:$Rn)))), VectorIndexD:$idx),
+ (LDAP1 VecListOne128:$Rd, VectorIndexD:$idx, GPR64sp:$Rn)>;
+ def : Pat<(v1i64 (scalar_to_vector
+ (i64 (acquiring_load<atomic_load_64> GPR64sp:$Rn)))),
+ (EXTRACT_SUBREG (LDAP1 (v2i64 (IMPLICIT_DEF)), (i64 0), GPR64sp:$Rn), dsub)>;
+ def : Pat<(v1f64 (scalar_to_vector
+ (f64 (bitconvert (i64 (acquiring_load<atomic_load_64> GPR64sp:$Rn)))))),
+ (EXTRACT_SUBREG (LDAP1 (v2f64 (IMPLICIT_DEF)), (i64 0), GPR64sp:$Rn), dsub)>;
+
+ // STL1 stores
+ def : Pat<(releasing_store<atomic_store_64> GPR64sp:$Rn,
+ (i64 (vector_extract (v2i64 VecListOne128:$Vt), VectorIndexD:$idx))),
+ (STL1 VecListOne128:$Vt, VectorIndexD:$idx, GPR64sp:$Rn)>;
+ def : Pat<(releasing_store<atomic_store_64> GPR64sp:$Rn,
+ (i64 (bitconvert (f64 (vector_extract (v2f64 VecListOne128:$Vt), VectorIndexD:$idx))))),
+ (STL1 VecListOne128:$Vt, VectorIndexD:$idx, GPR64sp:$Rn)>;
+ // The v1i64 version of the vldap1_lane_* intrinsic is represented as a
+ // vector_insert -> vector_extract -> atomic store sequence, which is captured
+ // by the patterns above. We only need to cover the v1f64 case manually.
+ def : Pat<(releasing_store<atomic_store_64> GPR64sp:$Rn,
+ (i64 (bitconvert (v1f64 VecListOne64:$Vt)))),
+ (STL1 (SUBREG_TO_REG (i64 0), VecListOne64:$Vt, dsub), (i64 0), GPR64sp:$Rn)>;
+}
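
The block above pairs the new RCPC3 LDAP1/STL1 selection patterns with acquire/release atomic accesses on a single 64-bit lane. As a rough, standalone C++ sketch of the source-level shape being matched (the alias V2x64 and both function names are invented for illustration; a plain std::array stands in for the 128-bit vector register):

#include <array>
#include <atomic>
#include <cstdint>

using V2x64 = std::array<uint64_t, 2>;   // stand-in for a 128-bit NEON register

// Acquiring 64-bit load, then vector_insert: the shape the LDAP1 patterns fuse.
V2x64 load_lane_acquire(V2x64 v, const std::atomic<uint64_t> *p, unsigned lane) {
  v[lane] = p->load(std::memory_order_acquire);
  return v;
}

// vector_extract, then releasing 64-bit store: the shape the STL1 patterns fuse.
void store_lane_release(const V2x64 &v, std::atomic<uint64_t> *p, unsigned lane) {
  p->store(v[lane], std::memory_order_release);
}
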
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 91179aa8046e..cd2b9df27a24 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -133,11 +133,40 @@ def extract_high_v4i32 :
def extract_high_v2i64 :
ComplexPattern<v1i64, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
+def extract_high_v8f16 :
+ ComplexPattern<v4f16, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
+def extract_high_v4f32 :
+ ComplexPattern<v2f32, 1, "SelectExtractHigh", [extract_subvector, bitconvert]>;
+
+def gi_extract_high_v8f16 :
+ GIComplexOperandMatcher<v4s16, "selectExtractHigh">,
+ GIComplexPatternEquiv<extract_high_v8f16>;
+def gi_extract_high_v4f32 :
+ GIComplexOperandMatcher<v2s32, "selectExtractHigh">,
+ GIComplexPatternEquiv<extract_high_v4f32>;
+
def extract_high_dup_v8i16 :
BinOpFrag<(extract_subvector (v8i16 (AArch64duplane16 (v8i16 node:$LHS), node:$RHS)), (i64 4))>;
def extract_high_dup_v4i32 :
BinOpFrag<(extract_subvector (v4i32 (AArch64duplane32 (v4i32 node:$LHS), node:$RHS)), (i64 2))>;
+def dup_v8i16 :
+ PatFrags<(ops node:$LHS, node:$RHS),
+ [(v4i16 (extract_subvector (v8i16 (AArch64duplane16 (v8i16 node:$LHS), node:$RHS)), (i64 0))),
+ (v4i16 (AArch64duplane16 (v8i16 node:$LHS), node:$RHS))]>;
+def dup_v4i32 :
+ PatFrags<(ops node:$LHS, node:$RHS),
+ [(v2i32 (extract_subvector (v4i32 (AArch64duplane32 (v4i32 node:$LHS), node:$RHS)), (i64 0))),
+ (v2i32 (AArch64duplane32 (v4i32 node:$LHS), node:$RHS))]>;
+def dup_v8f16 :
+ PatFrags<(ops node:$LHS, node:$RHS),
+ [(v4f16 (extract_subvector (v8f16 (AArch64duplane16 (v8f16 node:$LHS), node:$RHS)), (i64 0))),
+ (v4f16 (AArch64duplane16 (v8f16 node:$LHS), node:$RHS))]>;
+def dup_v4f32 :
+ PatFrags<(ops node:$LHS, node:$RHS),
+ [(v2f32 (extract_subvector (v4f32 (AArch64duplane32 (v4f32 node:$LHS), node:$RHS)), (i64 0))),
+ (v2f32 (AArch64duplane32 (v4f32 node:$LHS), node:$RHS))]>;
+
//===----------------------------------------------------------------------===//
// Asm Operand Classes.
//
@@ -285,7 +314,7 @@ def AdrpOperand : AsmOperandClass {
}
def adrplabel : Operand<i64> {
let EncoderMethod = "getAdrLabelOpValue";
- let PrintMethod = "printAdrpLabel";
+ let PrintMethod = "printAdrAdrpLabel";
let ParserMatchClass = AdrpOperand;
let OperandType = "OPERAND_PCREL";
}
@@ -297,7 +326,9 @@ def AdrOperand : AsmOperandClass {
}
def adrlabel : Operand<i64> {
let EncoderMethod = "getAdrLabelOpValue";
+ let PrintMethod = "printAdrAdrpLabel";
let ParserMatchClass = AdrOperand;
+ let OperandType = "OPERAND_PCREL";
}
class SImmOperand<int width> : AsmOperandClass {
@@ -859,6 +890,11 @@ let DiagnosticType = "LogicalSecondSource" in {
let RenderMethod = "addLogicalImmNotOperands<int64_t>";
}
}
+
+def Imm0_127Operand : AsmImmRange<0, 127>;
+
+let OperandType = "OPERAND_IMMEDIATE" in {
+
def logical_imm32 : Operand<i32>, IntImmLeaf<i32, [{
return AArch64_AM::isLogicalImmediate(Imm.getZExtValue(), 32);
}], logical_imm32_XFORM> {
@@ -887,7 +923,11 @@ def timm32_0_65535 : Operand<i32>, TImmLeaf<i32, [{
def timm64_0_65535 : Operand<i64>, TImmLeaf<i64, [{
return ((uint64_t)Imm) < 65536;
}]>;
-}
+
+def imm64_0_65535 : Operand<i64>, ImmLeaf<i64, [{
+ return ((uint64_t)Imm) < 65536;
+}]>;
+} // ParserMatchClass
def imm0_255 : Operand<i32>, ImmLeaf<i32, [{
return ((uint32_t)Imm) < 256;
@@ -897,7 +937,6 @@ def imm0_255 : Operand<i32>, ImmLeaf<i32, [{
}
// imm0_127 predicate - True if the immediate is in the range [0,127]
-def Imm0_127Operand : AsmImmRange<0, 127>;
def imm0_127 : Operand<i32>, ImmLeaf<i32, [{
return ((uint32_t)Imm) < 128;
}]> {
@@ -1034,6 +1073,8 @@ def timm32_0_255 : Operand<i32>, TImmLeaf<i32, [{
let ParserMatchClass = Imm0_255Operand;
}
+} // let OperandType = "OPERAND_IMMEDIATE"
+
// An arithmetic shifter operand:
// {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr
// {5-0} - imm6
@@ -4671,7 +4712,7 @@ class BaseMemTagStore<bits<2> opc1, bits<2> opc2, string asm_insn,
}
multiclass MemTagStore<bits<2> opc1, string insn> {
- def Offset :
+ def i :
BaseMemTagStore<opc1, 0b10, insn, "\t$Rt, [$Rn, $offset]", "",
(outs), (ins GPR64sp:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
def PreIndex :
@@ -4686,7 +4727,7 @@ multiclass MemTagStore<bits<2> opc1, string insn> {
(ins GPR64sp:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
def : InstAlias<insn # "\t$Rt, [$Rn]",
- (!cast<Instruction>(NAME # "Offset") GPR64sp:$Rt, GPR64sp:$Rn, 0)>;
+ (!cast<Instruction>(NAME # "i") GPR64sp:$Rt, GPR64sp:$Rn, 0)>;
}
//---
@@ -7461,7 +7502,7 @@ multiclass SIMDAcrossLanesHSD<bit U, bits<5> opcode, string asm> {
let mayRaiseFPException = 1, Uses = [FPCR] in
multiclass SIMDFPAcrossLanes<bits<5> opcode, bit sz1, string asm,
- Intrinsic intOp> {
+ SDPatternOperator intOp> {
let Predicates = [HasNEON, HasFullFP16] in {
def v4i16v : BaseSIMDAcrossLanes<0, 0, {sz1, 0}, opcode, FPR16, V64,
asm, ".4h",
@@ -8421,9 +8462,9 @@ multiclass SIMDThreeSameVectorFMLIndex<bit U, bits<4> opc, string asm,
V128, v4f32, v8f16, OpNode>;
}
-let mayRaiseFPException = 1, Uses = [FPCR] in
multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
SDPatternOperator OpNode> {
+ let mayRaiseFPException = 1, Uses = [FPCR] in {
let Predicates = [HasNEON, HasFullFP16] in {
def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b00, opc,
V64, V64,
@@ -8431,7 +8472,7 @@ multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
asm, ".4h", ".4h", ".4h", ".h",
[(set (v4f16 V64:$Rd),
(OpNode (v4f16 V64:$Rn),
- (v4f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ (dup_v8f16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx)))]> {
bits<3> idx;
let Inst{11} = idx{2};
let Inst{21} = idx{1};
@@ -8458,7 +8499,7 @@ multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
asm, ".2s", ".2s", ".2s", ".s",
[(set (v2f32 V64:$Rd),
(OpNode (v2f32 V64:$Rn),
- (v2f32 (AArch64duplane32 (v4f32 V128:$Rm), VectorIndexS:$idx))))]> {
+ (dup_v4f32 (v4f32 V128:$Rm), VectorIndexS:$idx)))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
@@ -8526,6 +8567,29 @@ multiclass SIMDFPIndexed<bit U, bits<4> opc, string asm,
let Inst{11} = idx{0};
let Inst{21} = 0;
}
+ } // mayRaiseFPException = 1, Uses = [FPCR]
+
+ let Predicates = [HasNEON, HasFullFP16] in {
+ def : Pat<(f16 (OpNode
+ (f16 (vector_extract (v8f16 V128:$Rn), (i64 0))),
+ (f16 (vector_extract (v8f16 V128:$Rm), VectorIndexH:$idx)))),
+ (!cast<Instruction>(NAME # v1i16_indexed)
+ (EXTRACT_SUBREG V128:$Rn, hsub), V128:$Rm, VectorIndexH:$idx)>;
+ }
+
+ let Predicates = [HasNEON] in {
+ def : Pat<(f32 (OpNode
+ (f32 (vector_extract (v4f32 V128:$Rn), (i64 0))),
+ (f32 (vector_extract (v4f32 V128:$Rm), VectorIndexS:$idx)))),
+ (!cast<Instruction>(NAME # v1i32_indexed)
+ (EXTRACT_SUBREG V128:$Rn, ssub), V128:$Rm, VectorIndexS:$idx)>;
+
+ def : Pat<(f64 (OpNode
+ (f64 (vector_extract (v2f64 V128:$Rn), (i64 0))),
+ (f64 (vector_extract (v2f64 V128:$Rm), VectorIndexD:$idx)))),
+ (!cast<Instruction>(NAME # v1i64_indexed)
+ (EXTRACT_SUBREG V128:$Rn, dsub), V128:$Rm, VectorIndexD:$idx)>;
+ }
}
multiclass SIMDFPIndexedTiedPatterns<string INST, SDPatternOperator OpNode> {
@@ -8746,7 +8810,7 @@ multiclass SIMDIndexedHS<bit U, bits<4> opc, string asm,
asm, ".4h", ".4h", ".4h", ".h",
[(set (v4i16 V64:$Rd),
(OpNode (v4i16 V64:$Rn),
- (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ (dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))]> {
bits<3> idx;
let Inst{11} = idx{2};
let Inst{21} = idx{1};
@@ -8772,7 +8836,7 @@ multiclass SIMDIndexedHS<bit U, bits<4> opc, string asm,
asm, ".2s", ".2s", ".2s", ".s",
[(set (v2i32 V64:$Rd),
(OpNode (v2i32 V64:$Rn),
- (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ (dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
@@ -8820,7 +8884,7 @@ multiclass SIMDVectorIndexedHS<bit U, bits<4> opc, string asm,
asm, ".4h", ".4h", ".4h", ".h",
[(set (v4i16 V64:$Rd),
(OpNode (v4i16 V64:$Rn),
- (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ (dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))]> {
bits<3> idx;
let Inst{11} = idx{2};
let Inst{21} = idx{1};
@@ -8846,7 +8910,7 @@ multiclass SIMDVectorIndexedHS<bit U, bits<4> opc, string asm,
asm, ".2s", ".2s", ".2s", ".s",
[(set (v2i32 V64:$Rd),
(OpNode (v2i32 V64:$Rn),
- (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ (dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
@@ -8872,7 +8936,7 @@ multiclass SIMDVectorIndexedHSTied<bit U, bits<4> opc, string asm,
asm, ".4h", ".4h", ".4h", ".h",
[(set (v4i16 V64:$dst),
(OpNode (v4i16 V64:$Rd),(v4i16 V64:$Rn),
- (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ (dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))]> {
bits<3> idx;
let Inst{11} = idx{2};
let Inst{21} = idx{1};
@@ -8898,7 +8962,7 @@ multiclass SIMDVectorIndexedHSTied<bit U, bits<4> opc, string asm,
asm, ".2s", ".2s", ".2s", ".s",
[(set (v2i32 V64:$dst),
(OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn),
- (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ (dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
@@ -8925,7 +8989,7 @@ multiclass SIMDIndexedLongSD<bit U, bits<4> opc, string asm,
asm, ".4s", ".4s", ".4h", ".h",
[(set (v4i32 V128:$Rd),
(OpNode (v4i16 V64:$Rn),
- (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ (dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))]> {
bits<3> idx;
let Inst{11} = idx{2};
let Inst{21} = idx{1};
@@ -8952,7 +9016,7 @@ multiclass SIMDIndexedLongSD<bit U, bits<4> opc, string asm,
asm, ".2d", ".2d", ".2s", ".s",
[(set (v2i64 V128:$Rd),
(OpNode (v2i32 V64:$Rn),
- (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ (dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
@@ -8998,8 +9062,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
(Accum (v4i32 V128:$Rd),
(v4i32 (int_aarch64_neon_sqdmull
(v4i16 V64:$Rn),
- (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx))))))]> {
+ (dup_v8i16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx)))))]> {
bits<3> idx;
let Inst{11} = idx{2};
let Inst{21} = idx{1};
@@ -9029,8 +9093,7 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
(Accum (v2i64 V128:$Rd),
(v2i64 (int_aarch64_neon_sqdmull
(v2i32 V64:$Rn),
- (v2i32 (AArch64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx))))))]> {
+ (dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
@@ -9075,9 +9138,8 @@ multiclass SIMDIndexedLongSQDMLXSDTied<bit U, bits<4> opc, string asm,
(i32 (vector_extract
(v4i32 (int_aarch64_neon_sqdmull
(v4i16 V64:$Rn),
- (v4i16 (AArch64duplane16
- (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx)))),
+ (dup_v8i16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))),
(i64 0))))),
(!cast<Instruction>(NAME # v1i32_indexed)
FPR32Op:$Rd,
@@ -9110,7 +9172,7 @@ multiclass SIMDVectorIndexedLongSD<bit U, bits<4> opc, string asm,
asm, ".4s", ".4s", ".4h", ".h",
[(set (v4i32 V128:$Rd),
(OpNode (v4i16 V64:$Rn),
- (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ (dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))]> {
bits<3> idx;
let Inst{11} = idx{2};
let Inst{21} = idx{1};
@@ -9137,7 +9199,7 @@ multiclass SIMDVectorIndexedLongSD<bit U, bits<4> opc, string asm,
asm, ".2d", ".2d", ".2s", ".s",
[(set (v2i64 V128:$Rd),
(OpNode (v2i32 V64:$Rn),
- (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ (dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
@@ -9166,7 +9228,7 @@ multiclass SIMDVectorIndexedLongSDTied<bit U, bits<4> opc, string asm,
asm, ".4s", ".4s", ".4h", ".h",
[(set (v4i32 V128:$dst),
(OpNode (v4i32 V128:$Rd), (v4i16 V64:$Rn),
- (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx))))]> {
+ (dup_v8i16 (v8i16 V128_lo:$Rm), VectorIndexH:$idx)))]> {
bits<3> idx;
let Inst{11} = idx{2};
let Inst{21} = idx{1};
@@ -9193,7 +9255,7 @@ multiclass SIMDVectorIndexedLongSDTied<bit U, bits<4> opc, string asm,
asm, ".2d", ".2d", ".2s", ".s",
[(set (v2i64 V128:$dst),
(OpNode (v2i64 V128:$Rd), (v2i32 V64:$Rn),
- (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
+ (dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx)))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
@@ -10815,8 +10877,8 @@ multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
asm, ".4h", ".4h", ".4h", ".h",
[(set (v4i16 V64:$dst),
(v4i16 (op (v4i16 V64:$Rd), (v4i16 V64:$Rn),
- (v4i16 (AArch64duplane16 (v8i16 V128_lo:$Rm),
- VectorIndexH:$idx)))))]> {
+ (dup_v8i16 (v8i16 V128_lo:$Rm),
+ VectorIndexH:$idx))))]> {
bits<3> idx;
let Inst{11} = idx{2};
let Inst{21} = idx{1};
@@ -10841,8 +10903,7 @@ multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
asm, ".2s", ".2s", ".2s", ".s",
[(set (v2i32 V64:$dst),
(v2i32 (op (v2i32 V64:$Rd), (v2i32 V64:$Rn),
- (v2i32 (AArch64duplane32 (v4i32 V128:$Rm),
- VectorIndexS:$idx)))))]> {
+ (dup_v4i32 (v4i32 V128:$Rm), VectorIndexS:$idx))))]> {
bits<2> idx;
let Inst{11} = idx{1};
let Inst{21} = idx{0};
@@ -10860,19 +10921,19 @@ multiclass SIMDIndexedSQRDMLxHSDTied<bit U, bits<4> opc, string asm,
let Inst{21} = idx{0};
}
- def i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc,
- FPR16Op, FPR16Op, V128_lo,
- VectorIndexH, asm, ".h", "", "", ".h",
- []> {
+ def v1i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b01, opc,
+ FPR16Op, FPR16Op, V128_lo,
+ VectorIndexH, asm, ".h", "", "", ".h",
+ []> {
bits<3> idx;
let Inst{11} = idx{2};
let Inst{21} = idx{1};
let Inst{20} = idx{0};
}
- def i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
- FPR32Op, FPR32Op, V128, VectorIndexS,
- asm, ".s", "", "", ".s",
+ def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc,
+ FPR32Op, FPR32Op, V128, VectorIndexS,
+ asm, ".s", "", "", ".s",
[(set (i32 FPR32Op:$dst),
(i32 (op (i32 FPR32Op:$Rd), (i32 FPR32Op:$Rn),
(i32 (vector_extract (v4i32 V128:$Rm),
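
For reference, the dup_v8i16/dup_v4i32/dup_v8f16/dup_v4f32 PatFrags added near the top of this file accept two equivalent shapes: duplicating a lane of the 128-bit register and then taking the low half, or duplicating that lane directly into a 64-bit register. A minimal standalone C++ check of that identity on plain arrays (all names invented for illustration):

#include <array>
#include <cassert>
#include <cstdint>

int main() {
  const std::array<uint16_t, 8> wide{1, 2, 3, 4, 5, 6, 7, 8};
  for (unsigned lane = 0; lane < 8; ++lane) {
    std::array<uint16_t, 8> dup128;
    dup128.fill(wide[lane]);            // duplane on the 128-bit form
    const std::array<uint16_t, 4> low{dup128[0], dup128[1], dup128[2], dup128[3]};
    std::array<uint16_t, 4> dup64;
    dup64.fill(wide[lane]);             // duplane straight into the 64-bit form
    assert(low == dup64);               // extract_subvector(dup128, 0) == dup64
  }
  return 0;
}
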
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 70c4ba763a34..b3d093af1c16 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -302,3 +302,108 @@ def : Pat<(int_aarch64_stlxp GPR64:$lo, GPR64:$hi, GPR64:$addr),
(STLXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>;
def : Pat<(int_aarch64_stxp GPR64:$lo, GPR64:$hi, GPR64:$addr),
(STXPX GPR64:$lo, GPR64:$hi, GPR64:$addr)>;
+
+multiclass SIMDAcrossLanesSignedIntrinsicBHS<string baseOpc, Intrinsic intOp> {
+ def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ (i64 0)))>;
+ def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+ (i32 (SMOVvi8to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+ (i64 0)))>;
+
+ def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+ (i32 (SMOVvi16to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
+ (i64 0)))>;
+ def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+ (i32 (SMOVvi16to32
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+ (i64 0)))>;
+
+ def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
+ ssub))>;
+}
+
+multiclass SIMDAcrossLanesUnsignedIntrinsicBHS<string baseOpc,
+ Intrinsic intOp> {
+ def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
+ ssub))>;
+ def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
+ ssub))>;
+
+ def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
+ ssub))>;
+ def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
+ ssub))>;
+
+ def : Pat<(i32 (intOp (v4i32 V128:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub),
+ ssub))>;
+}
+
+
+defm : SIMDAcrossLanesSignedIntrinsicBHS<"ADDV", int_aarch64_neon_saddv>;
+// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
+def : Pat<(i32 (int_aarch64_neon_saddv (v2i32 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (ADDPv2i32 V64:$Rn, V64:$Rn), dsub),
+ ssub))>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsicBHS<"ADDV", int_aarch64_neon_uaddv>;
+def : Pat<(i32 (int_aarch64_neon_uaddv (v2i32 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (ADDPv2i32 V64:$Rn, V64:$Rn), dsub),
+ ssub))>;
+
+defm : SIMDAcrossLanesSignedIntrinsicBHS<"SMAXV", int_aarch64_neon_smaxv>;
+def : Pat<(i32 (int_aarch64_neon_smaxv (v2i32 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (SMAXPv2i32 V64:$Rn, V64:$Rn), dsub),
+ ssub))>;
+
+defm : SIMDAcrossLanesSignedIntrinsicBHS<"SMINV", int_aarch64_neon_sminv>;
+def : Pat<(i32 (int_aarch64_neon_sminv (v2i32 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (SMINPv2i32 V64:$Rn, V64:$Rn), dsub),
+ ssub))>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsicBHS<"UMAXV", int_aarch64_neon_umaxv>;
+def : Pat<(i32 (int_aarch64_neon_umaxv (v2i32 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (UMAXPv2i32 V64:$Rn, V64:$Rn), dsub),
+ ssub))>;
+
+defm : SIMDAcrossLanesUnsignedIntrinsicBHS<"UMINV", int_aarch64_neon_uminv>;
+def : Pat<(i32 (int_aarch64_neon_uminv (v2i32 V64:$Rn))),
+ (i32 (EXTRACT_SUBREG
+ (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
+ (UMINPv2i32 V64:$Rn, V64:$Rn), dsub),
+ ssub))>;
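
The comment above notes that the v2i32 across-lanes reductions are handled specially: they lower to a pairwise instruction with the same register on both inputs (ADDP/SMAXP/SMINP/UMAXP/UMINP Vd.2S, Vn.2S, Vn.2S) and then read lane 0. A minimal standalone C++ sketch of why lane 0 of that pairwise result equals the full two-lane reduction (names invented for illustration):

#include <algorithm>
#include <array>
#include <cassert>
#include <cstdint>

int main() {
  const std::array<int32_t, 2> v{7, -3};
  // ADDP v, v: both result lanes hold v[0] + v[1], so lane 0 is the reduction.
  const std::array<int32_t, 2> addp{v[0] + v[1], v[1] + v[0]};
  assert(addp[0] == v[0] + v[1]);
  // The same shape works for the max/min reductions via SMAXP/SMINP/UMAXP/UMINP.
  const std::array<int32_t, 2> smaxp{std::max(v[0], v[1]), std::max(v[1], v[0])};
  assert(smaxp[0] == std::max(v[0], v[1]));
  return 0;
}
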
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 6916e1ec5700..9d901fd70446 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -126,6 +126,12 @@ unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (NumBytes == 0)
NumBytes = 4;
break;
+ case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
+ case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
+ // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
+ NumBytes = 36;
+ break;
+
case AArch64::SPACE:
NumBytes = MI.getOperand(1).getImm();
break;
@@ -1692,17 +1698,34 @@ static bool isSUBSRegImm(unsigned Opcode) {
/// MI and CmpInstr
/// or if MI opcode is not the S form there must be neither defs of flags
/// nor uses of flags between MI and CmpInstr.
-/// - and C/V flags are not used after CmpInstr
+/// - and, the C flag is not used after CmpInstr
+/// - and, the V flag is not used after CmpInstr, or MI produces a poison
+///   value if signed overflow occurs.
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
const TargetRegisterInfo &TRI) {
+ // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
+ // that may or may not set flags.
assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
const unsigned CmpOpcode = CmpInstr.getOpcode();
if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
return false;
+ assert((CmpInstr.getOperand(2).isImm() &&
+ CmpInstr.getOperand(2).getImm() == 0) &&
+ "Caller guarantees that CmpInstr compares with constant 0");
+
std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
- if (!NZVCUsed || NZVCUsed->C || NZVCUsed->V)
+ if (!NZVCUsed || NZVCUsed->C)
+ return false;
+
+ // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
+ // '%vreg = add ...' or '%vreg = sub ...'.
+ // Condition flag V is used to indicate signed overflow.
+ // 1) MI and CmpInstr set N and V to the same value.
+ // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
+ // signed overflow occurs, so CmpInstr could still be simplified away.
+ if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
return false;
AccessKind AccessToCheck = AK_Write;
@@ -2205,6 +2228,7 @@ bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
case AArch64::LDRWpre:
case AArch64::LDURXi:
case AArch64::LDRXpre:
+ case AArch64::LDRSWpre:
case AArch64::LDURSWi:
case AArch64::LDURHHi:
case AArch64::LDURBBi:
@@ -2369,7 +2393,7 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
case AArch64::LDNF1D_IMM:
return 3;
case AArch64::ADDG:
- case AArch64::STGOffset:
+ case AArch64::STGi:
case AArch64::LDR_PXI:
case AArch64::STR_PXI:
return 2;
@@ -2414,6 +2438,7 @@ bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
case AArch64::LDURXi:
case AArch64::LDRXpre:
case AArch64::LDURSWi:
+ case AArch64::LDRSWpre:
return true;
}
}
@@ -2534,7 +2559,8 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
// Can't merge/pair if the instruction modifies the base register.
// e.g., ldr x0, [x0]
// This case will never occur with an FI base.
- // However, if the instruction is an LDR/STR<S,D,Q,W,X>pre, it can be merged.
+ // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
+ // STR<S,D,Q,W,X>pre, it can be merged.
// For example:
// ldr q0, [x11, #32]!
// ldr q1, [x11, #16]
@@ -2874,8 +2900,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
MaxOffset = 63;
break;
case AArch64::LDG:
- case AArch64::STGOffset:
- case AArch64::STZGOffset:
+ case AArch64::STGi:
+ case AArch64::STZGi:
Scale = TypeSize::Fixed(16);
Width = 16;
MinOffset = -256;
@@ -3033,8 +3059,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
MinOffset = -8;
MaxOffset = 7;
break;
- case AArch64::ST2GOffset:
- case AArch64::STZ2GOffset:
+ case AArch64::ST2Gi:
+ case AArch64::STZ2Gi:
Scale = TypeSize::Fixed(16);
Width = 32;
MinOffset = -256;
@@ -3111,6 +3137,7 @@ int AArch64InstrInfo::getMemScale(unsigned Opc) {
case AArch64::LDRSpre:
case AArch64::LDRSWui:
case AArch64::LDURSWi:
+ case AArch64::LDRSWpre:
case AArch64::LDRWpre:
case AArch64::LDRWui:
case AArch64::LDURWi:
@@ -3151,10 +3178,10 @@ int AArch64InstrInfo::getMemScale(unsigned Opc) {
case AArch64::LDPQi:
case AArch64::LDRQpre:
case AArch64::STPQi:
- case AArch64::STGOffset:
- case AArch64::STZGOffset:
- case AArch64::ST2GOffset:
- case AArch64::STZ2GOffset:
+ case AArch64::STGi:
+ case AArch64::STZGi:
+ case AArch64::ST2Gi:
+ case AArch64::STZ2Gi:
case AArch64::STGPi:
return 16;
}
@@ -3166,6 +3193,7 @@ bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
return false;
case AArch64::LDRWpre:
case AArch64::LDRXpre:
+ case AArch64::LDRSWpre:
case AArch64::LDRSpre:
case AArch64::LDRDpre:
case AArch64::LDRQpre:
@@ -3233,6 +3261,20 @@ static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
}
+bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
+ auto IsHFPR = [&](const MachineOperand &Op) {
+ if (!Op.isReg())
+ return false;
+ auto Reg = Op.getReg();
+ if (Reg.isPhysical())
+ return AArch64::FPR16RegClass.contains(Reg);
+ const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
+ return TRC == &AArch64::FPR16RegClass ||
+ TRC == &AArch64::FPR16_loRegClass;
+ };
+ return llvm::any_of(MI.operands(), IsHFPR);
+}
+
bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
auto IsQFPR = [&](const MachineOperand &Op) {
if (!Op.isReg())
@@ -3682,16 +3724,16 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (AArch64::FPR128RegClass.contains(DestReg) &&
AArch64::FPR128RegClass.contains(SrcReg)) {
- if (Subtarget.forceStreamingCompatibleSVE()) {
+ if (Subtarget.hasSVEorSME() && !Subtarget.isNeonAvailable())
BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
.addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
.addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
.addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
- } else if (Subtarget.hasNEON()) {
+ else if (Subtarget.hasNEON())
BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
.addReg(SrcReg)
.addReg(SrcReg, getKillRegState(KillSrc));
- } else {
+ else {
BuildMI(MBB, I, DL, get(AArch64::STRQpre))
.addReg(AArch64::SP, RegState::Define)
.addReg(SrcReg, getKillRegState(KillSrc))
@@ -4218,7 +4260,7 @@ static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
uint8_t buffer[16];
DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
DefCfaExpr.append(Expr.str());
- return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(),
+ return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
Comment.str());
}
@@ -4266,7 +4308,8 @@ MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI,
CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
CfaExpr.append(OffsetExpr.str());
- return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), Comment.str());
+ return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
+ Comment.str());
}
// Helper function to emit a frame offset adjustment from a given
@@ -5386,6 +5429,39 @@ static bool getFMULPatterns(MachineInstr &Root,
return Found;
}
+static bool getFNEGPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) {
+ unsigned Opc = Root.getOpcode();
+ MachineBasicBlock &MBB = *Root.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ auto Match = [&](unsigned Opcode, MachineCombinerPattern Pattern) -> bool {
+ MachineOperand &MO = Root.getOperand(1);
+ MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
+ if (MI != nullptr && MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
+ (MI->getOpcode() == Opcode) &&
+ Root.getFlag(MachineInstr::MIFlag::FmContract) &&
+ Root.getFlag(MachineInstr::MIFlag::FmNsz) &&
+ MI->getFlag(MachineInstr::MIFlag::FmContract) &&
+ MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
+ Patterns.push_back(Pattern);
+ return true;
+ }
+ return false;
+ };
+
+ switch (Opc) {
+ default:
+ break;
+ case AArch64::FNEGDr:
+ return Match(AArch64::FMADDDrrr, MachineCombinerPattern::FNMADD);
+ case AArch64::FNEGSr:
+ return Match(AArch64::FMADDSrrr, MachineCombinerPattern::FNMADD);
+ }
+
+ return false;
+}
+
/// Return true when a code sequence can improve throughput. It
/// should be called only for instructions in loops.
/// \param Pattern - combiner pattern
@@ -5555,6 +5631,8 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
return true;
if (getFMAPatterns(Root, Patterns))
return true;
+ if (getFNEGPatterns(Root, Patterns))
+ return true;
// Other patterns
if (getMiscPatterns(Root, Patterns))
@@ -5645,6 +5723,47 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
return MUL;
}
+static MachineInstr *
+genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII, MachineInstr &Root,
+ SmallVectorImpl<MachineInstr *> &InsInstrs) {
+ MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
+
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
+ if (AArch64::FPR32RegClass.hasSubClassEq(RC))
+ Opc = AArch64::FNMADDSrrr;
+ else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
+ Opc = AArch64::FNMADDDrrr;
+ else
+ return nullptr;
+
+ Register ResultReg = Root.getOperand(0).getReg();
+ Register SrcReg0 = MAD->getOperand(1).getReg();
+ Register SrcReg1 = MAD->getOperand(2).getReg();
+ Register SrcReg2 = MAD->getOperand(3).getReg();
+ bool Src0IsKill = MAD->getOperand(1).isKill();
+ bool Src1IsKill = MAD->getOperand(2).isKill();
+ bool Src2IsKill = MAD->getOperand(3).isKill();
+ if (ResultReg.isVirtual())
+ MRI.constrainRegClass(ResultReg, RC);
+ if (SrcReg0.isVirtual())
+ MRI.constrainRegClass(SrcReg0, RC);
+ if (SrcReg1.isVirtual())
+ MRI.constrainRegClass(SrcReg1, RC);
+ if (SrcReg2.isVirtual())
+ MRI.constrainRegClass(SrcReg2, RC);
+
+ MachineInstrBuilder MIB =
+ BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
+ .addReg(SrcReg0, getKillRegState(Src0IsKill))
+ .addReg(SrcReg1, getKillRegState(Src1IsKill))
+ .addReg(SrcReg2, getKillRegState(Src2IsKill));
+ InsInstrs.push_back(MIB);
+
+ return MAD;
+}
+
/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static MachineInstr *
genIndexedMultiply(MachineInstr &Root,
@@ -6777,6 +6896,11 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
&AArch64::FPR128_loRegClass, MRI);
break;
}
+ case MachineCombinerPattern::FNMADD: {
+ MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
+ break;
+ }
+
} // end switch (Pattern)
// Record MUL and ADD/SUB for deletion
if (MUL)
@@ -6785,7 +6909,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
// Set the flags on the inserted instructions to be the merged flags of the
// instructions that we have combined.
- uint16_t Flags = Root.getFlags();
+ uint32_t Flags = Root.getFlags();
if (MUL)
Flags = Root.mergeFlagsWith(*MUL);
for (auto *MI : InsInstrs)
@@ -7151,7 +7275,8 @@ static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
}
-outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
+std::optional<outliner::OutlinedFunction>
+AArch64InstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
unsigned SequenceSize =
@@ -7181,7 +7306,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
}
return true;
}) != RepeatedSequenceLocs.end()) {
- return outliner::OutlinedFunction();
+ return std::nullopt;
}
// Since at this point all candidates agree on their return address signing
@@ -7259,7 +7384,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < 2)
- return outliner::OutlinedFunction();
+ return std::nullopt;
}
// Properties about candidate MBBs that hold for all of them.
@@ -7269,41 +7394,6 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
for (outliner::Candidate &C : RepeatedSequenceLocs)
FlagsSetInAll &= C.Flags;
- // According to the AArch64 Procedure Call Standard, the following are
- // undefined on entry/exit from a function call:
- //
- // * Registers x16, x17, (and thus w16, w17)
- // * Condition codes (and thus the NZCV register)
- //
- // Because if this, we can't outline any sequence of instructions where
- // one
- // of these registers is live into/across it. Thus, we need to delete
- // those
- // candidates.
- auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
- // If the unsafe registers in this block are all dead, then we don't need
- // to compute liveness here.
- if (C.Flags & UnsafeRegsDead)
- return false;
- return C.isAnyUnavailableAcrossOrOutOfSeq(
- {AArch64::W16, AArch64::W17, AArch64::NZCV}, TRI);
- };
-
- // Are there any candidates where those registers are live?
- if (!(FlagsSetInAll & UnsafeRegsDead)) {
- // Erase every candidate that violates the restrictions above. (It could be
- // true that we have viable candidates, so it's not worth bailing out in
- // the case that, say, 1 out of 20 candidates violate the restructions.)
- llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
-
- // If the sequence doesn't have enough candidates left, then we're done.
- if (RepeatedSequenceLocs.size() < 2)
- return outliner::OutlinedFunction();
- }
-
- // At this point, we have only "safe" candidates to outline. Figure out
- // frame + call instruction information.
-
unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
// Helper lambda which sets call information for every candidate.
@@ -7339,7 +7429,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
C.getMF()->getFrameInstructions();
if (CFICount > 0 && CFICount != CFIInstructions.size())
- return outliner::OutlinedFunction();
+ return std::nullopt;
}
// Returns true if an instructions is safe to fix up, false otherwise.
@@ -7429,6 +7519,10 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
// Check if we have to save LR.
for (outliner::Candidate &C : RepeatedSequenceLocs) {
+ bool LRAvailable =
+ (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
+ ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
+ : true;
// If we have a noreturn caller, then we're going to be conservative and
// say that we have to save LR. If we don't have a ret at the end of the
// block, then we can't reason about liveness accurately.
@@ -7439,7 +7533,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
// Is LR available? If so, we don't need a save.
- if (C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) && !IsNoReturn) {
+ if (LRAvailable && !IsNoReturn) {
NumBytesNoStackCalls += 4;
C.setCallInfo(MachineOutlinerNoLRSave, 4);
CandidatesWithoutStackFixups.push_back(C);
@@ -7537,7 +7631,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
// If we dropped all of the candidates, bail out here.
if (RepeatedSequenceLocs.size() < 2) {
RepeatedSequenceLocs.clear();
- return outliner::OutlinedFunction();
+ return std::nullopt;
}
}
@@ -7564,7 +7658,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
// We can't fix up the stack. Bail out.
if (!AllStackInstrsSafe) {
RepeatedSequenceLocs.clear();
- return outliner::OutlinedFunction();
+ return std::nullopt;
}
// Save + restore LR.
@@ -7575,7 +7669,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
// If we have CFI instructions, we can only outline if the outlined section
// can be a tail call
if (FrameID != MachineOutlinerTailCall && CFICount > 0)
- return outliner::OutlinedFunction();
+ return std::nullopt;
return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
NumBytesToCreateFrame, FrameID);
@@ -7611,76 +7705,121 @@ bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
return true;
}
-bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
- unsigned &Flags) const {
- if (!TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags))
- return false;
- // Check if LR is available through all of the MBB. If it's not, then set
- // a flag.
+SmallVector<std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
+AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
+ unsigned &Flags) const {
assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
- "Suitable Machine Function for outlining must track liveness");
+ "Must track liveness!");
+ SmallVector<
+ std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
+ Ranges;
+ // According to the AArch64 Procedure Call Standard, the following are
+ // undefined on entry/exit from a function call:
+ //
+ // * Registers x16, x17, (and thus w16, w17)
+ // * Condition codes (and thus the NZCV register)
+ //
+ // If any of these registers are used inside or live across an outlined
+ // function, then they may be modified later, either by the compiler or
+ // some other tool (like the linker).
+ //
+ // To avoid outlining in these situations, partition each block into ranges
+ // where these registers are dead. We will only outline from those ranges.
LiveRegUnits LRU(getRegisterInfo());
+ auto AreAllUnsafeRegsDead = [&LRU]() {
+ return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
+ LRU.available(AArch64::NZCV);
+ };
- for (MachineInstr &MI : llvm::reverse(MBB))
- LRU.accumulate(MI);
-
- // Check if each of the unsafe registers are available...
- bool W16AvailableInBlock = LRU.available(AArch64::W16);
- bool W17AvailableInBlock = LRU.available(AArch64::W17);
- bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);
-
- // If all of these are dead (and not live out), we know we don't have to check
- // them later.
- if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
- Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
-
- // Now, add the live outs to the set.
+ // We need to know if LR is live across an outlining boundary later on in
+ // order to decide how we'll create the outlined call, frame, etc.
+ //
+ // It's pretty expensive to check this for *every candidate* within a block.
+ // That's some potentially n^2 behaviour, since in the worst case, we'd need
+ // to compute liveness from the end of the block for O(n) candidates within
+ // the block.
+ //
+ // So, to improve the average case, let's keep track of liveness from the end
+ // of the block to the beginning of *every outlinable range*. If we know that
+ // LR is available in every range we could outline from, then we know that
+ // we don't need to check liveness for any candidate within that range.
+ bool LRAvailableEverywhere = true;
+ // Compute liveness bottom-up.
LRU.addLiveOuts(MBB);
-
- // If any of these registers is available in the MBB, but also a live out of
- // the block, then we know outlining is unsafe.
- if (W16AvailableInBlock && !LRU.available(AArch64::W16))
- return false;
- if (W17AvailableInBlock && !LRU.available(AArch64::W17))
- return false;
- if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
- return false;
-
- // Check if there's a call inside this MachineBasicBlock. If there is, then
- // set a flag.
- if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
- Flags |= MachineOutlinerMBBFlags::HasCalls;
-
- MachineFunction *MF = MBB.getParent();
-
- // In the event that we outline, we may have to save LR. If there is an
- // available register in the MBB, then we'll always save LR there. Check if
- // this is true.
- bool CanSaveLR = false;
- const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
- MF->getSubtarget().getRegisterInfo());
-
- // Check if there is an available register across the sequence that we can
- // use.
- for (unsigned Reg : AArch64::GPR64RegClass) {
- if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
- Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
- CanSaveLR = true;
+ // Update flags that require info about the entire MBB.
+ auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
+ if (MI.isCall() && !MI.isTerminator())
+ Flags |= MachineOutlinerMBBFlags::HasCalls;
+ };
+ // Range: [RangeBegin, RangeEnd)
+ MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
+ unsigned RangeLen;
+ auto CreateNewRangeStartingAt =
+ [&RangeBegin, &RangeEnd,
+ &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
+ RangeBegin = NewBegin;
+ RangeEnd = std::next(RangeBegin);
+ RangeLen = 0;
+ };
+ auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
+ // At least one unsafe register is not dead. We do not want to outline at
+ // this point. If it is long enough to outline from, save the range
+ // [RangeBegin, RangeEnd).
+ if (RangeLen > 1)
+ Ranges.push_back(std::make_pair(RangeBegin, RangeEnd));
+ };
+ // Find the first point where all unsafe registers are dead.
+ // FIND: <safe instr> <-- end of first potential range
+ // SKIP: <unsafe def>
+ // SKIP: ... everything between ...
+ // SKIP: <unsafe use>
+ auto FirstPossibleEndPt = MBB.instr_rbegin();
+ for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
+ LRU.stepBackward(*FirstPossibleEndPt);
+ // Update flags that impact how we outline across the entire block,
+ // regardless of safety.
+ UpdateWholeMBBFlags(*FirstPossibleEndPt);
+ if (AreAllUnsafeRegsDead())
break;
- }
}
-
- // Check if we have a register we can save LR to, and if LR was used
- // somewhere. If both of those things are true, then we need to evaluate the
- // safety of outlining stack instructions later.
- if (!CanSaveLR && !LRU.available(AArch64::LR))
+ // If we exhausted the entire block, we have no safe ranges to outline.
+ if (FirstPossibleEndPt == MBB.instr_rend())
+ return Ranges;
+ // Current range.
+ CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
+ // FirstPossibleEndPt points to the first place where all unsafe registers
+ // are dead (if there is any such point). Begin partitioning the MBB into
+ // ranges.
+ for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
+ LRU.stepBackward(MI);
+ UpdateWholeMBBFlags(MI);
+ if (!AreAllUnsafeRegsDead()) {
+ SaveRangeIfNonEmpty();
+ CreateNewRangeStartingAt(MI.getIterator());
+ continue;
+ }
+ LRAvailableEverywhere &= LRU.available(AArch64::LR);
+ RangeBegin = MI.getIterator();
+ ++RangeLen;
+ }
+ // Above loop misses the last (or only) range. If we are still safe, then
+ // let's save the range.
+ if (AreAllUnsafeRegsDead())
+ SaveRangeIfNonEmpty();
+ if (Ranges.empty())
+ return Ranges;
+ // We found the ranges bottom-up. Mapping expects the top-down. Reverse
+ // the order.
+ std::reverse(Ranges.begin(), Ranges.end());
+ // If there is at least one outlinable range where LR is unavailable
+ // somewhere, remember that.
+ if (!LRAvailableEverywhere)
Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
-
- return true;
+ return Ranges;
}
outliner::InstrType
-AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
+AArch64InstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
unsigned Flags) const {
MachineInstr &MI = *MIT;
MachineBasicBlock *MBB = MI.getParent();
@@ -7713,31 +7852,17 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
if (MI.isCFIInstruction())
return outliner::InstrType::Legal;
- // Don't allow debug values to impact outlining type.
- if (MI.isDebugInstr() || MI.isIndirectDebugValue())
- return outliner::InstrType::Invisible;
-
- // At this point, KILL instructions don't really tell us much so we can go
- // ahead and skip over them.
- if (MI.isKill())
- return outliner::InstrType::Invisible;
-
// Is this a terminator for a basic block?
- if (MI.isTerminator()) {
-
- // Is this the end of a function?
- if (MI.getParent()->succ_empty())
- return outliner::InstrType::Legal;
-
- // It's not, so don't outline it.
- return outliner::InstrType::Illegal;
- }
+ if (MI.isTerminator())
+ // TargetInstrInfo::getOutliningType has already filtered out anything
+ // that would break this, so we can allow it here.
+ return outliner::InstrType::Legal;
// Make sure none of the operands are un-outlinable.
for (const MachineOperand &MOP : MI.operands()) {
- if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
- MOP.isTargetIndex())
- return outliner::InstrType::Illegal;
+ // A check preventing CFI indices was here before, but only CFI
+ // instructions should have those.
+ assert(!MOP.isCFIIndex());
// If it uses LR or W30 explicitly, then don't touch it.
if (MOP.isReg() && !MOP.isImplicit() &&
@@ -7813,10 +7938,6 @@ AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
return outliner::InstrType::Legal;
}
- // Don't outline positions.
- if (MI.isPosition())
- return outliner::InstrType::Illegal;
-
// Don't touch the link register or W30.
if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
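
getOutlinableRanges above walks each block bottom-up with LiveRegUnits and partitions it into maximal ranges in which x16, x17 and NZCV are all dead; only those ranges are considered for outlining, and they are reported in top-down order. A simplified standalone C++ analogue of that partitioning, with every instruction reduced to a single "all unsafe registers are dead here" flag (the function name and Range alias are invented for illustration; the length-greater-than-one cutoff mirrors the RangeLen check above):

#include <algorithm>
#include <utility>
#include <vector>

using Range = std::pair<int, int>;   // [begin, end) in top-down instruction order

std::vector<Range> outlinableRanges(const std::vector<bool> &unsafeRegsDead) {
  std::vector<Range> ranges;
  int end = -1, len = 0;
  // Walk bottom-up, mirroring the backwards liveness scan.
  for (int i = static_cast<int>(unsafeRegsDead.size()) - 1; i >= 0; --i) {
    if (!unsafeRegsDead[i]) {
      // An unsafe register is live here: close the current range, if any.
      if (len > 1)
        ranges.push_back({i + 1, end + 1});
      end = -1;
      len = 0;
      continue;
    }
    if (end < 0)
      end = i;                        // this instruction ends a new range
    ++len;
  }
  if (len > 1)
    ranges.push_back({0, end + 1});   // flush the topmost range
  std::reverse(ranges.begin(), ranges.end());   // report ranges top-down
  return ranges;
}

The real implementation additionally tracks, during the same scan, whether LR is live anywhere inside an outlinable range so that the LRUnavailableSomewhere flag can be set once per block instead of recomputing liveness for every candidate.
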
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index caf9421eb001..20210a96d67a 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -17,6 +17,7 @@
#include "AArch64RegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/TypeSize.h"
+#include <optional>
#define GET_INSTRINFO_HEADER
#include "AArch64GenInstrInfo.inc"
@@ -113,6 +114,9 @@ public:
/// Returns whether the instruction is FP or NEON.
static bool isFpOrNEON(const MachineInstr &MI);
+ /// Returns whether the instruction is in H form (16 bit operands)
+ static bool isHForm(const MachineInstr &MI);
+
/// Returns whether the instruction is in Q form (128 bit operands)
static bool isQForm(const MachineInstr &MI);
@@ -289,12 +293,13 @@ public:
bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
bool OutlineFromLinkOnceODRs) const override;
- outliner::OutlinedFunction getOutliningCandidateInfo(
+ std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
outliner::InstrType
- getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const override;
- bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
- unsigned &Flags) const override;
+ getOutliningTypeImpl(MachineBasicBlock::iterator &MIT, unsigned Flags) const override;
+ SmallVector<
+ std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
+ getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const override;
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
const outliner::OutlinedFunction &OF) const override;
MachineBasicBlock::iterator
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 17fc90afcaab..3450ed29d142 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -101,8 +101,6 @@ def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">,
AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON : Predicate<"Subtarget->hasNEON()">,
AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
-def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
- AssemblerPredicateWithAll<(all_of FeatureCrypto), "crypto">;
def HasSM4 : Predicate<"Subtarget->hasSM4()">,
AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
def HasSHA3 : Predicate<"Subtarget->hasSHA3()">,
@@ -246,6 +244,10 @@ def HasLSE128 : Predicate<"Subtarget->hasLSE128()">,
AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">;
def HasD128 : Predicate<"Subtarget->hasD128()">,
AssemblerPredicateWithAll<(all_of FeatureD128), "d128">;
+def HasCHK : Predicate<"Subtarget->hasCHK()">,
+ AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">;
+def HasGCS : Predicate<"Subtarget->hasGCS()">,
+ AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
@@ -260,7 +262,7 @@ def UseNegativeImmediates
def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;
-def NotInStreamingSVEMode : Predicate<"!Subtarget->forceStreamingCompatibleSVE()">;
+def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
@@ -362,8 +364,10 @@ def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
// Generates the general dynamic sequences, i.e.
@@ -606,7 +610,7 @@ def AArch64csel : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
def AArch64csinv : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
def AArch64csneg : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
def AArch64csinc : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
-def AArch64retflag : SDNode<"AArch64ISD::RET_FLAG", SDTNone,
+def AArch64retglue : SDNode<"AArch64ISD::RET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64adc : SDNode<"AArch64ISD::ADC", SDTBinaryArithWithFlagsIn >;
def AArch64sbc : SDNode<"AArch64ISD::SBC", SDTBinaryArithWithFlagsIn>;
@@ -770,6 +774,25 @@ def AArch64faddp : PatFrags<(ops node:$Rn, node:$Rm),
[(AArch64addp_n node:$Rn, node:$Rm),
(int_aarch64_neon_faddp node:$Rn, node:$Rm)]>;
def AArch64roundingvlshr : ComplexPattern<vAny, 2, "SelectRoundingVLShr", [AArch64vlshr]>;
+def AArch64facge : PatFrags<(ops node:$Rn, node:$Rm),
+ [(AArch64fcmge (fabs node:$Rn), (fabs node:$Rm)),
+ (int_aarch64_neon_facge node:$Rn, node:$Rm)]>;
+def AArch64facgt : PatFrags<(ops node:$Rn, node:$Rm),
+ [(AArch64fcmgt (fabs node:$Rn), (fabs node:$Rm)),
+ (int_aarch64_neon_facgt node:$Rn, node:$Rm)]>;
+
+def AArch64fmaxnmv : PatFrags<(ops node:$Rn),
+ [(vecreduce_fmax node:$Rn),
+ (int_aarch64_neon_fmaxnmv node:$Rn)]>;
+def AArch64fminnmv : PatFrags<(ops node:$Rn),
+ [(vecreduce_fmin node:$Rn),
+ (int_aarch64_neon_fminnmv node:$Rn)]>;
+def AArch64fmaxv : PatFrags<(ops node:$Rn),
+ [(vecreduce_fmaximum node:$Rn),
+ (int_aarch64_neon_fmaxv node:$Rn)]>;
+def AArch64fminv : PatFrags<(ops node:$Rn),
+ [(vecreduce_fminimum node:$Rn),
+ (int_aarch64_neon_fminv node:$Rn)]>;
def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
@@ -786,8 +809,10 @@ def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;
def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64ldiapp : SDNode<"AArch64ISD::LDIAPP", SDT_AArch64ldiapp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldnp : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64stilp : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
@@ -1048,6 +1073,71 @@ def BRB_INJ: BRBEI<0b101, "\tinj">;
def : TokenAlias<"INJ", "inj">;
def : TokenAlias<"IALL", "iall">;
+
+// ARMv9.4-A Guarded Control Stack
+class GCSNoOp<bits<3> op2, string mnemonic>
+ : SimpleSystemI<0, (ins), mnemonic, "">, Sched<[]> {
+ let Inst{20-8} = 0b0100001110111;
+ let Inst{7-5} = op2;
+ let Predicates = [HasGCS];
+}
+def GCSPUSHX : GCSNoOp<0b100, "gcspushx">;
+def GCSPOPCX : GCSNoOp<0b101, "gcspopcx">;
+def GCSPOPX : GCSNoOp<0b110, "gcspopx">;
+
+class GCSRtIn<bits<3> op1, bits<3> op2, string mnemonic,
+ list<dag> pattern = []>
+ : RtSystemI<0, (outs), (ins GPR64:$Rt), mnemonic, "\t$Rt", pattern> {
+ let Inst{20-19} = 0b01;
+ let Inst{18-16} = op1;
+ let Inst{15-8} = 0b01110111;
+ let Inst{7-5} = op2;
+ let Predicates = [HasGCS];
+}
+
+def GCSSS1 : GCSRtIn<0b011, 0b010, "gcsss1">;
+def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">;
+
+class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic,
+ list<dag> pattern = []>
+ : RtSystemI<1, (outs GPR64:$Rt), (ins), mnemonic, "\t$Rt", pattern> {
+ let Inst{20-19} = 0b01;
+ let Inst{18-16} = op1;
+ let Inst{15-8} = 0b01110111;
+ let Inst{7-5} = op2;
+ let Predicates = [HasGCS];
+}
+
+def GCSSS2 : GCSRtOut<0b011, 0b011, "gcsss2">;
+def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm">;
+def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent
+
+def GCSB_DSYNC_disable : InstAlias<"gcsb\tdsync", (HINT 19), 0>;
+def GCSB_DSYNC : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGCS]>;
+
+def : TokenAlias<"DSYNC", "dsync">;
+
+let Uses = [X16], Defs = [X16], CRm = 0b0101 in {
+ def CHKFEAT : SystemNoOperands<0b000, "hint\t#40">;
+}
+def : InstAlias<"chkfeat\tx16", (CHKFEAT), 0>;
+def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>;
+
+class GCSSt<string mnemonic, bits<3> op>
+ : I<(outs), (ins GPR64:$Rt, GPR64sp:$Rn), mnemonic, "\t$Rt, $Rn", "", []>, Sched<[]> {
+ bits<5> Rt;
+ bits<5> Rn;
+ let Inst{31-15} = 0b11011001000111110;
+ let Inst{14-12} = op;
+ let Inst{11-10} = 0b11;
+ let Inst{9-5} = Rn;
+ let Inst{4-0} = Rt;
+ let Predicates = [HasGCS];
+}
+def GCSSTR : GCSSt<"gcsstr", 0b000>;
+def GCSSTTR : GCSSt<"gcssttr", 0b001>;
+
+
// ARMv8.2-A Dot Product
let Predicates = [HasDotProd] in {
defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
@@ -1202,6 +1292,8 @@ def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
(XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;
+def : Pat<(xor (v2i64 V128:$Vn), (or (AArch64vlshr (v2i64 V128:$Vm), (i32 63)), (AArch64vshl (v2i64 V128:$Vm), (i32 1)))),
+ (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
} // HasSHA3
@@ -2126,20 +2218,20 @@ defm ST2G : MemTagStore<0b10, "st2g">;
defm STZ2G : MemTagStore<0b11, "stz2g">;
def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
- (STGOffset $Rn, $Rm, $imm)>;
+ (STGi $Rn, $Rm, $imm)>;
def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
- (STZGOffset $Rn, $Rm, $imm)>;
+ (STZGi $Rn, $Rm, $imm)>;
def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
- (ST2GOffset $Rn, $Rm, $imm)>;
+ (ST2Gi $Rn, $Rm, $imm)>;
def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
- (STZ2GOffset $Rn, $Rm, $imm)>;
+ (STZ2Gi $Rn, $Rm, $imm)>;
defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
- (STGOffset GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
+ (STGi GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2),
(STGPi $Rt, $Rt2, $Rn, $imm)>;
@@ -2576,6 +2668,9 @@ def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
def : Pat<(AArch64call_bti GPR64:$Rn),
(BLR_BTI GPR64:$Rn)>,
Requires<[NoSLSBLRMitigation]>;
+def : Pat<(AArch64call_bti GPR64noip:$Rn),
+ (BLR_BTI GPR64noip:$Rn)>,
+ Requires<[SLSBLRMitigation]>;
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
@@ -2584,7 +2679,7 @@ def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
// Create a separate pseudo-instruction for codegen to use so that we don't
// flag lr as used in every function. It'll be restored before the RET by the
// epilogue if it's legitimately used.
-def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retflag)]>,
+def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retglue)]>,
Sched<[WriteBrReg]> {
let isTerminator = 1;
let isBarrier = 1;
@@ -2610,7 +2705,7 @@ def EMITMTETAGGED : Pseudo<(outs), (ins), []>, Sched<[]> {}
// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects be dropped?
// This gets lowered to an instruction sequence which takes 16 bytes
-let isCall = 1, Defs = [LR, X0, X1], hasSideEffects = 1, Size = 16,
+let isCall = 1, Defs = [NZCV, LR, X0, X1], hasSideEffects = 1, Size = 16,
isCodeGenOnly = 1 in
def TLSDESC_CALLSEQ
: Pseudo<(outs), (ins i64imm:$sym),
@@ -2730,7 +2825,7 @@ defm LDRW : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
defm LDRX : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;
// Floating-point
-defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", untyped, load>;
+defm LDRB : Load8RO<0b00, 1, 0b01, FPR8Op, "ldr", i8, load>;
defm LDRH : Load16RO<0b01, 1, 0b01, FPR16Op, "ldr", f16, load>;
defm LDRS : Load32RO<0b10, 1, 0b01, FPR32Op, "ldr", f32, load>;
defm LDRD : Load64RO<0b11, 1, 0b01, FPR64Op, "ldr", f64, load>;
@@ -3149,6 +3244,10 @@ defm LDURBB
[(set GPR32:$Rt,
(zextloadi8 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
+// bf16 load pattern
+def : Pat <(bf16 (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
+ (LDURHi GPR64sp:$Rn, simm9:$offset)>;
+
// Match all load 64 bits width whose type is compatible with FPR64
let Predicates = [IsLE] in {
def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
@@ -3315,6 +3414,57 @@ def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
(LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
+// An LDR will implicitly zero the rest of the vector, so vector_insert(zeros,
+// load, 0) can be selected as a single load.
+multiclass LoadInsertZeroPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType HVT, ValueType SVT,
+ ValueType ScalarVT, Instruction LoadInst, Instruction UnscaledLoadInst,
+ ComplexPattern Addr, ComplexPattern UnscaledAddr, Operand AddrImm,
+ SubRegIndex SubReg> {
+ // Scaled
+ def : Pat <(vector_insert (VT immAllZerosV),
+ (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
+ (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
+ // Unscaled
+ def : Pat <(vector_insert (VT immAllZerosV),
+ (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
+ (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
+
+ // Half-vector patterns
+ def : Pat <(vector_insert (HVT immAllZerosV),
+ (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
+ (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
+ // Unscaled
+ def : Pat <(vector_insert (HVT immAllZerosV),
+ (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
+ (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
+
+ // SVE patterns
+ def : Pat <(vector_insert (SVT immAllZerosV),
+ (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
+ (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
+ // Unscaled
+ def : Pat <(vector_insert (SVT immAllZerosV),
+ (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
+ (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
+}
+
+defm : LoadInsertZeroPatterns<extloadi8, v16i8, v8i8, nxv16i8, i32, LDRBui, LDURBi,
+ am_indexed8, am_unscaled8, uimm12s1, bsub>;
+defm : LoadInsertZeroPatterns<extloadi16, v8i16, v4i16, nxv8i16, i32, LDRHui, LDURHi,
+ am_indexed16, am_unscaled16, uimm12s2, hsub>;
+defm : LoadInsertZeroPatterns<load, v4i32, v2i32, nxv4i32, i32, LDRSui, LDURSi,
+ am_indexed32, am_unscaled32, uimm12s4, ssub>;
+defm : LoadInsertZeroPatterns<load, v2i64, v1i64, nxv2i64, i64, LDRDui, LDURDi,
+ am_indexed64, am_unscaled64, uimm12s8, dsub>;
+defm : LoadInsertZeroPatterns<load, v8f16, v4f16, nxv8f16, f16, LDRHui, LDURHi,
+ am_indexed16, am_unscaled16, uimm12s2, hsub>;
+defm : LoadInsertZeroPatterns<load, v8bf16, v4bf16, nxv8bf16, bf16, LDRHui, LDURHi,
+ am_indexed16, am_unscaled16, uimm12s2, hsub>;
+defm : LoadInsertZeroPatterns<load, v4f32, v2f32, nxv4f32, f32, LDRSui, LDURSi,
+ am_indexed32, am_unscaled32, uimm12s4, ssub>;
+defm : LoadInsertZeroPatterns<load, v2f64, v1f64, nxv2f64, f64, LDRDui, LDURDi,
+ am_indexed64, am_unscaled64, uimm12s8, dsub>;
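+
+// A rough sketch of the intended effect (hypothetical IR and registers): for
+//   %l = load i32, ptr %p
+//   %v = insertelement <4 x i32> zeroinitializer, i32 %l, i64 0
+// these patterns allow selecting a single "ldr s0, [x0]", relying on the
+// scalar FP/SIMD load zeroing the upper lanes of the vector register, rather
+// than materialising a zero vector and inserting into it.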
+
// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
[(AArch64Prefetch timm:$Rt,
@@ -3408,7 +3558,7 @@ def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
-// Pair (pre-indexed)
+// Pair (post-indexed)
def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
@@ -3440,7 +3590,7 @@ defm STRX : Store64RO<0b11, 0, 0b00, GPR64, "str", i64, store>;
// Floating-point
-defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", untyped, store>;
+defm STRB : Store8RO< 0b00, 1, 0b00, FPR8Op, "str", i8, store>;
defm STRH : Store16RO<0b01, 1, 0b00, FPR16Op, "str", f16, store>;
defm STRS : Store32RO<0b10, 1, 0b00, FPR32Op, "str", f32, store>;
defm STRD : Store64RO<0b11, 1, 0b00, FPR64Op, "str", f64, store>;
@@ -3850,7 +4000,7 @@ defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;
// (immediate pre-indexed)
def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str", pre_store, i32>;
def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str", pre_store, i64>;
-def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, untyped>;
+def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op, "str", pre_store, i8>;
def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str", pre_store, f16>;
def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str", pre_store, f32>;
def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str", pre_store, f64>;
@@ -3904,7 +4054,7 @@ def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
// (immediate post-indexed)
def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z, "str", post_store, i32>;
def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z, "str", post_store, i64>;
-def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, untyped>;
+def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op, "str", post_store, i8>;
def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op, "str", post_store, f16>;
def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op, "str", post_store, f32>;
def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op, "str", post_store, f64>;
@@ -4164,24 +4314,24 @@ defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (any_lround f16:$Rn)),
- (!cast<Instruction>(FCVTASUWHr) f16:$Rn)>;
+ (FCVTASUWHr f16:$Rn)>;
def : Pat<(i64 (any_lround f16:$Rn)),
- (!cast<Instruction>(FCVTASUXHr) f16:$Rn)>;
+ (FCVTASUXHr f16:$Rn)>;
def : Pat<(i64 (any_llround f16:$Rn)),
- (!cast<Instruction>(FCVTASUXHr) f16:$Rn)>;
+ (FCVTASUXHr f16:$Rn)>;
}
def : Pat<(i32 (any_lround f32:$Rn)),
- (!cast<Instruction>(FCVTASUWSr) f32:$Rn)>;
+ (FCVTASUWSr f32:$Rn)>;
def : Pat<(i32 (any_lround f64:$Rn)),
- (!cast<Instruction>(FCVTASUWDr) f64:$Rn)>;
+ (FCVTASUWDr f64:$Rn)>;
def : Pat<(i64 (any_lround f32:$Rn)),
- (!cast<Instruction>(FCVTASUXSr) f32:$Rn)>;
+ (FCVTASUXSr f32:$Rn)>;
def : Pat<(i64 (any_lround f64:$Rn)),
- (!cast<Instruction>(FCVTASUXDr) f64:$Rn)>;
+ (FCVTASUXDr f64:$Rn)>;
def : Pat<(i64 (any_llround f32:$Rn)),
- (!cast<Instruction>(FCVTASUXSr) f32:$Rn)>;
+ (FCVTASUXSr f32:$Rn)>;
def : Pat<(i64 (any_llround f64:$Rn)),
- (!cast<Instruction>(FCVTASUXDr) f64:$Rn)>;
+ (FCVTASUXDr f64:$Rn)>;
//===----------------------------------------------------------------------===//
// Scaled integer to floating point conversion instructions.
@@ -4199,7 +4349,7 @@ defm FMOV : UnscaledConversion<"fmov">;
// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in {
def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
- Sched<[WriteF]>, Requires<[HasFullFP16]>;
+ Sched<[WriteF]>;
def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
Sched<[WriteF]>;
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
@@ -4256,24 +4406,24 @@ let Predicates = [HasFRInt3264] in {
// in the FCVTZS as the output of FRINTX is an integer).
let Predicates = [HasFullFP16] in {
def : Pat<(i32 (any_lrint f16:$Rn)),
- (FCVTZSUWHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
+ (FCVTZSUWHr (FRINTXHr f16:$Rn))>;
def : Pat<(i64 (any_lrint f16:$Rn)),
- (FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
+ (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
def : Pat<(i64 (any_llrint f16:$Rn)),
- (FCVTZSUXHr (!cast<Instruction>(FRINTXHr) f16:$Rn))>;
+ (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
}
def : Pat<(i32 (any_lrint f32:$Rn)),
- (FCVTZSUWSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
+ (FCVTZSUWSr (FRINTXSr f32:$Rn))>;
def : Pat<(i32 (any_lrint f64:$Rn)),
- (FCVTZSUWDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
+ (FCVTZSUWDr (FRINTXDr f64:$Rn))>;
def : Pat<(i64 (any_lrint f32:$Rn)),
- (FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
+ (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
def : Pat<(i64 (any_lrint f64:$Rn)),
- (FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
+ (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
def : Pat<(i64 (any_llrint f32:$Rn)),
- (FCVTZSUXSr (!cast<Instruction>(FRINTXSr) f32:$Rn))>;
+ (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
def : Pat<(i64 (any_llrint f64:$Rn)),
- (FCVTZSUXDr (!cast<Instruction>(FRINTXDr) f64:$Rn))>;
+ (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
//===----------------------------------------------------------------------===//
// Floating point two operand instructions.
@@ -4293,6 +4443,33 @@ defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>;
}
defm FSUB : TwoOperandFPData<0b0011, "fsub", any_fsub>;
+multiclass FMULScalarFromIndexedLane0Patterns<string inst,
+ string inst_f16_suffix,
+ string inst_f32_suffix,
+ string inst_f64_suffix,
+ SDPatternOperator OpNode,
+ list<Predicate> preds = []> {
+ let Predicates = !listconcat(preds, [HasFullFP16]) in {
+ def : Pat<(f16 (OpNode (f16 FPR16:$Rn),
+ (f16 (vector_extract (v8f16 V128:$Rm), (i64 0))))),
+ (!cast<Instruction>(inst # inst_f16_suffix)
+ FPR16:$Rn, (EXTRACT_SUBREG V128:$Rm, hsub))>;
+ }
+ let Predicates = preds in {
+ def : Pat<(f32 (OpNode (f32 FPR32:$Rn),
+ (f32 (vector_extract (v4f32 V128:$Rm), (i64 0))))),
+ (!cast<Instruction>(inst # inst_f32_suffix)
+ FPR32:$Rn, (EXTRACT_SUBREG V128:$Rm, ssub))>;
+ def : Pat<(f64 (OpNode (f64 FPR64:$Rn),
+ (f64 (vector_extract (v2f64 V128:$Rm), (i64 0))))),
+ (!cast<Instruction>(inst # inst_f64_suffix)
+ FPR64:$Rn, (EXTRACT_SUBREG V128:$Rm, dsub))>;
+ }
+}
+
+defm : FMULScalarFromIndexedLane0Patterns<"FMUL", "Hrr", "Srr", "Drr",
+ any_fmul>;
+
// Match reassociated forms of FNMUL.
def : Pat<(fmul (fneg FPR16:$a), (f16 FPR16:$b)),
(FNMULHrr FPR16:$a, FPR16:$b)>,
@@ -4513,11 +4690,16 @@ defm FCVTL : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
(FCVTLv4i16 V64:$Rn)>;
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
- (i64 4)))),
+ (i64 4)))),
+ (FCVTLv8i16 V128:$Rn)>;
+def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))),
+ (FCVTLv2i32 V64:$Rn)>;
+def : Pat<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))),
+ (FCVTLv4i32 V128:$Rn)>;
+def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))),
+ (FCVTLv4i16 V64:$Rn)>;
+def : Pat<(v4f32 (any_fpextend (v4f16 (extract_high_v8f16 (v8f16 V128:$Rn))))),
(FCVTLv8i16 V128:$Rn)>;
-def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
-
-def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>;
defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
@@ -4529,10 +4711,14 @@ def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
def : Pat<(concat_vectors V64:$Rd,
(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
(FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
-def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
-def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>;
+def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))),
+ (FCVTNv2i32 V128:$Rn)>;
+def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))),
+ (FCVTNv4i16 V128:$Rn)>;
def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))),
(FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
+def : Pat<(concat_vectors V64:$Rd, (v4f16 (any_fpround (v4f32 V128:$Rn)))),
+ (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
@@ -4743,8 +4929,8 @@ let Predicates = [HasNEON, HasFullFP16] in {
foreach VT = [ v4f16, v8f16 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
-defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>;
-defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>;
+defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",AArch64facge>;
+defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",AArch64facgt>;
defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>;
defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>;
defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
@@ -5094,6 +5280,10 @@ let Predicates = [HasRDM] in {
(SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
}
+defm : FMULScalarFromIndexedLane0Patterns<"FMULX", "16", "32", "64",
+ int_aarch64_neon_fmulx,
+ [HasNEONorSME]>;
+
def : InstAlias<"cmls $dst, $src1, $src2",
(CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmle $dst, $src1, $src2",
@@ -5469,6 +5659,34 @@ defm : Neon_mul_acc_widen_patterns<sub, AArch64umull,
defm : Neon_mul_acc_widen_patterns<sub, AArch64smull,
SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
+
+multiclass Neon_addl_extract_patterns<SDPatternOperator opnode, SDPatternOperator ext, string Inst> {
+ def : Pat<(v4i16 (opnode (extract_subvector (ext (v8i8 V64:$Rn)), (i64 0)),
+ (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
+ (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Lv8i8_v8i16") V64:$Rn, V64:$Rm)), dsub)>;
+ def : Pat<(v2i32 (opnode (extract_subvector (ext (v4i16 V64:$Rn)), (i64 0)),
+ (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
+ (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Lv4i16_v4i32") V64:$Rn, V64:$Rm)), dsub)>;
+ def : Pat<(v1i64 (opnode (extract_subvector (ext (v2i32 V64:$Rn)), (i64 0)),
+ (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
+ (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Lv2i32_v2i64") V64:$Rn, V64:$Rm)), dsub)>;
+
+ def : Pat<(v4i16 (opnode (v4i16 V64:$Rn),
+ (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
+ (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Wv8i8_v8i16") (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
+ def : Pat<(v2i32 (opnode (v2i32 V64:$Rn),
+ (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
+ (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Wv4i16_v4i32") (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
+ def : Pat<(v1i64 (opnode (v1i64 V64:$Rn),
+ (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
+ (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Wv2i32_v2i64") (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
+}
+
+defm : Neon_addl_extract_patterns<add, zanyext, "UADD">;
+defm : Neon_addl_extract_patterns<add, sext, "SADD">;
+defm : Neon_addl_extract_patterns<sub, zanyext, "USUB">;
+defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;
+
// CodeGen patterns for addhn and subhn instructions, which can actually be
// written in LLVM IR without too much difficulty.
@@ -5675,21 +5893,21 @@ def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
(FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
(FADDPv2i64p V128:$Rn)>;
-def : Pat<(f32 (int_aarch64_neon_fmaxnmv (v2f32 V64:$Rn))),
+def : Pat<(f32 (AArch64fmaxnmv (v2f32 V64:$Rn))),
(FMAXNMPv2i32p V64:$Rn)>;
-def : Pat<(f64 (int_aarch64_neon_fmaxnmv (v2f64 V128:$Rn))),
+def : Pat<(f64 (AArch64fmaxnmv (v2f64 V128:$Rn))),
(FMAXNMPv2i64p V128:$Rn)>;
-def : Pat<(f32 (int_aarch64_neon_fmaxv (v2f32 V64:$Rn))),
+def : Pat<(f32 (AArch64fmaxv (v2f32 V64:$Rn))),
(FMAXPv2i32p V64:$Rn)>;
-def : Pat<(f64 (int_aarch64_neon_fmaxv (v2f64 V128:$Rn))),
+def : Pat<(f64 (AArch64fmaxv (v2f64 V128:$Rn))),
(FMAXPv2i64p V128:$Rn)>;
-def : Pat<(f32 (int_aarch64_neon_fminnmv (v2f32 V64:$Rn))),
+def : Pat<(f32 (AArch64fminnmv (v2f32 V64:$Rn))),
(FMINNMPv2i32p V64:$Rn)>;
-def : Pat<(f64 (int_aarch64_neon_fminnmv (v2f64 V128:$Rn))),
+def : Pat<(f64 (AArch64fminnmv (v2f64 V128:$Rn))),
(FMINNMPv2i64p V128:$Rn)>;
-def : Pat<(f32 (int_aarch64_neon_fminv (v2f32 V64:$Rn))),
+def : Pat<(f32 (AArch64fminv (v2f32 V64:$Rn))),
(FMINPv2i32p V64:$Rn)>;
-def : Pat<(f64 (int_aarch64_neon_fminv (v2f64 V128:$Rn))),
+def : Pat<(f64 (AArch64fminv (v2f64 V128:$Rn))),
(FMINPv2i64p V128:$Rn)>;
//----------------------------------------------------------------------------
@@ -5927,14 +6145,15 @@ def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
(i64 0)),
dsub)>;
-def : Pat<(vector_insert (v8f16 v8f16:$Rn), (f16 fpimm0),
- (i64 VectorIndexH:$imm)),
+def : Pat<(vector_insert (v8f16 V128:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
(INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
-def : Pat<(vector_insert v4f32:$Rn, (f32 fpimm0),
- (i64 VectorIndexS:$imm)),
+def : Pat<(vector_insert (v4f16 V64:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
+ (EXTRACT_SUBREG (INSvi16gpr (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexH:$imm, WZR), dsub)>;
+def : Pat<(vector_insert (v4f32 V128:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
(INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
-def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0),
- (i64 VectorIndexD:$imm)),
+def : Pat<(vector_insert (v2f32 V64:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
+ (EXTRACT_SUBREG (INSvi32gpr (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexS:$imm, WZR), dsub)>;
+def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), (i64 VectorIndexD:$imm)),
(INSvi64gpr V128:$Rn, VectorIndexS:$imm, XZR)>;
def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
@@ -5983,6 +6202,22 @@ def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
(v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
(i64 0))>;
+def : Pat<(v2i32 (vector_insert (v2i32 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexS:$imm))),
+ (EXTRACT_SUBREG
+ (INSvi32gpr (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
+ VectorIndexS:$imm, GPR32:$Rm),
+ dsub)>;
+def : Pat<(v4i16 (vector_insert (v4i16 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexH:$imm))),
+ (EXTRACT_SUBREG
+ (INSvi16gpr (v8i16 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
+ VectorIndexH:$imm, GPR32:$Rm),
+ dsub)>;
+def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexB:$imm))),
+ (EXTRACT_SUBREG
+ (INSvi8gpr (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)),
+ VectorIndexB:$imm, GPR32:$Rm),
+ dsub)>;
+
// Copy an element at a constant index in one vector into a constant indexed
// element of another.
// FIXME refactor to a shared class/def parameterized on vector type, vector
@@ -6046,10 +6281,20 @@ defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
+defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>;
+defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
+defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
+defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi64lane>;
+
// Insert from bitcast
// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
(INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>;
+def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
+ (EXTRACT_SUBREG
+ (INSvi32lane (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$src, dsub)),
+ imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0),
+ dsub)>;
def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), imm:$Immd)),
(INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>;
@@ -6126,10 +6371,10 @@ defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
-defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>;
-defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>;
-defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>;
-defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>;
+defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", AArch64fmaxnmv>;
+defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", AArch64fmaxv>;
+defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>;
+defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", AArch64fminv>;
multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> {
// Patterns for addv(addlp(x)) ==> addlv
@@ -6154,6 +6399,17 @@ multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp>
defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;
+// Patterns for uaddlv(uaddlp(x)) ==> uaddlv
+def : Pat<(i64 (int_aarch64_neon_uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
+ (i64 (EXTRACT_SUBREG
+ (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub)),
+ dsub))>;
+
+def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
+ (i32 (EXTRACT_SUBREG
+ (v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)),
+ ssub))>;
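+
+// The narrower reduction is safe because the result UADDLV produces is wide
+// enough to hold the full sum; e.g. for a v16i8 input the maximum total is
+// 16 * 255, which fits in 16 bits, so "uaddlp v1.8h, v0.16b" followed by
+// "uaddlv s0, v1.8h" can become a single "uaddlv h0, v0.16b" (register names
+// are illustrative).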
+
// Patterns for across-vector intrinsics, that have a node equivalent, that
// returns a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
@@ -6442,6 +6698,10 @@ def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;
+def : Pat<(v2f64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
+def : Pat<(v4f32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
+def : Pat<(v8f16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
+def : Pat<(v8bf16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
@@ -6454,6 +6714,10 @@ def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
def : Pat<(v8i8 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
+def : Pat<(v1f64 immAllZerosV), (MOVID (i32 0))>;
+def : Pat<(v2f32 immAllZerosV), (MOVID (i32 0))>;
+def : Pat<(v4f16 immAllZerosV), (MOVID (i32 0))>;
+def : Pat<(v4bf16 immAllZerosV), (MOVID (i32 0))>;
def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
@@ -6841,6 +7105,20 @@ defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn",
BinOpFrag<(trunc (AArch64roundingvlshr node:$LHS, node:$RHS))>>;
defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
+
+// X << 1 ==> X + X
+class SHLToADDPat<ValueType ty, RegisterClass regtype>
+ : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))),
+ (!cast<Instruction>("ADD"#ty) regtype:$Rn, regtype:$Rn)>;
+
+def : SHLToADDPat<v16i8, FPR128>;
+def : SHLToADDPat<v8i16, FPR128>;
+def : SHLToADDPat<v4i32, FPR128>;
+def : SHLToADDPat<v2i64, FPR128>;
+def : SHLToADDPat<v8i8, FPR64>;
+def : SHLToADDPat<v4i16, FPR64>;
+def : SHLToADDPat<v2i32, FPR64>;
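+
+// Illustrative effect (registers are hypothetical): "shl v0.4s, v0.4s, #1"
+// becomes "add v0.4s, v0.4s, v0.4s", which is generally at least as cheap
+// since on many cores the vector add can issue on more pipelines than the
+// immediate shift.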
+
defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
@@ -7229,6 +7507,22 @@ def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
(LD1Rv2d GPR64sp:$Rn)>;
def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
(LD1Rv1d GPR64sp:$Rn)>;
+
+def : Pat<(v8i8 (AArch64duplane8 (v16i8 (insert_subvector undef, (v8i8 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
+ (LD1Rv8b GPR64sp:$Rn)>;
+def : Pat<(v16i8 (AArch64duplane8 (v16i8 (load GPR64sp:$Rn)), (i64 0))),
+ (LD1Rv16b GPR64sp:$Rn)>;
+def : Pat<(v4i16 (AArch64duplane16 (v8i16 (insert_subvector undef, (v4i16 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
+ (LD1Rv4h GPR64sp:$Rn)>;
+def : Pat<(v8i16 (AArch64duplane16 (v8i16 (load GPR64sp:$Rn)), (i64 0))),
+ (LD1Rv8h GPR64sp:$Rn)>;
+def : Pat<(v2i32 (AArch64duplane32 (v4i32 (insert_subvector undef, (v2i32 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
+ (LD1Rv2s GPR64sp:$Rn)>;
+def : Pat<(v4i32 (AArch64duplane32 (v4i32 (load GPR64sp:$Rn)), (i64 0))),
+ (LD1Rv4s GPR64sp:$Rn)>;
+def : Pat<(v2i64 (AArch64duplane64 (v2i64 (load GPR64sp:$Rn)), (i64 0))),
+ (LD1Rv2d GPR64sp:$Rn)>;
+
// Grab the floating point version too
def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
(LD1Rv2s GPR64sp:$Rn)>;
@@ -7270,12 +7564,22 @@ def : Ld1Lane128Pat<load, VectorIndexH, v8bf16, bf16, LD1i16>;
// In this case, the index must be adjusted to match LD1 type.
//
class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand
- VecIndex, ValueType VTy, ValueType STy,
- Instruction LD1, SDNodeXForm IdxOp>
+ VecIndex, ValueType VTy, ValueType STy,
+ Instruction LD1, SDNodeXForm IdxOp>
: Pat<(vector_insert (VTy VecListOne128:$Rd),
(STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
(LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>;
+class Ld1Lane64IdxOpPat<SDPatternOperator scalar_load, Operand VecIndex,
+ ValueType VTy, ValueType STy, Instruction LD1,
+ SDNodeXForm IdxOp>
+ : Pat<(vector_insert (VTy VecListOne64:$Rd),
+ (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
+ (EXTRACT_SUBREG
+ (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
+ (IdxOp VecIndex:$idx), GPR64sp:$Rn),
+ dsub)>;
+
def VectorIndexStoH : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
}]>;
@@ -7290,9 +7594,13 @@ def : Ld1Lane128IdxOpPat<extloadi16, VectorIndexS, v4i32, i32, LD1i16, VectorInd
def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexS, v4i32, i32, LD1i8, VectorIndexStoB>;
def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndexHtoB>;
+def : Ld1Lane64IdxOpPat<extloadi16, VectorIndexS, v2i32, i32, LD1i16, VectorIndexStoH>;
+def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexS, v2i32, i32, LD1i8, VectorIndexStoB>;
+def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexHtoB>;
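+
+// For example (hypothetical operands): inserting (extloadi8 ptr) into lane 1
+// of a v2i32 widens the 64-bit vector to 128 bits, selects
+// "ld1 { v0.b }[4], [x0]" (the .s index 1 rescaled to a .b index by
+// VectorIndexStoB), and takes the low 64 bits back with an EXTRACT_SUBREG of
+// dsub.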
+
// Same as above, but the first element is populated using
// scalar_to_vector + insert_subvector instead of insert_vector_elt.
-let Predicates = [NotInStreamingSVEMode] in {
+let Predicates = [IsNeonAvailable] in {
class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
SDPatternOperator ExtLoad, Instruction LD1>
: Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
@@ -8294,6 +8602,25 @@ def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
(vector_extract (v8f16 FPR128:$Rn), (i64 1))),
(f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
+// Prefer using the bottom lanes of addp Rn, Rn compared to
+// addp extractlow(Rn), extracthigh(Rn)
+def : Pat<(AArch64addp (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 0))),
+ (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 2)))),
+ (v2i32 (EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub))>;
+def : Pat<(AArch64addp (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 0))),
+ (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 4)))),
+ (v4i16 (EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub))>;
+def : Pat<(AArch64addp (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 0))),
+ (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 8)))),
+ (v8i8 (EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub))>;
+
+def : Pat<(AArch64faddp (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 0))),
+ (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 2)))),
+ (v2f32 (EXTRACT_SUBREG (FADDPv4f32 $Rn, $Rn), dsub))>;
+def : Pat<(AArch64faddp (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 0))),
+ (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 4)))),
+ (v4f16 (EXTRACT_SUBREG (FADDPv8f16 $Rn, $Rn), dsub))>;
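+
+// For example (hypothetical registers), instead of extracting the high half
+// (e.g. "ext v1.16b, v0.16b, v0.16b, #8") and then "addp v0.2s, v0.2s, v1.2s",
+// these patterns emit "addp v0.4s, v0.4s, v0.4s" and use only its low 64
+// bits, which hold the same pairwise sums.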
+
// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
(SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
@@ -8586,6 +8913,18 @@ let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, maySt
[], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
}
+//-----------------------------------------------------------------------------
+// v8.3 Pointer Authentication late patterns
+
+let Predicates = [HasPAuth] in {
+def : Pat<(int_ptrauth_blend GPR64:$Rd, imm64_0_65535:$imm),
+ (MOVKXi GPR64:$Rd, (trunc_imm imm64_0_65535:$imm), 48)>;
+def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn),
+ (BFMXri GPR64:$Rd, GPR64:$Rn, 16, 15)>;
+}
+
+//-----------------------------------------------------------------------------
+
// This gets lowered into an instruction sequence of 20 bytes
let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
def StoreSwiftAsyncContext
@@ -8684,6 +9023,9 @@ let Predicates = [HasRCPC3] in {
def LDIAPPW: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
def LDIAPPX: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
+ def : Pat<(AArch64ldiapp GPR64sp:$Rn), (LDIAPPX GPR64sp:$Rn)>;
+ def : Pat<(AArch64stilp GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;
+
// Aliases for when offset=0
def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPW GPR32: $Rt, GPR32: $Rt2, GPR64sp:$Rn)>;
def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPX GPR64: $Rt, GPR64: $Rt2, GPR64sp:$Rn)>;
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index afea2b2f5f75..419b471db3a3 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -237,10 +237,10 @@ static bool isTagStore(const MachineInstr &MI) {
switch (MI.getOpcode()) {
default:
return false;
- case AArch64::STGOffset:
- case AArch64::STZGOffset:
- case AArch64::ST2GOffset:
- case AArch64::STZ2GOffset:
+ case AArch64::STGi:
+ case AArch64::STZGi:
+ case AArch64::ST2Gi:
+ case AArch64::STZ2Gi:
return true;
}
}
@@ -293,6 +293,8 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
return AArch64::LDRWui;
case AArch64::LDURSWi:
return AArch64::LDURWi;
+ case AArch64::LDRSWpre:
+ return AArch64::LDRWpre;
}
}
@@ -372,6 +374,8 @@ static unsigned getMatchingPairOpcode(unsigned Opc) {
case AArch64::LDRSWui:
case AArch64::LDURSWi:
return AArch64::LDPSWi;
+ case AArch64::LDRSWpre:
+ return AArch64::LDPSWpre;
}
}
@@ -465,13 +469,13 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
return AArch64::STPWpre;
case AArch64::STPXi:
return AArch64::STPXpre;
- case AArch64::STGOffset:
+ case AArch64::STGi:
return AArch64::STGPreIndex;
- case AArch64::STZGOffset:
+ case AArch64::STZGi:
return AArch64::STZGPreIndex;
- case AArch64::ST2GOffset:
+ case AArch64::ST2Gi:
return AArch64::ST2GPreIndex;
- case AArch64::STZ2GOffset:
+ case AArch64::STZ2Gi:
return AArch64::STZ2GPreIndex;
case AArch64::STGPi:
return AArch64::STGPpre;
@@ -544,13 +548,13 @@ static unsigned getPostIndexedOpcode(unsigned Opc) {
return AArch64::STPWpost;
case AArch64::STPXi:
return AArch64::STPXpost;
- case AArch64::STGOffset:
+ case AArch64::STGi:
return AArch64::STGPostIndex;
- case AArch64::STZGOffset:
+ case AArch64::STZGi:
return AArch64::STZGPostIndex;
- case AArch64::ST2GOffset:
+ case AArch64::ST2Gi:
return AArch64::ST2GPostIndex;
- case AArch64::STZ2GOffset:
+ case AArch64::STZ2Gi:
return AArch64::STZ2GPostIndex;
case AArch64::STGPi:
return AArch64::STGPpost;
@@ -585,6 +589,8 @@ static bool isPreLdStPairCandidate(MachineInstr &FirstMI, MachineInstr &MI) {
return (OpcB == AArch64::LDRWui) || (OpcB == AArch64::LDURWi);
case AArch64::LDRXpre:
return (OpcB == AArch64::LDRXui) || (OpcB == AArch64::LDURXi);
+ case AArch64::LDRSWpre:
+ return (OpcB == AArch64::LDRSWui) || (OpcB == AArch64::LDURSWi);
}
}
@@ -681,10 +687,10 @@ static bool isMergeableLdStUpdate(MachineInstr &MI) {
case AArch64::LDRWui:
case AArch64::LDRHHui:
case AArch64::LDRBBui:
- case AArch64::STGOffset:
- case AArch64::STZGOffset:
- case AArch64::ST2GOffset:
- case AArch64::STZ2GOffset:
+ case AArch64::STGi:
+ case AArch64::STZGi:
+ case AArch64::ST2Gi:
+ case AArch64::STZ2Gi:
case AArch64::STGPi:
// Unscaled instructions.
case AArch64::STURSi:
@@ -734,8 +740,11 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
NextI = next_nodbg(NextI, E);
unsigned Opc = I->getOpcode();
+ unsigned MergeMIOpc = MergeMI->getOpcode();
bool IsScaled = !TII->hasUnscaledLdStOffset(Opc);
- int OffsetStride = IsScaled ? 1 : TII->getMemScale(*I);
+ bool IsMergedMIScaled = !TII->hasUnscaledLdStOffset(MergeMIOpc);
+ int OffsetStride = IsScaled ? TII->getMemScale(*I) : 1;
+ int MergeMIOffsetStride = IsMergedMIScaled ? TII->getMemScale(*MergeMI) : 1;
bool MergeForward = Flags.getMergeForward();
// Insert our new paired instruction after whichever of the paired
@@ -748,18 +757,27 @@ AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
: AArch64InstrInfo::getLdStBaseOp(*I);
// Which register is Rt and which is Rt2 depends on the offset order.
- MachineInstr *RtMI;
- if (AArch64InstrInfo::getLdStOffsetOp(*I).getImm() ==
- AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
- RtMI = &*MergeMI;
+ int64_t IOffsetInBytes =
+ AArch64InstrInfo::getLdStOffsetOp(*I).getImm() * OffsetStride;
+ int64_t MIOffsetInBytes =
+ AArch64InstrInfo::getLdStOffsetOp(*MergeMI).getImm() *
+ MergeMIOffsetStride;
+ // Select final offset based on the offset order.
+ int64_t OffsetImm;
+ if (IOffsetInBytes > MIOffsetInBytes)
+ OffsetImm = MIOffsetInBytes;
else
- RtMI = &*I;
+ OffsetImm = IOffsetInBytes;
- int OffsetImm = AArch64InstrInfo::getLdStOffsetOp(*RtMI).getImm();
- // Change the scaled offset from small to large type.
- if (IsScaled) {
- assert(((OffsetImm & 1) == 0) && "Unexpected offset to merge");
- OffsetImm /= 2;
+ int NewOpcode = getMatchingWideOpcode(Opc);
+ bool FinalIsScaled = !TII->hasUnscaledLdStOffset(NewOpcode);
+
+ // Adjust final offset if the result opcode is a scaled store.
+ if (FinalIsScaled) {
+ int NewOffsetStride = FinalIsScaled ? TII->getMemScale(NewOpcode) : 1;
+ assert(((OffsetImm % NewOffsetStride) == 0) &&
+ "Offset should be a multiple of the store memory scale");
+ OffsetImm = OffsetImm / NewOffsetStride;
}
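  // For instance (hypothetical operands): merging "strb wzr, [x0, #10]" with
  // "strb wzr, [x0, #11]" selects STRHHui; the byte offset 10 is divided by
  // the new scale of 2, giving an encoded immediate of 5, i.e.
  // "strh wzr, [x0, #10]".
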
// Construct the new instruction.
@@ -1328,7 +1346,7 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
return false;
// The STR<S,D,Q,W,X>pre - STR<S,D,Q,W,X>ui and
- // LDR<S,D,Q,W,X>pre-LDR<S,D,Q,W,X>ui
+ // LDR<S,D,Q,W,X,SW>pre-LDR<S,D,Q,W,X,SW>ui
// are candidate pairs that can be merged.
if (isPreLdStPairCandidate(FirstMI, MI))
return true;
diff --git a/llvm/lib/Target/AArch64/AArch64MCInstLower.h b/llvm/lib/Target/AArch64/AArch64MCInstLower.h
index b008e49d52dd..8b6abc5c1f23 100644
--- a/llvm/lib/Target/AArch64/AArch64MCInstLower.h
+++ b/llvm/lib/Target/AArch64/AArch64MCInstLower.h
@@ -9,8 +9,8 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MCINSTLOWER_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64MCINSTLOWER_H
-#include "llvm/ADT/Triple.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
class AsmPrinter;
diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 0f9d45e86b21..87aa3b98d938 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -35,6 +35,32 @@
// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
// ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
//
+// 6. %intermediate:gpr32 = COPY %src:fpr128
+// %dst:fpr128 = INSvi32gpr %dst_vec:fpr128, dst_index, %intermediate:gpr32
+// ==> %dst:fpr128 = INSvi32lane %dst_vec:fpr128, dst_index, %src:fpr128, 0
+//
+// In cases where a source FPR is copied to a GPR only so that it can be
+// copied on to a destination FPR, we can instead copy the value directly
+// between the FPRs, avoiding the trip through the integer unit. When we
+// match an INSvi[X]gpr that is preceded by a chain of COPY instructions from
+// an FPR source, we replace the COPY and INSvi[X]gpr instructions with an
+// INSvi[X]lane.
+//
+// 7. If MI implicitly zeroes the high 64 bits, remove the `mov 0` of the
+//    high 64 bits. For example,
+//
+// %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
+// %2:fpr64 = MOVID 0
+// %4:fpr128 = IMPLICIT_DEF
+// %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), killed %2:fpr64, %subreg.dsub
+// %6:fpr128 = IMPLICIT_DEF
+// %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
+// %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
+// ==>
+// %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
+// %6:fpr128 = IMPLICIT_DEF
+// %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
+//
//===----------------------------------------------------------------------===//
#include "AArch64ExpandImm.h"
@@ -99,6 +125,8 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
bool visitAND(unsigned Opc, MachineInstr &MI);
bool visitORR(MachineInstr &MI);
bool visitINSERT(MachineInstr &MI);
+ bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
+ bool visitINSvi64lane(MachineInstr &MI);
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
@@ -136,7 +164,7 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
// consecutive ones. We can split it in to two bitmask immediate like
// 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
// If we do AND with these two bitmask immediate, we can see original one.
- unsigned LowestBitSet = countTrailingZeros(UImm);
+ unsigned LowestBitSet = llvm::countr_zero(UImm);
unsigned HighestBitSet = Log2_64(UImm);
// Create a mask which is filled with one from the position of lowest bit set
@@ -323,17 +351,24 @@ bool AArch64MIPeepholeOpt::visitADDSUB(
unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) {
// Try below transformation.
//
- // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
- // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
+ // ADDWrr X, MOVi32imm ==> ADDWri + ADDWri
+ // ADDXrr X, MOVi64imm ==> ADDXri + ADDXri
//
- // MOVi32imm + SUBWrr ==> SUBWri + SUBWri
- // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
+ // SUBWrr X, MOVi32imm ==> SUBWri + SUBWri
+ // SUBXrr X, MOVi64imm ==> SUBXri + SUBXri
//
// The mov pseudo instruction could be expanded to multiple mov instructions
// later. Let's try to split the constant operand of mov instruction into two
// legal add/sub immediates. It makes only two ADD/SUB instructions instead of
// multiple `mov` + `add/sub` instructions.
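//
// For example (hypothetical registers and constant): "add w0, w1, #0x111222"
// cannot be encoded as a single ADDWri and would otherwise expand to
// "mov w8, #0x1222; movk w8, #0x11, lsl #16; add w0, w1, w8". Splitting the
// immediate gives "add w0, w1, #0x111, lsl #12; add w0, w0, #0x222" instead.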
+  // We can sometimes have an ADDWrr WZR, MOVi32imm that has not been constant
+ // folded. Make sure that we don't generate invalid instructions that use XZR
+ // in those cases.
+ if (MI.getOperand(1).getReg() == AArch64::XZR ||
+ MI.getOperand(1).getReg() == AArch64::WZR)
+ return false;
+
return splitTwoPartImm<T>(
MI,
[PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
@@ -365,6 +400,11 @@ bool AArch64MIPeepholeOpt::visitADDSSUBS(
OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) {
// Try the same transformation as ADDSUB but with additional requirement
// that the condition code usages are only for Equal and Not Equal
+
+ if (MI.getOperand(1).getReg() == AArch64::XZR ||
+ MI.getOperand(1).getReg() == AArch64::WZR)
+ return false;
+
return splitTwoPartImm<T>(
MI,
[PosOpcs, NegOpcs, &MI, &TRI = TRI,
@@ -523,6 +563,113 @@ bool AArch64MIPeepholeOpt::splitTwoPartImm(
return true;
}
+bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) {
+ // Check if this INSvi[X]gpr comes from COPY of a source FPR128
+ //
+ // From
+ // %intermediate1:gpr64 = COPY %src:fpr128
+ // %intermediate2:gpr32 = COPY %intermediate1:gpr64
+ // %dst:fpr128 = INSvi[X]gpr %dst_vec:fpr128, dst_index, %intermediate2:gpr32
+ // To
+ // %dst:fpr128 = INSvi[X]lane %dst_vec:fpr128, dst_index, %src:fpr128,
+ // src_index
+ // where src_index = 0, X = [8|16|32|64]
+
+ MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
+
+ // For a chain of COPY instructions, find the initial source register
+ // and check if it's an FPR128
+ while (true) {
+ if (!SrcMI || SrcMI->getOpcode() != TargetOpcode::COPY)
+ return false;
+
+ if (!SrcMI->getOperand(1).getReg().isVirtual())
+ return false;
+
+ if (MRI->getRegClass(SrcMI->getOperand(1).getReg()) ==
+ &AArch64::FPR128RegClass) {
+ break;
+ }
+ SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
+ }
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = SrcMI->getOperand(1).getReg();
+ MachineInstr *INSvilaneMI =
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opc), DstReg)
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .addUse(SrcReg, getRegState(SrcMI->getOperand(1)))
+ .addImm(0);
+
+ LLVM_DEBUG(dbgs() << MI << " replace by:\n: " << *INSvilaneMI << "\n");
+ (void)INSvilaneMI;
+ MI.eraseFromParent();
+ return true;
+}
+
+// All instructions that set an FPR64 will implicitly zero the top bits of the
+// register.
+static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI,
+ MachineRegisterInfo *MRI) {
+ if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef())
+ return false;
+ const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
+ if (RC != &AArch64::FPR64RegClass)
+ return false;
+ return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
+}
+
+bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) {
+  // Check that the MI defining the low 64 bits implicitly zeroes the high
+  // 64 bits. We expect the case below.
+ //
+ // %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
+ // %6:fpr128 = IMPLICIT_DEF
+ // %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
+ // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
+ MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
+ if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG)
+ return false;
+ Low64MI = MRI->getUniqueVRegDef(Low64MI->getOperand(2).getReg());
+ if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
+ return false;
+
+  // Check that there is a `mov 0` MI for the high 64 bits.
+  // We expect one of the cases below.
+ //
+ // %2:fpr64 = MOVID 0
+ // %4:fpr128 = IMPLICIT_DEF
+ // %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), killed %2:fpr64, %subreg.dsub
+ // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
+ // or
+ // %5:fpr128 = MOVIv2d_ns 0
+ // %6:fpr64 = COPY %5.dsub:fpr128
+ // %8:fpr128 = IMPLICIT_DEF
+ // %7:fpr128 = INSERT_SUBREG %8:fpr128(tied-def 0), killed %6:fpr64, %subreg.dsub
+ // %11:fpr128 = INSvi64lane %9:fpr128(tied-def 0), 1, killed %7:fpr128, 0
+ MachineInstr *High64MI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
+ if (!High64MI || High64MI->getOpcode() != AArch64::INSERT_SUBREG)
+ return false;
+ High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(2).getReg());
+ if (High64MI && High64MI->getOpcode() == TargetOpcode::COPY)
+ High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(1).getReg());
+ if (!High64MI || (High64MI->getOpcode() != AArch64::MOVID &&
+ High64MI->getOpcode() != AArch64::MOVIv2d_ns))
+ return false;
+ if (High64MI->getOperand(1).getImm() != 0)
+ return false;
+
+  // Remove the MIs for the high 64 bits.
+ Register OldDef = MI.getOperand(0).getReg();
+ Register NewDef = MI.getOperand(1).getReg();
+ MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
+ MRI->replaceRegWith(OldDef, NewDef);
+ MI.eraseFromParent();
+
+ return true;
+}
+
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -543,48 +690,63 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
default:
break;
case AArch64::INSERT_SUBREG:
- Changed = visitINSERT(MI);
+ Changed |= visitINSERT(MI);
break;
case AArch64::ANDWrr:
- Changed = visitAND<uint32_t>(AArch64::ANDWri, MI);
+ Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI);
break;
case AArch64::ANDXrr:
- Changed = visitAND<uint64_t>(AArch64::ANDXri, MI);
+ Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI);
break;
case AArch64::ORRWrs:
- Changed = visitORR(MI);
+ Changed |= visitORR(MI);
break;
case AArch64::ADDWrr:
- Changed = visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
+ Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
break;
case AArch64::SUBWrr:
- Changed = visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
+ Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
break;
case AArch64::ADDXrr:
- Changed = visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
+ Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
break;
case AArch64::SUBXrr:
- Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
+ Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
break;
case AArch64::ADDSWrr:
- Changed = visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
- {AArch64::SUBWri, AArch64::SUBSWri},
- MI);
+ Changed |=
+ visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
+ {AArch64::SUBWri, AArch64::SUBSWri}, MI);
break;
case AArch64::SUBSWrr:
- Changed = visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
- {AArch64::ADDWri, AArch64::ADDSWri},
- MI);
+ Changed |=
+ visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
+ {AArch64::ADDWri, AArch64::ADDSWri}, MI);
break;
case AArch64::ADDSXrr:
- Changed = visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
- {AArch64::SUBXri, AArch64::SUBSXri},
- MI);
+ Changed |=
+ visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
+ {AArch64::SUBXri, AArch64::SUBSXri}, MI);
break;
case AArch64::SUBSXrr:
- Changed = visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
- {AArch64::ADDXri, AArch64::ADDSXri},
- MI);
+ Changed |=
+ visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
+ {AArch64::ADDXri, AArch64::ADDSXri}, MI);
+ break;
+ case AArch64::INSvi64gpr:
+ Changed |= visitINSviGPR(MI, AArch64::INSvi64lane);
+ break;
+ case AArch64::INSvi32gpr:
+ Changed |= visitINSviGPR(MI, AArch64::INSvi32lane);
+ break;
+ case AArch64::INSvi16gpr:
+ Changed |= visitINSviGPR(MI, AArch64::INSvi16lane);
+ break;
+ case AArch64::INSvi8gpr:
+ Changed |= visitINSviGPR(MI, AArch64::INSvi8lane);
+ break;
+ case AArch64::INSvi64lane:
+ Changed |= visitINSvi64lane(MI);
break;
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
index f51c27c62dfb..05d60872bf51 100644
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -379,6 +379,64 @@ static bool isArithmeticLogicPair(const MachineInstr *FirstMI,
return false;
}
+// "(A + B) + 1" or "(A - B) - 1"
+static bool isAddSub2RegAndConstOnePair(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ bool NeedsSubtract = false;
+
+ // The 2nd instr must be an add-immediate or subtract-immediate.
+ switch (SecondMI.getOpcode()) {
+ case AArch64::SUBWri:
+ case AArch64::SUBXri:
+ NeedsSubtract = true;
+ [[fallthrough]];
+ case AArch64::ADDWri:
+ case AArch64::ADDXri:
+ break;
+
+ default:
+ return false;
+ }
+
+ // The immediate in the 2nd instr must be "1".
+ if (!SecondMI.getOperand(2).isImm() || SecondMI.getOperand(2).getImm() != 1) {
+ return false;
+ }
+
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ if (FirstMI == nullptr) {
+ return true;
+ }
+
+ switch (FirstMI->getOpcode()) {
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ if (AArch64InstrInfo::hasShiftedReg(*FirstMI))
+ return false;
+ [[fallthrough]];
+ case AArch64::SUBWrr:
+ case AArch64::SUBXrr:
+ if (NeedsSubtract) {
+ return true;
+ }
+ break;
+
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ if (AArch64InstrInfo::hasShiftedReg(*FirstMI))
+ return false;
+ [[fallthrough]];
+ case AArch64::ADDWrr:
+ case AArch64::ADDXrr:
+ if (!NeedsSubtract) {
+ return true;
+ }
+ break;
+ }
+
+ return false;
+}
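+
+// For instance (hypothetical registers), the sequence
+//   add x8, x0, x1
+//   add x8, x8, #1
+// matches the "(A + B) + 1" shape above and is reported as fusible when the
+// subtarget returns true for hasFuseAddSub2RegAndConstOne().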
+
/// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
/// together. Given SecondMI, when FirstMI is unspecified, then check if
/// SecondMI may be part of a fused pair at all.
@@ -411,6 +469,9 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
return true;
if (ST.hasFuseArithmeticLogic() && isArithmeticLogicPair(FirstMI, SecondMI))
return true;
+ if (ST.hasFuseAddSub2RegAndConstOne() &&
+ isAddSub2RegAndConstOnePair(FirstMI, SecondMI))
+ return true;
return false;
}
diff --git a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
index 4555f1a3ebb0..5846fd454b65 100644
--- a/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
+++ b/llvm/lib/Target/AArch64/AArch64PerfectShuffle.h
@@ -6590,11 +6590,11 @@ static unsigned getPerfectShuffleCost(llvm::ArrayRef<int> M) {
assert(M.size() == 4 && "Expected a 4 entry perfect shuffle");
// Special case zero-cost nop copies, from either LHS or RHS.
- if (llvm::all_of(llvm::enumerate(M), [](auto &E) {
+ if (llvm::all_of(llvm::enumerate(M), [](const auto &E) {
return E.value() < 0 || E.value() == (int)E.index();
}))
return 0;
- if (llvm::all_of(llvm::enumerate(M), [](auto &E) {
+ if (llvm::all_of(llvm::enumerate(M), [](const auto &E) {
return E.value() < 0 || E.value() == (int)E.index() + 4;
}))
return 0;
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 299892ad4ede..d1ddf6d76975 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -19,7 +19,6 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64InstPrinter.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -31,6 +30,7 @@
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
@@ -85,8 +85,16 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (MF->getFunction().getCallingConv() == CallingConv::CFGuard_Check)
return CSR_Win_AArch64_CFGuard_Check_SaveList;
- if (MF->getSubtarget<AArch64Subtarget>().isTargetWindows())
+ if (MF->getSubtarget<AArch64Subtarget>().isTargetWindows()) {
+ if (MF->getSubtarget<AArch64Subtarget>().getTargetLowering()
+ ->supportSwiftError() &&
+ MF->getFunction().getAttributes().hasAttrSomewhere(
+ Attribute::SwiftError))
+ return CSR_Win_AArch64_AAPCS_SwiftError_SaveList;
+ if (MF->getFunction().getCallingConv() == CallingConv::SwiftTail)
+ return CSR_Win_AArch64_AAPCS_SwiftTail_SaveList;
return CSR_Win_AArch64_AAPCS_SaveList;
+ }
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
return CSR_AArch64_AAVPCS_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_SVE_VectorCall)
@@ -112,6 +120,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_AArch64_AAPCS_SwiftTail_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost)
return CSR_AArch64_RT_MostRegs_SaveList;
+ if (MF->getFunction().getCallingConv() == CallingConv::PreserveAll)
+ return CSR_AArch64_RT_AllRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::Win64)
// This is for OSes other than Windows; Windows is a separate case further
// above.
@@ -160,6 +170,8 @@ AArch64RegisterInfo::getDarwinCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_Darwin_AArch64_AAPCS_SwiftTail_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::PreserveMost)
return CSR_Darwin_AArch64_RT_MostRegs_SaveList;
+ if (MF->getFunction().getCallingConv() == CallingConv::PreserveAll)
+ return CSR_Darwin_AArch64_RT_AllRegs_SaveList;
if (MF->getFunction().getCallingConv() == CallingConv::Win64)
return CSR_Darwin_AArch64_AAPCS_Win64_SaveList;
return CSR_Darwin_AArch64_AAPCS_SaveList;
@@ -237,6 +249,8 @@ AArch64RegisterInfo::getDarwinCallPreservedMask(const MachineFunction &MF,
return CSR_Darwin_AArch64_AAPCS_SwiftTail_RegMask;
if (CC == CallingConv::PreserveMost)
return CSR_Darwin_AArch64_RT_MostRegs_RegMask;
+ if (CC == CallingConv::PreserveAll)
+ return CSR_Darwin_AArch64_RT_AllRegs_RegMask;
return CSR_Darwin_AArch64_AAPCS_RegMask;
}
@@ -281,6 +295,10 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
if (CC == CallingConv::PreserveMost)
return SCS ? CSR_AArch64_RT_MostRegs_SCS_RegMask
: CSR_AArch64_RT_MostRegs_RegMask;
+ else if (CC == CallingConv::PreserveAll)
+ return SCS ? CSR_AArch64_RT_AllRegs_SCS_RegMask
+ : CSR_AArch64_RT_AllRegs_RegMask;
+
else
return SCS ? CSR_AArch64_AAPCS_SCS_RegMask : CSR_AArch64_AAPCS_RegMask;
}
@@ -309,12 +327,11 @@ void AArch64RegisterInfo::UpdateCustomCallPreservedMask(MachineFunction &MF,
for (size_t i = 0; i < AArch64::GPR64commonRegClass.getNumRegs(); ++i) {
if (MF.getSubtarget<AArch64Subtarget>().isXRegCustomCalleeSaved(i)) {
- for (MCSubRegIterator SubReg(AArch64::GPR64commonRegClass.getRegister(i),
- this, true);
- SubReg.isValid(); ++SubReg) {
+ for (MCPhysReg SubReg :
+ subregs_inclusive(AArch64::GPR64commonRegClass.getRegister(i))) {
// See TargetRegisterInfo::getCallPreservedMask for how to interpret the
// register mask.
- UpdatedMask[*SubReg / 32] |= 1u << (*SubReg % 32);
+ UpdatedMask[SubReg / 32] |= 1u << (SubReg % 32);
}
}
}
@@ -419,9 +436,8 @@ AArch64RegisterInfo::getStrictlyReservedRegs(const MachineFunction &MF) const {
// SME tiles are not allocatable.
if (MF.getSubtarget<AArch64Subtarget>().hasSME()) {
- for (MCSubRegIterator SubReg(AArch64::ZA, this, /*self=*/true);
- SubReg.isValid(); ++SubReg)
- Reserved.set(*SubReg);
+ for (MCPhysReg SubReg : subregs_inclusive(AArch64::ZA))
+ Reserved.set(SubReg);
}
markSuperRegs(Reserved, AArch64::FPCR);
@@ -549,6 +565,7 @@ bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::PreserveMost:
+ case CallingConv::PreserveAll:
case CallingConv::CXX_FAST_TLS:
case CallingConv::Swift:
case CallingConv::SwiftTail:
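The hunks above wire CallingConv::PreserveAll into AArch64's callee-saved register selection (CSR_AArch64_RT_AllRegs_SaveList and the Darwin/SCS RegMask variants). As a minimal standalone sketch, not part of this diff and with a made-up module and function name, a front end could request that convention through the LLVM C++ API roughly like so:

// Sketch only: tag a function with the preserve_all convention that these
// hunks teach the AArch64 register info about. Printing the module shows
// the "preserve_allcc" marker in textual IR.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("preserve_all_example", Ctx); // hypothetical module name

  FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage, "callee", &M);

  // Callers of F may assume the PreserveAll register set survives the call;
  // after this change the AArch64 backend selects the RT_AllRegs save list
  // (or its Darwin/shadow-call-stack variants) for such functions.
  F->setCallingConv(CallingConv::PreserveAll);

  M.print(outs(), /*AAW=*/nullptr);
  return 0;
}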
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 358f7f6c1656..4bb1f9413f2b 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -435,7 +435,7 @@ def Q30 : AArch64Reg<30, "q30", [D30], ["v30", ""]>, DwarfRegAlias<B30>;
def Q31 : AArch64Reg<31, "q31", [D31], ["v31", ""]>, DwarfRegAlias<B31>;
}
-def FPR8 : RegisterClass<"AArch64", [untyped], 8, (sequence "B%u", 0, 31)> {
+def FPR8 : RegisterClass<"AArch64", [i8], 8, (sequence "B%u", 0, 31)> {
let Size = 8;
}
def FPR16 : RegisterClass<"AArch64", [f16, bf16], 16, (sequence "H%u", 0, 31)> {
@@ -891,7 +891,7 @@ class ZPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
// SVE predicate register classes.
class PPRClass<int firstreg, int lastreg> : RegisterClass<
"AArch64",
- [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16,
+ [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1, aarch64svcount ], 16,
(sequence "P%u", firstreg, lastreg)> {
let Size = 16;
}
diff --git a/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp b/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
index cd65c16ee69b..ff56259eb34a 100644
--- a/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SLSHardening.cpp
@@ -60,7 +60,7 @@ private:
bool hardenReturnsAndBRs(MachineBasicBlock &MBB) const;
bool hardenBLRs(MachineBasicBlock &MBB) const;
MachineBasicBlock &ConvertBLRToBL(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator) const;
+ MachineBasicBlock::instr_iterator) const;
};
} // end anonymous namespace
@@ -245,9 +245,8 @@ void SLSBLRThunkInserter::populateThunk(MachineFunction &MF) {
Entry->end(), DebugLoc(), true /*AlwaysUseISBDSB*/);
}
-MachineBasicBlock &
-AArch64SLSHardening::ConvertBLRToBL(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI) const {
+MachineBasicBlock &AArch64SLSHardening::ConvertBLRToBL(
+ MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator MBBI) const {
// Transform a BLR to a BL as follows:
// Before:
// |-----------------------------|
@@ -382,8 +381,9 @@ bool AArch64SLSHardening::hardenBLRs(MachineBasicBlock &MBB) const {
if (!ST->hardenSlsBlr())
return false;
bool Modified = false;
- MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- MachineBasicBlock::iterator NextMBBI;
+ MachineBasicBlock::instr_iterator MBBI = MBB.instr_begin(),
+ E = MBB.instr_end();
+ MachineBasicBlock::instr_iterator NextMBBI;
for (; MBBI != E; MBBI = NextMBBI) {
MachineInstr &MI = *MBBI;
NextMBBI = std::next(MBBI);
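The hardenBLRs() hunk above switches to MachineBasicBlock::instr_iterator and captures NextMBBI before the current instruction may be rewritten by ConvertBLRToBL. A standalone sketch of that iteration pattern, with std::list<int> standing in for the basic block's instruction list and an arbitrary erase condition:

// Sketch only: save the successor iterator before the current element can be
// erased/replaced, mirroring the NextMBBI = std::next(MBBI) pattern above.
#include <iostream>
#include <iterator>
#include <list>

int main() {
  std::list<int> insts = {1, 2, 3, 4, 5};
  for (auto it = insts.begin(), e = insts.end(), next = it; it != e; it = next) {
    next = std::next(it);   // grab the successor first
    if (*it % 2 == 0)
      insts.erase(it);      // erasing *it leaves 'next' valid for std::list
  }
  for (int v : insts)
    std::cout << v << ' ';  // prints: 1 3 5
  std::cout << '\n';
  return 0;
}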
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index d0c5bfe72566..cabfe9def7c2 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -274,17 +274,17 @@ defm FMLS_VG4_M4Z4Z_S : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b011001, M
defm FMLS_VG2_M2ZZI_S : sme2_multi_vec_array_vg2_index_32b<"fmls", 0b0010, ZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_lane_vg1x2>;
defm FMLS_VG4_M4ZZI_S : sme2_multi_vec_array_vg4_index_32b<"fmls", 0b0010, ZZZZ_s_mul_r, ZPR4b32, nxv4f32, int_aarch64_sme_fmls_lane_vg1x4>;
-defm ADD_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"add", 0b0010, MatrixOp32, ZZ_s_mul_r>;
-defm ADD_VG4_M4Z_S : sme2_multivec_accum_add_sub_vg4<"add", 0b0010, MatrixOp32, ZZZZ_s_mul_r>;
+defm ADD_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"add", 0b0010, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_za32_vg1x2>;
+defm ADD_VG4_M4Z_S : sme2_multivec_accum_add_sub_vg4<"add", 0b0010, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_add_za32_vg1x4>;
-defm SUB_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"sub", 0b0011, MatrixOp32, ZZ_s_mul_r>;
-defm SUB_VG4_M4Z_S : sme2_multivec_accum_add_sub_vg4<"sub", 0b0011, MatrixOp32, ZZZZ_s_mul_r>;
+defm SUB_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"sub", 0b0011, MatrixOp32, ZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_za32_vg1x2>;
+defm SUB_VG4_M4Z_S : sme2_multivec_accum_add_sub_vg4<"sub", 0b0011, MatrixOp32, ZZZZ_s_mul_r, nxv4i32, int_aarch64_sme_sub_za32_vg1x4>;
-defm FADD_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"fadd", 0b0000, MatrixOp32, ZZ_s_mul_r>;
-defm FADD_VG4_M4Z_S : sme2_multivec_accum_add_sub_vg4<"fadd", 0b0000, MatrixOp32, ZZZZ_s_mul_r>;
+defm FADD_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"fadd", 0b0000, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_add_za32_vg1x2>;
+defm FADD_VG4_M4Z_S : sme2_multivec_accum_add_sub_vg4<"fadd", 0b0000, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_add_za32_vg1x4>;
-defm FSUB_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"fsub", 0b0001, MatrixOp32, ZZ_s_mul_r>;
-defm FSUB_VG4_M4Z_S : sme2_multivec_accum_add_sub_vg4<"fsub", 0b0001, MatrixOp32, ZZZZ_s_mul_r>;
+defm FSUB_VG2_M2Z_S : sme2_multivec_accum_add_sub_vg2<"fsub", 0b0001, MatrixOp32, ZZ_s_mul_r, nxv4f32, int_aarch64_sme_sub_za32_vg1x2>;
+defm FSUB_VG4_M4Z_S : sme2_multivec_accum_add_sub_vg4<"fsub", 0b0001, MatrixOp32, ZZZZ_s_mul_r, nxv4f32, int_aarch64_sme_sub_za32_vg1x4>;
defm SQDMULH_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"sqdmulh", 0b1000000>;
defm SQDMULH_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"sqdmulh", 0b1000000>;
@@ -446,131 +446,131 @@ defm SCLAMP_VG4_4Z4Z : sme2_int_clamp_vector_vg4_multi<"sclamp", 0b0>;
defm UCLAMP_VG2_2Z2Z : sme2_int_clamp_vector_vg2_multi<"uclamp", 0b1>;
defm UCLAMP_VG4_4Z4Z : sme2_int_clamp_vector_vg4_multi<"uclamp", 0b1>;
-defm FDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fdot", 0b1001, ZZ_h_mul_r, ZPR4b16, nxv8f16, null_frag>;
-defm FDOT_VG4_M4ZZI_HtoS : sme2_multi_vec_array_vg4_index_32b<"fdot", 0b1001, ZZZZ_h_mul_r, ZPR4b16, nxv8f16, null_frag>;
-defm FDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0010000, MatrixOp32, ZZ_h, ZPR4b16>;
-defm FDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110000, MatrixOp32, ZZZZ_h, ZPR4b16>;
-defm FDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"fdot", 0b010000, MatrixOp32, ZZ_h_mul_r, nxv8f16, null_frag>;
-defm FDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"fdot", 0b010000, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, null_frag>;
+defm FDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fdot", 0b1001, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_lane_za32_vg1x2>;
+defm FDOT_VG4_M4ZZI_HtoS : sme2_multi_vec_array_vg4_index_32b<"fdot", 0b1001, ZZZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_lane_za32_vg1x4>;
+defm FDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"fdot", 0b0010000, MatrixOp32, ZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_single_za32_vg1x2>;
+defm FDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"fdot", 0b0110000, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8f16, int_aarch64_sme_fdot_single_za32_vg1x4>;
+defm FDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"fdot", 0b010000, MatrixOp32, ZZ_h_mul_r, nxv8f16, int_aarch64_sme_fdot_za32_vg1x2>;
+defm FDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"fdot", 0b010000, MatrixOp32, ZZZZ_h_mul_r, nxv8f16, int_aarch64_sme_fdot_za32_vg1x4>;
-defm BFDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfdot", 0b1011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, null_frag>;
-defm BFDOT_VG4_M4ZZI_HtoS : sme2_multi_vec_array_vg4_index_32b<"bfdot", 0b1011, ZZZZ_h_mul_r, ZPR4b16, nxv8bf16, null_frag>;
-defm BFDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg24_single<"bfdot", 0b0010010, MatrixOp32, ZZ_h, ZPR4b16>;
-defm BFDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg24_single<"bfdot", 0b0110010, MatrixOp32, ZZZZ_h, ZPR4b16>;
-defm BFDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"bfdot", 0b010010, MatrixOp32, ZZ_h_mul_r, nxv8bf16, null_frag>;
-defm BFDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"bfdot", 0b010010, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, null_frag>;
+defm BFDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfdot", 0b1011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_lane_za32_vg1x2>;
+defm BFDOT_VG4_M4ZZI_HtoS : sme2_multi_vec_array_vg4_index_32b<"bfdot", 0b1011, ZZZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_lane_za32_vg1x4>;
+defm BFDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"bfdot", 0b0010010, MatrixOp32, ZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_single_za32_vg1x2>;
+defm BFDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"bfdot", 0b0110010, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8bf16, int_aarch64_sme_fdot_single_za32_vg1x4>;
+defm BFDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"bfdot", 0b010010, MatrixOp32, ZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fdot_za32_vg1x2>;
+defm BFDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"bfdot", 0b010010, MatrixOp32, ZZZZ_h_mul_r, nxv8bf16, int_aarch64_sme_fdot_za32_vg1x4>;
defm BFVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"bfvdot", 0b0011, ZZ_h_mul_r, ZPR4b16, nxv8bf16, int_aarch64_sme_fvdot_lane_za32_vg1x2>;
defm FVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"fvdot", 0b0001, ZZ_h_mul_r, ZPR4b16, nxv8f16, int_aarch64_sme_fvdot_lane_za32_vg1x2>;
-defm SDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b1000, ZZ_h_mul_r, ZPR4b16, nxv8i16, null_frag>;
-defm SDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b1100, ZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
-defm SDOT_VG4_M4ZZI_HToS : sme2_multi_vec_array_vg4_index_32b<"sdot", 0b1000, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, null_frag>;
-defm SDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"sdot", 0b1100, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
-defm SDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg24_single<"sdot", 0b1010101, MatrixOp32, ZZ_h, ZPR4b16>;
-defm SDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg24_single<"sdot", 0b1110101, MatrixOp32, ZZZZ_h, ZPR4b16>;
-defm SDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b110101, MatrixOp32, ZZ_h_mul_r, nxv8i16, null_frag>;
-defm SDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b110101, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, null_frag>;
-defm SDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg24_single<"sdot", 0b0010100, MatrixOp32, ZZ_b, ZPR4b8>;
-defm SDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg24_single<"sdot", 0b0110100, MatrixOp32, ZZZZ_b, ZPR4b8>;
-defm SDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b010100, MatrixOp32, ZZ_b_mul_r, nxv16i8, null_frag>;
-defm SDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b010100, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, null_frag>;
-
-defm SUDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sudot", 0b1111, ZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
-defm SUDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"sudot", 0b1111, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
-defm SUDOT_VG2_M2ZZ_BToS : sme2_dot_mla_add_sub_array_vg24_single<"sudot", 0b0010111, MatrixOp32, ZZ_b, ZPR4b8>;
-defm SUDOT_VG4_M4ZZ_BToS : sme2_dot_mla_add_sub_array_vg24_single<"sudot", 0b0110111, MatrixOp32, ZZZZ_b, ZPR4b8>;
+defm SDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b1000, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za32_vg1x2>;
+defm SDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sdot", 0b1100, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_lane_za32_vg1x2>;
+defm SDOT_VG4_M4ZZI_HToS : sme2_multi_vec_array_vg4_index_32b<"sdot", 0b1000, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za32_vg1x4>;
+defm SDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"sdot", 0b1100, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_lane_za32_vg1x4>;
+defm SDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"sdot", 0b1010101, MatrixOp32, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za32_vg1x2>;
+defm SDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"sdot", 0b1110101, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za32_vg1x4>;
+defm SDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b110101, MatrixOp32, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za32_vg1x2>;
+defm SDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b110101, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za32_vg1x4>;
+defm SDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg2_single<"sdot", 0b0010100, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_single_za32_vg1x2>;
+defm SDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg4_single<"sdot", 0b0110100, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sdot_single_za32_vg1x4>;
+defm SDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b010100, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_sdot_za32_vg1x2>;
+defm SDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b010100, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_sdot_za32_vg1x4>;
+
+defm SUDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"sudot", 0b1111, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_lane_za32_vg1x2>;
+defm SUDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"sudot", 0b1111, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_lane_za32_vg1x4>;
+defm SUDOT_VG2_M2ZZ_BToS : sme2_dot_mla_add_sub_array_vg2_single<"sudot", 0b0010111, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_single_za32_vg1x2>;
+defm SUDOT_VG4_M4ZZ_BToS : sme2_dot_mla_add_sub_array_vg4_single<"sudot", 0b0110111, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sudot_single_za32_vg1x4>;
defm SVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"svdot", 0b0100, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_svdot_lane_za32_vg1x2>;
defm SVDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"svdot", 0b0100, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_svdot_lane_za32_vg1x4>;
defm SUVDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"suvdot", 0b0111, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_suvdot_lane_za32_vg1x4>;
-defm UDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b1010, ZZ_h_mul_r, ZPR4b16, nxv8i16, null_frag>;
-defm UDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b1110, ZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
-defm UDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"udot", 0b1110, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
-defm UDOT_VG4_M4ZZI_HToS : sme2_multi_vec_array_vg4_index_32b<"udot", 0b1010, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, null_frag>;
-defm UDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg24_single<"udot", 0b1010111, MatrixOp32, ZZ_h, ZPR4b16>;
-defm UDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg24_single<"udot", 0b1110111, MatrixOp32, ZZZZ_h, ZPR4b16>;
-defm UDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b110111, MatrixOp32, ZZ_h_mul_r, nxv8i16, null_frag>;
-defm UDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b110111, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, null_frag>;
-defm UDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg24_single<"udot", 0b0010110, MatrixOp32, ZZ_b, ZPR4b8>;
-defm UDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg24_single<"udot", 0b0110110, MatrixOp32, ZZZZ_b, ZPR4b8>;
-defm UDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b010110, MatrixOp32, ZZ_b_mul_r, nxv16i8, null_frag>;
-defm UDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b010110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, null_frag>;
-
-defm USDOT_VG2_M2ZZI_BToS: sme2_multi_vec_array_vg2_index_32b<"usdot", 0b1101, ZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
-defm USDOT_VG4_M4ZZI_BToS: sme2_multi_vec_array_vg4_index_32b<"usdot", 0b1101, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, null_frag>;
-defm USDOT_VG2_M2ZZ_BToS : sme2_dot_mla_add_sub_array_vg24_single<"usdot", 0b0010101, MatrixOp32, ZZ_b, ZPR4b8>;
-defm USDOT_VG4_M4ZZ_BToS : sme2_dot_mla_add_sub_array_vg24_single<"usdot", 0b0110101, MatrixOp32, ZZZZ_b, ZPR4b8>;
-defm USDOT_VG2_M2Z2Z_BToS : sme2_dot_mla_add_sub_array_vg2_multi<"usdot", 0b010101, MatrixOp32, ZZ_b_mul_r, nxv16i8, null_frag>;
-defm USDOT_VG4_M4Z4Z_BToS : sme2_dot_mla_add_sub_array_vg4_multi<"usdot", 0b010101, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, null_frag>;
+defm UDOT_VG2_M2ZZI_HToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b1010, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za32_vg1x2>;
+defm UDOT_VG2_M2ZZI_BToS : sme2_multi_vec_array_vg2_index_32b<"udot", 0b1110, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_udot_lane_za32_vg1x2>;
+defm UDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"udot", 0b1110, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_udot_lane_za32_vg1x4>;
+defm UDOT_VG4_M4ZZI_HToS : sme2_multi_vec_array_vg4_index_32b<"udot", 0b1010, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za32_vg1x4>;
+defm UDOT_VG2_M2ZZ_HtoS : sme2_dot_mla_add_sub_array_vg2_single<"udot", 0b1010111, MatrixOp32, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za32_vg1x2>;
+defm UDOT_VG4_M4ZZ_HtoS : sme2_dot_mla_add_sub_array_vg4_single<"udot", 0b1110111, MatrixOp32, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za32_vg1x4>;
+defm UDOT_VG2_M2Z2Z_HtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b110111, MatrixOp32, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za32_vg1x2>;
+defm UDOT_VG4_M4Z4Z_HtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b110111, MatrixOp32, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za32_vg1x4>;
+defm UDOT_VG2_M2ZZ_BtoS : sme2_dot_mla_add_sub_array_vg2_single<"udot", 0b0010110, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_udot_single_za32_vg1x2>;
+defm UDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg4_single<"udot", 0b0110110, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_udot_single_za32_vg1x4>;
+defm UDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b010110, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_udot_za32_vg1x2>;
+defm UDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b010110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_udot_za32_vg1x4>;
+
+defm USDOT_VG2_M2ZZI_BToS: sme2_multi_vec_array_vg2_index_32b<"usdot", 0b1101, ZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_lane_za32_vg1x2>;
+defm USDOT_VG4_M4ZZI_BToS: sme2_multi_vec_array_vg4_index_32b<"usdot", 0b1101, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_lane_za32_vg1x4>;
+defm USDOT_VG2_M2ZZ_BToS : sme2_dot_mla_add_sub_array_vg2_single<"usdot", 0b0010101, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_single_za32_vg1x2>;
+defm USDOT_VG4_M4ZZ_BToS : sme2_dot_mla_add_sub_array_vg4_single<"usdot", 0b0110101, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usdot_single_za32_vg1x4>;
+defm USDOT_VG2_M2Z2Z_BToS : sme2_dot_mla_add_sub_array_vg2_multi<"usdot", 0b010101, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_usdot_za32_vg1x2>;
+defm USDOT_VG4_M4Z4Z_BToS : sme2_dot_mla_add_sub_array_vg4_multi<"usdot", 0b010101, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_usdot_za32_vg1x4>;
defm USVDOT_VG4_M4ZZI_BToS : sme2_multi_vec_array_vg4_index_32b<"usvdot", 0b0101, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_usvdot_lane_za32_vg1x4>;
defm UVDOT_VG2_M2ZZI_HtoS : sme2_multi_vec_array_vg2_index_32b<"uvdot", 0b0110, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_uvdot_lane_za32_vg1x2>;
defm UVDOT_VG4_M4ZZI_BtoS : sme2_multi_vec_array_vg4_index_32b<"uvdot", 0b0110, ZZZZ_b_mul_r, ZPR4b8, nxv16i8, int_aarch64_sme_uvdot_lane_za32_vg1x4>;
-def SMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlall", 0b000>;
-defm SMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlall", 0b000>;
-defm SMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlall", 0b000>;
-def SMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"smlall", 0b0000, MatrixOp32, ZPR8, ZPR4b8>;
-defm SMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg24_single<"smlall", 0b00000, MatrixOp32, ZZ_b, ZPR4b8>;
-defm SMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg24_single<"smlall", 0b01000, MatrixOp32, ZZZZ_b, ZPR4b8>;
-defm SMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlall", 0b0000, MatrixOp32, ZZ_b_mul_r>;
-defm SMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlall", 0b0000, MatrixOp32, ZZZZ_b_mul_r>;
-
-def USMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"usmlall", 0b001>;
-defm USMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"usmlall", 0b100>;
-defm USMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"usmlall", 0b100>;
-def USMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"usmlall", 0b0001, MatrixOp32, ZPR8, ZPR4b8>;
-defm USMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg24_single<"usmlall", 0b00001, MatrixOp32, ZZ_b, ZPR4b8>;
-defm USMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg24_single<"usmlall", 0b01001, MatrixOp32, ZZZZ_b, ZPR4b8>;
-defm USMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"usmlall", 0b0001, MatrixOp32, ZZ_b_mul_r>;
-defm USMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"usmlall", 0b0001, MatrixOp32, ZZZZ_b_mul_r>;
-
-def SMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlsll", 0b010>;
-defm SMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlsll", 0b001>;
-defm SMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlsll", 0b001>;
-def SMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"smlsll", 0b0010, MatrixOp32, ZPR8, ZPR4b8>;
-defm SMLSLL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg24_single<"smlsll", 0b00010, MatrixOp32, ZZ_b, ZPR4b8>;
-defm SMLSLL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg24_single<"smlsll", 0b01010, MatrixOp32, ZZZZ_b, ZPR4b8>;
-defm SMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlsll", 0b0010, MatrixOp32, ZZ_b_mul_r>;
-defm SMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlsll", 0b0010, MatrixOp32, ZZZZ_b_mul_r>;
-
-def UMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlall", 0b100>;
-defm UMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlall", 0b010>;
-defm UMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlall", 0b010>;
-def UMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"umlall", 0b0100, MatrixOp32, ZPR8, ZPR4b8>;
-defm UMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg24_single<"umlall", 0b00100, MatrixOp32, ZZ_b, ZPR4b8>;
-defm UMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg24_single<"umlall", 0b01100, MatrixOp32, ZZZZ_b, ZPR4b8>;
-defm UMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlall", 0b0100, MatrixOp32, ZZ_b_mul_r>;
-defm UMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlall", 0b0100, MatrixOp32, ZZZZ_b_mul_r>;
-
-def SUMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"sumlall", 0b101>;
-defm SUMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"sumlall", 0b110>;
-defm SUMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"sumlall", 0b110>;
-defm SUMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg24_single<"sumlall", 0b00101, MatrixOp32, ZZ_b, ZPR4b8>;
-defm SUMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg24_single<"sumlall", 0b01101, MatrixOp32, ZZZZ_b, ZPR4b8>;
-
-def UMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlsll", 0b110>;
-defm UMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlsll", 0b011>;
-defm UMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlsll", 0b011>;
-def UMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"umlsll", 0b0110, MatrixOp32, ZPR8, ZPR4b8>;
-defm UMLSLL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg24_single<"umlsll", 0b00110, MatrixOp32, ZZ_b, ZPR4b8>;
-defm UMLSLL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg24_single<"umlsll", 0b01110, MatrixOp32, ZZZZ_b, ZPR4b8>;
-defm UMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlsll", 0b0110, MatrixOp32, ZZ_b_mul_r>;
-defm UMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlsll", 0b0110, MatrixOp32, ZZZZ_b_mul_r>;
-
-defm BMOPA_MPPZZ_S : sme2_bfp_mopx_tile<"bmopa", 0b100>;
-defm BMOPS_MPPZZ_S : sme2_bfp_mopx_tile<"bmops", 0b101>;
-
-defm SMOPA_MPPZZ_HtoS : sme2_int_mopx_tile<"smopa", 0b000>;
-defm SMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"smops", 0b001>;
-
-defm UMOPA_MPPZZ_HtoS : sme2_int_mopx_tile<"umopa", 0b100>;
-defm UMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"umops", 0b101>;
+defm SMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlall", 0b000, int_aarch64_sme_smla_za32_lane_vg4x1>;
+defm SMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlall", 0b000, int_aarch64_sme_smla_za32_lane_vg4x2>;
+defm SMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlall", 0b000, int_aarch64_sme_smla_za32_lane_vg4x4>;
+defm SMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"smlall", 0b0000, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x1>;
+defm SMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"smlall", 0b00000, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x2>;
+defm SMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"smlall", 0b01000, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smla_za32_single_vg4x4>;
+defm SMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlall", 0b0000, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_smla_za32_vg4x2>;
+defm SMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlall", 0b0000, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_smla_za32_vg4x4>;
+
+defm USMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"usmlall", 0b001, int_aarch64_sme_usmla_za32_lane_vg4x1>;
+defm USMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"usmlall", 0b100, int_aarch64_sme_usmla_za32_lane_vg4x2>;
+defm USMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"usmlall", 0b100, int_aarch64_sme_usmla_za32_lane_vg4x4>;
+defm USMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"usmlall", 0b0001, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x1>;
+defm USMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"usmlall", 0b00001, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x2>;
+defm USMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"usmlall", 0b01001, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_usmla_za32_single_vg4x4>;
+defm USMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"usmlall", 0b0001, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_usmla_za32_vg4x2>;
+defm USMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"usmlall", 0b0001, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_usmla_za32_vg4x4>;
+
+defm SMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlsll", 0b010, int_aarch64_sme_smls_za32_lane_vg4x1>;
+defm SMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlsll", 0b001, int_aarch64_sme_smls_za32_lane_vg4x2>;
+defm SMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlsll", 0b001, int_aarch64_sme_smls_za32_lane_vg4x4>;
+defm SMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"smlsll", 0b0010, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x1>;
+defm SMLSLL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"smlsll", 0b00010, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x2>;
+defm SMLSLL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"smlsll", 0b01010, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_smls_za32_single_vg4x4>;
+defm SMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"smlsll", 0b0010, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_smls_za32_vg4x2>;
+defm SMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"smlsll", 0b0010, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_smls_za32_vg4x4>;
+
+defm UMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlall", 0b100, int_aarch64_sme_umla_za32_lane_vg4x1>;
+defm UMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlall", 0b010, int_aarch64_sme_umla_za32_lane_vg4x2>;
+defm UMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlall", 0b010, int_aarch64_sme_umla_za32_lane_vg4x4>;
+defm UMLALL_MZZ_BtoS : sme2_mla_ll_array_single<"umlall", 0b0100, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x1>;
+defm UMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"umlall", 0b00100, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x2>;
+defm UMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"umlall", 0b01100, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umla_za32_single_vg4x4>;
+defm UMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlall", 0b0100, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_umla_za32_vg4x2>;
+defm UMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlall", 0b0100, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_umla_za32_vg4x4>;
+
+defm SUMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"sumlall", 0b101, int_aarch64_sme_sumla_za32_lane_vg4x1>;
+defm SUMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"sumlall", 0b110, int_aarch64_sme_sumla_za32_lane_vg4x2>;
+defm SUMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"sumlall", 0b110, int_aarch64_sme_sumla_za32_lane_vg4x4>;
+defm SUMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"sumlall", 0b00101, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sumla_za32_single_vg4x2>;
+defm SUMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"sumlall", 0b01101, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_sumla_za32_single_vg4x4>;
+
+defm UMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlsll", 0b110, int_aarch64_sme_umls_za32_lane_vg4x1>;
+defm UMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlsll", 0b011, int_aarch64_sme_umls_za32_lane_vg4x2>;
+defm UMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlsll", 0b011, int_aarch64_sme_umls_za32_lane_vg4x4>;
+defm UMLSLL_MZZ_BtoS : sme2_mla_ll_array_single<"umlsll", 0b0110, MatrixOp32, ZPR8, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x1>;
+defm UMLSLL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single<"umlsll", 0b00110, MatrixOp32, ZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x2>;
+defm UMLSLL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single<"umlsll", 0b01110, MatrixOp32, ZZZZ_b, ZPR4b8, nxv16i8, int_aarch64_sme_umls_za32_single_vg4x4>;
+defm UMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi<"umlsll", 0b0110, MatrixOp32, ZZ_b_mul_r, nxv16i8, int_aarch64_sme_umls_za32_vg4x2>;
+defm UMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi<"umlsll", 0b0110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, int_aarch64_sme_umls_za32_vg4x4>;
+
+defm BMOPA_MPPZZ_S : sme2_int_bmopx_tile<"bmopa", 0b100, int_aarch64_sme_bmopa_za32>;
+defm BMOPS_MPPZZ_S : sme2_int_bmopx_tile<"bmops", 0b101, int_aarch64_sme_bmops_za32>;
+
+defm SMOPA_MPPZZ_HtoS : sme2_int_mopx_tile<"smopa", 0b000, int_aarch64_sme_smopa_za32>;
+defm SMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"smops", 0b001, int_aarch64_sme_smops_za32>;
+
+defm UMOPA_MPPZZ_HtoS : sme2_int_mopx_tile<"umopa", 0b100, int_aarch64_sme_umopa_za32>;
+defm UMOPS_MPPZZ_HtoS : sme2_int_mopx_tile<"umops", 0b101, int_aarch64_sme_umops_za32>;
def ZERO_T : sme2_zero_zt<"zero", 0b0001>;
@@ -609,99 +609,99 @@ defm FRINTN_4Z4Z: sme2_frint_vector_vg4_multi<"frintn", 0b1000000>;
defm FRINTP_2Z2Z: sme2_frint_vector_vg2_multi<"frintp", 0b10010>;
defm FRINTP_4Z4Z: sme2_frint_vector_vg4_multi<"frintp", 0b1001000>;
-defm MOVA_MXI2Z : sme2_mova_vec_to_tile_vg2_multi<"mova">;
-defm MOVA_MXI4Z : sme2_mova_vec_to_tile_vg4_multi<"mova">;
+defm MOVA_MXI2Z : sme2_mova_vec_to_tile_vg2_multi<"mova", int_aarch64_sme_write_hor_vg2, int_aarch64_sme_write_ver_vg2>;
+defm MOVA_MXI4Z : sme2_mova_vec_to_tile_vg4_multi<"mova", int_aarch64_sme_write_hor_vg4, int_aarch64_sme_write_ver_vg4>;
defm MOVA_2ZMXI : sme2_mova_tile_to_vec_vg2_multi<"mova">;
defm MOVA_4ZMXI : sme2_mova_tile_to_vec_vg4_multi<"mova">;
-defm MOVA_VG2_MXI2Z : sme2_mova_vec_to_array_vg2_multi<"mova">;
-defm MOVA_VG4_MXI4Z : sme2_mova_vec_to_array_vg4_multi<"mova">;
+defm MOVA_VG2_MXI2Z : sme2_mova_vec_to_array_vg2_multi<"mova", int_aarch64_sme_write_vg1x2>;
+defm MOVA_VG4_MXI4Z : sme2_mova_vec_to_array_vg4_multi<"mova", int_aarch64_sme_write_vg1x4>;
defm MOVA_VG2_2ZMXI : sme2_mova_array_to_vec_vg2_multi<0b000, "mova">;
defm MOVA_VG4_4ZMXI : sme2_mova_array_to_vec_vg4_multi<0b1000, "mova">;
-defm SQRSHR_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"sqrshr", 0b0, 0b0>;
-defm SQRSHR_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshr", 0b000>;
-
-defm UQRSHR_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"uqrshr", 0b0, 0b1>;
-defm UQRSHR_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshr", 0b001>;
-
-defm SQRSHRU_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"sqrshru", 0b1, 0b0>;
-defm SQRSHRU_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshru", 0b010>;
-
-defm SQRSHRN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrn", 0b100>;
-defm UQRSHRN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshrn", 0b101>;
-defm SQRSHRUN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrun", 0b110>;
-
-defm SEL_VG2_2ZP2Z2Z: sme2_sel_vector_vg2<"sel">;
-defm SEL_VG4_4ZP4Z4Z: sme2_sel_vector_vg4<"sel">;
-
-def LD1B_VG2_M2ZPXX : sme2_ld_vector_vg2_multi_scalar_scalar<0b00, 0b0, ZZ_b_strided, GPR64shifted8, "ld1b">;
-def LD1B_VG4_M4ZPXX : sme2_ld_vector_vg4_multi_scalar_scalar<0b00, 0b0, ZZZZ_b_strided, GPR64shifted8, "ld1b">;
-defm LD1B_VG2_M2ZPXI : sme2_ld_vector_vg2_multi_scalar_immediate<0b00, 0b0, ZZ_b_strided, simm4s2, "ld1b">;
-defm LD1B_VG4_M4ZPXI : sme2_ld_vector_vg4_multi_scalar_immediate<0b00, 0b0, ZZZZ_b_strided, simm4s4, "ld1b">;
-def LD1H_VG2_M2ZPXX : sme2_ld_vector_vg2_multi_scalar_scalar<0b01, 0b0, ZZ_h_strided, GPR64shifted16, "ld1h">;
-def LD1H_VG4_M4ZPXX : sme2_ld_vector_vg4_multi_scalar_scalar<0b01, 0b0, ZZZZ_h_strided, GPR64shifted16, "ld1h">;
-defm LD1H_VG2_M2ZPXI : sme2_ld_vector_vg2_multi_scalar_immediate<0b01, 0b0, ZZ_h_strided, simm4s2, "ld1h">;
-defm LD1H_VG4_M4ZPXI : sme2_ld_vector_vg4_multi_scalar_immediate<0b01, 0b0, ZZZZ_h_strided, simm4s4, "ld1h">;
-def LD1W_VG2_M2ZPXX : sme2_ld_vector_vg2_multi_scalar_scalar<0b10, 0b0, ZZ_s_strided, GPR64shifted32, "ld1w">;
-def LD1W_VG4_M4ZPXX : sme2_ld_vector_vg4_multi_scalar_scalar<0b10, 0b0, ZZZZ_s_strided, GPR64shifted32, "ld1w">;
-defm LD1W_VG2_M2ZPXI : sme2_ld_vector_vg2_multi_scalar_immediate<0b10, 0b0, ZZ_s_strided, simm4s2, "ld1w">;
-defm LD1W_VG4_M4ZPXI : sme2_ld_vector_vg4_multi_scalar_immediate<0b10, 0b0, ZZZZ_s_strided, simm4s4, "ld1w">;
-def LD1D_VG2_M2ZPXX : sme2_ld_vector_vg2_multi_scalar_scalar<0b11, 0b0, ZZ_d_strided, GPR64shifted64, "ld1d">;
-def LD1D_VG4_M4ZPXX : sme2_ld_vector_vg4_multi_scalar_scalar<0b11, 0b0, ZZZZ_d_strided, GPR64shifted64, "ld1d">;
-defm LD1D_VG2_M2ZPXI : sme2_ld_vector_vg2_multi_scalar_immediate<0b11, 0b0, ZZ_d_strided, simm4s2, "ld1d">;
-defm LD1D_VG4_M4ZPXI : sme2_ld_vector_vg4_multi_scalar_immediate<0b11, 0b0, ZZZZ_d_strided, simm4s4, "ld1d">;
-
-def LDNT1B_VG2_M2ZPXX : sme2_ld_vector_vg2_multi_scalar_scalar<0b00, 0b1, ZZ_b_strided, GPR64shifted8, "ldnt1b">;
-def LDNT1B_VG4_M4ZPXX : sme2_ld_vector_vg4_multi_scalar_scalar<0b00, 0b1, ZZZZ_b_strided, GPR64shifted8, "ldnt1b">;
-defm LDNT1B_VG2_M2ZPXI : sme2_ld_vector_vg2_multi_scalar_immediate<0b00, 0b1, ZZ_b_strided, simm4s2, "ldnt1b">;
-defm LDNT1B_VG4_M4ZPXI : sme2_ld_vector_vg4_multi_scalar_immediate<0b00, 0b1, ZZZZ_b_strided, simm4s4, "ldnt1b">;
-def LDNT1H_VG2_M2ZPXX : sme2_ld_vector_vg2_multi_scalar_scalar<0b01, 0b1, ZZ_h_strided, GPR64shifted16, "ldnt1h">;
-def LDNT1H_VG4_M4ZPXX : sme2_ld_vector_vg4_multi_scalar_scalar<0b01, 0b1, ZZZZ_h_strided, GPR64shifted16, "ldnt1h">;
-defm LDNT1H_VG2_M2ZPXI : sme2_ld_vector_vg2_multi_scalar_immediate<0b01, 0b1, ZZ_h_strided, simm4s2, "ldnt1h">;
-defm LDNT1H_VG4_M4ZPXI : sme2_ld_vector_vg4_multi_scalar_immediate<0b01, 0b1, ZZZZ_h_strided, simm4s4, "ldnt1h">;
-def LDNT1W_VG2_M2ZPXX : sme2_ld_vector_vg2_multi_scalar_scalar<0b10, 0b1, ZZ_s_strided, GPR64shifted32, "ldnt1w">;
-def LDNT1W_VG4_M4ZPXX : sme2_ld_vector_vg4_multi_scalar_scalar<0b10, 0b1, ZZZZ_s_strided, GPR64shifted32, "ldnt1w">;
-defm LDNT1W_VG2_M2ZPXI : sme2_ld_vector_vg2_multi_scalar_immediate<0b10, 0b1, ZZ_s_strided, simm4s2, "ldnt1w">;
-defm LDNT1W_VG4_M4ZPXI : sme2_ld_vector_vg4_multi_scalar_immediate<0b10, 0b1, ZZZZ_s_strided, simm4s4, "ldnt1w">;
-def LDNT1D_VG2_M2ZPXX : sme2_ld_vector_vg2_multi_scalar_scalar<0b11, 0b1, ZZ_d_strided, GPR64shifted64, "ldnt1d">;
-def LDNT1D_VG4_M4ZPXX : sme2_ld_vector_vg4_multi_scalar_scalar<0b11, 0b1, ZZZZ_d_strided, GPR64shifted64, "ldnt1d">;
-defm LDNT1D_VG2_M2ZPXI : sme2_ld_vector_vg2_multi_scalar_immediate<0b11, 0b1, ZZ_d_strided, simm4s2, "ldnt1d">;
-defm LDNT1D_VG4_M4ZPXI : sme2_ld_vector_vg4_multi_scalar_immediate<0b11, 0b1, ZZZZ_d_strided, simm4s4, "ldnt1d">;
-
-def ST1B_VG2_M2ZPXX : sme2_st_vector_vg2_multi_scalar_scalar<0b00, 0b0, ZZ_b_strided, GPR64shifted8, "st1b">;
-def ST1B_VG4_M4ZPXX : sme2_st_vector_vg4_multi_scalar_scalar<0b00, 0b0, ZZZZ_b_strided, GPR64shifted8, "st1b">;
-defm ST1B_VG2_M2ZPXI : sme2_st_vector_vg2_multi_scalar_immediate<0b00, 0b0, ZZ_b_strided, simm4s2, "st1b">;
-defm ST1B_VG4_M4ZPXI : sme2_st_vector_vg4_multi_scalar_immediate<0b00, 0b0, ZZZZ_b_strided, simm4s4, "st1b">;
-def ST1H_VG2_M2ZPXX : sme2_st_vector_vg2_multi_scalar_scalar<0b01, 0b0, ZZ_h_strided, GPR64shifted16, "st1h">;
-def ST1H_VG4_M4ZPXX : sme2_st_vector_vg4_multi_scalar_scalar<0b01, 0b0, ZZZZ_h_strided, GPR64shifted16, "st1h">;
-defm ST1H_VG2_M2ZPXI : sme2_st_vector_vg2_multi_scalar_immediate<0b01, 0b0, ZZ_h_strided, simm4s2, "st1h">;
-defm ST1H_VG4_M4ZPXI : sme2_st_vector_vg4_multi_scalar_immediate<0b01, 0b0, ZZZZ_h_strided, simm4s4, "st1h">;
-def ST1W_VG2_M2ZPXX : sme2_st_vector_vg2_multi_scalar_scalar<0b10, 0b0, ZZ_s_strided, GPR64shifted32, "st1w">;
-def ST1W_VG4_M4ZPXX : sme2_st_vector_vg4_multi_scalar_scalar<0b10, 0b0, ZZZZ_s_strided, GPR64shifted32, "st1w">;
-defm ST1W_VG2_M2ZPXI : sme2_st_vector_vg2_multi_scalar_immediate<0b10, 0b0, ZZ_s_strided, simm4s2, "st1w">;
-defm ST1W_VG4_M4ZPXI : sme2_st_vector_vg4_multi_scalar_immediate<0b10, 0b0, ZZZZ_s_strided, simm4s4, "st1w">;
-def ST1D_VG2_M2ZPXX : sme2_st_vector_vg2_multi_scalar_scalar<0b11, 0b0, ZZ_d_strided, GPR64shifted64, "st1d">;
-def ST1D_VG4_M4ZPXX : sme2_st_vector_vg4_multi_scalar_scalar<0b11, 0b0, ZZZZ_d_strided, GPR64shifted64, "st1d">;
-defm ST1D_VG2_M2ZPXI : sme2_st_vector_vg2_multi_scalar_immediate<0b11, 0b0, ZZ_d_strided, simm4s2, "st1d">;
-defm ST1D_VG4_M4ZPXI : sme2_st_vector_vg4_multi_scalar_immediate<0b11, 0b0, ZZZZ_d_strided, simm4s4, "st1d">;
-
-def STNT1B_VG2_M2ZPXX : sme2_st_vector_vg2_multi_scalar_scalar<0b00, 0b1, ZZ_b_strided, GPR64shifted8, "stnt1b">;
-def STNT1B_VG4_M4ZPXX : sme2_st_vector_vg4_multi_scalar_scalar<0b00, 0b1, ZZZZ_b_strided, GPR64shifted8, "stnt1b">;
-defm STNT1B_VG2_M2ZPXI : sme2_st_vector_vg2_multi_scalar_immediate<0b00, 0b1, ZZ_b_strided, simm4s2, "stnt1b">;
-defm STNT1B_VG4_M4ZPXI : sme2_st_vector_vg4_multi_scalar_immediate<0b00, 0b1, ZZZZ_b_strided, simm4s4, "stnt1b">;
-def STNT1H_VG2_M2ZPXX : sme2_st_vector_vg2_multi_scalar_scalar<0b01, 0b1, ZZ_h_strided, GPR64shifted16, "stnt1h">;
-def STNT1H_VG4_M4ZPXX : sme2_st_vector_vg4_multi_scalar_scalar<0b01, 0b1, ZZZZ_h_strided, GPR64shifted16, "stnt1h">;
-defm STNT1H_VG2_M2ZPXI : sme2_st_vector_vg2_multi_scalar_immediate<0b01, 0b1, ZZ_h_strided, simm4s2, "stnt1h">;
-defm STNT1H_VG4_M4ZPXI : sme2_st_vector_vg4_multi_scalar_immediate<0b01, 0b1, ZZZZ_h_strided, simm4s4, "stnt1h">;
-def STNT1W_VG2_M2ZPXX : sme2_st_vector_vg2_multi_scalar_scalar<0b10, 0b1, ZZ_s_strided, GPR64shifted32, "stnt1w">;
-def STNT1W_VG4_M4ZPXX : sme2_st_vector_vg4_multi_scalar_scalar<0b10, 0b1, ZZZZ_s_strided, GPR64shifted32, "stnt1w">;
-defm STNT1W_VG2_M2ZPXI : sme2_st_vector_vg2_multi_scalar_immediate<0b10, 0b1, ZZ_s_strided, simm4s2, "stnt1w">;
-defm STNT1W_VG4_M4ZPXI : sme2_st_vector_vg4_multi_scalar_immediate<0b10, 0b1, ZZZZ_s_strided, simm4s4, "stnt1w">;
-def STNT1D_VG2_M2ZPXX : sme2_st_vector_vg2_multi_scalar_scalar<0b11, 0b1, ZZ_d_strided, GPR64shifted64, "stnt1d">;
-def STNT1D_VG4_M4ZPXX : sme2_st_vector_vg4_multi_scalar_scalar<0b11, 0b1, ZZZZ_d_strided, GPR64shifted64, "stnt1d">;
-defm STNT1D_VG2_M2ZPXI : sme2_st_vector_vg2_multi_scalar_immediate<0b11, 0b1, ZZ_d_strided, simm4s2, "stnt1d">;
-defm STNT1D_VG4_M4ZPXI : sme2_st_vector_vg4_multi_scalar_immediate<0b11, 0b1, ZZZZ_d_strided, simm4s4, "stnt1d">;
+defm SQRSHR_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"sqrshr", 0b0, 0b0, int_aarch64_sve_sqrshr_x2>;
+defm SQRSHR_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshr", 0b000, int_aarch64_sve_sqrshr_x4>;
+
+defm UQRSHR_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"uqrshr", 0b0, 0b1, int_aarch64_sve_uqrshr_x2>;
+defm UQRSHR_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshr", 0b001, int_aarch64_sve_uqrshr_x4>;
+
+defm SQRSHRU_VG2_Z2ZI : sme2_sat_shift_vector_vg2<"sqrshru", 0b1, 0b0, int_aarch64_sve_sqrshru_x2>;
+defm SQRSHRU_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshru", 0b010, int_aarch64_sve_sqrshru_x4>;
+
+defm SQRSHRN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrn", 0b100, int_aarch64_sve_sqrshrn_x4>;
+defm UQRSHRN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshrn", 0b101, int_aarch64_sve_uqrshrn_x4>;
+defm SQRSHRUN_VG4_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrun", 0b110, int_aarch64_sve_sqrshrun_x4>;
+
+defm SEL_VG2_2ZC2Z2Z: sme2_sel_vector_vg2<"sel">;
+defm SEL_VG4_4ZC4Z4Z: sme2_sel_vector_vg4<"sel">;
+
+def LD1B_2Z_STRIDED : sme2_ld_vector_vg2_multi_scalar_scalar<0b00, 0b0, ZZ_b_strided, GPR64shifted8, "ld1b">;
+def LD1B_4Z_STRIDED : sme2_ld_vector_vg4_multi_scalar_scalar<0b00, 0b0, ZZZZ_b_strided, GPR64shifted8, "ld1b">;
+defm LD1B_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b00, 0b0, ZZ_b_strided, simm4s2, "ld1b">;
+defm LD1B_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b00, 0b0, ZZZZ_b_strided, simm4s4, "ld1b">;
+def LD1H_2Z_STRIDED : sme2_ld_vector_vg2_multi_scalar_scalar<0b01, 0b0, ZZ_h_strided, GPR64shifted16, "ld1h">;
+def LD1H_4Z_STRIDED : sme2_ld_vector_vg4_multi_scalar_scalar<0b01, 0b0, ZZZZ_h_strided, GPR64shifted16, "ld1h">;
+defm LD1H_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b01, 0b0, ZZ_h_strided, simm4s2, "ld1h">;
+defm LD1H_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b01, 0b0, ZZZZ_h_strided, simm4s4, "ld1h">;
+def LD1W_2Z_STRIDED : sme2_ld_vector_vg2_multi_scalar_scalar<0b10, 0b0, ZZ_s_strided, GPR64shifted32, "ld1w">;
+def LD1W_4Z_STRIDED : sme2_ld_vector_vg4_multi_scalar_scalar<0b10, 0b0, ZZZZ_s_strided, GPR64shifted32, "ld1w">;
+defm LD1W_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b10, 0b0, ZZ_s_strided, simm4s2, "ld1w">;
+defm LD1W_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b10, 0b0, ZZZZ_s_strided, simm4s4, "ld1w">;
+def LD1D_2Z_STRIDED : sme2_ld_vector_vg2_multi_scalar_scalar<0b11, 0b0, ZZ_d_strided, GPR64shifted64, "ld1d">;
+def LD1D_4Z_STRIDED : sme2_ld_vector_vg4_multi_scalar_scalar<0b11, 0b0, ZZZZ_d_strided, GPR64shifted64, "ld1d">;
+defm LD1D_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b11, 0b0, ZZ_d_strided, simm4s2, "ld1d">;
+defm LD1D_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b11, 0b0, ZZZZ_d_strided, simm4s4, "ld1d">;
+
+def LDNT1B_2Z_STRIDED : sme2_ld_vector_vg2_multi_scalar_scalar<0b00, 0b1, ZZ_b_strided, GPR64shifted8, "ldnt1b">;
+def LDNT1B_4Z_STRIDED : sme2_ld_vector_vg4_multi_scalar_scalar<0b00, 0b1, ZZZZ_b_strided, GPR64shifted8, "ldnt1b">;
+defm LDNT1B_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b00, 0b1, ZZ_b_strided, simm4s2, "ldnt1b">;
+defm LDNT1B_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b00, 0b1, ZZZZ_b_strided, simm4s4, "ldnt1b">;
+def LDNT1H_2Z_STRIDED : sme2_ld_vector_vg2_multi_scalar_scalar<0b01, 0b1, ZZ_h_strided, GPR64shifted16, "ldnt1h">;
+def LDNT1H_4Z_STRIDED : sme2_ld_vector_vg4_multi_scalar_scalar<0b01, 0b1, ZZZZ_h_strided, GPR64shifted16, "ldnt1h">;
+defm LDNT1H_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b01, 0b1, ZZ_h_strided, simm4s2, "ldnt1h">;
+defm LDNT1H_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b01, 0b1, ZZZZ_h_strided, simm4s4, "ldnt1h">;
+def LDNT1W_2Z_STRIDED : sme2_ld_vector_vg2_multi_scalar_scalar<0b10, 0b1, ZZ_s_strided, GPR64shifted32, "ldnt1w">;
+def LDNT1W_4Z_STRIDED : sme2_ld_vector_vg4_multi_scalar_scalar<0b10, 0b1, ZZZZ_s_strided, GPR64shifted32, "ldnt1w">;
+defm LDNT1W_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b10, 0b1, ZZ_s_strided, simm4s2, "ldnt1w">;
+defm LDNT1W_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b10, 0b1, ZZZZ_s_strided, simm4s4, "ldnt1w">;
+def LDNT1D_2Z_STRIDED : sme2_ld_vector_vg2_multi_scalar_scalar<0b11, 0b1, ZZ_d_strided, GPR64shifted64, "ldnt1d">;
+def LDNT1D_4Z_STRIDED : sme2_ld_vector_vg4_multi_scalar_scalar<0b11, 0b1, ZZZZ_d_strided, GPR64shifted64, "ldnt1d">;
+defm LDNT1D_2Z_STRIDED_IMM : sme2_ld_vector_vg2_multi_scalar_immediate<0b11, 0b1, ZZ_d_strided, simm4s2, "ldnt1d">;
+defm LDNT1D_4Z_STRIDED_IMM : sme2_ld_vector_vg4_multi_scalar_immediate<0b11, 0b1, ZZZZ_d_strided, simm4s4, "ldnt1d">;
+
+def ST1B_2Z_STRIDED : sme2_st_vector_vg2_multi_scalar_scalar<0b00, 0b0, ZZ_b_strided, GPR64shifted8, "st1b">;
+def ST1B_4Z_STRIDED : sme2_st_vector_vg4_multi_scalar_scalar<0b00, 0b0, ZZZZ_b_strided, GPR64shifted8, "st1b">;
+defm ST1B_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b00, 0b0, ZZ_b_strided, simm4s2, "st1b">;
+defm ST1B_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b00, 0b0, ZZZZ_b_strided, simm4s4, "st1b">;
+def ST1H_2Z_STRIDED : sme2_st_vector_vg2_multi_scalar_scalar<0b01, 0b0, ZZ_h_strided, GPR64shifted16, "st1h">;
+def ST1H_4Z_STRIDED : sme2_st_vector_vg4_multi_scalar_scalar<0b01, 0b0, ZZZZ_h_strided, GPR64shifted16, "st1h">;
+defm ST1H_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b01, 0b0, ZZ_h_strided, simm4s2, "st1h">;
+defm ST1H_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b01, 0b0, ZZZZ_h_strided, simm4s4, "st1h">;
+def ST1W_2Z_STRIDED : sme2_st_vector_vg2_multi_scalar_scalar<0b10, 0b0, ZZ_s_strided, GPR64shifted32, "st1w">;
+def ST1W_4Z_STRIDED : sme2_st_vector_vg4_multi_scalar_scalar<0b10, 0b0, ZZZZ_s_strided, GPR64shifted32, "st1w">;
+defm ST1W_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b10, 0b0, ZZ_s_strided, simm4s2, "st1w">;
+defm ST1W_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b10, 0b0, ZZZZ_s_strided, simm4s4, "st1w">;
+def ST1D_2Z_STRIDED : sme2_st_vector_vg2_multi_scalar_scalar<0b11, 0b0, ZZ_d_strided, GPR64shifted64, "st1d">;
+def ST1D_4Z_STRIDED : sme2_st_vector_vg4_multi_scalar_scalar<0b11, 0b0, ZZZZ_d_strided, GPR64shifted64, "st1d">;
+defm ST1D_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b11, 0b0, ZZ_d_strided, simm4s2, "st1d">;
+defm ST1D_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b11, 0b0, ZZZZ_d_strided, simm4s4, "st1d">;
+
+def STNT1B_2Z_STRIDED : sme2_st_vector_vg2_multi_scalar_scalar<0b00, 0b1, ZZ_b_strided, GPR64shifted8, "stnt1b">;
+def STNT1B_4Z_STRIDED : sme2_st_vector_vg4_multi_scalar_scalar<0b00, 0b1, ZZZZ_b_strided, GPR64shifted8, "stnt1b">;
+defm STNT1B_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b00, 0b1, ZZ_b_strided, simm4s2, "stnt1b">;
+defm STNT1B_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b00, 0b1, ZZZZ_b_strided, simm4s4, "stnt1b">;
+def STNT1H_2Z_STRIDED : sme2_st_vector_vg2_multi_scalar_scalar<0b01, 0b1, ZZ_h_strided, GPR64shifted16, "stnt1h">;
+def STNT1H_4Z_STRIDED : sme2_st_vector_vg4_multi_scalar_scalar<0b01, 0b1, ZZZZ_h_strided, GPR64shifted16, "stnt1h">;
+defm STNT1H_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b01, 0b1, ZZ_h_strided, simm4s2, "stnt1h">;
+defm STNT1H_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b01, 0b1, ZZZZ_h_strided, simm4s4, "stnt1h">;
+def STNT1W_2Z_STRIDED : sme2_st_vector_vg2_multi_scalar_scalar<0b10, 0b1, ZZ_s_strided, GPR64shifted32, "stnt1w">;
+def STNT1W_4Z_STRIDED : sme2_st_vector_vg4_multi_scalar_scalar<0b10, 0b1, ZZZZ_s_strided, GPR64shifted32, "stnt1w">;
+defm STNT1W_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b10, 0b1, ZZ_s_strided, simm4s2, "stnt1w">;
+defm STNT1W_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b10, 0b1, ZZZZ_s_strided, simm4s4, "stnt1w">;
+def STNT1D_2Z_STRIDED : sme2_st_vector_vg2_multi_scalar_scalar<0b11, 0b1, ZZ_d_strided, GPR64shifted64, "stnt1d">;
+def STNT1D_4Z_STRIDED : sme2_st_vector_vg4_multi_scalar_scalar<0b11, 0b1, ZZZZ_d_strided, GPR64shifted64, "stnt1d">;
+defm STNT1D_2Z_STRIDED_IMM : sme2_st_vector_vg2_multi_scalar_immediate<0b11, 0b1, ZZ_d_strided, simm4s2, "stnt1d">;
+defm STNT1D_4Z_STRIDED_IMM : sme2_st_vector_vg4_multi_scalar_immediate<0b11, 0b1, ZZZZ_d_strided, simm4s4, "stnt1d">;
}
let Predicates = [HasSME2, HasSMEI16I64] in {
@@ -715,65 +715,65 @@ defm SUB_VG4_M4ZZ_D : sme2_dot_mla_add_sub_array_vg4_single<"sub", 0b1111011, M
defm SUB_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"sub", 0b111011, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_write_za_vg1x2>;
defm SUB_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"sub", 0b111011, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_write_za_vg1x4>;
-defm ADD_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"add", 0b1010, MatrixOp64, ZZ_d_mul_r>;
-defm ADD_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"add", 0b1010, MatrixOp64, ZZZZ_d_mul_r>;
+defm ADD_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"add", 0b1010, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_za64_vg1x2>;
+defm ADD_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"add", 0b1010, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_add_za64_vg1x4>;
-defm SUB_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"sub", 0b1011, MatrixOp64, ZZ_d_mul_r>;
-defm SUB_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"sub", 0b1011, MatrixOp64, ZZZZ_d_mul_r>;
+defm SUB_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"sub", 0b1011, MatrixOp64, ZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_za64_vg1x2>;
+defm SUB_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"sub", 0b1011, MatrixOp64, ZZZZ_d_mul_r, nxv2i64, int_aarch64_sme_sub_za64_vg1x4>;
-defm SDOT_VG2_M2ZZI_HtoD : sme2_multi_vec_array_vg2_index_64b<"sdot", 0b01, ZZ_h_mul_r, ZPR4b16, nxv8i16, null_frag>;
-defm SDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"sdot", 0b001, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, null_frag>;
-defm SDOT_VG2_M2ZZ_HtoD : sme2_dot_mla_add_sub_array_vg24_single<"sdot", 0b1010100, MatrixOp64, ZZ_h, ZPR4b16>;
-defm SDOT_VG4_M4ZZ_HtoD : sme2_dot_mla_add_sub_array_vg24_single<"sdot", 0b1110100, MatrixOp64, ZZZZ_h, ZPR4b16>;
-defm SDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b110100, MatrixOp64, ZZ_h_mul_r, nxv8i16, null_frag>;
-defm SDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b110100, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, null_frag>;
+defm SDOT_VG2_M2ZZI_HtoD : sme2_multi_vec_array_vg2_index_64b<"sdot", 0b01, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za64_vg1x2>;
+defm SDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"sdot", 0b001, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_lane_za64_vg1x4>;
+defm SDOT_VG2_M2ZZ_HtoD : sme2_dot_mla_add_sub_array_vg2_single<"sdot", 0b1010100, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za64_vg1x2>;
+defm SDOT_VG4_M4ZZ_HtoD : sme2_dot_mla_add_sub_array_vg4_single<"sdot", 0b1110100, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_sdot_single_za64_vg1x4>;
+defm SDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"sdot", 0b110100, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za64_vg1x2>;
+defm SDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"sdot", 0b110100, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_sdot_za64_vg1x4>;
defm SVDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"svdot", 0b101, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_svdot_lane_za64_vg1x4>;
-defm UDOT_VG2_M2ZZI_HtoD : sme2_multi_vec_array_vg2_index_64b<"udot", 0b11, ZZ_h_mul_r, ZPR4b16, nxv8i16, null_frag>;
-defm UDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"udot", 0b011, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, null_frag>;
-defm UDOT_VG2_M2ZZ_HtoD : sme2_dot_mla_add_sub_array_vg24_single<"udot", 0b1010110, MatrixOp64, ZZ_h, ZPR4b16>;
-defm UDOT_VG4_M4ZZ_HtoD : sme2_dot_mla_add_sub_array_vg24_single<"udot", 0b1110110, MatrixOp64, ZZZZ_h, ZPR4b16>;
-defm UDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b110110, MatrixOp64, ZZ_h_mul_r, nxv8i16, null_frag>;
-defm UDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b110110, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, null_frag>;
+defm UDOT_VG2_M2ZZI_HtoD : sme2_multi_vec_array_vg2_index_64b<"udot", 0b11, ZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za64_vg1x2>;
+defm UDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"udot", 0b011, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_udot_lane_za64_vg1x4>;
+defm UDOT_VG2_M2ZZ_HtoD : sme2_dot_mla_add_sub_array_vg2_single<"udot", 0b1010110, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za64_vg1x2>;
+defm UDOT_VG4_M4ZZ_HtoD : sme2_dot_mla_add_sub_array_vg4_single<"udot", 0b1110110, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_udot_single_za64_vg1x4>;
+defm UDOT_VG2_M2Z2Z_HtoD : sme2_dot_mla_add_sub_array_vg2_multi<"udot", 0b110110, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za64_vg1x2>;
+defm UDOT_VG4_M4Z4Z_HtoD : sme2_dot_mla_add_sub_array_vg4_multi<"udot", 0b110110, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_udot_za64_vg1x4>;
defm UVDOT_VG4_M4ZZI_HtoD : sme2_multi_vec_array_vg4_index_64b<"uvdot", 0b111, ZZZZ_h_mul_r, ZPR4b16, nxv8i16, int_aarch64_sme_uvdot_lane_za64_vg1x4>;
-def SMLALL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"smlall", 0b00>;
-defm SMLALL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"smlall", 0b00>;
-defm SMLALL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"smlall", 0b00>;
-def SMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"smlall", 0b1000, MatrixOp64, ZPR16, ZPR4b16>;
-defm SMLALL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg24_single<"smlall", 0b10000, MatrixOp64, ZZ_h, ZPR4b16>;
-defm SMLALL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg24_single<"smlall", 0b11000, MatrixOp64, ZZZZ_h, ZPR4b16>;
-defm SMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlall", 0b1000, MatrixOp64, ZZ_h_mul_r>;
-defm SMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlall", 0b1000, MatrixOp64, ZZZZ_h_mul_r>;
-
-def SMLSLL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"smlsll", 0b01>;
-defm SMLSLL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"smlsll", 0b01>;
-defm SMLSLL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"smlsll", 0b01>;
-def SMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"smlsll", 0b1010, MatrixOp64, ZPR16, ZPR4b16>;
-defm SMLSLL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg24_single<"smlsll", 0b10010, MatrixOp64, ZZ_h, ZPR4b16>;
-defm SMLSLL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg24_single<"smlsll", 0b11010, MatrixOp64, ZZZZ_h, ZPR4b16>;
-defm SMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlsll", 0b1010, MatrixOp64, ZZ_h_mul_r>;
-defm SMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlsll", 0b1010, MatrixOp64, ZZZZ_h_mul_r>;
-
-def UMLALL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"umlall", 0b10>;
-defm UMLALL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"umlall", 0b10>;
-defm UMLALL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"umlall", 0b10>;
-def UMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"umlall", 0b1100, MatrixOp64, ZPR16, ZPR4b16>;
-defm UMLALL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg24_single<"umlall", 0b10100, MatrixOp64, ZZ_h, ZPR4b16>;
-defm UMLALL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg24_single<"umlall", 0b11100, MatrixOp64, ZZZZ_h, ZPR4b16>;
-defm UMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlall", 0b1100, MatrixOp64, ZZ_h_mul_r>;
-defm UMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlall", 0b1100, MatrixOp64, ZZZZ_h_mul_r>;
-
-def UMLSLL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"umlsll", 0b11>;
-defm UMLSLL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"umlsll", 0b11>;
-defm UMLSLL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"umlsll", 0b11>;
-def UMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"umlsll", 0b1110, MatrixOp64, ZPR16, ZPR4b16>;
-defm UMLSLL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg24_single<"umlsll", 0b10110, MatrixOp64, ZZ_h, ZPR4b16>;
-defm UMLSLL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg24_single<"umlsll", 0b11110, MatrixOp64, ZZZZ_h, ZPR4b16>;
-defm UMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlsll", 0b1110, MatrixOp64, ZZ_h_mul_r>;
-defm UMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlsll", 0b1110, MatrixOp64, ZZZZ_h_mul_r>;
+defm SMLALL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"smlall", 0b00, int_aarch64_sme_smla_za64_lane_vg4x1>;
+defm SMLALL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"smlall", 0b00, int_aarch64_sme_smla_za64_lane_vg4x2>;
+defm SMLALL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"smlall", 0b00, int_aarch64_sme_smla_za64_lane_vg4x4>;
+defm SMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"smlall", 0b1000, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x1>;
+defm SMLALL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"smlall", 0b10000, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x2>;
+defm SMLALL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"smlall", 0b11000, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smla_za64_single_vg4x4>;
+defm SMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlall", 0b1000, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_smla_za64_vg4x2>;
+defm SMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlall", 0b1000, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_smla_za64_vg4x4>;
+
+defm SMLSLL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"smlsll", 0b01, int_aarch64_sme_smls_za64_lane_vg4x1>;
+defm SMLSLL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"smlsll", 0b01, int_aarch64_sme_smls_za64_lane_vg4x2>;
+defm SMLSLL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"smlsll", 0b01, int_aarch64_sme_smls_za64_lane_vg4x4>;
+defm SMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"smlsll", 0b1010, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x1>;
+defm SMLSLL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"smlsll", 0b10010, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x2>;
+defm SMLSLL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"smlsll", 0b11010, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_smls_za64_single_vg4x4>;
+defm SMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"smlsll", 0b1010, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_smls_za64_vg4x2>;
+defm SMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"smlsll", 0b1010, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_smls_za64_vg4x4>;
+
+defm UMLALL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"umlall", 0b10, int_aarch64_sme_umla_za64_lane_vg4x1>;
+defm UMLALL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"umlall", 0b10, int_aarch64_sme_umla_za64_lane_vg4x2>;
+defm UMLALL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"umlall", 0b10, int_aarch64_sme_umla_za64_lane_vg4x4>;
+defm UMLALL_MZZ_HtoD : sme2_mla_ll_array_single<"umlall", 0b1100, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x1>;
+defm UMLALL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"umlall", 0b10100, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x2>;
+defm UMLALL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"umlall", 0b11100, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umla_za64_single_vg4x4>;
+defm UMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlall", 0b1100, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_umla_za64_vg4x2>;
+defm UMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlall", 0b1100, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_umla_za64_vg4x4>;
+
+defm UMLSLL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"umlsll", 0b11, int_aarch64_sme_umls_za64_lane_vg4x1>;
+defm UMLSLL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"umlsll", 0b11, int_aarch64_sme_umls_za64_lane_vg4x2>;
+defm UMLSLL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"umlsll", 0b11, int_aarch64_sme_umls_za64_lane_vg4x4>;
+defm UMLSLL_MZZ_HtoD : sme2_mla_ll_array_single<"umlsll", 0b1110, MatrixOp64, ZPR16, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x1>;
+defm UMLSLL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single<"umlsll", 0b10110, MatrixOp64, ZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x2>;
+defm UMLSLL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single<"umlsll", 0b11110, MatrixOp64, ZZZZ_h, ZPR4b16, nxv8i16, int_aarch64_sme_umls_za64_single_vg4x4>;
+defm UMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi<"umlsll", 0b1110, MatrixOp64, ZZ_h_mul_r, nxv8i16, int_aarch64_sme_umls_za64_vg4x2>;
+defm UMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi<"umlsll", 0b1110, MatrixOp64, ZZZZ_h_mul_r, nxv8i16, int_aarch64_sme_umls_za64_vg4x4>;
}
let Predicates = [HasSME2, HasSMEF64F64] in {
@@ -791,11 +791,11 @@ defm FMLS_VG4_M4ZZ_D : sme2_dot_mla_add_sub_array_vg4_single<"fmls", 0b1111001,
defm FMLS_VG2_M2Z2Z_D : sme2_dot_mla_add_sub_array_vg2_multi<"fmls", 0b111001, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmls_vg1x2>;
defm FMLS_VG4_M4Z4Z_D : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b111001, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_fmls_vg1x4>;
-defm FADD_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"fadd", 0b1000, MatrixOp64, ZZ_d_mul_r>;
-defm FADD_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"fadd", 0b1000, MatrixOp64, ZZZZ_d_mul_r>;
+defm FADD_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"fadd", 0b1000, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_add_za64_vg1x2>;
+defm FADD_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"fadd", 0b1000, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_add_za64_vg1x4>;
-defm FSUB_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"fsub", 0b1001, MatrixOp64, ZZ_d_mul_r>;
-defm FSUB_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"fsub", 0b1001, MatrixOp64, ZZZZ_d_mul_r>;
+defm FSUB_VG2_M2Z_D : sme2_multivec_accum_add_sub_vg2<"fsub", 0b1001, MatrixOp64, ZZ_d_mul_r, nxv2f64, int_aarch64_sme_sub_za64_vg1x2>;
+defm FSUB_VG4_M4Z_D : sme2_multivec_accum_add_sub_vg4<"fsub", 0b1001, MatrixOp64, ZZZZ_d_mul_r, nxv2f64, int_aarch64_sme_sub_za64_vg1x4>;
}
let Predicates = [HasSME2p1] in {
@@ -815,10 +815,10 @@ defm LUTI4_S_4ZTZI : sme2p1_luti4_vector_vg4_index<"luti4">;
}
let Predicates = [HasSME2p1, HasSMEF16F16] in {
-defm FADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fadd", 0b0100, MatrixOp16, ZZ_h_mul_r>;
-defm FADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fadd", 0b0100, MatrixOp16, ZZZZ_h_mul_r>;
-defm FSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fsub", 0b0101, MatrixOp16, ZZ_h_mul_r>;
-defm FSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fsub", 0b0101, MatrixOp16, ZZZZ_h_mul_r>;
+defm FADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fadd", 0b0100, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>;
+defm FADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fadd", 0b0100, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>;
+defm FSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"fsub", 0b0101, MatrixOp16, ZZ_h_mul_r, nxv8f16, null_frag>;
+defm FSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"fsub", 0b0101, MatrixOp16, ZZZZ_h_mul_r, nxv8f16, null_frag>;
defm FMLA_VG2_M2ZZI_H : sme2p1_multi_vec_array_vg2_index_16b<"fmla", 0b00>;
defm FMLA_VG4_M4ZZI_H : sme2p1_multi_vec_array_vg4_index_16b<"fmla", 0b00>;
@@ -842,10 +842,10 @@ defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1>;
}
let Predicates = [HasSME2p1, HasB16B16] in {
-defm BFADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfadd", 0b1100, MatrixOp16, ZZ_h_mul_r>;
-defm BFADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfadd", 0b1100, MatrixOp16, ZZZZ_h_mul_r>;
-defm BFSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfsub", 0b1101, MatrixOp16, ZZ_h_mul_r>;
-defm BFSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfsub", 0b1101, MatrixOp16, ZZZZ_h_mul_r>;
+defm BFADD_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfadd", 0b1100, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>;
+defm BFADD_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfadd", 0b1100, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>;
+defm BFSUB_VG2_M2Z_H : sme2_multivec_accum_add_sub_vg2<"bfsub", 0b1101, MatrixOp16, ZZ_h_mul_r, nxv8bf16, null_frag>;
+defm BFSUB_VG4_M4Z_H : sme2_multivec_accum_add_sub_vg4<"bfsub", 0b1101, MatrixOp16, ZZZZ_h_mul_r, nxv8bf16, null_frag>;
defm BFMLA_VG2_M2ZZI : sme2p1_multi_vec_array_vg2_index_16b<"bfmla", 0b10>;
defm BFMLA_VG4_M4ZZI : sme2p1_multi_vec_array_vg4_index_16b<"bfmla", 0b10>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 6a42c4ff31dc..ad404e8dab2a 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -171,7 +171,8 @@ def SDT_AArch64Arith : SDTypeProfile<1, 3, [
def SDT_AArch64FMA : SDTypeProfile<1, 4, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>, SDTCisVec<4>,
- SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>, SDTCisSameAs<2,3>, SDTCisSameAs<3,4>
+ SDTCVecEltisVT<1,i1>, SDTCisSameNumEltsAs<0,1>,
+ SDTCisSameAs<0,2>, SDTCisSameAs<0,3>, SDTCisSameAs<0,4>
]>;
// Predicated operations with the result of inactive lanes being unspecified.
@@ -207,6 +208,10 @@ def AArch64fadd_p_nsz : PatFrag<(ops node:$op1, node:$op2, node:$op3),
(AArch64fadd_p node:$op1, node:$op2, node:$op3), [{
return N->getFlags().hasNoSignedZeros();
}]>;
+def AArch64fsub_p_nsz : PatFrag<(ops node:$op1, node:$op2, node:$op3),
+ (AArch64fsub_p node:$op1, node:$op2, node:$op3), [{
+ return N->getFlags().hasNoSignedZeros();
+}]>;
def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3,i32>,
@@ -244,22 +249,29 @@ def AArch64revh_mt : SDNode<"AArch64ISD::REVH_MERGE_PASSTHRU", SDT_AArch64Arit
def AArch64revw_mt : SDNode<"AArch64ISD::REVW_MERGE_PASSTHRU", SDT_AArch64Arith>;
def AArch64revd_mt : SDNode<"AArch64ISD::REVD_MERGE_PASSTHRU", SDT_AArch64Arith>;
+def AArch64fneg_mt_nsz : PatFrag<(ops node:$pred, node:$op, node:$pt),
+ (AArch64fneg_mt node:$pred, node:$op, node:$pt), [{
+ return N->getFlags().hasNoSignedZeros();
+}]>;
+
// These are like the above but we don't yet have need for ISD nodes. They allow
// a single pattern to match intrinsic and ISD operand layouts.
def AArch64cls_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cls node:$pt, node:$pg, node:$op)]>;
def AArch64cnot_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_cnot node:$pt, node:$pg, node:$op)]>;
def AArch64not_mt : PatFrags<(ops node:$pg, node:$op, node:$pt), [(int_aarch64_sve_not node:$pt, node:$pg, node:$op)]>;
-def AArch64fmul_m1 : EitherVSelectOrPassthruPatFrags<int_aarch64_sve_fmul, AArch64fmul_p>;
+def AArch64fmul_m1 : VSelectPredOrPassthruPatFrags<int_aarch64_sve_fmul, AArch64fmul_p>;
def AArch64fadd_m1 : PatFrags<(ops node:$pg, node:$op1, node:$op2), [
(int_aarch64_sve_fadd node:$pg, node:$op1, node:$op2),
(vselect node:$pg, (AArch64fadd_p (SVEAllActive), node:$op1, node:$op2), node:$op1),
- (AArch64fadd_p_nsz (SVEAllActive), node:$op1, (vselect node:$pg, node:$op2, (SVEDup0)))
+ (AArch64fadd_p_nsz (SVEAllActive), node:$op1, (vselect node:$pg, node:$op2, (SVEDup0))),
+ (AArch64fadd_p (SVEAllActive), node:$op1, (vselect node:$pg, node:$op2, (SVEDupNeg0)))
]>;
def AArch64fsub_m1 : PatFrags<(ops node:$pg, node:$op1, node:$op2), [
(int_aarch64_sve_fsub node:$pg, node:$op1, node:$op2),
(vselect node:$pg, (AArch64fsub_p (SVEAllActive), node:$op1, node:$op2), node:$op1),
- (AArch64fsub_p (SVEAllActive), node:$op1, (vselect node:$pg, node:$op2, (SVEDup0)))
+ (AArch64fsub_p (SVEAllActive), node:$op1, (vselect node:$pg, node:$op2, (SVEDup0))),
+ (AArch64fsub_p_nsz (SVEAllActive), node:$op1, (vselect node:$pg, node:$op2, (SVEDupNeg0)))
]>;
def AArch64shadd : PatFrags<(ops node:$pg, node:$op1, node:$op2),
@@ -285,11 +297,11 @@ def AArch64uaba : PatFrags<(ops node:$op1, node:$op2, node:$op3),
def AArch64usra : PatFrags<(ops node:$op1, node:$op2, node:$op3),
[(int_aarch64_sve_usra node:$op1, node:$op2, node:$op3),
- (add node:$op1, (AArch64lsr_p (SVEAllActive), node:$op2, (SVEShiftSplatImmR (i32 node:$op3))))]>;
+ (add node:$op1, (AArch64lsr_p (SVEAnyPredicate), node:$op2, (SVEShiftSplatImmR (i32 node:$op3))))]>;
def AArch64ssra : PatFrags<(ops node:$op1, node:$op2, node:$op3),
[(int_aarch64_sve_ssra node:$op1, node:$op2, node:$op3),
- (add node:$op1, (AArch64asr_p (SVEAllActive), node:$op2, (SVEShiftSplatImmR (i32 node:$op3))))]>;
+ (add node:$op1, (AArch64asr_p (SVEAnyPredicate), node:$op2, (SVEShiftSplatImmR (i32 node:$op3))))]>;
def SDT_AArch64FCVT : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
@@ -346,22 +358,32 @@ def AArch64fmul_p_oneuse : PatFrag<(ops node:$pred, node:$src1, node:$src2),
}]>;
-def AArch64fabd_p : PatFrag<(ops node:$pg, node:$op1, node:$op2),
- (AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)>;
+def AArch64fabd_p : PatFrags<(ops node:$pg, node:$op1, node:$op2),
+ [(int_aarch64_sve_fabd_u node:$pg, node:$op1, node:$op2),
+ (AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)]>;
+
+def AArch64fmla_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
+ [(AArch64fma_p node:$pg, node:$zn, node:$zm, node:$za),
+ (vselect node:$pg, (AArch64fma_p (AArch64ptrue 31), node:$zn, node:$zm, node:$za), node:$za)]>;
+
+def AArch64fmls_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
+ [(int_aarch64_sve_fmls_u node:$pg, node:$za, node:$zn, node:$zm),
+ (AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, node:$za),
+ (AArch64fma_p node:$pg, node:$zm, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$za),
+ (vselect node:$pg, (AArch64fma_p (AArch64ptrue 31), (AArch64fneg_mt (AArch64ptrue 31), node:$zn, (undef)), node:$zm, node:$za), node:$za)]>;
-// FMAs with a negated multiplication operand can be commuted.
-def AArch64fmls_p : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
- [(AArch64fma_p node:$pred, (AArch64fneg_mt node:$pred, node:$op1, (undef)), node:$op2, node:$op3),
- (AArch64fma_p node:$pred, node:$op2, (AArch64fneg_mt node:$pred, node:$op1, (undef)), node:$op3)]>;
+def AArch64fnmla_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
+ [(int_aarch64_sve_fnmla_u node:$pg, node:$za, node:$zn, node:$zm),
+ (AArch64fma_p node:$pg, (AArch64fneg_mt node:$pg, node:$zn, (undef)), node:$zm, (AArch64fneg_mt node:$pg, node:$za, (undef))),
+ (AArch64fneg_mt_nsz node:$pg, (AArch64fma_p node:$pg, node:$zn, node:$zm, node:$za), (undef))]>;
+
+def AArch64fnmls_p : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
+ [(int_aarch64_sve_fnmls_u node:$pg, node:$za, node:$zn, node:$zm),
+ (AArch64fma_p node:$pg, node:$zn, node:$zm, (AArch64fneg_mt node:$pg, node:$za, (undef)))]>;
def AArch64fsubr_p : PatFrag<(ops node:$pg, node:$op1, node:$op2),
(AArch64fsub_p node:$pg, node:$op2, node:$op1)>;
-def AArch64fneg_mt_nsz : PatFrag<(ops node:$pred, node:$op, node:$pt),
- (AArch64fneg_mt node:$pred, node:$op, node:$pt), [{
- return N->getFlags().hasNoSignedZeros();
-}]>;
-
def SDT_AArch64Arith_Unpred : SDTypeProfile<1, 2, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>,
SDTCisSameAs<0,1>, SDTCisSameAs<1,2>
@@ -377,44 +399,57 @@ def AArch64bic : PatFrags<(ops node:$op1, node:$op2),
def AArch64subr : PatFrag<(ops node:$op1, node:$op2),
(sub node:$op2, node:$op1)>;
-def AArch64add_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2),
- [(int_aarch64_sve_add node:$pred, node:$op1, node:$op2),
- (add node:$op1, (vselect node:$pred, node:$op2, (SVEDup0)))]>;
-def AArch64sub_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2),
- [(int_aarch64_sve_sub node:$pred, node:$op1, node:$op2),
- (sub node:$op1, (vselect node:$pred, node:$op2, (SVEDup0)))]>;
def AArch64mla_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
[(int_aarch64_sve_mla node:$pred, node:$op1, node:$op2, node:$op3),
- (add node:$op1, (AArch64mul_p_oneuse node:$pred, node:$op2, node:$op3)),
- // add(a, select(mask, mul(b, c), splat(0))) -> mla(a, mask, b, c)
- (add node:$op1, (vselect node:$pred, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3), (SVEDup0)))]>;
+ (vselect node:$pred, (add node:$op1, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3)), node:$op1)]>;
+def AArch64mla_p : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
+ [(int_aarch64_sve_mla_u node:$pred, node:$op1, node:$op2, node:$op3),
+ (add node:$op1, (AArch64mul_p_oneuse node:$pred, node:$op2, node:$op3))]>;
+def AArch64mad_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
+ [(int_aarch64_sve_mad node:$pred, node:$op1, node:$op2, node:$op3),
+ (vselect node:$pred, (add node:$op3, (AArch64mul_p_oneuse (SVEAllActive), node:$op1, node:$op2)), node:$op1),
+ (vselect node:$pred, (add node:$op3, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op1)), node:$op1)]>;
def AArch64mls_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
[(int_aarch64_sve_mls node:$pred, node:$op1, node:$op2, node:$op3),
- (sub node:$op1, (AArch64mul_p_oneuse node:$pred, node:$op2, node:$op3)),
- // sub(a, select(mask, mul(b, c), splat(0))) -> mls(a, mask, b, c)
- (sub node:$op1, (vselect node:$pred, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3), (SVEDup0)))]>;
+ (vselect node:$pred, (sub node:$op1, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op3)), node:$op1)]>;
+def AArch64mls_p : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
+ [(int_aarch64_sve_mls_u node:$pred, node:$op1, node:$op2, node:$op3),
+ (sub node:$op1, (AArch64mul_p_oneuse node:$pred, node:$op2, node:$op3))]>;
+def AArch64msb_m1 : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
+ [(int_aarch64_sve_msb node:$pred, node:$op1, node:$op2, node:$op3),
+ (vselect node:$pred, (sub node:$op3, (AArch64mul_p_oneuse (SVEAllActive), node:$op1, node:$op2)), node:$op1),
+ (vselect node:$pred, (sub node:$op3, (AArch64mul_p_oneuse (SVEAllActive), node:$op2, node:$op1)), node:$op1)]>;
def AArch64eor3 : PatFrags<(ops node:$op1, node:$op2, node:$op3),
[(int_aarch64_sve_eor3 node:$op1, node:$op2, node:$op3),
(xor node:$op1, (xor node:$op2, node:$op3))]>;
-class fma_patfrags<SDPatternOperator intrinsic, SDPatternOperator sdnode>
+class fma_patfrags<SDPatternOperator intrinsic, SDPatternOperator add>
: PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
[(intrinsic node:$pred, node:$op1, node:$op2, node:$op3),
- (sdnode (SVEAllActive), node:$op1, (vselect node:$pred, (AArch64fmul_p_oneuse (SVEAllActive), node:$op2, node:$op3), (SVEDup0)))],
- [{
- if ((N->getOpcode() != AArch64ISD::FADD_PRED) &&
- (N->getOpcode() != AArch64ISD::FSUB_PRED))
- return true; // it's the intrinsic
- return N->getFlags().hasAllowContract();
+ (vselect node:$pred, (add (SVEAllActive), node:$op1, (AArch64fmul_p_oneuse (SVEAllActive), node:$op2, node:$op3)), node:$op1)],
+[{
+ if (N->getOpcode() == ISD::VSELECT)
+ return N->getOperand(1)->getFlags().hasAllowContract();
+ return true; // it's the intrinsic
}]>;
-def AArch64fmla_m1 : fma_patfrags<int_aarch64_sve_fmla, AArch64fadd_p_nsz>;
+def AArch64fmla_m1 : fma_patfrags<int_aarch64_sve_fmla, AArch64fadd_p>;
def AArch64fmls_m1 : fma_patfrags<int_aarch64_sve_fmls, AArch64fsub_p>;
-def AArch64smax_m1 : EitherVSelectOrPassthruPatFrags<int_aarch64_sve_smax, AArch64smax_p>;
-def AArch64umax_m1 : EitherVSelectOrPassthruPatFrags<int_aarch64_sve_umax, AArch64umax_p>;
-def AArch64smin_m1 : EitherVSelectOrPassthruPatFrags<int_aarch64_sve_smin, AArch64smin_p>;
-def AArch64umin_m1 : EitherVSelectOrPassthruPatFrags<int_aarch64_sve_umin, AArch64umin_p>;
+def AArch64add_m1 : VSelectUnpredOrPassthruPatFrags<int_aarch64_sve_add, add>;
+def AArch64sub_m1 : VSelectUnpredOrPassthruPatFrags<int_aarch64_sve_sub, sub>;
+def AArch64mul_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_mul, AArch64mul_p>;
+def AArch64and_m1 : VSelectUnpredOrPassthruPatFrags<int_aarch64_sve_and, and>;
+def AArch64orr_m1 : VSelectUnpredOrPassthruPatFrags<int_aarch64_sve_orr, or>;
+def AArch64eor_m1 : VSelectUnpredOrPassthruPatFrags<int_aarch64_sve_eor, xor>;
+def AArch64smax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_smax, AArch64smax_p>;
+def AArch64umax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_umax, AArch64umax_p>;
+def AArch64smin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_smin, AArch64smin_p>;
+def AArch64umin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_umin, AArch64umin_p>;
+def AArch64fminnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fminnm, AArch64fminnm_p>;
+def AArch64fmaxnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmaxnm, AArch64fmaxnm_p>;
+def AArch64fmin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmin, AArch64fmin_p>;
+def AArch64fmax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmax, AArch64fmax_p>;
let Predicates = [HasSVE] in {
defm RDFFR_PPz : sve_int_rdffr_pred<0b0, "rdffr", int_aarch64_sve_rdffr_z>;
@@ -441,9 +476,9 @@ let Predicates = [HasSVEorSME] in {
defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", AArch64sub_m1, DestructiveBinaryCommWithRev, "SUBR_ZPmZ">;
defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", int_aarch64_sve_subr, DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>;
- defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", "ORR_ZPZZ", int_aarch64_sve_orr, DestructiveBinaryComm>;
- defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", "EOR_ZPZZ", int_aarch64_sve_eor, DestructiveBinaryComm>;
- defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", "AND_ZPZZ", int_aarch64_sve_and, DestructiveBinaryComm>;
+ defm ORR_ZPmZ : sve_int_bin_pred_log<0b000, "orr", "ORR_ZPZZ", AArch64orr_m1, DestructiveBinaryComm>;
+ defm EOR_ZPmZ : sve_int_bin_pred_log<0b001, "eor", "EOR_ZPZZ", AArch64eor_m1, DestructiveBinaryComm>;
+ defm AND_ZPmZ : sve_int_bin_pred_log<0b010, "and", "AND_ZPZZ", AArch64and_m1, DestructiveBinaryComm>;
defm BIC_ZPmZ : sve_int_bin_pred_log<0b011, "bic", "BIC_ZPZZ", int_aarch64_sve_bic, DestructiveBinary>;
} // End HasSVEorSME
@@ -467,10 +502,13 @@ let Predicates = [HasSVEorSME] in {
defm SQSUB_ZI : sve_int_arith_imm0<0b110, "sqsub", ssubsat>;
defm UQSUB_ZI : sve_int_arith_imm0<0b111, "uqsub", usubsat>;
- defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", int_aarch64_sve_mad>;
- defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb", int_aarch64_sve_msb>;
- defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", AArch64mla_m1>;
- defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", AArch64mls_m1>;
+ defm MAD_ZPmZZ : sve_int_mladdsub_vvv_pred<0b0, "mad", AArch64mad_m1, "MLA_ZPmZZ", /*isReverseInstr*/ 1>;
+ defm MSB_ZPmZZ : sve_int_mladdsub_vvv_pred<0b1, "msb", AArch64msb_m1, "MLS_ZPmZZ", /*isReverseInstr*/ 1>;
+ defm MLA_ZPmZZ : sve_int_mlas_vvv_pred<0b0, "mla", AArch64mla_m1, "MLA_ZPZZZ", "MAD_ZPmZZ">;
+ defm MLS_ZPmZZ : sve_int_mlas_vvv_pred<0b1, "mls", AArch64mls_m1, "MLS_ZPZZZ", "MSB_ZPmZZ">;
+
+ defm MLA_ZPZZZ : sve_int_3op_p_mladdsub<AArch64mla_p>;
+ defm MLS_ZPZZZ : sve_int_3op_p_mladdsub<AArch64mls_p>;
// SVE predicated integer reductions.
defm SADDV_VPZ : sve_int_reduce_0_saddv<0b000, "saddv", AArch64saddv_p>;
@@ -494,7 +532,7 @@ let Predicates = [HasSVEorSME] in {
defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_p>;
defm MUL_ZI : sve_int_arith_imm2<"mul", AArch64mul_p>;
- defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", "MUL_ZPZZ", int_aarch64_sve_mul, DestructiveBinaryComm>;
+ defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", "MUL_ZPZZ", AArch64mul_m1, DestructiveBinaryComm>;
defm SMULH_ZPmZ : sve_int_bin_pred_arit_2<0b010, "smulh", "SMULH_ZPZZ", int_aarch64_sve_smulh, DestructiveBinaryComm>;
defm UMULH_ZPmZ : sve_int_bin_pred_arit_2<0b011, "umulh", "UMULH_ZPZZ", int_aarch64_sve_umulh, DestructiveBinaryComm>;
@@ -593,10 +631,10 @@ let Predicates = [HasSVEorSME] in {
defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", "FSUB_ZPZZ", AArch64fsub_m1, DestructiveBinaryCommWithRev, "FSUBR_ZPmZ">;
defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", "FMUL_ZPZZ", AArch64fmul_m1, DestructiveBinaryComm>;
defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", "FSUBR_ZPZZ", int_aarch64_sve_fsubr, DestructiveBinaryCommWithRev, "FSUB_ZPmZ", /*isReverseInstr*/ 1>;
- defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", "FMAXNM_ZPZZ", int_aarch64_sve_fmaxnm, DestructiveBinaryComm>;
- defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", "FMINNM_ZPZZ", int_aarch64_sve_fminnm, DestructiveBinaryComm>;
- defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", "FMAX_ZPZZ", int_aarch64_sve_fmax, DestructiveBinaryComm>;
- defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin", "FMIN_ZPZZ", int_aarch64_sve_fmin, DestructiveBinaryComm>;
+ defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", "FMAXNM_ZPZZ", AArch64fmaxnm_m1, DestructiveBinaryComm>;
+ defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", "FMINNM_ZPZZ", AArch64fminnm_m1, DestructiveBinaryComm>;
+ defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", "FMAX_ZPZZ", AArch64fmax_m1, DestructiveBinaryComm>;
+ defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin", "FMIN_ZPZZ", AArch64fmin_m1, DestructiveBinaryComm>;
defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd", "FABD_ZPZZ", int_aarch64_sve_fabd, DestructiveBinaryComm>;
defm FSCALE_ZPmZ : sve_fp_2op_p_zds_fscale<0b1001, "fscale", int_aarch64_sve_fscale>;
defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx", "FMULX_ZPZZ", int_aarch64_sve_fmulx, DestructiveBinaryComm>;
@@ -611,6 +649,7 @@ let Predicates = [HasSVEorSME] in {
defm FMAX_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmax_p>;
defm FMIN_ZPZZ : sve_fp_bin_pred_hfd<AArch64fmin_p>;
defm FABD_ZPZZ : sve_fp_bin_pred_hfd<AArch64fabd_p>;
+ defm FMULX_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_fmulx_u>;
defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>;
} // End HasSVEorSME
@@ -649,7 +688,7 @@ let Predicates = [HasSVE] in {
} // End HasSVE
let Predicates = [HasSVEorSME] in {
- defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>;
+ defm FCADD_ZPmZ : sve_fp_fcadd<"fcadd", int_aarch64_sve_fcadd>;
defm FCMLA_ZPmZZ : sve_fp_fcmla<"fcmla", int_aarch64_sve_fcmla>;
defm FMLA_ZPmZZ : sve_fp_3op_p_zds_a<0b00, "fmla", "FMLA_ZPZZZ", AArch64fmla_m1, "FMAD_ZPmZZ">;
@@ -662,48 +701,10 @@ let Predicates = [HasSVEorSME] in {
defm FNMAD_ZPmZZ : sve_fp_3op_p_zds_b<0b10, "fnmad", int_aarch64_sve_fnmad, "FNMLA_ZPmZZ", /*isReverseInstr*/ 1>;
defm FNMSB_ZPmZZ : sve_fp_3op_p_zds_b<0b11, "fnmsb", int_aarch64_sve_fnmsb, "FNMLS_ZPmZZ", /*isReverseInstr*/ 1>;
- defm FMLA_ZPZZZ : sve_fp_3op_p_zds_zx;
- defm FMLS_ZPZZZ : sve_fp_3op_p_zds_zx;
- defm FNMLA_ZPZZZ : sve_fp_3op_p_zds_zx;
- defm FNMLS_ZPZZZ : sve_fp_3op_p_zds_zx;
-
- multiclass fma<ValueType Ty, ValueType PredTy, string Suffix> {
- // Zd = Za + Zn * Zm
- def : Pat<(Ty (AArch64fma_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za)),
- (!cast<Instruction>("FMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
-
- // Zd = Za + -Zn * Zm
- def : Pat<(Ty (AArch64fmls_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za)),
- (!cast<Instruction>("FMLS_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
-
- // Zd = -Za + Zn * Zm
- def : Pat<(Ty (AArch64fma_p PredTy:$P, Ty:$Zn, Ty:$Zm, (AArch64fneg_mt PredTy:$P, Ty:$Za, (Ty (undef))))),
- (!cast<Instruction>("FNMLS_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
-
- // Zd = -Za + -Zn * Zm
- def : Pat<(Ty (AArch64fma_p PredTy:$P, (AArch64fneg_mt PredTy:$P, Ty:$Zn, (Ty (undef))), Ty:$Zm, (AArch64fneg_mt PredTy:$P, Ty:$Za, (Ty (undef))))),
- (!cast<Instruction>("FNMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
-
- // Zd = -(Za + Zn * Zm)
- // (with nsz neg.)
- def : Pat<(AArch64fneg_mt_nsz PredTy:$P, (AArch64fma_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za), (Ty (undef))),
- (!cast<Instruction>("FNMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
-
- // Zda = Zda + Zn * Zm
- def : Pat<(vselect (PredTy PPR:$Pg), (Ty (AArch64fma_p (PredTy (AArch64ptrue 31)), ZPR:$Zn, ZPR:$Zm, ZPR:$Za)), ZPR:$Za),
- (!cast<Instruction>("FMLA_ZPmZZ_"#Suffix) PPR:$Pg, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
-
- // Zda = Zda + -Zn * Zm
- def : Pat<(vselect (PredTy PPR:$Pg), (Ty (AArch64fma_p (PredTy (AArch64ptrue 31)), (AArch64fneg_mt (PredTy (AArch64ptrue 31)), Ty:$Zn, (Ty (undef))), ZPR:$Zm, ZPR:$Za)), ZPR:$Za),
- (!cast<Instruction>("FMLS_ZPmZZ_"#Suffix) PPR:$Pg, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
- }
-
- defm : fma<nxv8f16, nxv8i1, "H">;
- defm : fma<nxv4f16, nxv4i1, "H">;
- defm : fma<nxv2f16, nxv2i1, "H">;
- defm : fma<nxv4f32, nxv4i1, "S">;
- defm : fma<nxv2f32, nxv2i1, "S">;
- defm : fma<nxv2f64, nxv2i1, "D">;
+ defm FMLA_ZPZZZ : sve_fp_3op_pred_hfd<AArch64fmla_p>;
+ defm FMLS_ZPZZZ : sve_fp_3op_pred_hfd<AArch64fmls_p>;
+ defm FNMLA_ZPZZZ : sve_fp_3op_pred_hfd<AArch64fnmla_p>;
+ defm FNMLS_ZPZZZ : sve_fp_3op_pred_hfd<AArch64fnmls_p>;
} // End HasSVEorSME
let Predicates = [HasSVE] in {
@@ -839,7 +840,7 @@ let Predicates = [HasSVEorSME] in {
defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", AArch64revh_mt>;
defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", AArch64revw_mt>;
- defm REV_PP : sve_int_perm_reverse_p<"rev", vector_reverse>;
+ defm REV_PP : sve_int_perm_reverse_p<"rev", vector_reverse, int_aarch64_sve_rev_b16, int_aarch64_sve_rev_b32, int_aarch64_sve_rev_b64>;
defm REV_ZZ : sve_int_perm_reverse_z<"rev", vector_reverse>;
defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>;
@@ -1672,12 +1673,12 @@ let Predicates = [HasSVEorSME] in {
defm TRN1_ZZZ : sve_int_perm_bin_perm_zz<0b100, "trn1", AArch64trn1>;
defm TRN2_ZZZ : sve_int_perm_bin_perm_zz<0b101, "trn2", AArch64trn2>;
- defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1", AArch64zip1>;
- defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2", AArch64zip2>;
- defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1", AArch64uzp1>;
- defm UZP2_PPP : sve_int_perm_bin_perm_pp<0b011, "uzp2", AArch64uzp2>;
- defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1", AArch64trn1>;
- defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2>;
+ defm ZIP1_PPP : sve_int_perm_bin_perm_pp<0b000, "zip1", AArch64zip1, int_aarch64_sve_zip1_b16, int_aarch64_sve_zip1_b32, int_aarch64_sve_zip1_b64>;
+ defm ZIP2_PPP : sve_int_perm_bin_perm_pp<0b001, "zip2", AArch64zip2, int_aarch64_sve_zip2_b16, int_aarch64_sve_zip2_b32, int_aarch64_sve_zip2_b64>;
+ defm UZP1_PPP : sve_int_perm_bin_perm_pp<0b010, "uzp1", AArch64uzp1, int_aarch64_sve_uzp1_b16, int_aarch64_sve_uzp1_b32, int_aarch64_sve_uzp1_b64>;
+ defm UZP2_PPP : sve_int_perm_bin_perm_pp<0b011, "uzp2", AArch64uzp2, int_aarch64_sve_uzp2_b16, int_aarch64_sve_uzp2_b32, int_aarch64_sve_uzp2_b64>;
+ defm TRN1_PPP : sve_int_perm_bin_perm_pp<0b100, "trn1", AArch64trn1, int_aarch64_sve_trn1_b16, int_aarch64_sve_trn1_b32, int_aarch64_sve_trn1_b64>;
+ defm TRN2_PPP : sve_int_perm_bin_perm_pp<0b101, "trn2", AArch64trn2, int_aarch64_sve_trn2_b16, int_aarch64_sve_trn2_b32, int_aarch64_sve_trn2_b64>;
// Extract lo/hi halves of legal predicate types.
def : Pat<(nxv1i1 (extract_subvector (nxv2i1 PPR:$Ps), (i64 0))),
@@ -1830,6 +1831,22 @@ let Predicates = [HasSVEorSME] in {
def : Pat<(nxv2bf16 (extract_subvector (nxv8bf16 ZPR:$Zs), (i64 6))),
(UUNPKHI_ZZ_D (UUNPKHI_ZZ_S ZPR:$Zs))>;
+ // extract/insert 64-bit fixed length vector from/into a scalable vector
+ foreach VT = [v8i8, v4i16, v2i32, v1i64, v4f16, v2f32, v1f64, v4bf16] in {
+ def : Pat<(VT (vector_extract_subvec (SVEContainerVT<VT>.Value ZPR:$Zs), (i64 0))),
+ (EXTRACT_SUBREG ZPR:$Zs, dsub)>;
+ def : Pat<(SVEContainerVT<VT>.Value (vector_insert_subvec undef, (VT V64:$src), (i64 0))),
+ (INSERT_SUBREG (IMPLICIT_DEF), $src, dsub)>;
+ }
+
+ // extract/insert 128-bit fixed length vector from/into a scalable vector
+ foreach VT = [v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64, v8bf16] in {
+ def : Pat<(VT (vector_extract_subvec (SVEContainerVT<VT>.Value ZPR:$Zs), (i64 0))),
+ (EXTRACT_SUBREG ZPR:$Zs, zsub)>;
+ def : Pat<(SVEContainerVT<VT>.Value (vector_insert_subvec undef, (VT V128:$src), (i64 0))),
+ (INSERT_SUBREG (IMPLICIT_DEF), $src, zsub)>;
+ }
+
// Concatenate two predicates.
def : Pat<(nxv2i1 (concat_vectors nxv1i1:$p1, nxv1i1:$p2)),
(UZP1_PPP_D $p1, $p2)>;
@@ -2059,6 +2076,10 @@ let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in {
defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<AArch64asrd_m1>;
+
+ defm ASR_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_asr, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
+ defm LSR_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_lsr, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
+ defm LSL_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_lsl, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;
} // End HasSVEorSME, UseExperimentalZeroingPseudos
let Predicates = [HasSVEorSME] in {
@@ -2356,12 +2377,12 @@ let Predicates = [HasSVEorSME] in {
defm : ld1rq_pat<nxv4i32, AArch64ld1rq_z, LD1RQ_W, am_sve_regreg_lsl2>;
defm : ld1rq_pat<nxv2i64, AArch64ld1rq_z, LD1RQ_D, am_sve_regreg_lsl3>;
- def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
- def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (SXTH_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
- def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i8), (SXTB_ZPmZ_UNDEF_D (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
- def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (SXTH_ZPmZ_UNDEF_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
- def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i8), (SXTB_ZPmZ_UNDEF_S (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
- def : Pat<(sext_inreg (nxv8i16 ZPR:$Zs), nxv8i8), (SXTB_ZPmZ_UNDEF_H (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$Zs)>;
+ def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i32), (SXTW_ZPmZ_D_UNDEF (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
+ def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i16), (SXTH_ZPmZ_D_UNDEF (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
+ def : Pat<(sext_inreg (nxv2i64 ZPR:$Zs), nxv2i8), (SXTB_ZPmZ_D_UNDEF (IMPLICIT_DEF), (PTRUE_D 31), ZPR:$Zs)>;
+ def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i16), (SXTH_ZPmZ_S_UNDEF (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
+ def : Pat<(sext_inreg (nxv4i32 ZPR:$Zs), nxv4i8), (SXTB_ZPmZ_S_UNDEF (IMPLICIT_DEF), (PTRUE_S 31), ZPR:$Zs)>;
+ def : Pat<(sext_inreg (nxv8i16 ZPR:$Zs), nxv8i8), (SXTB_ZPmZ_H_UNDEF (IMPLICIT_DEF), (PTRUE_H 31), ZPR:$Zs)>;
// General case that we ideally never want to match.
def : Pat<(vscale GPR64:$scale), (MADDXrrr (UBFMXri (RDVLI_XI 1), 4, 63), $scale, XZR)>;
@@ -2517,6 +2538,9 @@ let Predicates = [HasSVEorSME] in {
def : Pat<(nxv8f16 (bitconvert (nxv8bf16 ZPR:$src))), (nxv8f16 ZPR:$src)>;
def : Pat<(nxv4f32 (bitconvert (nxv8bf16 ZPR:$src))), (nxv4f32 ZPR:$src)>;
def : Pat<(nxv2f64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+
+ def : Pat<(nxv16i1 (bitconvert (aarch64svcount PPR:$src))), (nxv16i1 PPR:$src)>;
+ def : Pat<(aarch64svcount (bitconvert (nxv16i1 PPR:$src))), (aarch64svcount PPR:$src)>;
}
// These allow casting from/to unpacked predicate types.
@@ -2973,6 +2997,12 @@ let Predicates = [HasSVEorSME] in {
(INSERT_SUBREG (nxv4f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
def : Pat<(nxv2f16 (vector_insert (nxv2f16 (undef)), (f16 FPR16:$src), 0)),
(INSERT_SUBREG (nxv2f16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
+ def : Pat<(nxv8bf16 (vector_insert (nxv8bf16 (undef)), (bf16 FPR16:$src), 0)),
+ (INSERT_SUBREG (nxv8bf16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
+ def : Pat<(nxv4bf16 (vector_insert (nxv4bf16 (undef)), (bf16 FPR16:$src), 0)),
+ (INSERT_SUBREG (nxv4bf16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
+ def : Pat<(nxv2bf16 (vector_insert (nxv2bf16 (undef)), (bf16 FPR16:$src), 0)),
+ (INSERT_SUBREG (nxv2bf16 (IMPLICIT_DEF)), FPR16:$src, hsub)>;
def : Pat<(nxv4f32 (vector_insert (nxv4f32 (undef)), (f32 FPR32:$src), 0)),
(INSERT_SUBREG (nxv4f32 (IMPLICIT_DEF)), FPR32:$src, ssub)>;
def : Pat<(nxv2f32 (vector_insert (nxv2f32 (undef)), (f32 FPR32:$src), 0)),
@@ -2992,6 +3022,8 @@ let Predicates = [HasSVEorSME] in {
def : Pat<(nxv8f16 (vector_insert (nxv8f16 ZPR:$vec), (f16 FPR16:$src), 0)),
(SEL_ZPZZ_H (PTRUE_H 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), ZPR:$vec)>;
+ def : Pat<(nxv8bf16 (vector_insert (nxv8bf16 ZPR:$vec), (bf16 FPR16:$src), 0)),
+ (SEL_ZPZZ_H (PTRUE_H 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), ZPR:$vec)>;
def : Pat<(nxv4f32 (vector_insert (nxv4f32 ZPR:$vec), (f32 FPR32:$src), 0)),
(SEL_ZPZZ_S (PTRUE_S 1), (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), ZPR:$vec)>;
def : Pat<(nxv2f64 (vector_insert (nxv2f64 ZPR:$vec), (f64 FPR64:$src), 0)),
@@ -3042,6 +3074,24 @@ let Predicates = [HasSVEorSME] in {
(INDEX_II_H 0, 1),
(DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
$src)>;
+ def : Pat<(nxv2bf16 (vector_insert (nxv2bf16 ZPR:$vec), (bf16 FPR16:$src), GPR64:$index)),
+ (CPY_ZPmV_H ZPR:$vec,
+ (CMPEQ_PPzZZ_D (PTRUE_D 31),
+ (INDEX_II_D 0, 1),
+ (DUP_ZR_D GPR64:$index)),
+ $src)>;
+ def : Pat<(nxv4bf16 (vector_insert (nxv4bf16 ZPR:$vec), (bf16 FPR16:$src), GPR64:$index)),
+ (CPY_ZPmV_H ZPR:$vec,
+ (CMPEQ_PPzZZ_S (PTRUE_S 31),
+ (INDEX_II_S 0, 1),
+ (DUP_ZR_S (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+ $src)>;
+ def : Pat<(nxv8bf16 (vector_insert (nxv8bf16 ZPR:$vec), (bf16 FPR16:$src), GPR64:$index)),
+ (CPY_ZPmV_H ZPR:$vec,
+ (CMPEQ_PPzZZ_H (PTRUE_H 31),
+ (INDEX_II_H 0, 1),
+ (DUP_ZR_H (i32 (EXTRACT_SUBREG GPR64:$index, sub_32)))),
+ $src)>;
def : Pat<(nxv2f32 (vector_insert (nxv2f32 ZPR:$vec), (f32 FPR32:$src), GPR64:$index)),
(CPY_ZPmV_S ZPR:$vec,
(CMPEQ_PPzZZ_D (PTRUE_D 31),
@@ -3076,6 +3126,12 @@ let Predicates = [HasSVEorSME] in {
(LASTB_VPZ_H (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), GPR64:$index)),
(LASTB_VPZ_H (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(bf16 (vector_extract (nxv8bf16 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_H (WHILELS_PXX_H XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(bf16 (vector_extract (nxv4bf16 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_H (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
+ def : Pat<(bf16 (vector_extract (nxv2bf16 ZPR:$vec), GPR64:$index)),
+ (LASTB_VPZ_H (WHILELS_PXX_D XZR, GPR64:$index), ZPR:$vec)>;
def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), GPR64:$index)),
(LASTB_VPZ_S (WHILELS_PXX_S XZR, GPR64:$index), ZPR:$vec)>;
def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), GPR64:$index)),
@@ -3098,6 +3154,12 @@ let Predicates = [HasSVEorSME] in {
(EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), hsub)>;
def : Pat<(f16 (vector_extract (nxv2f16 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), hsub)>;
+ def : Pat<(bf16 (vector_extract (nxv8bf16 ZPR:$vec), sve_elm_idx_extdup_h:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_H ZPR:$vec, sve_elm_idx_extdup_h:$index), hsub)>;
+ def : Pat<(bf16 (vector_extract (nxv4bf16 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), hsub)>;
+ def : Pat<(bf16 (vector_extract (nxv2bf16 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
+ (EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), hsub)>;
def : Pat<(f32 (vector_extract (nxv4f32 ZPR:$vec), sve_elm_idx_extdup_s:$index)),
(EXTRACT_SUBREG (DUP_ZZI_S ZPR:$vec, sve_elm_idx_extdup_s:$index), ssub)>;
def : Pat<(f32 (vector_extract (nxv2f32 ZPR:$vec), sve_elm_idx_extdup_d:$index)),
@@ -3106,7 +3168,7 @@ let Predicates = [HasSVEorSME] in {
(EXTRACT_SUBREG (DUP_ZZI_D ZPR:$vec, sve_elm_idx_extdup_d:$index), dsub)>;
// Extract element from vector with immediate index that's within the bottom 128-bits.
- let Predicates = [NotInStreamingSVEMode], AddedComplexity = 1 in {
+ let Predicates = [IsNeonAvailable], AddedComplexity = 1 in {
def : Pat<(i32 (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)),
(i32 (UMOVvi8 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
def : Pat<(i32 (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)),
@@ -3115,22 +3177,22 @@ let Predicates = [HasSVEorSME] in {
(i32 (UMOVvi32 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
def : Pat<(i64 (vector_extract (nxv2i64 ZPR:$vec), VectorIndexD:$index)),
(i64 (UMOVvi64 (v2i64 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexD:$index))>;
- } // End NotInStreamingSVEMode
+ } // End IsNeonAvailable
- let Predicates = [NotInStreamingSVEMode] in {
+ let Predicates = [IsNeonAvailable] in {
def : Pat<(sext_inreg (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index), i8),
(i32 (SMOVvi8to32 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
- def : Pat<(sext_inreg (anyext (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index)), i8),
+ def : Pat<(sext_inreg (anyext (i32 (vector_extract (nxv16i8 ZPR:$vec), VectorIndexB:$index))), i8),
(i64 (SMOVvi8to64 (v16i8 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexB:$index))>;
def : Pat<(sext_inreg (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index), i16),
(i32 (SMOVvi16to32 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>;
- def : Pat<(sext_inreg (anyext (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index)), i16),
+ def : Pat<(sext_inreg (anyext (i32 (vector_extract (nxv8i16 ZPR:$vec), VectorIndexH:$index))), i16),
(i64 (SMOVvi16to64 (v8i16 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexH:$index))>;
- def : Pat<(sext (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index)),
+ def : Pat<(sext (i32 (vector_extract (nxv4i32 ZPR:$vec), VectorIndexS:$index))),
(i64 (SMOVvi32to64 (v4i32 (EXTRACT_SUBREG ZPR:$vec, zsub)), VectorIndexS:$index))>;
- } // End NotInStreamingSVEMode
+ } // End IsNeonAvailable
// Extract first element from vector.
let AddedComplexity = 2 in {
@@ -3148,6 +3210,12 @@ let Predicates = [HasSVEorSME] in {
(f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
def : Pat<(vector_extract (nxv2f16 ZPR:$Zs), (i64 0)),
(f16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
+ def : Pat<(vector_extract (nxv8bf16 ZPR:$Zs), (i64 0)),
+ (bf16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
+ def : Pat<(vector_extract (nxv4bf16 ZPR:$Zs), (i64 0)),
+ (bf16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
+ def : Pat<(vector_extract (nxv2bf16 ZPR:$Zs), (i64 0)),
+ (bf16 (EXTRACT_SUBREG ZPR:$Zs, hsub))>;
def : Pat<(vector_extract (nxv4f32 ZPR:$Zs), (i64 0)),
(f32 (EXTRACT_SUBREG ZPR:$Zs, ssub))>;
def : Pat<(vector_extract (nxv2f32 ZPR:$Zs), (i64 0)),
@@ -3523,8 +3591,14 @@ let Predicates = [HasSVE2] in {
let Predicates = [HasSVE2orSME] in {
// SVE2 floating-point base 2 logarithm as integer
- defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb", int_aarch64_sve_flogb>;
+ defm FLOGB_ZPmZ : sve2_fp_flogb<"flogb", "FLOGB_ZPZZ", int_aarch64_sve_flogb>;
+}
+
+let Predicates = [HasSVE2orSME, UseExperimentalZeroingPseudos] in {
+ defm FLOGB_ZPZZ : sve2_fp_un_pred_zeroing_hsd<int_aarch64_sve_flogb>;
+} // End HasSVE2orSME, UseExperimentalZeroingPseudos
+let Predicates = [HasSVE2orSME] in {
// SVE2 floating-point convert precision
defm FCVTXNT_ZPmZ : sve2_fp_convert_down_odd_rounding_top<"fcvtxnt", "int_aarch64_sve_fcvtxnt">;
defm FCVTX_ZPmZ : sve2_fp_convert_down_odd_rounding<"fcvtx", "int_aarch64_sve_fcvtx">;
@@ -3674,29 +3748,29 @@ defm PSEL_PPPRI : sve2_int_perm_sel_p<"psel", int_aarch64_sve_psel>;
let Predicates = [HasSVE2p1_or_HasSME2] in {
defm FCLAMP_ZZZ : sve2p1_fclamp<"fclamp", int_aarch64_sve_fclamp>;
-def FDOT_ZZZ_S : sve_float_dot<0b0, "fdot">;
-def FDOT_ZZZI_S : sve_float_dot_indexed<0b0, "fdot">;
+defm FDOT_ZZZ_S : sve_float_dot<0b0, "fdot", nxv8f16, int_aarch64_sve_fdot_x2>;
+defm FDOT_ZZZI_S : sve_float_dot_indexed<0b0, "fdot", nxv8f16, int_aarch64_sve_fdot_lane_x2>;
def BFMLSLB_ZZZ_S : sve2_fp_mla_long<0b110, "bfmlslb">;
def BFMLSLT_ZZZ_S : sve2_fp_mla_long<0b111, "bfmlslt">;
def BFMLSLB_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b110, "bfmlslb">;
def BFMLSLT_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b111, "bfmlslt">;
-def SDOT_ZZZ_HtoS : sve2p1_two_way_dot_vv<"sdot", 0b0>;
-def UDOT_ZZZ_HtoS : sve2p1_two_way_dot_vv<"udot", 0b1>;
-def SDOT_ZZZI_HtoS : sve2p1_two_way_dot_vvi<"sdot", 0b0>;
-def UDOT_ZZZI_HtoS : sve2p1_two_way_dot_vvi<"udot", 0b1>;
+defm SDOT_ZZZ_HtoS : sve2p1_two_way_dot_vv<"sdot", 0b0, int_aarch64_sve_sdot_x2>;
+defm UDOT_ZZZ_HtoS : sve2p1_two_way_dot_vv<"udot", 0b1, int_aarch64_sve_udot_x2>;
+defm SDOT_ZZZI_HtoS : sve2p1_two_way_dot_vvi<"sdot", 0b0, int_aarch64_sve_sdot_lane_x2>;
+defm UDOT_ZZZI_HtoS : sve2p1_two_way_dot_vvi<"udot", 0b1, int_aarch64_sve_udot_lane_x2>;
defm CNTP_XCI : sve2p1_pcount_pn<"cntp", 0b000>;
-defm PEXT_PCI : sve2p1_pred_as_ctr_to_mask<"pext">;
+defm PEXT_PCI : sve2p1_pred_as_ctr_to_mask<"pext", int_aarch64_sve_pext>;
defm PEXT_2PCI : sve2p1_pred_as_ctr_to_mask_pair<"pext">;
defm PTRUE_C : sve2p1_ptrue_pn<"ptrue">;
defm SQCVTN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"sqcvtn", 0b00, int_aarch64_sve_sqcvtn_x2>;
defm UQCVTN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"uqcvtn", 0b01, int_aarch64_sve_uqcvtn_x2>;
defm SQCVTUN_Z2Z_StoH : sve2p1_multi_vec_extract_narrow<"sqcvtun", 0b10, int_aarch64_sve_sqcvtun_x2>;
-defm SQRSHRN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrn", 0b101>;
-defm UQRSHRN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"uqrshrn", 0b111>;
-defm SQRSHRUN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrun", 0b001>;
+defm SQRSHRN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrn", 0b101, int_aarch64_sve_sqrshrn_x2>;
+defm UQRSHRN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"uqrshrn", 0b111, int_aarch64_sve_uqrshrn_x2>;
+defm SQRSHRUN_Z2ZI_StoH : sve2p1_multi_vec_shift_narrow<"sqrshrun", 0b001, int_aarch64_sve_sqrshrun_x2>;
// Load to two registers
def LD1B_2Z : sve2p1_mem_cld_ss_2z<"ld1b", 0b00, 0b0, ZZ_b_mul_r, GPR64shifted8>;
@@ -3770,6 +3844,59 @@ defm STNT1H_4Z_IMM : sve2p1_mem_cst_si_4z<"stnt1h", 0b01, 0b1, ZZZZ_h_mul_r>;
defm STNT1W_4Z_IMM : sve2p1_mem_cst_si_4z<"stnt1w", 0b10, 0b1, ZZZZ_s_mul_r>;
defm STNT1D_4Z_IMM : sve2p1_mem_cst_si_4z<"stnt1d", 0b11, 0b1, ZZZZ_d_mul_r>;
+multiclass store_pn_x2<ValueType Ty, SDPatternOperator Store,
+ Instruction RegImmInst> {
+ def : Pat<(Store (Ty ZPR:$vec0), (Ty ZPR:$vec1),
+ (aarch64svcount PPR:$PNg), GPR64:$base),
+ (RegImmInst (REG_SEQUENCE ZPR2Mul2, Ty:$vec0, zsub0, Ty:$vec1, zsub1),
+ PPR:$PNg, GPR64:$base, (i64 0))>;
+}
+
+// Stores of 2 consecutive vectors
+defm : store_pn_x2<nxv16i8, int_aarch64_sve_st1_pn_x2, ST1B_2Z_IMM>;
+defm : store_pn_x2<nxv8i16, int_aarch64_sve_st1_pn_x2, ST1H_2Z_IMM>;
+defm : store_pn_x2<nxv4i32, int_aarch64_sve_st1_pn_x2, ST1W_2Z_IMM>;
+defm : store_pn_x2<nxv2i64, int_aarch64_sve_st1_pn_x2, ST1D_2Z_IMM>;
+defm : store_pn_x2<nxv16i8, int_aarch64_sve_stnt1_pn_x2, STNT1B_2Z_IMM>;
+defm : store_pn_x2<nxv8i16, int_aarch64_sve_stnt1_pn_x2, STNT1H_2Z_IMM>;
+defm : store_pn_x2<nxv4i32, int_aarch64_sve_stnt1_pn_x2, STNT1W_2Z_IMM>;
+defm : store_pn_x2<nxv2i64, int_aarch64_sve_stnt1_pn_x2, STNT1D_2Z_IMM>;
+defm : store_pn_x2<nxv8f16, int_aarch64_sve_st1_pn_x2, ST1H_2Z_IMM>;
+defm : store_pn_x2<nxv8bf16, int_aarch64_sve_st1_pn_x2, ST1H_2Z_IMM>;
+defm : store_pn_x2<nxv4f32, int_aarch64_sve_st1_pn_x2, ST1W_2Z_IMM>;
+defm : store_pn_x2<nxv2f64, int_aarch64_sve_st1_pn_x2, ST1D_2Z_IMM>;
+defm : store_pn_x2<nxv8f16, int_aarch64_sve_stnt1_pn_x2, STNT1H_2Z_IMM>;
+defm : store_pn_x2<nxv8bf16, int_aarch64_sve_stnt1_pn_x2, STNT1H_2Z_IMM>;
+defm : store_pn_x2<nxv4f32, int_aarch64_sve_stnt1_pn_x2, STNT1W_2Z_IMM>;
+defm : store_pn_x2<nxv2f64, int_aarch64_sve_stnt1_pn_x2, STNT1D_2Z_IMM>;
+
+multiclass store_pn_x4<ValueType Ty, SDPatternOperator Store,
+ Instruction RegImmInst> {
+ def : Pat<(Store (Ty ZPR:$vec0), (Ty ZPR:$vec1), (Ty ZPR:$vec2), (Ty ZPR:$vec3),
+ (aarch64svcount PPR:$PNg), GPR64:$base),
+ (RegImmInst (REG_SEQUENCE ZPR4Mul4, Ty:$vec0, zsub0, Ty:$vec1, zsub1,
+ Ty:$vec2, zsub2, Ty:$vec3, zsub3),
+ PPR:$PNg, GPR64:$base, (i64 0))>;
+}
+
+// Stores of 4 consecutive vectors
+defm : store_pn_x4<nxv16i8, int_aarch64_sve_st1_pn_x4, ST1B_4Z_IMM>;
+defm : store_pn_x4<nxv8i16, int_aarch64_sve_st1_pn_x4, ST1H_4Z_IMM>;
+defm : store_pn_x4<nxv4i32, int_aarch64_sve_st1_pn_x4, ST1W_4Z_IMM>;
+defm : store_pn_x4<nxv2i64, int_aarch64_sve_st1_pn_x4, ST1D_4Z_IMM>;
+defm : store_pn_x4<nxv16i8, int_aarch64_sve_stnt1_pn_x4, STNT1B_4Z_IMM>;
+defm : store_pn_x4<nxv8i16, int_aarch64_sve_stnt1_pn_x4, STNT1H_4Z_IMM>;
+defm : store_pn_x4<nxv4i32, int_aarch64_sve_stnt1_pn_x4, STNT1W_4Z_IMM>;
+defm : store_pn_x4<nxv2i64, int_aarch64_sve_stnt1_pn_x4, STNT1D_4Z_IMM>;
+defm : store_pn_x4<nxv8f16, int_aarch64_sve_st1_pn_x4, ST1H_4Z_IMM>;
+defm : store_pn_x4<nxv8bf16, int_aarch64_sve_st1_pn_x4, ST1H_4Z_IMM>;
+defm : store_pn_x4<nxv4f32, int_aarch64_sve_st1_pn_x4, ST1W_4Z_IMM>;
+defm : store_pn_x4<nxv2f64, int_aarch64_sve_st1_pn_x4, ST1D_4Z_IMM>;
+defm : store_pn_x4<nxv8f16, int_aarch64_sve_stnt1_pn_x4, STNT1H_4Z_IMM>;
+defm : store_pn_x4<nxv8bf16, int_aarch64_sve_stnt1_pn_x4, STNT1H_4Z_IMM>;
+defm : store_pn_x4<nxv4f32, int_aarch64_sve_stnt1_pn_x4, STNT1W_4Z_IMM>;
+defm : store_pn_x4<nxv2f64, int_aarch64_sve_stnt1_pn_x4, STNT1D_4Z_IMM>;
+
defm WHILEGE_2PXX : sve2p1_int_while_rr_pair<"whilege", 0b000>;
defm WHILEGT_2PXX : sve2p1_int_while_rr_pair<"whilegt", 0b001>;
defm WHILELT_2PXX : sve2p1_int_while_rr_pair<"whilelt", 0b010>;
@@ -3786,6 +3913,25 @@ defm WHILEHS_CXX : sve2p1_int_while_rr_pn<"whilehs", 0b100>;
defm WHILEHI_CXX : sve2p1_int_while_rr_pn<"whilehi", 0b101>;
defm WHILELO_CXX : sve2p1_int_while_rr_pn<"whilelo", 0b110>;
defm WHILELS_CXX : sve2p1_int_while_rr_pn<"whilels", 0b111>;
+
+
+// Aliases for existing SVE instructions for which predicate-as-counter registers
+// are accepted as an operand to the instruction.
+def : InstAlias<"ldr $Pt, [$Rn, $imm9, mul vl]",
+ (LDR_PXI PNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>;
+def : InstAlias<"ldr $Pt, [$Rn]",
+ (LDR_PXI PNRAny:$Pt, GPR64sp:$Rn, 0), 0>;
+
+def : InstAlias<"str $Pt, [$Rn, $imm9, mul vl]",
+ (STR_PXI PNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>;
+def : InstAlias<"str $Pt, [$Rn]",
+ (STR_PXI PNRAny:$Pt, GPR64sp:$Rn, 0), 0>;
+
+def : InstAlias<"mov $Pd, $Pn",
+ (ORR_PPzPP PNR8:$Pd, PNR8:$Pn, PNR8:$Pn, PNR8:$Pn), 0>;
+
+def : InstAlias<"pfalse\t$Pd", (PFALSE PNR8:$Pd), 0>;
+
} // End HasSVE2p1_or_HasSME2
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64SchedA510.td b/llvm/lib/Target/AArch64/AArch64SchedA510.td
new file mode 100644
index 000000000000..2526fe304190
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SchedA510.td
@@ -0,0 +1,1386 @@
+//==- AArch64SchedA510.td - ARM Cortex-A510 Scheduling Definitions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for the ARM Cortex-A510 processor.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The following definitions describe the per-operand machine model.
+// This works with MachineScheduler. See MCSchedModel.h for details.
+
+// Cortex-A510 machine model for scheduling and other instruction cost heuristics.
+def CortexA510Model : SchedMachineModel {
+ let MicroOpBufferSize = 0; // The Cortex-A510 is an in-order processor
+  let IssueWidth = 3; // 3-wide issue; it dual-issues under most circumstances
+ let LoadLatency = 3; // Cycles for loads to access the cache.
+ // Most loads have a latency of 2, but some have higher latencies.
+ // 3 seems to be a good tradeoff
+ let PostRAScheduler = 1; // Enable PostRA scheduler pass.
+ let CompleteModel = 0; // Covers instructions applicable to Cortex-A510.
+
+ // FIXME: Remove when all errors have been fixed.
+ let FullInstRWOverlapCheck = 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types
+
+let SchedModel = CortexA510Model in {
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+// Model each pipeline as a ProcResource with BufferSize = 0, since the
+// Cortex-A510 is in-order.
+let BufferSize = 0 in {
+ def CortexA510UnitALU0 : ProcResource<1>; // Int ALU0
+ def CortexA510UnitALU12 : ProcResource<2>; // Int ALU1 & ALU2
+  def CortexA510UnitMAC   : ProcResource<1>; // Int MAC, 64-bit wide
+ def CortexA510UnitDiv : ProcResource<1>; // Int Division, not pipelined
+  // There are 2 LS pipes, 1 for Load/Store; 1 for Load only
+ def CortexA510UnitLdSt : ProcResource<1>; // Load/Store shared pipe
+ def CortexA510UnitLd1 : ProcResource<1>; // Load pipe
+ def CortexA510UnitB : ProcResource<1>; // Branch
+ def CortexA510UnitPAC : ProcResource<1>; // Pointer Authentication (PAC) pipe
+
+ // The FP DIV/SQRT instructions execute totally differently from the FP ALU
+ // instructions, which can mostly be dual-issued; that's why for now we model
+ // them with 2 resources.
+ def CortexA510UnitVALU0 : ProcResource<1>; // SIMD/FP/SVE ALU0
+  def CortexA510UnitVALU1 : ProcResource<1>; // SIMD/FP/SVE ALU1
+ def CortexA510UnitVMAC : ProcResource<2>; // SIMD/FP/SVE MAC
+  def CortexA510UnitVMC   : ProcResource<1>; // SIMD/FP/SVE multicycle instrs (e.g. Div, SQRT, cryptography)
+}
+
+def CortexA510UnitLd : ProcResGroup<[CortexA510UnitLdSt, CortexA510UnitLd1]>;
+def CortexA510UnitVALU : ProcResGroup<[CortexA510UnitVALU0, CortexA510UnitVALU1]>;
+def CortexA510UnitALU : ProcResGroup<[CortexA510UnitALU0, CortexA510UnitALU12]>;
+// These latencies are modeled without taking into account forwarding paths
+// (the software optimisation guide lists latencies taking into account
+// typical forwarding paths).
+def : WriteRes<WriteImm, [CortexA510UnitALU]> { let Latency = 1; } // MOVN, MOVZ
+def : WriteRes<WriteI, [CortexA510UnitALU]> { let Latency = 1; } // ALU
+def : WriteRes<WriteISReg, [CortexA510UnitALU]> { let Latency = 2; } // ALU of Shifted-Reg
+def : WriteRes<WriteIEReg, [CortexA510UnitALU]> { let Latency = 2; } // ALU of Extended-Reg
+def : WriteRes<WriteExtr, [CortexA510UnitALU]> { let Latency = 2; } // EXTR from a reg pair
+def : WriteRes<WriteIS, [CortexA510UnitALU]> { let Latency = 2; } // Shift/Scale
+
+// MAC
+def : WriteRes<WriteIM32, [CortexA510UnitMAC]> { let Latency = 3; } // 32-bit Multiply
+def : WriteRes<WriteIM64, [CortexA510UnitMAC]> { let Latency = 5; let ResourceCycles = [2];} // 64-bit Multiply
+
+// Div
+def : WriteRes<WriteID32, [CortexA510UnitDiv]> {
+ let Latency = 8; let ResourceCycles = [8];
+}
+def : WriteRes<WriteID64, [CortexA510UnitDiv]> {
+ let Latency = 16; let ResourceCycles = [16];
+}
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to the Cortex-A510
+
+//===----------------------------------------------------------------------===//
+class CortexA510Write<int n, ProcResourceKind res> : SchedWriteRes<[res]> {
+ let Latency = n;
+}
+
+class CortexA510MCWrite<int n, int m, ProcResourceKind res> : SchedWriteRes<[res]> {
+ let Latency = n;
+ let ResourceCycles = [m];
+ let BeginGroup = 1;
+}
+
+class CortexA510MC_RC0Write<int n, ProcResourceKind res> : SchedWriteRes<[res]> {
+ let Latency = n;
+ let BeginGroup = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 2 micro-op types
+def A510Write_10cyc_1VMAC_1VALU : SchedWriteRes<[CortexA510UnitVALU, CortexA510UnitVMAC]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+
+def A510Write_15cyc_1VMAC_1VALU : SchedWriteRes<[CortexA510UnitVALU, CortexA510UnitVMAC]> {
+ let Latency = 15;
+ let NumMicroOps = 2;
+}
+
+class A510Write_PAC_B <int lat> : SchedWriteRes<[CortexA510UnitPAC, CortexA510UnitB]> {
+ let Latency = lat;
+ let NumMicroOps = 2;
+}
+// Load
+def : WriteRes<WriteLD, [CortexA510UnitLd]> { let Latency = 2; }
+def : WriteRes<WriteLDIdx, [CortexA510UnitLd]> { let Latency = 2; }
+def : WriteRes<WriteLDHi, [CortexA510UnitLd]> { let Latency = 2; }
+
+def CortexA510WriteVLD1 : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 3; }
+def CortexA510WriteVLD1SI : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 3; let SingleIssue = 1; }
+def CortexA510WriteVLD2 : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 4;
+ let ResourceCycles = [2]; }
+def CortexA510WriteVLD3 : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 5;
+ let ResourceCycles = [3]; }
+def CortexA510WriteVLD4 : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 6;
+ let ResourceCycles = [4]; }
+def CortexA510WriteVLD6 : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 5;
+ let ResourceCycles = [3]; }
+def CortexA510WriteVLD8 : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 6;
+ let ResourceCycles = [4]; }
+
+def CortexA510WriteLDP1 : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 3; }
+def CortexA510WriteLDP2 : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 3; }
+def CortexA510WriteLDP4 : SchedWriteRes<[CortexA510UnitLd]> { let Latency = 3; }
+
+// Pre/Post Indexing - Performed as part of address generation
+def : WriteRes<WriteAdr, []> { let Latency = 0; }
+
+// Store
+let RetireOOO = 1 in {
+def : WriteRes<WriteST, [CortexA510UnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTP, [CortexA510UnitLdSt]> { let Latency = 1; }
+def : WriteRes<WriteSTIdx, [CortexA510UnitLdSt]> { let Latency = 1; }
+}
+def : WriteRes<WriteSTX, [CortexA510UnitLdSt]> { let Latency = 3; }
+
+// Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
+def : WriteRes<WriteVST, [CortexA510UnitLdSt]> { let Latency = 5;
+ let ResourceCycles = [2];}
+def CortexA510WriteVST1 : SchedWriteRes<[CortexA510UnitLdSt]> { let Latency = 4; }
+def CortexA510WriteVST2 : SchedWriteRes<[CortexA510UnitLdSt]> { let Latency = 5;
+ let ResourceCycles = [2]; }
+def CortexA510WriteVST3 : SchedWriteRes<[CortexA510UnitLdSt]> { let Latency = 5;
+ let ResourceCycles = [3]; }
+def CortexA510WriteVST4 : SchedWriteRes<[CortexA510UnitLdSt]> { let Latency = 5;
+ let ResourceCycles = [4]; }
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+
+// Branch
+def : WriteRes<WriteBr, [CortexA510UnitB]>;
+def : WriteRes<WriteBrReg, [CortexA510UnitB]>;
+def : WriteRes<WriteSys, [CortexA510UnitB]>;
+def : WriteRes<WriteBarrier, [CortexA510UnitB]>;
+def : WriteRes<WriteHint, [CortexA510UnitB]>;
+
+// FP ALU
+// As the WriteF result is produced in F5 and can mostly be forwarded to the
+// consumer at F1, the effective latency is set to 4.
+def : WriteRes<WriteF, [CortexA510UnitVALU]> { let Latency = 4; }
+def : WriteRes<WriteFCmp, [CortexA510UnitVALU]> { let Latency = 3; }
+def : WriteRes<WriteFCvt, [CortexA510UnitVALU]> { let Latency = 4; }
+def : WriteRes<WriteFCopy, [CortexA510UnitVALU]> { let Latency = 3; }
+def : WriteRes<WriteFImm, [CortexA510UnitVALU]> { let Latency = 3; }
+
+class CortexA510VSt<int n> : SchedWriteRes<[CortexA510UnitLdSt]> {
+ let RetireOOO = 1;
+ let ResourceCycles = [n];
+}
+
+def CortexA510VSt0 : SchedWriteRes<[CortexA510UnitLdSt]> {
+ let RetireOOO = 1;
+}
+
+def : SchedAlias<WriteVd, CortexA510Write<4, CortexA510UnitVALU>>;
+def : SchedAlias<WriteVq, CortexA510Write<4, CortexA510UnitVALU>>;
+
+// FP ALU specific new schedwrite definitions
+def CortexA510WriteFPALU_F3 : SchedWriteRes<[CortexA510UnitVALU]> { let Latency = 3;}
+def CortexA510WriteFPALU_F4 : SchedWriteRes<[CortexA510UnitVALU]> { let Latency = 4;}
+
+// FP Mul, Div, Sqrt. Div/Sqrt are not pipelined
+def : WriteRes<WriteFMul, [CortexA510UnitVMAC]> { let Latency = 4; }
+
+let RetireOOO = 1 in {
+def : WriteRes<WriteFDiv, [CortexA510UnitVMC]> { let Latency = 22;
+ let ResourceCycles = [29]; }
+def CortexA510WriteVMAC : SchedWriteRes<[CortexA510UnitVMAC]> { let Latency = 4; }
+def CortexA510WriteFDivHP : SchedWriteRes<[CortexA510UnitVMC]> { let Latency = 8;
+ let ResourceCycles = [5]; }
+def CortexA510WriteFDivSP : SchedWriteRes<[CortexA510UnitVMC]> { let Latency = 13;
+ let ResourceCycles = [10]; }
+def CortexA510WriteFDivDP : SchedWriteRes<[CortexA510UnitVMC]> { let Latency = 22;
+ let ResourceCycles = [19]; }
+def CortexA510WriteFSqrtHP : SchedWriteRes<[CortexA510UnitVMC]> { let Latency = 8;
+ let ResourceCycles = [5]; }
+def CortexA510WriteFSqrtSP : SchedWriteRes<[CortexA510UnitVMC]> { let Latency = 12;
+ let ResourceCycles = [9]; }
+def CortexA510WriteFSqrtDP : SchedWriteRes<[CortexA510UnitVMC]> { let Latency = 22;
+ let ResourceCycles = [19]; }
+}
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedRead types.
+
+def : ReadAdvance<ReadVLD, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadST, 1>;
+
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+
+
+// MUL
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 2>;
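+// A ReadAdvance of 2 on ReadIMA means the accumulate operand is not needed
+// until 2 cycles after issue, so a producing multiply (WriteIM32, latency 3)
+// is seen by a dependent multiply-accumulate with an effective latency of
+// about 3 - 2 = 1 cycle, modelling the MAC forwarding path.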
+
+// Div
+def : ReadAdvance<ReadID, 0>;
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific InstRWs.
+
+def A510WriteISReg : SchedWriteVariant<[
+ SchedVar<RegShiftedPred, [WriteISReg]>,
+ SchedVar<NoSchedPred, [WriteI]>]>;
+def : InstRW<[A510WriteISReg], (instregex ".*rs$")>;
+def : InstRW<[WriteIS], (instrs RBITWr, RBITXr)>;
+
+// Pointer Authentication Instructions (v8.3 PAC)
+// -----------------------------------------------------------------------------
+
+// Authenticate data address
+// Authenticate instruction address
+// Compute pointer authentication code for data address
+// Compute pointer authentication code, using generic key
+// Compute pointer authentication code for instruction address
+def : InstRW<[CortexA510Write<3, CortexA510UnitPAC>], (instregex "^AUT", "^PAC")>;
+
+// Branch and link, register, with pointer authentication
+// Branch, register, with pointer authentication
+// Branch, return, with pointer authentication
+def : InstRW<[A510Write_PAC_B<1>], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA,
+ BRAAZ, BRAB, BRABZ, RETAA, RETAB,
+ ERETAA, ERETAB)>;
+
+// Load register, with pointer authentication
+def : InstRW<[CortexA510Write<2, CortexA510UnitPAC>], (instregex "^LDRA[AB](indexed|writeback)")>;
+
+// Strip pointer authentication code
+def : InstRW<[CortexA510Write<5, CortexA510UnitPAC>], (instrs XPACD, XPACI, XPACLRI)>;
+//---
+// Miscellaneous
+//---
+def : InstRW<[CortexA510WriteVLD1SI,CortexA510WriteLDP1], (instregex "LDPS?Wi")>;
+def : InstRW<[CortexA510WriteVLD1,CortexA510WriteLDP1], (instregex "LDPSi")>;
+def : InstRW<[CortexA510WriteVLD1,CortexA510WriteLDP2], (instregex "LDP(X|D)i")>;
+def : InstRW<[CortexA510WriteVLD1,CortexA510WriteLDP4], (instregex "LDPQi")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD1SI,CortexA510WriteLDP1], (instregex "LDPS?W(pre|post)")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD1,CortexA510WriteLDP1], (instregex "LDPS(pre|post)")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD1,CortexA510WriteLDP2], (instregex "LDP(X|D)(pre|post)")>;
+def : InstRW<[WriteAdr, CortexA510WriteVLD1,CortexA510WriteLDP4], (instregex "LDPQ(pre|post)")>;
+def : InstRW<[WriteI], (instrs COPY)>;
+//---
+// Vector Loads - 128-bit per cycle
+//---
+// 1-element structures
+def : InstRW<[CortexA510WriteVLD1], (instregex "LD1i(8|16|32|64)$")>; // single element
+def : InstRW<[CortexA510WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // replicate
+def : InstRW<[CortexA510WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[CortexA510WriteVLD1], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA510WriteVLD1], (instregex "LD1Twov(8b|4h|2s|1d)$")>; // multiple structures
+def : InstRW<[CortexA510WriteVLD1], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA510WriteVLD2], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[CortexA510WriteVLD2], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA510WriteVLD2], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[CortexA510WriteVLD2], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
+
+def : InstRW<[CortexA510WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA510WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA510WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+// 2-element structures
+def : InstRW<[CortexA510WriteVLD2], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[CortexA510WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA510WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[CortexA510WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+
+def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD2i(8|16|32|64)(_POST)?$")>;
+def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)(_POST)?$")>;
+def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)(_POST)?$")>;
+def : InstRW<[CortexA510WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)(_POST)?$")>;
+
+// 3-element structures
+def : InstRW<[CortexA510WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
+def : InstRW<[CortexA510WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA510WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[CortexA510WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
+
+def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA510WriteVLD3, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA510WriteVLD6, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
+
+// 4-element structures
+def : InstRW<[CortexA510WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; // load single 4-el structure to one lane of 4 regs.
+def : InstRW<[CortexA510WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // load single 4-el structure, replicate to all lanes of 4 regs.
+def : InstRW<[CortexA510WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>; // load multiple 4-el structures to 4 regs.
+def : InstRW<[CortexA510WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
+
+def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA510WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA510WriteVLD4, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
+def : InstRW<[CortexA510WriteVLD8, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
+
+//---
+// Vector Stores
+//---
+def : InstRW<[CortexA510WriteVST1], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[CortexA510WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA510WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA510WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA510WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA510WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA510WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA510WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+def : InstRW<[CortexA510WriteVST4, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[CortexA510WriteVST2], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[CortexA510WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[CortexA510WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+def : InstRW<[CortexA510WriteVST4, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+def : InstRW<[CortexA510WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[CortexA510WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA510WriteVST4, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|2d|16b|8h|4s|4d)_POST$")>;
+
+def : InstRW<[CortexA510WriteVST2], (instregex "ST4i(8|16|32|64)$")>;
+def : InstRW<[CortexA510WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
+def : InstRW<[CortexA510WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
+def : InstRW<[CortexA510WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
+
+//---
+// Floating Point Conversions, MAC, DIV, SQRT
+//---
+def : InstRW<[CortexA510WriteFPALU_F3], (instregex "^DUP(v2i64|v4i32|v8i16|v16i8)")>;
+def : InstRW<[CortexA510WriteFPALU_F4], (instregex "^XTN")>;
+def : InstRW<[CortexA510WriteFPALU_F4], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>;
+def : InstRW<[CortexA510WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>;
+
+def : InstRW<[CortexA510WriteFPALU_F4], (instregex "^(S|U)CVTF(S|U)(W|X)(H|S|D)")>;
+def : InstRW<[CortexA510WriteFPALU_F4], (instregex "^(S|U)CVTF(h|s|d)")>;
+def : InstRW<[CortexA510WriteFPALU_F4], (instregex "^(S|U)CVTFv")>;
+
+def : InstRW<[CortexA510WriteVMAC], (instregex "^FN?M(ADD|SUB).*")>;
+def : InstRW<[CortexA510WriteVMAC], (instregex "^FML(A|S)v.*")>;
+def : InstRW<[CortexA510WriteFDivHP], (instrs FDIVHrr)>;
+def : InstRW<[CortexA510WriteFDivSP], (instrs FDIVSrr)>;
+def : InstRW<[CortexA510WriteFDivDP], (instrs FDIVDrr)>;
+def : InstRW<[CortexA510WriteFDivHP], (instregex "^FDIVv.*16$")>;
+def : InstRW<[CortexA510WriteFDivSP], (instregex "^FDIVv.*32$")>;
+def : InstRW<[CortexA510WriteFDivDP], (instregex "^FDIVv.*64$")>;
+def : InstRW<[CortexA510WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
+def : InstRW<[CortexA510WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
+def : InstRW<[CortexA510WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+
+// 4.15. Advanced SIMD integer instructions
+// ASIMD absolute diff
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]ABDv(2i32|4i16|8i8)")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]ABDv(16i8|4i32|8i16)")>;
+// ASIMD absolute diff accum
+def : InstRW<[CortexA510Write<8, CortexA510UnitVALU>], (instregex "[SU]ABAL?v")>;
+// ASIMD absolute diff long
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]ABDLv")>;
+// ASIMD arith #1
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "(ADD|SUB|NEG)v(1i64|2i32|4i16|8i8)",
+ "[SU]R?HADDv(2i32|4i16|8i8)", "[SU]HSUBv(2i32|4i16|8i8)")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "(ADD|SUB|NEG)v(2i64|4i32|8i16|16i8)",
+ "[SU]R?HADDv(8i16|4i32|16i8)", "[SU]HSUBv(8i16|4i32|16i8)")>;
+// ASIMD arith #2
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "ABSv(1i64|2i32|4i16|8i8)$",
+ "[SU]ADDLPv(2i32_v1i64|4i16_v2i32|8i8_v4i16)$",
+ "([SU]QADD|[SU]QSUB|SQNEG|SUQADD|USQADD)v(1i16|1i32|1i64|1i8|2i32|4i16|8i8)$",
+ "ADDPv(2i32|4i16|8i8)$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "ABSv(2i64|4i32|8i16|16i8)$",
+ "[SU]ADDLPv(16i8_v8i16|4i32_v2i64|8i16_v4i32)$",
+ "([SU]QADD|[SU]QSUB|SQNEG|SUQADD|USQADD)v(16i8|2i64|4i32|8i16)$",
+ "ADDPv(16i8|2i64|4i32|8i16)$")>;
+// ASIMD arith #3
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "SADDLv", "UADDLv", "SADDWv",
+ "UADDWv", "SSUBLv", "USUBLv", "SSUBWv", "USUBWv", "ADDHNv", "SUBHNv")>;
+// ASIMD arith #5
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "RADDHNv", "RSUBHNv")>;
+// ASIMD arith, reduce
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "ADDVv", "SADDLVv", "UADDLVv")>;
+// ASIMD compare #1
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(1i64|2i32|4i16|8i8)")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(2i64|4i32|8i16|16i8)")>;
+// ASIMD compare #2
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "CMTSTv(1i64|2i32|4i16|8i8)")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "CMTSTv(2i64|4i32|8i16|16i8)")>;
+// ASIMD logical #1
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "(AND|EOR|NOT|ORN)v8i8",
+ "(ORR|BIC)v(2i32|4i16|8i8)$", "MVNIv(2i|2s|4i16)")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "(AND|EOR|NOT|ORN)v16i8",
+ "(ORR|BIC)v(16i8|4i32|8i16)$", "MVNIv(4i32|4s|8i16)")>;
+// ASIMD max/min, basic
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "[SU](MIN|MAX)P?v(2i32|4i16|8i8)")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "[SU](MIN|MAX)P?v(16i8|4i132|8i16)")>;
+// SIMD max/min, reduce
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "[SU](MAX|MIN)Vv")>;
+// ASIMD multiply, by element
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "MULv(2i32|4i16|4i32|8i16)_indexed$",
+ "SQR?DMULHv(1i16|1i32|2i32|4i16|4i32|8i16)_indexed$")>;
+// ASIMD multiply
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instrs PMULv8i8)>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instrs PMULv16i8)>;
+// ASIMD multiply accumulate
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "ML[AS]v(2i32|4i16|8i8)$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "ML[AS]v(16i8|4i32|8i16)$")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "ML[AS]v(2i32|4i16|4i32|8i16)_indexed$")>;
+// ASIMD multiply accumulate half
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "SQRDML[AS]H[vi]")>;
+// ASIMD multiply accumulate long
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "[SU]ML[AS]Lv")>;
+// ASIMD multiply accumulate long #2
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "SQDML[AS]L[iv]")>;
+// ASIMD dot product
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "[SU]DOTv8i8")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "[SU]DOTv16i8")>;
+// ASIMD dot product, by scalar
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "[SU]DOTlanev")>;
+// ASIMD multiply long
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "[SU]MULLv", "SQDMULL[iv]")>;
+// ASIMD polynomial (8x8) multiply long
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instrs PMULLv8i8, PMULLv16i8)>;
+// ASIMD pairwise add and accumulate
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "[SU]ADALPv")>;
+// ASIMD shift accumulate
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "[SU]SRA(d|v2i32|v4i16|v8i8)")>;
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "[SU]SRAv(16i8|2i64|4i32|8i16)")>;
+// ASIMD shift accumulate #2
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "[SU]RSRA[vd]")>;
+// ASIMD shift by immed
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "SHLd$", "SHLv",
+ "SLId$", "SRId$", "[SU]SHR[vd]", "SHRNv")>;
+// ASIMD shift by immed
+// SXTL and UXTL are aliases of SSHLL/USHLL with a zero shift
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[US]?SHLLv")>;
+// ASIMD shift by immed #2
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]RSHR(d|v2i32|v4i16|v8i8)",
+ "[SU]RSHRv(16i8|2i64|4i32|8i16)")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "RSHRNv(2i32|4i16|8i8)",
+ "RSHRNv(16i8|4i32|8i16)")>;
+// ASIMD shift by register
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]SHLv(1i64|2i32|4i16|8i8)")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]SHLv(2i64|4i32|8i16|16i8)")>;
+// ASIMD shift by register #2
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]RSHLv(1i64|2i32|4i16|8i8)")>;
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "[SU]RSHLv(2i64|4i32|8i16|16i8)")>;
+
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "[SU]QSHLv(1i64|2i32|4i16|8i8)")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "[SU]QSHLv(2i64|4i32|8i16|16i8)")>;
+
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "[SU]QRSHLv(1i64|2i32|4i16|8i8)")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "[SU]QRSHLv(2i64|4i32|8i16|16i8)")>;
+
+// Cryptography extensions
+// -----------------------------------------------------------------------------
+
+// Crypto AES ops
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;
+
+// Crypto polynomial (64x64) multiply long
+def : InstRW<[CortexA510MCWrite<8, 0, CortexA510UnitVMC>], (instrs PMULLv1i64, PMULLv2i64)>;
+
+// Crypto SHA1 hash acceleration op
+// Crypto SHA1 schedule acceleration ops
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^SHA1(H|SU0|SU1)")>;
+
+// Crypto SHA1 hash acceleration ops
+// Crypto SHA256 hash acceleration ops
+def : InstRW<[CortexA510MCWrite<8, 0, CortexA510UnitVMC>], (instregex "^SHA1[CMP]", "^SHA256H2?")>;
+
+// Crypto SHA256 schedule acceleration ops
+def : InstRW<[CortexA510MCWrite<8, 0, CortexA510UnitVMC>], (instregex "^SHA256SU[01]")>;
+
+// Crypto SHA512 hash acceleration ops
+def : InstRW<[CortexA510MCWrite<8, 0, CortexA510UnitVMC>], (instregex "^SHA512(H|H2|SU0|SU1)")>;
+
+// Crypto SHA3 ops
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instrs BCAX, EOR3, XAR)>;
+def : InstRW<[CortexA510MCWrite<8, 0, CortexA510UnitVMC>], (instrs RAX1)>;
+
+
+// Crypto SM3 ops
+def : InstRW<[CortexA510MCWrite<8, 0, CortexA510UnitVMC>], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
+ "^SM3TT[12][AB]$")>;
+
+// Crypto SM4 ops
+def : InstRW<[CortexA510MCWrite<8, 0, CortexA510UnitVMC>], (instrs SM4E, SM4ENCKEY)>;
+
+// CRC
+// -----------------------------------------------------------------------------
+
+def : InstRW<[CortexA510MCWrite<2, 0, CortexA510UnitMAC>], (instregex "^CRC32")>;
+
+// SVE Predicate instructions
+
+// Loop control, based on predicate
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs BRKA_PPmP, BRKA_PPzP,
+ BRKB_PPmP, BRKB_PPzP)>;
+
+// Loop control, based on predicate and flag setting
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs BRKAS_PPzP, BRKBS_PPzP)>;
+
+// Loop control, propagating
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>;
+
+// Loop control, propagating and flag setting
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs BRKNS_PPzP)>;
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs BRKPAS_PPzPP, BRKPBS_PPzPP)>;
+
+
+// Loop control, based on GPR
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
+ (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>;
+
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>;
+
+// Loop terminate
+def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instregex "^CTERM(EQ|NE)_(WW|XX)")>;
+
+// Predicate counting scalar
+def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
+
+def : InstRW<[CortexA510Write<1, CortexA510UnitALU>],
+ (instregex "^CNT[BHWD]_XPiI")>;
+
+def : InstRW<[CortexA510Write<1, CortexA510UnitALU>],
+ (instregex "^(INC|DEC)[BHWD]_XPiI")>;
+
+def : InstRW<[CortexA510Write<1, CortexA510UnitALU>],
+ (instregex "^(SQINC|SQDEC|UQINC|UQDEC)[BHWD]_[XW]Pi(Wd)?I")>;
+
+// Predicate counting scalar, active predicate
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
+ (instregex "^CNTP_XPP_[BHSD]")>;
+
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
+ (instregex "^(DEC|INC)P_XP_[BHSD]")>;
+
+def : InstRW<[CortexA510Write<8, CortexA510UnitVALU0>],
+ (instregex "^(SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]",
+ "^(UQDEC|UQINC)P_WP_[BHSD]",
+ "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]")>;
+
+
+// Predicate counting vector, active predicate
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
+ (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>;
+
+// Predicate logical
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
+ (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>;
+
+// Predicate logical, flag setting
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>],
+ (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>;
+
+// Predicate reverse
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^REV_PP_[BHSD]")>;
+
+// Predicate select
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs SEL_PPPP)>;
+
+// Predicate set
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PFALSE", "^PTRUE_[BHSD]")>;
+
+// Predicate set/initialize, set flags
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PTRUES_[BHSD]")>;
+
+// Predicate find first/next
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>;
+
+// Predicate test
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs PTEST_PP)>;
+
+// Predicate transpose
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^TRN[12]_PPP_[BHSDQ]")>;
+
+// Predicate unpack and widen
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
+
+// Predicate zip/unzip
+def : InstRW<[CortexA510Write<6, CortexA510UnitVALU0>], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]")>;
+
+
+// SVE integer instructions
+// -----------------------------------------------------------------------------
+// Arithmetic, absolute diff
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]ABD_(ZPmZ|ZPZZ)_[BHSD]")>;
+
+// Arithmetic, absolute diff accum
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^[SU]ABA_ZZZ_[BHSD]")>;
+
+// Arithmetic, absolute diff accum long
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>;
+
+// Arithmetic, absolute diff long
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>;
+
+// Arithmetic, basic
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
+ (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]",
+ "^(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
+ "^(ADD|SUB)_ZZZ_[BHSD]",
+ "^(ADD|SUB|SUBR)_ZI_[BHSD]",
+ "^ADR_[SU]XTW_ZZZ_D_[0123]",
+ "^ADR_LSL_ZZZ_[SD]_[0123]",
+ "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
+ "^SADDLBT_ZZZ_[HSD]",
+ "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^SSUBL(BT|TB)_ZZZ_[HSD]")>;
+
+// Arithmetic, complex
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
+ (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
+ "^SQ(ABS|NEG)_ZPmZ_[BHSD]",
+ "^SQ(ADD|SUB|SUBR)_ZPmZ_?[BHSD]",
+ "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
+ "^[SU]Q(ADD|SUB)_ZI_[BHSD]",
+ "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
+ "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;
+
+// Arithmetic, large integer
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>;
+
+// Arithmetic, pairwise add
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^ADDP_ZPmZ_[BHSD]")>;
+
+// Arithmetic, pairwise add and accum long
+def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "^[SU]ADALP_ZPmZ_[HSD]")>;
+
+// Arithmetic, shift
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
+ (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
+ "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
+ "^(ASR|LSL|LSR)_ZPmI_[BHSD]",
+ "^(ASR|LSL|LSR)_ZPZI_[BHSD]",
+ "^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
+ "^(ASR|LSL|LSR)_ZPZZ_[BHSD]",
+ "^(ASR|LSL|LSR)_ZZI_[BHSD]",
+ "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
+// Arithmetic, shift right for divide
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
+ (instregex "^ASRD_ZPmI_[BHSD]",
+ "^ASRD_ZPZI_[BHSD]")>;
+
+// Arithmetic, shift and accumulate
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
+ (instregex "^(SSRA|USRA)_ZZI_[BHSD]")>;
+
+def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>],
+ (instregex "^(SRSRA|URSRA)_ZZI_[BHSD]")>;
+
+
+// Arithmetic, shift by immediate
+// Arithmetic, shift by immediate and insert
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
+ (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]")>;
+
+// Arithmetic, shift complex
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
+ (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
+ "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_(ZPmZ|ZPZZ)_[BHSD]",
+ "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
+ "^SQSHRU?N[BT]_ZZI_[BHS]",
+ "^UQR?SHRN[BT]_ZZI_[BHS]")>;
+
+// Arithmetic, shift rounding
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
+ (instregex "^(SRSHL|SRSHR|SRSHLR|URSHL|URSHLR|URSHR)_(ZPmZ|ZPZZ|ZPZI)_[BHSD]",
+ "^[SU]RSHR_ZPmI_[BHSD]")>;
+
+// Bit manipulation
+def : InstRW<[CortexA510MCWrite<14, 13, CortexA510UnitVMC>],
+ (instregex "^(BDEP|BEXT|BGRP)_ZZZ_B")>;
+
+def : InstRW<[CortexA510MCWrite<22, 21, CortexA510UnitVMC>],
+ (instregex "^(BDEP|BEXT|BGRP)_ZZZ_H")>;
+
+def : InstRW<[CortexA510MCWrite<38, 37, CortexA510UnitVMC>],
+ (instregex "^(BDEP|BEXT|BGRP)_ZZZ_S")>;
+
+def : InstRW<[CortexA510MCWrite<70, 69, CortexA510UnitVMC>],
+ (instregex "^(BDEP|BEXT|BGRP)_ZZZ_D")>;
+
+
+// Bitwise select
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>;
+
+// Count/reverse bits
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^(CLS|CLZ|RBIT)_ZPmZ_[BHSD]")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_[BH]")>;
+def : InstRW<[CortexA510Write<8, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_S")>;
+def : InstRW<[CortexA510Write<12, CortexA510UnitVALU>], (instregex "^CNT_ZPmZ_D")>;
+// Broadcast logical bitmask immediate to vector
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instrs DUPM_ZI)>;
+
+// Compare and set flags
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
+ (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]",
+ "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>;
+
+// Complex add
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^CADD_ZZI_[BHSD]")>;
+
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^SQCADD_ZZI_[BHSD]")>;
+
+// Complex dot product 8-bit element
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
+
+// Complex dot product 16-bit element
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
+
+// Complex multiply-add B, H, S element size
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^CMLA_ZZZ_[BHS]",
+ "^CMLA_ZZZI_[HS]")>;
+
+// Complex multiply-add D element size
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs CMLA_ZZZ_D)>;
+
+// Conditional extract operations, scalar form
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU>], (instregex "^CLAST[AB]_RPZ_[BHSD]")>;
+
+// Conditional extract operations, SIMD&FP scalar and vector forms
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]",
+ "^COMPACT_ZPZ_[SD]",
+ "^SPLICE_ZPZZ?_[BHSD]")>;
+
+// Convert to floating point, 64b to single or double
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_Dto[SD]")>;
+
+// Convert to floating point, 64b to half
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_DtoH")>;
+
+// Convert to floating point, 32b to single or half
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;
+
+// Convert to floating point, 32b to double
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_StoD")>;
+
+// Convert to floating point, 16b to half
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
+
+// Copy, scalar
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU0>], (instregex "^CPY_ZPmR_[BHSD]")>;
+
+// Copy, scalar SIMD&FP or imm
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^CPY_ZPm[IV]_[BHSD]",
+ "^CPY_ZPzI_[BHSD]")>;
+
+// Divides, 32 bit
+def : InstRW<[CortexA510MCWrite<15, 12, CortexA510UnitVMC>], (instregex "^[SU]DIVR?_(ZPmZ|ZPZZ)_S")>;
+
+// Divides, 64 bit
+def : InstRW<[CortexA510MCWrite<26, 23, CortexA510UnitVMC>], (instregex "^[SU]DIVR?_(ZPmZ|ZPZZ)_D")>;
+
+// Dot product, 8 bit
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_S")>;
+
+// Dot product, 8 bit, using signed and unsigned integers
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
+
+// Dot product, 16 bit
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]DOT_ZZZI?_D")>;
+
+// Duplicate, immediate and indexed form
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^DUP_ZI_[BHSD]",
+ "^DUP_ZZI_[BHSDQ]")>;
+
+// Duplicate, scalar form
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^DUP_ZR_[BHSD]")>;
+
+// Extend, sign or zero
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU]XTB_ZPmZ_[HSD]",
+ "^[SU]XTH_ZPmZ_[SD]",
+ "^[SU]XTW_ZPmZ_[D]")>;
+
+// Extract
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instrs EXT_ZZI, EXT_ZZI_B)>;
+
+// Extract narrow saturating
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]",
+ "^SQXTUN[BT]_ZZ_[BHS]")>;
+
+// Extract/insert operation, SIMD and FP scalar form
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^LAST[AB]_VPZ_[BHSD]",
+ "^INSR_ZV_[BHSD]")>;
+
+// Extract/insert operation, scalar
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU0>], (instregex "^LAST[AB]_RPZ_[BHSD]",
+ "^INSR_ZR_[BHSD]")>;
+
+// Histogram operations
+def : InstRW<[CortexA510MCWrite<8, 2, CortexA510UnitVALU0>], (instregex "^HISTCNT_ZPzZZ_[SD]",
+ "^HISTSEG_ZZZ")>;
+
+// Horizontal operations, B, H, S form, immediate operands only
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_II_[BHS]")>;
+
+// Horizontal operations, B, H, S form, scalar, immediate operands/ scalar
+// operands only / immediate, scalar operands
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>;
+
+// Horizontal operations, D form, immediate operands only
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs INDEX_II_D)>;
+
+// Horizontal operations, D form, scalar, immediate operands / scalar operands
+// only / immediate, scalar operands
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^INDEX_(IR|RI|RR)_D")>;
+
+// Logical
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>],
+ (instregex "^(AND|EOR|ORR)_ZI",
+ "^(AND|BIC|EOR|EOR|ORR)_ZZZ",
+ "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]",
+ "^(AND|BIC|EOR|NOT|ORR)_ZPZZ_[BHSD]")>;
+
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
+ (instregex "^EOR(BT|TB)_ZZZ_[BHSD]")>;
+
+// Max/min, basic and pairwise
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
+ "^[SU](MAX|MIN)P?_(ZPmZ|ZPZZ)_[BHSD]")>;
+
+// Matching operations
+def : InstRW<[CortexA510MCWrite<7, 2, CortexA510UnitVALU>], (instregex "^N?MATCH_PPzZZ_[BH]")>;
+
+// Matrix multiply-accumulate
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
+
+// Move prefix
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]",
+ "^MOVPRFX_ZZ")>;
+
+// Multiply, B, H, S element size
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ|ZPZZ)_[BHS]",
+ "^[SU]MULH_(ZPmZ|ZZZ|ZPZZ)_[BHS]")>;
+
+// Multiply, D element size
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ|ZPZZ)_D",
+ "^[SU]MULH_(ZPmZ|ZZZ|ZPZZ)_D")>;
+
+// Multiply long
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]MULL[BT]_ZZZI_[SD]",
+ "^[SU]MULL[BT]_ZZZ_[HSD]")>;
+
+// Multiply accumulate, B, H, S element size
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^ML[AS]_(ZZZI|ZPZZZ)_[BHS]",
+ "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;
+
+// Multiply accumulate, D element size
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^ML[AS]_(ZZZI|ZPZZZ)_D",
+ "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;
+
+// Multiply accumulate long
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]",
+ "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>;
+
+// Multiply accumulate saturating doubling long regular
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]",
+ "^SQDML[AS](LB|LT)_ZZZI_[SD]")>;
+
+// Multiply saturating doubling high, B, H, S element size
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDMULH_ZZZ_[BHS]",
+ "^SQDMULH_ZZZI_[HS]")>;
+
+// Multiply saturating doubling high, D element size
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
+
+// Multiply saturating doubling long
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQDMULL[BT]_ZZZ_[HSD]",
+ "^SQDMULL[BT]_ZZZI_[SD]")>;
+
+// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
+// element size
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDML[AS]H_ZZZ_[BHS]",
+ "^SQRDCMLAH_ZZZ_[BHS]",
+ "^SQRDML[AS]H_ZZZI_[HS]",
+ "^SQRDCMLAH_ZZZI_[HS]")>;
+
+// Multiply saturating rounding doubling regular/complex accumulate, D element
+// size
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDML[AS]H_ZZZI?_D",
+ "^SQRDCMLAH_ZZZ_D")>;
+
+// Multiply saturating rounding doubling regular/complex, B, H, S element size
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDMULH_ZZZ_[BHS]",
+ "^SQRDMULH_ZZZI_[HS]")>;
+
+// Multiply saturating rounding doubling regular/complex, D element size
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^SQRDMULH_ZZZI?_D")>;
+
+// Multiply/multiply long, (8x8) polynomial
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^PMUL_ZZZ_B")>;
+
+def : InstRW<[CortexA510Write<6, CortexA510UnitVMC>], (instregex "^PMULL[BT]_ZZZ_[HDQ]")>;
+
+
+// Predicate counting vector
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
+ (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI")>;
+
+// Reciprocal estimate
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>;
+
+// Reduction, arithmetic, B form
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
+
+// Reduction, arithmetic, H form
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
+
+// Reduction, arithmetic, S form
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
+
+// Reduction, arithmetic, D form
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
+
+// Reduction, logical
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]")>;
+
+// Reverse, vector
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^REV_ZZ_[BHSD]",
+ "^REVB_ZPmZ_[HSD]",
+ "^REVH_ZPmZ_[SD]",
+ "^REVW_ZPmZ_D")>;
+
+// Select, vector form
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^SEL_ZPZZ_[BHSD]")>;
+
+// Table lookup
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TBL_ZZZZ?_[BHSD]")>;
+
+// Table lookup extension
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TBX_ZZZ_[BHSD]")>;
+
+// Transpose, vector form
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>;
+
+// Unpack and extend
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>;
+
+// Zip/unzip
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>;
+
+// SVE floating-point instructions
+// -----------------------------------------------------------------------------
+
+// Floating point absolute value/difference
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FAB[SD]_ZPmZ_[HSD]",
+ "^FAB[SD]_ZPZZ_[HSD]")>;
+
+// Floating point arithmetic
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ|ZPZI|ZPZZ)_[HSD]",
+ "^FADDP_ZPmZZ_[HSD]",
+ "^FNEG_ZPmZ_[HSD]",
+ "^FSUBR_(ZPm[IZ]|ZPZ[IZ])_[HSD]")>;
+
+// Floating point associative add, F16
+def : InstRW<[CortexA510MCWrite<32, 29, CortexA510UnitVALU>], (instrs FADDA_VPZ_H)>;
+
+// Floating point associative add, F32
+def : InstRW<[CortexA510MCWrite<16, 13, CortexA510UnitVALU>], (instrs FADDA_VPZ_S)>;
+
+// Floating point associative add, F64
+def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVALU>], (instrs FADDA_VPZ_D)>;
+
+// Floating point compare
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FACG[ET]_PPzZZ_[HSD]",
+ "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]",
+ "^FCM(LE|LT)_PPzZ0_[HSD]",
+ "^FCMUO_PPzZZ_[HSD]")>;
+
+// Floating point complex add
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCADD_ZPmZ_[HSD]")>;
+
+// Floating point complex multiply add
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FCMLA_ZPmZZ_[HSD]",
+ "^FCMLA_ZZZI_[HS]")>;
+
+// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
+ "^FCVTLT_ZPmZ_HtoS",
+ "^FCVTNT_ZPmZ_StoH")>;
+
+// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
+// or F64 to F16)
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
+ "^FCVTLT_ZPmZ_StoD",
+ "^FCVTNT_ZPmZ_DtoS")>;
+
+// Floating point convert, round to odd
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCVTX_ZPmZ_DtoS", "FCVTXNT_ZPmZ_DtoS")>;
+
+// Floating point base2 log, F16
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;
+
+// Floating point base2 log, F32
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;
+
+// Floating point base2 log, F64
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;
+
+// Floating point convert to integer, F16
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
+
+// Floating point convert to integer, F32
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;
+
+// Floating point convert to integer, F64
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>],
+ (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
+
+// Floating point copy
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU0>], (instregex "^FCPY_ZPmI_[HSD]",
+ "^FDUP_ZI_[HSD]")>;
+
+// Floating point divide, F16
+def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVMC>], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
+
+// Floating point divide, F32
+def : InstRW<[CortexA510MCWrite<13, 10, CortexA510UnitVMC>], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
+
+// Floating point divide, F64
+def : InstRW<[CortexA510MCWrite<22, 19, CortexA510UnitVMC>], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
+
+// Floating point min/max pairwise
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
+
+// Floating point min/max
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^F(MAX|MIN)(NM)?_(ZPm[IZ]|ZPZZ|ZPZI)_[HSD]")>;
+
+// Floating point multiply
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^(FSCALE|FMULX)_(ZPmZ|ZPZZ)_[HSD]",
+ "^FMUL_(ZPm[IZ]|ZZZI?|ZPZI|ZPZZ)_[HSD]")>;
+
+// Floating point multiply accumulate
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>],
+ (instregex "^FML[AS]_(ZPmZZ|ZZZI|ZPZZZ)_[HSD]",
+ "^(FMAD|FNMAD|FNML[AS]|FN?MSB)_(ZPmZZ|ZPZZZ)_[HSD]")>;
+
+// Floating point multiply add/sub accumulate long
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>;
+
+// Floating point reciprocal estimate, F16
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FRECPE_ZZ_H", "^FRECPX_ZPmZ_H",
+ "^FRSQRTE_ZZ_H")>;
+
+// Floating point reciprocal estimate, F32
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FRECPE_ZZ_S", "^FRECPX_ZPmZ_S",
+ "^FRSQRTE_ZZ_S")>;
+
+// Floating point reciprocal estimate, F64
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FRECPE_ZZ_D", "^FRECPX_ZPmZ_D",
+ "^FRSQRTE_ZZ_D")>;
+
+// Floating point reciprocal step
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
+
+// Floating point min/max reduction, F16/F32/F64
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>],
+      (instregex "^(FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_[HSD]")>;
+
+// Floating point add reduction, F16
+def : InstRW<[CortexA510MCWrite<12, 11, CortexA510UnitVALU0>],
+      (instregex "^FADDV_VPZ_H")>;
+
+// Floating point add reduction, F32
+def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVALU0>],
+      (instregex "^FADDV_VPZ_S")>;
+
+// Floating point add reduction, F64
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU0>],
+      (instregex "^FADDV_VPZ_D")>;
+
+
+// Floating point round to integral, F16
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
+
+// Floating point round to integral, F32
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
+
+// Floating point round to integral, F64
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
+
+// Floating point square root, F16
+def : InstRW<[CortexA510MCWrite<8, 5, CortexA510UnitVMC>], (instregex "^FSQRT_ZPmZ_H")>;
+
+// Floating point square root, F32
+def : InstRW<[CortexA510MCWrite<12, 9, CortexA510UnitVMC>], (instregex "^FSQRT_ZPmZ_S")>;
+
+// Floating point square root, F64
+def : InstRW<[CortexA510MCWrite<22, 19, CortexA510UnitVMC>], (instregex "^FSQRT_ZPmZ_D")>;
+
+// Floating point trigonometric exponentiation
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FEXPA_ZZ_[HSD]")>;
+
+// Floating point trigonometric multiply add
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FTMAD_ZZI_[HSD]")>;
+
+// Floating point trigonometric, miscellaneous
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^FTSMUL_ZZZ_[HSD]")>;
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^FTSSEL_ZZZ_[HSD]")>;
+
+
+// SVE BFloat16 (BF16) instructions
+// -----------------------------------------------------------------------------
+
+// Convert, F32 to BF16
+def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
+
+// Dot product
+def : InstRW<[A510Write_10cyc_1VMAC_1VALU], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
+
+// Matrix multiply accumulate
+def : InstRW<[A510Write_15cyc_1VMAC_1VALU], (instrs BFMMLA_ZZZ)>;
+
+// Multiply accumulate long
+def : InstRW<[CortexA510Write<4, CortexA510UnitVMAC>], (instregex "^BFMLAL[BT]_ZZZ(I)?")>;
+
+// SVE Load instructions
+// -----------------------------------------------------------------------------
+
+// Load vector
+def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instrs LDR_ZXI)>;
+
+// Load predicate
+def : InstRW<[CortexA510Write<3, CortexA510UnitLdSt>], (instrs LDR_PXI)>;
+
+// Contiguous load, scalar + imm
+def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LD1[BHWD]_IMM_REAL$",
+ "^LD1S?B_[HSD]_IMM_REAL$",
+ "^LD1S?H_[SD]_IMM_REAL$",
+ "^LD1S?W_D_IMM_REAL$" )>;
+// Contiguous load, scalar + scalar
+def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LD1[BHWD]$",
+ "^LD1S?B_[HSD]$",
+ "^LD1S?H_[SD]$",
+ "^LD1S?W_D$" )>;
+
+// Contiguous load broadcast, scalar + imm
+def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LD1R[BHWD]_IMM$",
+ "^LD1RSW_IMM$",
+ "^LD1RS?B_[HSD]_IMM$",
+ "^LD1RS?H_[SD]_IMM$",
+ "^LD1RS?W_D_IMM$",
+ "^LD1RQ_[BHWD]_IMM$")>;
+
+// Contiguous load broadcast, scalar + scalar
+def : InstRW<[CortexA510Write<3, CortexA510UnitLdSt>], (instregex "^LD1RQ_[BHWD]$")>;
+
+// Non temporal load, scalar + imm
+def : InstRW<[CortexA510Write<3, CortexA510UnitLdSt>], (instregex "^LDNT1[BHWD]_ZRI$")>;
+
+// Non temporal load, scalar + scalar
+def : InstRW<[CortexA510Write<3, CortexA510UnitLdSt>], (instregex "^LDNT1[BHWD]_ZRR$")>;
+
+// Non temporal gather load, vector + scalar 32-bit element size
+def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLdSt>], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
+ "^LDNT1S[BH]_ZZR_S_REAL$")>;
+
+// Non temporal gather load, vector + scalar 64-bit element size
+def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
+def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instrs LDNT1D_ZZR_D_REAL)>;
+
+// Contiguous first faulting load, scalar + scalar
+def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LDFF1[BHWD]_REAL$",
+ "^LDFF1S?B_[HSD]_REAL$",
+ "^LDFF1S?H_[SD]_REAL$",
+ "^LDFF1S?W_D_REAL$")>;
+
+// Contiguous non faulting load, scalar + imm
+def : InstRW<[CortexA510Write<3, CortexA510UnitLd>], (instregex "^LDNF1[BHWD]_IMM_REAL$",
+ "^LDNF1S?B_[HSD]_IMM_REAL$",
+ "^LDNF1S?H_[SD]_IMM_REAL$",
+ "^LDNF1S?W_D_IMM_REAL$")>;
+
+// Contiguous Load two structures to two vectors, scalar + imm
+def : InstRW<[CortexA510MCWrite<3, 1, CortexA510UnitLdSt>], (instregex "^LD2[BHWD]_IMM$")>;
+
+// Contiguous Load two structures to two vectors, scalar + scalar
+def : InstRW<[CortexA510MCWrite<3, 2, CortexA510UnitLdSt>], (instregex "^LD2[BHWD]$")>;
+
+// Contiguous Load three structures to three vectors, scalar + imm
+def : InstRW<[CortexA510MCWrite<5, 3, CortexA510UnitLdSt>], (instregex "^LD3[BHWD]_IMM$")>;
+
+// Contiguous Load three structures to three vectors, scalar + scalar
+def : InstRW<[CortexA510MCWrite<5, 3, CortexA510UnitLdSt>], (instregex "^LD3[BHWD]$")>;
+
+// Contiguous Load four structures to four vectors, scalar + imm
+def : InstRW<[CortexA510MCWrite<5, 3, CortexA510UnitLdSt>], (instregex "^LD4[BHWD]_IMM$")>;
+
+// Contiguous Load four structures to four vectors, scalar + scalar
+def : InstRW<[CortexA510MCWrite<5, 3, CortexA510UnitLdSt>], (instregex "^LD4[BHWD]$")>;
+
+// Gather load, vector + imm, 32-bit element size
+def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLdSt>], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
+ "^GLD(FF)?1W_IMM_REAL$")>;
+
+// Gather load, vector + imm, 64-bit element size
+def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
+ "^GLD(FF)?1D_IMM_REAL$")>;
+
+// Gather load, 64-bit element size
+def : InstRW<[CortexA510MCWrite<7, 7, CortexA510UnitLdSt>],
+ (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW_(SCALED_)?REAL$",
+ "^GLD(FF)?1S?[BHW]_D_(SCALED_)?REAL$",
+ "^GLD(FF)?1D_[SU]XTW_(SCALED_)?REAL$",
+ "^GLD(FF)?1D_(SCALED_)?REAL$")>;
+
+// Gather load, 32-bit scaled offset
+def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLd>],
+ (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
+ "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
+
+// Gather load, 32-bit unpacked unscaled offset
+def : InstRW<[CortexA510MCWrite<9, 9, CortexA510UnitLd>], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
+ "^GLD(FF)?1W_[SU]XTW_REAL$")>;
+
+def : InstRW<[CortexA510Write<0, CortexA510UnitVALU>], (instregex "^PRF(B|H|W|D).*")>;
+// SVE Store instructions
+// -----------------------------------------------------------------------------
+
+// Store from predicate reg
+def : InstRW<[CortexA510VSt0], (instrs STR_PXI)>;
+
+// Store from vector reg
+def : InstRW<[CortexA510VSt0], (instrs STR_ZXI)>;
+
+// Contiguous store, scalar + imm
+def : InstRW<[CortexA510VSt0], (instregex "^ST1[BHWD]_IMM$",
+ "^ST1B_[HSD]_IMM$",
+ "^ST1H_[SD]_IMM$",
+ "^ST1W_D_IMM$")>;
+
+// Contiguous store, scalar + scalar
+def : InstRW<[CortexA510VSt0], (instregex "^ST1H(_[SD])?$")>;
+def : InstRW<[CortexA510VSt0], (instregex "^ST1[BWD]$",
+ "^ST1B_[HSD]$",
+ "^ST1W_D$")>;
+
+// Contiguous store two structures from two vectors, scalar + imm
+def : InstRW<[CortexA510VSt<11>], (instregex "^ST2[BHWD]_IMM$")>;
+
+// Contiguous store two structures from two vectors, scalar + scalar
+def : InstRW<[CortexA510VSt<11>], (instrs ST2H)>;
+
+// Contiguous store two structures from two vectors, scalar + scalar
+def : InstRW<[CortexA510VSt<11>], (instregex "^ST2[BWD]$")>;
+
+// Contiguous store three structures from three vectors, scalar + imm
+def : InstRW<[CortexA510VSt<25>], (instregex "^ST3[BHW]_IMM$")>;
+def : InstRW<[CortexA510VSt<14>], (instregex "^ST3D_IMM$")>;
+
+// Contiguous store three structures from three vectors, scalar + scalar
+def : InstRW<[CortexA510VSt<25>], (instregex "^ST3[BHW]$")>;
+def : InstRW<[CortexA510VSt<14>], (instregex "^ST3D$")>;
+
+// Contiguous store four structures from four vectors, scalar + imm
+def : InstRW<[CortexA510VSt<50>], (instregex "^ST4[BHW]_IMM$")>;
+def : InstRW<[CortexA510VSt<25>], (instregex "^ST4D_IMM$")>;
+
+// Contiguous store four structures from four vectors, scalar + scalar
+def : InstRW<[CortexA510VSt<50>], (instregex "^ST4[BHW]$")>;
+
+// Contiguous store four structures from four vectors, scalar + scalar
+def : InstRW<[CortexA510VSt<25>], (instregex "^ST4D$")>;
+
+// Non temporal store, scalar + imm
+def : InstRW<[CortexA510VSt0], (instregex "^STNT1[BHWD]_ZRI$")>;
+
+// Non temporal store, scalar + scalar
+def : InstRW<[CortexA510VSt0], (instrs STNT1H_ZRR)>;
+def : InstRW<[CortexA510VSt0], (instregex "^STNT1[BWD]_ZRR$")>;
+
+// Scatter non temporal store, vector + scalar 32-bit element size
+def : InstRW<[CortexA510VSt<9>], (instregex "^STNT1[BHW]_ZZR_S")>;
+
+// Scatter non temporal store, vector + scalar 64-bit element size
+def : InstRW<[CortexA510VSt<7>], (instregex "^STNT1[BHWD]_ZZR_D")>;
+
+// Scatter store vector + imm 32-bit element size
+def : InstRW<[CortexA510VSt<9>], (instregex "^SST1[BH]_S_IMM$",
+ "^SST1W_IMM$")>;
+
+// Scatter store vector + imm 64-bit element size
+def : InstRW<[CortexA510VSt<7>], (instregex "^SST1[BHW]_D_IMM$",
+ "^SST1D_IMM$")>;
+
+// Scatter store, 32-bit scaled offset
+def : InstRW<[CortexA510VSt<8>],
+ (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;
+
+// Scatter store, 32-bit unpacked unscaled offset
+def : InstRW<[CortexA510VSt<8>], (instregex "^SST1[BHW]_D_[SU]XTW$",
+ "^SST1D_[SU]XTW$")>;
+
+// Scatter store, 32-bit unpacked scaled offset
+def : InstRW<[CortexA510VSt<8>], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
+ "^SST1D_[SU]XTW_SCALED$")>;
+
+// Scatter store, 32-bit unscaled offset
+def : InstRW<[CortexA510VSt<8>], (instregex "^SST1[BH]_S_[SU]XTW$",
+ "^SST1W_[SU]XTW$")>;
+
+// Scatter store, 64-bit scaled offset
+def : InstRW<[CortexA510VSt<8>], (instregex "^SST1[HW]_D_SCALED$",
+ "^SST1D_SCALED$")>;
+
+// Scatter store, 64-bit unscaled offset
+def : InstRW<[CortexA510VSt<8>], (instregex "^SST1[BHW]_D$",
+ "^SST1D$")>;
+
+// SVE Miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// Read first fault register, unpredicated
+def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instrs RDFFR_P_REAL)>;
+
+// Read first fault register, predicated
+def : InstRW<[CortexA510Write<3, CortexA510UnitALU0>], (instrs RDFFR_PPz_REAL)>;
+
+// Read first fault register and set flags
+def : InstRW<[CortexA510Write<3, CortexA510UnitALU0>], (instrs RDFFRS_PPz)>;
+
+// Set first fault register
+// Write to first fault register
+def : InstRW<[CortexA510Write<1, CortexA510UnitALU>], (instrs SETFFR, WRFFR)>;
+
+// SVE Cryptographic instructions
+// -----------------------------------------------------------------------------
+
+// Crypto AES ops
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^AES[DE]_ZZZ_B$",
+ "^AESI?MC_ZZ_B$")>;
+
+// Crypto SHA3 ops
+def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], (instregex "^(BCAX|EOR3)_ZZZZ$",
+ "^XAR_ZZZI_[BHSD]$")>;
+
+def : InstRW<[CortexA510MC_RC0Write<8, CortexA510UnitVMC>], (instregex "^RAX1_ZZZ_D$")>;
+
+// Crypto SM4 ops
+def : InstRW<[CortexA510MC_RC0Write<8, CortexA510UnitVMC>], (instregex "^SM4E(KEY)?_ZZZ_S$")>;
+
+}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td b/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
index b8d5a70d7ec6..de09177d1dc0 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td
@@ -571,12 +571,12 @@ def Ampere1Write_62cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
// across Unit A or B for both uops.
def Ampere1Write_Arith : SchedWriteVariant<[
SchedVar<RegExtendedPred, [Ampere1Write_2cyc_2AB]>,
- SchedVar<AmpereCheapLSL, [Ampere1Write_1cyc_1AB]>,
+ SchedVar<IsCheapLSL, [Ampere1Write_1cyc_1AB]>,
SchedVar<NoSchedPred, [Ampere1Write_2cyc_1B_1AB]>]>;
def Ampere1Write_ArithFlagsetting : SchedWriteVariant<[
SchedVar<RegExtendedPred, [Ampere1Write_2cyc_1AB_1A]>,
- SchedVar<AmpereCheapLSL, [Ampere1Write_1cyc_1A]>,
+ SchedVar<IsCheapLSL, [Ampere1Write_1cyc_1A]>,
SchedVar<NoSchedPred, [Ampere1Write_2cyc_1B_1A]>]>;
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
index f2863f5a8e3b..728eecfa645e 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td
@@ -141,8 +141,8 @@ def M3WriteAW : SchedWriteVariant<[SchedVar<IsZeroIdiomPred, [M3WriteZ0]>,
def M3WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M3WriteA1]>,
SchedVar<ExynosLogicPred, [M3WriteA1]>,
SchedVar<NoSchedPred, [M3WriteAA]>]>;
-def M3WriteAY : SchedWriteVariant<[SchedVar<ExynosRotateRightImmPred, [M3WriteA1]>,
- SchedVar<NoSchedPred, [M3WriteAA]>]>;
+def M3WriteAY : SchedWriteVariant<[SchedVar<IsRORImmIdiomPred, [M3WriteA1]>,
+ SchedVar<NoSchedPred, [M3WriteAA]>]>;
def M3WriteB1 : SchedWriteRes<[M3UnitB]> { let Latency = 1; }
def M3WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M3WriteAC]>,
diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
index ab1e680f9e99..66e1c0b9ced1 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td
@@ -166,8 +166,8 @@ def M4WriteAV : SchedWriteVariant<[SchedVar<ExynosResetPred, [M4WriteZ0]>,
def M4WriteAX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M4WriteA1]>,
SchedVar<ExynosLogicExPred, [M4WriteA1]>,
SchedVar<NoSchedPred, [M4WriteAA]>]>;
-def M4WriteAY : SchedWriteVariant<[SchedVar<ExynosRotateRightImmPred, [M4WriteA1]>,
- SchedVar<NoSchedPred, [M4WriteAF]>]>;
+def M4WriteAY : SchedWriteVariant<[SchedVar<IsRORImmIdiomPred, [M4WriteA1]>,
+ SchedVar<NoSchedPred, [M4WriteAF]>]>;
def M4WriteB1 : SchedWriteRes<[M4UnitB]> { let Latency = 1; }
def M4WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M4WriteAC]>,
diff --git a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
index ae0b2b3eaeb6..a6405d4fc49c 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td
@@ -182,10 +182,10 @@ def M5WriteAXW : SchedWriteVariant<[SchedVar<ExynosArithPred, [M5WriteA1W]>,
def M5WriteAXX : SchedWriteVariant<[SchedVar<ExynosArithPred, [M5WriteA1X]>,
SchedVar<ExynosLogicExPred, [M5WriteA1X]>,
SchedVar<NoSchedPred, [M5WriteAAX]>]>;
-def M5WriteAYW : SchedWriteVariant<[SchedVar<ExynosRotateRightImmPred, [M5WriteA1W]>,
- SchedVar<NoSchedPred, [M5WriteAFW]>]>;
-def M5WriteAYX : SchedWriteVariant<[SchedVar<ExynosRotateRightImmPred, [M5WriteA1X]>,
- SchedVar<NoSchedPred, [M5WriteAFX]>]>;
+def M5WriteAYW : SchedWriteVariant<[SchedVar<IsRORImmIdiomPred, [M5WriteA1W]>,
+ SchedVar<NoSchedPred, [M5WriteAFW]>]>;
+def M5WriteAYX : SchedWriteVariant<[SchedVar<IsRORImmIdiomPred, [M5WriteA1X]>,
+ SchedVar<NoSchedPred, [M5WriteAFX]>]>;
def M5WriteB1 : SchedWriteRes<[M5UnitB]> { let Latency = 1; }
def M5WriteBX : SchedWriteVariant<[SchedVar<ExynosBranchLinkLRPred, [M5WriteAC]>,
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td
new file mode 100644
index 000000000000..d689b9fa9c06
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td
@@ -0,0 +1,1060 @@
+//=- AArch64SchedNeoverseN1.td - NeoverseN1 Scheduling Model -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for the Arm Neoverse N1 processors.
+//
+// References:
+// - "Arm Neoverse N1 Software Optimization Guide"
+// - https://en.wikichip.org/wiki/arm_holdings/microarchitectures/neoverse_n1
+//
+//===----------------------------------------------------------------------===//
+
+def NeoverseN1Model : SchedMachineModel {
+ let IssueWidth = 8; // Maximum micro-ops dispatch rate.
+ let MicroOpBufferSize = 128; // NOTE: Copied from Cortex-A76.
+ let LoadLatency = 4; // Optimistic load latency.
+ let MispredictPenalty = 11; // Cycle cost of a mispredicted branch.
+ let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57.
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = !listconcat(PAUnsupported.F,
+ SMEUnsupported.F,
+ SVEUnsupported.F,
+ [HasMTE]);
+}
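+
+// NOTE (explanatory): with CompleteModel set, TableGen requires scheduling
+// information for every instruction that is not excluded via the
+// UnsupportedFeatures list above; any instruction left unmapped is reported
+// as an error when the model is built.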
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Neoverse N1.
+// Instructions are first fetched and then decoded into internal macro-ops
+// (MOPs). From there, the MOPs proceed through register renaming and dispatch
+// stages. A MOP can be split into one or more micro-ops further down the
+// pipeline, after the decode stage. Once dispatched, micro-ops wait for their
+// operands and issue out-of-order to one of the issue pipelines. Each issue
+// pipeline can accept one micro-op per cycle.
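+//
+// For example (explanatory note), N1Write_1c_1I below describes a single
+// 1-cycle micro-op that may issue on any integer pipeline, while
+// N1Write_5c_2L describes two micro-ops on the load pipelines with an
+// overall latency of 5 cycles; SchedAlias<> and InstRW<> records later in
+// this file attach these write types to the corresponding instructions.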
+
+let SchedModel = NeoverseN1Model in {
+
+// Define the issue ports.
+def N1UnitB : ProcResource<1>; // Branch
+def N1UnitS : ProcResource<2>; // Integer single cycle 0/1
+def N1UnitM : ProcResource<1>; // Integer multicycle
+def N1UnitL : ProcResource<2>; // Load/Store 0/1
+def N1UnitD : ProcResource<2>; // Store data 0/1
+def N1UnitV0 : ProcResource<1>; // FP/ASIMD 0
+def N1UnitV1 : ProcResource<1>; // FP/ASIMD 1
+
+def N1UnitI : ProcResGroup<[N1UnitS, N1UnitM]>; // Integer units
+def N1UnitV : ProcResGroup<[N1UnitV0, N1UnitV1]>; // FP/ASIMD units
+
+// Define commonly used read types.
+
+// No generic forwarding is provided for these types.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadST, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+
+//===----------------------------------------------------------------------===//
+// Define generic 0 micro-op types
+
+let Latency = 0, NumMicroOps = 0 in
+def N1Write_0c_0Z : SchedWriteRes<[]>;
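+
+// Note on naming (inferred from the definitions that follow): write types are
+// named N1Write_<latency>c[<occupancy>]_<count><unit>..., e.g. N1Write_4c3_1M
+// is a 4-cycle write occupying unit M for 3 cycles, and N1Write_7c_2L_3V is a
+// 5 micro-op write using two L units and three V units.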
+
+//===----------------------------------------------------------------------===//
+// Define generic 1 micro-op types
+
+def N1Write_1c_1B : SchedWriteRes<[N1UnitB]> { let Latency = 1; }
+def N1Write_1c_1I : SchedWriteRes<[N1UnitI]> { let Latency = 1; }
+def N1Write_2c_1M : SchedWriteRes<[N1UnitM]> { let Latency = 2; }
+def N1Write_3c_1M : SchedWriteRes<[N1UnitM]> { let Latency = 3; }
+def N1Write_4c3_1M : SchedWriteRes<[N1UnitM]> { let Latency = 4;
+ let ResourceCycles = [3]; }
+def N1Write_5c3_1M : SchedWriteRes<[N1UnitM]> { let Latency = 5;
+ let ResourceCycles = [3]; }
+def N1Write_12c5_1M : SchedWriteRes<[N1UnitM]> { let Latency = 12;
+ let ResourceCycles = [5]; }
+def N1Write_20c5_1M : SchedWriteRes<[N1UnitM]> { let Latency = 20;
+ let ResourceCycles = [5]; }
+def N1Write_4c_1L : SchedWriteRes<[N1UnitL]> { let Latency = 4; }
+def N1Write_5c_1L : SchedWriteRes<[N1UnitL]> { let Latency = 5; }
+def N1Write_7c_1L : SchedWriteRes<[N1UnitL]> { let Latency = 7; }
+def N1Write_2c_1V : SchedWriteRes<[N1UnitV]> { let Latency = 2; }
+def N1Write_3c_1V : SchedWriteRes<[N1UnitV]> { let Latency = 3; }
+def N1Write_4c_1V : SchedWriteRes<[N1UnitV]> { let Latency = 4; }
+def N1Write_5c_1V : SchedWriteRes<[N1UnitV]> { let Latency = 5; }
+def N1Write_2c_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 2; }
+def N1Write_3c_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 3; }
+def N1Write_4c_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 4; }
+def N1Write_7c7_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 7;
+ let ResourceCycles = [7]; }
+def N1Write_10c7_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 10;
+ let ResourceCycles = [7]; }
+def N1Write_13c10_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 13;
+ let ResourceCycles = [10]; }
+def N1Write_15c7_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 15;
+ let ResourceCycles = [7]; }
+def N1Write_17c7_1V0 : SchedWriteRes<[N1UnitV0]> { let Latency = 17;
+ let ResourceCycles = [7]; }
+def N1Write_2c_1V1 : SchedWriteRes<[N1UnitV1]> { let Latency = 2; }
+def N1Write_3c_1V1 : SchedWriteRes<[N1UnitV1]> { let Latency = 3; }
+def N1Write_4c_1V1 : SchedWriteRes<[N1UnitV1]> { let Latency = 4; }
+
+//===----------------------------------------------------------------------===//
+// Define generic 2 micro-op types
+
+let Latency = 1, NumMicroOps = 2 in
+def N1Write_1c_1B_1I : SchedWriteRes<[N1UnitB, N1UnitI]>;
+let Latency = 3, NumMicroOps = 2 in
+def N1Write_3c_1I_1M : SchedWriteRes<[N1UnitI, N1UnitM]>;
+let Latency = 2, NumMicroOps = 2 in
+def N1Write_2c_1I_1L : SchedWriteRes<[N1UnitI, N1UnitL]>;
+let Latency = 5, NumMicroOps = 2 in
+def N1Write_5c_1I_1L : SchedWriteRes<[N1UnitI, N1UnitL]>;
+let Latency = 6, NumMicroOps = 2 in
+def N1Write_6c_1I_1L : SchedWriteRes<[N1UnitI, N1UnitL]>;
+let Latency = 7, NumMicroOps = 2 in
+def N1Write_7c_1I_1L : SchedWriteRes<[N1UnitI, N1UnitL]>;
+let Latency = 5, NumMicroOps = 2 in
+def N1Write_5c_1M_1V : SchedWriteRes<[N1UnitM, N1UnitV]>;
+let Latency = 6, NumMicroOps = 2 in
+def N1Write_6c_1M_1V0 : SchedWriteRes<[N1UnitM, N1UnitV0]>;
+let Latency = 5, NumMicroOps = 2 in
+def N1Write_5c_2L : SchedWriteRes<[N1UnitL, N1UnitL]>;
+let Latency = 1, NumMicroOps = 2 in
+def N1Write_1c_1L_1D : SchedWriteRes<[N1UnitL, N1UnitD]>;
+let Latency = 2, NumMicroOps = 2 in
+def N1Write_2c_1L_1V : SchedWriteRes<[N1UnitL, N1UnitV]>;
+let Latency = 4, NumMicroOps = 2 in
+def N1Write_4c_1L_1V : SchedWriteRes<[N1UnitL, N1UnitV]>;
+let Latency = 7, NumMicroOps = 2 in
+def N1Write_7c_1L_1V : SchedWriteRes<[N1UnitL, N1UnitV]>;
+let Latency = 4, NumMicroOps = 2 in
+def N1Write_4c_1V0_1V1 : SchedWriteRes<[N1UnitV0, N1UnitV1]>;
+let Latency = 4, NumMicroOps = 2 in
+def N1Write_4c_2V0 : SchedWriteRes<[N1UnitV0, N1UnitV0]>;
+let Latency = 5, NumMicroOps = 2 in
+def N1Write_5c_2V0 : SchedWriteRes<[N1UnitV0, N1UnitV0]>;
+let Latency = 6, NumMicroOps = 2 in
+def N1Write_6c_2V1 : SchedWriteRes<[N1UnitV1, N1UnitV1]>;
+let Latency = 5, NumMicroOps = 2 in
+def N1Write_5c_1V1_1V : SchedWriteRes<[N1UnitV1, N1UnitV]>;
+
+//===----------------------------------------------------------------------===//
+// Define generic 3 micro-op types
+
+let Latency = 2, NumMicroOps = 3 in
+def N1Write_2c_1I_1L_1V : SchedWriteRes<[N1UnitI, N1UnitL, N1UnitV]>;
+let Latency = 1, NumMicroOps = 3 in
+def N1Write_1c_2L_1D : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitD]>;
+let Latency = 2, NumMicroOps = 3 in
+def N1Write_2c_1L_2V : SchedWriteRes<[N1UnitL, N1UnitV, N1UnitV]>;
+let Latency = 6, NumMicroOps = 3 in
+def N1Write_6c_3L : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL]>;
+let Latency = 4, NumMicroOps = 3 in
+def N1Write_4c_3V : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV]>;
+let Latency = 6, NumMicroOps = 3 in
+def N1Write_6c_3V : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV]>;
+let Latency = 8, NumMicroOps = 3 in
+def N1Write_8c_3V : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV]>;
+
+//===----------------------------------------------------------------------===//
+// Define generic 4 micro-op types
+
+let Latency = 2, NumMicroOps = 4 in
+def N1Write_2c_2I_2L : SchedWriteRes<[N1UnitI, N1UnitI, N1UnitL, N1UnitL]>;
+let Latency = 6, NumMicroOps = 4 in
+def N1Write_6c_4L : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL]>;
+let Latency = 2, NumMicroOps = 4 in
+def N1Write_2c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]>;
+let Latency = 3, NumMicroOps = 4 in
+def N1Write_3c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]>;
+let Latency = 5, NumMicroOps = 4 in
+def N1Write_5c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]>;
+let Latency = 7, NumMicroOps = 4 in
+def N1Write_7c_2L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitV, N1UnitV]>;
+let Latency = 4, NumMicroOps = 4 in
+def N1Write_4c_4V : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
+let Latency = 6, NumMicroOps = 4 in
+def N1Write_6c_4V0 : SchedWriteRes<[N1UnitV0, N1UnitV0, N1UnitV0, N1UnitV0]>;
+
+//===----------------------------------------------------------------------===//
+// Define generic 5 micro-op types
+
+let Latency = 3, NumMicroOps = 5 in
+def N1Write_3c_2L_3V : SchedWriteRes<[N1UnitL, N1UnitL,
+ N1UnitV, N1UnitV, N1UnitV]>;
+let Latency = 7, NumMicroOps = 5 in
+def N1Write_7c_2L_3V : SchedWriteRes<[N1UnitL, N1UnitL,
+ N1UnitV, N1UnitV, N1UnitV]>;
+let Latency = 6, NumMicroOps = 5 in
+def N1Write_6c_5V : SchedWriteRes<[N1UnitV, N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
+
+//===----------------------------------------------------------------------===//
+// Define generic 6 micro-op types
+
+let Latency = 3, NumMicroOps = 6 in
+def N1Write_3c_4L_2V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
+ N1UnitV, N1UnitV]>;
+let Latency = 4, NumMicroOps = 6 in
+def N1Write_4c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
+ N1UnitV, N1UnitV, N1UnitV]>;
+let Latency = 5, NumMicroOps = 6 in
+def N1Write_5c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
+ N1UnitV, N1UnitV, N1UnitV]>;
+let Latency = 6, NumMicroOps = 6 in
+def N1Write_6c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
+ N1UnitV, N1UnitV, N1UnitV]>;
+let Latency = 7, NumMicroOps = 6 in
+def N1Write_7c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
+ N1UnitV, N1UnitV, N1UnitV]>;
+let Latency = 8, NumMicroOps = 6 in
+def N1Write_8c_3L_3V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
+ N1UnitV, N1UnitV, N1UnitV]>;
+
+//===----------------------------------------------------------------------===//
+// Define generic 7 micro-op types
+
+let Latency = 8, NumMicroOps = 7 in
+def N1Write_8c_3L_4V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
+ N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
+
+//===----------------------------------------------------------------------===//
+// Define generic 8 micro-op types
+
+let Latency = 5, NumMicroOps = 8 in
+def N1Write_5c_4L_4V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
+ N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
+let Latency = 6, NumMicroOps = 8 in
+def N1Write_6c_4L_4V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
+ N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
+let Latency = 8, NumMicroOps = 8 in
+def N1Write_8c_4L_4V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
+ N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
+let Latency = 10, NumMicroOps = 8 in
+def N1Write_10c_4L_4V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL, N1UnitL,
+ N1UnitV, N1UnitV, N1UnitV, N1UnitV]>;
+
+//===----------------------------------------------------------------------===//
+// Define generic 12 micro-op types
+
+let Latency = 9, NumMicroOps = 12 in
+def N1Write_9c_6L_6V : SchedWriteRes<[N1UnitL, N1UnitL, N1UnitL,
+ N1UnitL, N1UnitL, N1UnitL,
+ N1UnitV, N1UnitV, N1UnitV,
+ N1UnitV, N1UnitV, N1UnitV]>;
+
+
+// Miscellaneous Instructions
+// -----------------------------------------------------------------------------
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+// Convert floating-point condition flags
+// Flag manipulation instructions
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+
+
+// Branch Instructions
+// -----------------------------------------------------------------------------
+
+// Branch, immed
+// Compare and branch
+def : SchedAlias<WriteBr, N1Write_1c_1B>;
+
+// Branch, register
+def : SchedAlias<WriteBrReg, N1Write_1c_1B>;
+
+// Branch and link, immed
+// Branch and link, register
+def : InstRW<[N1Write_1c_1B_1I], (instrs BL, BLR)>;
+
+// Compare and branch
+def : InstRW<[N1Write_1c_1B], (instregex "^[CT]BN?Z[XW]$")>;
+
+
+// Arithmetic and Logical Instructions
+// -----------------------------------------------------------------------------
+
+// ALU, basic
+// ALU, basic, flagset
+// Conditional compare
+// Conditional select
+// Logical, basic
+// Address generation
+// Count leading
+// Reverse bits/bytes
+// Move immediate
+def : SchedAlias<WriteI, N1Write_1c_1I>;
+
+// ALU, extend and shift
+def : SchedAlias<WriteIEReg, N1Write_2c_1M>;
+
+// Arithmetic, LSL shift, shift <= 4
+// Arithmetic, flagset, LSL shift, shift <= 4
+// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
+def N1WriteISReg : SchedWriteVariant<[
+ SchedVar<IsCheapLSL, [N1Write_1c_1I]>,
+ SchedVar<NoSchedPred, [N1Write_2c_1M]>]>;
+def : SchedAlias<WriteISReg, N1WriteISReg>;
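+// (Illustrative: per the categories above, "add x0, x1, x2, lsl #3" takes the
+// 1-cycle N1Write_1c_1I path, whereas an LSR/ASR/ROR shift or an LSL amount
+// greater than 4 falls back to the 2-cycle N1Write_2c_1M path.)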
+
+// Logical, shift, no flagset
+def : InstRW<[N1Write_1c_1I],
+ (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
+
+// Logical, shift, flagset
+def : InstRW<[N1Write_2c_1M], (instregex "^(AND|BIC)S[WX]rs$")>;
+
+
+// Divide and multiply instructions
+// -----------------------------------------------------------------------------
+
+// Divide
+def : SchedAlias<WriteID32, N1Write_12c5_1M>;
+def : SchedAlias<WriteID64, N1Write_20c5_1M>;
+
+// Multiply accumulate
+// Multiply accumulate, long
+def : SchedAlias<WriteIM32, N1Write_2c_1M>;
+def : SchedAlias<WriteIM64, N1Write_4c3_1M>;
+
+// Multiply high
+def : InstRW<[N1Write_5c3_1M, ReadIM, ReadIM], (instrs SMULHrr, UMULHrr)>;
+
+
+// Miscellaneous data-processing instructions
+// -----------------------------------------------------------------------------
+
+// Bitfield extract, one reg
+// Bitfield extract, two regs
+def N1WriteExtr : SchedWriteVariant<[
+ SchedVar<IsRORImmIdiomPred, [N1Write_1c_1I]>,
+ SchedVar<NoSchedPred, [N1Write_3c_1I_1M]>]>;
+def : SchedAlias<WriteExtr, N1WriteExtr>;
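+// (Illustrative: EXTR with both source registers equal is the ROR-immediate
+// idiom matched by IsRORImmIdiomPred and takes the 1-cycle path; a general
+// two-register EXTR takes the 3-cycle I+M sequence.)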
+
+// Bitfield move, basic
+// Variable shift
+def : SchedAlias<WriteIS, N1Write_1c_1I>;
+
+// Bitfield move, insert
+def : InstRW<[N1Write_2c_1M], (instregex "^BFM[WX]ri$")>;
+
+// Move immediate
+def : SchedAlias<WriteImm, N1Write_1c_1I>;
+
+// Load instructions
+// -----------------------------------------------------------------------------
+
+// Load register, immed offset
+def : SchedAlias<WriteLD, N1Write_4c_1L>;
+
+// Load register, immed offset, index
+def : SchedAlias<WriteLDIdx, N1Write_4c_1L>;
+def : SchedAlias<WriteAdr, N1Write_1c_1I>;
+
+// Load pair, immed offset
+def : SchedAlias<WriteLDHi, N1Write_4c_1L>;
+
+// Load pair, immed offset, W-form
+def : InstRW<[N1Write_4c_1L, N1Write_0c_0Z], (instrs LDPWi, LDNPWi)>;
+
+// Load pair, signed immed offset, signed words
+def : InstRW<[N1Write_5c_1I_1L, N1Write_0c_0Z], (instrs LDPSWi)>;
+
+// Load pair, immed post or pre-index, signed words
+def : InstRW<[N1Write_5c_1I_1L, N1Write_0c_0Z, WriteAdr],
+ (instrs LDPSWpost, LDPSWpre)>;
+
+
+// Store instructions
+// -----------------------------------------------------------------------------
+
+// Store register, immed offset
+def : SchedAlias<WriteST, N1Write_1c_1L_1D>;
+
+// Store register, immed offset, index
+def : SchedAlias<WriteSTIdx, N1Write_1c_1L_1D>;
+
+// Store pair, immed offset
+def : SchedAlias<WriteSTP, N1Write_1c_2L_1D>;
+
+// Store pair, immed offset, W-form
+def : InstRW<[N1Write_1c_1L_1D], (instrs STPWi)>;
+
+
+// FP data processing instructions
+// -----------------------------------------------------------------------------
+
+// FP absolute value
+// FP arithmetic
+// FP min/max
+// FP negate
+// FP select
+def : SchedAlias<WriteF, N1Write_2c_1V>;
+
+// FP compare
+def : SchedAlias<WriteFCmp, N1Write_2c_1V0>;
+
+// FP divide
+// FP square root
+def : SchedAlias<WriteFDiv, N1Write_10c7_1V0>;
+
+// FP divide, H-form
+// FP square root, H-form
+def : InstRW<[N1Write_7c7_1V0], (instrs FDIVHrr, FSQRTHr)>;
+
+// FP divide, S-form
+// FP square root, S-form
+def : InstRW<[N1Write_10c7_1V0], (instrs FDIVSrr, FSQRTSr)>;
+
+// FP divide, D-form
+def : InstRW<[N1Write_15c7_1V0], (instrs FDIVDrr)>;
+
+// FP square root, D-form
+def : InstRW<[N1Write_17c7_1V0], (instrs FSQRTDr)>;
+
+// FP multiply
+def : SchedAlias<WriteFMul, N1Write_3c_1V>;
+
+// FP multiply accumulate
+def : InstRW<[N1Write_4c_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
+
+// FP round to integral
+def : InstRW<[N1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ][HSD]r$",
+ "^FRINT(32|64)[XZ][SD]r$")>;
+
+
+// FP miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// FP convert, from vec to vec reg
+// FP convert, Javascript from vec to gen reg
+def : SchedAlias<WriteFCvt, N1Write_3c_1V>;
+
+// FP convert, from gen to vec reg
+def : InstRW<[N1Write_6c_1M_1V0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
+
+// FP convert, from vec to gen reg
+def : InstRW<[N1Write_4c_1V0_1V1], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;
+
+// FP move, immed
+def : SchedAlias<WriteFImm, N1Write_2c_1V>;
+
+// FP move, register
+def : InstRW<[N1Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
+
+// FP transfer, from gen to low half of vec reg
+// FP transfer, from gen to high half of vec reg
+def : InstRW<[N1Write_3c_1M], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
+ FMOVXDHighr)>;
+
+// FP transfer, from vec to gen reg
+def : SchedAlias<WriteFCopy, N1Write_2c_1V1>;
+
+
+// FP load instructions
+// -----------------------------------------------------------------------------
+
+// Load vector reg, literal, S/D/Q forms
+// Load vector reg, unscaled immed
+def : InstRW<[N1Write_5c_1L, ReadAdrBase], (instregex "^LDR[SDQ]l$",
+ "^LDUR[BHSDQ]i$")>;
+
+// Load vector reg, immed post-index
+// Load vector reg, immed pre-index
+def : InstRW<[N1Write_5c_1L, WriteAdr],
+ (instregex "^LDR[BHSDQ](post|pre)$")>;
+
+// Load vector reg, unsigned immed
+def : InstRW<[N1Write_5c_1I_1L], (instregex "^LDR[BHSDQ]ui$")>;
+
+// Load vector reg, register offset, basic
+// Load vector reg, register offset, scale, S/D-form
+// Load vector reg, register offset, extend
+// Load vector reg, register offset, extend, scale, S/D-form
+def : InstRW<[N1Write_5c_1I_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;
+
+// Load vector reg, register offset, scale, H/Q-form
+// Load vector reg, register offset, extend, scale, H/Q-form
+def : InstRW<[N1Write_6c_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;
+
+// Load vector pair, immed offset, S/D-form
+def : InstRW<[N1Write_5c_1I_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;
+
+// Load vector pair, immed offset, H/Q-form
+def : InstRW<[N1Write_7c_1I_1L, WriteLDHi], (instregex "^LDN?P[HQ]i$")>;
+
+// Load vector pair, immed post-index, S/D-form
+// Load vector pair, immed pre-index, S/D-form
+def : InstRW<[N1Write_5c_1L, WriteLDHi, WriteAdr],
+ (instregex "^LDP[SD](pre|post)$")>;
+
+// Load vector pair, immed post-index, Q-form
+// Load vector pair, immed pre-index, Q-form
+def : InstRW<[N1Write_7c_1L, WriteLDHi, WriteAdr],
+ (instrs LDPQpost, LDPQpre)>;
+
+
+// FP store instructions
+// -----------------------------------------------------------------------------
+
+// Store vector reg, unscaled immed, B/H/S/D-form
+def : InstRW<[N1Write_2c_1I_1L], (instregex "^STUR[BHSD]i$")>;
+
+// Store vector reg, unscaled immed, Q-form
+def : InstRW<[N1Write_2c_2I_2L], (instrs STURQi)>;
+
+// Store vector reg, immed post-index, B/H/S/D-form
+// Store vector reg, immed pre-index, B/H/S/D-form
+def : InstRW<[N1Write_2c_1L_1V, WriteAdr], (instregex "^STR[BHSD](pre|post)$")>;
+
+// Store vector reg, immed pre-index, Q-form
+// Store vector reg, immed post-index, Q-form
+def : InstRW<[N1Write_2c_2L_2V, WriteAdr], (instrs STRQpre, STRQpost)>;
+
+// Store vector reg, unsigned immed, B/H/S/D-form
+def : InstRW<[N1Write_2c_1L_1V], (instregex "^STR[BHSD]ui$")>;
+
+// Store vector reg, unsigned immed, Q-form
+def : InstRW<[N1Write_2c_2L_2V], (instrs STRQui)>;
+
+// Store vector reg, register offset, basic, B/S/D-form
+// Store vector reg, register offset, scale, B/S/D-form
+// Store vector reg, register offset, extend, B/S/D-form
+// Store vector reg, register offset, extend, scale, B/S/D-form
+def : InstRW<[N1Write_2c_1L_1V, ReadAdrBase], (instregex "^STR[BSD]ro[WX]$")>;
+
+// Store vector reg, register offset, basic, H-form
+// Store vector reg, register offset, scale, H-form
+// Store vector reg, register offset, extend, H-form
+// Store vector reg, register offset, extend, scale, H-form
+def : InstRW<[N1Write_2c_1I_1L_1V, ReadAdrBase], (instregex "^STRHro[WX]$")>;
+
+// Store vector reg, register offset, basic, Q-form
+// Store vector reg, register offset, scale, Q-form
+// Store vector reg, register offset, extend, Q-form
+// Store vector reg, register offset, extend, scale, Q-form
+def : InstRW<[N1Write_2c_2L_2V, ReadAdrBase], (instregex "^STRQro[WX]$")>;
+
+// Store vector pair, immed offset, S-form
+def : InstRW<[N1Write_2c_1L_1V], (instrs STPSi, STNPSi)>;
+
+// Store vector pair, immed offset, D-form
+def : InstRW<[N1Write_2c_2L_2V], (instrs STPDi, STNPDi)>;
+
+// Store vector pair, immed offset, Q-form
+def : InstRW<[N1Write_3c_4L_2V], (instrs STPQi, STNPQi)>;
+
+// Store vector pair, immed post-index, S-form
+// Store vector pair, immed pre-index, S-form
+def : InstRW<[N1Write_2c_1L_1V, WriteAdr], (instrs STPSpre, STPSpost)>;
+
+// Store vector pair, immed post-index, D-form
+// Store vector pair, immed pre-index, D-form
+def : InstRW<[N1Write_2c_2L_2V, WriteAdr], (instrs STPDpre, STPDpost)>;
+
+// Store vector pair, immed post-index, Q-form
+// Store vector pair, immed pre-index, Q-form
+def : InstRW<[N1Write_3c_4L_2V, WriteAdr], (instrs STPQpre, STPQpost)>;
+
+
+// ASIMD integer instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD absolute diff
+// ASIMD absolute diff long
+// ASIMD arith, basic
+// ASIMD arith, complex
+// ASIMD arith, pair-wise
+// ASIMD compare
+// ASIMD logical
+// ASIMD max/min, basic and pair-wise
+def : SchedAlias<WriteVd, N1Write_2c_1V>;
+def : SchedAlias<WriteVq, N1Write_2c_1V>;
+
+// ASIMD absolute diff accum
+// ASIMD absolute diff accum long
+def : InstRW<[N1Write_4c_1V1], (instregex "^[SU]ABAL?v")>;
+
+// ASIMD arith, reduce, 4H/4S
+def : InstRW<[N1Write_3c_1V1], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
+
+// ASIMD arith, reduce, 8B/8H
+def : InstRW<[N1Write_5c_1V1_1V], (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
+
+// ASIMD arith, reduce, 16B
+def : InstRW<[N1Write_6c_2V1], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;
+
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[N1Write_3c_1V1], (instregex "^[SU](MAX|MIN)Vv4(i16|i32)v$")>;
+
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[N1Write_5c_1V1_1V], (instregex "^[SU](MAX|MIN)Vv8(i8|i16)v$")>;
+
+// ASIMD max/min, reduce, 16B
+def : InstRW<[N1Write_6c_2V1], (instregex "[SU](MAX|MIN)Vv16i8v$")>;
+
+// ASIMD multiply, D-form
+// ASIMD multiply accumulate, D-form
+// ASIMD multiply accumulate high, D-form
+// ASIMD multiply accumulate saturating long
+// ASIMD multiply long
+// ASIMD multiply accumulate long
+def : InstRW<[N1Write_4c_1V0], (instregex "^MUL(v[14]i16|v[12]i32)$",
+ "^ML[AS](v[14]i16|v[12]i32)$",
+ "^SQ(R)?DMULH(v[14]i16|v[12]i32)$",
+ "^SQRDML[AS]H(v[14]i16|v[12]i32)$",
+ "^SQDML[AS]Lv",
+ "^([SU]|SQD)MULLv",
+ "^[SU]ML[AS]Lv")>;
+
+// ASIMD multiply, Q-form
+// ASIMD multiply accumulate, Q-form
+// ASIMD multiply accumulate high, Q-form
+def : InstRW<[N1Write_5c_2V0], (instregex "^MUL(v8i16|v4i32)$",
+ "^ML[AS](v8i16|v4i32)$",
+ "^SQ(R)?DMULH(v8i16|v4i32)$",
+ "^SQRDML[AS]H(v8i16|v4i32)$")>;
+
+// ASIMD multiply/multiply long (8x8) polynomial, D-form
+def : InstRW<[N1Write_3c_1V0], (instrs PMULv8i8, PMULLv8i8)>;
+
+// ASIMD multiply/multiply long (8x8) polynomial, Q-form
+def : InstRW<[N1Write_4c_2V0], (instrs PMULv16i8, PMULLv16i8)>;
+
+// ASIMD pairwise add and accumulate long
+def : InstRW<[N1Write_4c_1V1], (instregex "^[SU]ADALPv")>;
+
+// ASIMD shift accumulate
+def : InstRW<[N1Write_4c_1V1], (instregex "^[SU]R?SRAv")>;
+
+// ASIMD shift by immed, basic
+// ASIMD shift by immed and insert, basic
+// ASIMD shift by register, basic
+def : InstRW<[N1Write_2c_1V1], (instregex "^SHLL?v", "^SHRNv", "^[SU]SHLLv",
+ "^[SU]SHRv", "^S[LR]Iv", "^[SU]SHLv")>;
+
+// ASIMD shift by immed, complex
+// ASIMD shift by register, complex
+def : InstRW<[N1Write_4c_1V1],
+ (instregex "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$",
+ "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
+ "^SQSHU?RNv", "^[SU]RSHRv", "^UQR?SHRNv",
+ "^[SU]Q?RSHLv", "^[SU]QSHLv")>;
+
+
+// ASIMD FP instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD FP absolute value/difference
+// ASIMD FP arith, normal
+// ASIMD FP compare
+// ASIMD FP max/min, normal
+// ASIMD FP max/min, pairwise
+// ASIMD FP negate
+// Covered by "SchedAlias (WriteV[dq]...)" above
+
+// ASIMD FP convert, long (F16 to F32)
+def : InstRW<[N1Write_4c_2V0], (instregex "^FCVTL(v4|v8)i16$")>;
+
+// ASIMD FP convert, long (F32 to F64)
+def : InstRW<[N1Write_3c_1V0], (instregex "^FCVTL(v2|v4)i32$")>;
+
+// ASIMD FP convert, narrow (F32 to F16)
+def : InstRW<[N1Write_4c_2V0], (instregex "^FCVTN(v4|v8)i16$")>;
+
+// ASIMD FP convert, narrow (F64 to F32)
+def : InstRW<[N1Write_3c_1V0], (instregex "^FCVTN(v2|v4)i32$",
+ "^FCVTXN(v2|v4)f32$")>;
+
+// ASIMD FP convert, other, D-form F32 and Q-form F64
+def : InstRW<[N1Write_3c_1V0], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
+ "^[SU]CVTFv2f(32|64)$")>;
+
+// ASIMD FP convert, other, D-form F16 and Q-form F32
+def : InstRW<[N1Write_4c_2V0], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
+ "^[SU]CVTFv4f(16|32)$")>;
+
+// ASIMD FP convert, other, Q-form F16
+def : InstRW<[N1Write_6c_4V0], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
+ "^[SU]CVTFv8f16$")>;
+
+// ASIMD FP divide, D-form, F16
+// ASIMD FP square root, D-form, F16
+def : InstRW<[N1Write_7c7_1V0], (instrs FDIVv4f16, FSQRTv4f16)>;
+
+// ASIMD FP divide, D-form, F32
+// ASIMD FP square root, D-form, F32
+def : InstRW<[N1Write_10c7_1V0], (instrs FDIVv2f32, FSQRTv2f32)>;
+
+// ASIMD FP divide, Q-form, F16
+// ASIMD FP square root, Q-form, F16
+def : InstRW<[N1Write_13c10_1V0], (instrs FDIVv8f16, FSQRTv8f16)>;
+
+// ASIMD FP divide, Q-form, F32
+// ASIMD FP square root, Q-form, F32
+def : InstRW<[N1Write_10c7_1V0], (instrs FDIVv4f32, FSQRTv4f32)>;
+
+// ASIMD FP divide, Q-form, F64
+def : InstRW<[N1Write_15c7_1V0], (instrs FDIVv2f64)>;
+
+// ASIMD FP square root, Q-form, F64
+def : InstRW<[N1Write_17c7_1V0], (instrs FSQRTv2f64)>;
+
+// ASIMD FP max/min, reduce, F32 and D-form F16
+def : InstRW<[N1Write_5c_1V], (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>;
+
+// ASIMD FP max/min, reduce, Q-form F16
+def : InstRW<[N1Write_8c_3V], (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>;
+
+// ASIMD FP multiply
+def : InstRW<[N1Write_3c_1V], (instregex "^FMULX?v")>;
+
+// ASIMD FP multiply accumulate
+def : InstRW<[N1Write_4c_1V], (instregex "^FML[AS]v")>;
+
+// ASIMD FP multiply accumulate long
+def : InstRW<[N1Write_5c_1V], (instregex "^FML[AS]L2?v")>;
+
+// ASIMD FP round, D-form F32 and Q-form F64
+def : InstRW<[N1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]v2f(32|64)$")>;
+
+// ASIMD FP round, D-form F16 and Q-form F32
+def : InstRW<[N1Write_4c_2V0], (instregex "^FRINT[AIMNPXZ]v4f(16|32)$")>;
+
+// ASIMD FP round, Q-form F16
+def : InstRW<[N1Write_6c_4V0], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
+
+
+// ASIMD miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD bit reverse
+// ASIMD bitwise insert
+// ASIMD count
+// ASIMD duplicate, element
+// ASIMD extract
+// ASIMD extract narrow
+// ASIMD insert, element to element
+// ASIMD move, FP immed
+// ASIMD move, integer immed
+// ASIMD reverse
+// ASIMD table lookup, 1 or 2 table regs
+// ASIMD table lookup extension, 1 table reg
+// ASIMD transfer, element to gen reg
+// ASIMD transpose
+// ASIMD unzip/zip
+// Covered by "SchedAlias (WriteV[dq]...)" above
+
+// ASIMD duplicate, gen reg
+def : InstRW<[N1Write_3c_1M],
+ (instregex "^DUP((v16|v8)i8|(v8|v4)i16|(v4|v2)i32|v2i64)gpr$")>;
+
+// ASIMD extract narrow, saturating
+def : InstRW<[N1Write_4c_1V1], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
+
+// ASIMD reciprocal and square root estimate, D-form F32 and F64
+def : InstRW<[N1Write_3c_1V0], (instrs FRECPEv1i32, FRECPEv2f32, FRECPEv1i64,
+ FRECPXv1i32, FRECPXv1i64,
+ URECPEv2i32,
+ FRSQRTEv1i32, FRSQRTEv2f32, FRSQRTEv1i64,
+ URSQRTEv2i32)>;
+
+// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
+def : InstRW<[N1Write_4c_2V0], (instrs FRECPEv1f16, FRECPEv4f16, FRECPEv4f32,
+ FRECPXv1f16,
+ URECPEv4i32,
+ FRSQRTEv1f16, FRSQRTEv4f16, FRSQRTEv4f32,
+ URSQRTEv4i32)>;
+
+// ASIMD reciprocal and square root estimate, Q-form F16
+def : InstRW<[N1Write_6c_4V0], (instrs FRECPEv8f16,
+ FRSQRTEv8f16)>;
+
+// ASIMD reciprocal step
+def : InstRW<[N1Write_4c_1V], (instregex "^FRECPS(16|32|64)$", "^FRECPSv",
+ "^FRSQRTS(16|32|64)$", "^FRSQRTSv")>;
+
+// ASIMD table lookup, 3 table regs
+// ASIMD table lookup extension, 2 table reg
+def : InstRW<[N1Write_4c_4V], (instrs TBLv8i8Three, TBLv16i8Three,
+ TBXv8i8Two, TBXv16i8Two)>;
+
+// ASIMD table lookup, 4 table regs
+def : InstRW<[N1Write_4c_3V], (instrs TBLv8i8Four, TBLv16i8Four)>;
+
+// ASIMD table lookup extension, 3 table reg
+def : InstRW<[N1Write_6c_3V], (instrs TBXv8i8Three, TBXv16i8Three)>;
+
+// ASIMD table lookup extension, 4 table reg
+def : InstRW<[N1Write_6c_5V], (instrs TBXv8i8Four, TBXv16i8Four)>;
+
+// ASIMD transfer, element to gen reg
+def : InstRW<[N1Write_2c_1V1], (instregex "^SMOVvi(((8|16)to(32|64))|32to64)$",
+ "^UMOVvi(8|16|32|64)$")>;
+
+// ASIMD transfer, gen reg to element
+def : InstRW<[N1Write_5c_1M_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
+
+
+// ASIMD load instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD load, 1 element, multiple, 1 reg
+def : InstRW<[N1Write_5c_1L],
+ (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[N1Write_5c_1L, WriteAdr],
+ (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 2 reg
+def : InstRW<[N1Write_5c_2L],
+ (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[N1Write_5c_2L, WriteAdr],
+ (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 3 reg
+def : InstRW<[N1Write_6c_3L],
+ (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[N1Write_6c_3L, WriteAdr],
+ (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg
+def : InstRW<[N1Write_6c_4L],
+ (instregex "^LD1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[N1Write_6c_4L, WriteAdr],
+ (instregex "^LD1Fourv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+
+// ASIMD load, 1 element, one lane
+// ASIMD load, 1 element, all lanes
+def : InstRW<[N1Write_7c_1L_1V],
+ (instregex "LD1(i|Rv)(8|16|32|64)$",
+ "LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[N1Write_7c_1L_1V, WriteAdr],
+ (instregex "LD1i(8|16|32|64)_POST$",
+ "LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+
+// ASIMD load, 2 element, multiple
+// ASIMD load, 2 element, one lane
+// ASIMD load, 2 element, all lanes
+def : InstRW<[N1Write_7c_2L_2V],
+ (instregex "LD2Twov(8b|16b|4h|8h|2s|4s|2d)$",
+ "LD2i(8|16|32|64)$",
+ "LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[N1Write_7c_2L_2V, WriteAdr],
+ (instregex "LD2Twov(8b|16b|4h|8h|2s|4s|2d)_POST$",
+ "LD2i(8|16|32|64)_POST$",
+ "LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+
+// ASIMD load, 3 element, multiple
+def : InstRW<[N1Write_8c_3L_3V],
+ (instregex "LD3Threev(8b|16b|4h|8h|2s|4s|2d)$")>;
+def : InstRW<[N1Write_8c_3L_3V, WriteAdr],
+ (instregex "LD3Threev(8b|16b|4h|8h|2s|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, one lane
+// ASIMD load, 3 element, all lanes
+def : InstRW<[N1Write_7c_2L_3V],
+ (instregex "LD3i(8|16|32|64)$",
+ "LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[N1Write_7c_2L_3V, WriteAdr],
+ (instregex "LD3i(8|16|32|64)_POST$",
+ "LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+
+// ASIMD load, 4 element, multiple, D-form
+def : InstRW<[N1Write_8c_3L_4V],
+ (instregex "LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[N1Write_8c_3L_4V, WriteAdr],
+ (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
+
+// ASIMD load, 4 element, multiple, Q-form
+def : InstRW<[N1Write_10c_4L_4V],
+ (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[N1Write_10c_4L_4V, WriteAdr],
+ (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, one lane
+// ASIMD load, 4 element, all lanes
+def : InstRW<[N1Write_8c_4L_4V],
+ (instregex "LD4i(8|16|32|64)$",
+ "LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[N1Write_8c_4L_4V, WriteAdr],
+ (instregex "LD4i(8|16|32|64)_POST$",
+ "LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+
+
+// ASIMD store instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD store, 1 element, multiple, 1 reg, D-form
+def : InstRW<[N1Write_2c_1L_1V],
+ (instregex "ST1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[N1Write_2c_1L_1V, WriteAdr],
+ (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[N1Write_2c_1L_1V],
+ (instregex "ST1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[N1Write_2c_1L_1V, WriteAdr],
+ (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, D-form
+def : InstRW<[N1Write_2c_1L_2V],
+ (instregex "ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[N1Write_2c_1L_2V, WriteAdr],
+ (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[N1Write_3c_2L_2V],
+ (instregex "ST1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[N1Write_3c_2L_2V, WriteAdr],
+ (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, D-form
+def : InstRW<[N1Write_3c_2L_3V],
+ (instregex "ST1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[N1Write_3c_2L_3V, WriteAdr],
+ (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[N1Write_4c_3L_3V],
+ (instregex "ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[N1Write_4c_3L_3V, WriteAdr],
+ (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, D-form
+def : InstRW<[N1Write_3c_2L_2V],
+ (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[N1Write_3c_2L_2V, WriteAdr],
+ (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[N1Write_5c_4L_4V],
+ (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[N1Write_5c_4L_4V, WriteAdr],
+ (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, one lane
+def : InstRW<[N1Write_4c_1L_1V],
+ (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[N1Write_4c_1L_1V, WriteAdr],
+ (instregex "ST1i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 2 element, multiple, D-form, B/H/S
+def : InstRW<[N1Write_4c_1L_1V],
+ (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[N1Write_4c_1L_1V, WriteAdr],
+ (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 2 element, multiple, Q-form
+def : InstRW<[N1Write_5c_2L_2V],
+ (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[N1Write_5c_2L_2V, WriteAdr],
+ (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 2 element, one lane
+def : InstRW<[N1Write_4c_1L_1V],
+ (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[N1Write_4c_1L_1V, WriteAdr],
+ (instregex "ST2i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 3 element, multiple, D-form, B/H/S
+def : InstRW<[N1Write_5c_2L_2V],
+ (instregex "ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[N1Write_5c_2L_2V, WriteAdr],
+ (instregex "ST3Threev(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 3 element, multiple, Q-form
+def : InstRW<[N1Write_6c_3L_3V],
+ (instregex "ST3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[N1Write_6c_3L_3V, WriteAdr],
+ (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 3 element, one lane, B/H/S
+def : InstRW<[N1Write_4c_3L_3V],
+ (instregex "ST3i(8|16|32)$")>;
+def : InstRW<[N1Write_4c_3L_3V, WriteAdr],
+ (instregex "ST3i(8|16|32)_POST$")>;
+
+// ASIMD store, 3 element, one lane, D
+def : InstRW<[N1Write_5c_3L_3V],
+ (instrs ST3i64)>;
+def : InstRW<[N1Write_5c_3L_3V, WriteAdr],
+ (instrs ST3i64_POST)>;
+
+// ASIMD store, 4 element, multiple, D-form, B/H/S
+def : InstRW<[N1Write_7c_3L_3V],
+ (instregex "ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[N1Write_7c_3L_3V, WriteAdr],
+ (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 4 element, multiple, Q-form, B/H/S
+def : InstRW<[N1Write_9c_6L_6V],
+ (instregex "ST4Fourv(16b|8h|4s)$")>;
+def : InstRW<[N1Write_9c_6L_6V, WriteAdr],
+ (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
+
+// ASIMD store, 4 element, multiple, Q-form, D
+def : InstRW<[N1Write_6c_4L_4V],
+ (instrs ST4Fourv2d)>;
+def : InstRW<[N1Write_6c_4L_4V, WriteAdr],
+ (instrs ST4Fourv2d_POST)>;
+
+// ASIMD store, 4 element, one lane, B/H/S
+def : InstRW<[N1Write_5c_3L_3V],
+ (instregex "ST4i(8|16|32)$")>;
+def : InstRW<[N1Write_5c_3L_3V, WriteAdr],
+ (instregex "ST4i(8|16|32)_POST$")>;
+
+// ASIMD store, 4 element, one lane, D
+def : InstRW<[N1Write_4c_3L_3V],
+ (instrs ST4i64)>;
+def : InstRW<[N1Write_4c_3L_3V, WriteAdr],
+ (instrs ST4i64_POST)>;
+
+
+// Cryptography extensions
+// -----------------------------------------------------------------------------
+
+// Crypto AES ops
+def N1WriteVC : WriteSequence<[N1Write_2c_1V0]>;
+def N1ReadVC : SchedReadAdvance<2, [N1WriteVC]>;
+def : InstRW<[N1WriteVC], (instrs AESDrr, AESErr)>;
+def : InstRW<[N1Write_2c_1V0, N1ReadVC], (instrs AESMCrr, AESIMCrr)>;
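+// (Explanatory: the SchedReadAdvance above lets AESMC/AESIMC read a result
+// produced by AESE/AESD (N1WriteVC) two cycles early, so a dependent
+// AESE+AESMC pair behaves as if fused, with the chain costing ~2 cycles.)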
+
+// Crypto polynomial (64x64) multiply long
+// Crypto SHA1 hash acceleration op
+// Crypto SHA1 schedule acceleration ops
+// Crypto SHA256 schedule acceleration ops
+def : InstRW<[N1Write_2c_1V0], (instregex "^PMULLv[12]i64$",
+ "^SHA1(H|SU0|SU1)rr",
+ "^SHA256SU[01]rr")>;
+
+// Crypto SHA1 hash acceleration ops
+// Crypto SHA256 hash acceleration ops
+def : InstRW<[N1Write_4c_1V0], (instregex "^SHA1[CMP]rrr$",
+ "^SHA256H2?rrr$")>;
+
+
+// CRC
+// -----------------------------------------------------------------------------
+
+// CRC checksum ops
+def : InstRW<[N1Write_2c_1M], (instregex "^CRC32C?[BHWX]rr$")>;
+
+
+}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
index 21a0e927d756..6bb71f2ce236 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td
@@ -609,6 +609,16 @@ def N2Write_11cyc_9L01_9S_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
let NumMicroOps = 27;
}
+//===----------------------------------------------------------------------===//
+// Define types for arithmetic and logical ops with short shifts
+def N2Write_Arith : SchedWriteVariant<[
+ SchedVar<IsCheapLSL, [N2Write_1cyc_1I]>,
+ SchedVar<NoSchedPred, [N2Write_2cyc_1M]>]>;
+
+def N2Write_Logical: SchedWriteVariant<[
+ SchedVar<NeoverseNoLSL, [N2Write_1cyc_1I]>,
+ SchedVar<NoSchedPred, [N2Write_2cyc_1M]>]>;
+
// Miscellaneous
// -----------------------------------------------------------------------------
@@ -636,9 +646,20 @@ def : InstRW<[N2Write_1cyc_1B_1S], (instrs BL, BLR)>;
def : SchedAlias<WriteI, N2Write_1cyc_1I>;
// ALU, extend and shift
-def : SchedAlias<WriteISReg, N2Write_2cyc_1M>;
def : SchedAlias<WriteIEReg, N2Write_2cyc_1M>;
+// Arithmetic, LSL shift, shift <= 4
+// Arithmetic, flagset, LSL shift, shift <= 4
+// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
+def : SchedAlias<WriteISReg, N2Write_Arith>;
+
+// Logical, shift, no flagset
+def : InstRW<[N2Write_1cyc_1I],
+ (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
+
+// Logical, shift, flagset
+def : InstRW<[N2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
+
// Arithmetic, immediate to logical address tag
def : InstRW<[N2Write_2cyc_1M], (instrs ADDG, SUBG)>;
@@ -758,8 +779,8 @@ def : InstRW<[N2Write_1cyc_1L01_1D_1I], (instrs STGPreIndex, STGPostIndex,
// Store allocation tag to two granules, zeroing, signed offset
// Store allocation tag and reg pair to memory, signed offset
// Store multiple allocation tags
-def : InstRW<[N2Write_1cyc_1L01_1D], (instrs STGOffset, ST2GOffset, STZGOffset,
- STZ2GOffset, STGPi, STGM, STZGM)>;
+def : InstRW<[N2Write_1cyc_1L01_1D], (instrs STGi, ST2Gi, STZGi,
+ STZ2Gi, STGPi, STGM, STZGM)>;
// FP data processing instructions
// -----------------------------------------------------------------------------
@@ -1210,7 +1231,7 @@ def : InstRW<[N2Write_6cyc_4V], (instrs TBXv8i8Three, TBXv16i8Three)>;
def : InstRW<[N2Write_6cyc_8V], (instrs TBXv8i8Four, TBXv16i8Four)>;
// ASIMD transfer, gen reg to element
-def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSv")>;
+def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
// ASIMD load instructions
// -----------------------------------------------------------------------------
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
new file mode 100644
index 000000000000..571f290bbf83
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td
@@ -0,0 +1,1861 @@
+//=- AArch64SchedNeoverseV1.td - NeoverseV1 Scheduling Model -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for the Arm Neoverse V1 processors.
+//
+// References:
+// - "Arm Neoverse V1 Software Optimization Guide"
+// - "Arm Neoverse V1 Platform: Unleashing a new performance tier for Arm-based computing"
+// https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/neoverse-v1-platform-a-new-performance-tier-for-arm
+// - "Neoverse V1"
+// https://en.wikichip.org/wiki/arm_holdings/microarchitectures/neoverse_v1
+//
+//
+//===----------------------------------------------------------------------===//
+
+def NeoverseV1Model : SchedMachineModel {
+ let IssueWidth = 15; // Maximum micro-ops dispatch rate.
+ let MicroOpBufferSize = 256; // Micro-op re-order buffer.
+ let LoadLatency = 4; // Optimistic load latency.
+ let MispredictPenalty = 11; // Cycle cost of a mispredicted branch.
+ let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57.
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = !listconcat(SVE2Unsupported.F,
+ SMEUnsupported.F,
+ [HasMTE]);
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Neoverse V1.
+// Instructions are first fetched and then decoded into internal macro-ops
+// (MOPs). From there, the MOPs proceed through register renaming and dispatch
+// stages. A MOP can be split into one or more micro-ops further down the
+// pipeline, after the decode stage. Once dispatched, micro-ops wait for their
+// operands and issue out-of-order to one of the issue pipelines. Each issue
+// pipeline can accept one micro-op per cycle.
+
+let SchedModel = NeoverseV1Model in {
+
+// Define the issue ports.
+def V1UnitB : ProcResource<2>; // Branch 0/1
+def V1UnitS : ProcResource<2>; // Integer single cycle 0/1
+def V1UnitM0 : ProcResource<1>; // Integer multicycle 0
+def V1UnitM1 : ProcResource<1>; // Integer multicycle 1
+def V1UnitL01 : ProcResource<2>; // Load/Store 0/1
+def V1UnitL2 : ProcResource<1>; // Load 2
+def V1UnitD : ProcResource<2>; // Store data 0/1
+def V1UnitV0 : ProcResource<1>; // FP/ASIMD 0
+def V1UnitV1 : ProcResource<1>; // FP/ASIMD 1
+def V1UnitV2 : ProcResource<1>; // FP/ASIMD 2
+def V1UnitV3 : ProcResource<1>; // FP/ASIMD 3
+
+def V1UnitI : ProcResGroup<[V1UnitS,
+ V1UnitM0, V1UnitM1]>; // Integer units
+def V1UnitJ : ProcResGroup<[V1UnitS, V1UnitM0]>; // Integer 0-2 units
+def V1UnitM : ProcResGroup<[V1UnitM0, V1UnitM1]>; // Integer multicycle units
+def V1UnitL : ProcResGroup<[V1UnitL01, V1UnitL2]>; // Load units
+def V1UnitV : ProcResGroup<[V1UnitV0, V1UnitV1,
+ V1UnitV2, V1UnitV3]>; // FP/ASIMD units
+def V1UnitV01 : ProcResGroup<[V1UnitV0, V1UnitV1]>; // FP/ASIMD 0/1 units
+def V1UnitV02 : ProcResGroup<[V1UnitV0, V1UnitV2]>; // FP/ASIMD 0/2 units
+def V1UnitV13 : ProcResGroup<[V1UnitV1, V1UnitV3]>; // FP/ASIMD 1/3 units
+
+// Define commonly used read types.
+
+// No generic forwarding is provided for these types.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadST, 0>;
+def : ReadAdvance<ReadVLD, 0>;
+
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+
+
+//===----------------------------------------------------------------------===//
+// Define generic 0 micro-op types
+
+let Latency = 0, NumMicroOps = 0 in
+def V1Write_0c_0Z : SchedWriteRes<[]>;
+
+
+//===----------------------------------------------------------------------===//
+// Define generic 1 micro-op types
+
+def V1Write_1c_1B : SchedWriteRes<[V1UnitB]> { let Latency = 1; }
+def V1Write_1c_1I : SchedWriteRes<[V1UnitI]> { let Latency = 1; }
+def V1Write_1c_1J : SchedWriteRes<[V1UnitJ]> { let Latency = 1; }
+def V1Write_4c_1L : SchedWriteRes<[V1UnitL]> { let Latency = 4; }
+def V1Write_6c_1L : SchedWriteRes<[V1UnitL]> { let Latency = 6; }
+def V1Write_1c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 1; }
+def V1Write_4c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 4; }
+def V1Write_6c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 6; }
+def V1Write_2c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 2; }
+def V1Write_3c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 3; }
+def V1Write_4c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 4; }
+def V1Write_1c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 1; }
+def V1Write_2c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 2; }
+def V1Write_3c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 3; }
+def V1Write_5c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 5; }
+def V1Write_12c5_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 12;
+ let ResourceCycles = [5]; }
+def V1Write_20c5_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 20;
+ let ResourceCycles = [5]; }
+def V1Write_2c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 2; }
+def V1Write_3c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 3; }
+def V1Write_4c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 4; }
+def V1Write_5c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 5; }
+def V1Write_2c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 2; }
+def V1Write_3c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 3; }
+def V1Write_4c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 4; }
+def V1Write_6c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 6; }
+def V1Write_10c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 10;
+ let ResourceCycles = [7]; }
+def V1Write_12c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 12;
+ let ResourceCycles = [7]; }
+def V1Write_13c10_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 13;
+ let ResourceCycles = [10]; }
+def V1Write_15c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 15;
+ let ResourceCycles = [7]; }
+def V1Write_16c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 16;
+ let ResourceCycles = [7]; }
+def V1Write_20c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 20;
+ let ResourceCycles = [7]; }
+def V1Write_2c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 2; }
+def V1Write_3c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 3; }
+def V1Write_4c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 4; }
+def V1Write_5c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 5; }
+def V1Write_3c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 3; }
+def V1Write_4c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 4; }
+def V1Write_7c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 7;
+ let ResourceCycles = [7]; }
+def V1Write_10c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10;
+ let ResourceCycles = [7]; }
+def V1Write_13c5_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
+ let ResourceCycles = [5]; }
+def V1Write_13c11_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13;
+ let ResourceCycles = [11]; }
+def V1Write_15c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 15;
+ let ResourceCycles = [7]; }
+def V1Write_16c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 16;
+ let ResourceCycles = [7]; }
+def V1Write_2c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 2; }
+def V1Write_3c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 3; }
+def V1Write_4c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 4; }
+def V1Write_2c_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 2; }
+def V1Write_4c_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 4; }
+
+//===----------------------------------------------------------------------===//
+// Define generic 2 micro-op types
+
+let Latency = 1, NumMicroOps = 2 in
+def V1Write_1c_1B_1S : SchedWriteRes<[V1UnitB, V1UnitS]>;
+let Latency = 6, NumMicroOps = 2 in
+def V1Write_6c_1B_1M0 : SchedWriteRes<[V1UnitB, V1UnitM0]>;
+let Latency = 3, NumMicroOps = 2 in
+def V1Write_3c_1I_1M : SchedWriteRes<[V1UnitI, V1UnitM]>;
+let Latency = 5, NumMicroOps = 2 in
+def V1Write_5c_1I_1L : SchedWriteRes<[V1UnitI, V1UnitL]>;
+let Latency = 7, NumMicroOps = 2 in
+def V1Write_7c_1I_1L : SchedWriteRes<[V1UnitI, V1UnitL]>;
+let Latency = 6, NumMicroOps = 2 in
+def V1Write_6c_2L : SchedWriteRes<[V1UnitL, V1UnitL]>;
+let Latency = 6, NumMicroOps = 2 in
+def V1Write_6c_1L_1M : SchedWriteRes<[V1UnitL, V1UnitM]>;
+let Latency = 8, NumMicroOps = 2 in
+def V1Write_8c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>;
+let Latency = 9, NumMicroOps = 2 in
+def V1Write_9c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>;
+let Latency = 11, NumMicroOps = 2 in
+def V1Write_11c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>;
+let Latency = 1, NumMicroOps = 2 in
+def V1Write_1c_1L01_1D : SchedWriteRes<[V1UnitL01, V1UnitD]>;
+let Latency = 6, NumMicroOps = 2 in
+def V1Write_6c_1L01_1S : SchedWriteRes<[V1UnitL01, V1UnitS]>;
+let Latency = 7, NumMicroOps = 2 in
+def V1Write_7c_1L01_1S : SchedWriteRes<[V1UnitL01, V1UnitS]>;
+let Latency = 2, NumMicroOps = 2 in
+def V1Write_2c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>;
+let Latency = 4, NumMicroOps = 2 in
+def V1Write_4c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>;
+let Latency = 6, NumMicroOps = 2 in
+def V1Write_6c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>;
+let Latency = 2, NumMicroOps = 2 in
+def V1Write_2c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>;
+let Latency = 4, NumMicroOps = 2 in
+def V1Write_4c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>;
+let Latency = 2, NumMicroOps = 2 in
+def V1Write_2c_2M0 : SchedWriteRes<[V1UnitM0, V1UnitM0]>;
+let Latency = 3, NumMicroOps = 2 in
+def V1Write_3c_2M0 : SchedWriteRes<[V1UnitM0, V1UnitM0]>;
+let Latency = 9, NumMicroOps = 2 in
+def V1Write_9c_1M0_1L : SchedWriteRes<[V1UnitM0, V1UnitL]>;
+let Latency = 5, NumMicroOps = 2 in
+def V1Write_5c_1M0_1V : SchedWriteRes<[V1UnitM0, V1UnitV]>;
+let Latency = 4, NumMicroOps = 2 in
+def V1Write_4c_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV0]>;
+let Latency = 7, NumMicroOps = 2 in
+def V1Write_7c_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV0]>;
+let Latency = 5, NumMicroOps = 2 in
+def V1Write_5c_1M0_1V01 : SchedWriteRes<[V1UnitM0, V1UnitV01]>;
+let Latency = 6, NumMicroOps = 2 in
+def V1Write_6c_1M0_1V1 : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
+let Latency = 9, NumMicroOps = 2 in
+def V1Write_9c_1M0_1V1 : SchedWriteRes<[V1UnitM0, V1UnitV1]>;
+let Latency = 4, NumMicroOps = 2 in
+def V1Write_4c_2V : SchedWriteRes<[V1UnitV, V1UnitV]>;
+let Latency = 8, NumMicroOps = 2 in
+def V1Write_8c_1V_1V01 : SchedWriteRes<[V1UnitV, V1UnitV01]>;
+let Latency = 4, NumMicroOps = 2 in
+def V1Write_4c_2V0 : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
+let Latency = 5, NumMicroOps = 2 in
+def V1Write_5c_2V0 : SchedWriteRes<[V1UnitV0, V1UnitV0]>;
+let Latency = 2, NumMicroOps = 2 in
+def V1Write_2c_2V01 : SchedWriteRes<[V1UnitV01, V1UnitV01]>;
+let Latency = 4, NumMicroOps = 2 in
+def V1Write_4c_2V01 : SchedWriteRes<[V1UnitV01, V1UnitV01]>;
+let Latency = 4, NumMicroOps = 2 in
+def V1Write_4c_2V02 : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
+let Latency = 6, NumMicroOps = 2 in
+def V1Write_6c_2V02 : SchedWriteRes<[V1UnitV02, V1UnitV02]>;
+let Latency = 4, NumMicroOps = 2 in
+def V1Write_4c_1V13_1V : SchedWriteRes<[V1UnitV13, V1UnitV]>;
+let Latency = 4, NumMicroOps = 2 in
+def V1Write_4c_2V13 : SchedWriteRes<[V1UnitV13, V1UnitV13]>;
+
+//===----------------------------------------------------------------------===//
+// Define generic 3 micro-op types
+
+let Latency = 2, NumMicroOps = 3 in
+def V1Write_2c_1I_1L01_1V01 : SchedWriteRes<[V1UnitI, V1UnitL01, V1UnitV01]>;
+let Latency = 7, NumMicroOps = 3 in
+def V1Write_7c_2M0_1V01 : SchedWriteRes<[V1UnitM0, V1UnitM0, V1UnitV01]>;
+let Latency = 8, NumMicroOps = 3 in
+def V1Write_8c_1L_2V : SchedWriteRes<[V1UnitL, V1UnitV, V1UnitV]>;
+let Latency = 6, NumMicroOps = 3 in
+def V1Write_6c_3L : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL]>;
+let Latency = 2, NumMicroOps = 3 in
+def V1Write_2c_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>;
+let Latency = 4, NumMicroOps = 3 in
+def V1Write_4c_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>;
+let Latency = 2, NumMicroOps = 3 in
+def V1Write_2c_2L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitV01]>;
+let Latency = 6, NumMicroOps = 3 in
+def V1Write_6c_3V : SchedWriteRes<[V1UnitV, V1UnitV, V1UnitV]>;
+let Latency = 4, NumMicroOps = 3 in
+def V1Write_4c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
+let Latency = 6, NumMicroOps = 3 in
+def V1Write_6c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
+let Latency = 8, NumMicroOps = 3 in
+def V1Write_8c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>;
+
+//===----------------------------------------------------------------------===//
+// Define generic 4 micro-op types
+
+let Latency = 8, NumMicroOps = 4 in
+def V1Write_8c_2M0_2V0 : SchedWriteRes<[V1UnitM0, V1UnitM0,
+ V1UnitV0, V1UnitV0]>;
+let Latency = 7, NumMicroOps = 4 in
+def V1Write_7c_4L : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, V1UnitL]>;
+let Latency = 8, NumMicroOps = 4 in
+def V1Write_8c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL,
+ V1UnitV, V1UnitV]>;
+let Latency = 9, NumMicroOps = 4 in
+def V1Write_9c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL,
+ V1UnitV, V1UnitV]>;
+let Latency = 11, NumMicroOps = 4 in
+def V1Write_11c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL,
+ V1UnitV, V1UnitV]>;
+let Latency = 10, NumMicroOps = 4 in
+def V1Write_10c_2L01_2V : SchedWriteRes<[V1UnitL01, V1UnitL01,
+ V1UnitV, V1UnitV]>;
+let Latency = 2, NumMicroOps = 4 in
+def V1Write_2c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
+ V1UnitV01, V1UnitV01]>;
+let Latency = 4, NumMicroOps = 4 in
+def V1Write_4c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
+ V1UnitV01, V1UnitV01]>;
+let Latency = 8, NumMicroOps = 4 in
+def V1Write_8c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
+ V1UnitV01, V1UnitV01]>;
+let Latency = 9, NumMicroOps = 4 in
+def V1Write_9c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
+ V1UnitV01, V1UnitV01]>;
+let Latency = 10, NumMicroOps = 4 in
+def V1Write_10c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
+ V1UnitV01, V1UnitV01]>;
+let Latency = 10, NumMicroOps = 4 in
+def V1Write_10c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01,
+ V1UnitV1, V1UnitV1]>;
+let Latency = 12, NumMicroOps = 4 in
+def V1Write_12c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01,
+ V1UnitV1, V1UnitV1]>;
+let Latency = 6, NumMicroOps = 4 in
+def V1Write_6c_4V0 : SchedWriteRes<[V1UnitV0, V1UnitV0,
+ V1UnitV0, V1UnitV0]>;
+let Latency = 12, NumMicroOps = 4 in
+def V1Write_12c_4V01 : SchedWriteRes<[V1UnitV01, V1UnitV01,
+ V1UnitV01, V1UnitV01]>;
+let Latency = 6, NumMicroOps = 4 in
+def V1Write_6c_4V02 : SchedWriteRes<[V1UnitV02, V1UnitV02,
+                                     V1UnitV02, V1UnitV02]>;
+
+//===----------------------------------------------------------------------===//
+// Define generic 5 micro-op types
+
+let Latency = 8, NumMicroOps = 5 in
+def V1Write_8c_2L_3V : SchedWriteRes<[V1UnitL, V1UnitL,
+ V1UnitV, V1UnitV, V1UnitV]>;
+let Latency = 14, NumMicroOps = 5 in
+def V1Write_14c_1V_1V0_2V1_1V13 : SchedWriteRes<[V1UnitV,
+ V1UnitV0,
+ V1UnitV1, V1UnitV1,
+ V1UnitV13]>;
+let Latency = 9, NumMicroOps = 5 in
+def V1Write_9c_1V_4V01 : SchedWriteRes<[V1UnitV,
+ V1UnitV01, V1UnitV01,
+ V1UnitV01, V1UnitV01]>;
+let Latency = 6, NumMicroOps = 5 in
+def V1Write_6c_5V01 : SchedWriteRes<[V1UnitV01, V1UnitV01,
+ V1UnitV01, V1UnitV01, V1UnitV01]>;
+
+//===----------------------------------------------------------------------===//
+// Define generic 6 micro-op types
+
+let Latency = 6, NumMicroOps = 6 in
+def V1Write_6c_3L_3V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
+ V1UnitV, V1UnitV, V1UnitV]>;
+let Latency = 8, NumMicroOps = 6 in
+def V1Write_8c_3L_3V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
+ V1UnitV, V1UnitV, V1UnitV]>;
+let Latency = 2, NumMicroOps = 6 in
+def V1Write_2c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
+ V1UnitV01, V1UnitV01, V1UnitV01]>;
+let Latency = 5, NumMicroOps = 6 in
+def V1Write_5c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
+ V1UnitV01, V1UnitV01, V1UnitV01]>;
+let Latency = 6, NumMicroOps = 6 in
+def V1Write_6c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
+ V1UnitV01, V1UnitV01, V1UnitV01]>;
+let Latency = 11, NumMicroOps = 6 in
+def V1Write_11c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
+ V1UnitV01, V1UnitV01, V1UnitV01]>;
+let Latency = 11, NumMicroOps = 6 in
+def V1Write_11c_1V_5V01 : SchedWriteRes<[V1UnitV,
+ V1UnitV01, V1UnitV01,
+ V1UnitV01, V1UnitV01, V1UnitV01]>;
+let Latency = 13, NumMicroOps = 6 in
+def V1Write_13c_6V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01,
+ V1UnitV01, V1UnitV01, V1UnitV01]>;
+
+//===----------------------------------------------------------------------===//
+// Define generic 7 micro-op types
+
+let Latency = 8, NumMicroOps = 7 in
+def V1Write_8c_3L_4V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
+ V1UnitV, V1UnitV, V1UnitV, V1UnitV]>;
+let Latency = 13, NumMicroOps = 7 in
+def V1Write_13c_3L01_1S_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
+ V1UnitS,
+ V1UnitV01, V1UnitV01, V1UnitV01]>;
+
+//===----------------------------------------------------------------------===//
+// Define generic 8 micro-op types
+
+let Latency = 9, NumMicroOps = 8 in
+def V1Write_9c_4L_4V : SchedWriteRes<[V1UnitL, V1UnitL,
+ V1UnitL, V1UnitL,
+ V1UnitV, V1UnitV,
+ V1UnitV, V1UnitV]>;
+let Latency = 2, NumMicroOps = 8 in
+def V1Write_2c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
+ V1UnitL01, V1UnitL01,
+ V1UnitV01, V1UnitV01,
+ V1UnitV01, V1UnitV01]>;
+let Latency = 4, NumMicroOps = 8 in
+def V1Write_4c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
+ V1UnitL01, V1UnitL01,
+ V1UnitV01, V1UnitV01,
+ V1UnitV01, V1UnitV01]>;
+let Latency = 12, NumMicroOps = 8 in
+def V1Write_12c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
+ V1UnitL01, V1UnitL01,
+ V1UnitV01, V1UnitV01,
+ V1UnitV01, V1UnitV01]>;
+
+//===----------------------------------------------------------------------===//
+// Define generic 10 micro-op types
+
+let Latency = 13, NumMicroOps = 10 in
+def V1Write_13c_4L01_2S_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
+ V1UnitL01, V1UnitL01,
+ V1UnitS, V1UnitS,
+ V1UnitV01, V1UnitV01,
+ V1UnitV01, V1UnitV01]>;
+let Latency = 7, NumMicroOps = 10 in
+def V1Write_7c_5L01_5V : SchedWriteRes<[V1UnitL01, V1UnitL01,
+ V1UnitL01, V1UnitL01, V1UnitL01,
+ V1UnitV, V1UnitV,
+ V1UnitV, V1UnitV, V1UnitV]>;
+let Latency = 11, NumMicroOps = 10 in
+def V1Write_11c_10V0 : SchedWriteRes<[V1UnitV0,
+ V1UnitV0, V1UnitV0, V1UnitV0,
+ V1UnitV0, V1UnitV0, V1UnitV0,
+ V1UnitV0, V1UnitV0, V1UnitV0]>;
+
+//===----------------------------------------------------------------------===//
+// Define generic 12 micro-op types
+
+let Latency = 7, NumMicroOps = 12 in
+def V1Write_7c_6L01_6V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
+ V1UnitL01, V1UnitL01, V1UnitL01,
+ V1UnitV01, V1UnitV01, V1UnitV01,
+ V1UnitV01, V1UnitV01, V1UnitV01]>;
+
+//===----------------------------------------------------------------------===//
+// Define generic 15 micro-op types
+
+let Latency = 7, NumMicroOps = 15 in
+def V1Write_7c_5L01_5S_5V : SchedWriteRes<[V1UnitL01, V1UnitL01,
+ V1UnitL01, V1UnitL01, V1UnitL01,
+ V1UnitS, V1UnitS,
+ V1UnitS, V1UnitS, V1UnitS,
+ V1UnitV, V1UnitV,
+ V1UnitV, V1UnitV, V1UnitV]>;
+
+//===----------------------------------------------------------------------===//
+// Define generic 18 micro-op types
+
+let Latency = 11, NumMicroOps = 18 in
+def V1Write_11c_9L01_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
+ V1UnitL01, V1UnitL01, V1UnitL01,
+ V1UnitL01, V1UnitL01, V1UnitL01,
+ V1UnitV, V1UnitV, V1UnitV,
+ V1UnitV, V1UnitV, V1UnitV,
+ V1UnitV, V1UnitV, V1UnitV]>;
+let Latency = 19, NumMicroOps = 18 in
+def V1Write_19c_18V0 : SchedWriteRes<[V1UnitV0, V1UnitV0, V1UnitV0,
+ V1UnitV0, V1UnitV0, V1UnitV0,
+ V1UnitV0, V1UnitV0, V1UnitV0,
+ V1UnitV0, V1UnitV0, V1UnitV0,
+ V1UnitV0, V1UnitV0, V1UnitV0,
+ V1UnitV0, V1UnitV0, V1UnitV0]>;
+
+//===----------------------------------------------------------------------===//
+// Define generic 27 micro-op types
+
+let Latency = 11, NumMicroOps = 27 in
+def V1Write_11c_9L01_9S_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
+ V1UnitL01, V1UnitL01, V1UnitL01,
+ V1UnitL01, V1UnitL01, V1UnitL01,
+ V1UnitS, V1UnitS, V1UnitS,
+ V1UnitS, V1UnitS, V1UnitS,
+ V1UnitS, V1UnitS, V1UnitS,
+ V1UnitV, V1UnitV, V1UnitV,
+ V1UnitV, V1UnitV, V1UnitV,
+ V1UnitV, V1UnitV, V1UnitV]>;
+
+
+// Miscellaneous Instructions
+// -----------------------------------------------------------------------------
+
+// COPY
+def : InstRW<[V1Write_1c_1I], (instrs COPY)>;
+
+// MSR
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+
+
+// Branch Instructions
+// -----------------------------------------------------------------------------
+
+// Branch, immed
+// Compare and branch
+def : SchedAlias<WriteBr, V1Write_1c_1B>;
+
+// Branch, register
+def : SchedAlias<WriteBrReg, V1Write_1c_1B>;
+
+// Branch and link, immed
+// Branch and link, register
+def : InstRW<[V1Write_1c_1B_1S], (instrs BL, BLR)>;
+
+// Compare and branch
+def : InstRW<[V1Write_1c_1B], (instregex "^[CT]BN?Z[XW]$")>;
+
+
+// Arithmetic and Logical Instructions
+// -----------------------------------------------------------------------------
+
+// ALU, basic
+// Conditional compare
+// Conditional select
+// Logical, basic
+// Address generation
+// Count leading
+// Reverse bits/bytes
+// Move immediate
+def : SchedAlias<WriteI, V1Write_1c_1I>;
+
+// ALU, basic, flagset
+def : InstRW<[V1Write_1c_1J],
+ (instregex "^(ADD|SUB)S[WX]r[ir]$",
+ "^(ADC|SBC)S[WX]r$",
+ "^ANDS[WX]ri$",
+ "^(AND|BIC)S[WX]rr$")>;
+
+// ALU, extend and shift
+def : SchedAlias<WriteIEReg, V1Write_2c_1M>;
+
+// Arithmetic, LSL shift, shift <= 4
+// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
+def V1WriteISReg : SchedWriteVariant<
+ [SchedVar<IsCheapLSL, [V1Write_1c_1I]>,
+ SchedVar<NoSchedPred, [V1Write_2c_1M]>]>;
+def : SchedAlias<WriteISReg, V1WriteISReg>;
+
+// Arithmetic, flagset, LSL shift, shift <= 4
+// Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4
+def V1WriteISRegS : SchedWriteVariant<
+ [SchedVar<IsCheapLSL, [V1Write_1c_1J]>,
+ SchedVar<NoSchedPred, [V1Write_2c_1M]>]>;
+def : InstRW<[V1WriteISRegS],
+ (instregex "^(ADD|SUB)S(([WX]r[sx])|Xrx64)$")>;
+
+// Logical, shift, no flagset
+def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
+
+// Logical, shift, flagset
+def : InstRW<[V1Write_2c_1M], (instregex "^(AND|BIC)S[WX]rs$")>;
+
+// Flag manipulation instructions
+def : InstRW<[V1Write_1c_1J], (instrs SETF8, SETF16, RMIF, CFINV)>;
+
+
+// Divide and multiply instructions
+// -----------------------------------------------------------------------------
+
+// Divide
+def : SchedAlias<WriteID32, V1Write_12c5_1M0>;
+def : SchedAlias<WriteID64, V1Write_20c5_1M0>;
+
+// Multiply
+// Multiply accumulate
+// Multiply accumulate, long
+// Multiply long
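+// The variant below keys on NeoverseMULIdiomPred, which is defined elsewhere;
+// its name suggests (an assumption here) that it matches the MUL/MNEG idiom,
+// i.e. a multiply-add whose accumulator is the zero register. Idiomatic
+// multiplies take the V1Write_2c_1M write, everything else V1Write_2c_1M0.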
+def V1WriteIM : SchedWriteVariant<
+ [SchedVar<NeoverseMULIdiomPred, [V1Write_2c_1M]>,
+ SchedVar<NoSchedPred, [V1Write_2c_1M0]>]>;
+def : SchedAlias<WriteIM32, V1WriteIM>;
+def : SchedAlias<WriteIM64, V1WriteIM>;
+
+// Multiply high
+def : InstRW<[V1Write_3c_1M, ReadIM, ReadIM], (instrs SMULHrr, UMULHrr)>;
+
+
+// Pointer Authentication Instructions (v8.3 PAC)
+// -----------------------------------------------------------------------------
+
+// Authenticate data address
+// Authenticate instruction address
+// Compute pointer authentication code for data address
+// Compute pointer authentication code, using generic key
+// Compute pointer authentication code for instruction address
+def : InstRW<[V1Write_5c_1M0], (instregex "^AUT",
+ "^PAC")>;
+
+// Branch and link, register, with pointer authentication
+// Branch, register, with pointer authentication
+// Branch, return, with pointer authentication
+def : InstRW<[V1Write_6c_1B_1M0], (instregex "^BL?RA[AB]Z?$",
+ "^E?RETA[AB]$")>;
+
+// Load register, with pointer authentication
+def : InstRW<[V1Write_9c_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;
+
+// Strip pointer authentication code
+def : InstRW<[V1Write_2c_1M0], (instrs XPACD, XPACI, XPACLRI)>;
+
+
+// Miscellaneous data-processing instructions
+// -----------------------------------------------------------------------------
+
+// Bitfield extract, one reg
+// Bitfield extract, two regs
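+// The variant below keys on IsRORImmIdiomPred, defined elsewhere; presumably
+// it matches an EXTR whose source registers are identical, i.e. the
+// ROR-immediate alias, which gets the cheap 1-cycle I write, while a genuine
+// two-register extract takes the 3-cycle I+M write.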
+def V1WriteExtr : SchedWriteVariant<
+ [SchedVar<IsRORImmIdiomPred, [V1Write_1c_1I]>,
+ SchedVar<NoSchedPred, [V1Write_3c_1I_1M]>]>;
+def : SchedAlias<WriteExtr, V1WriteExtr>;
+
+// Bitfield move, basic
+// Variable shift
+def : SchedAlias<WriteIS, V1Write_1c_1I>;
+
+// Bitfield move, insert
+def : InstRW<[V1Write_2c_1M], (instregex "^BFM[WX]ri$")>;
+
+// Move immediate
+def : SchedAlias<WriteImm, V1Write_1c_1I>;
+
+
+// Load instructions
+// -----------------------------------------------------------------------------
+
+// Load register, immed offset
+def : SchedAlias<WriteLD, V1Write_4c_1L>;
+
+// Load register, immed offset, index
+def : SchedAlias<WriteLDIdx, V1Write_4c_1L>;
+def : SchedAlias<WriteAdr, V1Write_1c_1I>;
+
+// Load pair, immed offset
+def : SchedAlias<WriteLDHi, V1Write_4c_1L>;
+def : InstRW<[V1Write_4c_1L, V1Write_0c_0Z], (instrs LDPWi, LDNPWi)>;
+def : InstRW<[V1Write_4c_1L, V1Write_0c_0Z, WriteAdr],
+ (instrs LDPWpost, LDPWpre)>;
+
+// Load pair, signed immed offset, signed words
+def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z], (instrs LDPSWi)>;
+
+// Load pair, immed post or pre-index, signed words
+def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z, WriteAdr],
+ (instrs LDPSWpost, LDPSWpre)>;
+
+
+// Store instructions
+// -----------------------------------------------------------------------------
+
+// Store register, immed offset
+def : SchedAlias<WriteST, V1Write_1c_1L01_1D>;
+
+// Store register, immed offset, index
+def : SchedAlias<WriteSTIdx, V1Write_1c_1L01_1D>;
+
+// Store pair, immed offset
+def : SchedAlias<WriteSTP, V1Write_1c_1L01_1D>;
+
+
+// FP data processing instructions
+// -----------------------------------------------------------------------------
+
+// FP absolute value
+// FP arithmetic
+// FP min/max
+// FP negate
+def : SchedAlias<WriteF, V1Write_2c_1V>;
+
+// FP compare
+def : SchedAlias<WriteFCmp, V1Write_2c_1V0>;
+
+// FP divide
+// FP square root
+def : SchedAlias<WriteFDiv, V1Write_10c7_1V02>;
+
+// FP divide, H-form
+// FP square root, H-form
+def : InstRW<[V1Write_7c7_1V02], (instrs FDIVHrr, FSQRTHr)>;
+
+// FP divide, S-form
+// FP square root, S-form
+def : InstRW<[V1Write_10c7_1V02], (instrs FDIVSrr, FSQRTSr)>;
+
+// FP divide, D-form
+def : InstRW<[V1Write_15c7_1V02], (instrs FDIVDrr)>;
+
+// FP square root, D-form
+def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTDr)>;
+
+// FP multiply
+def : SchedAlias<WriteFMul, V1Write_3c_1V>;
+
+// FP multiply accumulate
+def : InstRW<[V1Write_4c_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
+
+// FP round to integral
+def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$",
+ "^FRINT(32|64)[XZ][SD]r$")>;
+
+// FP select
+def : InstRW<[V1Write_2c_1V01], (instregex "^FCSEL[HSD]rrr$")>;
+
+
+// FP miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// FP convert, from gen to vec reg
+def : InstRW<[V1Write_3c_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
+
+// FP convert, from vec to gen reg
+def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;
+
+// FP convert, Javascript from vec to gen reg
+def : InstRW<[V1Write_3c_1V0], (instrs FJCVTZS)>;
+
+// FP convert, from vec to vec reg
+def : SchedAlias<WriteFCvt, V1Write_3c_1V02>;
+
+// FP move, immed
+def : SchedAlias<WriteFImm, V1Write_2c_1V>;
+
+// FP move, register
+def : InstRW<[V1Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
+
+// FP transfer, from gen to low half of vec reg
+def : InstRW<[V1Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
+
+// FP transfer, from gen to high half of vec reg
+def : InstRW<[V1Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
+
+// FP transfer, from vec to gen reg
+def : SchedAlias<WriteFCopy, V1Write_2c_1V1>;
+
+
+// FP load instructions
+// -----------------------------------------------------------------------------
+
+// Load vector reg, literal, S/D/Q forms
+// Load vector reg, unscaled immed
+// Load vector reg, unsigned immed
+def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[SDQ]l$",
+ "^LDUR[BHSDQ]i$",
+ "^LDR[BHSDQ]ui$")>;
+
+// Load vector reg, immed post-index
+// Load vector reg, immed pre-index
+def : InstRW<[V1Write_6c_1L, WriteAdr],
+ (instregex "^LDR[BHSDQ](post|pre)$")>;
+
+// Load vector reg, register offset, basic
+// Load vector reg, register offset, scale, S/D-form
+// Load vector reg, register offset, extend
+// Load vector reg, register offset, extend, scale, S/D-form
+def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;
+
+// Load vector reg, register offset, scale, H/Q-form
+// Load vector reg, register offset, extend, scale, H/Q-form
+def : InstRW<[V1Write_7c_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;
+
+// Load vector pair, immed offset, S/D-form
+def : InstRW<[V1Write_6c_1L, V1Write_0c_0Z], (instregex "^LDN?P[SD]i$")>;
+
+// Load vector pair, immed offset, Q-form
+def : InstRW<[V1Write_6c_1L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
+
+// Load vector pair, immed post-index, S/D-form
+// Load vector pair, immed pre-index, S/D-form
+def : InstRW<[V1Write_6c_1L, V1Write_0c_0Z, WriteAdr],
+ (instregex "^LDP[SD](pre|post)$")>;
+
+// Load vector pair, immed post-index, Q-form
+// Load vector pair, immed pre-index, Q-form
+def : InstRW<[V1Write_6c_1L, WriteLDHi, WriteAdr],
+ (instrs LDPQpost, LDPQpre)>;
+
+
+// FP store instructions
+// -----------------------------------------------------------------------------
+
+// Store vector reg, unscaled immed, B/H/S/D/Q-form
+def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>;
+
+// Store vector reg, immed post-index, B/H/S/D/Q-form
+// Store vector reg, immed pre-index, B/H/S/D/Q-form
+def : InstRW<[V1Write_2c_1L01_1V01, WriteAdr],
+ (instregex "^STR[BHSDQ](pre|post)$")>;
+
+// Store vector reg, unsigned immed, B/H/S/D/Q-form
+def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>;
+
+// Store vector reg, register offset, basic, B/S/D-form
+// Store vector reg, register offset, scale, B/S/D-form
+// Store vector reg, register offset, extend, B/S/D-form
+// Store vector reg, register offset, extend, scale, B/S/D-form
+def : InstRW<[V1Write_2c_1L01_1V01, ReadAdrBase],
+ (instregex "^STR[BSD]ro[WX]$")>;
+
+// Store vector reg, register offset, basic, H/Q-form
+// Store vector reg, register offset, scale, H/Q-form
+// Store vector reg, register offset, extend, H/Q-form
+// Store vector reg, register offset, extend, scale, H/Q-form
+def : InstRW<[V1Write_2c_1I_1L01_1V01, ReadAdrBase],
+ (instregex "^STR[HQ]ro[WX]$")>;
+
+// Store vector pair, immed offset, S/D/Q-form
+def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STN?P[SDQ]i$")>;
+
+// Store vector pair, immed post-index, S/D-form
+// Store vector pair, immed pre-index, S/D-form
+def : InstRW<[V1Write_2c_1L01_1V01, WriteAdr],
+ (instregex "^STP[SD](pre|post)$")>;
+
+// Store vector pair, immed post-index, Q-form
+// Store vector pair, immed pre-index, Q-form
+def : InstRW<[V1Write_2c_2L01_1V01, WriteAdr], (instrs STPQpre, STPQpost)>;
+
+
+// ASIMD integer instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD absolute diff
+// ASIMD absolute diff long
+// ASIMD arith, basic
+// ASIMD arith, complex
+// ASIMD arith, pair-wise
+// ASIMD compare
+// ASIMD logical
+// ASIMD max/min, basic and pair-wise
+def : SchedAlias<WriteVd, V1Write_2c_1V>;
+def : SchedAlias<WriteVq, V1Write_2c_1V>;
+
+// ASIMD absolute diff accum
+// ASIMD absolute diff accum long
+// ASIMD pairwise add and accumulate long
+def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]ABAL?v", "^[SU]ADALPv")>;
+
+// ASIMD arith, reduce, 4H/4S
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[V1Write_2c_1V13], (instregex "^(ADD|[SU]ADDL)Vv4(i16|i32)v$",
+ "^[SU](MAX|MIN)Vv4(i16|i32)v$")>;
+
+// ASIMD arith, reduce, 8B/8H
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[V1Write_4c_1V13_1V], (instregex "^(ADD|[SU]ADDL)Vv8(i8|i16)v$",
+ "^[SU](MAX|MIN)Vv8(i8|i16)v$")>;
+
+// ASIMD arith, reduce, 16B
+// ASIMD max/min, reduce, 16B
+def : InstRW<[V1Write_4c_2V13], (instregex "^(ADD|[SU]ADDL)Vv16i8v$",
+                                           "^[SU](MAX|MIN)Vv16i8v$")>;
+
+// ASIMD dot product
+// ASIMD dot product using signed and unsigned integers
+def : InstRW<[V1Write_2c_1V], (instregex "^([SU]|SU|US)DOT(lane)?v(8|16)i8$")>;
+
+// ASIMD matrix multiply-accumulate
+def : InstRW<[V1Write_3c_1V], (instrs SMMLA, UMMLA, USMMLA)>;
+
+// ASIMD multiply
+// ASIMD multiply accumulate
+// ASIMD multiply accumulate long
+// ASIMD multiply accumulate high
+// ASIMD multiply accumulate saturating long
+def : InstRW<[V1Write_4c_1V02],
+ (instregex "^MUL(v[148]i16|v[124]i32)$",
+ "^SQR?DMULH(v[48]i16|v[24]i32)$",
+ "^ML[AS](v[148]i16|v[124]i32)$",
+ "^[SU]ML[AS]Lv",
+ "^SQRDML[AS]H(v[148]i16|v[124]i32)$",
+ "^SQDML[AS]Lv")>;
+
+// ASIMD multiply/multiply long (8x8) polynomial
+def : InstRW<[V1Write_3c_1V01], (instregex "^PMULL?v(8|16)i8$")>;
+
+// ASIMD multiply long
+def : InstRW<[V1Write_3c_1V02], (instregex "^([SU]|SQD)MULLv")>;
+
+// ASIMD shift accumulate
+// ASIMD shift by immed, complex
+// ASIMD shift by register, complex
+def : InstRW<[V1Write_4c_1V13],
+ (instregex "^[SU]R?SRAv",
+ "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$",
+ "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
+                        "^SQSHRU?Nv", "^[SU]RSHRv", "^UQR?SHRNv",
+ "^[SU]Q?RSHLv", "^[SU]QSHLv")>;
+
+// ASIMD shift by immed, basic
+// ASIMD shift by immed and insert, basic
+// ASIMD shift by register, basic
+def : InstRW<[V1Write_2c_1V13], (instregex "^SHLL?v", "^SHRNv", "^[SU]SHLLv",
+ "^[SU]SHRv", "^S[LR]Iv", "^[SU]SHLv")>;
+
+
+// ASIMD FP instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD FP absolute value/difference
+// ASIMD FP arith, normal
+// ASIMD FP compare
+// ASIMD FP complex add
+// ASIMD FP max/min, normal
+// ASIMD FP max/min, pairwise
+// ASIMD FP negate
+// Covered by "SchedAlias (WriteV[dq]...)" above
+
+// ASIMD FP complex multiply add
+// ASIMD FP multiply accumulate
+def : InstRW<[V1Write_4c_1V], (instregex "^FCADD(v[48]f16|v[24]f32|v2f64)$",
+ "^FML[AS]v")>;
+
+// ASIMD FP convert, long (F16 to F32)
+def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTLv[48]i16$")>;
+
+// ASIMD FP convert, long (F32 to F64)
+def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTLv[24]i32$")>;
+
+// ASIMD FP convert, narrow (F32 to F16)
+def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTNv[48]i16$")>;
+
+// ASIMD FP convert, narrow (F64 to F32)
+def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTNv[24]i32$",
+ "^FCVTXN(v[24]f32|v1i64)$")>;
+
+// ASIMD FP convert, other, D-form F32 and Q-form F64
+def : InstRW<[V1Write_3c_1V02], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
+ "^[SU]CVTFv2f(32|64)$")>;
+
+// ASIMD FP convert, other, D-form F16 and Q-form F32
+def : InstRW<[V1Write_4c_2V02], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
+ "^[SU]CVTFv4f(16|32)$")>;
+
+// ASIMD FP convert, other, Q-form F16
+def : InstRW<[V1Write_6c_4V02], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
+ "^[SU]CVTFv8f16$")>;
+
+// ASIMD FP divide, D-form, F16
+// ASIMD FP square root, D-form, F16
+def : InstRW<[V1Write_7c7_1V02], (instrs FDIVv4f16, FSQRTv4f16)>;
+
+// ASIMD FP divide, F32
+// ASIMD FP square root, F32
+def : InstRW<[V1Write_10c7_1V02], (instrs FDIVv2f32, FDIVv4f32,
+ FSQRTv2f32, FSQRTv4f32)>;
+
+// ASIMD FP divide, Q-form, F16
+def : InstRW<[V1Write_13c5_1V02], (instrs FDIVv8f16)>;
+
+// ASIMD FP divide, Q-form, F64
+def : InstRW<[V1Write_15c7_1V02], (instrs FDIVv2f64)>;
+
+// ASIMD FP square root, Q-form, F16
+def : InstRW<[V1Write_13c11_1V02], (instrs FSQRTv8f16)>;
+
+// ASIMD FP square root, Q-form, F64
+def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTv2f64)>;
+
+// ASIMD FP max/min, reduce, F32 and D-form F16
+def : InstRW<[V1Write_4c_2V], (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>;
+
+// ASIMD FP max/min, reduce, Q-form F16
+def : InstRW<[V1Write_6c_3V], (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>;
+
+// ASIMD FP multiply
+def : InstRW<[V1Write_3c_1V], (instregex "^FMULX?v")>;
+
+// ASIMD FP multiply accumulate long
+def : InstRW<[V1Write_5c_1V], (instregex "^FML[AS]L2?v")>;
+
+// ASIMD FP round, D-form F32 and Q-form F64
+def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ]v2f(32|64)$")>;
+
+// ASIMD FP round, D-form F16 and Q-form F32
+def : InstRW<[V1Write_4c_2V02], (instregex "^FRINT[AIMNPXZ]v4f(16|32)$")>;
+
+// ASIMD FP round, Q-form F16
+def : InstRW<[V1Write_6c_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
+
+
+// ASIMD BF instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD convert, F32 to BF16
+def : InstRW<[V1Write_4c_1V02], (instrs BFCVTN, BFCVTN2)>;
+
+// ASIMD dot product
+def : InstRW<[V1Write_4c_1V], (instregex "^BF(DOT|16DOTlane)v[48]bf16$")>;
+
+// ASIMD matrix multiply accumulate
+def : InstRW<[V1Write_5c_1V], (instrs BFMMLA)>;
+
+// ASIMD multiply accumulate long
+def : InstRW<[V1Write_4c_1V], (instregex "^BFMLAL[BT](Idx)?$")>;
+
+// Scalar convert, F32 to BF16
+def : InstRW<[V1Write_3c_1V02], (instrs BFCVT)>;
+
+
+// ASIMD miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD bit reverse
+// ASIMD bitwise insert
+// ASIMD count
+// ASIMD duplicate, element
+// ASIMD extract
+// ASIMD extract narrow
+// ASIMD insert, element to element
+// ASIMD move, FP immed
+// ASIMD move, integer immed
+// ASIMD reverse
+// ASIMD table lookup, 1 or 2 table regs
+// ASIMD table lookup extension, 1 table reg
+// ASIMD transfer, element to gen reg
+// ASIMD transpose
+// ASIMD unzip/zip
+// Covered by "SchedAlias (WriteV[dq]...)" above
+
+// ASIMD duplicate, gen reg
+def : InstRW<[V1Write_3c_1M0],
+ (instregex "^DUP((v16|v8)i8|(v8|v4)i16|(v4|v2)i32|v2i64)gpr$")>;
+
+// ASIMD extract narrow, saturating
+def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
+
+// ASIMD reciprocal and square root estimate, D-form U32
+// ASIMD reciprocal and square root estimate, D-form F32 and F64
+def : InstRW<[V1Write_3c_1V02], (instrs URECPEv2i32,
+ URSQRTEv2i32,
+ FRECPEv1i32, FRECPEv2f32, FRECPEv1i64,
+ FRSQRTEv1i32, FRSQRTEv2f32, FRSQRTEv1i64)>;
+
+// ASIMD reciprocal and square root estimate, Q-form U32
+// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 and F64
+def : InstRW<[V1Write_4c_1V02], (instrs URECPEv4i32,
+ URSQRTEv4i32,
+ FRECPEv1f16, FRECPEv4f16,
+ FRECPEv4f32, FRECPEv2f64,
+ FRSQRTEv1f16, FRSQRTEv4f16,
+ FRSQRTEv4f32, FRSQRTEv2f64)>;
+
+// ASIMD reciprocal and square root estimate, Q-form F16
+def : InstRW<[V1Write_6c_2V02], (instrs FRECPEv8f16,
+ FRSQRTEv8f16)>;
+
+// ASIMD reciprocal exponent
+def : InstRW<[V1Write_3c_1V02], (instrs FRECPXv1f16, FRECPXv1i32, FRECPXv1i64)>;
+
+// ASIMD reciprocal step
+def : InstRW<[V1Write_4c_1V], (instregex "^FRECPS(16|32|64)$", "^FRECPSv",
+ "^FRSQRTS(16|32|64)$", "^FRSQRTSv")>;
+
+// ASIMD table lookup, 1 or 2 table regs
+// ASIMD table lookup extension, 1 table reg
+def : InstRW<[V1Write_2c_2V01], (instregex "^TBLv(8|16)i8(One|Two)$",
+ "^TBXv(8|16)i8One$")>;
+
+// ASIMD table lookup, 3 table regs
+// ASIMD table lookup extension, 2 table reg
+def : InstRW<[V1Write_4c_2V01], (instrs TBLv8i8Three, TBLv16i8Three,
+ TBXv8i8Two, TBXv16i8Two)>;
+
+// ASIMD table lookup, 4 table regs
+def : InstRW<[V1Write_4c_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>;
+
+// ASIMD table lookup extension, 3 table reg
+def : InstRW<[V1Write_6c_3V01], (instrs TBXv8i8Three, TBXv16i8Three)>;
+
+// ASIMD table lookup extension, 4 table reg
+def : InstRW<[V1Write_6c_5V01], (instrs TBXv8i8Four, TBXv16i8Four)>;
+
+// ASIMD transfer, element to gen reg
+def : InstRW<[V1Write_2c_1V], (instregex "^SMOVvi(((8|16)to(32|64))|32to64)$",
+ "^UMOVvi(8|16|32|64)$")>;
+
+// ASIMD transfer, gen reg to element
+def : InstRW<[V1Write_5c_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
+
+
+// ASIMD load instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD load, 1 element, multiple, 1 reg
+def : InstRW<[V1Write_6c_1L],
+ (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[V1Write_6c_1L, WriteAdr],
+ (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 2 reg
+def : InstRW<[V1Write_6c_2L],
+ (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[V1Write_6c_2L, WriteAdr],
+ (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 3 reg
+def : InstRW<[V1Write_6c_3L],
+ (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[V1Write_6c_3L, WriteAdr],
+ (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg, D-form
+def : InstRW<[V1Write_6c_2L],
+ (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[V1Write_6c_2L, WriteAdr],
+ (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[V1Write_7c_4L],
+ (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[V1Write_7c_4L, WriteAdr],
+ (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, one lane
+// ASIMD load, 1 element, all lanes
+def : InstRW<[V1Write_8c_1L_1V],
+ (instregex "^LD1(i|Rv)(8|16|32|64)$",
+ "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[V1Write_8c_1L_1V, WriteAdr],
+ (instregex "^LD1i(8|16|32|64)_POST$",
+ "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+
+// ASIMD load, 2 element, multiple, D-form
+def : InstRW<[V1Write_8c_1L_2V],
+ (instregex "^LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[V1Write_8c_1L_2V, WriteAdr],
+ (instregex "^LD2Twov(8b|4h|2s)_POST$")>;
+
+// ASIMD load, 2 element, multiple, Q-form
+def : InstRW<[V1Write_8c_2L_2V],
+ (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[V1Write_8c_2L_2V, WriteAdr],
+ (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, one lane
+// ASIMD load, 2 element, all lanes
+def : InstRW<[V1Write_8c_1L_2V],
+ (instregex "^LD2i(8|16|32|64)$",
+ "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[V1Write_8c_1L_2V, WriteAdr],
+ (instregex "^LD2i(8|16|32|64)_POST$",
+ "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+
+// ASIMD load, 3 element, multiple, D-form
+// ASIMD load, 3 element, one lane
+// ASIMD load, 3 element, all lanes
+def : InstRW<[V1Write_8c_2L_3V],
+ (instregex "^LD3Threev(8b|4h|2s)$",
+ "^LD3i(8|16|32|64)$",
+ "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[V1Write_8c_2L_3V, WriteAdr],
+ (instregex "^LD3Threev(8b|4h|2s)_POST$",
+ "^LD3i(8|16|32|64)_POST$",
+ "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+
+// ASIMD load, 3 element, multiple, Q-form
+def : InstRW<[V1Write_8c_3L_3V],
+ (instregex "^LD3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[V1Write_8c_3L_3V, WriteAdr],
+ (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, multiple, D-form
+// ASIMD load, 4 element, one lane
+// ASIMD load, 4 element, all lanes
+def : InstRW<[V1Write_8c_3L_4V],
+ (instregex "^LD4Fourv(8b|4h|2s)$",
+ "^LD4i(8|16|32|64)$",
+ "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
+def : InstRW<[V1Write_8c_3L_4V, WriteAdr],
+ (instregex "^LD4Fourv(8b|4h|2s)_POST$",
+ "^LD4i(8|16|32|64)_POST$",
+ "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
+
+// ASIMD load, 4 element, multiple, Q-form
+def : InstRW<[V1Write_9c_4L_4V],
+ (instregex "^LD4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[V1Write_9c_4L_4V, WriteAdr],
+ (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>;
+
+
+// ASIMD store instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD store, 1 element, multiple, 1 reg
+// ASIMD store, 1 element, multiple, 2 reg, D-form
+def : InstRW<[V1Write_2c_1L01_1V01],
+ (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$",
+ "^ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[V1Write_2c_1L01_1V01, WriteAdr],
+ (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$",
+ "^ST1Twov(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, Q-form
+// ASIMD store, 1 element, multiple, 3 reg, D-form
+// ASIMD store, 1 element, multiple, 4 reg, D-form
+def : InstRW<[V1Write_2c_2L01_2V01],
+ (instregex "^ST1Twov(16b|8h|4s|2d)$",
+ "^ST1Threev(8b|4h|2s|1d)$",
+ "^ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[V1Write_2c_2L01_2V01, WriteAdr],
+ (instregex "^ST1Twov(16b|8h|4s|2d)_POST$",
+ "^ST1Threev(8b|4h|2s|1d)_POST$",
+ "^ST1Fourv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[V1Write_2c_3L01_3V01],
+ (instregex "^ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[V1Write_2c_3L01_3V01, WriteAdr],
+ (instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[V1Write_2c_4L01_4V01],
+ (instregex "^ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[V1Write_2c_4L01_4V01, WriteAdr],
+ (instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, one lane
+// ASIMD store, 2 element, multiple, D-form
+// ASIMD store, 2 element, one lane
+def : InstRW<[V1Write_4c_1L01_1V01],
+ (instregex "^ST1i(8|16|32|64)$",
+ "^ST2Twov(8b|4h|2s)$",
+ "^ST2i(8|16|32|64)$")>;
+def : InstRW<[V1Write_4c_1L01_1V01, WriteAdr],
+ (instregex "^ST1i(8|16|32|64)_POST$",
+ "^ST2Twov(8b|4h|2s)_POST$",
+ "^ST2i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 2 element, multiple, Q-form
+// ASIMD store, 3 element, multiple, D-form
+// ASIMD store, 3 element, one lane
+// ASIMD store, 4 element, one lane, D
+def : InstRW<[V1Write_4c_2L01_2V01],
+ (instregex "^ST2Twov(16b|8h|4s|2d)$",
+ "^ST3Threev(8b|4h|2s)$",
+ "^ST3i(8|16|32|64)$",
+ "^ST4i64$")>;
+def : InstRW<[V1Write_4c_2L01_2V01, WriteAdr],
+ (instregex "^ST2Twov(16b|8h|4s|2d)_POST$",
+ "^ST3Threev(8b|4h|2s)_POST$",
+ "^ST3i(8|16|32|64)_POST$",
+ "^ST4i64_POST$")>;
+
+// ASIMD store, 3 element, multiple, Q-form
+def : InstRW<[V1Write_5c_3L01_3V01],
+ (instregex "^ST3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[V1Write_5c_3L01_3V01, WriteAdr],
+ (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 4 element, multiple, D-form
+def : InstRW<[V1Write_6c_3L01_3V01],
+ (instregex "^ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[V1Write_6c_3L01_3V01, WriteAdr],
+ (instregex "^ST4Fourv(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 4 element, multiple, Q-form, B/H/S
+def : InstRW<[V1Write_7c_6L01_6V01],
+ (instregex "^ST4Fourv(16b|8h|4s)$")>;
+def : InstRW<[V1Write_7c_6L01_6V01, WriteAdr],
+ (instregex "^ST4Fourv(16b|8h|4s)_POST$")>;
+
+// ASIMD store, 4 element, multiple, Q-form, D
+def : InstRW<[V1Write_4c_4L01_4V01],
+ (instrs ST4Fourv2d)>;
+def : InstRW<[V1Write_4c_4L01_4V01, WriteAdr],
+ (instrs ST4Fourv2d_POST)>;
+
+// ASIMD store, 4 element, one lane, B/H/S
+def : InstRW<[V1Write_6c_3L_3V],
+ (instregex "^ST4i(8|16|32)$")>;
+def : InstRW<[V1Write_6c_3L_3V, WriteAdr],
+ (instregex "^ST4i(8|16|32)_POST$")>;
+
+
+// Cryptography extensions
+// -----------------------------------------------------------------------------
+
+// Crypto polynomial (64x64) multiply long
+// Covered by "SchedAlias (WriteV[dq]...)" above
+
+// Crypto AES ops
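+// The pair below uses a 2-cycle SchedReadAdvance: when the source of an
+// AESMC/AESIMC was produced by an AESE/AESD (write type V1WriteVC), the read
+// sees that result two cycles early, so the dependent pair can issue back to
+// back. This appears to model the AESE/AESMC (and AESD/AESIMC) forwarding
+// behaviour of the core.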
+def V1WriteVC : WriteSequence<[V1Write_2c_1V]>;
+def V1ReadVC : SchedReadAdvance<2, [V1WriteVC]>;
+def : InstRW<[V1WriteVC], (instrs AESDrr, AESErr)>;
+def : InstRW<[V1Write_2c_1V, V1ReadVC], (instrs AESMCrr, AESIMCrr)>;
+
+// Crypto SHA1 hash acceleration op
+// Crypto SHA1 schedule acceleration ops
+// Crypto SHA256 schedule acceleration ops
+// Crypto SHA512 hash acceleration ops
+// Crypto SM3 ops
+def : InstRW<[V1Write_2c_1V0], (instregex "^SHA1(H|SU[01])rr$",
+ "^SHA256SU[01]rr$",
+ "^SHA512(H2?|SU[01])$",
+                                           "^SM3(PARTW[12]|SS1|TT[12][AB])$")>;
+
+// Crypto SHA1 hash acceleration ops
+// Crypto SHA256 hash acceleration ops
+// Crypto SM4 ops
+def : InstRW<[V1Write_4c_1V0], (instregex "^SHA1[CMP]rrr$",
+ "^SHA256H2?rrr$",
+ "^SM4E(KEY)?$")>;
+
+// Crypto SHA3 ops
+def : InstRW<[V1Write_2c_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;
+
+
+// CRC instruction
+// -----------------------------------------------------------------------------
+
+// CRC checksum ops
+def : InstRW<[V1Write_2c_1M0], (instregex "^CRC32C?[BHWX]rr$")>;
+
+
+// SVE Predicate instructions
+// -----------------------------------------------------------------------------
+
+// Loop control, based on predicate
+def : InstRW<[V1Write_2c_1M0], (instregex "^BRK[AB]_PP[mz]P$")>;
+def : InstRW<[V1Write_2c_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>;
+
+// Loop control, based on predicate and flag setting
+def : InstRW<[V1Write_3c_2M0], (instrs BRKAS_PPzP, BRKBS_PPzP, BRKNS_PPzP,
+ BRKPAS_PPzPP, BRKPBS_PPzPP)>;
+
+// Loop control, based on GPR
+def : InstRW<[V1Write_3c_2M0], (instregex "^WHILE(LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;
+
+// Loop terminate
+def : InstRW<[V1Write_1c_1M0], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;
+
+// Predicate counting scalar
+// Predicate counting scalar, active predicate
+def : InstRW<[V1Write_2c_1M0], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
+def : InstRW<[V1Write_2c_1M0], (instregex "^(CNT|([SU]Q)?(DEC|INC))[BHWD]_XPiI$",
+ "^SQ(DEC|INC)[BHWD]_XPiWdI$",
+ "^UQ(DEC|INC)[BHWD]_WPiI$",
+ "^CNTP_XPP_[BHSD]$",
+ "^([SU]Q)?(DEC|INC)P_XP_[BHSD]$",
+ "^UQ(DEC|INC)P_WP_[BHSD]$",
+ "^[SU]Q(DEC|INC)P_XPWd_[BHSD]$")>;
+
+// Predicate counting vector, active predicate
+def : InstRW<[V1Write_7c_2M0_1V01], (instregex "^([SU]Q)?(DEC|INC)P_ZP_[HSD]$")>;
+
+// Predicate logical
+def : InstRW<[V1Write_1c_1M0],
+ (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;
+
+// Predicate logical, flag setting
+def : InstRW<[V1Write_2c_2M0],
+ (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)S_PPzPP$")>;
+
+// Predicate reverse
+// Predicate set/initialize/find next
+// Predicate transpose
+// Predicate unpack and widen
+// Predicate zip/unzip
+def : InstRW<[V1Write_2c_1M0], (instregex "^REV_PP_[BHSD]$",
+ "^PFALSE$", "^PFIRST_B$",
+ "^PNEXT_[BHSD]$", "^PTRUE_[BHSD]$",
+ "^TRN[12]_PPP_[BHSDQ]$",
+ "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;
+
+// Predicate set/initialize/find next
+// Predicate unpack and widen
+def : InstRW<[V1Write_2c_1M0], (instrs PTEST_PP,
+ PUNPKHI_PP, PUNPKLO_PP)>;
+
+// Predicate select
+def : InstRW<[V1Write_1c_1M0], (instrs SEL_PPPP)>;
+
+// Predicate set/initialize, set flags
+def : InstRW<[V1Write_3c_2M0], (instregex "^PTRUES_[BHSD]$")>;
+
+
+// SVE integer instructions
+// -----------------------------------------------------------------------------
+
+// Arithmetic, basic
+// Logical
+def : InstRW<[V1Write_2c_1V01],
+ (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]$",
+ "^(ADD|SUB)_Z(I|P[mZ]Z|ZZ)_[BHSD]$",
+ "^ADR_[SU]XTW_ZZZ_D_[0123]$",
+ "^ADR_LSL_ZZZ_[SD]_[0123]$",
+ "^[SU]ABD_ZP[mZ]Z_[BHSD]$",
+ "^[SU](MAX|MIN)_Z(I|P[mZ]Z)_[BHSD]$",
+ "^[SU]Q(ADD|SUB)_Z(I|ZZ)_[BHSD]$",
+ "^SUBR_Z(I|P[mZ]Z)_[BHSD]$",
+ "^(AND|EOR|ORR)_ZI$",
+ "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$",
+ "^EOR(BT|TB)_ZZZ_[BHSD]$",
+ "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>;
+
+// Arithmetic, shift
+def : InstRW<[V1Write_2c_1V1],
+ (instregex "^(ASR|LSL|LSR)_WIDE_Z(Pm|Z)Z_[BHS]",
+ "^(ASR|LSL|LSR)_ZPm[IZ]_[BHSD]",
+ "^(ASR|LSL|LSR)_ZZI_[BHSD]",
+ "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
+ "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
+
+// Arithmetic, shift right for divide
+def : InstRW<[V1Write_4c_1V1], (instregex "^ASRD_ZP[mZ]I_[BHSD]$")>;
+
+// Count/reverse bits
+def : InstRW<[V1Write_2c_1V01], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>;
+
+// Broadcast logical bitmask immediate to vector
+def : InstRW<[V1Write_2c_1V01], (instrs DUPM_ZI)>;
+
+// Compare and set flags
+def : InstRW<[V1Write_4c_1M0_1V0],
+ (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
+ "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;
+
+// Conditional extract operations, scalar form
+def : InstRW<[V1Write_9c_1M0_1V1], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
+
+// Conditional extract operations, SIMD&FP scalar and vector forms
+def : InstRW<[V1Write_3c_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
+ "^COMPACT_ZPZ_[SD]$",
+ "^SPLICE_ZPZZ?_[BHSD]$")>;
+
+// Convert to floating point, 64b to float or convert to double
+def : InstRW<[V1Write_3c_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
+ "^[SU]CVTF_ZPmZ_StoD")>;
+
+// Convert to floating point, 32b to single or half
+def : InstRW<[V1Write_4c_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>;
+
+// Convert to floating point, 16b to half
+def : InstRW<[V1Write_6c_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>;
+
+// Copy, scalar
+def : InstRW<[V1Write_5c_1M0_1V01], (instregex "^CPY_ZPmR_[BHSD]$")>;
+
+// Copy, scalar SIMD&FP or imm
+def : InstRW<[V1Write_2c_1V01], (instregex "^CPY_ZP([mz]I|mV)_[BHSD]$")>;
+
+// Divides, 32 bit
+def : InstRW<[V1Write_12c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>;
+
+// Divides, 64 bit
+def : InstRW<[V1Write_20c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>;
+
+// Dot product, 8 bit
+def : InstRW<[V1Write_3c_1V01], (instregex "^[SU]DOT_ZZZI?_S$")>;
+
+// Dot product, 8 bit, using signed and unsigned integers
+def : InstRW<[V1Write_3c_1V], (instrs SUDOT_ZZZI, USDOT_ZZZ, USDOT_ZZZI)>;
+
+// Dot product, 16 bit
+def : InstRW<[V1Write_4c_1V01], (instregex "^[SU]DOT_ZZZI?_D$")>;
+
+// Duplicate, immediate and indexed form
+def : InstRW<[V1Write_2c_1V01], (instregex "^DUP_ZI_[BHSD]$",
+ "^DUP_ZZI_[BHSDQ]$")>;
+
+// Duplicate, scalar form
+def : InstRW<[V1Write_3c_1M0], (instregex "^DUP_ZR_[BHSD]$")>;
+
+// Extend, sign or zero
+def : InstRW<[V1Write_2c_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]$",
+ "^[SU]XTH_ZPmZ_[SD]$",
+ "^[SU]XTW_ZPmZ_[D]$")>;
+
+// Extract
+def : InstRW<[V1Write_2c_1V01], (instrs EXT_ZZI)>;
+
+// Extract/insert operation, SIMD and FP scalar form
+def : InstRW<[V1Write_3c_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$",
+ "^INSR_ZV_[BHSD]$")>;
+
+// Extract/insert operation, scalar
+def : InstRW<[V1Write_6c_1M0_1V1], (instregex "^LAST[AB]_RPZ_[BHSD]$",
+ "^INSR_ZR_[BHSD]$")>;
+
+// Horizontal operations, B, H, S form, imm, imm
+def : InstRW<[V1Write_4c_1V0], (instregex "^INDEX_II_[BHS]$")>;
+
+// Horizontal operations, B, H, S form, scalar, imm / scalar / imm, scalar
+def : InstRW<[V1Write_7c_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
+
+// Horizontal operations, D form, imm, imm
+def : InstRW<[V1Write_5c_2V0], (instrs INDEX_II_D)>;
+
+// Horizontal operations, D form, scalar, imm / scalar / imm, scalar
+def : InstRW<[V1Write_8c_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>;
+
+// Move prefix
+def : InstRW<[V1Write_2c_1V01], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
+ "^MOVPRFX_ZZ$")>;
+
+// Matrix multiply-accumulate
+def : InstRW<[V1Write_3c_1V01], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
+
+// Multiply, B, H, S element size
+def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ)_[BHS]$",
+ "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>;
+
+// Multiply, D element size
+// Multiply accumulate, D element size
+def : InstRW<[V1Write_5c_2V0], (instregex "^MUL_(ZI|ZPmZ)_D$",
+ "^[SU]MULH_ZPmZ_D$",
+ "^(MLA|MLS|MAD|MSB)_ZPmZZ_D$")>;
+
+// Multiply accumulate, B, H, S element size
+// NOTE: This is not specified in the SOG.
+def : InstRW<[V1Write_4c_1V0], (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;
+
+// Predicate counting vector
+def : InstRW<[V1Write_2c_1V0], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>;
+
+// Reduction, arithmetic, B form
+def : InstRW<[V1Write_14c_1V_1V0_2V1_1V13],
+ (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
+
+// Reduction, arithmetic, H form
+def : InstRW<[V1Write_12c_1V_1V01_2V1],
+ (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
+
+// Reduction, arithmetic, S form
+def : InstRW<[V1Write_10c_1V_1V01_2V1],
+ (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
+
+// Reduction, arithmetic, D form
+def : InstRW<[V1Write_8c_1V_1V01],
+ (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
+
+// Reduction, logical
+def : InstRW<[V1Write_12c_4V01], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>;
+
+// Reverse, vector
+def : InstRW<[V1Write_2c_1V01], (instregex "^REV_ZZ_[BHSD]$",
+ "^REVB_ZPmZ_[HSD]$",
+ "^REVH_ZPmZ_[SD]$",
+ "^REVW_ZPmZ_D$")>;
+
+// Select, vector form
+// Table lookup
+// Table lookup extension
+// Transpose, vector form
+// Unpack and extend
+// Zip/unzip
+def : InstRW<[V1Write_2c_1V01], (instregex "^SEL_ZPZZ_[BHSD]$",
+ "^TB[LX]_ZZZ_[BHSD]$",
+ "^TRN[12]_ZZZ_[BHSDQ]$",
+ "^[SU]UNPK(HI|LO)_ZZ_[HSD]$",
+ "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
+
+
+// SVE floating-point instructions
+// -----------------------------------------------------------------------------
+
+// Floating point absolute value/difference
+// Floating point arithmetic
+def : InstRW<[V1Write_2c_1V01], (instregex "^FAB[SD]_ZPmZ_[HSD]$",
+ "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$",
+ "^FADDP_ZPmZZ_[HSD]$",
+ "^FNEG_ZPmZ_[HSD]$",
+ "^FSUBR_ZPm[IZ]_[HSD]$")>;
+
+// Floating point associative add, F16
+def : InstRW<[V1Write_19c_18V0], (instrs FADDA_VPZ_H)>;
+
+// Floating point associative add, F32
+def : InstRW<[V1Write_11c_10V0], (instrs FADDA_VPZ_S)>;
+
+// Floating point associative add, F64
+def : InstRW<[V1Write_8c_3V01], (instrs FADDA_VPZ_D)>;
+
+// Floating point compare
+def : InstRW<[V1Write_2c_1V0], (instregex "^FAC(GE|GT)_PPzZZ_[HSD]$",
+ "^FCM(EQ|GE|GT|NE|UO)_PPzZZ_[HSD]$",
+ "^FCM(EQ|GE|GT|LE|LT|NE)_PPzZ0_[HSD]$")>;
+
+// Floating point complex add
+def : InstRW<[V1Write_3c_1V01], (instregex "^FCADD_ZPmZ_[HSD]$")>;
+
+// Floating point complex multiply add
+def : InstRW<[V1Write_5c_1V01], (instregex "^FCMLA_ZPmZZ_[HSD]$",
+ "^FCMLA_ZZZI_[HS]$")>;
+
+// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
+// Floating point convert to integer, F32
+def : InstRW<[V1Write_4c_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)$",
+ "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>;
+
+// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16)
+// Floating point convert to integer, F64
+def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$",
+ "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>;
+
+// Floating point convert to integer, F16
+def : InstRW<[V1Write_6c_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>;
+
+// Floating point copy
+def : InstRW<[V1Write_2c_1V01], (instregex "^FCPY_ZPmI_[HSD]$",
+ "^FDUP_ZI_[HSD]$")>;
+
+// Floating point divide, F16
+def : InstRW<[V1Write_13c10_1V0], (instregex "^FDIVR?_ZPmZ_H$")>;
+
+// Floating point divide, F32
+def : InstRW<[V1Write_10c7_1V0], (instregex "^FDIVR?_ZPmZ_S$")>;
+
+// Floating point divide, F64
+def : InstRW<[V1Write_15c7_1V0], (instregex "^FDIVR?_ZPmZ_D$")>;
+
+// Floating point min/max
+def : InstRW<[V1Write_2c_1V01], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>;
+
+// Floating point multiply
+def : InstRW<[V1Write_3c_1V01], (instregex "^F(SCALE|MULX)_ZPmZ_[HSD]$",
+ "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>;
+
+// Floating point multiply accumulate
+// Floating point reciprocal step
+def : InstRW<[V1Write_4c_1V01], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$",
+ "^FML[AS]_ZZZI_[HSD]$",
+ "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
+
+// Floating point reciprocal estimate, F16
+def : InstRW<[V1Write_6c_4V0], (instrs FRECPE_ZZ_H, FRSQRTE_ZZ_H)>;
+
+// Floating point reciprocal estimate, F32
+def : InstRW<[V1Write_4c_2V0], (instrs FRECPE_ZZ_S, FRSQRTE_ZZ_S)>;
+
+// Floating point reciprocal estimate, F64
+def : InstRW<[V1Write_3c_1V0], (instrs FRECPE_ZZ_D, FRSQRTE_ZZ_D)>;
+
+// Floating point reciprocal exponent
+def : InstRW<[V1Write_3c_1V0], (instregex "^FRECPX_ZPmZ_[HSD]$")>;
+
+// Floating point reduction, F16
+def : InstRW<[V1Write_13c_6V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_H$")>;
+
+// Floating point reduction, F32
+def : InstRW<[V1Write_11c_1V_5V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_S$")>;
+
+// Floating point reduction, F64
+def : InstRW<[V1Write_9c_1V_4V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_D$")>;
+
+// Floating point round to integral, F16
+def : InstRW<[V1Write_6c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>;
+
+// Floating point round to integral, F32
+def : InstRW<[V1Write_4c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>;
+
+// Floating point round to integral, F64
+def : InstRW<[V1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>;
+
+// Floating point square root, F16
+def : InstRW<[V1Write_13c10_1V0], (instrs FSQRT_ZPmZ_H)>;
+
+// Floating point square root, F32
+def : InstRW<[V1Write_10c7_1V0], (instrs FSQRT_ZPmZ_S)>;
+
+// Floating point square root, F64
+def : InstRW<[V1Write_16c7_1V0], (instrs FSQRT_ZPmZ_D)>;
+
+// Floating point trigonometric
+def : InstRW<[V1Write_3c_1V01], (instregex "^FEXPA_ZZ_[HSD]$",
+ "^FTMAD_ZZI_[HSD]$",
+ "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
+
+
+// SVE BFloat16 (BF16) instructions
+// -----------------------------------------------------------------------------
+
+// Convert, F32 to BF16
+def : InstRW<[V1Write_4c_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
+
+// Dot product
+def : InstRW<[V1Write_4c_1V01], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
+
+// Matrix multiply accumulate
+def : InstRW<[V1Write_5c_1V01], (instrs BFMMLA_ZZZ)>;
+
+// Multiply accumulate long
+def : InstRW<[V1Write_5c_1V01], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
+
+
+// SVE Load instructions
+// -----------------------------------------------------------------------------
+
+// Load vector
+def : InstRW<[V1Write_6c_1L01], (instrs LDR_ZXI)>;
+
+// Load predicate
+def : InstRW<[V1Write_6c_1L_1M], (instrs LDR_PXI)>;
+
+// Contiguous load, scalar + imm
+// Contiguous load, scalar + scalar
+// Contiguous load broadcast, scalar + imm
+// Contiguous load broadcast, scalar + scalar
+def : InstRW<[V1Write_6c_1L01], (instregex "^LD1[BHWD]_IMM_REAL$",
+ "^LD1S?B_[HSD]_IMM_REAL$",
+ "^LD1S?H_[SD]_IMM_REAL$",
+ "^LD1S?W_D_IMM_REAL$",
+ "^LD1[BWD]$",
+ "^LD1S?B_[HSD]$",
+ "^LD1S?W_D$",
+ "^LD1R[BHWD]_IMM$",
+ "^LD1RSW_IMM$",
+ "^LD1RS?B_[HSD]_IMM$",
+ "^LD1RS?H_[SD]_IMM$",
+ "^LD1RS?W_D_IMM$",
+ "^LD1RQ_[BHWD]_IMM$",
+ "^LD1RQ_[BWD]$")>;
+def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LD1H$",
+ "^LD1S?H_[SD]$",
+ "^LD1RQ_H$")>;
+
+// Non temporal load, scalar + imm
+def : InstRW<[V1Write_6c_1L01], (instregex "^LDNT1[BHWD]_ZRI$")>;
+
+// Non temporal load, scalar + scalar
+def : InstRW<[V1Write_7c_1L01_1S], (instrs LDNT1H_ZRR)>;
+def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDNT1[BWD]_ZRR$")>;
+
+// Contiguous first faulting load, scalar + scalar
+def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LDFF1H_REAL$",
+ "^LDFF1S?H_[SD]_REAL$")>;
+def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDFF1[BWD]_REAL$",
+ "^LDFF1S?B_[HSD]_REAL$",
+ "^LDFF1S?W_D_REAL$")>;
+
+// Contiguous non faulting load, scalar + imm
+def : InstRW<[V1Write_6c_1L01], (instregex "^LDNF1[BHWD]_IMM_REAL$",
+ "^LDNF1S?B_[HSD]_IMM_REAL$",
+ "^LDNF1S?H_[SD]_IMM_REAL$",
+ "^LDNF1S?W_D_IMM_REAL$")>;
+
+// Contiguous Load two structures to two vectors, scalar + imm
+def : InstRW<[V1Write_8c_2L01_2V01], (instregex "^LD2[BHWD]_IMM$")>;
+
+// Contiguous Load two structures to two vectors, scalar + scalar
+def : InstRW<[V1Write_10c_2L01_2V01], (instrs LD2H)>;
+def : InstRW<[V1Write_9c_2L01_2V01], (instregex "^LD2[BWD]$")>;
+
+// Contiguous Load three structures to three vectors, scalar + imm
+def : InstRW<[V1Write_11c_3L01_3V01], (instregex "^LD3[BHWD]_IMM$")>;
+
+// Contiguous Load three structures to three vectors, scalar + scalar
+def : InstRW<[V1Write_13c_3L01_1S_3V01], (instregex "^LD3[BHWD]$")>;
+
+// Contiguous Load four structures to four vectors, scalar + imm
+def : InstRW<[V1Write_12c_4L01_4V01], (instregex "^LD4[BHWD]_IMM$")>;
+
+// Contiguous Load four structures to four vectors, scalar + scalar
+def : InstRW<[V1Write_13c_4L01_2S_4V01], (instregex "^LD4[BHWD]$")>;
+
+// Gather load, vector + imm, 32-bit element size
+def : InstRW<[V1Write_11c_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
+ "^GLD(FF)?1W_IMM_REAL$")>;
+
+// Gather load, vector + imm, 64-bit element size
+def : InstRW<[V1Write_9c_2L_2V],
+ (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
+ "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?(SCALED_)?REAL$",
+ "^GLD(FF)?1D_IMM_REAL$",
+ "^GLD(FF)?1D_([SU]XTW_)?(SCALED_)?REAL$")>;
+
+// Gather load, 32-bit scaled offset
+def : InstRW<[V1Write_11c_2L_2V],
+ (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
+ "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
+
+// Gather load, 32-bit unpacked unscaled offset
+def : InstRW<[V1Write_9c_1L_1V],
+ (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
+ "^GLD(FF)?1W_[SU]XTW_REAL$")>;
+
+// Prefetch
+// NOTE: This is not specified in the SOG.
+def : InstRW<[V1Write_4c_1L01], (instregex "^PRF[BHWD]")>;
+
+
+// SVE Store instructions
+// -----------------------------------------------------------------------------
+
+// Store from predicate reg
+def : InstRW<[V1Write_1c_1L01], (instrs STR_PXI)>;
+
+// Store from vector reg
+def : InstRW<[V1Write_2c_1L01_1V], (instrs STR_ZXI)>;
+
+// Contiguous store, scalar + imm
+// Contiguous store, scalar + scalar
+def : InstRW<[V1Write_2c_1L01_1V], (instregex "^ST1[BHWD]_IMM$",
+ "^ST1B_[HSD]_IMM$",
+ "^ST1H_[SD]_IMM$",
+ "^ST1W_D_IMM$",
+ "^ST1[BWD]$",
+ "^ST1B_[HSD]$",
+ "^ST1W_D$")>;
+def : InstRW<[V1Write_2c_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>;
+
+// Contiguous store two structures from two vectors, scalar + imm
+// Contiguous store two structures from two vectors, scalar + scalar
+def : InstRW<[V1Write_4c_1L01_1V], (instregex "^ST2[BHWD]_IMM$",
+ "^ST2[BWD]$")>;
+def : InstRW<[V1Write_4c_1L01_1S_1V], (instrs ST2H)>;
+
+// Contiguous store three structures from three vectors, scalar + imm
+def : InstRW<[V1Write_7c_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>;
+
+// Contiguous store three structures from three vectors, scalar + scalar
+def : InstRW<[V1Write_7c_5L01_5S_5V], (instregex "^ST3[BHWD]$")>;
+
+// Contiguous store four structures from four vectors, scalar + imm
+def : InstRW<[V1Write_11c_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>;
+
+// Contiguous store four structures from four vectors, scalar + scalar
+def : InstRW<[V1Write_11c_9L01_9S_9V], (instregex "^ST4[BHWD]$")>;
+
+// Non temporal store, scalar + imm
+// Non temporal store, scalar + scalar
+def : InstRW<[V1Write_2c_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$",
+ "^STNT1[BWD]_ZRR$")>;
+def : InstRW<[V1Write_2c_1L01_1S_1V], (instrs STNT1H_ZRR)>;
+
+// Scatter store vector + imm 32-bit element size
+// Scatter store, 32-bit scaled offset
+// Scatter store, 32-bit unscaled offset
+def : InstRW<[V1Write_10c_2L01_2V], (instregex "^SST1[BH]_S_IMM$",
+ "^SST1W_IMM$",
+ "^SST1(H_S|W)_[SU]XTW_SCALED$",
+ "^SST1[BH]_S_[SU]XTW$",
+ "^SST1W_[SU]XTW$")>;
+
+// Scatter store, 32-bit unpacked unscaled offset
+// Scatter store, 32-bit unpacked scaled offset
+def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$",
+ "^SST1D_[SU]XTW$",
+ "^SST1[HW]_D_[SU]XTW_SCALED$",
+ "^SST1D_[SU]XTW_SCALED$")>;
+
+// Scatter store vector + imm 64-bit element size
+// Scatter store, 64-bit scaled offset
+// Scatter store, 64-bit unscaled offset
+def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_IMM$",
+ "^SST1D_IMM$",
+ "^SST1[HW]_D_SCALED$",
+ "^SST1D_SCALED$",
+ "^SST1[BHW]_D$",
+ "^SST1D$")>;
+
+
+// SVE Miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// Read first fault register, unpredicated
+// Set first fault register
+// Write to first fault register
+def : InstRW<[V1Write_2c_1M0], (instrs RDFFR_P_REAL,
+ SETFFR,
+ WRFFR)>;
+
+// Read first fault register, predicated
+def : InstRW<[V1Write_3c_2M0], (instrs RDFFR_PPz_REAL)>;
+
+// Read first fault register and set flags
+def : InstRW<[V1Write_4c_1M], (instrs RDFFRS_PPz)>;
+
+
+}
diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
new file mode 100644
index 000000000000..199ebc6ac650
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -0,0 +1,2805 @@
+//=- AArch64SchedNeoverseV2.td - NeoverseV2 Scheduling Defs --*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the scheduling model for the Arm Neoverse V2 processors.
+// All information is taken from the V2 Software Optimisation guide:
+//
+// https://developer.arm.com/documentation/PJDOC-466751330-593177/r0p2
+//
+//===----------------------------------------------------------------------===//
+
+def NeoverseV2Model : SchedMachineModel {
+ let IssueWidth = 16; // Micro-ops dispatched at a time.
+ let MicroOpBufferSize = 160; // Entries in micro-op re-order buffer. NOTE: Copied from N2.
+ let LoadLatency = 4; // Optimistic load latency.
+ let MispredictPenalty = 10; // Extra cycles for mispredicted branch. NOTE: Copied from N2.
+ let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57.
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
+ [HasSVE2p1]);
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Neoverse V2.
+// Instructions are first fetched and then decoded into internal macro-ops
+// (MOPs). From there, the MOPs proceed through register renaming and dispatch
+// stages. A MOP can be split into two micro-ops further down the pipeline
+// after the decode stage. Once dispatched, micro-ops wait for their operands
+// and issue out-of-order to one of seventeen issue pipelines. Each issue
+// pipeline can accept one micro-op per cycle.
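+// (Editorial note, not taken from the SOG: that cracking is modelled here
+//  purely through NumMicroOps on the write resources; for example,
+//  V2Write_5cyc_1M0_1V further down describes one MOP that issues one
+//  micro-op to the M0 port and one to the V ports.)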
+
+let SchedModel = NeoverseV2Model in {
+
+// Define the (17) issue ports.
+def V2UnitB : ProcResource<2>; // Branch 0/1
+def V2UnitS0 : ProcResource<1>; // Integer single-cycle 0
+def V2UnitS1 : ProcResource<1>; // Integer single-cycle 1
+def V2UnitS2 : ProcResource<1>; // Integer single-cycle 2
+def V2UnitS3 : ProcResource<1>; // Integer single-cycle 3
+def V2UnitM0 : ProcResource<1>; // Integer single/multicycle 0
+def V2UnitM1 : ProcResource<1>; // Integer single/multicycle 1
+def V2UnitV0 : ProcResource<1>; // FP/ASIMD 0
+def V2UnitV1 : ProcResource<1>; // FP/ASIMD 1
+def V2UnitV2 : ProcResource<1>; // FP/ASIMD 2
+def V2UnitV3 : ProcResource<1>; // FP/ASIMD 3
+def V2UnitL01 : ProcResource<2>; // Load/Store 0/1
+def V2UnitL2 : ProcResource<1>; // Load 2
+def V2UnitD : ProcResource<2>; // Store data 0/1
+
+def V2UnitR : ProcResGroup<[V2UnitS0, V2UnitS1]>; // Integer single-cycle 0/1
+def V2UnitS : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3]>; // Integer single-cycle 0/1/2/3
+def V2UnitF : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitM0, V2UnitM1]>; // Integer single-cycle 0/1 and single/multicycle 0/1
+def V2UnitI : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3, V2UnitM0, V2UnitM1]>; // Integer single-cycle 0/1/2/3 and single/multicycle 0/1
+def V2UnitM : ProcResGroup<[V2UnitM0, V2UnitM1]>; // Integer single/multicycle 0/1
+def V2UnitL : ProcResGroup<[V2UnitL01, V2UnitL2]>; // Load/Store 0/1 and Load 2
+def V2UnitV : ProcResGroup<[V2UnitV0, V2UnitV1, V2UnitV2, V2UnitV3]>; // FP/ASIMD 0/1/2/3
+def V2UnitV01 : ProcResGroup<[V2UnitV0, V2UnitV1]>; // FP/ASIMD 0/1
+def V2UnitV02 : ProcResGroup<[V2UnitV0, V2UnitV2]>; // FP/ASIMD 0/2
+def V2UnitV13 : ProcResGroup<[V2UnitV1, V2UnitV3]>; // FP/ASIMD 1/3
+def V2UnitV23 : ProcResGroup<[V2UnitV2, V2UnitV3]>; // FP/ASIMD 2/3
+
+// Define commonly used read types.
+
+// No forwarding is provided for these types.
+def : ReadAdvance<ReadI, 0>;
+def : ReadAdvance<ReadISReg, 0>;
+def : ReadAdvance<ReadIEReg, 0>;
+def : ReadAdvance<ReadIM, 0>;
+def : ReadAdvance<ReadIMA, 0>;
+def : ReadAdvance<ReadID, 0>;
+def : ReadAdvance<ReadExtrHi, 0>;
+def : ReadAdvance<ReadAdrBase, 0>;
+def : ReadAdvance<ReadST, 0>;
+def : ReadAdvance<ReadVLD, 0>;
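+// (Editorial note, not taken from the SOG: an advance of 0 keeps the full
+//  producer latency. The forwarded types defined further down use non-zero
+//  advances, e.g. V2Rd_IMA, which let the consumer read that operand the
+//  given number of cycles early and so shorten the effective latency.)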
+
+// NOTE: Copied from N2.
+def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
+def : WriteRes<WriteBarrier, []> { let Latency = 1; }
+def : WriteRes<WriteHint, []> { let Latency = 1; }
+def : WriteRes<WriteLDHi, []> { let Latency = 4; }
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to the Neoverse V2.
+
+//===----------------------------------------------------------------------===//
+// Define generic 1 micro-op types
+
+def V2Write_1cyc_1B : SchedWriteRes<[V2UnitB]> { let Latency = 1; }
+def V2Write_1cyc_1F : SchedWriteRes<[V2UnitF]> { let Latency = 1; }
+def V2Write_1cyc_1I : SchedWriteRes<[V2UnitI]> { let Latency = 1; }
+def V2Write_1cyc_1M : SchedWriteRes<[V2UnitM]> { let Latency = 1; }
+def V2Write_1cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 1; }
+def V2Write_1cyc_1L01 : SchedWriteRes<[V2UnitL01]> { let Latency = 1; }
+def V2Write_2cyc_1M : SchedWriteRes<[V2UnitM]> { let Latency = 2; }
+def V2Write_3cyc_1M : SchedWriteRes<[V2UnitM]> { let Latency = 3; }
+def V2Write_2cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
+def V2Write_3cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 3; }
+def V2Write_5cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 5; }
+def V2Write_12cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 12;
+ let ResourceCycles = [12]; }
+def V2Write_20cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 20;
+ let ResourceCycles = [20]; }
+def V2Write_4cyc_1L : SchedWriteRes<[V2UnitL]> { let Latency = 4; }
+def V2Write_6cyc_1L : SchedWriteRes<[V2UnitL]> { let Latency = 6; }
+def V2Write_2cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 2; }
+def V2Write_2cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 2; }
+def V2Write_2cyc_1V01 : SchedWriteRes<[V2UnitV01]> { let Latency = 2; }
+def V2Write_2cyc_1V23 : SchedWriteRes<[V2UnitV23]> { let Latency = 2; }
+def V2Write_3cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
+def V2Write_3cyc_1V01 : SchedWriteRes<[V2UnitV01]> { let Latency = 3;
+ let ResourceCycles = [2]; }
+def V2Write_3cyc_1V23 : SchedWriteRes<[V2UnitV23]> { let Latency = 3; }
+def V2Write_4cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
+def V2Write_5cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
+def V2Write_6cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 6; }
+def V2Write_12cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 12; }
+def V2Write_3cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 3; }
+def V2Write_3cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 3; }
+def V2Write_4cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 4; }
+def V2Write_4cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
+def V2Write_7cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 7;
+ let ResourceCycles = [7]; }
+def V2Write_7cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 7;
+ let ResourceCycles = [2]; }
+def V2Write_9cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 9; }
+def V2Write_9cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 9;
+ let ResourceCycles = [2]; }
+def V2Write_10cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 10; }
+def V2Write_10cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 10;
+ let ResourceCycles = [2]; }
+def V2Write_12cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 12;
+ let ResourceCycles = [11]; }
+def V2Write_13cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 13; }
+def V2Write_15cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 15; }
+def V2Write_15cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 15;
+ let ResourceCycles = [8]; }
+def V2Write_16cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 16; }
+def V2Write_16cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 16;
+ let ResourceCycles = [8]; }
+def V2Write_20cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 20;
+ let ResourceCycles = [20]; }
+def V2Write_2cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 2; }
+def V2Write_2cyc_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 2; }
+def V2Write_3cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 3; }
+def V2Write_4cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 4; }
+def V2Write_4cyc_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
+def V2Write_6cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 6; }
+def V2Write_10cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 10; }
+def V2Write_6cyc_1L01 : SchedWriteRes<[V2UnitL01]> { let Latency = 6; }
+
+//===----------------------------------------------------------------------===//
+// Define generic 2 micro-op types
+
+def V2Write_1cyc_1B_1R : SchedWriteRes<[V2UnitB, V2UnitR]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def V2Write_6cyc_1M0_1B : SchedWriteRes<[V2UnitM0, V2UnitB]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V2Write_9cyc_1M0_1L : SchedWriteRes<[V2UnitM0, V2UnitL]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+
+def V2Write_3cyc_1I_1M : SchedWriteRes<[V2UnitI, V2UnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def V2Write_1cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def V2Write_3cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def V2Write_4cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V2Write_5cyc_1L_1F : SchedWriteRes<[V2UnitL, V2UnitF]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def V2Write_6cyc_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V2Write_7cyc_1F_1L : SchedWriteRes<[V2UnitF, V2UnitL]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+def V2Write_7cyc_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+def V2Write_1cyc_1L01_1D : SchedWriteRes<[V2UnitL01, V2UnitD]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def V2Write_5cyc_1M0_1V : SchedWriteRes<[V2UnitM0, V2UnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def V2Write_2cyc_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def V2Write_2cyc_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def V2Write_2cyc_2V01 : SchedWriteRes<[V2UnitV01, V2UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def V2Write_4cyc_2V01 : SchedWriteRes<[V2UnitV01, V2UnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V2Write_4cyc_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V2Write_4cyc_1V13_1V : SchedWriteRes<[V2UnitV13, V2UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V2Write_4cyc_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V2Write_4cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V2Write_4cyc_2V : SchedWriteRes<[V2UnitV, V2UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V2Write_6cyc_2V : SchedWriteRes<[V2UnitV, V2UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V2Write_6cyc_2L : SchedWriteRes<[V2UnitL, V2UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V2Write_8cyc_1L_1V : SchedWriteRes<[V2UnitL, V2UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+def V2Write_4cyc_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V2Write_3cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def V2Write_4cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V2Write_1cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+
+def V2Write_2cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def V2Write_6cyc_2V1 : SchedWriteRes<[V2UnitV1, V2UnitV1]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V2Write_4cyc_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V2Write_5cyc_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def V2Write_5cyc_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def V2Write_5cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+
+def V2Write_6cyc_1V1_1M0 : SchedWriteRes<[V2UnitV1, V2UnitM0]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V2Write_7cyc_1M0_1V02 : SchedWriteRes<[V2UnitM0, V2UnitV02]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+
+def V2Write_2cyc_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+
+def V2Write_3cyc_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+def V2Write_6cyc_1V_1V13 : SchedWriteRes<[V2UnitV, V2UnitV13]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V2Write_6cyc_1L_1M : SchedWriteRes<[V2UnitL, V2UnitM]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V2Write_6cyc_1L_1S : SchedWriteRes<[V2UnitL, V2UnitS]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+
+def V2Write_4cyc_2V13 : SchedWriteRes<[V2UnitV13, V2UnitV13]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+def V2Write_8cyc_1M0_1V01 : SchedWriteRes<[V2UnitM0, V2UnitV01]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 3 micro-op types
+
+def V2Write_1cyc_1L01_1D_1I : SchedWriteRes<[V2UnitL01, V2UnitD, V2UnitI]> {
+ let Latency = 1;
+ let NumMicroOps = 3;
+}
+
+def V2Write_2cyc_1L01_1V01_1I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+def V2Write_2cyc_1L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+def V2Write_4cyc_1L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+def V2Write_9cyc_1L_2V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
+
+def V2Write_4cyc_3V01 : SchedWriteRes<[V2UnitV01, V2UnitV01, V2UnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+
+def V2Write_7cyc_1M_1M0_1V : SchedWriteRes<[V2UnitM, V2UnitM0, V2UnitV]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+
+def V2Write_2cyc_1L01_1S_1V : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+def V2Write_2cyc_1L01_1S_1V01 : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+
+def V2Write_6cyc_3L : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def V2Write_6cyc_3V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+
+def V2Write_8cyc_1L_2V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 4 micro-op types
+
+def V2Write_2cyc_1L01_2V01_1I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01,
+ V2UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+
+def V2Write_2cyc_2L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01,
+ V2UnitV01, V2UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+
+def V2Write_4cyc_2L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01,
+ V2UnitV01, V2UnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+def V2Write_5cyc_1I_3L : SchedWriteRes<[V2UnitI, V2UnitL, V2UnitL, V2UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+}
+
+def V2Write_9cyc_2L_2V1 : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV1,
+ V2UnitV1]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+}
+
+def V2Write_6cyc_4V0 : SchedWriteRes<[V2UnitV0, V2UnitV0, V2UnitV0, V2UnitV0]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def V2Write_8cyc_4V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def V2Write_6cyc_2V_2V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13,
+ V2UnitV13]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def V2Write_8cyc_2V_2V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13,
+ V2UnitV13]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def V2Write_6cyc_4V02 : SchedWriteRes<[V2UnitV02, V2UnitV02, V2UnitV02,
+ V2UnitV02]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def V2Write_6cyc_4V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def V2Write_8cyc_2L_2V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def V2Write_9cyc_2L_2V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 4;
+}
+
+def V2Write_2cyc_2L01_2V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV,
+ V2UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+
+def V2Write_4cyc_2L01_2V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV,
+ V2UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+def V2Write_8cyc_2M0_2V02 : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitV02,
+ V2UnitV02]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def V2Write_8cyc_2V_2V1 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV1,
+ V2UnitV1]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+
+def V2Write_4cyc_2M0_2M : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM,
+ V2UnitM]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+
+def V2Write_5cyc_2M0_2M : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM,
+ V2UnitM]> {
+ let Latency = 5;
+ let NumMicroOps = 4;
+}
+
+def V2Write_6cyc_2I_2L : SchedWriteRes<[V2UnitI, V2UnitI, V2UnitL, V2UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+def V2Write_7cyc_4L : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL]> {
+ let Latency = 7;
+ let NumMicroOps = 4;
+}
+
+def V2Write_6cyc_1L01_3V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01,
+ V2UnitV01]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 5 micro-op types
+
+def V2Write_2cyc_1L01_2V01_2I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01,
+ V2UnitI, V2UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 5;
+}
+
+def V2Write_8cyc_2L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV,
+ V2UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 5;
+}
+
+def V2Write_9cyc_1L_4V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV,
+ V2UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 5;
+}
+
+def V2Write_10cyc_1L_4V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV,
+ V2UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 5;
+}
+
+def V2Write_6cyc_5V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV,
+ V2UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 5;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 6 micro-op types
+
+def V2Write_8cyc_3L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
+ V2UnitV, V2UnitV, V2UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 6;
+}
+
+def V2Write_9cyc_3L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
+ V2UnitV, V2UnitV, V2UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+
+def V2Write_9cyc_2L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV,
+ V2UnitV, V2UnitV, V2UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+
+def V2Write_9cyc_2L_2V_2S : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV,
+ V2UnitV, V2UnitS, V2UnitS]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+
+def V2Write_9cyc_2V_4V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13,
+ V2UnitV13, V2UnitV13, V2UnitV13]> {
+ let Latency = 9;
+ let NumMicroOps = 6;
+}
+
+def V2Write_2cyc_3L01_3V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitV, V2UnitV, V2UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 6;
+}
+
+def V2Write_4cyc_2L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01,
+ V2UnitV01, V2UnitV01, V2UnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+
+def V2Write_5cyc_2L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01,
+ V2UnitV01, V2UnitV01, V2UnitV01]> {
+ let Latency = 5;
+ let NumMicroOps = 6;
+}
+
+def V2Write_2cyc_3L01_3V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitV01, V2UnitV01, V2UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 6;
+}
+
+def V2Write_4cyc_2L01_2S_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitS,
+ V2UnitS, V2UnitV01, V2UnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 6;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 7 micro-op types
+
+def V2Write_8cyc_3L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
+ V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 7;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 8 micro-op types
+
+def V2Write_2cyc_4L01_4V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitV, V2UnitV, V2UnitV,
+ V2UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 8;
+}
+
+def V2Write_2cyc_4L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01]> {
+ let Latency = 2;
+ let NumMicroOps = 8;
+}
+
+def V2Write_4cyc_4L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01]> {
+ let Latency = 4;
+ let NumMicroOps = 8;
+}
+
+def V2Write_6cyc_2L01_6V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01,
+ V2UnitV01, V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01]> {
+ let Latency = 6;
+ let NumMicroOps = 8;
+}
+
+def V2Write_8cyc_4L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL,
+ V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 8;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 9 micro-op types
+
+def V2Write_6cyc_3L01_6V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitV01, V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01, V2UnitV01]> {
+ let Latency = 6;
+ let NumMicroOps = 9;
+}
+
+def V2Write_10cyc_1L_8V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV,
+ V2UnitV, V2UnitV, V2UnitV, V2UnitV,
+ V2UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 9;
+}
+
+def V2Write_10cyc_3V_3L_3S : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV,
+ V2UnitL, V2UnitL, V2UnitL,
+ V2UnitS, V2UnitS, V2UnitS]> {
+ let Latency = 10;
+ let NumMicroOps = 9;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 10 micro-op types
+
+def V2Write_9cyc_6L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL,
+ V2UnitL, V2UnitL, V2UnitV, V2UnitV,
+ V2UnitV, V2UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 10;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 12 micro-op types
+
+def V2Write_5cyc_4L01_8V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01, V2UnitV01]> {
+ let Latency = 5;
+ let NumMicroOps = 12;
+}
+
+def V2Write_9cyc_4L_8V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
+ V2UnitL, V2UnitV, V2UnitV,
+ V2UnitV, V2UnitV, V2UnitV,
+ V2UnitV, V2UnitV, V2UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 12;
+}
+
+def V2Write_10cyc_4L_8V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
+ V2UnitL, V2UnitV, V2UnitV,
+ V2UnitV, V2UnitV, V2UnitV,
+ V2UnitV, V2UnitV, V2UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 12;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 16 micro-op types
+
+def V2Write_7cyc_4L01_12V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01, V2UnitV01,
+ V2UnitV01]> {
+ let Latency = 7;
+ let NumMicroOps = 16;
+}
+
+def V2Write_10cyc_4L_8V_4S : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
+ V2UnitL, V2UnitV, V2UnitV,
+ V2UnitV, V2UnitV, V2UnitV,
+ V2UnitV, V2UnitV, V2UnitV,
+ V2UnitS, V2UnitS, V2UnitS,
+ V2UnitS]> {
+ let Latency = 10;
+ let NumMicroOps = 16;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 18 micro-op types
+
+def V2Write_7cyc_9L01_9V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitV01, V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01, V2UnitV01]> {
+ let Latency = 7;
+ let NumMicroOps = 18;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 27 micro-op types
+
+def V2Write_7cyc_9L01_9S_9V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitS, V2UnitS, V2UnitS,
+ V2UnitS, V2UnitS, V2UnitS,
+ V2UnitS, V2UnitS, V2UnitS,
+ V2UnitV01, V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01,
+ V2UnitV01]> {
+ let Latency = 7;
+ let NumMicroOps = 27;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 36 micro-op types
+
+def V2Write_11cyc_18L01_18V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitL01, V2UnitL01,
+ V2UnitV01, V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01,
+ V2UnitV01]> {
+ let Latency = 11;
+ let NumMicroOps = 36;
+}
+
+//===----------------------------------------------------------------------===//
+// Define generic 54 micro-op types
+
+def V2Write_11cyc_18L01_18S_18V01 : SchedWriteRes<[V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitL01,
+ V2UnitL01, V2UnitL01,
+ V2UnitS, V2UnitS, V2UnitS,
+ V2UnitS, V2UnitS, V2UnitS,
+ V2UnitS, V2UnitS, V2UnitS,
+ V2UnitS, V2UnitS, V2UnitS,
+ V2UnitS, V2UnitS, V2UnitS,
+ V2UnitS, V2UnitS, V2UnitS,
+ V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01,
+ V2UnitV01, V2UnitV01]> {
+ let Latency = 11;
+ let NumMicroOps = 54;
+}
+
+//===----------------------------------------------------------------------===//
+// Define predicate-controlled types
+
+def V2Write_ArithI : SchedWriteVariant<[
+ SchedVar<IsCheapLSL, [V2Write_1cyc_1I]>,
+ SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>;
+
+def V2Write_ArithF : SchedWriteVariant<[
+ SchedVar<IsCheapLSL, [V2Write_1cyc_1F]>,
+ SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>;
+
+def V2Write_Logical : SchedWriteVariant<[
+ SchedVar<NeoverseNoLSL, [V2Write_1cyc_1F]>,
+ SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>;
+
+def V2Write_Extr : SchedWriteVariant<[
+ SchedVar<IsRORImmIdiomPred, [V2Write_1cyc_1I]>,
+ SchedVar<NoSchedPred, [V2Write_3cyc_1I_1M]>]>;
+
+def V2Write_LdrHQ : SchedWriteVariant<[
+ SchedVar<NeoverseHQForm, [V2Write_7cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [V2Write_6cyc_1L]>]>;
+
+def V2Write_StrHQ : SchedWriteVariant<[
+ SchedVar<NeoverseHQForm, [V2Write_2cyc_1L01_1V01_1I]>,
+ SchedVar<NoSchedPred, [V2Write_2cyc_1L01_1V01]>]>;
+
+def V2Write_2or3cyc_1M : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V2Write_3cyc_1M]>,
+ SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>;
+
+def V2Write_3or4cyc_2M : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V2Write_4cyc_2M]>,
+ SchedVar<NoSchedPred, [V2Write_3cyc_2M]>]>;
+
+def V2Write_1or2cyc_1M0 : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V2Write_2cyc_1M0]>,
+ SchedVar<NoSchedPred, [V2Write_1cyc_1M0]>]>;
+
+def V2Write_2or3cyc_1M0 : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V2Write_3cyc_1M0]>,
+ SchedVar<NoSchedPred, [V2Write_2cyc_1M0]>]>;
+
+def V2Write_1or2cyc_1M0_1M : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V2Write_2cyc_1M0_1M]>,
+ SchedVar<NoSchedPred, [V2Write_1cyc_1M0_1M]>]>;
+
+def V2Write_3or4cyc_1M0_1M : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V2Write_4cyc_1M0_1M]>,
+ SchedVar<NoSchedPred, [V2Write_3cyc_1M0_1M]>]>;
+
+def V2Write_4or5cyc_2M0_2M : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V2Write_5cyc_2M0_2M]>,
+ SchedVar<NoSchedPred, [V2Write_4cyc_2M0_2M]>]>;
+
+def V2Write_4or5cyc_1V0_1M0 : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V2Write_5cyc_1V0_1M0]>,
+ SchedVar<NoSchedPred, [V2Write_4cyc_1V0_1M0]>]>;
+
+def V2Write_2or3cyc_1V0_1M : SchedWriteVariant<[
+ SchedVar<NeoversePdIsPg, [V2Write_3cyc_1V0_1M]>,
+ SchedVar<NoSchedPred, [V2Write_2cyc_1V0_1M]>]>;
+
+def V2Write_IncDec : SchedWriteVariant<[
+ SchedVar<NeoverseCheapIncDec, [V2Write_1cyc_1F]>,
+ SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>;
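+// (Editorial note, not taken from the SOG: a SchedWriteVariant resolves to
+//  the first SchedVar whose predicate matches, with NoSchedPred as the
+//  fallback. V2Write_ArithI above, for instance, costs 1 cycle on the I
+//  ports for a cheap LSL shift and otherwise 2 cycles on the M ports.)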
+
+//===----------------------------------------------------------------------===//
+// Define forwarded types
+
+// NOTE: SOG, p. 16, n. 2: Accumulator forwarding is not supported for
+// consumers of 64-bit multiply high operations?
+def V2Wr_IM : SchedWriteRes<[V2UnitM]> { let Latency = 2; }
+def V2Wr_IMA : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
+def V2Wr_IMUL : SchedWriteVariant<[
+ SchedVar<IsReg3ZeroPred, [V2Wr_IM]>,
+ SchedVar<NoSchedPred, [V2Wr_IMA]>]>;
+def V2Rd_IMA : SchedReadAdvance<1, [V2Wr_IMA]>;
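+// (Editorial worked example, derived from the defs above rather than the
+//  SOG: in a chain such as
+//    madd x0, x1, x2, x0
+//    madd x0, x3, x4, x0
+//  the producer writes with Latency = 2 (V2Wr_IMA) and the accumulator
+//  operand of the consumer reads 1 cycle early (V2Rd_IMA), so the effective
+//  accumulator-to-accumulator latency is 2 - 1 = 1 cycle.)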
+
+def V2Wr_FMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
+def V2Rd_FMA : SchedReadAdvance<2, [WriteFMul, V2Wr_FMA]>;
+
+def V2Wr_VA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
+def V2Rd_VA : SchedReadAdvance<3, [V2Wr_VA]>;
+
+def V2Wr_VDOT : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
+def V2Rd_VDOT : SchedReadAdvance<2, [V2Wr_VDOT]>;
+
+def V2Wr_VMMA : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
+def V2Rd_VMMA : SchedReadAdvance<2, [V2Wr_VMMA]>;
+
+def V2Wr_VMA : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
+def V2Rd_VMA : SchedReadAdvance<3, [V2Wr_VMA]>;
+
+def V2Wr_VMAH : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; }
+def V2Rd_VMAH : SchedReadAdvance<2, [V2Wr_VMAH]>;
+
+def V2Wr_VMAL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
+def V2Rd_VMAL : SchedReadAdvance<3, [V2Wr_VMAL]>;
+
+def V2Wr_VPA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
+def V2Rd_VPA : SchedReadAdvance<3, [V2Wr_VPA]>;
+
+def V2Wr_VSA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
+def V2Rd_VSA : SchedReadAdvance<3, [V2Wr_VSA]>;
+
+def V2Wr_VFCMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
+def V2Rd_VFCMA : SchedReadAdvance<2, [V2Wr_VFCMA]>;
+
+def V2Wr_VFM : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
+def V2Wr_VFMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
+def V2Rd_VFMA : SchedReadAdvance<2, [V2Wr_VFM, V2Wr_VFMA]>;
+
+def V2Wr_VFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
+def V2Rd_VFMAL : SchedReadAdvance<2, [V2Wr_VFMAL]>;
+
+def V2Wr_VBFDOT : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
+def V2Rd_VBFDOT : SchedReadAdvance<2, [V2Wr_VBFDOT]>;
+def V2Wr_VBFMMA : SchedWriteRes<[V2UnitV]> { let Latency = 6; }
+def V2Rd_VBFMMA : SchedReadAdvance<2, [V2Wr_VBFMMA]>;
+def V2Wr_VBFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
+def V2Rd_VBFMAL : SchedReadAdvance<3, [V2Wr_VBFMAL]>;
+
+def V2Wr_CRC : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
+def V2Rd_CRC : SchedReadAdvance<1, [V2Wr_CRC]>;
+
+def V2Wr_ZA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
+def V2Rd_ZA : SchedReadAdvance<3, [V2Wr_ZA]>;
+def V2Wr_ZPA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
+def V2Rd_ZPA : SchedReadAdvance<3, [V2Wr_ZPA]>;
+def V2Wr_ZSA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
+def V2Rd_ZSA : SchedReadAdvance<3, [V2Wr_ZSA]>;
+
+def V2Wr_ZDOTB : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
+def V2Rd_ZDOTB : SchedReadAdvance<2, [V2Wr_ZDOTB]>;
+def V2Wr_ZDOTH : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
+def V2Rd_ZDOTH : SchedReadAdvance<3, [V2Wr_ZDOTH]>;
+
+// NOTE: SOG p. 43: Complex multiply-add, B/H/S element size: how to reduce
+// throughput to 1 in the case of forwarding?
+def V2Wr_ZCMABHS : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
+def V2Rd_ZCMABHS : SchedReadAdvance<3, [V2Wr_ZCMABHS]>;
+def V2Wr_ZCMAD : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
+def V2Rd_ZCMAD : SchedReadAdvance<2, [V2Wr_ZCMAD]>;
+
+def V2Wr_ZMMA : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
+def V2Rd_ZMMA : SchedReadAdvance<2, [V2Wr_ZMMA]>;
+
+def V2Wr_ZMABHS : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; }
+def V2Rd_ZMABHS : SchedReadAdvance<3, [V2Wr_ZMABHS]>;
+def V2Wr_ZMAD : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
+def V2Rd_ZMAD : SchedReadAdvance<2, [V2Wr_ZMAD]>;
+
+def V2Wr_ZMAL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
+def V2Rd_ZMAL : SchedReadAdvance<3, [V2Wr_ZMAL]>;
+
+def V2Wr_ZMASQL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
+def V2Wr_ZMASQBHS : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
+def V2Wr_ZMASQD : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
+def V2Rd_ZMASQ : SchedReadAdvance<2, [V2Wr_ZMASQL, V2Wr_ZMASQBHS,
+ V2Wr_ZMASQD]>;
+
+def V2Wr_ZFCMA : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
+def V2Rd_ZFCMA : SchedReadAdvance<3, [V2Wr_ZFCMA]>;
+
+def V2Wr_ZFMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
+def V2Rd_ZFMA : SchedReadAdvance<2, [V2Wr_ZFMA]>;
+
+def V2Wr_ZFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
+def V2Rd_ZFMAL : SchedReadAdvance<2, [V2Wr_ZFMAL]>;
+
+def V2Wr_ZBFDOT : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
+def V2Rd_ZBFDOT : SchedReadAdvance<2, [V2Wr_ZBFDOT]>;
+def V2Wr_ZBFMMA : SchedWriteRes<[V2UnitV]> { let Latency = 6; }
+def V2Rd_ZBFMMA : SchedReadAdvance<2, [V2Wr_ZBFMMA]>;
+def V2Wr_ZBFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
+def V2Rd_ZBFMAL : SchedReadAdvance<3, [V2Wr_ZBFMAL]>;
+
+//===----------------------------------------------------------------------===//
+// Define types with long resource cycles (rc)
+
+def V2Write_6cyc_1V1_5rc : SchedWriteRes<[V2UnitV1]> { let Latency = 6; let ResourceCycles = [ 5]; }
+def V2Write_7cyc_1V02_7rc : SchedWriteRes<[V2UnitV02]> { let Latency = 7; let ResourceCycles = [ 7]; }
+def V2Write_10cyc_1V02_5rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ResourceCycles = [ 5]; }
+def V2Write_10cyc_1V02_9rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ResourceCycles = [ 9]; }
+def V2Write_10cyc_1V02_10rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ResourceCycles = [10]; }
+def V2Write_10cyc_1V0_9rc : SchedWriteRes<[V2UnitV0]> { let Latency = 10; let ResourceCycles = [ 9]; }
+def V2Write_10cyc_1V1_9rc : SchedWriteRes<[V2UnitV1]> { let Latency = 10; let ResourceCycles = [ 9]; }
+def V2Write_13cyc_1V0_12rc : SchedWriteRes<[V2UnitV0]> { let Latency = 13; let ResourceCycles = [12]; }
+def V2Write_13cyc_1V02_12rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ResourceCycles = [12]; }
+def V2Write_13cyc_1V02_13rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ResourceCycles = [13]; }
+def V2Write_15cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 15; let ResourceCycles = [14]; }
+def V2Write_16cyc_1V02_15rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ResourceCycles = [15]; }
+def V2Write_16cyc_1V0_14rc : SchedWriteRes<[V2UnitV0]> { let Latency = 16; let ResourceCycles = [14]; }
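+// (Editorial note, not taken from the SOG: ResourceCycles models occupancy
+//  of the issue port rather than result latency. V2Write_10cyc_1V02_5rc,
+//  for instance, reserves a V0/V2 port for 5 cycles, capturing the limited
+//  throughput of largely unpipelined operations such as divides and square
+//  roots.)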
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+def : InstRW<[WriteI], (instrs COPY)>;
+
+// §3.3 Branch instructions
+// -----------------------------------------------------------------------------
+
+// Branch, immed
+// Compare and branch
+def : SchedAlias<WriteBr, V2Write_1cyc_1B>;
+
+// Branch, register
+def : SchedAlias<WriteBrReg, V2Write_1cyc_1B>;
+
+// Branch and link, immed
+// Branch and link, register
+def : InstRW<[V2Write_1cyc_1B_1R], (instrs BL, BLR)>;
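+// (Editorial note, not taken from the SOG: SchedAlias rebinds one of the
+//  target's generic write classes, such as WriteBr above, to a V2-specific
+//  write for every instruction using it, whereas InstRW overrides only the
+//  explicitly listed or matched instructions, as for BL and BLR here.)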
+
+// §3.4 Arithmetic and Logical Instructions
+// -----------------------------------------------------------------------------
+
+// ALU, basic
+// ALU, basic, flagset
+def : SchedAlias<WriteI, V2Write_1cyc_1I>;
+def : InstRW<[V2Write_1cyc_1F],
+ (instregex "^(ADC|SBC)S[WX]r$")>;
+
+// ALU, extend and shift
+def : SchedAlias<WriteIEReg, V2Write_2cyc_1M>;
+
+// Arithmetic, LSL shift, shift <= 4
+// Arithmetic, flagset, LSL shift, shift <= 4
+// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
+def : SchedAlias<WriteISReg, V2Write_ArithI>;
+def : InstRW<[V2Write_ArithF],
+ (instregex "^(ADD|SUB)S[WX]rs$")>;
+
+// Arithmetic, immediate to logical address tag
+def : InstRW<[V2Write_2cyc_1M], (instrs ADDG, SUBG)>;
+
+// Convert floating-point condition flags
+// Flag manipulation instructions
+def : WriteRes<WriteSys, []> { let Latency = 1; }
+
+// Insert Random Tags
+def : InstRW<[V2Write_2cyc_1M], (instrs IRG, IRGstack)>;
+
+// Insert Tag Mask
+// Subtract Pointer
+// Subtract Pointer, flagset
+def : InstRW<[V2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>;
+
+// Logical, shift, no flagset
+def : InstRW<[V2Write_1cyc_1I],
+ (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
+
+// Logical, shift, flagset
+def : InstRW<[V2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
+
+// Move and shift instructions
+// -----------------------------------------------------------------------------
+
+def : SchedAlias<WriteImm, V2Write_1cyc_1I>;
+
+// §3.5 Divide and multiply instructions
+// -----------------------------------------------------------------------------
+
+// SDIV, UDIV
+def : SchedAlias<WriteID32, V2Write_12cyc_1M0>;
+def : SchedAlias<WriteID64, V2Write_20cyc_1M0>;
+
+def : SchedAlias<WriteIM32, V2Write_2cyc_1M>;
+def : SchedAlias<WriteIM64, V2Write_2cyc_1M>;
+
+// Multiply
+// Multiply accumulate, W-form
+// Multiply accumulate, X-form
+def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA],
+ (instregex "^M(ADD|SUB)[WX]rrr$")>;
+
+// Multiply accumulate long
+// Multiply long
+def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA],
+ (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
+
+// Multiply high
+def : InstRW<[V2Write_3cyc_1M], (instrs SMULHrr, UMULHrr)>;
+
+// Pointer Authentication Instructions (v8.3 PAC)
+// -----------------------------------------------------------------------------
+
+// Authenticate data address
+// Authenticate instruction address
+// Compute pointer authentication code for data address
+// Compute pointer authentication code, using generic key
+// Compute pointer authentication code for instruction address
+def : InstRW<[V2Write_5cyc_1M0], (instregex "^AUT", "^PAC")>;
+
+// Branch and link, register, with pointer authentication
+// Branch, register, with pointer authentication
+// Branch, return, with pointer authentication
+def : InstRW<[V2Write_6cyc_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA,
+ BRAAZ, BRAB, BRABZ, RETAA, RETAB,
+ ERETAA, ERETAB)>;
+
+// Load register, with pointer authentication
+def : InstRW<[V2Write_9cyc_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;
+
+// Strip pointer authentication code
+def : InstRW<[V2Write_2cyc_1M0], (instrs XPACD, XPACI, XPACLRI)>;
+
+// Miscellaneous data-processing instructions
+// -----------------------------------------------------------------------------
+
+// Address generation
+def : InstRW<[V2Write_1cyc_1F], (instrs ADR, ADRP)>;
+
+// Bitfield extract, one reg
+// Bitfield extract, two regs
+def : SchedAlias<WriteExtr, V2Write_Extr>;
+def : InstRW<[V2Write_Extr], (instrs EXTRWrri, EXTRXrri)>;
+
+// Bitfield move, basic
+def : SchedAlias<WriteIS, V2Write_1cyc_1I>;
+
+// Bitfield move, insert
+def : InstRW<[V2Write_2cyc_1M], (instregex "^BFM[WX]ri$")>;
+
+// Load instructions
+// -----------------------------------------------------------------------------
+
+// NOTE: SOG p. 19: Throughput of LDN?P X-form should be 2, but reported as 3.
+
+def : SchedAlias<WriteLD, V2Write_4cyc_1L>;
+def : SchedAlias<WriteLDIdx, V2Write_4cyc_1L>;
+
+// Load register, literal
+def : InstRW<[V2Write_5cyc_1L_1F], (instrs LDRWl, LDRXl, LDRSWl, PRFMl)>;
+
+// Load pair, signed immed offset, signed words
+def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi], (instrs LDPSWi)>;
+
+// Load pair, immed post-index or immed pre-index, signed words
+def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi, WriteAdr],
+ (instregex "^LDPSW(post|pre)$")>;
+
+// Store instructions
+// -----------------------------------------------------------------------------
+
+// NOTE: SOG, p. 20: Unsure if STRH uses pipeline I.
+
+def : SchedAlias<WriteST, V2Write_1cyc_1L01_1D>;
+def : SchedAlias<WriteSTIdx, V2Write_1cyc_1L01_1D>;
+def : SchedAlias<WriteSTP, V2Write_1cyc_1L01_1D>;
+def : SchedAlias<WriteAdr, V2Write_1cyc_1I>; // copied from A57.
+
+// Tag load instructions
+// -----------------------------------------------------------------------------
+
+// Load allocation tag
+// Load multiple allocation tags
+def : InstRW<[V2Write_4cyc_1L], (instrs LDG, LDGM)>;
+
+// Tag store instructions
+// -----------------------------------------------------------------------------
+
+// Store allocation tags to one or two granules, post-index
+// Store allocation tags to one or two granules, pre-index
+// Store allocation tag to one or two granules, zeroing, post-index
+// Store Allocation Tag to one or two granules, zeroing, pre-index
+// Store allocation tag and reg pair to memory, post-Index
+// Store allocation tag and reg pair to memory, pre-Index
+def : InstRW<[V2Write_1cyc_1L01_1D_1I], (instrs STGPreIndex, STGPostIndex,
+ ST2GPreIndex, ST2GPostIndex,
+ STZGPreIndex, STZGPostIndex,
+ STZ2GPreIndex, STZ2GPostIndex,
+ STGPpre, STGPpost)>;
+
+// Store allocation tags to one or two granules, signed offset
+// Store allocation tag to two granules, zeroing, signed offset
+// Store allocation tag and reg pair to memory, signed offset
+// Store multiple allocation tags
+def : InstRW<[V2Write_1cyc_1L01_1D], (instrs STGi, ST2Gi, STZGi,
+ STZ2Gi, STGPi, STGM, STZGM)>;
+
+// FP data processing instructions
+// -----------------------------------------------------------------------------
+
+// FP absolute value
+// FP arithmetic
+// FP min/max
+// FP negate
+// FP select
+def : SchedAlias<WriteF, V2Write_2cyc_1V>;
+
+// FP compare
+def : SchedAlias<WriteFCmp, V2Write_2cyc_1V0>;
+
+// FP divide, square root
+def : SchedAlias<WriteFDiv, V2Write_7cyc_1V02>;
+
+// FP divide, H-form
+def : InstRW<[V2Write_7cyc_1V02], (instrs FDIVHrr)>;
+// FP divide, S-form
+def : InstRW<[V2Write_10cyc_1V02], (instrs FDIVSrr)>;
+// FP divide, D-form
+def : InstRW<[V2Write_15cyc_1V02], (instrs FDIVDrr)>;
+
+// FP square root, H-form
+def : InstRW<[V2Write_7cyc_1V02], (instrs FSQRTHr)>;
+// FP square root, S-form
+def : InstRW<[V2Write_9cyc_1V02], (instrs FSQRTSr)>;
+// FP square root, D-form
+def : InstRW<[V2Write_16cyc_1V02], (instrs FSQRTDr)>;
+
+// FP multiply
+def : WriteRes<WriteFMul, [V2UnitV]> { let Latency = 3; }
+
+// FP multiply accumulate
+def : InstRW<[V2Wr_FMA, ReadDefault, ReadDefault, V2Rd_FMA],
+ (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
+
+// FP round to integral
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$",
+ "^FRINT(32|64)[XZ][SD]r$")>;
+
+// FP miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// FP convert, from gen to vec reg
+def : InstRW<[V2Write_3cyc_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
+
+// FP convert, from vec to gen reg
+def : InstRW<[V2Write_3cyc_1V01],
+ (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]ri?$")>;
+
+// FP convert, Javascript from vec to gen reg
+def : SchedAlias<WriteFCvt, V2Write_3cyc_1V0>;
+
+// FP convert, from vec to vec reg
+def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTSHr, FCVTDHr, FCVTHSr, FCVTDSr,
+ FCVTHDr, FCVTSDr, FCVTXNv1i64)>;
+
+// FP move, immed
+// FP move, register
+def : SchedAlias<WriteFImm, V2Write_2cyc_1V>;
+
+// FP transfer, from gen to low half of vec reg
+def : InstRW<[V2Write_3cyc_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
+
+// FP transfer, from gen to high half of vec reg
+def : InstRW<[V2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>;
+
+// FP transfer, from vec to gen reg
+def : SchedAlias<WriteFCopy, V2Write_2cyc_2V01>;
+
+// FP load instructions
+// -----------------------------------------------------------------------------
+
+// Load vector reg, literal, S/D/Q forms
+def : InstRW<[V2Write_7cyc_1F_1L], (instregex "^LDR[SDQ]l$")>;
+
+// Load vector reg, unscaled immed
+def : InstRW<[V2Write_6cyc_1L], (instregex "^LDUR[BHSDQ]i$")>;
+
+// Load vector reg, immed post-index
+// Load vector reg, immed pre-index
+def : InstRW<[V2Write_6cyc_1I_1L, WriteAdr],
+ (instregex "^LDR[BHSDQ](pre|post)$")>;
+
+// Load vector reg, unsigned immed
+def : InstRW<[V2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>;
+
+// Load vector reg, register offset, basic
+// Load vector reg, register offset, scale, S/D-form
+// Load vector reg, register offset, scale, H/Q-form
+// Load vector reg, register offset, extend
+// Load vector reg, register offset, extend, scale, S/D-form
+// Load vector reg, register offset, extend, scale, H/Q-form
+def : InstRW<[V2Write_LdrHQ, ReadAdrBase], (instregex "^LDR[BHSDQ]ro[WX]$")>;
+
+// Load vector pair, immed offset, S/D-form
+def : InstRW<[V2Write_6cyc_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;
+
+// Load vector pair, immed offset, Q-form
+def : InstRW<[V2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
+
+// Load vector pair, immed post-index, S/D-form
+// Load vector pair, immed pre-index, S/D-form
+def : InstRW<[V2Write_6cyc_1I_1L, WriteLDHi, WriteAdr],
+ (instregex "^LDP[SD](pre|post)$")>;
+
+// Load vector pair, immed post-index, Q-form
+// Load vector pair, immed pre-index, Q-form
+def : InstRW<[V2Write_6cyc_2I_2L, WriteLDHi, WriteAdr], (instrs LDPQpost,
+ LDPQpre)>;
+
+// FP store instructions
+// -----------------------------------------------------------------------------
+
+// Store vector reg, unscaled immed, B/H/S/D-form
+// Store vector reg, unscaled immed, Q-form
+def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>;
+
+// Store vector reg, immed post-index, B/H/S/D-form
+// Store vector reg, immed post-index, Q-form
+// Store vector reg, immed pre-index, B/H/S/D-form
+// Store vector reg, immed pre-index, Q-form
+def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01_1I],
+ (instregex "^STR[BHSDQ](pre|post)$")>;
+
+// Store vector reg, unsigned immed, B/H/S/D-form
+// Store vector reg, unsigned immed, Q-form
+def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>;
+
+// Store vector reg, register offset, basic, B/H/S/D-form
+// Store vector reg, register offset, basic, Q-form
+// Store vector reg, register offset, scale, H-form
+// Store vector reg, register offset, scale, S/D-form
+// Store vector reg, register offset, scale, Q-form
+// Store vector reg, register offset, extend, B/H/S/D-form
+// Store vector reg, register offset, extend, Q-form
+// Store vector reg, register offset, extend, scale, H-form
+// Store vector reg, register offset, extend, scale, S/D-form
+// Store vector reg, register offset, extend, scale, Q-form
+def : InstRW<[V2Write_StrHQ, ReadAdrBase],
+ (instregex "^STR[BHSDQ]ro[WX]$")>;
+
+// Store vector pair, immed offset, S-form
+// Store vector pair, immed offset, D-form
+def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STN?P[SD]i$")>;
+
+// Store vector pair, immed offset, Q-form
+def : InstRW<[V2Write_2cyc_1L01_2V01], (instrs STPQi, STNPQi)>;
+
+// Store vector pair, immed post-index, S-form
+// Store vector pair, immed post-index, D-form
+// Store vector pair, immed pre-index, S-form
+// Store vector pair, immed pre-index, D-form
+def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01_1I],
+ (instregex "^STP[SD](pre|post)$")>;
+
+// Store vector pair, immed post-index, Q-form
+def : InstRW<[V2Write_2cyc_1L01_2V01_1I], (instrs STPQpost)>;
+
+// Store vector pair, immed pre-index, Q-form
+def : InstRW<[V2Write_2cyc_1L01_2V01_2I], (instrs STPQpre)>;
+
+// ASIMD integer instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD absolute diff
+// ASIMD absolute diff long
+// ASIMD arith, basic
+// ASIMD arith, complex
+// ASIMD arith, pair-wise
+// ASIMD compare
+// ASIMD logical
+// ASIMD max/min, basic and pair-wise
+def : SchedAlias<WriteVd, V2Write_2cyc_1V>;
+def : SchedAlias<WriteVq, V2Write_2cyc_1V>;
+
+// ASIMD absolute diff accum
+// ASIMD absolute diff accum long
+def : InstRW<[V2Wr_VA, V2Rd_VA], (instregex "^[SU]ABAL?v")>;
+
+// ASIMD arith, reduce, 4H/4S
+def : InstRW<[V2Write_2cyc_1V13], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
+
+// ASIMD arith, reduce, 8B/8H
+def : InstRW<[V2Write_4cyc_1V13_1V],
+ (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
+
+// ASIMD arith, reduce, 16B
+def : InstRW<[V2Write_4cyc_2V13], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;
+
+// ASIMD dot product
+// ASIMD dot product using signed and unsigned integers
+def : InstRW<[V2Wr_VDOT, V2Rd_VDOT],
+ (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;
+
+// ASIMD matrix multiply-accumulate
+def : InstRW<[V2Wr_VMMA, V2Rd_VMMA], (instrs SMMLA, UMMLA, USMMLA)>;
+
+// ASIMD max/min, reduce, 4H/4S
+def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU](MAX|MIN)Vv4i16v$",
+ "^[SU](MAX|MIN)Vv4i32v$")>;
+
+// ASIMD max/min, reduce, 8B/8H
+def : InstRW<[V2Write_4cyc_1V13_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
+ "^[SU](MAX|MIN)Vv8i16v$")>;
+
+// ASIMD max/min, reduce, 16B
+def : InstRW<[V2Write_4cyc_2V13], (instregex "[SU](MAX|MIN)Vv16i8v$")>;
+
+// ASIMD multiply
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^MULv", "^SQ(R)?DMULHv")>;
+
+// ASIMD multiply accumulate
+def : InstRW<[V2Wr_VMA, V2Rd_VMA], (instregex "^MLAv", "^MLSv")>;
+
+// ASIMD multiply accumulate high
+def : InstRW<[V2Wr_VMAH, V2Rd_VMAH], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
+
+// ASIMD multiply accumulate long
+def : InstRW<[V2Wr_VMAL, V2Rd_VMAL], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
+
+// ASIMD multiply accumulate saturating long
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDML[AS]L[iv]")>;
+
+// ASIMD multiply/multiply long (8x8) polynomial, D-form
+// ASIMD multiply/multiply long (8x8) polynomial, Q-form
+def : InstRW<[V2Write_3cyc_1V23], (instregex "^PMULL?(v8i8|v16i8)$")>;
+
+// ASIMD multiply long
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]MULLv", "^SQDMULL[iv]")>;
+
+// ASIMD pairwise add and accumulate long
+def : InstRW<[V2Wr_VPA, V2Rd_VPA], (instregex "^[SU]ADALPv")>;
+
+// ASIMD shift accumulate
+def : InstRW<[V2Wr_VSA, V2Rd_VSA], (instregex "^[SU]SRA[dv]", "^[SU]RSRA[dv]")>;
+
+// ASIMD shift by immed, basic
+def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHL[dv]", "^SHLLv", "^SHRNv",
+ "^SSHLLv", "^SSHR[dv]", "^USHLLv",
+ "^USHR[dv]")>;
+
+// ASIMD shift by immed and insert, basic
+def : InstRW<[V2Write_2cyc_1V13], (instregex "^SLI[dv]", "^SRI[dv]")>;
+
+// ASIMD shift by immed, complex
+def : InstRW<[V2Write_4cyc_1V13],
+ (instregex "^RSHRNv", "^SQRSHRU?N[bhsv]", "^(SQSHLU?|UQSHL)[bhsd]$",
+ "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
+ "^SQSHRU?N[bhsv]", "^SRSHR[dv]", "^UQRSHRN[bhsv]",
+ "^UQSHRN[bhsv]", "^URSHR[dv]")>;
+
+// ASIMD shift by register, basic
+def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]SHLv")>;
+
+// ASIMD shift by register, complex
+def : InstRW<[V2Write_4cyc_1V13],
+ (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
+ "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;
+
+// ASIMD floating-point instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD FP absolute value/difference
+// ASIMD FP arith, normal
+// ASIMD FP compare
+// ASIMD FP complex add
+// ASIMD FP max/min, normal
+// ASIMD FP max/min, pairwise
+// ASIMD FP negate
+// Handled by SchedAlias<WriteV[dq], ...>
+
+// ASIMD FP complex multiply add
+def : InstRW<[V2Wr_VFCMA, V2Rd_VFCMA], (instregex "^FCMLAv")>;
+
+// ASIMD FP convert, long (F16 to F32)
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTL(v4|v8)i16")>;
+
+// ASIMD FP convert, long (F32 to F64)
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVTL(v2|v4)i32")>;
+
+// ASIMD FP convert, narrow (F32 to F16)
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTN(v4|v8)i16")>;
+
+// ASIMD FP convert, narrow (F64 to F32)
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVTN(v2|v4)i32",
+ "^FCVTXN(v2|v4)f32")>;
+
+// ASIMD FP convert, other, D-form F32 and Q-form F64
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$",
+ "^FCVT[AMNPZ][SU]v1i64$",
+ "^FCVTZ[SU]d$",
+ "^[SU]CVTFv2f(32|64)$",
+ "^[SU]CVTFv1i64$",
+ "^[SU]CVTFd$")>;
+
+// ASIMD FP convert, other, D-form F16 and Q-form F32
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
+ "^FCVT[AMNPZ][SU]v1i32$",
+ "^FCVTZ[SU]s$",
+ "^[SU]CVTFv4f(16|32)$",
+ "^[SU]CVTFv1i32$",
+ "^[SU]CVTFs$")>;
+
+// ASIMD FP convert, other, Q-form F16
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVT[AMNPZ][SU]v8f16$",
+ "^FCVT[AMNPZ][SU]v1f16$",
+ "^FCVTZ[SU]h$",
+ "^[SU]CVTFv8f16$",
+ "^[SU]CVTFv1i16$",
+ "^[SU]CVTFh$")>;
+
+// ASIMD FP divide, D-form, F16
+def : InstRW<[V2Write_7cyc_1V02_7rc], (instrs FDIVv4f16)>;
+
+// ASIMD FP divide, D-form, F32
+def : InstRW<[V2Write_10cyc_1V02_5rc], (instrs FDIVv2f32)>;
+
+// ASIMD FP divide, Q-form, F16
+def : InstRW<[V2Write_13cyc_1V02_13rc], (instrs FDIVv8f16)>;
+
+// ASIMD FP divide, Q-form, F32
+def : InstRW<[V2Write_10cyc_1V02_10rc], (instrs FDIVv4f32)>;
+
+// ASIMD FP divide, Q-form, F64
+def : InstRW<[V2Write_15cyc_1V02_14rc], (instrs FDIVv2f64)>;
+
+// ASIMD FP max/min, reduce, F32 and D-form F16
+def : InstRW<[V2Write_4cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;
+
+// ASIMD FP max/min, reduce, Q-form F16
+def : InstRW<[V2Write_6cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;
+
+// ASIMD FP multiply
+def : InstRW<[V2Wr_VFM], (instregex "^FMULv", "^FMULXv")>;
+
+// ASIMD FP multiply accumulate
+def : InstRW<[V2Wr_VFMA, V2Rd_VFMA], (instregex "^FMLAv", "^FMLSv")>;
+
+// ASIMD FP multiply accumulate long
+def : InstRW<[V2Wr_VFMAL, V2Rd_VFMAL], (instregex "^FML[AS]L2?(lane)?v")>;
+
+// ASIMD FP round, D-form F32 and Q-form F64
+def : InstRW<[V2Write_3cyc_1V02],
+ (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
+ "^FRINT(32|64)[XZ]v2f(32|64)$")>;
+
+// ASIMD FP round, D-form F16 and Q-form F32
+def : InstRW<[V2Write_4cyc_2V02],
+ (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
+ "^FRINT(32|64)[XZ]v4f32$")>;
+
+// ASIMD FP round, Q-form F16
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
+
+// ASIMD FP square root, D-form, F16
+def : InstRW<[V2Write_7cyc_1V02_7rc], (instrs FSQRTv4f16)>;
+
+// ASIMD FP square root, D-form, F32
+def : InstRW<[V2Write_10cyc_1V02_5rc], (instrs FSQRTv2f32)>;
+
+// ASIMD FP square root, Q-form, F16
+def : InstRW<[V2Write_13cyc_1V02_13rc], (instrs FSQRTv8f16)>;
+
+// ASIMD FP square root, Q-form, F32
+def : InstRW<[V2Write_10cyc_1V02_9rc], (instrs FSQRTv4f32)>;
+
+// ASIMD FP square root, Q-form, F64
+def : InstRW<[V2Write_16cyc_1V02_15rc], (instrs FSQRTv2f64)>;
+
+// ASIMD BFloat16 (BF16) instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD convert, F32 to BF16
+def : InstRW<[V2Write_4cyc_2V02], (instrs BFCVTN, BFCVTN2)>;
+
+// ASIMD dot product
+def : InstRW<[V2Wr_VBFDOT, V2Rd_VBFDOT], (instrs BFDOTv4bf16, BFDOTv8bf16)>;
+
+// ASIMD matrix multiply accumulate
+def : InstRW<[V2Wr_VBFMMA, V2Rd_VBFMMA], (instrs BFMMLA)>;
+
+// ASIMD multiply accumulate long
+def : InstRW<[V2Wr_VBFMAL, V2Rd_VBFMAL], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
+ BFMLALTIdx)>;
+
+// Scalar convert, F32 to BF16
+def : InstRW<[V2Write_3cyc_1V02], (instrs BFCVT)>;
+
+// ASIMD miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD bit reverse
+// ASIMD bitwise insert
+// ASIMD count
+// ASIMD duplicate, element
+// ASIMD extract
+// ASIMD extract narrow
+// ASIMD insert, element to element
+// ASIMD move, FP immed
+// ASIMD move, integer immed
+// ASIMD reverse
+// ASIMD table lookup extension, 1 table reg
+// ASIMD transpose
+// ASIMD unzip/zip
+// Handled by SchedAlias<WriteV[dq], ...>
+
+// ASIMD duplicate, gen reg
+def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>;
+
+// ASIMD extract narrow, saturating
+def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
+
+// ASIMD reciprocal and square root estimate, D-form U32
+def : InstRW<[V2Write_3cyc_1V02], (instrs URECPEv2i32, URSQRTEv2i32)>;
+
+// ASIMD reciprocal and square root estimate, Q-form U32
+def : InstRW<[V2Write_4cyc_2V02], (instrs URECPEv4i32, URSQRTEv4i32)>;
+
+// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
+def : InstRW<[V2Write_3cyc_1V02], (instrs FRECPEv1f16, FRECPEv1i32,
+ FRECPEv1i64, FRECPEv2f32,
+ FRSQRTEv1f16, FRSQRTEv1i32,
+ FRSQRTEv1i64, FRSQRTEv2f32)>;
+
+// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
+def : InstRW<[V2Write_4cyc_2V02], (instrs FRECPEv4f16, FRECPEv4f32,
+ FRSQRTEv4f16, FRSQRTEv4f32)>;
+
+// ASIMD reciprocal and square root estimate, Q-form F16
+def : InstRW<[V2Write_6cyc_4V02], (instrs FRECPEv8f16, FRSQRTEv8f16)>;
+
+// ASIMD reciprocal exponent
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRECPXv")>;
+
+// ASIMD reciprocal step
+def : InstRW<[V2Write_4cyc_1V], (instregex "^FRECPS(32|64|v)",
+ "^FRSQRTS(32|64|v)")>;
+
+// ASIMD table lookup, 1 or 2 table regs
+def : InstRW<[V2Write_2cyc_1V01], (instrs TBLv8i8One, TBLv16i8One,
+ TBLv8i8Two, TBLv16i8Two)>;
+
+// ASIMD table lookup, 3 table regs
+def : InstRW<[V2Write_4cyc_2V01], (instrs TBLv8i8Three, TBLv16i8Three)>;
+
+// ASIMD table lookup, 4 table regs
+def : InstRW<[V2Write_4cyc_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>;
+
+// ASIMD table lookup extension, 2 table reg
+def : InstRW<[V2Write_4cyc_2V], (instrs TBXv8i8Two, TBXv16i8Two)>;
+
+// ASIMD table lookup extension, 3 table reg
+def : InstRW<[V2Write_6cyc_3V], (instrs TBXv8i8Three, TBXv16i8Three)>;
+
+// ASIMD table lookup extension, 4 table reg
+def : InstRW<[V2Write_6cyc_5V], (instrs TBXv8i8Four, TBXv16i8Four)>;
+
+// ASIMD transfer, element to gen reg
+def : InstRW<[V2Write_2cyc_2V01], (instregex "^[SU]MOVv")>;
+
+// ASIMD transfer, gen reg to element
+def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
+
+// ASIMD load instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD load, 1 element, multiple, 1 reg, D-form
+def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[V2Write_6cyc_1L, WriteAdr],
+ (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[V2Write_6cyc_1L, WriteAdr],
+ (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 2 reg, D-form
+def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[V2Write_6cyc_2L, WriteAdr],
+ (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[V2Write_6cyc_2L, WriteAdr],
+ (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 3 reg, D-form
+def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[V2Write_6cyc_3L, WriteAdr],
+ (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[V2Write_6cyc_3L, WriteAdr],
+ (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg, D-form
+def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[V2Write_7cyc_4L, WriteAdr],
+ (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[V2Write_7cyc_4L, WriteAdr],
+ (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 1 element, one lane, B/H/S
+// ASIMD load, 1 element, one lane, D
+def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)$")>;
+def : InstRW<[V2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 1 element, all lanes, D-form, B/H/S
+// ASIMD load, 1 element, all lanes, D-form, D
+def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[V2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 1 element, all lanes, Q-form
+def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[V2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, multiple, D-form, B/H/S
+def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)$")>;
+def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
+
+// ASIMD load, 2 element, multiple, Q-form, B/H/S
+// ASIMD load, 2 element, multiple, Q-form, D
+def : InstRW<[V2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[V2Write_8cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 2 element, one lane, B/H
+// ASIMD load, 2 element, one lane, S
+// ASIMD load, 2 element, one lane, D
+def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)$")>;
+def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 2 element, all lanes, D-form, B/H/S
+// ASIMD load, 2 element, all lanes, D-form, D
+def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 2 element, all lanes, Q-form
+def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, multiple, D-form, B/H/S
+def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)$")>;
+def : InstRW<[V2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
+
+// ASIMD load, 3 element, multiple, Q-form, B/H/S
+// ASIMD load, 3 element, multiple, Q-form, D
+def : InstRW<[V2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[V2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 3 element, one lane, B/H
+// ASIMD load, 3 element, one lane, S
+// ASIMD load, 3 element, one lane, D
+def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)$")>;
+def : InstRW<[V2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 3 element, all lanes, D-form, B/H/S
+// ASIMD load, 3 element, all lanes, D-form, D
+def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[V2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 3 element, all lanes, Q-form, B/H/S
+// ASIMD load, 3 element, all lanes, Q-form, D
+def : InstRW<[V2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[V2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, multiple, D-form, B/H/S
+def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)$")>;
+def : InstRW<[V2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
+
+// ASIMD load, 4 element, multiple, Q-form, B/H/S
+// ASIMD load, 4 element, multiple, Q-form, D
+def : InstRW<[V2Write_9cyc_6L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[V2Write_9cyc_6L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD load, 4 element, one lane, B/H
+// ASIMD load, 4 element, one lane, S
+// ASIMD load, 4 element, one lane, D
+def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)$")>;
+def : InstRW<[V2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
+
+// ASIMD load, 4 element, all lanes, D-form, B/H/S
+// ASIMD load, 4 element, all lanes, D-form, D
+def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)$")>;
+def : InstRW<[V2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD load, 4 element, all lanes, Q-form, B/H/S
+// ASIMD load, 4 element, all lanes, Q-form, D
+def : InstRW<[V2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
+def : InstRW<[V2Write_8cyc_4L_4V, WriteAdr], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store instructions
+// -----------------------------------------------------------------------------
+
+// ASIMD store, 1 element, multiple, 1 reg, D-form
+def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
+def : InstRW<[V2Write_2cyc_1L01_1V01, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 1 reg, Q-form
+def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
+def : InstRW<[V2Write_2cyc_1L01_1V01, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, D-form
+def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
+def : InstRW<[V2Write_2cyc_1L01_1V01, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 2 reg, Q-form
+def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[V2Write_2cyc_2L01_2V01, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, D-form
+def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
+def : InstRW<[V2Write_2cyc_2L01_2V01, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 3 reg, Q-form
+def : InstRW<[V2Write_2cyc_3L01_3V01], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[V2Write_2cyc_3L01_3V01, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, D-form
+def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
+def : InstRW<[V2Write_2cyc_2L01_2V01, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
+
+// ASIMD store, 1 element, multiple, 4 reg, Q-form
+def : InstRW<[V2Write_2cyc_4L01_4V01], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
+def : InstRW<[V2Write_2cyc_4L01_4V01, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 1 element, one lane, B/H/S
+// ASIMD store, 1 element, one lane, D
+def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST1i(8|16|32|64)$")>;
+def : InstRW<[V2Write_4cyc_1L01_2V01, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 2 element, multiple, D-form, B/H/S
+def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST2Twov(8b|4h|2s)$")>;
+def : InstRW<[V2Write_4cyc_1L01_2V01, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 2 element, multiple, Q-form, B/H/S
+// ASIMD store, 2 element, multiple, Q-form, D
+def : InstRW<[V2Write_4cyc_2L01_4V01], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
+def : InstRW<[V2Write_4cyc_2L01_4V01, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 2 element, one lane, B/H/S
+// ASIMD store, 2 element, one lane, D
+def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST2i(8|16|32|64)$")>;
+def : InstRW<[V2Write_4cyc_1L01_2V01, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 3 element, multiple, D-form, B/H/S
+def : InstRW<[V2Write_5cyc_2L01_4V01], (instregex "ST3Threev(8b|4h|2s)$")>;
+def : InstRW<[V2Write_5cyc_2L01_4V01, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 3 element, multiple, Q-form, B/H/S
+// ASIMD store, 3 element, multiple, Q-form, D
+def : InstRW<[V2Write_6cyc_3L01_6V01], (instregex "ST3Threev(16b|8h|4s|2d)$")>;
+def : InstRW<[V2Write_6cyc_3L01_6V01, WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
+
+// ASIMD store, 3 element, one lane, B/H
+// ASIMD store, 3 element, one lane, S
+// ASIMD store, 3 element, one lane, D
+def : InstRW<[V2Write_5cyc_2L01_4V01], (instregex "ST3i(8|16|32|64)$")>;
+def : InstRW<[V2Write_5cyc_2L01_4V01, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
+
+// ASIMD store, 4 element, multiple, D-form, B/H/S
+def : InstRW<[V2Write_6cyc_2L01_6V01], (instregex "ST4Fourv(8b|4h|2s)$")>;
+def : InstRW<[V2Write_6cyc_2L01_6V01, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
+
+// ASIMD store, 4 element, multiple, Q-form, B/H/S
+def : InstRW<[V2Write_7cyc_4L01_12V01], (instregex "ST4Fourv(16b|8h|4s)$")>;
+def : InstRW<[V2Write_7cyc_4L01_12V01, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
+
+// ASIMD store, 4 element, multiple, Q-form, D
+def : InstRW<[V2Write_5cyc_4L01_8V01], (instregex "ST4Fourv(2d)$")>;
+def : InstRW<[V2Write_5cyc_4L01_8V01, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
+
+// ASIMD store, 4 element, one lane, B/H/S
+def : InstRW<[V2Write_6cyc_1L01_3V01], (instregex "ST4i(8|16|32)$")>;
+def : InstRW<[V2Write_6cyc_1L01_3V01, WriteAdr], (instregex "ST4i(8|16|32)_POST$")>;
+
+// ASIMD store, 4 element, one lane, D
+def : InstRW<[V2Write_4cyc_2L01_4V01], (instregex "ST4i(64)$")>;
+def : InstRW<[V2Write_4cyc_2L01_4V01, WriteAdr], (instregex "ST4i(64)_POST$")>;
+
+// Cryptography extensions
+// -----------------------------------------------------------------------------
+
+// Crypto AES ops
+def : InstRW<[V2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;
+
+// Crypto polynomial (64x64) multiply long
+def : InstRW<[V2Write_2cyc_1V], (instrs PMULLv1i64, PMULLv2i64)>;
+
+// Crypto SHA1 hash acceleration op
+// Crypto SHA1 schedule acceleration ops
+def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>;
+
+// Crypto SHA1 hash acceleration ops
+// Crypto SHA256 hash acceleration ops
+def : InstRW<[V2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>;
+
+// Crypto SHA256 schedule acceleration ops
+def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>;
+
+// Crypto SHA512 hash acceleration ops
+def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>;
+
+// Crypto SHA3 ops
+def : InstRW<[V2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;
+
+// Crypto SM3 ops
+def : InstRW<[V2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
+ "^SM3TT[12][AB]$")>;
+
+// Crypto SM4 ops
+def : InstRW<[V2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>;
+
+// CRC
+// -----------------------------------------------------------------------------
+
+def : InstRW<[V2Wr_CRC, V2Rd_CRC], (instregex "^CRC32")>;
+
+// SVE Predicate instructions
+// -----------------------------------------------------------------------------
+
+// Loop control, based on predicate
+def : InstRW<[V2Write_2or3cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP,
+ BRKB_PPmP, BRKB_PPzP)>;
+
+// Loop control, based on predicate and flag setting
+def : InstRW<[V2Write_3or4cyc_2M], (instrs BRKAS_PPzP, BRKBS_PPzP)>;
+
+// Loop control, propagating
+def : InstRW<[V2Write_2or3cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP,
+ BRKPB_PPzPP)>;
+
+// Loop control, propagating and flag setting
+def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
+ BRKPBS_PPzPP)>;
+
+// Loop control, based on GPR
+def : InstRW<[V2Write_3cyc_2M],
+ (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>;
+def : InstRW<[V2Write_3cyc_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>;
+
+// Loop terminate
+def : InstRW<[V2Write_1cyc_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)")>;
+
+// Predicate counting scalar
+def : InstRW<[V2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
+def : InstRW<[V2Write_2cyc_1M],
+ (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI",
+ "^SQ(DEC|INC)[BHWD]_XPiWdI",
+ "^UQ(DEC|INC)[BHWD]_WPiI")>;
+
+// Predicate counting scalar, ALL, {1,2,4}
+def : InstRW<[V2Write_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI")>;
+
+// Predicate counting scalar, active predicate
+def : InstRW<[V2Write_2cyc_1M],
+ (instregex "^CNTP_XPP_[BHSD]",
+ "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]",
+ "^(UQDEC|UQINC)P_WP_[BHSD]",
+ "^(SQDEC|SQINC)P_XPWd_[BHSD]")>;
+
+// Predicate counting vector, active predicate
+def : InstRW<[V2Write_7cyc_1M_1M0_1V],
+ (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>;
+
+// Predicate logical
+def : InstRW<[V2Write_1or2cyc_1M0],
+ (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>;
+
+// Predicate logical, flag setting
+def : InstRW<[V2Write_1or2cyc_1M0_1M],
+ (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>;
+
+// Predicate reverse
+def : InstRW<[V2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]")>;
+
+// Predicate select
+def : InstRW<[V2Write_1cyc_1M0], (instrs SEL_PPPP)>;
+
+// Predicate set
+def : InstRW<[V2Write_2cyc_1M], (instregex "^PFALSE", "^PTRUE_[BHSD]")>;
+
+// Predicate set/initialize, set flags
+def : InstRW<[V2Write_3cyc_2M], (instregex "^PTRUES_[BHSD]")>;
+
+// Predicate find first/next
+def : InstRW<[V2Write_2cyc_1M], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>;
+
+// Predicate test
+def : InstRW<[V2Write_1cyc_1M], (instrs PTEST_PP)>;
+
+// Predicate transpose
+def : InstRW<[V2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSD]")>;
+
+// Predicate unpack and widen
+def : InstRW<[V2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
+
+// Predicate zip/unzip
+def : InstRW<[V2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]")>;
+
+// SVE integer instructions
+// -----------------------------------------------------------------------------
+
+// Arithmetic, absolute diff
+def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]",
+ "^[SU]ABD_ZPZZ_[BHSD]")>;
+
+// Arithmetic, absolute diff accum
+def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]")>;
+
+// Arithmetic, absolute diff accum long
+def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>;
+
+// Arithmetic, absolute diff long
+def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>;
+
+// Arithmetic, basic
+def : InstRW<[V2Write_2cyc_1V],
+ (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^(ADD|SUB)_ZZZ_[BHSD]",
+ "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
+ "^(ADD|SUB|SUBR)_ZI_[BHSD]",
+ "^ADR_[SU]XTW_ZZZ_D_[0123]",
+ "^ADR_LSL_ZZZ_[SD]_[0123]",
+ "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
+ "^SADDLBT_ZZZ_[HSD]",
+ "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^SSUBL(BT|TB)_ZZZ_[HSD]")>;
+
+// Arithmetic, complex
+def : InstRW<[V2Write_2cyc_1V],
+ (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
+ "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
+ "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
+ "^[SU]Q(ADD|SUB)_ZI_[BHSD]",
+ "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
+ "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;
+
+// Arithmetic, large integer
+def : InstRW<[V2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>;
+
+// Arithmetic, pairwise add
+def : InstRW<[V2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]")>;
+
+// Arithmetic, pairwise add and accum long
+def : InstRW<[V2Wr_ZPA, ReadDefault, V2Rd_ZPA],
+ (instregex "^[SU]ADALP_ZPmZ_[HSD]")>;
+
+// Arithmetic, shift
+def : InstRW<[V2Write_2cyc_1V13],
+ (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
+ "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
+ "^(ASR|LSL|LSR)_ZPmI_[BHSD]",
+ "^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
+ "^(ASR|LSL|LSR)_ZZI_[BHSD]",
+ "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
+ "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
+
+// Arithmetic, shift and accumulate
+def : InstRW<[V2Wr_ZSA, V2Rd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]")>;
+
+// Arithmetic, shift by immediate
+def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHRN[BT]_ZZI_[BHS]",
+ "^[SU]SHLL[BT]_ZZI_[HSD]")>;
+
+// Arithmetic, shift by immediate and insert
+def : InstRW<[V2Write_2cyc_1V13], (instregex "^(SLI|SRI)_ZZI_[BHSD]")>;
+
+// Arithmetic, shift complex
+def : InstRW<[V2Write_4cyc_1V13],
+ (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
+ "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]",
+ "^[SU]QR?SHL_ZPZZ_[BHSD]",
+ "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
+ "^SQSHRU?N[BT]_ZZI_[BHS]",
+ "^UQR?SHRN[BT]_ZZI_[BHS]")>;
+
+// Arithmetic, shift right for divide
+def : InstRW<[V2Write_4cyc_1V13], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;
+
+// Arithmetic, shift rounding
+def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]",
+ "^[SU]RSHL_ZPZZ_[BHSD]",
+ "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>;
+
+// Bit manipulation
+def : InstRW<[V2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>;
+
+// Bitwise select
+def : InstRW<[V2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>;
+
+// Count/reverse bits
+def : InstRW<[V2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;
+
+// Broadcast logical bitmask immediate to vector
+def : InstRW<[V2Write_2cyc_1V], (instrs DUPM_ZI)>;
+
+// Compare and set flags
+def : InstRW<[V2Write_4or5cyc_1V0_1M0],
+ (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]",
+ "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>;
+
+// Complex add
+def : InstRW<[V2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]")>;
+
+// Complex dot product 8-bit element
+def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
+
+// Complex dot product 16-bit element
+def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
+
+// Complex multiply-add B, H, S element size
+def : InstRW<[V2Wr_ZCMABHS, V2Rd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]",
+ "^CMLA_ZZZI_[HS]")>;
+
+// Complex multiply-add D element size
+def : InstRW<[V2Wr_ZCMAD, V2Rd_ZCMAD], (instrs CMLA_ZZZ_D)>;
+
+// Conditional extract operations, scalar form
+def : InstRW<[V2Write_8cyc_1M0_1V01], (instregex "^CLAST[AB]_RPZ_[BHSD]")>;
+
+// Conditional extract operations, SIMD&FP scalar and vector forms
+def : InstRW<[V2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]",
+ "^COMPACT_ZPZ_[SD]",
+ "^SPLICE_ZPZZ?_[BHSD]")>;
+
+// Convert to floating point, 64b to float or convert to double
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
+ "^[SU]CVTF_ZPmZ_StoD")>;
+
+// Convert to floating point, 32b to single or half
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;
+
+// Convert to floating point, 16b to half
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
+
+// Copy, scalar
+def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]")>;
+
+// Copy, scalar SIMD&FP or imm
+def : InstRW<[V2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]",
+ "^CPY_ZPzI_[BHSD]")>;
+
+// Divides, 32 bit
+def : InstRW<[V2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
+ "^[SU]DIV_ZPZZ_S")>;
+
+// Divides, 64 bit
+def : InstRW<[V2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
+ "^[SU]DIV_ZPZZ_D")>;
+
+// Dot product, 8 bit
+def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S")>;
+
+// Dot product, 8 bit, using signed and unsigned integers
+def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
+
+// Dot product, 16 bit
+def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D")>;
+
+// Duplicate, immediate and indexed form
+def : InstRW<[V2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]",
+ "^DUP_ZZI_[BHSDQ]")>;
+
+// Duplicate, scalar form
+def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]")>;
+
+// Extend, sign or zero
+def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]XTB_ZPmZ_[HSD]",
+ "^[SU]XTH_ZPmZ_[SD]",
+ "^[SU]XTW_ZPmZ_[D]")>;
+
+// Extract
+def : InstRW<[V2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>;
+
+// Extract narrow saturating
+def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]",
+ "^SQXTUN[BT]_ZZ_[BHS]")>;
+
+// Extract/insert operation, SIMD and FP scalar form
+def : InstRW<[V2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]",
+ "^INSR_ZV_[BHSD]")>;
+
+// Extract/insert operation, scalar
+def : InstRW<[V2Write_6cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]",
+ "^INSR_ZR_[BHSD]")>;
+
+// Histogram operations
+def : InstRW<[V2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]",
+ "^HISTSEG_ZZZ")>;
+
+// Horizontal operations, B, H, S form, immediate operands only
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^INDEX_II_[BHS]")>;
+
+// Horizontal operations, B, H, S form, scalar, immediate operands / scalar
+// operands only / immediate, scalar operands
+def : InstRW<[V2Write_7cyc_1M0_1V02], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>;
+
+// Horizontal operations, D form, immediate operands only
+def : InstRW<[V2Write_5cyc_2V02], (instrs INDEX_II_D)>;
+
+// Horizontal operations, D form, scalar, immediate operands / scalar operands
+// only / immediate, scalar operands
+def : InstRW<[V2Write_8cyc_2M0_2V02], (instregex "^INDEX_(IR|RI|RR)_D")>;
+
+// Logical
+def : InstRW<[V2Write_2cyc_1V],
+ (instregex "^(AND|EOR|ORR)_ZI",
+ "^(AND|BIC|EOR|ORR)_ZZZ",
+ "^EOR(BT|TB)_ZZZ_[BHSD]",
+ "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]",
+ "^NOT_ZPmZ_[BHSD]")>;
+
+// Max/min, basic and pairwise
+def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
+ "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]",
+ "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>;
+
+// Matching operations
+// FIXME: SOG p. 44, n. 5: If the consuming instruction has a flag source, the
+// latency for this instruction is 4 cycles.
+def : InstRW<[V2Write_2or3cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]")>;
+
+// Matrix multiply-accumulate
+def : InstRW<[V2Wr_ZMMA, V2Rd_ZMMA], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
+
+// Move prefix
+def : InstRW<[V2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]",
+ "^MOVPRFX_ZZ")>;
+
+// Multiply, B, H, S element size
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
+ "^MUL_ZPZZ_[BHS]",
+ "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
+ "^[SU]MULH_ZPZZ_[BHS]")>;
+
+// Multiply, D element size
+def : InstRW<[V2Write_5cyc_2V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
+ "^MUL_ZPZZ_D",
+ "^[SU]MULH_(ZPmZ|ZZZ)_D",
+ "^[SU]MULH_ZPZZ_D")>;
+
+// Multiply long
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^[SU]MULL[BT]_ZZZI_[SD]",
+ "^[SU]MULL[BT]_ZZZ_[HSD]")>;
+
+// Multiply accumulate, B, H, S element size
+def : InstRW<[V2Wr_ZMABHS, V2Rd_ZMABHS],
+ (instregex "^ML[AS]_ZZZI_[HS]", "^ML[AS]_ZPZZZ_[BHS]")>;
+def : InstRW<[V2Wr_ZMABHS, ReadDefault, V2Rd_ZMABHS],
+ (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;
+
+// Multiply accumulate, D element size
+def : InstRW<[V2Wr_ZMAD, V2Rd_ZMAD],
+ (instregex "^ML[AS]_ZZZI_D", "^ML[AS]_ZPZZZ_D")>;
+def : InstRW<[V2Wr_ZMAD, ReadDefault, V2Rd_ZMAD],
+ (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;
+
+// Multiply accumulate long
+def : InstRW<[V2Wr_ZMAL, V2Rd_ZMAL], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]",
+ "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>;
+
+// Multiply accumulate saturating doubling long regular
+def : InstRW<[V2Wr_ZMASQL, V2Rd_ZMASQ],
+ (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]",
+ "^SQDML[AS]L[BT]_ZZZI_[SD]")>;
+
+// Multiply saturating doubling high, B, H, S element size
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULH_ZZZ_[BHS]",
+ "^SQDMULH_ZZZI_[HS]")>;
+
+// Multiply saturating doubling high, D element size
+def : InstRW<[V2Write_5cyc_2V02], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
+
+// Multiply saturating doubling long
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULL[BT]_ZZZ_[HSD]",
+ "^SQDMULL[BT]_ZZZI_[SD]")>;
+
+// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
+// element size
+def : InstRW<[V2Wr_ZMASQBHS, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]",
+ "^SQRDCMLAH_ZZZ_[BHS]",
+ "^SQRDML[AS]H_ZZZI_[HS]",
+ "^SQRDCMLAH_ZZZI_[HS]")>;
+
+// Multiply saturating rounding doubling regular/complex accumulate, D element
+// size
+def : InstRW<[V2Wr_ZMASQD, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D",
+ "^SQRDCMLAH_ZZZ_D")>;
+
+// Multiply saturating rounding doubling regular/complex, B, H, S element size
+def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQRDMULH_ZZZ_[BHS]",
+ "^SQRDMULH_ZZZI_[HS]")>;
+
+// Multiply saturating rounding doubling regular/complex, D element size
+def : InstRW<[V2Write_5cyc_2V02], (instregex "^SQRDMULH_ZZZI?_D")>;
+
+// Multiply/multiply long, (8x8) polynomial
+def : InstRW<[V2Write_2cyc_1V23], (instregex "^PMUL_ZZZ_B",
+ "^PMULL[BT]_ZZZ_[HDQ]")>;
+
+// Predicate counting vector
+def : InstRW<[V2Write_2cyc_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI")>;
+
+// Reciprocal estimate
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>;
+
+// Reduction, arithmetic, B form
+def : InstRW<[V2Write_9cyc_2V_4V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
+
+// Reduction, arithmetic, H form
+def : InstRW<[V2Write_8cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
+
+// Reduction, arithmetic, S form
+def : InstRW<[V2Write_6cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
+
+// Reduction, arithmetic, D form
+def : InstRW<[V2Write_4cyc_2V], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
+
+// Reduction, logical
+def : InstRW<[V2Write_6cyc_1V_1V13], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]")>;
+
+// Reverse, vector
+def : InstRW<[V2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]",
+ "^REVB_ZPmZ_[HSD]",
+ "^REVH_ZPmZ_[SD]",
+ "^REVW_ZPmZ_D")>;
+
+// Select, vector form
+def : InstRW<[V2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]")>;
+
+// Table lookup
+def : InstRW<[V2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]")>;
+
+// Table lookup extension
+def : InstRW<[V2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]")>;
+
+// Transpose, vector form
+def : InstRW<[V2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>;
+
+// Unpack and extend
+def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>;
+
+// Zip/unzip
+def : InstRW<[V2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>;
+
+// SVE floating-point instructions
+// -----------------------------------------------------------------------------
+
+// Floating point absolute value/difference
+def : InstRW<[V2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]",
+ "^FABD_ZPZZ_[HSD]",
+ "^FABS_ZPmZ_[HSD]")>;
+
+// Floating point arithmetic
+def : InstRW<[V2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
+ "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
+ "^FADDP_ZPmZZ_[HSD]",
+ "^FNEG_ZPmZ_[HSD]",
+ "^FSUBR_ZPm[IZ]_[HSD]",
+ "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
+
+// Floating point associative add, F16
+def : InstRW<[V2Write_10cyc_1V1_9rc], (instrs FADDA_VPZ_H)>;
+
+// Floating point associative add, F32
+def : InstRW<[V2Write_6cyc_1V1_5rc], (instrs FADDA_VPZ_S)>;
+
+// Floating point associative add, F64
+def : InstRW<[V2Write_4cyc_1V], (instrs FADDA_VPZ_D)>;
+
+// Floating point compare
+def : InstRW<[V2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]",
+ "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]",
+ "^FCM(LE|LT)_PPzZ0_[HSD]",
+ "^FCMUO_PPzZZ_[HSD]")>;
+
+// Floating point complex add
+def : InstRW<[V2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]")>;
+
+// Floating point complex multiply add
+def : InstRW<[V2Wr_ZFCMA, ReadDefault, V2Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]")>;
+def : InstRW<[V2Wr_ZFCMA, V2Rd_ZFCMA], (instregex "^FCMLA_ZZZI_[HS]")>;
+
+// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
+ "^FCVTLT_ZPmZ_HtoS",
+ "^FCVTNT_ZPmZ_StoH")>;
+
+// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
+// or F64 to F16)
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
+ "^FCVTLT_ZPmZ_StoD",
+ "^FCVTNT_ZPmZ_DtoS")>;
+
+// Floating point convert, round to odd
+def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;
+
+// Floating point base2 log, F16
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;
+
+// Floating point base2 log, F32
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;
+
+// Floating point base2 log, F64
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;
+
+// Floating point convert to integer, F16
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
+
+// Floating point convert to integer, F32
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;
+
+// Floating point convert to integer, F64
+def : InstRW<[V2Write_3cyc_1V02],
+ (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
+
+// Floating point copy
+def : InstRW<[V2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]",
+ "^FDUP_ZI_[HSD]")>;
+
+// Floating point divide, F16
+def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
+
+// Floating point divide, F32
+def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
+
+// Floating point divide, F64
+def : InstRW<[V2Write_15cyc_1V02_14rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
+
+// Floating point min/max pairwise
+def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
+
+// Floating point min/max
+def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
+ "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;
+
+// Floating point multiply
+def : InstRW<[V2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
+ "^FMULX_ZPZZ_[HSD]",
+ "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
+ "^FMUL_ZPZ[IZ]_[HSD]")>;
+
+// Floating point multiply accumulate
+def : InstRW<[V2Wr_ZFMA, ReadDefault, V2Rd_ZFMA],
+ (instregex "^FN?ML[AS]_ZPmZZ_[HSD]",
+ "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>;
+def : InstRW<[V2Wr_ZFMA, V2Rd_ZFMA],
+ (instregex "^FML[AS]_ZZZI_[HSD]",
+ "^FN?ML[AS]_ZPZZZ_[HSD]")>;
+
+// Floating point multiply add/sub accumulate long
+def : InstRW<[V2Wr_ZFMAL, V2Rd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>;
+
+// Floating point reciprocal estimate, F16
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>;
+
+// Floating point reciprocal estimate, F32
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>;
+
+// Floating point reciprocal estimate, F64
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>;
+
+// Floating point reciprocal step
+def : InstRW<[V2Write_4cyc_1V], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
+
+// Floating point reduction, F16
+def : InstRW<[V2Write_8cyc_4V],
+ (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H")>;
+
+// Floating point reduction, F32
+def : InstRW<[V2Write_6cyc_3V],
+ (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S")>;
+
+// Floating point reduction, F64
+def : InstRW<[V2Write_4cyc_2V],
+ (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D")>;
+
+// Floating point round to integral, F16
+def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
+
+// Floating point round to integral, F32
+def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
+
+// Floating point round to integral, F64
+def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
+
+// Floating point square root, F16
+def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H")>;
+
+// Floating point square root, F32
+def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S")>;
+
+// Floating point square root, F64
+def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D")>;
+
+// Floating point trigonometric exponentiation
+def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>;
+
+// Floating point trigonometric multiply add
+def : InstRW<[V2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]")>;
+
+// Floating point trigonometric, miscellaneous
+def : InstRW<[V2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]")>;
+
+// SVE BFloat16 (BF16) instructions
+// -----------------------------------------------------------------------------
+
+// Convert, F32 to BF16
+def : InstRW<[V2Write_4cyc_1V02], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
+
+// Dot product
+def : InstRW<[V2Wr_ZBFDOT, V2Rd_ZBFDOT], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
+
+// Matrix multiply accumulate
+def : InstRW<[V2Wr_ZBFMMA, V2Rd_ZBFMMA], (instrs BFMMLA_ZZZ)>;
+
+// Multiply accumulate long
+def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?")>;
+
+// SVE Load instructions
+// -----------------------------------------------------------------------------
+
+// Load vector
+def : InstRW<[V2Write_6cyc_1L], (instrs LDR_ZXI)>;
+
+// Load predicate
+def : InstRW<[V2Write_6cyc_1L_1M], (instrs LDR_PXI)>;
+
+// Contiguous load, scalar + imm
+def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM_REAL$",
+ "^LD1S?B_[HSD]_IMM_REAL$",
+ "^LD1S?H_[SD]_IMM_REAL$",
+ "^LD1S?W_D_IMM_REAL$" )>;
+// Contiguous load, scalar + scalar
+def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]$",
+ "^LD1S?B_[HSD]$",
+ "^LD1S?H_[SD]$",
+ "^LD1S?W_D$" )>;
+
+// Contiguous load broadcast, scalar + imm
+def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$",
+ "^LD1RS?B_[HSD]_IMM$",
+ "^LD1RS?H_[SD]_IMM$",
+ "^LD1RW_D_IMM$",
+ "^LD1RSW_IMM$",
+ "^LD1RQ_[BHWD]_IMM$")>;
+
+// Contiguous load broadcast, scalar + scalar
+def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;
+
+// Non temporal load, scalar + imm
+// Non temporal load, scalar + scalar
+def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZR[IR]$")>;
+
+// Non temporal gather load, vector + scalar 32-bit element size
+def : InstRW<[V2Write_9cyc_2L_4V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
+ "^LDNT1S[BH]_ZZR_S_REAL$")>;
+
+// Non temporal gather load, vector + scalar 64-bit element size
+def : InstRW<[V2Write_9cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
+def : InstRW<[V2Write_9cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>;
+
+// Contiguous first faulting load, scalar + scalar
+def : InstRW<[V2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$",
+ "^LDFF1S?B_[HSD]_REAL$",
+ "^LDFF1S?H_[SD]_REAL$",
+ "^LDFF1S?W_D_REAL$")>;
+
+// Contiguous non faulting load, scalar + imm
+def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$",
+ "^LDNF1S?B_[HSD]_IMM_REAL$",
+ "^LDNF1S?H_[SD]_IMM_REAL$",
+ "^LDNF1S?W_D_IMM_REAL$")>;
+
+// Contiguous Load two structures to two vectors, scalar + imm
+def : InstRW<[V2Write_8cyc_2L_2V], (instregex "^LD2[BHWD]_IMM$")>;
+
+// Contiguous Load two structures to two vectors, scalar + scalar
+def : InstRW<[V2Write_9cyc_2L_2V_2S], (instregex "^LD2[BHWD]$")>;
+
+// Contiguous Load three structures to three vectors, scalar + imm
+def : InstRW<[V2Write_9cyc_3L_3V], (instregex "^LD3[BHWD]_IMM$")>;
+
+// Contiguous Load three structures to three vectors, scalar + scalar
+def : InstRW<[V2Write_10cyc_3V_3L_3S], (instregex "^LD3[BHWD]$")>;
+
+// Contiguous Load four structures to four vectors, scalar + imm
+def : InstRW<[V2Write_9cyc_4L_8V], (instregex "^LD4[BHWD]_IMM$")>;
+
+// Contiguous Load four structures to four vectors, scalar + scalar
+def : InstRW<[V2Write_10cyc_4L_8V_4S], (instregex "^LD4[BHWD]$")>;
+
+// Gather load, vector + imm, 32-bit element size
+def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
+ "^GLD(FF)?1W_IMM_REAL$")>;
+
+// Gather load, vector + imm, 64-bit element size
+def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
+ "^GLD(FF)?1D_IMM_REAL$")>;
+
+// Gather load, 32-bit scaled offset
+def : InstRW<[V2Write_10cyc_1L_8V],
+ (instregex "^GLD(FF)?1S?H_S_[SU]XTW_SCALED_REAL$",
+ "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
+
+// Gather load, 64-bit scaled offset
+// NOTE: These instructions are not specified in the SOG.
+def : InstRW<[V2Write_10cyc_1L_4V],
+ (instregex "^GLD(FF)?1S?[HW]_D_([SU]XTW_)?SCALED_REAL$",
+ "^GLD(FF)?1D_([SU]XTW_)?SCALED_REAL$")>;
+
+// Gather load, 32-bit unpacked unscaled offset
+def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
+ "^GLD(FF)?1W_[SU]XTW_REAL$")>;
+
+// Gather load, 64-bit unpacked unscaled offset
+// NOTE: These instructions are not specified in the SOG.
+def : InstRW<[V2Write_9cyc_1L_2V],
+ (instregex "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?REAL$",
+ "^GLD(FF)?1D_([SU]XTW_)?REAL$")>;
+
+// SVE Store instructions
+// -----------------------------------------------------------------------------
+
+// Store from predicate reg
+def : InstRW<[V2Write_1cyc_1L01], (instrs STR_PXI)>;
+
+// Store from vector reg
+def : InstRW<[V2Write_2cyc_1L01_1V01], (instrs STR_ZXI)>;
+
+// Contiguous store, scalar + imm
+def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^ST1[BHWD]_IMM$",
+ "^ST1B_[HSD]_IMM$",
+ "^ST1H_[SD]_IMM$",
+ "^ST1W_D_IMM$")>;
+
+// Contiguous store, scalar + scalar
+def : InstRW<[V2Write_2cyc_1L01_1S_1V01], (instregex "^ST1H(_[SD])?$")>;
+def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^ST1[BWD]$",
+ "^ST1B_[HSD]$",
+ "^ST1W_D$")>;
+
+// Contiguous store two structures from two vectors, scalar + imm
+def : InstRW<[V2Write_4cyc_1L01_1V01], (instregex "^ST2[BHWD]_IMM$")>;
+
+// Contiguous store two structures from two vectors, scalar + scalar
+def : InstRW<[V2Write_4cyc_2L01_2S_2V01], (instrs ST2H)>;
+def : InstRW<[V2Write_4cyc_2L01_2V01], (instregex "^ST2[BWD]$")>;
+
+// Contiguous store three structures from three vectors, scalar + imm
+def : InstRW<[V2Write_7cyc_9L01_9V01], (instregex "^ST3[BHWD]_IMM$")>;
+
+// Contiguous store three structures from three vectors, scalar + scalar
+def : InstRW<[V2Write_7cyc_9L01_9S_9V01], (instregex "^ST3[BHWD]$")>;
+
+// Contiguous store four structures from four vectors, scalar + imm
+def : InstRW<[V2Write_11cyc_18L01_18V01], (instregex "^ST4[BHWD]_IMM$")>;
+
+// Contiguous store four structures from four vectors, scalar + scalar
+def : InstRW<[V2Write_11cyc_18L01_18S_18V01], (instregex "^ST4[BHWD]$")>;
+
+// Non temporal store, scalar + imm
+def : InstRW<[V2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;
+
+// Non temporal store, scalar + scalar
+def : InstRW<[V2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>;
+def : InstRW<[V2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;
+
+// Scatter non temporal store, vector + scalar 32-bit element size
+def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^STNT1[BHW]_ZZR_S")>;
+
+// Scatter non temporal store, vector + scalar 64-bit element size
+def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^STNT1[BHWD]_ZZR_D")>;
+
+// Scatter store vector + imm 32-bit element size
+def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^SST1[BH]_S_IMM$",
+ "^SST1W_IMM$")>;
+
+// Scatter store vector + imm 64-bit element size
+def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D_IMM$",
+ "^SST1D_IMM$")>;
+
+// Scatter store, 32-bit scaled offset
+def : InstRW<[V2Write_4cyc_4L01_4V01],
+ (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;
+
+// Scatter store, 32-bit unpacked unscaled offset
+def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D_[SU]XTW$",
+ "^SST1D_[SU]XTW$")>;
+
+// Scatter store, 32-bit unpacked scaled offset
+def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
+ "^SST1D_[SU]XTW_SCALED$")>;
+
+// Scatter store, 32-bit unscaled offset
+def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^SST1[BH]_S_[SU]XTW$",
+ "^SST1W_[SU]XTW$")>;
+
+// Scatter store, 64-bit scaled offset
+def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[HW]_D_SCALED$",
+ "^SST1D_SCALED$")>;
+
+// Scatter store, 64-bit unscaled offset
+def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D$",
+ "^SST1D$")>;
+
+// SVE Miscellaneous instructions
+// -----------------------------------------------------------------------------
+
+// Read first fault register, unpredicated
+def : InstRW<[V2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>;
+
+// Read first fault register, predicated
+def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>;
+
+// Read first fault register and set flags
+def : InstRW<[V2Write_4or5cyc_2M0_2M], (instrs RDFFRS_PPz)>;
+
+// Set first fault register
+// Write to first fault register
+def : InstRW<[V2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>;
+
+// Prefetch
+// NOTE: This is not specified in the SOG.
+def : InstRW<[V2Write_4cyc_1L], (instregex "^PRF[BHWD]")>;
+
+// SVE Cryptographic instructions
+// -----------------------------------------------------------------------------
+
+// Crypto AES ops
+def : InstRW<[V2Write_2cyc_1V], (instregex "^AES[DE]_ZZZ_B$",
+ "^AESI?MC_ZZ_B$")>;
+
+// Crypto SHA3 ops
+def : InstRW<[V2Write_2cyc_1V0], (instregex "^(BCAX|EOR3)_ZZZZ$",
+ "^RAX1_ZZZ_D$",
+ "^XAR_ZZZI_[BHSD]$")>;
+
+// Crypto SM4 ops
+def : InstRW<[V2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>;
+
+}
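A sketch for context, not part of the patch above: the V2Write_<N>cyc_<uops><pipes> names used throughout these entries encode latency, micro-op count and issue pipes. The write classes themselves are declared earlier in AArch64SchedNeoverseV2.td, outside this excerpt; a class such as V2Write_4cyc_2V02 is presumably defined along these lines, with the ProcResource names here being assumptions for illustration only.

// Illustrative only: unit names are assumed, not the file's real definitions.
def V2UnitV0  : ProcResource<1>;                     // FP/ASIMD pipe V0 (assumed)
def V2UnitV2  : ProcResource<1>;                     // FP/ASIMD pipe V2 (assumed)
def V2UnitV02 : ProcResGroup<[V2UnitV0, V2UnitV2]>;  // either of the two pipes
def V2Write_4cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> {
  let Latency     = 4;  // result available after 4 cycles
  let NumMicroOps = 2;  // issues as 2 micro-ops on the V0/V2 group
}

The InstRW entries above then attach such write classes to the instructions matched by instrs/instregex.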
diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredAmpere.td b/llvm/lib/Target/AArch64/AArch64SchedPredAmpere.td
deleted file mode 100644
index 8552c07bda56..000000000000
--- a/llvm/lib/Target/AArch64/AArch64SchedPredAmpere.td
+++ /dev/null
@@ -1,25 +0,0 @@
-//===- AArch64SchedPredAmpere.td - AArch64 Sched Preds -----*- tablegen -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines scheduling predicate definitions that are used by the
-// AArch64 Ampere Computing processors.
-//
-//===----------------------------------------------------------------------===//
-
-// Auxiliary predicates.
-
-// Check for a LSL shift <= 4
-def AmpereCheapLSL : MCSchedPredicate<
- CheckAny<[CheckShiftBy0,
- CheckAll<
- [CheckShiftLSL,
- CheckAny<
- [CheckShiftBy1,
- CheckShiftBy2,
- CheckShiftBy3,
- CheckShiftBy4]>]>]>>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td b/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td
index ee7cc1f5095b..f68fc3675f89 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedPredExynos.td
@@ -125,23 +125,13 @@ def ExynosResetFn : TIIPredicate<
MCReturnStatement<TruePred>>,
MCOpcodeSwitchCase<
[ORRWri, ORRXri],
- MCReturnStatement<
- CheckAll<
- [CheckIsRegOperand<1>,
- CheckAny<
- [CheckRegOperand<1, WZR>,
- CheckRegOperand<1, XZR>]>]>>>],
+ MCReturnStatement<CheckIsReg1Zero>>],
MCReturnStatement<
CheckAny<
[IsCopyIdiomFn,
IsZeroFPIdiomFn]>>>>;
def ExynosResetPred : MCSchedPredicate<ExynosResetFn>;
-// Identify EXTR as the alias for ROR (immediate).
-def ExynosRotateRightImmPred : MCSchedPredicate<
- CheckAll<[CheckOpcode<[EXTRWrri, EXTRXrri]>,
- CheckSameRegOperand<1, 2>]>>;
-
// Identify cheap arithmetic and logic immediate instructions.
def ExynosCheapFn : TIIPredicate<
"isExynosCheapAsMove",
diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td b/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
new file mode 100644
index 000000000000..a4c6cd4b978f
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
@@ -0,0 +1,56 @@
+//===- AArch64SchedPredNeoverse.td - AArch64 Sched Preds -----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines scheduling predicate definitions that are used by the
+// AArch64 Neoverse processors.
+//
+//===----------------------------------------------------------------------===//
+
+// Auxiliary predicates.
+
+// Check for LSL shift == 0
+def NeoverseNoLSL : MCSchedPredicate<
+ CheckAll<[CheckShiftLSL,
+ CheckShiftBy0]>>;
+
+// Identify LDR/STR H/Q-form scaled (and potentially extended) FP instructions
+def NeoverseHQForm : MCSchedPredicate<
+ CheckAll<[
+ CheckAny<[CheckHForm, CheckQForm]>,
+ CheckImmOperand<4, 1>]>>;
+
+// Check if <Pd> == <Pg>
+def NeoversePdIsPgFn : TIIPredicate<
+ "isNeoversePdSameAsPg",
+ MCOpcodeSwitchStatement<
+ [MCOpcodeSwitchCase<[BRKA_PPmP, BRKB_PPmP],
+ MCReturnStatement<CheckSameRegOperand<1, 2>>>],
+ MCReturnStatement<CheckSameRegOperand<0, 1>>>>;
+def NeoversePdIsPg : MCSchedPredicate<NeoversePdIsPgFn>;
+
+// Check if SVE INC/DEC (scalar), ALL, {1, 2, 4}
+def NeoverseCheapIncDec : MCSchedPredicate<
+ CheckAll<[CheckOpcode<[
+ INCB_XPiI, INCH_XPiI,
+ INCW_XPiI, INCD_XPiI,
+ DECB_XPiI, DECH_XPiI,
+ DECW_XPiI, DECD_XPiI]>,
+ CheckImmOperand<2, 31>,
+ CheckAny<[
+ CheckImmOperand<3, 1>,
+ CheckImmOperand<3, 2>,
+ CheckImmOperand<3, 4>]>]>>;
+
+// Identify "[SU]?(MADD|MSUB)L?" as the alias for "[SU]?(MUL|MNEG)L?".
+def NeoverseMULIdiomPred : MCSchedPredicate< // <op> Rd, Rs, Rv, ZR
+ CheckAll<[CheckOpcode<
+ [MADDWrrr, MADDXrrr,
+ MSUBWrrr, MSUBXrrr,
+ SMADDLrrr, UMADDLrrr,
+ SMSUBLrrr, UMSUBLrrr]>,
+ CheckIsReg3Zero]>>;
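A sketch for context, not part of the patch: a predicate such as NeoverseCheapIncDec is consumed through a SchedWriteVariant, which selects a different write class depending on whether the predicate matches. The V2Write_IncDec class referenced earlier for the INC/DEC (scalar) entries is presumably a variant of roughly this shape; the write classes picked below are illustrative guesses, not the real definition.

// Illustrative only: the selected write classes are placeholders.
// Matches e.g. "incw x0, all, mul #2" (pattern ALL, multiplier 1/2/4);
// "incw x0, vl7, mul #2" falls through to the generic case.
def V2Write_IncDec : SchedWriteVariant<[
  SchedVar<NeoverseCheapIncDec, [V2Write_1cyc_1M]>,   // cheap ALL/{1,2,4} form
  SchedVar<NoSchedPred,         [V2Write_2cyc_1M]>]>;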
diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredicates.td b/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
index 4473f3a53845..854d3ce56483 100644
--- a/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedPredicates.td
@@ -31,6 +31,12 @@ foreach I = {0-3} in {
def CheckExtBy#I : CheckImmOperand<3, I>;
}
+// Check for shifting in arithmetic and logic instructions.
+foreach I = {0-4, 8} in {
+ let FunctionMapper = "AArch64_AM::getShiftValue" in
+ def CheckShiftBy#I : CheckImmOperand<3, I>;
+}
+
// Check the extension type in the register offset addressing mode.
let FunctionMapper = "AArch64_AM::getMemExtendType" in {
def CheckMemExtUXTW : CheckImmOperand_s<3, "AArch64_AM::UXTW">;
@@ -52,19 +58,32 @@ let FunctionMapper = "AArch64_AM::getShiftType" in {
def CheckShiftMSL : CheckImmOperand_s<3, "AArch64_AM::MSL">;
}
-// Check for shifting in arithmetic and logic instructions.
-foreach I = {0-4, 8} in {
- let FunctionMapper = "AArch64_AM::getShiftValue" in
- def CheckShiftBy#I : CheckImmOperand<3, I>;
+// Generic predicates.
+
+// Check for ZR in a register operand.
+foreach I = {1-3} in {
+ def CheckIsReg#I#Zero : CheckAll<
+ [CheckIsRegOperand<I>,
+ CheckAny<
+ [CheckRegOperand<I, WZR>,
+ CheckRegOperand<I, XZR>]>]>;
}
+def IsReg1ZeroPred : MCSchedPredicate<CheckIsReg1Zero>;
+def IsReg2ZeroPred : MCSchedPredicate<CheckIsReg2Zero>;
+def IsReg3ZeroPred : MCSchedPredicate<CheckIsReg3Zero>;
-// Generic predicates.
// Identify whether an instruction is NEON or floating point
def CheckFpOrNEON : CheckFunctionPredicateWithTII<
"AArch64_MC::isFpOrNEON",
"AArch64InstrInfo::isFpOrNEON"
>;
+// Identify whether an instruction is the 16-bit NEON form based on its result.
+def CheckHForm : CheckFunctionPredicateWithTII<
+ "AArch64_MC::isHForm",
+ "AArch64InstrInfo::isHForm"
+>;
+
// Identify whether an instruction is the 128-bit NEON form based on its result.
def CheckQForm : CheckFunctionPredicateWithTII<
"AArch64_MC::isQForm",
@@ -212,38 +231,6 @@ def IsLoadStoreRegOffsetOp : CheckOpcode<!listconcat(IsLoadRegOffsetOp.ValidOpco
// Target predicates.
-// Identify an instruction that effectively transfers a register to another.
-def IsCopyIdiomFn : TIIPredicate<"isCopyIdiom",
- MCOpcodeSwitchStatement<
- [// MOV {Rd, SP}, {SP, Rn} =>
- // ADD {Rd, SP}, {SP, Rn}, #0
- MCOpcodeSwitchCase<
- [ADDWri, ADDXri],
- MCReturnStatement<
- CheckAll<
- [CheckIsRegOperand<0>,
- CheckIsRegOperand<1>,
- CheckAny<
- [CheckRegOperand<0, WSP>,
- CheckRegOperand<0, SP>,
- CheckRegOperand<1, WSP>,
- CheckRegOperand<1, SP>]>,
- CheckZeroOperand<2>]>>>,
- // MOV Rd, Rm =>
- // ORR Rd, ZR, Rm, LSL #0
- MCOpcodeSwitchCase<
- [ORRWrs, ORRXrs],
- MCReturnStatement<
- CheckAll<
- [CheckIsRegOperand<1>,
- CheckIsRegOperand<2>,
- CheckAny<
- [CheckRegOperand<1, WZR>,
- CheckRegOperand<1, XZR>]>,
- CheckShiftBy0]>>>],
- MCReturnStatement<FalsePred>>>;
-def IsCopyIdiomPred : MCSchedPredicate<IsCopyIdiomFn>;
-
// Identify arithmetic instructions with an extended register.
def RegExtendedFn : TIIPredicate<"hasExtendedReg",
MCOpcodeSwitchStatement<
@@ -276,6 +263,63 @@ def ScaledIdxFn : TIIPredicate<"isScaledAddr",
MCReturnStatement<FalsePred>>>;
def ScaledIdxPred : MCSchedPredicate<ScaledIdxFn>;
+// Special cases.
+
+// Check for LSL shift <= 4
+def IsCheapLSL : MCSchedPredicate<
+ CheckAll<
+ [CheckShiftLSL,
+ CheckAny<
+ [CheckShiftBy0,
+ CheckShiftBy1,
+ CheckShiftBy2,
+ CheckShiftBy3,
+ CheckShiftBy4]>]>>;
+
+// Idioms.
+
+// Identify an instruction that effectively transfers a register to another.
+def IsCopyIdiomFn : TIIPredicate<"isCopyIdiom",
+ MCOpcodeSwitchStatement<
+ [// MOV {Rd, SP}, {SP, Rn} =>
+ // ADD {Rd, SP}, {SP, Rn}, #0
+ MCOpcodeSwitchCase<
+ [ADDWri, ADDXri],
+ MCReturnStatement<
+ CheckAll<
+ [CheckIsRegOperand<0>,
+ CheckIsRegOperand<1>,
+ CheckAny<
+ [CheckRegOperand<0, WSP>,
+ CheckRegOperand<0, SP>,
+ CheckRegOperand<1, WSP>,
+ CheckRegOperand<1, SP>]>,
+ CheckZeroOperand<2>]>>>,
+ // MOV Rd, Rm =>
+ // ORR Rd, ZR, Rm, LSL #0
+ MCOpcodeSwitchCase<
+ [ORRWrs, ORRXrs],
+ MCReturnStatement<
+ CheckAll<
+ [CheckIsReg1Zero,
+ CheckIsRegOperand<2>,
+ CheckShiftBy0]>>>],
+ MCReturnStatement<FalsePred>>>;
+def IsCopyIdiomPred : MCSchedPredicate<IsCopyIdiomFn>;
+
+// Identify an instruction that effectively resets a GP register to zero.
+def IsZeroIdiomFn : TIIPredicate<"isZeroIdiom",
+ MCOpcodeSwitchStatement<
+ [// ORR Rd, ZR, #0
+ MCOpcodeSwitchCase<
+ [ORRWri, ORRXri],
+ MCReturnStatement<
+ CheckAll<
+ [CheckIsReg1Zero,
+ CheckZeroOperand<2>]>>>],
+ MCReturnStatement<FalsePred>>>;
+def IsZeroIdiomPred : MCSchedPredicate<IsZeroIdiomFn>;
+
// Identify an instruction that effectively resets a FP register to zero.
def IsZeroFPIdiomFn : TIIPredicate<"isZeroFPIdiom",
MCOpcodeSwitchStatement<
@@ -295,18 +339,7 @@ def IsZeroFPIdiomFn : TIIPredicate<"isZeroFPIdiom",
MCReturnStatement<FalsePred>>>;
def IsZeroFPIdiomPred : MCSchedPredicate<IsZeroFPIdiomFn>;
-// Identify an instruction that effectively resets a GP register to zero.
-def IsZeroIdiomFn : TIIPredicate<"isZeroIdiom",
- MCOpcodeSwitchStatement<
- [// ORR Rd, ZR, #0
- MCOpcodeSwitchCase<
- [ORRWri, ORRXri],
- MCReturnStatement<
- CheckAll<
- [CheckIsRegOperand<1>,
- CheckAny<
- [CheckRegOperand<1, WZR>,
- CheckRegOperand<1, XZR>]>,
- CheckZeroOperand<2>]>>>],
- MCReturnStatement<FalsePred>>>;
-def IsZeroIdiomPred : MCSchedPredicate<IsZeroIdiomFn>;
+// Identify EXTR as the alias for ROR (immediate).
+def IsRORImmIdiomPred : MCSchedPredicate< // EXTR Rd, Rs, Rs, #Imm
+ CheckAll<[CheckOpcode<[EXTRWrri, EXTRXrri]>,
+ CheckSameRegOperand<1, 2>]>>;
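A sketch for context, not part of the patch: IsRORImmIdiomPred fires when both source registers of EXTR are identical, i.e. "extr w0, w1, w1, #7" is the alias for "ror w0, w1, #7". A processor model can use this to rate the alias like a plain shift rather than a true two-register extract. The variant below is illustrative; WriteI and WriteExtr are generic AArch64 write classes, and the choice of classes here is an assumption.

// Illustrative only: not taken from any processor model in this patch.
def ExampleWriteEXTR : SchedWriteVariant<[
  SchedVar<IsRORImmIdiomPred, [WriteI]>,      // ROR-by-immediate idiom
  SchedVar<NoSchedPred,       [WriteExtr]>]>; // generic EXTR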
diff --git a/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp b/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
index 85b32f9df0e1..753f69461308 100644
--- a/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
+++ b/llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp
@@ -286,18 +286,20 @@ bool AArch64SpeculationHardening::instrumentControlFlow(
bool TmpRegisterNotAvailableEverywhere = false;
RegScavenger RS;
- RS.enterBasicBlock(MBB);
+ RS.enterBasicBlockEnd(MBB);
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); I++) {
- MachineInstr &MI = *I;
+ for (MachineBasicBlock::iterator I = MBB.end(); I != MBB.begin(); ) {
+ MachineInstr &MI = *--I;
if (!MI.isReturn() && !MI.isCall())
continue;
// The RegScavenger represents registers available *after* the MI
// instruction pointed to by RS.getCurrentPosition().
// We need to have a register that is available *before* the MI is executed.
- if (I != MBB.begin())
- RS.forward(std::prev(I));
+ if (I == MBB.begin())
+ RS.enterBasicBlock(MBB);
+ else
+ RS.backward(std::prev(I));
// FIXME: The below just finds *a* unused register. Maybe code could be
// optimized more if this looks for the register that isn't used for the
// longest time around this place, to enable more scheduling freedom. Not
diff --git a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
index 97afb66ed9ab..3ac86b3cde2e 100644
--- a/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTagging.cpp
@@ -380,7 +380,7 @@ Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst,
// Check to see if this store is to a constant offset from the start ptr.
std::optional<int64_t> Offset =
- isPointerOffset(StartPtr, NextStore->getPointerOperand(), *DL);
+ NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, *DL);
if (!Offset)
break;
@@ -398,7 +398,7 @@ Instruction *AArch64StackTagging::collectInitializers(Instruction *StartInst,
// Check to see if this store is to a constant offset from the start ptr.
std::optional<int64_t> Offset =
- isPointerOffset(StartPtr, MSI->getDest(), *DL);
+ MSI->getDest()->getPointerOffsetFrom(StartPtr, *DL);
if (!Offset)
break;
diff --git a/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp b/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
index 2cbac9783bbd..41cd405c891e 100644
--- a/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StackTaggingPreRA.cpp
@@ -276,8 +276,8 @@ std::optional<int> AArch64StackTaggingPreRA::findFirstSlotCandidate() {
Register UseReg = WorkList.pop_back_val();
for (auto &UseI : MRI->use_instructions(UseReg)) {
unsigned Opcode = UseI.getOpcode();
- if (Opcode == AArch64::STGOffset || Opcode == AArch64::ST2GOffset ||
- Opcode == AArch64::STZGOffset || Opcode == AArch64::STZ2GOffset ||
+ if (Opcode == AArch64::STGi || Opcode == AArch64::ST2Gi ||
+ Opcode == AArch64::STZGi || Opcode == AArch64::STZ2Gi ||
Opcode == AArch64::STGPi || Opcode == AArch64::STGloop ||
Opcode == AArch64::STZGloop || Opcode == AArch64::STGloop_wback ||
Opcode == AArch64::STZGloop_wback)
diff --git a/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index 3ee0d5190ea3..93bd35b9c121 100644
--- a/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -75,7 +75,7 @@ FunctionPass *llvm::createAArch64StorePairSuppressPass() {
/// oversaturate the vector units.
bool AArch64StorePairSuppress::shouldAddSTPToBlock(const MachineBasicBlock *BB) {
if (!MinInstr)
- MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
+ MinInstr = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount);
MachineTraceMetrics::Trace BBTrace = MinInstr->getTrace(BB);
unsigned ResLength = BBTrace.getResourceLength();
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 245ed812ae9e..450e27b8a2af 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -24,8 +24,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
-#include "llvm/Support/AArch64TargetParser.h"
-#include "llvm/Support/TargetParser.h"
+#include "llvm/TargetParser/AArch64TargetParser.h"
using namespace llvm;
@@ -65,9 +64,11 @@ ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
"Should only be used for testing register allocator."),
cl::CommaSeparated, cl::Hidden);
-static cl::opt<bool>
- ForceStreamingCompatibleSVE("force-streaming-compatible-sve",
- cl::init(false), cl::Hidden);
+static cl::opt<bool> ForceStreamingCompatibleSVE(
+ "force-streaming-compatible-sve",
+ cl::desc(
+ "Force the use of streaming-compatible SVE code for all functions"),
+ cl::Hidden);
unsigned AArch64Subtarget::getVectorInsertExtractBaseCost() const {
if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
@@ -104,24 +105,24 @@ void AArch64Subtarget::initializeProperties() {
case CortexA35:
case CortexA53:
case CortexA55:
- PrefFunctionLogAlignment = 4;
- PrefLoopLogAlignment = 4;
+ PrefFunctionAlignment = Align(16);
+ PrefLoopAlignment = Align(16);
MaxBytesForLoopAlignment = 8;
break;
case CortexA57:
MaxInterleaveFactor = 4;
- PrefFunctionLogAlignment = 4;
- PrefLoopLogAlignment = 4;
+ PrefFunctionAlignment = Align(16);
+ PrefLoopAlignment = Align(16);
MaxBytesForLoopAlignment = 8;
break;
case CortexA65:
- PrefFunctionLogAlignment = 3;
+ PrefFunctionAlignment = Align(8);
break;
case CortexA72:
case CortexA73:
case CortexA75:
- PrefFunctionLogAlignment = 4;
- PrefLoopLogAlignment = 4;
+ PrefFunctionAlignment = Align(16);
+ PrefLoopAlignment = Align(16);
MaxBytesForLoopAlignment = 8;
break;
case CortexA76:
@@ -131,29 +132,29 @@ void AArch64Subtarget::initializeProperties() {
case CortexR82:
case CortexX1:
case CortexX1C:
- PrefFunctionLogAlignment = 4;
- PrefLoopLogAlignment = 5;
+ PrefFunctionAlignment = Align(16);
+ PrefLoopAlignment = Align(32);
MaxBytesForLoopAlignment = 16;
break;
case CortexA510:
- PrefFunctionLogAlignment = 4;
+ PrefFunctionAlignment = Align(16);
VScaleForTuning = 1;
- PrefLoopLogAlignment = 4;
+ PrefLoopAlignment = Align(16);
MaxBytesForLoopAlignment = 8;
break;
case CortexA710:
case CortexA715:
case CortexX2:
case CortexX3:
- PrefFunctionLogAlignment = 4;
+ PrefFunctionAlignment = Align(16);
VScaleForTuning = 1;
- PrefLoopLogAlignment = 5;
+ PrefLoopAlignment = Align(32);
MaxBytesForLoopAlignment = 16;
break;
case A64FX:
CacheLineSize = 256;
- PrefFunctionLogAlignment = 3;
- PrefLoopLogAlignment = 2;
+ PrefFunctionAlignment = Align(8);
+ PrefLoopAlignment = Align(4);
MaxInterleaveFactor = 4;
PrefetchDistance = 128;
MinPrefetchStride = 1024;
@@ -185,8 +186,8 @@ void AArch64Subtarget::initializeProperties() {
case ExynosM3:
MaxInterleaveFactor = 4;
MaxJumpTableSize = 20;
- PrefFunctionLogAlignment = 5;
- PrefLoopLogAlignment = 4;
+ PrefFunctionAlignment = Align(32);
+ PrefLoopAlignment = Align(16);
break;
case Falkor:
MaxInterleaveFactor = 4;
@@ -208,28 +209,29 @@ void AArch64Subtarget::initializeProperties() {
MinVectorRegisterBitWidth = 128;
break;
case NeoverseE1:
- PrefFunctionLogAlignment = 3;
+ PrefFunctionAlignment = Align(8);
break;
case NeoverseN1:
- PrefFunctionLogAlignment = 4;
- PrefLoopLogAlignment = 5;
+ PrefFunctionAlignment = Align(16);
+ PrefLoopAlignment = Align(32);
MaxBytesForLoopAlignment = 16;
break;
case NeoverseN2:
case NeoverseV2:
- PrefFunctionLogAlignment = 4;
- PrefLoopLogAlignment = 5;
+ PrefFunctionAlignment = Align(16);
+ PrefLoopAlignment = Align(32);
MaxBytesForLoopAlignment = 16;
VScaleForTuning = 1;
break;
case NeoverseV1:
- PrefFunctionLogAlignment = 4;
- PrefLoopLogAlignment = 5;
+ PrefFunctionAlignment = Align(16);
+ PrefLoopAlignment = Align(32);
MaxBytesForLoopAlignment = 16;
VScaleForTuning = 2;
+ DefaultSVETFOpts = TailFoldingOpts::Simple;
break;
case Neoverse512TVB:
- PrefFunctionLogAlignment = 4;
+ PrefFunctionAlignment = Align(16);
VScaleForTuning = 1;
MaxInterleaveFactor = 4;
break;
@@ -240,8 +242,8 @@ void AArch64Subtarget::initializeProperties() {
break;
case ThunderX2T99:
CacheLineSize = 64;
- PrefFunctionLogAlignment = 3;
- PrefLoopLogAlignment = 2;
+ PrefFunctionAlignment = Align(8);
+ PrefLoopAlignment = Align(4);
MaxInterleaveFactor = 4;
PrefetchDistance = 128;
MinPrefetchStride = 1024;
@@ -254,20 +256,20 @@ void AArch64Subtarget::initializeProperties() {
case ThunderXT81:
case ThunderXT83:
CacheLineSize = 128;
- PrefFunctionLogAlignment = 3;
- PrefLoopLogAlignment = 2;
+ PrefFunctionAlignment = Align(8);
+ PrefLoopAlignment = Align(4);
// FIXME: remove this to enable 64-bit SLP if performance looks good.
MinVectorRegisterBitWidth = 128;
break;
case TSV110:
CacheLineSize = 64;
- PrefFunctionLogAlignment = 4;
- PrefLoopLogAlignment = 2;
+ PrefFunctionAlignment = Align(16);
+ PrefLoopAlignment = Align(4);
break;
case ThunderX3T110:
CacheLineSize = 64;
- PrefFunctionLogAlignment = 4;
- PrefLoopLogAlignment = 2;
+ PrefFunctionAlignment = Align(16);
+ PrefLoopAlignment = Align(4);
MaxInterleaveFactor = 4;
PrefetchDistance = 128;
MinPrefetchStride = 1024;
@@ -278,8 +280,8 @@ void AArch64Subtarget::initializeProperties() {
case Ampere1:
case Ampere1A:
CacheLineSize = 64;
- PrefFunctionLogAlignment = 6;
- PrefLoopLogAlignment = 6;
+ PrefFunctionAlignment = Align(64);
+ PrefLoopAlignment = Align(64);
MaxInterleaveFactor = 4;
break;
}
@@ -290,13 +292,15 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
const TargetMachine &TM, bool LittleEndian,
unsigned MinSVEVectorSizeInBitsOverride,
unsigned MaxSVEVectorSizeInBitsOverride,
- bool StreamingSVEModeDisabled)
+ bool StreamingSVEMode,
+ bool StreamingCompatibleSVEMode)
: AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
IsLittle(LittleEndian),
- StreamingSVEModeDisabled(StreamingSVEModeDisabled),
+ StreamingSVEMode(StreamingSVEMode),
+ StreamingCompatibleSVEMode(StreamingCompatibleSVEMode),
MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)),
@@ -363,6 +367,13 @@ AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
return AArch64II::MO_GOT;
+ // All globals dynamically protected by MTE must have their address tags
+ // synthesized. This is done by having the loader stash the tag in the GOT
+ // entry. Force all tagged globals (even ones with internal linkage) through
+ // the GOT.
+ if (GV->isTagged())
+ return AArch64II::MO_GOT;
+
if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
if (GV->hasDLLImportStorageClass()) {
if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy())
@@ -466,10 +477,14 @@ void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
bool AArch64Subtarget::useAA() const { return UseAA; }
-bool AArch64Subtarget::forceStreamingCompatibleSVE() const {
- if (ForceStreamingCompatibleSVE) {
- assert(hasSVEorSME() && "Expected SVE to be available");
- return hasSVEorSME();
- }
- return false;
+bool AArch64Subtarget::isNeonAvailable() const {
+ if (!hasNEON())
+ return false;
+
+ // The 'force-streaming-compatible-sve' flag overrides the streaming
+ // function attributes.
+ if (ForceStreamingCompatibleSVE.getNumOccurrences() > 0)
+ return !ForceStreamingCompatibleSVE;
+
+ return !isStreaming() && !isStreamingCompatible();
}
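
A small sketch, not part of the patch, of the log2-to-Align conversion this hunk applies throughout initializeProperties(): each old PrefFunctionLogAlignment/PrefLoopLogAlignment value N becomes Align(1 << N), so 2 -> Align(4), 3 -> Align(8), 4 -> Align(16), 5 -> Align(32) and 6 -> Align(64).

#include "llvm/Support/Alignment.h"
#include <cassert>
#include <cstdint>

// Convert an old-style log2 alignment field to the Align values now stored in
// AArch64Subtarget. Purely illustrative of the substitution made above.
static llvm::Align alignFromLog2(unsigned Log2Value) {
  return llvm::Align(uint64_t(1) << Log2Value);
}

static void checkAlignMapping() {
  assert(alignFromLog2(4) == llvm::Align(16)); // PrefFunctionLogAlignment = 4
  assert(alignFromLog2(5) == llvm::Align(32)); // PrefLoopLogAlignment = 5
  assert(alignFromLog2(6) == llvm::Align(64)); // Ampere1 function/loop alignment
}
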
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 70f5b023c501..9ab86684856e 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -108,8 +108,8 @@ protected:
uint16_t PrefetchDistance = 0;
uint16_t MinPrefetchStride = 1;
unsigned MaxPrefetchIterationsAhead = UINT_MAX;
- unsigned PrefFunctionLogAlignment = 0;
- unsigned PrefLoopLogAlignment = 0;
+ Align PrefFunctionAlignment;
+ Align PrefLoopAlignment;
unsigned MaxBytesForLoopAlignment = 0;
unsigned MaxJumpTableSize = 0;
@@ -124,10 +124,12 @@ protected:
bool IsLittle;
- bool StreamingSVEModeDisabled;
+ bool StreamingSVEMode;
+ bool StreamingCompatibleSVEMode;
unsigned MinSVEVectorSizeInBits;
unsigned MaxSVEVectorSizeInBits;
unsigned VScaleForTuning = 2;
+ TailFoldingOpts DefaultSVETFOpts = TailFoldingOpts::Disabled;
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
@@ -162,7 +164,8 @@ public:
StringRef FS, const TargetMachine &TM, bool LittleEndian,
unsigned MinSVEVectorSizeInBitsOverride = 0,
unsigned MaxSVEVectorSizeInBitsOverride = 0,
- bool StreamingSVEModeDisabled = true);
+ bool StreamingSVEMode = false,
+ bool StreamingCompatibleSVEMode = false);
// Getters for SubtargetFeatures defined in tablegen
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
@@ -201,9 +204,20 @@ public:
bool isXRaySupported() const override { return true; }
+ /// Returns true if the function has the streaming attribute.
+ bool isStreaming() const { return StreamingSVEMode; }
+
+ /// Returns true if the function has the streaming-compatible attribute.
+ bool isStreamingCompatible() const { return StreamingCompatibleSVEMode; }
+
+ /// Returns true if the target has NEON and the function at runtime is known
+ /// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
+ /// mode, which disables NEON instructions).
+ bool isNeonAvailable() const;
+
unsigned getMinVectorRegisterBitWidth() const {
// Don't assume any minimum vector size when PSTATE.SM may not be 0.
- if (!isStreamingSVEModeDisabled())
+ if (StreamingSVEMode || StreamingCompatibleSVEMode)
return 0;
return MinVectorRegisterBitWidth;
}
@@ -241,10 +255,10 @@ public:
unsigned getMaxPrefetchIterationsAhead() const override {
return MaxPrefetchIterationsAhead;
}
- unsigned getPrefFunctionLogAlignment() const {
- return PrefFunctionLogAlignment;
+ Align getPrefFunctionAlignment() const {
+ return PrefFunctionAlignment;
}
- unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; }
+ Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
unsigned getMaxBytesForLoopAlignment() const {
return MaxBytesForLoopAlignment;
@@ -379,17 +393,26 @@ public:
}
bool useSVEForFixedLengthVectors() const {
- if (forceStreamingCompatibleSVE())
+ if (!isNeonAvailable())
return true;
// Prefer NEON unless larger SVE registers are available.
return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
}
- bool forceStreamingCompatibleSVE() const;
+ bool useSVEForFixedLengthVectors(EVT VT) const {
+ if (!useSVEForFixedLengthVectors() || !VT.isFixedLengthVector())
+ return false;
+ return VT.getFixedSizeInBits() > AArch64::SVEBitsPerBlock ||
+ !isNeonAvailable();
+ }
unsigned getVScaleForTuning() const { return VScaleForTuning; }
+ TailFoldingOpts getSVETailFoldingDefaultOpts() const {
+ return DefaultSVETFOpts;
+ }
+
const char* getChkStkName() const {
if (isWindowsArm64EC())
return "__chkstk_arm64ec";
@@ -401,8 +424,6 @@ public:
return "__security_check_cookie_arm64ec";
return "__security_check_cookie";
}
-
- bool isStreamingSVEModeDisabled() const { return StreamingSVEModeDisabled; }
};
} // End llvm namespace
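
A condensed sketch, not part of the patch, of how the new streaming predicates above are intended to combine when choosing between NEON and SVE for fixed-length vectors; the command-line override handled in AArch64Subtarget.cpp is deliberately omitted and the names below are local to the sketch.

// Standalone mirror of isNeonAvailable() and useSVEForFixedLengthVectors()
// from this diff.
struct SubtargetBits {
  bool HasNEON = true;
  bool HasSVE = false;
  bool Streaming = false;           // "aarch64_pstate_sm_enabled"/"sm_body"
  bool StreamingCompatible = false; // "aarch64_pstate_sm_compatible"
  unsigned MinSVEVectorSizeInBits = 0;
};

// NEON can only be used when it exists and PSTATE.SM is known to be 0, i.e.
// the function is neither streaming nor streaming-compatible.
static bool neonAvailable(const SubtargetBits &ST) {
  return ST.HasNEON && !ST.Streaming && !ST.StreamingCompatible;
}

// Fixed-length vectors are lowered via SVE when NEON is unusable, or when
// SVE registers of at least 256 bits make it the preferred choice.
static bool useSVEForFixedLength(const SubtargetBits &ST) {
  if (!neonAvailable(ST))
    return true;
  return ST.HasSVE && ST.MinSVEVectorSizeInBits >= 256;
}
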
diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index c7f404463c58..3e7d4d81b242 100644
--- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -73,6 +73,11 @@ def : AT<"S1E1RP", 0b000, 0b0111, 0b1001, 0b000>;
def : AT<"S1E1WP", 0b000, 0b0111, 0b1001, 0b001>;
}
+// v8.9a/v9.4a FEAT_ATS1A
+def : AT<"S1E1A", 0b000, 0b0111, 0b1001, 0b010>;
+def : AT<"S1E2A", 0b100, 0b0111, 0b1001, 0b010>;
+def : AT<"S1E3A", 0b110, 0b0111, 0b1001, 0b010>;
+
//===----------------------------------------------------------------------===//
// DMB/DSB (data barrier) instruction options.
//===----------------------------------------------------------------------===//
@@ -1761,6 +1766,19 @@ let Requires = [{ {AArch64::FeatureNMI} }] in {
def : ROSysReg<"ICC_NMIAR1_EL1", 0b11, 0b000, 0b1100, 0b1001, 0b101>; // FEAT_GICv3_NMI
}
+// v9.4a Guarded Control Stack Extension (GCS)
+// Op0 Op1 CRn CRm Op2
+def : RWSysReg<"GCSCR_EL1", 0b11, 0b000, 0b0010, 0b0101, 0b000>;
+def : RWSysReg<"GCSPR_EL1", 0b11, 0b000, 0b0010, 0b0101, 0b001>;
+def : RWSysReg<"GCSCRE0_EL1", 0b11, 0b000, 0b0010, 0b0101, 0b010>;
+def : RWSysReg<"GCSPR_EL0", 0b11, 0b011, 0b0010, 0b0101, 0b001>;
+def : RWSysReg<"GCSCR_EL2", 0b11, 0b100, 0b0010, 0b0101, 0b000>;
+def : RWSysReg<"GCSPR_EL2", 0b11, 0b100, 0b0010, 0b0101, 0b001>;
+def : RWSysReg<"GCSCR_EL12", 0b11, 0b101, 0b0010, 0b0101, 0b000>;
+def : RWSysReg<"GCSPR_EL12", 0b11, 0b101, 0b0010, 0b0101, 0b001>;
+def : RWSysReg<"GCSCR_EL3", 0b11, 0b110, 0b0010, 0b0101, 0b000>;
+def : RWSysReg<"GCSPR_EL3", 0b11, 0b110, 0b0010, 0b0101, 0b001>;
+
// v8.9a/v9.4a Memory Attribute Index Enhancement (FEAT_AIE)
// Op0 Op1 CRn CRm Op2
def : RWSysReg<"AMAIR2_EL1", 0b11, 0b000, 0b1010, 0b0011, 0b001>;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index eafd311c808e..559879139758 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -20,7 +20,6 @@
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "TargetInfo/AArch64TargetInfo.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/CFIFixup.h"
#include "llvm/CodeGen/CSEConfigBase.h"
@@ -47,6 +46,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/CFGuard.h"
#include "llvm/Transforms/Scalar.h"
#include <memory>
@@ -215,7 +215,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
initializeAArch64ConditionOptimizerPass(*PR);
initializeAArch64DeadRegisterDefinitionsPass(*PR);
initializeAArch64ExpandPseudoPass(*PR);
- initializeAArch64KCFIPass(*PR);
initializeAArch64LoadStoreOptPass(*PR);
initializeAArch64MIPeepholeOptPass(*PR);
initializeAArch64SIMDInstrOptPass(*PR);
@@ -230,6 +229,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
initializeFalkorHWPFFixPass(*PR);
initializeFalkorMarkStridedAccessesLegacyPass(*PR);
initializeLDTLSCleanupPass(*PR);
+ initializeKCFIPass(*PR);
initializeSMEABIPass(*PR);
initializeSVEIntrinsicOptsPass(*PR);
initializeAArch64SpeculationHardeningPass(*PR);
@@ -238,6 +238,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
initializeAArch64StackTaggingPreRAPass(*PR);
initializeAArch64LowerHomogeneousPrologEpilogPass(*PR);
initializeAArch64DAGToDAGISelPass(*PR);
+ initializeAArch64GlobalsTaggingPass(*PR);
}
//===----------------------------------------------------------------------===//
@@ -390,10 +391,10 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
StringRef TuneCPU = TuneAttr.isValid() ? TuneAttr.getValueAsString() : CPU;
StringRef FS = FSAttr.isValid() ? FSAttr.getValueAsString() : TargetFS;
- bool StreamingSVEModeDisabled =
- !F.hasFnAttribute("aarch64_pstate_sm_enabled") &&
- !F.hasFnAttribute("aarch64_pstate_sm_compatible") &&
- !F.hasFnAttribute("aarch64_pstate_sm_body");
+ bool StreamingSVEMode = F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
+ F.hasFnAttribute("aarch64_pstate_sm_body");
+ bool StreamingCompatibleSVEMode =
+ F.hasFnAttribute("aarch64_pstate_sm_compatible");
unsigned MinSVEVectorSize = 0;
unsigned MaxSVEVectorSize = 0;
@@ -426,8 +427,11 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
SmallString<512> Key;
raw_svector_ostream(Key) << "SVEMin" << MinSVEVectorSize << "SVEMax"
- << MaxSVEVectorSize << "StreamingSVEModeDisabled="
- << StreamingSVEModeDisabled << CPU << TuneCPU << FS;
+ << MaxSVEVectorSize
+ << "StreamingSVEMode=" << StreamingSVEMode
+ << "StreamingCompatibleSVEMode="
+ << StreamingCompatibleSVEMode << CPU << TuneCPU
+ << FS;
auto &I = SubtargetMap[Key];
if (!I) {
@@ -437,8 +441,14 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
resetTargetOptions(F);
I = std::make_unique<AArch64Subtarget>(
TargetTriple, CPU, TuneCPU, FS, *this, isLittle, MinSVEVectorSize,
- MaxSVEVectorSize, StreamingSVEModeDisabled);
+ MaxSVEVectorSize, StreamingSVEMode, StreamingCompatibleSVEMode);
}
+
+ assert((!StreamingSVEMode || I->hasSME()) &&
+ "Expected SME to be available");
+ assert((!StreamingCompatibleSVEMode || I->hasSVEorSME()) &&
+ "Expected SVE or SME to be available");
+
return I.get();
}
@@ -509,7 +519,6 @@ public:
bool addLegalizeMachineIR() override;
void addPreRegBankSelect() override;
bool addRegBankSelect() override;
- void addPreGlobalInstructionSelect() override;
bool addGlobalInstructionSelect() override;
void addMachineSSAOptimization() override;
bool addILPOpts() override;
@@ -517,6 +526,7 @@ public:
void addPostRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
+ void addPostBBSections() override;
void addPreEmitPass2() override;
std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
@@ -587,6 +597,7 @@ void AArch64PassConfig::addIRPasses() {
if (getOptLevel() == CodeGenOpt::Aggressive && EnableSelectOpt)
addPass(createSelectOptimizePass());
+ addPass(createAArch64GlobalsTaggingPass());
addPass(createAArch64StackTaggingPass(
/*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None));
@@ -670,10 +681,12 @@ bool AArch64PassConfig::addIRTranslator() {
}
void AArch64PassConfig::addPreLegalizeMachineIR() {
- if (getOptLevel() == CodeGenOpt::None)
+ if (getOptLevel() == CodeGenOpt::None) {
addPass(createAArch64O0PreLegalizerCombiner());
- else {
+ addPass(new Localizer());
+ } else {
addPass(createAArch64PreLegalizerCombiner());
+ addPass(new Localizer());
if (EnableGISelLoadStoreOptPreLegal)
addPass(new LoadStoreOpt());
}
@@ -699,10 +712,6 @@ bool AArch64PassConfig::addRegBankSelect() {
return false;
}
-void AArch64PassConfig::addPreGlobalInstructionSelect() {
- addPass(new Localizer());
-}
-
bool AArch64PassConfig::addGlobalInstructionSelect() {
addPass(new InstructionSelect(getOptLevel()));
if (getOptLevel() != CodeGenOpt::None)
@@ -773,7 +782,7 @@ void AArch64PassConfig::addPreSched2() {
addPass(createAArch64LoadStoreOptimizationPass());
}
// Emit KCFI checks for indirect calls.
- addPass(createAArch64KCFIPass());
+ addPass(createKCFIPass());
// The AArch64SpeculationHardeningPass destroys dominator tree and natural
// loop info, which is needed for the FalkorHWPFFixPass and also later on.
@@ -807,11 +816,6 @@ void AArch64PassConfig::addPreEmitPass() {
if (EnableBranchTargets)
addPass(createAArch64BranchTargetsPass());
- // Relax conditional branch instructions if they're otherwise out of
- // range of their destination.
- if (BranchRelaxation)
- addPass(&BranchRelaxationPassID);
-
if (TM->getTargetTriple().isOSWindows()) {
// Identify valid longjmp targets for Windows Control Flow Guard.
addPass(createCFGuardLongjmpPass());
@@ -819,14 +823,21 @@ void AArch64PassConfig::addPreEmitPass() {
addPass(createEHContGuardCatchretPass());
}
- if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables)
- addPass(createAArch64CompressJumpTablesPass());
-
if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
TM->getTargetTriple().isOSBinFormatMachO())
addPass(createAArch64CollectLOHPass());
}
+void AArch64PassConfig::addPostBBSections() {
+ // Relax conditional branch instructions if they're otherwise out of
+ // range of their destination.
+ if (BranchRelaxation)
+ addPass(&BranchRelaxationPassID);
+
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables)
+ addPass(createAArch64CompressJumpTablesPass());
+}
+
void AArch64PassConfig::addPreEmitPass2() {
// SVE bundles move prefixes with destructive operations. BLR_RVMARKER pseudo
// instructions are lowered to bundles as well.
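
A minimal sketch, not part of the patch, of the attribute-to-mode mapping that getSubtargetImpl wires up above; the attribute strings are copied verbatim from that hunk and the struct below is invented for illustration only.

#include "llvm/IR/Function.h"

struct StreamingModes {
  bool StreamingSVEMode;           // function (or its body) runs in streaming mode
  bool StreamingCompatibleSVEMode; // function must be valid for either SM state
};

// Mirrors the classification in AArch64TargetMachine::getSubtargetImpl.
static StreamingModes classifyStreaming(const llvm::Function &F) {
  StreamingModes M;
  M.StreamingSVEMode = F.hasFnAttribute("aarch64_pstate_sm_enabled") ||
                       F.hasFnAttribute("aarch64_pstate_sm_body");
  M.StreamingCompatibleSVEMode =
      F.hasFnAttribute("aarch64_pstate_sm_compatible");
  return M;
}
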
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index c6e9e0550117..353e96856b8f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -39,75 +39,144 @@ static cl::opt<unsigned> SVEGatherOverhead("sve-gather-overhead", cl::init(10),
static cl::opt<unsigned> SVEScatterOverhead("sve-scatter-overhead",
cl::init(10), cl::Hidden);
+static cl::opt<unsigned> SVETailFoldInsnThreshold("sve-tail-folding-insn-threshold",
+ cl::init(15), cl::Hidden);
+
+static cl::opt<unsigned>
+ NeonNonConstStrideOverhead("neon-nonconst-stride-overhead", cl::init(10),
+ cl::Hidden);
+
namespace {
-class TailFoldingKind {
-private:
- uint8_t Bits = 0; // Currently defaults to disabled.
+class TailFoldingOption {
+ // These bitfields will only ever be set to something non-zero in operator=,
+ // when setting the -sve-tail-folding option. This option should always be of
+ // the form (default|simple|all|disabled)[+(Flag1|Flag2|etc)], where
+ // InitialBits is one of (disabled|all|simple). EnableBits represents
+ // additional flags we're enabling, and DisableBits for those flags we're
+ // disabling. The default flag is tracked in the variable NeedsDefault, since
+ // at the time of setting the option we may not know what the default value
+ // for the CPU is.
+ TailFoldingOpts InitialBits = TailFoldingOpts::Disabled;
+ TailFoldingOpts EnableBits = TailFoldingOpts::Disabled;
+ TailFoldingOpts DisableBits = TailFoldingOpts::Disabled;
+
+ // This value needs to be initialised to true in case the user does not
+ // explicitly set the -sve-tail-folding option.
+ bool NeedsDefault = true;
+
+ void setInitialBits(TailFoldingOpts Bits) { InitialBits = Bits; }
+
+ void setNeedsDefault(bool V) { NeedsDefault = V; }
+
+ void setEnableBit(TailFoldingOpts Bit) {
+ EnableBits |= Bit;
+ DisableBits &= ~Bit;
+ }
+
+ void setDisableBit(TailFoldingOpts Bit) {
+ EnableBits &= ~Bit;
+ DisableBits |= Bit;
+ }
+
+ TailFoldingOpts getBits(TailFoldingOpts DefaultBits) const {
+ TailFoldingOpts Bits = TailFoldingOpts::Disabled;
+
+ assert((InitialBits == TailFoldingOpts::Disabled || !NeedsDefault) &&
+ "Initial bits should only include one of "
+ "(disabled|all|simple|default)");
+ Bits = NeedsDefault ? DefaultBits : InitialBits;
+ Bits |= EnableBits;
+ Bits &= ~DisableBits;
+
+ return Bits;
+ }
+
+ void reportError(std::string Opt) {
+ errs() << "invalid argument '" << Opt
+ << "' to -sve-tail-folding=; the option should be of the form\n"
+ " (disabled|all|default|simple)[+(reductions|recurrences"
+ "|reverse|noreductions|norecurrences|noreverse)]\n";
+ report_fatal_error("Unrecognised tail-folding option");
+ }
public:
- enum TailFoldingOpts {
- TFDisabled = 0x0,
- TFReductions = 0x01,
- TFRecurrences = 0x02,
- TFSimple = 0x80,
- TFAll = TFReductions | TFRecurrences | TFSimple
- };
void operator=(const std::string &Val) {
- if (Val.empty())
+ // If the user explicitly sets -sve-tail-folding= then treat as an error.
+ if (Val.empty()) {
+ reportError("");
return;
- SmallVector<StringRef, 6> TailFoldTypes;
+ }
+
+ // Since the user is explicitly setting the option we don't automatically
+ // need the default unless they require it.
+ setNeedsDefault(false);
+
+ SmallVector<StringRef, 4> TailFoldTypes;
StringRef(Val).split(TailFoldTypes, '+', -1, false);
- for (auto TailFoldType : TailFoldTypes) {
- if (TailFoldType == "disabled")
- Bits = 0;
- else if (TailFoldType == "all")
- Bits = TFAll;
- else if (TailFoldType == "default")
- Bits = 0; // Currently defaults to never tail-folding.
- else if (TailFoldType == "simple")
- add(TFSimple);
- else if (TailFoldType == "reductions")
- add(TFReductions);
- else if (TailFoldType == "recurrences")
- add(TFRecurrences);
- else if (TailFoldType == "noreductions")
- remove(TFReductions);
- else if (TailFoldType == "norecurrences")
- remove(TFRecurrences);
- else {
- errs()
- << "invalid argument " << TailFoldType.str()
- << " to -sve-tail-folding=; each element must be one of: disabled, "
- "all, default, simple, reductions, noreductions, recurrences, "
- "norecurrences\n";
- }
+
+ unsigned StartIdx = 1;
+ if (TailFoldTypes[0] == "disabled")
+ setInitialBits(TailFoldingOpts::Disabled);
+ else if (TailFoldTypes[0] == "all")
+ setInitialBits(TailFoldingOpts::All);
+ else if (TailFoldTypes[0] == "default")
+ setNeedsDefault(true);
+ else if (TailFoldTypes[0] == "simple")
+ setInitialBits(TailFoldingOpts::Simple);
+ else {
+ StartIdx = 0;
+ setInitialBits(TailFoldingOpts::Disabled);
}
- }
- operator uint8_t() const { return Bits; }
+ for (unsigned I = StartIdx; I < TailFoldTypes.size(); I++) {
+ if (TailFoldTypes[I] == "reductions")
+ setEnableBit(TailFoldingOpts::Reductions);
+ else if (TailFoldTypes[I] == "recurrences")
+ setEnableBit(TailFoldingOpts::Recurrences);
+ else if (TailFoldTypes[I] == "reverse")
+ setEnableBit(TailFoldingOpts::Reverse);
+ else if (TailFoldTypes[I] == "noreductions")
+ setDisableBit(TailFoldingOpts::Reductions);
+ else if (TailFoldTypes[I] == "norecurrences")
+ setDisableBit(TailFoldingOpts::Recurrences);
+ else if (TailFoldTypes[I] == "noreverse")
+ setDisableBit(TailFoldingOpts::Reverse);
+ else
+ reportError(Val);
+ }
+ }
- void add(uint8_t Flag) { Bits |= Flag; }
- void remove(uint8_t Flag) { Bits &= ~Flag; }
+ bool satisfies(TailFoldingOpts DefaultBits, TailFoldingOpts Required) const {
+ return (getBits(DefaultBits) & Required) == Required;
+ }
};
} // namespace
-TailFoldingKind TailFoldingKindLoc;
+TailFoldingOption TailFoldingOptionLoc;
-cl::opt<TailFoldingKind, true, cl::parser<std::string>> SVETailFolding(
+cl::opt<TailFoldingOption, true, cl::parser<std::string>> SVETailFolding(
"sve-tail-folding",
cl::desc(
- "Control the use of vectorisation using tail-folding for SVE:"
- "\ndisabled No loop types will vectorize using tail-folding"
- "\ndefault Uses the default tail-folding settings for the target "
- "CPU"
- "\nall All legal loop types will vectorize using tail-folding"
- "\nsimple Use tail-folding for simple loops (not reductions or "
- "recurrences)"
- "\nreductions Use tail-folding for loops containing reductions"
- "\nrecurrences Use tail-folding for loops containing fixed order "
- "recurrences"),
- cl::location(TailFoldingKindLoc));
+ "Control the use of vectorisation using tail-folding for SVE where the"
+ " option is specified in the form (Initial)[+(Flag1|Flag2|...)]:"
+ "\ndisabled (Initial) No loop types will vectorize using "
+ "tail-folding"
+ "\ndefault (Initial) Uses the default tail-folding settings for "
+ "the target CPU"
+ "\nall (Initial) All legal loop types will vectorize using "
+ "tail-folding"
+ "\nsimple (Initial) Use tail-folding for simple loops (not "
+ "reductions or recurrences)"
+ "\nreductions Use tail-folding for loops containing reductions"
+ "\nnoreductions Inverse of above"
+ "\nrecurrences Use tail-folding for loops containing fixed order "
+ "recurrences"
+ "\nnorecurrences Inverse of above"
+ "\nreverse Use tail-folding for loops requiring reversed "
+ "predicates"
+ "\nnoreverse Inverse of above"),
+ cl::location(TailFoldingOptionLoc));
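// Illustrative note, not part of this patch: examples of how the option
// grammar described above parses, given the setInitialBits/setEnableBit/
// setDisableBit behaviour in TailFoldingOption:
//   -sve-tail-folding=disabled          -> no loops are tail-folded
//   -sve-tail-folding=all+noreductions  -> everything except reduction loops
//   -sve-tail-folding=default+reverse   -> the CPU default plus reversed predicates
//   -sve-tail-folding=simple+reductions -> simple loops plus reductions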
// Experimental option that will only be fully functional when the
// code-generator is changed to use SVE instead of NEON for all fixed-width
@@ -146,7 +215,8 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
bool AArch64TTIImpl::shouldMaximizeVectorBandwidth(
TargetTransformInfo::RegisterKind K) const {
assert(K != TargetTransformInfo::RGK_Scalar);
- return K == TargetTransformInfo::RGK_FixedWidthVector;
+ return (K == TargetTransformInfo::RGK_FixedWidthVector &&
+ ST->isNeonAvailable());
}
/// Calculate the cost of materializing a 64-bit value. This helper
@@ -331,7 +401,9 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
case Intrinsic::smin:
case Intrinsic::smax: {
static const auto ValidMinMaxTys = {MVT::v8i8, MVT::v16i8, MVT::v4i16,
- MVT::v8i16, MVT::v2i32, MVT::v4i32};
+ MVT::v8i16, MVT::v2i32, MVT::v4i32,
+ MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32,
+ MVT::nxv2i64};
auto LT = getTypeLegalizationCost(RetTy);
// v2i64 types get converted to cmp+bif hence the cost of 2
if (LT.second == MVT::v2i64)
@@ -365,6 +437,15 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return LT.first;
break;
}
+ case Intrinsic::bswap: {
+ static const auto ValidAbsTys = {MVT::v4i16, MVT::v8i16, MVT::v2i32,
+ MVT::v4i32, MVT::v2i64};
+ auto LT = getTypeLegalizationCost(RetTy);
+ if (any_of(ValidAbsTys, [&LT](MVT M) { return M == LT.second; }) &&
+ LT.second.getScalarSizeInBits() == RetTy->getScalarSizeInBits())
+ return LT.first;
+ break;
+ }
case Intrinsic::experimental_stepvector: {
InstructionCost Cost = 1; // Cost of the `index' instruction
auto LT = getTypeLegalizationCost(RetTy);
@@ -516,6 +597,52 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
}
break;
}
+ case Intrinsic::fshl:
+ case Intrinsic::fshr: {
+ if (ICA.getArgs().empty())
+ break;
+
+ // TODO: Add handling for fshl where third argument is not a constant.
+ const TTI::OperandValueInfo OpInfoZ = TTI::getOperandInfo(ICA.getArgs()[2]);
+ if (!OpInfoZ.isConstant())
+ break;
+
+ const auto LegalisationCost = getTypeLegalizationCost(RetTy);
+ if (OpInfoZ.isUniform()) {
+ // FIXME: The costs could be lower if the codegen is better.
+ static const CostTblEntry FshlTbl[] = {
+ {Intrinsic::fshl, MVT::v4i32, 3}, // ushr + shl + orr
+ {Intrinsic::fshl, MVT::v2i64, 3}, {Intrinsic::fshl, MVT::v16i8, 4},
+ {Intrinsic::fshl, MVT::v8i16, 4}, {Intrinsic::fshl, MVT::v2i32, 3},
+ {Intrinsic::fshl, MVT::v8i8, 4}, {Intrinsic::fshl, MVT::v4i16, 4}};
+ // Costs for both fshl & fshr are the same, so just pass Intrinsic::fshl
+ // to avoid having to duplicate the costs.
+ const auto *Entry =
+ CostTableLookup(FshlTbl, Intrinsic::fshl, LegalisationCost.second);
+ if (Entry)
+ return LegalisationCost.first * Entry->Cost;
+ }
+
+ auto TyL = getTypeLegalizationCost(RetTy);
+ if (!RetTy->isIntegerTy())
+ break;
+
+ // Estimate cost manually, as types like i8 and i16 will get promoted to
+ // i32 and CostTableLookup will ignore the extra conversion cost.
+ bool HigherCost = (RetTy->getScalarSizeInBits() != 32 &&
+ RetTy->getScalarSizeInBits() < 64) ||
+ (RetTy->getScalarSizeInBits() % 64 != 0);
+ unsigned ExtraCost = HigherCost ? 1 : 0;
+ if (RetTy->getScalarSizeInBits() == 32 ||
+ RetTy->getScalarSizeInBits() == 64)
+ ExtraCost = 0; // fshl/fshr for i32 and i64 can be lowered to a single
+ // extr instruction.
+ else if (HigherCost)
+ ExtraCost = 1;
+ else
+ break;
+ return TyL.first + ExtraCost;
+ }
default:
break;
}
@@ -546,10 +673,8 @@ static std::optional<Instruction *> processPhiNode(InstCombiner &IC,
}
// Create the new Phi
- LLVMContext &Ctx = PN->getContext();
- IRBuilder<> Builder(Ctx);
- Builder.SetInsertPoint(PN);
- PHINode *NPN = Builder.CreatePHI(RequiredType, PN->getNumIncomingValues());
+ IC.Builder.SetInsertPoint(PN);
+ PHINode *NPN = IC.Builder.CreatePHI(RequiredType, PN->getNumIncomingValues());
Worklist.push_back(PN);
for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) {
@@ -605,21 +730,18 @@ tryCombineFromSVBoolBinOp(InstCombiner &IC, IntrinsicInst &II) {
if (PredOpTy != II.getType())
return std::nullopt;
- IRBuilder<> Builder(II.getContext());
- Builder.SetInsertPoint(&II);
-
SmallVector<Value *> NarrowedBinOpArgs = {PredOp};
- auto NarrowBinOpOp1 = Builder.CreateIntrinsic(
+ auto NarrowBinOpOp1 = IC.Builder.CreateIntrinsic(
Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp1});
NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
if (BinOpOp1 == BinOpOp2)
NarrowedBinOpArgs.push_back(NarrowBinOpOp1);
else
- NarrowedBinOpArgs.push_back(Builder.CreateIntrinsic(
+ NarrowedBinOpArgs.push_back(IC.Builder.CreateIntrinsic(
Intrinsic::aarch64_sve_convert_from_svbool, {PredOpTy}, {BinOpOp2}));
auto NarrowedBinOp =
- Builder.CreateIntrinsic(IntrinsicID, {PredOpTy}, NarrowedBinOpArgs);
+ IC.Builder.CreateIntrinsic(IntrinsicID, {PredOpTy}, NarrowedBinOpArgs);
return IC.replaceInstUsesWith(II, NarrowedBinOp);
}
@@ -632,6 +754,11 @@ instCombineConvertFromSVBool(InstCombiner &IC, IntrinsicInst &II) {
if (auto BinOpCombine = tryCombineFromSVBoolBinOp(IC, II))
return BinOpCombine;
+ // Ignore converts to/from svcount_t.
+ if (isa<TargetExtType>(II.getArgOperand(0)->getType()) ||
+ isa<TargetExtType>(II.getType()))
+ return std::nullopt;
+
SmallVector<Instruction *, 32> CandidatesForRemoval;
Value *Cursor = II.getOperand(0), *EarliestReplacement = nullptr;
@@ -673,9 +800,8 @@ instCombineConvertFromSVBool(InstCombiner &IC, IntrinsicInst &II) {
static std::optional<Instruction *> instCombineSVESel(InstCombiner &IC,
IntrinsicInst &II) {
- IRBuilder<> Builder(&II);
- auto Select = Builder.CreateSelect(II.getOperand(0), II.getOperand(1),
- II.getOperand(2));
+ auto Select = IC.Builder.CreateSelect(II.getOperand(0), II.getOperand(1),
+ II.getOperand(2));
return IC.replaceInstUsesWith(II, Select);
}
@@ -706,11 +832,9 @@ static std::optional<Instruction *> instCombineSVEDup(InstCombiner &IC,
static std::optional<Instruction *> instCombineSVEDupX(InstCombiner &IC,
IntrinsicInst &II) {
// Replace DupX with a regular IR splat.
- IRBuilder<> Builder(II.getContext());
- Builder.SetInsertPoint(&II);
auto *RetTy = cast<ScalableVectorType>(II.getType());
- Value *Splat =
- Builder.CreateVectorSplat(RetTy->getElementCount(), II.getArgOperand(0));
+ Value *Splat = IC.Builder.CreateVectorSplat(RetTy->getElementCount(),
+ II.getArgOperand(0));
Splat->takeName(&II);
return IC.replaceInstUsesWith(II, Splat);
}
@@ -718,8 +842,6 @@ static std::optional<Instruction *> instCombineSVEDupX(InstCombiner &IC,
static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
IntrinsicInst &II) {
LLVMContext &Ctx = II.getContext();
- IRBuilder<> Builder(Ctx);
- Builder.SetInsertPoint(&II);
// Check that the predicate is all active
auto *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(0));
@@ -804,13 +926,13 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
auto *PTruePat =
ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all);
- auto *PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue,
- {PredType}, {PTruePat});
- auto *ConvertToSVBool = Builder.CreateIntrinsic(
+ auto *PTrue = IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue,
+ {PredType}, {PTruePat});
+ auto *ConvertToSVBool = IC.Builder.CreateIntrinsic(
Intrinsic::aarch64_sve_convert_to_svbool, {PredType}, {PTrue});
auto *ConvertFromSVBool =
- Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
- {II.getType()}, {ConvertToSVBool});
+ IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
+ {II.getType()}, {ConvertToSVBool});
ConvertFromSVBool->takeName(&II);
return IC.replaceInstUsesWith(II, ConvertFromSVBool);
@@ -818,8 +940,6 @@ static std::optional<Instruction *> instCombineSVECmpNE(InstCombiner &IC,
static std::optional<Instruction *> instCombineSVELast(InstCombiner &IC,
IntrinsicInst &II) {
- IRBuilder<> Builder(II.getContext());
- Builder.SetInsertPoint(&II);
Value *Pg = II.getArgOperand(0);
Value *Vec = II.getArgOperand(1);
auto IntrinsicID = II.getIntrinsicID();
@@ -837,9 +957,9 @@ static std::optional<Instruction *> instCombineSVELast(InstCombiner &IC,
auto *OldBinOp = cast<BinaryOperator>(Vec);
auto OpC = OldBinOp->getOpcode();
auto *NewLHS =
- Builder.CreateIntrinsic(IntrinsicID, {Vec->getType()}, {Pg, LHS});
+ IC.Builder.CreateIntrinsic(IntrinsicID, {Vec->getType()}, {Pg, LHS});
auto *NewRHS =
- Builder.CreateIntrinsic(IntrinsicID, {Vec->getType()}, {Pg, RHS});
+ IC.Builder.CreateIntrinsic(IntrinsicID, {Vec->getType()}, {Pg, RHS});
auto *NewBinOp = BinaryOperator::CreateWithCopiedFlags(
OpC, NewLHS, NewRHS, OldBinOp, OldBinOp->getName(), &II);
return IC.replaceInstUsesWith(II, NewBinOp);
@@ -901,8 +1021,6 @@ static std::optional<Instruction *> instCombineSVECondLast(InstCombiner &IC,
// depending on the micro-architecture, but has been observed as generally
// being faster, particularly when the CLAST[AB] op is a loop-carried
// dependency.
- IRBuilder<> Builder(II.getContext());
- Builder.SetInsertPoint(&II);
Value *Pg = II.getArgOperand(0);
Value *Fallback = II.getArgOperand(1);
Value *Vec = II.getArgOperand(2);
@@ -916,39 +1034,37 @@ static std::optional<Instruction *> instCombineSVECondLast(InstCombiner &IC,
default:
return std::nullopt;
case 16:
- FPTy = Builder.getHalfTy();
+ FPTy = IC.Builder.getHalfTy();
break;
case 32:
- FPTy = Builder.getFloatTy();
+ FPTy = IC.Builder.getFloatTy();
break;
case 64:
- FPTy = Builder.getDoubleTy();
+ FPTy = IC.Builder.getDoubleTy();
break;
}
- Value *FPFallBack = Builder.CreateBitCast(Fallback, FPTy);
+ Value *FPFallBack = IC.Builder.CreateBitCast(Fallback, FPTy);
auto *FPVTy = VectorType::get(
FPTy, cast<VectorType>(Vec->getType())->getElementCount());
- Value *FPVec = Builder.CreateBitCast(Vec, FPVTy);
- auto *FPII = Builder.CreateIntrinsic(II.getIntrinsicID(), {FPVec->getType()},
- {Pg, FPFallBack, FPVec});
- Value *FPIItoInt = Builder.CreateBitCast(FPII, II.getType());
+ Value *FPVec = IC.Builder.CreateBitCast(Vec, FPVTy);
+ auto *FPII = IC.Builder.CreateIntrinsic(
+ II.getIntrinsicID(), {FPVec->getType()}, {Pg, FPFallBack, FPVec});
+ Value *FPIItoInt = IC.Builder.CreateBitCast(FPII, II.getType());
return IC.replaceInstUsesWith(II, FPIItoInt);
}
static std::optional<Instruction *> instCombineRDFFR(InstCombiner &IC,
IntrinsicInst &II) {
LLVMContext &Ctx = II.getContext();
- IRBuilder<> Builder(Ctx);
- Builder.SetInsertPoint(&II);
// Replace rdffr with predicated rdffr.z intrinsic, so that optimizePTestInstr
// can work with RDFFR_PP for ptest elimination.
auto *AllPat =
ConstantInt::get(Type::getInt32Ty(Ctx), AArch64SVEPredPattern::all);
- auto *PTrue = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue,
- {II.getType()}, {AllPat});
+ auto *PTrue = IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptrue,
+ {II.getType()}, {AllPat});
auto *RDFFR =
- Builder.CreateIntrinsic(Intrinsic::aarch64_sve_rdffr_z, {}, {PTrue});
+ IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_rdffr_z, {}, {PTrue});
RDFFR->takeName(&II);
return IC.replaceInstUsesWith(II, RDFFR);
}
@@ -958,12 +1074,8 @@ instCombineSVECntElts(InstCombiner &IC, IntrinsicInst &II, unsigned NumElts) {
const auto Pattern = cast<ConstantInt>(II.getArgOperand(0))->getZExtValue();
if (Pattern == AArch64SVEPredPattern::all) {
- LLVMContext &Ctx = II.getContext();
- IRBuilder<> Builder(Ctx);
- Builder.SetInsertPoint(&II);
-
Constant *StepVal = ConstantInt::get(II.getType(), NumElts);
- auto *VScale = Builder.CreateVScale(StepVal);
+ auto *VScale = IC.Builder.CreateVScale(StepVal);
VScale->takeName(&II);
return IC.replaceInstUsesWith(II, VScale);
}
@@ -981,9 +1093,6 @@ static std::optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
Value *PgVal = II.getArgOperand(0);
Value *OpVal = II.getArgOperand(1);
- IRBuilder<> Builder(II.getContext());
- Builder.SetInsertPoint(&II);
-
// PTEST_<FIRST|LAST>(X, X) is equivalent to PTEST_ANY(X, X).
// Later optimizations prefer this form.
if (PgVal == OpVal &&
@@ -993,7 +1102,7 @@ static std::optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
Type *Tys[] = {PgVal->getType()};
auto *PTest =
- Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptest_any, Tys, Ops);
+ IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_ptest_any, Tys, Ops);
PTest->takeName(&II);
return IC.replaceInstUsesWith(II, PTest);
@@ -1013,7 +1122,7 @@ static std::optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
Value *Ops[] = {Pg->getArgOperand(0), Op->getArgOperand(0)};
Type *Tys[] = {Pg->getArgOperand(0)->getType()};
- auto *PTest = Builder.CreateIntrinsic(II.getIntrinsicID(), Tys, Ops);
+ auto *PTest = IC.Builder.CreateIntrinsic(II.getIntrinsicID(), Tys, Ops);
PTest->takeName(&II);
return IC.replaceInstUsesWith(II, PTest);
@@ -1038,7 +1147,7 @@ static std::optional<Instruction *> instCombineSVEPTest(InstCombiner &IC,
Value *Ops[] = {Pg->getArgOperand(0), Pg};
Type *Tys[] = {Pg->getType()};
- auto *PTest = Builder.CreateIntrinsic(II.getIntrinsicID(), Tys, Ops);
+ auto *PTest = IC.Builder.CreateIntrinsic(II.getIntrinsicID(), Tys, Ops);
PTest->takeName(&II);
return IC.replaceInstUsesWith(II, PTest);
@@ -1080,16 +1189,13 @@ instCombineSVEVectorFuseMulAddSub(InstCombiner &IC, IntrinsicInst &II,
FMFSource = &II;
}
- IRBuilder<> Builder(II.getContext());
- Builder.SetInsertPoint(&II);
-
CallInst *Res;
if (MergeIntoAddendOp)
- Res = Builder.CreateIntrinsic(FuseOpc, {II.getType()},
- {P, AddendOp, MulOp0, MulOp1}, FMFSource);
+ Res = IC.Builder.CreateIntrinsic(FuseOpc, {II.getType()},
+ {P, AddendOp, MulOp0, MulOp1}, FMFSource);
else
- Res = Builder.CreateIntrinsic(FuseOpc, {II.getType()},
- {P, MulOp0, MulOp1, AddendOp}, FMFSource);
+ Res = IC.Builder.CreateIntrinsic(FuseOpc, {II.getType()},
+ {P, MulOp0, MulOp1, AddendOp}, FMFSource);
return IC.replaceInstUsesWith(II, Res);
}
@@ -1112,57 +1218,48 @@ static bool isAllActivePredicate(Value *Pred) {
static std::optional<Instruction *>
instCombineSVELD1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
- IRBuilder<> Builder(II.getContext());
- Builder.SetInsertPoint(&II);
-
Value *Pred = II.getOperand(0);
Value *PtrOp = II.getOperand(1);
Type *VecTy = II.getType();
- Value *VecPtr = Builder.CreateBitCast(PtrOp, VecTy->getPointerTo());
if (isAllActivePredicate(Pred)) {
- LoadInst *Load = Builder.CreateLoad(VecTy, VecPtr);
+ LoadInst *Load = IC.Builder.CreateLoad(VecTy, PtrOp);
Load->copyMetadata(II);
return IC.replaceInstUsesWith(II, Load);
}
CallInst *MaskedLoad =
- Builder.CreateMaskedLoad(VecTy, VecPtr, PtrOp->getPointerAlignment(DL),
- Pred, ConstantAggregateZero::get(VecTy));
+ IC.Builder.CreateMaskedLoad(VecTy, PtrOp, PtrOp->getPointerAlignment(DL),
+ Pred, ConstantAggregateZero::get(VecTy));
MaskedLoad->copyMetadata(II);
return IC.replaceInstUsesWith(II, MaskedLoad);
}
static std::optional<Instruction *>
instCombineSVEST1(InstCombiner &IC, IntrinsicInst &II, const DataLayout &DL) {
- IRBuilder<> Builder(II.getContext());
- Builder.SetInsertPoint(&II);
-
Value *VecOp = II.getOperand(0);
Value *Pred = II.getOperand(1);
Value *PtrOp = II.getOperand(2);
- Value *VecPtr =
- Builder.CreateBitCast(PtrOp, VecOp->getType()->getPointerTo());
if (isAllActivePredicate(Pred)) {
- StoreInst *Store = Builder.CreateStore(VecOp, VecPtr);
+ StoreInst *Store = IC.Builder.CreateStore(VecOp, PtrOp);
Store->copyMetadata(II);
return IC.eraseInstFromFunction(II);
}
- CallInst *MaskedStore = Builder.CreateMaskedStore(
- VecOp, VecPtr, PtrOp->getPointerAlignment(DL), Pred);
+ CallInst *MaskedStore = IC.Builder.CreateMaskedStore(
+ VecOp, PtrOp, PtrOp->getPointerAlignment(DL), Pred);
MaskedStore->copyMetadata(II);
return IC.eraseInstFromFunction(II);
}
static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {
switch (Intrinsic) {
- case Intrinsic::aarch64_sve_fmul:
+ case Intrinsic::aarch64_sve_fmul_u:
return Instruction::BinaryOps::FMul;
- case Intrinsic::aarch64_sve_fadd:
+ case Intrinsic::aarch64_sve_fadd_u:
return Instruction::BinaryOps::FAdd;
- case Intrinsic::aarch64_sve_fsub:
+ case Intrinsic::aarch64_sve_fsub_u:
return Instruction::BinaryOps::FSub;
default:
return Instruction::BinaryOpsEnd;
@@ -1171,70 +1268,160 @@ static Instruction::BinaryOps intrinsicIDToBinOpCode(unsigned Intrinsic) {
static std::optional<Instruction *>
instCombineSVEVectorBinOp(InstCombiner &IC, IntrinsicInst &II) {
+ // Bail due to missing support for ISD::STRICT_ scalable vector operations.
+ if (II.isStrictFP())
+ return std::nullopt;
+
auto *OpPredicate = II.getOperand(0);
auto BinOpCode = intrinsicIDToBinOpCode(II.getIntrinsicID());
if (BinOpCode == Instruction::BinaryOpsEnd ||
!match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
m_ConstantInt<AArch64SVEPredPattern::all>())))
return std::nullopt;
- IRBuilder<> Builder(II.getContext());
- Builder.SetInsertPoint(&II);
- Builder.setFastMathFlags(II.getFastMathFlags());
+ IRBuilderBase::FastMathFlagGuard FMFGuard(IC.Builder);
+ IC.Builder.setFastMathFlags(II.getFastMathFlags());
auto BinOp =
- Builder.CreateBinOp(BinOpCode, II.getOperand(1), II.getOperand(2));
+ IC.Builder.CreateBinOp(BinOpCode, II.getOperand(1), II.getOperand(2));
return IC.replaceInstUsesWith(II, BinOp);
}
+// Canonicalise operations that take an all active predicate (e.g. sve.add ->
+// sve.add_u).
+static std::optional<Instruction *> instCombineSVEAllActive(IntrinsicInst &II,
+ Intrinsic::ID IID) {
+ auto *OpPredicate = II.getOperand(0);
+ if (!match(OpPredicate, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+ m_ConstantInt<AArch64SVEPredPattern::all>())))
+ return std::nullopt;
+
+ auto *Mod = II.getModule();
+ auto *NewDecl = Intrinsic::getDeclaration(Mod, IID, {II.getType()});
+ II.setCalledFunction(NewDecl);
+
+ return &II;
+}
+
static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
IntrinsicInst &II) {
+ if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_add_u))
+ return II_U;
+ if (auto MLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
+ Intrinsic::aarch64_sve_mla>(
+ IC, II, true))
+ return MLA;
+ if (auto MAD = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
+ Intrinsic::aarch64_sve_mad>(
+ IC, II, false))
+ return MAD;
+ return std::nullopt;
+}
+
+static std::optional<Instruction *>
+instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) {
+ if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fadd_u))
+ return II_U;
if (auto FMLA =
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
Intrinsic::aarch64_sve_fmla>(IC, II,
true))
return FMLA;
- if (auto MLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
- Intrinsic::aarch64_sve_mla>(
- IC, II, true))
- return MLA;
if (auto FMAD =
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
Intrinsic::aarch64_sve_fmad>(IC, II,
false))
return FMAD;
- if (auto MAD = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
- Intrinsic::aarch64_sve_mad>(
- IC, II, false))
- return MAD;
+ if (auto FMLA =
+ instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
+ Intrinsic::aarch64_sve_fmla>(IC, II,
+ true))
+ return FMLA;
+ return std::nullopt;
+}
+
+static std::optional<Instruction *>
+instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) {
+ if (auto FMLA =
+ instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
+ Intrinsic::aarch64_sve_fmla>(IC, II,
+ true))
+ return FMLA;
+ if (auto FMAD =
+ instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
+ Intrinsic::aarch64_sve_fmad>(IC, II,
+ false))
+ return FMAD;
+ if (auto FMLA_U =
+ instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
+ Intrinsic::aarch64_sve_fmla_u>(
+ IC, II, true))
+ return FMLA_U;
return instCombineSVEVectorBinOp(IC, II);
}
-static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
- IntrinsicInst &II) {
+static std::optional<Instruction *>
+instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) {
+ if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fsub_u))
+ return II_U;
+ if (auto FMLS =
+ instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
+ Intrinsic::aarch64_sve_fmls>(IC, II,
+ true))
+ return FMLS;
+ if (auto FMSB =
+ instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
+ Intrinsic::aarch64_sve_fnmsb>(
+ IC, II, false))
+ return FMSB;
+ if (auto FMLS =
+ instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
+ Intrinsic::aarch64_sve_fmls>(IC, II,
+ true))
+ return FMLS;
+ return std::nullopt;
+}
+
+static std::optional<Instruction *>
+instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) {
if (auto FMLS =
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
Intrinsic::aarch64_sve_fmls>(IC, II,
true))
return FMLS;
- if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
- Intrinsic::aarch64_sve_mls>(
- IC, II, true))
- return MLS;
if (auto FMSB =
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
Intrinsic::aarch64_sve_fnmsb>(
IC, II, false))
return FMSB;
+ if (auto FMLS_U =
+ instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul_u,
+ Intrinsic::aarch64_sve_fmls_u>(
+ IC, II, true))
+ return FMLS_U;
return instCombineSVEVectorBinOp(IC, II);
}
-static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
+static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
IntrinsicInst &II) {
+ if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sub_u))
+ return II_U;
+ if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
+ Intrinsic::aarch64_sve_mls>(
+ IC, II, true))
+ return MLS;
+ return std::nullopt;
+}
+
+static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
+ IntrinsicInst &II,
+ Intrinsic::ID IID) {
auto *OpPredicate = II.getOperand(0);
auto *OpMultiplicand = II.getOperand(1);
auto *OpMultiplier = II.getOperand(2);
- IRBuilder<> Builder(II.getContext());
- Builder.SetInsertPoint(&II);
+ // Canonicalise a non _u intrinsic only.
+ if (II.getIntrinsicID() != IID)
+ if (auto II_U = instCombineSVEAllActive(II, IID))
+ return II_U;
// Return true if a given instruction is a unit splat value, false otherwise.
auto IsUnitSplat = [](auto *I) {
@@ -1276,8 +1463,6 @@ static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
static std::optional<Instruction *> instCombineSVEUnpack(InstCombiner &IC,
IntrinsicInst &II) {
- IRBuilder<> Builder(II.getContext());
- Builder.SetInsertPoint(&II);
Value *UnpackArg = II.getArgOperand(0);
auto *RetTy = cast<ScalableVectorType>(II.getType());
bool IsSigned = II.getIntrinsicID() == Intrinsic::aarch64_sve_sunpkhi ||
@@ -1287,9 +1472,9 @@ static std::optional<Instruction *> instCombineSVEUnpack(InstCombiner &IC,
// Lo = uunpklo(splat(X)) --> Lo = splat(extend(X))
if (auto *ScalarArg = getSplatValue(UnpackArg)) {
ScalarArg =
- Builder.CreateIntCast(ScalarArg, RetTy->getScalarType(), IsSigned);
+ IC.Builder.CreateIntCast(ScalarArg, RetTy->getScalarType(), IsSigned);
Value *NewVal =
- Builder.CreateVectorSplat(RetTy->getElementCount(), ScalarArg);
+ IC.Builder.CreateVectorSplat(RetTy->getElementCount(), ScalarArg);
NewVal->takeName(&II);
return IC.replaceInstUsesWith(II, NewVal);
}
@@ -1311,11 +1496,9 @@ static std::optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
// Convert sve_tbl(OpVal sve_dup_x(SplatValue)) to
// splat_vector(extractelement(OpVal, SplatValue)) for further optimization.
- IRBuilder<> Builder(II.getContext());
- Builder.SetInsertPoint(&II);
- auto *Extract = Builder.CreateExtractElement(OpVal, SplatValue);
+ auto *Extract = IC.Builder.CreateExtractElement(OpVal, SplatValue);
auto *VectorSplat =
- Builder.CreateVectorSplat(VTy->getElementCount(), Extract);
+ IC.Builder.CreateVectorSplat(VTy->getElementCount(), Extract);
VectorSplat->takeName(&II);
return IC.replaceInstUsesWith(II, VectorSplat);
@@ -1350,18 +1533,15 @@ instCombineLD1GatherIndex(InstCombiner &IC, IntrinsicInst &II) {
Value *IndexBase;
if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
m_Value(IndexBase), m_SpecificInt(1)))) {
- IRBuilder<> Builder(II.getContext());
- Builder.SetInsertPoint(&II);
-
Align Alignment =
BasePtr->getPointerAlignment(II.getModule()->getDataLayout());
Type *VecPtrTy = PointerType::getUnqual(Ty);
- Value *Ptr = Builder.CreateGEP(cast<VectorType>(Ty)->getElementType(),
- BasePtr, IndexBase);
- Ptr = Builder.CreateBitCast(Ptr, VecPtrTy);
+ Value *Ptr = IC.Builder.CreateGEP(cast<VectorType>(Ty)->getElementType(),
+ BasePtr, IndexBase);
+ Ptr = IC.Builder.CreateBitCast(Ptr, VecPtrTy);
CallInst *MaskedLoad =
- Builder.CreateMaskedLoad(Ty, Ptr, Alignment, Mask, PassThru);
+ IC.Builder.CreateMaskedLoad(Ty, Ptr, Alignment, Mask, PassThru);
MaskedLoad->takeName(&II);
return IC.replaceInstUsesWith(II, MaskedLoad);
}
@@ -1383,18 +1563,15 @@ instCombineST1ScatterIndex(InstCombiner &IC, IntrinsicInst &II) {
Value *IndexBase;
if (match(Index, m_Intrinsic<Intrinsic::aarch64_sve_index>(
m_Value(IndexBase), m_SpecificInt(1)))) {
- IRBuilder<> Builder(II.getContext());
- Builder.SetInsertPoint(&II);
-
Align Alignment =
BasePtr->getPointerAlignment(II.getModule()->getDataLayout());
- Value *Ptr = Builder.CreateGEP(cast<VectorType>(Ty)->getElementType(),
- BasePtr, IndexBase);
+ Value *Ptr = IC.Builder.CreateGEP(cast<VectorType>(Ty)->getElementType(),
+ BasePtr, IndexBase);
Type *VecPtrTy = PointerType::getUnqual(Ty);
- Ptr = Builder.CreateBitCast(Ptr, VecPtrTy);
+ Ptr = IC.Builder.CreateBitCast(Ptr, VecPtrTy);
- (void)Builder.CreateMaskedStore(Val, Ptr, Alignment, Mask);
+ (void)IC.Builder.CreateMaskedStore(Val, Ptr, Alignment, Mask);
return IC.eraseInstFromFunction(II);
}
@@ -1404,9 +1581,7 @@ instCombineST1ScatterIndex(InstCombiner &IC, IntrinsicInst &II) {
static std::optional<Instruction *> instCombineSVESDIV(InstCombiner &IC,
IntrinsicInst &II) {
- IRBuilder<> Builder(II.getContext());
- Builder.SetInsertPoint(&II);
- Type *Int32Ty = Builder.getInt32Ty();
+ Type *Int32Ty = IC.Builder.getInt32Ty();
Value *Pred = II.getOperand(0);
Value *Vec = II.getOperand(1);
Value *DivVec = II.getOperand(2);
@@ -1419,17 +1594,17 @@ static std::optional<Instruction *> instCombineSVESDIV(InstCombiner &IC,
if (Divisor.isPowerOf2()) {
Constant *DivisorLog2 = ConstantInt::get(Int32Ty, Divisor.logBase2());
- auto ASRD = Builder.CreateIntrinsic(
+ auto ASRD = IC.Builder.CreateIntrinsic(
Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
return IC.replaceInstUsesWith(II, ASRD);
}
if (Divisor.isNegatedPowerOf2()) {
Divisor.negate();
Constant *DivisorLog2 = ConstantInt::get(Int32Ty, Divisor.logBase2());
- auto ASRD = Builder.CreateIntrinsic(
+ auto ASRD = IC.Builder.CreateIntrinsic(
Intrinsic::aarch64_sve_asrd, {II.getType()}, {Pred, Vec, DivisorLog2});
- auto NEG = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_neg,
- {ASRD->getType()}, {ASRD, Pred, ASRD});
+ auto NEG = IC.Builder.CreateIntrinsic(
+ Intrinsic::aarch64_sve_neg, {ASRD->getType()}, {ASRD, Pred, ASRD});
return IC.replaceInstUsesWith(II, NEG);
}
@@ -1489,14 +1664,12 @@ static std::optional<Instruction *> instCombineSVEDupqLane(InstCombiner &IC,
return std::nullopt;
// Rebuild the simplified chain of InsertElements. e.g. (a, b, a, b) as (a, b)
- IRBuilder<> Builder(II.getContext());
- Builder.SetInsertPoint(&II);
Value *InsertEltChain = PoisonValue::get(CurrentInsertElt->getType());
for (size_t I = 0; I < Elts.size(); I++) {
if (Elts[I] == nullptr)
continue;
- InsertEltChain = Builder.CreateInsertElement(InsertEltChain, Elts[I],
- Builder.getInt64(I));
+ InsertEltChain = IC.Builder.CreateInsertElement(InsertEltChain, Elts[I],
+ IC.Builder.getInt64(I));
}
if (InsertEltChain == nullptr)
return std::nullopt;
@@ -1510,21 +1683,21 @@ static std::optional<Instruction *> instCombineSVEDupqLane(InstCombiner &IC,
IIScalableTy->getMinNumElements() /
PatternWidth;
- IntegerType *WideTy = Builder.getIntNTy(PatternWidth);
+ IntegerType *WideTy = IC.Builder.getIntNTy(PatternWidth);
auto *WideScalableTy = ScalableVectorType::get(WideTy, PatternElementCount);
auto *WideShuffleMaskTy =
- ScalableVectorType::get(Builder.getInt32Ty(), PatternElementCount);
+ ScalableVectorType::get(IC.Builder.getInt32Ty(), PatternElementCount);
- auto ZeroIdx = ConstantInt::get(Builder.getInt64Ty(), APInt(64, 0));
- auto InsertSubvector = Builder.CreateInsertVector(
+ auto ZeroIdx = ConstantInt::get(IC.Builder.getInt64Ty(), APInt(64, 0));
+ auto InsertSubvector = IC.Builder.CreateInsertVector(
II.getType(), PoisonValue::get(II.getType()), InsertEltChain, ZeroIdx);
auto WideBitcast =
- Builder.CreateBitOrPointerCast(InsertSubvector, WideScalableTy);
+ IC.Builder.CreateBitOrPointerCast(InsertSubvector, WideScalableTy);
auto WideShuffleMask = ConstantAggregateZero::get(WideShuffleMaskTy);
- auto WideShuffle = Builder.CreateShuffleVector(
+ auto WideShuffle = IC.Builder.CreateShuffleVector(
WideBitcast, PoisonValue::get(WideScalableTy), WideShuffleMask);
auto NarrowBitcast =
- Builder.CreateBitOrPointerCast(WideShuffle, II.getType());
+ IC.Builder.CreateBitOrPointerCast(WideShuffle, II.getType());
return IC.replaceInstUsesWith(II, NarrowBitcast);
}
@@ -1541,7 +1714,6 @@ static std::optional<Instruction *> instCombineMaxMinNM(InstCombiner &IC,
static std::optional<Instruction *> instCombineSVESrshl(InstCombiner &IC,
IntrinsicInst &II) {
- IRBuilder<> Builder(&II);
Value *Pred = II.getOperand(0);
Value *Vec = II.getOperand(1);
Value *Shift = II.getOperand(2);
@@ -1568,8 +1740,8 @@ static std::optional<Instruction *> instCombineSVESrshl(InstCombiner &IC,
if (!match(Shift, m_NonNegative()))
return std::nullopt;
- auto LSL = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_lsl, {II.getType()},
- {Pred, Vec, Shift});
+ auto LSL = IC.Builder.CreateIntrinsic(Intrinsic::aarch64_sve_lsl,
+ {II.getType()}, {Pred, Vec, Shift});
return IC.replaceInstUsesWith(II, LSL);
}
@@ -1613,15 +1785,92 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
case Intrinsic::aarch64_sve_ptest_first:
case Intrinsic::aarch64_sve_ptest_last:
return instCombineSVEPTest(IC, II);
- case Intrinsic::aarch64_sve_mul:
- case Intrinsic::aarch64_sve_fmul:
- return instCombineSVEVectorMul(IC, II);
+ case Intrinsic::aarch64_sve_fabd:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fabd_u);
case Intrinsic::aarch64_sve_fadd:
+ return instCombineSVEVectorFAdd(IC, II);
+ case Intrinsic::aarch64_sve_fadd_u:
+ return instCombineSVEVectorFAddU(IC, II);
+ case Intrinsic::aarch64_sve_fdiv:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fdiv_u);
+ case Intrinsic::aarch64_sve_fmax:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmax_u);
+ case Intrinsic::aarch64_sve_fmaxnm:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmaxnm_u);
+ case Intrinsic::aarch64_sve_fmin:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmin_u);
+ case Intrinsic::aarch64_sve_fminnm:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fminnm_u);
+ case Intrinsic::aarch64_sve_fmla:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmla_u);
+ case Intrinsic::aarch64_sve_fmls:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmls_u);
+ case Intrinsic::aarch64_sve_fmul:
+ case Intrinsic::aarch64_sve_fmul_u:
+ return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u);
+ case Intrinsic::aarch64_sve_fmulx:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmulx_u);
+ case Intrinsic::aarch64_sve_fnmla:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmla_u);
+ case Intrinsic::aarch64_sve_fnmls:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmls_u);
+ case Intrinsic::aarch64_sve_fsub:
+ return instCombineSVEVectorFSub(IC, II);
+ case Intrinsic::aarch64_sve_fsub_u:
+ return instCombineSVEVectorFSubU(IC, II);
case Intrinsic::aarch64_sve_add:
return instCombineSVEVectorAdd(IC, II);
- case Intrinsic::aarch64_sve_fsub:
+ case Intrinsic::aarch64_sve_add_u:
+ return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
+ Intrinsic::aarch64_sve_mla_u>(
+ IC, II, true);
+ case Intrinsic::aarch64_sve_mla:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mla_u);
+ case Intrinsic::aarch64_sve_mls:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mls_u);
+ case Intrinsic::aarch64_sve_mul:
+ case Intrinsic::aarch64_sve_mul_u:
+ return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u);
+ case Intrinsic::aarch64_sve_sabd:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sabd_u);
+ case Intrinsic::aarch64_sve_smax:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smax_u);
+ case Intrinsic::aarch64_sve_smin:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smin_u);
+ case Intrinsic::aarch64_sve_smulh:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smulh_u);
case Intrinsic::aarch64_sve_sub:
return instCombineSVEVectorSub(IC, II);
+ case Intrinsic::aarch64_sve_sub_u:
+ return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
+ Intrinsic::aarch64_sve_mls_u>(
+ IC, II, true);
+ case Intrinsic::aarch64_sve_uabd:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uabd_u);
+ case Intrinsic::aarch64_sve_umax:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umax_u);
+ case Intrinsic::aarch64_sve_umin:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umin_u);
+ case Intrinsic::aarch64_sve_umulh:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umulh_u);
+ case Intrinsic::aarch64_sve_asr:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_asr_u);
+ case Intrinsic::aarch64_sve_lsl:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsl_u);
+ case Intrinsic::aarch64_sve_lsr:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsr_u);
+ case Intrinsic::aarch64_sve_and:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_and_u);
+ case Intrinsic::aarch64_sve_bic:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_bic_u);
+ case Intrinsic::aarch64_sve_eor:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_eor_u);
+ case Intrinsic::aarch64_sve_orr:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_orr_u);
+ case Intrinsic::aarch64_sve_sqsub:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sqsub_u);
+ case Intrinsic::aarch64_sve_uqsub:
+ return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uqsub_u);
case Intrinsic::aarch64_sve_tbl:
return instCombineSVETBL(IC, II);
case Intrinsic::aarch64_sve_uunpkhi:
@@ -1685,8 +1934,7 @@ AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
case TargetTransformInfo::RGK_Scalar:
return TypeSize::getFixed(64);
case TargetTransformInfo::RGK_FixedWidthVector:
- if (!ST->isStreamingSVEModeDisabled() &&
- !EnableFixedwidthAutovecInStreamingMode)
+ if (!ST->isNeonAvailable() && !EnableFixedwidthAutovecInStreamingMode)
return TypeSize::getFixed(0);
if (ST->hasSVE())
@@ -1695,7 +1943,8 @@ AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
return TypeSize::getFixed(ST->hasNEON() ? 128 : 0);
case TargetTransformInfo::RGK_ScalableVector:
- if (!ST->isStreamingSVEModeDisabled() && !EnableScalableAutovecInStreamingMode)
+ if ((ST->isStreaming() || ST->isStreamingCompatible()) &&
+ !EnableScalableAutovecInStreamingMode)
return TypeSize::getScalable(0);
return TypeSize::getScalable(ST->hasSVE() ? 128 : 0);
@@ -1704,8 +1953,8 @@ AArch64TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
}
bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
- ArrayRef<const Value *> Args) {
-
+ ArrayRef<const Value *> Args,
+ Type *SrcOverrideTy) {
// A helper that returns a vector type from the given type. The number of
// elements in type Ty determines the vector width.
auto toVectorTy = [&](Type *ArgTy) {
@@ -1713,12 +1962,14 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
cast<VectorType>(DstTy)->getElementCount());
};
- // Exit early if DstTy is not a vector type whose elements are at least
- // 16-bits wide. SVE doesn't generally have the same set of instructions to
+ // Exit early if DstTy is not a vector type whose elements are one of [i16,
+ // i32, i64]. SVE doesn't generally have the same set of instructions to
// perform an extend with the add/sub/mul. There are SMULLB style
// instructions, but they operate on top/bottom, requiring some sort of lane
// interleaving to be used with zext/sext.
- if (!useNeonVector(DstTy) || DstTy->getScalarSizeInBits() < 16)
+ unsigned DstEltSize = DstTy->getScalarSizeInBits();
+ if (!useNeonVector(DstTy) || Args.size() != 2 ||
+ (DstEltSize != 16 && DstEltSize != 32 && DstEltSize != 64))
return false;
// Determine if the operation has a widening variant. We consider both the
@@ -1728,40 +1979,55 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
// TODO: Add additional widening operations (e.g., shl, etc.) once we
// verify that their extending operands are eliminated during code
// generation.
+ Type *SrcTy = SrcOverrideTy;
switch (Opcode) {
case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
- case Instruction::Mul: // SMULL(2), UMULL(2)
+ // The second operand needs to be an extend
+ if (isa<SExtInst>(Args[1]) || isa<ZExtInst>(Args[1])) {
+ if (!SrcTy)
+ SrcTy =
+ toVectorTy(cast<Instruction>(Args[1])->getOperand(0)->getType());
+ } else
+ return false;
break;
+ case Instruction::Mul: { // SMULL(2), UMULL(2)
+ // Both operands need to be extends of the same type.
+ if ((isa<SExtInst>(Args[0]) && isa<SExtInst>(Args[1])) ||
+ (isa<ZExtInst>(Args[0]) && isa<ZExtInst>(Args[1]))) {
+ if (!SrcTy)
+ SrcTy =
+ toVectorTy(cast<Instruction>(Args[0])->getOperand(0)->getType());
+ } else if (isa<ZExtInst>(Args[0]) || isa<ZExtInst>(Args[1])) {
+ // If one of the operands is a Zext and the other has enough zero bits to
+      // be treated as unsigned, we can still generate a umull, meaning the zext
+ // is free.
+ KnownBits Known =
+ computeKnownBits(isa<ZExtInst>(Args[0]) ? Args[1] : Args[0], DL);
+ if (Args[0]->getType()->getScalarSizeInBits() -
+ Known.Zero.countLeadingOnes() >
+ DstTy->getScalarSizeInBits() / 2)
+ return false;
+ if (!SrcTy)
+ SrcTy = toVectorTy(Type::getIntNTy(DstTy->getContext(),
+ DstTy->getScalarSizeInBits() / 2));
+ } else
+ return false;
+ break;
+ }
default:
return false;
}
- // To be a widening instruction (either the "wide" or "long" versions), the
- // second operand must be a sign- or zero extend.
- if (Args.size() != 2 ||
- (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])))
- return false;
- auto *Extend = cast<CastInst>(Args[1]);
- auto *Arg0 = dyn_cast<CastInst>(Args[0]);
-
- // A mul only has a mull version (not like addw). Both operands need to be
- // extending and the same type.
- if (Opcode == Instruction::Mul &&
- (!Arg0 || Arg0->getOpcode() != Extend->getOpcode() ||
- Arg0->getOperand(0)->getType() != Extend->getOperand(0)->getType()))
- return false;
-
// Legalize the destination type and ensure it can be used in a widening
// operation.
auto DstTyL = getTypeLegalizationCost(DstTy);
- unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
- if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
+ if (!DstTyL.second.isVector() || DstEltSize != DstTy->getScalarSizeInBits())
return false;
// Legalize the source type and ensure it can be used in a widening
// operation.
- auto *SrcTy = toVectorTy(Extend->getSrcTy());
+ assert(SrcTy && "Expected some SrcTy");
auto SrcTyL = getTypeLegalizationCost(SrcTy);
unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
@@ -1775,7 +2041,7 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
// Return true if the legalized types have the same number of vector elements
// and the destination element type size is twice that of the source type.
- return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
+ return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstEltSize;
}
InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
@@ -1785,24 +2051,22 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
const Instruction *I) {
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
-
// If the cast is observable, and it is used by a widening instruction (e.g.,
// uaddl, saddw, etc.), it may be free.
if (I && I->hasOneUser()) {
auto *SingleUser = cast<Instruction>(*I->user_begin());
SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
- if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
- // If the cast is the second operand, it is free. We will generate either
- // a "wide" or "long" version of the widening instruction.
- if (I == SingleUser->getOperand(1))
- return 0;
- // If the cast is not the second operand, it will be free if it looks the
- // same as the second operand. In this case, we will generate a "long"
- // version of the widening instruction.
- if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
- if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
- cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
+ if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands, Src)) {
+ // For adds only count the second operand as free if both operands are
+      // extends but not the same operation. (i.e. both operands are not free in
+ // add(sext, zext)).
+ if (SingleUser->getOpcode() == Instruction::Add) {
+ if (I == SingleUser->getOperand(1) ||
+ (isa<CastInst>(SingleUser->getOperand(1)) &&
+ cast<CastInst>(SingleUser->getOperand(1))->getOpcode() == Opcode))
return 0;
+ } else // Others are free so long as isWideningInstruction returned true.
+ return 0;
}
}
@@ -2072,8 +2336,44 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
{ ISD::BITCAST, MVT::nxv2i16, MVT::nxv2f16, 0 },
{ ISD::BITCAST, MVT::nxv4i16, MVT::nxv4f16, 0 },
{ ISD::BITCAST, MVT::nxv2i32, MVT::nxv2f32, 0 },
+
+    // Add cost for extending to illegal (too wide) scalable vectors.
+ // zero/sign extend are implemented by multiple unpack operations,
+ // where each operation has a cost of 1.
+ { ISD::ZERO_EXTEND, MVT::nxv16i16, MVT::nxv16i8, 2},
+ { ISD::ZERO_EXTEND, MVT::nxv16i32, MVT::nxv16i8, 6},
+ { ISD::ZERO_EXTEND, MVT::nxv16i64, MVT::nxv16i8, 14},
+ { ISD::ZERO_EXTEND, MVT::nxv8i32, MVT::nxv8i16, 2},
+ { ISD::ZERO_EXTEND, MVT::nxv8i64, MVT::nxv8i16, 6},
+ { ISD::ZERO_EXTEND, MVT::nxv4i64, MVT::nxv4i32, 2},
+
+ { ISD::SIGN_EXTEND, MVT::nxv16i16, MVT::nxv16i8, 2},
+ { ISD::SIGN_EXTEND, MVT::nxv16i32, MVT::nxv16i8, 6},
+ { ISD::SIGN_EXTEND, MVT::nxv16i64, MVT::nxv16i8, 14},
+ { ISD::SIGN_EXTEND, MVT::nxv8i32, MVT::nxv8i16, 2},
+ { ISD::SIGN_EXTEND, MVT::nxv8i64, MVT::nxv8i16, 6},
+ { ISD::SIGN_EXTEND, MVT::nxv4i64, MVT::nxv4i32, 2},
};
+  // We have to estimate the cost of a fixed-length operation performed on
+  // SVE registers using the number of SVE registers (i.e. operations)
+  // required to represent the fixed-width type.
+ EVT WiderTy = SrcTy.bitsGT(DstTy) ? SrcTy : DstTy;
+ if (SrcTy.isFixedLengthVector() && DstTy.isFixedLengthVector() &&
+ SrcTy.getVectorNumElements() == DstTy.getVectorNumElements() &&
+ ST->useSVEForFixedLengthVectors(WiderTy)) {
+ std::pair<InstructionCost, MVT> LT =
+ getTypeLegalizationCost(WiderTy.getTypeForEVT(Dst->getContext()));
+ unsigned NumElements = AArch64::SVEBitsPerBlock /
+ LT.second.getVectorElementType().getSizeInBits();
+ return AdjustCost(
+ LT.first *
+ getCastInstrCost(
+ Opcode, ScalableVectorType::get(Dst->getScalarType(), NumElements),
+ ScalableVectorType::get(Src->getScalarType(), NumElements), CCH,
+ CostKind, I));
+ }
+
if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
DstTy.getSimpleVT(),
SrcTy.getSimpleVT()))
@@ -2109,6 +2409,13 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
FP16Tbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()))
return AdjustCost(Entry->Cost);
+ // The BasicTTIImpl version only deals with CCH==TTI::CastContextHint::Normal,
+ // but we also want to include the TTI::CastContextHint::Masked case too.
+ if ((ISD == ISD::ZERO_EXTEND || ISD == ISD::SIGN_EXTEND) &&
+ CCH == TTI::CastContextHint::Masked && ST->hasSVEorSME() &&
+ TLI->isTypeLegal(DstTy))
+ CCH = TTI::CastContextHint::Normal;
+
return AdjustCost(
BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
}
@@ -2184,7 +2491,8 @@ InstructionCost AArch64TTIImpl::getCFInstrCost(unsigned Opcode,
return 0;
}
-InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(Type *Val,
+InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(const Instruction *I,
+ Type *Val,
unsigned Index,
bool HasRealUse) {
assert(Val->isVectorTy() && "This must be a vector type");
@@ -2210,14 +2518,26 @@ InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(Type *Val,
// needed. So it has non-zero cost.
// - For the rest of cases (virtual instruction or element type is float),
// consider the instruction free.
- //
+ if (Index == 0 && (!HasRealUse || !Val->getScalarType()->isIntegerTy()))
+ return 0;
+
+ // This is recognising a LD1 single-element structure to one lane of one
+ // register instruction. I.e., if this is an `insertelement` instruction,
+ // and its second operand is a load, then we will generate a LD1, which
+    // is an expensive instruction.
+ if (I && dyn_cast<LoadInst>(I->getOperand(1)))
+ return ST->getVectorInsertExtractBaseCost() + 1;
+
+    // i1 inserts and extracts will include an extra cset or cmp of the vector
+    // value. Increase the cost by 1 to account for this.
+ if (Val->getScalarSizeInBits() == 1)
+ return ST->getVectorInsertExtractBaseCost() + 1;
+
// FIXME:
// If the extract-element and insert-element instructions could be
// simplified away (e.g., could be combined into users by looking at use-def
// context), they have no cost. This is not done in the first place for
// compile-time considerations.
- if (Index == 0 && (!HasRealUse || !Val->getScalarType()->isIntegerTy()))
- return 0;
}
// All other insert/extracts cost this much.
@@ -2228,14 +2548,16 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
- return getVectorInstrCostHelper(Val, Index, false /* HasRealUse */);
+ bool HasRealUse =
+ Opcode == Instruction::InsertElement && Op0 && !isa<UndefValue>(Op0);
+ return getVectorInstrCostHelper(nullptr, Val, Index, HasRealUse);
}
InstructionCost AArch64TTIImpl::getVectorInstrCost(const Instruction &I,
Type *Val,
TTI::TargetCostKind CostKind,
unsigned Index) {
- return getVectorInstrCostHelper(Val, Index, true /* HasRealUse */);
+ return getVectorInstrCostHelper(&I, Val, Index, true /* HasRealUse */);
}
InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
@@ -2381,11 +2703,19 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
// We know that they are legal. See LowerAdd in ISelLowering.
return LT.first;
+ case ISD::FNEG:
case ISD::FADD:
case ISD::FSUB:
+ // Increase the cost for half and bfloat types if not architecturally
+ // supported.
+ if ((Ty->getScalarType()->isHalfTy() && !ST->hasFullFP16()) ||
+ (Ty->getScalarType()->isBFloatTy() && !ST->hasBF16()))
+ return 2 * LT.first;
+ if (!Ty->getScalarType()->isFP128Ty())
+ return LT.first;
+ [[fallthrough]];
case ISD::FMUL:
case ISD::FDIV:
- case ISD::FNEG:
// These nodes are marked as 'custom' just to lower them to SVE.
// We know said lowering will incur no additional cost.
if (!Ty->getScalarType()->isFP128Ty())
@@ -2403,7 +2733,7 @@ InstructionCost AArch64TTIImpl::getAddressComputationCost(Type *Ty,
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
// extra micro-ops can significantly decrease throughput.
- unsigned NumVectorInstToHideOverhead = 10;
+ unsigned NumVectorInstToHideOverhead = NeonNonConstStrideOverhead;
int MaxMergeDistance = 64;
if (Ty->isVectorTy() && SE &&
@@ -2460,6 +2790,11 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
static const TypeConversionCostTblEntry
VectorSelectTbl[] = {
+ { ISD::SELECT, MVT::v2i1, MVT::v2f32, 2 },
+ { ISD::SELECT, MVT::v2i1, MVT::v2f64, 2 },
+ { ISD::SELECT, MVT::v4i1, MVT::v4f32, 2 },
+ { ISD::SELECT, MVT::v4i1, MVT::v4f16, 2 },
+ { ISD::SELECT, MVT::v8i1, MVT::v8f16, 2 },
{ ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
{ ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
{ ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
@@ -2477,6 +2812,24 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
return Entry->Cost;
}
}
+
+ if (isa<FixedVectorType>(ValTy) && ISD == ISD::SETCC) {
+ auto LT = getTypeLegalizationCost(ValTy);
+ // Cost v4f16 FCmp without FP16 support via converting to v4f32 and back.
+ if (LT.second == MVT::v4f16 && !ST->hasFullFP16())
+ return LT.first * 4; // fcvtl + fcvtl + fcmp + xtn
+ }
+
+ // Treat the icmp in icmp(and, 0) as free, as we can make use of ands.
+ // FIXME: This can apply to more conditions and add/sub if it can be shown to
+ // be profitable.
+ if (ValTy->isIntegerTy() && ISD == ISD::SETCC && I &&
+ ICmpInst::isEquality(VecPred) &&
+ TLI->isTypeLegal(TLI->getValueType(DL, ValTy)) &&
+ match(I->getOperand(1), m_Zero()) &&
+ match(I->getOperand(0), m_And(m_Value(), m_Value())))
+ return 0;
+
// The base case handles scalable vectors fine for now, since it treats the
// cost as 1 * legalization cost.
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
@@ -2628,19 +2981,27 @@ InstructionCost AArch64TTIImpl::getInterleavedMemoryOpCost(
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) {
assert(Factor >= 2 && "Invalid interleave factor");
- auto *VecVTy = cast<FixedVectorType>(VecTy);
+ auto *VecVTy = cast<VectorType>(VecTy);
- if (!UseMaskForCond && !UseMaskForGaps &&
- Factor <= TLI->getMaxSupportedInterleaveFactor()) {
- unsigned NumElts = VecVTy->getNumElements();
+ if (VecTy->isScalableTy() && (!ST->hasSVE() || Factor != 2))
+ return InstructionCost::getInvalid();
+
+ // Vectorization for masked interleaved accesses is only enabled for scalable
+ // VF.
+ if (!VecTy->isScalableTy() && (UseMaskForCond || UseMaskForGaps))
+ return InstructionCost::getInvalid();
+
+ if (!UseMaskForGaps && Factor <= TLI->getMaxSupportedInterleaveFactor()) {
+ unsigned MinElts = VecVTy->getElementCount().getKnownMinValue();
auto *SubVecTy =
- FixedVectorType::get(VecTy->getScalarType(), NumElts / Factor);
+ VectorType::get(VecVTy->getElementType(),
+ VecVTy->getElementCount().divideCoefficientBy(Factor));
// ldN/stN only support legal vector types of size 64 or 128 in bits.
// Accesses having vector types that are a multiple of 128 bits can be
// matched to more than one ldN/stN instruction.
bool UseScalable;
- if (NumElts % Factor == 0 &&
+ if (MinElts % Factor == 0 &&
TLI->isLegalInterleavedAccessType(SubVecTy, DL, UseScalable))
return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL, UseScalable);
}
@@ -2665,7 +3026,7 @@ AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
return Cost;
}
-unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned AArch64TTIImpl::getMaxInterleaveFactor(ElementCount VF) {
return ST->getMaxInterleaveFactor();
}
@@ -2921,25 +3282,18 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
}
InstructionCost
-AArch64TTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsUnsigned,
+AArch64TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind) {
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16())
- return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
-
- assert((isa<ScalableVectorType>(Ty) == isa<ScalableVectorType>(CondTy)) &&
- "Both vector needs to be equally scalable");
+ return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
InstructionCost LegalizationCost = 0;
if (LT.first > 1) {
Type *LegalVTy = EVT(LT.second).getTypeForEVT(Ty->getContext());
- unsigned MinMaxOpcode =
- Ty->isFPOrFPVectorTy()
- ? Intrinsic::maxnum
- : (IsUnsigned ? Intrinsic::umin : Intrinsic::smin);
- IntrinsicCostAttributes Attrs(MinMaxOpcode, LegalVTy, {LegalVTy, LegalVTy});
+ IntrinsicCostAttributes Attrs(IID, LegalVTy, {LegalVTy, LegalVTy}, FMF);
LegalizationCost = getIntrinsicInstrCost(Attrs, CostKind) * (LT.first - 1);
}
@@ -3053,8 +3407,7 @@ AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
if (!Entry)
break;
auto *ValVTy = cast<FixedVectorType>(ValTy);
- if (!ValVTy->getElementType()->isIntegerTy(1) &&
- MTy.getVectorNumElements() <= ValVTy->getNumElements() &&
+ if (MTy.getVectorNumElements() <= ValVTy->getNumElements() &&
isPowerOf2_32(ValVTy->getNumElements())) {
InstructionCost ExtraCost = 0;
if (LT.first != 1) {
@@ -3065,7 +3418,9 @@ AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
ExtraCost = getArithmeticInstrCost(Opcode, Ty, CostKind);
ExtraCost *= LT.first - 1;
}
- return Entry->Cost + ExtraCost;
+ // All and/or/xor of i1 will be lowered with maxv/minv/addv + fmov
+ auto Cost = ValVTy->getElementType()->isIntegerTy(1) ? 2 : Entry->Cost;
+ return Cost + ExtraCost;
}
break;
}
@@ -3157,9 +3512,9 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
unsigned NumSources = 0;
for (unsigned E = 0; E < LTNumElts; E++) {
int MaskElt = (N * LTNumElts + E < TpNumElts) ? Mask[N * LTNumElts + E]
- : UndefMaskElem;
+ : PoisonMaskElem;
if (MaskElt < 0) {
- NMask.push_back(UndefMaskElem);
+ NMask.push_back(PoisonMaskElem);
continue;
}
@@ -3203,13 +3558,19 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
Kind = improveShuffleKindFromMask(Kind, Mask);
- // Check for broadcast loads.
- if (Kind == TTI::SK_Broadcast) {
+ // Check for broadcast loads, which are supported by the LD1R instruction.
+  // In terms of code-size, the shuffle vector is free when a load + dup gets
+ // folded into a LD1R. That's what we check and return here. For performance
+ // and reciprocal throughput, a LD1R is not completely free. In this case, we
+ // return the cost for the broadcast below (i.e. 1 for most/all types), so
+ // that we model the load + dup sequence slightly higher because LD1R is a
+ // high latency instruction.
+ if (CostKind == TTI::TCK_CodeSize && Kind == TTI::SK_Broadcast) {
bool IsLoad = !Args.empty() && isa<LoadInst>(Args[0]);
if (IsLoad && LT.second.isVector() &&
isLegalBroadcastLoad(Tp->getElementType(),
LT.second.getVectorElementCount()))
- return 0; // broadcast is handled by ld1r
+ return 0;
}
// If we have 4 elements for the shuffle and a Mask, get the cost straight
@@ -3231,6 +3592,8 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_Broadcast, MVT::v2i32, 1},
{TTI::SK_Broadcast, MVT::v4i32, 1},
{TTI::SK_Broadcast, MVT::v2i64, 1},
+ {TTI::SK_Broadcast, MVT::v4f16, 1},
+ {TTI::SK_Broadcast, MVT::v8f16, 1},
{TTI::SK_Broadcast, MVT::v2f32, 1},
{TTI::SK_Broadcast, MVT::v4f32, 1},
{TTI::SK_Broadcast, MVT::v2f64, 1},
@@ -3243,6 +3606,8 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
{TTI::SK_Transpose, MVT::v2i32, 1},
{TTI::SK_Transpose, MVT::v4i32, 1},
{TTI::SK_Transpose, MVT::v2i64, 1},
+ {TTI::SK_Transpose, MVT::v4f16, 1},
+ {TTI::SK_Transpose, MVT::v8f16, 1},
{TTI::SK_Transpose, MVT::v2f32, 1},
{TTI::SK_Transpose, MVT::v4f32, 1},
{TTI::SK_Transpose, MVT::v2f64, 1},
@@ -3357,28 +3722,64 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
}
-bool AArch64TTIImpl::preferPredicateOverEpilogue(
- Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC,
- TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL,
- InterleavedAccessInfo *IAI) {
- if (!ST->hasSVE() || TailFoldingKindLoc == TailFoldingKind::TFDisabled)
+static bool containsDecreasingPointers(Loop *TheLoop,
+ PredicatedScalarEvolution *PSE) {
+ const auto &Strides = DenseMap<Value *, const SCEV *>();
+ for (BasicBlock *BB : TheLoop->blocks()) {
+ // Scan the instructions in the block and look for addresses that are
+ // consecutive and decreasing.
+ for (Instruction &I : *BB) {
+ if (isa<LoadInst>(&I) || isa<StoreInst>(&I)) {
+ Value *Ptr = getLoadStorePointerOperand(&I);
+ Type *AccessTy = getLoadStoreType(&I);
+ if (getPtrStride(*PSE, AccessTy, Ptr, TheLoop, Strides, /*Assume=*/true,
+ /*ShouldCheckWrap=*/false)
+ .value_or(0) < 0)
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+bool AArch64TTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) {
+ if (!ST->hasSVE())
return false;
// We don't currently support vectorisation with interleaving for SVE - with
// such loops we're better off not using tail-folding. This gives us a chance
// to fall back on fixed-width vectorisation using NEON's ld2/st2/etc.
- if (IAI->hasGroups())
+ if (TFI->IAI->hasGroups())
return false;
- TailFoldingKind Required; // Defaults to 0.
- if (LVL->getReductionVars().size())
- Required.add(TailFoldingKind::TFReductions);
- if (LVL->getFixedOrderRecurrences().size())
- Required.add(TailFoldingKind::TFRecurrences);
- if (!Required)
- Required.add(TailFoldingKind::TFSimple);
+ TailFoldingOpts Required = TailFoldingOpts::Disabled;
+ if (TFI->LVL->getReductionVars().size())
+ Required |= TailFoldingOpts::Reductions;
+ if (TFI->LVL->getFixedOrderRecurrences().size())
+ Required |= TailFoldingOpts::Recurrences;
+
+ // We call this to discover whether any load/store pointers in the loop have
+ // negative strides. This will require extra work to reverse the loop
+ // predicate, which may be expensive.
+ if (containsDecreasingPointers(TFI->LVL->getLoop(),
+ TFI->LVL->getPredicatedScalarEvolution()))
+ Required |= TailFoldingOpts::Reverse;
+ if (Required == TailFoldingOpts::Disabled)
+ Required |= TailFoldingOpts::Simple;
+
+ if (!TailFoldingOptionLoc.satisfies(ST->getSVETailFoldingDefaultOpts(),
+ Required))
+ return false;
+
+ // Don't tail-fold for tight loops where we would be better off interleaving
+ // with an unpredicated loop.
+ unsigned NumInsns = 0;
+ for (BasicBlock *BB : TFI->LVL->getLoop()->blocks()) {
+ NumInsns += BB->sizeWithoutDebug();
+ }
- return (TailFoldingKindLoc & Required) == Required;
+  // We expect 4 of these to be an IV PHI, IV add, IV compare and branch.
+ return NumInsns >= SVETailFoldInsnThreshold;
}
InstructionCost
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index a22ba4720e02..787cb3c5d34b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -57,16 +57,17 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
VECTOR_LDST_FOUR_ELEMENTS
};
- bool isWideningInstruction(Type *Ty, unsigned Opcode,
- ArrayRef<const Value *> Args);
+ bool isWideningInstruction(Type *DstTy, unsigned Opcode,
+ ArrayRef<const Value *> Args,
+ Type *SrcOverrideTy = nullptr);
// A helper function called by 'getVectorInstrCost'.
//
// 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse'
// indicates whether the vector instruction is available in the input IR or
// just imaginary in vectorizer passes.
- InstructionCost getVectorInstrCostHelper(Type *Val, unsigned Index,
- bool HasRealUse);
+ InstructionCost getVectorInstrCostHelper(const Instruction *I, Type *Val,
+ unsigned Index, bool HasRealUse);
public:
explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
@@ -99,6 +100,8 @@ public:
bool enableInterleavedAccessVectorization() { return true; }
+ bool enableMaskedInterleavedAccessVectorization() { return ST->hasSVE(); }
+
unsigned getNumberOfRegisters(unsigned ClassID) const {
bool Vector = (ClassID == 1);
if (Vector) {
@@ -131,6 +134,8 @@ public:
return ST->getVScaleForTuning();
}
+ bool isVScaleKnownToBeAPowerOfTwo() const { return true; }
+
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const;
/// Try to return an estimate cost factor that can be used as a multiplier
@@ -144,7 +149,7 @@ public:
return VF.getKnownMinValue() * ST->getVScaleForTuning();
}
- unsigned getMaxInterleaveFactor(unsigned VF);
+ unsigned getMaxInterleaveFactor(ElementCount VF);
bool prefersVectorizedAddressing() const;
@@ -176,8 +181,8 @@ public:
TTI::TargetCostKind CostKind,
unsigned Index);
- InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsUnsigned,
+ InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind);
InstructionCost getArithmeticReductionCostSVE(unsigned Opcode,
@@ -262,7 +267,7 @@ public:
}
bool isLegalMaskedGatherScatter(Type *DataType) const {
- if (!ST->hasSVE() || ST->forceStreamingCompatibleSVE())
+ if (!ST->hasSVE() || !ST->isNeonAvailable())
return false;
// For fixed vectors, scalarize if not using SVE for them.
@@ -347,17 +352,16 @@ public:
return ST->hasSVE() ? 5 : 0;
}
- PredicationStyle emitGetActiveLaneMask() const {
+ TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const {
if (ST->hasSVE())
- return PredicationStyle::DataAndControlFlow;
- return PredicationStyle::None;
+ return IVUpdateMayOverflow
+ ? TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck
+ : TailFoldingStyle::DataAndControlFlow;
+
+ return TailFoldingStyle::DataWithoutLaneMask;
}
- bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
- AssumptionCache &AC, TargetLibraryInfo *TLI,
- DominatorTree *DT,
- LoopVectorizationLegality *LVL,
- InterleavedAccessInfo *IAI);
+ bool preferPredicateOverEpilogue(TailFoldingInfo *TFI);
bool supportsScalableVectors() const { return ST->hasSVE(); }
@@ -392,6 +396,15 @@ public:
/// @}
bool enableSelectOptimize() { return ST->enableSelectOptimize(); }
+
+ unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
+ Type *ScalarValTy) const {
+ // We can vectorize store v4i8.
+ if (ScalarMemTy->isIntegerTy(8) && isPowerOf2_32(VF) && VF >= 4)
+ return 4;
+
+ return BaseT::getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
+ }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 993ce4898d05..4756746063d5 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -41,16 +41,15 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
-#include "llvm/Support/AArch64TargetParser.h"
-#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/AArch64TargetParser.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include <cassert>
#include <cctype>
#include <cstdint>
@@ -241,48 +240,47 @@ private:
/// }
- OperandMatchResultTy tryParseScalarRegister(MCRegister &Reg);
- OperandMatchResultTy tryParseVectorRegister(MCRegister &Reg, StringRef &Kind,
- RegKind MatchKind);
- OperandMatchResultTy tryParseMatrixRegister(OperandVector &Operands);
- OperandMatchResultTy tryParseSVCR(OperandVector &Operands);
- OperandMatchResultTy tryParseOptionalShiftExtend(OperandVector &Operands);
- OperandMatchResultTy tryParseBarrierOperand(OperandVector &Operands);
- OperandMatchResultTy tryParseBarriernXSOperand(OperandVector &Operands);
- OperandMatchResultTy tryParseMRSSystemRegister(OperandVector &Operands);
- OperandMatchResultTy tryParseSysReg(OperandVector &Operands);
- OperandMatchResultTy tryParseSysCROperand(OperandVector &Operands);
+ ParseStatus tryParseScalarRegister(MCRegister &Reg);
+ ParseStatus tryParseVectorRegister(MCRegister &Reg, StringRef &Kind,
+ RegKind MatchKind);
+ ParseStatus tryParseMatrixRegister(OperandVector &Operands);
+ ParseStatus tryParseSVCR(OperandVector &Operands);
+ ParseStatus tryParseOptionalShiftExtend(OperandVector &Operands);
+ ParseStatus tryParseBarrierOperand(OperandVector &Operands);
+ ParseStatus tryParseBarriernXSOperand(OperandVector &Operands);
+ ParseStatus tryParseSysReg(OperandVector &Operands);
+ ParseStatus tryParseSysCROperand(OperandVector &Operands);
template <bool IsSVEPrefetch = false>
- OperandMatchResultTy tryParsePrefetch(OperandVector &Operands);
- OperandMatchResultTy tryParseRPRFMOperand(OperandVector &Operands);
- OperandMatchResultTy tryParsePSBHint(OperandVector &Operands);
- OperandMatchResultTy tryParseBTIHint(OperandVector &Operands);
- OperandMatchResultTy tryParseAdrpLabel(OperandVector &Operands);
- OperandMatchResultTy tryParseAdrLabel(OperandVector &Operands);
- template<bool AddFPZeroAsLiteral>
- OperandMatchResultTy tryParseFPImm(OperandVector &Operands);
- OperandMatchResultTy tryParseImmWithOptionalShift(OperandVector &Operands);
- OperandMatchResultTy tryParseGPR64sp0Operand(OperandVector &Operands);
+ ParseStatus tryParsePrefetch(OperandVector &Operands);
+ ParseStatus tryParseRPRFMOperand(OperandVector &Operands);
+ ParseStatus tryParsePSBHint(OperandVector &Operands);
+ ParseStatus tryParseBTIHint(OperandVector &Operands);
+ ParseStatus tryParseAdrpLabel(OperandVector &Operands);
+ ParseStatus tryParseAdrLabel(OperandVector &Operands);
+ template <bool AddFPZeroAsLiteral>
+ ParseStatus tryParseFPImm(OperandVector &Operands);
+ ParseStatus tryParseImmWithOptionalShift(OperandVector &Operands);
+ ParseStatus tryParseGPR64sp0Operand(OperandVector &Operands);
bool tryParseNeonVectorRegister(OperandVector &Operands);
- OperandMatchResultTy tryParseVectorIndex(OperandVector &Operands);
- OperandMatchResultTy tryParseGPRSeqPair(OperandVector &Operands);
- OperandMatchResultTy tryParseSyspXzrPair(OperandVector &Operands);
+ ParseStatus tryParseVectorIndex(OperandVector &Operands);
+ ParseStatus tryParseGPRSeqPair(OperandVector &Operands);
+ ParseStatus tryParseSyspXzrPair(OperandVector &Operands);
template <bool ParseShiftExtend,
RegConstraintEqualityTy EqTy = RegConstraintEqualityTy::EqualsReg>
- OperandMatchResultTy tryParseGPROperand(OperandVector &Operands);
- OperandMatchResultTy tryParseZTOperand(OperandVector &Operands);
+ ParseStatus tryParseGPROperand(OperandVector &Operands);
+ ParseStatus tryParseZTOperand(OperandVector &Operands);
template <bool ParseShiftExtend, bool ParseSuffix>
- OperandMatchResultTy tryParseSVEDataVector(OperandVector &Operands);
+ ParseStatus tryParseSVEDataVector(OperandVector &Operands);
template <RegKind RK>
- OperandMatchResultTy tryParseSVEPredicateVector(OperandVector &Operands);
+ ParseStatus tryParseSVEPredicateVector(OperandVector &Operands);
template <RegKind VectorKind>
- OperandMatchResultTy tryParseVectorList(OperandVector &Operands,
- bool ExpectMatch = false);
- OperandMatchResultTy tryParseMatrixTileList(OperandVector &Operands);
- OperandMatchResultTy tryParseSVEPattern(OperandVector &Operands);
- OperandMatchResultTy tryParseSVEVecLenSpecifier(OperandVector &Operands);
- OperandMatchResultTy tryParseGPR64x8(OperandVector &Operands);
- OperandMatchResultTy tryParseImmRange(OperandVector &Operands);
+ ParseStatus tryParseVectorList(OperandVector &Operands,
+ bool ExpectMatch = false);
+ ParseStatus tryParseMatrixTileList(OperandVector &Operands);
+ ParseStatus tryParseSVEPattern(OperandVector &Operands);
+ ParseStatus tryParseSVEVecLenSpecifier(OperandVector &Operands);
+ ParseStatus tryParseGPR64x8(OperandVector &Operands);
+ ParseStatus tryParseImmRange(OperandVector &Operands);
public:
enum AArch64MatchResultTy {
@@ -2912,7 +2910,7 @@ unsigned AArch64AsmParser::getNumRegsForRegKind(RegKind K) {
case RegKind::SVEPredicateAsCounter:
return 16;
case RegKind::LookupTable:
- return 1;
+ return 1;
}
llvm_unreachable("Unsupported RegKind");
}
@@ -2920,54 +2918,45 @@ unsigned AArch64AsmParser::getNumRegsForRegKind(RegKind K) {
/// tryParseScalarRegister - Try to parse a register name. The token must be an
/// Identifier when called, and if it is a register name the token is eaten and
/// the register is added to the operand list.
-OperandMatchResultTy
-AArch64AsmParser::tryParseScalarRegister(MCRegister &RegNum) {
+ParseStatus AArch64AsmParser::tryParseScalarRegister(MCRegister &RegNum) {
const AsmToken &Tok = getTok();
if (Tok.isNot(AsmToken::Identifier))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
std::string lowerCase = Tok.getString().lower();
unsigned Reg = matchRegisterNameAlias(lowerCase, RegKind::Scalar);
if (Reg == 0)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
RegNum = Reg;
Lex(); // Eat identifier token.
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// tryParseSysCROperand - Try to parse a system instruction CR operand name.
-OperandMatchResultTy
-AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) {
SMLoc S = getLoc();
- if (getTok().isNot(AsmToken::Identifier)) {
- Error(S, "Expected cN operand where 0 <= N <= 15");
- return MatchOperand_ParseFail;
- }
+ if (getTok().isNot(AsmToken::Identifier))
+ return Error(S, "Expected cN operand where 0 <= N <= 15");
StringRef Tok = getTok().getIdentifier();
- if (Tok[0] != 'c' && Tok[0] != 'C') {
- Error(S, "Expected cN operand where 0 <= N <= 15");
- return MatchOperand_ParseFail;
- }
+ if (Tok[0] != 'c' && Tok[0] != 'C')
+ return Error(S, "Expected cN operand where 0 <= N <= 15");
uint32_t CRNum;
bool BadNum = Tok.drop_front().getAsInteger(10, CRNum);
- if (BadNum || CRNum > 15) {
- Error(S, "Expected cN operand where 0 <= N <= 15");
- return MatchOperand_ParseFail;
- }
+ if (BadNum || CRNum > 15)
+ return Error(S, "Expected cN operand where 0 <= N <= 15");
Lex(); // Eat identifier token.
Operands.push_back(
AArch64Operand::CreateSysCR(CRNum, S, getLoc(), getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
// Either an identifier for named values or a 6-bit immediate.
-OperandMatchResultTy
-AArch64AsmParser::tryParseRPRFMOperand(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseRPRFMOperand(OperandVector &Operands) {
SMLoc S = getLoc();
const AsmToken &Tok = getTok();
@@ -2978,47 +2967,38 @@ AArch64AsmParser::tryParseRPRFMOperand(OperandVector &Operands) {
Tok.is(AsmToken::Integer)) {
const MCExpr *ImmVal;
if (getParser().parseExpression(ImmVal))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE) {
- TokError("immediate value expected for prefetch operand");
- return MatchOperand_ParseFail;
- }
+ if (!MCE)
+ return TokError("immediate value expected for prefetch operand");
unsigned prfop = MCE->getValue();
- if (prfop > MaxVal) {
- TokError("prefetch operand out of range, [0," + utostr(MaxVal) +
- "] expected");
- return MatchOperand_ParseFail;
- }
+ if (prfop > MaxVal)
+ return TokError("prefetch operand out of range, [0," + utostr(MaxVal) +
+ "] expected");
auto RPRFM = AArch64RPRFM::lookupRPRFMByEncoding(MCE->getValue());
Operands.push_back(AArch64Operand::CreatePrefetch(
prfop, RPRFM ? RPRFM->Name : "", S, getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- if (Tok.isNot(AsmToken::Identifier)) {
- TokError("prefetch hint expected");
- return MatchOperand_ParseFail;
- }
+ if (Tok.isNot(AsmToken::Identifier))
+ return TokError("prefetch hint expected");
auto RPRFM = AArch64RPRFM::lookupRPRFMByName(Tok.getString());
- if (!RPRFM) {
- TokError("prefetch hint expected");
- return MatchOperand_ParseFail;
- }
+ if (!RPRFM)
+ return TokError("prefetch hint expected");
Operands.push_back(AArch64Operand::CreatePrefetch(
RPRFM->Encoding, Tok.getString(), S, getContext()));
Lex(); // Eat identifier token.
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// tryParsePrefetch - Try to parse a prefetch operand.
template <bool IsSVEPrefetch>
-OperandMatchResultTy
-AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
SMLoc S = getLoc();
const AsmToken &Tok = getTok();
@@ -3047,67 +3027,53 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
Tok.is(AsmToken::Integer)) {
const MCExpr *ImmVal;
if (getParser().parseExpression(ImmVal))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE) {
- TokError("immediate value expected for prefetch operand");
- return MatchOperand_ParseFail;
- }
+ if (!MCE)
+ return TokError("immediate value expected for prefetch operand");
unsigned prfop = MCE->getValue();
- if (prfop > MaxVal) {
- TokError("prefetch operand out of range, [0," + utostr(MaxVal) +
- "] expected");
- return MatchOperand_ParseFail;
- }
+ if (prfop > MaxVal)
+ return TokError("prefetch operand out of range, [0," + utostr(MaxVal) +
+ "] expected");
auto PRFM = LookupByEncoding(MCE->getValue());
Operands.push_back(AArch64Operand::CreatePrefetch(prfop, PRFM.value_or(""),
S, getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- if (Tok.isNot(AsmToken::Identifier)) {
- TokError("prefetch hint expected");
- return MatchOperand_ParseFail;
- }
+ if (Tok.isNot(AsmToken::Identifier))
+ return TokError("prefetch hint expected");
auto PRFM = LookupByName(Tok.getString());
- if (!PRFM) {
- TokError("prefetch hint expected");
- return MatchOperand_ParseFail;
- }
+ if (!PRFM)
+ return TokError("prefetch hint expected");
Operands.push_back(AArch64Operand::CreatePrefetch(
*PRFM, Tok.getString(), S, getContext()));
Lex(); // Eat identifier token.
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// tryParsePSBHint - Try to parse a PSB operand, mapped to Hint command
-OperandMatchResultTy
-AArch64AsmParser::tryParsePSBHint(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParsePSBHint(OperandVector &Operands) {
SMLoc S = getLoc();
const AsmToken &Tok = getTok();
- if (Tok.isNot(AsmToken::Identifier)) {
- TokError("invalid operand for instruction");
- return MatchOperand_ParseFail;
- }
+ if (Tok.isNot(AsmToken::Identifier))
+ return TokError("invalid operand for instruction");
auto PSB = AArch64PSBHint::lookupPSBByName(Tok.getString());
- if (!PSB) {
- TokError("invalid operand for instruction");
- return MatchOperand_ParseFail;
- }
+ if (!PSB)
+ return TokError("invalid operand for instruction");
Operands.push_back(AArch64Operand::CreatePSBHint(
PSB->Encoding, Tok.getString(), S, getContext()));
Lex(); // Eat identifier token.
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-AArch64AsmParser::tryParseSyspXzrPair(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseSyspXzrPair(OperandVector &Operands) {
SMLoc StartLoc = getLoc();
MCRegister RegNum;
@@ -3115,61 +3081,51 @@ AArch64AsmParser::tryParseSyspXzrPair(OperandVector &Operands) {
// The case where xzr, xzr is not present is handled by an InstAlias.
auto RegTok = getTok(); // in case we need to backtrack
- if (tryParseScalarRegister(RegNum) != MatchOperand_Success)
- return MatchOperand_NoMatch;
+ if (!tryParseScalarRegister(RegNum).isSuccess())
+ return ParseStatus::NoMatch;
if (RegNum != AArch64::XZR) {
getLexer().UnLex(RegTok);
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
if (parseComma())
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
- if (tryParseScalarRegister(RegNum) != MatchOperand_Success) {
- TokError("expected register operand");
- return MatchOperand_ParseFail;
- }
+ if (!tryParseScalarRegister(RegNum).isSuccess())
+ return TokError("expected register operand");
- if (RegNum != AArch64::XZR) {
- TokError("xzr must be followed by xzr");
- return MatchOperand_ParseFail;
- }
+ if (RegNum != AArch64::XZR)
+ return TokError("xzr must be followed by xzr");
// We need to push something, since we claim this is an operand in .td.
// See also AArch64AsmParser::parseKeywordOperand.
Operands.push_back(AArch64Operand::CreateReg(
RegNum, RegKind::Scalar, StartLoc, getLoc(), getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// tryParseBTIHint - Try to parse a BTI operand, mapped to Hint command
-OperandMatchResultTy
-AArch64AsmParser::tryParseBTIHint(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseBTIHint(OperandVector &Operands) {
SMLoc S = getLoc();
const AsmToken &Tok = getTok();
- if (Tok.isNot(AsmToken::Identifier)) {
- TokError("invalid operand for instruction");
- return MatchOperand_ParseFail;
- }
+ if (Tok.isNot(AsmToken::Identifier))
+ return TokError("invalid operand for instruction");
auto BTI = AArch64BTIHint::lookupBTIByName(Tok.getString());
- if (!BTI) {
- TokError("invalid operand for instruction");
- return MatchOperand_ParseFail;
- }
+ if (!BTI)
+ return TokError("invalid operand for instruction");
Operands.push_back(AArch64Operand::CreateBTIHint(
BTI->Encoding, Tok.getString(), S, getContext()));
Lex(); // Eat identifier token.
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// tryParseAdrpLabel - Parse and validate a source label for the ADRP
/// instruction.
-OperandMatchResultTy
-AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
SMLoc S = getLoc();
const MCExpr *Expr = nullptr;
@@ -3178,7 +3134,7 @@ AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
}
if (parseSymbolicImmVal(Expr))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
AArch64MCExpr::VariantKind ELFRefKind;
MCSymbolRefExpr::VariantKind DarwinRefKind;
@@ -3193,8 +3149,7 @@ AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
} else if ((DarwinRefKind == MCSymbolRefExpr::VK_GOTPAGE ||
DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGE) &&
Addend != 0) {
- Error(S, "gotpage label reference not allowed an addend");
- return MatchOperand_ParseFail;
+ return Error(S, "gotpage label reference not allowed an addend");
} else if (DarwinRefKind != MCSymbolRefExpr::VK_PAGE &&
DarwinRefKind != MCSymbolRefExpr::VK_GOTPAGE &&
DarwinRefKind != MCSymbolRefExpr::VK_TLVPPAGE &&
@@ -3204,8 +3159,7 @@ AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
ELFRefKind != AArch64MCExpr::VK_GOTTPREL_PAGE &&
ELFRefKind != AArch64MCExpr::VK_TLSDESC_PAGE) {
// The operand must be an @page or @gotpage qualified symbolref.
- Error(S, "page or gotpage label reference expected");
- return MatchOperand_ParseFail;
+ return Error(S, "page or gotpage label reference expected");
}
}
@@ -3215,25 +3169,24 @@ AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// tryParseAdrLabel - Parse and validate a source label for the ADR
/// instruction.
-OperandMatchResultTy
-AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
SMLoc S = getLoc();
const MCExpr *Expr = nullptr;
// Leave anything with a bracket to the default for SVE
if (getTok().is(AsmToken::LBrac))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
if (getTok().is(AsmToken::Hash))
Lex(); // Eat hash token.
if (parseSymbolicImmVal(Expr))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
AArch64MCExpr::VariantKind ELFRefKind;
MCSymbolRefExpr::VariantKind DarwinRefKind;
@@ -3245,20 +3198,18 @@ AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
// ADR relocation (unfortunately).
Expr = AArch64MCExpr::create(Expr, AArch64MCExpr::VK_ABS, getContext());
} else {
- Error(S, "unexpected adr label");
- return MatchOperand_ParseFail;
+ return Error(S, "unexpected adr label");
}
}
SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
Operands.push_back(AArch64Operand::CreateImm(Expr, S, E, getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// tryParseFPImm - A floating point immediate expression operand.
-template<bool AddFPZeroAsLiteral>
-OperandMatchResultTy
-AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
+template <bool AddFPZeroAsLiteral>
+ParseStatus AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
SMLoc S = getLoc();
bool Hash = parseOptionalToken(AsmToken::Hash);
@@ -3269,17 +3220,14 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
const AsmToken &Tok = getTok();
if (!Tok.is(AsmToken::Real) && !Tok.is(AsmToken::Integer)) {
if (!Hash)
- return MatchOperand_NoMatch;
- TokError("invalid floating point immediate");
- return MatchOperand_ParseFail;
+ return ParseStatus::NoMatch;
+ return TokError("invalid floating point immediate");
}
// Parse hexadecimal representation.
if (Tok.is(AsmToken::Integer) && Tok.getString().startswith("0x")) {
- if (Tok.getIntVal() > 255 || isNegative) {
- TokError("encoded floating point value out of range");
- return MatchOperand_ParseFail;
- }
+ if (Tok.getIntVal() > 255 || isNegative)
+ return TokError("encoded floating point value out of range");
APFloat F((double)AArch64_AM::getFPImmFloat(Tok.getIntVal()));
Operands.push_back(
@@ -3289,10 +3237,8 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
APFloat RealVal(APFloat::IEEEdouble());
auto StatusOrErr =
RealVal.convertFromString(Tok.getString(), APFloat::rmTowardZero);
- if (errorToBool(StatusOrErr.takeError())) {
- TokError("invalid floating point representation");
- return MatchOperand_ParseFail;
- }
+ if (errorToBool(StatusOrErr.takeError()))
+ return TokError("invalid floating point representation");
if (isNegative)
RealVal.changeSign();
@@ -3307,12 +3253,12 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
Lex(); // Eat the token.
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// tryParseImmWithOptionalShift - Parse immediate operand, optionally with
/// a shift suffix, for example '#1, lsl #12'.
-OperandMatchResultTy
+ParseStatus
AArch64AsmParser::tryParseImmWithOptionalShift(OperandVector &Operands) {
SMLoc S = getLoc();
@@ -3320,7 +3266,7 @@ AArch64AsmParser::tryParseImmWithOptionalShift(OperandVector &Operands) {
Lex(); // Eat '#'
else if (getTok().isNot(AsmToken::Integer))
// Operand should start from # or should be integer, emit error otherwise.
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
if (getTok().is(AsmToken::Integer) &&
getLexer().peekTok().is(AsmToken::Colon))
@@ -3328,11 +3274,11 @@ AArch64AsmParser::tryParseImmWithOptionalShift(OperandVector &Operands) {
const MCExpr *Imm = nullptr;
if (parseSymbolicImmVal(Imm))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
else if (getTok().isNot(AsmToken::Comma)) {
Operands.push_back(
AArch64Operand::CreateImm(Imm, S, getLoc(), getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
// Eat ','
@@ -3343,44 +3289,38 @@ AArch64AsmParser::tryParseImmWithOptionalShift(OperandVector &Operands) {
AArch64Operand::CreateImm(Imm, S, getLoc(), getContext()));
Operands.push_back(
AArch64Operand::CreateToken(VecGroup, getLoc(), getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
// The optional operand must be "lsl #N" where N is non-negative.
if (!getTok().is(AsmToken::Identifier) ||
- !getTok().getIdentifier().equals_insensitive("lsl")) {
- Error(getLoc(), "only 'lsl #+N' valid after immediate");
- return MatchOperand_ParseFail;
- }
+ !getTok().getIdentifier().equals_insensitive("lsl"))
+ return Error(getLoc(), "only 'lsl #+N' valid after immediate");
// Eat 'lsl'
Lex();
parseOptionalToken(AsmToken::Hash);
- if (getTok().isNot(AsmToken::Integer)) {
- Error(getLoc(), "only 'lsl #+N' valid after immediate");
- return MatchOperand_ParseFail;
- }
+ if (getTok().isNot(AsmToken::Integer))
+ return Error(getLoc(), "only 'lsl #+N' valid after immediate");
int64_t ShiftAmount = getTok().getIntVal();
- if (ShiftAmount < 0) {
- Error(getLoc(), "positive shift amount required");
- return MatchOperand_ParseFail;
- }
+ if (ShiftAmount < 0)
+ return Error(getLoc(), "positive shift amount required");
Lex(); // Eat the number
// Just in case the optional lsl #0 is used for immediates other than zero.
if (ShiftAmount == 0 && Imm != nullptr) {
Operands.push_back(
AArch64Operand::CreateImm(Imm, S, getLoc(), getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
Operands.push_back(AArch64Operand::CreateShiftedImm(Imm, ShiftAmount, S,
getLoc(), getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// parseCondCodeString - Parse a Condition Code string, optionally returning a
@@ -3408,8 +3348,7 @@ AArch64AsmParser::parseCondCodeString(StringRef Cond, std::string &Suggestion) {
.Case("nv", AArch64CC::NV)
.Default(AArch64CC::Invalid);
- if (CC == AArch64CC::Invalid &&
- getSTI().getFeatureBits()[AArch64::FeatureSVE]) {
+ if (CC == AArch64CC::Invalid && getSTI().hasFeature(AArch64::FeatureSVE)) {
CC = StringSwitch<AArch64CC::CondCode>(Cond.lower())
.Case("none", AArch64CC::EQ)
.Case("any", AArch64CC::NE)
@@ -3458,48 +3397,42 @@ bool AArch64AsmParser::parseCondCode(OperandVector &Operands,
return false;
}
-OperandMatchResultTy
-AArch64AsmParser::tryParseSVCR(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseSVCR(OperandVector &Operands) {
const AsmToken &Tok = getTok();
SMLoc S = getLoc();
- if (Tok.isNot(AsmToken::Identifier)) {
- TokError("invalid operand for instruction");
- return MatchOperand_ParseFail;
- }
+ if (Tok.isNot(AsmToken::Identifier))
+ return TokError("invalid operand for instruction");
unsigned PStateImm = -1;
const auto *SVCR = AArch64SVCR::lookupSVCRByName(Tok.getString());
if (!SVCR)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
if (SVCR->haveFeatures(getSTI().getFeatureBits()))
PStateImm = SVCR->Encoding;
Operands.push_back(
AArch64Operand::CreateSVCR(PStateImm, Tok.getString(), S, getContext()));
Lex(); // Eat identifier token.
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-AArch64AsmParser::tryParseMatrixRegister(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseMatrixRegister(OperandVector &Operands) {
const AsmToken &Tok = getTok();
SMLoc S = getLoc();
StringRef Name = Tok.getString();
- if (Name.equals_insensitive("za") || Name.startswith_insensitive("za.")) {
+ if (Name.equals_insensitive("za") || Name.starts_with_insensitive("za.")) {
Lex(); // eat "za[.(b|h|s|d)]"
unsigned ElementWidth = 0;
auto DotPosition = Name.find('.');
if (DotPosition != StringRef::npos) {
const auto &KindRes =
parseVectorKind(Name.drop_front(DotPosition), RegKind::Matrix);
- if (!KindRes) {
- TokError(
+ if (!KindRes)
+ return TokError(
"Expected the register to be followed by element width suffix");
- return MatchOperand_ParseFail;
- }
ElementWidth = KindRes->second;
}
Operands.push_back(AArch64Operand::CreateMatrixRegister(
@@ -3509,15 +3442,15 @@ AArch64AsmParser::tryParseMatrixRegister(OperandVector &Operands) {
// There's no comma after matrix operand, so we can parse the next operand
// immediately.
if (parseOperand(Operands, false, false))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
// Try to parse matrix register.
unsigned Reg = matchRegisterNameAlias(Name, RegKind::Matrix);
if (!Reg)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
size_t DotPosition = Name.find('.');
assert(DotPosition != StringRef::npos && "Unexpected register");
@@ -3533,10 +3466,9 @@ AArch64AsmParser::tryParseMatrixRegister(OperandVector &Operands) {
// Next up, parsing the suffix
const auto &KindRes = parseVectorKind(Tail, RegKind::Matrix);
- if (!KindRes) {
- TokError("Expected the register to be followed by element width suffix");
- return MatchOperand_ParseFail;
- }
+ if (!KindRes)
+ return TokError(
+ "Expected the register to be followed by element width suffix");
unsigned ElementWidth = KindRes->second;
Lex();
@@ -3548,14 +3480,14 @@ AArch64AsmParser::tryParseMatrixRegister(OperandVector &Operands) {
// There's no comma after matrix operand, so we can parse the next operand
// immediately.
if (parseOperand(Operands, false, false))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// tryParseOptionalShift - Some operands take an optional shift argument. Parse
/// them if present.
-OperandMatchResultTy
+ParseStatus
AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
const AsmToken &Tok = getTok();
std::string LowerID = Tok.getString().lower();
@@ -3577,7 +3509,7 @@ AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
.Default(AArch64_AM::InvalidShiftExtend);
if (ShOp == AArch64_AM::InvalidShiftExtend)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
SMLoc S = Tok.getLoc();
Lex();
@@ -3589,40 +3521,35 @@ AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
ShOp == AArch64_AM::ASR || ShOp == AArch64_AM::ROR ||
ShOp == AArch64_AM::MSL) {
// We expect a number here.
- TokError("expected #imm after shift specifier");
- return MatchOperand_ParseFail;
+ return TokError("expected #imm after shift specifier");
}
// "extend" type operations don't need an immediate, #0 is implicit.
SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
Operands.push_back(
AArch64Operand::CreateShiftExtend(ShOp, 0, false, S, E, getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
// Make sure we do actually have a number, identifier or a parenthesized
// expression.
SMLoc E = getLoc();
if (!getTok().is(AsmToken::Integer) && !getTok().is(AsmToken::LParen) &&
- !getTok().is(AsmToken::Identifier)) {
- Error(E, "expected integer shift amount");
- return MatchOperand_ParseFail;
- }
+ !getTok().is(AsmToken::Identifier))
+ return Error(E, "expected integer shift amount");
const MCExpr *ImmVal;
if (getParser().parseExpression(ImmVal))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE) {
- Error(E, "expected constant '#imm' after shift specifier");
- return MatchOperand_ParseFail;
- }
+ if (!MCE)
+ return Error(E, "expected constant '#imm' after shift specifier");
E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
Operands.push_back(AArch64Operand::CreateShiftExtend(
ShOp, MCE->getValue(), true, S, E, getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
static const struct Extension {
@@ -3679,10 +3606,25 @@ static const struct Extension {
{"ite", {AArch64::FeatureITE}},
{"cssc", {AArch64::FeatureCSSC}},
{"rcpc3", {AArch64::FeatureRCPC3}},
- // FIXME: Unsupported extensions
- {"lor", {}},
- {"rdma", {}},
- {"profile", {}},
+ {"gcs", {AArch64::FeatureGCS}},
+ {"bf16", {AArch64::FeatureBF16}},
+ {"compnum", {AArch64::FeatureComplxNum}},
+ {"dotprod", {AArch64::FeatureDotProd}},
+ {"f32mm", {AArch64::FeatureMatMulFP32}},
+ {"f64mm", {AArch64::FeatureMatMulFP64}},
+ {"fp16", {AArch64::FeatureFullFP16}},
+ {"fp16fml", {AArch64::FeatureFP16FML}},
+ {"i8mm", {AArch64::FeatureMatMulInt8}},
+ {"lor", {AArch64::FeatureLOR}},
+ {"profile", {AArch64::FeatureSPE}},
+ // "rdma" is the name documented by binutils for the feature, but
+ // binutils also accepts incomplete prefixes of features, so "rdm"
+ // works too. Support both spellings here.
+ {"rdm", {AArch64::FeatureRDM}},
+ {"rdma", {AArch64::FeatureRDM}},
+ {"sb", {AArch64::FeatureSB}},
+ {"ssbs", {AArch64::FeatureSSBS}},
+ {"tme", {AArch64::FeatureTME}},
};
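
The extension table above maps ".arch_extension" names onto feature bit sets, and the "rdm"/"rdma" pair shows why several spellings can land on the same feature. A hedged sketch of the lookup pattern, using invented names and std::bitset in place of the real FeatureBitset machinery:

#include <bitset>
#include <optional>
#include <string>
#include <vector>

// Illustrative only: a tiny name -> feature-set table in the spirit of the
// Extension array above.
using FeatureSet = std::bitset<64>;

struct ExtensionEntry {
  const char *Name;
  FeatureSet Features;
};

static const std::vector<ExtensionEntry> Extensions = {
    {"rdm", FeatureSet(1u << 0)},
    {"rdma", FeatureSet(1u << 0)}, // alternate spelling, same feature
    {"sb", FeatureSet(1u << 1)},
    {"tme", FeatureSet(1u << 2)},
};

// Linear scan over the small static table, as the directive handlers do.
static std::optional<FeatureSet> lookupExtension(const std::string &Name) {
  for (const auto &E : Extensions)
    if (Name == E.Name)
      return E.Features;
  return std::nullopt;
}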
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
@@ -3869,7 +3811,7 @@ bool AArch64AsmParser::parseSyspAlias(StringRef Name, SMLoc NameLoc,
SMLoc S = Tok.getLoc();
if (Mnemonic == "tlbip") {
- bool HasnXSQualifier = Op.endswith_insensitive("nXS");
+ bool HasnXSQualifier = Op.ends_with_insensitive("nXS");
if (HasnXSQualifier) {
Op = Op.drop_back(3);
}
@@ -3900,9 +3842,9 @@ bool AArch64AsmParser::parseSyspAlias(StringRef Name, SMLoc NameLoc,
if (Tok.isNot(AsmToken::Identifier))
return TokError("expected register identifier");
auto Result = tryParseSyspXzrPair(Operands);
- if (Result == MatchOperand_NoMatch)
+ if (Result.isNoMatch())
Result = tryParseGPRSeqPair(Operands);
- if (Result != MatchOperand_Success)
+ if (!Result.isSuccess())
return TokError("specified " + Mnemonic +
" op requires a pair of registers");
@@ -3912,69 +3854,58 @@ bool AArch64AsmParser::parseSyspAlias(StringRef Name, SMLoc NameLoc,
return false;
}
-OperandMatchResultTy
-AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = getTok();
- if (Mnemonic == "tsb" && Tok.isNot(AsmToken::Identifier)) {
- TokError("'csync' operand expected");
- return MatchOperand_ParseFail;
- } else if (parseOptionalToken(AsmToken::Hash) || Tok.is(AsmToken::Integer)) {
+ if (Mnemonic == "tsb" && Tok.isNot(AsmToken::Identifier))
+ return TokError("'csync' operand expected");
+ if (parseOptionalToken(AsmToken::Hash) || Tok.is(AsmToken::Integer)) {
// Immediate operand.
const MCExpr *ImmVal;
SMLoc ExprLoc = getLoc();
AsmToken IntTok = Tok;
if (getParser().parseExpression(ImmVal))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE) {
- Error(ExprLoc, "immediate value expected for barrier operand");
- return MatchOperand_ParseFail;
- }
+ if (!MCE)
+ return Error(ExprLoc, "immediate value expected for barrier operand");
int64_t Value = MCE->getValue();
if (Mnemonic == "dsb" && Value > 15) {
// This case is a no match here, but it might be matched by the nXS
// variant. We deliberately do not unlex the optional '#', as it is not
// needed to characterize an integer immediate.
Parser.getLexer().UnLex(IntTok);
- return MatchOperand_NoMatch;
- }
- if (Value < 0 || Value > 15) {
- Error(ExprLoc, "barrier operand out of range");
- return MatchOperand_ParseFail;
+ return ParseStatus::NoMatch;
}
+ if (Value < 0 || Value > 15)
+ return Error(ExprLoc, "barrier operand out of range");
auto DB = AArch64DB::lookupDBByEncoding(Value);
Operands.push_back(AArch64Operand::CreateBarrier(Value, DB ? DB->Name : "",
ExprLoc, getContext(),
false /*hasnXSModifier*/));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- if (Tok.isNot(AsmToken::Identifier)) {
- TokError("invalid operand for instruction");
- return MatchOperand_ParseFail;
- }
+ if (Tok.isNot(AsmToken::Identifier))
+ return TokError("invalid operand for instruction");
StringRef Operand = Tok.getString();
auto TSB = AArch64TSB::lookupTSBByName(Operand);
auto DB = AArch64DB::lookupDBByName(Operand);
// The only valid named option for ISB is 'sy'
- if (Mnemonic == "isb" && (!DB || DB->Encoding != AArch64DB::sy)) {
- TokError("'sy' or #imm operand expected");
- return MatchOperand_ParseFail;
+ if (Mnemonic == "isb" && (!DB || DB->Encoding != AArch64DB::sy))
+ return TokError("'sy' or #imm operand expected");
// The only valid named option for TSB is 'csync'
- } else if (Mnemonic == "tsb" && (!TSB || TSB->Encoding != AArch64TSB::csync)) {
- TokError("'csync' operand expected");
- return MatchOperand_ParseFail;
- } else if (!DB && !TSB) {
+ if (Mnemonic == "tsb" && (!TSB || TSB->Encoding != AArch64TSB::csync))
+ return TokError("'csync' operand expected");
+ if (!DB && !TSB) {
if (Mnemonic == "dsb") {
// This case is a no match here, but it might be matched by the nXS
// variant.
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
- TokError("invalid barrier option name");
- return MatchOperand_ParseFail;
+ return TokError("invalid barrier option name");
}
Operands.push_back(AArch64Operand::CreateBarrier(
@@ -3982,72 +3913,63 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
getContext(), false /*hasnXSModifier*/));
Lex(); // Consume the option
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
+ParseStatus
AArch64AsmParser::tryParseBarriernXSOperand(OperandVector &Operands) {
const AsmToken &Tok = getTok();
assert(Mnemonic == "dsb" && "Instruction does not accept nXS operands");
if (Mnemonic != "dsb")
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
if (parseOptionalToken(AsmToken::Hash) || Tok.is(AsmToken::Integer)) {
// Immediate operand.
const MCExpr *ImmVal;
SMLoc ExprLoc = getLoc();
if (getParser().parseExpression(ImmVal))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE) {
- Error(ExprLoc, "immediate value expected for barrier operand");
- return MatchOperand_ParseFail;
- }
+ if (!MCE)
+ return Error(ExprLoc, "immediate value expected for barrier operand");
int64_t Value = MCE->getValue();
// v8.7-A DSB in the nXS variant accepts only the following immediate
// values: 16, 20, 24, 28.
- if (Value != 16 && Value != 20 && Value != 24 && Value != 28) {
- Error(ExprLoc, "barrier operand out of range");
- return MatchOperand_ParseFail;
- }
+ if (Value != 16 && Value != 20 && Value != 24 && Value != 28)
+ return Error(ExprLoc, "barrier operand out of range");
auto DB = AArch64DBnXS::lookupDBnXSByImmValue(Value);
Operands.push_back(AArch64Operand::CreateBarrier(DB->Encoding, DB->Name,
ExprLoc, getContext(),
true /*hasnXSModifier*/));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- if (Tok.isNot(AsmToken::Identifier)) {
- TokError("invalid operand for instruction");
- return MatchOperand_ParseFail;
- }
+ if (Tok.isNot(AsmToken::Identifier))
+ return TokError("invalid operand for instruction");
StringRef Operand = Tok.getString();
auto DB = AArch64DBnXS::lookupDBnXSByName(Operand);
- if (!DB) {
- TokError("invalid barrier option name");
- return MatchOperand_ParseFail;
- }
+ if (!DB)
+ return TokError("invalid barrier option name");
Operands.push_back(
AArch64Operand::CreateBarrier(DB->Encoding, Tok.getString(), getLoc(),
getContext(), true /*hasnXSModifier*/));
Lex(); // Consume the option
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
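
Taken together, the two barrier parsers above accept either a named option or an immediate: plain DSB/DMB/ISB take 0-15, while the v8.7-A DSB ...nXS forms take only 16, 20, 24 and 28, and a plain dsb with a value above 15 is deliberately handed back as NoMatch so the nXS variant gets a chance to match. A small sketch of just that range logic (hypothetical helper, not the parser itself):

#include <cstdint>

// Hypothetical helper capturing the immediate ranges used above:
// plain barriers take 0..15, the DSB nXS variant takes 16/20/24/28.
enum class BarrierKind { Plain, nXS };

static bool isValidBarrierImm(BarrierKind Kind, int64_t Value) {
  if (Kind == BarrierKind::Plain)
    return Value >= 0 && Value <= 15;
  // v8.7-A DSB ...nXS: only these four encodings are defined.
  return Value == 16 || Value == 20 || Value == 24 || Value == 28;
}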
-OperandMatchResultTy
-AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
const AsmToken &Tok = getTok();
if (Tok.isNot(AsmToken::Identifier))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
if (AArch64SVCR::lookupSVCRByName(Tok.getString()))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
int MRSReg, MSRReg;
auto SysReg = AArch64SysReg::lookupSysRegByName(Tok.getString());
@@ -4072,7 +3994,7 @@ AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
PStateImm, getContext()));
Lex(); // Eat identifier
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// tryParseNeonVectorRegister - Parse a vector register operand.
@@ -4084,9 +4006,8 @@ bool AArch64AsmParser::tryParseNeonVectorRegister(OperandVector &Operands) {
// Check for a vector register specifier first.
StringRef Kind;
MCRegister Reg;
- OperandMatchResultTy Res =
- tryParseVectorRegister(Reg, Kind, RegKind::NeonVector);
- if (Res != MatchOperand_Success)
+ ParseStatus Res = tryParseVectorRegister(Reg, Kind, RegKind::NeonVector);
+ if (!Res.isSuccess())
return true;
const auto &KindRes = parseVectorKind(Kind, RegKind::NeonVector);
@@ -4103,45 +4024,42 @@ bool AArch64AsmParser::tryParseNeonVectorRegister(OperandVector &Operands) {
if (!Kind.empty())
Operands.push_back(AArch64Operand::CreateToken(Kind, S, getContext()));
- return tryParseVectorIndex(Operands) == MatchOperand_ParseFail;
+ return tryParseVectorIndex(Operands).isFailure();
}
-OperandMatchResultTy
-AArch64AsmParser::tryParseVectorIndex(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseVectorIndex(OperandVector &Operands) {
SMLoc SIdx = getLoc();
if (parseOptionalToken(AsmToken::LBrac)) {
const MCExpr *ImmVal;
if (getParser().parseExpression(ImmVal))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE) {
- TokError("immediate value expected for vector index");
- return MatchOperand_ParseFail;;
- }
+ if (!MCE)
+ return TokError("immediate value expected for vector index");
SMLoc E = getLoc();
if (parseToken(AsmToken::RBrac, "']' expected"))
- return MatchOperand_ParseFail;;
+ return ParseStatus::Failure;
Operands.push_back(AArch64Operand::CreateVectorIndex(MCE->getValue(), SIdx,
E, getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
// tryParseVectorRegister - Try to parse a vector register name with
// optional kind specifier. If it is a register specifier, eat the token
// and return it.
-OperandMatchResultTy
-AArch64AsmParser::tryParseVectorRegister(MCRegister &Reg, StringRef &Kind,
- RegKind MatchKind) {
+ParseStatus AArch64AsmParser::tryParseVectorRegister(MCRegister &Reg,
+ StringRef &Kind,
+ RegKind MatchKind) {
const AsmToken &Tok = getTok();
if (Tok.isNot(AsmToken::Identifier))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
StringRef Name = Tok.getString();
// If there is a kind specifier, it's separated from the register name by
@@ -4153,34 +4071,33 @@ AArch64AsmParser::tryParseVectorRegister(MCRegister &Reg, StringRef &Kind,
if (RegNum) {
if (Next != StringRef::npos) {
Kind = Name.slice(Next, StringRef::npos);
- if (!isValidVectorKind(Kind, MatchKind)) {
- TokError("invalid vector kind qualifier");
- return MatchOperand_ParseFail;
- }
+ if (!isValidVectorKind(Kind, MatchKind))
+ return TokError("invalid vector kind qualifier");
}
Lex(); // Eat the register token.
Reg = RegNum;
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
/// tryParseSVEPredicateVector - Parse a SVE predicate register operand.
-template <RegKind RK> OperandMatchResultTy
+template <RegKind RK>
+ParseStatus
AArch64AsmParser::tryParseSVEPredicateVector(OperandVector &Operands) {
// Check for a SVE predicate register specifier first.
const SMLoc S = getLoc();
StringRef Kind;
MCRegister RegNum;
auto Res = tryParseVectorRegister(RegNum, Kind, RK);
- if (Res != MatchOperand_Success)
+ if (!Res.isSuccess())
return Res;
const auto &KindRes = parseVectorKind(Kind, RK);
if (!KindRes)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
unsigned ElementWidth = KindRes->second;
Operands.push_back(AArch64Operand::CreateVectorReg(
@@ -4189,26 +4106,24 @@ AArch64AsmParser::tryParseSVEPredicateVector(OperandVector &Operands) {
if (getLexer().is(AsmToken::LBrac)) {
if (RK == RegKind::SVEPredicateAsCounter) {
- OperandMatchResultTy ResIndex = tryParseVectorIndex(Operands);
- if (ResIndex == MatchOperand_Success)
- return MatchOperand_Success;
+ ParseStatus ResIndex = tryParseVectorIndex(Operands);
+ if (ResIndex.isSuccess())
+ return ParseStatus::Success;
} else {
// Indexed predicate, there's no comma so try to parse the next operand
// immediately.
if (parseOperand(Operands, false, false))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
}
// Not all predicates are followed by a '/m' or '/z'.
if (getTok().isNot(AsmToken::Slash))
- return MatchOperand_Success;
+ return ParseStatus::Success;
// But when they do they shouldn't have an element type suffix.
- if (!Kind.empty()) {
- Error(S, "not expecting size suffix");
- return MatchOperand_ParseFail;
- }
+ if (!Kind.empty())
+ return Error(S, "not expecting size suffix");
// Add a literal slash as operand
Operands.push_back(AArch64Operand::CreateToken("/", getLoc(), getContext()));
@@ -4217,22 +4132,18 @@ AArch64AsmParser::tryParseSVEPredicateVector(OperandVector &Operands) {
// Zeroing or merging?
auto Pred = getTok().getString().lower();
- if (RK == RegKind::SVEPredicateAsCounter && Pred != "z") {
- Error(getLoc(), "expecting 'z' predication");
- return MatchOperand_ParseFail;
- }
+ if (RK == RegKind::SVEPredicateAsCounter && Pred != "z")
+ return Error(getLoc(), "expecting 'z' predication");
- if (RK == RegKind::SVEPredicateVector && Pred != "z" && Pred != "m") {
- Error(getLoc(), "expecting 'm' or 'z' predication");
- return MatchOperand_ParseFail;
- }
+ if (RK == RegKind::SVEPredicateVector && Pred != "z" && Pred != "m")
+ return Error(getLoc(), "expecting 'm' or 'z' predication");
// Add zero/merge token.
const char *ZM = Pred == "z" ? "z" : "m";
Operands.push_back(AArch64Operand::CreateToken(ZM, getLoc(), getContext()));
Lex(); // Eat zero/merge token.
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// parseRegister - Parse a register operand.
@@ -4241,11 +4152,11 @@ bool AArch64AsmParser::parseRegister(OperandVector &Operands) {
if (!tryParseNeonVectorRegister(Operands))
return false;
- if (tryParseZTOperand(Operands) == MatchOperand_Success)
+ if (tryParseZTOperand(Operands).isSuccess())
return false;
// Otherwise try for a scalar register.
- if (tryParseGPROperand<false>(Operands) == MatchOperand_Success)
+ if (tryParseGPROperand<false>(Operands).isSuccess())
return false;
return true;
@@ -4329,32 +4240,31 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
return false;
}
-OperandMatchResultTy
-AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) {
if (getTok().isNot(AsmToken::LCurly))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
- auto ParseMatrixTile = [this](unsigned &Reg, unsigned &ElementWidth) {
+ auto ParseMatrixTile = [this](unsigned &Reg,
+ unsigned &ElementWidth) -> ParseStatus {
StringRef Name = getTok().getString();
size_t DotPosition = Name.find('.');
if (DotPosition == StringRef::npos)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
unsigned RegNum = matchMatrixTileListRegName(Name);
if (!RegNum)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
StringRef Tail = Name.drop_front(DotPosition);
const std::optional<std::pair<int, int>> &KindRes =
parseVectorKind(Tail, RegKind::Matrix);
- if (!KindRes) {
- TokError("Expected the register to be followed by element width suffix");
- return MatchOperand_ParseFail;
- }
+ if (!KindRes)
+ return TokError(
+ "Expected the register to be followed by element width suffix");
ElementWidth = KindRes->second;
Reg = RegNum;
Lex(); // Eat the register.
- return MatchOperand_Success;
+ return ParseStatus::Success;
};
SMLoc S = getLoc();
@@ -4365,7 +4275,7 @@ AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) {
if (parseOptionalToken(AsmToken::RCurly)) {
Operands.push_back(AArch64Operand::CreateMatrixTileList(
/*RegMask=*/0, S, getLoc(), getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
// Try parse {za} alias early
@@ -4373,18 +4283,18 @@ AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) {
Lex(); // Eat 'za'
if (parseToken(AsmToken::RCurly, "'}' expected"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
Operands.push_back(AArch64Operand::CreateMatrixTileList(
/*RegMask=*/0xFF, S, getLoc(), getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
SMLoc TileLoc = getLoc();
unsigned FirstReg, ElementWidth;
auto ParseRes = ParseMatrixTile(FirstReg, ElementWidth);
- if (ParseRes != MatchOperand_Success) {
+ if (!ParseRes.isSuccess()) {
getLexer().UnLex(LCurly);
return ParseRes;
}
@@ -4403,14 +4313,12 @@ AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) {
TileLoc = getLoc();
unsigned Reg, NextElementWidth;
ParseRes = ParseMatrixTile(Reg, NextElementWidth);
- if (ParseRes != MatchOperand_Success)
+ if (!ParseRes.isSuccess())
return ParseRes;
// Element size must match on all regs in the list.
- if (ElementWidth != NextElementWidth) {
- Error(TileLoc, "mismatched register size suffix");
- return MatchOperand_ParseFail;
- }
+ if (ElementWidth != NextElementWidth)
+ return Error(TileLoc, "mismatched register size suffix");
if (RI->getEncodingValue(Reg) <= (RI->getEncodingValue(PrevReg)))
Warning(TileLoc, "tile list not in ascending order");
@@ -4426,7 +4334,7 @@ AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) {
}
if (parseToken(AsmToken::RCurly, "'}' expected"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
unsigned RegMask = 0;
for (auto Reg : DRegs)
@@ -4435,41 +4343,37 @@ AArch64AsmParser::tryParseMatrixTileList(OperandVector &Operands) {
Operands.push_back(
AArch64Operand::CreateMatrixTileList(RegMask, S, getLoc(), getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
template <RegKind VectorKind>
-OperandMatchResultTy
-AArch64AsmParser::tryParseVectorList(OperandVector &Operands,
- bool ExpectMatch) {
+ParseStatus AArch64AsmParser::tryParseVectorList(OperandVector &Operands,
+ bool ExpectMatch) {
MCAsmParser &Parser = getParser();
if (!getTok().is(AsmToken::LCurly))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
// Wrapper around parse function
auto ParseVector = [this](MCRegister &Reg, StringRef &Kind, SMLoc Loc,
- bool NoMatchIsError) {
+ bool NoMatchIsError) -> ParseStatus {
auto RegTok = getTok();
auto ParseRes = tryParseVectorRegister(Reg, Kind, VectorKind);
- if (ParseRes == MatchOperand_Success) {
+ if (ParseRes.isSuccess()) {
if (parseVectorKind(Kind, VectorKind))
return ParseRes;
llvm_unreachable("Expected a valid vector kind");
}
- if (RegTok.is(AsmToken::Identifier) && ParseRes == MatchOperand_NoMatch &&
+ if (RegTok.is(AsmToken::Identifier) && ParseRes.isNoMatch() &&
RegTok.getString().equals_insensitive("zt0"))
- return MatchOperand_NoMatch;
-
- if (RegTok.isNot(AsmToken::Identifier) ||
- ParseRes == MatchOperand_ParseFail ||
- (ParseRes == MatchOperand_NoMatch && NoMatchIsError &&
- !RegTok.getString().startswith_insensitive("za"))) {
- Error(Loc, "vector register expected");
- return MatchOperand_ParseFail;
- }
+ return ParseStatus::NoMatch;
+
+ if (RegTok.isNot(AsmToken::Identifier) || ParseRes.isFailure() ||
+ (ParseRes.isNoMatch() && NoMatchIsError &&
+ !RegTok.getString().starts_with_insensitive("za")))
+ return Error(Loc, "vector register expected");
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
};
int NumRegs = getNumRegsForRegKind(VectorKind);
@@ -4483,10 +4387,10 @@ AArch64AsmParser::tryParseVectorList(OperandVector &Operands,
// Put back the original left bracket if there was no match, so that
// different types of list-operands can be matched (e.g. SVE, Neon).
- if (ParseRes == MatchOperand_NoMatch)
+ if (ParseRes.isNoMatch())
Parser.getLexer().UnLex(LCurly);
- if (ParseRes != MatchOperand_Success)
+ if (!ParseRes.isSuccess())
return ParseRes;
int64_t PrevReg = FirstReg;
@@ -4499,22 +4403,18 @@ AArch64AsmParser::tryParseVectorList(OperandVector &Operands,
MCRegister Reg;
ParseRes = ParseVector(Reg, NextKind, getLoc(), true);
- if (ParseRes != MatchOperand_Success)
+ if (!ParseRes.isSuccess())
return ParseRes;
// Any Kind suffixes must match on all regs in the list.
- if (Kind != NextKind) {
- Error(Loc, "mismatched register size suffix");
- return MatchOperand_ParseFail;
- }
+ if (Kind != NextKind)
+ return Error(Loc, "mismatched register size suffix");
unsigned Space =
(PrevReg < Reg) ? (Reg - PrevReg) : (Reg + NumRegs - PrevReg);
- if (Space == 0 || Space > 3) {
- Error(Loc, "invalid number of vectors");
- return MatchOperand_ParseFail;
- }
+ if (Space == 0 || Space > 3)
+ return Error(Loc, "invalid number of vectors");
Count += Space;
}
@@ -4525,14 +4425,12 @@ AArch64AsmParser::tryParseVectorList(OperandVector &Operands,
StringRef NextKind;
MCRegister Reg;
ParseRes = ParseVector(Reg, NextKind, getLoc(), true);
- if (ParseRes != MatchOperand_Success)
+ if (!ParseRes.isSuccess())
return ParseRes;
// Any Kind suffixes must match on all regs in the list.
- if (Kind != NextKind) {
- Error(Loc, "mismatched register size suffix");
- return MatchOperand_ParseFail;
- }
+ if (Kind != NextKind)
+ return Error(Loc, "mismatched register size suffix");
unsigned RegVal = getContext().getRegisterInfo()->getEncodingValue(Reg);
unsigned PrevRegVal =
@@ -4544,10 +4442,8 @@ AArch64AsmParser::tryParseVectorList(OperandVector &Operands,
}
// Register must be incremental (with a wraparound at last register).
- if (Stride == 0 || RegVal != ((PrevRegVal + Stride) % NumRegs)) {
- Error(Loc, "registers must have the same sequential stride");
- return MatchOperand_ParseFail;
- }
+ if (Stride == 0 || RegVal != ((PrevRegVal + Stride) % NumRegs))
+ return Error(Loc, "registers must have the same sequential stride");
PrevReg = Reg;
++Count;
@@ -4555,12 +4451,10 @@ AArch64AsmParser::tryParseVectorList(OperandVector &Operands,
}
if (parseToken(AsmToken::RCurly, "'}' expected"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
- if (Count > 4) {
- Error(S, "invalid number of vectors");
- return MatchOperand_ParseFail;
- }
+ if (Count > 4)
+ return Error(S, "invalid number of vectors");
unsigned NumElements = 0;
unsigned ElementWidth = 0;
@@ -4573,54 +4467,48 @@ AArch64AsmParser::tryParseVectorList(OperandVector &Operands,
FirstReg, Count, Stride, NumElements, ElementWidth, VectorKind, S,
getLoc(), getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
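
The rewritten list parser above enforces two consistency rules: every register in a braced list must carry the same element-size suffix, and successive encodings must step by one fixed stride modulo the number of registers in the class, so lists may wrap past the last register. A stand-alone sketch of the stride check, assuming the encodings have already been extracted and reduced modulo NumRegs:

#include <vector>

// Checks that Encodings[i+1] == (Encodings[i] + Stride) % NumRegs for a
// stride derived from the first pair, mirroring the wraparound rule in
// tryParseVectorList. Purely illustrative; assumes each value < NumRegs.
static bool hasConsistentStride(const std::vector<unsigned> &Encodings,
                                unsigned NumRegs) {
  if (Encodings.size() < 2)
    return true;
  unsigned Stride = (Encodings[1] + NumRegs - Encodings[0]) % NumRegs;
  if (Stride == 0) // a repeated register is rejected, as in the parser
    return false;
  for (size_t I = 2; I < Encodings.size(); ++I)
    if (Encodings[I] != (Encodings[I - 1] + Stride) % NumRegs)
      return false;
  return true;
}

For example {30, 31, 0, 1} with NumRegs = 32 passes (stride 1 with wraparound), while {0, 1, 3} does not.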
/// parseNeonVectorList - Parse a vector list operand for AdvSIMD instructions.
bool AArch64AsmParser::parseNeonVectorList(OperandVector &Operands) {
auto ParseRes = tryParseVectorList<RegKind::NeonVector>(Operands, true);
- if (ParseRes != MatchOperand_Success)
+ if (!ParseRes.isSuccess())
return true;
- return tryParseVectorIndex(Operands) == MatchOperand_ParseFail;
+ return tryParseVectorIndex(Operands).isFailure();
}
-OperandMatchResultTy
-AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
SMLoc StartLoc = getLoc();
MCRegister RegNum;
- OperandMatchResultTy Res = tryParseScalarRegister(RegNum);
- if (Res != MatchOperand_Success)
+ ParseStatus Res = tryParseScalarRegister(RegNum);
+ if (!Res.isSuccess())
return Res;
if (!parseOptionalToken(AsmToken::Comma)) {
Operands.push_back(AArch64Operand::CreateReg(
RegNum, RegKind::Scalar, StartLoc, getLoc(), getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
parseOptionalToken(AsmToken::Hash);
- if (getTok().isNot(AsmToken::Integer)) {
- Error(getLoc(), "index must be absent or #0");
- return MatchOperand_ParseFail;
- }
+ if (getTok().isNot(AsmToken::Integer))
+ return Error(getLoc(), "index must be absent or #0");
const MCExpr *ImmVal;
if (getParser().parseExpression(ImmVal) || !isa<MCConstantExpr>(ImmVal) ||
- cast<MCConstantExpr>(ImmVal)->getValue() != 0) {
- Error(getLoc(), "index must be absent or #0");
- return MatchOperand_ParseFail;
- }
+ cast<MCConstantExpr>(ImmVal)->getValue() != 0)
+ return Error(getLoc(), "index must be absent or #0");
Operands.push_back(AArch64Operand::CreateReg(
RegNum, RegKind::Scalar, StartLoc, getLoc(), getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) {
SMLoc StartLoc = getLoc();
const AsmToken &Tok = getTok();
std::string Name = Tok.getString().lower();
@@ -4628,7 +4516,7 @@ AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) {
unsigned RegNum = matchRegisterNameAlias(Name, RegKind::LookupTable);
if (RegNum == 0)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Operands.push_back(AArch64Operand::CreateReg(
RegNum, RegKind::LookupTable, StartLoc, getLoc(), getContext()));
@@ -4638,38 +4526,35 @@ AArch64AsmParser::tryParseZTOperand(OperandVector &Operands) {
if (parseOptionalToken(AsmToken::LBrac)) {
const MCExpr *ImmVal;
if (getParser().parseExpression(ImmVal))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(ImmVal);
- if (!MCE) {
- TokError("immediate value expected for vector index");
- return MatchOperand_ParseFail;
- }
+ if (!MCE)
+ return TokError("immediate value expected for vector index");
if (parseToken(AsmToken::RBrac, "']' expected"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
Operands.push_back(AArch64Operand::CreateImm(
MCConstantExpr::create(MCE->getValue(), getContext()), StartLoc,
getLoc(), getContext()));
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
template <bool ParseShiftExtend, RegConstraintEqualityTy EqTy>
-OperandMatchResultTy
-AArch64AsmParser::tryParseGPROperand(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseGPROperand(OperandVector &Operands) {
SMLoc StartLoc = getLoc();
MCRegister RegNum;
- OperandMatchResultTy Res = tryParseScalarRegister(RegNum);
- if (Res != MatchOperand_Success)
+ ParseStatus Res = tryParseScalarRegister(RegNum);
+ if (!Res.isSuccess())
return Res;
// No shift/extend is the default.
if (!ParseShiftExtend || getTok().isNot(AsmToken::Comma)) {
Operands.push_back(AArch64Operand::CreateReg(
RegNum, RegKind::Scalar, StartLoc, getLoc(), getContext(), EqTy));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
// Eat the comma
@@ -4678,7 +4563,7 @@ AArch64AsmParser::tryParseGPROperand(OperandVector &Operands) {
// Match the shift
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> ExtOpnd;
Res = tryParseOptionalShiftExtend(ExtOpnd);
- if (Res != MatchOperand_Success)
+ if (!Res.isSuccess())
return Res;
auto Ext = static_cast<AArch64Operand*>(ExtOpnd.back().get());
@@ -4687,7 +4572,7 @@ AArch64AsmParser::tryParseGPROperand(OperandVector &Operands) {
Ext->getShiftExtendType(), Ext->getShiftExtendAmount(),
Ext->hasShiftExtendAmount()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
bool AArch64AsmParser::parseOptionalMulOperand(OperandVector &Operands) {
@@ -4725,7 +4610,7 @@ bool AArch64AsmParser::parseOptionalMulOperand(OperandVector &Operands) {
Operands.push_back(AArch64Operand::CreateImm(
MCConstantExpr::create(MCE->getValue(), getContext()), S, getLoc(),
getContext()));
- return MatchOperand_Success;
+ return false;
}
}
@@ -4775,17 +4660,17 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
bool invertCondCode) {
MCAsmParser &Parser = getParser();
- OperandMatchResultTy ResTy =
- MatchOperandParserImpl(Operands, Mnemonic, /*ParseForAllFeatures=*/ true);
+ ParseStatus ResTy =
+ MatchOperandParserImpl(Operands, Mnemonic, /*ParseForAllFeatures=*/true);
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
- if (ResTy == MatchOperand_Success)
+ if (ResTy.isSuccess())
return false;
// If there wasn't a custom match, try the generic matcher below. Otherwise,
// there was a match, but an error occurred, in which case, just return that
// the operand parsing failed.
- if (ResTy == MatchOperand_ParseFail)
+ if (ResTy.isFailure())
return true;
// Nothing custom, so do general case parsing.
@@ -4843,20 +4728,16 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
if (!parseOptionalMulOperand(Operands))
return false;
- // If this is an "smstart" or "smstop" instruction, parse its special
- // keyword operand as an identifier.
- if (Mnemonic == "smstart" || Mnemonic == "smstop")
- return parseKeywordOperand(Operands);
-
// This could be an optional "shift" or "extend" operand.
- OperandMatchResultTy GotShift = tryParseOptionalShiftExtend(Operands);
+ ParseStatus GotShift = tryParseOptionalShiftExtend(Operands);
// We can only continue if no tokens were eaten.
- if (GotShift != MatchOperand_NoMatch)
- return GotShift;
+ if (!GotShift.isNoMatch())
+ return GotShift.isFailure();
// If this is a two-word mnemonic, parse its special keyword
// operand as an identifier.
- if (Mnemonic == "brb")
+ if (Mnemonic == "brb" || Mnemonic == "smstart" || Mnemonic == "smstop" ||
+ Mnemonic == "gcsb")
return parseKeywordOperand(Operands);
// This was not a register so parse other operands that start with an
@@ -4938,7 +4819,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
if (isa<MCConstantExpr>(SubExprVal)) {
uint64_t Imm = (cast<MCConstantExpr>(SubExprVal))->getValue();
uint32_t ShiftAmt = 0, MaxShiftAmt = IsXReg ? 48 : 16;
- while(Imm > 0xFFFF && countTrailingZeros(Imm) >= 16) {
+ while (Imm > 0xFFFF && llvm::countr_zero(Imm) >= 16) {
ShiftAmt += 16;
Imm >>= 16;
}
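
The loop above (now using llvm::countr_zero instead of the deprecated countTrailingZeros) canonicalises a wide constant before it is emitted as an immediate-plus-"lsl #N" form: while the value does not fit in 16 bits and its low 16 bits are all zero, it shifts right by 16 and accumulates the shift. A tiny stand-alone version using C++20's std::countr_zero in place of the LLVM helper; the surrounding parser additionally caps the shift at 48 or 16 depending on the register class:

#include <bit>
#include <cstdint>
#include <utility>

// Split an immediate into a 16-bit payload plus a left shift that is a
// multiple of 16, the same normalisation the loop above performs.
static std::pair<uint64_t, uint32_t> splitShiftedImm16(uint64_t Imm) {
  uint32_t ShiftAmt = 0;
  while (Imm > 0xFFFF && std::countr_zero(Imm) >= 16) {
    Imm >>= 16;
    ShiftAmt += 16;
  }
  return {Imm, ShiftAmt}; // e.g. 0x12340000 -> {0x1234, 16}
}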
@@ -5309,6 +5190,14 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst, SMLoc &IDLoc,
"is also a destination");
[[fallthrough]];
}
+ case AArch64::LDR_ZA:
+ case AArch64::STR_ZA: {
+ if (Inst.getOperand(2).isImm() && Inst.getOperand(4).isImm() &&
+ Inst.getOperand(2).getImm() != Inst.getOperand(4).getImm())
+ return Error(Loc[1],
+ "unpredictable instruction, immediate and offset mismatch.");
+ break;
+ }
case AArch64::LDPDi:
case AArch64::LDPQi:
case AArch64::LDPSi:
@@ -6355,7 +6244,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// instruction for FP registers correctly in some rare circumstances. Convert
// it to a safe instruction and warn (because silently changing someone's
// assembly is rude).
- if (getSTI().getFeatureBits()[AArch64::FeatureZCZeroingFPWorkaround] &&
+ if (getSTI().hasFeature(AArch64::FeatureZCZeroingFPWorkaround) &&
NumOperands == 4 && Tok == "movi") {
AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]);
AArch64Operand &Op2 = static_cast<AArch64Operand &>(*Operands[2]);
@@ -6880,8 +6769,8 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
std::tie(Arch, ExtensionString) =
getParser().parseStringToEndOfStatement().trim().split('+');
- const AArch64::ArchInfo &ArchInfo = AArch64::parseArch(Arch);
- if (ArchInfo == AArch64::INVALID)
+ std::optional<AArch64::ArchInfo> ArchInfo = AArch64::parseArch(Arch);
+ if (!ArchInfo)
return Error(ArchLoc, "unknown arch name");
if (parseToken(AsmToken::EndOfStatement))
@@ -6889,9 +6778,8 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
// Get the architecture and extension features.
std::vector<StringRef> AArch64Features;
- AArch64Features.push_back(ArchInfo.ArchFeature);
- AArch64::getExtensionFeatures(
- AArch64::getDefaultExtensions("generic", ArchInfo), AArch64Features);
+ AArch64Features.push_back(ArchInfo->ArchFeature);
+ AArch64::getExtensionFeatures(ArchInfo->DefaultExts, AArch64Features);
MCSubtargetInfo &STI = copySTI();
std::vector<std::string> ArchFeatures(AArch64Features.begin(), AArch64Features.end());
@@ -6902,13 +6790,14 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
if (!ExtensionString.empty())
ExtensionString.split(RequestedExtensions, '+');
- ExpandCryptoAEK(ArchInfo, RequestedExtensions);
+ ExpandCryptoAEK(*ArchInfo, RequestedExtensions);
FeatureBitset Features = STI.getFeatureBits();
+ setAvailableFeatures(ComputeAvailableFeatures(Features));
for (auto Name : RequestedExtensions) {
bool EnableFeature = true;
- if (Name.startswith_insensitive("no")) {
+ if (Name.starts_with_insensitive("no")) {
EnableFeature = false;
Name = Name.substr(2);
}
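
parseDirectiveArch now receives a std::optional<ArchInfo> from AArch64::parseArch and seeds the feature set from ArchInfo->DefaultExts before walking the "+ext"/"+noext" suffixes, where a case-insensitive "no" prefix turns the request into a disable. A rough sketch of that suffix walk over plain strings (the helper names here are invented, not the StringRef API):

#include <cctype>
#include <string>
#include <vector>

// Invented helper: case-insensitive "does Name start with Prefix".
static bool startsWithInsensitive(const std::string &Name,
                                  const std::string &Prefix) {
  if (Name.size() < Prefix.size())
    return false;
  for (size_t I = 0; I < Prefix.size(); ++I)
    if (std::tolower((unsigned char)Name[I]) !=
        std::tolower((unsigned char)Prefix[I]))
      return false;
  return true;
}

struct ExtensionRequest {
  std::string Name;
  bool Enable;
};

// Split "sve2+nosb" style suffixes into (name, enable) pairs, the same shape
// of loop parseDirectiveArch runs over RequestedExtensions.
static std::vector<ExtensionRequest> parseExtensionSuffix(std::string Rest) {
  std::vector<ExtensionRequest> Out;
  size_t Pos = 0;
  while (Pos < Rest.size()) {
    size_t Next = Rest.find('+', Pos);
    std::string Name = Rest.substr(Pos, Next - Pos);
    bool Enable = true;
    if (startsWithInsensitive(Name, "no")) {
      Enable = false;
      Name = Name.substr(2);
    }
    if (!Name.empty())
      Out.push_back({Name, Enable});
    if (Next == std::string::npos)
      break;
    Pos = Next + 1;
  }
  return Out;
}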
@@ -6942,7 +6831,7 @@ bool AArch64AsmParser::parseDirectiveArchExtension(SMLoc L) {
return true;
bool EnableFeature = true;
- if (Name.startswith_insensitive("no")) {
+ if (Name.starts_with_insensitive("no")) {
EnableFeature = false;
Name = Name.substr(2);
}
@@ -6987,26 +6876,24 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) {
if (!ExtensionString.empty())
ExtensionString.split(RequestedExtensions, '+');
- // FIXME This is using tablegen data, but should be moved to ARMTargetParser
- // once that is tablegen'ed
- if (!getSTI().isCPUStringValid(CPU)) {
+ const std::optional<llvm::AArch64::ArchInfo> CpuArch = llvm::AArch64::getArchForCpu(CPU);
+ if (!CpuArch) {
Error(CurLoc, "unknown CPU name");
return false;
}
+ ExpandCryptoAEK(*CpuArch, RequestedExtensions);
MCSubtargetInfo &STI = copySTI();
STI.setDefaultFeatures(CPU, /*TuneCPU*/ CPU, "");
CurLoc = incrementLoc(CurLoc, CPU.size());
- ExpandCryptoAEK(llvm::AArch64::getArchForCpu(CPU), RequestedExtensions);
-
for (auto Name : RequestedExtensions) {
// Advance source location past '+'.
CurLoc = incrementLoc(CurLoc, 1);
bool EnableFeature = true;
- if (Name.startswith_insensitive("no")) {
+ if (Name.starts_with_insensitive("no")) {
EnableFeature = false;
Name = Name.substr(2);
}
@@ -7144,48 +7031,48 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
SMLoc SRegLoc = getLoc();
RegKind RegisterKind = RegKind::Scalar;
MCRegister RegNum;
- OperandMatchResultTy ParseRes = tryParseScalarRegister(RegNum);
+ ParseStatus ParseRes = tryParseScalarRegister(RegNum);
- if (ParseRes != MatchOperand_Success) {
+ if (!ParseRes.isSuccess()) {
StringRef Kind;
RegisterKind = RegKind::NeonVector;
ParseRes = tryParseVectorRegister(RegNum, Kind, RegKind::NeonVector);
- if (ParseRes == MatchOperand_ParseFail)
+ if (ParseRes.isFailure())
return true;
- if (ParseRes == MatchOperand_Success && !Kind.empty())
+ if (ParseRes.isSuccess() && !Kind.empty())
return Error(SRegLoc, "vector register without type specifier expected");
}
- if (ParseRes != MatchOperand_Success) {
+ if (!ParseRes.isSuccess()) {
StringRef Kind;
RegisterKind = RegKind::SVEDataVector;
ParseRes =
tryParseVectorRegister(RegNum, Kind, RegKind::SVEDataVector);
- if (ParseRes == MatchOperand_ParseFail)
+ if (ParseRes.isFailure())
return true;
- if (ParseRes == MatchOperand_Success && !Kind.empty())
+ if (ParseRes.isSuccess() && !Kind.empty())
return Error(SRegLoc,
"sve vector register without type specifier expected");
}
- if (ParseRes != MatchOperand_Success) {
+ if (!ParseRes.isSuccess()) {
StringRef Kind;
RegisterKind = RegKind::SVEPredicateVector;
ParseRes = tryParseVectorRegister(RegNum, Kind, RegKind::SVEPredicateVector);
- if (ParseRes == MatchOperand_ParseFail)
+ if (ParseRes.isFailure())
return true;
- if (ParseRes == MatchOperand_Success && !Kind.empty())
+ if (ParseRes.isSuccess() && !Kind.empty())
return Error(SRegLoc,
"sve predicate register without type specifier expected");
}
- if (ParseRes != MatchOperand_Success)
+ if (!ParseRes.isSuccess())
return Error(SRegLoc, "register name or alias expected");
// Shouldn't be anything else.
@@ -7626,61 +7513,21 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64AsmParser() {
unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
unsigned Kind) {
AArch64Operand &Op = static_cast<AArch64Operand &>(AsmOp);
- // If the kind is a token for a literal immediate, check if our asm
- // operand matches. This is for InstAliases which have a fixed-value
- // immediate in the syntax.
- int64_t ExpectedVal;
+
+ auto MatchesOpImmediate = [&](int64_t ExpectedVal) -> MatchResultTy {
+ if (!Op.isImm())
+ return Match_InvalidOperand;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm());
+ if (!CE)
+ return Match_InvalidOperand;
+ if (CE->getValue() == ExpectedVal)
+ return Match_Success;
+ return Match_InvalidOperand;
+ };
+
switch (Kind) {
default:
return Match_InvalidOperand;
- case MCK__HASH_0:
- ExpectedVal = 0;
- break;
- case MCK__HASH_1:
- ExpectedVal = 1;
- break;
- case MCK__HASH_12:
- ExpectedVal = 12;
- break;
- case MCK__HASH_16:
- ExpectedVal = 16;
- break;
- case MCK__HASH_2:
- ExpectedVal = 2;
- break;
- case MCK__HASH_24:
- ExpectedVal = 24;
- break;
- case MCK__HASH_3:
- ExpectedVal = 3;
- break;
- case MCK__HASH_32:
- ExpectedVal = 32;
- break;
- case MCK__HASH_4:
- ExpectedVal = 4;
- break;
- case MCK__HASH_48:
- ExpectedVal = 48;
- break;
- case MCK__HASH_6:
- ExpectedVal = 6;
- break;
- case MCK__HASH_64:
- ExpectedVal = 64;
- break;
- case MCK__HASH_8:
- ExpectedVal = 8;
- break;
- case MCK__HASH__MINUS_4:
- ExpectedVal = -4;
- break;
- case MCK__HASH__MINUS_8:
- ExpectedVal = -8;
- break;
- case MCK__HASH__MINUS_16:
- ExpectedVal = -16;
- break;
case MCK_MPR:
// If the Kind is a token for the MPR register class which has the "za"
// register (SME accumulator array), check if the asm is a literal "za"
@@ -7689,34 +7536,61 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
if (Op.isTokenEqual("za"))
return Match_Success;
return Match_InvalidOperand;
+
+ // If the kind is a token for a literal immediate, check if our asm operand
+ // matches. This is for InstAliases which have a fixed-value immediate in
+ // the asm string, such as hints which are parsed into a specific
+ // instruction definition.
+#define MATCH_HASH(N) \
+ case MCK__HASH_##N: \
+ return MatchesOpImmediate(N);
+ MATCH_HASH(0)
+ MATCH_HASH(1)
+ MATCH_HASH(2)
+ MATCH_HASH(3)
+ MATCH_HASH(4)
+ MATCH_HASH(6)
+ MATCH_HASH(7)
+ MATCH_HASH(8)
+ MATCH_HASH(10)
+ MATCH_HASH(12)
+ MATCH_HASH(14)
+ MATCH_HASH(16)
+ MATCH_HASH(24)
+ MATCH_HASH(25)
+ MATCH_HASH(26)
+ MATCH_HASH(27)
+ MATCH_HASH(28)
+ MATCH_HASH(29)
+ MATCH_HASH(30)
+ MATCH_HASH(31)
+ MATCH_HASH(32)
+ MATCH_HASH(40)
+ MATCH_HASH(48)
+ MATCH_HASH(64)
+#undef MATCH_HASH
+#define MATCH_HASH_MINUS(N) \
+ case MCK__HASH__MINUS_##N: \
+ return MatchesOpImmediate(-N);
+ MATCH_HASH_MINUS(4)
+ MATCH_HASH_MINUS(8)
+ MATCH_HASH_MINUS(16)
+#undef MATCH_HASH_MINUS
}
- if (!Op.isImm())
- return Match_InvalidOperand;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm());
- if (!CE)
- return Match_InvalidOperand;
- if (CE->getValue() == ExpectedVal)
- return Match_Success;
- return Match_InvalidOperand;
}
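
The rewrite above folds sixteen near-identical "case MCK__HASH_N: ExpectedVal = N; break;" blocks into one lambda plus two X-macro style lists, so adding a new fixed-immediate token class is a one-line change. A self-contained demo of the same macro technique (generic names, not the tablegen-generated MCK_* enumerators):

#include <cstdio>

// One macro stamps out a switch case per literal value, forwarding to a
// shared helper -- the shape MATCH_HASH(N) gives the validator above.
static const char *classify(int Kind, long Value) {
  auto Matches = [&](long Expected) {
    return Value == Expected ? "match" : "wrong value";
  };
  switch (Kind) {
  default:
    return "unknown kind";
#define MATCH_HASH(N)                                                          \
  case N:                                                                      \
    return Matches(N);
    MATCH_HASH(0)
    MATCH_HASH(4)
    MATCH_HASH(16)
#undef MATCH_HASH
  }
}

int main() { std::puts(classify(4, 4)); } // prints "match"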
-OperandMatchResultTy
-AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
SMLoc S = getLoc();
- if (getTok().isNot(AsmToken::Identifier)) {
- Error(S, "expected register");
- return MatchOperand_ParseFail;
- }
+ if (getTok().isNot(AsmToken::Identifier))
+ return Error(S, "expected register");
MCRegister FirstReg;
- OperandMatchResultTy Res = tryParseScalarRegister(FirstReg);
- if (Res != MatchOperand_Success) {
- Error(S, "expected first even register of a "
- "consecutive same-size even/odd register pair");
- return MatchOperand_ParseFail;
- }
+ ParseStatus Res = tryParseScalarRegister(FirstReg);
+ if (!Res.isSuccess())
+ return Error(S, "expected first even register of a consecutive same-size "
+ "even/odd register pair");
const MCRegisterClass &WRegClass =
AArch64MCRegisterClasses[AArch64::GPR32RegClassID];
@@ -7725,44 +7599,34 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
bool isXReg = XRegClass.contains(FirstReg),
isWReg = WRegClass.contains(FirstReg);
- if (!isXReg && !isWReg) {
- Error(S, "expected first even register of a "
- "consecutive same-size even/odd register pair");
- return MatchOperand_ParseFail;
- }
+ if (!isXReg && !isWReg)
+ return Error(S, "expected first even register of a consecutive same-size "
+ "even/odd register pair");
const MCRegisterInfo *RI = getContext().getRegisterInfo();
unsigned FirstEncoding = RI->getEncodingValue(FirstReg);
- if (FirstEncoding & 0x1) {
- Error(S, "expected first even register of a "
- "consecutive same-size even/odd register pair");
- return MatchOperand_ParseFail;
- }
+ if (FirstEncoding & 0x1)
+ return Error(S, "expected first even register of a consecutive same-size "
+ "even/odd register pair");
- if (getTok().isNot(AsmToken::Comma)) {
- Error(getLoc(), "expected comma");
- return MatchOperand_ParseFail;
- }
+ if (getTok().isNot(AsmToken::Comma))
+ return Error(getLoc(), "expected comma");
// Eat the comma
Lex();
SMLoc E = getLoc();
MCRegister SecondReg;
Res = tryParseScalarRegister(SecondReg);
- if (Res != MatchOperand_Success) {
- Error(E, "expected second odd register of a "
- "consecutive same-size even/odd register pair");
- return MatchOperand_ParseFail;
- }
+ if (!Res.isSuccess())
+ return Error(E, "expected second odd register of a consecutive same-size "
+ "even/odd register pair");
if (RI->getEncodingValue(SecondReg) != FirstEncoding + 1 ||
(isXReg && !XRegClass.contains(SecondReg)) ||
- (isWReg && !WRegClass.contains(SecondReg))) {
- Error(E, "expected second odd register of a "
- "consecutive same-size even/odd register pair");
- return MatchOperand_ParseFail;
- }
+ (isWReg && !WRegClass.contains(SecondReg)))
+ return Error(E, "expected second odd register of a consecutive same-size "
+ "even/odd register pair");
unsigned Pair = 0;
if (isXReg) {
@@ -7776,29 +7640,28 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
Operands.push_back(AArch64Operand::CreateReg(Pair, RegKind::Scalar, S,
getLoc(), getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
template <bool ParseShiftExtend, bool ParseSuffix>
-OperandMatchResultTy
-AArch64AsmParser::tryParseSVEDataVector(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseSVEDataVector(OperandVector &Operands) {
const SMLoc S = getLoc();
// Check for a SVE vector register specifier first.
MCRegister RegNum;
StringRef Kind;
- OperandMatchResultTy Res =
+ ParseStatus Res =
tryParseVectorRegister(RegNum, Kind, RegKind::SVEDataVector);
- if (Res != MatchOperand_Success)
+ if (!Res.isSuccess())
return Res;
if (ParseSuffix && Kind.empty())
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
const auto &KindRes = parseVectorKind(Kind, RegKind::SVEDataVector);
if (!KindRes)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
unsigned ElementWidth = KindRes->second;
@@ -7807,10 +7670,10 @@ AArch64AsmParser::tryParseSVEDataVector(OperandVector &Operands) {
Operands.push_back(AArch64Operand::CreateVectorReg(
RegNum, RegKind::SVEDataVector, ElementWidth, S, S, getContext()));
- OperandMatchResultTy Res = tryParseVectorIndex(Operands);
- if (Res == MatchOperand_ParseFail)
- return MatchOperand_ParseFail;
- return MatchOperand_Success;
+ ParseStatus Res = tryParseVectorIndex(Operands);
+ if (Res.isFailure())
+ return ParseStatus::Failure;
+ return ParseStatus::Success;
}
// Eat the comma
@@ -7819,7 +7682,7 @@ AArch64AsmParser::tryParseSVEDataVector(OperandVector &Operands) {
// Match the shift
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> ExtOpnd;
Res = tryParseOptionalShiftExtend(ExtOpnd);
- if (Res != MatchOperand_Success)
+ if (!Res.isSuccess())
return Res;
auto Ext = static_cast<AArch64Operand *>(ExtOpnd.back().get());
@@ -7828,11 +7691,10 @@ AArch64AsmParser::tryParseSVEDataVector(OperandVector &Operands) {
getContext(), Ext->getShiftExtendType(), Ext->getShiftExtendAmount(),
Ext->hasShiftExtendAmount()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-AArch64AsmParser::tryParseSVEPattern(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseSVEPattern(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc SS = getLoc();
@@ -7840,7 +7702,7 @@ AArch64AsmParser::tryParseSVEPattern(OperandVector &Operands) {
bool IsHash = TokE.is(AsmToken::Hash);
if (!IsHash && TokE.isNot(AsmToken::Identifier))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
int64_t Pattern;
if (IsHash) {
@@ -7850,18 +7712,18 @@ AArch64AsmParser::tryParseSVEPattern(OperandVector &Operands) {
const MCExpr *ImmVal;
SS = getLoc();
if (Parser.parseExpression(ImmVal))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
auto *MCE = dyn_cast<MCConstantExpr>(ImmVal);
if (!MCE)
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
Pattern = MCE->getValue();
} else {
// Parse the pattern
auto Pat = AArch64SVEPredPattern::lookupSVEPREDPATByName(TokE.getString());
if (!Pat)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Lex();
Pattern = Pat->Encoding;
@@ -7872,10 +7734,10 @@ AArch64AsmParser::tryParseSVEPattern(OperandVector &Operands) {
AArch64Operand::CreateImm(MCConstantExpr::create(Pattern, getContext()),
SS, getLoc(), getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
+ParseStatus
AArch64AsmParser::tryParseSVEVecLenSpecifier(OperandVector &Operands) {
int64_t Pattern;
SMLoc SS = getLoc();
@@ -7884,7 +7746,7 @@ AArch64AsmParser::tryParseSVEVecLenSpecifier(OperandVector &Operands) {
auto Pat = AArch64SVEVecLenSpecifier::lookupSVEVECLENSPECIFIERByName(
TokE.getString());
if (!Pat)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Lex();
Pattern = Pat->Encoding;
@@ -7894,62 +7756,59 @@ AArch64AsmParser::tryParseSVEVecLenSpecifier(OperandVector &Operands) {
AArch64Operand::CreateImm(MCConstantExpr::create(Pattern, getContext()),
SS, getLoc(), getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-AArch64AsmParser::tryParseGPR64x8(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseGPR64x8(OperandVector &Operands) {
SMLoc SS = getLoc();
MCRegister XReg;
- if (tryParseScalarRegister(XReg) != MatchOperand_Success)
- return MatchOperand_NoMatch;
+ if (!tryParseScalarRegister(XReg).isSuccess())
+ return ParseStatus::NoMatch;
MCContext &ctx = getContext();
const MCRegisterInfo *RI = ctx.getRegisterInfo();
int X8Reg = RI->getMatchingSuperReg(
XReg, AArch64::x8sub_0,
&AArch64MCRegisterClasses[AArch64::GPR64x8ClassRegClassID]);
- if (!X8Reg) {
- Error(SS, "expected an even-numbered x-register in the range [x0,x22]");
- return MatchOperand_ParseFail;
- }
+ if (!X8Reg)
+ return Error(SS,
+ "expected an even-numbered x-register in the range [x0,x22]");
Operands.push_back(
AArch64Operand::CreateReg(X8Reg, RegKind::Scalar, SS, getLoc(), ctx));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-AArch64AsmParser::tryParseImmRange(OperandVector &Operands) {
+ParseStatus AArch64AsmParser::tryParseImmRange(OperandVector &Operands) {
SMLoc S = getLoc();
if (getTok().isNot(AsmToken::Integer))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
if (getLexer().peekTok().isNot(AsmToken::Colon))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
const MCExpr *ImmF;
if (getParser().parseExpression(ImmF))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
if (getTok().isNot(AsmToken::Colon))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Lex(); // Eat ':'
if (getTok().isNot(AsmToken::Integer))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
SMLoc E = getTok().getLoc();
const MCExpr *ImmL;
if (getParser().parseExpression(ImmL))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
unsigned ImmFVal = dyn_cast<MCConstantExpr>(ImmF)->getValue();
unsigned ImmLVal = dyn_cast<MCConstantExpr>(ImmL)->getValue();
Operands.push_back(
AArch64Operand::CreateImmRange(ImmFVal, ImmLVal, S, E, getContext()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
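
tryParseImmRange recognises the "first:last" immediate-range syntax by peeking for an integer followed by a colon before committing, and it returns NoMatch at each early exit so ordinary immediates still go through the normal path. A token-level sketch of the same shape over a plain string (simplified, not the MC lexer types):

#include <cctype>
#include <optional>
#include <string>
#include <utility>

// Parse "first:last" (both unsigned decimal) from Text, returning nullopt --
// the analogue of ParseStatus::NoMatch -- if the shape is not
// integer ':' integer. Illustrative only.
static std::optional<std::pair<unsigned, unsigned>>
parseImmRange(const std::string &Text) {
  size_t Pos = 0;
  auto ParseInt = [&]() -> std::optional<unsigned> {
    if (Pos >= Text.size() || !std::isdigit((unsigned char)Text[Pos]))
      return std::nullopt;
    unsigned V = 0;
    while (Pos < Text.size() && std::isdigit((unsigned char)Text[Pos]))
      V = V * 10 + (Text[Pos++] - '0');
    return V;
  };
  auto First = ParseInt();
  if (!First || Pos >= Text.size() || Text[Pos] != ':')
    return std::nullopt;
  ++Pos; // eat ':'
  auto Last = ParseInt();
  if (!Last)
    return std::nullopt;
  return std::make_pair(*First, *Last);
}

For example "0:3" yields {0, 3}, while "12" or "za" yield nullopt.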
diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index 6b5d665e627a..e50ac5c92d50 100644
--- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -701,17 +701,16 @@ DecodeMatrixTileListRegisterClass(MCInst &Inst, unsigned RegMask,
return Success;
}
-static const SmallVector<SmallVector<unsigned, 16>, 5>
- MatrixZATileDecoderTable = {
- {AArch64::ZAB0},
- {AArch64::ZAH0, AArch64::ZAH1},
- {AArch64::ZAS0, AArch64::ZAS1, AArch64::ZAS2, AArch64::ZAS3},
- {AArch64::ZAD0, AArch64::ZAD1, AArch64::ZAD2, AArch64::ZAD3,
- AArch64::ZAD4, AArch64::ZAD5, AArch64::ZAD6, AArch64::ZAD7},
- {AArch64::ZAQ0, AArch64::ZAQ1, AArch64::ZAQ2, AArch64::ZAQ3,
- AArch64::ZAQ4, AArch64::ZAQ5, AArch64::ZAQ6, AArch64::ZAQ7,
- AArch64::ZAQ8, AArch64::ZAQ9, AArch64::ZAQ10, AArch64::ZAQ11,
- AArch64::ZAQ12, AArch64::ZAQ13, AArch64::ZAQ14, AArch64::ZAQ15}};
+static const MCPhysReg MatrixZATileDecoderTable[5][16] = {
+ {AArch64::ZAB0},
+ {AArch64::ZAH0, AArch64::ZAH1},
+ {AArch64::ZAS0, AArch64::ZAS1, AArch64::ZAS2, AArch64::ZAS3},
+ {AArch64::ZAD0, AArch64::ZAD1, AArch64::ZAD2, AArch64::ZAD3, AArch64::ZAD4,
+ AArch64::ZAD5, AArch64::ZAD6, AArch64::ZAD7},
+ {AArch64::ZAQ0, AArch64::ZAQ1, AArch64::ZAQ2, AArch64::ZAQ3, AArch64::ZAQ4,
+ AArch64::ZAQ5, AArch64::ZAQ6, AArch64::ZAQ7, AArch64::ZAQ8, AArch64::ZAQ9,
+ AArch64::ZAQ10, AArch64::ZAQ11, AArch64::ZAQ12, AArch64::ZAQ13,
+ AArch64::ZAQ14, AArch64::ZAQ15}};
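
Replacing the SmallVector-of-SmallVector table with a plain static const MCPhysReg[5][16] trades a dynamically constructed global (and its static initialiser) for read-only data; the decoder can then index by element size and tile number directly. A hedged sketch of that kind of lookup with invented values -- the real DecodeMatrixTile derives both indices from the instruction encoding:

#include <cstdint>

// Illustrative lookup over a fixed 2-D register table. The row selects the
// element size (B/H/S/D/Q), the column the tile number within that size;
// 0 stands in for "invalid" here, and unwritten slots are zero-filled.
using PhysReg = uint16_t;

static const PhysReg TileTable[5][16] = {
    {101},                // one B tile
    {201, 202},           // two H tiles
    {301, 302, 303, 304}, // four S tiles
    // D and Q rows elided in this sketch
};

static PhysReg lookupTile(unsigned SizeRow, unsigned TileNo) {
  if (SizeRow >= 5 || TileNo >= 16)
    return 0;
  return TileTable[SizeRow][TileNo];
}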
template <unsigned NumBitsForTile>
static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index e20b0f4a9e3f..a66d2ddee652 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -15,6 +15,7 @@
#include "AArch64CallLowering.h"
#include "AArch64ISelLowering.h"
#include "AArch64MachineFunctionInfo.h"
+#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
@@ -24,7 +25,7 @@
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -32,6 +33,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -40,7 +42,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -123,7 +124,7 @@ struct AArch64OutgoingValueAssigner
} else
Res = AssignFnVarArg(ValNo, ValVT, LocVT, LocInfo, Flags, State);
- StackOffset = State.getNextStackOffset();
+ StackSize = State.getStackSize();
return Res;
}
};
@@ -405,29 +406,22 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
if (NewVT.isVector()) {
if (OldLLT.isVector()) {
if (NewLLT.getNumElements() > OldLLT.getNumElements()) {
- // We don't handle VA types which are not exactly twice the
- // size, but can easily be done in future.
- if (NewLLT.getNumElements() != OldLLT.getNumElements() * 2) {
- LLVM_DEBUG(dbgs() << "Outgoing vector ret has too many elts");
- return false;
- }
- auto Undef = MIRBuilder.buildUndef({OldLLT});
+
CurVReg =
- MIRBuilder.buildMergeLikeInstr({NewLLT}, {CurVReg, Undef})
+ MIRBuilder.buildPadVectorWithUndefElements(NewLLT, CurVReg)
.getReg(0);
} else {
// Just do a vector extend.
CurVReg = MIRBuilder.buildInstr(ExtendOp, {NewLLT}, {CurVReg})
.getReg(0);
}
- } else if (NewLLT.getNumElements() == 2) {
- // We need to pad a <1 x S> type to <2 x S>. Since we don't have
- // <1 x S> vector types in GISel we use a build_vector instead
- // of a vector merge/concat.
- auto Undef = MIRBuilder.buildUndef({OldLLT});
+ } else if (NewLLT.getNumElements() >= 2 &&
+ NewLLT.getNumElements() <= 8) {
+ // We need to pad a <1 x S> type to <2/4/8 x S>. Since we don't
+ // have <1 x S> vector types in GISel we use a build_vector
+ // instead of a vector merge/concat.
CurVReg =
- MIRBuilder
- .buildBuildVector({NewLLT}, {CurVReg, Undef.getReg(0)})
+ MIRBuilder.buildPadVectorWithUndefElements(NewLLT, CurVReg)
.getReg(0);
} else {
LLVM_DEBUG(dbgs() << "Could not handle ret ty\n");
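buildPadVectorWithUndefElements widens a short vector to the target element count by filling the extra lanes with undef, which is what replaces the hand-rolled undef/merge and build_vector sequences removed above. A rough standalone analogue, with std::optional standing in for undef lanes (names are invented for the sketch, this is not the GISel API):

    #include <cstdio>
    #include <optional>
    #include <vector>

    // Conceptual analogue of padding with undef: widen a value vector to the
    // requested lane count by appending lanes that carry no defined value.
    template <typename T>
    std::vector<std::optional<T>> padWithUndef(const std::vector<T> &Src,
                                               unsigned DstNumElts) {
      std::vector<std::optional<T>> Dst(Src.begin(), Src.end());
      Dst.resize(DstNumElts, std::nullopt); // extra lanes are "undef"
      return Dst;
    }

    int main() {
      auto Padded = padWithUndef<int>({42}, 4); // <1 x s32> -> <4 x s32>
      std::printf("%zu lanes, lane0 defined: %d\n", Padded.size(),
                  (int)Padded[0].has_value());
    }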
@@ -526,10 +520,9 @@ static void handleMustTailForwardedRegisters(MachineIRBuilder &MIRBuilder,
bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
auto &F = MF.getFunction();
- if (isa<ScalableVectorType>(F.getReturnType()))
- return true;
- if (llvm::any_of(F.args(), [](const Argument &A) {
- return isa<ScalableVectorType>(A.getType());
+ if (F.getReturnType()->isScalableTy() ||
+ llvm::any_of(F.args(), [](const Argument &A) {
+ return A.getType()->isScalableTy();
}))
return true;
const auto &ST = MF.getSubtarget<AArch64Subtarget>();
@@ -546,6 +539,88 @@ bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
return false;
}
+void AArch64CallLowering::saveVarArgRegisters(
+ MachineIRBuilder &MIRBuilder, CallLowering::IncomingValueHandler &Handler,
+ CCState &CCInfo) const {
+ auto GPRArgRegs = AArch64::getGPRArgRegs();
+ auto FPRArgRegs = AArch64::getFPRArgRegs();
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+ auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ bool IsWin64CC =
+ Subtarget.isCallingConvWin64(CCInfo.getCallingConv());
+ const LLT p0 = LLT::pointer(0, 64);
+ const LLT s64 = LLT::scalar(64);
+
+ unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
+ unsigned NumVariadicGPRArgRegs = GPRArgRegs.size() - FirstVariadicGPR + 1;
+
+ unsigned GPRSaveSize = 8 * (GPRArgRegs.size() - FirstVariadicGPR);
+ int GPRIdx = 0;
+ if (GPRSaveSize != 0) {
+ if (IsWin64CC) {
+ GPRIdx = MFI.CreateFixedObject(GPRSaveSize,
+ -static_cast<int>(GPRSaveSize), false);
+ } else
+ GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
+
+ auto FIN = MIRBuilder.buildFrameIndex(p0, GPRIdx);
+ auto Offset =
+ MIRBuilder.buildConstant(MRI.createGenericVirtualRegister(s64), 8);
+
+ for (unsigned i = FirstVariadicGPR; i < GPRArgRegs.size(); ++i) {
+ Register Val = MRI.createGenericVirtualRegister(s64);
+ Handler.assignValueToReg(
+ Val, GPRArgRegs[i],
+ CCValAssign::getReg(i + MF.getFunction().getNumOperands(), MVT::i64,
+ GPRArgRegs[i], MVT::i64, CCValAssign::Full));
+ auto MPO = IsWin64CC ? MachinePointerInfo::getFixedStack(
+ MF, GPRIdx, (i - FirstVariadicGPR) * 8)
+ : MachinePointerInfo::getStack(MF, i * 8);
+ MIRBuilder.buildStore(Val, FIN, MPO, inferAlignFromPtrInfo(MF, MPO));
+
+ FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0),
+ FIN.getReg(0), Offset);
+ }
+ }
+ FuncInfo->setVarArgsGPRIndex(GPRIdx);
+ FuncInfo->setVarArgsGPRSize(GPRSaveSize);
+
+ if (Subtarget.hasFPARMv8() && !IsWin64CC) {
+ unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
+
+ unsigned FPRSaveSize = 16 * (FPRArgRegs.size() - FirstVariadicFPR);
+ int FPRIdx = 0;
+ if (FPRSaveSize != 0) {
+ FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
+
+ auto FIN = MIRBuilder.buildFrameIndex(p0, FPRIdx);
+ auto Offset =
+ MIRBuilder.buildConstant(MRI.createGenericVirtualRegister(s64), 16);
+
+ for (unsigned i = FirstVariadicFPR; i < FPRArgRegs.size(); ++i) {
+ Register Val = MRI.createGenericVirtualRegister(LLT::scalar(128));
+ Handler.assignValueToReg(
+ Val, FPRArgRegs[i],
+ CCValAssign::getReg(
+ i + MF.getFunction().getNumOperands() + NumVariadicGPRArgRegs,
+ MVT::f128, FPRArgRegs[i], MVT::f128, CCValAssign::Full));
+
+ auto MPO = MachinePointerInfo::getStack(MF, i * 16);
+ MIRBuilder.buildStore(Val, FIN, MPO, inferAlignFromPtrInfo(MF, MPO));
+
+ FIN = MIRBuilder.buildPtrAdd(MRI.createGenericVirtualRegister(p0),
+ FIN.getReg(0), Offset);
+ }
+ }
+ FuncInfo->setVarArgsFPRIndex(FPRIdx);
+ FuncInfo->setVarArgsFPRSize(FPRSaveSize);
+ }
+}
+
bool AArch64CallLowering::lowerFormalArguments(
MachineIRBuilder &MIRBuilder, const Function &F,
ArrayRef<ArrayRef<Register>> VRegs, FunctionLoweringInfo &FLI) const {
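saveVarArgRegisters reserves one 8-byte slot per GPR argument register not consumed by named arguments and one 16-byte slot per remaining FPR register, mirroring the SelectionDAG implementation in AArch64ISelLowering. The sizing arithmetic in isolation, as a small self-contained sketch (register counts assume the AAPCS64 x0-x7/q0-q7 argument registers; this is not LLVM code):

    #include <cstdio>

    struct VarArgSaveArea {
      unsigned GPRSaveSize;
      unsigned FPRSaveSize;
    };

    // Every unallocated GPR argument register gets an 8-byte slot, every
    // unallocated FPR/SIMD register a 16-byte slot.
    VarArgSaveArea computeSaveArea(unsigned FirstVariadicGPR,
                                   unsigned FirstVariadicFPR) {
      const unsigned NumGPRArgRegs = 8, NumFPRArgRegs = 8;
      return {8 * (NumGPRArgRegs - FirstVariadicGPR),
              16 * (NumFPRArgRegs - FirstVariadicFPR)};
    }

    int main() {
      // A function like printf(const char *fmt, ...) uses x0 for fmt only.
      VarArgSaveArea A = computeSaveArea(/*FirstVariadicGPR=*/1,
                                         /*FirstVariadicFPR=*/0);
      std::printf("GPR save area: %u bytes, FPR save area: %u bytes\n",
                  A.GPRSaveSize, A.FPRSaveSize); // 56 and 128
    }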
@@ -553,6 +628,9 @@ bool AArch64CallLowering::lowerFormalArguments(
MachineBasicBlock &MBB = MIRBuilder.getMBB();
MachineRegisterInfo &MRI = MF.getRegInfo();
auto &DL = F.getParent()->getDataLayout();
+ auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+ // TODO: Support Arm64EC
+ bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv()) && !Subtarget.isWindowsArm64EC();
SmallVector<ArgInfo, 8> SplitArgs;
SmallVector<std::pair<Register, Register>> BoolArgs;
@@ -598,13 +676,14 @@ bool AArch64CallLowering::lowerFormalArguments(
MIRBuilder.setInstr(*MBB.begin());
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
- CCAssignFn *AssignFn =
- TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
+ CCAssignFn *AssignFn = TLI.CCAssignFnForCall(F.getCallingConv(), IsWin64 && F.isVarArg());
AArch64IncomingValueAssigner Assigner(AssignFn, AssignFn);
FormalArgHandler Handler(MIRBuilder, MRI);
- if (!determineAndHandleAssignments(Handler, Assigner, SplitArgs, MIRBuilder,
- F.getCallingConv(), F.isVarArg()))
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+ if (!determineAssignments(Assigner, SplitArgs, CCInfo) ||
+ !handleAssignments(Handler, SplitArgs, CCInfo, ArgLocs, MIRBuilder))
return false;
if (!BoolArgs.empty()) {
@@ -620,32 +699,35 @@ bool AArch64CallLowering::lowerFormalArguments(
}
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
- uint64_t StackOffset = Assigner.StackOffset;
+ uint64_t StackSize = Assigner.StackSize;
if (F.isVarArg()) {
- auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- if (!Subtarget.isTargetDarwin()) {
- // FIXME: we need to reimplement saveVarArgsRegisters from
- // AArch64ISelLowering.
+ if ((!Subtarget.isTargetDarwin() && !Subtarget.isWindowsArm64EC()) || IsWin64) {
+ // The AAPCS variadic function ABI is identical to the non-variadic
+ // one. As a result there may be more arguments in registers and we should
+ // save them for future reference.
+ // Win64 variadic functions also pass arguments in registers, but all
+ // float arguments are passed in integer registers.
+ saveVarArgRegisters(MIRBuilder, Handler, CCInfo);
+ } else if (Subtarget.isWindowsArm64EC()) {
return false;
}
// We currently pass all varargs at 8-byte alignment, or 4 in ILP32.
- StackOffset =
- alignTo(Assigner.StackOffset, Subtarget.isTargetILP32() ? 4 : 8);
+ StackSize = alignTo(Assigner.StackSize, Subtarget.isTargetILP32() ? 4 : 8);
auto &MFI = MIRBuilder.getMF().getFrameInfo();
- FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
+ FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackSize, true));
}
if (doesCalleeRestoreStack(F.getCallingConv(),
MF.getTarget().Options.GuaranteedTailCallOpt)) {
// We have a non-standard ABI, so why not make full use of the stack that
// we're going to pop? It must be aligned to 16 B in any case.
- StackOffset = alignTo(StackOffset, 16);
+ StackSize = alignTo(StackSize, 16);
// If we're expected to restore the stack (e.g. fastcc), then we'll be
// adding a multiple of 16.
- FuncInfo->setArgumentStackToRestore(StackOffset);
+ FuncInfo->setArgumentStackToRestore(StackSize);
// Our own callers will guarantee that the space is free by giving an
// aligned value to CALLSEQ_START.
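The alignTo calls above round the running stack size up to the vararg slot alignment (8 bytes, or 4 under ILP32) and the callee-popped byte count up to 16. A minimal reimplementation of that rounding, for intuition only; it is not the llvm::alignTo utility itself:

    #include <cstdint>
    #include <cstdio>

    // Round Value up to the next multiple of Align (any non-zero Align works,
    // though the callers here always pass a power of two).
    constexpr uint64_t alignToSketch(uint64_t Value, uint64_t Align) {
      return (Value + Align - 1) / Align * Align;
    }

    int main() {
      std::printf("%llu %llu %llu\n",
                  (unsigned long long)alignToSketch(20, 8),   // 24: vararg slot, LP64
                  (unsigned long long)alignToSketch(20, 4),   // 20: vararg slot, ILP32
                  (unsigned long long)alignToSketch(20, 16)); // 32: callee-popped bytes
    }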
@@ -655,9 +737,8 @@ bool AArch64CallLowering::lowerFormalArguments(
// will fit on the caller's stack. So, whenever we lower formal arguments,
// we should keep track of this information, since we might lower a tail call
// in this function later.
- FuncInfo->setBytesInStackArgArea(StackOffset);
+ FuncInfo->setBytesInStackArgArea(StackSize);
- auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
if (Subtarget.hasCustomCallingConv())
Subtarget.getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
@@ -680,6 +761,7 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
switch (CC) {
case CallingConv::C:
case CallingConv::PreserveMost:
+ case CallingConv::PreserveAll:
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
@@ -771,7 +853,7 @@ bool AArch64CallLowering::areCalleeOutgoingArgsTailCallable(
// Make sure that they can fit on the caller's stack.
const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
- if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
+ if (OutInfo.getStackSize() > FuncInfo->getBytesInStackArgArea()) {
LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
return false;
}
@@ -1020,7 +1102,7 @@ bool AArch64CallLowering::lowerTailCall(
// The callee will pop the argument stack as a tail call. Thus, we must
// keep it 16-byte aligned.
- NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);
+ NumBytes = alignTo(OutInfo.getStackSize(), 16);
// FPDiff will be negative if this tail call requires more space than we
// would automatically have in our incoming argument space. Positive if we
@@ -1176,8 +1258,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
Opc = AArch64::BLR_RVMARKER;
// A call to a returns twice function like setjmp must be followed by a bti
// instruction.
- else if (Info.CB &&
- Info.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
+ else if (Info.CB && Info.CB->hasFnAttr(Attribute::ReturnsTwice) &&
!Subtarget.noBTIAtReturnTwice() &&
MF.getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
Opc = AArch64::BLR_BTI;
@@ -1223,6 +1304,17 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Now we can add the actual call instruction to the correct basic block.
MIRBuilder.insertInstr(MIB);
+ uint64_t CalleePopBytes =
+ doesCalleeRestoreStack(Info.CallConv,
+ MF.getTarget().Options.GuaranteedTailCallOpt)
+ ? alignTo(Assigner.StackSize, 16)
+ : 0;
+
+ CallSeqStart.addImm(Assigner.StackSize).addImm(0);
+ MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
+ .addImm(Assigner.StackSize)
+ .addImm(CalleePopBytes);
+
// If Callee is a reg, since it is used by a target specific
// instruction, it must have a register class matching the
// constraint of that instruction.
@@ -1255,17 +1347,6 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
}
- uint64_t CalleePopBytes =
- doesCalleeRestoreStack(Info.CallConv,
- MF.getTarget().Options.GuaranteedTailCallOpt)
- ? alignTo(Assigner.StackOffset, 16)
- : 0;
-
- CallSeqStart.addImm(Assigner.StackOffset).addImm(0);
- MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
- .addImm(Assigner.StackOffset)
- .addImm(CalleePopBytes);
-
if (!Info.CanLowerReturn) {
insertSRetLoads(MIRBuilder, Info.OrigRet.Ty, Info.OrigRet.Regs,
Info.DemoteRegister, Info.DemoteStackIndex);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
index cbdf77f69a63..9ae175274d5d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.h
@@ -66,6 +66,10 @@ private:
using MemHandler =
std::function<void(MachineIRBuilder &, int, CCValAssign &)>;
+ void saveVarArgRegisters(MachineIRBuilder &MIRBuilder,
+ CallLowering::IncomingValueHandler &Handler,
+ CCState &CCInfo) const;
+
bool lowerTailCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info,
SmallVectorImpl<ArgInfo> &OutArgs) const;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 58e89010c139..f1ba1aa7ba89 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -21,9 +21,9 @@
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
@@ -76,7 +76,7 @@ public:
static const char *getName() { return DEBUG_TYPE; }
void setupMF(MachineFunction &MF, GISelKnownBits *KB,
- CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
+ CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) override {
InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
MIB.setMF(MF);
@@ -269,6 +269,10 @@ private:
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
@@ -289,6 +293,11 @@ private:
MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
MachineIRBuilder &MIRBuilder) const;
+ /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
+ /// Might elide the instruction if the previous instruction already sets NZCV
+ /// correctly.
+ MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
+
/// Emit the overflow op for \p Opcode.
///
/// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
@@ -297,6 +306,8 @@ private:
emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
+ bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
+
/// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
/// In some cases this is even possible with OR operations in the expression.
MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
@@ -425,6 +436,8 @@ private:
ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
+ ComplexRendererFns selectExtractHigh(MachineOperand &Root) const;
+
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx = -1) const;
void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
@@ -1774,7 +1787,7 @@ bool AArch64InstructionSelector::selectCompareBranch(
MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
auto Bcc = MIB.buildInstr(AArch64::Bcc)
- .addImm(AArch64CC::EQ)
+ .addImm(AArch64CC::NE)
.addMBB(I.getOperand(1).getMBB());
I.eraseFromParent();
return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
@@ -1940,10 +1953,18 @@ bool AArch64InstructionSelector::selectVaStartDarwin(
Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ int FrameIdx = FuncInfo->getVarArgsStackIndex();
+ if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
+ MF.getFunction().getCallingConv())) {
+ FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
+ ? FuncInfo->getVarArgsGPRIndex()
+ : FuncInfo->getVarArgsStackIndex();
+ }
+
auto MIB =
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
.addDef(ArgsAddrReg)
- .addFrameIndex(FuncInfo->getVarArgsStackIndex())
+ .addFrameIndex(FrameIdx)
.addImm(0)
.addImm(0);
@@ -2232,7 +2253,7 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
case TargetOpcode::G_CONSTANT: {
bool IsZero = false;
if (I.getOperand(1).isCImm())
- IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
+ IsZero = I.getOperand(1).getCImm()->isZero();
else if (I.getOperand(1).isImm())
IsZero = I.getOperand(1).getImm() == 0;
@@ -3073,24 +3094,16 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
I.eraseFromParent();
return true;
}
+
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_SSUBE:
+ case TargetOpcode::G_USUBE:
case TargetOpcode::G_SADDO:
case TargetOpcode::G_UADDO:
case TargetOpcode::G_SSUBO:
- case TargetOpcode::G_USUBO: {
- // Emit the operation and get the correct condition code.
- auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
- I.getOperand(2), I.getOperand(3), MIB);
-
- // Now, put the overflow result in the register given by the first operand
- // to the overflow op. CSINC increments the result when the predicate is
- // false, so to get the increment when it's true, we need to use the
- // inverse. In this case, we want to increment when carry is set.
- Register ZReg = AArch64::WZR;
- emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
- getInvertedCondCode(OpAndCC.second), MIB);
- I.eraseFromParent();
- return true;
- }
+ case TargetOpcode::G_USUBO:
+ return selectOverflowOp(I, MRI);
case TargetOpcode::G_PTRMASK: {
Register MaskReg = I.getOperand(2).getReg();
@@ -3501,7 +3514,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
return selectConcatVectors(I, MRI);
case TargetOpcode::G_JUMP_TABLE:
return selectJumpTable(I, MRI);
- case TargetOpcode::G_VECREDUCE_FADD:
case TargetOpcode::G_VECREDUCE_ADD:
return selectReduction(I, MRI);
case TargetOpcode::G_MEMCPY:
@@ -3551,19 +3563,6 @@ bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
- if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
- unsigned Opc = 0;
- if (VecTy == LLT::fixed_vector(2, 32))
- Opc = AArch64::FADDPv2i32p;
- else if (VecTy == LLT::fixed_vector(2, 64))
- Opc = AArch64::FADDPv2i64p;
- else {
- LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
- return false;
- }
- I.setDesc(TII.get(Opc));
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
- }
return false;
}
@@ -3994,6 +3993,8 @@ MachineInstr *AArch64InstructionSelector::emitScalarToVector(
};
switch (EltSize) {
+ case 8:
+ return BuildFn(AArch64::bsub);
case 16:
return BuildFn(AArch64::hsub);
case 32:
@@ -4376,54 +4377,55 @@ AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
- auto &MF = MIRBuilder.getMF();
- unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
-
- auto Adrp =
- MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
- .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
-
- MachineInstr *LoadMI = nullptr;
- MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
+ const TargetRegisterClass *RC;
+ unsigned Opc;
+ bool IsTiny = TM.getCodeModel() == CodeModel::Tiny;
unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
switch (Size) {
case 16:
- LoadMI =
- &*MIRBuilder
- .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
- .addConstantPoolIndex(CPIdx, 0,
- AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ RC = &AArch64::FPR128RegClass;
+ Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui;
break;
case 8:
- LoadMI =
- &*MIRBuilder
- .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
- .addConstantPoolIndex(CPIdx, 0,
- AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ RC = &AArch64::FPR64RegClass;
+ Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui;
break;
case 4:
- LoadMI =
- &*MIRBuilder
- .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
- .addConstantPoolIndex(CPIdx, 0,
- AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ RC = &AArch64::FPR32RegClass;
+ Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui;
break;
case 2:
- LoadMI =
- &*MIRBuilder
- .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
- .addConstantPoolIndex(CPIdx, 0,
- AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ RC = &AArch64::FPR16RegClass;
+ Opc = AArch64::LDRHui;
break;
default:
LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
<< *CPVal->getType());
return nullptr;
}
+
+ MachineInstr *LoadMI = nullptr;
+ auto &MF = MIRBuilder.getMF();
+ unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
+ if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) {
+ // Use load(literal) for tiny code model.
+ LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx);
+ } else {
+ auto Adrp =
+ MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
+ .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
+
+ LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp})
+ .addConstantPoolIndex(
+ CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+
+ constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
+ }
+
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
MachineMemOperand::MOLoad,
Size, Align(Size)));
- constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
return LoadMI;
}
@@ -4434,7 +4436,10 @@ static std::pair<unsigned, unsigned>
getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
unsigned Opc, SubregIdx;
if (RB.getID() == AArch64::GPRRegBankID) {
- if (EltSize == 16) {
+ if (EltSize == 8) {
+ Opc = AArch64::INSvi8gpr;
+ SubregIdx = AArch64::bsub;
+ } else if (EltSize == 16) {
Opc = AArch64::INSvi16gpr;
SubregIdx = AArch64::ssub;
} else if (EltSize == 32) {
@@ -4556,6 +4561,28 @@ AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
}
MachineInstr *
+AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
+ MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+ bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
+ static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
+ return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
+}
+
+MachineInstr *
+AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
+ MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+ bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
+ static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
+ return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
+}
+
+MachineInstr *
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
@@ -4761,6 +4788,72 @@ AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
return &*CSINC;
}
+MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
+ Register CarryReg) {
+ MachineRegisterInfo *MRI = MIB.getMRI();
+ unsigned Opcode = I.getOpcode();
+
+ // If the instruction is a SUB, we need to negate the carry,
+ // because borrowing is indicated by carry-flag == 0.
+ bool NeedsNegatedCarry =
+ (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
+
+ // If the previous instruction will already produce the correct carry, do not
+ // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
+ // generated during legalization of wide add/sub. This optimization depends on
+ // these sequences not being interrupted by other instructions.
+ MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
+ if (SrcMI == I.getPrevNode()) {
+ if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
+ bool ProducesNegatedCarry = CarrySrcMI->isSub();
+ if (NeedsNegatedCarry == ProducesNegatedCarry && CarrySrcMI->isUnsigned())
+ return nullptr;
+ }
+ }
+
+ Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
+
+ if (NeedsNegatedCarry) {
+ // (0 - Carry) sets !C in NZCV when Carry == 1
+ Register ZReg = AArch64::WZR;
+ return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
+ }
+
+ // (Carry - 1) sets !C in NZCV when Carry == 0
+ auto Fns = select12BitValueWithLeftShift(1);
+ return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
+}
+
+bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
+ MachineRegisterInfo &MRI) {
+ auto &CarryMI = cast<GAddSubCarryOut>(I);
+
+ if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
+ // Set NZCV carry according to carry-in VReg
+ emitCarryIn(I, CarryInMI->getCarryInReg());
+ }
+
+ // Emit the operation and get the correct condition code.
+ auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
+ CarryMI.getLHS(), CarryMI.getRHS(), MIB);
+
+ Register CarryOutReg = CarryMI.getCarryOutReg();
+
+ // Don't convert carry-out to VReg if it is never used
+ if (!MRI.use_nodbg_empty(CarryOutReg)) {
+ // Now, put the overflow result in the register given by the first operand
+ // to the overflow op. CSINC increments the result when the predicate is
+ // false, so to get the increment when it's true, we need to use the
+ // inverse. In this case, we want to increment when carry is set.
+ Register ZReg = AArch64::WZR;
+ emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
+ getInvertedCondCode(OpAndCC.second), MIB);
+ }
+
+ I.eraseFromParent();
+ return true;
+}
+
std::pair<MachineInstr *, AArch64CC::CondCode>
AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
MachineOperand &LHS,
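The two idioms in emitCarryIn rely on AArch64 setting C on a subtract exactly when no borrow occurs. A small simulation of that rule, showing how each idiom turns a 0/1 carry value into the flag state expected by ADCS and SBCS (helper names are invented for the sketch):

    #include <cstdint>
    #include <cstdio>

    // AArch64 sets the C flag on a subtract when there is no borrow, i.e. when
    // the unsigned minuend is >= the subtrahend.
    bool carryAfterSub(uint32_t A, uint32_t B) { return A >= B; }

    int main() {
      for (uint32_t Carry : {0u, 1u}) {
        // "(Carry - 1)": C becomes the incoming carry (used before ADCS).
        bool CForAdc = carryAfterSub(Carry, 1);
        // "(0 - Carry)": C becomes the negated carry (used before SBCS, because
        // AArch64 encodes "no borrow" as C == 1).
        bool CForSbc = carryAfterSub(0, Carry);
        std::printf("carry-in %u -> C before ADCS: %d, C before SBCS: %d\n",
                    Carry, (int)CForAdc, (int)CForSbc);
      }
    }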
@@ -4777,6 +4870,14 @@ AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
case TargetOpcode::G_USUBO:
return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
+ case TargetOpcode::G_SADDE:
+ return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+ case TargetOpcode::G_UADDE:
+ return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
+ case TargetOpcode::G_SSUBE:
+ return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+ case TargetOpcode::G_USUBE:
+ return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
}
}
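The carry-consuming opcodes added here mostly appear when the legalizer splits a wide add or subtract into limb-sized pieces, which is also why emitCarryIn can often reuse the flags left by the immediately preceding instruction. A plain C++ model of the resulting ADDS/ADCS shape for a 128-bit add (limb layout and names are only illustrative):

    #include <cstdint>
    #include <cstdio>

    struct U128 { uint64_t Lo, Hi; };

    U128 add128(U128 A, U128 B) {
      U128 R;
      R.Lo = A.Lo + B.Lo;           // G_UADDO: low limbs, produces carry-out
      uint64_t Carry = R.Lo < A.Lo; // unsigned wrap-around means a carry occurred
      R.Hi = A.Hi + B.Hi + Carry;   // G_UADDE: high limbs, consumes carry-in
      return R;
    }

    int main() {
      U128 R = add128({~0ull, 1}, {1, 2}); // low limbs overflow into Hi
      std::printf("Lo=%llu Hi=%llu\n", (unsigned long long)R.Lo,
                  (unsigned long long)R.Hi); // Lo=0 Hi=4
    }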
@@ -5358,8 +5459,8 @@ bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
Register EltReg = I.getOperand(2).getReg();
const LLT EltTy = MRI.getType(EltReg);
unsigned EltSize = EltTy.getSizeInBits();
- if (EltSize < 16 || EltSize > 64)
- return false; // Don't support all element types yet.
+ if (EltSize < 8 || EltSize > 64)
+ return false;
// Find the definition of the index. Bail out if it's not defined by a
// G_CONSTANT.
@@ -5543,7 +5644,7 @@ bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
if (tryOptBuildVecToSubregToReg(I, MRI))
return true;
- if (EltSize < 16 || EltSize > 64)
+ if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64)
return false; // Don't support all element types yet.
const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
@@ -5840,7 +5941,7 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
uint64_t Key = I.getOperand(3).getImm();
Register DiscReg = I.getOperand(4).getReg();
auto DiscVal = getIConstantVRegVal(DiscReg, MRI);
- bool IsDiscZero = DiscVal && DiscVal->isNullValue();
+ bool IsDiscZero = DiscVal && DiscVal->isZero();
if (Key > AArch64PACKey::LAST)
return false;
@@ -5877,6 +5978,25 @@ bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
I.eraseFromParent();
return true;
}
+ case Intrinsic::ptrauth_blend: {
+ MachineFunction &MF = *I.getParent()->getParent();
+ auto RHS = getIConstantVRegVal(I.getOperand(3).getReg(), MRI);
+ if (RHS && (RHS->getZExtValue() <= 0xffff)) {
+ I.setDesc(TII.get(AArch64::MOVKXi));
+ I.removeOperand(3);
+ I.removeOperand(1);
+ MachineInstrBuilder(MF, I)
+ .addImm(RHS->getZExtValue() & 0xffff)
+ .addImm(48)
+ .constrainAllUses(TII, TRI, RBI);
+ } else {
+ I.setDesc(TII.get(AArch64::BFMXri));
+ I.removeOperand(1);
+ MachineInstrBuilder(MF, I).addImm(16).addImm(15).constrainAllUses(
+ TII, TRI, RBI);
+ }
+ return true;
+ }
case Intrinsic::frameaddress:
case Intrinsic::returnaddress: {
MachineFunction &MF = *I.getParent()->getParent();
@@ -6153,7 +6273,7 @@ AArch64InstructionSelector::selectExtendedSHL(
// Since we're going to pull this into a shift, the constant value must be
// a power of 2. If we got a multiply, then we need to check this.
if (OffsetOpc == TargetOpcode::G_MUL) {
- if (!isPowerOf2_32(ImmVal))
+ if (!llvm::has_single_bit<uint32_t>(ImmVal))
return std::nullopt;
// Got a power of 2. So, the amount we'll shift is the log base-2 of that.
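llvm::has_single_bit is the bit-count spelling of the old isPowerOf2_32 check; both accept exactly the non-zero values with a single bit set. A tiny demonstration using the equivalent C++20 standard function (std::has_single_bit, so the sketch needs -std=c++20):

    #include <bit>
    #include <cstdint>
    #include <cstdio>

    int main() {
      // Only 1, 8 and 4096 pass; 6 has two bits set and cannot become a shift.
      for (uint32_t ImmVal : {1u, 6u, 8u, 4096u})
        std::printf("%u -> %d\n", ImmVal, (int)std::has_single_bit(ImmVal));
    }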
@@ -6759,6 +6879,23 @@ AArch64InstructionSelector::selectArithExtendedRegister(
}}};
}
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
+ if (!Root.isReg())
+ return std::nullopt;
+ MachineRegisterInfo &MRI =
+ Root.getParent()->getParent()->getParent()->getRegInfo();
+
+ MachineInstr *Extract = getDefIgnoringCopies(Root.getReg(), MRI);
+ if (Extract && Extract->getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
+ Root.getReg() == Extract->getOperand(1).getReg()) {
+ Register ExtReg = Extract->getOperand(2).getReg();
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
+ }
+
+ return std::nullopt;
+}
+
void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 186d0ed35d56..d905da4eaec3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -79,8 +79,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const LLT &MinFPScalar = HasFP16 ? s16 : s32;
const bool HasCSSC = ST.hasCSSC();
+ const bool HasRCPC3 = ST.hasRCPC3();
- getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
+ getActionDefinitionsBuilder(
+ {G_IMPLICIT_DEF, G_FREEZE, G_CONSTANT_FOLD_BARRIER})
.legalFor({p0, s8, s16, s32, s64})
.legalFor(PackedVectorAllTypeList)
.widenScalarToNextPow2(0)
@@ -125,8 +127,25 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.legalFor({v2s64})
.widenScalarToNextPow2(0)
.clampScalar(0, s32, s64)
+ .clampMaxNumElements(0, s8, 16)
+ .clampMaxNumElements(0, s16, 8)
.clampNumElements(0, v2s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].getNumElements() <= 2;
+ },
+ 0, s32)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].getNumElements() <= 4;
+ },
+ 0, s16)
+ .minScalarOrEltIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0].getNumElements() <= 16;
+ },
+ 0, s8)
.moreElementsToNextPow2(0);
getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
@@ -310,6 +329,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_LOAD)
.customIf([=](const LegalityQuery &Query) {
+ return HasRCPC3 && Query.Types[0] == s128 &&
+ Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
+ })
+ .customIf([=](const LegalityQuery &Query) {
return Query.Types[0] == s128 &&
Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
})
@@ -328,16 +351,17 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{v2s64, p0, s128, 8}})
// These extends are also legal
.legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}})
- .widenScalarToNextPow2(0, /* MinSize = */8)
+ .widenScalarToNextPow2(0, /* MinSize = */ 8)
.lowerIfMemSizeNotByteSizePow2()
.clampScalar(0, s8, s64)
- .narrowScalarIf([=](const LegalityQuery &Query) {
- // Clamp extending load results to 32-bits.
- return Query.Types[0].isScalar() &&
- Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
- Query.Types[0].getSizeInBits() > 32;
- },
- changeTo(0, s32))
+ .narrowScalarIf(
+ [=](const LegalityQuery &Query) {
+ // Clamp extending load results to 32-bits.
+ return Query.Types[0].isScalar() &&
+ Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
+ Query.Types[0].getSizeInBits() > 32;
+ },
+ changeTo(0, s32))
.clampMaxNumElements(0, s8, 16)
.clampMaxNumElements(0, s16, 8)
.clampMaxNumElements(0, s32, 4)
@@ -348,30 +372,24 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_STORE)
.customIf([=](const LegalityQuery &Query) {
+ return HasRCPC3 && Query.Types[0] == s128 &&
+ Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
+ })
+ .customIf([=](const LegalityQuery &Query) {
return Query.Types[0] == s128 &&
Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
})
- .legalForTypesWithMemDesc({{s8, p0, s8, 8},
- {s16, p0, s8, 8}, // truncstorei8 from s16
- {s32, p0, s8, 8}, // truncstorei8 from s32
- {s64, p0, s8, 8}, // truncstorei8 from s64
- {s16, p0, s16, 8},
- {s32, p0, s16, 8}, // truncstorei16 from s32
- {s64, p0, s16, 8}, // truncstorei16 from s64
- {s32, p0, s8, 8},
- {s32, p0, s16, 8},
- {s32, p0, s32, 8},
- {s64, p0, s64, 8},
- {s64, p0, s32, 8}, // truncstorei32 from s64
- {p0, p0, s64, 8},
- {s128, p0, s128, 8},
- {v16s8, p0, s128, 8},
- {v8s8, p0, s64, 8},
- {v4s16, p0, s64, 8},
- {v8s16, p0, s128, 8},
- {v2s32, p0, s64, 8},
- {v4s32, p0, s128, 8},
- {v2s64, p0, s128, 8}})
+ .legalForTypesWithMemDesc(
+ {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
+ {s32, p0, s8, 8}, // truncstorei8 from s32
+ {s64, p0, s8, 8}, // truncstorei8 from s64
+ {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
+ {s64, p0, s16, 8}, // truncstorei16 from s64
+ {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
+ {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
+ {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
+ {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
+ {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
.clampScalar(0, s8, s64)
.lowerIf([=](const LegalityQuery &Query) {
return Query.Types[0].isScalar() &&
@@ -401,7 +419,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
})
.clampScalar(0, MinFPScalar, s128);
- getActionDefinitionsBuilder({G_ICMP, G_FCMP})
+ getActionDefinitionsBuilder(G_ICMP)
.legalFor({{s32, s32},
{s32, s64},
{s32, p0},
@@ -432,6 +450,43 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
s64)
.clampNumElements(0, v2s32, v4s32);
+ getActionDefinitionsBuilder(G_FCMP)
+ // If we don't have full FP16 support, then scalarize the elements of
+ // vectors containing fp16 types.
+ .fewerElementsIf(
+ [=](const LegalityQuery &Query) {
+ const auto &Ty = Query.Types[0];
+ return Ty.isVector() && Ty.getElementType() == s16 && !HasFP16;
+ },
+ [=](const LegalityQuery &Query) { return std::make_pair(0, s16); })
+ // If we don't have full FP16 support, then widen s16 to s32 if we
+ // encounter it.
+ .widenScalarIf(
+ [=](const LegalityQuery &Query) {
+ return Query.Types[0] == s16 && !HasFP16;
+ },
+ [=](const LegalityQuery &Query) { return std::make_pair(0, s32); })
+ .legalFor({{s16, s16},
+ {s32, s32},
+ {s32, s64},
+ {v4s32, v4s32},
+ {v2s32, v2s32},
+ {v2s64, v2s64},
+ {v4s16, v4s16},
+ {v8s16, v8s16}})
+ .widenScalarOrEltToNextPow2(1)
+ .clampScalar(1, s32, s64)
+ .clampScalar(0, s32, s32)
+ .minScalarEltSameAsIf(
+ [=](const LegalityQuery &Query) {
+ const LLT &Ty = Query.Types[0];
+ const LLT &SrcTy = Query.Types[1];
+ return Ty.isVector() && !SrcTy.getElementType().isPointer() &&
+ Ty.getElementType() != SrcTy.getElementType();
+ },
+ 0, 1)
+ .clampNumElements(0, v2s32, v4s32);
+
// Extensions
auto ExtLegalFunc = [=](const LegalityQuery &Query) {
unsigned DstSize = Query.Types[0].getSizeInBits();
@@ -479,11 +534,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_FPTRUNC)
.legalFor(
{{s16, s32}, {s16, s64}, {s32, s64}, {v4s16, v4s32}, {v2s32, v2s64}})
- .clampMaxNumElements(0, s32, 2);
+ .clampNumElements(0, v4s16, v4s16)
+ .clampNumElements(0, v2s32, v2s32)
+ .scalarize(0);
+
getActionDefinitionsBuilder(G_FPEXT)
.legalFor(
{{s32, s16}, {s64, s16}, {s64, s32}, {v4s32, v4s16}, {v2s64, v2s32}})
- .clampMaxNumElements(0, s64, 2);
+ .clampNumElements(0, v4s32, v4s32)
+ .clampNumElements(0, v2s64, v2s64)
+ .scalarize(0);
// Conversions
getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
@@ -644,12 +704,13 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampMaxNumElements(1, p0, 2);
getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT)
- .legalIf(typeInSet(0, {v8s16, v2s32, v4s32, v2s64}));
+ .legalIf(typeInSet(0, {v16s8, v8s8, v8s16, v4s16, v4s32, v2s32, v2s64}))
+ .clampMinNumElements(0, s16, 4)
+ .clampMaxNumElements(0, s16, 8);
getActionDefinitionsBuilder(G_BUILD_VECTOR)
.legalFor({{v8s8, s8},
{v16s8, s8},
- {v2s16, s16},
{v4s16, s16},
{v8s16, s16},
{v2s32, s32},
@@ -659,6 +720,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.clampNumElements(0, v4s32, v4s32)
.clampNumElements(0, v2s64, v2s64)
.minScalarOrElt(0, s8)
+ .widenVectorEltsToVectorMinSize(0, 64)
.minScalarSameAs(1, 0);
getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).lower();
@@ -666,7 +728,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_CTLZ)
.legalForCartesianProduct(
{s32, s64, v8s8, v16s8, v4s16, v8s16, v2s32, v4s32})
- .scalarize(1);
+ .scalarize(1)
+ .widenScalarToNextPow2(1, /*Min=*/32)
+ .clampScalar(1, s32, s64)
+ .scalarSameSizeAs(0, 1);
getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF).lower();
// TODO: Custom lowering for v2s32, v4s32, v2s64.
@@ -679,8 +744,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_CTTZ)
.lowerIf(isVector(0))
- .clampScalar(0, s32, s64)
- .scalarSameSizeAs(1, 0)
+ .widenScalarToNextPow2(1, /*Min=*/32)
+ .clampScalar(1, s32, s64)
+ .scalarSameSizeAs(0, 1)
.legalIf([=](const LegalityQuery &Query) {
return (HasCSSC && typeInSet(0, {s32, s64})(Query));
})
@@ -713,7 +779,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
changeTo(1, 0))
.moreElementsToNextPow2(0)
.clampNumElements(0, v4s32, v4s32)
- .clampNumElements(0, v2s64, v2s64);
+ .clampNumElements(0, v2s64, v2s64)
+ .moreElementsIf(
+ [](const LegalityQuery &Query) {
+ return Query.Types[0].isVector() && Query.Types[1].isVector() &&
+ Query.Types[0].getNumElements() <
+ Query.Types[1].getNumElements();
+ },
+ changeTo(0, 1));
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
.legalFor({{v4s32, v2s32}, {v8s16, v4s16}, {v16s8, v8s8}});
@@ -964,8 +1037,8 @@ bool AArch64LegalizerInfo::legalizeVectorTrunc(
Register SrcReg = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(DstReg);
LLT SrcTy = MRI.getType(SrcReg);
- assert(isPowerOf2_32(DstTy.getSizeInBits()) &&
- isPowerOf2_32(SrcTy.getSizeInBits()));
+ assert(llvm::has_single_bit<uint32_t>(DstTy.getSizeInBits()) &&
+ llvm::has_single_bit<uint32_t>(SrcTy.getSizeInBits()));
// Split input type.
LLT SplitSrcTy =
@@ -1188,27 +1261,49 @@ bool AArch64LegalizerInfo::legalizeLoadStore(
const LLT ValTy = MRI.getType(ValReg);
if (ValTy == LLT::scalar(128)) {
- assert((*MI.memoperands_begin())->getSuccessOrdering() ==
- AtomicOrdering::Monotonic ||
- (*MI.memoperands_begin())->getSuccessOrdering() ==
- AtomicOrdering::Unordered);
- assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
+
+ AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
+ bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
+ bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
+ bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
+ bool IsRcpC3 =
+ ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
+
LLT s64 = LLT::scalar(64);
+
+ unsigned Opcode;
+ if (IsRcpC3) {
+ Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
+ } else {
+ // For LSE2, loads/stores should have been converted to monotonic and had
+ // a fence inserted after them.
+ assert(Ordering == AtomicOrdering::Monotonic ||
+ Ordering == AtomicOrdering::Unordered);
+ assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
+
+ Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
+ }
+
MachineInstrBuilder NewI;
- if (MI.getOpcode() == TargetOpcode::G_LOAD) {
- NewI = MIRBuilder.buildInstr(AArch64::LDPXi, {s64, s64}, {});
+ if (IsLoad) {
+ NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
MIRBuilder.buildMergeLikeInstr(
ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
} else {
auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
NewI = MIRBuilder.buildInstr(
- AArch64::STPXi, {}, {Split->getOperand(0), Split->getOperand(1)});
+ Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
+ }
+
+ if (IsRcpC3) {
+ NewI.addUse(MI.getOperand(1).getReg());
+ } else {
+ Register Base;
+ int Offset;
+ matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
+ NewI.addUse(Base);
+ NewI.addImm(Offset / 8);
}
- Register Base;
- int Offset;
- matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
- NewI.addUse(Base);
- NewI.addImm(Offset / 8);
NewI.cloneMemRefs(MI);
constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
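The reworked legalizeLoadStore picks LDIAPP/STILP when both +lse2 and +rcpc3 are available and the access is acquire or release, and otherwise keeps the paired LDP/STP path, which assumes the ordering was already weakened to monotonic with fences inserted separately. The decision distilled into a standalone function (enum values are local stand-ins, not the real opcode constants):

    #include <cstdio>

    enum class Op { LDIAPPX, STILPX, LDPXi, STPXi };

    // Opcode choice for a 128-bit atomic load/store, as sketched from the hunk
    // above; illustrative only.
    Op pick128BitAtomicOp(bool IsLoad, bool IsAcquireOrRelease, bool HasLSE2,
                          bool HasRCPC3) {
      if (HasLSE2 && HasRCPC3 && IsAcquireOrRelease)
        return IsLoad ? Op::LDIAPPX : Op::STILPX; // rcpc3 acquire/release pair
      // Otherwise: monotonic/unordered access, single-copy atomic with +lse2.
      return IsLoad ? Op::LDPXi : Op::STPXi;
    }

    int main() {
      std::printf("%d\n", (int)pick128BitAtomicOp(true, true, true, true)); // 0
    }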
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
index 2a13a3606d23..c10f6e071ed4 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.h
@@ -47,7 +47,6 @@ private:
MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const;
bool legalizeVectorTrunc(MachineInstr &MI, LegalizerHelper &Helper) const;
- bool legalizeShuffleVector(MachineInstr &MI, LegalizerHelper &Helper) const;
bool legalizeBitfieldExtract(MachineInstr &MI, MachineRegisterInfo &MRI,
LegalizerHelper &Helper) const;
bool legalizeRotate(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
index 3553492935d3..590afbc29d6d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp
@@ -16,6 +16,8 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -27,33 +29,67 @@
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
+#define GET_GICOMBINER_DEPS
+#include "AArch64GenO0PreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_DEPS
+
#define DEBUG_TYPE "aarch64-O0-prelegalizer-combiner"
using namespace llvm;
using namespace MIPatternMatch;
+namespace {
+#define GET_GICOMBINER_TYPES
+#include "AArch64GenO0PreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_TYPES
-class AArch64O0PreLegalizerCombinerHelperState {
+class AArch64O0PreLegalizerCombinerImpl : public GIMatchTableExecutor {
protected:
CombinerHelper &Helper;
+ const AArch64O0PreLegalizerCombinerImplRuleConfig &RuleConfig;
+
+ const AArch64Subtarget &STI;
+ GISelChangeObserver &Observer;
+ MachineIRBuilder &B;
+ MachineFunction &MF;
+
+ MachineRegisterInfo &MRI;
public:
- AArch64O0PreLegalizerCombinerHelperState(CombinerHelper &Helper)
- : Helper(Helper) {}
-};
+ AArch64O0PreLegalizerCombinerImpl(
+ const AArch64O0PreLegalizerCombinerImplRuleConfig &RuleConfig,
+ GISelChangeObserver &Observer, MachineIRBuilder &B,
+ CombinerHelper &Helper);
+
+ static const char *getName() { return "AArch64O0PreLegalizerCombiner"; }
-#define AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+ bool tryCombineAll(MachineInstr &I) const;
+
+private:
+#define GET_GICOMBINER_CLASS_MEMBERS
#include "AArch64GenO0PreLegalizeGICombiner.inc"
-#undef AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+#undef GET_GICOMBINER_CLASS_MEMBERS
+};
-namespace {
-#define AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+#define GET_GICOMBINER_IMPL
+#include "AArch64GenO0PreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_IMPL
+
+AArch64O0PreLegalizerCombinerImpl::AArch64O0PreLegalizerCombinerImpl(
+ const AArch64O0PreLegalizerCombinerImplRuleConfig &RuleConfig,
+ GISelChangeObserver &Observer, MachineIRBuilder &B, CombinerHelper &Helper)
+ : Helper(Helper), RuleConfig(RuleConfig),
+ STI(B.getMF().getSubtarget<AArch64Subtarget>()), Observer(Observer), B(B),
+ MF(B.getMF()), MRI(*B.getMRI()),
+#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenO0PreLegalizeGICombiner.inc"
-#undef AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+#undef GET_GICOMBINER_CONSTRUCTOR_INITS
+{
+}
class AArch64O0PreLegalizerCombinerInfo : public CombinerInfo {
GISelKnownBits *KB;
MachineDominatorTree *MDT;
- AArch64GenO0PreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
+ AArch64O0PreLegalizerCombinerImplRuleConfig RuleConfig;
public:
AArch64O0PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
@@ -62,7 +98,7 @@ public:
: CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
/*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
KB(KB), MDT(MDT) {
- if (!GeneratedRuleCfg.parseCommandLineOption())
+ if (!RuleConfig.parseCommandLineOption())
report_fatal_error("Invalid rule identifier");
}
@@ -74,9 +110,10 @@ bool AArch64O0PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
CombinerHelper Helper(Observer, B, /*IsPreLegalize*/ true, KB, MDT);
- AArch64GenO0PreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper);
+ AArch64O0PreLegalizerCombinerImpl Impl(RuleConfig, Observer, B, Helper);
+ Impl.setupMF(*MI.getMF(), KB);
- if (Generated.tryCombineAll(Observer, MI, B))
+ if (Impl.tryCombineAll(MI))
return true;
unsigned Opc = MI.getOpcode();
@@ -104,10 +141,6 @@ bool AArch64O0PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
return false;
}
-#define AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
-#include "AArch64GenO0PreLegalizeGICombiner.inc"
-#undef AARCH64O0PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
-
// Pass boilerplate
// ================
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index fbeff1370ef3..303cf11d4f30 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -24,6 +24,8 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
@@ -37,11 +39,21 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
+#define GET_GICOMBINER_DEPS
+#include "AArch64GenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_DEPS
+
#define DEBUG_TYPE "aarch64-postlegalizer-combiner"
using namespace llvm;
using namespace MIPatternMatch;
+namespace {
+
+#define GET_GICOMBINER_TYPES
+#include "AArch64GenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_TYPES
+
/// This combine tries do what performExtractVectorEltCombine does in SDAG.
/// Rewrite for pairwise fadd pattern
/// (s32 (g_extract_vector_elt
@@ -93,7 +105,7 @@ bool matchExtractVecEltPairwiseAdd(
return false;
}
-bool applyExtractVecEltPairwiseAdd(
+void applyExtractVecEltPairwiseAdd(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
std::tuple<unsigned, LLT, Register> &MatchInfo) {
unsigned Opc = std::get<0>(MatchInfo);
@@ -107,16 +119,15 @@ bool applyExtractVecEltPairwiseAdd(
auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1));
B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1});
MI.eraseFromParent();
- return true;
}
-static bool isSignExtended(Register R, MachineRegisterInfo &MRI) {
+bool isSignExtended(Register R, MachineRegisterInfo &MRI) {
// TODO: check if extended build vector as well.
unsigned Opc = MRI.getVRegDef(R)->getOpcode();
return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG;
}
-static bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
+bool isZeroExtended(Register R, MachineRegisterInfo &MRI) {
// TODO: check if extended build vector as well.
return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
}
@@ -149,7 +160,7 @@ bool matchAArch64MulConstCombine(
// which equals to (1+2)*16-(1+2).
// TrailingZeroes is used to test if the mul can be lowered to
// shift+add+shift.
- unsigned TrailingZeroes = ConstValue.countTrailingZeros();
+ unsigned TrailingZeroes = ConstValue.countr_zero();
if (TrailingZeroes) {
// Conservatively do not lower to shift+add+shift if the mul might be
// folded into smul or umul.
@@ -234,13 +245,12 @@ bool matchAArch64MulConstCombine(
return true;
}
-bool applyAArch64MulConstCombine(
+void applyAArch64MulConstCombine(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
B.setInstrAndDebugLoc(MI);
ApplyFn(B, MI.getOperand(0).getReg());
MI.eraseFromParent();
- return true;
}
/// Try to fold a G_MERGE_VALUES of 2 s32 sources, where the second source
@@ -266,7 +276,7 @@ void applyFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI,
/// \returns True if a G_ANYEXT instruction \p MI should be mutated to a G_ZEXT
/// instruction.
-static bool matchMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI) {
+bool matchMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI) {
// If this is coming from a scalar compare then we can use a G_ZEXT instead of
// a G_ANYEXT:
//
@@ -283,9 +293,9 @@ static bool matchMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI)
m_GFCmp(m_Pred(), m_Reg(), m_Reg())));
}
-static void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B,
- GISelChangeObserver &Observer) {
+void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ GISelChangeObserver &Observer) {
Observer.changingInstr(MI);
MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
Observer.changedInstr(MI);
@@ -293,7 +303,7 @@ static void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,
/// Match a 128b store of zero and split it into two 64 bit stores, for
/// size/performance reasons.
-static bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {
+bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {
GStore &Store = cast<GStore>(MI);
if (!Store.isSimple())
return false;
@@ -309,9 +319,9 @@ static bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {
return MaybeCst && MaybeCst->isZero();
}
-static void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B,
- GISelChangeObserver &Observer) {
+void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ GISelChangeObserver &Observer) {
B.setInstrAndDebugLoc(MI);
GStore &Store = cast<GStore>(MI);
assert(MRI.getType(Store.getValueReg()).isVector() &&
@@ -329,21 +339,55 @@ static void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
Store.eraseFromParent();
}
-#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+class AArch64PostLegalizerCombinerImpl : public GIMatchTableExecutor {
+protected:
+ CombinerHelper &Helper;
+ const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig;
+
+ const AArch64Subtarget &STI;
+ MachineRegisterInfo &MRI;
+ GISelChangeObserver &Observer;
+ MachineIRBuilder &B;
+ MachineFunction &MF;
+
+public:
+ AArch64PostLegalizerCombinerImpl(
+ const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
+ const AArch64Subtarget &STI, GISelChangeObserver &Observer,
+ MachineIRBuilder &B, CombinerHelper &Helper);
+
+ static const char *getName() { return "AArch64PostLegalizerCombiner"; }
+
+ bool tryCombineAll(MachineInstr &I) const;
+
+private:
+#define GET_GICOMBINER_CLASS_MEMBERS
#include "AArch64GenPostLegalizeGICombiner.inc"
-#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+#undef GET_GICOMBINER_CLASS_MEMBERS
+};
-namespace {
-#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+#define GET_GICOMBINER_IMPL
+#include "AArch64GenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_IMPL
+
+AArch64PostLegalizerCombinerImpl::AArch64PostLegalizerCombinerImpl(
+ const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
+ const AArch64Subtarget &STI, GISelChangeObserver &Observer,
+ MachineIRBuilder &B, CombinerHelper &Helper)
+ : Helper(Helper), RuleConfig(RuleConfig), STI(STI), MRI(*B.getMRI()),
+ Observer(Observer), B(B), MF(B.getMF()),
+#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPostLegalizeGICombiner.inc"
-#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+#undef GET_GICOMBINER_CONSTRUCTOR_INITS
+{
+}
class AArch64PostLegalizerCombinerInfo : public CombinerInfo {
GISelKnownBits *KB;
MachineDominatorTree *MDT;
public:
- AArch64GenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
+ AArch64PostLegalizerCombinerImplRuleConfig RuleConfig;
AArch64PostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
GISelKnownBits *KB,
@@ -351,7 +395,7 @@ public:
: CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
/*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
KB(KB), MDT(MDT) {
- if (!GeneratedRuleCfg.parseCommandLineOption())
+ if (!RuleConfig.parseCommandLineOption())
report_fatal_error("Invalid rule identifier");
}
@@ -362,17 +406,14 @@ public:
bool AArch64PostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
- const auto *LI =
- MI.getParent()->getParent()->getSubtarget().getLegalizerInfo();
+ const auto &STI = MI.getMF()->getSubtarget<AArch64Subtarget>();
+ const auto *LI = STI.getLegalizerInfo();
CombinerHelper Helper(Observer, B, /*IsPreLegalize*/ false, KB, MDT, LI);
- AArch64GenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg);
- return Generated.tryCombineAll(Observer, MI, B, Helper);
+ AArch64PostLegalizerCombinerImpl Impl(RuleConfig, STI, Observer, B, Helper);
+ Impl.setupMF(*MI.getMF(), KB);
+ return Impl.tryCombineAll(MI);
}
-#define AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
-#include "AArch64GenPostLegalizeGICombiner.inc"
-#undef AARCH64POSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
-
class AArch64PostLegalizerCombiner : public MachineFunctionPass {
public:
static char ID;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
index 7894c05218eb..feeef91bce19 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp
@@ -29,6 +29,8 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
@@ -46,18 +48,28 @@
#include "llvm/Support/ErrorHandling.h"
#include <optional>
+#define GET_GICOMBINER_DEPS
+#include "AArch64GenPostLegalizeGILowering.inc"
+#undef GET_GICOMBINER_DEPS
+
#define DEBUG_TYPE "aarch64-postlegalizer-lowering"
using namespace llvm;
using namespace MIPatternMatch;
using namespace AArch64GISelUtils;
+namespace {
+
+#define GET_GICOMBINER_TYPES
+#include "AArch64GenPostLegalizeGILowering.inc"
+#undef GET_GICOMBINER_TYPES
+
/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
///
/// Used for matching target-supported shuffles before codegen.
struct ShuffleVectorPseudo {
- unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
- Register Dst; ///< Destination register.
+ unsigned Opc; ///< Opcode for the instruction. (E.g. G_ZIP1)
+ Register Dst; ///< Destination register.
SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
ShuffleVectorPseudo(unsigned Opc, Register Dst,
std::initializer_list<SrcOp> SrcOps)
@@ -67,8 +79,8 @@ struct ShuffleVectorPseudo {
/// Check if a vector shuffle corresponds to a REV instruction with the
/// specified blocksize.
-static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
- unsigned BlockSize) {
+bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
+ unsigned BlockSize) {
assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
"Only possible block sizes for REV are: 16, 32, 64");
assert(EltSize != 64 && "EltSize cannot be 64 for REV mask.");
@@ -96,8 +108,7 @@ static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts.
/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult.
-static bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
- unsigned &WhichResult) {
+bool isTRNMask(ArrayRef<int> M, unsigned NumElts, unsigned &WhichResult) {
if (NumElts % 2 != 0)
return false;
WhichResult = (M[0] == 0 ? 0 : 1);
@@ -112,8 +123,8 @@ static bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
/// sources of the shuffle are different.
-static std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
- unsigned NumElts) {
+std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
+ unsigned NumElts) {
// Look for the first non-undef element.
auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
if (FirstRealElt == M.end())
@@ -154,8 +165,7 @@ static std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
-static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
- unsigned &WhichResult) {
+bool isUZPMask(ArrayRef<int> M, unsigned NumElts, unsigned &WhichResult) {
WhichResult = (M[0] == 0 ? 0 : 1);
for (unsigned i = 0; i != NumElts; ++i) {
// Skip undef indices.
@@ -169,8 +179,7 @@ static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
-static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
- unsigned &WhichResult) {
+bool isZipMask(ArrayRef<int> M, unsigned NumElts, unsigned &WhichResult) {
if (NumElts % 2 != 0)
return false;
@@ -178,9 +187,9 @@ static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
WhichResult = (M[0] == 0 ? 0 : 1);
unsigned Idx = WhichResult * NumElts / 2;
for (unsigned i = 0; i != NumElts; i += 2) {
- if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
- (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
- return false;
+ if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
+ (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
+ return false;
Idx += 1;
}
return true;
@@ -194,8 +203,8 @@ static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
///
/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
-static std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
- int NumInputElements) {
+std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
+ int NumInputElements) {
if (M.size() != static_cast<size_t>(NumInputElements))
return std::nullopt;
int NumLHSMatch = 0, NumRHSMatch = 0;
@@ -219,8 +228,8 @@ static std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
/// G_REV instruction. Returns the appropriate G_REV opcode in \p Opc.
-static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
+bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
Register Dst = MI.getOperand(0).getReg();
@@ -248,8 +257,8 @@ static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_TRN1 or G_TRN2 instruction.
-static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
+bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
unsigned WhichResult;
ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
@@ -269,8 +278,8 @@ static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
///
/// \param [in] MI - The shuffle vector instruction.
/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
-static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
+bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
unsigned WhichResult;
ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
@@ -285,8 +294,8 @@ static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
return true;
}
-static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
+bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
unsigned WhichResult;
ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
@@ -302,9 +311,9 @@ static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
}
/// Helper function for matchDup.
-static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
- MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
+bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
if (Lane != 0)
return false;
@@ -316,7 +325,8 @@ static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
// %cst0:gpr(s32) = G_CONSTANT i32 0
// %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
// %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
- // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, %zerovec(<2 x s32>)
+ // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
+ // %zerovec(<2 x s32>)
//
// ...into:
// %splat = G_DUP %scalar
@@ -341,9 +351,9 @@ static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
}
/// Helper function for matchDup.
-static bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
- MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
+bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
assert(Lane >= 0 && "Expected positive lane?");
// Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
// lane's definition directly.
@@ -357,8 +367,8 @@ static bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
return true;
}
-static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
+bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
auto MaybeLane = getSplatIndex(MI);
if (!MaybeLane)
@@ -376,7 +386,7 @@ static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
// Check if an EXT instruction can handle the shuffle mask when the vector
// sources of the shuffle are the same.
-static bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
+bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
unsigned NumElts = Ty.getNumElements();
// Assume that the first shuffle index is not UNDEF. Fail if it is.
@@ -403,8 +413,8 @@ static bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
return true;
}
-static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
- ShuffleVectorPseudo &MatchInfo) {
+bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
+ ShuffleVectorPseudo &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
Register Dst = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(Dst);
@@ -435,18 +445,17 @@ static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
-static bool applyShuffleVectorPseudo(MachineInstr &MI,
- ShuffleVectorPseudo &MatchInfo) {
+void applyShuffleVectorPseudo(MachineInstr &MI,
+ ShuffleVectorPseudo &MatchInfo) {
MachineIRBuilder MIRBuilder(MI);
MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
MI.eraseFromParent();
- return true;
}
/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
/// Special-cased because the constant operand must be emitted as a G_CONSTANT
/// for the imported tablegen patterns to work.
-static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
+void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
MachineIRBuilder MIRBuilder(MI);
// Tablegen patterns expect an i32 G_CONSTANT as the final op.
auto Cst =
@@ -454,7 +463,6 @@ static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
{MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
MI.eraseFromParent();
- return true;
}
/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
@@ -468,8 +476,8 @@ static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
/// %extract = G_EXTRACT_VECTOR_ELT %left, 0
/// %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
///
-static bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
- std::tuple<Register, int, Register, int> &MatchInfo) {
+bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
+ std::tuple<Register, int, Register, int> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
Register Dst = MI.getOperand(0).getReg();
@@ -495,9 +503,9 @@ static bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
return true;
}
-static bool applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &Builder,
- std::tuple<Register, int, Register, int> &MatchInfo) {
+void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &Builder,
+ std::tuple<Register, int, Register, int> &MatchInfo) {
Builder.setInstrAndDebugLoc(MI);
Register Dst = MI.getOperand(0).getReg();
auto ScalarTy = MRI.getType(Dst).getElementType();
@@ -509,14 +517,13 @@ static bool applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
MI.eraseFromParent();
- return true;
}
/// isVShiftRImm - Check if this is a valid vector for the immediate
/// operand of a vector shift right operation. The value must be in the range:
/// 1 <= Value <= ElementBits for a right shift.
-static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
- int64_t &Cnt) {
+bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
+ int64_t &Cnt) {
assert(Ty.isVector() && "vector shift count is not a vector type");
MachineInstr *MI = MRI.getVRegDef(Reg);
auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
@@ -528,8 +535,8 @@ static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
}
/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
-static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
- int64_t &Imm) {
+bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
+ int64_t &Imm) {
assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
MI.getOpcode() == TargetOpcode::G_LSHR);
LLT Ty = MRI.getType(MI.getOperand(1).getReg());
@@ -538,8 +545,8 @@ static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
}
-static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
- int64_t &Imm) {
+void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
+ int64_t &Imm) {
unsigned Opc = MI.getOpcode();
assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
unsigned NewOpc =
@@ -548,7 +555,6 @@ static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
MI.eraseFromParent();
- return true;
}
/// Determine if it is possible to modify the \p RHS and predicate \p P of a
@@ -668,7 +674,7 @@ bool matchAdjustICmpImmAndPred(
return false;
}
-bool applyAdjustICmpImmAndPred(
+void applyAdjustICmpImmAndPred(
MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
MIB.setInstrAndDebugLoc(MI);
@@ -680,7 +686,6 @@ bool applyAdjustICmpImmAndPred(
RHS.setReg(Cst->getOperand(0).getReg());
MI.getOperand(1).setPredicate(MatchInfo.second);
Observer.changedInstr(MI);
- return true;
}
bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -735,7 +740,7 @@ bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
return true;
}
-bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
+void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
Register Src1Reg = MI.getOperand(1).getReg();
@@ -758,10 +763,9 @@ bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
}
B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
MI.eraseFromParent();
- return true;
}
-static bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
+bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
auto Splat = getAArch64VectorSplat(MI, MRI);
if (!Splat)
@@ -775,19 +779,17 @@ static bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
return (Cst != 0 && Cst != -1);
}
-static bool applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B) {
+void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) {
B.setInstrAndDebugLoc(MI);
B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
{MI.getOperand(1).getReg()});
MI.eraseFromParent();
- return true;
}
/// \returns how many instructions would be saved by folding a G_ICMP's shift
/// and/or extension operations.
-static unsigned getCmpOperandFoldingProfit(Register CmpOp,
- const MachineRegisterInfo &MRI) {
+unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) {
// No instructions to save if there's more than one use or no uses.
if (!MRI.hasOneNonDBGUse(CmpOp))
return 0;
@@ -841,8 +843,7 @@ static unsigned getCmpOperandFoldingProfit(Register CmpOp,
/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
/// instruction \p MI.
-static bool trySwapICmpOperands(MachineInstr &MI,
- const MachineRegisterInfo &MRI) {
+bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
assert(MI.getOpcode() == TargetOpcode::G_ICMP);
// Swap the operands if it would introduce a profitable folding opportunity.
// (e.g. a shift + extend).
@@ -878,8 +879,7 @@ static bool trySwapICmpOperands(MachineInstr &MI,
getCmpOperandFoldingProfit(TheRHS, MRI));
}
-static bool applySwapICmpOperands(MachineInstr &MI,
- GISelChangeObserver &Observer) {
+void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
Register LHS = MI.getOperand(2).getReg();
Register RHS = MI.getOperand(3).getReg();
@@ -888,14 +888,13 @@ static bool applySwapICmpOperands(MachineInstr &MI,
MI.getOperand(2).setReg(RHS);
MI.getOperand(3).setReg(LHS);
Observer.changedInstr(MI);
- return true;
}
/// \returns a function which builds a vector floating point compare instruction
/// for a condition code \p CC.
/// \param [in] IsZero - True if the comparison is against 0.
/// \param [in] NoNans - True if the target has NoNansFPMath.
-static std::function<Register(MachineIRBuilder &)>
+std::function<Register(MachineIRBuilder &)>
getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
bool NoNans, MachineRegisterInfo &MRI) {
LLT DstTy = MRI.getType(LHS);
@@ -950,28 +949,45 @@ getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
}
/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
-static bool lowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &MIB) {
+bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIB) {
assert(MI.getOpcode() == TargetOpcode::G_FCMP);
const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
+
Register Dst = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(Dst);
if (!DstTy.isVector() || !ST.hasNEON())
return false;
- const auto Pred =
- static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
Register LHS = MI.getOperand(2).getReg();
- // TODO: Handle v4s16 case.
unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
- if (EltSize != 32 && EltSize != 64)
+ if (EltSize == 16 && !ST.hasFullFP16())
return false;
- Register RHS = MI.getOperand(3).getReg();
+ if (EltSize != 16 && EltSize != 32 && EltSize != 64)
+ return false;
+
+ return true;
+}
+
+/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
+void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIB) {
+ assert(MI.getOpcode() == TargetOpcode::G_FCMP);
+ const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
+
+ const auto &CmpMI = cast<GFCmp>(MI);
+
+ Register Dst = CmpMI.getReg(0);
+ CmpInst::Predicate Pred = CmpMI.getCond();
+ Register LHS = CmpMI.getLHSReg();
+ Register RHS = CmpMI.getRHSReg();
+
+ LLT DstTy = MRI.getType(Dst);
+
auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);
// Compares against 0 have special target-specific pseudos.
bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;
-
bool Invert = false;
AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
if (Pred == CmpInst::Predicate::FCMP_ORD && IsZero) {
@@ -984,10 +1000,12 @@ static bool lowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
} else
changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);
- bool NoNans = ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
-
// Instead of having an apply function, just build here to simplify things.
MIB.setInstrAndDebugLoc(MI);
+
+ const bool NoNans =
+ ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;
+
auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
Register CmpRes;
if (CC2 == AArch64CC::AL)
@@ -1002,11 +1020,10 @@ static bool lowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
MRI.replaceRegWith(Dst, CmpRes);
MI.eraseFromParent();
- return false;
}
-static bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
- Register &SrcReg) {
+bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
+ Register &SrcReg) {
assert(MI.getOpcode() == TargetOpcode::G_STORE);
Register DstReg = MI.getOperand(0).getReg();
if (MRI.getType(DstReg).isVector())
@@ -1018,54 +1035,86 @@ static bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
return MRI.getType(SrcReg).getSizeInBits() <= 64;
}
-static bool applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B,
- GISelChangeObserver &Observer,
- Register &SrcReg) {
+void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, GISelChangeObserver &Observer,
+ Register &SrcReg) {
assert(MI.getOpcode() == TargetOpcode::G_STORE);
Observer.changingInstr(MI);
MI.getOperand(0).setReg(SrcReg);
Observer.changedInstr(MI);
- return true;
}
// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
// form in the first place for combine opportunities, so any remaining ones
// at this stage need be lowered back.
-static bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
+bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
Register DstReg = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(DstReg);
return DstTy.isVector();
}
-static void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B,
- GISelChangeObserver &Observer) {
+void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, GISelChangeObserver &Observer) {
assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
B.setInstrAndDebugLoc(MI);
LegalizerHelper Helper(*MI.getMF(), Observer, B);
Helper.lower(MI, 0, /* Unused hint type */ LLT());
}
-#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
+class AArch64PostLegalizerLoweringImpl : public GIMatchTableExecutor {
+protected:
+ CombinerHelper &Helper;
+ const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
+
+ const AArch64Subtarget &STI;
+ GISelChangeObserver &Observer;
+ MachineIRBuilder &B;
+ MachineFunction &MF;
+
+ MachineRegisterInfo &MRI;
+
+public:
+ AArch64PostLegalizerLoweringImpl(
+ const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
+ const AArch64Subtarget &STI, GISelChangeObserver &Observer,
+ MachineIRBuilder &B, CombinerHelper &Helper);
+
+  static const char *getName() { return "AArch64PostLegalizerLowering"; }
+
+ bool tryCombineAll(MachineInstr &I) const;
+
+private:
+#define GET_GICOMBINER_CLASS_MEMBERS
#include "AArch64GenPostLegalizeGILowering.inc"
-#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
+#undef GET_GICOMBINER_CLASS_MEMBERS
+};
-namespace {
-#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H
+#define GET_GICOMBINER_IMPL
+#include "AArch64GenPostLegalizeGILowering.inc"
+#undef GET_GICOMBINER_IMPL
+
+AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
+ const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
+ const AArch64Subtarget &STI, GISelChangeObserver &Observer,
+ MachineIRBuilder &B, CombinerHelper &Helper)
+ : Helper(Helper), RuleConfig(RuleConfig), STI(STI), Observer(Observer),
+ B(B), MF(B.getMF()), MRI(*B.getMRI()),
+#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPostLegalizeGILowering.inc"
-#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H
+#undef GET_GICOMBINER_CONSTRUCTOR_INITS
+{
+}
class AArch64PostLegalizerLoweringInfo : public CombinerInfo {
public:
- AArch64GenPostLegalizerLoweringHelperRuleConfig GeneratedRuleCfg;
+ AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
AArch64PostLegalizerLoweringInfo(bool OptSize, bool MinSize)
: CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
/*LegalizerInfo*/ nullptr, /*OptEnabled = */ true, OptSize,
MinSize) {
- if (!GeneratedRuleCfg.parseCommandLineOption())
+ if (!RuleConfig.parseCommandLineOption())
report_fatal_error("Invalid rule identifier");
}
@@ -1076,15 +1125,12 @@ public:
bool AArch64PostLegalizerLoweringInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
+ const auto &STI = MI.getMF()->getSubtarget<AArch64Subtarget>();
CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ false);
- AArch64GenPostLegalizerLoweringHelper Generated(GeneratedRuleCfg);
- return Generated.tryCombineAll(Observer, MI, B, Helper);
+ AArch64PostLegalizerLoweringImpl Impl(RuleConfig, STI, Observer, B, Helper);
+ Impl.setupMF(*MI.getMF(), Helper.getKnownBits());
+ return Impl.tryCombineAll(MI);
}
-
-#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP
-#include "AArch64GenPostLegalizeGILowering.inc"
-#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP
-
class AArch64PostLegalizerLowering : public MachineFunctionPass {
public:
static char ID;
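
As a concrete reference for the mask helpers above (isREVMask, isTRNMask, isUZPMask, isZipMask), here is a small illustrative check for a 4-element shuffle. It is not part of the patch; the expected results follow directly from the loops shown in the hunks, and checkPermuteMasks is a throwaway name.

// Illustrative only; requires <cassert>. WhichResult selects the "1" vs "2"
// flavour of each AArch64 permute.
static void checkPermuteMasks() {
  unsigned W;
  assert(isTRNMask({0, 4, 2, 6}, 4, W) && W == 0); // G_TRN1
  assert(isTRNMask({1, 5, 3, 7}, 4, W) && W == 1); // G_TRN2
  assert(isUZPMask({0, 2, 4, 6}, 4, W) && W == 0); // G_UZP1
  assert(isUZPMask({1, 3, 5, 7}, 4, W) && W == 1); // G_UZP2
  assert(isZipMask({0, 4, 1, 5}, 4, W) && W == 0); // G_ZIP1
  assert(isZipMask({2, 6, 3, 7}, 4, W) && W == 1); // G_ZIP2
  // REV64 on <4 x s32>: swap the two 32-bit lanes within each 64-bit block.
  assert(isREVMask({1, 0, 3, 2}, /*EltSize=*/32, /*NumElts=*/4,
                   /*BlockSize=*/64));
}
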
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
index 670a16209705..94584e20f5ab 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
@@ -73,10 +73,32 @@ unsigned getNonFlagSettingVariant(unsigned Opc) {
return AArch64::SUBWrr;
case AArch64::SUBSXrs:
return AArch64::SUBXrs;
+ case AArch64::SUBSWrs:
+ return AArch64::SUBWrs;
case AArch64::SUBSXri:
return AArch64::SUBXri;
case AArch64::SUBSWri:
return AArch64::SUBWri;
+ case AArch64::ADDSXrr:
+ return AArch64::ADDXrr;
+ case AArch64::ADDSWrr:
+ return AArch64::ADDWrr;
+ case AArch64::ADDSXrs:
+ return AArch64::ADDXrs;
+ case AArch64::ADDSWrs:
+ return AArch64::ADDWrs;
+ case AArch64::ADDSXri:
+ return AArch64::ADDXri;
+ case AArch64::ADDSWri:
+ return AArch64::ADDWri;
+ case AArch64::SBCSXr:
+ return AArch64::SBCXr;
+ case AArch64::SBCSWr:
+ return AArch64::SBCWr;
+ case AArch64::ADCSXr:
+ return AArch64::ADCXr;
+ case AArch64::ADCSWr:
+ return AArch64::ADCWr;
}
}
@@ -137,6 +159,12 @@ bool AArch64PostSelectOptimize::foldSimpleCrossClassCopies(MachineInstr &MI) {
}
bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
+ // If we find a dead NZCV implicit-def, we
+ // - try to convert the operation to a non-flag-setting equivalent
+ // - or mark the def as dead to aid later peephole optimizations.
+
+ // Use cases:
+ // 1)
// Consider the following code:
// FCMPSrr %0, %1, implicit-def $nzcv
// %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
@@ -153,8 +181,11 @@ bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
// in between the two FCMPs. In this case, the SUBS defines NZCV
// but it doesn't have any users, being overwritten by the second FCMP.
//
- // Our solution here is to try to convert flag setting operations between
- // a interval of identical FCMPs, so that CSE will be able to eliminate one.
+ // 2)
+ // The instruction selector always emits the flag-setting variant of ADC/SBC
+ // while selecting G_UADDE/G_SADDE/G_USUBE/G_SSUBE. If the carry-out of these
+ // instructions is never used, we can switch to the non-flag-setting variant.
+
bool Changed = false;
auto &MF = *MBB.getParent();
auto &Subtarget = MF.getSubtarget();
@@ -163,52 +194,20 @@ bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
auto RBI = Subtarget.getRegBankInfo();
auto &MRI = MF.getRegInfo();
- // The first step is to find the first and last FCMPs. If we have found
- // at least two, then set the limit of the bottom-up walk to the first FCMP
- // found since we're only interested in dealing with instructions between
- // them.
- MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr;
- for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
- if (MI.getOpcode() == AArch64::FCMPSrr ||
- MI.getOpcode() == AArch64::FCMPDrr) {
- if (!FirstCmp)
- FirstCmp = &MI;
- else
- LastCmp = &MI;
- }
- }
-
- // In addition to converting flag-setting ops in fcmp ranges into non-flag
- // setting ops, across the whole basic block we also detect when nzcv
- // implicit-defs are dead, and mark them as dead. Peephole optimizations need
- // this information later.
-
LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo());
LRU.addLiveOuts(MBB);
- bool NZCVDead = LRU.available(AArch64::NZCV);
- bool InsideCmpRange = false;
- for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
- LRU.stepBackward(II);
-
- if (LastCmp) { // There's a range present in this block.
- // If we're inside an fcmp range, look for begin instruction.
- if (InsideCmpRange && &II == FirstCmp)
- InsideCmpRange = false;
- else if (&II == LastCmp)
- InsideCmpRange = true;
- }
- // Did this instruction define NZCV?
- bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV);
- if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) {
- // If we have a def and NZCV is dead, then we may convert this op.
+ for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
+ bool NZCVDead = LRU.available(AArch64::NZCV);
+ if (NZCVDead && II.definesRegister(AArch64::NZCV)) {
+ // The instruction defines NZCV, but NZCV is dead.
unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode());
int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV);
if (DeadNZCVIdx != -1) {
- // If we're inside an fcmp range, then convert flag setting ops.
- if (InsideCmpRange && NewOpc) {
+ if (NewOpc) {
+ // If there is an equivalent non-flag-setting op, we convert.
LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting "
- "op in fcmp range: "
+ "op: "
<< II);
II.setDesc(TII->get(NewOpc));
II.removeOperand(DeadNZCVIdx);
@@ -225,8 +224,7 @@ bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
}
}
}
-
- NZCVDead = NZCVDeadAtCurrInstr;
+ LRU.stepBackward(II);
}
return Changed;
}
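
To make the new, range-free walk concrete, a hedged example of the rewrite optimizeNZCVDefs can now perform anywhere in a block once the flags are known dead; the MIR below is illustrative and only approximates the printed form.

// before (carry-out never read, so the NZCV def is dead):
//   %s:gpr64 = ADCSXr %a:gpr64, %b:gpr64, implicit-def dead $nzcv, implicit $nzcv
// after getNonFlagSettingVariant() + removeOperand() as shown above:
//   %s:gpr64 = ADCXr %a:gpr64, %b:gpr64, implicit $nzcv
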
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index 542abd74ecdd..a918e9f36e69 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -17,6 +17,8 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
@@ -28,15 +30,24 @@
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
+#define GET_GICOMBINER_DEPS
+#include "AArch64GenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_DEPS
+
#define DEBUG_TYPE "aarch64-prelegalizer-combiner"
using namespace llvm;
using namespace MIPatternMatch;
+namespace {
+
+#define GET_GICOMBINER_TYPES
+#include "AArch64GenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_TYPES
+
/// Return true if a G_FCONSTANT instruction is known to be better-represented
/// as a G_CONSTANT.
-static bool matchFConstantToConstant(MachineInstr &MI,
- MachineRegisterInfo &MRI) {
+bool matchFConstantToConstant(MachineInstr &MI, MachineRegisterInfo &MRI) {
assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
Register DstReg = MI.getOperand(0).getReg();
const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
@@ -51,7 +62,7 @@ static bool matchFConstantToConstant(MachineInstr &MI,
}
/// Change a G_FCONSTANT into a G_CONSTANT.
-static void applyFConstantToConstant(MachineInstr &MI) {
+void applyFConstantToConstant(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT);
MachineIRBuilder MIB(MI);
const APFloat &ImmValAPF = MI.getOperand(1).getFPImm()->getValueAPF();
@@ -62,8 +73,8 @@ static void applyFConstantToConstant(MachineInstr &MI) {
/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
/// are sign bits. In this case, we can transform the G_ICMP to directly compare
/// the wide value with a zero.
-static bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
- GISelKnownBits *KB, Register &MatchInfo) {
+bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+ GISelKnownBits *KB, Register &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_ICMP && KB);
auto Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
@@ -91,10 +102,9 @@ static bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
return true;
}
-static bool applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &Builder,
- GISelChangeObserver &Observer,
- Register &WideReg) {
+void applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &Builder,
+ GISelChangeObserver &Observer, Register &WideReg) {
assert(MI.getOpcode() == TargetOpcode::G_ICMP);
LLT WideTy = MRI.getType(WideReg);
@@ -106,7 +116,6 @@ static bool applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
MI.getOperand(2).setReg(WideReg);
MI.getOperand(3).setReg(WideZero.getReg(0));
Observer.changedInstr(MI);
- return true;
}
/// \returns true if it is possible to fold a constant into a G_GLOBAL_VALUE.
@@ -114,8 +123,8 @@ static bool applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
/// e.g.
///
/// %g = G_GLOBAL_VALUE @x -> %g = G_GLOBAL_VALUE @x + cst
-static bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
- std::pair<uint64_t, uint64_t> &MatchInfo) {
+bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
+ std::pair<uint64_t, uint64_t> &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
MachineFunction &MF = *MI.getMF();
auto &GlobalOp = MI.getOperand(1);
@@ -181,10 +190,9 @@ static bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
return true;
}
-static bool applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B,
- GISelChangeObserver &Observer,
- std::pair<uint64_t, uint64_t> &MatchInfo) {
+void applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B, GISelChangeObserver &Observer,
+ std::pair<uint64_t, uint64_t> &MatchInfo) {
// Change:
//
// %g = G_GLOBAL_VALUE @x
@@ -219,12 +227,10 @@ static bool applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
B.buildPtrAdd(
Dst, NewGVDst,
B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
- return true;
}
-static bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B,
- CombinerHelper &Helper,
- GISelChangeObserver &Observer) {
+bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B,
+ CombinerHelper &Helper, GISelChangeObserver &Observer) {
// Try simplify G_UADDO with 8 or 16 bit operands to wide G_ADD and TBNZ if
// result is only used in the no-overflow case. It is restricted to cases
// where we know that the high-bits of the operands are 0. If there's an
@@ -337,28 +343,54 @@ static bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B,
return true;
}
-class AArch64PreLegalizerCombinerHelperState {
+class AArch64PreLegalizerCombinerImpl : public GIMatchTableExecutor {
protected:
CombinerHelper &Helper;
+ const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig;
+
+ const AArch64Subtarget &STI;
+ GISelChangeObserver &Observer;
+ MachineIRBuilder &B;
+ MachineFunction &MF;
+
+ MachineRegisterInfo &MRI;
public:
- AArch64PreLegalizerCombinerHelperState(CombinerHelper &Helper)
- : Helper(Helper) {}
-};
+ AArch64PreLegalizerCombinerImpl(
+ const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
+ const AArch64Subtarget &STI, GISelChangeObserver &Observer,
+ MachineIRBuilder &B, CombinerHelper &Helper);
-#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+  static const char *getName() { return "AArch64PreLegalizerCombiner"; }
+
+ bool tryCombineAll(MachineInstr &I) const;
+
+private:
+#define GET_GICOMBINER_CLASS_MEMBERS
#include "AArch64GenPreLegalizeGICombiner.inc"
-#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+#undef GET_GICOMBINER_CLASS_MEMBERS
+};
-namespace {
-#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+#define GET_GICOMBINER_IMPL
+#include "AArch64GenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_IMPL
+
+AArch64PreLegalizerCombinerImpl::AArch64PreLegalizerCombinerImpl(
+ const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
+ const AArch64Subtarget &STI, GISelChangeObserver &Observer,
+ MachineIRBuilder &B, CombinerHelper &Helper)
+ : Helper(Helper), RuleConfig(RuleConfig), STI(STI), Observer(Observer),
+ B(B), MF(B.getMF()), MRI(*B.getMRI()),
+#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPreLegalizeGICombiner.inc"
-#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+#undef GET_GICOMBINER_CONSTRUCTOR_INITS
+{
+}
class AArch64PreLegalizerCombinerInfo : public CombinerInfo {
GISelKnownBits *KB;
MachineDominatorTree *MDT;
- AArch64GenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
+ AArch64PreLegalizerCombinerImplRuleConfig RuleConfig;
public:
AArch64PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
@@ -366,7 +398,7 @@ public:
: CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
/*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
KB(KB), MDT(MDT) {
- if (!GeneratedRuleCfg.parseCommandLineOption())
+ if (!RuleConfig.parseCommandLineOption())
report_fatal_error("Invalid rule identifier");
}
@@ -377,11 +409,13 @@ public:
bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
- const auto *LI = MI.getMF()->getSubtarget().getLegalizerInfo();
+ const auto &STI = MI.getMF()->getSubtarget<AArch64Subtarget>();
+ const auto *LI = STI.getLegalizerInfo();
CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ true, KB, MDT, LI);
- AArch64GenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper);
+ AArch64PreLegalizerCombinerImpl Impl(RuleConfig, STI, Observer, B, Helper);
+ Impl.setupMF(*MI.getMF(), KB);
- if (Generated.tryCombineAll(Observer, MI, B))
+ if (Impl.tryCombineAll(MI))
return true;
unsigned Opc = MI.getOpcode();
@@ -412,10 +446,6 @@ bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
return false;
}
-#define AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
-#include "AArch64GenPreLegalizeGICombiner.inc"
-#undef AARCH64PRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
-
// Pass boilerplate
// ================
@@ -425,7 +455,9 @@ public:
AArch64PreLegalizerCombiner();
- StringRef getPassName() const override { return "AArch64PreLegalizerCombiner"; }
+ StringRef getPassName() const override {
+ return "AArch64PreLegalizerCombiner";
+ }
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -484,7 +516,6 @@ INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE,
"Combine AArch64 machine instrs before legalization", false,
false)
-
namespace llvm {
FunctionPass *createAArch64PreLegalizerCombiner() {
return new AArch64PreLegalizerCombiner();
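
The narrow-G_UADDO rewrite that tryToSimplifyUADDO performs (described in its comment above) can be pictured roughly as follows; the generic-MIR shapes are illustrative, not taken from a test.

// before: 8-bit unsigned add whose overflow bit only feeds a conditional
// branch, with both operands known to be zero-extended:
//   %r:_(s8), %ovf:_(s1) = G_UADDO %a:_(s8), %b:_(s8)
//   G_BRCOND %ovf(s1), %bb.overflow
// after: the add is done in a wider type and the carry shows up as bit 8 of
// the wide sum (bit 16 for the s16 case), which TBNZ can test directly:
//   %wide:_(s32) = G_ADD %a32:_(s32), %b32:_(s32)
//   ... branch on bit 8 of %wide ...
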
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index d6d5c6047747..0314a3b65ebd 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -18,7 +18,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
-#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -71,7 +71,8 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(
// GR64all + its subclasses.
assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
"Subclass not added?");
- assert(RBGPR.getSize() == 128 && "GPRs should hold up to 128-bit");
+ assert(getMaximumSize(RBGPR.getID()) == 128 &&
+ "GPRs should hold up to 128-bit");
// The FPR register bank is fully defined by all the registers in
// GR64all + its subclasses.
@@ -79,12 +80,13 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(
"Subclass not added?");
assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
"Subclass not added?");
- assert(RBFPR.getSize() == 512 &&
+ assert(getMaximumSize(RBFPR.getID()) == 512 &&
"FPRs should hold up to 512-bit via QQQQ sequence");
assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
"Class not added?");
- assert(RBCCR.getSize() == 32 && "CCR should hold up to 32-bit");
+ assert(getMaximumSize(RBCCR.getID()) == 32 &&
+ "CCR should hold up to 32-bit");
// Check that the TableGen'ed like file is in sync we our expectations.
// First, the Idx.
@@ -481,14 +483,35 @@ AArch64RegisterBankInfo::getSameKindOfOperandsMapping(
getValueMapping(RBIdx, Size), NumOperands);
}
-/// \returns true if a given intrinsic \p ID only uses and defines FPRs.
-static bool isFPIntrinsic(unsigned ID) {
+/// \returns true if a given intrinsic only uses and defines FPRs.
+static bool isFPIntrinsic(const MachineRegisterInfo &MRI,
+ const MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC);
// TODO: Add more intrinsics.
- switch (ID) {
+ switch (MI.getIntrinsicID()) {
default:
return false;
case Intrinsic::aarch64_neon_uaddlv:
+ case Intrinsic::aarch64_neon_uaddv:
+ case Intrinsic::aarch64_neon_umaxv:
+ case Intrinsic::aarch64_neon_uminv:
+ case Intrinsic::aarch64_neon_fmaxv:
+ case Intrinsic::aarch64_neon_fminv:
+ case Intrinsic::aarch64_neon_fmaxnmv:
+ case Intrinsic::aarch64_neon_fminnmv:
return true;
+ case Intrinsic::aarch64_neon_saddlv: {
+ const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
+ return SrcTy.getElementType().getSizeInBits() >= 16 &&
+ SrcTy.getElementCount().getFixedValue() >= 4;
+ }
+ case Intrinsic::aarch64_neon_saddv:
+ case Intrinsic::aarch64_neon_smaxv:
+ case Intrinsic::aarch64_neon_sminv: {
+ const LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
+ return SrcTy.getElementType().getSizeInBits() >= 32 &&
+ SrcTy.getElementCount().getFixedValue() >= 2;
+ }
}
}
@@ -497,7 +520,7 @@ bool AArch64RegisterBankInfo::hasFPConstraints(const MachineInstr &MI,
const TargetRegisterInfo &TRI,
unsigned Depth) const {
unsigned Op = MI.getOpcode();
- if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MI.getIntrinsicID()))
+ if (Op == TargetOpcode::G_INTRINSIC && isFPIntrinsic(MRI, MI))
return true;
// Do we have an explicit floating point instruction?
@@ -753,7 +776,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
*AArch64GenRegisterBankInfo::PartMappings[OpRegBankIdx[1]].RegBank,
OpSize[0]);
break;
- case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_LOAD: {
// Loading in vector unit is slightly more expensive.
// This is actually only true for the LD1R and co instructions,
// but anyway for the fast mode this number does not matter and
@@ -771,6 +794,33 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
+ // Try to guess the type of the load from the MMO.
+ const auto &MMO = **MI.memoperands_begin();
+ const Value *LdVal = MMO.getValue();
+ if (LdVal) {
+ Type *EltTy = nullptr;
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(LdVal)) {
+ EltTy = GV->getValueType();
+ } else {
+ // FIXME: grubbing around uses is pretty ugly, but with no more
+ // `getPointerElementType` there's not much else we can do.
+ for (const auto *LdUser : LdVal->users()) {
+ if (isa<LoadInst>(LdUser)) {
+ EltTy = LdUser->getType();
+ break;
+ }
+ if (isa<StoreInst>(LdUser) && LdUser->getOperand(1) == LdVal) {
+ EltTy = LdUser->getOperand(0)->getType();
+ break;
+ }
+ }
+ }
+ if (EltTy && EltTy->isFPOrFPVectorTy()) {
+ OpRegBankIdx[0] = PMI_FirstFPR;
+ break;
+ }
+ }
+
// Check if that load feeds fp instructions.
// In that case, we want the default mapping to be on FPR
// instead of blind map every scalar to GPR.
@@ -788,6 +838,7 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
}))
OpRegBankIdx[0] = PMI_FirstFPR;
break;
+ }
case TargetOpcode::G_STORE:
// Check if that store is fed by fp instructions.
if (OpRegBankIdx[0] == PMI_FirstGPR) {
@@ -968,9 +1019,8 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_INTRINSIC: {
// Check if we know that the intrinsic has any constraints on its register
// banks. If it does, then update the mapping accordingly.
- unsigned ID = MI.getIntrinsicID();
unsigned Idx = 0;
- if (!isFPIntrinsic(ID))
+ if (!isFPIntrinsic(MRI, MI))
break;
for (const auto &Op : MI.explicit_operands()) {
if (Op.isReg())
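
Two worked readings of the new isFPIntrinsic() constraints above, just to spell out the element-size and element-count checks; they restate the code rather than add rules.

//   aarch64_neon_saddlv on <4 x s16>: elt size 16 >= 16 and 4 elts >= 4,
//     so the intrinsic is treated as FPR-only.
//   aarch64_neon_saddlv on <8 x s8>:  elt size 8 < 16, so isFPIntrinsic()
//     returns false and the operands fall back to the default mapping.
//   aarch64_neon_smaxv  on <2 x s32>: elt size 32 >= 32 and 2 elts >= 2,
//     so it is FPR-only as well.
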
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
index 71862e85b49c..33c08bfc6de6 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AddressingModes.h
@@ -237,17 +237,17 @@ static inline bool processLogicalImmediate(uint64_t Imm, unsigned RegSize,
Imm &= Mask;
if (isShiftedMask_64(Imm)) {
- I = countTrailingZeros(Imm);
+ I = llvm::countr_zero(Imm);
assert(I < 64 && "undefined behavior");
- CTO = countTrailingOnes(Imm >> I);
+ CTO = llvm::countr_one(Imm >> I);
} else {
Imm |= ~Mask;
if (!isShiftedMask_64(~Imm))
return false;
- unsigned CLO = countLeadingOnes(Imm);
+ unsigned CLO = llvm::countl_one(Imm);
I = 64 - CLO;
- CTO = CLO + countTrailingOnes(Imm) - (64 - Size);
+ CTO = CLO + llvm::countr_one(Imm) - (64 - Size);
}
// Encode in Immr the number of RORs it would take to get *from* 0^m 1^n
@@ -298,7 +298,7 @@ static inline uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize) {
unsigned imms = val & 0x3f;
assert((regSize == 64 || N == 0) && "undefined logical immediate encoding");
- int len = 31 - countLeadingZeros((N << 6) | (~imms & 0x3f));
+ int len = 31 - llvm::countl_zero((N << 6) | (~imms & 0x3f));
assert(len >= 0 && "undefined logical immediate encoding");
unsigned size = (1 << len);
unsigned R = immr & (size - 1);
@@ -327,7 +327,7 @@ static inline bool isValidDecodeLogicalImmediate(uint64_t val,
if (regSize == 32 && N != 0) // undefined logical immediate encoding
return false;
- int len = 31 - countLeadingZeros((N << 6) | (~imms & 0x3f));
+ int len = 31 - llvm::countl_zero((N << 6) | (~imms & 0x3f));
if (len < 0) // undefined logical immediate encoding
return false;
unsigned size = (1 << len);
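
The renames above are mechanical; for reference, the old-to-new mapping of the bit-counting helpers (they mirror C++20 <bit>), with arbitrary example values.

//   countTrailingZeros(x) -> llvm::countr_zero(x)  e.g. countr_zero(0b101000u)  == 3
//   countTrailingOnes(x)  -> llvm::countr_one(x)   e.g. countr_one(0b100111u)   == 3
//   countLeadingOnes(x)   -> llvm::countl_one(x)   e.g. countl_one(0xF0000000u) == 4
//   countLeadingZeros(x)  -> llvm::countl_zero(x)  e.g. countl_zero(0x0000FFFFu) == 16
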
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 8eaf528d9ea5..a7933c00f507 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -10,7 +10,6 @@
#include "MCTargetDesc/AArch64MCExpr.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "Utils/AArch64BaseInfo.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
@@ -27,6 +26,8 @@
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
namespace {
@@ -155,7 +156,7 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
default:
llvm_unreachable("Unknown fixup kind!");
case AArch64::fixup_aarch64_pcrel_adr_imm21:
- if (SignedValue > 2097151 || SignedValue < -2097152)
+ if (!isInt<21>(SignedValue))
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
return AdrImmBits(Value & 0x1fffffULL);
case AArch64::fixup_aarch64_pcrel_adrp_imm21:
@@ -168,8 +169,8 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, const MCValue &Target,
return AdrImmBits((Value & 0x1fffff000ULL) >> 12);
case AArch64::fixup_aarch64_ldr_pcrel_imm19:
case AArch64::fixup_aarch64_pcrel_branch19:
- // Signed 21-bit immediate
- if (SignedValue > 2097151 || SignedValue < -2097152)
+ // Signed 19-bit immediate which gets multiplied by 4
+ if (!isInt<21>(SignedValue))
Ctx.reportError(Fixup.getLoc(), "fixup value out of range");
if (Value & 0x3)
Ctx.reportError(Fixup.getLoc(), "fixup not sufficiently aligned");
@@ -564,10 +565,14 @@ public:
}
/// Generate the compact unwind encoding from the CFI directives.
- uint32_t generateCompactUnwindEncoding(
- ArrayRef<MCCFIInstruction> Instrs) const override {
+ uint32_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
+ const MCContext *Ctxt) const override {
+ ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
if (Instrs.empty())
return CU::UNWIND_ARM64_MODE_FRAMELESS;
+ if (!isDarwinCanonicalPersonality(FI->Personality) &&
+ !Ctxt->emitCompactUnwindNonCanonical())
+ return CU::UNWIND_ARM64_MODE_DWARF;
bool HasFP = false;
unsigned StackSize = 0;
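
A small arithmetic check behind the branch19 comment above: a signed 19-bit word offset scaled by 4 spans [-2^20, 2^20 - 4] bytes, which is why the byte offset is validated with isInt<21>() together with the low-two-bits alignment check.

// Illustrative only; both follow from the 19-bit, word-scaled encoding.
static_assert(-(1 << 18) * 4 == -(1 << 20), "most negative byte offset");
static_assert(((1 << 18) - 1) * 4 == (1 << 20) - 4, "most positive byte offset");
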
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
index e4003a6c1f7b..6a5f1430643d 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp
@@ -18,7 +18,6 @@
#include "AArch64WinCOFFStreamer.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmBackend.h"
@@ -37,6 +36,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 6ff5459fe026..2983e9a9be92 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -213,8 +213,7 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
int ImmS = MI->getOperand(4).getImm();
if ((Op2.getReg() == AArch64::WZR || Op2.getReg() == AArch64::XZR) &&
- (ImmR == 0 || ImmS < ImmR) &&
- STI.getFeatureBits()[AArch64::HasV8_2aOps]) {
+ (ImmR == 0 || ImmS < ImmR) && STI.hasFeature(AArch64::HasV8_2aOps)) {
       // BFC takes precedence over its entire range, slightly differently to BFI.
int BitWidth = Opcode == AArch64::BFMXri ? 64 : 32;
int LSB = (BitWidth - ImmR) % BitWidth;
@@ -283,6 +282,23 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
return;
}
+ auto PrintMovImm = [&](uint64_t Value, int RegWidth) {
+ int64_t SExtVal = SignExtend64(Value, RegWidth);
+ O << "\tmov\t";
+ printRegName(O, MI->getOperand(0).getReg());
+ O << ", " << markup("<imm:") << "#"
+ << formatImm(SExtVal) << markup(">");
+ if (CommentStream) {
+ // Do the opposite to that used for instruction operands.
+ if (getPrintImmHex())
+ *CommentStream << '=' << formatDec(SExtVal) << '\n';
+ else {
+ uint64_t Mask = maskTrailingOnes<uint64_t>(RegWidth);
+ *CommentStream << '=' << formatHex(SExtVal & Mask) << '\n';
+ }
+ }
+ };
+
// MOVZ, MOVN and "ORR wzr, #imm" instructions are aliases for MOV, but their
// domains overlap so they need to be prioritized. The chain is "MOVZ lsl #0 >
// MOVZ lsl #N > MOVN lsl #0 > MOVN lsl #N > ORR". The highest instruction
@@ -296,10 +312,7 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
if (AArch64_AM::isMOVZMovAlias(Value, Shift,
Opcode == AArch64::MOVZXi ? 64 : 32)) {
- O << "\tmov\t";
- printRegName(O, MI->getOperand(0).getReg());
- O << ", " << markup("<imm:") << "#"
- << formatImm(SignExtend64(Value, RegWidth)) << markup(">");
+ PrintMovImm(Value, RegWidth);
return;
}
}
@@ -313,10 +326,7 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
Value = Value & 0xffffffff;
if (AArch64_AM::isMOVNMovAlias(Value, Shift, RegWidth)) {
- O << "\tmov\t";
- printRegName(O, MI->getOperand(0).getReg());
- O << ", " << markup("<imm:") << "#"
- << formatImm(SignExtend64(Value, RegWidth)) << markup(">");
+ PrintMovImm(Value, RegWidth);
return;
}
}
@@ -329,10 +339,7 @@ void AArch64InstPrinter::printInst(const MCInst *MI, uint64_t Address,
uint64_t Value = AArch64_AM::decodeLogicalImmediate(
MI->getOperand(2).getImm(), RegWidth);
if (!AArch64_AM::isAnyMOVWMovAlias(Value, RegWidth)) {
- O << "\tmov\t";
- printRegName(O, MI->getOperand(0).getReg());
- O << ", " << markup("<imm:") << "#"
- << formatImm(SignExtend64(Value, RegWidth)) << markup(">");
+ PrintMovImm(Value, RegWidth);
return;
}
}
@@ -1773,19 +1780,23 @@ void AArch64InstPrinter::printAlignedLabel(const MCInst *MI, uint64_t Address,
}
}
-void AArch64InstPrinter::printAdrpLabel(const MCInst *MI, uint64_t Address,
- unsigned OpNum,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
+void AArch64InstPrinter::printAdrAdrpLabel(const MCInst *MI, uint64_t Address,
+ unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNum);
// If the label has already been resolved to an immediate offset (say, when
// we're running the disassembler), just print the immediate.
if (Op.isImm()) {
- const int64_t Offset = Op.getImm() * 4096;
+ int64_t Offset = Op.getImm();
+ if (MI->getOpcode() == AArch64::ADRP) {
+ Offset = Offset * 4096;
+ Address = Address & -4096;
+ }
O << markup("<imm:");
if (PrintBranchImmAsAddress)
- O << formatHex((Address & -4096) + Offset);
+ O << formatHex(Address + Offset);
else
O << "#" << Offset;
O << markup(">");
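
To illustrate the ADR/ADRP distinction introduced in printAdrAdrpLabel above, a worked example with arbitrary numbers:

//   ADRP at Address 0x4005a4 with Imm 2 (and PrintBranchImmAsAddress set):
//     Offset  = 2 * 4096         = 0x2000
//     Address = 0x4005a4 & -4096 = 0x400000
//     printed target             = 0x402000
//   ADR at Address 0x4005a4 with Imm 8:
//     printed target = 0x4005a4 + 8 = 0x4005ac
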
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
index 1baf7e42c35c..fcaa57402bc2 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h
@@ -174,8 +174,8 @@ protected:
const MCSubtargetInfo &STI, raw_ostream &O);
void printMatrixIndex(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printAdrpLabel(const MCInst *MI, uint64_t Address, unsigned OpNum,
- const MCSubtargetInfo &STI, raw_ostream &O);
+ void printAdrAdrpLabel(const MCInst *MI, uint64_t Address, unsigned OpNum,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printBarrierOption(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI, raw_ostream &O);
void printBarriernXSOption(const MCInst *MI, unsigned OpNum,
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index 68c721cb0d72..62eac059371e 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -11,11 +11,11 @@
//===----------------------------------------------------------------------===//
#include "AArch64MCAsmInfo.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
enum AsmWriterVariantTy {
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index e482ab731012..2dbbab13e8f3 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -171,7 +171,7 @@ public:
unsigned fixMOVZ(const MCInst &MI, unsigned EncodedValue,
const MCSubtargetInfo &STI) const;
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -661,7 +661,9 @@ unsigned AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue,
return EncodedValue;
}
-void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
if (MI.getOpcode() == AArch64::TLSDESCCALL) {
@@ -683,7 +685,7 @@ void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
}
uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
- support::endian::write<uint32_t>(OS, Binary, support::little);
+ support::endian::write<uint32_t>(CB, Binary, support::little);
++MCNumEmitted; // Keep track of the # of mi's emitted.
}
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
index d3e834a140b2..f8938cdbbec3 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h
@@ -170,8 +170,6 @@ public:
static bool classof(const MCExpr *E) {
return E->getKind() == MCExpr::Target;
}
-
- static bool classof(const AArch64MCExpr *) { return true; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 8b61ebcfea0e..043f0a03b797 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -300,6 +300,13 @@ void AArch64_MC::initLLVMToCVRegMapping(MCRegisterInfo *MRI) {
MRI->mapLLVMRegToCVReg(I.Reg, static_cast<int>(I.CVReg));
}
+bool AArch64_MC::isHForm(const MCInst &MI, const MCInstrInfo *MCII) {
+ const auto &FPR16 = AArch64MCRegisterClasses[AArch64::FPR16RegClassID];
+ return llvm::any_of(MI, [&](const MCOperand &Op) {
+ return Op.isReg() && FPR16.contains(Op.getReg());
+ });
+}
+
bool AArch64_MC::isQForm(const MCInst &MI, const MCInstrInfo *MCII) {
const auto &FPR128 = AArch64MCRegisterClasses[AArch64::FPR128RegClassID];
return llvm::any_of(MI, [&](const MCOperand &Op) {
@@ -413,7 +420,9 @@ public:
for (unsigned i = 0, e = Inst.getNumOperands(); i != e; i++) {
if (Desc.operands()[i].OperandType == MCOI::OPERAND_PCREL) {
int64_t Imm = Inst.getOperand(i).getImm();
- if (Inst.getOpcode() == AArch64::ADRP)
+ if (Inst.getOpcode() == AArch64::ADR)
+ Target = Addr + Imm;
+ else if (Inst.getOpcode() == AArch64::ADRP)
Target = (Addr & -4096) + Imm * 4096;
else
Target = Addr + Imm * 4;
@@ -425,7 +434,6 @@ public:
std::vector<std::pair<uint64_t, uint64_t>>
findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
- uint64_t GotPltSectionVA,
const Triple &TargetTriple) const override {
// Do a lightweight parsing of PLT entries.
std::vector<std::pair<uint64_t, uint64_t>> Result;
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 7daf08499945..7b4f102840aa 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -63,6 +63,7 @@ MCTargetStreamer *createAArch64AsmTargetStreamer(MCStreamer &S,
namespace AArch64_MC {
void initLLVMToCVRegMapping(MCRegisterInfo *MRI);
+bool isHForm(const MCInst &MI, const MCInstrInfo *MCII);
bool isQForm(const MCInst &MI, const MCInstrInfo *MCII);
bool isFpOrNEON(const MCInst &MI, const MCInstrInfo *MCII);
}
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 012661edbbfd..04bd85260c56 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -139,13 +139,11 @@ static bool canUseLocalRelocation(const MCSectionMachO &Section,
return false;
if (RefSec.getSegmentName() == "__DATA" &&
- RefSec.getName() == "__objc_classrefs")
+ (RefSec.getName() == "__cfstring" ||
+ RefSec.getName() == "__objc_classrefs"))
return false;
- // FIXME: ld64 currently handles internal pointer-sized relocations
- // incorrectly (applying the addend twice). We should be able to return true
- // unconditionally by this point when that's fixed.
- return false;
+ return true;
}
void AArch64MachObjectWriter::recordRelocation(
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 22635a3791c2..6e3aadd5dd8c 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -24,6 +24,12 @@ def tileslice128 : ComplexPattern<i32 , 2, "SelectSMETileSlice<0, 1>", []>; //
def tileslicerange3s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<14, 2>", []>;
def tileslicerange2s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<6, 2>", []>;
+def tileslicerange1s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<2, 2>", []>;
+def tileslicerange0s2 : ComplexPattern<i32, 2, "SelectSMETileSlice<0, 2>", []>;
+
+def tileslicerange2s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<12, 4>", []>;
+def tileslicerange1s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<4, 4>", []>;
+def tileslicerange0s4 : ComplexPattern<i32, 2, "SelectSMETileSlice<0, 4>", []>;
def am_sme_indexed_b4 :ComplexPattern<iPTR, 2, "SelectAddrModeIndexedSVE<0,15>", [], [SDNPWantRoot]>;
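The tileslicerange<N>s<M> names above appear to encode an N-bit slice offset scaled by M, so SelectSMETileSlice<Max, Scale> is instantiated with Max = (2^N - 1) * M; for example, tileslicerange1s4 accepts offsets 0 and 4. A minimal sketch of that reading (the helper and its semantics are an assumption, not code from this patch):

#include <cstdint>

// Assumed semantics of SelectSMETileSlice<Max, Scale>: a slice offset is
// selectable when it is a non-negative multiple of Scale no larger than Max,
// e.g. tileslicerange2s4 = SelectSMETileSlice<12, 4> accepts 0, 4, 8, 12.
static bool isSelectableSliceOffset(int64_t Off, int64_t Max, int64_t Scale) {
  return Off >= 0 && Off <= Max && Off % Scale == 0;
}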
@@ -77,6 +83,20 @@ class sme2_za_array_2op_multi_index_pseudo<string name, Operand index_ty, Regist
let usesCustomInserter = 1;
}
+class sme2_move_to_za_pseudo<string name, Operand imm_ty, RegisterOperand multi_vector_ty, SMEMatrixTypeEnum za_flag>
+ : SMEPseudo2Instr<name, 0>,
+ Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rs, imm_ty:$imm, multi_vector_ty:$Zn), []> {
+ let SMEMatrixType = za_flag;
+ let usesCustomInserter = 1;
+}
+
+class sme2_move_to_tile_pseudo<string name, Operand tile_imm, Operand imm_ty, RegisterOperand multi_vector_ty, SMEMatrixTypeEnum za_flag>
+ : SMEPseudo2Instr<name, 0>,
+ Pseudo<(outs), (ins tile_imm:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm, multi_vector_ty:$Zn), []> {
+ let SMEMatrixType = za_flag;
+ let usesCustomInserter = 1;
+}
+
//===----------------------------------------------------------------------===//
// SME pattern match helpers.
//===----------------------------------------------------------------------===//
@@ -133,10 +153,43 @@ class SME2_ZA_TwoOp_VG4_Multi_Index_Pat<string name, SDPatternOperator intrinsic
(REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3),
zpr_ty:$Zm, imm_ty:$i)>;
+class SME2_Sat_Shift_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty>
+ : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))),
+ (!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>;
+
+class SME2_Sat_Shift_VG4_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty>
+ : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4, (i32 imm_ty:$i))),
+ (!cast<Instruction>(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3),
+ imm_ty:$i)>;
+
class SME2_Cvt_VG4_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt>
: Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4)),
(!cast<Instruction>(name) (REG_SEQUENCE ZPR4Mul4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3))>;
+class SME2_ZA_VG1x2_Multi_Pat<string name, SDPatternOperator intrinsic, ValueType vt, Operand index_ty, ComplexPattern tileslice>
+ : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2),
+ (!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>;
+
+class SME2_ZA_VG1x4_Multi_Pat<string name, SDPatternOperator intrinsic, ValueType vt, Operand index_ty, ComplexPattern tileslice>
+ : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
+ (!cast<Instruction>(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>;
+
+class SME2_Tile_VG2_Multi_Pat<string name, SDPatternOperator intrinsic, Operand tile_imm, ValueType vt, Operand index_ty, ComplexPattern tileslice>
+ : Pat<(intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2),
+ (!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>;
+
+class SME2_Tile_VG4_Multi_Pat<string name, SDPatternOperator intrinsic, Operand tile_imm, ValueType vt, Operand index_ty, ComplexPattern tileslice>
+ : Pat<(intrinsic tile_imm:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4),
+ (!cast<Instruction>(name # _PSEUDO) $tile, $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>;
+
+//===----------------------------------------------------------------------===//
+// SME pattern match helpers.
+//===----------------------------------------------------------------------===//
+
+class SME_ZA_Tile_TwoPred_TwoVec_Pat<string name, SDPatternOperator intrinsic, Operand imm_ty, ValueType pg_ty, ValueType vt>
+ : Pat<(intrinsic imm_ty:$tile, (pg_ty PPR3bAny:$Pn), (pg_ty PPR3bAny:$Pm), vt:$Zn, vt:$Zm),
+ (!cast<Instruction>(name # _PSEUDO) $tile, $Pn, $Pm, $Zn, $Zm)>;
+
//===----------------------------------------------------------------------===//
// SME Outer Products
//===----------------------------------------------------------------------===//
@@ -175,9 +228,7 @@ multiclass sme_outer_product_fp32<bit S, string mnemonic, SDPatternOperator op>
def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR32, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
- def : Pat<(op timm32_0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm),
- (nxv4f32 ZPR32:$zn), (nxv4f32 ZPR32:$zm)),
- (!cast<Instruction>(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>;
+ def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv4i1, nxv4f32>;
}
multiclass sme_outer_product_fp64<bit S, string mnemonic, SDPatternOperator op> {
@@ -188,9 +239,7 @@ multiclass sme_outer_product_fp64<bit S, string mnemonic, SDPatternOperator op>
def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR64, SMEMatrixTileD>, SMEPseudo2Instr<NAME, 0>;
- def : Pat<(op timm32_0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm),
- (nxv2f64 ZPR64:$zn), (nxv2f64 ZPR64:$zm)),
- (!cast<Instruction>(NAME # _PSEUDO) timm32_0_7:$tile, $pn, $pm, $zn, $zm)>;
+ def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_7, nxv2i1, nxv2f64>;
}
multiclass sme2p1_fmop_tile_fp16<string mnemonic, bit bf, bit s>{
@@ -239,9 +288,7 @@ multiclass sme_int_outer_product_i32<bits<3> opc, string mnemonic,
def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR8, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
- def : Pat<(op timm32_0_3:$tile, (nxv16i1 PPR3bAny:$pn), (nxv16i1 PPR3bAny:$pm),
- (nxv16i8 ZPR8:$zn), (nxv16i8 ZPR8:$zm)),
- (!cast<Instruction>(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>;
+ def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv16i1, nxv16i8>;
}
multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
@@ -254,9 +301,7 @@ multiclass sme_int_outer_product_i64<bits<3> opc, string mnemonic,
def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileD>, SMEPseudo2Instr<NAME, 0>;
- def : Pat<(op timm32_0_7:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
- (nxv8i16 ZPR16:$zn), (nxv8i16 ZPR16:$zm)),
- (!cast<Instruction>(NAME # _PSEUDO) timm32_0_7:$tile, $pn, $pm, $zn, $zm)>;
+ def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_7, nxv8i1, nxv8i16>;
}
class sme_outer_product_widening_inst<bits<3> opc, ZPRRegOp zpr_ty, string mnemonic>
@@ -291,9 +336,7 @@ multiclass sme_bf16_outer_product<bits<3> opc, string mnemonic, SDPatternOperato
def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
- def : Pat<(op timm32_0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
- (nxv8bf16 ZPR16:$zn), (nxv8bf16 ZPR16:$zm)),
- (!cast<Instruction>(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>;
+ def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv8i1, nxv8bf16>;
}
multiclass sme_f16_outer_product<bits<3> opc, string mnemonic, SDPatternOperator op> {
@@ -301,9 +344,7 @@ multiclass sme_f16_outer_product<bits<3> opc, string mnemonic, SDPatternOperator
def NAME # _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
- def : Pat<(op timm32_0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm),
- (nxv8f16 ZPR16:$zn), (nxv8f16 ZPR16:$zm)),
- (!cast<Instruction>(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>;
+ def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, op, timm32_0_3, nxv8i1, nxv8f16>;
}
//===----------------------------------------------------------------------===//
@@ -719,12 +760,6 @@ multiclass sme_spill<string opcodestr> {
// base
def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base),
(!cast<Instruction>(NAME) ZA, $idx, 0, $base, 0)>;
- // scalar + immediate (mul vl)
- let AddedComplexity = 2 in {
- def : Pat<(int_aarch64_sme_str MatrixIndexGPR32Op12_15:$idx,
- (am_sme_indexed_b4 GPR64sp:$base, imm0_15:$imm4)),
- (!cast<Instruction>(NAME) ZA, $idx, 0, $base, $imm4)>;
- }
}
multiclass sme_fill<string opcodestr> {
@@ -744,12 +779,6 @@ multiclass sme_fill<string opcodestr> {
// base
def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx, GPR64sp:$base),
(!cast<Instruction>(NAME # _PSEUDO) $idx, 0, $base)>;
- // scalar + immediate (mul vl)
- let AddedComplexity = 2 in {
- def : Pat<(int_aarch64_sme_ldr MatrixIndexGPR32Op12_15:$idx,
- (am_sme_indexed_b4 GPR64sp:$base, imm0_15:$imm4)),
- (!cast<Instruction>(NAME # _PSEUDO) $idx, $imm4, $base)>;
- }
}
//===----------------------------------------------------------------------===//
@@ -796,15 +825,10 @@ multiclass sme_vector_to_tile_patterns<Instruction inst, ValueType zpr_vt,
Operand offset_ty,
SDPatternOperator op,
ComplexPattern tileslice> {
- def : Pat<(op imm_ty:$tile, MatrixIndexGPR32Op12_15:$idx,
+ def : Pat<(op imm_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
+ offset_ty:$imm)),
(ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)),
- (inst imm_ty:$tile, $idx, 0, $pg, $zn)>;
- let AddedComplexity = 1 in {
- def : Pat<(op imm_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx,
- offset_ty:$imm)),
- (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)),
- (inst imm_ty:$tile, $idx, $imm, $pg, $zn)>;
- }
+ (inst imm_ty:$tile, $idx, $imm, $pg, $zn)>;
}
class sme_mova_insert_pseudo<SMEMatrixTypeEnum za_flag>
@@ -1285,30 +1309,30 @@ multiclass sve2_int_perm_sel_p<string asm, SDPatternOperator op> {
(!cast<Instruction>(NAME # _D) PNRAny:$Pd,
PNRAny:$Pn, PPR64:$Pm, MatrixIndexGPR32Op12_15:$Rv, sme_elm_idx0_1:$imm), 0>;
- def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPRAny:$Pm),
+ def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm),
MatrixIndexGPR32Op12_15:$idx)),
(!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, 0)>;
- def : Pat<(nxv8i1 (op (nxv8i1 PPRAny:$Pn), (nxv8i1 PPRAny:$Pm),
+ def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm),
MatrixIndexGPR32Op12_15:$idx)),
(!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, 0)>;
- def : Pat<(nxv4i1 (op (nxv4i1 PPRAny:$Pn), (nxv4i1 PPRAny:$Pm),
+ def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm),
MatrixIndexGPR32Op12_15:$idx)),
(!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, 0)>;
- def : Pat<(nxv2i1 (op (nxv2i1 PPRAny:$Pn), (nxv2i1 PPRAny:$Pm),
+ def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm),
MatrixIndexGPR32Op12_15:$idx)),
(!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, 0)>;
let AddedComplexity = 1 in {
- def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPRAny:$Pm),
+ def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv16i1 PPR8:$Pm),
(i32 (tileslice8 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_15:$imm)))),
(!cast<Instruction>(NAME # _B) $Pn, $Pm, $idx, $imm)>;
- def : Pat<(nxv8i1 (op (nxv8i1 PPRAny:$Pn), (nxv8i1 PPRAny:$Pm),
+ def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv8i1 PPR16:$Pm),
(i32 (tileslice16 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_7:$imm)))),
(!cast<Instruction>(NAME # _H) $Pn, $Pm, $idx, $imm)>;
- def : Pat<(nxv4i1 (op (nxv4i1 PPRAny:$Pn), (nxv4i1 PPRAny:$Pm),
+ def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv4i1 PPR32:$Pm),
(i32 (tileslice32 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_3:$imm)))),
(!cast<Instruction>(NAME # _S) $Pn, $Pm, $idx, $imm)>;
- def : Pat<(nxv2i1 (op (nxv2i1 PPRAny:$Pn), (nxv2i1 PPRAny:$Pm),
+ def : Pat<(nxv16i1 (op (nxv16i1 PPRAny:$Pn), (nxv2i1 PPR64:$Pm),
(i32 (tileslice64 MatrixIndexGPR32Op12_15:$idx, sme_elm_idx0_1:$imm)))),
(!cast<Instruction>(NAME # _D) $Pn, $Pm, $idx, $imm)>;
}
@@ -1510,11 +1534,16 @@ class sme2_multivec_accum_add_sub_vg2<string mnemonic, bit sz, bits<3> op,
multiclass sme2_multivec_accum_add_sub_vg2<string mnemonic, bits<4> op,
MatrixOperand matrix_ty,
- RegisterOperand vector_ty> {
- def NAME : sme2_multivec_accum_add_sub_vg2<mnemonic, op{3}, op{2-0}, matrix_ty, vector_ty>;
-
+ RegisterOperand vector_ty,
+ ValueType vty,
+ SDPatternOperator intrinsic> {
+ def NAME : sme2_multivec_accum_add_sub_vg2<mnemonic, op{3}, op{2-0}, matrix_ty, vector_ty>,
+ SMEPseudo2Instr<NAME, 1>;
def : InstAlias<mnemonic # "\t$ZAdn[$Rv, $imm3], $Zm",
(!cast<Instruction>(NAME) matrix_ty:$ZAdn, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, vector_ty:$Zm), 0>;
+
+ def _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, vector_ty, SMEMatrixArray>;
+ def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, vty, sme_elm_idx0_7, tileslice16>;
}
class sme2_multivec_accum_add_sub_vg4<string mnemonic, bit sz, bits<3> op,
@@ -1528,11 +1557,16 @@ class sme2_multivec_accum_add_sub_vg4<string mnemonic, bit sz, bits<3> op,
multiclass sme2_multivec_accum_add_sub_vg4<string mnemonic, bits<4> op,
MatrixOperand matrix_ty,
- RegisterOperand vector_ty> {
- def NAME : sme2_multivec_accum_add_sub_vg4<mnemonic, op{3}, op{2-0}, matrix_ty, vector_ty>;
-
+ RegisterOperand vector_ty,
+ ValueType vty,
+ SDPatternOperator intrinsic> {
+ def NAME : sme2_multivec_accum_add_sub_vg4<mnemonic, op{3}, op{2-0}, matrix_ty, vector_ty>,
+ SMEPseudo2Instr<NAME, 1>;
def : InstAlias<mnemonic # "\t$ZAdn[$Rv, $imm3], $Zm",
(!cast<Instruction>(NAME) matrix_ty:$ZAdn, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, vector_ty:$Zm), 0>;
+
+ def _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, vector_ty, SMEMatrixArray>;
+ def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, vty, sme_elm_idx0_7, tileslice16>;
}
//===----------------------------------------------------------------------===//
@@ -2493,7 +2527,7 @@ multiclass sme2_multi_vec_array_vg4_index_64b<string mnemonic, bits<3> op,
// SME2 multi-vec indexed long long MLA one source 32-bit
class sme2_mla_ll_array_index_32b<string mnemonic, bits<3> op>
: I<(outs MatrixOp32:$ZAda),
- (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB:$i),
+ (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
"", []>, Sched<[]> {
bits<4> Zm;
@@ -2513,11 +2547,19 @@ class sme2_mla_ll_array_index_32b<string mnemonic, bits<3> op>
let Constraints = "$ZAda = $_ZAda";
}
+multiclass sme2_mla_ll_array_index_32b<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
+ def NAME : sme2_mla_ll_array_index_32b<mnemonic, op>, SMEPseudo2Instr<NAME, 1>;
+
+ def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm2s4range, ZPR8, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>;
+
+ def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME, intrinsic, uimm2s4range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange2s4>;
+}
+
// SME2 multi-vec indexed long long MLA one source 64-bit
class sme2_mla_ll_array_index_64b<string mnemonic, bits<2> op>
: I<(outs MatrixOp64:$ZAda),
- (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR16:$Zn, ZPR4b16:$Zm, VectorIndexH:$i),
+ (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR16:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i),
mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i",
"", []>, Sched<[]> {
bits<4> Zm;
@@ -2539,12 +2581,20 @@ class sme2_mla_ll_array_index_64b<string mnemonic, bits<2> op>
let Constraints = "$ZAda = $_ZAda";
}
+multiclass sme2_mla_ll_array_index_64b<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
+ def NAME : sme2_mla_ll_array_index_64b<mnemonic, op>, SMEPseudo2Instr<NAME, 1>;
+
+ def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm2s4range, ZPR16, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
+
+ def : SME2_ZA_TwoOp_Multi_Index_Pat<NAME, intrinsic, uimm2s4range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange2s4>;
+}
+
class sme2_mla_ll_array_vg24_index_32b<bit vg4, bits<3> op,
RegisterOperand vector_ty,
string mnemonic>
: I<(outs MatrixOp32:$ZAda),
(ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
- vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB:$i),
+ vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i),
mnemonic, "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
"", []>, Sched<[]> {
bits<4> Zm;
@@ -2566,34 +2616,42 @@ class sme2_mla_ll_array_vg24_index_32b<bit vg4, bits<3> op,
//SME2 multi-vec indexed long long MLA two sources 32-bit
-multiclass sme2_mla_ll_array_vg2_index_32b<string mnemonic, bits<3> op> {
- def NAME: sme2_mla_ll_array_vg24_index_32b<0b0, op, ZZ_b_mul_r, mnemonic> {
+multiclass sme2_mla_ll_array_vg2_index_32b<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
+ def NAME: sme2_mla_ll_array_vg24_index_32b<0b0, op, ZZ_b_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> {
bits<4> Zn;
let Inst{9-6} = Zn;
}
+ def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range, ZZ_b_mul_r, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>;
+
+ def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange1s4>;
+
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
- (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>;
+ (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 0>;
}
// SME2 multi-vec indexed long long MLA four sources 32-bit
-multiclass sme2_mla_ll_array_vg4_index_32b<string mnemonic, bits<3> op> {
- def NAME: sme2_mla_ll_array_vg24_index_32b<0b1, op, ZZZZ_b_mul_r, mnemonic> {
+multiclass sme2_mla_ll_array_vg4_index_32b<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
+ def NAME: sme2_mla_ll_array_vg24_index_32b<0b1, op, ZZZZ_b_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> {
bits<3> Zn;
let Inst{9-7} = Zn;
let Inst{6} = 0b0;
}
+ def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range, ZZZZ_b_mul_r, ZPR4b8, VectorIndexB32b_timm, SMEMatrixArray>;
+
+ def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b8, nxv16i8, VectorIndexB32b_timm, tileslicerange1s4>;
+
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
- (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>;
+ (!cast<Instruction>(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB32b_timm:$i), 0>;
}
class sme2_mla_ll_array_vg24_index_64b<bit vg4, bits<2> op,
RegisterOperand vector_ty,
string mnemonic>
: I<(outs MatrixOp64:$ZAda),
(ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm,
- vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH:$i),
+ vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i),
mnemonic, "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i",
"", []>, Sched<[]> {
bits<4> Zm;
@@ -2616,27 +2674,35 @@ class sme2_mla_ll_array_vg24_index_64b<bit vg4, bits<2> op,
// SME2 multi-vec indexed long long MLA two sources 64-bit
-multiclass sme2_mla_ll_array_vg2_index_64b<string mnemonic, bits<2> op> {
- def NAME: sme2_mla_ll_array_vg24_index_64b<0b0, op, ZZ_h_mul_r, mnemonic>{
+multiclass sme2_mla_ll_array_vg2_index_64b<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
+ def NAME: sme2_mla_ll_array_vg24_index_64b<0b0, op, ZZ_h_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> {
bits<4> Zn;
let Inst{9-6} = Zn;
}
+ def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range, ZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
+
+ def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange1s4>;
+
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
- (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH:$i), 0>;
+ (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i), 0>;
}
// SME2 multi-vec indexed long long MLA four sources 64-bit
-multiclass sme2_mla_ll_array_vg4_index_64b<string mnemonic, bits<2> op> {
- def NAME: sme2_mla_ll_array_vg24_index_64b<0b1, op, ZZZZ_h_mul_r, mnemonic>{
+multiclass sme2_mla_ll_array_vg4_index_64b<string mnemonic, bits<2> op, SDPatternOperator intrinsic> {
+ def NAME: sme2_mla_ll_array_vg24_index_64b<0b1, op, ZZZZ_h_mul_r, mnemonic>, SMEPseudo2Instr<NAME, 1> {
bits<3> Zn;
let Inst{9-7} = Zn;
let Inst{6} = 0b0;
}
+ def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, uimm1s4range, ZZZZ_h_mul_r, ZPR4b16, VectorIndexH32b_timm, SMEMatrixArray>;
+
+ def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, uimm1s4range, ZPR4b16, nxv8i16, VectorIndexH32b_timm, tileslicerange1s4>;
+
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm$i",
- (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH:$i), 0>;
+ (!cast<Instruction>(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i), 0>;
}
@@ -2668,6 +2734,16 @@ class sme2_mla_ll_array_single<string mnemonic, bits<4> op,
let Constraints = "$ZAda = $_ZAda";
}
+multiclass sme2_mla_ll_array_single<string mnemonic, bits<4> op,
+ MatrixOperand matrix_ty, ZPRRegOp vector_ty,
+ ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> {
+ def NAME : sme2_mla_ll_array_single<mnemonic, op, matrix_ty, vector_ty, zpr_ty>, SMEPseudo2Instr<NAME, 1>;
+
+ def NAME # _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm2s4range, vector_ty, zpr_ty, SMEMatrixArray>;
+
+ def : SME2_ZA_TwoOp_Multi_Single_Pat<NAME, intrinsic, uimm2s4range, zpr_ty, vt, tileslicerange2s4>;
+}
+
class sme2_mla_ll_array_vg24_single<bits<5> op, MatrixOperand matrix_ty,
RegisterOperand vector_ty, ZPRRegOp zpr_ty,
string mnemonic>
@@ -2703,12 +2779,33 @@ multiclass sme2_mla_ll_array_vg24_single<string mnemonic, bits<5> op,
RegisterOperand multi_vector_ty,
ZPRRegOp zpr_ty> {
def NAME: sme2_mla_ll_array_vg24_single<op, matrix_ty, multi_vector_ty,
- zpr_ty, mnemonic>;
+ zpr_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
+
+ def NAME # _PSEUDO : sme2_za_array_2op_multi_single_pseudo<NAME, uimm1s4range, multi_vector_ty, zpr_ty, SMEMatrixArray>;
def : InstAlias<mnemonic # "\t$ZAd[$Rv, $imm], $Zn, $Zm",
(!cast<Instruction>(NAME) matrix_ty:$ZAd, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, multi_vector_ty:$Zn, zpr_ty:$Zm), 0>;
}
+multiclass sme2_mla_ll_array_vg2_single<string mnemonic, bits<5> op,
+ MatrixOperand matrix_ty,
+ RegisterOperand multi_vector_ty,
+ ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> {
+
+ defm NAME: sme2_mla_ll_array_vg24_single<mnemonic, op, matrix_ty, multi_vector_ty, zpr_ty>;
+
+ def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat<NAME, intrinsic, uimm1s4range, zpr_ty, vt, tileslicerange1s4>;
+}
+
+multiclass sme2_mla_ll_array_vg4_single<string mnemonic, bits<5> op,
+ MatrixOperand matrix_ty,
+ RegisterOperand multi_vector_ty,
+ ZPRRegOp zpr_ty, ValueType vt, SDPatternOperator intrinsic> {
+ defm NAME: sme2_mla_ll_array_vg24_single<mnemonic, op, matrix_ty, multi_vector_ty, zpr_ty>;
+
+ def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat<NAME, intrinsic, uimm1s4range, zpr_ty, vt, tileslicerange1s4>;
+}
+
// SME2 multiple vectors long long MLA two sources
class sme2_mla_ll_array_vg2_multi<bits<4> op, MatrixOperand matrix_ty,
@@ -2740,8 +2837,13 @@ class sme2_mla_ll_array_vg2_multi<bits<4> op, MatrixOperand matrix_ty,
multiclass sme2_mla_ll_array_vg2_multi<string mnemonic, bits<4> op,
MatrixOperand matrix_ty,
- RegisterOperand vector_ty> {
- def NAME : sme2_mla_ll_array_vg2_multi<op, matrix_ty, vector_ty, mnemonic>;
+ RegisterOperand vector_ty,
+ ValueType vt, SDPatternOperator intrinsic> {
+ def NAME : sme2_mla_ll_array_vg2_multi<op, matrix_ty, vector_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
+
+ def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm1s4range, vector_ty, SMEMatrixArray>;
+
+ def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat<NAME, intrinsic, uimm1s4range, vt, tileslicerange1s4>;
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
(!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, vector_ty:$Zm), 0>;
@@ -2779,8 +2881,13 @@ class sme2_mla_ll_array_vg4_multi<bits<4> op,MatrixOperand matrix_ty,
multiclass sme2_mla_ll_array_vg4_multi<string mnemonic, bits<4> op,
MatrixOperand matrix_ty,
- RegisterOperand vector_ty> {
- def NAME : sme2_mla_ll_array_vg4_multi<op, matrix_ty, vector_ty, mnemonic>;
+ RegisterOperand vector_ty,
+ ValueType vt, SDPatternOperator intrinsic> {
+ def NAME : sme2_mla_ll_array_vg4_multi<op, matrix_ty, vector_ty, mnemonic>, SMEPseudo2Instr<NAME, 1>;
+
+ def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo<NAME, uimm1s4range, vector_ty, SMEMatrixArray>;
+
+ def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat<NAME, intrinsic, uimm1s4range, vt, tileslicerange1s4>;
def : InstAlias<mnemonic # "\t$ZAda[$Rv, $imm], $Zn, $Zm",
(!cast<Instruction>(NAME) matrix_ty:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, vector_ty:$Zn, vector_ty:$Zm), 0>;
@@ -2789,16 +2896,24 @@ multiclass sme2_mla_ll_array_vg4_multi<string mnemonic, bits<4> op,
//===----------------------------------------------------------------------===//
// SME2 Outer Product and Accumulate
-multiclass sme2_int_mopx_tile<string mnemonic, bits<3> op> {
- def NAME : sme_int_outer_product_inst<op, 0b0, 0b1, TileOp32, ZPR16, mnemonic> {
+multiclass sme2_int_mopx_tile<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
+ def NAME : sme_int_outer_product_inst<op, 0b0, 0b1, TileOp32, ZPR16, mnemonic>, SMEPseudo2Instr<NAME, 1> {
bits<2> ZAda;
let Inst{1-0} = ZAda;
let Inst{2} = 0b0;
}
+
+ def _PSEUDO : sme_outer_product_pseudo<ZPR16, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
+
+ def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_3, nxv8i1, nxv8i16>;
}
-multiclass sme2_bfp_mopx_tile<string mnemonic, bits<3> op> {
- def NAME : sme_outer_product_widening_inst<op, ZPR32, mnemonic>;
+multiclass sme2_int_bmopx_tile<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
+ def NAME : sme_outer_product_widening_inst<op, ZPR32, mnemonic>, SMEPseudo2Instr<NAME, 1>;
+
+ def _PSEUDO : sme_outer_product_pseudo<ZPR32, SMEMatrixTileS>, SMEPseudo2Instr<NAME, 0>;
+
+ def : SME_ZA_Tile_TwoPred_TwoVec_Pat<NAME, intrinsic, timm32_0_3, nxv4i1, nxv4i32>;
}
//===----------------------------------------------------------------------===///
@@ -3027,13 +3142,13 @@ multiclass sme2_mova_vec_to_tile_or_array_aliases<int prefer, Instruction inst,
}
// SME2 move vector to tile, two registers
-multiclass sme2_mova_vec_to_tile_vg2_multi_base<bit v, string mnemonic> {
+multiclass sme2_mova_vec_to_tile_vg2_multi_base<bit v, string mnemonic, SDPatternOperator intrinsic> {
def _B : sme2_mova_vec_to_tile_vg2_multi_base<0b00, v,
!if(v, TileVectorOpV8,
TileVectorOpH8),
uimm3s2range, ZZ_b_mul_r,
- mnemonic> {
+ mnemonic>, SMEPseudo2Instr<NAME # _B, 1> {
bits<3> imm;
let Inst{2-0} = imm;
}
@@ -3042,7 +3157,7 @@ multiclass sme2_mova_vec_to_tile_vg2_multi_base<bit v, string mnemonic> {
!if(v, TileVectorOpV16,
TileVectorOpH16),
uimm2s2range, ZZ_h_mul_r,
- mnemonic> {
+ mnemonic>, SMEPseudo2Instr<NAME # _H, 1> {
bits<1> ZAd;
bits<2> imm;
let Inst{2} = ZAd;
@@ -3053,7 +3168,7 @@ multiclass sme2_mova_vec_to_tile_vg2_multi_base<bit v, string mnemonic> {
!if(v, TileVectorOpV32,
TileVectorOpH32),
uimm1s2range, ZZ_s_mul_r,
- mnemonic> {
+ mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
bits<2> ZAd;
bits<1> imm;
let Inst{2-1} = ZAd;
@@ -3064,11 +3179,25 @@ multiclass sme2_mova_vec_to_tile_vg2_multi_base<bit v, string mnemonic> {
!if(v, TileVectorOpV64,
TileVectorOpH64),
uimm0s2range, ZZ_d_mul_r,
- mnemonic> {
+ mnemonic>, SMEPseudo2Instr<NAME # _D, 1> {
bits<3> ZAd;
let Inst{2-0} = ZAd;
}
+ def NAME # _B_PSEUDO : sme2_move_to_tile_pseudo<NAME # _B, sme_elm_idx0_0, uimm3s2range, ZZ_b_mul_r, SMEMatrixTileB>;
+ def NAME # _H_PSEUDO : sme2_move_to_tile_pseudo<NAME # _H, sme_elm_idx0_1, uimm2s2range, ZZ_h_mul_r, SMEMatrixTileH>;
+ def NAME # _S_PSEUDO : sme2_move_to_tile_pseudo<NAME # _S, sme_elm_idx0_3, uimm1s2range, ZZ_s_mul_r, SMEMatrixTileS>;
+ def NAME # _D_PSEUDO : sme2_move_to_tile_pseudo<NAME # _D, sme_elm_idx0_7, uimm0s2range, ZZ_d_mul_r, SMEMatrixTileD>;
+
+ def : SME2_Tile_VG2_Multi_Pat<NAME # _B, intrinsic, sme_elm_idx0_0, nxv16i8, uimm3s2range, tileslicerange3s2>;
+ def : SME2_Tile_VG2_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8i16, uimm2s2range, tileslicerange2s2>;
+ def : SME2_Tile_VG2_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8f16, uimm2s2range, tileslicerange2s2>;
+ def : SME2_Tile_VG2_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8bf16, uimm2s2range, tileslicerange2s2>;
+ def : SME2_Tile_VG2_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4i32, uimm1s2range, tileslicerange1s2>;
+ def : SME2_Tile_VG2_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4f32, uimm1s2range, tileslicerange1s2>;
+ def : SME2_Tile_VG2_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2i64, uimm0s2range, tileslicerange0s2>;
+ def : SME2_Tile_VG2_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2f64, uimm0s2range, tileslicerange0s2>;
+
defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _B),
!if(v, TileVectorOpV8,
TileVectorOpH8),
@@ -3145,9 +3274,10 @@ multiclass sme2_mova_vec_to_tile_vg2_multi_base<bit v, string mnemonic> {
"mova">;
}
-multiclass sme2_mova_vec_to_tile_vg2_multi<string mnemonic>{
- defm _H : sme2_mova_vec_to_tile_vg2_multi_base<0b0, mnemonic>;
- defm _V : sme2_mova_vec_to_tile_vg2_multi_base<0b1, mnemonic>;
+multiclass sme2_mova_vec_to_tile_vg2_multi<string mnemonic,
+ SDPatternOperator int_h, SDPatternOperator int_v>{
+ defm _H : sme2_mova_vec_to_tile_vg2_multi_base<0b0, mnemonic, int_h>;
+ defm _V : sme2_mova_vec_to_tile_vg2_multi_base<0b1, mnemonic, int_v>;
}
class sme2_mova_vec_to_tile_vg4_multi_base<bits<2> sz, bit v, bits<3> op,
@@ -3176,13 +3306,13 @@ class sme2_mova_vec_to_tile_vg4_multi_base<bits<2> sz, bit v, bits<3> op,
}
// SME2 move vector to tile, four registers
-multiclass sme2_mova_vec_to_tile_vg4_multi_base<bit v, string mnemonic> {
+multiclass sme2_mova_vec_to_tile_vg4_multi_base<bit v, string mnemonic, SDPatternOperator intrinsic> {
def _B : sme2_mova_vec_to_tile_vg4_multi_base<0b00, v, {0,?,?},
!if(v, TileVectorOpV8,
TileVectorOpH8),
uimm2s4range, ZZZZ_b_mul_r,
- mnemonic> {
+ mnemonic>, SMEPseudo2Instr<NAME # _B, 1> {
bits<2> imm;
let Inst{1-0} = imm;
}
@@ -3191,7 +3321,7 @@ multiclass sme2_mova_vec_to_tile_vg4_multi_base<bit v, string mnemonic> {
!if(v, TileVectorOpV16,
TileVectorOpH16),
uimm1s4range, ZZZZ_h_mul_r,
- mnemonic> {
+ mnemonic>, SMEPseudo2Instr<NAME # _H, 1> {
bits<1> ZAd;
bits<1> imm;
let Inst{1} = ZAd;
@@ -3202,7 +3332,7 @@ multiclass sme2_mova_vec_to_tile_vg4_multi_base<bit v, string mnemonic> {
!if(v, TileVectorOpV32,
TileVectorOpH32),
uimm0s4range, ZZZZ_s_mul_r,
- mnemonic> {
+ mnemonic>, SMEPseudo2Instr<NAME # _S, 1> {
bits<2> ZAd;
let Inst{1-0} = ZAd;
}
@@ -3211,11 +3341,25 @@ multiclass sme2_mova_vec_to_tile_vg4_multi_base<bit v, string mnemonic> {
!if(v, TileVectorOpV64,
TileVectorOpH64),
uimm0s4range, ZZZZ_d_mul_r,
- mnemonic> {
+ mnemonic>, SMEPseudo2Instr<NAME # _D, 1> {
bits<3> ZAd;
let Inst{2-0} = ZAd;
}
+ def NAME # _B_PSEUDO : sme2_move_to_tile_pseudo<NAME # _B, sme_elm_idx0_0, uimm2s4range, ZZZZ_b_mul_r, SMEMatrixTileB>;
+ def NAME # _H_PSEUDO : sme2_move_to_tile_pseudo<NAME # _H, sme_elm_idx0_1, uimm1s4range, ZZZZ_h_mul_r, SMEMatrixTileH>;
+ def NAME # _S_PSEUDO : sme2_move_to_tile_pseudo<NAME # _S, sme_elm_idx0_3, uimm0s4range, ZZZZ_s_mul_r, SMEMatrixTileS>;
+ def NAME # _D_PSEUDO : sme2_move_to_tile_pseudo<NAME # _D, sme_elm_idx0_7, uimm0s4range, ZZZZ_d_mul_r, SMEMatrixTileD>;
+
+ def : SME2_Tile_VG4_Multi_Pat<NAME # _B, intrinsic, sme_elm_idx0_0, nxv16i8, uimm2s4range, tileslicerange2s4>;
+ def : SME2_Tile_VG4_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8i16, uimm1s4range, tileslicerange1s4>;
+ def : SME2_Tile_VG4_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8f16, uimm1s4range, tileslicerange1s4>;
+ def : SME2_Tile_VG4_Multi_Pat<NAME # _H, intrinsic, sme_elm_idx0_1, nxv8bf16, uimm1s4range, tileslicerange1s4>;
+ def : SME2_Tile_VG4_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4i32, uimm0s4range, tileslicerange0s4>;
+ def : SME2_Tile_VG4_Multi_Pat<NAME # _S, intrinsic, sme_elm_idx0_3, nxv4f32, uimm0s4range, tileslicerange0s4>;
+ def : SME2_Tile_VG4_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2i64, uimm0s4range, tileslicerange0s4>;
+ def : SME2_Tile_VG4_Multi_Pat<NAME # _D, intrinsic, sme_elm_idx0_7, nxv2f64, uimm0s4range, tileslicerange0s4>;
+
defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast<Instruction>(NAME # _B),
!if(v, TileVectorOpV8,
TileVectorOpH8),
@@ -3268,9 +3412,10 @@ multiclass sme2_mova_vec_to_tile_vg4_multi_base<bit v, string mnemonic> {
}
-multiclass sme2_mova_vec_to_tile_vg4_multi<string mnemonic>{
- defm _H : sme2_mova_vec_to_tile_vg4_multi_base<0b0, mnemonic>;
- defm _V : sme2_mova_vec_to_tile_vg4_multi_base<0b1, mnemonic>;
+multiclass sme2_mova_vec_to_tile_vg4_multi<string mnemonic,
+ SDPatternOperator int_h, SDPatternOperator int_v>{
+ defm _H : sme2_mova_vec_to_tile_vg4_multi_base<0b0, mnemonic, int_h>;
+ defm _V : sme2_mova_vec_to_tile_vg4_multi_base<0b1, mnemonic, int_v>;
}
// SME Move into Array
@@ -3296,13 +3441,18 @@ class sme2_mova_vec_to_array_vg24_multi< bits<5> op, RegisterOperand array_ty,
}
// MOVA (vector to array, two registers)
-multiclass sme2_mova_vec_to_array_vg2_multi<string mnemonic> {
+multiclass sme2_mova_vec_to_array_vg2_multi<string mnemonic, SDPatternOperator intrinsic> {
def NAME : sme2_mova_vec_to_array_vg24_multi<{0,?,?,?,?}, MatrixOp64,
- ZZ_d_mul_r, mnemonic, "vgx2">{
+ ZZ_d_mul_r, mnemonic, "vgx2">, SMEPseudo2Instr<NAME, 1> {
bits<4> Zn;
let Inst{9-6} = Zn;
}
+ def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZ_d_mul_r, SMEMatrixArray>;
+
+ def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>;
+ def : SME2_ZA_VG1x2_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>;
+
defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
MatrixOp8,
MatrixIndexGPR32Op8_11,
@@ -3384,13 +3534,18 @@ multiclass sme2_mova_vec_to_array_vg2_multi<string mnemonic> {
}
// MOVA (vector to array, four registers)
-multiclass sme2_mova_vec_to_array_vg4_multi<string mnemonic> {
+multiclass sme2_mova_vec_to_array_vg4_multi<string mnemonic, SDPatternOperator intrinsic> {
def NAME : sme2_mova_vec_to_array_vg24_multi<{1,?,?,?,0}, MatrixOp64,
- ZZZZ_d_mul_r, mnemonic, "vgx4"> {
+ ZZZZ_d_mul_r, mnemonic, "vgx4">, SMEPseudo2Instr<NAME, 1> {
bits<3> Zn;
let Inst{9-7} = Zn;
}
+ def NAME # _PSEUDO : sme2_move_to_za_pseudo<NAME, sme_elm_idx0_7, ZZZZ_d_mul_r, SMEMatrixArray>;
+
+ def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2i64, sme_elm_idx0_7, tileslice16>;
+ def : SME2_ZA_VG1x4_Multi_Pat<NAME, intrinsic, nxv2f64, sme_elm_idx0_7, tileslice16>;
+
defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast<Instruction>(NAME),
MatrixOp8,
MatrixIndexGPR32Op8_11,
@@ -3918,7 +4073,7 @@ multiclass sme2_mova_array_to_vec_vg4_multi<bits<4> opc, string mnemonic> {
//===----------------------------------------------------------------------===//
// SME2 multi-vec saturating shift right narrow
class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>
- : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4),
+ : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4),
mnemonic, "\t$Zd, $Zn, $imm4",
"", []>, Sched<[]> {
bits<4> imm4;
@@ -3933,8 +4088,10 @@ class sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u>
let Inst{4-0} = Zd;
}
-multiclass sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u> {
+multiclass sme2_sat_shift_vector_vg2<string mnemonic, bit op, bit u, SDPatternOperator intrinsic> {
def _H : sme2_sat_shift_vector_vg2<mnemonic, op, u>;
+
+ def : SME2_Sat_Shift_VG2_Pat<NAME # _H, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>;
}
class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty,
@@ -3956,18 +4113,21 @@ class sme2_sat_shift_vector_vg4<bits<2> sz, bits<3> op, ZPRRegOp zpr_ty,
let Inst{4-0} = Zd;
}
-multiclass sme2_sat_shift_vector_vg4<string mnemonic, bits<3> op> {
- def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, vecshiftR32,
+multiclass sme2_sat_shift_vector_vg4<string mnemonic, bits<3> op, SDPatternOperator intrinsic> {
+ def _B : sme2_sat_shift_vector_vg4<{0,1}, op, ZPR8, ZZZZ_s_mul_r, tvecshiftR32,
mnemonic>{
bits<5> imm;
let Inst{20-16} = imm;
}
- def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, vecshiftR64,
+ def _H : sme2_sat_shift_vector_vg4<{1,?}, op, ZPR16, ZZZZ_d_mul_r, tvecshiftR64,
mnemonic> {
bits<6> imm;
let Inst{22} = imm{5};
let Inst{20-16} = imm{4-0};
}
+
+ def : SME2_Sat_Shift_VG4_Pat<NAME # _B, intrinsic, nxv16i8, nxv4i32, tvecshiftR32>;
+ def : SME2_Sat_Shift_VG4_Pat<NAME # _H, intrinsic, nxv8i16, nxv2i64, tvecshiftR64>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index cef8d41218e8..118862b8c317 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -10,6 +10,36 @@
//
//===----------------------------------------------------------------------===//
+// Helper class to find the largest legal scalable vector type that can hold VT.
+// Non-matches return VT, which often means VT is the container type.
+class SVEContainerVT<ValueType VT> {
+ ValueType Value = !cond(
+ // fixed length vectors
+ !eq(VT, v8i8): nxv16i8,
+ !eq(VT, v16i8): nxv16i8,
+ !eq(VT, v4i16): nxv8i16,
+ !eq(VT, v8i16): nxv8i16,
+ !eq(VT, v2i32): nxv4i32,
+ !eq(VT, v4i32): nxv4i32,
+ !eq(VT, v1i64): nxv2i64,
+ !eq(VT, v2i64): nxv2i64,
+ !eq(VT, v4f16): nxv8f16,
+ !eq(VT, v8f16): nxv8f16,
+ !eq(VT, v2f32): nxv4f32,
+ !eq(VT, v4f32): nxv4f32,
+ !eq(VT, v1f64): nxv2f64,
+ !eq(VT, v2f64): nxv2f64,
+ !eq(VT, v4bf16): nxv8bf16,
+ !eq(VT, v8bf16): nxv8bf16,
+ // unpacked scalable vectors
+ !eq(VT, nxv2f16): nxv8f16,
+ !eq(VT, nxv4f16): nxv8f16,
+ !eq(VT, nxv2f32): nxv4f32,
+ !eq(VT, nxv2bf16): nxv8bf16,
+ !eq(VT, nxv4bf16): nxv8bf16,
+ true : VT);
+}
+
def SDT_AArch64Setcc : SDTypeProfile<1, 4, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVec<3>,
SDTCVecEltisVT<0, i1>, SDTCVecEltisVT<1, i1>, SDTCisSameAs<2, 3>,
@@ -17,6 +47,10 @@ def SDT_AArch64Setcc : SDTypeProfile<1, 4, [
]>;
def AArch64setcc_z : SDNode<"AArch64ISD::SETCC_MERGE_ZERO", SDT_AArch64Setcc>;
+def AArch64setcc_z_oneuse : PatFrag<(ops node:$pg, node:$op1, node:$op2, node:$cc),
+ (AArch64setcc_z node:$pg, node:$op1, node:$op2, node:$cc), [{
+ return N->hasOneUse();
+}]>;
def SVEPatternOperand : AsmOperandClass {
let Name = "SVEPattern";
@@ -250,6 +284,7 @@ def SVEShiftImmR64 : ComplexPattern<i64, 1, "SelectSVEShiftImm<1, 64, true>", []
def SVEShiftSplatImmR : ComplexPattern<iAny, 1, "SelectSVEShiftSplatImmR", []>;
def SVEAllActive : ComplexPattern<untyped, 0, "SelectAllActivePredicate", []>;
+def SVEAnyPredicate : ComplexPattern<untyped, 0, "SelectAnyPredicate", []>;
class SVEExactFPImm<string Suffix, string ValA, string ValB> : AsmOperandClass {
let Name = "SVEExactFPImmOperand" # Suffix;
@@ -328,6 +363,7 @@ class sve_int_ptrue<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
let Defs = !if(!eq (opc{0}, 1), [NZCV], []);
let ElementSize = pprty.ElementSize;
+ let hasSideEffects = 0;
let isReMaterializable = 1;
}
@@ -398,14 +434,24 @@ multiclass SVE_1_Op_PassthruUndef_Round_Pat<ValueType vtd, SDPatternOperator op,
(inst $Op3, $Op1, $Op2)>;
}
+def SVEDup0 : ComplexPattern<vAny, 0, "SelectDupZero", []>;
+def SVEDupNeg0 : ComplexPattern<vAny, 0, "SelectDupNegativeZero", []>;
+
+class SVE_1_Op_PassthruZero_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, Instruction inst>
+ : Pat<(vtd (op (vtd (SVEDup0)), vt1:$Op1, vt2:$Op2)),
+ (inst (IMPLICIT_DEF), $Op1, $Op2)>;
+
class SVE_1_Op_Imm_OptLsl_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
ValueType it, ComplexPattern cpx, Instruction inst>
: Pat<(vt (op (vt zprty:$Op1), (vt (splat_vector (it (cpx i32:$imm, i32:$shift)))))),
(inst $Op1, i32:$imm, i32:$shift)>;
-class SVE_1_Op_Imm_Arith_All_Active<ValueType vt, ValueType pt, SDPatternOperator op,
- ZPRRegOp zprty, ValueType it, ComplexPattern cpx, Instruction inst>
- : Pat<(vt (op (pt (SVEAllActive)), (vt zprty:$Op1), (vt (splat_vector (it (cpx i32:$imm)))))),
+class SVE_1_Op_Imm_Arith_Any_Predicate<ValueType vt, ValueType pt,
+ SDPatternOperator op, ZPRRegOp zprty,
+ ValueType it, ComplexPattern cpx,
+ Instruction inst>
+ : Pat<(vt (op (pt (SVEAnyPredicate)), (vt zprty:$Op1), (vt (splat_vector (it (cpx i32:$imm)))))),
(inst $Op1, i32:$imm)>;
class SVE_1_Op_Imm_Log_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
@@ -430,6 +476,12 @@ class SVE_2_Op_Pred_All_Active_Pt<ValueType vtd, SDPatternOperator op,
: Pat<(vtd (op (pt (SVEAllActive:$Op1)), vt1:$Op2, vt2:$Op3)),
(inst $Op1, $Op2, $Op3)>;
+class SVE_2_Op_Pred_Any_Predicate<ValueType vtd, SDPatternOperator op,
+ ValueType pt, ValueType vt1, ValueType vt2,
+ Instruction inst>
+: Pat<(vtd (op (pt (SVEAnyPredicate)), vt1:$Op1, vt2:$Op2)),
+ (inst $Op1, $Op2)>;
+
class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, Instruction inst>
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
@@ -454,6 +506,18 @@ class SVE_2_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, (vt2 ImmTy:$Op2))),
(inst $Op1, ImmTy:$Op2)>;
+multiclass SVE2p1_Cntp_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ Instruction inst> {
+ def : Pat<(vtd (op vt1:$Op1, (i32 2))), (inst $Op1, 0)>;
+ def : Pat<(vtd (op vt1:$Op1, (i32 4))), (inst $Op1, 1)>;
+}
+
+multiclass SVE2p1_While_PN_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ Instruction inst> {
+ def : Pat<(vtd (op vt1:$Op1, vt1:$Op2, (i32 2))), (inst $Op1, $Op2, 0)>;
+ def : Pat<(vtd (op vt1:$Op1, vt1:$Op2, (i32 4))), (inst $Op1, $Op2, 1)>;
+}
+
class SVE_3_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, Operand ImmTy,
Instruction inst>
@@ -466,7 +530,6 @@ class SVE_4_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, (vt4 ImmTy:$Op4))),
(inst $Op1, $Op2, $Op3, ImmTy:$Op4)>;
-def SVEDup0 : ComplexPattern<vAny, 0, "SelectDupZero", []>;
def SVEDup0Undef : ComplexPattern<vAny, 0, "SelectDupZeroOrUndef", []>;
let AddedComplexity = 1 in {
@@ -515,12 +578,18 @@ class SVE_Shift_DupImm_Pred_Pat<ValueType vt, SDPatternOperator op,
: Pat<(vt (op pt:$Pg, vt:$Rn, (vt (splat_vector (it (cast i32:$imm)))))),
(inst $Pg, $Rn, i32:$imm)>;
-class SVE_Shift_DupImm_All_Active_Pat<ValueType vt, SDPatternOperator op,
- ValueType pt, ValueType it,
- ComplexPattern cast, Instruction inst>
-: Pat<(vt (op (pt (SVEAllActive)), vt:$Rn, (vt (splat_vector (it (cast i32:$imm)))))),
+class SVE_Shift_DupImm_Any_Predicate_Pat<ValueType vt, SDPatternOperator op,
+ ValueType pt, ValueType it,
+ ComplexPattern cast, Instruction inst>
+: Pat<(vt (op (pt (SVEAnyPredicate)), vt:$Rn, (vt (splat_vector (it (cast i32:$imm)))))),
(inst $Rn, i32:$imm)>;
+class SVE_2_Op_Imm_Pat_Zero<ValueType vt, SDPatternOperator op, ValueType pt,
+ ValueType it, ComplexPattern cpx, Instruction inst>
+: Pat<(vt (op pt:$Pg, (vselect pt:$Pg, vt:$Op1, (SVEDup0)),
+ (vt (splat_vector (it (cpx i32:$imm)))))),
+ (inst $Pg, $Op1, i32:$imm)>;
+
class SVE_2_Op_Fp_Imm_Pat<ValueType vt, SDPatternOperator op,
ValueType pt, ValueType it,
FPImmLeaf immL, int imm,
@@ -549,6 +618,10 @@ class SVE_Shift_Add_All_Active_Pat<ValueType vtd, SDPatternOperator op, ValueTyp
: Pat<(vtd (add vt1:$Op1, (op (pt (SVEAllActive)), vt2:$Op2, vt3:$Op3))),
(inst $Op1, $Op2, $Op3)>;
+class SVE2p1_Sat_Shift_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt, Operand imm_ty>
+ : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))),
+ (!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>;
+
class SVE2p1_Cvt_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out_vt, ValueType in_vt>
: Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2)),
(!cast<Instruction>(name) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1))>;
@@ -558,11 +631,30 @@ class SVE2p1_Cvt_VG2_Pat<string name, SDPatternOperator intrinsic, ValueType out
//===----------------------------------------------------------------------===//
// Matches either an intrinsic, or a predicated operation with an all active predicate
-class EitherVSelectOrPassthruPatFrags<SDPatternOperator intrinsic, SDPatternOperator sdnode>
+class VSelectPredOrPassthruPatFrags<SDPatternOperator intrinsic, SDPatternOperator sdnode>
+: PatFrags<(ops node:$Pg, node:$Op1, node:$Op2), [
+ (intrinsic node:$Pg, node:$Op1, node:$Op2),
+ (vselect node:$Pg, (sdnode (SVEAllActive), node:$Op1, node:$Op2), node:$Op1),
+ ], [{
+ return N->getOpcode() != ISD::VSELECT || N->getOperand(1).hasOneUse();
+ }]>;
+// Same as above with a commutative operation
+class VSelectCommPredOrPassthruPatFrags<SDPatternOperator intrinsic, SDPatternOperator sdnode>
: PatFrags<(ops node:$Pg, node:$Op1, node:$Op2), [
(intrinsic node:$Pg, node:$Op1, node:$Op2),
(vselect node:$Pg, (sdnode (SVEAllActive), node:$Op1, node:$Op2), node:$Op1),
- ]>;
+ (vselect node:$Pg, (sdnode (SVEAllActive), node:$Op2, node:$Op1), node:$Op1),
+ ], [{
+ return N->getOpcode() != ISD::VSELECT || N->getOperand(1).hasOneUse();
+ }]>;
+// Similarly matches either an intrinsic, or an unpredicated operation with a select
+class VSelectUnpredOrPassthruPatFrags<SDPatternOperator intrinsic, SDPatternOperator sdnode>
+: PatFrags<(ops node:$Pg, node:$Op1, node:$Op2), [
+ (intrinsic node:$Pg, node:$Op1, node:$Op2),
+ (vselect node:$Pg, (sdnode node:$Op1, node:$Op2), node:$Op1),
+ ], [{
+ return N->getOpcode() != ISD::VSELECT || N->getOperand(1).hasOneUse();
+ }]>;
//
// Pseudo -> Instruction mappings
@@ -633,9 +725,13 @@ let hasNoSchedulingInfo = 1 in {
// Pseudos for passthru operands
//
let hasNoSchedulingInfo = 1 in {
- class PredOneOpPassthruPseudo<string name, ZPRRegOp zprty>
+ class PredOneOpPassthruPseudo<string name, ZPRRegOp zprty,
+ FalseLanesEnum flags = FalseLanesNone>
: SVEPseudo2Instr<name, 0>,
- Pseudo<(outs zprty:$Zd), (ins zprty:$Passthru, PPR3bAny:$Pg, zprty:$Zs), []>;
+ Pseudo<(outs zprty:$Zd), (ins zprty:$Passthru, PPR3bAny:$Pg, zprty:$Zs), []> {
+ let FalseLanes = flags;
+ let Constraints = !if(!eq(flags, FalseLanesZero), "$Zd = $Passthru,@earlyclobber $Zd", "");
+ }
}
//===----------------------------------------------------------------------===//
@@ -657,14 +753,13 @@ class sve_int_pfalse<bits<6> opc, string asm>
let Inst{8-4} = 0b00000;
let Inst{3-0} = Pd;
+ let hasSideEffects = 0;
let isReMaterializable = 1;
}
multiclass sve_int_pfalse<bits<6> opc, string asm> {
def NAME : sve_int_pfalse<opc, asm>;
- def : InstAlias<"pfalse\t$Pd", (!cast<Instruction>(NAME) PNR8:$Pd), 0>;
-
def : Pat<(nxv16i1 immAllZerosV), (!cast<Instruction>(NAME))>;
def : Pat<(nxv8i1 immAllZerosV), (!cast<Instruction>(NAME))>;
def : Pat<(nxv4i1 immAllZerosV), (!cast<Instruction>(NAME))>;
@@ -690,6 +785,7 @@ class sve_int_ptest<bits<6> opc, string asm, SDPatternOperator op>
let Inst{4-0} = 0b00000;
let Defs = [NZCV];
+ let hasSideEffects = 0;
let isCompare = 1;
}
@@ -724,8 +820,9 @@ class sve_int_pfirst_next<bits<2> sz8_64, bits<5> opc, string asm,
let Constraints = "$Pdn = $_Pdn";
let Defs = [NZCV];
- let isPTestLike = 1;
let ElementSize = pprty.ElementSize;
+ let hasSideEffects = 0;
+ let isPTestLike = 1;
}
multiclass sve_int_pfirst<bits<5> opc, string asm, SDPatternOperator op> {
@@ -772,6 +869,7 @@ class sve_int_count_r<bits<2> sz8_64, bits<5> opc, string asm,
!strconcat(asm, "\t$Rdn, $Pg, $_Rdn"),
!strconcat(asm, "\t$Rdn, $Pg"));
let Constraints = "$Rdn = $_Rdn";
+ let hasSideEffects = 0;
}
multiclass sve_int_count_r_s32<bits<5> opc, string asm,
@@ -855,6 +953,42 @@ multiclass sve_int_count_r_x64<bits<5> opc, string asm,
(!cast<Instruction>(NAME # _S) PPRAny:$pred, $Rn)>;
def : Pat<(i64 (combine_op GPR64:$Rn, (int_aarch64_sve_cntp_oneuse (nxv2i1 PPRAny:$pred), (nxv2i1 PPRAny:$pred)))),
(!cast<Instruction>(NAME # _D) PPRAny:$pred, $Rn)>;
+
+ // combine_op(x, trunc(cntp(all_active, p))) ==> inst p, x
+ def : Pat<(i32 (combine_op GPR32:$Rn, (trunc (int_aarch64_sve_cntp_oneuse (nxv16i1 (SVEAllActive)), (nxv16i1 PPRAny:$pred))))),
+ (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME # _B) PPRAny:$pred,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32)),
+ sub_32))>;
+ def : Pat<(i32 (combine_op GPR32:$Rn, (trunc (int_aarch64_sve_cntp_oneuse (nxv8i1 (SVEAllActive)), (nxv8i1 PPRAny:$pred))))),
+ (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME # _H) PPRAny:$pred,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32)),
+ sub_32))>;
+ def : Pat<(i32 (combine_op GPR32:$Rn, (trunc (int_aarch64_sve_cntp_oneuse (nxv4i1 (SVEAllActive)), (nxv4i1 PPRAny:$pred))))),
+ (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME # _S) PPRAny:$pred,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32)),
+ sub_32))>;
+ def : Pat<(i32 (combine_op GPR32:$Rn, (trunc (int_aarch64_sve_cntp_oneuse (nxv2i1 (SVEAllActive)), (nxv2i1 PPRAny:$pred))))),
+ (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME # _D) PPRAny:$pred,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32)),
+ sub_32))>;
+
+ // combine_op(x, trunc(cntp(p, p))) ==> inst p, x
+ def : Pat<(i32 (combine_op GPR32:$Rn, (trunc (int_aarch64_sve_cntp_oneuse (nxv16i1 PPRAny:$pred), (nxv16i1 PPRAny:$pred))))),
+ (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME # _B) PPRAny:$pred,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32)),
+ sub_32))>;
+ def : Pat<(i32 (combine_op GPR32:$Rn, (trunc (int_aarch64_sve_cntp_oneuse (nxv8i1 PPRAny:$pred), (nxv8i1 PPRAny:$pred))))),
+ (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME # _H) PPRAny:$pred,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32)),
+ sub_32))>;
+ def : Pat<(i32 (combine_op GPR32:$Rn, (trunc (int_aarch64_sve_cntp_oneuse (nxv4i1 PPRAny:$pred), (nxv4i1 PPRAny:$pred))))),
+ (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME # _S) PPRAny:$pred,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32)),
+ sub_32))>;
+ def : Pat<(i32 (combine_op GPR32:$Rn, (trunc (int_aarch64_sve_cntp_oneuse (nxv2i1 PPRAny:$pred), (nxv2i1 PPRAny:$pred))))),
+ (i32 (EXTRACT_SUBREG (!cast<Instruction>(NAME # _D) PPRAny:$pred,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32)),
+ sub_32))>;
}
class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
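The new patterns above fold trunc(cntp(...)) into the X-register count-and-accumulate form by widening the 32-bit operand with INSERT_SUBREG and narrowing the result again. A hedged C++ example of the kind of source these patterns target when built with SVE enabled (actual codegen still depends on the one-use checks in the cntp_oneuse fragment):

#include <arm_sve.h>
#include <cstdint>

// Adds the number of active byte lanes in P to a 32-bit counter. The
// truncated svcntp result is the shape the patterns above are matching.
uint32_t bump_counter(uint32_t X, svbool_t P) {
  return X + static_cast<uint32_t>(svcntp_b8(P, P));
}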
@@ -877,6 +1011,7 @@ class sve_int_count_v<bits<2> sz8_64, bits<5> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve_int_count_v<bits<5> opc, string asm,
@@ -915,6 +1050,8 @@ class sve_int_pcount_pred<bits<2> sz8_64, bits<4> opc, string asm,
let Inst{9} = opc{0};
let Inst{8-5} = Pn;
let Inst{4-0} = Rd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_pcount_pred<bits<4> opc, string asm,
@@ -951,6 +1088,7 @@ class sve_int_count<bits<3> opc, string asm>
let Inst{9-5} = pattern;
let Inst{4-0} = Rd;
+ let hasSideEffects = 0;
let isReMaterializable = 1;
}
@@ -993,6 +1131,7 @@ class sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty>
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve_int_countvlv<bits<5> opc, string asm, ZPRRegOp zprty,
@@ -1027,6 +1166,7 @@ class sve_int_pred_pattern_a<bits<3> opc, string asm>
let Inst{4-0} = Rdn;
let Constraints = "$Rdn = $_Rdn";
+ let hasSideEffects = 0;
}
multiclass sve_int_pred_pattern_a<bits<3> opc, string asm,
@@ -1093,6 +1233,7 @@ class sve_int_pred_pattern_b<bits<5> opc, string asm, RegisterOperand dt,
!strconcat(asm, "\t$Rdn, $pattern, mul $imm4"));
let Constraints = "$Rdn = $_Rdn";
+ let hasSideEffects = 0;
}
multiclass sve_int_pred_pattern_b_s32<bits<5> opc, string asm,
@@ -1158,6 +1299,8 @@ class sve_int_perm_dup_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{21-10} = 0b100000001110;
let Inst{9-5} = Rn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_dup_r<string asm, SDPatternOperator op> {
@@ -1192,6 +1335,8 @@ class sve_int_perm_dup_i<bits<5> tsz, Operand immtype, string asm,
let Inst{15-10} = 0b001000;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_dup_i<string asm> {
@@ -1296,6 +1441,8 @@ class sve_int_perm_tbl<bits<2> sz8_64, bits<2> opc, string asm, ZPRRegOp zprty,
let Inst{10} = 0b0;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_tbl<string asm, SDPatternOperator op> {
@@ -1391,6 +1538,7 @@ class sve2_int_perm_tbx<bits<2> sz8_64, bits<2> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let hasSideEffects = 0;
}
multiclass sve2_int_perm_tbx<string asm, bits<2> opc, SDPatternOperator op> {
@@ -1423,6 +1571,8 @@ class sve_int_perm_reverse_z<bits<2> sz8_64, string asm, ZPRRegOp zprty>
let Inst{21-10} = 0b111000001110;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_reverse_z<string asm, SDPatternOperator op> {
@@ -1448,11 +1598,12 @@ multiclass sve_int_perm_reverse_z<string asm, SDPatternOperator op> {
def : SVE_1_Op_Pat<nxv8bf16, op, nxv8bf16, !cast<Instruction>(NAME # _H)>;
}
-class sve_int_perm_reverse_p<bits<2> sz8_64, string asm, PPRRegOp pprty>
+class sve_int_perm_reverse_p<bits<2> sz8_64, string asm, PPRRegOp pprty,
+ SDPatternOperator op>
: I<(outs pprty:$Pd), (ins pprty:$Pn),
asm, "\t$Pd, $Pn",
"",
- []>, Sched<[]> {
+ [(set nxv16i1:$Pd, (op nxv16i1:$Pn))]>, Sched<[]> {
bits<4> Pd;
bits<4> Pn;
let Inst{31-24} = 0b00000101;
@@ -1461,18 +1612,22 @@ class sve_int_perm_reverse_p<bits<2> sz8_64, string asm, PPRRegOp pprty>
let Inst{8-5} = Pn;
let Inst{4} = 0b0;
let Inst{3-0} = Pd;
+
+ let hasSideEffects = 0;
}
-multiclass sve_int_perm_reverse_p<string asm, SDPatternOperator op> {
- def _B : sve_int_perm_reverse_p<0b00, asm, PPR8>;
- def _H : sve_int_perm_reverse_p<0b01, asm, PPR16>;
- def _S : sve_int_perm_reverse_p<0b10, asm, PPR32>;
- def _D : sve_int_perm_reverse_p<0b11, asm, PPR64>;
+multiclass sve_int_perm_reverse_p<string asm, SDPatternOperator ir_op,
+ SDPatternOperator op_b16,
+ SDPatternOperator op_b32,
+ SDPatternOperator op_b64> {
+ def _B : sve_int_perm_reverse_p<0b00, asm, PPR8, ir_op>;
+ def _H : sve_int_perm_reverse_p<0b01, asm, PPR16, op_b16>;
+ def _S : sve_int_perm_reverse_p<0b10, asm, PPR32, op_b32>;
+ def _D : sve_int_perm_reverse_p<0b11, asm, PPR64, op_b64>;
- def : SVE_1_Op_Pat<nxv16i1, op, nxv16i1, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Pat<nxv8i1, op, nxv8i1, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Pat<nxv4i1, op, nxv4i1, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Pat<nxv2i1, op, nxv2i1, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Pat<nxv8i1, ir_op, nxv8i1, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Pat<nxv4i1, ir_op, nxv4i1, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Pat<nxv2i1, ir_op, nxv2i1, !cast<Instruction>(NAME # _D)>;
}
class sve_int_perm_unpk<bits<2> sz16_64, bits<2> opc, string asm,
@@ -1489,6 +1644,8 @@ class sve_int_perm_unpk<bits<2> sz16_64, bits<2> opc, string asm,
let Inst{15-10} = 0b001110;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_unpk<bits<2> opc, string asm, SDPatternOperator op> {
@@ -1517,6 +1674,7 @@ class sve_int_perm_insrs<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_insrs<string asm, SDPatternOperator op> {
@@ -1547,6 +1705,7 @@ class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_insrv<string asm, SDPatternOperator op> {
@@ -1598,6 +1757,7 @@ class sve_int_perm_extract_i<string asm>
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_extract_i<string asm, SDPatternOperator op> {
@@ -1620,6 +1780,8 @@ class sve2_int_perm_extract_i_cons<string asm>
let Inst{12-10} = imm8{2-0};
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
//===----------------------------------------------------------------------===//
@@ -1643,6 +1805,8 @@ class sve_int_sel_vvv<bits<2> sz8_64, string asm, ZPRRegOp zprty>
let Inst{13-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_sel_vvv<string asm, SDPatternOperator op> {
@@ -1706,7 +1870,7 @@ class sve_int_pred_log<bits<4> opc, string asm>
!strconcat(asm, "\t$Pd, $Pg/z, $Pn, $Pm"));
let Defs = !if(!eq (opc{2}, 1), [NZCV], []);
-
+ let hasSideEffects = 0;
}
multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,
@@ -1769,6 +1933,7 @@ class sve_int_log_imm<bits<2> opc, string asm>
let DecoderMethod = "DecodeSVELogicalImmInstruction";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve_int_log_imm<bits<2> opc, string asm, string alias, SDPatternOperator op> {
@@ -1814,8 +1979,9 @@ class sve_int_dup_mask_imm<string asm>
let Inst{17-5} = imms;
let Inst{4-0} = Zd;
- let isReMaterializable = 1;
let DecoderMethod = "DecodeSVELogicalImmInstruction";
+ let hasSideEffects = 0;
+ let isReMaterializable = 1;
}
multiclass sve_int_dup_mask_imm<string asm> {
@@ -1865,6 +2031,8 @@ class sve_int_bin_cons_arit_0<bits<2> sz8_64, bits<3> opc, string asm,
let Inst{12-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_cons_arit_0<bits<3> opc, string asm, SDPatternOperator op> {
@@ -1906,6 +2074,8 @@ class sve_fp_2op_i_p_zds<bits<2> sz, bits<3> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_2op_i_p_zds<bits<3> opc, string asm, string Ps, Operand imm_ty, FPImmLeaf A, FPImmLeaf B, SDPatternOperator op> {
@@ -1944,6 +2114,8 @@ class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_2op_p_zds<bits<4> opc, string asm, string Ps,
@@ -1975,13 +2147,13 @@ multiclass sve_fp_2op_p_zds_fscale<bits<4> opc, string asm,
}
multiclass sve_fp_2op_p_zds_zeroing_hsd<SDPatternOperator op> {
- def _ZERO_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesZero>;
- def _ZERO_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesZero>;
- def _ZERO_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesZero>;
+ def _H_ZERO : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesZero>;
+ def _S_ZERO : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesZero>;
+ def _D_ZERO : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesZero>;
- def : SVE_3_Op_Pat_SelZero<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Pseudo>(NAME # _ZERO_H)>;
- def : SVE_3_Op_Pat_SelZero<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Pseudo>(NAME # _ZERO_S)>;
- def : SVE_3_Op_Pat_SelZero<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Pseudo>(NAME # _ZERO_D)>;
+ def : SVE_3_Op_Pat_SelZero<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Pseudo>(NAME # _H_ZERO)>;
+ def : SVE_3_Op_Pat_SelZero<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Pseudo>(NAME # _S_ZERO)>;
+ def : SVE_3_Op_Pat_SelZero<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Pseudo>(NAME # _D_ZERO)>;
}
class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
@@ -2003,6 +2175,8 @@ class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_ftmad<string asm, SDPatternOperator op> {
@@ -2019,36 +2193,36 @@ multiclass sve_fp_ftmad<string asm, SDPatternOperator op> {
}
multiclass sve_fp_2op_i_p_zds_hfd<Operand imm_ty, FPImmLeaf A, FPImmLeaf B, SDPatternOperator ir_op = null_frag> {
- def _UNDEF_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, imm_ty, FalseLanesUndef>;
- def _UNDEF_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, imm_ty, FalseLanesUndef>;
- def _UNDEF_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, imm_ty, FalseLanesUndef>;
-
- def : SVE_2_Op_Fp_Imm_Pat<nxv8f16, ir_op, nxv8i1, f16, A, 0, !cast<Instruction>(NAME # "_UNDEF_H")>;
- def : SVE_2_Op_Fp_Imm_Pat<nxv8f16, ir_op, nxv8i1, f16, B, 1, !cast<Instruction>(NAME # "_UNDEF_H")>;
- def : SVE_2_Op_Fp_Imm_Pat<nxv4f16, ir_op, nxv4i1, f16, A, 0, !cast<Instruction>(NAME # "_UNDEF_H")>;
- def : SVE_2_Op_Fp_Imm_Pat<nxv4f16, ir_op, nxv4i1, f16, B, 1, !cast<Instruction>(NAME # "_UNDEF_H")>;
- def : SVE_2_Op_Fp_Imm_Pat<nxv2f16, ir_op, nxv2i1, f16, A, 0, !cast<Instruction>(NAME # "_UNDEF_H")>;
- def : SVE_2_Op_Fp_Imm_Pat<nxv2f16, ir_op, nxv2i1, f16, B, 1, !cast<Instruction>(NAME # "_UNDEF_H")>;
- def : SVE_2_Op_Fp_Imm_Pat<nxv4f32, ir_op, nxv4i1, f32, A, 0, !cast<Instruction>(NAME # "_UNDEF_S")>;
- def : SVE_2_Op_Fp_Imm_Pat<nxv4f32, ir_op, nxv4i1, f32, B, 1, !cast<Instruction>(NAME # "_UNDEF_S")>;
- def : SVE_2_Op_Fp_Imm_Pat<nxv2f32, ir_op, nxv2i1, f32, A, 0, !cast<Instruction>(NAME # "_UNDEF_S")>;
- def : SVE_2_Op_Fp_Imm_Pat<nxv2f32, ir_op, nxv2i1, f32, B, 1, !cast<Instruction>(NAME # "_UNDEF_S")>;
- def : SVE_2_Op_Fp_Imm_Pat<nxv2f64, ir_op, nxv2i1, f64, A, 0, !cast<Instruction>(NAME # "_UNDEF_D")>;
- def : SVE_2_Op_Fp_Imm_Pat<nxv2f64, ir_op, nxv2i1, f64, B, 1, !cast<Instruction>(NAME # "_UNDEF_D")>;
+ def _H_UNDEF : PredTwoOpImmPseudo<NAME # _H, ZPR16, imm_ty, FalseLanesUndef>;
+ def _S_UNDEF : PredTwoOpImmPseudo<NAME # _S, ZPR32, imm_ty, FalseLanesUndef>;
+ def _D_UNDEF : PredTwoOpImmPseudo<NAME # _D, ZPR64, imm_ty, FalseLanesUndef>;
+
+ def : SVE_2_Op_Fp_Imm_Pat<nxv8f16, ir_op, nxv8i1, f16, A, 0, !cast<Instruction>(NAME # "_H_UNDEF")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv8f16, ir_op, nxv8i1, f16, B, 1, !cast<Instruction>(NAME # "_H_UNDEF")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv4f16, ir_op, nxv4i1, f16, A, 0, !cast<Instruction>(NAME # "_H_UNDEF")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv4f16, ir_op, nxv4i1, f16, B, 1, !cast<Instruction>(NAME # "_H_UNDEF")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f16, ir_op, nxv2i1, f16, A, 0, !cast<Instruction>(NAME # "_H_UNDEF")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f16, ir_op, nxv2i1, f16, B, 1, !cast<Instruction>(NAME # "_H_UNDEF")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv4f32, ir_op, nxv4i1, f32, A, 0, !cast<Instruction>(NAME # "_S_UNDEF")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv4f32, ir_op, nxv4i1, f32, B, 1, !cast<Instruction>(NAME # "_S_UNDEF")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f32, ir_op, nxv2i1, f32, A, 0, !cast<Instruction>(NAME # "_S_UNDEF")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f32, ir_op, nxv2i1, f32, B, 1, !cast<Instruction>(NAME # "_S_UNDEF")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f64, ir_op, nxv2i1, f64, A, 0, !cast<Instruction>(NAME # "_D_UNDEF")>;
+ def : SVE_2_Op_Fp_Imm_Pat<nxv2f64, ir_op, nxv2i1, f64, B, 1, !cast<Instruction>(NAME # "_D_UNDEF")>;
}
multiclass sve_fp_2op_i_p_zds_zeroing_hfd<Operand imm_ty, FPImmLeaf A, FPImmLeaf B, SDPatternOperator op> {
- def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, imm_ty, FalseLanesZero>;
- def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, imm_ty, FalseLanesZero>;
- def _ZERO_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, imm_ty, FalseLanesZero>;
+ def _H_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, imm_ty, FalseLanesZero>;
+ def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, imm_ty, FalseLanesZero>;
+ def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, imm_ty, FalseLanesZero>;
let AddedComplexity = 2 in {
- def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv8f16, op, nxv8i1, f16, A, 0, !cast<Instruction>(NAME # "_ZERO_H")>;
- def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv8f16, op, nxv8i1, f16, B, 1, !cast<Instruction>(NAME # "_ZERO_H")>;
- def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv4f32, op, nxv4i1, f32, A, 0, !cast<Instruction>(NAME # "_ZERO_S")>;
- def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv4f32, op, nxv4i1, f32, B, 1, !cast<Instruction>(NAME # "_ZERO_S")>;
- def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv2f64, op, nxv2i1, f64, A, 0, !cast<Instruction>(NAME # "_ZERO_D")>;
- def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv2f64, op, nxv2i1, f64, B, 1, !cast<Instruction>(NAME # "_ZERO_D")>;
+ def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv8f16, op, nxv8i1, f16, A, 0, !cast<Instruction>(NAME # "_H_ZERO")>;
+ def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv8f16, op, nxv8i1, f16, B, 1, !cast<Instruction>(NAME # "_H_ZERO")>;
+ def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv4f32, op, nxv4i1, f32, A, 0, !cast<Instruction>(NAME # "_S_ZERO")>;
+ def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv4f32, op, nxv4i1, f32, B, 1, !cast<Instruction>(NAME # "_S_ZERO")>;
+ def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv2f64, op, nxv2i1, f64, A, 0, !cast<Instruction>(NAME # "_D_ZERO")>;
+ def : SVE_2_Op_Fp_Imm_Pat_Zero<nxv2f64, op, nxv2i1, f64, B, 1, !cast<Instruction>(NAME # "_D_ZERO")>;
}
}
@@ -2072,6 +2246,9 @@ class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
let Inst{12-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op,
@@ -2125,6 +2302,8 @@ class sve_fp_3op_p_zds_a<bits<2> sz, bits<2> opc, string asm, ZPRRegOp zprty>
let Constraints = "$Zda = $_Zda";
let ElementSize = zprty.ElementSize;
let DestructiveInstType = DestructiveTernaryCommWithRev;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_3op_p_zds_a<bits<2> opc, string asm, string Ps,
@@ -2165,6 +2344,8 @@ class sve_fp_3op_p_zds_b<bits<2> sz, bits<2> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm, SDPatternOperator op,
@@ -2181,12 +2362,6 @@ multiclass sve_fp_3op_p_zds_b<bits<2> opc, string asm, SDPatternOperator op,
def : SVE_4_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_fp_3op_p_zds_zx {
- def _UNDEF_H : PredThreeOpPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
- def _UNDEF_S : PredThreeOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
- def _UNDEF_D : PredThreeOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
-}
-
//===----------------------------------------------------------------------===//
// SVE Floating Point Multiply-Add - Indexed Group
//===----------------------------------------------------------------------===//
@@ -2209,6 +2384,8 @@ class sve_fp_fma_by_indexed_elem<bits<2> sz, bits<2> opc, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve2p1_fp_bfma_by_indexed_elem<string asm, bits<2> opc> {
@@ -2271,6 +2448,9 @@ class sve_fp_fmul_by_indexed_elem<bits<2> sz, bit o2, string asm, ZPRRegOp zprty
let Inst{10} = 0b0;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve2p1_fp_bfmul_by_indexed_elem<string asm> {
@@ -2339,6 +2519,8 @@ class sve_fp_fcmla<bits<2> sz, string asm, ZPRRegOp zprty>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_fcmla<string asm, SDPatternOperator op> {
@@ -2379,6 +2561,8 @@ class sve_fp_fcmla_by_indexed_elem<bits<2> sz, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_fcmla_by_indexed_elem<string asm, SDPatternOperator op> {
@@ -2427,6 +2611,8 @@ class sve_fp_fcadd<bits<2> sz, string asm, ZPRRegOp zprty>
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_fcadd<string asm, SDPatternOperator op> {
@@ -2465,6 +2651,8 @@ class sve2_fp_convert_precision<bits<4> opc, string asm,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve2_fp_convert_down_narrow<string asm, string op> {
@@ -2514,6 +2702,8 @@ class sve2_fp_pairwise_pred<bits<2> sz, bits<3> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve2_fp_pairwise_pred<bits<3> opc, string asm,
@@ -2557,6 +2747,8 @@ class sve2_fp_mla_long_by_indexed_elem<bits<3> opc, string asm>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve2_fp_mla_long_by_indexed_elem<bits<3> opc, string asm,
@@ -2592,6 +2784,8 @@ class sve2_fp_mla_long<bits<3> opc, string asm>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve2_fp_mla_long<bits<3> opc, string asm, ValueType OutVT,
@@ -2620,6 +2814,8 @@ class sve_int_arith_vl<bit opc, string asm, bit streaming_sve = 0b0>
let Inst{11} = streaming_sve;
let Inst{10-5} = imm6;
let Inst{4-0} = Rd;
+
+ let hasSideEffects = 0;
}
class sve_int_read_vl_a<bit op, bits<5> opc2, string asm, bit streaming_sve = 0b0>
@@ -2638,6 +2834,7 @@ class sve_int_read_vl_a<bit op, bits<5> opc2, string asm, bit streaming_sve = 0b
let Inst{10-5} = imm6;
let Inst{4-0} = Rd;
+ let hasSideEffects = 0;
let isReMaterializable = 1;
}
@@ -2662,6 +2859,8 @@ class sve_int_perm_bin_perm_zz<bits<3> opc, bits<2> sz8_64, string asm,
let Inst{12-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_bin_perm_zz<bits<3> opc, string asm,
@@ -2711,6 +2910,8 @@ class sve_fp_2op_p_zd<bits<7> opc, string asm, RegisterOperand i_zprtype,
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveUnaryPassthru;
let ElementSize = Sz;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
@@ -2722,16 +2923,10 @@ multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
def NAME : sve_fp_2op_p_zd<opc, asm, i_zprtype, o_zprtype, Sz>,
SVEPseudo2Instr<NAME, 1>;
// convert vt1 to a packed type for the intrinsic patterns
- defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16,
- !eq(!cast<string>(vt1), "nxv4f16"): nxv8f16,
- !eq(!cast<string>(vt1), "nxv2f32"): nxv4f32,
- 1 : vt1);
+ defvar packedvt1 = SVEContainerVT<vt1>.Value;
// convert vt3 to a packed type for the intrinsic patterns
- defvar packedvt3 = !cond(!eq(!cast<string>(vt3), "nxv2f16"): nxv8f16,
- !eq(!cast<string>(vt3), "nxv4f16"): nxv8f16,
- !eq(!cast<string>(vt3), "nxv2f32"): nxv4f32,
- 1 : vt3);
+ defvar packedvt3 = SVEContainerVT<vt3>.Value;
def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, packedvt3, !cast<Instruction>(NAME)>;
def : SVE_1_Op_Passthru_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
@@ -2751,10 +2946,7 @@ multiclass sve_fp_2op_p_zdr<bits<7> opc, string asm,
SVEPseudo2Instr<NAME, 1>;
// convert vt1 to a packed type for the intrinsic patterns
- defvar packedvt1 = !cond(!eq(!cast<string>(vt1), "nxv2f16"): nxv8f16,
- !eq(!cast<string>(vt1), "nxv4f16"): nxv8f16,
- !eq(!cast<string>(vt1), "nxv2f32"): nxv4f32,
- 1 : vt1);
+ defvar packedvt1 = SVEContainerVT<vt1>.Value;
def : SVE_3_Op_Pat<packedvt1, int_op, packedvt1, vt2, vt3, !cast<Instruction>(NAME)>;
def : SVE_1_Op_Passthru_Round_Pat<vt1, ir_op, vt2, vt3, !cast<Instruction>(NAME)>;
@@ -2779,28 +2971,41 @@ multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {
def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
- def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
- def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
- def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _UNDEF_H)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _UNDEF_H)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _UNDEF_H)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _UNDEF_S)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _UNDEF_S)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _UNDEF_D)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D_UNDEF)>;
}
-multiclass sve2_fp_flogb<string asm, SDPatternOperator op> {
- def _H : sve_fp_2op_p_zd<0b0011010, asm, ZPR16, ZPR16, ElementSizeH>;
- def _S : sve_fp_2op_p_zd<0b0011100, asm, ZPR32, ZPR32, ElementSizeS>;
- def _D : sve_fp_2op_p_zd<0b0011110, asm, ZPR64, ZPR64, ElementSizeD>;
+multiclass sve2_fp_flogb<string asm, string Ps, SDPatternOperator op> {
+ def _H : sve_fp_2op_p_zd<0b0011010, asm, ZPR16, ZPR16, ElementSizeH>,
+ SVEPseudo2Instr<Ps # _H, 1>;
+ def _S : sve_fp_2op_p_zd<0b0011100, asm, ZPR32, ZPR32, ElementSizeS>,
+ SVEPseudo2Instr<Ps # _S, 1>;
+ def _D : sve_fp_2op_p_zd<0b0011110, asm, ZPR64, ZPR64, ElementSizeD>,
+ SVEPseudo2Instr<Ps # _D, 1>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
}
+multiclass sve2_fp_un_pred_zeroing_hsd<SDPatternOperator op> {
+ def _H_ZERO : PredOneOpPassthruPseudo<NAME # _H, ZPR16, FalseLanesZero>;
+ def _S_ZERO : PredOneOpPassthruPseudo<NAME # _S, ZPR32, FalseLanesZero>;
+ def _D_ZERO : PredOneOpPassthruPseudo<NAME # _D, ZPR64, FalseLanesZero>;
+
+ def : SVE_1_Op_PassthruZero_Pat<nxv8i16, op, nxv8i1, nxv8f16, !cast<Pseudo>(NAME # _H_ZERO)>;
+ def : SVE_1_Op_PassthruZero_Pat<nxv4i32, op, nxv4i1, nxv4f32, !cast<Pseudo>(NAME # _S_ZERO)>;
+ def : SVE_1_Op_PassthruZero_Pat<nxv2i64, op, nxv2i1, nxv2f64, !cast<Pseudo>(NAME # _D_ZERO)>;
+}
+
multiclass sve2_fp_convert_down_odd_rounding<string asm, string op> {
def _DtoS : sve_fp_2op_p_zd<0b0001010, asm, ZPR64, ZPR32, ElementSizeD>;
def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
@@ -2825,6 +3030,9 @@ class sve_fp_2op_u_zd<bits<2> sz, bits<3> opc, string asm,
let Inst{15-10} = 0b001100;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_2op_u_zd<bits<3> opc, string asm, SDPatternOperator op> {
@@ -2861,6 +3069,7 @@ class sve_int_bin_pred_arit_log<bits<2> sz8_64, bits<2> fmt, bits<3> opc,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_pred_log<bits<3> opc, string asm, string Ps,
@@ -2987,13 +3196,19 @@ class sve_int_mladdsub_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
-multiclass sve_int_mladdsub_vvv_pred<bits<1> opc, string asm, SDPatternOperator op> {
- def _B : sve_int_mladdsub_vvv_pred<0b00, opc, asm, ZPR8>;
- def _H : sve_int_mladdsub_vvv_pred<0b01, opc, asm, ZPR16>;
- def _S : sve_int_mladdsub_vvv_pred<0b10, opc, asm, ZPR32>;
- def _D : sve_int_mladdsub_vvv_pred<0b11, opc, asm, ZPR64>;
+multiclass sve_int_mladdsub_vvv_pred<bits<1> opc, string asm, SDPatternOperator op,
+ string revname, bit isReverseInstr=0> {
+ def _B : sve_int_mladdsub_vvv_pred<0b00, opc, asm, ZPR8>,
+ SVEInstr2Rev<NAME # _B, revname # _B, isReverseInstr>;
+ def _H : sve_int_mladdsub_vvv_pred<0b01, opc, asm, ZPR16>,
+ SVEInstr2Rev<NAME # _H, revname # _H, isReverseInstr>;
+ def _S : sve_int_mladdsub_vvv_pred<0b10, opc, asm, ZPR32>,
+ SVEInstr2Rev<NAME # _S, revname # _S, isReverseInstr>;
+ def _D : sve_int_mladdsub_vvv_pred<0b11, opc, asm, ZPR64>,
+ SVEInstr2Rev<NAME # _D, revname # _D, isReverseInstr>;
def : SVE_4_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_4_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
@@ -3022,15 +3237,21 @@ class sve_int_mlas_vvv_pred<bits<2> sz8_64, bits<1> opc, string asm,
let Inst{4-0} = Zda;
let Constraints = "$Zda = $_Zda";
- let DestructiveInstType = DestructiveOther;
+ let DestructiveInstType = DestructiveTernaryCommWithRev;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
-multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op> {
- def _B : sve_int_mlas_vvv_pred<0b00, opc, asm, ZPR8>;
- def _H : sve_int_mlas_vvv_pred<0b01, opc, asm, ZPR16>;
- def _S : sve_int_mlas_vvv_pred<0b10, opc, asm, ZPR32>;
- def _D : sve_int_mlas_vvv_pred<0b11, opc, asm, ZPR64>;
+multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op,
+ string Ps, string revname, bit isReverseInstr=0> {
+ def _B : sve_int_mlas_vvv_pred<0b00, opc, asm, ZPR8>,
+ SVEPseudo2Instr<Ps # _B, 1>, SVEInstr2Rev<NAME # _B, revname # _B, isReverseInstr>;
+ def _H : sve_int_mlas_vvv_pred<0b01, opc, asm, ZPR16>,
+ SVEPseudo2Instr<Ps # _H, 1>, SVEInstr2Rev<NAME # _H, revname # _H, isReverseInstr>;
+ def _S : sve_int_mlas_vvv_pred<0b10, opc, asm, ZPR32>,
+ SVEPseudo2Instr<Ps # _S, 1>, SVEInstr2Rev<NAME # _S, revname # _S, isReverseInstr>;
+ def _D : sve_int_mlas_vvv_pred<0b11, opc, asm, ZPR64>,
+ SVEPseudo2Instr<Ps # _D, 1>, SVEInstr2Rev<NAME # _D, revname # _D, isReverseInstr>;
def : SVE_4_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_4_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
@@ -3038,6 +3259,21 @@ multiclass sve_int_mlas_vvv_pred<bits<1> opc, string asm, SDPatternOperator op>
def : SVE_4_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
+// Class for generating pseudos for SVE MLA/MAD/MLS/MSB
+multiclass sve_int_3op_p_mladdsub<SDPatternOperator op> {
+ def _B_UNDEF : PredThreeOpPseudo<NAME # _B, ZPR8, FalseLanesUndef>;
+ def _H_UNDEF : PredThreeOpPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
+ def _S_UNDEF : PredThreeOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _D_UNDEF : PredThreeOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
+
+ let AddedComplexity = 9 in {
+ def : SVE_4_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B_UNDEF)>;
+ def : SVE_4_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H_UNDEF)>;
+ def : SVE_4_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S_UNDEF)>;
+ def : SVE_4_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D_UNDEF)>;
+ }
+}
+
//===----------------------------------------------------------------------===//
// SVE2 Integer Multiply-Add - Unpredicated Group
//===----------------------------------------------------------------------===//
@@ -3061,6 +3297,7 @@ class sve2_int_mla<bits<2> sz, bits<5> opc, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_int_mla<bit S, string asm, SDPatternOperator op> {
@@ -3106,6 +3343,7 @@ class sve2_int_mla_by_indexed_elem<bits<2> sz, bits<6> opc, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_int_mla_by_indexed_elem<bits<2> opc, bit S, string asm,
@@ -3184,6 +3422,7 @@ class sve_intx_dot<bit sz, bit U, string asm, ZPRRegOp zprty1,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
+ let hasSideEffects = 0;
}
multiclass sve_intx_dot<bit opc, string asm, SDPatternOperator op> {
@@ -3216,6 +3455,7 @@ class sve_intx_dot_by_indexed_elem<bit sz, bit U, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
+ let hasSideEffects = 0;
}
multiclass sve_intx_dot_by_indexed_elem<bit opc, string asm,
@@ -3262,6 +3502,7 @@ class sve2_complex_int_arith<bits<2> sz, bits<4> opc, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_cintx_dot<string asm, SDPatternOperator op> {
@@ -3316,6 +3557,7 @@ class sve2_complex_int_arith_indexed<bits<2> sz, bits<4> opc, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_cintx_dot_by_indexed_elem<string asm, SDPatternOperator op> {
@@ -3386,6 +3628,8 @@ class sve2_int_mul<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
let Inst{12-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op,
@@ -3400,10 +3644,10 @@ multiclass sve2_int_mul<bits<3> opc, string asm, SDPatternOperator op,
def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
- def : SVE_2_Op_Pred_All_Active<nxv16i8, op_pred, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
- def : SVE_2_Op_Pred_All_Active<nxv8i16, op_pred, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Pred_All_Active<nxv4i32, op_pred, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Pred_All_Active<nxv2i64, op_pred, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pred_Any_Predicate<nxv16i8, op_pred, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pred_Any_Predicate<nxv8i16, op_pred, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pred_Any_Predicate<nxv4i32, op_pred, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pred_Any_Predicate<nxv2i64, op_pred, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve2_int_mul_single<bits<3> opc, string asm, SDPatternOperator op> {
@@ -3430,6 +3674,8 @@ class sve2_int_mul_by_indexed_elem<bits<2> sz, bits<4> opc, string asm,
let Inst{13-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_int_mul_by_indexed_elem<bits<4> opc, string asm,
@@ -3506,6 +3752,7 @@ class sve2_int_arith_pred<bits<2> sz, bits<6> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve2_int_arith_pred<bits<6> opc, string asm, SDPatternOperator op,
@@ -3548,6 +3795,7 @@ class sve2_int_sadd_long_accum_pairwise<bits<2> sz, bit U, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty1.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve2_int_sadd_long_accum_pairwise<bit U, string asm, SDPatternOperator op> {
@@ -3583,6 +3831,7 @@ class sve2_int_un_pred_arit<bits<2> sz, bit Q, bits<2> opc,
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveUnaryPassthru;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve2_int_un_pred_arit_s<bits<3> opc, string asm,
@@ -3592,9 +3841,9 @@ multiclass sve2_int_un_pred_arit_s<bits<3> opc, string asm,
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
- def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
- defm : SVE_3_Op_Undef_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+ defm : SVE_3_Op_Undef_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _S_UNDEF)>;
}
multiclass sve2_int_un_pred_arit<bits<3> opc, string asm, SDPatternOperator op> {
@@ -3612,15 +3861,15 @@ multiclass sve2_int_un_pred_arit<bits<3> opc, string asm, SDPatternOperator op>
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
- def _UNDEF_B : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
- def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
- def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
- def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+ def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
- defm : SVE_3_Op_Undef_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _UNDEF_B)>;
- defm : SVE_3_Op_Undef_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _UNDEF_H)>;
- defm : SVE_3_Op_Undef_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
- defm : SVE_3_Op_Undef_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
+ defm : SVE_3_Op_Undef_Pat<nxv16i8, op, nxv16i8, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _B_UNDEF)>;
+ defm : SVE_3_Op_Undef_Pat<nxv8i16, op, nxv8i16, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_3_Op_Undef_Pat<nxv4i32, op, nxv4i32, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_3_Op_Undef_Pat<nxv2i64, op, nxv2i64, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
//===----------------------------------------------------------------------===//
@@ -3642,6 +3891,8 @@ class sve2_wide_int_arith<bits<2> sz, bits<5> opc, string asm,
let Inst{14-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_wide_int_arith_long<bits<5> opc, string asm,
@@ -3706,6 +3957,8 @@ class sve2_misc<bits<2> sz, bits<4> opc, string asm,
let Inst{13-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_misc_bitwise<bits<4> opc, string asm, SDPatternOperator op> {
@@ -3750,6 +4003,7 @@ class sve2_bitwise_xor_interleaved<bits<2> sz, bits<1> opc, string asm,
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_bitwise_xor_interleaved<bit opc, string asm,
@@ -3783,6 +4037,8 @@ class sve2_bitwise_shift_left_long<bits<3> tsz8_64, bits<2> opc, string asm,
let Inst{11-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_bitwise_shift_left_long<bits<2> opc, string asm,
@@ -3825,6 +4081,7 @@ class sve2_int_bin_shift_imm<bits<4> tsz8_64, bit opc, string asm,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let hasSideEffects = 0;
}
multiclass sve2_int_bin_shift_imm_left<bit opc, string asm,
@@ -3888,6 +4145,7 @@ class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm,
@@ -3934,6 +4192,7 @@ class sve2_int_cadd<bits<2> sz, bit opc, string asm, ZPRRegOp zprty>
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_int_cadd<bit opc, string asm, SDPatternOperator op> {
@@ -3967,6 +4226,7 @@ class sve2_int_absdiff_accum<bits<2> sz, bits<4> opc, string asm,
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_int_absdiff_accum<bit opc, string asm, SDPatternOperator op> {
@@ -4026,6 +4286,8 @@ class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc,
let Inst{10} = 0b0;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm,
@@ -4066,6 +4328,7 @@ class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let hasSideEffects = 0;
}
multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm,
@@ -4101,6 +4364,8 @@ class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm,
let Inst{10} = 0b0; // Top
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_int_addsub_narrow_high_bottom<bits<2> opc, string asm,
@@ -4132,6 +4397,7 @@ class sve2_int_addsub_narrow_high_top<bits<2> sz, bits<2> opc, string asm,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let hasSideEffects = 0;
}
multiclass sve2_int_addsub_narrow_high_top<bits<2> opc, string asm,
@@ -4160,6 +4426,8 @@ class sve2_int_sat_extract_narrow_bottom<bits<3> tsz8_64, bits<2> opc, string as
let Inst{10} = 0b0;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_int_sat_extract_narrow_bottom<bits<2> opc, string asm,
@@ -4190,6 +4458,7 @@ class sve2_int_sat_extract_narrow_top<bits<3> tsz8_64, bits<2> opc, string asm,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let hasSideEffects = 0;
}
multiclass sve2_int_sat_extract_narrow_top<bits<2> opc, string asm,
@@ -4229,6 +4498,7 @@ class sve_int_un_pred_arit<bits<2> sz8_64, bits<4> opc,
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveUnaryPassthru;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm,
@@ -4247,15 +4517,15 @@ multiclass sve_int_un_pred_arit_0<bits<3> opc, string asm,
def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
- def _UNDEF_B : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
- def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
- def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
- def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+ def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _UNDEF_B)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _UNDEF_H)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _B_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm,
@@ -4271,13 +4541,13 @@ multiclass sve_int_un_pred_arit_0_h<bits<3> opc, string asm,
def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i8, !cast<Instruction>(NAME # _S)>;
def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i8, !cast<Instruction>(NAME # _D)>;
- def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
- def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
- def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
- defm : SVE_InReg_Extend_PassthruUndef<nxv8i16, op, nxv8i1, nxv8i8, !cast<Pseudo>(NAME # _UNDEF_H)>;
- defm : SVE_InReg_Extend_PassthruUndef<nxv4i32, op, nxv4i1, nxv4i8, !cast<Pseudo>(NAME # _UNDEF_S)>;
- defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i8, !cast<Pseudo>(NAME # _UNDEF_D)>;
+ defm : SVE_InReg_Extend_PassthruUndef<nxv8i16, op, nxv8i1, nxv8i8, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_InReg_Extend_PassthruUndef<nxv4i32, op, nxv4i1, nxv4i8, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i8, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm,
@@ -4290,11 +4560,11 @@ multiclass sve_int_un_pred_arit_0_w<bits<3> opc, string asm,
def : SVE_InReg_Extend<nxv4i32, op, nxv4i1, nxv4i16, !cast<Instruction>(NAME # _S)>;
def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i16, !cast<Instruction>(NAME # _D)>;
- def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
- def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
- defm : SVE_InReg_Extend_PassthruUndef<nxv4i32, op, nxv4i1, nxv4i16, !cast<Pseudo>(NAME # _UNDEF_S)>;
- defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i16, !cast<Pseudo>(NAME # _UNDEF_D)>;
+ defm : SVE_InReg_Extend_PassthruUndef<nxv4i32, op, nxv4i1, nxv4i16, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i16, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
multiclass sve_int_un_pred_arit_0_d<bits<3> opc, string asm,
@@ -4304,9 +4574,9 @@ multiclass sve_int_un_pred_arit_0_d<bits<3> opc, string asm,
def : SVE_InReg_Extend<nxv2i64, op, nxv2i1, nxv2i32, !cast<Instruction>(NAME # _D)>;
- def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
- defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i32, !cast<Pseudo>(NAME # _UNDEF_D)>;
+ defm : SVE_InReg_Extend_PassthruUndef<nxv2i64, op, nxv2i1, nxv2i32, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm,
@@ -4325,15 +4595,15 @@ multiclass sve_int_un_pred_arit_1<bits<3> opc, string asm,
def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
- def _UNDEF_B : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
- def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
- def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
- def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+ def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _UNDEF_B)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _UNDEF_H)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _B_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm, SDPatternOperator op> {
@@ -4351,16 +4621,16 @@ multiclass sve_int_un_pred_arit_1_fp<bits<3> opc, string asm, SDPatternOperator
def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
- def _UNDEF_H : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
- def _UNDEF_S : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
- def _UNDEF_D : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Pseudo>(NAME # _UNDEF_D)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
//===----------------------------------------------------------------------===//
@@ -4381,6 +4651,7 @@ class sve_int_dup_imm<bits<2> sz8_64, string asm,
let Inst{12-5} = imm{7-0}; // imm8
let Inst{4-0} = Zd;
+ let hasSideEffects = 0;
let isReMaterializable = 1;
}
@@ -4422,6 +4693,7 @@ class sve_int_dup_fpimm<bits<2> sz8_64, Operand fpimmtype,
let Inst{12-5} = imm8;
let Inst{4-0} = Zd;
+ let hasSideEffects = 0;
let isReMaterializable = 1;
}
@@ -4458,6 +4730,7 @@ class sve_int_arith_imm0<bits<2> sz8_64, bits<3> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve_int_arith_imm0<bits<3> opc, string asm, SDPatternOperator op> {
@@ -4490,6 +4763,7 @@ class sve_int_arith_imm<bits<2> sz8_64, bits<6> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve_int_arith_imm1<bits<2> opc, string asm, SDPatternOperator op> {
@@ -4498,10 +4772,10 @@ multiclass sve_int_arith_imm1<bits<2> opc, string asm, SDPatternOperator op> {
def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, simm8_32b>;
def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, simm8_32b>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithSImmPat32, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithSImmPat32, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithSImmPat32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithSImmPat64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Imm_Arith_Any_Predicate<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithSImmPat32, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Arith_Any_Predicate<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithSImmPat32, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Arith_Any_Predicate<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithSImmPat32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_Any_Predicate<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithSImmPat64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_arith_imm1_unsigned<bits<2> opc, string asm, SDPatternOperator op> {
@@ -4510,10 +4784,10 @@ multiclass sve_int_arith_imm1_unsigned<bits<2> opc, string asm, SDPatternOperato
def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, imm0_255>;
def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, imm0_255>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithUImm8Pat, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithUImm16Pat, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithUImm32Pat, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithUImm64Pat, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Imm_Arith_Any_Predicate<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithUImm8Pat, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Arith_Any_Predicate<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithUImm16Pat, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Arith_Any_Predicate<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithUImm32Pat, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_Any_Predicate<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithUImm64Pat, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_arith_imm2<string asm, SDPatternOperator op> {
@@ -4522,10 +4796,10 @@ multiclass sve_int_arith_imm2<string asm, SDPatternOperator op> {
def _S : sve_int_arith_imm<0b10, 0b110000, asm, ZPR32, simm8_32b>;
def _D : sve_int_arith_imm<0b11, 0b110000, asm, ZPR64, simm8_32b>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithSImmPat32, !cast<Instruction>(NAME # _B)>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithSImmPat32, !cast<Instruction>(NAME # _H)>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithSImmPat32, !cast<Instruction>(NAME # _S)>;
- def : SVE_1_Op_Imm_Arith_All_Active<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithSImmPat64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_1_Op_Imm_Arith_Any_Predicate<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithSImmPat32, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_Imm_Arith_Any_Predicate<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithSImmPat32, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_Imm_Arith_Any_Predicate<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithSImmPat32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_Imm_Arith_Any_Predicate<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithSImmPat64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -4547,6 +4821,8 @@ class sve_int_bin_cons_log<bits<2> opc, string asm>
let Inst{15-10} = 0b001100;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_cons_log<bits<2> opc, string asm, SDPatternOperator op> {
@@ -4585,6 +4861,7 @@ class sve2_int_bitwise_ternary_op_d<bits<3> opc, string asm>
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_int_bitwise_ternary_op<bits<3> opc, string asm, SDPatternOperator op,
@@ -4631,6 +4908,7 @@ class sve2_int_rotate_right_imm<bits<4> tsz8_64, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve2_int_rotate_right_imm<string asm, SDPatternOperator op> {
@@ -4676,6 +4954,7 @@ class sve_int_dup_fpimm_pred<bits<2> sz, Operand fpimmtype,
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_dup_fpimm_pred<string asm> {
@@ -4711,6 +4990,7 @@ class sve_int_dup_imm_pred<bits<2> sz8_64, bit m, string asm,
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_dup_imm_pred_merge_inst<
@@ -4816,6 +5096,7 @@ class sve_int_cmp<bit cmp_1, bits<2> sz8_64, bits<3> opc, string asm,
let Defs = [NZCV];
let ElementSize = pprty.ElementSize;
+ let hasSideEffects = 0;
let isPTestLike = 1;
}
@@ -4825,9 +5106,9 @@ multiclass SVE_SETCC_Pat<CondCode cc, CondCode invcc, ValueType predvt,
(cmp $Op1, $Op2, $Op3)>;
def : Pat<(predvt (AArch64setcc_z predvt:$Op1, intvt:$Op2, intvt:$Op3, invcc)),
(cmp $Op1, $Op3, $Op2)>;
- def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, cc))),
+ def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, cc))),
(cmp $Pg, $Op2, $Op3)>;
- def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, invcc))),
+ def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op2, intvt:$Op3, invcc))),
(cmp $Pg, $Op3, $Op2)>;
}
@@ -4837,9 +5118,9 @@ multiclass SVE_SETCC_Pat_With_Zero<CondCode cc, CondCode invcc, ValueType predvt
(cmp $Op1, $Op2)>;
def : Pat<(predvt (AArch64setcc_z predvt:$Op1, (SVEDup0), intvt:$Op2, invcc)),
(cmp $Op1, $Op2)>;
- def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), intvt:$Op1, (SVEDup0), cc))),
+ def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), intvt:$Op1, (SVEDup0), cc))),
(cmp $Pg, $Op1)>;
- def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z (predvt (AArch64ptrue 31)), (SVEDup0), intvt:$Op1, invcc))),
+ def : Pat<(predvt (and predvt:$Pg, (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)), (SVEDup0), intvt:$Op1, invcc))),
(cmp $Pg, $Op1)>;
}
@@ -4905,6 +5186,7 @@ class sve_int_scmp_vi<bits<2> sz8_64, bits<3> opc, string asm, PPRRegOp pprty,
let Defs = [NZCV];
let ElementSize = pprty.ElementSize;
+ let hasSideEffects = 0;
let isPTestLike = 1;
}
@@ -4922,13 +5204,13 @@ multiclass SVE_SETCC_Imm_Pat<CondCode cc, CondCode commuted_cc,
commuted_cc)),
(cmp $Pg, $Zs1, immtype:$imm)>;
def : Pat<(predvt (and predvt:$Pg,
- (AArch64setcc_z (predvt (AArch64ptrue 31)),
+ (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)),
(intvt ZPR:$Zs1),
(intvt (splat_vector (immtype:$imm))),
cc))),
(cmp $Pg, $Zs1, immtype:$imm)>;
def : Pat<(predvt (and predvt:$Pg,
- (AArch64setcc_z (predvt (AArch64ptrue 31)),
+ (AArch64setcc_z_oneuse (predvt (AArch64ptrue 31)),
(intvt (splat_vector (immtype:$imm))),
(intvt ZPR:$Zs1),
commuted_cc))),
@@ -4978,6 +5260,7 @@ class sve_int_ucmp_vi<bits<2> sz8_64, bits<2> opc, string asm, PPRRegOp pprty,
let Defs = [NZCV];
let ElementSize = pprty.ElementSize;
+ let hasSideEffects = 0;
let isPTestLike = 1;
}
@@ -5020,6 +5303,7 @@ class sve_int_cterm<bit sz, bit opc, string asm, RegisterClass rt>
let Inst{3-0} = 0b0000;
let Defs = [NZCV];
+ let hasSideEffects = 0;
}
class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
@@ -5042,6 +5326,7 @@ class sve_int_while_rr<bits<2> sz8_64, bits<4> opc, string asm,
let Defs = [NZCV];
let ElementSize = pprty.ElementSize;
+ let hasSideEffects = 0;
let isWhile = 1;
}
@@ -5088,6 +5373,7 @@ class sve2_int_while_rr<bits<2> sz8_64, bits<1> rw, string asm,
let Defs = [NZCV];
let ElementSize = pprty.ElementSize;
+ let hasSideEffects = 0;
let isWhile = 1;
}
@@ -5124,6 +5410,9 @@ class sve_fp_fast_red<bits<2> sz, bits<3> opc, string asm,
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Vd;
+
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_fast_red<bits<3> opc, string asm, SDPatternOperator op> {
@@ -5163,6 +5452,8 @@ class sve_fp_2op_p_vd<bits<2> sz, bits<3> opc, string asm,
let Inst{4-0} = Vdn;
let Constraints = "$Vdn = $_Vdn";
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_2op_p_vd<bits<3> opc, string asm, SDPatternOperator op> {
@@ -5203,6 +5494,9 @@ class sve_fp_3op_p_pd<bits<2> sz, bits<3> opc, string asm, PPRRegOp pprty,
let Inst{9-5} = Zn;
let Inst{4} = opc{0};
let Inst{3-0} = Pd;
+
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_3op_p_pd<bits<3> opc, string asm, SDPatternOperator op> {
@@ -5259,6 +5553,9 @@ class sve_fp_2op_p_pd<bits<2> sz, bits<3> opc, string asm, PPRRegOp pprty,
let Inst{9-5} = Zn;
let Inst{4} = opc{0};
let Inst{3-0} = Pd;
+
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_2op_p_pd<bits<3> opc, string asm,
@@ -5312,6 +5609,7 @@ class sve_int_index_ii<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{9-5} = imm5;
let Inst{4-0} = Zd;
+ let hasSideEffects = 0;
let isReMaterializable = 1;
}
@@ -5356,6 +5654,8 @@ class sve_int_index_ir<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{15-10} = 0b010010;
let Inst{9-5} = imm5;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_index_ir<string asm, SDPatternOperator mulop, SDPatternOperator muloneuseop> {
@@ -5423,6 +5723,8 @@ class sve_int_index_ri<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{15-10} = 0b010001;
let Inst{9-5} = Rn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_index_ri<string asm> {
@@ -5457,6 +5759,8 @@ class sve_int_index_rr<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Inst{15-10} = 0b010011;
let Inst{9-5} = Rn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_index_rr<string asm, SDPatternOperator mulop> {
@@ -5514,6 +5818,7 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<4> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveBinaryImm;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm, string Ps,
@@ -5551,15 +5856,15 @@ multiclass sve_int_bin_pred_shift_imm_left_dup<bits<4> opc, string asm,
}
multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd<SDPatternOperator op> {
- def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, tvecshiftL8, FalseLanesZero>;
- def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesZero>;
- def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, tvecshiftL32, FalseLanesZero>;
- def _ZERO_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, tvecshiftL64, FalseLanesZero>;
+ def _B_ZERO : PredTwoOpImmPseudo<NAME # _B, ZPR8, tvecshiftL8, FalseLanesZero>;
+ def _H_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, tvecshiftL16, FalseLanesZero>;
+ def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, tvecshiftL32, FalseLanesZero>;
+ def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, tvecshiftL64, FalseLanesZero>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftL8, !cast<Pseudo>(NAME # _ZERO_B)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftL16, !cast<Pseudo>(NAME # _ZERO_H)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftL32, !cast<Pseudo>(NAME # _ZERO_S)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftL64, !cast<Pseudo>(NAME # _ZERO_D)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftL8, !cast<Pseudo>(NAME # _B_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftL16, !cast<Pseudo>(NAME # _H_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftL32, !cast<Pseudo>(NAME # _S_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftL64, !cast<Pseudo>(NAME # _D_ZERO)>;
}
multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm, string Ps,
@@ -5597,15 +5902,15 @@ multiclass sve_int_bin_pred_shift_imm_right_dup<bits<4> opc, string asm,
}
multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd<SDPatternOperator op = null_frag> {
- def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, vecshiftR8, FalseLanesZero>;
- def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, vecshiftR16, FalseLanesZero>;
- def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftR32, FalseLanesZero>;
- def _ZERO_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftR64, FalseLanesZero>;
+ def _B_ZERO : PredTwoOpImmPseudo<NAME # _B, ZPR8, vecshiftR8, FalseLanesZero>;
+ def _H_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, vecshiftR16, FalseLanesZero>;
+ def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, vecshiftR32, FalseLanesZero>;
+ def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, vecshiftR64, FalseLanesZero>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftR8, !cast<Pseudo>(NAME # _ZERO_B)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftR16, !cast<Pseudo>(NAME # _ZERO_H)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftR32, !cast<Pseudo>(NAME # _ZERO_S)>;
- def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftR64, !cast<Pseudo>(NAME # _ZERO_D)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv16i8, op, nxv16i1, nxv16i8, tvecshiftR8, !cast<Pseudo>(NAME # _B_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv8i16, op, nxv8i1, nxv8i16, tvecshiftR16, !cast<Pseudo>(NAME # _H_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv4i32, op, nxv4i1, nxv4i32, tvecshiftR32, !cast<Pseudo>(NAME # _S_ZERO)>;
+ def : SVE_3_Op_Pat_Shift_Imm_SelZero<nxv2i64, op, nxv2i1, nxv2i64, tvecshiftR64, !cast<Pseudo>(NAME # _D_ZERO)>;
}
class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
@@ -5630,6 +5935,7 @@ class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_pred_shift<bits<3> opc, string asm, string Ps,
@@ -5651,15 +5957,29 @@ multiclass sve_int_bin_pred_shift<bits<3> opc, string asm, string Ps,
}
multiclass sve_int_bin_pred_zeroing_bhsd<SDPatternOperator op> {
- def _ZERO_B : PredTwoOpPseudo<NAME # _B, ZPR8, FalseLanesZero>;
- def _ZERO_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesZero>;
- def _ZERO_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesZero>;
- def _ZERO_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesZero>;
+ def _B_ZERO : PredTwoOpPseudo<NAME # _B, ZPR8, FalseLanesZero>;
+ def _H_ZERO : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesZero>;
+ def _S_ZERO : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesZero>;
+ def _D_ZERO : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesZero>;
+
+ def : SVE_3_Op_Pat_SelZero<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Pseudo>(NAME # _B_ZERO)>;
+ def : SVE_3_Op_Pat_SelZero<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Pseudo>(NAME # _H_ZERO)>;
+ def : SVE_3_Op_Pat_SelZero<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _S_ZERO)>;
+ def : SVE_3_Op_Pat_SelZero<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _D_ZERO)>;
+}
+
+multiclass sve_int_bin_pred_imm_zeroing_bhsd<SDPatternOperator op,
+ ComplexPattern imm_b, ComplexPattern imm_h,
+ ComplexPattern imm_s, ComplexPattern imm_d> {
+ def _B_ZERO : PredTwoOpImmPseudo<NAME # _B, ZPR8, Operand<i32>, FalseLanesZero>;
+ def _H_ZERO : PredTwoOpImmPseudo<NAME # _H, ZPR16, Operand<i32>, FalseLanesZero>;
+ def _S_ZERO : PredTwoOpImmPseudo<NAME # _S, ZPR32, Operand<i32>, FalseLanesZero>;
+ def _D_ZERO : PredTwoOpImmPseudo<NAME # _D, ZPR64, Operand<i32>, FalseLanesZero>;
- def : SVE_3_Op_Pat_SelZero<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Pseudo>(NAME # _ZERO_B)>;
- def : SVE_3_Op_Pat_SelZero<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Pseudo>(NAME # _ZERO_H)>;
- def : SVE_3_Op_Pat_SelZero<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _ZERO_S)>;
- def : SVE_3_Op_Pat_SelZero<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _ZERO_D)>;
+ def : SVE_2_Op_Imm_Pat_Zero<nxv16i8, op, nxv16i1, i32, imm_b, !cast<Pseudo>(NAME # _B_ZERO)>;
+ def : SVE_2_Op_Imm_Pat_Zero<nxv8i16, op, nxv8i1, i32, imm_h, !cast<Pseudo>(NAME # _H_ZERO)>;
+ def : SVE_2_Op_Imm_Pat_Zero<nxv4i32, op, nxv4i1, i32, imm_s, !cast<Pseudo>(NAME # _S_ZERO)>;
+ def : SVE_2_Op_Imm_Pat_Zero<nxv2i64, op, nxv2i1, i64, imm_d, !cast<Pseudo>(NAME # _D_ZERO)>;
}
multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm,
@@ -5694,6 +6014,8 @@ class sve_int_bin_cons_shift_wide<bits<2> sz8_64, bits<2> opc, string asm,
let Inst{11-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_cons_shift_wide<bits<2> opc, string asm, SDPatternOperator op> {
@@ -5724,6 +6046,8 @@ class sve_int_bin_cons_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
let Inst{11-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_cons_shift_imm_left<bits<2> opc, string asm,
@@ -5740,10 +6064,10 @@ multiclass sve_int_bin_cons_shift_imm_left<bits<2> opc, string asm,
let Inst{20-19} = imm{4-3};
}
- def : SVE_Shift_DupImm_All_Active_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmL8, !cast<Instruction>(NAME # _B)>;
- def : SVE_Shift_DupImm_All_Active_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmL16, !cast<Instruction>(NAME # _H)>;
- def : SVE_Shift_DupImm_All_Active_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmL32, !cast<Instruction>(NAME # _S)>;
- def : SVE_Shift_DupImm_All_Active_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmL64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_Shift_DupImm_Any_Predicate_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmL8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_Shift_DupImm_Any_Predicate_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmL16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_Shift_DupImm_Any_Predicate_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmL32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_Shift_DupImm_Any_Predicate_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmL64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_bin_cons_shift_imm_right<bits<2> opc, string asm,
@@ -5760,10 +6084,10 @@ multiclass sve_int_bin_cons_shift_imm_right<bits<2> opc, string asm,
let Inst{20-19} = imm{4-3};
}
- def : SVE_Shift_DupImm_All_Active_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmR8, !cast<Instruction>(NAME # _B)>;
- def : SVE_Shift_DupImm_All_Active_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmR16, !cast<Instruction>(NAME # _H)>;
- def : SVE_Shift_DupImm_All_Active_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmR32, !cast<Instruction>(NAME # _S)>;
- def : SVE_Shift_DupImm_All_Active_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmR64, !cast<Instruction>(NAME # _D)>;
+ def : SVE_Shift_DupImm_Any_Predicate_Pat<nxv16i8, op, nxv16i1, i32, SVEShiftImmR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_Shift_DupImm_Any_Predicate_Pat<nxv8i16, op, nxv8i1, i32, SVEShiftImmR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_Shift_DupImm_Any_Predicate_Pat<nxv4i32, op, nxv4i1, i32, SVEShiftImmR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_Shift_DupImm_Any_Predicate_Pat<nxv2i64, op, nxv2i1, i64, SVEShiftImmR64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -5790,6 +6114,7 @@ class sve_mem_cst_si<bits<2> msz, bits<2> esz, string asm,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -5826,6 +6151,7 @@ class sve_mem_est_si<bits<2> sz, bits<2> nregs, RegisterOperand VecList,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -5857,6 +6183,7 @@ class sve_mem_128b_est_si<bits<2> nregs, RegisterOperand VecList,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -5888,6 +6215,7 @@ class sve_mem_est_ss<bits<2> sz, bits<2> nregs, RegisterOperand VecList,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -5911,6 +6239,7 @@ class sve_mem_128b_est_ss<bits<2> nregs, RegisterOperand VecList,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -5933,6 +6262,7 @@ class sve_mem_cst_ss_base<bits<4> dtype, string asm,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -5963,6 +6293,7 @@ class sve_mem_cstnt_si<bits<2> msz, string asm, RegisterOperand VecList>
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -5997,6 +6328,7 @@ class sve_mem_cstnt_ss_base<bits<2> msz, string asm, RegisterOperand listty,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -6027,6 +6359,7 @@ class sve2_mem_sstnt_vs_base<bits<3> opc, string asm,
let Inst{9-5} = Zn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -6083,6 +6416,7 @@ class sve_mem_sst_sv<bits<3> opc, bit xs, bit scaled, string asm,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -6186,6 +6520,7 @@ class sve_mem_sst_sv2<bits<2> msz, bit scaled, string asm,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -6234,6 +6569,7 @@ class sve_mem_sst_vi<bits<3> opc, string asm, ZPRRegOp zprty,
let Inst{9-5} = Zn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -6286,6 +6622,7 @@ class sve_mem_z_spill<string asm>
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -6312,6 +6649,7 @@ class sve_mem_p_spill<string asm>
let Inst{4} = 0b0;
let Inst{3-0} = Pt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -6327,10 +6665,11 @@ multiclass sve_mem_p_spill<string asm> {
//===----------------------------------------------------------------------===//
class sve_int_perm_bin_perm_pp<bits<3> opc, bits<2> sz8_64, string asm,
- PPRRegOp pprty>
+ PPRRegOp pprty, SDPatternOperator op>
: I<(outs pprty:$Pd), (ins pprty:$Pn, pprty:$Pm),
asm, "\t$Pd, $Pn, $Pm",
- "", []>, Sched<[]> {
+ "",
+ [(set nxv16i1:$Pd, (op nxv16i1:$Pn, nxv16i1:$Pm))]>, Sched<[]> {
bits<4> Pd;
bits<4> Pm;
bits<4> Pn;
@@ -6344,19 +6683,23 @@ class sve_int_perm_bin_perm_pp<bits<3> opc, bits<2> sz8_64, string asm,
let Inst{8-5} = Pn;
let Inst{4} = 0b0;
let Inst{3-0} = Pd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_bin_perm_pp<bits<3> opc, string asm,
- SDPatternOperator op> {
- def _B : sve_int_perm_bin_perm_pp<opc, 0b00, asm, PPR8>;
- def _H : sve_int_perm_bin_perm_pp<opc, 0b01, asm, PPR16>;
- def _S : sve_int_perm_bin_perm_pp<opc, 0b10, asm, PPR32>;
- def _D : sve_int_perm_bin_perm_pp<opc, 0b11, asm, PPR64>;
+ SDPatternOperator ir_op,
+ SDPatternOperator op_b16,
+ SDPatternOperator op_b32,
+ SDPatternOperator op_b64> {
+ def _B : sve_int_perm_bin_perm_pp<opc, 0b00, asm, PPR8, ir_op>;
+ def _H : sve_int_perm_bin_perm_pp<opc, 0b01, asm, PPR16, op_b16>;
+ def _S : sve_int_perm_bin_perm_pp<opc, 0b10, asm, PPR32, op_b32>;
+ def _D : sve_int_perm_bin_perm_pp<opc, 0b11, asm, PPR64, op_b64>;
- def : SVE_2_Op_Pat<nxv16i1, op, nxv16i1, nxv16i1, !cast<Instruction>(NAME # _B)>;
- def : SVE_2_Op_Pat<nxv8i1, op, nxv8i1, nxv8i1, !cast<Instruction>(NAME # _H)>;
- def : SVE_2_Op_Pat<nxv4i1, op, nxv4i1, nxv4i1, !cast<Instruction>(NAME # _S)>;
- def : SVE_2_Op_Pat<nxv2i1, op, nxv2i1, nxv2i1, !cast<Instruction>(NAME # _D)>;
+ def : SVE_2_Op_Pat<nxv8i1, ir_op, nxv8i1, nxv8i1, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i1, ir_op, nxv4i1, nxv4i1, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i1, ir_op, nxv2i1, nxv2i1, !cast<Instruction>(NAME # _D)>;
}
class sve_int_perm_punpk<bit opc, string asm>
@@ -6372,6 +6715,8 @@ class sve_int_perm_punpk<bit opc, string asm>
let Inst{8-5} = Pn;
let Inst{4} = 0b0;
let Inst{3-0} = Pd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_punpk<bit opc, string asm, SDPatternOperator op> {
@@ -6398,6 +6743,7 @@ class sve_int_rdffr_pred<bit s, string asm>
let Defs = !if(s, [NZCV], []);
let Uses = [FFR];
+ let hasSideEffects = 1;
}
multiclass sve_int_rdffr_pred<bit s, string asm, SDPatternOperator op> {
@@ -6421,6 +6767,7 @@ class sve_int_rdffr_unpred<string asm> : I<
let Inst{3-0} = Pd;
let Uses = [FFR];
+ let hasSideEffects = 1;
}
multiclass sve_int_rdffr_unpred<string asm, SDPatternOperator op> {
@@ -6444,8 +6791,8 @@ class sve_int_wrffr<string asm, SDPatternOperator op>
let Inst{8-5} = Pn;
let Inst{4-0} = 0b00000;
- let hasSideEffects = 1;
let Defs = [FFR];
+ let hasSideEffects = 1;
}
class sve_int_setffr<string asm, SDPatternOperator op>
@@ -6455,8 +6802,8 @@ class sve_int_setffr<string asm, SDPatternOperator op>
[(op)]>, Sched<[]> {
let Inst{31-0} = 0b00100101001011001001000000000000;
- let hasSideEffects = 1;
let Defs = [FFR];
+ let hasSideEffects = 1;
}
//===----------------------------------------------------------------------===//
@@ -6482,6 +6829,7 @@ class sve_int_perm_clast_rz<bits<2> sz8_64, bit ab, string asm,
let Inst{4-0} = Rdn;
let Constraints = "$Rdn = $_Rdn";
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_clast_rz<bit ab, string asm, SDPatternOperator op> {
@@ -6515,6 +6863,7 @@ class sve_int_perm_clast_vz<bits<2> sz8_64, bit ab, string asm,
let Inst{4-0} = Vdn;
let Constraints = "$Vdn = $_Vdn";
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_clast_vz<bit ab, string asm, SDPatternOperator op> {
@@ -6551,6 +6900,7 @@ class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_clast_zz<bit ab, string asm, SDPatternOperator op> {
@@ -6588,6 +6938,8 @@ class sve_int_perm_last_r<bits<2> sz8_64, bit ab, string asm,
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Rd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_last_r<bit ab, string asm, SDPatternOperator op> {
@@ -6619,6 +6971,8 @@ class sve_int_perm_last_v<bits<2> sz8_64, bit ab, string asm,
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Vd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_last_v<bit ab, string asm, SDPatternOperator op> {
@@ -6653,6 +7007,7 @@ class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty>
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeNone;
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_splice<string asm, SDPatternOperator op> {
@@ -6688,6 +7043,8 @@ class sve2_int_perm_splice_cons<bits<2> sz8_64, string asm,
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_int_perm_splice_cons<string asm> {
@@ -6718,6 +7075,7 @@ class sve_int_perm_rev<bits<2> sz8_64, bits<2> opc, string asm,
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_rev_rbit<string asm, SDPatternOperator op> {
@@ -6775,6 +7133,7 @@ class sve_int_perm_cpy_r<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_cpy_r<string asm, SDPatternOperator op> {
@@ -6821,6 +7180,7 @@ class sve_int_perm_cpy_v<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_cpy_v<string asm, SDPatternOperator op> {
@@ -6865,6 +7225,8 @@ class sve_int_perm_compact<bit sz, string asm, ZPRRegOp zprty>
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_compact<string asm, SDPatternOperator op> {
@@ -6900,9 +7262,10 @@ class sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
- let mayLoad = 1;
- let Uses = !if(nf, [FFR], []);
let Defs = !if(nf, [FFR], []);
+ let Uses = !if(nf, [FFR], []);
+ let hasSideEffects = nf;
+ let mayLoad = 1;
}
multiclass sve_mem_cld_si_base<bits<4> dtype, bit nf, string asm,
@@ -6946,6 +7309,7 @@ class sve_mem_cldnt_si_base<bits<2> msz, string asm, RegisterOperand VecList>
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -6980,6 +7344,7 @@ class sve_mem_cldnt_ss_base<bits<2> msz, string asm, RegisterOperand VecList,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -7007,6 +7372,7 @@ class sve_mem_ldqr_si<bits<2> sz, string asm, RegisterOperand VecList>
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -7038,6 +7404,7 @@ class sve_mem_ldqr_ss<bits<2> sz, string asm, RegisterOperand VecList,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -7069,6 +7436,7 @@ class sve_mem_ld_dup<bits<2> dtypeh, bits<2> dtypel, string asm,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -7103,9 +7471,10 @@ class sve_mem_cld_ss_base<bits<4> dtype, bit ff, dag iops, string asm,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
- let mayLoad = 1;
- let Uses = !if(ff, [FFR], []);
let Defs = !if(ff, [FFR], []);
+ let Uses = !if(ff, [FFR], []);
+ let hasSideEffects = ff;
+ let mayLoad = 1;
}
multiclass sve_mem_cld_ss<bits<4> dtype, string asm, RegisterOperand listty,
@@ -7163,6 +7532,7 @@ class sve_mem_eld_si<bits<2> sz, bits<3> nregs, RegisterOperand VecList,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -7196,6 +7566,7 @@ class sve_mem_eld_ss<bits<2> sz, bits<3> nregs, RegisterOperand VecList,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -7226,9 +7597,11 @@ class sve_mem_32b_gld_sv<bits<4> opc, bit xs, bit scaled, string asm,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
- let mayLoad = 1;
+
let Defs = !if(!eq(opc{0}, 1), [FFR], []);
let Uses = !if(!eq(opc{0}, 1), [FFR], []);
+ let hasSideEffects = opc{0};
+ let mayLoad = 1;
}
multiclass sve_mem_32b_gld_sv_32_scaled<bits<4> opc, string asm,
@@ -7309,9 +7682,11 @@ class sve_mem_32b_gld_vi<bits<4> opc, string asm, Operand imm_ty>
let Inst{9-5} = Zn;
let Inst{4-0} = Zt;
- let mayLoad = 1;
+
let Defs = !if(!eq(opc{0}, 1), [FFR], []);
let Uses = !if(!eq(opc{0}, 1), [FFR], []);
+ let hasSideEffects = opc{0};
+ let mayLoad = 1;
}
multiclass sve_mem_32b_gld_vi_32_ptrs<bits<4> opc, string asm, Operand imm_ty,
@@ -7445,6 +7820,8 @@ class sve_mem_32b_prfm_vi<bits<2> msz, string asm, Operand imm_ty>
let Inst{9-5} = Zn;
let Inst{4} = 0b0;
let Inst{3-0} = prfop;
+
+ let hasSideEffects = 1;
}
multiclass sve_mem_32b_prfm_vi<bits<2> msz, string asm, Operand imm_ty, SDPatternOperator op> {
@@ -7472,6 +7849,7 @@ class sve_mem_z_fill<string asm>
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -7498,6 +7876,7 @@ class sve_mem_p_fill<string asm>
let Inst{4} = 0b0;
let Inst{3-0} = Pt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -7530,6 +7909,7 @@ class sve2_mem_gldnt_vs_base<bits<5> opc, dag iops, string asm,
let Inst{9-5} = Zn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -7595,9 +7975,11 @@ class sve_mem_64b_gld_sv<bits<4> opc, bit xs, bit scaled, bit lsl, string asm,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
- let mayLoad = 1;
+
let Defs = !if(!eq(opc{0}, 1), [FFR], []);
let Uses = !if(!eq(opc{0}, 1), [FFR], []);
+ let hasSideEffects = opc{0};
+ let mayLoad = 1;
}
multiclass sve_mem_64b_gld_sv_32_scaled<bits<4> opc, string asm,
@@ -7714,9 +8096,10 @@ class sve_mem_64b_gld_vi<bits<4> opc, string asm, Operand imm_ty>
let Inst{9-5} = Zn;
let Inst{4-0} = Zt;
- let mayLoad = 1;
let Defs = !if(!eq(opc{0}, 1), [FFR], []);
let Uses = !if(!eq(opc{0}, 1), [FFR], []);
+ let hasSideEffects = opc{0};
+ let mayLoad = 1;
}
multiclass sve_mem_64b_gld_vi_64_ptrs<bits<4> opc, string asm, Operand imm_ty,
@@ -7844,6 +8227,8 @@ class sve_int_bin_cons_misc_0_a<bits<2> opc, bits<2> msz, string asm,
let Inst{11-10} = msz;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_cons_misc_0_a_uxtw<bits<2> opc, string asm> {
@@ -7893,6 +8278,8 @@ class sve_int_bin_cons_misc_0_b<bits<2> sz, string asm, ZPRRegOp zprty>
let Inst{15-10} = 0b101100;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_bin_cons_misc_0_b<string asm, SDPatternOperator op> {
@@ -7956,6 +8343,8 @@ class sve_int_reduce<bits<2> sz8_32, bits<2> fmt, bits<3> opc, string asm,
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Vd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_reduce_0_saddv<bits<3> opc, string asm,
@@ -8027,6 +8416,7 @@ class sve_int_movprfx_pred<bits<2> sz8_32, bits<3> opc, string asm,
let Inst{4-0} = Zd;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_movprfx_pred_merge<bits<3> opc, string asm> {
@@ -8079,6 +8469,7 @@ class sve_int_brkp<bits<2> opc, string asm>
let Inst{3-0} = Pd;
let Defs = !if(!eq (opc{1}, 1), [NZCV], []);
+ let hasSideEffects = 0;
}
multiclass sve_int_brkp<bits<2> opc, string asm, SDPatternOperator op> {
@@ -8112,6 +8503,7 @@ class sve_int_brkn<bit S, string asm>
let Constraints = "$Pdm = $_Pdm";
let Defs = !if(S, [NZCV], []);
let ElementSize = ElementSizeB;
+ let hasSideEffects = 0;
}
multiclass sve_int_brkn<bits<1> opc, string asm, SDPatternOperator op> {
@@ -8139,7 +8531,7 @@ class sve_int_break<bits<3> opc, string asm, string suffix, dag iops>
let Constraints = !if(!eq (opc{0}, 1), "$Pd = $_Pd", "");
let Defs = !if(!eq (opc{1}, 1), [NZCV], []);
-
+ let hasSideEffects = 0;
}
multiclass sve_int_break_m<bits<3> opc, string asm, SDPatternOperator op> {
@@ -8180,6 +8572,7 @@ class sve2_char_match<bit sz, bit opc, string asm,
let Defs = [NZCV];
let ElementSize = pprty.ElementSize;
+ let hasSideEffects = 0;
let isPTestLike = 1;
}
@@ -8208,6 +8601,8 @@ class sve2_hist_gen_segment<string asm, SDPatternOperator op>
let Inst{15-10} = 0b101000;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
//===----------------------------------------------------------------------===//
@@ -8231,6 +8626,8 @@ class sve2_hist_gen_vector<bit sz, string asm, ZPRRegOp zprty>
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_hist_gen_vector<string asm, SDPatternOperator op> {
@@ -8259,6 +8656,8 @@ class sve2_crypto_cons_bin_op<bit opc, string asm, ZPRRegOp zprty>
let Inst{10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2_crypto_cons_bin_op<bit opc, string asm, ZPRRegOp zprty,
@@ -8282,6 +8681,7 @@ class sve2_crypto_des_bin_op<bits<2> opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let hasSideEffects = 0;
}
multiclass sve2_crypto_des_bin_op<bits<2> opc, string asm, ZPRRegOp zprty,
@@ -8302,6 +8702,7 @@ class sve2_crypto_unary_op<bit opc, string asm, ZPRRegOp zprty>
let Inst{4-0} = Zdn;
let Constraints = "$Zdn = $_Zdn";
+ let hasSideEffects = 0;
}
multiclass sve2_crypto_unary_op<bit opc, string asm, SDPatternOperator op> {
@@ -8329,6 +8730,8 @@ class sve_float_dot<bit bf, string asm>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_float_dot<bit bf, string asm, ValueType InVT, SDPatternOperator op> {
@@ -8354,6 +8757,8 @@ class sve_float_dot_indexed<bit bf, string asm>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_float_dot_indexed<bit bf, string asm, ValueType InVT, SDPatternOperator op> {
@@ -8376,6 +8781,8 @@ class sve_bfloat_matmul<string asm>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ElementSizeH;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_bfloat_matmul<string asm, SDPatternOperator op> {
@@ -8398,8 +8805,9 @@ class sve_bfloat_convert<bit N, string asm>
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
- let hasSideEffects = 1;
let ElementSize = ElementSizeS;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_bfloat_convert<bit N, string asm, SDPatternOperator op> {
@@ -8428,6 +8836,7 @@ class sve_int_matmul<bits<2> uns, string asm>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ZPR32.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_matmul<bits<2> uns, string asm, SDPatternOperator op> {
@@ -8455,6 +8864,7 @@ class sve_int_dot_mixed<string asm>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ZPR32.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_dot_mixed<string asm, SDPatternOperator op> {
@@ -8485,6 +8895,7 @@ class sve_int_dot_mixed_indexed<bit U, string asm>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = ZPR32.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve_int_dot_mixed_indexed<bit U, string asm, SDPatternOperator op> {
@@ -8514,6 +8925,8 @@ class sve_fp_matrix_mla<bit sz, string asm, ZPRRegOp zprty>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
let ElementSize = zprty.ElementSize;
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve_fp_matrix_mla<bit sz, string asm, ZPRRegOp zprty, SDPatternOperator op, ValueType vt> {
@@ -8542,6 +8955,7 @@ class sve_mem_ldor_si<bits<2> sz, string asm, RegisterOperand VecList>
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -8582,6 +8996,7 @@ class sve_mem_ldor_ss<bits<2> sz, string asm, RegisterOperand VecList,
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -8616,6 +9031,8 @@ class sve_int_perm_bin_perm_128_zz<bits<2> opc, bit P, string asm>
let Inst{10} = P;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve_int_perm_bin_perm_128_zz<bits<2> opc, bit P, string asm, SDPatternOperator op> {
@@ -8643,38 +9060,52 @@ def am_sve_regreg_lsl4 : ComplexPattern<iPTR, 2, "SelectSVERegRegAddrMode<4>", [
// Predicated pseudo floating point two operand instructions.
multiclass sve_fp_bin_pred_hfd<SDPatternOperator op> {
- def _UNDEF_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
- def _UNDEF_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
- def _UNDEF_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
+ def _H_UNDEF : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
+ def _S_UNDEF : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _D_UNDEF : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
- def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
- def : SVE_3_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, nxv4f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
- def : SVE_3_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, nxv2f16, !cast<Pseudo>(NAME # _UNDEF_H)>;
- def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
- def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Pseudo>(NAME # _UNDEF_S)>;
- def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Pseudo>(NAME # _UNDEF_D)>;
+ def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ def : SVE_3_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, nxv4f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ def : SVE_3_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, nxv2f16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ def : SVE_3_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Pseudo>(NAME # _D_UNDEF)>;
+}
+
+// Predicated pseudo floating point three operand instructions.
+multiclass sve_fp_3op_pred_hfd<SDPatternOperator op> {
+ def _H_UNDEF : PredThreeOpPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
+ def _S_UNDEF : PredThreeOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _D_UNDEF : PredThreeOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
+
+ def : SVE_4_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H_UNDEF)>;
+ def : SVE_4_Op_Pat<nxv4f16, op, nxv4i1, nxv4f16, nxv4f16, nxv4f16, !cast<Instruction>(NAME # _H_UNDEF)>;
+ def : SVE_4_Op_Pat<nxv2f16, op, nxv2i1, nxv2f16, nxv2f16, nxv2f16, !cast<Instruction>(NAME # _H_UNDEF)>;
+ def : SVE_4_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S_UNDEF)>;
+ def : SVE_4_Op_Pat<nxv2f32, op, nxv2i1, nxv2f32, nxv2f32, nxv2f32, !cast<Instruction>(NAME # _S_UNDEF)>;
+ def : SVE_4_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D_UNDEF)>;
}
// Predicated pseudo integer two operand instructions.
multiclass sve_int_bin_pred_bhsd<SDPatternOperator op> {
- def _UNDEF_B : PredTwoOpPseudo<NAME # _B, ZPR8, FalseLanesUndef>;
- def _UNDEF_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
- def _UNDEF_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
- def _UNDEF_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
+ def _B_UNDEF : PredTwoOpPseudo<NAME # _B, ZPR8, FalseLanesUndef>;
+ def _H_UNDEF : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
+ def _S_UNDEF : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _D_UNDEF : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
- def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Pseudo>(NAME # _UNDEF_B)>;
- def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Pseudo>(NAME # _UNDEF_H)>;
- def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
- def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Pseudo>(NAME # _B_UNDEF)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
// As sve_int_bin_pred but when only i32 and i64 vector types are required.
multiclass sve_int_bin_pred_sd<SDPatternOperator op> {
- def _UNDEF_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
- def _UNDEF_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
+ def _S_UNDEF : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _D_UNDEF : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
- def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
- def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
// Predicated pseudo integer two operand instructions. Second operand is an
@@ -8682,27 +9113,27 @@ multiclass sve_int_bin_pred_sd<SDPatternOperator op> {
multiclass sve_int_shift_pred_bhsd<SDPatternOperator op,
ComplexPattern imm_b, ComplexPattern imm_h,
ComplexPattern imm_s, ComplexPattern imm_d> {
- def _UNDEF_B : PredTwoOpImmPseudo<NAME # _B, ZPR8, Operand<i32>, FalseLanesUndef>;
- def _UNDEF_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, Operand<i32>, FalseLanesUndef>;
- def _UNDEF_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, Operand<i32>, FalseLanesUndef>;
- def _UNDEF_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, Operand<i32>, FalseLanesUndef>;
+ def _B_UNDEF : PredTwoOpImmPseudo<NAME # _B, ZPR8, Operand<i32>, FalseLanesUndef>;
+ def _H_UNDEF : PredTwoOpImmPseudo<NAME # _H, ZPR16, Operand<i32>, FalseLanesUndef>;
+ def _S_UNDEF : PredTwoOpImmPseudo<NAME # _S, ZPR32, Operand<i32>, FalseLanesUndef>;
+ def _D_UNDEF : PredTwoOpImmPseudo<NAME # _D, ZPR64, Operand<i32>, FalseLanesUndef>;
- def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, imm_b, !cast<Instruction>(NAME # _UNDEF_B)>;
- def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1, i32, imm_h, !cast<Instruction>(NAME # _UNDEF_H)>;
- def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, imm_s, !cast<Instruction>(NAME # _UNDEF_S)>;
- def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, imm_d, !cast<Instruction>(NAME # _UNDEF_D)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv16i8, op, nxv16i1, i32, imm_b, !cast<Instruction>(NAME # _B_UNDEF)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv8i16, op, nxv8i1, i32, imm_h, !cast<Instruction>(NAME # _H_UNDEF)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, imm_s, !cast<Instruction>(NAME # _S_UNDEF)>;
+ def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, imm_d, !cast<Instruction>(NAME # _D_UNDEF)>;
}
multiclass sve_int_bin_pred_all_active_bhsd<SDPatternOperator op> {
- def _UNDEF_B : PredTwoOpPseudo<NAME # _B, ZPR8, FalseLanesUndef>;
- def _UNDEF_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
- def _UNDEF_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
- def _UNDEF_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
+ def _B_UNDEF : PredTwoOpPseudo<NAME # _B, ZPR8, FalseLanesUndef>;
+ def _H_UNDEF : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
+ def _S_UNDEF : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _D_UNDEF : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
- def : SVE_2_Op_Pred_All_Active_Pt<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Pseudo>(NAME # _UNDEF_B)>;
- def : SVE_2_Op_Pred_All_Active_Pt<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Pseudo>(NAME # _UNDEF_H)>;
- def : SVE_2_Op_Pred_All_Active_Pt<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
- def : SVE_2_Op_Pred_All_Active_Pt<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
+ def : SVE_2_Op_Pred_All_Active_Pt<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Pseudo>(NAME # _B_UNDEF)>;
+ def : SVE_2_Op_Pred_All_Active_Pt<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ def : SVE_2_Op_Pred_All_Active_Pt<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ def : SVE_2_Op_Pred_All_Active_Pt<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
//===----------------------------------------------------------------------===//
@@ -8727,6 +9158,7 @@ class sve2p1_fclamp<string asm, bits<2> sz, ZPRRegOp zpr_ty>
let Constraints = "$Zd = $_Zd";
let DestructiveInstType = DestructiveOther;
let ElementSize = zpr_ty.ElementSize;
+ let hasSideEffects = 0;
}
multiclass sve2p1_fclamp<string asm, SDPatternOperator op> {
@@ -8756,8 +9188,14 @@ class sve2p1_two_way_dot_vv<string mnemonic, bit u>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
+ let hasSideEffects = 0;
}
+multiclass sve2p1_two_way_dot_vv<string mnemonic, bit u, SDPatternOperator intrinsic> {
+ def NAME : sve2p1_two_way_dot_vv<mnemonic, u>;
+
+ def : SVE_3_Op_Pat<nxv4i32, intrinsic, nxv4i32, nxv8i16, nxv8i16, !cast<Instruction>(NAME)>;
+}
// SVE two-way dot product (indexed)
class sve2p1_two_way_dot_vvi<string mnemonic, bit u>
@@ -8778,25 +9216,33 @@ class sve2p1_two_way_dot_vvi<string mnemonic, bit u>
let Constraints = "$Zda = $_Zda";
let DestructiveInstType = DestructiveOther;
+ let hasSideEffects = 0;
}
+multiclass sve2p1_two_way_dot_vvi<string mnemonic, bit u, SDPatternOperator intrinsic> {
+ def NAME : sve2p1_two_way_dot_vvi<mnemonic, u>;
+
+ def : SVE_4_Op_Imm_Pat<nxv4i32, intrinsic, nxv4i32, nxv8i16, nxv8i16, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
+}
-class sve2p1_ptrue_pn<string mnemonic, bits<2> sz, PNRP8to15RegOp pnrty>
+class sve2p1_ptrue_pn<string mnemonic, bits<2> sz, PNRP8to15RegOp pnrty, SDPatternOperator op>
: I<(outs pnrty:$PNd), (ins ), mnemonic, "\t$PNd",
- "", []>, Sched<[]> {
+ "", [(set pnrty:$PNd, (op))]>, Sched<[]> {
bits<3> PNd;
let Inst{31-24} = 0b00100101;
let Inst{23-22} = sz;
let Inst{21-3} = 0b1000000111100000010;
let Inst{2-0} = PNd;
+
+ let hasSideEffects = 0;
}
multiclass sve2p1_ptrue_pn<string mnemonic> {
- def _B : sve2p1_ptrue_pn<mnemonic, 0b00, PNR8_p8to15>;
- def _H : sve2p1_ptrue_pn<mnemonic, 0b01, PNR16_p8to15>;
- def _S : sve2p1_ptrue_pn<mnemonic, 0b10, PNR32_p8to15>;
- def _D : sve2p1_ptrue_pn<mnemonic, 0b11, PNR64_p8to15>;
+ def _B : sve2p1_ptrue_pn<mnemonic, 0b00, PNR8_p8to15, int_aarch64_sve_ptrue_c8>;
+ def _H : sve2p1_ptrue_pn<mnemonic, 0b01, PNR16_p8to15, int_aarch64_sve_ptrue_c16>;
+ def _S : sve2p1_ptrue_pn<mnemonic, 0b10, PNR32_p8to15, int_aarch64_sve_ptrue_c32>;
+ def _D : sve2p1_ptrue_pn<mnemonic, 0b11, PNR64_p8to15, int_aarch64_sve_ptrue_c64>;
}
@@ -8816,19 +9262,26 @@ class sve2p1_pred_as_ctr_to_mask_base<string mnemonic, bits<2> sz, bits<3> opc,
let Inst{7-5} = PNn;
let Inst{4} = 0b1;
let Inst{3-0} = Pd;
+
+ let hasSideEffects = 0;
}
class sve2p1_pred_as_ctr_to_mask<string mnemonic, bits<2> sz, PPRRegOp pprty>
- : sve2p1_pred_as_ctr_to_mask_base<mnemonic, sz, {0, ?, ?}, pprty, VectorIndexS> {
+ : sve2p1_pred_as_ctr_to_mask_base<mnemonic, sz, {0, ?, ?}, pprty, VectorIndexS32b_timm> {
bits<2> index;
let Inst{9-8} = index;
}
-multiclass sve2p1_pred_as_ctr_to_mask<string mnemonic> {
+multiclass sve2p1_pred_as_ctr_to_mask<string mnemonic, SDPatternOperator op> {
def _B : sve2p1_pred_as_ctr_to_mask<mnemonic, 0b00, PPR8>;
def _H : sve2p1_pred_as_ctr_to_mask<mnemonic, 0b01, PPR16>;
def _S : sve2p1_pred_as_ctr_to_mask<mnemonic, 0b10, PPR32>;
def _D : sve2p1_pred_as_ctr_to_mask<mnemonic, 0b11, PPR64>;
+
+ def : SVE_2_Op_Imm_Pat<nxv16i1, op, aarch64svcount, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Imm_Pat<nxv8i1, op, aarch64svcount, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Imm_Pat<nxv4i1, op, aarch64svcount, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Imm_Pat<nxv2i1, op, aarch64svcount, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME # _D)>;
}
@@ -8863,6 +9316,8 @@ class sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, bits<3> tsz>
let Inst{9-6} = Zn;
let Inst{5} = 0b0;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, SDPatternOperator intrinsic> {
@@ -8872,7 +9327,7 @@ multiclass sve2p1_multi_vec_extract_narrow<string mnemonic, bits<2> opc, SDPatte
// SVE2 multi-vec shift narrow
class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz>
- : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, vecshiftR16:$imm4),
+ : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4),
mnemonic, "\t$Zd, $Zn, $imm4",
"", []>, Sched<[]> {
bits<5> Zd;
@@ -8889,10 +9344,14 @@ class sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, bits<2> tsz>
let Inst{9-6} = Zn;
let Inst{5} = 0b0;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
-multiclass sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc> {
- def : sve2p1_multi_vec_shift_narrow<mnemonic, opc, 0b01>;
+multiclass sve2p1_multi_vec_shift_narrow<string mnemonic, bits<3> opc, SDPatternOperator intrinsic> {
+ def NAME : sve2p1_multi_vec_shift_narrow<mnemonic, opc, 0b01>;
+
+ def : SVE2p1_Sat_Shift_VG2_Pat<NAME, intrinsic, nxv8i16, nxv4i32, tvecshiftR16>;
}
@@ -8916,6 +9375,7 @@ class sve2p1_mem_cld_ss_2z<string mnemonic, bits<2> msz, bit n,
let Inst{4-1} = Zt;
let Inst{0} = n;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -8939,6 +9399,7 @@ class sve2p1_mem_cld_si_2z<string mnemonic, bits<2> msz, bit n,
let Inst{4-1} = Zt;
let Inst{0} = n;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -8971,6 +9432,7 @@ class sve2p1_mem_cld_ss_4z<string mnemonic, bits<2> msz, bit n,
let Inst{1} = 0b0;
let Inst{0} = n;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -8995,6 +9457,7 @@ class sve2p1_mem_cld_si_4z<string mnemonic, bits<2> msz, bit n,
let Inst{1} = 0b0;
let Inst{0} = n;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -9027,6 +9490,7 @@ class sve2p1_mem_cst_ss_2z<string mnemonic, bits<2> msz, bit n,
let Inst{4-1} = Zt;
let Inst{0} = n;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -9051,6 +9515,7 @@ class sve2p1_mem_cst_si_2z<string mnemonic, bits<2> msz, bit n,
let Inst{4-1} = Zt;
let Inst{0} = n;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -9110,6 +9575,7 @@ class sve2p1_mem_cst_si_4z<string mnemonic, bits<2> msz, bit n,
let Inst{1} = 0b0;
let Inst{0} = n;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -9140,6 +9606,8 @@ class sve2p1_pcount_pn<string mnemonic, bits<3> opc, bits<2> sz, PNRRegOp pnrty>
let Inst{9} = 0b1;
let Inst{8-5} = PNn;
let Inst{4-0} = Rd;
+
+ let hasSideEffects = 0;
}
multiclass sve2p1_pcount_pn<string mnemonic, bits<3> opc> {
@@ -9147,6 +9615,11 @@ multiclass sve2p1_pcount_pn<string mnemonic, bits<3> opc> {
def _H : sve2p1_pcount_pn<mnemonic, opc, 0b01, PNR16>;
def _S : sve2p1_pcount_pn<mnemonic, opc, 0b10, PNR32>;
def _D : sve2p1_pcount_pn<mnemonic, opc, 0b11, PNR64>;
+
+ defm : SVE2p1_Cntp_Pat<i64, int_aarch64_sve_cntp_c8, aarch64svcount, !cast<Instruction>(NAME # _B)>;
+ defm : SVE2p1_Cntp_Pat<i64, int_aarch64_sve_cntp_c16, aarch64svcount, !cast<Instruction>(NAME # _H)>;
+ defm : SVE2p1_Cntp_Pat<i64, int_aarch64_sve_cntp_c32, aarch64svcount, !cast<Instruction>(NAME # _S)>;
+ defm : SVE2p1_Cntp_Pat<i64, int_aarch64_sve_cntp_c64, aarch64svcount, !cast<Instruction>(NAME # _D)>;
}
@@ -9174,6 +9647,7 @@ class sve2p1_int_while_rr_pn<string mnemonic, bits<2> sz, bits<3> opc,
let Inst{2-0} = PNd;
let Defs = [NZCV];
+ let hasSideEffects = 0;
}
@@ -9182,6 +9656,15 @@ multiclass sve2p1_int_while_rr_pn<string mnemonic, bits<3> opc> {
def _H : sve2p1_int_while_rr_pn<mnemonic, 0b01, opc, PNR16_p8to15>;
def _S : sve2p1_int_while_rr_pn<mnemonic, 0b10, opc, PNR32_p8to15>;
def _D : sve2p1_int_while_rr_pn<mnemonic, 0b11, opc, PNR64_p8to15>;
+
+ defm : SVE2p1_While_PN_Pat<aarch64svcount, !cast<SDPatternOperator>("int_aarch64_sve_" # mnemonic # "_c8"),
+ i64, !cast<Instruction>(NAME # _B)>;
+ defm : SVE2p1_While_PN_Pat<aarch64svcount, !cast<SDPatternOperator>("int_aarch64_sve_" # mnemonic # "_c16"),
+ i64, !cast<Instruction>(NAME # _H)>;
+ defm : SVE2p1_While_PN_Pat<aarch64svcount, !cast<SDPatternOperator>("int_aarch64_sve_" # mnemonic # "_c32"),
+ i64, !cast<Instruction>(NAME # _S)>;
+ defm : SVE2p1_While_PN_Pat<aarch64svcount, !cast<SDPatternOperator>("int_aarch64_sve_" # mnemonic # "_c64"),
+ i64, !cast<Instruction>(NAME # _D)>;
}
@@ -9206,6 +9689,7 @@ class sve2p1_int_while_rr_pair<string mnemonic, bits<2> sz, bits<3> opc,
let Inst{0} = opc{0};
let Defs = [NZCV];
+ let hasSideEffects = 0;
}
@@ -9232,6 +9716,7 @@ class sve_mem_128b_gld_64_unscaled<string mnemonic>
let Inst{9-5} = Zn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -9258,6 +9743,7 @@ class sve_mem_sst_128b_64_unscaled<string mnemonic>
let Inst{9-5} = Zn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayStore = 1;
}
@@ -9288,6 +9774,7 @@ class sve_mem_128b_cld_si<bits<2> dtype, string mnemonic>
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -9321,6 +9808,7 @@ class sve_mem_128b_cld_ss<bits<2> dtype, string mnemonic, RegisterOperand gprsh_
let Inst{9-5} = Rn;
let Inst{4-0} = Zt;
+ let hasSideEffects = 0;
let mayLoad = 1;
}
@@ -9349,6 +9837,9 @@ class sve2p1_fp_reduction_q<bits<2> sz, bits<3> opc, string mnemonic,
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Vd;
+
+ let hasSideEffects = 0;
+ let mayRaiseFPException = 1;
}
multiclass sve2p1_fp_reduction_q<bits<3> opc, string mnemonic> {
@@ -9370,6 +9861,8 @@ class sve2p1_dupq<bits<5> ind_tsz, string mnemonic, ZPRRegOp zprty, Operand ityp
let Inst{15-10} = 0b001001;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2p1_dupq<string mnemonic> {
@@ -9409,6 +9902,7 @@ class sve2p1_extq<string mnemonic>
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
let ElementSize = ZPR8.ElementSize;
+ let hasSideEffects = 0;
}
@@ -9428,6 +9922,8 @@ class sve2p1_vector_to_pred<bits<4> opc, string mnemonic,
let Inst{9-5} = Zn;
let Inst{4} = 0b0;
let Inst{3-0} = Pd;
+
+ let hasSideEffects = 0;
}
multiclass sve2p1_vector_to_pred<string mnemonic> {
@@ -9468,6 +9964,7 @@ class sve2p1_pred_to_vector<bits<4> opc, string mnemonic,
let Inst{4-0} = Zd;
let Constraints = "$Zd = $_Zd";
+ let hasSideEffects = 0;
}
multiclass sve2p1_pred_to_vector<string mnemonic> {
@@ -9510,6 +10007,8 @@ class sve2p1_int_reduce_q<bits<2> sz, bits<4> opc, string mnemonic,
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Vd;
+
+ let hasSideEffects = 0;
}
multiclass sve2p1_int_reduce_q<bits<4> opc, string mnemonic> {
@@ -9537,6 +10036,8 @@ class sve2p1_permute_vec_elems_q<bits<2> sz, bits<3> opc, string mnemonic,
let Inst{12-10} = opc;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
+
+ let hasSideEffects = 0;
}
multiclass sve2p1_permute_vec_elems_q<bits<3> opc, string mnemonic> {
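Most of the SVEInstrFormats.td hunks above simply add an explicit `let hasSideEffects = 0;` to SVE instruction classes, while the first-faulting/non-faulting loads that read or write FFR now derive the flag from their `ff`/`nf`/`opc{0}` bit and keep `hasSideEffects = 1`. A minimal C++ sketch of how a later machine pass consumes this flag (illustrative only; the helper name is hypothetical, but `MachineInstr::hasUnmodeledSideEffects()`, `mayLoadOrStore()`, `isCall()` and `getNumExplicitDefs()` are existing APIs):

#include "llvm/CodeGen/MachineInstr.h"

// Sketch only: an instruction whose description carries hasSideEffects = 0 and
// that touches no memory can be considered for deletion when its defs are unused.
static bool isCandidateForDeadMI(const llvm::MachineInstr &MI) {
  return !MI.hasUnmodeledSideEffects() && !MI.mayLoadOrStore() &&
         !MI.isCall() && MI.getNumExplicitDefs() > 0;
}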
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index 313ba3df4930..a43b1cf0dd0e 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -19,10 +19,11 @@
// FIXME: Is it easiest to fix this layering violation by moving the .inc
// #includes from AArch64MCTargetDesc.h to here?
#include "MCTargetDesc/AArch64MCTargetDesc.h" // For AArch64::X0 and friends.
+#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
namespace llvm {
@@ -332,40 +333,6 @@ inline static unsigned getNZCVToSatisfyCondCode(CondCode Code) {
}
}
-/// Return true if Code is a reflexive relationship:
-/// forall x. (CSET Code (CMP x x)) == 1
-inline static bool isReflexive(CondCode Code) {
- switch (Code) {
- case EQ:
- case HS:
- case PL:
- case LS:
- case GE:
- case LE:
- case AL:
- case NV:
- return true;
- default:
- return false;
- }
-}
-
-/// Return true if Code is an irreflexive relationship:
-/// forall x. (CSET Code (CMP x x)) == 0
-inline static bool isIrreflexive(CondCode Code) {
- switch (Code) {
- case NE:
- case LO:
- case MI:
- case HI:
- case LT:
- case GT:
- return true;
- default:
- return false;
- }
-}
-
} // end namespace AArch64CC
struct SysAlias {
@@ -563,6 +530,27 @@ getSVEPredPatternFromNumElements(unsigned MinNumElts) {
}
}
+/// An enum to describe what types of loops we should attempt to tail-fold:
+/// Disabled: None
+/// Reductions: Loops containing reductions
+/// Recurrences: Loops with first-order recurrences, i.e. that would
+/// require a SVE splice instruction
+/// Reverse: Reverse loops
+/// Simple: Loops that are not reversed and don't contain reductions
+/// or first-order recurrences.
+/// All: All
+enum class TailFoldingOpts : uint8_t {
+ Disabled = 0x00,
+ Simple = 0x01,
+ Reductions = 0x02,
+ Recurrences = 0x04,
+ Reverse = 0x08,
+ All = Reductions | Recurrences | Simple | Reverse
+};
+
+LLVM_DECLARE_ENUM_AS_BITMASK(TailFoldingOpts,
+ /* LargestValue */ (long)TailFoldingOpts::Reverse);
+
namespace AArch64ExactFPImm {
struct ExactFPImm {
const char *Name;
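The new `TailFoldingOpts` above is declared as an LLVM bitmask enum (hence the added `llvm/ADT/BitmaskEnum.h` include), so the individual options compose with the usual bitwise operators. A minimal usage sketch, assuming only the declarations shown in this hunk and the AArch64 target's local include path:

#include "Utils/AArch64BaseInfo.h" // declares TailFoldingOpts as above

using llvm::TailFoldingOpts;

// LLVM_DECLARE_ENUM_AS_BITMASK enables |, & and friends for the enum class.
static bool foldsReverseLoops(TailFoldingOpts Opts) {
  return (Opts & TailFoldingOpts::Reverse) != TailFoldingOpts::Disabled;
}

// e.g. foldsReverseLoops(TailFoldingOpts::Reductions | TailFoldingOpts::Reverse)
// is true, while foldsReverseLoops(TailFoldingOpts::Simple) is false.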
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
index 2cdfff90b397..1146fd4e3fa8 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
@@ -6,10 +6,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/Function.h"
-
#ifndef LLVM_LIB_TARGET_AARCH64_UTILS_AARCH64SMEATTRIBUTES_H
#define LLVM_LIB_TARGET_AARCH64_UTILS_AARCH64SMEATTRIBUTES_H
+
+#include "llvm/IR/Function.h"
+
namespace llvm {
class Function;
@@ -30,7 +31,7 @@ public:
Normal = 0,
SM_Enabled = 1 << 0, // aarch64_pstate_sm_enabled
SM_Compatible = 1 << 1, // aarch64_pstate_sm_compatible
- SM_Body = 1 << 2, // aarch64_pstate_sm_locally
+ SM_Body = 1 << 2, // aarch64_pstate_sm_body
ZA_Shared = 1 << 3, // aarch64_pstate_sm_shared
ZA_New = 1 << 4, // aarch64_pstate_sm_new
ZA_Preserved = 1 << 5, // aarch64_pstate_sm_preserved
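The SME attribute mask above maps each bit to an IR function-attribute string (the fixed comment now matches the real `aarch64_pstate_sm_body` spelling). A hedged sketch of deriving such a mask from a function, not the upstream SMEAttrs implementation, using only the attribute strings documented in the enum and the existing `Function::hasFnAttribute(StringRef)` API:

#include "llvm/IR/Function.h"

// Sketch: build the bitmask directly from the IR attribute strings listed above.
static unsigned decodeSMEMask(const llvm::Function &F) {
  unsigned M = 0;                                                    // Normal
  if (F.hasFnAttribute("aarch64_pstate_sm_enabled"))    M |= 1 << 0; // SM_Enabled
  if (F.hasFnAttribute("aarch64_pstate_sm_compatible")) M |= 1 << 1; // SM_Compatible
  if (F.hasFnAttribute("aarch64_pstate_sm_body"))       M |= 1 << 2; // SM_Body
  return M;
}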
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index eaf72686c166..b82db82de84e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -26,6 +26,8 @@ FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
void initializeAMDGPURegBankCombinerPass(PassRegistry &);
+void initializeAMDGPURegBankSelectPass(PassRegistry &);
+
// SI Passes
FunctionPass *createGCNDPPCombinePass();
FunctionPass *createSIAnnotateControlFlowPass();
@@ -39,6 +41,7 @@ FunctionPass *createSIFixControlFlowLiveIntervalsPass();
FunctionPass *createSIOptimizeExecMaskingPreRAPass();
FunctionPass *createSIOptimizeVGPRLiveRangePass();
FunctionPass *createSIFixSGPRCopiesPass();
+FunctionPass *createLowerWWMCopiesPass();
FunctionPass *createSIMemoryLegalizerPass();
FunctionPass *createSIInsertWaitcntsPass();
FunctionPass *createSIPreAllocateWWMRegsPass();
@@ -47,13 +50,11 @@ FunctionPass *createSIFormMemoryClausesPass();
FunctionPass *createSIPostRABundlerPass();
FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *);
FunctionPass *createAMDGPUUseNativeCallsPass();
+ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
FunctionPass *createAMDGPUCodeGenPreparePass();
FunctionPass *createAMDGPULateCodeGenPreparePass();
FunctionPass *createAMDGPUMachineCFGStructurizerPass();
-FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *);
-ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *);
FunctionPass *createAMDGPURewriteOutArgumentsPass();
-ModulePass *createAMDGPUReplaceLDSUseWithPointerPass();
ModulePass *createAMDGPULowerModuleLDSPass();
FunctionPass *createSIModeRegisterPass();
FunctionPass *createGCNPreRAOptimizationsPass();
@@ -83,14 +84,13 @@ void initializeAMDGPUAttributorPass(PassRegistry &);
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;
-FunctionPass *createAMDGPUAtomicOptimizerPass();
+// DPP/Iterative option enables the atomic optimizer with given strategy
+// whereas None disables the atomic optimizer.
+enum class ScanOptions { DPP, Iterative, None };
+FunctionPass *createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy);
void initializeAMDGPUAtomicOptimizerPass(PassRegistry &);
extern char &AMDGPUAtomicOptimizerID;
-ModulePass *createAMDGPULowerIntrinsicsPass();
-void initializeAMDGPULowerIntrinsicsPass(PassRegistry &);
-extern char &AMDGPULowerIntrinsicsID;
-
ModulePass *createAMDGPUCtorDtorLoweringLegacyPass();
void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &);
extern char &AMDGPUCtorDtorLoweringLegacyPassID;
@@ -117,38 +117,6 @@ struct AMDGPULowerKernelAttributesPass
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
-void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &);
-extern char &AMDGPUPropagateAttributesEarlyID;
-
-struct AMDGPUPropagateAttributesEarlyPass
- : PassInfoMixin<AMDGPUPropagateAttributesEarlyPass> {
- AMDGPUPropagateAttributesEarlyPass(TargetMachine &TM) : TM(TM) {}
- PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
-
-private:
- TargetMachine &TM;
-};
-
-void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &);
-extern char &AMDGPUPropagateAttributesLateID;
-
-struct AMDGPUPropagateAttributesLatePass
- : PassInfoMixin<AMDGPUPropagateAttributesLatePass> {
- AMDGPUPropagateAttributesLatePass(TargetMachine &TM) : TM(TM) {}
- PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
-
-private:
- TargetMachine &TM;
-};
-
-void initializeAMDGPUReplaceLDSUseWithPointerPass(PassRegistry &);
-extern char &AMDGPUReplaceLDSUseWithPointerID;
-
-struct AMDGPUReplaceLDSUseWithPointerPass
- : PassInfoMixin<AMDGPUReplaceLDSUseWithPointerPass> {
- PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
-};
-
void initializeAMDGPULowerModuleLDSPass(PassRegistry &);
extern char &AMDGPULowerModuleLDSID;
@@ -177,6 +145,9 @@ extern char &SIFixSGPRCopiesID;
void initializeSIFixVGPRCopiesPass(PassRegistry &);
extern char &SIFixVGPRCopiesID;
+void initializeSILowerWWMCopiesPass(PassRegistry &);
+extern char &SILowerWWMCopiesID;
+
void initializeSILowerI1CopiesPass(PassRegistry &);
extern char &SILowerI1CopiesID;
@@ -239,6 +210,16 @@ private:
TargetMachine &TM;
};
+struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> {
+ AMDGPUAtomicOptimizerPass(TargetMachine &TM, ScanOptions ScanImpl)
+ : TM(TM), ScanImpl(ScanImpl) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+private:
+ TargetMachine &TM;
+ ScanOptions ScanImpl;
+};
+
Pass *createAMDGPUStructurizeCFGPass();
FunctionPass *createAMDGPUISelDag(TargetMachine &TM,
CodeGenOpt::Level OptLevel);
@@ -252,6 +233,16 @@ private:
bool GlobalOpt;
};
+class AMDGPUCodeGenPreparePass
+ : public PassInfoMixin<AMDGPUCodeGenPreparePass> {
+private:
+ TargetMachine &TM;
+
+public:
+ AMDGPUCodeGenPreparePass(TargetMachine &TM) : TM(TM){};
+ PreservedAnalyses run(Function &, FunctionAnalysisManager &);
+};
+
FunctionPass *createAMDGPUAnnotateUniformValues();
ModulePass *createAMDGPUPrintfRuntimeBinding();
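For the new-pass-manager classes introduced above (AMDGPUAtomicOptimizerPass, AMDGPUCodeGenPreparePass), a hedged sketch of scheduling them in a function pipeline; the TargetMachine reference and the chosen ScanOptions value are assumptions:

    // Sketch only: assumes a configured TargetMachine &TM and standard new-PM setup.
    llvm::FunctionPassManager FPM;
    FPM.addPass(AMDGPUAtomicOptimizerPass(TM, ScanOptions::DPP)); // DPP scan strategy
    FPM.addPass(AMDGPUCodeGenPreparePass(TM));
    // FPM would then be nested into a ModulePassManager via
    // createModuleToFunctionPassAdaptor(std::move(FPM)).
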
@@ -286,6 +277,9 @@ extern char &AMDGPUAnnotateUniformValuesPassID;
void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
extern char &AMDGPUCodeGenPrepareID;
+void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &);
+extern char &AMDGPURemoveIncompatibleFunctionsID;
+
void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
extern char &AMDGPULateCodeGenPrepareID;
@@ -302,9 +296,6 @@ extern char &SIMemoryLegalizerID;
void initializeSIModeRegisterPass(PassRegistry&);
extern char &SIModeRegisterID;
-void initializeAMDGPUReleaseVGPRsPass(PassRegistry &);
-extern char &AMDGPUReleaseVGPRsID;
-
void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
extern char &AMDGPUInsertDelayAluID;
@@ -340,12 +331,18 @@ extern char &AMDGPUOpenCLEnqueuedBlockLoweringID;
void initializeGCNNSAReassignPass(PassRegistry &);
extern char &GCNNSAReassignID;
+void initializeGCNPreRALongBranchRegPass(PassRegistry &);
+extern char &GCNPreRALongBranchRegID;
+
void initializeGCNPreRAOptimizationsPass(PassRegistry &);
extern char &GCNPreRAOptimizationsID;
FunctionPass *createAMDGPUSetWavePriorityPass();
void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
+void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
+extern char &GCNRewritePartialRegUsesID;
+
namespace AMDGPU {
enum TargetIndex {
TI_CONSTDATA_START,
@@ -363,53 +360,60 @@ enum TargetIndex {
/// a separate piece of memory that is unique from other
/// memory locations.
namespace AMDGPUAS {
- enum : unsigned {
- // The maximum value for flat, generic, local, private, constant and region.
- MAX_AMDGPU_ADDRESS = 7,
-
- FLAT_ADDRESS = 0, ///< Address space for flat memory.
- GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
- REGION_ADDRESS = 2, ///< Address space for region memory. (GDS)
-
- CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2).
- LOCAL_ADDRESS = 3, ///< Address space for local memory.
- PRIVATE_ADDRESS = 5, ///< Address space for private memory.
-
- CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory.
-
- BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers.
-
- /// Address space for direct addressable parameter memory (CONST0).
- PARAM_D_ADDRESS = 6,
- /// Address space for indirect addressable parameter memory (VTX1).
- PARAM_I_ADDRESS = 7,
-
- // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on
- // this order to be able to dynamically index a constant buffer, for
- // example:
- //
- // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
-
- CONSTANT_BUFFER_0 = 8,
- CONSTANT_BUFFER_1 = 9,
- CONSTANT_BUFFER_2 = 10,
- CONSTANT_BUFFER_3 = 11,
- CONSTANT_BUFFER_4 = 12,
- CONSTANT_BUFFER_5 = 13,
- CONSTANT_BUFFER_6 = 14,
- CONSTANT_BUFFER_7 = 15,
- CONSTANT_BUFFER_8 = 16,
- CONSTANT_BUFFER_9 = 17,
- CONSTANT_BUFFER_10 = 18,
- CONSTANT_BUFFER_11 = 19,
- CONSTANT_BUFFER_12 = 20,
- CONSTANT_BUFFER_13 = 21,
- CONSTANT_BUFFER_14 = 22,
- CONSTANT_BUFFER_15 = 23,
-
- // Some places use this if the address space can't be determined.
- UNKNOWN_ADDRESS_SPACE = ~0u,
- };
+enum : unsigned {
+ // The maximum value for flat, generic, local, private, constant and region.
+ MAX_AMDGPU_ADDRESS = 8,
+
+ FLAT_ADDRESS = 0, ///< Address space for flat memory.
+ GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
+ REGION_ADDRESS = 2, ///< Address space for region memory. (GDS)
+
+ CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2).
+ LOCAL_ADDRESS = 3, ///< Address space for local memory.
+ PRIVATE_ADDRESS = 5, ///< Address space for private memory.
+
+ CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory.
+
+ BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers.
+ ///< Not used in backend.
+
+ BUFFER_RESOURCE = 8, ///< Address space for 128-bit buffer resources.
+
+ /// Internal address spaces. Can be freely renumbered.
+ STREAMOUT_REGISTER = 128, ///< Address space for GS NGG Streamout registers.
+ /// end Internal address spaces.
+
+ /// Address space for direct addressable parameter memory (CONST0).
+ PARAM_D_ADDRESS = 6,
+ /// Address space for indirect addressable parameter memory (VTX1).
+ PARAM_I_ADDRESS = 7,
+
+ // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on
+ // this order to be able to dynamically index a constant buffer, for
+ // example:
+ //
+ // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx
+
+ CONSTANT_BUFFER_0 = 8,
+ CONSTANT_BUFFER_1 = 9,
+ CONSTANT_BUFFER_2 = 10,
+ CONSTANT_BUFFER_3 = 11,
+ CONSTANT_BUFFER_4 = 12,
+ CONSTANT_BUFFER_5 = 13,
+ CONSTANT_BUFFER_6 = 14,
+ CONSTANT_BUFFER_7 = 15,
+ CONSTANT_BUFFER_8 = 16,
+ CONSTANT_BUFFER_9 = 17,
+ CONSTANT_BUFFER_10 = 18,
+ CONSTANT_BUFFER_11 = 19,
+ CONSTANT_BUFFER_12 = 20,
+ CONSTANT_BUFFER_13 = 21,
+ CONSTANT_BUFFER_14 = 22,
+ CONSTANT_BUFFER_15 = 23,
+
+ // Some places use this if the address space can't be determined.
+ UNKNOWN_ADDRESS_SPACE = ~0u,
+};
}
namespace AMDGPU {
@@ -421,6 +425,38 @@ inline bool isFlatGlobalAddrSpace(unsigned AS) {
AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
}
+
+inline bool isExtendedGlobalAddrSpace(unsigned AS) {
+ return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
+ AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
+}
+
+static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) {
+ static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 8, "Addr space out of range");
+
+ if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
+ return true;
+
+  // This array is indexed by address space enum values 0 through 8.
+ // clang-format off
+ static const bool ASAliasRules[9][9] = {
+ /* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc */
+ /* Flat */ {true, true, false, true, true, true, true, true, true},
+ /* Global */ {true, true, false, false, true, false, true, true, true},
+ /* Region */ {false, false, true, false, false, false, false, false, false},
+ /* Group */ {true, false, false, true, false, false, false, false, false},
+ /* Constant */ {true, true, false, false, false, false, true, true, true},
+ /* Private */ {true, false, false, false, false, true, false, false, false},
+ /* Constant 32-bit */ {true, true, false, false, true, false, false, true, true},
+ /* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true},
+ /* Buffer Resource */ {true, true, false, false, true, false, true, true, true},
+ };
+ // clang-format on
+
+ return ASAliasRules[AS1][AS2];
+}
+
}
} // End namespace llvm
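A usage sketch of the new addrspacesMayAlias helper, reading two entries straight from the table above; the surrounding code is hypothetical:

    // Sketch only; namespaces written out for clarity.
    using namespace llvm;
    // Group (LOCAL) vs. Global: the "Group" row above says these never alias.
    bool GroupVsGlobal = AMDGPU::addrspacesMayAlias(AMDGPUAS::LOCAL_ADDRESS,
                                                    AMDGPUAS::GLOBAL_ADDRESS); // false
    // Anything above MAX_AMDGPU_ADDRESS is conservatively treated as may-alias.
    bool StreamoutVsFlat = AMDGPU::addrspacesMayAlias(AMDGPUAS::STREAMOUT_REGISTER,
                                                      AMDGPUAS::FLAT_ADDRESS);  // true
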
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index ddc32988881a..b178623a319d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -18,10 +18,6 @@ def p4 : PtrValueType<i64, 4>;
def p5 : PtrValueType<i32, 5>;
def p6 : PtrValueType<i32, 6>;
-class BoolToList<bit Value> {
- list<int> ret = !if(Value, [1]<int>, []<int>);
-}
-
//===------------------------------------------------------------===//
// Subtarget Features (device properties)
//===------------------------------------------------------------===//
@@ -494,6 +490,12 @@ def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
"Support NSA encoding for image instructions"
>;
+def FeaturePartialNSAEncoding : SubtargetFeature<"partial-nsa-encoding",
+ "HasPartialNSAEncoding",
+ "true",
+ "Support partial NSA encoding for image instructions"
+>;
+
def FeatureImageInsts : SubtargetFeature<"image-insts",
"HasImageInsts",
"true",
@@ -581,7 +583,7 @@ def FeatureDot6Insts : SubtargetFeature<"dot6-insts",
def FeatureDot7Insts : SubtargetFeature<"dot7-insts",
"HasDot7Insts",
"true",
- "Has v_dot2_f32_f16, v_dot4_u32_u8, v_dot8_u32_u4 instructions"
+ "Has v_dot4_u32_u8, v_dot8_u32_u4 instructions"
>;
def FeatureDot8Insts : SubtargetFeature<"dot8-insts",
@@ -596,6 +598,12 @@ def FeatureDot9Insts : SubtargetFeature<"dot9-insts",
"Has v_dot2_f16_f16, v_dot2_bf16_bf16, v_dot2_f32_bf16 instructions"
>;
+def FeatureDot10Insts : SubtargetFeature<"dot10-insts",
+ "HasDot10Insts",
+ "true",
+ "Has v_dot2_f32_f16 instruction"
+>;
+
def FeatureMAIInsts : SubtargetFeature<"mai-insts",
"HasMAIInsts",
"true",
@@ -614,6 +622,19 @@ def FeaturePkFmacF16Inst : SubtargetFeature<"pk-fmac-f16-inst",
"Has v_pk_fmac_f16 instruction"
>;
+def FeatureAtomicDsPkAdd16Insts : SubtargetFeature<"atomic-ds-pk-add-16-insts",
+ "HasAtomicDsPkAdd16Insts",
+ "true",
+ "Has ds_pk_add_bf16, ds_pk_add_f16, ds_pk_add_rtn_bf16, "
+ "ds_pk_add_rtn_f16 instructions"
+>;
+
+def FeatureAtomicFlatPkAdd16Insts : SubtargetFeature<"atomic-flat-pk-add-16-insts",
+ "HasAtomicFlatPkAdd16Insts",
+ "true",
+ "Has flat_atomic_pk_add_f16 and flat_atomic_pk_add_bf16 instructions"
+>;
+
def FeatureAtomicFaddRtnInsts : SubtargetFeature<"atomic-fadd-rtn-insts",
"HasAtomicFaddRtnInsts",
"true",
@@ -630,15 +651,30 @@ def FeatureAtomicFaddNoRtnInsts : SubtargetFeature<"atomic-fadd-no-rtn-insts",
[FeatureFlatGlobalInsts]
>;
-def FeatureAtomicPkFaddNoRtnInsts
- : SubtargetFeature<"atomic-pk-fadd-no-rtn-insts",
- "HasAtomicPkFaddNoRtnInsts",
+def FeatureAtomicBufferGlobalPkAddF16NoRtnInsts
+ : SubtargetFeature<"atomic-buffer-global-pk-add-f16-no-rtn-insts",
+ "HasAtomicBufferGlobalPkAddF16NoRtnInsts",
"true",
"Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that "
"don't return original value",
[FeatureFlatGlobalInsts]
>;
+def FeatureAtomicBufferGlobalPkAddF16Insts : SubtargetFeature<"atomic-buffer-global-pk-add-f16-insts",
+ "HasAtomicBufferGlobalPkAddF16Insts",
+ "true",
+ "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that "
+ "can return original value",
+ [FeatureFlatGlobalInsts]
+>;
+
+def FeatureAtomicGlobalPkAddBF16Inst : SubtargetFeature<"atomic-global-pk-add-bf16-inst",
+ "HasAtomicGlobalPkAddBF16Inst",
+ "true",
+ "Has global_atomic_pk_add_bf16 instruction",
+ [FeatureFlatGlobalInsts]
+>;
+
def FeatureFlatAtomicFaddF32Inst
: SubtargetFeature<"flat-atomic-fadd-f32-inst",
"HasFlatAtomicFaddF32Inst",
@@ -718,15 +754,6 @@ def FeatureGFX11FullVGPRs : SubtargetFeature<"gfx11-full-vgprs",
"GFX11 with 50% more physical VGPRs and 50% larger allocation granule than GFX10"
>;
-class SubtargetFeatureNSAMaxSize <int Value> : SubtargetFeature <
- "nsa-max-size-"#Value,
- "NSAMaxSize",
- !cast<string>(Value),
- "The maximum non-sequential address size in VGPRs."
->;
-
-def FeatureNSAMaxSize5 : SubtargetFeatureNSAMaxSize<5>;
-def FeatureNSAMaxSize13 : SubtargetFeatureNSAMaxSize<13>;
def FeatureVOPD : SubtargetFeature<"vopd",
"HasVOPDInsts",
@@ -740,6 +767,12 @@ def FeatureVALUTransUseHazard : SubtargetFeature<"valu-trans-use-hazard",
"Hazard when TRANS instructions are closely followed by a use of the result"
>;
+def FeatureForceStoreSC0SC1 : SubtargetFeature<"force-store-sc0-sc1",
+ "HasForceStoreSC0SC1",
+ "true",
+ "Has SC0 and SC1 on stores"
+>;
+
//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
@@ -860,12 +893,20 @@ def FeatureArchitectedFlatScratch : SubtargetFeature<"architected-flat-scratch",
"Flat Scratch register is a readonly SPI initialized architected register"
>;
+def FeatureArchitectedSGPRs : SubtargetFeature<"architected-sgprs",
+ "HasArchitectedSGPRs",
+ "true",
+ "Enable the architected SGPRs"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
"Dummy feature to disable assembler instructions"
>;
+//===----------------------------------------------------------------------===//
+
class GCNSubtargetFeatureGeneration <string Value,
string FeatureName,
list<SubtargetFeature> Implies> :
@@ -962,6 +1003,8 @@ def FeatureGFX11 : GCNSubtargetFeatureGeneration<"GFX11",
]
>;
+//===----------------------------------------------------------------------===//
+
class FeatureSet<list<SubtargetFeature> Features_> {
list<SubtargetFeature> Features = Features_;
}
@@ -1006,30 +1049,28 @@ def FeatureISAVersion7_0_5 : FeatureSet<
[FeatureSeaIslands,
FeatureLDSBankCount16]>;
-def FeatureISAVersion8_0_1 : FeatureSet<
+def FeatureISAVersion8_0_Common : FeatureSet<
[FeatureVolcanicIslands,
- FeatureFastFMAF32,
- HalfRate64Ops,
FeatureLDSBankCount32,
- FeatureSupportsXNACK,
FeatureUnpackedD16VMem]>;
+def FeatureISAVersion8_0_1 : FeatureSet<
+ !listconcat(FeatureISAVersion8_0_Common.Features,
+ [FeatureFastFMAF32,
+ HalfRate64Ops,
+ FeatureSupportsXNACK])>;
+
def FeatureISAVersion8_0_2 : FeatureSet<
- [FeatureVolcanicIslands,
- FeatureLDSBankCount32,
- FeatureSGPRInitBug,
- FeatureUnpackedD16VMem]>;
+ !listconcat(FeatureISAVersion8_0_Common.Features,
+ [FeatureSGPRInitBug])>;
def FeatureISAVersion8_0_3 : FeatureSet<
- [FeatureVolcanicIslands,
- FeatureLDSBankCount32,
- FeatureUnpackedD16VMem]>;
+ !listconcat(FeatureISAVersion8_0_Common.Features,
+ [])>;
def FeatureISAVersion8_0_5 : FeatureSet<
- [FeatureVolcanicIslands,
- FeatureLDSBankCount32,
- FeatureSGPRInitBug,
- FeatureUnpackedD16VMem]>;
+ !listconcat(FeatureISAVersion8_0_Common.Features,
+ [FeatureSGPRInitBug])>;
def FeatureISAVersion8_1_0 : FeatureSet<
[FeatureVolcanicIslands,
@@ -1038,126 +1079,101 @@ def FeatureISAVersion8_1_0 : FeatureSet<
FeatureImageStoreD16Bug,
FeatureImageGather4D16Bug]>;
-def FeatureISAVersion9_0_0 : FeatureSet<
+def FeatureISAVersion9_0_Common : FeatureSet<
[FeatureGFX9,
- FeatureMadMixInsts,
FeatureLDSBankCount32,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
FeatureImageInsts,
- FeatureMadMacF32Insts,
- FeatureImageGather4D16Bug]>;
+ FeatureMadMacF32Insts]>;
+
+def FeatureISAVersion9_0_MI_Common : FeatureSet<
+ !listconcat(FeatureISAVersion9_0_Common.Features,
+ [FeatureFmaMixInsts,
+ FeatureDLInsts,
+ FeatureDot1Insts,
+ FeatureDot2Insts,
+ FeatureDot3Insts,
+ FeatureDot4Insts,
+ FeatureDot5Insts,
+ FeatureDot6Insts,
+ FeatureDot7Insts,
+ FeatureDot10Insts,
+ FeatureMAIInsts,
+ FeaturePkFmacF16Inst,
+ FeatureAtomicFaddNoRtnInsts,
+ FeatureSupportsSRAMECC])>;
+
+def FeatureISAVersion9_0_0 : FeatureSet<
+ !listconcat(FeatureISAVersion9_0_Common.Features,
+ [FeatureMadMixInsts,
+ FeatureDsSrc2Insts,
+ FeatureExtendedImageInsts,
+ FeatureImageGather4D16Bug])>;
def FeatureISAVersion9_0_2 : FeatureSet<
- [FeatureGFX9,
- FeatureMadMixInsts,
- FeatureLDSBankCount32,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageInsts,
- FeatureMadMacF32Insts,
- FeatureImageGather4D16Bug]>;
+ !listconcat(FeatureISAVersion9_0_Common.Features,
+ [FeatureMadMixInsts,
+ FeatureDsSrc2Insts,
+ FeatureExtendedImageInsts,
+ FeatureImageGather4D16Bug])>;
def FeatureISAVersion9_0_4 : FeatureSet<
- [FeatureGFX9,
- FeatureLDSBankCount32,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageInsts,
- FeatureMadMacF32Insts,
- FeatureFmaMixInsts,
- FeatureImageGather4D16Bug]>;
+ !listconcat(FeatureISAVersion9_0_Common.Features,
+ [FeatureDsSrc2Insts,
+ FeatureExtendedImageInsts,
+ FeatureFmaMixInsts,
+ FeatureImageGather4D16Bug])>;
def FeatureISAVersion9_0_6 : FeatureSet<
- [FeatureGFX9,
- HalfRate64Ops,
- FeatureFmaMixInsts,
- FeatureLDSBankCount32,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageInsts,
- FeatureMadMacF32Insts,
- FeatureDLInsts,
- FeatureDot1Insts,
- FeatureDot2Insts,
- FeatureDot7Insts,
- FeatureSupportsSRAMECC,
- FeatureImageGather4D16Bug]>;
+ !listconcat(FeatureISAVersion9_0_Common.Features,
+ [HalfRate64Ops,
+ FeatureFmaMixInsts,
+ FeatureDsSrc2Insts,
+ FeatureExtendedImageInsts,
+ FeatureDLInsts,
+ FeatureDot1Insts,
+ FeatureDot2Insts,
+ FeatureDot7Insts,
+ FeatureDot10Insts,
+ FeatureSupportsSRAMECC,
+ FeatureImageGather4D16Bug])>;
def FeatureISAVersion9_0_8 : FeatureSet<
- [FeatureGFX9,
- HalfRate64Ops,
- FeatureFmaMixInsts,
- FeatureLDSBankCount32,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageInsts,
- FeatureMadMacF32Insts,
- FeatureDLInsts,
- FeatureDot1Insts,
- FeatureDot2Insts,
- FeatureDot3Insts,
- FeatureDot4Insts,
- FeatureDot5Insts,
- FeatureDot6Insts,
- FeatureDot7Insts,
- FeatureMAIInsts,
- FeaturePkFmacF16Inst,
- FeatureAtomicFaddNoRtnInsts,
- FeatureAtomicPkFaddNoRtnInsts,
- FeatureSupportsSRAMECC,
- FeatureMFMAInlineLiteralBug,
- FeatureImageGather4D16Bug]>;
+ !listconcat(FeatureISAVersion9_0_MI_Common.Features,
+ [HalfRate64Ops,
+ FeatureDsSrc2Insts,
+ FeatureExtendedImageInsts,
+ FeatureAtomicBufferGlobalPkAddF16NoRtnInsts,
+ FeatureMFMAInlineLiteralBug,
+ FeatureImageGather4D16Bug])>;
def FeatureISAVersion9_0_9 : FeatureSet<
- [FeatureGFX9,
- FeatureMadMixInsts,
- FeatureLDSBankCount32,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageInsts,
- FeatureMadMacF32Insts,
- FeatureImageGather4D16Bug]>;
+ !listconcat(FeatureISAVersion9_0_Common.Features,
+ [FeatureMadMixInsts,
+ FeatureDsSrc2Insts,
+ FeatureExtendedImageInsts,
+ FeatureImageInsts,
+ FeatureImageGather4D16Bug])>;
def FeatureISAVersion9_0_A : FeatureSet<
- [FeatureGFX9,
- FeatureGFX90AInsts,
- FeatureFmaMixInsts,
- FeatureLDSBankCount32,
- FeatureDLInsts,
- FeatureFmacF64Inst,
- FeatureDot1Insts,
- FeatureDot2Insts,
- FeatureDot3Insts,
- FeatureDot4Insts,
- FeatureDot5Insts,
- FeatureDot6Insts,
- FeatureDot7Insts,
- Feature64BitDPP,
- FeaturePackedFP32Ops,
- FeatureMAIInsts,
- FeaturePkFmacF16Inst,
- FeatureAtomicFaddRtnInsts,
- FeatureAtomicFaddNoRtnInsts,
- FeatureAtomicPkFaddNoRtnInsts,
- FeatureImageInsts,
- FeatureMadMacF32Insts,
- FeatureSupportsSRAMECC,
- FeaturePackedTID,
- FullRate64Ops,
- FeatureBackOffBarrier]>;
+ !listconcat(FeatureISAVersion9_0_MI_Common.Features,
+ [FeatureGFX90AInsts,
+ FeatureFmacF64Inst,
+ Feature64BitDPP,
+ FeaturePackedFP32Ops,
+ FeatureAtomicFaddRtnInsts,
+ FeatureAtomicBufferGlobalPkAddF16Insts,
+ FeaturePackedTID,
+ FullRate64Ops,
+ FeatureBackOffBarrier])>;
def FeatureISAVersion9_0_C : FeatureSet<
- [FeatureGFX9,
- FeatureMadMixInsts,
- FeatureLDSBankCount32,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageInsts,
- FeatureMadMacF32Insts,
- FeatureImageGather4D16Bug]>;
+ !listconcat(FeatureISAVersion9_0_Common.Features,
+ [FeatureMadMixInsts,
+ FeatureDsSrc2Insts,
+ FeatureExtendedImageInsts,
+ FeatureImageGather4D16Bug])>;
-def FeatureISAVersion9_4_0 : FeatureSet<
+def FeatureISAVersion9_4_Common : FeatureSet<
[FeatureGFX9,
FeatureGFX90AInsts,
FeatureGFX940Insts,
@@ -1172,6 +1188,9 @@ def FeatureISAVersion9_4_0 : FeatureSet<
FeatureDot5Insts,
FeatureDot6Insts,
FeatureDot7Insts,
+ FeatureDot10Insts,
+ FeatureAtomicDsPkAdd16Insts,
+ FeatureAtomicFlatPkAdd16Insts,
Feature64BitDPP,
FeaturePackedFP32Ops,
FeatureMAIInsts,
@@ -1179,7 +1198,8 @@ def FeatureISAVersion9_4_0 : FeatureSet<
FeaturePkFmacF16Inst,
FeatureAtomicFaddRtnInsts,
FeatureAtomicFaddNoRtnInsts,
- FeatureAtomicPkFaddNoRtnInsts,
+ FeatureAtomicBufferGlobalPkAddF16Insts,
+ FeatureAtomicGlobalPkAddBF16Inst,
FeatureFlatAtomicFaddF32Inst,
FeatureSupportsSRAMECC,
FeaturePackedTID,
@@ -1187,33 +1207,29 @@ def FeatureISAVersion9_4_0 : FeatureSet<
FullRate64Ops,
FeatureBackOffBarrier]>;
-// TODO: Organize more features into groups.
-def FeatureGroup {
- // Bugs present on gfx10.1.
- list<SubtargetFeature> GFX10_1_Bugs = [
- FeatureVcmpxPermlaneHazard,
- FeatureVMEMtoScalarWriteHazard,
- FeatureSMEMtoVectorWriteHazard,
- FeatureInstFwdPrefetchBug,
- FeatureVcmpxExecWARHazard,
- FeatureLdsBranchVmemWARHazard,
- FeatureNSAtoVMEMBug,
- FeatureNSAClauseBug,
- FeatureOffset3fBug,
- FeatureFlatSegmentOffsetBug,
- FeatureNegativeUnalignedScratchOffsetBug
- ];
-}
+def FeatureISAVersion9_4_0 : FeatureSet<
+ !listconcat(FeatureISAVersion9_4_Common.Features,
+ [FeatureForceStoreSC0SC1])>;
-def FeatureISAVersion10_1_0 : FeatureSet<
- !listconcat(FeatureGroup.GFX10_1_Bugs,
- [FeatureGFX10,
- FeatureLDSBankCount32,
- FeatureDLInsts,
- FeatureNSAEncoding,
- FeatureNSAMaxSize5,
- FeatureWavefrontSize32,
- FeatureScalarStores,
+def FeatureISAVersion9_4_1 : FeatureSet<
+ !listconcat(FeatureISAVersion9_4_Common.Features,
+ [FeatureForceStoreSC0SC1])>;
+
+def FeatureISAVersion9_4_2 : FeatureSet<
+ !listconcat(FeatureISAVersion9_4_Common.Features,
+ [])>;
+
+def FeatureISAVersion10_Common : FeatureSet<
+ [FeatureGFX10,
+ FeatureLDSBankCount32,
+ FeatureDLInsts,
+ FeatureNSAEncoding,
+ FeatureWavefrontSize32,
+ FeatureBackOffBarrier]>;
+
+def FeatureISAVersion10_1_Common : FeatureSet<
+ !listconcat(FeatureISAVersion10_Common.Features,
+ [FeatureScalarStores,
FeatureScalarAtomics,
FeatureScalarFlatScratchInsts,
FeatureGetWaveIdInst,
@@ -1221,90 +1237,57 @@ def FeatureISAVersion10_1_0 : FeatureSet<
FeatureDsSrc2Insts,
FeatureLdsMisalignedBug,
FeatureSupportsXNACK,
- FeatureBackOffBarrier])>;
+ // gfx101x bugs
+ FeatureVcmpxPermlaneHazard,
+ FeatureVMEMtoScalarWriteHazard,
+ FeatureSMEMtoVectorWriteHazard,
+ FeatureInstFwdPrefetchBug,
+ FeatureVcmpxExecWARHazard,
+ FeatureLdsBranchVmemWARHazard,
+ FeatureNSAtoVMEMBug,
+ FeatureNSAClauseBug,
+ FeatureOffset3fBug,
+ FeatureFlatSegmentOffsetBug,
+ FeatureNegativeUnalignedScratchOffsetBug])>;
+
+def FeatureISAVersion10_1_0 : FeatureSet<
+ !listconcat(FeatureISAVersion10_1_Common.Features,
+ [])>;
def FeatureISAVersion10_1_1 : FeatureSet<
- !listconcat(FeatureGroup.GFX10_1_Bugs,
- [FeatureGFX10,
- FeatureLDSBankCount32,
- FeatureDLInsts,
- FeatureDot1Insts,
+ !listconcat(FeatureISAVersion10_1_Common.Features,
+ [FeatureDot1Insts,
FeatureDot2Insts,
FeatureDot5Insts,
FeatureDot6Insts,
FeatureDot7Insts,
- FeatureNSAEncoding,
- FeatureNSAMaxSize5,
- FeatureWavefrontSize32,
- FeatureScalarStores,
- FeatureScalarAtomics,
- FeatureScalarFlatScratchInsts,
- FeatureGetWaveIdInst,
- FeatureMadMacF32Insts,
- FeatureDsSrc2Insts,
- FeatureLdsMisalignedBug,
- FeatureSupportsXNACK,
- FeatureBackOffBarrier])>;
+ FeatureDot10Insts])>;
def FeatureISAVersion10_1_2 : FeatureSet<
- !listconcat(FeatureGroup.GFX10_1_Bugs,
- [FeatureGFX10,
- FeatureLDSBankCount32,
- FeatureDLInsts,
- FeatureDot1Insts,
+ !listconcat(FeatureISAVersion10_1_Common.Features,
+ [FeatureDot1Insts,
FeatureDot2Insts,
FeatureDot5Insts,
FeatureDot6Insts,
FeatureDot7Insts,
- FeatureNSAEncoding,
- FeatureNSAMaxSize5,
- FeatureWavefrontSize32,
- FeatureScalarStores,
- FeatureScalarAtomics,
- FeatureScalarFlatScratchInsts,
- FeatureGetWaveIdInst,
- FeatureMadMacF32Insts,
- FeatureDsSrc2Insts,
- FeatureLdsMisalignedBug,
- FeatureSupportsXNACK,
- FeatureBackOffBarrier])>;
+ FeatureDot10Insts])>;
def FeatureISAVersion10_1_3 : FeatureSet<
- !listconcat(FeatureGroup.GFX10_1_Bugs,
- [FeatureGFX10,
- FeatureGFX10_AEncoding,
- FeatureLDSBankCount32,
- FeatureDLInsts,
- FeatureNSAEncoding,
- FeatureNSAMaxSize5,
- FeatureWavefrontSize32,
- FeatureScalarStores,
- FeatureScalarAtomics,
- FeatureScalarFlatScratchInsts,
- FeatureGetWaveIdInst,
- FeatureMadMacF32Insts,
- FeatureDsSrc2Insts,
- FeatureLdsMisalignedBug,
- FeatureSupportsXNACK,
- FeatureBackOffBarrier])>;
+ !listconcat(FeatureISAVersion10_1_Common.Features,
+ [FeatureGFX10_AEncoding])>;
def FeatureISAVersion10_3_0 : FeatureSet<
- [FeatureGFX10,
- FeatureGFX10_AEncoding,
- FeatureGFX10_BEncoding,
- FeatureGFX10_3Insts,
- FeatureLDSBankCount32,
- FeatureDLInsts,
- FeatureDot1Insts,
- FeatureDot2Insts,
- FeatureDot5Insts,
- FeatureDot6Insts,
- FeatureDot7Insts,
- FeatureNSAEncoding,
- FeatureNSAMaxSize13,
- FeatureWavefrontSize32,
- FeatureShaderCyclesRegister,
- FeatureBackOffBarrier]>;
+ !listconcat(FeatureISAVersion10_Common.Features,
+ [FeatureGFX10_AEncoding,
+ FeatureGFX10_BEncoding,
+ FeatureGFX10_3Insts,
+ FeatureDot1Insts,
+ FeatureDot2Insts,
+ FeatureDot5Insts,
+ FeatureDot6Insts,
+ FeatureDot7Insts,
+ FeatureDot10Insts,
+ FeatureShaderCyclesRegister])>;
def FeatureISAVersion11_Common : FeatureSet<
[FeatureGFX11,
@@ -1314,8 +1297,9 @@ def FeatureISAVersion11_Common : FeatureSet<
FeatureDot7Insts,
FeatureDot8Insts,
FeatureDot9Insts,
+ FeatureDot10Insts,
FeatureNSAEncoding,
- FeatureNSAMaxSize5,
+ FeaturePartialNSAEncoding,
FeatureWavefrontSize32,
FeatureShaderCyclesRegister,
FeatureArchitectedFlatScratch,
@@ -1325,26 +1309,37 @@ def FeatureISAVersion11_Common : FeatureSet<
FeatureImageInsts,
FeaturePackedTID,
FeatureVcmpxPermlaneHazard,
- FeatureVALUTransUseHazard,
FeatureMADIntraFwdBug]>;
-def FeatureISAVersion11_0_0 : FeatureSet<
+def FeatureISAVersion11_0_Common : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
+ [FeatureVALUTransUseHazard])>;
+
+def FeatureISAVersion11_0_0 : FeatureSet<
+ !listconcat(FeatureISAVersion11_0_Common.Features,
[FeatureGFX11FullVGPRs,
FeatureUserSGPRInit16Bug])>;
def FeatureISAVersion11_0_1 : FeatureSet<
- !listconcat(FeatureISAVersion11_Common.Features,
+ !listconcat(FeatureISAVersion11_0_Common.Features,
[FeatureGFX11FullVGPRs])>;
def FeatureISAVersion11_0_2 : FeatureSet<
- !listconcat(FeatureISAVersion11_Common.Features,
+ !listconcat(FeatureISAVersion11_0_Common.Features,
[FeatureUserSGPRInit16Bug])>;
def FeatureISAVersion11_0_3 : FeatureSet<
+ !listconcat(FeatureISAVersion11_0_Common.Features,
+ [])>;
+
+def FeatureISAVersion11_5_0 : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[])>;
+def FeatureISAVersion11_5_1 : FeatureSet<
+ !listconcat(FeatureISAVersion11_Common.Features,
+ [FeatureGFX11FullVGPRs])>;
+
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
@@ -1522,6 +1517,9 @@ def isGFX9Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX9">,
AssemblerPredicate<(all_of FeatureGFX9Insts)>;
+def isNotGFX9Plus :
+ Predicate<"Subtarget->getGeneration() < AMDGPUSubtarget::GFX9">;
+
def isGFX9Only : Predicate <
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
AssemblerPredicate<(all_of FeatureGCN3Encoding, FeatureGFX9Insts)>;
@@ -1655,6 +1653,8 @@ def NotHasTrue16BitInsts : Predicate<"!Subtarget->hasTrue16BitInsts()">;
def HasVOP3PInsts : Predicate<"Subtarget->hasVOP3PInsts()">,
AssemblerPredicate<(all_of FeatureVOP3P)>;
+def NotHasMed3_16 : Predicate<"!Subtarget->hasMed3_16()">;
+
def HasMinMaxDenormModes : Predicate<"Subtarget->supportsMinMaxDenormModes()">;
def NotHasMinMaxDenormModes : Predicate<"!Subtarget->supportsMinMaxDenormModes()">;
@@ -1766,6 +1766,9 @@ def HasDot8Insts : Predicate<"Subtarget->hasDot8Insts()">,
def HasDot9Insts : Predicate<"Subtarget->hasDot9Insts()">,
AssemblerPredicate<(all_of FeatureDot9Insts)>;
+def HasDot10Insts : Predicate<"Subtarget->hasDot10Insts()">,
+ AssemblerPredicate<(all_of FeatureDot10Insts)>;
+
def HasGetWaveIdInst : Predicate<"Subtarget->hasGetWaveIdInst()">,
AssemblerPredicate<(all_of FeatureGetWaveIdInst)>;
@@ -1793,13 +1796,25 @@ def HasMadMacF32Insts : Predicate<"Subtarget->hasMadMacF32Insts()">,
def HasFmaLegacy32 : Predicate<"Subtarget->hasGFX10_3Insts()">,
AssemblerPredicate<(any_of FeatureGFX10_3Insts)>;
+def HasAtomicDsPkAdd16Insts : Predicate<"Subtarget->hasAtomicDsPkAdd16Insts()">,
+ AssemblerPredicate<(any_of FeatureAtomicDsPkAdd16Insts)>;
+
+def HasAtomicFlatPkAdd16Insts : Predicate<"Subtarget->hasAtomicFlatPkAdd16Insts()">,
+ AssemblerPredicate<(any_of FeatureAtomicFlatPkAdd16Insts)>;
+
def HasAtomicFaddRtnInsts : Predicate<"Subtarget->hasAtomicFaddRtnInsts()">,
AssemblerPredicate<(all_of FeatureAtomicFaddRtnInsts)>;
def HasAtomicFaddNoRtnInsts : Predicate<"Subtarget->hasAtomicFaddNoRtnInsts()">,
AssemblerPredicate<(all_of FeatureAtomicFaddNoRtnInsts)>;
-def HasAtomicPkFaddNoRtnInsts
- : Predicate<"Subtarget->hasAtomicPkFaddNoRtnInsts()">,
- AssemblerPredicate<(all_of FeatureAtomicPkFaddNoRtnInsts)>;
+def HasAtomicBufferGlobalPkAddF16NoRtnInsts
+ : Predicate<"Subtarget->hasAtomicBufferGlobalPkAddF16NoRtnInsts() || Subtarget->hasAtomicBufferGlobalPkAddF16Insts()">,
+ AssemblerPredicate<(any_of FeatureAtomicBufferGlobalPkAddF16NoRtnInsts, FeatureAtomicBufferGlobalPkAddF16Insts)>;
+def HasAtomicBufferGlobalPkAddF16Insts
+ : Predicate<"Subtarget->hasAtomicBufferGlobalPkAddF16Insts()">,
+ AssemblerPredicate<(all_of FeatureAtomicBufferGlobalPkAddF16Insts)>;
+def HasAtomicGlobalPkAddBF16Inst
+ : Predicate<"Subtarget->hasAtomicGlobalPkAddBF16Inst()">,
+ AssemblerPredicate<(all_of FeatureAtomicGlobalPkAddBF16Inst)>;
def HasFlatAtomicFaddF32Inst
: Predicate<"Subtarget->hasFlatAtomicFaddF32Inst()">,
AssemblerPredicate<(all_of FeatureFlatAtomicFaddF32Inst)>;
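The predicates above map onto GCNSubtarget query methods of the same names (as spelled out in their Predicate strings); a hedged C++ sketch of how lowering code might branch on them. The helper name is illustrative only:

    // Illustrative helper, not from this diff; the subtarget accessors are the
    // ones named in the HasAtomicBufferGlobalPkAddF16NoRtnInsts predicate above.
    static bool hasBufferGlobalPkAddF16(const GCNSubtarget &ST) {
      return ST.hasAtomicBufferGlobalPkAddF16NoRtnInsts() ||
             ST.hasAtomicBufferGlobalPkAddF16Insts();
    }
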
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
index 8155c895e366..63942414bf3c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp
@@ -46,41 +46,14 @@ void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
}
-static AliasResult getAliasResult(unsigned AS1, unsigned AS2) {
- static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 7, "Addr space out of range");
-
- if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
- return AliasResult::MayAlias;
-
-#define ASMay AliasResult::MayAlias
-#define ASNo AliasResult::NoAlias
- // This array is indexed by address space value enum elements 0 ... to 7
- static const AliasResult ASAliasRules[8][8] = {
- /* Flat Global Region Group Constant Private Const32 Buf Fat Ptr */
- /* Flat */ {ASMay, ASMay, ASNo, ASMay, ASMay, ASMay, ASMay, ASMay},
- /* Global */ {ASMay, ASMay, ASNo, ASNo, ASMay, ASNo, ASMay, ASMay},
- /* Region */ {ASNo, ASNo, ASMay, ASNo, ASNo, ASNo, ASNo, ASNo},
- /* Group */ {ASMay, ASNo, ASNo, ASMay, ASNo, ASNo, ASNo, ASNo},
- /* Constant */ {ASMay, ASMay, ASNo, ASNo, ASNo, ASNo, ASMay, ASMay},
- /* Private */ {ASMay, ASNo, ASNo, ASNo, ASNo, ASMay, ASNo, ASNo},
- /* Constant 32-bit */ {ASMay, ASMay, ASNo, ASNo, ASMay, ASNo, ASNo, ASMay},
- /* Buffer Fat Ptr */ {ASMay, ASMay, ASNo, ASNo, ASMay, ASNo, ASMay, ASMay}
- };
-#undef ASMay
-#undef ASNo
-
- return ASAliasRules[AS1][AS2];
-}
-
AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA,
const MemoryLocation &LocB, AAQueryInfo &AAQI,
const Instruction *) {
unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace();
unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace();
- AliasResult Result = getAliasResult(asA, asB);
- if (Result == AliasResult::NoAlias)
- return Result;
+ if (!AMDGPU::addrspacesMayAlias(asA, asB))
+ return AliasResult::NoAlias;
// In general, FLAT (generic) pointers could be aliased to LOCAL or PRIVATE
// pointers. However, as LOCAL or PRIVATE pointers point to local objects, in
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index 2e24e9f929d2..b53def912ab6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -127,7 +127,7 @@ static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
unsigned AS = GV.getAddressSpace();
if ((AS == AMDGPUAS::REGION_ADDRESS) ||
(AS == AMDGPUAS::LOCAL_ADDRESS &&
- (!AMDGPUTargetMachine::EnableLowerModuleLDS || !GV.hasInitializer())))
+ (!AMDGPUTargetMachine::EnableLowerModuleLDS)))
recursivelyVisitUsers(GV, FuncsToAlwaysInline);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index 74be0336851c..6a409f0dcbe7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -16,8 +16,8 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDGPUMemoryUtils.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/InitializePasses.h"
@@ -29,7 +29,7 @@ namespace {
class AMDGPUAnnotateUniformValues : public FunctionPass,
public InstVisitor<AMDGPUAnnotateUniformValues> {
- LegacyDivergenceAnalysis *DA;
+ UniformityInfo *UA;
MemorySSA *MSSA;
AliasAnalysis *AA;
bool isEntryFunc;
@@ -55,7 +55,7 @@ public:
return "AMDGPU Annotate Uniform Values";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LegacyDivergenceAnalysis>();
+ AU.addRequired<UniformityInfoWrapperPass>();
AU.addRequired<MemorySSAWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
AU.setPreservesAll();
@@ -69,7 +69,7 @@ public:
INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
"Add AMDGPU uniform metadata", false, false)
-INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
@@ -78,13 +78,13 @@ INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
char AMDGPUAnnotateUniformValues::ID = 0;
void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
- if (DA->isUniform(&I))
+ if (UA->isUniform(&I))
setUniformMetadata(&I);
}
void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
Value *Ptr = I.getPointerOperand();
- if (!DA->isUniform(Ptr))
+ if (!UA->isUniform(Ptr))
return;
Instruction *PtrI = dyn_cast<Instruction>(Ptr);
if (PtrI)
@@ -108,7 +108,7 @@ bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- DA = &getAnalysis<LegacyDivergenceAnalysis>();
+ UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv());
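For comparison with the legacy wiring shown above, a sketch of the new-pass-manager way to obtain UniformityInfo; this pass stays legacy-only in this diff, so the class name and body are assumptions, mirroring AMDGPUAtomicOptimizerPass later in this import:

    // Assumed new-PM shape; SomeUniformityUserPass is hypothetical.
    PreservedAnalyses SomeUniformityUserPass::run(Function &F,
                                                  FunctionAnalysisManager &AM) {
      const UniformityInfo &UI = AM.getResult<UniformityInfoAnalysis>(F);
      // ...query UI.isUniform(...) just as the legacy pass queries UA above...
      return PreservedAnalyses::all();
    }
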
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index c916d5d547c4..7cd8e53e6521 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -38,9 +38,9 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
-#include "llvm/Support/TargetParser.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/TargetParser.h"
using namespace llvm;
using namespace llvm::AMDGPU;
@@ -65,7 +65,7 @@ using namespace llvm::AMDGPU;
// We want to use these instructions, and using fp32 denormals also causes
// instructions to run at the double precision rate for the device so it's
// probably best to just report no single precision denormals.
-static uint32_t getFPMode(AMDGPU::SIModeRegisterDefaults Mode) {
+static uint32_t getFPMode(SIModeRegisterDefaults Mode) {
return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) |
FP_DENORM_MODE_SP(Mode.fpDenormModeSPValue()) |
@@ -78,8 +78,8 @@ createAMDGPUAsmPrinterPass(TargetMachine &tm,
return new AMDGPUAsmPrinter(tm, std::move(Streamer));
}
-extern "C" void LLVM_EXTERNAL_VISIBILITY LLVMInitializeAMDGPUAsmPrinter() {
- TargetRegistry::RegisterAsmPrinter(getTheAMDGPUTarget(),
+extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmPrinter() {
+ TargetRegistry::RegisterAsmPrinter(getTheR600Target(),
llvm::createR600AsmPrinterPass);
TargetRegistry::RegisterAsmPrinter(getTheGCNTarget(),
createAMDGPUAsmPrinterPass);
@@ -89,18 +89,6 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {
assert(OutStreamer && "AsmPrinter constructed without streamer");
-
- if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
- if (isHsaAbiVersion2(getGlobalSTI())) {
- HSAMetadataStream.reset(new HSAMD::MetadataStreamerYamlV2());
- } else if (isHsaAbiVersion3(getGlobalSTI())) {
- HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV3());
- } else if (isHsaAbiVersion5(getGlobalSTI())) {
- HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV5());
- } else {
- HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV4());
- }
- }
}
StringRef AMDGPUAsmPrinter::getPassName() const {
@@ -133,7 +121,7 @@ void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {
TM.getTargetTriple().getOS() != Triple::AMDPAL)
return;
- if (isHsaAbiVersion3AndAbove(getGlobalSTI()))
+ if (CodeObjectVersion >= AMDGPU::AMDHSA_COV3)
getTargetStreamer()->EmitDirectiveAMDGCNTarget();
if (TM.getTargetTriple().getOS() == Triple::AMDHSA)
@@ -142,7 +130,7 @@ void AMDGPUAsmPrinter::initTargetStreamer(Module &M) {
if (TM.getTargetTriple().getOS() == Triple::AMDPAL)
getTargetStreamer()->getPALMetadata()->readFromIR(M);
- if (isHsaAbiVersion3AndAbove(getGlobalSTI()))
+ if (CodeObjectVersion >= AMDGPU::AMDHSA_COV3)
return;
// HSA emits NT_AMD_HSA_CODE_OBJECT_VERSION for code objects v2.
@@ -161,7 +149,7 @@ void AMDGPUAsmPrinter::emitEndOfAsmFile(Module &M) {
initTargetStreamer(M);
if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
- isHsaAbiVersion2(getGlobalSTI()))
+ CodeObjectVersion == AMDGPU::AMDHSA_COV2)
getTargetStreamer()->EmitISAVersion();
// Emit HSA Metadata (NT_AMD_AMDGPU_HSA_METADATA).
@@ -221,7 +209,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
if (!MFI.isEntryFunction())
return;
- if ((STM.isMesaKernel(F) || isHsaAbiVersion2(getGlobalSTI())) &&
+ if ((STM.isMesaKernel(F) || CodeObjectVersion == AMDGPU::AMDHSA_COV2) &&
(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
F.getCallingConv() == CallingConv::SPIR_KERNEL)) {
amd_kernel_code_t KernelCode;
@@ -239,7 +227,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
return;
if (TM.getTargetTriple().getOS() != Triple::AMDHSA ||
- isHsaAbiVersion2(getGlobalSTI()))
+ CodeObjectVersion == AMDGPU::AMDHSA_COV2)
return;
auto &Streamer = getTargetStreamer()->getStreamer();
@@ -263,17 +251,18 @@ void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
STM, KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
CurrentProgramInfo.NumVGPRsForWavesPerEU,
CurrentProgramInfo.NumSGPRsForWavesPerEU -
- IsaInfo::getNumExtraSGPRs(&STM,
- CurrentProgramInfo.VCCUsed,
- CurrentProgramInfo.FlatUsed),
- CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed);
+ IsaInfo::getNumExtraSGPRs(
+ &STM, CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
+ getTargetStreamer()->getTargetID()->isXnackOnOrAny()),
+ CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
+ CodeObjectVersion);
Streamer.popSection();
}
void AMDGPUAsmPrinter::emitFunctionEntryLabel() {
if (TM.getTargetTriple().getOS() == Triple::AMDHSA &&
- isHsaAbiVersion3AndAbove(getGlobalSTI())) {
+ CodeObjectVersion >= AMDGPU::AMDHSA_COV3) {
AsmPrinter::emitFunctionEntryLabel();
return;
}
@@ -343,6 +332,30 @@ void AMDGPUAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) {
AsmPrinter::emitGlobalVariable(GV);
}
+bool AMDGPUAsmPrinter::doInitialization(Module &M) {
+ CodeObjectVersion = AMDGPU::getCodeObjectVersion(M);
+
+ if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
+ switch (CodeObjectVersion) {
+ case AMDGPU::AMDHSA_COV2:
+ HSAMetadataStream.reset(new HSAMD::MetadataStreamerYamlV2());
+ break;
+ case AMDGPU::AMDHSA_COV3:
+ HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV3());
+ break;
+ case AMDGPU::AMDHSA_COV4:
+ HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV4());
+ break;
+ case AMDGPU::AMDHSA_COV5:
+ HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV5());
+ break;
+ default:
+ report_fatal_error("Unexpected code object version");
+ }
+ }
+ return AsmPrinter::doInitialization(M);
+}
+
bool AMDGPUAsmPrinter::doFinalization(Module &M) {
// Pad with s_code_end to help tools and guard against instruction prefetch
// causing stale data in caches. Arguably this should be done by the linker,
@@ -389,7 +402,7 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
}
- if (MFI.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) {
+ if (MFI.hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5) {
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
}
@@ -411,9 +424,8 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKernelCodeProperties(
}
if (CurrentProgramInfo.DynamicCallStack &&
- AMDGPU::getAmdhsaCodeObjectVersion() >= 5) {
+ CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
KernelCodeProperties |= amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK;
- }
return KernelCodeProperties;
}
@@ -429,7 +441,7 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
assert(isUInt<32>(PI.ScratchSize));
assert(isUInt<32>(PI.getComputePGMRSrc1()));
- assert(isUInt<32>(PI.ComputePGMRSrc2));
+ assert(isUInt<32>(PI.getComputePGMRSrc2()));
KernelDescriptor.group_segment_fixed_size = PI.LDSSize;
KernelDescriptor.private_segment_fixed_size = PI.ScratchSize;
@@ -438,7 +450,7 @@ amdhsa::kernel_descriptor_t AMDGPUAsmPrinter::getAmdhsaKernelDescriptor(
KernelDescriptor.kernarg_size = STM.getKernArgSegmentSize(F, MaxKernArgAlign);
KernelDescriptor.compute_pgm_rsrc1 = PI.getComputePGMRSrc1();
- KernelDescriptor.compute_pgm_rsrc2 = PI.ComputePGMRSrc2;
+ KernelDescriptor.compute_pgm_rsrc2 = PI.getComputePGMRSrc2();
KernelDescriptor.kernel_code_properties = getAmdhsaKernelCodeProperties(MF);
assert(STM.hasGFX90AInsts() || CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
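The diff replaces the precomputed ComputePGMRSrc2 word with discrete SIProgramInfo fields plus a getComputePGMRSrc2() accessor. Presumably that accessor repacks the fields with the same S_00B84C_* encoders the removed inline code used; a sketch under that assumption (the real definition is not part of this hunk):

    // Assumed shape of the accessor; field names are the ones set later in
    // getSIProgramInfo() in this file.
    uint32_t SIProgramInfo::getComputePGMRSrc2() const {
      return S_00B84C_SCRATCH_EN(ScratchEnable) |
             S_00B84C_USER_SGPR(UserSGPR) |
             S_00B84C_TRAP_HANDLER(TrapHandlerEnable) |
             S_00B84C_TGID_X_EN(TGIdXEnable) |
             S_00B84C_TGID_Y_EN(TGIdYEnable) |
             S_00B84C_TGID_Z_EN(TGIdZEnable) |
             S_00B84C_TG_SIZE_EN(TGSizeEnable) |
             S_00B84C_TIDIG_COMP_CNT(TIdIGCompCount) |
             S_00B84C_EXCP_EN_MSB(EXCPEnMSB) |
             S_00B84C_LDS_SIZE(LdsSize) |
             S_00B84C_EXCP_EN(EXCPEnable);
    }
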
@@ -567,28 +579,27 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->emitRawComment(
" WaveLimiterHint : " + Twine(MFI->needsWaveLimiter()), false);
- OutStreamer->emitRawComment(
- " COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
- Twine(G_00B84C_SCRATCH_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
- OutStreamer->emitRawComment(
- " COMPUTE_PGM_RSRC2:USER_SGPR: " +
- Twine(G_00B84C_USER_SGPR(CurrentProgramInfo.ComputePGMRSrc2)), false);
- OutStreamer->emitRawComment(
- " COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
- Twine(G_00B84C_TRAP_HANDLER(CurrentProgramInfo.ComputePGMRSrc2)), false);
- OutStreamer->emitRawComment(
- " COMPUTE_PGM_RSRC2:TGID_X_EN: " +
- Twine(G_00B84C_TGID_X_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
- OutStreamer->emitRawComment(
- " COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
- Twine(G_00B84C_TGID_Y_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
- OutStreamer->emitRawComment(
- " COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
- Twine(G_00B84C_TGID_Z_EN(CurrentProgramInfo.ComputePGMRSrc2)), false);
- OutStreamer->emitRawComment(
- " COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
- Twine(G_00B84C_TIDIG_COMP_CNT(CurrentProgramInfo.ComputePGMRSrc2)),
- false);
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:SCRATCH_EN: " +
+ Twine(CurrentProgramInfo.ScratchEnable),
+ false);
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:USER_SGPR: " +
+ Twine(CurrentProgramInfo.UserSGPR),
+ false);
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TRAP_HANDLER: " +
+ Twine(CurrentProgramInfo.TrapHandlerEnable),
+ false);
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_X_EN: " +
+ Twine(CurrentProgramInfo.TGIdXEnable),
+ false);
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Y_EN: " +
+ Twine(CurrentProgramInfo.TGIdYEnable),
+ false);
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TGID_Z_EN: " +
+ Twine(CurrentProgramInfo.TGIdZEnable),
+ false);
+ OutStreamer->emitRawComment(" COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: " +
+ Twine(CurrentProgramInfo.TIdIGCompCount),
+ false);
assert(STM.hasGFX90AInsts() ||
CurrentProgramInfo.ComputePGMRSrc3GFX90A == 0);
@@ -631,7 +642,7 @@ void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
// In the beginning all features are either 'Any' or 'NotSupported',
// depending on global target features. This will cover empty modules.
getTargetStreamer()->initializeTargetID(
- *getGlobalSTI(), getGlobalSTI()->getFeatureString());
+ *getGlobalSTI(), getGlobalSTI()->getFeatureString(), CodeObjectVersion);
// If module is empty, we are done.
if (M.empty())
@@ -709,7 +720,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
// unified.
unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
- &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed);
+ &STM, ProgInfo.VCCUsed, ProgInfo.FlatUsed,
+ getTargetStreamer()->getTargetID()->isXnackOnOrAny());
// Check the addressable register limit before we add ExtraSGPRs.
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
@@ -761,7 +773,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// There are some rare circumstances where InputAddr is non-zero and
// InputEna can be set to 0. In this case we default to setting LastEna
// to 1.
- LastEna = InputEna ? findLastSet(InputEna) + 1 : 1;
+ LastEna = InputEna ? llvm::Log2_32(InputEna) + 1 : 1;
}
// FIXME: We should be using the number of registers determined during
@@ -909,22 +921,21 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
// anything to disable it if we know the stack isn't used here. We may still
// have emitted code reading it to initialize scratch, but if that's unused
// reading garbage should be OK.
- const bool EnablePrivateSegment =
+ ProgInfo.ScratchEnable =
ProgInfo.ScratchBlocks > 0 || ProgInfo.DynamicCallStack;
- ProgInfo.ComputePGMRSrc2 =
- S_00B84C_SCRATCH_EN(EnablePrivateSegment) |
- S_00B84C_USER_SGPR(MFI->getNumUserSGPRs()) |
- // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
- S_00B84C_TRAP_HANDLER(STM.isAmdHsaOS() ? 0 : STM.isTrapHandlerEnabled()) |
- S_00B84C_TGID_X_EN(MFI->hasWorkGroupIDX()) |
- S_00B84C_TGID_Y_EN(MFI->hasWorkGroupIDY()) |
- S_00B84C_TGID_Z_EN(MFI->hasWorkGroupIDZ()) |
- S_00B84C_TG_SIZE_EN(MFI->hasWorkGroupInfo()) |
- S_00B84C_TIDIG_COMP_CNT(TIDIGCompCnt) |
- S_00B84C_EXCP_EN_MSB(0) |
- // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
- S_00B84C_LDS_SIZE(STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks) |
- S_00B84C_EXCP_EN(0);
+ ProgInfo.UserSGPR = MFI->getNumUserSGPRs();
+ // For AMDHSA, TRAP_HANDLER must be zero, as it is populated by the CP.
+ ProgInfo.TrapHandlerEnable =
+ STM.isAmdHsaOS() ? 0 : STM.isTrapHandlerEnabled();
+ ProgInfo.TGIdXEnable = MFI->hasWorkGroupIDX();
+ ProgInfo.TGIdYEnable = MFI->hasWorkGroupIDY();
+ ProgInfo.TGIdZEnable = MFI->hasWorkGroupIDZ();
+ ProgInfo.TGSizeEnable = MFI->hasWorkGroupInfo();
+ ProgInfo.TIdIGCompCount = TIDIGCompCnt;
+ ProgInfo.EXCPEnMSB = 0;
+ // For AMDHSA, LDS_SIZE must be zero, as it is populated by the CP.
+ ProgInfo.LdsSize = STM.isAmdHsaOS() ? 0 : ProgInfo.LDSBlocks;
+ ProgInfo.EXCPEnable = 0;
if (STM.hasGFX90AInsts()) {
AMDHSA_BITS_SET(ProgInfo.ComputePGMRSrc3GFX90A,
@@ -965,7 +976,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer->emitInt32(CurrentProgramInfo.getComputePGMRSrc1());
OutStreamer->emitInt32(R_00B84C_COMPUTE_PGM_RSRC2);
- OutStreamer->emitInt32(CurrentProgramInfo.ComputePGMRSrc2);
+ OutStreamer->emitInt32(CurrentProgramInfo.getComputePGMRSrc2());
OutStreamer->emitInt32(R_00B860_COMPUTE_TMPRING_SIZE);
OutStreamer->emitInt32(
@@ -1025,25 +1036,77 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
}
MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU);
- MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC));
- if (AMDGPU::isCompute(CC)) {
- MD->setRsrc2(CC, CurrentProgramInfo.ComputePGMRSrc2);
+ if (MD->getPALMajorVersion() < 3) {
+ MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC));
+ if (AMDGPU::isCompute(CC)) {
+ MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2());
+ } else {
+ if (CurrentProgramInfo.ScratchBlocks > 0)
+ MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1));
+ }
} else {
- if (CurrentProgramInfo.ScratchBlocks > 0)
- MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1));
+ MD->setHwStage(CC, ".debug_mode", (bool)CurrentProgramInfo.DebugMode);
+ MD->setHwStage(CC, ".ieee_mode", (bool)CurrentProgramInfo.IEEEMode);
+ MD->setHwStage(CC, ".wgp_mode", (bool)CurrentProgramInfo.WgpMode);
+ MD->setHwStage(CC, ".mem_ordered", (bool)CurrentProgramInfo.MemOrdered);
+
+ if (AMDGPU::isCompute(CC)) {
+ MD->setHwStage(CC, ".scratch_en", (bool)CurrentProgramInfo.ScratchEnable);
+ MD->setHwStage(CC, ".trap_present",
+ (bool)CurrentProgramInfo.TrapHandlerEnable);
+
+ // EXCPEnMSB?
+ const unsigned LdsDwGranularity = 128;
+ MD->setHwStage(CC, ".lds_size",
+ (unsigned)(CurrentProgramInfo.LdsSize * LdsDwGranularity *
+ sizeof(uint32_t)));
+ MD->setHwStage(CC, ".excp_en", CurrentProgramInfo.EXCPEnable);
+ } else {
+ MD->setHwStage(CC, ".scratch_en", (bool)CurrentProgramInfo.ScratchEnable);
+ }
}
+
// ScratchSize is in bytes, 16 aligned.
MD->setScratchSize(CC, alignTo(CurrentProgramInfo.ScratchSize, 16));
if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
: CurrentProgramInfo.LDSBlocks;
- MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));
- MD->setSpiPsInputEna(MFI->getPSInputEnable());
- MD->setSpiPsInputAddr(MFI->getPSInputAddr());
+ if (MD->getPALMajorVersion() < 3) {
+ MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));
+ MD->setSpiPsInputEna(MFI->getPSInputEnable());
+ MD->setSpiPsInputAddr(MFI->getPSInputAddr());
+ } else {
+ // Graphics registers
+ const unsigned ExtraLdsDwGranularity =
+ STM.getGeneration() >= AMDGPUSubtarget::GFX11 ? 256 : 128;
+ MD->setGraphicsRegisters(
+ ".ps_extra_lds_size",
+ (unsigned)(ExtraLDSSize * ExtraLdsDwGranularity * sizeof(uint32_t)));
+
+      // Set PsInputEna and PsInputAddr in .spi_ps_input_ena and .spi_ps_input_addr.
+ static StringLiteral const PsInputFields[] = {
+ ".persp_sample_ena", ".persp_center_ena",
+ ".persp_centroid_ena", ".persp_pull_model_ena",
+ ".linear_sample_ena", ".linear_center_ena",
+ ".linear_centroid_ena", ".line_stipple_tex_ena",
+ ".pos_x_float_ena", ".pos_y_float_ena",
+ ".pos_z_float_ena", ".pos_w_float_ena",
+ ".front_face_ena", ".ancillary_ena",
+ ".sample_coverage_ena", ".pos_fixed_pt_ena"};
+ unsigned PSInputEna = MFI->getPSInputEnable();
+ unsigned PSInputAddr = MFI->getPSInputAddr();
+ for (auto [Idx, Field] : enumerate(PsInputFields)) {
+ MD->setGraphicsRegisters(".spi_ps_input_ena", Field,
+ (bool)((PSInputEna >> Idx) & 1));
+ MD->setGraphicsRegisters(".spi_ps_input_addr", Field,
+ (bool)((PSInputAddr >> Idx) & 1));
+ }
+ }
}
- if (STM.isWave32())
+  // For version 3 and above, the wavefront size is already set in the metadata.
+ if (MD->getPALMajorVersion() < 3 && STM.isWave32())
MD->setWave32(MF.getFunction().getCallingConv());
}
@@ -1055,7 +1118,7 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
// Set compute registers
MD->setRsrc1(CallingConv::AMDGPU_CS,
CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS));
- MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.ComputePGMRSrc2);
+ MD->setRsrc2(CallingConv::AMDGPU_CS, CurrentProgramInfo.getComputePGMRSrc2());
// Set optional info
MD->setFunctionLdsSize(MF, CurrentProgramInfo.LDSSize);
@@ -1091,7 +1154,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
Out.compute_pgm_resource_registers =
CurrentProgramInfo.getComputePGMRSrc1() |
- (CurrentProgramInfo.ComputePGMRSrc2 << 32);
+ (CurrentProgramInfo.getComputePGMRSrc2() << 32);
Out.code_properties |= AMD_CODE_PROPERTY_IS_PTR64;
if (CurrentProgramInfo.DynamicCallStack)
@@ -1109,7 +1172,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(amd_kernel_code_t &Out,
if (MFI->hasDispatchPtr())
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR;
- if (MFI->hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5)
+ if (MFI->hasQueuePtr() && CodeObjectVersion < AMDGPU::AMDHSA_COV5)
Out.code_properties |= AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR;
if (MFI->hasKernargSegmentPtr())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index ea12086751a4..d490209ce35e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -39,6 +39,7 @@ struct kernel_descriptor_t;
class AMDGPUAsmPrinter final : public AsmPrinter {
private:
+ unsigned CodeObjectVersion;
void initializeTargetID(const Module &M);
AMDGPUResourceUsageAnalysis *ResourceUsage;
@@ -90,6 +91,7 @@ public:
AMDGPUTargetStreamer* getTargetStreamer() const;
+ bool doInitialization(Module &M) override;
bool doFinalization(Module &M) override;
bool runOnMachineFunction(MachineFunction &MF) override;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
index 28967bb8e5b1..9795928094f4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAtomicOptimizer.cpp
@@ -10,12 +10,21 @@
/// This pass optimizes atomic operations by using a single lane of a wavefront
/// to perform the atomic operation, thus reducing contention on that memory
/// location.
-//
+/// The atomic optimizer uses the following strategies to compute scanned and
+/// reduced values:
+/// 1. DPP -
+/// The most efficient implementation for scans; it uses DPP operations in
+/// Whole Wave Mode (WWM).
+/// 2. Iterative -
+/// An alternative implementation that iterates over all active lanes of the
+/// wavefront using llvm.cttz and performs the scan using the readlane and
+/// writelane intrinsics.
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "GCNSubtarget.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
@@ -38,36 +47,57 @@ struct ReplacementInfo {
bool ValDivergent;
};
-class AMDGPUAtomicOptimizer : public FunctionPass,
- public InstVisitor<AMDGPUAtomicOptimizer> {
+class AMDGPUAtomicOptimizer : public FunctionPass {
+public:
+ static char ID;
+ ScanOptions ScanImpl;
+ AMDGPUAtomicOptimizer(ScanOptions ScanImpl)
+ : FunctionPass(ID), ScanImpl(ScanImpl) {}
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<UniformityInfoWrapperPass>();
+ AU.addRequired<TargetPassConfig>();
+ }
+};
+
+class AMDGPUAtomicOptimizerImpl
+ : public InstVisitor<AMDGPUAtomicOptimizerImpl> {
private:
SmallVector<ReplacementInfo, 8> ToReplace;
- const LegacyDivergenceAnalysis *DA;
+ const UniformityInfo *UA;
const DataLayout *DL;
- DominatorTree *DT;
+ DomTreeUpdater &DTU;
const GCNSubtarget *ST;
bool IsPixelShader;
+ ScanOptions ScanImpl;
Value *buildReduction(IRBuilder<> &B, AtomicRMWInst::BinOp Op, Value *V,
Value *const Identity) const;
Value *buildScan(IRBuilder<> &B, AtomicRMWInst::BinOp Op, Value *V,
Value *const Identity) const;
Value *buildShiftRight(IRBuilder<> &B, Value *V, Value *const Identity) const;
+
+ std::pair<Value *, Value *>
+ buildScanIteratively(IRBuilder<> &B, AtomicRMWInst::BinOp Op,
+ Value *const Identity, Value *V, Instruction &I,
+ BasicBlock *ComputeLoop, BasicBlock *ComputeEnd) const;
+
void optimizeAtomic(Instruction &I, AtomicRMWInst::BinOp Op, unsigned ValIdx,
bool ValDivergent) const;
public:
- static char ID;
-
- AMDGPUAtomicOptimizer() : FunctionPass(ID) {}
+ AMDGPUAtomicOptimizerImpl() = delete;
- bool runOnFunction(Function &F) override;
+ AMDGPUAtomicOptimizerImpl(const UniformityInfo *UA, const DataLayout *DL,
+ DomTreeUpdater &DTU, const GCNSubtarget *ST,
+ bool IsPixelShader, ScanOptions ScanImpl)
+ : UA(UA), DL(DL), DTU(DTU), ST(ST), IsPixelShader(IsPixelShader),
+ ScanImpl(ScanImpl) {}
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<LegacyDivergenceAnalysis>();
- AU.addRequired<TargetPassConfig>();
- }
+ bool run(Function &F);
void visitAtomicRMWInst(AtomicRMWInst &I);
void visitIntrinsicInst(IntrinsicInst &I);
@@ -84,15 +114,56 @@ bool AMDGPUAtomicOptimizer::runOnFunction(Function &F) {
return false;
}
- DA = &getAnalysis<LegacyDivergenceAnalysis>();
- DL = &F.getParent()->getDataLayout();
+ const UniformityInfo *UA =
+ &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
+ const DataLayout *DL = &F.getParent()->getDataLayout();
+
DominatorTreeWrapperPass *const DTW =
getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTW ? &DTW->getDomTree() : nullptr;
+ DomTreeUpdater DTU(DTW ? &DTW->getDomTree() : nullptr,
+ DomTreeUpdater::UpdateStrategy::Lazy);
+
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
const TargetMachine &TM = TPC.getTM<TargetMachine>();
- ST = &TM.getSubtarget<GCNSubtarget>(F);
- IsPixelShader = F.getCallingConv() == CallingConv::AMDGPU_PS;
+ const GCNSubtarget *ST = &TM.getSubtarget<GCNSubtarget>(F);
+
+ bool IsPixelShader = F.getCallingConv() == CallingConv::AMDGPU_PS;
+
+ return AMDGPUAtomicOptimizerImpl(UA, DL, DTU, ST, IsPixelShader, ScanImpl)
+ .run(F);
+}
+
+PreservedAnalyses AMDGPUAtomicOptimizerPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+
+ const auto *UA = &AM.getResult<UniformityInfoAnalysis>(F);
+ const DataLayout *DL = &F.getParent()->getDataLayout();
+
+ DomTreeUpdater DTU(&AM.getResult<DominatorTreeAnalysis>(F),
+ DomTreeUpdater::UpdateStrategy::Lazy);
+ const GCNSubtarget *ST = &TM.getSubtarget<GCNSubtarget>(F);
+
+ bool IsPixelShader = F.getCallingConv() == CallingConv::AMDGPU_PS;
+
+ bool IsChanged =
+ AMDGPUAtomicOptimizerImpl(UA, DL, DTU, ST, IsPixelShader, ScanImpl)
+ .run(F);
+
+ if (!IsChanged) {
+ return PreservedAnalyses::all();
+ }
+
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ return PA;
+}
+
+bool AMDGPUAtomicOptimizerImpl::run(Function &F) {
+
+  // The None scan option disables the pass.
+ if (ScanImpl == ScanOptions::None) {
+ return false;
+ }
visit(F);
@@ -107,7 +178,7 @@ bool AMDGPUAtomicOptimizer::runOnFunction(Function &F) {
return Changed;
}
-void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) {
+void AMDGPUAtomicOptimizerImpl::visitAtomicRMWInst(AtomicRMWInst &I) {
// Early exit for unhandled address space atomic instructions.
switch (I.getPointerAddressSpace()) {
default:
@@ -139,11 +210,11 @@ void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) {
// If the pointer operand is divergent, then each lane is doing an atomic
// operation on a different address, and we cannot optimize that.
- if (DA->isDivergentUse(&I.getOperandUse(PtrIdx))) {
+ if (UA->isDivergentUse(I.getOperandUse(PtrIdx))) {
return;
}
- const bool ValDivergent = DA->isDivergentUse(&I.getOperandUse(ValIdx));
+ const bool ValDivergent = UA->isDivergentUse(I.getOperandUse(ValIdx));
// If the value operand is divergent, each lane is contributing a different
// value to the atomic calculation. We can only optimize divergent values if
@@ -162,7 +233,7 @@ void AMDGPUAtomicOptimizer::visitAtomicRMWInst(AtomicRMWInst &I) {
ToReplace.push_back(Info);
}
-void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
+void AMDGPUAtomicOptimizerImpl::visitIntrinsicInst(IntrinsicInst &I) {
AtomicRMWInst::BinOp Op;
switch (I.getIntrinsicID()) {
@@ -170,54 +241,72 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
return;
case Intrinsic::amdgcn_buffer_atomic_add:
case Intrinsic::amdgcn_struct_buffer_atomic_add:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_add:
case Intrinsic::amdgcn_raw_buffer_atomic_add:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_add:
Op = AtomicRMWInst::Add;
break;
case Intrinsic::amdgcn_buffer_atomic_sub:
case Intrinsic::amdgcn_struct_buffer_atomic_sub:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_sub:
case Intrinsic::amdgcn_raw_buffer_atomic_sub:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub:
Op = AtomicRMWInst::Sub;
break;
case Intrinsic::amdgcn_buffer_atomic_and:
case Intrinsic::amdgcn_struct_buffer_atomic_and:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_and:
case Intrinsic::amdgcn_raw_buffer_atomic_and:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_and:
Op = AtomicRMWInst::And;
break;
case Intrinsic::amdgcn_buffer_atomic_or:
case Intrinsic::amdgcn_struct_buffer_atomic_or:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_or:
case Intrinsic::amdgcn_raw_buffer_atomic_or:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_or:
Op = AtomicRMWInst::Or;
break;
case Intrinsic::amdgcn_buffer_atomic_xor:
case Intrinsic::amdgcn_struct_buffer_atomic_xor:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_xor:
case Intrinsic::amdgcn_raw_buffer_atomic_xor:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor:
Op = AtomicRMWInst::Xor;
break;
case Intrinsic::amdgcn_buffer_atomic_smin:
case Intrinsic::amdgcn_struct_buffer_atomic_smin:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smin:
case Intrinsic::amdgcn_raw_buffer_atomic_smin:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin:
Op = AtomicRMWInst::Min;
break;
case Intrinsic::amdgcn_buffer_atomic_umin:
case Intrinsic::amdgcn_struct_buffer_atomic_umin:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umin:
case Intrinsic::amdgcn_raw_buffer_atomic_umin:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin:
Op = AtomicRMWInst::UMin;
break;
case Intrinsic::amdgcn_buffer_atomic_smax:
case Intrinsic::amdgcn_struct_buffer_atomic_smax:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smax:
case Intrinsic::amdgcn_raw_buffer_atomic_smax:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax:
Op = AtomicRMWInst::Max;
break;
case Intrinsic::amdgcn_buffer_atomic_umax:
case Intrinsic::amdgcn_struct_buffer_atomic_umax:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umax:
case Intrinsic::amdgcn_raw_buffer_atomic_umax:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umax:
Op = AtomicRMWInst::UMax;
break;
}
const unsigned ValIdx = 0;
- const bool ValDivergent = DA->isDivergentUse(&I.getOperandUse(ValIdx));
+ const bool ValDivergent = UA->isDivergentUse(I.getOperandUse(ValIdx));
// If the value operand is divergent, each lane is contributing a different
// value to the atomic calculation. We can only optimize divergent values if
@@ -231,7 +320,7 @@ void AMDGPUAtomicOptimizer::visitIntrinsicInst(IntrinsicInst &I) {
// If any of the other arguments to the intrinsic are divergent, we can't
// optimize the operation.
for (unsigned Idx = 1; Idx < I.getNumOperands(); Idx++) {
- if (DA->isDivergentUse(&I.getOperandUse(Idx))) {
+ if (UA->isDivergentUse(I.getOperandUse(Idx))) {
return;
}
}
@@ -283,9 +372,10 @@ static Value *buildNonAtomicBinOp(IRBuilder<> &B, AtomicRMWInst::BinOp Op,
// Use the builder to create a reduction of V across the wavefront, with all
// lanes active, returning the same result in all lanes.
-Value *AMDGPUAtomicOptimizer::buildReduction(IRBuilder<> &B,
- AtomicRMWInst::BinOp Op, Value *V,
- Value *const Identity) const {
+Value *AMDGPUAtomicOptimizerImpl::buildReduction(IRBuilder<> &B,
+ AtomicRMWInst::BinOp Op,
+ Value *V,
+ Value *const Identity) const {
Type *const Ty = V->getType();
Module *M = B.GetInsertBlock()->getModule();
Function *UpdateDPP =
@@ -328,8 +418,9 @@ Value *AMDGPUAtomicOptimizer::buildReduction(IRBuilder<> &B,
// Use the builder to create an inclusive scan of V across the wavefront, with
// all lanes active.
-Value *AMDGPUAtomicOptimizer::buildScan(IRBuilder<> &B, AtomicRMWInst::BinOp Op,
- Value *V, Value *const Identity) const {
+Value *AMDGPUAtomicOptimizerImpl::buildScan(IRBuilder<> &B,
+ AtomicRMWInst::BinOp Op, Value *V,
+ Value *const Identity) const {
Type *const Ty = V->getType();
Module *M = B.GetInsertBlock()->getModule();
Function *UpdateDPP =
@@ -385,8 +476,8 @@ Value *AMDGPUAtomicOptimizer::buildScan(IRBuilder<> &B, AtomicRMWInst::BinOp Op,
// Use the builder to create a shift right of V across the wavefront, with all
// lanes active, to turn an inclusive scan into an exclusive scan.
-Value *AMDGPUAtomicOptimizer::buildShiftRight(IRBuilder<> &B, Value *V,
- Value *const Identity) const {
+Value *AMDGPUAtomicOptimizerImpl::buildShiftRight(IRBuilder<> &B, Value *V,
+ Value *const Identity) const {
Type *const Ty = V->getType();
Module *M = B.GetInsertBlock()->getModule();
Function *UpdateDPP =
@@ -430,6 +521,75 @@ Value *AMDGPUAtomicOptimizer::buildShiftRight(IRBuilder<> &B, Value *V,
return V;
}
+// Use the builder to create an exclusive scan and compute the final reduced
+// value using an iterative approach. This provides an alternative
+// implementation to the DPP-based scan, which uses WWM for its computations.
+// This helper iterates over the active lanes to read, compute and update the
+// value using the readlane and writelane intrinsics.
+std::pair<Value *, Value *> AMDGPUAtomicOptimizerImpl::buildScanIteratively(
+ IRBuilder<> &B, AtomicRMWInst::BinOp Op, Value *const Identity, Value *V,
+ Instruction &I, BasicBlock *ComputeLoop, BasicBlock *ComputeEnd) const {
+
+ auto *Ty = I.getType();
+ auto *WaveTy = B.getIntNTy(ST->getWavefrontSize());
+ auto *EntryBB = I.getParent();
+ auto NeedResult = !I.use_empty();
+
+ auto *Ballot =
+ B.CreateIntrinsic(Intrinsic::amdgcn_ballot, WaveTy, B.getTrue());
+
+  // Start inserting instructions into the ComputeLoop block.
+ B.SetInsertPoint(ComputeLoop);
+ // Phi nodes for Accumulator, Scan results destination, and Active Lanes
+ auto *Accumulator = B.CreatePHI(Ty, 2, "Accumulator");
+ Accumulator->addIncoming(Identity, EntryBB);
+ PHINode *OldValuePhi = nullptr;
+ if (NeedResult) {
+ OldValuePhi = B.CreatePHI(Ty, 2, "OldValuePhi");
+ OldValuePhi->addIncoming(PoisonValue::get(Ty), EntryBB);
+ }
+ auto *ActiveBits = B.CreatePHI(WaveTy, 2, "ActiveBits");
+ ActiveBits->addIncoming(Ballot, EntryBB);
+
+  // Use the llvm.cttz intrinsic to find the lowest remaining active lane.
+ auto *FF1 =
+ B.CreateIntrinsic(Intrinsic::cttz, WaveTy, {ActiveBits, B.getTrue()});
+ auto *LaneIdxInt = B.CreateTrunc(FF1, Ty);
+
+  // Get the value required for the atomic operation.
+ auto *LaneValue =
+ B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {}, {V, LaneIdxInt});
+
+  // Perform a writelane if the intermediate scan results are required later
+  // in the kernel computations.
+ Value *OldValue = nullptr;
+ if (NeedResult) {
+ OldValue = B.CreateIntrinsic(Intrinsic::amdgcn_writelane, {},
+ {Accumulator, LaneIdxInt, OldValuePhi});
+ OldValuePhi->addIncoming(OldValue, ComputeLoop);
+ }
+
+ // Accumulate the results
+ auto *NewAccumulator = buildNonAtomicBinOp(B, Op, Accumulator, LaneValue);
+ Accumulator->addIncoming(NewAccumulator, ComputeLoop);
+
+  // Clear the bit of the current active lane so that llvm.cttz returns the
+  // next active lane on the next iteration.
+ auto *Mask = B.CreateShl(ConstantInt::get(WaveTy, 1), FF1);
+
+ auto *InverseMask = B.CreateXor(Mask, ConstantInt::get(WaveTy, -1));
+ auto *NewActiveBits = B.CreateAnd(ActiveBits, InverseMask);
+ ActiveBits->addIncoming(NewActiveBits, ComputeLoop);
+
+ // Branch out of the loop when all lanes are processed.
+ auto *IsEnd = B.CreateICmpEQ(NewActiveBits, ConstantInt::get(WaveTy, 0));
+ B.CreateCondBr(IsEnd, ComputeEnd, ComputeLoop);
+
+ B.SetInsertPoint(ComputeEnd);
+
+ return {OldValue, NewAccumulator};
+}
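As a reading aid (editorial sketch, not part of the patch): the following minimal host-side model shows what buildScanIteratively computes per wave. The 64-lane mask width, the integer add operation, and all names here are illustrative assumptions.

#include <cstdint>

// Walk the active-lane mask from the lowest set bit upward (llvm.cttz), record
// the running total before adding a lane's value (writelane -> exclusive scan),
// accumulate the lane's value (readlane + binop), then clear that lane's bit so
// the next iteration finds the next active lane.
static int iterativeScanModel(uint64_t Exec, const int Value[64], int Identity,
                              int ExclScan[64]) {
  int Accum = Identity;
  while (Exec) {
    unsigned Lane = __builtin_ctzll(Exec); // lowest remaining active lane
    ExclScan[Lane] = Accum;                // exclusive prefix for this lane
    Accum += Value[Lane];                  // running reduction
    Exec &= Exec - 1;                      // retire this lane
  }
  return Accum; // wave-wide reduction, fed to the single atomic operation
}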
+
static APInt getIdentityValueForAtomicOp(AtomicRMWInst::BinOp Op,
unsigned BitWidth) {
switch (Op) {
@@ -456,10 +616,10 @@ static Value *buildMul(IRBuilder<> &B, Value *LHS, Value *RHS) {
return (CI && CI->isOne()) ? RHS : B.CreateMul(LHS, RHS);
}
-void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
- AtomicRMWInst::BinOp Op,
- unsigned ValIdx,
- bool ValDivergent) const {
+void AMDGPUAtomicOptimizerImpl::optimizeAtomic(Instruction &I,
+ AtomicRMWInst::BinOp Op,
+ unsigned ValIdx,
+ bool ValDivergent) const {
// Start building just before the instruction.
IRBuilder<> B(&I);
@@ -479,7 +639,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
Value *const Cond = B.CreateIntrinsic(Intrinsic::amdgcn_ps_live, {}, {});
Instruction *const NonHelperTerminator =
- SplitBlockAndInsertIfThen(Cond, &I, false, nullptr, DT, nullptr);
+ SplitBlockAndInsertIfThen(Cond, &I, false, nullptr, &DTU, nullptr);
// Record I's new position as the exit block.
PixelExitBB = I.getParent();
@@ -528,36 +688,50 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
const bool NeedResult = !I.use_empty();
+ Function *F = I.getFunction();
+ LLVMContext &C = F->getContext();
+ BasicBlock *ComputeLoop = nullptr;
+ BasicBlock *ComputeEnd = nullptr;
// If we have a divergent value in each lane, we need to combine the value
// using DPP.
if (ValDivergent) {
- // First we need to set all inactive invocations to the identity value, so
- // that they can correctly contribute to the final result.
- NewV = B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity});
-
const AtomicRMWInst::BinOp ScanOp =
Op == AtomicRMWInst::Sub ? AtomicRMWInst::Add : Op;
- if (!NeedResult && ST->hasPermLaneX16()) {
- // On GFX10 the permlanex16 instruction helps us build a reduction without
- // too many readlanes and writelanes, which are generally bad for
- // performance.
- NewV = buildReduction(B, ScanOp, NewV, Identity);
+ if (ScanImpl == ScanOptions::DPP) {
+ // First we need to set all inactive invocations to the identity value, so
+ // that they can correctly contribute to the final result.
+ NewV =
+ B.CreateIntrinsic(Intrinsic::amdgcn_set_inactive, Ty, {V, Identity});
+ const AtomicRMWInst::BinOp ScanOp =
+ Op == AtomicRMWInst::Sub ? AtomicRMWInst::Add : Op;
+ if (!NeedResult && ST->hasPermLaneX16()) {
+ // On GFX10 the permlanex16 instruction helps us build a reduction
+ // without too many readlanes and writelanes, which are generally bad
+ // for performance.
+ NewV = buildReduction(B, ScanOp, NewV, Identity);
+ } else {
+ NewV = buildScan(B, ScanOp, NewV, Identity);
+ if (NeedResult)
+ ExclScan = buildShiftRight(B, NewV, Identity);
+ // Read the value from the last lane, which has accumulated the values
+ // of each active lane in the wavefront. This will be our new value
+ // which we will provide to the atomic operation.
+ Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1);
+ assert(TyBitWidth == 32);
+ NewV = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
+ {NewV, LastLaneIdx});
+ }
+ // Finally mark the readlanes in the WWM section.
+ NewV = B.CreateIntrinsic(Intrinsic::amdgcn_strict_wwm, Ty, NewV);
+ } else if (ScanImpl == ScanOptions::Iterative) {
+ // Alternative implementation for scan
+ ComputeLoop = BasicBlock::Create(C, "ComputeLoop", F);
+ ComputeEnd = BasicBlock::Create(C, "ComputeEnd", F);
+ std::tie(ExclScan, NewV) = buildScanIteratively(B, ScanOp, Identity, V, I,
+ ComputeLoop, ComputeEnd);
} else {
- NewV = buildScan(B, ScanOp, NewV, Identity);
- if (NeedResult)
- ExclScan = buildShiftRight(B, NewV, Identity);
-
- // Read the value from the last lane, which has accumulated the values of
- // each active lane in the wavefront. This will be our new value which we
- // will provide to the atomic operation.
- Value *const LastLaneIdx = B.getInt32(ST->getWavefrontSize() - 1);
- assert(TyBitWidth == 32);
- NewV = B.CreateIntrinsic(Intrinsic::amdgcn_readlane, {},
- {NewV, LastLaneIdx});
+ llvm_unreachable("Atomic Optimzer is disabled for None strategy");
}
-
- // Finally mark the readlanes in the WWM section.
- NewV = B.CreateIntrinsic(Intrinsic::amdgcn_strict_wwm, Ty, NewV);
} else {
switch (Op) {
default:
@@ -608,8 +782,39 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
// entry --> single_lane -\
// \------------------> exit
Instruction *const SingleLaneTerminator =
- SplitBlockAndInsertIfThen(Cond, &I, false, nullptr, DT, nullptr);
-
+ SplitBlockAndInsertIfThen(Cond, &I, false, nullptr, &DTU, nullptr);
+
+  // At this point we have split I's block so that one lane in the wavefront
+  // can update the precomputed reduced value, and we have generated the new
+  // control flow for the iterative approach, i.e. the loop that performs the
+  // reduction and scan using ComputeLoop and ComputeEnd.
+  // For this new control flow we need to move the branch instruction (the
+  // terminator created by SplitBlockAndInsertIfThen) from I's block into the
+  // ComputeEnd block, and record the predecessor of the next block for when
+  // the single lane has finished updating the final reduced value.
+ BasicBlock *Predecessor = nullptr;
+ if (ValDivergent && ScanImpl == ScanOptions::Iterative) {
+ // Move terminator from I's block to ComputeEnd block.
+ Instruction *Terminator = EntryBB->getTerminator();
+ B.SetInsertPoint(ComputeEnd);
+ Terminator->removeFromParent();
+ B.Insert(Terminator);
+
+    // Branch unconditionally from I's block to the ComputeLoop block for the
+    // iterative approach.
+ B.SetInsertPoint(EntryBB);
+ B.CreateBr(ComputeLoop);
+
+ // Update the dominator tree for new control flow.
+ DTU.applyUpdates(
+ {{DominatorTree::Insert, EntryBB, ComputeLoop},
+ {DominatorTree::Insert, ComputeLoop, ComputeEnd},
+ {DominatorTree::Delete, EntryBB, SingleLaneTerminator->getParent()}});
+
+ Predecessor = ComputeEnd;
+ } else {
+ Predecessor = EntryBB;
+ }
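For orientation (editorial sketch, drawn in the style of the diagram above; block names match the code): entry branches unconditionally to ComputeLoop, ComputeLoop loops until all lanes have been processed and then reaches ComputeEnd, and ComputeEnd's moved terminator selects between single_lane and the exit block.

// entry --> ComputeLoop --> ComputeEnd --> single_lane -\
//              ^       |            \---------------------> exit
//              \-------/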
// Move the IR builder into single_lane next.
B.SetInsertPoint(SingleLaneTerminator);
@@ -626,7 +831,7 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
if (NeedResult) {
// Create a PHI node to get our new atomic result into the exit block.
PHINode *const PHI = B.CreatePHI(Ty, 2);
- PHI->addIncoming(PoisonValue::get(Ty), EntryBB);
+ PHI->addIncoming(PoisonValue::get(Ty), Predecessor);
PHI->addIncoming(NewI, SingleLaneTerminator->getParent());
// We need to broadcast the value who was the lowest active lane (the first
@@ -660,8 +865,14 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
// from the first lane, to get our lane's index into the atomic result.
Value *LaneOffset = nullptr;
if (ValDivergent) {
- LaneOffset =
- B.CreateIntrinsic(Intrinsic::amdgcn_strict_wwm, Ty, ExclScan);
+ if (ScanImpl == ScanOptions::DPP) {
+ LaneOffset =
+ B.CreateIntrinsic(Intrinsic::amdgcn_strict_wwm, Ty, ExclScan);
+ } else if (ScanImpl == ScanOptions::Iterative) {
+ LaneOffset = ExclScan;
+ } else {
+ llvm_unreachable("Atomic Optimzer is disabled for None strategy");
+ }
} else {
switch (Op) {
default:
@@ -705,11 +916,11 @@ void AMDGPUAtomicOptimizer::optimizeAtomic(Instruction &I,
INITIALIZE_PASS_BEGIN(AMDGPUAtomicOptimizer, DEBUG_TYPE,
"AMDGPU atomic optimizations", false, false)
-INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AMDGPUAtomicOptimizer, DEBUG_TYPE,
"AMDGPU atomic optimizations", false, false)
-FunctionPass *llvm::createAMDGPUAtomicOptimizerPass() {
- return new AMDGPUAtomicOptimizer();
+FunctionPass *llvm::createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy) {
+ return new AMDGPUAtomicOptimizer(ScanStrategy);
}
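A hedged usage sketch of the new factory signature: legacy pass-manager clients now choose the scan strategy when constructing the pass. The header paths, the wrapper function, and the enum scope below are assumptions for illustration; only the factory signature itself comes from this diff.

#include "AMDGPU.h"                    // assumed target-internal header declaring the factory
#include "llvm/IR/LegacyPassManager.h"

// Add the atomic optimizer with the iterative scan strategy; ScanOptions::DPP
// selects the DPP/WWM implementation, and ScanOptions::None constructs the
// pass in a disabled state.
static void addAtomicOptimizer(llvm::legacy::PassManager &PM) {
  PM.add(llvm::createAMDGPUAtomicOptimizerPass(llvm::ScanOptions::Iterative));
}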
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index f7298b59f0b9..57c873f00a4a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -56,8 +56,8 @@ static constexpr std::pair<ImplicitArgumentMask,
// size is 1 for y/z.
static ImplicitArgumentMask
intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
- bool HasApertureRegs, bool SupportsGetDoorBellID) {
- unsigned CodeObjectVersion = AMDGPU::getAmdhsaCodeObjectVersion();
+ bool HasApertureRegs, bool SupportsGetDoorBellID,
+ unsigned CodeObjectVersion) {
switch (ID) {
case Intrinsic::amdgcn_workitem_id_x:
NonKernelOnly = true;
@@ -88,7 +88,7 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
// Need queue_ptr anyway. But under V5, we also need implicitarg_ptr to access
// queue_ptr.
case Intrinsic::amdgcn_queue_ptr:
- NeedsImplicit = (CodeObjectVersion == 5);
+ NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
return QUEUE_PTR;
case Intrinsic::amdgcn_is_shared:
case Intrinsic::amdgcn_is_private:
@@ -97,11 +97,13 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
// Under V5, we need implicitarg_ptr + offsets to access private_base or
// shared_base. For pre-V5, however, need to access them through queue_ptr +
// offsets.
- return CodeObjectVersion == 5 ? IMPLICIT_ARG_PTR : QUEUE_PTR;
+ return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR :
+ QUEUE_PTR;
case Intrinsic::trap:
if (SupportsGetDoorBellID) // GetDoorbellID support implemented since V4.
- return CodeObjectVersion >= 4 ? NOT_IMPLICIT_INPUT : QUEUE_PTR;
- NeedsImplicit = (CodeObjectVersion == 5); // Need impicitarg_ptr under V5.
+ return CodeObjectVersion >= AMDGPU::AMDHSA_COV4 ? NOT_IMPLICIT_INPUT :
+ QUEUE_PTR;
+ NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
return QUEUE_PTR;
default:
return NOT_IMPLICIT_INPUT;
@@ -137,7 +139,9 @@ public:
AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
BumpPtrAllocator &Allocator,
SetVector<Function *> *CGSCC, TargetMachine &TM)
- : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
+ : InformationCache(M, AG, Allocator, CGSCC), TM(TM),
+ CodeObjectVersion(AMDGPU::getCodeObjectVersion(M)) {}
+
TargetMachine &TM;
enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };
@@ -165,6 +169,34 @@ public:
return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
}
+ /// Get code object version.
+ unsigned getCodeObjectVersion() const {
+ return CodeObjectVersion;
+ }
+
+ /// Get the effective value of "amdgpu-waves-per-eu" for the function,
+ /// accounting for the interaction with the passed value to use for
+ /// "amdgpu-flat-work-group-size".
+ std::pair<unsigned, unsigned>
+ getWavesPerEU(const Function &F,
+ std::pair<unsigned, unsigned> FlatWorkGroupSize) {
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ return ST.getWavesPerEU(F, FlatWorkGroupSize);
+ }
+
+ std::pair<unsigned, unsigned>
+ getEffectiveWavesPerEU(const Function &F,
+ std::pair<unsigned, unsigned> WavesPerEU,
+ std::pair<unsigned, unsigned> FlatWorkGroupSize) {
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize);
+ }
+
+ unsigned getMaxWavesPerEU(const Function &F) {
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ return ST.getMaxWavesPerEU();
+ }
+
private:
/// Check if the ConstantExpr \p CE requires the queue pointer.
static bool visitConstExpr(const ConstantExpr *CE) {
@@ -176,7 +208,8 @@ private:
}
/// Get the constant access bitmap for \p C.
- uint8_t getConstantAccess(const Constant *C) {
+ uint8_t getConstantAccess(const Constant *C,
+ SmallPtrSetImpl<const Constant *> &Visited) {
auto It = ConstantStatus.find(C);
if (It != ConstantStatus.end())
return It->second;
@@ -191,10 +224,10 @@ private:
for (const Use &U : C->operands()) {
const auto *OpC = dyn_cast<Constant>(U);
- if (!OpC)
+ if (!OpC || !Visited.insert(OpC).second)
continue;
- Result |= getConstantAccess(OpC);
+ Result |= getConstantAccess(OpC, Visited);
}
return Result;
}
@@ -209,7 +242,8 @@ public:
if (!IsNonEntryFunc && HasAperture)
return false;
- uint8_t Access = getConstantAccess(C);
+ SmallPtrSet<const Constant *, 8> Visited;
+ uint8_t Access = getConstantAccess(C, Visited);
// We need to trap on DS globals in non-entry functions.
if (IsNonEntryFunc && (Access & DS_GLOBAL))
@@ -221,6 +255,7 @@ public:
private:
/// Used to determine if the Constant needs the queue pointer.
DenseMap<const Constant *, uint8_t> ConstantStatus;
+ const unsigned CodeObjectVersion;
};
struct AAAMDAttributes
@@ -311,11 +346,13 @@ struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
LLVM_DEBUG(dbgs() << "[AAUniformWorkGroupSize] Call " << Caller->getName()
<< "->" << getAssociatedFunction()->getName() << "\n");
- const auto &CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
+ const auto *CallerInfo = A.getAAFor<AAUniformWorkGroupSize>(
*this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
+ if (!CallerInfo)
+ return false;
Change = Change | clampStateAndIndicateChange(this->getState(),
- CallerInfo.getState());
+ CallerInfo->getState());
return true;
};
@@ -333,8 +370,8 @@ struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
getAssumed() ? "true" : "false"));
- return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
- /* ForceReplace */ true);
+ return A.manifestAttrs(getIRPosition(), AttrList,
+ /* ForceReplace */ true);
}
bool isValidState() const override {
@@ -342,7 +379,7 @@ struct AAUniformWorkGroupSizeFunction : public AAUniformWorkGroupSize {
return true;
}
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *) const override {
return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
}
@@ -400,9 +437,9 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
auto OrigAssumed = getAssumed();
// Check for Intrinsics and propagate attributes.
- const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
+ const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
*this, this->getIRPosition(), DepClassTy::REQUIRED);
- if (AAEdges.hasNonAsmUnknownCallee())
+ if (!AAEdges || AAEdges->hasNonAsmUnknownCallee())
return indicatePessimisticFixpoint();
bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
@@ -411,20 +448,23 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
bool HasApertureRegs = InfoCache.hasApertureRegs(*F);
bool SupportsGetDoorbellID = InfoCache.supportsGetDoorbellID(*F);
+ unsigned COV = InfoCache.getCodeObjectVersion();
- for (Function *Callee : AAEdges.getOptimisticEdges()) {
+ for (Function *Callee : AAEdges->getOptimisticEdges()) {
Intrinsic::ID IID = Callee->getIntrinsicID();
if (IID == Intrinsic::not_intrinsic) {
- const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
- *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
- *this &= AAAMD;
+ const AAAMDAttributes *AAAMD = A.getAAFor<AAAMDAttributes>(
+ *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
+ if (!AAAMD)
+ return indicatePessimisticFixpoint();
+ *this &= *AAAMD;
continue;
}
bool NonKernelOnly = false;
ImplicitArgumentMask AttrMask =
intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
- HasApertureRegs, SupportsGetDoorbellID);
+ HasApertureRegs, SupportsGetDoorbellID, COV);
if (AttrMask != NOT_IMPLICIT_INPUT) {
if ((IsNonEntryFunc || !NonKernelOnly))
removeAssumedBits(AttrMask);
@@ -438,29 +478,29 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
if (isAssumed(QUEUE_PTR) && checkForQueuePtr(A)) {
// Under V5, we need implicitarg_ptr + offsets to access private_base or
// shared_base. We do not actually need queue_ptr.
- if (AMDGPU::getAmdhsaCodeObjectVersion() == 5)
+ if (COV >= 5)
removeAssumedBits(IMPLICIT_ARG_PTR);
else
removeAssumedBits(QUEUE_PTR);
}
- if (funcRetrievesMultigridSyncArg(A)) {
+ if (funcRetrievesMultigridSyncArg(A, COV)) {
assert(!isAssumed(IMPLICIT_ARG_PTR) &&
"multigrid_sync_arg needs implicitarg_ptr");
removeAssumedBits(MULTIGRID_SYNC_ARG);
}
- if (funcRetrievesHostcallPtr(A)) {
+ if (funcRetrievesHostcallPtr(A, COV)) {
assert(!isAssumed(IMPLICIT_ARG_PTR) && "hostcall needs implicitarg_ptr");
removeAssumedBits(HOSTCALL_PTR);
}
- if (funcRetrievesHeapPtr(A)) {
+ if (funcRetrievesHeapPtr(A, COV)) {
assert(!isAssumed(IMPLICIT_ARG_PTR) && "heap_ptr needs implicitarg_ptr");
removeAssumedBits(HEAP_PTR);
}
- if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A)) {
+ if (isAssumed(QUEUE_PTR) && funcRetrievesQueuePtr(A, COV)) {
assert(!isAssumed(IMPLICIT_ARG_PTR) && "queue_ptr needs implicitarg_ptr");
removeAssumedBits(QUEUE_PTR);
}
@@ -469,10 +509,10 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
removeAssumedBits(LDS_KERNEL_ID);
}
- if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A))
+ if (isAssumed(DEFAULT_QUEUE) && funcRetrievesDefaultQueue(A, COV))
removeAssumedBits(DEFAULT_QUEUE);
- if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A))
+ if (isAssumed(COMPLETION_ACTION) && funcRetrievesCompletionAction(A, COV))
removeAssumedBits(COMPLETION_ACTION);
return getAssumed() != OrigAssumed ? ChangeStatus::CHANGED
@@ -488,16 +528,17 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
AttrList.push_back(Attribute::get(Ctx, Attr.second));
}
- return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
- /* ForceReplace */ true);
+ return A.manifestAttrs(getIRPosition(), AttrList,
+ /* ForceReplace */ true);
}
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *) const override {
std::string Str;
raw_string_ostream OS(Str);
OS << "AMDInfo[";
for (auto Attr : ImplicitAttrs)
- OS << ' ' << Attr.second;
+ if (isAssumed(Attr.first))
+ OS << ' ' << Attr.second;
OS << " ]";
return OS.str();
}
@@ -557,39 +598,39 @@ private:
return false;
}
- bool funcRetrievesMultigridSyncArg(Attributor &A) {
- auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition();
+ bool funcRetrievesMultigridSyncArg(Attributor &A, unsigned COV) {
+ auto Pos = llvm::AMDGPU::getMultigridSyncArgImplicitArgPosition(COV);
AA::RangeTy Range(Pos, 8);
return funcRetrievesImplicitKernelArg(A, Range);
}
- bool funcRetrievesHostcallPtr(Attributor &A) {
- auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition();
+ bool funcRetrievesHostcallPtr(Attributor &A, unsigned COV) {
+ auto Pos = llvm::AMDGPU::getHostcallImplicitArgPosition(COV);
AA::RangeTy Range(Pos, 8);
return funcRetrievesImplicitKernelArg(A, Range);
}
- bool funcRetrievesDefaultQueue(Attributor &A) {
- auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition();
+ bool funcRetrievesDefaultQueue(Attributor &A, unsigned COV) {
+ auto Pos = llvm::AMDGPU::getDefaultQueueImplicitArgPosition(COV);
AA::RangeTy Range(Pos, 8);
return funcRetrievesImplicitKernelArg(A, Range);
}
- bool funcRetrievesCompletionAction(Attributor &A) {
- auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition();
+ bool funcRetrievesCompletionAction(Attributor &A, unsigned COV) {
+ auto Pos = llvm::AMDGPU::getCompletionActionImplicitArgPosition(COV);
AA::RangeTy Range(Pos, 8);
return funcRetrievesImplicitKernelArg(A, Range);
}
- bool funcRetrievesHeapPtr(Attributor &A) {
- if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
+ bool funcRetrievesHeapPtr(Attributor &A, unsigned COV) {
+ if (COV < 5)
return false;
AA::RangeTy Range(AMDGPU::ImplicitArg::HEAP_PTR_OFFSET, 8);
return funcRetrievesImplicitKernelArg(A, Range);
}
- bool funcRetrievesQueuePtr(Attributor &A) {
- if (AMDGPU::getAmdhsaCodeObjectVersion() != 5)
+ bool funcRetrievesQueuePtr(Attributor &A, unsigned COV) {
+ if (COV < 5)
return false;
AA::RangeTy Range(AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET, 8);
return funcRetrievesImplicitKernelArg(A, Range);
@@ -607,10 +648,12 @@ private:
if (Call.getIntrinsicID() != Intrinsic::amdgcn_implicitarg_ptr)
return true;
- const auto &PointerInfoAA = A.getAAFor<AAPointerInfo>(
+ const auto *PointerInfoAA = A.getAAFor<AAPointerInfo>(
*this, IRPosition::callsite_returned(Call), DepClassTy::REQUIRED);
+ if (!PointerInfoAA)
+ return false;
- return PointerInfoAA.forallInterferingAccesses(
+ return PointerInfoAA->forallInterferingAccesses(
Range, [](const AAPointerInfo::Access &Acc, bool IsExact) {
return Acc.getRemoteInst()->isDroppable();
});
@@ -639,42 +682,36 @@ AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
llvm_unreachable("AAAMDAttributes is only valid for function position");
}
-/// Propagate amdgpu-flat-work-group-size attribute.
-struct AAAMDFlatWorkGroupSize
+/// Base class to derive different size ranges.
+struct AAAMDSizeRangeAttribute
: public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
- AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
- : Base(IRP, 32) {}
- /// See AbstractAttribute::getState(...).
- IntegerRangeState &getState() override { return *this; }
- const IntegerRangeState &getState() const override { return *this; }
+ StringRef AttrName;
- void initialize(Attributor &A) override {
- Function *F = getAssociatedFunction();
- auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
- unsigned MinGroupSize, MaxGroupSize;
- std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
- intersectKnown(
- ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));
+ AAAMDSizeRangeAttribute(const IRPosition &IRP, Attributor &A,
+ StringRef AttrName)
+ : Base(IRP, 32), AttrName(AttrName) {}
- if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
- indicatePessimisticFixpoint();
- }
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {}
- ChangeStatus updateImpl(Attributor &A) override {
+ template <class AttributeImpl>
+ ChangeStatus updateImplImpl(Attributor &A) {
ChangeStatus Change = ChangeStatus::UNCHANGED;
auto CheckCallSite = [&](AbstractCallSite CS) {
Function *Caller = CS.getInstruction()->getFunction();
- LLVM_DEBUG(dbgs() << "[AAAMDFlatWorkGroupSize] Call " << Caller->getName()
+ LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
<< "->" << getAssociatedFunction()->getName() << '\n');
- const auto &CallerInfo = A.getAAFor<AAAMDFlatWorkGroupSize>(
+ const auto *CallerInfo = A.getAAFor<AttributeImpl>(
*this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
+ if (!CallerInfo)
+ return false;
Change |=
- clampStateAndIndicateChange(this->getState(), CallerInfo.getState());
+ clampStateAndIndicateChange(this->getState(), CallerInfo->getState());
return true;
};
@@ -686,45 +723,65 @@ struct AAAMDFlatWorkGroupSize
return Change;
}
- ChangeStatus manifest(Attributor &A) override {
- SmallVector<Attribute, 8> AttrList;
- Function *F = getAssociatedFunction();
- LLVMContext &Ctx = F->getContext();
-
- auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
- unsigned Min, Max;
- std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);
-
+ ChangeStatus emitAttributeIfNotDefault(Attributor &A, unsigned Min,
+ unsigned Max) {
// Don't add the attribute if it's the implied default.
if (getAssumed().getLower() == Min && getAssumed().getUpper() - 1 == Max)
return ChangeStatus::UNCHANGED;
+ Function *F = getAssociatedFunction();
+ LLVMContext &Ctx = F->getContext();
SmallString<10> Buffer;
raw_svector_ostream OS(Buffer);
OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
-
- AttrList.push_back(
- Attribute::get(Ctx, "amdgpu-flat-work-group-size", OS.str()));
- return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
- /* ForceReplace */ true);
+ return A.manifestAttrs(getIRPosition(),
+ {Attribute::get(Ctx, AttrName, OS.str())},
+ /* ForceReplace */ true);
}
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *) const override {
std::string Str;
raw_string_ostream OS(Str);
- OS << "AMDFlatWorkGroupSize[";
+ OS << getName() << '[';
OS << getAssumed().getLower() << ',' << getAssumed().getUpper() - 1;
OS << ']';
return OS.str();
}
+};
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {}
+/// Propagate amdgpu-flat-work-group-size attribute.
+struct AAAMDFlatWorkGroupSize : public AAAMDSizeRangeAttribute {
+ AAAMDFlatWorkGroupSize(const IRPosition &IRP, Attributor &A)
+ : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-flat-work-group-size") {}
+
+ void initialize(Attributor &A) override {
+ Function *F = getAssociatedFunction();
+ auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+ unsigned MinGroupSize, MaxGroupSize;
+ std::tie(MinGroupSize, MaxGroupSize) = InfoCache.getFlatWorkGroupSizes(*F);
+ intersectKnown(
+ ConstantRange(APInt(32, MinGroupSize), APInt(32, MaxGroupSize + 1)));
+
+ if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
+ indicatePessimisticFixpoint();
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ return updateImplImpl<AAAMDFlatWorkGroupSize>(A);
+ }
/// Create an abstract attribute view for the position \p IRP.
static AAAMDFlatWorkGroupSize &createForPosition(const IRPosition &IRP,
Attributor &A);
+ ChangeStatus manifest(Attributor &A) override {
+ Function *F = getAssociatedFunction();
+ auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+ unsigned Min, Max;
+ std::tie(Min, Max) = InfoCache.getMaximumFlatWorkGroupRange(*F);
+ return emitAttributeIfNotDefault(A, Min, Max);
+ }
+
/// See AbstractAttribute::getName()
const std::string getName() const override {
return "AAAMDFlatWorkGroupSize";
@@ -754,6 +811,109 @@ AAAMDFlatWorkGroupSize::createForPosition(const IRPosition &IRP,
"AAAMDFlatWorkGroupSize is only valid for function position");
}
+/// Propagate amdgpu-waves-per-eu attribute.
+struct AAAMDWavesPerEU : public AAAMDSizeRangeAttribute {
+ AAAMDWavesPerEU(const IRPosition &IRP, Attributor &A)
+ : AAAMDSizeRangeAttribute(IRP, A, "amdgpu-waves-per-eu") {}
+
+ bool isValidState() const override {
+ return !Assumed.isEmptySet() && IntegerRangeState::isValidState();
+ }
+
+ void initialize(Attributor &A) override {
+ Function *F = getAssociatedFunction();
+ auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+
+ if (const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
+ *this, IRPosition::function(*F), DepClassTy::REQUIRED)) {
+
+ unsigned Min, Max;
+ std::tie(Min, Max) = InfoCache.getWavesPerEU(
+ *F, {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
+ AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
+
+ ConstantRange Range(APInt(32, Min), APInt(32, Max + 1));
+ intersectKnown(Range);
+ }
+
+ if (AMDGPU::isEntryFunctionCC(F->getCallingConv()))
+ indicatePessimisticFixpoint();
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+ ChangeStatus Change = ChangeStatus::UNCHANGED;
+
+ auto CheckCallSite = [&](AbstractCallSite CS) {
+ Function *Caller = CS.getInstruction()->getFunction();
+ Function *Func = getAssociatedFunction();
+ LLVM_DEBUG(dbgs() << '[' << getName() << "] Call " << Caller->getName()
+ << "->" << Func->getName() << '\n');
+
+ const auto *CallerInfo = A.getAAFor<AAAMDWavesPerEU>(
+ *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);
+ const auto *AssumedGroupSize = A.getAAFor<AAAMDFlatWorkGroupSize>(
+ *this, IRPosition::function(*Func), DepClassTy::REQUIRED);
+ if (!CallerInfo || !AssumedGroupSize)
+ return false;
+
+ unsigned Min, Max;
+ std::tie(Min, Max) = InfoCache.getEffectiveWavesPerEU(
+ *Caller,
+ {CallerInfo->getAssumed().getLower().getZExtValue(),
+ CallerInfo->getAssumed().getUpper().getZExtValue() - 1},
+ {AssumedGroupSize->getAssumed().getLower().getZExtValue(),
+ AssumedGroupSize->getAssumed().getUpper().getZExtValue() - 1});
+ ConstantRange CallerRange(APInt(32, Min), APInt(32, Max + 1));
+ IntegerRangeState CallerRangeState(CallerRange);
+ Change |= clampStateAndIndicateChange(this->getState(), CallerRangeState);
+
+ return true;
+ };
+
+ bool AllCallSitesKnown = true;
+ if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
+ return indicatePessimisticFixpoint();
+
+ return Change;
+ }
+
+ /// Create an abstract attribute view for the position \p IRP.
+ static AAAMDWavesPerEU &createForPosition(const IRPosition &IRP,
+ Attributor &A);
+
+ ChangeStatus manifest(Attributor &A) override {
+ Function *F = getAssociatedFunction();
+ auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());
+ unsigned Max = InfoCache.getMaxWavesPerEU(*F);
+ return emitAttributeIfNotDefault(A, 1, Max);
+ }
+
+ /// See AbstractAttribute::getName()
+ const std::string getName() const override { return "AAAMDWavesPerEU"; }
+
+ /// See AbstractAttribute::getIdAddr()
+ const char *getIdAddr() const override { return &ID; }
+
+ /// This function should return true if the type of the \p AA is
+ /// AAAMDWavesPerEU
+ static bool classof(const AbstractAttribute *AA) {
+ return (AA->getIdAddr() == &ID);
+ }
+
+ /// Unique ID (due to the unique address)
+ static const char ID;
+};
+
+const char AAAMDWavesPerEU::ID = 0;
+
+AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
+ Attributor &A) {
+ if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
+ return *new (A.Allocator) AAAMDWavesPerEU(IRP, A);
+ llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
+}
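To make the manifest step concrete: emitAttributeIfNotDefault prints the assumed range as "lower,upper-1" and attaches it under the attribute name passed to the base class, so the two derived attributes surface on a function roughly as the string attributes below (the numeric ranges are illustrative assumptions, not values taken from this diff).

// "amdgpu-waves-per-eu"="2,4"               (AAAMDWavesPerEU, assumed range [2,5))
// "amdgpu-flat-work-group-size"="64,256"    (AAAMDFlatWorkGroupSize, assumed range [64,257))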
+
class AMDGPUAttributor : public ModulePass {
public:
AMDGPUAttributor() : ModulePass(ID) {}
@@ -782,13 +942,17 @@ public:
AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
DenseSet<const char *> Allowed(
{&AAAMDAttributes::ID, &AAUniformWorkGroupSize::ID,
- &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID, &AACallEdges::ID,
- &AAPointerInfo::ID, &AAPotentialConstantValues::ID});
+ &AAPotentialValues::ID, &AAAMDFlatWorkGroupSize::ID,
+ &AAAMDWavesPerEU::ID, &AACallEdges::ID, &AAPointerInfo::ID,
+ &AAPotentialConstantValues::ID, &AAUnderlyingObjects::ID});
AttributorConfig AC(CGUpdater);
AC.Allowed = &Allowed;
AC.IsModulePass = true;
AC.DefaultInitializeLiveInternals = false;
+ AC.IPOAmendableCB = [](const Function &F) {
+ return F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
+ };
Attributor A(Functions, InfoCache, AC);
@@ -798,6 +962,7 @@ public:
A.getOrCreateAAFor<AAUniformWorkGroupSize>(IRPosition::function(F));
if (!AMDGPU::isEntryFunctionCC(F.getCallingConv())) {
A.getOrCreateAAFor<AAAMDFlatWorkGroupSize>(IRPosition::function(F));
+ A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(F));
}
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index da819b6d4a23..9ba5ea8fb73f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -466,7 +466,9 @@ static void allocateHSAUserSGPRs(CCState &CCInfo,
CCInfo.AllocateReg(DispatchPtrReg);
}
- if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) {
+ const Module *M = MF.getFunction().getParent();
+ if (Info.hasQueuePtr() &&
+ AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
Register QueuePtrReg = Info.addQueuePtr(TRI);
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);
@@ -510,8 +512,6 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
const SITargetLowering &TLI = *getTLI<SITargetLowering>();
const DataLayout &DL = F.getParent()->getDataLayout();
- Info->allocateKnownAddressLDSGlobal(F);
-
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
@@ -519,7 +519,7 @@ bool AMDGPUCallLowering::lowerFormalArgumentsKernel(
unsigned i = 0;
const Align KernArgBaseAlign(16);
- const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset(F);
+ const unsigned BaseOffset = Subtarget->getExplicitKernelArgOffset();
uint64_t ExplicitArgOffset = 0;
// TODO: Align down to dword alignment and extract bits for extending loads.
@@ -594,8 +594,6 @@ bool AMDGPUCallLowering::lowerFormalArguments(
const SIRegisterInfo *TRI = Subtarget.getRegisterInfo();
const DataLayout &DL = F.getParent()->getDataLayout();
- Info->allocateKnownAddressLDSGlobal(F);
-
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, F.isVarArg(), MF, ArgLocs, F.getContext());
@@ -701,7 +699,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(
if ((PsInputBits & 0x7F) == 0 ||
((PsInputBits & 0xF) == 0 &&
(PsInputBits >> 11 & 1)))
- Info->markPSInputEnabled(countTrailingZeros(Info->getPSInputAddr()));
+ Info->markPSInputEnabled(llvm::countr_zero(Info->getPSInputAddr()));
}
}
@@ -724,7 +722,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(
if (!handleAssignments(Handler, SplitArgs, CCInfo, ArgLocs, B))
return false;
- uint64_t StackOffset = Assigner.StackOffset;
+ uint64_t StackSize = Assigner.StackSize;
// Start adding system SGPRs.
if (IsEntryFunc) {
@@ -739,7 +737,7 @@ bool AMDGPUCallLowering::lowerFormalArguments(
// the caller's stack. So, whenever we lower formal arguments, we should keep
// track of this information, since we might lower a tail call in this
// function later.
- Info->setBytesInStackArgArea(StackOffset);
+ Info->setBytesInStackArgArea(StackSize);
// Move back to the end of the basic block.
B.setMBB(MBB);
@@ -956,10 +954,14 @@ getAssignFnsForCC(CallingConv::ID CC, const SITargetLowering &TLI) {
}
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect,
- bool IsTailCall) {
+ bool IsTailCall, CallingConv::ID CC) {
assert(!(IsIndirect && IsTailCall) && "Indirect calls can't be tail calls, "
"because the address can be divergent");
- return IsTailCall ? AMDGPU::SI_TCRETURN : AMDGPU::G_SI_CALL;
+ if (!IsTailCall)
+ return AMDGPU::G_SI_CALL;
+
+ return CC == CallingConv::AMDGPU_Gfx ? AMDGPU::SI_TCRETURN_GFX :
+ AMDGPU::SI_TCRETURN;
}
// Add operands to call instruction to track the callee.
@@ -1053,7 +1055,7 @@ bool AMDGPUCallLowering::areCalleeOutgoingArgsTailCallable(
// Make sure that they can fit on the caller's stack.
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
- if (OutInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea()) {
+ if (OutInfo.getStackSize() > FuncInfo->getBytesInStackArgArea()) {
LLVM_DEBUG(dbgs() << "... Cannot fit call operands on caller's stack.\n");
return false;
}
@@ -1184,7 +1186,7 @@ bool AMDGPUCallLowering::lowerTailCall(
if (!IsSibCall)
CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP);
- unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true);
+ unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), true, CalleeCC);
auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
if (!addCallTargetOperands(MIB, MIRBuilder, Info))
return false;
@@ -1224,7 +1226,7 @@ bool AMDGPUCallLowering::lowerTailCall(
// The callee will pop the argument stack as a tail call. Thus, we must
// keep it 16-byte aligned.
- NumBytes = alignTo(OutInfo.getNextStackOffset(), ST.getStackAlignment());
+ NumBytes = alignTo(OutInfo.getStackSize(), ST.getStackAlignment());
// FPDiff will be negative if this tail call requires more space than we
// would automatically have in our incoming argument space. Positive if we
@@ -1348,7 +1350,7 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Create a temporarily-floating call instruction so we can add the implicit
// uses of arg registers.
- unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false);
+ unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false, Info.CallConv);
auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
MIB.addDef(TRI->getReturnAddressReg(MF));
@@ -1390,7 +1392,7 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
handleImplicitCallArguments(MIRBuilder, MIB, ST, *MFI, ImplicitArgRegs);
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
+ unsigned NumBytes = CCInfo.getStackSize();
// If Callee is a reg, since it is used by a target specific
// instruction, it must have a register class matching the
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index 08b29641d14a..4ec85f3c5588 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -14,23 +14,28 @@
#include "AMDGPU.h"
#include "AMDGPUTargetMachine.h"
+#include "SIModeRegisterDefaults.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
-#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Utils/IntegerDivision.h"
+#include "llvm/Transforms/Utils/Local.h"
#define DEBUG_TYPE "amdgpu-codegenprepare"
using namespace llvm;
+using namespace llvm::PatternMatch;
namespace {
@@ -46,6 +51,22 @@ static cl::opt<bool> Widen16BitOps(
cl::ReallyHidden,
cl::init(true));
+static cl::opt<bool>
+ ScalarizeLargePHIs("amdgpu-codegenprepare-break-large-phis",
+ cl::desc("Break large PHI nodes for DAGISel"),
+ cl::ReallyHidden, cl::init(true));
+
+static cl::opt<bool>
+ ForceScalarizeLargePHIs("amdgpu-codegenprepare-force-break-large-phis",
+ cl::desc("For testing purposes, always break large "
+ "PHIs even if it isn't profitable."),
+ cl::ReallyHidden, cl::init(false));
+
+static cl::opt<unsigned> ScalarizeLargePHIsThreshold(
+ "amdgpu-codegenprepare-break-large-phis-threshold",
+ cl::desc("Minimum type size in bits for breaking large PHI nodes"),
+ cl::ReallyHidden, cl::init(32));
+
static cl::opt<bool> UseMul24Intrin(
"amdgpu-codegenprepare-mul24",
cl::desc("Introduce mul24 intrinsics in AMDGPUCodeGenPrepare"),
@@ -67,16 +88,30 @@ static cl::opt<bool> DisableIDivExpand(
cl::ReallyHidden,
cl::init(false));
-class AMDGPUCodeGenPrepare : public FunctionPass,
- public InstVisitor<AMDGPUCodeGenPrepare, bool> {
+// Disable processing of fdiv so we can better test the backend implementations.
+static cl::opt<bool> DisableFDivExpand(
+ "amdgpu-codegenprepare-disable-fdiv-expansion",
+ cl::desc("Prevent expanding floating point division in AMDGPUCodeGenPrepare"),
+ cl::ReallyHidden,
+ cl::init(false));
+
+class AMDGPUCodeGenPrepareImpl
+ : public InstVisitor<AMDGPUCodeGenPrepareImpl, bool> {
+public:
const GCNSubtarget *ST = nullptr;
+ const TargetLibraryInfo *TLInfo = nullptr;
AssumptionCache *AC = nullptr;
DominatorTree *DT = nullptr;
- LegacyDivergenceAnalysis *DA = nullptr;
+ UniformityInfo *UA = nullptr;
Module *Mod = nullptr;
const DataLayout *DL = nullptr;
bool HasUnsafeFPMath = false;
- bool HasFP32Denormals = false;
+ bool HasFP32DenormalFlush = false;
+ bool FlowChanged = false;
+
+ DenseMap<const PHINode *, bool> BreakPhiNodesCache;
+
+ bool canBreakPHINode(const PHINode &I);
/// Copies exact/nsw/nuw flags (if any) from binary operation \p I to
/// binary operation \p V.
@@ -102,6 +137,21 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
/// false otherwise.
bool needsPromotionToI32(const Type *T) const;
+ /// Return true if \p T is a legal scalar floating point type.
+ bool isLegalFloatingTy(const Type *T) const;
+
+ /// Wrapper to pass all the arguments to computeKnownFPClass
+ KnownFPClass computeKnownFPClass(const Value *V, FPClassTest Interested,
+ const Instruction *CtxI) const {
+ return llvm::computeKnownFPClass(V, *DL, Interested, 0, TLInfo, AC, CtxI,
+ DT);
+ }
+
+ bool canIgnoreDenormalInput(const Value *V, const Instruction *CtxI) const {
+ return HasFP32DenormalFlush ||
+ computeKnownFPClass(V, fcSubnormal, CtxI).isKnownNeverSubnormal();
+ }
+
/// Promotes uniform binary operation \p I to equivalent 32 bit binary
/// operation.
///
@@ -199,41 +249,104 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
bool canWidenScalarExtLoad(LoadInst &I) const;
-public:
- static char ID;
+ Value *matchFractPat(IntrinsicInst &I);
+ Value *applyFractPat(IRBuilder<> &Builder, Value *FractArg);
- AMDGPUCodeGenPrepare() : FunctionPass(ID) {}
+ bool canOptimizeWithRsq(const FPMathOperator *SqrtOp, FastMathFlags DivFMF,
+ FastMathFlags SqrtFMF) const;
+ Value *optimizeWithRsq(IRBuilder<> &Builder, Value *Num, Value *Den,
+ FastMathFlags DivFMF, FastMathFlags SqrtFMF,
+ const Instruction *CtxI) const;
+
+ Value *optimizeWithRcp(IRBuilder<> &Builder, Value *Num, Value *Den,
+ FastMathFlags FMF, const Instruction *CtxI) const;
+ Value *optimizeWithFDivFast(IRBuilder<> &Builder, Value *Num, Value *Den,
+ float ReqdAccuracy) const;
+
+ Value *visitFDivElement(IRBuilder<> &Builder, Value *Num, Value *Den,
+ FastMathFlags DivFMF, FastMathFlags SqrtFMF,
+ Value *RsqOp, const Instruction *FDiv,
+ float ReqdAccuracy) const;
+
+ std::pair<Value *, Value *> getFrexpResults(IRBuilder<> &Builder,
+ Value *Src) const;
+
+ Value *emitRcpIEEE1ULP(IRBuilder<> &Builder, Value *Src,
+ bool IsNegative) const;
+ Value *emitFrexpDiv(IRBuilder<> &Builder, Value *LHS, Value *RHS,
+ FastMathFlags FMF) const;
+
+public:
bool visitFDiv(BinaryOperator &I);
- bool visitXor(BinaryOperator &I);
bool visitInstruction(Instruction &I) { return false; }
bool visitBinaryOperator(BinaryOperator &I);
bool visitLoadInst(LoadInst &I);
bool visitICmpInst(ICmpInst &I);
bool visitSelectInst(SelectInst &I);
+ bool visitPHINode(PHINode &I);
bool visitIntrinsicInst(IntrinsicInst &I);
bool visitBitreverseIntrinsicInst(IntrinsicInst &I);
+ bool visitMinNum(IntrinsicInst &I);
+ bool run(Function &F);
+};
- bool doInitialization(Module &M) override;
- bool runOnFunction(Function &F) override;
-
- StringRef getPassName() const override { return "AMDGPU IR optimizations"; }
+class AMDGPUCodeGenPrepare : public FunctionPass {
+private:
+ AMDGPUCodeGenPrepareImpl Impl;
+public:
+ static char ID;
+ AMDGPUCodeGenPrepare() : FunctionPass(ID) {
+ initializeAMDGPUCodeGenPreparePass(*PassRegistry::getPassRegistry());
+ }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<LegacyDivergenceAnalysis>();
+ AU.addRequired<UniformityInfoWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
// FIXME: Division expansion needs to preserve the dominator tree.
if (!ExpandDiv64InIR)
AU.setPreservesAll();
- }
+ }
+ bool runOnFunction(Function &F) override;
+ bool doInitialization(Module &M) override;
+ StringRef getPassName() const override { return "AMDGPU IR optimizations"; }
};
} // end anonymous namespace
-unsigned AMDGPUCodeGenPrepare::getBaseElementBitWidth(const Type *T) const {
+bool AMDGPUCodeGenPrepareImpl::run(Function &F) {
+ bool MadeChange = false;
+
+ Function::iterator NextBB;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; FI = NextBB) {
+ BasicBlock *BB = &*FI;
+ NextBB = std::next(FI);
+
+ BasicBlock::iterator Next;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
+ I = Next) {
+ Next = std::next(I);
+
+ MadeChange |= visit(*I);
+
+ if (Next != E) { // Control flow changed
+ BasicBlock *NextInstBB = Next->getParent();
+ if (NextInstBB != BB) {
+ BB = NextInstBB;
+ E = BB->end();
+ FE = F.end();
+ }
+ }
+ }
+ }
+ return MadeChange;
+}
+
+unsigned AMDGPUCodeGenPrepareImpl::getBaseElementBitWidth(const Type *T) const {
assert(needsPromotionToI32(T) && "T does not need promotion to i32");
if (T->isIntegerTy())
@@ -241,7 +354,7 @@ unsigned AMDGPUCodeGenPrepare::getBaseElementBitWidth(const Type *T) const {
return cast<VectorType>(T)->getElementType()->getIntegerBitWidth();
}
-Type *AMDGPUCodeGenPrepare::getI32Ty(IRBuilder<> &B, const Type *T) const {
+Type *AMDGPUCodeGenPrepareImpl::getI32Ty(IRBuilder<> &B, const Type *T) const {
assert(needsPromotionToI32(T) && "T does not need promotion to i32");
if (T->isIntegerTy())
@@ -249,17 +362,17 @@ Type *AMDGPUCodeGenPrepare::getI32Ty(IRBuilder<> &B, const Type *T) const {
return FixedVectorType::get(B.getInt32Ty(), cast<FixedVectorType>(T));
}
-bool AMDGPUCodeGenPrepare::isSigned(const BinaryOperator &I) const {
+bool AMDGPUCodeGenPrepareImpl::isSigned(const BinaryOperator &I) const {
return I.getOpcode() == Instruction::AShr ||
I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::SRem;
}
-bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
+bool AMDGPUCodeGenPrepareImpl::isSigned(const SelectInst &I) const {
return isa<ICmpInst>(I.getOperand(0)) ?
cast<ICmpInst>(I.getOperand(0))->isSigned() : false;
}
-bool AMDGPUCodeGenPrepare::needsPromotionToI32(const Type *T) const {
+bool AMDGPUCodeGenPrepareImpl::needsPromotionToI32(const Type *T) const {
if (!Widen16BitOps)
return false;
@@ -279,6 +392,11 @@ bool AMDGPUCodeGenPrepare::needsPromotionToI32(const Type *T) const {
return false;
}
+bool AMDGPUCodeGenPrepareImpl::isLegalFloatingTy(const Type *Ty) const {
+ return Ty->isFloatTy() || Ty->isDoubleTy() ||
+ (Ty->isHalfTy() && ST->has16BitInsts());
+}
+
// Return true if the op promoted to i32 should have nsw set.
static bool promotedOpIsNSW(const Instruction &I) {
switch (I.getOpcode()) {
@@ -307,16 +425,16 @@ static bool promotedOpIsNUW(const Instruction &I) {
}
}
-bool AMDGPUCodeGenPrepare::canWidenScalarExtLoad(LoadInst &I) const {
+bool AMDGPUCodeGenPrepareImpl::canWidenScalarExtLoad(LoadInst &I) const {
Type *Ty = I.getType();
const DataLayout &DL = Mod->getDataLayout();
int TySize = DL.getTypeSizeInBits(Ty);
Align Alignment = DL.getValueOrABITypeAlignment(I.getAlign(), Ty);
- return I.isSimple() && TySize < 32 && Alignment >= 4 && DA->isUniform(&I);
+ return I.isSimple() && TySize < 32 && Alignment >= 4 && UA->isUniform(&I);
}
-bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
+bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(BinaryOperator &I) const {
assert(needsPromotionToI32(I.getType()) &&
"I does not need promotion to i32");
@@ -363,7 +481,7 @@ bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
return true;
}
-bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(ICmpInst &I) const {
+bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(ICmpInst &I) const {
assert(needsPromotionToI32(I.getOperand(0)->getType()) &&
"I does not need promotion to i32");
@@ -390,7 +508,7 @@ bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(ICmpInst &I) const {
return true;
}
-bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(SelectInst &I) const {
+bool AMDGPUCodeGenPrepareImpl::promoteUniformOpToI32(SelectInst &I) const {
assert(needsPromotionToI32(I.getType()) &&
"I does not need promotion to i32");
@@ -419,7 +537,7 @@ bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(SelectInst &I) const {
return true;
}
-bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
+bool AMDGPUCodeGenPrepareImpl::promoteUniformBitreverseToI32(
IntrinsicInst &I) const {
assert(I.getIntrinsicID() == Intrinsic::bitreverse &&
"I must be bitreverse intrinsic");
@@ -445,11 +563,11 @@ bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
return true;
}
-unsigned AMDGPUCodeGenPrepare::numBitsUnsigned(Value *Op) const {
+unsigned AMDGPUCodeGenPrepareImpl::numBitsUnsigned(Value *Op) const {
return computeKnownBits(Op, *DL, 0, AC).countMaxActiveBits();
}
-unsigned AMDGPUCodeGenPrepare::numBitsSigned(Value *Op) const {
+unsigned AMDGPUCodeGenPrepareImpl::numBitsSigned(Value *Op) const {
return ComputeMaxSignificantBits(Op, *DL, 0, AC);
}
@@ -508,7 +626,7 @@ static Value *getMul24(IRBuilder<> &Builder, Value *LHS, Value *RHS,
return Builder.CreateOr(Lo, Builder.CreateShl(Hi, 32));
}
-bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
+bool AMDGPUCodeGenPrepareImpl::replaceMulWithMul24(BinaryOperator &I) const {
if (I.getOpcode() != Instruction::Mul)
return false;
@@ -518,7 +636,7 @@ bool AMDGPUCodeGenPrepare::replaceMulWithMul24(BinaryOperator &I) const {
return false;
// Prefer scalar if this could be s_mul_i32
- if (DA->isUniform(&I))
+ if (UA->isUniform(&I))
return false;
Value *LHS = I.getOperand(0);
@@ -592,7 +710,7 @@ static SelectInst *findSelectThroughCast(Value *V, CastInst *&Cast) {
return nullptr;
}
-bool AMDGPUCodeGenPrepare::foldBinOpIntoSelect(BinaryOperator &BO) const {
+bool AMDGPUCodeGenPrepareImpl::foldBinOpIntoSelect(BinaryOperator &BO) const {
// Don't do this unless the old select is going away. We want to eliminate the
// binary operator, not replace a binop with a select.
int SelOpNo = 0;
@@ -653,30 +771,191 @@ bool AMDGPUCodeGenPrepare::foldBinOpIntoSelect(BinaryOperator &BO) const {
return true;
}
+std::pair<Value *, Value *>
+AMDGPUCodeGenPrepareImpl::getFrexpResults(IRBuilder<> &Builder,
+ Value *Src) const {
+ Type *Ty = Src->getType();
+ Value *Frexp = Builder.CreateIntrinsic(Intrinsic::frexp,
+ {Ty, Builder.getInt32Ty()}, Src);
+ Value *FrexpMant = Builder.CreateExtractValue(Frexp, {0});
+
+ // Bypass the bug workaround for the exponent result since it doesn't matter.
+ // TODO: Does the bug workaround even really need to consider the exponent
+ // result? It's unspecified by the spec.
+
+ Value *FrexpExp =
+ ST->hasFractBug()
+ ? Builder.CreateIntrinsic(Intrinsic::amdgcn_frexp_exp,
+ {Builder.getInt32Ty(), Ty}, Src)
+ : Builder.CreateExtractValue(Frexp, {1});
+ return {FrexpMant, FrexpExp};
+}
+
+/// Emit an expansion of 1.0 / Src good for 1ulp that supports denormals.
+Value *AMDGPUCodeGenPrepareImpl::emitRcpIEEE1ULP(IRBuilder<> &Builder,
+ Value *Src,
+ bool IsNegative) const {
+ // Same as for 1.0, but expand the sign out of the constant.
+ // -1.0 / x -> rcp (fneg x)
+ if (IsNegative)
+ Src = Builder.CreateFNeg(Src);
+
+ // The rcp instruction doesn't support denormals, so scale the input
+ // out of the denormal range and convert at the end.
+ //
+ // Expand as 2^-n * (1.0 / (x * 2^n))
+
+ // TODO: Skip scaling if input is known never denormal and the input
+ // range won't underflow to denormal. The hard part is knowing the
+ // result. We need a range check, the result could be denormal for
+ // 0x1p+126 < den <= 0x1p+127.
+
+ Type *Ty = Src->getType();
+
+ auto [FrexpMant, FrexpExp] = getFrexpResults(Builder, Src);
+ Value *ScaleFactor = Builder.CreateNeg(FrexpExp);
+ Value *Rcp = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rcp, FrexpMant);
+ return Builder.CreateIntrinsic(Intrinsic::ldexp, {Ty, Builder.getInt32Ty()},
+ {Rcp, ScaleFactor});
+}
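// A minimal standalone sketch (plain C++, not from this patch; helper name is
// illustrative) of the scalar math behind the emitRcpIEEE1ULP expansion above:
// split x into mantissa and exponent, take the reciprocal of the always-normal
// mantissa, then reapply the exponent with ldexp. The 1.0f / Mant stands in
// for v_rcp_f32, which adds its own ~1 ulp error on real hardware.
#include <cmath>
#include <cstdio>

static float rcpViaFrexpScaling(float X) {
  int Exp;
  float Mant = std::frexp(X, &Exp); // X == Mant * 2^Exp, |Mant| in [0.5, 1)
  float Rcp = 1.0f / Mant;          // reciprocal of a never-denormal value
  return std::ldexp(Rcp, -Exp);     // 2^-Exp * (1 / (X * 2^-Exp))
}

int main() {
  const float Tests[] = {0x1.8p-127f, 3.0f, -0x1.8p+120f};
  for (float X : Tests)
    std::printf("1/%g: %g (reference %g)\n", X, rcpViaFrexpScaling(X),
                1.0f / X);
}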
+
+/// Emit a 2ulp expansion for fdiv by using frexp for input scaling.
+Value *AMDGPUCodeGenPrepareImpl::emitFrexpDiv(IRBuilder<> &Builder, Value *LHS,
+ Value *RHS,
+ FastMathFlags FMF) const {
+  // If we have to work around the fract/frexp bug, we're worse off than
+ // using the fdiv.fast expansion. The full safe expansion is faster if we have
+ // fast FMA.
+ if (HasFP32DenormalFlush && ST->hasFractBug() && !ST->hasFastFMAF32() &&
+ (!FMF.noNaNs() || !FMF.noInfs()))
+ return nullptr;
+
+ // We're scaling the LHS to avoid a denormal input, and scale the denominator
+ // to avoid large values underflowing the result.
+ Type *Ty = LHS->getType();
+
+ auto [FrexpMantRHS, FrexpExpRHS] = getFrexpResults(Builder, RHS);
+
+ Value *Rcp =
+ Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rcp, FrexpMantRHS);
+
+ auto [FrexpMantLHS, FrexpExpLHS] = getFrexpResults(Builder, LHS);
+ Value *Mul = Builder.CreateFMul(FrexpMantLHS, Rcp);
+
+ // We multiplied by 2^N/2^M, so we need to multiply by 2^(N-M) to scale the
+ // result.
+ Value *ExpDiff = Builder.CreateSub(FrexpExpLHS, FrexpExpRHS);
+ return Builder.CreateIntrinsic(Intrinsic::ldexp, {Ty, Builder.getInt32Ty()},
+ {Mul, ExpDiff});
+}
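// A minimal standalone sketch (plain C++, not from this patch; helper name is
// illustrative) of the scaling identity emitFrexpDiv uses: divide the two
// mantissas, which are always in [0.5, 1), then reapply 2^(expL - expR) with a
// single ldexp. The 1.0f / MR multiply stands in for the rcp + fmul pair
// emitted above.
#include <cmath>
#include <cstdio>

static float divViaFrexpScaling(float L, float R) {
  int EL, ER;
  float ML = std::frexp(L, &EL); // L == ML * 2^EL
  float MR = std::frexp(R, &ER); // R == MR * 2^ER
  float Quot = ML * (1.0f / MR); // mantissa quotient, magnitude in (0.5, 2)
  return std::ldexp(Quot, EL - ER);
}

int main() {
  std::printf("%g (reference %g)\n", divViaFrexpScaling(0x1.0p-130f, 3.0f),
              0x1.0p-130f / 3.0f);
}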
+
+/// Emit an expansion of 1.0 / sqrt(Src) good for 1ulp that supports denormals.
+static Value *emitRsqIEEE1ULP(IRBuilder<> &Builder, Value *Src,
+ bool IsNegative) {
+ // bool need_scale = x < 0x1p-126f;
+ // float input_scale = need_scale ? 0x1.0p+24f : 1.0f;
+ // float output_scale = need_scale ? 0x1.0p+12f : 1.0f;
+ // rsq(x * input_scale) * output_scale;
+
+ Type *Ty = Src->getType();
+ APFloat SmallestNormal =
+ APFloat::getSmallestNormalized(Ty->getFltSemantics());
+ Value *NeedScale =
+ Builder.CreateFCmpOLT(Src, ConstantFP::get(Ty, SmallestNormal));
+ Constant *One = ConstantFP::get(Ty, 1.0);
+ Constant *InputScale = ConstantFP::get(Ty, 0x1.0p+24);
+ Constant *OutputScale =
+ ConstantFP::get(Ty, IsNegative ? -0x1.0p+12 : 0x1.0p+12);
+
+ Value *InputScaleFactor = Builder.CreateSelect(NeedScale, InputScale, One);
+
+ Value *ScaledInput = Builder.CreateFMul(Src, InputScaleFactor);
+ Value *Rsq = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, ScaledInput);
+ Value *OutputScaleFactor = Builder.CreateSelect(
+ NeedScale, OutputScale, IsNegative ? ConstantFP::get(Ty, -1.0) : One);
+
+ return Builder.CreateFMul(Rsq, OutputScaleFactor);
+}
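// A minimal standalone sketch (plain C++, not from this patch; helper name is
// illustrative) of the pre- and post-scaling in emitRsqIEEE1ULP: multiplying a
// denormal input by 2^24 divides 1/sqrt(x) by 2^12, so the result is rescaled
// by 2^12, with the sign folded in for the -1/sqrt(x) case. The expression
// 1.0f / std::sqrt(x) stands in for v_rsq_f32.
#include <cfloat>
#include <cmath>
#include <cstdio>

static float rsqWithDenormScaling(float X, bool IsNegative) {
  bool NeedScale = X < FLT_MIN; // input below the smallest normal float
  float In = NeedScale ? X * 0x1.0p+24f : X;
  float Rsq = 1.0f / std::sqrt(In);
  float OutScale = NeedScale ? 0x1.0p+12f : 1.0f;
  return IsNegative ? -(Rsq * OutScale) : Rsq * OutScale;
}

int main() {
  float Denormal = 0x1.0p-140f;
  std::printf("%g (reference %g)\n", rsqWithDenormScaling(Denormal, false),
              1.0f / std::sqrt(Denormal));
}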
+
+bool AMDGPUCodeGenPrepareImpl::canOptimizeWithRsq(const FPMathOperator *SqrtOp,
+ FastMathFlags DivFMF,
+ FastMathFlags SqrtFMF) const {
+ // The rsqrt contraction increases accuracy from ~2ulp to ~1ulp.
+ if (!DivFMF.allowContract() || !SqrtFMF.allowContract())
+ return false;
+
+ // v_rsq_f32 gives 1ulp
+ return SqrtFMF.approxFunc() || HasUnsafeFPMath ||
+ SqrtOp->getFPAccuracy() >= 1.0f;
+}
+
+Value *AMDGPUCodeGenPrepareImpl::optimizeWithRsq(
+ IRBuilder<> &Builder, Value *Num, Value *Den, FastMathFlags DivFMF,
+ FastMathFlags SqrtFMF, const Instruction *CtxI) const {
+ // The rsqrt contraction increases accuracy from ~2ulp to ~1ulp.
+ assert(DivFMF.allowContract() && SqrtFMF.allowContract());
+
+ // rsq_f16 is accurate to 0.51 ulp.
+ // rsq_f32 is accurate for !fpmath >= 1.0ulp and denormals are flushed.
+ // rsq_f64 is never accurate.
+ const ConstantFP *CLHS = dyn_cast<ConstantFP>(Num);
+ if (!CLHS)
+ return nullptr;
+
+ assert(Den->getType()->isFloatTy());
+
+ bool IsNegative = false;
+
+ // TODO: Handle other numerator values with arcp.
+ if (CLHS->isExactlyValue(1.0) || (IsNegative = CLHS->isExactlyValue(-1.0))) {
+ // Add in the sqrt flags.
+ IRBuilder<>::FastMathFlagGuard Guard(Builder);
+ DivFMF |= SqrtFMF;
+ Builder.setFastMathFlags(DivFMF);
+
+ if ((DivFMF.approxFunc() && SqrtFMF.approxFunc()) ||
+ canIgnoreDenormalInput(Den, CtxI)) {
+ Value *Result = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rsq, Den);
+ // -1.0 / sqrt(x) -> fneg(rsq(x))
+ return IsNegative ? Builder.CreateFNeg(Result) : Result;
+ }
+
+ return emitRsqIEEE1ULP(Builder, Den, IsNegative);
+ }
+
+ return nullptr;
+}
+
// Optimize fdiv with rcp:
//
// 1/x -> rcp(x) when rcp is sufficiently accurate or inaccurate rcp is
// allowed with unsafe-fp-math or afn.
//
-// a/b -> a*rcp(b) when inaccurate rcp is allowed with unsafe-fp-math or afn.
-static Value *optimizeWithRcp(Value *Num, Value *Den, bool AllowInaccurateRcp,
- bool RcpIsAccurate, IRBuilder<> &Builder,
- Module *Mod) {
-
- if (!AllowInaccurateRcp && !RcpIsAccurate)
- return nullptr;
+// a/b -> a*rcp(b) when arcp is allowed, and we only need to provide 1.0 ULP
+Value *
+AMDGPUCodeGenPrepareImpl::optimizeWithRcp(IRBuilder<> &Builder, Value *Num,
+ Value *Den, FastMathFlags FMF,
+ const Instruction *CtxI) const {
+ // rcp_f16 is accurate to 0.51 ulp.
+ // rcp_f32 is accurate for !fpmath >= 1.0ulp and denormals are flushed.
+ // rcp_f64 is never accurate.
+ assert(Den->getType()->isFloatTy());
- Type *Ty = Den->getType();
if (const ConstantFP *CLHS = dyn_cast<ConstantFP>(Num)) {
- if (AllowInaccurateRcp || RcpIsAccurate) {
- if (CLHS->isExactlyValue(1.0)) {
- Function *Decl = Intrinsic::getDeclaration(
- Mod, Intrinsic::amdgcn_rcp, Ty);
+ bool IsNegative = false;
+ if (CLHS->isExactlyValue(1.0) ||
+ (IsNegative = CLHS->isExactlyValue(-1.0))) {
+ Value *Src = Den;
+
+ if (HasFP32DenormalFlush || FMF.approxFunc()) {
+ // -1.0 / x -> 1.0 / fneg(x)
+ if (IsNegative)
+ Src = Builder.CreateFNeg(Src);
// v_rcp_f32 and v_rsq_f32 do not support denormals, and according to
// the CI documentation has a worst case error of 1 ulp.
- // OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to
- // use it as long as we aren't trying to use denormals.
+ // OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK
+ // to use it as long as we aren't trying to use denormals.
//
// v_rcp_f16 and v_rsq_f16 DO support denormals.
@@ -684,30 +963,29 @@ static Value *optimizeWithRcp(Value *Num, Value *Den, bool AllowInaccurateRcp,
// insert rsq intrinsic here.
// 1.0 / x -> rcp(x)
- return Builder.CreateCall(Decl, { Den });
+ return Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rcp, Src);
}
- // Same as for 1.0, but expand the sign out of the constant.
- if (CLHS->isExactlyValue(-1.0)) {
- Function *Decl = Intrinsic::getDeclaration(
- Mod, Intrinsic::amdgcn_rcp, Ty);
-
- // -1.0 / x -> rcp (fneg x)
- Value *FNeg = Builder.CreateFNeg(Den);
- return Builder.CreateCall(Decl, { FNeg });
- }
+ // TODO: If the input isn't denormal, and we know the input exponent isn't
+ // big enough to introduce a denormal we can avoid the scaling.
+ return emitRcpIEEE1ULP(Builder, Src, IsNegative);
}
}
- if (AllowInaccurateRcp) {
- Function *Decl = Intrinsic::getDeclaration(
- Mod, Intrinsic::amdgcn_rcp, Ty);
-
- // Turn into multiply by the reciprocal.
+ if (FMF.allowReciprocal()) {
// x / y -> x * (1.0 / y)
- Value *Recip = Builder.CreateCall(Decl, { Den });
+
+ // TODO: Could avoid denormal scaling and use raw rcp if we knew the output
+ // will never underflow.
+ if (HasFP32DenormalFlush || FMF.approxFunc()) {
+ Value *Recip = Builder.CreateUnaryIntrinsic(Intrinsic::amdgcn_rcp, Den);
+ return Builder.CreateFMul(Num, Recip);
+ }
+
+ Value *Recip = emitRcpIEEE1ULP(Builder, Den, false);
return Builder.CreateFMul(Num, Recip);
}
+
return nullptr;
}
@@ -718,17 +996,14 @@ static Value *optimizeWithRcp(Value *Num, Value *Den, bool AllowInaccurateRcp,
// 1/x -> fdiv.fast(1,x) when !fpmath >= 2.5ulp.
//
// NOTE: optimizeWithRcp should be tried first because rcp is the preference.
-static Value *optimizeWithFDivFast(Value *Num, Value *Den, float ReqdAccuracy,
- bool HasDenormals, IRBuilder<> &Builder,
- Module *Mod) {
+Value *AMDGPUCodeGenPrepareImpl::optimizeWithFDivFast(
+ IRBuilder<> &Builder, Value *Num, Value *Den, float ReqdAccuracy) const {
// fdiv.fast can achieve 2.5 ULP accuracy.
if (ReqdAccuracy < 2.5f)
return nullptr;
// Only have fdiv.fast for f32.
- Type *Ty = Den->getType();
- if (!Ty->isFloatTy())
- return nullptr;
+ assert(Den->getType()->isFloatTy());
bool NumIsOne = false;
if (const ConstantFP *CNum = dyn_cast<ConstantFP>(Num)) {
@@ -737,11 +1012,39 @@ static Value *optimizeWithFDivFast(Value *Num, Value *Den, float ReqdAccuracy,
}
   // fdiv does not support denormals, but it is always fine to use for 1.0/x.
- if (HasDenormals && !NumIsOne)
+ //
+ // TODO: This works for any value with a specific known exponent range, don't
+ // just limit to constant 1.
+ if (!HasFP32DenormalFlush && !NumIsOne)
return nullptr;
- Function *Decl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_fdiv_fast);
- return Builder.CreateCall(Decl, { Num, Den });
+ return Builder.CreateIntrinsic(Intrinsic::amdgcn_fdiv_fast, {}, {Num, Den});
+}
+
+Value *AMDGPUCodeGenPrepareImpl::visitFDivElement(
+ IRBuilder<> &Builder, Value *Num, Value *Den, FastMathFlags DivFMF,
+ FastMathFlags SqrtFMF, Value *RsqOp, const Instruction *FDivInst,
+ float ReqdDivAccuracy) const {
+ if (RsqOp) {
+ Value *Rsq =
+ optimizeWithRsq(Builder, Num, RsqOp, DivFMF, SqrtFMF, FDivInst);
+ if (Rsq)
+ return Rsq;
+ }
+
+ Value *Rcp = optimizeWithRcp(Builder, Num, Den, DivFMF, FDivInst);
+ if (Rcp)
+ return Rcp;
+
+ // In the basic case fdiv_fast has the same instruction count as the frexp div
+ // expansion. Slightly prefer fdiv_fast since it ends in an fmul that can
+ // potentially be fused into a user. Also, materialization of the constants
+ // can be reused for multiple instances.
+ Value *FDivFast = optimizeWithFDivFast(Builder, Num, Den, ReqdDivAccuracy);
+ if (FDivFast)
+ return FDivFast;
+
+ return emitFrexpDiv(Builder, Num, Den, DivFMF);
}
// Optimizations is performed based on fpmath, fast math flags as well as
@@ -759,100 +1062,96 @@ static Value *optimizeWithFDivFast(Value *Num, Value *Den, float ReqdAccuracy,
// 1/x -> fdiv.fast(1,x) when !fpmath >= 2.5ulp.
//
// NOTE: rcp is the preference in cases that both are legal.
-bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
+bool AMDGPUCodeGenPrepareImpl::visitFDiv(BinaryOperator &FDiv) {
+ if (DisableFDivExpand)
+ return false;
Type *Ty = FDiv.getType()->getScalarType();
-
- // The f64 rcp/rsq approximations are pretty inaccurate. We can do an
- // expansion around them in codegen.
- if (Ty->isDoubleTy())
+ if (!Ty->isFloatTy())
return false;
- // No intrinsic for fdiv16 if target does not support f16.
- if (Ty->isHalfTy() && !ST->has16BitInsts())
- return false;
+ // The f64 rcp/rsq approximations are pretty inaccurate. We can do an
+ // expansion around them in codegen. f16 is good enough to always use.
const FPMathOperator *FPOp = cast<const FPMathOperator>(&FDiv);
- const float ReqdAccuracy = FPOp->getFPAccuracy();
+ const FastMathFlags DivFMF = FPOp->getFastMathFlags();
+ const float ReqdAccuracy = FPOp->getFPAccuracy();
// Inaccurate rcp is allowed with unsafe-fp-math or afn.
- FastMathFlags FMF = FPOp->getFastMathFlags();
- const bool AllowInaccurateRcp = HasUnsafeFPMath || FMF.approxFunc();
+ //
+ // Defer to codegen to handle this.
+ //
+ // TODO: Decide on an interpretation for interactions between afn + arcp +
+ // !fpmath, and make it consistent between here and codegen. For now, defer
+ // expansion of afn to codegen. The current interpretation is so aggressive we
+ // don't need any pre-consideration here when we have better information. A
+ // more conservative interpretation could use handling here.
+ const bool AllowInaccurateRcp = HasUnsafeFPMath || DivFMF.approxFunc();
+ if (AllowInaccurateRcp)
+ return false;
- // rcp_f16 is accurate for !fpmath >= 1.0ulp.
- // rcp_f32 is accurate for !fpmath >= 1.0ulp and denormals are flushed.
- // rcp_f64 is never accurate.
- const bool RcpIsAccurate = (Ty->isHalfTy() && ReqdAccuracy >= 1.0f) ||
- (Ty->isFloatTy() && !HasFP32Denormals && ReqdAccuracy >= 1.0f);
+ // Defer the correct implementations to codegen.
+ if (ReqdAccuracy < 1.0f)
+ return false;
- IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()));
- Builder.setFastMathFlags(FMF);
- Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());
+ FastMathFlags SqrtFMF;
Value *Num = FDiv.getOperand(0);
Value *Den = FDiv.getOperand(1);
- Value *NewFDiv = nullptr;
- if (auto *VT = dyn_cast<FixedVectorType>(FDiv.getType())) {
- NewFDiv = PoisonValue::get(VT);
-
- // FIXME: Doesn't do the right thing for cases where the vector is partially
- // constant. This works when the scalarizer pass is run first.
- for (unsigned I = 0, E = VT->getNumElements(); I != E; ++I) {
- Value *NumEltI = Builder.CreateExtractElement(Num, I);
- Value *DenEltI = Builder.CreateExtractElement(Den, I);
- // Try rcp first.
- Value *NewElt = optimizeWithRcp(NumEltI, DenEltI, AllowInaccurateRcp,
- RcpIsAccurate, Builder, Mod);
- if (!NewElt) // Try fdiv.fast.
- NewElt = optimizeWithFDivFast(NumEltI, DenEltI, ReqdAccuracy,
- HasFP32Denormals, Builder, Mod);
- if (!NewElt) // Keep the original.
- NewElt = Builder.CreateFDiv(NumEltI, DenEltI);
-
- NewFDiv = Builder.CreateInsertElement(NewFDiv, NewElt, I);
- }
- } else { // Scalar FDiv.
- // Try rcp first.
- NewFDiv = optimizeWithRcp(Num, Den, AllowInaccurateRcp, RcpIsAccurate,
- Builder, Mod);
- if (!NewFDiv) { // Try fdiv.fast.
- NewFDiv = optimizeWithFDivFast(Num, Den, ReqdAccuracy, HasFP32Denormals,
- Builder, Mod);
- }
+ Value *RsqOp = nullptr;
+ auto *DenII = dyn_cast<IntrinsicInst>(Den);
+ if (DenII && DenII->getIntrinsicID() == Intrinsic::sqrt &&
+ DenII->hasOneUse()) {
+ const auto *SqrtOp = cast<FPMathOperator>(DenII);
+ SqrtFMF = SqrtOp->getFastMathFlags();
+ if (canOptimizeWithRsq(SqrtOp, DivFMF, SqrtFMF))
+ RsqOp = SqrtOp->getOperand(0);
}
- if (NewFDiv) {
- FDiv.replaceAllUsesWith(NewFDiv);
- NewFDiv->takeName(&FDiv);
- FDiv.eraseFromParent();
+ IRBuilder<> Builder(FDiv.getParent(), std::next(FDiv.getIterator()));
+ Builder.setFastMathFlags(DivFMF);
+ Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());
+
+ SmallVector<Value *, 4> NumVals;
+ SmallVector<Value *, 4> DenVals;
+ SmallVector<Value *, 4> RsqDenVals;
+ extractValues(Builder, NumVals, Num);
+ extractValues(Builder, DenVals, Den);
+
+ if (RsqOp)
+ extractValues(Builder, RsqDenVals, RsqOp);
+
+ SmallVector<Value *, 4> ResultVals(NumVals.size());
+ for (int I = 0, E = NumVals.size(); I != E; ++I) {
+ Value *NumElt = NumVals[I];
+ Value *DenElt = DenVals[I];
+ Value *RsqDenElt = RsqOp ? RsqDenVals[I] : nullptr;
+
+ Value *NewElt =
+ visitFDivElement(Builder, NumElt, DenElt, DivFMF, SqrtFMF, RsqDenElt,
+ cast<Instruction>(FPOp), ReqdAccuracy);
+ if (!NewElt) {
+ // Keep the original, but scalarized.
+
+ // This has the unfortunate side effect of sometimes scalarizing when
+ // we're not going to do anything.
+ NewElt = Builder.CreateFDiv(NumElt, DenElt);
+ if (auto *NewEltInst = dyn_cast<Instruction>(NewElt))
+ NewEltInst->copyMetadata(FDiv);
+ }
+
+ ResultVals[I] = NewElt;
}
- return !!NewFDiv;
-}
+ Value *NewVal = insertValues(Builder, FDiv.getType(), ResultVals);
+
+ if (NewVal) {
+ FDiv.replaceAllUsesWith(NewVal);
+ NewVal->takeName(&FDiv);
+ RecursivelyDeleteTriviallyDeadInstructions(&FDiv, TLInfo);
+ }
-bool AMDGPUCodeGenPrepare::visitXor(BinaryOperator &I) {
- // Match the Xor instruction, its type and its operands
- IntrinsicInst *IntrinsicCall = dyn_cast<IntrinsicInst>(I.getOperand(0));
- ConstantInt *RHS = dyn_cast<ConstantInt>(I.getOperand(1));
- if (!RHS || !IntrinsicCall || RHS->getSExtValue() != -1)
- return visitBinaryOperator(I);
-
- // Check if the Call is an intrinsic instruction to amdgcn_class intrinsic
- // has only one use
- if (IntrinsicCall->getIntrinsicID() != Intrinsic::amdgcn_class ||
- !IntrinsicCall->hasOneUse())
- return visitBinaryOperator(I);
-
- // "Not" the second argument of the intrinsic call
- ConstantInt *Arg = dyn_cast<ConstantInt>(IntrinsicCall->getOperand(1));
- if (!Arg)
- return visitBinaryOperator(I);
-
- IntrinsicCall->setOperand(
- 1, ConstantInt::get(Arg->getType(), Arg->getZExtValue() ^ 0x3ff));
- I.replaceAllUsesWith(IntrinsicCall);
- I.eraseFromParent();
return true;
}
@@ -882,9 +1181,9 @@ static Value* getMulHu(IRBuilder<> &Builder, Value *LHS, Value *RHS) {
/// Figure out how many bits are really needed for this division. \p AtLeast is
/// an optimization hint to bypass the second ComputeNumSignBits call if the
/// first one is insufficient. Returns -1 on failure.
-int AMDGPUCodeGenPrepare::getDivNumBits(BinaryOperator &I,
- Value *Num, Value *Den,
- unsigned AtLeast, bool IsSigned) const {
+int AMDGPUCodeGenPrepareImpl::getDivNumBits(BinaryOperator &I, Value *Num,
+ Value *Den, unsigned AtLeast,
+ bool IsSigned) const {
const DataLayout &DL = Mod->getDataLayout();
unsigned LHSSignBits = ComputeNumSignBits(Num, DL, 0, AC, &I);
if (LHSSignBits < AtLeast)
@@ -903,21 +1202,19 @@ int AMDGPUCodeGenPrepare::getDivNumBits(BinaryOperator &I,
// The fractional part of a float is enough to accurately represent up to
// a 24-bit signed integer.
-Value *AMDGPUCodeGenPrepare::expandDivRem24(IRBuilder<> &Builder,
- BinaryOperator &I,
- Value *Num, Value *Den,
- bool IsDiv, bool IsSigned) const {
+Value *AMDGPUCodeGenPrepareImpl::expandDivRem24(IRBuilder<> &Builder,
+ BinaryOperator &I, Value *Num,
+ Value *Den, bool IsDiv,
+ bool IsSigned) const {
int DivBits = getDivNumBits(I, Num, Den, 9, IsSigned);
if (DivBits == -1)
return nullptr;
return expandDivRem24Impl(Builder, I, Num, Den, DivBits, IsDiv, IsSigned);
}
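// A standalone check (plain C++, not from this patch) of the premise stated
// above: a float's 24-bit significand represents every integer of magnitude up
// to 2^24 exactly, which is what lets the 24-bit expansion compute the
// quotient in floating point and convert it back losslessly.
#include <cstdint>
#include <cstdio>

int main() {
  for (uint32_t I = 0; I <= (1u << 24); ++I) {
    if (static_cast<uint32_t>(static_cast<float>(I)) != I) {
      std::printf("not exact at %u\n", I);
      return 1;
    }
  }
  std::printf("every integer up to 2^24 round-trips through float exactly\n");
  return 0;
}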
-Value *AMDGPUCodeGenPrepare::expandDivRem24Impl(IRBuilder<> &Builder,
- BinaryOperator &I,
- Value *Num, Value *Den,
- unsigned DivBits,
- bool IsDiv, bool IsSigned) const {
+Value *AMDGPUCodeGenPrepareImpl::expandDivRem24Impl(
+ IRBuilder<> &Builder, BinaryOperator &I, Value *Num, Value *Den,
+ unsigned DivBits, bool IsDiv, bool IsSigned) const {
Type *I32Ty = Builder.getInt32Ty();
Num = Builder.CreateTrunc(Num, I32Ty);
Den = Builder.CreateTrunc(Den, I32Ty);
@@ -1017,8 +1314,9 @@ Value *AMDGPUCodeGenPrepare::expandDivRem24Impl(IRBuilder<> &Builder,
// than the general expansion we do here.
// TODO: It would be better to just directly handle those optimizations here.
-bool AMDGPUCodeGenPrepare::divHasSpecialOptimization(
- BinaryOperator &I, Value *Num, Value *Den) const {
+bool AMDGPUCodeGenPrepareImpl::divHasSpecialOptimization(BinaryOperator &I,
+ Value *Num,
+ Value *Den) const {
if (Constant *C = dyn_cast<Constant>(Den)) {
// Arbitrary constants get a better expansion as long as a wider mulhi is
// legal.
@@ -1059,9 +1357,9 @@ static Value *getSign32(Value *V, IRBuilder<> &Builder, const DataLayout *DL) {
return Builder.CreateAShr(V, Builder.getInt32(31));
}
-Value *AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
- BinaryOperator &I, Value *X,
- Value *Y) const {
+Value *AMDGPUCodeGenPrepareImpl::expandDivRem32(IRBuilder<> &Builder,
+ BinaryOperator &I, Value *X,
+ Value *Y) const {
Instruction::BinaryOps Opc = I.getOpcode();
assert(Opc == Instruction::URem || Opc == Instruction::UDiv ||
Opc == Instruction::SRem || Opc == Instruction::SDiv);
@@ -1147,7 +1445,7 @@ Value *AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
Value *FloatY = Builder.CreateUIToFP(Y, F32Ty);
Function *Rcp = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_rcp, F32Ty);
Value *RcpY = Builder.CreateCall(Rcp, {FloatY});
- Constant *Scale = ConstantFP::get(F32Ty, BitsToFloat(0x4F7FFFFE));
+ Constant *Scale = ConstantFP::get(F32Ty, llvm::bit_cast<float>(0x4F7FFFFE));
Value *ScaledY = Builder.CreateFMul(RcpY, Scale);
Value *Z = Builder.CreateFPToUI(ScaledY, I32Ty);
@@ -1184,9 +1482,9 @@ Value *AMDGPUCodeGenPrepare::expandDivRem32(IRBuilder<> &Builder,
return Res;
}
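// A small standalone sketch (plain C++, not from this patch) showing what the
// magic constant 0x4F7FFFFE used in expandDivRem32 above evaluates to: a float
// slightly below 2^32. Scaling rcp(y) by it before truncating presumably keeps
// the first estimate of 2^32 / y on the low side, so the mulhi-based
// refinement that follows only has to correct upward.
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const uint32_t Bits = 0x4F7FFFFEu;
  float Scale;
  std::memcpy(&Scale, &Bits, sizeof(Scale)); // same idea as llvm::bit_cast
  std::printf("scale = %.1f = 2^32 - %.1f\n", Scale, 4294967296.0 - Scale);
}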
-Value *AMDGPUCodeGenPrepare::shrinkDivRem64(IRBuilder<> &Builder,
- BinaryOperator &I,
- Value *Num, Value *Den) const {
+Value *AMDGPUCodeGenPrepareImpl::shrinkDivRem64(IRBuilder<> &Builder,
+ BinaryOperator &I, Value *Num,
+ Value *Den) const {
if (!ExpandDiv64InIR && divHasSpecialOptimization(I, Num, Den))
return nullptr; // Keep it for later optimization.
@@ -1215,7 +1513,7 @@ Value *AMDGPUCodeGenPrepare::shrinkDivRem64(IRBuilder<> &Builder,
return nullptr;
}
-void AMDGPUCodeGenPrepare::expandDivRem64(BinaryOperator &I) const {
+void AMDGPUCodeGenPrepareImpl::expandDivRem64(BinaryOperator &I) const {
Instruction::BinaryOps Opc = I.getOpcode();
// Do the general expansion.
if (Opc == Instruction::UDiv || Opc == Instruction::SDiv) {
@@ -1231,12 +1529,12 @@ void AMDGPUCodeGenPrepare::expandDivRem64(BinaryOperator &I) const {
llvm_unreachable("not a division");
}
-bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
+bool AMDGPUCodeGenPrepareImpl::visitBinaryOperator(BinaryOperator &I) {
if (foldBinOpIntoSelect(I))
return true;
if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
- DA->isUniform(&I) && promoteUniformOpToI32(I))
+ UA->isUniform(&I) && promoteUniformOpToI32(I))
return true;
if (UseMul24Intrin && replaceMulWithMul24(I))
@@ -1307,6 +1605,7 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
// TODO: We get much worse code in specially handled constant cases.
for (BinaryOperator *Div : Div64ToExpand) {
expandDivRem64(*Div);
+ FlowChanged = true;
Changed = true;
}
}
@@ -1314,7 +1613,7 @@ bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
return Changed;
}
-bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
+bool AMDGPUCodeGenPrepareImpl::visitLoadInst(LoadInst &I) {
if (!WidenLoads)
return false;
@@ -1325,9 +1624,7 @@ bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
Builder.SetCurrentDebugLocation(I.getDebugLoc());
Type *I32Ty = Builder.getInt32Ty();
- Type *PT = PointerType::get(I32Ty, I.getPointerAddressSpace());
- Value *BitCast= Builder.CreateBitCast(I.getPointerOperand(), PT);
- LoadInst *WidenLoad = Builder.CreateLoad(I32Ty, BitCast);
+ LoadInst *WidenLoad = Builder.CreateLoad(I32Ty, I.getPointerOperand());
WidenLoad->copyMetadata(I);
// If we have range metadata, we need to convert the type, and not make
@@ -1362,48 +1659,420 @@ bool AMDGPUCodeGenPrepare::visitLoadInst(LoadInst &I) {
return false;
}
-bool AMDGPUCodeGenPrepare::visitICmpInst(ICmpInst &I) {
+bool AMDGPUCodeGenPrepareImpl::visitICmpInst(ICmpInst &I) {
bool Changed = false;
if (ST->has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
- DA->isUniform(&I))
+ UA->isUniform(&I))
Changed |= promoteUniformOpToI32(I);
return Changed;
}
-bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) {
- bool Changed = false;
+bool AMDGPUCodeGenPrepareImpl::visitSelectInst(SelectInst &I) {
+ Value *Cond = I.getCondition();
+ Value *TrueVal = I.getTrueValue();
+ Value *FalseVal = I.getFalseValue();
+ Value *CmpVal;
+ FCmpInst::Predicate Pred;
- if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
- DA->isUniform(&I))
- Changed |= promoteUniformOpToI32(I);
+ if (ST->has16BitInsts() && needsPromotionToI32(I.getType())) {
+ if (UA->isUniform(&I))
+ return promoteUniformOpToI32(I);
+ return false;
+ }
- return Changed;
+ // Match fract pattern with nan check.
+ if (!match(Cond, m_FCmp(Pred, m_Value(CmpVal), m_NonNaN())))
+ return false;
+
+ FPMathOperator *FPOp = dyn_cast<FPMathOperator>(&I);
+ if (!FPOp)
+ return false;
+
+ IRBuilder<> Builder(&I);
+ Builder.setFastMathFlags(FPOp->getFastMathFlags());
+
+ auto *IITrue = dyn_cast<IntrinsicInst>(TrueVal);
+ auto *IIFalse = dyn_cast<IntrinsicInst>(FalseVal);
+
+ Value *Fract = nullptr;
+ if (Pred == FCmpInst::FCMP_UNO && TrueVal == CmpVal && IIFalse &&
+ CmpVal == matchFractPat(*IIFalse)) {
+ // isnan(x) ? x : fract(x)
+ Fract = applyFractPat(Builder, CmpVal);
+ } else if (Pred == FCmpInst::FCMP_ORD && FalseVal == CmpVal && IITrue &&
+ CmpVal == matchFractPat(*IITrue)) {
+ // !isnan(x) ? fract(x) : x
+ Fract = applyFractPat(Builder, CmpVal);
+ } else
+ return false;
+
+ Fract->takeName(&I);
+ I.replaceAllUsesWith(Fract);
+ RecursivelyDeleteTriviallyDeadInstructions(&I, TLInfo);
+ return true;
+}
+
+static bool areInSameBB(const Value *A, const Value *B) {
+ const auto *IA = dyn_cast<Instruction>(A);
+ const auto *IB = dyn_cast<Instruction>(B);
+ return IA && IB && IA->getParent() == IB->getParent();
+}
+
+// Helper for breaking large PHIs that returns true when an extractelement on V
+// is likely to be folded away by the DAG combiner.
+static bool isInterestingPHIIncomingValue(const Value *V) {
+ const auto *FVT = dyn_cast<FixedVectorType>(V->getType());
+ if (!FVT)
+ return false;
+
+ const Value *CurVal = V;
+
+ // Check for insertelements, keeping track of the elements covered.
+ BitVector EltsCovered(FVT->getNumElements());
+ while (const auto *IE = dyn_cast<InsertElementInst>(CurVal)) {
+ const auto *Idx = dyn_cast<ConstantInt>(IE->getOperand(2));
+
+ // Non constant index/out of bounds index -> folding is unlikely.
+ // The latter is more of a sanity check because canonical IR should just
+ // have replaced those with poison.
+ if (!Idx || Idx->getSExtValue() >= FVT->getNumElements())
+ return false;
+
+ const auto *VecSrc = IE->getOperand(0);
+
+ // If the vector source is another instruction, it must be in the same basic
+ // block. Otherwise, the DAGCombiner won't see the whole thing and is
+ // unlikely to be able to do anything interesting here.
+ if (isa<Instruction>(VecSrc) && !areInSameBB(VecSrc, IE))
+ return false;
+
+ CurVal = VecSrc;
+ EltsCovered.set(Idx->getSExtValue());
+
+ // All elements covered.
+ if (EltsCovered.all())
+ return true;
+ }
+
+ // We either didn't find a single insertelement, or the insertelement chain
+ // ended before all elements were covered. Check for other interesting values.
+
+ // Constants are always interesting because we can just constant fold the
+ // extractelements.
+ if (isa<Constant>(CurVal))
+ return true;
+
+ // shufflevector is likely to be profitable if either operand is a constant,
+ // or if either source is in the same block.
+ // This is because shufflevector is most often lowered as a series of
+ // insert/extract elements anyway.
+ if (const auto *SV = dyn_cast<ShuffleVectorInst>(CurVal)) {
+ return isa<Constant>(SV->getOperand(1)) ||
+ areInSameBB(SV, SV->getOperand(0)) ||
+ areInSameBB(SV, SV->getOperand(1));
+ }
+
+ return false;
+}
+
+bool AMDGPUCodeGenPrepareImpl::canBreakPHINode(const PHINode &I) {
+ // Check in the cache, or add an entry for this node.
+ //
+ // We init with false because we consider all PHI nodes unbreakable until we
+ // reach a conclusion. Doing the opposite - assuming they're break-able until
+ // proven otherwise - can be harmful in some pathological cases so we're
+ // conservative for now.
+ const auto [It, DidInsert] = BreakPhiNodesCache.insert({&I, false});
+ if (!DidInsert)
+ return It->second;
+
+ // This function may recurse, so to guard against infinite looping, this PHI
+ // is conservatively considered unbreakable until we reach a conclusion.
+
+ // Don't break PHIs that have no interesting incoming values. That is, where
+ // there is no clear opportunity to fold the "extractelement" instructions we
+ // would add.
+ //
+ // Note: IC does not run after this pass, so we're only interested in the
+ // foldings that the DAG combiner can do.
+ if (none_of(I.incoming_values(),
+ [&](Value *V) { return isInterestingPHIIncomingValue(V); }))
+ return false;
+
+ // Now, check users for unbreakable PHI nodes. If we have an unbreakable PHI
+ // node as user, we don't want to break this PHI either because it's unlikely
+ // to be beneficial. We would just explode the vector and reassemble it
+ // directly, wasting instructions.
+ //
+ // In the case where multiple users are PHI nodes, we want at least half of
+ // them to be breakable.
+ int Score = 0;
+ for (const Value *U : I.users()) {
+ if (const auto *PU = dyn_cast<PHINode>(U))
+ Score += canBreakPHINode(*PU) ? 1 : -1;
+ }
+
+ if (Score < 0)
+ return false;
+
+ return BreakPhiNodesCache[&I] = true;
+}
+
+/// Helper class for "break large PHIs" (visitPHINode).
+///
+/// This represents a slice of a PHI's incoming value, which is made up of:
+/// - The type of the slice (Ty)
+/// - The index in the incoming value's vector where the slice starts (Idx)
+/// - The number of elements in the slice (NumElts).
+/// It also keeps track of the NewPHI node inserted for this particular slice.
+///
+/// Slice examples:
+/// <4 x i64> -> Split into four i64 slices.
+/// -> [i64, 0, 1], [i64, 1, 1], [i64, 2, 1], [i64, 3, 1]
+/// <5 x i16> -> Split into 2 <2 x i16> slices + an i16 tail.
+/// -> [<2 x i16>, 0, 2], [<2 x i16>, 2, 2], [i16, 4, 1]
+class VectorSlice {
+public:
+ VectorSlice(Type *Ty, unsigned Idx, unsigned NumElts)
+ : Ty(Ty), Idx(Idx), NumElts(NumElts) {}
+
+ Type *Ty = nullptr;
+ unsigned Idx = 0;
+ unsigned NumElts = 0;
+ PHINode *NewPHI = nullptr;
+
+ /// Slice \p Inc according to the information contained within this slice.
+ /// This is cached, so if called multiple times for the same \p BB & \p Inc
+ /// pair, it returns the same Sliced value as well.
+ ///
+ /// Note this *intentionally* does not return the same value for, say,
+ /// [%bb.0, %0] & [%bb.1, %0] as:
+ /// - It could cause issues with dominance (e.g. if bb.1 is seen first, then
+ /// the value in bb.1 may not be reachable from bb.0 if it's its
+ /// predecessor.)
+ /// - We also want to make our extract instructions as local as possible so
+ /// the DAG has better chances of folding them out. Duplicating them like
+ /// that is beneficial in that regard.
+ ///
+ /// This is both a minor optimization to avoid creating duplicate
+/// instructions and a requirement for correctness. It is not forbidden
+ /// for a PHI node to have the same [BB, Val] pair multiple times. If we
+ /// returned a new value each time, those previously identical pairs would all
+ /// have different incoming values (from the same block) and it'd cause a "PHI
+ /// node has multiple entries for the same basic block with different incoming
+ /// values!" verifier error.
+ Value *getSlicedVal(BasicBlock *BB, Value *Inc, StringRef NewValName) {
+ Value *&Res = SlicedVals[{BB, Inc}];
+ if (Res)
+ return Res;
+
+ IRBuilder<> B(BB->getTerminator());
+ if (Instruction *IncInst = dyn_cast<Instruction>(Inc))
+ B.SetCurrentDebugLocation(IncInst->getDebugLoc());
+
+ if (NumElts > 1) {
+ SmallVector<int, 4> Mask;
+ for (unsigned K = Idx; K < (Idx + NumElts); ++K)
+ Mask.push_back(K);
+ Res = B.CreateShuffleVector(Inc, Mask, NewValName);
+ } else
+ Res = B.CreateExtractElement(Inc, Idx, NewValName);
+
+ return Res;
+ }
+
+private:
+ SmallDenseMap<std::pair<BasicBlock *, Value *>, Value *> SlicedVals;
+};
+
+bool AMDGPUCodeGenPrepareImpl::visitPHINode(PHINode &I) {
+ // Break-up fixed-vector PHIs into smaller pieces.
+ // Default threshold is 32, so it breaks up any vector that's >32 bits into
+ // its elements, or into 32-bit pieces (for 8/16 bit elts).
+ //
+ // This is only helpful for DAGISel because it doesn't handle large PHIs as
+ // well as GlobalISel. DAGISel lowers PHIs by using CopyToReg/CopyFromReg.
+ // With large, odd-sized PHIs we may end up needing many `build_vector`
+ // operations with most elements being "undef". This inhibits a lot of
+ // optimization opportunities and can result in unreasonably high register
+ // pressure and the inevitable stack spilling.
+ if (!ScalarizeLargePHIs || getCGPassBuilderOption().EnableGlobalISelOption)
+ return false;
+
+ FixedVectorType *FVT = dyn_cast<FixedVectorType>(I.getType());
+ if (!FVT || DL->getTypeSizeInBits(FVT) <= ScalarizeLargePHIsThreshold)
+ return false;
+
+ if (!ForceScalarizeLargePHIs && !canBreakPHINode(I))
+ return false;
+
+ std::vector<VectorSlice> Slices;
+
+ Type *EltTy = FVT->getElementType();
+ {
+ unsigned Idx = 0;
+    // For 8/16-bit element types, don't scalarize fully but break it up into as many
+ // 32-bit slices as we can, and scalarize the tail.
+ const unsigned EltSize = DL->getTypeSizeInBits(EltTy);
+ const unsigned NumElts = FVT->getNumElements();
+ if (EltSize == 8 || EltSize == 16) {
+ const unsigned SubVecSize = (32 / EltSize);
+ Type *SubVecTy = FixedVectorType::get(EltTy, SubVecSize);
+ for (unsigned End = alignDown(NumElts, SubVecSize); Idx < End;
+ Idx += SubVecSize)
+ Slices.emplace_back(SubVecTy, Idx, SubVecSize);
+ }
+
+ // Scalarize all remaining elements.
+ for (; Idx < NumElts; ++Idx)
+ Slices.emplace_back(EltTy, Idx, 1);
+ }
+
+ if (Slices.size() == 1)
+ return false;
+
+ // Create one PHI per vector piece. The "VectorSlice" class takes care of
+ // creating the necessary instruction to extract the relevant slices of each
+ // incoming value.
+ IRBuilder<> B(I.getParent());
+ B.SetCurrentDebugLocation(I.getDebugLoc());
+
+ unsigned IncNameSuffix = 0;
+ for (VectorSlice &S : Slices) {
+    // We need to reset the builder on each iteration, because getSlicedVal may
+ // have inserted something into I's BB.
+ B.SetInsertPoint(I.getParent()->getFirstNonPHI());
+ S.NewPHI = B.CreatePHI(S.Ty, I.getNumIncomingValues());
+
+ for (const auto &[Idx, BB] : enumerate(I.blocks())) {
+ S.NewPHI->addIncoming(S.getSlicedVal(BB, I.getIncomingValue(Idx),
+ "largephi.extractslice" +
+ std::to_string(IncNameSuffix++)),
+ BB);
+ }
+ }
+
+ // And replace this PHI with a vector of all the previous PHI values.
+ Value *Vec = PoisonValue::get(FVT);
+ unsigned NameSuffix = 0;
+ for (VectorSlice &S : Slices) {
+ const auto ValName = "largephi.insertslice" + std::to_string(NameSuffix++);
+ if (S.NumElts > 1)
+ Vec =
+ B.CreateInsertVector(FVT, Vec, S.NewPHI, B.getInt64(S.Idx), ValName);
+ else
+ Vec = B.CreateInsertElement(Vec, S.NewPHI, S.Idx, ValName);
+ }
+
+ I.replaceAllUsesWith(Vec);
+ I.eraseFromParent();
+ return true;
}
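// A minimal standalone sketch (plain C++, not from this patch; struct and
// helper names are illustrative) of the slicing policy in visitPHINode above:
// 8/16-bit element vectors are carved into 32-bit sub-vectors with any
// remainder scalarized, and everything else is fully scalarized. For <5 x i16>
// this reproduces the layout given in the VectorSlice comment: two <2 x i16>
// slices plus an i16 tail.
#include <cstdio>
#include <vector>

struct SliceDesc {
  unsigned EltBits, Idx, NumElts;
};

static std::vector<SliceDesc> sliceLayout(unsigned EltBits, unsigned NumElts) {
  std::vector<SliceDesc> Slices;
  unsigned Idx = 0;
  if (EltBits == 8 || EltBits == 16) {
    const unsigned SubVecSize = 32 / EltBits;
    for (; Idx + SubVecSize <= NumElts; Idx += SubVecSize)
      Slices.push_back({EltBits, Idx, SubVecSize});
  }
  for (; Idx < NumElts; ++Idx) // scalarize the tail
    Slices.push_back({EltBits, Idx, 1});
  return Slices;
}

int main() {
  for (const SliceDesc &S : sliceLayout(16, 5))
    std::printf("[%u x i%u at index %u]\n", S.NumElts, S.EltBits, S.Idx);
}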
-bool AMDGPUCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
+bool AMDGPUCodeGenPrepareImpl::visitIntrinsicInst(IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
case Intrinsic::bitreverse:
return visitBitreverseIntrinsicInst(I);
+ case Intrinsic::minnum:
+ return visitMinNum(I);
default:
return false;
}
}
-bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
+bool AMDGPUCodeGenPrepareImpl::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
bool Changed = false;
if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
- DA->isUniform(&I))
+ UA->isUniform(&I))
Changed |= promoteUniformBitreverseToI32(I);
return Changed;
}
+/// Match non-nan fract pattern.
+/// minnum(fsub(x, floor(x)), nextafter(1.0, -1.0))
+///
+/// Matches only if fract is a useful instruction for the subtarget. Does not
+/// account for the nan handling; the instruction has a nan check on the input
+/// value.
+Value *AMDGPUCodeGenPrepareImpl::matchFractPat(IntrinsicInst &I) {
+ if (ST->hasFractBug())
+ return nullptr;
+
+ if (I.getIntrinsicID() != Intrinsic::minnum)
+ return nullptr;
+
+ Type *Ty = I.getType();
+ if (!isLegalFloatingTy(Ty->getScalarType()))
+ return nullptr;
+
+ Value *Arg0 = I.getArgOperand(0);
+ Value *Arg1 = I.getArgOperand(1);
+
+ const APFloat *C;
+ if (!match(Arg1, m_APFloat(C)))
+ return nullptr;
+
+ APFloat One(1.0);
+ bool LosesInfo;
+ One.convert(C->getSemantics(), APFloat::rmNearestTiesToEven, &LosesInfo);
+
+ // Match nextafter(1.0, -1)
+ One.next(true);
+ if (One != *C)
+ return nullptr;
+
+ Value *FloorSrc;
+ if (match(Arg0, m_FSub(m_Value(FloorSrc),
+ m_Intrinsic<Intrinsic::floor>(m_Deferred(FloorSrc)))))
+ return FloorSrc;
+ return nullptr;
+}
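// A minimal standalone sketch (plain C++, not from this patch; helper name is
// illustrative) of the pattern matchFractPat recognizes. The clamp to
// nextafter(1.0, -1.0), which is 0x1.fffffep-1f for f32, keeps the result
// strictly below 1.0 when x is a tiny negative value and x - floor(x) rounds
// up to exactly 1.0 -- the case the clamp exists for. NaN handling is ignored
// here; the select visited above carries the NaN check.
#include <cmath>
#include <cstdio>

static float fractPattern(float X) {
  return std::fmin(X - std::floor(X), 0x1.fffffep-1f);
}

int main() {
  float X = -0x1.0p-26f; // tiny negative input
  std::printf("x - floor(x) = %a\n", X - std::floor(X)); // rounds to 0x1p+0
  std::printf("fract(x)     = %a\n", fractPattern(X));   // clamped below 1.0
}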
+
+Value *AMDGPUCodeGenPrepareImpl::applyFractPat(IRBuilder<> &Builder,
+ Value *FractArg) {
+ SmallVector<Value *, 4> FractVals;
+ extractValues(Builder, FractVals, FractArg);
+
+ SmallVector<Value *, 4> ResultVals(FractVals.size());
+
+ Type *Ty = FractArg->getType()->getScalarType();
+ for (unsigned I = 0, E = FractVals.size(); I != E; ++I) {
+ ResultVals[I] =
+ Builder.CreateIntrinsic(Intrinsic::amdgcn_fract, {Ty}, {FractVals[I]});
+ }
+
+ return insertValues(Builder, FractArg->getType(), ResultVals);
+}
+
+bool AMDGPUCodeGenPrepareImpl::visitMinNum(IntrinsicInst &I) {
+ Value *FractArg = matchFractPat(I);
+ if (!FractArg)
+ return false;
+
+ // Match pattern for fract intrinsic in contexts where the nan check has been
+ // optimized out (and hope the knowledge the source can't be nan wasn't lost).
+ if (!I.hasNoNaNs() && !isKnownNeverNaN(FractArg, *DL, TLInfo))
+ return false;
+
+ IRBuilder<> Builder(&I);
+ FastMathFlags FMF = I.getFastMathFlags();
+ FMF.setNoNaNs();
+ Builder.setFastMathFlags(FMF);
+
+ Value *Fract = applyFractPat(Builder, FractArg);
+ Fract->takeName(&I);
+ I.replaceAllUsesWith(Fract);
+
+ RecursivelyDeleteTriviallyDeadInstructions(&I, TLInfo);
+ return true;
+}
+
bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
- Mod = &M;
- DL = &Mod->getDataLayout();
+ Impl.Mod = &M;
+ Impl.DL = &Impl.Mod->getDataLayout();
return false;
}
@@ -1416,49 +2085,44 @@ bool AMDGPUCodeGenPrepare::runOnFunction(Function &F) {
return false;
const AMDGPUTargetMachine &TM = TPC->getTM<AMDGPUTargetMachine>();
- ST = &TM.getSubtarget<GCNSubtarget>(F);
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DA = &getAnalysis<LegacyDivergenceAnalysis>();
-
+ Impl.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+ Impl.ST = &TM.getSubtarget<GCNSubtarget>(F);
+ Impl.AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ Impl.UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DT = DTWP ? &DTWP->getDomTree() : nullptr;
-
- HasUnsafeFPMath = hasUnsafeFPMath(F);
-
- AMDGPU::SIModeRegisterDefaults Mode(F);
- HasFP32Denormals = Mode.allFP32Denormals();
-
- bool MadeChange = false;
-
- Function::iterator NextBB;
- for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; FI = NextBB) {
- BasicBlock *BB = &*FI;
- NextBB = std::next(FI);
-
- BasicBlock::iterator Next;
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; I = Next) {
- Next = std::next(I);
-
- MadeChange |= visit(*I);
-
- if (Next != E) { // Control flow changed
- BasicBlock *NextInstBB = Next->getParent();
- if (NextInstBB != BB) {
- BB = NextInstBB;
- E = BB->end();
- FE = F.end();
- }
- }
- }
- }
+ Impl.DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
+ SIModeRegisterDefaults Mode(F);
+ Impl.HasFP32DenormalFlush =
+ Mode.FP32Denormals == DenormalMode::getPreserveSign();
+ return Impl.run(F);
+}
- return MadeChange;
+PreservedAnalyses AMDGPUCodeGenPreparePass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ AMDGPUCodeGenPrepareImpl Impl;
+ Impl.Mod = F.getParent();
+ Impl.DL = &Impl.Mod->getDataLayout();
+ Impl.TLInfo = &FAM.getResult<TargetLibraryAnalysis>(F);
+ Impl.ST = &TM.getSubtarget<GCNSubtarget>(F);
+ Impl.AC = &FAM.getResult<AssumptionAnalysis>(F);
+ Impl.UA = &FAM.getResult<UniformityInfoAnalysis>(F);
+ Impl.DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
+ Impl.HasUnsafeFPMath = hasUnsafeFPMath(F);
+ SIModeRegisterDefaults Mode(F);
+ Impl.HasFP32DenormalFlush =
+ Mode.FP32Denormals == DenormalMode::getPreserveSign();
+ PreservedAnalyses PA = PreservedAnalyses::none();
+ if (!Impl.FlowChanged)
+ PA.preserveSet<CFGAnalyses>();
+ return Impl.run(F) ? PA : PreservedAnalyses::all();
}
INITIALIZE_PASS_BEGIN(AMDGPUCodeGenPrepare, DEBUG_TYPE,
"AMDGPU IR optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUCodeGenPrepare, DEBUG_TYPE, "AMDGPU IR optimizations",
false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index c11d4656db3f..892e1eef27a8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -10,31 +10,31 @@ include "llvm/Target/GlobalISel/Combine.td"
// TODO: This really belongs after legalization after scalarization.
-def fmin_fmax_legacy_matchdata : GIDefMatchData<"AMDGPUPostLegalizerCombinerHelper::FMinFMaxLegacyInfo">;
+def fmin_fmax_legacy_matchdata : GIDefMatchData<"FMinFMaxLegacyInfo">;
let Predicates = [HasFminFmaxLegacy] in
def fcmp_select_to_fmin_fmax_legacy : GICombineRule<
(defs root:$select, fmin_fmax_legacy_matchdata:$matchinfo),
(match (wip_match_opcode G_SELECT):$select,
- [{ return PostLegalizerHelper.matchFMinFMaxLegacy(*${select}, ${matchinfo}); }]),
- (apply [{ PostLegalizerHelper.applySelectFCmpToFMinToFMaxLegacy(*${select}, ${matchinfo}); }])>;
+ [{ return matchFMinFMaxLegacy(*${select}, ${matchinfo}); }]),
+ (apply [{ applySelectFCmpToFMinToFMaxLegacy(*${select}, ${matchinfo}); }])>;
def uchar_to_float : GICombineRule<
(defs root:$itofp),
(match (wip_match_opcode G_UITOFP, G_SITOFP):$itofp,
- [{ return PostLegalizerHelper.matchUCharToFloat(*${itofp}); }]),
- (apply [{ PostLegalizerHelper.applyUCharToFloat(*${itofp}); }])>;
+ [{ return matchUCharToFloat(*${itofp}); }]),
+ (apply [{ applyUCharToFloat(*${itofp}); }])>;
def rcp_sqrt_to_rsq : GICombineRule<
(defs root:$rcp, build_fn_matchinfo:$matchinfo),
(match (wip_match_opcode G_INTRINSIC, G_FSQRT):$rcp,
- [{ return PostLegalizerHelper.matchRcpSqrtToRsq(*${rcp}, ${matchinfo}); }]),
+ [{ return matchRcpSqrtToRsq(*${rcp}, ${matchinfo}); }]),
(apply [{ Helper.applyBuildFn(*${rcp}, ${matchinfo}); }])>;
-def cvt_f32_ubyteN_matchdata : GIDefMatchData<"AMDGPUPostLegalizerCombinerHelper::CvtF32UByteMatchInfo">;
+def cvt_f32_ubyteN_matchdata : GIDefMatchData<"CvtF32UByteMatchInfo">;
def cvt_f32_ubyteN : GICombineRule<
(defs root:$cvt_f32_ubyteN, cvt_f32_ubyteN_matchdata:$matchinfo),
@@ -42,18 +42,18 @@ def cvt_f32_ubyteN : GICombineRule<
G_AMDGPU_CVT_F32_UBYTE1,
G_AMDGPU_CVT_F32_UBYTE2,
G_AMDGPU_CVT_F32_UBYTE3):$cvt_f32_ubyteN,
- [{ return PostLegalizerHelper.matchCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }]),
- (apply [{ PostLegalizerHelper.applyCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }])>;
+ [{ return matchCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }]),
+ (apply [{ applyCvtF32UByteN(*${cvt_f32_ubyteN}, ${matchinfo}); }])>;
-def clamp_i64_to_i16_matchdata : GIDefMatchData<"AMDGPUPreLegalizerCombinerHelper::ClampI64ToI16MatchInfo">;
+def clamp_i64_to_i16_matchdata : GIDefMatchData<"ClampI64ToI16MatchInfo">;
def clamp_i64_to_i16 : GICombineRule<
(defs root:$clamp_i64_to_i16, clamp_i64_to_i16_matchdata:$matchinfo),
(match (wip_match_opcode G_TRUNC):$clamp_i64_to_i16,
- [{ return PreLegalizerHelper.matchClampI64ToI16(*${clamp_i64_to_i16}, MRI, *MF, ${matchinfo}); }]),
- (apply [{ PreLegalizerHelper.applyClampI64ToI16(*${clamp_i64_to_i16}, ${matchinfo}); }])>;
+ [{ return matchClampI64ToI16(*${clamp_i64_to_i16}, MRI, MF, ${matchinfo}); }]),
+ (apply [{ applyClampI64ToI16(*${clamp_i64_to_i16}, ${matchinfo}); }])>;
-def med3_matchdata : GIDefMatchData<"AMDGPURegBankCombinerHelper::Med3MatchInfo">;
+def med3_matchdata : GIDefMatchData<"Med3MatchInfo">;
def int_minmax_to_med3 : GICombineRule<
(defs root:$min_or_max, med3_matchdata:$matchinfo),
@@ -61,8 +61,8 @@ def int_minmax_to_med3 : GICombineRule<
G_SMIN,
G_UMAX,
G_UMIN):$min_or_max,
- [{ return RegBankHelper.matchIntMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]),
- (apply [{ RegBankHelper.applyMed3(*${min_or_max}, ${matchinfo}); }])>;
+ [{ return matchIntMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]),
+ (apply [{ applyMed3(*${min_or_max}, ${matchinfo}); }])>;
def fp_minmax_to_med3 : GICombineRule<
(defs root:$min_or_max, med3_matchdata:$matchinfo),
@@ -70,8 +70,8 @@ def fp_minmax_to_med3 : GICombineRule<
G_FMINNUM,
G_FMAXNUM_IEEE,
G_FMINNUM_IEEE):$min_or_max,
- [{ return RegBankHelper.matchFPMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]),
- (apply [{ RegBankHelper.applyMed3(*${min_or_max}, ${matchinfo}); }])>;
+ [{ return matchFPMinMaxToMed3(*${min_or_max}, ${matchinfo}); }]),
+ (apply [{ applyMed3(*${min_or_max}, ${matchinfo}); }])>;
def fp_minmax_to_clamp : GICombineRule<
(defs root:$min_or_max, register_matchinfo:$matchinfo),
@@ -79,21 +79,21 @@ def fp_minmax_to_clamp : GICombineRule<
G_FMINNUM,
G_FMAXNUM_IEEE,
G_FMINNUM_IEEE):$min_or_max,
- [{ return RegBankHelper.matchFPMinMaxToClamp(*${min_or_max}, ${matchinfo}); }]),
- (apply [{ RegBankHelper.applyClamp(*${min_or_max}, ${matchinfo}); }])>;
+ [{ return matchFPMinMaxToClamp(*${min_or_max}, ${matchinfo}); }]),
+ (apply [{ applyClamp(*${min_or_max}, ${matchinfo}); }])>;
def fmed3_intrinsic_to_clamp : GICombineRule<
(defs root:$fmed3, register_matchinfo:$matchinfo),
- (match (wip_match_opcode G_INTRINSIC):$fmed3,
- [{ return RegBankHelper.matchFPMed3ToClamp(*${fmed3}, ${matchinfo}); }]),
- (apply [{ RegBankHelper.applyClamp(*${fmed3}, ${matchinfo}); }])>;
+ (match (wip_match_opcode G_AMDGPU_FMED3):$fmed3,
+ [{ return matchFPMed3ToClamp(*${fmed3}, ${matchinfo}); }]),
+ (apply [{ applyClamp(*${fmed3}, ${matchinfo}); }])>;
def remove_fcanonicalize_matchinfo : GIDefMatchData<"Register">;
def remove_fcanonicalize : GICombineRule<
(defs root:$fcanonicalize, remove_fcanonicalize_matchinfo:$matchinfo),
(match (wip_match_opcode G_FCANONICALIZE):$fcanonicalize,
- [{ return PostLegalizerHelper.matchRemoveFcanonicalize(*${fcanonicalize}, ${matchinfo}); }]),
+ [{ return matchRemoveFcanonicalize(*${fcanonicalize}, ${matchinfo}); }]),
(apply [{ Helper.replaceSingleDefInstWithReg(*${fcanonicalize}, ${matchinfo}); }])>;
def foldable_fneg_matchdata : GIDefMatchData<"MachineInstr *">;
@@ -104,32 +104,56 @@ def foldable_fneg : GICombineRule<
[{ return Helper.matchFoldableFneg(*${ffn}, ${matchinfo}); }]),
(apply [{ Helper.applyFoldableFneg(*${ffn}, ${matchinfo}); }])>;
-// Combines which should only apply on SI/VI
+def sign_exension_in_reg_matchdata : GIDefMatchData<"MachineInstr *">;
+
+def sign_extension_in_reg : GICombineRule<
+ (defs root:$sign_inreg, sign_exension_in_reg_matchdata:$matchinfo),
+ (match (wip_match_opcode G_SEXT_INREG):$sign_inreg,
+ [{ return matchCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }]),
+ (apply [{ applyCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }])>;
+
+
+let Predicates = [Has16BitInsts, NotHasMed3_16] in {
+// For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This
+// saves one instruction compared to the promotion.
+//
+// FIXME: Should have ComplexPattern like in/out matchers
+//
+// FIXME: We should be able to match either G_AMDGPU_FMED3 or
+// G_INTRINSIC @llvm.amdgcn.fmed3. Currently the legalizer will
+// replace the intrinsic with G_AMDGPU_FMED3 since we can't write a
+// pattern to match it.
+def expand_promoted_fmed3 : GICombineRule<
+ (defs root:$fptrunc_dst),
+ (match (G_FPTRUNC $fptrunc_dst, $fmed3_dst):$fptrunc,
+ (G_AMDGPU_FMED3 $fmed3_dst, $src0, $src1, $src2),
+ [{ return Helper.matchExpandPromotedF16FMed3(*${fptrunc}, ${src0}.getReg(), ${src1}.getReg(), ${src2}.getReg()); }]),
+ (apply [{ Helper.applyExpandPromotedF16FMed3(*${fptrunc}, ${src0}.getReg(), ${src1}.getReg(), ${src2}.getReg()); }])
+>;
+
+} // End Predicates = [Has16BitInsts, NotHasMed3_16]
+
+// Combines which should only apply on SI/CI
def gfx6gfx7_combines : GICombineGroup<[fcmp_select_to_fmin_fmax_legacy]>;
-def AMDGPUPreLegalizerCombinerHelper: GICombinerHelper<
- "AMDGPUGenPreLegalizerCombinerHelper",
+// Combines which should only apply on VI
+def gfx8_combines : GICombineGroup<[expand_promoted_fmed3]>;
+
+def AMDGPUPreLegalizerCombiner: GICombinerHelper<
+ "AMDGPUPreLegalizerCombinerImpl",
[all_combines, clamp_i64_to_i16, foldable_fneg]> {
- let DisableRuleOption = "amdgpuprelegalizercombiner-disable-rule";
- let StateClass = "AMDGPUPreLegalizerCombinerHelperState";
- let AdditionalArguments = [];
}
-def AMDGPUPostLegalizerCombinerHelper: GICombinerHelper<
- "AMDGPUGenPostLegalizerCombinerHelper",
- [all_combines, gfx6gfx7_combines,
+def AMDGPUPostLegalizerCombiner: GICombinerHelper<
+ "AMDGPUPostLegalizerCombinerImpl",
+ [all_combines, gfx6gfx7_combines, gfx8_combines,
uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
- rcp_sqrt_to_rsq]> {
- let DisableRuleOption = "amdgpupostlegalizercombiner-disable-rule";
- let StateClass = "AMDGPUPostLegalizerCombinerHelperState";
- let AdditionalArguments = [];
+ rcp_sqrt_to_rsq, sign_extension_in_reg]> {
}
-def AMDGPURegBankCombinerHelper : GICombinerHelper<
- "AMDGPUGenRegBankCombinerHelper",
- [zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
+def AMDGPURegBankCombiner : GICombinerHelper<
+ "AMDGPURegBankCombinerImpl",
+ [unmerge_merge, unmerge_cst, unmerge_undef,
+ zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp]> {
- let DisableRuleOption = "amdgpuregbankcombiner-disable-rule";
- let StateClass = "AMDGPURegBankCombinerHelperState";
- let AdditionalArguments = [];
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
index 069baf748bfa..78fdedc0b511 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
@@ -380,3 +380,56 @@ void AMDGPUCombinerHelper::applyFoldableFneg(MachineInstr &MI,
MI.eraseFromParent();
}
+
+// TODO: Should return converted value / extension source and avoid introducing
+// intermediate fptruncs in the apply function.
+static bool isFPExtFromF16OrConst(const MachineRegisterInfo &MRI,
+ Register Reg) {
+ const MachineInstr *Def = MRI.getVRegDef(Reg);
+ if (Def->getOpcode() == TargetOpcode::G_FPEXT) {
+ Register SrcReg = Def->getOperand(1).getReg();
+ return MRI.getType(SrcReg) == LLT::scalar(16);
+ }
+
+ if (Def->getOpcode() == TargetOpcode::G_FCONSTANT) {
+ APFloat Val = Def->getOperand(1).getFPImm()->getValueAPF();
+ bool LosesInfo = true;
+ Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
+ return !LosesInfo;
+ }
+
+ return false;
+}
+
+bool AMDGPUCombinerHelper::matchExpandPromotedF16FMed3(MachineInstr &MI,
+ Register Src0,
+ Register Src1,
+ Register Src2) {
+ assert(MI.getOpcode() == TargetOpcode::G_FPTRUNC);
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (!MRI.hasOneNonDBGUse(SrcReg) || MRI.getType(SrcReg) != LLT::scalar(32))
+ return false;
+
+ return isFPExtFromF16OrConst(MRI, Src0) && isFPExtFromF16OrConst(MRI, Src1) &&
+ isFPExtFromF16OrConst(MRI, Src2);
+}
+
+void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
+ Register Src0,
+ Register Src1,
+ Register Src2) {
+ Builder.setInstrAndDebugLoc(MI);
+
+ // We expect fptrunc (fpext x) to fold out, and to constant fold any constant
+ // sources.
+ Src0 = Builder.buildFPTrunc(LLT::scalar(16), Src0).getReg(0);
+ Src1 = Builder.buildFPTrunc(LLT::scalar(16), Src1).getReg(0);
+ Src2 = Builder.buildFPTrunc(LLT::scalar(16), Src2).getReg(0);
+
+ LLT Ty = MRI.getType(Src0);
+ auto A1 = Builder.buildFMinNumIEEE(Ty, Src0, Src1);
+ auto B1 = Builder.buildFMaxNumIEEE(Ty, Src0, Src1);
+ auto C1 = Builder.buildFMaxNumIEEE(Ty, A1, Src2);
+ Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1);
+ MI.eraseFromParent();
+}
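// A minimal standalone sketch (plain C++, not from this patch; helper name is
// illustrative) of the min/max sequence applyExpandPromotedF16FMed3 emits:
// min(max(a, b), max(min(a, b), c)) selects the median of three values.
// std::min/std::max ignore the IEEE NaN propagation that
// fminnum_ieee/fmaxnum_ieee provide.
#include <algorithm>
#include <cstdio>

static float med3(float A, float B, float C) {
  float Lo = std::min(A, B);
  float Hi = std::max(A, B);
  return std::min(Hi, std::max(Lo, C));
}

int main() {
  std::printf("%g %g %g\n", med3(1.0f, 5.0f, 3.0f), med3(5.0f, 1.0f, 0.0f),
              med3(2.0f, 2.0f, 9.0f)); // prints 3 1 2
}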
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
index 1d4747136bf7..a933e85ce3ca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
@@ -1,4 +1,4 @@
-//=== lib/CodeGen/GlobalISel/AMDGPUCombinerHelper.h -----------------------===//
+//=== lib/CodeGen/GlobalISel/AMDGPUCombinerHelper.h -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -12,6 +12,9 @@
///
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCOMBINERHELPER_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUCOMBINERHELPER_H
+
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
@@ -23,4 +26,11 @@ public:
bool matchFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
void applyFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
+
+ bool matchExpandPromotedF16FMed3(MachineInstr &MI, Register Src0,
+ Register Src1, Register Src2);
+ void applyExpandPromotedF16FMed3(MachineInstr &MI, Register Src0,
+ Register Src1, Register Src2);
};
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUCOMBINERHELPER_H
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
index ba5a8799792a..a13447586bd4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCtorDtorLowering.cpp
@@ -31,15 +31,14 @@ static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
StringRef InitOrFiniKernelName = "amdgcn.device.init";
if (!IsCtor)
InitOrFiniKernelName = "amdgcn.device.fini";
+ if (M.getFunction(InitOrFiniKernelName))
+ return nullptr;
Function *InitOrFiniKernel = Function::createWithDefaultAttr(
FunctionType::get(Type::getVoidTy(M.getContext()), false),
- GlobalValue::ExternalLinkage, 0, InitOrFiniKernelName, &M);
- BasicBlock *InitOrFiniKernelBB =
- BasicBlock::Create(M.getContext(), "", InitOrFiniKernel);
- ReturnInst::Create(M.getContext(), InitOrFiniKernelBB);
-
+ GlobalValue::WeakODRLinkage, 0, InitOrFiniKernelName, &M);
InitOrFiniKernel->setCallingConv(CallingConv::AMDGPU_KERNEL);
+ InitOrFiniKernel->addFnAttr("amdgpu-flat-work-group-size", "1,1");
if (IsCtor)
InitOrFiniKernel->addFnAttr("device-init");
else
@@ -47,6 +46,71 @@ static Function *createInitOrFiniKernelFunction(Module &M, bool IsCtor) {
return InitOrFiniKernel;
}
+// The linker will provide the associated symbols to allow us to traverse the
+// global constructors / destructors in priority order. We create the IR
+// required to call each callback in this section. This is equivalent to the
+// following code.
+//
+// extern "C" void * __init_array_start[];
+// extern "C" void * __init_array_end[];
+//
+// using InitCallback = void();
+//
+// void call_init_array_callbacks() {
+// for (auto start = __init_array_start; start != __init_array_end; ++start)
+// reinterpret_cast<InitCallback *>(*start)();
+// }
+static void createInitOrFiniCalls(Function &F, bool IsCtor) {
+ Module &M = *F.getParent();
+ LLVMContext &C = M.getContext();
+
+ IRBuilder<> IRB(BasicBlock::Create(C, "entry", &F));
+ auto *LoopBB = BasicBlock::Create(C, "while.entry", &F);
+ auto *ExitBB = BasicBlock::Create(C, "while.end", &F);
+ Type *PtrTy = IRB.getPtrTy(AMDGPUAS::GLOBAL_ADDRESS);
+
+ auto *Begin = M.getOrInsertGlobal(
+ IsCtor ? "__init_array_start" : "__fini_array_start",
+ ArrayType::get(PtrTy, 0), [&]() {
+ return new GlobalVariable(
+ M, ArrayType::get(PtrTy, 0),
+ /*isConstant=*/true, GlobalValue::ExternalLinkage,
+ /*Initializer=*/nullptr,
+ IsCtor ? "__init_array_start" : "__fini_array_start",
+ /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal,
+ /*AddressSpace=*/1);
+ });
+ auto *End = M.getOrInsertGlobal(
+ IsCtor ? "__init_array_end" : "__fini_array_end",
+ ArrayType::get(PtrTy, 0), [&]() {
+ return new GlobalVariable(
+ M, ArrayType::get(PtrTy, 0),
+ /*isConstant=*/true, GlobalValue::ExternalLinkage,
+ /*Initializer=*/nullptr,
+ IsCtor ? "__init_array_end" : "__fini_array_end",
+ /*InsertBefore=*/nullptr, GlobalVariable::NotThreadLocal,
+ /*AddressSpace=*/1);
+ });
+
+ // The constructor type is supposed to allow using the argument vectors, but
+ // for now we just call them with no arguments.
+ auto *CallBackTy = FunctionType::get(IRB.getVoidTy(), {});
+
+ IRB.CreateCondBr(IRB.CreateICmpNE(Begin, End), LoopBB, ExitBB);
+ IRB.SetInsertPoint(LoopBB);
+ auto *CallBackPHI = IRB.CreatePHI(PtrTy, 2, "ptr");
+ auto *CallBack = IRB.CreateLoad(CallBackTy->getPointerTo(F.getAddressSpace()),
+ CallBackPHI, "callback");
+ IRB.CreateCall(CallBackTy, CallBack);
+ auto *NewCallBack = IRB.CreateConstGEP1_64(PtrTy, CallBackPHI, 1, "next");
+ auto *EndCmp = IRB.CreateICmpEQ(NewCallBack, End, "end");
+ CallBackPHI->addIncoming(Begin, &F.getEntryBlock());
+ CallBackPHI->addIncoming(NewCallBack, LoopBB);
+ IRB.CreateCondBr(EndCmp, ExitBB, LoopBB);
+ IRB.SetInsertPoint(ExitBB);
+ IRB.CreateRetVoid();
+}
+
static bool createInitOrFiniKernel(Module &M, StringRef GlobalName,
bool IsCtor) {
GlobalVariable *GV = M.getGlobalVariable(GlobalName);
@@ -57,18 +121,12 @@ static bool createInitOrFiniKernel(Module &M, StringRef GlobalName,
return false;
Function *InitOrFiniKernel = createInitOrFiniKernelFunction(M, IsCtor);
- IRBuilder<> IRB(InitOrFiniKernel->getEntryBlock().getTerminator());
-
- FunctionType *ConstructorTy = InitOrFiniKernel->getFunctionType();
+ if (!InitOrFiniKernel)
+ return false;
- for (Value *V : GA->operands()) {
- auto *CS = cast<ConstantStruct>(V);
- IRB.CreateCall(ConstructorTy, CS->getOperand(1));
- }
+ createInitOrFiniCalls(*InitOrFiniKernel, IsCtor);
appendToUsed(M, {InitOrFiniKernel});
-
- GV->eraseFromParent();
return true;
}
@@ -83,17 +141,15 @@ class AMDGPUCtorDtorLoweringLegacy final : public ModulePass {
public:
static char ID;
AMDGPUCtorDtorLoweringLegacy() : ModulePass(ID) {}
- bool runOnModule(Module &M) override {
- return lowerCtorsAndDtors(M);
- }
+ bool runOnModule(Module &M) override { return lowerCtorsAndDtors(M); }
};
} // End anonymous namespace
PreservedAnalyses AMDGPUCtorDtorLoweringPass::run(Module &M,
ModuleAnalysisManager &AM) {
- lowerCtorsAndDtors(M);
- return PreservedAnalyses::all();
+ return lowerCtorsAndDtors(M) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
}
char AMDGPUCtorDtorLoweringLegacy::ID = 0;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
index 7e7dbacaac11..37df4f68c265 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -31,6 +31,10 @@ def gi_vop3mods :
GIComplexOperandMatcher<s32, "selectVOP3Mods">,
GIComplexPatternEquiv<VOP3Mods>;
+def gi_vop3modsnoncanonicalizing :
+ GIComplexOperandMatcher<s32, "selectVOP3ModsNonCanonicalizing">,
+ GIComplexPatternEquiv<VOP3ModsNonCanonicalizing>;
+
def gi_vop3_no_mods :
GIComplexOperandMatcher<s32, "selectVOP3NoMods">,
GIComplexPatternEquiv<VOP3NoMods>;
@@ -153,6 +157,10 @@ def gi_vop3_mad_mix_mods :
GIComplexOperandMatcher<s64, "selectVOP3PMadMixMods">,
GIComplexPatternEquiv<VOP3PMadMixMods>;
+def gi_vop3_mad_mix_mods_ext :
+ GIComplexOperandMatcher<s64, "selectVOP3PMadMixModsExt">,
+ GIComplexPatternEquiv<VOP3PMadMixModsExt>;
+
// Separate load nodes are defined to glue m0 initialization in
// SelectionDAG. The GISel selector can just insert m0 initialization
// directly before selecting a glue-less load, so hide this
@@ -227,10 +235,8 @@ def : GINodeEquiv<G_AMDGPU_TBUFFER_STORE_FORMAT, SItbuffer_store>;
def : GINodeEquiv<G_AMDGPU_TBUFFER_STORE_FORMAT_D16, SItbuffer_store_d16>;
// FIXME: Check MMO is atomic
-def : GINodeEquiv<G_AMDGPU_ATOMIC_INC, SIatomic_inc>;
-def : GINodeEquiv<G_AMDGPU_ATOMIC_DEC, SIatomic_dec>;
-def : GINodeEquiv<G_AMDGPU_ATOMIC_INC, atomic_inc_glue>;
-def : GINodeEquiv<G_AMDGPU_ATOMIC_DEC, atomic_dec_glue>;
+def : GINodeEquiv<G_ATOMICRMW_UINC_WRAP, atomic_load_uinc_wrap_glue>;
+def : GINodeEquiv<G_ATOMICRMW_UDEC_WRAP, atomic_load_udec_wrap_glue>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_FMIN, SIatomic_fmin>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_FMAX, SIatomic_fmax>;
def : GINodeEquiv<G_AMDGPU_ATOMIC_FMIN, atomic_load_fmin_glue>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
index 2ffc8b2a3a7b..09930dc9612c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -10,8 +10,8 @@
#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/IR/Constants.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
using namespace llvm;
using namespace MIPatternMatch;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index a71ba6b77565..dadc0c92ef8b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -418,9 +418,7 @@ void MetadataStreamerYamlV2::emitHiddenKernelArgs(const Function &Func,
}
if (HiddenArgNumBytes >= 48) {
- if (!Func.hasFnAttribute("amdgpu-no-completion-action") &&
- // FIXME: Hack for runtime bug if we fail to optimize this out
- Func.hasFnAttribute("calls-enqueue-kernel")) {
+ if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
} else {
emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
@@ -854,9 +852,7 @@ void MetadataStreamerMsgPackV3::emitHiddenKernelArgs(
}
if (HiddenArgNumBytes >= 48) {
- if (!Func.hasFnAttribute("amdgpu-no-completion-action") &&
- // FIXME: Hack for runtime bug if we fail to optimize this out
- Func.hasFnAttribute("calls-enqueue-kernel")) {
+ if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
Args);
} else {
@@ -876,7 +872,8 @@ void MetadataStreamerMsgPackV3::emitHiddenKernelArgs(
}
msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps(
- const MachineFunction &MF, const SIProgramInfo &ProgramInfo) const {
+ const MachineFunction &MF, const SIProgramInfo &ProgramInfo,
+ unsigned CodeObjectVersion) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
const Function &F = MF.getFunction();
@@ -890,10 +887,11 @@ msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps(
Kern.getDocument()->getNode(ProgramInfo.LDSSize);
Kern[".private_segment_fixed_size"] =
Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
- if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5)
+ if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
Kern[".uses_dynamic_stack"] =
Kern.getDocument()->getNode(ProgramInfo.DynamicCallStack);
- if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5 && STM.supportsWGP())
+
+ if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP())
Kern[".workgroup_processor_mode"] =
Kern.getDocument()->getNode(ProgramInfo.WgpMode);
@@ -945,10 +943,12 @@ void MetadataStreamerMsgPackV3::end() {
void MetadataStreamerMsgPackV3::emitKernel(const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) {
auto &Func = MF.getFunction();
- auto Kern = getHSAKernelProps(MF, ProgramInfo);
+ if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
+ Func.getCallingConv() != CallingConv::SPIR_KERNEL)
+ return;
- assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
- Func.getCallingConv() == CallingConv::SPIR_KERNEL);
+ auto CodeObjectVersion = AMDGPU::getCodeObjectVersion(*Func.getParent());
+ auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion);
auto Kernels =
getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
@@ -1079,9 +1079,7 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
Offset += 8; // Skipped.
}
- if (!Func.hasFnAttribute("amdgpu-no-completion-action") &&
- // FIXME: Hack for runtime bug
- Func.hasFnAttribute("calls-enqueue-kernel")) {
+ if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
Args);
} else {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index 91670b9820a2..7d7080e920f5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -84,7 +84,8 @@ protected:
msgpack::ArrayDocNode getWorkGroupDimensions(MDNode *Node) const;
msgpack::MapDocNode getHSAKernelProps(const MachineFunction &MF,
- const SIProgramInfo &ProgramInfo) const;
+ const SIProgramInfo &ProgramInfo,
+ unsigned CodeObjectVersion) const;
void emitVersion() override;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
index fc0df61952e4..ffa6c88f9d41 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
@@ -80,6 +80,37 @@ enum class SchedGroupMask {
LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ ALL)
};
+class SchedGroup;
+
+// InstructionRule class is used to enact a filter which determines whether or
+// not an SU maps to a given SchedGroup. It contains complementary data
+ // structures (e.g. Cache) to help those filters.
+class InstructionRule {
+protected:
+ const SIInstrInfo *TII;
+ unsigned SGID;
+ // A cache made available to the Filter to store SUnits for subsequent
+ // invocations of the Filter
+ std::optional<SmallVector<SUnit *, 4>> Cache;
+
+public:
+ virtual bool
+ apply(const SUnit *, const ArrayRef<SUnit *>,
+ SmallVectorImpl<SchedGroup> &) {
+ return true;
+ };
+
+ InstructionRule(const SIInstrInfo *TII, unsigned SGID,
+ bool NeedsCache = false)
+ : TII(TII), SGID(SGID) {
+ if (NeedsCache) {
+ Cache = SmallVector<SUnit *, 4>();
+ }
+ }
+
+ virtual ~InstructionRule() = default;
+};
+
typedef DenseMap<SUnit *, SmallVector<int, 4>> SUnitsToCandidateSGsMap;
// Classify instructions into groups to enable fine tuned control over the
@@ -102,11 +133,12 @@ private:
// SGID is used to map instructions to candidate SchedGroups
unsigned SGID;
+ // The different rules each instruction in this SchedGroup must conform to
+ SmallVector<std::shared_ptr<InstructionRule>, 4> Rules;
+
// Count of the number of created SchedGroups, used to initialize SGID.
static unsigned NumSchedGroups;
- ScheduleDAGInstrs *DAG;
-
const SIInstrInfo *TII;
// Try to add and edge from SU A to SU B.
@@ -120,6 +152,8 @@ public:
// Collection of SUnits that are classified as members of this group.
SmallVector<SUnit *, 32> Collection;
+ ScheduleDAGInstrs *DAG;
+
// Returns true if SU can be added to this SchedGroup.
bool canAddSU(SUnit &SU) const;
@@ -145,6 +179,28 @@ public:
// Returns true if no more instructions may be added to this group.
bool isFull() const { return MaxSize && Collection.size() >= *MaxSize; }
+ // Append a constraint that SUs must meet in order to fit into this
+ // SchedGroup. Since many rules involve the relationship between a SchedGroup
+ // and the SUnits in other SchedGroups, rules are checked at Pipeline Solve
+ // and the SUnits in other SchedGroups, rules are checked at Pipeline Solve
+ void addRule(std::shared_ptr<InstructionRule> NewRule) {
+ Rules.push_back(NewRule);
+ }
+
+ // Returns true if the SU matches all rules
+ bool allowedByRules(const SUnit *SU,
+ SmallVectorImpl<SchedGroup> &SyncPipe) const {
+ if (Rules.empty())
+ return true;
+ for (size_t I = 0; I < Rules.size(); I++) {
+ auto TheRule = Rules[I].get();
+ if (!TheRule->apply(SU, Collection, SyncPipe)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
// Add SU to the SchedGroup.
void add(SUnit &SU) {
LLVM_DEBUG(dbgs() << "For SchedGroup with mask "
@@ -177,13 +233,13 @@ public:
SchedGroup(SchedGroupMask SGMask, std::optional<unsigned> MaxSize,
ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
- : SGMask(SGMask), MaxSize(MaxSize), DAG(DAG), TII(TII) {
+ : SGMask(SGMask), MaxSize(MaxSize), TII(TII), DAG(DAG) {
SGID = NumSchedGroups++;
}
SchedGroup(SchedGroupMask SGMask, std::optional<unsigned> MaxSize, int SyncID,
ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
- : SGMask(SGMask), MaxSize(MaxSize), SyncID(SyncID), DAG(DAG), TII(TII) {
+ : SGMask(SGMask), MaxSize(MaxSize), SyncID(SyncID), TII(TII), DAG(DAG) {
SGID = NumSchedGroups++;
}
};
@@ -254,6 +310,9 @@ class PipelineSolver {
// How many branches we have explored
uint64_t BranchesExplored = 0;
+ // The direction in which we process the candidate SchedGroups per SU
+ bool IsBottomUp = 1;
+
// Update indices to fit next conflicting instruction
void advancePosition();
// Recede indices to attempt to find better fit for previous conflicting
@@ -264,19 +323,35 @@ class PipelineSolver {
bool solveExact();
// The polynomial time algorithm which attempts to find a good fit
bool solveGreedy();
+ // Find the best SchedGroup for the current SU using the heuristic given all
+ // current information. One step in the greedy algorithm. Templated against
+ // the SchedGroup iterator (either reverse or forward).
+ template <typename T>
+ void greedyFind(std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges, T I,
+ T E);
// Whether or not the current solution is optimal
bool checkOptimal();
// Populate the ready list, prioritizing fewest missed edges first
- void populateReadyList(SUToCandSGsPair &CurrSU,
- SmallVectorImpl<std::pair<int, int>> &ReadyList,
- SmallVectorImpl<SchedGroup> &SyncPipeline);
+ // Templated against the SchedGroup iterator (either reverse or forward).
+ template <typename T>
+ void populateReadyList(SmallVectorImpl<std::pair<int, int>> &ReadyList, T I,
+ T E);
// Add edges corresponding to the SchedGroups as assigned by solver
void makePipeline();
+ // Link the SchedGroups in the best found pipeline.
+ // Templated against the SchedGroup iterator (either reverse or forward).
+ template <typename T> void linkSchedGroups(T I, T E);
// Add the edges from the SU to the other SchedGroups in pipeline, and
// return the number of edges missed.
int addEdges(SmallVectorImpl<SchedGroup> &SyncPipeline, SUnit *SU, int SGID,
std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges);
- // Remove the edges passed via AddedEdges
+ // Link the pipeline as if \p SU was in the SchedGroup with ID \p SGID. It
+ // returns the cost (in terms of missed pipeline edges), and tracks the edges
+ // added in \p AddedEdges
+ template <typename T>
+ int linkSUnit(SUnit *SU, int SGID,
+ std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges, T I, T E);
+ // Remove the edges passed via \p AddedEdges
void removeEdges(const std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges);
// Convert the passed in maps to arrays for bidirectional iterators
void convertSyncMapsToArrays();
@@ -290,9 +365,9 @@ public:
PipelineSolver(DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups,
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
- ScheduleDAGMI *DAG)
+ ScheduleDAGMI *DAG, bool IsBottomUp = 1)
: DAG(DAG), SyncedInstrs(SyncedInstrs),
- SyncedSchedGroups(SyncedSchedGroups) {
+ SyncedSchedGroups(SyncedSchedGroups), IsBottomUp(IsBottomUp) {
for (auto &PipelineInstrs : SyncedInstrs) {
if (PipelineInstrs.second.size() > 0) {
@@ -363,14 +438,28 @@ void PipelineSolver::convertSyncMapsToArrays() {
}
}
+template <typename T> void PipelineSolver::linkSchedGroups(T I, T E) {
+ for (; I != E; ++I) {
+ auto &GroupA = *I;
+ for (auto J = std::next(I); J != E; ++J) {
+ auto &GroupB = *J;
+ GroupA.link(GroupB);
+ }
+ }
+}
+
void PipelineSolver::makePipeline() {
// Preserve the order of barrier for subsequent SchedGroupBarrier mutations
for (auto &SyncPipeline : BestPipeline) {
+ LLVM_DEBUG(dbgs() << "Printing SchedGroups\n");
for (auto &SG : SyncPipeline) {
+ LLVM_DEBUG(dbgs() << "SchedGroup with SGID " << SG.getSGID()
+ << " has: \n");
SUnit *SGBarr = nullptr;
for (auto &SU : SG.Collection) {
if (SU->getInstr()->getOpcode() == AMDGPU::SCHED_GROUP_BARRIER)
SGBarr = SU;
+ LLVM_DEBUG(dbgs() << "SU(" << SU->NodeNum << ")\n");
}
// Command line requested IGroupLP doesn't have SGBarr
if (!SGBarr)
@@ -381,43 +470,47 @@ void PipelineSolver::makePipeline() {
}
for (auto &SyncPipeline : BestPipeline) {
- auto I = SyncPipeline.rbegin();
- auto E = SyncPipeline.rend();
- for (; I != E; ++I) {
- auto &GroupA = *I;
- for (auto J = std::next(I); J != E; ++J) {
- auto &GroupB = *J;
- GroupA.link(GroupB);
- }
- }
+ IsBottomUp ? linkSchedGroups(SyncPipeline.rbegin(), SyncPipeline.rend())
+ : linkSchedGroups(SyncPipeline.begin(), SyncPipeline.end());
}
}
-int PipelineSolver::addEdges(
- SmallVectorImpl<SchedGroup> &SyncPipeline, SUnit *SU, int SGID,
- std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges) {
- int AddedCost = 0;
+template <typename T>
+int PipelineSolver::linkSUnit(
+ SUnit *SU, int SGID, std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges,
+ T I, T E) {
bool MakePred = false;
-
- // The groups in the pipeline are in reverse order. Thus,
- // by traversing them from last to first, we are traversing
- // them in the order as they were introduced in the code. After we
- // pass the group the SU is being assigned to, it should be
- // linked as a predecessor of the subsequent SchedGroups
- auto GroupNo = (int)SyncPipeline.size() - 1;
- for (; GroupNo >= 0; GroupNo--) {
- if (SyncPipeline[GroupNo].getSGID() == SGID) {
+ int AddedCost = 0;
+ for (; I < E; ++I) {
+ if (I->getSGID() == SGID) {
MakePred = true;
continue;
}
- auto Group = &SyncPipeline[GroupNo];
- AddedCost += Group->link(*SU, MakePred, AddedEdges);
+ auto Group = *I;
+ AddedCost += Group.link(*SU, MakePred, AddedEdges);
assert(AddedCost >= 0);
}
-
return AddedCost;
}
+int PipelineSolver::addEdges(
+ SmallVectorImpl<SchedGroup> &SyncPipeline, SUnit *SU, int SGID,
+ std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges) {
+
+ // For IsBottomUp, the first SchedGroup in SyncPipeline contains the
+ // instructions that are the ultimate successors in the resultant mutation.
+ // Therefore, in such a configuration, the SchedGroups occurring before the
+ // candidate SGID are successors of the candidate SchedGroup, thus the current
+ // SU should be linked as a predecessor to SUs in those SchedGroups. The
+ // opposite is true if !IsBottomUp. IsBottomUp occurs in the case of multiple
+ // SCHED_GROUP_BARRIERS, or if a user specifies IGLP_OPT SchedGroups using
+ // IsBottomUp (in reverse).
+ return IsBottomUp ? linkSUnit(SU, SGID, AddedEdges, SyncPipeline.rbegin(),
+ SyncPipeline.rend())
+ : linkSUnit(SU, SGID, AddedEdges, SyncPipeline.begin(),
+ SyncPipeline.end());
+}
+
void PipelineSolver::removeEdges(
const std::vector<std::pair<SUnit *, SUnit *>> &EdgesToRemove) {
// Only remove the edges that we have added when testing
@@ -490,12 +583,13 @@ bool PipelineSolver::checkOptimal() {
return (DoneExploring || BestCost == 0);
}
+template <typename T>
void PipelineSolver::populateReadyList(
- SUToCandSGsPair &CurrSU, SmallVectorImpl<std::pair<int, int>> &ReadyList,
- SmallVectorImpl<SchedGroup> &SyncPipeline) {
+ SmallVectorImpl<std::pair<int, int>> &ReadyList, T I, T E) {
+ SUToCandSGsPair CurrSU = PipelineInstrs[CurrSyncGroupIdx][CurrConflInstNo];
+ auto SyncPipeline = CurrPipeline[CurrSyncGroupIdx];
assert(CurrSU.second.size() >= 1);
- auto I = CurrSU.second.rbegin();
- auto E = CurrSU.second.rend();
+
for (; I != E; ++I) {
std::vector<std::pair<SUnit *, SUnit *>> AddedEdges;
int CandSGID = *I;
@@ -545,7 +639,10 @@ bool PipelineSolver::solveExact() {
// SchedGroup -> Cost pairs
SmallVector<std::pair<int, int>, 4> ReadyList;
// Prioritize the candidate sched groups in terms of lowest cost first
- populateReadyList(CurrSU, ReadyList, CurrPipeline[CurrSyncGroupIdx]);
+ IsBottomUp ? populateReadyList(ReadyList, CurrSU.second.rbegin(),
+ CurrSU.second.rend())
+ : populateReadyList(ReadyList, CurrSU.second.begin(),
+ CurrSU.second.end());
auto I = ReadyList.begin();
auto E = ReadyList.end();
@@ -569,6 +666,9 @@ bool PipelineSolver::solveExact() {
if (Match->isFull())
continue;
+ if (!Match->allowedByRules(CurrSU.first, SyncPipeline))
+ continue;
+
LLVM_DEBUG(dbgs() << "Assigning to SchedGroup with Mask "
<< (int)Match->getMask() << "and ID " << CandSGID
<< "\n");
@@ -620,64 +720,75 @@ bool PipelineSolver::solveExact() {
return FinishedExploring;
}
-bool PipelineSolver::solveGreedy() {
- BestCost = 0;
- std::vector<std::pair<SUnit *, SUnit *>> AddedEdges;
+template <typename T>
+void PipelineSolver::greedyFind(
+ std::vector<std::pair<SUnit *, SUnit *>> &AddedEdges, T I, T E) {
+ SUToCandSGsPair CurrSU = PipelineInstrs[CurrSyncGroupIdx][CurrConflInstNo];
+ int BestNodeCost = -1;
+ int TempCost;
+ SchedGroup *BestGroup = nullptr;
+ int BestGroupID = -1;
+ auto &SyncPipeline = CurrPipeline[CurrSyncGroupIdx];
+ LLVM_DEBUG(dbgs() << "Fitting SU(" << CurrSU.first->NodeNum
+ << ") in Pipeline # " << CurrSyncGroupIdx << "\n");
- while (static_cast<size_t>(CurrSyncGroupIdx) < PipelineInstrs.size()) {
- SUToCandSGsPair CurrSU = PipelineInstrs[CurrSyncGroupIdx][CurrConflInstNo];
- int BestNodeCost = -1;
- int TempCost;
- SchedGroup *BestGroup = nullptr;
- int BestGroupID = -1;
- auto &SyncPipeline = CurrPipeline[CurrSyncGroupIdx];
- LLVM_DEBUG(dbgs() << "Fitting SU(" << CurrSU.first->NodeNum
- << ") in Pipeline # " << CurrSyncGroupIdx << "\n");
-
- // Since we have added the potential SchedGroups from bottom up, but
- // traversed the DAG from top down, parse over the groups from last to
- // first. If we fail to do this for the greedy algorithm, the solution will
- // likely not be good in more complex cases.
- auto I = CurrSU.second.rbegin();
- auto E = CurrSU.second.rend();
- for (; I != E; ++I) {
- std::vector<std::pair<SUnit *, SUnit *>> AddedEdges;
- int CandSGID = *I;
- SchedGroup *Match;
- for (auto &SG : SyncPipeline) {
- if (SG.getSGID() == CandSGID)
- Match = &SG;
- }
+ // Since we have added the potential SchedGroups from bottom up, but
+ // traversed the DAG from top down, parse over the groups from last to
+ // first. If we fail to do this for the greedy algorithm, the solution will
+ // likely not be good in more complex cases.
+ for (; I != E; ++I) {
+ std::vector<std::pair<SUnit *, SUnit *>> AddedEdges;
+ int CandSGID = *I;
+ SchedGroup *Match;
+ for (auto &SG : SyncPipeline) {
+ if (SG.getSGID() == CandSGID)
+ Match = &SG;
+ }
- LLVM_DEBUG(dbgs() << "Trying SGID # " << CandSGID << " with Mask "
- << (int)Match->getMask() << "\n");
+ LLVM_DEBUG(dbgs() << "Trying SGID # " << CandSGID << " with Mask "
+ << (int)Match->getMask() << "\n");
- if (Match->isFull()) {
- LLVM_DEBUG(dbgs() << "SGID # " << CandSGID << " is full\n");
- continue;
- }
- TempCost = addEdges(SyncPipeline, CurrSU.first, CandSGID, AddedEdges);
- LLVM_DEBUG(dbgs() << "Cost of Group " << TempCost << "\n");
- if (TempCost < BestNodeCost || BestNodeCost == -1) {
- BestGroup = Match;
- BestNodeCost = TempCost;
- BestGroupID = CandSGID;
- }
- removeEdges(AddedEdges);
- if (BestNodeCost == 0)
- break;
+ if (Match->isFull()) {
+ LLVM_DEBUG(dbgs() << "SGID # " << CandSGID << " is full\n");
+ continue;
+ }
+ if (!Match->allowedByRules(CurrSU.first, SyncPipeline)) {
+ LLVM_DEBUG(dbgs() << "SGID # " << CandSGID << " has conflicting rule\n");
+ continue;
}
+ TempCost = addEdges(SyncPipeline, CurrSU.first, CandSGID, AddedEdges);
+ LLVM_DEBUG(dbgs() << "Cost of Group " << TempCost << "\n");
+ if (TempCost < BestNodeCost || BestNodeCost == -1) {
+ BestGroup = Match;
+ BestNodeCost = TempCost;
+ BestGroupID = CandSGID;
+ }
+ removeEdges(AddedEdges);
+ if (BestNodeCost == 0)
+ break;
+ }
- if (BestGroupID != -1) {
- BestGroup->add(*CurrSU.first);
- addEdges(SyncPipeline, CurrSU.first, BestGroupID, AddedEdges);
- LLVM_DEBUG(dbgs() << "Best Group has ID: " << BestGroupID << " and Mask"
- << (int)BestGroup->getMask() << "\n");
- BestCost += TempCost;
- } else
- BestCost += MissPenalty;
+ if (BestGroupID != -1) {
+ BestGroup->add(*CurrSU.first);
+ addEdges(SyncPipeline, CurrSU.first, BestGroupID, AddedEdges);
+ LLVM_DEBUG(dbgs() << "Best Group has ID: " << BestGroupID << " and Mask"
+ << (int)BestGroup->getMask() << "\n");
+ BestCost += TempCost;
+ } else
+ BestCost += MissPenalty;
- CurrPipeline[CurrSyncGroupIdx] = SyncPipeline;
+ CurrPipeline[CurrSyncGroupIdx] = SyncPipeline;
+}
+
+bool PipelineSolver::solveGreedy() {
+ BestCost = 0;
+ std::vector<std::pair<SUnit *, SUnit *>> AddedEdges;
+
+ while (static_cast<size_t>(CurrSyncGroupIdx) < PipelineInstrs.size()) {
+ SUToCandSGsPair CurrSU = PipelineInstrs[CurrSyncGroupIdx][CurrConflInstNo];
+ IsBottomUp
+ ? greedyFind(AddedEdges, CurrSU.second.rbegin(), CurrSU.second.rend())
+ : greedyFind(AddedEdges, CurrSU.second.begin(), CurrSU.second.end());
advancePosition();
}
BestPipeline = CurrPipeline;
@@ -721,9 +832,14 @@ void PipelineSolver::solve() {
}
makePipeline();
+ LLVM_DEBUG(dbgs() << "After applying mutation\n");
+ LLVM_DEBUG(DAG->dump());
}
-enum IGLPStrategyID : int { MFMASmallGemmOptID = 0 };
+enum IGLPStrategyID : int {
+ MFMASmallGemmOptID = 0,
+ MFMASmallGemmSingleWaveOptID = 1,
+};
// Implement an IGLP scheduling strategy.
class IGLPStrategy {
@@ -741,6 +857,8 @@ public:
// Returns true if this strategy should be applied to a ScheduleDAG.
virtual bool shouldApplyStrategy(ScheduleDAGInstrs *DAG) = 0;
+ bool IsBottomUp = 1;
+
IGLPStrategy(ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
: DAG(DAG), TII(TII) {}
@@ -748,6 +866,7 @@ public:
};
class MFMASmallGemmOpt final : public IGLPStrategy {
+private:
public:
void applyIGLPStrategy(
DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
@@ -756,7 +875,9 @@ public:
bool shouldApplyStrategy(ScheduleDAGInstrs *DAG) override { return true; }
MFMASmallGemmOpt(ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
- : IGLPStrategy(DAG, TII) {}
+ : IGLPStrategy(DAG, TII) {
+ IsBottomUp = 1;
+ }
};
void MFMASmallGemmOpt::applyIGLPStrategy(
@@ -781,12 +902,456 @@ void MFMASmallGemmOpt::applyIGLPStrategy(
}
}
+class MFMASmallGemmSingleWaveOpt final : public IGLPStrategy {
+private:
+ // Whether the DS_READ is a predecessor of the first four MFMAs in the region
+ class EnablesInitialMFMA final : public InstructionRule {
+ public:
+ bool apply(const SUnit *SU, const ArrayRef<SUnit *> Collection,
+ SmallVectorImpl<SchedGroup> &SyncPipe) override {
+ if (!SyncPipe.size())
+ return false;
+ int MFMAsFound = 0;
+ if (!Cache->size()) {
+ for (auto &Elt : SyncPipe[0].DAG->SUnits) {
+ if (TII->isMFMAorWMMA(*Elt.getInstr())) {
+ ++MFMAsFound;
+ if (MFMAsFound > 4)
+ break;
+ Cache->push_back(&Elt);
+ }
+ }
+ }
+
+ assert(Cache->size());
+ auto DAG = SyncPipe[0].DAG;
+ for (auto &Elt : *Cache) {
+ if (DAG->IsReachable(Elt, const_cast<SUnit *>(SU)))
+ return true;
+ }
+ return false;
+ }
+
+ EnablesInitialMFMA(const SIInstrInfo *TII, unsigned SGID,
+ bool NeedsCache = false)
+ : InstructionRule(TII, SGID, NeedsCache) {}
+ };
+
+ // Whether the MI is a V_PERM and is a predecessor of a common DS_WRITE
+ class IsPermForDSW final : public InstructionRule {
+ public:
+ bool apply(const SUnit *SU, const ArrayRef<SUnit *> Collection,
+ SmallVectorImpl<SchedGroup> &SyncPipe) override {
+ auto MI = SU->getInstr();
+ if (MI->getOpcode() != AMDGPU::V_PERM_B32_e64)
+ return false;
+
+ bool FitsInGroup = false;
+ // Does the VALU have a DS_WRITE successor
+ if (!Collection.size()) {
+ for (auto &Succ : SU->Succs) {
+ SUnit *SuccUnit = Succ.getSUnit();
+ if (TII->isDS(*SuccUnit->getInstr()) &&
+ SuccUnit->getInstr()->mayStore()) {
+ Cache->push_back(SuccUnit);
+ FitsInGroup = true;
+ }
+ }
+ return FitsInGroup;
+ }
+
+ assert(Cache->size());
+
+ // Does the VALU have a DS_WRITE successor that is the same as other
+ // VALU already in the group. The V_PERMs will all share 1 DS_W succ
+ return std::any_of(Cache->begin(), Cache->end(), [&SU](SUnit *Elt) {
+ return std::any_of(SU->Succs.begin(), SU->Succs.end(),
+ [&Elt](const SDep &ThisSucc) {
+ return ThisSucc.getSUnit() == Elt;
+ });
+ });
+ }
+
+ IsPermForDSW(const SIInstrInfo *TII, unsigned SGID, bool NeedsCache = false)
+ : InstructionRule(TII, SGID, NeedsCache) {}
+ };
+
+ // Whether the SU is a successor of any element in previous SchedGroup
+ class IsSuccOfPrevGroup final : public InstructionRule {
+ public:
+ bool apply(const SUnit *SU, const ArrayRef<SUnit *> Collection,
+ SmallVectorImpl<SchedGroup> &SyncPipe) override {
+ SchedGroup *OtherGroup = nullptr;
+ for (auto &PipeSG : SyncPipe) {
+ if ((unsigned)PipeSG.getSGID() == SGID - 1) {
+ OtherGroup = &PipeSG;
+ }
+ }
+
+ if (!OtherGroup)
+ return false;
+ if (!OtherGroup->Collection.size())
+ return true;
+
+ // Does the previous VALU have this DS_Write as a successor
+ return (std::any_of(OtherGroup->Collection.begin(),
+ OtherGroup->Collection.end(), [&SU](SUnit *Elt) {
+ return std::any_of(Elt->Succs.begin(),
+ Elt->Succs.end(),
+ [&SU](SDep &Succ) {
+ return Succ.getSUnit() == SU;
+ });
+ }));
+ }
+ IsSuccOfPrevGroup(const SIInstrInfo *TII, unsigned SGID,
+ bool NeedsCache = false)
+ : InstructionRule(TII, SGID, NeedsCache) {}
+ };
+
+ // Whether the combined load width of group is 128 bits
+ class VMEMSize final : public InstructionRule {
+ public:
+ bool apply(const SUnit *SU, const ArrayRef<SUnit *> Collection,
+ SmallVectorImpl<SchedGroup> &SyncPipe) override {
+ auto MI = SU->getInstr();
+ if (MI->getOpcode() == TargetOpcode::BUNDLE)
+ return false;
+ if (!Collection.size())
+ return true;
+
+ int NumBits = 0;
+
+ auto TRI = TII->getRegisterInfo();
+ auto &MRI = MI->getParent()->getParent()->getRegInfo();
+ for (auto &Elt : Collection) {
+ auto Op = Elt->getInstr()->getOperand(0);
+ auto Size =
+ TRI.getRegSizeInBits(*TRI.getRegClassForOperandReg(MRI, Op));
+ NumBits += Size;
+ }
+
+ if (NumBits < 128) {
+ assert(TII->isVMEM(*MI) && MI->mayLoad());
+ if (NumBits + TRI.getRegSizeInBits(*TRI.getRegClassForOperandReg(
+ MRI, MI->getOperand(0))) <=
+ 128)
+ return true;
+ }
+
+ return false;
+ }
+
+ VMEMSize(const SIInstrInfo *TII, unsigned SGID, bool NeedsCache = false)
+ : InstructionRule(TII, SGID, NeedsCache) {}
+ };
+
+ // Whether the SU shares a V_PERM predecessor with any SU in the SchedGroup
+ // that is \p Distance steps away
+ class SharesPredWithPrevNthGroup final : public InstructionRule {
+ private:
+ unsigned Distance = 1;
+
+ public:
+ bool apply(const SUnit *SU, const ArrayRef<SUnit *> Collection,
+ SmallVectorImpl<SchedGroup> &SyncPipe) override {
+ SchedGroup *OtherGroup = nullptr;
+ if (!SyncPipe.size())
+ return false;
+
+ if (!Cache->size()) {
+
+ for (auto &PipeSG : SyncPipe) {
+ if ((unsigned)PipeSG.getSGID() == SGID - Distance) {
+ OtherGroup = &PipeSG;
+ }
+ }
+
+ if (!OtherGroup)
+ return false;
+ if (!OtherGroup->Collection.size())
+ return true;
+
+ for (auto &OtherEle : OtherGroup->Collection) {
+ for (auto &Pred : OtherEle->Preds) {
+ if (Pred.getSUnit()->getInstr()->getOpcode() ==
+ AMDGPU::V_PERM_B32_e64)
+ Cache->push_back(Pred.getSUnit());
+ }
+ }
+ }
+
+ assert(Cache->size());
+ auto DAG = SyncPipe[0].DAG;
+ // Does the previous DS_WRITE share a V_PERM predecessor with this
+ // VMEM_READ
+ return (
+ std::any_of(Cache->begin(), Cache->end(), [&SU, &DAG](SUnit *Elt) {
+ return DAG->IsReachable(const_cast<SUnit *>(SU), Elt);
+ }));
+ }
+ SharesPredWithPrevNthGroup(unsigned Distance, const SIInstrInfo *TII,
+ unsigned SGID, bool NeedsCache = false)
+ : InstructionRule(TII, SGID, NeedsCache), Distance(Distance) {}
+ };
+
+public:
+ void applyIGLPStrategy(
+ DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
+ DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups) override;
+
+ bool shouldApplyStrategy(ScheduleDAGInstrs *DAG) override { return true; }
+
+ MFMASmallGemmSingleWaveOpt(ScheduleDAGInstrs *DAG, const SIInstrInfo *TII)
+ : IGLPStrategy(DAG, TII) {
+ IsBottomUp = 0;
+ }
+};
+
+void MFMASmallGemmSingleWaveOpt::applyIGLPStrategy(
+ DenseMap<int, SUnitsToCandidateSGsMap> &SyncedInstrs,
+ DenseMap<int, SmallVector<SchedGroup, 4>> &SyncedSchedGroups) {
+ unsigned MFMACount = 0;
+ unsigned DSWCount = 0;
+ unsigned DSWWithPermCount = 0;
+ unsigned DSWWithSharedVMEMCount = 0;
+ unsigned DSRCount = 0;
+ SmallVector<SUnit *, 6> DSWithPerms;
+ for (auto &SU : DAG->SUnits) {
+ auto I = SU.getInstr();
+ if (TII->isMFMAorWMMA(*I))
+ ++MFMACount;
+ else if (TII->isDS(*I)) {
+ if (I->mayLoad())
+ ++DSRCount;
+ else if (I->mayStore()) {
+ ++DSWCount;
+ for (auto Pred : SU.Preds) {
+ if (Pred.getSUnit()->getInstr()->getOpcode() ==
+ AMDGPU::V_PERM_B32_e64) {
+ DSWithPerms.push_back(&SU);
+ break;
+ }
+ }
+ }
+ }
+ }
+ DSWWithPermCount = DSWithPerms.size();
+ auto I = DSWithPerms.begin();
+ auto E = DSWithPerms.end();
+
+ // Get the count of DS_WRITES with V_PERM predecessors which
+ // have loop carried dependencies (WAR) on the same VMEM_READs.
+ // We consider partial overlap as a miss -- in other words,
+ // for a given DS_W, we only consider another DS_W as matching
+ // if there is a corresponding (in terms of the VMEM_R it uses) V_PERM pred
+ // for every V_PERM pred of this DS_W.
+ DenseMap<MachineInstr *, SUnit *> VMEMLookup;
+ SmallVector<SUnit *, 6> Counted;
+ for (; I != E; I++) {
+ SUnit *Cand = nullptr;
+ bool MissedAny = false;
+ for (auto &Pred : (*I)->Preds) {
+ if (Pred.getSUnit()->getInstr()->getOpcode() != AMDGPU::V_PERM_B32_e64)
+ continue;
+
+ if (Cand &&
+ std::find(Counted.begin(), Counted.end(), Cand) != Counted.end())
+ break;
+
+ for (auto &Succ : Pred.getSUnit()->Succs) {
+ auto MI = Succ.getSUnit()->getInstr();
+ if (!TII->isVMEM(*MI) || !MI->mayLoad())
+ continue;
+
+ if (MissedAny || !VMEMLookup.size()) {
+ MissedAny = true;
+ VMEMLookup[MI] = *I;
+ continue;
+ }
+
+ if (!VMEMLookup.contains(MI)) {
+ MissedAny = true;
+ VMEMLookup[MI] = *I;
+ continue;
+ }
+
+ Cand = VMEMLookup[MI];
+ if (std::find(Counted.begin(), Counted.end(), Cand) != Counted.end()) {
+ MissedAny = true;
+ break;
+ }
+ }
+ }
+ if (!MissedAny && Cand) {
+ DSWWithSharedVMEMCount += 2;
+ Counted.push_back(Cand);
+ Counted.push_back(*I);
+ }
+ }
+
+ assert(DSWWithSharedVMEMCount <= DSWWithPermCount);
+ SchedGroup *SG;
+ unsigned PipelineSyncID = 0;
+ // For kernels with V_PERM, there are enough VALU to mix in between MFMAs
+ if (DSWWithPermCount) {
+ for (unsigned I = 0; I < MFMACount; I++) {
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::VALU, 2, PipelineSyncID, DAG, TII);
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ }
+ }
+
+ PipelineSyncID = 1;
+ // Phase 1: Break up DS_READ and MFMA clusters.
+ // First DS_READ to make ready initial MFMA, then interleave MFMA with DS_READ
+ // prefetch
+
+ // Make ready initial MFMA
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::DS_READ, 4, PipelineSyncID, DAG, TII);
+ SG->addRule(std::make_shared<EnablesInitialMFMA>(TII, SG->getSGID(), true));
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ // Interleave MFMA with DS_READ prefetch
+ for (unsigned I = 0; I < DSRCount - 4; ++I) {
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::DS_READ, 1, PipelineSyncID, DAG, TII);
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ }
+
+ // Phase 2a: Loop carried dependency with V_PERM
+ // Schedule VPerm & DS_WRITE as closely as possible to the VMEM_READ they
+ // depend on. Interleave MFMA to keep XDL unit busy throughout.
+ for (unsigned I = 0; I < DSWWithPermCount - DSWWithSharedVMEMCount; ++I) {
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::VALU, 4, PipelineSyncID, DAG, TII);
+ SG->addRule(std::make_shared<IsPermForDSW>(TII, SG->getSGID(), true));
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::DS_WRITE, 1, PipelineSyncID, DAG, TII);
+ SG->addRule(std::make_shared<IsSuccOfPrevGroup>(TII, SG->getSGID(), false));
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::VMEM_READ, 4, PipelineSyncID, DAG, TII);
+ SG->addRule(std::make_shared<SharesPredWithPrevNthGroup>(
+ 1, TII, SG->getSGID(), true));
+ SG->addRule(std::make_shared<VMEMSize>(TII, SG->getSGID(), false));
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::VMEM_READ, 4, PipelineSyncID, DAG, TII);
+ SG->addRule(std::make_shared<SharesPredWithPrevNthGroup>(
+ 3, TII, SG->getSGID(), true));
+ SG->addRule(std::make_shared<VMEMSize>(TII, SG->getSGID(), false));
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ }
+
+ // Phase 2b: Loop carried dependency without V_PERM
+ // Schedule DS_WRITE as closely as possible to the VMEM_READ they depend on.
+ // Interleave MFMA to keep XDL unit busy throughout.
+ for (unsigned I = 0; I < DSWCount - DSWWithPermCount; I++) {
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::DS_WRITE, 1, PipelineSyncID, DAG, TII);
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::VMEM_READ, 4, PipelineSyncID, DAG, TII);
+ SG->addRule(std::make_shared<VMEMSize>(TII, SG->getSGID(), false));
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ }
+
+ // Phase 2c: Loop carried dependency with V_PERM, VMEM_READs are
+ // ultimately used by two DS_WRITE
+ // Schedule VPerm & DS_WRITE as closely as possible to the VMEM_READ they
+ // depend on. Interleave MFMA to keep XDL unit busy throughout.
+
+ for (unsigned I = 0; I < DSWWithSharedVMEMCount; ++I) {
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::VALU, 4, PipelineSyncID, DAG, TII);
+ SG->addRule(std::make_shared<IsPermForDSW>(TII, SG->getSGID(), true));
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::DS_WRITE, 1, PipelineSyncID, DAG, TII);
+ SG->addRule(std::make_shared<IsSuccOfPrevGroup>(TII, SG->getSGID(), false));
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::VALU, 4, PipelineSyncID, DAG, TII);
+ SG->addRule(std::make_shared<IsPermForDSW>(TII, SG->getSGID(), true));
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::DS_WRITE, 1, PipelineSyncID, DAG, TII);
+ SG->addRule(std::make_shared<IsSuccOfPrevGroup>(TII, SG->getSGID(), false));
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::VMEM_READ, 4, PipelineSyncID, DAG, TII);
+ SG->addRule(std::make_shared<SharesPredWithPrevNthGroup>(
+ 2, TII, SG->getSGID(), true));
+ SG->addRule(std::make_shared<VMEMSize>(TII, SG->getSGID(), false));
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::VMEM_READ, 4, PipelineSyncID, DAG, TII);
+ SG->addRule(std::make_shared<SharesPredWithPrevNthGroup>(
+ 4, TII, SG->getSGID(), true));
+ SG->addRule(std::make_shared<VMEMSize>(TII, SG->getSGID(), false));
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+
+ SG = &SyncedSchedGroups[PipelineSyncID].emplace_back(
+ SchedGroupMask::MFMA, 1, PipelineSyncID, DAG, TII);
+ SG->initSchedGroup(SyncedInstrs[SG->getSyncID()]);
+ }
+}
+
static std::unique_ptr<IGLPStrategy>
createIGLPStrategy(IGLPStrategyID ID, ScheduleDAGInstrs *DAG,
const SIInstrInfo *TII) {
switch (ID) {
case MFMASmallGemmOptID:
return std::make_unique<MFMASmallGemmOpt>(DAG, TII);
+ case MFMASmallGemmSingleWaveOptID:
+ return std::make_unique<MFMASmallGemmSingleWaveOpt>(DAG, TII);
}
llvm_unreachable("Unknown IGLPStrategyID");
@@ -829,6 +1394,13 @@ private:
public:
void apply(ScheduleDAGInstrs *DAGInstrs) override;
+ // The order in which the PipelineSolver should process the candidate
+ // SchedGroup for a PipelineInstr. BOTTOM_UP will try to add SUs to the last
+ // created SchedGroup first, and will consider that as the ultimate
+ // predecessor group when linking. TOP_DOWN instead links and processes the
+ // first created SchedGroup first.
+ bool IsBottomUp = 1;
+
IGroupLPDAGMutation() = default;
};
@@ -908,6 +1480,7 @@ int SchedGroup::link(SUnit &SU, bool MakePred,
if (DAG->IsReachable(B, A))
continue;
+
// tryAddEdge returns false if there is a dependency that makes adding
// the A->B edge impossible, otherwise it returns true;
bool Added = tryAddEdge(A, B);
@@ -1034,7 +1607,7 @@ void IGroupLPDAGMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
}
if (foundSB || foundIGLP) {
- PipelineSolver PS(SyncedSchedGroups, SyncedInstrs, DAG);
+ PipelineSolver PS(SyncedSchedGroups, SyncedInstrs, DAG, IsBottomUp);
// PipelineSolver performs the mutation by adding the edges it
// determined as the best
PS.solve();
@@ -1114,8 +1687,10 @@ void IGroupLPDAGMutation::initIGLPOpt(SUnit &SU) {
IGLPStrategyID StrategyID =
(IGLPStrategyID)SU.getInstr()->getOperand(0).getImm();
auto S = createIGLPStrategy(StrategyID, DAG, TII);
- if (S->shouldApplyStrategy(DAG))
+ if (S->shouldApplyStrategy(DAG)) {
+ IsBottomUp = S->IsBottomUp;
S->applyIGLPStrategy(SyncedInstrs, SyncedSchedGroups);
+ }
}
} // namespace
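
As an aside, a deliberately simplified, standalone sketch of the rule-filter pattern that InstructionRule and SchedGroup::allowedByRules introduce above; plain ints stand in for SUnits, and the names here are illustrative only, not the LLVM API.

#include <memory>
#include <vector>

// Base filter: accepts everything unless a subclass overrides apply().
struct Rule {
  virtual bool apply(int /*SU*/, const std::vector<int> & /*Collection*/) {
    return true;
  }
  virtual ~Rule() = default;
};

// Example rule standing in for something like EnablesInitialMFMA.
struct IsEven final : Rule {
  bool apply(int SU, const std::vector<int> &) override { return SU % 2 == 0; }
};

struct Group {
  std::vector<int> Collection;
  std::vector<std::shared_ptr<Rule>> Rules;

  void addRule(std::shared_ptr<Rule> R) { Rules.push_back(std::move(R)); }

  // Mirrors allowedByRules: every attached rule must accept the candidate.
  bool allowedByRules(int SU) const {
    for (const auto &R : Rules)
      if (!R->apply(SU, Collection))
        return false;
    return true;
  }
};

int main() {
  Group G;
  G.addRule(std::make_shared<IsEven>());
  return (G.allowedByRules(4) && !G.allowedByRules(3)) ? 0 : 1;
}
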
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 42d1f58e4239..825c6f0acd0f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -20,7 +20,7 @@
#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600RegisterInfo.h"
#include "SIMachineFunctionInfo.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/InitializePasses.h"
+#include "llvm/Support/ErrorHandling.h"
#ifdef EXPENSIVE_CHECKS
#include "llvm/Analysis/LoopInfo.h"
@@ -101,7 +102,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",
"AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
-INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
#ifdef EXPENSIVE_CHECKS
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
@@ -131,7 +132,7 @@ bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
}
#endif
Subtarget = &MF.getSubtarget<GCNSubtarget>();
- Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
+ Mode = SIModeRegisterDefaults(MF.getFunction());
return SelectionDAGISel::runOnMachineFunction(MF);
}
@@ -167,6 +168,7 @@ bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
case ISD::FFLOOR:
case ISD::FMINNUM:
case ISD::FMAXNUM:
+ case ISD::FLDEXP:
case AMDGPUISD::FRACT:
case AMDGPUISD::CLAMP:
case AMDGPUISD::COS_HW:
@@ -178,7 +180,6 @@ bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
case AMDGPUISD::RCP:
case AMDGPUISD::RSQ:
case AMDGPUISD::RCP_IFLAG:
- case AMDGPUISD::LDEXP:
// On gfx10, all 16-bit instructions preserve the high bits.
return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9;
case ISD::FP_ROUND:
@@ -199,7 +200,7 @@ bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
void AMDGPUDAGToDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AMDGPUArgumentUsageInfo>();
- AU.addRequired<LegacyDivergenceAnalysis>();
+ AU.addRequired<UniformityInfoWrapperPass>();
#ifdef EXPENSIVE_CHECKS
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
@@ -503,10 +504,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
// isa<MemSDNode> almost works but is slightly too permissive for some DS
// intrinsics.
if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
- (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
- Opc == ISD::ATOMIC_LOAD_FADD ||
- Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
- Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
+ Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
+ Opc == AMDGPUISD::ATOMIC_LOAD_FMAX) {
N = glueCopyToM0LDSInit(N);
SelectCode(N);
return;
@@ -528,8 +527,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectADD_SUB_I64(N);
return;
}
- case ISD::ADDCARRY:
- case ISD::SUBCARRY:
+ case ISD::UADDO_CARRY:
+ case ISD::USUBO_CARRY:
if (N->getValueType(0) != MVT::i32)
break;
@@ -665,10 +664,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
case ISD::BRCOND:
SelectBRCOND(N);
return;
- case ISD::FMAD:
- case ISD::FMA:
- SelectFMAD_FMA(N);
- return;
case AMDGPUISD::CVT_PKRTZ_F16_F32:
case AMDGPUISD::CVT_PKNORM_I16_F32:
case AMDGPUISD::CVT_PKNORM_U16_F32:
@@ -714,11 +709,11 @@ bool AMDGPUDAGToDAGISel::isUnneededShiftMask(const SDNode *N,
assert(N->getOpcode() == ISD::AND);
const APInt &RHS = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
- if (RHS.countTrailingOnes() >= ShAmtBits)
+ if (RHS.countr_one() >= ShAmtBits)
return true;
const APInt &LHSKnownZeros = CurDAG->computeKnownBits(N->getOperand(0)).Zero;
- return (LHSKnownZeros | RHS).countTrailingOnes() >= ShAmtBits;
+ return (LHSKnownZeros | RHS).countr_one() >= ShAmtBits;
}
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
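
The countTrailingOnes -> countr_one change above is a pure rename; as a standalone sketch of the first check in isUnneededShiftMask, the snippet below uses std::countr_one (C++20) on a plain uint32_t in place of APInt — the helper name and the 32-bit width are illustrative assumptions.

#include <bit>
#include <cassert>
#include <cstdint>

// A shift instruction only reads ShAmtBits of its amount operand, so an
// 'and' mask that keeps at least that many low bits can be dropped.
static bool maskIsUnneeded(uint32_t MaskRHS, unsigned ShAmtBits) {
  return std::countr_one(MaskRHS) >= static_cast<int>(ShAmtBits);
}

int main() {
  assert(maskIsUnneeded(0x1Fu, 5));  // and x, 31 before a 32-bit shift amount
  assert(!maskIsUnneeded(0x0Fu, 5)); // only 4 low bits preserved
  return 0;
}
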
@@ -813,7 +808,7 @@ SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val,
return SDValue(Mov, 0);
}
-// FIXME: Should only handle addcarry/subcarry
+// FIXME: Should only handle uaddo_carry/usubo_carry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
SDLoc DL(N);
SDValue LHS = N->getOperand(0);
@@ -890,15 +885,15 @@ void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) {
SDValue CI = N->getOperand(2);
if (N->isDivergent()) {
- unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64
- : AMDGPU::V_SUBB_U32_e64;
+ unsigned Opc = N->getOpcode() == ISD::UADDO_CARRY ? AMDGPU::V_ADDC_U32_e64
+ : AMDGPU::V_SUBB_U32_e64;
CurDAG->SelectNodeTo(
N, Opc, N->getVTList(),
{LHS, RHS, CI,
CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
} else {
- unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO
- : AMDGPU::S_SUB_CO_PSEUDO;
+ unsigned Opc = N->getOpcode() == ISD::UADDO_CARRY ? AMDGPU::S_ADD_CO_PSEUDO
+ : AMDGPU::S_SUB_CO_PSEUDO;
CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI});
}
}
@@ -913,8 +908,8 @@ void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E;
++UI)
if (UI.getUse().getResNo() == 1) {
- if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) ||
- (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) {
+ if ((IsAdd && (UI->getOpcode() != ISD::UADDO_CARRY)) ||
+ (!IsAdd && (UI->getOpcode() != ISD::USUBO_CARRY))) {
IsVALU = true;
break;
}
@@ -1141,6 +1136,15 @@ bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0,
return CurDAG->SignBitIsZero(Base);
}
+bool AMDGPUDAGToDAGISel::isFlatScratchBaseLegal(SDValue Base,
+ uint64_t FlatVariant) const {
+ if (FlatVariant != SIInstrFlags::FlatScratch)
+ return true;
+ // When the value in the 32-bit Base can be negative, calculate the scratch
+ // offset using a 32-bit add instruction; otherwise use Base (unsigned) + offset.
+ return CurDAG->SignBitIsZero(Base);
+}
+
// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
SDValue &Offset0,
@@ -1283,7 +1287,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
Ptr = N2;
VAddr = N3;
}
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
} else if (N0->isDivergent()) {
// N0 is divergent. Use it as the addr64, and construct the resource from a
// 0 address.
@@ -1299,18 +1303,18 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
if (!C1) {
// No offset.
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
return true;
}
if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
// Legal offset for instruction.
- Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
+ Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
return true;
}
// Illegal offset, store it in soffset.
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
SOffset =
SDValue(CurDAG->getMachineNode(
AMDGPU::S_MOV_B32, DL, MVT::i32,
@@ -1377,13 +1381,15 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
// Don't fold null pointer.
if (Imm != NullPtr) {
- SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
+ const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
+ SDValue HighBits =
+ CurDAG->getTargetConstant(Imm & ~MaxOffset, DL, MVT::i32);
MachineSDNode *MovHighBits = CurDAG->getMachineNode(
AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
VAddr = SDValue(MovHighBits, 0);
SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
- ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
+ ImmOffset = CurDAG->getTargetConstant(Imm & MaxOffset, DL, MVT::i32);
return true;
}
}
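
A tiny arithmetic check of the immediate split performed above: the high part is materialized with V_MOV_B32 and the low part goes into the MUBUF offset field. 4095 stands in for the value returned by SIInstrInfo::getMaxMUBUFImmOffset() (an assumption for this sketch); the point is only that a contiguous low-bit mask makes the split lossless.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t MaxOffset = 4095;      // assumed getMaxMUBUFImmOffset() value
  const uint32_t Imm = 0x12345;
  uint32_t HighBits = Imm & ~MaxOffset; // materialized via V_MOV_B32
  uint32_t ImmOffset = Imm & MaxOffset; // encoded in the instruction offset
  assert(HighBits + ImmOffset == Imm);  // recombines exactly for a 2^n-1 mask
  return 0;
}
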
@@ -1414,14 +1420,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
(!Subtarget->privateMemoryResourceIsRangeChecked() ||
CurDAG->SignBitIsZero(N0))) {
std::tie(VAddr, SOffset) = foldFrameIndex(N0);
- ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
+ ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
return true;
}
}
// (node)
std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
- ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
return true;
}
@@ -1450,7 +1456,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
if (IsCopyFromSGPR(*TRI, Addr)) {
SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
SOffset = Addr;
- Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
+ Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
return true;
}
@@ -1474,7 +1480,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
- Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
+ Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i32);
return true;
}
@@ -1532,7 +1538,8 @@ bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
SDValue N0, N1;
- if (isBaseWithConstantOffset64(Addr, N0, N1)) {
+ if (isBaseWithConstantOffset64(Addr, N0, N1) &&
+ isFlatScratchBaseLegal(N0, FlatVariant)) {
int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();
const SIInstrInfo *TII = Subtarget->getInstrInfo();
@@ -1764,7 +1771,8 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
int64_t COffsetVal = 0;
- if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ if (CurDAG->isBaseWithConstantOffset(Addr) &&
+ isFlatScratchBaseLegal(Addr.getOperand(0))) {
COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
SAddr = Addr.getOperand(0);
} else {
@@ -1842,6 +1850,8 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
VAddr = SDValue(VMov, 0);
SAddr = LHS;
+ if (!isFlatScratchBaseLegal(SAddr) || !isFlatScratchBaseLegal(VAddr))
+ return false;
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset))
return false;
Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
@@ -1866,6 +1876,9 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
return false;
}
+ if (!isFlatScratchBaseLegal(SAddr) || !isFlatScratchBaseLegal(VAddr))
+ return false;
+
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
return false;
SAddr = SelectSAddrFI(CurDAG, SAddr);
@@ -2283,52 +2296,6 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
VCC.getValue(0));
}
-void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
- MVT VT = N->getSimpleValueType(0);
- bool IsFMA = N->getOpcode() == ISD::FMA;
- if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
- !Subtarget->hasFmaMixInsts()) ||
- ((IsFMA && Subtarget->hasMadMixInsts()) ||
- (!IsFMA && Subtarget->hasFmaMixInsts()))) {
- SelectCode(N);
- return;
- }
-
- SDValue Src0 = N->getOperand(0);
- SDValue Src1 = N->getOperand(1);
- SDValue Src2 = N->getOperand(2);
- unsigned Src0Mods, Src1Mods, Src2Mods;
-
- // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
- // using the conversion from f16.
- bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
- bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
- bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);
-
- assert((IsFMA || !Mode.allFP32Denormals()) &&
- "fmad selected with denormals enabled");
- // TODO: We can select this with f32 denormals enabled if all the sources are
- // converted from f16 (in which case fmad isn't legal).
-
- if (Sel0 || Sel1 || Sel2) {
- // For dummy operands.
- SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
- SDValue Ops[] = {
- CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
- CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
- CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
- CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
- Zero, Zero
- };
-
- CurDAG->SelectNodeTo(N,
- IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
- MVT::f32, Ops);
- } else {
- SelectCode(N);
- }
-}
-
void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
// The address is assumed to be uniform, so if it ends up in a VGPR, it will
// be copied to an SGPR with readfirstlane.
@@ -2562,6 +2529,18 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) {
case Intrinsic::amdgcn_interp_p1_f16:
SelectInterpP1F16(N);
return;
+ case Intrinsic::amdgcn_inverse_ballot:
+ switch (N->getOperand(1).getValueSizeInBits()) {
+ case 32:
+ Opcode = AMDGPU::S_INVERSE_BALLOT_U32;
+ break;
+ case 64:
+ Opcode = AMDGPU::S_INVERSE_BALLOT_U64;
+ break;
+ default:
+ llvm_unreachable("Unsupported size for inverse ballot mask.");
+ }
+ break;
default:
SelectCode(N);
return;
@@ -2591,13 +2570,22 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
unsigned &Mods,
+ bool IsCanonicalizing,
bool AllowAbs) const {
- Mods = 0;
+ Mods = SISrcMods::NONE;
Src = In;
if (Src.getOpcode() == ISD::FNEG) {
Mods |= SISrcMods::NEG;
Src = Src.getOperand(0);
+ } else if (Src.getOpcode() == ISD::FSUB && IsCanonicalizing) {
+ // Fold fsub [+-]0 into fneg. This may not have folded depending on the
+ // denormal mode, but we're implicitly canonicalizing in a source operand.
+ auto *LHS = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
+ if (LHS && LHS->isZero()) {
+ Mods |= SISrcMods::NEG;
+ Src = Src.getOperand(1);
+ }
}
if (AllowAbs && Src.getOpcode() == ISD::FABS) {
@@ -2611,7 +2599,20 @@ bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods;
- if (SelectVOP3ModsImpl(In, Src, Mods)) {
+ if (SelectVOP3ModsImpl(In, Src, Mods, /*IsCanonicalizing=*/true,
+ /*AllowAbs=*/true)) {
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+ }
+
+ return false;
+}
+
+bool AMDGPUDAGToDAGISel::SelectVOP3ModsNonCanonicalizing(
+ SDValue In, SDValue &Src, SDValue &SrcMods) const {
+ unsigned Mods;
+ if (SelectVOP3ModsImpl(In, Src, Mods, /*IsCanonicalizing=*/false,
+ /*AllowAbs=*/true)) {
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
return true;
}
@@ -2622,7 +2623,9 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods;
- if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
+ if (SelectVOP3ModsImpl(In, Src, Mods,
+ /*IsCanonicalizing=*/true,
+ /*AllowAbs=*/false)) {
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
return true;
}
@@ -2642,7 +2645,9 @@ bool AMDGPUDAGToDAGISel::SelectVINTERPModsImpl(SDValue In, SDValue &Src,
SDValue &SrcMods,
bool OpSel) const {
unsigned Mods;
- if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) {
+ if (SelectVOP3ModsImpl(In, Src, Mods,
+ /*IsCanonicalizing=*/true,
+ /*AllowAbs=*/false)) {
if (OpSel)
Mods |= SISrcMods::OP_SEL_0;
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
@@ -2695,9 +2700,10 @@ bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
SDValue &SrcMods, bool IsDOT) const {
- unsigned Mods = 0;
+ unsigned Mods = SISrcMods::NONE;
Src = In;
+ // TODO: Handle G_FSUB 0 as fneg
if (Src.getOpcode() == ISD::FNEG) {
Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
Src = Src.getOperand(0);
@@ -2776,7 +2782,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF()
.bitcastToAPInt().getZExtValue();
if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) {
- Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);;
+ Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);
SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
return true;
}
@@ -2804,7 +2810,7 @@ bool AMDGPUDAGToDAGISel::SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const {
assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
unsigned Mods = SISrcMods::OP_SEL_1;
- unsigned SrcSign = C->getAPIntValue().getZExtValue();
+ unsigned SrcSign = C->getZExtValue();
if (SrcSign == 1)
Mods ^= SISrcMods::NEG;
@@ -2818,7 +2824,7 @@ bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
unsigned Mods = SISrcMods::OP_SEL_1;
- unsigned SrcVal = C->getAPIntValue().getZExtValue();
+ unsigned SrcVal = C->getZExtValue();
if (SrcVal == 1)
Mods |= SISrcMods::OP_SEL_0;
@@ -2883,6 +2889,15 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
return false;
}
+bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const {
+ unsigned Mods = 0;
+ if (!SelectVOP3PMadMixModsImpl(In, Src, Mods))
+ return false;
+ SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
unsigned Mods = 0;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index 162b0340a6aa..0605baf3a0cc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -16,6 +16,7 @@
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
+#include "SIModeRegisterDefaults.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
@@ -24,11 +25,7 @@ using namespace llvm;
namespace {
static inline bool isNullConstantOrUndef(SDValue V) {
- if (V.isUndef())
- return true;
-
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V);
- return Const != nullptr && Const->isZero();
+ return V.isUndef() || isNullConstant(V);
}
static inline bool getConstantValue(SDValue N, uint32_t &Out) {
@@ -82,7 +79,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
const GCNSubtarget *Subtarget;
// Default FP mode for the current function.
- AMDGPU::SIModeRegisterDefaults Mode;
+ SIModeRegisterDefaults Mode;
bool EnableLateStructurizeCFG;
@@ -157,6 +154,9 @@ private:
bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
unsigned Size) const;
+ bool isFlatScratchBaseLegal(
+ SDValue Base, uint64_t FlatVariant = SIInstrFlags::FlatScratch) const;
+
bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
SDValue &Offset1) const;
@@ -216,8 +216,11 @@ private:
bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;
bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
+ bool IsCanonicalizing = true,
bool AllowAbs = true) const;
bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
+ bool SelectVOP3ModsNonCanonicalizing(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const;
bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
@@ -247,6 +250,8 @@ private:
bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
unsigned &Mods) const;
+ bool SelectVOP3PMadMixModsExt(SDValue In, SDValue &Src,
+ SDValue &SrcMods) const;
bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
SDValue getHi16Elt(SDValue In) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 8121b381e83f..254d02d4ce5b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -16,12 +16,13 @@
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUMachineFunction.h"
-#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Target/TargetMachine.h"
@@ -138,6 +139,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::LOAD, MVT::v16f64, Promote);
AddPromotedToType(ISD::LOAD, MVT::v16f64, MVT::v32i32);
+ setOperationAction(ISD::LOAD, MVT::i128, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::i128, MVT::v4i32);
+
// There are no 64-bit extloads. These should be done as a 32-bit extload and
// an extension to 64-bit.
for (MVT VT : MVT::integer_valuetypes())
@@ -264,6 +268,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STORE, MVT::v16f64, Promote);
AddPromotedToType(ISD::STORE, MVT::v16f64, MVT::v32i32);
+ setOperationAction(ISD::STORE, MVT::i128, Promote);
+ AddPromotedToType(ISD::STORE, MVT::i128, MVT::v4i32);
+
setTruncStoreAction(MVT::i64, MVT::i1, Expand);
setTruncStoreAction(MVT::i64, MVT::i8, Expand);
setTruncStoreAction(MVT::i64, MVT::i16, Expand);
@@ -321,14 +328,15 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
// Library functions. These default to Expand, but we have instructions
// for them.
- setOperationAction({ISD::FCEIL, ISD::FEXP2, ISD::FPOW, ISD::FLOG2, ISD::FABS,
- ISD::FFLOOR, ISD::FRINT, ISD::FTRUNC, ISD::FMINNUM,
- ISD::FMAXNUM},
+ setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR, ISD::FRINT,
+ ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM},
MVT::f32, Legal);
+ setOperationAction(ISD::FLOG2, MVT::f32, Custom);
setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom);
- setOperationAction({ISD::FLOG, ISD::FLOG10, ISD::FEXP}, MVT::f32, Custom);
+ setOperationAction({ISD::FLOG, ISD::FLOG10, ISD::FEXP, ISD::FEXP2}, MVT::f32,
+ Custom);
setOperationAction(ISD::FNEARBYINT, {MVT::f16, MVT::f32, MVT::f64}, Custom);
@@ -338,8 +346,12 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
if (Subtarget->has16BitInsts())
setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal);
- else
+ else {
setOperationAction(ISD::IS_FPCLASS, {MVT::f32, MVT::f64}, Legal);
+ setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Custom);
+ }
+
+ setOperationAction({ISD::FLOG10, ISD::FLOG, ISD::FEXP}, MVT::f16, Custom);
// FIXME: These IS_FPCLASS vector fp types are marked custom so it reaches
// scalarization code. Can be removed when IS_FPCLASS expand isn't called by
@@ -556,7 +568,7 @@ bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const {
//===----------------------------------------------------------------------===//
LLVM_READNONE
-static bool fnegFoldsIntoOp(unsigned Opc) {
+static bool fnegFoldsIntoOpcode(unsigned Opc) {
switch (Opc) {
case ISD::FADD:
case ISD::FSUB:
@@ -567,6 +579,7 @@ static bool fnegFoldsIntoOp(unsigned Opc) {
case ISD::FMAXNUM:
case ISD::FMINNUM_IEEE:
case ISD::FMAXNUM_IEEE:
+ case ISD::SELECT:
case ISD::FSIN:
case ISD::FTRUNC:
case ISD::FRINT:
@@ -582,17 +595,45 @@ static bool fnegFoldsIntoOp(unsigned Opc) {
case AMDGPUISD::FMED3:
// TODO: handle llvm.amdgcn.fma.legacy
return true;
+ case ISD::BITCAST:
+ llvm_unreachable("bitcast is special cased");
default:
return false;
}
}
+static bool fnegFoldsIntoOp(const SDNode *N) {
+ unsigned Opc = N->getOpcode();
+ if (Opc == ISD::BITCAST) {
+ // TODO: Is there a benefit to checking the conditions performFNegCombine
+ // does? We don't for the other cases.
+ SDValue BCSrc = N->getOperand(0);
+ if (BCSrc.getOpcode() == ISD::BUILD_VECTOR) {
+ return BCSrc.getNumOperands() == 2 &&
+ BCSrc.getOperand(1).getValueSizeInBits() == 32;
+ }
+
+ return BCSrc.getOpcode() == ISD::SELECT && BCSrc.getValueType() == MVT::f32;
+ }
+
+ return fnegFoldsIntoOpcode(Opc);
+}
+
/// \returns true if the operation will definitely need to use a 64-bit
/// encoding, and thus will use a VOP3 encoding regardless of the source
/// modifiers.
LLVM_READONLY
static bool opMustUseVOP3Encoding(const SDNode *N, MVT VT) {
- return N->getNumOperands() > 2 || VT == MVT::f64;
+ return (N->getNumOperands() > 2 && N->getOpcode() != ISD::SELECT) ||
+ VT == MVT::f64;
+}
+
+/// Return true if v_cndmask_b32 will support fabs/fneg source modifiers for
+/// the type of this ISD::SELECT.
+LLVM_READONLY
+static bool selectSupportsSourceMods(const SDNode *N) {
+ // TODO: Only applies if select will be vector
+ return N->getValueType(0) == MVT::f32;
}
// Most FP instructions support source modifiers, but this could be refined
@@ -604,7 +645,6 @@ static bool hasSourceMods(const SDNode *N) {
switch (N->getOpcode()) {
case ISD::CopyToReg:
- case ISD::SELECT:
case ISD::FDIV:
case ISD::FREM:
case ISD::INLINEASM:
@@ -629,6 +669,8 @@ static bool hasSourceMods(const SDNode *N) {
return true;
}
}
+ case ISD::SELECT:
+ return selectSupportsSourceMods(N);
default:
return true;
}
@@ -644,6 +686,8 @@ bool AMDGPUTargetLowering::allUsesHaveSourceMods(const SDNode *N,
unsigned NumMayIncreaseSize = 0;
MVT VT = N->getValueType(0).getScalarType().getSimpleVT();
+ assert(!N->use_empty());
+
// XXX - Should this limit number of uses to check?
for (const SDNode *U : N->uses()) {
if (!hasSourceMods(U))
@@ -800,6 +844,17 @@ SDValue AMDGPUTargetLowering::getNegatedExpression(
return SDValue();
break;
}
+ case AMDGPUISD::RCP: {
+ SDValue Src = Op.getOperand(0);
+ EVT VT = Op.getValueType();
+ SDLoc SL(Op);
+
+ SDValue NegSrc = getNegatedExpression(Src, DAG, LegalOperations,
+ ForCodeSize, Cost, Depth + 1);
+ if (NegSrc)
+ return DAG.getNode(AMDGPUISD::RCP, SL, VT, NegSrc, Op->getFlags());
+ return SDValue();
+ }
default:
break;
}
@@ -827,7 +882,7 @@ bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
return VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f16;
}
-bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT,
+bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT,
unsigned NumElem,
unsigned AS) const {
return true;
@@ -888,10 +943,6 @@ bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
return Src == MVT::i32 && Dest == MVT::i64;
}
-bool AMDGPUTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
- return isZExtFree(Val.getValueType(), VT2);
-}
-
bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
// There aren't really 64-bit registers, but pairs of 32-bit ones and only a
// limited number of native 64-bit operations. Shrinking an operation to fit
@@ -1021,7 +1072,7 @@ void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(
const Function &Fn = MF.getFunction();
LLVMContext &Ctx = Fn.getParent()->getContext();
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF);
- const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset(Fn);
+ const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset();
CallingConv::ID CC = Fn.getCallingConv();
Align MaxAlign = Align(1);
@@ -1258,12 +1309,15 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
return LowerFROUNDEVEN(Op, DAG);
case ISD::FROUND: return LowerFROUND(Op, DAG);
case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
+ case ISD::FLOG2:
+ return LowerFLOG2(Op, DAG);
case ISD::FLOG:
- return LowerFLOG(Op, DAG, numbers::ln2f);
case ISD::FLOG10:
- return LowerFLOG(Op, DAG, numbers::ln2f / numbers::ln10f);
+ return LowerFLOGCommon(Op, DAG);
case ISD::FEXP:
return lowerFEXP(Op, DAG);
+ case ISD::FEXP2:
+ return lowerFEXP2(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
@@ -1292,6 +1346,23 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
// ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
// nothing here and let the illegal result integer be handled normally.
return;
+ case ISD::FLOG2:
+ if (SDValue Lowered = LowerFLOG2(SDValue(N, 0), DAG))
+ Results.push_back(Lowered);
+ return;
+ case ISD::FLOG:
+ case ISD::FLOG10:
+ if (SDValue Lowered = LowerFLOGCommon(SDValue(N, 0), DAG))
+ Results.push_back(Lowered);
+ return;
+ case ISD::FEXP2:
+ if (SDValue Lowered = lowerFEXP2(SDValue(N, 0), DAG))
+ Results.push_back(Lowered);
+ return;
+ case ISD::FEXP:
+ if (SDValue Lowered = lowerFEXP(SDValue(N, 0), DAG))
+ Results.push_back(Lowered);
+ return;
default:
return;
}
@@ -1305,6 +1376,13 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = G->getGlobal();
+ if (!MFI->isModuleEntryFunction()) {
+ if (std::optional<uint32_t> Address =
+ AMDGPUMachineFunction::getLDSAbsoluteAddress(*GV)) {
+ return DAG.getConstant(*Address, SDLoc(Op), Op.getValueType());
+ }
+ }
+
if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
if (!MFI->isModuleEntryFunction() &&
@@ -1378,43 +1456,60 @@ SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
-
+ SDLoc SL(Op);
SmallVector<SDValue, 8> Args;
unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
EVT VT = Op.getValueType();
EVT SrcVT = Op.getOperand(0).getValueType();
- // For these types, we have some TableGen patterns except if the index is 1
- if (((SrcVT == MVT::v4f16 && VT == MVT::v2f16) ||
- (SrcVT == MVT::v4i16 && VT == MVT::v2i16)) &&
- Start != 1)
- return Op;
-
- if (((SrcVT == MVT::v8f16 && VT == MVT::v4f16) ||
- (SrcVT == MVT::v8i16 && VT == MVT::v4i16)) &&
- (Start == 0 || Start == 4))
- return Op;
+ if (VT.getScalarSizeInBits() == 16 && Start % 2 == 0) {
+ unsigned NumElt = VT.getVectorNumElements();
+ unsigned NumSrcElt = SrcVT.getVectorNumElements();
+ assert(NumElt % 2 == 0 && NumSrcElt % 2 == 0 && "expect legal types");
+
+ // Extract 32-bit registers at a time.
+ EVT NewSrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumSrcElt / 2);
+ EVT NewVT = NumElt == 2
+ ? MVT::i32
+ : EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElt / 2);
+ SDValue Tmp = DAG.getNode(ISD::BITCAST, SL, NewSrcVT, Op.getOperand(0));
+
+ DAG.ExtractVectorElements(Tmp, Args, Start / 2, NumElt / 2);
+ if (NumElt == 2)
+ Tmp = Args[0];
+ else
+ Tmp = DAG.getBuildVector(NewVT, SL, Args);
- if (((SrcVT == MVT::v16f16 && VT == MVT::v8f16) ||
- (SrcVT == MVT::v16i16 && VT == MVT::v8i16)) &&
- (Start == 0 || Start == 8))
- return Op;
+ return DAG.getNode(ISD::BITCAST, SL, VT, Tmp);
+ }
DAG.ExtractVectorElements(Op.getOperand(0), Args, Start,
VT.getVectorNumElements());
- return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Args);
+ return DAG.getBuildVector(Op.getValueType(), SL, Args);
}
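
A minimal standalone sketch (not from the patch itself) of the idea the rewritten LowerEXTRACT_SUBVECTOR relies on: an even-aligned run of 16-bit lanes can be pulled out by copying whole 32-bit words after a bitcast. The names and the memcpy-based host model are assumptions for illustration only.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // v8i16 source and an extract of lanes [4, 8) into a v4i16 result.
  uint16_t Src[8] = {10, 11, 12, 13, 14, 15, 16, 17};
  const unsigned Start = 4, NumElt = 4; // Start must be even.

  // "Bitcast" v8i16 -> v4i32 by reinterpreting the bytes.
  uint32_t Words[4];
  std::memcpy(Words, Src, sizeof(Words));

  // Extract NumElt/2 32-bit words starting at word Start/2.
  uint32_t Out[2] = {Words[Start / 2], Words[Start / 2 + 1]};

  // "Bitcast" back to 16-bit lanes; whole bytes were copied, so lane order survives.
  uint16_t Dst[4];
  std::memcpy(Dst, Out, sizeof(Dst));

  for (unsigned I = 0; I != NumElt; ++I)
    std::printf("%u ", (unsigned)Dst[I]); // prints: 14 15 16 17
  std::printf("\n");
}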
-/// Generate Min/Max node
-SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT,
- SDValue LHS, SDValue RHS,
- SDValue True, SDValue False,
- SDValue CC,
- DAGCombinerInfo &DCI) const {
- if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
- return SDValue();
+// TODO: Handle fabs too
+static SDValue peekFNeg(SDValue Val) {
+ if (Val.getOpcode() == ISD::FNEG)
+ return Val.getOperand(0);
+
+ return Val;
+}
+
+static SDValue peekFPSignOps(SDValue Val) {
+ if (Val.getOpcode() == ISD::FNEG)
+ Val = Val.getOperand(0);
+ if (Val.getOpcode() == ISD::FABS)
+ Val = Val.getOperand(0);
+ if (Val.getOpcode() == ISD::FCOPYSIGN)
+ Val = Val.getOperand(0);
+ return Val;
+}
+SDValue AMDGPUTargetLowering::combineFMinMaxLegacyImpl(
+ const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True,
+ SDValue False, SDValue CC, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
switch (CCOpcode) {
@@ -1480,6 +1575,45 @@ SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT,
return SDValue();
}
+/// Generate Min/Max node
+SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT,
+ SDValue LHS, SDValue RHS,
+ SDValue True, SDValue False,
+ SDValue CC,
+ DAGCombinerInfo &DCI) const {
+ if ((LHS == True && RHS == False) || (LHS == False && RHS == True))
+ return combineFMinMaxLegacyImpl(DL, VT, LHS, RHS, True, False, CC, DCI);
+
+ SelectionDAG &DAG = DCI.DAG;
+
+ // If we can't directly match this, try to see if we can fold an fneg to
+ // match.
+
+ ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
+ ConstantFPSDNode *CFalse = dyn_cast<ConstantFPSDNode>(False);
+ SDValue NegTrue = peekFNeg(True);
+
+ // Undo the combine foldFreeOpFromSelect does if it helps us match the
+ // fmin/fmax.
+ //
+ // select (fcmp olt (lhs, K)), (fneg lhs), -K
+ // -> fneg (fmin_legacy lhs, K)
+ //
+ // TODO: Use getNegatedExpression
+ if (LHS == NegTrue && CFalse && CRHS) {
+ APFloat NegRHS = neg(CRHS->getValueAPF());
+ if (NegRHS == CFalse->getValueAPF()) {
+ SDValue Combined =
+ combineFMinMaxLegacyImpl(DL, VT, LHS, RHS, NegTrue, False, CC, DCI);
+ if (Combined)
+ return DAG.getNode(ISD::FNEG, DL, VT, Combined);
+ return SDValue();
+ }
+ }
+
+ return SDValue();
+}
+
std::pair<SDValue, SDValue>
AMDGPUTargetLowering::split64BitValue(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
@@ -1749,7 +1883,8 @@ SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG,
bool UseFmadFtz = false;
if (Subtarget->isGCN()) {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- UseFmadFtz = MFI->getMode().allFP32Denormals();
+ UseFmadFtz =
+ MFI->getMode().FP32Denormals != DenormalMode::getPreserveSign();
}
// float fr = mad(fqneg, fb, fa);
@@ -1811,13 +1946,13 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
SDValue Zero = DAG.getConstant(0, DL, HalfVT);
//HiLo split
+ SDValue LHS_Lo, LHS_Hi;
SDValue LHS = Op.getOperand(0);
- SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero);
- SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, One);
+ std::tie(LHS_Lo, LHS_Hi) = DAG.SplitScalar(LHS, DL, HalfVT, HalfVT);
+ SDValue RHS_Lo, RHS_Hi;
SDValue RHS = Op.getOperand(1);
- SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero);
- SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, One);
+ std::tie(RHS_Lo, RHS_Hi) = DAG.SplitScalar(RHS, DL, HalfVT, HalfVT);
if (DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) &&
DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) {
@@ -1841,11 +1976,11 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
// Compute denominator reciprocal.
- unsigned FMAD = !Subtarget->hasMadMacF32Insts() ?
- (unsigned)ISD::FMA :
- !MFI->getMode().allFP32Denormals() ?
- (unsigned)ISD::FMAD :
- (unsigned)AMDGPUISD::FMAD_FTZ;
+ unsigned FMAD =
+ !Subtarget->hasMadMacF32Insts() ? (unsigned)ISD::FMA
+ : MFI->getMode().FP32Denormals == DenormalMode::getPreserveSign()
+ ? (unsigned)ISD::FMAD
+ : (unsigned)AMDGPUISD::FMAD_FTZ;
SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo);
SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi);
@@ -1875,13 +2010,12 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
SDValue Neg_RHS = DAG.getNode(ISD::SUB, DL, VT, Zero64, RHS);
SDValue Mullo1 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Rcp64);
SDValue Mulhi1 = DAG.getNode(ISD::MULHU, DL, VT, Rcp64, Mullo1);
- SDValue Mulhi1_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1,
- Zero);
- SDValue Mulhi1_Hi =
- DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi1, One);
- SDValue Add1_Lo = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Rcp_Lo,
+ SDValue Mulhi1_Lo, Mulhi1_Hi;
+ std::tie(Mulhi1_Lo, Mulhi1_Hi) =
+ DAG.SplitScalar(Mulhi1, DL, HalfVT, HalfVT);
+ SDValue Add1_Lo = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Rcp_Lo,
Mulhi1_Lo, Zero1);
- SDValue Add1_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Rcp_Hi,
+ SDValue Add1_Hi = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Rcp_Hi,
Mulhi1_Hi, Add1_Lo.getValue(1));
SDValue Add1 = DAG.getBitcast(VT,
DAG.getBuildVector(MVT::v2i32, DL, {Add1_Lo, Add1_Hi}));
@@ -1889,13 +2023,12 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
// Second round of UNR.
SDValue Mullo2 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Add1);
SDValue Mulhi2 = DAG.getNode(ISD::MULHU, DL, VT, Add1, Mullo2);
- SDValue Mulhi2_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2,
- Zero);
- SDValue Mulhi2_Hi =
- DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mulhi2, One);
- SDValue Add2_Lo = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_Lo,
+ SDValue Mulhi2_Lo, Mulhi2_Hi;
+ std::tie(Mulhi2_Lo, Mulhi2_Hi) =
+ DAG.SplitScalar(Mulhi2, DL, HalfVT, HalfVT);
+ SDValue Add2_Lo = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Add1_Lo,
Mulhi2_Lo, Zero1);
- SDValue Add2_Hi = DAG.getNode(ISD::ADDCARRY, DL, HalfCarryVT, Add1_Hi,
+ SDValue Add2_Hi = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Add1_Hi,
Mulhi2_Hi, Add2_Lo.getValue(1));
SDValue Add2 = DAG.getBitcast(VT,
DAG.getBuildVector(MVT::v2i32, DL, {Add2_Lo, Add2_Hi}));
@@ -1904,11 +2037,11 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
SDValue Mul3 = DAG.getNode(ISD::MUL, DL, VT, RHS, Mulhi3);
- SDValue Mul3_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mul3, Zero);
- SDValue Mul3_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, Mul3, One);
- SDValue Sub1_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, LHS_Lo,
+ SDValue Mul3_Lo, Mul3_Hi;
+ std::tie(Mul3_Lo, Mul3_Hi) = DAG.SplitScalar(Mul3, DL, HalfVT, HalfVT);
+ SDValue Sub1_Lo = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, LHS_Lo,
Mul3_Lo, Zero1);
- SDValue Sub1_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, LHS_Hi,
+ SDValue Sub1_Hi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, LHS_Hi,
Mul3_Hi, Sub1_Lo.getValue(1));
SDValue Sub1_Mi = DAG.getNode(ISD::SUB, DL, HalfVT, LHS_Hi, Mul3_Hi);
SDValue Sub1 = DAG.getBitcast(VT,
@@ -1926,11 +2059,11 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
// potential endif to substitute PHIs.
// if C3 != 0 ...
- SDValue Sub2_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub1_Lo,
+ SDValue Sub2_Lo = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub1_Lo,
RHS_Lo, Zero1);
- SDValue Sub2_Mi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub1_Mi,
+ SDValue Sub2_Mi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub1_Mi,
RHS_Hi, Sub1_Lo.getValue(1));
- SDValue Sub2_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Mi,
+ SDValue Sub2_Hi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub2_Mi,
Zero, Sub2_Lo.getValue(1));
SDValue Sub2 = DAG.getBitcast(VT,
DAG.getBuildVector(MVT::v2i32, DL, {Sub2_Lo, Sub2_Hi}));
@@ -1946,11 +2079,11 @@ void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op,
// if (C6 != 0)
SDValue Add4 = DAG.getNode(ISD::ADD, DL, VT, Add3, One64);
- SDValue Sub3_Lo = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Lo,
+ SDValue Sub3_Lo = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub2_Lo,
RHS_Lo, Zero1);
- SDValue Sub3_Mi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub2_Mi,
+ SDValue Sub3_Mi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub2_Mi,
RHS_Hi, Sub2_Lo.getValue(1));
- SDValue Sub3_Hi = DAG.getNode(ISD::SUBCARRY, DL, HalfCarryVT, Sub3_Mi,
+ SDValue Sub3_Hi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub3_Mi,
Zero, Sub3_Lo.getValue(1));
SDValue Sub3 = DAG.getBitcast(VT,
DAG.getBuildVector(MVT::v2i32, DL, {Sub3_Lo, Sub3_Hi}));
@@ -2329,27 +2462,445 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
}
-SDValue AMDGPUTargetLowering::LowerFLOG(SDValue Op, SelectionDAG &DAG,
- double Log2BaseInverted) const {
- EVT VT = Op.getValueType();
+/// Return true if it's known that \p Src can never be an f32 denormal value.
+static bool valueIsKnownNeverF32Denorm(SDValue Src) {
+ switch (Src.getOpcode()) {
+ case ISD::FP_EXTEND:
+ return Src.getOperand(0).getValueType() == MVT::f16;
+ case ISD::FP16_TO_FP:
+ return true;
+ default:
+ return false;
+ }
+
+ llvm_unreachable("covered opcode switch");
+}
+
+static bool allowApproxFunc(const SelectionDAG &DAG, SDNodeFlags Flags) {
+ if (Flags.hasApproximateFuncs())
+ return true;
+ auto &Options = DAG.getTarget().Options;
+ return Options.UnsafeFPMath || Options.ApproxFuncFPMath;
+}
+
+static bool needsDenormHandlingF32(const SelectionDAG &DAG, SDValue Src,
+ SDNodeFlags Flags) {
+ return !valueIsKnownNeverF32Denorm(Src) &&
+ DAG.getMachineFunction()
+ .getDenormalMode(APFloat::IEEEsingle())
+ .Input != DenormalMode::PreserveSign;
+}
+
+SDValue AMDGPUTargetLowering::getIsLtSmallestNormal(SelectionDAG &DAG,
+ SDValue Src,
+ SDNodeFlags Flags) const {
+ SDLoc SL(Src);
+ EVT VT = Src.getValueType();
+ const fltSemantics &Semantics = SelectionDAG::EVTToAPFloatSemantics(VT);
+ SDValue SmallestNormal =
+ DAG.getConstantFP(APFloat::getSmallestNormalized(Semantics), SL, VT);
+
+ // Want to scale denormals up, but negatives and 0 work just as well on the
+ // scaled path.
+ SDValue IsLtSmallestNormal = DAG.getSetCC(
+ SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Src,
+ SmallestNormal, ISD::SETOLT);
+
+ return IsLtSmallestNormal;
+}
+
+SDValue AMDGPUTargetLowering::getIsFinite(SelectionDAG &DAG, SDValue Src,
+ SDNodeFlags Flags) const {
+ SDLoc SL(Src);
+ EVT VT = Src.getValueType();
+ const fltSemantics &Semantics = SelectionDAG::EVTToAPFloatSemantics(VT);
+ SDValue Inf = DAG.getConstantFP(APFloat::getInf(Semantics), SL, VT);
+
+ SDValue Fabs = DAG.getNode(ISD::FABS, SL, VT, Src, Flags);
+ SDValue IsFinite = DAG.getSetCC(
+ SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Fabs,
+ Inf, ISD::SETOLT);
+ return IsFinite;
+}
+
+/// If denormal handling is required, return the scaled input to FLOG2, and the
+/// check for denormal range. Otherwise, return null values.
+std::pair<SDValue, SDValue>
+AMDGPUTargetLowering::getScaledLogInput(SelectionDAG &DAG, const SDLoc SL,
+ SDValue Src, SDNodeFlags Flags) const {
+ if (allowApproxFunc(DAG, Flags) || !needsDenormHandlingF32(DAG, Src, Flags))
+ return {};
+
+ MVT VT = MVT::f32;
+ const fltSemantics &Semantics = APFloat::IEEEsingle();
+ SDValue SmallestNormal =
+ DAG.getConstantFP(APFloat::getSmallestNormalized(Semantics), SL, VT);
+
+ SDValue IsLtSmallestNormal = DAG.getSetCC(
+ SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Src,
+ SmallestNormal, ISD::SETOLT);
+
+ SDValue Scale32 = DAG.getConstantFP(0x1.0p+32, SL, VT);
+ SDValue One = DAG.getConstantFP(1.0, SL, VT);
+ SDValue ScaleFactor =
+ DAG.getNode(ISD::SELECT, SL, VT, IsLtSmallestNormal, Scale32, One, Flags);
+
+ SDValue ScaledInput = DAG.getNode(ISD::FMUL, SL, VT, Src, ScaleFactor, Flags);
+ return {ScaledInput, IsLtSmallestNormal};
+}
+
+SDValue AMDGPUTargetLowering::LowerFLOG2(SDValue Op, SelectionDAG &DAG) const {
+ // v_log_f32 is good enough for OpenCL, except it doesn't handle denormals.
+ // If we have to handle denormals, scale up the input and adjust the result.
+
+ // scaled = x * (is_denormal ? 0x1.0p+32 : 1.0)
+ // log2 = amdgpu_log2 - (is_denormal ? 32.0 : 0.0)
SDLoc SL(Op);
- SDValue Operand = Op.getOperand(0);
- SDValue Log2Operand = DAG.getNode(ISD::FLOG2, SL, VT, Operand);
- SDValue Log2BaseInvertedOperand = DAG.getConstantFP(Log2BaseInverted, SL, VT);
+ EVT VT = Op.getValueType();
+ SDValue Src = Op.getOperand(0);
+ SDNodeFlags Flags = Op->getFlags();
+
+ if (VT == MVT::f16) {
+ // Nothing in half is a denormal when promoted to f32.
+ assert(!Subtarget->has16BitInsts());
+ SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src, Flags);
+ SDValue Log = DAG.getNode(AMDGPUISD::LOG, SL, MVT::f32, Ext, Flags);
+ return DAG.getNode(ISD::FP_ROUND, SL, VT, Log,
+ DAG.getTargetConstant(0, SL, MVT::i32), Flags);
+ }
+
+ auto [ScaledInput, IsLtSmallestNormal] =
+ getScaledLogInput(DAG, SL, Src, Flags);
+ if (!ScaledInput)
+ return DAG.getNode(AMDGPUISD::LOG, SL, VT, Src, Flags);
+
+ SDValue Log2 = DAG.getNode(AMDGPUISD::LOG, SL, VT, ScaledInput, Flags);
- return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand);
+ SDValue ThirtyTwo = DAG.getConstantFP(32.0, SL, VT);
+ SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
+ SDValue ResultOffset =
+ DAG.getNode(ISD::SELECT, SL, VT, IsLtSmallestNormal, ThirtyTwo, Zero);
+ return DAG.getNode(ISD::FSUB, SL, VT, Log2, ResultOffset, Flags);
}
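
A small host-side model (not from the patch) of the identity LowerFLOG2 uses for denormal inputs, with libm log2 standing in for V_LOG_F32: scale the input by 2^32 so it becomes a normal value, then subtract 32 from the result.

#include <cmath>
#include <cstdio>

int main() {
  // A single-precision denormal input (the smallest normal is 0x1.0p-126f).
  float X = 0x1.0p-140f;
  bool IsDenorm = X < 0x1.0p-126f;

  // scaled = x * (is_denormal ? 0x1.0p+32 : 1.0); the scaled value is normal.
  float Scaled = X * (IsDenorm ? 0x1.0p+32f : 1.0f);

  // log2(x) = log2(x * 2^32) - 32.
  float Log2 = std::log2(Scaled) - (IsDenorm ? 32.0f : 0.0f);

  std::printf("log2(%a) = %g (expected -140)\n", X, Log2);
}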
-// exp2(M_LOG2E_F * f);
-SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
+static SDValue getMad(SelectionDAG &DAG, const SDLoc &SL, EVT VT, SDValue X,
+ SDValue Y, SDValue C, SDNodeFlags Flags = SDNodeFlags()) {
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, X, Y, Flags);
+ return DAG.getNode(ISD::FADD, SL, VT, Mul, C, Flags);
+}
+
+SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDValue X = Op.getOperand(0);
EVT VT = Op.getValueType();
+ SDNodeFlags Flags = Op->getFlags();
+ SDLoc DL(Op);
+
+ const bool IsLog10 = Op.getOpcode() == ISD::FLOG10;
+ assert(IsLog10 || Op.getOpcode() == ISD::FLOG);
+
+ const auto &Options = getTargetMachine().Options;
+ if (VT == MVT::f16 || Flags.hasApproximateFuncs() ||
+ Options.ApproxFuncFPMath || Options.UnsafeFPMath) {
+
+ if (VT == MVT::f16 && !Subtarget->has16BitInsts()) {
+ // Log and multiply in f32 is good enough for f16.
+ X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X, Flags);
+ }
+
+ SDValue Lowered = LowerFLOGUnsafe(
+ X, DL, DAG, IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2,
+ Flags);
+ if (VT == MVT::f16 && !Subtarget->has16BitInsts()) {
+ return DAG.getNode(ISD::FP_ROUND, DL, VT, Lowered,
+ DAG.getTargetConstant(0, DL, MVT::i32), Flags);
+ }
+
+ return Lowered;
+ }
+
+ auto [ScaledInput, IsScaled] = getScaledLogInput(DAG, DL, X, Flags);
+ if (ScaledInput)
+ X = ScaledInput;
+
+ SDValue Y = DAG.getNode(AMDGPUISD::LOG, DL, VT, X, Flags);
+
+ SDValue R;
+ if (Subtarget->hasFastFMAF32()) {
+ // c+cc are ln(2)/ln(10) to more than 49 bits
+ const float c_log10 = 0x1.344134p-2f;
+ const float cc_log10 = 0x1.09f79ep-26f;
+
+ // c + cc is ln(2) to more than 49 bits
+ const float c_log = 0x1.62e42ep-1f;
+ const float cc_log = 0x1.efa39ep-25f;
+
+ SDValue C = DAG.getConstantFP(IsLog10 ? c_log10 : c_log, DL, VT);
+ SDValue CC = DAG.getConstantFP(IsLog10 ? cc_log10 : cc_log, DL, VT);
+
+ R = DAG.getNode(ISD::FMUL, DL, VT, Y, C, Flags);
+ SDValue NegR = DAG.getNode(ISD::FNEG, DL, VT, R, Flags);
+ SDValue FMA0 = DAG.getNode(ISD::FMA, DL, VT, Y, C, NegR, Flags);
+ SDValue FMA1 = DAG.getNode(ISD::FMA, DL, VT, Y, CC, FMA0, Flags);
+ R = DAG.getNode(ISD::FADD, DL, VT, R, FMA1, Flags);
+ } else {
+ // ch+ct is ln(2)/ln(10) to more than 36 bits
+ const float ch_log10 = 0x1.344000p-2f;
+ const float ct_log10 = 0x1.3509f6p-18f;
+
+ // ch + ct is ln(2) to more than 36 bits
+ const float ch_log = 0x1.62e000p-1f;
+ const float ct_log = 0x1.0bfbe8p-15f;
+
+ SDValue CH = DAG.getConstantFP(IsLog10 ? ch_log10 : ch_log, DL, VT);
+ SDValue CT = DAG.getConstantFP(IsLog10 ? ct_log10 : ct_log, DL, VT);
+
+ SDValue YAsInt = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Y);
+ SDValue MaskConst = DAG.getConstant(0xfffff000, DL, MVT::i32);
+ SDValue YHInt = DAG.getNode(ISD::AND, DL, MVT::i32, YAsInt, MaskConst);
+ SDValue YH = DAG.getNode(ISD::BITCAST, DL, MVT::f32, YHInt);
+ SDValue YT = DAG.getNode(ISD::FSUB, DL, VT, Y, YH, Flags);
+
+ SDValue YTCT = DAG.getNode(ISD::FMUL, DL, VT, YT, CT, Flags);
+ SDValue Mad0 = getMad(DAG, DL, VT, YH, CT, YTCT, Flags);
+ SDValue Mad1 = getMad(DAG, DL, VT, YT, CH, Mad0, Flags);
+ R = getMad(DAG, DL, VT, YH, CH, Mad1);
+ }
+
+ const bool IsFiniteOnly = (Flags.hasNoNaNs() || Options.NoNaNsFPMath) &&
+ (Flags.hasNoInfs() || Options.NoInfsFPMath);
+
+ // TODO: Check if known finite from source value.
+ if (!IsFiniteOnly) {
+ SDValue IsFinite = getIsFinite(DAG, Y, Flags);
+ R = DAG.getNode(ISD::SELECT, DL, VT, IsFinite, R, Y, Flags);
+ }
+
+ if (IsScaled) {
+ SDValue Zero = DAG.getConstantFP(0.0f, DL, VT);
+ SDValue ShiftK =
+ DAG.getConstantFP(IsLog10 ? 0x1.344136p+3f : 0x1.62e430p+4f, DL, VT);
+ SDValue Shift =
+ DAG.getNode(ISD::SELECT, DL, VT, IsScaled, ShiftK, Zero, Flags);
+ R = DAG.getNode(ISD::FSUB, DL, VT, R, Shift, Flags);
+ }
+
+ return R;
+}
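
A host-side sketch (not from the patch) of the split-constant multiply in LowerFLOGCommon's fast-FMA path: C + CC carries ln(2) to roughly 49 bits, and the FMAs recover the rounding error of the single multiply. libm log2 stands in for the hardware log, and the function name below is made up for illustration.

#include <cmath>
#include <cstdio>

static float LnFromLog2(float X) {
  const float C = 0x1.62e42ep-1f;   // high part of ln(2)
  const float CC = 0x1.efa39ep-25f; // low part; C + CC ~ ln(2) to ~49 bits
  float Y = std::log2(X);           // stands in for AMDGPUISD::LOG
  float R = Y * C;
  float FMA0 = std::fma(Y, C, -R);  // exact rounding error of Y * C
  float FMA1 = std::fma(Y, CC, FMA0);
  return R + FMA1;
}

int main() {
  const float Xs[] = {0.1f, 0.5f, 2.0f, 1000.0f};
  for (float X : Xs)
    std::printf("x=%g  approx=%.8g  libm=%.8g\n", X, LnFromLog2(X), std::log(X));
}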
+
+SDValue AMDGPUTargetLowering::LowerFLOG10(SDValue Op, SelectionDAG &DAG) const {
+ return LowerFLOGCommon(Op, DAG);
+}
+
+// Do f32 fast math expansion for flog or flog10. This is accurate enough for a
+// promoted f16 operation.
+SDValue AMDGPUTargetLowering::LowerFLOGUnsafe(SDValue Src, const SDLoc &SL,
+ SelectionDAG &DAG,
+ double Log2BaseInverted,
+ SDNodeFlags Flags) const {
+ EVT VT = Src.getValueType();
+ unsigned LogOp = VT == MVT::f32 ? AMDGPUISD::LOG : ISD::FLOG2;
+ SDValue Log2Operand = DAG.getNode(LogOp, SL, VT, Src, Flags);
+ SDValue Log2BaseInvertedOperand = DAG.getConstantFP(Log2BaseInverted, SL, VT);
+
+ return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand,
+ Flags);
+}
+
+SDValue AMDGPUTargetLowering::lowerFEXP2(SDValue Op, SelectionDAG &DAG) const {
+ // v_exp_f32 is good enough for OpenCL, except it doesn't handle denormals.
+ // If we have to handle denormals, scale up the input and adjust the result.
+
SDLoc SL(Op);
+ EVT VT = Op.getValueType();
SDValue Src = Op.getOperand(0);
+ SDNodeFlags Flags = Op->getFlags();
+
+ if (VT == MVT::f16) {
+ // Nothing in half is a denormal when promoted to f32.
+ assert(!Subtarget->has16BitInsts());
+ SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src, Flags);
+ SDValue Log = DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Ext, Flags);
+ return DAG.getNode(ISD::FP_ROUND, SL, VT, Log,
+ DAG.getTargetConstant(0, SL, MVT::i32), Flags);
+ }
+
+ assert(VT == MVT::f32);
+
+ if (allowApproxFunc(DAG, Flags) || !needsDenormHandlingF32(DAG, Src, Flags))
+ return DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Src, Flags);
+
+ // bool needs_scaling = x < -0x1.f80000p+6f;
+ // v_exp_f32(x + (s ? 0x1.0p+6f : 0.0f)) * (s ? 0x1.0p-64f : 1.0f);
+
+ // -nextafter(128.0, -1)
+ SDValue RangeCheckConst = DAG.getConstantFP(-0x1.f80000p+6f, SL, VT);
+
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ SDValue NeedsScaling =
+ DAG.getSetCC(SL, SetCCVT, Src, RangeCheckConst, ISD::SETOLT);
+
+ SDValue SixtyFour = DAG.getConstantFP(0x1.0p+6f, SL, VT);
+ SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
+ SDValue AddOffset =
+ DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, SixtyFour, Zero);
+
+ SDValue AddInput = DAG.getNode(ISD::FADD, SL, VT, Src, AddOffset, Flags);
+ SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, AddInput, Flags);
+
+ SDValue TwoExpNeg64 = DAG.getConstantFP(0x1.0p-64f, SL, VT);
+ SDValue One = DAG.getConstantFP(1.0, SL, VT);
+ SDValue ResultScale =
+ DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, TwoExpNeg64, One);
+
+ return DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScale, Flags);
+}
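
A scalar model (not from the patch) of the range trick in lowerFEXP2: for x below -0x1.f8p+6 the result would be denormal, so evaluate exp2(x + 64) and rescale by 2^-64. Host exp2 stands in for V_EXP_F32.

#include <cmath>
#include <cstdio>

static float Exp2Scaled(float X) {
  bool NeedsScaling = X < -0x1.f80000p+6f; // result would land in the denormal range
  float AddOffset = NeedsScaling ? 0x1.0p+6f : 0.0f;
  float Scale = NeedsScaling ? 0x1.0p-64f : 1.0f;
  return std::exp2(X + AddOffset) * Scale; // exp2(x) == exp2(x + 64) * 2^-64
}

int main() {
  const float Xs[] = {-140.0f, -130.0f, -10.0f, 3.0f};
  for (float X : Xs)
    std::printf("exp2(%g) ~ %a (libm: %a)\n", X, Exp2Scaled(X), std::exp2(X));
}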
+
+SDValue AMDGPUTargetLowering::lowerFEXPUnsafe(SDValue Op, const SDLoc &SL,
+ SelectionDAG &DAG,
+ SDNodeFlags Flags) const {
+ // exp2(M_LOG2E_F * f);
+ EVT VT = Op.getValueType();
const SDValue K = DAG.getConstantFP(numbers::log2e, SL, VT);
- SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Src, K, Op->getFlags());
- return DAG.getNode(ISD::FEXP2, SL, VT, Mul, Op->getFlags());
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, Op, K, Flags);
+ return DAG.getNode(VT == MVT::f32 ? AMDGPUISD::EXP : ISD::FEXP2, SL, VT, Mul,
+ Flags);
+}
+
+SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ SDLoc SL(Op);
+ SDValue X = Op.getOperand(0);
+ SDNodeFlags Flags = Op->getFlags();
+ const bool IsExp10 = false; // TODO: For some reason exp10 is missing
+
+ if (VT.getScalarType() == MVT::f16) {
+ // v_exp_f16 (fmul x, log2e)
+ if (allowApproxFunc(DAG, Flags)) // TODO: Does this really require fast?
+ return lowerFEXPUnsafe(X, SL, DAG, Flags);
+
+ if (VT.isVector())
+ return SDValue();
+
+ // exp(f16 x) ->
+ // fptrunc (v_exp_f32 (fmul (fpext x), log2e))
+
+ // Nothing in half is a denormal when promoted to f32.
+ SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, X, Flags);
+ SDValue Lowered = lowerFEXPUnsafe(Ext, SL, DAG, Flags);
+ return DAG.getNode(ISD::FP_ROUND, SL, VT, Lowered,
+ DAG.getTargetConstant(0, SL, MVT::i32), Flags);
+ }
+
+ assert(VT == MVT::f32);
+
+ // TODO: Interpret allowApproxFunc as ignoring DAZ. This is currently copying
+ // library behavior. Also, is known-not-daz source sufficient?
+ if (allowApproxFunc(DAG, Flags) && !needsDenormHandlingF32(DAG, X, Flags)) {
+ assert(!IsExp10 && "todo exp10 support");
+ return lowerFEXPUnsafe(X, SL, DAG, Flags);
+ }
+
+ // Algorithm:
+ //
+ // e^x = 2^(x/ln(2)) = 2^(x*(64/ln(2))/64)
+ //
+ // x*(64/ln(2)) = n + f, |f| <= 0.5, n is integer
+ // n = 64*m + j, 0 <= j < 64
+ //
+ // e^x = 2^((64*m + j + f)/64)
+ // = (2^m) * (2^(j/64)) * 2^(f/64)
+ // = (2^m) * (2^(j/64)) * e^(f*(ln(2)/64))
+ //
+ // f = x*(64/ln(2)) - n
+ // r = f*(ln(2)/64) = x - n*(ln(2)/64)
+ //
+ // e^x = (2^m) * (2^(j/64)) * e^r
+ //
+ // (2^(j/64)) is precomputed
+ //
+ // e^r = 1 + r + (r^2)/2! + (r^3)/3! + (r^4)/4! + (r^5)/5!
+ // e^r = 1 + q
+ //
+ // q = r + (r^2)/2! + (r^3)/3! + (r^4)/4! + (r^5)/5!
+ //
+ // e^x = (2^m) * ( (2^(j/64)) + q*(2^(j/64)) )
+ SDNodeFlags FlagsNoContract = Flags;
+ FlagsNoContract.setAllowContract(false);
+
+ SDValue PH, PL;
+ if (Subtarget->hasFastFMAF32()) {
+ const float c_exp = numbers::log2ef;
+ const float cc_exp = 0x1.4ae0bep-26f; // c+cc are 49 bits
+ const float c_exp10 = 0x1.a934f0p+1f;
+ const float cc_exp10 = 0x1.2f346ep-24f;
+
+ SDValue C = DAG.getConstantFP(IsExp10 ? c_exp10 : c_exp, SL, VT);
+ SDValue CC = DAG.getConstantFP(IsExp10 ? cc_exp10 : cc_exp, SL, VT);
+
+ PH = DAG.getNode(ISD::FMUL, SL, VT, X, C, Flags);
+ SDValue NegPH = DAG.getNode(ISD::FNEG, SL, VT, PH, Flags);
+ SDValue FMA0 = DAG.getNode(ISD::FMA, SL, VT, X, C, NegPH, Flags);
+ PL = DAG.getNode(ISD::FMA, SL, VT, X, CC, FMA0, Flags);
+ } else {
+ const float ch_exp = 0x1.714000p+0f;
+ const float cl_exp = 0x1.47652ap-12f; // ch + cl are 36 bits
+
+ const float ch_exp10 = 0x1.a92000p+1f;
+ const float cl_exp10 = 0x1.4f0978p-11f;
+
+ SDValue CH = DAG.getConstantFP(IsExp10 ? ch_exp10 : ch_exp, SL, VT);
+ SDValue CL = DAG.getConstantFP(IsExp10 ? cl_exp10 : cl_exp, SL, VT);
+
+ SDValue XAsInt = DAG.getNode(ISD::BITCAST, SL, MVT::i32, X);
+ SDValue MaskConst = DAG.getConstant(0xfffff000, SL, MVT::i32);
+ SDValue XHAsInt = DAG.getNode(ISD::AND, SL, MVT::i32, XAsInt, MaskConst);
+ SDValue XH = DAG.getNode(ISD::BITCAST, SL, VT, XHAsInt);
+ SDValue XL = DAG.getNode(ISD::FSUB, SL, VT, X, XH, Flags);
+
+ PH = DAG.getNode(ISD::FMUL, SL, VT, XH, CH, Flags);
+
+ SDValue XLCL = DAG.getNode(ISD::FMUL, SL, VT, XL, CL, Flags);
+ SDValue Mad0 = getMad(DAG, SL, VT, XL, CH, XLCL, Flags);
+ PL = getMad(DAG, SL, VT, XH, CL, Mad0, Flags);
+ }
+
+ SDValue E = DAG.getNode(ISD::FRINT, SL, VT, PH, Flags);
+
+ // It is unsafe to contract this fsub into the PH multiply.
+ SDValue PHSubE = DAG.getNode(ISD::FSUB, SL, VT, PH, E, FlagsNoContract);
+
+ SDValue A = DAG.getNode(ISD::FADD, SL, VT, PHSubE, PL, Flags);
+ SDValue IntE = DAG.getNode(ISD::FP_TO_SINT, SL, MVT::i32, E);
+ SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, A, Flags);
+
+ SDValue R = DAG.getNode(ISD::FLDEXP, SL, VT, Exp2, IntE, Flags);
+
+ SDValue UnderflowCheckConst =
+ DAG.getConstantFP(IsExp10 ? -0x1.66d3e8p+5f : -0x1.9d1da0p+6f, SL, VT);
+
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Zero = DAG.getConstantFP(0.0, SL, VT);
+ SDValue Underflow =
+ DAG.getSetCC(SL, SetCCVT, X, UnderflowCheckConst, ISD::SETOLT);
+
+ R = DAG.getNode(ISD::SELECT, SL, VT, Underflow, Zero, R);
+ const auto &Options = getTargetMachine().Options;
+
+ if (!Flags.hasNoInfs() && !Options.NoInfsFPMath) {
+ SDValue OverflowCheckConst =
+ DAG.getConstantFP(IsExp10 ? 0x1.344136p+5f : 0x1.62e430p+6f, SL, VT);
+ SDValue Overflow =
+ DAG.getSetCC(SL, SetCCVT, X, OverflowCheckConst, ISD::SETOGT);
+ SDValue Inf =
+ DAG.getConstantFP(APFloat::getInf(APFloat::IEEEsingle()), SL, VT);
+ R = DAG.getNode(ISD::SELECT, SL, VT, Overflow, Inf, R);
+ }
+
+ return R;
}
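
A scalar sketch (not from the patch) of the fast-FMA path in lowerFEXP: split x*log2(e) into an integer part handled by ldexp and a small fractional part handled by exp2, with a two-constant C + CC multiply for extra precision. The underflow/overflow clamps of the real lowering are omitted, and host libm routines stand in for the machine instructions.

#include <cmath>
#include <cstdio>

static float ExpViaExp2(float X) {
  const float C = 0x1.715476p+0f;   // high part of log2(e)
  const float CC = 0x1.4ae0bep-26f; // low part; C + CC ~ log2(e) to ~49 bits
  float PH = X * C;
  float PL = std::fma(X, CC, std::fma(X, C, -PH)); // low bits of x*log2(e)
  float E = std::rint(PH);                         // integer part -> exponent
  float A = (PH - E) + PL;                         // small fractional part
  return std::ldexp(std::exp2(A), (int)E);         // 2^E * 2^A
}

int main() {
  const float Xs[] = {-20.0f, -1.0f, 0.5f, 3.0f, 40.0f};
  for (float X : Xs)
    std::printf("x=%g  approx=%.8g  libm=%.8g\n", X, ExpViaExp2(X), std::exp(X));
}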
static bool isCtlzOpc(unsigned Opc) {
@@ -2518,7 +3069,7 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
ShAmt);
// On GCN, use LDEXP directly.
if (Subtarget->isGCN())
- return DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f32, FVal, ShAmt);
+ return DAG.getNode(ISD::FLDEXP, SL, MVT::f32, FVal, ShAmt);
// Otherwise, align 'ShAmt' to the exponent part and add it into the exponent
// part directly to emulate the multiplication of 2^ShAmt. That 8-bit
@@ -2551,7 +3102,7 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo);
- SDValue LdExp = DAG.getNode(AMDGPUISD::LDEXP, SL, MVT::f64, CvtHi,
+ SDValue LdExp = DAG.getNode(ISD::FLDEXP, SL, MVT::f64, CvtHi,
DAG.getConstant(32, SL, MVT::i32));
// TODO: Should this propagate fast-math-flags?
return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo);
@@ -2670,15 +3221,17 @@ SDValue AMDGPUTargetLowering::LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG,
SDValue K0, K1;
if (SrcVT == MVT::f64) {
- K0 = DAG.getConstantFP(BitsToDouble(UINT64_C(/*2^-32*/ 0x3df0000000000000)),
- SL, SrcVT);
- K1 = DAG.getConstantFP(BitsToDouble(UINT64_C(/*-2^32*/ 0xc1f0000000000000)),
- SL, SrcVT);
+ K0 = DAG.getConstantFP(
+ llvm::bit_cast<double>(UINT64_C(/*2^-32*/ 0x3df0000000000000)), SL,
+ SrcVT);
+ K1 = DAG.getConstantFP(
+ llvm::bit_cast<double>(UINT64_C(/*-2^32*/ 0xc1f0000000000000)), SL,
+ SrcVT);
} else {
- K0 = DAG.getConstantFP(BitsToFloat(UINT32_C(/*2^-32*/ 0x2f800000)), SL,
- SrcVT);
- K1 = DAG.getConstantFP(BitsToFloat(UINT32_C(/*-2^32*/ 0xcf800000)), SL,
- SrcVT);
+ K0 = DAG.getConstantFP(
+ llvm::bit_cast<float>(UINT32_C(/*2^-32*/ 0x2f800000)), SL, SrcVT);
+ K1 = DAG.getConstantFP(
+ llvm::bit_cast<float>(UINT32_C(/*-2^32*/ 0xcf800000)), SL, SrcVT);
}
// TODO: Should this propagate fast-math-flags?
SDValue Mul = DAG.getNode(ISD::FMUL, SL, SrcVT, Trunc, K0);
@@ -3128,6 +3681,17 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine(
SDValue Src = N->getOperand(1);
return Src.isUndef() ? Src : SDValue();
}
+ case Intrinsic::amdgcn_frexp_exp: {
+ // frexp_exp (fneg x) -> frexp_exp x
+ // frexp_exp (fabs x) -> frexp_exp x
+ // frexp_exp (fneg (fabs x)) -> frexp_exp x
+ SDValue Src = N->getOperand(1);
+ SDValue PeekSign = peekFPSignOps(Src);
+ if (PeekSign == Src)
+ return SDValue();
+ return SDValue(DCI.DAG.UpdateNodeOperands(N, N->getOperand(0), PeekSign),
+ 0);
+ }
default:
return SDValue();
}
@@ -3419,6 +3983,16 @@ static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL,
return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, MulLo, MulHi);
}
+/// If \p V is an add of a constant 1, returns the other operand. Otherwise
+/// return SDValue().
+static SDValue getAddOneOp(const SDNode *V) {
+ if (V->getOpcode() != ISD::ADD)
+ return SDValue();
+
+ auto *C = dyn_cast<ConstantSDNode>(V->getOperand(1));
+ return C && C->isOne() ? V->getOperand(0) : SDValue();
+}
+
SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
@@ -3434,16 +4008,49 @@ SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
if (VT.isVector() || Size > 64)
return SDValue();
- // There are i16 integer mul/mad.
- if (Subtarget->has16BitInsts() && VT.getScalarType().bitsLE(MVT::i16))
- return SDValue();
-
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ // Undo InstCombine canonicalize X * (Y + 1) -> X * Y + X to enable mad
+ // matching.
+
+ // mul x, (add y, 1) -> add (mul x, y), x
+ auto IsFoldableAdd = [](SDValue V) -> SDValue {
+ SDValue AddOp = getAddOneOp(V.getNode());
+ if (!AddOp)
+ return SDValue();
+
+ if (V.hasOneUse() || all_of(V->uses(), [](const SDNode *U) -> bool {
+ return U->getOpcode() == ISD::MUL;
+ }))
+ return AddOp;
+
+ return SDValue();
+ };
+
+ // FIXME: The selection pattern is not properly checking for commuted
+ // operands, so we have to place the mul in the LHS
+ if (SDValue MulOper = IsFoldableAdd(N0)) {
+ SDValue MulVal = DAG.getNode(N->getOpcode(), DL, VT, N1, MulOper);
+ return DAG.getNode(ISD::ADD, DL, VT, MulVal, N1);
+ }
+
+ if (SDValue MulOper = IsFoldableAdd(N1)) {
+ SDValue MulVal = DAG.getNode(N->getOpcode(), DL, VT, N0, MulOper);
+ return DAG.getNode(ISD::ADD, DL, VT, MulVal, N0);
+ }
+
+ // Skip if already mul24.
+ if (N->getOpcode() != ISD::MUL)
+ return SDValue();
+
+ // There are i16 integer mul/mad.
+ if (Subtarget->has16BitInsts() && VT.getScalarType().bitsLE(MVT::i16))
+ return SDValue();
+
// SimplifyDemandedBits has the annoying habit of turning useful zero_extends
// in the source into any_extends if the result of the mul is truncated. Since
// we can assume the high bits are whatever we want, use the underlying value
@@ -3583,12 +4190,6 @@ SDValue AMDGPUTargetLowering::performMulhuCombine(SDNode *N,
return DAG.getZExtOrTrunc(Mulhi, DL, VT);
}
-static bool isNegativeOne(SDValue Val) {
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val))
- return C->isAllOnes();
- return false;
-}
-
SDValue AMDGPUTargetLowering::getFFBX_U32(SelectionDAG &DAG,
SDValue Op,
const SDLoc &DL,
@@ -3631,7 +4232,7 @@ SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue C
// select (setcc x, 0, eq), -1, (cttz_zero_undef x) -> ffbl_u32 x
if (CCOpcode == ISD::SETEQ &&
(isCtlzOpc(RHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) &&
- RHS.getOperand(0) == CmpLHS && isNegativeOne(LHS)) {
+ RHS.getOperand(0) == CmpLHS && isAllOnesConstant(LHS)) {
unsigned Opc =
isCttzOpc(RHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;
return getFFBX_U32(DAG, CmpLHS, SL, Opc);
@@ -3641,7 +4242,7 @@ SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue C
// select (setcc x, 0, ne), (cttz_zero_undef x), -1 -> ffbl_u32 x
if (CCOpcode == ISD::SETNE &&
(isCtlzOpc(LHS.getOpcode()) || isCttzOpc(LHS.getOpcode())) &&
- LHS.getOperand(0) == CmpLHS && isNegativeOne(RHS)) {
+ LHS.getOperand(0) == CmpLHS && isAllOnesConstant(RHS)) {
unsigned Opc =
isCttzOpc(LHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;
@@ -3673,8 +4274,9 @@ static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI,
//
// select c, (fabs x), (fabs y) -> fabs (select c, x, y)
// select c, (fabs x), +k -> fabs (select c, x, k)
-static SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
- SDValue N) {
+SDValue
+AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
+ SDValue N) const {
SelectionDAG &DAG = DCI.DAG;
SDValue Cond = N.getOperand(0);
SDValue LHS = N.getOperand(1);
@@ -3683,6 +4285,9 @@ static SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
EVT VT = N.getValueType();
if ((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) ||
(LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) {
+ if (!AMDGPUTargetLowering::allUsesHaveSourceMods(N.getNode()))
+ return SDValue();
+
return distributeOpThroughSelect(DCI, LHS.getOpcode(),
SDLoc(N), Cond, LHS, RHS);
}
@@ -3695,7 +4300,8 @@ static SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
// TODO: Support vector constants.
ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS);
- if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS) {
+ if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS &&
+ !selectSupportsSourceMods(N.getNode())) {
SDLoc SL(N);
// If one side is an fneg/fabs and the other is a constant, we can push the
// fneg/fabs down. If it's an fabs, the constant needs to be non-negative.
@@ -3707,17 +4313,31 @@ static SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
if (NewLHS.hasOneUse()) {
unsigned Opc = NewLHS.getOpcode();
- if (LHS.getOpcode() == ISD::FNEG && fnegFoldsIntoOp(Opc))
+ if (LHS.getOpcode() == ISD::FNEG && fnegFoldsIntoOp(NewLHS.getNode()))
ShouldFoldNeg = false;
if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL)
ShouldFoldNeg = false;
}
if (ShouldFoldNeg) {
+ if (LHS.getOpcode() == ISD::FABS && CRHS->isNegative())
+ return SDValue();
+
+ // We're going to be forced to use a source modifier anyway, there's no
+ // point to pulling the negate out unless we can get a size reduction by
+ // negating the constant.
+ //
+ // TODO: Generalize to use getCheaperNegatedExpression which doesn't know
+ // about cheaper constants.
+ if (NewLHS.getOpcode() == ISD::FABS &&
+ getConstantNegateCost(CRHS) != NegatibleCost::Cheaper)
+ return SDValue();
+
+ if (!AMDGPUTargetLowering::allUsesHaveSourceMods(N.getNode()))
+ return SDValue();
+
if (LHS.getOpcode() == ISD::FNEG)
NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
- else if (CRHS->isNegative())
- return SDValue();
if (Inv)
std::swap(NewLHS, NewRHS);
@@ -3732,7 +4352,6 @@ static SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
return SDValue();
}
-
SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0)))
@@ -3791,15 +4410,26 @@ static bool isInv2Pi(const APFloat &APF) {
// 0 and 1.0 / (0.5 * pi) do not have inline immediates, so there is an
// additional cost to negate them.
-bool AMDGPUTargetLowering::isConstantCostlierToNegate(SDValue N) const {
- if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N)) {
- if (C->isZero() && !C->isNegative())
- return true;
+TargetLowering::NegatibleCost
+AMDGPUTargetLowering::getConstantNegateCost(const ConstantFPSDNode *C) const {
+ if (C->isZero())
+ return C->isNegative() ? NegatibleCost::Cheaper : NegatibleCost::Expensive;
- if (Subtarget->hasInv2PiInlineImm() && isInv2Pi(C->getValueAPF()))
- return true;
- }
+ if (Subtarget->hasInv2PiInlineImm() && isInv2Pi(C->getValueAPF()))
+ return C->isNegative() ? NegatibleCost::Cheaper : NegatibleCost::Expensive;
+
+ return NegatibleCost::Neutral;
+}
+bool AMDGPUTargetLowering::isConstantCostlierToNegate(SDValue N) const {
+ if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N))
+ return getConstantNegateCost(C) == NegatibleCost::Expensive;
+ return false;
+}
+
+bool AMDGPUTargetLowering::isConstantCheaperToNegate(SDValue N) const {
+ if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N))
+ return getConstantNegateCost(C) == NegatibleCost::Cheaper;
return false;
}
@@ -3822,14 +4452,9 @@ static unsigned inverseMinMax(unsigned Opc) {
}
}
-SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
- SelectionDAG &DAG = DCI.DAG;
- SDValue N0 = N->getOperand(0);
- EVT VT = N->getValueType(0);
-
- unsigned Opc = N0.getOpcode();
-
+/// \return true if it's profitable to try to push an fneg into its source
+/// instruction.
+bool AMDGPUTargetLowering::shouldFoldFNegIntoSrc(SDNode *N, SDValue N0) {
// If the input has multiple uses and we can either fold the negate down, or
// the other uses cannot, give up. This both prevents unprofitable
// transformations and infinite loops: we won't repeatedly try to fold around
@@ -3838,13 +4463,27 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
// This may be able to fold into the source, but at a code size cost. Don't
// fold if the fold into the user is free.
if (allUsesHaveSourceMods(N, 0))
- return SDValue();
+ return false;
} else {
- if (fnegFoldsIntoOp(Opc) &&
+ if (fnegFoldsIntoOp(N0.getNode()) &&
(allUsesHaveSourceMods(N) || !allUsesHaveSourceMods(N0.getNode())))
- return SDValue();
+ return false;
}
+ return true;
+}
+
+SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ unsigned Opc = N0.getOpcode();
+
+ if (!shouldFoldFNegIntoSrc(N, N0))
+ return SDValue();
+
SDLoc SL(N);
switch (Opc) {
case ISD::FADD: {
@@ -4027,6 +4666,67 @@ SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N,
DAG.getConstant(0x8000, SL, SrcVT));
return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFNeg);
}
+ case ISD::SELECT: {
+ // fneg (select c, a, b) -> select c, (fneg a), (fneg b)
+ // TODO: Invert conditions of foldFreeOpFromSelect
+ return SDValue();
+ }
+ case ISD::BITCAST: {
+ SDLoc SL(N);
+ SDValue BCSrc = N0.getOperand(0);
+ if (BCSrc.getOpcode() == ISD::BUILD_VECTOR) {
+ SDValue HighBits = BCSrc.getOperand(BCSrc.getNumOperands() - 1);
+ if (HighBits.getValueType().getSizeInBits() != 32 ||
+ !fnegFoldsIntoOp(HighBits.getNode()))
+ return SDValue();
+
+ // f64 fneg only really needs to operate on the high half of the
+ // register, so try to force it to an f32 operation to help make use of
+ // source modifiers.
+ //
+ // fneg (f64 (bitcast (build_vector x, y))) ->
+ // f64 (bitcast (build_vector (bitcast i32:x to f32),
+ // (fneg (bitcast i32:y to f32)))
+
+ SDValue CastHi = DAG.getNode(ISD::BITCAST, SL, MVT::f32, HighBits);
+ SDValue NegHi = DAG.getNode(ISD::FNEG, SL, MVT::f32, CastHi);
+ SDValue CastBack =
+ DAG.getNode(ISD::BITCAST, SL, HighBits.getValueType(), NegHi);
+
+ SmallVector<SDValue, 8> Ops(BCSrc->op_begin(), BCSrc->op_end());
+ Ops.back() = CastBack;
+ DCI.AddToWorklist(NegHi.getNode());
+ SDValue Build =
+ DAG.getNode(ISD::BUILD_VECTOR, SL, BCSrc.getValueType(), Ops);
+ SDValue Result = DAG.getNode(ISD::BITCAST, SL, VT, Build);
+
+ if (!N0.hasOneUse())
+ DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Result));
+ return Result;
+ }
+
+ if (BCSrc.getOpcode() == ISD::SELECT && VT == MVT::f32 &&
+ BCSrc.hasOneUse()) {
+ // fneg (bitcast (f32 (select cond, i32:lhs, i32:rhs))) ->
+ // select cond, (bitcast i32:lhs to f32), (bitcast i32:rhs to f32)
+
+ // TODO: Cast back result for multiple uses is beneficial in some cases.
+
+ SDValue LHS =
+ DAG.getNode(ISD::BITCAST, SL, MVT::f32, BCSrc.getOperand(1));
+ SDValue RHS =
+ DAG.getNode(ISD::BITCAST, SL, MVT::f32, BCSrc.getOperand(2));
+
+ SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, MVT::f32, LHS);
+ SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, MVT::f32, RHS);
+
+ return DAG.getNode(ISD::SELECT, SL, MVT::f32, BCSrc.getOperand(0), NegLHS,
+ NegRHS);
+ }
+
+ return SDValue();
+ }
default:
return SDValue();
}
@@ -4158,6 +4858,15 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return performTruncateCombine(N, DCI);
case ISD::MUL:
return performMulCombine(N, DCI);
+ case AMDGPUISD::MUL_U24:
+ case AMDGPUISD::MUL_I24: {
+ if (SDValue Simplified = simplifyMul24(N, DCI))
+ return Simplified;
+ return performMulCombine(N, DCI);
+ }
+ case AMDGPUISD::MULHI_I24:
+ case AMDGPUISD::MULHI_U24:
+ return simplifyMul24(N, DCI);
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI:
return performMulLoHiCombine(N, DCI);
@@ -4165,11 +4874,6 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return performMulhsCombine(N, DCI);
case ISD::MULHU:
return performMulhuCombine(N, DCI);
- case AMDGPUISD::MUL_I24:
- case AMDGPUISD::MUL_U24:
- case AMDGPUISD::MULHI_I24:
- case AMDGPUISD::MULHI_U24:
- return simplifyMul24(N, DCI);
case ISD::SELECT:
return performSelectCombine(N, DCI);
case ISD::FNEG:
@@ -4365,7 +5069,7 @@ SDValue AMDGPUTargetLowering::loadInputValue(SelectionDAG &DAG,
return V;
unsigned Mask = Arg.getMask();
- unsigned Shift = countTrailingZeros<unsigned>(Mask);
+ unsigned Shift = llvm::countr_zero<unsigned>(Mask);
V = DAG.getNode(ISD::SRL, SL, VT, V,
DAG.getShiftAmountConstant(Shift, VT, SL));
return DAG.getNode(ISD::AND, SL, VT, V,
@@ -4373,14 +5077,11 @@ SDValue AMDGPUTargetLowering::loadInputValue(SelectionDAG &DAG,
}
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
- const MachineFunction &MF, const ImplicitParameter Param) const {
- const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
- const AMDGPUSubtarget &ST =
- AMDGPUSubtarget::get(getTargetMachine(), MF.getFunction());
- unsigned ExplicitArgOffset = ST.getExplicitKernelArgOffset(MF.getFunction());
- const Align Alignment = ST.getAlignmentForImplicitArgPtr();
- uint64_t ArgOffset = alignTo(MFI->getExplicitKernArgSize(), Alignment) +
- ExplicitArgOffset;
+ uint64_t ExplicitKernArgSize, const ImplicitParameter Param) const {
+ unsigned ExplicitArgOffset = Subtarget->getExplicitKernelArgOffset();
+ const Align Alignment = Subtarget->getAlignmentForImplicitArgPtr();
+ uint64_t ArgOffset =
+ alignTo(ExplicitKernArgSize, Alignment) + ExplicitArgOffset;
switch (Param) {
case FIRST_IMPLICIT:
return ArgOffset;
@@ -4394,6 +5095,12 @@ uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
llvm_unreachable("unexpected implicit parameter type");
}
+uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
+ const MachineFunction &MF, const ImplicitParameter Param) const {
+ const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
+ return getImplicitParameterOffset(MFI->getExplicitKernArgSize(), Param);
+}
+
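As a quick illustration of the offset arithmetic in the new overload (made-up example values, not taken from any particular subtarget):

  #include <cstdint>

  // Same rounding as llvm::alignTo: round Size up to the next multiple of
  // Align, which must be a power of two.
  inline uint64_t alignToPow2(uint64_t Size, uint64_t Align) {
    return (Size + Align - 1) & ~(Align - 1);
  }

  // FIRST_IMPLICIT offset = aligned explicit kernarg size + explicit-arg
  // offset, e.g. alignToPow2(36, 8) + 0 == 40.
  inline uint64_t implicitArgBase(uint64_t ExplicitKernArgSize,
                                  uint64_t Alignment,
                                  uint64_t ExplicitArgOffset) {
    return alignToPow2(ExplicitKernArgSize, Alignment) + ExplicitArgOffset;
  }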
#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -4409,10 +5116,12 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(LOOP)
NODE_NAME_CASE(CALL)
NODE_NAME_CASE(TC_RETURN)
+ NODE_NAME_CASE(TC_RETURN_GFX)
NODE_NAME_CASE(TRAP)
- NODE_NAME_CASE(RET_FLAG)
+ NODE_NAME_CASE(RET_GLUE)
NODE_NAME_CASE(RETURN_TO_EPILOG)
NODE_NAME_CASE(ENDPGM)
+ NODE_NAME_CASE(ENDPGM_TRAP)
NODE_NAME_CASE(DWORDADDR)
NODE_NAME_CASE(FRACT)
NODE_NAME_CASE(SETCC)
@@ -4444,9 +5153,10 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(RSQ)
NODE_NAME_CASE(RCP_LEGACY)
NODE_NAME_CASE(RCP_IFLAG)
+ NODE_NAME_CASE(LOG)
+ NODE_NAME_CASE(EXP)
NODE_NAME_CASE(FMUL_LEGACY)
NODE_NAME_CASE(RSQ_CLAMP)
- NODE_NAME_CASE(LDEXP)
NODE_NAME_CASE(FP_CLASS)
NODE_NAME_CASE(DOT4)
NODE_NAME_CASE(CARRY)
@@ -4508,8 +5218,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16)
NODE_NAME_CASE(DS_ORDERED_COUNT)
NODE_NAME_CASE(ATOMIC_CMP_SWAP)
- NODE_NAME_CASE(ATOMIC_INC)
- NODE_NAME_CASE(ATOMIC_DEC)
NODE_NAME_CASE(ATOMIC_LOAD_FMIN)
NODE_NAME_CASE(ATOMIC_LOAD_FMAX)
NODE_NAME_CASE(BUFFER_LOAD)
@@ -4725,31 +5433,38 @@ void AMDGPUTargetLowering::computeKnownBitsForTargetNode(
Known.Zero.setLowBits(Log2(Alignment));
break;
}
+ case AMDGPUISD::SMIN3:
+ case AMDGPUISD::SMAX3:
+ case AMDGPUISD::SMED3:
+ case AMDGPUISD::UMIN3:
+ case AMDGPUISD::UMAX3:
+ case AMDGPUISD::UMED3: {
+ KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
+ if (Known2.isUnknown())
+ break;
+
+ KnownBits Known1 = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
+ if (Known1.isUnknown())
+ break;
+
+ KnownBits Known0 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
+ if (Known0.isUnknown())
+ break;
+
+ // TODO: Handle LeadZero/LeadOne from UMIN/UMAX handling.
+ Known.Zero = Known0.Zero & Known1.Zero & Known2.Zero;
+ Known.One = Known0.One & Known1.One & Known2.One;
+ break;
+ }
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (IID) {
- case Intrinsic::amdgcn_mbcnt_lo:
- case Intrinsic::amdgcn_mbcnt_hi: {
- const GCNSubtarget &ST =
- DAG.getMachineFunction().getSubtarget<GCNSubtarget>();
- // These return at most the (wavefront size - 1) + src1
- // As long as src1 is an immediate we can calc known bits
- KnownBits Src1Known = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
- unsigned Src1ValBits = Src1Known.countMaxActiveBits();
- unsigned MaxActiveBits = std::max(Src1ValBits, ST.getWavefrontSizeLog2());
- // Cater for potential carry
- MaxActiveBits += Src1ValBits ? 1 : 0;
- unsigned Size = Op.getValueType().getSizeInBits();
- if (MaxActiveBits < Size)
- Known.Zero.setHighBits(Size - MaxActiveBits);
- break;
- }
case Intrinsic::amdgcn_workitem_id_x:
case Intrinsic::amdgcn_workitem_id_y:
case Intrinsic::amdgcn_workitem_id_z: {
unsigned MaxValue = Subtarget->getMaxWorkitemID(
DAG.getMachineFunction().getFunction(), workitemIntrinsicDim(IID));
- Known.Zero.setHighBits(countLeadingZeros(MaxValue));
+ Known.Zero.setHighBits(llvm::countl_zero(MaxValue));
break;
}
default:
@@ -4795,6 +5510,26 @@ unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode(
return 16;
case AMDGPUISD::FP_TO_FP16:
return 16;
+ case AMDGPUISD::SMIN3:
+ case AMDGPUISD::SMAX3:
+ case AMDGPUISD::SMED3:
+ case AMDGPUISD::UMIN3:
+ case AMDGPUISD::UMAX3:
+ case AMDGPUISD::UMED3: {
+ unsigned Tmp2 = DAG.ComputeNumSignBits(Op.getOperand(2), Depth + 1);
+ if (Tmp2 == 1)
+ return 1; // Early out.
+
+ unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ if (Tmp1 == 1)
+ return 1; // Early out.
+
+ unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (Tmp0 == 1)
+ return 1; // Early out.
+
+ return std::min(Tmp0, std::min(Tmp1, Tmp2));
+ }
default:
return 1;
}
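Both of the new three-operand cases rely on the same observation: min3/max3/med3 always return one of their operands, so any property shared by all three inputs (a bit known to be 0 or 1, or a minimum number of sign bits) also holds for the result. A standalone sketch with plain masks instead of llvm::KnownBits (illustrative names, not part of the patch):

  #include <algorithm>
  #include <cstdint>

  struct Known {
    uint32_t Zero = 0; // bits known to be 0
    uint32_t One = 0;  // bits known to be 1
  };

  // The result of min3/max3/med3 is one of A, B or C, so only bits known
  // in all three inputs stay known in the result.
  inline Known knownForMinMaxMed3(Known A, Known B, Known C) {
    return {A.Zero & B.Zero & C.Zero, A.One & B.One & C.One};
  }

  // Likewise the result has at least as many sign bits as the least
  // well-known input.
  inline unsigned signBitsForMinMaxMed3(unsigned A, unsigned B, unsigned C) {
    return std::min({A, B, C});
  }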
@@ -4818,6 +5553,20 @@ unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr(
return 24;
case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
return 16;
+ case AMDGPU::G_AMDGPU_SMED3:
+ case AMDGPU::G_AMDGPU_UMED3: {
+ auto [Dst, Src0, Src1, Src2] = MI->getFirst4Regs();
+ unsigned Tmp2 = Analysis.computeNumSignBits(Src2, DemandedElts, Depth + 1);
+ if (Tmp2 == 1)
+ return 1;
+ unsigned Tmp1 = Analysis.computeNumSignBits(Src1, DemandedElts, Depth + 1);
+ if (Tmp1 == 1)
+ return 1;
+ unsigned Tmp0 = Analysis.computeNumSignBits(Src0, DemandedElts, Depth + 1);
+ if (Tmp0 == 1)
+ return 1;
+ return std::min(Tmp0, std::min(Tmp1, Tmp2));
+ }
default:
return 1;
}
@@ -4871,7 +5620,7 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
// TODO: Need is known positive check.
return false;
}
- case AMDGPUISD::LDEXP:
+ case ISD::FLDEXP:
case AMDGPUISD::FRACT: {
if (SNaN)
return true;
@@ -4936,6 +5685,11 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
}
}
+bool AMDGPUTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
+ Register N0, Register N1) const {
+ return MRI.hasOneNonDBGUse(N0); // FIXME: handle regbanks
+}
+
TargetLowering::AtomicExpansionKind
AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
switch (RMW->getOperation()) {
@@ -4962,3 +5716,22 @@ bool AMDGPUTargetLowering::isConstantUnsignedBitfieldExtractLegal(
return (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64)) &&
Ty2 == LLT::scalar(32);
}
+
+/// Whether it is profitable to sink the operands of an
+/// Instruction I to the basic block of I.
+/// This helps use source modifiers (like abs and neg) more often.
+bool AMDGPUTargetLowering::shouldSinkOperands(
+ Instruction *I, SmallVectorImpl<Use *> &Ops) const {
+ using namespace PatternMatch;
+
+ for (auto &Op : I->operands()) {
+ // Ensure we are not already sinking this operand.
+ if (any_of(Ops, [&](Use *U) { return U->get() == Op.get(); }))
+ continue;
+
+ if (match(&Op, m_FAbs(m_Value())) || match(&Op, m_FNeg(m_Value())))
+ Ops.push_back(&Op);
+ }
+
+ return !Ops.empty();
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index bc3b57a82d08..26b91155ba85 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -60,8 +60,23 @@ protected:
SDValue LowerFROUNDEVEN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFLOG(SDValue Op, SelectionDAG &DAG,
- double Log2BaseInverted) const;
+
+ SDValue getIsLtSmallestNormal(SelectionDAG &DAG, SDValue Op,
+ SDNodeFlags Flags) const;
+ SDValue getIsFinite(SelectionDAG &DAG, SDValue Op, SDNodeFlags Flags) const;
+ std::pair<SDValue, SDValue> getScaledLogInput(SelectionDAG &DAG,
+ const SDLoc SL, SDValue Op,
+ SDNodeFlags Flags) const;
+
+ SDValue LowerFLOG2(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLOGCommon(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLOG10(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFLOGUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
+ double Log2BaseInverted, SDNodeFlags Flags) const;
+ SDValue lowerFEXP2(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
+ SDNodeFlags Flags) const;
SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const;
@@ -97,9 +112,16 @@ protected:
SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
SDValue RHS, DAGCombinerInfo &DCI) const;
+
+ SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
+ SDValue N) const;
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ TargetLowering::NegatibleCost
+ getConstantNegateCost(const ConstantFPSDNode *C) const;
+
bool isConstantCostlierToNegate(SDValue N) const;
+ bool isConstantCheaperToNegate(SDValue N) const;
SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -156,6 +178,7 @@ public:
return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}
+ static bool shouldFoldFNegIntoSrc(SDNode *FNeg, SDValue FNegSrc);
static bool allUsesHaveSourceMods(const SDNode *N,
unsigned CostThreshold = 4);
bool isFAbsFree(EVT VT) const override;
@@ -165,14 +188,13 @@ public:
bool isZExtFree(Type *Src, Type *Dest) const override;
bool isZExtFree(EVT Src, EVT Dest) const override;
- bool isZExtFree(SDValue Val, EVT VT2) const override;
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
bool LegalOperations, bool ForCodeSize,
NegatibleCost &Cost,
unsigned Depth) const override;
- bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
+ bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override;
bool isDesirableToCommuteWithShift(const SDNode *N,
CombineLevel Level) const override;
@@ -193,7 +215,7 @@ public:
bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG,
const MachineMemOperand &MMO) const final;
- bool storeOfVectorConstantIsCheap(EVT MemVT,
+ bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT,
unsigned NumElem,
unsigned AS) const override;
bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override;
@@ -229,6 +251,10 @@ public:
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
+ SDValue combineFMinMaxLegacyImpl(const SDLoc &DL, EVT VT, SDValue LHS,
+ SDValue RHS, SDValue True, SDValue False,
+ SDValue CC, DAGCombinerInfo &DCI) const;
+
SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True, SDValue False,
SDValue CC, DAGCombinerInfo &DCI) const;
@@ -281,6 +307,9 @@ public:
bool SNaN = false,
unsigned Depth = 0) const override;
+ bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
+ Register N1) const override;
+
/// Helper function that adds Reg to the LiveIn list of the DAG's
/// MachineFunction.
///
@@ -333,6 +362,8 @@ public:
/// type of implicit parameter.
uint32_t getImplicitParameterOffset(const MachineFunction &MF,
const ImplicitParameter Param) const;
+ uint32_t getImplicitParameterOffset(const uint64_t ExplicitKernArgSize,
+ const ImplicitParameter Param) const;
MVT getFenceOperandTy(const DataLayout &DL) const override {
return MVT::i32;
@@ -342,6 +373,9 @@ public:
bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1,
LLT Ty2) const override;
+
+ bool shouldSinkOperands(Instruction *I,
+ SmallVectorImpl<Use *> &Ops) const override;
};
namespace AMDGPUISD {
@@ -356,6 +390,7 @@ enum NodeType : unsigned {
// Function call.
CALL,
TC_RETURN,
+ TC_RETURN_GFX,
TRAP,
// Masked control flow nodes.
@@ -366,11 +401,14 @@ enum NodeType : unsigned {
// A uniform kernel return that terminates the wavefront.
ENDPGM,
+ // s_endpgm, but we may want to insert it in the middle of the block.
+ ENDPGM_TRAP,
+
// Return to a shader part's epilog code.
RETURN_TO_EPILOG,
// Return with values from a non-entry function.
- RET_FLAG,
+ RET_GLUE,
DWORDADDR,
FRACT,
@@ -421,9 +459,15 @@ enum NodeType : unsigned {
RSQ,
RCP_LEGACY,
RCP_IFLAG,
+
+ // log2, no denormal handling for f32.
+ LOG,
+
+ // exp2, no denormal handling for f32.
+ EXP,
+
FMUL_LEGACY,
RSQ_CLAMP,
- LDEXP,
FP_CLASS,
DOT4,
CARRY,
@@ -505,8 +549,6 @@ enum NodeType : unsigned {
TBUFFER_LOAD_FORMAT_D16,
DS_ORDERED_COUNT,
ATOMIC_CMP_SWAP,
- ATOMIC_INC,
- ATOMIC_DEC,
ATOMIC_LOAD_FMIN,
ATOMIC_LOAD_FMAX,
BUFFER_LOAD,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp
index c9cdbc89f3a4..7619a39bac9c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertDelayAlu.cpp
@@ -51,7 +51,7 @@ public:
MI.getOpcode() == AMDGPU::S_SENDMSG_RTN_B64)
return true;
if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
- (MI.getOperand(0).getImm() & 0xf000) == 0)
+ AMDGPU::DepCtr::decodeFieldVaVdst(MI.getOperand(0).getImm()) == 0)
return true;
return false;
}
@@ -77,11 +77,15 @@ public:
struct DelayInfo {
// One larger than the maximum number of (non-TRANS) VALU instructions we
// can encode in an s_delay_alu instruction.
- static const unsigned VALU_MAX = 5;
+ static constexpr unsigned VALU_MAX = 5;
// One larger than the maximum number of TRANS instructions we can encode in
// an s_delay_alu instruction.
- static const unsigned TRANS_MAX = 4;
+ static constexpr unsigned TRANS_MAX = 4;
+
+ // One larger than the maximum number of SALU cycles we can encode in an
+ // s_delay_alu instruction.
+ static constexpr unsigned SALU_CYCLES_MAX = 4;
// If it was written by a (non-TRANS) VALU, remember how many clock cycles
// are left until it completes, and how many other (non-TRANS) VALU we have
@@ -120,7 +124,9 @@ public:
TRANSNumVALU = 0;
break;
case SALU:
- SALUCycles = Cycles;
+ // Guard against pseudo-instructions like SI_CALL which are marked as
+ // SALU but with a very high latency.
+ SALUCycles = std::min(Cycles, SALU_CYCLES_MAX);
break;
}
}
@@ -278,6 +284,7 @@ public:
// Wait for an SALU instruction.
if (Delay.SALUCycles) {
+ assert(Delay.SALUCycles < DelayInfo::SALU_CYCLES_MAX);
if (Imm & 0x780) {
// We have already encoded a VALU and a TRANS delay. There's no room in
// the encoding for an SALU delay as well, so just drop it.
@@ -349,6 +356,7 @@ public:
if (instructionWaitsForVALU(MI)) {
// Forget about all outstanding VALU delays.
+ // TODO: This is overkill since it also forgets about SALU delays.
State = DelayState();
} else if (Type != OTHER) {
DelayInfo Delay;
@@ -360,11 +368,11 @@ public:
// ignore this operand.
if (MI.getOpcode() == AMDGPU::V_WRITELANE_B32 && Op.isTied())
continue;
- for (MCRegUnitIterator UI(Op.getReg(), TRI); UI.isValid(); ++UI) {
- auto It = State.find(*UI);
+ for (MCRegUnit Unit : TRI->regunits(Op.getReg())) {
+ auto It = State.find(Unit);
if (It != State.end()) {
Delay.merge(It->second);
- State.erase(*UI);
+ State.erase(Unit);
}
}
}
@@ -380,9 +388,9 @@ public:
// TODO: Scan implicit defs too?
for (const auto &Op : MI.defs()) {
unsigned Latency = SchedModel.computeOperandLatency(
- &MI, MI.getOperandNo(&Op), nullptr, 0);
- for (MCRegUnitIterator UI(Op.getReg(), TRI); UI.isValid(); ++UI)
- State[*UI] = DelayInfo(Type, Latency);
+ &MI, Op.getOperandNo(), nullptr, 0);
+ for (MCRegUnit Unit : TRI->regunits(Op.getReg()))
+ State[Unit] = DelayInfo(Type, Latency);
}
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 62c3eec41836..3c399e497227 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -23,6 +23,7 @@
#include <optional>
using namespace llvm;
+using namespace llvm::PatternMatch;
#define DEBUG_TYPE "AMDGPUtti"
@@ -328,7 +329,8 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
});
}
-bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
+bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Instruction &I,
+ const Value *Op0, const Value *Op1,
InstCombiner &IC) const {
// The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
// infinity, gives +0.0. If we can prove we don't have one of the special
@@ -340,15 +342,72 @@ bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
// One operand is not zero or infinity or NaN.
return true;
}
+
auto *TLI = &IC.getTargetLibraryInfo();
- if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
- isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
+ if (isKnownNeverInfOrNaN(Op0, IC.getDataLayout(), TLI, 0,
+ &IC.getAssumptionCache(), &I,
+ &IC.getDominatorTree()) &&
+ isKnownNeverInfOrNaN(Op1, IC.getDataLayout(), TLI, 0,
+ &IC.getAssumptionCache(), &I,
+ &IC.getDominatorTree())) {
// Neither operand is infinity or NaN.
return true;
}
return false;
}
+/// Match an fpext from half to float, or a constant we can convert.
+static bool matchFPExtFromF16(Value *Arg, Value *&FPExtSrc) {
+ if (match(Arg, m_OneUse(m_FPExt(m_Value(FPExtSrc)))))
+ return FPExtSrc->getType()->isHalfTy();
+
+ ConstantFP *CFP;
+ if (match(Arg, m_ConstantFP(CFP))) {
+ bool LosesInfo;
+ APFloat Val(CFP->getValueAPF());
+ Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
+ if (LosesInfo)
+ return false;
+
+ FPExtSrc = ConstantFP::get(Type::getHalfTy(Arg->getContext()), Val);
+ return true;
+ }
+
+ return false;
+}
+
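matchFPExtFromF16 above relies on the LosesInfo out-parameter of APFloat::convert to decide whether a constant can be narrowed to half exactly. A self-contained sketch of just that check (the helper name fitsInHalf is illustrative):

  #include "llvm/ADT/APFloat.h"

  using namespace llvm;

  // True if Val can be narrowed to IEEE half without changing its value,
  // i.e. the conversion neither rounds nor over/underflows.
  static bool fitsInHalf(const APFloat &Val) {
    bool LosesInfo = false;
    APFloat Tmp = Val;
    Tmp.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
    return !LosesInfo;
  }
  // e.g. fitsInHalf(APFloat(1.5f)) is true; fitsInHalf(APFloat(1.0e10f)) is false.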
+// Trim all zero components from the end of the vector \p UseV and return
+// an appropriate bitset with known elements.
+static APInt trimTrailingZerosInVector(InstCombiner &IC, Value *UseV,
+ Instruction *I) {
+ auto *VTy = cast<FixedVectorType>(UseV->getType());
+ unsigned VWidth = VTy->getNumElements();
+ APInt DemandedElts = APInt::getAllOnes(VWidth);
+
+ for (int i = VWidth - 1; i > 0; --i) {
+ auto *Elt = findScalarElement(UseV, i);
+ if (!Elt)
+ break;
+
+ if (auto *ConstElt = dyn_cast<Constant>(Elt)) {
+ if (!ConstElt->isNullValue() && !isa<UndefValue>(Elt))
+ break;
+ } else {
+ break;
+ }
+
+ DemandedElts.clearBit(i);
+ }
+
+ return DemandedElts;
+}
+
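The helper above shrinks vector stores by clearing the demanded-element bits of a trailing run of zero/undef components; element 0 always stays demanded because the loop stops at i == 0. A rough standalone model of the mask computation, using a plain integer instead of APInt (assumes fewer than 64 elements):

  #include <cstdint>
  #include <vector>

  // IsZeroOrUndef[i] says whether element i of the store payload is a
  // known zero or undef constant. Bits are cleared only for the trailing
  // run of such elements; element 0 is always kept.
  inline uint64_t demandedStoreElts(const std::vector<bool> &IsZeroOrUndef) {
    const unsigned N = static_cast<unsigned>(IsZeroOrUndef.size());
    uint64_t Mask = (N >= 64) ? ~0ull : ((1ull << N) - 1);
    for (int i = static_cast<int>(N) - 1; i > 0 && IsZeroOrUndef[i]; --i)
      Mask &= ~(1ull << i);
    return Mask;
  }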
+static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
+ IntrinsicInst &II,
+ APInt DemandedElts,
+ int DMaskIdx = -1,
+ bool IsLoad = true);
+
std::optional<Instruction *>
GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
Intrinsic::ID IID = II.getIntrinsicID();
@@ -393,6 +452,54 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
}
+ case Intrinsic::amdgcn_log:
+ case Intrinsic::amdgcn_exp2: {
+ const bool IsLog = IID == Intrinsic::amdgcn_log;
+ const bool IsExp = IID == Intrinsic::amdgcn_exp2;
+ Value *Src = II.getArgOperand(0);
+ Type *Ty = II.getType();
+
+ if (isa<PoisonValue>(Src))
+ return IC.replaceInstUsesWith(II, Src);
+
+ if (IC.getSimplifyQuery().isUndefValue(Src))
+ return IC.replaceInstUsesWith(II, ConstantFP::getNaN(Ty));
+
+ if (ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
+ if (C->isInfinity()) {
+ // exp2(+inf) -> +inf
+ // log2(+inf) -> +inf
+ if (!C->isNegative())
+ return IC.replaceInstUsesWith(II, C);
+
+ // exp2(-inf) -> 0
+ if (IsExp && C->isNegative())
+ return IC.replaceInstUsesWith(II, ConstantFP::getZero(Ty));
+ }
+
+ if (II.isStrictFP())
+ break;
+
+ if (C->isNaN()) {
+ Constant *Quieted = ConstantFP::get(Ty, C->getValue().makeQuiet());
+ return IC.replaceInstUsesWith(II, Quieted);
+ }
+
+ // f32 instruction doesn't handle denormals, f16 does.
+ if (C->isZero() || (C->getValue().isDenormal() && Ty->isFloatTy())) {
+ Constant *FoldedValue = IsLog ? ConstantFP::getInfinity(Ty, true)
+ : ConstantFP::get(Ty, 1.0);
+ return IC.replaceInstUsesWith(II, FoldedValue);
+ }
+
+ if (IsLog && C->isNegative())
+ return IC.replaceInstUsesWith(II, ConstantFP::getNaN(Ty));
+
+ // TODO: Full constant folding matching hardware behavior.
+ }
+
+ break;
+ }
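For reference, the special-value rules the new amdgcn_log/amdgcn_exp2 case implements, sketched for the f32 log path only as a standalone function (the exp path, poison/undef handling and strict-FP bail-out are omitted; illustrative, not the in-tree folding):

  #include <cmath>
  #include <limits>
  #include <optional>

  // Fold log2(X) for the special inputs handled above. f32 hardware
  // flushes denormal inputs, so denormals fold the same way as zero.
  inline std::optional<float> foldLog2Special(float X) {
    if (std::isnan(X))
      return std::numeric_limits<float>::quiet_NaN();  // quieted NaN
    if (std::isinf(X) && X > 0.0f)
      return X;                                        // log2(+inf) -> +inf
    if (X == 0.0f || std::fpclassify(X) == FP_SUBNORMAL)
      return -std::numeric_limits<float>::infinity();  // log2(+/-0, denorm) -> -inf
    if (X < 0.0f)
      return std::numeric_limits<float>::quiet_NaN();  // log2(negative) -> NaN
    return std::nullopt; // not a handled special case
  }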
case Intrinsic::amdgcn_frexp_mant:
case Intrinsic::amdgcn_frexp_exp: {
Value *Src = II.getArgOperand(0);
@@ -423,85 +530,31 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
Value *Src0 = II.getArgOperand(0);
Value *Src1 = II.getArgOperand(1);
const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
- if (!CMask) {
- if (isa<UndefValue>(Src0)) {
- return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
- }
+ if (CMask) {
+ II.setCalledOperand(Intrinsic::getDeclaration(
+ II.getModule(), Intrinsic::is_fpclass, Src0->getType()));
- if (isa<UndefValue>(Src1)) {
- return IC.replaceInstUsesWith(II,
- ConstantInt::get(II.getType(), false));
- }
- break;
+ // Clamp any excess bits, as they're illegal for the generic intrinsic.
+ II.setArgOperand(1, ConstantInt::get(Src1->getType(),
+ CMask->getZExtValue() & fcAllFlags));
+ return &II;
}
- uint32_t Mask = CMask->getZExtValue();
+ // Propagate poison.
+ if (isa<PoisonValue>(Src0) || isa<PoisonValue>(Src1))
+ return IC.replaceInstUsesWith(II, PoisonValue::get(II.getType()));
- // If all tests are made, it doesn't matter what the value is.
- if ((Mask & fcAllFlags) == fcAllFlags) {
- return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
- }
-
- if ((Mask & fcAllFlags) == 0) {
+ // llvm.amdgcn.class(_, undef) -> false
+ if (IC.getSimplifyQuery().isUndefValue(Src1))
return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false));
- }
-
- if (Mask == fcNan && !II.isStrictFP()) {
- // Equivalent of isnan. Replace with standard fcmp.
- Value *FCmp = IC.Builder.CreateFCmpUNO(Src0, Src0);
- FCmp->takeName(&II);
- return IC.replaceInstUsesWith(II, FCmp);
- }
-
- if (Mask == fcZero && !II.isStrictFP()) {
- // Equivalent of == 0.
- Value *FCmp =
- IC.Builder.CreateFCmpOEQ(Src0, ConstantFP::get(Src0->getType(), 0.0));
- FCmp->takeName(&II);
- return IC.replaceInstUsesWith(II, FCmp);
+ // llvm.amdgcn.class(undef, mask) -> mask != 0
+ if (IC.getSimplifyQuery().isUndefValue(Src0)) {
+ Value *CmpMask = IC.Builder.CreateICmpNE(
+ Src1, ConstantInt::getNullValue(Src1->getType()));
+ return IC.replaceInstUsesWith(II, CmpMask);
}
-
- // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
- if ((Mask & fcNan) && isKnownNeverNaN(Src0, &IC.getTargetLibraryInfo())) {
- return IC.replaceOperand(
- II, 1, ConstantInt::get(Src1->getType(), Mask & ~fcNan));
- }
-
- const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
- if (!CVal) {
- if (isa<UndefValue>(Src0)) {
- return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
- }
-
- // Clamp mask to used bits
- if ((Mask & fcAllFlags) != Mask) {
- CallInst *NewCall = IC.Builder.CreateCall(
- II.getCalledFunction(),
- {Src0, ConstantInt::get(Src1->getType(), Mask & fcAllFlags)});
-
- NewCall->takeName(&II);
- return IC.replaceInstUsesWith(II, NewCall);
- }
-
- break;
- }
-
- const APFloat &Val = CVal->getValueAPF();
-
- bool Result =
- ((Mask & fcSNan) && Val.isNaN() && Val.isSignaling()) ||
- ((Mask & fcQNan) && Val.isNaN() && !Val.isSignaling()) ||
- ((Mask & fcNegInf) && Val.isInfinity() && Val.isNegative()) ||
- ((Mask & fcNegNormal) && Val.isNormal() && Val.isNegative()) ||
- ((Mask & fcNegSubnormal) && Val.isDenormal() && Val.isNegative()) ||
- ((Mask & fcNegZero) && Val.isZero() && Val.isNegative()) ||
- ((Mask & fcPosZero) && Val.isZero() && !Val.isNegative()) ||
- ((Mask & fcPosSubnormal) && Val.isDenormal() && !Val.isNegative()) ||
- ((Mask & fcPosNormal) && Val.isNormal() && !Val.isNegative()) ||
- ((Mask & fcPosInf) && Val.isInfinity() && !Val.isNegative());
-
- return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
+ break;
}
case Intrinsic::amdgcn_cvt_pkrtz: {
Value *Src0 = II.getArgOperand(0);
@@ -695,6 +748,20 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
}
}
+ if (!ST->hasMed3_16())
+ break;
+
+ Value *X, *Y, *Z;
+
+ // Repeat floating-point width reduction done for minnum/maxnum.
+ // fmed3((fpext X), (fpext Y), (fpext Z)) -> fpext (fmed3(X, Y, Z))
+ if (matchFPExtFromF16(Src0, X) && matchFPExtFromF16(Src1, Y) &&
+ matchFPExtFromF16(Src2, Z)) {
+ Value *NewCall = IC.Builder.CreateIntrinsic(IID, {X->getType()},
+ {X, Y, Z}, &II, II.getName());
+ return new FPExtInst(NewCall, II.getType());
+ }
+
break;
}
case Intrinsic::amdgcn_icmp:
@@ -835,31 +902,18 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
break;
}
+ case Intrinsic::amdgcn_mbcnt_hi: {
+ // exec_hi is all 0, so this is just a copy.
+ if (ST->isWave32())
+ return IC.replaceInstUsesWith(II, II.getArgOperand(1));
+ break;
+ }
case Intrinsic::amdgcn_ballot: {
if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
if (Src->isZero()) {
// amdgcn.ballot(i1 0) is zero.
return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
}
-
- if (Src->isOne()) {
- // amdgcn.ballot(i1 1) is exec.
- const char *RegName = "exec";
- if (II.getType()->isIntegerTy(32))
- RegName = "exec_lo";
- else if (!II.getType()->isIntegerTy(64))
- break;
-
- Function *NewF = Intrinsic::getDeclaration(
- II.getModule(), Intrinsic::read_register, II.getType());
- Metadata *MDArgs[] = {MDString::get(II.getContext(), RegName)};
- MDNode *MD = MDNode::get(II.getContext(), MDArgs);
- Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
- CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
- NewCall->addFnAttr(Attribute::Convergent);
- NewCall->takeName(&II);
- return IC.replaceInstUsesWith(II, NewCall);
- }
}
break;
}
@@ -981,13 +1035,8 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
if (II.isStrictFP())
break;
- if (C && C->isNaN()) {
- // FIXME: We just need to make the nan quiet here, but that's unavailable
- // on APFloat, only IEEEfloat
- auto *Quieted =
- ConstantFP::get(Ty, scalbn(*C, 0, APFloat::rmNearestTiesToEven));
- return IC.replaceInstUsesWith(II, Quieted);
- }
+ if (C && C->isNaN())
+ return IC.replaceInstUsesWith(II, ConstantFP::get(Ty, C->makeQuiet()));
// ldexp(x, 0) -> x
// ldexp(x, undef) -> x
@@ -1006,11 +1055,11 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
// TODO: Move to InstSimplify?
if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
match(Op1, PatternMatch::m_AnyZeroFP()))
- return IC.replaceInstUsesWith(II, ConstantFP::getNullValue(II.getType()));
+ return IC.replaceInstUsesWith(II, ConstantFP::getZero(II.getType()));
// If we can prove we don't have one of the special cases then we can use a
// normal fmul instruction instead.
- if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
+ if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) {
auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
FMul->takeName(&II);
return IC.replaceInstUsesWith(II, FMul);
@@ -1029,7 +1078,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
match(Op1, PatternMatch::m_AnyZeroFP())) {
// It's tempting to just return Op2 here, but that would give the wrong
// result if Op2 was -0.0.
- auto *Zero = ConstantFP::getNullValue(II.getType());
+ auto *Zero = ConstantFP::getZero(II.getType());
auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
FAdd->takeName(&II);
return IC.replaceInstUsesWith(II, FAdd);
@@ -1037,7 +1086,7 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
// If we can prove we don't have one of the special cases then we can use a
// normal fma instead.
- if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
+ if (canSimplifyLegacyMulToMul(II, Op0, Op1, IC)) {
II.setCalledOperand(Intrinsic::getDeclaration(
II.getModule(), Intrinsic::fma, II.getType()));
return &II;
@@ -1053,26 +1102,62 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
break;
}
- default: {
- if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
- AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
- return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
+ case Intrinsic::amdgcn_buffer_store_format:
+ case Intrinsic::amdgcn_raw_buffer_store_format:
+ case Intrinsic::amdgcn_struct_buffer_store_format:
+ case Intrinsic::amdgcn_raw_tbuffer_store:
+ case Intrinsic::amdgcn_struct_tbuffer_store:
+ case Intrinsic::amdgcn_tbuffer_store:
+ case Intrinsic::amdgcn_image_store_1d:
+ case Intrinsic::amdgcn_image_store_1darray:
+ case Intrinsic::amdgcn_image_store_2d:
+ case Intrinsic::amdgcn_image_store_2darray:
+ case Intrinsic::amdgcn_image_store_2darraymsaa:
+ case Intrinsic::amdgcn_image_store_2dmsaa:
+ case Intrinsic::amdgcn_image_store_3d:
+ case Intrinsic::amdgcn_image_store_cube:
+ case Intrinsic::amdgcn_image_store_mip_1d:
+ case Intrinsic::amdgcn_image_store_mip_1darray:
+ case Intrinsic::amdgcn_image_store_mip_2d:
+ case Intrinsic::amdgcn_image_store_mip_2darray:
+ case Intrinsic::amdgcn_image_store_mip_3d:
+ case Intrinsic::amdgcn_image_store_mip_cube: {
+ if (!isa<FixedVectorType>(II.getArgOperand(0)->getType()))
+ break;
+
+ APInt DemandedElts =
+ trimTrailingZerosInVector(IC, II.getArgOperand(0), &II);
+
+ int DMaskIdx = getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID()) ? 1 : -1;
+ if (simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, DMaskIdx,
+ false)) {
+ return IC.eraseInstFromFunction(II);
}
+
+ break;
}
}
+ if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
+ AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
+ return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
+ }
return std::nullopt;
}
/// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
///
+/// For amdgcn image and buffer store intrinsics, simplification updates the
+/// intrinsic's vector data argument (a definition) rather than the uses of the
+/// result, as is done for image and buffer loads.
/// Note: This only supports non-TFE/LWE image intrinsic calls; those have
/// struct returns.
static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
IntrinsicInst &II,
APInt DemandedElts,
- int DMaskIdx = -1) {
+ int DMaskIdx, bool IsLoad) {
- auto *IIVTy = cast<FixedVectorType>(II.getType());
+ auto *IIVTy = cast<FixedVectorType>(IsLoad ? II.getType()
+ : II.getOperand(0)->getType());
unsigned VWidth = IIVTy->getNumElements();
if (VWidth == 1)
return nullptr;
@@ -1088,7 +1173,7 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
// Buffer case.
const unsigned ActiveBits = DemandedElts.getActiveBits();
- const unsigned UnusedComponentsAtFront = DemandedElts.countTrailingZeros();
+ const unsigned UnusedComponentsAtFront = DemandedElts.countr_zero();
// Start assuming the prefix of elements is demanded, but possibly clear
// some other bits if there are trailing zeros (unused components at front)
@@ -1101,6 +1186,7 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
unsigned OffsetIdx;
switch (II.getIntrinsicID()) {
case Intrinsic::amdgcn_raw_buffer_load:
+ case Intrinsic::amdgcn_raw_ptr_buffer_load:
OffsetIdx = 1;
break;
case Intrinsic::amdgcn_s_buffer_load:
@@ -1113,6 +1199,7 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
OffsetIdx = 1;
break;
case Intrinsic::amdgcn_struct_buffer_load:
+ case Intrinsic::amdgcn_struct_ptr_buffer_load:
OffsetIdx = 2;
break;
default:
@@ -1143,13 +1230,13 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
DemandedElts &= (1 << llvm::popcount(DMaskVal)) - 1;
unsigned NewDMaskVal = 0;
- unsigned OrigLoadIdx = 0;
+ unsigned OrigLdStIdx = 0;
for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
const unsigned Bit = 1 << SrcIdx;
if (!!(DMaskVal & Bit)) {
- if (!!DemandedElts[OrigLoadIdx])
+ if (!!DemandedElts[OrigLdStIdx])
NewDMaskVal |= Bit;
- OrigLoadIdx++;
+ OrigLdStIdx++;
}
}
@@ -1157,7 +1244,7 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
}
- unsigned NewNumElts = DemandedElts.countPopulation();
+ unsigned NewNumElts = DemandedElts.popcount();
if (!NewNumElts)
return UndefValue::get(IIVTy);
@@ -1177,29 +1264,45 @@ static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
(NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
OverloadTys[0] = NewTy;
+ if (!IsLoad) {
+ SmallVector<int, 8> EltMask;
+ for (unsigned OrigStoreIdx = 0; OrigStoreIdx < VWidth; ++OrigStoreIdx)
+ if (DemandedElts[OrigStoreIdx])
+ EltMask.push_back(OrigStoreIdx);
+
+ if (NewNumElts == 1)
+ Args[0] = IC.Builder.CreateExtractElement(II.getOperand(0), EltMask[0]);
+ else
+ Args[0] = IC.Builder.CreateShuffleVector(II.getOperand(0), EltMask);
+ }
+
Function *NewIntrin = Intrinsic::getDeclaration(
II.getModule(), II.getIntrinsicID(), OverloadTys);
CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
NewCall->takeName(&II);
NewCall->copyMetadata(II);
- if (NewNumElts == 1) {
- return IC.Builder.CreateInsertElement(UndefValue::get(IIVTy), NewCall,
- DemandedElts.countTrailingZeros());
- }
+ if (IsLoad) {
+ if (NewNumElts == 1) {
+ return IC.Builder.CreateInsertElement(UndefValue::get(IIVTy), NewCall,
+ DemandedElts.countr_zero());
+ }
- SmallVector<int, 8> EltMask;
- unsigned NewLoadIdx = 0;
- for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
- if (!!DemandedElts[OrigLoadIdx])
- EltMask.push_back(NewLoadIdx++);
- else
- EltMask.push_back(NewNumElts);
- }
+ SmallVector<int, 8> EltMask;
+ unsigned NewLoadIdx = 0;
+ for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
+ if (!!DemandedElts[OrigLoadIdx])
+ EltMask.push_back(NewLoadIdx++);
+ else
+ EltMask.push_back(NewNumElts);
+ }
+
+ auto *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
- Value *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
+ return Shuffle;
+ }
- return Shuffle;
+ return NewCall;
}
std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
@@ -1211,12 +1314,18 @@ std::optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
case Intrinsic::amdgcn_buffer_load:
case Intrinsic::amdgcn_buffer_load_format:
case Intrinsic::amdgcn_raw_buffer_load:
+ case Intrinsic::amdgcn_raw_ptr_buffer_load:
case Intrinsic::amdgcn_raw_buffer_load_format:
+ case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
case Intrinsic::amdgcn_raw_tbuffer_load:
+ case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
case Intrinsic::amdgcn_s_buffer_load:
case Intrinsic::amdgcn_struct_buffer_load:
+ case Intrinsic::amdgcn_struct_ptr_buffer_load:
case Intrinsic::amdgcn_struct_buffer_load_format:
+ case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
case Intrinsic::amdgcn_struct_tbuffer_load:
+ case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
case Intrinsic::amdgcn_tbuffer_load:
return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
default: {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 15b7f971f09c..b69cae0c73b3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -18,10 +18,6 @@ def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
]>;
-def AMDGPULdExpOp : SDTypeProfile<1, 2,
- [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>]
->;
-
def AMDGPUFPClassOp : SDTypeProfile<1, 2,
[SDTCisInt<0>, SDTCisFP<1>, SDTCisInt<2>]
>;
@@ -43,6 +39,7 @@ def AMDGPUFmasOp : SDTypeProfile<1, 4,
[SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<4>]
>;
+def ImmOp : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def AMDGPUIfOp : SDTypeProfile<1, 2,
@@ -85,9 +82,16 @@ def AMDGPUcall : SDNode<"AMDGPUISD::CALL",
SDNPVariadic]
>;
-def AMDGPUtc_return: SDNode<"AMDGPUISD::TC_RETURN",
- SDTypeProfile<0, 3, [SDTCisPtrTy<0>]>,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
+def AMDGPUTCReturnTP : SDTypeProfile<0, 3, [
+ SDTCisPtrTy<0>
+]>;
+
+def AMDGPUtc_return: SDNode<"AMDGPUISD::TC_RETURN", AMDGPUTCReturnTP,
+[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
+>;
+
+def AMDGPUtc_return_gfx: SDNode<"AMDGPUISD::TC_RETURN_GFX", AMDGPUTCReturnTP,
+[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;
def AMDGPUtrap : SDNode<"AMDGPUISD::TRAP",
@@ -111,6 +115,12 @@ def AMDGPUfract_impl : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
// out = 1.0 / a
def AMDGPUrcp_impl : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;
+// v_log_f32, which is log2
+def AMDGPUlog_impl : SDNode<"AMDGPUISD::LOG", SDTFPUnaryOp>;
+
+// v_exp_f32, which is exp2
+def AMDGPUexp_impl : SDNode<"AMDGPUISD::EXP", SDTFPUnaryOp>;
+
// out = 1.0 / sqrt(a)
def AMDGPUrsq_impl : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;
@@ -121,8 +131,6 @@ def AMDGPUrcp_iflag : SDNode<"AMDGPUISD::RCP_IFLAG", SDTFPUnaryOp>;
// out = 1.0 / sqrt(a) result clamped to +/- max_float.
def AMDGPUrsq_clamp_impl : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;
-def AMDGPUldexp_impl : SDNode<"AMDGPUISD::LDEXP", AMDGPULdExpOp>;
-
def AMDGPUpkrtz_f16_f32_impl : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>;
def AMDGPUpknorm_i16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>;
def AMDGPUpknorm_u16_f32_impl : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>;
@@ -351,11 +359,13 @@ def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChai
//===----------------------------------------------------------------------===//
def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
+def AMDGPUendpgm_trap : SDNode<"AMDGPUISD::ENDPGM_TRAP", SDTNone,
+ [SDNPHasChain]>;
def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def AMDGPUret_flag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
+def AMDGPUret_glue : SDNode<"AMDGPUISD::RET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;
@@ -381,10 +391,15 @@ def AMDGPUcos : PatFrags<(ops node:$src), [(int_amdgcn_cos node:$src),
(AMDGPUcos_impl node:$src)]>;
def AMDGPUfract : PatFrags<(ops node:$src), [(int_amdgcn_fract node:$src),
(AMDGPUfract_impl node:$src)]>;
-
-def AMDGPUldexp : PatFrags<(ops node:$src0, node:$src1),
- [(int_amdgcn_ldexp node:$src0, node:$src1),
- (AMDGPUldexp_impl node:$src0, node:$src1)]>;
+def AMDGPUlog : PatFrags<(ops node:$src), [(int_amdgcn_log node:$src),
+ (AMDGPUlog_impl node:$src)]>;
+def AMDGPUlogf16 : PatFrags<(ops node:$src), [(int_amdgcn_log node:$src),
+ (flog2 node:$src)]>;
+
+def AMDGPUexp : PatFrags<(ops node:$src), [(int_amdgcn_exp2 node:$src),
+ (AMDGPUexp_impl node:$src)]>;
+def AMDGPUexpf16 : PatFrags<(ops node:$src), [(int_amdgcn_exp2 node:$src),
+ (fexp2 node:$src)]>;
def AMDGPUfp_class : PatFrags<(ops node:$src0, node:$src1),
[(int_amdgcn_class node:$src0, node:$src1),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 2639f1f45565..747f9fe2f8ae 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -19,8 +19,8 @@
#include "AMDGPUTargetMachine.h"
#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -63,7 +63,7 @@ AMDGPUInstructionSelector::AMDGPUInstructionSelector(
const char *AMDGPUInstructionSelector::getName() { return DEBUG_TYPE; }
void AMDGPUInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits *KB,
- CodeGenCoverage &CoverageInfo,
+ CodeGenCoverage *CoverageInfo,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) {
MRI = &MF.getRegInfo();
@@ -523,60 +523,6 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT(MachineInstr &I) const {
return true;
}
-bool AMDGPUInstructionSelector::selectG_FMA_FMAD(MachineInstr &I) const {
- assert(I.getOpcode() == AMDGPU::G_FMA || I.getOpcode() == AMDGPU::G_FMAD);
-
- // Try to manually select MAD_MIX/FMA_MIX.
- Register Dst = I.getOperand(0).getReg();
- LLT ResultTy = MRI->getType(Dst);
- bool IsFMA = I.getOpcode() == AMDGPU::G_FMA;
- if (ResultTy != LLT::scalar(32) ||
- (IsFMA ? !Subtarget->hasFmaMixInsts() : !Subtarget->hasMadMixInsts()))
- return false;
-
- // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
- // using the conversion from f16.
- bool MatchedSrc0, MatchedSrc1, MatchedSrc2;
- auto [Src0, Src0Mods] =
- selectVOP3PMadMixModsImpl(I.getOperand(1), MatchedSrc0);
- auto [Src1, Src1Mods] =
- selectVOP3PMadMixModsImpl(I.getOperand(2), MatchedSrc1);
- auto [Src2, Src2Mods] =
- selectVOP3PMadMixModsImpl(I.getOperand(3), MatchedSrc2);
-
-#ifndef NDEBUG
- const SIMachineFunctionInfo *MFI =
- I.getMF()->getInfo<SIMachineFunctionInfo>();
- AMDGPU::SIModeRegisterDefaults Mode = MFI->getMode();
- assert((IsFMA || !Mode.allFP32Denormals()) &&
- "fmad selected with denormals enabled");
-#endif
-
- // TODO: We can select this with f32 denormals enabled if all the sources are
- // converted from f16 (in which case fmad isn't legal).
- if (!MatchedSrc0 && !MatchedSrc1 && !MatchedSrc2)
- return false;
-
- const unsigned OpC = IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32;
- MachineInstr *MixInst =
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpC), Dst)
- .addImm(Src0Mods)
- .addReg(copyToVGPRIfSrcFolded(Src0, Src0Mods, I.getOperand(1), &I))
- .addImm(Src1Mods)
- .addReg(copyToVGPRIfSrcFolded(Src1, Src1Mods, I.getOperand(2), &I))
- .addImm(Src2Mods)
- .addReg(copyToVGPRIfSrcFolded(Src2, Src2Mods, I.getOperand(3), &I))
- .addImm(0)
- .addImm(0)
- .addImm(0);
-
- if (!constrainSelectedInstRegOperands(*MixInst, TII, TRI, RBI))
- return false;
-
- I.eraseFromParent();
- return true;
-}
-
bool AMDGPUInstructionSelector::selectG_MERGE_VALUES(MachineInstr &MI) const {
MachineBasicBlock *BB = MI.getParent();
Register DstReg = MI.getOperand(0).getReg();
@@ -1100,6 +1046,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
return selectIntrinsicCmp(I);
case Intrinsic::amdgcn_ballot:
return selectBallot(I);
+ case Intrinsic::amdgcn_inverse_ballot:
+ return selectInverseBallot(I);
case Intrinsic::amdgcn_reloc_constant:
return selectRelocConstant(I);
case Intrinsic::amdgcn_groupstaticsize:
@@ -1343,27 +1291,26 @@ bool AMDGPUInstructionSelector::selectIntrinsicCmp(MachineInstr &I) const {
if (Opcode == -1)
return false;
- MachineInstr *SelectedMI;
- if (CmpInst::isFPPredicate(Pred)) {
- MachineOperand &LHS = I.getOperand(2);
- MachineOperand &RHS = I.getOperand(3);
- auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS);
- auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS);
- Register Src0Reg =
- copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, /*ForceVGPR*/ true);
- Register Src1Reg =
- copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, /*ForceVGPR*/ true);
- SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst)
- .addImm(Src0Mods)
- .addReg(Src0Reg)
- .addImm(Src1Mods)
- .addReg(Src1Reg)
- .addImm(0); // clamp
- } else {
- SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst)
- .add(I.getOperand(2))
- .add(I.getOperand(3));
- }
+ MachineInstrBuilder SelectedMI;
+ MachineOperand &LHS = I.getOperand(2);
+ MachineOperand &RHS = I.getOperand(3);
+ auto [Src0, Src0Mods] = selectVOP3ModsImpl(LHS);
+ auto [Src1, Src1Mods] = selectVOP3ModsImpl(RHS);
+ Register Src0Reg =
+ copyToVGPRIfSrcFolded(Src0, Src0Mods, LHS, &I, /*ForceVGPR*/ true);
+ Register Src1Reg =
+ copyToVGPRIfSrcFolded(Src1, Src1Mods, RHS, &I, /*ForceVGPR*/ true);
+ SelectedMI = BuildMI(*BB, &I, DL, TII.get(Opcode), Dst);
+ if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src0_modifiers))
+ SelectedMI.addImm(Src0Mods);
+ SelectedMI.addReg(Src0Reg);
+ if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::src1_modifiers))
+ SelectedMI.addImm(Src1Mods);
+ SelectedMI.addReg(Src1Reg);
+ if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::clamp))
+ SelectedMI.addImm(0); // clamp
+ if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::op_sel))
+ SelectedMI.addImm(0); // op_sel
RBI.constrainGenericRegister(Dst, *TRI.getBoolRC(), *MRI);
if (!constrainSelectedInstRegOperands(*SelectedMI, TII, TRI, RBI))
@@ -1379,28 +1326,56 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
Register DstReg = I.getOperand(0).getReg();
const unsigned Size = MRI->getType(DstReg).getSizeInBits();
const bool Is64 = Size == 64;
+ const bool IsWave32 = (STI.getWavefrontSize() == 32);
- if (Size != STI.getWavefrontSize())
+ // In the common case, the return type matches the wave size.
+ // However we also support emitting i64 ballots in wave32 mode.
+ if (Size != STI.getWavefrontSize() && (!Is64 || !IsWave32))
return false;
std::optional<ValueAndVReg> Arg =
getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI);
+ const auto BuildCopy = [&](Register SrcReg) {
+ if (Size == STI.getWavefrontSize()) {
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
+ .addReg(SrcReg);
+ return;
+ }
+
+    // If emitting an i64 ballot in wave32, fill the upper bits with zeroes.
+ Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_MOV_B32), HiReg).addImm(0);
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::REG_SEQUENCE), DstReg)
+ .addReg(SrcReg)
+ .addImm(AMDGPU::sub0)
+ .addReg(HiReg)
+ .addImm(AMDGPU::sub1);
+ };
+
if (Arg) {
const int64_t Value = Arg->Value.getSExtValue();
if (Value == 0) {
unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg).addImm(0);
- } else if (Value == -1) { // all ones
- Register SrcReg = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(SrcReg);
- } else
+ } else if (Value == -1) // all ones
+ BuildCopy(IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC);
+ else
return false;
- } else {
- Register SrcReg = I.getOperand(2).getReg();
- BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(SrcReg);
- }
+ } else
+ BuildCopy(I.getOperand(2).getReg());
+
+ I.eraseFromParent();
+ return true;
+}
+
+bool AMDGPUInstructionSelector::selectInverseBallot(MachineInstr &I) const {
+ MachineBasicBlock *BB = I.getParent();
+ const DebugLoc &DL = I.getDebugLoc();
+ const Register DstReg = I.getOperand(0).getReg();
+ const Register MaskReg = I.getOperand(2).getReg();
+ BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(MaskReg);
I.eraseFromParent();
return true;
}
@@ -1635,7 +1610,7 @@ bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
.addImm(0);
} else {
std::tie(BaseOffset, ImmOffset) =
- AMDGPU::getBaseWithConstantOffset(*MRI, BaseOffset, KnownBits);
+ AMDGPU::getBaseWithConstantOffset(*MRI, BaseOffset, KB);
if (Readfirstlane) {
// We have the constant offset now, so put the readfirstlane back on the
@@ -1824,7 +1799,7 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
}
// Set G16 opcode
- if (IsG16 && !IsA16) {
+ if (Subtarget->hasG16() && IsG16) {
const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
AMDGPU::getMIMGG16MappingInfo(Intr->BaseOpcode);
assert(G16MappingInfo);
@@ -1859,7 +1834,10 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
// The legalizer preprocessed the intrinsic arguments. If we aren't using
// NSA, these should have been packed into a single value in the first
// address register
- const bool UseNSA = NumVAddrRegs != 1 && NumVAddrDwords == NumVAddrRegs;
+ const bool UseNSA =
+ NumVAddrRegs != 1 &&
+ (STI.hasPartialNSAEncoding() ? NumVAddrDwords >= NumVAddrRegs
+ : NumVAddrDwords == NumVAddrRegs);
if (UseNSA && !STI.hasFeature(AMDGPU::FeatureNSAEncoding)) {
LLVM_DEBUG(dbgs() << "Trying to use NSA on non-NSA target\n");
return false;
@@ -1898,7 +1876,8 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
NumVDataDwords, NumVAddrDwords);
}
- assert(Opcode != -1);
+ if (Opcode == -1)
+ return false;
auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opcode))
.cloneMemRefs(MI);
@@ -2050,7 +2029,9 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
case Intrinsic::amdgcn_s_barrier:
return selectSBarrier(I);
case Intrinsic::amdgcn_raw_buffer_load_lds:
+ case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
case Intrinsic::amdgcn_struct_buffer_load_lds:
+ case Intrinsic::amdgcn_struct_ptr_buffer_load_lds:
return selectBufferLoadLds(I);
case Intrinsic::amdgcn_global_load_lds:
return selectGlobalLoadLds(I);
@@ -2137,7 +2118,7 @@ static int sizeToSubRegIndex(unsigned Size) {
return AMDGPU::sub0;
if (Size > 256)
return -1;
- return sizeToSubRegIndex(PowerOf2Ceil(Size));
+ return sizeToSubRegIndex(llvm::bit_ceil(Size));
}
}
@@ -2801,7 +2782,7 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
// Try to avoid emitting a bit operation when we only need to touch half of
// the 64-bit pointer.
- APInt MaskOnes = KnownBits->getKnownOnes(MaskReg).zext(64);
+ APInt MaskOnes = KB->getKnownOnes(MaskReg).zext(64);
const APInt MaskHi32 = APInt::getHighBitsSet(64, 32);
const APInt MaskLo32 = APInt::getLowBitsSet(64, 32);
@@ -2953,7 +2934,7 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT(
unsigned SubReg;
std::tie(IdxReg, SubReg) = computeIndirectRegIndex(
- *MRI, TRI, SrcRC, IdxReg, DstTy.getSizeInBits() / 8, *KnownBits);
+ *MRI, TRI, SrcRC, IdxReg, DstTy.getSizeInBits() / 8, *KB);
if (SrcRB->getID() == AMDGPU::SGPRRegBankID) {
if (DstTy.getSizeInBits() != 32 && !Is64)
@@ -3033,8 +3014,8 @@ bool AMDGPUInstructionSelector::selectG_INSERT_VECTOR_ELT(
return false;
unsigned SubReg;
- std::tie(IdxReg, SubReg) = computeIndirectRegIndex(*MRI, TRI, VecRC, IdxReg,
- ValSize / 8, *KnownBits);
+ std::tie(IdxReg, SubReg) =
+ computeIndirectRegIndex(*MRI, TRI, VecRC, IdxReg, ValSize / 8, *KB);
const bool IndexMode = VecRB->getID() == AMDGPU::VGPRRegBankID &&
STI.useVGPRIndexMode();
@@ -3402,11 +3383,6 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
return selectG_FABS(I);
case TargetOpcode::G_EXTRACT:
return selectG_EXTRACT(I);
- case TargetOpcode::G_FMA:
- case TargetOpcode::G_FMAD:
- if (selectG_FMA_FMAD(I))
- return true;
- return selectImpl(I, *CoverageInfo);
case TargetOpcode::G_MERGE_VALUES:
case TargetOpcode::G_CONCAT_VECTORS:
return selectG_MERGE_VALUES(I);
@@ -3446,9 +3422,9 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_ATOMICRMW_MAX:
case TargetOpcode::G_ATOMICRMW_UMIN:
case TargetOpcode::G_ATOMICRMW_UMAX:
+ case TargetOpcode::G_ATOMICRMW_UINC_WRAP:
+ case TargetOpcode::G_ATOMICRMW_UDEC_WRAP:
case TargetOpcode::G_ATOMICRMW_FADD:
- case AMDGPU::G_AMDGPU_ATOMIC_INC:
- case AMDGPU::G_AMDGPU_ATOMIC_DEC:
case AMDGPU::G_AMDGPU_ATOMIC_FMIN:
case AMDGPU::G_AMDGPU_ATOMIC_FMAX:
return selectG_LOAD_STORE_ATOMICRMW(I);
@@ -3460,7 +3436,11 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_ANYEXT:
case TargetOpcode::G_SEXT_INREG:
- if (selectImpl(I, *CoverageInfo))
+    // This is a workaround. For extension from type i1, `selectImpl()` uses
+    // patterns from the TD file and generates an illegal VGPR to SGPR COPY,
+    // as type i1 can only be held in an SGPR class.
+ if (MRI->getType(I.getOperand(1).getReg()) != LLT::scalar(1) &&
+ selectImpl(I, *CoverageInfo))
return true;
return selectG_SZA_EXT(I);
case TargetOpcode::G_BRCOND:
@@ -3506,8 +3486,10 @@ AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
}
-std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
- MachineOperand &Root, bool AllowAbs, bool OpSel) const {
+std::pair<Register, unsigned>
+AMDGPUInstructionSelector::selectVOP3ModsImpl(MachineOperand &Root,
+ bool IsCanonicalizing,
+ bool AllowAbs, bool OpSel) const {
Register Src = Root.getReg();
unsigned Mods = 0;
MachineInstr *MI = getDefIgnoringCopies(Src, *MRI);
@@ -3516,6 +3498,15 @@ std::pair<Register, unsigned> AMDGPUInstructionSelector::selectVOP3ModsImpl(
Src = MI->getOperand(1).getReg();
Mods |= SISrcMods::NEG;
MI = getDefIgnoringCopies(Src, *MRI);
+ } else if (MI->getOpcode() == AMDGPU::G_FSUB && IsCanonicalizing) {
+ // Fold fsub [+-]0 into fneg. This may not have folded depending on the
+ // denormal mode, but we're implicitly canonicalizing in a source operand.
+ const ConstantFP *LHS =
+ getConstantFPVRegVal(MI->getOperand(1).getReg(), *MRI);
+ if (LHS && LHS->isZero()) {
+ Mods |= SISrcMods::NEG;
+ Src = MI->getOperand(2).getReg();
+ }
}
if (AllowAbs && MI->getOpcode() == AMDGPU::G_FABS) {
@@ -3578,7 +3569,9 @@ InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3BMods0(MachineOperand &Root) const {
Register Src;
unsigned Mods;
- std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false);
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
+ /*IsCanonicalizing=*/true,
+ /*AllowAbs=*/false);
return {{
[=](MachineInstrBuilder &MIB) {
@@ -3614,10 +3607,26 @@ AMDGPUInstructionSelector::selectVOP3Mods(MachineOperand &Root) const {
}
InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3ModsNonCanonicalizing(
+ MachineOperand &Root) const {
+ Register Src;
+ unsigned Mods;
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /*IsCanonicalizing=*/false);
+
+ return {{
+ [=](MachineInstrBuilder &MIB) {
+ MIB.addReg(copyToVGPRIfSrcFolded(Src, Mods, Root, MIB));
+ },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3BMods(MachineOperand &Root) const {
Register Src;
unsigned Mods;
- std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /* AllowAbs */ false);
+ std::tie(Src, Mods) = selectVOP3ModsImpl(Root, /*IsCanonicalizing=*/true,
+ /*AllowAbs=*/false);
return {{
[=](MachineInstrBuilder &MIB) {
@@ -3653,6 +3662,8 @@ AMDGPUInstructionSelector::selectVOP3PModsImpl(
MI = MRI.getVRegDef(Src);
}
+ // TODO: Handle G_FSUB 0 as fneg
+
// TODO: Match op_sel through g_build_vector_trunc and g_shuffle_vector.
(void)IsDOT; // DOTs do not use OPSEL on gfx940+, check ST.hasDOTOpSelHazard()
@@ -3739,8 +3750,9 @@ AMDGPUInstructionSelector::selectVINTERPMods(MachineOperand &Root) const {
Register Src;
unsigned Mods;
std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
- /* AllowAbs */ false,
- /* OpSel */ false);
+ /*IsCanonicalizing=*/true,
+ /*AllowAbs=*/false,
+ /*OpSel=*/false);
return {{
[=](MachineInstrBuilder &MIB) {
@@ -3756,8 +3768,9 @@ AMDGPUInstructionSelector::selectVINTERPModsHi(MachineOperand &Root) const {
Register Src;
unsigned Mods;
std::tie(Src, Mods) = selectVOP3ModsImpl(Root,
- /* AllowAbs */ false,
- /* OpSel */ true);
+ /*IsCanonicalizing=*/true,
+ /*AllowAbs=*/false,
+ /*OpSel=*/true);
return {{
[=](MachineInstrBuilder &MIB) {
@@ -3903,7 +3916,7 @@ AMDGPUInstructionSelector::selectFlatOffsetImpl(MachineOperand &Root,
int64_t ConstOffset;
std::tie(PtrBase, ConstOffset) =
getPtrBaseWithConstantOffset(Root.getReg(), *MRI);
- if (ConstOffset == 0)
+ if (ConstOffset == 0 || !isFlatScratchBaseLegal(PtrBase, FlatVariant))
return Default;
unsigned AddrSpace = (*MI->memoperands_begin())->getAddrSpace();
@@ -4066,7 +4079,7 @@ AMDGPUInstructionSelector::selectScratchSAddr(MachineOperand &Root) const {
// possible.
std::tie(PtrBase, ConstOffset) = getPtrBaseWithConstantOffset(Addr, *MRI);
- if (ConstOffset != 0 &&
+ if (ConstOffset != 0 && isFlatScratchBaseLegal(PtrBase) &&
TII.isLegalFLATOffset(ConstOffset, AMDGPUAS::PRIVATE_ADDRESS,
SIInstrFlags::FlatScratch)) {
Addr = PtrBase;
@@ -4122,9 +4135,9 @@ bool AMDGPUInstructionSelector::checkFlatScratchSVSSwizzleBug(
// The bug affects the swizzling of SVS accesses if there is any carry out
// from the two low order bits (i.e. from bit 1 into bit 2) when adding
// voffset to (soffset + inst_offset).
- auto VKnown = KnownBits->getKnownBits(VAddr);
+ auto VKnown = KB->getKnownBits(VAddr);
auto SKnown = KnownBits::computeForAddSub(
- true, false, KnownBits->getKnownBits(SAddr),
+ true, false, KB->getKnownBits(SAddr),
KnownBits::makeConstant(APInt(32, ImmOffset)));
uint64_t VMax = VKnown.getMaxValue().getZExtValue();
uint64_t SMax = SKnown.getMaxValue().getZExtValue();
@@ -4159,6 +4172,9 @@ AMDGPUInstructionSelector::selectScratchSVAddr(MachineOperand &Root) const {
Register LHS = AddrDef->MI->getOperand(1).getReg();
auto LHSDef = getDefSrcRegIgnoringCopies(LHS, *MRI);
+ if (!isFlatScratchBaseLegal(LHS) || !isFlatScratchBaseLegal(RHS))
+ return std::nullopt;
+
if (checkFlatScratchSVSSwizzleBug(RHS, LHS, ImmOffset))
return std::nullopt;
@@ -4195,9 +4211,10 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
// TODO: Should this be inside the render function? The iterator seems to
// move.
+ const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
BuildMI(*MBB, MI, MI->getDebugLoc(), TII.get(AMDGPU::V_MOV_B32_e32),
HighBits)
- .addImm(Offset & ~4095);
+ .addImm(Offset & ~MaxOffset);
return {{[=](MachineInstrBuilder &MIB) { // rsrc
MIB.addReg(Info->getScratchRSrcReg());
@@ -4211,7 +4228,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
MIB.addImm(0);
},
[=](MachineInstrBuilder &MIB) { // offset
- MIB.addImm(Offset & 4095);
+ MIB.addImm(Offset & MaxOffset);
}}};
}
@@ -4228,7 +4245,7 @@ AMDGPUInstructionSelector::selectMUBUFScratchOffen(MachineOperand &Root) const {
if (ConstOffset != 0) {
if (SIInstrInfo::isLegalMUBUFImmOffset(ConstOffset) &&
(!STI.privateMemoryResourceIsRangeChecked() ||
- KnownBits->signBitIsZero(PtrBase))) {
+ KB->signBitIsZero(PtrBase))) {
const MachineInstr *PtrBaseDef = MRI->getVRegDef(PtrBase);
if (PtrBaseDef->getOpcode() == AMDGPU::G_FRAME_INDEX)
FI = PtrBaseDef->getOperand(1).getIndex();
@@ -4270,7 +4287,7 @@ bool AMDGPUInstructionSelector::isDSOffsetLegal(Register Base,
// On Southern Islands instruction with a negative base value and an offset
// don't seem to work.
- return KnownBits->signBitIsZero(Base);
+ return KB->signBitIsZero(Base);
}
bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
@@ -4286,7 +4303,17 @@ bool AMDGPUInstructionSelector::isDSOffset2Legal(Register Base, int64_t Offset0,
// On Southern Islands instruction with a negative base value and an offset
// don't seem to work.
- return KnownBits->signBitIsZero(Base);
+ return KB->signBitIsZero(Base);
+}
+
+bool AMDGPUInstructionSelector::isFlatScratchBaseLegal(
+ Register Base, uint64_t FlatVariant) const {
+ if (FlatVariant != SIInstrFlags::FlatScratch)
+ return true;
+
+  // When the value in the 32-bit Base can be negative, calculate the scratch
+  // offset using a 32-bit add instruction; otherwise use Base (unsigned) +
+  // offset.
+ return KB->signBitIsZero(Base);
}
bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
@@ -4298,12 +4325,11 @@ bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
if (!RHS)
return false;
- if (RHS->countTrailingOnes() >= ShAmtBits)
+ if (RHS->countr_one() >= ShAmtBits)
return true;
- const APInt &LHSKnownZeros =
- KnownBits->getKnownZeroes(MI.getOperand(1).getReg());
- return (LHSKnownZeros | *RHS).countTrailingOnes() >= ShAmtBits;
+ const APInt &LHSKnownZeros = KB->getKnownZeroes(MI.getOperand(1).getReg());
+ return (LHSKnownZeros | *RHS).countr_one() >= ShAmtBits;
}
// Return the wave level SGPR base address if this is a wave address.
@@ -4746,64 +4772,6 @@ AMDGPUInstructionSelector::selectMUBUFOffset(MachineOperand &Root) const {
}};
}
-InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectMUBUFAddr64Atomic(MachineOperand &Root) const {
- Register VAddr;
- Register RSrcReg;
- Register SOffset;
- int64_t Offset = 0;
-
- if (!selectMUBUFAddr64Impl(Root, VAddr, RSrcReg, SOffset, Offset))
- return {};
-
- // FIXME: Use defaulted operands for trailing 0s and remove from the complex
- // pattern.
- return {{
- [=](MachineInstrBuilder &MIB) { // rsrc
- MIB.addReg(RSrcReg);
- },
- [=](MachineInstrBuilder &MIB) { // vaddr
- MIB.addReg(VAddr);
- },
- [=](MachineInstrBuilder &MIB) { // soffset
- if (SOffset)
- MIB.addReg(SOffset);
- else
- MIB.addImm(0);
- },
- [=](MachineInstrBuilder &MIB) { // offset
- MIB.addImm(Offset);
- },
- [=](MachineInstrBuilder &MIB) {
- MIB.addImm(AMDGPU::CPol::GLC); // cpol
- }
- }};
-}
-
-InstructionSelector::ComplexRendererFns
-AMDGPUInstructionSelector::selectMUBUFOffsetAtomic(MachineOperand &Root) const {
- Register RSrcReg;
- Register SOffset;
- int64_t Offset = 0;
-
- if (!selectMUBUFOffsetImpl(Root, RSrcReg, SOffset, Offset))
- return {};
-
- return {{
- [=](MachineInstrBuilder &MIB) { // rsrc
- MIB.addReg(RSrcReg);
- },
- [=](MachineInstrBuilder &MIB) { // soffset
- if (SOffset)
- MIB.addReg(SOffset);
- else
- MIB.addImm(0);
- },
- [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); }, // offset
- [=](MachineInstrBuilder &MIB) { MIB.addImm(AMDGPU::CPol::GLC); } // cpol
- }};
-}
-
/// Get an immediate that must be 32-bits, and treated as zero extended.
static std::optional<uint64_t>
getConstantZext32Val(Register Reg, const MachineRegisterInfo &MRI) {
@@ -4851,7 +4819,7 @@ AMDGPUInstructionSelector::selectSMRDBufferSgprImm(MachineOperand &Root) const {
Register SOffset;
unsigned Offset;
std::tie(SOffset, Offset) =
- AMDGPU::getBaseWithConstantOffset(*MRI, Root.getReg(), KnownBits);
+ AMDGPU::getBaseWithConstantOffset(*MRI, Root.getReg(), KB);
if (!SOffset)
return std::nullopt;
@@ -4984,6 +4952,22 @@ AMDGPUInstructionSelector::selectVOP3PMadMixModsImpl(MachineOperand &Root,
}
InstructionSelector::ComplexRendererFns
+AMDGPUInstructionSelector::selectVOP3PMadMixModsExt(
+ MachineOperand &Root) const {
+ Register Src;
+ unsigned Mods;
+ bool Matched;
+ std::tie(Src, Mods) = selectVOP3PMadMixModsImpl(Root, Matched);
+ if (!Matched)
+ return {};
+
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.addReg(Src); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(Mods); } // src_mods
+ }};
+}
+
+InstructionSelector::ComplexRendererFns
AMDGPUInstructionSelector::selectVOP3PMadMixMods(MachineOperand &Root) const {
Register Src;
unsigned Mods;
@@ -5031,7 +5015,7 @@ void AMDGPUInstructionSelector::renderPopcntImm(MachineInstrBuilder &MIB,
int OpIdx) const {
assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
"Expected G_CONSTANT");
- MIB.addImm(MI.getOperand(1).getCImm()->getValue().countPopulation());
+ MIB.addImm(MI.getOperand(1).getCImm()->getValue().popcount());
}
/// This only really exists to satisfy DAG type checking machinery, so is a
@@ -5042,6 +5026,13 @@ void AMDGPUInstructionSelector::renderTruncTImm(MachineInstrBuilder &MIB,
MIB.addImm(MI.getOperand(OpIdx).getImm());
}
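+// Render a boolean operand as source-modifier bits: a nonzero immediate
+// selects SISrcMods::OP_SEL_0, zero selects no modifier.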
+void AMDGPUInstructionSelector::renderOpSelTImm(MachineInstrBuilder &MIB,
+ const MachineInstr &MI,
+ int OpIdx) const {
+ assert(OpIdx >= 0 && "expected to match an immediate operand");
+ MIB.addImm(MI.getOperand(OpIdx).getImm() ? (int64_t)SISrcMods::OP_SEL_0 : 0);
+}
+
void AMDGPUInstructionSelector::renderExtractCPol(MachineInstrBuilder &MIB,
const MachineInstr &MI,
int OpIdx) const {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 33a01ed0a1ce..243ff72e2979 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H
+#include "SIDefines.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/IR/InstrTypes.h"
@@ -58,7 +59,7 @@ public:
static const char *getName();
void setupMF(MachineFunction &MF, GISelKnownBits *KB,
- CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
+ CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) override;
private:
@@ -111,6 +112,7 @@ private:
bool selectDivScale(MachineInstr &MI) const;
bool selectIntrinsicCmp(MachineInstr &MI) const;
bool selectBallot(MachineInstr &I) const;
+ bool selectInverseBallot(MachineInstr &I) const;
bool selectRelocConstant(MachineInstr &I) const;
bool selectGroupStaticSize(MachineInstr &I) const;
bool selectReturnAddress(MachineInstr &I) const;
@@ -146,9 +148,10 @@ private:
bool selectSMFMACIntrin(MachineInstr &I) const;
bool selectWaveAddress(MachineInstr &I) const;
- std::pair<Register, unsigned>
- selectVOP3ModsImpl(MachineOperand &Root, bool AllowAbs = true,
- bool OpSel = false) const;
+ std::pair<Register, unsigned> selectVOP3ModsImpl(MachineOperand &Root,
+ bool IsCanonicalizing = true,
+ bool AllowAbs = true,
+ bool OpSel = false) const;
Register copyToVGPRIfSrcFolded(Register Src, unsigned Mods,
MachineOperand Root, MachineInstr *InsertPt,
@@ -169,6 +172,8 @@ private:
InstructionSelector::ComplexRendererFns
selectVOP3Mods(MachineOperand &Root) const;
InstructionSelector::ComplexRendererFns
+ selectVOP3ModsNonCanonicalizing(MachineOperand &Root) const;
+ InstructionSelector::ComplexRendererFns
selectVOP3BMods(MachineOperand &Root) const;
ComplexRendererFns selectVOP3NoMods(MachineOperand &Root) const;
@@ -236,6 +241,8 @@ private:
bool isDSOffsetLegal(Register Base, int64_t Offset) const;
bool isDSOffset2Legal(Register Base, int64_t Offset0, int64_t Offset1,
unsigned Size) const;
+ bool isFlatScratchBaseLegal(
+ Register Base, uint64_t FlatVariant = SIInstrFlags::FlatScratch) const;
std::pair<Register, unsigned>
selectDS1Addr1OffsetImpl(MachineOperand &Root) const;
@@ -285,18 +292,13 @@ private:
InstructionSelector::ComplexRendererFns
selectMUBUFOffset(MachineOperand &Root) const;
- InstructionSelector::ComplexRendererFns
- selectMUBUFOffsetAtomic(MachineOperand &Root) const;
-
- InstructionSelector::ComplexRendererFns
- selectMUBUFAddr64Atomic(MachineOperand &Root) const;
-
ComplexRendererFns selectSMRDBufferImm(MachineOperand &Root) const;
ComplexRendererFns selectSMRDBufferImm32(MachineOperand &Root) const;
ComplexRendererFns selectSMRDBufferSgprImm(MachineOperand &Root) const;
std::pair<Register, unsigned> selectVOP3PMadMixModsImpl(MachineOperand &Root,
bool &Matched) const;
+ ComplexRendererFns selectVOP3PMadMixModsExt(MachineOperand &Root) const;
ComplexRendererFns selectVOP3PMadMixMods(MachineOperand &Root) const;
void renderTruncImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
@@ -305,6 +307,9 @@ private:
void renderTruncTImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
+ void renderOpSelTImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
+ int OpIdx) const;
+
void renderNegateImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
int OpIdx) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 22b327279211..2305097e3f94 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -104,15 +104,18 @@ class PredicateControl {
}
class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
- PredicateControl;
+ PredicateControl, GISelFlags;
+
+let GIIgnoreCopies = 1 in
+class AMDGPUPatIgnoreCopies<dag pattern, dag result> : AMDGPUPat<pattern, result>;
let RecomputePerFunction = 1 in {
-def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
-def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
-def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
-def NoFP16Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
-def NoFP32Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
-def NoFP64Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
+def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
+def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals != DenormalMode::getPreserveSign()">;
+def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
+def NoFP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
+def NoFP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals == DenormalMode::getPreserveSign()">;
+def NoFP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
}
@@ -120,37 +123,45 @@ def FMA : Predicate<"Subtarget->hasFMA()">;
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
-def u16ImmTarget : AsmOperandClass {
- let Name = "U16Imm";
- let RenderMethod = "addImmOperands";
-}
+def i1imm_0 : OperandWithDefaultOps<i1, (ops (i1 0))>;
-def s16ImmTarget : AsmOperandClass {
- let Name = "S16Imm";
+class CustomOperandClass<string name, bit optional, string parserMethod,
+ string defaultMethod>
+ : AsmOperandClass {
+ let Name = name;
+ let PredicateMethod = "is"#name;
+ let ParserMethod = parserMethod;
let RenderMethod = "addImmOperands";
+ let IsOptional = optional;
+ let DefaultMethod = defaultMethod;
}
-let OperandType = "OPERAND_IMMEDIATE" in {
-
-def u32imm : Operand<i32> {
- let PrintMethod = "printU32ImmOperand";
-}
-
-def u16imm : Operand<i16> {
- let PrintMethod = "printU16ImmOperand";
- let ParserMatchClass = u16ImmTarget;
+class CustomOperandProps<bit optional = 0, string name = NAME> {
+ string ImmTy = "ImmTy"#name;
+ string ParserMethod = "parse"#name;
+ string DefaultValue = "0";
+ string DefaultMethod = "[this]() { return "#
+ "AMDGPUOperand::CreateImm(this, "#DefaultValue#", SMLoc(), "#
+ "AMDGPUOperand::"#ImmTy#"); }";
+ string PrintMethod = "print"#name;
+ AsmOperandClass ParserMatchClass =
+ CustomOperandClass<name, optional, ParserMethod, DefaultMethod>;
+ string OperandType = "OPERAND_IMMEDIATE";
}
-def s16imm : Operand<i16> {
- let PrintMethod = "printU16ImmOperand";
- let ParserMatchClass = s16ImmTarget;
-}
+class CustomOperand<ValueType type, bit optional = 0, string name = NAME>
+ : Operand<type>, CustomOperandProps<optional, name>;
-def u8imm : Operand<i8> {
- let PrintMethod = "printU8ImmOperand";
+class ImmOperand<ValueType type, string name = NAME, bit optional = 0,
+ string printer = "print"#name>
+ : CustomOperand<type, optional, name> {
+ let ImmTy = "ImmTyNone";
+ let ParserMethod = "";
+ let PrintMethod = printer;
}
-} // End OperandType = "OPERAND_IMMEDIATE"
+def s16imm : ImmOperand<i16, "S16Imm", 0, "printU16ImmOperand">;
+def u16imm : ImmOperand<i16, "U16Imm", 0, "printU16ImmOperand">;
//===--------------------------------------------------------------------===//
// Custom Operands
@@ -210,6 +221,12 @@ class is_canonicalized<SDPatternOperator op> : PatFrag<
}];
}
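+// Matches (op2 (op1 src0, src1), src2); e.g. imad matches an integer
+// multiply followed by an add.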
+class FoldTernaryOpPat<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
+ (ops node:$src0, node:$src1, node:$src2),
+ (op2 (op1 node:$src0, node:$src1), node:$src2)
+>;
+
+def imad : FoldTernaryOpPat<mul, add>;
let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
@@ -638,6 +655,8 @@ defm atomic_load_umax : binary_atomic_op_all_as<atomic_load_umax>;
defm atomic_load_umin : binary_atomic_op_all_as<atomic_load_umin>;
defm atomic_load_xor : binary_atomic_op_all_as<atomic_load_xor>;
defm atomic_load_fadd : binary_atomic_op_all_as<atomic_load_fadd, 0>;
+defm atomic_load_uinc_wrap : binary_atomic_op_all_as<atomic_load_uinc_wrap>;
+defm atomic_load_udec_wrap : binary_atomic_op_all_as<atomic_load_udec_wrap>;
let MemoryVT = v2f16 in
defm atomic_load_fadd_v2f16 : binary_atomic_op_all_as<atomic_load_fadd, 0>;
defm AMDGPUatomic_cmp_swap : binary_atomic_op_all_as<AMDGPUatomic_cmp_swap>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index 9e86bd0c2b97..fb7148ba10ac 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -14,7 +14,7 @@
#include "AMDGPU.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
@@ -46,7 +46,7 @@ class AMDGPULateCodeGenPrepare
const DataLayout *DL = nullptr;
AssumptionCache *AC = nullptr;
- LegacyDivergenceAnalysis *DA = nullptr;
+ UniformityInfo *UA = nullptr;
public:
static char ID;
@@ -59,7 +59,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<LegacyDivergenceAnalysis>();
+ AU.addRequired<UniformityInfoWrapperPass>();
AU.setPreservesAll();
}
@@ -91,7 +91,7 @@ bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) {
return false;
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DA = &getAnalysis<LegacyDivergenceAnalysis>();
+ UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
bool Changed = false;
for (auto &BB : F)
@@ -122,7 +122,7 @@ bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(LoadInst &LI) const {
if (LI.getAlign() < DL->getABITypeAlign(Ty))
return false;
// It should be uniform, i.e. a scalar load.
- return DA->isUniform(&LI);
+ return UA->isUniform(&LI);
}
bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
@@ -156,18 +156,14 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
IRBuilder<> IRB(&LI);
IRB.SetCurrentDebugLocation(LI.getDebugLoc());
- unsigned AS = LI.getPointerAddressSpace();
- unsigned LdBits = DL->getTypeStoreSize(LI.getType()) * 8;
+ unsigned LdBits = DL->getTypeStoreSizeInBits(LI.getType());
auto IntNTy = Type::getIntNTy(LI.getContext(), LdBits);
- PointerType *Int32PtrTy = Type::getInt32PtrTy(LI.getContext(), AS);
- PointerType *Int8PtrTy = Type::getInt8PtrTy(LI.getContext(), AS);
- auto *NewPtr = IRB.CreateBitCast(
- IRB.CreateConstGEP1_64(
- IRB.getInt8Ty(),
- IRB.CreatePointerBitCastOrAddrSpaceCast(Base, Int8PtrTy),
- Offset - Adjust),
- Int32PtrTy);
+ auto *NewPtr = IRB.CreateConstGEP1_64(
+ IRB.getInt8Ty(),
+ IRB.CreateAddrSpaceCast(Base, LI.getPointerOperand()->getType()),
+ Offset - Adjust);
+
LoadInst *NewLd = IRB.CreateAlignedLoad(IRB.getInt32Ty(), NewPtr, Align(4));
NewLd->copyMetadata(LI);
NewLd->setMetadata(LLVMContext::MD_range, nullptr);
@@ -184,7 +180,7 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) {
INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepare, DEBUG_TYPE,
"AMDGPU IR late optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPULateCodeGenPrepare, DEBUG_TYPE,
"AMDGPU IR late optimizations", false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 41cb0a99b420..120c00b14a36 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
@@ -131,6 +132,42 @@ static LegalizeMutation moreEltsToNext32Bit(unsigned TypeIdx) {
};
}
+// Increase the number of vector elements to reach the next legal RegClass.
+static LegalizeMutation moreElementsToNextExistingRegClass(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ const unsigned NumElts = Ty.getNumElements();
+ const unsigned EltSize = Ty.getElementType().getSizeInBits();
+ const unsigned MaxNumElts = MaxRegisterSize / EltSize;
+
+ assert(EltSize == 32 || EltSize == 64);
+ assert(Ty.getSizeInBits() < MaxRegisterSize);
+
+ unsigned NewNumElts;
+ // Find the nearest legal RegClass that is larger than the current type.
+ for (NewNumElts = NumElts; NewNumElts < MaxNumElts; ++NewNumElts) {
+ if (SIRegisterInfo::getSGPRClassForBitWidth(NewNumElts * EltSize))
+ break;
+ }
+
+ return std::pair(TypeIdx, LLT::fixed_vector(NewNumElts, EltSize));
+ };
+}
+
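+// Scalar (s128-based) and register (<4 x s32>-based) forms of a buffer
+// resource pointer type, handling vectors of such pointers as well.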
+static LLT getBufferRsrcScalarType(const LLT Ty) {
+ if (!Ty.isVector())
+ return LLT::scalar(128);
+ const ElementCount NumElems = Ty.getElementCount();
+ return LLT::vector(NumElems, LLT::scalar(128));
+}
+
+static LLT getBufferRsrcRegisterType(const LLT Ty) {
+ if (!Ty.isVector())
+ return LLT::fixed_vector(4, LLT::scalar(32));
+ const unsigned NumElems = Ty.getElementCount().getFixedValue();
+ return LLT::fixed_vector(NumElems * 4, LLT::scalar(32));
+}
+
static LLT getBitcastRegisterType(const LLT Ty) {
const unsigned Size = Ty.getSizeInBits();
@@ -215,6 +252,15 @@ static LegalityPredicate isRegisterType(unsigned TypeIdx) {
};
}
+// RegisterType that doesn't have a corresponding RegClass.
+static LegalityPredicate isIllegalRegisterType(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ LLT Ty = Query.Types[TypeIdx];
+ return isRegisterType(Ty) &&
+ !SIRegisterInfo::getSGPRClassForBitWidth(Ty.getSizeInBits());
+ };
+}
+
static LegalityPredicate elementTypeIsLegal(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
const LLT QueryTy = Query.Types[TypeIdx];
@@ -239,7 +285,7 @@ static LegalityPredicate isWideScalarExtLoadTruncStore(unsigned TypeIdx) {
// handle some operations by just promoting the register during
// selection. There are also d16 loads on GFX9+ which preserve the high bits.
static unsigned maxSizeForAddrSpace(const GCNSubtarget &ST, unsigned AS,
- bool IsLoad) {
+ bool IsLoad, bool IsAtomic) {
switch (AS) {
case AMDGPUAS::PRIVATE_ADDRESS:
// FIXME: Private element size.
@@ -249,6 +295,7 @@ static unsigned maxSizeForAddrSpace(const GCNSubtarget &ST, unsigned AS,
case AMDGPUAS::GLOBAL_ADDRESS:
case AMDGPUAS::CONSTANT_ADDRESS:
case AMDGPUAS::CONSTANT_ADDRESS_32BIT:
+ case AMDGPUAS::BUFFER_RESOURCE:
// Treat constant and global as identical. SMRD loads are sometimes usable for
// global loads (ideally constant address space should be eliminated)
// depending on the context. Legality cannot be context dependent, but
@@ -257,9 +304,10 @@ static unsigned maxSizeForAddrSpace(const GCNSubtarget &ST, unsigned AS,
// kernel.
return IsLoad ? 512 : 128;
default:
- // Flat addresses may contextually need to be split to 32-bit parts if they
- // may alias scratch depending on the subtarget.
- return 128;
+ // FIXME: Flat addresses may contextually need to be split to 32-bit parts
+ // if they may alias scratch depending on the subtarget. This needs to be
+ // moved to custom handling to use addressMayBeAccessedAsPrivate
+ return ST.hasMultiDwordFlatScratchAddressing() || IsAtomic ? 128 : 32;
}
}
@@ -295,7 +343,9 @@ static bool isLoadStoreSizeLegal(const GCNSubtarget &ST,
if (MemSize != RegSize && RegSize != 32)
return false;
- if (MemSize > maxSizeForAddrSpace(ST, AS, IsLoad))
+ if (MemSize > maxSizeForAddrSpace(ST, AS, IsLoad,
+ Query.MMODescrs[0].Ordering !=
+ AtomicOrdering::NotAtomic))
return false;
switch (MemSize) {
@@ -329,6 +379,21 @@ static bool isLoadStoreSizeLegal(const GCNSubtarget &ST,
return true;
}
+// The newer buffer intrinsic forms take their resource arguments as
+// pointers in address space 8, aka s128 values. However, in order to not break
+// SelectionDAG, the underlying operations have to continue to take v4i32
+// arguments. Therefore, we convert resource pointers - or vectors of them
+// to integer values here.
+static bool hasBufferRsrcWorkaround(const LLT Ty) {
+ if (Ty.isPointer() && Ty.getAddressSpace() == AMDGPUAS::BUFFER_RESOURCE)
+ return true;
+ if (Ty.isVector()) {
+ const LLT ElemTy = Ty.getElementType();
+ return hasBufferRsrcWorkaround(ElemTy);
+ }
+ return false;
+}
+
// The current selector can't handle <6 x s16>, <8 x s16>, s96, s128 etc, so
// workaround this. Eventually it should ignore the type for loads and only care
// about the size. Return true in cases where we will workaround this for now by
@@ -340,6 +405,9 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
const unsigned Size = Ty.getSizeInBits();
if (Size <= 64)
return false;
+ // Address space 8 pointers get their own workaround.
+ if (hasBufferRsrcWorkaround(Ty))
+ return false;
if (!Ty.isVector())
return true;
@@ -354,7 +422,7 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
static bool isLoadStoreLegal(const GCNSubtarget &ST, const LegalityQuery &Query) {
const LLT Ty = Query.Types[0];
return isRegisterType(Ty) && isLoadStoreSizeLegal(ST, Query) &&
- !loadStoreBitcastWorkaround(Ty);
+ !hasBufferRsrcWorkaround(Ty) && !loadStoreBitcastWorkaround(Ty);
}
/// Return true if a load or store of the type should be lowered with a bitcast
@@ -392,7 +460,7 @@ static bool shouldWidenLoad(const GCNSubtarget &ST, LLT MemoryTy,
if (SizeInBits == 96 && ST.hasDwordx3LoadStores())
return false;
- if (SizeInBits >= maxSizeForAddrSpace(ST, AddrSpace, Opcode))
+ if (SizeInBits >= maxSizeForAddrSpace(ST, AddrSpace, Opcode, false))
return false;
// A load is known dereferenceable up to the alignment, so it's legal to widen
@@ -422,6 +490,80 @@ static bool shouldWidenLoad(const GCNSubtarget &ST, const LegalityQuery &Query,
Query.Types[1].getAddressSpace(), Opcode);
}
+/// Mutates IR (typically a load instruction) to use a <4 x s32> as the initial
+/// type of the operand `Idx` and then to transform it to a `p8` via bitcasts
+/// and inttoptr. In addition, handle vectors of p8. Returns the new type.
+static LLT castBufferRsrcFromV4I32(MachineInstr &MI, MachineIRBuilder &B,
+ MachineRegisterInfo &MRI, unsigned Idx) {
+ MachineOperand &MO = MI.getOperand(Idx);
+
+ const LLT PointerTy = MRI.getType(MO.getReg());
+
+ // Paranoidly prevent us from doing this multiple times.
+ if (!hasBufferRsrcWorkaround(PointerTy))
+ return PointerTy;
+
+ const LLT ScalarTy = getBufferRsrcScalarType(PointerTy);
+ const LLT VectorTy = getBufferRsrcRegisterType(PointerTy);
+ if (!PointerTy.isVector()) {
+ // Happy path: (4 x s32) -> (s32, s32, s32, s32) -> (p8)
+ const unsigned NumParts = PointerTy.getSizeInBits() / 32;
+ const LLT S32 = LLT::scalar(32);
+
+ Register VectorReg = MRI.createGenericVirtualRegister(VectorTy);
+ std::array<Register, 4> VectorElems;
+ B.setInsertPt(B.getMBB(), ++B.getInsertPt());
+ for (unsigned I = 0; I < NumParts; ++I)
+ VectorElems[I] =
+ B.buildExtractVectorElementConstant(S32, VectorReg, I).getReg(0);
+ B.buildMergeValues(MO, VectorElems);
+ MO.setReg(VectorReg);
+ return VectorTy;
+ }
+ Register BitcastReg = MRI.createGenericVirtualRegister(VectorTy);
+ B.setInsertPt(B.getMBB(), ++B.getInsertPt());
+ auto Scalar = B.buildBitcast(ScalarTy, BitcastReg);
+ B.buildIntToPtr(MO, Scalar);
+ MO.setReg(BitcastReg);
+
+ return VectorTy;
+}
+
+/// Cast a buffer resource (an address space 8 pointer) into a 4xi32, which is
+/// the form in which the value must be in order to be passed to the low-level
+/// representations used for MUBUF/MTBUF intrinsics. This is a hack, which is
+/// needed in order to account for the fact that we can't define a register
+/// class for s128 without breaking SelectionDAG.
+static Register castBufferRsrcToV4I32(Register Pointer, MachineIRBuilder &B) {
+ MachineRegisterInfo &MRI = *B.getMRI();
+ const LLT PointerTy = MRI.getType(Pointer);
+ const LLT ScalarTy = getBufferRsrcScalarType(PointerTy);
+ const LLT VectorTy = getBufferRsrcRegisterType(PointerTy);
+
+ if (!PointerTy.isVector()) {
+ // Special case: p8 -> (s32, s32, s32, s32) -> (4xs32)
+ SmallVector<Register, 4> PointerParts;
+ const unsigned NumParts = PointerTy.getSizeInBits() / 32;
+ auto Unmerged = B.buildUnmerge(LLT::scalar(32), Pointer);
+ for (unsigned I = 0; I < NumParts; ++I)
+ PointerParts.push_back(Unmerged.getReg(I));
+ return B.buildBuildVector(VectorTy, PointerParts).getReg(0);
+ }
+ Register Scalar = B.buildPtrToInt(ScalarTy, Pointer).getReg(0);
+ return B.buildBitcast(VectorTy, Scalar).getReg(0);
+}
+
+static void castBufferRsrcArgToV4I32(MachineInstr &MI, MachineIRBuilder &B,
+ unsigned Idx) {
+ MachineOperand &MO = MI.getOperand(Idx);
+
+ const LLT PointerTy = B.getMRI()->getType(MO.getReg());
+ // Paranoidly prevent us from doing this multiple times.
+ if (!hasBufferRsrcWorkaround(PointerTy))
+ return;
+ MO.setReg(castBufferRsrcToV4I32(MO.getReg(), B));
+}
+
AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const GCNTargetMachine &TM)
: ST(ST_) {
@@ -484,6 +626,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const LLT RegionPtr = GetAddrSpacePtr(AMDGPUAS::REGION_ADDRESS);
const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS);
const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);
+ const LLT BufferFatPtr = GetAddrSpacePtr(AMDGPUAS::BUFFER_FAT_POINTER);
+ const LLT RsrcPtr = GetAddrSpacePtr(AMDGPUAS::BUFFER_RESOURCE);
const LLT CodePtr = FlatPtr;
@@ -495,6 +639,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
LocalPtr, PrivatePtr, Constant32Ptr, RegionPtr
};
+ const std::initializer_list<LLT> AddrSpaces128 = {RsrcPtr};
+
const std::initializer_list<LLT> FPTypesBase = {
S32, S64
};
@@ -515,17 +661,18 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more
// elements for v3s16
getActionDefinitionsBuilder(G_PHI)
- .legalFor({S32, S64, V2S16, S16, V4S16, S1, S128, S256})
- .legalFor(AllS32Vectors)
- .legalFor(AllS64Vectors)
- .legalFor(AddrSpaces64)
- .legalFor(AddrSpaces32)
- .legalIf(isPointer(0))
- .clampScalar(0, S16, S256)
- .widenScalarToNextPow2(0, 32)
- .clampMaxNumElements(0, S32, 16)
- .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
- .scalarize(0);
+ .legalFor({S32, S64, V2S16, S16, V4S16, S1, S128, S256})
+ .legalFor(AllS32Vectors)
+ .legalFor(AllS64Vectors)
+ .legalFor(AddrSpaces64)
+ .legalFor(AddrSpaces32)
+ .legalFor(AddrSpaces128)
+ .legalIf(isPointer(0))
+ .clampScalar(0, S16, S256)
+ .widenScalarToNextPow2(0, 32)
+ .clampMaxNumElements(0, S32, 16)
+ .moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
+ .scalarize(0);
if (ST.hasVOP3PInsts() && ST.hasAddNoCarry() && ST.hasIntClamp()) {
// Full set of gfx9 features.
@@ -760,13 +907,31 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampScalar(0, S16, S64);
if (ST.has16BitInsts()) {
- getActionDefinitionsBuilder({G_FSQRT, G_FFLOOR})
+ getActionDefinitionsBuilder(G_FSQRT)
+ .legalFor({S32, S16})
+ .customFor({S64})
+ .scalarize(0)
+ .clampScalar(0, S16, S64);
+ getActionDefinitionsBuilder(G_FFLOOR)
.legalFor({S32, S64, S16})
.scalarize(0)
.clampScalar(0, S16, S64);
+
+ getActionDefinitionsBuilder({G_FLDEXP, G_STRICT_FLDEXP})
+ .legalFor({{S32, S32}, {S64, S32}, {S16, S16}})
+ .scalarize(0)
+ .maxScalarIf(typeIs(0, S16), 1, S16)
+ .clampScalar(1, S32, S32)
+ .lower();
+
+ getActionDefinitionsBuilder(G_FFREXP)
+ .customFor({{S32, S32}, {S64, S32}, {S16, S16}, {S16, S32}})
+ .scalarize(0)
+ .lower();
} else {
getActionDefinitionsBuilder(G_FSQRT)
- .legalFor({S32, S64})
+ .legalFor({S32})
+ .customFor({S64})
.scalarize(0)
.clampScalar(0, S32, S64);
@@ -782,6 +947,20 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0)
.clampScalar(0, S32, S64);
}
+
+ getActionDefinitionsBuilder({G_FLDEXP, G_STRICT_FLDEXP})
+ .legalFor({{S32, S32}, {S64, S32}})
+ .scalarize(0)
+ .clampScalar(0, S32, S64)
+ .clampScalar(1, S32, S32)
+ .lower();
+
+ getActionDefinitionsBuilder(G_FFREXP)
+ .customFor({{S32, S32}, {S64, S32}})
+ .scalarize(0)
+ .minScalar(0, S32)
+ .clampScalar(1, S32, S32)
+ .lower();
}
getActionDefinitionsBuilder(G_FPTRUNC)
@@ -906,9 +1085,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
}
getActionDefinitionsBuilder(G_PTR_ADD)
- .legalIf(all(isPointer(0), sameSize(0, 1)))
- .scalarize(0)
- .scalarSameSizeAs(1, 0);
+ .unsupportedFor({BufferFatPtr, RsrcPtr})
+ .legalIf(all(isPointer(0), sameSize(0, 1)))
+ .scalarize(0)
+ .scalarSameSizeAs(1, 0);
getActionDefinitionsBuilder(G_PTRMASK)
.legalIf(all(sameSize(0, 1), typeInSet(1, {S64, S32})))
@@ -948,15 +1128,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.scalarize(0);
// FIXME: fpow has a selection pattern that should move to custom lowering.
- auto &Exp2Ops = getActionDefinitionsBuilder({G_FEXP2, G_FLOG2});
- if (ST.has16BitInsts())
- Exp2Ops.legalFor({S32, S16});
- else
- Exp2Ops.legalFor({S32});
- Exp2Ops.clampScalar(0, MinScalarFPTy, S32);
- Exp2Ops.scalarize(0);
-
- auto &ExpOps = getActionDefinitionsBuilder({G_FEXP, G_FLOG, G_FLOG10, G_FPOW});
+ auto &ExpOps = getActionDefinitionsBuilder(G_FPOW);
if (ST.has16BitInsts())
ExpOps.customFor({{S32}, {S16}});
else
@@ -968,6 +1140,20 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampScalar(0, MinScalarFPTy, S32)
.lower();
+ auto &Log2Ops = getActionDefinitionsBuilder({G_FLOG2, G_FEXP2});
+ Log2Ops.customFor({S32});
+ if (ST.has16BitInsts())
+ Log2Ops.legalFor({S16});
+ else
+ Log2Ops.customFor({S16});
+ Log2Ops.scalarize(0)
+ .lower();
+
+ auto &LogOps = getActionDefinitionsBuilder({G_FLOG, G_FLOG10, G_FEXP});
+ LogOps.customFor({S32, S16});
+ LogOps.clampScalar(0, MinScalarFPTy, S32)
+ .scalarize(0);
+
// The 64-bit versions produce 32-bit results, but only on the SALU.
getActionDefinitionsBuilder(G_CTPOP)
.legalFor({{S32, S32}, {S32, S64}})
@@ -1115,7 +1301,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const LLT PtrTy = Query.Types[1];
unsigned AS = PtrTy.getAddressSpace();
- if (MemSize > maxSizeForAddrSpace(ST, AS, IsLoad))
+ if (MemSize > maxSizeForAddrSpace(ST, AS, IsLoad,
+ Query.MMODescrs[0].Ordering !=
+ AtomicOrdering::NotAtomic))
return true;
// Catch weird sized loads that don't evenly divide into the access sizes
@@ -1178,6 +1366,18 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
return isLoadStoreLegal(ST, Query);
});
+ // The custom pointers (fat pointers, buffer resources) don't work with load
+ // and store at this level. Fat pointers should have been lowered to
+ // intrinsics before the translation to MIR.
+ Actions.unsupportedIf(typeInSet(1, {BufferFatPtr, RsrcPtr}));
+
+ // Address space 8 pointers are handled by a 4xs32 load, bitcast, and
+ // ptrtoint. This is needed to account for the fact that we can't have i128
+ // as a register class for SelectionDAG reasons.
+ Actions.customIf([=](const LegalityQuery &Query) -> bool {
+ return hasBufferRsrcWorkaround(Query.Types[0]);
+ });
+
// Constant 32-bit is handled by addrspacecasting the 32-bit pointer to
// 64-bits.
//
@@ -1223,9 +1423,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
if (DstSize > MemSize)
return std::pair(0, LLT::scalar(MemSize));
- unsigned MaxSize = maxSizeForAddrSpace(ST,
- PtrTy.getAddressSpace(),
- Op == G_LOAD);
+ unsigned MaxSize = maxSizeForAddrSpace(
+ ST, PtrTy.getAddressSpace(), Op == G_LOAD,
+ Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic);
if (MemSize > MaxSize)
return std::pair(0, LLT::scalar(MaxSize));
@@ -1242,9 +1442,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const LLT PtrTy = Query.Types[1];
LLT EltTy = DstTy.getElementType();
- unsigned MaxSize = maxSizeForAddrSpace(ST,
- PtrTy.getAddressSpace(),
- Op == G_LOAD);
+ unsigned MaxSize = maxSizeForAddrSpace(
+ ST, PtrTy.getAddressSpace(), Op == G_LOAD,
+ Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic);
// FIXME: Handle widened to power of 2 results better. This ends
// up scalarizing.
@@ -1284,7 +1484,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
// We're probably decomposing an odd sized store. Try to split
// to the widest type. TODO: Account for alignment. As-is it
// should be OK, since the new parts will be further legalized.
- unsigned FloorSize = PowerOf2Floor(DstSize);
+ unsigned FloorSize = llvm::bit_floor(DstSize);
return std::pair(
0, LLT::scalarOrVector(
ElementCount::getFixed(FloorSize / EltSize), EltTy));
@@ -1335,7 +1535,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
{G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
- G_ATOMICRMW_UMIN})
+ G_ATOMICRMW_UMIN, G_ATOMICRMW_UINC_WRAP, G_ATOMICRMW_UDEC_WRAP})
.legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
{S64, GlobalPtr}, {S64, LocalPtr},
{S32, RegionPtr}, {S64, RegionPtr}});
@@ -1348,7 +1548,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
Atomic.legalFor({{S32, LocalPtr}, {S32, RegionPtr}});
if (ST.hasGFX90AInsts())
Atomic.legalFor({{S64, LocalPtr}});
- if (ST.hasGFX940Insts())
+ if (ST.hasAtomicDsPkAdd16Insts())
Atomic.legalFor({{V2S16, LocalPtr}});
}
if (ST.hasAtomicFaddInsts())
@@ -1450,10 +1650,21 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const LLT VecTy = Query.Types[VecTypeIdx];
const LLT IdxTy = Query.Types[IdxTypeIdx];
const unsigned EltSize = EltTy.getSizeInBits();
+ const bool isLegalVecType =
+ !!SIRegisterInfo::getSGPRClassForBitWidth(VecTy.getSizeInBits());
+ // Address space 8 pointers are 128-bit wide values, but the logic
+ // below will try to bitcast them to 2N x s64, which will fail.
+        // Therefore, as an intermediate step, ptrtoint the vector and scalar
+        // arguments (and inttoptr the extraction result) in order to produce
+        // a vector operation that can be handled by the logic below.
+ if (EltTy.isPointer() && EltSize > 64)
+ return true;
return (EltSize == 32 || EltSize == 64) &&
VecTy.getSizeInBits() % 32 == 0 &&
VecTy.getSizeInBits() <= MaxRegisterSize &&
- IdxTy.getSizeInBits() == 32;
+ IdxTy.getSizeInBits() == 32 &&
+ isLegalVecType;
})
.bitcastIf(all(sizeIsMultipleOf32(VecTypeIdx), scalarOrEltNarrowerThan(VecTypeIdx, 32)),
bitcastToVectorElement32(VecTypeIdx))
@@ -1479,6 +1690,9 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.clampScalar(IdxTypeIdx, S32, S32)
.clampMaxNumElements(VecTypeIdx, S32, 32)
// TODO: Clamp elements for 64-bit vectors?
+ .moreElementsIf(
+ isIllegalRegisterType(VecTypeIdx),
+ moreElementsToNextExistingRegClass(VecTypeIdx))
// It should only be necessary with variable indexes.
// As a last resort, lower to the stack
.lower();
@@ -1533,7 +1747,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
.legalForCartesianProduct(AllS64Vectors, {S64})
.clampNumElements(0, V16S32, V32S32)
.clampNumElements(0, V2S64, V16S64)
- .fewerElementsIf(isWideVec16(0), changeTo(0, V2S16));
+ .fewerElementsIf(isWideVec16(0), changeTo(0, V2S16))
+ .moreElementsIf(
+ isIllegalRegisterType(0),
+ moreElementsToNextExistingRegClass(0));
if (ST.hasScalarPackInsts()) {
BuildVector
@@ -1575,7 +1792,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
const LLT &EltTy = Ty.getElementType();
if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 512)
return true;
- if (!isPowerOf2_32(EltTy.getSizeInBits()))
+ if (!llvm::has_single_bit<uint32_t>(EltTy.getSizeInBits()))
return true;
}
return false;
@@ -1623,8 +1840,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
Builder.widenScalarIf(
[=](const LegalityQuery &Query) {
const LLT Ty = Query.Types[BigTyIdx];
- return !isPowerOf2_32(Ty.getSizeInBits()) &&
- Ty.getSizeInBits() % 16 != 0;
+ return Ty.getSizeInBits() % 16 != 0;
},
[=](const LegalityQuery &Query) {
// Pick the next power of 2, or a multiple of 64 over 128.
@@ -1778,10 +1994,16 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD:
return legalizeLoad(Helper, MI);
+ case TargetOpcode::G_STORE:
+ return legalizeStore(Helper, MI);
case TargetOpcode::G_FMAD:
return legalizeFMad(MI, MRI, B);
case TargetOpcode::G_FDIV:
return legalizeFDIV(MI, MRI, B);
+ case TargetOpcode::G_FFREXP:
+ return legalizeFFREXP(MI, MRI, B);
+ case TargetOpcode::G_FSQRT:
+ return legalizeFSQRT(MI, MRI, B);
case TargetOpcode::G_UDIV:
case TargetOpcode::G_UREM:
case TargetOpcode::G_UDIVREM:
@@ -1792,10 +2014,13 @@ bool AMDGPULegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
return legalizeSignedDIV_REM(MI, MRI, B);
case TargetOpcode::G_ATOMIC_CMPXCHG:
return legalizeAtomicCmpXChg(MI, MRI, B);
+ case TargetOpcode::G_FLOG2:
+ return legalizeFlog2(MI, B);
case TargetOpcode::G_FLOG:
- return legalizeFlog(MI, B, numbers::ln2f);
case TargetOpcode::G_FLOG10:
- return legalizeFlog(MI, B, numbers::ln2f / numbers::ln10f);
+ return legalizeFlogCommon(MI, B);
+ case TargetOpcode::G_FEXP2:
+ return legalizeFExp2(MI, B);
case TargetOpcode::G_FEXP:
return legalizeFExp(MI, B);
case TargetOpcode::G_FPOW:
@@ -1856,7 +2081,8 @@ Register AMDGPULegalizerInfo::getSegmentAperture(
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));
// For code object version 5, private_base and shared_base are passed through
// implicit kernargs.
- if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) {
+ if (AMDGPU::getCodeObjectVersion(*MF.getFunction().getParent()) >=
+ AMDGPU::AMDHSA_COV5) {
AMDGPUTargetLowering::ImplicitParameter Param =
AS == AMDGPUAS::LOCAL_ADDRESS ? AMDGPUTargetLowering::SHARED_BASE
: AMDGPUTargetLowering::PRIVATE_BASE;
@@ -2192,9 +2418,7 @@ bool AMDGPULegalizerInfo::legalizeITOFP(
: B.buildUITOFP(S64, Unmerge.getReg(1));
auto CvtLo = B.buildUITOFP(S64, Unmerge.getReg(0));
- auto LdExp = B.buildIntrinsic(Intrinsic::amdgcn_ldexp, {S64}, false)
- .addUse(CvtHi.getReg(0))
- .addUse(ThirtyTwo.getReg(0));
+ auto LdExp = B.buildFLdexp(S64, CvtHi, ThirtyTwo);
// TODO: Should this propagate fast-math-flags?
B.buildFAdd(Dst, LdExp, CvtLo);
@@ -2225,10 +2449,7 @@ bool AMDGPULegalizerInfo::legalizeITOFP(
auto Norm2 = B.buildOr(S32, Unmerge2.getReg(1), Adjust);
auto FVal = Signed ? B.buildSITOFP(S32, Norm2) : B.buildUITOFP(S32, Norm2);
auto Scale = B.buildSub(S32, ThirtyTwo, ShAmt);
- B.buildIntrinsic(Intrinsic::amdgcn_ldexp, ArrayRef<Register>{Dst},
- /*HasSideEffects=*/false)
- .addUse(FVal.getReg(0))
- .addUse(Scale.getReg(0));
+ B.buildFLdexp(Dst, FVal, Scale);
MI.eraseFromParent();
return true;
}
@@ -2273,13 +2494,15 @@ bool AMDGPULegalizerInfo::legalizeFPTOI(MachineInstr &MI,
}
MachineInstrBuilder K0, K1;
if (SrcLT == S64) {
- K0 = B.buildFConstant(S64,
- BitsToDouble(UINT64_C(/*2^-32*/ 0x3df0000000000000)));
- K1 = B.buildFConstant(S64,
- BitsToDouble(UINT64_C(/*-2^32*/ 0xc1f0000000000000)));
+ K0 = B.buildFConstant(
+ S64, llvm::bit_cast<double>(UINT64_C(/*2^-32*/ 0x3df0000000000000)));
+ K1 = B.buildFConstant(
+ S64, llvm::bit_cast<double>(UINT64_C(/*-2^32*/ 0xc1f0000000000000)));
} else {
- K0 = B.buildFConstant(S32, BitsToFloat(UINT32_C(/*2^-32*/ 0x2f800000)));
- K1 = B.buildFConstant(S32, BitsToFloat(UINT32_C(/*-2^32*/ 0xcf800000)));
+ K0 = B.buildFConstant(
+ S32, llvm::bit_cast<float>(UINT32_C(/*2^-32*/ 0x2f800000)));
+ K1 = B.buildFConstant(
+ S32, llvm::bit_cast<float>(UINT32_C(/*-2^32*/ 0xcf800000)));
}
auto Mul = B.buildFMul(SrcLT, Trunc, K0, Flags);
@@ -2329,6 +2552,30 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
// TODO: Promote dynamic indexing of s16 to s32
+ Register Dst = MI.getOperand(0).getReg();
+ Register Vec = MI.getOperand(1).getReg();
+
+ LLT VecTy = MRI.getType(Vec);
+ LLT EltTy = VecTy.getElementType();
+ assert(EltTy == MRI.getType(Dst));
+
+ // Other legalization maps vector<? x [type bigger than 64 bits]> via bitcasts
+  // but we can't go directly to that logic because you can't bitcast a vector
+ // of pointers to a vector of integers. Therefore, introduce an intermediate
+ // vector of integers using ptrtoint (and inttoptr on the output) in order to
+ // drive the legalization forward.
+ if (EltTy.isPointer() && EltTy.getSizeInBits() > 64) {
+ LLT IntTy = LLT::scalar(EltTy.getSizeInBits());
+ LLT IntVecTy = VecTy.changeElementType(IntTy);
+
+ auto IntVec = B.buildPtrToInt(IntVecTy, Vec);
+ auto IntElt = B.buildExtractVectorElement(IntTy, IntVec, MI.getOperand(2));
+ B.buildIntToPtr(Dst, IntElt);
+
+ MI.eraseFromParent();
+ return true;
+ }
+
// FIXME: Artifact combiner probably should have replaced the truncated
// constant before this, so we shouldn't need
// getIConstantVRegValWithLookThrough.
@@ -2338,13 +2585,6 @@ bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
return true;
const uint64_t IdxVal = MaybeIdxVal->Value.getZExtValue();
- Register Dst = MI.getOperand(0).getReg();
- Register Vec = MI.getOperand(1).getReg();
-
- LLT VecTy = MRI.getType(Vec);
- LLT EltTy = VecTy.getElementType();
- assert(EltTy == MRI.getType(Dst));
-
if (IdxVal < VecTy.getNumElements()) {
auto Unmerge = B.buildUnmerge(EltTy, Vec);
B.buildCopy(Dst, Unmerge.getReg(IdxVal));
@@ -2363,6 +2603,33 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
// TODO: Promote dynamic indexing of s16 to s32
+ Register Dst = MI.getOperand(0).getReg();
+ Register Vec = MI.getOperand(1).getReg();
+ Register Ins = MI.getOperand(2).getReg();
+
+ LLT VecTy = MRI.getType(Vec);
+ LLT EltTy = VecTy.getElementType();
+ assert(EltTy == MRI.getType(Ins));
+
+ // Other legalization maps vector<? x [type bigger than 64 bits]> via bitcasts
+  // but we can't go directly to that logic because you can't bitcast a vector
+ // of pointers to a vector of integers. Therefore, make the pointer vector
+ // into an equivalent vector of integers with ptrtoint, insert the ptrtoint'd
+ // new value, and then inttoptr the result vector back. This will then allow
+ // the rest of legalization to take over.
+ if (EltTy.isPointer() && EltTy.getSizeInBits() > 64) {
+ LLT IntTy = LLT::scalar(EltTy.getSizeInBits());
+ LLT IntVecTy = VecTy.changeElementType(IntTy);
+
+ auto IntVecSource = B.buildPtrToInt(IntVecTy, Vec);
+ auto IntIns = B.buildPtrToInt(IntTy, Ins);
+ auto IntVecDest = B.buildInsertVectorElement(IntVecTy, IntVecSource, IntIns,
+ MI.getOperand(3));
+ B.buildIntToPtr(Dst, IntVecDest);
+ MI.eraseFromParent();
+ return true;
+ }
+
// FIXME: Artifact combiner probably should have replaced the truncated
// constant before this, so we shouldn't need
// getIConstantVRegValWithLookThrough.
@@ -2372,14 +2639,6 @@ bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
return true;
const uint64_t IdxVal = MaybeIdxVal->Value.getZExtValue();
- Register Dst = MI.getOperand(0).getReg();
- Register Vec = MI.getOperand(1).getReg();
- Register Ins = MI.getOperand(2).getReg();
-
- LLT VecTy = MRI.getType(Vec);
- LLT EltTy = VecTy.getElementType();
- assert(EltTy == MRI.getType(Ins));
- (void)Ins;
unsigned NumElts = VecTy.getNumElements();
if (IdxVal < NumElts) {
@@ -2479,7 +2738,8 @@ bool AMDGPULegalizerInfo::buildPCRelGlobalAddress(Register DstReg, LLT PtrTy,
else
MIB.addGlobalAddress(GV, Offset + 12, GAFlags + 1);
- B.getMRI()->setRegClass(PCReg, &AMDGPU::SReg_64RegClass);
+ if (!B.getMRI()->getRegClassOrNull(PCReg))
+ B.getMRI()->setRegClass(PCReg, &AMDGPU::SReg_64RegClass);
if (PtrTy.getSizeInBits() == 32)
B.buildExtract(DstReg, PCReg, 0);
@@ -2535,7 +2795,7 @@ bool AMDGPULegalizerInfo::legalizeGlobalValue(
// allocated ones. They all share the same offset.
if (B.getDataLayout().getTypeAllocSize(Ty).isZero()) {
// Adjust alignment for that dynamic shared memory array.
- MFI->setDynLDSAlign(B.getDataLayout(), *cast<GlobalVariable>(GV));
+ MFI->setDynLDSAlign(MF.getFunction(), *cast<GlobalVariable>(GV));
LLT S32 = LLT::scalar(32);
auto Sz =
B.buildIntrinsic(Intrinsic::amdgcn_groupstaticsize, {S32}, false);
@@ -2620,6 +2880,13 @@ bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
Register ValReg = MI.getOperand(0).getReg();
LLT ValTy = MRI.getType(ValReg);
+ if (hasBufferRsrcWorkaround(ValTy)) {
+ Observer.changingInstr(MI);
+ castBufferRsrcFromV4I32(MI, B, MRI, 0);
+ Observer.changedInstr(MI);
+ return true;
+ }
+
MachineMemOperand *MMO = *MI.memoperands_begin();
const unsigned ValSize = ValTy.getSizeInBits();
const LLT MemTy = MMO->getMemoryType();
@@ -2677,6 +2944,24 @@ bool AMDGPULegalizerInfo::legalizeLoad(LegalizerHelper &Helper,
return false;
}
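+// G_STORE is only custom-legalized for buffer resource (address space 8)
+// values, which are cast to the underlying v4i32 form here.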
+bool AMDGPULegalizerInfo::legalizeStore(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
+ MachineIRBuilder &B = Helper.MIRBuilder;
+ MachineRegisterInfo &MRI = *B.getMRI();
+ GISelChangeObserver &Observer = Helper.Observer;
+
+ Register DataReg = MI.getOperand(0).getReg();
+ LLT DataTy = MRI.getType(DataReg);
+
+ if (hasBufferRsrcWorkaround(DataTy)) {
+ Observer.changingInstr(MI);
+ castBufferRsrcArgToV4I32(MI, B, 0);
+ Observer.changedInstr(MI);
+ return true;
+ }
+ return false;
+}
+
bool AMDGPULegalizerInfo::legalizeFMad(
MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
@@ -2688,9 +2973,11 @@ bool AMDGPULegalizerInfo::legalizeFMad(
// TODO: Always legal with future ftz flag.
// FIXME: Do we need just output?
- if (Ty == LLT::scalar(32) && !MFI->getMode().allFP32Denormals())
+ if (Ty == LLT::scalar(32) &&
+ MFI->getMode().FP32Denormals == DenormalMode::getPreserveSign())
return true;
- if (Ty == LLT::scalar(16) && !MFI->getMode().allFP64FP16Denormals())
+ if (Ty == LLT::scalar(16) &&
+ MFI->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign())
return true;
MachineIRBuilder HelperBuilder(MI);
@@ -2724,31 +3011,449 @@ bool AMDGPULegalizerInfo::legalizeAtomicCmpXChg(
return true;
}
-bool AMDGPULegalizerInfo::legalizeFlog(
- MachineInstr &MI, MachineIRBuilder &B, double Log2BaseInverted) const {
+/// Return true if it's known that \p Src can never be an f32 denormal value.
+static bool valueIsKnownNeverF32Denorm(const MachineRegisterInfo &MRI,
+ Register Src) {
+ Register ExtSrc;
+ if (mi_match(Src, MRI, m_GFPExt(m_Reg(ExtSrc))))
+ return MRI.getType(ExtSrc) == LLT::scalar(16);
+ return false;
+}
+
+static bool allowApproxFunc(const MachineFunction &MF, unsigned Flags) {
+ if (Flags & MachineInstr::FmAfn)
+ return true;
+ const auto &Options = MF.getTarget().Options;
+ return Options.UnsafeFPMath || Options.ApproxFuncFPMath;
+}
+
+static bool needsDenormHandlingF32(const MachineFunction &MF, Register Src,
+ unsigned Flags) {
+ return !valueIsKnownNeverF32Denorm(MF.getRegInfo(), Src) &&
+ MF.getDenormalMode(APFloat::IEEEsingle()).Input !=
+ DenormalMode::PreserveSign;
+}
+
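+// When f32 denormals must be handled, return the log input multiplied by
+// 2^32 if it is below the smallest normal (unchanged otherwise), together
+// with that condition; otherwise return a pair of null registers.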
+std::pair<Register, Register>
+AMDGPULegalizerInfo::getScaledLogInput(MachineIRBuilder &B, Register Src,
+ unsigned Flags) const {
+ if (allowApproxFunc(B.getMF(), Flags) ||
+ !needsDenormHandlingF32(B.getMF(), Src, Flags))
+ return {};
+
+ const LLT F32 = LLT::scalar(32);
+ auto SmallestNormal = B.buildFConstant(
+ F32, APFloat::getSmallestNormalized(APFloat::IEEEsingle()));
+ auto IsLtSmallestNormal =
+ B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), Src, SmallestNormal);
+
+ auto Scale32 = B.buildFConstant(F32, 0x1.0p+32);
+ auto One = B.buildFConstant(F32, 1.0);
+ auto ScaleFactor =
+ B.buildSelect(F32, IsLtSmallestNormal, Scale32, One, Flags);
+ auto ScaledInput = B.buildFMul(F32, Src, ScaleFactor, Flags);
+
+ return {ScaledInput.getReg(0), IsLtSmallestNormal.getReg(0)};
+}
+
+bool AMDGPULegalizerInfo::legalizeFlog2(MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ // v_log_f32 is good enough for OpenCL, except it doesn't handle denormals.
+ // If we have to handle denormals, scale up the input and adjust the result.
+
+ // scaled = x * (is_denormal ? 0x1.0p+32 : 1.0)
+ // log2 = amdgpu_log2 - (is_denormal ? 32.0 : 0.0)
+
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
LLT Ty = B.getMRI()->getType(Dst);
unsigned Flags = MI.getFlags();
- auto Log2Operand = B.buildFLog2(Ty, Src, Flags);
- auto Log2BaseInvertedOperand = B.buildFConstant(Ty, Log2BaseInverted);
+ if (Ty == LLT::scalar(16)) {
+ const LLT F32 = LLT::scalar(32);
+ // Nothing in half is a denormal when promoted to f32.
+ auto Ext = B.buildFPExt(F32, Src, Flags);
+ auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {F32}, false)
+ .addUse(Ext.getReg(0))
+ .setMIFlags(Flags);
+ B.buildFPTrunc(Dst, Log2, Flags);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ assert(Ty == LLT::scalar(32));
+
+ auto [ScaledInput, IsLtSmallestNormal] = getScaledLogInput(B, Src, Flags);
+ if (!ScaledInput) {
+ B.buildIntrinsic(Intrinsic::amdgcn_log, {MI.getOperand(0)}, false)
+ .addUse(Src)
+ .setMIFlags(Flags);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ auto Log2 = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
+ .addUse(ScaledInput)
+ .setMIFlags(Flags);
+
+ auto ThirtyTwo = B.buildFConstant(Ty, 32.0);
+ auto Zero = B.buildFConstant(Ty, 0.0);
+ auto ResultOffset =
+ B.buildSelect(Ty, IsLtSmallestNormal, ThirtyTwo, Zero, Flags);
+ B.buildFSub(Dst, Log2, ResultOffset, Flags);
- B.buildFMul(Dst, Log2Operand, Log2BaseInvertedOperand, Flags);
MI.eraseFromParent();
return true;
}
-bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
- MachineIRBuilder &B) const {
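+// Build an unfused multiply-add: X * Y + Z.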
+static Register getMad(MachineIRBuilder &B, LLT Ty, Register X, Register Y,
+ Register Z, unsigned Flags) {
+ auto FMul = B.buildFMul(Ty, X, Y, Flags);
+ return B.buildFAdd(Ty, FMul, Z, Flags).getReg(0);
+}
+
+bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ const bool IsLog10 = MI.getOpcode() == TargetOpcode::G_FLOG10;
+ assert(IsLog10 || MI.getOpcode() == TargetOpcode::G_FLOG);
+
+ MachineRegisterInfo &MRI = *B.getMRI();
+ Register Dst = MI.getOperand(0).getReg();
+ Register X = MI.getOperand(1).getReg();
+ unsigned Flags = MI.getFlags();
+ const LLT Ty = MRI.getType(X);
+ MachineFunction &MF = B.getMF();
+
+ const LLT F32 = LLT::scalar(32);
+ const LLT F16 = LLT::scalar(16);
+
+ const AMDGPUTargetMachine &TM =
+ static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
+
+ if (Ty == F16 || MI.getFlag(MachineInstr::FmAfn) ||
+ TM.Options.ApproxFuncFPMath || TM.Options.UnsafeFPMath) {
+ const double Log2BaseInv =
+ IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2;
+
+ if (Ty == F16 && !ST.has16BitInsts()) {
+ Register LogVal = MRI.createGenericVirtualRegister(F32);
+ auto PromoteSrc = B.buildFPExt(F32, X);
+ legalizeFlogUnsafe(B, LogVal, PromoteSrc.getReg(0), Log2BaseInv, Flags);
+ B.buildFPTrunc(Dst, LogVal);
+ } else {
+ legalizeFlogUnsafe(B, Dst, X, Log2BaseInv, Flags);
+ }
+
+ MI.eraseFromParent();
+ return true;
+ }
+
+ auto [ScaledInput, IsScaled] = getScaledLogInput(B, X, Flags);
+ if (ScaledInput)
+ X = ScaledInput;
+
+ auto Y = B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
+ .addUse(X)
+ .setMIFlags(Flags);
+
+ Register R;
+ if (ST.hasFastFMAF32()) {
+ // c+cc are ln(2)/ln(10) to more than 49 bits
+ const float c_log10 = 0x1.344134p-2f;
+ const float cc_log10 = 0x1.09f79ep-26f;
+
+ // c + cc is ln(2) to more than 49 bits
+ const float c_log = 0x1.62e42ep-1f;
+ const float cc_log = 0x1.efa39ep-25f;
+
+ auto C = B.buildFConstant(Ty, IsLog10 ? c_log10 : c_log);
+ auto CC = B.buildFConstant(Ty, IsLog10 ? cc_log10 : cc_log);
+
+ R = B.buildFMul(Ty, Y, C, Flags).getReg(0);
+ auto NegR = B.buildFNeg(Ty, R, Flags);
+ auto FMA0 = B.buildFMA(Ty, Y, C, NegR, Flags);
+ auto FMA1 = B.buildFMA(Ty, Y, CC, FMA0, Flags);
+ R = B.buildFAdd(Ty, R, FMA1, Flags).getReg(0);
+ } else {
+ // ch+ct is ln(2)/ln(10) to more than 36 bits
+ const float ch_log10 = 0x1.344000p-2f;
+ const float ct_log10 = 0x1.3509f6p-18f;
+
+ // ch + ct is ln(2) to more than 36 bits
+ const float ch_log = 0x1.62e000p-1f;
+ const float ct_log = 0x1.0bfbe8p-15f;
+
+ auto CH = B.buildFConstant(Ty, IsLog10 ? ch_log10 : ch_log);
+ auto CT = B.buildFConstant(Ty, IsLog10 ? ct_log10 : ct_log);
+
+ auto MaskConst = B.buildConstant(Ty, 0xfffff000);
+ auto YH = B.buildAnd(Ty, Y, MaskConst);
+ auto YT = B.buildFSub(Ty, Y, YH, Flags);
+ auto YTCT = B.buildFMul(Ty, YT, CT, Flags);
+
+ Register Mad0 =
+ getMad(B, Ty, YH.getReg(0), CT.getReg(0), YTCT.getReg(0), Flags);
+ Register Mad1 = getMad(B, Ty, YT.getReg(0), CH.getReg(0), Mad0, Flags);
+ R = getMad(B, Ty, YH.getReg(0), CH.getReg(0), Mad1, Flags);
+ }
+
+ const bool IsFiniteOnly =
+ (MI.getFlag(MachineInstr::FmNoNans) || TM.Options.NoNaNsFPMath) &&
+ (MI.getFlag(MachineInstr::FmNoInfs) || TM.Options.NoInfsFPMath);
+
+ if (!IsFiniteOnly) {
+ // Expand isfinite(x) => fabs(x) < inf
+ auto Inf = B.buildFConstant(Ty, APFloat::getInf(APFloat::IEEEsingle()));
+ auto Fabs = B.buildFAbs(Ty, Y);
+ auto IsFinite =
+ B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), Fabs, Inf, Flags);
+ R = B.buildSelect(Ty, IsFinite, R, Y, Flags).getReg(0);
+ }
+
+ if (ScaledInput) {
+ auto Zero = B.buildFConstant(Ty, 0.0);
+ auto ShiftK =
+ B.buildFConstant(Ty, IsLog10 ? 0x1.344136p+3f : 0x1.62e430p+4f);
+ auto Shift = B.buildSelect(Ty, IsScaled, ShiftK, Zero, Flags);
+ B.buildFSub(Dst, R, Shift, Flags);
+ } else {
+ B.buildCopy(Dst, R);
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
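The precise path above amounts to multiplying the hardware log2 by ln(2) (or ln(2)/ln(10)) kept in head+tail form. A standalone C++ sketch of the fast-FMA variant, with std::log2 standing in for v_log_f32 and std::fmaf for the FMA chain (illustrative only):

#include <cmath>
#include <cstdio>

static float emulated_logf(float x) {
  const float C  = 0x1.62e42ep-1f;   // head of ln(2), c_log above
  const float CC = 0x1.efa39ep-25f;  // tail of ln(2), cc_log above
  float Y = std::log2(x);            // hardware log2 approximation
  float R = Y * C;
  float Err = std::fmaf(Y, C, -R);   // exact low part of Y*C
  float Tail = std::fmaf(Y, CC, Err);
  return R + Tail;                   // ln(x) ~= log2(x) * (C + CC)
}

int main() {
  std::printf("%.9g vs %.9g\n", emulated_logf(10.0f), std::log(10.0f));
}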
+
+bool AMDGPULegalizerInfo::legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst,
+ Register Src,
+ double Log2BaseInverted,
+ unsigned Flags) const {
+ LLT Ty = B.getMRI()->getType(Dst);
+ auto Log2Operand = Ty == LLT::scalar(16)
+ ? B.buildFLog2(Ty, Src, Flags)
+ : B.buildIntrinsic(Intrinsic::amdgcn_log, {Ty}, false)
+ .addUse(Src)
+ .setMIFlags(Flags);
+ auto Log2BaseInvertedOperand = B.buildFConstant(Ty, Log2BaseInverted);
+ B.buildFMul(Dst, Log2Operand, Log2BaseInvertedOperand, Flags);
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeFExp2(MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ // v_exp_f32 is good enough for OpenCL, except it doesn't handle denormals.
+ // If we have to handle denormals, scale up the input and adjust the result.
+
Register Dst = MI.getOperand(0).getReg();
Register Src = MI.getOperand(1).getReg();
unsigned Flags = MI.getFlags();
LLT Ty = B.getMRI()->getType(Dst);
+ const LLT F16 = LLT::scalar(16);
+ const LLT F32 = LLT::scalar(32);
+
+ if (Ty == F16) {
+ // Nothing in half is a denormal when promoted to f32.
+ auto Ext = B.buildFPExt(F32, Src, Flags);
+    auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {F32}, false)
+                    .addUse(Ext.getReg(0))
+                    .setMIFlags(Flags);
+    B.buildFPTrunc(Dst, Exp2, Flags);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ assert(Ty == F32);
+
+ if (allowApproxFunc(B.getMF(), Flags) ||
+ !needsDenormHandlingF32(B.getMF(), Src, Flags)) {
+ B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst}, false)
+ .addUse(Src)
+ .setMIFlags(Flags);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // bool needs_scaling = x < -0x1.f80000p+6f;
+ // v_exp_f32(x + (s ? 0x1.0p+6f : 0.0f)) * (s ? 0x1.0p-64f : 1.0f);
+
+ // -nextafter(128.0, -1)
+ auto RangeCheckConst = B.buildFConstant(Ty, -0x1.f80000p+6f);
+ auto NeedsScaling = B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), Src,
+ RangeCheckConst, Flags);
+
+ auto SixtyFour = B.buildFConstant(Ty, 0x1.0p+6f);
+ auto Zero = B.buildFConstant(Ty, 0.0);
+ auto AddOffset = B.buildSelect(F32, NeedsScaling, SixtyFour, Zero, Flags);
+ auto AddInput = B.buildFAdd(F32, Src, AddOffset, Flags);
+ auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty}, false)
+ .addUse(AddInput.getReg(0))
+ .setMIFlags(Flags);
+
+ auto TwoExpNeg64 = B.buildFConstant(Ty, 0x1.0p-64f);
+ auto One = B.buildFConstant(Ty, 1.0);
+ auto ResultScale = B.buildSelect(F32, NeedsScaling, TwoExpNeg64, One, Flags);
+ B.buildFMul(Dst, Exp2, ResultScale, Flags);
+ MI.eraseFromParent();
+ return true;
+}
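A standalone C++ sketch of the range split above, with std::exp2 standing in for v_exp_f32; the constants mirror the ones in the code, everything else is illustrative:

#include <cmath>
#include <cstdio>

static float emulated_exp2f(float x) {
  bool NeedsScaling = x < -0x1.f80000p+6f;             // result would be a denormal
  float In = x + (NeedsScaling ? 0x1.0p+6f : 0.0f);    // lift the exponent by 64
  float R = std::exp2(In);                             // v_exp_f32
  return R * (NeedsScaling ? 0x1.0p-64f : 1.0f);       // 2^(x+64) * 2^-64 == 2^x
}

int main() {
  std::printf("%a\n", emulated_exp2f(-140.0f));        // denormal result survives
}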
+
+bool AMDGPULegalizerInfo::legalizeFExpUnsafe(MachineIRBuilder &B, Register Dst,
+ Register Src,
+ unsigned Flags) const {
+ LLT Ty = B.getMRI()->getType(Dst);
auto K = B.buildFConstant(Ty, numbers::log2e);
auto Mul = B.buildFMul(Ty, Src, K, Flags);
- B.buildFExp2(Dst, Mul, Flags);
+
+ if (Ty == LLT::scalar(32)) {
+ B.buildIntrinsic(Intrinsic::amdgcn_exp2, ArrayRef<Register>{Dst}, false)
+ .addUse(Mul.getReg(0))
+ .setMIFlags(Flags);
+ } else {
+ B.buildFExp2(Dst, Mul.getReg(0), Flags);
+ }
+
+ return true;
+}
+
+bool AMDGPULegalizerInfo::legalizeFExp(MachineInstr &MI,
+ MachineIRBuilder &B) const {
+ Register Dst = MI.getOperand(0).getReg();
+ Register X = MI.getOperand(1).getReg();
+ const unsigned Flags = MI.getFlags();
+ MachineFunction &MF = B.getMF();
+ MachineRegisterInfo &MRI = *B.getMRI();
+ LLT Ty = MRI.getType(Dst);
+ const LLT F16 = LLT::scalar(16);
+ const LLT F32 = LLT::scalar(32);
+ const bool IsExp10 = false; // TODO: For some reason exp10 is missing
+
+ if (Ty == F16) {
+ // v_exp_f16 (fmul x, log2e)
+ if (allowApproxFunc(MF, Flags)) {
+ // TODO: Does this really require fast?
+ legalizeFExpUnsafe(B, Dst, X, Flags);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // exp(f16 x) ->
+ // fptrunc (v_exp_f32 (fmul (fpext x), log2e))
+
+ // Nothing in half is a denormal when promoted to f32.
+ auto Ext = B.buildFPExt(F32, X, Flags);
+ Register Lowered = MRI.createGenericVirtualRegister(F32);
+ legalizeFExpUnsafe(B, Lowered, Ext.getReg(0), Flags);
+ B.buildFPTrunc(Dst, Lowered, Flags);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ assert(Ty == F32);
+
+ // TODO: Interpret allowApproxFunc as ignoring DAZ. This is currently copying
+ // library behavior. Also, is known-not-daz source sufficient?
+ if (allowApproxFunc(MF, Flags) && !needsDenormHandlingF32(MF, X, Flags)) {
+ legalizeFExpUnsafe(B, Dst, X, Flags);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // Algorithm:
+ //
+ // e^x = 2^(x/ln(2)) = 2^(x*(64/ln(2))/64)
+ //
+ // x*(64/ln(2)) = n + f, |f| <= 0.5, n is integer
+ // n = 64*m + j, 0 <= j < 64
+ //
+ // e^x = 2^((64*m + j + f)/64)
+ // = (2^m) * (2^(j/64)) * 2^(f/64)
+ // = (2^m) * (2^(j/64)) * e^(f*(ln(2)/64))
+ //
+ // f = x*(64/ln(2)) - n
+ // r = f*(ln(2)/64) = x - n*(ln(2)/64)
+ //
+ // e^x = (2^m) * (2^(j/64)) * e^r
+ //
+ // (2^(j/64)) is precomputed
+ //
+ // e^r = 1 + r + (r^2)/2! + (r^3)/3! + (r^4)/4! + (r^5)/5!
+ // e^r = 1 + q
+ //
+ // q = r + (r^2)/2! + (r^3)/3! + (r^4)/4! + (r^5)/5!
+ //
+ // e^x = (2^m) * ( (2^(j/64)) + q*(2^(j/64)) )
+ const unsigned FlagsNoContract = Flags & ~MachineInstr::FmContract;
+ Register PH, PL;
+
+ if (ST.hasFastFMAF32()) {
+ const float c_exp = numbers::log2ef;
+ const float cc_exp = 0x1.4ae0bep-26f; // c+cc are 49 bits
+ const float c_exp10 = 0x1.a934f0p+1f;
+ const float cc_exp10 = 0x1.2f346ep-24f;
+
+ auto C = B.buildFConstant(Ty, IsExp10 ? c_exp10 : c_exp);
+ PH = B.buildFMul(Ty, X, C, Flags).getReg(0);
+ auto NegPH = B.buildFNeg(Ty, PH, Flags);
+ auto FMA0 = B.buildFMA(Ty, X, C, NegPH, Flags);
+
+ auto CC = B.buildFConstant(Ty, IsExp10 ? cc_exp10 : cc_exp);
+ PL = B.buildFMA(Ty, X, CC, FMA0, Flags).getReg(0);
+ } else {
+ const float ch_exp = 0x1.714000p+0f;
+ const float cl_exp = 0x1.47652ap-12f; // ch + cl are 36 bits
+
+ const float ch_exp10 = 0x1.a92000p+1f;
+ const float cl_exp10 = 0x1.4f0978p-11f;
+
+ auto MaskConst = B.buildConstant(Ty, 0xfffff000);
+ auto XH = B.buildAnd(Ty, X, MaskConst);
+ auto XL = B.buildFSub(Ty, X, XH, Flags);
+
+ auto CH = B.buildFConstant(Ty, IsExp10 ? ch_exp10 : ch_exp);
+ PH = B.buildFMul(Ty, XH, CH, Flags).getReg(0);
+
+ auto CL = B.buildFConstant(Ty, IsExp10 ? cl_exp10 : cl_exp);
+ auto XLCL = B.buildFMul(Ty, XL, CL, Flags);
+
+ Register Mad0 =
+ getMad(B, Ty, XL.getReg(0), CH.getReg(0), XLCL.getReg(0), Flags);
+ PL = getMad(B, Ty, XH.getReg(0), CL.getReg(0), Mad0, Flags);
+ }
+
+ auto E = B.buildFRint(Ty, PH, Flags);
+
+ // It is unsafe to contract this fsub into the PH multiply.
+ auto PHSubE = B.buildFSub(Ty, PH, E, FlagsNoContract);
+ auto A = B.buildFAdd(Ty, PHSubE, PL, Flags);
+ auto IntE = B.buildFPTOSI(LLT::scalar(32), E);
+
+ auto Exp2 = B.buildIntrinsic(Intrinsic::amdgcn_exp2, {Ty}, false)
+ .addUse(A.getReg(0))
+ .setMIFlags(Flags);
+ auto R = B.buildFLdexp(Ty, Exp2, IntE, Flags);
+
+ auto UnderflowCheckConst =
+ B.buildFConstant(Ty, IsExp10 ? -0x1.66d3e8p+5f : -0x1.9d1da0p+6f);
+ auto Zero = B.buildFConstant(Ty, 0.0);
+ auto Underflow =
+ B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), X, UnderflowCheckConst);
+
+ R = B.buildSelect(Ty, Underflow, Zero, R);
+
+ const auto &Options = MF.getTarget().Options;
+
+ if (!(Flags & MachineInstr::FmNoInfs) && !Options.NoInfsFPMath) {
+ auto OverflowCheckConst =
+ B.buildFConstant(Ty, IsExp10 ? 0x1.344136p+5f : 0x1.62e430p+6f);
+
+ auto Overflow =
+ B.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), X, OverflowCheckConst);
+ auto Inf = B.buildFConstant(Ty, APFloat::getInf(APFloat::IEEEsingle()));
+ R = B.buildSelect(Ty, Overflow, Inf, R, Flags);
+ }
+
+ B.buildCopy(Dst, R);
MI.eraseFromParent();
return true;
}
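A standalone C++ sketch of the fast-FMA path above, with std::exp2 standing in for v_exp_f32 and std::ldexpf for the ldexp step; the constants mirror c_exp/cc_exp and the underflow/overflow bounds, the rest is illustrative:

#include <cmath>
#include <cstdio>

static float emulated_expf(float x) {
  const float C  = 0x1.715476p+0f;   // log2(e) as float (numbers::log2ef)
  const float CC = 0x1.4ae0bep-26f;  // tail of log2(e), cc_exp above
  float PH = x * C;
  float PL = std::fmaf(x, CC, std::fmaf(x, C, -PH));
  float E  = std::rintf(PH);                      // integer part of x*log2(e)
  float A  = (PH - E) + PL;                       // small remainder, |A| <= ~0.5
  float R  = std::ldexpf(std::exp2(A), (int)E);   // 2^A scaled by 2^E
  if (x < -0x1.9d1da0p+6f)
    R = 0.0f;                                     // guaranteed underflow
  if (x > 0x1.62e430p+6f)
    R = HUGE_VALF;                                // guaranteed overflow
  return R;
}

int main() {
  std::printf("%.9g vs %.9g\n", emulated_expf(3.0f), std::exp(3.0f));
}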
@@ -2831,7 +3536,8 @@ bool AMDGPULegalizerInfo::legalizeFFloor(MachineInstr &MI,
// shouldn't matter?
Register ModSrc = stripAnySourceMods(OrigSrc, MRI);
- auto Const = B.buildFConstant(S64, BitsToDouble(0x3fefffffffffffff));
+ auto Const =
+ B.buildFConstant(S64, llvm::bit_cast<double>(0x3fefffffffffffff));
Register Min = MRI.createGenericVirtualRegister(S64);
@@ -2890,15 +3596,18 @@ bool AMDGPULegalizerInfo::legalizeBuildVector(
// the outer loop going over parts of the result, the outer loop should go
// over parts of one of the factors. This should result in instruction
// selection that makes full use of S_ADDC_U32 instructions.
-void AMDGPULegalizerInfo::buildMultiply(
- LegalizerHelper &Helper, MutableArrayRef<Register> Accum,
- ArrayRef<Register> Src0, ArrayRef<Register> Src1,
- bool UsePartialMad64_32, bool SeparateOddAlignedProducts) const {
+void AMDGPULegalizerInfo::buildMultiply(LegalizerHelper &Helper,
+ MutableArrayRef<Register> Accum,
+ ArrayRef<Register> Src0,
+ ArrayRef<Register> Src1,
+ bool UsePartialMad64_32,
+ bool SeparateOddAlignedProducts) const {
// Use (possibly empty) vectors of S1 registers to represent the set of
// carries from one pair of positions to the next.
using Carry = SmallVector<Register, 2>;
MachineIRBuilder &B = Helper.MIRBuilder;
+ GISelKnownBits &KB = *Helper.getKnownBits();
const LLT S1 = LLT::scalar(1);
const LLT S32 = LLT::scalar(32);
@@ -2918,6 +3627,12 @@ void AMDGPULegalizerInfo::buildMultiply(
return Zero64;
};
+ SmallVector<bool, 2> Src0KnownZeros, Src1KnownZeros;
+ for (unsigned i = 0; i < Src0.size(); ++i) {
+ Src0KnownZeros.push_back(KB.getKnownBits(Src0[i]).isZero());
+ Src1KnownZeros.push_back(KB.getKnownBits(Src1[i]).isZero());
+ }
+
// Merge the given carries into the 32-bit LocalAccum, which is modified
// in-place.
//
@@ -2980,9 +3695,14 @@ void AMDGPULegalizerInfo::buildMultiply(
if (LocalAccum.size() == 1 &&
(!UsePartialMad64_32 || !CarryIn.empty())) {
do {
+ // Skip multiplication if one of the operands is 0
unsigned j1 = DstIndex - j0;
+ if (Src0KnownZeros[j0] || Src1KnownZeros[j1]) {
+ ++j0;
+ continue;
+ }
auto Mul = B.buildMul(S32, Src0[j0], Src1[j1]);
- if (!LocalAccum[0]) {
+ if (!LocalAccum[0] || KB.getKnownBits(LocalAccum[0]).isZero()) {
LocalAccum[0] = Mul.getReg(0);
} else {
if (CarryIn.empty()) {
@@ -3022,12 +3742,17 @@ void AMDGPULegalizerInfo::buildMultiply(
do {
unsigned j1 = DstIndex - j0;
+ if (Src0KnownZeros[j0] || Src1KnownZeros[j1]) {
+ ++j0;
+ continue;
+ }
auto Mad = B.buildInstr(AMDGPU::G_AMDGPU_MAD_U64_U32, {S64, S1},
{Src0[j0], Src1[j1], Tmp});
Tmp = Mad.getReg(0);
if (!HaveSmallAccum)
CarryOut.push_back(Mad.getReg(1));
HaveSmallAccum = false;
+
++j0;
} while (j0 <= DstIndex);
@@ -3170,7 +3895,6 @@ bool AMDGPULegalizerInfo::legalizeMul(LegalizerHelper &Helper,
B.buildMergeLikeInstr(DstReg, AccumRegs);
MI.eraseFromParent();
return true;
-
}
// Legalize ctlz/cttz to ffbh/ffbl instead of the default legalization to
@@ -3259,7 +3983,7 @@ bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
// TODO: Should we try to emit this once in the entry block?
const LLT S32 = LLT::scalar(32);
const unsigned Mask = Arg->getMask();
- const unsigned Shift = countTrailingZeros<unsigned>(Mask);
+ const unsigned Shift = llvm::countr_zero<unsigned>(Mask);
Register AndMaskSrc = LiveIn;
@@ -3432,7 +4156,7 @@ void AMDGPULegalizerInfo::legalizeUnsignedDIV_REM32Impl(MachineIRBuilder &B,
// Initial estimate of inv(y).
auto FloatY = B.buildUITOFP(S32, Y);
auto RcpIFlag = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {FloatY});
- auto Scale = B.buildFConstant(S32, BitsToFloat(0x4f7ffffe));
+ auto Scale = B.buildFConstant(S32, llvm::bit_cast<float>(0x4f7ffffe));
auto ScaledY = B.buildFMul(S32, RcpIFlag, Scale);
auto Z = B.buildFPTOUI(S32, ScaledY);
@@ -3482,21 +4206,23 @@ static std::pair<Register, Register> emitReciprocalU64(MachineIRBuilder &B,
auto CvtLo = B.buildUITOFP(S32, Unmerge.getReg(0));
auto CvtHi = B.buildUITOFP(S32, Unmerge.getReg(1));
- auto Mad = B.buildFMAD(S32, CvtHi, // 2**32
- B.buildFConstant(S32, BitsToFloat(0x4f800000)), CvtLo);
+ auto Mad = B.buildFMAD(
+ S32, CvtHi, // 2**32
+ B.buildFConstant(S32, llvm::bit_cast<float>(0x4f800000)), CvtLo);
auto Rcp = B.buildInstr(AMDGPU::G_AMDGPU_RCP_IFLAG, {S32}, {Mad});
- auto Mul1 =
- B.buildFMul(S32, Rcp, B.buildFConstant(S32, BitsToFloat(0x5f7ffffc)));
+ auto Mul1 = B.buildFMul(
+ S32, Rcp, B.buildFConstant(S32, llvm::bit_cast<float>(0x5f7ffffc)));
// 2**(-32)
- auto Mul2 =
- B.buildFMul(S32, Mul1, B.buildFConstant(S32, BitsToFloat(0x2f800000)));
+ auto Mul2 = B.buildFMul(
+ S32, Mul1, B.buildFConstant(S32, llvm::bit_cast<float>(0x2f800000)));
auto Trunc = B.buildIntrinsicTrunc(S32, Mul2);
// -(2**32)
- auto Mad2 = B.buildFMAD(S32, Trunc,
- B.buildFConstant(S32, BitsToFloat(0xcf800000)), Mul1);
+ auto Mad2 = B.buildFMAD(
+ S32, Trunc, B.buildFConstant(S32, llvm::bit_cast<float>(0xcf800000)),
+ Mul1);
auto ResultLo = B.buildFPTOUI(S32, Mad2);
auto ResultHi = B.buildFPTOUI(S32, Trunc);
@@ -3734,13 +4460,20 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
LLT ResTy = MRI.getType(Res);
const MachineFunction &MF = B.getMF();
- bool AllowInaccurateRcp = MF.getTarget().Options.UnsafeFPMath ||
- MI.getFlag(MachineInstr::FmAfn);
-
- if (!AllowInaccurateRcp)
- return false;
+ bool AllowInaccurateRcp = MI.getFlag(MachineInstr::FmAfn) ||
+ MF.getTarget().Options.UnsafeFPMath;
if (auto CLHS = getConstantFPVRegVal(LHS, MRI)) {
+ if (!AllowInaccurateRcp && ResTy != LLT::scalar(16))
+ return false;
+
+ // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to
+ // the CI documentation they have a worst case error of 1 ulp.
+ // OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to
+ // use it as long as we aren't trying to use denormals.
+ //
+ // v_rcp_f16 and v_rsq_f16 DO support denormals and have a 0.51 ulp error.
+
// 1 / x -> RCP(x)
if (CLHS->isExactlyValue(1.0)) {
B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res, false)
@@ -3751,6 +4484,8 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
return true;
}
+ // TODO: Match rsq
+
// -1 / x -> RCP( FNEG(x) )
if (CLHS->isExactlyValue(-1.0)) {
auto FNeg = B.buildFNeg(ResTy, RHS, Flags);
@@ -3763,6 +4498,12 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
}
}
+ // For f16 require arcp only.
+ // For f32 require afn+arcp.
+ if (!AllowInaccurateRcp && (ResTy != LLT::scalar(16) ||
+ !MI.getFlag(MachineInstr::FmArcp)))
+ return false;
+
// x / y -> x * (1.0 / y)
auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {ResTy}, false)
.addUse(RHS)
@@ -3847,10 +4588,9 @@ bool AMDGPULegalizerInfo::legalizeFDIV16(MachineInstr &MI,
// Enable or disable FP32 denorm mode. When 'Enable' is true, emit instructions
// to enable denorm mode. When 'Enable' is false, disable denorm mode.
-static void toggleSPDenormMode(bool Enable,
- MachineIRBuilder &B,
+static void toggleSPDenormMode(bool Enable, MachineIRBuilder &B,
const GCNSubtarget &ST,
- AMDGPU::SIModeRegisterDefaults Mode) {
+ SIModeRegisterDefaults Mode) {
// Set SP denorm mode to this value.
unsigned SPDenormMode =
Enable ? FP_DENORM_FLUSH_NONE : Mode.fpDenormModeSPValue();
@@ -3885,7 +4625,7 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
- AMDGPU::SIModeRegisterDefaults Mode = MFI->getMode();
+ SIModeRegisterDefaults Mode = MFI->getMode();
uint16_t Flags = MI.getFlags();
@@ -3914,7 +4654,7 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
// FIXME: Doesn't correctly model the FP mode switch, and the FP operations
// aren't modeled as reading it.
- if (!Mode.allFP32Denormals())
+ if (Mode.FP32Denormals != DenormalMode::getIEEE())
toggleSPDenormMode(true, B, ST, Mode);
auto Fma0 = B.buildFMA(S32, NegDivScale0, ApproxRcp, One, Flags);
@@ -3924,7 +4664,9 @@ bool AMDGPULegalizerInfo::legalizeFDIV32(MachineInstr &MI,
auto Fma3 = B.buildFMA(S32, Fma2, Fma1, Mul, Flags);
auto Fma4 = B.buildFMA(S32, NegDivScale0, Fma3, NumeratorScaled, Flags);
- if (!Mode.allFP32Denormals())
+ // FIXME: This mishandles dynamic denormal mode. We need to query the
+ // current mode and restore the original.
+ if (Mode.FP32Denormals != DenormalMode::getIEEE())
toggleSPDenormMode(false, B, ST, Mode);
auto Fmas = B.buildIntrinsic(Intrinsic::amdgcn_div_fmas, {S32}, false)
@@ -4025,6 +4767,41 @@ bool AMDGPULegalizerInfo::legalizeFDIV64(MachineInstr &MI,
return true;
}
+bool AMDGPULegalizerInfo::legalizeFFREXP(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ Register Res0 = MI.getOperand(0).getReg();
+ Register Res1 = MI.getOperand(1).getReg();
+ Register Val = MI.getOperand(2).getReg();
+ uint16_t Flags = MI.getFlags();
+
+ LLT Ty = MRI.getType(Res0);
+ LLT InstrExpTy = Ty == LLT::scalar(16) ? LLT::scalar(16) : LLT::scalar(32);
+
+ auto Mant = B.buildIntrinsic(Intrinsic::amdgcn_frexp_mant, {Ty}, false)
+ .addUse(Val)
+ .setMIFlags(Flags);
+ auto Exp = B.buildIntrinsic(Intrinsic::amdgcn_frexp_exp, {InstrExpTy}, false)
+ .addUse(Val)
+ .setMIFlags(Flags);
+
+ if (ST.hasFractBug()) {
+ auto Fabs = B.buildFAbs(Ty, Val);
+ auto Inf = B.buildFConstant(Ty, APFloat::getInf(getFltSemanticForLLT(Ty)));
+ auto IsFinite =
+ B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), Fabs, Inf, Flags);
+ auto Zero = B.buildConstant(InstrExpTy, 0);
+ Exp = B.buildSelect(InstrExpTy, IsFinite, Exp, Zero);
+ Mant = B.buildSelect(Ty, IsFinite, Mant, Val);
+ }
+
+ B.buildCopy(Res0, Mant);
+ B.buildSExtOrTrunc(Res1, Exp);
+
+ MI.eraseFromParent();
+ return true;
+}
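A standalone C++ sketch of what this lowering computes, with std::frexp standing in for v_frexp_mant/v_frexp_exp; the isfinite guard mirrors the hasFractBug() fixup, which passes inf/nan inputs through with a zero exponent (illustrative only):

#include <cmath>
#include <cstdio>
#include <utility>

static std::pair<float, int> emulated_frexp(float Val) {
  int Exp = 0;
  float Mant = std::frexp(Val, &Exp);  // mantissa in [0.5, 1), Val == Mant * 2^Exp
  if (!std::isfinite(Val))
    return {Val, 0};                   // the hasFractBug() select above
  return {Mant, Exp};
}

int main() {
  auto [M, E] = emulated_frexp(6.0f);
  std::printf("%g * 2^%d\n", M, E);    // 0.75 * 2^3
}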
+
bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
@@ -4039,9 +4816,9 @@ bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
auto Abs = B.buildFAbs(S32, RHS, Flags);
const APFloat C0Val(1.0f);
- auto C0 = B.buildConstant(S32, 0x6f800000);
- auto C1 = B.buildConstant(S32, 0x2f800000);
- auto C2 = B.buildConstant(S32, FloatToBits(1.0f));
+ auto C0 = B.buildFConstant(S32, 0x1p+96f);
+ auto C1 = B.buildFConstant(S32, 0x1p-32f);
+ auto C2 = B.buildFConstant(S32, 1.0f);
auto CmpRes = B.buildFCmp(CmpInst::FCMP_OGT, S1, Abs, C0, Flags);
auto Sel = B.buildSelect(S32, CmpRes, C1, C2, Flags);
@@ -4060,6 +4837,90 @@ bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
return true;
}
+bool AMDGPULegalizerInfo::legalizeFSQRT(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ // For double type, the SQRT and RSQ instructions don't have required
+ // precision, we apply Goldschmidt's algorithm to improve the result:
+ //
+ // y0 = rsq(x)
+ // g0 = x * y0
+ // h0 = 0.5 * y0
+ //
+ // r0 = 0.5 - h0 * g0
+ // g1 = g0 * r0 + g0
+ // h1 = h0 * r0 + h0
+ //
+ // r1 = 0.5 - h1 * g1 => d0 = x - g1 * g1
+ // g2 = g1 * r1 + g1 g2 = d0 * h1 + g1
+ // h2 = h1 * r1 + h1
+ //
+ // r2 = 0.5 - h2 * g2 => d1 = x - g2 * g2
+ // g3 = g2 * r2 + g2 g3 = d1 * h1 + g2
+ //
+ // sqrt(x) = g3
+
+ const LLT S1 = LLT::scalar(1);
+ const LLT S32 = LLT::scalar(32);
+ const LLT F64 = LLT::scalar(64);
+
+ Register Dst = MI.getOperand(0).getReg();
+ assert(MRI.getType(Dst) == F64 && "only expect to lower f64 sqrt");
+
+ Register X = MI.getOperand(1).getReg();
+ unsigned Flags = MI.getFlags();
+
+ auto ScaleConstant = B.buildFConstant(F64, 0x1.0p-767);
+
+ auto ZeroInt = B.buildConstant(S32, 0);
+ auto Scaling = B.buildFCmp(FCmpInst::FCMP_OLT, S1, X, ScaleConstant);
+
+ // Scale up input if it is too small.
+ auto ScaleUpFactor = B.buildConstant(S32, 256);
+ auto ScaleUp = B.buildSelect(S32, Scaling, ScaleUpFactor, ZeroInt);
+ auto SqrtX = B.buildFLdexp(F64, X, ScaleUp, Flags);
+
+ auto SqrtY = B.buildIntrinsic(Intrinsic::amdgcn_rsq, {F64}, false)
+ .addReg(SqrtX.getReg(0));
+
+ auto Half = B.buildFConstant(F64, 0.5);
+ auto SqrtH0 = B.buildFMul(F64, SqrtY, Half);
+ auto SqrtS0 = B.buildFMul(F64, SqrtX, SqrtY);
+
+ auto NegSqrtH0 = B.buildFNeg(F64, SqrtH0);
+ auto SqrtR0 = B.buildFMA(F64, NegSqrtH0, SqrtS0, Half);
+
+ auto SqrtS1 = B.buildFMA(F64, SqrtS0, SqrtR0, SqrtS0);
+ auto SqrtH1 = B.buildFMA(F64, SqrtH0, SqrtR0, SqrtH0);
+
+ auto NegSqrtS1 = B.buildFNeg(F64, SqrtS1);
+ auto SqrtD0 = B.buildFMA(F64, NegSqrtS1, SqrtS1, SqrtX);
+
+ auto SqrtS2 = B.buildFMA(F64, SqrtD0, SqrtH1, SqrtS1);
+
+ auto NegSqrtS2 = B.buildFNeg(F64, SqrtS2);
+ auto SqrtD1 = B.buildFMA(F64, NegSqrtS2, SqrtS2, SqrtX);
+
+ auto SqrtRet = B.buildFMA(F64, SqrtD1, SqrtH1, SqrtS2);
+
+ // Scale down the result.
+ auto ScaleDownFactor = B.buildConstant(S32, -128);
+ auto ScaleDown = B.buildSelect(S32, Scaling, ScaleDownFactor, ZeroInt);
+ SqrtRet = B.buildFLdexp(F64, SqrtRet, ScaleDown, Flags);
+
+ // TODO: Switch to fcmp oeq 0 for finite only. Can't fully remove this check
+ // with finite only or nsz because rsq(+/-0) = +/-inf
+
+ // TODO: Check for DAZ and expand to subnormals
+ auto IsZeroOrInf = B.buildIsFPClass(LLT::scalar(1), SqrtX, fcZero | fcPosInf);
+
+ // If x is +INF, +0, or -0, use its original value
+ B.buildSelect(Dst, IsZeroOrInf, SqrtX, SqrtRet, Flags);
+
+ MI.eraseFromParent();
+ return true;
+}
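A standalone C++ sketch of the Goldschmidt refinement above, with 1.0/std::sqrt standing in for the (much less accurate) v_rsq_f64 estimate and std::fma for the FMA chain; the scale factors mirror the code, the rest is illustrative:

#include <cmath>
#include <cstdio>

static double emulated_sqrt(double X) {
  bool Scale = X < 0x1.0p-767;
  double SqrtX = std::ldexp(X, Scale ? 256 : 0);  // scale tiny inputs up by 2^256
  double Y0 = 1.0 / std::sqrt(SqrtX);             // rsq estimate
  double H0 = 0.5 * Y0;
  double S0 = SqrtX * Y0;
  double R0 = std::fma(-H0, S0, 0.5);
  double S1 = std::fma(S0, R0, S0);
  double H1 = std::fma(H0, R0, H0);
  double D0 = std::fma(-S1, S1, SqrtX);
  double S2 = std::fma(D0, H1, S1);
  double D1 = std::fma(-S2, S2, SqrtX);
  double Ret = std::fma(D1, H1, S2);
  Ret = std::ldexp(Ret, Scale ? -128 : 0);        // sqrt halves the exponent shift
  if (SqrtX == 0.0 || SqrtX == INFINITY)
    Ret = SqrtX;                                  // +/-0 and +inf pass through
  return Ret;
}

int main() { std::printf("%.17g\n", emulated_sqrt(2.0)); }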
+
// Expand llvm.amdgcn.rsq.clamp on targets that don't support the instruction.
// FIXME: Why do we handle this one but not other removed instructions?
//
@@ -4159,6 +5020,50 @@ bool AMDGPULegalizerInfo::getImplicitArgPtr(Register DstReg,
return true;
}
+/// To create a buffer resource from a 64-bit pointer, mask off the upper 32
+/// bits of the pointer and replace them with the stride argument, then
+/// merge_values everything together. In the common case of a raw buffer (the
+/// stride component is 0), we can just AND off the upper half.
+bool AMDGPULegalizerInfo::legalizePointerAsRsrcIntrin(
+ MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
+ Register Result = MI.getOperand(0).getReg();
+ Register Pointer = MI.getOperand(2).getReg();
+ Register Stride = MI.getOperand(3).getReg();
+ Register NumRecords = MI.getOperand(4).getReg();
+ Register Flags = MI.getOperand(5).getReg();
+
+ LLT S32 = LLT::scalar(32);
+
+ B.setInsertPt(B.getMBB(), ++B.getInsertPt());
+ auto Unmerge = B.buildUnmerge(S32, Pointer);
+ Register LowHalf = Unmerge.getReg(0);
+ Register HighHalf = Unmerge.getReg(1);
+
+ auto AndMask = B.buildConstant(S32, 0x0000ffff);
+ auto Masked = B.buildAnd(S32, HighHalf, AndMask);
+
+ MachineInstrBuilder NewHighHalf = Masked;
+ std::optional<ValueAndVReg> StrideConst =
+ getIConstantVRegValWithLookThrough(Stride, MRI);
+ if (!StrideConst || !StrideConst->Value.isZero()) {
+ MachineInstrBuilder ShiftedStride;
+ if (StrideConst) {
+ uint32_t StrideVal = StrideConst->Value.getZExtValue();
+ uint32_t ShiftedStrideVal = StrideVal << 16;
+ ShiftedStride = B.buildConstant(S32, ShiftedStrideVal);
+ } else {
+ auto ExtStride = B.buildAnyExt(S32, Stride);
+ auto ShiftConst = B.buildConstant(S32, 16);
+ ShiftedStride = B.buildShl(S32, ExtStride, ShiftConst);
+ }
+ NewHighHalf = B.buildOr(S32, Masked, ShiftedStride);
+ }
+ Register NewHighHalfReg = NewHighHalf.getReg(0);
+ B.buildMergeValues(Result, {LowHalf, NewHighHalfReg, NumRecords, Flags});
+ MI.eraseFromParent();
+ return true;
+}
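A standalone C++ sketch of the word layout the merge_values above produces; only the packing shown in the code is implied, nothing about descriptor fields beyond that:

#include <array>
#include <cstdint>

static std::array<uint32_t, 4> makeBufferRsrcWords(uint64_t Ptr, uint16_t Stride,
                                                   uint32_t NumRecords,
                                                   uint32_t Flags) {
  uint32_t Lo = static_cast<uint32_t>(Ptr);
  uint32_t Hi = (static_cast<uint32_t>(Ptr >> 32) & 0x0000ffffu) |
                (static_cast<uint32_t>(Stride) << 16);  // stride replaces the top 16 bits
  return {Lo, Hi, NumRecords, Flags};
}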
+
bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
@@ -4227,7 +5132,7 @@ bool AMDGPULegalizerInfo::legalizeIsAddrSpace(MachineInstr &MI,
std::pair<Register, unsigned>
AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const {
- const unsigned MaxImm = 4095;
+ const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
Register BaseReg;
unsigned ImmOffset;
const LLT S32 = LLT::scalar(32);
@@ -4240,13 +5145,14 @@ AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
if (MRI.getType(BaseReg).isPointer())
BaseReg = B.buildPtrToInt(MRI.getType(OrigOffset), BaseReg).getReg(0);
- // If the immediate value is too big for the immoffset field, put the value
- // and -4096 into the immoffset field so that the value that is copied/added
- // for the voffset field is a multiple of 4096, and it stands more chance
- // of being CSEd with the copy/add for another similar load/store.
- // However, do not do that rounding down to a multiple of 4096 if that is a
- // negative number, as it appears to be illegal to have a negative offset
- // in the vgpr, even if adding the immediate offset makes it positive.
+ // If the immediate value is too big for the immoffset field, put only bits
+ // that would normally fit in the immoffset field. The remaining value that
+ // is copied/added for the voffset field is a multiple of a large power of 2,
+ // and it stands more chance of being CSEd with the copy/add for another similar
+ // load/store.
+ // However, do not do that rounding down if that is a negative
+ // number, as it appears to be illegal to have a negative offset in the
+ // vgpr, even if adding the immediate offset makes it positive.
unsigned Overflow = ImmOffset & ~MaxImm;
ImmOffset -= Overflow;
if ((int32_t)Overflow < 0) {
@@ -4269,31 +5175,6 @@ AMDGPULegalizerInfo::splitBufferOffsets(MachineIRBuilder &B,
return std::pair(BaseReg, ImmOffset);
}
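A standalone C++ sketch of the split described in the comment above, using the 4095 limit the old code hard-coded; the new code queries SIInstrInfo::getMaxMUBUFImmOffset() instead:

#include <cstdint>
#include <cstdio>
#include <utility>

static std::pair<uint32_t, uint32_t> splitBufferOffset(uint32_t Offset,
                                                       uint32_t MaxImm = 4095) {
  uint32_t ImmOffset = Offset & MaxImm;   // bits that fit the immoffset field
  uint32_t Overflow  = Offset & ~MaxImm;  // multiple of (MaxImm + 1), goes to voffset
  return {Overflow, ImmOffset};
}

int main() {
  auto [VOff, Imm] = splitBufferOffset(10000);
  std::printf("voffset=%u imm=%u\n", VOff, Imm);  // voffset=8192 imm=1808
}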
-/// Update \p MMO based on the offset inputs to a raw/struct buffer intrinsic.
-void AMDGPULegalizerInfo::updateBufferMMO(MachineMemOperand *MMO,
- Register VOffset, Register SOffset,
- unsigned ImmOffset, Register VIndex,
- MachineRegisterInfo &MRI) const {
- std::optional<ValueAndVReg> MaybeVOffsetVal =
- getIConstantVRegValWithLookThrough(VOffset, MRI);
- std::optional<ValueAndVReg> MaybeSOffsetVal =
- getIConstantVRegValWithLookThrough(SOffset, MRI);
- std::optional<ValueAndVReg> MaybeVIndexVal =
- getIConstantVRegValWithLookThrough(VIndex, MRI);
- // If the combined VOffset + SOffset + ImmOffset + strided VIndex is constant,
- // update the MMO with that offset. The stride is unknown so we can only do
- // this if VIndex is constant 0.
- if (MaybeVOffsetVal && MaybeSOffsetVal && MaybeVIndexVal &&
- MaybeVIndexVal->Value == 0) {
- uint64_t TotalOffset = MaybeVOffsetVal->Value.getZExtValue() +
- MaybeSOffsetVal->Value.getZExtValue() + ImmOffset;
- MMO->setOffset(TotalOffset);
- } else {
- // We don't have a constant combined offset to use in the MMO. Give up.
- MMO->setValue((Value *)nullptr);
- }
-}
-
/// Handle register layout difference for f16 images for some subtargets.
Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
MachineRegisterInfo &MRI,
@@ -4365,6 +5246,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
const LLT S16 = LLT::scalar(16);
+ // Fixup buffer resources themselves needing to be represented as v4i32.
+ if (hasBufferRsrcWorkaround(Ty))
+ return castBufferRsrcToV4I32(VData, B);
+
// Fixup illegal register types for i8 stores.
if (Ty == LLT::scalar(8) || Ty == S16) {
Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
@@ -4393,6 +5278,7 @@ bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI,
const LLT S32 = LLT::scalar(32);
VData = fixStoreSourceType(B, VData, IsFormat);
+ castBufferRsrcArgToV4I32(MI, B, 2);
Register RSrc = MI.getOperand(2).getReg();
MachineMemOperand *MMO = *MI.memoperands_begin();
@@ -4426,7 +5312,6 @@ bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI,
unsigned AuxiliaryData = MI.getOperand(5 + OpOffset).getImm();
std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset);
- updateBufferMMO(MMO, VOffset, SOffset, ImmOffset, VIndex, MRI);
unsigned Opc;
if (IsTyped) {
@@ -4510,6 +5395,7 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
++OpOffset;
}
+ castBufferRsrcArgToV4I32(MI, B, 2 + OpOffset);
Register RSrc = MI.getOperand(2 + OpOffset).getReg();
// The typed intrinsics add an immediate after the registers.
@@ -4538,12 +5424,17 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
unsigned ImmOffset;
LLT Ty = MRI.getType(Dst);
+ // Make addrspace 8 pointers loads into 4xs32 loads here, so the rest of the
+ // logic doesn't have to handle that case.
+ if (hasBufferRsrcWorkaround(Ty)) {
+ Ty = castBufferRsrcFromV4I32(MI, B, MRI, 0);
+ Dst = MI.getOperand(0).getReg();
+ }
LLT EltTy = Ty.getScalarType();
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
const bool Unpacked = ST.hasUnpackedD16VMem();
std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset);
- updateBufferMMO(MMO, VOffset, SOffset, ImmOffset, VIndex, MRI);
unsigned Opc;
@@ -4624,69 +5515,87 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
return true;
}
-bool AMDGPULegalizerInfo::legalizeAtomicIncDec(MachineInstr &MI,
- MachineIRBuilder &B,
- bool IsInc) const {
- unsigned Opc = IsInc ? AMDGPU::G_AMDGPU_ATOMIC_INC :
- AMDGPU::G_AMDGPU_ATOMIC_DEC;
- B.buildInstr(Opc)
- .addDef(MI.getOperand(0).getReg())
- .addUse(MI.getOperand(2).getReg())
- .addUse(MI.getOperand(3).getReg())
- .cloneMemRefs(MI);
- MI.eraseFromParent();
- return true;
-}
-
static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) {
switch (IntrID) {
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
case Intrinsic::amdgcn_struct_buffer_atomic_swap:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_swap:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SWAP;
case Intrinsic::amdgcn_raw_buffer_atomic_add:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_add:
case Intrinsic::amdgcn_struct_buffer_atomic_add:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_add:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_ADD;
case Intrinsic::amdgcn_raw_buffer_atomic_sub:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub:
case Intrinsic::amdgcn_struct_buffer_atomic_sub:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_sub:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SUB;
case Intrinsic::amdgcn_raw_buffer_atomic_smin:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin:
case Intrinsic::amdgcn_struct_buffer_atomic_smin:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smin:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMIN;
case Intrinsic::amdgcn_raw_buffer_atomic_umin:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin:
case Intrinsic::amdgcn_struct_buffer_atomic_umin:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umin:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMIN;
case Intrinsic::amdgcn_raw_buffer_atomic_smax:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax:
case Intrinsic::amdgcn_struct_buffer_atomic_smax:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smax:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_SMAX;
case Intrinsic::amdgcn_raw_buffer_atomic_umax:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umax:
case Intrinsic::amdgcn_struct_buffer_atomic_umax:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umax:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_UMAX;
case Intrinsic::amdgcn_raw_buffer_atomic_and:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_and:
case Intrinsic::amdgcn_struct_buffer_atomic_and:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_and:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_AND;
case Intrinsic::amdgcn_raw_buffer_atomic_or:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_or:
case Intrinsic::amdgcn_struct_buffer_atomic_or:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_or:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_OR;
case Intrinsic::amdgcn_raw_buffer_atomic_xor:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor:
case Intrinsic::amdgcn_struct_buffer_atomic_xor:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_xor:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_XOR;
case Intrinsic::amdgcn_raw_buffer_atomic_inc:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_inc:
case Intrinsic::amdgcn_struct_buffer_atomic_inc:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_inc:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_INC;
case Intrinsic::amdgcn_raw_buffer_atomic_dec:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_dec:
case Intrinsic::amdgcn_struct_buffer_atomic_dec:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_dec:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_DEC;
case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap:
case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_cmpswap:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP;
case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd:
case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD;
case Intrinsic::amdgcn_raw_buffer_atomic_fmin:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin:
case Intrinsic::amdgcn_struct_buffer_atomic_fmin:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fmin:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMIN;
case Intrinsic::amdgcn_raw_buffer_atomic_fmax:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax:
case Intrinsic::amdgcn_struct_buffer_atomic_fmax:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fmax:
return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FMAX;
default:
llvm_unreachable("unhandled atomic opcode");
@@ -4696,8 +5605,11 @@ static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) {
bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI,
MachineIRBuilder &B,
Intrinsic::ID IID) const {
- const bool IsCmpSwap = IID == Intrinsic::amdgcn_raw_buffer_atomic_cmpswap ||
- IID == Intrinsic::amdgcn_struct_buffer_atomic_cmpswap;
+ const bool IsCmpSwap =
+ IID == Intrinsic::amdgcn_raw_buffer_atomic_cmpswap ||
+ IID == Intrinsic::amdgcn_struct_buffer_atomic_cmpswap ||
+ IID == Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap ||
+ IID == Intrinsic::amdgcn_struct_ptr_buffer_atomic_cmpswap;
const bool HasReturn = MI.getNumExplicitDefs() != 0;
Register Dst;
@@ -4710,6 +5622,8 @@ bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI,
OpOffset = -1;
}
+ // Since we don't have 128-bit atomics, we don't need to handle the case of
+ // p8 arguments to the atomic itself.
Register VData = MI.getOperand(2 + OpOffset).getReg();
Register CmpVal;
@@ -4718,6 +5632,7 @@ bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI,
++OpOffset;
}
+ castBufferRsrcArgToV4I32(MI, B, 3 + OpOffset);
Register RSrc = MI.getOperand(3 + OpOffset).getReg();
const unsigned NumVIndexOps = (IsCmpSwap ? 8 : 7) + HasReturn;
@@ -4739,7 +5654,6 @@ bool AMDGPULegalizerInfo::legalizeBufferAtomic(MachineInstr &MI,
unsigned ImmOffset;
std::tie(VOffset, ImmOffset) = splitBufferOffsets(B, VOffset);
- updateBufferMMO(MMO, VOffset, SOffset, ImmOffset, VIndex, *B.getMRI());
auto MIB = B.buildInstr(getBufferAtomicPseudo(IID));
@@ -4896,7 +5810,8 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
MRI->getType(MI.getOperand(ArgOffset + Intr->GradientStart).getReg());
LLT AddrTy =
MRI->getType(MI.getOperand(ArgOffset + Intr->CoordStart).getReg());
- const bool IsG16 = GradTy == S16;
+ const bool IsG16 =
+ ST.hasG16() ? (BaseOpcode->Gradients && GradTy == S16) : GradTy == S16;
const bool IsA16 = AddrTy == S16;
const bool IsD16 = Ty.getScalarType() == S16;
@@ -4967,6 +5882,9 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
return false;
}
+ const unsigned NSAMaxSize = ST.getNSAMaxSize();
+ const unsigned HasPartialNSA = ST.hasPartialNSAEncoding();
+
if (IsA16 || IsG16) {
if (Intr->NumVAddrs > 1) {
SmallVector<Register, 4> PackedRegs;
@@ -4977,9 +5895,19 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
// See also below in the non-a16 branch
const bool UseNSA = ST.hasNSAEncoding() &&
PackedRegs.size() >= ST.getNSAThreshold(MF) &&
- PackedRegs.size() <= ST.getNSAMaxSize();
-
- if (!UseNSA && PackedRegs.size() > 1) {
+ (PackedRegs.size() <= NSAMaxSize || HasPartialNSA);
+ const bool UsePartialNSA =
+ UseNSA && HasPartialNSA && PackedRegs.size() > NSAMaxSize;
+
+ if (UsePartialNSA) {
+ // Pack registers that would go over NSAMaxSize into last VAddr register
+ LLT PackedAddrTy =
+ LLT::fixed_vector(2 * (PackedRegs.size() - NSAMaxSize + 1), 16);
+ auto Concat = B.buildConcatVectors(
+ PackedAddrTy, ArrayRef(PackedRegs).slice(NSAMaxSize - 1));
+ PackedRegs[NSAMaxSize - 1] = Concat.getReg(0);
+ PackedRegs.resize(NSAMaxSize);
+ } else if (!UseNSA && PackedRegs.size() > 1) {
LLT PackedAddrTy = LLT::fixed_vector(2 * PackedRegs.size(), 16);
auto Concat = B.buildConcatVectors(PackedAddrTy, PackedRegs);
PackedRegs[0] = Concat.getReg(0);
@@ -5015,16 +5943,22 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
// SIShrinkInstructions will convert NSA encodings to non-NSA after register
// allocation when possible.
//
- // TODO: we can actually allow partial NSA where the final register is a
- // contiguous set of the remaining addresses.
- // This could help where there are more addresses than supported.
+ // Partial NSA is allowed on GFX11 where the final register is a contiguous
+ // set of the remaining addresses.
const bool UseNSA = ST.hasNSAEncoding() &&
CorrectedNumVAddrs >= ST.getNSAThreshold(MF) &&
- CorrectedNumVAddrs <= ST.getNSAMaxSize();
-
- if (!UseNSA && Intr->NumVAddrs > 1)
+ (CorrectedNumVAddrs <= NSAMaxSize || HasPartialNSA);
+ const bool UsePartialNSA =
+ UseNSA && HasPartialNSA && CorrectedNumVAddrs > NSAMaxSize;
+
+ if (UsePartialNSA) {
+ convertImageAddrToPacked(B, MI,
+ ArgOffset + Intr->VAddrStart + NSAMaxSize - 1,
+ Intr->NumVAddrs - NSAMaxSize + 1);
+ } else if (!UseNSA && Intr->NumVAddrs > 1) {
convertImageAddrToPacked(B, MI, ArgOffset + Intr->VAddrStart,
Intr->NumVAddrs);
+ }
}
int Flags = 0;
@@ -5237,6 +6171,12 @@ bool AMDGPULegalizerInfo::legalizeSBufferLoad(
Observer.changingInstr(MI);
+ // Handle needing to s.buffer.load() a p8 value.
+ if (hasBufferRsrcWorkaround(Ty)) {
+ Ty = castBufferRsrcFromV4I32(MI, B, *B.getMRI(), 0);
+ Dst = MI.getOperand(0).getReg();
+ B.setInsertPt(B.getMBB(), MI);
+ }
if (shouldBitcastLoadStoreType(ST, Ty, LLT::scalar(Size))) {
Ty = getBitcastRegisterType(Ty);
Helper.bitcastDst(MI, Ty, 0);
@@ -5283,25 +6223,40 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI,
ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
return legalizeTrapEndpgm(MI, MRI, B);
- if (std::optional<uint8_t> HsaAbiVer = AMDGPU::getHsaAbiVersion(&ST)) {
- switch (*HsaAbiVer) {
- case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
- case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
- return legalizeTrapHsaQueuePtr(MI, MRI, B);
- case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
- case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
- return ST.supportsGetDoorbellID() ?
- legalizeTrapHsa(MI, MRI, B) :
- legalizeTrapHsaQueuePtr(MI, MRI, B);
- }
- }
+ const Module *M = B.getMF().getFunction().getParent();
+ unsigned CodeObjectVersion = AMDGPU::getCodeObjectVersion(*M);
+ if (CodeObjectVersion <= AMDGPU::AMDHSA_COV3)
+ return legalizeTrapHsaQueuePtr(MI, MRI, B);
- llvm_unreachable("Unknown trap handler");
+ return ST.supportsGetDoorbellID() ?
+ legalizeTrapHsa(MI, MRI, B) : legalizeTrapHsaQueuePtr(MI, MRI, B);
}
bool AMDGPULegalizerInfo::legalizeTrapEndpgm(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const {
- B.buildInstr(AMDGPU::S_ENDPGM).addImm(0);
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock &BB = B.getMBB();
+ MachineFunction *MF = BB.getParent();
+
+ if (BB.succ_empty() && std::next(MI.getIterator()) == BB.end()) {
+ BuildMI(BB, BB.end(), DL, B.getTII().get(AMDGPU::S_ENDPGM))
+ .addImm(0);
+ MI.eraseFromParent();
+ return true;
+ }
+
+ // We need a block split to make the real endpgm a terminator. We also don't
+ // want to break phis in successor blocks, so we can't just delete to the
+ // end of the block.
+ BB.splitAt(MI, false /*UpdateLiveIns*/);
+ MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+ MF->push_back(TrapBB);
+ BuildMI(*TrapBB, TrapBB->end(), DL, B.getTII().get(AMDGPU::S_ENDPGM))
+ .addImm(0);
+ BuildMI(BB, &MI, DL, B.getTII().get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addMBB(TrapBB);
+
+ BB.addSuccessor(TrapBB);
MI.eraseFromParent();
return true;
}
@@ -5313,7 +6268,8 @@ bool AMDGPULegalizerInfo::legalizeTrapHsaQueuePtr(
Register SGPR01(AMDGPU::SGPR0_SGPR1);
// For code object version 5, queue_ptr is passed through implicit kernarg.
- if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) {
+ if (AMDGPU::getCodeObjectVersion(*MF.getFunction().getParent()) >=
+ AMDGPU::AMDHSA_COV5) {
AMDGPUTargetLowering::ImplicitParameter Param =
AMDGPUTargetLowering::QUEUE_PTR;
uint64_t Offset =
@@ -5652,6 +6608,8 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return false;
}
+ case Intrinsic::amdgcn_make_buffer_rsrc:
+ return legalizePointerAsRsrcIntrin(MI, MRI, B);
case Intrinsic::amdgcn_kernarg_segment_ptr:
if (!AMDGPU::isKernel(B.getMF().getFunction().getCallingConv())) {
// This only makes sense to call in a kernel, so just lower to null.
@@ -5736,60 +6694,100 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
case Intrinsic::amdgcn_s_buffer_load:
return legalizeSBufferLoad(Helper, MI);
case Intrinsic::amdgcn_raw_buffer_store:
+ case Intrinsic::amdgcn_raw_ptr_buffer_store:
case Intrinsic::amdgcn_struct_buffer_store:
+ case Intrinsic::amdgcn_struct_ptr_buffer_store:
return legalizeBufferStore(MI, MRI, B, false, false);
case Intrinsic::amdgcn_raw_buffer_store_format:
+ case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
case Intrinsic::amdgcn_struct_buffer_store_format:
+ case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
return legalizeBufferStore(MI, MRI, B, false, true);
case Intrinsic::amdgcn_raw_tbuffer_store:
+ case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
case Intrinsic::amdgcn_struct_tbuffer_store:
+ case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
return legalizeBufferStore(MI, MRI, B, true, true);
case Intrinsic::amdgcn_raw_buffer_load:
+ case Intrinsic::amdgcn_raw_ptr_buffer_load:
case Intrinsic::amdgcn_struct_buffer_load:
+ case Intrinsic::amdgcn_struct_ptr_buffer_load:
return legalizeBufferLoad(MI, MRI, B, false, false);
case Intrinsic::amdgcn_raw_buffer_load_format:
+ case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
case Intrinsic::amdgcn_struct_buffer_load_format:
+ case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
return legalizeBufferLoad(MI, MRI, B, true, false);
case Intrinsic::amdgcn_raw_tbuffer_load:
+ case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
case Intrinsic::amdgcn_struct_tbuffer_load:
+ case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
return legalizeBufferLoad(MI, MRI, B, true, true);
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
case Intrinsic::amdgcn_struct_buffer_atomic_swap:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_swap:
case Intrinsic::amdgcn_raw_buffer_atomic_add:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_add:
case Intrinsic::amdgcn_struct_buffer_atomic_add:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_add:
case Intrinsic::amdgcn_raw_buffer_atomic_sub:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub:
case Intrinsic::amdgcn_struct_buffer_atomic_sub:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_sub:
case Intrinsic::amdgcn_raw_buffer_atomic_smin:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin:
case Intrinsic::amdgcn_struct_buffer_atomic_smin:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smin:
case Intrinsic::amdgcn_raw_buffer_atomic_umin:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin:
case Intrinsic::amdgcn_struct_buffer_atomic_umin:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umin:
case Intrinsic::amdgcn_raw_buffer_atomic_smax:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax:
case Intrinsic::amdgcn_struct_buffer_atomic_smax:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smax:
case Intrinsic::amdgcn_raw_buffer_atomic_umax:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umax:
case Intrinsic::amdgcn_struct_buffer_atomic_umax:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umax:
case Intrinsic::amdgcn_raw_buffer_atomic_and:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_and:
case Intrinsic::amdgcn_struct_buffer_atomic_and:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_and:
case Intrinsic::amdgcn_raw_buffer_atomic_or:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_or:
case Intrinsic::amdgcn_struct_buffer_atomic_or:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_or:
case Intrinsic::amdgcn_raw_buffer_atomic_xor:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor:
case Intrinsic::amdgcn_struct_buffer_atomic_xor:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_xor:
case Intrinsic::amdgcn_raw_buffer_atomic_inc:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_inc:
case Intrinsic::amdgcn_struct_buffer_atomic_inc:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_inc:
case Intrinsic::amdgcn_raw_buffer_atomic_dec:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_dec:
case Intrinsic::amdgcn_struct_buffer_atomic_dec:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_dec:
case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap:
case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_cmpswap:
case Intrinsic::amdgcn_raw_buffer_atomic_fmin:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin:
case Intrinsic::amdgcn_struct_buffer_atomic_fmin:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fmin:
case Intrinsic::amdgcn_raw_buffer_atomic_fmax:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax:
case Intrinsic::amdgcn_struct_buffer_atomic_fmax:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fmax:
case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd:
case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd:
return legalizeBufferAtomic(MI, B, IntrID);
- case Intrinsic::amdgcn_atomic_inc:
- return legalizeAtomicIncDec(MI, B, true);
- case Intrinsic::amdgcn_atomic_dec:
- return legalizeAtomicIncDec(MI, B, false);
case Intrinsic::trap:
return legalizeTrapIntrinsic(MI, MRI, B);
case Intrinsic::debugtrap:
@@ -5802,6 +6800,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return legalizeDSAtomicFPIntrinsic(Helper, MI, IntrID);
case Intrinsic::amdgcn_image_bvh_intersect_ray:
return legalizeBVHIntrinsic(MI, B);
+ case Intrinsic::amdgcn_fmed3: {
+ GISelChangeObserver &Observer = Helper.Observer;
+
+ // FIXME: This is to workaround the inability of tablegen match combiners to
+ // match intrinsics in patterns.
+ Observer.changingInstr(MI);
+ MI.setDesc(B.getTII().get(AMDGPU::G_AMDGPU_FMED3));
+ MI.removeOperand(1);
+ Observer.changedInstr(MI);
+ return true;
+ }
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrID))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
index 37c987108bc4..04773f275c87 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
@@ -71,14 +71,24 @@ public:
bool legalizeGlobalValue(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeLoad(LegalizerHelper &Helper, MachineInstr &MI) const;
+ bool legalizeStore(LegalizerHelper &Helper, MachineInstr &MI) const;
bool legalizeFMad(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeAtomicCmpXChg(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
- bool legalizeFlog(MachineInstr &MI, MachineIRBuilder &B,
- double Log2BaseInverted) const;
+
+ std::pair<Register, Register>
+ getScaledLogInput(MachineIRBuilder &B, Register Src, unsigned Flags) const;
+
+ bool legalizeFlog2(MachineInstr &MI, MachineIRBuilder &B) const;
+ bool legalizeFlogCommon(MachineInstr &MI, MachineIRBuilder &B) const;
+ bool legalizeFlogUnsafe(MachineIRBuilder &B, Register Dst, Register Src,
+ double Log2BaseInverted, unsigned Flags) const;
+ bool legalizeFExp2(MachineInstr &MI, MachineIRBuilder &B) const;
+ bool legalizeFExpUnsafe(MachineIRBuilder &B, Register Dst, Register Src,
+ unsigned Flags) const;
bool legalizeFExp(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeFPow(MachineInstr &MI, MachineIRBuilder &B) const;
bool legalizeFFloor(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -101,6 +111,9 @@ public:
bool loadInputValue(Register DstReg, MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
+ bool legalizePointerAsRsrcIntrin(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+
bool legalizePreloadedArgIntrin(
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
@@ -135,6 +148,8 @@ public:
MachineIRBuilder &B) const;
bool legalizeFDIV64(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool legalizeFFREXP(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
bool legalizeFastUnsafeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeFastUnsafeFDIV64(MachineInstr &MI, MachineRegisterInfo &MRI,
@@ -142,6 +157,9 @@ public:
bool legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
+ bool legalizeFSQRT(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
+
bool legalizeRsqClampIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
@@ -165,16 +183,9 @@ public:
std::pair<Register, unsigned> splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const;
- void updateBufferMMO(MachineMemOperand *MMO, Register VOffset,
- Register SOffset, unsigned ImmOffset, Register VIndex,
- MachineRegisterInfo &MRI) const;
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
Register Reg, bool ImageStore = false) const;
- bool legalizeRawBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B, bool IsFormat) const;
- bool legalizeRawBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineIRBuilder &B, bool IsFormat) const;
Register fixStoreSourceType(MachineIRBuilder &B, Register VData,
bool IsFormat) const;
@@ -198,9 +209,6 @@ public:
bool legalizeSBufferLoad(LegalizerHelper &Helper, MachineInstr &MI) const;
- bool legalizeAtomicIncDec(MachineInstr &MI, MachineIRBuilder &B,
- bool IsInc) const;
-
bool legalizeTrapIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B) const;
bool legalizeTrapEndpgm(MachineInstr &MI, MachineRegisterInfo &MRI,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
deleted file mode 100644
index 93d1eed2cf63..000000000000
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-//===-- AMDGPULowerIntrinsics.cpp -----------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/IntrinsicsR600.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Utils/LowerMemIntrinsics.h"
-
-#define DEBUG_TYPE "amdgpu-lower-intrinsics"
-
-using namespace llvm;
-
-namespace {
-
-static int MaxStaticSize;
-
-static cl::opt<int, true> MemIntrinsicExpandSizeThresholdOpt(
- "amdgpu-mem-intrinsic-expand-size",
- cl::desc("Set minimum mem intrinsic size to expand in IR"),
- cl::location(MaxStaticSize),
- cl::init(1024),
- cl::Hidden);
-
-
-class AMDGPULowerIntrinsics : public ModulePass {
-private:
- bool makeLIDRangeMetadata(Function &F) const;
-
-public:
- static char ID;
-
- AMDGPULowerIntrinsics() : ModulePass(ID) {}
-
- bool runOnModule(Module &M) override;
- bool expandMemIntrinsicUses(Function &F);
- StringRef getPassName() const override {
- return "AMDGPU Lower Intrinsics";
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetTransformInfoWrapperPass>();
- }
-};
-
-}
-
-char AMDGPULowerIntrinsics::ID = 0;
-
-char &llvm::AMDGPULowerIntrinsicsID = AMDGPULowerIntrinsics::ID;
-
-INITIALIZE_PASS(AMDGPULowerIntrinsics, DEBUG_TYPE, "Lower intrinsics", false,
- false)
-
-// TODO: Should refine based on estimated number of accesses (e.g. does it
-// require splitting based on alignment)
-static bool shouldExpandOperationWithSize(Value *Size) {
- ConstantInt *CI = dyn_cast<ConstantInt>(Size);
- return !CI || (CI->getSExtValue() > MaxStaticSize);
-}
-
-bool AMDGPULowerIntrinsics::expandMemIntrinsicUses(Function &F) {
- Intrinsic::ID ID = F.getIntrinsicID();
- bool Changed = false;
-
- for (User *U : llvm::make_early_inc_range(F.users())) {
- Instruction *Inst = cast<Instruction>(U);
-
- switch (ID) {
- case Intrinsic::memcpy: {
- auto *Memcpy = cast<MemCpyInst>(Inst);
- if (shouldExpandOperationWithSize(Memcpy->getLength())) {
- Function *ParentFunc = Memcpy->getParent()->getParent();
- const TargetTransformInfo &TTI =
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*ParentFunc);
- expandMemCpyAsLoop(Memcpy, TTI);
- Changed = true;
- Memcpy->eraseFromParent();
- }
-
- break;
- }
- case Intrinsic::memmove: {
- auto *Memmove = cast<MemMoveInst>(Inst);
- if (shouldExpandOperationWithSize(Memmove->getLength())) {
- expandMemMoveAsLoop(Memmove);
- Changed = true;
- Memmove->eraseFromParent();
- }
-
- break;
- }
- case Intrinsic::memset: {
- auto *Memset = cast<MemSetInst>(Inst);
- if (shouldExpandOperationWithSize(Memset->getLength())) {
- expandMemSetAsLoop(Memset);
- Changed = true;
- Memset->eraseFromParent();
- }
-
- break;
- }
- default:
- break;
- }
- }
-
- return Changed;
-}
-
-bool AMDGPULowerIntrinsics::makeLIDRangeMetadata(Function &F) const {
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC)
- return false;
-
- const TargetMachine &TM = TPC->getTM<TargetMachine>();
- bool Changed = false;
-
- for (auto *U : F.users()) {
- auto *CI = dyn_cast<CallInst>(U);
- if (!CI)
- continue;
-
- Function *Caller = CI->getParent()->getParent();
- const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, *Caller);
- Changed |= ST.makeLIDRangeMetadata(CI);
- }
- return Changed;
-}
-
-bool AMDGPULowerIntrinsics::runOnModule(Module &M) {
- bool Changed = false;
-
- for (Function &F : M) {
- if (!F.isDeclaration())
- continue;
-
- switch (F.getIntrinsicID()) {
- case Intrinsic::memcpy:
- case Intrinsic::memmove:
- case Intrinsic::memset:
- if (expandMemIntrinsicUses(F))
- Changed = true;
- break;
-
- case Intrinsic::r600_read_tidig_x:
- case Intrinsic::r600_read_tidig_y:
- case Intrinsic::r600_read_tidig_z:
- case Intrinsic::r600_read_local_size_x:
- case Intrinsic::r600_read_local_size_y:
- case Intrinsic::r600_read_local_size_z:
- Changed |= makeLIDRangeMetadata(F);
- break;
-
- default:
- break;
- }
- }
-
- return Changed;
-}
-
-ModulePass *llvm::createAMDGPULowerIntrinsicsPass() {
- return new AMDGPULowerIntrinsics();
-}
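// Illustrative sketch, not part of this change: a stand-alone model of the
// expansion decision made by the pass deleted above. Names here are invented.
// The pass expanded a memcpy/memmove/memset as an IR loop whenever the length
// was not a compile-time constant or exceeded the
// -amdgpu-mem-intrinsic-expand-size threshold (default 1024), mirroring
// shouldExpandOperationWithSize.
#include <cstdint>
#include <optional>

static bool shouldExpandAsLoop(std::optional<int64_t> KnownLength,
                               int64_t Threshold = 1024) {
  // Unknown length: always expand. Known length: expand only above threshold.
  return !KnownLength || *KnownLength > Threshold;
}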
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index f3ff9b753585..f5323725250f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -70,7 +70,7 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
IRBuilder<> Builder(&*getInsertPt(EntryBlock));
const Align KernArgBaseAlign(16); // FIXME: Increase if necessary
- const uint64_t BaseOffset = ST.getExplicitKernelArgOffset(F);
+ const uint64_t BaseOffset = ST.getExplicitKernelArgOffset();
Align MaxAlign;
// FIXME: Alignment is broken with explicit arg offset.;
@@ -86,7 +86,6 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
KernArgSegment->addRetAttr(
Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));
- unsigned AS = KernArgSegment->getType()->getPointerAddressSpace();
uint64_t ExplicitArgOffset = 0;
for (Argument &Arg : F.args()) {
@@ -111,8 +110,8 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
Builder.getInt8Ty(), KernArgSegment, EltOffset,
Arg.getName() + ".byval.kernarg.offset");
- Value *CastOffsetPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
- ArgOffsetPtr, Arg.getType());
+ Value *CastOffsetPtr =
+ Builder.CreateAddrSpaceCast(ArgOffsetPtr, Arg.getType());
Arg.replaceAllUsesWith(CastOffsetPtr);
continue;
}
@@ -170,8 +169,6 @@ bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
AdjustedArgTy = V4Ty;
}
- ArgPtr = Builder.CreateBitCast(ArgPtr, AdjustedArgTy->getPointerTo(AS),
- ArgPtr->getName() + ".cast");
LoadInst *Load =
Builder.CreateAlignedLoad(AdjustedArgTy, ArgPtr, AdjustedAlign);
Load->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(Ctx, {}));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
index 56e5e0708492..26074cf06071 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
@@ -322,7 +322,7 @@ static bool processUse(CallInst *CI, bool IsV5OrAbove) {
// TargetPassConfig for subtarget.
bool AMDGPULowerKernelAttributes::runOnModule(Module &M) {
bool MadeChange = false;
- bool IsV5OrAbove = AMDGPU::getAmdhsaCodeObjectVersion() >= 5;
+ bool IsV5OrAbove = AMDGPU::getCodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5;
Function *BasePtr = getBasePtrIntrinsic(M, IsV5OrAbove);
if (!BasePtr) // ImplicitArgPtr/DispatchPtr not used.
@@ -354,7 +354,8 @@ ModulePass *llvm::createAMDGPULowerKernelAttributesPass() {
PreservedAnalyses
AMDGPULowerKernelAttributesPass::run(Function &F, FunctionAnalysisManager &AM) {
- bool IsV5OrAbove = AMDGPU::getAmdhsaCodeObjectVersion() >= 5;
+ bool IsV5OrAbove =
+ AMDGPU::getCodeObjectVersion(*F.getParent()) >= AMDGPU::AMDHSA_COV5;
Function *BasePtr = getBasePtrIntrinsic(*F.getParent(), IsV5OrAbove);
if (!BasePtr) // ImplicitArgPtr/DispatchPtr not used.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index 11ba5c91dae9..e3a645977f92 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -20,9 +20,8 @@
// This model means the GPU runtime can specify the amount of memory allocated.
// If this is more than the kernel assumed, the excess can be made available
// using a language specific feature, which IR represents as a variable with
-// no initializer. This feature is not yet implemented for non-kernel functions.
-// This lowering could be extended to handle that use case, but would probably
-// require closer integration with promoteAllocaToLDS.
+// no initializer. This feature is referred to here as "Dynamic LDS" and is
+// lowered slightly differently to the normal case.
//
// Consequences of this GPU feature:
// - memory is limited and exceeding it halts compilation
@@ -65,17 +64,15 @@
// Kernel | Yes | Yes | No |
// Hybrid | Yes | Partial | Yes |
//
-// Module spends LDS memory to save cycles. Table spends cycles and global
-// memory to save LDS. Kernel is as fast as kernel allocation but only works
-// for variables that are known reachable from a single kernel. Hybrid picks
-// between all three. When forced to choose between LDS and cycles it minimises
+// "Module" spends LDS memory to save cycles. "Table" spends cycles and global
+// memory to save LDS. "Kernel" is as fast as kernel allocation but only works
+// for variables that are known reachable from a single kernel. "Hybrid" picks
+// between all three. When forced to choose between LDS and cycles we minimise
// LDS use.
// The "module" lowering implemented here finds LDS variables which are used by
// non-kernel functions and creates a new struct with a field for each of those
// LDS variables. Variables that are only used from kernels are excluded.
-// Kernels that do not use this struct are annoteated with the attribute
-// amdgpu-elide-module-lds which allows the back end to elide the allocation.
//
// The "table" lowering implemented here has three components.
// First kernels are assigned a unique integer identifier which is available in
@@ -115,6 +112,68 @@
// use LDS are expected to hit the "Kernel" lowering strategy
// - The runtime properties impose a cost in compiler implementation complexity
//
+// Dynamic LDS implementation
+// Dynamic LDS is lowered similarly to the "table" strategy above and uses the
+// same intrinsic to identify which kernel is at the root of the dynamic call
+// graph. This relies on the specified behaviour that all dynamic LDS variables
+// alias one another, i.e. are at the same address, with respect to a given
+// kernel. Therefore this pass creates new dynamic LDS variables for each kernel
+// that allocates any dynamic LDS and builds a table of addresses out of those.
+// The AMDGPUPromoteAlloca pass skips kernels that use dynamic LDS.
+// The corresponding optimisation for "kernel" lowering where the table lookup
+// is elided is not implemented.
+//
+//
+// Implementation notes / limitations
+// A single LDS global variable represents an instance per kernel that can
+// reach it. This pass essentially specialises that variable per kernel.
+// Handling ConstantExpr during the pass complicated this significantly so now
+// all ConstantExpr uses of LDS variables are expanded to instructions. This
+// may need amending when implementing non-undef initialisers.
+//
+// Lowering is split between this IR pass and the back end. This pass chooses
+// where given variables should be allocated and marks them with metadata,
+// MD_absolute_symbol. The backend places the variables in coincidentally the
+// same location and raises a fatal error if something has gone awry. This works
+// in practice because the only pass between this one and the backend that
+// changes LDS is PromoteAlloca and the changes it makes do not conflict.
+//
+// Addresses are written to constant global arrays based on the same metadata.
+//
+// The backend lowers LDS variables in the order of traversal of the function.
+// This is at odds with the deterministic layout required. The workaround is to
+// allocate the fixed-address variables immediately upon starting the function
+// where they can be placed as intended. This requires a means of mapping from
+// the function to the variables that it allocates. For the module scope lds,
+// this is via metadata indicating whether the variable is not required. If a
+// pass deletes that metadata, a fatal error on disagreement with the absolute
+// symbol metadata will occur. For kernel scope and dynamic, this is by _name_
+// correspondence between the function and the variable. It requires the
+// kernel to have a name (which is only a limitation for tests in practice) and
+// for nothing to rename the corresponding symbols. This is a hazard if the pass
+// is run multiple times during debugging. Alternative schemes considered all
+// involve bespoke metadata.
+//
+// If the name correspondence can be replaced, multiple distinct kernels that
+// have the same memory layout can map to the same kernel id (as the address
+// itself is handled by the absolute symbol metadata) and that will allow more
+// uses of the "kernel" style faster lowering and reduce the size of the lookup
+// tables.
+//
+// There is a test that checks this does not fire for a graphics shader. This
+// lowering is expected to work for graphics if the isKernel test is changed.
+//
+// The current markUsedByKernel is sufficient for PromoteAlloca but is elided
+// before codegen. Replacing this with an equivalent intrinsic which lasts until
+// shortly after the machine function lowering of LDS would help break the name
+// mapping. The other part needed is probably to amend PromoteAlloca to embed
+// the LDS variables it creates in the same struct created here. That avoids the
+// current hazard where a PromoteAlloca LDS variable might be allocated before
+// the kernel scope (and thus error on the address check). Given a new invariant
+// that no LDS variables exist outside of the structs managed here, and an
+// intrinsic that lasts until after the LDS frame lowering, it should be
+// possible to drop the name mapping and fold equivalent memory layouts.
+//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
@@ -134,11 +193,14 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/ReplaceConstant.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/OptimizedStructLayout.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -162,7 +224,7 @@ enum class LoweringKind { module, table, kernel, hybrid };
cl::opt<LoweringKind> LoweringKindLoc(
"amdgpu-lower-module-lds-strategy",
cl::desc("Specify lowering strategy for function LDS access:"), cl::Hidden,
- cl::init(LoweringKind::module),
+ cl::init(LoweringKind::hybrid),
cl::values(
clEnumValN(LoweringKind::table, "table", "Lower via table lookup"),
clEnumValN(LoweringKind::module, "module", "Lower via module struct"),
@@ -183,6 +245,13 @@ bool isKernelLDS(const Function *F) {
return AMDGPU::isKernel(F->getCallingConv());
}
+template <typename T> std::vector<T> sortByName(std::vector<T> &&V) {
+ llvm::sort(V.begin(), V.end(), [](const auto *L, const auto *R) {
+ return L->getName() < R->getName();
+ });
+ return {std::move(V)};
+}
+
class AMDGPULowerModuleLDS : public ModulePass {
static void
@@ -201,8 +270,7 @@ class AMDGPULowerModuleLDS : public ModulePass {
LocalVar->removeDeadConstantUsers();
}
- static void markUsedByKernel(IRBuilder<> &Builder, Function *Func,
- GlobalVariable *SGV) {
+ static void markUsedByKernel(Function *Func, GlobalVariable *SGV) {
// The llvm.amdgcn.module.lds instance is implicitly used by all kernels
// that might call a function which accesses a field within it. This is
// presently approximated to 'all kernels' if there are any such functions
@@ -217,21 +285,22 @@ class AMDGPULowerModuleLDS : public ModulePass {
// llvm.donothing that takes a pointer to the instance and is lowered to a
// no-op after LDS is allocated, but that is not presently necessary.
- LLVMContext &Ctx = Func->getContext();
-
- Builder.SetInsertPoint(Func->getEntryBlock().getFirstNonPHI());
-
- FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), {});
+ // This intrinsic is eliminated shortly before instruction selection. It
+ // does not suffice to indicate to ISel that a given global which is not
+ // immediately used by the kernel must still be allocated by it. An
+ // equivalent target specific intrinsic which lasts until immediately after
+ // codegen would suffice for that, but one would still need to ensure that
+ // the variables are allocated in the anticipated order.
+ IRBuilder<> Builder(Func->getEntryBlock().getFirstNonPHI());
Function *Decl =
Intrinsic::getDeclaration(Func->getParent(), Intrinsic::donothing, {});
- Value *UseInstance[1] = {Builder.CreateInBoundsGEP(
- SGV->getValueType(), SGV, ConstantInt::get(Type::getInt32Ty(Ctx), 0))};
+ Value *UseInstance[1] = {
+ Builder.CreateConstInBoundsGEP1_32(SGV->getValueType(), SGV, 0)};
- Builder.CreateCall(FTy, Decl, {},
- {OperandBundleDefT<Value *>("ExplicitUse", UseInstance)},
- "");
+ Builder.CreateCall(
+ Decl, {}, {OperandBundleDefT<Value *>("ExplicitUse", UseInstance)});
}
static bool eliminateConstantExprUsesOfLDSFromAllInstructions(Module &M) {
@@ -240,7 +309,7 @@ class AMDGPULowerModuleLDS : public ModulePass {
// This pass specialises LDS variables with respect to the kernel that
// allocates them.
- // This is semantically equivalent to:
+ // This is semantically equivalent to the following (not implemented this
+ // way because it would be slow):
// for (auto &F : M.functions())
// for (auto &BB : F)
// for (auto &I : BB)
@@ -248,63 +317,12 @@ class AMDGPULowerModuleLDS : public ModulePass {
// if (constantExprUsesLDS(Op))
// replaceConstantExprInFunction(I, Op);
- bool Changed = false;
-
- // Find all ConstantExpr that are direct users of an LDS global
- SmallVector<ConstantExpr *> Stack;
+ SmallVector<Constant *> LDSGlobals;
for (auto &GV : M.globals())
if (AMDGPU::isLDSVariableToLower(GV))
- for (User *U : GV.users())
- if (ConstantExpr *C = dyn_cast<ConstantExpr>(U))
- Stack.push_back(C);
-
- // Expand to include constexpr users of direct users
- SetVector<ConstantExpr *> ConstExprUsersOfLDS;
- while (!Stack.empty()) {
- ConstantExpr *V = Stack.pop_back_val();
- if (ConstExprUsersOfLDS.contains(V))
- continue;
+ LDSGlobals.push_back(&GV);
- ConstExprUsersOfLDS.insert(V);
-
- for (auto *Nested : V->users())
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Nested))
- Stack.push_back(CE);
- }
-
- // Find all instructions that use any of the ConstExpr users of LDS
- SetVector<Instruction *> InstructionWorklist;
- for (ConstantExpr *CE : ConstExprUsersOfLDS)
- for (User *U : CE->users())
- if (auto *I = dyn_cast<Instruction>(U))
- InstructionWorklist.insert(I);
-
- // Replace those ConstExpr operands with instructions
- while (!InstructionWorklist.empty()) {
- Instruction *I = InstructionWorklist.pop_back_val();
- for (Use &U : I->operands()) {
-
- auto *BI = I;
- if (auto *Phi = dyn_cast<PHINode>(I)) {
- BasicBlock *BB = Phi->getIncomingBlock(U);
- BasicBlock::iterator It = BB->getFirstInsertionPt();
- assert(It != BB->end() && "Unexpected empty basic block");
- BI = &(*(It));
- }
-
- if (ConstantExpr *C = dyn_cast<ConstantExpr>(U.get())) {
- if (ConstExprUsersOfLDS.contains(C)) {
- Changed = true;
- Instruction *NI = C->getAsInstruction(BI);
- InstructionWorklist.insert(NI);
- U.set(NI);
- C->removeDeadConstantUsers();
- }
- }
- }
- }
-
- return Changed;
+ return convertUsersOfConstantsToInstructions(LDSGlobals);
}
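  // Illustrative only, not emitted verbatim by this change: the conversion
  // above rewrites a constant-expression use of an LDS global, e.g.
  //   store i32 0, ptr addrspace(3) getelementptr inbounds
  //       ([4 x i32], ptr addrspace(3) @lds, i32 0, i32 1)
  // into an ordinary instruction inside the using function,
  //   %gep = getelementptr inbounds [4 x i32], ptr addrspace(3) @lds, i32 0, i32 1
  //   store i32 0, ptr addrspace(3) %gep
  // so the per-kernel specialisation only has to rewrite instruction operands.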
public:
@@ -329,7 +347,11 @@ public:
continue;
}
- SmallVector<User *, 16> Stack(GV.users());
+ if (GV.isAbsoluteSymbolRef()) {
+ report_fatal_error(
+ "LDS variables with absolute addresses are unimplemented.");
+ }
+
for (User *V : GV.users()) {
if (auto *I = dyn_cast<Instruction>(V)) {
Function *F = I->getFunction();
@@ -358,11 +380,11 @@ public:
DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer;
for (Function &F : M.functions()) {
if (!isKernelLDS(&F))
- if (F.hasAddressTaken(nullptr,
- /* IgnoreCallbackUses */ false,
- /* IgnoreAssumeLikeCalls */ false,
- /* IgnoreLLVMUsed */ true,
- /* IgnoreArcAttachedCall */ false)) {
+ if (F.hasAddressTaken(nullptr,
+ /* IgnoreCallbackUses */ false,
+ /* IgnoreAssumeLikeCalls */ false,
+ /* IgnoreLLVMUsed */ true,
+ /* IgnoreArcAttachedCall */ false)) {
set_union(VariablesReachableThroughFunctionPointer,
direct_map_function[&F]);
}
@@ -370,7 +392,7 @@ public:
auto functionMakesUnknownCall = [&](const Function *F) -> bool {
assert(!F->isDeclaration());
- for (CallGraphNode::CallRecord R : *CG[F]) {
+ for (const CallGraphNode::CallRecord &R : *CG[F]) {
if (!R.second->getFunction()) {
return true;
}
@@ -408,7 +430,7 @@ public:
// have already been computed, with more care than this
set_union(transitive_map_function[&Func], direct_map_function[F]);
- for (CallGraphNode::CallRecord R : *CG[F]) {
+ for (const CallGraphNode::CallRecord &R : *CG[F]) {
Function *ith = R.second->getFunction();
if (ith) {
if (!seen.contains(ith)) {
@@ -428,7 +450,7 @@ public:
if (Func.isDeclaration() || !isKernelLDS(&Func))
continue;
- for (CallGraphNode::CallRecord R : *CG[&Func]) {
+ for (const CallGraphNode::CallRecord &R : *CG[&Func]) {
Function *ith = R.second->getFunction();
if (ith) {
set_union(indirect_map_kernel[&Func], transitive_map_function[ith]);
@@ -454,7 +476,7 @@ public:
static Constant *getAddressesOfVariablesInKernel(
LLVMContext &Ctx, ArrayRef<GlobalVariable *> Variables,
- DenseMap<GlobalVariable *, Constant *> &LDSVarsToConstantGEP) {
+ const DenseMap<GlobalVariable *, Constant *> &LDSVarsToConstantGEP) {
// Create a ConstantArray containing the address of each Variable within the
// kernel corresponding to LDSVarsToConstantGEP, or poison if that kernel
// does not allocate it
@@ -467,8 +489,9 @@ public:
SmallVector<Constant *> Elements;
for (size_t i = 0; i < Variables.size(); i++) {
GlobalVariable *GV = Variables[i];
- if (LDSVarsToConstantGEP.count(GV) != 0) {
- auto elt = ConstantExpr::getPtrToInt(LDSVarsToConstantGEP[GV], I32);
+ auto ConstantGepIt = LDSVarsToConstantGEP.find(GV);
+ if (ConstantGepIt != LDSVarsToConstantGEP.end()) {
+ auto elt = ConstantExpr::getPtrToInt(ConstantGepIt->second, I32);
Elements.push_back(elt);
} else {
Elements.push_back(PoisonValue::get(I32));
@@ -495,11 +518,15 @@ public:
ArrayType *AllKernelsOffsetsType =
ArrayType::get(KernelOffsetsType, NumberKernels);
+ Constant *Missing = PoisonValue::get(KernelOffsetsType);
std::vector<Constant *> overallConstantExprElts(NumberKernels);
for (size_t i = 0; i < NumberKernels; i++) {
- LDSVariableReplacement Replacement = KernelToReplacement[kernels[i]];
- overallConstantExprElts[i] = getAddressesOfVariablesInKernel(
- Ctx, Variables, Replacement.LDSVarsToConstantGEP);
+ auto Replacement = KernelToReplacement.find(kernels[i]);
+ overallConstantExprElts[i] =
+ (Replacement == KernelToReplacement.end())
+ ? Missing
+ : getAddressesOfVariablesInKernel(
+ Ctx, Variables, Replacement->second.LDSVarsToConstantGEP);
}
Constant *init =
@@ -511,36 +538,49 @@ public:
AMDGPUAS::CONSTANT_ADDRESS);
}
- void replaceUsesInInstructionsWithTableLookup(
- Module &M, ArrayRef<GlobalVariable *> ModuleScopeVariables,
- GlobalVariable *LookupTable) {
-
+ void replaceUseWithTableLookup(Module &M, IRBuilder<> &Builder,
+ GlobalVariable *LookupTable,
+ GlobalVariable *GV, Use &U,
+ Value *OptionalIndex) {
+ // Table is a constant array of the same length as OrderedKernels
LLVMContext &Ctx = M.getContext();
- IRBuilder<> Builder(Ctx);
Type *I32 = Type::getInt32Ty(Ctx);
+ auto *I = cast<Instruction>(U.getUser());
- // Accesses from a function use the amdgcn_lds_kernel_id intrinsic which
- // lowers to a read from a live in register. Emit it once in the entry
- // block to spare deduplicating it later.
+ Value *tableKernelIndex = getTableLookupKernelIndex(M, I->getFunction());
- DenseMap<Function *, Value *> tableKernelIndexCache;
- auto getTableKernelIndex = [&](Function *F) -> Value * {
- if (tableKernelIndexCache.count(F) == 0) {
- LLVMContext &Ctx = M.getContext();
- FunctionType *FTy = FunctionType::get(Type::getInt32Ty(Ctx), {});
- Function *Decl =
- Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_lds_kernel_id, {});
+ if (auto *Phi = dyn_cast<PHINode>(I)) {
+ BasicBlock *BB = Phi->getIncomingBlock(U);
+ Builder.SetInsertPoint(&(*(BB->getFirstInsertionPt())));
+ } else {
+ Builder.SetInsertPoint(I);
+ }
- BasicBlock::iterator it =
- F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
- Instruction &i = *it;
- Builder.SetInsertPoint(&i);
+ SmallVector<Value *, 3> GEPIdx = {
+ ConstantInt::get(I32, 0),
+ tableKernelIndex,
+ };
+ if (OptionalIndex)
+ GEPIdx.push_back(OptionalIndex);
- tableKernelIndexCache[F] = Builder.CreateCall(FTy, Decl, {});
- }
+ Value *Address = Builder.CreateInBoundsGEP(
+ LookupTable->getValueType(), LookupTable, GEPIdx, GV->getName());
- return tableKernelIndexCache[F];
- };
+ Value *loaded = Builder.CreateLoad(I32, Address);
+
+ Value *replacement =
+ Builder.CreateIntToPtr(loaded, GV->getType(), GV->getName());
+
+ U.set(replacement);
+ }
+
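  // For reference only; the approximate IR shape produced by
  // replaceUseWithTableLookup above (assumed shape, not quoted from a test) is
  //   %id   = call i32 @llvm.amdgcn.lds.kernel.id()
  //   %slot = getelementptr inbounds [N x [M x i32]], ptr addrspace(4) @table,
  //           i32 0, i32 %id, i32 <Index>
  //   %off  = load i32, ptr addrspace(4) %slot
  //   %ptr  = inttoptr i32 %off to ptr addrspace(3)
  // and each use of the original LDS global is replaced with %ptr.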
+ void replaceUsesInInstructionsWithTableLookup(
+ Module &M, ArrayRef<GlobalVariable *> ModuleScopeVariables,
+ GlobalVariable *LookupTable) {
+
+ LLVMContext &Ctx = M.getContext();
+ IRBuilder<> Builder(Ctx);
+ Type *I32 = Type::getInt32Ty(Ctx);
for (size_t Index = 0; Index < ModuleScopeVariables.size(); Index++) {
auto *GV = ModuleScopeVariables[Index];
@@ -550,32 +590,8 @@ public:
if (!I)
continue;
- Value *tableKernelIndex = getTableKernelIndex(I->getFunction());
-
- // So if the phi uses this value multiple times, what does this look
- // like?
- if (auto *Phi = dyn_cast<PHINode>(I)) {
- BasicBlock *BB = Phi->getIncomingBlock(U);
- Builder.SetInsertPoint(&(*(BB->getFirstInsertionPt())));
- } else {
- Builder.SetInsertPoint(I);
- }
-
- Value *GEPIdx[3] = {
- ConstantInt::get(I32, 0),
- tableKernelIndex,
- ConstantInt::get(I32, Index),
- };
-
- Value *Address = Builder.CreateInBoundsGEP(
- LookupTable->getValueType(), LookupTable, GEPIdx, GV->getName());
-
- Value *loaded = Builder.CreateLoad(I32, Address);
-
- Value *replacement =
- Builder.CreateIntToPtr(loaded, GV->getType(), GV->getName());
-
- U.set(replacement);
+ replaceUseWithTableLookup(M, Builder, LookupTable, GV, U,
+ ConstantInt::get(I32, Index));
}
}
}
@@ -586,7 +602,8 @@ public:
DenseSet<Function *> KernelSet;
- if (VariableSet.empty()) return KernelSet;
+ if (VariableSet.empty())
+ return KernelSet;
for (Function &Func : M.functions()) {
if (Func.isDeclaration() || !isKernelLDS(&Func))
@@ -649,8 +666,9 @@ public:
// strategy
continue;
}
- CandidateTy Candidate(GV, K.second.size(),
- DL.getTypeAllocSize(GV->getValueType()).getFixedValue());
+ CandidateTy Candidate(
+ GV, K.second.size(),
+ DL.getTypeAllocSize(GV->getValueType()).getFixedValue());
if (MostUsed < Candidate)
MostUsed = Candidate;
}
@@ -658,173 +676,258 @@ public:
return MostUsed.GV;
}
- bool runOnModule(Module &M) override {
- LLVMContext &Ctx = M.getContext();
- CallGraph CG = CallGraph(M);
- bool Changed = superAlignLDSGlobals(M);
+ static void recordLDSAbsoluteAddress(Module *M, GlobalVariable *GV,
+ uint32_t Address) {
+ // Write the specified address into metadata where it can be retrieved by
+ // the assembler. Format is a half open range, [Address Address+1)
+ LLVMContext &Ctx = M->getContext();
+ auto *IntTy =
+ M->getDataLayout().getIntPtrType(Ctx, AMDGPUAS::LOCAL_ADDRESS);
+ auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntTy, Address));
+ auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntTy, Address + 1));
+ GV->setMetadata(LLVMContext::MD_absolute_symbol,
+ MDNode::get(Ctx, {MinC, MaxC}));
+ }
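  // Hedged sketch, not part of this change: how a consumer could decode the
  // half-open [Address, Address + 1) range written by recordLDSAbsoluteAddress
  // above. GlobalValue::getAbsoluteSymbolRange() already parses
  // MD_absolute_symbol into a ConstantRange; the getLDSAbsoluteAddress helper
  // referenced elsewhere in this diff is assumed to behave roughly like this
  // (requires <optional> and the usual IR headers).
  static std::optional<uint32_t>
  readLDSAbsoluteAddress(const GlobalVariable &GV) {
    std::optional<ConstantRange> R = GV.getAbsoluteSymbolRange();
    if (!R || R->isFullSet())
      return std::nullopt;
    // The pass stores [Address, Address + 1), so the lower bound is the
    // assigned address.
    return static_cast<uint32_t>(R->getLower().getZExtValue());
  }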
- Changed |= eliminateConstantExprUsesOfLDSFromAllInstructions(M);
+ DenseMap<Function *, Value *> tableKernelIndexCache;
+ Value *getTableLookupKernelIndex(Module &M, Function *F) {
+ // Accesses from a function use the amdgcn_lds_kernel_id intrinsic which
+ // lowers to a read from a live in register. Emit it once in the entry
+ // block to spare deduplicating it later.
+ auto [It, Inserted] = tableKernelIndexCache.try_emplace(F);
+ if (Inserted) {
+ Function *Decl =
+ Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_lds_kernel_id, {});
- Changed = true; // todo: narrow this down
+ auto InsertAt = F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca();
+ IRBuilder<> Builder(&*InsertAt);
- // For each kernel, what variables does it access directly or through
- // callees
- LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);
+ It->second = Builder.CreateCall(Decl, {});
+ }
- // For each variable accessed through callees, which kernels access it
- VariableFunctionMap LDSToKernelsThatNeedToAccessItIndirectly;
- for (auto &K : LDSUsesInfo.indirect_access) {
- Function *F = K.first;
- assert(isKernelLDS(F));
- for (GlobalVariable *GV : K.second) {
- LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(F);
+ return It->second;
+ }
+
+ static std::vector<Function *> assignLDSKernelIDToEachKernel(
+ Module *M, DenseSet<Function *> const &KernelsThatAllocateTableLDS,
+ DenseSet<Function *> const &KernelsThatIndirectlyAllocateDynamicLDS) {
+ // Associate kernels in the set with an arbitrary but reproducible order and
+ // annotate them with that order in metadata. This metadata is recognised by
+ // the backend and lowered to an SGPR which can be read from using
+ // amdgcn_lds_kernel_id.
+
+ std::vector<Function *> OrderedKernels;
+ if (!KernelsThatAllocateTableLDS.empty() ||
+ !KernelsThatIndirectlyAllocateDynamicLDS.empty()) {
+
+ for (Function &Func : M->functions()) {
+ if (Func.isDeclaration())
+ continue;
+ if (!isKernelLDS(&Func))
+ continue;
+
+ if (KernelsThatAllocateTableLDS.contains(&Func) ||
+ KernelsThatIndirectlyAllocateDynamicLDS.contains(&Func)) {
+ assert(Func.hasName()); // else fatal error earlier
+ OrderedKernels.push_back(&Func);
+ }
+ }
+
+ // Put them in an arbitrary but reproducible order
+ OrderedKernels = sortByName(std::move(OrderedKernels));
+
+ // Annotate the kernels with their order in this vector
+ LLVMContext &Ctx = M->getContext();
+ IRBuilder<> Builder(Ctx);
+
+ if (OrderedKernels.size() > UINT32_MAX) {
+ // 32 bit keeps it in one SGPR. > 2**32 kernels won't fit on the GPU
+ report_fatal_error("Unimplemented LDS lowering for > 2**32 kernels");
+ }
+
+ for (size_t i = 0; i < OrderedKernels.size(); i++) {
+ Metadata *AttrMDArgs[1] = {
+ ConstantAsMetadata::get(Builder.getInt32(i)),
+ };
+ OrderedKernels[i]->setMetadata("llvm.amdgcn.lds.kernel.id",
+ MDNode::get(Ctx, AttrMDArgs));
}
}
+ return OrderedKernels;
+ }
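  // Hedged sketch only, not in this change. The integer written into the
  // "llvm.amdgcn.lds.kernel.id" metadata above could be read back on the
  // consumer side along these lines; the in-tree backend query may differ in
  // detail.
  static std::optional<uint32_t> readAssignedKernelId(const Function &F) {
    if (MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id"))
      if (auto *C = mdconst::extract_or_null<ConstantInt>(MD->getOperand(0)))
        return static_cast<uint32_t>(C->getZExtValue());
    return std::nullopt;
  }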
- // Partition variables into the different strategies
- DenseSet<GlobalVariable *> ModuleScopeVariables;
- DenseSet<GlobalVariable *> TableLookupVariables;
- DenseSet<GlobalVariable *> KernelAccessVariables;
+ static void partitionVariablesIntoIndirectStrategies(
+ Module &M, LDSUsesInfoTy const &LDSUsesInfo,
+ VariableFunctionMap &LDSToKernelsThatNeedToAccessItIndirectly,
+ DenseSet<GlobalVariable *> &ModuleScopeVariables,
+ DenseSet<GlobalVariable *> &TableLookupVariables,
+ DenseSet<GlobalVariable *> &KernelAccessVariables,
+ DenseSet<GlobalVariable *> &DynamicVariables) {
+
+ GlobalVariable *HybridModuleRoot =
+ LoweringKindLoc != LoweringKind::hybrid
+ ? nullptr
+ : chooseBestVariableForModuleStrategy(
+ M.getDataLayout(), LDSToKernelsThatNeedToAccessItIndirectly);
+
+ DenseSet<Function *> const EmptySet;
+ DenseSet<Function *> const &HybridModuleRootKernels =
+ HybridModuleRoot
+ ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]
+ : EmptySet;
+
+ for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
+ // Each iteration of this loop assigns exactly one global variable to
+ // exactly one of the implementation strategies.
- {
- GlobalVariable *HybridModuleRoot =
- LoweringKindLoc != LoweringKind::hybrid
- ? nullptr
- : chooseBestVariableForModuleStrategy(
- M.getDataLayout(),
- LDSToKernelsThatNeedToAccessItIndirectly);
-
- DenseSet<Function *> const EmptySet;
- DenseSet<Function *> const &HybridModuleRootKernels =
- HybridModuleRoot
- ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot]
- : EmptySet;
-
- for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) {
- // Each iteration of this loop assigns exactly one global variable to
- // exactly one of the implementation strategies.
-
- GlobalVariable *GV = K.first;
- assert(AMDGPU::isLDSVariableToLower(*GV));
- assert(K.second.size() != 0);
-
- switch (LoweringKindLoc) {
- case LoweringKind::module:
- ModuleScopeVariables.insert(GV);
- break;
+ GlobalVariable *GV = K.first;
+ assert(AMDGPU::isLDSVariableToLower(*GV));
+ assert(K.second.size() != 0);
- case LoweringKind::table:
- TableLookupVariables.insert(GV);
- break;
+ if (AMDGPU::isDynamicLDS(*GV)) {
+ DynamicVariables.insert(GV);
+ continue;
+ }
- case LoweringKind::kernel:
- if (K.second.size() == 1) {
- KernelAccessVariables.insert(GV);
- } else {
- report_fatal_error(
- "cannot lower LDS '" + GV->getName() +
- "' to kernel access as it is reachable from multiple kernels");
- }
- break;
+ switch (LoweringKindLoc) {
+ case LoweringKind::module:
+ ModuleScopeVariables.insert(GV);
+ break;
- case LoweringKind::hybrid: {
- if (GV == HybridModuleRoot) {
- assert(K.second.size() != 1);
- ModuleScopeVariables.insert(GV);
- } else if (K.second.size() == 1) {
- KernelAccessVariables.insert(GV);
- } else if (set_is_subset(K.second, HybridModuleRootKernels)) {
- ModuleScopeVariables.insert(GV);
- } else {
- TableLookupVariables.insert(GV);
- }
- break;
+ case LoweringKind::table:
+ TableLookupVariables.insert(GV);
+ break;
+
+ case LoweringKind::kernel:
+ if (K.second.size() == 1) {
+ KernelAccessVariables.insert(GV);
+ } else {
+ report_fatal_error(
+ "cannot lower LDS '" + GV->getName() +
+ "' to kernel access as it is reachable from multiple kernels");
}
+ break;
+
+ case LoweringKind::hybrid: {
+ if (GV == HybridModuleRoot) {
+ assert(K.second.size() != 1);
+ ModuleScopeVariables.insert(GV);
+ } else if (K.second.size() == 1) {
+ KernelAccessVariables.insert(GV);
+ } else if (set_is_subset(K.second, HybridModuleRootKernels)) {
+ ModuleScopeVariables.insert(GV);
+ } else {
+ TableLookupVariables.insert(GV);
}
+ break;
}
+ }
+ }
- assert(ModuleScopeVariables.size() + TableLookupVariables.size() +
- KernelAccessVariables.size() ==
- LDSToKernelsThatNeedToAccessItIndirectly.size());
- } // Variables have now been partitioned into the three lowering strategies.
+ // All LDS variables accessed indirectly have now been partitioned into
+ // the distinct lowering strategies.
+ assert(ModuleScopeVariables.size() + TableLookupVariables.size() +
+ KernelAccessVariables.size() + DynamicVariables.size() ==
+ LDSToKernelsThatNeedToAccessItIndirectly.size());
+ }
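  // A compact model (invented names, illustration only) of the decision order
  // implemented above for the default hybrid strategy; NumKernels counts the
  // kernels that reach the variable indirectly.
  enum class IndirectStrategy { Module, Table, Kernel, Dynamic };
  static IndirectStrategy classifyHybrid(bool IsDynamicLDS, bool IsHybridRoot,
                                         size_t NumKernels,
                                         bool KernelsSubsetOfRootKernels) {
    if (IsDynamicLDS)
      return IndirectStrategy::Dynamic; // handled by the dynamic LDS path
    if (IsHybridRoot)
      return IndirectStrategy::Module;  // the one variable chosen as root
    if (NumKernels == 1)
      return IndirectStrategy::Kernel;  // reachable from a single kernel only
    if (KernelsSubsetOfRootKernels)
      return IndirectStrategy::Module;  // can share the module struct for free
    return IndirectStrategy::Table;     // otherwise pay for a table lookup
  }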
- // If the kernel accesses a variable that is going to be stored in the
- // module instance through a call then that kernel needs to allocate the
- // module instance
- DenseSet<Function *> KernelsThatAllocateModuleLDS =
- kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
- ModuleScopeVariables);
- DenseSet<Function *> KernelsThatAllocateTableLDS =
- kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
- TableLookupVariables);
+ static GlobalVariable *lowerModuleScopeStructVariables(
+ Module &M, DenseSet<GlobalVariable *> const &ModuleScopeVariables,
+ DenseSet<Function *> const &KernelsThatAllocateModuleLDS) {
+ // Create a struct to hold the ModuleScopeVariables.
+ // Replace all uses of those variables from non-kernel functions with the
+ // new struct instance. Replace only the uses from kernel functions that will
+ // allocate this instance. That is a space optimisation - kernels that use a
+ // subset of the module scope struct and do not need to allocate it for
+ // indirect calls will only allocate the subset they use (they do so as part
+ // of the per-kernel lowering).
+ if (ModuleScopeVariables.empty()) {
+ return nullptr;
+ }
- if (!ModuleScopeVariables.empty()) {
- LDSVariableReplacement ModuleScopeReplacement =
- createLDSVariableReplacement(M, "llvm.amdgcn.module.lds",
- ModuleScopeVariables);
+ LLVMContext &Ctx = M.getContext();
- appendToCompilerUsed(M,
- {static_cast<GlobalValue *>(
- ConstantExpr::getPointerBitCastOrAddrSpaceCast(
- cast<Constant>(ModuleScopeReplacement.SGV),
- Type::getInt8PtrTy(Ctx)))});
+ LDSVariableReplacement ModuleScopeReplacement =
+ createLDSVariableReplacement(M, "llvm.amdgcn.module.lds",
+ ModuleScopeVariables);
- // historic
- removeLocalVarsFromUsedLists(M, ModuleScopeVariables);
+ appendToCompilerUsed(M, {static_cast<GlobalValue *>(
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(
+ cast<Constant>(ModuleScopeReplacement.SGV),
+ Type::getInt8PtrTy(Ctx)))});
- // Replace all uses of module scope variable from non-kernel functions
- replaceLDSVariablesWithStruct(
- M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
- Instruction *I = dyn_cast<Instruction>(U.getUser());
- if (!I) {
- return false;
- }
- Function *F = I->getFunction();
- return !isKernelLDS(F);
- });
+ // module.lds will be allocated at zero in any kernel that allocates it
+ recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0);
- // Replace uses of module scope variable from kernel functions that
- // allocate the module scope variable, otherwise leave them unchanged
- // Record on each kernel whether the module scope global is used by it
+ // historic
+ removeLocalVarsFromUsedLists(M, ModuleScopeVariables);
- LLVMContext &Ctx = M.getContext();
- IRBuilder<> Builder(Ctx);
-
- for (Function &Func : M.functions()) {
- if (Func.isDeclaration() || !isKernelLDS(&Func))
- continue;
+ // Replace all uses of module scope variable from non-kernel functions
+ replaceLDSVariablesWithStruct(
+ M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
+ Instruction *I = dyn_cast<Instruction>(U.getUser());
+ if (!I) {
+ return false;
+ }
+ Function *F = I->getFunction();
+ return !isKernelLDS(F);
+ });
- if (KernelsThatAllocateModuleLDS.contains(&Func)) {
- replaceLDSVariablesWithStruct(
- M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
- Instruction *I = dyn_cast<Instruction>(U.getUser());
- if (!I) {
- return false;
- }
- Function *F = I->getFunction();
- return F == &Func;
- });
+ // Replace uses of module scope variable from kernel functions that
+ // allocate the module scope variable, otherwise leave them unchanged
+ // Record on each kernel whether the module scope global is used by it
- markUsedByKernel(Builder, &Func, ModuleScopeReplacement.SGV);
+ for (Function &Func : M.functions()) {
+ if (Func.isDeclaration() || !isKernelLDS(&Func))
+ continue;
- } else {
- Func.addFnAttr("amdgpu-elide-module-lds");
- }
+ if (KernelsThatAllocateModuleLDS.contains(&Func)) {
+ replaceLDSVariablesWithStruct(
+ M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) {
+ Instruction *I = dyn_cast<Instruction>(U.getUser());
+ if (!I) {
+ return false;
+ }
+ Function *F = I->getFunction();
+ return F == &Func;
+ });
+
+ markUsedByKernel(&Func, ModuleScopeReplacement.SGV);
}
}
- // Create a struct for each kernel for the non-module-scope variables
+ return ModuleScopeReplacement.SGV;
+ }
+
+ static DenseMap<Function *, LDSVariableReplacement>
+ lowerKernelScopeStructVariables(
+ Module &M, LDSUsesInfoTy &LDSUsesInfo,
+ DenseSet<GlobalVariable *> const &ModuleScopeVariables,
+ DenseSet<Function *> const &KernelsThatAllocateModuleLDS,
+ GlobalVariable *MaybeModuleScopeStruct) {
+
+ // Create a struct for each kernel for the non-module-scope variables.
+
DenseMap<Function *, LDSVariableReplacement> KernelToReplacement;
for (Function &Func : M.functions()) {
if (Func.isDeclaration() || !isKernelLDS(&Func))
continue;
DenseSet<GlobalVariable *> KernelUsedVariables;
+ // Allocate variables that are used directly in this struct, to get
+ // alignment-aware allocation and a predictable frame size.
for (auto &v : LDSUsesInfo.direct_access[&Func]) {
- KernelUsedVariables.insert(v);
+ if (!AMDGPU::isDynamicLDS(*v)) {
+ KernelUsedVariables.insert(v);
+ }
}
+
+ // Allocate variables that are accessed indirectly, so that a lookup of
+ // this struct instance can find them from nested functions.
for (auto &v : LDSUsesInfo.indirect_access[&Func]) {
- KernelUsedVariables.insert(v);
+ if (!AMDGPU::isDynamicLDS(*v)) {
+ KernelUsedVariables.insert(v);
+ }
}
// Variables allocated in module lds must all resolve to that struct,
@@ -836,7 +939,8 @@ public:
}
if (KernelUsedVariables.empty()) {
- // Either used no LDS, or all the LDS it used was also in module
+ // Either used no LDS, or the LDS it used was all in the module struct
+ // or dynamically sized
continue;
}
@@ -856,6 +960,14 @@ public:
auto Replacement =
createLDSVariableReplacement(M, VarName, KernelUsedVariables);
+ // If there are any indirect uses, create a direct use to ensure allocation.
+ // TODO: Simpler to unconditionally mark used but that regresses
+ // codegen in test/CodeGen/AMDGPU/noclobber-barrier.ll
+ auto Accesses = LDSUsesInfo.indirect_access.find(&Func);
+ if ((Accesses != LDSUsesInfo.indirect_access.end()) &&
+ !Accesses->second.empty())
+ markUsedByKernel(&Func, Replacement.SGV);
+
// remove preserves existing codegen
removeLocalVarsFromUsedLists(M, KernelUsedVariables);
KernelToReplacement[&Func] = Replacement;
@@ -867,6 +979,169 @@ public:
return I && I->getFunction() == &Func;
});
}
+ return KernelToReplacement;
+ }
+
+ static GlobalVariable *
+ buildRepresentativeDynamicLDSInstance(Module &M, LDSUsesInfoTy &LDSUsesInfo,
+ Function *func) {
+ // Create a dynamic lds variable, named after the passed function, whose
+ // alignment is the maximum alignment of any dynamic lds variable reachable
+ // from this kernel. Dynamic LDS is allocated after the static LDS
+ // allocation, possibly after alignment padding. All dynamic LDS variables
+ // are allocated at the same address in each kernel in order to provide the
+ // documented aliasing semantics. Setting the alignment here allows this IR
+ // pass to accurately predict the exact constant at which it will be
+ // allocated.
+
+ assert(isKernelLDS(func));
+
+ LLVMContext &Ctx = M.getContext();
+ const DataLayout &DL = M.getDataLayout();
+ Align MaxDynamicAlignment(1);
+
+ auto UpdateMaxAlignment = [&MaxDynamicAlignment, &DL](GlobalVariable *GV) {
+ if (AMDGPU::isDynamicLDS(*GV)) {
+ MaxDynamicAlignment =
+ std::max(MaxDynamicAlignment, AMDGPU::getAlign(DL, GV));
+ }
+ };
+
+ for (GlobalVariable *GV : LDSUsesInfo.indirect_access[func]) {
+ UpdateMaxAlignment(GV);
+ }
+
+ for (GlobalVariable *GV : LDSUsesInfo.direct_access[func]) {
+ UpdateMaxAlignment(GV);
+ }
+
+ assert(func->hasName()); // Checked by caller
+ auto emptyCharArray = ArrayType::get(Type::getInt8Ty(Ctx), 0);
+ GlobalVariable *N = new GlobalVariable(
+ M, emptyCharArray, false, GlobalValue::ExternalLinkage, nullptr,
+ Twine("llvm.amdgcn." + func->getName() + ".dynlds"), nullptr, GlobalValue::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS,
+ false);
+ N->setAlignment(MaxDynamicAlignment);
+
+ assert(AMDGPU::isDynamicLDS(*N));
+ return N;
+ }
+
+ DenseMap<Function *, GlobalVariable *> lowerDynamicLDSVariables(
+ Module &M, LDSUsesInfoTy &LDSUsesInfo,
+ DenseSet<Function *> const &KernelsThatIndirectlyAllocateDynamicLDS,
+ DenseSet<GlobalVariable *> const &DynamicVariables,
+ std::vector<Function *> const &OrderedKernels) {
+ DenseMap<Function *, GlobalVariable *> KernelToCreatedDynamicLDS;
+ if (!KernelsThatIndirectlyAllocateDynamicLDS.empty()) {
+ LLVMContext &Ctx = M.getContext();
+ IRBuilder<> Builder(Ctx);
+ Type *I32 = Type::getInt32Ty(Ctx);
+
+ std::vector<Constant *> newDynamicLDS;
+
+ // Table is built in the same order as OrderedKernels
+ for (auto &func : OrderedKernels) {
+
+ if (KernelsThatIndirectlyAllocateDynamicLDS.contains(func)) {
+ assert(isKernelLDS(func));
+ if (!func->hasName()) {
+ report_fatal_error("Anonymous kernels cannot use LDS variables");
+ }
+
+ GlobalVariable *N =
+ buildRepresentativeDynamicLDSInstance(M, LDSUsesInfo, func);
+
+ KernelToCreatedDynamicLDS[func] = N;
+
+ markUsedByKernel(func, N);
+
+ auto emptyCharArray = ArrayType::get(Type::getInt8Ty(Ctx), 0);
+ auto GEP = ConstantExpr::getGetElementPtr(
+ emptyCharArray, N, ConstantInt::get(I32, 0), true);
+ newDynamicLDS.push_back(ConstantExpr::getPtrToInt(GEP, I32));
+ } else {
+ newDynamicLDS.push_back(PoisonValue::get(I32));
+ }
+ }
+ assert(OrderedKernels.size() == newDynamicLDS.size());
+
+ ArrayType *t = ArrayType::get(I32, newDynamicLDS.size());
+ Constant *init = ConstantArray::get(t, newDynamicLDS);
+ GlobalVariable *table = new GlobalVariable(
+ M, t, true, GlobalValue::InternalLinkage, init,
+ "llvm.amdgcn.dynlds.offset.table", nullptr,
+ GlobalValue::NotThreadLocal, AMDGPUAS::CONSTANT_ADDRESS);
+
+ for (GlobalVariable *GV : DynamicVariables) {
+ for (Use &U : make_early_inc_range(GV->uses())) {
+ auto *I = dyn_cast<Instruction>(U.getUser());
+ if (!I)
+ continue;
+ if (isKernelLDS(I->getFunction()))
+ continue;
+
+ replaceUseWithTableLookup(M, Builder, table, GV, U, nullptr);
+ }
+ }
+ }
+ return KernelToCreatedDynamicLDS;
+ }
+
+ bool runOnModule(Module &M) override {
+ CallGraph CG = CallGraph(M);
+ bool Changed = superAlignLDSGlobals(M);
+
+ Changed |= eliminateConstantExprUsesOfLDSFromAllInstructions(M);
+
+ Changed = true; // todo: narrow this down
+
+ // For each kernel, what variables does it access directly or through
+ // callees
+ LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M);
+
+ // For each variable accessed through callees, which kernels access it
+ VariableFunctionMap LDSToKernelsThatNeedToAccessItIndirectly;
+ for (auto &K : LDSUsesInfo.indirect_access) {
+ Function *F = K.first;
+ assert(isKernelLDS(F));
+ for (GlobalVariable *GV : K.second) {
+ LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(F);
+ }
+ }
+
+ // Partition variables accessed indirectly into the different strategies
+ DenseSet<GlobalVariable *> ModuleScopeVariables;
+ DenseSet<GlobalVariable *> TableLookupVariables;
+ DenseSet<GlobalVariable *> KernelAccessVariables;
+ DenseSet<GlobalVariable *> DynamicVariables;
+ partitionVariablesIntoIndirectStrategies(
+ M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly,
+ ModuleScopeVariables, TableLookupVariables, KernelAccessVariables,
+ DynamicVariables);
+
+ // If the kernel accesses a variable that is going to be stored in the
+ // module instance through a call then that kernel needs to allocate the
+ // module instance
+ const DenseSet<Function *> KernelsThatAllocateModuleLDS =
+ kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
+ ModuleScopeVariables);
+ const DenseSet<Function *> KernelsThatAllocateTableLDS =
+ kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
+ TableLookupVariables);
+
+ const DenseSet<Function *> KernelsThatIndirectlyAllocateDynamicLDS =
+ kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo,
+ DynamicVariables);
+
+ GlobalVariable *MaybeModuleScopeStruct = lowerModuleScopeStructVariables(
+ M, ModuleScopeVariables, KernelsThatAllocateModuleLDS);
+
+ DenseMap<Function *, LDSVariableReplacement> KernelToReplacement =
+ lowerKernelScopeStructVariables(M, LDSUsesInfo, ModuleScopeVariables,
+ KernelsThatAllocateModuleLDS,
+ MaybeModuleScopeStruct);
// Lower zero cost accesses to the kernel instances just created
for (auto &GV : KernelAccessVariables) {
@@ -879,72 +1154,113 @@ public:
Vec.insert(GV);
replaceLDSVariablesWithStruct(M, Vec, Replacement, [](Use &U) {
- return isa<Instruction>(U.getUser());
+ return isa<Instruction>(U.getUser());
});
}
+ // The ith element of this vector is kernel id i
+ std::vector<Function *> OrderedKernels =
+ assignLDSKernelIDToEachKernel(&M, KernelsThatAllocateTableLDS,
+ KernelsThatIndirectlyAllocateDynamicLDS);
+
if (!KernelsThatAllocateTableLDS.empty()) {
- // Collect the kernels that allocate table lookup LDS
- std::vector<Function *> OrderedKernels;
- {
- for (Function &Func : M.functions()) {
- if (Func.isDeclaration())
- continue;
- if (!isKernelLDS(&Func))
- continue;
+ LLVMContext &Ctx = M.getContext();
+ IRBuilder<> Builder(Ctx);
- if (KernelsThatAllocateTableLDS.contains(&Func)) {
- assert(Func.hasName()); // else fatal error earlier
- OrderedKernels.push_back(&Func);
- }
- }
+ // The order must be consistent between lookup table and accesses to
+ // lookup table
+ auto TableLookupVariablesOrdered =
+ sortByName(std::vector<GlobalVariable *>(TableLookupVariables.begin(),
+ TableLookupVariables.end()));
- // Put them in an arbitrary but reproducible order
- llvm::sort(OrderedKernels.begin(), OrderedKernels.end(),
- [](const Function *lhs, const Function *rhs) -> bool {
- return lhs->getName() < rhs->getName();
- });
+ GlobalVariable *LookupTable = buildLookupTable(
+ M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement);
+ replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered,
+ LookupTable);
+ }
- // Annotate the kernels with their order in this vector
- LLVMContext &Ctx = M.getContext();
- IRBuilder<> Builder(Ctx);
+ DenseMap<Function *, GlobalVariable *> KernelToCreatedDynamicLDS =
+ lowerDynamicLDSVariables(M, LDSUsesInfo,
+ KernelsThatIndirectlyAllocateDynamicLDS,
+ DynamicVariables, OrderedKernels);
- if (OrderedKernels.size() > UINT32_MAX) {
- // 32 bit keeps it in one SGPR. > 2**32 kernels won't fit on the GPU
- report_fatal_error("Unimplemented LDS lowering for > 2**32 kernels");
- }
+ // All kernel frames have been allocated. Calculate and record the
+ // addresses.
+ {
+ const DataLayout &DL = M.getDataLayout();
- for (size_t i = 0; i < OrderedKernels.size(); i++) {
- Metadata *AttrMDArgs[1] = {
- ConstantAsMetadata::get(Builder.getInt32(i)),
- };
- OrderedKernels[i]->setMetadata("llvm.amdgcn.lds.kernel.id",
- MDNode::get(Ctx, AttrMDArgs));
+ for (Function &Func : M.functions()) {
+ if (Func.isDeclaration() || !isKernelLDS(&Func))
+ continue;
- markUsedByKernel(Builder, OrderedKernels[i],
- KernelToReplacement[OrderedKernels[i]].SGV);
+ // All three of these are optional. The first variable is allocated at
+ // zero. They are allocated by AMDGPUMachineFunction as one block.
+ // Layout:
+ //{
+ // module.lds
+ // alignment padding
+ // kernel instance
+ // alignment padding
+ // dynamic lds variables
+ //}
+
+ const bool AllocateModuleScopeStruct =
+ MaybeModuleScopeStruct &&
+ KernelsThatAllocateModuleLDS.contains(&Func);
+
+ auto Replacement = KernelToReplacement.find(&Func);
+ const bool AllocateKernelScopeStruct =
+ Replacement != KernelToReplacement.end();
+
+ const bool AllocateDynamicVariable =
+ KernelToCreatedDynamicLDS.contains(&Func);
+
+ uint32_t Offset = 0;
+
+ if (AllocateModuleScopeStruct) {
+ // Allocated at zero, recorded once on construction, not once per
+ // kernel
+ Offset += DL.getTypeAllocSize(MaybeModuleScopeStruct->getValueType());
}
- }
- // The order must be consistent between lookup table and accesses to
- // lookup table
- std::vector<GlobalVariable *> TableLookupVariablesOrdered(
- TableLookupVariables.begin(), TableLookupVariables.end());
- llvm::sort(TableLookupVariablesOrdered.begin(),
- TableLookupVariablesOrdered.end(),
- [](const GlobalVariable *lhs, const GlobalVariable *rhs) {
- return lhs->getName() < rhs->getName();
- });
+ if (AllocateKernelScopeStruct) {
+ GlobalVariable *KernelStruct = Replacement->second.SGV;
+ Offset = alignTo(Offset, AMDGPU::getAlign(DL, KernelStruct));
+ recordLDSAbsoluteAddress(&M, KernelStruct, Offset);
+ Offset += DL.getTypeAllocSize(KernelStruct->getValueType());
+ }
- GlobalVariable *LookupTable = buildLookupTable(
- M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement);
- replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered,
- LookupTable);
+ // If there is dynamic allocation, the alignment needed is included in
+ // the static frame size. There may be no reference to the dynamic
+ // variable in the kernel itself, so without including it here, that
+ // alignment padding could be missed.
+ if (AllocateDynamicVariable) {
+ GlobalVariable *DynamicVariable = KernelToCreatedDynamicLDS[&Func];
+ Offset = alignTo(Offset, AMDGPU::getAlign(DL, DynamicVariable));
+ recordLDSAbsoluteAddress(&M, DynamicVariable, Offset);
+ }
+
+ if (Offset != 0) {
+ std::string Buffer;
+ raw_string_ostream SS{Buffer};
+ SS << format("%u", Offset);
+
+ // Instead of explicitly marking kernels that access dynamic variables
+ // using special case metadata, annotate with min-lds == max-lds, i.e.
+ // that there is no more space available for allocating more static
+ // LDS variables. That is the right condition to prevent allocating
+ // more variables which would collide with the addresses assigned to
+ // dynamic variables.
+ if (AllocateDynamicVariable)
+ SS << format(",%u", Offset);
+
+ Func.addFnAttr("amdgpu-lds-size", Buffer);
+ }
+ }
}
for (auto &GV : make_early_inc_range(M.globals()))
if (AMDGPU::isLDSVariableToLower(GV)) {
-
// probably want to remove from used lists
GV.removeDeadConstantUsers();
if (GV.use_empty())
@@ -1017,12 +1333,9 @@ private:
// The order of fields in this struct depends on the order of
 // variables in the argument, which varies when changing how they
// are identified, leading to spurious test breakage.
- std::vector<GlobalVariable *> Sorted(LDSVarsToTransform.begin(),
- LDSVarsToTransform.end());
- llvm::sort(Sorted.begin(), Sorted.end(),
- [](const GlobalVariable *lhs, const GlobalVariable *rhs) {
- return lhs->getName() < rhs->getName();
- });
+ auto Sorted = sortByName(std::vector<GlobalVariable *>(
+ LDSVarsToTransform.begin(), LDSVarsToTransform.end()));
+
for (GlobalVariable *GV : Sorted) {
OptimizedStructLayoutField F(GV,
DL.getTypeAllocSize(GV->getValueType()),
@@ -1101,21 +1414,17 @@ private:
}
template <typename PredicateTy>
- void replaceLDSVariablesWithStruct(
+ static void replaceLDSVariablesWithStruct(
Module &M, DenseSet<GlobalVariable *> const &LDSVarsToTransformArg,
- LDSVariableReplacement Replacement, PredicateTy Predicate) {
+ const LDSVariableReplacement &Replacement, PredicateTy Predicate) {
LLVMContext &Ctx = M.getContext();
const DataLayout &DL = M.getDataLayout();
// A hack... we need to insert the aliasing info in a predictable order for
// lit tests. Would like to have them in a stable order already, ideally the
// same order they get allocated, which might mean an ordered set container
- std::vector<GlobalVariable *> LDSVarsToTransform(
- LDSVarsToTransformArg.begin(), LDSVarsToTransformArg.end());
- llvm::sort(LDSVarsToTransform.begin(), LDSVarsToTransform.end(),
- [](const GlobalVariable *lhs, const GlobalVariable *rhs) {
- return lhs->getName() < rhs->getName();
- });
+ auto LDSVarsToTransform = sortByName(std::vector<GlobalVariable *>(
+ LDSVarsToTransformArg.begin(), LDSVarsToTransformArg.end()));
// Create alias.scope and their lists. Each field in the new structure
// does not alias with all other fields.
@@ -1137,7 +1446,7 @@ private:
// field of the instance that will be allocated by AMDGPUMachineFunction
for (size_t I = 0; I < NumberVars; I++) {
GlobalVariable *GV = LDSVarsToTransform[I];
- Constant *GEP = Replacement.LDSVarsToConstantGEP[GV];
+ Constant *GEP = Replacement.LDSVarsToConstantGEP.at(GV);
GV->replaceUsesWithIf(GEP, Predicate);
@@ -1159,9 +1468,9 @@ private:
}
}
- void refineUsesAlignmentAndAA(Value *Ptr, Align A, const DataLayout &DL,
- MDNode *AliasScope, MDNode *NoAlias,
- unsigned MaxDepth = 5) {
+ static void refineUsesAlignmentAndAA(Value *Ptr, Align A,
+ const DataLayout &DL, MDNode *AliasScope,
+ MDNode *NoAlias, unsigned MaxDepth = 5) {
if (!MaxDepth || (A == 1 && !AliasScope))
return;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index d88a2cd961b2..c24d39b9e5fd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -13,6 +13,7 @@
//
#include "AMDGPUMCInstLower.h"
+#include "AMDGPU.h"
#include "AMDGPUAsmPrinter.h"
#include "AMDGPUMachineFunction.h"
#include "AMDGPUTargetMachine.h"
@@ -133,7 +134,8 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.addOperand(Dest);
OutMI.addOperand(Src);
return;
- } else if (Opcode == AMDGPU::SI_TCRETURN) {
+ } else if (Opcode == AMDGPU::SI_TCRETURN ||
+ Opcode == AMDGPU::SI_TCRETURN_GFX) {
// TODO: How to use branch immediate and avoid register+add?
Opcode = AMDGPU::S_SETPC_B64;
}
@@ -168,12 +170,11 @@ bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
const MCExpr *AMDGPUAsmPrinter::lowerConstant(const Constant *CV) {
// Intercept LDS variables with known addresses
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(CV)) {
- if (AMDGPUMachineFunction::isKnownAddressLDSGlobal(*GV)) {
- unsigned offset =
- AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(*GV);
- Constant *C = ConstantInt::get(CV->getContext(), APInt(32, offset));
- return AsmPrinter::lowerConstant(C);
+ if (const GlobalVariable *GV = dyn_cast<const GlobalVariable>(CV)) {
+ if (std::optional<uint32_t> Address =
+ AMDGPUMachineFunction::getLDSAbsoluteAddress(*GV)) {
+ auto *IntTy = Type::getInt32Ty(CV->getContext());
+ return AsmPrinter::lowerConstant(ConstantInt::get(IntTy, *Address));
}
}
@@ -285,11 +286,10 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
(!STI.hasOffset3fBug() || !MI->isBranch())) {
SmallVector<MCFixup, 4> Fixups;
SmallVector<char, 16> CodeBytes;
- raw_svector_ostream CodeStream(CodeBytes);
- std::unique_ptr<MCCodeEmitter> InstEmitter(createSIMCCodeEmitter(
+ std::unique_ptr<MCCodeEmitter> InstEmitter(createAMDGPUMCCodeEmitter(
*STI.getInstrInfo(), OutContext));
- InstEmitter->encodeInstruction(TmpInst, CodeStream, Fixups, STI);
+ InstEmitter->encodeInstruction(TmpInst, CodeBytes, Fixups, STI);
assert(CodeBytes.size() == STI.getInstrInfo()->getInstSizeInBytes(*MI));
}
@@ -308,10 +308,9 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
// Disassemble instruction/operands to hex representation.
SmallVector<MCFixup, 4> Fixups;
SmallVector<char, 16> CodeBytes;
- raw_svector_ostream CodeStream(CodeBytes);
DumpCodeInstEmitter->encodeInstruction(
- TmpInst, CodeStream, Fixups, MF->getSubtarget<MCSubtargetInfo>());
+ TmpInst, CodeBytes, Fixups, MF->getSubtarget<MCSubtargetInfo>());
HexLines.resize(HexLines.size() + 1);
std::string &HexLine = HexLines.back();
raw_string_ostream HexStream(HexLine);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
index c96fab08a267..d90fcac87540 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineCFGStructurizer.cpp
@@ -2600,9 +2600,6 @@ bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) {
LLVM_DEBUG(dbgs() << "CurrentRegion: \n");
LLVM_DEBUG(LRegion->print(dbgs(), TRI));
- auto CNI = CI;
- ++CNI;
-
MRT *Child = (*CI);
if (Child->isRegion()) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index a6a32b98f44c..44bbfe6f13d9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -10,8 +10,11 @@
#include "AMDGPU.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPUSubtarget.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -41,6 +44,18 @@ AMDGPUMachineFunction::AMDGPUMachineFunction(const Function &F,
// Assume the attribute allocates before any known GDS globals.
StaticGDSSize = GDSSize;
+ // Second value, if present, is the maximum value that can be assigned.
+ // Useful in PromoteAlloca or for LDS spills. Could be used for diagnostics
+ // during codegen.
+ std::pair<unsigned, unsigned> LDSSizeRange = AMDGPU::getIntegerPairAttribute(
+ F, "amdgpu-lds-size", {0, UINT32_MAX}, true);
+
+ // The two separate variables are only profitable when the LDS module lowering
+ // pass is disabled. If graphics does not use dynamic LDS, this is never
+ // profitable. Leaving cleanup for a later change.
+ LDSSize = LDSSizeRange.first;
+ StaticLDSSize = LDSSize;
+
CallingConv::ID CC = F.getCallingConv();
if (CC == CallingConv::AMDGPU_KERNEL || CC == CallingConv::SPIR_KERNEL)
ExplicitKernArgSize = ST.getExplicitKernArgSize(F, MaxKernArgAlign);
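// Illustrative only. The "amdgpu-lds-size" string attribute consumed above
// encodes either "min" or "min,max"; equal min and max means no further
// static LDS may be allocated (the case used when dynamic LDS follows the
// static frame). A hypothetical stand-alone parser with the same defaults as
// the getIntegerPairAttribute call could look like:
static std::pair<uint32_t, uint32_t> parseLdsSizeAttr(StringRef V) {
  uint32_t Min = 0, Max = UINT32_MAX; // defaults used above
  auto [MinStr, MaxStr] = V.split(',');
  (void)MinStr.getAsInteger(/*Radix=*/0, Min);
  if (!MaxStr.empty())
    (void)MaxStr.getAsInteger(/*Radix=*/0, Max);
  return {Min, Max};
}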
@@ -63,6 +78,42 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
unsigned Offset;
if (GV.getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
+
+ std::optional<uint32_t> MaybeAbs = getLDSAbsoluteAddress(GV);
+ if (MaybeAbs) {
+ // Absolute address LDS variables that exist prior to the LDS lowering
+ // pass raise a fatal error in that pass. These failure modes are only
+ // reachable if that lowering pass is disabled or broken. If/when adding
+ // support for absolute addresses on user specified variables, the
+ // alignment check moves to the lowering pass and the frame calculation
+ // needs to take the user variables into consideration.
+
+ uint32_t ObjectStart = *MaybeAbs;
+
+ if (ObjectStart != alignTo(ObjectStart, Alignment)) {
+ report_fatal_error("Absolute address LDS variable inconsistent with "
+ "variable alignment");
+ }
+
+ if (isModuleEntryFunction()) {
+ // If this is a module entry function, we can also sanity check against
+ // the static frame. Strictly it would be better to check against the
+ // attribute, i.e. that the variable is within the always-allocated
+ // section, and not within some other non-absolute-address object
+ // allocated here, but the extra error detection is minimal and we would
+ // have to pass the Function around or cache the attribute value.
+ uint32_t ObjectEnd =
+ ObjectStart + DL.getTypeAllocSize(GV.getValueType());
+ if (ObjectEnd > StaticLDSSize) {
+ report_fatal_error(
+ "Absolute address LDS variable outside of static frame");
+ }
+ }
+
+ Entry.first->second = ObjectStart;
+ return ObjectStart;
+ }
+
/// TODO: We should sort these to minimize wasted space due to alignment
/// padding. Currently the padding is decided by the first encountered use
/// during lowering.
@@ -87,135 +138,54 @@ unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
return Offset;
}
-static constexpr StringLiteral ModuleLDSName = "llvm.amdgcn.module.lds";
-
-bool AMDGPUMachineFunction::isKnownAddressLDSGlobal(const GlobalVariable &GV) {
- auto name = GV.getName();
- return (name == ModuleLDSName) ||
- (name.startswith("llvm.amdgcn.kernel.") && name.endswith(".lds"));
-}
-
-const Function *AMDGPUMachineFunction::getKernelLDSFunctionFromGlobal(
- const GlobalVariable &GV) {
- const Module &M = *GV.getParent();
- StringRef N(GV.getName());
- if (N.consume_front("llvm.amdgcn.kernel.") && N.consume_back(".lds")) {
- return M.getFunction(N);
- }
- return nullptr;
-}
-
-const GlobalVariable *
-AMDGPUMachineFunction::getKernelLDSGlobalFromFunction(const Function &F) {
+static const GlobalVariable *
+getKernelDynLDSGlobalFromFunction(const Function &F) {
const Module *M = F.getParent();
- std::string KernelLDSName = "llvm.amdgcn.kernel.";
- KernelLDSName += F.getName();
- KernelLDSName += ".lds";
- return M->getNamedGlobal(KernelLDSName);
-}
-
-// This kernel calls no functions that require the module lds struct
-static bool canElideModuleLDS(const Function &F) {
- return F.hasFnAttribute("amdgpu-elide-module-lds");
-}
-
-unsigned AMDGPUMachineFunction::calculateKnownAddressOfLDSGlobal(
- const GlobalVariable &GV) {
- // module.lds, then alignment padding, then kernel.lds, then other variables
- // if any
-
- assert(isKnownAddressLDSGlobal(GV));
- unsigned Offset = 0;
-
- if (GV.getName() == ModuleLDSName) {
- return 0;
- }
-
- const Module *M = GV.getParent();
- const DataLayout &DL = M->getDataLayout();
-
- const GlobalVariable *GVM = M->getNamedGlobal(ModuleLDSName);
- const Function *f = getKernelLDSFunctionFromGlobal(GV);
-
- // Account for module.lds if allocated for this function
- if (GVM && f && !canElideModuleLDS(*f)) {
- // allocator aligns this to var align, but it's zero to begin with
- Offset += DL.getTypeAllocSize(GVM->getValueType());
- }
-
- // No dynamic LDS alignment done by allocateModuleLDSGlobal
- Offset = alignTo(
- Offset, DL.getValueOrABITypeAlignment(GV.getAlign(), GV.getValueType()));
-
- return Offset;
+ std::string KernelDynLDSName = "llvm.amdgcn.";
+ KernelDynLDSName += F.getName();
+ KernelDynLDSName += ".dynlds";
+ return M->getNamedGlobal(KernelDynLDSName);
}
-void AMDGPUMachineFunction::allocateKnownAddressLDSGlobal(const Function &F) {
- const Module *M = F.getParent();
-
- // This function is called before allocating any other LDS so that it can
- // reliably put values at known addresses. Consequently, dynamic LDS, if
- // present, will not yet have been allocated
-
- assert(getDynLDSAlign() == Align() && "dynamic LDS not yet allocated");
-
- if (isModuleEntryFunction()) {
-
- // Pointer values start from zero, memory allocated per-kernel-launch
- // Variables can be grouped into a module level struct and a struct per
- // kernel function by AMDGPULowerModuleLDSPass. If that is done, they
- // are allocated at statically computable addresses here.
- //
- // Address 0
- // {
- // llvm.amdgcn.module.lds
- // }
- // alignment padding
- // {
- // llvm.amdgcn.kernel.some-name.lds
- // }
- // other variables, e.g. dynamic lds, allocated after this call
-
- const GlobalVariable *GV = M->getNamedGlobal(ModuleLDSName);
- const GlobalVariable *KV = getKernelLDSGlobalFromFunction(F);
-
- if (GV && !canElideModuleLDS(F)) {
- assert(isKnownAddressLDSGlobal(*GV));
- unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *GV, Align());
- (void)Offset;
- assert(Offset == calculateKnownAddressOfLDSGlobal(*GV) &&
- "Module LDS expected to be allocated before other LDS");
- }
-
- if (KV) {
- // The per-kernel offset is deterministic because it is allocated
- // before any other non-module LDS variables.
- assert(isKnownAddressLDSGlobal(*KV));
- unsigned Offset = allocateLDSGlobal(M->getDataLayout(), *KV, Align());
- (void)Offset;
- assert(Offset == calculateKnownAddressOfLDSGlobal(*KV) &&
- "Kernel LDS expected to be immediately after module LDS");
+std::optional<uint32_t>
+AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
+ // TODO: Would be more consistent with the abs symbols to use a range
+ MDNode *MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
+ if (MD && MD->getNumOperands() == 1) {
+ if (ConstantInt *KnownSize =
+ mdconst::extract<ConstantInt>(MD->getOperand(0))) {
+ uint64_t ZExt = KnownSize->getZExtValue();
+ if (ZExt <= UINT32_MAX) {
+ return ZExt;
+ }
}
}
+ return {};
}
std::optional<uint32_t>
-AMDGPUMachineFunction::getLDSKernelIdMetadata(const Function &F) {
- auto MD = F.getMetadata("llvm.amdgcn.lds.kernel.id");
- if (MD && MD->getNumOperands() == 1) {
- ConstantInt *KnownSize = mdconst::extract<ConstantInt>(MD->getOperand(0));
- if (KnownSize) {
- uint64_t V = KnownSize->getZExtValue();
- if (V <= UINT32_MAX) {
- return V;
- }
+AMDGPUMachineFunction::getLDSAbsoluteAddress(const GlobalValue &GV) {
+ if (GV.getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS)
+ return {};
+
+ std::optional<ConstantRange> AbsSymRange = GV.getAbsoluteSymbolRange();
+ if (!AbsSymRange)
+ return {};
+
+ if (const APInt *V = AbsSymRange->getSingleElement()) {
+ std::optional<uint64_t> ZExt = V->tryZExtValue();
+ if (ZExt && (*ZExt <= UINT32_MAX)) {
+ return *ZExt;
}
}
+
return {};
}
-void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
+void AMDGPUMachineFunction::setDynLDSAlign(const Function &F,
const GlobalVariable &GV) {
+ const Module *M = F.getParent();
+ const DataLayout &DL = M->getDataLayout();
assert(DL.getTypeAllocSize(GV.getValueType()).isZero());
Align Alignment =
@@ -225,4 +195,17 @@ void AMDGPUMachineFunction::setDynLDSAlign(const DataLayout &DL,
LDSSize = alignTo(StaticLDSSize, Alignment);
DynLDSAlign = Alignment;
+
+ // If there is a dynamic LDS variable associated with this function F, every
+ // further dynamic LDS instance (allocated by calling setDynLDSAlign) must
+ // map to the same address. This holds because no LDS is allocated after the
+ // lowering pass if there are dynamic LDS variables present.
+ const GlobalVariable *Dyn = getKernelDynLDSGlobalFromFunction(F);
+ if (Dyn) {
+ unsigned Offset = LDSSize; // return this?
+ std::optional<uint32_t> Expect = getLDSAbsoluteAddress(*Dyn);
+ if (!Expect || (Offset != *Expect)) {
+ report_fatal_error("Inconsistent metadata on dynamic LDS variable");
+ }
+ }
}
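getLDSAbsoluteAddress() in the file above recovers a fixed LDS offset from a global's !absolute_symbol metadata: a [Lo, Hi) ConstantRange whose single element is the address. A hedged sketch of the producer side, roughly how a lowering pass might pin an addrspace(3) variable to a byte offset; the helper name and the choice of a 64-bit range type are assumptions, not taken from this patch:

#include <cstdint>
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// Attach !absolute_symbol [Addr, Addr + 1) to GV so that
// GlobalValue::getAbsoluteSymbolRange()->getSingleElement() yields Addr.
static void pinLDSVariable(GlobalVariable &GV, uint64_t Addr) {
  LLVMContext &Ctx = GV.getContext();
  Type *I64 = Type::getInt64Ty(Ctx);
  Metadata *Bounds[] = {
      ConstantAsMetadata::get(ConstantInt::get(I64, Addr)),
      ConstantAsMetadata::get(ConstantInt::get(I64, Addr + 1))};
  GV.setMetadata(LLVMContext::MD_absolute_symbol, MDNode::get(Ctx, Bounds));
}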
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index f27f8252a4d8..5780fa64a7e4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -104,26 +104,12 @@ public:
unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalVariable &GV,
Align Trailing);
- void allocateKnownAddressLDSGlobal(const Function &F);
-
- // A kernel function may have an associated LDS allocation, and a kernel-scope
- // LDS allocation must have an associated kernel function
-
- // LDS allocation should have an associated kernel function
- static const Function *
- getKernelLDSFunctionFromGlobal(const GlobalVariable &GV);
- static const GlobalVariable *
- getKernelLDSGlobalFromFunction(const Function &F);
-
- // Module or kernel scope LDS variable
- static bool isKnownAddressLDSGlobal(const GlobalVariable &GV);
- static unsigned calculateKnownAddressOfLDSGlobal(const GlobalVariable &GV);
-
static std::optional<uint32_t> getLDSKernelIdMetadata(const Function &F);
+ static std::optional<uint32_t> getLDSAbsoluteAddress(const GlobalValue &GV);
Align getDynLDSAlign() const { return DynLDSAlign; }
- void setDynLDSAlign(const DataLayout &DL, const GlobalVariable &GV);
+ void setDynLDSAlign(const Function &F, const GlobalVariable &GV);
};
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
index 98c5c96cd4b2..2092707c8a3f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUOpenCLEnqueuedBlockLowering.cpp
@@ -72,31 +72,6 @@ ModulePass* llvm::createAMDGPUOpenCLEnqueuedBlockLoweringPass() {
return new AMDGPUOpenCLEnqueuedBlockLowering();
}
-/// Collect direct or indirect callers of \p F and save them
-/// to \p Callers.
-static void collectCallers(Function *F, DenseSet<Function *> &Callers) {
- for (auto *U : F->users()) {
- if (auto *CI = dyn_cast<CallInst>(&*U)) {
- auto *Caller = CI->getParent()->getParent();
- if (Callers.insert(Caller).second)
- collectCallers(Caller, Callers);
- }
- }
-}
-
-/// If \p U is instruction or constant, collect functions which directly or
-/// indirectly use it.
-static void collectFunctionUsers(User *U, DenseSet<Function *> &Funcs) {
- if (auto *I = dyn_cast<Instruction>(U)) {
- auto *F = I->getParent()->getParent();
- if (Funcs.insert(F).second)
- collectCallers(F, Funcs);
- return;
- }
- for (User *U : U->users())
- collectFunctionUsers(U, Funcs);
-}
-
bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
DenseSet<Function *> Callers;
auto &C = M.getContext();
@@ -131,9 +106,6 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
/*isExternallyInitialized=*/true);
LLVM_DEBUG(dbgs() << "runtime handle created: " << *GV << '\n');
- for (User *U : F.users())
- collectFunctionUsers(U, Callers);
-
F.replaceAllUsesWith(ConstantExpr::getAddrSpaceCast(GV, F.getType()));
F.addFnAttr("runtime-handle", RuntimeHandle);
F.setLinkage(GlobalValue::ExternalLinkage);
@@ -141,15 +113,5 @@ bool AMDGPUOpenCLEnqueuedBlockLowering::runOnModule(Module &M) {
}
}
- // FIXME: This call graph analysis is broken and should be
- // removed. AMDGPUAttributor infers the individual implicit argument fields
- // are needed or not, but the runtime crashes in cases where we fail to
- // optimize these out at -O0.
- for (auto *F : Callers) {
- if (F->getCallingConv() != CallingConv::AMDGPU_KERNEL)
- continue;
- F->addFnAttr("calls-enqueue-kernel");
- LLVM_DEBUG(dbgs() << "mark enqueue_kernel caller:" << F->getName() << '\n');
- }
return Changed;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 9c04df0b3683..536fb02cb4ec 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -1,4 +1,4 @@
-//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===//
+//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -19,6 +19,8 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -26,22 +28,41 @@
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"
+#define GET_GICOMBINER_DEPS
+#include "AMDGPUGenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_DEPS
+
#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"
using namespace llvm;
using namespace MIPatternMatch;
-class AMDGPUPostLegalizerCombinerHelper {
+namespace {
+#define GET_GICOMBINER_TYPES
+#include "AMDGPUGenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_TYPES
+
+class AMDGPUPostLegalizerCombinerImpl : public GIMatchTableExecutor {
protected:
+ const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig;
+
MachineIRBuilder &B;
MachineFunction &MF;
MachineRegisterInfo &MRI;
+ const GCNSubtarget &STI;
+ const SIInstrInfo &TII;
AMDGPUCombinerHelper &Helper;
+ GISelChangeObserver &Observer;
public:
- AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B,
- AMDGPUCombinerHelper &Helper)
- : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
+ AMDGPUPostLegalizerCombinerImpl(
+ const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
+ MachineIRBuilder &B, AMDGPUCombinerHelper &Helper,
+ GISelChangeObserver &Observer);
+
+ static const char *getName() { return "AMDGPUPostLegalizerCombinerImpl"; }
+
+ bool tryCombineAll(MachineInstr &I) const;
struct FMinFMaxLegacyInfo {
Register LHS;
@@ -52,15 +73,16 @@ public:
};
// TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
- bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info);
+ bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info) const;
void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
- const FMinFMaxLegacyInfo &Info);
+ const FMinFMaxLegacyInfo &Info) const;
- bool matchUCharToFloat(MachineInstr &MI);
- void applyUCharToFloat(MachineInstr &MI);
+ bool matchUCharToFloat(MachineInstr &MI) const;
+ void applyUCharToFloat(MachineInstr &MI) const;
- bool matchRcpSqrtToRsq(MachineInstr &MI,
- std::function<void(MachineIRBuilder &)> &MatchInfo);
+ bool
+ matchRcpSqrtToRsq(MachineInstr &MI,
+ std::function<void(MachineIRBuilder &)> &MatchInfo) const;
// FIXME: Should be able to have 2 separate matchdatas rather than custom
// struct boilerplate.
@@ -69,15 +91,49 @@ public:
unsigned ShiftOffset;
};
- bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
+ bool matchCvtF32UByteN(MachineInstr &MI,
+ CvtF32UByteMatchInfo &MatchInfo) const;
void applyCvtF32UByteN(MachineInstr &MI,
- const CvtF32UByteMatchInfo &MatchInfo);
+ const CvtF32UByteMatchInfo &MatchInfo) const;
+
+ bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg) const;
- bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg);
+ // Combine unsigned buffer load and signed extension instructions to generate
+  // signed buffer load instructions.
+ bool matchCombineSignExtendInReg(MachineInstr &MI,
+ MachineInstr *&MatchInfo) const;
+ void applyCombineSignExtendInReg(MachineInstr &MI,
+ MachineInstr *&MatchInfo) const;
+
+private:
+#define GET_GICOMBINER_CLASS_MEMBERS
+#define AMDGPUSubtarget GCNSubtarget
+#include "AMDGPUGenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_CLASS_MEMBERS
+#undef AMDGPUSubtarget
};
-bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
- MachineInstr &MI, FMinFMaxLegacyInfo &Info) {
+#define GET_GICOMBINER_IMPL
+#define AMDGPUSubtarget GCNSubtarget
+#include "AMDGPUGenPostLegalizeGICombiner.inc"
+#undef AMDGPUSubtarget
+#undef GET_GICOMBINER_IMPL
+
+AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
+ const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
+ MachineIRBuilder &B, AMDGPUCombinerHelper &Helper,
+ GISelChangeObserver &Observer)
+ : RuleConfig(RuleConfig), B(B), MF(B.getMF()), MRI(*B.getMRI()),
+ STI(MF.getSubtarget<GCNSubtarget>()), TII(*STI.getInstrInfo()),
+ Helper(Helper), Observer(Observer),
+#define GET_GICOMBINER_CONSTRUCTOR_INITS
+#include "AMDGPUGenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_CONSTRUCTOR_INITS
+{
+}
+
+bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy(
+ MachineInstr &MI, FMinFMaxLegacyInfo &Info) const {
// FIXME: Type predicate on pattern
if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
return false;
@@ -91,6 +147,8 @@ bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
Info.True = MI.getOperand(2).getReg();
Info.False = MI.getOperand(3).getReg();
+  // TODO: Handle case where the selected value is an fneg and the compared
+ // constant is the negation of the selected value.
if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
!(Info.LHS == Info.False && Info.RHS == Info.True))
return false;
@@ -110,8 +168,8 @@ bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
}
}
-void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy(
- MachineInstr &MI, const FMinFMaxLegacyInfo &Info) {
+void AMDGPUPostLegalizerCombinerImpl::applySelectFCmpToFMinToFMaxLegacy(
+ MachineInstr &MI, const FMinFMaxLegacyInfo &Info) const {
B.setInstrAndDebugLoc(MI);
auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
@@ -159,7 +217,8 @@ void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy(
MI.eraseFromParent();
}
-bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) {
+bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat(
+ MachineInstr &MI) const {
Register DstReg = MI.getOperand(0).getReg();
// TODO: We could try to match extracting the higher bytes, which would be
@@ -178,7 +237,8 @@ bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) {
return false;
}
-void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
+void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat(
+ MachineInstr &MI) const {
B.setInstrAndDebugLoc(MI);
const LLT S32 = LLT::scalar(32);
@@ -191,19 +251,20 @@ void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);
if (Ty == S32) {
- B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg},
- {SrcReg}, MI.getFlags());
+ B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
+ MI.getFlags());
} else {
- auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32},
- {SrcReg}, MI.getFlags());
+ auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
+ MI.getFlags());
B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
}
MI.eraseFromParent();
}
-bool AMDGPUPostLegalizerCombinerHelper::matchRcpSqrtToRsq(
- MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
+ MachineInstr &MI,
+ std::function<void(MachineIRBuilder &)> &MatchInfo) const {
auto getRcpSrc = [=](const MachineInstr &MI) {
MachineInstr *ResMI = nullptr;
@@ -246,8 +307,8 @@ bool AMDGPUPostLegalizerCombinerHelper::matchRcpSqrtToRsq(
return false;
}
-bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
- MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
+bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
+ MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
Register SrcReg = MI.getOperand(1).getReg();
// Look through G_ZEXT.
@@ -274,8 +335,8 @@ bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
return false;
}
-void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN(
- MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) {
+void AMDGPUPostLegalizerCombinerImpl::applyCvtF32UByteN(
+ MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) const {
B.setInstrAndDebugLoc(MI);
unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;
@@ -292,57 +353,66 @@ void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN(
MI.eraseFromParent();
}
-bool AMDGPUPostLegalizerCombinerHelper::matchRemoveFcanonicalize(
- MachineInstr &MI, Register &Reg) {
+bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize(
+ MachineInstr &MI, Register &Reg) const {
const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
MF.getSubtarget().getTargetLowering());
Reg = MI.getOperand(1).getReg();
return TLI->isCanonicalized(Reg, MF);
}
-class AMDGPUPostLegalizerCombinerHelperState {
-protected:
- AMDGPUCombinerHelper &Helper;
- AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;
+// The buffer_load_{i8, i16} intrinsics are initially lowered as buffer_load_{u8,
+// u16} instructions. Here, the buffer_load_{u8, u16} instructions are combined
+// with sign extension instructions in order to generate buffer_load_{i8, i16}
+// instructions.
- // Note: pointer is necessary because Target Predicates use
- // "Subtarget->"
- const GCNSubtarget *Subtarget;
+// Identify buffer_load_{u8, u16}.
+bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg(
+ MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const {
+ Register Op0Reg = MI.getOperand(1).getReg();
+ SubwordBufferLoad = MRI.getVRegDef(Op0Reg);
-public:
- AMDGPUPostLegalizerCombinerHelperState(
- AMDGPUCombinerHelper &Helper,
- AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper,
- const GCNSubtarget &Subtarget)
- : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper),
- Subtarget(&Subtarget) {}
-};
+ if (!MRI.hasOneNonDBGUse(Op0Reg))
+ return false;
-#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
-#include "AMDGPUGenPostLegalizeGICombiner.inc"
-#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+ // Check if the first operand of the sign extension is a subword buffer load
+ // instruction.
+ return SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE ||
+ SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
+}
-namespace {
-#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
-#include "AMDGPUGenPostLegalizeGICombiner.inc"
-#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+// Combine buffer_load_{u8, u16} and the sign extension instruction to generate
+// buffer_load_{i8, i16}.
+void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
+ MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const {
+ // Modify the opcode and the destination of buffer_load_{u8, u16}:
+ // Replace the opcode.
+ unsigned Opc =
+ SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE
+ ? AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE
+ : AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
+ SubwordBufferLoad->setDesc(TII.get(Opc));
+ // Update the destination register of SubwordBufferLoad with the destination
+ // register of the sign extension.
+ Register SignExtendInsnDst = MI.getOperand(0).getReg();
+ SubwordBufferLoad->getOperand(0).setReg(SignExtendInsnDst);
+ // Remove the sign extension.
+ MI.eraseFromParent();
+}
class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
GISelKnownBits *KB;
MachineDominatorTree *MDT;
- const GCNSubtarget &Subtarget;
+ AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig;
public:
- AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
-
- AMDGPUPostLegalizerCombinerInfo(const GCNSubtarget &Subtarget, bool EnableOpt,
- bool OptSize, bool MinSize,
+ AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
const AMDGPULegalizerInfo *LI,
GISelKnownBits *KB, MachineDominatorTree *MDT)
: CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
/*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
- KB(KB), MDT(MDT), Subtarget(Subtarget) {
- if (!GeneratedRuleCfg.parseCommandLineOption())
+ KB(KB), MDT(MDT) {
+ if (!RuleConfig.parseCommandLineOption())
report_fatal_error("Invalid rule identifier");
}
@@ -355,11 +425,11 @@ bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
MachineIRBuilder &B) const {
AMDGPUCombinerHelper Helper(Observer, B, /*IsPreLegalize*/ false, KB, MDT,
LInfo);
- AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
- AMDGPUGenPostLegalizerCombinerHelper Generated(
- GeneratedRuleCfg, Helper, PostLegalizerHelper, Subtarget);
+  // TODO: Do not re-create the Impl on every inst; it should be per function.
+ AMDGPUPostLegalizerCombinerImpl Impl(RuleConfig, B, Helper, Observer);
+ Impl.setupMF(*MI.getMF(), KB);
- if (Generated.tryCombineAll(Observer, MI, B))
+ if (Impl.tryCombineAll(MI))
return true;
switch (MI.getOpcode()) {
@@ -375,10 +445,6 @@ bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
return false;
}
-#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
-#include "AMDGPUGenPostLegalizeGICombiner.inc"
-#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
-
// Pass boilerplate
// ================
@@ -414,7 +480,7 @@ void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
}
AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
- : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
+ : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}
@@ -428,13 +494,13 @@ bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const AMDGPULegalizerInfo *LI
- = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
+ const AMDGPULegalizerInfo *LI =
+ static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
MachineDominatorTree *MDT =
IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
- AMDGPUPostLegalizerCombinerInfo PCInfo(ST, EnableOpt, F.hasOptSize(),
+ AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
F.hasMinSize(), LI, KB, MDT);
Combiner C(PCInfo, TPC);
return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
@@ -442,8 +508,8 @@ bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
- "Combine AMDGPU machine instrs after legalization",
- false, false)
+ "Combine AMDGPU machine instrs after legalization", false,
+ false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
index a02d2cd302fb..936ca54fcf2e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -20,28 +20,48 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Target/TargetMachine.h"
+#define GET_GICOMBINER_DEPS
+#include "AMDGPUGenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_DEPS
+
#define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
using namespace llvm;
using namespace MIPatternMatch;
+namespace {
+
+#define GET_GICOMBINER_TYPES
+#include "AMDGPUGenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_TYPES
-class AMDGPUPreLegalizerCombinerHelper {
+class AMDGPUPreLegalizerCombinerImpl : public GIMatchTableExecutor {
protected:
+ const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig;
+ const GCNSubtarget &STI;
+
+ GISelChangeObserver &Observer;
MachineIRBuilder &B;
MachineFunction &MF;
MachineRegisterInfo &MRI;
AMDGPUCombinerHelper &Helper;
public:
- AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B,
- AMDGPUCombinerHelper &Helper)
- : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
+ AMDGPUPreLegalizerCombinerImpl(
+ const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
+ const GCNSubtarget &STI, GISelChangeObserver &Observer,
+ MachineIRBuilder &B, AMDGPUCombinerHelper &Helper);
+
+ static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; }
+
+ bool tryCombineAll(MachineInstr &I) const;
struct ClampI64ToI16MatchInfo {
int64_t Cmp1 = 0;
@@ -49,17 +69,42 @@ public:
Register Origin;
};
- bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI,
- MachineFunction &MF,
- ClampI64ToI16MatchInfo &MatchInfo);
+ bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI,
+ const MachineFunction &MF,
+ ClampI64ToI16MatchInfo &MatchInfo) const;
void applyClampI64ToI16(MachineInstr &MI,
- const ClampI64ToI16MatchInfo &MatchInfo);
+ const ClampI64ToI16MatchInfo &MatchInfo) const;
+
+private:
+#define GET_GICOMBINER_CLASS_MEMBERS
+#define AMDGPUSubtarget GCNSubtarget
+#include "AMDGPUGenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_CLASS_MEMBERS
+#undef AMDGPUSubtarget
};
-bool AMDGPUPreLegalizerCombinerHelper::matchClampI64ToI16(
- MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF,
- ClampI64ToI16MatchInfo &MatchInfo) {
+#define GET_GICOMBINER_IMPL
+#define AMDGPUSubtarget GCNSubtarget
+#include "AMDGPUGenPreLegalizeGICombiner.inc"
+#undef AMDGPUSubtarget
+#undef GET_GICOMBINER_IMPL
+
+AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
+ const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
+ const GCNSubtarget &STI, GISelChangeObserver &Observer, MachineIRBuilder &B,
+ AMDGPUCombinerHelper &Helper)
+ : RuleConfig(RuleConfig), STI(STI), Observer(Observer), B(B), MF(B.getMF()),
+ MRI(*B.getMRI()), Helper(Helper),
+#define GET_GICOMBINER_CONSTRUCTOR_INITS
+#include "AMDGPUGenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_CONSTRUCTOR_INITS
+{
+}
+
+bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16(
+ MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF,
+ ClampI64ToI16MatchInfo &MatchInfo) const {
assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
// Try to find a pattern where an i64 value should get clamped to short.
@@ -118,8 +163,8 @@ bool AMDGPUPreLegalizerCombinerHelper::matchClampI64ToI16(
// This can be efficiently written as following:
// v_cvt_pk_i16_i32 v0, v0, v1
// v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
-void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16(
- MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) {
+void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16(
+ MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const {
Register Src = MatchInfo.Origin;
assert(MI.getParent()->getParent()->getRegInfo().getType(Src) ==
@@ -154,40 +199,18 @@ void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16(
MI.eraseFromParent();
}
-class AMDGPUPreLegalizerCombinerHelperState {
-protected:
- AMDGPUCombinerHelper &Helper;
- AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper;
-
-public:
- AMDGPUPreLegalizerCombinerHelperState(
- AMDGPUCombinerHelper &Helper,
- AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper)
- : Helper(Helper), PreLegalizerHelper(PreLegalizerHelper) {}
-};
-
-#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
-#include "AMDGPUGenPreLegalizeGICombiner.inc"
-#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
-
-namespace {
-#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
-#include "AMDGPUGenPreLegalizeGICombiner.inc"
-#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
-
class AMDGPUPreLegalizerCombinerInfo final : public CombinerInfo {
GISelKnownBits *KB;
MachineDominatorTree *MDT;
+ AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig;
public:
- AMDGPUGenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
-
AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
- GISelKnownBits *KB, MachineDominatorTree *MDT)
+ GISelKnownBits *KB, MachineDominatorTree *MDT)
: CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
/*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
KB(KB), MDT(MDT) {
- if (!GeneratedRuleCfg.parseCommandLineOption())
+ if (!RuleConfig.parseCommandLineOption())
report_fatal_error("Invalid rule identifier");
}
@@ -196,15 +219,17 @@ public:
};
bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
- MachineInstr &MI,
- MachineIRBuilder &B) const {
+ MachineInstr &MI,
+ MachineIRBuilder &B) const {
const auto *LI = MI.getMF()->getSubtarget().getLegalizerInfo();
AMDGPUCombinerHelper Helper(Observer, B, /*IsPreLegalize*/ true, KB, MDT, LI);
- AMDGPUPreLegalizerCombinerHelper PreLegalizerHelper(B, Helper);
- AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
- PreLegalizerHelper);
- if (Generated.tryCombineAll(Observer, MI, B))
+ const GCNSubtarget &STI = MI.getMF()->getSubtarget<GCNSubtarget>();
+  // TODO: Do not re-create the Impl on every inst; it should be per function.
+ AMDGPUPreLegalizerCombinerImpl Impl(RuleConfig, STI, Observer, B, Helper);
+ Impl.setupMF(*MI.getMF(), KB);
+
+ if (Impl.tryCombineAll(MI))
return true;
switch (MI.getOpcode()) {
@@ -217,10 +242,6 @@ bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
return false;
}
-#define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
-#include "AMDGPUGenPreLegalizeGICombiner.inc"
-#undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
-
// Pass boilerplate
// ================
@@ -237,6 +258,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
+
private:
bool IsOptNone;
};
@@ -259,7 +281,7 @@ void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
}
AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
- : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
+ : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
index b24300923780..13f83e298cf4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp
@@ -19,9 +19,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
@@ -29,6 +27,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/DataExtractor.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
@@ -46,19 +45,11 @@ public:
private:
bool runOnModule(Module &M) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- }
};
class AMDGPUPrintfRuntimeBindingImpl {
public:
- AMDGPUPrintfRuntimeBindingImpl(
- function_ref<const DominatorTree &(Function &)> GetDT,
- function_ref<const TargetLibraryInfo &(Function &)> GetTLI)
- : GetDT(GetDT), GetTLI(GetTLI) {}
+ AMDGPUPrintfRuntimeBindingImpl() {}
bool run(Module &M);
private:
@@ -67,14 +58,7 @@ private:
bool lowerPrintfForGpu(Module &M);
- Value *simplify(Instruction *I, const TargetLibraryInfo *TLI,
- const DominatorTree *DT) {
- return simplifyInstruction(I, {*TD, TLI, DT});
- }
-
const DataLayout *TD;
- function_ref<const DominatorTree &(Function &)> GetDT;
- function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
SmallVector<CallInst *, 32> Printfs;
};
} // namespace
@@ -175,23 +159,6 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
SmallString<16> OpConvSpecifiers;
Value *Op = CI->getArgOperand(0);
- if (auto LI = dyn_cast<LoadInst>(Op)) {
- Op = LI->getPointerOperand();
- for (auto *Use : Op->users()) {
- if (auto SI = dyn_cast<StoreInst>(Use)) {
- Op = SI->getValueOperand();
- break;
- }
- }
- }
-
- if (auto I = dyn_cast<Instruction>(Op)) {
- Value *Op_simplified =
- simplify(I, &GetTLI(*I->getFunction()), &GetDT(*I->getFunction()));
- if (Op_simplified)
- Op = Op_simplified;
- }
-
StringRef FormatStr;
if (!getConstantStringInfo(Op, FormatStr)) {
Value *Stripped = Op->stripPointerCasts();
@@ -438,20 +405,15 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) {
for (unsigned I = 0, E = WhatToStore.size(); I != E; ++I) {
Value *TheBtCast = WhatToStore[I];
unsigned ArgSize = TD->getTypeAllocSize(TheBtCast->getType());
- SmallVector<Value *, 1> BuffOffset;
- BuffOffset.push_back(ConstantInt::get(I32Ty, ArgSize));
-
- Type *ArgPointer = PointerType::get(TheBtCast->getType(), 1);
- Value *CastedGEP =
- new BitCastInst(BufferIdx, ArgPointer, "PrintBuffPtrCast", Brnch);
- StoreInst *StBuff = new StoreInst(TheBtCast, CastedGEP, Brnch);
+ StoreInst *StBuff = new StoreInst(TheBtCast, BufferIdx, Brnch);
LLVM_DEBUG(dbgs() << "inserting store to printf buffer:\n"
<< *StBuff << '\n');
(void)StBuff;
if (I + 1 == E && ArgCount + 1 == CI->arg_size())
break;
- BufferIdx = GetElementPtrInst::Create(I8Ty, BufferIdx, BuffOffset,
- "PrintBuffNextPtr", Brnch);
+ BufferIdx = GetElementPtrInst::Create(
+ I8Ty, BufferIdx, {ConstantInt::get(I32Ty, ArgSize)},
+ "PrintBuffNextPtr", Brnch);
LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:\n"
<< *BufferIdx << '\n');
}
@@ -491,26 +453,11 @@ bool AMDGPUPrintfRuntimeBindingImpl::run(Module &M) {
}
bool AMDGPUPrintfRuntimeBinding::runOnModule(Module &M) {
- auto GetDT = [this](Function &F) -> DominatorTree & {
- return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
- };
- auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
- return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- };
-
- return AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).run(M);
+ return AMDGPUPrintfRuntimeBindingImpl().run(M);
}
PreservedAnalyses
AMDGPUPrintfRuntimeBindingPass::run(Module &M, ModuleAnalysisManager &AM) {
- FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- auto GetDT = [&FAM](Function &F) -> DominatorTree & {
- return FAM.getResult<DominatorTreeAnalysis>(F);
- };
- auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
- return FAM.getResult<TargetLibraryAnalysis>(F);
- };
- bool Changed = AMDGPUPrintfRuntimeBindingImpl(GetDT, GetTLI).run(M);
+ bool Changed = AMDGPUPrintfRuntimeBindingImpl().run(M);
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index a7da4005e867..1d69f0434b58 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -6,23 +6,42 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass eliminates allocas by either converting them into vectors or
-// by migrating them to local address space.
+// Eliminates allocas by either converting them into vectors or by migrating
+// them to local address space.
+//
+// Two passes are exposed by this file:
+// - "promote-alloca-to-vector", which runs early in the pipeline and only
+// promotes to vector. Promotion to vector is almost always profitable
+// except when the alloca is too big and the promotion would result in
+// very high register pressure.
+// - "promote-alloca", which does both promotion to vector and LDS and runs
+// much later in the pipeline. This runs after SROA because promoting to
+// LDS is of course less profitable than getting rid of the alloca or
+// vectorizing it, thus we only want to do it when the only alternative is
+// lowering the alloca to stack.
+//
+// Note that both of them exist for the old and new PMs. The new PM passes are
+// declared in AMDGPU.h and the legacy PM ones are declared here.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/InstSimplifyFolder.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
#define DEBUG_TYPE "amdgpu-promote-alloca"
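Given the header comment in the hunk above, a hedged sketch of where the two new-PM flavours would sit in a pipeline; the TargetMachine-taking pass constructors are assumed from their declarations in AMDGPU.h, and the real wiring lives in AMDGPUTargetMachine rather than in this snippet:

#include "AMDGPU.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

// Vector-only promotion runs early; the combined vector + LDS promotion runs
// late, after SROA has had a chance to remove the alloca entirely.
static void addPromoteAllocaPasses(FunctionPassManager &EarlyFPM,
                                   FunctionPassManager &LateFPM,
                                   TargetMachine &TM) {
  EarlyFPM.addPass(AMDGPUPromoteAllocaToVectorPass(TM));
  LateFPM.addPass(AMDGPUPromoteAllocaPass(TM));
}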
@@ -30,40 +49,22 @@ using namespace llvm;
namespace {
-static cl::opt<bool> DisablePromoteAllocaToVector(
- "disable-promote-alloca-to-vector",
- cl::desc("Disable promote alloca to vector"),
- cl::init(false));
+static cl::opt<bool>
+ DisablePromoteAllocaToVector("disable-promote-alloca-to-vector",
+ cl::desc("Disable promote alloca to vector"),
+ cl::init(false));
-static cl::opt<bool> DisablePromoteAllocaToLDS(
- "disable-promote-alloca-to-lds",
- cl::desc("Disable promote alloca to LDS"),
- cl::init(false));
+static cl::opt<bool>
+ DisablePromoteAllocaToLDS("disable-promote-alloca-to-lds",
+ cl::desc("Disable promote alloca to LDS"),
+ cl::init(false));
static cl::opt<unsigned> PromoteAllocaToVectorLimit(
- "amdgpu-promote-alloca-to-vector-limit",
- cl::desc("Maximum byte size to consider promote alloca to vector"),
- cl::init(0));
-
-// FIXME: This can create globals so should be a module pass.
-class AMDGPUPromoteAlloca : public FunctionPass {
-public:
- static char ID;
-
- AMDGPUPromoteAlloca() : FunctionPass(ID) {}
-
- bool runOnFunction(Function &F) override;
-
- StringRef getPassName() const override { return "AMDGPU Promote Alloca"; }
-
- bool handleAlloca(AllocaInst &I, bool SufficientLDS);
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- FunctionPass::getAnalysisUsage(AU);
- }
-};
+ "amdgpu-promote-alloca-to-vector-limit",
+ cl::desc("Maximum byte size to consider promote alloca to vector"),
+ cl::init(0));
+// Shared implementation which can do both promotion to vector and to LDS.
class AMDGPUPromoteAllocaImpl {
private:
const TargetMachine &TM;
@@ -83,26 +84,55 @@ private:
/// BaseAlloca is the alloca root the search started from.
/// Val may be that alloca or a recursive user of it.
- bool collectUsesWithPtrTypes(Value *BaseAlloca,
- Value *Val,
- std::vector<Value*> &WorkList) const;
+ bool collectUsesWithPtrTypes(Value *BaseAlloca, Value *Val,
+ std::vector<Value *> &WorkList) const;
/// Val is a derived pointer from Alloca. OpIdx0/OpIdx1 are the operand
/// indices to an instruction with 2 pointer inputs (e.g. select, icmp).
/// Returns true if both operands are derived from the same alloca. Val should
/// be the same value as one of the input operands of UseInst.
bool binaryOpIsDerivedFromSameAlloca(Value *Alloca, Value *Val,
- Instruction *UseInst,
- int OpIdx0, int OpIdx1) const;
+ Instruction *UseInst, int OpIdx0,
+ int OpIdx1) const;
/// Check whether we have enough local memory for promotion.
bool hasSufficientLocalMem(const Function &F);
- bool handleAlloca(AllocaInst &I, bool SufficientLDS);
+ bool tryPromoteAllocaToVector(AllocaInst &I);
+ bool tryPromoteAllocaToLDS(AllocaInst &I, bool SufficientLDS);
public:
- AMDGPUPromoteAllocaImpl(TargetMachine &TM) : TM(TM) {}
- bool run(Function &F);
+ AMDGPUPromoteAllocaImpl(TargetMachine &TM) : TM(TM) {
+ const Triple &TT = TM.getTargetTriple();
+ IsAMDGCN = TT.getArch() == Triple::amdgcn;
+ IsAMDHSA = TT.getOS() == Triple::AMDHSA;
+ }
+
+ bool run(Function &F, bool PromoteToLDS);
+};
+
+// FIXME: This can create globals so should be a module pass.
+class AMDGPUPromoteAlloca : public FunctionPass {
+public:
+ static char ID;
+
+ AMDGPUPromoteAlloca() : FunctionPass(ID) {}
+
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+ if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
+ return AMDGPUPromoteAllocaImpl(TPC->getTM<TargetMachine>())
+ .run(F, /*PromoteToLDS*/ true);
+ return false;
+ }
+
+ StringRef getPassName() const override { return "AMDGPU Promote Alloca"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ }
};
class AMDGPUPromoteAllocaToVector : public FunctionPass {
@@ -111,7 +141,14 @@ public:
AMDGPUPromoteAllocaToVector() : FunctionPass(ID) {}
- bool runOnFunction(Function &F) override;
+ bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
+ if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>())
+ return AMDGPUPromoteAllocaImpl(TPC->getTM<TargetMachine>())
+ .run(F, /*PromoteToLDS*/ false);
+ return false;
+ }
StringRef getPassName() const override {
return "AMDGPU Promote Alloca to vector";
@@ -123,6 +160,22 @@ public:
}
};
+unsigned getMaxVGPRs(const TargetMachine &TM, const Function &F) {
+ if (!TM.getTargetTriple().isAMDGCN())
+ return 128;
+
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ unsigned MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
+
+ // A non-entry function has only 32 caller preserved registers.
+ // Do not promote alloca which will force spilling unless we know the function
+ // will be inlined.
+ if (!F.hasFnAttribute(Attribute::AlwaysInline) &&
+ !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
+ MaxVGPRs = std::min(MaxVGPRs, 32u);
+ return MaxVGPRs;
+}
+
} // end anonymous namespace
char AMDGPUPromoteAlloca::ID = 0;
@@ -142,19 +195,20 @@ INITIALIZE_PASS(AMDGPUPromoteAllocaToVector, DEBUG_TYPE "-to-vector",
char &llvm::AMDGPUPromoteAllocaID = AMDGPUPromoteAlloca::ID;
char &llvm::AMDGPUPromoteAllocaToVectorID = AMDGPUPromoteAllocaToVector::ID;
-bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
-
- if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
- return AMDGPUPromoteAllocaImpl(TPC->getTM<TargetMachine>()).run(F);
+PreservedAnalyses AMDGPUPromoteAllocaPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ bool Changed = AMDGPUPromoteAllocaImpl(TM).run(F, /*PromoteToLDS*/ true);
+ if (Changed) {
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
- return false;
+ return PreservedAnalyses::all();
}
-PreservedAnalyses AMDGPUPromoteAllocaPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- bool Changed = AMDGPUPromoteAllocaImpl(TM).run(F);
+PreservedAnalyses
+AMDGPUPromoteAllocaToVectorPass::run(Function &F, FunctionAnalysisManager &AM) {
+ bool Changed = AMDGPUPromoteAllocaImpl(TM).run(F, /*PromoteToLDS*/ false);
if (Changed) {
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
@@ -163,175 +217,72 @@ PreservedAnalyses AMDGPUPromoteAllocaPass::run(Function &F,
return PreservedAnalyses::all();
}
-bool AMDGPUPromoteAllocaImpl::run(Function &F) {
+FunctionPass *llvm::createAMDGPUPromoteAlloca() {
+ return new AMDGPUPromoteAlloca();
+}
+
+FunctionPass *llvm::createAMDGPUPromoteAllocaToVector() {
+ return new AMDGPUPromoteAllocaToVector();
+}
+
+bool AMDGPUPromoteAllocaImpl::run(Function &F, bool PromoteToLDS) {
Mod = F.getParent();
DL = &Mod->getDataLayout();
- const Triple &TT = TM.getTargetTriple();
- IsAMDGCN = TT.getArch() == Triple::amdgcn;
- IsAMDHSA = TT.getOS() == Triple::AMDHSA;
-
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
if (!ST.isPromoteAllocaEnabled())
return false;
- if (IsAMDGCN) {
- const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
- MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
- // A non-entry function has only 32 caller preserved registers.
- // Do not promote alloca which will force spilling.
- if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
- MaxVGPRs = std::min(MaxVGPRs, 32u);
- } else {
- MaxVGPRs = 128;
- }
+ MaxVGPRs = getMaxVGPRs(TM, F);
- bool SufficientLDS = hasSufficientLocalMem(F);
- bool Changed = false;
- BasicBlock &EntryBB = *F.begin();
+ bool SufficientLDS = PromoteToLDS ? hasSufficientLocalMem(F) : false;
SmallVector<AllocaInst *, 16> Allocas;
- for (Instruction &I : EntryBB) {
- if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
+ for (Instruction &I : F.getEntryBlock()) {
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
+ // Array allocations are probably not worth handling, since an allocation
+ // of the array type is the canonical form.
+ if (!AI->isStaticAlloca() || AI->isArrayAllocation())
+ continue;
Allocas.push_back(AI);
+ }
}
+ bool Changed = false;
for (AllocaInst *AI : Allocas) {
- if (handleAlloca(*AI, SufficientLDS))
+ if (tryPromoteAllocaToVector(*AI))
+ Changed = true;
+ else if (PromoteToLDS && tryPromoteAllocaToLDS(*AI, SufficientLDS))
Changed = true;
}
+ // NOTE: tryPromoteAllocaToVector removes the alloca, so Allocas contains
+ // dangling pointers. If we want to reuse it past this point, the loop above
+ // would need to be updated to remove successfully promoted allocas.
+
return Changed;
}
-std::pair<Value *, Value *>
-AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
- Function &F = *Builder.GetInsertBlock()->getParent();
- const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
-
- if (!IsAMDHSA) {
- Function *LocalSizeYFn
- = Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_y);
- Function *LocalSizeZFn
- = Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_z);
-
- CallInst *LocalSizeY = Builder.CreateCall(LocalSizeYFn, {});
- CallInst *LocalSizeZ = Builder.CreateCall(LocalSizeZFn, {});
-
- ST.makeLIDRangeMetadata(LocalSizeY);
- ST.makeLIDRangeMetadata(LocalSizeZ);
-
- return std::pair(LocalSizeY, LocalSizeZ);
- }
-
- // We must read the size out of the dispatch pointer.
- assert(IsAMDGCN);
+struct MemTransferInfo {
+ ConstantInt *SrcIndex = nullptr;
+ ConstantInt *DestIndex = nullptr;
+};
- // We are indexing into this struct, and want to extract the workgroup_size_*
- // fields.
+// Checks if the instruction I is a memset user of the alloca AI that we can
+// deal with. Currently, only non-volatile memsets that affect the whole alloca
+// are handled.
+static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
+ const DataLayout &DL) {
+ using namespace PatternMatch;
+ // For now we only care about non-volatile memsets that affect the whole type
+ // (start at index 0 and fill the whole alloca).
//
- // typedef struct hsa_kernel_dispatch_packet_s {
- // uint16_t header;
- // uint16_t setup;
- // uint16_t workgroup_size_x ;
- // uint16_t workgroup_size_y;
- // uint16_t workgroup_size_z;
- // uint16_t reserved0;
- // uint32_t grid_size_x ;
- // uint32_t grid_size_y ;
- // uint32_t grid_size_z;
- //
- // uint32_t private_segment_size;
- // uint32_t group_segment_size;
- // uint64_t kernel_object;
- //
- // #ifdef HSA_LARGE_MODEL
- // void *kernarg_address;
- // #elif defined HSA_LITTLE_ENDIAN
- // void *kernarg_address;
- // uint32_t reserved1;
- // #else
- // uint32_t reserved1;
- // void *kernarg_address;
- // #endif
- // uint64_t reserved2;
- // hsa_signal_t completion_signal; // uint64_t wrapper
- // } hsa_kernel_dispatch_packet_t
- //
- Function *DispatchPtrFn
- = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_dispatch_ptr);
-
- CallInst *DispatchPtr = Builder.CreateCall(DispatchPtrFn, {});
- DispatchPtr->addRetAttr(Attribute::NoAlias);
- DispatchPtr->addRetAttr(Attribute::NonNull);
- F.removeFnAttr("amdgpu-no-dispatch-ptr");
-
- // Size of the dispatch packet struct.
- DispatchPtr->addDereferenceableRetAttr(64);
-
- Type *I32Ty = Type::getInt32Ty(Mod->getContext());
- Value *CastDispatchPtr = Builder.CreateBitCast(
- DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));
-
- // We could do a single 64-bit load here, but it's likely that the basic
- // 32-bit and extract sequence is already present, and it is probably easier
- // to CSE this. The loads should be mergeable later anyway.
- Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 1);
- LoadInst *LoadXY = Builder.CreateAlignedLoad(I32Ty, GEPXY, Align(4));
-
- Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 2);
- LoadInst *LoadZU = Builder.CreateAlignedLoad(I32Ty, GEPZU, Align(4));
-
- MDNode *MD = MDNode::get(Mod->getContext(), std::nullopt);
- LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
- LoadZU->setMetadata(LLVMContext::MD_invariant_load, MD);
- ST.makeLIDRangeMetadata(LoadZU);
-
- // Extract y component. Upper half of LoadZU should be zero already.
- Value *Y = Builder.CreateLShr(LoadXY, 16);
-
- return std::pair(Y, LoadZU);
-}
-
-Value *AMDGPUPromoteAllocaImpl::getWorkitemID(IRBuilder<> &Builder,
- unsigned N) {
- Function *F = Builder.GetInsertBlock()->getParent();
- const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, *F);
- Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
- StringRef AttrName;
-
- switch (N) {
- case 0:
- IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
- : (Intrinsic::ID)Intrinsic::r600_read_tidig_x;
- AttrName = "amdgpu-no-workitem-id-x";
- break;
- case 1:
- IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
- : (Intrinsic::ID)Intrinsic::r600_read_tidig_y;
- AttrName = "amdgpu-no-workitem-id-y";
- break;
-
- case 2:
- IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
- : (Intrinsic::ID)Intrinsic::r600_read_tidig_z;
- AttrName = "amdgpu-no-workitem-id-z";
- break;
- default:
- llvm_unreachable("invalid dimension");
- }
-
- Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID);
- CallInst *CI = Builder.CreateCall(WorkitemIdFn);
- ST.makeLIDRangeMetadata(CI);
- F->removeFnAttr(AttrName);
-
- return CI;
-}
-
-static FixedVectorType *arrayTypeToVecType(ArrayType *ArrayTy) {
- return FixedVectorType::get(ArrayTy->getElementType(),
- ArrayTy->getNumElements());
+ // TODO: Now that we moved to PromoteAlloca we could handle any memsets
+ // (except maybe volatile ones?) - we just need to use shufflevector if it
+ // only affects a subset of the vector.
+ const unsigned Size = DL.getTypeStoreSize(AI->getAllocatedType());
+ return I->getOperand(0) == AI &&
+ match(I->getOperand(2), m_SpecificInt(Size)) && !I->isVolatile();
}
static Value *
@@ -379,60 +330,336 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
return ConstantInt::get(GEP->getContext(), Quot);
}
-struct MemTransferInfo {
- ConstantInt *SrcIndex = nullptr;
- ConstantInt *DestIndex = nullptr;
-};
+/// Promotes a single user of the alloca to a vector form.
+///
+/// \param Inst Instruction to be promoted.
+/// \param DL Module Data Layout.
+/// \param VectorTy Vectorized Type.
+/// \param VecStoreSize Size of \p VectorTy in bytes.
+/// \param ElementSize Size of \p VectorTy element type in bytes.
+/// \param TransferInfo MemTransferInst info map.
+/// \param GEPVectorIdx GEP -> VectorIdx cache.
+/// \param CurVal Current value of the vector (e.g. last stored value)
+/// \param[out] DeferredLoads \p Inst is added to this vector if it can't
+/// be promoted now. This happens when promoting requires \p
+/// CurVal, but \p CurVal is nullptr.
+/// \return the stored value if \p Inst would have written to the alloca, or
+/// nullptr otherwise.
+static Value *promoteAllocaUserToVector(
+ Instruction *Inst, const DataLayout &DL, FixedVectorType *VectorTy,
+ unsigned VecStoreSize, unsigned ElementSize,
+ DenseMap<MemTransferInst *, MemTransferInfo> &TransferInfo,
+ std::map<GetElementPtrInst *, Value *> &GEPVectorIdx, Value *CurVal,
+ SmallVectorImpl<LoadInst *> &DeferredLoads) {
+ // Note: we use InstSimplifyFolder because it can leverage the DataLayout
+ // to do more folding, especially in the case of vector splats.
+ IRBuilder<InstSimplifyFolder> Builder(Inst->getContext(),
+ InstSimplifyFolder(DL));
+ Builder.SetInsertPoint(Inst);
+
+ const auto GetOrLoadCurrentVectorValue = [&]() -> Value * {
+ if (CurVal)
+ return CurVal;
+
+ // If the current value is not known, insert a dummy load and lower it on
+ // the second pass.
+ LoadInst *Dummy =
+ Builder.CreateLoad(VectorTy, PoisonValue::get(Builder.getPtrTy()),
+ "promotealloca.dummyload");
+ DeferredLoads.push_back(Dummy);
+ return Dummy;
+ };
+
+ const auto CreateTempPtrIntCast = [&Builder, DL](Value *Val,
+ Type *PtrTy) -> Value * {
+ assert(DL.getTypeStoreSize(Val->getType()) == DL.getTypeStoreSize(PtrTy));
+ const unsigned Size = DL.getTypeStoreSizeInBits(PtrTy);
+ if (!PtrTy->isVectorTy())
+ return Builder.CreateBitOrPointerCast(Val, Builder.getIntNTy(Size));
+ const unsigned NumPtrElts = cast<FixedVectorType>(PtrTy)->getNumElements();
+ // If we want to cast, e.g. a <2 x ptr> into a <4 x i32>, we need to first
+ // cast the ptr vector to <2 x i64>.
+ assert((Size % NumPtrElts == 0) && "Vector size not divisible");
+ Type *EltTy = Builder.getIntNTy(Size / NumPtrElts);
+ return Builder.CreateBitOrPointerCast(
+ Val, FixedVectorType::get(EltTy, NumPtrElts));
+ };
+
+ Type *VecEltTy = VectorTy->getElementType();
+ switch (Inst->getOpcode()) {
+ case Instruction::Load: {
+ // Loads can only be lowered if the value is known.
+ if (!CurVal) {
+ DeferredLoads.push_back(cast<LoadInst>(Inst));
+ return nullptr;
+ }
+
+ Value *Index = calculateVectorIndex(
+ cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx);
+
+ // We're loading the full vector.
+ Type *AccessTy = Inst->getType();
+ TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
+ if (AccessSize == VecStoreSize && cast<Constant>(Index)->isZeroValue()) {
+ if (AccessTy->isPtrOrPtrVectorTy())
+ CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
+ else if (CurVal->getType()->isPtrOrPtrVectorTy())
+ CurVal = CreateTempPtrIntCast(CurVal, CurVal->getType());
+ Value *NewVal = Builder.CreateBitOrPointerCast(CurVal, AccessTy);
+ Inst->replaceAllUsesWith(NewVal);
+ return nullptr;
+ }
+
+ // Loading a subvector.
+ if (isa<FixedVectorType>(AccessTy)) {
+ assert(AccessSize.isKnownMultipleOf(DL.getTypeStoreSize(VecEltTy)));
+ const unsigned NumElts = AccessSize / DL.getTypeStoreSize(VecEltTy);
+ auto *SubVecTy = FixedVectorType::get(VecEltTy, NumElts);
+ assert(DL.getTypeStoreSize(SubVecTy) == DL.getTypeStoreSize(AccessTy));
+
+ unsigned IndexVal = cast<ConstantInt>(Index)->getZExtValue();
+ Value *SubVec = PoisonValue::get(SubVecTy);
+ for (unsigned K = 0; K < NumElts; ++K) {
+ SubVec = Builder.CreateInsertElement(
+ SubVec, Builder.CreateExtractElement(CurVal, IndexVal + K), K);
+ }
+
+ if (AccessTy->isPtrOrPtrVectorTy())
+ SubVec = CreateTempPtrIntCast(SubVec, AccessTy);
+ else if (SubVecTy->isPtrOrPtrVectorTy())
+ SubVec = CreateTempPtrIntCast(SubVec, SubVecTy);
+
+ SubVec = Builder.CreateBitOrPointerCast(SubVec, AccessTy);
+ Inst->replaceAllUsesWith(SubVec);
+ return nullptr;
+ }
+
+ // We're loading one element.
+ Value *ExtractElement = Builder.CreateExtractElement(CurVal, Index);
+ if (AccessTy != VecEltTy)
+ ExtractElement = Builder.CreateBitOrPointerCast(ExtractElement, AccessTy);
+
+ Inst->replaceAllUsesWith(ExtractElement);
+ return nullptr;
+ }
+ case Instruction::Store: {
+ // For stores, it's a bit trickier: whether we need to know the current
+ // value depends on what is being stored. A store of the full vector does
+ // not need it; a store of a single element (or a subvector) does.
+ StoreInst *SI = cast<StoreInst>(Inst);
+ Value *Index = calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx);
+ Value *Val = SI->getValueOperand();
+
+ // We're storing the full vector; we can handle this without knowing CurVal.
+ Type *AccessTy = Val->getType();
+ TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
+ if (AccessSize == VecStoreSize && cast<Constant>(Index)->isZeroValue()) {
+ if (AccessTy->isPtrOrPtrVectorTy())
+ Val = CreateTempPtrIntCast(Val, AccessTy);
+ else if (VectorTy->isPtrOrPtrVectorTy())
+ Val = CreateTempPtrIntCast(Val, VectorTy);
+ return Builder.CreateBitOrPointerCast(Val, VectorTy);
+ }
+
+ // Storing a subvector.
+ if (isa<FixedVectorType>(AccessTy)) {
+ assert(AccessSize.isKnownMultipleOf(DL.getTypeStoreSize(VecEltTy)));
+ const unsigned NumElts = AccessSize / DL.getTypeStoreSize(VecEltTy);
+ auto *SubVecTy = FixedVectorType::get(VecEltTy, NumElts);
+ assert(DL.getTypeStoreSize(SubVecTy) == DL.getTypeStoreSize(AccessTy));
+
+ if (SubVecTy->isPtrOrPtrVectorTy())
+ Val = CreateTempPtrIntCast(Val, SubVecTy);
+ else if (AccessTy->isPtrOrPtrVectorTy())
+ Val = CreateTempPtrIntCast(Val, AccessTy);
+
+ Val = Builder.CreateBitOrPointerCast(Val, SubVecTy);
+
+ unsigned IndexVal = cast<ConstantInt>(Index)->getZExtValue();
+ Value *CurVec = GetOrLoadCurrentVectorValue();
+ for (unsigned K = 0; (IndexVal + K) < NumElts; ++K) {
+ CurVec = Builder.CreateInsertElement(
+ CurVec, Builder.CreateExtractElement(Val, K), IndexVal + K);
+ }
+ return CurVec;
+ }
+
+ if (Val->getType() != VecEltTy)
+ Val = Builder.CreateBitOrPointerCast(Val, VecEltTy);
+ return Builder.CreateInsertElement(GetOrLoadCurrentVectorValue(), Val,
+ Index);
+ }
+ case Instruction::Call: {
+ if (auto *MTI = dyn_cast<MemTransferInst>(Inst)) {
+ // For memcpy, we need to know the current value of the vector.
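+ // For example (hypothetical indices): copying 2 elements from index 0 to
+ // index 2 of a 4-element vector produces the shuffle mask <0, 1, 0, 1>.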
+ ConstantInt *Length = cast<ConstantInt>(MTI->getLength());
+ unsigned NumCopied = Length->getZExtValue() / ElementSize;
+ MemTransferInfo *TI = &TransferInfo[MTI];
+ unsigned SrcBegin = TI->SrcIndex->getZExtValue();
+ unsigned DestBegin = TI->DestIndex->getZExtValue();
+
+ SmallVector<int> Mask;
+ for (unsigned Idx = 0; Idx < VectorTy->getNumElements(); ++Idx) {
+ if (Idx >= DestBegin && Idx < DestBegin + NumCopied) {
+ Mask.push_back(SrcBegin++);
+ } else {
+ Mask.push_back(Idx);
+ }
+ }
+
+ return Builder.CreateShuffleVector(GetOrLoadCurrentVectorValue(), Mask);
+ }
+
+ if (auto *MSI = dyn_cast<MemSetInst>(Inst)) {
+ // For memset, we don't need to know the previous value because we
+ // currently only allow memsets that cover the whole alloca.
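+ // For example (hypothetical values): a memset of the byte 0xAB over an
+ // alloca promoted to <4 x i32> becomes a splat of the i32 0xABABABAB.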
+ Value *Elt = MSI->getOperand(1);
+ if (DL.getTypeStoreSize(VecEltTy) > 1) {
+ Value *EltBytes =
+ Builder.CreateVectorSplat(DL.getTypeStoreSize(VecEltTy), Elt);
+ Elt = Builder.CreateBitCast(EltBytes, VecEltTy);
+ }
+
+ return Builder.CreateVectorSplat(VectorTy->getElementCount(), Elt);
+ }
+
+ llvm_unreachable("Unsupported call when promoting alloca to vector");
+ }
+
+ default:
+ llvm_unreachable("Inconsistency in instructions promotable to vector");
+ }
+
+ llvm_unreachable("Did not return after promoting instruction!");
+}
+
+static bool isSupportedAccessType(FixedVectorType *VecTy, Type *AccessTy,
+ const DataLayout &DL) {
+ // Access as a vector type can work if the size of the access vector is a
+ // multiple of the size of the alloca's vector element type.
+ //
+ // Examples:
+ // - VecTy = <8 x float>, AccessTy = <4 x float> -> OK
+ // - VecTy = <4 x double>, AccessTy = <2 x float> -> OK
+ // - VecTy = <4 x double>, AccessTy = <3 x float> -> NOT OK
+ // - 3*32 is not a multiple of 64
+ //
+ // We could handle more complicated cases, but it'd make things a lot more
+ // complicated.
+ if (isa<FixedVectorType>(AccessTy)) {
+ TypeSize AccTS = DL.getTypeStoreSize(AccessTy);
+ TypeSize VecTS = DL.getTypeStoreSize(VecTy->getElementType());
+ return AccTS.isKnownMultipleOf(VecTS);
+ }
+
+ return CastInst::isBitOrNoopPointerCastable(VecTy->getElementType(), AccessTy,
+ DL);
+}
+
+/// Iterates over an instruction worklist that may contain multiple
+/// instructions from the same basic block, possibly out of program order.
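+///
+/// For example, if \p WorkList contains {B, A} where A precedes B in the same
+/// basic block, \p Fn is still invoked on A first and then on B.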
+template <typename InstContainer>
+static void forEachWorkListItem(const InstContainer &WorkList,
+ std::function<void(Instruction *)> Fn) {
+ // Bucket up uses of the alloca by the block they occur in.
+ // This is important because we have to handle multiple defs/uses in a block
+ // ourselves: SSAUpdater is purely for cross-block references.
+ DenseMap<BasicBlock *, SmallDenseSet<Instruction *>> UsesByBlock;
+ for (Instruction *User : WorkList)
+ UsesByBlock[User->getParent()].insert(User);
+
+ for (Instruction *User : WorkList) {
+ BasicBlock *BB = User->getParent();
+ auto &BlockUses = UsesByBlock[BB];
+
+ // Already processed, skip.
+ if (BlockUses.empty())
+ continue;
+
+ // Only user in the block, directly process it.
+ if (BlockUses.size() == 1) {
+ Fn(User);
+ continue;
+ }
-static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
- unsigned MaxVGPRs) {
+ // Multiple users in the block: do a linear scan to process them in order.
+ for (Instruction &Inst : *BB) {
+ if (!BlockUses.contains(&Inst))
+ continue;
+
+ Fn(&Inst);
+ }
+
+ // Clear the block so we know it's been processed.
+ BlockUses.clear();
+ }
+}
+
+// FIXME: Should try to pick the most likely to be profitable allocas first.
+bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
+ LLVM_DEBUG(dbgs() << "Trying to promote to vector: " << Alloca << '\n');
if (DisablePromoteAllocaToVector) {
- LLVM_DEBUG(dbgs() << " Promotion alloca to vector is disabled\n");
+ LLVM_DEBUG(dbgs() << " Promote alloca to vector is disabled\n");
return false;
}
- Type *AllocaTy = Alloca->getAllocatedType();
+ Type *AllocaTy = Alloca.getAllocatedType();
auto *VectorTy = dyn_cast<FixedVectorType>(AllocaTy);
if (auto *ArrayTy = dyn_cast<ArrayType>(AllocaTy)) {
if (VectorType::isValidElementType(ArrayTy->getElementType()) &&
ArrayTy->getNumElements() > 0)
- VectorTy = arrayTypeToVecType(ArrayTy);
+ VectorTy = FixedVectorType::get(ArrayTy->getElementType(),
+ ArrayTy->getNumElements());
}
// Use up to 1/4 of available register budget for vectorization.
unsigned Limit = PromoteAllocaToVectorLimit ? PromoteAllocaToVectorLimit * 8
: (MaxVGPRs * 32);
- if (DL.getTypeSizeInBits(AllocaTy) * 4 > Limit) {
- LLVM_DEBUG(dbgs() << " Alloca too big for vectorization with "
- << MaxVGPRs << " registers available\n");
+ if (DL->getTypeSizeInBits(AllocaTy) * 4 > Limit) {
+ LLVM_DEBUG(dbgs() << " Alloca too big for vectorization with " << MaxVGPRs
+ << " registers available\n");
return false;
}
- LLVM_DEBUG(dbgs() << "Alloca candidate for vectorization\n");
-
// FIXME: There is no reason why we can't support larger arrays, we
// are just being conservative for now.
- // FIXME: We also reject alloca's of the form [ 2 x [ 2 x i32 ]] or equivalent. Potentially these
- // could also be promoted but we don't currently handle this case
- if (!VectorTy || VectorTy->getNumElements() > 16 ||
- VectorTy->getNumElements() < 2) {
+ // FIXME: We also reject allocas of the form [ 2 x [ 2 x i32 ]] or
+ // equivalent. Potentially these could also be promoted, but we don't
+ // currently handle this case.
+ if (!VectorTy) {
LLVM_DEBUG(dbgs() << " Cannot convert type to vector\n");
return false;
}
- std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
+ if (VectorTy->getNumElements() > 16 || VectorTy->getNumElements() < 2) {
+ LLVM_DEBUG(dbgs() << " " << *VectorTy
+ << " has an unsupported number of elements\n");
+ return false;
+ }
+
+ std::map<GetElementPtrInst *, Value *> GEPVectorIdx;
SmallVector<Instruction *> WorkList;
+ SmallVector<Instruction *> UsersToRemove;
SmallVector<Instruction *> DeferredInsts;
SmallVector<Use *, 8> Uses;
DenseMap<MemTransferInst *, MemTransferInfo> TransferInfo;
- for (Use &U : Alloca->uses())
+ const auto RejectUser = [&](Instruction *Inst, Twine Msg) {
+ LLVM_DEBUG(dbgs() << " Cannot promote alloca to vector: " << Msg << "\n"
+ << " " << *Inst << "\n");
+ return false;
+ };
+
+ for (Use &U : Alloca.uses())
Uses.push_back(&U);
+ LLVM_DEBUG(dbgs() << " Attempting promotion to: " << *VectorTy << "\n");
+
Type *VecEltTy = VectorTy->getElementType();
- unsigned ElementSize = DL.getTypeSizeInBits(VecEltTy) / 8;
+ unsigned ElementSize = DL->getTypeSizeInBits(VecEltTy) / 8;
while (!Uses.empty()) {
Use *U = Uses.pop_back_val();
Instruction *Inst = cast<Instruction>(U->getUser());
@@ -441,22 +668,29 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
// This is a store of the pointer, not to the pointer.
if (isa<StoreInst>(Inst) &&
U->getOperandNo() != StoreInst::getPointerOperandIndex())
- return false;
+ return RejectUser(Inst, "pointer is being stored");
Type *AccessTy = getLoadStoreType(Inst);
+ if (AccessTy->isAggregateType())
+ return RejectUser(Inst, "unsupported load/store as aggregate");
+ assert(!AccessTy->isAggregateType() || AccessTy->isArrayTy());
+
Ptr = Ptr->stripPointerCasts();
- // Alloca already accessed as vector, leave alone.
- if (Ptr == Alloca && DL.getTypeStoreSize(Alloca->getAllocatedType()) ==
- DL.getTypeStoreSize(AccessTy))
+ // Alloca already accessed as vector.
+ if (Ptr == &Alloca && DL->getTypeStoreSize(Alloca.getAllocatedType()) ==
+ DL->getTypeStoreSize(AccessTy)) {
+ WorkList.push_back(Inst);
continue;
+ }
// Check that this is a simple access of a vector element.
bool IsSimple = isa<LoadInst>(Inst) ? cast<LoadInst>(Inst)->isSimple()
: cast<StoreInst>(Inst)->isSimple();
- if (!IsSimple ||
- !CastInst::isBitOrNoopPointerCastable(VecEltTy, AccessTy, DL))
- return false;
+ if (!IsSimple)
+ return RejectUser(Inst, "not a simple load or store");
+ if (!isSupportedAccessType(VectorTy, AccessTy, *DL))
+ return RejectUser(Inst, "not a supported access type");
WorkList.push_back(Inst);
continue;
@@ -466,32 +700,38 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
// Look through bitcasts.
for (Use &U : Inst->uses())
Uses.push_back(&U);
+ UsersToRemove.push_back(Inst);
continue;
}
if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
// If we can't compute a vector index from this GEP, then we can't
// promote this alloca to vector.
- Value *Index = GEPToVectorIndex(GEP, Alloca, VecEltTy, DL);
- if (!Index) {
- LLVM_DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP
- << '\n');
- return false;
- }
+ Value *Index = GEPToVectorIndex(GEP, &Alloca, VecEltTy, *DL);
+ if (!Index)
+ return RejectUser(Inst, "cannot compute vector index for GEP");
GEPVectorIdx[GEP] = Index;
for (Use &U : Inst->uses())
Uses.push_back(&U);
+ UsersToRemove.push_back(Inst);
+ continue;
+ }
+
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst);
+ MSI && isSupportedMemset(MSI, &Alloca, *DL)) {
+ WorkList.push_back(Inst);
continue;
}
if (MemTransferInst *TransferInst = dyn_cast<MemTransferInst>(Inst)) {
if (TransferInst->isVolatile())
- return false;
+ return RejectUser(Inst, "mem transfer inst is volatile");
ConstantInt *Len = dyn_cast<ConstantInt>(TransferInst->getLength());
- if (!Len || !!(Len->getZExtValue() % ElementSize))
- return false;
+ if (!Len || (Len->getZExtValue() % ElementSize))
+ return RejectUser(Inst, "mem transfer inst length is non-constant or "
+ "not a multiple of the vector element size");
if (!TransferInfo.count(TransferInst)) {
DeferredInsts.push_back(Inst);
@@ -501,7 +741,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
auto getPointerIndexOfAlloca = [&](Value *Ptr) -> ConstantInt * {
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
- if (Ptr != Alloca && !GEPVectorIdx.count(GEP))
+ if (Ptr != &Alloca && !GEPVectorIdx.count(GEP))
return nullptr;
return dyn_cast<ConstantInt>(calculateVectorIndex(Ptr, GEPVectorIdx));
@@ -513,30 +753,33 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
Value *Dest = TransferInst->getDest();
ConstantInt *Index = getPointerIndexOfAlloca(Dest);
if (!Index)
- return false;
+ return RejectUser(Inst, "could not calculate constant dest index");
TI->DestIndex = Index;
} else {
assert(OpNum == 1);
Value *Src = TransferInst->getSource();
ConstantInt *Index = getPointerIndexOfAlloca(Src);
if (!Index)
- return false;
+ return RejectUser(Inst, "could not calculate constant src index");
TI->SrcIndex = Index;
}
continue;
}
// Ignore assume-like intrinsics and comparisons used in assumes.
- if (isAssumeLikeIntrinsic(Inst))
+ if (isAssumeLikeIntrinsic(Inst)) {
+ UsersToRemove.push_back(Inst);
continue;
+ }
if (isa<ICmpInst>(Inst) && all_of(Inst->users(), [](User *U) {
return isAssumeLikeIntrinsic(cast<Instruction>(U));
- }))
+ })) {
+ UsersToRemove.push_back(Inst);
continue;
+ }
- // Unknown user.
- return false;
+ return RejectUser(Inst, "unhandled alloca user");
}
while (!DeferredInsts.empty()) {
@@ -546,82 +789,194 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
// from different address spaces.
MemTransferInfo &Info = TransferInfo[TransferInst];
if (!Info.SrcIndex || !Info.DestIndex)
- return false;
+ return RejectUser(
+ Inst, "mem transfer inst is missing constant src and/or dst index");
}
LLVM_DEBUG(dbgs() << " Converting alloca to vector " << *AllocaTy << " -> "
<< *VectorTy << '\n');
+ const unsigned VecStoreSize = DL->getTypeStoreSize(VectorTy);
+
+ // Alloca is uninitialized memory. Imitate that by making the first value
+ // undef.
+ SSAUpdater Updater;
+ Updater.Initialize(VectorTy, "promotealloca");
+ Updater.AddAvailableValue(Alloca.getParent(), UndefValue::get(VectorTy));
+
+ // First handle the initial worklist.
+ SmallVector<LoadInst *, 4> DeferredLoads;
+ forEachWorkListItem(WorkList, [&](Instruction *I) {
+ BasicBlock *BB = I->getParent();
+ // On the first pass, we only take values that are trivially known, i.e.
+ // where AddAvailableValue was already called in this block.
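+ // Concretely, a load is lowered on this pass only if some earlier
+ // instruction in the same block (a store, memset or memcpy, or the initial
+ // undef in the alloca's block) already defined the vector value; otherwise
+ // it is deferred to the second pass.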
+ Value *Result = promoteAllocaUserToVector(
+ I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
+ Updater.FindValueForBlock(BB), DeferredLoads);
+ if (Result)
+ Updater.AddAvailableValue(BB, Result);
+ });
+
+ // Then handle deferred loads.
+ forEachWorkListItem(DeferredLoads, [&](Instruction *I) {
+ SmallVector<LoadInst *, 0> NewDLs;
+ BasicBlock *BB = I->getParent();
+ // On the second pass, we use GetValueInMiddleOfBlock to guarantee we always
+ // get a value, inserting PHIs as needed.
+ Value *Result = promoteAllocaUserToVector(
+ I, *DL, VectorTy, VecStoreSize, ElementSize, TransferInfo, GEPVectorIdx,
+ Updater.GetValueInMiddleOfBlock(I->getParent()), NewDLs);
+ if (Result)
+ Updater.AddAvailableValue(BB, Result);
+ assert(NewDLs.empty() && "No more deferred loads should be queued!");
+ });
+
+ // Delete all instructions. On the first pass, new dummy loads may have been
+ // added, so we need to collect them too.
+ DenseSet<Instruction *> InstsToDelete(WorkList.begin(), WorkList.end());
+ InstsToDelete.insert(DeferredLoads.begin(), DeferredLoads.end());
+ for (Instruction *I : InstsToDelete) {
+ assert(I->use_empty());
+ I->eraseFromParent();
+ }
- for (Instruction *Inst : WorkList) {
- IRBuilder<> Builder(Inst);
- switch (Inst->getOpcode()) {
- case Instruction::Load: {
- Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
- Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
- Type *VecPtrTy = VectorTy->getPointerTo(Alloca->getAddressSpace());
- Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
- Value *VecValue =
- Builder.CreateAlignedLoad(VectorTy, BitCast, Alloca->getAlign());
- Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
- if (Inst->getType() != VecEltTy)
- ExtractElement = Builder.CreateBitOrPointerCast(ExtractElement, Inst->getType());
- Inst->replaceAllUsesWith(ExtractElement);
- Inst->eraseFromParent();
- break;
- }
- case Instruction::Store: {
- StoreInst *SI = cast<StoreInst>(Inst);
- Value *Ptr = SI->getPointerOperand();
- Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
- Type *VecPtrTy = VectorTy->getPointerTo(Alloca->getAddressSpace());
- Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
- Value *VecValue =
- Builder.CreateAlignedLoad(VectorTy, BitCast, Alloca->getAlign());
- Value *Elt = SI->getValueOperand();
- if (Elt->getType() != VecEltTy)
- Elt = Builder.CreateBitOrPointerCast(Elt, VecEltTy);
- Value *NewVecValue = Builder.CreateInsertElement(VecValue, Elt, Index);
- Builder.CreateAlignedStore(NewVecValue, BitCast, Alloca->getAlign());
- Inst->eraseFromParent();
- break;
- }
- case Instruction::Call: {
- if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst)) {
- ConstantInt *Length = cast<ConstantInt>(MTI->getLength());
- unsigned NumCopied = Length->getZExtValue() / ElementSize;
- MemTransferInfo *TI = &TransferInfo[cast<MemTransferInst>(Inst)];
- unsigned SrcBegin = TI->SrcIndex->getZExtValue();
- unsigned DestBegin = TI->DestIndex->getZExtValue();
-
- SmallVector<int> Mask;
- for (unsigned Idx = 0; Idx < VectorTy->getNumElements(); ++Idx) {
- if (Idx >= DestBegin && Idx < DestBegin + NumCopied) {
- Mask.push_back(SrcBegin++);
- } else {
- Mask.push_back(Idx);
- }
- }
- Type *VecPtrTy = VectorTy->getPointerTo(Alloca->getAddressSpace());
- Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
- Value *VecValue =
- Builder.CreateAlignedLoad(VectorTy, BitCast, Alloca->getAlign());
- Value *NewVecValue = Builder.CreateShuffleVector(VecValue, Mask);
- Builder.CreateAlignedStore(NewVecValue, BitCast, Alloca->getAlign());
-
- Inst->eraseFromParent();
- } else {
- llvm_unreachable("Unsupported call when promoting alloca to vector");
- }
- break;
- }
-
- default:
- llvm_unreachable("Inconsistency in instructions promotable to vector");
- }
+ // Delete all the users that are known to be removeable.
+ for (Instruction *I : reverse(UsersToRemove)) {
+ I->dropDroppableUses();
+ assert(I->use_empty());
+ I->eraseFromParent();
}
+
+ // Alloca should now be dead too.
+ assert(Alloca.use_empty());
+ Alloca.eraseFromParent();
return true;
}
+std::pair<Value *, Value *>
+AMDGPUPromoteAllocaImpl::getLocalSizeYZ(IRBuilder<> &Builder) {
+ Function &F = *Builder.GetInsertBlock()->getParent();
+ const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
+
+ if (!IsAMDHSA) {
+ Function *LocalSizeYFn =
+ Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_y);
+ Function *LocalSizeZFn =
+ Intrinsic::getDeclaration(Mod, Intrinsic::r600_read_local_size_z);
+
+ CallInst *LocalSizeY = Builder.CreateCall(LocalSizeYFn, {});
+ CallInst *LocalSizeZ = Builder.CreateCall(LocalSizeZFn, {});
+
+ ST.makeLIDRangeMetadata(LocalSizeY);
+ ST.makeLIDRangeMetadata(LocalSizeZ);
+
+ return std::pair(LocalSizeY, LocalSizeZ);
+ }
+
+ // We must read the size out of the dispatch pointer.
+ assert(IsAMDGCN);
+
+ // We are indexing into this struct, and want to extract the workgroup_size_*
+ // fields.
+ //
+ // typedef struct hsa_kernel_dispatch_packet_s {
+ // uint16_t header;
+ // uint16_t setup;
+ // uint16_t workgroup_size_x;
+ // uint16_t workgroup_size_y;
+ // uint16_t workgroup_size_z;
+ // uint16_t reserved0;
+ // uint32_t grid_size_x;
+ // uint32_t grid_size_y;
+ // uint32_t grid_size_z;
+ //
+ // uint32_t private_segment_size;
+ // uint32_t group_segment_size;
+ // uint64_t kernel_object;
+ //
+ // #ifdef HSA_LARGE_MODEL
+ // void *kernarg_address;
+ // #elif defined HSA_LITTLE_ENDIAN
+ // void *kernarg_address;
+ // uint32_t reserved1;
+ // #else
+ // uint32_t reserved1;
+ // void *kernarg_address;
+ // #endif
+ // uint64_t reserved2;
+ // hsa_signal_t completion_signal; // uint64_t wrapper
+ // } hsa_kernel_dispatch_packet_t
+ //
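+ // As laid out above (little-endian), the i32 at index 1 holds
+ // workgroup_size_x in its low 16 bits and workgroup_size_y in its high 16
+ // bits, and the i32 at index 2 holds workgroup_size_z in its low 16 bits
+ // with reserved0 in the high bits.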
+ Function *DispatchPtrFn =
+ Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_dispatch_ptr);
+
+ CallInst *DispatchPtr = Builder.CreateCall(DispatchPtrFn, {});
+ DispatchPtr->addRetAttr(Attribute::NoAlias);
+ DispatchPtr->addRetAttr(Attribute::NonNull);
+ F.removeFnAttr("amdgpu-no-dispatch-ptr");
+
+ // Size of the dispatch packet struct.
+ DispatchPtr->addDereferenceableRetAttr(64);
+
+ Type *I32Ty = Type::getInt32Ty(Mod->getContext());
+ Value *CastDispatchPtr = Builder.CreateBitCast(
+ DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS));
+
+ // We could do a single 64-bit load here, but it's likely that the basic
+ // 32-bit load and extract sequence is already present, and it is probably
+ // easier to CSE this. The loads should be mergeable later anyway.
+ Value *GEPXY = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 1);
+ LoadInst *LoadXY = Builder.CreateAlignedLoad(I32Ty, GEPXY, Align(4));
+
+ Value *GEPZU = Builder.CreateConstInBoundsGEP1_64(I32Ty, CastDispatchPtr, 2);
+ LoadInst *LoadZU = Builder.CreateAlignedLoad(I32Ty, GEPZU, Align(4));
+
+ MDNode *MD = MDNode::get(Mod->getContext(), std::nullopt);
+ LoadXY->setMetadata(LLVMContext::MD_invariant_load, MD);
+ LoadZU->setMetadata(LLVMContext::MD_invariant_load, MD);
+ ST.makeLIDRangeMetadata(LoadZU);
+
+ // Extract y component. Upper half of LoadZU should be zero already.
+ Value *Y = Builder.CreateLShr(LoadXY, 16);
+
+ return std::pair(Y, LoadZU);
+}
+
+Value *AMDGPUPromoteAllocaImpl::getWorkitemID(IRBuilder<> &Builder,
+ unsigned N) {
+ Function *F = Builder.GetInsertBlock()->getParent();
+ const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, *F);
+ Intrinsic::ID IntrID = Intrinsic::not_intrinsic;
+ StringRef AttrName;
+
+ switch (N) {
+ case 0:
+ IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_x
+ : (Intrinsic::ID)Intrinsic::r600_read_tidig_x;
+ AttrName = "amdgpu-no-workitem-id-x";
+ break;
+ case 1:
+ IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_y
+ : (Intrinsic::ID)Intrinsic::r600_read_tidig_y;
+ AttrName = "amdgpu-no-workitem-id-y";
+ break;
+ case 2:
+ IntrID = IsAMDGCN ? (Intrinsic::ID)Intrinsic::amdgcn_workitem_id_z
+ : (Intrinsic::ID)Intrinsic::r600_read_tidig_z;
+ AttrName = "amdgpu-no-workitem-id-z";
+ break;
+ default:
+ llvm_unreachable("invalid dimension");
+ }
+
+ Function *WorkitemIdFn = Intrinsic::getDeclaration(Mod, IntrID);
+ CallInst *CI = Builder.CreateCall(WorkitemIdFn);
+ ST.makeLIDRangeMetadata(CI);
+ F->removeFnAttr(AttrName);
+
+ return CI;
+}
+
static bool isCallPromotable(CallInst *CI) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (!II)
@@ -883,8 +1238,8 @@ bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(const Function &F) {
CurrentLocalMemUsage += Alloc.first;
}
- unsigned MaxOccupancy = ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage,
- F);
+ unsigned MaxOccupancy =
+ ST.getOccupancyWithLocalMemSize(CurrentLocalMemUsage, F);
// Restrict local memory usage so that we don't drastically reduce occupancy,
// unless it is already significantly reduced.
@@ -902,10 +1257,9 @@ bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(const Function &F) {
// usage.
MaxOccupancy = std::min(OccupancyHint, MaxOccupancy);
-
// Round up to the next tier of usage.
- unsigned MaxSizeWithWaveCount
- = ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F);
+ unsigned MaxSizeWithWaveCount =
+ ST.getMaxLocalMemSizeWithWaveCount(MaxOccupancy, F);
// Program is possibly broken by using more local mem than available.
if (CurrentLocalMemUsage > MaxSizeWithWaveCount)
@@ -924,26 +1278,18 @@ bool AMDGPUPromoteAllocaImpl::hasSufficientLocalMem(const Function &F) {
}
// FIXME: Should try to pick the most likely to be profitable allocas first.
-bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
- // Array allocations are probably not worth handling, since an allocation of
- // the array type is the canonical form.
- if (!I.isStaticAlloca() || I.isArrayAllocation())
+bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToLDS(AllocaInst &I,
+ bool SufficientLDS) {
+ LLVM_DEBUG(dbgs() << "Trying to promote to LDS: " << I << '\n');
+
+ if (DisablePromoteAllocaToLDS) {
+ LLVM_DEBUG(dbgs() << " Promote alloca to LDS is disabled\n");
return false;
+ }
const DataLayout &DL = Mod->getDataLayout();
IRBuilder<> Builder(&I);
- // First try to replace the alloca with a vector
- Type *AllocaTy = I.getAllocatedType();
-
- LLVM_DEBUG(dbgs() << "Trying to promote " << I << '\n');
-
- if (tryPromoteAllocaToVector(&I, DL, MaxVGPRs))
- return true; // Promoted to vector.
-
- if (DisablePromoteAllocaToLDS)
- return false;
-
const Function &ContainingFunction = *I.getParent()->getParent();
CallingConv::ID CC = ContainingFunction.getCallingConv();
@@ -978,7 +1324,8 @@ bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
// could end up using more than the maximum due to alignment padding.
uint32_t NewSize = alignTo(CurrentLocalMemUsage, Alignment);
- uint32_t AllocSize = WorkGroupSize * DL.getTypeAllocSize(AllocaTy);
+ uint32_t AllocSize =
+ WorkGroupSize * DL.getTypeAllocSize(I.getAllocatedType());
NewSize += AllocSize;
if (NewSize > LocalMemLimit) {
@@ -989,7 +1336,7 @@ bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
CurrentLocalMemUsage = NewSize;
- std::vector<Value*> WorkList;
+ std::vector<Value *> WorkList;
if (!collectUsesWithPtrTypes(&I, &I, WorkList)) {
LLVM_DEBUG(dbgs() << " Do not know how to convert all uses\n");
@@ -1021,10 +1368,8 @@ bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
Value *TID = Builder.CreateAdd(Tmp0, Tmp1);
TID = Builder.CreateAdd(TID, TIdZ);
- Value *Indices[] = {
- Constant::getNullValue(Type::getInt32Ty(Mod->getContext())),
- TID
- };
+ LLVMContext &Context = Mod->getContext();
+ Value *Indices[] = {Constant::getNullValue(Type::getInt32Ty(Context)), TID};
Value *Offset = Builder.CreateInBoundsGEP(GVTy, GV, Indices);
I.mutateType(Offset->getType());
@@ -1037,9 +1382,7 @@ bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
CallInst *Call = dyn_cast<CallInst>(V);
if (!Call) {
if (ICmpInst *CI = dyn_cast<ICmpInst>(V)) {
- Value *Src0 = CI->getOperand(0);
- PointerType *NewTy = PointerType::getWithSamePointeeType(
- cast<PointerType>(Src0->getType()), AMDGPUAS::LOCAL_ADDRESS);
+ PointerType *NewTy = PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS);
if (isa<ConstantPointerNull>(CI->getOperand(0)))
CI->setOperand(0, ConstantPointerNull::get(NewTy));
@@ -1055,8 +1398,7 @@ bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
if (isa<AddrSpaceCastInst>(V))
continue;
- PointerType *NewTy = PointerType::getWithSamePointeeType(
- cast<PointerType>(V->getType()), AMDGPUAS::LOCAL_ADDRESS);
+ PointerType *NewTy = PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS);
// FIXME: It doesn't really make sense to try to do this for all
// instructions.
@@ -1116,8 +1458,7 @@ bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
Function *ObjectSize = Intrinsic::getDeclaration(
Mod, Intrinsic::objectsize,
{Intr->getType(),
- PointerType::getWithSamePointeeType(
- cast<PointerType>(Src->getType()), AMDGPUAS::LOCAL_ADDRESS)});
+ PointerType::get(Context, AMDGPUAS::LOCAL_ADDRESS)});
CallInst *NewCall = Builder.CreateCall(
ObjectSize,
@@ -1138,10 +1479,9 @@ bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
assert(ID == Intrinsic::memcpy || ID == Intrinsic::memmove);
MemTransferInst *MI = cast<MemTransferInst>(Intr);
- auto *B =
- Builder.CreateMemTransferInst(ID, MI->getRawDest(), MI->getDestAlign(),
- MI->getRawSource(), MI->getSourceAlign(),
- MI->getLength(), MI->isVolatile());
+ auto *B = Builder.CreateMemTransferInst(
+ ID, MI->getRawDest(), MI->getDestAlign(), MI->getRawSource(),
+ MI->getSourceAlign(), MI->getLength(), MI->isVolatile());
for (unsigned I = 0; I != 2; ++I) {
if (uint64_t Bytes = Intr->getParamDereferenceableBytes(I)) {
@@ -1154,80 +1494,3 @@ bool AMDGPUPromoteAllocaImpl::handleAlloca(AllocaInst &I, bool SufficientLDS) {
return true;
}
-
-bool handlePromoteAllocaToVector(AllocaInst &I, unsigned MaxVGPRs) {
- // Array allocations are probably not worth handling, since an allocation of
- // the array type is the canonical form.
- if (!I.isStaticAlloca() || I.isArrayAllocation())
- return false;
-
- LLVM_DEBUG(dbgs() << "Trying to promote " << I << '\n');
-
- Module *Mod = I.getParent()->getParent()->getParent();
- return tryPromoteAllocaToVector(&I, Mod->getDataLayout(), MaxVGPRs);
-}
-
-bool promoteAllocasToVector(Function &F, TargetMachine &TM) {
- if (DisablePromoteAllocaToVector)
- return false;
-
- const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
- if (!ST.isPromoteAllocaEnabled())
- return false;
-
- unsigned MaxVGPRs;
- if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
- const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
- MaxVGPRs = ST.getMaxNumVGPRs(ST.getWavesPerEU(F).first);
- // A non-entry function has only 32 caller preserved registers.
- // Do not promote alloca which will force spilling.
- if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
- MaxVGPRs = std::min(MaxVGPRs, 32u);
- } else {
- MaxVGPRs = 128;
- }
-
- bool Changed = false;
- BasicBlock &EntryBB = *F.begin();
-
- SmallVector<AllocaInst *, 16> Allocas;
- for (Instruction &I : EntryBB) {
- if (AllocaInst *AI = dyn_cast<AllocaInst>(&I))
- Allocas.push_back(AI);
- }
-
- for (AllocaInst *AI : Allocas) {
- if (handlePromoteAllocaToVector(*AI, MaxVGPRs))
- Changed = true;
- }
-
- return Changed;
-}
-
-bool AMDGPUPromoteAllocaToVector::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
- if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
- return promoteAllocasToVector(F, TPC->getTM<TargetMachine>());
- }
- return false;
-}
-
-PreservedAnalyses
-AMDGPUPromoteAllocaToVectorPass::run(Function &F, FunctionAnalysisManager &AM) {
- bool Changed = promoteAllocasToVector(F, TM);
- if (Changed) {
- PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
- return PA;
- }
- return PreservedAnalyses::all();
-}
-
-FunctionPass *llvm::createAMDGPUPromoteAlloca() {
- return new AMDGPUPromoteAlloca();
-}
-
-FunctionPass *llvm::createAMDGPUPromoteAllocaToVector() {
- return new AMDGPUPromoteAllocaToVector();
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
index ed450f59e4b3..9b654a2bba7f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp
@@ -116,7 +116,7 @@ bool AMDGPUPromoteKernelArguments::promotePointer(Value *Ptr) {
// Cast pointer to global address space and back to flat and let
// Infer Address Spaces pass to do all necessary rewriting.
PointerType *NewPT =
- PointerType::getWithSamePointeeType(PT, AMDGPUAS::GLOBAL_ADDRESS);
+ PointerType::get(PT->getContext(), AMDGPUAS::GLOBAL_ADDRESS);
Value *Cast =
B.CreateAddrSpaceCast(Ptr, NewPT, Twine(Ptr->getName(), ".global"));
Value *CastBack =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
deleted file mode 100644
index 5a4ab467731e..000000000000
--- a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
+++ /dev/null
@@ -1,426 +0,0 @@
-//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief This pass propagates attributes from kernels to the non-entry
-/// functions. Most of the library functions were not compiled for specific ABI,
-/// yet will be correctly compiled if proper attributes are propagated from the
-/// caller.
-///
-/// The pass analyzes call graph and propagates ABI target features through the
-/// call graph.
-///
-/// It can run in two modes: as a function or module pass. A function pass
-/// simply propagates attributes. A module pass clones functions if there are
-/// callers with different ABI. If a function is cloned all call sites will
-/// be updated to use a correct clone.
-///
-/// A function pass is limited in functionality but can run early in the
-/// pipeline. A module pass is more powerful but has to run late, so misses
-/// library folding opportunities.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-
-#define DEBUG_TYPE "amdgpu-propagate-attributes"
-
-using namespace llvm;
-
-namespace llvm {
-extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
-}
-
-namespace {
-
-// Target features to propagate.
-static constexpr const FeatureBitset TargetFeatures = {
- AMDGPU::FeatureWavefrontSize16,
- AMDGPU::FeatureWavefrontSize32,
- AMDGPU::FeatureWavefrontSize64
-};
-
-// Attributes to propagate.
-// TODO: Support conservative min/max merging instead of cloning.
-static constexpr const char *AttributeNames[] = {"amdgpu-waves-per-eu"};
-
-static constexpr unsigned NumAttr = std::size(AttributeNames);
-
-class AMDGPUPropagateAttributes {
-
- class FnProperties {
- private:
- explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
-
- public:
- explicit FnProperties(const TargetMachine &TM, const Function &F) {
- Features = TM.getSubtargetImpl(F)->getFeatureBits();
-
- for (unsigned I = 0; I < NumAttr; ++I)
- if (F.hasFnAttribute(AttributeNames[I]))
- Attributes[I] = F.getFnAttribute(AttributeNames[I]);
- }
-
- bool operator == (const FnProperties &Other) const {
- if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
- return false;
- for (unsigned I = 0; I < NumAttr; ++I)
- if (Attributes[I] != Other.Attributes[I])
- return false;
- return true;
- }
-
- FnProperties adjustToCaller(const FnProperties &CallerProps) const {
- FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
- for (unsigned I = 0; I < NumAttr; ++I)
- New.Attributes[I] = CallerProps.Attributes[I];
- return New;
- }
-
- FeatureBitset Features;
- std::optional<Attribute> Attributes[NumAttr];
- };
-
- class Clone {
- public:
- Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
- Properties(Props), OrigF(OrigF), NewF(NewF) {}
-
- FnProperties Properties;
- Function *OrigF;
- Function *NewF;
- };
-
- const TargetMachine *TM;
-
- // Clone functions as needed or just set attributes.
- bool AllowClone;
-
- // Option propagation roots.
- SmallSet<Function *, 32> Roots;
-
- // Clones of functions with their attributes.
- SmallVector<Clone, 32> Clones;
-
- // Find a clone with required features.
- Function *findFunction(const FnProperties &PropsNeeded,
- Function *OrigF);
-
- // Clone function \p F and set \p NewProps on the clone.
- // Cole takes the name of original function.
- Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
-
- // Set new function's features in place.
- void setFeatures(Function &F, const FeatureBitset &NewFeatures);
-
- // Set new function's attributes in place.
- void setAttributes(Function &F,
- const ArrayRef<std::optional<Attribute>> NewAttrs);
-
- std::string getFeatureString(const FeatureBitset &Features) const;
-
- // Propagate attributes from Roots.
- bool process();
-
-public:
- AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
- TM(TM), AllowClone(AllowClone) {}
-
- // Use F as a root and propagate its attributes.
- bool process(Function &F);
-
- // Propagate attributes starting from kernel functions.
- bool process(Module &M);
-};
-
-// Allows to propagate attributes early, but no cloning is allowed as it must
-// be a function pass to run before any optimizations.
-// TODO: We shall only need a one instance of module pass, but that needs to be
-// in the linker pipeline which is currently not possible.
-class AMDGPUPropagateAttributesEarly : public FunctionPass {
- const TargetMachine *TM;
-
-public:
- static char ID; // Pass identification
-
- AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
- FunctionPass(ID), TM(TM) {
- initializeAMDGPUPropagateAttributesEarlyPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-};
-
-// Allows to propagate attributes with cloning but does that late in the
-// pipeline.
-class AMDGPUPropagateAttributesLate : public ModulePass {
- const TargetMachine *TM;
-
-public:
- static char ID; // Pass identification
-
- AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
- ModulePass(ID), TM(TM) {
- initializeAMDGPUPropagateAttributesLatePass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override;
-};
-
-} // end anonymous namespace.
-
-char AMDGPUPropagateAttributesEarly::ID = 0;
-char AMDGPUPropagateAttributesLate::ID = 0;
-
-INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
- "amdgpu-propagate-attributes-early",
- "Early propagate attributes from kernels to functions",
- false, false)
-INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
- "amdgpu-propagate-attributes-late",
- "Late propagate attributes from kernels to functions",
- false, false)
-
-Function *
-AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
- Function *OrigF) {
- // TODO: search for clone's clones.
- for (Clone &C : Clones)
- if (C.OrigF == OrigF && PropsNeeded == C.Properties)
- return C.NewF;
-
- return nullptr;
-}
-
-bool AMDGPUPropagateAttributes::process(Module &M) {
- for (auto &F : M.functions())
- if (AMDGPU::isKernel(F.getCallingConv()))
- Roots.insert(&F);
-
- return Roots.empty() ? false : process();
-}
-
-bool AMDGPUPropagateAttributes::process(Function &F) {
- Roots.insert(&F);
- return process();
-}
-
-bool AMDGPUPropagateAttributes::process() {
- bool Changed = false;
- SmallSet<Function *, 32> NewRoots;
- SmallSet<Function *, 32> Replaced;
-
- assert(!Roots.empty());
- Module &M = *(*Roots.begin())->getParent();
-
- do {
- Roots.insert(NewRoots.begin(), NewRoots.end());
- NewRoots.clear();
-
- for (auto &F : M.functions()) {
- if (F.isDeclaration())
- continue;
-
- const FnProperties CalleeProps(*TM, F);
- SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
- SmallSet<CallBase *, 32> Visited;
-
- for (User *U : F.users()) {
- Instruction *I = dyn_cast<Instruction>(U);
- if (!I)
- continue;
- CallBase *CI = dyn_cast<CallBase>(I);
- // Only propagate attributes if F is the called function. Specifically,
- // do not propagate attributes if F is passed as an argument.
- // FIXME: handle bitcasted callee, e.g.
- // %retval = call i8* bitcast (i32* ()* @f to i8* ()*)()
- if (!CI || CI->getCalledOperand() != &F)
- continue;
- Function *Caller = CI->getCaller();
- if (!Caller || !Visited.insert(CI).second)
- continue;
- if (!Roots.count(Caller) && !NewRoots.count(Caller))
- continue;
-
- const FnProperties CallerProps(*TM, *Caller);
-
- if (CalleeProps == CallerProps) {
- if (!Roots.count(&F))
- NewRoots.insert(&F);
- continue;
- }
-
- Function *NewF = findFunction(CallerProps, &F);
- if (!NewF) {
- const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
- if (!AllowClone) {
- // This may set different features on different iterations if
- // there is a contradiction in callers' attributes. In this case
- // we rely on a second pass running on Module, which is allowed
- // to clone.
- setFeatures(F, NewProps.Features);
- setAttributes(F, NewProps.Attributes);
- NewRoots.insert(&F);
- Changed = true;
- break;
- }
-
- NewF = cloneWithProperties(F, NewProps);
- Clones.push_back(Clone(CallerProps, &F, NewF));
- NewRoots.insert(NewF);
- }
-
- ToReplace.push_back(std::pair(CI, NewF));
- Replaced.insert(&F);
-
- Changed = true;
- }
-
- while (!ToReplace.empty()) {
- auto R = ToReplace.pop_back_val();
- R.first->setCalledFunction(R.second);
- }
- }
- } while (!NewRoots.empty());
-
- for (Function *F : Replaced) {
- if (F->use_empty())
- F->eraseFromParent();
- }
-
- Roots.clear();
- Clones.clear();
-
- return Changed;
-}
-
-Function *
-AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
- const FnProperties &NewProps) {
- LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
-
- ValueToValueMapTy dummy;
- Function *NewF = CloneFunction(&F, dummy);
- setFeatures(*NewF, NewProps.Features);
- setAttributes(*NewF, NewProps.Attributes);
- NewF->setVisibility(GlobalValue::DefaultVisibility);
- NewF->setLinkage(GlobalValue::InternalLinkage);
-
- // Swap names. If that is the only clone it will retain the name of now
- // dead value. Preserve original name for externally visible functions.
- if (F.hasName() && F.hasLocalLinkage()) {
- std::string NewName = std::string(NewF->getName());
- NewF->takeName(&F);
- F.setName(NewName);
- }
-
- return NewF;
-}
-
-void AMDGPUPropagateAttributes::setFeatures(Function &F,
- const FeatureBitset &NewFeatures) {
- std::string NewFeatureStr = getFeatureString(NewFeatures);
-
- LLVM_DEBUG(dbgs() << "Set features "
- << getFeatureString(NewFeatures & TargetFeatures)
- << " on " << F.getName() << '\n');
-
- F.removeFnAttr("target-features");
- F.addFnAttr("target-features", NewFeatureStr);
-}
-
-void AMDGPUPropagateAttributes::setAttributes(
- Function &F, const ArrayRef<std::optional<Attribute>> NewAttrs) {
- LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
- for (unsigned I = 0; I < NumAttr; ++I) {
- F.removeFnAttr(AttributeNames[I]);
- if (NewAttrs[I]) {
- LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
- F.addFnAttr(*NewAttrs[I]);
- }
- }
-}
-
-std::string
-AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
-{
- std::string Ret;
- for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
- if (Features[KV.Value])
- Ret += (StringRef("+") + KV.Key + ",").str();
- else if (TargetFeatures[KV.Value])
- Ret += (StringRef("-") + KV.Key + ",").str();
- }
- Ret.pop_back(); // Remove last comma.
- return Ret;
-}
-
-bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
- if (!TM) {
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC)
- return false;
-
- TM = &TPC->getTM<TargetMachine>();
- }
-
- if (!AMDGPU::isKernel(F.getCallingConv()))
- return false;
-
- return AMDGPUPropagateAttributes(TM, false).process(F);
-}
-
-bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
- if (!TM) {
- auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
- if (!TPC)
- return false;
-
- TM = &TPC->getTM<TargetMachine>();
- }
-
- return AMDGPUPropagateAttributes(TM, true).process(M);
-}
-
-FunctionPass
-*llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
- return new AMDGPUPropagateAttributesEarly(TM);
-}
-
-ModulePass
-*llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
- return new AMDGPUPropagateAttributesLate(TM);
-}
-
-PreservedAnalyses
-AMDGPUPropagateAttributesEarlyPass::run(Function &F,
- FunctionAnalysisManager &AM) {
- if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
- return PreservedAnalyses::all();
-
- return AMDGPUPropagateAttributes(&TM, false).process(F)
- ? PreservedAnalyses::none()
- : PreservedAnalyses::all();
-}
-
-PreservedAnalyses
-AMDGPUPropagateAttributesLatePass::run(Module &M, ModuleAnalysisManager &AM) {
- return AMDGPUPropagateAttributes(&TM, true).process(M)
- ? PreservedAnalyses::none()
- : PreservedAnalyses::all();
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
index b4315950b225..c935e384da8e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp
@@ -20,37 +20,55 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"
+
+#define GET_GICOMBINER_DEPS
+#include "AMDGPUGenPreLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_DEPS
+
#define DEBUG_TYPE "amdgpu-regbank-combiner"
using namespace llvm;
using namespace MIPatternMatch;
-class AMDGPURegBankCombinerHelper {
+namespace {
+#define GET_GICOMBINER_TYPES
+#include "AMDGPUGenRegBankGICombiner.inc"
+#undef GET_GICOMBINER_TYPES
+
+class AMDGPURegBankCombinerImpl : public GIMatchTableExecutor {
protected:
+ const AMDGPURegBankCombinerImplRuleConfig &RuleConfig;
+
MachineIRBuilder &B;
MachineFunction &MF;
MachineRegisterInfo &MRI;
- const GCNSubtarget &Subtarget;
+ const GCNSubtarget &STI;
const RegisterBankInfo &RBI;
const TargetRegisterInfo &TRI;
const SIInstrInfo &TII;
CombinerHelper &Helper;
+ GISelChangeObserver &Observer;
public:
- AMDGPURegBankCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
- : B(B), MF(B.getMF()), MRI(*B.getMRI()),
- Subtarget(MF.getSubtarget<GCNSubtarget>()),
- RBI(*Subtarget.getRegBankInfo()), TRI(*Subtarget.getRegisterInfo()),
- TII(*Subtarget.getInstrInfo()), Helper(Helper){};
+ AMDGPURegBankCombinerImpl(
+ const AMDGPURegBankCombinerImplRuleConfig &RuleConfig,
+ MachineIRBuilder &B, CombinerHelper &Helper,
+ GISelChangeObserver &Observer);
+
+ static const char *getName() { return "AMDGPURegBankCombinerImpl"; }
- bool isVgprRegBank(Register Reg);
- Register getAsVgpr(Register Reg);
+ bool tryCombineAll(MachineInstr &I) const;
+
+ bool isVgprRegBank(Register Reg) const;
+ Register getAsVgpr(Register Reg) const;
struct MinMaxMedOpc {
unsigned Min, Max, Med;
@@ -61,33 +79,58 @@ public:
Register Val0, Val1, Val2;
};
- MinMaxMedOpc getMinMaxPair(unsigned Opc);
+ MinMaxMedOpc getMinMaxPair(unsigned Opc) const;
template <class m_Cst, typename CstTy>
bool matchMed(MachineInstr &MI, MachineRegisterInfo &MRI, MinMaxMedOpc MMMOpc,
- Register &Val, CstTy &K0, CstTy &K1);
+ Register &Val, CstTy &K0, CstTy &K1) const;
- bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
- bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
- bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg);
- bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg);
- void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo);
- void applyClamp(MachineInstr &MI, Register &Reg);
+ bool matchIntMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
+ bool matchFPMinMaxToMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
+ bool matchFPMinMaxToClamp(MachineInstr &MI, Register &Reg) const;
+ bool matchFPMed3ToClamp(MachineInstr &MI, Register &Reg) const;
+ void applyMed3(MachineInstr &MI, Med3MatchInfo &MatchInfo) const;
+ void applyClamp(MachineInstr &MI, Register &Reg) const;
private:
- AMDGPU::SIModeRegisterDefaults getMode();
- bool getIEEE();
- bool getDX10Clamp();
- bool isFminnumIeee(const MachineInstr &MI);
- bool isFCst(MachineInstr *MI);
- bool isClampZeroToOne(MachineInstr *K0, MachineInstr *K1);
+ SIModeRegisterDefaults getMode() const;
+ bool getIEEE() const;
+ bool getDX10Clamp() const;
+ bool isFminnumIeee(const MachineInstr &MI) const;
+ bool isFCst(MachineInstr *MI) const;
+ bool isClampZeroToOne(MachineInstr *K0, MachineInstr *K1) const;
+
+#define GET_GICOMBINER_CLASS_MEMBERS
+#define AMDGPUSubtarget GCNSubtarget
+#include "AMDGPUGenRegBankGICombiner.inc"
+#undef GET_GICOMBINER_CLASS_MEMBERS
+#undef AMDGPUSubtarget
};
-bool AMDGPURegBankCombinerHelper::isVgprRegBank(Register Reg) {
+#define GET_GICOMBINER_IMPL
+#define AMDGPUSubtarget GCNSubtarget
+#include "AMDGPUGenRegBankGICombiner.inc"
+#undef AMDGPUSubtarget
+#undef GET_GICOMBINER_IMPL
+
+AMDGPURegBankCombinerImpl::AMDGPURegBankCombinerImpl(
+ const AMDGPURegBankCombinerImplRuleConfig &RuleConfig, MachineIRBuilder &B,
+ CombinerHelper &Helper, GISelChangeObserver &Observer)
+ : RuleConfig(RuleConfig), B(B), MF(B.getMF()), MRI(*B.getMRI()),
+ STI(MF.getSubtarget<GCNSubtarget>()), RBI(*STI.getRegBankInfo()),
+ TRI(*STI.getRegisterInfo()), TII(*STI.getInstrInfo()), Helper(Helper),
+ Observer(Observer),
+#define GET_GICOMBINER_CONSTRUCTOR_INITS
+#include "AMDGPUGenRegBankGICombiner.inc"
+#undef GET_GICOMBINER_CONSTRUCTOR_INITS
+{
+}
+
+bool AMDGPURegBankCombinerImpl::isVgprRegBank(Register Reg) const {
return RBI.getRegBank(Reg, MRI, TRI)->getID() == AMDGPU::VGPRRegBankID;
}
-Register AMDGPURegBankCombinerHelper::getAsVgpr(Register Reg) {
+Register AMDGPURegBankCombinerImpl::getAsVgpr(Register Reg) const {
if (isVgprRegBank(Reg))
return Reg;
@@ -104,8 +147,8 @@ Register AMDGPURegBankCombinerHelper::getAsVgpr(Register Reg) {
return VgprReg;
}
-AMDGPURegBankCombinerHelper::MinMaxMedOpc
-AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
+AMDGPURegBankCombinerImpl::MinMaxMedOpc
+AMDGPURegBankCombinerImpl::getMinMaxPair(unsigned Opc) const {
switch (Opc) {
default:
llvm_unreachable("Unsupported opcode");
@@ -126,10 +169,10 @@ AMDGPURegBankCombinerHelper::getMinMaxPair(unsigned Opc) {
}
template <class m_Cst, typename CstTy>
-bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI,
- MachineRegisterInfo &MRI,
- MinMaxMedOpc MMMOpc, Register &Val,
- CstTy &K0, CstTy &K1) {
+bool AMDGPURegBankCombinerImpl::matchMed(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MinMaxMedOpc MMMOpc, Register &Val,
+ CstTy &K0, CstTy &K1) const {
// 4 operand commutes of: min(max(Val, K0), K1).
// Find K1 from outer instr: min(max(...), K1) or min(K1, max(...)).
// Find K0 and Val from inner instr: max(K0, Val) or max(Val, K0).
@@ -147,16 +190,15 @@ bool AMDGPURegBankCombinerHelper::matchMed(MachineInstr &MI,
m_Cst(K0))));
}
-bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
- MachineInstr &MI, Med3MatchInfo &MatchInfo) {
+bool AMDGPURegBankCombinerImpl::matchIntMinMaxToMed3(
+ MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
Register Dst = MI.getOperand(0).getReg();
if (!isVgprRegBank(Dst))
return false;
// med3 for i16 is only available on gfx9+, and not available for v2i16.
LLT Ty = MRI.getType(Dst);
- if ((Ty != LLT::scalar(16) || !Subtarget.hasMed3_16()) &&
- Ty != LLT::scalar(32))
+ if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32))
return false;
MinMaxMedOpc OpcodeTriple = getMinMaxPair(MI.getOpcode());
@@ -193,14 +235,13 @@ bool AMDGPURegBankCombinerHelper::matchIntMinMaxToMed3(
// fmed3(NaN, K0, K1) = min(min(NaN, K0), K1) = min(K0, K1) = K0
// min(max(NaN, K0), K1) = min(K0, K1) = K0 (can clamp when dx10_clamp = true)
// max(min(NaN, K1), K0) = max(K1, K0) = K1 != K0
-bool AMDGPURegBankCombinerHelper::matchFPMinMaxToMed3(
- MachineInstr &MI, Med3MatchInfo &MatchInfo) {
+bool AMDGPURegBankCombinerImpl::matchFPMinMaxToMed3(
+ MachineInstr &MI, Med3MatchInfo &MatchInfo) const {
Register Dst = MI.getOperand(0).getReg();
LLT Ty = MRI.getType(Dst);
// med3 for f16 is only available on gfx9+, and not available for v2f16.
- if ((Ty != LLT::scalar(16) || !Subtarget.hasMed3_16()) &&
- Ty != LLT::scalar(32))
+ if ((Ty != LLT::scalar(16) || !STI.hasMed3_16()) && Ty != LLT::scalar(32))
return false;
auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
@@ -233,8 +274,8 @@ bool AMDGPURegBankCombinerHelper::matchFPMinMaxToMed3(
return false;
}
-bool AMDGPURegBankCombinerHelper::matchFPMinMaxToClamp(MachineInstr &MI,
- Register &Reg) {
+bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI,
+ Register &Reg) const {
// Clamp is available on all types after regbankselect (f16, f32, f64, v2f16).
auto OpcodeTriple = getMinMaxPair(MI.getOpcode());
Register Val;
@@ -269,16 +310,13 @@ bool AMDGPURegBankCombinerHelper::matchFPMinMaxToClamp(MachineInstr &MI,
// min(min(NaN, 0.0), 1.0) = min(0.0, 1.0) = 0.0
// min(min(NaN, 1.0), 0.0) = min(1.0, 0.0) = 0.0
// min(min(0.0, 1.0), NaN) = min(0.0, NaN) = 0.0
-bool AMDGPURegBankCombinerHelper::matchFPMed3ToClamp(MachineInstr &MI,
- Register &Reg) {
- if (MI.getIntrinsicID() != Intrinsic::amdgcn_fmed3)
- return false;
-
+bool AMDGPURegBankCombinerImpl::matchFPMed3ToClamp(MachineInstr &MI,
+ Register &Reg) const {
// In llvm-ir, clamp is often represented as an intrinsic call to
// @llvm.amdgcn.fmed3.f32(%Val, 0.0, 1.0). Check for other operand orders.
- MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
- MachineInstr *Src1 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);
- MachineInstr *Src2 = getDefIgnoringCopies(MI.getOperand(4).getReg(), MRI);
+ MachineInstr *Src0 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
+ MachineInstr *Src1 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);
+ MachineInstr *Src2 = getDefIgnoringCopies(MI.getOperand(3).getReg(), MRI);
if (isFCst(Src0) && !isFCst(Src1))
std::swap(Src0, Src1);
@@ -311,15 +349,16 @@ bool AMDGPURegBankCombinerHelper::matchFPMed3ToClamp(MachineInstr &MI,
return false;
}
-void AMDGPURegBankCombinerHelper::applyClamp(MachineInstr &MI, Register &Reg) {
+void AMDGPURegBankCombinerImpl::applyClamp(MachineInstr &MI,
+ Register &Reg) const {
B.setInstrAndDebugLoc(MI);
B.buildInstr(AMDGPU::G_AMDGPU_CLAMP, {MI.getOperand(0)}, {Reg},
MI.getFlags());
MI.eraseFromParent();
}
-void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
- Med3MatchInfo &MatchInfo) {
+void AMDGPURegBankCombinerImpl::applyMed3(MachineInstr &MI,
+ Med3MatchInfo &MatchInfo) const {
B.setInstrAndDebugLoc(MI);
B.buildInstr(MatchInfo.Opc, {MI.getOperand(0)},
{getAsVgpr(MatchInfo.Val0), getAsVgpr(MatchInfo.Val1),
@@ -328,24 +367,26 @@ void AMDGPURegBankCombinerHelper::applyMed3(MachineInstr &MI,
MI.eraseFromParent();
}
-AMDGPU::SIModeRegisterDefaults AMDGPURegBankCombinerHelper::getMode() {
+SIModeRegisterDefaults AMDGPURegBankCombinerImpl::getMode() const {
return MF.getInfo<SIMachineFunctionInfo>()->getMode();
}
-bool AMDGPURegBankCombinerHelper::getIEEE() { return getMode().IEEE; }
+bool AMDGPURegBankCombinerImpl::getIEEE() const { return getMode().IEEE; }
-bool AMDGPURegBankCombinerHelper::getDX10Clamp() { return getMode().DX10Clamp; }
+bool AMDGPURegBankCombinerImpl::getDX10Clamp() const {
+ return getMode().DX10Clamp;
+}
-bool AMDGPURegBankCombinerHelper::isFminnumIeee(const MachineInstr &MI) {
+bool AMDGPURegBankCombinerImpl::isFminnumIeee(const MachineInstr &MI) const {
return MI.getOpcode() == AMDGPU::G_FMINNUM_IEEE;
}
-bool AMDGPURegBankCombinerHelper::isFCst(MachineInstr *MI) {
+bool AMDGPURegBankCombinerImpl::isFCst(MachineInstr *MI) const {
return MI->getOpcode() == AMDGPU::G_FCONSTANT;
}
-bool AMDGPURegBankCombinerHelper::isClampZeroToOne(MachineInstr *K0,
- MachineInstr *K1) {
+bool AMDGPURegBankCombinerImpl::isClampZeroToOne(MachineInstr *K0,
+ MachineInstr *K1) const {
if (isFCst(K0) && isFCst(K1)) {
const ConstantFP *KO_FPImm = K0->getOperand(1).getFPImm();
const ConstantFP *K1_FPImm = K1->getOperand(1).getFPImm();
@@ -355,40 +396,19 @@ bool AMDGPURegBankCombinerHelper::isClampZeroToOne(MachineInstr *K0,
return false;
}
-class AMDGPURegBankCombinerHelperState {
-protected:
- CombinerHelper &Helper;
- AMDGPURegBankCombinerHelper &RegBankHelper;
-
-public:
- AMDGPURegBankCombinerHelperState(CombinerHelper &Helper,
- AMDGPURegBankCombinerHelper &RegBankHelper)
- : Helper(Helper), RegBankHelper(RegBankHelper) {}
-};
-
-#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
-#include "AMDGPUGenRegBankGICombiner.inc"
-#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_DEPS
-
-namespace {
-#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
-#include "AMDGPUGenRegBankGICombiner.inc"
-#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_H
-
class AMDGPURegBankCombinerInfo final : public CombinerInfo {
GISelKnownBits *KB;
MachineDominatorTree *MDT;
+ AMDGPURegBankCombinerImplRuleConfig RuleConfig;
public:
- AMDGPUGenRegBankCombinerHelperRuleConfig GeneratedRuleCfg;
-
AMDGPURegBankCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
- const AMDGPULegalizerInfo *LI,
- GISelKnownBits *KB, MachineDominatorTree *MDT)
+ const AMDGPULegalizerInfo *LI, GISelKnownBits *KB,
+ MachineDominatorTree *MDT)
: CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
/*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
KB(KB), MDT(MDT) {
- if (!GeneratedRuleCfg.parseCommandLineOption())
+ if (!RuleConfig.parseCommandLineOption())
report_fatal_error("Invalid rule identifier");
}
@@ -397,23 +417,15 @@ public:
};
bool AMDGPURegBankCombinerInfo::combine(GISelChangeObserver &Observer,
- MachineInstr &MI,
- MachineIRBuilder &B) const {
+ MachineInstr &MI,
+ MachineIRBuilder &B) const {
CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ false, KB, MDT);
- AMDGPURegBankCombinerHelper RegBankHelper(B, Helper);
- AMDGPUGenRegBankCombinerHelper Generated(GeneratedRuleCfg, Helper,
- RegBankHelper);
-
- if (Generated.tryCombineAll(Observer, MI, B))
- return true;
-
- return false;
+ // TODO: Do not re-create the Impl on every inst; it should be per function.
+ AMDGPURegBankCombinerImpl Impl(RuleConfig, B, Helper, Observer);
+ Impl.setupMF(*MI.getMF(), KB);
+ return Impl.tryCombineAll(MI);
}
-#define AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
-#include "AMDGPUGenRegBankGICombiner.inc"
-#undef AMDGPUREGBANKCOMBINERHELPER_GENCOMBINERHELPER_CPP
-
// Pass boilerplate
// ================
@@ -423,9 +435,7 @@ public:
AMDGPURegBankCombiner(bool IsOptNone = false);
- StringRef getPassName() const override {
- return "AMDGPURegBankCombiner";
- }
+ StringRef getPassName() const override { return "AMDGPURegBankCombiner"; }
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -449,7 +459,7 @@ void AMDGPURegBankCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
}
AMDGPURegBankCombiner::AMDGPURegBankCombiner(bool IsOptNone)
- : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
+ : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
initializeAMDGPURegBankCombinerPass(*PassRegistry::getPassRegistry());
}
@@ -463,14 +473,14 @@ bool AMDGPURegBankCombiner::runOnMachineFunction(MachineFunction &MF) {
MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const AMDGPULegalizerInfo *LI
- = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
+ const AMDGPULegalizerInfo *LI =
+ static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
MachineDominatorTree *MDT =
IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
- AMDGPURegBankCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
- F.hasMinSize(), LI, KB, MDT);
+ AMDGPURegBankCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), F.hasMinSize(),
+ LI, KB, MDT);
Combiner C(PCInfo, TPC);
return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp
new file mode 100644
index 000000000000..2ea03ddb1fcc
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.cpp
@@ -0,0 +1,77 @@
+//===- AMDGPURegBankSelect.cpp -----------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Use MachineUniformityAnalysis as the primary basis for making SGPR vs. VGPR
+// register bank selection. Use/def analysis as in the default RegBankSelect can
+// be useful in narrower circumstances (e.g. choosing AGPR vs. VGPR for gfx908).
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPURegBankSelect.h"
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "llvm/CodeGen/MachineUniformityAnalysis.h"
+#include "llvm/InitializePasses.h"
+
+#define DEBUG_TYPE "regbankselect"
+
+using namespace llvm;
+
+AMDGPURegBankSelect::AMDGPURegBankSelect(Mode RunningMode)
+ : RegBankSelect(AMDGPURegBankSelect::ID, RunningMode) {}
+
+char AMDGPURegBankSelect::ID = 0;
+
+StringRef AMDGPURegBankSelect::getPassName() const {
+ return "AMDGPURegBankSelect";
+}
+
+void AMDGPURegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineCycleInfoWrapperPass>();
+ AU.addRequired<MachineDominatorTree>();
+ // TODO: Preserve DomTree
+ RegBankSelect::getAnalysisUsage(AU);
+}
+
+INITIALIZE_PASS_BEGIN(AMDGPURegBankSelect, "amdgpu-" DEBUG_TYPE,
+ "AMDGPU Register Bank Select", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(AMDGPURegBankSelect, "amdgpu-" DEBUG_TYPE,
+ "AMDGPU Register Bank Select", false, false)
+
+bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) {
+ // If the ISel pipeline failed, do not bother running this pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');
+ const Function &F = MF.getFunction();
+ Mode SaveOptMode = OptMode;
+ if (F.hasOptNone())
+ OptMode = Mode::Fast;
+ init(MF);
+
+ assert(checkFunctionIsLegal(MF));
+
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ MachineCycleInfo &CycleInfo =
+ getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
+ MachineDominatorTree &DomTree = getAnalysis<MachineDominatorTree>();
+
+ MachineUniformityInfo Uniformity =
+ computeMachineUniformityInfo(MF, CycleInfo, DomTree.getBase(),
+ !ST.isSingleLaneExecution(F));
+ (void)Uniformity; // TODO: Use this
+
+ assignRegisterBanks(MF);
+
+ OptMode = SaveOptMode;
+ return false;
+}
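This new pass computes MachineUniformityInfo but, per the TODO above, does not consume it yet. As a hedged illustration only, not the author's design, one plausible consumer would map uniform virtual registers to the SGPR bank and divergent ones to the VGPR bank. The helper below is hypothetical; it assumes this file's existing includes plus the AMDGPU register-bank definitions, and that MachineUniformityInfo exposes an isUniform(Register) query.

// Hypothetical sketch: derive a bank choice from the uniformity result.
static const RegisterBank *pickBankForVReg(const MachineUniformityInfo &UI,
                                           Register VReg) {
  // Uniform values can live in scalar registers; divergent values must go to
  // the per-lane VGPR bank.
  return UI.isUniform(VReg) ? &AMDGPU::SGPRRegBank : &AMDGPU::VGPRRegBank;
}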
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.h
new file mode 100644
index 000000000000..83e4a6b41da1
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankSelect.h
@@ -0,0 +1,29 @@
+//===- AMDGPURegBankSelect.h -------------------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKSELECT_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUREGBANKSELECT_H
+
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+
+namespace llvm {
+
+class AMDGPURegBankSelect final : public RegBankSelect {
+public:
+ static char ID;
+
+ AMDGPURegBankSelect(Mode RunningMode = Fast);
+
+ StringRef getPassName() const override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // namespace llvm
+#endif
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 5e16a405f375..0203af32e389 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -215,6 +215,10 @@ static bool isVectorRegisterBank(const RegisterBank &Bank) {
return BankID == AMDGPU::VGPRRegBankID || BankID == AMDGPU::AGPRRegBankID;
}
+bool AMDGPURegisterBankInfo::isDivergentRegBank(const RegisterBank *RB) const {
+ return RB != &AMDGPU::SGPRRegBank;
+}
+
unsigned AMDGPURegisterBankInfo::copyCost(const RegisterBank &Dst,
const RegisterBank &Src,
unsigned Size) const {
@@ -846,10 +850,7 @@ bool AMDGPURegisterBankInfo::executeInWaterfallLoop(
assert(std::distance(NewBegin, NewEnd) == OrigRangeSize);
for (MachineInstr &MI : make_range(NewBegin, NewEnd)) {
- for (MachineOperand &Op : MI.uses()) {
- if (!Op.isReg() || Op.isDef())
- continue;
-
+ for (MachineOperand &Op : MI.all_uses()) {
Register OldReg = Op.getReg();
if (!SGPROperandRegs.count(OldReg))
continue;
@@ -1233,31 +1234,18 @@ bool AMDGPURegisterBankInfo::applyMappingImage(
return true;
}
-static Register getSrcRegIgnoringCopies(const MachineRegisterInfo &MRI,
- Register Reg) {
- MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
- if (!Def)
- return Reg;
-
- // TODO: Guard against this being an implicit def
- return Def->getOperand(0).getReg();
-}
-
// Analyze a combined offset from an llvm.amdgcn.s.buffer intrinsic and store
// the three offsets (voffset, soffset and instoffset)
-static unsigned setBufferOffsets(MachineIRBuilder &B,
- const AMDGPURegisterBankInfo &RBI,
- Register CombinedOffset, Register &VOffsetReg,
- Register &SOffsetReg, int64_t &InstOffsetVal,
- Align Alignment) {
+unsigned AMDGPURegisterBankInfo::setBufferOffsets(
+ MachineIRBuilder &B, Register CombinedOffset, Register &VOffsetReg,
+ Register &SOffsetReg, int64_t &InstOffsetVal, Align Alignment) const {
const LLT S32 = LLT::scalar(32);
MachineRegisterInfo *MRI = B.getMRI();
if (std::optional<int64_t> Imm =
getIConstantVRegSExtVal(CombinedOffset, *MRI)) {
uint32_t SOffset, ImmOffset;
- if (AMDGPU::splitMUBUFOffset(*Imm, SOffset, ImmOffset, &RBI.Subtarget,
- Alignment)) {
+ if (TII->splitMUBUFOffset(*Imm, SOffset, ImmOffset, Alignment)) {
VOffsetReg = B.buildConstant(S32, 0).getReg(0);
SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);
InstOffsetVal = ImmOffset;
@@ -1275,9 +1263,9 @@ static unsigned setBufferOffsets(MachineIRBuilder &B,
AMDGPU::getBaseWithConstantOffset(*MRI, CombinedOffset);
uint32_t SOffset, ImmOffset;
- if ((int)Offset > 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
- &RBI.Subtarget, Alignment)) {
- if (RBI.getRegBank(Base, *MRI, *RBI.TRI) == &AMDGPU::VGPRRegBank) {
+ if ((int)Offset > 0 &&
+ TII->splitMUBUFOffset(Offset, SOffset, ImmOffset, Alignment)) {
+ if (getRegBank(Base, *MRI, *TRI) == &AMDGPU::VGPRRegBank) {
VOffsetReg = Base;
SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);
B.getMRI()->setRegBank(SOffsetReg, AMDGPU::SGPRRegBank);
@@ -1298,11 +1286,11 @@ static unsigned setBufferOffsets(MachineIRBuilder &B,
// Handle the variable sgpr + vgpr case.
MachineInstr *Add = getOpcodeDef(AMDGPU::G_ADD, CombinedOffset, *MRI);
if (Add && (int)Offset >= 0) {
- Register Src0 = getSrcRegIgnoringCopies(*MRI, Add->getOperand(1).getReg());
- Register Src1 = getSrcRegIgnoringCopies(*MRI, Add->getOperand(2).getReg());
+ Register Src0 = getSrcRegIgnoringCopies(Add->getOperand(1).getReg(), *MRI);
+ Register Src1 = getSrcRegIgnoringCopies(Add->getOperand(2).getReg(), *MRI);
- const RegisterBank *Src0Bank = RBI.getRegBank(Src0, *MRI, *RBI.TRI);
- const RegisterBank *Src1Bank = RBI.getRegBank(Src1, *MRI, *RBI.TRI);
+ const RegisterBank *Src0Bank = getRegBank(Src0, *MRI, *TRI);
+ const RegisterBank *Src1Bank = getRegBank(Src1, *MRI, *TRI);
if (Src0Bank == &AMDGPU::VGPRRegBank && Src1Bank == &AMDGPU::SGPRRegBank) {
VOffsetReg = Src0;
@@ -1319,7 +1307,7 @@ static unsigned setBufferOffsets(MachineIRBuilder &B,
// Ensure we have a VGPR for the combined offset. This could be an issue if we
// have an SGPR offset and a VGPR resource.
- if (RBI.getRegBank(CombinedOffset, *MRI, *RBI.TRI) == &AMDGPU::VGPRRegBank) {
+ if (getRegBank(CombinedOffset, *MRI, *TRI) == &AMDGPU::VGPRRegBank) {
VOffsetReg = CombinedOffset;
} else {
VOffsetReg = B.buildCopy(S32, CombinedOffset).getReg(0);
@@ -1369,8 +1357,8 @@ bool AMDGPURegisterBankInfo::applyMappingSBufferLoad(
Register VOffset;
int64_t ImmOffset = 0;
- unsigned MMOOffset = setBufferOffsets(B, *this, MI.getOperand(2).getReg(),
- VOffset, SOffset, ImmOffset, Alignment);
+ unsigned MMOOffset = setBufferOffsets(B, MI.getOperand(2).getReg(), VOffset,
+ SOffset, ImmOffset, Alignment);
// TODO: 96-bit loads were widened to 128-bit results. Shrink the result if we
// can, but we need to track an MMO for that.
@@ -1804,7 +1792,7 @@ getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
std::pair<Register, unsigned>
AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
Register OrigOffset) const {
- const unsigned MaxImm = 4095;
+ const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
Register BaseReg;
unsigned ImmOffset;
const LLT S32 = LLT::scalar(32);
@@ -1815,13 +1803,14 @@ AMDGPURegisterBankInfo::splitBufferOffsets(MachineIRBuilder &B,
unsigned C1 = 0;
if (ImmOffset != 0) {
- // If the immediate value is too big for the immoffset field, put the value
- // and -4096 into the immoffset field so that the value that is copied/added
- // for the voffset field is a multiple of 4096, and it stands more chance
- // of being CSEd with the copy/add for another similar load/store.
- // However, do not do that rounding down to a multiple of 4096 if that is a
- // negative number, as it appears to be illegal to have a negative offset
- // in the vgpr, even if adding the immediate offset makes it positive.
+ // If the immediate value is too big for the immoffset field, put only bits
+ // that would normally fit in the immoffset field. The remaining value that
+ // is copied/added for the voffset field is a large power of 2, and it
+ // stands more chance of being CSEd with the copy/add for another similar
+ // load/store.
+ // However, do not do that rounding down if that is a negative
+ // number, as it appears to be illegal to have a negative offset in the
+ // vgpr, even if adding the immediate offset makes it positive.
unsigned Overflow = ImmOffset & ~MaxImm;
ImmOffset -= Overflow;
if ((int32_t)Overflow < 0) {
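The rewritten comment above describes the immoffset split in words; a short worked example may make the arithmetic concrete. This is an editorial sketch, not part of the patch: it assumes the immediate-offset limit is 4095 (the constant used by the pre-patch code; the patched code queries SIInstrInfo::getMaxMUBUFImmOffset() instead), and the helper name splitImmOffset is hypothetical.

#include <cassert>
#include <cstdint>

// Split a non-negative combined offset into the part that fits the immoffset
// field and the overflow that has to be materialized in the voffset register.
static void splitImmOffset(uint32_t Combined, uint32_t MaxImm,
                           uint32_t &ImmOffset, uint32_t &Overflow) {
  Overflow = Combined & ~MaxImm; // bits that cannot be encoded in immoffset
  ImmOffset = Combined - Overflow;
}

int main() {
  uint32_t Imm = 0, Ovf = 0;
  splitImmOffset(5000, 4095, Imm, Ovf);
  assert(Ovf == 4096 && Imm == 904); // 5000 = 4096 (voffset) + 904 (immoffset)
  return 0;
}

The negative-overflow guard in the real code additionally keeps the value left in the VGPR non-negative; the sketch above only models the plain split.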
@@ -3016,6 +3005,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
case Intrinsic::amdgcn_ubfe:
applyMappingBFE(OpdMapper, false);
return;
+ case Intrinsic::amdgcn_inverse_ballot:
+ applyDefaultMapping(OpdMapper);
+ constrainOpWithReadfirstlane(MI, MRI, 2); // Mask
+ return;
case Intrinsic::amdgcn_ballot:
// Use default handling and insert copy to vcc source.
break;
@@ -3082,14 +3075,16 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
constrainOpWithReadfirstlane(MI, MRI, 2);
return;
}
- case Intrinsic::amdgcn_raw_buffer_load_lds: {
+ case Intrinsic::amdgcn_raw_buffer_load_lds:
+ case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
applyDefaultMapping(OpdMapper);
constrainOpWithReadfirstlane(MI, MRI, 1); // rsrc
constrainOpWithReadfirstlane(MI, MRI, 2); // M0
constrainOpWithReadfirstlane(MI, MRI, 5); // soffset
return;
}
- case Intrinsic::amdgcn_struct_buffer_load_lds: {
+ case Intrinsic::amdgcn_struct_buffer_load_lds:
+ case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
applyDefaultMapping(OpdMapper);
constrainOpWithReadfirstlane(MI, MRI, 1); // rsrc
constrainOpWithReadfirstlane(MI, MRI, 2); // M0
@@ -3745,6 +3740,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_FPEXT:
case AMDGPU::G_FEXP2:
case AMDGPU::G_FLOG2:
+ case AMDGPU::G_FLDEXP:
case AMDGPU::G_FMINNUM:
case AMDGPU::G_FMAXNUM:
case AMDGPU::G_FMINNUM_IEEE:
@@ -3755,6 +3751,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_STRICT_FSUB:
case AMDGPU::G_STRICT_FMUL:
case AMDGPU::G_STRICT_FMA:
+ case AMDGPU::G_STRICT_FLDEXP:
case AMDGPU::G_BSWAP: // TODO: Somehow expand for scalar?
case AMDGPU::G_FSHR: // TODO: Expand for scalar
case AMDGPU::G_AMDGPU_FMIN_LEGACY:
@@ -3766,6 +3763,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_AMDGPU_CVT_F32_UBYTE3:
case AMDGPU::G_AMDGPU_CVT_PK_I16_I32:
case AMDGPU::G_AMDGPU_SMED3:
+ case AMDGPU::G_AMDGPU_FMED3:
return getDefaultMappingVOP(MI);
case AMDGPU::G_UMULH:
case AMDGPU::G_SMULH: {
@@ -4209,6 +4207,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_sin:
case Intrinsic::amdgcn_cos:
case Intrinsic::amdgcn_log_clamp:
+ case Intrinsic::amdgcn_log:
+ case Intrinsic::amdgcn_exp2:
case Intrinsic::amdgcn_rcp:
case Intrinsic::amdgcn_rcp_legacy:
case Intrinsic::amdgcn_sqrt:
@@ -4217,7 +4217,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_rsq_clamp:
case Intrinsic::amdgcn_fmul_legacy:
case Intrinsic::amdgcn_fma_legacy:
- case Intrinsic::amdgcn_ldexp:
case Intrinsic::amdgcn_frexp_mant:
case Intrinsic::amdgcn_frexp_exp:
case Intrinsic::amdgcn_fract:
@@ -4506,6 +4505,25 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, SrcSize);
break;
}
+ case Intrinsic::amdgcn_inverse_ballot: {
+ // This must be an SGPR, but accept a VGPR.
+ Register MaskReg = MI.getOperand(2).getReg();
+ unsigned MaskSize = MRI.getType(MaskReg).getSizeInBits();
+ unsigned MaskBank = getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, 1);
+ OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, MaskSize);
+ break;
+ }
+ case Intrinsic::amdgcn_wave_reduce_umin:
+ case Intrinsic::amdgcn_wave_reduce_umax: {
+ unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
+ unsigned OpSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+ auto regBankID =
+ isSALUMapping(MI) ? AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID;
+ OpdsMapping[2] = AMDGPU::getValueMapping(regBankID, OpSize);
+ break;
+ }
}
break;
}
@@ -4636,7 +4654,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case Intrinsic::amdgcn_raw_buffer_load:
- case Intrinsic::amdgcn_raw_tbuffer_load: {
+ case Intrinsic::amdgcn_raw_ptr_buffer_load:
+ case Intrinsic::amdgcn_raw_tbuffer_load:
+ case Intrinsic::amdgcn_raw_ptr_tbuffer_load: {
// FIXME: Should make intrinsic ID the last operand of the instruction,
// then this would be the same as store
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
@@ -4645,7 +4665,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[4] = getSGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
break;
}
- case Intrinsic::amdgcn_raw_buffer_load_lds: {
+ case Intrinsic::amdgcn_raw_buffer_load_lds:
+ case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: {
OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
@@ -4653,8 +4674,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case Intrinsic::amdgcn_raw_buffer_store:
+ case Intrinsic::amdgcn_raw_ptr_buffer_store:
case Intrinsic::amdgcn_raw_buffer_store_format:
- case Intrinsic::amdgcn_raw_tbuffer_store: {
+ case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
+ case Intrinsic::amdgcn_raw_tbuffer_store:
+ case Intrinsic::amdgcn_raw_ptr_tbuffer_store: {
OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
@@ -4662,7 +4686,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case Intrinsic::amdgcn_struct_buffer_load:
- case Intrinsic::amdgcn_struct_tbuffer_load: {
+ case Intrinsic::amdgcn_struct_ptr_buffer_load:
+ case Intrinsic::amdgcn_struct_tbuffer_load:
+ case Intrinsic::amdgcn_struct_ptr_tbuffer_load: {
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
@@ -4670,7 +4696,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
break;
}
- case Intrinsic::amdgcn_struct_buffer_load_lds: {
+ case Intrinsic::amdgcn_struct_buffer_load_lds:
+ case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[4] = getVGPROpMapping(MI.getOperand(4).getReg(), MRI, *TRI);
@@ -4679,7 +4706,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
break;
}
case Intrinsic::amdgcn_struct_buffer_store:
- case Intrinsic::amdgcn_struct_tbuffer_store: {
+ case Intrinsic::amdgcn_struct_ptr_buffer_store:
+ case Intrinsic::amdgcn_struct_tbuffer_store:
+ case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
OpdsMapping[1] = getVGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
OpdsMapping[3] = getVGPROpMapping(MI.getOperand(3).getReg(), MRI, *TRI);
@@ -4828,9 +4857,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_ATOMICRMW_UMAX:
case AMDGPU::G_ATOMICRMW_UMIN:
case AMDGPU::G_ATOMICRMW_FADD:
+ case AMDGPU::G_ATOMICRMW_UINC_WRAP:
+ case AMDGPU::G_ATOMICRMW_UDEC_WRAP:
case AMDGPU::G_AMDGPU_ATOMIC_CMPXCHG:
- case AMDGPU::G_AMDGPU_ATOMIC_INC:
- case AMDGPU::G_AMDGPU_ATOMIC_DEC:
case AMDGPU::G_AMDGPU_ATOMIC_FMIN:
case AMDGPU::G_AMDGPU_ATOMIC_FMAX: {
OpdsMapping[0] = getVGPROpMapping(MI.getOperand(0).getReg(), MRI, *TRI);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
index c9741c2202e6..78214d7a1058 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.h
@@ -82,6 +82,9 @@ public:
applyMappingImage(MachineInstr &MI,
const OperandsMapper &OpdMapper,
MachineRegisterInfo &MRI, int RSrcIdx) const;
+ unsigned setBufferOffsets(MachineIRBuilder &B, Register CombinedOffset,
+ Register &VOffsetReg, Register &SOffsetReg,
+ int64_t &InstOffsetVal, Align Alignment) const;
bool applyMappingSBufferLoad(const OperandsMapper &OpdMapper) const;
bool applyMappingBFE(const OperandsMapper &OpdMapper, bool Signed) const;
@@ -165,6 +168,8 @@ public:
public:
AMDGPURegisterBankInfo(const GCNSubtarget &STI);
+ bool isDivergentRegBank(const RegisterBank *RB) const override;
+
unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
unsigned Size) const override;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp
deleted file mode 100644
index b7521540c020..000000000000
--- a/llvm/lib/Target/AMDGPU/AMDGPUReleaseVGPRs.cpp
+++ /dev/null
@@ -1,156 +0,0 @@
-//===- AMDGPUReleaseVGPRs.cpp - Automatically release vgprs on GFX11+ -----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// Insert S_SENDMSG instructions to release vgprs on GFX11+.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUSubtarget.h"
-#include "GCNSubtarget.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIDefines.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineOperand.h"
-#include <optional>
-using namespace llvm;
-
-#define DEBUG_TYPE "release-vgprs"
-
-namespace {
-
-class AMDGPUReleaseVGPRs : public MachineFunctionPass {
-public:
- static char ID;
-
- AMDGPUReleaseVGPRs() : MachineFunctionPass(ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- // Track if the last instruction referencing a vgpr in a MBB is a VMEM
- // store. Because this pass is late in the pipeline, it is expected that the
- // last vgpr use will likely be one of vmem store, ds, exp.
- // Loads and others vgpr operations would have been
- // deleted by this point, except for complex control flow involving loops.
- // This is why we are just testing the type of instructions rather
- // than the operands.
- class LastVGPRUseIsVMEMStore {
- BitVector BlockVMEMStore;
-
- static std::optional<bool>
- lastVGPRUseIsStore(const MachineBasicBlock &MBB) {
- for (auto &MI : reverse(MBB.instrs())) {
- // If it's a VMEM store, a VGPR will be used, return true.
- if ((SIInstrInfo::isVMEM(MI) || SIInstrInfo::isFLAT(MI)) &&
- MI.mayStore())
- return true;
-
- // If it's referencing a VGPR but is not a VMEM store, return false.
- if (SIInstrInfo::isDS(MI) || SIInstrInfo::isEXP(MI) ||
- SIInstrInfo::isVMEM(MI) || SIInstrInfo::isFLAT(MI) ||
- SIInstrInfo::isVALU(MI))
- return false;
- }
- // Wait until the values are propagated from the predecessors
- return std::nullopt;
- }
-
- public:
- LastVGPRUseIsVMEMStore(const MachineFunction &MF)
- : BlockVMEMStore(MF.getNumBlockIDs()) {
-
- df_iterator_default_set<const MachineBasicBlock *> Visited;
- SmallVector<const MachineBasicBlock *> EndWithVMEMStoreBlocks;
-
- for (const auto &MBB : MF) {
- auto LastUseIsStore = lastVGPRUseIsStore(MBB);
- if (!LastUseIsStore.has_value())
- continue;
-
- if (*LastUseIsStore) {
- EndWithVMEMStoreBlocks.push_back(&MBB);
- } else {
- Visited.insert(&MBB);
- }
- }
-
- for (const auto *MBB : EndWithVMEMStoreBlocks) {
- for (const auto *Succ : depth_first_ext(MBB, Visited)) {
- BlockVMEMStore[Succ->getNumber()] = true;
- }
- }
- }
-
- // Return true if the last instruction referencing a vgpr in this MBB
- // is a VMEM store, otherwise return false.
- bool isLastVGPRUseVMEMStore(const MachineBasicBlock &MBB) const {
- return BlockVMEMStore[MBB.getNumber()];
- }
- };
-
- static bool
- runOnMachineBasicBlock(MachineBasicBlock &MBB, const SIInstrInfo *SII,
- const LastVGPRUseIsVMEMStore &BlockVMEMStore) {
-
- bool Changed = false;
-
- for (auto &MI : MBB.terminators()) {
- // Look for S_ENDPGM instructions
- if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
- MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) {
- // If the last instruction using a VGPR in the block is a VMEM store,
- // release VGPRs. The VGPRs release will be placed just before ending
- // the program
- if (BlockVMEMStore.isLastVGPRUseVMEMStore(MBB)) {
- BuildMI(MBB, MI, DebugLoc(), SII->get(AMDGPU::S_SENDMSG))
- .addImm(AMDGPU::SendMsg::ID_DEALLOC_VGPRS_GFX11Plus);
- Changed = true;
- }
- }
- }
-
- return Changed;
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override {
- Function &F = MF.getFunction();
- if (skipFunction(F) || !AMDGPU::isEntryFunctionCC(F.getCallingConv()))
- return false;
-
- // This pass only runs on GFX11+
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- if (ST.getGeneration() < AMDGPUSubtarget::GFX11)
- return false;
-
- LLVM_DEBUG(dbgs() << "AMDGPUReleaseVGPRs running on " << MF.getName()
- << "\n");
-
- const SIInstrInfo *SII = ST.getInstrInfo();
- LastVGPRUseIsVMEMStore BlockVMEMStore(MF);
-
- bool Changed = false;
- for (auto &MBB : MF) {
- Changed |= runOnMachineBasicBlock(MBB, SII, BlockVMEMStore);
- }
-
- return Changed;
- }
-};
-
-} // namespace
-
-char AMDGPUReleaseVGPRs::ID = 0;
-
-char &llvm::AMDGPUReleaseVGPRsID = AMDGPUReleaseVGPRs::ID;
-
-INITIALIZE_PASS(AMDGPUReleaseVGPRs, DEBUG_TYPE, "Release VGPRs", false, false)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
new file mode 100644
index 000000000000..580352fb8cf4
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
@@ -0,0 +1,186 @@
+//===-- AMDGPURemoveIncompatibleFunctions.cpp -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This pass replaces all uses of functions that use GPU features
+/// incompatible with the current GPU with a null pointer, and then deletes
+/// those functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Target/TargetMachine.h"
+
+#define DEBUG_TYPE "amdgpu-remove-incompatible-functions"
+
+using namespace llvm;
+
+namespace llvm {
+extern const SubtargetFeatureKV
+ AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures - 1];
+}
+
+namespace {
+
+using Generation = AMDGPUSubtarget::Generation;
+
+class AMDGPURemoveIncompatibleFunctions : public ModulePass {
+public:
+ static char ID;
+
+ AMDGPURemoveIncompatibleFunctions(const TargetMachine *TM = nullptr)
+ : ModulePass(ID), TM(TM) {
+ assert(TM && "No TargetMachine!");
+ }
+
+ StringRef getPassName() const override {
+ return "AMDGPU Remove Incompatible Functions";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {}
+
+ /// Checks a single function, returns true if the function must be deleted.
+ bool checkFunction(Function &F);
+
+ bool runOnModule(Module &M) override {
+ assert(TM->getTargetTriple().isAMDGCN());
+
+ SmallVector<Function *, 4> FnsToDelete;
+ for (Function &F : M) {
+ if (checkFunction(F))
+ FnsToDelete.push_back(&F);
+ }
+
+ for (Function *F : FnsToDelete) {
+ F->replaceAllUsesWith(ConstantPointerNull::get(F->getType()));
+ F->eraseFromParent();
+ }
+ return !FnsToDelete.empty();
+ }
+
+private:
+ const TargetMachine *TM = nullptr;
+};
+
+StringRef getFeatureName(unsigned Feature) {
+ for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV)
+ if (Feature == KV.Value)
+ return KV.Key;
+
+ llvm_unreachable("Unknown Target feature");
+}
+
+const SubtargetSubTypeKV *getGPUInfo(const GCNSubtarget &ST,
+ StringRef GPUName) {
+ for (const SubtargetSubTypeKV &KV : ST.getAllProcessorDescriptions())
+ if (StringRef(KV.Key) == GPUName)
+ return &KV;
+
+ return nullptr;
+}
+
+constexpr unsigned FeaturesToCheck[] = {
+ AMDGPU::FeatureGFX11Insts, AMDGPU::FeatureGFX10Insts,
+ AMDGPU::FeatureGFX9Insts, AMDGPU::FeatureGFX8Insts,
+ AMDGPU::FeatureDPP, AMDGPU::Feature16BitInsts,
+ AMDGPU::FeatureDot1Insts, AMDGPU::FeatureDot2Insts,
+ AMDGPU::FeatureDot3Insts, AMDGPU::FeatureDot4Insts,
+ AMDGPU::FeatureDot5Insts, AMDGPU::FeatureDot6Insts,
+ AMDGPU::FeatureDot7Insts, AMDGPU::FeatureDot8Insts,
+};
+
+FeatureBitset expandImpliedFeatures(const FeatureBitset &Features) {
+ FeatureBitset Result = Features;
+ for (const SubtargetFeatureKV &FE : AMDGPUFeatureKV) {
+ if (Features.test(FE.Value) && FE.Implies.any())
+ Result |= expandImpliedFeatures(FE.Implies.getAsBitset());
+ }
+ return Result;
+}
+
+void reportFunctionRemoved(Function &F, unsigned Feature) {
+ OptimizationRemarkEmitter ORE(&F);
+ ORE.emit([&]() {
+ // Note: we print the function name as part of the diagnostic because if
+ // debug info is not present, users get "<unknown>:0:0" as the debug
+ // loc. If we didn't print the function name, there would be no way to
+ // tell which function got removed.
+ return OptimizationRemark(DEBUG_TYPE, "AMDGPUIncompatibleFnRemoved", &F)
+ << "removing function '" << F.getName() << "': +"
+ << getFeatureName(Feature)
+ << " is not supported on the current target";
+ });
+ return;
+}
+} // end anonymous namespace
+
+bool AMDGPURemoveIncompatibleFunctions::checkFunction(Function &F) {
+ if (F.isDeclaration())
+ return false;
+
+ const GCNSubtarget *ST =
+ static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F));
+
+ // Check the GPU isn't generic. Generic is used for testing only
+ // and we don't want this pass to interfere with it.
+ StringRef GPUName = ST->getCPU();
+ if (GPUName.empty() || GPUName.contains("generic"))
+ return false;
+
+ // Try to fetch the GPU's info. If we can't, it's likely an unknown processor
+ // so just bail out.
+ const SubtargetSubTypeKV *GPUInfo = getGPUInfo(*ST, GPUName);
+ if (!GPUInfo)
+ return false;
+
+ // Get all the features implied by the current GPU, and recursively expand
+ // the features that imply other features.
+ //
+ // e.g. GFX90A implies FeatureGFX9, and FeatureGFX9 implies a whole set of
+ // other features.
+ const FeatureBitset GPUFeatureBits =
+ expandImpliedFeatures(GPUInfo->Implies.getAsBitset());
+
+ // Now that we have a FeatureBitset containing all possible features for
+ // the chosen GPU, check our list of "suspicious" features.
+
+ // Check that the user didn't enable any features that aren't part of that
+ // GPU's feature set. We only check a predetermined set of features.
+ for (unsigned Feature : FeaturesToCheck) {
+ if (ST->hasFeature(Feature) && !GPUFeatureBits.test(Feature)) {
+ reportFunctionRemoved(F, Feature);
+ return true;
+ }
+ }
+
+ // Delete FeatureWavefrontSize32 functions for gfx9 and below targets that
+ // don't support the mode. gfx10+ implicitly supports both the wave32 and
+ // wave64 features, so they are not in the feature set and need a separate
+ // check.
+ if (ST->getGeneration() < AMDGPUSubtarget::GFX10 &&
+ ST->hasFeature(AMDGPU::FeatureWavefrontSize32)) {
+ reportFunctionRemoved(F, AMDGPU::FeatureWavefrontSize32);
+ return true;
+ }
+ return false;
+}
+
+INITIALIZE_PASS(AMDGPURemoveIncompatibleFunctions, DEBUG_TYPE,
+ "AMDGPU Remove Incompatible Functions", false, false)
+
+char AMDGPURemoveIncompatibleFunctions::ID = 0;
+
+ModulePass *
+llvm::createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *TM) {
+ return new AMDGPURemoveIncompatibleFunctions(TM);
+}
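For context on how this factory function is typically consumed, the following is an editorial sketch rather than something taken from the patch; in-tree the pass would normally be scheduled from the target's pass configuration, and the wrapper name here is illustrative. It assumes llvm/IR/LegacyPassManager.h, llvm/Target/TargetMachine.h, and AMDGPU.h are available.

// Illustrative only: adding the module pass to a legacy pass manager.
void addRemoveIncompatibleFunctions(llvm::legacy::PassManagerBase &PM,
                                    const llvm::TargetMachine &TM) {
  PM.add(llvm::createAMDGPURemoveIncompatibleFunctionsPass(&TM));
}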
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
deleted file mode 100644
index 299ac106ebee..000000000000
--- a/llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp
+++ /dev/null
@@ -1,648 +0,0 @@
-//===-- AMDGPUReplaceLDSUseWithPointer.cpp --------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass replaces all the uses of LDS within non-kernel functions by
-// corresponding pointer counter-parts.
-//
-// The main motivation behind this pass is - to *avoid* subsequent LDS lowering
-// pass from directly packing LDS (assume large LDS) into a struct type which
-// would otherwise cause allocating huge memory for struct instance within every
-// kernel.
-//
-// Brief sketch of the algorithm implemented in this pass is as below:
-//
-// 1. Collect all the LDS defined in the module which qualify for pointer
-// replacement, say it is, LDSGlobals set.
-//
-// 2. Collect all the reachable callees for each kernel defined in the module,
-// say it is, KernelToCallees map.
-//
-// 3. FOR (each global GV from LDSGlobals set) DO
-// LDSUsedNonKernels = Collect all non-kernel functions which use GV.
-// FOR (each kernel K in KernelToCallees map) DO
-// ReachableCallees = KernelToCallees[K]
-// ReachableAndLDSUsedCallees =
-// SetIntersect(LDSUsedNonKernels, ReachableCallees)
-// IF (ReachableAndLDSUsedCallees is not empty) THEN
-// Pointer = Create a pointer to point-to GV if not created.
-// Initialize Pointer to point-to GV within kernel K.
-// ENDIF
-// ENDFOR
-// Replace all uses of GV within non kernel functions by Pointer.
-// ENFOR
-//
-// LLVM IR example:
-//
-// Input IR:
-//
-// @lds = internal addrspace(3) global [4 x i32] undef, align 16
-//
-// define internal void @f0() {
-// entry:
-// %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* @lds,
-// i32 0, i32 0
-// ret void
-// }
-//
-// define protected amdgpu_kernel void @k0() {
-// entry:
-// call void @f0()
-// ret void
-// }
-//
-// Output IR:
-//
-// @lds = internal addrspace(3) global [4 x i32] undef, align 16
-// @lds.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2
-//
-// define internal void @f0() {
-// entry:
-// %0 = load i16, i16 addrspace(3)* @lds.ptr, align 2
-// %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
-// %2 = bitcast i8 addrspace(3)* %1 to [4 x i32] addrspace(3)*
-// %gep = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* %2,
-// i32 0, i32 0
-// ret void
-// }
-//
-// define protected amdgpu_kernel void @k0() {
-// entry:
-// store i16 ptrtoint ([4 x i32] addrspace(3)* @lds to i16),
-// i16 addrspace(3)* @lds.ptr, align 2
-// call void @f0()
-// ret void
-// }
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "GCNSubtarget.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "Utils/AMDGPUMemoryUtils.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetOperations.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicsAMDGPU.h"
-#include "llvm/IR/ReplaceConstant.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/ModuleUtils.h"
-#include <algorithm>
-#include <vector>
-
-#define DEBUG_TYPE "amdgpu-replace-lds-use-with-pointer"
-
-using namespace llvm;
-
-namespace {
-
-namespace AMDGPU {
-/// Collect all the instructions where user \p U belongs to. \p U could be
-/// instruction itself or it could be a constant expression which is used within
-/// an instruction. If \p CollectKernelInsts is true, collect instructions only
-/// from kernels, otherwise collect instructions only from non-kernel functions.
-DenseMap<Function *, SmallPtrSet<Instruction *, 8>>
-getFunctionToInstsMap(User *U, bool CollectKernelInsts);
-
-SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV);
-
-} // namespace AMDGPU
-
-class ReplaceLDSUseImpl {
- Module &M;
- LLVMContext &Ctx;
- const DataLayout &DL;
- Constant *LDSMemBaseAddr;
-
- DenseMap<GlobalVariable *, GlobalVariable *> LDSToPointer;
- DenseMap<GlobalVariable *, SmallPtrSet<Function *, 8>> LDSToNonKernels;
- DenseMap<Function *, SmallPtrSet<Function *, 8>> KernelToCallees;
- DenseMap<Function *, SmallPtrSet<GlobalVariable *, 8>> KernelToLDSPointers;
- DenseMap<Function *, BasicBlock *> KernelToInitBB;
- DenseMap<Function *, DenseMap<GlobalVariable *, Value *>>
- FunctionToLDSToReplaceInst;
-
- // Collect LDS which requires their uses to be replaced by pointer.
- std::vector<GlobalVariable *> collectLDSRequiringPointerReplace() {
- // Collect LDS which requires module lowering.
- std::vector<GlobalVariable *> LDSGlobals =
- llvm::AMDGPU::findLDSVariablesToLower(M, nullptr);
-
- // Remove LDS which don't qualify for replacement.
- llvm::erase_if(LDSGlobals, [&](GlobalVariable *GV) {
- return shouldIgnorePointerReplacement(GV);
- });
-
- return LDSGlobals;
- }
-
- // Returns true if uses of given LDS global within non-kernel functions should
- // be keep as it is without pointer replacement.
- bool shouldIgnorePointerReplacement(GlobalVariable *GV) {
- // LDS whose size is very small and doesn't exceed pointer size is not worth
- // replacing.
- if (DL.getTypeAllocSize(GV->getValueType()) <= 2)
- return true;
-
- // LDS which is not used from non-kernel function scope or it is used from
- // global scope does not qualify for replacement.
- LDSToNonKernels[GV] = AMDGPU::collectNonKernelAccessorsOfLDS(GV);
- return LDSToNonKernels[GV].empty();
-
- // FIXME: When GV is used within all (or within most of the kernels), then
- // it does not make sense to create a pointer for it.
- }
-
- // Insert new global LDS pointer which points to LDS.
- GlobalVariable *createLDSPointer(GlobalVariable *GV) {
- // LDS pointer which points to LDS is already created? Return it.
- auto PointerEntry = LDSToPointer.insert(std::pair(GV, nullptr));
- if (!PointerEntry.second)
- return PointerEntry.first->second;
-
- // We need to create new LDS pointer which points to LDS.
- //
- // Each CU owns at max 64K of LDS memory, so LDS address ranges from 0 to
- // 2^16 - 1. Hence 16 bit pointer is enough to hold the LDS address.
- auto *I16Ty = Type::getInt16Ty(Ctx);
- GlobalVariable *LDSPointer = new GlobalVariable(
- M, I16Ty, false, GlobalValue::InternalLinkage, UndefValue::get(I16Ty),
- GV->getName() + Twine(".ptr"), nullptr, GlobalVariable::NotThreadLocal,
- AMDGPUAS::LOCAL_ADDRESS);
-
- LDSPointer->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
- LDSPointer->setAlignment(llvm::AMDGPU::getAlign(DL, LDSPointer));
-
- // Mark that an associated LDS pointer is created for LDS.
- LDSToPointer[GV] = LDSPointer;
-
- return LDSPointer;
- }
-
- // Split entry basic block in such a way that only lane 0 of each wave does
- // the LDS pointer initialization, and return newly created basic block.
- BasicBlock *activateLaneZero(Function *K) {
- // If the entry basic block of kernel K is already split, then return
- // newly created basic block.
- auto BasicBlockEntry = KernelToInitBB.insert(std::pair(K, nullptr));
- if (!BasicBlockEntry.second)
- return BasicBlockEntry.first->second;
-
- // Split entry basic block of kernel K.
- auto *EI = &(*(K->getEntryBlock().getFirstInsertionPt()));
- IRBuilder<> Builder(EI);
-
- Value *Mbcnt =
- Builder.CreateIntrinsic(Intrinsic::amdgcn_mbcnt_lo, {},
- {Builder.getInt32(-1), Builder.getInt32(0)});
- Value *Cond = Builder.CreateICmpEQ(Mbcnt, Builder.getInt32(0));
- Instruction *WB = cast<Instruction>(
- Builder.CreateIntrinsic(Intrinsic::amdgcn_wave_barrier, {}, {}));
-
- BasicBlock *NBB = SplitBlockAndInsertIfThen(Cond, WB, false)->getParent();
-
- // Mark that the entry basic block of kernel K is split.
- KernelToInitBB[K] = NBB;
-
- return NBB;
- }
-
- // Within given kernel, initialize given LDS pointer to point to given LDS.
- void initializeLDSPointer(Function *K, GlobalVariable *GV,
- GlobalVariable *LDSPointer) {
- // If LDS pointer is already initialized within K, then nothing to do.
- auto PointerEntry = KernelToLDSPointers.insert(
- std::pair(K, SmallPtrSet<GlobalVariable *, 8>()));
- if (!PointerEntry.second)
- if (PointerEntry.first->second.contains(LDSPointer))
- return;
-
- // Insert instructions at EI which initialize LDS pointer to point-to LDS
- // within kernel K.
- //
- // That is, convert pointer type of GV to i16, and then store this converted
- // i16 value within LDSPointer which is of type i16*.
- auto *EI = &(*(activateLaneZero(K)->getFirstInsertionPt()));
- IRBuilder<> Builder(EI);
- Builder.CreateStore(Builder.CreatePtrToInt(GV, Type::getInt16Ty(Ctx)),
- LDSPointer);
-
- // Mark that LDS pointer is initialized within kernel K.
- KernelToLDSPointers[K].insert(LDSPointer);
- }
-
- // We have created an LDS pointer for LDS, and initialized it to point-to LDS
- // within all relevant kernels. Now replace all the uses of LDS within
- // non-kernel functions by LDS pointer.
- void replaceLDSUseByPointer(GlobalVariable *GV, GlobalVariable *LDSPointer) {
- SmallVector<User *, 8> LDSUsers(GV->users());
- for (auto *U : LDSUsers) {
- // When `U` is a constant expression, it is possible that same constant
- // expression exists within multiple instructions, and within multiple
- // non-kernel functions. Collect all those non-kernel functions and all
- // those instructions within which `U` exist.
- auto FunctionToInsts =
- AMDGPU::getFunctionToInstsMap(U, false /*=CollectKernelInsts*/);
-
- for (const auto &FunctionToInst : FunctionToInsts) {
- Function *F = FunctionToInst.first;
- auto &Insts = FunctionToInst.second;
- for (auto *I : Insts) {
- // If `U` is a constant expression, then we need to break the
- // associated instruction into a set of separate instructions by
- // converting constant expressions into instructions.
- SmallPtrSet<Instruction *, 8> UserInsts;
-
- if (U == I) {
- // `U` is an instruction, conversion from constant expression to
- // set of instructions is *not* required.
- UserInsts.insert(I);
- } else {
- // `U` is a constant expression, convert it into corresponding set
- // of instructions.
- auto *CE = cast<ConstantExpr>(U);
- convertConstantExprsToInstructions(I, CE, &UserInsts);
- }
-
- // Go through all the user instructions, if LDS exist within them as
- // an operand, then replace it by replace instruction.
- for (auto *II : UserInsts) {
- auto *ReplaceInst = getReplacementInst(F, GV, LDSPointer);
- II->replaceUsesOfWith(GV, ReplaceInst);
- }
- }
- }
- }
- }
-
- // Create a set of replacement instructions which together replace LDS within
- // non-kernel function F by accessing LDS indirectly using LDS pointer.
- Value *getReplacementInst(Function *F, GlobalVariable *GV,
- GlobalVariable *LDSPointer) {
- // If the instruction which replaces LDS within F is already created, then
- // return it.
- auto LDSEntry = FunctionToLDSToReplaceInst.insert(
- std::pair(F, DenseMap<GlobalVariable *, Value *>()));
- if (!LDSEntry.second) {
- auto ReplaceInstEntry =
- LDSEntry.first->second.insert(std::pair(GV, nullptr));
- if (!ReplaceInstEntry.second)
- return ReplaceInstEntry.first->second;
- }
-
- // Get the instruction insertion point within the beginning of the entry
- // block of current non-kernel function.
- auto *EI = &(*(F->getEntryBlock().getFirstInsertionPt()));
- IRBuilder<> Builder(EI);
-
- // Insert required set of instructions which replace LDS within F.
- auto *V = Builder.CreateBitCast(
- Builder.CreateGEP(
- Builder.getInt8Ty(), LDSMemBaseAddr,
- Builder.CreateLoad(LDSPointer->getValueType(), LDSPointer)),
- GV->getType());
-
- // Mark that the replacement instruction which replace LDS within F is
- // created.
- FunctionToLDSToReplaceInst[F][GV] = V;
-
- return V;
- }
-
-public:
- ReplaceLDSUseImpl(Module &M)
- : M(M), Ctx(M.getContext()), DL(M.getDataLayout()) {
- LDSMemBaseAddr = Constant::getIntegerValue(
- PointerType::get(Type::getInt8Ty(M.getContext()),
- AMDGPUAS::LOCAL_ADDRESS),
- APInt(32, 0));
- }
-
- // Entry-point function which interface ReplaceLDSUseImpl with outside of the
- // class.
- bool replaceLDSUse();
-
-private:
- // For a given LDS from collected LDS globals set, replace its non-kernel
- // function scope uses by pointer.
- bool replaceLDSUse(GlobalVariable *GV);
-};
-
-// For given LDS from collected LDS globals set, replace its non-kernel function
-// scope uses by pointer.
-bool ReplaceLDSUseImpl::replaceLDSUse(GlobalVariable *GV) {
- // Holds all those non-kernel functions within which LDS is being accessed.
- SmallPtrSet<Function *, 8> &LDSAccessors = LDSToNonKernels[GV];
-
- // The LDS pointer which points to LDS and replaces all the uses of LDS.
- GlobalVariable *LDSPointer = nullptr;
-
- // Traverse through each kernel K, check and if required, initialize the
- // LDS pointer to point to LDS within K.
- for (const auto &KernelToCallee : KernelToCallees) {
- Function *K = KernelToCallee.first;
- SmallPtrSet<Function *, 8> Callees = KernelToCallee.second;
-
- // Compute reachable and LDS used callees for kernel K.
- set_intersect(Callees, LDSAccessors);
-
- // None of the LDS accessing non-kernel functions are reachable from
- // kernel K. Hence, no need to initialize LDS pointer within kernel K.
- if (Callees.empty())
- continue;
-
- // We have found reachable and LDS used callees for kernel K, and we need to
- // initialize LDS pointer within kernel K, and we need to replace LDS use
- // within those callees by LDS pointer.
- //
- // But, first check if LDS pointer is already created, if not create one.
- LDSPointer = createLDSPointer(GV);
-
- // Initialize LDS pointer to point to LDS within kernel K.
- initializeLDSPointer(K, GV, LDSPointer);
- }
-
- // We have not found reachable and LDS used callees for any of the kernels,
- // and hence we have not created LDS pointer.
- if (!LDSPointer)
- return false;
-
- // We have created an LDS pointer for LDS, and initialized it to point-to LDS
- // within all relevant kernels. Now replace all the uses of LDS within
- // non-kernel functions by LDS pointer.
- replaceLDSUseByPointer(GV, LDSPointer);
-
- return true;
-}
-
-namespace AMDGPU {
-
-// An helper class for collecting all reachable callees for each kernel defined
-// within the module.
-class CollectReachableCallees {
- Module &M;
- CallGraph CG;
- SmallPtrSet<CallGraphNode *, 8> AddressTakenFunctions;
-
- // Collect all address taken functions within the module.
- void collectAddressTakenFunctions() {
- auto *ECNode = CG.getExternalCallingNode();
-
- for (const auto &GI : *ECNode) {
- auto *CGN = GI.second;
- auto *F = CGN->getFunction();
- if (!F || F->isDeclaration() || llvm::AMDGPU::isKernelCC(F))
- continue;
- AddressTakenFunctions.insert(CGN);
- }
- }
-
- // For given kernel, collect all its reachable non-kernel functions.
- SmallPtrSet<Function *, 8> collectReachableCallees(Function *K) {
- SmallPtrSet<Function *, 8> ReachableCallees;
-
- // Call graph node which represents this kernel.
- auto *KCGN = CG[K];
-
- // Go through all call graph nodes reachable from the node representing this
- // kernel, visit all their call sites, if the call site is direct, add
- // corresponding callee to reachable callee set, if it is indirect, resolve
- // the indirect call site to potential reachable callees, add them to
- // reachable callee set, and repeat the process for the newly added
- // potential callee nodes.
- //
- // FIXME: Need to handle bit-casted function pointers.
- //
- SmallVector<CallGraphNode *, 8> CGNStack(depth_first(KCGN));
- SmallPtrSet<CallGraphNode *, 8> VisitedCGNodes;
- while (!CGNStack.empty()) {
- auto *CGN = CGNStack.pop_back_val();
-
- if (!VisitedCGNodes.insert(CGN).second)
- continue;
-
- // Ignore call graph node which does not have associated function or
- // associated function is not a definition.
- if (!CGN->getFunction() || CGN->getFunction()->isDeclaration())
- continue;
-
- for (const auto &GI : *CGN) {
- auto *RCB = cast<CallBase>(*GI.first);
- auto *RCGN = GI.second;
-
- if (auto *DCallee = RCGN->getFunction()) {
- ReachableCallees.insert(DCallee);
- } else if (RCB->isIndirectCall()) {
- auto *RCBFTy = RCB->getFunctionType();
- for (auto *ACGN : AddressTakenFunctions) {
- auto *ACallee = ACGN->getFunction();
- if (ACallee->getFunctionType() == RCBFTy) {
- ReachableCallees.insert(ACallee);
- CGNStack.append(df_begin(ACGN), df_end(ACGN));
- }
- }
- }
- }
- }
-
- return ReachableCallees;
- }
-
-public:
- explicit CollectReachableCallees(Module &M) : M(M), CG(CallGraph(M)) {
- // Collect address taken functions.
- collectAddressTakenFunctions();
- }
-
- void collectReachableCallees(
- DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) {
- // Collect reachable callee set for each kernel defined in the module.
- for (Function &F : M.functions()) {
- if (!llvm::AMDGPU::isKernelCC(&F))
- continue;
- Function *K = &F;
- KernelToCallees[K] = collectReachableCallees(K);
- }
- }
-};
-
-/// Collect reachable callees for each kernel defined in the module \p M and
-/// return collected callees at \p KernelToCallees.
-void collectReachableCallees(
- Module &M,
- DenseMap<Function *, SmallPtrSet<Function *, 8>> &KernelToCallees) {
- CollectReachableCallees CRC{M};
- CRC.collectReachableCallees(KernelToCallees);
-}
-
-/// For the given LDS global \p GV, visit all its users and collect all
-/// non-kernel functions within which \p GV is used and return collected list of
-/// such non-kernel functions.
-SmallPtrSet<Function *, 8> collectNonKernelAccessorsOfLDS(GlobalVariable *GV) {
- SmallPtrSet<Function *, 8> LDSAccessors;
- SmallVector<User *, 8> UserStack(GV->users());
- SmallPtrSet<User *, 8> VisitedUsers;
-
- while (!UserStack.empty()) {
- auto *U = UserStack.pop_back_val();
-
- // `U` is already visited? continue to next one.
- if (!VisitedUsers.insert(U).second)
- continue;
-
- // `U` is a global variable which is initialized with LDS. Ignore LDS.
- if (isa<GlobalValue>(U))
- return SmallPtrSet<Function *, 8>();
-
- // Recursively explore constant users.
- if (isa<Constant>(U)) {
- append_range(UserStack, U->users());
- continue;
- }
-
- // `U` should be an instruction, if it belongs to a non-kernel function F,
- // then collect F.
- Function *F = cast<Instruction>(U)->getFunction();
- if (!llvm::AMDGPU::isKernelCC(F))
- LDSAccessors.insert(F);
- }
-
- return LDSAccessors;
-}
-
-DenseMap<Function *, SmallPtrSet<Instruction *, 8>>
-getFunctionToInstsMap(User *U, bool CollectKernelInsts) {
- DenseMap<Function *, SmallPtrSet<Instruction *, 8>> FunctionToInsts;
- SmallVector<User *, 8> UserStack;
- SmallPtrSet<User *, 8> VisitedUsers;
-
- UserStack.push_back(U);
-
- while (!UserStack.empty()) {
- auto *UU = UserStack.pop_back_val();
-
- if (!VisitedUsers.insert(UU).second)
- continue;
-
- if (isa<GlobalValue>(UU))
- continue;
-
- if (isa<Constant>(UU)) {
- append_range(UserStack, UU->users());
- continue;
- }
-
- auto *I = cast<Instruction>(UU);
- Function *F = I->getFunction();
- if (CollectKernelInsts) {
- if (!llvm::AMDGPU::isKernelCC(F)) {
- continue;
- }
- } else {
- if (llvm::AMDGPU::isKernelCC(F)) {
- continue;
- }
- }
-
- FunctionToInsts.insert(std::pair(F, SmallPtrSet<Instruction *, 8>()));
- FunctionToInsts[F].insert(I);
- }
-
- return FunctionToInsts;
-}
-
-} // namespace AMDGPU
-
-// Entry-point function which interface ReplaceLDSUseImpl with outside of the
-// class.
-bool ReplaceLDSUseImpl::replaceLDSUse() {
- // Collect LDS which requires their uses to be replaced by pointer.
- std::vector<GlobalVariable *> LDSGlobals =
- collectLDSRequiringPointerReplace();
-
- // No LDS to pointer-replace. Nothing to do.
- if (LDSGlobals.empty())
- return false;
-
- // Collect reachable callee set for each kernel defined in the module.
- AMDGPU::collectReachableCallees(M, KernelToCallees);
-
- if (KernelToCallees.empty()) {
- // Either module does not have any kernel definitions, or none of the kernel
- // has a call to non-kernel functions, or we could not resolve any of the
- // call sites to proper non-kernel functions, because of the situations like
- // inline asm calls. Nothing to replace.
- return false;
- }
-
- // For every LDS from collected LDS globals set, replace its non-kernel
- // function scope use by pointer.
- bool Changed = false;
- for (auto *GV : LDSGlobals)
- Changed |= replaceLDSUse(GV);
-
- return Changed;
-}
-
-class AMDGPUReplaceLDSUseWithPointer : public ModulePass {
-public:
- static char ID;
-
- AMDGPUReplaceLDSUseWithPointer() : ModulePass(ID) {
- initializeAMDGPUReplaceLDSUseWithPointerPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetPassConfig>();
- }
-};
-
-} // namespace
-
-char AMDGPUReplaceLDSUseWithPointer::ID = 0;
-char &llvm::AMDGPUReplaceLDSUseWithPointerID =
- AMDGPUReplaceLDSUseWithPointer::ID;
-
-INITIALIZE_PASS_BEGIN(
- AMDGPUReplaceLDSUseWithPointer, DEBUG_TYPE,
- "Replace within non-kernel function use of LDS with pointer",
- false /*only look at the cfg*/, false /*analysis pass*/)
-INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
-INITIALIZE_PASS_END(
- AMDGPUReplaceLDSUseWithPointer, DEBUG_TYPE,
- "Replace within non-kernel function use of LDS with pointer",
- false /*only look at the cfg*/, false /*analysis pass*/)
-
-bool AMDGPUReplaceLDSUseWithPointer::runOnModule(Module &M) {
- ReplaceLDSUseImpl LDSUseReplacer{M};
- return LDSUseReplacer.replaceLDSUse();
-}
-
-ModulePass *llvm::createAMDGPUReplaceLDSUseWithPointerPass() {
- return new AMDGPUReplaceLDSUseWithPointer();
-}
-
-PreservedAnalyses
-AMDGPUReplaceLDSUseWithPointerPass::run(Module &M, ModuleAnalysisManager &AM) {
- ReplaceLDSUseImpl LDSUseReplacer{M};
- LDSUseReplacer.replaceLDSUse();
- return PreservedAnalyses::all();
-}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
index 31e134d42e23..804bf503e4f9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.cpp
@@ -104,6 +104,7 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
const TargetMachine &TM = TPC->getTM<TargetMachine>();
+ const MCSubtargetInfo &STI = *TM.getMCSubtargetInfo();
bool HasIndirectCall = false;
CallGraph CG = CallGraph(M);
@@ -111,7 +112,8 @@ bool AMDGPUResourceUsageAnalysis::runOnModule(Module &M) {
// By default, for code object v5 and later, track only the minimum scratch
// size
- if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) {
+ if (AMDGPU::getCodeObjectVersion(M) >= AMDGPU::AMDHSA_COV5 ||
+ STI.getTargetTriple().getOS() == Triple::AMDPAL) {
if (!AssumedStackSizeForDynamicSizeObjects.getNumOccurrences())
AssumedStackSizeForDynamicSizeObjects = 0;
if (!AssumedStackSizeForExternalCall.getNumOccurrences())
@@ -338,11 +340,9 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
break;
}
- if (AMDGPU::SReg_32RegClass.contains(Reg) ||
- AMDGPU::SReg_LO16RegClass.contains(Reg) ||
+ if (AMDGPU::SGPR_32RegClass.contains(Reg) ||
+ AMDGPU::SGPR_LO16RegClass.contains(Reg) ||
AMDGPU::SGPR_HI16RegClass.contains(Reg)) {
- assert(!AMDGPU::TTMP_32RegClass.contains(Reg) &&
- "trap handler registers should not be used");
IsSGPR = true;
Width = 1;
} else if (AMDGPU::VGPR_32RegClass.contains(Reg) ||
@@ -355,9 +355,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
IsSGPR = false;
IsAGPR = true;
Width = 1;
- } else if (AMDGPU::SReg_64RegClass.contains(Reg)) {
- assert(!AMDGPU::TTMP_64RegClass.contains(Reg) &&
- "trap handler registers should not be used");
+ } else if (AMDGPU::SGPR_64RegClass.contains(Reg)) {
IsSGPR = true;
Width = 2;
} else if (AMDGPU::VReg_64RegClass.contains(Reg)) {
@@ -377,9 +375,7 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
IsSGPR = false;
IsAGPR = true;
Width = 3;
- } else if (AMDGPU::SReg_128RegClass.contains(Reg)) {
- assert(!AMDGPU::TTMP_128RegClass.contains(Reg) &&
- "trap handler registers should not be used");
+ } else if (AMDGPU::SGPR_128RegClass.contains(Reg)) {
IsSGPR = true;
Width = 4;
} else if (AMDGPU::VReg_128RegClass.contains(Reg)) {
@@ -420,8 +416,6 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
IsAGPR = true;
Width = 7;
} else if (AMDGPU::SReg_256RegClass.contains(Reg)) {
- assert(!AMDGPU::TTMP_256RegClass.contains(Reg) &&
- "trap handler registers should not be used");
IsSGPR = true;
Width = 8;
} else if (AMDGPU::VReg_256RegClass.contains(Reg)) {
@@ -472,8 +466,6 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
IsAGPR = true;
Width = 12;
} else if (AMDGPU::SReg_512RegClass.contains(Reg)) {
- assert(!AMDGPU::TTMP_512RegClass.contains(Reg) &&
- "trap handler registers should not be used");
IsSGPR = true;
Width = 16;
} else if (AMDGPU::VReg_512RegClass.contains(Reg)) {
@@ -494,7 +486,15 @@ AMDGPUResourceUsageAnalysis::analyzeResourceUsage(
IsAGPR = true;
Width = 32;
} else {
- llvm_unreachable("Unknown register class");
+ // We only expect TTMP registers or registers that do not belong to
+ // any RC.
+ assert((AMDGPU::TTMP_32RegClass.contains(Reg) ||
+ AMDGPU::TTMP_64RegClass.contains(Reg) ||
+ AMDGPU::TTMP_128RegClass.contains(Reg) ||
+ AMDGPU::TTMP_256RegClass.contains(Reg) ||
+ AMDGPU::TTMP_512RegClass.contains(Reg) ||
+ !TRI.getPhysRegBaseClass(Reg)) &&
+ "Unknown register class");
}
unsigned HWReg = TRI.getHWRegIndex(Reg);
int MaxUsed = HWReg + Width - 1;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index 3ff3546f4f92..2fde7afc0c14 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -46,6 +46,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
@@ -377,19 +378,12 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
if (!OutArgIndexes.count(Arg.getArgNo()))
continue;
- PointerType *ArgType = cast<PointerType>(Arg.getType());
-
Type *EltTy = OutArgIndexes[Arg.getArgNo()];
const auto Align =
DL->getValueOrABITypeAlignment(Arg.getParamAlign(), EltTy);
Value *Val = B.CreateExtractValue(StubCall, RetIdx++);
- Type *PtrTy = Val->getType()->getPointerTo(ArgType->getAddressSpace());
-
- // We can peek through bitcasts, so the type may not match.
- Value *PtrVal = B.CreateBitCast(&Arg, PtrTy);
-
- B.CreateAlignedStore(Val, PtrVal, Align);
+ B.CreateAlignedStore(Val, &Arg, Align);
}
if (!RetTy->isVoidTy()) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp
index ff34726fdf02..9c07851243c9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteUndefForPHI.cpp
@@ -10,7 +10,7 @@
// StructurizedCFG pass, and this pass has some additional limitations that mean
// it can only run after SIAnnotateControlFlow.
//
-// To achieve optimal code generation for AMDGPU, we assume that divergence
+// To achieve optimal code generation for AMDGPU, we assume that uniformity
// analysis reports the PHI in join block of divergent branch as uniform if
// it has one unique uniform value plus additional undefined/poisoned incoming
// value. That is to say the later compiler pipeline will ensure such PHI always
@@ -56,7 +56,7 @@
// \---
#include "AMDGPU.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
@@ -81,11 +81,11 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LegacyDivergenceAnalysis>();
+ AU.addRequired<UniformityInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<LegacyDivergenceAnalysis>();
+ AU.addPreserved<UniformityInfoWrapperPass>();
AU.setPreservesCFG();
}
};
@@ -95,17 +95,17 @@ char AMDGPURewriteUndefForPHI::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPURewriteUndefForPHI, DEBUG_TYPE,
"Rewrite undef for PHI", false, false)
-INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(AMDGPURewriteUndefForPHI, DEBUG_TYPE,
"Rewrite undef for PHI", false, false)
-bool rewritePHIs(Function &F, LegacyDivergenceAnalysis *DA, DominatorTree *DT) {
+bool rewritePHIs(Function &F, UniformityInfo &UA, DominatorTree *DT) {
bool Changed = false;
SmallVector<PHINode *> ToBeDeleted;
for (auto &BB : F) {
for (auto &PHI : BB.phis()) {
- if (DA->isDivergent(&PHI))
+ if (UA.isDivergent(&PHI))
continue;
// The unique incoming value except undef/poison for the PHI node.
@@ -147,7 +147,7 @@ bool rewritePHIs(Function &F, LegacyDivergenceAnalysis *DA, DominatorTree *DT) {
// TODO: We should still be able to replace undef value if the unique
// value is a Constant.
if (!UniqueDefinedIncoming || Undefs.empty() ||
- !DA->isDivergent(DominateBB->getTerminator()))
+ !UA.isDivergent(DominateBB->getTerminator()))
continue;
// We only replace the undef when DominateBB truly dominates all the
@@ -171,9 +171,10 @@ bool rewritePHIs(Function &F, LegacyDivergenceAnalysis *DA, DominatorTree *DT) {
}
bool AMDGPURewriteUndefForPHI::runOnFunction(Function &F) {
- LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>();
+ UniformityInfo &UA =
+ getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return rewritePHIs(F, DA, DT);
+ return rewritePHIs(F, UA, DT);
}
FunctionPass *llvm::createAMDGPURewriteUndefForPHIPass() {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
index ca714baffe3e..317f3f21d240 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSearchableTables.td
@@ -237,8 +237,6 @@ def : SourceOfDivergence<int_amdgcn_mbcnt_lo>;
def : SourceOfDivergence<int_r600_read_tidig_x>;
def : SourceOfDivergence<int_r600_read_tidig_y>;
def : SourceOfDivergence<int_r600_read_tidig_z>;
-def : SourceOfDivergence<int_amdgcn_atomic_inc>;
-def : SourceOfDivergence<int_amdgcn_atomic_dec>;
def : SourceOfDivergence<int_amdgcn_global_atomic_csub>;
def : SourceOfDivergence<int_amdgcn_global_atomic_fadd>;
def : SourceOfDivergence<int_amdgcn_global_atomic_fmin>;
@@ -279,6 +277,22 @@ def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fadd>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fmin>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_fmax>;
def : SourceOfDivergence<int_amdgcn_raw_buffer_atomic_cmpswap>;
+def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_swap>;
+def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_add>;
+def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_sub>;
+def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_smin>;
+def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_umin>;
+def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_smax>;
+def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_umax>;
+def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_and>;
+def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_or>;
+def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_xor>;
+def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_inc>;
+def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_dec>;
+def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fadd>;
+def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fmin>;
+def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_fmax>;
+def : SourceOfDivergence<int_amdgcn_raw_ptr_buffer_atomic_cmpswap>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_swap>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_add>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_sub>;
@@ -295,6 +309,22 @@ def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fadd>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fmin>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_fmax>;
def : SourceOfDivergence<int_amdgcn_struct_buffer_atomic_cmpswap>;
+def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_swap>;
+def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_add>;
+def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_sub>;
+def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_smin>;
+def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_umin>;
+def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_smax>;
+def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_umax>;
+def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_and>;
+def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_or>;
+def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_xor>;
+def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_inc>;
+def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_dec>;
+def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fadd>;
+def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fmin>;
+def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_fmax>;
+def : SourceOfDivergence<int_amdgcn_struct_ptr_buffer_atomic_cmpswap>;
def : SourceOfDivergence<int_amdgcn_buffer_atomic_csub>;
def : SourceOfDivergence<int_amdgcn_ps_live>;
def : SourceOfDivergence<int_amdgcn_live_mask>;
@@ -376,6 +406,26 @@ def : SourceOfDivergence<int_amdgcn_wmma_i32_16x16x16_iu4>;
def : SourceOfDivergence<int_amdgcn_if>;
def : SourceOfDivergence<int_amdgcn_else>;
def : SourceOfDivergence<int_amdgcn_loop>;
+def : SourceOfDivergence<int_amdgcn_inverse_ballot>;
foreach intr = AMDGPUImageDimAtomicIntrinsics in
def : SourceOfDivergence<intr>;
+
+class AlwaysUniform<Intrinsic intr> {
+ Intrinsic Intr = intr;
+}
+
+def UniformIntrinsics : GenericTable {
+ let FilterClass = "AlwaysUniform";
+ let Fields = ["Intr"];
+
+ let PrimaryKey = ["Intr"];
+ let PrimaryKeyName = "lookupAlwaysUniform";
+}
+
+def : AlwaysUniform<int_amdgcn_readfirstlane>;
+def : AlwaysUniform<int_amdgcn_readlane>;
+def : AlwaysUniform<int_amdgcn_icmp>;
+def : AlwaysUniform<int_amdgcn_fcmp>;
+def : AlwaysUniform<int_amdgcn_ballot>;
+def : AlwaysUniform<int_amdgcn_if_break>;
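
Note on the AlwaysUniform table added above: the SearchableTables backend generates a record type plus a lookup function named by PrimaryKeyName ("lookupAlwaysUniform"), which the C++ changes later in this patch reach through AMDGPU::isIntrinsicAlwaysUniform. A minimal sketch of that glue, assuming a generated entry type (the struct name below is illustrative, not taken from the patch):

struct AlwaysUniformEntry {
  unsigned Intr; // Intrinsic::ID of an always-uniform intrinsic
};
// Generated by the SearchableTables backend from the records above.
const AlwaysUniformEntry *lookupAlwaysUniform(unsigned Intr);

// Thin wrapper, mirroring how isIntrinsicAlwaysUniform is consumed in
// AMDGPUTargetTransformInfo.cpp further down in this patch.
bool isIntrinsicAlwaysUniform(unsigned IntrID) {
  return lookupAlwaysUniform(IntrID) != nullptr;
}
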
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 03ccd563975f..9b50f4fa53ac 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -416,8 +416,9 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
return Requested;
}
-std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
- const Function &F, std::pair<unsigned, unsigned> FlatWorkGroupSizes) const {
+std::pair<unsigned, unsigned> AMDGPUSubtarget::getEffectiveWavesPerEU(
+ std::pair<unsigned, unsigned> Requested,
+ std::pair<unsigned, unsigned> FlatWorkGroupSizes) const {
// Default minimum/maximum number of waves per execution unit.
std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
@@ -429,10 +430,6 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
getWavesPerEUForWorkGroup(FlatWorkGroupSizes.second);
Default.first = MinImpliedByFlatWorkGroupSize;
- // Requested minimum/maximum number of waves per execution unit.
- std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
- F, "amdgpu-waves-per-eu", Default, true);
-
// Make sure requested minimum is less than requested maximum.
if (Requested.second && Requested.first > Requested.second)
return Default;
@@ -450,6 +447,17 @@ std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
return Requested;
}
+std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
+ const Function &F, std::pair<unsigned, unsigned> FlatWorkGroupSizes) const {
+ // Default minimum/maximum number of waves per execution unit.
+ std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
+
+ // Requested minimum/maximum number of waves per execution unit.
+ std::pair<unsigned, unsigned> Requested =
+ AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", Default, true);
+ return getEffectiveWavesPerEU(Requested, FlatWorkGroupSizes);
+}
+
static unsigned getReqdWorkGroupSize(const Function &Kernel, unsigned Dim) {
auto Node = Kernel.getMetadata("reqd_work_group_size");
if (Node && Node->getNumOperands() == 3)
@@ -469,6 +477,15 @@ unsigned AMDGPUSubtarget::getMaxWorkitemID(const Function &Kernel,
return getFlatWorkGroupSizes(Kernel).second - 1;
}
+bool AMDGPUSubtarget::isSingleLaneExecution(const Function &Func) const {
+ for (int I = 0; I < 3; ++I) {
+ if (getMaxWorkitemID(Func, I) > 0)
+ return false;
+ }
+
+ return true;
+}
+
bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
Function *Kernel = I->getParent()->getParent();
unsigned MinSize = 0;
@@ -543,7 +560,9 @@ unsigned AMDGPUSubtarget::getImplicitArgNumBytes(const Function &F) const {
return 16;
// Assume all implicit inputs are used by default
- unsigned NBytes = (AMDGPU::getAmdhsaCodeObjectVersion() >= 5) ? 256 : 56;
+ const Module *M = F.getParent();
+ unsigned NBytes =
+ AMDGPU::getCodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5 ? 256 : 56;
return F.getFnAttributeAsParsedInteger("amdgpu-implicitarg-num-bytes",
NBytes);
}
@@ -572,9 +591,13 @@ uint64_t AMDGPUSubtarget::getExplicitKernArgSize(const Function &F,
unsigned AMDGPUSubtarget::getKernArgSegmentSize(const Function &F,
Align &MaxAlign) const {
+ if (F.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
+ F.getCallingConv() != CallingConv::SPIR_KERNEL)
+ return 0;
+
uint64_t ExplicitArgBytes = getExplicitKernArgSize(F, MaxAlign);
- unsigned ExplicitOffset = getExplicitKernelArgOffset(F);
+ unsigned ExplicitOffset = getExplicitKernelArgOffset();
uint64_t TotalSize = ExplicitOffset + ExplicitArgBytes;
unsigned ImplicitBytes = getImplicitArgNumBytes(F);
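
The refactoring above splits attribute parsing from range validation: getWavesPerEU still reads the "amdgpu-waves-per-eu" function attribute, while the new getEffectiveWavesPerEU only reconciles an already-known request with the range implied by the flat work-group sizes. A hedged usage sketch (ST, F and Requested are assumed to exist in the caller):

// Attribute-driven path, unchanged for existing callers:
std::pair<unsigned, unsigned> FlatWGS = ST.getFlatWorkGroupSizes(F);
std::pair<unsigned, unsigned> Waves = ST.getWavesPerEU(F, FlatWGS);

// New path for callers that already hold a requested range and want the
// same validation without re-reading the attribute:
std::pair<unsigned, unsigned> Effective =
    ST.getEffectiveWavesPerEU(Requested, FlatWGS);
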
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 972f996ad85a..10ce00fe68ca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -14,9 +14,9 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Support/Alignment.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
@@ -61,6 +61,7 @@ protected:
bool HasFminFmaxLegacy = true;
bool EnablePromoteAlloca = false;
bool HasTrigReducedRange = false;
+ bool FastFMAF32 = false;
unsigned EUsPerCU = 4;
unsigned MaxWavesPerEU = 10;
unsigned LocalMemorySize = 0;
@@ -107,6 +108,9 @@ public:
std::pair<unsigned, unsigned>
getWavesPerEU(const Function &F,
std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
+ std::pair<unsigned, unsigned> getEffectiveWavesPerEU(
+ std::pair<unsigned, unsigned> WavesPerEU,
+ std::pair<unsigned, unsigned> FlatWorkGroupSizes) const;
/// Return the amount of LDS that can be used that will not restrict the
/// occupancy lower than WaveCount.
@@ -195,6 +199,10 @@ public:
return HasTrigReducedRange;
}
+ bool hasFastFMAF32() const {
+ return FastFMAF32;
+ }
+
bool isPromoteAllocaEnabled() const {
return EnablePromoteAlloca;
}
@@ -226,7 +234,7 @@ public:
/// Returns the offset in bytes from the start of the input buffer
/// of the first explicit kernel argument.
- unsigned getExplicitKernelArgOffset(const Function &F) const {
+ unsigned getExplicitKernelArgOffset() const {
switch (TargetTriple.getOS()) {
case Triple::AMDHSA:
case Triple::AMDPAL:
@@ -269,6 +277,9 @@ public:
/// 2) dimension.
unsigned getMaxWorkitemID(const Function &Kernel, unsigned Dimension) const;
+ /// Return true if only a single workitem can be active in a wave.
+ bool isSingleLaneExecution(const Function &Kernel) const;
+
/// Creates value range metadata on an workitemid.* intrinsic call or load.
bool makeLIDRangeMetadata(Instruction *I) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 5694acf40527..f90c8e4bdddd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -19,8 +19,10 @@
#include "AMDGPUExportClustering.h"
#include "AMDGPUIGroupLP.h"
#include "AMDGPUMacroFusion.h"
+#include "AMDGPURegBankSelect.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
+#include "AMDGPUUnifyDivergentExitNodes.h"
#include "GCNIterativeScheduler.h"
#include "GCNSchedStrategy.h"
#include "GCNVOPDUtils.h"
@@ -43,7 +45,6 @@
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
@@ -58,7 +59,7 @@
#include "llvm/Transforms/Scalar/InferAddressSpaces.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
-#include "llvm/Transforms/Vectorize.h"
+#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include <optional>
using namespace llvm;
@@ -188,6 +189,11 @@ OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
cl::desc("Run pre-RA exec mask optimizations"),
cl::init(true));
+static cl::opt<bool>
+ LowerCtorDtor("amdgpu-lower-global-ctor-dtor",
+ cl::desc("Lower GPU ctor / dtors to globals on the device."),
+ cl::init(true), cl::Hidden);
+
// Option to disable vectorizer for tests.
static cl::opt<bool> EnableLoadStoreVectorizer(
"amdgpu-load-store-vectorizer",
@@ -216,6 +222,12 @@ static cl::opt<bool> EarlyInlineAll(
cl::init(false),
cl::Hidden);
+static cl::opt<bool> RemoveIncompatibleFunctions(
+ "amdgpu-enable-remove-incompatible-functions", cl::Hidden,
+ cl::desc("Enable removal of functions when they"
+ "use features not supported by the target GPU"),
+ cl::init(true));
+
static cl::opt<bool> EnableSDWAPeephole(
"amdgpu-sdwa-peephole",
cl::desc("Enable SDWA peepholer"),
@@ -262,12 +274,15 @@ static cl::opt<bool> OptVGPRLiveRange(
cl::desc("Enable VGPR liverange optimizations for if-else structure"),
cl::init(true), cl::Hidden);
-// Enable atomic optimization
-static cl::opt<bool> EnableAtomicOptimizations(
- "amdgpu-atomic-optimizations",
- cl::desc("Enable atomic optimizations"),
- cl::init(false),
- cl::Hidden);
+static cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy(
+ "amdgpu-atomic-optimizer-strategy",
+ cl::desc("Select DPP or Iterative strategy for scan"),
+ cl::init(ScanOptions::Iterative),
+ cl::values(
+ clEnumValN(ScanOptions::DPP, "DPP", "Use DPP operations for scan"),
+ clEnumValN(ScanOptions::Iterative, "Iterative",
+ "Use Iterative approach for scan"),
+ clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer")));
// Enable Mode register optimization
static cl::opt<bool> EnableSIModeRegisterPass(
@@ -309,11 +324,6 @@ static cl::opt<bool> EnableStructurizerWorkarounds(
cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
cl::Hidden);
-static cl::opt<bool> EnableLDSReplaceWithPointer(
- "amdgpu-enable-lds-replace-with-pointer",
- cl::desc("Enable LDS replace with pointer pass"), cl::init(false),
- cl::Hidden);
-
static cl::opt<bool, true> EnableLowerModuleLDS(
"amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true),
@@ -334,9 +344,14 @@ static cl::opt<bool> EnableMaxIlpSchedStrategy(
cl::desc("Enable scheduling strategy to maximize ILP for a single wave."),
cl::Hidden, cl::init(false));
+static cl::opt<bool> EnableRewritePartialRegUses(
+ "amdgpu-enable-rewrite-partial-reg-uses",
+ cl::desc("Enable rewrite partial reg uses pass"), cl::init(false),
+ cl::Hidden);
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
// Register the target
- RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
+ RegisterTargetMachine<R600TargetMachine> X(getTheR600Target());
RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());
PassRegistry *PR = PassRegistry::getPassRegistry();
@@ -349,6 +364,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUDAGToDAGISelPass(*PR);
initializeGCNDPPCombinePass(*PR);
initializeSILowerI1CopiesPass(*PR);
+ initializeSILowerWWMCopiesPass(*PR);
initializeSILowerSGPRSpillsPass(*PR);
initializeSIFixSGPRCopiesPass(*PR);
initializeSIFixVGPRCopiesPass(*PR);
@@ -368,24 +384,21 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPULowerKernelArgumentsPass(*PR);
initializeAMDGPUPromoteKernelArgumentsPass(*PR);
initializeAMDGPULowerKernelAttributesPass(*PR);
- initializeAMDGPULowerIntrinsicsPass(*PR);
initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
initializeAMDGPUPostLegalizerCombinerPass(*PR);
initializeAMDGPUPreLegalizerCombinerPass(*PR);
initializeAMDGPURegBankCombinerPass(*PR);
+ initializeAMDGPURegBankSelectPass(*PR);
initializeAMDGPUPromoteAllocaPass(*PR);
initializeAMDGPUPromoteAllocaToVectorPass(*PR);
initializeAMDGPUCodeGenPreparePass(*PR);
initializeAMDGPULateCodeGenPreparePass(*PR);
- initializeAMDGPUPropagateAttributesEarlyPass(*PR);
- initializeAMDGPUPropagateAttributesLatePass(*PR);
- initializeAMDGPUReplaceLDSUseWithPointerPass(*PR);
+ initializeAMDGPURemoveIncompatibleFunctionsPass(*PR);
initializeAMDGPULowerModuleLDSPass(*PR);
initializeAMDGPURewriteOutArgumentsPass(*PR);
initializeAMDGPURewriteUndefForPHIPass(*PR);
initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowPass(*PR);
- initializeAMDGPUReleaseVGPRsPass(*PR);
initializeAMDGPUInsertDelayAluPass(*PR);
initializeSIInsertHardClausesPass(*PR);
initializeSIInsertWaitcntsPass(*PR);
@@ -409,6 +422,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUResourceUsageAnalysisPass(*PR);
initializeGCNNSAReassignPass(*PR);
initializeGCNPreRAOptimizationsPass(*PR);
+ initializeGCNPreRALongBranchRegPass(*PR);
+ initializeGCNRewritePartialRegUsesPass(*PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -505,11 +520,15 @@ static StringRef computeDataLayout(const Triple &TT) {
}
// 32-bit private, local, and region pointers. 64-bit global, constant and
- // flat, non-integral buffer fat pointers.
+ // flat. 160-bit non-integral fat buffer pointers that include a 128-bit
+ // buffer descriptor and a 32-bit offset, which are indexed by 32-bit values
+ // (address space 7), and 128-bit non-integral buffer resources (address
+ // space 8) which cannot be non-trivially accessed by LLVM memory operations
+ // like getelementptr.
return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
- "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
- "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
- "-ni:7";
+ "-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:"
+ "128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-"
+ "G1-ni:7:8";
}
LLVM_READNONE
@@ -584,12 +603,8 @@ void AMDGPUTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) {
void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerPipelineParsingCallback(
- [this](StringRef PassName, ModulePassManager &PM,
- ArrayRef<PassBuilder::PipelineElement>) {
- if (PassName == "amdgpu-propagate-attributes-late") {
- PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
- return true;
- }
+ [](StringRef PassName, ModulePassManager &PM,
+ ArrayRef<PassBuilder::PipelineElement>) {
if (PassName == "amdgpu-unify-metadata") {
PM.addPass(AMDGPUUnifyMetadataPass());
return true;
@@ -602,10 +617,6 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PM.addPass(AMDGPUAlwaysInlinePass());
return true;
}
- if (PassName == "amdgpu-replace-lds-use-with-pointer") {
- PM.addPass(AMDGPUReplaceLDSUseWithPointerPass());
- return true;
- }
if (PassName == "amdgpu-lower-module-lds") {
PM.addPass(AMDGPULowerModuleLDSPass());
return true;
@@ -639,14 +650,23 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PM.addPass(AMDGPULowerKernelAttributesPass());
return true;
}
- if (PassName == "amdgpu-propagate-attributes-early") {
- PM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
- return true;
- }
if (PassName == "amdgpu-promote-kernel-arguments") {
PM.addPass(AMDGPUPromoteKernelArgumentsPass());
return true;
}
+ if (PassName == "amdgpu-unify-divergent-exit-nodes") {
+ PM.addPass(AMDGPUUnifyDivergentExitNodesPass());
+ return true;
+ }
+ if (PassName == "amdgpu-atomic-optimizer") {
+ PM.addPass(
+ AMDGPUAtomicOptimizerPass(*this, AMDGPUAtomicOptimizerStrategy));
+ return true;
+ }
+ if (PassName == "amdgpu-codegenprepare") {
+ PM.addPass(AMDGPUCodeGenPreparePass(*this));
+ return true;
+ }
return false;
});
@@ -665,7 +685,6 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerPipelineStartEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
FunctionPassManager FPM;
- FPM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
FPM.addPass(AMDGPUUseNativeCallsPass());
if (EnableLibCallSimplify && Level != OptimizationLevel::O0)
FPM.addPass(AMDGPUSimplifyLibCallsPass(*this));
@@ -673,20 +692,19 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
});
PB.registerPipelineEarlySimplificationEPCallback(
- [this](ModulePassManager &PM, OptimizationLevel Level) {
+ [](ModulePassManager &PM, OptimizationLevel Level) {
+ PM.addPass(AMDGPUPrintfRuntimeBindingPass());
+
if (Level == OptimizationLevel::O0)
return;
PM.addPass(AMDGPUUnifyMetadataPass());
- PM.addPass(AMDGPUPrintfRuntimeBindingPass());
if (InternalizeSymbols) {
PM.addPass(InternalizePass(mustPreserveGV));
- }
- PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
- if (InternalizeSymbols) {
PM.addPass(GlobalDCEPass());
}
+
if (EarlyInlineAll && !EnableFunctionCalls)
PM.addPass(AMDGPUAlwaysInlinePass());
});
@@ -932,7 +950,6 @@ void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
}
void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
- addPass(createLICMPass());
addPass(createSeparateConstOffsetFromGEPPass());
// ReassociateGEPs exposes more opportunities for SLSR. See
// the example in reassociate-geps-and-slsr.ll.
@@ -956,22 +973,12 @@ void AMDGPUPassConfig::addIRPasses() {
disablePass(&PatchableFunctionID);
addPass(createAMDGPUPrintfRuntimeBinding());
- addPass(createAMDGPUCtorDtorLoweringLegacyPass());
-
- // A call to propagate attributes pass in the backend in case opt was not run.
- addPass(createAMDGPUPropagateAttributesEarlyPass(&TM));
-
- addPass(createAMDGPULowerIntrinsicsPass());
+ if (LowerCtorDtor)
+ addPass(createAMDGPUCtorDtorLoweringLegacyPass());
// Function calls are not supported, so make sure we inline everything.
addPass(createAMDGPUAlwaysInlinePass());
addPass(createAlwaysInlinerLegacyPass());
- // We need to add the barrier noop pass, otherwise adding the function
- // inlining pass will cause all of the PassConfigs passes to be run
- // one function at a time, which means if we have a module with two
- // functions, then we will generate code for the first function
- // without ever running any passes on the second.
- addPass(createBarrierNoopPass());
// Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
if (TM.getTargetTriple().getArch() == Triple::r600)
@@ -980,17 +987,16 @@ void AMDGPUPassConfig::addIRPasses() {
// Replace OpenCL enqueued block function pointers with global variables.
addPass(createAMDGPUOpenCLEnqueuedBlockLoweringPass());
- // Can increase LDS used by kernel so runs before PromoteAlloca
+ // Runs before PromoteAlloca so the latter can account for function uses
if (EnableLowerModuleLDS) {
- // The pass "amdgpu-replace-lds-use-with-pointer" need to be run before the
- // pass "amdgpu-lower-module-lds", and also it required to be run only if
- // "amdgpu-lower-module-lds" pass is enabled.
- if (EnableLDSReplaceWithPointer)
- addPass(createAMDGPUReplaceLDSUseWithPointerPass());
-
addPass(createAMDGPULowerModuleLDSPass());
}
+ // AMDGPUAttributor infers lack of llvm.amdgcn.lds.kernel.id calls, so run
+ // after their introduction
+ if (TM.getOptLevel() > CodeGenOpt::None)
+ addPass(createAMDGPUAttributorPass());
+
if (TM.getOptLevel() > CodeGenOpt::None)
addPass(createInferAddressSpacesPass());
@@ -1017,6 +1023,11 @@ void AMDGPUPassConfig::addIRPasses() {
// TODO: May want to move later or split into an early and late one.
addPass(createAMDGPUCodeGenPreparePass());
}
+
+ // Try to hoist loop invariant parts of divisions AMDGPUCodeGenPrepare may
+ // have expanded.
+ if (TM.getOptLevel() > CodeGenOpt::Less)
+ addPass(createLICMPass());
}
TargetPassConfig::addIRPasses();
@@ -1039,7 +1050,8 @@ void AMDGPUPassConfig::addIRPasses() {
void AMDGPUPassConfig::addCodeGenPrepare() {
if (TM->getTargetTriple().getArch() == Triple::amdgcn) {
- addPass(createAMDGPUAttributorPass());
+ if (RemoveIncompatibleFunctions)
+ addPass(createAMDGPURemoveIncompatibleFunctionsPass(TM));
// FIXME: This pass adds 2 hacky attributes that can be replaced with an
// analysis, and should be removed.
@@ -1117,8 +1129,9 @@ bool GCNPassConfig::addPreISel() {
if (TM->getOptLevel() > CodeGenOpt::None)
addPass(createAMDGPULateCodeGenPreparePass());
- if (isPassEnabled(EnableAtomicOptimizations, CodeGenOpt::Less)) {
- addPass(createAMDGPUAtomicOptimizerPass());
+ if ((TM->getOptLevel() >= CodeGenOpt::Less) &&
+ (AMDGPUAtomicOptimizerStrategy != ScanOptions::None)) {
+ addPass(createAMDGPUAtomicOptimizerPass(AMDGPUAtomicOptimizerStrategy));
}
if (TM->getOptLevel() > CodeGenOpt::None)
@@ -1211,7 +1224,7 @@ void GCNPassConfig::addPreRegBankSelect() {
}
bool GCNPassConfig::addRegBankSelect() {
- addPass(new RegBankSelect());
+ addPass(new AMDGPURegBankSelect());
return false;
}
@@ -1255,6 +1268,9 @@ void GCNPassConfig::addOptimizedRegAlloc() {
if (OptExecMaskPreRA)
insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
+ if (EnableRewritePartialRegUses)
+ insertPass(&RenameIndependentSubregsID, &GCNRewritePartialRegUsesID);
+
if (isPassEnabled(EnablePreRAOptimizations))
insertPass(&RenameIndependentSubregsID, &GCNPreRAOptimizationsID);
@@ -1281,6 +1297,7 @@ void GCNPassConfig::addOptimizedRegAlloc() {
}
bool GCNPassConfig::addPreRewrite() {
+ addPass(&SILowerWWMCopiesID);
if (EnableRegReassign)
addPass(&GCNNSAReassignID);
return true;
@@ -1327,12 +1344,16 @@ bool GCNPassConfig::addRegAssignAndRewriteFast() {
if (!usingDefaultRegAlloc())
report_fatal_error(RegAllocOptNotSupportedMessage);
+ addPass(&GCNPreRALongBranchRegID);
+
addPass(createSGPRAllocPass(false));
// Equivalent of PEI for SGPRs.
addPass(&SILowerSGPRSpillsID);
addPass(createVGPRAllocPass(false));
+
+ addPass(&SILowerWWMCopiesID);
return true;
}
@@ -1340,6 +1361,8 @@ bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
if (!usingDefaultRegAlloc())
report_fatal_error(RegAllocOptNotSupportedMessage);
+ addPass(&GCNPreRALongBranchRegID);
+
addPass(createSGPRAllocPass(true));
// Commit allocated register changes. This is mostly necessary because too
@@ -1398,9 +1421,6 @@ void GCNPassConfig::addPreEmitPass() {
// cases.
addPass(&PostRAHazardRecognizerID);
- if (getOptLevel() > CodeGenOpt::Less)
- addPass(&AMDGPUReleaseVGPRsID);
-
if (isPassEnabled(EnableInsertDelayAlu, CodeGenOpt::Less))
addPass(&AMDGPUInsertDelayAluID);
@@ -1411,6 +1431,12 @@ TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
return new GCNPassConfig(*this, PM);
}
+void GCNTargetMachine::registerMachineRegisterInfoCallback(
+ MachineFunction &MF) const {
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ MF.getRegInfo().addDelegate(MFI);
+}
+
MachineFunctionInfo *GCNTargetMachine::createMachineFunctionInfo(
BumpPtrAllocator &Allocator, const Function &F,
const TargetSubtargetInfo *STI) const {
@@ -1465,6 +1491,13 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
if (parseOptionalRegister(YamlMFI.VGPRForAGPRCopy, MFI->VGPRForAGPRCopy))
return true;
+ if (parseOptionalRegister(YamlMFI.SGPRForEXECCopy, MFI->SGPRForEXECCopy))
+ return true;
+
+ if (parseOptionalRegister(YamlMFI.LongBranchReservedReg,
+ MFI->LongBranchReservedReg))
+ return true;
+
auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
// Create a diagnostic for the register string literal.
const MemoryBuffer &Buffer =
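
The new data layout string above gives buffer fat pointers (address space 7) a 160-bit size, a 256-bit ABI/preferred alignment and a 32-bit index width, and buffer resources (address space 8) a 128-bit size. A small sketch of how a consumer observes this through the DataLayout API (M is an assumed Module built for an amdgcn triple):

const DataLayout &DL = M.getDataLayout();
unsigned FatPtrBits = DL.getPointerSizeInBits(7);    // 160
Align FatPtrAlign   = DL.getPointerABIAlignment(7);  // 32 bytes (256 bits)
unsigned IdxBits    = DL.getIndexSizeInBits(7);      // 32
unsigned RsrcBits   = DL.getPointerSizeInBits(8);    // 128
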
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index ce93704b78f4..2426be405a65 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -92,6 +92,8 @@ public:
return true;
}
+ void registerMachineRegisterInfoCallback(MachineFunction &MF) const override;
+
MachineFunctionInfo *
createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F,
const TargetSubtargetInfo *STI) const override;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 0c3324f84b25..81d083c1c88a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -17,8 +17,11 @@
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIModeRegisterDefaults.h"
+#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/PatternMatch.h"
@@ -49,11 +52,6 @@ static cl::opt<bool> UnrollRuntimeLocal(
cl::desc("Allow runtime unroll for AMDGPU if local memory used in a loop"),
cl::init(true), cl::Hidden);
-static cl::opt<bool> UseLegacyDA(
- "amdgpu-use-legacy-divergence-analysis",
- cl::desc("Enable legacy divergence analysis for AMDGPU"),
- cl::init(false), cl::Hidden);
-
static cl::opt<unsigned> UnrollMaxBlockToAnalyze(
"amdgpu-unroll-max-block-to-analyze",
cl::desc("Inner loop block size threshold to analyze in unroll for AMDGPU"),
@@ -115,6 +113,9 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
// manipulations in average.
UP.BEInsns += 3;
+ // We want to run unroll even for the loops which have been vectorized.
+ UP.UnrollVectorizedLoop = true;
+
// TODO: Do we want runtime unrolling?
// Maximum alloca size than can fit registers. Reserve 16 registers.
@@ -266,6 +267,10 @@ void AMDGPUTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
BaseT::getPeelingPreferences(L, SE, PP);
}
+int64_t AMDGPUTTIImpl::getMaxMemIntrinsicInlineSizeThreshold() const {
+ return 1024;
+}
+
const FeatureBitset GCNTTIImpl::InlineFeatureIgnoreList = {
// Codegen control options which don't matter.
AMDGPU::FeatureEnableLoadStoreOpt, AMDGPU::FeatureEnableSIScheduler,
@@ -291,9 +296,14 @@ GCNTTIImpl::GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
ST(static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F))),
TLI(ST->getTargetLowering()), CommonTTI(TM, F),
IsGraphics(AMDGPU::isGraphics(F.getCallingConv())) {
- AMDGPU::SIModeRegisterDefaults Mode(F);
- HasFP32Denormals = Mode.allFP32Denormals();
- HasFP64FP16Denormals = Mode.allFP64FP16Denormals();
+ SIModeRegisterDefaults Mode(F);
+ HasFP32Denormals = Mode.FP32Denormals != DenormalMode::getPreserveSign();
+ HasFP64FP16Denormals =
+ Mode.FP64FP16Denormals != DenormalMode::getPreserveSign();
+}
+
+bool GCNTTIImpl::hasBranchDivergence(const Function *F) const {
+ return !F || !ST->isSingleLaneExecution(*F);
}
unsigned GCNTTIImpl::getNumberOfRegisters(unsigned RCID) const {
@@ -357,7 +367,8 @@ unsigned GCNTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
if (AddrSpace == AMDGPUAS::GLOBAL_ADDRESS ||
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
- AddrSpace == AMDGPUAS::BUFFER_FAT_POINTER) {
+ AddrSpace == AMDGPUAS::BUFFER_FAT_POINTER ||
+ AddrSpace == AMDGPUAS::BUFFER_RESOURCE) {
return 512;
}
@@ -393,6 +404,10 @@ bool GCNTTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}
+int64_t GCNTTIImpl::getMaxMemIntrinsicInlineSizeThreshold() const {
+ return 1024;
+}
+
// FIXME: Really we would like to issue multiple 128-bit loads and stores per
// iteration. Should we report a larger size and let it legalize?
//
@@ -472,10 +487,10 @@ void GCNTTIImpl::getMemcpyLoopResidualLoweringType(
}
}
-unsigned GCNTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned GCNTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
// Disable unrolling if the loop is not vectorized.
// TODO: Enable this again.
- if (VF == 1)
+ if (VF.isScalar())
return 1;
return 8;
@@ -484,8 +499,6 @@ unsigned GCNTTIImpl::getMaxInterleaveFactor(unsigned VF) {
bool GCNTTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
MemIntrinsicInfo &Info) const {
switch (Inst->getIntrinsicID()) {
- case Intrinsic::amdgcn_atomic_inc:
- case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_ds_fadd:
@@ -775,15 +788,15 @@ GCNTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
}
InstructionCost
-GCNTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsUnsigned,
+GCNTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind) {
EVT OrigTy = TLI->getValueType(DL, Ty);
// Computes cost on targets that have packed math instructions(which support
// 16-bit types only).
if (!ST->hasVOP3PInsts() || OrigTy.getScalarSizeInBits() != 16)
- return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
+ return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
return LT.first * getHalfRateInstrCost(CostKind);
@@ -857,11 +870,6 @@ bool GCNTTIImpl::isInlineAsmSourceOfDivergence(
return false;
}
-/// \returns true if the new GPU divergence analysis is enabled.
-bool GCNTTIImpl::useGPUDivergenceAnalysis() const {
- return !UseLegacyDA;
-}
-
bool GCNTTIImpl::isReadRegisterSourceOfDivergence(
const IntrinsicInst *ReadReg) const {
Metadata *MD =
@@ -928,19 +936,8 @@ bool GCNTTIImpl::isSourceOfDivergence(const Value *V) const {
}
bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
- if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V)) {
- switch (Intrinsic->getIntrinsicID()) {
- default:
- return false;
- case Intrinsic::amdgcn_readfirstlane:
- case Intrinsic::amdgcn_readlane:
- case Intrinsic::amdgcn_icmp:
- case Intrinsic::amdgcn_fcmp:
- case Intrinsic::amdgcn_ballot:
- case Intrinsic::amdgcn_if_break:
- return true;
- }
- }
+ if (const IntrinsicInst *Intrinsic = dyn_cast<IntrinsicInst>(V))
+ return AMDGPU::isIntrinsicAlwaysUniform(Intrinsic->getIntrinsicID());
if (const CallInst *CI = dyn_cast<CallInst>(V)) {
if (CI->isInlineAsm())
@@ -1012,8 +1009,6 @@ bool GCNTTIImpl::isAlwaysUniform(const Value *V) const {
bool GCNTTIImpl::collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
Intrinsic::ID IID) const {
switch (IID) {
- case Intrinsic::amdgcn_atomic_inc:
- case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax:
@@ -1034,8 +1029,6 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
Value *NewV) const {
auto IntrID = II->getIntrinsicID();
switch (IntrID) {
- case Intrinsic::amdgcn_atomic_inc:
- case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_fadd:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
@@ -1099,9 +1092,12 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
case Intrinsic::amdgcn_flat_atomic_fadd:
case Intrinsic::amdgcn_flat_atomic_fmax:
case Intrinsic::amdgcn_flat_atomic_fmin: {
- Module *M = II->getParent()->getParent()->getParent();
Type *DestTy = II->getType();
Type *SrcTy = NewV->getType();
+ unsigned NewAS = SrcTy->getPointerAddressSpace();
+ if (!AMDGPU::isExtendedGlobalAddrSpace(NewAS))
+ return nullptr;
+ Module *M = II->getModule();
Function *NewDecl = Intrinsic::getDeclaration(M, II->getIntrinsicID(),
{DestTy, SrcTy, DestTy});
II->setArgOperand(0, NewV);
@@ -1157,8 +1153,8 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
// FIXME: dx10_clamp can just take the caller setting, but there seems to be
// no way to support merge for backend defined attributes.
- AMDGPU::SIModeRegisterDefaults CallerMode(*Caller);
- AMDGPU::SIModeRegisterDefaults CalleeMode(*Callee);
+ SIModeRegisterDefaults CallerMode(*Caller);
+ SIModeRegisterDefaults CalleeMode(*Callee);
if (!CallerMode.isInlineCompatible(CalleeMode))
return false;
@@ -1178,34 +1174,129 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller,
return true;
}
-unsigned GCNTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
- // If we have a pointer to private array passed into a function
+static unsigned adjustInliningThresholdUsingCallee(const CallBase *CB,
+ const SITargetLowering *TLI,
+ const GCNTTIImpl *TTIImpl) {
+ const int NrOfSGPRUntilSpill = 26;
+ const int NrOfVGPRUntilSpill = 32;
+
+ const DataLayout &DL = TTIImpl->getDataLayout();
+
+ unsigned adjustThreshold = 0;
+ int SGPRsInUse = 0;
+ int VGPRsInUse = 0;
+ for (const Use &A : CB->args()) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(*TLI, DL, A.get()->getType(), ValueVTs);
+ for (auto ArgVT : ValueVTs) {
+ unsigned CCRegNum = TLI->getNumRegistersForCallingConv(
+ CB->getContext(), CB->getCallingConv(), ArgVT);
+ if (AMDGPU::isArgPassedInSGPR(CB, CB->getArgOperandNo(&A)))
+ SGPRsInUse += CCRegNum;
+ else
+ VGPRsInUse += CCRegNum;
+ }
+ }
+
+ // The cost of passing function arguments through the stack:
+ // 1 instruction to put a function argument on the stack in the caller.
+ // 1 instruction to take a function argument from the stack in the callee.
+ // 1 instruction to explicitly take care of data dependencies in the callee
+ // function.
+ InstructionCost ArgStackCost(1);
+ ArgStackCost += const_cast<GCNTTIImpl *>(TTIImpl)->getMemoryOpCost(
+ Instruction::Store, Type::getInt32Ty(CB->getContext()), Align(4),
+ AMDGPUAS::PRIVATE_ADDRESS, TTI::TCK_SizeAndLatency);
+ ArgStackCost += const_cast<GCNTTIImpl *>(TTIImpl)->getMemoryOpCost(
+ Instruction::Load, Type::getInt32Ty(CB->getContext()), Align(4),
+ AMDGPUAS::PRIVATE_ADDRESS, TTI::TCK_SizeAndLatency);
+
+ // The penalty cost is computed relative to the cost of instructions and does
+ // not model any storage costs.
+ adjustThreshold += std::max(0, SGPRsInUse - NrOfSGPRUntilSpill) *
+ *ArgStackCost.getValue() * InlineConstants::getInstrCost();
+ adjustThreshold += std::max(0, VGPRsInUse - NrOfVGPRUntilSpill) *
+ *ArgStackCost.getValue() * InlineConstants::getInstrCost();
+ return adjustThreshold;
+}
+
+static unsigned getCallArgsTotalAllocaSize(const CallBase *CB,
+ const DataLayout &DL) {
+ // If we have a pointer to a private array passed into a function,
// it will not be optimized out, leaving scratch usage.
- // Increase the inline threshold to allow inlining in this case.
- uint64_t AllocaSize = 0;
+ // This function calculates the total size in bytes of the memory that would
+ // end up in scratch if the call were not inlined.
+ unsigned AllocaSize = 0;
SmallPtrSet<const AllocaInst *, 8> AIVisited;
for (Value *PtrArg : CB->args()) {
PointerType *Ty = dyn_cast<PointerType>(PtrArg->getType());
- if (!Ty || (Ty->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS &&
- Ty->getAddressSpace() != AMDGPUAS::FLAT_ADDRESS))
+ if (!Ty)
continue;
- PtrArg = getUnderlyingObject(PtrArg);
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(PtrArg)) {
- if (!AI->isStaticAlloca() || !AIVisited.insert(AI).second)
- continue;
- AllocaSize += DL.getTypeAllocSize(AI->getAllocatedType());
- // If the amount of stack memory is excessive we will not be able
- // to get rid of the scratch anyway, bail out.
- if (AllocaSize > ArgAllocaCutoff) {
- AllocaSize = 0;
- break;
- }
- }
+ unsigned AddrSpace = Ty->getAddressSpace();
+ if (AddrSpace != AMDGPUAS::FLAT_ADDRESS &&
+ AddrSpace != AMDGPUAS::PRIVATE_ADDRESS)
+ continue;
+
+ const AllocaInst *AI = dyn_cast<AllocaInst>(getUnderlyingObject(PtrArg));
+ if (!AI || !AI->isStaticAlloca() || !AIVisited.insert(AI).second)
+ continue;
+
+ AllocaSize += DL.getTypeAllocSize(AI->getAllocatedType());
}
- if (AllocaSize)
- return ArgAllocaCost;
- return 0;
+ return AllocaSize;
+}
+
+unsigned GCNTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
+ unsigned Threshold = adjustInliningThresholdUsingCallee(CB, TLI, this);
+
+ // Private objects passed as arguments may end up in scratch usage if the call
+ // is not inlined. Increase the inline threshold to promote inlining.
+ unsigned AllocaSize = getCallArgsTotalAllocaSize(CB, DL);
+ if (AllocaSize > 0)
+ Threshold += ArgAllocaCost;
+ return Threshold;
+}
+
+unsigned GCNTTIImpl::getCallerAllocaCost(const CallBase *CB,
+ const AllocaInst *AI) const {
+
+ // Below the cutoff, assume that the private memory objects would be
+ // optimized away.
+ auto AllocaSize = getCallArgsTotalAllocaSize(CB, DL);
+ if (AllocaSize <= ArgAllocaCutoff)
+ return 0;
+
+ // Above the cutoff, we give a cost to each private memory object
+ // depending on its size. If the array can be optimized by SROA, this cost is
+ // not added to the total cost in the inliner cost analysis.
+ //
+ // We choose the total cost of the allocas such that their sum cancels the
+ // bonus given in the threshold (ArgAllocaCost).
+ //
+ // Cost_Alloca_0 + ... + Cost_Alloca_N == ArgAllocaCost
+ //
+ // Awkwardly, the ArgAllocaCost bonus is multiplied by the threshold
+ // multiplier, the single-BB bonus and the vector bonus.
+ //
+ // We compensate for the first two multipliers by repeating the logic from
+ // the inliner cost analysis here. The vector bonus is 0 on AMDGPU.
+ static_assert(InlinerVectorBonusPercent == 0, "vector bonus assumed to be 0");
+ unsigned Threshold = ArgAllocaCost * getInliningThresholdMultiplier();
+
+ bool SingleBB = none_of(*CB->getCalledFunction(), [](const BasicBlock &BB) {
+ return BB.getTerminator()->getNumSuccessors() > 1;
+ });
+ if (SingleBB) {
+ Threshold += Threshold / 2;
+ }
+
+ auto ArgAllocaSize = DL.getTypeAllocSize(AI->getAllocatedType());
+
+ // Attribute the bonus proportionally to the alloca size
+ unsigned AllocaThresholdBonus = (Threshold * ArgAllocaSize) / AllocaSize;
+
+ return AllocaThresholdBonus;
}
void GCNTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
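
The getCallerAllocaCost comment above fixes the per-alloca bonuses so that they sum to the same amount adjustInliningThreshold adds for the call site, after compensating for the threshold multiplier (11, per the header change below) and the single-BB bonus. A worked example with hypothetical numbers (the ArgAllocaCost value and the alloca sizes are assumptions, not values from the patch):

// Hypothetical inputs, for illustration only.
unsigned ArgAllocaCost = 4000;
unsigned Threshold = ArgAllocaCost * 11;        // 44000, threshold multiplier
bool SingleBB = true;                           // assume a single-block callee
if (SingleBB)
  Threshold += Threshold / 2;                   // 66000, single-BB bonus
unsigned AllocaSize = 64 + 192;                 // two static allocas, 256 bytes
unsigned Bonus64  = (Threshold * 64) / AllocaSize;   // 16500
unsigned Bonus192 = (Threshold * 192) / AllocaSize;  // 49500
// 16500 + 49500 == 66000: the per-alloca costs cancel the bonus exactly
// (up to integer rounding for sizes that do not divide evenly).
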
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index 7862f21cfc35..1e6c5bbfc0d7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -55,6 +55,8 @@ public:
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
+
+ int64_t getMaxMemIntrinsicInlineSizeThreshold() const;
};
class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
@@ -69,6 +71,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
bool IsGraphics;
bool HasFP32Denormals;
bool HasFP64FP16Denormals;
+ static constexpr bool InlinerVectorBonusPercent = 0;
static const FeatureBitset InlineFeatureIgnoreList;
@@ -100,8 +103,7 @@ class GCNTTIImpl final : public BasicTTIImplBase<GCNTTIImpl> {
public:
explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F);
- bool hasBranchDivergence() { return true; }
- bool useGPUDivergenceAnalysis() const;
+ bool hasBranchDivergence(const Function *F = nullptr) const;
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
@@ -133,6 +135,8 @@ public:
unsigned AddrSpace) const;
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
+
+ int64_t getMaxMemIntrinsicInlineSizeThreshold() const;
Type *getMemcpyLoopLoweringType(
LLVMContext & Context, Value * Length, unsigned SrcAddrSpace,
unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
@@ -143,7 +147,7 @@ public:
unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign,
std::optional<uint32_t> AtomicCpySize) const;
- unsigned getMaxInterleaveFactor(unsigned VF);
+ unsigned getMaxInterleaveFactor(ElementCount VF);
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
@@ -169,6 +173,32 @@ public:
bool isSourceOfDivergence(const Value *V) const;
bool isAlwaysUniform(const Value *V) const;
+ bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
+ if (ToAS == AMDGPUAS::FLAT_ADDRESS) {
+ switch (FromAS) {
+ case AMDGPUAS::GLOBAL_ADDRESS:
+ case AMDGPUAS::CONSTANT_ADDRESS:
+ case AMDGPUAS::CONSTANT_ADDRESS_32BIT:
+ case AMDGPUAS::LOCAL_ADDRESS:
+ case AMDGPUAS::PRIVATE_ADDRESS:
+ return true;
+ default:
+ break;
+ }
+ return false;
+ }
+ if ((FromAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
+ ToAS == AMDGPUAS::CONSTANT_ADDRESS) ||
+ (FromAS == AMDGPUAS::CONSTANT_ADDRESS &&
+ ToAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT))
+ return true;
+ return false;
+ }
+
+ bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
+ return AMDGPU::addrspacesMayAlias(AS0, AS1);
+ }
+
unsigned getFlatAddressSpace() const {
// Don't bother running InferAddressSpaces pass on graphics shaders which
// don't use flat addressing.
@@ -188,8 +218,8 @@ public:
Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const;
- bool canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
- InstCombiner &IC) const;
+ bool canSimplifyLegacyMulToMul(const Instruction &I, const Value *Op0,
+ const Value *Op1, InstCombiner &IC) const;
std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
IntrinsicInst &II) const;
std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
@@ -209,10 +239,11 @@ public:
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
- unsigned getInliningThresholdMultiplier() { return 11; }
+ unsigned getInliningThresholdMultiplier() const { return 11; }
unsigned adjustInliningThreshold(const CallBase *CB) const;
+ unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const;
- int getInlinerVectorBonusPercent() { return 0; }
+ int getInlinerVectorBonusPercent() const { return InlinerVectorBonusPercent; }
InstructionCost getArithmeticReductionCost(
unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
@@ -220,9 +251,9 @@ public:
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
- InstructionCost getMinMaxReductionCost(
- VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
- TTI::TargetCostKind CostKind);
+ InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+ FastMathFlags FMF,
+ TTI::TargetCostKind CostKind);
};
} // end namespace llvm
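
The isValidAddrSpaceCast hook added above accepts casts from the global, constant, constant-32-bit, local and private address spaces into flat, plus casts between the two constant address spaces, and rejects everything else. Reading the switch, a few illustrative answers (TTI is an assumed GCNTTIImpl instance):

bool A = TTI.isValidAddrSpaceCast(AMDGPUAS::LOCAL_ADDRESS,
                                  AMDGPUAS::FLAT_ADDRESS);           // true
bool B = TTI.isValidAddrSpaceCast(AMDGPUAS::CONSTANT_ADDRESS,
                                  AMDGPUAS::CONSTANT_ADDRESS_32BIT); // true
bool C = TTI.isValidAddrSpaceCast(AMDGPUAS::PRIVATE_ADDRESS,
                                  AMDGPUAS::GLOBAL_ADDRESS);         // false
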
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index c27e69a0bcbb..9ad841c3c8a5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -19,6 +19,7 @@
//
//===----------------------------------------------------------------------===//
+#include "AMDGPUUnifyDivergentExitNodes.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/ArrayRef.h"
@@ -26,9 +27,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -53,25 +54,33 @@ using namespace llvm;
namespace {
-class AMDGPUUnifyDivergentExitNodes : public FunctionPass {
+class AMDGPUUnifyDivergentExitNodesImpl {
private:
const TargetTransformInfo *TTI = nullptr;
public:
- static char ID; // Pass identification, replacement for typeid
-
- AMDGPUUnifyDivergentExitNodes() : FunctionPass(ID) {
- initializeAMDGPUUnifyDivergentExitNodesPass(*PassRegistry::getPassRegistry());
- }
+ AMDGPUUnifyDivergentExitNodesImpl() = delete;
+ AMDGPUUnifyDivergentExitNodesImpl(const TargetTransformInfo *TTI)
+ : TTI(TTI) {}
// We can preserve non-critical-edgeness when we unify function exit nodes
- void getAnalysisUsage(AnalysisUsage &AU) const override;
BasicBlock *unifyReturnBlockSet(Function &F, DomTreeUpdater &DTU,
ArrayRef<BasicBlock *> ReturningBlocks,
StringRef Name);
- bool runOnFunction(Function &F) override;
+ bool run(Function &F, DominatorTree *DT, const PostDominatorTree &PDT,
+ const UniformityInfo &UA);
};
+class AMDGPUUnifyDivergentExitNodes : public FunctionPass {
+public:
+ static char ID;
+ AMDGPUUnifyDivergentExitNodes() : FunctionPass(ID) {
+ initializeAMDGPUUnifyDivergentExitNodesPass(
+ *PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+};
} // end anonymous namespace
char AMDGPUUnifyDivergentExitNodes::ID = 0;
@@ -79,20 +88,20 @@ char AMDGPUUnifyDivergentExitNodes::ID = 0;
char &llvm::AMDGPUUnifyDivergentExitNodesID = AMDGPUUnifyDivergentExitNodes::ID;
INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
- "Unify divergent function exit nodes", false, false)
+ "Unify divergent function exit nodes", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE,
"Unify divergent function exit nodes", false, false)
-void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
+void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const {
if (RequireAndPreserveDomTree)
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTreeWrapperPass>();
- AU.addRequired<LegacyDivergenceAnalysis>();
+ AU.addRequired<UniformityInfoWrapperPass>();
if (RequireAndPreserveDomTree) {
AU.addPreserved<DominatorTreeWrapperPass>();
@@ -100,7 +109,7 @@ void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
}
// No divergent values are changed, only blocks and branch edges.
- AU.addPreserved<LegacyDivergenceAnalysis>();
+ AU.addPreserved<UniformityInfoWrapperPass>();
// We preserve the non-critical-edgeness property
AU.addPreservedID(BreakCriticalEdgesID);
@@ -114,14 +123,13 @@ void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{
/// \returns true if \p BB is reachable through only uniform branches.
/// XXX - Is there a more efficient way to find this?
-static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA,
- BasicBlock &BB) {
+static bool isUniformlyReached(const UniformityInfo &UA, BasicBlock &BB) {
SmallVector<BasicBlock *, 8> Stack(predecessors(&BB));
SmallPtrSet<BasicBlock *, 8> Visited;
while (!Stack.empty()) {
BasicBlock *Top = Stack.pop_back_val();
- if (!DA.isUniform(Top->getTerminator()))
+ if (!UA.isUniform(Top->getTerminator()))
return false;
for (BasicBlock *Pred : predecessors(Top)) {
@@ -133,7 +141,7 @@ static bool isUniformlyReached(const LegacyDivergenceAnalysis &DA,
return true;
}
-BasicBlock *AMDGPUUnifyDivergentExitNodes::unifyReturnBlockSet(
+BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet(
Function &F, DomTreeUpdater &DTU, ArrayRef<BasicBlock *> ReturningBlocks,
StringRef Name) {
// Otherwise, we need to insert a new basic block into the function, add a PHI
@@ -181,20 +189,14 @@ BasicBlock *AMDGPUUnifyDivergentExitNodes::unifyReturnBlockSet(
return NewRetBlock;
}
-bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
- DominatorTree *DT = nullptr;
- if (RequireAndPreserveDomTree)
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
- auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
+ const PostDominatorTree &PDT,
+ const UniformityInfo &UA) {
if (PDT.root_size() == 0 ||
(PDT.root_size() == 1 &&
!isa<BranchInst>(PDT.getRoot()->getTerminator())))
return false;
- LegacyDivergenceAnalysis &DA = getAnalysis<LegacyDivergenceAnalysis>();
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-
// Loop over all of the blocks in a function, tracking all of the blocks that
// return.
SmallVector<BasicBlock *, 4> ReturningBlocks;
@@ -213,7 +215,7 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
// exits, we should only unify UnreachableBlocks that are not uniformly
// reachable.
bool HasDivergentExitBlock = llvm::any_of(
- PDT.roots(), [&](auto BB) { return !isUniformlyReached(DA, *BB); });
+ PDT.roots(), [&](auto BB) { return !isUniformlyReached(UA, *BB); });
for (BasicBlock *BB : PDT.roots()) {
if (isa<ReturnInst>(BB->getTerminator())) {
@@ -327,3 +329,30 @@ bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
unifyReturnBlockSet(F, DTU, ReturningBlocks, "UnifiedReturnBlock");
return true;
}
+
+bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
+ DominatorTree *DT = nullptr;
+ if (RequireAndPreserveDomTree)
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ const auto &PDT =
+ getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+ const auto &UA = getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
+ const auto *TransformInfo =
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ return AMDGPUUnifyDivergentExitNodesImpl(TransformInfo).run(F, DT, PDT, UA);
+}
+
+PreservedAnalyses
+AMDGPUUnifyDivergentExitNodesPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ DominatorTree *DT = nullptr;
+ if (RequireAndPreserveDomTree)
+ DT = &AM.getResult<DominatorTreeAnalysis>(F);
+
+ const auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
+ const auto &UA = AM.getResult<UniformityInfoAnalysis>(F);
+ const auto *TransformInfo = &AM.getResult<TargetIRAnalysis>(F);
+ return AMDGPUUnifyDivergentExitNodesImpl(TransformInfo).run(F, DT, PDT, UA)
+ ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.h b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.h
new file mode 100644
index 000000000000..2fd98a2ee1a9
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.h
@@ -0,0 +1,36 @@
+//===- AMDGPUUnifyDivergentExitNodes.h ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a variant of the UnifyFunctionExitNodes pass. Rather than ensuring
+// there is at most one ret and one unreachable instruction, it ensures there is
+// at most one divergent exiting block.
+//
+// StructurizeCFG can't deal with multi-exit regions formed by branches to
+// multiple return nodes. It is not desirable to structurize regions with
+// uniform branches, so unifying those to the same return block as divergent
+// branches inhibits use of scalar branching. It still can't deal with the case
+// where one branch goes to return, and one unreachable. Replace unreachable in
+// this case with a return.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUUNIFYDIVERGENTEXITNODES_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUUNIFYDIVERGENTEXITNODES_H
+
+#include "AMDGPU.h"
+
+namespace llvm {
+class AMDGPUUnifyDivergentExitNodesPass
+ : public PassInfoMixin<AMDGPUUnifyDivergentExitNodesPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUUNIFYDIVERGENTEXITNODES_H
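
[Sketch, not part of the imported patch: the header above only declares the new pass-manager entry point. Assuming the standard PassBuilder boilerplate, a standalone driver for it would look roughly like the code below; in the real backend the pass is scheduled by the AMDGPU pass pipeline, and a TargetMachine-aware PassBuilder supplies the AMDGPU flavor of TargetIRAnalysis rather than the generic one used here.]

// Minimal sketch under the assumptions above. AMDGPUUnifyDivergentExitNodes.h
// is a private backend header, so this only builds inside the AMDGPU target
// library.
#include "AMDGPUUnifyDivergentExitNodes.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"

static void unifyDivergentExits(llvm::Function &F) {
  // A real pipeline would construct PassBuilder with the AMDGPUTargetMachine.
  llvm::PassBuilder PB;
  llvm::LoopAnalysisManager LAM;
  llvm::FunctionAnalysisManager FAM;
  llvm::CGSCCAnalysisManager CGAM;
  llvm::ModuleAnalysisManager MAM;

  // Register the stock analyses so everything queried by
  // AMDGPUUnifyDivergentExitNodesPass::run() above (dominators,
  // post-dominators, uniformity info, TTI) can be resolved.
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  llvm::FunctionPassManager FPM;
  FPM.addPass(llvm::AMDGPUUnifyDivergentExitNodesPass());
  // Returns PreservedAnalyses::all() when no exit blocks had to be unified.
  FPM.run(F, FAM);
}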
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 671d263a41a4..b9443559132f 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -35,9 +36,8 @@
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetParser.h"
+#include "llvm/TargetParser/TargetParser.h"
#include <optional>
using namespace llvm;
@@ -119,16 +119,16 @@ public:
ImmTyInstOffset,
ImmTyOffset0,
ImmTyOffset1,
+ ImmTySMEMOffsetMod,
ImmTyCPol,
- ImmTySWZ,
ImmTyTFE,
ImmTyD16,
ImmTyClampSI,
ImmTyOModSI,
- ImmTySdwaDstSel,
- ImmTySdwaSrc0Sel,
- ImmTySdwaSrc1Sel,
- ImmTySdwaDstUnused,
+ ImmTySDWADstSel,
+ ImmTySDWASrc0Sel,
+ ImmTySDWASrc1Sel,
+ ImmTySDWADstUnused,
ImmTyDMask,
ImmTyDim,
ImmTyUNorm,
@@ -145,7 +145,7 @@ public:
ImmTySendMsg,
ImmTyInterpSlot,
ImmTyInterpAttr,
- ImmTyAttrChan,
+ ImmTyInterpAttrChan,
ImmTyOpSel,
ImmTyOpSelHi,
ImmTyNegLo,
@@ -155,7 +155,7 @@ public:
ImmTyDppRowMask,
ImmTyDppBankMask,
ImmTyDppBoundCtrl,
- ImmTyDppFi,
+ ImmTyDppFI,
ImmTySwizzle,
ImmTyGprIdxMode,
ImmTyHigh,
@@ -347,6 +347,8 @@ public:
return isImm() && Imm.Type == ImmT;
}
+ bool isImmLiteral() const { return isImmTy(ImmTyNone); }
+
bool isImmModifier() const {
return isImm() && Imm.Type != ImmTyNone;
}
@@ -370,26 +372,25 @@ public:
bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }
-
+ bool isSMEMOffsetMod() const { return isImmTy(ImmTySMEMOffsetMod); }
bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
bool isGDS() const { return isImmTy(ImmTyGDS); }
bool isLDS() const { return isImmTy(ImmTyLDS); }
bool isCPol() const { return isImmTy(ImmTyCPol); }
- bool isSWZ() const { return isImmTy(ImmTySWZ); }
bool isTFE() const { return isImmTy(ImmTyTFE); }
bool isD16() const { return isImmTy(ImmTyD16); }
bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
- bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
- bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
+ bool isDppBankMask() const { return isImmTy(ImmTyDppBankMask); }
+ bool isDppRowMask() const { return isImmTy(ImmTyDppRowMask); }
bool isDppBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
- bool isFI() const { return isImmTy(ImmTyDppFi); }
- bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
- bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
- bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
- bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
+ bool isDppFI() const { return isImmTy(ImmTyDppFI); }
+ bool isSDWADstSel() const { return isImmTy(ImmTySDWADstSel); }
+ bool isSDWASrc0Sel() const { return isImmTy(ImmTySDWASrc0Sel); }
+ bool isSDWASrc1Sel() const { return isImmTy(ImmTySDWASrc1Sel); }
+ bool isSDWADstUnused() const { return isImmTy(ImmTySDWADstUnused); }
bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
- bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
+ bool isInterpAttrChan() const { return isImmTy(ImmTyInterpAttrChan); }
bool isOpSel() const { return isImmTy(ImmTyOpSel); }
bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
bool isNegLo() const { return isImmTy(ImmTyNegLo); }
@@ -855,13 +856,11 @@ public:
return Kind == Expression;
}
- bool isSoppBrTarget() const {
- return isExpr() || isImm();
- }
+ bool isSOPPBrTarget() const { return isExpr() || isImm(); }
bool isSWaitCnt() const;
bool isDepCtr() const;
- bool isSDelayAlu() const;
+ bool isSDelayALU() const;
bool isHwreg() const;
bool isSendMsg() const;
bool isSwizzle() const;
@@ -948,28 +947,11 @@ public:
void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;
- template <unsigned Bitwidth>
- void addKImmFPOperands(MCInst &Inst, unsigned N) const;
-
- void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
- addKImmFPOperands<16>(Inst, N);
- }
-
- void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
- addKImmFPOperands<32>(Inst, N);
- }
-
void addRegOperands(MCInst &Inst, unsigned N) const;
- void addBoolRegOperands(MCInst &Inst, unsigned N) const {
- addRegOperands(Inst, N);
- }
-
void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
if (isRegKind())
addRegOperands(Inst, N);
- else if (isExpr())
- Inst.addOperand(MCOperand::createExpr(Expr));
else
addImmOperands(Inst, N);
}
@@ -1011,15 +993,6 @@ public:
addRegWithInputModsOperands(Inst, N);
}
- void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
- if (isImm())
- addImmOperands(Inst, N);
- else {
- assert(isExpr());
- Inst.addOperand(MCOperand::createExpr(Expr));
- }
- }
-
static void printImmTy(raw_ostream& OS, ImmTy Type) {
switch (Type) {
case ImmTyNone: OS << "None"; break;
@@ -1032,8 +1005,8 @@ public:
case ImmTyInstOffset: OS << "InstOffset"; break;
case ImmTyOffset0: OS << "Offset0"; break;
case ImmTyOffset1: OS << "Offset1"; break;
+ case ImmTySMEMOffsetMod: OS << "SMEMOffsetMod"; break;
case ImmTyCPol: OS << "CPol"; break;
- case ImmTySWZ: OS << "SWZ"; break;
case ImmTyTFE: OS << "TFE"; break;
case ImmTyD16: OS << "D16"; break;
case ImmTyFORMAT: OS << "FORMAT"; break;
@@ -1044,11 +1017,11 @@ public:
case ImmTyDppRowMask: OS << "DppRowMask"; break;
case ImmTyDppBankMask: OS << "DppBankMask"; break;
case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
- case ImmTyDppFi: OS << "FI"; break;
- case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
- case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
- case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
- case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
+ case ImmTyDppFI: OS << "DppFI"; break;
+ case ImmTySDWADstSel: OS << "SDWADstSel"; break;
+ case ImmTySDWASrc0Sel: OS << "SDWASrc0Sel"; break;
+ case ImmTySDWASrc1Sel: OS << "SDWASrc1Sel"; break;
+ case ImmTySDWADstUnused: OS << "SDWADstUnused"; break;
case ImmTyDMask: OS << "DMask"; break;
case ImmTyDim: OS << "Dim"; break;
case ImmTyUNorm: OS << "UNorm"; break;
@@ -1064,7 +1037,7 @@ public:
case ImmTySendMsg: OS << "SendMsg"; break;
case ImmTyInterpSlot: OS << "InterpSlot"; break;
case ImmTyInterpAttr: OS << "InterpAttr"; break;
- case ImmTyAttrChan: OS << "AttrChan"; break;
+ case ImmTyInterpAttrChan: OS << "InterpAttrChan"; break;
case ImmTyOpSel: OS << "OpSel"; break;
case ImmTyOpSelHi: OS << "OpSelHi"; break;
case ImmTyNegLo: OS << "NegLo"; break;
@@ -1339,8 +1312,6 @@ private:
unsigned RegWidth);
void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
bool IsAtomic);
- void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
- bool IsGdsHardcoded);
public:
enum AMDGPUMatchResultTy {
@@ -1481,6 +1452,14 @@ public:
return getFeatureBits()[AMDGPU::FeatureIntClamp];
}
+ bool hasPartialNSAEncoding() const {
+ return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
+ }
+
+ unsigned getNSAMaxSize() const {
+ return AMDGPU::getNSAMaxSize(getSTI());
+ }
+
AMDGPUTargetStreamer &getTargetStreamer() {
MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
return static_cast<AMDGPUTargetStreamer &>(TS);
@@ -1526,36 +1505,34 @@ public:
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
bool ParseDirective(AsmToken DirectiveID) override;
- OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
- OperandMode Mode = OperandMode_Default);
+ ParseStatus parseOperand(OperandVector &Operands, StringRef Mnemonic,
+ OperandMode Mode = OperandMode_Default);
StringRef parseMnemonicSuffix(StringRef Name);
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
//bool ProcessInstruction(MCInst &Inst);
- OperandMatchResultTy parseTokenOp(StringRef Name, OperandVector &Operands);
+ ParseStatus parseTokenOp(StringRef Name, OperandVector &Operands);
- OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
+ ParseStatus parseIntWithPrefix(const char *Prefix, int64_t &Int);
- OperandMatchResultTy
+ ParseStatus
parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
- bool (*ConvertResult)(int64_t &) = nullptr);
+ std::function<bool(int64_t &)> ConvertResult = nullptr);
- OperandMatchResultTy
- parseOperandArrayWithPrefix(const char *Prefix,
- OperandVector &Operands,
- AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
- bool (*ConvertResult)(int64_t&) = nullptr);
+ ParseStatus parseOperandArrayWithPrefix(
+ const char *Prefix, OperandVector &Operands,
+ AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
+ bool (*ConvertResult)(int64_t &) = nullptr);
- OperandMatchResultTy
+ ParseStatus
parseNamedBit(StringRef Name, OperandVector &Operands,
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
- OperandMatchResultTy parseCPol(OperandVector &Operands);
- OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
- StringRef &Value,
- SMLoc &StringLoc);
+ ParseStatus parseCPol(OperandVector &Operands);
+ ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
+ SMLoc &StringLoc);
bool isModifier();
bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
@@ -1563,42 +1540,44 @@ public:
bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
bool parseSP3NegModifier();
- OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
- OperandMatchResultTy parseReg(OperandVector &Operands);
- OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
- OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
- OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
- OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
- OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
- OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
- OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
- OperandMatchResultTy parseUfmt(int64_t &Format);
- OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
- OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
- OperandMatchResultTy parseFORMAT(OperandVector &Operands);
- OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
- OperandMatchResultTy parseNumericFormat(int64_t &Format);
- OperandMatchResultTy parseFlatOffset(OperandVector &Operands);
- OperandMatchResultTy parseR128A16(OperandVector &Operands);
+ ParseStatus parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
+ ParseStatus parseReg(OperandVector &Operands);
+ ParseStatus parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
+ ParseStatus parseRegOrImmWithFPInputMods(OperandVector &Operands,
+ bool AllowImm = true);
+ ParseStatus parseRegOrImmWithIntInputMods(OperandVector &Operands,
+ bool AllowImm = true);
+ ParseStatus parseRegWithFPInputMods(OperandVector &Operands);
+ ParseStatus parseRegWithIntInputMods(OperandVector &Operands);
+ ParseStatus parseVReg32OrOff(OperandVector &Operands);
+ ParseStatus parseDfmtNfmt(int64_t &Format);
+ ParseStatus parseUfmt(int64_t &Format);
+ ParseStatus parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
+ int64_t &Format);
+ ParseStatus parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
+ int64_t &Format);
+ ParseStatus parseFORMAT(OperandVector &Operands);
+ ParseStatus parseSymbolicOrNumericFormat(int64_t &Format);
+ ParseStatus parseNumericFormat(int64_t &Format);
+ ParseStatus parseFlatOffset(OperandVector &Operands);
+ ParseStatus parseR128A16(OperandVector &Operands);
+ ParseStatus parseBLGP(OperandVector &Operands);
bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
- void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
- void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
- void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
void cvtExp(MCInst &Inst, const OperandVector &Operands);
bool parseCnt(int64_t &IntVal);
- OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
+ ParseStatus parseSWaitCnt(OperandVector &Operands);
bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
- OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);
+ ParseStatus parseDepCtr(OperandVector &Operands);
bool parseDelay(int64_t &Delay);
- OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);
+ ParseStatus parseSDelayALU(OperandVector &Operands);
- OperandMatchResultTy parseHwreg(OperandVector &Operands);
+ ParseStatus parseHwreg(OperandVector &Operands);
private:
struct OperandInfoTy {
@@ -1648,7 +1627,7 @@ private:
bool validateMIMGGatherDMask(const MCInst &Inst);
bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
bool validateMIMGDataSize(const MCInst &Inst, const SMLoc &IDLoc);
- bool validateMIMGAddrSize(const MCInst &Inst);
+ bool validateMIMGAddrSize(const MCInst &Inst, const SMLoc &IDLoc);
bool validateMIMGD16(const MCInst &Inst);
bool validateMIMGMSAA(const MCInst &Inst);
bool validateOpSel(const MCInst &Inst);
@@ -1706,15 +1685,14 @@ private:
public:
void onBeginOfFile() override;
- OperandMatchResultTy parseCustomOperand(OperandVector &Operands,
- unsigned MCK);
+ ParseStatus parseCustomOperand(OperandVector &Operands, unsigned MCK);
- OperandMatchResultTy parseExpTgt(OperandVector &Operands);
- OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
- OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
- OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
- OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
- OperandMatchResultTy parseBoolReg(OperandVector &Operands);
+ ParseStatus parseExpTgt(OperandVector &Operands);
+ ParseStatus parseSendMsg(OperandVector &Operands);
+ ParseStatus parseInterpSlot(OperandVector &Operands);
+ ParseStatus parseInterpAttr(OperandVector &Operands);
+ ParseStatus parseSOPPBrTarget(OperandVector &Operands);
+ ParseStatus parseBoolReg(OperandVector &Operands);
bool parseSwizzleOperand(int64_t &Op,
const unsigned MinVal,
@@ -1725,7 +1703,7 @@ public:
const unsigned MinVal,
const unsigned MaxVal,
const StringRef ErrMsg);
- OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
+ ParseStatus parseSwizzle(OperandVector &Operands);
bool parseSwizzleOffset(int64_t &Imm);
bool parseSwizzleMacro(int64_t &Imm);
bool parseSwizzleQuadPerm(int64_t &Imm);
@@ -1734,21 +1712,13 @@ public:
bool parseSwizzleSwap(int64_t &Imm);
bool parseSwizzleReverse(int64_t &Imm);
- OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
+ ParseStatus parseGPRIdxMode(OperandVector &Operands);
int64_t parseGPRIdxMacro();
void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
- void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
-
- AMDGPUOperand::Ptr defaultCPol() const;
- AMDGPUOperand::Ptr defaultSMRDOffset8() const;
- AMDGPUOperand::Ptr defaultSMEMOffset() const;
- AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
- AMDGPUOperand::Ptr defaultFlatOffset() const;
-
- OperandMatchResultTy parseOModOperand(OperandVector &Operands);
+ ParseStatus parseOModSI(OperandVector &Operands);
void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
OptionalImmIndexMap &OptionalIdx);
@@ -1763,25 +1733,16 @@ public:
void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
void cvtVINTERP(MCInst &Inst, const OperandVector &Operands);
-
- void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
- bool IsAtomic = false);
- void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
- void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
-
void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
bool parseDimId(unsigned &Encoding);
- OperandMatchResultTy parseDim(OperandVector &Operands);
- OperandMatchResultTy parseDPP8(OperandVector &Operands);
- OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
+ ParseStatus parseDim(OperandVector &Operands);
+ bool convertDppBoundCtrl(int64_t &BoundCtrl);
+ ParseStatus parseDPP8(OperandVector &Operands);
+ ParseStatus parseDPPCtrl(OperandVector &Operands);
bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
int64_t parseDPPCtrlSel(StringRef Ctrl);
int64_t parseDPPCtrlPerm();
- AMDGPUOperand::Ptr defaultRowMask() const;
- AMDGPUOperand::Ptr defaultBankMask() const;
- AMDGPUOperand::Ptr defaultDppBoundCtrl() const;
- AMDGPUOperand::Ptr defaultFI() const;
void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
void cvtDPP8(MCInst &Inst, const OperandVector &Operands) {
cvtDPP(Inst, Operands, true);
@@ -1792,9 +1753,9 @@ public:
cvtVOP3DPP(Inst, Operands, true);
}
- OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
- AMDGPUOperand::ImmTy Type);
- OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
+ ParseStatus parseSDWASel(OperandVector &Operands, StringRef Prefix,
+ AMDGPUOperand::ImmTy Type);
+ ParseStatus parseSDWADstUnused(OperandVector &Operands);
void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
@@ -1805,16 +1766,9 @@ public:
bool SkipDstVcc = false,
bool SkipSrcVcc = false);
- AMDGPUOperand::Ptr defaultBLGP() const;
- AMDGPUOperand::Ptr defaultCBSZ() const;
- AMDGPUOperand::Ptr defaultABID() const;
-
- OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
- AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
+ ParseStatus parseEndpgm(OperandVector &Operands);
- AMDGPUOperand::Ptr defaultWaitVDST() const;
- AMDGPUOperand::Ptr defaultWaitEXP() const;
- OperandMatchResultTy parseVOPD(OperandVector &Operands);
+ ParseStatus parseVOPD(OperandVector &Operands);
};
} // end anonymous namespace
@@ -2089,6 +2043,11 @@ uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
}
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
+ if (isExpr()) {
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ return;
+ }
+
if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
Inst.getNumOperands())) {
addLiteralImmOperand(Inst, Imm.Val,
@@ -2285,24 +2244,6 @@ void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyMo
}
}
-template <unsigned Bitwidth>
-void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
- APInt Literal(64, Imm.Val);
- setImmKindMandatoryLiteral();
-
- if (!Imm.IsFPImm) {
- // We got int literal token.
- Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
- return;
- }
-
- bool Lost;
- APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
- FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
- APFloat::rmNearestTiesToEven, &Lost);
- Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
-}
-
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}
@@ -2922,12 +2863,12 @@ AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
+ParseStatus AMDGPUAsmParser::parseImm(OperandVector &Operands,
+ bool HasSP3AbsModifier) {
// TODO: add syntactic sugar for 1/(2*PI)
if (isRegister())
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
assert(!isModifier());
const auto& Tok = getToken();
@@ -2952,9 +2893,8 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
APFloat RealVal(APFloat::IEEEdouble());
auto roundMode = APFloat::rmNearestTiesToEven;
- if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
- return MatchOperand_ParseFail;
- }
+ if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError()))
+ return ParseStatus::Failure;
if (Negate)
RealVal.changeSign();
@@ -2962,7 +2902,7 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
AMDGPUOperand::ImmTyNone, true));
- return MatchOperand_Success;
+ return ParseStatus::Success;
} else {
int64_t IntVal;
@@ -2979,10 +2919,10 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
// MC expressions (due to the trailing '|').
SMLoc EndLoc;
if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
} else {
if (Parser.parseExpression(Expr))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
}
if (Expr->evaluateAsAbsolute(IntVal)) {
@@ -2991,35 +2931,32 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseReg(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseReg(OperandVector &Operands) {
if (!isRegister())
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
if (auto R = parseRegister()) {
assert(R->isReg());
Operands.push_back(std::move(R));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
- auto res = parseReg(Operands);
- if (res != MatchOperand_NoMatch) {
- return res;
- } else if (isModifier()) {
- return MatchOperand_NoMatch;
- } else {
- return parseImm(Operands, HasSP3AbsMod);
- }
+ParseStatus AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands,
+ bool HasSP3AbsMod) {
+ ParseStatus Res = parseReg(Operands);
+ if (!Res.isNoMatch())
+ return Res;
+ if (isModifier())
+ return ParseStatus::NoMatch;
+ return parseImm(Operands, HasSP3AbsMod);
}
bool
@@ -3110,7 +3047,7 @@ AMDGPUAsmParser::parseSP3NegModifier() {
return false;
}
-OperandMatchResultTy
+ParseStatus
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
bool AllowImm) {
bool Neg, SP3Neg;
@@ -3118,49 +3055,42 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
SMLoc Loc;
// Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
- if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
- Error(getLoc(), "invalid syntax, expected 'neg' modifier");
- return MatchOperand_ParseFail;
- }
+ if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus))
+ return Error(getLoc(), "invalid syntax, expected 'neg' modifier");
SP3Neg = parseSP3NegModifier();
Loc = getLoc();
Neg = trySkipId("neg");
- if (Neg && SP3Neg) {
- Error(Loc, "expected register or immediate");
- return MatchOperand_ParseFail;
- }
+ if (Neg && SP3Neg)
+ return Error(Loc, "expected register or immediate");
if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
Abs = trySkipId("abs");
if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
Loc = getLoc();
SP3Abs = trySkipToken(AsmToken::Pipe);
- if (Abs && SP3Abs) {
- Error(Loc, "expected register or immediate");
- return MatchOperand_ParseFail;
- }
+ if (Abs && SP3Abs)
+ return Error(Loc, "expected register or immediate");
- OperandMatchResultTy Res;
+ ParseStatus Res;
if (AllowImm) {
Res = parseRegOrImm(Operands, SP3Abs);
} else {
Res = parseReg(Operands);
}
- if (Res != MatchOperand_Success) {
- return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
- }
+ if (!Res.isSuccess())
+ return (SP3Neg || Neg || SP3Abs || Abs) ? ParseStatus::Failure : Res;
if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
AMDGPUOperand::Modifiers Mods;
Mods.Abs = Abs || SP3Abs;
@@ -3168,79 +3098,71 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
if (Mods.hasFPModifiers()) {
AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
- if (Op.isExpr()) {
- Error(Op.getStartLoc(), "expected an absolute expression");
- return MatchOperand_ParseFail;
- }
+ if (Op.isExpr())
+ return Error(Op.getStartLoc(), "expected an absolute expression");
Op.setModifiers(Mods);
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
+ParseStatus
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
bool AllowImm) {
bool Sext = trySkipId("sext");
if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
- OperandMatchResultTy Res;
+ ParseStatus Res;
if (AllowImm) {
Res = parseRegOrImm(Operands);
} else {
Res = parseReg(Operands);
}
- if (Res != MatchOperand_Success) {
- return Sext? MatchOperand_ParseFail : Res;
- }
+ if (!Res.isSuccess())
+ return Sext ? ParseStatus::Failure : Res;
if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
AMDGPUOperand::Modifiers Mods;
Mods.Sext = Sext;
if (Mods.hasIntModifiers()) {
AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
- if (Op.isExpr()) {
- Error(Op.getStartLoc(), "expected an absolute expression");
- return MatchOperand_ParseFail;
- }
+ if (Op.isExpr())
+ return Error(Op.getStartLoc(), "expected an absolute expression");
Op.setModifiers(Mods);
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
return parseRegOrImmWithFPInputMods(Operands, false);
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
return parseRegOrImmWithIntInputMods(Operands, false);
}
-OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
auto Loc = getLoc();
if (trySkipId("off")) {
Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
AMDGPUOperand::ImmTyOff, false));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
if (!isRegister())
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
if (Reg) {
Operands.push_back(std::move(Reg));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- return MatchOperand_ParseFail;
-
+ return ParseStatus::Failure;
}
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
@@ -3647,7 +3569,8 @@ bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
return false;
}
-bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
+bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
+ const SMLoc &IDLoc) {
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
@@ -3667,8 +3590,13 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
assert(SrsrcIdx != -1);
assert(SrsrcIdx > VAddr0Idx);
- if (DimIdx == -1)
- return true; // intersect_ray
+ bool IsA16 = Inst.getOperand(A16Idx).getImm();
+ if (BaseOpcode->BVH) {
+ if (IsA16 == BaseOpcode->A16)
+ return true;
+ Error(IDLoc, "image address size does not match a16");
+ return false;
+ }
unsigned Dim = Inst.getOperand(DimIdx).getImm();
const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
@@ -3676,12 +3604,19 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
unsigned ActualAddrSize =
IsNSA ? SrsrcIdx - VAddr0Idx
: AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
- bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
unsigned ExpectedAddrSize =
AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
- if (!IsNSA) {
+ if (IsNSA) {
+ if (hasPartialNSAEncoding() && ExpectedAddrSize > getNSAMaxSize()) {
+ int VAddrLastIdx = SrsrcIdx - 1;
+ unsigned VAddrLastSize =
+ AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
+
+ ActualAddrSize = VAddrLastIdx - VAddr0Idx + VAddrLastSize;
+ }
+ } else {
if (ExpectedAddrSize > 12)
ExpectedAddrSize = 16;
@@ -3692,7 +3627,11 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
return true;
}
- return ActualAddrSize == ExpectedAddrSize;
+ if (ActualAddrSize == ExpectedAddrSize)
+ return true;
+
+ Error(IDLoc, "image address size does not match dim and a16");
+ return false;
}
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
@@ -4136,7 +4075,7 @@ SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
// Start with second operand because SMEM Offset cannot be dst or src0.
for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
- if (Op.isSMEMOffset())
+ if (Op.isSMEMOffset() || Op.isSMEMOffsetMod())
return Op.getStartLoc();
}
return getLoc();
@@ -4628,11 +4567,8 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
if (!validateMIMGDataSize(Inst, IDLoc)) {
return false;
}
- if (!validateMIMGAddrSize(Inst)) {
- Error(IDLoc,
- "image address size does not match dim and a16");
+ if (!validateMIMGAddrSize(Inst, IDLoc))
return false;
- }
if (!validateMIMGAtomicDMask(Inst)) {
Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
"invalid atomic image dmask");
@@ -5242,10 +5178,10 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
#undef PARSE_BITS_ENTRY
}
- if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
+ if (!Seen.contains(".amdhsa_next_free_vgpr"))
return TokError(".amdhsa_next_free_vgpr directive is required");
- if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
+ if (!Seen.contains(".amdhsa_next_free_sgpr"))
return TokError(".amdhsa_next_free_sgpr directive is required");
unsigned VGPRBlocks;
@@ -5283,7 +5219,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
UserSGPRCount);
if (isGFX90A()) {
- if (Seen.find(".amdhsa_accum_offset") == Seen.end())
+ if (!Seen.contains(".amdhsa_accum_offset"))
return TokError(".amdhsa_accum_offset directive is required");
if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
return TokError("accum_offset should be in range [4..256] in "
@@ -5294,9 +5230,9 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
(AccumOffset / 4 - 1));
}
- if (IVersion.Major == 10) {
+ if (IVersion.Major >= 10) {
// SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS
- if (SharedVGPRCount && EnableWavefrontSize32) {
+ if (SharedVGPRCount && EnableWavefrontSize32 && *EnableWavefrontSize32) {
return TokError("shared_vgpr_count directive not valid on "
"wavefront size 32");
}
@@ -5309,7 +5245,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
getTargetStreamer().EmitAmdhsaKernelDescriptor(
getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
- ReserveFlatScr);
+ ReserveFlatScr, AMDGPU::getAmdhsaCodeObjectVersion());
return false;
}
@@ -5487,10 +5423,10 @@ bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
const char *AssemblerDirectiveEnd;
std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
isHsaAbiVersion3AndAbove(&getSTI())
- ? std::tuple(HSAMD::V3::AssemblerDirectiveBegin,
- HSAMD::V3::AssemblerDirectiveEnd)
- : std::tuple(HSAMD::AssemblerDirectiveBegin,
- HSAMD::AssemblerDirectiveEnd);
+ ? std::pair(HSAMD::V3::AssemblerDirectiveBegin,
+ HSAMD::V3::AssemblerDirectiveEnd)
+ : std::pair(HSAMD::AssemblerDirectiveBegin,
+ HSAMD::AssemblerDirectiveEnd);
if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
return Error(getLoc(),
@@ -5609,7 +5545,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
return TokError("expected identifier in directive");
MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
- if (parseToken(AsmToken::Comma, "expected ','"))
+ if (getParser().parseComma())
return true;
unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
@@ -5758,16 +5694,15 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
return true;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
- OperandMode Mode) {
- OperandMatchResultTy ResTy = parseVOPD(Operands);
- if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
- isToken(AsmToken::EndOfStatement))
- return ResTy;
+ParseStatus AMDGPUAsmParser::parseOperand(OperandVector &Operands,
+ StringRef Mnemonic,
+ OperandMode Mode) {
+ ParseStatus Res = parseVOPD(Operands);
+ if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
+ return Res;
// Try to parse with a custom parser
- ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ Res = MatchOperandParserImpl(Operands, Mnemonic);
// If we successfully parsed the operand or if there as an error parsing,
// we are done.
@@ -5775,9 +5710,8 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
// If we are parsing after we reach EndOfStatement then this means we
// are appending default values to the Operands list. This is only done
// by custom parser, so we shouldn't continue on to the generic parsing.
- if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
- isToken(AsmToken::EndOfStatement))
- return ResTy;
+ if (Res.isSuccess() || Res.isFailure() || isToken(AsmToken::EndOfStatement))
+ return Res;
SMLoc RBraceLoc;
SMLoc LBraceLoc = getLoc();
@@ -5786,20 +5720,19 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
for (;;) {
auto Loc = getLoc();
- ResTy = parseReg(Operands);
- if (ResTy == MatchOperand_NoMatch)
+ Res = parseReg(Operands);
+ if (Res.isNoMatch())
Error(Loc, "expected a register");
- if (ResTy != MatchOperand_Success)
- return MatchOperand_ParseFail;
+ if (!Res.isSuccess())
+ return ParseStatus::Failure;
RBraceLoc = getLoc();
if (trySkipToken(AsmToken::RBrac))
break;
if (!skipToken(AsmToken::Comma,
- "expected a comma or a closing square bracket")) {
- return MatchOperand_ParseFail;
- }
+ "expected a comma or a closing square bracket"))
+ return ParseStatus::Failure;
}
if (Operands.size() - Prefix > 1) {
@@ -5808,7 +5741,7 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
return parseRegOrImm(Operands);
@@ -5862,15 +5795,14 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
OperandMode Mode = OperandMode_Default;
if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
Mode = OperandMode_NSA;
- OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);
+ ParseStatus Res = parseOperand(Operands, Name, Mode);
- if (Res != MatchOperand_Success) {
+ if (!Res.isSuccess()) {
checkUnsupportedInstruction(Name, NameLoc);
if (!Parser.hasPendingError()) {
// FIXME: use real operand location rather than the current location.
- StringRef Msg =
- (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
- "not a valid operand.";
+ StringRef Msg = Res.isFailure() ? "failed parsing operand."
+ : "not a valid operand.";
Error(getLoc(), Msg);
}
while (!trySkipToken(AsmToken::EndOfStatement)) {
@@ -5890,34 +5822,33 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
// Utility functions
//===----------------------------------------------------------------------===//
-OperandMatchResultTy AMDGPUAsmParser::parseTokenOp(StringRef Name,
- OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseTokenOp(StringRef Name,
+ OperandVector &Operands) {
SMLoc S = getLoc();
if (!trySkipId(Name))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Operands.push_back(AMDGPUOperand::CreateToken(this, Name, S));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
+ParseStatus AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix,
+ int64_t &IntVal) {
if (!trySkipId(Prefix, AsmToken::Colon))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
- return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
+ return parseExpr(IntVal) ? ParseStatus::Success : ParseStatus::Failure;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
- AMDGPUOperand::ImmTy ImmTy,
- bool (*ConvertResult)(int64_t&)) {
+ParseStatus AMDGPUAsmParser::parseIntWithPrefix(
+ const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
+ std::function<bool(int64_t &)> ConvertResult) {
SMLoc S = getLoc();
int64_t Value = 0;
- OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
- if (Res != MatchOperand_Success)
+ ParseStatus Res = parseIntWithPrefix(Prefix, Value);
+ if (!Res.isSuccess())
return Res;
if (ConvertResult && !ConvertResult(Value)) {
@@ -5925,20 +5856,18 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
}
Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
- OperandVector &Operands,
- AMDGPUOperand::ImmTy ImmTy,
- bool (*ConvertResult)(int64_t&)) {
+ParseStatus AMDGPUAsmParser::parseOperandArrayWithPrefix(
+ const char *Prefix, OperandVector &Operands, AMDGPUOperand::ImmTy ImmTy,
+ bool (*ConvertResult)(int64_t &)) {
SMLoc S = getLoc();
if (!trySkipId(Prefix, AsmToken::Colon))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
unsigned Val = 0;
const unsigned MaxSize = 4;
@@ -5949,34 +5878,30 @@ AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
int64_t Op;
SMLoc Loc = getLoc();
if (!parseExpr(Op))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
- if (Op != 0 && Op != 1) {
- Error(Loc, "invalid " + StringRef(Prefix) + " value.");
- return MatchOperand_ParseFail;
- }
+ if (Op != 0 && Op != 1)
+ return Error(Loc, "invalid " + StringRef(Prefix) + " value.");
Val |= (Op << I);
if (trySkipToken(AsmToken::RBrac))
break;
- if (I + 1 == MaxSize) {
- Error(getLoc(), "expected a closing square bracket");
- return MatchOperand_ParseFail;
- }
+ if (I + 1 == MaxSize)
+ return Error(getLoc(), "expected a closing square bracket");
if (!skipToken(AsmToken::Comma, "expected a comma"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
}
Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
- AMDGPUOperand::ImmTy ImmTy) {
+ParseStatus AMDGPUAsmParser::parseNamedBit(StringRef Name,
+ OperandVector &Operands,
+ AMDGPUOperand::ImmTy ImmTy) {
int64_t Bit;
SMLoc S = getLoc();
@@ -5985,54 +5910,42 @@ AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
} else if (trySkipId("no", Name)) {
Bit = 0;
} else {
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
- if (Name == "r128" && !hasMIMG_R128()) {
- Error(S, "r128 modifier is not supported on this GPU");
- return MatchOperand_ParseFail;
- }
- if (Name == "a16" && !hasA16()) {
- Error(S, "a16 modifier is not supported on this GPU");
- return MatchOperand_ParseFail;
- }
+ if (Name == "r128" && !hasMIMG_R128())
+ return Error(S, "r128 modifier is not supported on this GPU");
+ if (Name == "a16" && !hasA16())
+ return Error(S, "a16 modifier is not supported on this GPU");
if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
ImmTy = AMDGPUOperand::ImmTyR128A16;
Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
bool &Disabling) const {
- Disabling = Id.startswith("no");
+ Disabling = Id.consume_front("no");
if (isGFX940() && !Mnemo.startswith("s_")) {
return StringSwitch<unsigned>(Id)
.Case("nt", AMDGPU::CPol::NT)
- .Case("nont", AMDGPU::CPol::NT)
.Case("sc0", AMDGPU::CPol::SC0)
- .Case("nosc0", AMDGPU::CPol::SC0)
.Case("sc1", AMDGPU::CPol::SC1)
- .Case("nosc1", AMDGPU::CPol::SC1)
.Default(0);
}
return StringSwitch<unsigned>(Id)
.Case("dlc", AMDGPU::CPol::DLC)
- .Case("nodlc", AMDGPU::CPol::DLC)
.Case("glc", AMDGPU::CPol::GLC)
- .Case("noglc", AMDGPU::CPol::GLC)
.Case("scc", AMDGPU::CPol::SCC)
- .Case("noscc", AMDGPU::CPol::SCC)
.Case("slc", AMDGPU::CPol::SLC)
- .Case("noslc", AMDGPU::CPol::SLC)
.Default(0);
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
SMLoc OpLoc = getLoc();
unsigned Enabled = 0, Seen = 0;
@@ -6045,20 +5958,14 @@ AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
lex();
- if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC) {
- Error(S, "dlc modifier is not supported on this GPU");
- return MatchOperand_ParseFail;
- }
+ if (!isGFX10Plus() && CPol == AMDGPU::CPol::DLC)
+ return Error(S, "dlc modifier is not supported on this GPU");
- if (!isGFX90A() && CPol == AMDGPU::CPol::SCC) {
- Error(S, "scc modifier is not supported on this GPU");
- return MatchOperand_ParseFail;
- }
+ if (!isGFX90A() && CPol == AMDGPU::CPol::SCC)
+ return Error(S, "scc modifier is not supported on this GPU");
- if (Seen & CPol) {
- Error(S, "duplicate cache policy modifier");
- return MatchOperand_ParseFail;
- }
+ if (Seen & CPol)
+ return Error(S, "duplicate cache policy modifier");
if (!Disabling)
Enabled |= CPol;
@@ -6067,11 +5974,11 @@ AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
}
if (!Seen)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Operands.push_back(
AMDGPUOperand::CreateImm(this, Enabled, OpLoc, AMDGPUOperand::ImmTyCPol));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
static void addOptionalImmOperand(
@@ -6088,16 +5995,15 @@ static void addOptionalImmOperand(
}
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
- StringRef &Value,
- SMLoc &StringLoc) {
+ParseStatus AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
+ StringRef &Value,
+ SMLoc &StringLoc) {
if (!trySkipId(Prefix, AsmToken::Colon))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
StringLoc = getLoc();
- return parseId(Value, "expected an identifier") ? MatchOperand_Success
- : MatchOperand_ParseFail;
+ return parseId(Value, "expected an identifier") ? ParseStatus::Success
+ : ParseStatus::Failure;
}
//===----------------------------------------------------------------------===//
@@ -6111,9 +6017,9 @@ bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
SMLoc Loc = getLoc();
auto Res = parseIntWithPrefix(Pref, Val);
- if (Res == MatchOperand_ParseFail)
+ if (Res.isFailure())
return false;
- if (Res == MatchOperand_NoMatch)
+ if (Res.isNoMatch())
return true;
if (Val < 0 || Val > MaxVal) {
@@ -6127,8 +6033,7 @@ bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
-OperandMatchResultTy
-AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
+ParseStatus AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
using namespace llvm::AMDGPU::MTBUFFormat;
int64_t Dfmt = DFMT_UNDEF;
@@ -6137,11 +6042,11 @@ AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
// dfmt and nfmt can appear in either order, and each is optional.
for (int I = 0; I < 2; ++I) {
if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
+
+ if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt))
+ return ParseStatus::Failure;
- if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
- return MatchOperand_ParseFail;
- }
// Skip optional comma between dfmt/nfmt
// but guard against 2 commas following each other.
if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
@@ -6151,29 +6056,28 @@ AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
}
if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;
Format = encodeDfmtNfmt(Dfmt, Nfmt);
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseUfmt(int64_t &Format) {
+ParseStatus AMDGPUAsmParser::parseUfmt(int64_t &Format) {
using namespace llvm::AMDGPU::MTBUFFormat;
int64_t Fmt = UFMT_UNDEF;
if (!tryParseFmt("format", UFMT_MAX, Fmt))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
if (Fmt == UFMT_UNDEF)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Format = Fmt;
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
@@ -6199,31 +6103,26 @@ bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
return false;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
- SMLoc FormatLoc,
- int64_t &Format) {
+ParseStatus AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
+ SMLoc FormatLoc,
+ int64_t &Format) {
using namespace llvm::AMDGPU::MTBUFFormat;
int64_t Dfmt = DFMT_UNDEF;
int64_t Nfmt = NFMT_UNDEF;
if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
if (trySkipToken(AsmToken::Comma)) {
StringRef Str;
SMLoc Loc = getLoc();
if (!parseId(Str, "expected a format string") ||
- !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
- return MatchOperand_ParseFail;
- }
- if (Dfmt == DFMT_UNDEF) {
- Error(Loc, "duplicate numeric format");
- return MatchOperand_ParseFail;
- } else if (Nfmt == NFMT_UNDEF) {
- Error(Loc, "duplicate data format");
- return MatchOperand_ParseFail;
- }
+ !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc))
+ return ParseStatus::Failure;
+ if (Dfmt == DFMT_UNDEF)
+ return Error(Loc, "duplicate numeric format");
+ if (Nfmt == NFMT_UNDEF)
+ return Error(Loc, "duplicate data format");
}
Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
@@ -6231,94 +6130,84 @@ AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
if (isGFX10Plus()) {
auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI());
- if (Ufmt == UFMT_UNDEF) {
- Error(FormatLoc, "unsupported format");
- return MatchOperand_ParseFail;
- }
+ if (Ufmt == UFMT_UNDEF)
+ return Error(FormatLoc, "unsupported format");
Format = Ufmt;
} else {
Format = encodeDfmtNfmt(Dfmt, Nfmt);
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
- SMLoc Loc,
- int64_t &Format) {
+ParseStatus AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
+ SMLoc Loc,
+ int64_t &Format) {
using namespace llvm::AMDGPU::MTBUFFormat;
auto Id = getUnifiedFormat(FormatStr, getSTI());
if (Id == UFMT_UNDEF)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
- if (!isGFX10Plus()) {
- Error(Loc, "unified format is not supported on this GPU");
- return MatchOperand_ParseFail;
- }
+ if (!isGFX10Plus())
+ return Error(Loc, "unified format is not supported on this GPU");
Format = Id;
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
+ParseStatus AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
using namespace llvm::AMDGPU::MTBUFFormat;
SMLoc Loc = getLoc();
if (!parseExpr(Format))
- return MatchOperand_ParseFail;
- if (!isValidFormatEncoding(Format, getSTI())) {
- Error(Loc, "out of range format");
- return MatchOperand_ParseFail;
- }
+ return ParseStatus::Failure;
+ if (!isValidFormatEncoding(Format, getSTI()))
+ return Error(Loc, "out of range format");
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
+ParseStatus AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
using namespace llvm::AMDGPU::MTBUFFormat;
if (!trySkipId("format", AsmToken::Colon))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
if (trySkipToken(AsmToken::LBrac)) {
StringRef FormatStr;
SMLoc Loc = getLoc();
if (!parseId(FormatStr, "expected a format string"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
- if (Res == MatchOperand_NoMatch)
+ if (Res.isNoMatch())
Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
- if (Res != MatchOperand_Success)
+ if (!Res.isSuccess())
return Res;
if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
return parseNumericFormat(Format);
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
using namespace llvm::AMDGPU::MTBUFFormat;
int64_t Format = getDefaultFormatEncoding(getSTI());
- OperandMatchResultTy Res;
+ ParseStatus Res;
SMLoc Loc = getLoc();
// Parse legacy format syntax.
Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
- if (Res == MatchOperand_ParseFail)
+ if (Res.isFailure())
return Res;
- bool FormatFound = (Res == MatchOperand_Success);
+ bool FormatFound = Res.isSuccess();
Operands.push_back(
AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));
@@ -6329,124 +6218,65 @@ AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
if (isToken(AsmToken::EndOfStatement)) {
// We are expecting an soffset operand,
// but let matcher handle the error.
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
// Parse soffset.
Res = parseRegOrImm(Operands);
- if (Res != MatchOperand_Success)
+ if (!Res.isSuccess())
return Res;
trySkipToken(AsmToken::Comma);
if (!FormatFound) {
Res = parseSymbolicOrNumericFormat(Format);
- if (Res == MatchOperand_ParseFail)
+ if (Res.isFailure())
return Res;
- if (Res == MatchOperand_Success) {
+ if (Res.isSuccess()) {
auto Size = Operands.size();
AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
Op.setImm(Format);
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- if (isId("format") && peekToken().is(AsmToken::Colon)) {
- Error(getLoc(), "duplicate format");
- return MatchOperand_ParseFail;
- }
- return MatchOperand_Success;
+ if (isId("format") && peekToken().is(AsmToken::Colon))
+ return Error(getLoc(), "duplicate format");
+ return ParseStatus::Success;
}
-OperandMatchResultTy AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
- OperandMatchResultTy Res =
+ParseStatus AMDGPUAsmParser::parseFlatOffset(OperandVector &Operands) {
+ ParseStatus Res =
parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
- if (Res == MatchOperand_NoMatch) {
+ if (Res.isNoMatch()) {
Res = parseIntWithPrefix("inst_offset", Operands,
AMDGPUOperand::ImmTyInstOffset);
}
return Res;
}
-OperandMatchResultTy AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
- OperandMatchResultTy Res =
+ParseStatus AMDGPUAsmParser::parseR128A16(OperandVector &Operands) {
+ ParseStatus Res =
parseNamedBit("r128", Operands, AMDGPUOperand::ImmTyR128A16);
- if (Res == MatchOperand_NoMatch)
+ if (Res.isNoMatch())
Res = parseNamedBit("a16", Operands, AMDGPUOperand::ImmTyA16);
return Res;
}
-//===----------------------------------------------------------------------===//
-// ds
-//===----------------------------------------------------------------------===//
-
-void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
- const OperandVector &Operands) {
- OptionalImmIndexMap OptionalIdx;
-
- for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
- AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
-
- // Add the register arguments
- if (Op.isReg()) {
- Op.addRegOperands(Inst, 1);
- continue;
- }
-
- // Handle optional arguments
- OptionalIdx[Op.getImmTy()] = i;
+ParseStatus AMDGPUAsmParser::parseBLGP(OperandVector &Operands) {
+ ParseStatus Res =
+ parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
+ if (Res.isNoMatch()) {
+ Res =
+ parseOperandArrayWithPrefix("neg", Operands, AMDGPUOperand::ImmTyBLGP);
}
-
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
-
- Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
+ return Res;
}
-void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
- bool IsGdsHardcoded) {
- OptionalImmIndexMap OptionalIdx;
- const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
- AMDGPUOperand::ImmTy OffsetType = AMDGPUOperand::ImmTyOffset;
-
- for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
- AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
-
- auto TiedTo =
- Desc.getOperandConstraint(Inst.getNumOperands(), MCOI::TIED_TO);
-
- if (TiedTo != -1) {
- assert((unsigned)TiedTo < Inst.getNumOperands());
- Inst.addOperand(Inst.getOperand(TiedTo));
- }
-
- // Add the register arguments
- if (Op.isReg()) {
- Op.addRegOperands(Inst, 1);
- continue;
- }
-
- if (Op.isToken() && Op.getToken() == "gds") {
- IsGdsHardcoded = true;
- continue;
- }
-
- // Handle optional arguments
- OptionalIdx[Op.getImmTy()] = i;
-
- if (Op.getImmTy() == AMDGPUOperand::ImmTySwizzle)
- OffsetType = AMDGPUOperand::ImmTySwizzle;
- }
-
- addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
-
- if (!IsGdsHardcoded) {
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
- }
- Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
-}
+//===----------------------------------------------------------------------===//
+// Exp
+//===----------------------------------------------------------------------===//
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
OptionalImmIndexMap OptionalIdx;
@@ -6583,8 +6413,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
return true;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseSWaitCnt(OperandVector &Operands) {
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
int64_t Waitcnt = getWaitcntBitMask(ISA);
SMLoc S = getLoc();
@@ -6592,15 +6421,15 @@ AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
while (!isToken(AsmToken::EndOfStatement)) {
if (!parseCnt(Waitcnt))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
}
} else {
if (!parseExpr(Waitcnt))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
}
Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
@@ -6665,23 +6494,22 @@ bool AMDGPUAsmParser::parseDelay(int64_t &Delay) {
return true;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseSDelayALU(OperandVector &Operands) {
int64_t Delay = 0;
SMLoc S = getLoc();
if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
do {
if (!parseDelay(Delay))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
} while (trySkipToken(AsmToken::Pipe));
} else {
if (!parseExpr(Delay))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
}
Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
bool
@@ -6689,7 +6517,7 @@ AMDGPUOperand::isSWaitCnt() const {
return isImm();
}
-bool AMDGPUOperand::isSDelayAlu() const { return isImm(); }
+bool AMDGPUOperand::isSDelayALU() const { return isImm(); }
//===----------------------------------------------------------------------===//
// DepCtr
@@ -6753,7 +6581,7 @@ bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) {
return true;
}
-OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseDepCtr(OperandVector &Operands) {
using namespace llvm::AMDGPU::DepCtr;
int64_t DepCtr = getDefaultDepCtrEncoding(getSTI());
@@ -6763,15 +6591,15 @@ OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) {
unsigned UsedOprMask = 0;
while (!isToken(AsmToken::EndOfStatement)) {
if (!parseDepCtr(DepCtr, UsedOprMask))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
}
} else {
if (!parseExpr(DepCtr))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
}
Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); }
@@ -6847,8 +6675,7 @@ AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
return true;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
using namespace llvm::AMDGPU::Hwreg;
int64_t ImmVal = 0;
@@ -6862,19 +6689,17 @@ AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
validateHwreg(HwReg, Offset, Width)) {
ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
} else {
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
}
} else if (parseExpr(ImmVal, "a hwreg macro")) {
- if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
- Error(Loc, "invalid immediate: only 16-bit values are legal");
- return MatchOperand_ParseFail;
- }
+ if (ImmVal < 0 || !isUInt<16>(ImmVal))
+ return Error(Loc, "invalid immediate: only 16-bit values are legal");
} else {
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
}
Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
bool AMDGPUOperand::isHwreg() const {
@@ -6967,8 +6792,7 @@ AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
return true;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseSendMsg(OperandVector &Operands) {
using namespace llvm::AMDGPU::SendMsg;
int64_t ImmVal = 0;
@@ -6982,19 +6806,17 @@ AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
validateSendMsg(Msg, Op, Stream)) {
ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
} else {
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
}
} else if (parseExpr(ImmVal, "a sendmsg macro")) {
- if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
- Error(Loc, "invalid immediate: only 16-bit values are legal");
- return MatchOperand_ParseFail;
- }
+ if (ImmVal < 0 || !isUInt<16>(ImmVal))
+ return Error(Loc, "invalid immediate: only 16-bit values are legal");
} else {
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
}
Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
bool AMDGPUOperand::isSendMsg() const {
@@ -7005,12 +6827,12 @@ bool AMDGPUOperand::isSendMsg() const {
// v_interp
//===----------------------------------------------------------------------===//
-OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
StringRef Str;
SMLoc S = getLoc();
if (!parseId(Str))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
int Slot = StringSwitch<int>(Str)
.Case("p10", 0)
@@ -7018,27 +6840,23 @@ OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
.Case("p0", 2)
.Default(-1);
- if (Slot == -1) {
- Error(S, "invalid interpolation slot");
- return MatchOperand_ParseFail;
- }
+ if (Slot == -1)
+ return Error(S, "invalid interpolation slot");
Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
AMDGPUOperand::ImmTyInterpSlot));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
StringRef Str;
SMLoc S = getLoc();
if (!parseId(Str))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
- if (!Str.startswith("attr")) {
- Error(S, "invalid interpolation attribute");
- return MatchOperand_ParseFail;
- }
+ if (!Str.startswith("attr"))
+ return Error(S, "invalid interpolation attribute");
StringRef Chan = Str.take_back(2);
int AttrChan = StringSwitch<int>(Chan)
@@ -7047,57 +6865,49 @@ OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
.Case(".z", 2)
.Case(".w", 3)
.Default(-1);
- if (AttrChan == -1) {
- Error(S, "invalid or missing interpolation attribute channel");
- return MatchOperand_ParseFail;
- }
+ if (AttrChan == -1)
+ return Error(S, "invalid or missing interpolation attribute channel");
Str = Str.drop_back(2).drop_front(4);
uint8_t Attr;
- if (Str.getAsInteger(10, Attr)) {
- Error(S, "invalid or missing interpolation attribute number");
- return MatchOperand_ParseFail;
- }
+ if (Str.getAsInteger(10, Attr))
+ return Error(S, "invalid or missing interpolation attribute number");
- if (Attr > 63) {
- Error(S, "out of bounds interpolation attribute number");
- return MatchOperand_ParseFail;
- }
+ if (Attr > 32)
+ return Error(S, "out of bounds interpolation attribute number");
SMLoc SChan = SMLoc::getFromPointer(Chan.data());
Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
AMDGPUOperand::ImmTyInterpAttr));
- Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
- AMDGPUOperand::ImmTyAttrChan));
- return MatchOperand_Success;
+ Operands.push_back(AMDGPUOperand::CreateImm(
+ this, AttrChan, SChan, AMDGPUOperand::ImmTyInterpAttrChan));
+ return ParseStatus::Success;
}
//===----------------------------------------------------------------------===//
// exp
//===----------------------------------------------------------------------===//
-OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
using namespace llvm::AMDGPU::Exp;
StringRef Str;
SMLoc S = getLoc();
if (!parseId(Str))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
unsigned Id = getTgtId(Str);
- if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
- Error(S, (Id == ET_INVALID) ?
- "invalid exp target" :
- "exp target is not supported on this GPU");
- return MatchOperand_ParseFail;
- }
+ if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI()))
+ return Error(S, (Id == ET_INVALID)
+ ? "invalid exp target"
+ : "exp target is not supported on this GPU");
Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
AMDGPUOperand::ImmTyExpTgt));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
//===----------------------------------------------------------------------===//
@@ -7562,8 +7372,7 @@ AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
return false;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseSwizzle(OperandVector &Operands) {
SMLoc S = getLoc();
int64_t Imm = 0;
@@ -7580,9 +7389,9 @@ AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
- return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
+ return Ok ? ParseStatus::Success : ParseStatus::Failure;
}
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
bool
@@ -7638,8 +7447,7 @@ int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
return Imm;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
using namespace llvm::AMDGPU::VGPRIndexMode;
@@ -7649,19 +7457,17 @@ AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
if (trySkipId("gpr_idx", AsmToken::LParen)) {
Imm = parseGPRIdxMacro();
if (Imm == UNDEF)
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
} else {
if (getParser().parseAbsoluteExpression(Imm))
- return MatchOperand_ParseFail;
- if (Imm < 0 || !isUInt<4>(Imm)) {
- Error(S, "invalid immediate: only 4-bit values are legal");
- return MatchOperand_ParseFail;
- }
+ return ParseStatus::Failure;
+ if (Imm < 0 || !isUInt<4>(Imm))
+ return Error(S, "invalid immediate: only 4-bit values are legal");
}
Operands.push_back(
AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
bool AMDGPUOperand::isGPRIdxMode() const {
@@ -7672,17 +7478,16 @@ bool AMDGPUOperand::isGPRIdxMode() const {
// sopp branch targets
//===----------------------------------------------------------------------===//
-OperandMatchResultTy
-AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseSOPPBrTarget(OperandVector &Operands) {
// Make sure we are not parsing something
// that looks like a label or an expression but is not.
// This will improve error messages.
if (isRegister() || isModifier())
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
if (!parseExpr(Operands))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
assert(Opr.isImm() || Opr.isExpr());
@@ -7696,15 +7501,14 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
Error(Loc, "expected a 16-bit signed jump offset");
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
//===----------------------------------------------------------------------===//
// Boolean holding registers
//===----------------------------------------------------------------------===//
-OperandMatchResultTy
-AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
return parseReg(Operands);
}
@@ -7712,10 +7516,6 @@ AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
// mubuf
//===----------------------------------------------------------------------===//
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
-}
-
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
const OperandVector &Operands,
bool IsAtomic) {
@@ -7775,100 +7575,12 @@ void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
-}
-
-void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
- OptionalImmIndexMap OptionalIdx;
-
- for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
- AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
-
- // Add the register arguments
- if (Op.isReg()) {
- Op.addRegOperands(Inst, 1);
- continue;
- }
-
- // Handle the case where soffset is an immediate
- if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
- Op.addImmOperands(Inst, 1);
- continue;
- }
-
- // Handle tokens like 'offen' which are sometimes hard-coded into the
- // asm string. There are no MCInst operands for these.
- if (Op.isToken()) {
- continue;
- }
- assert(Op.isImm());
-
- // Handle optional arguments
- OptionalIdx[Op.getImmTy()] = i;
- }
-
- addOptionalImmOperand(Inst, Operands, OptionalIdx,
- AMDGPUOperand::ImmTyOffset);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}
//===----------------------------------------------------------------------===//
-// mimg
+// SMEM
//===----------------------------------------------------------------------===//
-void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
- bool IsAtomic) {
- unsigned I = 1;
- const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
- for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
- ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
- }
-
- if (IsAtomic) {
- // Add src, same as dst
- assert(Desc.getNumDefs() == 1);
- ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
- }
-
- OptionalImmIndexMap OptionalIdx;
-
- for (unsigned E = Operands.size(); I != E; ++I) {
- AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
-
- // Add the register arguments
- if (Op.isReg()) {
- Op.addRegOperands(Inst, 1);
- } else if (Op.isImmModifier()) {
- OptionalIdx[Op.getImmTy()] = I;
- } else if (!Op.isToken()) {
- llvm_unreachable("unexpected operand type");
- }
- }
-
- bool IsGFX10Plus = isGFX10Plus();
-
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
- if (IsGFX10Plus)
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
- if (IsGFX10Plus)
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
- if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::tfe))
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
- if (!IsGFX10Plus)
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
-}
-
-void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
- cvtMIMG(Inst, Operands, true);
-}
-
void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
OptionalImmIndexMap OptionalIdx;
bool IsAtomicReturn = false;
@@ -7920,54 +7632,28 @@ void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands)
if ((int)Inst.getNumOperands() <=
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx,
+ AMDGPUOperand::ImmTySMEMOffsetMod);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
}
-void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
- const OperandVector &Operands) {
- for (unsigned I = 1; I < Operands.size(); ++I) {
- auto &Operand = (AMDGPUOperand &)*Operands[I];
- if (Operand.isReg())
- Operand.addRegOperands(Inst, 1);
- }
-
- Inst.addOperand(MCOperand::createImm(1)); // a16
-}
-
//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//
bool AMDGPUOperand::isSMRDOffset8() const {
- return isImm() && isUInt<8>(getImm());
+ return isImmLiteral() && isUInt<8>(getImm());
}
bool AMDGPUOperand::isSMEMOffset() const {
- return isImmTy(ImmTyNone) ||
- isImmTy(ImmTyOffset); // Offset range is checked later by validator.
+ // Offset range is checked later by validator.
+ return isImmLiteral();
}
bool AMDGPUOperand::isSMRDLiteralOffset() const {
// 32-bit literals are only supported on CI and we only want to use them
// when the offset is > 8-bits.
- return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+ return isImmLiteral() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}
//===----------------------------------------------------------------------===//
@@ -7996,12 +7682,13 @@ static bool ConvertOmodDiv(int64_t &Div) {
return false;
}
-// Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
+// For pre-gfx11 targets, both bound_ctrl:0 and bound_ctrl:1 are encoded as 1.
// This is intentional and ensures compatibility with sp3.
// See bug 35397 for details.
-static bool ConvertDppBoundCtrl(int64_t &BoundCtrl) {
+bool AMDGPUAsmParser::convertDppBoundCtrl(int64_t &BoundCtrl) {
if (BoundCtrl == 0 || BoundCtrl == 1) {
- BoundCtrl = 1;
+ if (!isGFX11Plus())
+ BoundCtrl = 1;
return true;
}
return false;
@@ -8013,13 +7700,15 @@ void AMDGPUAsmParser::onBeginOfFile() {
return;
if (!getTargetStreamer().getTargetID())
- getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
+ getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString(),
+ // TODO: Should try to check code object version from directive???
+ AMDGPU::getAmdhsaCodeObjectVersion());
if (isHsaAbiVersion3AndAbove(&getSTI()))
getTargetStreamer().EmitDirectiveAMDGCNTarget();
}
-OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseOModSI(OperandVector &Operands) {
StringRef Name = getTokenStr();
if (Name == "mul") {
return parseIntWithPrefix("mul", Operands,
@@ -8031,7 +7720,7 @@ OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands)
AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
}
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
// Determines which bit DST_OP_SEL occupies in the op_sel operand according to
@@ -8100,9 +7789,8 @@ void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
- } else if (Op.isInterpSlot() ||
- Op.isInterpAttr() ||
- Op.isAttrChan()) {
+ } else if (Op.isInterpSlot() || Op.isInterpAttr() ||
+ Op.isInterpAttrChan()) {
Inst.addOperand(MCOperand::createImm(Op.getImm()));
} else if (Op.isImmModifier()) {
OptionalIdx[Op.getImmTy()] = I;
@@ -8335,9 +8023,9 @@ void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
// VOPD
//===----------------------------------------------------------------------===//
-OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
if (!hasVOPD(getSTI()))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
if (isToken(AsmToken::Colon) && peekToken(false).is(AsmToken::Colon)) {
SMLoc S = getLoc();
@@ -8348,12 +8036,11 @@ OperandMatchResultTy AMDGPUAsmParser::parseVOPD(OperandVector &Operands) {
StringRef OpYName;
if (isToken(AsmToken::Identifier) && !Parser.parseIdentifier(OpYName)) {
Operands.push_back(AMDGPUOperand::CreateToken(this, OpYName, OpYLoc));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- Error(OpYLoc, "expected a VOPDY instruction after ::");
- return MatchOperand_ParseFail;
+ return Error(OpYLoc, "expected a VOPDY instruction after ::");
}
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
// Create VOPD MCInst operands using parsed assembler operands.
@@ -8439,11 +8126,11 @@ bool AMDGPUOperand::isABID() const {
}
bool AMDGPUOperand::isS16Imm() const {
- return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
+ return isImmLiteral() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}
bool AMDGPUOperand::isU16Imm() const {
- return isImm() && isUInt<16>(getImm());
+ return isImmLiteral() && isUInt<16>(getImm());
}
//===----------------------------------------------------------------------===//
@@ -8479,66 +8166,62 @@ bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
return true;
}
-OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseDim(OperandVector &Operands) {
if (!isGFX10Plus())
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
SMLoc S = getLoc();
if (!trySkipId("dim", AsmToken::Colon))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
unsigned Encoding;
SMLoc Loc = getLoc();
- if (!parseDimId(Encoding)) {
- Error(Loc, "invalid dim value");
- return MatchOperand_ParseFail;
- }
+ if (!parseDimId(Encoding))
+ return Error(Loc, "invalid dim value");
Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
AMDGPUOperand::ImmTyDim));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//
-OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
SMLoc S = getLoc();
if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
// dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
int64_t Sels[8];
if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
for (size_t i = 0; i < 8; ++i) {
if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
SMLoc Loc = getLoc();
if (getParser().parseAbsoluteExpression(Sels[i]))
- return MatchOperand_ParseFail;
- if (0 > Sels[i] || 7 < Sels[i]) {
- Error(Loc, "expected a 3-bit value");
- return MatchOperand_ParseFail;
- }
+ return ParseStatus::Failure;
+ if (0 > Sels[i] || 7 < Sels[i])
+ return Error(Loc, "expected a 3-bit value");
}
if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
unsigned DPP8 = 0;
for (size_t i = 0; i < 8; ++i)
DPP8 |= (Sels[i] << (i * 3));
Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
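
The dpp8 operand parsed above packs eight 3-bit lane selectors into a single 24-bit immediate (DPP8 |= Sels[i] << (i * 3)). A minimal standalone sketch of that encoding follows; encodeDpp8 is an illustrative helper, not part of the parser.

#include <array>
#include <cassert>
#include <cstdint>
#include <cstdio>

// Pack eight 3-bit lane selectors (each 0..7) into a 24-bit dpp8 immediate,
// mirroring the loop in parseDPP8 above.
static uint32_t encodeDpp8(const std::array<int64_t, 8> &Sels) {
  uint32_t DPP8 = 0;
  for (int i = 0; i < 8; ++i) {
    assert(Sels[i] >= 0 && Sels[i] <= 7 && "expected a 3-bit value");
    DPP8 |= static_cast<uint32_t>(Sels[i]) << (i * 3);
  }
  return DPP8;
}

int main() {
  // e.g. dpp8:[7,6,5,4,3,2,1,0], which selects lanes in reverse order
  // within each group of eight.
  std::printf("0x%06x\n", encodeDpp8({7, 6, 5, 4, 3, 2, 1, 0})); // 0x053977
  return 0;
}
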
bool
@@ -8644,13 +8327,12 @@ AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
return Val;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
using namespace AMDGPU::DPP;
if (!isToken(AsmToken::Identifier) ||
!isSupportedDPPCtrl(getTokenStr(), Operands))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
SMLoc S = getLoc();
int64_t Val = -1;
@@ -8673,31 +8355,11 @@ AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
}
if (Val == -1)
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
Operands.push_back(
AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
- return MatchOperand_Success;
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
- return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
- return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDppBoundCtrl() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
+ return ParseStatus::Success;
}
void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
@@ -8744,7 +8406,7 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
}
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Add the register arguments
- if (IsDPP8 && Op.isFI()) {
+ if (IsDPP8 && Op.isDppFI()) {
Fi = Op.getImm();
} else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
@@ -8786,7 +8448,7 @@ void AMDGPUAsmParser::cvtVOP3DPP(MCInst &Inst, const OperandVector &Operands,
if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi))
addOptionalImmOperand(Inst, Operands, OptionalIdx,
- AMDGPUOperand::ImmTyDppFi);
+ AMDGPUOperand::ImmTyDppFI);
}
}
@@ -8821,7 +8483,7 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
Op.addImmOperands(Inst, 1);
} else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegWithFPInputModsOperands(Inst, 2);
- } else if (Op.isFI()) {
+ } else if (Op.isDppFI()) {
Fi = Op.getImm();
} else if (Op.isReg()) {
Op.addRegOperands(Inst, 1);
@@ -8852,7 +8514,8 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::fi)) {
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx,
+ AMDGPUOperand::ImmTyDppFI);
}
}
}
@@ -8861,20 +8524,18 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool I
// sdwa
//===----------------------------------------------------------------------===//
-OperandMatchResultTy
-AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
- AMDGPUOperand::ImmTy Type) {
+ParseStatus AMDGPUAsmParser::parseSDWASel(OperandVector &Operands,
+ StringRef Prefix,
+ AMDGPUOperand::ImmTy Type) {
using namespace llvm::AMDGPU::SDWA;
SMLoc S = getLoc();
StringRef Value;
- OperandMatchResultTy res;
SMLoc StringLoc;
- res = parseStringWithPrefix(Prefix, Value, StringLoc);
- if (res != MatchOperand_Success) {
- return res;
- }
+ ParseStatus Res = parseStringWithPrefix(Prefix, Value, StringLoc);
+ if (!Res.isSuccess())
+ return Res;
int64_t Int;
Int = StringSwitch<int64_t>(Value)
@@ -8887,28 +8548,23 @@ AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
.Case("DWORD", SdwaSel::DWORD)
.Default(0xffffffff);
- if (Int == 0xffffffff) {
- Error(StringLoc, "invalid " + Twine(Prefix) + " value");
- return MatchOperand_ParseFail;
- }
+ if (Int == 0xffffffff)
+ return Error(StringLoc, "invalid " + Twine(Prefix) + " value");
Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
using namespace llvm::AMDGPU::SDWA;
SMLoc S = getLoc();
StringRef Value;
- OperandMatchResultTy res;
SMLoc StringLoc;
- res = parseStringWithPrefix("dst_unused", Value, StringLoc);
- if (res != MatchOperand_Success) {
- return res;
- }
+ ParseStatus Res = parseStringWithPrefix("dst_unused", Value, StringLoc);
+ if (!Res.isSuccess())
+ return Res;
int64_t Int;
Int = StringSwitch<int64_t>(Value)
@@ -8917,13 +8573,11 @@ AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
.Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
.Default(0xffffffff);
- if (Int == 0xffffffff) {
- Error(StringLoc, "invalid dst_unused value");
- return MatchOperand_ParseFail;
- }
+ if (Int == 0xffffffff)
+ return Error(StringLoc, "invalid dst_unused value");
- Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
- return MatchOperand_Success;
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySDWADstUnused));
+ return ParseStatus::Success;
}
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
@@ -9009,14 +8663,14 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_sel))
addOptionalImmOperand(Inst, Operands, OptionalIdx,
- AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
+ AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::dst_unused))
addOptionalImmOperand(Inst, Operands, OptionalIdx,
- AMDGPUOperand::ImmTySdwaDstUnused,
+ AMDGPUOperand::ImmTySDWADstUnused,
DstUnused::UNUSED_PRESERVE);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
break;
case SIInstrFlags::VOP2:
@@ -9025,17 +8679,17 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::omod))
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstSel, SdwaSel::DWORD);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWADstUnused, DstUnused::UNUSED_PRESERVE);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
break;
case SIInstrFlags::VOPC:
if (AMDGPU::hasNamedOperand(Inst.getOpcode(), AMDGPU::OpName::clamp))
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc0Sel, SdwaSel::DWORD);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySDWASrc1Sel, SdwaSel::DWORD);
break;
default:
@@ -9054,25 +8708,9 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
}
}
-//===----------------------------------------------------------------------===//
-// mAI
-//===----------------------------------------------------------------------===//
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
-}
-
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
-}
-
/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
- RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
+ RegisterMCAsmParser<AMDGPUAsmParser> A(getTheR600Target());
RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
@@ -9082,8 +8720,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"
-OperandMatchResultTy
-AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands, unsigned MCK) {
+ParseStatus AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands,
+ unsigned MCK) {
switch (MCK) {
case MCK_addr64:
return parseTokenOp("addr64", Operands);
@@ -9099,55 +8737,8 @@ AMDGPUAsmParser::parseCustomOperand(OperandVector &Operands, unsigned MCK) {
return parseTokenOp("off", Operands);
case MCK_row_95_en:
return parseTokenOp("row_en", Operands);
- case MCK_ImmABID:
- return parseIntWithPrefix("abid", Operands, AMDGPUOperand::ImmTyABID);
- case MCK_ImmBankMask:
- return parseIntWithPrefix("bank_mask", Operands,
- AMDGPUOperand::ImmTyDppBankMask);
- case MCK_ImmBLGP: {
- OperandMatchResultTy Res =
- parseIntWithPrefix("blgp", Operands, AMDGPUOperand::ImmTyBLGP);
- if (Res == MatchOperand_NoMatch) {
- Res = parseOperandArrayWithPrefix("neg", Operands,
- AMDGPUOperand::ImmTyBLGP);
- }
- return Res;
- }
- case MCK_ImmCBSZ:
- return parseIntWithPrefix("cbsz", Operands, AMDGPUOperand::ImmTyCBSZ);
- case MCK_ImmCPol:
- return parseCPol(Operands);
- case MCK_ImmFI:
- return parseIntWithPrefix("fi", Operands, AMDGPUOperand::ImmTyDppFi);
case MCK_gds:
return parseNamedBit("gds", Operands, AMDGPUOperand::ImmTyGDS);
- case MCK_ImmNegHi:
- return parseOperandArrayWithPrefix("neg_hi", Operands,
- AMDGPUOperand::ImmTyNegHi);
- case MCK_ImmNegLo:
- return parseOperandArrayWithPrefix("neg_lo", Operands,
- AMDGPUOperand::ImmTyNegLo);
- case MCK_ImmSMEMOffset:
- return parseIntWithPrefix("offset", Operands, AMDGPUOperand::ImmTyOffset);
- case MCK_ImmOModSI:
- return parseOModOperand(Operands);
- case MCK_ImmOpSel:
- return parseOperandArrayWithPrefix("op_sel", Operands,
- AMDGPUOperand::ImmTyOpSel);
- case MCK_ImmOpSelHi:
- return parseOperandArrayWithPrefix("op_sel_hi", Operands,
- AMDGPUOperand::ImmTyOpSelHi);
- case MCK_ImmRowMask:
- return parseIntWithPrefix("row_mask", Operands,
- AMDGPUOperand::ImmTyDppRowMask);
- case MCK_ImmSDWADstSel:
- return parseSDWASel(Operands, "dst_sel", AMDGPUOperand::ImmTySdwaDstSel);
- case MCK_ImmSDWADstUnused:
- return parseSDWADstUnused(Operands);
- case MCK_ImmSDWASrc0Sel:
- return parseSDWASel(Operands, "src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel);
- case MCK_ImmSDWASrc1Sel:
- return parseSDWASel(Operands, "src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel);
case MCK_tfe:
return parseNamedBit("tfe", Operands, AMDGPUOperand::ImmTyTFE);
}
@@ -9186,18 +8777,16 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
case MCK_SSrcF32:
return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
- case MCK_SoppBrTarget:
- return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
+ case MCK_SOPPBrTarget:
+ return Operand.isSOPPBrTarget() ? Match_Success : Match_InvalidOperand;
case MCK_VReg32OrOff:
return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
case MCK_InterpSlot:
return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
- case MCK_Attr:
+ case MCK_InterpAttr:
return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
- case MCK_AttrChan:
- return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
- case MCK_ImmSMEMOffset:
- return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
+ case MCK_InterpAttrChan:
+ return Operand.isInterpAttrChan() ? Match_Success : Match_InvalidOperand;
case MCK_SReg_64:
case MCK_SReg_64_XEXEC:
// Null is defined as a 32-bit register but
@@ -9215,7 +8804,7 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
// endpgm
//===----------------------------------------------------------------------===//
-OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
+ParseStatus AMDGPUAsmParser::parseEndpgm(OperandVector &Operands) {
SMLoc S = getLoc();
int64_t Imm = 0;
@@ -9224,14 +8813,12 @@ OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
Imm = 0;
}
- if (!isUInt<16>(Imm)) {
- Error(S, "expected a 16-bit value");
- return MatchOperand_ParseFail;
- }
+ if (!isUInt<16>(Imm))
+ return Error(S, "expected a 16-bit value");
Operands.push_back(
AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
@@ -9240,10 +8827,6 @@ bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
// LDSDIR
//===----------------------------------------------------------------------===//
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST);
-}
-
bool AMDGPUOperand::isWaitVDST() const {
return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm());
}
@@ -9252,10 +8835,6 @@ bool AMDGPUOperand::isWaitVDST() const {
// VINTERP
//===----------------------------------------------------------------------===//
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitEXP() const {
- return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitEXP);
-}
-
bool AMDGPUOperand::isWaitEXP() const {
return isImmTy(ImmTyWaitEXP) && isUInt<3>(getImm());
}
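
The AMDGPUAsmParser.cpp hunks above replace the old OperandMatchResultTy enum with the tri-state ParseStatus and let "return Error(Loc, ...)" report a failure in a single statement. Below is a minimal standalone sketch of that shape; Status, MiniParser, and parseSmallImm are illustrative stand-ins (the location argument is dropped for brevity), not the real llvm::ParseStatus or AMDGPUAsmParser API.

#include <cstdio>
#include <string>

// Minimal stand-in for the tri-state parse result: Success, Failure
// (a diagnostic has already been reported), or NoMatch (operand absent,
// another parser may try).
struct Status {
  enum Kind { Success, Failure, NoMatch };
  Kind K;
  // A bool converts to a status the same way the "return Error(Loc, ...)"
  // lines above rely on: true (an error was emitted) becomes Failure.
  Status(bool IsError) : K(IsError ? Failure : Success) {}
  Status(Kind K) : K(K) {}
  bool isSuccess() const { return K == Success; }
  bool isFailure() const { return K == Failure; }
  bool isNoMatch() const { return K == NoMatch; }
};

// Hypothetical mini-parser showing the before/after shape of the hunks above.
struct MiniParser {
  // Stand-in for the parser's Error(): report the message and return true.
  bool Error(const std::string &Msg) {
    std::fprintf(stderr, "error: %s\n", Msg.c_str());
    return true;
  }

  // Old pattern (OperandMatchResultTy):   New pattern (ParseStatus):
  //   Error(Loc, "msg");                    return Error(Loc, "msg");
  //   return MatchOperand_ParseFail;
  Status parseSmallImm(bool Present, long Value) {
    if (!Present)
      return Status::NoMatch;                       // nothing to parse here
    if (Value < 0 || Value > 0xffff)
      return Error("only 16-bit values are legal"); // one-line failure
    return Status::Success;
  }
};

int main() {
  MiniParser P;
  Status R = P.parseSmallImm(/*Present=*/true, 70000);
  std::printf("success=%d failure=%d nomatch=%d\n",
              R.isSuccess(), R.isFailure(), R.isNoMatch());
  return 0;
}
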
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index bd7f088c76e3..ea1578e30ae8 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -110,7 +110,6 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins,
Instruction BaseOpcode = !cast<Instruction>(MTBUFGetBaseOpcode<NAME>.ret);
let MTBUF = 1;
- let AsmMatchConverter = "cvtMtbuf";
}
class MTBUF_Real <MTBUF_Pseudo ps, string real_name = ps.Mnemonic> :
@@ -158,7 +157,7 @@ class getMTBUFInsDA<list<RegisterClass> vdataList,
RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret;
- dag NonVaddrInputs = (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, CPol:$cpol, SWZ:$swz);
+ dag NonVaddrInputs = (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, CPol:$cpol, i1imm:$swz);
dag Inputs = !if(!empty(vaddrList), NonVaddrInputs, !con((ins vaddrClass:$vaddr), NonVaddrInputs));
dag ret = !if(!empty(vdataList), Inputs, !con((ins vdata_op:$vdata), Inputs));
}
@@ -186,7 +185,7 @@ class getMTBUFAsmOps<int addrKind> {
!if(!eq(addrKind, BUFAddrKind.Addr64),
"$vaddr, $srsrc,$format $soffset addr64",
"")))));
- string ret = " $vdata, " # Pfx # "$offset$cpol$swz";
+ string ret = " $vdata, " # Pfx # "$offset$cpol";
}
class MTBUF_SetupAddr<int addrKind> {
@@ -387,7 +386,7 @@ class getMUBUFInsDA<list<RegisterClass> vdataList,
RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
RegisterOperand vdata_op = getLdStVDataRegisterOperand<vdataClass, isTFE>.ret;
- dag NonVaddrInputs = (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol, SWZ_0:$swz);
+ dag NonVaddrInputs = (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol_0:$cpol, i1imm_0:$swz);
dag Inputs = !if(!empty(vaddrList), NonVaddrInputs, !con((ins vaddrClass:$vaddr), NonVaddrInputs));
dag ret = !if(!empty(vdataList), Inputs, !con((ins vdata_op:$vdata), Inputs));
}
@@ -421,7 +420,7 @@ class getMUBUFIns<int addrKind, list<RegisterClass> vdataList, bit isTFE> {
(ins))))));
}
-class getMUBUFAsmOps<int addrKind, bit noVdata = 0, bit isLds = 0, bit isTFE = 0, bit isSwz = 0> {
+class getMUBUFAsmOps<int addrKind, bit noVdata = 0, bit isLds = 0, bit isTFE = 0> {
string Vdata = !if(noVdata, " ", " $vdata, ");
string Lds = !if(isLds, " lds", "");
string TFE = !if(isTFE, " tfe", "");
@@ -434,9 +433,8 @@ class getMUBUFAsmOps<int addrKind, bit noVdata = 0, bit isLds = 0, bit isTFE = 0
"")))));
string Offset = "$offset";
string OtherArgs = "$cpol";
- string Swz = !if(isSwz, "$swz", "");
- string ret = Vdata # MainArgs # Offset # OtherArgs # Lds # TFE # Swz;
+ string ret = Vdata # MainArgs # Offset # OtherArgs # Lds # TFE;
}
class MUBUF_SetupAddr<int addrKind> {
@@ -467,7 +465,7 @@ class MUBUF_Load_Pseudo <string opName,
!if(!or(isLds, isLdsOpc), (outs), (outs vdata_op:$vdata)),
!con(getMUBUFIns<addrKindCopy, [], isTFE>.ret,
!if(HasTiedDest, (ins vdata_op:$vdata_in), (ins))),
- getMUBUFAsmOps<addrKindCopy, !or(isLds, isLdsOpc), isLds, isTFE, 1>.ret,
+ getMUBUFAsmOps<addrKindCopy, !or(isLds, isLdsOpc), isLds, isTFE>.ret,
pattern>,
MUBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # !if(isLds, "_lds", "") # !if(isTFE, "_tfe", "") #
@@ -488,15 +486,15 @@ class MUBUF_Load_Pseudo <string opName,
}
class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat <
- (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset))),
- (load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset))
+ (load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset))),
+ (load_vt (inst v4i32:$srsrc, i32:$soffset, i32:$offset))
>;
class MUBUF_Addr64_Load_Pat <Instruction inst,
ValueType load_vt = i32,
SDPatternOperator ld = null_frag> : Pat <
- (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset))),
- (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset))
+ (load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i32:$offset))),
+ (load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i32:$offset))
>;
multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> {
@@ -562,7 +560,7 @@ class MUBUF_Store_Pseudo <string opName,
: MUBUF_Pseudo<opName,
(outs),
getMUBUFIns<addrKindCopy, [getVregSrcForVT<store_vt>.ret], isTFE>.ret,
- getMUBUFAsmOps<addrKindCopy, 0, 0, isTFE, 1>.ret,
+ getMUBUFAsmOps<addrKindCopy, 0, 0, isTFE>.ret,
pattern>,
MUBUF_SetupAddr<addrKindCopy> {
let PseudoInstr = opName # "_" # !if(isTFE, "_tfe", "") #
@@ -580,12 +578,12 @@ multiclass MUBUF_Pseudo_Stores_Helper<string opName, ValueType store_vt,
def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, legal_store_vt, isTFE,
[(st legal_store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset))]>,
+ i32:$offset))]>,
MUBUFAddr64Table<0, NAME>;
def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, legal_store_vt, isTFE,
[(st legal_store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset))]>,
+ i32:$offset))]>,
MUBUFAddr64Table<1, NAME>;
def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, legal_store_vt, isTFE>;
@@ -609,8 +607,8 @@ multiclass MUBUF_Pseudo_Stores<string opName, ValueType store_vt = i32,
class MUBUF_Pseudo_Store_Lds<string opName>
: MUBUF_Pseudo<opName,
(outs),
- (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol:$cpol, SWZ:$swz),
- " $srsrc, $soffset$offset lds$cpol$swz"> {
+ (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, CPol:$cpol, i1imm:$swz),
+ " $srsrc, $soffset$offset lds$cpol"> {
let LGKM_CNT = 1;
let mayLoad = 1;
let mayStore = 1;
@@ -635,7 +633,7 @@ class getMUBUFAtomicInsDA<RegisterClass vdataClass, bit vdata_in,
dag MainInputs = (ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset);
dag CPol = !if(vdata_in, (ins CPol_GLC1:$cpol), (ins CPol_0:$cpol));
- dag ret = !con(Data, !con(MainInputs, CPol));
+ dag ret = !con(Data, MainInputs, CPol);
}
class getMUBUFAtomicIns<int addrKind,
@@ -724,23 +722,15 @@ multiclass MUBUF_Pseudo_Atomics_NO_RTN <string opName,
RegisterClass vdataClass,
ValueType vdataType,
bit isFP = isFloatType<vdataType>.ret> {
- let FPAtomic = isFP in
- def _OFFSET : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass>,
- MUBUFAddr64Table <0, NAME>;
-
- let FPAtomic = isFP in
- def _ADDR64 : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass>,
- MUBUFAddr64Table <1, NAME>;
-
- let FPAtomic = isFP in
- def _OFFEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
-
- let FPAtomic = isFP in
-
- def _IDXEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
-
- let FPAtomic = isFP in
- def _BOTHEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+ let FPAtomic = isFP in {
+ def _OFFSET : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass>,
+ MUBUFAddr64Table <0, NAME>;
+ def _ADDR64 : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass>,
+ MUBUFAddr64Table <1, NAME>;
+ def _OFFEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+ def _IDXEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+ def _BOTHEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+ }
}
multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
@@ -748,28 +738,23 @@ multiclass MUBUF_Pseudo_Atomics_RTN <string opName,
ValueType vdataType,
SDPatternOperator atomic,
bit isFP = isFloatType<vdataType>.ret> {
- let FPAtomic = isFP in
- def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
- [(set vdataType:$vdata,
- (atomic (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset),
- vdataType:$vdata_in))]>,
- MUBUFAddr64Table <0, NAME # "_RTN">;
-
- let FPAtomic = isFP in
- def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
- [(set vdataType:$vdata,
- (atomic (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset),
- vdataType:$vdata_in))]>,
- MUBUFAddr64Table <1, NAME # "_RTN">;
-
- let FPAtomic = isFP in
- def _OFFEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
-
- let FPAtomic = isFP in
- def _IDXEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
-
- let FPAtomic = isFP in
- def _BOTHEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+ let FPAtomic = isFP in {
+ def _OFFSET_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
+ [(set vdataType:$vdata,
+ (atomic (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset),
+ vdataType:$vdata_in))]>,
+ MUBUFAddr64Table <0, NAME # "_RTN">;
+
+ def _ADDR64_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
+ [(set vdataType:$vdata,
+ (atomic (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i32:$offset),
+ vdataType:$vdata_in))]>,
+ MUBUFAddr64Table <1, NAME # "_RTN">;
+
+ def _OFFEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+ def _IDXEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+ def _BOTHEN_RTN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+ }
}
multiclass MUBUF_Pseudo_Atomics <string opName,
@@ -1124,7 +1109,7 @@ defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_NO_RTN<
"buffer_atomic_add_f32", VGPR_32, f32
>;
-let SubtargetPredicate = HasAtomicPkFaddNoRtnInsts in
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN <
"buffer_atomic_pk_add_f16", VGPR_32, v2f16
>;
@@ -1134,7 +1119,7 @@ defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Pseudo_Atomics_RTN<
"buffer_atomic_add_f32", VGPR_32, f32, null_frag
>;
-let OtherPredicates = [isGFX90APlus] in
+let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_RTN <
"buffer_atomic_pk_add_f16", VGPR_32, v2f16, null_frag
>;
@@ -1233,21 +1218,21 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
def : GCNPat<
(vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, 0)),
- (!cast<MUBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
def : GCNPat<
(vt (st v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$auxiliary, 0)),
- (!cast<MUBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
def : GCNPat<
(vt (st v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, timm)),
- (!cast<MUBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (!cast<MUBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
@@ -1256,7 +1241,7 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
timm:$auxiliary, timm)),
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
- SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
}
@@ -1320,7 +1305,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
def : GCNPat<
(st vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, 0),
- (!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
@@ -1328,14 +1313,14 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(st vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$auxiliary, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), (extract_cpol $auxiliary), (extract_swz $auxiliary))
+ timm:$offset, (extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
def : GCNPat<
(st vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$auxiliary, timm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), (extract_cpol $auxiliary), (extract_swz $auxiliary))
+ timm:$offset, (extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
def : GCNPat<
@@ -1344,7 +1329,7 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
getVregSrcForVT<vt>.ret:$vdata,
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
- SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (extract_cpol $auxiliary),
+ SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, (extract_cpol $auxiliary),
(extract_swz $auxiliary))
>;
}
@@ -1408,13 +1393,13 @@ multiclass BufferAtomicPat<string OpPrefix, ValueType vt, string Inst, bit isInt
let AddedComplexity = !if(!eq(RtnMode, "ret"), 0, 1) in {
def : GCNPat<
- (vt (Op (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset), vt:$vdata_in)),
+ (vt (Op (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset), vt:$vdata_in)),
(!cast<MUBUF_Pseudo>(Inst # "_OFFSET" # InstSuffix) getVregSrcForVT<vt>.ret:$vdata_in,
SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset)
>;
def : GCNPat<
- (vt (Op (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset),
+ (vt (Op (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i32:$offset),
vt:$vdata_in)),
(!cast<MUBUF_Pseudo>(Inst # "_ADDR64" # InstSuffix) getVregSrcForVT<vt>.ret:$vdata_in,
VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset)
@@ -1441,7 +1426,7 @@ multiclass BufferAtomicCmpSwapPat<ValueType vt, ValueType data_vt, string Inst>
getVregSrcForVT<data_vt>.ret:$vdata_in, SReg_128:$srsrc, SCSrc_b32:$soffset,
offset:$offset);
def : GCNPat<
- (vt (Op (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset), data_vt:$vdata_in)),
+ (vt (Op (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset), data_vt:$vdata_in)),
!if(!eq(RtnMode, "ret"),
(EXTRACT_SUBREG (vt (COPY_TO_REGCLASS OffsetResDag, getVregSrcForVT<data_vt>.ret)),
!if(!eq(vt, i32), sub0, sub0_sub1)),
@@ -1452,7 +1437,7 @@ multiclass BufferAtomicCmpSwapPat<ValueType vt, ValueType data_vt, string Inst>
getVregSrcForVT<data_vt>.ret:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc,
SCSrc_b32:$soffset, offset:$offset);
def : GCNPat<
- (vt (Op (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset),
+ (vt (Op (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i32:$offset),
data_vt:$vdata_in)),
!if(!eq(RtnMode, "ret"),
(EXTRACT_SUBREG (vt (COPY_TO_REGCLASS Addr64ResDag, getVregSrcForVT<data_vt>.ret)),
@@ -1478,8 +1463,8 @@ defm : BufferAtomicPat<"atomic_load_umax_global", Ty, "BUFFER_ATOMIC_UMAX" # Suf
defm : BufferAtomicPat<"atomic_load_and_global", Ty, "BUFFER_ATOMIC_AND" # Suffix>;
defm : BufferAtomicPat<"atomic_load_or_global", Ty, "BUFFER_ATOMIC_OR" # Suffix>;
defm : BufferAtomicPat<"atomic_load_xor_global", Ty, "BUFFER_ATOMIC_XOR" # Suffix>;
-defm : BufferAtomicPat<"atomic_inc_global", Ty, "BUFFER_ATOMIC_INC" # Suffix>;
-defm : BufferAtomicPat<"atomic_dec_global", Ty, "BUFFER_ATOMIC_DEC" # Suffix>;
+defm : BufferAtomicPat<"atomic_load_uinc_wrap_global", Ty, "BUFFER_ATOMIC_INC" # Suffix>;
+defm : BufferAtomicPat<"atomic_load_udec_wrap_global", Ty, "BUFFER_ATOMIC_DEC" # Suffix>;
} // end foreach Ty
@@ -1503,7 +1488,7 @@ multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
timm:$offset, timm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(Inst # "_OFFSET" # InstSuffix)
getVregSrcForVT<vt>.ret:$vdata_in, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), CachePolicy)
+ timm:$offset, CachePolicy)
>;
def : GCNPat<
@@ -1511,7 +1496,7 @@ multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
timm:$offset, timm:$cachepolicy, timm)),
(!cast<MUBUF_Pseudo>(Inst # "_IDXEN" # InstSuffix)
getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$vindex, SReg_128:$rsrc,
- SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy)
+ SCSrc_b32:$soffset, timm:$offset, CachePolicy)
>;
def : GCNPat<
@@ -1519,7 +1504,7 @@ multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
i32:$soffset, timm:$offset, timm:$cachepolicy, 0)),
(!cast<MUBUF_Pseudo>(Inst # "_OFFEN" # InstSuffix)
getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$voffset, SReg_128:$rsrc,
- SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy)
+ SCSrc_b32:$soffset, timm:$offset, CachePolicy)
>;
def : GCNPat<
@@ -1528,7 +1513,7 @@ multiclass SIBufferAtomicPat<string OpPrefix, ValueType vt, string Inst,
(!cast<MUBUF_Pseudo>(Inst # "_BOTHEN" # InstSuffix)
getVregSrcForVT<vt>.ret:$vdata_in,
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
- SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy)
+ SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, CachePolicy)
>;
} // end let AddedComplexity
@@ -1584,7 +1569,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
0, i32:$soffset, timm:$offset,
timm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) getVregSrcForVT<vt>.ret:$vdata_in, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), timm:$cachepolicy)
+ timm:$offset, timm:$cachepolicy)
>;
def : GCNPat<
@@ -1592,7 +1577,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
0, i32:$soffset, timm:$offset,
timm:$cachepolicy, timm),
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), timm:$cachepolicy)
+ timm:$offset, timm:$cachepolicy)
>;
def : GCNPat<
@@ -1600,7 +1585,7 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
i32:$voffset, i32:$soffset, timm:$offset,
timm:$cachepolicy, 0),
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) getVregSrcForVT<vt>.ret:$vdata_in, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), timm:$cachepolicy)
+ timm:$offset, timm:$cachepolicy)
>;
def : GCNPat<
@@ -1610,22 +1595,23 @@ multiclass BufferAtomicPatterns_NO_RTN<SDPatternOperator name, ValueType vt,
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
getVregSrcForVT<vt>.ret:$vdata_in,
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
- SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), timm:$cachepolicy)
+ SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, timm:$cachepolicy)
>;
}
let SubtargetPredicate = HasAtomicFaddNoRtnInsts in
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32", ["noret"]>;
-let SubtargetPredicate = HasAtomicPkFaddNoRtnInsts in
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16NoRtnInsts in
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["noret"]>;
let SubtargetPredicate = HasAtomicFaddRtnInsts in
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f32, "BUFFER_ATOMIC_ADD_F32", ["ret"]>;
-let SubtargetPredicate = isGFX90APlus in {
- defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>;
+let SubtargetPredicate = HasAtomicBufferGlobalPkAddF16Insts in
+defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2f16, "BUFFER_ATOMIC_PK_ADD_F16", ["ret"]>;
+let SubtargetPredicate = isGFX90APlus in {
defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", f64, "BUFFER_ATOMIC_ADD_F64">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmin", f64, "BUFFER_ATOMIC_MIN_F64">;
defm : SIBufferAtomicPat<"SIbuffer_atomic_fmax", f64, "BUFFER_ATOMIC_MAX_F64">;
@@ -1641,7 +1627,7 @@ defvar CachePolicy = !if(!eq(RtnMode, "ret"), (set_glc $cachepolicy),
defvar OffsetResDag = (!cast<MUBUF_Pseudo>("BUFFER_ATOMIC_CMPSWAP_OFFSET" # InstSuffix)
(REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
- SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy);
+ SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, CachePolicy);
def : GCNPat<
(Op
i32:$data, i32:$cmp, v4i32:$rsrc, 0, 0, i32:$soffset,
@@ -1653,7 +1639,7 @@ def : GCNPat<
defvar IdxenResDag = (!cast<MUBUF_Pseudo>("BUFFER_ATOMIC_CMPSWAP_IDXEN" # InstSuffix)
(REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
- VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
CachePolicy);
def : GCNPat<
(Op
@@ -1667,7 +1653,7 @@ def : GCNPat<
defvar OffenResDag = (!cast<MUBUF_Pseudo>("BUFFER_ATOMIC_CMPSWAP_OFFEN" # InstSuffix)
(REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
- VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
CachePolicy);
def : GCNPat<
(Op
@@ -1682,7 +1668,7 @@ def : GCNPat<
defvar BothenResDag = (!cast<MUBUF_Pseudo>("BUFFER_ATOMIC_CMPSWAP_BOTHEN" # InstSuffix)
(REG_SEQUENCE VReg_64, VGPR_32:$data, sub0, VGPR_32:$cmp, sub1),
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
- SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), CachePolicy);
+ SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, CachePolicy);
def : GCNPat<
(Op
i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
@@ -1698,19 +1684,19 @@ def : GCNPat<
class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
PatFrag constant_ld> : GCNPat <
(vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset))),
+ i32:$offset))),
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset)
>;
multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
ValueType vt, PatFrag atomic_ld> {
def : GCNPat <
- (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset))),
+ (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i32:$offset))),
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset)
>;
def : GCNPat <
- (vt (atomic_ld (MUBUFOffset v4i32:$rsrc, i32:$soffset, i16:$offset))),
+ (vt (atomic_ld (MUBUFOffset v4i32:$rsrc, i32:$soffset, i32:$offset))),
(Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset))
>;
}
@@ -1731,7 +1717,7 @@ multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
PatFrag ld> {
def : GCNPat <
- (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset))),
+ (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset))),
(Instr_OFFSET $srsrc, $soffset, $offset)
>;
}
@@ -1754,12 +1740,12 @@ multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen,
ValueType vt, PatFrag ld> {
def : GCNPat <
(vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
- i32:$soffset, u16imm:$offset))),
+ i32:$soffset, i32:$offset))),
(InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0)
>;
def : GCNPat <
- (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
+ (vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, i32:$offset))),
(InstrOffset $srsrc, $soffset, $offset, 0, 0)
>;
}
@@ -1769,12 +1755,12 @@ multiclass MUBUFScratchLoadPat_D16 <MUBUF_Pseudo InstrOffen,
MUBUF_Pseudo InstrOffset,
ValueType vt, PatFrag ld_frag> {
def : GCNPat <
- (ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in),
+ (ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, i32:$offset), vt:$in),
(InstrOffen $vaddr, $srsrc, $soffset, $offset, $in)
>;
def : GCNPat <
- (ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in),
+ (ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, i32:$offset), vt:$in),
(InstrOffset $srsrc, $soffset, $offset, $in)
>;
}
@@ -1820,12 +1806,12 @@ multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo In
ValueType vt, PatFrag atomic_st> {
// Store follows atomic op convention so address is first
def : GCNPat <
- (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset), vt:$val),
+ (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i32:$offset), vt:$val),
(Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset)
>;
def : GCNPat <
- (atomic_st (MUBUFOffset v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
+ (atomic_st (MUBUFOffset v4i32:$rsrc, i32:$soffset, i32:$offset), vt:$val),
(Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset))
>;
}
@@ -1843,7 +1829,7 @@ multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
PatFrag st> {
def : GCNPat <
- (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset)),
+ (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset, i32:$offset)),
(Instr_OFFSET $vdata, $srsrc, $soffset, $offset)
>;
}
@@ -1857,13 +1843,13 @@ multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
RegisterClass rc = VGPR_32> {
def : GCNPat <
(st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
- i32:$soffset, u16imm:$offset)),
+ i32:$soffset, i32:$offset)),
(InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0)
>;
def : GCNPat <
(st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
- u16imm:$offset)),
+ i32:$offset)),
(InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0)
>;
}
@@ -1908,7 +1894,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
def : GCNPat<
(vt (st v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, 0)),
- (!cast<MTBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (!cast<MTBUF_Pseudo>(opcode # _OFFSET) SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(as_i8timm $format),
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
@@ -1916,7 +1902,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
def : GCNPat<
(vt (st v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, timm)),
- (!cast<MTBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (!cast<MTBUF_Pseudo>(opcode # _IDXEN) VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(as_i8timm $format),
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
@@ -1924,7 +1910,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
def : GCNPat<
(vt (st v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, 0)),
- (!cast<MTBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ (!cast<MTBUF_Pseudo>(opcode # _OFFEN) VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(as_i8timm $format),
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
@@ -1934,7 +1920,7 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
timm:$format, timm:$auxiliary, timm)),
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
- SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset),
+ SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset,
(as_i8timm $format),
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
@@ -1973,7 +1959,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(st vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) getVregSrcForVT<vt>.ret:$vdata, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), (as_i8timm $format),
+ timm:$offset, (as_i8timm $format),
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
@@ -1981,7 +1967,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(st vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, timm),
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$vindex, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), (as_i8timm $format),
+ timm:$offset, (as_i8timm $format),
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
@@ -1989,7 +1975,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(st vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
timm:$format, timm:$auxiliary, 0),
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) getVregSrcForVT<vt>.ret:$vdata, VGPR_32:$voffset, SReg_128:$rsrc, SCSrc_b32:$soffset,
- (as_i16timm $offset), (as_i8timm $format),
+ timm:$offset, (as_i8timm $format),
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
@@ -1999,7 +1985,7 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
getVregSrcForVT<vt>.ret:$vdata,
(REG_SEQUENCE VReg_64, VGPR_32:$vindex, sub0, VGPR_32:$voffset, sub1),
- SReg_128:$rsrc, SCSrc_b32:$soffset, (as_i16timm $offset), (as_i8timm $format),
+ SReg_128:$rsrc, SCSrc_b32:$soffset, timm:$offset, (as_i8timm $format),
(extract_cpol $auxiliary), (extract_swz $auxiliary))
>;
}
@@ -2710,11 +2696,11 @@ multiclass MUBUF_Real_vi_gfx90a<bits<7> op, MUBUF_Pseudo ps, bit isTFE = 0> {
def _vi : MUBUF_Real_vi<op, ps>;
if !not(isTFE) then {
- foreach _ = BoolToList<!not(ps.FPAtomic)>.ret in
+ if !not(ps.FPAtomic) then
def _gfx90a : MUBUF_Real_gfx90a<op, ps>;
}
- foreach _ = BoolToList<ps.FPAtomic>.ret in {
+ if ps.FPAtomic then {
def _gfx90a : MUBUF_Real_gfx90a<op, ps, 0> {
let SubtargetPredicate = isGFX90AOnly;
let AssemblerPredicate = isGFX90AOnly;
@@ -2897,11 +2883,11 @@ def BUFFER_WBINVL1_vi : MUBUF_Real_vi <0x3e, BUFFER_WBINVL1>;
def BUFFER_WBINVL1_VOL_vi : MUBUF_Real_vi <0x3f, BUFFER_WBINVL1_VOL>;
} // End AssemblerPredicate = isGFX8GFX9
-let SubtargetPredicate = HasAtomicFaddNoRtnInsts in {
-defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomic_vi <0x4d>;
defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Real_Atomic_vi <0x4e>;
+let SubtargetPredicate = HasAtomicFaddNoRtnInsts in {
+defm BUFFER_ATOMIC_ADD_F32 : MUBUF_Real_Atomic_vi <0x4d>;
} // End SubtargetPredicate = HasAtomicFaddNoRtnInsts
let SubtargetPredicate = isGFX90APlus in {
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 26f3537ff095..85a3f763cd5a 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -26,8 +26,6 @@ class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> patt
let isPseudo = 1;
let isCodeGenOnly = 1;
- let AsmMatchConverter = "cvtDS";
-
string Mnemonic = opName;
string AsmOperands = asmOps;
@@ -65,7 +63,6 @@ class DS_Real <DS_Pseudo ps, string opName = ps.Mnemonic> :
// copy relevant pseudo op flags
let SubtargetPredicate = ps.SubtargetPredicate;
let OtherPredicates = ps.OtherPredicates;
- let AsmMatchConverter = ps.AsmMatchConverter;
let SchedRW = ps.SchedRW;
let mayLoad = ps.mayLoad;
let mayStore = ps.mayStore;
@@ -164,7 +161,6 @@ class DS_1A2D_Off8_NORET <string opName, RegisterClass rc = VGPR_32,
let has_vdst = 0;
let has_offset = 0;
- let AsmMatchConverter = "cvtDSOffset01";
}
multiclass DS_1A2D_Off8_NORET_mc <string opName, RegisterClass rc = VGPR_32> {
@@ -187,7 +183,6 @@ class DS_0A1D_RET_GDS<string opName, RegisterClass rc = VGPR_32, RegisterClass s
let has_data1 = 0;
let has_gds = 0;
let gdsValue = 1;
- let AsmMatchConverter = "cvtDSGds";
let hasSideEffects = 1;
}
@@ -220,7 +215,7 @@ multiclass DS_1A1D_RET_mc_gfx9 <string opName, RegisterClass rc = VGPR_32,
let has_m0_read = 0 in {
def "" : DS_1A1D_RET<opName, rc>,
AtomicNoRet<!if(!eq(NoRetOp, ""), "", NoRetOp),
- !if(!eq(NoRetOp, ""), 0, 1)>;
+ !ne(NoRetOp, "")>;
}
}
@@ -262,8 +257,6 @@ class DS_1A2D_Off8_RET<string opName,
" $vdst, $addr, $data0, $data1$offset0$offset1$gds"> {
let has_offset = 0;
- let AsmMatchConverter = "cvtDSOffset01";
-
let hasPostISelHook = 1;
}
@@ -325,7 +318,6 @@ class DS_1A_Off8_RET <string opName, RegisterClass rc = VGPR_32>
let has_offset = 0;
let has_data0 = 0;
let has_data1 = 0;
- let AsmMatchConverter = "cvtDSOffset01";
}
multiclass DS_1A_Off8_RET_mc <string opName, RegisterClass rc = VGPR_32> {
@@ -345,7 +337,6 @@ class DS_1A_RET_GDS <string opName> : DS_Pseudo<opName,
let has_data1 = 0;
let has_gds = 0;
let gdsValue = 1;
- let AsmMatchConverter = "cvtDSGds";
}
class DS_0A_RET <string opName> : DS_Pseudo<opName,
@@ -393,7 +384,6 @@ class DS_GWS <string opName, dag ins, string asmOps>
let has_gds = 0;
let gdsValue = 1;
- let AsmMatchConverter = "cvtDSGds";
}
class DS_GWS_0D <string opName>
@@ -417,7 +407,6 @@ class DS_VOID <string opName> : DS_Pseudo<opName,
let mayStore = 0;
let hasSideEffects = 1;
let UseNamedOperandTable = 0;
- let AsmMatchConverter = "";
let has_vdst = 0;
let has_addr = 0;
@@ -436,7 +425,7 @@ class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag,
(ins VGPR_32:$addr, data_op:$data0, offset:$offset),
" $vdst, $addr, $data0$offset",
[(set i32:$vdst,
- (node (DS1Addr1Offset i32:$addr, i16:$offset), i32:$data0))] > {
+ (node (DS1Addr1Offset i32:$addr, i32:$offset), i32:$data0))] > {
let mayLoad = 0;
let mayStore = 0;
@@ -494,12 +483,12 @@ let SubtargetPredicate = isGFX90APlus in {
defm DS_ADD_RTN_F64 : DS_1A1D_RET_mc_gfx9<"ds_add_rtn_f64", VReg_64, "ds_add_f64">;
} // End SubtargetPredicate = isGFX90APlus
-let SubtargetPredicate = isGFX940Plus in {
+let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
defm DS_PK_ADD_F16 : DS_1A1D_NORET_mc_gfx9<"ds_pk_add_f16">;
defm DS_PK_ADD_RTN_F16 : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_f16", VGPR_32, "ds_pk_add_f16">;
defm DS_PK_ADD_BF16 : DS_1A1D_NORET_mc_gfx9<"ds_pk_add_bf16">;
defm DS_PK_ADD_RTN_BF16 : DS_1A1D_RET_mc_gfx9<"ds_pk_add_rtn_bf16", VGPR_32, "ds_pk_add_bf16">;
-} // End SubtargetPredicate = isGFX940Plus
+} // End SubtargetPredicate = HasAtomicDsPkAdd16Insts
defm DS_CMPSTORE_B32 : DS_1A2D_NORET_mc<"ds_cmpstore_b32">;
defm DS_CMPSTORE_F32 : DS_1A2D_NORET_mc<"ds_cmpstore_f32">;
@@ -631,7 +620,7 @@ def DS_WRITE_SRC2_B64 : DS_1A<"ds_write_src2_b64">;
} // End SubtargetPredicate = HasDsSrc2Insts
let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in {
-def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, 0, SwizzleImm>;
+def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32", VGPR_32, 0, Swizzle>;
}
let mayStore = 0 in {
@@ -740,7 +729,7 @@ def : GCNPat <
>;
class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat <
- (vt (frag (DS1Addr1Offset i32:$ptr, i16:$offset))),
+ (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
(inst $ptr, offset:$offset, (i1 gds))
>;
@@ -756,7 +745,7 @@ multiclass DSReadPat_mc<DS_Pseudo inst, ValueType vt, string frag> {
}
class DSReadPat_D16 <DS_Pseudo inst, PatFrag frag, ValueType vt> : GCNPat <
- (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$in),
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$in),
(inst $ptr, offset:$offset, (i1 0), $in)
>;
@@ -800,7 +789,7 @@ def : DSReadPat_D16<DS_READ_I8_D16, sextloadi8_d16_lo_local, v2f16>;
}
class DSWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag, int gds=0> : GCNPat <
- (frag vt:$value, (DS1Addr1Offset i32:$ptr, i16:$offset)),
+ (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
(inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 gds))
>;
@@ -817,7 +806,7 @@ multiclass DSWritePat_mc <DS_Pseudo inst, ValueType vt, string frag> {
// Irritatingly, atomic_store reverses the order of operands from a
// normal store.
class DSAtomicWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag> : GCNPat <
- (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value),
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
(inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 0))
>;
@@ -965,7 +954,7 @@ defm : DSWritePat_mc <DS_WRITE_B128, vt, "store_align_less_than_4_local">;
} // End AddedComplexity = 100
class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag, int complexity = 0,
- bit gds=0> : GCNPat <(frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value),
+ bit gds=0> : GCNPat <(frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
(inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 gds))> {
let AddedComplexity = complexity;
}
@@ -1014,7 +1003,7 @@ let SubtargetPredicate = isGFX6GFX7GFX8GFX9GFX10 in {
// Caution, the order of src and cmp is the *opposite* of the BUFFER_ATOMIC_CMPSWAP opcode.
class DSAtomicCmpXChgSwapped<DS_Pseudo inst, ValueType vt, PatFrag frag,
int complexity = 0, bit gds=0> : GCNPat<
- (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$cmp, vt:$swap),
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
(inst $ptr, getVregSrcForVT<vt>.ret:$cmp, getVregSrcForVT<vt>.ret:$swap, offset:$offset, (i1 gds))> {
let AddedComplexity = complexity;
}
@@ -1046,7 +1035,7 @@ let SubtargetPredicate = isGFX11Plus in {
// The order of src and cmp agrees with the BUFFER_ATOMIC_CMPSWAP opcode.
class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag,
int complexity = 0, bit gds=0> : GCNPat<
- (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$cmp, vt:$swap),
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
(inst $ptr, getVregSrcForVT<vt>.ret:$swap, getVregSrcForVT<vt>.ret:$cmp, offset:$offset, (i1 gds))> {
let AddedComplexity = complexity;
}
@@ -1069,8 +1058,8 @@ multiclass DSAtomicCmpXChg_mc<DS_Pseudo inst, DS_Pseudo noRetInst, ValueType vt,
defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B32, i32, "atomic_swap">;
defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U32, DS_ADD_U32, i32, "atomic_load_add">;
defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U32, DS_SUB_U32, i32, "atomic_load_sub">;
-defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U32, DS_INC_U32, i32, "atomic_inc">;
-defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U32, DS_DEC_U32, i32, "atomic_dec">;
+defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U32, DS_INC_U32, i32, "atomic_load_uinc_wrap">;
+defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U32, DS_DEC_U32, i32, "atomic_load_udec_wrap">;
defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B32, DS_AND_B32, i32, "atomic_load_and">;
defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B32, DS_OR_B32, i32, "atomic_load_or">;
defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B32, DS_XOR_B32, i32, "atomic_load_xor">;
@@ -1097,8 +1086,8 @@ defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_F32, DS_ADD_F32, f32, "atomic_load_fadd
defm : DSAtomicRetPat_mc<DS_WRXCHG_RTN_B64, i64, "atomic_swap">;
defm : DSAtomicRetNoRetPat_mc<DS_ADD_RTN_U64, DS_ADD_U64, i64, "atomic_load_add">;
defm : DSAtomicRetNoRetPat_mc<DS_SUB_RTN_U64, DS_SUB_U64, i64, "atomic_load_sub">;
-defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U64, DS_INC_U64, i64, "atomic_inc">;
-defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U64, DS_DEC_U64, i64, "atomic_dec">;
+defm : DSAtomicRetNoRetPat_mc<DS_INC_RTN_U64, DS_INC_U64, i64, "atomic_load_uinc_wrap">;
+defm : DSAtomicRetNoRetPat_mc<DS_DEC_RTN_U64, DS_DEC_U64, i64, "atomic_load_udec_wrap">;
defm : DSAtomicRetNoRetPat_mc<DS_AND_RTN_B64, DS_AND_B64, i64, "atomic_load_and">;
defm : DSAtomicRetNoRetPat_mc<DS_OR_RTN_B64, DS_OR_B64, i64, "atomic_load_or">;
defm : DSAtomicRetNoRetPat_mc<DS_XOR_RTN_B64, DS_XOR_B64, i64, "atomic_load_xor">;
@@ -1124,7 +1113,7 @@ def : DSAtomicRetPat<DS_ADD_F64, f64, atomic_load_fadd_local_noret_64>;
class DSAtomicRetPatIntrinsic<DS_Pseudo inst, ValueType vt, PatFrag frag,
bit gds=0> : GCNPat <
- (vt (frag (DS1Addr1Offset i32:$ptr, i16:$offset), vt:$value)),
+ (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value)),
(inst $ptr, getVregSrcForVT<vt>.ret:$value, offset:$offset, (i1 gds))> {
}
@@ -1133,7 +1122,7 @@ let AddedComplexity = 1 in
def : DSAtomicRetPatIntrinsic<DS_ADD_F64, f64, int_amdgcn_flat_atomic_fadd_noret_local_addrspace>;
}
-let SubtargetPredicate = isGFX940Plus in {
+let SubtargetPredicate = HasAtomicDsPkAdd16Insts in {
def : DSAtomicRetPat<DS_PK_ADD_RTN_F16, v2f16, atomic_load_fadd_v2f16_local_32>;
let AddedComplexity = 1 in
def : DSAtomicRetPat<DS_PK_ADD_F16, v2f16, atomic_load_fadd_v2f16_local_noret_32>;
@@ -1146,7 +1135,7 @@ def : GCNPat <
(v2i16 (int_amdgcn_ds_fadd_v2bf16_noret i32:$ptr, v2i16:$src)),
(DS_PK_ADD_BF16 VGPR_32:$ptr, VGPR_32:$src, 0, 0)
>;
-}
+} // End SubtargetPredicate = HasAtomicDsPkAdd16Insts
def : Pat <
(SIds_ordered_count i32:$value, i16:$offset),
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index c4e85210848a..1b05acd5c90a 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -45,13 +45,11 @@ using namespace llvm;
using DecodeStatus = llvm::MCDisassembler::DecodeStatus;
AMDGPUDisassembler::AMDGPUDisassembler(const MCSubtargetInfo &STI,
- MCContext &Ctx,
- MCInstrInfo const *MCII) :
- MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
- TargetMaxInstBytes(Ctx.getAsmInfo()->getMaxInstLength(&STI)) {
-
+ MCContext &Ctx, MCInstrInfo const *MCII)
+ : MCDisassembler(STI, Ctx), MCII(MCII), MRI(*Ctx.getRegisterInfo()),
+ MAI(*Ctx.getAsmInfo()), TargetMaxInstBytes(MAI.getMaxInstLength(&STI)) {
// ToDo: AMDGPUDisassembler supports only VI ISA.
- if (!STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding] && !isGFX10Plus())
+ if (!STI.hasFeature(AMDGPU::FeatureGCN3Encoding) && !isGFX10Plus())
report_fatal_error("Disassembly not yet supported for subtarget");
}
@@ -74,7 +72,7 @@ static int insertNamedMCOperand(MCInst &MI, const MCOperand &Op,
return OpIdx;
}
-static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
+static DecodeStatus decodeSOPPBrTarget(MCInst &Inst, unsigned Imm,
uint64_t Addr,
const MCDisassembler *Decoder) {
auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
@@ -115,183 +113,160 @@ static DecodeStatus decodeBoolReg(MCInst &Inst, unsigned Val, uint64_t Addr,
return addOperand(Inst, DAsm->DecoderName(Imm)); \
}
-#define DECODE_OPERAND_REG(RegClass) \
-DECODE_OPERAND(Decode##RegClass##RegisterClass, decodeOperand_##RegClass)
-
-DECODE_OPERAND_REG(VGPR_32)
-DECODE_OPERAND_REG(VGPR_32_Lo128)
-DECODE_OPERAND_REG(VRegOrLds_32)
-DECODE_OPERAND_REG(VS_32)
-DECODE_OPERAND_REG(VS_64)
-DECODE_OPERAND_REG(VS_128)
-
-DECODE_OPERAND_REG(VReg_64)
-DECODE_OPERAND_REG(VReg_96)
-DECODE_OPERAND_REG(VReg_128)
-DECODE_OPERAND_REG(VReg_256)
-DECODE_OPERAND_REG(VReg_288)
-DECODE_OPERAND_REG(VReg_352)
-DECODE_OPERAND_REG(VReg_384)
-DECODE_OPERAND_REG(VReg_512)
-DECODE_OPERAND_REG(VReg_1024)
-
-DECODE_OPERAND_REG(SReg_32)
-DECODE_OPERAND_REG(SReg_32_XM0_XEXEC)
-DECODE_OPERAND_REG(SReg_32_XEXEC_HI)
-DECODE_OPERAND_REG(SRegOrLds_32)
-DECODE_OPERAND_REG(SReg_64)
-DECODE_OPERAND_REG(SReg_64_XEXEC)
-DECODE_OPERAND_REG(SReg_128)
-DECODE_OPERAND_REG(SReg_256)
-DECODE_OPERAND_REG(SReg_512)
-
-DECODE_OPERAND_REG(AGPR_32)
-DECODE_OPERAND_REG(AReg_64)
-DECODE_OPERAND_REG(AReg_128)
-DECODE_OPERAND_REG(AReg_256)
-DECODE_OPERAND_REG(AReg_512)
-DECODE_OPERAND_REG(AReg_1024)
-DECODE_OPERAND_REG(AV_32)
-DECODE_OPERAND_REG(AV_64)
-DECODE_OPERAND_REG(AV_128)
-DECODE_OPERAND_REG(AVDst_128)
-DECODE_OPERAND_REG(AVDst_512)
-
-static DecodeStatus decodeOperand_VSrc16(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
-}
-
-static DecodeStatus decodeOperand_VSrcV216(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeOperand_VSrcV216(Imm));
-}
-
-static DecodeStatus decodeOperand_VSrcV232(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeOperand_VSrcV232(Imm));
-}
-
-static DecodeStatus decodeOperand_VS_16(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
-}
-
-static DecodeStatus decodeOperand_VS_32(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeOperand_VS_32(Imm));
-}
-
-static DecodeStatus decodeOperand_AReg_64(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm | 512));
-}
-
-static DecodeStatus decodeOperand_AReg_128(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm | 512));
-}
-
-static DecodeStatus decodeOperand_AReg_256(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm | 512));
-}
-
-static DecodeStatus decodeOperand_AReg_512(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm | 512));
-}
-
-static DecodeStatus decodeOperand_AReg_1024(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm | 512));
-}
-
-static DecodeStatus decodeOperand_VReg_64(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW64, Imm));
-}
-
-static DecodeStatus decodeOperand_VReg_128(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW128, Imm));
-}
-
-static DecodeStatus decodeOperand_VReg_256(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW256, Imm));
-}
-
-static DecodeStatus decodeOperand_VReg_512(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW512, Imm));
-}
+// Decoder for registers that decodes directly using RegClassID. Imm(8-bit) is
+// the register number. Used by VGPR-only and AGPR-only operands.
+#define DECODE_OPERAND_REG_8(RegClass) \
+ static DecodeStatus Decode##RegClass##RegisterClass( \
+ MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
+ const MCDisassembler *Decoder) { \
+ assert(Imm < (1 << 8) && "8-bit encoding"); \
+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
+ return addOperand( \
+ Inst, DAsm->createRegOperand(AMDGPU::RegClass##RegClassID, Imm)); \
+ }
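// Illustrative sketch (editor's expansion, not a hunk from this diff):
// instantiating the macro above as DECODE_OPERAND_REG_8(VGPR_32), as done
// further down, should generate roughly the following handler, which builds
// the operand straight from the 8-bit register number in the encoding.
static DecodeStatus DecodeVGPR_32RegisterClass(MCInst &Inst, unsigned Imm,
                                               uint64_t /*Addr*/,
                                               const MCDisassembler *Decoder) {
  assert(Imm < (1 << 8) && "8-bit encoding");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  // VGPR-only operand: the immediate is the register number within VGPR_32.
  return addOperand(
      Inst, DAsm->createRegOperand(AMDGPU::VGPR_32RegClassID, Imm));
}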
-static DecodeStatus decodeOperand_VReg_1024(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW1024, Imm));
-}
+#define DECODE_SrcOp(Name, EncSize, OpWidth, EncImm, MandatoryLiteral, \
+ ImmWidth) \
+ static DecodeStatus Name(MCInst &Inst, unsigned Imm, uint64_t /*Addr*/, \
+ const MCDisassembler *Decoder) { \
+ assert(Imm < (1 << EncSize) && #EncSize "-bit encoding"); \
+ auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder); \
+ return addOperand(Inst, \
+ DAsm->decodeSrcOp(AMDGPUDisassembler::OpWidth, EncImm, \
+ MandatoryLiteral, ImmWidth)); \
+ }
-static DecodeStatus decodeOperand_f32kimm(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
- return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
-}
+// Decoder for registers. Imm(7-bit) is the register number; uses decodeSrcOp
+// to get the register class. Used by SGPR-only operands.
+#define DECODE_OPERAND_REG_7(RegClass, OpWidth) \
+ DECODE_SrcOp(Decode##RegClass##RegisterClass, 7, OpWidth, Imm, false, 0)
+
+// Decoder for registers. Imm(10-bit): Imm{7-0} is the register number,
+// Imm{9} is acc (agpr or vgpr), and Imm{8} should be 0 (see VOP3Pe_SMFMAC).
+// Set Imm{8} to 1 (IS_VGPR) to decode using 'enum10' from decodeSrcOp.
+// Used by AV_ register classes (AGPR or VGPR only register operands).
+#define DECODE_OPERAND_REG_AV10(RegClass, OpWidth) \
+ DECODE_SrcOp(Decode##RegClass##RegisterClass, 10, OpWidth, \
+ Imm | AMDGPU::EncValues::IS_VGPR, false, 0)
+
+// Decoder for Src(9-bit encoding) registers only.
+#define DECODE_OPERAND_SRC_REG_9(RegClass, OpWidth) \
+ DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm, false, 0)
+
+// Decoder for Src(9-bit encoding) AGPR. The register number is encoded in 9
+// bits; set Imm{9} to 1 (set acc) and decode using 'enum10' from decodeSrcOp.
+// Registers only.
+#define DECODE_OPERAND_SRC_REG_A9(RegClass, OpWidth) \
+ DECODE_SrcOp(decodeOperand_##RegClass, 9, OpWidth, Imm | 512, false, 0)
+
+// Decoder for 'enum10' from decodeSrcOp: Imm{0-8} is the 9-bit Src encoding
+// and Imm{9} is acc. Registers only.
+#define DECODE_SRC_OPERAND_REG_AV10(RegClass, OpWidth) \
+ DECODE_SrcOp(decodeOperand_##RegClass, 10, OpWidth, Imm, false, 0)
+
+// Decoder for RegisterOperands using the 9-bit Src encoding. The operand can
+// be a register from RegClass or an immediate. Registers that don't belong to
+// RegClass are still decoded, and InstPrinter will report a warning. An
+// immediate is decoded into a constant of size ImmWidth, which should match
+// the width of the immediate used by OperandType (important for floating
+// point types).
+#define DECODE_OPERAND_SRC_REG_OR_IMM_9(RegClass, OpWidth, ImmWidth) \
+ DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, Imm, \
+ false, ImmWidth)
+
+// Decoder for Src(9-bit encoding) AGPR or immediate. Set Imm{9} to 1 (set acc)
+// and decode using 'enum10' from decodeSrcOp.
+#define DECODE_OPERAND_SRC_REG_OR_IMM_A9(RegClass, OpWidth, ImmWidth) \
+ DECODE_SrcOp(decodeOperand_##RegClass##_Imm##ImmWidth, 9, OpWidth, \
+ Imm | 512, false, ImmWidth)
+
+#define DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(RegClass, OpWidth, ImmWidth) \
+ DECODE_SrcOp(decodeOperand_##RegClass##_Deferred##_Imm##ImmWidth, 9, \
+ OpWidth, Imm, true, ImmWidth)
+
+// Default decoders generated by tablegen: 'Decode<RegClass>RegisterClass'
+// when RegisterClass is used as an operand. Most often used for destination
+// operands.
-static DecodeStatus decodeOperand_f16kimm(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
+DECODE_OPERAND_REG_8(VGPR_32)
+DECODE_OPERAND_REG_8(VGPR_32_Lo128)
+DECODE_OPERAND_REG_8(VReg_64)
+DECODE_OPERAND_REG_8(VReg_96)
+DECODE_OPERAND_REG_8(VReg_128)
+DECODE_OPERAND_REG_8(VReg_256)
+DECODE_OPERAND_REG_8(VReg_288)
+DECODE_OPERAND_REG_8(VReg_352)
+DECODE_OPERAND_REG_8(VReg_384)
+DECODE_OPERAND_REG_8(VReg_512)
+DECODE_OPERAND_REG_8(VReg_1024)
+
+DECODE_OPERAND_REG_7(SReg_32, OPW32)
+DECODE_OPERAND_REG_7(SReg_32_XM0_XEXEC, OPW32)
+DECODE_OPERAND_REG_7(SReg_32_XEXEC_HI, OPW32)
+DECODE_OPERAND_REG_7(SReg_64, OPW64)
+DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
+DECODE_OPERAND_REG_7(SReg_128, OPW128)
+DECODE_OPERAND_REG_7(SReg_256, OPW256)
+DECODE_OPERAND_REG_7(SReg_512, OPW512)
+
+DECODE_OPERAND_REG_8(AGPR_32)
+DECODE_OPERAND_REG_8(AReg_64)
+DECODE_OPERAND_REG_8(AReg_128)
+DECODE_OPERAND_REG_8(AReg_256)
+DECODE_OPERAND_REG_8(AReg_512)
+DECODE_OPERAND_REG_8(AReg_1024)
+
+DECODE_OPERAND_REG_AV10(AVDst_128, OPW128)
+DECODE_OPERAND_REG_AV10(AVDst_512, OPW512)
+
+// Decoders for register-only source RegisterOperands that use the 9-bit Src
+// encoding: 'decodeOperand_<RegClass>'.
+
+DECODE_OPERAND_SRC_REG_9(VGPR_32, OPW32)
+DECODE_OPERAND_SRC_REG_9(VReg_64, OPW64)
+DECODE_OPERAND_SRC_REG_9(VReg_128, OPW128)
+DECODE_OPERAND_SRC_REG_9(VReg_256, OPW256)
+DECODE_OPERAND_SRC_REG_9(VRegOrLds_32, OPW32)
+
+DECODE_OPERAND_SRC_REG_A9(AGPR_32, OPW32)
+
+DECODE_SRC_OPERAND_REG_AV10(AV_32, OPW32)
+DECODE_SRC_OPERAND_REG_AV10(AV_64, OPW64)
+DECODE_SRC_OPERAND_REG_AV10(AV_128, OPW128)
+
+// Decoders for register or immediate RegisterOperands that use 9-bit Src
+// encoding: 'decodeOperand_<RegClass>_Imm<ImmWidth>'.
+
+DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_64, OPW64, 64)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(SRegOrLds_32, OPW32, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32_Lo128, OPW16, 16)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 16)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_32, OPW32, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 64)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VS_64, OPW64, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_64, OPW64, 64)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_128, OPW128, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_256, OPW256, 64)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_512, OPW512, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_9(VReg_1024, OPW1024, 32)
+
+DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_64, OPW64, 64)
+DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_128, OPW128, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_256, OPW256, 64)
+DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_512, OPW512, 32)
+DECODE_OPERAND_SRC_REG_OR_IMM_A9(AReg_1024, OPW1024, 32)
+
+DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32_Lo128, OPW16, 16)
+DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW16, 16)
+DECODE_OPERAND_SRC_REG_OR_IMM_DEFERRED_9(VS_32, OPW32, 32)
+
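// Illustrative sketch (editor's expansion, not a hunk from this diff): taking
// DECODE_OPERAND_SRC_REG_OR_IMM_9(SReg_32, OPW32, 32) above as an example,
// the DECODE_SrcOp macro should produce roughly this handler, which defers
// the register-or-immediate decision to decodeSrcOp with a 32-bit immediate
// width.
static DecodeStatus decodeOperand_SReg_32_Imm32(MCInst &Inst, unsigned Imm,
                                                uint64_t /*Addr*/,
                                                const MCDisassembler *Decoder) {
  assert(Imm < (1 << 9) && "9-bit encoding");
  auto DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
  // OPW32 operand, no mandatory literal, immediates decoded at 32-bit width.
  return addOperand(Inst, DAsm->decodeSrcOp(AMDGPUDisassembler::OPW32, Imm,
                                            /*MandatoryLiteral=*/false,
                                            /*ImmWidth=*/32));
}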
+static DecodeStatus decodeOperand_KImmFP(MCInst &Inst, unsigned Imm,
+ uint64_t Addr,
+ const MCDisassembler *Decoder) {
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
return addOperand(Inst, DAsm->decodeMandatoryLiteralConstant(Imm));
}
-static DecodeStatus
-decodeOperand_VS_16_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr,
- const MCDisassembler *Decoder) {
- const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
- return addOperand(
- Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW16, Imm, true));
-}
-
-static DecodeStatus
-decodeOperand_VS_32_Deferred(MCInst &Inst, unsigned Imm, uint64_t Addr,
- const MCDisassembler *Decoder) {
- const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
- return addOperand(
- Inst, DAsm->decodeSrcOp(llvm::AMDGPUDisassembler::OPW32, Imm, true));
-}
-
static DecodeStatus decodeOperandVOPDDstY(MCInst &Inst, unsigned Val,
uint64_t Addr, const void *Decoder) {
const auto *DAsm = static_cast<const AMDGPUDisassembler *>(Decoder);
@@ -381,13 +356,6 @@ DecodeAVLdSt_160RegisterClass(MCInst &Inst, unsigned Imm, uint64_t Addr,
Decoder);
}
-static DecodeStatus decodeOperand_SReg_32(MCInst &Inst, unsigned Imm,
- uint64_t Addr,
- const MCDisassembler *Decoder) {
- auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
- return addOperand(Inst, DAsm->decodeOperand_SReg_32(Imm));
-}
-
#define DECODE_SDWA(DecName) \
DECODE_OPERAND(decodeSDWA##DecName, decodeSDWA##DecName)
@@ -436,7 +404,6 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes_,
uint64_t Address,
raw_ostream &CS) const {
- CommentStream = &CS;
bool IsSDWA = false;
unsigned MaxInstBytesNum = std::min((size_t)TargetMaxInstBytes, Bytes_.size());
@@ -451,13 +418,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// encodings
if (isGFX11Plus() && Bytes.size() >= 12 ) {
DecoderUInt128 DecW = eat12Bytes(Bytes);
- Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW,
- Address);
+ Res = tryDecodeInst(DecoderTableDPP8GFX1196, MI, DecW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear
- Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW,
- Address);
+ Res = tryDecodeInst(DecoderTableDPPGFX1196, MI, DecW, Address, CS);
if (Res) {
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOP3P)
convertVOP3PDPPInst(MI);
@@ -469,7 +434,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
break;
}
- Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address);
+ Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS);
if (Res)
break;
}
@@ -479,8 +444,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Bytes.size() >= 8) {
const uint64_t QW = eatBytes<uint64_t>(Bytes);
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) {
- Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address);
+ if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) {
+ Res = tryDecodeInst(DecoderTableGFX10_B64, MI, QW, Address, CS);
if (Res) {
if (AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dpp8)
== -1)
@@ -491,37 +456,37 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
}
- Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableDPP864, MI, QW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear
- Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableDPP8GFX1164, MI, QW, Address, CS);
if (Res && convertDPP8Inst(MI) == MCDisassembler::Success)
break;
MI = MCInst(); // clear
- Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableDPP64, MI, QW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableDPPGFX1164, MI, QW, Address, CS);
if (Res) {
if (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::VOPC)
convertVOPCDPPInst(MI);
break;
}
- Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableSDWA64, MI, QW, Address, CS);
if (Res) { IsSDWA = true; break; }
- Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableSDWA964, MI, QW, Address, CS);
if (Res) { IsSDWA = true; break; }
- Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableSDWA1064, MI, QW, Address, CS);
if (Res) { IsSDWA = true; break; }
- if (STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem]) {
- Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address);
+ if (STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem)) {
+ Res = tryDecodeInst(DecoderTableGFX80_UNPACKED64, MI, QW, Address, CS);
if (Res)
break;
}
@@ -529,8 +494,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// Some GFX9 subtargets repurposed the v_mad_mix_f32, v_mad_mixlo_f16 and
// v_mad_mixhi_f16 for FMA variants. Try to decode using this special
// table first so we print the correct name.
- if (STI.getFeatureBits()[AMDGPU::FeatureFmaMixInsts]) {
- Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address);
+ if (STI.hasFeature(AMDGPU::FeatureFmaMixInsts)) {
+ Res = tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS);
if (Res)
break;
}
@@ -542,64 +507,64 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// Try decode 32-bit instruction
if (Bytes.size() < 4) break;
const uint32_t DW = eatBytes<uint32_t>(Bytes);
- Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address);
+ Res = tryDecodeInst(DecoderTableGFX832, MI, DW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address);
+ Res = tryDecodeInst(DecoderTableAMDGPU32, MI, DW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address);
+ Res = tryDecodeInst(DecoderTableGFX932, MI, DW, Address, CS);
if (Res) break;
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) {
- Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address);
+ if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
+ Res = tryDecodeInst(DecoderTableGFX90A32, MI, DW, Address, CS);
if (Res)
break;
}
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding]) {
- Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address);
+ if (STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding)) {
+ Res = tryDecodeInst(DecoderTableGFX10_B32, MI, DW, Address, CS);
if (Res) break;
}
- Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address);
+ Res = tryDecodeInst(DecoderTableGFX1032, MI, DW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address);
+ Res = tryDecodeInst(DecoderTableGFX1132, MI, DW, Address, CS);
if (Res) break;
if (Bytes.size() < 4) break;
const uint64_t QW = ((uint64_t)eatBytes<uint32_t>(Bytes) << 32) | DW;
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX940Insts]) {
- Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address);
+ if (STI.hasFeature(AMDGPU::FeatureGFX940Insts)) {
+ Res = tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS);
if (Res)
break;
}
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) {
- Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address);
+ if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts)) {
+ Res = tryDecodeInst(DecoderTableGFX90A64, MI, QW, Address, CS);
if (Res)
break;
}
- Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableGFX864, MI, QW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableAMDGPU64, MI, QW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableGFX964, MI, QW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableGFX1064, MI, QW, Address, CS);
if (Res) break;
- Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableGFX1164, MI, QW, Address, CS);
if (Res)
break;
- Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address);
+ Res = tryDecodeInst(DecoderTableWMMAGFX1164, MI, QW, Address, CS);
} while (false);
if (Res && AMDGPU::isMAC(MI.getOpcode())) {
@@ -627,7 +592,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
if (Res && (MCII->get(MI.getOpcode()).TSFlags &
(SIInstrFlags::MTBUF | SIInstrFlags::MUBUF)) &&
- (STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts])) {
+ (STI.hasFeature(AMDGPU::FeatureGFX90AInsts))) {
// GFX90A lost TFE, its place is occupied by ACC.
int TFEOpIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
@@ -714,7 +679,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
}
DecodeStatus AMDGPUDisassembler::convertEXPInst(MCInst &MI) const {
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX11]) {
+ if (STI.hasFeature(AMDGPU::FeatureGFX11)) {
// The MCInst still has these fields even though they are no longer encoded
// in the GFX11 instruction.
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::vm);
@@ -736,12 +701,12 @@ DecodeStatus AMDGPUDisassembler::convertVINTERPInst(MCInst &MI) const {
}
DecodeStatus AMDGPUDisassembler::convertSDWAInst(MCInst &MI) const {
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
- STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
+ if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
+ STI.hasFeature(AMDGPU::FeatureGFX10)) {
if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::sdst))
// VOPC - insert clamp
insertNamedMCOperand(MI, MCOperand::createImm(0), AMDGPU::OpName::clamp);
- } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
+ } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
int SDst = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::sdst);
if (SDst != -1) {
// VOPC - insert VCC register as sdst
@@ -883,6 +848,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
AMDGPU::OpName::vdata);
int VAddr0Idx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
+ int RsrcIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::dmask);
@@ -898,14 +865,14 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
assert(VDataIdx != -1);
if (BaseOpcode->BVH) {
// Add A16 operand for intersect_ray instructions
- if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::a16))
- addOperand(MI, MCOperand::createImm(1));
+ addOperand(MI, MCOperand::createImm(BaseOpcode->A16));
return MCDisassembler::Success;
}
bool IsAtomic = (VDstIdx != -1);
bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
bool IsNSA = false;
+ bool IsPartialNSA = false;
unsigned AddrSize = Info->VAddrDwords;
if (isGFX10Plus()) {
@@ -927,9 +894,12 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
AddrSize = 16;
} else {
if (AddrSize > Info->VAddrDwords) {
- // The NSA encoding does not contain enough operands for the combination
- // of base opcode / dimension. Should this be an error?
- return MCDisassembler::Success;
+ if (!STI.hasFeature(AMDGPU::FeaturePartialNSAEncoding)) {
+ // The NSA encoding does not contain enough operands for the
+ // combination of base opcode / dimension. Should this be an error?
+ return MCDisassembler::Success;
+ }
+ IsPartialNSA = true;
}
}
}
@@ -972,17 +942,20 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
}
}
- // If not using NSA on GFX10+, widen address register to correct size.
- unsigned NewVAddr0 = AMDGPU::NoRegister;
- if (isGFX10Plus() && !IsNSA && AddrSize != Info->VAddrDwords) {
- unsigned VAddr0 = MI.getOperand(VAddr0Idx).getReg();
- unsigned VAddrSub0 = MRI.getSubReg(VAddr0, AMDGPU::sub0);
- VAddr0 = (VAddrSub0 != 0) ? VAddrSub0 : VAddr0;
-
- auto AddrRCID = MCII->get(NewOpcode).operands()[VAddr0Idx].RegClass;
- NewVAddr0 = MRI.getMatchingSuperReg(VAddr0, AMDGPU::sub0,
+  // If not using NSA on GFX10+, widen the vaddr0 address register to the
+  // correct size. If using partial NSA on GFX11+, widen the last address
+  // register.
+ int VAddrSAIdx = IsPartialNSA ? (RsrcIdx - 1) : VAddr0Idx;
+ unsigned NewVAddrSA = AMDGPU::NoRegister;
+ if (STI.hasFeature(AMDGPU::FeatureNSAEncoding) && (!IsNSA || IsPartialNSA) &&
+ AddrSize != Info->VAddrDwords) {
+ unsigned VAddrSA = MI.getOperand(VAddrSAIdx).getReg();
+ unsigned VAddrSubSA = MRI.getSubReg(VAddrSA, AMDGPU::sub0);
+ VAddrSA = VAddrSubSA ? VAddrSubSA : VAddrSA;
+
+ auto AddrRCID = MCII->get(NewOpcode).operands()[VAddrSAIdx].RegClass;
+ NewVAddrSA = MRI.getMatchingSuperReg(VAddrSA, AMDGPU::sub0,
&MRI.getRegClass(AddrRCID));
- if (NewVAddr0 == AMDGPU::NoRegister)
+ if (!NewVAddrSA)
return MCDisassembler::Success;
}
@@ -997,8 +970,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
}
}
- if (NewVAddr0 != AMDGPU::NoRegister) {
- MI.getOperand(VAddr0Idx) = MCOperand::createReg(NewVAddr0);
+ if (NewVAddrSA) {
+ MI.getOperand(VAddrSAIdx) = MCOperand::createReg(NewVAddrSA);
} else if (IsNSA) {
assert(AddrSize <= Info->VAddrDwords);
MI.erase(MI.begin() + VAddr0Idx + AddrSize,
@@ -1159,214 +1132,6 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
return createRegOperand(SRegClassID, Val >> shift);
}
-MCOperand AMDGPUDisassembler::decodeOperand_VS_32(unsigned Val) const {
- return decodeSrcOp(OPW32, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
- return decodeSrcOp(OPW64, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VS_128(unsigned Val) const {
- return decodeSrcOp(OPW128, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
- return decodeSrcOp(OPW16, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VSrcV216(unsigned Val) const {
- return decodeSrcOp(OPWV216, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VSrcV232(unsigned Val) const {
- return decodeSrcOp(OPWV232, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32_Lo128(unsigned Val) const {
- return createRegOperand(AMDGPU::VGPR_32_Lo128RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
- // Some instructions have operand restrictions beyond what the encoding
- // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
- // high bit.
- Val &= 255;
-
- return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VRegOrLds_32(unsigned Val) const {
- return decodeSrcOp(OPW32, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AGPR_32(unsigned Val) const {
- return createRegOperand(AMDGPU::AGPR_32RegClassID, Val & 255);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AReg_64(unsigned Val) const {
- return createRegOperand(AMDGPU::AReg_64RegClassID, Val & 255);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AReg_128(unsigned Val) const {
- return createRegOperand(AMDGPU::AReg_128RegClassID, Val & 255);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AReg_256(unsigned Val) const {
- return createRegOperand(AMDGPU::AReg_256RegClassID, Val & 255);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AReg_288(unsigned Val) const {
- return createRegOperand(AMDGPU::AReg_288RegClassID, Val & 255);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AReg_320(unsigned Val) const {
- return createRegOperand(AMDGPU::AReg_320RegClassID, Val & 255);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AReg_352(unsigned Val) const {
- return createRegOperand(AMDGPU::AReg_352RegClassID, Val & 255);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AReg_384(unsigned Val) const {
- return createRegOperand(AMDGPU::AReg_384RegClassID, Val & 255);
-}
-
-
-MCOperand AMDGPUDisassembler::decodeOperand_AReg_512(unsigned Val) const {
- return createRegOperand(AMDGPU::AReg_512RegClassID, Val & 255);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AReg_1024(unsigned Val) const {
- return createRegOperand(AMDGPU::AReg_1024RegClassID, Val & 255);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AV_32(unsigned Val) const {
- return decodeSrcOp(OPW32, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AV_64(unsigned Val) const {
- return decodeSrcOp(OPW64, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AV_128(unsigned Val) const {
- return decodeSrcOp(OPW128, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AVDst_128(unsigned Val) const {
- using namespace AMDGPU::EncValues;
- assert((Val & IS_VGPR) == 0); // Val{8} is not encoded but assumed to be 1.
- return decodeSrcOp(OPW128, Val | IS_VGPR);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_AVDst_512(unsigned Val) const {
- using namespace AMDGPU::EncValues;
- assert((Val & IS_VGPR) == 0); // Val{8} is not encoded but assumed to be 1.
- return decodeSrcOp(OPW512, Val | IS_VGPR);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_64(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_64RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_96(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_96RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_128(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_128RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_256(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_256RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_288(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_288RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_320(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_320RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_352(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_352RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_384(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_384RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_512(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_512RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_VReg_1024(unsigned Val) const {
- return createRegOperand(AMDGPU::VReg_1024RegClassID, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
- // table-gen generated disassembler doesn't care about operand types
- // leaving only registry class so SSrc_32 operand turns into SReg_32
- // and therefore we accept immediates and literals here as well
- return decodeSrcOp(OPW32, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
- unsigned Val) const {
- // SReg_32_XM0 is SReg_32 without M0 or EXEC_LO/EXEC_HI
- return decodeOperand_SReg_32(Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XEXEC_HI(
- unsigned Val) const {
- // SReg_32_XM0 is SReg_32 without EXEC_HI
- return decodeOperand_SReg_32(Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SRegOrLds_32(unsigned Val) const {
- // table-gen generated disassembler doesn't care about operand types
- // leaving only registry class so SSrc_32 operand turns into SReg_32
- // and therefore we accept immediates and literals here as well
- return decodeSrcOp(OPW32, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
- return decodeSrcOp(OPW64, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
- return decodeSrcOp(OPW64, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_128(unsigned Val) const {
- return decodeSrcOp(OPW128, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_256(unsigned Val) const {
- return decodeDstOp(OPW256, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_288(unsigned Val) const {
- return decodeDstOp(OPW288, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_320(unsigned Val) const {
- return decodeDstOp(OPW320, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_352(unsigned Val) const {
- return decodeDstOp(OPW352, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_384(unsigned Val) const {
- return decodeDstOp(OPW384, Val);
-}
-
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_512(unsigned Val) const {
- return decodeDstOp(OPW512, Val);
-}
-
// Decode Literals for insts which always have a literal in the encoding
MCOperand
AMDGPUDisassembler::decodeMandatoryLiteralConstant(unsigned Val) const {
@@ -1410,21 +1175,21 @@ MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
static int64_t getInlineImmVal32(unsigned Imm) {
switch (Imm) {
case 240:
- return FloatToBits(0.5f);
+ return llvm::bit_cast<uint32_t>(0.5f);
case 241:
- return FloatToBits(-0.5f);
+ return llvm::bit_cast<uint32_t>(-0.5f);
case 242:
- return FloatToBits(1.0f);
+ return llvm::bit_cast<uint32_t>(1.0f);
case 243:
- return FloatToBits(-1.0f);
+ return llvm::bit_cast<uint32_t>(-1.0f);
case 244:
- return FloatToBits(2.0f);
+ return llvm::bit_cast<uint32_t>(2.0f);
case 245:
- return FloatToBits(-2.0f);
+ return llvm::bit_cast<uint32_t>(-2.0f);
case 246:
- return FloatToBits(4.0f);
+ return llvm::bit_cast<uint32_t>(4.0f);
case 247:
- return FloatToBits(-4.0f);
+ return llvm::bit_cast<uint32_t>(-4.0f);
case 248: // 1 / (2 * PI)
return 0x3e22f983;
default:
@@ -1435,21 +1200,21 @@ static int64_t getInlineImmVal32(unsigned Imm) {
static int64_t getInlineImmVal64(unsigned Imm) {
switch (Imm) {
case 240:
- return DoubleToBits(0.5);
+ return llvm::bit_cast<uint64_t>(0.5);
case 241:
- return DoubleToBits(-0.5);
+ return llvm::bit_cast<uint64_t>(-0.5);
case 242:
- return DoubleToBits(1.0);
+ return llvm::bit_cast<uint64_t>(1.0);
case 243:
- return DoubleToBits(-1.0);
+ return llvm::bit_cast<uint64_t>(-1.0);
case 244:
- return DoubleToBits(2.0);
+ return llvm::bit_cast<uint64_t>(2.0);
case 245:
- return DoubleToBits(-2.0);
+ return llvm::bit_cast<uint64_t>(-2.0);
case 246:
- return DoubleToBits(4.0);
+ return llvm::bit_cast<uint64_t>(4.0);
case 247:
- return DoubleToBits(-4.0);
+ return llvm::bit_cast<uint64_t>(-4.0);
case 248: // 1 / (2 * PI)
return 0x3fc45f306dc9c882;
default:
@@ -1482,23 +1247,21 @@ static int64_t getInlineImmVal16(unsigned Imm) {
}
}
-MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
+MCOperand AMDGPUDisassembler::decodeFPImmed(unsigned ImmWidth, unsigned Imm) {
assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
&& Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
// ToDo: case 248: 1/(2*PI) - is allowed only on VI
- switch (Width) {
- case OPW32:
- case OPW128: // splat constants
- case OPW512:
- case OPW1024:
- case OPWV232:
+ // ImmWidth 0 is a default case where the operand should not allow immediates.
+ // The Imm value is still decoded into a 32-bit immediate operand; the inst
+ // printer will use it to print a verbose error message.
+ switch (ImmWidth) {
+ case 0:
+ case 32:
return MCOperand::createImm(getInlineImmVal32(Imm));
- case OPW64:
- case OPW256:
+ case 64:
return MCOperand::createImm(getInlineImmVal64(Imm));
- case OPW16:
- case OPWV216:
+ case 16:
return MCOperand::createImm(getInlineImmVal16(Imm));
default:
llvm_unreachable("implement me");
@@ -1612,7 +1375,8 @@ int AMDGPUDisassembler::getTTmpIdx(unsigned Val) const {
}
MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
- bool MandatoryLiteral) const {
+ bool MandatoryLiteral,
+ unsigned ImmWidth) const {
using namespace AMDGPU::EncValues;
assert(Val < 1024); // enum10
@@ -1639,7 +1403,7 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
return decodeIntImmed(Val);
if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
- return decodeFPImmed(Width, Val);
+ return decodeFPImmed(ImmWidth, Val);
if (Val == LITERAL_CONST) {
if (MandatoryLiteral)
@@ -1662,26 +1426,6 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val,
}
}
-MCOperand AMDGPUDisassembler::decodeDstOp(const OpWidthTy Width, unsigned Val) const {
- using namespace AMDGPU::EncValues;
-
- assert(Val < 128);
- assert(Width == OPW256 || Width == OPW512);
-
- if (Val <= SGPR_MAX) {
- // "SGPR_MIN <= Val" is always true and causes compilation warning.
- static_assert(SGPR_MIN == 0);
- return createSRegOperand(getSgprClassId(Width), Val - SGPR_MIN);
- }
-
- int TTmpIdx = getTTmpIdx(Val);
- if (TTmpIdx >= 0) {
- return createSRegOperand(getTtmpClassId(Width), TTmpIdx);
- }
-
- llvm_unreachable("unknown dst register");
-}
-
// Bit 0 of DstY isn't stored in the instruction, because it's always the
// opposite of bit 0 of DstX.
MCOperand AMDGPUDisassembler::decodeVOPDDstYOp(MCInst &Inst,
@@ -1764,12 +1508,13 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
}
MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
- const unsigned Val) const {
+ const unsigned Val,
+ unsigned ImmWidth) const {
using namespace AMDGPU::SDWA;
using namespace AMDGPU::EncValues;
- if (STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
- STI.getFeatureBits()[AMDGPU::FeatureGFX10]) {
+ if (STI.hasFeature(AMDGPU::FeatureGFX9) ||
+ STI.hasFeature(AMDGPU::FeatureGFX10)) {
// XXX: cast to int is needed to avoid stupid warning:
// compare with unsigned is always true
if (int(SDWA9EncValues::SRC_VGPR_MIN) <= int(Val) &&
@@ -1795,31 +1540,31 @@ MCOperand AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width,
return decodeIntImmed(SVal);
if (INLINE_FLOATING_C_MIN <= SVal && SVal <= INLINE_FLOATING_C_MAX)
- return decodeFPImmed(Width, SVal);
+ return decodeFPImmed(ImmWidth, SVal);
return decodeSpecialReg32(SVal);
- } else if (STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands]) {
+ } else if (STI.hasFeature(AMDGPU::FeatureVolcanicIslands)) {
return createRegOperand(getVgprClassId(Width), Val);
}
llvm_unreachable("unsupported target");
}
MCOperand AMDGPUDisassembler::decodeSDWASrc16(unsigned Val) const {
- return decodeSDWASrc(OPW16, Val);
+ return decodeSDWASrc(OPW16, Val, 16);
}
MCOperand AMDGPUDisassembler::decodeSDWASrc32(unsigned Val) const {
- return decodeSDWASrc(OPW32, Val);
+ return decodeSDWASrc(OPW32, Val, 32);
}
MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
using namespace AMDGPU::SDWA;
- assert((STI.getFeatureBits()[AMDGPU::FeatureGFX9] ||
- STI.getFeatureBits()[AMDGPU::FeatureGFX10]) &&
+ assert((STI.hasFeature(AMDGPU::FeatureGFX9) ||
+ STI.hasFeature(AMDGPU::FeatureGFX10)) &&
"SDWAVopcDst should be present only on GFX9+");
- bool IsWave64 = STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64];
+ bool IsWave64 = STI.hasFeature(AMDGPU::FeatureWavefrontSize64);
if (Val & SDWA9EncValues::VOPC_DST_VCC_MASK) {
Val &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
@@ -1840,18 +1585,19 @@ MCOperand AMDGPUDisassembler::decodeSDWAVopcDst(unsigned Val) const {
}
MCOperand AMDGPUDisassembler::decodeBoolReg(unsigned Val) const {
- return STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64] ?
- decodeOperand_SReg_64(Val) : decodeOperand_SReg_32(Val);
+ return STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
+ ? decodeSrcOp(OPW64, Val)
+ : decodeSrcOp(OPW32, Val);
}
bool AMDGPUDisassembler::isVI() const {
- return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
+ return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}
bool AMDGPUDisassembler::isGFX9() const { return AMDGPU::isGFX9(STI); }
bool AMDGPUDisassembler::isGFX90A() const {
- return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts];
+ return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
}
bool AMDGPUDisassembler::isGFX9Plus() const { return AMDGPU::isGFX9Plus(STI); }
@@ -1863,7 +1609,7 @@ bool AMDGPUDisassembler::isGFX10Plus() const {
}
bool AMDGPUDisassembler::isGFX11() const {
- return STI.getFeatureBits()[AMDGPU::FeatureGFX11];
+ return STI.hasFeature(AMDGPU::FeatureGFX11);
}
bool AMDGPUDisassembler::isGFX11Plus() const {
@@ -1872,16 +1618,21 @@ bool AMDGPUDisassembler::isGFX11Plus() const {
bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
- return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
+ return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
}
//===----------------------------------------------------------------------===//
// AMDGPU specific symbol handling
//===----------------------------------------------------------------------===//
+#define GET_FIELD(MASK) (AMDHSA_BITS_GET(FourByteBuffer, MASK))
#define PRINT_DIRECTIVE(DIRECTIVE, MASK) \
do { \
- KdStream << Indent << DIRECTIVE " " \
- << ((FourByteBuffer & MASK) >> (MASK##_SHIFT)) << '\n'; \
+ KdStream << Indent << DIRECTIVE " " << GET_FIELD(MASK) << '\n'; \
+ } while (0)
+#define PRINT_PSEUDO_DIRECTIVE_COMMENT(DIRECTIVE, MASK) \
+ do { \
+ KdStream << Indent << MAI.getCommentString() << ' ' << DIRECTIVE " " \
+ << GET_FIELD(MASK) << '\n'; \
} while (0)
// NOLINTNEXTLINE(readability-identifier-naming)
@@ -1896,11 +1647,11 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
// simply calculate the inverse of what the assembler does.
uint32_t GranulatedWorkitemVGPRCount =
- (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT) >>
- COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_SHIFT;
+ GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT);
- uint32_t NextFreeVGPR = (GranulatedWorkitemVGPRCount + 1) *
- AMDGPU::IsaInfo::getVGPREncodingGranule(&STI);
+ uint32_t NextFreeVGPR =
+ (GranulatedWorkitemVGPRCount + 1) *
+ AMDGPU::IsaInfo::getVGPREncodingGranule(&STI, EnableWavefrontSize32);
KdStream << Indent << ".amdhsa_next_free_vgpr " << NextFreeVGPR << '\n';
@@ -1924,8 +1675,7 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
// The disassembler cannot recover the original values of those 3 directives.
uint32_t GranulatedWavefrontSGPRCount =
- (FourByteBuffer & COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT) >>
- COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_SHIFT;
+ GET_FIELD(COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT);
if (isGFX10Plus() && GranulatedWavefrontSGPRCount)
return MCDisassembler::Fail;
@@ -2035,7 +1785,46 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC2(
return MCDisassembler::Success;
}
+// NOLINTNEXTLINE(readability-identifier-naming)
+MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
+ uint32_t FourByteBuffer, raw_string_ostream &KdStream) const {
+ using namespace amdhsa;
+ StringRef Indent = "\t";
+ if (isGFX90A()) {
+ KdStream << Indent << ".amdhsa_accum_offset "
+ << (GET_FIELD(COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET) + 1) * 4
+ << '\n';
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED0)
+ return MCDisassembler::Fail;
+ PRINT_DIRECTIVE(".amdhsa_tg_split", COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT);
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX90A_RESERVED1)
+ return MCDisassembler::Fail;
+ } else if (isGFX10Plus()) {
+ if (!EnableWavefrontSize32 || !*EnableWavefrontSize32) {
+ PRINT_DIRECTIVE(".amdhsa_shared_vgpr_count",
+ COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
+ } else {
+ PRINT_PSEUDO_DIRECTIVE_COMMENT(
+ "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
+ }
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
+ COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE);
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
+ COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
+ COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_END);
+ if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED0)
+ return MCDisassembler::Fail;
+ PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
+ COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
+ } else if (FourByteBuffer) {
+ return MCDisassembler::Fail;
+ }
+ return MCDisassembler::Success;
+}
+#undef PRINT_PSEUDO_DIRECTIVE_COMMENT
#undef PRINT_DIRECTIVE
+#undef GET_FIELD
MCDisassembler::DecodeStatus
AMDGPUDisassembler::decodeKernelDescriptorDirective(
@@ -2103,30 +1892,16 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
return MCDisassembler::Success;
case amdhsa::COMPUTE_PGM_RSRC3_OFFSET:
- // COMPUTE_PGM_RSRC3
- // - Only set for GFX10, GFX6-9 have this to be 0.
- // - Currently no directives directly control this.
FourByteBuffer = DE.getU32(Cursor);
- if (!isGFX10Plus() && FourByteBuffer) {
- return MCDisassembler::Fail;
- }
- return MCDisassembler::Success;
+ return decodeCOMPUTE_PGM_RSRC3(FourByteBuffer, KdStream);
case amdhsa::COMPUTE_PGM_RSRC1_OFFSET:
FourByteBuffer = DE.getU32(Cursor);
- if (decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream) ==
- MCDisassembler::Fail) {
- return MCDisassembler::Fail;
- }
- return MCDisassembler::Success;
+ return decodeCOMPUTE_PGM_RSRC1(FourByteBuffer, KdStream);
case amdhsa::COMPUTE_PGM_RSRC2_OFFSET:
FourByteBuffer = DE.getU32(Cursor);
- if (decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream) ==
- MCDisassembler::Fail) {
- return MCDisassembler::Fail;
- }
- return MCDisassembler::Success;
+ return decodeCOMPUTE_PGM_RSRC2(FourByteBuffer, KdStream);
case amdhsa::KERNEL_CODE_PROPERTIES_OFFSET:
using namespace amdhsa;
@@ -2161,7 +1936,7 @@ AMDGPUDisassembler::decodeKernelDescriptorDirective(
KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
}
- if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5)
+ if (AMDGPU::getAmdhsaCodeObjectVersion() >= AMDGPU::AMDHSA_COV5)
PRINT_DIRECTIVE(".amdhsa_uses_dynamic_stack",
KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
@@ -2192,6 +1967,20 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeKernelDescriptor(
if (Bytes.size() != 64 || KdAddress % 64 != 0)
return MCDisassembler::Fail;
+ // FIXME: We can't actually decode "in order" as is done below, as e.g. GFX10
+ // requires us to know the setting of .amdhsa_wavefront_size32 in order to
+ // accurately produce .amdhsa_next_free_vgpr, and they appear in the wrong
+ // order. Work around this by first looking up .amdhsa_wavefront_size32 here
+ // when required.
+ if (isGFX10Plus()) {
+ uint16_t KernelCodeProperties =
+ support::endian::read16(&Bytes[amdhsa::KERNEL_CODE_PROPERTIES_OFFSET],
+ support::endianness::little);
+ EnableWavefrontSize32 =
+ AMDHSA_BITS_GET(KernelCodeProperties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
+ }
+
std::string Kd;
raw_string_ostream KdStream(Kd);
KdStream << ".amdhsa_kernel " << KdName << '\n';
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 870f7b17df20..444312473a5f 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -16,14 +16,16 @@
#define LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
-#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/DataExtractor.h"
#include <memory>
namespace llvm {
+class MCAsmInfo;
class MCInst;
class MCOperand;
class MCSubtargetInfo;
@@ -91,10 +93,12 @@ class AMDGPUDisassembler : public MCDisassembler {
private:
std::unique_ptr<MCInstrInfo const> const MCII;
const MCRegisterInfo &MRI;
+ const MCAsmInfo &MAI;
const unsigned TargetMaxInstBytes;
mutable ArrayRef<uint8_t> Bytes;
mutable uint32_t Literal;
mutable bool HasLiteral;
+ mutable std::optional<bool> EnableWavefrontSize32;
public:
AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
@@ -115,14 +119,25 @@ public:
template <typename InsnType>
DecodeStatus tryDecodeInst(const uint8_t *Table, MCInst &MI, InsnType Inst,
- uint64_t Address) const {
+ uint64_t Address, raw_ostream &Comments) const {
assert(MI.getOpcode() == 0);
assert(MI.getNumOperands() == 0);
MCInst TmpInst;
HasLiteral = false;
const auto SavedBytes = Bytes;
- if (decodeInstruction(Table, TmpInst, Inst, Address, this, STI)) {
+
+ SmallString<64> LocalComments;
+ raw_svector_ostream LocalCommentStream(LocalComments);
+ CommentStream = &LocalCommentStream;
+
+ DecodeStatus Res =
+ decodeInstruction(Table, TmpInst, Inst, Address, this, STI);
+
+ CommentStream = nullptr;
+
+ if (Res != Fail) {
MI = TmpInst;
+ Comments << LocalComments;
return MCDisassembler::Success;
}
Bytes = SavedBytes;
@@ -155,6 +170,13 @@ public:
DecodeStatus decodeCOMPUTE_PGM_RSRC2(uint32_t FourByteBuffer,
raw_string_ostream &KdStream) const;
+ /// Decode as directives that handle COMPUTE_PGM_RSRC3.
+ /// \param FourByteBuffer - Bytes holding contents of COMPUTE_PGM_RSRC3.
+ /// \param KdStream - Stream to write the disassembled directives to.
+ // NOLINTNEXTLINE(readability-identifier-naming)
+ DecodeStatus decodeCOMPUTE_PGM_RSRC3(uint32_t FourByteBuffer,
+ raw_string_ostream &KdStream) const;
+
DecodeStatus convertEXPInst(MCInst &MI) const;
DecodeStatus convertVINTERPInst(MCInst &MI) const;
DecodeStatus convertFMAanyK(MCInst &MI, int ImmLitIdx) const;
@@ -166,58 +188,6 @@ public:
DecodeStatus convertVOPCDPPInst(MCInst &MI) const;
void convertMacDPPInst(MCInst &MI) const;
- MCOperand decodeOperand_VGPR_32(unsigned Val) const;
- MCOperand decodeOperand_VGPR_32_Lo128(unsigned Val) const;
- MCOperand decodeOperand_VRegOrLds_32(unsigned Val) const;
-
- MCOperand decodeOperand_VS_32(unsigned Val) const;
- MCOperand decodeOperand_VS_64(unsigned Val) const;
- MCOperand decodeOperand_VS_128(unsigned Val) const;
- MCOperand decodeOperand_VSrc16(unsigned Val) const;
- MCOperand decodeOperand_VSrcV216(unsigned Val) const;
- MCOperand decodeOperand_VSrcV232(unsigned Val) const;
-
- MCOperand decodeOperand_VReg_64(unsigned Val) const;
- MCOperand decodeOperand_VReg_96(unsigned Val) const;
- MCOperand decodeOperand_VReg_128(unsigned Val) const;
- MCOperand decodeOperand_VReg_256(unsigned Val) const;
- MCOperand decodeOperand_VReg_288(unsigned Val) const;
- MCOperand decodeOperand_VReg_320(unsigned Val) const;
- MCOperand decodeOperand_VReg_352(unsigned Val) const;
- MCOperand decodeOperand_VReg_384(unsigned Val) const;
- MCOperand decodeOperand_VReg_512(unsigned Val) const;
- MCOperand decodeOperand_VReg_1024(unsigned Val) const;
-
- MCOperand decodeOperand_SReg_32(unsigned Val) const;
- MCOperand decodeOperand_SReg_32_XM0_XEXEC(unsigned Val) const;
- MCOperand decodeOperand_SReg_32_XEXEC_HI(unsigned Val) const;
- MCOperand decodeOperand_SRegOrLds_32(unsigned Val) const;
- MCOperand decodeOperand_SReg_64(unsigned Val) const;
- MCOperand decodeOperand_SReg_64_XEXEC(unsigned Val) const;
- MCOperand decodeOperand_SReg_128(unsigned Val) const;
- MCOperand decodeOperand_SReg_256(unsigned Val) const;
- MCOperand decodeOperand_SReg_288(unsigned Val) const;
- MCOperand decodeOperand_SReg_320(unsigned Val) const;
- MCOperand decodeOperand_SReg_352(unsigned Val) const;
- MCOperand decodeOperand_SReg_384(unsigned Val) const;
- MCOperand decodeOperand_SReg_512(unsigned Val) const;
-
- MCOperand decodeOperand_AGPR_32(unsigned Val) const;
- MCOperand decodeOperand_AReg_64(unsigned Val) const;
- MCOperand decodeOperand_AReg_128(unsigned Val) const;
- MCOperand decodeOperand_AReg_256(unsigned Val) const;
- MCOperand decodeOperand_AReg_288(unsigned Val) const;
- MCOperand decodeOperand_AReg_320(unsigned Val) const;
- MCOperand decodeOperand_AReg_352(unsigned Val) const;
- MCOperand decodeOperand_AReg_384(unsigned Val) const;
- MCOperand decodeOperand_AReg_512(unsigned Val) const;
- MCOperand decodeOperand_AReg_1024(unsigned Val) const;
- MCOperand decodeOperand_AV_32(unsigned Val) const;
- MCOperand decodeOperand_AV_64(unsigned Val) const;
- MCOperand decodeOperand_AV_128(unsigned Val) const;
- MCOperand decodeOperand_AVDst_128(unsigned Val) const;
- MCOperand decodeOperand_AVDst_512(unsigned Val) const;
-
enum OpWidthTy {
OPW32,
OPW64,
@@ -244,18 +214,21 @@ public:
unsigned getTtmpClassId(const OpWidthTy Width) const;
static MCOperand decodeIntImmed(unsigned Imm);
- static MCOperand decodeFPImmed(OpWidthTy Width, unsigned Imm);
+ static MCOperand decodeFPImmed(unsigned ImmWidth, unsigned Imm);
+
MCOperand decodeMandatoryLiteralConstant(unsigned Imm) const;
MCOperand decodeLiteralConstant() const;
MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val,
- bool MandatoryLiteral = false) const;
- MCOperand decodeDstOp(const OpWidthTy Width, unsigned Val) const;
+ bool MandatoryLiteral = false,
+ unsigned ImmWidth = 0) const;
+
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const;
MCOperand decodeSpecialReg32(unsigned Val) const;
MCOperand decodeSpecialReg64(unsigned Val) const;
- MCOperand decodeSDWASrc(const OpWidthTy Width, unsigned Val) const;
+ MCOperand decodeSDWASrc(const OpWidthTy Width, unsigned Val,
+ unsigned ImmWidth = 0) const;
MCOperand decodeSDWASrc16(unsigned Val) const;
MCOperand decodeSDWASrc32(unsigned Val) const;
MCOperand decodeSDWAVopcDst(unsigned Val) const;
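The tryDecodeInst signature in the header above now takes a raw_ostream for comments and buffers decoder comments locally, committing them only when the decode attempt succeeds. A small standalone model of that buffer-and-commit pattern, with decodeOnce() standing in for decodeInstruction() (it is not a real LLVM API):

#include <sstream>
#include <string>

// Stand-in for a single table-driven decode attempt that may emit comments.
static bool decodeOnce(std::ostream &Comments) {
  Comments << "operand uses a literal constant\n"; // example decoder comment
  return true;                                     // pretend this attempt succeeded
}

static bool tryDecode(std::ostream &FinalComments) {
  std::ostringstream Local;        // comments from this attempt land here first
  bool Ok = decodeOnce(Local);
  if (Ok)
    FinalComments << Local.str();  // only successful attempts contribute comments
  return Ok;                       // failed attempts leave FinalComments untouched
}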
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 09f59af06589..5c86d80e7dd2 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -466,7 +466,7 @@ class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
let PseudoInstr = NAME # "_RTN";
}
-multiclass FLAT_Atomic_Pseudo<
+multiclass FLAT_Atomic_Pseudo_NO_RTN<
string opName,
RegisterClass vdst_rc,
ValueType vt,
@@ -484,7 +484,16 @@ multiclass FLAT_Atomic_Pseudo<
let FPAtomic = isFP;
let AddedComplexity = -1; // Prefer global atomics if available
}
+}
+multiclass FLAT_Atomic_Pseudo_RTN<
+ string opName,
+ RegisterClass vdst_rc,
+ ValueType vt,
+ ValueType data_vt = vt,
+ RegisterClass data_rc = vdst_rc,
+ bit isFP = isFloatType<data_vt>.ret,
+ RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
def _RTN : FLAT_AtomicRet_Pseudo <opName,
(outs getLdStRegisterOperand<vdst_rc>.ret:$vdst),
(ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
@@ -496,6 +505,18 @@ multiclass FLAT_Atomic_Pseudo<
}
}
+multiclass FLAT_Atomic_Pseudo<
+ string opName,
+ RegisterClass vdst_rc,
+ ValueType vt,
+ ValueType data_vt = vt,
+ RegisterClass data_rc = vdst_rc,
+ bit isFP = isFloatType<data_vt>.ret,
+ RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
+ defm "" : FLAT_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc, isFP, data_op>;
+ defm "" : FLAT_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc, isFP, data_op>;
+}
+
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
string opName,
RegisterClass vdst_rc,
@@ -709,11 +730,14 @@ let SubtargetPredicate = isGFX90APlus in {
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
} // End SubtargetPredicate = isGFX90APlus
-let SubtargetPredicate = isGFX940Plus in {
+let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>;
- defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2f16>;
- defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2f16>;
-} // End SubtargetPredicate = isGFX940Plus
+ let FPAtomic = 1 in
+ defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2i16>;
+} // End SubtargetPredicate = HasAtomicFlatPkAdd16Insts
+
+let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst, FPAtomic = 1 in
+ defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2i16>;
// GFX7-, GFX10-, GFX11-only flat instructions.
let SubtargetPredicate = isGFX7GFX10GFX11 in {
@@ -917,7 +941,7 @@ let OtherPredicates = [HasAtomicFaddNoRtnInsts] in
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
"global_atomic_add_f32", VGPR_32, f32
>;
-let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in
+let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
"global_atomic_pk_add_f16", VGPR_32, v2f16
>;
@@ -925,7 +949,7 @@ let OtherPredicates = [HasAtomicFaddRtnInsts] in
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN <
"global_atomic_add_f32", VGPR_32, f32
>;
-let OtherPredicates = [isGFX90APlus] in
+let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN <
"global_atomic_pk_add_f16", VGPR_32, v2f16
>;
@@ -937,73 +961,73 @@ let OtherPredicates = [isGFX90APlus] in
// Patterns for global loads with no offset.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (FlatOffset i64:$vaddr, i16:$offset))),
+ (vt (node (FlatOffset i64:$vaddr, i32:$offset))),
(inst $vaddr, $offset)
>;
class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (node (FlatOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
+ (node (FlatOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in),
(inst $vaddr, $offset, 0, $in)
>;
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (node (GlobalOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
+ (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in),
(inst $vaddr, $offset, 0, $in)
>;
class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$in)),
+ (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$in)),
(inst $saddr, $voffset, $offset, 0, $in)
>;
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i16:$offset))),
+ (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))),
(inst $vaddr, $offset)
>;
class GlobalLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset))),
+ (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))),
(inst $saddr, $voffset, $offset, 0)
>;
class GlobalStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt> : GCNPat <
- (node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset)),
+ (node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset)),
(inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;
class GlobalAtomicStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt> : GCNPat <
- (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$data),
+ (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$data),
(inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;
class GlobalAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt, ValueType data_vt = vt> : GCNPat <
- (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), data_vt:$data)),
+ (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), data_vt:$data)),
(inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset)
>;
class GlobalAtomicNoRtnSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt> : GCNPat <
- (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$data),
+ (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$data),
(inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (node vt:$data, (FlatOffset i64:$vaddr, i16:$offset)),
+ (node vt:$data, (FlatOffset i64:$vaddr, i32:$offset)),
(inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;
class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (node vt:$data, (GlobalOffset i64:$vaddr, i16:$offset)),
+ (node vt:$data, (GlobalOffset i64:$vaddr, i32:$offset)),
(inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;
class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
// atomic store follows atomic binop convention so the address comes
// first.
- (node (FlatOffset i64:$vaddr, i16:$offset), vt:$data),
+ (node (FlatOffset i64:$vaddr, i32:$offset), vt:$data),
(inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;
@@ -1011,7 +1035,7 @@ class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt, ValueType data_vt = vt> : GCNPat <
// atomic store follows atomic binop convention so the address comes
// first.
- (node (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data),
+ (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data),
(inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
>;
@@ -1020,17 +1044,17 @@ multiclass FlatAtomicPat <string inst, string node, ValueType vt,
defvar rtnNode = !cast<PatFrags>(node#"_"#vt.Size);
defvar noRtnNode = !cast<PatFrags>(node#"_noret_"#vt.Size);
- def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)),
+ def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
(!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
let AddedComplexity = 1 in
- def : GCNPat <(vt (noRtnNode (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)),
+ def : GCNPat <(vt (noRtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)),
(!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>;
}
class FlatSignedAtomicPatBase <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt, ValueType data_vt = vt> : GCNPat <
- (vt (node (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)),
+ (vt (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data)),
(inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
>;
@@ -1063,49 +1087,49 @@ multiclass FlatSignedAtomicPatWithAddrSpace<string inst, string intr, string add
}
class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset))),
+ (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))),
(inst $vaddr, $offset)
>;
class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset), vt:$in),
+ (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset), vt:$in),
(inst $vaddr, $offset, 0, $in)
>;
class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset)),
+ (node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset)),
(inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset)
>;
class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset))),
+ (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset))),
(inst $saddr, $offset)
>;
class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset), vt:$in)),
+ (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset), vt:$in)),
(inst $saddr, $offset, 0, $in)
>;
class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt> : GCNPat <
- (node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset)),
+ (node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset)),
(inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;
class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset))),
+ (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset))),
(inst $vaddr, $saddr, $offset, 0)
>;
class ScratchStoreSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
ValueType vt> : GCNPat <
- (node vt:$data, (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset)),
+ (node vt:$data, (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset)),
(inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset)
>;
class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
- (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset), vt:$in)),
+ (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset), vt:$in)),
(inst $vaddr, $saddr, $offset, 0, $in)
>;
@@ -1160,8 +1184,8 @@ def : FlatStoreAtomicPat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>;
foreach as = [ "flat", "global" ] in {
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>;
-defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_inc_"#as, i32>;
-defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_dec_"#as, i32>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_load_uinc_wrap_"#as, i32>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_load_udec_wrap_"#as, i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>;
@@ -1174,8 +1198,8 @@ defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>;
defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>;
-defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_inc_"#as, i64>;
-defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_dec_"#as, i64>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_load_uinc_wrap_"#as, i64>;
+defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_load_udec_wrap_"#as, i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>;
defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>;
@@ -1429,8 +1453,8 @@ defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global,
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", "atomic_load_add_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", "atomic_load_sub_global", i32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", "atomic_inc_global", i32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", "atomic_dec_global", i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", "atomic_load_uinc_wrap_global", i32>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", "atomic_load_udec_wrap_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND", "atomic_load_and_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX", "atomic_load_max_global", i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX", "atomic_load_umax_global", i32>;
@@ -1444,8 +1468,8 @@ defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", "atomic_load_sub_global", i64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_inc_global", i64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", "atomic_dec_global", i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_load_uinc_wrap_global", i64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", "atomic_load_udec_wrap_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2", "atomic_load_and_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2", "atomic_load_max_global", i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2", "atomic_load_umax_global", i64>;
@@ -1459,12 +1483,23 @@ defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i
let OtherPredicates = [isGFX10Plus] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN_X2", "atomic_load_fmin_global", f64>;
-defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX_X2", "atomic_load_fmax_global", f64>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>;
+defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>;
+defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>;
+defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>;
+defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>;
+}
+
+let OtherPredicates = [isGFX10Only] in {
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN_X2", "atomic_load_fmin_global", f64>;
+defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX_X2", "atomic_load_fmax_global", f64>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN_X2", "int_amdgcn_global_atomic_fmin", f64>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX_X2", "int_amdgcn_global_atomic_fmax", f64>;
+defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN_X2", "atomic_load_fmin_flat", f64>;
+defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX_X2", "atomic_load_fmax_flat", f64>;
+defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN_X2", "int_amdgcn_flat_atomic_fmin", f64>;
+defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX_X2", "int_amdgcn_flat_atomic_fmax", f64>;
}
let OtherPredicates = [HasAtomicFaddNoRtnInsts] in {
@@ -1473,7 +1508,7 @@ defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amd
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>;
}
-let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in {
+let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in {
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>;
defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>;
}
@@ -1484,14 +1519,17 @@ defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgc
defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>;
}
+let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in {
+defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>;
+defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>;
+}
+
let OtherPredicates = [isGFX90APlus] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>;
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>;
defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>;
-defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>;
-defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>;
defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>;
@@ -1507,12 +1545,14 @@ defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>
defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f32>;
}
-let OtherPredicates = [isGFX940Plus] in {
+let OtherPredicates = [HasAtomicFlatPkAdd16Insts] in {
defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", v2f16>;
defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_BF16", "int_amdgcn_flat_atomic_fadd_v2bf16", v2i16>;
-defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "int_amdgcn_global_atomic_fadd_v2bf16", v2i16>;
}
+let OtherPredicates = [HasAtomicGlobalPkAddBF16Inst] in
+defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "int_amdgcn_global_atomic_fadd_v2bf16", v2i16>;
+
} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10
let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in {
@@ -2171,12 +2211,16 @@ class FLAT_Real_gfx11 <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic>
let Inst{55} = ps.sve;
}
-multiclass FLAT_Real_Base_gfx11<bits<7> op, string ps, string opName, int renamed = false> {
+multiclass FLAT_Aliases_gfx11<string ps, string opName, int renamed> {
+ if renamed then
+ def _renamed_gfx11 : MnemonicAlias<!cast<FLAT_Pseudo>(ps).Mnemonic, opName>, Requires<[isGFX11Plus]>;
+}
+
+multiclass FLAT_Real_Base_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
+ FLAT_Aliases_gfx11<ps, opName, renamed> {
def _gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps), opName> {
let Inst{54-48} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
}
- if renamed then
- def _renamed_gfx11 : MnemonicAlias<!cast<FLAT_Pseudo>(ps).Mnemonic, opName>, Requires<[isGFX11Plus]>;
}
multiclass FLAT_Real_RTN_gfx11<bits<7> op, string ps, string opName> {
@@ -2219,7 +2263,8 @@ multiclass FLAT_Real_GlblAtomics_gfx11<bits<7> op, string ps, string opName, int
FLAT_Real_RTN_gfx11<op, ps, opName>,
FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>;
-multiclass FLAT_Real_GlblAtomics_RTN_gfx11<bits<7> op, string ps, string opName> :
+multiclass FLAT_Real_GlblAtomics_RTN_gfx11<bits<7> op, string ps, string opName, int renamed = false> :
+ FLAT_Aliases_gfx11<ps#"_RTN", opName, renamed>,
FLAT_Real_RTN_gfx11<op, ps, opName>,
FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>;
@@ -2312,7 +2357,7 @@ defm GLOBAL_ATOMIC_SWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x033, "GLOBAL_ATO
defm GLOBAL_ATOMIC_CMPSWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>;
defm GLOBAL_ATOMIC_ADD_U32 : FLAT_Real_GlblAtomics_gfx11<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>;
defm GLOBAL_ATOMIC_SUB_U32 : FLAT_Real_GlblAtomics_gfx11<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>;
-defm GLOBAL_ATOMIC_CSUB_U32 : FLAT_Real_GlblAtomics_RTN_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32">;
+defm GLOBAL_ATOMIC_CSUB_U32 : FLAT_Real_GlblAtomics_RTN_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32", true>;
defm GLOBAL_ATOMIC_MIN_I32 : FLAT_Real_GlblAtomics_gfx11<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>;
defm GLOBAL_ATOMIC_MIN_U32 : FLAT_Real_GlblAtomics_gfx11<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>;
defm GLOBAL_ATOMIC_MAX_I32 : FLAT_Real_GlblAtomics_gfx11<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>;
diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index f2452a275bdc..c9e0c6849568 100644
--- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -42,6 +42,16 @@ namespace {
class GCNCreateVOPD : public MachineFunctionPass {
private:
+ class VOPDCombineInfo {
+ public:
+ VOPDCombineInfo() {}
+ VOPDCombineInfo(MachineInstr *First, MachineInstr *Second)
+ : FirstMI(First), SecondMI(Second) {}
+
+ MachineInstr *FirstMI;
+ MachineInstr *SecondMI;
+ };
+
public:
static char ID;
const GCNSubtarget *ST = nullptr;
@@ -57,10 +67,9 @@ public:
return "GCN Create VOPD Instructions";
}
- bool doReplace(const SIInstrInfo *SII,
- std::pair<MachineInstr *, MachineInstr *> &Pair) {
- auto *FirstMI = Pair.first;
- auto *SecondMI = Pair.second;
+ bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
+ auto *FirstMI = CI.FirstMI;
+ auto *SecondMI = CI.SecondMI;
unsigned Opc1 = FirstMI->getOpcode();
unsigned Opc2 = SecondMI->getOpcode();
int NewOpcode = AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
@@ -94,7 +103,7 @@ public:
VOPDInst.copyImplicitOps(*MI[CompIdx]);
LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
- << *Pair.first << "\tY: " << *Pair.second << "\n");
+ << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n");
for (auto CompIdx : VOPD::COMPONENTS)
MI[CompIdx]->eraseFromParent();
@@ -114,7 +123,7 @@ public:
const SIInstrInfo *SII = ST->getInstrInfo();
bool Changed = false;
- SmallVector<std::pair<MachineInstr *, MachineInstr *>> ReplaceCandidates;
+ SmallVector<VOPDCombineInfo> ReplaceCandidates;
for (auto &MBB : MF) {
auto MII = MBB.begin(), E = MBB.end();
@@ -130,24 +139,24 @@ public:
unsigned Opc2 = SecondMI->getOpcode();
llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
- std::pair<MachineInstr *, MachineInstr *> Pair;
+ VOPDCombineInfo CI;
if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y)
- Pair = {FirstMI, SecondMI};
+ CI = VOPDCombineInfo(FirstMI, SecondMI);
else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)
- Pair = {SecondMI, FirstMI};
+ CI = VOPDCombineInfo(SecondMI, FirstMI);
else
continue;
// checkVOPDRegConstraints cares about program order, but doReplace
// cares about X-Y order in the constituted VOPD
if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) {
- ReplaceCandidates.push_back(Pair);
+ ReplaceCandidates.push_back(CI);
++MII;
}
}
}
- for (auto &Pair : ReplaceCandidates) {
- Changed |= doReplace(SII, Pair);
+ for (auto &CI : ReplaceCandidates) {
+ Changed |= doReplace(SII, CI);
}
return Changed;
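The GCNCreateVOPD change above replaces std::pair<MachineInstr *, MachineInstr *> with a small named VOPDCombineInfo class. A sketch of the design choice; MockInstr and CombineInfo are illustrative names here, not the in-tree types:

// Named members read better at the use sites (CI.FirstMI / CI.SecondMI instead
// of Pair.first / Pair.second) and leave room for extra per-candidate state.
struct MockInstr { unsigned Opcode = 0; };

struct CombineInfo {
  CombineInfo() = default;
  CombineInfo(MockInstr *First, MockInstr *Second)
      : FirstMI(First), SecondMI(Second) {}
  MockInstr *FirstMI = nullptr;  // component X of the would-be VOPD
  MockInstr *SecondMI = nullptr; // component Y of the would-be VOPD
};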
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index b33e614a071c..2d53b2a70dbe 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -16,7 +16,7 @@
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Support/TargetParser.h"
+#include "llvm/TargetParser/TargetParser.h"
using namespace llvm;
@@ -588,23 +588,21 @@ int GCNHazardRecognizer::getWaitStatesSinceSetReg(IsHazardFn IsHazard,
static void addRegUnits(const SIRegisterInfo &TRI, BitVector &BV,
MCRegister Reg) {
- for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI)
- BV.set(*RUI);
+ for (MCRegUnit Unit : TRI.regunits(Reg))
+ BV.set(Unit);
}
static void addRegsToSet(const SIRegisterInfo &TRI,
iterator_range<MachineInstr::const_mop_iterator> Ops,
- BitVector &Set) {
+ BitVector &DefSet, BitVector &UseSet) {
for (const MachineOperand &Op : Ops) {
if (Op.isReg())
- addRegUnits(TRI, Set, Op.getReg().asMCReg());
+ addRegUnits(TRI, Op.isDef() ? DefSet : UseSet, Op.getReg().asMCReg());
}
}
void GCNHazardRecognizer::addClauseInst(const MachineInstr &MI) {
- // XXX: Do we need to worry about implicit operands
- addRegsToSet(TRI, MI.defs(), ClauseDefs);
- addRegsToSet(TRI, MI.uses(), ClauseUses);
+ addRegsToSet(TRI, MI.operands(), ClauseDefs, ClauseUses);
}
static bool breaksSMEMSoftClause(MachineInstr *MI) {
@@ -1033,11 +1031,11 @@ int GCNHazardRecognizer::checkInlineAsmHazards(MachineInstr *IA) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
int WaitStatesNeeded = 0;
- for (unsigned I = InlineAsm::MIOp_FirstOperand, E = IA->getNumOperands();
- I != E; ++I) {
- const MachineOperand &Op = IA->getOperand(I);
+ for (const MachineOperand &Op :
+ llvm::drop_begin(IA->operands(), InlineAsm::MIOp_FirstOperand)) {
if (Op.isReg() && Op.isDef()) {
- WaitStatesNeeded = std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
+ WaitStatesNeeded =
+ std::max(WaitStatesNeeded, checkVALUHazardsHelper(Op, MRI));
}
}
@@ -1172,7 +1170,7 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
(MI.getOpcode() == AMDGPU::S_WAITCNT &&
!MI.getOperand(0).getImm()) ||
(MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
- MI.getOperand(0).getImm() == 0xffe3);
+ AMDGPU::DepCtr::decodeFieldVmVsrc(MI.getOperand(0).getImm()) == 0);
};
if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
@@ -1182,7 +1180,7 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
const SIInstrInfo *TII = ST.getInstrInfo();
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
- .addImm(0xffe3);
+ .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
return true;
}
@@ -1295,7 +1293,7 @@ bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
return true;
}
if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
- (MI.getOperand(0).getImm() & 0xfffe) == 0xfffe)
+ AMDGPU::DepCtr::decodeFieldSaSdst(MI.getOperand(0).getImm()) == 0)
return true;
return false;
};
@@ -1306,7 +1304,7 @@ bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII->get(AMDGPU::S_WAITCNT_DEPCTR))
- .addImm(0xfffe);
+ .addImm(AMDGPU::DepCtr::encodeFieldSaSdst(0));
return true;
}
@@ -1454,7 +1452,7 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) ||
(I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) ||
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
- I.getOperand(0).getImm() == 0xffe3);
+ AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0);
};
if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
@@ -1463,7 +1461,7 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII.get(AMDGPU::S_WAITCNT_DEPCTR))
- .addImm(0xffe3);
+ .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
return true;
}
@@ -1525,7 +1523,7 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
- I.getOperand(0).getImm() == 0x0fff))
+ AMDGPU::DepCtr::decodeFieldVaVdst(I.getOperand(0).getImm()) == 0))
return HazardExpired;
// Track registers writes
@@ -1687,10 +1685,10 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
return false;
// Hazard is observed - insert a wait on va_dst counter to ensure hazard is
- // avoided (mask 0x0fff achieves this).
+ // avoided.
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
TII.get(AMDGPU::S_WAITCNT_DEPCTR))
- .addImm(0x0fff);
+ .addImm(AMDGPU::DepCtr::encodeFieldVaVdst(0));
return true;
}
@@ -2026,7 +2024,7 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) {
MaxWaitStates);
int NeedWaitStates = MFMAWritesAGPROverlappedSrcABWaitStates;
int SrcCIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
- int OpNo = MI->getOperandNo(&Op);
+ int OpNo = Op.getOperandNo();
if (OpNo == SrcCIdx) {
NeedWaitStates = MFMAWritesAGPROverlappedSrcCWaitStates;
} else if (Opc == AMDGPU::V_ACCVGPR_READ_B32_e64) {
@@ -2205,7 +2203,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
if (NumWaitStates == std::numeric_limits<int>::max())
continue;
- int OpNo = MI->getOperandNo(&Use);
+ int OpNo = Use.getOperandNo();
unsigned Opc1 = MI1->getOpcode();
int NeedWaitStates = 0;
if (OpNo == SrcCIdx) {
@@ -2781,7 +2779,7 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
auto IsExpiredFn = [&MRI, this](const MachineInstr &I, int) {
// s_waitcnt_depctr sa_sdst(0) mitigates hazard.
if (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
- !(I.getOperand(0).getImm() & 0x1))
+ AMDGPU::DepCtr::decodeFieldSaSdst(I.getOperand(0).getImm()) == 0)
return true;
// VALU access to any SGPR or literal constant other than HazardReg
@@ -2831,7 +2829,7 @@ bool GCNHazardRecognizer::fixVALUMaskWriteHazard(MachineInstr *MI) {
// Add s_waitcnt_depctr sa_sdst(0) after SALU write.
BuildMI(*MI->getParent(), NextMI, MI->getDebugLoc(),
TII.get(AMDGPU::S_WAITCNT_DEPCTR))
- .addImm(0xfffe);
+ .addImm(AMDGPU::DepCtr::encodeFieldSaSdst(0));
// SALU write may be s_getpc in a bundle.
if (MI->getOpcode() == AMDGPU::S_GETPC_B64) {
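Editorial aside, not part of the upstream diff: the GCNHazardRecognizer hunks
above replace hand-written immediates such as 0xffe3, 0xfffe and 0x0fff with
named helpers like AMDGPU::DepCtr::encodeFieldVmVsrc(0) and
decodeFieldVmVsrc(). A minimal sketch of that encode/decode pattern, using a
hypothetical 4-bit field at bit 12 rather than the real depctr layout
(FieldShift, FieldMask, encodeField and decodeField are illustrative names
only):

    #include <cstdint>

    constexpr unsigned FieldShift = 12;                      // assumed position
    constexpr std::uint32_t FieldMask = 0xFu << FieldShift;  // assumed width

    // Overwrite just this field inside an immediate whose other fields keep
    // their defaults (all-ones in this toy), which is conceptually what the
    // single-argument encodeField* helpers above do.
    constexpr std::uint32_t encodeField(std::uint32_t Value) {
      return (0xFFFFu & ~FieldMask) | ((Value << FieldShift) & FieldMask);
    }

    constexpr std::uint32_t decodeField(std::uint32_t Imm) {
      return (Imm & FieldMask) >> FieldShift;
    }

    static_assert(decodeField(encodeField(0)) == 0, "field set to 'wait'");
    static_assert(decodeField(0xFFFFu) == 0xF, "default means no wait");

Reading the immediate through a decoder instead of comparing it against a magic
mask is what lets the hazard checks above keep working if other depctr fields
ever change.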
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index 77960ef62f3a..d89c9b1febde 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -367,9 +367,8 @@ void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
}
if (!MI->isDebugInstr()) {
// Reset read - undef flags and update them later.
- for (auto &Op : MI->operands())
- if (Op.isReg() && Op.isDef())
- Op.setIsUndef(false);
+ for (auto &Op : MI->all_defs())
+ Op.setIsUndef(false);
RegisterOperands RegOpers;
RegOpers.collect(*MI, *TRI, MRI, /*ShouldTrackLaneMasks*/true,
diff --git a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
index 366bc0a8ec0d..4c9ad9b5bcf7 100644
--- a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp
@@ -237,7 +237,7 @@ GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const {
bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) {
ST = &MF.getSubtarget<GCNSubtarget>();
- if (ST->getGeneration() < GCNSubtarget::GFX10)
+ if (!ST->hasNSAEncoding())
return false;
MRI = &MF.getRegInfo();
diff --git a/llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.cpp b/llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.cpp
new file mode 100644
index 000000000000..b50af38683ed
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNPreRALongBranchReg.cpp
@@ -0,0 +1,139 @@
+//===-- GCNPreRALongBranchReg.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// \file
+// \brief Pass to estimate pre-RA branch size and reserve a pair of SGPRs if
+// there is a long branch. Branch size at this point is difficult to track since
+// we have no idea what spills will be inserted later on. We just assume 8 bytes
+// per instruction to compute approximations without computing the actual
+// instruction size, to see if we're in the neighborhood of the maximum branch
+// distance threshold. Tuning of what is considered "long" is handled through
+// the amdgpu-long-branch-factor cl argument, which sets LongBranchFactor.
+//===----------------------------------------------------------------------===//
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "amdgpu-pre-ra-long-branch-reg"
+
+namespace {
+
+static cl::opt<double> LongBranchFactor(
+ "amdgpu-long-branch-factor", cl::init(1.0), cl::Hidden,
+ cl::desc("Factor to apply to what qualifies as a long branch "
+ "to reserve a pair of scalar registers. If this value "
+ "is 0 the long branch registers are never reserved. As this "
+ "value grows the greater chance the branch distance will fall "
+ "within the threshold and the registers will be marked to be "
+ "reserved. We lean towards always reserving a register for "
+ "long jumps"));
+
+class GCNPreRALongBranchReg : public MachineFunctionPass {
+
+ struct BasicBlockInfo {
+ // Offset - Distance from the beginning of the function to the beginning
+ // of this basic block.
+ uint64_t Offset = 0;
+ // Size - Size of the basic block in bytes
+ uint64_t Size = 0;
+ };
+ void generateBlockInfo(MachineFunction &MF,
+ SmallVectorImpl<BasicBlockInfo> &BlockInfo);
+
+public:
+ static char ID;
+ GCNPreRALongBranchReg() : MachineFunctionPass(ID) {
+ initializeGCNPreRALongBranchRegPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ StringRef getPassName() const override {
+ return "AMDGPU Pre-RA Long Branch Reg";
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+} // End anonymous namespace.
+char GCNPreRALongBranchReg::ID = 0;
+
+INITIALIZE_PASS(GCNPreRALongBranchReg, DEBUG_TYPE,
+ "AMDGPU Pre-RA Long Branch Reg", false, false)
+
+char &llvm::GCNPreRALongBranchRegID = GCNPreRALongBranchReg::ID;
+void GCNPreRALongBranchReg::generateBlockInfo(
+ MachineFunction &MF, SmallVectorImpl<BasicBlockInfo> &BlockInfo) {
+
+ BlockInfo.resize(MF.getNumBlockIDs());
+
+ // Approximate the size of all basic blocks by just
+ // assuming 8 bytes per instruction
+ for (const MachineBasicBlock &MBB : MF) {
+ uint64_t NumInstr = 0;
+ // Loop through the basic block and add up all non-debug
+ // non-meta instructions
+ for (const MachineInstr &MI : MBB) {
+ // isMetaInstruction is a superset of isDebugInstr
+ if (MI.isMetaInstruction())
+ continue;
+ NumInstr += 1;
+ }
+ // Approximate size as just 8 bytes per instruction
+ BlockInfo[MBB.getNumber()].Size = 8 * NumInstr;
+ }
+ uint64_t PrevNum = (&MF)->begin()->getNumber();
+ for (auto &MBB :
+ make_range(std::next(MachineFunction::iterator((&MF)->begin())),
+ (&MF)->end())) {
+ uint64_t Num = MBB.getNumber();
+ // Compute the offset immediately following this block.
+ BlockInfo[Num].Offset = BlockInfo[PrevNum].Offset + BlockInfo[PrevNum].Size;
+ PrevNum = Num;
+ }
+}
+bool GCNPreRALongBranchReg::runOnMachineFunction(MachineFunction &MF) {
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = STM.getInstrInfo();
+ const SIRegisterInfo *TRI = STM.getRegisterInfo();
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // For now, reserve highest available SGPR pair. After RA,
+ // shift down to a lower unused pair of SGPRs
+ // If all registers are used, then findUnusedRegister will return
+ // AMDGPU::NoRegister.
+ constexpr bool ReserveHighestRegister = true;
+ Register LongBranchReservedReg = TRI->findUnusedRegister(
+ MRI, &AMDGPU::SGPR_64RegClass, MF, ReserveHighestRegister);
+ if (!LongBranchReservedReg)
+ return false;
+
+ // Approximate code size and offsets of each basic block
+ SmallVector<BasicBlockInfo, 16> BlockInfo;
+ generateBlockInfo(MF, BlockInfo);
+
+ for (const MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock::const_iterator Last = MBB.getLastNonDebugInstr();
+ if (Last == MBB.end() || !Last->isUnconditionalBranch())
+ continue;
+ MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last);
+ uint64_t BlockDistance = static_cast<uint64_t>(
+ LongBranchFactor * BlockInfo[DestBB->getNumber()].Offset);
+ // If the distance falls outside the threshold assume it is a long branch
+ // and we need to reserve the registers
+ if (!TII->isBranchOffsetInRange(Last->getOpcode(), BlockDistance)) {
+ MFI->setLongBranchReservedReg(LongBranchReservedReg);
+ return true;
+ }
+ }
+ return false;
+}
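Editorial aside, not part of the upstream diff: the new pass above estimates
branch distances before register allocation by charging a flat 8 bytes per
non-meta instruction and scaling the destination offset by LongBranchFactor.
A standalone sketch of that estimate; Block, estimateOffsets,
needsLongBranchReg and the 1 MiB branch window are illustrative assumptions,
not the real TII->isBranchOffsetInRange() query:

    #include <cstdint>
    #include <vector>

    struct Block {
      unsigned NumRealInstrs = 0; // non-debug, non-meta instructions
      std::uint64_t Offset = 0;   // distance from the start of the function
    };

    // Running sum of block sizes, assuming 8 bytes per instruction, mirroring
    // generateBlockInfo() in the file above.
    void estimateOffsets(std::vector<Block> &Blocks) {
      std::uint64_t Running = 0;
      for (Block &B : Blocks) {
        B.Offset = Running;
        Running += 8 * std::uint64_t(B.NumRealInstrs);
      }
    }

    // Treat a branch as "long" when the scaled destination offset falls
    // outside an assumed +/-1 MiB encodable range.
    bool needsLongBranchReg(const Block &Dest, double LongBranchFactor) {
      auto Distance = std::uint64_t(LongBranchFactor * double(Dest.Offset));
      return Distance > (std::uint64_t(1) << 20);
    }

With LongBranchFactor greater than 1.0 the estimate errs towards reserving the
SGPR pair, which matches the option's stated intent.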
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index d86138154be6..b9c9358f88b9 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -196,6 +196,14 @@ def : ProcessorModel<"gfx940", SIDPGFX940FullSpeedModel,
FeatureISAVersion9_4_0.Features
>;
+def : ProcessorModel<"gfx941", SIDPGFX940FullSpeedModel,
+ FeatureISAVersion9_4_1.Features
+>;
+
+def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel,
+ FeatureISAVersion9_4_2.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX10.
//===----------------------------------------------------------------------===//
@@ -263,3 +271,11 @@ def : ProcessorModel<"gfx1102", GFX11SpeedModel,
def : ProcessorModel<"gfx1103", GFX11SpeedModel,
FeatureISAVersion11_0_3.Features
>;
+
+def : ProcessorModel<"gfx1150", GFX11SpeedModel,
+ FeatureISAVersion11_5_0.Features
+>;
+
+def : ProcessorModel<"gfx1151", GFX11SpeedModel,
+ FeatureISAVersion11_5_1.Features
+>;
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index f9bed9a76c6f..68cf97170369 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -286,8 +286,8 @@ void GCNUpwardRPTracker::recede(const MachineInstr &MI) {
// update max pressure
MaxPressure = max(AtMIPressure, MaxPressure);
- for (const auto &MO : MI.operands()) {
- if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual() || MO.isDead())
+ for (const auto &MO : MI.all_defs()) {
+ if (!MO.getReg().isVirtual() || MO.isDead())
continue;
auto Reg = MO.getReg();
@@ -336,23 +336,38 @@ bool GCNDownwardRPTracker::advanceBeforeNext() {
assert(SI.isValid());
// Remove dead registers or mask bits.
- for (auto &It : LiveRegs) {
- const LiveInterval &LI = LIS.getInterval(It.first);
+ SmallSet<Register, 8> SeenRegs;
+ for (auto &MO : LastTrackedMI->operands()) {
+ if (!MO.isReg() || !MO.getReg().isVirtual())
+ continue;
+ if (MO.isUse() && !MO.readsReg())
+ continue;
+ if (!SeenRegs.insert(MO.getReg()).second)
+ continue;
+ const LiveInterval &LI = LIS.getInterval(MO.getReg());
if (LI.hasSubRanges()) {
+ auto It = LiveRegs.end();
for (const auto &S : LI.subranges()) {
if (!S.liveAt(SI)) {
- auto PrevMask = It.second;
- It.second &= ~S.LaneMask;
- CurPressure.inc(It.first, PrevMask, It.second, *MRI);
+ if (It == LiveRegs.end()) {
+ It = LiveRegs.find(MO.getReg());
+ if (It == LiveRegs.end())
+ llvm_unreachable("register isn't live");
+ }
+ auto PrevMask = It->second;
+ It->second &= ~S.LaneMask;
+ CurPressure.inc(MO.getReg(), PrevMask, It->second, *MRI);
}
}
+ if (It != LiveRegs.end() && It->second.none())
+ LiveRegs.erase(It);
} else if (!LI.liveAt(SI)) {
- auto PrevMask = It.second;
- It.second = LaneBitmask::getNone();
- CurPressure.inc(It.first, PrevMask, It.second, *MRI);
+ auto It = LiveRegs.find(MO.getReg());
+ if (It == LiveRegs.end())
+ llvm_unreachable("register isn't live");
+ CurPressure.inc(MO.getReg(), It->second, LaneBitmask::getNone(), *MRI);
+ LiveRegs.erase(It);
}
- if (It.second.none())
- LiveRegs.erase(It.first);
}
MaxPressure = max(MaxPressure, CurPressure);
@@ -367,9 +382,7 @@ void GCNDownwardRPTracker::advanceToNext() {
NextMI = skipDebugInstructionsForward(NextMI, MBBEnd);
// Add new registers or mask bits.
- for (const auto &MO : LastTrackedMI->operands()) {
- if (!MO.isReg() || !MO.isDef())
- continue;
+ for (const auto &MO : LastTrackedMI->all_defs()) {
Register Reg = MO.getReg();
if (!Reg.isVirtual())
continue;
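Editorial aside, not part of the upstream diff: the GCNRegPressure hunks above
restrict the dead-lane cleanup in advanceBeforeNext() to registers touched by
the last tracked instruction instead of walking the whole live set. A rough
standalone sketch of that shape; Register, LaneMask and pruneDeadLanes are
stand-ins, and the real code also updates CurPressure and handles subranges:

    #include <cstdint>
    #include <set>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    using Register = unsigned;       // stand-in for llvm::Register
    using LaneMask = std::uint64_t;  // stand-in for llvm::LaneBitmask

    // Only registers read or written by the last instruction can stop being
    // live at this slot, so visit each of those once instead of every entry
    // in LiveRegs.
    void pruneDeadLanes(
        const std::vector<std::pair<Register, LaneMask>> &TouchedWithLiveLanes,
        std::unordered_map<Register, LaneMask> &LiveRegs) {
      std::set<Register> Seen;
      for (const auto &[Reg, StillLive] : TouchedWithLiveLanes) {
        if (!Seen.insert(Reg).second)
          continue;                    // handle each register only once
        auto It = LiveRegs.find(Reg);
        if (It == LiveRegs.end())
          continue;
        It->second &= StillLive;       // drop lanes no longer live here
        if (It->second == 0)
          LiveRegs.erase(It);          // fully dead: remove the entry
      }
    }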
diff --git a/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
new file mode 100644
index 000000000000..99db7e4af9fd
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/GCNRewritePartialRegUses.cpp
@@ -0,0 +1,502 @@
+//===-------------- GCNRewritePartialRegUses.cpp --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// RenameIndependentSubregs pass leaves large partially used super registers,
+/// for example:
+/// undef %0.sub4:VReg_1024 = ...
+/// %0.sub5:VReg_1024 = ...
+/// %0.sub6:VReg_1024 = ...
+/// %0.sub7:VReg_1024 = ...
+/// use %0.sub4_sub5_sub6_sub7
+/// use %0.sub6_sub7
+///
+/// GCNRewritePartialRegUses goes right after RenameIndependentSubregs and
+/// rewrites such partially used super registers with registers of minimal size:
+/// undef %0.sub0:VReg_128 = ...
+/// %0.sub1:VReg_128 = ...
+/// %0.sub2:VReg_128 = ...
+/// %0.sub3:VReg_128 = ...
+/// use %0.sub0_sub1_sub2_sub3
+/// use %0.sub2_sub3
+///
+/// This allows subreg lanemask tracking to be avoided during register pressure
+/// calculation and creates more opportunities for code that is unaware of
+/// lanemasks.
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "rewrite-partial-reg-uses"
+
+namespace {
+
+class GCNRewritePartialRegUses : public MachineFunctionPass {
+public:
+ static char ID;
+ GCNRewritePartialRegUses() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "Rewrite Partial Register Uses";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ MachineRegisterInfo *MRI;
+ const SIRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+ LiveIntervals *LIS;
+
+ /// Rewrite partially used register Reg by shifting all its subregisters to
+ /// the right and replacing the original register with a register of minimal
+ /// size. Return true if the change has been made.
+ bool rewriteReg(Register Reg) const;
+
+ /// Value type for SubRegMap below.
+ struct SubRegInfo {
+ /// Register class required to hold the value stored in the SubReg.
+ const TargetRegisterClass *RC;
+
+ /// Index for the right-shifted subregister. If 0 this is the "covering"
+ /// subreg i.e. subreg that covers all others. Covering subreg becomes the
+ /// whole register after the replacement.
+ unsigned SubReg = AMDGPU::NoSubRegister;
+ SubRegInfo(const TargetRegisterClass *RC_ = nullptr) : RC(RC_) {}
+ };
+
+ /// Map OldSubReg -> { RC, NewSubReg }. Used as in/out container.
+ typedef SmallDenseMap<unsigned, SubRegInfo> SubRegMap;
+
+ /// Given register class RC and the set of used subregs as keys in the SubRegs
+ /// map, return a new register class and the indexes of the right-shifted
+ /// subregs as values in the SubRegs map, such that the resulting regclass
+ /// contains registers of minimal size.
+ const TargetRegisterClass *getMinSizeReg(const TargetRegisterClass *RC,
+ SubRegMap &SubRegs) const;
+
+ /// Given regclass RC and pairs of [OldSubReg, SubRegRC] in SubRegs try to
+ /// find new regclass such that:
+ /// 1. It has subregs obtained by shifting each OldSubReg by RShift number
+ /// of bits to the right. Every "shifted" subreg should have the same
+ /// SubRegRC. SubRegRC can be null, in which case it is initialized using
+ /// getSubRegisterClass. If CoverSubregIdx is not zero, it is a subreg that
+ /// "covers" all other subregs in the pairs. Basically such a subreg becomes a
+ /// whole register.
+ /// 2. Resulting register class contains registers of minimal size but not
+ /// less than RegNumBits.
+ ///
+ /// SubRegs is map of OldSubReg -> [SubRegRC, NewSubReg] and is used as in/out
+ /// parameter:
+ /// OldSubReg - input parameter,
+ /// SubRegRC - in/out, should be changed for unknown regclass,
+ /// NewSubReg - output, contains shifted subregs on return.
+ const TargetRegisterClass *
+ getRegClassWithShiftedSubregs(const TargetRegisterClass *RC, unsigned RShift,
+ unsigned RegNumBits, unsigned CoverSubregIdx,
+ SubRegMap &SubRegs) const;
+
+ /// Update live intervals after rewriting OldReg to NewReg with SubRegs map
+ /// describing OldSubReg -> NewSubReg mapping.
+ void updateLiveIntervals(Register OldReg, Register NewReg,
+ SubRegMap &SubRegs) const;
+
+ /// Helper methods.
+
+ /// Return reg class expected by a MO's parent instruction for a given MO.
+ const TargetRegisterClass *getOperandRegClass(MachineOperand &MO) const;
+
+ /// Find right-shifted by RShift amount version of the SubReg if it exists,
+ /// return 0 otherwise.
+ unsigned shiftSubReg(unsigned SubReg, unsigned RShift) const;
+
+ /// Find subreg index with a given Offset and Size, return 0 if there is no
+ /// such subregister index. The result is cached in SubRegs data-member.
+ unsigned getSubReg(unsigned Offset, unsigned Size) const;
+
+ /// Cache for getSubReg method: {Offset, Size} -> SubReg index.
+ mutable SmallDenseMap<std::pair<unsigned, unsigned>, unsigned> SubRegs;
+
+ /// Return bit mask that contains all register classes that are projected into
+ /// RC by SubRegIdx. The result is cached in SuperRegMasks data-member.
+ const uint32_t *getSuperRegClassMask(const TargetRegisterClass *RC,
+ unsigned SubRegIdx) const;
+
+ /// Cache for getSuperRegClassMask method: { RC, SubRegIdx } -> Class bitmask.
+ mutable SmallDenseMap<std::pair<const TargetRegisterClass *, unsigned>,
+ const uint32_t *>
+ SuperRegMasks;
+
+ /// Return bitmask containing all allocatable register classes with registers
+ /// aligned at AlignNumBits. The result is cached in
+ /// AllocatableAndAlignedRegClassMasks data-member.
+ const BitVector &
+ getAllocatableAndAlignedRegClassMask(unsigned AlignNumBits) const;
+
+ /// Cache for getAllocatableAndAlignedRegClassMask method:
+ /// AlignNumBits -> Class bitmask.
+ mutable SmallDenseMap<unsigned, BitVector> AllocatableAndAlignedRegClassMasks;
+};
+
+} // end anonymous namespace
+
+// TODO: move this to the tablegen and use binary search by Offset.
+unsigned GCNRewritePartialRegUses::getSubReg(unsigned Offset,
+ unsigned Size) const {
+ const auto [I, Inserted] = SubRegs.try_emplace({Offset, Size}, 0);
+ if (Inserted) {
+ for (unsigned Idx = 1, E = TRI->getNumSubRegIndices(); Idx < E; ++Idx) {
+ if (TRI->getSubRegIdxOffset(Idx) == Offset &&
+ TRI->getSubRegIdxSize(Idx) == Size) {
+ I->second = Idx;
+ break;
+ }
+ }
+ }
+ return I->second;
+}
+
+unsigned GCNRewritePartialRegUses::shiftSubReg(unsigned SubReg,
+ unsigned RShift) const {
+ unsigned Offset = TRI->getSubRegIdxOffset(SubReg) - RShift;
+ return getSubReg(Offset, TRI->getSubRegIdxSize(SubReg));
+}
+
+const uint32_t *
+GCNRewritePartialRegUses::getSuperRegClassMask(const TargetRegisterClass *RC,
+ unsigned SubRegIdx) const {
+ const auto [I, Inserted] =
+ SuperRegMasks.try_emplace({RC, SubRegIdx}, nullptr);
+ if (Inserted) {
+ for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) {
+ if (RCI.getSubReg() == SubRegIdx) {
+ I->second = RCI.getMask();
+ break;
+ }
+ }
+ }
+ return I->second;
+}
+
+const BitVector &GCNRewritePartialRegUses::getAllocatableAndAlignedRegClassMask(
+ unsigned AlignNumBits) const {
+ const auto [I, Inserted] =
+ AllocatableAndAlignedRegClassMasks.try_emplace(AlignNumBits);
+ if (Inserted) {
+ BitVector &BV = I->second;
+ BV.resize(TRI->getNumRegClasses());
+ for (unsigned ClassID = 0; ClassID < TRI->getNumRegClasses(); ++ClassID) {
+ auto *RC = TRI->getRegClass(ClassID);
+ if (RC->isAllocatable() && TRI->isRegClassAligned(RC, AlignNumBits))
+ BV.set(ClassID);
+ }
+ }
+ return I->second;
+}
+
+const TargetRegisterClass *
+GCNRewritePartialRegUses::getRegClassWithShiftedSubregs(
+ const TargetRegisterClass *RC, unsigned RShift, unsigned RegNumBits,
+ unsigned CoverSubregIdx, SubRegMap &SubRegs) const {
+
+ unsigned RCAlign = TRI->getRegClassAlignmentNumBits(RC);
+ LLVM_DEBUG(dbgs() << " Shift " << RShift << ", reg align " << RCAlign
+ << '\n');
+
+ BitVector ClassMask(getAllocatableAndAlignedRegClassMask(RCAlign));
+ for (auto &[OldSubReg, SRI] : SubRegs) {
+ auto &[SubRegRC, NewSubReg] = SRI;
+
+ // Register class may be unknown, for example:
+ // undef %0.sub4:sgpr_1024 = S_MOV_B32 01
+ // %0.sub5:sgpr_1024 = S_MOV_B32 02
+ // %1:vreg_64 = COPY %0.sub4_sub5
+ // Register classes for subregs 'sub4' and 'sub5' are known from the
+ // description of the destination operand of the S_MOV_B32 instruction, but
+ // the class for the subreg 'sub4_sub5' isn't specified by the COPY
+ // instruction.
+ if (!SubRegRC)
+ SubRegRC = TRI->getSubRegisterClass(RC, OldSubReg);
+
+ if (!SubRegRC)
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << " " << TRI->getSubRegIndexName(OldSubReg) << ':'
+ << TRI->getRegClassName(SubRegRC)
+ << (SubRegRC->isAllocatable() ? "" : " not alloc")
+ << " -> ");
+
+ if (OldSubReg == CoverSubregIdx) {
+ NewSubReg = AMDGPU::NoSubRegister;
+ LLVM_DEBUG(dbgs() << "whole reg");
+ } else {
+ NewSubReg = shiftSubReg(OldSubReg, RShift);
+ if (!NewSubReg) {
+ LLVM_DEBUG(dbgs() << "none\n");
+ return nullptr;
+ }
+ LLVM_DEBUG(dbgs() << TRI->getSubRegIndexName(NewSubReg));
+ }
+
+ const uint32_t *Mask = NewSubReg ? getSuperRegClassMask(SubRegRC, NewSubReg)
+ : SubRegRC->getSubClassMask();
+ if (!Mask)
+ llvm_unreachable("no register class mask?");
+
+ ClassMask.clearBitsNotInMask(Mask);
+ // Don't try to early exit because checking if ClassMask has set bits isn't
+ // that cheap and we expect it to pass in most cases.
+ LLVM_DEBUG(dbgs() << ", num regclasses " << ClassMask.count() << '\n');
+ }
+
+ // ClassMask is the set of all register classes such that each class is
+ // allocatable, aligned, has all shifted subregs, and each subreg has the
+ // required register class (see SubRegRC above). Now select the first (that
+ // is, the largest) register class whose registers are of minimal size but
+ // not less than RegNumBits.
+ // We have to check register size because we may encounter classes of smaller
+ // registers like VReg_1 in some situations.
+ const TargetRegisterClass *MinRC = nullptr;
+ unsigned MinNumBits = std::numeric_limits<unsigned>::max();
+ for (unsigned ClassID : ClassMask.set_bits()) {
+ auto *RC = TRI->getRegClass(ClassID);
+ unsigned NumBits = TRI->getRegSizeInBits(*RC);
+ if (NumBits < MinNumBits && NumBits >= RegNumBits) {
+ MinNumBits = NumBits;
+ MinRC = RC;
+ }
+ if (MinNumBits == RegNumBits)
+ break;
+ }
+#ifndef NDEBUG
+ if (MinRC) {
+ assert(MinRC->isAllocatable() && TRI->isRegClassAligned(MinRC, RCAlign));
+ for (auto [SubReg, SRI] : SubRegs)
+ // Check that all registers in MinRC support SRI.SubReg subregister.
+ assert(MinRC == TRI->getSubClassWithSubReg(MinRC, SRI.SubReg));
+ }
+#endif
+ // RShift might be zero - in this case we are just trying to find a smaller
+ // register.
+ return (MinRC != RC || RShift != 0) ? MinRC : nullptr;
+}
+
+const TargetRegisterClass *
+GCNRewritePartialRegUses::getMinSizeReg(const TargetRegisterClass *RC,
+ SubRegMap &SubRegs) const {
+ unsigned CoverSubreg = AMDGPU::NoSubRegister;
+ unsigned Offset = std::numeric_limits<unsigned>::max();
+ unsigned End = 0;
+ for (auto [SubReg, SRI] : SubRegs) {
+ unsigned SubRegOffset = TRI->getSubRegIdxOffset(SubReg);
+ unsigned SubRegEnd = SubRegOffset + TRI->getSubRegIdxSize(SubReg);
+ if (SubRegOffset < Offset) {
+ Offset = SubRegOffset;
+ CoverSubreg = AMDGPU::NoSubRegister;
+ }
+ if (SubRegEnd > End) {
+ End = SubRegEnd;
+ CoverSubreg = AMDGPU::NoSubRegister;
+ }
+ if (SubRegOffset == Offset && SubRegEnd == End)
+ CoverSubreg = SubReg;
+ }
+ // If covering subreg is found shift everything so the covering subreg would
+ // be in the rightmost position.
+ if (CoverSubreg != AMDGPU::NoSubRegister)
+ return getRegClassWithShiftedSubregs(RC, Offset, End - Offset, CoverSubreg,
+ SubRegs);
+
+ // Otherwise find subreg with maximum required alignment and shift it and all
+ // other subregs to the rightmost possible position with respect to the
+ // alignment.
+ unsigned MaxAlign = 0;
+ for (auto [SubReg, SRI] : SubRegs)
+ MaxAlign = std::max(MaxAlign, TRI->getSubRegAlignmentNumBits(RC, SubReg));
+
+ unsigned FirstMaxAlignedSubRegOffset = std::numeric_limits<unsigned>::max();
+ for (auto [SubReg, SRI] : SubRegs) {
+ if (TRI->getSubRegAlignmentNumBits(RC, SubReg) != MaxAlign)
+ continue;
+ FirstMaxAlignedSubRegOffset =
+ std::min(FirstMaxAlignedSubRegOffset, TRI->getSubRegIdxOffset(SubReg));
+ if (FirstMaxAlignedSubRegOffset == Offset)
+ break;
+ }
+
+ unsigned NewOffsetOfMaxAlignedSubReg =
+ alignTo(FirstMaxAlignedSubRegOffset - Offset, MaxAlign);
+
+ if (NewOffsetOfMaxAlignedSubReg > FirstMaxAlignedSubRegOffset)
+ llvm_unreachable("misaligned subreg");
+
+ unsigned RShift = FirstMaxAlignedSubRegOffset - NewOffsetOfMaxAlignedSubReg;
+ return getRegClassWithShiftedSubregs(RC, RShift, End - RShift, 0, SubRegs);
+}
+
+// Only the subrange's lanemasks of the original interval need to be modified.
+// Subrange for a covering subreg becomes the main range.
+void GCNRewritePartialRegUses::updateLiveIntervals(Register OldReg,
+ Register NewReg,
+ SubRegMap &SubRegs) const {
+ if (!LIS->hasInterval(OldReg))
+ return;
+
+ auto &OldLI = LIS->getInterval(OldReg);
+ auto &NewLI = LIS->createEmptyInterval(NewReg);
+
+ auto &Allocator = LIS->getVNInfoAllocator();
+ NewLI.setWeight(OldLI.weight());
+
+ for (auto &SR : OldLI.subranges()) {
+ auto I = find_if(SubRegs, [&](auto &P) {
+ return SR.LaneMask == TRI->getSubRegIndexLaneMask(P.first);
+ });
+
+ if (I == SubRegs.end()) {
+ // There might be a situation when subranges don't exactly match used
+ // subregs, for example:
+ // %120 [160r,1392r:0) 0@160r
+ // L000000000000C000 [160r,1392r:0) 0@160r
+ // L0000000000003000 [160r,1392r:0) 0@160r
+ // L0000000000000C00 [160r,1392r:0) 0@160r
+ // L0000000000000300 [160r,1392r:0) 0@160r
+ // L0000000000000003 [160r,1104r:0) 0@160r
+ // L000000000000000C [160r,1104r:0) 0@160r
+ // L0000000000000030 [160r,1104r:0) 0@160r
+ // L00000000000000C0 [160r,1104r:0) 0@160r
+ // but used subregs are:
+ // sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, L000000000000FFFF
+ // sub0_sub1_sub2_sub3, L00000000000000FF
+ // sub4_sub5_sub6_sub7, L000000000000FF00
+ // In this example subregs sub0_sub1_sub2_sub3 and sub4_sub5_sub6_sub7
+ // have several subranges with the same lifetime. For such cases just
+ // recreate the interval.
+ LIS->removeInterval(OldReg);
+ LIS->removeInterval(NewReg);
+ LIS->createAndComputeVirtRegInterval(NewReg);
+ return;
+ }
+
+ if (unsigned NewSubReg = I->second.SubReg)
+ NewLI.createSubRangeFrom(Allocator,
+ TRI->getSubRegIndexLaneMask(NewSubReg), SR);
+ else // This is the covering subreg (0 index) - set it as main range.
+ NewLI.assign(SR, Allocator);
+
+ SubRegs.erase(I);
+ }
+ if (NewLI.empty())
+ NewLI.assign(OldLI, Allocator);
+ NewLI.verify(MRI);
+ LIS->removeInterval(OldReg);
+}
+
+const TargetRegisterClass *
+GCNRewritePartialRegUses::getOperandRegClass(MachineOperand &MO) const {
+ MachineInstr *MI = MO.getParent();
+ return TII->getRegClass(TII->get(MI->getOpcode()), MI->getOperandNo(&MO), TRI,
+ *MI->getParent()->getParent());
+}
+
+bool GCNRewritePartialRegUses::rewriteReg(Register Reg) const {
+ auto Range = MRI->reg_nodbg_operands(Reg);
+ if (Range.begin() == Range.end())
+ return false;
+
+ for (MachineOperand &MO : Range) {
+ if (MO.getSubReg() == AMDGPU::NoSubRegister) // Whole reg used, quit.
+ return false;
+ }
+
+ auto *RC = MRI->getRegClass(Reg);
+ LLVM_DEBUG(dbgs() << "Try to rewrite partial reg " << printReg(Reg, TRI)
+ << ':' << TRI->getRegClassName(RC) << '\n');
+
+ // Collect used subregs and constrained reg classes inferred from instruction
+ // operands.
+ SubRegMap SubRegs;
+ for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
+ assert(MO.getSubReg() != AMDGPU::NoSubRegister);
+ auto *OpDescRC = getOperandRegClass(MO);
+ const auto [I, Inserted] = SubRegs.try_emplace(MO.getSubReg(), OpDescRC);
+ if (!Inserted && OpDescRC) {
+ SubRegInfo &SRI = I->second;
+ SRI.RC = SRI.RC ? TRI->getCommonSubClass(SRI.RC, OpDescRC) : OpDescRC;
+ if (!SRI.RC) {
+ LLVM_DEBUG(dbgs() << " Couldn't find common target regclass\n");
+ return false;
+ }
+ }
+ }
+
+ auto *NewRC = getMinSizeReg(RC, SubRegs);
+ if (!NewRC) {
+ LLVM_DEBUG(dbgs() << " No improvement achieved\n");
+ return false;
+ }
+
+ Register NewReg = MRI->createVirtualRegister(NewRC);
+ LLVM_DEBUG(dbgs() << " Success " << printReg(Reg, TRI) << ':'
+ << TRI->getRegClassName(RC) << " -> "
+ << printReg(NewReg, TRI) << ':'
+ << TRI->getRegClassName(NewRC) << '\n');
+
+ for (auto &MO : make_early_inc_range(MRI->reg_operands(Reg))) {
+ MO.setReg(NewReg);
+ // Debug info can refer to the whole reg, just leave it as it is for now.
+ // TODO: create some DI shift expression?
+ if (MO.isDebug() && MO.getSubReg() == 0)
+ continue;
+ unsigned SubReg = SubRegs[MO.getSubReg()].SubReg;
+ MO.setSubReg(SubReg);
+ if (SubReg == AMDGPU::NoSubRegister && MO.isDef())
+ MO.setIsUndef(false);
+ }
+
+ if (LIS)
+ updateLiveIntervals(Reg, NewReg, SubRegs);
+
+ return true;
+}
+
+bool GCNRewritePartialRegUses::runOnMachineFunction(MachineFunction &MF) {
+ MRI = &MF.getRegInfo();
+ TRI = static_cast<const SIRegisterInfo *>(MRI->getTargetRegisterInfo());
+ TII = MF.getSubtarget().getInstrInfo();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
+ bool Changed = false;
+ for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
+ Changed |= rewriteReg(Register::index2VirtReg(I));
+ }
+ return Changed;
+}
+
+char GCNRewritePartialRegUses::ID;
+
+char &llvm::GCNRewritePartialRegUsesID = GCNRewritePartialRegUses::ID;
+
+INITIALIZE_PASS_BEGIN(GCNRewritePartialRegUses, DEBUG_TYPE,
+ "Rewrite Partial Register Uses", false, false)
+INITIALIZE_PASS_END(GCNRewritePartialRegUses, DEBUG_TYPE,
+ "Rewrite Partial Register Uses", false, false)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 6946a05bc551..994cfea1fd7d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -45,6 +45,13 @@ static cl::opt<unsigned> ScheduleMetricBias(
"100 to chase the occupancy only."),
cl::init(10));
+static cl::opt<bool>
+ RelaxedOcc("amdgpu-schedule-relaxed-occupancy", cl::Hidden,
+ cl::desc("Relax occupancy targets for kernels which are memory "
+ "bound (amdgpu-membound-threshold), or "
+ "Wave Limited (amdgpu-limit-wave-threshold)."),
+ cl::init(false));
+
const unsigned ScheduleMetrics::ScaleFactor = 100;
GCNSchedStrategy::GCNSchedStrategy(const MachineSchedContext *C)
@@ -67,7 +74,10 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) {
// Set the initial TargetOccupancy to the maximum occupancy that we can
// achieve for this function. This effectively sets a lower bound on the
// 'Critical' register limits in the scheduler.
- TargetOccupancy = MFI.getOccupancy();
+ // Allow for lower occupancy targets if the kernel is wave limited or memory
+ // bound and the relaxed occupancy feature is in use.
+ TargetOccupancy =
+ RelaxedOcc ? MFI.getMinAllowedOccupancy() : MFI.getOccupancy();
SGPRCriticalLimit =
std::min(ST.getMaxNumSGPRs(TargetOccupancy, true), SGPRExcessLimit);
@@ -471,6 +481,12 @@ GCNScheduleDAGMILive::GCNScheduleDAGMILive(
StartingOccupancy(MFI.getOccupancy()), MinOccupancy(StartingOccupancy) {
LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
+ if (RelaxedOcc) {
+ MinOccupancy = std::min(MFI.getMinAllowedOccupancy(), StartingOccupancy);
+ if (MinOccupancy != StartingOccupancy)
+ LLVM_DEBUG(dbgs() << "Allowing Occupancy drops to " << MinOccupancy
+ << ".\n");
+ }
}
std::unique_ptr<GCNSchedStage>
@@ -511,11 +527,19 @@ void GCNScheduleDAGMILive::computeBlockPressure(unsigned RegionIdx,
// If the block has the only successor then live-ins of that successor are
// live-outs of the current block. We can reuse calculated live set if the
// successor will be sent to scheduling past current block.
+
+ // However, due to a bug in LiveInterval analysis it may happen that two
+ // predecessors of the same successor block have different lane bitmasks for
+ // a live-out register. Work around that by sticking to a one-to-one
+ // relationship, i.e. one predecessor with one successor block.
const MachineBasicBlock *OnlySucc = nullptr;
- if (MBB->succ_size() == 1 && !(*MBB->succ_begin())->empty()) {
- SlotIndexes *Ind = LIS->getSlotIndexes();
- if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(*MBB->succ_begin()))
- OnlySucc = *MBB->succ_begin();
+ if (MBB->succ_size() == 1) {
+ auto *Candidate = *MBB->succ_begin();
+ if (!Candidate->empty() && Candidate->pred_size() == 1) {
+ SlotIndexes *Ind = LIS->getSlotIndexes();
+ if (Ind->getMBBStartIdx(MBB) < Ind->getMBBStartIdx(Candidate))
+ OnlySucc = Candidate;
+ }
}
// Scheduler sends regions from the end of the block upwards.
@@ -864,7 +888,8 @@ void GCNSchedStage::setupNewBlock() {
DAG.startBlock(CurrentMBB);
// Get real RP for the region if it hasn't been calculated before. After the
// initial schedule stage real RP will be collected after scheduling.
- if (StageID == GCNSchedStageID::OccInitialSchedule)
+ if (StageID == GCNSchedStageID::OccInitialSchedule ||
+ StageID == GCNSchedStageID::ILPInitialSchedule)
DAG.computeBlockPressure(RegionIdx, CurrentMBB);
}
@@ -1100,6 +1125,10 @@ bool UnclusteredHighRPStage::shouldRevertScheduling(unsigned WavesAfter) {
return true;
}
+ // Do not attempt to relax schedule even more if we are already spilling.
+ if (isRegionWithExcessRP())
+ return false;
+
LLVM_DEBUG(
dbgs()
<< "\n\t *** In shouldRevertScheduling ***\n"
@@ -1188,9 +1217,8 @@ void GCNSchedStage::revertScheduling() {
}
// Reset read-undef flags and update them later.
- for (auto &Op : MI->operands())
- if (Op.isReg() && Op.isDef())
- Op.setIsUndef(false);
+ for (auto &Op : MI->all_defs())
+ Op.setIsUndef(false);
RegisterOperands RegOpers;
RegOpers.collect(*MI, *DAG.TRI, DAG.MRI, DAG.ShouldTrackLaneMasks, false);
if (!MI->isDebugInstr()) {
@@ -1463,8 +1491,8 @@ bool PreRARematStage::isTriviallyReMaterializable(const MachineInstr &MI) {
if (!DAG.TII->isTriviallyReMaterializable(MI))
return false;
- for (const MachineOperand &MO : MI.operands())
- if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
+ for (const MachineOperand &MO : MI.all_uses())
+ if (MO.getReg().isVirtual())
return false;
return true;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 2017ae84353c..ef5470df876d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -15,10 +15,12 @@
#define LLVM_LIB_TARGET_AMDGPU_GCNSUBTARGET_H
#include "AMDGPUCallLowering.h"
+#include "AMDGPURegisterBankInfo.h"
#include "AMDGPUSubtarget.h"
#include "SIFrameLowering.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#define GET_SUBTARGETINFO_HEADER
@@ -51,7 +53,7 @@ private:
std::unique_ptr<InlineAsmLowering> InlineAsmLoweringInfo;
std::unique_ptr<InstructionSelector> InstSelector;
std::unique_ptr<LegalizerInfo> Legalizer;
- std::unique_ptr<RegisterBankInfo> RegBankInfo;
+ std::unique_ptr<AMDGPURegisterBankInfo> RegBankInfo;
protected:
// Basic subtarget description.
@@ -63,7 +65,6 @@ protected:
unsigned MaxPrivateElementSize = 0;
// Possibly statically set by tablegen, but may want to be overridden.
- bool FastFMAF32 = false;
bool FastDenormalF32 = false;
bool HalfRate64Ops = false;
bool FullRate64Ops = false;
@@ -132,7 +133,7 @@ protected:
bool HasA16 = false;
bool HasG16 = false;
bool HasNSAEncoding = false;
- unsigned NSAMaxSize = 0;
+ bool HasPartialNSAEncoding = false;
bool GFX10_AEncoding = false;
bool GFX10_BEncoding = false;
bool HasDLInsts = false;
@@ -146,12 +147,17 @@ protected:
bool HasDot7Insts = false;
bool HasDot8Insts = false;
bool HasDot9Insts = false;
+ bool HasDot10Insts = false;
bool HasMAIInsts = false;
bool HasFP8Insts = false;
bool HasPkFmacF16Inst = false;
+ bool HasAtomicDsPkAdd16Insts = false;
+ bool HasAtomicFlatPkAdd16Insts = false;
bool HasAtomicFaddRtnInsts = false;
bool HasAtomicFaddNoRtnInsts = false;
- bool HasAtomicPkFaddNoRtnInsts = false;
+ bool HasAtomicBufferGlobalPkAddF16NoRtnInsts = false;
+ bool HasAtomicBufferGlobalPkAddF16Insts = false;
+ bool HasAtomicGlobalPkAddBF16Inst = false;
bool HasFlatAtomicFaddF32Inst = false;
bool SupportsSRAMECC = false;
@@ -173,6 +179,7 @@ protected:
bool ScalarFlatScratchInsts = false;
bool HasArchitectedFlatScratch = false;
bool EnableFlatScratch = false;
+ bool HasArchitectedSGPRs = false;
bool AddNoCarryInsts = false;
bool HasUnpackedD16VMem = false;
bool LDSMisalignedBug = false;
@@ -198,6 +205,7 @@ protected:
bool HasMADIntraFwdBug = false;
bool HasVOPDInsts = false;
bool HasVALUTransUseHazard = false;
+ bool HasForceStoreSC0SC1 = false;
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable = false;
@@ -248,7 +256,7 @@ public:
return Legalizer.get();
}
- const RegisterBankInfo *getRegBankInfo() const override {
+ const AMDGPURegisterBankInfo *getRegBankInfo() const override {
return RegBankInfo.get();
}
@@ -283,7 +291,7 @@ public:
/// Return the number of high bits known to be zero for a frame index.
unsigned getKnownHighZeroBitsForFrameIndex() const {
- return countLeadingZeros(getMaxWaveScratchSize()) + getWavefrontSizeLog2();
+ return llvm::countl_zero(getMaxWaveScratchSize()) + getWavefrontSizeLog2();
}
int getLDSBankCount() const {
@@ -319,10 +327,6 @@ public:
return FP64;
}
- bool hasFastFMAF32() const {
- return FastFMAF32;
- }
-
bool hasHalfRate64Ops() const {
return HalfRate64Ops;
}
@@ -738,6 +742,10 @@ public:
return HasDot9Insts;
}
+ bool hasDot10Insts() const {
+ return HasDot10Insts;
+ }
+
bool hasMAIInsts() const {
return HasMAIInsts;
}
@@ -750,6 +758,10 @@ public:
return HasPkFmacF16Inst;
}
+ bool hasAtomicDsPkAdd16Insts() const { return HasAtomicDsPkAdd16Insts; }
+
+ bool hasAtomicFlatPkAdd16Insts() const { return HasAtomicFlatPkAdd16Insts; }
+
bool hasAtomicFaddInsts() const {
return HasAtomicFaddRtnInsts || HasAtomicFaddNoRtnInsts;
}
@@ -758,7 +770,17 @@ public:
bool hasAtomicFaddNoRtnInsts() const { return HasAtomicFaddNoRtnInsts; }
- bool hasAtomicPkFaddNoRtnInsts() const { return HasAtomicPkFaddNoRtnInsts; }
+ bool hasAtomicBufferGlobalPkAddF16NoRtnInsts() const {
+ return HasAtomicBufferGlobalPkAddF16NoRtnInsts;
+ }
+
+ bool hasAtomicBufferGlobalPkAddF16Insts() const {
+ return HasAtomicBufferGlobalPkAddF16Insts;
+ }
+
+ bool hasAtomicGlobalPkAddBF16Inst() const {
+ return HasAtomicGlobalPkAddBF16Inst;
+ }
bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
@@ -924,7 +946,9 @@ public:
bool hasNSAEncoding() const { return HasNSAEncoding; }
- unsigned getNSAMaxSize() const { return NSAMaxSize; }
+ bool hasPartialNSAEncoding() const { return HasPartialNSAEncoding; }
+
+ unsigned getNSAMaxSize() const { return AMDGPU::getNSAMaxSize(*this); }
bool hasGFX10_AEncoding() const {
return GFX10_AEncoding;
@@ -1070,6 +1094,8 @@ public:
bool hasVALUTransUseHazard() const { return HasVALUTransUseHazard; }
+ bool hasForceStoreSC0SC1() const { return HasForceStoreSC0SC1; }
+
bool hasVALUMaskWriteHazard() const { return getGeneration() >= GFX11; }
/// Return if operations acting on VGPR tuples require even alignment.
@@ -1126,6 +1152,9 @@ public:
/// In this case it is readonly.
bool flatScratchIsArchitected() const { return HasArchitectedFlatScratch; }
+ /// \returns true if the architected SGPRs are enabled.
+ bool hasArchitectedSGPRs() const { return HasArchitectedSGPRs; }
+
/// \returns true if the machine has merged shaders in which s0-s7 are
/// reserved by the hardware and user SGPRs start at s8
bool hasMergedShaders() const {
@@ -1323,6 +1352,14 @@ public:
// \returns the number of address arguments from which to enable MIMG NSA
// on supported architectures.
unsigned getNSAThreshold(const MachineFunction &MF) const;
+
+ // \returns true if the subtarget has a hazard requiring an "s_nop 0"
+ // instruction before "s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)".
+ bool requiresNopBeforeDeallocVGPRs() const {
+ // Currently all targets that support the dealloc VGPRs message also require
+ // the nop.
+ return true;
+ }
};
} // end namespace llvm
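Editorial aside, not part of the upstream diff: among the GCNSubtarget.h
changes above, getKnownHighZeroBitsForFrameIndex() now spells its leading-zero
count as llvm::countl_zero. A standalone check of the same arithmetic using the
standard std::countl_zero; the 256 KiB per-wave scratch limit and wave64 are
assumed, illustrative numbers only:

    #include <bit>
    #include <cstdint>

    constexpr std::uint64_t MaxWaveScratchSize = 256 * 1024; // assumed
    constexpr unsigned WavefrontSizeLog2 = 6;                // assumed (wave64)

    // Same formula as the subtarget method: leading zeros of the scratch
    // limit plus the wavefront size log2.
    constexpr unsigned KnownHighZeroBits =
        unsigned(std::countl_zero(MaxWaveScratchSize)) + WavefrontSizeLog2;

    static_assert(KnownHighZeroBits == 51, "45 leading zeros + 6");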
diff --git a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
index 95ea42267ccf..29c9b9ccf276 100644
--- a/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNVOPDUtils.cpp
@@ -63,7 +63,7 @@ bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
}() && "Expected FirstMI to precede SecondMI");
// Cannot pair dependent instructions
for (const auto &Use : SecondMI.uses())
- if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg()))
+ if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg(), TRI))
return false;
auto getVRegIdx = [&](unsigned OpcodeIdx, unsigned OperandIdx) {
diff --git a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
index 1f65376890da..4956a1586774 100644
--- a/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
+++ b/llvm/lib/Target/AMDGPU/LDSDIRInstructions.td
@@ -34,7 +34,7 @@ class LDSDIRe<bits<2> op, bit is_direct> : Enc32 {
class LDSDIR_getIns<bit direct> {
dag ret = !if(direct,
(ins wait_vdst:$waitvdst),
- (ins Attr:$attr, AttrChan:$attrchan, wait_vdst:$waitvdst)
+ (ins InterpAttr:$attr, InterpAttrChan:$attrchan, wait_vdst:$waitvdst)
);
}
diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
index 24c9cc2d7dd2..a1f8be403c44 100644
--- a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
+++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
@@ -348,9 +348,9 @@ createAMDGPUInstrPostProcess(const MCSubtargetInfo &STI,
/// Extern function to initialize the targets for the AMDGPU backend
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetMCA() {
- TargetRegistry::RegisterCustomBehaviour(getTheAMDGPUTarget(),
+ TargetRegistry::RegisterCustomBehaviour(getTheR600Target(),
createAMDGPUCustomBehaviour);
- TargetRegistry::RegisterInstrPostProcess(getTheAMDGPUTarget(),
+ TargetRegistry::RegisterInstrPostProcess(getTheR600Target(),
createAMDGPUInstrPostProcess);
TargetRegistry::RegisterCustomBehaviour(getTheGCNTarget(),
diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
index 7a0d454c3578..cb1436d319c9 100644
--- a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
+++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
@@ -19,7 +19,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/MCA/CustomBehaviour.h"
-#include "llvm/Support/TargetParser.h"
+#include "llvm/TargetParser/TargetParser.h"
namespace llvm {
namespace mca {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index f0653aec925d..44109b9d2919 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -19,7 +19,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/EndianStream.h"
-#include "llvm/Support/TargetParser.h"
+#include "llvm/TargetParser/TargetParser.h"
using namespace llvm;
using namespace llvm::AMDGPU;
@@ -79,7 +79,7 @@ bool AMDGPUAsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
bool AMDGPUAsmBackend::mayNeedRelaxation(const MCInst &Inst,
const MCSubtargetInfo &STI) const {
- if (!STI.getFeatureBits()[AMDGPU::FeatureOffset3fBug])
+ if (!STI.hasFeature(AMDGPU::FeatureOffset3fBug))
return false;
if (AMDGPU::getSOPPWithRelaxation(Inst.getOpcode()) >= 0)
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index 066b36622a16..3f188478ca8b 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -74,9 +74,9 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_AMDGPU_REL32;
case FK_Data_4:
case FK_SecRel_4:
- return ELF::R_AMDGPU_ABS32;
+ return IsPCRel ? ELF::R_AMDGPU_REL32 : ELF::R_AMDGPU_ABS32;
case FK_Data_8:
- return ELF::R_AMDGPU_ABS64;
+ return IsPCRel ? ELF::R_AMDGPU_REL64 : ELF::R_AMDGPU_ABS64;
}
if (Fixup.getTargetKind() == AMDGPU::fixup_si_sopp_br) {
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index e465267f2c20..ad55c73b22ea 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -19,7 +19,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/TargetParser.h"
+#include "llvm/TargetParser/TargetParser.h"
using namespace llvm;
using namespace llvm::AMDGPU;
@@ -60,11 +60,6 @@ void AMDGPUInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
O << formatHex(MI->getOperand(OpNo).getImm() & 0xf);
}
-void AMDGPUInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- O << formatHex(MI->getOperand(OpNo).getImm() & 0xff);
-}
-
void AMDGPUInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -105,21 +100,6 @@ void AMDGPUInstPrinter::printNamedBit(const MCInst *MI, unsigned OpNo,
}
}
-void AMDGPUInstPrinter::printOffen(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "offen");
-}
-
-void AMDGPUInstPrinter::printIdxen(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "idxen");
-}
-
-void AMDGPUInstPrinter::printAddr64(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "addr64");
-}
-
void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -141,15 +121,10 @@ void AMDGPUInstPrinter::printFlatOffset(const MCInst *MI, unsigned OpNo,
bool IsFlatSeg = !(Desc.TSFlags &
(SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch));
- if (IsFlatSeg) { // Unsigned offset
+ if (IsFlatSeg) // Unsigned offset
printU16ImmDecOperand(MI, OpNo, O);
- } else { // Signed offset
- if (AMDGPU::isGFX10(STI)) {
- O << formatDec(SignExtend32<12>(MI->getOperand(OpNo).getImm()));
- } else {
- O << formatDec(SignExtend32<13>(MI->getOperand(OpNo).getImm()));
- }
- }
+ else // Signed offset
+ O << formatDec(SignExtend32(Imm, AMDGPU::getNumFlatOffsetBits(STI)));
}
}
@@ -196,11 +171,6 @@ void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
printU32ImmOperand(MI, OpNo, STI, O);
}
-void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "gds");
-}
-
void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
auto Imm = MI->getOperand(OpNo).getImm();
@@ -218,15 +188,6 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
O << " /* unexpected cache policy bit */";
}
-void AMDGPUInstPrinter::printSWZ(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
-}
-
-void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "tfe");
-}
-
void AMDGPUInstPrinter::printDMask(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
if (MI->getOperand(OpNo).getImm()) {
@@ -247,16 +208,6 @@ void AMDGPUInstPrinter::printDim(const MCInst *MI, unsigned OpNo,
O << Dim;
}
-void AMDGPUInstPrinter::printUNorm(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "unorm");
-}
-
-void AMDGPUInstPrinter::printDA(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "da");
-}
-
void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
if (STI.hasFeature(AMDGPU::FeatureR128A16))
@@ -265,33 +216,6 @@ void AMDGPUInstPrinter::printR128A16(const MCInst *MI, unsigned OpNo,
printNamedBit(MI, OpNo, O, "r128");
}
-void AMDGPUInstPrinter::printA16(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "a16");
-}
-
-void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "lwe");
-}
-
-void AMDGPUInstPrinter::printD16(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "d16");
-}
-
-void AMDGPUInstPrinter::printExpCompr(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "compr");
-}
-
-void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "vm");
-}
-
void AMDGPUInstPrinter::printFORMAT(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -462,7 +386,7 @@ void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
else if (Imm == 0xC400)
O<< "-4.0";
else if (Imm == 0x3118 &&
- STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]) {
+ STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm)) {
O << "0.15915494";
} else {
uint64_t Imm16 = static_cast<uint16_t>(Imm);
@@ -486,26 +410,26 @@ void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
return;
}
- if (Imm == FloatToBits(0.0f))
+ if (Imm == llvm::bit_cast<uint32_t>(0.0f))
O << "0.0";
- else if (Imm == FloatToBits(1.0f))
+ else if (Imm == llvm::bit_cast<uint32_t>(1.0f))
O << "1.0";
- else if (Imm == FloatToBits(-1.0f))
+ else if (Imm == llvm::bit_cast<uint32_t>(-1.0f))
O << "-1.0";
- else if (Imm == FloatToBits(0.5f))
+ else if (Imm == llvm::bit_cast<uint32_t>(0.5f))
O << "0.5";
- else if (Imm == FloatToBits(-0.5f))
+ else if (Imm == llvm::bit_cast<uint32_t>(-0.5f))
O << "-0.5";
- else if (Imm == FloatToBits(2.0f))
+ else if (Imm == llvm::bit_cast<uint32_t>(2.0f))
O << "2.0";
- else if (Imm == FloatToBits(-2.0f))
+ else if (Imm == llvm::bit_cast<uint32_t>(-2.0f))
O << "-2.0";
- else if (Imm == FloatToBits(4.0f))
+ else if (Imm == llvm::bit_cast<uint32_t>(4.0f))
O << "4.0";
- else if (Imm == FloatToBits(-4.0f))
+ else if (Imm == llvm::bit_cast<uint32_t>(-4.0f))
O << "-4.0";
else if (Imm == 0x3e22f983 &&
- STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
+ STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
O << "0.15915494";
else
O << formatHex(static_cast<uint64_t>(Imm));
@@ -520,26 +444,26 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
return;
}
- if (Imm == DoubleToBits(0.0))
+ if (Imm == llvm::bit_cast<uint64_t>(0.0))
O << "0.0";
- else if (Imm == DoubleToBits(1.0))
+ else if (Imm == llvm::bit_cast<uint64_t>(1.0))
O << "1.0";
- else if (Imm == DoubleToBits(-1.0))
+ else if (Imm == llvm::bit_cast<uint64_t>(-1.0))
O << "-1.0";
- else if (Imm == DoubleToBits(0.5))
+ else if (Imm == llvm::bit_cast<uint64_t>(0.5))
O << "0.5";
- else if (Imm == DoubleToBits(-0.5))
+ else if (Imm == llvm::bit_cast<uint64_t>(-0.5))
O << "-0.5";
- else if (Imm == DoubleToBits(2.0))
+ else if (Imm == llvm::bit_cast<uint64_t>(2.0))
O << "2.0";
- else if (Imm == DoubleToBits(-2.0))
+ else if (Imm == llvm::bit_cast<uint64_t>(-2.0))
O << "-2.0";
- else if (Imm == DoubleToBits(4.0))
+ else if (Imm == llvm::bit_cast<uint64_t>(4.0))
O << "4.0";
- else if (Imm == DoubleToBits(-4.0))
+ else if (Imm == llvm::bit_cast<uint64_t>(-4.0))
O << "-4.0";
else if (Imm == 0x3fc45f306dc9c882 &&
- STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
+ STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
O << "0.15915494309189532";
else {
assert(isUInt<32>(Imm) || isInt<32>(Imm));
@@ -597,7 +521,7 @@ void AMDGPUInstPrinter::printDefaultVccOperand(bool FirstOperand,
raw_ostream &O) {
if (!FirstOperand)
O << ", ";
- printRegOperand(STI.getFeatureBits()[AMDGPU::FeatureWavefrontSize64]
+ printRegOperand(STI.hasFeature(AMDGPU::FeatureWavefrontSize64)
? AMDGPU::VCC
: AMDGPU::VCC_LO,
O, MRI);
@@ -718,7 +642,7 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_IMM_V2FP16:
if (!isUInt<16>(Op.getImm()) &&
- STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
+ STI.hasFeature(AMDGPU::FeatureVOP3Literal)) {
printImmediate32(Op.getImm(), STI, O);
break;
}
@@ -742,9 +666,10 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
O << formatDec(Op.getImm());
break;
case MCOI::OPERAND_REGISTER:
- // FIXME: This should be removed and handled somewhere else. Seems to come
- // from a disassembler bug.
- O << "/*invalid immediate*/";
+ // The disassembler does not fail on operands that should not allow immediate
+ // operands; it decodes them into a 32-bit immediate operand instead.
+ printImmediate32(Op.getImm(), STI, O);
+ O << "/*Invalid immediate*/";
break;
default:
// We hit this for the immediate instruction bits that don't yet have a
@@ -761,9 +686,9 @@ void AMDGPUInstPrinter::printRegularOperand(const MCInst *MI, unsigned OpNo,
int RCID = Desc.operands()[OpNo].RegClass;
unsigned RCBits = AMDGPU::getRegBitWidth(MRI.getRegClass(RCID));
if (RCBits == 32)
- printImmediate32(FloatToBits(Value), STI, O);
+ printImmediate32(llvm::bit_cast<uint32_t>((float)Value), STI, O);
else if (RCBits == 64)
- printImmediate64(DoubleToBits(Value), STI, O);
+ printImmediate64(llvm::bit_cast<uint64_t>(Value), STI, O);
else
llvm_unreachable("Invalid register class size");
}
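Editorial aside, not part of the upstream diff: the immediate-printing hunks
above replace FloatToBits/DoubleToBits with llvm::bit_cast without changing the
emitted bits. A standalone check using std::bit_cast, the standard analogue of
the LLVM helper used here:

    #include <bit>
    #include <cstdint>

    // The inline-constant comparisons keep the familiar IEEE-754 encodings
    // either way.
    static_assert(std::bit_cast<std::uint32_t>(1.0f) == 0x3F800000u);
    static_assert(std::bit_cast<std::uint64_t>(1.0) == 0x3FF0000000000000ull);
    static_assert(std::bit_cast<std::uint32_t>(-4.0f) == 0xC0800000u);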
@@ -1012,16 +937,16 @@ void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
}
}
-void AMDGPUInstPrinter::printRowMask(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
+void AMDGPUInstPrinter::printDppRowMask(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
O << " row_mask:";
printU4ImmOperand(MI, OpNo, STI, O);
}
-void AMDGPUInstPrinter::printBankMask(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
+void AMDGPUInstPrinter::printDppBankMask(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
O << " bank_mask:";
printU4ImmOperand(MI, OpNo, STI, O);
}
@@ -1035,9 +960,8 @@ void AMDGPUInstPrinter::printDppBoundCtrl(const MCInst *MI, unsigned OpNo,
}
}
-void AMDGPUInstPrinter::printFI(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
+void AMDGPUInstPrinter::printDppFI(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
using namespace llvm::AMDGPU::DPP;
unsigned Imm = MI->getOperand(OpNo).getImm();
if (Imm == DPP_FI_1 || Imm == DPP8_FI_1) {
@@ -1287,9 +1211,9 @@ void AMDGPUInstPrinter::printInterpAttrChan(const MCInst *MI, unsigned OpNum,
O << '.' << "xyzw"[Chan & 0x3];
}
-void AMDGPUInstPrinter::printVGPRIndexMode(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
+void AMDGPUInstPrinter::printGPRIdxMode(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
using namespace llvm::AMDGPU::VGPRIndexMode;
unsigned Val = MI->getOperand(OpNo).getImm();
@@ -1338,18 +1262,6 @@ void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
O << Asm;
}
-void AMDGPUInstPrinter::printHigh(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "high");
-}
-
-void AMDGPUInstPrinter::printClampSI(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- printNamedBit(MI, OpNo, O, "clamp");
-}
-
void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -1496,7 +1408,7 @@ void AMDGPUInstPrinter::printSwizzle(const MCInst *MI, unsigned OpNo,
}
}
-void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
+void AMDGPUInstPrinter::printSWaitCnt(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
@@ -1558,7 +1470,7 @@ void AMDGPUInstPrinter::printDepCtr(const MCInst *MI, unsigned OpNo,
}
}
-void AMDGPUInstPrinter::printDelayFlag(const MCInst *MI, unsigned OpNo,
+void AMDGPUInstPrinter::printSDelayALU(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
const char *BadInstId = "/* invalid instid value */";
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
index 3486cca712ae..3b14faab136b 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h
@@ -38,7 +38,6 @@ public:
private:
void printU4ImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU16ImmOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printU4ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -48,9 +47,6 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O,
StringRef BitName);
- void printOffen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printIdxen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printAddr64(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printFlatOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
@@ -68,34 +64,14 @@ private:
const MCSubtargetInfo &STI, raw_ostream &O);
void printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printGDS(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
- raw_ostream &O);
void printCPol(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printSWZ(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
- raw_ostream &O);
- void printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
- raw_ostream &O);
void printDMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printDim(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printUNorm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
- raw_ostream &O);
- void printDA(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
- raw_ostream &O);
void printR128A16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printA16(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
- raw_ostream &O);
- void printLWE(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O);
- void printD16(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O);
- void printExpCompr(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O);
- void printExpVM(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O);
void printFORMAT(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printSymbolicFormat(const MCInst *MI,
@@ -132,14 +108,14 @@ private:
raw_ostream &O);
void printDPPCtrl(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printRowMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
- raw_ostream &O);
- void printBankMask(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O);
+ void printDppRowMask(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printDppBankMask(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printDppBoundCtrl(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printFI(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O);
+ void printDppFI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
void printSDWASel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printSDWADstSel(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
@@ -166,8 +142,8 @@ private:
void printInterpAttrChan(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
- void printVGPRIndexMode(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O);
+ void printGPRIdxMode(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printMemOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printBLGP(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
@@ -205,12 +181,8 @@ public:
protected:
void printAbs(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printHigh(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
- raw_ostream &O);
void printClamp(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printClampSI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
- raw_ostream &O);
void printOModSI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printLiteral(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
@@ -241,11 +213,11 @@ protected:
raw_ostream &O);
void printSwizzle(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printWaitFlag(const MCInst *MI, unsigned OpNo,
+ void printSWaitCnt(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printDepCtr(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
- void printDelayFlag(const MCInst *MI, unsigned OpNo,
+ void printSDelayALU(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printHwreg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index ded3fb7ab8d9..d539d75fdff0 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -8,9 +8,9 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUMCAsmInfo.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/MC/MCSubtargetInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
@@ -40,7 +40,7 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT,
HasNoDeadStrip = true;
//===--- Dwarf Emission Directives -----------------------------------===//
SupportsDebugInformation = true;
- UsesCFIForDebug = true;
+ UsesCFIWithoutEH = true;
DwarfRegNumForCFI = true;
UseIntegratedAssembler = false;
@@ -58,11 +58,11 @@ unsigned AMDGPUMCAsmInfo::getMaxInstLength(const MCSubtargetInfo *STI) const {
return MaxInstLength;
// Maximum for NSA encoded images
- if (STI->getFeatureBits()[AMDGPU::FeatureNSAEncoding])
+ if (STI->hasFeature(AMDGPU::FeatureNSAEncoding))
return 20;
// 64-bit instruction with 32-bit literal.
- if (STI->getFeatureBits()[AMDGPU::FeatureVOP3Literal])
+ if (STI->hasFeature(AMDGPU::FeatureVOP3Literal))
return 12;
return 8;
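
A standalone sketch (not part of the diff) of the decision order in getMaxInstLength above, with plain bools standing in for the STI->hasFeature(...) queries; the helper name and parameters are invented for illustration.

#include <cstdio>

static unsigned maxInstLength(bool HasNSAEncoding, bool HasVOP3Literal) {
  if (HasNSAEncoding)  // NSA-encoded image instructions can be the longest.
    return 20;
  if (HasVOP3Literal)  // 64-bit instruction plus a 32-bit literal.
    return 12;
  return 8;            // otherwise at most a 64-bit encoding
}

int main() {
  std::printf("%u %u %u\n", maxInstLength(true, true),
              maxInstLength(false, true), maxInstLength(false, false));
  return 0; // prints 20 12 8
}
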
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
index 93bec8aaadfd..5e77a8caa04e 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- AMDGPUCodeEmitter.cpp - AMDGPU Code Emitter interface -------------===//
+//===-- AMDGPUMCCodeEmitter.cpp - AMDGPU Code Emitter ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -7,14 +7,586 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// CodeEmitter interface for SI codegen.
+/// The AMDGPU code emitter produces machine code that can be executed
+/// directly on the GPU device.
//
//===----------------------------------------------------------------------===//
-#include "AMDGPUMCCodeEmitter.h"
+#include "MCTargetDesc/AMDGPUFixupKinds.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIDefines.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include <optional>
using namespace llvm;
-// pin vtable to this file
-void AMDGPUMCCodeEmitter::anchor() {}
+namespace {
+class AMDGPUMCCodeEmitter : public MCCodeEmitter {
+ const MCRegisterInfo &MRI;
+ const MCInstrInfo &MCII;
+
+public:
+ AMDGPUMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI)
+ : MRI(MRI), MCII(MCII) {}
+
+ /// Encode the instruction and write it to the OS.
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
+ void getMachineOpValue(const MCInst &MI, const MCOperand &MO, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// Use a fixup to encode the simm16 field for SOPP branch
+ /// instructions.
+ void getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ void getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ void getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ void getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ void getAVOperandEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+private:
+ uint64_t getImplicitOpSelHiEncoding(int Opcode) const;
+ void getMachineOpValueCommon(const MCInst &MI, const MCOperand &MO,
+ unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// Encode an fp or int literal.
+ std::optional<uint32_t> getLitEncoding(const MCOperand &MO,
+ const MCOperandInfo &OpInfo,
+ const MCSubtargetInfo &STI) const;
+
+ void getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
+ APInt &Inst, APInt &Scratch,
+ const MCSubtargetInfo &STI) const;
+};
+
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
+ MCContext &Ctx) {
+ return new AMDGPUMCCodeEmitter(MCII, *Ctx.getRegisterInfo());
+}
+
+// Returns the encoding value to use if the given integer is an integer inline
+// immediate value, or 0 if it is not.
+template <typename IntTy>
+static uint32_t getIntInlineImmEncoding(IntTy Imm) {
+ if (Imm >= 0 && Imm <= 64)
+ return 128 + Imm;
+
+ if (Imm >= -16 && Imm <= -1)
+ return 192 + std::abs(Imm);
+
+ return 0;
+}
+
+static uint32_t getLit16IntEncoding(uint16_t Val, const MCSubtargetInfo &STI) {
+ uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
+ return IntImm == 0 ? 255 : IntImm;
+}
+
+static uint32_t getLit16Encoding(uint16_t Val, const MCSubtargetInfo &STI) {
+ uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
+ if (IntImm != 0)
+ return IntImm;
+
+ if (Val == 0x3800) // 0.5
+ return 240;
+
+ if (Val == 0xB800) // -0.5
+ return 241;
+
+ if (Val == 0x3C00) // 1.0
+ return 242;
+
+ if (Val == 0xBC00) // -1.0
+ return 243;
+
+ if (Val == 0x4000) // 2.0
+ return 244;
+
+ if (Val == 0xC000) // -2.0
+ return 245;
+
+ if (Val == 0x4400) // 4.0
+ return 246;
+
+ if (Val == 0xC400) // -4.0
+ return 247;
+
+ if (Val == 0x3118 && // 1.0 / (2.0 * pi)
+ STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
+ return 248;
+
+ return 255;
+}
+
+static uint32_t getLit32Encoding(uint32_t Val, const MCSubtargetInfo &STI) {
+ uint32_t IntImm = getIntInlineImmEncoding(static_cast<int32_t>(Val));
+ if (IntImm != 0)
+ return IntImm;
+
+ if (Val == llvm::bit_cast<uint32_t>(0.5f))
+ return 240;
+
+ if (Val == llvm::bit_cast<uint32_t>(-0.5f))
+ return 241;
+
+ if (Val == llvm::bit_cast<uint32_t>(1.0f))
+ return 242;
+
+ if (Val == llvm::bit_cast<uint32_t>(-1.0f))
+ return 243;
+
+ if (Val == llvm::bit_cast<uint32_t>(2.0f))
+ return 244;
+
+ if (Val == llvm::bit_cast<uint32_t>(-2.0f))
+ return 245;
+
+ if (Val == llvm::bit_cast<uint32_t>(4.0f))
+ return 246;
+
+ if (Val == llvm::bit_cast<uint32_t>(-4.0f))
+ return 247;
+
+ if (Val == 0x3e22f983 && // 1.0 / (2.0 * pi)
+ STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
+ return 248;
+
+ return 255;
+}
+
+static uint32_t getLit64Encoding(uint64_t Val, const MCSubtargetInfo &STI) {
+ uint32_t IntImm = getIntInlineImmEncoding(static_cast<int64_t>(Val));
+ if (IntImm != 0)
+ return IntImm;
+
+ if (Val == llvm::bit_cast<uint64_t>(0.5))
+ return 240;
+
+ if (Val == llvm::bit_cast<uint64_t>(-0.5))
+ return 241;
+
+ if (Val == llvm::bit_cast<uint64_t>(1.0))
+ return 242;
+
+ if (Val == llvm::bit_cast<uint64_t>(-1.0))
+ return 243;
+
+ if (Val == llvm::bit_cast<uint64_t>(2.0))
+ return 244;
+
+ if (Val == llvm::bit_cast<uint64_t>(-2.0))
+ return 245;
+
+ if (Val == llvm::bit_cast<uint64_t>(4.0))
+ return 246;
+
+ if (Val == llvm::bit_cast<uint64_t>(-4.0))
+ return 247;
+
+ if (Val == 0x3fc45f306dc9c882 && // 1.0 / (2.0 * pi)
+ STI.hasFeature(AMDGPU::FeatureInv2PiInlineImm))
+ return 248;
+
+ return 255;
+}
+
+std::optional<uint32_t>
+AMDGPUMCCodeEmitter::getLitEncoding(const MCOperand &MO,
+ const MCOperandInfo &OpInfo,
+ const MCSubtargetInfo &STI) const {
+ int64_t Imm;
+ if (MO.isExpr()) {
+ const auto *C = dyn_cast<MCConstantExpr>(MO.getExpr());
+ if (!C)
+ return 255;
+
+ Imm = C->getValue();
+ } else {
+
+ assert(!MO.isDFPImm());
+
+ if (!MO.isImm())
+ return {};
+
+ Imm = MO.getImm();
+ }
+
+ switch (OpInfo.OperandType) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
+ case AMDGPU::OPERAND_REG_IMM_V2INT32:
+ case AMDGPU::OPERAND_REG_IMM_V2FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
+ return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
+
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
+ return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
+
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
+ return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
+ // FIXME Is this correct? What do inline immediates do on SI for f16 src
+ // which does not have f16 support?
+ return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16: {
+ if (!isUInt<16>(Imm) && STI.hasFeature(AMDGPU::FeatureVOP3Literal))
+ return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
+ if (OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
+ return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
+ [[fallthrough]];
+ }
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
+ return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
+ case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
+ uint16_t Lo16 = static_cast<uint16_t>(Imm);
+ uint32_t Encoding = getLit16Encoding(Lo16, STI);
+ return Encoding;
+ }
+ case AMDGPU::OPERAND_KIMM32:
+ case AMDGPU::OPERAND_KIMM16:
+ return MO.getImm();
+ default:
+ llvm_unreachable("invalid operand size");
+ }
+}
+
+uint64_t AMDGPUMCCodeEmitter::getImplicitOpSelHiEncoding(int Opcode) const {
+ using namespace AMDGPU::VOP3PEncoding;
+ using namespace AMDGPU::OpName;
+
+ if (AMDGPU::hasNamedOperand(Opcode, op_sel_hi)) {
+ if (AMDGPU::hasNamedOperand(Opcode, src2))
+ return 0;
+ if (AMDGPU::hasNamedOperand(Opcode, src1))
+ return OP_SEL_HI_2;
+ if (AMDGPU::hasNamedOperand(Opcode, src0))
+ return OP_SEL_HI_1 | OP_SEL_HI_2;
+ }
+ return OP_SEL_HI_0 | OP_SEL_HI_1 | OP_SEL_HI_2;
+}
+
+static bool isVCMPX64(const MCInstrDesc &Desc) {
+ return (Desc.TSFlags & SIInstrFlags::VOP3) &&
+ Desc.hasImplicitDefOfPhysReg(AMDGPU::EXEC);
+}
+
+void AMDGPUMCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ int Opcode = MI.getOpcode();
+ APInt Encoding, Scratch;
+ getBinaryCodeForInstr(MI, Fixups, Encoding, Scratch, STI);
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ unsigned bytes = Desc.getSize();
+
+ // Set unused op_sel_hi bits to 1 for VOP3P and MAI instructions.
+ // Note that accvgpr_read/write are MAI, have src0, but do not use op_sel.
+ if ((Desc.TSFlags & SIInstrFlags::VOP3P) ||
+ Opcode == AMDGPU::V_ACCVGPR_READ_B32_vi ||
+ Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_vi) {
+ Encoding |= getImplicitOpSelHiEncoding(Opcode);
+ }
+
+ // GFX10+ v_cmpx opcodes promoted to VOP3 have implied dst=EXEC.
+ // Documentation requires dst to be encoded as EXEC (0x7E),
+ // but it looks like the actual value encoded for dst operand
+ // is ignored by HW. It was decided to define dst as "do not care"
+ // in td files to allow disassembler accept any dst value.
+ // However, dst is encoded as EXEC for compatibility with SP3.
+ if (AMDGPU::isGFX10Plus(STI) && isVCMPX64(Desc)) {
+ assert((Encoding & 0xFF) == 0);
+ Encoding |= MRI.getEncodingValue(AMDGPU::EXEC_LO);
+ }
+
+ for (unsigned i = 0; i < bytes; i++) {
+ CB.push_back((uint8_t)Encoding.extractBitsAsZExtValue(8, 8 * i));
+ }
+
+ // NSA encoding.
+ if (AMDGPU::isGFX10Plus(STI) && Desc.TSFlags & SIInstrFlags::MIMG) {
+ int vaddr0 = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::vaddr0);
+ int srsrc = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::srsrc);
+ assert(vaddr0 >= 0 && srsrc > vaddr0);
+ unsigned NumExtraAddrs = srsrc - vaddr0 - 1;
+ unsigned NumPadding = (-NumExtraAddrs) & 3;
+
+ for (unsigned i = 0; i < NumExtraAddrs; ++i) {
+ getMachineOpValue(MI, MI.getOperand(vaddr0 + 1 + i), Encoding, Fixups,
+ STI);
+ CB.push_back((uint8_t)Encoding.getLimitedValue());
+ }
+ CB.append(NumPadding, 0);
+ }
+
+ if ((bytes > 8 && STI.hasFeature(AMDGPU::FeatureVOP3Literal)) ||
+ (bytes > 4 && !STI.hasFeature(AMDGPU::FeatureVOP3Literal)))
+ return;
+
+ // Do not print literals from SISrc Operands for insts with mandatory literals
+ if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm))
+ return;
+
+ // Check for additional literals
+ for (unsigned i = 0, e = Desc.getNumOperands(); i < e; ++i) {
+
+ // Check if this operand should be encoded as [SV]Src
+ if (!AMDGPU::isSISrcOperand(Desc, i))
+ continue;
+
+ // Is this operand a literal immediate?
+ const MCOperand &Op = MI.getOperand(i);
+ auto Enc = getLitEncoding(Op, Desc.operands()[i], STI);
+ if (!Enc || *Enc != 255)
+ continue;
+
+ // Yes! Encode it
+ int64_t Imm = 0;
+
+ if (Op.isImm())
+ Imm = Op.getImm();
+ else if (Op.isExpr()) {
+ if (const auto *C = dyn_cast<MCConstantExpr>(Op.getExpr()))
+ Imm = C->getValue();
+
+ } else if (!Op.isExpr()) // Exprs will be replaced with a fixup value.
+ llvm_unreachable("Must be immediate or expr");
+
+ support::endian::write<uint32_t>(CB, Imm, support::endianness::little);
+
+ // Only one literal value allowed
+ break;
+ }
+}
+
+void AMDGPUMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
+ APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ if (MO.isExpr()) {
+ const MCExpr *Expr = MO.getExpr();
+ MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_sopp_br;
+ Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
+ Op = APInt::getZero(96);
+ } else {
+ getMachineOpValue(MI, MO, Op, Fixups, STI);
+ }
+}
+
+void AMDGPUMCCodeEmitter::getSMEMOffsetEncoding(
+ const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
+ auto Offset = MI.getOperand(OpNo).getImm();
+ // VI only supports 20-bit unsigned offsets.
+ assert(!AMDGPU::isVI(STI) || isUInt<20>(Offset));
+ Op = Offset;
+}
+
+void AMDGPUMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
+ APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ using namespace AMDGPU::SDWA;
+
+ uint64_t RegEnc = 0;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ if (MO.isReg()) {
+ unsigned Reg = MO.getReg();
+ RegEnc |= MRI.getEncodingValue(Reg);
+ RegEnc &= SDWA9EncValues::SRC_VGPR_MASK;
+ if (AMDGPU::isSGPR(AMDGPU::mc2PseudoReg(Reg), &MRI)) {
+ RegEnc |= SDWA9EncValues::SRC_SGPR_MASK;
+ }
+ Op = RegEnc;
+ return;
+ } else {
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ auto Enc = getLitEncoding(MO, Desc.operands()[OpNo], STI);
+ if (Enc && *Enc != 255) {
+ Op = *Enc | SDWA9EncValues::SRC_SGPR_MASK;
+ return;
+ }
+ }
+
+ llvm_unreachable("Unsupported operand kind");
+}
+
+void AMDGPUMCCodeEmitter::getSDWAVopcDstEncoding(
+ const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
+ using namespace AMDGPU::SDWA;
+
+ uint64_t RegEnc = 0;
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ unsigned Reg = MO.getReg();
+ if (Reg != AMDGPU::VCC && Reg != AMDGPU::VCC_LO) {
+ RegEnc |= MRI.getEncodingValue(Reg);
+ RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
+ RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK;
+ }
+ Op = RegEnc;
+}
+
+void AMDGPUMCCodeEmitter::getAVOperandEncoding(
+ const MCInst &MI, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
+ unsigned Reg = MI.getOperand(OpNo).getReg();
+ uint64_t Enc = MRI.getEncodingValue(Reg);
+
+ // VGPR and AGPR have the same encoding, but SrcA and SrcB operands of mfma
+ // instructions use acc[0:1] modifier bits to distinguish. These bits are
+ // encoded as a virtual 9th bit of the register for these operands.
+ if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_96RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_160RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_192RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_224RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_288RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_320RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_352RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_384RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(Reg) ||
+ MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg))
+ Enc |= 512;
+
+ Op = Enc;
+}
+
+static bool needsPCRel(const MCExpr *Expr) {
+ switch (Expr->getKind()) {
+ case MCExpr::SymbolRef: {
+ auto *SE = cast<MCSymbolRefExpr>(Expr);
+ MCSymbolRefExpr::VariantKind Kind = SE->getKind();
+ return Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_LO &&
+ Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_HI;
+ }
+ case MCExpr::Binary: {
+ auto *BE = cast<MCBinaryExpr>(Expr);
+ if (BE->getOpcode() == MCBinaryExpr::Sub)
+ return false;
+ return needsPCRel(BE->getLHS()) || needsPCRel(BE->getRHS());
+ }
+ case MCExpr::Unary:
+ return needsPCRel(cast<MCUnaryExpr>(Expr)->getSubExpr());
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ return false;
+ }
+ llvm_unreachable("invalid kind");
+}
+
+void AMDGPUMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MO.isReg()){
+ Op = MRI.getEncodingValue(MO.getReg());
+ return;
+ }
+ unsigned OpNo = &MO - MI.begin();
+ getMachineOpValueCommon(MI, MO, OpNo, Op, Fixups, STI);
+}
+
+void AMDGPUMCCodeEmitter::getMachineOpValueCommon(
+ const MCInst &MI, const MCOperand &MO, unsigned OpNo, APInt &Op,
+ SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
+
+ if (MO.isExpr() && MO.getExpr()->getKind() != MCExpr::Constant) {
+ // FIXME: If this is expression is PCRel or not should not depend on what
+ // the expression looks like. Given that this is just a general expression,
+ // it should probably be FK_Data_4 and whatever is producing
+ //
+ // s_add_u32 s2, s2, (extern_const_addrspace+16
+ //
+ // And expecting a PCRel should instead produce
+ //
+ // .Ltmp1:
+ // s_add_u32 s2, s2, (extern_const_addrspace+16)-.Ltmp1
+ MCFixupKind Kind;
+ if (needsPCRel(MO.getExpr()))
+ Kind = FK_PCRel_4;
+ else
+ Kind = FK_Data_4;
+
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ uint32_t Offset = Desc.getSize();
+ assert(Offset == 4 || Offset == 8);
+
+ Fixups.push_back(MCFixup::create(Offset, MO.getExpr(), Kind, MI.getLoc()));
+ }
+
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
+ if (auto Enc = getLitEncoding(MO, Desc.operands()[OpNo], STI)) {
+ Op = *Enc;
+ return;
+ }
+ } else if (MO.isImm()) {
+ Op = MO.getImm();
+ return;
+ }
+
+ llvm_unreachable("Encoding of this operand type is not supported yet.");
+}
+
+#include "AMDGPUGenMCCodeEmitter.inc"
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
deleted file mode 100644
index 200c9b8726e2..000000000000
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ /dev/null
@@ -1,68 +0,0 @@
-//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface -----*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// CodeEmitter interface for SI codegen.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCCODEEMITTER_H
-#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUMCCODEEMITTER_H
-
-#include "llvm/ADT/APInt.h"
-#include "llvm/MC/MCCodeEmitter.h"
-
-namespace llvm {
-
-class MCInst;
-class MCInstrInfo;
-class MCOperand;
-class MCSubtargetInfo;
-
-class AMDGPUMCCodeEmitter : public MCCodeEmitter {
- virtual void anchor();
-
-protected:
- const MCInstrInfo &MCII;
-
- AMDGPUMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {}
-
-public:
- void getBinaryCodeForInstr(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixups,
- APInt &Inst, APInt &Scratch,
- const MCSubtargetInfo &STI) const;
-
- virtual void getMachineOpValue(const MCInst &MI, const MCOperand &MO,
- APInt &Op, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const = 0;
-
- virtual void getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const = 0;
-
- virtual void getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const = 0;
-
- virtual void getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const = 0;
-
- virtual void getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
- APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const = 0;
-
- virtual void getAVOperandEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const = 0;
-};
-
-} // End namespace llvm
-
-#endif
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 8a9fea3c8d26..a6a01479b5b1 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -150,8 +150,9 @@ static MCInstrAnalysis *createAMDGPUMCInstrAnalysis(const MCInstrInfo *Info) {
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetMC() {
TargetRegistry::RegisterMCInstrInfo(getTheGCNTarget(), createAMDGPUMCInstrInfo);
- TargetRegistry::RegisterMCInstrInfo(getTheAMDGPUTarget(), createR600MCInstrInfo);
- for (Target *T : {&getTheAMDGPUTarget(), &getTheGCNTarget()}) {
+ TargetRegistry::RegisterMCInstrInfo(getTheR600Target(),
+ createR600MCInstrInfo);
+ for (Target *T : {&getTheR600Target(), &getTheGCNTarget()}) {
RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T);
TargetRegistry::RegisterMCRegInfo(*T, createAMDGPUMCRegisterInfo);
@@ -163,14 +164,14 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetMC() {
}
// R600 specific registration
- TargetRegistry::RegisterMCCodeEmitter(getTheAMDGPUTarget(),
+ TargetRegistry::RegisterMCCodeEmitter(getTheR600Target(),
createR600MCCodeEmitter);
TargetRegistry::RegisterObjectTargetStreamer(
- getTheAMDGPUTarget(), createAMDGPUObjectTargetStreamer);
+ getTheR600Target(), createAMDGPUObjectTargetStreamer);
// GCN specific registration
TargetRegistry::RegisterMCCodeEmitter(getTheGCNTarget(),
- createSIMCCodeEmitter);
+ createAMDGPUMCCodeEmitter);
TargetRegistry::RegisterAsmTargetStreamer(getTheGCNTarget(),
createAMDGPUAsmTargetStreamer);
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index c2e2563c3989..006115ba14fc 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -32,8 +32,8 @@ enum AMDGPUDwarfFlavour : unsigned { Wave64 = 0, Wave32 = 1 };
MCRegisterInfo *createGCNMCRegisterInfo(AMDGPUDwarfFlavour DwarfFlavour);
-MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
- MCContext &Ctx);
+MCCodeEmitter *createAMDGPUMCCodeEmitter(const MCInstrInfo &MCII,
+ MCContext &Ctx);
MCAsmBackend *createAMDGPUAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 7a4af1af33d6..1bd3cdc67800 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -26,7 +26,7 @@
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetParser.h"
+#include "llvm/TargetParser/TargetParser.h"
using namespace llvm;
using namespace llvm::AMDGPU;
@@ -107,6 +107,8 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
@@ -122,6 +124,8 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101: AK = GK_GFX1101; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102: AK = GK_GFX1102; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103: AK = GK_GFX1103; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150: AK = GK_GFX1150; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break;
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
}
@@ -176,6 +180,8 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
+ case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941;
+ case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
@@ -191,6 +197,8 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX1101: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1101;
case GK_GFX1102: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1102;
case GK_GFX1103: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1103;
+ case GK_GFX1150: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1150;
+ case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
}
@@ -320,7 +328,7 @@ bool AMDGPUTargetAsmStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KD, uint64_t NextVGPR, uint64_t NextSGPR,
- bool ReserveVCC, bool ReserveFlatScr) {
+ bool ReserveVCC, bool ReserveFlatScr, unsigned CodeObjectVersion) {
IsaVersion IVersion = getIsaVersion(STI.getCPU());
OS << "\t.amdhsa_kernel " << KernelName << '\n';
@@ -367,7 +375,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD,
kernel_code_properties,
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
- if (AMDGPU::getAmdhsaCodeObjectVersion() >= 5)
+ if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5)
PRINT_FIELD(OS, ".amdhsa_uses_dynamic_stack", KD, kernel_code_properties,
amdhsa::KERNEL_CODE_PROPERTY_USES_DYNAMIC_STACK);
PRINT_FIELD(OS,
@@ -407,19 +415,17 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
if (IVersion.Major >= 7 && !ReserveFlatScr && !hasArchitectedFlatScratch(STI))
OS << "\t\t.amdhsa_reserve_flat_scratch " << ReserveFlatScr << '\n';
- if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(&STI)) {
- switch (*HsaAbiVer) {
- default:
- break;
- case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
- break;
- case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
- case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
- case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
- if (getTargetID()->isXnackSupported())
- OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
- break;
- }
+ switch (CodeObjectVersion) {
+ default:
+ break;
+ case AMDGPU::AMDHSA_COV2:
+ break;
+ case AMDGPU::AMDHSA_COV3:
+ case AMDGPU::AMDHSA_COV4:
+ case AMDGPU::AMDHSA_COV5:
+ if (getTargetID()->isXnackSupported())
+ OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
+ break;
}
PRINT_FIELD(OS, ".amdhsa_float_round_mode_32", KD,
@@ -850,7 +856,8 @@ bool AMDGPUTargetELFStreamer::EmitCodeEnd(const MCSubtargetInfo &STI) {
void AMDGPUTargetELFStreamer::EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
- uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) {
+ uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
+ unsigned CodeObjectVersion) {
auto &Streamer = getStreamer();
auto &Context = Streamer.getContext();
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 50511794a013..db43de8fcc5f 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -93,7 +93,8 @@ public:
virtual void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
- uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr){};
+ uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
+ unsigned CodeObjectVersion){};
static StringRef getArchNameFromElfMach(unsigned ElfMach);
static unsigned getElfMach(StringRef GPU);
@@ -104,12 +105,15 @@ public:
std::optional<AMDGPU::IsaInfo::AMDGPUTargetID> &getTargetID() {
return TargetID;
}
- void initializeTargetID(const MCSubtargetInfo &STI) {
+ void initializeTargetID(const MCSubtargetInfo &STI,
+ unsigned CodeObjectVersion) {
assert(TargetID == std::nullopt && "TargetID can only be initialized once");
TargetID.emplace(STI);
+ getTargetID()->setCodeObjectVersion(CodeObjectVersion);
}
- void initializeTargetID(const MCSubtargetInfo &STI, StringRef FeatureString) {
- initializeTargetID(STI);
+ void initializeTargetID(const MCSubtargetInfo &STI, StringRef FeatureString,
+ unsigned CodeObjectVersion) {
+ initializeTargetID(STI, CodeObjectVersion);
assert(getTargetID() != std::nullopt && "TargetID is None");
getTargetID()->setTargetIDFromFeaturesString(FeatureString);
@@ -153,7 +157,8 @@ public:
void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
- uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override;
+ uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
+ unsigned CodeObjectVersion) override;
};
class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
@@ -213,7 +218,8 @@ public:
void EmitAmdhsaKernelDescriptor(
const MCSubtargetInfo &STI, StringRef KernelName,
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
- uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr) override;
+ uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
+ unsigned CodeObjectVersion) override;
};
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.cpp
index f77ed1faf029..22d0594e2b86 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.cpp
@@ -97,7 +97,7 @@ void R600InstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
assert(Op.isImm() || Op.isExpr());
if (Op.isImm()) {
int64_t Imm = Op.getImm();
- O << Imm << '(' << BitsToFloat(Imm) << ')';
+ O << Imm << '(' << llvm::bit_cast<float>(static_cast<uint32_t>(Imm)) << ')';
}
if (Op.isExpr()) {
Op.getExpr()->print(O << '@', &MAI);
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 3d926e52c368..bbbfbe4faa0f 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -21,8 +21,8 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/EndianStream.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
using namespace llvm;
@@ -39,7 +39,7 @@ public:
R600MCCodeEmitter &operator=(const R600MCCodeEmitter &) = delete;
/// Encode the instruction and write it to the OS.
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -49,9 +49,8 @@ public:
const MCSubtargetInfo &STI) const;
private:
-
- void Emit(uint32_t value, raw_ostream &OS) const;
- void Emit(uint64_t value, raw_ostream &OS) const;
+ void emit(uint32_t value, SmallVectorImpl<char> &CB) const;
+ void emit(uint64_t value, SmallVectorImpl<char> &CB) const;
unsigned getHWReg(unsigned regNo) const;
@@ -84,7 +83,8 @@ MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
return new R600MCCodeEmitter(MCII, *Ctx.getRegisterInfo());
}
-void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+void R600MCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
@@ -97,13 +97,13 @@ void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
} else if (IS_VTX(Desc)) {
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups, STI);
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
- if (!(STI.getFeatureBits()[R600::FeatureCaymanISA])) {
+ if (!(STI.hasFeature(R600::FeatureCaymanISA))) {
InstWord2 |= 1 << 19; // Mega-Fetch bit
}
- Emit(InstWord01, OS);
- Emit(InstWord2, OS);
- Emit((uint32_t) 0, OS);
+ emit(InstWord01, CB);
+ emit(InstWord2, CB);
+ emit((uint32_t)0, CB);
} else if (IS_TEX(Desc)) {
int64_t Sampler = MI.getOperand(14).getImm();
@@ -125,28 +125,28 @@ void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SrcSelect[ELEMENT_W] << 29 | Offsets[0] << 0 | Offsets[1] << 5 |
Offsets[2] << 10;
- Emit(Word01, OS);
- Emit(Word2, OS);
- Emit((uint32_t) 0, OS);
+ emit(Word01, CB);
+ emit(Word2, CB);
+ emit((uint32_t)0, CB);
} else {
uint64_t Inst = getBinaryCodeForInstr(MI, Fixups, STI);
- if ((STI.getFeatureBits()[R600::FeatureR600ALUInst]) &&
+ if ((STI.hasFeature(R600::FeatureR600ALUInst)) &&
((Desc.TSFlags & R600_InstFlag::OP1) ||
Desc.TSFlags & R600_InstFlag::OP2)) {
uint64_t ISAOpCode = Inst & (0x3FFULL << 39);
Inst &= ~(0x3FFULL << 39);
Inst |= ISAOpCode << 1;
}
- Emit(Inst, OS);
+ emit(Inst, CB);
}
}
-void R600MCCodeEmitter::Emit(uint32_t Value, raw_ostream &OS) const {
- support::endian::write(OS, Value, support::little);
+void R600MCCodeEmitter::emit(uint32_t Value, SmallVectorImpl<char> &CB) const {
+ support::endian::write(CB, Value, support::little);
}
-void R600MCCodeEmitter::Emit(uint64_t Value, raw_ostream &OS) const {
- support::endian::write(OS, Value, support::little);
+void R600MCCodeEmitter::emit(uint64_t Value, SmallVectorImpl<char> &CB) const {
+ support::endian::write(CB, Value, support::little);
}
unsigned R600MCCodeEmitter::getHWReg(unsigned RegNo) const {
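
A standalone sketch (not part of the diff) of what the raw_ostream to byte-buffer change means for R600's emit() helpers: instruction words are appended little-endian to a growable byte container. std::vector<char> and the manual shifts stand in for SmallVectorImpl<char> and support::endian::write(CB, Value, support::little).

#include <cstdint>
#include <cstdio>
#include <vector>

static void emitLE(uint64_t Value, unsigned NumBytes, std::vector<char> &CB) {
  for (unsigned i = 0; i < NumBytes; ++i) // least significant byte first
    CB.push_back(static_cast<char>((Value >> (8 * i)) & 0xff));
}

int main() {
  std::vector<char> CB;
  emitLE(0x11223344u, 4, CB);            // a 32-bit word
  emitLE(0x0123456789abcdefULL, 8, CB);  // a 64-bit word
  for (unsigned char c : CB)
    std::printf("%02x ", c);
  std::printf("\n"); // 44 33 22 11 ef cd ab 89 67 45 23 01
  return 0;
}
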
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
index b9ff195e0ddc..6f2ccb137235 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
@@ -13,7 +13,7 @@
#include "R600MCTargetDesc.h"
#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
using namespace llvm;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
deleted file mode 100644
index f659f08de027..000000000000
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ /dev/null
@@ -1,594 +0,0 @@
-//===-- SIMCCodeEmitter.cpp - SI Code Emitter -----------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// The SI code emitter produces machine code that can be executed
-/// directly on the GPU device.
-//
-//===----------------------------------------------------------------------===//
-
-#include "MCTargetDesc/AMDGPUFixupKinds.h"
-#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIDefines.h"
-#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/MC/MCCodeEmitter.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/SubtargetFeature.h"
-#include "llvm/Support/Casting.h"
-#include <optional>
-
-using namespace llvm;
-
-namespace {
-
-class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
- const MCRegisterInfo &MRI;
-
- /// Encode an fp or int literal
- std::optional<uint32_t> getLitEncoding(const MCOperand &MO,
- const MCOperandInfo &OpInfo,
- const MCSubtargetInfo &STI) const;
-
-public:
- SIMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
- : AMDGPUMCCodeEmitter(mcii), MRI(*ctx.getRegisterInfo()) {}
- SIMCCodeEmitter(const SIMCCodeEmitter &) = delete;
- SIMCCodeEmitter &operator=(const SIMCCodeEmitter &) = delete;
-
- /// Encode the instruction and write it to the OS.
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
-
- void getMachineOpValue(const MCInst &MI, const MCOperand &MO, APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
-
- /// Use a fixup to encode the simm16 field for SOPP branch
- /// instructions.
- void getSOPPBrEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
-
- void getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
-
- void getSDWASrcEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
-
- void getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
-
- void getAVOperandEncoding(const MCInst &MI, unsigned OpNo, APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
-
-private:
- uint64_t getImplicitOpSelHiEncoding(int Opcode) const;
- void getMachineOpValueCommon(const MCInst &MI, const MCOperand &MO,
- unsigned OpNo, APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-};
-
-} // end anonymous namespace
-
-MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
- MCContext &Ctx) {
- return new SIMCCodeEmitter(MCII, Ctx);
-}
-
-// Returns the encoding value to use if the given integer is an integer inline
-// immediate value, or 0 if it is not.
-template <typename IntTy>
-static uint32_t getIntInlineImmEncoding(IntTy Imm) {
- if (Imm >= 0 && Imm <= 64)
- return 128 + Imm;
-
- if (Imm >= -16 && Imm <= -1)
- return 192 + std::abs(Imm);
-
- return 0;
-}
-
-static uint32_t getLit16IntEncoding(uint16_t Val, const MCSubtargetInfo &STI) {
- uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
- return IntImm == 0 ? 255 : IntImm;
-}
-
-static uint32_t getLit16Encoding(uint16_t Val, const MCSubtargetInfo &STI) {
- uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
- if (IntImm != 0)
- return IntImm;
-
- if (Val == 0x3800) // 0.5
- return 240;
-
- if (Val == 0xB800) // -0.5
- return 241;
-
- if (Val == 0x3C00) // 1.0
- return 242;
-
- if (Val == 0xBC00) // -1.0
- return 243;
-
- if (Val == 0x4000) // 2.0
- return 244;
-
- if (Val == 0xC000) // -2.0
- return 245;
-
- if (Val == 0x4400) // 4.0
- return 246;
-
- if (Val == 0xC400) // -4.0
- return 247;
-
- if (Val == 0x3118 && // 1.0 / (2.0 * pi)
- STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
- return 248;
-
- return 255;
-}
-
-static uint32_t getLit32Encoding(uint32_t Val, const MCSubtargetInfo &STI) {
- uint32_t IntImm = getIntInlineImmEncoding(static_cast<int32_t>(Val));
- if (IntImm != 0)
- return IntImm;
-
- if (Val == FloatToBits(0.5f))
- return 240;
-
- if (Val == FloatToBits(-0.5f))
- return 241;
-
- if (Val == FloatToBits(1.0f))
- return 242;
-
- if (Val == FloatToBits(-1.0f))
- return 243;
-
- if (Val == FloatToBits(2.0f))
- return 244;
-
- if (Val == FloatToBits(-2.0f))
- return 245;
-
- if (Val == FloatToBits(4.0f))
- return 246;
-
- if (Val == FloatToBits(-4.0f))
- return 247;
-
- if (Val == 0x3e22f983 && // 1.0 / (2.0 * pi)
- STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
- return 248;
-
- return 255;
-}
-
-static uint32_t getLit64Encoding(uint64_t Val, const MCSubtargetInfo &STI) {
- uint32_t IntImm = getIntInlineImmEncoding(static_cast<int64_t>(Val));
- if (IntImm != 0)
- return IntImm;
-
- if (Val == DoubleToBits(0.5))
- return 240;
-
- if (Val == DoubleToBits(-0.5))
- return 241;
-
- if (Val == DoubleToBits(1.0))
- return 242;
-
- if (Val == DoubleToBits(-1.0))
- return 243;
-
- if (Val == DoubleToBits(2.0))
- return 244;
-
- if (Val == DoubleToBits(-2.0))
- return 245;
-
- if (Val == DoubleToBits(4.0))
- return 246;
-
- if (Val == DoubleToBits(-4.0))
- return 247;
-
- if (Val == 0x3fc45f306dc9c882 && // 1.0 / (2.0 * pi)
- STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
- return 248;
-
- return 255;
-}
-
-std::optional<uint32_t>
-SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
- const MCOperandInfo &OpInfo,
- const MCSubtargetInfo &STI) const {
- int64_t Imm;
- if (MO.isExpr()) {
- const auto *C = dyn_cast<MCConstantExpr>(MO.getExpr());
- if (!C)
- return 255;
-
- Imm = C->getValue();
- } else {
-
- assert(!MO.isDFPImm());
-
- if (!MO.isImm())
- return {};
-
- Imm = MO.getImm();
- }
-
- switch (OpInfo.OperandType) {
- case AMDGPU::OPERAND_REG_IMM_INT32:
- case AMDGPU::OPERAND_REG_IMM_FP32:
- case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
- case AMDGPU::OPERAND_REG_INLINE_C_INT32:
- case AMDGPU::OPERAND_REG_INLINE_C_FP32:
- case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
- case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
- case AMDGPU::OPERAND_REG_IMM_V2INT32:
- case AMDGPU::OPERAND_REG_IMM_V2FP32:
- case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
- return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
-
- case AMDGPU::OPERAND_REG_IMM_INT64:
- case AMDGPU::OPERAND_REG_IMM_FP64:
- case AMDGPU::OPERAND_REG_INLINE_C_INT64:
- case AMDGPU::OPERAND_REG_INLINE_C_FP64:
- case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
- return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
-
- case AMDGPU::OPERAND_REG_IMM_INT16:
- case AMDGPU::OPERAND_REG_INLINE_C_INT16:
- case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
- return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
- case AMDGPU::OPERAND_REG_IMM_FP16:
- case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
- case AMDGPU::OPERAND_REG_INLINE_C_FP16:
- case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
- // FIXME Is this correct? What do inline immediates do on SI for f16 src
- // which does not have f16 support?
- return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
- case AMDGPU::OPERAND_REG_IMM_V2INT16:
- case AMDGPU::OPERAND_REG_IMM_V2FP16: {
- if (!isUInt<16>(Imm) && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal])
- return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
- if (OpInfo.OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
- return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
- [[fallthrough]];
- }
- case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
- case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
- return getLit16IntEncoding(static_cast<uint16_t>(Imm), STI);
- case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
- case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
- uint16_t Lo16 = static_cast<uint16_t>(Imm);
- uint32_t Encoding = getLit16Encoding(Lo16, STI);
- return Encoding;
- }
- case AMDGPU::OPERAND_KIMM32:
- case AMDGPU::OPERAND_KIMM16:
- return MO.getImm();
- default:
- llvm_unreachable("invalid operand size");
- }
-}
-
-uint64_t SIMCCodeEmitter::getImplicitOpSelHiEncoding(int Opcode) const {
- using namespace AMDGPU::VOP3PEncoding;
- using namespace AMDGPU::OpName;
-
- if (AMDGPU::hasNamedOperand(Opcode, op_sel_hi)) {
- if (AMDGPU::hasNamedOperand(Opcode, src2))
- return 0;
- if (AMDGPU::hasNamedOperand(Opcode, src1))
- return OP_SEL_HI_2;
- if (AMDGPU::hasNamedOperand(Opcode, src0))
- return OP_SEL_HI_1 | OP_SEL_HI_2;
- }
- return OP_SEL_HI_0 | OP_SEL_HI_1 | OP_SEL_HI_2;
-}
-
-static bool isVCMPX64(const MCInstrDesc &Desc) {
- return (Desc.TSFlags & SIInstrFlags::VOP3) &&
- Desc.hasImplicitDefOfPhysReg(AMDGPU::EXEC);
-}
-
-void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- int Opcode = MI.getOpcode();
- APInt Encoding, Scratch;
- getBinaryCodeForInstr(MI, Fixups, Encoding, Scratch, STI);
- const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
- unsigned bytes = Desc.getSize();
-
- // Set unused op_sel_hi bits to 1 for VOP3P and MAI instructions.
- // Note that accvgpr_read/write are MAI, have src0, but do not use op_sel.
- if ((Desc.TSFlags & SIInstrFlags::VOP3P) ||
- Opcode == AMDGPU::V_ACCVGPR_READ_B32_vi ||
- Opcode == AMDGPU::V_ACCVGPR_WRITE_B32_vi) {
- Encoding |= getImplicitOpSelHiEncoding(Opcode);
- }
-
- // GFX10+ v_cmpx opcodes promoted to VOP3 have implied dst=EXEC.
- // Documentation requires dst to be encoded as EXEC (0x7E),
- // but it looks like the actual value encoded for dst operand
- // is ignored by HW. It was decided to define dst as "do not care"
- // in td files to allow disassembler accept any dst value.
- // However, dst is encoded as EXEC for compatibility with SP3.
- if (AMDGPU::isGFX10Plus(STI) && isVCMPX64(Desc)) {
- assert((Encoding & 0xFF) == 0);
- Encoding |= MRI.getEncodingValue(AMDGPU::EXEC_LO);
- }
-
- for (unsigned i = 0; i < bytes; i++) {
- OS.write((uint8_t)Encoding.extractBitsAsZExtValue(8, 8 * i));
- }
-
- // NSA encoding.
- if (AMDGPU::isGFX10Plus(STI) && Desc.TSFlags & SIInstrFlags::MIMG) {
- int vaddr0 = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
- AMDGPU::OpName::vaddr0);
- int srsrc = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
- AMDGPU::OpName::srsrc);
- assert(vaddr0 >= 0 && srsrc > vaddr0);
- unsigned NumExtraAddrs = srsrc - vaddr0 - 1;
- unsigned NumPadding = (-NumExtraAddrs) & 3;
-
- for (unsigned i = 0; i < NumExtraAddrs; ++i) {
- getMachineOpValue(MI, MI.getOperand(vaddr0 + 1 + i), Encoding, Fixups,
- STI);
- OS.write((uint8_t)Encoding.getLimitedValue());
- }
- for (unsigned i = 0; i < NumPadding; ++i)
- OS.write(0);
- }
-
- if ((bytes > 8 && STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]) ||
- (bytes > 4 && !STI.getFeatureBits()[AMDGPU::FeatureVOP3Literal]))
- return;
-
- // Do not print literals from SISrc Operands for insts with mandatory literals
- if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::imm))
- return;
-
- // Check for additional literals
- for (unsigned i = 0, e = Desc.getNumOperands(); i < e; ++i) {
-
- // Check if this operand should be encoded as [SV]Src
- if (!AMDGPU::isSISrcOperand(Desc, i))
- continue;
-
- // Is this operand a literal immediate?
- const MCOperand &Op = MI.getOperand(i);
- auto Enc = getLitEncoding(Op, Desc.operands()[i], STI);
- if (!Enc || *Enc != 255)
- continue;
-
- // Yes! Encode it
- int64_t Imm = 0;
-
- if (Op.isImm())
- Imm = Op.getImm();
- else if (Op.isExpr()) {
- if (const auto *C = dyn_cast<MCConstantExpr>(Op.getExpr()))
- Imm = C->getValue();
-
- } else if (!Op.isExpr()) // Exprs will be replaced with a fixup value.
- llvm_unreachable("Must be immediate or expr");
-
- for (unsigned j = 0; j < 4; j++) {
- OS.write((uint8_t) ((Imm >> (8 * j)) & 0xff));
- }
-
- // Only one literal value allowed
- break;
- }
-}
-
-void SIMCCodeEmitter::getSOPPBrEncoding(const MCInst &MI, unsigned OpNo,
- APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpNo);
-
- if (MO.isExpr()) {
- const MCExpr *Expr = MO.getExpr();
- MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_sopp_br;
- Fixups.push_back(MCFixup::create(0, Expr, Kind, MI.getLoc()));
- Op = APInt::getNullValue(96);
- } else {
- getMachineOpValue(MI, MO, Op, Fixups, STI);
- }
-}
-
-void SIMCCodeEmitter::getSMEMOffsetEncoding(const MCInst &MI, unsigned OpNo,
- APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- auto Offset = MI.getOperand(OpNo).getImm();
- // VI only supports 20-bit unsigned offsets.
- assert(!AMDGPU::isVI(STI) || isUInt<20>(Offset));
- Op = Offset;
-}
-
-void SIMCCodeEmitter::getSDWASrcEncoding(const MCInst &MI, unsigned OpNo,
- APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- using namespace AMDGPU::SDWA;
-
- uint64_t RegEnc = 0;
-
- const MCOperand &MO = MI.getOperand(OpNo);
-
- if (MO.isReg()) {
- unsigned Reg = MO.getReg();
- RegEnc |= MRI.getEncodingValue(Reg);
- RegEnc &= SDWA9EncValues::SRC_VGPR_MASK;
- if (AMDGPU::isSGPR(AMDGPU::mc2PseudoReg(Reg), &MRI)) {
- RegEnc |= SDWA9EncValues::SRC_SGPR_MASK;
- }
- Op = RegEnc;
- return;
- } else {
- const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
- auto Enc = getLitEncoding(MO, Desc.operands()[OpNo], STI);
- if (Enc && *Enc != 255) {
- Op = *Enc | SDWA9EncValues::SRC_SGPR_MASK;
- return;
- }
- }
-
- llvm_unreachable("Unsupported operand kind");
-}
-
-void SIMCCodeEmitter::getSDWAVopcDstEncoding(const MCInst &MI, unsigned OpNo,
- APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- using namespace AMDGPU::SDWA;
-
- uint64_t RegEnc = 0;
-
- const MCOperand &MO = MI.getOperand(OpNo);
-
- unsigned Reg = MO.getReg();
- if (Reg != AMDGPU::VCC && Reg != AMDGPU::VCC_LO) {
- RegEnc |= MRI.getEncodingValue(Reg);
- RegEnc &= SDWA9EncValues::VOPC_DST_SGPR_MASK;
- RegEnc |= SDWA9EncValues::VOPC_DST_VCC_MASK;
- }
- Op = RegEnc;
-}
-
-void SIMCCodeEmitter::getAVOperandEncoding(const MCInst &MI, unsigned OpNo,
- APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- unsigned Reg = MI.getOperand(OpNo).getReg();
- uint64_t Enc = MRI.getEncodingValue(Reg);
-
- // VGPR and AGPR have the same encoding, but SrcA and SrcB operands of mfma
- // instructions use acc[0:1] modifier bits to distinguish. These bits are
- // encoded as a virtual 9th bit of the register for these operands.
- if (MRI.getRegClass(AMDGPU::AGPR_32RegClassID).contains(Reg) ||
- MRI.getRegClass(AMDGPU::AReg_64RegClassID).contains(Reg) ||
- MRI.getRegClass(AMDGPU::AReg_96RegClassID).contains(Reg) ||
- MRI.getRegClass(AMDGPU::AReg_128RegClassID).contains(Reg) ||
- MRI.getRegClass(AMDGPU::AReg_160RegClassID).contains(Reg) ||
- MRI.getRegClass(AMDGPU::AReg_192RegClassID).contains(Reg) ||
- MRI.getRegClass(AMDGPU::AReg_224RegClassID).contains(Reg) ||
- MRI.getRegClass(AMDGPU::AReg_256RegClassID).contains(Reg) ||
- MRI.getRegClass(AMDGPU::AReg_288RegClassID).contains(Reg) ||
- MRI.getRegClass(AMDGPU::AReg_320RegClassID).contains(Reg) ||
- MRI.getRegClass(AMDGPU::AReg_352RegClassID).contains(Reg) ||
- MRI.getRegClass(AMDGPU::AReg_384RegClassID).contains(Reg) ||
- MRI.getRegClass(AMDGPU::AReg_512RegClassID).contains(Reg) ||
- MRI.getRegClass(AMDGPU::AGPR_LO16RegClassID).contains(Reg))
- Enc |= 512;
-
- Op = Enc;
-}
-
-static bool needsPCRel(const MCExpr *Expr) {
- switch (Expr->getKind()) {
- case MCExpr::SymbolRef: {
- auto *SE = cast<MCSymbolRefExpr>(Expr);
- MCSymbolRefExpr::VariantKind Kind = SE->getKind();
- return Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_LO &&
- Kind != MCSymbolRefExpr::VK_AMDGPU_ABS32_HI;
- }
- case MCExpr::Binary: {
- auto *BE = cast<MCBinaryExpr>(Expr);
- if (BE->getOpcode() == MCBinaryExpr::Sub)
- return false;
- return needsPCRel(BE->getLHS()) || needsPCRel(BE->getRHS());
- }
- case MCExpr::Unary:
- return needsPCRel(cast<MCUnaryExpr>(Expr)->getSubExpr());
- case MCExpr::Target:
- case MCExpr::Constant:
- return false;
- }
- llvm_unreachable("invalid kind");
-}
-
-void SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
- const MCOperand &MO, APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- if (MO.isReg()){
- Op = MRI.getEncodingValue(MO.getReg());
- return;
- }
- unsigned OpNo = &MO - MI.begin();
- getMachineOpValueCommon(MI, MO, OpNo, Op, Fixups, STI);
-}
-
-void SIMCCodeEmitter::getMachineOpValueCommon(
- const MCInst &MI, const MCOperand &MO, unsigned OpNo, APInt &Op,
- SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
-
- if (MO.isExpr() && MO.getExpr()->getKind() != MCExpr::Constant) {
- // FIXME: If this is expression is PCRel or not should not depend on what
- // the expression looks like. Given that this is just a general expression,
- // it should probably be FK_Data_4 and whatever is producing
- //
- // s_add_u32 s2, s2, (extern_const_addrspace+16
- //
- // And expecting a PCRel should instead produce
- //
- // .Ltmp1:
- // s_add_u32 s2, s2, (extern_const_addrspace+16)-.Ltmp1
- MCFixupKind Kind;
- if (needsPCRel(MO.getExpr()))
- Kind = FK_PCRel_4;
- else
- Kind = FK_Data_4;
-
- const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
- uint32_t Offset = Desc.getSize();
- assert(Offset == 4 || Offset == 8);
-
- Fixups.push_back(MCFixup::create(Offset, MO.getExpr(), Kind, MI.getLoc()));
- }
-
- const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
- if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
- if (auto Enc = getLitEncoding(MO, Desc.operands()[OpNo], STI)) {
- Op = *Enc;
- return;
- }
- } else if (MO.isImm()) {
- Op = MO.getImm();
- return;
- }
-
- llvm_unreachable("Encoding of this operand type is not supported yet.");
-}
-
-#include "AMDGPUGenMCCodeEmitter.inc"
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index c295b7f79442..d924f733624a 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -48,6 +48,7 @@ class MIMGBaseOpcode : PredicateControl {
bit IsAtomicRet = 0;
bit MSAA = 0;
bit BVH = 0;
+ bit A16 = 0;
}
def MIMGBaseOpcode : GenericEnum {
@@ -59,7 +60,7 @@ def MIMGBaseOpcodesTable : GenericTable {
let CppTypeName = "MIMGBaseOpcodeInfo";
let Fields = ["BaseOpcode", "Store", "Atomic", "AtomicX2", "Sampler",
"Gather4", "NumExtraArgs", "Gradients", "G16", "Coordinates",
- "LodOrClampOrMip", "HasD16", "MSAA", "BVH"];
+ "LodOrClampOrMip", "HasD16", "MSAA", "BVH", "A16"];
string TypeOf_BaseOpcode = "MIMGBaseOpcode";
let PrimaryKey = ["BaseOpcode"];
@@ -206,7 +207,6 @@ class MIMG <dag outs, string dns = "">
: MIMG_Base <outs, dns> {
let hasPostISelHook = 1;
- let AsmMatchConverter = "cvtMIMG";
Instruction Opcode = !cast<Instruction>(NAME);
MIMGBaseOpcode BaseOpcode;
@@ -235,22 +235,41 @@ def getMIMGInfo : SearchIndex {
let Key = ["Opcode"];
}
-// This class used to use !foldl to memoize the AddrAsmNames list.
-// It turned out that that was much slower than using !filter.
+class NSAHelper {
+ dag AddrIns;
+ string AddrAsm;
+ int NSA;
+}
+
class MIMGNSAHelper<int num_addrs,
- list<RegisterClass> addr_types=!listsplat(VGPR_32, num_addrs)> {
- list<string> AddrAsmNames =
- !foreach(i, !filter(i, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
- !lt(i, num_addrs)), "vaddr" # i);
- dag AddrIns = !dag(ins, addr_types, AddrAsmNames);
- string AddrAsm = "[$" # !interleave(AddrAsmNames, ", $") # "]";
-
- int NSA = !if(!le(num_addrs, 1), ?,
+ list<RegisterClass> addr_types=!listsplat(VGPR_32, num_addrs)>
+ : NSAHelper<> {
+ list<string> AddrAsmNames = !foreach(i, !range(num_addrs), "vaddr" # i);
+ let AddrIns = !dag(ins, addr_types, AddrAsmNames);
+ let AddrAsm = "[$" # !interleave(AddrAsmNames, ", $") # "]";
+
+ let NSA = !if(!le(num_addrs, 1), ?,
!if(!le(num_addrs, 5), 1,
!if(!le(num_addrs, 9), 2,
!if(!le(num_addrs, 13), 3, ?))));
}
+class PartialNSAHelper<int num_addrs, int max_addr, RegisterClass LastAddrRC>
+ : NSAHelper<> {
+
+ list<RegisterClass> addr_types =
+ !if(!ge(num_addrs, max_addr),
+ !listconcat(!listsplat(VGPR_32, !sub(max_addr, 1)), [LastAddrRC]),
+ !listsplat(VGPR_32, num_addrs));
+
+ int VAddrCount = !if(!gt(num_addrs, max_addr), max_addr, num_addrs);
+ list<string> AddrAsmNames = !foreach(i, !range(VAddrCount), "vaddr" # i);
+
+ let AddrIns = !dag(ins, addr_types, AddrAsmNames);
+ let AddrAsm = "[$" # !interleave(AddrAsmNames, ", $") # "]";
+ let NSA = 1;
+}
+
// Base class of all pre-gfx10 MIMG instructions.
class MIMG_gfx6789<bits<8> op, dag outs, string dns = "">
: MIMG<outs, dns>, MIMGe_gfx6789<op> {
@@ -321,7 +340,8 @@ class MIMG_gfx11<int op, dag outs, string dns = "">
// Base class for all NSA MIMG instructions.
// Note that 1-dword addresses always use non-NSA variants.
class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="",
- list<RegisterClass> addr_types=[]>
+ list<RegisterClass> addr_types=[],
+ RegisterClass LastAddrRC = VGPR_32>
: MIMG<outs, dns>, MIMGe_gfx11<op> {
let SubtargetPredicate = isGFX11Plus;
let AssemblerPredicate = isGFX11Plus;
@@ -329,9 +349,9 @@ class MIMG_nsa_gfx11<int op, dag outs, int num_addrs, string dns="",
let MIMGEncoding = MIMGEncGfx11NSA;
let VAddrOperands = num_addrs;
- MIMGNSAHelper nsah = !if(!empty(addr_types),
- MIMGNSAHelper<num_addrs>,
- MIMGNSAHelper<num_addrs, addr_types>);
+ NSAHelper nsah = !if(!empty(addr_types),
+ PartialNSAHelper<num_addrs, 5, LastAddrRC>,
+ MIMGNSAHelper<num_addrs, addr_types>);
dag AddrIns = nsah.AddrIns;
string AddrAsm = nsah.AddrAsm;
@@ -672,7 +692,6 @@ class MIMG_Atomic_gfx6789_base <bits<8> op, string asm, RegisterClass data_rc,
RegisterClass addr_rc, string dns="">
: MIMG_gfx6789 <op, (outs data_rc:$vdst), dns> {
let Constraints = "$vdst = $vdata";
- let AsmMatchConverter = "cvtMIMGAtomic";
let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
@@ -684,7 +703,6 @@ class MIMG_Atomic_gfx90a_base <bits<8> op, string asm, RegisterClass data_rc,
RegisterClass addr_rc, string dns="">
: MIMG_gfx90a <op, (outs getLdStRegisterOperand<data_rc>.ret:$vdst), dns> {
let Constraints = "$vdst = $vdata";
- let AsmMatchConverter = "cvtMIMGAtomic";
let InOperandList = (ins getLdStRegisterOperand<data_rc>.ret:$vdata,
addr_rc:$vaddr, SReg_256:$srsrc,
@@ -720,7 +738,6 @@ class MIMG_Atomic_gfx10<mimgopc op, string opcode,
: MIMG_gfx10<!cast<int>(op.GFX10M), (outs DataRC:$vdst),
!if(enableDisasm, "AMDGPU", "")> {
let Constraints = "$vdst = $vdata";
- let AsmMatchConverter = "cvtMIMGAtomic";
let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
@@ -734,7 +751,6 @@ class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode,
: MIMG_nsa_gfx10<!cast<int>(op.GFX10M), (outs DataRC:$vdst), num_addrs,
!if(enableDisasm, "AMDGPU", "")> {
let Constraints = "$vdst = $vdata";
- let AsmMatchConverter = "cvtMIMGAtomic";
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
@@ -750,7 +766,6 @@ class MIMG_Atomic_gfx11<mimgopc op, string opcode,
: MIMG_gfx11<!cast<int>(op.GFX11), (outs DataRC:$vdst),
!if(enableDisasm, "AMDGPU", "")> {
let Constraints = "$vdst = $vdata";
- let AsmMatchConverter = "cvtMIMGAtomic";
let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
@@ -764,7 +779,6 @@ class MIMG_Atomic_nsa_gfx11<mimgopc op, string opcode,
: MIMG_nsa_gfx11<!cast<int>(op.GFX11), (outs DataRC:$vdst), num_addrs,
!if(enableDisasm, "AMDGPU", "")> {
let Constraints = "$vdst = $vdata";
- let AsmMatchConverter = "cvtMIMGAtomic";
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
@@ -934,8 +948,9 @@ class MIMG_Sampler_gfx11<mimgopc op, string opcode,
class MIMG_Sampler_nsa_gfx11<mimgopc op, string opcode,
RegisterClass DataRC, int num_addrs,
- string dns="">
- : MIMG_nsa_gfx11<op.GFX11, (outs DataRC:$vdata), num_addrs, dns> {
+ RegisterClass LastVAddrSize, string dns="">
+ : MIMG_nsa_gfx11<op.GFX11, (outs DataRC:$vdata), num_addrs, dns, [],
+ LastVAddrSize> {
let InOperandList = !con(AddrIns,
(ins SReg_256:$srsrc, SReg_128:$ssamp, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
@@ -946,29 +961,34 @@ class MIMG_Sampler_nsa_gfx11<mimgopc op, string opcode,
#!if(BaseOpcode.HasD16, "$d16", "");
}
-class MIMGAddrSize<int dw, bit enable_disasm> {
+class MIMGAddrSize<int dw, bit enable_disasm, int AddrDW = dw> {
int NumWords = dw;
- RegisterClass RegClass = !if(!le(NumWords, 0), ?,
- !if(!eq(NumWords, 1), VGPR_32,
- !if(!eq(NumWords, 2), VReg_64,
- !if(!eq(NumWords, 3), VReg_96,
- !if(!eq(NumWords, 4), VReg_128,
- !if(!eq(NumWords, 5), VReg_160,
- !if(!eq(NumWords, 6), VReg_192,
- !if(!eq(NumWords, 7), VReg_224,
- !if(!le(NumWords, 8), VReg_256,
- !if(!le(NumWords, 9), VReg_288,
- !if(!le(NumWords, 10), VReg_320,
- !if(!le(NumWords, 11), VReg_352,
- !if(!le(NumWords, 12), VReg_384,
- !if(!le(NumWords, 16), VReg_512, ?))))))))))))));
+ RegisterClass RegClass = !if(!le(AddrDW, 0), ?,
+ !if(!eq(AddrDW, 1), VGPR_32,
+ !if(!eq(AddrDW, 2), VReg_64,
+ !if(!eq(AddrDW, 3), VReg_96,
+ !if(!eq(AddrDW, 4), VReg_128,
+ !if(!eq(AddrDW, 5), VReg_160,
+ !if(!eq(AddrDW, 6), VReg_192,
+ !if(!eq(AddrDW, 7), VReg_224,
+ !if(!eq(AddrDW, 8), VReg_256,
+ !if(!eq(AddrDW, 9), VReg_288,
+ !if(!eq(AddrDW, 10), VReg_320,
+ !if(!eq(AddrDW, 11), VReg_352,
+ !if(!eq(AddrDW, 12), VReg_384,
+ !if(!le(AddrDW, 16), VReg_512, ?))))))))))))));
// Whether the instruction variant with this vaddr size should be enabled for
// the auto-generated disassembler.
bit Disassemble = enable_disasm;
}
+// Returns the MIMGAddrSize with the size of the last VAddr for partial NSA
+class LastVAddrSize <int dw, int max_idx, bit enable_disasm>
+ : MIMGAddrSize<dw, enable_disasm,
+ !if(!gt(dw, max_idx), !sub(dw, max_idx), 0)>;
+
// Return whether x is in lst.
class isIntInList<int x, list<int> lst> {
bit ret = !foldl(0, lst, lhs, y, !or(lhs, !eq(x, y)));
@@ -985,7 +1005,8 @@ class MIMGAddrSizes_dw_range<list<int> range> {
int Max = !if(!empty(!tail(range)), Min, !head(!tail(range)));
}
-class MIMG_Sampler_AddrSizes<AMDGPUSampleVariant sample, bit isG16> {
+class MIMG_Sampler_AddrSizes<AMDGPUSampleVariant sample, bit isG16,
+ int nsa_max_addr = 5> {
// List of all possible numbers of address words, taking all combinations of
// A16 and image dimension into account (note: no MSAA, since this is for
// sample/gather ops).
@@ -1031,6 +1052,21 @@ class MIMG_Sampler_AddrSizes<AMDGPUSampleVariant sample, bit isG16> {
!if(isIntInList<dw, AllNumAddrWords>.ret,
!listconcat(lhs, [MIMGAddrSize<dw, !empty(lhs)>]),
lhs))));
+
+ // In NSA format, if more VGPRs are required than the format supports, the
+ // rest are sequential after the last one. Generate machine instructions for
+ // all possible numbers of words. The disassembler defaults to the largest
+ // number of arguments, but no larger than the max NSA size. The list is
+ // generated with the register class needed for the last vaddr, since it is
+ // the only one that could use a register class other than VGPR_32.
+ int EnableDisasmNum = !foldl(!head(AllNumAddrWords), !tail(AllNumAddrWords),
+ acc, var, !if(!le(var, nsa_max_addr), var, acc));
+ list<LastVAddrSize> PartialNSAInstrs =
+ !foldl([]<LastVAddrSize>, [12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2], lhs, dw,
+ !if(isIntInList<dw, AllNumAddrWords>.ret,
+ !listconcat(lhs, [LastVAddrSize<dw, !sub(nsa_max_addr, 1),
+ !eq(dw, EnableDisasmNum)>]),
+ lhs));
}
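The partial-NSA rule above can be restated as a small standalone sketch (illustration only, not part of the patch; the helper name and the driver are invented). With the gfx11 sampler limit of 5 NSA slots assumed here, every slot but the last holds a single VGPR dword and the last slot absorbs whatever remains as one contiguous register tuple:

    #include <cstdio>

    // Mirrors the LastVAddrSize computation: how many address dwords the final
    // NSA slot must cover once the earlier slots (NsaMaxAddr - 1 of them) each
    // took a single dword.
    static unsigned lastVAddrDwords(unsigned NumAddrDwords, unsigned NsaMaxAddr = 5) {
      unsigned MaxIdx = NsaMaxAddr - 1; // dwords held by vaddr0..vaddr(MaxIdx-1)
      return NumAddrDwords > MaxIdx ? NumAddrDwords - MaxIdx : 0;
    }

    int main() {
      // A 7-dword sample: vaddr0..vaddr3 stay single VGPRs, while vaddr4 needs a
      // 3-dword tuple, i.e. the VReg_96 class chosen by MIMGAddrSize above.
      std::printf("last vaddr covers %u dwords\n", lastVAddrDwords(7));
      return 0;
    }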
multiclass MIMG_Sampler_Src_Helper <mimgopc op, string asm,
@@ -1066,9 +1102,14 @@ multiclass MIMG_Sampler_Src_Helper <mimgopc op, string asm,
: MIMG_Sampler_nsa_gfx10<op, asm, dst_rc, addr.NumWords,
!if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;
}
- if !and(op.HAS_GFX11, !le(addr.NumWords, 5)) then {
+ }
+ }
+
+ foreach addr = MIMG_Sampler_AddrSizes<sample, isG16, 5/*MaxNSASize*/>.PartialNSAInstrs in {
+ let VAddrDwords = addr.NumWords in {
+ if op.HAS_GFX11 then {
def _V # addr.NumWords # _nsa_gfx11
- : MIMG_Sampler_nsa_gfx11<op, asm, dst_rc, addr.NumWords,
+ : MIMG_Sampler_nsa_gfx11<op, asm, dst_rc, addr.NumWords, addr.RegClass,
!if(!and(enableDisasm, addr.Disassemble), "AMDGPU", "")>;
}
}
@@ -1144,51 +1185,43 @@ class MIMG_IntersectRay_Helper<bit Is64, bit IsA16> {
[node_ptr_type, VGPR_32, VReg_96, VReg_96, VReg_96]);
}
-class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC, bit IsA16>
+class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
: MIMG_gfx10<op.GFX10M, (outs VReg_128:$vdata), "AMDGPU"> {
-
- let InOperandList = !con((ins AddrRC:$vaddr0, SReg_128:$srsrc),
- !if(IsA16, (ins A16:$a16), (ins)));
- let AsmString = opcode#" $vdata, $vaddr0, $srsrc"#!if(IsA16, "$a16", "");
+ let InOperandList = (ins AddrRC:$vaddr0, SReg_128:$srsrc, A16:$a16);
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc$a16";
let nsa = 0;
}
-class MIMG_IntersectRay_nsa_gfx10<mimgopc op, string opcode, int num_addrs, bit IsA16>
+class MIMG_IntersectRay_nsa_gfx10<mimgopc op, string opcode, int num_addrs>
: MIMG_nsa_gfx10<op.GFX10M, (outs VReg_128:$vdata), num_addrs, "AMDGPU"> {
- let InOperandList = !con(nsah.AddrIns,
- (ins SReg_128:$srsrc),
- !if(IsA16, (ins A16:$a16), (ins)));
- let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(IsA16, "$a16", "");
+ let InOperandList = !con(nsah.AddrIns, (ins SReg_128:$srsrc, A16:$a16));
+ let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc$a16";
}
-class MIMG_IntersectRay_gfx11<mimgopc op, string opcode, RegisterClass AddrRC, bit IsA16>
+class MIMG_IntersectRay_gfx11<mimgopc op, string opcode, RegisterClass AddrRC>
: MIMG_gfx11<op.GFX11, (outs VReg_128:$vdata), "AMDGPU"> {
-
- let InOperandList = !con((ins AddrRC:$vaddr0, SReg_128:$srsrc),
- !if(IsA16, (ins A16:$a16), (ins)));
- let AsmString = opcode#" $vdata, $vaddr0, $srsrc"#!if(IsA16, "$a16", "");
+ let InOperandList = (ins AddrRC:$vaddr0, SReg_128:$srsrc, A16:$a16);
+ let AsmString = opcode#" $vdata, $vaddr0, $srsrc$a16";
let nsa = 0;
}
class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
- bit IsA16, list<RegisterClass> addr_types>
+ list<RegisterClass> addr_types>
: MIMG_nsa_gfx11<op.GFX11, (outs VReg_128:$vdata), num_addrs, "AMDGPU",
addr_types> {
- let InOperandList = !con(nsah.AddrIns,
- (ins SReg_128:$srsrc),
- !if(IsA16, (ins A16:$a16), (ins)));
- let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc"#!if(IsA16, "$a16", "");
+ let InOperandList = !con(nsah.AddrIns, (ins SReg_128:$srsrc, A16:$a16));
+ let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc$a16";
}
multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16> {
defvar info = MIMG_IntersectRay_Helper<Is64, IsA16>;
def "" : MIMGBaseOpcode {
let BVH = 1;
+ let A16 = IsA16;
}
- let AsmMatchConverter = !if(IsA16, "cvtIntersectRay", ""),
- dmask = 0xf,
+ let dmask = 0xf,
unorm = 1,
d16 = 0,
cpol = 0,
@@ -1201,17 +1234,17 @@ multiclass MIMG_IntersectRay<mimgopc op, string opcode, bit Is64, bit IsA16> {
d16 = 0,
BaseOpcode = !cast<MIMGBaseOpcode>(NAME),
VDataDwords = 4 in {
- def _sa_gfx10 : MIMG_IntersectRay_gfx10<op, opcode, info.RegClass, IsA16> {
+ def _sa_gfx10 : MIMG_IntersectRay_gfx10<op, opcode, info.RegClass> {
let VAddrDwords = info.VAddrDwords;
}
- def _sa_gfx11 : MIMG_IntersectRay_gfx11<op, opcode, info.RegClass, IsA16> {
+ def _sa_gfx11 : MIMG_IntersectRay_gfx11<op, opcode, info.RegClass> {
let VAddrDwords = info.VAddrDwords;
}
- def _nsa_gfx10 : MIMG_IntersectRay_nsa_gfx10<op, opcode, info.num_addrs, IsA16> {
+ def _nsa_gfx10 : MIMG_IntersectRay_nsa_gfx10<op, opcode, info.num_addrs> {
let VAddrDwords = info.num_addrs;
}
def _nsa_gfx11 : MIMG_IntersectRay_nsa_gfx11<op, opcode,
- info.gfx11_nsa_addrs, IsA16,
+ info.gfx11_nsa_addrs,
info.gfx11_addr_types> {
let VAddrDwords = info.num_addrs;
}
diff --git a/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp
index 50a90dd03f38..20c2ff8a4fd7 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelDAGToDAG.cpp
@@ -20,7 +20,7 @@
namespace {
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
- const R600Subtarget *Subtarget;
+ const R600Subtarget *Subtarget = nullptr;
bool isConstantLoad(const MemSDNode *N, int cbID) const;
bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue &IntPtr);
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index fad393267a71..ad072cfe23b1 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -953,10 +953,8 @@ SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op,
unsigned SrcAS = ASC->getSrcAddressSpace();
unsigned DestAS = ASC->getDestAddressSpace();
- if (auto *ConstSrc = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
- if (SrcAS == AMDGPUAS::FLAT_ADDRESS && ConstSrc->isNullValue())
- return DAG.getConstant(TM.getNullPointerValue(DestAS), SL, VT);
- }
+ if (isNullConstant(Op.getOperand(0)) && SrcAS == AMDGPUAS::FLAT_ADDRESS)
+ return DAG.getConstant(TM.getNullPointerValue(DestAS), SL, VT);
return Op;
}
@@ -1656,7 +1654,7 @@ SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[],
BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
for (unsigned i = 0; i < 4; i++) {
unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
- if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
+ if (SwizzleRemap.contains(Idx))
Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
}
@@ -1664,7 +1662,7 @@ SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[],
BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
for (unsigned i = 0; i < 4; i++) {
unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
- if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
+ if (SwizzleRemap.contains(Idx))
Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
}
@@ -2182,3 +2180,18 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
return Node;
}
+
+TargetLowering::AtomicExpansionKind
+R600TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
+ switch (RMW->getOperation()) {
+ case AtomicRMWInst::UIncWrap:
+ case AtomicRMWInst::UDecWrap:
+ // FIXME: Cayman at least appears to have instructions for this, but the
+ // instruction definitions appear to be missing.
+ return AtomicExpansionKind::CmpXChg;
+ default:
+ break;
+ }
+
+ return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
+}
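Returning AtomicExpansionKind::CmpXChg asks the generic AtomicExpand pass to rewrite the atomicrmw as a compare-exchange retry loop. A rough sketch of the uinc_wrap semantics that loop has to preserve, in plain C++ with std::atomic standing in for the IR-level expansion (function name invented for the example):

    #include <atomic>
    #include <cstdint>

    // atomicrmw uinc_wrap: store (old >= bound) ? 0 : old + 1 and yield the old value.
    uint32_t atomic_uinc_wrap(std::atomic<uint32_t> &A, uint32_t Bound) {
      uint32_t Old = A.load();
      uint32_t New;
      do {
        New = (Old >= Bound) ? 0 : Old + 1;          // wrap-around increment
      } while (!A.compare_exchange_weak(Old, New));  // retry if another writer intervened
      return Old;
    }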
diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.h b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
index 8a5479db4ee6..fc361c01bc67 100644
--- a/llvm/lib/Target/AMDGPU/R600ISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.h
@@ -114,6 +114,9 @@ private:
SelectionDAG &DAG) const;
SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const override;
+
+ TargetLowering::AtomicExpansionKind
+ shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override;
};
} // End namespace llvm;
diff --git a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
index 4056274cd440..7f874b245b8f 100644
--- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -328,7 +328,7 @@ R600InstrInfo::ExtractSrcs(MachineInstr &MI,
if (Reg == R600::OQAP) {
Result.push_back(std::pair(Index, 0U));
}
- if (PV.find(Reg) != PV.end()) {
+ if (PV.contains(Reg)) {
// 255 is used to tell it's a PS/PV reg
Result.push_back(std::pair(255, 0U));
continue;
diff --git a/llvm/lib/Target/AMDGPU/R600Instructions.td b/llvm/lib/Target/AMDGPU/R600Instructions.td
index b53e9c258fd9..f4dfbe8adc75 100644
--- a/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -1090,7 +1090,7 @@ multiclass CUBE_Common <bits<11> inst> {
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 0
class EXP_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
- inst, "EXP_IEEE", fexp2
+ inst, "EXP_IEEE", AMDGPUexp
> {
let Itinerary = TransALU;
}
@@ -1124,7 +1124,7 @@ class LOG_CLAMPED_Common <bits<11> inst> : R600_1OP <
>;
class LOG_IEEE_Common <bits<11> inst> : R600_1OP_Helper <
- inst, "LOG_IEEE", flog2
+ inst, "LOG_IEEE", AMDGPUlog
> {
let Itinerary = TransALU;
}
diff --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
index c01f9c4794c7..1a1be4a44285 100644
--- a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.cpp
@@ -82,10 +82,10 @@ bool R600TTIImpl::isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
return isLegalToVectorizeMemChain(ChainSizeInBytes, Alignment, AddrSpace);
}
-unsigned R600TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned R600TTIImpl::getMaxInterleaveFactor(ElementCount VF) {
// Disable unrolling if the loop is not vectorized.
// TODO: Enable this again.
- if (VF == 1)
+ if (VF.isScalar())
return 1;
return 8;
diff --git a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h
index 8dacae0abb7b..2934b0151f4d 100644
--- a/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h
+++ b/llvm/lib/Target/AMDGPU/R600TargetTransformInfo.h
@@ -57,7 +57,7 @@ public:
unsigned AddrSpace) const;
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
- unsigned getMaxInterleaveFactor(unsigned VF);
+ unsigned getMaxInterleaveFactor(ElementCount VF);
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
using BaseT::getVectorInstrCost;
diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index f232bc9b3852..b87cd8c66cc8 100644
--- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -13,8 +13,8 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -36,7 +36,7 @@ using StackEntry = std::pair<BasicBlock *, Value *>;
using StackVector = SmallVector<StackEntry, 16>;
class SIAnnotateControlFlow : public FunctionPass {
- LegacyDivergenceAnalysis *DA;
+ UniformityInfo *UA;
Type *Boolean;
Type *Void;
@@ -99,7 +99,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LegacyDivergenceAnalysis>();
+ AU.addRequired<UniformityInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<TargetPassConfig>();
@@ -112,7 +112,7 @@ public:
INITIALIZE_PASS_BEGIN(SIAnnotateControlFlow, DEBUG_TYPE,
"Annotate SI Control Flow", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(SIAnnotateControlFlow, DEBUG_TYPE,
"Annotate SI Control Flow", false, false)
@@ -146,7 +146,7 @@ void SIAnnotateControlFlow::initialize(Module &M, const GCNSubtarget &ST) {
/// Is the branch condition uniform or did the StructurizeCFG pass
/// consider it as such?
bool SIAnnotateControlFlow::isUniform(BranchInst *T) {
- return DA->isUniform(T) ||
+ return UA->isUniform(T) ||
T->getMetadata("structurizecfg.uniform") != nullptr;
}
@@ -336,7 +336,7 @@ bool SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
bool SIAnnotateControlFlow::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- DA = &getAnalysis<LegacyDivergenceAnalysis>();
+ UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
const TargetMachine &TM = TPC.getTM<TargetMachine>();
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 97a583421a7e..cd1818285e3e 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -16,11 +16,36 @@ namespace llvm {
// This needs to be kept in sync with the field bits in SIRegisterClass.
enum SIRCFlags : uint8_t {
- // For vector registers.
- HasVGPR = 1 << 0,
- HasAGPR = 1 << 1,
- HasSGPR = 1 << 2
-}; // enum SIRCFlags
+ RegTupleAlignUnitsWidth = 2,
+ HasVGPRBit = RegTupleAlignUnitsWidth,
+ HasAGPRBit,
+ HasSGPRbit,
+
+ HasVGPR = 1 << HasVGPRBit,
+ HasAGPR = 1 << HasAGPRBit,
+ HasSGPR = 1 << HasSGPRbit,
+
+ RegTupleAlignUnitsMask = (1 << RegTupleAlignUnitsWidth) - 1,
+ RegKindMask = (HasVGPR | HasAGPR | HasSGPR)
+}; // enum SIRCFlags
+
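The reworked flags pack two fields into one byte: the low RegTupleAlignUnitsWidth bits hold the register-tuple alignment units and the next three bits mark the register kind. A minimal decoding sketch (illustration only; the helper names are invented and the constants simply restate the enum above):

    #include <cstdint>

    constexpr unsigned RegTupleAlignUnitsWidth = 2;
    constexpr uint8_t HasVGPR = 1u << RegTupleAlignUnitsWidth;       // bit 2
    constexpr uint8_t HasAGPR = 1u << (RegTupleAlignUnitsWidth + 1); // bit 3
    constexpr uint8_t HasSGPR = 1u << (RegTupleAlignUnitsWidth + 2); // bit 4
    constexpr uint8_t RegTupleAlignUnitsMask = (1u << RegTupleAlignUnitsWidth) - 1;

    constexpr unsigned alignUnits(uint8_t Flags) { return Flags & RegTupleAlignUnitsMask; }
    constexpr bool isVectorRegClass(uint8_t Flags) { return (Flags & (HasVGPR | HasAGPR)) != 0; }

    // An AGPR class with 2 alignment units keeps both fields independently recoverable.
    static_assert(alignUnits(HasAGPR | 2) == 2, "alignment units live in the low bits");
    static_assert(isVectorRegClass(HasAGPR | 2), "kind bits live above them");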
+namespace SIEncodingFamily {
+// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
+// and the columns of the getMCOpcodeGen table.
+enum {
+ SI = 0,
+ VI = 1,
+ SDWA = 2,
+ SDWA9 = 3,
+ GFX80 = 4,
+ GFX9 = 5,
+ GFX10 = 6,
+ SDWA10 = 7,
+ GFX90A = 8,
+ GFX940 = 9,
+ GFX11 = 10,
+};
+}
namespace SIInstrFlags {
// This needs to be kept in sync with the field bits in InstSI.
@@ -133,6 +158,9 @@ enum : uint64_t {
// Whether tied sources will be read.
TiedSourceNotRead = UINT64_C(1) << 60,
+
+ // Is never uniform.
+ IsNeverUniform = UINT64_C(1) << 61,
};
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
@@ -222,6 +250,7 @@ enum OperandType : unsigned {
// NEG and SEXT share same bit-mask because they can't be set simultaneously.
namespace SISrcMods {
enum : unsigned {
+ NONE = 0,
NEG = 1 << 0, // Floating-point negate modifier
ABS = 1 << 1, // Floating-point absolute modifier
SEXT = 1 << 0, // Integer sign-extend modifier
@@ -333,7 +362,7 @@ enum Id { // Message ID, width(4) [3:0].
ID_SAVEWAVE = 4, // added in GFX8, removed in GFX11
ID_STALL_WAVE_GEN = 5, // added in GFX9
ID_HALT_WAVES = 6, // added in GFX9
- ID_ORDERED_PS_DONE = 7, // added in GFX9
+ ID_ORDERED_PS_DONE = 7, // added in GFX9, removed in GFX11
ID_EARLY_PRIM_DEALLOC = 8, // added in GFX9, removed in GFX10
ID_GS_ALLOC_REQ = 9, // added in GFX9
ID_GET_DOORBELL = 10, // added in GFX9, removed in GFX11
@@ -401,19 +430,26 @@ enum Id { // HwRegCode, (6) [5:0]
ID_TBA_HI = 17,
ID_TMA_LO = 18,
ID_TMA_HI = 19,
- ID_XCC_ID = 20,
- ID_SQ_PERF_SNAPSHOT_DATA = 21,
- ID_SQ_PERF_SNAPSHOT_DATA1 = 22,
- ID_SQ_PERF_SNAPSHOT_PC_LO = 23,
- ID_SQ_PERF_SNAPSHOT_PC_HI = 24,
ID_FLAT_SCR_LO = 20,
ID_FLAT_SCR_HI = 21,
ID_XNACK_MASK = 22,
ID_HW_ID1 = 23,
ID_HW_ID2 = 24,
ID_POPS_PACKER = 25,
+ ID_PERF_SNAPSHOT_DATA = 27,
ID_SHADER_CYCLES = 29,
+ // Register numbers reused in GFX11+
+ ID_PERF_SNAPSHOT_PC_LO = 18,
+ ID_PERF_SNAPSHOT_PC_HI = 19,
+
+ // GFX940 specific registers
+ ID_XCC_ID = 20,
+ ID_SQ_PERF_SNAPSHOT_DATA = 21,
+ ID_SQ_PERF_SNAPSHOT_DATA1 = 22,
+ ID_SQ_PERF_SNAPSHOT_PC_LO = 23,
+ ID_SQ_PERF_SNAPSHOT_PC_HI = 24,
+
ID_SHIFT_ = 0,
ID_WIDTH_ = 6,
ID_MASK_ = (((1 << ID_WIDTH_) - 1) << ID_SHIFT_)
@@ -909,6 +945,17 @@ enum Offset_COV5 : unsigned {
};
} // namespace ImplicitArg
+
+namespace VirtRegFlag {
+// Virtual register flags used for various target specific handlings during
+// codegen.
+enum Register_Flag : uint8_t {
+ // Register operand in a whole-wave mode operation.
+ WWM_REG = 1 << 0,
+};
+
+} // namespace VirtRegFlag
+
} // namespace AMDGPU
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index e5a028823e72..db323465c153 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -231,7 +231,7 @@ static bool tryChangeVGPRtoSGPRinCopy(MachineInstr &MI,
UseMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
return false;
- unsigned OpIdx = UseMI->getOperandNo(&MO);
+ unsigned OpIdx = MO.getOperandNo();
if (OpIdx >= UseMI->getDesc().getNumOperands() ||
!TII->isOperandLegal(*UseMI, OpIdx, &Src))
return false;
@@ -658,7 +658,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
TRI->getEquivalentSGPRClass(SrcRC);
Register NewDst = MRI->createVirtualRegister(DestRC);
MachineBasicBlock *BlockToInsertCopy =
- MI.isPHI() ? MI.getOperand(MI.getOperandNo(&MO) + 1).getMBB()
+ MI.isPHI() ? MI.getOperand(MO.getOperandNo() + 1).getMBB()
: MBB;
MachineBasicBlock::iterator PointToInsertCopy =
MI.isPHI() ? BlockToInsertCopy->getFirstInstrTerminator() : I;
@@ -869,7 +869,9 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
return true;
}
if (!SrcReg.isVirtual() || TRI->isAGPR(*MRI, SrcReg)) {
- TII->moveToVALU(MI, MDT);
+ SIInstrWorklist worklist;
+ worklist.insert(&MI);
+ TII->moveToVALU(worklist, MDT);
return true;
}
@@ -991,6 +993,10 @@ void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
LoweringWorklist.push_back(C.second.ID);
}
+ // Store all the V2S copy instructions that need to be moved to VALU
+ // in the Copies worklist.
+ SIInstrWorklist Copies;
+
while (!LoweringWorklist.empty()) {
unsigned CurID = LoweringWorklist.pop_back_val();
auto CurInfoIt = V2SCopies.find(CurID);
@@ -1013,10 +1019,13 @@ void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "V2S copy " << *C.Copy
<< " is being turned to VALU\n");
V2SCopies.erase(C.ID);
- TII->moveToVALU(*C.Copy, MDT);
+ Copies.insert(C.Copy);
}
}
+ TII->moveToVALU(Copies, MDT);
+ Copies.clear();
+
// Now do actual lowering
for (auto C : V2SCopies) {
MachineInstr *MI = C.second.Copy;
diff --git a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
index f7e3ea5fc072..08272a9ddfd3 100644
--- a/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixVGPRCopies.cpp
@@ -31,6 +31,11 @@ public:
initializeSIFixVGPRCopiesPass(*PassRegistry::getPassRegistry());
}
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override { return "SI Fix VGPR copies"; }
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 9c0c665a318c..9f1d6038f1b6 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -111,9 +111,11 @@ public:
std::pair<const MachineOperand *, int> isOMod(const MachineInstr &MI) const;
bool tryFoldOMod(MachineInstr &MI);
bool tryFoldRegSequence(MachineInstr &MI);
- bool tryFoldLCSSAPhi(MachineInstr &MI);
+ bool tryFoldPhiAGPR(MachineInstr &MI);
bool tryFoldLoad(MachineInstr &MI);
+ bool tryOptimizeAGPRPhis(MachineBasicBlock &MBB);
+
public:
SIFoldOperands() : MachineFunctionPass(ID) {
initializeSIFoldOperandsPass(*PassRegistry::getPassRegistry());
@@ -138,6 +140,16 @@ char SIFoldOperands::ID = 0;
char &llvm::SIFoldOperandsID = SIFoldOperands::ID;
+static const TargetRegisterClass *getRegOpRC(const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI,
+ const MachineOperand &MO) {
+ const TargetRegisterClass *RC = MRI.getRegClass(MO.getReg());
+ if (const TargetRegisterClass *SubRC =
+ TRI.getSubRegisterClass(RC, MO.getSubReg()))
+ RC = SubRC;
+ return RC;
+}
+
// Map multiply-accumulate opcode to corresponding multiply-add opcode if any.
static unsigned macToMad(unsigned Opc) {
switch (Opc) {
@@ -341,14 +353,17 @@ bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
// Check if changing this to a v_mad_{f16, f32} instruction will allow us
// to fold the operand.
MI->setDesc(TII->get(NewOpc));
- if (!AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel) &&
- AMDGPU::hasNamedOperand(NewOpc, AMDGPU::OpName::op_sel))
+ bool AddOpSel = !AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel) &&
+ AMDGPU::hasNamedOperand(NewOpc, AMDGPU::OpName::op_sel);
+ if (AddOpSel)
MI->addOperand(MachineOperand::CreateImm(0));
bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold);
if (FoldAsMAD) {
MI->untieRegOperand(OpNo);
return true;
}
+ if (AddOpSel)
+ MI->removeOperand(MI->getNumExplicitOperands() - 1);
MI->setDesc(TII->get(Opc));
}
@@ -893,11 +908,10 @@ void SIFoldOperands::foldOperand(
TRI->getRegClass(FoldDesc.operands()[0].RegClass);
// Split 64-bit constants into 32-bits for folding.
- if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
+ if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(*FoldRC) == 64) {
Register UseReg = UseOp.getReg();
const TargetRegisterClass *UseRC = MRI->getRegClass(UseReg);
-
- if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
+ if (AMDGPU::getRegBitWidth(*UseRC) != 64)
return;
APInt Imm(64, OpToFold.getImm());
@@ -1628,52 +1642,175 @@ bool SIFoldOperands::tryFoldRegSequence(MachineInstr &MI) {
return true;
}
-// Try to hoist an AGPR to VGPR copy out of the loop across a LCSSA PHI.
-// This should allow folding of an AGPR into a consumer which may support it.
-// I.e.:
-//
-// loop: // loop:
-// %1:vreg = COPY %0:areg // exit:
-// exit: => // %1:areg = PHI %0:areg, %loop
-// %2:vreg = PHI %1:vreg, %loop // %2:vreg = COPY %1:areg
-bool SIFoldOperands::tryFoldLCSSAPhi(MachineInstr &PHI) {
- assert(PHI.isPHI());
+/// Checks whether \p Copy is an AGPR -> VGPR copy. Returns `true` on success and
+/// stores the AGPR register in \p OutReg and the subreg in \p OutSubReg
+static bool isAGPRCopy(const SIRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI, const MachineInstr &Copy,
+ Register &OutReg, unsigned &OutSubReg) {
+ assert(Copy.isCopy());
- if (PHI.getNumExplicitOperands() != 3) // Single input LCSSA PHI
+ const MachineOperand &CopySrc = Copy.getOperand(1);
+ Register CopySrcReg = CopySrc.getReg();
+ if (!CopySrcReg.isVirtual())
return false;
- Register PhiIn = PHI.getOperand(1).getReg();
- Register PhiOut = PHI.getOperand(0).getReg();
- if (PHI.getOperand(1).getSubReg() ||
- !TRI->isVGPR(*MRI, PhiIn) || !TRI->isVGPR(*MRI, PhiOut))
+ // Common case: copy from AGPR directly, e.g.
+ // %1:vgpr_32 = COPY %0:agpr_32
+ if (TRI.isAGPR(MRI, CopySrcReg)) {
+ OutReg = CopySrcReg;
+ OutSubReg = CopySrc.getSubReg();
+ return true;
+ }
+
+ // Sometimes it can also involve two copies, e.g.
+ // %1:vgpr_256 = COPY %0:agpr_256
+ // %2:vgpr_32 = COPY %1:vgpr_256.sub0
+ const MachineInstr *CopySrcDef = MRI.getVRegDef(CopySrcReg);
+ if (!CopySrcDef || !CopySrcDef->isCopy())
return false;
- // A single use should not matter for correctness, but if it has another use
- // inside the loop we may perform copy twice in a worst case.
- if (!MRI->hasOneNonDBGUse(PhiIn))
+ const MachineOperand &OtherCopySrc = CopySrcDef->getOperand(1);
+ Register OtherCopySrcReg = OtherCopySrc.getReg();
+ if (!OtherCopySrcReg.isVirtual() ||
+ CopySrcDef->getOperand(0).getSubReg() != AMDGPU::NoSubRegister ||
+ OtherCopySrc.getSubReg() != AMDGPU::NoSubRegister ||
+ !TRI.isAGPR(MRI, OtherCopySrcReg))
return false;
- MachineInstr *Copy = MRI->getVRegDef(PhiIn);
- if (!Copy || !Copy->isCopy())
+ OutReg = OtherCopySrcReg;
+ OutSubReg = CopySrc.getSubReg();
+ return true;
+}
+
+// Try to hoist an AGPR to VGPR copy across a PHI.
+// This should allow folding of an AGPR into a consumer which may support it.
+//
+// Example 1: LCSSA PHI
+// loop:
+// %1:vreg = COPY %0:areg
+// exit:
+// %2:vreg = PHI %1:vreg, %loop
+// =>
+// loop:
+// exit:
+// %1:areg = PHI %0:areg, %loop
+// %2:vreg = COPY %1:areg
+//
+// Example 2: PHI with multiple incoming values:
+// entry:
+// %1:vreg = GLOBAL_LOAD(..)
+// loop:
+// %2:vreg = PHI %1:vreg, %entry, %5:vreg, %loop
+// %3:areg = COPY %2:vreg
+// %4:areg = (instr using %3:areg)
+// %5:vreg = COPY %4:areg
+// =>
+// entry:
+// %1:vreg = GLOBAL_LOAD(..)
+// %2:areg = COPY %1:vreg
+// loop:
+// %3:areg = PHI %2:areg, %entry, %X:areg,
+// %4:areg = (instr using %3:areg)
+bool SIFoldOperands::tryFoldPhiAGPR(MachineInstr &PHI) {
+ assert(PHI.isPHI());
+
+ Register PhiOut = PHI.getOperand(0).getReg();
+ if (!TRI->isVGPR(*MRI, PhiOut))
return false;
- Register CopyIn = Copy->getOperand(1).getReg();
- if (!TRI->isAGPR(*MRI, CopyIn) || Copy->getOperand(1).getSubReg())
+ // Iterate once over all incoming values of the PHI to check if this PHI is
+ // eligible, and determine the exact AGPR RC we'll target.
+ const TargetRegisterClass *ARC = nullptr;
+ for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {
+ MachineOperand &MO = PHI.getOperand(K);
+ MachineInstr *Copy = MRI->getVRegDef(MO.getReg());
+ if (!Copy || !Copy->isCopy())
+ continue;
+
+ Register AGPRSrc;
+ unsigned AGPRRegMask = AMDGPU::NoSubRegister;
+ if (!isAGPRCopy(*TRI, *MRI, *Copy, AGPRSrc, AGPRRegMask))
+ continue;
+
+ const TargetRegisterClass *CopyInRC = MRI->getRegClass(AGPRSrc);
+ if (const auto *SubRC = TRI->getSubRegisterClass(CopyInRC, AGPRRegMask))
+ CopyInRC = SubRC;
+
+ if (ARC && !ARC->hasSubClassEq(CopyInRC))
+ return false;
+ ARC = CopyInRC;
+ }
+
+ if (!ARC)
return false;
- const TargetRegisterClass *ARC = MRI->getRegClass(CopyIn);
+ bool IsAGPR32 = (ARC == &AMDGPU::AGPR_32RegClass);
+
+ // Rewrite the PHI's incoming values to ARC.
+ LLVM_DEBUG(dbgs() << "Folding AGPR copies into: " << PHI);
+ for (unsigned K = 1; K < PHI.getNumExplicitOperands(); K += 2) {
+ MachineOperand &MO = PHI.getOperand(K);
+ Register Reg = MO.getReg();
+
+ MachineBasicBlock::iterator InsertPt;
+ MachineBasicBlock *InsertMBB = nullptr;
+
+ // Look at the def of Reg, ignoring all copies.
+ unsigned CopyOpc = AMDGPU::COPY;
+ if (MachineInstr *Def = MRI->getVRegDef(Reg)) {
+
+ // Look at pre-existing COPY instructions from ARC: Steal the operand. If
+ // the copy was single-use, it will be removed by DCE later.
+ if (Def->isCopy()) {
+ Register AGPRSrc;
+ unsigned AGPRSubReg = AMDGPU::NoSubRegister;
+ if (isAGPRCopy(*TRI, *MRI, *Def, AGPRSrc, AGPRSubReg)) {
+ MO.setReg(AGPRSrc);
+ MO.setSubReg(AGPRSubReg);
+ continue;
+ }
+
+ // If this is a multi-use SGPR -> VGPR copy, use V_ACCVGPR_WRITE on
+ // GFX908 directly instead of a COPY. Otherwise, SIFoldOperand may try
+ // to fold the sgpr -> vgpr -> agpr copy into a sgpr -> agpr copy which
+ // is unlikely to be profitable.
+ //
+ // Note that V_ACCVGPR_WRITE is only used for AGPR_32.
+ MachineOperand &CopyIn = Def->getOperand(1);
+ if (IsAGPR32 && !ST->hasGFX90AInsts() && !MRI->hasOneNonDBGUse(Reg) &&
+ TRI->isSGPRReg(*MRI, CopyIn.getReg()))
+ CopyOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
+ }
+
+ InsertMBB = Def->getParent();
+ InsertPt = InsertMBB->SkipPHIsLabelsAndDebug(++Def->getIterator());
+ } else {
+ InsertMBB = PHI.getOperand(MO.getOperandNo() + 1).getMBB();
+ InsertPt = InsertMBB->getFirstTerminator();
+ }
+
+ Register NewReg = MRI->createVirtualRegister(ARC);
+ MachineInstr *MI = BuildMI(*InsertMBB, InsertPt, PHI.getDebugLoc(),
+ TII->get(CopyOpc), NewReg)
+ .addReg(Reg);
+ MO.setReg(NewReg);
+
+ (void)MI;
+ LLVM_DEBUG(dbgs() << " Created COPY: " << *MI);
+ }
+
+ // Replace the PHI's result with a new register.
Register NewReg = MRI->createVirtualRegister(ARC);
- PHI.getOperand(1).setReg(CopyIn);
PHI.getOperand(0).setReg(NewReg);
+ // COPY that new register back to the original PhiOut register. This COPY will
+ // usually be folded out later.
MachineBasicBlock *MBB = PHI.getParent();
- BuildMI(*MBB, MBB->getFirstNonPHI(), Copy->getDebugLoc(),
+ BuildMI(*MBB, MBB->getFirstNonPHI(), PHI.getDebugLoc(),
TII->get(AMDGPU::COPY), PhiOut)
- .addReg(NewReg, RegState::Kill);
- Copy->eraseFromParent(); // We know this copy had a single use.
-
- LLVM_DEBUG(dbgs() << "Folded " << PHI);
+ .addReg(NewReg);
+ LLVM_DEBUG(dbgs() << " Done: Folded " << PHI);
return true;
}
@@ -1733,6 +1870,101 @@ bool SIFoldOperands::tryFoldLoad(MachineInstr &MI) {
return true;
}
+// tryFoldPhiAGPR will aggressively try to create AGPR PHIs.
+// For GFX90A and later, this is pretty much always a good thing, but for GFX908
+// there are cases where it can create a lot more AGPR-AGPR copies, which are
+// expensive on this architecture due to the lack of V_ACCVGPR_MOV.
+//
+// This function looks at all AGPR PHIs in a basic block and collects their
+// operands. Then, it checks for registers that are used more than once across
+// all PHIs and caches them in a VGPR. This prevents ExpandPostRAPseudo from
+// having to create one VGPR temporary per use, which can get very messy if
+// these PHIs come from a broken-up large PHI (e.g. 32 AGPR phis, one per vector
+// element).
+//
+// Example
+// a:
+// %in:agpr_256 = COPY %foo:vgpr_256
+// c:
+// %x:agpr_32 = ..
+// b:
+// %0:areg = PHI %in.sub0:agpr_32, %a, %x, %c
+// %1:areg = PHI %in.sub0:agpr_32, %a, %y, %c
+// %2:areg = PHI %in.sub0:agpr_32, %a, %z, %c
+// =>
+// a:
+// %in:agpr_256 = COPY %foo:vgpr_256
+// %tmp:vgpr_32 = V_ACCVGPR_READ_B32_e64 %in.sub0:agpr_32
+// %tmp_agpr:agpr_32 = COPY %tmp
+// c:
+// %x:agpr_32 = ..
+// b:
+// %0:areg = PHI %tmp_agpr, %a, %x, %c
+// %1:areg = PHI %tmp_agpr, %a, %y, %c
+// %2:areg = PHI %tmp_agpr, %a, %z, %c
+bool SIFoldOperands::tryOptimizeAGPRPhis(MachineBasicBlock &MBB) {
+ // This is only really needed on GFX908 where AGPR-AGPR copies are
+ // unreasonably difficult.
+ if (ST->hasGFX90AInsts())
+ return false;
+
+ // Look at all AGPR Phis and collect the register + subregister used.
+ DenseMap<std::pair<Register, unsigned>, std::vector<MachineOperand *>>
+ RegToMO;
+
+ for (auto &MI : MBB) {
+ if (!MI.isPHI())
+ break;
+
+ if (!TRI->isAGPR(*MRI, MI.getOperand(0).getReg()))
+ continue;
+
+ for (unsigned K = 1; K < MI.getNumOperands(); K += 2) {
+ MachineOperand &PhiMO = MI.getOperand(K);
+ RegToMO[{PhiMO.getReg(), PhiMO.getSubReg()}].push_back(&PhiMO);
+ }
+ }
+
+ // For all (Reg, SubReg) pairs that are used more than once, cache the value in
+ // a VGPR.
+ bool Changed = false;
+ for (const auto &[Entry, MOs] : RegToMO) {
+ if (MOs.size() == 1)
+ continue;
+
+ const auto [Reg, SubReg] = Entry;
+ MachineInstr *Def = MRI->getVRegDef(Reg);
+ MachineBasicBlock *DefMBB = Def->getParent();
+
+ // Create a copy in a VGPR using V_ACCVGPR_READ_B32_e64 so it's not folded
+ // out.
+ const TargetRegisterClass *ARC = getRegOpRC(*MRI, *TRI, *MOs.front());
+ Register TempVGPR =
+ MRI->createVirtualRegister(TRI->getEquivalentVGPRClass(ARC));
+ MachineInstr *VGPRCopy =
+ BuildMI(*DefMBB, ++Def->getIterator(), Def->getDebugLoc(),
+ TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64), TempVGPR)
+ .addReg(Reg, /* flags */ 0, SubReg);
+
+ // Copy back to an AGPR and use that instead of the AGPR subreg in all MOs.
+ Register TempAGPR = MRI->createVirtualRegister(ARC);
+ BuildMI(*DefMBB, ++VGPRCopy->getIterator(), Def->getDebugLoc(),
+ TII->get(AMDGPU::COPY), TempAGPR)
+ .addReg(TempVGPR);
+
+ LLVM_DEBUG(dbgs() << "Caching AGPR into VGPR: " << *VGPRCopy);
+ for (MachineOperand *MO : MOs) {
+ MO->setReg(TempAGPR);
+ MO->setSubReg(AMDGPU::NoSubRegister);
+ LLVM_DEBUG(dbgs() << " Changed PHI Operand: " << *MO << "\n");
+ }
+
+ Changed = true;
+ }
+
+ return Changed;
+}
+
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -1766,7 +1998,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- if (MI.isPHI() && tryFoldLCSSAPhi(MI)) {
+ if (MI.isPHI() && tryFoldPhiAGPR(MI)) {
Changed = true;
continue;
}
@@ -1791,6 +2023,8 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
!tryFoldOMod(MI))
Changed |= tryFoldClamp(MI);
}
+
+ Changed |= tryOptimizeAGPRPhis(*MBB);
}
return Changed;
diff --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index a1eb8150595f..edcfd994033e 100644
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -119,9 +119,7 @@ static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) {
// If this is a load instruction where the result has been coalesced with an operand, then we cannot clause it.
for (const MachineOperand &ResMO : MI.defs()) {
Register ResReg = ResMO.getReg();
- for (const MachineOperand &MO : MI.uses()) {
- if (!MO.isReg() || MO.isDef())
- continue;
+ for (const MachineOperand &MO : MI.all_uses()) {
if (MO.getReg() == ResReg)
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index c2bc95930272..865caae240f3 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -64,9 +64,12 @@ static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
return MCRegister();
}
+/// Query target location for spilling SGPRs
+/// \p IncludeScratchCopy : Also look for free scratch SGPRs
static void getVGPRSpillLaneOrTempRegister(
MachineFunction &MF, LivePhysRegs &LiveRegs, Register SGPR,
- const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass) {
+ const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
+ bool IncludeScratchCopy = true) {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
@@ -77,9 +80,12 @@ static void getVGPRSpillLaneOrTempRegister(
// We need to save and restore the given SGPR.
+ Register ScratchSGPR;
// 1: Try to save the given register into an unused scratch SGPR. The LiveRegs
- // should have all the callee saved registers marked as used.
- Register ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveRegs, RC);
+ // should have all the callee saved registers marked as used. For certain
+ // cases we skip the copy to a scratch SGPR.
+ if (IncludeScratchCopy)
+ ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveRegs, RC);
if (!ScratchSGPR) {
int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
@@ -93,10 +99,10 @@ static void getVGPRSpillLaneOrTempRegister(
SGPR, PrologEpilogSGPRSaveRestoreInfo(
SGPRSaveKind::SPILL_TO_VGPR_LANE, FI));
- LLVM_DEBUG(
- auto Spill = MFI->getPrologEpilogSGPRSpillToVGPRLanes(FI).front();
- dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
- << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane << '\n';);
+ LLVM_DEBUG(auto Spill = MFI->getSGPRSpillToPhysicalVGPRLanes(FI).front();
+ dbgs() << printReg(SGPR, TRI) << " requires fallback spill to "
+ << printReg(Spill.VGPR, TRI) << ':' << Spill.Lane
+ << '\n';);
} else {
// Remove dead <FI> index
MF.getFrameInfo().RemoveStackObject(FI);
@@ -258,7 +264,7 @@ class PrologEpilogSGPRSpillBuilder {
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
ArrayRef<SIRegisterInfo::SpilledReg> Spill =
- FuncInfo->getPrologEpilogSGPRSpillToVGPRLanes(FI);
+ FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
assert(Spill.size() == NumSubRegs);
for (unsigned I = 0; I < NumSubRegs; ++I) {
@@ -303,7 +309,7 @@ class PrologEpilogSGPRSpillBuilder {
void restoreFromVGPRLane(const int FI) {
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
ArrayRef<SIRegisterInfo::SpilledReg> Spill =
- FuncInfo->getPrologEpilogSGPRSpillToVGPRLanes(FI);
+ FuncInfo->getSGPRSpillToPhysicalVGPRLanes(FI);
assert(Spill.size() == NumSubRegs);
for (unsigned I = 0; I < NumSubRegs; ++I) {
@@ -565,7 +571,7 @@ Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
// reserved input we needed. Also for PAL, make sure we don't clobber
// the GIT pointer passed in SGPR0 or SGPR8.
if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg) &&
- !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
+ (!GITPtrLoReg || !TRI->isSubRegisterEq(Reg, GITPtrLoReg))) {
MRI.replaceRegWith(ScratchRsrcReg, Reg);
MFI->setScratchRSrcReg(Reg);
return Reg;
@@ -935,8 +941,7 @@ void SIFrameLowering::emitCSRSpillStores(
if (!WWMCalleeSavedRegs.empty()) {
if (ScratchExecCopy) {
unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
- MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addImm(-1);
+ BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
} else {
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
/*IsProlog*/ true,
@@ -948,8 +953,7 @@ void SIFrameLowering::emitCSRSpillStores(
if (ScratchExecCopy) {
// FIXME: Split block and make terminator.
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
- MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
+ BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
.addReg(ScratchExecCopy, RegState::Kill);
LiveRegs.addReg(ScratchExecCopy);
}
@@ -1040,8 +1044,7 @@ void SIFrameLowering::emitCSRSpillRestores(
if (!WWMCalleeSavedRegs.empty()) {
if (ScratchExecCopy) {
unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
- MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addImm(-1);
+ BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
} else {
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
/*IsProlog*/ false,
@@ -1053,8 +1056,7 @@ void SIFrameLowering::emitCSRSpillRestores(
if (ScratchExecCopy) {
// FIXME: Split block and make terminator.
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
- MCRegister Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- BuildMI(MBB, MBBI, DL, TII->get(ExecMov), Exec)
+ BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
.addReg(ScratchExecCopy, RegState::Kill);
}
}
@@ -1350,8 +1352,9 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
TRI->isAGPR(MRI, VReg))) {
- // FIXME: change to enterBasicBlockEnd()
- RS->enterBasicBlock(MBB);
+ assert(RS != nullptr);
+ RS->enterBasicBlockEnd(MBB);
+ RS->backward(MI);
TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
SpillFIs.set(FI);
continue;
@@ -1436,20 +1439,36 @@ void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced(
TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
if (UnusedLowVGPR && (TRI->getHWRegIndex(UnusedLowVGPR) <
TRI->getHWRegIndex(VGPRForAGPRCopy))) {
- // Call to setVGPRForAGPRCopy() should happen first before calling
- // freezeReservedRegs() so that getReservedRegs() can reserve this newly
- // identified VGPR (for AGPR copy).
+ // Reserve this newly identified VGPR (for AGPR copy). Reserved registers
+ // should already be frozen at this point, so we can avoid calling
+ // MRI.freezeReservedRegs and just use MRI.reserveReg.
FuncInfo->setVGPRForAGPRCopy(UnusedLowVGPR);
- MRI.freezeReservedRegs(MF);
+ MRI.reserveReg(UnusedLowVGPR, TRI);
}
}
+ // We initially reserved the highest available SGPR pair for long branches;
+ // now, after RA, we shift down to a lower unused one if one exists.
+ Register LongBranchReservedReg = FuncInfo->getLongBranchReservedReg();
+ Register UnusedLowSGPR =
+ TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass, MF);
+ // If LongBranchReservedReg is null then we didn't find a long branch
+ // and never reserved a register to begin with so there is nothing to
+ // shift down. Then if UnusedLowSGPR is null, there isn't an available lower
+ // register to use, so just keep the original one we set.
+ if (LongBranchReservedReg && UnusedLowSGPR) {
+ FuncInfo->setLongBranchReservedReg(UnusedLowSGPR);
+ MRI.reserveReg(UnusedLowSGPR, TRI);
+ }
}
// The special SGPR spills like the one needed for FP, BP or any reserved
// registers delayed until frame lowering.
void SIFrameLowering::determinePrologEpilogSGPRSaves(
- MachineFunction &MF, BitVector &SavedVGPRs) const {
+ MachineFunction &MF, BitVector &SavedVGPRs,
+ bool NeedExecCopyReservedReg) const {
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
@@ -1461,6 +1480,26 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
for (unsigned I = 0; CSRegs[I]; ++I)
LiveRegs.addReg(CSRegs[I]);
+ const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();
+
+ if (NeedExecCopyReservedReg) {
+ Register ReservedReg = MFI->getSGPRForEXECCopy();
+ assert(ReservedReg && "Should have reserved an SGPR for EXEC copy.");
+ Register UnusedScratchReg = findUnusedRegister(MRI, LiveRegs, RC);
+ if (UnusedScratchReg) {
+ // If an unused scratch SGPR was found, reserve the register itself for the
+ // EXEC copy; there is no need for any spill in that case.
+ MFI->setSGPRForEXECCopy(UnusedScratchReg);
+ LiveRegs.addReg(UnusedScratchReg);
+ } else {
+ // Needs spill.
+ assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedReg) &&
+ "Re-reserving spill slot for EXEC copy register");
+ getVGPRSpillLaneOrTempRegister(MF, LiveRegs, ReservedReg, RC,
+ /*IncludeScratchCopy=*/false);
+ }
+ }
+
// hasFP only knows about stack objects that already exist. We're now
// determining the stack slots that will be created, so we have to predict
// them. Stack objects force FP usage with calls.
@@ -1499,7 +1538,10 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ bool NeedExecCopyReservedReg = false;
+ MachineInstr *ReturnMI = nullptr;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
// WRITELANE instructions used for SGPR spills can overwrite the inactive
@@ -1516,6 +1558,25 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg());
else if (MI.getOpcode() == AMDGPU::V_READLANE_B32)
MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg());
+ else if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
+ NeedExecCopyReservedReg = true;
+ else if (MI.getOpcode() == AMDGPU::SI_RETURN ||
+ MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
+ // We expect all returns to be the same size.
+ assert(!ReturnMI ||
+ (count_if(MI.operands(), [](auto Op) { return Op.isReg(); }) ==
+ count_if(ReturnMI->operands(), [](auto Op) { return Op.isReg(); })));
+ ReturnMI = &MI;
+ }
+ }
+ }
+
+ // Remove any VGPRs used in the return value because these do not need to be saved.
+ // This prevents CSR restore from clobbering return VGPRs.
+ if (ReturnMI) {
+ for (auto &Op : ReturnMI->operands()) {
+ if (Op.isReg())
+ SavedVGPRs.reset(Op.getReg());
}
}
@@ -1528,7 +1589,7 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (!ST.hasGFX90AInsts())
SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());
- determinePrologEpilogSGPRSaves(MF, SavedVGPRs);
+ determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);
// The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
// allow the default insertion to handle them.
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index def07dc4b1f7..0060fc0be431 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -34,8 +34,8 @@ public:
RegScavenger *RS = nullptr) const override;
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS = nullptr) const;
- void determinePrologEpilogSGPRSaves(MachineFunction &MF,
- BitVector &SavedRegs) const;
+ void determinePrologEpilogSGPRSaves(MachineFunction &MF, BitVector &SavedRegs,
+ bool NeedExecCopyReservedReg) const;
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
LivePhysRegs &LiveRegs, Register FrameReg,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e0ad11d5af24..3148f49ff0d5 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -15,14 +15,17 @@
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUTargetMachine.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/ByteProvider.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
@@ -35,8 +38,9 @@
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ModRef.h"
#include "llvm/Support/KnownBits.h"
+#include "llvm/Support/ModRef.h"
+#include <optional>
using namespace llvm;
@@ -55,14 +59,14 @@ static cl::opt<bool> UseDivergentRegisterIndexing(
cl::desc("Use indirect register addressing for divergent indexes"),
cl::init(false));
-static bool hasFP32Denormals(const MachineFunction &MF) {
+static bool denormalModeIsFlushAllF32(const MachineFunction &MF) {
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- return Info->getMode().allFP32Denormals();
+ return Info->getMode().FP32Denormals == DenormalMode::getPreserveSign();
}
-static bool hasFP64FP16Denormals(const MachineFunction &MF) {
+static bool denormalModeIsFlushAllF64F16(const MachineFunction &MF) {
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
- return Info->getMode().allFP64FP16Denormals();
+ return Info->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign();
}
static unsigned findFirstFreeSGPR(CCState &CCInfo) {
@@ -215,6 +219,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::f64, Promote);
AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
+ setOperationAction(ISD::FSQRT, MVT::f64, Custom);
+
setOperationAction(ISD::SELECT_CC,
{MVT::f32, MVT::i32, MVT::i64, MVT::f64, MVT::i1}, Expand);
@@ -244,13 +250,13 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction({ISD::UADDO, ISD::USUBO}, MVT::i32, Legal);
- setOperationAction({ISD::ADDCARRY, ISD::SUBCARRY}, MVT::i32, Legal);
+ setOperationAction({ISD::UADDO_CARRY, ISD::USUBO_CARRY}, MVT::i32, Legal);
setOperationAction({ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS}, MVT::i64,
Expand);
#if 0
- setOperationAction({ISD::ADDCARRY, ISD::SUBCARRY}, MVT::i64, Legal);
+ setOperationAction({ISD::UADDO_CARRY, ISD::USUBO_CARRY}, MVT::i64, Legal);
#endif
// We only support LOAD/STORE and vector manipulation ops for vectors
@@ -470,6 +476,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
MVT::f64, Custom);
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction({ISD::FLDEXP, ISD::STRICT_FLDEXP}, {MVT::f32, MVT::f64},
+ Legal);
+ setOperationAction(ISD::FFREXP, {MVT::f32, MVT::f64}, Custom);
setOperationAction({ISD::FSIN, ISD::FCOS, ISD::FDIV}, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Custom);
@@ -514,9 +523,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
AddPromotedToType(ISD::STORE, MVT::f16, MVT::i16);
// F16 - VOP1 Actions.
- setOperationAction(
- {ISD::FP_ROUND, ISD::FCOS, ISD::FSIN, ISD::FROUND, ISD::FPTRUNC_ROUND},
- MVT::f16, Custom);
+ setOperationAction({ISD::FP_ROUND, ISD::STRICT_FP_ROUND, ISD::FCOS,
+ ISD::FSIN, ISD::FROUND, ISD::FPTRUNC_ROUND},
+ MVT::f16, Custom);
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, MVT::i16, Custom);
@@ -526,7 +535,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
// F16 - VOP2 Actions.
setOperationAction({ISD::BR_CC, ISD::SELECT_CC}, MVT::f16, Expand);
-
+ setOperationAction({ISD::FLDEXP, ISD::STRICT_FLDEXP}, MVT::f16, Custom);
+ setOperationAction(ISD::FFREXP, MVT::f16, Custom);
setOperationAction(ISD::FDIV, MVT::f16, Custom);
// F16 - VOP3 Actions.
@@ -728,25 +738,25 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INTRINSIC_WO_CHAIN,
{MVT::Other, MVT::f32, MVT::v4f32, MVT::i16, MVT::f16,
- MVT::v2i16, MVT::v2f16},
+ MVT::v2i16, MVT::v2f16, MVT::i128},
Custom);
setOperationAction(ISD::INTRINSIC_W_CHAIN,
{MVT::v2f16, MVT::v2i16, MVT::v3f16, MVT::v3i16,
MVT::v4f16, MVT::v4i16, MVT::v8f16, MVT::Other, MVT::f16,
- MVT::i16, MVT::i8},
+ MVT::i16, MVT::i8, MVT::i128},
Custom);
setOperationAction(ISD::INTRINSIC_VOID,
{MVT::Other, MVT::v2i16, MVT::v2f16, MVT::v3i16,
MVT::v3f16, MVT::v4f16, MVT::v4i16, MVT::f16, MVT::i16,
- MVT::i8},
+ MVT::i8, MVT::i128},
Custom);
setTargetDAGCombine({ISD::ADD,
- ISD::ADDCARRY,
+ ISD::UADDO_CARRY,
ISD::SUB,
- ISD::SUBCARRY,
+ ISD::USUBO_CARRY,
ISD::FADD,
ISD::FSUB,
ISD::FMINNUM,
@@ -769,7 +779,11 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
ISD::ZERO_EXTEND,
ISD::SIGN_EXTEND_INREG,
ISD::EXTRACT_VECTOR_ELT,
- ISD::INSERT_VECTOR_ELT});
+ ISD::INSERT_VECTOR_ELT,
+ ISD::FCOPYSIGN});
+
+ if (Subtarget->has16BitInsts() && !Subtarget->hasMed3_16())
+ setTargetDAGCombine(ISD::FP_ROUND);
// All memory operations. Some folding on the pointer operand is done to help
// matching the constant offsets in the addressing modes.
@@ -791,6 +805,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
ISD::ATOMIC_LOAD_UMIN,
ISD::ATOMIC_LOAD_UMAX,
ISD::ATOMIC_LOAD_FADD,
+ ISD::ATOMIC_LOAD_UINC_WRAP,
+ ISD::ATOMIC_LOAD_UDEC_WRAP,
ISD::INTRINSIC_VOID,
ISD::INTRINSIC_W_CHAIN});
@@ -816,10 +832,10 @@ bool SITargetLowering::isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
EVT DestVT, EVT SrcVT) const {
return ((Opcode == ISD::FMAD && Subtarget->hasMadMixInsts()) ||
(Opcode == ISD::FMA && Subtarget->hasFmaMixInsts())) &&
- DestVT.getScalarType() == MVT::f32 &&
- SrcVT.getScalarType() == MVT::f16 &&
- // TODO: This probably only requires no input flushing?
- !hasFP32Denormals(DAG.getMachineFunction());
+ DestVT.getScalarType() == MVT::f32 &&
+ SrcVT.getScalarType() == MVT::f16 &&
+ // TODO: This probably only requires no input flushing?
+ denormalModeIsFlushAllF32(DAG.getMachineFunction());
}
bool SITargetLowering::isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
@@ -829,7 +845,7 @@ bool SITargetLowering::isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
DestTy.getScalarSizeInBits() == 32 &&
SrcTy.getScalarSizeInBits() == 16 &&
// TODO: This probably only requires no input flushing?
- !hasFP32Denormals(*MI.getMF());
+ denormalModeIsFlushAllF32(*MI.getMF());
}
bool SITargetLowering::isShuffleMaskLegal(ArrayRef<int>, EVT) const {
@@ -976,6 +992,26 @@ static EVT memVTFromLoadIntrReturn(Type *Ty, unsigned MaxNumLanes) {
return memVTFromLoadIntrData(ST->getContainedType(0), MaxNumLanes);
}
+/// Map address space 7 to MVT::v5i32 because that's its in-memory
+/// representation. This return value is vector-typed because there is no
+/// MVT::i160 and it is not clear if one can be added. While this could
+/// cause issues during codegen, these address space 7 pointers will be
+/// rewritten away by then. Therefore, we can return MVT::v5i32 in order
+/// to allow pre-codegen passes that query TargetTransformInfo, often for cost
+/// modeling, to work.
+MVT SITargetLowering::getPointerTy(const DataLayout &DL, unsigned AS) const {
+ if (AMDGPUAS::BUFFER_FAT_POINTER == AS && DL.getPointerSizeInBits(AS) == 160)
+ return MVT::v5i32;
+ return AMDGPUTargetLowering::getPointerTy(DL, AS);
+}
+/// Similarly, the in-memory representation of a p7 is {p8, i32}, aka
+/// v8i32 when padding is added.
+MVT SITargetLowering::getPointerMemTy(const DataLayout &DL, unsigned AS) const {
+ if (AMDGPUAS::BUFFER_FAT_POINTER == AS && DL.getPointerSizeInBits(AS) == 160)
+ return MVT::v8i32;
+ return AMDGPUTargetLowering::getPointerMemTy(DL, AS);
+}
+
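// A quick size check behind the two overrides above, sketched under the
// layout the comments describe: a 160-bit buffer fat pointer made of a
// 128-bit p8 resource plus a 32-bit offset, with the in-memory struct padded
// out to the resource's 128-bit alignment (that alignment is an assumption
// here; the authoritative numbers live in the target's DataLayout string).
static_assert(128 + 32 == 5 * 32, "p7 register type packs into v5i32");
static_assert((128 + 32 + 127) / 128 * 128 == 8 * 32,
              "p7 in-memory type pads out to v8i32");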
bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &CI,
MachineFunction &MF,
@@ -993,11 +1029,22 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return false;
// TODO: Should images get their own address space?
- Info.fallbackAddressSpace = AMDGPUAS::BUFFER_FAT_POINTER;
+ Info.fallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
if (RsrcIntr->IsImage)
Info.align.reset();
+ Value *RsrcArg = CI.getArgOperand(RsrcIntr->RsrcArg);
+ if (auto *RsrcPtrTy = dyn_cast<PointerType>(RsrcArg->getType())) {
+ if (RsrcPtrTy->getAddressSpace() == AMDGPUAS::BUFFER_RESOURCE)
+ // We conservatively set the memory operand of a buffer intrinsic to the
+ // base resource pointer, so that we can access alias information about
+ // those pointers. Cases like "this points at the same value
+ // but with a different offset" are handled in
+ // areMemAccessesTriviallyDisjoint.
+ Info.ptrVal = RsrcArg;
+ }
+
Info.flags |= MachineMemOperand::MODereferenceable;
if (ME.onlyReadsMemory()) {
unsigned MaxNumLanes = 4;
@@ -1050,7 +1097,9 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
default:
break;
case Intrinsic::amdgcn_raw_buffer_load_lds:
- case Intrinsic::amdgcn_struct_buffer_load_lds: {
+ case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
+ case Intrinsic::amdgcn_struct_buffer_load_lds:
+ case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
unsigned Width = cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue();
Info.memVT = EVT::getIntegerVT(CI.getContext(), Width * 8);
return true;
@@ -1061,8 +1110,6 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
}
switch (IntrID) {
- case Intrinsic::amdgcn_atomic_inc:
- case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_ds_fadd:
@@ -1083,7 +1130,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::amdgcn_buffer_atomic_fadd: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getOperand(0)->getType());
- Info.fallbackAddressSpace = AMDGPUAS::BUFFER_FAT_POINTER;
+ Info.fallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
Info.align.reset();
Info.flags |= MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
@@ -1093,6 +1140,15 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
return true;
}
+ case Intrinsic::amdgcn_ds_add_gs_reg_rtn:
+ case Intrinsic::amdgcn_ds_sub_gs_reg_rtn: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(CI.getOperand(0)->getType());
+ Info.ptrVal = nullptr;
+ Info.fallbackAddressSpace = AMDGPUAS::STREAMOUT_REGISTER;
+ Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ return true;
+ }
case Intrinsic::amdgcn_ds_append:
case Intrinsic::amdgcn_ds_consume: {
Info.opc = ISD::INTRINSIC_W_CHAIN;
@@ -1121,7 +1177,7 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::getVT(CI.getType()); // XXX: what is correct VT?
- Info.fallbackAddressSpace = AMDGPUAS::BUFFER_FAT_POINTER;
+ Info.fallbackAddressSpace = AMDGPUAS::BUFFER_RESOURCE;
Info.align.reset();
Info.flags |= MachineMemOperand::MOLoad |
MachineMemOperand::MODereferenceable;
@@ -1204,8 +1260,6 @@ bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
SmallVectorImpl<Value*> &Ops,
Type *&AccessTy) const {
switch (II->getIntrinsicID()) {
- case Intrinsic::amdgcn_atomic_inc:
- case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_ordered_add:
case Intrinsic::amdgcn_ds_ordered_swap:
case Intrinsic::amdgcn_ds_append:
@@ -1313,7 +1367,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (AS == AMDGPUAS::CONSTANT_ADDRESS ||
AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
- AS == AMDGPUAS::BUFFER_FAT_POINTER) {
+ AS == AMDGPUAS::BUFFER_FAT_POINTER || AS == AMDGPUAS::BUFFER_RESOURCE) {
// If the offset isn't a multiple of 4, it probably isn't going to be
// correctly aligned.
// FIXME: Can we get the real alignment here?
@@ -1336,12 +1390,16 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
// in 8-bits, it can use a smaller encoding.
if (!isUInt<32>(AM.BaseOffs / 4))
return false;
- } else if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ } else if (Subtarget->getGeneration() < AMDGPUSubtarget::GFX9) {
// On VI, these use the SMEM format and the offset is 20-bit in bytes.
if (!isUInt<20>(AM.BaseOffs))
return false;
- } else
- llvm_unreachable("unhandled generation");
+ } else {
+ // On GFX9 the offset is signed 21-bit in bytes (but must not be negative
+ // for S_BUFFER_* instructions).
+ if (!isInt<21>(AM.BaseOffs))
+ return false;
+ }
if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
return true;
@@ -1350,11 +1408,12 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return true;
return false;
+ }
- } else if (AS == AMDGPUAS::PRIVATE_ADDRESS) {
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS)
return isLegalMUBUFAddressingMode(AM);
- } else if (AS == AMDGPUAS::LOCAL_ADDRESS ||
- AS == AMDGPUAS::REGION_ADDRESS) {
+
+ if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS) {
// Basic, single offset DS instructions allow a 16-bit unsigned immediate
// field.
// XXX - If doing a 4-byte aligned 8-byte type access, we effectively have
@@ -1369,8 +1428,9 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
return true;
return false;
- } else if (AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::UNKNOWN_ADDRESS_SPACE) {
+ }
+
+ if (AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::UNKNOWN_ADDRESS_SPACE) {
// For an unknown address space, this usually means that this is for some
// reason being used for pure arithmetic, and not based on some addressing
// computation. We don't have instructions that compute pointers with any
@@ -1544,18 +1604,14 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
return AlignedBy4;
}
- if (Subtarget->hasUnalignedBufferAccessEnabled()) {
- // If we have a uniform constant load, it still requires using a slow
- // buffer instruction if unaligned.
- if (IsFast) {
- // Accesses can really be issued as 1-byte aligned or 4-byte aligned, so
- // 2-byte alignment is worse than 1 unless doing a 2-byte access.
- *IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS ||
- AddrSpace == AMDGPUAS::CONSTANT_ADDRESS_32BIT) ?
- Alignment >= Align(4) : Alignment != Align(2);
- }
+ // So long as they are correct, wide global memory operations perform better
+ // than multiple smaller memory ops -- even when misaligned
+ if (AMDGPU::isExtendedGlobalAddrSpace(AddrSpace)) {
+ if (IsFast)
+ *IsFast = Size;
- return true;
+ return Alignment >= Align(4) ||
+ Subtarget->hasUnalignedBufferAccessEnabled();
}
// Smaller than dword value must be aligned.
@@ -1864,7 +1920,7 @@ SDValue SITargetLowering::getPreloadedValue(SelectionDAG &DAG,
return DAG.getUNDEF(VT);
}
- return CreateLiveInRegister(DAG, RC, Reg->getRegister(), VT);
+ return loadInputValue(DAG, RC, VT, SDLoc(DAG.getEntryNode()), *Reg);
}
static void processPSInputArgs(SmallVectorImpl<ISD::InputArg> &Splits,
@@ -2082,7 +2138,9 @@ void SITargetLowering::allocateSpecialInputSGPRs(
if (Info.hasDispatchPtr())
allocateSGPR64Input(CCInfo, ArgInfo.DispatchPtr);
- if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5)
+ const Module *M = MF.getFunction().getParent();
+ if (Info.hasQueuePtr() &&
+ AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5)
allocateSGPR64Input(CCInfo, ArgInfo.QueuePtr);
// Implicit arg ptr takes the place of the kernarg segment pointer. This is a
@@ -2132,7 +2190,9 @@ void SITargetLowering::allocateHSAUserSGPRs(CCState &CCInfo,
CCInfo.AllocateReg(DispatchPtrReg);
}
- if (Info.hasQueuePtr() && AMDGPU::getAmdhsaCodeObjectVersion() < 5) {
+ const Module *M = MF.getFunction().getParent();
+ if (Info.hasQueuePtr() &&
+ AMDGPU::getCodeObjectVersion(*M) < AMDGPU::AMDHSA_COV5) {
Register QueuePtrReg = Info.addQueuePtr(TRI);
MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);
@@ -2175,11 +2235,16 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo,
SIMachineFunctionInfo &Info,
CallingConv::ID CallConv,
bool IsShader) const {
+ bool HasArchitectedSGPRs = Subtarget->hasArchitectedSGPRs();
if (Subtarget->hasUserSGPRInit16Bug() && !IsShader) {
// Note: user SGPRs are handled by the front-end for graphics shaders
// Pad up the used user SGPRs with dead inputs.
- unsigned CurrentUserSGPRs = Info.getNumUserSGPRs();
+ // TODO: NumRequiredSystemSGPRs computation should be adjusted appropriately
+ // before enabling architected SGPRs for workgroup IDs.
+ assert(!HasArchitectedSGPRs && "Unhandled feature for the subtarget");
+
+ unsigned CurrentUserSGPRs = Info.getNumUserSGPRs();
// Note we do not count the PrivateSegmentWaveByteOffset. We do not want to
// rely on it to reach 16 since if we end up having no stack usage, it will
// not really be added.
@@ -2195,20 +2260,26 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo,
}
if (Info.hasWorkGroupIDX()) {
- Register Reg = Info.addWorkGroupIDX();
- MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+ Register Reg = Info.addWorkGroupIDX(HasArchitectedSGPRs);
+ if (!HasArchitectedSGPRs)
+ MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+
CCInfo.AllocateReg(Reg);
}
if (Info.hasWorkGroupIDY()) {
- Register Reg = Info.addWorkGroupIDY();
- MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+ Register Reg = Info.addWorkGroupIDY(HasArchitectedSGPRs);
+ if (!HasArchitectedSGPRs)
+ MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+
CCInfo.AllocateReg(Reg);
}
if (Info.hasWorkGroupIDZ()) {
- Register Reg = Info.addWorkGroupIDZ();
- MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+ Register Reg = Info.addWorkGroupIDZ(HasArchitectedSGPRs);
+ if (!HasArchitectedSGPRs)
+ MF.addLiveIn(Reg, &AMDGPU::SGPR_32RegClass);
+
CCInfo.AllocateReg(Reg);
}
@@ -2395,8 +2466,6 @@ SDValue SITargetLowering::LowerFormalArguments(
return DAG.getEntryNode();
}
- Info->allocateKnownAddressLDSGlobal(Fn);
-
SmallVector<ISD::InputArg, 16> Splits;
SmallVector<CCValAssign, 16> ArgLocs;
BitVector Skipped(Ins.size());
@@ -2409,11 +2478,14 @@ SDValue SITargetLowering::LowerFormalArguments(
if (IsGraphics) {
assert(!Info->hasDispatchPtr() && !Info->hasKernargSegmentPtr() &&
- (!Info->hasFlatScratchInit() || Subtarget->enableFlatScratch()) &&
- !Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
- !Info->hasWorkGroupIDZ() && !Info->hasWorkGroupInfo() &&
- !Info->hasLDSKernelId() && !Info->hasWorkItemIDX() &&
- !Info->hasWorkItemIDY() && !Info->hasWorkItemIDZ());
+ !Info->hasWorkGroupInfo() && !Info->hasLDSKernelId() &&
+ !Info->hasWorkItemIDX() && !Info->hasWorkItemIDY() &&
+ !Info->hasWorkItemIDZ());
+ if (!Subtarget->enableFlatScratch())
+ assert(!Info->hasFlatScratchInit());
+ if (CallConv != CallingConv::AMDGPU_CS || !Subtarget->hasArchitectedSGPRs())
+ assert(!Info->hasWorkGroupIDX() && !Info->hasWorkGroupIDY() &&
+ !Info->hasWorkGroupIDZ());
}
if (CallConv == CallingConv::AMDGPU_PS) {
@@ -2451,7 +2523,7 @@ SDValue SITargetLowering::LowerFormalArguments(
unsigned PsInputBits = Info->getPSInputAddr() & Info->getPSInputEnable();
if ((PsInputBits & 0x7F) == 0 ||
((PsInputBits & 0xF) == 0 && (PsInputBits >> 11 & 1)))
- Info->markPSInputEnabled(countTrailingZeros(Info->getPSInputAddr()));
+ Info->markPSInputEnabled(llvm::countr_zero(Info->getPSInputAddr()));
}
} else if (IsKernel) {
assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
@@ -2610,7 +2682,7 @@ SDValue SITargetLowering::LowerFormalArguments(
DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();
ArgUsageInfo.setFuncArgInfo(Fn, Info->getArgInfo());
- unsigned StackArgSize = CCInfo.getNextStackOffset();
+ unsigned StackArgSize = CCInfo.getStackSize();
Info->setBytesInStackArgArea(StackArgSize);
return Chains.empty() ? Chain :
@@ -2632,7 +2704,17 @@ bool SITargetLowering::CanLowerReturn(
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
- return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, IsVarArg));
+ if (!CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, IsVarArg)))
+ return false;
+
+ // We must use the stack if return would require unavailable registers.
+ unsigned MaxNumVGPRs = Subtarget->getMaxNumVGPRs(MF);
+ unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
+ for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i)
+ if (CCInfo.isAllocated(AMDGPU::VGPR_32RegClass.getRegister(i)))
+ return false;
+
+ return true;
}
SDValue
@@ -2665,7 +2747,7 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Analyze outgoing return values.
CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
- SDValue Flag;
+ SDValue Glue;
SmallVector<SDValue, 48> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
@@ -2697,8 +2779,8 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
llvm_unreachable("Unknown loc info!");
}
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
- Flag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Glue);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
@@ -2721,17 +2803,17 @@ SITargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Update chain and glue.
RetOps[0] = Chain;
- if (Flag.getNode())
- RetOps.push_back(Flag);
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
unsigned Opc = AMDGPUISD::ENDPGM;
if (!IsWaveEnd)
- Opc = IsShader ? AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_FLAG;
+ Opc = IsShader ? AMDGPUISD::RETURN_TO_EPILOG : AMDGPUISD::RET_GLUE;
return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}
SDValue SITargetLowering::LowerCallResult(
- SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg,
+ SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool IsThisReturn,
SDValue ThisVal) const {
@@ -2749,9 +2831,9 @@ SDValue SITargetLowering::LowerCallResult(
SDValue Val;
if (VA.isRegLoc()) {
- Val = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
+ Val = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InGlue);
Chain = Val.getValue(1);
- InFlag = Val.getValue(2);
+ InGlue = Val.getValue(2);
} else if (VA.isMemLoc()) {
report_fatal_error("TODO: return values in memory");
} else
@@ -3066,7 +3148,7 @@ bool SITargetLowering::isEligibleForTailCallOptimization(
// If the stack arguments for this call do not fit into our own save area then
// the call cannot be made tail.
// TODO: Is this really necessary?
- if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
+ if (CCInfo.getStackSize() > FuncInfo->getBytesInStackArgArea())
return false;
const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -3122,21 +3204,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
"unsupported required tail call to function ");
}
- if (AMDGPU::isShader(CallConv)) {
- // Note the issue is with the CC of the called function, not of the call
- // itself.
- return lowerUnhandledCall(CLI, InVals,
- "unsupported call to a shader function ");
- }
-
- if (AMDGPU::isShader(MF.getFunction().getCallingConv()) &&
- CallConv != CallingConv::AMDGPU_Gfx) {
- // Only allow calls with specific calling conventions.
- return lowerUnhandledCall(CLI, InVals,
- "unsupported calling convention for call from "
- "graphics shader of function ");
- }
-
if (IsTailCall) {
IsTailCall = isEligibleForTailCallOptimization(
Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
@@ -3173,7 +3240,7 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
CCInfo.AnalyzeCallOperands(Outs, AssignFn);
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
+ unsigned NumBytes = CCInfo.getStackSize();
if (IsSibCall) {
// Since we're not changing the ABI to make this a tail call, the memory
@@ -3309,11 +3376,11 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
- SDValue InFlag;
+ SDValue InGlue;
for (auto &RegToPass : RegsToPass) {
Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
- RegToPass.second, InFlag);
- InFlag = Chain.getValue(1);
+ RegToPass.second, InGlue);
+ InGlue = Chain.getValue(1);
}
@@ -3322,8 +3389,8 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
// we've carefully laid out the parameters so that when sp is reset they'll be
// in the correct location.
if (IsTailCall && !IsSibCall) {
- Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InFlag, DL);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, DL);
+ InGlue = Chain.getValue(1);
}
std::vector<SDValue> Ops;
@@ -3359,8 +3426,8 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ if (InGlue.getNode())
+ Ops.push_back(InGlue);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -3368,22 +3435,24 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
// actual call instruction.
if (IsTailCall) {
MFI.setHasTailCall();
- return DAG.getNode(AMDGPUISD::TC_RETURN, DL, NodeTys, Ops);
+ unsigned OPC = CallConv == CallingConv::AMDGPU_Gfx ?
+ AMDGPUISD::TC_RETURN_GFX : AMDGPUISD::TC_RETURN;
+ return DAG.getNode(OPC, DL, NodeTys, Ops);
}
// Returns a chain and a flag for retval copy to use.
SDValue Call = DAG.getNode(AMDGPUISD::CALL, DL, NodeTys, Ops);
Chain = Call.getValue(0);
- InFlag = Call.getValue(1);
+ InGlue = Call.getValue(1);
uint64_t CalleePopBytes = NumBytes;
- Chain = DAG.getCALLSEQ_END(Chain, 0, CalleePopBytes, InFlag, DL);
+ Chain = DAG.getCALLSEQ_END(Chain, 0, CalleePopBytes, InGlue, DL);
if (!Ins.empty())
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
+ return LowerCallResult(Chain, InGlue, CallConv, IsVarArg, Ins, DL, DAG,
InVals, IsThisReturn,
IsThisReturn ? OutVals[0] : SDValue());
}
@@ -4000,6 +4069,120 @@ static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
return LoopBB;
}
+static MachineBasicBlock *lowerWaveReduce(MachineInstr &MI,
+ MachineBasicBlock &BB,
+ const GCNSubtarget &ST,
+ unsigned Opc) {
+ MachineRegisterInfo &MRI = BB.getParent()->getRegInfo();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const DebugLoc &DL = MI.getDebugLoc();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ // Reduction operations depend on whether the input operand is SGPR or VGPR.
+ Register SrcReg = MI.getOperand(1).getReg();
+ bool isSGPR = TRI->isSGPRClass(MRI.getRegClass(SrcReg));
+ Register DstReg = MI.getOperand(0).getReg();
+ MachineBasicBlock *RetBB = nullptr;
+ if (isSGPR) {
+ // These operations are idempotent on a uniform (SGPR) input value: the
+ // reduced value will be the same as the given SGPR.
+ BuildMI(BB, MI, DL, TII->get(AMDGPU::S_MOV_B32), DstReg).addReg(SrcReg);
+ RetBB = &BB;
+ } else {
+ // TODO: Implement the DPP strategy and switch based on the immediate
+ // strategy operand. For now, for all the cases (default, Iterative, and
+ // DPP) we use the iterative approach by default.
+
+ // To reduce the VGPR with the iterative approach, we need to iterate over
+ // all the active lanes. The lowering builds a ComputeLoop that iterates
+ // over only the active lanes. We use a copy of the EXEC register as the
+ // induction variable, and every active lane clears its bit with bitset0 so
+ // that the next iteration picks up the next active lane.
+ MachineBasicBlock::iterator I = BB.end();
+ Register SrcReg = MI.getOperand(1).getReg();
+
+ // Create the control flow for the loop by splitting MI's machine basic
+ // block into the loop blocks.
+ auto [ComputeLoop, ComputeEnd] = splitBlockForLoop(MI, BB, true);
+
+ // Create virtual registers required for lowering.
+ const TargetRegisterClass *WaveMaskRegClass = TRI->getWaveMaskRegClass();
+ const TargetRegisterClass *DstRegClass = MRI.getRegClass(DstReg);
+ Register LoopIterator = MRI.createVirtualRegister(WaveMaskRegClass);
+ Register InitalValReg = MRI.createVirtualRegister(DstRegClass);
+
+ Register AccumulatorReg = MRI.createVirtualRegister(DstRegClass);
+ Register ActiveBitsReg = MRI.createVirtualRegister(WaveMaskRegClass);
+ Register NewActiveBitsReg = MRI.createVirtualRegister(WaveMaskRegClass);
+
+ Register FF1Reg = MRI.createVirtualRegister(DstRegClass);
+ Register LaneValueReg = MRI.createVirtualRegister(DstRegClass);
+
+ bool IsWave32 = ST.isWave32();
+ unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+ unsigned ExecReg = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+
+ // Create the initial values of the induction variable (from EXEC) and the
+ // accumulator, and insert a branch to the newly created ComputeLoop block.
+ uint32_t InitalValue =
+ (Opc == AMDGPU::S_MIN_U32) ? std::numeric_limits<uint32_t>::max() : 0;
+ auto TmpSReg =
+ BuildMI(BB, I, DL, TII->get(MovOpc), LoopIterator).addReg(ExecReg);
+ BuildMI(BB, I, DL, TII->get(AMDGPU::S_MOV_B32), InitalValReg)
+ .addImm(InitalValue);
+ BuildMI(BB, I, DL, TII->get(AMDGPU::S_BRANCH)).addMBB(ComputeLoop);
+
+ // Start constructing ComputeLoop
+ I = ComputeLoop->end();
+ auto Accumulator =
+ BuildMI(*ComputeLoop, I, DL, TII->get(AMDGPU::PHI), AccumulatorReg)
+ .addReg(InitalValReg)
+ .addMBB(&BB);
+ auto ActiveBits =
+ BuildMI(*ComputeLoop, I, DL, TII->get(AMDGPU::PHI), ActiveBitsReg)
+ .addReg(TmpSReg->getOperand(0).getReg())
+ .addMBB(&BB);
+
+ // Perform the computations
+ unsigned SFFOpc = IsWave32 ? AMDGPU::S_FF1_I32_B32 : AMDGPU::S_FF1_I32_B64;
+ auto FF1 = BuildMI(*ComputeLoop, I, DL, TII->get(SFFOpc), FF1Reg)
+ .addReg(ActiveBits->getOperand(0).getReg());
+ auto LaneValue = BuildMI(*ComputeLoop, I, DL,
+ TII->get(AMDGPU::V_READLANE_B32), LaneValueReg)
+ .addReg(SrcReg)
+ .addReg(FF1->getOperand(0).getReg());
+ auto NewAccumulator = BuildMI(*ComputeLoop, I, DL, TII->get(Opc), DstReg)
+ .addReg(Accumulator->getOperand(0).getReg())
+ .addReg(LaneValue->getOperand(0).getReg());
+
+ // Clear the current lane from the active-lane mask to get the next one.
+ unsigned BITSETOpc =
+ IsWave32 ? AMDGPU::S_BITSET0_B32 : AMDGPU::S_BITSET0_B64;
+ auto NewActiveBits =
+ BuildMI(*ComputeLoop, I, DL, TII->get(BITSETOpc), NewActiveBitsReg)
+ .addReg(FF1->getOperand(0).getReg())
+ .addReg(ActiveBits->getOperand(0).getReg());
+
+ // Add phi nodes
+ Accumulator.addReg(NewAccumulator->getOperand(0).getReg())
+ .addMBB(ComputeLoop);
+ ActiveBits.addReg(NewActiveBits->getOperand(0).getReg())
+ .addMBB(ComputeLoop);
+
+ // Branch back to ComputeLoop while there are still active lanes left.
+ unsigned CMPOpc = IsWave32 ? AMDGPU::S_CMP_LG_U32 : AMDGPU::S_CMP_LG_U64;
+ BuildMI(*ComputeLoop, I, DL, TII->get(CMPOpc))
+ .addReg(NewActiveBits->getOperand(0).getReg())
+ .addImm(0);
+ BuildMI(*ComputeLoop, I, DL, TII->get(AMDGPU::S_CBRANCH_SCC1))
+ .addMBB(ComputeLoop);
+
+ RetBB = ComputeEnd;
+ }
+ MI.eraseFromParent();
+ return RetBB;
+}
+
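// A minimal host-side sketch of the loop lowerWaveReduce builds above,
// assuming a wave32 execution mask and treating the S_MIN_U32 / S_MAX_U32
// reduction opcode as a plain callback; the names here are illustrative only.
// std::countr_zero stands in for S_FF1_I32_B32, Lanes[Lane] for
// V_READLANE_B32, and clearing the bit for S_BITSET0_B32. The emitted loop is
// a do-while because EXEC is never zero on entry, which is equivalent here.
#include <bit>
#include <cstdint>

static uint32_t waveReduceSketch(const uint32_t Lanes[32], uint32_t Exec,
                                 uint32_t (*Op)(uint32_t, uint32_t),
                                 uint32_t Identity) {
  uint32_t Acc = Identity; // UINT32_MAX for umin, 0 for umax
  uint32_t Live = Exec;    // copy of EXEC_LO, used as the induction variable
  while (Live != 0) {      // S_CMP_LG_U32 + S_CBRANCH_SCC1 back-edge
    unsigned Lane = std::countr_zero(Live); // first remaining active lane
    Acc = Op(Acc, Lanes[Lane]);             // fold in that lane's value
    Live &= ~(1u << Lane);                  // retire the lane
  }
  return Acc;
}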
MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
@@ -4008,6 +4191,10 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
switch (MI.getOpcode()) {
+ case AMDGPU::WAVE_REDUCE_UMIN_PSEUDO_U32:
+ return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::S_MIN_U32);
+ case AMDGPU::WAVE_REDUCE_UMAX_PSEUDO_U32:
+ return lowerWaveReduce(MI, *BB, *getSubtarget(), AMDGPU::S_MAX_U32);
case AMDGPU::S_UADDO_PSEUDO:
case AMDGPU::S_USUBO_PSEUDO: {
const DebugLoc &DL = MI.getDebugLoc();
@@ -4460,15 +4647,54 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
return BB;
}
+ case AMDGPU::S_INVERSE_BALLOT_U32:
+ case AMDGPU::S_INVERSE_BALLOT_U64: {
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const DebugLoc &DL = MI.getDebugLoc();
+ const Register DstReg = MI.getOperand(0).getReg();
+ Register MaskReg = MI.getOperand(1).getReg();
+
+ const bool IsVALU = TRI->isVectorRegister(MRI, MaskReg);
+
+ if (IsVALU) {
+ MaskReg = TII->readlaneVGPRToSGPR(MaskReg, MI, MRI);
+ }
+
+ BuildMI(*BB, &MI, DL, TII->get(AMDGPU::COPY), DstReg).addReg(MaskReg);
+ MI.eraseFromParent();
+ return BB;
+ }
+ case AMDGPU::ENDPGM_TRAP: {
+ const DebugLoc &DL = MI.getDebugLoc();
+ if (BB->succ_empty() && std::next(MI.getIterator()) == BB->end()) {
+ MI.setDesc(TII->get(AMDGPU::S_ENDPGM));
+ MI.addOperand(MachineOperand::CreateImm(0));
+ return BB;
+ }
+
+ // We need a block split to make the real endpgm a terminator. We also don't
+ // want to break phis in successor blocks, so we can't just delete to the
+ // end of the block.
+
+ MachineBasicBlock *SplitBB = BB->splitAt(MI, false /*UpdateLiveIns*/);
+ MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+ MF->push_back(TrapBB);
+ BuildMI(*TrapBB, TrapBB->end(), DL, TII->get(AMDGPU::S_ENDPGM))
+ .addImm(0);
+ BuildMI(*BB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addMBB(TrapBB);
+
+ BB->addSuccessor(TrapBB);
+ MI.eraseFromParent();
+ return SplitBB;
+ }
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
}
}
-bool SITargetLowering::hasBitPreservingFPLogic(EVT VT) const {
- return isTypeLegal(VT.getScalarType());
-}
-
bool SITargetLowering::hasAtomicFaddRtnForTy(SDValue &Op) const {
switch (Op.getValue(0).getSimpleValueType().SimpleTy) {
case MVT::f32:
@@ -4542,7 +4768,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
// Otherwise f32 mad is always full rate and returns the same result as
// the separate operations so should be preferred over fma.
// However does not support denormals.
- if (hasFP32Denormals(MF))
+ if (!denormalModeIsFlushAllF32(MF))
return Subtarget->hasFastFMAF32() || Subtarget->hasDLInsts();
// If the subtarget has v_fmac_f32, that's just as good as v_mac_f32.
@@ -4551,7 +4777,7 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
case MVT::f64:
return true;
case MVT::f16:
- return Subtarget->has16BitInsts() && hasFP64FP16Denormals(MF);
+ return Subtarget->has16BitInsts() && !denormalModeIsFlushAllF64F16(MF);
default:
break;
}
@@ -4580,9 +4806,10 @@ bool SITargetLowering::isFMADLegal(const MachineInstr &MI, LLT Ty) const {
return false;
if (Ty.getScalarSizeInBits() == 16)
- return Subtarget->hasMadF16() && !hasFP64FP16Denormals(*MI.getMF());
+ return Subtarget->hasMadF16() && denormalModeIsFlushAllF64F16(*MI.getMF());
if (Ty.getScalarSizeInBits() == 32)
- return Subtarget->hasMadMacF32Insts() && !hasFP32Denormals(*MI.getMF());
+ return Subtarget->hasMadMacF32Insts() &&
+ denormalModeIsFlushAllF32(*MI.getMF());
return false;
}
@@ -4594,10 +4821,10 @@ bool SITargetLowering::isFMADLegal(const SelectionDAG &DAG,
EVT VT = N->getValueType(0);
if (VT == MVT::f32)
return Subtarget->hasMadMacF32Insts() &&
- !hasFP32Denormals(DAG.getMachineFunction());
+ denormalModeIsFlushAllF32(DAG.getMachineFunction());
if (VT == MVT::f16) {
return Subtarget->hasMadF16() &&
- !hasFP64FP16Denormals(DAG.getMachineFunction());
+ denormalModeIsFlushAllF64F16(DAG.getMachineFunction());
}
return false;
@@ -4613,7 +4840,10 @@ SDValue SITargetLowering::splitUnaryVectorOp(SDValue Op,
SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
EVT VT = Op.getValueType();
- assert(VT == MVT::v4f16 || VT == MVT::v4i16);
+ assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
+ VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i16 ||
+ VT == MVT::v16f16 || VT == MVT::v8f32 || VT == MVT::v16f32 ||
+ VT == MVT::v32f32);
SDValue Lo, Hi;
std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
@@ -4696,12 +4926,16 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
"Load should return a value and a chain");
return Result;
}
-
+ case ISD::FSQRT:
+ if (Op.getValueType() == MVT::f64)
+ return lowerFSQRTF64(Op, DAG);
+ return SDValue();
case ISD::FSIN:
case ISD::FCOS:
return LowerTrig(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::FDIV: return LowerFDIV(Op, DAG);
+ case ISD::FFREXP: return LowerFFREXP(Op, DAG);
case ISD::ATOMIC_CMP_SWAP: return LowerATOMIC_CMP_SWAP(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::GlobalAddress: {
@@ -4726,6 +4960,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG);
case ISD::FP_ROUND:
+ case ISD::STRICT_FP_ROUND:
return lowerFP_ROUND(Op, DAG);
case ISD::FPTRUNC_ROUND: {
unsigned Opc;
@@ -4757,6 +4992,9 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FMINNUM:
case ISD::FMAXNUM:
return lowerFMINNUM_FMAXNUM(Op, DAG);
+ case ISD::FLDEXP:
+ case ISD::STRICT_FLDEXP:
+ return lowerFLDEXP(Op, DAG);
case ISD::FMA:
return splitTernaryVectorOp(Op, DAG);
case ISD::FP_TO_SINT:
@@ -5038,6 +5276,9 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
switch (IID) {
+ case Intrinsic::amdgcn_make_buffer_rsrc:
+ Results.push_back(lowerPointerAsRsrcIntrin(N, DAG));
+ return;
case Intrinsic::amdgcn_cvt_pkrtz: {
SDValue Src0 = N->getOperand(1);
SDValue Src1 = N->getOperand(2);
@@ -5142,6 +5383,7 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
default:
+ AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
break;
}
}
@@ -5349,6 +5591,10 @@ SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
if (SrcVT != MVT::f64)
return Op;
+ // TODO: Handle strictfp
+ if (Op.getOpcode() != ISD::FP_ROUND)
+ return Op;
+
SDLoc DL(Op);
SDValue FpToFp16 = DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i32, Src);
@@ -5375,6 +5621,40 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
return Op;
}
+SDValue SITargetLowering::lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const {
+ bool IsStrict = Op.getOpcode() == ISD::STRICT_FLDEXP;
+ EVT VT = Op.getValueType();
+ assert(VT == MVT::f16);
+
+ SDValue Exp = Op.getOperand(IsStrict ? 2 : 1);
+ EVT ExpVT = Exp.getValueType();
+ if (ExpVT == MVT::i16)
+ return Op;
+
+ SDLoc DL(Op);
+
+ // Correct the exponent type for f16 to i16.
+ // Clamp the range of the exponent to the instruction's range.
+
+ // TODO: This should be a generic narrowing legalization, and could easily
+ // be done for GlobalISel as well.
+
+ SDValue MinExp = DAG.getConstant(minIntN(16), DL, ExpVT);
+ SDValue ClampMin = DAG.getNode(ISD::SMAX, DL, ExpVT, Exp, MinExp);
+
+ SDValue MaxExp = DAG.getConstant(maxIntN(16), DL, ExpVT);
+ SDValue Clamp = DAG.getNode(ISD::SMIN, DL, ExpVT, ClampMin, MaxExp);
+
+ SDValue TruncExp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Clamp);
+
+ if (IsStrict) {
+ return DAG.getNode(ISD::STRICT_FLDEXP, DL, {VT, MVT::Other},
+ {Op.getOperand(0), Op.getOperand(1), TruncExp});
+ }
+
+ return DAG.getNode(ISD::FLDEXP, DL, VT, Op.getOperand(0), TruncExp);
+}
+
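// A small numeric sketch of the SMAX/SMIN/TRUNCATE chain in lowerFLDEXP
// above, assuming the i16 limits produced by minIntN(16) and maxIntN(16);
// the helper name is illustrative. Saturating before truncating means the
// truncation itself can never wrap.
#include <algorithm>
#include <cstdint>

static int16_t clampExpToI16Sketch(int32_t Exp) {
  int32_t Clamped =
      std::clamp<int32_t>(Exp, -32768, 32767); // ISD::SMAX then ISD::SMIN
  return static_cast<int16_t>(Clamped);        // ISD::TRUNCATE, lossless
}
// For example, clampExpToI16Sketch(100000) == 32767 and
// clampExpToI16Sketch(-100000) == -32768, both already far outside what a
// finite nonzero f16 ldexp operand can absorb without saturating.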
SDValue SITargetLowering::lowerXMULO(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
SDLoc SL(Op);
@@ -5431,26 +5711,20 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
return lowerTrapEndpgm(Op, DAG);
- if (std::optional<uint8_t> HsaAbiVer = AMDGPU::getHsaAbiVersion(Subtarget)) {
- switch (*HsaAbiVer) {
- case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
- case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
- return lowerTrapHsaQueuePtr(Op, DAG);
- case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
- case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
- return Subtarget->supportsGetDoorbellID() ?
- lowerTrapHsa(Op, DAG) : lowerTrapHsaQueuePtr(Op, DAG);
- }
- }
+ const Module *M = DAG.getMachineFunction().getFunction().getParent();
+ unsigned CodeObjectVersion = AMDGPU::getCodeObjectVersion(*M);
+ if (CodeObjectVersion <= AMDGPU::AMDHSA_COV3)
+ return lowerTrapHsaQueuePtr(Op, DAG);
- llvm_unreachable("Unknown trap handler");
+ return Subtarget->supportsGetDoorbellID() ? lowerTrapHsa(Op, DAG) :
+ lowerTrapHsaQueuePtr(Op, DAG);
}
SDValue SITargetLowering::lowerTrapEndpgm(
SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
SDValue Chain = Op.getOperand(0);
- return DAG.getNode(AMDGPUISD::ENDPGM, SL, MVT::Other, Chain);
+ return DAG.getNode(AMDGPUISD::ENDPGM_TRAP, SL, MVT::Other, Chain);
}
SDValue SITargetLowering::loadImplicitKernelArgument(SelectionDAG &DAG, MVT VT,
@@ -5471,7 +5745,8 @@ SDValue SITargetLowering::lowerTrapHsaQueuePtr(
SDValue QueuePtr;
// For code object version 5, QueuePtr is passed through implicit kernarg.
- if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) {
+ const Module *M = DAG.getMachineFunction().getFunction().getParent();
+ if (AMDGPU::getCodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5) {
QueuePtr =
loadImplicitKernelArgument(DAG, MVT::i64, SL, Align(8), QUEUE_PTR);
} else {
@@ -5574,7 +5849,8 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, const SDLoc &DL,
// For code object version 5, private_base and shared_base are passed through
// implicit kernargs.
- if (AMDGPU::getAmdhsaCodeObjectVersion() == 5) {
+ const Module *M = DAG.getMachineFunction().getFunction().getParent();
+ if (AMDGPU::getCodeObjectVersion(*M) >= AMDGPU::AMDHSA_COV5) {
ImplicitParameter Param =
(AS == AMDGPUAS::LOCAL_ADDRESS) ? SHARED_BASE : PRIVATE_BASE;
return loadImplicitKernelArgument(DAG, MVT::i32, DL, Align(4), Param);
@@ -5721,6 +5997,35 @@ SDValue SITargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
SDLoc SL(Op);
+ if (EltVT.getScalarSizeInBits() == 16 && IdxVal % 2 == 0) {
+ // Insert 32-bit registers at a time.
+ assert(InsNumElts % 2 == 0 && "expect legal vector types");
+
+ unsigned VecNumElts = VecVT.getVectorNumElements();
+ EVT NewVecVT =
+ EVT::getVectorVT(*DAG.getContext(), MVT::i32, VecNumElts / 2);
+ EVT NewInsVT = InsNumElts == 2 ? MVT::i32
+ : EVT::getVectorVT(*DAG.getContext(),
+ MVT::i32, InsNumElts / 2);
+
+ Vec = DAG.getNode(ISD::BITCAST, SL, NewVecVT, Vec);
+ Ins = DAG.getNode(ISD::BITCAST, SL, NewInsVT, Ins);
+
+ for (unsigned I = 0; I != InsNumElts / 2; ++I) {
+ SDValue Elt;
+ if (InsNumElts == 2) {
+ Elt = Ins;
+ } else {
+ Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Ins,
+ DAG.getConstant(I, SL, MVT::i32));
+ }
+ Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, NewVecVT, Vec, Elt,
+ DAG.getConstant(IdxVal / 2 + I, SL, MVT::i32));
+ }
+
+ return DAG.getNode(ISD::BITCAST, SL, VecVT, Vec);
+ }
+
for (unsigned I = 0; I != InsNumElts; ++I) {
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Ins,
DAG.getConstant(I, SL, MVT::i32));
@@ -6130,7 +6435,8 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
if (DAG.getDataLayout().getTypeAllocSize(Ty).isZero()) {
assert(PtrVT == MVT::i32 && "32-bit pointer is expected.");
// Adjust alignment for that dynamic shared memory array.
- MFI->setDynLDSAlign(DAG.getDataLayout(), *cast<GlobalVariable>(GV));
+ Function &F = DAG.getMachineFunction().getFunction();
+ MFI->setDynLDSAlign(F, *cast<GlobalVariable>(GV));
return SDValue(
DAG.getMachineNode(AMDGPU::GET_GROUPSTATICSIZE, DL, PtrVT), 0);
}
@@ -6572,15 +6878,24 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
// SIShrinkInstructions will convert NSA encodings to non-NSA after register
// allocation when possible.
//
- // TODO: we can actually allow partial NSA where the final register is a
- // contiguous set of the remaining addresses.
- // This could help where there are more addresses than supported.
- bool UseNSA = ST->hasFeature(AMDGPU::FeatureNSAEncoding) &&
- VAddrs.size() >= (unsigned)ST->getNSAThreshold(MF) &&
- VAddrs.size() <= (unsigned)ST->getNSAMaxSize();
+ // Partial NSA is allowed on GFX11 where the final register is a contiguous
+ // set of the remaining addresses.
+ const unsigned NSAMaxSize = ST->getNSAMaxSize();
+ const bool HasPartialNSAEncoding = ST->hasPartialNSAEncoding();
+ const bool UseNSA = ST->hasNSAEncoding() &&
+ VAddrs.size() >= ST->getNSAThreshold(MF) &&
+ (VAddrs.size() <= NSAMaxSize || HasPartialNSAEncoding);
+ const bool UsePartialNSA =
+ UseNSA && HasPartialNSAEncoding && VAddrs.size() > NSAMaxSize;
+
SDValue VAddr;
- if (!UseNSA)
+ if (UsePartialNSA) {
+ VAddr = getBuildDwordsVector(DAG, DL,
+ ArrayRef(VAddrs).drop_front(NSAMaxSize - 1));
+ }
+ else if (!UseNSA) {
VAddr = getBuildDwordsVector(DAG, DL, VAddrs);
+ }
SDValue True = DAG.getTargetConstant(1, DL, MVT::i1);
SDValue False = DAG.getTargetConstant(0, DL, MVT::i1);
@@ -6648,7 +6963,11 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
SmallVector<SDValue, 26> Ops;
if (BaseOpcode->Store || BaseOpcode->Atomic)
Ops.push_back(VData); // vdata
- if (UseNSA)
+ if (UsePartialNSA) {
+ append_range(Ops, ArrayRef(VAddrs).take_front(NSAMaxSize - 1));
+ Ops.push_back(VAddr);
+ }
+ else if (UseNSA)
append_range(Ops, VAddrs);
else
Ops.push_back(VAddr);
@@ -6696,7 +7015,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx90a,
NumVDataDwords, NumVAddrDwords);
if (Opcode == -1)
- return makeV_ILLEGAL(Op, DAG);
+ report_fatal_error(
+ "requested image instruction is not supported on this GPU");
}
if (Opcode == -1 &&
Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
@@ -6706,7 +7026,8 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
Opcode = AMDGPU::getMIMGOpcode(IntrOpcode, AMDGPU::MIMGEncGfx6,
NumVDataDwords, NumVAddrDwords);
}
- assert(Opcode != -1);
+ if (Opcode == -1)
+ return Op;
MachineSDNode *NewNode = DAG.getMachineNode(Opcode, DL, ResultTypes, Ops);
if (auto MemOp = dyn_cast<MemSDNode>(Op)) {
@@ -7021,8 +7342,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return emitRemovedIntrinsicError(DAG, DL, VT);
}
case Intrinsic::amdgcn_ldexp:
- return DAG.getNode(AMDGPUISD::LDEXP, DL, VT,
- Op.getOperand(1), Op.getOperand(2));
+ return DAG.getNode(ISD::FLDEXP, DL, VT, Op.getOperand(1), Op.getOperand(2));
case Intrinsic::amdgcn_fract:
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
@@ -7170,52 +7490,27 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
}
}
-/// Update \p MMO based on the offset inputs to an intrinsic.
-static void updateBufferMMO(MachineMemOperand *MMO, SDValue VOffset,
- SDValue SOffset, SDValue Offset,
- SDValue VIndex = SDValue()) {
- if (!isa<ConstantSDNode>(VOffset) || !isa<ConstantSDNode>(SOffset) ||
- !isa<ConstantSDNode>(Offset)) {
- // The combined offset is not known to be constant, so we cannot represent
- // it in the MMO. Give up.
- MMO->setValue((Value *)nullptr);
- return;
- }
-
- if (VIndex && (!isa<ConstantSDNode>(VIndex) ||
- !cast<ConstantSDNode>(VIndex)->isZero())) {
- // The strided index component of the address is not known to be zero, so we
- // cannot represent it in the MMO. Give up.
- MMO->setValue((Value *)nullptr);
- return;
- }
-
- MMO->setOffset(cast<ConstantSDNode>(VOffset)->getSExtValue() +
- cast<ConstantSDNode>(SOffset)->getSExtValue() +
- cast<ConstantSDNode>(Offset)->getSExtValue());
-}
-
SDValue SITargetLowering::lowerRawBufferAtomicIntrin(SDValue Op,
SelectionDAG &DAG,
unsigned NewOpcode) const {
SDLoc DL(Op);
SDValue VData = Op.getOperand(2);
+ SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(3), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
SDValue Ops[] = {
- Op.getOperand(0), // Chain
- VData, // vdata
- Op.getOperand(3), // rsrc
- DAG.getConstant(0, DL, MVT::i32), // vindex
- Offsets.first, // voffset
- Op.getOperand(5), // soffset
- Offsets.second, // offset
- Op.getOperand(6), // cachepolicy
- DAG.getTargetConstant(0, DL, MVT::i1), // idxen
+ Op.getOperand(0), // Chain
+ VData, // vdata
+ Rsrc, // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // cachepolicy
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
auto *M = cast<MemSDNode>(Op);
- updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6]);
EVT MemVT = VData.getValueType();
return DAG.getMemIntrinsicNode(NewOpcode, DL, Op->getVTList(), Ops, MemVT,
@@ -7224,10 +7519,8 @@ SDValue SITargetLowering::lowerRawBufferAtomicIntrin(SDValue Op,
// Return a value to use for the idxen operand by examining the vindex operand.
static unsigned getIdxEn(SDValue VIndex) {
- if (auto VIndexC = dyn_cast<ConstantSDNode>(VIndex))
- // No need to set idxen if vindex is known to be zero.
- return VIndexC->getZExtValue() != 0;
- return 1;
+ // No need to set idxen if vindex is known to be zero.
+ return isNullConstant(VIndex) ? 0 : 1;
}
SDValue
@@ -7236,21 +7529,21 @@ SITargetLowering::lowerStructBufferAtomicIntrin(SDValue Op, SelectionDAG &DAG,
SDLoc DL(Op);
SDValue VData = Op.getOperand(2);
+ SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(3), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
SDValue Ops[] = {
- Op.getOperand(0), // Chain
- VData, // vdata
- Op.getOperand(3), // rsrc
- Op.getOperand(4), // vindex
- Offsets.first, // voffset
- Op.getOperand(6), // soffset
- Offsets.second, // offset
- Op.getOperand(7), // cachepolicy
- DAG.getTargetConstant(1, DL, MVT::i1), // idxen
+ Op.getOperand(0), // Chain
+ VData, // vdata
+ Rsrc, // rsrc
+ Op.getOperand(4), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(6), // soffset
+ Offsets.second, // offset
+ Op.getOperand(7), // cachepolicy
+ DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
auto *M = cast<MemSDNode>(Op);
- updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6], Ops[3]);
EVT MemVT = VData.getValueType();
return DAG.getMemIntrinsicNode(NewOpcode, DL, Op->getVTList(), Ops, MemVT,
@@ -7330,19 +7623,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
M->getOperand(0), M->getOperand(2), M->getOperand(3),
M->getMemOperand());
}
- case Intrinsic::amdgcn_atomic_inc:
- case Intrinsic::amdgcn_atomic_dec:
case Intrinsic::amdgcn_ds_fmin:
case Intrinsic::amdgcn_ds_fmax: {
MemSDNode *M = cast<MemSDNode>(Op);
unsigned Opc;
switch (IntrID) {
- case Intrinsic::amdgcn_atomic_inc:
- Opc = AMDGPUISD::ATOMIC_INC;
- break;
- case Intrinsic::amdgcn_atomic_dec:
- Opc = AMDGPUISD::ATOMIC_DEC;
- break;
case Intrinsic::amdgcn_ds_fmin:
Opc = AMDGPUISD::ATOMIC_LOAD_FMIN;
break;
@@ -7384,7 +7669,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
EVT VT = Op.getValueType();
EVT IntVT = VT.changeTypeToInteger();
auto *M = cast<MemSDNode>(Op);
- updateBufferMMO(M->getMemOperand(), Ops[3], Ops[4], Ops[5], Ops[2]);
EVT LoadVT = Op.getValueType();
if (LoadVT.getScalarType() == MVT::f16)
@@ -7400,43 +7684,50 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
M->getMemOperand(), DAG);
}
case Intrinsic::amdgcn_raw_buffer_load:
- case Intrinsic::amdgcn_raw_buffer_load_format: {
- const bool IsFormat = IntrID == Intrinsic::amdgcn_raw_buffer_load_format;
+ case Intrinsic::amdgcn_raw_ptr_buffer_load:
+ case Intrinsic::amdgcn_raw_buffer_load_format:
+ case Intrinsic::amdgcn_raw_ptr_buffer_load_format: {
+ const bool IsFormat =
+ IntrID == Intrinsic::amdgcn_raw_buffer_load_format ||
+ IntrID == Intrinsic::amdgcn_raw_ptr_buffer_load_format;
+ SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(2), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG);
SDValue Ops[] = {
- Op.getOperand(0), // Chain
- Op.getOperand(2), // rsrc
- DAG.getConstant(0, DL, MVT::i32), // vindex
- Offsets.first, // voffset
- Op.getOperand(4), // soffset
- Offsets.second, // offset
- Op.getOperand(5), // cachepolicy, swizzled buffer
- DAG.getTargetConstant(0, DL, MVT::i1), // idxen
+ Op.getOperand(0), // Chain
+ Rsrc, // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(4), // soffset
+ Offsets.second, // offset
+ Op.getOperand(5), // cachepolicy, swizzled buffer
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
auto *M = cast<MemSDNode>(Op);
- updateBufferMMO(M->getMemOperand(), Ops[3], Ops[4], Ops[5]);
return lowerIntrinsicLoad(M, IsFormat, DAG, Ops);
}
case Intrinsic::amdgcn_struct_buffer_load:
- case Intrinsic::amdgcn_struct_buffer_load_format: {
- const bool IsFormat = IntrID == Intrinsic::amdgcn_struct_buffer_load_format;
+ case Intrinsic::amdgcn_struct_ptr_buffer_load:
+ case Intrinsic::amdgcn_struct_buffer_load_format:
+ case Intrinsic::amdgcn_struct_ptr_buffer_load_format: {
+ const bool IsFormat =
+ IntrID == Intrinsic::amdgcn_struct_buffer_load_format ||
+ IntrID == Intrinsic::amdgcn_struct_ptr_buffer_load_format;
+ SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(2), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
SDValue Ops[] = {
- Op.getOperand(0), // Chain
- Op.getOperand(2), // rsrc
- Op.getOperand(3), // vindex
- Offsets.first, // voffset
- Op.getOperand(5), // soffset
- Offsets.second, // offset
- Op.getOperand(6), // cachepolicy, swizzled buffer
- DAG.getTargetConstant(1, DL, MVT::i1), // idxen
+ Op.getOperand(0), // Chain
+ Rsrc, // rsrc
+ Op.getOperand(3), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // cachepolicy, swizzled buffer
+ DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
- auto *M = cast<MemSDNode>(Op);
- updateBufferMMO(M->getMemOperand(), Ops[3], Ops[4], Ops[5], Ops[2]);
return lowerIntrinsicLoad(cast<MemSDNode>(Op), IsFormat, DAG, Ops);
}
case Intrinsic::amdgcn_tbuffer_load: {
@@ -7467,21 +7758,23 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
DAG);
}
- case Intrinsic::amdgcn_raw_tbuffer_load: {
+ case Intrinsic::amdgcn_raw_tbuffer_load:
+ case Intrinsic::amdgcn_raw_ptr_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
EVT LoadVT = Op.getValueType();
+ SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(2), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(3), DAG);
SDValue Ops[] = {
- Op.getOperand(0), // Chain
- Op.getOperand(2), // rsrc
- DAG.getConstant(0, DL, MVT::i32), // vindex
- Offsets.first, // voffset
- Op.getOperand(4), // soffset
- Offsets.second, // offset
- Op.getOperand(5), // format
- Op.getOperand(6), // cachepolicy, swizzled buffer
- DAG.getTargetConstant(0, DL, MVT::i1), // idxen
+ Op.getOperand(0), // Chain
+ Rsrc, // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(4), // soffset
+ Offsets.second, // offset
+ Op.getOperand(5), // format
+ Op.getOperand(6), // cachepolicy, swizzled buffer
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
@@ -7491,21 +7784,23 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Op->getVTList(), Ops, LoadVT, M->getMemOperand(),
DAG);
}
- case Intrinsic::amdgcn_struct_tbuffer_load: {
+ case Intrinsic::amdgcn_struct_tbuffer_load:
+ case Intrinsic::amdgcn_struct_ptr_tbuffer_load: {
MemSDNode *M = cast<MemSDNode>(Op);
EVT LoadVT = Op.getValueType();
+ SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(2), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
SDValue Ops[] = {
- Op.getOperand(0), // Chain
- Op.getOperand(2), // rsrc
- Op.getOperand(3), // vindex
- Offsets.first, // voffset
- Op.getOperand(5), // soffset
- Offsets.second, // offset
- Op.getOperand(6), // format
- Op.getOperand(7), // cachepolicy, swizzled buffer
- DAG.getTargetConstant(1, DL, MVT::i1), // idxen
+ Op.getOperand(0), // Chain
+ Rsrc, // rsrc
+ Op.getOperand(3), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // format
+ Op.getOperand(7), // cachepolicy, swizzled buffer
+ DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
if (LoadVT.getScalarType() == MVT::f16)
@@ -7545,7 +7840,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
- updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6], Ops[3]);
unsigned Opcode = 0;
switch (IntrID) {
@@ -7593,69 +7887,99 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
M->getMemOperand());
}
case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fadd:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD);
case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fadd:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FADD);
case Intrinsic::amdgcn_raw_buffer_atomic_fmin:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmin:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FMIN);
case Intrinsic::amdgcn_struct_buffer_atomic_fmin:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fmin:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FMIN);
case Intrinsic::amdgcn_raw_buffer_atomic_fmax:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_fmax:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FMAX);
case Intrinsic::amdgcn_struct_buffer_atomic_fmax:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_fmax:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_FMAX);
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SWAP);
case Intrinsic::amdgcn_raw_buffer_atomic_add:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_add:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_ADD);
case Intrinsic::amdgcn_raw_buffer_atomic_sub:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_sub:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SUB);
case Intrinsic::amdgcn_raw_buffer_atomic_smin:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smin:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SMIN);
case Intrinsic::amdgcn_raw_buffer_atomic_umin:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umin:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_UMIN);
case Intrinsic::amdgcn_raw_buffer_atomic_smax:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_smax:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SMAX);
case Intrinsic::amdgcn_raw_buffer_atomic_umax:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_umax:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_UMAX);
case Intrinsic::amdgcn_raw_buffer_atomic_and:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_and:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_AND);
case Intrinsic::amdgcn_raw_buffer_atomic_or:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_or:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_OR);
case Intrinsic::amdgcn_raw_buffer_atomic_xor:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_xor:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_XOR);
case Intrinsic::amdgcn_raw_buffer_atomic_inc:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_inc:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_INC);
case Intrinsic::amdgcn_raw_buffer_atomic_dec:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_dec:
return lowerRawBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_DEC);
case Intrinsic::amdgcn_struct_buffer_atomic_swap:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_swap:
return lowerStructBufferAtomicIntrin(Op, DAG,
AMDGPUISD::BUFFER_ATOMIC_SWAP);
case Intrinsic::amdgcn_struct_buffer_atomic_add:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_add:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_ADD);
case Intrinsic::amdgcn_struct_buffer_atomic_sub:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_sub:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_SUB);
case Intrinsic::amdgcn_struct_buffer_atomic_smin:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smin:
return lowerStructBufferAtomicIntrin(Op, DAG,
AMDGPUISD::BUFFER_ATOMIC_SMIN);
case Intrinsic::amdgcn_struct_buffer_atomic_umin:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umin:
return lowerStructBufferAtomicIntrin(Op, DAG,
AMDGPUISD::BUFFER_ATOMIC_UMIN);
case Intrinsic::amdgcn_struct_buffer_atomic_smax:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_smax:
return lowerStructBufferAtomicIntrin(Op, DAG,
AMDGPUISD::BUFFER_ATOMIC_SMAX);
case Intrinsic::amdgcn_struct_buffer_atomic_umax:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_umax:
return lowerStructBufferAtomicIntrin(Op, DAG,
AMDGPUISD::BUFFER_ATOMIC_UMAX);
case Intrinsic::amdgcn_struct_buffer_atomic_and:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_and:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_AND);
case Intrinsic::amdgcn_struct_buffer_atomic_or:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_or:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_OR);
case Intrinsic::amdgcn_struct_buffer_atomic_xor:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_xor:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_XOR);
case Intrinsic::amdgcn_struct_buffer_atomic_inc:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_inc:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_INC);
case Intrinsic::amdgcn_struct_buffer_atomic_dec:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_dec:
return lowerStructBufferAtomicIntrin(Op, DAG, AMDGPUISD::BUFFER_ATOMIC_DEC);
case Intrinsic::amdgcn_buffer_atomic_cmpswap: {
@@ -7677,49 +8001,50 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
- updateBufferMMO(M->getMemOperand(), Ops[5], Ops[6], Ops[7], Ops[4]);
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, M->getMemOperand());
}
- case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap: {
+ case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap:
+ case Intrinsic::amdgcn_raw_ptr_buffer_atomic_cmpswap: {
+ SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(4), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
SDValue Ops[] = {
- Op.getOperand(0), // Chain
- Op.getOperand(2), // src
- Op.getOperand(3), // cmp
- Op.getOperand(4), // rsrc
- DAG.getConstant(0, DL, MVT::i32), // vindex
- Offsets.first, // voffset
- Op.getOperand(6), // soffset
- Offsets.second, // offset
- Op.getOperand(7), // cachepolicy
- DAG.getTargetConstant(0, DL, MVT::i1), // idxen
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // src
+ Op.getOperand(3), // cmp
+ Rsrc, // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(6), // soffset
+ Offsets.second, // offset
+ Op.getOperand(7), // cachepolicy
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
- updateBufferMMO(M->getMemOperand(), Ops[5], Ops[6], Ops[7]);
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, M->getMemOperand());
}
- case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap: {
+ case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap:
+ case Intrinsic::amdgcn_struct_ptr_buffer_atomic_cmpswap: {
+ SDValue Rsrc = bufferRsrcPtrToVector(Op->getOperand(4), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(6), DAG);
SDValue Ops[] = {
- Op.getOperand(0), // Chain
- Op.getOperand(2), // src
- Op.getOperand(3), // cmp
- Op.getOperand(4), // rsrc
- Op.getOperand(5), // vindex
- Offsets.first, // voffset
- Op.getOperand(7), // soffset
- Offsets.second, // offset
- Op.getOperand(8), // cachepolicy
- DAG.getTargetConstant(1, DL, MVT::i1), // idxen
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // src
+ Op.getOperand(3), // cmp
+ Rsrc, // rsrc
+ Op.getOperand(5), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(7), // soffset
+ Offsets.second, // offset
+ Op.getOperand(8), // cachepolicy
+ DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
EVT VT = Op.getValueType();
auto *M = cast<MemSDNode>(Op);
- updateBufferMMO(M->getMemOperand(), Ops[5], Ops[6], Ops[7], Ops[4]);
return DAG.getMemIntrinsicNode(AMDGPUISD::BUFFER_ATOMIC_CMPSWAP, DL,
Op->getVTList(), Ops, VT, M->getMemOperand());
@@ -7844,8 +8169,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
}
Ops.push_back(TDescr);
- if (IsA16)
- Ops.push_back(DAG.getTargetConstant(1, DL, MVT::i1));
+ Ops.push_back(DAG.getTargetConstant(IsA16, DL, MVT::i1));
Ops.push_back(M->getChain());
auto *NewNode = DAG.getMachineNode(Opcode, DL, M->getVTList(), Ops);
@@ -7853,11 +8177,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
DAG.setNodeMemRefs(NewNode, {MemRef});
return SDValue(NewNode, 0);
}
- case Intrinsic::amdgcn_global_atomic_fadd: {
- if (!Subtarget->hasAtomicFaddNoRtnInsts())
- return makeV_ILLEGAL(Op, DAG);
- return SDValue();
- }
case Intrinsic::amdgcn_global_atomic_fmin:
case Intrinsic::amdgcn_global_atomic_fmax:
case Intrinsic::amdgcn_flat_atomic_fmin:
@@ -8102,23 +8421,25 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
M->getMemoryVT(), M->getMemOperand());
}
- case Intrinsic::amdgcn_struct_tbuffer_store: {
+ case Intrinsic::amdgcn_struct_tbuffer_store:
+ case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
SDValue VData = Op.getOperand(2);
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
+ SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(3), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
SDValue Ops[] = {
- Chain,
- VData, // vdata
- Op.getOperand(3), // rsrc
- Op.getOperand(4), // vindex
- Offsets.first, // voffset
- Op.getOperand(6), // soffset
- Offsets.second, // offset
- Op.getOperand(7), // format
- Op.getOperand(8), // cachepolicy, swizzled buffer
- DAG.getTargetConstant(1, DL, MVT::i1), // idxen
+ Chain,
+ VData, // vdata
+ Rsrc, // rsrc
+ Op.getOperand(4), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(6), // soffset
+ Offsets.second, // offset
+ Op.getOperand(7), // format
+ Op.getOperand(8), // cachepolicy, swizzled buffer
+ DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -8127,23 +8448,25 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
M->getMemoryVT(), M->getMemOperand());
}
- case Intrinsic::amdgcn_raw_tbuffer_store: {
+ case Intrinsic::amdgcn_raw_tbuffer_store:
+ case Intrinsic::amdgcn_raw_ptr_tbuffer_store: {
SDValue VData = Op.getOperand(2);
bool IsD16 = (VData.getValueType().getScalarType() == MVT::f16);
if (IsD16)
VData = handleD16VData(VData, DAG);
+ SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(3), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
SDValue Ops[] = {
- Chain,
- VData, // vdata
- Op.getOperand(3), // rsrc
- DAG.getConstant(0, DL, MVT::i32), // vindex
- Offsets.first, // voffset
- Op.getOperand(5), // soffset
- Offsets.second, // offset
- Op.getOperand(6), // format
- Op.getOperand(7), // cachepolicy, swizzled buffer
- DAG.getTargetConstant(0, DL, MVT::i1), // idxen
+ Chain,
+ VData, // vdata
+ Rsrc, // rsrc
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // format
+ Op.getOperand(7), // cachepolicy, swizzled buffer
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
AMDGPUISD::TBUFFER_STORE_FORMAT;
@@ -8178,7 +8501,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
MemSDNode *M = cast<MemSDNode>(Op);
- updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6], Ops[3]);
// Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
EVT VDataType = VData.getValueType().getScalarType();
@@ -8190,9 +8512,12 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
}
case Intrinsic::amdgcn_raw_buffer_store:
- case Intrinsic::amdgcn_raw_buffer_store_format: {
+ case Intrinsic::amdgcn_raw_ptr_buffer_store:
+ case Intrinsic::amdgcn_raw_buffer_store_format:
+ case Intrinsic::amdgcn_raw_ptr_buffer_store_format: {
const bool IsFormat =
- IntrinsicID == Intrinsic::amdgcn_raw_buffer_store_format;
+ IntrinsicID == Intrinsic::amdgcn_raw_buffer_store_format ||
+ IntrinsicID == Intrinsic::amdgcn_raw_ptr_buffer_store_format;
SDValue VData = Op.getOperand(2);
EVT VDataVT = VData.getValueType();
@@ -8209,23 +8534,23 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
getEquivalentMemType(*DAG.getContext(), VDataVT), VData);
}
+ SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(3), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(4), DAG);
SDValue Ops[] = {
- Chain,
- VData,
- Op.getOperand(3), // rsrc
- DAG.getConstant(0, DL, MVT::i32), // vindex
- Offsets.first, // voffset
- Op.getOperand(5), // soffset
- Offsets.second, // offset
- Op.getOperand(6), // cachepolicy, swizzled buffer
- DAG.getTargetConstant(0, DL, MVT::i1), // idxen
+ Chain,
+ VData,
+ Rsrc,
+ DAG.getConstant(0, DL, MVT::i32), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(5), // soffset
+ Offsets.second, // offset
+ Op.getOperand(6), // cachepolicy, swizzled buffer
+ DAG.getTargetConstant(0, DL, MVT::i1), // idxen
};
unsigned Opc =
IsFormat ? AMDGPUISD::BUFFER_STORE_FORMAT : AMDGPUISD::BUFFER_STORE;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
MemSDNode *M = cast<MemSDNode>(Op);
- updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6]);
// Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
if (!IsD16 && !VDataVT.isVector() && EltType.getSizeInBits() < 32)
@@ -8236,9 +8561,12 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
}
case Intrinsic::amdgcn_struct_buffer_store:
- case Intrinsic::amdgcn_struct_buffer_store_format: {
+ case Intrinsic::amdgcn_struct_ptr_buffer_store:
+ case Intrinsic::amdgcn_struct_buffer_store_format:
+ case Intrinsic::amdgcn_struct_ptr_buffer_store_format: {
const bool IsFormat =
- IntrinsicID == Intrinsic::amdgcn_struct_buffer_store_format;
+ IntrinsicID == Intrinsic::amdgcn_struct_buffer_store_format ||
+ IntrinsicID == Intrinsic::amdgcn_struct_ptr_buffer_store_format;
SDValue VData = Op.getOperand(2);
EVT VDataVT = VData.getValueType();
@@ -8256,23 +8584,23 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
getEquivalentMemType(*DAG.getContext(), VDataVT), VData);
}
+ auto Rsrc = bufferRsrcPtrToVector(Op.getOperand(3), DAG);
auto Offsets = splitBufferOffsets(Op.getOperand(5), DAG);
SDValue Ops[] = {
- Chain,
- VData,
- Op.getOperand(3), // rsrc
- Op.getOperand(4), // vindex
- Offsets.first, // voffset
- Op.getOperand(6), // soffset
- Offsets.second, // offset
- Op.getOperand(7), // cachepolicy, swizzled buffer
- DAG.getTargetConstant(1, DL, MVT::i1), // idxen
+ Chain,
+ VData,
+ Rsrc,
+ Op.getOperand(4), // vindex
+ Offsets.first, // voffset
+ Op.getOperand(6), // soffset
+ Offsets.second, // offset
+ Op.getOperand(7), // cachepolicy, swizzled buffer
+ DAG.getTargetConstant(1, DL, MVT::i1), // idxen
};
- unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ?
- AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
+ unsigned Opc =
+ !IsFormat ? AMDGPUISD::BUFFER_STORE : AMDGPUISD::BUFFER_STORE_FORMAT;
Opc = IsD16 ? AMDGPUISD::BUFFER_STORE_FORMAT_D16 : Opc;
MemSDNode *M = cast<MemSDNode>(Op);
- updateBufferMMO(M->getMemOperand(), Ops[4], Ops[5], Ops[6], Ops[3]);
// Handle BUFFER_STORE_BYTE/SHORT overloaded intrinsics
EVT VDataType = VData.getValueType().getScalarType();
@@ -8283,9 +8611,13 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
M->getMemoryVT(), M->getMemOperand());
}
case Intrinsic::amdgcn_raw_buffer_load_lds:
- case Intrinsic::amdgcn_struct_buffer_load_lds: {
+ case Intrinsic::amdgcn_raw_ptr_buffer_load_lds:
+ case Intrinsic::amdgcn_struct_buffer_load_lds:
+ case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: {
unsigned Opc;
- bool HasVIndex = IntrinsicID == Intrinsic::amdgcn_struct_buffer_load_lds;
+ bool HasVIndex =
+ IntrinsicID == Intrinsic::amdgcn_struct_buffer_load_lds ||
+ IntrinsicID == Intrinsic::amdgcn_struct_ptr_buffer_load_lds;
unsigned OpOffset = HasVIndex ? 1 : 0;
SDValue VOffset = Op.getOperand(5 + OpOffset);
auto CVOffset = dyn_cast<ConstantSDNode>(VOffset);
@@ -8328,7 +8660,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
else if (HasVOffset)
Ops.push_back(VOffset);
- Ops.push_back(Op.getOperand(2)); // rsrc
+ SDValue Rsrc = bufferRsrcPtrToVector(Op.getOperand(2), DAG);
+ Ops.push_back(Rsrc);
Ops.push_back(Op.getOperand(6 + OpOffset)); // soffset
Ops.push_back(Op.getOperand(7 + OpOffset)); // imm offset
unsigned Aux = Op.getConstantOperandVal(8 + OpOffset);
@@ -8341,8 +8674,10 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
auto *M = cast<MemSDNode>(Op);
MachineMemOperand *LoadMMO = M->getMemOperand();
+ // Don't set the offset value here because the pointer points to the base of
+ // the buffer.
MachinePointerInfo LoadPtrI = LoadMMO->getPointerInfo();
- LoadPtrI.Offset = Op->getConstantOperandVal(7 + OpOffset);
+
MachinePointerInfo StorePtrI = LoadPtrI;
StorePtrI.V = nullptr;
StorePtrI.AddrSpace = AMDGPUAS::LOCAL_ADDRESS;
@@ -8450,27 +8785,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
}
}
-SDValue SITargetLowering::makeV_ILLEGAL(SDValue Op, SelectionDAG & DAG) const {
- // Create the V_ILLEGAL node.
- SDLoc DL(Op);
- auto Opcode = Subtarget->getGeneration() < AMDGPUSubtarget::GFX10 ?
- AMDGPU::V_ILLEGAL_gfx6_gfx7_gfx8_gfx9 : AMDGPU::V_ILLEGAL;
- auto EntryNode = DAG.getEntryNode();
- auto IllegalNode = DAG.getMachineNode(Opcode, DL, MVT::Other, EntryNode);
- auto IllegalVal = SDValue(IllegalNode, 0u);
-
- // Add the V_ILLEGAL node to the root chain to prevent its removal.
- auto Chains = SmallVector<SDValue, 2u>();
- Chains.push_back(IllegalVal);
- Chains.push_back(DAG.getRoot());
- auto Root = DAG.getTokenFactor(SDLoc(Chains.back()), Chains);
- DAG.setRoot(Root);
-
- // Merge with UNDEF to satisfy return value requirements.
- auto UndefVal = DAG.getUNDEF(Op.getValueType());
- return DAG.getMergeValues({UndefVal, IllegalVal}, DL);
-}
-
// The raw.(t)buffer and struct.(t)buffer intrinsics have two offset args:
// offset (the offset that is included in bounds checking and swizzling, to be
// split between the instruction's voffset and immoffset fields) and soffset
@@ -8480,7 +8794,7 @@ SDValue SITargetLowering::makeV_ILLEGAL(SDValue Op, SelectionDAG & DAG) const {
std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
SDValue Offset, SelectionDAG &DAG) const {
SDLoc DL(Offset);
- const unsigned MaxImm = 4095;
+ const unsigned MaxImm = SIInstrInfo::getMaxMUBUFImmOffset();
SDValue N0 = Offset;
ConstantSDNode *C1 = nullptr;
@@ -8493,13 +8807,14 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
if (C1) {
unsigned ImmOffset = C1->getZExtValue();
- // If the immediate value is too big for the immoffset field, put the value
- // and -4096 into the immoffset field so that the value that is copied/added
- // for the voffset field is a multiple of 4096, and it stands more chance
- // of being CSEd with the copy/add for another similar load/store.
- // However, do not do that rounding down to a multiple of 4096 if that is a
- // negative number, as it appears to be illegal to have a negative offset
- // in the vgpr, even if adding the immediate offset makes it positive.
+ // If the immediate value is too big for the immoffset field, put only the
+ // bits that fit into the immoffset field. The remaining value, which is
+ // copied/added for the voffset field, is then a multiple of a large power
+ // of two, so it stands a better chance of being CSEd with the copy/add for
+ // another similar load/store.
+ // However, do not do that rounding down if the remainder is negative, as it
+ // appears to be illegal to have a negative offset in the vgpr, even if
+ // adding the immediate offset makes it positive.
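A minimal standalone sketch of the split described in the comment above, assuming a fixed 4095 maximum immediate; the helper name and the fixed limit are illustrative, and the negative-overflow fallback simply keeps the whole value out of the immediate as the comment requires.

#include <cstdint>
#include <utility>

// Split a combined constant offset into {voffset part, immoffset part},
// keeping only the low bits in the immediate field and pushing the large
// remainder into the voffset copy/add so it can be CSEd.
static std::pair<uint32_t, uint32_t> splitImmOffsetSketch(uint32_t CombinedImm) {
  const uint32_t MaxImm = 4095; // assumed MUBUF immediate limit
  uint32_t Overflow = CombinedImm & ~MaxImm;
  uint32_t Imm = CombinedImm - Overflow;
  if ((int32_t)Overflow < 0) {
    // A negative voffset is not allowed; keep the whole value in voffset.
    Overflow = CombinedImm;
    Imm = 0;
  }
  return {Overflow, Imm}; // e.g. 5000 -> {4096, 904}
}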
unsigned Overflow = ImmOffset & ~MaxImm;
ImmOffset -= Overflow;
if ((int32_t)Overflow < 0) {
@@ -8530,12 +8845,12 @@ std::pair<SDValue, SDValue> SITargetLowering::splitBufferOffsets(
void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
SelectionDAG &DAG, SDValue *Offsets,
Align Alignment) const {
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
SDLoc DL(CombinedOffset);
- if (auto C = dyn_cast<ConstantSDNode>(CombinedOffset)) {
+ if (auto *C = dyn_cast<ConstantSDNode>(CombinedOffset)) {
uint32_t Imm = C->getZExtValue();
uint32_t SOffset, ImmOffset;
- if (AMDGPU::splitMUBUFOffset(Imm, SOffset, ImmOffset, Subtarget,
- Alignment)) {
+ if (TII->splitMUBUFOffset(Imm, SOffset, ImmOffset, Alignment)) {
Offsets[0] = DAG.getConstant(0, DL, MVT::i32);
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
@@ -8547,8 +8862,8 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
SDValue N1 = CombinedOffset.getOperand(1);
uint32_t SOffset, ImmOffset;
int Offset = cast<ConstantSDNode>(N1)->getSExtValue();
- if (Offset >= 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
- Subtarget, Alignment)) {
+ if (Offset >= 0 &&
+ TII->splitMUBUFOffset(Offset, SOffset, ImmOffset, Alignment)) {
Offsets[0] = N0;
Offsets[1] = DAG.getConstant(SOffset, DL, MVT::i32);
Offsets[2] = DAG.getTargetConstant(ImmOffset, DL, MVT::i32);
@@ -8560,6 +8875,55 @@ void SITargetLowering::setBufferOffsets(SDValue CombinedOffset,
Offsets[2] = DAG.getTargetConstant(0, DL, MVT::i32);
}
+SDValue SITargetLowering::bufferRsrcPtrToVector(SDValue MaybePointer,
+ SelectionDAG &DAG) const {
+ if (!MaybePointer.getValueType().isScalarInteger())
+ return MaybePointer;
+
+ SDLoc DL(MaybePointer);
+
+ SDValue Rsrc = DAG.getBitcast(MVT::v4i32, MaybePointer);
+ return Rsrc;
+}
+
+// Wrap a global or flat pointer into a buffer resource using the stride,
+// num-records, and flags specified in the intrinsic.
+SDValue SITargetLowering::lowerPointerAsRsrcIntrin(SDNode *Op,
+ SelectionDAG &DAG) const {
+ SDLoc Loc(Op);
+
+ SDValue Pointer = Op->getOperand(1);
+ SDValue Stride = Op->getOperand(2);
+ SDValue NumRecords = Op->getOperand(3);
+ SDValue Flags = Op->getOperand(4);
+
+ auto [LowHalf, HighHalf] = DAG.SplitScalar(Pointer, Loc, MVT::i32, MVT::i32);
+ SDValue Mask = DAG.getConstant(0x0000ffff, Loc, MVT::i32);
+ SDValue Masked = DAG.getNode(ISD::AND, Loc, MVT::i32, HighHalf, Mask);
+ std::optional<uint32_t> ConstStride = std::nullopt;
+ if (auto *ConstNode = dyn_cast<ConstantSDNode>(Stride))
+ ConstStride = ConstNode->getZExtValue();
+
+ SDValue NewHighHalf = Masked;
+ if (!ConstStride || *ConstStride != 0) {
+ SDValue ShiftedStride;
+ if (ConstStride) {
+ ShiftedStride = DAG.getConstant(*ConstStride << 16, Loc, MVT::i32);
+ } else {
+ SDValue ExtStride = DAG.getAnyExtOrTrunc(Stride, Loc, MVT::i32);
+ ShiftedStride =
+ DAG.getNode(ISD::SHL, Loc, MVT::i32, ExtStride,
+ DAG.getShiftAmountConstant(16, MVT::i32, Loc));
+ }
+ NewHighHalf = DAG.getNode(ISD::OR, Loc, MVT::i32, Masked, ShiftedStride);
+ }
+
+ SDValue Rsrc = DAG.getNode(ISD::BUILD_VECTOR, Loc, MVT::v4i32, LowHalf,
+ NewHighHalf, NumRecords, Flags);
+ SDValue RsrcPtr = DAG.getNode(ISD::BITCAST, Loc, MVT::i128, Rsrc);
+ return RsrcPtr;
+}
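A plain-integer model of the descriptor words assembled above; the struct and function names are illustrative, and the word layout mirrors the BUILD_VECTOR operands (masked high pointer half ORed with the stride shifted into the top 16 bits, followed by num-records and flags).

#include <cstdint>

struct RsrcWordsSketch { uint32_t W0, W1, W2, W3; };

static RsrcWordsSketch packBufferRsrcSketch(uint64_t Pointer, uint32_t Stride,
                                            uint32_t NumRecords,
                                            uint32_t Flags) {
  RsrcWordsSketch R;
  R.W0 = (uint32_t)Pointer;                     // low 32 bits of the base
  R.W1 = ((uint32_t)(Pointer >> 32) & 0xffffu)  // high 16 bits of the base
         | (Stride << 16);                      // stride in the upper half
  R.W2 = NumRecords;
  R.W3 = Flags;
  return R;
}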
+
// Handle 8 bit and 16 bit buffer loads
SDValue SITargetLowering::handleByteShortBufferLoads(SelectionDAG &DAG,
EVT LoadVT, SDLoc DL,
@@ -8683,6 +9047,14 @@ SDValue SITargetLowering::widenLoad(LoadSDNode *Ld, DAGCombinerInfo &DCI) const
return DAG.getMergeValues({ Cvt, NewLoad.getValue(1) }, SL);
}
+static bool addressMayBeAccessedAsPrivate(const MachineMemOperand *MMO,
+ const SIMachineFunctionInfo &Info) {
+ // TODO: Should check if the address can definitely not access stack.
+ if (Info.isEntryFunction())
+ return Info.hasFlatScratchInit();
+ return true;
+}
+
SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
LoadSDNode *Load = cast<LoadSDNode>(Op);
@@ -8749,7 +9121,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
// then we need to use the same legalization rules we use for private.
if (AS == AMDGPUAS::FLAT_ADDRESS &&
!Subtarget->hasMultiDwordFlatScratchAddressing())
- AS = MFI->hasFlatScratchInit() ?
+ AS = addressMayBeAccessedAsPrivate(Load->getMemOperand(), *MFI) ?
AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
unsigned NumElements = MemVT.getVectorNumElements();
@@ -8883,26 +9255,30 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
EVT VT = Op.getValueType();
const SDNodeFlags Flags = Op->getFlags();
- bool AllowInaccurateRcp = Flags.hasApproximateFuncs();
-
- // Without !fpmath accuracy information, we can't do more because we don't
- // know exactly whether rcp is accurate enough to meet !fpmath requirement.
- if (!AllowInaccurateRcp)
- return SDValue();
+ bool AllowInaccurateRcp = Flags.hasApproximateFuncs() ||
+ DAG.getTarget().Options.UnsafeFPMath;
if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) {
+ // Without !fpmath accuracy information, we can't do more because we don't
+ // know exactly whether rcp is accurate enough to meet the !fpmath
+ // requirement. f16 is always accurate enough.
+ if (!AllowInaccurateRcp && VT != MVT::f16)
+ return SDValue();
+
if (CLHS->isExactlyValue(1.0)) {
// v_rcp_f32 and v_rsq_f32 do not support denormals, and according to
// the CI documentation has a worst case error of 1 ulp.
// OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to
// use it as long as we aren't trying to use denormals.
//
- // v_rcp_f16 and v_rsq_f16 DO support denormals.
+ // v_rcp_f16 and v_rsq_f16 DO support denormals and have 0.51 ulp accuracy.
// 1.0 / sqrt(x) -> rsq(x)
// XXX - Is UnsafeFPMath sufficient to do this for f64? The maximum ULP
// error seems really high at 2^29 ULP.
+
+ // XXX - do we need afn for this or is arcp sufficient?
if (RHS.getOpcode() == ISD::FSQRT)
return DAG.getNode(AMDGPUISD::RSQ, SL, VT, RHS.getOperand(0));
@@ -8918,6 +9294,11 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
}
}
+ // For f16 require arcp only.
+ // For f32 require afn+arcp.
+ if (!AllowInaccurateRcp && (VT != MVT::f16 || !Flags.hasAllowReciprocal()))
+ return SDValue();
+
// Turn into multiply by the reciprocal.
// x / y -> x * (1.0 / y)
SDValue Recip = DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
@@ -9017,16 +9398,17 @@ SDValue SITargetLowering::LowerFDIV16(SDValue Op, SelectionDAG &DAG) const {
// Faster 2.5 ULP division that does not support denormals.
SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
+ SDNodeFlags Flags = Op->getFlags();
SDLoc SL(Op);
SDValue LHS = Op.getOperand(1);
SDValue RHS = Op.getOperand(2);
- SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS);
+ SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS, Flags);
- const APFloat K0Val(BitsToFloat(0x6f800000));
+ const APFloat K0Val(0x1p+96f);
const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32);
- const APFloat K1Val(BitsToFloat(0x2f800000));
+ const APFloat K1Val(0x1p-32f);
const SDValue K1 = DAG.getConstantFP(K1Val, SL, MVT::f32);
const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f32);
@@ -9036,30 +9418,27 @@ SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT);
- SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One);
+ SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One, Flags);
- // TODO: Should this propagate fast-math-flags?
- r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3);
+ r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3, Flags);
// rcp does not support denormals.
- SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1);
+ SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1, Flags);
- SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0);
+ SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0, Flags);
- return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul);
+ return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul, Flags);
}
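A scalar model of the scaling trick used by this lowering; rcpApprox stands in for the hardware reciprocal approximation and is an assumed parameter, not an LLVM API.

#include <cmath>

// Scale the denominator into range before taking the reciprocal (which does
// not support denormals), then undo the scale on the result.
static float fastDivSketch(float LHS, float RHS, float (*rcpApprox)(float)) {
  const float K0 = 0x1p+96f; // magnitude threshold
  const float K1 = 0x1p-32f; // downscale factor
  float r3 = (std::fabs(RHS) > K0) ? K1 : 1.0f;
  float r0 = rcpApprox(RHS * r3); // reciprocal of the scaled denominator
  return r3 * (LHS * r0);
}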
// Returns immediate value for setting the F32 denorm mode when using the
// S_DENORM_MODE instruction.
-static SDValue getSPDenormModeValue(int SPDenormMode, SelectionDAG &DAG,
- const SDLoc &SL, const GCNSubtarget *ST) {
+static SDValue getSPDenormModeValue(uint32_t SPDenormMode, SelectionDAG &DAG,
+ const SIMachineFunctionInfo *Info,
+ const GCNSubtarget *ST) {
assert(ST->hasDenormModeInst() && "Requires S_DENORM_MODE");
- int DPDenormModeDefault = hasFP64FP16Denormals(DAG.getMachineFunction())
- ? FP_DENORM_FLUSH_NONE
- : FP_DENORM_FLUSH_IN_FLUSH_OUT;
-
- int Mode = SPDenormMode | (DPDenormModeDefault << 2);
- return DAG.getTargetConstant(Mode, SL, MVT::i32);
+ uint32_t DPDenormModeDefault = Info->getMode().fpDenormModeDPValue();
+ uint32_t Mode = SPDenormMode | (DPDenormModeDefault << 2);
+ return DAG.getTargetConstant(Mode, SDLoc(), MVT::i32);
}
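The immediate built above packs two 2-bit fields; a minimal sketch of that packing, with illustrative names, assuming the low pair of bits selects the FP32 mode and the next pair the double-precision default.

#include <cstdint>

static uint32_t packDenormModeSketch(uint32_t SPDenormMode,
                                     uint32_t DPDenormMode) {
  // Low two bits: FP32 denorm mode; next two bits: FP64/FP16 default.
  return (SPDenormMode & 0x3) | ((DPDenormMode & 0x3) << 2);
}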
SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
@@ -9097,7 +9476,11 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
(1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i32);
- const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction());
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+ const DenormalMode DenormMode = Info->getMode().FP32Denormals;
+
+ const bool HasFP32Denormals = DenormMode == DenormalMode::getIEEE();
if (!HasFP32Denormals) {
// Note we can't use the STRICT_FMA/STRICT_FMUL for the non-strict FDIV
@@ -9109,7 +9492,7 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
SDNode *EnableDenorm;
if (Subtarget->hasDenormModeInst()) {
const SDValue EnableDenormValue =
- getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, SL, Subtarget);
+ getSPDenormModeValue(FP_DENORM_FLUSH_NONE, DAG, Info, Subtarget);
EnableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, BindParamVTs,
DAG.getEntryNode(), EnableDenormValue).getNode();
@@ -9149,10 +9532,13 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
NumeratorScaled, Fma3, Flags);
if (!HasFP32Denormals) {
+ // FIXME: This mishandles dynamic denormal mode. We need to query the
+ // current mode and restore the original.
+
SDNode *DisableDenorm;
if (Subtarget->hasDenormModeInst()) {
- const SDValue DisableDenormValue =
- getSPDenormModeValue(FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, SL, Subtarget);
+ const SDValue DisableDenormValue = getSPDenormModeValue(
+ FP_DENORM_FLUSH_IN_FLUSH_OUT, DAG, Info, Subtarget);
DisableDenorm = DAG.getNode(AMDGPUISD::DENORM_MODE, SL, MVT::Other,
Fma4.getValue(1), DisableDenormValue,
@@ -9260,6 +9646,36 @@ SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Unexpected type for fdiv");
}
+SDValue SITargetLowering::LowerFFREXP(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ SDValue Val = Op.getOperand(0);
+ EVT VT = Val.getValueType();
+ EVT ResultExpVT = Op->getValueType(1);
+ EVT InstrExpVT = VT == MVT::f16 ? MVT::i16 : MVT::i32;
+
+ SDValue Mant = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getTargetConstant(Intrinsic::amdgcn_frexp_mant, dl, MVT::i32), Val);
+
+ SDValue Exp = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, dl, InstrExpVT,
+ DAG.getTargetConstant(Intrinsic::amdgcn_frexp_exp, dl, MVT::i32), Val);
+
+ if (Subtarget->hasFractBug()) {
+ SDValue Fabs = DAG.getNode(ISD::FABS, dl, VT, Val);
+ SDValue Inf = DAG.getConstantFP(
+ APFloat::getInf(SelectionDAG::EVTToAPFloatSemantics(VT)), dl, VT);
+
+ SDValue IsFinite = DAG.getSetCC(dl, MVT::i1, Fabs, Inf, ISD::SETOLT);
+ SDValue Zero = DAG.getConstant(0, dl, InstrExpVT);
+ Exp = DAG.getNode(ISD::SELECT, dl, InstrExpVT, IsFinite, Exp, Zero);
+ Mant = DAG.getNode(ISD::SELECT, dl, VT, IsFinite, Mant, Val);
+ }
+
+ SDValue CastExp = DAG.getSExtOrTrunc(Exp, dl, ResultExpVT);
+ return DAG.getMergeValues({Mant, CastExp}, dl);
+}
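A scalar model of this lowering, assuming std::frexp matches the mantissa/exponent intrinsics for finite inputs; the non-finite fallback mirrors the hasFractBug() selects above.

#include <cmath>

// frexp via separate mantissa/exponent operations; on subtargets with the
// fract bug the hardware results are unusable for inf/nan, so non-finite
// inputs fall back to (Val, 0), matching the selects in the lowering.
static float frexpSketch(float Val, int &Exp) {
  if (!std::isfinite(Val)) {
    Exp = 0;
    return Val;
  }
  return std::frexp(Val, &Exp);
}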
+
SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
StoreSDNode *Store = cast<StoreSDNode>(Op);
@@ -9287,7 +9703,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// then we need to use the same legalization rules we use for private.
if (AS == AMDGPUAS::FLAT_ADDRESS &&
!Subtarget->hasMultiDwordFlatScratchAddressing())
- AS = MFI->hasFlatScratchInit() ?
+ AS = addressMayBeAccessedAsPrivate(Store->getMemOperand(), *MFI) ?
AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
unsigned NumElements = VT.getVectorNumElements();
@@ -9338,6 +9754,87 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+SDValue SITargetLowering::lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const {
+ // For double type, the SQRT and RSQ instructions don't have required
+ // precision, we apply Goldschmidt's algorithm to improve the result:
+ //
+ // y0 = rsq(x)
+ // g0 = x * y0
+ // h0 = 0.5 * y0
+ //
+ // r0 = 0.5 - h0 * g0
+ // g1 = g0 * r0 + g0
+ // h1 = h0 * r0 + h0
+ //
+ // r1 = 0.5 - h1 * g1 => d0 = x - g1 * g1
+ // g2 = g1 * r1 + g1 g2 = d0 * h1 + g1
+ // h2 = h1 * r1 + h1
+ //
+ // r2 = 0.5 - h2 * g2 => d1 = x - g2 * g2
+ // g3 = g2 * r2 + g2 g3 = d1 * h1 + g2
+ //
+ // sqrt(x) = g3
+
+ SDNodeFlags Flags = Op->getFlags();
+
+ SDLoc DL(Op);
+
+ SDValue X = Op.getOperand(0);
+ SDValue ScaleConstant = DAG.getConstantFP(0x1.0p-767, DL, MVT::f64);
+
+ SDValue Scaling = DAG.getSetCC(DL, MVT::i1, X, ScaleConstant, ISD::SETOLT);
+
+ SDValue ZeroInt = DAG.getConstant(0, DL, MVT::i32);
+
+ // Scale up input if it is too small.
+ SDValue ScaleUpFactor = DAG.getConstant(256, DL, MVT::i32);
+ SDValue ScaleUp =
+ DAG.getNode(ISD::SELECT, DL, MVT::i32, Scaling, ScaleUpFactor, ZeroInt);
+ SDValue SqrtX = DAG.getNode(ISD::FLDEXP, DL, MVT::f64, X, ScaleUp, Flags);
+
+ SDValue SqrtY = DAG.getNode(AMDGPUISD::RSQ, DL, MVT::f64, SqrtX);
+
+ SDValue SqrtS0 = DAG.getNode(ISD::FMUL, DL, MVT::f64, SqrtX, SqrtY);
+
+ SDValue Half = DAG.getConstantFP(0.5, DL, MVT::f64);
+ SDValue SqrtH0 = DAG.getNode(ISD::FMUL, DL, MVT::f64, SqrtY, Half);
+
+ SDValue NegSqrtH0 = DAG.getNode(ISD::FNEG, DL, MVT::f64, SqrtH0);
+ SDValue SqrtR0 = DAG.getNode(ISD::FMA, DL, MVT::f64, NegSqrtH0, SqrtS0, Half);
+
+ SDValue SqrtH1 = DAG.getNode(ISD::FMA, DL, MVT::f64, SqrtH0, SqrtR0, SqrtH0);
+
+ SDValue SqrtS1 = DAG.getNode(ISD::FMA, DL, MVT::f64, SqrtS0, SqrtR0, SqrtS0);
+
+ SDValue NegSqrtS1 = DAG.getNode(ISD::FNEG, DL, MVT::f64, SqrtS1);
+ SDValue SqrtD0 = DAG.getNode(ISD::FMA, DL, MVT::f64, NegSqrtS1, SqrtS1, SqrtX);
+
+ SDValue SqrtS2 = DAG.getNode(ISD::FMA, DL, MVT::f64, SqrtD0, SqrtH1, SqrtS1);
+
+ SDValue NegSqrtS2 = DAG.getNode(ISD::FNEG, DL, MVT::f64, SqrtS2);
+ SDValue SqrtD1 =
+ DAG.getNode(ISD::FMA, DL, MVT::f64, NegSqrtS2, SqrtS2, SqrtX);
+
+ SDValue SqrtRet = DAG.getNode(ISD::FMA, DL, MVT::f64, SqrtD1, SqrtH1, SqrtS2);
+
+ SDValue ScaleDownFactor = DAG.getConstant(-128, DL, MVT::i32);
+ SDValue ScaleDown =
+ DAG.getNode(ISD::SELECT, DL, MVT::i32, Scaling, ScaleDownFactor, ZeroInt);
+ SqrtRet = DAG.getNode(ISD::FLDEXP, DL, MVT::f64, SqrtRet, ScaleDown, Flags);
+
+ // TODO: Switch to fcmp oeq 0 for finite only. Can't fully remove this check
+ // with finite only or nsz because rsq(+/-0) = +/-inf
+
+ // TODO: Check for DAZ and expand to subnormals
+ SDValue IsZeroOrInf =
+ DAG.getNode(ISD::IS_FPCLASS, DL, MVT::i1, SqrtX,
+ DAG.getTargetConstant(fcZero | fcPosInf, DL, MVT::i32));
+
+ // If x is +INF, +0, or -0, use its original value
+ return DAG.getNode(ISD::SELECT, DL, MVT::f64, IsZeroOrInf, SqrtX, SqrtRet,
+ Flags);
+}
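A scalar model of the Goldschmidt refinement sequence above, with rsqApprox standing in for the hardware reciprocal-square-root estimate (an assumed parameter); the input scaling for very small values and the zero/infinity selects are left out.

#include <cmath>

static double goldschmidtSqrtSketch(double X, double (*rsqApprox)(double)) {
  double Y = rsqApprox(X); // y0 ~= 1/sqrt(x)
  double G = X * Y;        // g0 ~= sqrt(x)
  double H = 0.5 * Y;      // h0 ~= 0.5/sqrt(x)

  double R = std::fma(-H, G, 0.5); // r0 = 0.5 - h0*g0
  double G1 = std::fma(G, R, G);   // g1 = g0*r0 + g0
  double H1 = std::fma(H, R, H);   // h1 = h0*r0 + h0

  double D0 = std::fma(-G1, G1, X); // d0 = x - g1*g1
  double G2 = std::fma(D0, H1, G1); // g2 = d0*h1 + g1

  double D1 = std::fma(-G2, G2, X); // d1 = x - g2*g2
  return std::fma(D1, H1, G2);      // g3 = d1*h1 + g2
}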
+
SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
@@ -9432,7 +9929,53 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
return SDValue();
}
+SDValue SITargetLowering::performFCopySignCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SDValue MagnitudeOp = N->getOperand(0);
+ SDValue SignOp = N->getOperand(1);
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+
+ // f64 fcopysign is really an f32 copysign on the high bits, so replace the
+ // lower half with a copy.
+ // fcopysign f64:x, _:y -> x.lo32, (fcopysign (f32 x.hi32), _:y)
+ if (MagnitudeOp.getValueType() == MVT::f64) {
+ SDValue MagAsVector = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, MagnitudeOp);
+ SDValue MagLo =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, MagAsVector,
+ DAG.getConstant(0, DL, MVT::i32));
+ SDValue MagHi =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, MagAsVector,
+ DAG.getConstant(1, DL, MVT::i32));
+
+ SDValue HiOp =
+ DAG.getNode(ISD::FCOPYSIGN, DL, MVT::f32, MagHi, SignOp);
+
+ SDValue Vector = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32, MagLo, HiOp);
+
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f64, Vector);
+ }
+
+ if (SignOp.getValueType() != MVT::f64)
+ return SDValue();
+
+ // Reduce width of sign operand, we only need the highest bit.
+ //
+ // fcopysign f64:x, f64:y ->
+ // fcopysign f64:x, (extract_vector_elt (bitcast f64:y to v2f32), 1)
+ // TODO: In some cases it might make sense to go all the way to f16.
+ SDValue SignAsVector = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, SignOp);
+ SDValue SignAsF32 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, SignAsVector,
+ DAG.getConstant(1, DL, MVT::i32));
+
+ return DAG.getNode(ISD::FCOPYSIGN, DL, N->getValueType(0), N->getOperand(0),
+ SignAsF32);
+}
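A bit-level model of why the combine is valid: the f64 sign bit lives in the high 32-bit half, so only that half needs the FCOPYSIGN while the low half passes through unchanged. The function name is illustrative.

#include <cstdint>
#include <cstring>

static double copysignF64Sketch(double Mag, double Sign) {
  uint64_t MagBits, SignBits;
  std::memcpy(&MagBits, &Mag, sizeof(double));
  std::memcpy(&SignBits, &Sign, sizeof(double));
  // Take everything but the sign bit from the magnitude, and only the sign
  // bit from the sign operand.
  uint64_t ResBits = (MagBits & ~(1ull << 63)) | (SignBits & (1ull << 63));
  double Res;
  std::memcpy(&Res, &ResBits, sizeof(double));
  return Res;
}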
+
// (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2)
+// (shl (or x, c1), c2) -> add (shl x, c2), (shl c1, c2) iff x and c1 share no
+// bits
// This is a variant of
// (mul (add x, c1), c2) -> add (mul x, c2), (mul c1, c2),
@@ -9467,8 +10010,14 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
if (!CAdd)
return SDValue();
- // If the resulting offset is too large, we can't fold it into the addressing
- // mode offset.
+ SelectionDAG &DAG = DCI.DAG;
+
+ if (N0->getOpcode() == ISD::OR &&
+ !DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1)))
+ return SDValue();
+
+ // If the resulting offset is too large, we can't fold it into the
+ // addressing mode offset.
APInt Offset = CAdd->getAPIntValue() << CN1->getAPIntValue();
Type *Ty = MemVT.getTypeForEVT(*DCI.DAG.getContext());
@@ -9478,7 +10027,6 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
if (!isLegalAddressingMode(DCI.DAG.getDataLayout(), AM, Ty, AddrSpace))
return SDValue();
- SelectionDAG &DAG = DCI.DAG;
SDLoc SL(N);
EVT VT = N->getValueType(0);
@@ -9604,7 +10152,7 @@ static uint32_t getConstantPermuteMask(uint32_t C) {
// value 0-3 selects corresponding source byte;
// value 0xc selects zero;
// value 0xff selects 0xff.
-static uint32_t getPermuteMask(SelectionDAG &DAG, SDValue V) {
+static uint32_t getPermuteMask(SDValue V) {
assert(V.getValueSizeInBits() == 32);
if (V.getNumOperands() != 2)
@@ -9620,15 +10168,13 @@ static uint32_t getPermuteMask(SelectionDAG &DAG, SDValue V) {
default:
break;
case ISD::AND:
- if (uint32_t ConstMask = getConstantPermuteMask(C)) {
+ if (uint32_t ConstMask = getConstantPermuteMask(C))
return (0x03020100 & ConstMask) | (0x0c0c0c0c & ~ConstMask);
- }
break;
case ISD::OR:
- if (uint32_t ConstMask = getConstantPermuteMask(C)) {
+ if (uint32_t ConstMask = getConstantPermuteMask(C))
return (0x03020100 & ~ConstMask) | ConstMask;
- }
break;
case ISD::SHL:
@@ -9676,7 +10222,7 @@ SDValue SITargetLowering::performAndCombine(SDNode *N,
(Bits == 8 || Bits == 16) && isShiftedMask_64(Mask) && !(Mask & 1)) {
if (auto *CShift = dyn_cast<ConstantSDNode>(LHS->getOperand(1))) {
unsigned Shift = CShift->getZExtValue();
- unsigned NB = CRHS->getAPIntValue().countTrailingZeros();
+ unsigned NB = CRHS->getAPIntValue().countr_zero();
unsigned Offset = NB + Shift;
if ((Offset & (Bits - 1)) == 0) { // Starts at a byte or word boundary.
SDLoc SL(N);
@@ -9787,8 +10333,8 @@ SDValue SITargetLowering::performAndCombine(SDNode *N,
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
if (VT == MVT::i32 && LHS.hasOneUse() && RHS.hasOneUse() &&
N->isDivergent() && TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32_e64) != -1) {
- uint32_t LHSMask = getPermuteMask(DAG, LHS);
- uint32_t RHSMask = getPermuteMask(DAG, RHS);
+ uint32_t LHSMask = getPermuteMask(LHS);
+ uint32_t RHSMask = getPermuteMask(RHS);
if (LHSMask != ~0u && RHSMask != ~0u) {
// Canonicalize the expression in an attempt to have fewer unique masks
// and therefore fewer registers used to hold the masks.
@@ -9834,6 +10380,325 @@ SDValue SITargetLowering::performAndCombine(SDNode *N,
return SDValue();
}
+// A key component of v_perm is a mapping between the byte position of the src
+// operands and the byte position of the dest. To provide such a mapping, we
+// need: 1. the node that provides byte x of the dest of the OR, and 2. the
+// byte of that node used to provide byte x. calculateByteProvider finds which
+// node provides a certain byte of the dest of the OR, and calculateSrcByte
+// takes that node and finds an ultimate src and byte position. For example,
+// the supported LoadCombine pattern for vector loads is as follows
+// t1
+// or
+// / \
+// t2 t3
+// zext shl
+// | | \
+// t4 t5 16
+// or anyext
+// / \ |
+// t6 t7 t8
+// srl shl or
+// / | / \ / \
+// t9 t10 t11 t12 t13 t14
+// trunc* 8 trunc* 8 and and
+// | | / | | \
+// t15 t16 t17 t18 t19 t20
+// trunc* 255 srl -256
+// | / \
+// t15 t15 16
+//
+// *In this example, the truncs are from i32->i16
+//
+// calculateByteProvider would find t6, t7, t13, and t14 for bytes 0-3
+// respectively. calculateSrcByte would find (given node) -> ultimate src &
+// byte position: t6 -> t15 & 1, t7 -> t16 & 0, t13 -> t15 & 0, t14 -> t15 & 3.
+// After finding the mapping, we can combine the tree into vperm t15, t16,
+// 0x05000407.
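A small sketch of how a perm selector is assembled from per-byte (source, offset) pairs, following the encoding used by the combine further below: bytes of the first distinct source are biased by 4, bytes of the second source are not, one selector byte per destination byte. The types and names here are illustrative only.

#include <cstdint>

struct SrcByteSketch {
  unsigned SrcIdx;  // 0 = first distinct source, 1 = second
  unsigned ByteOff; // byte within that 32-bit source
};

static uint32_t buildPermSelectorSketch(const SrcByteSketch (&Bytes)[4]) {
  uint32_t Mask = 0;
  for (unsigned I = 0; I < 4; ++I) {
    unsigned Sel = Bytes[I].ByteOff + (Bytes[I].SrcIdx == 0 ? 4 : 0);
    Mask |= Sel << (I * 8); // destination byte I is selected by mask byte I
  }
  // Taking all four bytes of the first source in order yields 0x07060504.
  return Mask;
}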
+
+// Find the source and byte position from a node.
+// \p DestByte is the byte position of the dest of the or that the src
+// ultimately provides. \p SrcIndex is the byte of the src that maps to this
+// byte of the dest of the or. \p Depth tracks how many recursive iterations
+// we have performed.
+static const std::optional<ByteProvider<SDValue>>
+calculateSrcByte(const SDValue Op, uint64_t DestByte, uint64_t SrcIndex = 0,
+ unsigned Depth = 0) {
+ // We may need to recursively traverse a series of SRLs
+ if (Depth >= 6)
+ return std::nullopt;
+
+ switch (Op->getOpcode()) {
+ case ISD::TRUNCATE: {
+ if (Op->getOperand(0).getScalarValueSizeInBits() != 32)
+ return std::nullopt;
+ return calculateSrcByte(Op->getOperand(0), DestByte, SrcIndex, Depth + 1);
+ }
+
+ case ISD::SRL: {
+ auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!ShiftOp)
+ return std::nullopt;
+
+ uint64_t BitShift = ShiftOp->getZExtValue();
+
+ if (BitShift % 8 != 0)
+ return std::nullopt;
+
+ SrcIndex += BitShift / 8;
+
+ return calculateSrcByte(Op->getOperand(0), DestByte, SrcIndex, Depth + 1);
+ }
+
+ default: {
+ if (Op.getScalarValueSizeInBits() != 32)
+ return std::nullopt;
+
+ return ByteProvider<SDValue>::getSrc(Op, DestByte, SrcIndex);
+ }
+ }
+ llvm_unreachable("fully handled switch");
+}
+
+// For a byte position in the result of an Or, traverse the tree and find the
+// node (and the byte of the node) which ultimately provides this {Or,
+// BytePosition}. \p Op is the operand we are currently examining. \p Index is
+// the byte position of the Op that corresponds with the originally requested
+// byte of the Or. \p Depth tracks how many recursive iterations we have
+// performed. \p StartingIndex is the originally requested byte of the Or
+static const std::optional<ByteProvider<SDValue>>
+calculateByteProvider(const SDValue &Op, unsigned Index, unsigned Depth,
+ unsigned StartingIndex = 0) {
+ // Finding Src tree of RHS of or typically requires at least 1 additional
+ // depth
+ if (Depth > 6)
+ return std::nullopt;
+
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
+ if (BitWidth % 8 != 0)
+ return std::nullopt;
+ assert(Index < BitWidth / 8 && "invalid index requested");
+
+ switch (Op.getOpcode()) {
+ case ISD::OR: {
+ auto RHS = calculateByteProvider(Op.getOperand(1), Index, Depth + 1,
+ StartingIndex);
+ if (!RHS)
+ return std::nullopt;
+ auto LHS = calculateByteProvider(Op.getOperand(0), Index, Depth + 1,
+ StartingIndex);
+ if (!LHS)
+ return std::nullopt;
+ // A well formed Or will have two ByteProviders for each byte, one of which
+ // is constant zero
+ if (!LHS->isConstantZero() && !RHS->isConstantZero())
+ return std::nullopt;
+ if (!LHS || LHS->isConstantZero())
+ return RHS;
+ if (!RHS || RHS->isConstantZero())
+ return LHS;
+ return std::nullopt;
+ }
+
+ case ISD::AND: {
+ auto BitMaskOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!BitMaskOp)
+ return std::nullopt;
+
+ uint32_t BitMask = BitMaskOp->getZExtValue();
+ // Bits we expect for the byte at position Index
+ uint32_t IndexMask = 0xFF << (Index * 8);
+
+ if ((IndexMask & BitMask) != IndexMask) {
+ // If the result of the and only partially provides the byte, then it
+ // is not well formed
+ if (IndexMask & BitMask)
+ return std::nullopt;
+ return ByteProvider<SDValue>::getConstantZero();
+ }
+
+ return calculateSrcByte(Op->getOperand(0), StartingIndex, Index);
+ }
+
+ case ISD::SRL: {
+ auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!ShiftOp)
+ return std::nullopt;
+
+ uint64_t BitShift = ShiftOp->getZExtValue();
+ if (BitShift % 8)
+ return std::nullopt;
+
+ auto BitsProvided = Op.getScalarValueSizeInBits();
+ if (BitsProvided % 8 != 0)
+ return std::nullopt;
+
+ uint64_t BytesProvided = BitsProvided / 8;
+ uint64_t ByteShift = BitShift / 8;
+ // The dest of shift will have good [0 : (BytesProvided - ByteShift)] bytes.
+ // If the byte we are trying to provide (as tracked by index) falls in this
+ // range, then the SRL provides the byte. The byte of interest of the src of
+ // the SRL is Index + ByteShift
+ return BytesProvided - ByteShift > Index
+ ? calculateSrcByte(Op->getOperand(0), StartingIndex,
+ Index + ByteShift)
+ : ByteProvider<SDValue>::getConstantZero();
+ }
+
+ case ISD::SHL: {
+ auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+ if (!ShiftOp)
+ return std::nullopt;
+
+ uint64_t BitShift = ShiftOp->getZExtValue();
+ if (BitShift % 8 != 0)
+ return std::nullopt;
+ uint64_t ByteShift = BitShift / 8;
+
+ // If we are shifting by an amount greater than (or equal to)
+ // the index we are trying to provide, then it provides 0s. If not,
+ // then these bytes are not definitively 0s, and the corresponding byte
+ // of interest is Index - ByteShift of the src
+ return Index < ByteShift
+ ? ByteProvider<SDValue>::getConstantZero()
+ : calculateByteProvider(Op.getOperand(0), Index - ByteShift,
+ Depth + 1, StartingIndex);
+ }
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND: {
+ SDValue NarrowOp = Op->getOperand(0);
+ unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
+ if (NarrowBitWidth % 8 != 0)
+ return std::nullopt;
+ uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+ if (Index >= NarrowByteWidth)
+ return Op.getOpcode() == ISD::ZERO_EXTEND
+ ? std::optional<ByteProvider<SDValue>>(
+ ByteProvider<SDValue>::getConstantZero())
+ : std::nullopt;
+ return calculateByteProvider(NarrowOp, Index, Depth + 1, StartingIndex);
+ }
+
+ case ISD::TRUNCATE: {
+ unsigned NarrowBitWidth = Op.getScalarValueSizeInBits();
+ if (NarrowBitWidth % 8 != 0)
+ return std::nullopt;
+ uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+ if (NarrowByteWidth >= Index) {
+ return calculateByteProvider(Op.getOperand(0), Index, Depth + 1,
+ StartingIndex);
+ }
+
+ return std::nullopt;
+ }
+
+ case ISD::LOAD: {
+ auto L = cast<LoadSDNode>(Op.getNode());
+ unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
+ if (NarrowBitWidth % 8 != 0)
+ return std::nullopt;
+ uint64_t NarrowByteWidth = NarrowBitWidth / 8;
+
+ // If the width of the load does not reach the byte we are trying to provide
+ // and it is not a ZEXTLOAD, then the load does not provide the byte in
+ // question
+ if (Index >= NarrowByteWidth) {
+ return L->getExtensionType() == ISD::ZEXTLOAD
+ ? std::optional<ByteProvider<SDValue>>(
+ ByteProvider<SDValue>::getConstantZero())
+ : std::nullopt;
+ }
+
+ if (NarrowByteWidth > Index) {
+ return calculateSrcByte(Op, StartingIndex, Index);
+ }
+
+ return std::nullopt;
+ }
+
+ case ISD::BSWAP:
+ return calculateByteProvider(Op->getOperand(0), BitWidth / 8 - Index - 1,
+ Depth + 1, StartingIndex);
+ default: {
+ return std::nullopt;
+ }
+ }
+
+ llvm_unreachable("fully handled switch");
+}
+
+// Returns true if the Operand is a scalar and is 16 bits
+static bool is16BitScalarOp(SDValue &Operand) {
+ switch (Operand.getOpcode()) {
+ case ISD::ANY_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ZERO_EXTEND: {
+ auto OpVT = Operand.getOperand(0).getValueType();
+ return !OpVT.isVector() && OpVT.getSizeInBits() == 16;
+ }
+ case ISD::LOAD: {
+ LoadSDNode *L = cast<LoadSDNode>(Operand.getNode());
+ auto ExtType = L->getExtensionType();
+ if (ExtType == ISD::ZEXTLOAD || ExtType == ISD::SEXTLOAD ||
+ ExtType == ISD::EXTLOAD) {
+ auto MemVT = L->getMemoryVT();
+ return !MemVT.isVector() && MemVT.getSizeInBits() == 16;
+ }
+ return false;
+ }
+ default:
+ return false;
+ }
+}
+
+// Returns true if the mask matches consecutive bytes and the first byte
+// begins at an even (16-bit aligned) offset from the 0th byte
+static bool addresses16Bits(int Mask) {
+ int Low8 = Mask & 0xff;
+ int Hi8 = (Mask & 0xff00) >> 8;
+
+ assert(Low8 < 8 && Hi8 < 8);
+ // Are the bytes contiguous in the order of increasing addresses.
+ bool IsConsecutive = (Hi8 - Low8 == 1);
+ // Is the first byte at a location that is aligned for 16 bit instructions.
+ // A counterexample is taking 2 consecutive bytes starting at the 8th bit.
+ // In this case, we still need code to extract the 16 bit operand, so it
+ // is better to use i8 v_perm
+ bool Is16Aligned = !(Low8 % 2);
+
+ return IsConsecutive && Is16Aligned;
+}
+
+// Do not lower into v_perm if the operands are actually 16 bit
+// and the selected bits (based on PermMask) correspond with two
+// easily addressable 16 bit operands.
+static bool hasEightBitAccesses(uint64_t PermMask, SDValue &Op,
+ SDValue &OtherOp) {
+ int Low16 = PermMask & 0xffff;
+ int Hi16 = (PermMask & 0xffff0000) >> 16;
+
+ // ByteProvider only accepts 32 bit operands
+ assert(Op.getValueType().getSizeInBits() == 32);
+ assert(OtherOp.getValueType().getSizeInBits() == 32);
+
+ auto OpIs16Bit = is16BitScalarOp(Op);
+ auto OtherOpIs16Bit = is16BitScalarOp(OtherOp);
+
+ // If there is a size mismatch, then we must use masking on at least one
+ // operand
+ if (OpIs16Bit != OtherOpIs16Bit)
+ return true;
+
+ // If both operands are 16 bit, return whether or not we cleanly address both
+ if (OpIs16Bit && OtherOpIs16Bit)
+ return !addresses16Bits(Low16) || !addresses16Bits(Hi16);
+
+ // Both are 32 bit operands
+ return true;
+}
+
SDValue SITargetLowering::performOrCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -9884,8 +10749,36 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
if (VT == MVT::i32 && LHS.hasOneUse() && RHS.hasOneUse() &&
N->isDivergent() && TII->pseudoToMCOpcode(AMDGPU::V_PERM_B32_e64) != -1) {
- uint32_t LHSMask = getPermuteMask(DAG, LHS);
- uint32_t RHSMask = getPermuteMask(DAG, RHS);
+
+ // If all the uses of an or need to extract the individual elements, do not
+ // attempt to lower into v_perm
+ auto usesCombinedOperand = [](SDNode *OrUse) {
+ // Any use that is not a bitcast to a vector is a candidate for v_perm
+ if (OrUse->getOpcode() != ISD::BITCAST ||
+ !OrUse->getValueType(0).isVector())
+ return true;
+
+ // If any use of the vector is not itself vectorized, it is a candidate for v_perm
+ for (auto VUse : OrUse->uses()) {
+ if (!VUse->getValueType(0).isVector())
+ return true;
+
+ // If the use of a vector is a store, then combining via a v_perm
+ // is beneficial.
+ // TODO -- whitelist more uses
+ for (auto VectorwiseOp : {ISD::STORE, ISD::CopyToReg, ISD::CopyFromReg})
+ if (VUse->getOpcode() == VectorwiseOp)
+ return true;
+ }
+ return false;
+ };
+
+ if (!any_of(N->uses(), usesCombinedOperand))
+ return SDValue();
+
+ uint32_t LHSMask = getPermuteMask(LHS);
+ uint32_t RHSMask = getPermuteMask(RHS);
+
if (LHSMask != ~0u && RHSMask != ~0u) {
// Canonicalize the expression in an attempt to have fewer unique masks
// and therefore fewer registers used to hold the masks.
@@ -9918,6 +10811,71 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
DAG.getConstant(Sel, DL, MVT::i32));
}
}
+ if (LHSMask == ~0u || RHSMask == ~0u) {
+ SmallVector<ByteProvider<SDValue>, 8> PermNodes;
+
+ // VT is known to be MVT::i32, so we need to provide 4 bytes.
+ assert(VT == MVT::i32);
+ for (int i = 0; i < 4; i++) {
+ // Find the ByteProvider that provides the ith byte of the result of OR
+ std::optional<ByteProvider<SDValue>> P =
+ calculateByteProvider(SDValue(N, 0), i, 0, /*StartingIndex = */ i);
+ // TODO support constantZero
+ if (!P || P->isConstantZero())
+ return SDValue();
+
+ PermNodes.push_back(*P);
+ }
+ if (PermNodes.size() != 4)
+ return SDValue();
+
+ int FirstSrc = 0;
+ std::optional<int> SecondSrc;
+ uint64_t permMask = 0x00000000;
+ for (size_t i = 0; i < PermNodes.size(); i++) {
+ auto PermOp = PermNodes[i];
+ // Since the mask is applied to Src1:Src2, Src1 bytes must be offset
+ // by sizeof(Src2) = 4
+ int SrcByteAdjust = 4;
+
+ if (!PermOp.hasSameSrc(PermNodes[FirstSrc])) {
+ if (SecondSrc.has_value())
+ if (!PermOp.hasSameSrc(PermNodes[*SecondSrc]))
+ return SDValue();
+ // Set the index of the second distinct Src node
+ SecondSrc = i;
+ assert(PermNodes[*SecondSrc].Src->getValueType().getSizeInBits() ==
+ 32);
+ SrcByteAdjust = 0;
+ }
+ assert(PermOp.SrcOffset + SrcByteAdjust < 8);
+ assert(!DAG.getDataLayout().isBigEndian());
+ permMask |= (PermOp.SrcOffset + SrcByteAdjust) << (i * 8);
+ }
+
+ SDValue Op = *PermNodes[FirstSrc].Src;
+ SDValue OtherOp = SecondSrc.has_value() ? *PermNodes[*SecondSrc].Src
+ : *PermNodes[FirstSrc].Src;
+
+ // Check that we are not just extracting the bytes in order from an op
+ if (Op == OtherOp) {
+ int Low16 = permMask & 0xffff;
+ int Hi16 = (permMask & 0xffff0000) >> 16;
+
+ bool WellFormedLow = (Low16 == 0x0504) || (Low16 == 0x0100);
+ bool WellFormedHi = (Hi16 == 0x0706) || (Hi16 == 0x0302);
+
+ // The perm op would really just produce Op. So combine into Op
+ if (WellFormedLow && WellFormedHi)
+ return Op;
+ }
+
+ if (hasEightBitAccesses(permMask, Op, OtherOp)) {
+ SDLoc DL(N);
+ return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op, OtherOp,
+ DAG.getConstant(permMask, DL, MVT::i32));
+ }
+ }
}
if (VT != MVT::i64 || DCI.isBeforeLegalizeOps())
@@ -9966,20 +10924,40 @@ SDValue SITargetLowering::performXorCombine(SDNode *N,
if (SDValue RV = reassociateScalarOps(N, DCI.DAG))
return RV;
- EVT VT = N->getValueType(0);
- if (VT != MVT::i64)
- return SDValue();
-
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
- if (CRHS) {
+ SelectionDAG &DAG = DCI.DAG;
+
+ EVT VT = N->getValueType(0);
+ if (CRHS && VT == MVT::i64) {
if (SDValue Split
= splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::XOR, LHS, CRHS))
return Split;
}
+ // Make sure to apply the 64-bit constant splitting fold before trying to fold
+ // fneg-like xors into 64-bit select.
+ if (LHS.getOpcode() == ISD::SELECT && VT == MVT::i32) {
+ // This looks like an fneg, try to fold as a source modifier.
+ if (CRHS && CRHS->getAPIntValue().isSignMask() &&
+ shouldFoldFNegIntoSrc(N, LHS)) {
+ // xor (select c, a, b), 0x80000000 ->
+ // bitcast (select c, (fneg (bitcast a)), (fneg (bitcast b)))
+ SDLoc DL(N);
+ SDValue CastLHS =
+ DAG.getNode(ISD::BITCAST, DL, MVT::f32, LHS->getOperand(1));
+ SDValue CastRHS =
+ DAG.getNode(ISD::BITCAST, DL, MVT::f32, LHS->getOperand(2));
+ SDValue FNegLHS = DAG.getNode(ISD::FNEG, DL, MVT::f32, CastLHS);
+ SDValue FNegRHS = DAG.getNode(ISD::FNEG, DL, MVT::f32, CastRHS);
+ SDValue NewSelect = DAG.getNode(ISD::SELECT, DL, MVT::f32,
+ LHS->getOperand(0), FNegLHS, FNegRHS);
+ return DAG.getNode(ISD::BITCAST, DL, VT, NewSelect);
+ }
+ }
+
return SDValue();
}
@@ -10086,10 +11064,15 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
return true;
if (auto *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
- auto F = CFP->getValueAPF();
+ const auto &F = CFP->getValueAPF();
if (F.isNaN() && F.isSignaling())
return false;
- return !F.isDenormal() || denormalsEnabledForType(DAG, Op.getValueType());
+ if (!F.isDenormal())
+ return true;
+
+ DenormalMode Mode =
+ DAG.getMachineFunction().getDenormalMode(F.getSemantics());
+ return Mode == DenormalMode::getIEEE();
}
// If source is a result of another standard FP operation it is already in
@@ -10111,6 +11094,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
case ISD::FREM:
case ISD::FP_ROUND:
case ISD::FP_EXTEND:
+ case ISD::FLDEXP:
case AMDGPUISD::FMUL_LEGACY:
case AMDGPUISD::FMAD_FTZ:
case AMDGPUISD::RCP:
@@ -10118,11 +11102,12 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
case AMDGPUISD::RSQ_CLAMP:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::RCP_IFLAG:
+ case AMDGPUISD::LOG:
+ case AMDGPUISD::EXP:
case AMDGPUISD::DIV_SCALE:
case AMDGPUISD::DIV_FMAS:
case AMDGPUISD::DIV_FIXUP:
case AMDGPUISD::FRACT:
- case AMDGPUISD::LDEXP:
case AMDGPUISD::CVT_PKRTZ_F16_F32:
case AMDGPUISD::CVT_F32_UBYTE0:
case AMDGPUISD::CVT_F32_UBYTE1:
@@ -10156,6 +11141,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
// snans will be quieted, so we only need to worry about denormals.
if (Subtarget->supportsMinMaxDenormModes() ||
+ // FIXME: denormalsEnabledForType is broken for dynamic
denormalsEnabledForType(DAG, Op.getValueType()))
return true;
@@ -10225,6 +11211,8 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
case Intrinsic::amdgcn_rcp_legacy:
case Intrinsic::amdgcn_rsq_legacy:
case Intrinsic::amdgcn_trig_preop:
+ case Intrinsic::amdgcn_log:
+ case Intrinsic::amdgcn_exp2:
return true;
default:
break;
@@ -10233,6 +11221,7 @@ bool SITargetLowering::isCanonicalized(SelectionDAG &DAG, SDValue Op,
[[fallthrough]];
}
default:
+ // FIXME: denormalsEnabledForType is broken for dynamic
return denormalsEnabledForType(DAG, Op.getValueType()) &&
DAG.isKnownNeverSNaN(Op);
}
@@ -10254,8 +11243,11 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
if (mi_match(Reg, MRI, MIPatternMatch::m_GFCstOrSplat(FCR))) {
if (FCR->Value.isSignaling())
return false;
- return !FCR->Value.isDenormal() ||
- denormalsEnabledForType(MRI.getType(FCR->VReg), MF);
+ if (!FCR->Value.isDenormal())
+ return true;
+
+ DenormalMode Mode = MF.getDenormalMode(FCR->Value.getSemantics());
+ return Mode == DenormalMode::getIEEE();
}
if (MaxDepth == 0)
@@ -10298,6 +11290,7 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
case AMDGPU::G_FMINNUM_IEEE:
case AMDGPU::G_FMAXNUM_IEEE: {
if (Subtarget->supportsMinMaxDenormModes() ||
+ // FIXME: denormalsEnabledForType is broken for dynamic
denormalsEnabledForType(MRI.getType(Reg), MF))
return true;
@@ -10316,6 +11309,8 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
case Intrinsic::amdgcn_fmed3:
case Intrinsic::amdgcn_sin:
case Intrinsic::amdgcn_cos:
+ case Intrinsic::amdgcn_log:
+ case Intrinsic::amdgcn_exp2:
case Intrinsic::amdgcn_log_clamp:
case Intrinsic::amdgcn_rcp:
case Intrinsic::amdgcn_rcp_legacy:
@@ -10352,9 +11347,16 @@ bool SITargetLowering::isCanonicalized(Register Reg, MachineFunction &MF,
SDValue SITargetLowering::getCanonicalConstantFP(
SelectionDAG &DAG, const SDLoc &SL, EVT VT, const APFloat &C) const {
// Flush denormals to 0 if not enabled.
- if (C.isDenormal() && !denormalsEnabledForType(DAG, VT)) {
- return DAG.getConstantFP(APFloat::getZero(C.getSemantics(),
- C.isNegative()), SL, VT);
+ if (C.isDenormal()) {
+ DenormalMode Mode =
+ DAG.getMachineFunction().getDenormalMode(C.getSemantics());
+ if (Mode == DenormalMode::getPreserveSign()) {
+ return DAG.getConstantFP(
+ APFloat::getZero(C.getSemantics(), C.isNegative()), SL, VT);
+ }
+
+ if (Mode != DenormalMode::getIEEE())
+ return SDValue();
}
if (C.isNaN()) {
@@ -10490,45 +11492,41 @@ static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
}
}
-SDValue SITargetLowering::performIntMed3ImmCombine(
- SelectionDAG &DAG, const SDLoc &SL,
- SDValue Op0, SDValue Op1, bool Signed) const {
- ConstantSDNode *K1 = dyn_cast<ConstantSDNode>(Op1);
- if (!K1)
- return SDValue();
+SDValue SITargetLowering::performIntMed3ImmCombine(SelectionDAG &DAG,
+ const SDLoc &SL, SDValue Src,
+ SDValue MinVal,
+ SDValue MaxVal,
+ bool Signed) const {
- ConstantSDNode *K0 = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
- if (!K0)
+ // med3 comes from
+ // min(max(x, K0), K1), K0 < K1
+ // max(min(x, K0), K1), K1 < K0
+ //
+ // "MinVal" and "MaxVal" respectively refer to the rhs of the
+ // min/max op.
+ ConstantSDNode *MinK = dyn_cast<ConstantSDNode>(MinVal);
+ ConstantSDNode *MaxK = dyn_cast<ConstantSDNode>(MaxVal);
+
+ if (!MinK || !MaxK)
return SDValue();
if (Signed) {
- if (K0->getAPIntValue().sge(K1->getAPIntValue()))
+ if (MaxK->getAPIntValue().sge(MinK->getAPIntValue()))
return SDValue();
} else {
- if (K0->getAPIntValue().uge(K1->getAPIntValue()))
+ if (MaxK->getAPIntValue().uge(MinK->getAPIntValue()))
return SDValue();
}
- EVT VT = K0->getValueType(0);
+ EVT VT = MinK->getValueType(0);
unsigned Med3Opc = Signed ? AMDGPUISD::SMED3 : AMDGPUISD::UMED3;
- if (VT == MVT::i32 || (VT == MVT::i16 && Subtarget->hasMed3_16())) {
- return DAG.getNode(Med3Opc, SL, VT,
- Op0.getOperand(0), SDValue(K0, 0), SDValue(K1, 0));
- }
-
- // If there isn't a 16-bit med3 operation, convert to 32-bit.
- if (VT == MVT::i16) {
- MVT NVT = MVT::i32;
- unsigned ExtOp = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
-
- SDValue Tmp1 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(0));
- SDValue Tmp2 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(1));
- SDValue Tmp3 = DAG.getNode(ExtOp, SL, NVT, Op1);
-
- SDValue Med3 = DAG.getNode(Med3Opc, SL, NVT, Tmp1, Tmp2, Tmp3);
- return DAG.getNode(ISD::TRUNCATE, SL, VT, Med3);
- }
+ if (VT == MVT::i32 || (VT == MVT::i16 && Subtarget->hasMed3_16()))
+ return DAG.getNode(Med3Opc, SL, VT, Src, MaxVal, MinVal);
+ // Note: we could also extend to i32 and use i32 med3 if i16 med3 is
+ // not available, but this is unlikely to be profitable as constants
+ // will often need to be materialized & extended, especially on
+ // pre-GFX10 where VOP3 instructions couldn't take literal operands.
return SDValue();
}
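
A minimal standalone sketch of the integer identity that the reworked
performIntMed3ImmCombine relies on (illustrative only, not part of the vendor
diff; med3 here is a local helper, not the AMDGPU node): for K0 < K1, both
min(max(x, K0), K1) and max(min(x, K1), K0) compute the median of
{x, K0, K1}, which is what the hardware med3 instruction returns.

// int_med3_sketch.cpp -- illustrative only; helper names are invented.
#include <algorithm>
#include <cassert>

// Median of three values.
static int med3(int A, int B, int C) {
  return std::max(std::min(A, B), std::min(std::max(A, B), C));
}

int main() {
  const int K0 = -4, K1 = 9; // K0 < K1
  for (int X = -20; X <= 20; ++X) {
    int Clamped = std::min(std::max(X, K0), K1); // the SMIN(SMAX(...)) shape
    assert(Clamped == med3(X, K0, K1));
    // The SMAX(SMIN(...)) shape handled by the new callers computes the same
    // value when the constants are ordered this way.
    assert(Clamped == std::max(std::min(X, K1), K0));
  }
  return 0;
}
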
@@ -10640,13 +11638,26 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
}
// min(max(x, K0), K1), K0 < K1 -> med3(x, K0, K1)
+ // max(min(x, K0), K1), K1 < K0 -> med3(x, K1, K0)
if (Opc == ISD::SMIN && Op0.getOpcode() == ISD::SMAX && Op0.hasOneUse()) {
- if (SDValue Med3 = performIntMed3ImmCombine(DAG, SDLoc(N), Op0, Op1, true))
+ if (SDValue Med3 = performIntMed3ImmCombine(
+ DAG, SDLoc(N), Op0->getOperand(0), Op1, Op0->getOperand(1), true))
+ return Med3;
+ }
+ if (Opc == ISD::SMAX && Op0.getOpcode() == ISD::SMIN && Op0.hasOneUse()) {
+ if (SDValue Med3 = performIntMed3ImmCombine(
+ DAG, SDLoc(N), Op0->getOperand(0), Op0->getOperand(1), Op1, true))
return Med3;
}
if (Opc == ISD::UMIN && Op0.getOpcode() == ISD::UMAX && Op0.hasOneUse()) {
- if (SDValue Med3 = performIntMed3ImmCombine(DAG, SDLoc(N), Op0, Op1, false))
+ if (SDValue Med3 = performIntMed3ImmCombine(
+ DAG, SDLoc(N), Op0->getOperand(0), Op1, Op0->getOperand(1), false))
+ return Med3;
+ }
+ if (Opc == ISD::UMAX && Op0.getOpcode() == ISD::UMIN && Op0.hasOneUse()) {
+ if (SDValue Med3 = performIntMed3ImmCombine(
+ DAG, SDLoc(N), Op0->getOperand(0), Op0->getOperand(1), Op1, false))
return Med3;
}
@@ -10930,6 +11941,70 @@ SITargetLowering::performInsertVectorEltCombine(SDNode *N,
return DAG.getBuildVector(VecVT, SL, Ops);
}
+/// Return the source of an fp_extend from f16 to f32, or a converted FP
+/// constant.
+static SDValue strictFPExtFromF16(SelectionDAG &DAG, SDValue Src) {
+ if (Src.getOpcode() == ISD::FP_EXTEND &&
+ Src.getOperand(0).getValueType() == MVT::f16) {
+ return Src.getOperand(0);
+ }
+
+ if (auto *CFP = dyn_cast<ConstantFPSDNode>(Src)) {
+ APFloat Val = CFP->getValueAPF();
+ bool LosesInfo = true;
+ Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &LosesInfo);
+ if (!LosesInfo)
+ return DAG.getConstantFP(Val, SDLoc(Src), MVT::f16);
+ }
+
+ return SDValue();
+}
+
+SDValue SITargetLowering::performFPRoundCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ assert(Subtarget->has16BitInsts() && !Subtarget->hasMed3_16() &&
+ "combine only useful on gfx8");
+
+ SDValue TruncSrc = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::f16)
+ return SDValue();
+
+ if (TruncSrc.getOpcode() != AMDGPUISD::FMED3 ||
+ TruncSrc.getValueType() != MVT::f32 || !TruncSrc.hasOneUse())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+
+ // Optimize f16 fmed3 pattern performed on f32. On gfx8 there is no f16 fmed3,
+ // and expanding it with min/max saves 1 instruction vs. casting to f32 and
+ // casting back.
+
+ // fptrunc (f32 (fmed3 (fpext f16:a, fpext f16:b, fpext f16:c))) =>
+ // fmin(fmax(a, b), fmax(fmin(a, b), c))
+ SDValue A = strictFPExtFromF16(DAG, TruncSrc.getOperand(0));
+ if (!A)
+ return SDValue();
+
+ SDValue B = strictFPExtFromF16(DAG, TruncSrc.getOperand(1));
+ if (!B)
+ return SDValue();
+
+ SDValue C = strictFPExtFromF16(DAG, TruncSrc.getOperand(2));
+ if (!C)
+ return SDValue();
+
+ // This changes signaling nan behavior. If an input is a signaling nan, it
+ // would have been quieted by the fpext originally. We don't care because
+  // these are unconstrained ops. If we needed to insert quieting
+  // canonicalizations we would be worse off than just doing the promotion.
+ SDValue A1 = DAG.getNode(ISD::FMINNUM_IEEE, SL, VT, A, B);
+ SDValue B1 = DAG.getNode(ISD::FMAXNUM_IEEE, SL, VT, A, B);
+ SDValue C1 = DAG.getNode(ISD::FMAXNUM_IEEE, SL, VT, A1, C);
+ return DAG.getNode(ISD::FMINNUM_IEEE, SL, VT, B1, C1);
+}
+
unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
const SDNode *N0,
const SDNode *N1) const {
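
A minimal standalone check of the min/max expansion emitted by
performFPRoundCombine above (illustrative only, not part of the vendor diff):
fmin(fmax(a, b), fmax(fmin(a, b), c)) is a median-of-three, so it matches the
fmed3 it replaces on ordinary, non-NaN inputs.

// fmed3_expansion_sketch.cpp -- illustrative only.
#include <algorithm>
#include <cassert>
#include <cmath>

// Mirrors the FMINNUM_IEEE/FMAXNUM_IEEE sequence built in the combine above.
static float med3(float A, float B, float C) {
  float Lo = std::fmin(A, B);
  float Hi = std::fmax(A, B);
  return std::fmin(Hi, std::fmax(Lo, C));
}

int main() {
  const float Vals[] = {-2.5f, 0.0f, 1.25f, 7.0f};
  for (float A : Vals)
    for (float B : Vals)
      for (float C : Vals) {
        float Sorted[3] = {A, B, C};
        std::sort(Sorted, Sorted + 3);
        assert(med3(A, B, C) == Sorted[1]); // the median element
      }
  return 0;
}
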
@@ -10937,10 +12012,11 @@ unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
// Only do this if we are not trying to support denormals. v_mad_f32 does not
// support denormals ever.
- if (((VT == MVT::f32 && !hasFP32Denormals(DAG.getMachineFunction())) ||
- (VT == MVT::f16 && !hasFP64FP16Denormals(DAG.getMachineFunction()) &&
- getSubtarget()->hasMadF16())) &&
- isOperationLegal(ISD::FMAD, VT))
+ if (((VT == MVT::f32 &&
+ denormalModeIsFlushAllF32(DAG.getMachineFunction())) ||
+ (VT == MVT::f16 && Subtarget->hasMadF16() &&
+ denormalModeIsFlushAllF64F16(DAG.getMachineFunction()))) &&
+ isOperationLegal(ISD::FMAD, VT))
return ISD::FMAD;
const TargetOptions &Options = DAG.getTarget().Options;
@@ -11093,7 +12169,6 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
// The actual DAG is noisier than the pseudo code, but only due to
// instructions that disassemble values into low and high parts, and
// assemble the final result.
- SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
SDValue One = DAG.getConstant(1, SL, MVT::i32);
auto MulLHSLo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, MulLHS);
@@ -11102,8 +12177,8 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
getMad64_32(DAG, SL, MVT::i64, MulLHSLo, MulRHSLo, AddRHS, MulSignedLo);
if (!MulSignedLo && (!MulLHSUnsigned32 || !MulRHSUnsigned32)) {
- auto AccumLo = DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, Accum, Zero);
- auto AccumHi = DAG.getNode(ISD::EXTRACT_ELEMENT, SL, MVT::i32, Accum, One);
+ SDValue AccumLo, AccumHi;
+ std::tie(AccumLo, AccumHi) = DAG.SplitScalar(Accum, SL, MVT::i32, MVT::i32);
if (!MulLHSUnsigned32) {
auto MulLHSHi =
@@ -11152,11 +12227,11 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
if (VT != MVT::i32 || !DCI.isAfterLegalizeDAG())
return SDValue();
- // add x, zext (setcc) => addcarry x, 0, setcc
- // add x, sext (setcc) => subcarry x, 0, setcc
+ // add x, zext (setcc) => uaddo_carry x, 0, setcc
+ // add x, sext (setcc) => usubo_carry x, 0, setcc
unsigned Opc = LHS.getOpcode();
if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND ||
- Opc == ISD::ANY_EXTEND || Opc == ISD::ADDCARRY)
+ Opc == ISD::ANY_EXTEND || Opc == ISD::UADDO_CARRY)
std::swap(RHS, LHS);
Opc = RHS.getOpcode();
@@ -11172,15 +12247,15 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
break;
SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1);
SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
- Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::SUBCARRY : ISD::ADDCARRY;
+ Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::USUBO_CARRY : ISD::UADDO_CARRY;
return DAG.getNode(Opc, SL, VTList, Args);
}
- case ISD::ADDCARRY: {
- // add x, (addcarry y, 0, cc) => addcarry x, y, cc
- auto C = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
- if (!C || C->getZExtValue() != 0) break;
+ case ISD::UADDO_CARRY: {
+ // add x, (uaddo_carry y, 0, cc) => uaddo_carry x, y, cc
+ if (!isNullConstant(RHS.getOperand(1)))
+ break;
SDValue Args[] = { LHS, RHS.getOperand(0), RHS.getOperand(2) };
- return DAG.getNode(ISD::ADDCARRY, SDLoc(N), RHS->getVTList(), Args);
+ return DAG.getNode(ISD::UADDO_CARRY, SDLoc(N), RHS->getVTList(), Args);
}
}
return SDValue();
@@ -11198,8 +12273,8 @@ SDValue SITargetLowering::performSubCombine(SDNode *N,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- // sub x, zext (setcc) => subcarry x, 0, setcc
- // sub x, sext (setcc) => addcarry x, 0, setcc
+ // sub x, zext (setcc) => usubo_carry x, 0, setcc
+ // sub x, sext (setcc) => uaddo_carry x, 0, setcc
unsigned Opc = RHS.getOpcode();
switch (Opc) {
default: break;
@@ -11213,18 +12288,18 @@ SDValue SITargetLowering::performSubCombine(SDNode *N,
break;
SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1);
SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
- Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::ADDCARRY : ISD::SUBCARRY;
+ Opc = (Opc == ISD::SIGN_EXTEND) ? ISD::UADDO_CARRY : ISD::USUBO_CARRY;
return DAG.getNode(Opc, SL, VTList, Args);
}
}
- if (LHS.getOpcode() == ISD::SUBCARRY) {
- // sub (subcarry x, 0, cc), y => subcarry x, y, cc
+ if (LHS.getOpcode() == ISD::USUBO_CARRY) {
+ // sub (usubo_carry x, 0, cc), y => usubo_carry x, y, cc
auto C = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
if (!C || !C->isZero())
return SDValue();
SDValue Args[] = { LHS.getOperand(0), RHS, LHS.getOperand(2) };
- return DAG.getNode(ISD::SUBCARRY, SDLoc(N), LHS->getVTList(), Args);
+ return DAG.getNode(ISD::USUBO_CARRY, SDLoc(N), LHS->getVTList(), Args);
}
return SDValue();
}
@@ -11235,19 +12310,18 @@ SDValue SITargetLowering::performAddCarrySubCarryCombine(SDNode *N,
if (N->getValueType(0) != MVT::i32)
return SDValue();
- auto C = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!C || C->getZExtValue() != 0)
+ if (!isNullConstant(N->getOperand(1)))
return SDValue();
SelectionDAG &DAG = DCI.DAG;
SDValue LHS = N->getOperand(0);
- // addcarry (add x, y), 0, cc => addcarry x, y, cc
- // subcarry (sub x, y), 0, cc => subcarry x, y, cc
+ // uaddo_carry (add x, y), 0, cc => uaddo_carry x, y, cc
+ // usubo_carry (sub x, y), 0, cc => usubo_carry x, y, cc
unsigned LHSOpc = LHS.getOpcode();
unsigned Opc = N->getOpcode();
- if ((LHSOpc == ISD::ADD && Opc == ISD::ADDCARRY) ||
- (LHSOpc == ISD::SUB && Opc == ISD::SUBCARRY)) {
+ if ((LHSOpc == ISD::ADD && Opc == ISD::UADDO_CARRY) ||
+ (LHSOpc == ISD::SUB && Opc == ISD::USUBO_CARRY)) {
SDValue Args[] = { LHS.getOperand(0), LHS.getOperand(1), N->getOperand(2) };
return DAG.getNode(Opc, SDLoc(N), N->getVTList(), Args);
}
@@ -11599,8 +12673,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performAddCombine(N, DCI);
case ISD::SUB:
return performSubCombine(N, DCI);
- case ISD::ADDCARRY:
- case ISD::SUBCARRY:
+ case ISD::UADDO_CARRY:
+ case ISD::USUBO_CARRY:
return performAddCarrySubCarryCombine(N, DCI);
case ISD::FADD:
return performFAddCombine(N, DCI);
@@ -11637,12 +12711,12 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performFCanonicalizeCombine(N, DCI);
case AMDGPUISD::RCP:
return performRcpCombine(N, DCI);
+ case ISD::FLDEXP:
case AMDGPUISD::FRACT:
case AMDGPUISD::RSQ:
case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::RCP_IFLAG:
- case AMDGPUISD::RSQ_CLAMP:
- case AMDGPUISD::LDEXP: {
+ case AMDGPUISD::RSQ_CLAMP: {
// FIXME: This is probably wrong. If src is an sNaN, it won't be quieted
SDValue Src = N->getOperand(0);
if (Src.isUndef())
@@ -11652,6 +12726,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
return performUCharToFloatCombine(N, DCI);
+ case ISD::FCOPYSIGN:
+ return performFCopySignCombine(N, DCI);
case AMDGPUISD::CVT_F32_UBYTE0:
case AMDGPUISD::CVT_F32_UBYTE1:
case AMDGPUISD::CVT_F32_UBYTE2:
@@ -11685,6 +12761,8 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performExtractVectorEltCombine(N, DCI);
case ISD::INSERT_VECTOR_ELT:
return performInsertVectorEltCombine(N, DCI);
+ case ISD::FP_ROUND:
+ return performFPRoundCombine(N, DCI);
case ISD::LOAD: {
if (SDValue Widended = widenLoad(cast<LoadSDNode>(N), DCI))
return Widended;
@@ -11778,7 +12856,7 @@ SDNode *SITargetLowering::adjustWritemask(MachineSDNode *&Node,
// Set which texture component corresponds to the lane.
unsigned Comp;
for (unsigned i = 0, Dmask = OldDmask; (i <= Lane) && (Dmask != 0); i++) {
- Comp = countTrailingZeros(Dmask);
+ Comp = llvm::countr_zero(Dmask);
Dmask &= ~(1 << Comp);
}
@@ -12548,6 +13626,15 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
reservePrivateMemoryRegs(getTargetMachine(), MF, *TRI, *Info);
}
+ // TODO: Move this logic to getReservedRegs()
+ // Reserve the SGPR(s) to save/restore EXEC for WWM spill/copy handling.
+ unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
+ Register SReg = ST.isWave32()
+ ? AMDGPU::SGPR_32RegClass.getRegister(MaxNumSGPRs - 1)
+ : TRI->getAlignedHighSGPRForRC(MF, /*Align=*/2,
+ &AMDGPU::SGPR_64RegClass);
+ Info->setSGPRForEXECCopy(SReg);
+
assert(!TRI->isSubRegister(Info->getScratchRSrcReg(),
Info->getStackPtrOffsetReg()));
if (Info->getStackPtrOffsetReg() != AMDGPU::SP_REG)
@@ -12591,6 +13678,41 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
TargetLoweringBase::finalizeLowering(MF);
}
+void SITargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ Known.resetAll();
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ switch (IID) {
+ case Intrinsic::amdgcn_mbcnt_lo:
+ case Intrinsic::amdgcn_mbcnt_hi: {
+ const GCNSubtarget &ST =
+ DAG.getMachineFunction().getSubtarget<GCNSubtarget>();
+      // These return at most (wavefront size - 1) + src1.
+      // As long as src1 is an immediate we can calculate the known bits.
+ KnownBits Src1Known = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
+ unsigned Src1ValBits = Src1Known.countMaxActiveBits();
+ unsigned MaxActiveBits = std::max(Src1ValBits, ST.getWavefrontSizeLog2());
+ // Cater for potential carry
+ MaxActiveBits += Src1ValBits ? 1 : 0;
+ unsigned Size = Op.getValueType().getSizeInBits();
+ if (MaxActiveBits < Size)
+ Known.Zero.setHighBits(Size - MaxActiveBits);
+ return;
+ }
+ }
+ break;
+ }
+ }
+ return AMDGPUTargetLowering::computeKnownBitsForTargetNode(
+ Op, Known, DemandedElts, DAG, Depth);
+}
+
void SITargetLowering::computeKnownBitsForFrameIndex(
const int FI, KnownBits &Known, const MachineFunction &MF) const {
TargetLowering::computeKnownBitsForFrameIndex(FI, Known, MF);
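
A minimal standalone sketch of the bound used for mbcnt_lo/mbcnt_hi above
(illustrative only, not part of the vendor diff; activeBits and maxActiveBits
are invented helpers): the result is at most (wavefront size - 1) + src1, so
its active bits are bounded by the larger of src1's active bits and
log2(wavefront size), plus one to cater for a carry.

// mbcnt_knownbits_sketch.cpp -- illustrative only; helper names are invented.
#include <algorithm>
#include <cassert>
#include <cstdint>

static unsigned activeBits(uint64_t V) {
  unsigned N = 0;
  for (; V; V >>= 1)
    ++N;
  return N;
}

// Mirrors the MaxActiveBits computation in computeKnownBitsForTargetNode.
static unsigned maxActiveBits(uint64_t Src1Max, unsigned WavefrontSizeLog2) {
  unsigned Src1Bits = activeBits(Src1Max);
  unsigned Bits = std::max(Src1Bits, WavefrontSizeLog2);
  return Src1Bits ? Bits + 1 : Bits; // +1 caters for a possible carry
}

int main() {
  // Wave64 (log2 = 6), src1 == 0: the result is at most 63, i.e. 6 bits.
  assert(maxActiveBits(0, 6) == 6);
  // A 10-bit src1: the sum fits in 11 bits, so for an i32 result the upper
  // 32 - 11 = 21 bits are known zero.
  assert(32u - maxActiveBits(1023, 6) == 21u);
  return 0;
}
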
@@ -12605,7 +13727,7 @@ static void knownBitsForWorkitemID(const GCNSubtarget &ST, GISelKnownBits &KB,
KnownBits &Known, unsigned Dim) {
unsigned MaxValue =
ST.getMaxWorkitemID(KB.getMachineFunction().getFunction(), Dim);
- Known.Zero.setHighBits(countLeadingZeros(MaxValue));
+ Known.Zero.setHighBits(llvm::countl_zero(MaxValue));
}
void SITargetLowering::computeKnownBitsForTargetInstr(
@@ -12636,7 +13758,7 @@ void SITargetLowering::computeKnownBitsForTargetInstr(
// based on the actual size because we don't know if it's accurate or not
// at any given point.
Known.Zero.setHighBits(
- countLeadingZeros(getSubtarget()->getAddressableLocalMemorySize()));
+ llvm::countl_zero(getSubtarget()->getAddressableLocalMemorySize()));
break;
}
}
@@ -12648,6 +13770,30 @@ void SITargetLowering::computeKnownBitsForTargetInstr(
case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
Known.Zero.setHighBits(16);
break;
+ case AMDGPU::G_AMDGPU_SMED3:
+ case AMDGPU::G_AMDGPU_UMED3: {
+ auto [Dst, Src0, Src1, Src2] = MI->getFirst4Regs();
+
+ KnownBits Known2;
+ KB.computeKnownBitsImpl(Src2, Known2, DemandedElts, Depth + 1);
+ if (Known2.isUnknown())
+ break;
+
+ KnownBits Known1;
+ KB.computeKnownBitsImpl(Src1, Known1, DemandedElts, Depth + 1);
+ if (Known1.isUnknown())
+ break;
+
+ KnownBits Known0;
+ KB.computeKnownBitsImpl(Src0, Known0, DemandedElts, Depth + 1);
+ if (Known0.isUnknown())
+ break;
+
+ // TODO: Handle LeadZero/LeadOne from UMIN/UMAX handling.
+ Known.Zero = Known0.Zero & Known1.Zero & Known2.Zero;
+ Known.One = Known0.One & Known1.One & Known2.One;
+ break;
+ }
}
}
@@ -12759,9 +13905,9 @@ static bool isCopyFromRegOfInlineAsm(const SDNode *N) {
return false;
}
-bool SITargetLowering::isSDNodeSourceOfDivergence(
- const SDNode *N, FunctionLoweringInfo *FLI,
- LegacyDivergenceAnalysis *KDA) const {
+bool SITargetLowering::isSDNodeSourceOfDivergence(const SDNode *N,
+ FunctionLoweringInfo *FLI,
+ UniformityInfo *UA) const {
switch (N->getOpcode()) {
case ISD::CopyFromReg: {
const RegisterSDNode *R = cast<RegisterSDNode>(N->getOperand(1));
@@ -12774,7 +13920,7 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(
return !TRI->isSGPRReg(MRI, Reg);
if (const Value *V = FLI->getValueFromVirtualReg(R->getReg()))
- return KDA->isDivergent(V);
+ return UA->isDivergent(V);
assert(Reg == FLI->DemoteRegister || isCopyFromRegOfInlineAsm(N));
return !TRI->isSGPRReg(MRI, Reg);
@@ -12794,8 +13940,6 @@ bool SITargetLowering::isSDNodeSourceOfDivergence(
return AMDGPU::isIntrinsicSourceOfDivergence(
cast<ConstantSDNode>(N->getOperand(1))->getZExtValue());
case AMDGPUISD::ATOMIC_CMP_SWAP:
- case AMDGPUISD::ATOMIC_INC:
- case AMDGPUISD::ATOMIC_DEC:
case AMDGPUISD::ATOMIC_LOAD_FMIN:
case AMDGPUISD::ATOMIC_LOAD_FMAX:
case AMDGPUISD::BUFFER_ATOMIC_SWAP:
@@ -12830,10 +13974,10 @@ bool SITargetLowering::denormalsEnabledForType(const SelectionDAG &DAG,
EVT VT) const {
switch (VT.getScalarType().getSimpleVT().SimpleTy) {
case MVT::f32:
- return hasFP32Denormals(DAG.getMachineFunction());
+ return !denormalModeIsFlushAllF32(DAG.getMachineFunction());
case MVT::f64:
case MVT::f16:
- return hasFP64FP16Denormals(DAG.getMachineFunction());
+ return !denormalModeIsFlushAllF64F16(DAG.getMachineFunction());
default:
return false;
}
@@ -12843,10 +13987,10 @@ bool SITargetLowering::denormalsEnabledForType(LLT Ty,
MachineFunction &MF) const {
switch (Ty.getScalarSizeInBits()) {
case 32:
- return hasFP32Denormals(MF);
+ return !denormalModeIsFlushAllF32(MF);
case 64:
case 16:
- return hasFP64FP16Denormals(MF);
+ return !denormalModeIsFlushAllF64F16(MF);
default:
return false;
}
@@ -12930,6 +14074,9 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
if (AMDGPU::isFlatGlobalAddrSpace(AS) &&
Subtarget->hasAtomicFaddNoRtnInsts()) {
+ if (Subtarget->hasGFX940Insts())
+ return AtomicExpansionKind::None;
+
if (unsafeFPAtomicsDisabled(RMW->getFunction()))
return AtomicExpansionKind::CmpXChg;
@@ -13054,6 +14201,8 @@ SITargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
// uniform values (as produced by the mask results of control flow intrinsics)
// used outside of divergent blocks. The phi users need to also be treated as
// always uniform.
+//
+// FIXME: DA is no longer in use. Does this still apply to UniformityAnalysis?
static bool hasCFUser(const Value *V, SmallPtrSet<const Value *, 16> &Visited,
unsigned WaveSize) {
// FIXME: We assume we never cast the mask results of a control flow
@@ -13153,6 +14302,11 @@ bool SITargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0,
hasMemSDNodeUser(*N0->use_begin()));
}
+bool SITargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
+ Register N0, Register N1) const {
+ return MRI.hasOneNonDBGUse(N0); // FIXME: handle regbanks
+}
+
MachineMemOperand::Flags
SITargetLowering::getTargetMMOFlags(const Instruction &I) const {
// Propagate metadata set by AMDGPUAnnotateUniformValues to the MMO of a load.
@@ -13196,37 +14350,36 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
assert(AI->getOperation() == AtomicRMWInst::FAdd &&
"only fadd is supported for now");
- // Given: atomicrmw fadd float* %addr, float %val ordering
+ // Given: atomicrmw fadd ptr %addr, float %val ordering
//
// With this expansion we produce the following code:
// [...]
- // %int8ptr = bitcast float* %addr to i8*
// br label %atomicrmw.check.shared
//
// atomicrmw.check.shared:
- // %is.shared = call i1 @llvm.amdgcn.is.shared(i8* %int8ptr)
+ // %is.shared = call i1 @llvm.amdgcn.is.shared(ptr %addr)
// br i1 %is.shared, label %atomicrmw.shared, label %atomicrmw.check.private
//
// atomicrmw.shared:
- // %cast.shared = addrspacecast float* %addr to float addrspace(3)*
- // %loaded.shared = atomicrmw fadd float addrspace(3)* %cast.shared,
+ // %cast.shared = addrspacecast ptr %addr to ptr addrspace(3)
+ // %loaded.shared = atomicrmw fadd ptr addrspace(3) %cast.shared,
// float %val ordering
// br label %atomicrmw.phi
//
// atomicrmw.check.private:
- // %is.private = call i1 @llvm.amdgcn.is.private(i8* %int8ptr)
+  //   %is.private = call i1 @llvm.amdgcn.is.private(ptr %addr)
// br i1 %is.private, label %atomicrmw.private, label %atomicrmw.global
//
// atomicrmw.private:
- // %cast.private = addrspacecast float* %addr to float addrspace(5)*
- // %loaded.private = load float, float addrspace(5)* %cast.private
+ // %cast.private = addrspacecast ptr %addr to ptr addrspace(5)
+ // %loaded.private = load float, ptr addrspace(5) %cast.private
// %val.new = fadd float %loaded.private, %val
- // store float %val.new, float addrspace(5)* %cast.private
+ // store float %val.new, ptr addrspace(5) %cast.private
// br label %atomicrmw.phi
//
// atomicrmw.global:
- // %cast.global = addrspacecast float* %addr to float addrspace(1)*
- // %loaded.global = atomicrmw fadd float addrspace(1)* %cast.global,
+ // %cast.global = addrspacecast ptr %addr to ptr addrspace(1)
+ // %loaded.global = atomicrmw fadd ptr addrspace(1) %cast.global,
// float %val ordering
// br label %atomicrmw.phi
//
@@ -13259,7 +14412,6 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
Value *Val = AI->getValOperand();
Type *ValTy = Val->getType();
Value *Addr = AI->getPointerOperand();
- PointerType *PtrTy = cast<PointerType>(Addr->getType());
auto CreateNewAtomicRMW = [AI](IRBuilder<> &Builder, Value *Addr,
Value *Val) -> Value * {
@@ -13275,30 +14427,27 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
std::prev(BB->end())->eraseFromParent();
Builder.SetInsertPoint(BB);
- Value *Int8Ptr = Builder.CreateBitCast(Addr, Builder.getInt8PtrTy());
Builder.CreateBr(CheckSharedBB);
Builder.SetInsertPoint(CheckSharedBB);
CallInst *IsShared = Builder.CreateIntrinsic(Intrinsic::amdgcn_is_shared, {},
- {Int8Ptr}, nullptr, "is.shared");
+ {Addr}, nullptr, "is.shared");
Builder.CreateCondBr(IsShared, SharedBB, CheckPrivateBB);
Builder.SetInsertPoint(SharedBB);
Value *CastToLocal = Builder.CreateAddrSpaceCast(
- Addr,
- PointerType::getWithSamePointeeType(PtrTy, AMDGPUAS::LOCAL_ADDRESS));
+ Addr, PointerType::get(Ctx, AMDGPUAS::LOCAL_ADDRESS));
Value *LoadedShared = CreateNewAtomicRMW(Builder, CastToLocal, Val);
Builder.CreateBr(PhiBB);
Builder.SetInsertPoint(CheckPrivateBB);
CallInst *IsPrivate = Builder.CreateIntrinsic(
- Intrinsic::amdgcn_is_private, {}, {Int8Ptr}, nullptr, "is.private");
+ Intrinsic::amdgcn_is_private, {}, {Addr}, nullptr, "is.private");
Builder.CreateCondBr(IsPrivate, PrivateBB, GlobalBB);
Builder.SetInsertPoint(PrivateBB);
Value *CastToPrivate = Builder.CreateAddrSpaceCast(
- Addr,
- PointerType::getWithSamePointeeType(PtrTy, AMDGPUAS::PRIVATE_ADDRESS));
+ Addr, PointerType::get(Ctx, AMDGPUAS::PRIVATE_ADDRESS));
Value *LoadedPrivate =
Builder.CreateLoad(ValTy, CastToPrivate, "loaded.private");
Value *NewVal = Builder.CreateFAdd(LoadedPrivate, Val, "val.new");
@@ -13307,8 +14456,7 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
Builder.SetInsertPoint(GlobalBB);
Value *CastToGlobal = Builder.CreateAddrSpaceCast(
- Addr,
- PointerType::getWithSamePointeeType(PtrTy, AMDGPUAS::GLOBAL_ADDRESS));
+ Addr, PointerType::get(Ctx, AMDGPUAS::GLOBAL_ADDRESS));
Value *LoadedGlobal = CreateNewAtomicRMW(Builder, CastToGlobal, Val);
Builder.CreateBr(PhiBB);
@@ -13322,3 +14470,25 @@ void SITargetLowering::emitExpandAtomicRMW(AtomicRMWInst *AI) const {
AI->replaceAllUsesWith(Loaded);
AI->eraseFromParent();
}
+
+LoadInst *
+SITargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
+ IRBuilder<> Builder(AI);
+ auto Order = AI->getOrdering();
+
+  // The optimization removes the store aspect of the atomicrmw. Therefore, the
+  // cache must be flushed if the atomic ordering had release semantics. This
+  // does not necessarily require a fence; a release fence just happens to do
+  // that flush. Avoid replacing an atomicrmw that has release semantics.
+ if (isReleaseOrStronger(Order))
+ return nullptr;
+
+ LoadInst *LI = Builder.CreateAlignedLoad(
+ AI->getType(), AI->getPointerOperand(), AI->getAlign());
+ LI->setAtomic(Order, AI->getSyncScopeID());
+ LI->copyMetadata(*AI);
+ LI->takeName(AI);
+ AI->replaceAllUsesWith(LI);
+ AI->eraseFromParent();
+ return LI;
+}
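
A minimal standalone sketch of the idea behind
lowerIdempotentRMWIntoFencedLoad above (illustrative only, not part of the
vendor diff): an idempotent RMW such as fetch_or with 0 stores nothing new,
so for non-release orderings it observes the same value as an atomic load
with the same ordering.

// idempotent_rmw_sketch.cpp -- illustrative only.
#include <atomic>
#include <cassert>

int main() {
  std::atomic<unsigned> Flag{0x5u};
  // Idempotent RMW: returns the old value and stores it back unchanged.
  unsigned FromRMW = Flag.fetch_or(0u, std::memory_order_acquire);
  // An atomic load with the same (acquire) ordering observes the same value.
  unsigned FromLoad = Flag.load(std::memory_order_acquire);
  assert(FromRMW == FromLoad && FromRMW == 0x5u);
  return 0;
}
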
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 3b2c58108667..1745c0b9e88e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -87,8 +87,6 @@ private:
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
- SDValue makeV_ILLEGAL(SDValue Op, SelectionDAG &DAG) const;
-
// The raw.tbuffer and struct.tbuffer intrinsics have two offset args: offset
// (the offset that is included in bounds checking and swizzling, to be split
// between the instruction's voffset and immoffset fields) and soffset (the
@@ -108,8 +106,10 @@ private:
SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFFREXP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFSQRTF64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
@@ -143,6 +143,7 @@ private:
/// Custom lowering for ISD::FP_ROUND for MVT::f16.
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFMINNUM_FMAXNUM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerXMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
@@ -167,6 +168,8 @@ private:
SDValue performUCharToFloatCombine(SDNode *N,
DAGCombinerInfo &DCI) const;
+ SDValue performFCopySignCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
SDValue performSHLPtrCombine(SDNode *N,
unsigned AS,
EVT MemVT,
@@ -191,12 +194,14 @@ private:
SDValue performFPMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
SDValue Op0, SDValue Op1) const;
SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
- SDValue Op0, SDValue Op1, bool Signed) const;
+ SDValue Src, SDValue MinVal, SDValue MaxVal,
+ bool Signed) const;
SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFMed3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performCvtPkRTZCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performExtractVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performInsertVectorEltCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performFPRoundCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue reassociateScalarOps(SDNode *N, SelectionDAG &DAG) const;
unsigned getFusedOpcode(const SelectionDAG &DAG,
@@ -250,6 +255,17 @@ private:
void setBufferOffsets(SDValue CombinedOffset, SelectionDAG &DAG,
SDValue *Offsets, Align Alignment = Align(4)) const;
+ // Convert the i128 that an addrspace(8) pointer is natively represented as
+ // into the v4i32 that all the buffer intrinsics expect to receive. We can't
+ // add register classes for i128 on pain of the promotion logic going haywire,
+ // so this slightly ugly hack is what we've got. If passed a non-pointer
+  // argument (as would be seen in older buffer intrinsics), this does nothing.
+ SDValue bufferRsrcPtrToVector(SDValue MaybePointer, SelectionDAG &DAG) const;
+
+ // Wrap a 64-bit pointer into a v4i32 (which is how all SelectionDAG code
+ // represents ptr addrspace(8)) using the flags specified in the intrinsic.
+ SDValue lowerPointerAsRsrcIntrin(SDNode *Op, SelectionDAG &DAG) const;
+
// Handle 8 bit and 16 bit buffer loads
SDValue handleByteShortBufferLoads(SelectionDAG &DAG, EVT LoadVT, SDLoc DL,
ArrayRef<SDValue> Ops, MemSDNode *M) const;
@@ -272,6 +288,12 @@ public:
bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const override;
+ // While address space 7 should never make it to codegen, it still needs to
+  // have an MVT to prevent some analyses that query this function from breaking,
+ // so, to work around the lack of i160, map it to v5i32.
+ MVT getPointerTy(const DataLayout &DL, unsigned AS) const override;
+ MVT getPointerMemTy(const DataLayout &DL, unsigned AS) const override;
+
bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
MachineFunction &MF,
unsigned IntrinsicID) const override;
@@ -331,6 +353,12 @@ public:
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ unsigned combineRepeatedFPDivisors() const override {
+ // Combine multiple FDIVs with the same divisor into multiple FMULs by the
+ // reciprocal.
+ return 2;
+ }
+
bool supportSplitCSR(MachineFunction *MF) const override;
void initializeSplitCSR(MachineBasicBlock *Entry) const override;
void insertCopiesSplitCSR(
@@ -361,7 +389,7 @@ public:
SmallVectorImpl<SDValue> &MemOpChains,
SDValue Chain) const;
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &DL, SelectionDAG &DAG,
@@ -396,7 +424,6 @@ public:
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const override;
- bool hasBitPreservingFPLogic(EVT VT) const override;
bool hasAtomicFaddRtnForTy(SDValue &Op) const;
bool enableAggressiveFMAFusion(EVT VT) const override;
bool enableAggressiveFMAFusion(LLT Ty) const override;
@@ -452,6 +479,10 @@ public:
void finalizeLowering(MachineFunction &MF) const override;
+ void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth = 0) const override;
void computeKnownBitsForFrameIndex(int FrameIdx,
KnownBits &Known,
const MachineFunction &MF) const override;
@@ -464,14 +495,17 @@ public:
Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, Register R,
const MachineRegisterInfo &MRI,
unsigned Depth = 0) const override;
- bool isSDNodeSourceOfDivergence(const SDNode *N,
- FunctionLoweringInfo *FLI, LegacyDivergenceAnalysis *DA) const override;
+ bool isSDNodeSourceOfDivergence(const SDNode *N, FunctionLoweringInfo *FLI,
+ UniformityInfo *UA) const override;
bool hasMemSDNodeUser(SDNode *N) const;
bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
SDValue N1) const override;
+ bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
+ Register N1) const override;
+
bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
unsigned MaxDepth = 5) const;
bool isCanonicalized(Register Reg, MachineFunction &MF,
@@ -495,6 +529,9 @@ public:
shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
void emitExpandAtomicRMW(AtomicRMWInst *AI) const override;
+ LoadInst *
+ lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
+
const TargetRegisterClass *getRegClassFor(MVT VT,
bool isDivergent) const override;
bool requiresUniformRegister(MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 851c407bb255..4b0283b27a6f 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -35,7 +35,7 @@
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/DebugCounter.h"
-#include "llvm/Support/TargetParser.h"
+#include "llvm/TargetParser/TargetParser.h"
using namespace llvm;
#define DEBUG_TYPE "si-insert-waitcnts"
@@ -57,8 +57,6 @@ namespace {
// associated with the operand. Used for determining whether
// s_waitcnt instruction needs to be emitted.
-#define CNT_MASK(t) (1u << (t))
-
enum InstCounterType { VM_CNT = 0, LGKM_CNT, EXP_CNT, VS_CNT, NUM_INST_CNTS };
} // namespace
@@ -88,19 +86,20 @@ struct RegisterEncoding {
};
enum WaitEventType {
- VMEM_ACCESS, // vector-memory read & write
- VMEM_READ_ACCESS, // vector-memory read
- VMEM_WRITE_ACCESS, // vector-memory write
- LDS_ACCESS, // lds read & write
- GDS_ACCESS, // gds read & write
- SQ_MESSAGE, // send message
- SMEM_ACCESS, // scalar-memory read & write
- EXP_GPR_LOCK, // export holding on its data src
- GDS_GPR_LOCK, // GDS holding on its data and addr src
- EXP_POS_ACCESS, // write to export position
- EXP_PARAM_ACCESS, // write to export parameter
- VMW_GPR_LOCK, // vector-memory write holding on its data src
- EXP_LDS_ACCESS, // read by ldsdir counting as export
+ VMEM_ACCESS, // vector-memory read & write
+ VMEM_READ_ACCESS, // vector-memory read
+ VMEM_WRITE_ACCESS, // vector-memory write that is not scratch
+ SCRATCH_WRITE_ACCESS, // vector-memory write that may be scratch
+ LDS_ACCESS, // lds read & write
+ GDS_ACCESS, // gds read & write
+ SQ_MESSAGE, // send message
+ SMEM_ACCESS, // scalar-memory read & write
+ EXP_GPR_LOCK, // export holding on its data src
+ GDS_GPR_LOCK, // GDS holding on its data and addr src
+ EXP_POS_ACCESS, // write to export position
+ EXP_PARAM_ACCESS, // write to export parameter
+ VMW_GPR_LOCK, // vector-memory write holding on its data src
+ EXP_LDS_ACCESS, // read by ldsdir counting as export
NUM_WAIT_EVENTS,
};
@@ -110,7 +109,7 @@ static const unsigned WaitEventMaskForInst[NUM_INST_CNTS] = {
(1 << SQ_MESSAGE),
(1 << EXP_GPR_LOCK) | (1 << GDS_GPR_LOCK) | (1 << VMW_GPR_LOCK) |
(1 << EXP_PARAM_ACCESS) | (1 << EXP_POS_ACCESS) | (1 << EXP_LDS_ACCESS),
- (1 << VMEM_WRITE_ACCESS)};
+ (1 << VMEM_WRITE_ACCESS) | (1 << SCRATCH_WRITE_ACCESS)};
// The mapping is:
// 0 .. SQ_MAX_PGM_VGPRS-1 real VGPRs
@@ -372,11 +371,8 @@ private:
MachinePostDominatorTree *PDT;
struct BlockInfo {
- MachineBasicBlock *MBB;
std::unique_ptr<WaitcntBrackets> Incoming;
bool Dirty = true;
-
- explicit BlockInfo(MachineBasicBlock *MBB) : MBB(MBB) {}
};
MapVector<MachineBasicBlock *, BlockInfo> BlockInfos;
@@ -386,6 +382,10 @@ private:
bool ForceEmitZeroWaitcnts;
bool ForceEmitWaitcnt[NUM_INST_CNTS];
+ // S_ENDPGM instructions before which we should insert a DEALLOC_VGPRS
+ // message.
+ DenseSet<MachineInstr *> ReleaseVGPRInsts;
+
public:
static char ID;
@@ -398,6 +398,7 @@ public:
bool shouldFlushVmCnt(MachineLoop *ML, WaitcntBrackets &Brackets);
bool isPreheaderToFlush(MachineBasicBlock &MBB,
WaitcntBrackets &ScoreBrackets);
+ bool isVMEMOrFlatVMEM(const MachineInstr &MI) const;
bool runOnMachineFunction(MachineFunction &MF) override;
StringRef getPassName() const override {
@@ -418,10 +419,6 @@ public:
return false;
}
- AMDGPU::Waitcnt allZeroWaitcnt() const {
- return AMDGPU::Waitcnt::allZero(ST->hasVscnt());
- }
-
void setForceEmitWaitcnt() {
// For non-debug builds, ForceEmitWaitcnt has been initialized to false;
// For debug builds, get the debug counter info and adjust if need be
@@ -455,13 +452,19 @@ public:
assert(SIInstrInfo::isVMEM(Inst) || SIInstrInfo::isFLAT(Inst));
if (!ST->hasVscnt())
return VMEM_ACCESS;
- if (Inst.mayStore() && !SIInstrInfo::isAtomicRet(Inst))
+ if (Inst.mayStore() && !SIInstrInfo::isAtomicRet(Inst)) {
+ // FLAT and SCRATCH instructions may access scratch. Other VMEM
+ // instructions do not.
+ if (SIInstrInfo::isFLAT(Inst) && mayAccessScratchThroughFlat(Inst))
+ return SCRATCH_WRITE_ACCESS;
return VMEM_WRITE_ACCESS;
+ }
return VMEM_READ_ACCESS;
}
bool mayAccessVMEMThroughFlat(const MachineInstr &MI) const;
bool mayAccessLDSThroughFlat(const MachineInstr &MI) const;
+ bool mayAccessScratchThroughFlat(const MachineInstr &MI) const;
bool generateWaitcntInstBefore(MachineInstr &MI,
WaitcntBrackets &ScoreBrackets,
MachineInstr *OldWaitcntInstr,
@@ -1029,7 +1032,18 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
MI.getOpcode() == AMDGPU::SI_RETURN ||
MI.getOpcode() == AMDGPU::S_SETPC_B64_return ||
(MI.isReturn() && MI.isCall() && !callWaitsOnFunctionEntry(MI))) {
- Wait = Wait.combined(allZeroWaitcnt());
+ Wait = Wait.combined(AMDGPU::Waitcnt::allZeroExceptVsCnt());
+ }
+ // Identify S_ENDPGM instructions which may have to wait for outstanding VMEM
+ // stores. In this case it can be useful to send a message to explicitly
+ // release all VGPRs before the stores have completed, but it is only safe to
+ // do this if there are no outstanding scratch stores.
+ else if (MI.getOpcode() == AMDGPU::S_ENDPGM ||
+ MI.getOpcode() == AMDGPU::S_ENDPGM_SAVED) {
+ if (ST->getGeneration() >= AMDGPUSubtarget::GFX11 &&
+ ScoreBrackets.getScoreRange(VS_CNT) != 0 &&
+ !ScoreBrackets.hasPendingEvent(SCRATCH_WRITE_ACCESS))
+ ReleaseVGPRInsts.insert(&MI);
}
// Resolve vm waits before gs-done.
else if ((MI.getOpcode() == AMDGPU::S_SENDMSG ||
@@ -1214,7 +1228,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
// cause an exception. Otherwise, insert an explicit S_WAITCNT 0 here.
if (MI.getOpcode() == AMDGPU::S_BARRIER &&
!ST->hasAutoWaitcntBeforeBarrier() && !ST->supportsBackOffBarrier()) {
- Wait = Wait.combined(allZeroWaitcnt());
+ Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt()));
}
// TODO: Remove this work-around, enable the assert for Bug 457939
@@ -1230,7 +1244,7 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
ScoreBrackets.simplifyWaitcnt(Wait);
if (ForceEmitZeroWaitcnts)
- Wait = allZeroWaitcnt();
+ Wait = AMDGPU::Waitcnt::allZeroExceptVsCnt();
if (ForceEmitWaitcnt[VM_CNT])
Wait.VmCnt = 0;
@@ -1238,8 +1252,6 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI,
Wait.ExpCnt = 0;
if (ForceEmitWaitcnt[LGKM_CNT])
Wait.LgkmCnt = 0;
- if (ForceEmitWaitcnt[VS_CNT])
- Wait.VsCnt = 0;
if (FlushVmCnt) {
if (ScoreBrackets.hasPendingEvent(VM_CNT))
@@ -1384,6 +1396,32 @@ bool SIInsertWaitcnts::mayAccessLDSThroughFlat(const MachineInstr &MI) const {
return false;
}
+// This is a flat memory operation. Check to see if it has memory tokens for
+// either scratch or FLAT.
+bool SIInsertWaitcnts::mayAccessScratchThroughFlat(
+ const MachineInstr &MI) const {
+ assert(TII->isFLAT(MI));
+
+ // SCRATCH instructions always access scratch.
+ if (TII->isFLATScratch(MI))
+ return true;
+
+ // GLOBAL instructions never access scratch.
+ if (TII->isFLATGlobal(MI))
+ return false;
+
+ // If there are no memory operands then conservatively assume the flat
+ // operation may access scratch.
+ if (MI.memoperands_empty())
+ return true;
+
+ // See if any memory operand specifies an address space that involves scratch.
+ return any_of(MI.memoperands(), [](const MachineMemOperand *Memop) {
+ unsigned AS = Memop->getAddrSpace();
+ return AS == AMDGPUAS::PRIVATE_ADDRESS || AS == AMDGPUAS::FLAT_ADDRESS;
+ });
+}
+
void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
WaitcntBrackets *ScoreBrackets) {
// Now look at the instruction opcode. If it is a memory access
@@ -1436,7 +1474,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
} else if (Inst.isCall()) {
if (callWaitsOnFunctionReturn(Inst)) {
// Act as a wait on everything
- ScoreBrackets->applyWaitcnt(allZeroWaitcnt());
+ ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt::allZeroExceptVsCnt());
} else {
       // May need to wait for anything.
ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt());
@@ -1703,6 +1741,11 @@ bool SIInsertWaitcnts::isPreheaderToFlush(MachineBasicBlock &MBB,
return UpdateCache(false);
}
+bool SIInsertWaitcnts::isVMEMOrFlatVMEM(const MachineInstr &MI) const {
+ return SIInstrInfo::isVMEM(MI) ||
+ (SIInstrInfo::isFLAT(MI) && mayAccessVMEMThroughFlat(MI));
+}
+
// Return true if it is better to flush the vmcnt counter in the preheader of
// the given loop. We currently decide to flush in two situations:
// 1. The loop contains vmem store(s), no vmem load and at least one use of a
@@ -1721,7 +1764,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
for (MachineBasicBlock *MBB : ML->blocks()) {
for (MachineInstr &MI : *MBB) {
- if (SIInstrInfo::isVMEM(MI)) {
+ if (isVMEMOrFlatVMEM(MI)) {
if (MI.mayLoad())
HasVMemLoad = true;
if (MI.mayStore())
@@ -1749,7 +1792,7 @@ bool SIInsertWaitcnts::shouldFlushVmCnt(MachineLoop *ML,
}
}
// VMem load vgpr def
- else if (SIInstrInfo::isVMEM(MI) && MI.mayLoad() && Op.isDef())
+ else if (isVMEMOrFlatVMEM(MI) && MI.mayLoad() && Op.isDef())
for (int RegNo = Interval.first; RegNo < Interval.second; ++RegNo) {
// If we find a register that is loaded inside the loop, 1. and 2.
// are invalidated and we can exit.
@@ -1813,10 +1856,6 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
I != E && (I->isPHI() || I->isMetaInstruction()); ++I)
;
BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
- if (ST->hasVscnt())
- BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
- .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
- .addImm(0);
Modified = true;
}
@@ -1824,7 +1863,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
// Keep iterating over the blocks in reverse post order, inserting and
// updating s_waitcnt where needed, until a fix point is reached.
for (auto *MBB : ReversePostOrderTraversal<MachineFunction *>(&MF))
- BlockInfos.insert({MBB, BlockInfo(MBB)});
+ BlockInfos.insert({MBB, BlockInfo()});
std::unique_ptr<WaitcntBrackets> Brackets;
bool Repeat;
@@ -1833,6 +1872,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
for (auto BII = BlockInfos.begin(), BIE = BlockInfos.end(); BII != BIE;
++BII) {
+ MachineBasicBlock *MBB = BII->first;
BlockInfo &BI = BII->second;
if (!BI.Dirty)
continue;
@@ -1849,12 +1889,12 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
*Brackets = WaitcntBrackets(ST, Limits, Encoding);
}
- Modified |= insertWaitcntInBlock(MF, *BI.MBB, *Brackets);
+ Modified |= insertWaitcntInBlock(MF, *MBB, *Brackets);
BI.Dirty = false;
if (Brackets->hasPendingEvent()) {
BlockInfo *MoveBracketsToSucc = nullptr;
- for (MachineBasicBlock *Succ : BI.MBB->successors()) {
+ for (MachineBasicBlock *Succ : MBB->successors()) {
auto SuccBII = BlockInfos.find(Succ);
BlockInfo &SuccBI = SuccBII->second;
if (!SuccBI.Incoming) {
@@ -1924,5 +1964,18 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
}
}
+  // Insert DEALLOC_VGPRS messages before previously identified S_ENDPGM
+ // instructions.
+ for (MachineInstr *MI : ReleaseVGPRInsts) {
+ if (ST->requiresNopBeforeDeallocVGPRs()) {
+ BuildMI(*MI->getParent(), MI, DebugLoc(), TII->get(AMDGPU::S_NOP))
+ .addImm(0);
+ }
+ BuildMI(*MI->getParent(), MI, DebugLoc(), TII->get(AMDGPU::S_SENDMSG))
+ .addImm(AMDGPU::SendMsg::ID_DEALLOC_VGPRS_GFX11Plus);
+ Modified = true;
+ }
+ ReleaseVGPRInsts.clear();
+
return Modified;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index d86d4e659803..f674777724eb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -153,6 +153,9 @@ class InstSI <dag outs, dag ins, string asm = "",
// This bit indicates that tied source will not be read.
field bit TiedSourceNotRead = 0;
+ // This bit indicates that the instruction is never-uniform/divergent
+ field bit IsNeverUniform = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = SALU;
let TSFlags{1} = VALU;
@@ -234,6 +237,8 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{60} = TiedSourceNotRead;
+ let TSFlags{61} = IsNeverUniform;
+
let SchedRW = [Write32Bit];
let AsmVariantName = AMDGPUAsmVariants.Default;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 2cbc90219334..278cf2b69ee3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -329,8 +329,8 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth(
const MachineOperand *Offset1Op =
getNamedOperand(LdSt, AMDGPU::OpName::offset1);
- unsigned Offset0 = Offset0Op->getImm();
- unsigned Offset1 = Offset1Op->getImm();
+ unsigned Offset0 = Offset0Op->getImm() & 0xff;
+ unsigned Offset1 = Offset1Op->getImm() & 0xff;
if (Offset0 + 1 != Offset1)
return false;
@@ -537,7 +537,7 @@ static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc,
- const char *Msg = "illegal SGPR to VGPR copy") {
+ const char *Msg = "illegal VGPR to SGPR copy") {
MachineFunction *MF = MBB.getParent();
DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), Msg, DL, DS_Error);
LLVMContext &C = MF->getFunction().getContext();
@@ -578,9 +578,12 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII,
if (!RegsOverlap) {
for (auto Def = MI, E = MBB.begin(); Def != E; ) {
--Def;
- if (!Def->definesRegister(SrcReg, &RI))
+
+ if (!Def->modifiesRegister(SrcReg, &RI))
continue;
- if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64)
+
+ if (Def->getOpcode() != AMDGPU::V_ACCVGPR_WRITE_B32_e64 ||
+ Def->getOperand(0).getReg() != SrcReg)
break;
MachineOperand &DefOp = Def->getOperand(1);
@@ -615,8 +618,8 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII,
}
}
- RS.enterBasicBlock(MBB);
- RS.forward(MI);
+ RS.enterBasicBlockEnd(MBB);
+ RS.backward(MI);
// Ideally we want to have three registers for a long reg_sequence copy
// to hide 2 waitstates between v_mov_b32 and accvgpr_write.
@@ -631,11 +634,12 @@ static void indirectCopyToAGPR(const SIInstrInfo &TII,
assert(MBB.getParent()->getRegInfo().isReserved(Tmp) &&
"VGPR used for an intermediate copy should have been reserved.");
- // Only loop through if there are any free registers left, otherwise
- // scavenger may report a fatal error without emergency spill slot
- // or spill with the slot.
- while (RegNo-- && RS.FindUnusedReg(&AMDGPU::VGPR_32RegClass)) {
- Register Tmp2 = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, 0);
+ // Only loop through if there are any free registers left. We don't want to
+ // spill.
+ while (RegNo--) {
+ Register Tmp2 = RS.scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI,
+ /* RestoreAfter */ false, 0,
+ /* AllowSpill */ false);
if (!Tmp2 || RI.getHWRegIndex(Tmp2) >= MaxVGPRs)
break;
Tmp = Tmp2;
@@ -1394,6 +1398,14 @@ static unsigned getIndirectSGPRWriteMovRelPseudo32(unsigned VecSize) {
return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5;
if (VecSize <= 256) // 32 bytes
return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8;
+ if (VecSize <= 288) // 36 bytes
+ return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9;
+ if (VecSize <= 320) // 40 bytes
+ return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10;
+ if (VecSize <= 352) // 44 bytes
+ return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11;
+ if (VecSize <= 384) // 48 bytes
+ return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12;
if (VecSize <= 512) // 64 bytes
return AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16;
if (VecSize <= 1024) // 128 bytes
@@ -1575,6 +1587,30 @@ static unsigned getAVSpillSaveOpcode(unsigned Size) {
}
}
+static unsigned getWWMRegSpillSaveOpcode(unsigned Size) {
+  // Currently, only 32-bit WWM register spills are needed.
+ if (Size != 4)
+ llvm_unreachable("unknown wwm register spill size");
+
+ return AMDGPU::SI_SPILL_WWM_V32_SAVE;
+}
+
+static unsigned getVectorRegSpillSaveOpcode(Register Reg,
+ const TargetRegisterClass *RC,
+ unsigned Size,
+ const SIRegisterInfo &TRI,
+ const SIMachineFunctionInfo &MFI) {
+ // Choose the right opcode if spilling a WWM register.
+ if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG))
+ return getWWMRegSpillSaveOpcode(Size);
+
+ if (TRI.isVectorSuperClass(RC))
+ return getAVSpillSaveOpcode(Size);
+
+ return TRI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(Size)
+ : getVGPRSpillSaveOpcode(Size);
+}
+
void SIInstrInfo::storeRegToStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
bool isKill, int FrameIndex, const TargetRegisterClass *RC,
@@ -1619,11 +1655,8 @@ void SIInstrInfo::storeRegToStackSlot(
return;
}
- unsigned Opcode = RI.isVectorSuperClass(RC)
- ? getAVSpillSaveOpcode(SpillSize)
- : RI.isAGPRClass(RC)
- ? getAGPRSpillSaveOpcode(SpillSize)
- : getVGPRSpillSaveOpcode(SpillSize);
+ unsigned Opcode = getVectorRegSpillSaveOpcode(VReg ? VReg : SrcReg, RC,
+ SpillSize, RI, *MFI);
MFI->setHasSpilledVGPRs();
BuildMI(MBB, MI, DL, get(Opcode))
@@ -1774,6 +1807,29 @@ static unsigned getAVSpillRestoreOpcode(unsigned Size) {
}
}
+static unsigned getWWMRegSpillRestoreOpcode(unsigned Size) {
+  // Currently, only 32-bit WWM register spills are needed.
+ if (Size != 4)
+ llvm_unreachable("unknown wwm register spill size");
+
+ return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
+}
+
+static unsigned
+getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC,
+ unsigned Size, const SIRegisterInfo &TRI,
+ const SIMachineFunctionInfo &MFI) {
+ // Choose the right opcode if restoring a WWM register.
+ if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG))
+ return getWWMRegSpillRestoreOpcode(Size);
+
+ if (TRI.isVectorSuperClass(RC))
+ return getAVSpillRestoreOpcode(Size);
+
+ return TRI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(Size)
+ : getVGPRSpillRestoreOpcode(Size);
+}
+
void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
Register DestReg, int FrameIndex,
@@ -1817,11 +1873,8 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
return;
}
- unsigned Opcode = RI.isVectorSuperClass(RC)
- ? getAVSpillRestoreOpcode(SpillSize)
- : RI.isAGPRClass(RC)
- ? getAGPRSpillRestoreOpcode(SpillSize)
- : getVGPRSpillRestoreOpcode(SpillSize);
+ unsigned Opcode = getVectorRegSpillRestoreOpcode(VReg ? VReg : DestReg, RC,
+ SpillSize, RI, *MFI);
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
.addFrameIndex(FrameIndex) // vaddr
.addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
@@ -1941,6 +1994,18 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.setDesc(get(AMDGPU::S_AND_B32));
break;
+ case AMDGPU::S_AND_SAVEEXEC_B64_term:
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B64));
+ break;
+
+ case AMDGPU::S_AND_SAVEEXEC_B32_term:
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32));
+ break;
+
case AMDGPU::V_MOV_B64_PSEUDO: {
Register Dst = MI.getOperand(0).getReg();
Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
@@ -2084,6 +2149,10 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V4:
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V5:
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V8:
+ case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V9:
+ case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V10:
+ case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V11:
+ case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V12:
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V16:
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B32_V32:
case AMDGPU::S_INDIRECT_REG_WRITE_MOVREL_B64_V1:
@@ -2345,6 +2414,14 @@ SIInstrInfo::expandMovDPP64(MachineInstr &MI) const {
return std::pair(Split[0], Split[1]);
}
+std::optional<DestSourcePair>
+SIInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
+ if (MI.getOpcode() == AMDGPU::WWM_COPY)
+ return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
+
+ return std::nullopt;
+}
+
bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
MachineOperand &Src0,
unsigned Src0OpName,
@@ -2522,6 +2599,7 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
+ const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
// FIXME: Virtual register workaround for RegScavenger not working with empty
// blocks.
@@ -2555,12 +2633,6 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
.addReg(PCReg);
- // FIXME: If spilling is necessary, this will fail because this scavenger has
- // no emergency stack slots. It is non-trivial to spill in this situation,
- // because the restore code needs to be specially placed after the
- // jump. BranchRelaxation then needs to be made aware of the newly inserted
- // block.
- //
// If a spill is needed for the pc register pair, we need to insert a spill
// restore block right before the destination block, and insert a short branch
// into the old destination block's fallthrough predecessor.
@@ -2591,10 +2663,20 @@ void SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
// dest_bb:
// buzz;
- RS->enterBasicBlockEnd(MBB);
- Register Scav = RS->scavengeRegisterBackwards(
- AMDGPU::SReg_64RegClass, MachineBasicBlock::iterator(GetPC),
- /* RestoreAfter */ false, 0, /* AllowSpill */ false);
+ Register LongBranchReservedReg = MFI->getLongBranchReservedReg();
+ Register Scav;
+
+ // If we've previously reserved a register for long branches, avoid running
+ // the scavenger and just use the reserved register.
+ if (LongBranchReservedReg) {
+ RS->enterBasicBlock(MBB);
+ Scav = LongBranchReservedReg;
+ } else {
+ RS->enterBasicBlockEnd(MBB);
+ Scav = RS->scavengeRegisterBackwards(
+ AMDGPU::SReg_64RegClass, MachineBasicBlock::iterator(GetPC),
+ /* RestoreAfter */ false, 0, /* AllowSpill */ false);
+ }
if (Scav) {
RS->setRegUsed(Scav);
MRI.replaceRegWith(PCReg, Scav);
@@ -2720,11 +2802,13 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
case AMDGPU::S_OR_B64_term:
case AMDGPU::S_ANDN2_B64_term:
case AMDGPU::S_AND_B64_term:
+ case AMDGPU::S_AND_SAVEEXEC_B64_term:
case AMDGPU::S_MOV_B32_term:
case AMDGPU::S_XOR_B32_term:
case AMDGPU::S_OR_B32_term:
case AMDGPU::S_ANDN2_B32_term:
case AMDGPU::S_AND_B32_term:
+ case AMDGPU::S_AND_SAVEEXEC_B32_term:
break;
case AMDGPU::SI_IF:
case AMDGPU::SI_ELSE:
@@ -2858,7 +2942,7 @@ bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
if (MRI.getRegClass(FalseReg) != RC)
return false;
- int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
+ int NumInsts = AMDGPU::getRegBitWidth(*RC) / 32;
CondCycles = TrueCycles = FalseCycles = NumInsts; // ???
// Limit to equal cost for branch vs. N v_cndmask_b32s.
@@ -2873,7 +2957,7 @@ bool SIInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
if (MRI.getRegClass(FalseReg) != RC)
return false;
- int NumInsts = AMDGPU::getRegBitWidth(RC->getID()) / 32;
+ int NumInsts = AMDGPU::getRegBitWidth(*RC) / 32;
// Multiples of 8 can do s_cselect_b64
if (NumInsts % 2 == 0)
@@ -3004,6 +3088,7 @@ bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
case AMDGPU::S_MOV_B32:
case AMDGPU::S_MOV_B64:
case AMDGPU::COPY:
+ case AMDGPU::WWM_COPY:
case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
case AMDGPU::V_ACCVGPR_READ_B32_e64:
case AMDGPU::V_ACCVGPR_MOV_B32:
@@ -3084,7 +3169,12 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
assert(UseMI.getOperand(1).getReg().isVirtual());
}
- UseMI.setDesc(get(NewOpc));
+ const MCInstrDesc &NewMCID = get(NewOpc);
+ if (DstReg.isPhysical() &&
+ !RI.getRegClass(NewMCID.operands()[0].RegClass)->contains(DstReg))
+ return false;
+
+ UseMI.setDesc(NewMCID);
UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
return true;
@@ -4352,7 +4442,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
// Adjust for packed 16 bit values
if (D16 && D16->getImm() && !ST.hasUnpackedD16VMem())
- RegCount >>= 1;
+ RegCount = divideCeil(RegCount, 2);
// Adjust if using LWE or TFE
if ((LWE && LWE->getImm()) || (TFE && TFE->getImm()))
@@ -4365,7 +4455,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
const TargetRegisterClass *DstRC = getOpRegClass(MI, DstIdx);
uint32_t DstSize = RI.getRegSizeInBits(*DstRC) / 32;
if (RegCount > DstSize) {
- ErrInfo = "MIMG instruction returns too many registers for dst "
+ ErrInfo = "Image instruction returns too many registers for dst "
"register class";
return false;
}
@@ -4636,9 +4726,12 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
unsigned VAddrWords;
if (IsNSA) {
VAddrWords = SRsrcIdx - VAddr0Idx;
+ if (ST.hasPartialNSAEncoding() && AddrWords > ST.getNSAMaxSize()) {
+ unsigned LastVAddrIdx = SRsrcIdx - 1;
+ VAddrWords += getOpSize(MI, LastVAddrIdx) / 4 - 1;
+ }
} else {
- const TargetRegisterClass *RC = getOpRegClass(MI, VAddr0Idx);
- VAddrWords = MRI.getTargetRegisterInfo()->getRegSizeInBits(*RC) / 32;
+ VAddrWords = getOpSize(MI, VAddr0Idx) / 4;
if (AddrWords > 12)
AddrWords = 16;
}
@@ -4881,6 +4974,51 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
"Unexpected scalar opcode without corresponding vector one!");
}
+void SIInstrInfo::insertScratchExecCopy(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, Register Reg,
+ bool IsSCCLive,
+ SlotIndexes *Indexes) const {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ bool IsWave32 = ST.isWave32();
+ if (IsSCCLive) {
+ // Insert two move instructions, one to save the original value of EXEC and
+ // the other to turn on all bits in EXEC. This is required because we can't
+ // use the single S_OR_SAVEEXEC instruction here, as it would clobber SCC.
+ unsigned MovOpc = IsWave32 ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+ MCRegister Exec = IsWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ auto StoreExecMI = BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Reg)
+ .addReg(Exec, RegState::Kill);
+ auto FlipExecMI = BuildMI(MBB, MBBI, DL, TII->get(MovOpc), Exec).addImm(-1);
+ if (Indexes) {
+ Indexes->insertMachineInstrInMaps(*StoreExecMI);
+ Indexes->insertMachineInstrInMaps(*FlipExecMI);
+ }
+ } else {
+ const unsigned OrSaveExec =
+ IsWave32 ? AMDGPU::S_OR_SAVEEXEC_B32 : AMDGPU::S_OR_SAVEEXEC_B64;
+ auto SaveExec =
+ BuildMI(MBB, MBBI, DL, TII->get(OrSaveExec), Reg).addImm(-1);
+ SaveExec->getOperand(3).setIsDead(); // Mark SCC as dead.
+ if (Indexes)
+ Indexes->insertMachineInstrInMaps(*SaveExec);
+ }
+}
+
+void SIInstrInfo::restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, Register Reg,
+ SlotIndexes *Indexes) const {
+ unsigned ExecMov = isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
+ MCRegister Exec = isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ auto ExecRestoreMI =
+ BuildMI(MBB, MBBI, DL, get(ExecMov), Exec).addReg(Reg, RegState::Kill);
+ if (Indexes)
+ Indexes->insertMachineInstrInMaps(*ExecRestoreMI);
+}
+
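The two helpers above are meant to be used as a pair around a region that must run with all lanes enabled. The following is a minimal caller-side sketch, not part of the patch: the wrapper function, its name, the scratch register, and the insertion points are illustrative assumptions; only insertScratchExecCopy and restoreExec come from the code above.

  void bracketWholeWaveRegion(const SIInstrInfo &TII, MachineFunction &MF,
                              MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator Begin,
                              MachineBasicBlock::iterator End,
                              const DebugLoc &DL, Register TmpSGPR,
                              bool IsSCCLive, SlotIndexes *Indexes) {
    // Copy EXEC into TmpSGPR and enable all lanes (two S_MOVs when SCC is
    // live, otherwise a single S_OR_SAVEEXEC).
    TII.insertScratchExecCopy(MF, MBB, Begin, DL, TmpSGPR, IsSCCLive, Indexes);
    // ... whole-wave instructions between Begin and End ...
    // Restore the saved EXEC mask; TmpSGPR is killed here.
    TII.restoreExec(MF, MBB, End, DL, TmpSGPR, Indexes);
  }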
static const TargetRegisterClass *
adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI,
const MachineRegisterInfo &MRI,
@@ -4979,12 +5117,6 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
Opcode = (Size == 64) ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
const TargetRegisterClass *VRC = RI.getEquivalentVGPRClass(RC);
- const TargetRegisterClass *VRC64 = RI.getVGPR64Class();
- if (RI.getCommonSubClass(VRC64, VRC))
- VRC = VRC64;
- else
- VRC = &AMDGPU::VGPR_32RegClass;
-
Register Reg = MRI.createVirtualRegister(VRC);
DebugLoc DL = MBB->findDebugLoc(I);
BuildMI(*MI.getParent(), I, DL, get(Opcode), Reg).add(MO);
@@ -5585,13 +5717,12 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
}
// Emit the actual waterfall loop, executing the wrapped instruction for each
-// unique value of \p Rsrc across all lanes. In the best case we execute 1
+// unique value of \p ScalarOps across all lanes. In the best case we execute 1
// iteration, in the worst case we execute 64 (once per lane).
-static void
-emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
- MachineBasicBlock &OrigBB, MachineBasicBlock &LoopBB,
- MachineBasicBlock &BodyBB, const DebugLoc &DL,
- MachineOperand &Rsrc) {
+static void emitLoadScalarOpsFromVGPRLoop(
+ const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &OrigBB,
+ MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL,
+ ArrayRef<MachineOperand *> ScalarOps) {
MachineFunction &MF = *OrigBB.getParent();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
@@ -5609,73 +5740,106 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
SmallVector<Register, 8> ReadlanePieces;
Register CondReg;
- Register VRsrc = Rsrc.getReg();
- unsigned VRsrcUndef = getUndefRegState(Rsrc.isUndef());
+ for (MachineOperand *ScalarOp : ScalarOps) {
+ unsigned RegSize = TRI->getRegSizeInBits(ScalarOp->getReg(), MRI);
+ unsigned NumSubRegs = RegSize / 32;
+ Register VScalarOp = ScalarOp->getReg();
- unsigned RegSize = TRI->getRegSizeInBits(Rsrc.getReg(), MRI);
- unsigned NumSubRegs = RegSize / 32;
- assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 && "Unhandled register size");
+ if (NumSubRegs == 1) {
+ Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg)
+ .addReg(VScalarOp);
- Register CurRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
- Register CurRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
- // Read the next variant <- also loop target.
- BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
- .addReg(VRsrc, VRsrcUndef, TRI->getSubRegFromChannel(Idx));
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg)
+ .addReg(CurReg)
+ .addReg(VScalarOp);
- // Read the next variant <- also loop target.
- BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
- .addReg(VRsrc, VRsrcUndef, TRI->getSubRegFromChannel(Idx + 1));
+ // Combine the comparison results with AND.
+ if (!CondReg) // First.
+ CondReg = NewCondReg;
+ else { // If not the first, we create an AND.
+ Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
+ BuildMI(LoopBB, I, DL, TII.get(AndOpc), AndReg)
+ .addReg(CondReg)
+ .addReg(NewCondReg);
+ CondReg = AndReg;
+ }
- ReadlanePieces.push_back(CurRegLo);
- ReadlanePieces.push_back(CurRegHi);
+ // Update ScalarOp operand to use the SGPR ScalarOp.
+ ScalarOp->setReg(CurReg);
+ ScalarOp->setIsKill();
+ } else {
+ unsigned VScalarOpUndef = getUndefRegState(ScalarOp->isUndef());
+ assert(NumSubRegs % 2 == 0 && NumSubRegs <= 32 &&
+ "Unhandled register size");
- // Comparison is to be done as 64-bit.
- Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
- BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg)
+ for (unsigned Idx = 0; Idx < NumSubRegs; Idx += 2) {
+ Register CurRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ Register CurRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+
+ // Read the next variant <- also loop target.
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegLo)
+ .addReg(VScalarOp, VScalarOpUndef, TRI->getSubRegFromChannel(Idx));
+
+ // Read the next variant <- also loop target.
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurRegHi)
+ .addReg(VScalarOp, VScalarOpUndef,
+ TRI->getSubRegFromChannel(Idx + 1));
+
+ ReadlanePieces.push_back(CurRegLo);
+ ReadlanePieces.push_back(CurRegHi);
+
+ // Comparison is to be done as 64-bit.
+ Register CurReg = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), CurReg)
.addReg(CurRegLo)
.addImm(AMDGPU::sub0)
.addReg(CurRegHi)
.addImm(AMDGPU::sub1);
- Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
- auto Cmp =
- BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), NewCondReg)
- .addReg(CurReg);
- if (NumSubRegs <= 2)
- Cmp.addReg(VRsrc);
- else
- Cmp.addReg(VRsrc, VRsrcUndef, TRI->getSubRegFromChannel(Idx, 2));
-
- // Combine the comparison results with AND.
- if (!CondReg) // First.
- CondReg = NewCondReg;
- else { // If not the first, we create an AND.
- Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
- BuildMI(LoopBB, I, DL, TII.get(AndOpc), AndReg)
+ Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC);
+ auto Cmp = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64),
+ NewCondReg)
+ .addReg(CurReg);
+ if (NumSubRegs <= 2)
+ Cmp.addReg(VScalarOp);
+ else
+ Cmp.addReg(VScalarOp, VScalarOpUndef,
+ TRI->getSubRegFromChannel(Idx, 2));
+
+ // Combine the comparison results with AND.
+ if (!CondReg) // First.
+ CondReg = NewCondReg;
+ else { // If not the first, we create an AND.
+ Register AndReg = MRI.createVirtualRegister(BoolXExecRC);
+ BuildMI(LoopBB, I, DL, TII.get(AndOpc), AndReg)
.addReg(CondReg)
.addReg(NewCondReg);
- CondReg = AndReg;
- }
- } // End for loop.
-
- auto SRsrcRC = TRI->getEquivalentSGPRClass(MRI.getRegClass(VRsrc));
- Register SRsrc = MRI.createVirtualRegister(SRsrcRC);
+ CondReg = AndReg;
+ }
+ } // End for loop.
+
+ auto SScalarOpRC =
+ TRI->getEquivalentSGPRClass(MRI.getRegClass(VScalarOp));
+ Register SScalarOp = MRI.createVirtualRegister(SScalarOpRC);
+
+ // Build scalar ScalarOp.
+ auto Merge =
+ BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SScalarOp);
+ unsigned Channel = 0;
+ for (Register Piece : ReadlanePieces) {
+ Merge.addReg(Piece).addImm(TRI->getSubRegFromChannel(Channel++));
+ }
- // Build scalar Rsrc.
- auto Merge = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::REG_SEQUENCE), SRsrc);
- unsigned Channel = 0;
- for (Register Piece : ReadlanePieces) {
- Merge.addReg(Piece)
- .addImm(TRI->getSubRegFromChannel(Channel++));
+ // Update ScalarOp operand to use the SGPR ScalarOp.
+ ScalarOp->setReg(SScalarOp);
+ ScalarOp->setIsKill();
+ }
}
- // Update Rsrc operand to use the SGPR Rsrc.
- Rsrc.setReg(SRsrc);
- Rsrc.setIsKill();
-
Register SaveExec = MRI.createVirtualRegister(BoolXExecRC);
MRI.setSimpleHint(SaveExec, CondReg);
@@ -5694,14 +5858,15 @@ emitLoadSRsrcFromVGPRLoop(const SIInstrInfo &TII, MachineRegisterInfo &MRI,
BuildMI(BodyBB, I, DL, TII.get(AMDGPU::SI_WATERFALL_LOOP)).addMBB(&LoopBB);
}
-// Build a waterfall loop around \p MI, replacing the VGPR \p Rsrc register
+// Build a waterfall loop around \p MI, replacing the VGPR \p ScalarOp register
// with SGPRs by iterating over all unique values across all lanes.
// Returns the loop basic block that now contains \p MI.
static MachineBasicBlock *
-loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
- MachineOperand &Rsrc, MachineDominatorTree *MDT,
- MachineBasicBlock::iterator Begin = nullptr,
- MachineBasicBlock::iterator End = nullptr) {
+loadMBUFScalarOperandsFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
+ ArrayRef<MachineOperand *> ScalarOps,
+ MachineDominatorTree *MDT,
+ MachineBasicBlock::iterator Begin = nullptr,
+ MachineBasicBlock::iterator End = nullptr) {
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -5728,11 +5893,8 @@ loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
MachineBasicBlock::iterator AfterMI = MI;
++AfterMI;
for (auto I = Begin; I != AfterMI; I++) {
- for (auto &MO : I->uses()) {
- if (MO.isReg() && MO.isUse()) {
- MRI.clearKillFlags(MO.getReg());
- }
- }
+ for (auto &MO : I->all_uses())
+ MRI.clearKillFlags(MO.getReg());
}
// To insert the loop we need to split the block. Move everything after this
@@ -5774,7 +5936,7 @@ loadSRsrcFromVGPR(const SIInstrInfo &TII, MachineInstr &MI,
}
}
- emitLoadSRsrcFromVGPRLoop(TII, MRI, MBB, *LoopBB, *BodyBB, DL, Rsrc);
+ emitLoadScalarOpsFromVGPRLoop(TII, MRI, MBB, *LoopBB, *BodyBB, DL, ScalarOps);
// Restore the EXEC mask
MachineBasicBlock::iterator First = RemainderBB->begin();
@@ -5971,11 +6133,11 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
(isMUBUF(MI) || isMTBUF(MI)))) {
MachineOperand *SRsrc = getNamedOperand(MI, AMDGPU::OpName::srsrc);
if (SRsrc && !RI.isSGPRClass(MRI.getRegClass(SRsrc->getReg())))
- CreatedBB = loadSRsrcFromVGPR(*this, MI, *SRsrc, MDT);
+ CreatedBB = loadMBUFScalarOperandsFromVGPR(*this, MI, {SRsrc}, MDT);
MachineOperand *SSamp = getNamedOperand(MI, AMDGPU::OpName::ssamp);
if (SSamp && !RI.isSGPRClass(MRI.getRegClass(SSamp->getReg())))
- CreatedBB = loadSRsrcFromVGPR(*this, MI, *SSamp, MDT);
+ CreatedBB = loadMBUFScalarOperandsFromVGPR(*this, MI, {SSamp}, MDT);
return CreatedBB;
}
@@ -6003,25 +6165,39 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
while (End != MBB.end() && End->isCopy() && End->getOperand(1).isReg() &&
MI.definesRegister(End->getOperand(1).getReg()))
++End;
- CreatedBB = loadSRsrcFromVGPR(*this, MI, *Dest, MDT, Start, End);
+ CreatedBB =
+ loadMBUFScalarOperandsFromVGPR(*this, MI, {Dest}, MDT, Start, End);
}
}
- // Legalize MUBUF* instructions.
+ // Legalize MUBUF instructions.
+ bool isSoffsetLegal = true;
+ int SoffsetIdx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::soffset);
+ if (SoffsetIdx != -1) {
+ MachineOperand *Soffset = &MI.getOperand(SoffsetIdx);
+ if (Soffset->isReg() &&
+ !RI.isSGPRClass(MRI.getRegClass(Soffset->getReg()))) {
+ isSoffsetLegal = false;
+ }
+ }
+
+ bool isRsrcLegal = true;
int RsrcIdx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
if (RsrcIdx != -1) {
- // We have an MUBUF instruction
MachineOperand *Rsrc = &MI.getOperand(RsrcIdx);
- unsigned RsrcRC = get(MI.getOpcode()).operands()[RsrcIdx].RegClass;
- if (RI.getCommonSubClass(MRI.getRegClass(Rsrc->getReg()),
- RI.getRegClass(RsrcRC))) {
- // The operands are legal.
- // FIXME: We may need to legalize operands besides srsrc.
- return CreatedBB;
+ if (Rsrc->isReg() && !RI.isSGPRClass(MRI.getRegClass(Rsrc->getReg()))) {
+ isRsrcLegal = false;
}
+ }
- // Legalize a VGPR Rsrc.
+ // The operands are legal.
+ if (isRsrcLegal && isSoffsetLegal)
+ return CreatedBB;
+
+ if (!isRsrcLegal) {
+ // Legalize a VGPR Rsrc
//
// If the instruction is _ADDR64, we can avoid a waterfall by extracting
// the base pointer from the VGPR Rsrc, adding it to the VAddr, then using
@@ -6034,6 +6210,7 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
// Otherwise we are on non-ADDR64 hardware, and/or we have
// idxen/offen/bothen and we fall back to a waterfall loop.
+ MachineOperand *Rsrc = &MI.getOperand(RsrcIdx);
MachineBasicBlock &MBB = *MI.getParent();
MachineOperand *VAddr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
@@ -6143,433 +6320,447 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
.addReg(RsrcPtr, 0, AMDGPU::sub1)
.addImm(AMDGPU::sub1);
} else {
- // This is another variant; legalize Rsrc with waterfall loop from VGPRs
- // to SGPRs.
- CreatedBB = loadSRsrcFromVGPR(*this, MI, *Rsrc, MDT);
+ // Legalize a VGPR Rsrc and soffset together.
+ if (!isSoffsetLegal) {
+ MachineOperand *Soffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
+ CreatedBB =
+ loadMBUFScalarOperandsFromVGPR(*this, MI, {Rsrc, Soffset}, MDT);
+ return CreatedBB;
+ }
+ CreatedBB = loadMBUFScalarOperandsFromVGPR(*this, MI, {Rsrc}, MDT);
return CreatedBB;
}
}
+
+ // Legalize a VGPR soffset.
+ if (!isSoffsetLegal) {
+ MachineOperand *Soffset = getNamedOperand(MI, AMDGPU::OpName::soffset);
+ CreatedBB = loadMBUFScalarOperandsFromVGPR(*this, MI, {Soffset}, MDT);
+ return CreatedBB;
+ }
return CreatedBB;
}
-MachineBasicBlock *SIInstrInfo::moveToVALU(MachineInstr &TopInst,
- MachineDominatorTree *MDT) const {
- SetVectorType Worklist;
- Worklist.insert(&TopInst);
- MachineBasicBlock *CreatedBB = nullptr;
- MachineBasicBlock *CreatedBBTmp = nullptr;
-
- while (!Worklist.empty()) {
- MachineInstr &Inst = *Worklist.pop_back_val();
- MachineBasicBlock *MBB = Inst.getParent();
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-
- unsigned Opcode = Inst.getOpcode();
- unsigned NewOpcode = getVALUOp(Inst);
-
- // Handle some special cases
- switch (Opcode) {
- default:
- break;
- case AMDGPU::S_ADD_U64_PSEUDO:
- case AMDGPU::S_SUB_U64_PSEUDO:
- splitScalar64BitAddSub(Worklist, Inst, MDT);
- Inst.eraseFromParent();
- continue;
- case AMDGPU::S_ADD_I32:
- case AMDGPU::S_SUB_I32: {
- // FIXME: The u32 versions currently selected use the carry.
- bool Changed;
- std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
- if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp)
- CreatedBB = CreatedBBTmp;
- if (Changed)
- continue;
-
- // Default handling
- break;
- }
- case AMDGPU::S_AND_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
- Inst.eraseFromParent();
- continue;
-
- case AMDGPU::S_OR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
- Inst.eraseFromParent();
- continue;
-
- case AMDGPU::S_XOR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
- Inst.eraseFromParent();
- continue;
-
- case AMDGPU::S_NAND_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
- Inst.eraseFromParent();
- continue;
-
- case AMDGPU::S_NOR_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
- Inst.eraseFromParent();
- continue;
+void SIInstrWorklist::insert(MachineInstr *MI) {
+ InstrList.insert(MI);
+ // Add MBUF instructions to the deferred list.
+ int RsrcIdx =
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::srsrc);
+ if (RsrcIdx != -1) {
+ DeferredList.insert(MI);
+ }
+}
- case AMDGPU::S_XNOR_B64:
- if (ST.hasDLInsts())
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
- else
- splitScalar64BitXnor(Worklist, Inst, MDT);
- Inst.eraseFromParent();
- continue;
+bool SIInstrWorklist::isDeferred(MachineInstr *MI) {
+ return DeferredList.contains(MI);
+}
- case AMDGPU::S_ANDN2_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
- Inst.eraseFromParent();
- continue;
+void SIInstrInfo::moveToVALU(SIInstrWorklist &Worklist,
+ MachineDominatorTree *MDT) const {
- case AMDGPU::S_ORN2_B64:
- splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
- Inst.eraseFromParent();
+ while (!Worklist.empty()) {
+ MachineInstr &Inst = *Worklist.top();
+ Worklist.erase_top();
+ // Skip any MachineInstr that is in the deferred list.
+ if (Worklist.isDeferred(&Inst))
continue;
+ moveToVALUImpl(Worklist, MDT, Inst);
+ }
- case AMDGPU::S_BREV_B64:
- splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32, true);
- Inst.eraseFromParent();
- continue;
+ // The deferred list of instructions is processed once all the MachineInstrs
+ // in the worklist are done.
+ for (MachineInstr *Inst : Worklist.getDeferredList()) {
+ moveToVALUImpl(Worklist, MDT, *Inst);
+ assert(Worklist.empty() &&
+ "Deferred MachineInstr are not supposed to re-populate worklist");
+ }
+}
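For readers tracking the interface change: moveToVALU no longer takes a single instruction and returns a MachineBasicBlock*; callers seed an SIInstrWorklist and this driver drains it, handling the deferred MUBUF users last. A rough caller-side sketch, not part of the patch, assuming SIInstrWorklist is default-constructible and that TII, MDT, and ScalarMI come from the surrounding pass:

  SIInstrWorklist Worklist;
  Worklist.insert(&ScalarMI);      // instructions with an srsrc operand also go on the deferred list
  TII->moveToVALU(Worklist, MDT);  // drains the worklist, then processes the deferred instructions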
- case AMDGPU::S_NOT_B64:
- splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
- Inst.eraseFromParent();
- continue;
+void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
+ MachineDominatorTree *MDT,
+ MachineInstr &Inst) const {
- case AMDGPU::S_BCNT1_I32_B64:
- splitScalar64BitBCNT(Worklist, Inst);
- Inst.eraseFromParent();
- continue;
+ MachineBasicBlock *MBB = Inst.getParent();
+ if (!MBB)
+ return;
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+ unsigned Opcode = Inst.getOpcode();
+ unsigned NewOpcode = getVALUOp(Inst);
+ // Handle some special cases
+ switch (Opcode) {
+ default:
+ break;
+ case AMDGPU::S_ADD_U64_PSEUDO:
+ case AMDGPU::S_SUB_U64_PSEUDO:
+ splitScalar64BitAddSub(Worklist, Inst, MDT);
+ Inst.eraseFromParent();
+ return;
+ case AMDGPU::S_ADD_I32:
+ case AMDGPU::S_SUB_I32: {
+ // FIXME: The u32 versions currently selected use the carry.
+ bool Changed;
+ MachineBasicBlock *CreatedBBTmp = nullptr;
+ std::tie(Changed, CreatedBBTmp) = moveScalarAddSub(Worklist, Inst, MDT);
+ if (Changed)
+ return;
- case AMDGPU::S_BFE_I64:
- splitScalar64BitBFE(Worklist, Inst);
- Inst.eraseFromParent();
- continue;
+ // Default handling
+ break;
+ }
+ case AMDGPU::S_AND_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32, MDT);
+ Inst.eraseFromParent();
+ return;
- case AMDGPU::S_LSHL_B32:
- if (ST.hasOnlyRevVALUShifts()) {
- NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
- swapOperands(Inst);
- }
- break;
- case AMDGPU::S_ASHR_I32:
- if (ST.hasOnlyRevVALUShifts()) {
- NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
- swapOperands(Inst);
- }
- break;
- case AMDGPU::S_LSHR_B32:
- if (ST.hasOnlyRevVALUShifts()) {
- NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
- swapOperands(Inst);
- }
- break;
- case AMDGPU::S_LSHL_B64:
- if (ST.hasOnlyRevVALUShifts()) {
- NewOpcode = AMDGPU::V_LSHLREV_B64_e64;
- swapOperands(Inst);
- }
- break;
- case AMDGPU::S_ASHR_I64:
- if (ST.hasOnlyRevVALUShifts()) {
- NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
- swapOperands(Inst);
- }
- break;
- case AMDGPU::S_LSHR_B64:
- if (ST.hasOnlyRevVALUShifts()) {
- NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
- swapOperands(Inst);
- }
- break;
+ case AMDGPU::S_OR_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32, MDT);
+ Inst.eraseFromParent();
+ return;
- case AMDGPU::S_ABS_I32:
- lowerScalarAbs(Worklist, Inst);
- Inst.eraseFromParent();
- continue;
+ case AMDGPU::S_XOR_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32, MDT);
+ Inst.eraseFromParent();
+ return;
- case AMDGPU::S_CBRANCH_SCC0:
- case AMDGPU::S_CBRANCH_SCC1: {
- // Clear unused bits of vcc
- Register CondReg = Inst.getOperand(1).getReg();
- bool IsSCC = CondReg == AMDGPU::SCC;
- Register VCC = RI.getVCC();
- Register EXEC = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
- unsigned Opc = ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
- BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(Opc), VCC)
- .addReg(EXEC)
- .addReg(IsSCC ? VCC : CondReg);
- Inst.removeOperand(1);
- }
- break;
+ case AMDGPU::S_NAND_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NAND_B32, MDT);
+ Inst.eraseFromParent();
+ return;
- case AMDGPU::S_BFE_U64:
- case AMDGPU::S_BFM_B64:
- llvm_unreachable("Moving this op to VALU not implemented");
+ case AMDGPU::S_NOR_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_NOR_B32, MDT);
+ Inst.eraseFromParent();
+ return;
- case AMDGPU::S_PACK_LL_B32_B16:
- case AMDGPU::S_PACK_LH_B32_B16:
- case AMDGPU::S_PACK_HL_B32_B16:
- case AMDGPU::S_PACK_HH_B32_B16:
- movePackToVALU(Worklist, MRI, Inst);
- Inst.eraseFromParent();
- continue;
+ case AMDGPU::S_XNOR_B64:
+ if (ST.hasDLInsts())
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32, MDT);
+ else
+ splitScalar64BitXnor(Worklist, Inst, MDT);
+ Inst.eraseFromParent();
+ return;
- case AMDGPU::S_XNOR_B32:
- lowerScalarXnor(Worklist, Inst);
- Inst.eraseFromParent();
- continue;
+ case AMDGPU::S_ANDN2_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ANDN2_B32, MDT);
+ Inst.eraseFromParent();
+ return;
- case AMDGPU::S_NAND_B32:
- splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
- Inst.eraseFromParent();
- continue;
+ case AMDGPU::S_ORN2_B64:
+ splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_ORN2_B32, MDT);
+ Inst.eraseFromParent();
+ return;
- case AMDGPU::S_NOR_B32:
- splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
- Inst.eraseFromParent();
- continue;
+ case AMDGPU::S_BREV_B64:
+ splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_BREV_B32, true);
+ Inst.eraseFromParent();
+ return;
- case AMDGPU::S_ANDN2_B32:
- splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
- Inst.eraseFromParent();
- continue;
+ case AMDGPU::S_NOT_B64:
+ splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32);
+ Inst.eraseFromParent();
+ return;
- case AMDGPU::S_ORN2_B32:
- splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
- Inst.eraseFromParent();
- continue;
+ case AMDGPU::S_BCNT1_I32_B64:
+ splitScalar64BitBCNT(Worklist, Inst);
+ Inst.eraseFromParent();
+ return;
- // TODO: remove as soon as everything is ready
- // to replace VGPR to SGPR copy with V_READFIRSTLANEs.
- // S_ADD/SUB_CO_PSEUDO as well as S_UADDO/USUBO_PSEUDO
- // can only be selected from the uniform SDNode.
- case AMDGPU::S_ADD_CO_PSEUDO:
- case AMDGPU::S_SUB_CO_PSEUDO: {
- unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
- ? AMDGPU::V_ADDC_U32_e64
- : AMDGPU::V_SUBB_U32_e64;
- const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
-
- Register CarryInReg = Inst.getOperand(4).getReg();
- if (!MRI.constrainRegClass(CarryInReg, CarryRC)) {
- Register NewCarryReg = MRI.createVirtualRegister(CarryRC);
- BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), NewCarryReg)
- .addReg(CarryInReg);
- }
+ case AMDGPU::S_BFE_I64:
+ splitScalar64BitBFE(Worklist, Inst);
+ Inst.eraseFromParent();
+ return;
- Register CarryOutReg = Inst.getOperand(1).getReg();
-
- Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
- MRI.getRegClass(Inst.getOperand(0).getReg())));
- MachineInstr *CarryOp =
- BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(Opc), DestReg)
- .addReg(CarryOutReg, RegState::Define)
- .add(Inst.getOperand(2))
- .add(Inst.getOperand(3))
- .addReg(CarryInReg)
- .addImm(0);
- CreatedBBTmp = legalizeOperands(*CarryOp);
- if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp)
- CreatedBB = CreatedBBTmp;
- MRI.replaceRegWith(Inst.getOperand(0).getReg(), DestReg);
- addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
- Inst.eraseFromParent();
+ case AMDGPU::S_LSHL_B32:
+ if (ST.hasOnlyRevVALUShifts()) {
+ NewOpcode = AMDGPU::V_LSHLREV_B32_e64;
+ swapOperands(Inst);
}
- continue;
- case AMDGPU::S_UADDO_PSEUDO:
- case AMDGPU::S_USUBO_PSEUDO: {
- const DebugLoc &DL = Inst.getDebugLoc();
- MachineOperand &Dest0 = Inst.getOperand(0);
- MachineOperand &Dest1 = Inst.getOperand(1);
- MachineOperand &Src0 = Inst.getOperand(2);
- MachineOperand &Src1 = Inst.getOperand(3);
-
- unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
- ? AMDGPU::V_ADD_CO_U32_e64
- : AMDGPU::V_SUB_CO_U32_e64;
- const TargetRegisterClass *NewRC =
- RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg()));
- Register DestReg = MRI.createVirtualRegister(NewRC);
- MachineInstr *NewInstr = BuildMI(*MBB, &Inst, DL, get(Opc), DestReg)
- .addReg(Dest1.getReg(), RegState::Define)
- .add(Src0)
- .add(Src1)
- .addImm(0); // clamp bit
-
- CreatedBBTmp = legalizeOperands(*NewInstr, MDT);
- if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp)
- CreatedBB = CreatedBBTmp;
-
- MRI.replaceRegWith(Dest0.getReg(), DestReg);
- addUsersToMoveToVALUWorklist(NewInstr->getOperand(0).getReg(), MRI,
- Worklist);
- Inst.eraseFromParent();
+ break;
+ case AMDGPU::S_ASHR_I32:
+ if (ST.hasOnlyRevVALUShifts()) {
+ NewOpcode = AMDGPU::V_ASHRREV_I32_e64;
+ swapOperands(Inst);
}
- continue;
-
- case AMDGPU::S_CSELECT_B32:
- case AMDGPU::S_CSELECT_B64:
- lowerSelect(Worklist, Inst, MDT);
- Inst.eraseFromParent();
- continue;
- case AMDGPU::S_CMP_EQ_I32:
- case AMDGPU::S_CMP_LG_I32:
- case AMDGPU::S_CMP_GT_I32:
- case AMDGPU::S_CMP_GE_I32:
- case AMDGPU::S_CMP_LT_I32:
- case AMDGPU::S_CMP_LE_I32:
- case AMDGPU::S_CMP_EQ_U32:
- case AMDGPU::S_CMP_LG_U32:
- case AMDGPU::S_CMP_GT_U32:
- case AMDGPU::S_CMP_GE_U32:
- case AMDGPU::S_CMP_LT_U32:
- case AMDGPU::S_CMP_LE_U32:
- case AMDGPU::S_CMP_EQ_U64:
- case AMDGPU::S_CMP_LG_U64: {
- const MCInstrDesc &NewDesc = get(NewOpcode);
- Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
- MachineInstr *NewInstr =
- BuildMI(*MBB, Inst, Inst.getDebugLoc(), NewDesc, CondReg)
- .add(Inst.getOperand(0))
- .add(Inst.getOperand(1));
- legalizeOperands(*NewInstr, MDT);
- int SCCIdx = Inst.findRegisterDefOperandIdx(AMDGPU::SCC);
- MachineOperand SCCOp = Inst.getOperand(SCCIdx);
- addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
- Inst.eraseFromParent();
- }
- continue;
+ break;
+ case AMDGPU::S_LSHR_B32:
+ if (ST.hasOnlyRevVALUShifts()) {
+ NewOpcode = AMDGPU::V_LSHRREV_B32_e64;
+ swapOperands(Inst);
}
-
- if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
- // We cannot move this instruction to the VALU, so we should try to
- // legalize its operands instead.
- CreatedBBTmp = legalizeOperands(Inst, MDT);
- if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp)
- CreatedBB = CreatedBBTmp;
- continue;
+ break;
+ case AMDGPU::S_LSHL_B64:
+ if (ST.hasOnlyRevVALUShifts()) {
+ NewOpcode = AMDGPU::V_LSHLREV_B64_e64;
+ swapOperands(Inst);
}
-
- // Handle converting generic instructions like COPY-to-SGPR into
- // COPY-to-VGPR.
- if (NewOpcode == Opcode) {
- Register DstReg = Inst.getOperand(0).getReg();
- const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
-
- if (Inst.isCopy() && Inst.getOperand(1).getReg().isVirtual() &&
- NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
- // Instead of creating a copy where src and dst are the same register
- // class, we just replace all uses of dst with src. These kinds of
- // copies interfere with the heuristics MachineSink uses to decide
- // whether or not to split a critical edge. Since the pass assumes
- // that copies will end up as machine instructions and not be
- // eliminated.
- addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
- MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg());
- MRI.clearKillFlags(Inst.getOperand(1).getReg());
- Inst.getOperand(0).setReg(DstReg);
-
- // Make sure we don't leave around a dead VGPR->SGPR copy. Normally
- // these are deleted later, but at -O0 it would leave a suspicious
- // looking illegal copy of an undef register.
- for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I)
- Inst.removeOperand(I);
- Inst.setDesc(get(AMDGPU::IMPLICIT_DEF));
- continue;
- }
-
- Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
- MRI.replaceRegWith(DstReg, NewDstReg);
- legalizeOperands(Inst, MDT);
- addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
- continue;
+ break;
+ case AMDGPU::S_ASHR_I64:
+ if (ST.hasOnlyRevVALUShifts()) {
+ NewOpcode = AMDGPU::V_ASHRREV_I64_e64;
+ swapOperands(Inst);
}
-
- // Use the new VALU Opcode.
- auto NewInstr = BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode))
- .setMIFlags(Inst.getFlags());
- for (const MachineOperand &Op : Inst.explicit_operands())
- NewInstr->addOperand(Op);
-
- // Remove any references to SCC. Vector instructions can't read from it, and
- // We're just about to add the implicit use / defs of VCC, and we don't want
- // both.
- for (MachineOperand &Op : Inst.implicit_operands()) {
- if (Op.getReg() == AMDGPU::SCC) {
- // Only propagate through live-def of SCC.
- if (Op.isDef() && !Op.isDead())
- addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
- if (Op.isUse())
- addSCCDefsToVALUWorklist(NewInstr, Worklist);
- }
+ break;
+ case AMDGPU::S_LSHR_B64:
+ if (ST.hasOnlyRevVALUShifts()) {
+ NewOpcode = AMDGPU::V_LSHRREV_B64_e64;
+ swapOperands(Inst);
}
+ break;
+ case AMDGPU::S_ABS_I32:
+ lowerScalarAbs(Worklist, Inst);
Inst.eraseFromParent();
+ return;
- Register NewDstReg;
- if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
- Register DstReg = NewInstr->getOperand(0).getReg();
- assert(DstReg.isVirtual());
+ case AMDGPU::S_CBRANCH_SCC0:
+ case AMDGPU::S_CBRANCH_SCC1: {
+ // Clear unused bits of vcc
+ Register CondReg = Inst.getOperand(1).getReg();
+ bool IsSCC = CondReg == AMDGPU::SCC;
+ Register VCC = RI.getVCC();
+ Register EXEC = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
+ unsigned Opc = ST.isWave32() ? AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64;
+ BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(Opc), VCC)
+ .addReg(EXEC)
+ .addReg(IsSCC ? VCC : CondReg);
+ Inst.removeOperand(1);
+ } break;
+
+ case AMDGPU::S_BFE_U64:
+ case AMDGPU::S_BFM_B64:
+ llvm_unreachable("Moving this op to VALU not implemented");
+
+ case AMDGPU::S_PACK_LL_B32_B16:
+ case AMDGPU::S_PACK_LH_B32_B16:
+ case AMDGPU::S_PACK_HL_B32_B16:
+ case AMDGPU::S_PACK_HH_B32_B16:
+ movePackToVALU(Worklist, MRI, Inst);
+ Inst.eraseFromParent();
+ return;
- // Update the destination register class.
- const TargetRegisterClass *NewDstRC =
- getDestEquivalentVGPRClass(*NewInstr);
- assert(NewDstRC);
+ case AMDGPU::S_XNOR_B32:
+ lowerScalarXnor(Worklist, Inst);
+ Inst.eraseFromParent();
+ return;
- NewDstReg = MRI.createVirtualRegister(NewDstRC);
- MRI.replaceRegWith(DstReg, NewDstReg);
- }
+ case AMDGPU::S_NAND_B32:
+ splitScalarNotBinop(Worklist, Inst, AMDGPU::S_AND_B32);
+ Inst.eraseFromParent();
+ return;
- if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
- // We are converting these to a BFE, so we need to add the missing
- // operands for the size and offset.
- unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
- NewInstr.addImm(0);
- NewInstr.addImm(Size);
- } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
- // The VALU version adds the second operand to the result, so insert an
- // extra 0 operand.
- NewInstr.addImm(0);
- }
+ case AMDGPU::S_NOR_B32:
+ splitScalarNotBinop(Worklist, Inst, AMDGPU::S_OR_B32);
+ Inst.eraseFromParent();
+ return;
- if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
- const MachineOperand &OffsetWidthOp = NewInstr->getOperand(2);
- // If we need to move this to VGPRs, we need to unpack the second operand
- // back into the 2 separate ones for bit offset and width.
- assert(OffsetWidthOp.isImm() &&
- "Scalar BFE is only implemented for constant width and offset");
- uint32_t Imm = OffsetWidthOp.getImm();
+ case AMDGPU::S_ANDN2_B32:
+ splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_AND_B32);
+ Inst.eraseFromParent();
+ return;
- uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
- uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
- NewInstr->removeOperand(2);
- NewInstr.addImm(Offset);
- NewInstr.addImm(BitWidth);
- }
+ case AMDGPU::S_ORN2_B32:
+ splitScalarBinOpN2(Worklist, Inst, AMDGPU::S_OR_B32);
+ Inst.eraseFromParent();
+ return;
- fixImplicitOperands(*NewInstr);
+ // TODO: remove as soon as everything is ready
+ // to replace VGPR to SGPR copy with V_READFIRSTLANEs.
+ // S_ADD/SUB_CO_PSEUDO as well as S_UADDO/USUBO_PSEUDO
+ // can only be selected from the uniform SDNode.
+ case AMDGPU::S_ADD_CO_PSEUDO:
+ case AMDGPU::S_SUB_CO_PSEUDO: {
+ unsigned Opc = (Inst.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
+ ? AMDGPU::V_ADDC_U32_e64
+ : AMDGPU::V_SUBB_U32_e64;
+ const auto *CarryRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
+
+ Register CarryInReg = Inst.getOperand(4).getReg();
+ if (!MRI.constrainRegClass(CarryInReg, CarryRC)) {
+ Register NewCarryReg = MRI.createVirtualRegister(CarryRC);
+ BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(AMDGPU::COPY), NewCarryReg)
+ .addReg(CarryInReg);
+ }
+
+ Register CarryOutReg = Inst.getOperand(1).getReg();
+
+ Register DestReg = MRI.createVirtualRegister(RI.getEquivalentVGPRClass(
+ MRI.getRegClass(Inst.getOperand(0).getReg())));
+ MachineInstr *CarryOp =
+ BuildMI(*MBB, &Inst, Inst.getDebugLoc(), get(Opc), DestReg)
+ .addReg(CarryOutReg, RegState::Define)
+ .add(Inst.getOperand(2))
+ .add(Inst.getOperand(3))
+ .addReg(CarryInReg)
+ .addImm(0);
+ legalizeOperands(*CarryOp);
+ MRI.replaceRegWith(Inst.getOperand(0).getReg(), DestReg);
+ addUsersToMoveToVALUWorklist(DestReg, MRI, Worklist);
+ Inst.eraseFromParent();
+ }
+ return;
+ case AMDGPU::S_UADDO_PSEUDO:
+ case AMDGPU::S_USUBO_PSEUDO: {
+ const DebugLoc &DL = Inst.getDebugLoc();
+ MachineOperand &Dest0 = Inst.getOperand(0);
+ MachineOperand &Dest1 = Inst.getOperand(1);
+ MachineOperand &Src0 = Inst.getOperand(2);
+ MachineOperand &Src1 = Inst.getOperand(3);
+
+ unsigned Opc = (Inst.getOpcode() == AMDGPU::S_UADDO_PSEUDO)
+ ? AMDGPU::V_ADD_CO_U32_e64
+ : AMDGPU::V_SUB_CO_U32_e64;
+ const TargetRegisterClass *NewRC =
+ RI.getEquivalentVGPRClass(MRI.getRegClass(Dest0.getReg()));
+ Register DestReg = MRI.createVirtualRegister(NewRC);
+ MachineInstr *NewInstr = BuildMI(*MBB, &Inst, DL, get(Opc), DestReg)
+ .addReg(Dest1.getReg(), RegState::Define)
+ .add(Src0)
+ .add(Src1)
+ .addImm(0); // clamp bit
+
+ legalizeOperands(*NewInstr, MDT);
+ MRI.replaceRegWith(Dest0.getReg(), DestReg);
+ addUsersToMoveToVALUWorklist(NewInstr->getOperand(0).getReg(), MRI,
+ Worklist);
+ Inst.eraseFromParent();
+ }
+ return;
- // Legalize the operands
- CreatedBBTmp = legalizeOperands(*NewInstr, MDT);
- if (CreatedBBTmp && TopInst.getParent() == CreatedBBTmp)
- CreatedBB = CreatedBBTmp;
+ case AMDGPU::S_CSELECT_B32:
+ case AMDGPU::S_CSELECT_B64:
+ lowerSelect(Worklist, Inst, MDT);
+ Inst.eraseFromParent();
+ return;
+ case AMDGPU::S_CMP_EQ_I32:
+ case AMDGPU::S_CMP_LG_I32:
+ case AMDGPU::S_CMP_GT_I32:
+ case AMDGPU::S_CMP_GE_I32:
+ case AMDGPU::S_CMP_LT_I32:
+ case AMDGPU::S_CMP_LE_I32:
+ case AMDGPU::S_CMP_EQ_U32:
+ case AMDGPU::S_CMP_LG_U32:
+ case AMDGPU::S_CMP_GT_U32:
+ case AMDGPU::S_CMP_GE_U32:
+ case AMDGPU::S_CMP_LT_U32:
+ case AMDGPU::S_CMP_LE_U32:
+ case AMDGPU::S_CMP_EQ_U64:
+ case AMDGPU::S_CMP_LG_U64: {
+ const MCInstrDesc &NewDesc = get(NewOpcode);
+ Register CondReg = MRI.createVirtualRegister(RI.getWaveMaskRegClass());
+ MachineInstr *NewInstr =
+ BuildMI(*MBB, Inst, Inst.getDebugLoc(), NewDesc, CondReg)
+ .add(Inst.getOperand(0))
+ .add(Inst.getOperand(1));
+ legalizeOperands(*NewInstr, MDT);
+ int SCCIdx = Inst.findRegisterDefOperandIdx(AMDGPU::SCC);
+ MachineOperand SCCOp = Inst.getOperand(SCCIdx);
+ addSCCDefUsersToVALUWorklist(SCCOp, Inst, Worklist, CondReg);
+ Inst.eraseFromParent();
+ }
+ return;
+ }
- if (NewDstReg)
- addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
+ if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
+ // We cannot move this instruction to the VALU, so we should try to
+ // legalize its operands instead.
+ legalizeOperands(Inst, MDT);
+ return;
}
- return CreatedBB;
+ // Handle converting generic instructions like COPY-to-SGPR into
+ // COPY-to-VGPR.
+ if (NewOpcode == Opcode) {
+ Register DstReg = Inst.getOperand(0).getReg();
+ const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(Inst);
+
+ if (Inst.isCopy() && Inst.getOperand(1).getReg().isVirtual() &&
+ NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
+ // Instead of creating a copy where src and dst are the same register
+ // class, we just replace all uses of dst with src. These kinds of
+ // copies interfere with the heuristics MachineSink uses to decide
+ // whether or not to split a critical edge, since the pass assumes
+ // that copies will end up as machine instructions and not be
+ // eliminated.
+ addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
+ MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg());
+ MRI.clearKillFlags(Inst.getOperand(1).getReg());
+ Inst.getOperand(0).setReg(DstReg);
+ // Make sure we don't leave around a dead VGPR->SGPR copy. Normally
+ // these are deleted later, but at -O0 it would leave a suspicious
+ // looking illegal copy of an undef register.
+ for (unsigned I = Inst.getNumOperands() - 1; I != 0; --I)
+ Inst.removeOperand(I);
+ Inst.setDesc(get(AMDGPU::IMPLICIT_DEF));
+ return;
+ }
+ Register NewDstReg = MRI.createVirtualRegister(NewDstRC);
+ MRI.replaceRegWith(DstReg, NewDstReg);
+ legalizeOperands(Inst, MDT);
+ addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
+ return;
+ }
+
+ // Use the new VALU Opcode.
+ auto NewInstr = BuildMI(*MBB, Inst, Inst.getDebugLoc(), get(NewOpcode))
+ .setMIFlags(Inst.getFlags());
+ for (const MachineOperand &Op : Inst.explicit_operands())
+ NewInstr->addOperand(Op);
+ // Remove any references to SCC. Vector instructions can't read from it, and
+ // we're just about to add the implicit use / defs of VCC, and we don't want
+ // both.
+ for (MachineOperand &Op : Inst.implicit_operands()) {
+ if (Op.getReg() == AMDGPU::SCC) {
+ // Only propagate through live-def of SCC.
+ if (Op.isDef() && !Op.isDead())
+ addSCCDefUsersToVALUWorklist(Op, Inst, Worklist);
+ if (Op.isUse())
+ addSCCDefsToVALUWorklist(NewInstr, Worklist);
+ }
+ }
+ Inst.eraseFromParent();
+ Register NewDstReg;
+ if (NewInstr->getOperand(0).isReg() && NewInstr->getOperand(0).isDef()) {
+ Register DstReg = NewInstr->getOperand(0).getReg();
+ assert(DstReg.isVirtual());
+ // Update the destination register class.
+ const TargetRegisterClass *NewDstRC = getDestEquivalentVGPRClass(*NewInstr);
+ assert(NewDstRC);
+ NewDstReg = MRI.createVirtualRegister(NewDstRC);
+ MRI.replaceRegWith(DstReg, NewDstReg);
+ }
+ if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
+ // We are converting these to a BFE, so we need to add the missing
+ // operands for the size and offset.
+ unsigned Size = (Opcode == AMDGPU::S_SEXT_I32_I8) ? 8 : 16;
+ NewInstr.addImm(0);
+ NewInstr.addImm(Size);
+ } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) {
+ // The VALU version adds the second operand to the result, so insert an
+ // extra 0 operand.
+ NewInstr.addImm(0);
+ }
+ if (Opcode == AMDGPU::S_BFE_I32 || Opcode == AMDGPU::S_BFE_U32) {
+ const MachineOperand &OffsetWidthOp = NewInstr->getOperand(2);
+ // If we need to move this to VGPRs, we need to unpack the second operand
+ // back into the 2 separate ones for bit offset and width.
+ assert(OffsetWidthOp.isImm() &&
+ "Scalar BFE is only implemented for constant width and offset");
+ uint32_t Imm = OffsetWidthOp.getImm();
+ uint32_t Offset = Imm & 0x3f; // Extract bits [5:0].
+ uint32_t BitWidth = (Imm & 0x7f0000) >> 16; // Extract bits [22:16].
+ NewInstr->removeOperand(2);
+ NewInstr.addImm(Offset);
+ NewInstr.addImm(BitWidth);
+ }
+ fixImplicitOperands(*NewInstr);
+ // Legalize the operands
+ legalizeOperands(*NewInstr, MDT);
+ if (NewDstReg)
+ addUsersToMoveToVALUWorklist(NewDstReg, MRI, Worklist);
}
// Add/sub require special handling to deal with carry outs.
std::pair<bool, MachineBasicBlock *>
-SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
+SIInstrInfo::moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
MachineDominatorTree *MDT) const {
if (ST.hasAddNoCarry()) {
// Assume there is no user of scc since we don't select this in that case.
@@ -6604,7 +6795,7 @@ SIInstrInfo::moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
return std::pair(false, nullptr);
}
-void SIInstrInfo::lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
+void SIInstrInfo::lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
MachineDominatorTree *MDT) const {
MachineBasicBlock &MBB = *Inst.getParent();
@@ -6680,7 +6871,7 @@ void SIInstrInfo::lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
-void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
+void SIInstrInfo::lowerScalarAbs(SIInstrWorklist &Worklist,
MachineInstr &Inst) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -6707,7 +6898,7 @@ void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
-void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
+void SIInstrInfo::lowerScalarXnor(SIInstrWorklist &Worklist,
MachineInstr &Inst) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -6772,7 +6963,7 @@ void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist,
}
}
-void SIInstrInfo::splitScalarNotBinop(SetVectorType &Worklist,
+void SIInstrInfo::splitScalarNotBinop(SIInstrWorklist &Worklist,
MachineInstr &Inst,
unsigned Opcode) const {
MachineBasicBlock &MBB = *Inst.getParent();
@@ -6801,7 +6992,7 @@ void SIInstrInfo::splitScalarNotBinop(SetVectorType &Worklist,
addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
}
-void SIInstrInfo::splitScalarBinOpN2(SetVectorType& Worklist,
+void SIInstrInfo::splitScalarBinOpN2(SIInstrWorklist &Worklist,
MachineInstr &Inst,
unsigned Opcode) const {
MachineBasicBlock &MBB = *Inst.getParent();
@@ -6830,9 +7021,9 @@ void SIInstrInfo::splitScalarBinOpN2(SetVectorType& Worklist,
addUsersToMoveToVALUWorklist(NewDest, MRI, Worklist);
}
-void SIInstrInfo::splitScalar64BitUnaryOp(
- SetVectorType &Worklist, MachineInstr &Inst,
- unsigned Opcode, bool Swap) const {
+void SIInstrInfo::splitScalar64BitUnaryOp(SIInstrWorklist &Worklist,
+ MachineInstr &Inst, unsigned Opcode,
+ bool Swap) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -6889,7 +7080,7 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
-void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
+void SIInstrInfo::splitScalar64BitAddSub(SIInstrWorklist &Worklist,
MachineInstr &Inst,
MachineDominatorTree *MDT) const {
bool IsAdd = (Inst.getOpcode() == AMDGPU::S_ADD_U64_PSEUDO);
@@ -6963,7 +7154,7 @@ void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
-void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist,
+void SIInstrInfo::splitScalar64BitBinaryOp(SIInstrWorklist &Worklist,
MachineInstr &Inst, unsigned Opcode,
MachineDominatorTree *MDT) const {
MachineBasicBlock &MBB = *Inst.getParent();
@@ -7030,7 +7221,7 @@ void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist,
addUsersToMoveToVALUWorklist(FullDestReg, MRI, Worklist);
}
-void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist,
+void SIInstrInfo::splitScalar64BitXnor(SIInstrWorklist &Worklist,
MachineInstr &Inst,
MachineDominatorTree *MDT) const {
MachineBasicBlock &MBB = *Inst.getParent();
@@ -7072,8 +7263,8 @@ void SIInstrInfo::splitScalar64BitXnor(SetVectorType &Worklist,
Worklist.insert(&Xor);
}
-void SIInstrInfo::splitScalar64BitBCNT(
- SetVectorType &Worklist, MachineInstr &Inst) const {
+void SIInstrInfo::splitScalar64BitBCNT(SIInstrWorklist &Worklist,
+ MachineInstr &Inst) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -7110,7 +7301,7 @@ void SIInstrInfo::splitScalar64BitBCNT(
addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
}
-void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
+void SIInstrInfo::splitScalar64BitBFE(SIInstrWorklist &Worklist,
MachineInstr &Inst) const {
MachineBasicBlock &MBB = *Inst.getParent();
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
@@ -7172,9 +7363,8 @@ void SIInstrInfo::splitScalar64BitBFE(SetVectorType &Worklist,
}
void SIInstrInfo::addUsersToMoveToVALUWorklist(
- Register DstReg,
- MachineRegisterInfo &MRI,
- SetVectorType &Worklist) const {
+ Register DstReg, MachineRegisterInfo &MRI,
+ SIInstrWorklist &Worklist) const {
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
E = MRI.use_end(); I != E;) {
MachineInstr &UseMI = *I->getParent();
@@ -7208,7 +7398,7 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
}
}
-void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
+void SIInstrInfo::movePackToVALU(SIInstrWorklist &Worklist,
MachineRegisterInfo &MRI,
MachineInstr &Inst) const {
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -7283,7 +7473,7 @@ void SIInstrInfo::movePackToVALU(SetVectorType &Worklist,
void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op,
MachineInstr &SCCDefInst,
- SetVectorType &Worklist,
+ SIInstrWorklist &Worklist,
Register NewCond) const {
// Ensure that def inst defines SCC, which is still live.
@@ -7326,7 +7516,7 @@ void SIInstrInfo::addSCCDefUsersToVALUWorklist(MachineOperand &Op,
// sure that the instruction that defines SCC is added to the moveToVALU
// worklist.
void SIInstrInfo::addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
- SetVectorType &Worklist) const {
+ SIInstrWorklist &Worklist) const {
// Look for a preceding instruction that either defines VCC or SCC. If VCC
// then there is nothing to do because the defining instruction has been
// converted to a VALU already. If SCC then that instruction needs to be
@@ -7811,6 +8001,16 @@ SIInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
return ArrayRef(TargetFlags);
}
+unsigned SIInstrInfo::getLiveRangeSplitOpcode(Register SrcReg,
+ const MachineFunction &MF) const {
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ assert(SrcReg.isVirtual());
+ if (MFI->checkFlag(SrcReg, AMDGPU::VirtRegFlag::WWM_REG))
+ return AMDGPU::WWM_COPY;
+
+ return AMDGPU::COPY;
+}
+
bool SIInstrInfo::isBasicBlockPrologue(const MachineInstr &MI) const {
return !MI.isTerminator() && MI.getOpcode() != AMDGPU::COPY &&
MI.modifiesRegister(AMDGPU::EXEC, &RI);
@@ -7843,7 +8043,9 @@ MachineInstrBuilder SIInstrInfo::getAddNoCarry(MachineBasicBlock &MBB,
// If available, prefer to use vcc.
Register UnusedCarry = !RS.isRegUsed(AMDGPU::VCC)
? Register(RI.getVCC())
- : RS.scavengeRegister(RI.getBoolRC(), I, 0, false);
+ : RS.scavengeRegisterBackwards(
+ *RI.getBoolRC(), I, /* RestoreAfter */ false,
+ 0, /* AllowSpill */ false);
// TODO: Users need to deal with this.
if (!UnusedCarry.isValid())
@@ -7874,10 +8076,15 @@ const MCInstrDesc &SIInstrInfo::getKillTerminatorFromPseudo(unsigned Opcode) con
}
}
+unsigned SIInstrInfo::getMaxMUBUFImmOffset() { return (1 << 12) - 1; }
+
void SIInstrInfo::fixImplicitOperands(MachineInstr &MI) const {
if (!ST.isWave32())
return;
+ if (MI.isInlineAsm())
+ return;
+
for (auto &Op : MI.implicit_operands()) {
if (Op.isReg() && Op.getReg() == AMDGPU::VCC)
Op.setReg(AMDGPU::VCC_LO);
@@ -7897,6 +8104,52 @@ bool SIInstrInfo::isBufferSMRD(const MachineInstr &MI) const {
return RI.getRegClass(RCID)->hasSubClassEq(&AMDGPU::SGPR_128RegClass);
}
+// Given Imm, split it into the values to put into the SOffset and ImmOffset
+// fields in an MUBUF instruction. Return false if it is not possible (due to a
+// hardware bug needing a workaround).
+//
+// The required alignment ensures that individual address components remain
+// aligned if they are aligned to begin with. It also ensures that additional
+// offsets within the given alignment can be added to the resulting ImmOffset.
+bool SIInstrInfo::splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset,
+ uint32_t &ImmOffset, Align Alignment) const {
+ const uint32_t MaxOffset = SIInstrInfo::getMaxMUBUFImmOffset();
+ const uint32_t MaxImm = alignDown(MaxOffset, Alignment.value());
+ uint32_t Overflow = 0;
+
+ if (Imm > MaxImm) {
+ if (Imm <= MaxImm + 64) {
+ // Use an SOffset inline constant for 4..64
+ Overflow = Imm - MaxImm;
+ Imm = MaxImm;
+ } else {
+ // Try to keep the same value in SOffset for adjacent loads, so that
+ // the corresponding register contents can be re-used.
+ //
+ // Load values with all low-bits (except for alignment bits) set into
+ // SOffset, so that a larger range of values can be covered using
+ // s_movk_i32.
+ //
+ // Atomic operations fail to work correctly when individual address
+ // components are unaligned, even if their sum is aligned.
+ uint32_t High = (Imm + Alignment.value()) & ~MaxOffset;
+ uint32_t Low = (Imm + Alignment.value()) & MaxOffset;
+ Imm = Low;
+ Overflow = High - Alignment.value();
+ }
+ }
+
+ // There is a hardware bug in SI and CI which prevents address clamping in
+ // MUBUF instructions from working correctly with SOffsets. The immediate
+ // offset is unaffected.
+ if (Overflow > 0 && ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
+ return false;
+
+ ImmOffset = Imm;
+ SOffset = Overflow;
+ return true;
+}
+
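As a concrete illustration of the splitting rule above, here is a small standalone sketch, not part of the patch, that reproduces the arithmetic for one example offset using the 12-bit limit returned by getMaxMUBUFImmOffset(); the chosen offset (5000) and alignment (4) are arbitrary, and alignDown is expanded by hand for a power-of-two alignment.

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint32_t MaxOffset = (1u << 12) - 1;            // 4095, getMaxMUBUFImmOffset()
    const uint32_t Alignment = 4;                         // required alignment in bytes
    const uint32_t MaxImm = MaxOffset & ~(Alignment - 1); // alignDown(4095, 4) == 4092
    uint32_t Imm = 5000, Overflow = 0;                    // offset to be split

    if (Imm > MaxImm) {
      if (Imm <= MaxImm + 64) {
        // Small overflow: put the excess into SOffset as an inline constant.
        Overflow = Imm - MaxImm;
        Imm = MaxImm;
      } else {
        // Put an all-low-bits-set (except alignment bits) value into SOffset so
        // adjacent accesses can reuse the same SOffset register contents.
        uint32_t High = (Imm + Alignment) & ~MaxOffset;   // 4096
        uint32_t Low = (Imm + Alignment) & MaxOffset;     // 908
        Imm = Low;
        Overflow = High - Alignment;                      // 4092
      }
    }
    // ImmOffset = 908 and SOffset = 4092 still sum to the original 5000.
    assert(Imm + Overflow == 5000 && Imm <= MaxImm);
    return 0;
  }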
// Depending on the used address space and instructions, some immediate offsets
// are allowed and some are not.
// In general, flat instruction offsets can only be non-negative, global and
@@ -7987,23 +8240,7 @@ SIInstrInfo::splitFlatOffset(int64_t COffsetVal, unsigned AddrSpace,
return {ImmField, RemainderOffset};
}
-// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
-// and the columns of the getMCOpcodeGen table.
-enum SIEncodingFamily {
- SI = 0,
- VI = 1,
- SDWA = 2,
- SDWA9 = 3,
- GFX80 = 4,
- GFX9 = 5,
- GFX10 = 6,
- SDWA10 = 7,
- GFX90A = 8,
- GFX940 = 9,
- GFX11 = 10,
-};
-
-static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
+static unsigned subtargetEncodingFamily(const GCNSubtarget &ST) {
switch (ST.getGeneration()) {
default:
break;
@@ -8042,7 +8279,7 @@ bool SIInstrInfo::isAsmOnlyOpcode(int MCOp) const {
}
int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
- SIEncodingFamily Gen = subtargetEncodingFamily(ST);
+ unsigned Gen = subtargetEncodingFamily(ST);
if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
ST.getGeneration() == AMDGPUSubtarget::GFX9)
@@ -8325,7 +8562,7 @@ MachineInstr *SIInstrInfo::foldMemoryOperandImpl(
// A similar issue also exists with spilling and reloading $exec registers.
//
// To prevent that, constrain the %0 register class here.
- if (MI.isFullCopy()) {
+ if (isFullCopyInstr(MI)) {
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
if ((DstReg.isVirtual() || SrcReg.isVirtual()) &&
@@ -8368,9 +8605,20 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
unsigned opcode = MI.getOpcode();
if (opcode == AMDGPU::G_INTRINSIC ||
opcode == AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS) {
- return AMDGPU::isIntrinsicSourceOfDivergence(MI.getIntrinsicID())
- ? InstructionUniformity::NeverUniform
- : InstructionUniformity::AlwaysUniform;
+ auto IID = static_cast<Intrinsic::ID>(MI.getIntrinsicID());
+ if (AMDGPU::isIntrinsicSourceOfDivergence(IID))
+ return InstructionUniformity::NeverUniform;
+ if (AMDGPU::isIntrinsicAlwaysUniform(IID))
+ return InstructionUniformity::AlwaysUniform;
+
+ switch (IID) {
+ case Intrinsic::amdgcn_if:
+ case Intrinsic::amdgcn_else:
+ // FIXME: Uniform if second result
+ break;
+ }
+
+ return InstructionUniformity::Default;
}
// Loads from the private and flat address spaces are divergent, because
@@ -8403,6 +8651,29 @@ SIInstrInfo::getGenericInstructionUniformity(const MachineInstr &MI) const {
InstructionUniformity
SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
+
+ if (isNeverUniform(MI))
+ return InstructionUniformity::NeverUniform;
+
+ unsigned opcode = MI.getOpcode();
+ if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
+ return InstructionUniformity::AlwaysUniform;
+
+ if (isCopyInstr(MI)) {
+ const MachineOperand &srcOp = MI.getOperand(1);
+ if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
+ const TargetRegisterClass *regClass =
+ RI.getPhysRegBaseClass(srcOp.getReg());
+ return RI.isSGPRClass(regClass) ? InstructionUniformity::AlwaysUniform
+ : InstructionUniformity::NeverUniform;
+ }
+ return InstructionUniformity::Default;
+ }
+
+ // GMIR handling
+ if (MI.isPreISelOpcode())
+ return SIInstrInfo::getGenericInstructionUniformity(MI);
+
// Atomics are divergent because they are executed sequentially: when an
// atomic operation refers to the same address in each thread, then each
// thread after the first sees the value written by the previous thread as
@@ -8429,44 +8700,26 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
return InstructionUniformity::Default;
}
- unsigned opcode = MI.getOpcode();
- if (opcode == AMDGPU::COPY) {
- const MachineOperand &srcOp = MI.getOperand(1);
- if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
- const TargetRegisterClass *regClass = RI.getPhysRegBaseClass(srcOp.getReg());
- return RI.isSGPRClass(regClass) ? InstructionUniformity::AlwaysUniform
- : InstructionUniformity::NeverUniform;
- }
- return InstructionUniformity::Default;
- }
- if (opcode == AMDGPU::INLINEASM || opcode == AMDGPU::INLINEASM_BR) {
- const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
- for (auto &op : MI.operands()) {
- if (!op.isReg() || !op.isDef())
- continue;
- auto *RC = MRI.getRegClass(op.getReg());
- if (!RC || RI.isDivergentRegClass(RC))
- return InstructionUniformity::NeverUniform;
- }
- return InstructionUniformity::AlwaysUniform;
- }
- if (opcode == AMDGPU::V_READLANE_B32 || opcode == AMDGPU::V_READFIRSTLANE_B32)
- return InstructionUniformity::AlwaysUniform;
-
- if (opcode == AMDGPU::V_WRITELANE_B32)
- return InstructionUniformity::NeverUniform;
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ const AMDGPURegisterBankInfo *RBI = ST.getRegBankInfo();
+
+ // FIXME: It's conceptually broken to report this for an instruction, and not
+ // a specific def operand. For inline asm in particular, there could be mixed
+ // uniform and divergent results.
+ for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
+ const MachineOperand &SrcOp = MI.getOperand(I);
+ if (!SrcOp.isReg())
+ continue;
- // GMIR handling
- if (SIInstrInfo::isGenericOpcode(opcode))
- return SIInstrInfo::getGenericInstructionUniformity(MI);
+ Register Reg = SrcOp.getReg();
+ if (!Reg || !SrcOp.readsReg())
+ continue;
- // Handling $vpgr reads
- for (auto srcOp : MI.operands()) {
- if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
- const TargetRegisterClass *regClass = RI.getPhysRegBaseClass(srcOp.getReg());
- if (RI.isVGPRClass(regClass))
- return InstructionUniformity::NeverUniform;
- }
+ // If RegBank is null, this is an unassigned or unallocatable special
+ // register; such registers are all scalars.
+ const RegisterBank *RegBank = RBI->getRegBank(Reg, MRI, RI);
+ if (RegBank && RegBank->getID() != AMDGPU::SGPRRegBankID)
+ return InstructionUniformity::NeverUniform;
}
 // TODO: Uniformity check conditions above can be rearranged for more
@@ -8622,7 +8875,7 @@ bool SIInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
else
return false;
- unsigned BitNo = countTrailingZeros((uint64_t)Mask);
+ unsigned BitNo = llvm::countr_zero((uint64_t)Mask);
if (IsSigned && BitNo == SrcSize - 1)
return false;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 025faec0e2cc..b25aae7b2fb0 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -41,6 +41,41 @@ class ScheduleHazardRecognizer;
static const MachineMemOperand::Flags MONoClobber =
MachineMemOperand::MOTargetFlag1;
+/// Utility to store a worklist of machine instructions.
+struct SIInstrWorklist {
+ SIInstrWorklist() : InstrList() {}
+
+ void insert(MachineInstr *MI);
+
+ MachineInstr *top() const {
+ auto iter = InstrList.begin();
+ return *iter;
+ }
+
+ void erase_top() {
+ auto iter = InstrList.begin();
+ InstrList.erase(iter);
+ }
+
+ bool empty() const { return InstrList.empty(); }
+
+ void clear() {
+ InstrList.clear();
+ DeferredList.clear();
+ }
+
+ bool isDeferred(MachineInstr *MI);
+
+ SetVector<MachineInstr *> &getDeferredList() { return DeferredList; }
+
+private:
+ /// InstrList contains the MachineInstrs.
+ SetVector<MachineInstr *> InstrList;
+ /// Deferred instructions are specific MachineInstrs
+ /// that will be added by the insert method.
+ SetVector<MachineInstr *> DeferredList;
+};
+
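A minimal sketch of how a worklist like this is presumably drained (in this patch the actual draining is done by moveToVALU/moveToVALUImpl declared further down); drainWorklist and ProcessOne are hypothetical names, not part of the patch, and the include of "SIInstrInfo.h" assumes a file inside the AMDGPU target directory.

#include "SIInstrInfo.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Pop the oldest unprocessed entry, hand it to the callback, and let the
// callback push any newly created instructions back onto the same worklist.
static void drainWorklist(SIInstrWorklist &Worklist,
                          function_ref<void(MachineInstr &, SIInstrWorklist &)>
                              ProcessOne) {
  while (!Worklist.empty()) {
    MachineInstr *MI = Worklist.top(); // first-inserted entry
    Worklist.erase_top();
    ProcessOne(*MI, Worklist);
  }
}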
class SIInstrInfo final : public AMDGPUGenInstrInfo {
private:
const SIRegisterInfo RI;
@@ -81,57 +116,50 @@ private:
void swapOperands(MachineInstr &Inst) const;
std::pair<bool, MachineBasicBlock *>
- moveScalarAddSub(SetVectorType &Worklist, MachineInstr &Inst,
+ moveScalarAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
MachineDominatorTree *MDT = nullptr) const;
- void lowerSelect(SetVectorType &Worklist, MachineInstr &Inst,
+ void lowerSelect(SIInstrWorklist &Worklist, MachineInstr &Inst,
MachineDominatorTree *MDT = nullptr) const;
- void lowerScalarAbs(SetVectorType &Worklist,
- MachineInstr &Inst) const;
+ void lowerScalarAbs(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
- void lowerScalarXnor(SetVectorType &Worklist,
- MachineInstr &Inst) const;
+ void lowerScalarXnor(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
- void splitScalarNotBinop(SetVectorType &Worklist,
- MachineInstr &Inst,
+ void splitScalarNotBinop(SIInstrWorklist &Worklist, MachineInstr &Inst,
unsigned Opcode) const;
- void splitScalarBinOpN2(SetVectorType &Worklist,
- MachineInstr &Inst,
+ void splitScalarBinOpN2(SIInstrWorklist &Worklist, MachineInstr &Inst,
unsigned Opcode) const;
- void splitScalar64BitUnaryOp(SetVectorType &Worklist,
- MachineInstr &Inst, unsigned Opcode,
- bool Swap = false) const;
+ void splitScalar64BitUnaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
+ unsigned Opcode, bool Swap = false) const;
- void splitScalar64BitAddSub(SetVectorType &Worklist, MachineInstr &Inst,
+ void splitScalar64BitAddSub(SIInstrWorklist &Worklist, MachineInstr &Inst,
MachineDominatorTree *MDT = nullptr) const;
- void splitScalar64BitBinaryOp(SetVectorType &Worklist, MachineInstr &Inst,
+ void splitScalar64BitBinaryOp(SIInstrWorklist &Worklist, MachineInstr &Inst,
unsigned Opcode,
MachineDominatorTree *MDT = nullptr) const;
- void splitScalar64BitXnor(SetVectorType &Worklist, MachineInstr &Inst,
- MachineDominatorTree *MDT = nullptr) const;
+ void splitScalar64BitXnor(SIInstrWorklist &Worklist, MachineInstr &Inst,
+ MachineDominatorTree *MDT = nullptr) const;
- void splitScalar64BitBCNT(SetVectorType &Worklist,
+ void splitScalar64BitBCNT(SIInstrWorklist &Worklist,
MachineInstr &Inst) const;
- void splitScalar64BitBFE(SetVectorType &Worklist,
- MachineInstr &Inst) const;
- void movePackToVALU(SetVectorType &Worklist,
- MachineRegisterInfo &MRI,
+ void splitScalar64BitBFE(SIInstrWorklist &Worklist, MachineInstr &Inst) const;
+ void movePackToVALU(SIInstrWorklist &Worklist, MachineRegisterInfo &MRI,
MachineInstr &Inst) const;
void addUsersToMoveToVALUWorklist(Register Reg, MachineRegisterInfo &MRI,
- SetVectorType &Worklist) const;
+ SIInstrWorklist &Worklist) const;
void addSCCDefUsersToVALUWorklist(MachineOperand &Op,
MachineInstr &SCCDefInst,
- SetVectorType &Worklist,
+ SIInstrWorklist &Worklist,
Register NewCond = Register()) const;
void addSCCDefsToVALUWorklist(MachineInstr *SCCUseInst,
- SetVectorType &Worklist) const;
+ SIInstrWorklist &Worklist) const;
const TargetRegisterClass *
getDestEquivalentVGPRClass(const MachineInstr &Inst) const;
@@ -142,6 +170,12 @@ private:
Register findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
protected:
+ /// If the specific machine instruction is an instruction that moves/copies
+ /// a value from one register to another register, return the destination and
+ /// source registers as machine operands.
+ std::optional<DestSourcePair>
+ isCopyInstrImpl(const MachineInstr &MI) const override;
+
bool swapSourceModifiers(MachineInstr &MI,
MachineOperand &Src0, unsigned Src0OpName,
MachineOperand &Src1, unsigned Src1OpName) const;
@@ -626,6 +660,11 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
}
+ static bool isWWMRegSpillOpcode(uint16_t Opcode) {
+ return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
+ Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE;
+ }
+
static bool isDPP(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::DPP;
}
@@ -781,6 +820,10 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::FPAtomic;
}
+ static bool isNeverUniform(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::IsNeverUniform;
+ }
+
static bool doesNotReadTiedSource(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
}
@@ -790,7 +833,7 @@ public:
}
bool isVGPRCopy(const MachineInstr &MI) const {
- assert(MI.isCopy());
+ assert(isCopyInstr(MI));
Register Dest = MI.getOperand(0).getReg();
const MachineFunction &MF = *MI.getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -841,7 +884,7 @@ public:
const MachineOperand &UseMO,
const MachineOperand &DefMO) const {
assert(UseMO.getParent() == &MI);
- int OpIdx = MI.getOperandNo(&UseMO);
+ int OpIdx = UseMO.getOperandNo();
if (OpIdx >= MI.getDesc().NumOperands)
return false;
@@ -860,7 +903,7 @@ public:
if (OpIdx >= MI.getDesc().NumOperands)
return false;
- if (MI.isCopy()) {
+ if (isCopyInstr(MI)) {
unsigned Size = getOpSize(MI, OpIdx);
assert(Size == 8 || Size == 4);
@@ -873,8 +916,7 @@ public:
}
bool isInlineConstant(const MachineOperand &MO) const {
- const MachineInstr *Parent = MO.getParent();
- return isInlineConstant(*Parent, Parent->getOperandNo(&MO));
+ return isInlineConstant(*MO.getParent(), MO.getOperandNo());
}
bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
@@ -908,6 +950,15 @@ public:
unsigned getVALUOp(const MachineInstr &MI) const;
+ void insertScratchExecCopy(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, Register Reg, bool IsSCCLive,
+ SlotIndexes *Indexes = nullptr) const;
+
+ void restoreExec(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+ Register Reg, SlotIndexes *Indexes = nullptr) const;
+
/// Return the correct register class for \p OpNo. For target-specific
/// instructions, this will return the register class that has been defined
/// in tablegen. For generic instructions, like REG_SEQUENCE it will return
@@ -1005,11 +1056,14 @@ public:
/// was moved to VGPR. \returns true if succeeded.
bool moveFlatAddrToVGPR(MachineInstr &Inst) const;
- /// Replace this instruction's opcode with the equivalent VALU
- /// opcode. This function will also move the users of \p MI to the
- /// VALU if necessary. If present, \p MDT is updated.
- MachineBasicBlock *moveToVALU(MachineInstr &MI,
- MachineDominatorTree *MDT = nullptr) const;
+ /// Replace each instruction's opcode with the equivalent VALU
+ /// opcode. This function will also move the users of the MachineInstrs
+ /// in \p Worklist to the VALU if necessary. If present, \p MDT is
+ /// updated.
+ void moveToVALU(SIInstrWorklist &Worklist, MachineDominatorTree *MDT) const;
+
+ void moveToVALUImpl(SIInstrWorklist &Worklist, MachineDominatorTree *MDT,
+ MachineInstr &Inst) const;
void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
@@ -1095,6 +1149,9 @@ public:
CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAGMI *DAG) const override;
+ unsigned getLiveRangeSplitOpcode(Register Reg,
+ const MachineFunction &MF) const override;
+
bool isBasicBlockPrologue(const MachineInstr &MI) const override;
MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
@@ -1132,6 +1189,11 @@ public:
return isUInt<12>(Imm);
}
+ static unsigned getMaxMUBUFImmOffset();
+
+ bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
+ Align Alignment = Align(4)) const;
+
/// Returns if \p Offset is legal for the subtarget as the offset to a FLAT
/// encoded instruction. If \p Signed, this is for an instruction that
/// interprets the offset as signed.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 2066abb0268d..044bc4507d3a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -50,14 +50,6 @@ def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
[SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
>;
-def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
- [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
->;
-
-def SIatomic_dec : SDNode<"AMDGPUISD::ATOMIC_DEC", SDTAtomic2,
- [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
->;
-
def SDTAtomic2_f32 : SDTypeProfile<1, 2, [
SDTCisSameAs<0,2>, SDTCisFP<0>, SDTCisPtrTy<1>
]>;
@@ -355,8 +347,6 @@ class isPackedType<ValueType SrcVT> {
// PatFrags for global memory operations
//===----------------------------------------------------------------------===//
-defm atomic_inc : binary_atomic_op_all_as<SIatomic_inc>;
-defm atomic_dec : binary_atomic_op_all_as<SIatomic_dec>;
defm atomic_load_fmin : binary_atomic_op_all_as<SIatomic_fmin, 0>;
defm atomic_load_fmax : binary_atomic_op_all_as<SIatomic_fmax, 0>;
@@ -762,8 +752,8 @@ multiclass SIAtomicM0Glue2 <string op_name, bit is_amdgpu = 0,
defm atomic_load_add : SIAtomicM0Glue2 <"LOAD_ADD">;
defm atomic_load_sub : SIAtomicM0Glue2 <"LOAD_SUB">;
-defm atomic_inc : SIAtomicM0Glue2 <"INC", 1>;
-defm atomic_dec : SIAtomicM0Glue2 <"DEC", 1>;
+defm atomic_load_uinc_wrap : SIAtomicM0Glue2 <"LOAD_UINC_WRAP">;
+defm atomic_load_udec_wrap : SIAtomicM0Glue2 <"LOAD_UDEC_WRAP">;
defm atomic_load_and : SIAtomicM0Glue2 <"LOAD_AND">;
defm atomic_load_min : SIAtomicM0Glue2 <"LOAD_MIN">;
defm atomic_load_max : SIAtomicM0Glue2 <"LOAD_MAX">;
@@ -931,144 +921,39 @@ def set_glc : SDNodeXForm<timm, [{
// Custom Operands
//===----------------------------------------------------------------------===//
-def SoppBrTarget : AsmOperandClass {
- let Name = "SoppBrTarget";
- let ParserMethod = "parseSOppBrTarget";
-}
-
-def sopp_brtarget : Operand<OtherVT> {
+def SOPPBrTarget : CustomOperand<OtherVT> {
+ let PrintMethod = "printOperand";
let EncoderMethod = "getSOPPBrEncoding";
- let DecoderMethod = "decodeSoppBrTarget";
+ let DecoderMethod = "decodeSOPPBrTarget";
let OperandType = "OPERAND_PCREL";
- let ParserMatchClass = SoppBrTarget;
}
def si_ga : Operand<iPTR>;
-def InterpSlotMatchClass : AsmOperandClass {
- let Name = "InterpSlot";
- let PredicateMethod = "isInterpSlot";
- let ParserMethod = "parseInterpSlot";
- let RenderMethod = "addImmOperands";
-}
-
-def InterpSlot : Operand<i32> {
- let PrintMethod = "printInterpSlot";
- let ParserMatchClass = InterpSlotMatchClass;
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def AttrMatchClass : AsmOperandClass {
- let Name = "Attr";
- let PredicateMethod = "isInterpAttr";
- let ParserMethod = "parseInterpAttr";
- let RenderMethod = "addImmOperands";
-}
+def InterpSlot : CustomOperand<i32>;
// It appears to be necessary to create a separate operand for this to
// be able to parse attr<num> with no space.
-def Attr : Operand<i32> {
- let PrintMethod = "printInterpAttr";
- let ParserMatchClass = AttrMatchClass;
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-def AttrChanMatchClass : AsmOperandClass {
- let Name = "AttrChan";
- let PredicateMethod = "isAttrChan";
- let RenderMethod = "addImmOperands";
-}
-
-def AttrChan : Operand<i32> {
- let PrintMethod = "printInterpAttrChan";
- let ParserMatchClass = AttrChanMatchClass;
- let OperandType = "OPERAND_IMMEDIATE";
-}
+def InterpAttr : CustomOperand<i32>;
-def SendMsgMatchClass : AsmOperandClass {
- let Name = "SendMsg";
- let PredicateMethod = "isSendMsg";
- let ParserMethod = "parseSendMsgOp";
- let RenderMethod = "addImmOperands";
-}
-
-def SwizzleMatchClass : AsmOperandClass {
- let Name = "Swizzle";
- let PredicateMethod = "isSwizzle";
- let ParserMethod = "parseSwizzleOp";
- let RenderMethod = "addImmOperands";
- let IsOptional = 1;
-}
-
-def EndpgmMatchClass : AsmOperandClass {
- let Name = "EndpgmImm";
- let PredicateMethod = "isEndpgm";
- let ParserMethod = "parseEndpgmOp";
- let RenderMethod = "addImmOperands";
- let IsOptional = 1;
-}
-
-def ExpTgtMatchClass : AsmOperandClass {
- let Name = "ExpTgt";
- let PredicateMethod = "isExpTgt";
- let ParserMethod = "parseExpTgt";
- let RenderMethod = "printExpTgt";
-}
-
-def SWaitMatchClass : AsmOperandClass {
- let Name = "SWaitCnt";
- let RenderMethod = "addImmOperands";
- let ParserMethod = "parseSWaitCntOps";
-}
-
-def DepCtrMatchClass : AsmOperandClass {
- let Name = "DepCtr";
- let RenderMethod = "addImmOperands";
- let ParserMethod = "parseDepCtrOps";
-}
-
-def SDelayMatchClass : AsmOperandClass {
- let Name = "SDelayAlu";
- let RenderMethod = "addImmOperands";
- let ParserMethod = "parseSDelayAluOps";
-}
+def InterpAttrChan : ImmOperand<i32>;
def VReg32OrOffClass : AsmOperandClass {
let Name = "VReg32OrOff";
let ParserMethod = "parseVReg32OrOff";
}
-let OperandType = "OPERAND_IMMEDIATE" in {
-def SendMsgImm : Operand<i32> {
- let PrintMethod = "printSendMsg";
- let ParserMatchClass = SendMsgMatchClass;
-}
+def SendMsg : CustomOperand<i32>;
-def SwizzleImm : Operand<i16> {
- let PrintMethod = "printSwizzle";
- let ParserMatchClass = SwizzleMatchClass;
-}
+def Swizzle : CustomOperand<i16, 1>;
-def EndpgmImm : Operand<i16> {
- let PrintMethod = "printEndpgm";
- let ParserMatchClass = EndpgmMatchClass;
-}
+def Endpgm : CustomOperand<i16, 1>;
-def WAIT_FLAG : Operand <i32> {
- let ParserMatchClass = SWaitMatchClass;
- let PrintMethod = "printWaitFlag";
-}
+def SWaitCnt : CustomOperand<i32>;
-def DepCtrImm : Operand <i32> {
- let ParserMatchClass = DepCtrMatchClass;
- let PrintMethod = "printDepCtr";
-}
+def DepCtr : CustomOperand<i32>;
-def DELAY_FLAG : Operand <i32> {
- let ParserMatchClass = SDelayMatchClass;
- let PrintMethod = "printDelayFlag";
-}
-} // End OperandType = "OPERAND_IMMEDIATE"
+def SDelayALU : CustomOperand<i32>;
include "SIInstrFormats.td"
include "VIInstrFormats.td"
@@ -1148,111 +1033,71 @@ def SDWAVopcDst : BoolRC {
let PrintMethod = "printVOPDst";
}
-class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
- let Name = "Imm"#CName;
- let PredicateMethod = "is"#CName;
- let ParserMethod = !if(Optional, "", "parse"#CName);
- let RenderMethod = "addImmOperands";
- let IsOptional = Optional;
- let DefaultMethod = !if(Optional, "default"#CName, ?);
-}
-
-class CustomOperandClass<string CName, bit Optional> : AsmOperandClass {
- let Name = CName;
- let PredicateMethod = "is"#CName;
- let ParserMethod = "parse"#CName;
- let RenderMethod = "addImmOperands";
- let IsOptional = Optional;
- let DefaultMethod = "default"#CName;
-}
-
-class CustomOperandProps<bit Optional = 0, string Name = NAME,
- AsmOperandClass Class = CustomOperandClass<Name, Optional>> {
- string PrintMethod = "print"#Name;
- AsmOperandClass ParserMatchClass = Class;
-}
-
-class CustomOperand<ValueType Type, bit Optional = 0, string Name = NAME,
- AsmOperandClass Class = CustomOperandClass<Name, Optional>>
- : Operand<Type>, CustomOperandProps<Optional, Name, Class>;
-
-class NamedIntOperandClass<string Prefix, string Name, string ConvertMethod>
- : CustomOperandClass<Name, 1> {
- string ImmTy = "AMDGPUOperand::ImmTy"#Name;
- let ParserMethod =
- "[this](OperandVector &Operands) -> OperandMatchResultTy { "#
- "return parseIntWithPrefix(\""#Prefix#"\", Operands, "#ImmTy#", "#
- ConvertMethod#"); }";
-}
-
class NamedIntOperand<ValueType Type, string Prefix, string Name = NAME,
string ConvertMethod = "nullptr">
- : CustomOperand<Type, 1, Name, NamedIntOperandClass<Prefix, Name, ConvertMethod>>;
-
-class BitOperandClass<string Id, string Name>
- : CustomOperandClass<Name, 1> {
- string ImmTy = "AMDGPUOperand::ImmTy"#Name;
+ : CustomOperand<Type, 1, Name> {
let ParserMethod =
- "[this](OperandVector &Operands) -> OperandMatchResultTy { "#
- "return parseNamedBit(\""#Id#"\", Operands, "#ImmTy#"); }";
+ "[this](OperandVector &Operands) -> ParseStatus { "#
+ "return parseIntWithPrefix(\""#Prefix#"\", Operands, "#
+ "AMDGPUOperand::"#ImmTy#", "#ConvertMethod#"); }";
}
class NamedBitOperand<string Id, string Name = NAME>
- : CustomOperand<i1, 1, Name, BitOperandClass<Id, Name>>;
-
-class DefaultOperand_0<CustomOperand Op>
- : OperandWithDefaultOps<Op.Type, (ops (Op.Type 0))>,
- CustomOperandProps<1, Op.ParserMatchClass.Name, Op.ParserMatchClass>;
-
-class NamedOperandU32<string Name, AsmOperandClass MatchClass> : Operand<i32> {
- let PrintMethod = "print"#Name;
- let ParserMatchClass = MatchClass;
+ : CustomOperand<i1, 1, Name> {
+ let ParserMethod =
+ "[this](OperandVector &Operands) -> ParseStatus { "#
+ "return parseNamedBit(\""#Id#"\", Operands, AMDGPUOperand::"#ImmTy#"); }";
+ let PrintMethod = "[this](const MCInst *MI, unsigned OpNo, "#
+ "const MCSubtargetInfo &STI, raw_ostream &O) { "#
+ "printNamedBit(MI, OpNo, O, \""#Id#"\"); }";
}
-class NamedOperandU32_0<string Name, AsmOperandClass MatchClass> :
- OperandWithDefaultOps<i32, (ops (i32 0))> {
- let PrintMethod = "print"#Name;
- let ParserMatchClass = MatchClass;
+class DefaultOperand<CustomOperand Op, int Value>
+ : OperandWithDefaultOps<Op.Type, (ops (Op.Type Value))>,
+ CustomOperandProps<1, Op.ParserMatchClass.Name> {
+ let ParserMethod = Op.ParserMatchClass.ParserMethod;
+ let PrintMethod = Op.PrintMethod;
}
-class NamedOperandU32Default0<string Name, AsmOperandClass MatchClass> :
- OperandWithDefaultOps<i32, (ops (i32 0))> {
- let PrintMethod = "print"#Name;
- let ParserMatchClass = MatchClass;
+class SDWAOperand<string Id, string Name = NAME>
+ : CustomOperand<i32, 1, Name> {
+ let ParserMethod =
+ "[this](OperandVector &Operands) -> ParseStatus { "#
+ "return parseSDWASel(Operands, \""#Id#"\", AMDGPUOperand::"#ImmTy#"); }";
}
-class NamedOperandU32Default1<string Name, AsmOperandClass MatchClass> :
- OperandWithDefaultOps<i32, (ops (i32 1))> {
- let PrintMethod = "print"#Name;
- let ParserMatchClass = MatchClass;
+class ArrayOperand0<string Id, string Name = NAME>
+ : OperandWithDefaultOps<i32, (ops (i32 0))>,
+ CustomOperandProps<1, Name> {
+ let ParserMethod =
+ "[this](OperandVector &Operands) -> ParseStatus { "#
+ "return parseOperandArrayWithPrefix(\""#Id#"\", Operands, "#
+ "AMDGPUOperand::"#ImmTy#"); }";
}
-let OperandType = "OPERAND_IMMEDIATE" in {
-
-def flat_offset : CustomOperand<i16, 1, "FlatOffset">;
-def offset : NamedIntOperand<i16, "offset", "Offset">;
+let ImmTy = "ImmTyOffset" in
+def flat_offset : CustomOperand<i32, 1, "FlatOffset">;
+def offset : NamedIntOperand<i32, "offset", "Offset">;
def offset0 : NamedIntOperand<i8, "offset0", "Offset0">;
def offset1 : NamedIntOperand<i8, "offset1", "Offset1">;
def gds : NamedBitOperand<"gds", "GDS">;
-def omod : NamedOperandU32<"OModSI", NamedMatchClass<"OModSI">>;
-def omod0 : NamedOperandU32_0<"OModSI", NamedMatchClass<"OModSI">>;
+def omod : CustomOperand<i32, 1, "OModSI">;
+def omod0 : DefaultOperand<omod, 0>;
// We need to make the cases with a default of 0 distinct from no
// default to help deal with some cases where the operand appears
// before a mandatory operand.
def clampmod : NamedBitOperand<"clamp", "ClampSI">;
-def clampmod0 : DefaultOperand_0<clampmod>;
+def clampmod0 : DefaultOperand<clampmod, 0>;
def highmod : NamedBitOperand<"high", "High">;
-def CPol : NamedOperandU32<"CPol", NamedMatchClass<"CPol">>;
-def CPol_0 : NamedOperandU32Default0<"CPol", NamedMatchClass<"CPol">>;
-def CPol_GLC1 : NamedOperandU32Default1<"CPol", NamedMatchClass<"CPol">>;
+def CPol : CustomOperand<i32, 1>;
+def CPol_0 : DefaultOperand<CPol, 0>;
+def CPol_GLC1 : DefaultOperand<CPol, 1>;
def TFE : NamedBitOperand<"tfe">;
-def SWZ : NamedBitOperand<"swz">;
-def SWZ_0 : DefaultOperand_0<SWZ>;
def UNorm : NamedBitOperand<"unorm">;
def DA : NamedBitOperand<"da">;
def R128A16 : CustomOperand<i1, 1>;
@@ -1267,62 +1112,51 @@ def FORMAT : CustomOperand<i8>;
def DMask : NamedIntOperand<i16, "dmask">;
def Dim : CustomOperand<i8>;
-def dst_sel : NamedOperandU32<"SDWADstSel", NamedMatchClass<"SDWADstSel">>;
-def src0_sel : NamedOperandU32<"SDWASrc0Sel", NamedMatchClass<"SDWASrc0Sel">>;
-def src1_sel : NamedOperandU32<"SDWASrc1Sel", NamedMatchClass<"SDWASrc1Sel">>;
-def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused">>;
-
-def op_sel0 : NamedOperandU32Default0<"OpSel", NamedMatchClass<"OpSel">>;
-def op_sel_hi0 : NamedOperandU32Default0<"OpSelHi", NamedMatchClass<"OpSelHi">>;
-def neg_lo0 : NamedOperandU32Default0<"NegLo", NamedMatchClass<"NegLo">>;
-def neg_hi0 : NamedOperandU32Default0<"NegHi", NamedMatchClass<"NegHi">>;
+def dst_sel : SDWAOperand<"dst_sel", "SDWADstSel">;
+def src0_sel : SDWAOperand<"src0_sel", "SDWASrc0Sel">;
+def src1_sel : SDWAOperand<"src1_sel", "SDWASrc1Sel">;
+def dst_unused : CustomOperand<i32, 1, "SDWADstUnused">;
-def dpp8 : NamedOperandU32<"DPP8", NamedMatchClass<"DPP8", 0>>;
-def dpp_ctrl : NamedOperandU32<"DPPCtrl", NamedMatchClass<"DPPCtrl", 0>>;
+def op_sel0 : ArrayOperand0<"op_sel", "OpSel">;
+def op_sel_hi0 : ArrayOperand0<"op_sel_hi", "OpSelHi">;
+def neg_lo0 : ArrayOperand0<"neg_lo", "NegLo">;
+def neg_hi0 : ArrayOperand0<"neg_hi", "NegHi">;
-def row_mask : NamedOperandU32<"RowMask", NamedMatchClass<"RowMask">>;
-def bank_mask : NamedOperandU32<"BankMask", NamedMatchClass<"BankMask">>;
-def bound_ctrl : NamedIntOperand<i1, "bound_ctrl", "DppBoundCtrl", "ConvertDppBoundCtrl">;
-def FI : NamedOperandU32<"FI", NamedMatchClass<"FI">>;
+def dpp8 : CustomOperand<i32, 0, "DPP8">;
+def dpp_ctrl : CustomOperand<i32, 0, "DPPCtrl">;
-def blgp : NamedOperandU32<"BLGP", NamedMatchClass<"BLGP">>;
-def cbsz : NamedOperandU32<"CBSZ", NamedMatchClass<"CBSZ">>;
-def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
+let DefaultValue = "0xf" in {
+def row_mask : NamedIntOperand<i32, "row_mask", "DppRowMask">;
+def bank_mask : NamedIntOperand<i32, "bank_mask", "DppBankMask">;
+}
+def bound_ctrl : NamedIntOperand<i1, "bound_ctrl", "DppBoundCtrl",
+ "[this] (int64_t &BC) -> bool { return convertDppBoundCtrl(BC); }">;
+def FI : NamedIntOperand<i32, "fi", "DppFI">;
-def hwreg : NamedOperandU32<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
+def blgp : CustomOperand<i32, 1, "BLGP">;
+def cbsz : NamedIntOperand<i32, "cbsz", "CBSZ">;
+def abid : NamedIntOperand<i32, "abid", "ABID">;
-def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
+def hwreg : CustomOperand<i32, 0, "Hwreg">;
-}
+def exp_tgt : CustomOperand<i32, 0, "ExpTgt">;
def wait_vdst : NamedIntOperand<i8, "wait_vdst", "WaitVDST">;
def wait_exp : NamedIntOperand<i8, "wait_exp", "WaitEXP">;
-} // End OperandType = "OPERAND_IMMEDIATE"
-
-class KImmMatchClass<int size> : AsmOperandClass {
- let Name = "KImmFP"#size;
- let PredicateMethod = "isKImmFP"#size;
- let ParserMethod = "parseImm";
- let RenderMethod = "addKImmFP"#size#"Operands";
-}
-
-class kimmOperand<ValueType vt> : Operand<vt> {
+class KImmFPOperand<ValueType vt> : ImmOperand<vt> {
let OperandNamespace = "AMDGPU";
let OperandType = "OPERAND_KIMM"#vt.Size;
let PrintMethod = "printU"#vt.Size#"ImmOperand";
- let ParserMatchClass = !cast<AsmOperandClass>("KImmFP"#vt.Size#"MatchClass");
- let DecoderMethod = "decodeOperand_f"#vt.Size#"kimm";
+ let DecoderMethod = "decodeOperand_KImmFP";
}
// 32-bit VALU immediate operand that uses the constant bus.
-def KImmFP32MatchClass : KImmMatchClass<32>;
-def f32kimm : kimmOperand<i32>;
+def KImmFP32 : KImmFPOperand<i32>;
// 32-bit VALU immediate operand with a 16-bit value that uses the
// constant bus.
-def KImmFP16MatchClass : KImmMatchClass<16>;
-def f16kimm : kimmOperand<i16>;
+def KImmFP16 : KImmFPOperand<i16>;
class FPInputModsMatchClass <int opSize> : AsmOperandClass {
let Name = "RegOrImmWithFP"#opSize#"InputMods";
@@ -1506,7 +1340,16 @@ def DS128Bit8ByteAligned : ComplexPattern<iPTR, 3, "SelectDS128Bit8ByteAligned">
def MOVRELOffset : ComplexPattern<iPTR, 2, "SelectMOVRELOffset">;
def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
+
+// Modifiers for floating point instructions.
def VOP3Mods : ComplexPattern<untyped, 2, "SelectVOP3Mods">;
+
+// VOP3 modifiers used for instructions that do not read canonicalized
+// floating point values (i.e. integer operations with FP source
+// modifiers)
+def VOP3ModsNonCanonicalizing : ComplexPattern<untyped, 2,
+ "SelectVOP3ModsNonCanonicalizing">;
+
def VOP3NoMods : ComplexPattern<untyped, 1, "SelectVOP3NoMods">;
def VOP3OMods : ComplexPattern<untyped, 3, "SelectVOP3OMods">;
@@ -1521,7 +1364,8 @@ def VOP3OpSel : ComplexPattern<untyped, 2, "SelectVOP3OpSel">;
def VOP3OpSelMods : ComplexPattern<untyped, 2, "SelectVOP3OpSelMods">;
-def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;
+def VOP3PMadMixModsExt : ComplexPattern<untyped, 2, "SelectVOP3PMadMixModsExt">;
+def VOP3PMadMixMods : ComplexPattern<untyped, 2, "SelectVOP3PMadMixMods">;
def VINTERPMods : ComplexPattern<untyped, 2, "SelectVINTERPMods">;
def VINTERPModsHi : ComplexPattern<untyped, 2, "SelectVINTERPModsHi">;
@@ -1717,7 +1561,7 @@ class getVOP3SrcForVT<ValueType VT> {
bit isFP = isFloatType<VT>.ret;
RegisterOperand ret =
!if(!eq(VT.Size, 128),
- VSrc_128,
+ VRegSrc_128,
!if(!eq(VT.Size, 64),
!if(isFP,
!if(!eq(VT.Value, v2f32.Value),
@@ -2390,14 +2234,6 @@ class getLdStRegisterOperand<RegisterClass RC> {
)))));
}
-class BitOr<bit a, bit b> {
- bit ret = !if(a, 1, !if(b, 1, 0));
-}
-
-class BitAnd<bit a, bit b> {
- bit ret = !if(a, !if(b, 1, 0), 0);
-}
-
class getHasVOP3DPP <ValueType DstVT = i32, ValueType Src0VT = i32,
ValueType Src1VT = i32, ValueType Src2VT = i32> {
bit ret = !if(!eq(DstVT.Size, 64),
@@ -2445,7 +2281,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
field RegisterClass Src2DPP = getVregSrcForVT<Src2VT>.ret;
field RegisterOperand Src0VOP3DPP = VGPRSrc_32;
- field RegisterOperand Src1VOP3DPP = VGPRSrc_32;
+ field RegisterOperand Src1VOP3DPP = VRegSrc_32;
field RegisterOperand Src2VOP3DPP = getVOP3DPPSrcForVT<Src2VT>.ret;
field RegisterOperand Src0SDWA = getSDWASrcForVT<Src0VT>.ret;
field RegisterOperand Src1SDWA = getSDWASrcForVT<Src0VT>.ret;
@@ -2509,8 +2345,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field bit HasExtVOP3DPP = getHasVOP3DPP<DstVT, Src0VT, Src1VT, Src2VT>.ret;
- field bit HasExtDPP = !if(!or(getHasDPP<NumSrcArgs>.ret,
- HasExtVOP3DPP), 1, 0);
+ field bit HasExtDPP = !or(getHasDPP<NumSrcArgs>.ret, HasExtVOP3DPP);
field bit HasExt32BitDPP = getHasExt32BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field bit HasExt64BitDPP = getHasExt64BitDPP<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
field bit HasExtSDWA = getHasSDWA<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 0c2a13852fcb..7fe76b4c13ca 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -47,7 +47,7 @@ let Uses = [MODE, M0, EXEC] in {
multiclass V_INTERP_P1_F32_m : VINTRP_m <
0x00000000,
(outs VINTRPDst:$vdst),
- (ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
+ (ins VGPR_32:$vsrc, InterpAttr:$attr, InterpAttrChan:$attrchan),
"v_interp_p1_f32$vdst, $vsrc, $attr$attrchan",
[(set f32:$vdst, (int_amdgcn_interp_p1 f32:$vsrc,
(i32 timm:$attrchan), (i32 timm:$attr), M0))]
@@ -73,7 +73,8 @@ let DisableEncoding = "$src0", Constraints = "$src0 = $vdst" in {
defm V_INTERP_P2_F32 : VINTRP_m <
0x00000001,
(outs VINTRPDst:$vdst),
- (ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
+ (ins VGPR_32:$src0, VGPR_32:$vsrc, InterpAttr:$attr,
+ InterpAttrChan:$attrchan),
"v_interp_p2_f32$vdst, $vsrc, $attr$attrchan",
[(set f32:$vdst, (int_amdgcn_interp_p2 f32:$src0, f32:$vsrc,
(i32 timm:$attrchan), (i32 timm:$attr), M0))]>;
@@ -83,7 +84,7 @@ defm V_INTERP_P2_F32 : VINTRP_m <
defm V_INTERP_MOV_F32 : VINTRP_m <
0x00000002,
(outs VINTRPDst:$vdst),
- (ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan),
+ (ins InterpSlot:$vsrc, InterpAttr:$attr, InterpAttrChan:$attrchan),
"v_interp_mov_f32$vdst, $vsrc, $attr$attrchan",
[(set f32:$vdst, (int_amdgcn_interp_mov (i32 timm:$vsrc),
(i32 timm:$attrchan), (i32 timm:$attr), M0))]>;
@@ -95,6 +96,16 @@ defm V_INTERP_MOV_F32 : VINTRP_m <
//===----------------------------------------------------------------------===//
// Pseudo Instructions
//===----------------------------------------------------------------------===//
+
+// Insert a branch to an endpgm block to use as a fallback trap.
+def ENDPGM_TRAP : SPseudoInstSI<
+ (outs), (ins),
+ [(AMDGPUendpgm_trap)],
+ "ENDPGM_TRAP"> {
+ let hasSideEffects = 1;
+ let usesCustomInserter = 1;
+}
+
def ATOMIC_FENCE : SPseudoInstSI<
(outs), (ins i32imm:$ordering, i32imm:$scope),
[(atomic_fence (i32 timm:$ordering), (i32 timm:$scope))],
@@ -161,6 +172,13 @@ def STRICT_WQM : PseudoInstSI <(outs unknown:$vdst), (ins unknown:$src0)>;
} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
+def WWM_COPY : SPseudoInstSI <
+ (outs unknown:$dst), (ins unknown:$src)> {
+ let hasSideEffects = 0;
+ let isAsCheapAsAMove = 1;
+ let isConvergent = 1;
+}
+
def ENTER_STRICT_WWM : SPseudoInstSI <(outs SReg_1:$sdst), (ins i64imm:$src0)> {
let Uses = [EXEC];
let Defs = [EXEC, SCC];
@@ -189,6 +207,12 @@ def EXIT_STRICT_WQM : SPseudoInstSI <(outs SReg_1:$sdst), (ins SReg_1:$src0)> {
let mayStore = 0;
}
+let usesCustomInserter = 1 in {
+def S_INVERSE_BALLOT_U32 : SPseudoInstSI <(outs SReg_32:$sdst), (ins SSrc_b32:$mask)>;
+
+def S_INVERSE_BALLOT_U64 : SPseudoInstSI <(outs SReg_64:$sdst), (ins SSrc_b64:$mask)>;
+} // End usesCustomInserter = 1
+
// PSEUDO_WM is treated like STRICT_WWM/STRICT_WQM without exec changes.
def ENTER_PSEUDO_WM : SPseudoInstSI <(outs), (ins)> {
let Uses = [EXEC];
@@ -222,7 +246,7 @@ def FPTRUNC_DOWNWARD_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst),
// Invert the exec mask and overwrite the inactive lanes of dst with inactive,
// restoring it after we're done.
-let Defs = [SCC] in {
+let Defs = [SCC], isConvergent = 1 in {
def V_SET_INACTIVE_B32 : VPseudoInstSI <(outs VGPR_32:$vdst),
(ins VSrc_b32: $src, VSrc_b32:$inactive),
[(set i32:$vdst, (int_amdgcn_set_inactive i32:$src, i32:$inactive))]> {
@@ -234,6 +258,18 @@ def V_SET_INACTIVE_B64 : VPseudoInstSI <(outs VReg_64:$vdst),
}
} // End Defs = [SCC]
+let usesCustomInserter = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in {
+ def WAVE_REDUCE_UMIN_PSEUDO_U32 : VPseudoInstSI <(outs SGPR_32:$sdst),
+ (ins VSrc_b32: $src, VSrc_b32:$strategy),
+ [(set i32:$sdst, (int_amdgcn_wave_reduce_umin i32:$src, i32:$strategy))]> {
+ }
+
+ def WAVE_REDUCE_UMAX_PSEUDO_U32 : VPseudoInstSI <(outs SGPR_32:$sdst),
+ (ins VSrc_b32: $src, VSrc_b32:$strategy),
+ [(set i32:$sdst, (int_amdgcn_wave_reduce_umax i32:$src, i32:$strategy))]> {
+ }
+}
+
let usesCustomInserter = 1, Defs = [VCC, EXEC] in {
def V_ADD_U64_PSEUDO : VPseudoInstSI <
(outs VReg_64:$vdst), (ins VSrc_b64:$src0, VSrc_b64:$src1),
@@ -300,6 +336,7 @@ def S_XOR_B64_term : WrapTerminatorInst<S_XOR_B64>;
def S_OR_B64_term : WrapTerminatorInst<S_OR_B64>;
def S_ANDN2_B64_term : WrapTerminatorInst<S_ANDN2_B64>;
def S_AND_B64_term : WrapTerminatorInst<S_AND_B64>;
+def S_AND_SAVEEXEC_B64_term : WrapTerminatorInst<S_AND_SAVEEXEC_B64>;
}
let WaveSizePredicate = isWave32 in {
@@ -308,6 +345,7 @@ def S_XOR_B32_term : WrapTerminatorInst<S_XOR_B32>;
def S_OR_B32_term : WrapTerminatorInst<S_OR_B32>;
def S_ANDN2_B32_term : WrapTerminatorInst<S_ANDN2_B32>;
def S_AND_B32_term : WrapTerminatorInst<S_AND_B32>;
+def S_AND_SAVEEXEC_B32_term : WrapTerminatorInst<S_AND_SAVEEXEC_B32>;
}
@@ -368,7 +406,13 @@ def IGLP_OPT : SPseudoInstSI<(outs), (ins i32imm:$mask),
// SI pseudo instructions. These are used by the CFG structurizer pass
// and should be lowered to ISA instructions prior to codegen.
-let isTerminator = 1 in {
+// Since the control flow intrinsics have been enhanced to work under an
+// unstructured CFG, duplicating them could actually be treated as legal.
+// However, marking them non-duplicable has been observed to give better
+// generated code, so we do that in the hope of better code generation and a
+// simplified CFG during the Machine IR optimization stages.
+
+let isTerminator = 1, isNotDuplicable = 1 in {
let OtherPredicates = [EnableLateCFGStructurize] in {
def SI_NON_UNIFORM_BRCOND_PSEUDO : CFPseudoInstSI <
@@ -385,6 +429,7 @@ def SI_IF: CFPseudoInstSI <
let Constraints = "";
let Size = 12;
let hasSideEffects = 1;
+ let IsNeverUniform = 1;
}
def SI_ELSE : CFPseudoInstSI <
@@ -392,6 +437,7 @@ def SI_ELSE : CFPseudoInstSI <
(ins SReg_1:$src, brtarget:$target), [], 1, 1> {
let Size = 12;
let hasSideEffects = 1;
+ let IsNeverUniform = 1;
}
def SI_WATERFALL_LOOP : CFPseudoInstSI <
@@ -408,6 +454,7 @@ def SI_LOOP : CFPseudoInstSI <
let Size = 8;
let isBranch = 1;
let hasSideEffects = 1;
+ let IsNeverUniform = 1;
}
} // End isTerminator = 1
@@ -418,6 +465,7 @@ def SI_END_CF : CFPseudoInstSI <
let isAsCheapAsAMove = 1;
let isReMaterializable = 1;
let hasSideEffects = 1;
+ let isNotDuplicable = 1; // Not a hard requirement, see long comments above for details.
let mayLoad = 1; // FIXME: Should not need memory flags
let mayStore = 1;
}
@@ -425,6 +473,7 @@ def SI_END_CF : CFPseudoInstSI <
def SI_IF_BREAK : CFPseudoInstSI <
(outs SReg_1:$dst), (ins SReg_1:$vcc, SReg_1:$src), []> {
let Size = 4;
+ let isNotDuplicable = 1; // Not a hard requirement, see long comments above for details.
let isAsCheapAsAMove = 1;
let isReMaterializable = 1;
}
@@ -470,7 +519,7 @@ def SI_ILLEGAL_COPY : SPseudoInstSI <
// Branch on undef scc. Used to avoid intermediate copy from
// IMPLICIT_DEF to SCC.
-def SI_BR_UNDEF : SPseudoInstSI <(outs), (ins sopp_brtarget:$simm16)> {
+def SI_BR_UNDEF : SPseudoInstSI <(outs), (ins SOPPBrTarget:$simm16)> {
let isTerminator = 1;
let usesCustomInserter = 1;
let isBranch = 1;
@@ -543,7 +592,7 @@ def SI_RETURN_TO_EPILOG : SPseudoInstSI <
// Return for returning function calls.
def SI_RETURN : SPseudoInstSI <
- (outs), (ins), [(AMDGPUret_flag)],
+ (outs), (ins), [(AMDGPUret_glue)],
"; return"> {
let isTerminator = 1;
let isBarrier = 1;
@@ -584,10 +633,9 @@ def SI_CALL : SPseudoInstSI <
let isConvergent = 1;
}
-// Tail call handling pseudo
-def SI_TCRETURN : SPseudoInstSI <(outs),
- (ins SReg_64:$src0, unknown:$callee, i32imm:$fpdiff),
- [(AMDGPUtc_return i64:$src0, tglobaladdr:$callee, i32:$fpdiff)]> {
+class SI_TCRETURN_Pseudo<RegisterClass rc, SDNode sd> : SPseudoInstSI <(outs),
+ (ins rc:$src0, unknown:$callee, i32imm:$fpdiff),
+ [(sd i64:$src0, tglobaladdr:$callee, i32:$fpdiff)]> {
let Size = 4;
let FixedSize = 1;
let isCall = 1;
@@ -600,10 +648,20 @@ def SI_TCRETURN : SPseudoInstSI <(outs),
let isConvergent = 1;
}
+// Tail call handling pseudo
+def SI_TCRETURN : SI_TCRETURN_Pseudo<CCR_SGPR_64, AMDGPUtc_return>;
+def SI_TCRETURN_GFX : SI_TCRETURN_Pseudo<Gfx_CCR_SGPR_64, AMDGPUtc_return_gfx>;
+
// Handle selecting indirect tail calls
def : GCNPat<
(AMDGPUtc_return i64:$src0, (i64 0), (i32 timm:$fpdiff)),
- (SI_TCRETURN SReg_64:$src0, (i64 0), i32imm:$fpdiff)
+ (SI_TCRETURN CCR_SGPR_64:$src0, (i64 0), i32imm:$fpdiff)
+>;
+
+// Handle selecting indirect tail calls for AMDGPU_gfx
+def : GCNPat<
+ (AMDGPUtc_return_gfx i64:$src0, (i64 0), (i32 timm:$fpdiff)),
+ (SI_TCRETURN_GFX Gfx_CCR_SGPR_64:$src0, (i64 0), i32imm:$fpdiff)
>;
def ADJCALLSTACKUP : SPseudoInstSI<
@@ -720,6 +778,10 @@ def S_INDIRECT_REG_WRITE_MOVREL_B32_V3 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<
def S_INDIRECT_REG_WRITE_MOVREL_B32_V4 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_128>;
def S_INDIRECT_REG_WRITE_MOVREL_B32_V5 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_160>;
def S_INDIRECT_REG_WRITE_MOVREL_B32_V8 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_256>;
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V9 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_288>;
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V10 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_320>;
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V11 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_352>;
+def S_INDIRECT_REG_WRITE_MOVREL_B32_V12 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_384>;
def S_INDIRECT_REG_WRITE_MOVREL_B32_V16 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_512>;
def S_INDIRECT_REG_WRITE_MOVREL_B32_V32 : S_INDIRECT_REG_WRITE_MOVREL_B32_pseudo<SReg_1024>;
@@ -890,6 +952,9 @@ defm SI_SPILL_AV384 : SI_SPILL_VGPR <AV_384, 1>;
defm SI_SPILL_AV512 : SI_SPILL_VGPR <AV_512, 1>;
defm SI_SPILL_AV1024 : SI_SPILL_VGPR <AV_1024, 1>;
+let isConvergent = 1 in
+defm SI_SPILL_WWM_V32 : SI_SPILL_VGPR <VGPR_32>;
+
def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
(outs SReg_64:$dst),
(ins si_ga:$ptr_lo, si_ga:$ptr_hi),
@@ -954,25 +1019,6 @@ def : Pat <
// VOP1 Patterns
//===----------------------------------------------------------------------===//
-let OtherPredicates = [UnsafeFPMath] in {
-
-// Convert (x - floor(x)) to fract(x)
-def : GCNPat <
- (f32 (fsub (f32 (VOP3Mods f32:$x, i32:$mods)),
- (f32 (ffloor (f32 (VOP3Mods f32:$x, i32:$mods)))))),
- (V_FRACT_F32_e64 $mods, $x)
->;
-
-// Convert (x + (-floor(x))) to fract(x)
-def : GCNPat <
- (f64 (fadd (f64 (VOP3Mods f64:$x, i32:$mods)),
- (f64 (fneg (f64 (ffloor (f64 (VOP3Mods f64:$x, i32:$mods)))))))),
- (V_FRACT_F64_e64 $mods, $x)
->;
-
-} // End OtherPredicates = [UnsafeFPMath]
-
-
multiclass f16_fp_Pats<Instruction cvt_f16_f32_inst_e64, Instruction cvt_f32_f16_inst_e64> {
// f16_to_fp patterns
def : GCNPat <
@@ -1094,8 +1140,8 @@ def : GCNPat <
>;
class VOPSelectModsPat <ValueType vt> : GCNPat <
- (vt (select i1:$src0, (VOP3Mods vt:$src1, i32:$src1_mods),
- (VOP3Mods vt:$src2, i32:$src2_mods))),
+ (vt (select i1:$src0, (VOP3ModsNonCanonicalizing vt:$src1, i32:$src1_mods),
+ (VOP3ModsNonCanonicalizing vt:$src2, i32:$src2_mods))),
(V_CNDMASK_B32_e64 FP32InputMods:$src2_mods, VSrc_b32:$src2,
FP32InputMods:$src1_mods, VSrc_b32:$src1, SSrc_i1:$src0)
>;
@@ -1343,66 +1389,6 @@ foreach Index = 0-15 in {
}
-def : Pat <
- (extract_subvector v4i16:$vec, (i32 0)),
- (v2i16 (EXTRACT_SUBREG v4i16:$vec, sub0))
->;
-
-def : Pat <
- (extract_subvector v4i16:$vec, (i32 2)),
- (v2i16 (EXTRACT_SUBREG v4i16:$vec, sub1))
->;
-
-def : Pat <
- (extract_subvector v4f16:$vec, (i32 0)),
- (v2f16 (EXTRACT_SUBREG v4f16:$vec, sub0))
->;
-
-def : Pat <
- (extract_subvector v4f16:$vec, (i32 2)),
- (v2f16 (EXTRACT_SUBREG v4f16:$vec, sub1))
->;
-
-def : Pat <
- (extract_subvector v8i16:$vec, (i32 0)),
- (v4i16 (EXTRACT_SUBREG v8i16:$vec, sub0_sub1))
->;
-
-def : Pat <
- (extract_subvector v8i16:$vec, (i32 4)),
- (v4i16 (EXTRACT_SUBREG v8i16:$vec, sub2_sub3))
->;
-
-def : Pat <
- (extract_subvector v8f16:$vec, (i32 0)),
- (v4f16 (EXTRACT_SUBREG v8f16:$vec, sub0_sub1))
->;
-
-def : Pat <
- (extract_subvector v8f16:$vec, (i32 4)),
- (v4f16 (EXTRACT_SUBREG v8f16:$vec, sub2_sub3))
->;
-
-def : Pat <
- (extract_subvector v16i16:$vec, (i32 0)),
- (v8i16 (EXTRACT_SUBREG v16i16:$vec, sub0_sub1_sub2_sub3))
->;
-
-def : Pat <
- (extract_subvector v16i16:$vec, (i32 8)),
- (v8i16 (EXTRACT_SUBREG v16i16:$vec, sub4_sub5_sub6_sub7))
->;
-
-def : Pat <
- (extract_subvector v16f16:$vec, (i32 0)),
- (v8f16 (EXTRACT_SUBREG v16f16:$vec, sub0_sub1_sub2_sub3))
->;
-
-def : Pat <
- (extract_subvector v16f16:$vec, (i32 8)),
- (v8f16 (EXTRACT_SUBREG v16f16:$vec, sub4_sub5_sub6_sub7))
->;
-
foreach Index = 0-31 in {
def Extract_Element_v32i32_#Index : Extract_Element <
i32, v32i32, Index, !cast<SubRegIndex>(sub#Index)
@@ -2002,13 +1988,13 @@ def : GCNPat <
def : GCNPat <
(i32 (sext i1:$src0)),
(V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
- /*src1mod*/(i32 0), /*src1*/(i32 -1), $src0)
+ /*src1mod*/(i32 0), /*src1*/(i32 -1), i1:$src0)
>;
class Ext32Pat <SDNode ext> : GCNPat <
(i32 (ext i1:$src0)),
(V_CNDMASK_B32_e64 /*src0mod*/(i32 0), /*src0*/(i32 0),
- /*src1mod*/(i32 0), /*src1*/(i32 1), $src0)
+ /*src1mod*/(i32 0), /*src1*/(i32 1), i1:$src0)
>;
def : Ext32Pat <zext>;
@@ -2043,48 +2029,53 @@ def BFIImm32 : PatFrag<
}]
>;
+
// Definition from ISA doc:
// (y & x) | (z & ~x)
-def : AMDGPUPat <
+def : AMDGPUPatIgnoreCopies <
(DivergentBinFrag<or> (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
- (V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)
+ (V_BFI_B32_e64 (COPY_TO_REGCLASS VSrc_b32:$x, VGPR_32),
+ (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32),
+ (COPY_TO_REGCLASS VSrc_b32:$z, VGPR_32))
>;
// (y & C) | (z & ~C)
-def : AMDGPUPat <
+def : AMDGPUPatIgnoreCopies <
(BFIImm32 i32:$x, i32:$y, i32:$z),
(V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)
>;
// 64-bit version
-def : AMDGPUPat <
+def : AMDGPUPatIgnoreCopies <
(DivergentBinFrag<or> (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))),
(REG_SEQUENCE VReg_64,
(V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
- (i32 (EXTRACT_SUBREG VReg_64:$y, sub0)),
- (i32 (EXTRACT_SUBREG VReg_64:$z, sub0))), sub0,
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$z, sub0))), sub0,
(V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)),
- (i32 (EXTRACT_SUBREG VReg_64:$y, sub1)),
- (i32 (EXTRACT_SUBREG VReg_64:$z, sub1))), sub1)
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub1)),
+ (i32 (EXTRACT_SUBREG VReg_64:$z, sub1))), sub1)
>;
// SHA-256 Ch function
// z ^ (x & (y ^ z))
-def : AMDGPUPat <
+def : AMDGPUPatIgnoreCopies <
(DivergentBinFrag<xor> i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
- (V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)
+ (V_BFI_B32_e64 (COPY_TO_REGCLASS VSrc_b32:$x, VGPR_32),
+ (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32),
+ (COPY_TO_REGCLASS VSrc_b32:$z, VGPR_32))
>;
// 64-bit version
-def : AMDGPUPat <
+def : AMDGPUPatIgnoreCopies <
(DivergentBinFrag<xor> i64:$z, (and i64:$x, (xor i64:$y, i64:$z))),
(REG_SEQUENCE VReg_64,
(V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
- (i32 (EXTRACT_SUBREG VReg_64:$y, sub0)),
- (i32 (EXTRACT_SUBREG VReg_64:$z, sub0))), sub0,
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$z, sub0))), sub0,
(V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)),
- (i32 (EXTRACT_SUBREG VReg_64:$y, sub1)),
- (i32 (EXTRACT_SUBREG VReg_64:$z, sub1))), sub1)
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub1)),
+ (i32 (EXTRACT_SUBREG VReg_64:$z, sub1))), sub1)
>;
def : AMDGPUPat <
@@ -3185,24 +3176,27 @@ def : AMDGPUPat <
// SHA-256 Ma patterns
// ((x & z) | (y & (x | z))) -> BFI (XOR x, y), z, y
-def : AMDGPUPat <
+def : AMDGPUPatIgnoreCopies <
(DivergentBinFrag<or> (and i32:$x, i32:$z),
(and i32:$y, (or i32:$x, i32:$z))),
- (V_BFI_B32_e64 (V_XOR_B32_e64 VSrc_b32:$x, VSrc_b32:$y), VSrc_b32:$z, VSrc_b32:$y)
+ (V_BFI_B32_e64 (V_XOR_B32_e64 (COPY_TO_REGCLASS VSrc_b32:$x, VGPR_32),
+ (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32)),
+ (COPY_TO_REGCLASS VSrc_b32:$z, VGPR_32),
+ (COPY_TO_REGCLASS VSrc_b32:$y, VGPR_32))
>;
-def : AMDGPUPat <
+def : AMDGPUPatIgnoreCopies <
(DivergentBinFrag<or> (and i64:$x, i64:$z),
(and i64:$y, (or i64:$x, i64:$z))),
(REG_SEQUENCE VReg_64,
(V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
(i32 (EXTRACT_SUBREG VReg_64:$y, sub0))),
- (i32 (EXTRACT_SUBREG VReg_64:$z, sub0)),
- (i32 (EXTRACT_SUBREG VReg_64:$y, sub0))), sub0,
+ (i32 (EXTRACT_SUBREG VReg_64:$z, sub0)),
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub0))), sub0,
(V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)),
(i32 (EXTRACT_SUBREG VReg_64:$y, sub1))),
- (i32 (EXTRACT_SUBREG VReg_64:$z, sub1)),
- (i32 (EXTRACT_SUBREG VReg_64:$y, sub1))), sub1)
+ (i32 (EXTRACT_SUBREG VReg_64:$z, sub1)),
+ (i32 (EXTRACT_SUBREG VReg_64:$y, sub1))), sub1)
>;
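The BFI and SHA-256 Ch patterns earlier in this file and the Ma patterns above rest on bitwise identities; the small self-contained C++ check below (illustrative only, not part of the patch) verifies them by brute force.

#include <cassert>
#include <cstdint>
#include <random>

// v_bfi_b32 computes (y & x) | (z & ~x): take bits of y where x is set and
// bits of z elsewhere.
static uint32_t bfi(uint32_t x, uint32_t y, uint32_t z) {
  return (y & x) | (z & ~x);
}

int main() {
  std::mt19937 rng(0);
  for (int i = 0; i < 1000; ++i) {
    uint32_t x = rng(), y = rng(), z = rng();
    // SHA-256 Ch: z ^ (x & (y ^ z)) also selects y where x is set, z elsewhere.
    assert((z ^ (x & (y ^ z))) == bfi(x, y, z));
    // SHA-256 Ma (majority): (x & z) | (y & (x | z)) == bfi(x ^ y, z, y),
    // which is the BFI (XOR x, y), z, y form used above.
    assert(((x & z) | (y & (x | z))) == bfi(x ^ y, z, y));
  }
  return 0;
}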
multiclass IntMed3Pat<Instruction med3Inst,
@@ -3486,8 +3480,6 @@ def G_AMDGPU_ATOMIC_CMPXCHG : AMDGPUGenericInstruction {
}
let Namespace = "AMDGPU" in {
-def G_AMDGPU_ATOMIC_INC : G_ATOMICRMW_OP;
-def G_AMDGPU_ATOMIC_DEC : G_ATOMICRMW_OP;
def G_AMDGPU_ATOMIC_FMIN : G_ATOMICRMW_OP;
def G_AMDGPU_ATOMIC_FMAX : G_ATOMICRMW_OP;
}
@@ -3614,15 +3606,6 @@ def G_FPTRUNC_ROUND_DOWNWARD : AMDGPUGenericInstruction {
// Dummy Instructions
//============================================================================//
-def V_ILLEGAL_gfx6_gfx7_gfx8_gfx9 : Enc32, InstSI<(outs), (ins), "v_illegal"> {
- let Inst{31-0} = 0xFFFFFFFF;
- let FixedSize = 1;
- let Size = 4;
- let Uses = [EXEC];
- let hasSideEffects = 1;
- let SubtargetPredicate = isGFX6GFX7GFX8GFX9;
-}
-
def V_ILLEGAL : Enc32, InstSI<(outs), (ins), "v_illegal"> {
let Inst{31-0} = 0x00000000;
let FixedSize = 1;
diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 2b5ca33b0e4f..c252d30e250e 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -331,7 +331,6 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
switch (Opc) {
case AMDGPU::S_BUFFER_LOAD_DWORD_IMM:
- case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR:
case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM:
case AMDGPU::S_LOAD_DWORD_IMM:
case AMDGPU::GLOBAL_LOAD_DWORD:
@@ -342,7 +341,6 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
case AMDGPU::FLAT_STORE_DWORD:
return 1;
case AMDGPU::S_BUFFER_LOAD_DWORDX2_IMM:
- case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
case AMDGPU::S_LOAD_DWORDX2_IMM:
case AMDGPU::GLOBAL_LOAD_DWORDX2:
@@ -360,7 +358,6 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
case AMDGPU::FLAT_STORE_DWORDX3:
return 3;
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
- case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
case AMDGPU::S_LOAD_DWORDX4_IMM:
case AMDGPU::GLOBAL_LOAD_DWORDX4:
@@ -371,7 +368,6 @@ static unsigned getOpcodeWidth(const MachineInstr &MI, const SIInstrInfo &TII) {
case AMDGPU::FLAT_STORE_DWORDX4:
return 4;
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
- case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR:
case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM:
case AMDGPU::S_LOAD_DWORDX8_IMM:
return 8;
@@ -432,6 +428,10 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFEN_exact:
case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFSET:
case AMDGPU::TBUFFER_LOAD_FORMAT_X_OFFSET_exact:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_IDXEN:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_IDXEN_exact:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_BOTHEN:
+ case AMDGPU::TBUFFER_LOAD_FORMAT_X_BOTHEN_exact:
return TBUFFER_LOAD;
case AMDGPU::TBUFFER_STORE_FORMAT_X_OFFEN:
case AMDGPU::TBUFFER_STORE_FORMAT_X_OFFEN_exact:
@@ -446,12 +446,6 @@ static InstClassEnum getInstClass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
return S_BUFFER_LOAD_IMM;
- // For the purposes of this optimization SGPR variants of buffer loads
- // are considered to be zero-offsetted SGPR_IMM loads.
- case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR:
- case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR:
- case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR:
- case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR:
case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
@@ -533,12 +527,6 @@ static unsigned getInstSubclass(unsigned Opc, const SIInstrInfo &TII) {
case AMDGPU::S_BUFFER_LOAD_DWORDX4_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX8_IMM:
return AMDGPU::S_BUFFER_LOAD_DWORD_IMM;
- // For the purposes of this optimization SGPR variants of buffer loads
- // are considered to be zero-offsetted SGPR_IMM loads.
- case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR:
- case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR:
- case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR:
- case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR:
case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
@@ -641,10 +629,6 @@ static AddressRegs getRegs(unsigned Opc, const SIInstrInfo &TII) {
switch (Opc) {
default:
return Result;
- case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR:
- case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR:
- case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR:
- case AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR:
case AMDGPU::S_BUFFER_LOAD_DWORD_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM:
case AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM:
@@ -740,7 +724,7 @@ void SILoadStoreOptimizer::CombineInfo::setMI(MachineBasicBlock::iterator MI,
Offset = 0;
} else {
int OffsetIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::offset);
- Offset = OffsetIdx == -1 ? 0 : I->getOperand(OffsetIdx).getImm();
+ Offset = I->getOperand(OffsetIdx).getImm();
}
if (InstClass == TBUFFER_LOAD || InstClass == TBUFFER_STORE)
@@ -887,7 +871,7 @@ bool SILoadStoreOptimizer::dmasksCanBeCombined(const CombineInfo &CI,
unsigned MaxMask = std::max(CI.DMask, Paired.DMask);
unsigned MinMask = std::min(CI.DMask, Paired.DMask);
- unsigned AllowedBitsForMin = llvm::countTrailingZeros(MaxMask);
+ unsigned AllowedBitsForMin = llvm::countr_zero(MaxMask);
if ((1u << AllowedBitsForMin) <= MinMask)
return false;
@@ -926,7 +910,7 @@ static unsigned getBufferFormatWithCompCount(unsigned OldFormat,
// - if Lo == 0, return 0 (even though the "- 1" below underflows
// - if Lo > Hi, return 0 (as if the range wrapped around)
static uint32_t mostAlignedValueInRange(uint32_t Lo, uint32_t Hi) {
- return Hi & maskLeadingOnes<uint32_t>(countLeadingZeros((Lo - 1) ^ Hi) + 1);
+ return Hi & maskLeadingOnes<uint32_t>(llvm::countl_zero((Lo - 1) ^ Hi) + 1);
}
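As a rough standalone illustration of what the helper above computes, the sketch below uses C++20's std::countl_zero and an explicit leading-ones mask in place of the LLVM helpers; mostAligned is a hypothetical name, and the corner cases spelled out in the comment above are not reproduced exactly.

#include <bit>
#include <cstdint>
#include <cstdio>

// Return the most-aligned value in [Lo, Hi], i.e. the value whose lowest set
// bit is as high as possible.
static uint32_t mostAligned(uint32_t Lo, uint32_t Hi) {
  unsigned LeadingOnes = std::countl_zero((Lo - 1) ^ Hi) + 1;
  uint32_t Mask = LeadingOnes >= 32 ? ~0u : ~0u << (32 - LeadingOnes);
  return Hi & Mask;
}

int main() {
  std::printf("%u\n", mostAligned(5, 20));  // 16: the only multiple of 16 in [5, 20]
  std::printf("%u\n", mostAligned(9, 15));  // 12: nothing 8-aligned fits, 12 is 4-aligned
  std::printf("%u\n", mostAligned(17, 31)); // 24: nothing 16-aligned fits, 24 is 8-aligned
  return 0;
}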
bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI,
@@ -975,9 +959,12 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI,
// Handle all non-DS instructions.
if ((CI.InstClass != DS_READ) && (CI.InstClass != DS_WRITE)) {
- return (EltOffset0 + CI.Width == EltOffset1 ||
- EltOffset1 + Paired.Width == EltOffset0) &&
- CI.CPol == Paired.CPol;
+ if (EltOffset0 + CI.Width != EltOffset1 &&
+ EltOffset1 + Paired.Width != EltOffset0)
+ return false;
+ if (CI.CPol != Paired.CPol)
+ return false;
+ return true;
}
// If the offset in elements doesn't fit in 8-bits, we might be able to use
@@ -1383,10 +1370,7 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeSMemLoadImmPair(
.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::sbase));
if (CI.InstClass == S_BUFFER_LOAD_SGPR_IMM)
New.add(*TII->getNamedOperand(*CI.I, AMDGPU::OpName::soffset));
- // For convenience, when SGPR_IMM buffer loads are merged into a
- // zero-offset load, we generate its SGPR variant.
- if (AMDGPU::hasNamedOperand(Opcode, AMDGPU::OpName::offset))
- New.addImm(MergedOffset);
+ New.addImm(MergedOffset);
New.addImm(CI.CPol).addMemOperand(combineKnownAdjacentMMOs(CI, Paired));
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI, Paired);
@@ -1697,14 +1681,11 @@ unsigned SILoadStoreOptimizer::getNewOpcode(const CombineInfo &CI,
default:
return 0;
case 2:
- return CI.Offset == 0 ? AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR
- : AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM;
+ return AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR_IMM;
case 4:
- return CI.Offset == 0 ? AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR
- : AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM;
+ return AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR_IMM;
case 8:
- return CI.Offset == 0 ? AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR
- : AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM;
+ return AMDGPU::S_BUFFER_LOAD_DWORDX8_SGPR_IMM;
}
case S_LOAD_IMM:
switch (Width) {
@@ -2092,7 +2073,7 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
// Step1: Find the base-registers and a 64bit constant offset.
MachineOperand &Base = *TII->getNamedOperand(MI, AMDGPU::OpName::vaddr);
MemAddress MAddr;
- if (Visited.find(&MI) == Visited.end()) {
+ if (!Visited.contains(&MI)) {
processBaseWithConstOffset(Base, MAddr);
Visited[&MI] = MAddr;
} else
@@ -2155,7 +2136,7 @@ bool SILoadStoreOptimizer::promoteConstantOffsetToImm(
const MachineOperand &BaseNext =
*TII->getNamedOperand(MINext, AMDGPU::OpName::vaddr);
MemAddress MAddrNext;
- if (Visited.find(&MINext) == Visited.end()) {
+ if (!Visited.contains(&MINext)) {
processBaseWithConstOffset(BaseNext, MAddrNext);
Visited[&MINext] = MAddrNext;
} else
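
The hunk above switches mostAlignedValueInRange() from countLeadingZeros to llvm::countl_zero without changing its behavior. As a reading aid, the following is a minimal standalone sketch of the same bit trick written against the C++20 <bit> header rather than LLVM's MathExtras; the function name mirrors the one above, the degenerate-case guard is an extra safety check not present in the original, and the test values are made up. The idea is that the leading bits on which (Lo - 1) and Hi agree form a common prefix, and keeping Hi's bits in that prefix while clearing everything below it yields the most aligned value still inside [Lo, Hi].

// Standalone sketch of the bit trick in mostAlignedValueInRange(), using
// C++20 <bit> instead of llvm/Support/MathExtras.h.
#include <bit>
#include <cstdint>
#include <cstdio>

static uint32_t mostAlignedValueInRange(uint32_t Lo, uint32_t Hi) {
  // Number of leading bits shared by (Lo - 1) and Hi, plus one.
  unsigned CommonPrefix = std::countl_zero((Lo - 1) ^ Hi) + 1;
  if (CommonPrefix >= 32)
    return Hi; // Degenerate case; avoids an out-of-range shift below.
  uint32_t LeadingOnes = ~uint32_t(0) << (32 - CommonPrefix);
  return Hi & LeadingOnes;
}

int main() {
  // [0x1234, 0x1FFF] contains 0x1800, which is 0x800 (2 KiB) aligned.
  std::printf("%#x\n", (unsigned)mostAlignedValueInRange(0x1234, 0x1FFF)); // 0x1800
  // As documented above, Lo == 0 deliberately wraps in "Lo - 1" and returns 0.
  std::printf("%#x\n", (unsigned)mostAlignedValueInRange(0, 0x1FFF));      // 0
  return 0;
}
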
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 67077a2eaa6b..00cb5b2878f4 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -427,6 +427,8 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
BuildMI(MBB, &MI, DL, TII->get(Andn2TermOpc), Exec)
.addReg(Exec)
.add(MI.getOperand(0));
+ if (LV)
+ LV->replaceKillInstruction(MI.getOperand(0).getReg(), MI, *AndN2);
auto BranchPt = skipToUncondBrOrEnd(MBB, MI.getIterator());
MachineInstr *Branch =
@@ -514,13 +516,18 @@ MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) {
LV->replaceKillInstruction(DataReg, MI, *NewMI);
if (SplitBB != &MBB) {
- // Track the set of registers defined in the split block so we don't
- // accidentally add the original block to AliveBlocks.
- DenseSet<Register> SplitDefs;
- for (MachineInstr &X : *SplitBB) {
- for (MachineOperand &Op : X.operands()) {
- if (Op.isReg() && Op.isDef() && Op.getReg().isVirtual())
- SplitDefs.insert(Op.getReg());
+ // Track the set of registers defined in the original block so we don't
+ // accidentally add the original block to AliveBlocks. AliveBlocks only
+ // includes blocks which are live through, which excludes live outs and
+ // local defs.
+ DenseSet<Register> DefInOrigBlock;
+
+ for (MachineBasicBlock *BlockPiece : {&MBB, SplitBB}) {
+ for (MachineInstr &X : *BlockPiece) {
+ for (MachineOperand &Op : X.all_defs()) {
+ if (Op.getReg().isVirtual())
+ DefInOrigBlock.insert(Op.getReg());
+ }
}
}
@@ -532,7 +539,7 @@ MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) {
VI.AliveBlocks.set(SplitBB->getNumber());
else {
for (MachineInstr *Kill : VI.Kills) {
- if (Kill->getParent() == SplitBB && !SplitDefs.contains(Reg))
+ if (Kill->getParent() == SplitBB && !DefInOrigBlock.contains(Reg))
VI.AliveBlocks.set(MBB.getNumber());
}
}
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 3450a9f0681f..47d28d5d0eab 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -50,7 +50,9 @@ public:
SILowerSGPRSpills() : MachineFunctionPass(ID) {}
void calculateSaveRestoreBlocks(MachineFunction &MF);
- bool spillCalleeSavedRegs(MachineFunction &MF);
+ bool spillCalleeSavedRegs(MachineFunction &MF,
+ SmallVectorImpl<int> &CalleeSavedFIs);
+ void extendWWMVirtRegLiveness(MachineFunction &MF, LiveIntervals *LIS);
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -58,6 +60,13 @@ public:
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
+
+ MachineFunctionProperties getClearedProperties() const override {
+ // SILowerSGPRSpills introduces new Virtual VGPRs for spilling SGPRs.
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::IsSSA)
+ .set(MachineFunctionProperties::Property::NoVRegs);
+ }
};
} // end anonymous namespace
@@ -197,7 +206,8 @@ static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) {
EntryBB.sortUniqueLiveIns();
}
-bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
+bool SILowerSGPRSpills::spillCalleeSavedRegs(
+ MachineFunction &MF, SmallVectorImpl<int> &CalleeSavedFIs) {
MachineRegisterInfo &MRI = MF.getRegInfo();
const Function &F = MF.getFunction();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -228,6 +238,7 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
TRI->getSpillAlign(*RC), true);
CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
+ CalleeSavedFIs.push_back(JunkFI);
}
}
@@ -248,6 +259,50 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
return false;
}
+void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF,
+ LiveIntervals *LIS) {
+ // TODO: This is a workaround to avoid the unmodelled liveness computed with
+ // whole-wave virtual registers when allocated together with the regular VGPR
+  // virtual registers. Presently, the liveness computed during regalloc is
+  // only uniform (single-lane aware) and doesn't take into account the
+  // divergent control flow that exists for our GPUs. Since the WWM registers
+  // can modify inactive lanes, wave-aware liveness should be computed for the
+  // virtual registers to accurately model their interferences. Without the
+  // divergent CFG of the function, it is difficult to implement such
+  // wave-aware liveness info. Until then, we conservatively extend the
+  // liveness of the WWM registers to the entire function so that they won't
+  // be reused without first spilling/splitting their live ranges.
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ // Insert the IMPLICIT_DEF for the wwm-registers in the entry blocks.
+ for (auto Reg : MFI->getSGPRSpillVGPRs()) {
+ for (MachineBasicBlock *SaveBlock : SaveBlocks) {
+ MachineBasicBlock::iterator InsertBefore = SaveBlock->begin();
+ auto MIB = BuildMI(*SaveBlock, *InsertBefore, InsertBefore->getDebugLoc(),
+ TII->get(AMDGPU::IMPLICIT_DEF), Reg);
+ MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
+ if (LIS) {
+ LIS->InsertMachineInstrInMaps(*MIB);
+ }
+ }
+ }
+
+  // Insert the KILL in the return blocks to extend their liveness until the
+  // end of the function. Insert a separate KILL for each VGPR.
+ for (MachineBasicBlock *RestoreBlock : RestoreBlocks) {
+ MachineBasicBlock::iterator InsertBefore =
+ RestoreBlock->getFirstTerminator();
+ for (auto Reg : MFI->getSGPRSpillVGPRs()) {
+ auto MIB =
+ BuildMI(*RestoreBlock, *InsertBefore, InsertBefore->getDebugLoc(),
+ TII->get(TargetOpcode::KILL));
+ MIB.addReg(Reg);
+ if (LIS)
+ LIS->InsertMachineInstrInMaps(*MIB);
+ }
+ }
+}
+
bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
@@ -261,7 +316,8 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
// First, expose any CSR SGPR spills. This is mostly the same as what PEI
// does, but somewhat simpler.
calculateSaveRestoreBlocks(MF);
- bool HasCSRs = spillCalleeSavedRegs(MF);
+ SmallVector<int> CalleeSavedFIs;
+ bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs);
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -275,6 +331,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
bool NewReservedRegs = false;
+ bool SpilledToVirtVGPRLanes = false;
// TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
// handled as SpilledToReg in regular PrologEpilogInserter.
@@ -297,23 +354,53 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
- if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
- NewReservedRegs = true;
- bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
- MI, FI, nullptr, Indexes, LIS);
- (void)Spilled;
- assert(Spilled && "failed to spill SGPR to VGPR when allocated");
- SpillFIs.set(FI);
+
+ bool IsCalleeSaveSGPRSpill =
+ std::find(CalleeSavedFIs.begin(), CalleeSavedFIs.end(), FI) !=
+ CalleeSavedFIs.end();
+ if (IsCalleeSaveSGPRSpill) {
+ // Spill callee-saved SGPRs into physical VGPR lanes.
+
+ // TODO: This is to ensure the CFIs are static for efficient frame
+ // unwinding in the debugger. Spilling them into virtual VGPR lanes
+        // involves regalloc allocating the physical VGPRs, which might
+        // cause intermediate spills/splits of such live ranges for a
+        // successful allocation. This would result in broken CFI encoding
+        // unless regalloc-aware CFI generation, which inserts new CFIs along
+        // with the intermediate spills, is implemented. No such support
+        // currently exists in the LLVM compiler.
+ if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI, true)) {
+ NewReservedRegs = true;
+ bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
+ MI, FI, nullptr, Indexes, LIS, true);
+ if (!Spilled)
+ llvm_unreachable(
+ "failed to spill SGPR to physical VGPR lane when allocated");
+ }
+ } else {
+ if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
+ bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
+ MI, FI, nullptr, Indexes, LIS);
+ if (!Spilled)
+ llvm_unreachable(
+ "failed to spill SGPR to virtual VGPR lane when allocated");
+ SpillFIs.set(FI);
+ SpilledToVirtVGPRLanes = true;
+ }
}
}
}
- // FIXME: Adding to live-ins redundant with reserving registers.
- for (MachineBasicBlock &MBB : MF) {
- for (auto Reg : FuncInfo->getSGPRSpillVGPRs())
- MBB.addLiveIn(Reg);
- MBB.sortUniqueLiveIns();
+ if (SpilledToVirtVGPRLanes) {
+ extendWWMVirtRegLiveness(MF, LIS);
+ if (LIS) {
+ // Compute the LiveInterval for the newly created virtual registers.
+ for (auto Reg : FuncInfo->getSGPRSpillVGPRs())
+ LIS->createAndComputeVirtRegInterval(Reg);
+ }
+ }
+ for (MachineBasicBlock &MBB : MF) {
// FIXME: The dead frame indices are replaced with a null register from
    // the debug value instructions. We should instead update it with the
// correct register value. But not sure the register value alone is
@@ -337,12 +424,30 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
MadeChange = true;
}
+ if (SpilledToVirtVGPRLanes) {
+ const TargetRegisterClass *RC = TRI->getWaveMaskRegClass();
+ // Shift back the reserved SGPR for EXEC copy into the lowest range.
+ // This SGPR is reserved to handle the whole-wave spill/copy operations
+ // that might get inserted during vgpr regalloc.
+ Register UnusedLowSGPR = TRI->findUnusedRegister(MRI, RC, MF);
+ if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) <
+ TRI->getHWRegIndex(FuncInfo->getSGPRForEXECCopy()))
+ FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR);
+ } else {
+ // No SGPR spills to virtual VGPR lanes and hence there won't be any WWM
+ // spills/copies. Reset the SGPR reserved for EXEC copy.
+ FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister);
+ }
+
SaveBlocks.clear();
RestoreBlocks.clear();
- // Updated the reserved registers with any VGPRs added for SGPR spills.
- if (NewReservedRegs)
- MRI.freezeReservedRegs(MF);
+  // Update the reserved registers with any physical VGPRs added for SGPR
+  // spills.
+ if (NewReservedRegs) {
+ for (Register Reg : FuncInfo->getWWMReservedRegs())
+ MRI.reserveReg(Reg, TRI);
+ }
return MadeChange;
}
diff --git a/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp b/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp
new file mode 100644
index 000000000000..9c3cd1bbd6b0
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SILowerWWMCopies.cpp
@@ -0,0 +1,141 @@
+//===-- SILowerWWMCopies.cpp - Lower Copies after regalloc ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// Lowers the WWM_COPY instructions for various register classes. The AMDGPU
+/// target generates a WWM_COPY instruction to differentiate a WWM copy from a
+/// regular COPY. This pass generates the necessary exec mask manipulation
+/// instructions to replicate 'Whole Wave Mode' and lowers WWM_COPY back to
+/// COPY.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "GCNSubtarget.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/VirtRegMap.h"
+#include "llvm/InitializePasses.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-lower-wwm-copies"
+
+namespace {
+
+class SILowerWWMCopies : public MachineFunctionPass {
+public:
+ static char ID;
+
+ SILowerWWMCopies() : MachineFunctionPass(ID) {
+ initializeSILowerWWMCopiesPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return "SI Lower WWM Copies"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ bool isSCCLiveAtMI(const MachineInstr &MI);
+ void addToWWMSpills(MachineFunction &MF, Register Reg);
+
+ LiveIntervals *LIS;
+ SlotIndexes *Indexes;
+ VirtRegMap *VRM;
+ const SIRegisterInfo *TRI;
+ const MachineRegisterInfo *MRI;
+ SIMachineFunctionInfo *MFI;
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(SILowerWWMCopies, DEBUG_TYPE, "SI Lower WWM Copies", false,
+ false)
+
+char SILowerWWMCopies::ID = 0;
+
+char &llvm::SILowerWWMCopiesID = SILowerWWMCopies::ID;
+
+bool SILowerWWMCopies::isSCCLiveAtMI(const MachineInstr &MI) {
+ // We can't determine the liveness info if LIS isn't available. Early return
+ // in that case and always assume SCC is live.
+ if (!LIS)
+ return true;
+
+ LiveRange &LR =
+ LIS->getRegUnit(*MCRegUnitIterator(MCRegister::from(AMDGPU::SCC), TRI));
+ SlotIndex Idx = LIS->getInstructionIndex(MI);
+ return LR.liveAt(Idx);
+}
+
+// If \p Reg is assigned a physical VGPR, add the latter to the wwm-spills
+// list so that all of its lanes are preserved at the function prolog/epilog.
+void SILowerWWMCopies::addToWWMSpills(MachineFunction &MF, Register Reg) {
+ if (Reg.isPhysical())
+ return;
+
+ Register PhysReg = VRM->getPhys(Reg);
+ assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
+ "should have allocated a physical register");
+
+ MFI->allocateWWMSpill(MF, PhysReg);
+}
+
+bool SILowerWWMCopies::runOnMachineFunction(MachineFunction &MF) {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ MFI = MF.getInfo<SIMachineFunctionInfo>();
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
+ Indexes = getAnalysisIfAvailable<SlotIndexes>();
+ VRM = getAnalysisIfAvailable<VirtRegMap>();
+ TRI = ST.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ if (!MFI->hasVRegFlags())
+ return false;
+
+ bool Changed = false;
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (MI.getOpcode() != AMDGPU::WWM_COPY)
+ continue;
+
+      // TODO: Combine adjacent WWM ops under the same exec save/restore
+ assert(TII->isVGPRCopy(MI));
+
+ // For WWM vector copies, manipulate the exec mask around the copy
+ // instruction.
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator InsertPt = MI.getIterator();
+ Register RegForExecCopy = MFI->getSGPRForEXECCopy();
+ TII->insertScratchExecCopy(MF, MBB, InsertPt, DL, RegForExecCopy,
+ isSCCLiveAtMI(MI), Indexes);
+ TII->restoreExec(MF, MBB, ++InsertPt, DL, RegForExecCopy, Indexes);
+ addToWWMSpills(MF, MI.getOperand(0).getReg());
+ LLVM_DEBUG(dbgs() << "WWM copy manipulation for " << MI);
+
+ // Lower WWM_COPY back to COPY
+ MI.setDesc(TII->get(AMDGPU::COPY));
+ Changed |= true;
+ }
+ }
+
+ return Changed;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index b2a433dd3db9..219464eac9ec 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -65,6 +65,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
Occupancy = ST.computeOccupancy(F, getLDSSize());
CallingConv::ID CC = F.getCallingConv();
+ VRegFlags.reserve(1024);
+
// FIXME: Should have analysis or something rather than attribute to detect
// calls.
const bool HasCalls = F.hasFnAttribute("amdgpu-calls");
@@ -119,7 +121,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
else if (ST.isMesaGfxShader(F))
ImplicitBufferPtr = true;
- if (!AMDGPU::isGraphics(CC)) {
+ if (!AMDGPU::isGraphics(CC) ||
+ (CC == CallingConv::AMDGPU_CS && ST.hasArchitectedSGPRs())) {
if (IsKernel || !F.hasFnAttribute("amdgpu-no-workgroup-id-x"))
WorkGroupIDX = true;
@@ -128,7 +131,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
if (!F.hasFnAttribute("amdgpu-no-workgroup-id-z"))
WorkGroupIDZ = true;
+ }
+ if (!AMDGPU::isGraphics(CC)) {
if (IsKernel || !F.hasFnAttribute("amdgpu-no-workitem-id-x"))
WorkItemIDX = true;
@@ -309,37 +314,23 @@ bool SIMachineFunctionInfo::isCalleeSavedReg(const MCPhysReg *CSRegs,
return false;
}
-bool SIMachineFunctionInfo::allocateVGPRForSGPRSpills(MachineFunction &MF,
- int FI,
- unsigned LaneIndex) {
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
+bool SIMachineFunctionInfo::allocateVirtualVGPRForSGPRSpills(
+ MachineFunction &MF, int FI, unsigned LaneIndex) {
MachineRegisterInfo &MRI = MF.getRegInfo();
Register LaneVGPR;
if (!LaneIndex) {
- LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
- if (LaneVGPR == AMDGPU::NoRegister) {
- // We have no VGPRs left for spilling SGPRs. Reset because we will not
- // partially spill the SGPR to VGPRs.
- SGPRSpillToVGPRLanes.erase(FI);
- return false;
- }
-
+ LaneVGPR = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
SpillVGPRs.push_back(LaneVGPR);
- // Add this register as live-in to all blocks to avoid machine verifier
- // complaining about use of an undefined physical register.
- for (MachineBasicBlock &BB : MF)
- BB.addLiveIn(LaneVGPR);
} else {
LaneVGPR = SpillVGPRs.back();
}
- SGPRSpillToVGPRLanes[FI].push_back(
+ SGPRSpillsToVirtualVGPRLanes[FI].push_back(
SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
return true;
}
-bool SIMachineFunctionInfo::allocateVGPRForPrologEpilogSGPRSpills(
+bool SIMachineFunctionInfo::allocatePhysicalVGPRForSGPRSpills(
MachineFunction &MF, int FI, unsigned LaneIndex) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
@@ -350,16 +341,21 @@ bool SIMachineFunctionInfo::allocateVGPRForPrologEpilogSGPRSpills(
if (LaneVGPR == AMDGPU::NoRegister) {
// We have no VGPRs left for spilling SGPRs. Reset because we will not
// partially spill the SGPR to VGPRs.
- PrologEpilogSGPRSpillToVGPRLanes.erase(FI);
+ SGPRSpillsToPhysicalVGPRLanes.erase(FI);
return false;
}
allocateWWMSpill(MF, LaneVGPR);
+ reserveWWMRegister(LaneVGPR);
+ for (MachineBasicBlock &MBB : MF) {
+ MBB.addLiveIn(LaneVGPR);
+ MBB.sortUniqueLiveIns();
+ }
} else {
- LaneVGPR = WWMSpills.back().first;
+ LaneVGPR = WWMReservedRegs.back();
}
- PrologEpilogSGPRSpillToVGPRLanes[FI].push_back(
+ SGPRSpillsToPhysicalVGPRLanes[FI].push_back(
SIRegisterInfo::SpilledReg(LaneVGPR, LaneIndex));
return true;
}
@@ -368,8 +364,8 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF,
int FI,
bool IsPrologEpilog) {
std::vector<SIRegisterInfo::SpilledReg> &SpillLanes =
- IsPrologEpilog ? PrologEpilogSGPRSpillToVGPRLanes[FI]
- : SGPRSpillToVGPRLanes[FI];
+ IsPrologEpilog ? SGPRSpillsToPhysicalVGPRLanes[FI]
+ : SGPRSpillsToVirtualVGPRLanes[FI];
// This has already been allocated.
if (!SpillLanes.empty())
@@ -390,15 +386,14 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPRLane(MachineFunction &MF,
"not spilling SGPRs to VGPRs");
unsigned &NumSpillLanes =
- IsPrologEpilog ? NumVGPRPrologEpilogSpillLanes : NumVGPRSpillLanes;
+ IsPrologEpilog ? NumPhysicalVGPRSpillLanes : NumVirtualVGPRSpillLanes;
for (unsigned I = 0; I < NumLanes; ++I, ++NumSpillLanes) {
unsigned LaneIndex = (NumSpillLanes % WaveSize);
- bool Allocated =
- IsPrologEpilog
- ? allocateVGPRForPrologEpilogSGPRSpills(MF, FI, LaneIndex)
- : allocateVGPRForSGPRSpills(MF, FI, LaneIndex);
+ bool Allocated = IsPrologEpilog
+ ? allocatePhysicalVGPRForSGPRSpills(MF, FI, LaneIndex)
+ : allocateVirtualVGPRForSGPRSpills(MF, FI, LaneIndex);
if (!Allocated) {
NumSpillLanes -= I;
return false;
@@ -479,16 +474,25 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
bool SIMachineFunctionInfo::removeDeadFrameIndices(
MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
- // Remove dead frame indices from function frame. And also make sure to remove
- // the frame indices from `SGPRSpillToVGPRLanes` data structure, otherwise, it
- // could result in an unexpected side effect and bug, in case of any
- // re-mapping of freed frame indices by later pass(es) like "stack slot
+  // Remove dead frame indices from the function frame; however, keep FP & BP
+  // since spills for them haven't been inserted yet. Also make sure to remove
+  // the frame indices from the `SGPRSpillsToVirtualVGPRLanes` data structure;
+  // otherwise, it could result in unexpected side effects and bugs in case of
+  // any re-mapping of freed frame indices by later pass(es) like "stack slot
// coloring".
- for (auto &R : make_early_inc_range(SGPRSpillToVGPRLanes)) {
+ for (auto &R : make_early_inc_range(SGPRSpillsToVirtualVGPRLanes)) {
MFI.RemoveStackObject(R.first);
- SGPRSpillToVGPRLanes.erase(R.first);
+ SGPRSpillsToVirtualVGPRLanes.erase(R.first);
}
+ // Remove the dead frame indices of CSR SGPRs which are spilled to physical
+ // VGPR lanes during SILowerSGPRSpills pass.
+ if (!ResetSGPRSpillStackIDs) {
+ for (auto &R : make_early_inc_range(SGPRSpillsToPhysicalVGPRLanes)) {
+ MFI.RemoveStackObject(R.first);
+ SGPRSpillsToPhysicalVGPRLanes.erase(R.first);
+ }
+ }
bool HaveSGPRToMemory = false;
if (ResetSGPRSpillStackIDs) {
@@ -537,6 +541,16 @@ MCPhysReg SIMachineFunctionInfo::getNextSystemSGPR() const {
return AMDGPU::SGPR0 + NumUserSGPRs + NumSystemSGPRs;
}
+void SIMachineFunctionInfo::MRI_NoteNewVirtualRegister(Register Reg) {
+ VRegFlags.grow(Reg);
+}
+
+void SIMachineFunctionInfo::MRI_NoteCloneVirtualRegister(Register NewReg,
+ Register SrcReg) {
+ VRegFlags.grow(NewReg);
+ VRegFlags[NewReg] = VRegFlags[SrcReg];
+}
+
Register
SIMachineFunctionInfo::getGITPtrLoReg(const MachineFunction &MF) const {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -638,12 +652,21 @@ yaml::SIMachineFunctionInfo::SIMachineFunctionInfo(
StackPtrOffsetReg(regToString(MFI.getStackPtrOffsetReg(), TRI)),
BytesInStackArgArea(MFI.getBytesInStackArgArea()),
ReturnsVoid(MFI.returnsVoid()),
- ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)), Mode(MFI.getMode()) {
+ ArgInfo(convertArgumentInfo(MFI.getArgInfo(), TRI)),
+ PSInputAddr(MFI.getPSInputAddr()),
+ PSInputEnable(MFI.getPSInputEnable()),
+ Mode(MFI.getMode()) {
for (Register Reg : MFI.getWWMReservedRegs())
WWMReservedRegs.push_back(regToString(Reg, TRI));
+ if (MFI.getLongBranchReservedReg())
+ LongBranchReservedReg = regToString(MFI.getLongBranchReservedReg(), TRI);
if (MFI.getVGPRForAGPRCopy())
VGPRForAGPRCopy = regToString(MFI.getVGPRForAGPRCopy(), TRI);
+
+ if (MFI.getSGPRForEXECCopy())
+ SGPRForEXECCopy = regToString(MFI.getSGPRForEXECCopy(), TRI);
+
auto SFI = MFI.getOptionalScavengeFI();
if (SFI)
ScavengeFI = yaml::FrameIndex(*SFI, MF.getFrameInfo());
@@ -661,6 +684,8 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
LDSSize = YamlMFI.LDSSize;
GDSSize = YamlMFI.GDSSize;
DynLDSAlign = YamlMFI.DynLDSAlign;
+ PSInputAddr = YamlMFI.PSInputAddr;
+ PSInputEnable = YamlMFI.PSInputEnable;
HighBitsOf32BitAddress = YamlMFI.HighBitsOf32BitAddress;
Occupancy = YamlMFI.Occupancy;
IsEntryFunction = YamlMFI.IsEntryFunction;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index c869ee875711..37572d30dff6 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -18,6 +18,7 @@
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
+#include "SIModeRegisterDefaults.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
@@ -215,7 +216,7 @@ struct SIMode {
SIMode() = default;
- SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
+ SIMode(const SIModeRegisterDefaults &Mode) {
IEEE = Mode.IEEE;
DX10Clamp = Mode.DX10Clamp;
FP32InputDenormals = Mode.FP32Denormals.Input != DenormalMode::PreserveSign;
@@ -275,9 +276,15 @@ struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
bool ReturnsVoid = true;
std::optional<SIArgumentInfo> ArgInfo;
+
+ unsigned PSInputAddr = 0;
+ unsigned PSInputEnable = 0;
+
SIMode Mode;
std::optional<FrameIndex> ScavengeFI;
StringValue VGPRForAGPRCopy;
+ StringValue SGPRForEXECCopy;
+ StringValue LongBranchReservedReg;
SIMachineFunctionInfo() = default;
SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
@@ -311,6 +318,8 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
YamlIO.mapOptional("bytesInStackArgArea", MFI.BytesInStackArgArea, 0u);
YamlIO.mapOptional("returnsVoid", MFI.ReturnsVoid, true);
YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
+ YamlIO.mapOptional("psInputAddr", MFI.PSInputAddr, 0u);
+ YamlIO.mapOptional("psInputEnable", MFI.PSInputEnable, 0u);
YamlIO.mapOptional("mode", MFI.Mode, SIMode());
YamlIO.mapOptional("highBitsOf32BitAddress",
MFI.HighBitsOf32BitAddress, 0u);
@@ -319,6 +328,10 @@ template <> struct MappingTraits<SIMachineFunctionInfo> {
YamlIO.mapOptional("scavengeFI", MFI.ScavengeFI);
YamlIO.mapOptional("vgprForAGPRCopy", MFI.VGPRForAGPRCopy,
StringValue()); // Don't print out when it's empty.
+ YamlIO.mapOptional("sgprForEXECCopy", MFI.SGPRForEXECCopy,
+ StringValue()); // Don't print out when it's empty.
+ YamlIO.mapOptional("longBranchReservedReg", MFI.LongBranchReservedReg,
+ StringValue());
}
};
@@ -355,11 +368,12 @@ public:
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
-class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
+class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
+ private MachineRegisterInfo::Delegate {
friend class GCNTargetMachine;
// State of MODE register, assumed FP mode.
- AMDGPU::SIModeRegisterDefaults Mode;
+ SIModeRegisterDefaults Mode;
// Registers that may be reserved for spilling purposes. These may be the same
// as the input registers.
@@ -374,6 +388,11 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
// base to the beginning of the new function's frame.
Register StackPtrOffsetReg = AMDGPU::SP_REG;
+ // Registers that may be reserved when RA doesn't allocate enough
+ // registers to plan for the case where an indirect branch ends up
+ // being needed during branch relaxation.
+ Register LongBranchReservedReg;
+
AMDGPUFunctionArgInfo ArgInfo;
// Graphics info.
@@ -453,6 +472,9 @@ private:
unsigned HighBitsOf32BitAddress;
+ // Flags associated with the virtual registers.
+ IndexedMap<uint8_t, VirtReg2IndexFunctor> VRegFlags;
+
// Current recorded maximum possible occupancy.
unsigned Occupancy;
@@ -462,6 +484,10 @@ private:
MCPhysReg getNextSystemSGPR() const;
+ // MachineRegisterInfo callback functions to notify events.
+ void MRI_NoteNewVirtualRegister(Register Reg) override;
+ void MRI_NoteCloneVirtualRegister(Register NewReg, Register SrcReg) override;
+
public:
struct VGPRSpillToAGPR {
SmallVector<MCPhysReg, 32> Lanes;
@@ -470,15 +496,16 @@ public:
};
private:
- // To track VGPR + lane index for each subregister of the SGPR spilled to
- // frameindex key during SILowerSGPRSpills pass.
- DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>> SGPRSpillToVGPRLanes;
- // To track VGPR + lane index for spilling special SGPRs like Frame Pointer
- // identified during PrologEpilogInserter.
+ // To track virtual VGPR + lane index for each subregister of the SGPR spilled
+ // to frameindex key during SILowerSGPRSpills pass.
+ DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>>
+ SGPRSpillsToVirtualVGPRLanes;
+ // To track physical VGPR + lane index for CSR SGPR spills and special SGPRs
+ // like Frame Pointer identified during PrologEpilogInserter.
DenseMap<int, std::vector<SIRegisterInfo::SpilledReg>>
- PrologEpilogSGPRSpillToVGPRLanes;
- unsigned NumVGPRSpillLanes = 0;
- unsigned NumVGPRPrologEpilogSpillLanes = 0;
+ SGPRSpillsToPhysicalVGPRLanes;
+ unsigned NumVirtualVGPRSpillLanes = 0;
+ unsigned NumPhysicalVGPRSpillLanes = 0;
SmallVector<Register, 2> SpillVGPRs;
using WWMSpillsMap = MapVector<Register, int>;
// To track the registers used in instructions that can potentially modify the
@@ -504,6 +531,9 @@ private:
// PrologEpilogInserter.
PrologEpilogSGPRSpillsMap PrologEpilogSGPRSpills;
+ // To save/restore EXEC MASK around WWM spills and copies.
+ Register SGPRForEXECCopy;
+
DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
// AGPRs used for VGPR spills.
@@ -519,10 +549,10 @@ private:
private:
Register VGPRForAGPRCopy;
- bool allocateVGPRForSGPRSpills(MachineFunction &MF, int FI,
- unsigned LaneIndex);
- bool allocateVGPRForPrologEpilogSGPRSpills(MachineFunction &MF, int FI,
- unsigned LaneIndex);
+ bool allocateVirtualVGPRForSGPRSpills(MachineFunction &MF, int FI,
+ unsigned LaneIndex);
+ bool allocatePhysicalVGPRForSGPRSpills(MachineFunction &MF, int FI,
+ unsigned LaneIndex);
public:
Register getVGPRForAGPRCopy() const {
@@ -551,14 +581,12 @@ public:
void reserveWWMRegister(Register Reg) { WWMReservedRegs.insert(Reg); }
- AMDGPU::SIModeRegisterDefaults getMode() const {
- return Mode;
- }
+ SIModeRegisterDefaults getMode() const { return Mode; }
ArrayRef<SIRegisterInfo::SpilledReg>
- getSGPRSpillToVGPRLanes(int FrameIndex) const {
- auto I = SGPRSpillToVGPRLanes.find(FrameIndex);
- return (I == SGPRSpillToVGPRLanes.end())
+ getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const {
+ auto I = SGPRSpillsToVirtualVGPRLanes.find(FrameIndex);
+ return (I == SGPRSpillsToVirtualVGPRLanes.end())
? ArrayRef<SIRegisterInfo::SpilledReg>()
: ArrayRef(I->second);
}
@@ -579,7 +607,7 @@ public:
  // Check if an entry is created for \p Reg in PrologEpilogSGPRSpills. Return
  // true on success and false otherwise.
bool hasPrologEpilogSGPRSpillEntry(Register Reg) const {
- return PrologEpilogSGPRSpills.find(Reg) != PrologEpilogSGPRSpills.end();
+ return PrologEpilogSGPRSpills.contains(Reg);
}
// Get the scratch SGPR if allocated to save/restore \p Reg.
@@ -620,13 +648,28 @@ public:
}
ArrayRef<SIRegisterInfo::SpilledReg>
- getPrologEpilogSGPRSpillToVGPRLanes(int FrameIndex) const {
- auto I = PrologEpilogSGPRSpillToVGPRLanes.find(FrameIndex);
- return (I == PrologEpilogSGPRSpillToVGPRLanes.end())
+ getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const {
+ auto I = SGPRSpillsToPhysicalVGPRLanes.find(FrameIndex);
+ return (I == SGPRSpillsToPhysicalVGPRLanes.end())
? ArrayRef<SIRegisterInfo::SpilledReg>()
: ArrayRef(I->second);
}
+ void setFlag(Register Reg, uint8_t Flag) {
+ assert(Reg.isVirtual());
+ if (VRegFlags.inBounds(Reg))
+ VRegFlags[Reg] |= Flag;
+ }
+
+ bool checkFlag(Register Reg, uint8_t Flag) const {
+ if (Reg.isPhysical())
+ return false;
+
+ return VRegFlags.inBounds(Reg) && VRegFlags[Reg] & Flag;
+ }
+
+ bool hasVRegFlags() { return VRegFlags.size(); }
+
void allocateWWMSpill(MachineFunction &MF, Register VGPR, uint64_t Size = 4,
Align Alignment = Align(4));
@@ -639,6 +682,10 @@ public:
return SpillAGPR;
}
+ Register getSGPRForEXECCopy() const { return SGPRForEXECCopy; }
+
+ void setSGPRForEXECCopy(Register Reg) { SGPRForEXECCopy = Reg; }
+
ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
return SpillVGPR;
}
@@ -693,21 +740,35 @@ public:
}
// Add system SGPRs.
- Register addWorkGroupIDX() {
- ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
- NumSystemSGPRs += 1;
+ Register addWorkGroupIDX(bool HasArchitectedSGPRs) {
+ Register Reg =
+ HasArchitectedSGPRs ? (MCPhysReg)AMDGPU::TTMP9 : getNextSystemSGPR();
+ ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(Reg);
+ if (!HasArchitectedSGPRs)
+ NumSystemSGPRs += 1;
+
return ArgInfo.WorkGroupIDX.getRegister();
}
- Register addWorkGroupIDY() {
- ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
- NumSystemSGPRs += 1;
+ Register addWorkGroupIDY(bool HasArchitectedSGPRs) {
+ Register Reg =
+ HasArchitectedSGPRs ? (MCPhysReg)AMDGPU::TTMP7 : getNextSystemSGPR();
+ unsigned Mask = HasArchitectedSGPRs && hasWorkGroupIDZ() ? 0xffff : ~0u;
+ ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(Reg, Mask);
+ if (!HasArchitectedSGPRs)
+ NumSystemSGPRs += 1;
+
return ArgInfo.WorkGroupIDY.getRegister();
}
- Register addWorkGroupIDZ() {
- ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
- NumSystemSGPRs += 1;
+ Register addWorkGroupIDZ(bool HasArchitectedSGPRs) {
+ Register Reg =
+ HasArchitectedSGPRs ? (MCPhysReg)AMDGPU::TTMP7 : getNextSystemSGPR();
+ unsigned Mask = HasArchitectedSGPRs ? 0xffff << 16 : ~0u;
+ ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(Reg, Mask);
+ if (!HasArchitectedSGPRs)
+ NumSystemSGPRs += 1;
+
return ArgInfo.WorkGroupIDZ.getRegister();
}
@@ -872,6 +933,8 @@ public:
StackPtrOffsetReg = Reg;
}
+ void setLongBranchReservedReg(Register Reg) { LongBranchReservedReg = Reg; }
+
// Note the unset value for this is AMDGPU::SP_REG rather than
// NoRegister. This is mostly a workaround for MIR tests where state that
// can't be directly computed from the function is not preserved in serialized
@@ -880,6 +943,8 @@ public:
return StackPtrOffsetReg;
}
+ Register getLongBranchReservedReg() const { return LongBranchReservedReg; }
+
Register getQueuePtrUserSGPR() const {
return ArgInfo.QueuePtr.getRegister();
}
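
In the architected-SGPR path added above, addWorkGroupIDX() reads the workgroup ID X from TTMP9, while addWorkGroupIDY() and addWorkGroupIDZ() share TTMP7 and therefore carry the masks 0xffff and 0xffff << 16 in their ArgDescriptors. The short standalone sketch below only illustrates that low-half/high-half packing; the register value is hypothetical and the extraction is not code the backend itself emits.

// Illustration of the TTMP7 packing implied by the masks above: workgroup ID Y
// occupies the low 16 bits and workgroup ID Z the high 16 bits of one 32-bit
// register. The value below is made up for the example.
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t TTMP7 = 0x00050003; // hypothetical contents: Z = 5, Y = 3

  uint32_t WorkGroupIDY = TTMP7 & 0xffffu;             // mask 0xffff
  uint32_t WorkGroupIDZ = (TTMP7 & 0xffff0000u) >> 16; // mask 0xffff << 16

  std::printf("Y = %u, Z = %u\n", (unsigned)WorkGroupIDY, (unsigned)WorkGroupIDZ);
  return 0;
}
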
diff --git a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
index 6d901d6783f0..677f1590287e 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -1883,7 +1883,7 @@ void SIScheduleDAGMI::schedule()
LLVM_DEBUG(dbgs() << "Preparing Scheduling\n");
buildDAGWithRegPressure();
- postprocessDAG();
+ postProcessDAG();
LLVM_DEBUG(dump());
if (PrintDAGs)
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 5f2707317984..bc48f7b76c6d 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -22,7 +22,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/Support/AtomicOrdering.h"
-#include "llvm/Support/TargetParser.h"
+#include "llvm/TargetParser/TargetParser.h"
using namespace llvm;
using namespace llvm::AMDGPU;
@@ -351,6 +351,10 @@ public:
/// Virtual destructor to allow derivations to be deleted.
virtual ~SICacheControl() = default;
+ virtual bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI,
+ MachineBasicBlock::iterator &MI) const {
+ return false;
+ }
};
class SIGfx6CacheControl : public SICacheControl {
@@ -509,6 +513,20 @@ public:
bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
Position Pos) const override;
+
+ bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI,
+ MachineBasicBlock::iterator &MI) const override {
+ bool Changed = false;
+ if (ST.hasForceStoreSC0SC1() &&
+ (MOI.getInstrAddrSpace() & (SIAtomicAddrSpace::SCRATCH |
+ SIAtomicAddrSpace::GLOBAL |
+ SIAtomicAddrSpace::OTHER)) !=
+ SIAtomicAddrSpace::NONE) {
+ Changed |= enableSC0Bit(MI);
+ Changed |= enableSC1Bit(MI);
+ }
+ return Changed;
+ }
};
class SIGfx10CacheControl : public SIGfx7CacheControl {
@@ -2209,8 +2227,13 @@ bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI,
bool Changed = false;
if (MOI.isAtomic()) {
- if (MOI.getOrdering() == AtomicOrdering::Acquire ||
- MOI.getOrdering() == AtomicOrdering::Release ||
+ if (MOI.getOrdering() == AtomicOrdering::Acquire)
+ Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getOrderingAddrSpace(),
+ SIMemOp::LOAD | SIMemOp::STORE,
+ MOI.getIsCrossAddressSpaceOrdering(),
+ Position::BEFORE);
+
+ if (MOI.getOrdering() == AtomicOrdering::Release ||
MOI.getOrdering() == AtomicOrdering::AcquireRelease ||
MOI.getOrdering() == AtomicOrdering::SequentiallyConsistent)
/// TODO: This relies on a barrier always generating a waitcnt
@@ -2319,9 +2342,10 @@ bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
if (const auto &MOI = MOA.getLoadInfo(MI))
Changed |= expandLoad(*MOI, MI);
- else if (const auto &MOI = MOA.getStoreInfo(MI))
+ else if (const auto &MOI = MOA.getStoreInfo(MI)) {
Changed |= expandStore(*MOI, MI);
- else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
+ Changed |= CC->tryForceStoreSC0SC1(*MOI, MI);
+ } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
Changed |= expandAtomicFence(*MOI, MI);
else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI);
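
The new tryForceStoreSC0SC1() hook above decides whether a store touches any of the relevant address spaces by AND-ing the instruction's address-space mask with SCRATCH | GLOBAL | OTHER and comparing against NONE. The standalone sketch below shows that flags-enum intersection pattern in isolation; the enum, its members, and the sample values are stand-ins rather than the real SIAtomicAddrSpace.

// Minimal flags-enum intersection test, mirroring the shape of
//   (MOI.getInstrAddrSpace() & (SCRATCH | GLOBAL | OTHER)) != NONE
// The enum here is illustrative only.
#include <cstdint>
#include <cstdio>

enum class AddrSpace : uint8_t {
  NONE = 0,
  GLOBAL = 1 << 0,
  LDS = 1 << 1,
  SCRATCH = 1 << 2,
  OTHER = 1 << 3,
};

constexpr AddrSpace operator|(AddrSpace A, AddrSpace B) {
  return AddrSpace(uint8_t(A) | uint8_t(B));
}
constexpr AddrSpace operator&(AddrSpace A, AddrSpace B) {
  return AddrSpace(uint8_t(A) & uint8_t(B));
}

int main() {
  AddrSpace InstrAS = AddrSpace::GLOBAL | AddrSpace::LDS;
  AddrSpace Interesting =
      AddrSpace::SCRATCH | AddrSpace::GLOBAL | AddrSpace::OTHER;

  bool Touches = (InstrAS & Interesting) != AddrSpace::NONE;
  std::printf("intersects: %s\n", Touches ? "yes" : "no"); // yes (GLOBAL)
  return 0;
}
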
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
index 0d48c3159c6f..be395d53c34e 100644
--- a/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
+++ b/llvm/lib/Target/AMDGPU/SIModeRegister.cpp
@@ -29,10 +29,10 @@ using namespace llvm;
struct Status {
// Mask is a bitmask where a '1' indicates the corresponding Mode bit has a
// known value
- unsigned Mask;
- unsigned Mode;
+ unsigned Mask = 0;
+ unsigned Mode = 0;
- Status() : Mask(0), Mode(0){};
+ Status() = default;
Status(unsigned NewMask, unsigned NewMode) : Mask(NewMask), Mode(NewMode) {
Mode &= Mask;
@@ -96,13 +96,13 @@ public:
// In Phase 1 we record the first instruction that has a mode requirement,
// which is used in Phase 3 if we need to insert a mode change.
- MachineInstr *FirstInsertionPoint;
+ MachineInstr *FirstInsertionPoint = nullptr;
// A flag to indicate whether an Exit value has been set (we can't tell by
// examining the Exit value itself as all values may be valid results).
- bool ExitSet;
+ bool ExitSet = false;
- BlockData() : FirstInsertionPoint(nullptr), ExitSet(false){};
+ BlockData() = default;
};
namespace {
@@ -222,8 +222,8 @@ Status SIModeRegister::getInstructionMode(MachineInstr &MI,
void SIModeRegister::insertSetreg(MachineBasicBlock &MBB, MachineInstr *MI,
const SIInstrInfo *TII, Status InstrMode) {
while (InstrMode.Mask) {
- unsigned Offset = countTrailingZeros<unsigned>(InstrMode.Mask);
- unsigned Width = countTrailingOnes<unsigned>(InstrMode.Mask >> Offset);
+ unsigned Offset = llvm::countr_zero<unsigned>(InstrMode.Mask);
+ unsigned Width = llvm::countr_one<unsigned>(InstrMode.Mask >> Offset);
unsigned Value = (InstrMode.Mode >> Offset) & ((1 << Width) - 1);
BuildMI(MBB, MI, nullptr, TII->get(AMDGPU::S_SETREG_IMM32_B32))
.addImm(Value)
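
The insertSetreg() loop above decomposes the mode-change mask into contiguous runs of set bits, taking each run's offset from countr_zero and its width from countr_one, so one S_SETREG can be emitted per run. The following is a minimal standalone sketch of that decomposition using C++20 <bit>; the sample mask is arbitrary, and the bit-clearing step is only a stand-in for whatever the real loop does after emitting the setreg (that part is outside the hunk shown).

// Peel a bitmask into contiguous (offset, width) runs, as insertSetreg() does
// with llvm::countr_zero / llvm::countr_one.
#include <bit>
#include <cstdio>

int main() {
  unsigned Mask = 0b0111'0000'1100; // runs: offset 2 width 2, offset 8 width 3
  while (Mask) {
    unsigned Offset = std::countr_zero(Mask);         // skip the trailing zeros
    unsigned Width = std::countr_one(Mask >> Offset); // length of the run of ones
    std::printf("offset=%u width=%u\n", Offset, Width);
    Mask &= ~(((1u << Width) - 1) << Offset);         // drop the handled run
  }
  return 0;
}
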
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp b/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp
new file mode 100644
index 000000000000..413ef5d162a7
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.cpp
@@ -0,0 +1,38 @@
+//===-- SIModeRegisterDefaults.cpp ------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SIModeRegisterDefaults.h"
+
+using namespace llvm;
+
+SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
+ *this = getDefaultForCallingConv(F.getCallingConv());
+
+ StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
+ if (!IEEEAttr.empty())
+ IEEE = IEEEAttr == "true";
+
+ StringRef DX10ClampAttr =
+ F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
+ if (!DX10ClampAttr.empty())
+ DX10Clamp = DX10ClampAttr == "true";
+
+ StringRef DenormF32Attr =
+ F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
+ if (!DenormF32Attr.empty())
+ FP32Denormals = parseDenormalFPAttribute(DenormF32Attr);
+
+ StringRef DenormAttr =
+ F.getFnAttribute("denormal-fp-math").getValueAsString();
+ if (!DenormAttr.empty()) {
+ DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);
+ if (DenormF32Attr.empty())
+ FP32Denormals = DenormMode;
+ FP64FP16Denormals = DenormMode;
+ }
+}
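
The constructor above layers function attributes on top of the calling-convention defaults: "amdgpu-ieee" and "amdgpu-dx10-clamp" override the two booleans, and for denormals the f32-specific "denormal-fp-math-f32" takes precedence over the generic "denormal-fp-math", which fills in FP32 only when the f32 attribute is absent but always drives FP64/FP16. The standalone sketch below reproduces only that precedence rule; a plain std::map stands in for the attribute list and strings stand in for DenormalMode.

// Sketch of the denormal-attribute precedence implemented above. The map and
// the string values are stand-ins for llvm::Function attributes.
#include <cstdio>
#include <map>
#include <string>

int main() {
  std::map<std::string, std::string> Attrs = {
      {"denormal-fp-math", "preserve-sign,preserve-sign"},
      {"denormal-fp-math-f32", "ieee,ieee"},
  };

  std::string FP32 = "ieee,ieee";     // calling-convention defaults
  std::string FP64FP16 = "ieee,ieee";

  auto F32It = Attrs.find("denormal-fp-math-f32");
  if (F32It != Attrs.end())
    FP32 = F32It->second;             // f32-specific attribute wins for FP32

  auto GenIt = Attrs.find("denormal-fp-math");
  if (GenIt != Attrs.end()) {
    if (F32It == Attrs.end())
      FP32 = GenIt->second;           // generic applies to f32 only as a fallback
    FP64FP16 = GenIt->second;         // generic always applies to f64/f16
  }

  std::printf("f32: %s\nf64/f16: %s\n", FP32.c_str(), FP64FP16.c_str());
  return 0;
}
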
diff --git a/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h b/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h
new file mode 100644
index 000000000000..df2e3f9bff32
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/SIModeRegisterDefaults.h
@@ -0,0 +1,90 @@
+//===-- SIModeRegisterDefaults.h --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H
+#define LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H
+
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/FloatingPointMode.h"
+
+namespace llvm {
+
+// Track defaults for fields in the MODE register.
+struct SIModeRegisterDefaults {
+ /// Floating point opcodes that support exception flag gathering quiet and
+ /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
+  /// become IEEE 754-2008 compliant due to signaling NaN propagation and
+ /// quieting.
+ bool IEEE : 1;
+
+ /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
+ /// clamp NaN to zero; otherwise, pass NaN through.
+ bool DX10Clamp : 1;
+
+  /// If this is set, neither input nor output denormals are flushed for most f32
+ /// instructions.
+ DenormalMode FP32Denormals;
+
+  /// If this is set, neither input nor output denormals are flushed for both f64
+ /// and f16/v2f16 instructions.
+ DenormalMode FP64FP16Denormals;
+
+ SIModeRegisterDefaults() :
+ IEEE(true),
+ DX10Clamp(true),
+ FP32Denormals(DenormalMode::getIEEE()),
+ FP64FP16Denormals(DenormalMode::getIEEE()) {}
+
+ SIModeRegisterDefaults(const Function &F);
+
+ static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
+ SIModeRegisterDefaults Mode;
+ Mode.IEEE = !AMDGPU::isShader(CC);
+ return Mode;
+ }
+
+ bool operator==(const SIModeRegisterDefaults Other) const {
+ return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
+ FP32Denormals == Other.FP32Denormals &&
+ FP64FP16Denormals == Other.FP64FP16Denormals;
+ }
+
+ /// Get the encoding value for the FP_DENORM bits of the mode register for the
+ /// FP32 denormal mode.
+ uint32_t fpDenormModeSPValue() const {
+ if (FP32Denormals == DenormalMode::getPreserveSign())
+ return FP_DENORM_FLUSH_IN_FLUSH_OUT;
+ if (FP32Denormals.Output == DenormalMode::PreserveSign)
+ return FP_DENORM_FLUSH_OUT;
+ if (FP32Denormals.Input == DenormalMode::PreserveSign)
+ return FP_DENORM_FLUSH_IN;
+ return FP_DENORM_FLUSH_NONE;
+ }
+
+ /// Get the encoding value for the FP_DENORM bits of the mode register for the
+ /// FP64/FP16 denormal mode.
+ uint32_t fpDenormModeDPValue() const {
+ if (FP64FP16Denormals == DenormalMode::getPreserveSign())
+ return FP_DENORM_FLUSH_IN_FLUSH_OUT;
+ if (FP64FP16Denormals.Output == DenormalMode::PreserveSign)
+ return FP_DENORM_FLUSH_OUT;
+ if (FP64FP16Denormals.Input == DenormalMode::PreserveSign)
+ return FP_DENORM_FLUSH_IN;
+ return FP_DENORM_FLUSH_NONE;
+ }
+
+ // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
+ // be able to override.
+ bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
+ return DX10Clamp == CalleeMode.DX10Clamp && IEEE == CalleeMode.IEEE;
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIMODEREGISTERDEFAULTS_H
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
index 85de3a548411..d2a5eb89da12 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
@@ -96,8 +96,8 @@ static bool isDefBetween(const SIRegisterInfo &TRI,
if (Reg.isVirtual())
return isDefBetween(LIS->getInterval(Reg), AndIdx, SelIdx);
- for (MCRegUnitIterator UI(Reg.asMCReg(), &TRI); UI.isValid(); ++UI) {
- if (isDefBetween(LIS->getRegUnit(*UI), AndIdx, SelIdx))
+ for (MCRegUnit Unit : TRI.regunits(Reg.asMCReg())) {
+ if (isDefBetween(LIS->getRegUnit(Unit), AndIdx, SelIdx))
return true;
}
@@ -106,7 +106,7 @@ static bool isDefBetween(const SIRegisterInfo &TRI,
// Optimize sequence
// %sel = V_CNDMASK_B32_e64 0, 1, %cc
-// %cmp = V_CMP_NE_U32 1, %1
+// %cmp = V_CMP_NE_U32 1, %sel
// $vcc = S_AND_B64 $exec, %cmp
// S_CBRANCH_VCC[N]Z
// =>
@@ -218,46 +218,11 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
// and their associated liveness information.
SlotIndex CmpIdx = LIS->getInstructionIndex(*Cmp);
if (CCReg.isVirtual()) {
- // Apply live ranges from SelLI to CCReg potentially matching splits
- // and extending to loop boundaries.
-
- auto applyLiveRanges = [&](LiveRange &Dst, VNInfo *VNI) {
- // Copy live ranges from SelLI, adjusting start and end as required
- auto DefSegment = SelLI->FindSegmentContaining(SelIdx.getRegSlot());
- assert(DefSegment != SelLI->end() &&
- "No live interval segment covering definition?");
- for (auto I = DefSegment; I != SelLI->end() && I->start <= AndIdx; ++I) {
- SlotIndex Start = I->start < SelIdx.getRegSlot() ?
- SelIdx.getRegSlot() : I->start;
- SlotIndex End = I->end < AndIdx.getRegSlot() || I->end.isBlock() ?
- I->end : AndIdx.getRegSlot();
- Dst.addSegment(LiveRange::Segment(Start, End, VNI));
- }
- // If SelLI does not cover AndIdx (because Cmp killed Sel) then extend.
- if (!SelLI->getSegmentContaining(AndIdx.getRegSlot()))
- Dst.addSegment(LiveRange::Segment(CmpIdx.getRegSlot(), AndIdx.getRegSlot(), VNI));
- };
-
LiveInterval &CCLI = LIS->getInterval(CCReg);
auto CCQ = CCLI.Query(SelIdx.getRegSlot());
- if (CCQ.valueIn())
- applyLiveRanges(CCLI, CCQ.valueIn());
-
- if (CC->getSubReg()) {
- LaneBitmask Mask = TRI->getSubRegIndexLaneMask(CC->getSubReg());
- BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- CCLI.refineSubRanges(
- Allocator, Mask,
- [=](LiveInterval::SubRange &SR) {
- auto CCQS = SR.Query(SelIdx.getRegSlot());
- if (CCQS.valueIn())
- applyLiveRanges(SR, CCQS.valueIn());
- },
- *LIS->getSlotIndexes(), *TRI);
- CCLI.removeEmptySubRanges();
-
- SmallVector<LiveInterval *> SplitLIs;
- LIS->splitSeparateComponents(CCLI, SplitLIs);
+ if (CCQ.valueIn()) {
+ LIS->removeInterval(CCReg);
+ LIS->createAndComputeVirtRegInterval(CCReg);
}
} else
LIS->removeAllRegUnitsForPhysReg(CCReg);
@@ -287,7 +252,13 @@ bool SIOptimizeExecMaskingPreRA::optimizeVcndVcmpPair(MachineBasicBlock &MBB) {
LIS->removeVRegDefAt(*SelLI, SelIdx.getRegSlot());
LIS->RemoveMachineInstrFromMaps(*Sel);
+ bool ShrinkSel = Sel->getOperand(0).readsReg();
Sel->eraseFromParent();
+ if (ShrinkSel) {
+ // The result of the V_CNDMASK was a subreg def which counted as a read
+ // from the other parts of the reg. Shrink their live ranges.
+ LIS->shrinkToUses(SelLI);
+ }
}
}
@@ -349,8 +320,8 @@ bool SIOptimizeExecMaskingPreRA::optimizeElseBranch(MachineBasicBlock &MBB) {
// Instead just check that the def segments are adjacent.
SlotIndex StartIdx = LIS->getInstructionIndex(SaveExecMI);
SlotIndex EndIdx = LIS->getInstructionIndex(*AndExecMI);
- for (MCRegUnitIterator UI(ExecReg, TRI); UI.isValid(); ++UI) {
- LiveRange &RegUnit = LIS->getRegUnit(*UI);
+ for (MCRegUnit Unit : TRI->regunits(ExecReg)) {
+ LiveRange &RegUnit = LIS->getRegUnit(Unit);
if (RegUnit.find(StartIdx) != std::prev(RegUnit.find(EndIdx)))
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
index ae2c10116de8..e95abae88d7a 100644
--- a/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
+++ b/llvm/lib/Target/AMDGPU/SIOptimizeVGPRLiveRange.cpp
@@ -357,8 +357,8 @@ void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
for (auto *I : Instructions) {
auto &MI = *I;
- for (auto &MO : MI.operands()) {
- if (!MO.isReg() || !MO.getReg() || MO.isDef())
+ for (auto &MO : MI.all_uses()) {
+ if (!MO.getReg())
continue;
Register MOReg = MO.getReg();
@@ -522,8 +522,15 @@ void SIOptimizeVGPRLiveRange::optimizeLiveRange(
auto *UseBlock = UseMI->getParent();
// Replace uses in Endif block
if (UseBlock == Endif) {
- assert(UseMI->isPHI() && "Uses should be PHI in Endif block");
- O.setReg(NewReg);
+ if (UseMI->isPHI()) {
+ O.setReg(NewReg);
+ } else {
+ // DetectDeadLanes may mark register uses as undef without removing
+ // them, in which case a non-phi instruction using the original register
+ // may exist in the Endif block even though the register is not live
+ // into it.
+ assert(!O.readsReg());
+ }
continue;
}
diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
index c21ff06454da..97b3161c7f98 100644
--- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp
@@ -759,7 +759,7 @@ SIPeepholeSDWA::matchSDWAOperand(MachineInstr &MI) {
break;
SdwaSel DstSel = static_cast<SdwaSel>(
- TII->getNamedImmOperand(*SDWAInst, AMDGPU::OpName::dst_sel));;
+ TII->getNamedImmOperand(*SDWAInst, AMDGPU::OpName::dst_sel));
SdwaSel OtherDstSel = static_cast<SdwaSel>(
TII->getNamedImmOperand(*OtherInst, AMDGPU::OpName::dst_sel));
@@ -1158,7 +1158,7 @@ void SIPeepholeSDWA::legalizeScalarOperands(MachineInstr &MI,
if (!Op.isImm() && !(Op.isReg() && !TRI->isVGPR(*MRI, Op.getReg())))
continue;
- unsigned I = MI.getOperandNo(&Op);
+ unsigned I = Op.getOperandNo();
if (Desc.operands()[I].RegClass == -1 ||
!TRI->isVSSuperClass(TRI->getRegClass(Desc.operands()[I].RegClass)))
continue;
diff --git a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
index 8553a0ab2a68..8464cb3d6fc4 100644
--- a/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPostRABundler.cpp
@@ -101,8 +101,8 @@ void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI,
assert(!Op.getSubReg() &&
"subregister indexes should not be present after RA");
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
- UsedRegUnits.set(*Units);
+ for (MCRegUnit Unit : TRI->regunits(Reg))
+ UsedRegUnits.set(Unit);
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
index 877c8b81b2c0..b6839c8308d8 100644
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
@@ -54,3 +54,23 @@ uint64_t SIProgramInfo::getPGMRSrc1(CallingConv::ID CC) const {
}
return Reg;
}
+
+uint64_t SIProgramInfo::getComputePGMRSrc2() const {
+ uint64_t Reg =
+ S_00B84C_SCRATCH_EN(ScratchEnable) | S_00B84C_USER_SGPR(UserSGPR) |
+ S_00B84C_TRAP_HANDLER(TrapHandlerEnable) |
+ S_00B84C_TGID_X_EN(TGIdXEnable) | S_00B84C_TGID_Y_EN(TGIdYEnable) |
+ S_00B84C_TGID_Z_EN(TGIdZEnable) | S_00B84C_TG_SIZE_EN(TGSizeEnable) |
+ S_00B84C_TIDIG_COMP_CNT(TIdIGCompCount) |
+ S_00B84C_EXCP_EN_MSB(EXCPEnMSB) | S_00B84C_LDS_SIZE(LdsSize) |
+ S_00B84C_EXCP_EN(EXCPEnable);
+
+ return Reg;
+}
+
+uint64_t SIProgramInfo::getPGMRSrc2(CallingConv::ID CC) const {
+ if (AMDGPU::isCompute(CC))
+ return getComputePGMRSrc2();
+
+ return 0;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.h b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
index 553fb4cf496c..aab127e49463 100644
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
@@ -36,11 +36,23 @@ struct SIProgramInfo {
uint32_t MemOrdered = 0; // GFX10+
uint64_t ScratchSize = 0;
- // Fields set in PGM_RSRC2 pm4 packet.
+ // State used to calculate fields set in PGM_RSRC2 pm4 packet.
uint32_t LDSBlocks = 0;
uint32_t ScratchBlocks = 0;
- uint64_t ComputePGMRSrc2 = 0;
+ // Fields set in PGM_RSRC2 pm4 packet
+ uint32_t ScratchEnable = 0;
+ uint32_t UserSGPR = 0;
+ uint32_t TrapHandlerEnable = 0;
+ uint32_t TGIdXEnable = 0;
+ uint32_t TGIdYEnable = 0;
+ uint32_t TGIdZEnable = 0;
+ uint32_t TGSizeEnable = 0;
+ uint32_t TIdIGCompCount = 0;
+ uint32_t EXCPEnMSB = 0;
+ uint32_t LdsSize = 0;
+ uint32_t EXCPEnable = 0;
+
uint64_t ComputePGMRSrc3GFX90A = 0;
uint32_t NumVGPR = 0;
@@ -75,6 +87,10 @@ struct SIProgramInfo {
/// Compute the value of the ComputePGMRsrc1 register.
uint64_t getComputePGMRSrc1() const;
uint64_t getPGMRSrc1(CallingConv::ID CC) const;
+
+ /// Compute the value of the ComputePGMRsrc2 register.
+ uint64_t getComputePGMRSrc2() const;
+ uint64_t getPGMRSrc2(CallingConv::ID CC) const;
};
} // namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index f7ce581f9736..1d50dff4a7d9 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -31,7 +31,7 @@ using namespace llvm;
static cl::opt<bool> EnableSpillSGPRToVGPR(
"amdgpu-spill-sgpr-to-vgpr",
- cl::desc("Enable spilling VGPRs to SGPRs"),
+ cl::desc("Enable spilling SGPRs to VGPRs"),
cl::ReallyHidden,
cl::init(true));
@@ -170,7 +170,8 @@ struct SGPRSpillBuilder {
// a register as actually in use in another lane, so we need to save all
// used lanes of the chosen VGPR.
assert(RS && "Cannot spill SGPR to memory without RegScavenger");
- TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0, false);
+ TmpVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false,
+ 0, false);
// Reserve temporary stack slot
TmpVGPRIndex = MFI.getScavengeFI(MF.getFrameInfo(), TRI);
@@ -199,7 +200,7 @@ struct SGPRSpillBuilder {
const TargetRegisterClass &RC =
IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
RS->setRegUsed(SuperReg);
- SavedExecReg = RS->scavengeRegister(&RC, MI, 0, false);
+ SavedExecReg = RS->scavengeRegisterBackwards(RC, MI, false, 0, false);
int64_t VGPRLanes = getPerVGPRData().VGPRLanes;
@@ -328,10 +329,9 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
"getNumCoveredRegs() will not work with generated subreg masks!");
RegPressureIgnoredUnits.resize(getNumRegUnits());
- RegPressureIgnoredUnits.set(
- *MCRegUnitIterator(MCRegister::from(AMDGPU::M0), this));
+ RegPressureIgnoredUnits.set(*regunits(MCRegister::from(AMDGPU::M0)).begin());
for (auto Reg : AMDGPU::VGPR_HI16RegClass)
- RegPressureIgnoredUnits.set(*MCRegUnitIterator(Reg, this));
+ RegPressureIgnoredUnits.set(*regunits(Reg).begin());
// HACK: Until this is fully tablegen'd.
static llvm::once_flag InitializeRegSplitPartsFlag;
@@ -380,9 +380,7 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
MCRegister Reg) const {
- MCRegAliasIterator R(Reg, this, true);
-
- for (; R.isValid(); ++R)
+ for (MCRegAliasIterator R(Reg, this, true); R.isValid(); ++R)
Reserved.set(*R);
}
@@ -535,11 +533,18 @@ unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
return SubRegFromChannelTable[NumRegIndex - 1][Channel];
}
+MCRegister
+SIRegisterInfo::getAlignedHighSGPRForRC(const MachineFunction &MF,
+ const unsigned Align,
+ const TargetRegisterClass *RC) const {
+ unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), Align) - Align;
+ MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
+ return getMatchingSuperReg(BaseReg, AMDGPU::sub0, RC);
+}
+
MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
const MachineFunction &MF) const {
- unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
- MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
- return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass);
+ return getAlignedHighSGPRForRC(MF, /*Align=*/4, &AMDGPU::SGPR_128RegClass);
}
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
@@ -609,14 +614,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, Reg);
}
- for (auto Reg : AMDGPU::SReg_32RegClass) {
- Reserved.set(getSubReg(Reg, AMDGPU::hi16));
- Register Low = getSubReg(Reg, AMDGPU::lo16);
- // This is to prevent BB vcc liveness errors.
- if (!AMDGPU::SGPR_LO16RegClass.contains(Low))
- Reserved.set(Low);
- }
-
Register ScratchRSrcReg = MFI->getScratchRSrcReg();
if (ScratchRSrcReg != AMDGPU::NoRegister) {
// Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
@@ -625,6 +622,10 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, ScratchRSrcReg);
}
+ Register LongBranchReservedReg = MFI->getLongBranchReservedReg();
+ if (LongBranchReservedReg)
+ reserveRegisterTuples(Reserved, LongBranchReservedReg);
+
// We have to assume the SP is needed in case there are calls in the function,
// which is detected after the function is lowered. If we aren't really going
// to need SP, don't bother reserving it.
@@ -646,24 +647,18 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
}
+ // FIXME: Use same reserved register introduced in D149775
+ // SGPR used to preserve EXEC MASK around WWM spill/copy instructions.
+ Register ExecCopyReg = MFI->getSGPRForEXECCopy();
+ if (ExecCopyReg)
+ reserveRegisterTuples(Reserved, ExecCopyReg);
+
// Reserve VGPRs/AGPRs.
//
unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
unsigned MaxNumAGPRs = MaxNumVGPRs;
unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
- // Reserve all the AGPRs if there are no instructions to use it.
- if (!ST.hasMAIInsts()) {
- for (unsigned i = 0; i < MaxNumAGPRs; ++i) {
- unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
- reserveRegisterTuples(Reserved, Reg);
- }
- }
-
- for (auto Reg : AMDGPU::AGPR_32RegClass) {
- Reserved.set(getSubReg(Reg, AMDGPU::hi16));
- }
-
// On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
// a wave may have up to 512 total vector registers combining together both
// VGPRs and AGPRs. Hence, in an entry function without calls and without
@@ -690,9 +685,15 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, Reg);
}
- for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
- unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
- reserveRegisterTuples(Reserved, Reg);
+ if (ST.hasMAIInsts()) {
+ for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
+ unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
+ reserveRegisterTuples(Reserved, Reg);
+ }
+ } else {
+ // Reserve all the AGPRs if there are no instructions to use them.
+ for (MCRegister Reg : AMDGPU::AGPR_32RegClass)
+ reserveRegisterTuples(Reserved, Reg);
}
// On GFX908, in order to guarantee copying between AGPRs, we need a scratch
@@ -711,9 +712,6 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
reserveRegisterTuples(Reserved, Reg);
- for (auto Reg : MFI->getSGPRSpillVGPRs())
- reserveRegisterTuples(Reserved, Reg);
-
return Reserved;
}
@@ -1065,6 +1063,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
case AMDGPU::SI_SPILL_A32_RESTORE:
case AMDGPU::SI_SPILL_AV32_SAVE:
case AMDGPU::SI_SPILL_AV32_RESTORE:
+ case AMDGPU::SI_SPILL_WWM_V32_SAVE:
+ case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
return 1;
default: llvm_unreachable("Invalid spill opcode");
}
@@ -1326,7 +1326,7 @@ void SIRegisterInfo::buildSpillLoadStore(
const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
// On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);
- const unsigned RegWidth = AMDGPU::getRegBitWidth(RC->getID()) / 8;
+ const unsigned RegWidth = AMDGPU::getRegBitWidth(*RC) / 8;
// Always use 4 byte operations for AGPRs because we need to scavenge
// a temporary VGPR.
@@ -1607,7 +1607,8 @@ void SIRegisterInfo::buildSpillLoadStore(
} else if (UseVGPROffset) {
// FIXME: change to scavengeRegisterBackwards()
if (!TmpOffsetVGPR) {
- TmpOffsetVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+ TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
+ MI, false, 0);
RS->setRegUsed(TmpOffsetVGPR);
}
}
@@ -1660,6 +1661,33 @@ void SIRegisterInfo::buildSpillLoadStore(
if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
+
+ // The epilog restore of a wwm-scratch register can cause undesired
+ // optimization during machine-cp post PrologEpilogInserter if the same
+ // register was assigned for return value ABI lowering with a COPY
+ // instruction. As shown below, with the epilog reload the earlier COPY
+ // appears to be dead during machine-cp.
+ // ...
+ // v0 in WWM operation, needs the WWM spill at prolog/epilog.
+ // $vgpr0 = V_WRITELANE_B32 $sgpr20, 0, $vgpr0
+ // ...
+ // Epilog block:
+ // $vgpr0 = COPY $vgpr1 // outgoing value moved to v0
+ // ...
+ // WWM spill restore to preserve the inactive lanes of v0.
+ // $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1
+ // $vgpr0 = BUFFER_LOAD $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0
+ // $exec = S_MOV_B64 killed $sgpr4_sgpr5
+ // ...
+ // SI_RETURN implicit $vgpr0
+ // ...
+ // To fix it, mark the same register as a tied operand of such restore
+ // instructions so the restore counts as a use and keeps the preceding COPY alive.
+ if (!IsStore && MI != MBB.end() && MI->isReturn() &&
+ MI->readsRegister(SubReg, this)) {
+ MIB.addReg(SubReg, RegState::Implicit);
+ MIB->tieOperands(0, MIB->getNumOperands() - 1);
+ }
}
if (ScratchOffsetRegDelta != 0) {
@@ -1705,10 +1733,13 @@ void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
RegScavenger *RS, SlotIndexes *Indexes,
- LiveIntervals *LIS, bool OnlyToVGPR) const {
+ LiveIntervals *LIS, bool OnlyToVGPR,
+ bool SpillToPhysVGPRLane) const {
SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
- ArrayRef<SpilledReg> VGPRSpills = SB.MFI.getSGPRSpillToVGPRLanes(Index);
+ ArrayRef<SpilledReg> VGPRSpills =
+ SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index)
+ : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index);
bool SpillToVGPR = !VGPRSpills.empty();
if (OnlyToVGPR && !SpillToVGPR)
return false;
@@ -1825,10 +1856,13 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
RegScavenger *RS, SlotIndexes *Indexes,
- LiveIntervals *LIS, bool OnlyToVGPR) const {
+ LiveIntervals *LIS, bool OnlyToVGPR,
+ bool SpillToPhysVGPRLane) const {
SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
- ArrayRef<SpilledReg> VGPRSpills = SB.MFI.getSGPRSpillToVGPRLanes(Index);
+ ArrayRef<SpilledReg> VGPRSpills =
+ SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index)
+ : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index);
bool SpillToVGPR = !VGPRSpills.empty();
if (OnlyToVGPR && !SpillToVGPR)
return false;
@@ -1974,7 +2008,7 @@ bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
/// handled.
bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
- SlotIndexes *Indexes, LiveIntervals *LIS) const {
+ SlotIndexes *Indexes, LiveIntervals *LIS, bool SpillToPhysVGPRLane) const {
switch (MI->getOpcode()) {
case AMDGPU::SI_SPILL_S1024_SAVE:
case AMDGPU::SI_SPILL_S512_SAVE:
@@ -1990,7 +2024,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
case AMDGPU::SI_SPILL_S96_SAVE:
case AMDGPU::SI_SPILL_S64_SAVE:
case AMDGPU::SI_SPILL_S32_SAVE:
- return spillSGPR(MI, FI, RS, Indexes, LIS, true);
+ return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
case AMDGPU::SI_SPILL_S1024_RESTORE:
case AMDGPU::SI_SPILL_S512_RESTORE:
case AMDGPU::SI_SPILL_S384_RESTORE:
@@ -2005,7 +2039,7 @@ bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
case AMDGPU::SI_SPILL_S96_RESTORE:
case AMDGPU::SI_SPILL_S64_RESTORE:
case AMDGPU::SI_SPILL_S32_RESTORE:
- return restoreSGPR(MI, FI, RS, Indexes, LIS, true);
+ return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
default:
llvm_unreachable("not an SGPR spill instruction");
}
@@ -2109,7 +2143,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_AV128_SAVE:
case AMDGPU::SI_SPILL_AV96_SAVE:
case AMDGPU::SI_SPILL_AV64_SAVE:
- case AMDGPU::SI_SPILL_AV32_SAVE: {
+ case AMDGPU::SI_SPILL_AV32_SAVE:
+ case AMDGPU::SI_SPILL_WWM_V32_SAVE: {
const MachineOperand *VData = TII->getNamedOperand(*MI,
AMDGPU::OpName::vdata);
assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
@@ -2118,11 +2153,19 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
: AMDGPU::BUFFER_STORE_DWORD_OFFSET;
auto *MBB = MI->getParent();
+ bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
+ if (IsWWMRegSpill) {
+ TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
+ RS->isRegUsed(AMDGPU::SCC));
+ }
buildSpillLoadStore(
*MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
*MI->memoperands_begin(), RS);
MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
+ if (IsWWMRegSpill)
+ TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
+
MI->eraseFromParent();
return true;
}
@@ -2167,7 +2210,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_AV352_RESTORE:
case AMDGPU::SI_SPILL_AV384_RESTORE:
case AMDGPU::SI_SPILL_AV512_RESTORE:
- case AMDGPU::SI_SPILL_AV1024_RESTORE: {
+ case AMDGPU::SI_SPILL_AV1024_RESTORE:
+ case AMDGPU::SI_SPILL_WWM_V32_RESTORE: {
const MachineOperand *VData = TII->getNamedOperand(*MI,
AMDGPU::OpName::vdata);
assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
@@ -2176,10 +2220,19 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
: AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
auto *MBB = MI->getParent();
+ bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
+ if (IsWWMRegSpill) {
+ TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
+ RS->isRegUsed(AMDGPU::SCC));
+ }
buildSpillLoadStore(
*MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
*MI->memoperands_begin(), RS);
+
+ if (IsWWMRegSpill)
+ TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
+
MI->eraseFromParent();
return true;
}
@@ -2271,7 +2324,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
: &AMDGPU::VGPR_32RegClass;
- Register TmpReg = RS->scavengeRegister(RC, MI, 0, !UseSGPR);
+ Register TmpReg =
+ RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);
FIOp.setReg(TmpReg);
FIOp.setIsKill();
@@ -2291,8 +2345,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
Register TmpSReg =
UseSGPR ? TmpReg
- : RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0,
- !UseSGPR);
+ : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
+ MI, false, 0, !UseSGPR);
// TODO: for flat scratch another attempt can be made with a VGPR index
// if no SGPRs can be scavenged.
@@ -2366,8 +2420,9 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
: &AMDGPU::VGPR_32RegClass;
bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
- Register ResultReg = IsCopy ? MI->getOperand(0).getReg()
- : RS->scavengeRegister(RC, MI, 0);
+ Register ResultReg =
+ IsCopy ? MI->getOperand(0).getReg()
+ : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
int64_t Offset = FrameInfo.getObjectOffset(Index);
if (Offset == 0) {
@@ -2380,8 +2435,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
if (IsSALU && !LiveSCC)
Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
if (IsSALU && LiveSCC) {
- Register NewDest =
- RS->scavengeRegister(&AMDGPU::SReg_32RegClass, Shift, 0);
+ Register NewDest = RS->scavengeRegisterBackwards(
+ AMDGPU::SReg_32RegClass, Shift, false, 0);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
NewDest)
.addReg(ResultReg);
@@ -2435,8 +2490,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
// We may have 1 free scratch SGPR even though a carry out is
// unavailable. Only one additional mov is needed.
- Register TmpScaledReg =
- RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
+ Register TmpScaledReg = RS->scavengeRegisterBackwards(
+ AMDGPU::SReg_32_XM0RegClass, MI, false, 0, false);
Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
@@ -2501,7 +2556,8 @@ bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
FIOp.ChangeToImmediate(Offset);
if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
- Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
+ Register TmpReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
+ MI, false, 0);
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
.addImm(Offset);
FIOp.ChangeToRegister(TmpReg, false, false, true);
@@ -2517,31 +2573,31 @@ StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
static const TargetRegisterClass *
getAnyVGPRClassForBitWidth(unsigned BitWidth) {
- if (BitWidth <= 64)
+ if (BitWidth == 64)
return &AMDGPU::VReg_64RegClass;
- if (BitWidth <= 96)
+ if (BitWidth == 96)
return &AMDGPU::VReg_96RegClass;
- if (BitWidth <= 128)
+ if (BitWidth == 128)
return &AMDGPU::VReg_128RegClass;
- if (BitWidth <= 160)
+ if (BitWidth == 160)
return &AMDGPU::VReg_160RegClass;
- if (BitWidth <= 192)
+ if (BitWidth == 192)
return &AMDGPU::VReg_192RegClass;
- if (BitWidth <= 224)
+ if (BitWidth == 224)
return &AMDGPU::VReg_224RegClass;
- if (BitWidth <= 256)
+ if (BitWidth == 256)
return &AMDGPU::VReg_256RegClass;
- if (BitWidth <= 288)
+ if (BitWidth == 288)
return &AMDGPU::VReg_288RegClass;
- if (BitWidth <= 320)
+ if (BitWidth == 320)
return &AMDGPU::VReg_320RegClass;
- if (BitWidth <= 352)
+ if (BitWidth == 352)
return &AMDGPU::VReg_352RegClass;
- if (BitWidth <= 384)
+ if (BitWidth == 384)
return &AMDGPU::VReg_384RegClass;
- if (BitWidth <= 512)
+ if (BitWidth == 512)
return &AMDGPU::VReg_512RegClass;
- if (BitWidth <= 1024)
+ if (BitWidth == 1024)
return &AMDGPU::VReg_1024RegClass;
return nullptr;
@@ -2549,31 +2605,31 @@ getAnyVGPRClassForBitWidth(unsigned BitWidth) {
static const TargetRegisterClass *
getAlignedVGPRClassForBitWidth(unsigned BitWidth) {
- if (BitWidth <= 64)
+ if (BitWidth == 64)
return &AMDGPU::VReg_64_Align2RegClass;
- if (BitWidth <= 96)
+ if (BitWidth == 96)
return &AMDGPU::VReg_96_Align2RegClass;
- if (BitWidth <= 128)
+ if (BitWidth == 128)
return &AMDGPU::VReg_128_Align2RegClass;
- if (BitWidth <= 160)
+ if (BitWidth == 160)
return &AMDGPU::VReg_160_Align2RegClass;
- if (BitWidth <= 192)
+ if (BitWidth == 192)
return &AMDGPU::VReg_192_Align2RegClass;
- if (BitWidth <= 224)
+ if (BitWidth == 224)
return &AMDGPU::VReg_224_Align2RegClass;
- if (BitWidth <= 256)
+ if (BitWidth == 256)
return &AMDGPU::VReg_256_Align2RegClass;
- if (BitWidth <= 288)
+ if (BitWidth == 288)
return &AMDGPU::VReg_288_Align2RegClass;
- if (BitWidth <= 320)
+ if (BitWidth == 320)
return &AMDGPU::VReg_320_Align2RegClass;
- if (BitWidth <= 352)
+ if (BitWidth == 352)
return &AMDGPU::VReg_352_Align2RegClass;
- if (BitWidth <= 384)
+ if (BitWidth == 384)
return &AMDGPU::VReg_384_Align2RegClass;
- if (BitWidth <= 512)
+ if (BitWidth == 512)
return &AMDGPU::VReg_512_Align2RegClass;
- if (BitWidth <= 1024)
+ if (BitWidth == 1024)
return &AMDGPU::VReg_1024_Align2RegClass;
return nullptr;
@@ -2583,9 +2639,9 @@ const TargetRegisterClass *
SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
if (BitWidth == 1)
return &AMDGPU::VReg_1RegClass;
- if (BitWidth <= 16)
+ if (BitWidth == 16)
return &AMDGPU::VGPR_LO16RegClass;
- if (BitWidth <= 32)
+ if (BitWidth == 32)
return &AMDGPU::VGPR_32RegClass;
return ST.needsAlignedVGPRs() ? getAlignedVGPRClassForBitWidth(BitWidth)
: getAnyVGPRClassForBitWidth(BitWidth);
@@ -2593,31 +2649,31 @@ SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
static const TargetRegisterClass *
getAnyAGPRClassForBitWidth(unsigned BitWidth) {
- if (BitWidth <= 64)
+ if (BitWidth == 64)
return &AMDGPU::AReg_64RegClass;
- if (BitWidth <= 96)
+ if (BitWidth == 96)
return &AMDGPU::AReg_96RegClass;
- if (BitWidth <= 128)
+ if (BitWidth == 128)
return &AMDGPU::AReg_128RegClass;
- if (BitWidth <= 160)
+ if (BitWidth == 160)
return &AMDGPU::AReg_160RegClass;
- if (BitWidth <= 192)
+ if (BitWidth == 192)
return &AMDGPU::AReg_192RegClass;
- if (BitWidth <= 224)
+ if (BitWidth == 224)
return &AMDGPU::AReg_224RegClass;
- if (BitWidth <= 256)
+ if (BitWidth == 256)
return &AMDGPU::AReg_256RegClass;
- if (BitWidth <= 288)
+ if (BitWidth == 288)
return &AMDGPU::AReg_288RegClass;
- if (BitWidth <= 320)
+ if (BitWidth == 320)
return &AMDGPU::AReg_320RegClass;
- if (BitWidth <= 352)
+ if (BitWidth == 352)
return &AMDGPU::AReg_352RegClass;
- if (BitWidth <= 384)
+ if (BitWidth == 384)
return &AMDGPU::AReg_384RegClass;
- if (BitWidth <= 512)
+ if (BitWidth == 512)
return &AMDGPU::AReg_512RegClass;
- if (BitWidth <= 1024)
+ if (BitWidth == 1024)
return &AMDGPU::AReg_1024RegClass;
return nullptr;
@@ -2625,31 +2681,31 @@ getAnyAGPRClassForBitWidth(unsigned BitWidth) {
static const TargetRegisterClass *
getAlignedAGPRClassForBitWidth(unsigned BitWidth) {
- if (BitWidth <= 64)
+ if (BitWidth == 64)
return &AMDGPU::AReg_64_Align2RegClass;
- if (BitWidth <= 96)
+ if (BitWidth == 96)
return &AMDGPU::AReg_96_Align2RegClass;
- if (BitWidth <= 128)
+ if (BitWidth == 128)
return &AMDGPU::AReg_128_Align2RegClass;
- if (BitWidth <= 160)
+ if (BitWidth == 160)
return &AMDGPU::AReg_160_Align2RegClass;
- if (BitWidth <= 192)
+ if (BitWidth == 192)
return &AMDGPU::AReg_192_Align2RegClass;
- if (BitWidth <= 224)
+ if (BitWidth == 224)
return &AMDGPU::AReg_224_Align2RegClass;
- if (BitWidth <= 256)
+ if (BitWidth == 256)
return &AMDGPU::AReg_256_Align2RegClass;
- if (BitWidth <= 288)
+ if (BitWidth == 288)
return &AMDGPU::AReg_288_Align2RegClass;
- if (BitWidth <= 320)
+ if (BitWidth == 320)
return &AMDGPU::AReg_320_Align2RegClass;
- if (BitWidth <= 352)
+ if (BitWidth == 352)
return &AMDGPU::AReg_352_Align2RegClass;
- if (BitWidth <= 384)
+ if (BitWidth == 384)
return &AMDGPU::AReg_384_Align2RegClass;
- if (BitWidth <= 512)
+ if (BitWidth == 512)
return &AMDGPU::AReg_512_Align2RegClass;
- if (BitWidth <= 1024)
+ if (BitWidth == 1024)
return &AMDGPU::AReg_1024_Align2RegClass;
return nullptr;
@@ -2657,9 +2713,9 @@ getAlignedAGPRClassForBitWidth(unsigned BitWidth) {
const TargetRegisterClass *
SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
- if (BitWidth <= 16)
+ if (BitWidth == 16)
return &AMDGPU::AGPR_LO16RegClass;
- if (BitWidth <= 32)
+ if (BitWidth == 32)
return &AMDGPU::AGPR_32RegClass;
return ST.needsAlignedVGPRs() ? getAlignedAGPRClassForBitWidth(BitWidth)
: getAnyAGPRClassForBitWidth(BitWidth);
@@ -2667,31 +2723,31 @@ SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
static const TargetRegisterClass *
getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
- if (BitWidth <= 64)
+ if (BitWidth == 64)
return &AMDGPU::AV_64RegClass;
- if (BitWidth <= 96)
+ if (BitWidth == 96)
return &AMDGPU::AV_96RegClass;
- if (BitWidth <= 128)
+ if (BitWidth == 128)
return &AMDGPU::AV_128RegClass;
- if (BitWidth <= 160)
+ if (BitWidth == 160)
return &AMDGPU::AV_160RegClass;
- if (BitWidth <= 192)
+ if (BitWidth == 192)
return &AMDGPU::AV_192RegClass;
- if (BitWidth <= 224)
+ if (BitWidth == 224)
return &AMDGPU::AV_224RegClass;
- if (BitWidth <= 256)
+ if (BitWidth == 256)
return &AMDGPU::AV_256RegClass;
- if (BitWidth <= 288)
+ if (BitWidth == 288)
return &AMDGPU::AV_288RegClass;
- if (BitWidth <= 320)
+ if (BitWidth == 320)
return &AMDGPU::AV_320RegClass;
- if (BitWidth <= 352)
+ if (BitWidth == 352)
return &AMDGPU::AV_352RegClass;
- if (BitWidth <= 384)
+ if (BitWidth == 384)
return &AMDGPU::AV_384RegClass;
- if (BitWidth <= 512)
+ if (BitWidth == 512)
return &AMDGPU::AV_512RegClass;
- if (BitWidth <= 1024)
+ if (BitWidth == 1024)
return &AMDGPU::AV_1024RegClass;
return nullptr;
@@ -2699,31 +2755,31 @@ getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
static const TargetRegisterClass *
getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
- if (BitWidth <= 64)
+ if (BitWidth == 64)
return &AMDGPU::AV_64_Align2RegClass;
- if (BitWidth <= 96)
+ if (BitWidth == 96)
return &AMDGPU::AV_96_Align2RegClass;
- if (BitWidth <= 128)
+ if (BitWidth == 128)
return &AMDGPU::AV_128_Align2RegClass;
- if (BitWidth <= 160)
+ if (BitWidth == 160)
return &AMDGPU::AV_160_Align2RegClass;
- if (BitWidth <= 192)
+ if (BitWidth == 192)
return &AMDGPU::AV_192_Align2RegClass;
- if (BitWidth <= 224)
+ if (BitWidth == 224)
return &AMDGPU::AV_224_Align2RegClass;
- if (BitWidth <= 256)
+ if (BitWidth == 256)
return &AMDGPU::AV_256_Align2RegClass;
- if (BitWidth <= 288)
+ if (BitWidth == 288)
return &AMDGPU::AV_288_Align2RegClass;
- if (BitWidth <= 320)
+ if (BitWidth == 320)
return &AMDGPU::AV_320_Align2RegClass;
- if (BitWidth <= 352)
+ if (BitWidth == 352)
return &AMDGPU::AV_352_Align2RegClass;
- if (BitWidth <= 384)
+ if (BitWidth == 384)
return &AMDGPU::AV_384_Align2RegClass;
- if (BitWidth <= 512)
+ if (BitWidth == 512)
return &AMDGPU::AV_512_Align2RegClass;
- if (BitWidth <= 1024)
+ if (BitWidth == 1024)
return &AMDGPU::AV_1024_Align2RegClass;
return nullptr;
@@ -2731,9 +2787,9 @@ getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
const TargetRegisterClass *
SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
- if (BitWidth <= 16)
+ if (BitWidth == 16)
return &AMDGPU::VGPR_LO16RegClass;
- if (BitWidth <= 32)
+ if (BitWidth == 32)
return &AMDGPU::AV_32RegClass;
return ST.needsAlignedVGPRs()
? getAlignedVectorSuperClassForBitWidth(BitWidth)
@@ -2742,35 +2798,35 @@ SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
const TargetRegisterClass *
SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
- if (BitWidth <= 16)
+ if (BitWidth == 16)
return &AMDGPU::SGPR_LO16RegClass;
- if (BitWidth <= 32)
+ if (BitWidth == 32)
return &AMDGPU::SReg_32RegClass;
- if (BitWidth <= 64)
+ if (BitWidth == 64)
return &AMDGPU::SReg_64RegClass;
- if (BitWidth <= 96)
+ if (BitWidth == 96)
return &AMDGPU::SGPR_96RegClass;
- if (BitWidth <= 128)
+ if (BitWidth == 128)
return &AMDGPU::SGPR_128RegClass;
- if (BitWidth <= 160)
+ if (BitWidth == 160)
return &AMDGPU::SGPR_160RegClass;
- if (BitWidth <= 192)
+ if (BitWidth == 192)
return &AMDGPU::SGPR_192RegClass;
- if (BitWidth <= 224)
+ if (BitWidth == 224)
return &AMDGPU::SGPR_224RegClass;
- if (BitWidth <= 256)
+ if (BitWidth == 256)
return &AMDGPU::SGPR_256RegClass;
- if (BitWidth <= 288)
+ if (BitWidth == 288)
return &AMDGPU::SGPR_288RegClass;
- if (BitWidth <= 320)
+ if (BitWidth == 320)
return &AMDGPU::SGPR_320RegClass;
- if (BitWidth <= 352)
+ if (BitWidth == 352)
return &AMDGPU::SGPR_352RegClass;
- if (BitWidth <= 384)
+ if (BitWidth == 384)
return &AMDGPU::SGPR_384RegClass;
- if (BitWidth <= 512)
+ if (BitWidth == 512)
return &AMDGPU::SGPR_512RegClass;
- if (BitWidth <= 1024)
+ if (BitWidth == 1024)
return &AMDGPU::SGPR_1024RegClass;
return nullptr;
@@ -2863,13 +2919,12 @@ bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
/// Returns the lowest register that is not used at any point in the function.
/// If all registers are used, then this function will return
-/// AMDGPU::NoRegister. If \p ReserveHighestVGPR = true, then return
+/// AMDGPU::NoRegister. If \p ReserveHighestRegister = true, then return
/// highest unused register.
-MCRegister SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
- const TargetRegisterClass *RC,
- const MachineFunction &MF,
- bool ReserveHighestVGPR) const {
- if (ReserveHighestVGPR) {
+MCRegister SIRegisterInfo::findUnusedRegister(
+ const MachineRegisterInfo &MRI, const TargetRegisterClass *RC,
+ const MachineFunction &MF, bool ReserveHighestRegister) const {
+ if (ReserveHighestRegister) {
for (MCRegister Reg : reverse(*RC))
if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
return Reg;
@@ -2881,9 +2936,19 @@ MCRegister SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
return MCRegister();
}
+bool SIRegisterInfo::isUniformReg(const MachineRegisterInfo &MRI,
+ const RegisterBankInfo &RBI,
+ Register Reg) const {
+ auto *RB = RBI.getRegBank(Reg, MRI, *MRI.getTargetRegisterInfo());
+ if (!RB)
+ return false;
+
+ return !RBI.isDivergentRegBank(RB);
+}
+
ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
unsigned EltSize) const {
- const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC->MC);
+ const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC);
assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
const unsigned RegDWORDs = RegBitWidth / 32;
@@ -3084,9 +3149,8 @@ MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
DefIdx = V->def;
} else {
// Find last def.
- for (MCRegUnitIterator Units(Reg.asMCReg(), this); Units.isValid();
- ++Units) {
- LiveRange &LR = LIS->getRegUnit(*Units);
+ for (MCRegUnit Unit : regunits(Reg.asMCReg())) {
+ LiveRange &LR = LIS->getRegUnit(Unit);
if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
if (!DefIdx.isValid() ||
MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
@@ -3173,3 +3237,19 @@ ArrayRef<MCPhysReg>
SIRegisterInfo::getAllSGPR32(const MachineFunction &MF) const {
return ArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
}
+
+unsigned
+SIRegisterInfo::getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
+ unsigned SubReg) const {
+ switch (RC->TSFlags & SIRCFlags::RegKindMask) {
+ case SIRCFlags::HasSGPR:
+ return std::min(128u, getSubRegIdxSize(SubReg));
+ case SIRCFlags::HasAGPR:
+ case SIRCFlags::HasVGPR:
+ case SIRCFlags::HasVGPR | SIRCFlags::HasAGPR:
+ return std::min(32u, getSubRegIdxSize(SubReg));
+ default:
+ break;
+ }
+ return 0;
+}
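The new getAlignedHighSGPRForRC() helper above factors out the index arithmetic that reservedPrivateSegmentBufferReg() previously inlined. A minimal standalone sketch of that arithmetic, using an assumed SGPR budget of 102 purely for illustration (the real value comes from ST.getMaxNumSGPRs(MF) and is subtarget-dependent):

#include <cassert>

// Round Value down to a multiple of Align; mirrors how llvm::alignDown is used here.
constexpr unsigned alignDown(unsigned Value, unsigned Align) {
  return Value / Align * Align;
}

int main() {
  const unsigned MaxNumSGPRs = 102; // assumed budget, for illustration only
  const unsigned Align = 4;         // SGPR_128 tuples start on 4-register boundaries
  const unsigned BaseIdx = alignDown(MaxNumSGPRs, Align) - Align; // 100 - 4 = 96
  assert(BaseIdx == 96);            // i.e. the reserved tuple would be s[96:99]
  return 0;
}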
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index e9ddf82fb5c8..17fce43891c5 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -70,6 +70,12 @@ public:
return SpillSGPRToVGPR;
}
+ /// Return the largest available SGPR aligned to \p Align for the register
+ /// class \p RC.
+ MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF,
+ const unsigned Align,
+ const TargetRegisterClass *RC) const;
+
/// Return the end register initially reserved for the scratch buffer in case
/// spilling is needed.
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;
@@ -136,14 +142,17 @@ public:
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset,
bool IsLoad, bool IsKill = true) const;
- /// If \p OnlyToVGPR is true, this will only succeed if this
+ /// If \p OnlyToVGPR is true, this will only succeed if this manages to find a
+ /// free VGPR lane to spill.
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
- bool OnlyToVGPR = false) const;
+ bool OnlyToVGPR = false,
+ bool SpillToPhysVGPRLane = false) const;
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
- bool OnlyToVGPR = false) const;
+ bool OnlyToVGPR = false,
+ bool SpillToPhysVGPRLane = false) const;
bool spillEmergencySGPR(MachineBasicBlock::iterator MI,
MachineBasicBlock &RestoreMBB, Register SGPR,
@@ -157,10 +166,10 @@ public:
unsigned FIOperandNum,
RegScavenger *RS) const override;
- bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI,
- int FI, RegScavenger *RS,
- SlotIndexes *Indexes = nullptr,
- LiveIntervals *LIS = nullptr) const;
+ bool eliminateSGPRToVGPRSpillFrameIndex(
+ MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
+ SlotIndexes *Indexes = nullptr, LiveIntervals *LIS = nullptr,
+ bool SpillToPhysVGPRLane = false) const;
StringRef getRegAsmName(MCRegister Reg) const override;
@@ -286,10 +295,17 @@ public:
return isVGPR(MRI, Reg) || isAGPR(MRI, Reg);
}
+ // FIXME: SGPRs are assumed to be uniform, but this is not true for i1 SGPRs
+ // (such as VCC) which hold a wave-wide vector of boolean values. Examining
+ // just the register class is not sufficient; it needs to be combined with a
+ // value type. The next predicate isUniformReg() does this correctly.
bool isDivergentRegClass(const TargetRegisterClass *RC) const override {
return !isSGPRClass(RC);
}
+ bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI,
+ Register Reg) const override;
+
ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
unsigned EltSize) const;
@@ -411,6 +427,25 @@ public:
int64_t InstrOffset, MachineMemOperand *MMO,
RegScavenger *RS,
LivePhysRegs *LiveRegs = nullptr) const;
+
+ // Return the alignment in the register file of the first register in a register tuple.
+ unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const {
+ return (RC->TSFlags & SIRCFlags::RegTupleAlignUnitsMask) * 32;
+ }
+
+ // Check if register class RC has the required alignment.
+ bool isRegClassAligned(const TargetRegisterClass *RC,
+ unsigned AlignNumBits) const {
+ assert(AlignNumBits != 0);
+ unsigned RCAlign = getRegClassAlignmentNumBits(RC);
+ return RCAlign == AlignNumBits ||
+ (RCAlign > AlignNumBits && (RCAlign % AlignNumBits) == 0);
+ }
+
+ // Return the alignment of a SubReg relative to the start of a register in
+ // class RC. No check is made that the subreg is supported by the current RC.
+ unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
+ unsigned SubReg) const;
};
} // End namespace llvm
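The alignment helpers declared above decode the new RegTupleAlignUnits field from TSFlags; per the SIRegisterInfo.td change below it occupies TSFlags{1-0} and is given in 32-bit units. A small sketch of that decoding, with the mask value written out here only as an assumption standing in for SIRCFlags::RegTupleAlignUnitsMask:

#include <cassert>
#include <cstdint>

constexpr uint64_t RegTupleAlignUnitsMask = 0x3; // assumed to match TSFlags{1-0}

constexpr unsigned regClassAlignmentNumBits(uint64_t TSFlags) {
  // Same arithmetic as getRegClassAlignmentNumBits(): units of 32-bit registers.
  return static_cast<unsigned>(TSFlags & RegTupleAlignUnitsMask) * 32;
}

int main() {
  // Only the low alignment bits are modeled; HasVGPR/HasAGPR/HasSGPR are omitted.
  const uint64_t PlainVRegFlags   = 0x1; // default RegTupleAlignUnits = 1
  const uint64_t AlignedVRegFlags = 0x2; // *_Align2 classes set RegTupleAlignUnits = 2
  assert(regClassAlignmentNumBits(PlainVRegFlags) == 32);
  assert(regClassAlignmentNumBits(AlignedVRegFlags) == 64);
  return 0;
}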
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 12053c4b8724..b2b1b458a63a 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -10,16 +10,6 @@
// Subregister declarations
//===----------------------------------------------------------------------===//
-class Indexes<int N> {
- list<int> all = [0, 1, 2, 3, 4, 5, 6 , 7,
- 8, 9, 10, 11, 12, 13, 14, 15,
- 16, 17, 18, 19, 20, 21, 22, 23,
- 24, 25, 26, 27, 28, 29, 30, 31];
-
- // Returns list of indexes [0..N)
- list<int> slice = !filter(i, all, !lt(i, N));
-}
-
let Namespace = "AMDGPU" in {
def lo16 : SubRegIndex<16, 0>;
@@ -35,13 +25,11 @@ foreach Index = 1...31 in {
}
foreach Size = {2...6,8,16} in {
- foreach Index = Indexes<!sub(33, Size)>.slice in {
- def !interleave(!foreach(cur, Indexes<Size>.slice, "sub"#!add(cur, Index)),
- "_") :
+ foreach Index = !range(!sub(33, Size)) in {
+ def !interleave(!foreach(cur, !range(Size), "sub"#!add(cur, Index)), "_") :
SubRegIndex<!mul(Size, 32), !shl(Index, 5)> {
let CoveringSubRegIndices =
- !foreach(cur, Indexes<Size>.slice,
- !cast<SubRegIndex>(sub#!add(cur, Index)));
+ !foreach(cur, !range(Size), !cast<SubRegIndex>(sub#!add(cur, Index)));
}
}
}
@@ -150,10 +138,14 @@ class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
// For scalar register classes.
field bit HasSGPR = 0;
+ // Alignment of the first register in a tuple (in 32-bit units).
+ field int RegTupleAlignUnits = 1;
+
// These need to be kept in sync with the enum SIRCFlags.
- let TSFlags{0} = HasVGPR;
- let TSFlags{1} = HasAGPR;
- let TSFlags{2} = HasSGPR;
+ let TSFlags{1-0} = RegTupleAlignUnits;
+ let TSFlags{2} = HasVGPR;
+ let TSFlags{3} = HasAGPR;
+ let TSFlags{4} = HasSGPR;
}
multiclass SIRegLoHi16 <string n, bits<16> regIdx, bit ArtificialHigh = 1,
@@ -421,7 +413,7 @@ def SGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
def SGPR_64Regs : SIRegisterTuples<getSubRegs<2>.ret, SGPR_32, 105, 2, 2, "s">;
// SGPR 96-bit registers. No operations use these, but for symmetry with 96-bit VGPRs.
-def SGPR_96Regs : SIRegisterTuples<getSubRegs<3>.ret, SGPR_32, 105, 3, 3, "s">;
+def SGPR_96Regs : SIRegisterTuples<getSubRegs<3>.ret, SGPR_32, 105, 4, 3, "s">;
// SGPR 128-bit registers
def SGPR_128Regs : SIRegisterTuples<getSubRegs<4>.ret, SGPR_32, 105, 4, 4, "s">;
@@ -774,7 +766,7 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
SRC_PRIVATE_LIMIT_HI_LO16, SRC_POPS_EXITING_WAVE_ID_LO16, SRC_VCCZ_LO16,
SRC_EXECZ_LO16, SRC_SCC_LO16, EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16)> {
let Size = 16;
- let AllocationPriority = 0;
+ let isAllocatable = 0;
let BaseClassOrder = 16;
}
@@ -817,6 +809,21 @@ def SGPR_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, v4i16, v4f16],
let HasSGPR = 1;
}
+// CCR (call clobbered registers) SGPR 64-bit registers
+def CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32, (add (trunc SGPR_64, 15))> {
+ let CopyCost = SGPR_64.CopyCost;
+ let AllocationPriority = SGPR_64.AllocationPriority;
+ let HasSGPR = 1;
+}
+
+// Call clobbered 64-bit SGPRs for AMDGPU_Gfx CC
+def Gfx_CCR_SGPR_64 : SIRegisterClass<"AMDGPU", SGPR_64.RegTypes, 32,
+ (add (trunc (shl SGPR_64, 18), 14))> { // s[36:37]-s[s62:63]
+ let CopyCost = SGPR_64.CopyCost;
+ let AllocationPriority = SGPR_64.AllocationPriority;
+ let HasSGPR = 1;
+}
+
def TTMP_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, f64, v4i16, v4f16], 32,
(add TTMP_64Regs)> {
let isAllocatable = 0;
@@ -931,6 +938,7 @@ multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)> {
// Give aligned class higher priority in base class resolution
let BaseClassOrder = !sub(!mul(numRegs, 32), 1);
+ let RegTupleAlignUnits = 2;
}
}
}
@@ -965,6 +973,7 @@ multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)> {
// Give aligned class higher priority in base class resolution
let BaseClassOrder = !sub(!mul(numRegs, 32), 1);
+ let RegTupleAlignUnits = 2;
}
}
}
@@ -1033,10 +1042,12 @@ multiclass AVRegClass<int numRegs, list<ValueType> regTypes,
// Define the regular class.
def "" : VRegClassBase<numRegs, regTypes, (add vregList, aregList)>;
- // Define 2-aligned variant
+ // Define 2-aligned variant
def _Align2 : VRegClassBase<numRegs, regTypes,
(add (decimate vregList, 2),
- (decimate aregList, 2))>;
+ (decimate aregList, 2))> {
+ let RegTupleAlignUnits = 2;
+ }
}
}
@@ -1066,185 +1077,123 @@ class RegImmMatcher<string name> : AsmOperandClass {
let RenderMethod = "addRegOrImmOperands";
}
-// For VOP1,2,C True16 instructions. Uses first 128 32-bit VGPRs only
-multiclass SIRegOperand16 <string rc, string MatchName, string opType,
- string rc_suffix = "_32"> {
- let OperandNamespace = "AMDGPU" in {
- def _b16_Lo128 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix#"_Lo128")> {
- let OperandType = opType#"_INT16";
- let ParserMatchClass = RegImmMatcher<MatchName#"B16_Lo128">;
- let DecoderMethod = "decodeOperand_VSrc16";
- }
-
- def _f16_Lo128 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix#"_Lo128")> {
- let OperandType = opType#"_FP16";
- let ParserMatchClass = RegImmMatcher<MatchName#"F16_Lo128">;
- let DecoderMethod = "decodeOperand_" # rc # "_16";
- }
- }
-}
-
-
-multiclass SIRegOperand32 <string rc, string MatchName, string opType,
- string rc_suffix = "_32"> {
- let OperandNamespace = "AMDGPU" in {
- def _b16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
- let OperandType = opType#"_INT16";
- let ParserMatchClass = RegImmMatcher<MatchName#"B16">;
- let DecoderMethod = "decodeOperand_VSrc16";
- }
-
- def _f16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
- let OperandType = opType#"_FP16";
- let ParserMatchClass = RegImmMatcher<MatchName#"F16">;
- let DecoderMethod = "decodeOperand_" # rc # "_16";
- }
-
- def _b32 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
- let OperandType = opType#"_INT32";
- let ParserMatchClass = RegImmMatcher<MatchName#"B32">;
- let DecoderMethod = "decodeOperand_" # rc # rc_suffix;
- }
-
- def _f32 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
- let OperandType = opType#"_FP32";
- let ParserMatchClass = RegImmMatcher<MatchName#"F32">;
- let DecoderMethod = "decodeOperand_" # rc # rc_suffix;
- }
-
- def _v2b16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
- let OperandType = opType#"_V2INT16";
- let ParserMatchClass = RegImmMatcher<MatchName#"V2B16">;
- let DecoderMethod = "decodeOperand_VSrcV216";
- }
-
- def _v2f16 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
- let OperandType = opType#"_V2FP16";
- let ParserMatchClass = RegImmMatcher<MatchName#"V2F16">;
- let DecoderMethod = "decodeOperand_VSrcV216";
- }
- }
-}
-
-multiclass SIRegOperand64 <string rc, string MatchName, string opType,
- string rc_suffix = "_64", bit Vectors = 1> {
- let OperandNamespace = "AMDGPU" in {
- def _b64 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
- let OperandType = opType#"_INT64";
- let ParserMatchClass = RegImmMatcher<MatchName#"B64">;
- }
-
- def _f64 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
- let OperandType = opType#"_FP64";
- let ParserMatchClass = RegImmMatcher<MatchName#"F64">;
- }
-
- if Vectors then
- def _v2f32 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
- let OperandType = opType#"_V2FP32";
- let ParserMatchClass = RegImmMatcher<MatchName#"V2FP32">;
- let DecoderMethod = "decodeOperand_VSrcV232";
- }
- if Vectors then
- def _v2b32 : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
- let OperandType = opType#"_V2INT32";
- let ParserMatchClass = RegImmMatcher<MatchName#"V2INT32">;
- let DecoderMethod = "decodeOperand_VSrcV232";
- }
- }
-}
-
-multiclass SIRegOperand <string rc, string MatchName, string opType> :
- SIRegOperand32<rc, MatchName, opType>,
- SIRegOperand64<rc, MatchName, opType>;
-
-// FIXME: 64-bit sources can sometimes use 32-bit constants.
-multiclass RegImmOperand <string rc, string MatchName>
- : SIRegOperand<rc, MatchName, "OPERAND_REG_IMM">;
-
-multiclass RegInlineOperand <string rc, string MatchName>
- : SIRegOperand<rc, MatchName, "OPERAND_REG_INLINE_C">;
-
-multiclass RegInlineOperand32 <string rc, string MatchName,
- string rc_suffix = "_32">
- : SIRegOperand32<rc, MatchName, "OPERAND_REG_INLINE_C", rc_suffix>;
-
-multiclass RegInlineOperand64 <string rc, string MatchName,
- string rc_suffix = "_64">
- : SIRegOperand64<rc, MatchName, "OPERAND_REG_INLINE_C", rc_suffix>;
-
-multiclass RegInlineOperandAC <string rc, string MatchName,
- string rc_suffix = "_32">
- : SIRegOperand32<rc, MatchName, "OPERAND_REG_INLINE_AC", rc_suffix>;
-
-multiclass RegInlineOperandAC64 <string rc, string MatchName,
- string rc_suffix = "_64">
- : SIRegOperand64<rc, MatchName, "OPERAND_REG_INLINE_AC", rc_suffix, 0>;
-
+class RegOrImmOperand <string RegisterClassName, string OperandTypeName,
+ string ParserMatchClassName, string decoderImmSize>
+ : RegisterOperand<!cast<RegisterClass>(RegisterClassName)> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = OperandTypeName;
+ let ParserMatchClass = RegImmMatcher<ParserMatchClassName>;
+ let DecoderMethod = "decodeOperand_" # RegisterClassName # decoderImmSize;
+ }
+
+class RegOrB16 <string RegisterClass, string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_INT16",
+ !subst("_b16", "B16", NAME), "_Imm16">;
+
+class RegOrF16 <string RegisterClass, string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_FP16",
+ !subst("_f16", "F16", NAME), "_Imm16">;
+
+class RegOrB32 <string RegisterClass, string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_INT32",
+ !subst("_b32", "B32", NAME), "_Imm32">;
+
+class RegOrF32 <string RegisterClass, string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_FP32",
+ !subst("_f32", "F32", NAME), "_Imm32">;
+
+class RegOrV2B16 <string RegisterClass, string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_V2INT16",
+ !subst("_v2b16", "V2B16", NAME), "_Imm16">;
+
+class RegOrV2F16 <string RegisterClass, string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_V2FP16",
+ !subst("_v2f16", "V2F16", NAME), "_Imm16">;
+
+class RegOrF64 <string RegisterClass, string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_FP64",
+ !subst("_f64", "F64", NAME), "_Imm64">;
+
+class RegOrB64 <string RegisterClass, string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_INT64",
+ !subst("_b64", "B64", NAME), "_Imm64">;
+
+class RegOrV2F32 <string RegisterClass, string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_V2FP32",
+ !subst("_v2f32", "V2FP32", NAME), "_Imm32">;
+
+class RegOrV2B32 <string RegisterClass, string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_V2INT32",
+ !subst("_v2b32", "V2INT32", NAME), "_Imm32">;
+
+// For VOP1,2,C True16 instructions. _Lo128 operands use only the first 128 32-bit VGPRs.
+class RegOrB16_Lo128 <string RegisterClass, string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_INT16",
+ !subst("_b16_Lo128", "B16_Lo128", NAME), "_Imm16">;
+
+class RegOrF16_Lo128 <string RegisterClass, string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_FP16",
+ !subst("_f16_Lo128", "F16_Lo128", NAME), "_Imm16">;
+
+// Deferred operands
+class RegOrF16_Deferred <string RegisterClass, string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_FP16_DEFERRED",
+ !subst("_f16_Deferred", "F16", NAME), "_Deferred_Imm16">;
+
+class RegOrF32_Deferred <string RegisterClass, string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_FP32_DEFERRED",
+ !subst("_f32_Deferred", "F32", NAME), "_Deferred_Imm32">;
+
+class RegOrF16_Lo128_Deferred <string RegisterClass,
+ string OperandTypePrefix>
+ : RegOrImmOperand <RegisterClass, OperandTypePrefix # "_FP16_DEFERRED",
+ !subst("_f16_Lo128_Deferred", "F16_Lo128", NAME),
+ "_Deferred_Imm16">;
//===----------------------------------------------------------------------===//
// SSrc_* Operands with an SGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//
-defm SSrc : RegImmOperand<"SReg", "SSrc">;
+def SSrc_b32 : RegOrB32 <"SReg_32", "OPERAND_REG_IMM">;
+def SSrc_f32 : RegOrF32 <"SReg_32", "OPERAND_REG_IMM">;
+def SSrc_b64 : RegOrB64 <"SReg_64", "OPERAND_REG_IMM">;
-def SSrcOrLds_b32 : RegisterOperand<SRegOrLds_32> {
- let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_REG_IMM_INT32";
- let ParserMatchClass = RegImmMatcher<"SSrcOrLdsB32">;
-}
+def SSrcOrLds_b32 : RegOrB32 <"SRegOrLds_32", "OPERAND_REG_IMM">;
//===----------------------------------------------------------------------===//
// SCSrc_* Operands with an SGPR or a inline constant
//===----------------------------------------------------------------------===//
-defm SCSrc : RegInlineOperand<"SReg", "SCSrc"> ;
+def SCSrc_b32 : RegOrB32 <"SReg_32", "OPERAND_REG_INLINE_C">;
+def SCSrc_b64 : RegOrB64 <"SReg_64", "OPERAND_REG_INLINE_C">;
//===----------------------------------------------------------------------===//
// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//
-defm VSrc : RegImmOperand<"VS", "VSrc">;
-defm VSrcT : SIRegOperand16<"VS", "VSrcT", "OPERAND_REG_IMM">;
+def VSrc_b16 : RegOrB16 <"VS_32", "OPERAND_REG_IMM">;
+def VSrc_f16 : RegOrF16 <"VS_32", "OPERAND_REG_IMM">;
+def VSrc_b32 : RegOrB32 <"VS_32", "OPERAND_REG_IMM">;
+def VSrc_f32 : RegOrF32 <"VS_32", "OPERAND_REG_IMM">;
+def VSrc_v2b16 : RegOrV2B16 <"VS_32", "OPERAND_REG_IMM">;
+def VSrc_v2f16 : RegOrV2F16 <"VS_32", "OPERAND_REG_IMM">;
+def VSrc_b64 : RegOrB64 <"VS_64", "OPERAND_REG_IMM">;
+def VSrc_f64 : RegOrF64 <"VS_64", "OPERAND_REG_IMM">;
+def VSrc_v2b32 : RegOrV2B32 <"VS_64", "OPERAND_REG_IMM">;
+def VSrc_v2f32 : RegOrV2F32 <"VS_64", "OPERAND_REG_IMM">;
-def VSrc_128 : RegisterOperand<VReg_128> {
- let DecoderMethod = "DecodeVS_128RegisterClass";
-}
+def VSrcT_b16_Lo128 : RegOrB16_Lo128 <"VS_32_Lo128", "OPERAND_REG_IMM">;
+def VSrcT_f16_Lo128 : RegOrF16_Lo128 <"VS_32_Lo128", "OPERAND_REG_IMM">;
//===----------------------------------------------------------------------===//
// VSrc_*_Deferred Operands with an SGPR, VGPR or a 32-bit immediate for use
// with FMAMK/FMAAK
//===----------------------------------------------------------------------===//
-multiclass SIRegOperand16_Deferred <string rc, string MatchName, string opType,
- string rc_suffix = "_32"> {
- let OperandNamespace = "AMDGPU" in {
- def _f16_Lo128_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix#"_Lo128")> {
- let OperandType = opType#"_FP16_DEFERRED";
- let ParserMatchClass = RegImmMatcher<MatchName#"F16_Lo128">;
- let DecoderMethod = "decodeOperand_" # rc # "_16_Deferred";
- }
- }
-}
-
-multiclass SIRegOperand32_Deferred <string rc, string MatchName, string opType,
- string rc_suffix = "_32"> {
- let OperandNamespace = "AMDGPU" in {
- def _f16_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
- let OperandType = opType#"_FP16_DEFERRED";
- let ParserMatchClass = RegImmMatcher<MatchName#"F16">;
- let DecoderMethod = "decodeOperand_" # rc # "_16_Deferred";
- }
-
- def _f32_Deferred : RegisterOperand<!cast<RegisterClass>(rc#rc_suffix)> {
- let OperandType = opType#"_FP32_DEFERRED";
- let ParserMatchClass = RegImmMatcher<MatchName#"F32">;
- let DecoderMethod = "decodeOperand_" # rc # "_32_Deferred";
- }
- }
-}
+def VSrc_f16_Deferred : RegOrF16_Deferred<"VS_32", "OPERAND_REG_IMM">;
+def VSrc_f32_Deferred : RegOrF32_Deferred<"VS_32", "OPERAND_REG_IMM">;
-defm VSrc : SIRegOperand32_Deferred<"VS", "VSrc", "OPERAND_REG_IMM">;
-defm VSrcT : SIRegOperand16_Deferred<"VS", "VSrcT", "OPERAND_REG_IMM">;
+def VSrcT_f16_Lo128_Deferred : RegOrF16_Lo128_Deferred<"VS_32_Lo128",
+ "OPERAND_REG_IMM">;
//===----------------------------------------------------------------------===//
// VRegSrc_* Operands with a VGPR
@@ -1253,8 +1202,7 @@ defm VSrcT : SIRegOperand16_Deferred<"VS", "VSrcT", "OPERAND_REG_IMM">;
// This is for operands with the enum(9), VSrc encoding restriction,
// but only allows VGPRs.
def VRegSrc_32 : RegisterOperand<VGPR_32> {
- //let ParserMatchClass = RegImmMatcher<"VRegSrc32">;
- let DecoderMethod = "DecodeVS_32RegisterClass";
+ let DecoderMethod = "decodeOperand_VGPR_32";
}
def VRegSrc_64 : RegisterOperand<VReg_64> {
@@ -1269,6 +1217,10 @@ def VRegSrc_256 : RegisterOperand<VReg_256> {
let DecoderMethod = "decodeOperand_VReg_256";
}
+def VRegOrLdsSrc_32 : RegisterOperand<VRegOrLds_32> {
+ let DecoderMethod = "decodeOperand_VRegOrLds_32";
+}
+
//===----------------------------------------------------------------------===//
// VGPRSrc_*
//===----------------------------------------------------------------------===//
@@ -1286,7 +1238,7 @@ def VGPRSrc_32_Lo128 : RegisterOperand<VGPR_32_Lo128> {
//===----------------------------------------------------------------------===//
def ARegSrc_32 : RegisterOperand<AGPR_32> {
- let DecoderMethod = "DecodeAGPR_32RegisterClass";
+ let DecoderMethod = "decodeOperand_AGPR_32";
let EncoderMethod = "getAVOperandEncoding";
}
@@ -1294,38 +1246,42 @@ def ARegSrc_32 : RegisterOperand<AGPR_32> {
// VCSrc_* Operands with an SGPR, VGPR or an inline constant
//===----------------------------------------------------------------------===//
-defm VCSrc : RegInlineOperand<"VS", "VCSrc">;
-defm VCSrcT : SIRegOperand16<"VS", "VCSrcT", "OPERAND_REG_INLINE_C">;
+def VCSrc_b16 : RegOrB16 <"VS_32", "OPERAND_REG_INLINE_C">;
+def VCSrc_f16 : RegOrF16 <"VS_32", "OPERAND_REG_INLINE_C">;
+def VCSrc_b32 : RegOrB32 <"VS_32", "OPERAND_REG_INLINE_C">;
+def VCSrc_f32 : RegOrF32 <"VS_32", "OPERAND_REG_INLINE_C">;
+def VCSrc_v2b16 : RegOrV2B16 <"VS_32", "OPERAND_REG_INLINE_C">;
+def VCSrc_v2f16 : RegOrV2F16 <"VS_32", "OPERAND_REG_INLINE_C">;
//===----------------------------------------------------------------------===//
// VISrc_* Operands with a VGPR or an inline constant
//===----------------------------------------------------------------------===//
-defm VISrc : RegInlineOperand32<"VGPR", "VISrc">;
-let DecoderMethod = "decodeOperand_VReg_64" in
-defm VISrc_64 : RegInlineOperand64<"VReg", "VISrc_64", "_64">;
-defm VISrc_128 : RegInlineOperandAC<"VReg", "VISrc_128", "_128">;
-let DecoderMethod = "decodeOperand_VReg_256" in
-defm VISrc_256 : RegInlineOperand64<"VReg", "VISrc_256", "_256">;
-defm VISrc_512 : RegInlineOperandAC<"VReg", "VISrc_512", "_512">;
-defm VISrc_1024 : RegInlineOperandAC<"VReg", "VISrc_1024", "_1024">;
+def VISrc_64_f64 : RegOrF64 <"VReg_64", "OPERAND_REG_INLINE_C">;
+def VISrc_128_b32 : RegOrB32 <"VReg_128", "OPERAND_REG_INLINE_C">;
+def VISrc_128_f32 : RegOrF32 <"VReg_128", "OPERAND_REG_INLINE_C">;
+def VISrc_256_f64 : RegOrF64 <"VReg_256", "OPERAND_REG_INLINE_C">;
+def VISrc_512_b32 : RegOrB32 <"VReg_512", "OPERAND_REG_INLINE_C">;
+def VISrc_512_f32 : RegOrF32 <"VReg_512", "OPERAND_REG_INLINE_C">;
+def VISrc_1024_b32 : RegOrB32 <"VReg_1024", "OPERAND_REG_INLINE_C">;
+def VISrc_1024_f32 : RegOrF32 <"VReg_1024", "OPERAND_REG_INLINE_C">;
//===----------------------------------------------------------------------===//
// AVSrc_*, AVDst_*, AVLdSt_* Operands with an AGPR or VGPR
//===----------------------------------------------------------------------===//
def AVSrc_32 : RegisterOperand<AV_32> {
- let DecoderMethod = "DecodeAV_32RegisterClass";
+ let DecoderMethod = "decodeOperand_AV_32";
let EncoderMethod = "getAVOperandEncoding";
}
def AVSrc_64 : RegisterOperand<AV_64> {
- let DecoderMethod = "DecodeAV_64RegisterClass";
+ let DecoderMethod = "decodeOperand_AV_64";
let EncoderMethod = "getAVOperandEncoding";
}
def AVSrc_128 : RegisterOperand<AV_128> {
- let DecoderMethod = "DecodeAV_128RegisterClass";
+ let DecoderMethod = "decodeOperand_AV_128";
let EncoderMethod = "getAVOperandEncoding";
}
@@ -1368,12 +1324,11 @@ def AVLdSt_160 : RegisterOperand<AV_160> {
// ACSrc_* Operands with an AGPR or an inline constant
//===----------------------------------------------------------------------===//
-defm AISrc : RegInlineOperandAC<"AGPR", "AISrc">;
-defm AISrc_128 : RegInlineOperandAC<"AReg", "AISrc_128", "_128">;
-defm AISrc_512 : RegInlineOperandAC<"AReg", "AISrc_512", "_512">;
-defm AISrc_1024 : RegInlineOperandAC<"AReg", "AISrc_1024", "_1024">;
-
-let DecoderMethod = "decodeOperand_AReg_64" in
-defm AISrc_64 : RegInlineOperandAC64<"AReg", "AISrc_64", "_64">;
-let DecoderMethod = "decodeOperand_AReg_256" in
-defm AISrc_256 : RegInlineOperandAC64<"AReg", "AISrc_256", "_256">;
+def AISrc_64_f64 : RegOrF64 <"AReg_64", "OPERAND_REG_INLINE_AC">;
+def AISrc_128_f32 : RegOrF32 <"AReg_128", "OPERAND_REG_INLINE_AC">;
+def AISrc_128_b32 : RegOrB32 <"AReg_128", "OPERAND_REG_INLINE_AC">;
+def AISrc_256_f64 : RegOrF64 <"AReg_256", "OPERAND_REG_INLINE_AC">;
+def AISrc_512_f32 : RegOrF32 <"AReg_512", "OPERAND_REG_INLINE_AC">;
+def AISrc_512_b32 : RegOrB32 <"AReg_512", "OPERAND_REG_INLINE_AC">;
+def AISrc_1024_f32 : RegOrF32 <"AReg_1024", "OPERAND_REG_INLINE_AC">;
+def AISrc_1024_b32 : RegOrB32 <"AReg_1024", "OPERAND_REG_INLINE_AC">;
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index bec07d990380..4159dc694c1e 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -161,14 +161,12 @@ bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
bool SIShrinkInstructions::isKImmOperand(const MachineOperand &Src) const {
return isInt<16>(Src.getImm()) &&
- !TII->isInlineConstant(*Src.getParent(),
- Src.getParent()->getOperandNo(&Src));
+ !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
}
bool SIShrinkInstructions::isKUImmOperand(const MachineOperand &Src) const {
return isUInt<16>(Src.getImm()) &&
- !TII->isInlineConstant(*Src.getParent(),
- Src.getParent()->getOperandNo(&Src));
+ !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
}
bool SIShrinkInstructions::isKImmOrKUImmOperand(const MachineOperand &Src,
@@ -310,7 +308,10 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
unsigned NextVgpr = 0;
bool IsUndef = true;
bool IsKill = NewAddrDwords == Info->VAddrDwords;
- for (unsigned Idx = 0; Idx < Info->VAddrOperands; ++Idx) {
+ const unsigned NSAMaxSize = ST->getNSAMaxSize();
+ const bool IsPartialNSA = NewAddrDwords > NSAMaxSize;
+ const unsigned EndVAddr = IsPartialNSA ? NSAMaxSize : Info->VAddrOperands;
+ for (unsigned Idx = 0; Idx < EndVAddr; ++Idx) {
const MachineOperand &Op = MI.getOperand(VAddr0Idx + Idx);
unsigned Vgpr = TRI->getHWRegIndex(Op.getReg());
unsigned Dwords = TRI->getRegSizeInBits(Op.getReg(), *MRI) / 32;
@@ -363,13 +364,13 @@ void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
MI.getOperand(VAddr0Idx).setIsKill(IsKill);
- for (int i = 1; i < Info->VAddrOperands; ++i)
+ for (unsigned i = 1; i < EndVAddr; ++i)
MI.removeOperand(VAddr0Idx + 1);
if (ToUntie >= 0) {
MI.tieOperands(
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
- ToUntie - (Info->VAddrOperands - 1));
+ ToUntie - (EndVAddr - 1));
}
}
@@ -475,7 +476,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
}
}
-/// Attempt to shink AND/OR/XOR operations requiring non-inlineable literals.
+/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
/// XNOR (as a ^ b == ~(a ^ ~b)).
@@ -497,7 +498,7 @@ bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
if (Opc == AMDGPU::S_AND_B32) {
if (isPowerOf2_32(~Imm)) {
- NewImm = countTrailingOnes(Imm);
+ NewImm = llvm::countr_one(Imm);
Opc = AMDGPU::S_BITSET0_B32;
} else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
NewImm = ~Imm;
@@ -505,7 +506,7 @@ bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
}
} else if (Opc == AMDGPU::S_OR_B32) {
if (isPowerOf2_32(Imm)) {
- NewImm = countTrailingZeros(Imm);
+ NewImm = llvm::countr_zero(Imm);
Opc = AMDGPU::S_BITSET1_B32;
} else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
NewImm = ~Imm;
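The shrinkScalarLogicOp() hunk above only renames the bit-counting helpers; the underlying rewrites stay the same. A standalone sketch (assuming C++20 <bit>) of the arithmetic behind the S_AND_B32 -> S_BITSET0_B32 case and the XNOR identity mentioned in the doc comment:

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  // S_AND_B32 with a mask that clears exactly one bit becomes S_BITSET0_B32.
  const uint32_t Imm = 0xffffdfffu;   // clears only bit 13
  const uint32_t Inv = ~Imm;          // 0x00002000
  assert(std::has_single_bit(Inv));   // so the S_BITSET0 rewrite applies
  assert(std::countr_one(Imm) == 13); // the bit index fed to S_BITSET0_B32

  // The XNOR rewrite relies on the identity a ^ b == ~(a ^ ~b).
  const uint32_t A = 0x12345678u, B = 0x0f0f0f0fu;
  assert((A ^ B) == static_cast<uint32_t>(~(A ^ ~B)));
  return 0;
}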
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 4d6669f8f94d..3143d437e370 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -158,10 +158,11 @@ private:
MachinePostDominatorTree *PDT;
unsigned AndOpc;
+ unsigned AndTermOpc;
unsigned AndN2Opc;
unsigned XorOpc;
unsigned AndSaveExecOpc;
- unsigned OrSaveExecOpc;
+ unsigned AndSaveExecTermOpc;
unsigned WQMOpc;
Register Exec;
Register LiveMaskReg;
@@ -380,8 +381,8 @@ void SIWholeQuadMode::markDefs(const MachineInstr &UseMI, LiveRange &LR,
if (Reg.isVirtual()) {
// Iterate over all operands to find relevant definitions
bool HasDef = false;
- for (const MachineOperand &Op : MI->operands()) {
- if (!(Op.isReg() && Op.isDef() && Op.getReg() == Reg))
+ for (const MachineOperand &Op : MI->all_defs()) {
+ if (Op.getReg() != Reg)
continue;
// Compute lanes defined and overlap with use
@@ -453,14 +454,13 @@ void SIWholeQuadMode::markOperand(const MachineInstr &MI,
// Handle physical registers that we need to track; this is mostly relevant
// for VCC, which can appear as the (implicit) input of a uniform branch,
// e.g. when a loop counter is stored in a VGPR.
- for (MCRegUnitIterator RegUnit(Reg.asMCReg(), TRI); RegUnit.isValid();
- ++RegUnit) {
- LiveRange &LR = LIS->getRegUnit(*RegUnit);
+ for (MCRegUnit Unit : TRI->regunits(Reg.asMCReg())) {
+ LiveRange &LR = LIS->getRegUnit(Unit);
const VNInfo *Value = LR.Query(LIS->getInstructionIndex(MI)).valueIn();
if (!Value)
continue;
- markDefs(MI, LR, *RegUnit, AMDGPU::NoSubRegister, Flag, Worklist);
+ markDefs(MI, LR, Unit, AMDGPU::NoSubRegister, Flag, Worklist);
}
}
}
@@ -471,11 +471,8 @@ void SIWholeQuadMode::markInstructionUses(const MachineInstr &MI, char Flag,
LLVM_DEBUG(dbgs() << "markInstructionUses " << PrintState(Flag) << ": "
<< MI);
- for (const MachineOperand &Use : MI.uses()) {
- if (!Use.isReg() || !Use.isUse())
- continue;
+ for (const MachineOperand &Use : MI.all_uses())
markOperand(MI, Use, Flag, Worklist);
- }
}
// Scan instructions to determine which ones require an Exact execmask and
@@ -1139,7 +1136,7 @@ MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion(
return PreferLast ? Last : First;
LiveRange &LR =
- LIS->getRegUnit(*MCRegUnitIterator(MCRegister::from(AMDGPU::SCC), TRI));
+ LIS->getRegUnit(*TRI->regunits(MCRegister::from(AMDGPU::SCC)).begin());
auto MBBE = MBB.end();
SlotIndex FirstIdx = First != MBBE ? LIS->getInstructionIndex(*First)
: LIS->getMBBEndIdx(&MBB);
@@ -1185,11 +1182,9 @@ MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion(
// does not need to be preserved.
while (MBBI != Last) {
bool IsExecDef = false;
- for (const MachineOperand &MO : MBBI->operands()) {
- if (MO.isReg() && MO.isDef()) {
- IsExecDef |=
- MO.getReg() == AMDGPU::EXEC_LO || MO.getReg() == AMDGPU::EXEC;
- }
+ for (const MachineOperand &MO : MBBI->all_defs()) {
+ IsExecDef |=
+ MO.getReg() == AMDGPU::EXEC_LO || MO.getReg() == AMDGPU::EXEC;
}
if (!IsExecDef)
break;
@@ -1206,13 +1201,25 @@ MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion(
void SIWholeQuadMode::toExact(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Before,
Register SaveWQM) {
+ bool IsTerminator = Before == MBB.end();
+ if (!IsTerminator) {
+ auto FirstTerm = MBB.getFirstTerminator();
+ if (FirstTerm != MBB.end()) {
+ SlotIndex FirstTermIdx = LIS->getInstructionIndex(*FirstTerm);
+ SlotIndex BeforeIdx = LIS->getInstructionIndex(*Before);
+ IsTerminator = BeforeIdx > FirstTermIdx;
+ }
+ }
+
MachineInstr *MI;
if (SaveWQM) {
- MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AndSaveExecOpc), SaveWQM)
+ unsigned Opcode = IsTerminator ? AndSaveExecTermOpc : AndSaveExecOpc;
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(Opcode), SaveWQM)
.addReg(LiveMaskReg);
} else {
- MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AndOpc), Exec)
+ unsigned Opcode = IsTerminator ? AndTermOpc : AndOpc;
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(Opcode), Exec)
.addReg(Exec)
.addReg(LiveMaskReg);
}
@@ -1365,7 +1372,8 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, bool IsEntry) {
Needs = StateExact | StateWQM | StateStrict;
}
- if (MI.isTerminator() && OutNeeds == StateExact)
+ // Exact mode exit can occur in terminators, but must be before branches.
+ if (MI.isBranch() && OutNeeds == StateExact)
Needs = StateExact;
++Next;
@@ -1539,7 +1547,11 @@ void SIWholeQuadMode::lowerCopyInstrs() {
assert(MI->getNumExplicitOperands() == 2);
}
- MI->setDesc(TII->get(AMDGPU::COPY));
+ unsigned CopyOp = MI->getOperand(1).isReg()
+ ? (unsigned)AMDGPU::COPY
+ : TII->getMovOpcode(TRI->getRegClassForOperandReg(
+ *MRI, MI->getOperand(0)));
+ MI->setDesc(TII->get(CopyOp));
}
}
@@ -1587,18 +1599,20 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
if (ST->isWave32()) {
AndOpc = AMDGPU::S_AND_B32;
+ AndTermOpc = AMDGPU::S_AND_B32_term;
AndN2Opc = AMDGPU::S_ANDN2_B32;
XorOpc = AMDGPU::S_XOR_B32;
AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B32;
- OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B32;
+ AndSaveExecTermOpc = AMDGPU::S_AND_SAVEEXEC_B32_term;
WQMOpc = AMDGPU::S_WQM_B32;
Exec = AMDGPU::EXEC_LO;
} else {
AndOpc = AMDGPU::S_AND_B64;
+ AndTermOpc = AMDGPU::S_AND_B64_term;
AndN2Opc = AMDGPU::S_ANDN2_B64;
XorOpc = AMDGPU::S_XOR_B64;
AndSaveExecOpc = AMDGPU::S_AND_SAVEEXEC_B64;
- OrSaveExecOpc = AMDGPU::S_OR_SAVEEXEC_B64;
+ AndSaveExecTermOpc = AMDGPU::S_AND_SAVEEXEC_B64_term;
WQMOpc = AMDGPU::S_WQM_B64;
Exec = AMDGPU::EXEC;
}
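
Note on the SIWholeQuadMode hunks above: the new *_term opcodes are only swapped in when the insertion point is at or past the block's first terminator; the lane-mask arithmetic itself is unchanged, i.e. EXEC is ANDed with LiveMaskReg, and the SAVEEXEC form additionally copies the previous EXEC into SaveWQM. The following is a small host-side model of that arithmetic under my reading of the S_AND_SAVEEXEC semantics (destination receives the old EXEC, then EXEC is ANDed with the source); it is a conceptual sketch, not the pass.

// Conceptual model of the exec-mask updates toExact() emits.
#include <cassert>
#include <cstdint>

struct WaveState {
  uint64_t Exec;     // models EXEC (or EXEC_LO on wave32)
  uint64_t LiveMask; // models LiveMaskReg
};

// Models "S_AND_Bxx exec, exec, livemask" (the AndOpc / AndTermOpc path).
void toExact(WaveState &W) { W.Exec &= W.LiveMask; }

// Models "S_AND_SAVEEXEC_Bxx save, livemask": save the old EXEC, then AND.
uint64_t toExactSaveWQM(WaveState &W) {
  uint64_t Saved = W.Exec;
  W.Exec &= W.LiveMask;
  return Saved;
}

int main() {
  // In WQM the helper lanes (here lanes 4..7) are enabled but not live.
  WaveState W{/*Exec=*/0xFFull, /*LiveMask=*/0x0Full};
  uint64_t SaveWQM = toExactSaveWQM(W);
  assert(W.Exec == 0x0F && SaveWQM == 0xFF); // helper lanes dropped, old mask kept
  W.Exec = SaveWQM; // restoring the saved mask re-enables the helper lanes
  assert(W.Exec == 0xFF);
  return 0;
}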
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index f271f6d42857..7ca685a0cc5d 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -6,22 +6,12 @@
//
//===----------------------------------------------------------------------===//
-def smrd_offset_8 : NamedOperandU32<"SMRDOffset8",
- NamedMatchClass<"SMRDOffset8">> {
- let OperandType = "OPERAND_IMMEDIATE";
-}
-
-class SMEMOffset : NamedOperandU32<"SMEMOffset",
- NamedMatchClass<"SMEMOffset">> {
- let OperandType = "OPERAND_IMMEDIATE";
- let EncoderMethod = "getSMEMOffsetEncoding";
- let DecoderMethod = "decodeSMEMOffset";
-}
+def smrd_offset_8 : ImmOperand<i32, "SMRDOffset8", 1>;
-def smem_offset : SMEMOffset;
-
-def smem_offset_mod : SMEMOffset {
- let PrintMethod = "printSMEMOffsetMod";
+let EncoderMethod = "getSMEMOffsetEncoding",
+ DecoderMethod = "decodeSMEMOffset" in {
+def smem_offset : ImmOperand<i32, "SMEMOffset", 1>;
+def smem_offset_mod : NamedIntOperand<i32, "offset", "SMEMOffsetMod">;
}
//===----------------------------------------------------------------------===//
@@ -124,6 +114,7 @@ class SM_Load_Pseudo <string opName, RegisterClass baseClass,
" $sdst, $sbase, " # offsets.Asm # "$cpol", []> {
RegisterClass BaseClass = baseClass;
let mayLoad = 1;
+ let isReMaterializable = 1;
let mayStore = 0;
let has_glc = 1;
let has_dlc = 1;
@@ -138,7 +129,6 @@ class SM_Store_Pseudo <string opName, RegisterClass baseClass,
offsets.Ins, (ins CPol:$cpol)),
" $sdata, $sbase, " # offsets.Asm # "$cpol"> {
RegisterClass BaseClass = baseClass;
- RegisterClass SrcClass = srcClass;
let mayLoad = 0;
let mayStore = 1;
let has_glc = 1;
@@ -163,23 +153,24 @@ class SM_Discard_Pseudo <string opName, OffsetMode offsets>
let PseudoInstr = opName # offsets.Variant;
}
-multiclass SM_Pseudo_Loads<string opName,
- RegisterClass baseClass,
+multiclass SM_Pseudo_Loads<RegisterClass baseClass,
RegisterClass dstClass> {
+ defvar opName = !tolower(NAME);
def _IMM : SM_Load_Pseudo <opName, baseClass, dstClass, IMM_Offset>;
def _SGPR : SM_Load_Pseudo <opName, baseClass, dstClass, SGPR_Offset>;
def _SGPR_IMM : SM_Load_Pseudo <opName, baseClass, dstClass, SGPR_IMM_Offset>;
}
-multiclass SM_Pseudo_Stores<string opName,
- RegisterClass baseClass,
- RegisterClass srcClass> {
+multiclass SM_Pseudo_Stores<RegisterClass baseClass,
+ RegisterClass srcClass> {
+ defvar opName = !tolower(NAME);
def _IMM : SM_Store_Pseudo <opName, baseClass, srcClass, IMM_Offset>;
def _SGPR : SM_Store_Pseudo <opName, baseClass, srcClass, SGPR_Offset>;
def _SGPR_IMM : SM_Store_Pseudo <opName, baseClass, srcClass, SGPR_IMM_Offset>;
}
-multiclass SM_Pseudo_Discards<string opName> {
+multiclass SM_Pseudo_Discards {
+ defvar opName = !tolower(NAME);
def _IMM : SM_Discard_Pseudo <opName, IMM_Offset>;
def _SGPR : SM_Discard_Pseudo <opName, SGPR_Offset>;
def _SGPR_IMM : SM_Discard_Pseudo <opName, SGPR_IMM_Offset>;
@@ -204,7 +195,8 @@ class SM_Inval_Pseudo <string opName, SDPatternOperator node = null_frag> : SM_P
let has_sbase = 0;
}
-multiclass SM_Pseudo_Probe<string opName, RegisterClass baseClass> {
+multiclass SM_Pseudo_Probe<RegisterClass baseClass> {
+ defvar opName = !tolower(NAME);
def _IMM : SM_Probe_Pseudo <opName, baseClass, IMM_Offset>;
def _SGPR : SM_Probe_Pseudo <opName, baseClass, SGPR_Offset>;
def _SGPR_IMM : SM_Probe_Pseudo <opName, baseClass, SGPR_IMM_Offset>;
@@ -270,9 +262,9 @@ class SM_Pseudo_Atomic<string opName,
let DisableEncoding = !if(isRet, "$sdata", "");
}
-multiclass SM_Pseudo_Atomics<string opName,
- RegisterClass baseClass,
+multiclass SM_Pseudo_Atomics<RegisterClass baseClass,
RegisterClass dataClass> {
+ defvar opName = !tolower(NAME);
def _IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 0>;
def _SGPR : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 0>;
def _SGPR_IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 0>;
@@ -291,53 +283,31 @@ multiclass SM_Pseudo_Atomics<string opName,
// XXX - SMEM instructions do not allow exec for data operand, but
// does sdst for SMRD on SI/CI?
-defm S_LOAD_DWORD : SM_Pseudo_Loads <"s_load_dword", SReg_64, SReg_32_XM0_XEXEC>;
-defm S_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_load_dwordx2", SReg_64, SReg_64_XEXEC>;
-defm S_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_load_dwordx4", SReg_64, SReg_128>;
-defm S_LOAD_DWORDX8 : SM_Pseudo_Loads <"s_load_dwordx8", SReg_64, SReg_256>;
-defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <"s_load_dwordx16", SReg_64, SReg_512>;
+defm S_LOAD_DWORD : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_64, SReg_64_XEXEC>;
+defm S_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_64, SReg_128>;
+defm S_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_64, SReg_256>;
+defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_64, SReg_512>;
let is_buffer = 1 in {
-defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <
- "s_buffer_load_dword", SReg_128, SReg_32_XM0_XEXEC
->;
-
+defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
// FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
// SI/CI, but disallowed for SMEM on VI.
-defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <
- "s_buffer_load_dwordx2", SReg_128, SReg_64_XEXEC
->;
-
-defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <
- "s_buffer_load_dwordx4", SReg_128, SReg_128
->;
-
-defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <
- "s_buffer_load_dwordx8", SReg_128, SReg_256
->;
-
-defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <
- "s_buffer_load_dwordx16", SReg_128, SReg_512
->;
+defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_128, SReg_64_XEXEC>;
+defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_128, SReg_128>;
+defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_128, SReg_256>;
+defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_128, SReg_512>;
}
let SubtargetPredicate = HasScalarStores in {
-defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
-defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64_XEXEC>;
-defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>;
+defm S_STORE_DWORD : SM_Pseudo_Stores <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_64, SReg_64_XEXEC>;
+defm S_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_64, SReg_128>;
let is_buffer = 1 in {
-defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores <
- "s_buffer_store_dword", SReg_128, SReg_32_XM0_XEXEC
->;
-
-defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <
- "s_buffer_store_dwordx2", SReg_128, SReg_64_XEXEC
->;
-
-defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <
- "s_buffer_store_dwordx4", SReg_128, SReg_128
->;
+defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_128, SReg_64_XEXEC>;
+defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_128, SReg_128>;
}
} // End SubtargetPredicate = HasScalarStores
@@ -355,9 +325,9 @@ def S_DCACHE_WB : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
} // End OtherPredicates = [HasScalarStores]
-defm S_ATC_PROBE : SM_Pseudo_Probe <"s_atc_probe", SReg_64>;
+defm S_ATC_PROBE : SM_Pseudo_Probe <SReg_64>;
let is_buffer = 1 in {
-defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <"s_atc_probe_buffer", SReg_128>;
+defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <SReg_128>;
}
} // SubtargetPredicate = isGFX8Plus
@@ -371,80 +341,80 @@ def S_GET_WAVEID_IN_WORKGROUP : SM_WaveId_Pseudo <"s_get_waveid_in_workgroup", i
let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in {
-defm S_SCRATCH_LOAD_DWORD : SM_Pseudo_Loads <"s_scratch_load_dword", SReg_64, SReg_32_XM0_XEXEC>;
-defm S_SCRATCH_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_scratch_load_dwordx2", SReg_64, SReg_64_XEXEC>;
-defm S_SCRATCH_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_scratch_load_dwordx4", SReg_64, SReg_128>;
+defm S_SCRATCH_LOAD_DWORD : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_SCRATCH_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_64, SReg_64_XEXEC>;
+defm S_SCRATCH_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_64, SReg_128>;
-defm S_SCRATCH_STORE_DWORD : SM_Pseudo_Stores <"s_scratch_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
-defm S_SCRATCH_STORE_DWORDX2 : SM_Pseudo_Stores <"s_scratch_store_dwordx2", SReg_64, SReg_64_XEXEC>;
-defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <"s_scratch_store_dwordx4", SReg_64, SReg_128>;
+defm S_SCRATCH_STORE_DWORD : SM_Pseudo_Stores <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_SCRATCH_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_64, SReg_64_XEXEC>;
+defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_64, SReg_128>;
} // SubtargetPredicate = HasScalarFlatScratchInsts
let SubtargetPredicate = HasScalarAtomics in {
let is_buffer = 1 in {
-defm S_BUFFER_ATOMIC_SWAP : SM_Pseudo_Atomics <"s_buffer_atomic_swap", SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <"s_buffer_atomic_cmpswap", SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_ADD : SM_Pseudo_Atomics <"s_buffer_atomic_add", SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_SUB : SM_Pseudo_Atomics <"s_buffer_atomic_sub", SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_SMIN : SM_Pseudo_Atomics <"s_buffer_atomic_smin", SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_UMIN : SM_Pseudo_Atomics <"s_buffer_atomic_umin", SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_SMAX : SM_Pseudo_Atomics <"s_buffer_atomic_smax", SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_UMAX : SM_Pseudo_Atomics <"s_buffer_atomic_umax", SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_AND : SM_Pseudo_Atomics <"s_buffer_atomic_and", SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_OR : SM_Pseudo_Atomics <"s_buffer_atomic_or", SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_XOR : SM_Pseudo_Atomics <"s_buffer_atomic_xor", SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_INC : SM_Pseudo_Atomics <"s_buffer_atomic_inc", SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_DEC : SM_Pseudo_Atomics <"s_buffer_atomic_dec", SReg_128, SReg_32_XM0_XEXEC>;
-
-defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_swap_x2", SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_cmpswap_x2", SReg_128, SReg_128>;
-defm S_BUFFER_ATOMIC_ADD_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_add_x2", SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_SUB_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_sub_x2", SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_smin_x2", SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_umin_x2", SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_smax_x2", SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_umax_x2", SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_AND_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_and_x2", SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_OR_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_or_x2", SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_xor_x2", SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_INC_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_inc_x2", SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_dec_x2", SReg_128, SReg_64_XEXEC>;
-}
-
-defm S_ATOMIC_SWAP : SM_Pseudo_Atomics <"s_atomic_swap", SReg_64, SReg_32_XM0_XEXEC>;
-defm S_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <"s_atomic_cmpswap", SReg_64, SReg_64_XEXEC>;
-defm S_ATOMIC_ADD : SM_Pseudo_Atomics <"s_atomic_add", SReg_64, SReg_32_XM0_XEXEC>;
-defm S_ATOMIC_SUB : SM_Pseudo_Atomics <"s_atomic_sub", SReg_64, SReg_32_XM0_XEXEC>;
-defm S_ATOMIC_SMIN : SM_Pseudo_Atomics <"s_atomic_smin", SReg_64, SReg_32_XM0_XEXEC>;
-defm S_ATOMIC_UMIN : SM_Pseudo_Atomics <"s_atomic_umin", SReg_64, SReg_32_XM0_XEXEC>;
-defm S_ATOMIC_SMAX : SM_Pseudo_Atomics <"s_atomic_smax", SReg_64, SReg_32_XM0_XEXEC>;
-defm S_ATOMIC_UMAX : SM_Pseudo_Atomics <"s_atomic_umax", SReg_64, SReg_32_XM0_XEXEC>;
-defm S_ATOMIC_AND : SM_Pseudo_Atomics <"s_atomic_and", SReg_64, SReg_32_XM0_XEXEC>;
-defm S_ATOMIC_OR : SM_Pseudo_Atomics <"s_atomic_or", SReg_64, SReg_32_XM0_XEXEC>;
-defm S_ATOMIC_XOR : SM_Pseudo_Atomics <"s_atomic_xor", SReg_64, SReg_32_XM0_XEXEC>;
-defm S_ATOMIC_INC : SM_Pseudo_Atomics <"s_atomic_inc", SReg_64, SReg_32_XM0_XEXEC>;
-defm S_ATOMIC_DEC : SM_Pseudo_Atomics <"s_atomic_dec", SReg_64, SReg_32_XM0_XEXEC>;
-
-defm S_ATOMIC_SWAP_X2 : SM_Pseudo_Atomics <"s_atomic_swap_x2", SReg_64, SReg_64_XEXEC>;
-defm S_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <"s_atomic_cmpswap_x2", SReg_64, SReg_128>;
-defm S_ATOMIC_ADD_X2 : SM_Pseudo_Atomics <"s_atomic_add_x2", SReg_64, SReg_64_XEXEC>;
-defm S_ATOMIC_SUB_X2 : SM_Pseudo_Atomics <"s_atomic_sub_x2", SReg_64, SReg_64_XEXEC>;
-defm S_ATOMIC_SMIN_X2 : SM_Pseudo_Atomics <"s_atomic_smin_x2", SReg_64, SReg_64_XEXEC>;
-defm S_ATOMIC_UMIN_X2 : SM_Pseudo_Atomics <"s_atomic_umin_x2", SReg_64, SReg_64_XEXEC>;
-defm S_ATOMIC_SMAX_X2 : SM_Pseudo_Atomics <"s_atomic_smax_x2", SReg_64, SReg_64_XEXEC>;
-defm S_ATOMIC_UMAX_X2 : SM_Pseudo_Atomics <"s_atomic_umax_x2", SReg_64, SReg_64_XEXEC>;
-defm S_ATOMIC_AND_X2 : SM_Pseudo_Atomics <"s_atomic_and_x2", SReg_64, SReg_64_XEXEC>;
-defm S_ATOMIC_OR_X2 : SM_Pseudo_Atomics <"s_atomic_or_x2", SReg_64, SReg_64_XEXEC>;
-defm S_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <"s_atomic_xor_x2", SReg_64, SReg_64_XEXEC>;
-defm S_ATOMIC_INC_X2 : SM_Pseudo_Atomics <"s_atomic_inc_x2", SReg_64, SReg_64_XEXEC>;
-defm S_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <"s_atomic_dec_x2", SReg_64, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_SWAP : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_ADD : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_SUB : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_SMIN : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_UMIN : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_SMAX : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_UMAX : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_AND : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_OR : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_XOR : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_INC : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_DEC : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
+
+defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <SReg_128, SReg_128>;
+defm S_BUFFER_ATOMIC_ADD_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_SUB_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_AND_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_OR_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_INC_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
+}
+
+defm S_ATOMIC_SWAP : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
+defm S_ATOMIC_ADD : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_ATOMIC_SUB : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_ATOMIC_SMIN : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_ATOMIC_UMIN : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_ATOMIC_SMAX : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_ATOMIC_UMAX : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_ATOMIC_AND : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_ATOMIC_OR : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_ATOMIC_XOR : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_ATOMIC_INC : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
+defm S_ATOMIC_DEC : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
+
+defm S_ATOMIC_SWAP_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
+defm S_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <SReg_64, SReg_128>;
+defm S_ATOMIC_ADD_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
+defm S_ATOMIC_SUB_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
+defm S_ATOMIC_SMIN_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
+defm S_ATOMIC_UMIN_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
+defm S_ATOMIC_SMAX_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
+defm S_ATOMIC_UMAX_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
+defm S_ATOMIC_AND_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
+defm S_ATOMIC_OR_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
+defm S_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
+defm S_ATOMIC_INC_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
+defm S_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
} // let SubtargetPredicate = HasScalarAtomics
let SubtargetPredicate = HasScalarAtomics in {
-defm S_DCACHE_DISCARD : SM_Pseudo_Discards <"s_dcache_discard">;
-defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards <"s_dcache_discard_x2">;
+defm S_DCACHE_DISCARD : SM_Pseudo_Discards;
+defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards;
}
//===----------------------------------------------------------------------===//
@@ -471,30 +441,27 @@ class SMRD_Real_si <bits<5> op, SM_Pseudo ps>
let Inst{31-27} = 0x18; //encoding
}
-multiclass SM_Real_Loads_si<bits<5> op, string ps,
- SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
- SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
-
+multiclass SM_Real_Loads_si<bits<5> op> {
+ defvar ps = NAME;
+ defvar immPs = !cast<SM_Load_Pseudo>(ps#_IMM);
def _IMM_si : SMRD_Real_si <op, immPs> {
let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, CPol:$cpol);
}
- def _SGPR_si : SMRD_Real_si <op, sgprPs> {
- let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol);
- }
-
+ defvar sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR);
+ def _SGPR_si : SMRD_Real_si <op, sgprPs>;
}
-defm S_LOAD_DWORD : SM_Real_Loads_si <0x00, "S_LOAD_DWORD">;
-defm S_LOAD_DWORDX2 : SM_Real_Loads_si <0x01, "S_LOAD_DWORDX2">;
-defm S_LOAD_DWORDX4 : SM_Real_Loads_si <0x02, "S_LOAD_DWORDX4">;
-defm S_LOAD_DWORDX8 : SM_Real_Loads_si <0x03, "S_LOAD_DWORDX8">;
-defm S_LOAD_DWORDX16 : SM_Real_Loads_si <0x04, "S_LOAD_DWORDX16">;
-defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_si <0x08, "S_BUFFER_LOAD_DWORD">;
-defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_si <0x09, "S_BUFFER_LOAD_DWORDX2">;
-defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_si <0x0a, "S_BUFFER_LOAD_DWORDX4">;
-defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_si <0x0b, "S_BUFFER_LOAD_DWORDX8">;
-defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_si <0x0c, "S_BUFFER_LOAD_DWORDX16">;
+defm S_LOAD_DWORD : SM_Real_Loads_si <0x00>;
+defm S_LOAD_DWORDX2 : SM_Real_Loads_si <0x01>;
+defm S_LOAD_DWORDX4 : SM_Real_Loads_si <0x02>;
+defm S_LOAD_DWORDX8 : SM_Real_Loads_si <0x03>;
+defm S_LOAD_DWORDX16 : SM_Real_Loads_si <0x04>;
+defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_si <0x08>;
+defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_si <0x09>;
+defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_si <0x0a>;
+defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_si <0x0b>;
+defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_si <0x0c>;
def S_MEMTIME_si : SMRD_Real_si <0x1e, S_MEMTIME>;
def S_DCACHE_INV_si : SMRD_Real_si <0x1f, S_DCACHE_INV>;
@@ -548,11 +515,8 @@ class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
soffset{6-0}, ?);
}
-class SMEM_Real_Load_vi<bits<8> op, string ps, OffsetMode offsets>
- : SMEM_Real_vi<op, !cast<SM_Pseudo>(ps # offsets.Variant)> {
- RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
- let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
-}
+class SMEM_Real_Load_vi<bits<8> op, string ps>
+ : SMEM_Real_vi<op, !cast<SM_Pseudo>(ps)>;
// The alternative GFX9 SGPR encoding using soffset to encode the
// offset register. Not available in assembler and goes to the GFX9
@@ -565,13 +529,14 @@ class SMEM_Real_SGPR_alt_gfx9 {
string AsmVariantName = "NonParsable";
}
-multiclass SM_Real_Loads_vi<bits<8> op, string ps> {
- def _IMM_vi : SMEM_Real_Load_vi <op, ps, IMM_Offset>;
- def _SGPR_vi : SMEM_Real_Load_vi <op, ps, SGPR_Offset>;
- def _SGPR_alt_gfx9 : SMEM_Real_Load_vi <op, ps, SGPR_Offset>,
+multiclass SM_Real_Loads_vi<bits<8> op> {
+ defvar ps = NAME;
+ def _IMM_vi : SMEM_Real_Load_vi <op, ps#"_IMM">;
+ def _SGPR_vi : SMEM_Real_Load_vi <op, ps#"_SGPR">;
+ def _SGPR_alt_gfx9 : SMEM_Real_Load_vi <op, ps#"_SGPR">,
SMEM_Real_SGPR_alt_gfx9;
let IsGFX9SpecificEncoding = true in
- def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi <op, ps, SGPR_IMM_Offset>;
+ def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi <op, ps#"_SGPR_IMM">;
}
class SMEM_Real_Store_Base_vi <bits<8> op, SM_Pseudo ps> : SMEM_Real_vi <op, ps> {
@@ -582,24 +547,21 @@ class SMEM_Real_Store_Base_vi <bits<8> op, SM_Pseudo ps> : SMEM_Real_vi <op, ps>
let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
}
-class SMEM_Real_Store_vi <bits<8> op, string ps, OffsetMode offsets>
- : SMEM_Real_Store_Base_vi <op, !cast<SM_Pseudo>(ps # offsets.Variant)> {
- RegisterClass SrcClass = !cast<SM_Store_Pseudo>(ps # offsets.Variant).SrcClass;
- RegisterClass BaseClass = !cast<SM_Store_Pseudo>(ps # offsets.Variant).BaseClass;
- let InOperandList = !con((ins SrcClass:$sdata, BaseClass:$sbase),
- offsets.Ins, (ins CPol:$cpol));
-}
+class SMEM_Real_Store_vi <bits<8> op, string ps>
+ : SMEM_Real_Store_Base_vi <op, !cast<SM_Pseudo>(ps)>;
-multiclass SM_Real_Stores_vi<bits<8> op, string ps> {
- def _IMM_vi : SMEM_Real_Store_vi <op, ps, IMM_Offset>;
- def _SGPR_vi : SMEM_Real_Store_vi <op, ps, SGPR_Offset>;
- def _SGPR_alt_gfx9 : SMEM_Real_Store_vi <op, ps, SGPR_Offset>,
+multiclass SM_Real_Stores_vi<bits<8> op> {
+ defvar ps = NAME;
+ def _IMM_vi : SMEM_Real_Store_vi <op, ps#"_IMM">;
+ def _SGPR_vi : SMEM_Real_Store_vi <op, ps#"_SGPR">;
+ def _SGPR_alt_gfx9 : SMEM_Real_Store_vi <op, ps#"_SGPR">,
SMEM_Real_SGPR_alt_gfx9;
let IsGFX9SpecificEncoding = true in
- def _SGPR_IMM_gfx9 : SMEM_Real_Store_vi <op, ps, SGPR_IMM_Offset>;
+ def _SGPR_IMM_gfx9 : SMEM_Real_Store_vi <op, ps#"_SGPR_IMM">;
}
-multiclass SM_Real_Probe_vi<bits<8> op, string ps> {
+multiclass SM_Real_Probe_vi<bits<8> op> {
+ defvar ps = NAME;
def _IMM_vi : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
def _SGPR_vi : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>;
def _SGPR_alt_gfx9
@@ -610,24 +572,24 @@ multiclass SM_Real_Probe_vi<bits<8> op, string ps> {
: SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
}
-defm S_LOAD_DWORD : SM_Real_Loads_vi <0x00, "S_LOAD_DWORD">;
-defm S_LOAD_DWORDX2 : SM_Real_Loads_vi <0x01, "S_LOAD_DWORDX2">;
-defm S_LOAD_DWORDX4 : SM_Real_Loads_vi <0x02, "S_LOAD_DWORDX4">;
-defm S_LOAD_DWORDX8 : SM_Real_Loads_vi <0x03, "S_LOAD_DWORDX8">;
-defm S_LOAD_DWORDX16 : SM_Real_Loads_vi <0x04, "S_LOAD_DWORDX16">;
-defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_vi <0x08, "S_BUFFER_LOAD_DWORD">;
-defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_vi <0x09, "S_BUFFER_LOAD_DWORDX2">;
-defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_vi <0x0a, "S_BUFFER_LOAD_DWORDX4">;
-defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_vi <0x0b, "S_BUFFER_LOAD_DWORDX8">;
-defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_vi <0x0c, "S_BUFFER_LOAD_DWORDX16">;
-
-defm S_STORE_DWORD : SM_Real_Stores_vi <0x10, "S_STORE_DWORD">;
-defm S_STORE_DWORDX2 : SM_Real_Stores_vi <0x11, "S_STORE_DWORDX2">;
-defm S_STORE_DWORDX4 : SM_Real_Stores_vi <0x12, "S_STORE_DWORDX4">;
-
-defm S_BUFFER_STORE_DWORD : SM_Real_Stores_vi <0x18, "S_BUFFER_STORE_DWORD">;
-defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_vi <0x19, "S_BUFFER_STORE_DWORDX2">;
-defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_vi <0x1a, "S_BUFFER_STORE_DWORDX4">;
+defm S_LOAD_DWORD : SM_Real_Loads_vi <0x00>;
+defm S_LOAD_DWORDX2 : SM_Real_Loads_vi <0x01>;
+defm S_LOAD_DWORDX4 : SM_Real_Loads_vi <0x02>;
+defm S_LOAD_DWORDX8 : SM_Real_Loads_vi <0x03>;
+defm S_LOAD_DWORDX16 : SM_Real_Loads_vi <0x04>;
+defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_vi <0x08>;
+defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_vi <0x09>;
+defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_vi <0x0a>;
+defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_vi <0x0b>;
+defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_vi <0x0c>;
+
+defm S_STORE_DWORD : SM_Real_Stores_vi <0x10>;
+defm S_STORE_DWORDX2 : SM_Real_Stores_vi <0x11>;
+defm S_STORE_DWORDX4 : SM_Real_Stores_vi <0x12>;
+
+defm S_BUFFER_STORE_DWORD : SM_Real_Stores_vi <0x18>;
+defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_vi <0x19>;
+defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_vi <0x1a>;
// These instructions use the same encoding
def S_DCACHE_INV_vi : SMEM_Real_vi <0x20, S_DCACHE_INV>;
@@ -637,16 +599,16 @@ def S_DCACHE_WB_VOL_vi : SMEM_Real_vi <0x23, S_DCACHE_WB_VOL>;
def S_MEMTIME_vi : SMEM_Real_vi <0x24, S_MEMTIME>;
def S_MEMREALTIME_vi : SMEM_Real_vi <0x25, S_MEMREALTIME>;
-defm S_SCRATCH_LOAD_DWORD : SM_Real_Loads_vi <0x05, "S_SCRATCH_LOAD_DWORD">;
-defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_vi <0x06, "S_SCRATCH_LOAD_DWORDX2">;
-defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_vi <0x07, "S_SCRATCH_LOAD_DWORDX4">;
+defm S_SCRATCH_LOAD_DWORD : SM_Real_Loads_vi <0x05>;
+defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_vi <0x06>;
+defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_vi <0x07>;
-defm S_SCRATCH_STORE_DWORD : SM_Real_Stores_vi <0x15, "S_SCRATCH_STORE_DWORD">;
-defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_vi <0x16, "S_SCRATCH_STORE_DWORDX2">;
-defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_vi <0x17, "S_SCRATCH_STORE_DWORDX4">;
+defm S_SCRATCH_STORE_DWORD : SM_Real_Stores_vi <0x15>;
+defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_vi <0x16>;
+defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_vi <0x17>;
-defm S_ATC_PROBE : SM_Real_Probe_vi <0x26, "S_ATC_PROBE">;
-defm S_ATC_PROBE_BUFFER : SM_Real_Probe_vi <0x27, "S_ATC_PROBE_BUFFER">;
+defm S_ATC_PROBE : SM_Real_Probe_vi <0x26>;
+defm S_ATC_PROBE_BUFFER : SM_Real_Probe_vi <0x27>;
//===----------------------------------------------------------------------===//
// GFX9
@@ -665,7 +627,8 @@ class SMEM_Atomic_Real_vi <bits<8> op, SM_Atomic_Pseudo ps>
let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
}
-multiclass SM_Real_Atomics_vi<bits<8> op, string ps> {
+multiclass SM_Real_Atomics_vi<bits<8> op> {
+ defvar ps = NAME;
def _IMM_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
def _SGPR_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
def _SGPR_alt_gfx9
@@ -684,63 +647,64 @@ multiclass SM_Real_Atomics_vi<bits<8> op, string ps> {
: SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>;
}
-defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_vi <0x40, "S_BUFFER_ATOMIC_SWAP">;
-defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_vi <0x41, "S_BUFFER_ATOMIC_CMPSWAP">;
-defm S_BUFFER_ATOMIC_ADD : SM_Real_Atomics_vi <0x42, "S_BUFFER_ATOMIC_ADD">;
-defm S_BUFFER_ATOMIC_SUB : SM_Real_Atomics_vi <0x43, "S_BUFFER_ATOMIC_SUB">;
-defm S_BUFFER_ATOMIC_SMIN : SM_Real_Atomics_vi <0x44, "S_BUFFER_ATOMIC_SMIN">;
-defm S_BUFFER_ATOMIC_UMIN : SM_Real_Atomics_vi <0x45, "S_BUFFER_ATOMIC_UMIN">;
-defm S_BUFFER_ATOMIC_SMAX : SM_Real_Atomics_vi <0x46, "S_BUFFER_ATOMIC_SMAX">;
-defm S_BUFFER_ATOMIC_UMAX : SM_Real_Atomics_vi <0x47, "S_BUFFER_ATOMIC_UMAX">;
-defm S_BUFFER_ATOMIC_AND : SM_Real_Atomics_vi <0x48, "S_BUFFER_ATOMIC_AND">;
-defm S_BUFFER_ATOMIC_OR : SM_Real_Atomics_vi <0x49, "S_BUFFER_ATOMIC_OR">;
-defm S_BUFFER_ATOMIC_XOR : SM_Real_Atomics_vi <0x4a, "S_BUFFER_ATOMIC_XOR">;
-defm S_BUFFER_ATOMIC_INC : SM_Real_Atomics_vi <0x4b, "S_BUFFER_ATOMIC_INC">;
-defm S_BUFFER_ATOMIC_DEC : SM_Real_Atomics_vi <0x4c, "S_BUFFER_ATOMIC_DEC">;
-
-defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Real_Atomics_vi <0x60, "S_BUFFER_ATOMIC_SWAP_X2">;
-defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_vi <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">;
-defm S_BUFFER_ATOMIC_ADD_X2 : SM_Real_Atomics_vi <0x62, "S_BUFFER_ATOMIC_ADD_X2">;
-defm S_BUFFER_ATOMIC_SUB_X2 : SM_Real_Atomics_vi <0x63, "S_BUFFER_ATOMIC_SUB_X2">;
-defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Real_Atomics_vi <0x64, "S_BUFFER_ATOMIC_SMIN_X2">;
-defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Real_Atomics_vi <0x65, "S_BUFFER_ATOMIC_UMIN_X2">;
-defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Real_Atomics_vi <0x66, "S_BUFFER_ATOMIC_SMAX_X2">;
-defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Real_Atomics_vi <0x67, "S_BUFFER_ATOMIC_UMAX_X2">;
-defm S_BUFFER_ATOMIC_AND_X2 : SM_Real_Atomics_vi <0x68, "S_BUFFER_ATOMIC_AND_X2">;
-defm S_BUFFER_ATOMIC_OR_X2 : SM_Real_Atomics_vi <0x69, "S_BUFFER_ATOMIC_OR_X2">;
-defm S_BUFFER_ATOMIC_XOR_X2 : SM_Real_Atomics_vi <0x6a, "S_BUFFER_ATOMIC_XOR_X2">;
-defm S_BUFFER_ATOMIC_INC_X2 : SM_Real_Atomics_vi <0x6b, "S_BUFFER_ATOMIC_INC_X2">;
-defm S_BUFFER_ATOMIC_DEC_X2 : SM_Real_Atomics_vi <0x6c, "S_BUFFER_ATOMIC_DEC_X2">;
-
-defm S_ATOMIC_SWAP : SM_Real_Atomics_vi <0x80, "S_ATOMIC_SWAP">;
-defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_vi <0x81, "S_ATOMIC_CMPSWAP">;
-defm S_ATOMIC_ADD : SM_Real_Atomics_vi <0x82, "S_ATOMIC_ADD">;
-defm S_ATOMIC_SUB : SM_Real_Atomics_vi <0x83, "S_ATOMIC_SUB">;
-defm S_ATOMIC_SMIN : SM_Real_Atomics_vi <0x84, "S_ATOMIC_SMIN">;
-defm S_ATOMIC_UMIN : SM_Real_Atomics_vi <0x85, "S_ATOMIC_UMIN">;
-defm S_ATOMIC_SMAX : SM_Real_Atomics_vi <0x86, "S_ATOMIC_SMAX">;
-defm S_ATOMIC_UMAX : SM_Real_Atomics_vi <0x87, "S_ATOMIC_UMAX">;
-defm S_ATOMIC_AND : SM_Real_Atomics_vi <0x88, "S_ATOMIC_AND">;
-defm S_ATOMIC_OR : SM_Real_Atomics_vi <0x89, "S_ATOMIC_OR">;
-defm S_ATOMIC_XOR : SM_Real_Atomics_vi <0x8a, "S_ATOMIC_XOR">;
-defm S_ATOMIC_INC : SM_Real_Atomics_vi <0x8b, "S_ATOMIC_INC">;
-defm S_ATOMIC_DEC : SM_Real_Atomics_vi <0x8c, "S_ATOMIC_DEC">;
-
-defm S_ATOMIC_SWAP_X2 : SM_Real_Atomics_vi <0xa0, "S_ATOMIC_SWAP_X2">;
-defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_vi <0xa1, "S_ATOMIC_CMPSWAP_X2">;
-defm S_ATOMIC_ADD_X2 : SM_Real_Atomics_vi <0xa2, "S_ATOMIC_ADD_X2">;
-defm S_ATOMIC_SUB_X2 : SM_Real_Atomics_vi <0xa3, "S_ATOMIC_SUB_X2">;
-defm S_ATOMIC_SMIN_X2 : SM_Real_Atomics_vi <0xa4, "S_ATOMIC_SMIN_X2">;
-defm S_ATOMIC_UMIN_X2 : SM_Real_Atomics_vi <0xa5, "S_ATOMIC_UMIN_X2">;
-defm S_ATOMIC_SMAX_X2 : SM_Real_Atomics_vi <0xa6, "S_ATOMIC_SMAX_X2">;
-defm S_ATOMIC_UMAX_X2 : SM_Real_Atomics_vi <0xa7, "S_ATOMIC_UMAX_X2">;
-defm S_ATOMIC_AND_X2 : SM_Real_Atomics_vi <0xa8, "S_ATOMIC_AND_X2">;
-defm S_ATOMIC_OR_X2 : SM_Real_Atomics_vi <0xa9, "S_ATOMIC_OR_X2">;
-defm S_ATOMIC_XOR_X2 : SM_Real_Atomics_vi <0xaa, "S_ATOMIC_XOR_X2">;
-defm S_ATOMIC_INC_X2 : SM_Real_Atomics_vi <0xab, "S_ATOMIC_INC_X2">;
-defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_vi <0xac, "S_ATOMIC_DEC_X2">;
-
-multiclass SM_Real_Discard_vi<bits<8> op, string ps> {
+defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_vi <0x40>;
+defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_vi <0x41>;
+defm S_BUFFER_ATOMIC_ADD : SM_Real_Atomics_vi <0x42>;
+defm S_BUFFER_ATOMIC_SUB : SM_Real_Atomics_vi <0x43>;
+defm S_BUFFER_ATOMIC_SMIN : SM_Real_Atomics_vi <0x44>;
+defm S_BUFFER_ATOMIC_UMIN : SM_Real_Atomics_vi <0x45>;
+defm S_BUFFER_ATOMIC_SMAX : SM_Real_Atomics_vi <0x46>;
+defm S_BUFFER_ATOMIC_UMAX : SM_Real_Atomics_vi <0x47>;
+defm S_BUFFER_ATOMIC_AND : SM_Real_Atomics_vi <0x48>;
+defm S_BUFFER_ATOMIC_OR : SM_Real_Atomics_vi <0x49>;
+defm S_BUFFER_ATOMIC_XOR : SM_Real_Atomics_vi <0x4a>;
+defm S_BUFFER_ATOMIC_INC : SM_Real_Atomics_vi <0x4b>;
+defm S_BUFFER_ATOMIC_DEC : SM_Real_Atomics_vi <0x4c>;
+
+defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Real_Atomics_vi <0x60>;
+defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_vi <0x61>;
+defm S_BUFFER_ATOMIC_ADD_X2 : SM_Real_Atomics_vi <0x62>;
+defm S_BUFFER_ATOMIC_SUB_X2 : SM_Real_Atomics_vi <0x63>;
+defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Real_Atomics_vi <0x64>;
+defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Real_Atomics_vi <0x65>;
+defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Real_Atomics_vi <0x66>;
+defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Real_Atomics_vi <0x67>;
+defm S_BUFFER_ATOMIC_AND_X2 : SM_Real_Atomics_vi <0x68>;
+defm S_BUFFER_ATOMIC_OR_X2 : SM_Real_Atomics_vi <0x69>;
+defm S_BUFFER_ATOMIC_XOR_X2 : SM_Real_Atomics_vi <0x6a>;
+defm S_BUFFER_ATOMIC_INC_X2 : SM_Real_Atomics_vi <0x6b>;
+defm S_BUFFER_ATOMIC_DEC_X2 : SM_Real_Atomics_vi <0x6c>;
+
+defm S_ATOMIC_SWAP : SM_Real_Atomics_vi <0x80>;
+defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_vi <0x81>;
+defm S_ATOMIC_ADD : SM_Real_Atomics_vi <0x82>;
+defm S_ATOMIC_SUB : SM_Real_Atomics_vi <0x83>;
+defm S_ATOMIC_SMIN : SM_Real_Atomics_vi <0x84>;
+defm S_ATOMIC_UMIN : SM_Real_Atomics_vi <0x85>;
+defm S_ATOMIC_SMAX : SM_Real_Atomics_vi <0x86>;
+defm S_ATOMIC_UMAX : SM_Real_Atomics_vi <0x87>;
+defm S_ATOMIC_AND : SM_Real_Atomics_vi <0x88>;
+defm S_ATOMIC_OR : SM_Real_Atomics_vi <0x89>;
+defm S_ATOMIC_XOR : SM_Real_Atomics_vi <0x8a>;
+defm S_ATOMIC_INC : SM_Real_Atomics_vi <0x8b>;
+defm S_ATOMIC_DEC : SM_Real_Atomics_vi <0x8c>;
+
+defm S_ATOMIC_SWAP_X2 : SM_Real_Atomics_vi <0xa0>;
+defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_vi <0xa1>;
+defm S_ATOMIC_ADD_X2 : SM_Real_Atomics_vi <0xa2>;
+defm S_ATOMIC_SUB_X2 : SM_Real_Atomics_vi <0xa3>;
+defm S_ATOMIC_SMIN_X2 : SM_Real_Atomics_vi <0xa4>;
+defm S_ATOMIC_UMIN_X2 : SM_Real_Atomics_vi <0xa5>;
+defm S_ATOMIC_SMAX_X2 : SM_Real_Atomics_vi <0xa6>;
+defm S_ATOMIC_UMAX_X2 : SM_Real_Atomics_vi <0xa7>;
+defm S_ATOMIC_AND_X2 : SM_Real_Atomics_vi <0xa8>;
+defm S_ATOMIC_OR_X2 : SM_Real_Atomics_vi <0xa9>;
+defm S_ATOMIC_XOR_X2 : SM_Real_Atomics_vi <0xaa>;
+defm S_ATOMIC_INC_X2 : SM_Real_Atomics_vi <0xab>;
+defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_vi <0xac>;
+
+multiclass SM_Real_Discard_vi<bits<8> op> {
+ defvar ps = NAME;
def _IMM_vi : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_IMM)>;
def _SGPR_vi : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR)>;
def _SGPR_alt_gfx9 : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR)>,
@@ -749,17 +713,14 @@ multiclass SM_Real_Discard_vi<bits<8> op, string ps> {
def _SGPR_IMM_gfx9 : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR_IMM)>;
}
-defm S_DCACHE_DISCARD : SM_Real_Discard_vi <0x28, "S_DCACHE_DISCARD">;
-defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_vi <0x29, "S_DCACHE_DISCARD_X2">;
+defm S_DCACHE_DISCARD : SM_Real_Discard_vi <0x28>;
+defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_vi <0x29>;
//===----------------------------------------------------------------------===//
// CI
//===----------------------------------------------------------------------===//
-def smrd_literal_offset : NamedOperandU32<"SMRDLiteralOffset",
- NamedMatchClass<"SMRDLiteralOffset">> {
- let OperandType = "OPERAND_IMMEDIATE";
-}
+def smrd_literal_offset : ImmOperand<i32, "SMRDLiteralOffset">;
class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
SM_Real<ps>,
@@ -854,8 +815,14 @@ multiclass SMRD_Pattern <string Instr, ValueType vt> {
// 3. SGPR offset
def : GCNPat <
(smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
- (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $soffset, 0))
- >;
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $soffset, 0))> {
+ let OtherPredicates = [isNotGFX9Plus];
+ }
+ def : GCNPat <
+ (smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, 0, 0))> {
+ let OtherPredicates = [isGFX9Plus];
+ }
// 4. SGPR+IMM offset
def : GCNPat <
@@ -891,8 +858,14 @@ multiclass SMLoad_Pattern <string Instr, ValueType vt> {
// 3. Offset loaded in a 32-bit SGPR
def : GCNPat <
(SIsbuffer_load v4i32:$sbase, i32:$soffset, timm:$cachepolicy),
- (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$soffset, (extract_cpol $cachepolicy)))
- >;
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$soffset, (extract_cpol $cachepolicy)))> {
+ let OtherPredicates = [isNotGFX9Plus];
+ }
+ def : GCNPat <
+ (SIsbuffer_load v4i32:$sbase, i32:$soffset, timm:$cachepolicy),
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, 0, (extract_cpol $cachepolicy)))> {
+ let OtherPredicates = [isGFX9Plus];
+ }
// 4. Offset as a 32-bit SGPR + immediate
def : GCNPat <
@@ -929,6 +902,8 @@ foreach vt = SReg_512.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX16", vt>;
}
+} // End let AddedComplexity = 100
+
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4i32>;
@@ -940,7 +915,6 @@ defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16f32>;
-} // End let AddedComplexity = 100
let OtherPredicates = [HasSMemTimeInst] in {
def : GCNPat <
@@ -987,16 +961,14 @@ class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps>
let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
}
-class SMEM_Real_Load_gfx10<bits<8> op, string ps, OffsetMode offsets>
- : SMEM_Real_gfx10<op, !cast<SM_Pseudo>(ps # offsets.Variant)> {
- RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
- let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
-}
+class SMEM_Real_Load_gfx10<bits<8> op, string ps>
+ : SMEM_Real_gfx10<op, !cast<SM_Pseudo>(ps)>;
-multiclass SM_Real_Loads_gfx10<bits<8> op, string ps> {
- def _IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps, IMM_Offset>;
- def _SGPR_gfx10 : SMEM_Real_Load_gfx10<op, ps, SGPR_Offset>;
- def _SGPR_IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps, SGPR_IMM_Offset>;
+multiclass SM_Real_Loads_gfx10<bits<8> op> {
+ defvar ps = NAME;
+ def _IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps#"_IMM">;
+ def _SGPR_gfx10 : SMEM_Real_Load_gfx10<op, ps#"_SGPR">;
+ def _SGPR_IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps#"_SGPR_IMM">;
}
class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps> {
@@ -1006,53 +978,48 @@ class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps>
let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
}
-multiclass SM_Real_Stores_gfx10<bits<8> op, string ps,
- SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM),
- SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> {
- def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs> {
- let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol);
- }
+multiclass SM_Real_Stores_gfx10<bits<8> op> {
+ defvar ps = NAME;
+ defvar immPs = !cast<SM_Store_Pseudo>(ps#_IMM);
+ def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs>;
- def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> {
- let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol);
- }
+ defvar sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR);
+ def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs>;
- def _SGPR_IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Store_Pseudo>(ps#_SGPR_IMM)> {
- let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase,
- SReg_32:$soffset, smem_offset_mod:$offset, CPol:$cpol);
- }
+ defvar sgprImmPs = !cast<SM_Store_Pseudo>(ps#_SGPR_IMM);
+ def _SGPR_IMM_gfx10 : SMEM_Real_Store_gfx10 <op, sgprImmPs>;
}
-defm S_LOAD_DWORD : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">;
-defm S_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x001, "S_LOAD_DWORDX2">;
-defm S_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x002, "S_LOAD_DWORDX4">;
-defm S_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x003, "S_LOAD_DWORDX8">;
-defm S_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x004, "S_LOAD_DWORDX16">;
+defm S_LOAD_DWORD : SM_Real_Loads_gfx10<0x000>;
+defm S_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x001>;
+defm S_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x002>;
+defm S_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x003>;
+defm S_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x004>;
let SubtargetPredicate = HasScalarFlatScratchInsts in {
-defm S_SCRATCH_LOAD_DWORD : SM_Real_Loads_gfx10<0x005, "S_SCRATCH_LOAD_DWORD">;
-defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x006, "S_SCRATCH_LOAD_DWORDX2">;
-defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x007, "S_SCRATCH_LOAD_DWORDX4">;
+defm S_SCRATCH_LOAD_DWORD : SM_Real_Loads_gfx10<0x005>;
+defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x006>;
+defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x007>;
} // End SubtargetPredicate = HasScalarFlatScratchInsts
-defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_gfx10<0x008, "S_BUFFER_LOAD_DWORD">;
-defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x009, "S_BUFFER_LOAD_DWORDX2">;
-defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x00a, "S_BUFFER_LOAD_DWORDX4">;
-defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x00b, "S_BUFFER_LOAD_DWORDX8">;
-defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x00c, "S_BUFFER_LOAD_DWORDX16">;
+defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_gfx10<0x008>;
+defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x009>;
+defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x00a>;
+defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x00b>;
+defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x00c>;
let SubtargetPredicate = HasScalarStores in {
-defm S_STORE_DWORD : SM_Real_Stores_gfx10<0x010, "S_STORE_DWORD">;
-defm S_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x011, "S_STORE_DWORDX2">;
-defm S_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x012, "S_STORE_DWORDX4">;
+defm S_STORE_DWORD : SM_Real_Stores_gfx10<0x010>;
+defm S_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x011>;
+defm S_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x012>;
let OtherPredicates = [HasScalarFlatScratchInsts] in {
-defm S_SCRATCH_STORE_DWORD : SM_Real_Stores_gfx10<0x015, "S_SCRATCH_STORE_DWORD">;
-defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016, "S_SCRATCH_STORE_DWORDX2">;
-defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017, "S_SCRATCH_STORE_DWORDX4">;
+defm S_SCRATCH_STORE_DWORD : SM_Real_Stores_gfx10<0x015>;
+defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016>;
+defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017>;
} // End OtherPredicates = [HasScalarFlatScratchInsts]
-defm S_BUFFER_STORE_DWORD : SM_Real_Stores_gfx10<0x018, "S_BUFFER_STORE_DWORD">;
-defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x019, "S_BUFFER_STORE_DWORDX2">;
-defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x01a, "S_BUFFER_STORE_DWORDX4">;
+defm S_BUFFER_STORE_DWORD : SM_Real_Stores_gfx10<0x018>;
+defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x019>;
+defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x01a>;
} // End SubtargetPredicate = HasScalarStores
def S_MEMREALTIME_gfx10 : SMEM_Real_gfx10<0x025, S_MEMREALTIME>;
@@ -1065,15 +1032,16 @@ let SubtargetPredicate = HasScalarStores in {
def S_DCACHE_WB_gfx10 : SMEM_Real_gfx10<0x021, S_DCACHE_WB>;
} // End SubtargetPredicate = HasScalarStores
-multiclass SM_Real_Probe_gfx10<bits<8> op, string ps> {
+multiclass SM_Real_Probe_gfx10<bits<8> op> {
+ defvar ps = NAME;
def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
def _SGPR_IMM_gfx10
: SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR_IMM)>;
}
-defm S_ATC_PROBE : SM_Real_Probe_gfx10 <0x26, "S_ATC_PROBE">;
-defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10 <0x27, "S_ATC_PROBE_BUFFER">;
+defm S_ATC_PROBE : SM_Real_Probe_gfx10 <0x26>;
+defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10 <0x27>;
class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
: SMEM_Real_gfx10 <op, ps>,
@@ -1090,7 +1058,8 @@ class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
}
-multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> {
+multiclass SM_Real_Atomics_gfx10<bits<8> op> {
+ defvar ps = NAME;
def _IMM_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
def _SGPR_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
def _SGPR_IMM_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>;
@@ -1101,70 +1070,71 @@ multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> {
let SubtargetPredicate = HasScalarAtomics in {
-defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x40, "S_BUFFER_ATOMIC_SWAP">;
-defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x41, "S_BUFFER_ATOMIC_CMPSWAP">;
-defm S_BUFFER_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x42, "S_BUFFER_ATOMIC_ADD">;
-defm S_BUFFER_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x43, "S_BUFFER_ATOMIC_SUB">;
-defm S_BUFFER_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x44, "S_BUFFER_ATOMIC_SMIN">;
-defm S_BUFFER_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x45, "S_BUFFER_ATOMIC_UMIN">;
-defm S_BUFFER_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x46, "S_BUFFER_ATOMIC_SMAX">;
-defm S_BUFFER_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x47, "S_BUFFER_ATOMIC_UMAX">;
-defm S_BUFFER_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x48, "S_BUFFER_ATOMIC_AND">;
-defm S_BUFFER_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x49, "S_BUFFER_ATOMIC_OR">;
-defm S_BUFFER_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x4a, "S_BUFFER_ATOMIC_XOR">;
-defm S_BUFFER_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x4b, "S_BUFFER_ATOMIC_INC">;
-defm S_BUFFER_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x4c, "S_BUFFER_ATOMIC_DEC">;
-
-defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0x60, "S_BUFFER_ATOMIC_SWAP_X2">;
-defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">;
-defm S_BUFFER_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0x62, "S_BUFFER_ATOMIC_ADD_X2">;
-defm S_BUFFER_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0x63, "S_BUFFER_ATOMIC_SUB_X2">;
-defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0x64, "S_BUFFER_ATOMIC_SMIN_X2">;
-defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0x65, "S_BUFFER_ATOMIC_UMIN_X2">;
-defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0x66, "S_BUFFER_ATOMIC_SMAX_X2">;
-defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0x67, "S_BUFFER_ATOMIC_UMAX_X2">;
-defm S_BUFFER_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0x68, "S_BUFFER_ATOMIC_AND_X2">;
-defm S_BUFFER_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0x69, "S_BUFFER_ATOMIC_OR_X2">;
-defm S_BUFFER_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0x6a, "S_BUFFER_ATOMIC_XOR_X2">;
-defm S_BUFFER_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0x6b, "S_BUFFER_ATOMIC_INC_X2">;
-defm S_BUFFER_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0x6c, "S_BUFFER_ATOMIC_DEC_X2">;
-
-defm S_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x80, "S_ATOMIC_SWAP">;
-defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x81, "S_ATOMIC_CMPSWAP">;
-defm S_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x82, "S_ATOMIC_ADD">;
-defm S_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x83, "S_ATOMIC_SUB">;
-defm S_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x84, "S_ATOMIC_SMIN">;
-defm S_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x85, "S_ATOMIC_UMIN">;
-defm S_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x86, "S_ATOMIC_SMAX">;
-defm S_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x87, "S_ATOMIC_UMAX">;
-defm S_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x88, "S_ATOMIC_AND">;
-defm S_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x89, "S_ATOMIC_OR">;
-defm S_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x8a, "S_ATOMIC_XOR">;
-defm S_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x8b, "S_ATOMIC_INC">;
-defm S_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x8c, "S_ATOMIC_DEC">;
-
-defm S_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0xa0, "S_ATOMIC_SWAP_X2">;
-defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0xa1, "S_ATOMIC_CMPSWAP_X2">;
-defm S_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0xa2, "S_ATOMIC_ADD_X2">;
-defm S_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0xa3, "S_ATOMIC_SUB_X2">;
-defm S_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0xa4, "S_ATOMIC_SMIN_X2">;
-defm S_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0xa5, "S_ATOMIC_UMIN_X2">;
-defm S_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0xa6, "S_ATOMIC_SMAX_X2">;
-defm S_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0xa7, "S_ATOMIC_UMAX_X2">;
-defm S_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0xa8, "S_ATOMIC_AND_X2">;
-defm S_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0xa9, "S_ATOMIC_OR_X2">;
-defm S_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0xaa, "S_ATOMIC_XOR_X2">;
-defm S_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0xab, "S_ATOMIC_INC_X2">;
-defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0xac, "S_ATOMIC_DEC_X2">;
-
-multiclass SM_Real_Discard_gfx10<bits<8> op, string ps> {
+defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x40>;
+defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x41>;
+defm S_BUFFER_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x42>;
+defm S_BUFFER_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x43>;
+defm S_BUFFER_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x44>;
+defm S_BUFFER_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x45>;
+defm S_BUFFER_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x46>;
+defm S_BUFFER_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x47>;
+defm S_BUFFER_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x48>;
+defm S_BUFFER_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x49>;
+defm S_BUFFER_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x4a>;
+defm S_BUFFER_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x4b>;
+defm S_BUFFER_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x4c>;
+
+defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0x60>;
+defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0x61>;
+defm S_BUFFER_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0x62>;
+defm S_BUFFER_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0x63>;
+defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0x64>;
+defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0x65>;
+defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0x66>;
+defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0x67>;
+defm S_BUFFER_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0x68>;
+defm S_BUFFER_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0x69>;
+defm S_BUFFER_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0x6a>;
+defm S_BUFFER_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0x6b>;
+defm S_BUFFER_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0x6c>;
+
+defm S_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x80>;
+defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x81>;
+defm S_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x82>;
+defm S_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x83>;
+defm S_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x84>;
+defm S_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x85>;
+defm S_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x86>;
+defm S_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x87>;
+defm S_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x88>;
+defm S_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x89>;
+defm S_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x8a>;
+defm S_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x8b>;
+defm S_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x8c>;
+
+defm S_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0xa0>;
+defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0xa1>;
+defm S_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0xa2>;
+defm S_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0xa3>;
+defm S_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0xa4>;
+defm S_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0xa5>;
+defm S_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0xa6>;
+defm S_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0xa7>;
+defm S_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0xa8>;
+defm S_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0xa9>;
+defm S_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0xaa>;
+defm S_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0xab>;
+defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0xac>;
+
+multiclass SM_Real_Discard_gfx10<bits<8> op> {
+ defvar ps = NAME;
def _IMM_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>;
def _SGPR_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>;
def _SGPR_IMM_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR_IMM)>;
}
-defm S_DCACHE_DISCARD : SM_Real_Discard_gfx10 <0x28, "S_DCACHE_DISCARD">;
-defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29, "S_DCACHE_DISCARD_X2">;
+defm S_DCACHE_DISCARD : SM_Real_Discard_gfx10 <0x28>;
+defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29>;
} // End SubtargetPredicate = HasScalarAtomics
@@ -1190,31 +1160,29 @@ class SMEM_Real_gfx11<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0);
}
-class SMEM_Real_Load_gfx11<bits<8> op, string ps, string opName, OffsetMode offsets> :
- SMEM_Real_gfx11<op, !cast<SM_Pseudo>(ps # offsets.Variant), opName> {
- RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
- let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
-}
+class SMEM_Real_Load_gfx11<bits<8> op, string ps, string opName> :
+ SMEM_Real_gfx11<op, !cast<SM_Pseudo>(ps), opName>;
-multiclass SM_Real_Loads_gfx11<bits<8> op, string ps, string opName> {
- def _IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps, opName, IMM_Offset>;
- def _SGPR_gfx11 : SMEM_Real_Load_gfx11<op, ps, opName, SGPR_Offset>;
- def _SGPR_IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps, opName, SGPR_IMM_Offset>;
+multiclass SM_Real_Loads_gfx11<bits<8> op, string ps> {
+ defvar opName = !tolower(NAME);
+ def _IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_IMM", opName>;
+ def _SGPR_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_SGPR", opName>;
+ def _SGPR_IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_SGPR_IMM", opName>;
def : MnemonicAlias<!cast<SM_Pseudo>(ps#"_IMM").Mnemonic, opName>,
Requires<[isGFX11Plus]>;
}
-defm S_LOAD_B32 : SM_Real_Loads_gfx11<0x000, "S_LOAD_DWORD", "s_load_b32">;
-defm S_LOAD_B64 : SM_Real_Loads_gfx11<0x001, "S_LOAD_DWORDX2", "s_load_b64">;
-defm S_LOAD_B128 : SM_Real_Loads_gfx11<0x002, "S_LOAD_DWORDX4", "s_load_b128">;
-defm S_LOAD_B256 : SM_Real_Loads_gfx11<0x003, "S_LOAD_DWORDX8", "s_load_b256">;
-defm S_LOAD_B512 : SM_Real_Loads_gfx11<0x004, "S_LOAD_DWORDX16", "s_load_b512">;
+defm S_LOAD_B32 : SM_Real_Loads_gfx11<0x000, "S_LOAD_DWORD">;
+defm S_LOAD_B64 : SM_Real_Loads_gfx11<0x001, "S_LOAD_DWORDX2">;
+defm S_LOAD_B128 : SM_Real_Loads_gfx11<0x002, "S_LOAD_DWORDX4">;
+defm S_LOAD_B256 : SM_Real_Loads_gfx11<0x003, "S_LOAD_DWORDX8">;
+defm S_LOAD_B512 : SM_Real_Loads_gfx11<0x004, "S_LOAD_DWORDX16">;
-defm S_BUFFER_LOAD_B32 : SM_Real_Loads_gfx11<0x008, "S_BUFFER_LOAD_DWORD", "s_buffer_load_b32">;
-defm S_BUFFER_LOAD_B64 : SM_Real_Loads_gfx11<0x009, "S_BUFFER_LOAD_DWORDX2", "s_buffer_load_b64">;
-defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx11<0x00a, "S_BUFFER_LOAD_DWORDX4", "s_buffer_load_b128">;
-defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx11<0x00b, "S_BUFFER_LOAD_DWORDX8", "s_buffer_load_b256">;
-defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx11<0x00c, "S_BUFFER_LOAD_DWORDX16", "s_buffer_load_b512">;
+defm S_BUFFER_LOAD_B32 : SM_Real_Loads_gfx11<0x008, "S_BUFFER_LOAD_DWORD">;
+defm S_BUFFER_LOAD_B64 : SM_Real_Loads_gfx11<0x009, "S_BUFFER_LOAD_DWORDX2">;
+defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx11<0x00a, "S_BUFFER_LOAD_DWORDX4">;
+defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx11<0x00b, "S_BUFFER_LOAD_DWORDX8">;
+defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx11<0x00c, "S_BUFFER_LOAD_DWORDX16">;
def S_GL1_INV_gfx11 : SMEM_Real_gfx11<0x020, S_GL1_INV>;
def S_DCACHE_INV_gfx11 : SMEM_Real_gfx11<0x021, S_DCACHE_INV>;
@@ -1227,12 +1195,13 @@ class SMEM_Real_Store_gfx11 <bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx11<op, ps>
let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
}
-multiclass SM_Real_Probe_gfx11<bits<8> op, string ps> {
+multiclass SM_Real_Probe_gfx11<bits<8> op> {
+ defvar ps = NAME;
def _IMM_gfx11 : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
def _SGPR_gfx11 : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>;
def _SGPR_IMM_gfx11
: SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
}
-defm S_ATC_PROBE : SM_Real_Probe_gfx11 <0x22, "S_ATC_PROBE">;
-defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx11 <0x23, "S_ATC_PROBE_BUFFER">;
+defm S_ATC_PROBE : SM_Real_Probe_gfx11 <0x22>;
+defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx11 <0x23>;
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index ad9af662307f..bee996d1b0df 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -6,18 +6,7 @@
//
//===----------------------------------------------------------------------===//
-def GPRIdxModeMatchClass : AsmOperandClass {
- let Name = "GPRIdxMode";
- let PredicateMethod = "isGPRIdxMode";
- let ParserMethod = "parseGPRIdxMode";
- let RenderMethod = "addImmOperands";
-}
-
-def GPRIdxMode : Operand<i32> {
- let PrintMethod = "printVGPRIndexMode";
- let ParserMatchClass = GPRIdxModeMatchClass;
- let OperandType = "OPERAND_IMMEDIATE";
-}
+def GPRIdxMode : CustomOperand<i32>;
class SOP_Pseudo<string opName, dag outs, dag ins, string asmOps,
list<dag> pattern=[]> :
@@ -402,11 +391,11 @@ let SubtargetPredicate = isGFX11Plus in {
// For s_sendmsg_rtn_* the src0 field encodes the message type directly; it
// is not an SGPR number.
def S_SENDMSG_RTN_B32 : SOP1_Pseudo<
- "s_sendmsg_rtn_b32", (outs SReg_32:$sdst), (ins SendMsgImm:$src0),
+ "s_sendmsg_rtn_b32", (outs SReg_32:$sdst), (ins SendMsg:$src0),
"$sdst, $src0", [(set i32:$sdst, (int_amdgcn_s_sendmsg_rtn timm:$src0))]
>;
def S_SENDMSG_RTN_B64 : SOP1_Pseudo<
- "s_sendmsg_rtn_b64", (outs SReg_64:$sdst), (ins SendMsgImm:$src0),
+ "s_sendmsg_rtn_b64", (outs SReg_64:$sdst), (ins SendMsg:$src0),
"$sdst, $src0", [(set i64:$sdst, (int_amdgcn_s_sendmsg_rtn timm:$src0))]
>;
}
@@ -795,7 +784,7 @@ class SOPK_32 <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
class SOPK_32_BR <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
opName,
(outs),
- (ins sopp_brtarget:$simm16, SReg_32:$sdst),
+ (ins SOPPBrTarget:$simm16, SReg_32:$sdst),
"$sdst, $simm16",
pattern> {
let Defs = [EXEC];
@@ -875,7 +864,7 @@ let isCommutable = 1, DisableEncoding = "$src0",
let SubtargetPredicate = isGFX6GFX7GFX8GFX9 in
def S_CBRANCH_I_FORK : SOPK_Pseudo <
"s_cbranch_i_fork",
- (outs), (ins SReg_64:$sdst, sopp_brtarget:$simm16),
+ (outs), (ins SReg_64:$sdst, SOPPBrTarget:$simm16),
"$sdst, $simm16"
>;
@@ -953,7 +942,7 @@ let SubtargetPredicate = isGFX9Plus in {
def S_CALL_B64 : SOPK_Pseudo<
"s_call_b64",
(outs SReg_64:$sdst),
- (ins sopp_brtarget:$simm16),
+ (ins SOPPBrTarget:$simm16),
"$sdst, $simm16"> {
let isCall = 1;
}
@@ -1175,7 +1164,7 @@ multiclass SOPP_With_Relaxation <string opName, dag ins,
def S_NOP : SOPP_Pseudo<"s_nop" , (ins i16imm:$simm16), "$simm16">;
let isTerminator = 1 in {
-def S_ENDPGM : SOPP_Pseudo<"s_endpgm", (ins EndpgmImm:$simm16), "$simm16", [], ""> {
+def S_ENDPGM : SOPP_Pseudo<"s_endpgm", (ins Endpgm:$simm16), "$simm16", [], ""> {
let isBarrier = 1;
let isReturn = 1;
let hasSideEffects = 1;
@@ -1206,60 +1195,60 @@ let SubtargetPredicate = isGFX10Plus in {
let isBranch = 1, SchedRW = [WriteBranch] in {
let isBarrier = 1 in {
defm S_BRANCH : SOPP_With_Relaxation<
- "s_branch" , (ins sopp_brtarget:$simm16), "$simm16",
+ "s_branch" , (ins SOPPBrTarget:$simm16), "$simm16",
[(br bb:$simm16)]>;
}
let Uses = [SCC] in {
defm S_CBRANCH_SCC0 : SOPP_With_Relaxation<
- "s_cbranch_scc0" , (ins sopp_brtarget:$simm16),
+ "s_cbranch_scc0" , (ins SOPPBrTarget:$simm16),
"$simm16"
>;
defm S_CBRANCH_SCC1 : SOPP_With_Relaxation <
- "s_cbranch_scc1" , (ins sopp_brtarget:$simm16),
+ "s_cbranch_scc1" , (ins SOPPBrTarget:$simm16),
"$simm16"
>;
} // End Uses = [SCC]
let Uses = [VCC] in {
defm S_CBRANCH_VCCZ : SOPP_With_Relaxation <
- "s_cbranch_vccz" , (ins sopp_brtarget:$simm16),
+ "s_cbranch_vccz" , (ins SOPPBrTarget:$simm16),
"$simm16"
>;
defm S_CBRANCH_VCCNZ : SOPP_With_Relaxation <
- "s_cbranch_vccnz" , (ins sopp_brtarget:$simm16),
+ "s_cbranch_vccnz" , (ins SOPPBrTarget:$simm16),
"$simm16"
>;
} // End Uses = [VCC]
let Uses = [EXEC] in {
defm S_CBRANCH_EXECZ : SOPP_With_Relaxation <
- "s_cbranch_execz" , (ins sopp_brtarget:$simm16),
+ "s_cbranch_execz" , (ins SOPPBrTarget:$simm16),
"$simm16"
>;
defm S_CBRANCH_EXECNZ : SOPP_With_Relaxation <
- "s_cbranch_execnz" , (ins sopp_brtarget:$simm16),
+ "s_cbranch_execnz" , (ins SOPPBrTarget:$simm16),
"$simm16"
>;
} // End Uses = [EXEC]
defm S_CBRANCH_CDBGSYS : SOPP_With_Relaxation <
- "s_cbranch_cdbgsys" , (ins sopp_brtarget:$simm16),
+ "s_cbranch_cdbgsys" , (ins SOPPBrTarget:$simm16),
"$simm16"
>;
defm S_CBRANCH_CDBGSYS_AND_USER : SOPP_With_Relaxation <
- "s_cbranch_cdbgsys_and_user" , (ins sopp_brtarget:$simm16),
+ "s_cbranch_cdbgsys_and_user" , (ins SOPPBrTarget:$simm16),
"$simm16"
>;
defm S_CBRANCH_CDBGSYS_OR_USER : SOPP_With_Relaxation <
- "s_cbranch_cdbgsys_or_user" , (ins sopp_brtarget:$simm16),
+ "s_cbranch_cdbgsys_or_user" , (ins SOPPBrTarget:$simm16),
"$simm16"
>;
defm S_CBRANCH_CDBGUSER : SOPP_With_Relaxation <
- "s_cbranch_cdbguser" , (ins sopp_brtarget:$simm16),
+ "s_cbranch_cdbguser" , (ins SOPPBrTarget:$simm16),
"$simm16"
>;
@@ -1284,7 +1273,7 @@ def S_WAKEUP : SOPP_Pseudo <"s_wakeup", (ins) > {
}
let hasSideEffects = 1 in
-def S_WAITCNT : SOPP_Pseudo <"s_waitcnt" , (ins WAIT_FLAG:$simm16), "$simm16",
+def S_WAITCNT : SOPP_Pseudo <"s_waitcnt" , (ins SWaitCnt:$simm16), "$simm16",
[(int_amdgcn_s_waitcnt timm:$simm16)]>;
def S_SETHALT : SOPP_Pseudo <"s_sethalt" , (ins i32imm:$simm16), "$simm16",
[(int_amdgcn_s_sethalt timm:$simm16)]>;
@@ -1305,12 +1294,12 @@ def S_SETPRIO : SOPP_Pseudo <"s_setprio", (ins i16imm:$simm16), "$simm16",
}
let Uses = [EXEC, M0] in {
-def S_SENDMSG : SOPP_Pseudo <"s_sendmsg" , (ins SendMsgImm:$simm16), "$simm16",
+def S_SENDMSG : SOPP_Pseudo <"s_sendmsg" , (ins SendMsg:$simm16), "$simm16",
[(int_amdgcn_s_sendmsg (i32 timm:$simm16), M0)]> {
let hasSideEffects = 1;
}
-def S_SENDMSGHALT : SOPP_Pseudo <"s_sendmsghalt" , (ins SendMsgImm:$simm16), "$simm16",
+def S_SENDMSGHALT : SOPP_Pseudo <"s_sendmsghalt" , (ins SendMsg:$simm16), "$simm16",
[(int_amdgcn_s_sendmsghalt (i32 timm:$simm16), M0)]> {
let hasSideEffects = 1;
}
@@ -1367,7 +1356,7 @@ let SubtargetPredicate = isGFX10Plus in {
let fixed_imm = 1;
}
def S_WAITCNT_DEPCTR :
- SOPP_Pseudo <"s_waitcnt_depctr" , (ins DepCtrImm:$simm16), "$simm16">;
+ SOPP_Pseudo <"s_waitcnt_depctr" , (ins DepCtr:$simm16), "$simm16">;
let hasSideEffects = 0, Uses = [MODE], Defs = [MODE] in {
def S_ROUND_MODE :
@@ -1386,7 +1375,7 @@ let SubtargetPredicate = isGFX11Plus in {
"$simm16"> {
let hasSideEffects = 1;
}
- def S_DELAY_ALU : SOPP_Pseudo<"s_delay_alu", (ins DELAY_FLAG:$simm16),
+ def S_DELAY_ALU : SOPP_Pseudo<"s_delay_alu", (ins SDelayALU:$simm16),
"$simm16">;
} // End SubtargetPredicate = isGFX11Plus
diff --git a/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp b/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
index 7573af597056..98fd16e59bf1 100644
--- a/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
@@ -15,13 +15,13 @@
using namespace llvm;
-/// The target which supports all AMD GPUs. This will eventually
-/// be deprecated and there will be a R600 target and a GCN target.
-Target &llvm::getTheAMDGPUTarget() {
+/// The target for R600 GPUs.
+Target &llvm::getTheR600Target() {
static Target TheAMDGPUTarget;
return TheAMDGPUTarget;
}
-/// The target for GCN GPUs
+
+/// The target for GCN GPUs.
Target &llvm::getTheGCNTarget() {
static Target TheGCNTarget;
return TheGCNTarget;
@@ -29,7 +29,7 @@ Target &llvm::getTheGCNTarget() {
/// Extern function to initialize the targets for the AMDGPU backend
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTargetInfo() {
- RegisterTarget<Triple::r600, false> R600(getTheAMDGPUTarget(), "r600",
+ RegisterTarget<Triple::r600, false> R600(getTheR600Target(), "r600",
"AMD GPUs HD2XXX-HD6XXX", "AMDGPU");
RegisterTarget<Triple::amdgcn, false> GCN(getTheGCNTarget(), "amdgcn",
"AMD GCN GPUs", "AMDGPU");
diff --git a/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.h b/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.h
index 1e6dbd90b0c1..45470167a331 100644
--- a/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.h
+++ b/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.h
@@ -17,11 +17,10 @@ namespace llvm {
class Target;
-/// The target which supports all AMD GPUs. This will eventually
-/// be deprecated and there will be a R600 target and a GCN target.
-Target &getTheAMDGPUTarget();
+/// The target for R600 GPUs.
+Target &getTheR600Target();
-/// The target for GCN GPUs
+/// The target for GCN GPUs.
Target &getTheGCNTarget();
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index c0fd5bc69325..ce40d82021cf 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -44,7 +44,7 @@ const CustomOperand<const MCSubtargetInfo &> Msg[] = {
{{"MSG_SAVEWAVE"}, ID_SAVEWAVE, isGFX8_GFX9_GFX10},
{{"MSG_STALL_WAVE_GEN"}, ID_STALL_WAVE_GEN, isGFX9Plus},
{{"MSG_HALT_WAVES"}, ID_HALT_WAVES, isGFX9Plus},
- {{"MSG_ORDERED_PS_DONE"}, ID_ORDERED_PS_DONE, isGFX9Plus},
+ {{"MSG_ORDERED_PS_DONE"}, ID_ORDERED_PS_DONE, isGFX9_GFX10},
{{"MSG_EARLY_PRIM_DEALLOC"}, ID_EARLY_PRIM_DEALLOC, isGFX9_GFX10},
{{"MSG_GS_ALLOC_REQ"}, ID_GS_ALLOC_REQ, isGFX9Plus},
{{"MSG_GET_DOORBELL"}, ID_GET_DOORBELL, isGFX9_GFX10},
@@ -115,10 +115,14 @@ const CustomOperand<const MCSubtargetInfo &> Opr[] = {
{{"HW_REG_HW_ID2"}, ID_HW_ID2, isGFX10Plus},
{{"HW_REG_POPS_PACKER"}, ID_POPS_PACKER, isGFX10},
{{""}},
- {{""}},
+ {{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA, isGFX11Plus},
{{""}},
{{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_BEncoding},
+ // Register numbers reused in GFX11+
+ {{"HW_REG_PERF_SNAPSHOT_PC_LO"}, ID_PERF_SNAPSHOT_PC_LO, isGFX11Plus},
+ {{"HW_REG_PERF_SNAPSHOT_PC_HI"}, ID_PERF_SNAPSHOT_PC_HI, isGFX11Plus},
+
// GFX940 specific registers
{{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940},
{{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940},
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 4263e3e9eeac..296ea18b2a8d 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -10,19 +10,22 @@
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
-#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/TargetParser.h"
+#include "llvm/TargetParser/TargetParser.h"
#include <optional>
#define GET_INSTRINFO_NAMED_OPS
@@ -92,6 +95,24 @@ unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
}
+/// \returns VmVsrc bit width
+inline unsigned getVmVsrcBitWidth() { return 3; }
+
+/// \returns VmVsrc bit shift
+inline unsigned getVmVsrcBitShift() { return 2; }
+
+/// \returns VaVdst bit width
+inline unsigned getVaVdstBitWidth() { return 4; }
+
+/// \returns VaVdst bit shift
+inline unsigned getVaVdstBitShift() { return 12; }
+
+/// \returns SaSdst bit width
+inline unsigned getSaSdstBitWidth() { return 1; }
+
+/// \returns SaSdst bit shift
+inline unsigned getSaSdstBitShift() { return 0; }
+
} // end namespace anonymous
namespace llvm {
@@ -150,56 +171,62 @@ unsigned getAmdhsaCodeObjectVersion() {
return AmdhsaCodeObjectVersion;
}
-unsigned getMultigridSyncArgImplicitArgPosition() {
- switch (AmdhsaCodeObjectVersion) {
- case 2:
- case 3:
- case 4:
+unsigned getCodeObjectVersion(const Module &M) {
+ if (auto Ver = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("amdgpu_code_object_version"))) {
+ return (unsigned)Ver->getZExtValue() / 100;
+ }
+
+ // Default code object version.
+ return AMDHSA_COV4;
+}
+
+unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
+ switch (CodeObjectVersion) {
+ case AMDHSA_COV2:
+ case AMDHSA_COV3:
+ case AMDHSA_COV4:
return 48;
- case 5:
- return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
+ case AMDHSA_COV5:
default:
- llvm_unreachable("Unexpected code object version");
- return 0;
+ return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
}
}
// FIXME: All such magic numbers about the ABI should be in a
// central TD file.
-unsigned getHostcallImplicitArgPosition() {
- switch (AmdhsaCodeObjectVersion) {
- case 2:
- case 3:
- case 4:
+unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
+ switch (CodeObjectVersion) {
+ case AMDHSA_COV2:
+ case AMDHSA_COV3:
+ case AMDHSA_COV4:
return 24;
- case 5:
- return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
+ case AMDHSA_COV5:
default:
- llvm_unreachable("Unexpected code object version");
- return 0;
+ return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
}
}
-unsigned getDefaultQueueImplicitArgPosition() {
- switch (AmdhsaCodeObjectVersion) {
- case 2:
- case 3:
- case 4:
+unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
+ switch (CodeObjectVersion) {
+ case AMDHSA_COV2:
+ case AMDHSA_COV3:
+ case AMDHSA_COV4:
return 32;
- case 5:
+ case AMDHSA_COV5:
default:
return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
}
}
-unsigned getCompletionActionImplicitArgPosition() {
- switch (AmdhsaCodeObjectVersion) {
- case 2:
- case 3:
- case 4:
+unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
+ switch (CodeObjectVersion) {
+ case AMDHSA_COV2:
+ case AMDHSA_COV3:
+ case AMDHSA_COV4:
return 40;
- case 5:
+ case AMDHSA_COV5:
default:
return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
}
@@ -568,9 +595,10 @@ std::optional<unsigned> InstInfo::getInvalidCompOperandIndex(
unsigned CompOprIdx;
for (CompOprIdx = 0; CompOprIdx < Component::MAX_OPR_NUM; ++CompOprIdx) {
- unsigned BanksNum = BANKS_NUM[CompOprIdx];
+ unsigned BanksMasks = VOPD_VGPR_BANK_MASKS[CompOprIdx];
if (OpXRegs[CompOprIdx] && OpYRegs[CompOprIdx] &&
- (OpXRegs[CompOprIdx] % BanksNum == OpYRegs[CompOprIdx] % BanksNum))
+ ((OpXRegs[CompOprIdx] & BanksMasks) ==
+ (OpYRegs[CompOprIdx] & BanksMasks)))
return CompOprIdx;
}
@@ -624,7 +652,7 @@ namespace IsaInfo {
AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
: STI(STI), XnackSetting(TargetIDSetting::Any),
- SramEccSetting(TargetIDSetting::Any) {
+ SramEccSetting(TargetIDSetting::Any), CodeObjectVersion(0) {
if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
XnackSetting = TargetIDSetting::Unsupported;
if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
@@ -735,9 +763,9 @@ std::string AMDGPUTargetID::toString() const {
.str();
std::string Features;
- if (std::optional<uint8_t> HsaAbiVersion = getHsaAbiVersion(&STI)) {
- switch (*HsaAbiVersion) {
- case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
+ if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
+ switch (CodeObjectVersion) {
+ case AMDGPU::AMDHSA_COV2:
// Code object V2 only supported specific processors and had fixed
// settings for the XNACK.
if (Processor == "gfx600") {
@@ -785,7 +813,7 @@ std::string AMDGPUTargetID::toString() const {
Twine(Processor));
}
break;
- case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
+ case AMDGPU::AMDHSA_COV3:
// xnack.
if (isXnackOnOrAny())
Features += "+xnack";
@@ -794,8 +822,8 @@ std::string AMDGPUTargetID::toString() const {
if (isSramEccOnOrAny())
Features += "+sram-ecc";
break;
- case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
- case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
+ case AMDGPU::AMDHSA_COV4:
+ case AMDGPU::AMDHSA_COV5:
// sramecc.
if (getSramEccSetting() == TargetIDSetting::Off)
Features += ":sramecc-";
@@ -1205,16 +1233,16 @@ bool shouldEmitConstantsToTextSection(const Triple &TT) {
return TT.getArch() == Triple::r600;
}
-std::pair<int, int> getIntegerPairAttribute(const Function &F,
- StringRef Name,
- std::pair<int, int> Default,
- bool OnlyFirstRequired) {
+std::pair<unsigned, unsigned>
+getIntegerPairAttribute(const Function &F, StringRef Name,
+ std::pair<unsigned, unsigned> Default,
+ bool OnlyFirstRequired) {
Attribute A = F.getFnAttribute(Name);
if (!A.isStringAttribute())
return Default;
LLVMContext &Ctx = F.getContext();
- std::pair<int, int> Ints = Default;
+ std::pair<unsigned, unsigned> Ints = Default;
std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
if (Strs.first.trim().getAsInteger(0, Ints.first)) {
Ctx.emitError("can't parse first integer attribute " + Name);
@@ -1491,6 +1519,42 @@ int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
STI);
}
+unsigned decodeFieldVmVsrc(unsigned Encoded) {
+ return unpackBits(Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
+}
+
+unsigned decodeFieldVaVdst(unsigned Encoded) {
+ return unpackBits(Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
+}
+
+unsigned decodeFieldSaSdst(unsigned Encoded) {
+ return unpackBits(Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
+}
+
+unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc) {
+ return packBits(VmVsrc, Encoded, getVmVsrcBitShift(), getVmVsrcBitWidth());
+}
+
+unsigned encodeFieldVmVsrc(unsigned VmVsrc) {
+ return encodeFieldVmVsrc(0xffff, VmVsrc);
+}
+
+unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst) {
+ return packBits(VaVdst, Encoded, getVaVdstBitShift(), getVaVdstBitWidth());
+}
+
+unsigned encodeFieldVaVdst(unsigned VaVdst) {
+ return encodeFieldVaVdst(0xffff, VaVdst);
+}
+
+unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst) {
+ return packBits(SaSdst, Encoded, getSaSdstBitShift(), getSaSdstBitWidth());
+}
+
+unsigned encodeFieldSaSdst(unsigned SaSdst) {
+ return encodeFieldSaSdst(0xffff, SaSdst);
+}
+
} // namespace DepCtr
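
The width/shift helpers added earlier in this file place SaSdst at bit 0, VmVsrc at bits [4:2] and VaVdst at bits [15:12] of the s_waitcnt_depctr immediate. A small standalone sketch of how the new encode/decode helpers combine; the semantics of packBits/unpackBits are assumed here, since those utilities are not part of this diff:

#include <cassert>

// Field layout taken from the helpers above.
constexpr unsigned kVmVsrcShift = 2, kVmVsrcWidth = 3;
constexpr unsigned kVaVdstShift = 12, kVaVdstWidth = 4;

// Assumed semantics of unpackBits/packBits: read or overwrite a Width-bit
// field located at Shift, leaving the other bits untouched.
unsigned unpackBits(unsigned Encoded, unsigned Shift, unsigned Width) {
  return (Encoded >> Shift) & ((1u << Width) - 1);
}
unsigned packBits(unsigned Value, unsigned Encoded, unsigned Shift,
                  unsigned Width) {
  unsigned Mask = ((1u << Width) - 1) << Shift;
  return (Encoded & ~Mask) | ((Value << Shift) & Mask);
}

int main() {
  // encodeFieldVmVsrc(VmVsrc) starts from 0xffff ("wait for nothing") and
  // overwrites only the VmVsrc field.
  unsigned Imm = packBits(/*VmVsrc=*/0, 0xffff, kVmVsrcShift, kVmVsrcWidth);
  assert(unpackBits(Imm, kVmVsrcShift, kVmVsrcWidth) == 0);
  assert(unpackBits(Imm, kVaVdstShift, kVaVdstWidth) == 0xf); // untouched
  (void)Imm;
}
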
//===----------------------------------------------------------------------===//
@@ -1913,44 +1977,53 @@ bool isKernelCC(const Function *Func) {
}
bool hasXNACK(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
+ return STI.hasFeature(AMDGPU::FeatureXNACK);
}
bool hasSRAMECC(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
+ return STI.hasFeature(AMDGPU::FeatureSRAMECC);
}
bool hasMIMG_R128(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] && !STI.getFeatureBits()[AMDGPU::FeatureR128A16];
+ return STI.hasFeature(AMDGPU::FeatureMIMG_R128) && !STI.hasFeature(AMDGPU::FeatureR128A16);
}
bool hasA16(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureA16];
+ return STI.hasFeature(AMDGPU::FeatureA16);
}
bool hasG16(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureG16];
+ return STI.hasFeature(AMDGPU::FeatureG16);
}
bool hasPackedD16(const MCSubtargetInfo &STI) {
- return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem] && !isCI(STI) &&
+ return !STI.hasFeature(AMDGPU::FeatureUnpackedD16VMem) && !isCI(STI) &&
!isSI(STI);
}
+unsigned getNSAMaxSize(const MCSubtargetInfo &STI) {
+ auto Version = getIsaVersion(STI.getCPU());
+ if (Version.Major == 10)
+ return Version.Minor >= 3 ? 13 : 5;
+ if (Version.Major == 11)
+ return 5;
+ return 0;
+}
+
bool isSI(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
+ return STI.hasFeature(AMDGPU::FeatureSouthernIslands);
}
bool isCI(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
+ return STI.hasFeature(AMDGPU::FeatureSeaIslands);
}
bool isVI(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
+ return STI.hasFeature(AMDGPU::FeatureVolcanicIslands);
}
bool isGFX9(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
+ return STI.hasFeature(AMDGPU::FeatureGFX9);
}
bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
@@ -1970,7 +2043,7 @@ bool isGFX9Plus(const MCSubtargetInfo &STI) {
}
bool isGFX10(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
+ return STI.hasFeature(AMDGPU::FeatureGFX10);
}
bool isGFX10Plus(const MCSubtargetInfo &STI) {
@@ -1978,7 +2051,7 @@ bool isGFX10Plus(const MCSubtargetInfo &STI) {
}
bool isGFX11(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureGFX11];
+ return STI.hasFeature(AMDGPU::FeatureGFX11);
}
bool isGFX11Plus(const MCSubtargetInfo &STI) {
@@ -1998,39 +2071,39 @@ bool isGFX10Before1030(const MCSubtargetInfo &STI) {
}
bool isGCN3Encoding(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
+ return STI.hasFeature(AMDGPU::FeatureGCN3Encoding);
}
bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureGFX10_AEncoding];
+ return STI.hasFeature(AMDGPU::FeatureGFX10_AEncoding);
}
bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding];
+ return STI.hasFeature(AMDGPU::FeatureGFX10_BEncoding);
}
bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureGFX10_3Insts];
+ return STI.hasFeature(AMDGPU::FeatureGFX10_3Insts);
}
bool isGFX90A(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts];
+ return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
}
bool isGFX940(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureGFX940Insts];
+ return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
}
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
+ return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
}
bool hasMAIInsts(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureMAIInsts];
+ return STI.hasFeature(AMDGPU::FeatureMAIInsts);
}
bool hasVOPD(const MCSubtargetInfo &STI) {
- return STI.getFeatureBits()[AMDGPU::FeatureVOPD];
+ return STI.hasFeature(AMDGPU::FeatureVOPD);
}
int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
@@ -2350,11 +2423,15 @@ unsigned getRegBitWidth(const MCRegisterClass &RC) {
return getRegBitWidth(RC.getID());
}
+unsigned getRegBitWidth(const TargetRegisterClass &RC) {
+ return getRegBitWidth(RC.getID());
+}
+
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
unsigned OpNo) {
assert(OpNo < Desc.NumOperands);
unsigned RCID = Desc.operands()[OpNo].RegClass;
- return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
+ return getRegBitWidth(RCID) / 8;
}
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
@@ -2362,15 +2439,15 @@ bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
return true;
uint64_t Val = static_cast<uint64_t>(Literal);
- return (Val == DoubleToBits(0.0)) ||
- (Val == DoubleToBits(1.0)) ||
- (Val == DoubleToBits(-1.0)) ||
- (Val == DoubleToBits(0.5)) ||
- (Val == DoubleToBits(-0.5)) ||
- (Val == DoubleToBits(2.0)) ||
- (Val == DoubleToBits(-2.0)) ||
- (Val == DoubleToBits(4.0)) ||
- (Val == DoubleToBits(-4.0)) ||
+ return (Val == llvm::bit_cast<uint64_t>(0.0)) ||
+ (Val == llvm::bit_cast<uint64_t>(1.0)) ||
+ (Val == llvm::bit_cast<uint64_t>(-1.0)) ||
+ (Val == llvm::bit_cast<uint64_t>(0.5)) ||
+ (Val == llvm::bit_cast<uint64_t>(-0.5)) ||
+ (Val == llvm::bit_cast<uint64_t>(2.0)) ||
+ (Val == llvm::bit_cast<uint64_t>(-2.0)) ||
+ (Val == llvm::bit_cast<uint64_t>(4.0)) ||
+ (Val == llvm::bit_cast<uint64_t>(-4.0)) ||
(Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}
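
The DoubleToBits calls are replaced with llvm::bit_cast<uint64_t>, which has the same observable behaviour. A standalone illustration of the idea using std::bit_cast (C++20), of which llvm::bit_cast is the in-tree counterpart:

#include <bit>
#include <cstdint>
#include <cstdio>

// Same shape as the rewritten isInlinableLiteral64 check: compare the raw
// 64-bit pattern of a literal against the inline FP constants' bit patterns.
bool isInlineFPLiteral64(uint64_t Val) {
  for (double C : {0.0, 1.0, -1.0, 0.5, -0.5, 2.0, -2.0, 4.0, -4.0})
    if (Val == std::bit_cast<uint64_t>(C))
      return true;
  return false;
}

int main() {
  std::printf("%d\n", isInlineFPLiteral64(std::bit_cast<uint64_t>(-2.0))); // 1
  std::printf("%d\n", isInlineFPLiteral64(UINT64_C(0x123)));               // 0
}
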
@@ -2388,15 +2465,15 @@ bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
// floating-point, so it is a legal inline immediate.
uint32_t Val = static_cast<uint32_t>(Literal);
- return (Val == FloatToBits(0.0f)) ||
- (Val == FloatToBits(1.0f)) ||
- (Val == FloatToBits(-1.0f)) ||
- (Val == FloatToBits(0.5f)) ||
- (Val == FloatToBits(-0.5f)) ||
- (Val == FloatToBits(2.0f)) ||
- (Val == FloatToBits(-2.0f)) ||
- (Val == FloatToBits(4.0f)) ||
- (Val == FloatToBits(-4.0f)) ||
+ return (Val == llvm::bit_cast<uint32_t>(0.0f)) ||
+ (Val == llvm::bit_cast<uint32_t>(1.0f)) ||
+ (Val == llvm::bit_cast<uint32_t>(-1.0f)) ||
+ (Val == llvm::bit_cast<uint32_t>(0.5f)) ||
+ (Val == llvm::bit_cast<uint32_t>(-0.5f)) ||
+ (Val == llvm::bit_cast<uint32_t>(2.0f)) ||
+ (Val == llvm::bit_cast<uint32_t>(-2.0f)) ||
+ (Val == llvm::bit_cast<uint32_t>(4.0f)) ||
+ (Val == llvm::bit_cast<uint32_t>(-4.0f)) ||
(Val == 0x3e22f983 && HasInv2Pi);
}
@@ -2475,10 +2552,35 @@ bool isArgPassedInSGPR(const Argument *A) {
case CallingConv::AMDGPU_PS:
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_Gfx:
- // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
- // Everything else is in VGPRs.
- return F->getAttributes().hasParamAttr(A->getArgNo(), Attribute::InReg) ||
- F->getAttributes().hasParamAttr(A->getArgNo(), Attribute::ByVal);
+ // For non-compute shaders, SGPR inputs are marked with either inreg or
+ // byval. Everything else is in VGPRs.
+ return A->hasAttribute(Attribute::InReg) ||
+ A->hasAttribute(Attribute::ByVal);
+ default:
+ // TODO: Should calls support inreg for SGPR inputs?
+ return false;
+ }
+}
+
+bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo) {
+ // Arguments to compute shaders are never a source of divergence.
+ CallingConv::ID CC = CB->getCallingConv();
+ switch (CC) {
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ return true;
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_LS:
+ case CallingConv::AMDGPU_HS:
+ case CallingConv::AMDGPU_ES:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_Gfx:
+ // For non-compute shaders, SGPR inputs are marked with either inreg or
+ // byval. Everything else is in VGPRs.
+ return CB->paramHasAttr(ArgNo, Attribute::InReg) ||
+ CB->paramHasAttr(ArgNo, Attribute::ByVal);
default:
// TODO: Should calls support inreg for SGPR inputs?
return false;
@@ -2556,77 +2658,6 @@ unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
return 13;
}
-// Given Imm, split it into the values to put into the SOffset and ImmOffset
-// fields in an MUBUF instruction. Return false if it is not possible (due to a
-// hardware bug needing a workaround).
-//
-// The required alignment ensures that individual address components remain
-// aligned if they are aligned to begin with. It also ensures that additional
-// offsets within the given alignment can be added to the resulting ImmOffset.
-bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
- const GCNSubtarget *Subtarget, Align Alignment) {
- const uint32_t MaxImm = alignDown(4095, Alignment.value());
- uint32_t Overflow = 0;
-
- if (Imm > MaxImm) {
- if (Imm <= MaxImm + 64) {
- // Use an SOffset inline constant for 4..64
- Overflow = Imm - MaxImm;
- Imm = MaxImm;
- } else {
- // Try to keep the same value in SOffset for adjacent loads, so that
- // the corresponding register contents can be re-used.
- //
- // Load values with all low-bits (except for alignment bits) set into
- // SOffset, so that a larger range of values can be covered using
- // s_movk_i32.
- //
- // Atomic operations fail to work correctly when individual address
- // components are unaligned, even if their sum is aligned.
- uint32_t High = (Imm + Alignment.value()) & ~4095;
- uint32_t Low = (Imm + Alignment.value()) & 4095;
- Imm = Low;
- Overflow = High - Alignment.value();
- }
- }
-
- // There is a hardware bug in SI and CI which prevents address clamping in
- // MUBUF instructions from working correctly with SOffsets. The immediate
- // offset is unaffected.
- if (Overflow > 0 &&
- Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
- return false;
-
- ImmOffset = Imm;
- SOffset = Overflow;
- return true;
-}
-
-SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
- *this = getDefaultForCallingConv(F.getCallingConv());
-
- StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
- if (!IEEEAttr.empty())
- IEEE = IEEEAttr == "true";
-
- StringRef DX10ClampAttr
- = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
- if (!DX10ClampAttr.empty())
- DX10Clamp = DX10ClampAttr == "true";
-
- StringRef DenormF32Attr = F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
- if (!DenormF32Attr.empty())
- FP32Denormals = parseDenormalFPAttribute(DenormF32Attr);
-
- StringRef DenormAttr = F.getFnAttribute("denormal-fp-math").getValueAsString();
- if (!DenormAttr.empty()) {
- DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);
- if (DenormF32Attr.empty())
- FP32Denormals = DenormMode;
- FP64FP16Denormals = DenormMode;
- }
-}
-
namespace {
struct SourceOfDivergence {
@@ -2634,7 +2665,13 @@ struct SourceOfDivergence {
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
+struct AlwaysUniform {
+ unsigned Intr;
+};
+const AlwaysUniform *lookupAlwaysUniform(unsigned Intr);
+
#define GET_SourcesOfDivergence_IMPL
+#define GET_UniformIntrinsics_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10BufferFormat_IMPL
#define GET_Gfx11PlusBufferFormat_IMPL
@@ -2646,6 +2683,10 @@ bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
return lookupSourceOfDivergence(IntrID);
}
+bool isIntrinsicAlwaysUniform(unsigned IntrID) {
+ return lookupAlwaysUniform(IntrID);
+}
+
const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
uint8_t NumComponents,
uint8_t NumFormat,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 4d3423592353..bdf7ccad9c76 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -10,8 +10,9 @@
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H
#include "SIDefines.h"
-#include "llvm/ADT/FloatingPointMode.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/Alignment.h"
#include <array>
#include <functional>
@@ -24,13 +25,13 @@ namespace llvm {
struct Align;
class Argument;
class Function;
-class GCNSubtarget;
class GlobalValue;
class MCInstrInfo;
class MCRegisterClass;
class MCRegisterInfo;
class MCSubtargetInfo;
class StringRef;
+class TargetRegisterClass;
class Triple;
class raw_ostream;
@@ -42,6 +43,13 @@ namespace AMDGPU {
struct IsaVersion;
+enum {
+ AMDHSA_COV2 = 2,
+ AMDHSA_COV3 = 3,
+ AMDHSA_COV4 = 4,
+ AMDHSA_COV5 = 5
+};
+
/// \returns HSA OS ABI Version identification.
std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 2,
@@ -61,17 +69,20 @@ bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI);
/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
-unsigned getMultigridSyncArgImplicitArgPosition();
+unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
/// \returns The offset of the hostcall pointer argument from implicitarg_ptr
-unsigned getHostcallImplicitArgPosition();
+unsigned getHostcallImplicitArgPosition(unsigned COV);
-unsigned getDefaultQueueImplicitArgPosition();
-unsigned getCompletionActionImplicitArgPosition();
+unsigned getDefaultQueueImplicitArgPosition(unsigned COV);
+unsigned getCompletionActionImplicitArgPosition(unsigned COV);
/// \returns Code object version.
unsigned getAmdhsaCodeObjectVersion();
+/// \returns Code object version.
+unsigned getCodeObjectVersion(const Module &M);
+
struct GcnBufferFormatInfo {
unsigned Format;
unsigned BitsPerComp;
@@ -116,6 +127,7 @@ private:
const MCSubtargetInfo &STI;
TargetIDSetting XnackSetting;
TargetIDSetting SramEccSetting;
+ unsigned CodeObjectVersion;
public:
explicit AMDGPUTargetID(const MCSubtargetInfo &STI);
@@ -145,6 +157,10 @@ public:
return XnackSetting;
}
+ void setCodeObjectVersion(unsigned COV) {
+ CodeObjectVersion = COV;
+ }
+
/// Sets xnack setting to \p NewXnackSetting.
void setXnackSetting(TargetIDSetting NewXnackSetting) {
XnackSetting = NewXnackSetting;
@@ -339,6 +355,7 @@ struct MIMGBaseOpcodeInfo {
bool HasD16;
bool MSAA;
bool BVH;
+ bool A16;
};
LLVM_READONLY
@@ -544,8 +561,9 @@ enum Component : unsigned {
MAX_OPR_NUM = DST_NUM + MAX_SRC_NUM
};
-// Number of VGPR banks per VOPD component operand.
-constexpr unsigned BANKS_NUM[] = {2, 4, 4, 2};
+// LSB mask for VGPR banks per VOPD component operand.
+// 4 banks result in a mask 3, setting 2 lower bits.
+constexpr unsigned VOPD_VGPR_BANK_MASKS[] = {1, 3, 3, 1};
enum ComponentIndex : unsigned { X = 0, Y = 1 };
constexpr unsigned COMPONENTS[] = {ComponentIndex::X, ComponentIndex::Y};
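
The modulo test in getInvalidCompOperandIndex becomes a bitwise AND against these masks, which is equivalent because the bank counts are powers of two. A small standalone check of that equivalence (register numbers 0..63 stand in for VGPR indices in this example):

#include <cassert>

// From the hunk above: the old per-operand bank counts and the new LSB masks.
constexpr unsigned BanksNum[] = {2, 4, 4, 2};
constexpr unsigned BankMasks[] = {1, 3, 3, 1};

int main() {
  // For a power-of-two N, x % N == x & (N - 1), so both formulations agree.
  for (unsigned Opr = 0; Opr < 4; ++Opr)
    for (unsigned X = 0; X < 64; ++X)
      for (unsigned Y = 0; Y < 64; ++Y)
        assert((X % BanksNum[Opr] == Y % BanksNum[Opr]) ==
               ((X & BankMasks[Opr]) == (Y & BankMasks[Opr])));
}
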
@@ -555,7 +573,7 @@ constexpr unsigned COMPONENTS_NUM = 2;
class ComponentProps {
private:
unsigned SrcOperandsNum = 0;
- std::optional<unsigned> MandatoryLiteralIdx;
+ unsigned MandatoryLiteralIdx = ~0u;
bool HasSrc2Acc = false;
public:
@@ -571,13 +589,13 @@ public:
}
// Return true iff this component has a mandatory literal.
- bool hasMandatoryLiteral() const { return MandatoryLiteralIdx.has_value(); }
+ bool hasMandatoryLiteral() const { return MandatoryLiteralIdx != ~0u; }
// If this component has a mandatory literal, return component operand
// index of this literal (i.e. either Component::SRC1 or Component::SRC2).
unsigned getMandatoryLiteralCompOperandIndex() const {
assert(hasMandatoryLiteral());
- return *MandatoryLiteralIdx;
+ return MandatoryLiteralIdx;
}
// Return true iff this component has operand
@@ -593,8 +611,7 @@ public:
private:
bool hasMandatoryLiteralAt(unsigned CompSrcIdx) const {
assert(CompSrcIdx < Component::MAX_SRC_NUM);
- return hasMandatoryLiteral() &&
- *MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
+ return MandatoryLiteralIdx == Component::DST_NUM + CompSrcIdx;
}
};
@@ -811,10 +828,10 @@ int getIntegerAttribute(const Function &F, StringRef Name, int Default);
/// \returns \p Default and emits error if one of the requested values cannot be
/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
/// not present.
-std::pair<int, int> getIntegerPairAttribute(const Function &F,
- StringRef Name,
- std::pair<int, int> Default,
- bool OnlyFirstRequired = false);
+std::pair<unsigned, unsigned>
+getIntegerPairAttribute(const Function &F, StringRef Name,
+ std::pair<unsigned, unsigned> Default,
+ bool OnlyFirstRequired = false);
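
The pair is parsed from a string function attribute of the form "first[,second]", for example an attribute such as "amdgpu-waves-per-eu"="2,4" (the attribute name is only an illustration). A minimal stand-in for that parsing, without the diagnostics the real helper emits:

#include <cstdio>
#include <string>
#include <utility>

// Parse "first[,second]"; fall back to Default on malformed input, as the
// real helper does (it additionally emits an error on the LLVMContext).
std::pair<unsigned, unsigned> parsePair(const std::string &S,
                                        std::pair<unsigned, unsigned> Default) {
  size_t Comma = S.find(',');
  try {
    unsigned First = std::stoul(S.substr(0, Comma));
    unsigned Second = Comma == std::string::npos
                          ? Default.second
                          : std::stoul(S.substr(Comma + 1));
    return {First, Second};
  } catch (...) {
    return Default;
  }
}

int main() {
  auto P = parsePair("2,4", {0, 0});
  std::printf("%u %u\n", P.first, P.second); // 2 4
}
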
/// Represents the counter values to wait for in an s_waitcnt instruction.
///
@@ -847,11 +864,6 @@ struct Waitcnt {
return VsCnt != ~0u;
}
- bool dominates(const Waitcnt &Other) const {
- return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
- LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
- }
-
Waitcnt combined(const Waitcnt &Other) const {
return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
std::min(LgkmCnt, Other.LgkmCnt),
@@ -965,6 +977,33 @@ bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
bool &IsDefault, const MCSubtargetInfo &STI);
+/// \returns Decoded VaVdst from given immediate \p Encoded.
+unsigned decodeFieldVaVdst(unsigned Encoded);
+
+/// \returns Decoded VmVsrc from given immediate \p Encoded.
+unsigned decodeFieldVmVsrc(unsigned Encoded);
+
+/// \returns Decoded SaSdst from given immediate \p Encoded.
+unsigned decodeFieldSaSdst(unsigned Encoded);
+
+/// \returns \p VmVsrc as an encoded Depctr immediate.
+unsigned encodeFieldVmVsrc(unsigned VmVsrc);
+
+/// \returns \p Encoded combined with encoded \p VmVsrc.
+unsigned encodeFieldVmVsrc(unsigned Encoded, unsigned VmVsrc);
+
+/// \returns \p VaVdst as an encoded Depctr immediate.
+unsigned encodeFieldVaVdst(unsigned VaVdst);
+
+/// \returns \p Encoded combined with encoded \p VaVdst.
+unsigned encodeFieldVaVdst(unsigned Encoded, unsigned VaVdst);
+
+/// \returns \p SaSdst as an encoded Depctr immediate.
+unsigned encodeFieldSaSdst(unsigned SaSdst);
+
+/// \returns \p Encoded combined with encoded \p SaSdst.
+unsigned encodeFieldSaSdst(unsigned Encoded, unsigned SaSdst);
+
} // namespace DepCtr
namespace Exp {
@@ -1102,6 +1141,7 @@ bool hasMIMG_R128(const MCSubtargetInfo &STI);
bool hasA16(const MCSubtargetInfo &STI);
bool hasG16(const MCSubtargetInfo &STI);
bool hasPackedD16(const MCSubtargetInfo &STI);
+unsigned getNSAMaxSize(const MCSubtargetInfo &STI);
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
@@ -1162,6 +1202,9 @@ unsigned getRegBitWidth(unsigned RCID);
/// Get the size in bits of a register from the register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC);
+/// Get the size in bits of a register from the register class \p RC.
+unsigned getRegBitWidth(const TargetRegisterClass &RC);
+
/// Get size of register operand
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
unsigned OpNo);
@@ -1244,6 +1287,8 @@ bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi);
bool isArgPassedInSGPR(const Argument *Arg);
+bool isArgPassedInSGPR(const CallBase *CB, unsigned ArgNo);
+
LLVM_READONLY
bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
int64_t EncodedOffset);
@@ -1282,10 +1327,6 @@ unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST);
/// not the encoded offset.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset);
-bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
- const GCNSubtarget *Subtarget,
- Align Alignment = Align(4));
-
LLVM_READNONE
inline bool isLegal64BitDPPControl(unsigned DC) {
return DC >= DPP::ROW_NEWBCAST_FIRST && DC <= DPP::ROW_NEWBCAST_LAST;
@@ -1294,109 +1335,8 @@ inline bool isLegal64BitDPPControl(unsigned DC) {
/// \returns true if the intrinsic is divergent
bool isIntrinsicSourceOfDivergence(unsigned IntrID);
-// Track defaults for fields in the MODE register.
-struct SIModeRegisterDefaults {
- /// Floating point opcodes that support exception flag gathering quiet and
- /// propagate signaling NaN inputs per IEEE 754-2008. Min_dx10 and max_dx10
- /// become IEEE 754- 2008 compliant due to signaling NaN propagation and
- /// quieting.
- bool IEEE : 1;
-
- /// Used by the vector ALU to force DX10-style treatment of NaNs: when set,
- /// clamp NaN to zero; otherwise, pass NaN through.
- bool DX10Clamp : 1;
-
- /// If this is set, neither input or output denormals are flushed for most f32
- /// instructions.
- DenormalMode FP32Denormals;
-
- /// If this is set, neither input or output denormals are flushed for both f64
- /// and f16/v2f16 instructions.
- DenormalMode FP64FP16Denormals;
-
- SIModeRegisterDefaults() :
- IEEE(true),
- DX10Clamp(true),
- FP32Denormals(DenormalMode::getIEEE()),
- FP64FP16Denormals(DenormalMode::getIEEE()) {}
-
- SIModeRegisterDefaults(const Function &F);
-
- static SIModeRegisterDefaults getDefaultForCallingConv(CallingConv::ID CC) {
- SIModeRegisterDefaults Mode;
- Mode.IEEE = !AMDGPU::isShader(CC);
- return Mode;
- }
-
- bool operator ==(const SIModeRegisterDefaults Other) const {
- return IEEE == Other.IEEE && DX10Clamp == Other.DX10Clamp &&
- FP32Denormals == Other.FP32Denormals &&
- FP64FP16Denormals == Other.FP64FP16Denormals;
- }
-
- bool allFP32Denormals() const {
- return FP32Denormals == DenormalMode::getIEEE();
- }
-
- bool allFP64FP16Denormals() const {
- return FP64FP16Denormals == DenormalMode::getIEEE();
- }
-
- /// Get the encoding value for the FP_DENORM bits of the mode register for the
- /// FP32 denormal mode.
- uint32_t fpDenormModeSPValue() const {
- if (FP32Denormals == DenormalMode::getPreserveSign())
- return FP_DENORM_FLUSH_IN_FLUSH_OUT;
- if (FP32Denormals.Output == DenormalMode::PreserveSign)
- return FP_DENORM_FLUSH_OUT;
- if (FP32Denormals.Input == DenormalMode::PreserveSign)
- return FP_DENORM_FLUSH_IN;
- return FP_DENORM_FLUSH_NONE;
- }
-
- /// Get the encoding value for the FP_DENORM bits of the mode register for the
- /// FP64/FP16 denormal mode.
- uint32_t fpDenormModeDPValue() const {
- if (FP64FP16Denormals == DenormalMode::getPreserveSign())
- return FP_DENORM_FLUSH_IN_FLUSH_OUT;
- if (FP64FP16Denormals.Output == DenormalMode::PreserveSign)
- return FP_DENORM_FLUSH_OUT;
- if (FP64FP16Denormals.Input == DenormalMode::PreserveSign)
- return FP_DENORM_FLUSH_IN;
- return FP_DENORM_FLUSH_NONE;
- }
-
- /// Returns true if a flag is compatible if it's enabled in the callee, but
- /// disabled in the caller.
- static bool oneWayCompatible(bool CallerMode, bool CalleeMode) {
- return CallerMode == CalleeMode || (!CallerMode && CalleeMode);
- }
-
- // FIXME: Inlining should be OK for dx10-clamp, since the caller's mode should
- // be able to override.
- bool isInlineCompatible(SIModeRegisterDefaults CalleeMode) const {
- if (DX10Clamp != CalleeMode.DX10Clamp)
- return false;
- if (IEEE != CalleeMode.IEEE)
- return false;
-
- // Allow inlining denormals enabled into denormals flushed functions.
- return oneWayCompatible(FP64FP16Denormals.Input !=
- DenormalMode::PreserveSign,
- CalleeMode.FP64FP16Denormals.Input !=
- DenormalMode::PreserveSign) &&
- oneWayCompatible(FP64FP16Denormals.Output !=
- DenormalMode::PreserveSign,
- CalleeMode.FP64FP16Denormals.Output !=
- DenormalMode::PreserveSign) &&
- oneWayCompatible(FP32Denormals.Input != DenormalMode::PreserveSign,
- CalleeMode.FP32Denormals.Input !=
- DenormalMode::PreserveSign) &&
- oneWayCompatible(FP32Denormals.Output != DenormalMode::PreserveSign,
- CalleeMode.FP32Denormals.Output !=
- DenormalMode::PreserveSign);
- }
-};
+/// \returns true if the intrinsic is uniform
+bool isIntrinsicAlwaysUniform(unsigned IntrID);
} // end namespace AMDGPU
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
index b1418253fd13..cbdbf1c16f9f 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp
@@ -31,65 +31,25 @@ Align getAlign(DataLayout const &DL, const GlobalVariable *GV) {
GV->getValueType());
}
-static bool shouldLowerLDSToStruct(const GlobalVariable &GV,
- const Function *F) {
- // We are not interested in kernel LDS lowering for module LDS itself.
- if (F && GV.getName() == "llvm.amdgcn.module.lds")
+bool isDynamicLDS(const GlobalVariable &GV) {
+ // An external, zero-size addrspace(3) variable without an initializer
+ // implies CUDA/HIP extern __shared__. The semantics of such a variable
+ // appear to be that all extern __shared__ variables alias one another.
+ // This hits different handling.
+ const Module *M = GV.getParent();
+ const DataLayout &DL = M->getDataLayout();
+ if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
return false;
-
- bool Ret = false;
- SmallPtrSet<const User *, 8> Visited;
- SmallVector<const User *, 16> Stack(GV.users());
-
- assert(!F || isKernelCC(F));
-
- while (!Stack.empty()) {
- const User *V = Stack.pop_back_val();
- Visited.insert(V);
-
- if (isa<GlobalValue>(V)) {
- // This use of the LDS variable is the initializer of a global variable.
- // This is ill formed. The address of an LDS variable is kernel dependent
- // and unknown until runtime. It can't be written to a global variable.
- continue;
- }
-
- if (auto *I = dyn_cast<Instruction>(V)) {
- const Function *UF = I->getFunction();
- if (UF == F) {
- // Used from this kernel, we want to put it into the structure.
- Ret = true;
- } else if (!F) {
- // For module LDS lowering, lowering is required if the user instruction
- // is from non-kernel function.
- Ret |= !isKernelCC(UF);
- }
- continue;
- }
-
- // User V should be a constant, recursively visit users of V.
- assert(isa<Constant>(V) && "Expected a constant.");
- append_range(Stack, V->users());
}
-
- return Ret;
+ uint64_t AllocSize = DL.getTypeAllocSize(GV.getValueType());
+ return GV.hasExternalLinkage() && AllocSize == 0;
}
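
For reference, a sketch against the LLVM C++ API of the kind of global this predicate classifies as dynamic LDS; the helper and variable names are invented for the example, and address space 3 is the AMDGPU local (LDS) address space:

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Builds the IR form of HIP/CUDA "extern __shared__": external linkage,
// addrspace(3), zero allocation size, no initializer.
GlobalVariable *makeDynLDS(Module &M) {
  LLVMContext &Ctx = M.getContext();
  Type *Ty = ArrayType::get(Type::getInt32Ty(Ctx), 0); // zero-size array
  return new GlobalVariable(M, Ty, /*isConstant=*/false,
                            GlobalValue::ExternalLinkage,
                            /*Initializer=*/nullptr, "dyn_lds",
                            /*InsertBefore=*/nullptr,
                            GlobalValue::NotThreadLocal,
                            /*AddressSpace=*/3);
}
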
bool isLDSVariableToLower(const GlobalVariable &GV) {
if (GV.getType()->getPointerAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) {
return false;
}
- if (!GV.hasInitializer()) {
- // addrspace(3) without initializer implies cuda/hip extern __shared__
- // the semantics for such a variable appears to be that all extern
- // __shared__ variables alias one another, in which case this transform
- // is not required
- return false;
- }
- if (!isa<UndefValue>(GV.getInitializer())) {
- // Initializers are unimplemented for LDS address space.
- // Leave such variables in place for consistent error reporting.
- return false;
+ if (isDynamicLDS(GV)) {
+ return true;
}
if (GV.isConstant()) {
// A constant undef variable can't be written to, and any load is
@@ -97,22 +57,12 @@ bool isLDSVariableToLower(const GlobalVariable &GV) {
// dropped by the back end if not. This pass skips over it.
return false;
}
- return true;
-}
-
-std::vector<GlobalVariable *> findLDSVariablesToLower(Module &M,
- const Function *F) {
- std::vector<llvm::GlobalVariable *> LocalVars;
- for (auto &GV : M.globals()) {
- if (!isLDSVariableToLower(GV)) {
- continue;
- }
- if (!shouldLowerLDSToStruct(GV, F)) {
- continue;
- }
- LocalVars.push_back(&GV);
+ if (GV.hasInitializer() && !isa<UndefValue>(GV.getInitializer())) {
+ // Initializers are unimplemented for LDS address space.
+ // Leave such variables in place for consistent error reporting.
+ return false;
}
- return LocalVars;
+ return true;
}
bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
index 92373fc14a98..df37c420fa72 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h
@@ -28,9 +28,8 @@ namespace AMDGPU {
Align getAlign(DataLayout const &DL, const GlobalVariable *GV);
+bool isDynamicLDS(const GlobalVariable &GV);
bool isLDSVariableToLower(const GlobalVariable &GV);
-std::vector<GlobalVariable *> findLDSVariablesToLower(Module &M,
- const Function *F);
/// Given a \p Def clobbering a load from \p Ptr according to the MSSA check
/// if this is actually a memory update or an artificial clobber to facilitate
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index 4ad93f7b0b68..a92d574b1848 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -811,6 +811,38 @@ msgpack::MapDocNode AMDGPUPALMetadata::getShaderFunction(StringRef Name) {
return Functions[Name].getMap(/*Convert=*/true);
}
+msgpack::DocNode &AMDGPUPALMetadata::refComputeRegisters() {
+ auto &N =
+ MsgPackDoc.getRoot()
+ .getMap(/*Convert=*/true)[MsgPackDoc.getNode("amdpal.pipelines")]
+ .getArray(/*Convert=*/true)[0]
+ .getMap(/*Convert=*/true)[MsgPackDoc.getNode(".compute_registers")];
+ N.getMap(/*Convert=*/true);
+ return N;
+}
+
+msgpack::MapDocNode AMDGPUPALMetadata::getComputeRegisters() {
+ if (ComputeRegisters.isEmpty())
+ ComputeRegisters = refComputeRegisters();
+ return ComputeRegisters.getMap();
+}
+
+msgpack::DocNode &AMDGPUPALMetadata::refGraphicsRegisters() {
+ auto &N =
+ MsgPackDoc.getRoot()
+ .getMap(/*Convert=*/true)[MsgPackDoc.getNode("amdpal.pipelines")]
+ .getArray(/*Convert=*/true)[0]
+ .getMap(/*Convert=*/true)[MsgPackDoc.getNode(".graphics_registers")];
+ N.getMap(/*Convert=*/true);
+ return N;
+}
+
+msgpack::MapDocNode AMDGPUPALMetadata::getGraphicsRegisters() {
+ if (GraphicsRegisters.isEmpty())
+ GraphicsRegisters = refGraphicsRegisters();
+ return GraphicsRegisters.getMap();
+}
+
// Return the PAL metadata hardware shader stage name.
static const char *getStageName(CallingConv::ID CC) {
switch (CC) {
@@ -833,15 +865,21 @@ static const char *getStageName(CallingConv::ID CC) {
}
}
+msgpack::DocNode &AMDGPUPALMetadata::refHwStage() {
+ auto &N =
+ MsgPackDoc.getRoot()
+ .getMap(/*Convert=*/true)[MsgPackDoc.getNode("amdpal.pipelines")]
+ .getArray(/*Convert=*/true)[0]
+ .getMap(/*Convert=*/true)[MsgPackDoc.getNode(".hardware_stages")];
+ N.getMap(/*Convert=*/true);
+ return N;
+}
+
// Get (create if necessary) the .hardware_stages entry for the given calling
// convention.
msgpack::MapDocNode AMDGPUPALMetadata::getHwStage(unsigned CC) {
if (HwStages.isEmpty())
- HwStages = MsgPackDoc.getRoot()
- .getMap(/*Convert=*/true)["amdpal.pipelines"]
- .getArray(/*Convert=*/true)[0]
- .getMap(/*Convert=*/true)[".hardware_stages"]
- .getMap(/*Convert=*/true);
+ HwStages = refHwStage();
return HwStages.getMap()[getStageName(CC)].getMap(/*Convert=*/true);
}
@@ -874,3 +912,78 @@ void AMDGPUPALMetadata::reset() {
Registers = MsgPackDoc.getEmptyNode();
HwStages = MsgPackDoc.getEmptyNode();
}
+
+unsigned AMDGPUPALMetadata::getPALVersion(unsigned idx) {
+ assert(idx < 2 &&
+ "illegal index to PAL version - should be 0 (major) or 1 (minor)");
+ if (!VersionChecked) {
+ if (Version.isEmpty()) {
+ auto &M = MsgPackDoc.getRoot().getMap(/*Convert=*/true);
+ auto I = M.find(MsgPackDoc.getNode("amdpal.version"));
+ if (I != M.end())
+ Version = I->second;
+ }
+ VersionChecked = true;
+ }
+ if (Version.isEmpty())
+ // Default to 2.6 if there's no version info
+ return idx ? 6 : 2;
+ return Version.getArray()[idx].getUInt();
+}
+
+unsigned AMDGPUPALMetadata::getPALMajorVersion() { return getPALVersion(0); }
+
+unsigned AMDGPUPALMetadata::getPALMinorVersion() { return getPALVersion(1); }
+
+// Set the field in a given .hardware_stages entry
+void AMDGPUPALMetadata::setHwStage(unsigned CC, StringRef field, unsigned Val) {
+ getHwStage(CC)[field] = Val;
+}
+
+void AMDGPUPALMetadata::setHwStage(unsigned CC, StringRef field, bool Val) {
+ getHwStage(CC)[field] = Val;
+}
+
+void AMDGPUPALMetadata::setComputeRegisters(StringRef field, unsigned Val) {
+ getComputeRegisters()[field] = Val;
+}
+
+void AMDGPUPALMetadata::setComputeRegisters(StringRef field, bool Val) {
+ getComputeRegisters()[field] = Val;
+}
+
+msgpack::DocNode *AMDGPUPALMetadata::refComputeRegister(StringRef field) {
+ auto M = getComputeRegisters();
+ auto I = M.find(field);
+ return I == M.end() ? nullptr : &I->second;
+}
+
+bool AMDGPUPALMetadata::checkComputeRegisters(StringRef field, unsigned Val) {
+ if (auto N = refComputeRegister(field))
+ return N->getUInt() == Val;
+ return false;
+}
+
+bool AMDGPUPALMetadata::checkComputeRegisters(StringRef field, bool Val) {
+ if (auto N = refComputeRegister(field))
+ return N->getBool() == Val;
+ return false;
+}
+
+void AMDGPUPALMetadata::setGraphicsRegisters(StringRef field, unsigned Val) {
+ getGraphicsRegisters()[field] = Val;
+}
+
+void AMDGPUPALMetadata::setGraphicsRegisters(StringRef field, bool Val) {
+ getGraphicsRegisters()[field] = Val;
+}
+
+void AMDGPUPALMetadata::setGraphicsRegisters(StringRef field1, StringRef field2,
+ unsigned Val) {
+ getGraphicsRegisters()[field1].getMap(true)[field2] = Val;
+}
+
+void AMDGPUPALMetadata::setGraphicsRegisters(StringRef field1, StringRef field2,
+ bool Val) {
+ getGraphicsRegisters()[field1].getMap(true)[field2] = Val;
+}
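
A usage sketch of the setters added above; the field names are invented for illustration, and the msgpack paths in the comments come from refComputeRegisters()/refGraphicsRegisters():

#include "AMDGPUPALMetadata.h"
#include "llvm/IR/CallingConv.h"

// ".example_enable" / ".example_block" / ".example_field" / ".example_flag"
// are made-up names; the setters create the map entries on demand.
void emitExamplePALMetadata(llvm::AMDGPUPALMetadata &MD) {
  // Lands under amdpal.pipelines[0][".compute_registers"].
  MD.setComputeRegisters(".example_enable", true);
  // Lands under amdpal.pipelines[0][".graphics_registers"][".example_block"].
  MD.setGraphicsRegisters(".example_block", ".example_field", 1u);
  // getPALMajorVersion()/getPALMinorVersion() default to 2.6 when the input
  // metadata carries no amdpal.version node.
  if (MD.getPALMajorVersion() >= 3)
    MD.setHwStage(llvm::CallingConv::AMDGPU_CS, ".example_flag", true);
}
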
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
index a45a799e38a9..e477904cb81f 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -27,6 +27,11 @@ class AMDGPUPALMetadata {
msgpack::DocNode Registers;
msgpack::DocNode HwStages;
msgpack::DocNode ShaderFunctions;
+ bool VersionChecked = false;
+ msgpack::DocNode Version;
+ // From PAL version >= 3.0
+ msgpack::DocNode ComputeRegisters;
+ msgpack::DocNode GraphicsRegisters;
public:
// Read the amdgpu.pal.metadata supplied by the frontend, ready for
@@ -129,6 +134,26 @@ public:
// Set legacy PAL metadata format.
void setLegacy();
+ unsigned getPALMajorVersion();
+ unsigned getPALMinorVersion();
+
+ void setHwStage(unsigned CC, StringRef field, unsigned Val);
+ void setHwStage(unsigned CC, StringRef field, bool Val);
+
+ void setComputeRegisters(StringRef field, unsigned Val);
+ void setComputeRegisters(StringRef field, bool Val);
+
+ // If the field does not exist, this returns nullptr rather than creating a
+ // new entry (which is the behaviour of the other functions).
+ msgpack::DocNode *refComputeRegister(StringRef field);
+ bool checkComputeRegisters(StringRef field, unsigned Val);
+ bool checkComputeRegisters(StringRef field, bool Val);
+
+ void setGraphicsRegisters(StringRef field, unsigned Val);
+ void setGraphicsRegisters(StringRef field, bool Val);
+ void setGraphicsRegisters(StringRef field1, StringRef field2, unsigned Val);
+ void setGraphicsRegisters(StringRef field1, StringRef field2, bool Val);
+
// Erase all PAL metadata.
void reset();
@@ -151,10 +176,29 @@ private:
// Get (create if necessary) a function in the shader functions map.
msgpack::MapDocNode getShaderFunction(StringRef Name);
+ // Reference (create if necessary) the node for the compute_registers map.
+ msgpack::DocNode &refComputeRegisters();
+
+ // Get (create if necessary) the .compute_registers entry.
+ msgpack::MapDocNode getComputeRegisters();
+
+ // Reference (create if necessary) the node for the graphics registers map.
+ msgpack::DocNode &refGraphicsRegisters();
+
+ // Get (create if necessary) the .graphics_registers entry.
+ msgpack::MapDocNode getGraphicsRegisters();
+
+ // Reference (create if necessary) the node for the hardware_stages map.
+ msgpack::DocNode &refHwStage();
+
// Get (create if necessary) the .hardware_stages entry for the given calling
// convention.
msgpack::MapDocNode getHwStage(unsigned CC);
+ // Get the PAL version: major (idx 0) or minor (idx 1). This is an internal
+ // helper for the public wrapper functions that request the major or minor.
+ unsigned getPALVersion(unsigned idx);
+
bool setFromLegacyBlob(StringRef Blob);
bool setFromMsgPackBlob(StringRef Blob);
void toLegacyBlob(std::string &Blob);
diff --git a/llvm/lib/Target/AMDGPU/VINTERPInstructions.td b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
index 71de20223e9f..7d03150bf5b1 100644
--- a/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VINTERPInstructions.td
@@ -23,7 +23,6 @@ class VINTERPe_gfx11 <bits<7> op, VOPProfile P> : Enc64 {
let Inst{31-26} = 0x33; // VOP3P encoding
let Inst{25-24} = 0x1; // VINTERP sub-encoding
- let Inst{23} = 0; // reserved
let Inst{7-0} = vdst;
let Inst{10-8} = waitexp;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 285499ad6984..1a8efc6e3df2 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -120,28 +120,28 @@ multiclass VOP1Inst <string opName, VOPProfile P,
def _e32 : VOP1_Pseudo <opName, P>;
else
// Only for V_MOV_B32
- def _e32 : VOP1_Pseudo <opName, P>, VOPD_Component<VOPDOp, "v_mov_b32">;
+ def _e32 : VOP1_Pseudo <opName, P>, VOPD_Component<VOPDOp, opName>;
def _e64 : VOP3InstBase <opName, P, node>;
}
- foreach _ = BoolToList<P.HasExtSDWA>.ret in
+ if P.HasExtSDWA then
def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
- foreach _ = BoolToList<P.HasExtDPP>.ret in
+ if P.HasExtDPP then
def _dpp : VOP1_DPP_Pseudo <opName, P>;
let SubtargetPredicate = isGFX11Plus in {
- foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in
+ if P.HasExtVOP3DPP then
def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
} // End SubtargetPredicate = isGFX11Plus
def : MnemonicAlias<opName#"_e32", opName>, LetDummies;
def : MnemonicAlias<opName#"_e64", opName>, LetDummies;
- foreach _ = BoolToList<P.HasExtSDWA>.ret in
+ if P.HasExtSDWA then
def : MnemonicAlias<opName#"_sdwa", opName>, LetDummies;
- foreach _ = BoolToList<P.HasExtDPP>.ret in
+ if P.HasExtDPP then
def : MnemonicAlias<opName#"_dpp", opName, AMDGPUAsmVariants.DPP>, LetDummies;
}
@@ -229,9 +229,9 @@ defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
// TODO: Make profile for this, there is VOP3 encoding also
def V_READFIRSTLANE_B32 :
InstSI <(outs SReg_32:$vdst),
- (ins VRegOrLds_32:$src0),
+ (ins VRegOrLdsSrc_32:$src0),
"v_readfirstlane_b32 $vdst, $src0",
- [(set i32:$vdst, (int_amdgcn_readfirstlane (i32 VRegOrLds_32:$src0)))]>,
+ [(set i32:$vdst, (int_amdgcn_readfirstlane (i32 VRegOrLdsSrc_32:$src0)))]>,
Enc32 {
let isCodeGenOnly = 0;
@@ -266,7 +266,7 @@ defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP1_F64_I32, sint_to_fp>;
}
defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
-defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
+defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, any_fpextend>;
// OMod clears exceptions when set in this instruction
defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_uint>;
@@ -290,15 +290,15 @@ defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_
defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32_SPECIAL_OMOD, fp_to_sint>;
let FPDPRounding = 1, isReMaterializable = 0 in {
let OtherPredicates = [NotHasTrue16BitInsts] in
- defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, fpround>;
+ defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_F16_F32, any_fpround>;
let OtherPredicates = [HasTrue16BitInsts] in
- defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16<VOP_F16_F32>, fpround>;
+ defm V_CVT_F16_F32_t16 : VOP1Inst <"v_cvt_f16_f32_t16", VOPProfile_True16<VOP_F16_F32>, any_fpround>;
} // End FPDPRounding = 1, isReMaterializable = 0
let OtherPredicates = [NotHasTrue16BitInsts] in
-defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, fpextend>;
+defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_F16, any_fpextend>;
let OtherPredicates = [HasTrue16BitInsts] in
-defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16<VOP_F32_F16>, fpextend>;
+defm V_CVT_F32_F16_t16 : VOP1Inst <"v_cvt_f32_f16_t16", VOPProfile_True16<VOP_F32_F16>, any_fpextend>;
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
@@ -321,8 +321,8 @@ defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;
let TRANS = 1, SchedRW = [WriteTrans32] in {
-defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;
-defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
+defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, AMDGPUexp>;
+defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, AMDGPUlog>;
defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32, AMDGPUrcp_iflag>;
defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
@@ -332,7 +332,7 @@ defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, any_amdgcn_sqrt>;
let TRANS = 1, SchedRW = [WriteTrans64] in {
defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
-defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, any_amdgcn_sqrt>;
+defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, int_amdgcn_sqrt>;
} // End TRANS = 1, SchedRW = [WriteTrans64]
let TRANS = 1, SchedRW = [WriteTrans32] in {
@@ -487,8 +487,8 @@ let TRANS = 1, SchedRW = [WriteTrans32] in {
defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
defm V_SQRT_F16 : VOP1Inst_t16 <"v_sqrt_f16", VOP_F16_F16, any_amdgcn_sqrt>;
defm V_RSQ_F16 : VOP1Inst_t16 <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
-defm V_LOG_F16 : VOP1Inst_t16 <"v_log_f16", VOP_F16_F16, flog2>;
-defm V_EXP_F16 : VOP1Inst_t16 <"v_exp_f16", VOP_F16_F16, fexp2>;
+defm V_LOG_F16 : VOP1Inst_t16 <"v_log_f16", VOP_F16_F16, AMDGPUlogf16>;
+defm V_EXP_F16 : VOP1Inst_t16 <"v_exp_f16", VOP_F16_F16, AMDGPUexpf16>;
defm V_SIN_F16 : VOP1Inst_t16 <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
defm V_COS_F16 : VOP1Inst_t16 <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
} // End TRANS = 1, SchedRW = [WriteTrans32]
@@ -528,13 +528,10 @@ def : GCNPat<
>;
}
-def VOP_SWAP_I32 : VOPProfile<[i32, i32, i32, untyped]> {
- let Outs32 = (outs VGPR_32:$vdst, VGPR_32:$vdst1);
- let Ins32 = (ins VGPR_32:$src0, VGPR_32:$src1);
- let Outs64 = Outs32;
+def VOP_SWAP_I32 : VOPProfile<[i32, i32, untyped, untyped]> {
+ let Outs32 = (outs VGPR_32:$vdst, VRegSrc_32:$vdst1);
+ let Ins32 = (ins VRegSrc_32:$src0, VGPR_32:$src1);
let Asm32 = " $vdst, $src0";
- let Asm64 = "";
- let Ins64 = (ins);
}
let SubtargetPredicate = isGFX9Plus in {
@@ -633,7 +630,7 @@ let SubtargetPredicate = isGFX10Plus in {
def VOPProfileAccMov : VOP_NO_EXT<VOP_I32_I32> {
let DstRC = RegisterOperand<AGPR_32>;
- let Src0RC32 = RegisterOperand<AGPR_32>;
+ let Src0RC32 = ARegSrc_32;
let Asm32 = " $vdst, $src0";
}
@@ -847,7 +844,7 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
VOP3e_gfx10<{0, 1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}
multiclass VOP1_Real_sdwa_gfx10<bits<9> op> {
- foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
+ if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx10 :
VOP_SDWA10_Real<!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWA9Ae<op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
@@ -855,13 +852,13 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
}
}
multiclass VOP1_Real_dpp_gfx10<bits<9> op> {
- foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in
+ if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
def _dpp_gfx10 : VOP1_DPP16<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10> {
let DecoderNamespace = "SDWA10";
}
}
multiclass VOP1_Real_dpp8_gfx10<bits<9> op> {
- foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in
+ if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
def _dpp8_gfx10 : VOP1_DPP8<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")> {
let DecoderNamespace = "DPP8";
}
@@ -1067,17 +1064,17 @@ multiclass VOP1_Real_e32e64_vi <bits<10> op> {
multiclass VOP1_Real_vi <bits<10> op> {
defm NAME : VOP1_Real_e32e64_vi <op>;
- foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA>.ret in
+ if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA then
def _sdwa_vi :
VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
- foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
+ if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx9 :
VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
- foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
def _dpp_vi :
VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
@@ -1241,12 +1238,12 @@ multiclass VOP1_Real_gfx9 <bits<10> op> {
defm NAME : VOP1_Real_e32e64_vi <op>;
}
- foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
+ if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx9 :
VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
- foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
def _dpp_gfx9 :
VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
@@ -1258,14 +1255,14 @@ multiclass VOP1_Real_NoDstSel_SDWA_gfx9 <bits<10> op> {
defm NAME : VOP1_Real_e32e64_vi <op>;
}
- foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
+ if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx9 :
VOP_SDWA9_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP1_SDWA9Ae <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
let Inst{42-40} = 6;
}
- foreach _ = BoolToList<!cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ if !cast<VOP1_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
def _dpp_gfx9 :
VOP_DPP_Real<!cast<VOP1_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
VOP1_DPPe<op{7-0}, !cast<VOP1_DPP_Pseudo>(NAME#"_dpp")>;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index a1f99ca3aefa..481a162748e6 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -157,7 +157,7 @@ multiclass VOP2Inst_e64<string opName,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
let SubtargetPredicate = isGFX11Plus in {
- foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in
+ if P.HasExtVOP3DPP then
def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
} // End SubtargetPredicate = isGFX11Plus
} // End renamedInGFX9 = GFX9Renamed
@@ -167,7 +167,7 @@ multiclass VOP2Inst_sdwa<string opName,
VOPProfile P,
bit GFX9Renamed = 0> {
let renamedInGFX9 = GFX9Renamed in {
- foreach _ = BoolToList<P.HasExtSDWA>.ret in
+ if P.HasExtSDWA then
def _sdwa : VOP2_SDWA_Pseudo <opName, P>;
} // End renamedInGFX9 = GFX9Renamed
}
@@ -181,7 +181,7 @@ multiclass VOP2Inst<string opName,
VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
VOP2Inst_sdwa<opName, P, GFX9Renamed> {
let renamedInGFX9 = GFX9Renamed in {
- foreach _ = BoolToList<P.HasExtDPP>.ret in
+ if P.HasExtDPP then
def _dpp : VOP2_DPP_Pseudo <opName, P>;
}
}
@@ -227,7 +227,7 @@ multiclass VOP2Inst_VOPD<string opName,
VOP2Inst_e64<opName, P, node, revOp, GFX9Renamed>,
VOP2Inst_sdwa<opName, P, GFX9Renamed> {
let renamedInGFX9 = GFX9Renamed in {
- foreach _ = BoolToList<P.HasExtDPP>.ret in
+ if P.HasExtDPP then
def _dpp : VOP2_DPP_Pseudo <opName, P>;
}
}
@@ -246,11 +246,11 @@ multiclass VOP2bInst <string opName,
let usesCustomInserter = true;
}
- foreach _ = BoolToList<P.HasExtSDWA>.ret in
+ if P.HasExtSDWA then
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
let AsmMatchConverter = "cvtSdwaVOP2b";
}
- foreach _ = BoolToList<P.HasExtDPP>.ret in
+ if P.HasExtDPP then
def _dpp : VOP2_DPP_Pseudo <opName, P>;
} // End Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC]
@@ -258,7 +258,7 @@ multiclass VOP2bInst <string opName,
Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
let SubtargetPredicate = isGFX11Plus in {
- foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in
+ if P.HasExtVOP3DPP then
def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
} // End SubtargetPredicate = isGFX11Plus
}
@@ -297,12 +297,12 @@ multiclass
Commutable_REV<revOp#"_e32", !eq(revOp, opName)>,
VOPD_Component<VOPDOp, VOPDName>;
- foreach _ = BoolToList<P.HasExtSDWA>.ret in
+ if P.HasExtSDWA then
def _sdwa : VOP2_SDWA_Pseudo <opName, P> {
let AsmMatchConverter = "cvtSdwaVOP2e";
}
- foreach _ = BoolToList<P.HasExtDPP>.ret in
+ if P.HasExtDPP then
def _dpp : VOP2_DPP_Pseudo <opName, P>;
}
@@ -312,7 +312,7 @@ multiclass
}
let SubtargetPredicate = isGFX11Plus in {
- foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in
+ if P.HasExtVOP3DPP then
def _e64_dpp : VOP3_DPP_Pseudo <opName, P>;
} // End SubtargetPredicate = isGFX11Plus
}
@@ -357,7 +357,7 @@ class VOP_MADK_Base<ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
}
class VOP_MADAK <ValueType vt> : VOP_MADK_Base<vt> {
- field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
+ field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16);
field dag Ins32 = !if(!eq(vt.Size, 32),
(ins VSrc_f32_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm),
(ins VSrc_f16_Deferred:$src0, VGPR_32:$src1, ImmOpType:$imm));
@@ -383,7 +383,7 @@ def VOP_MADAK_F16_t16 : VOP_MADAK <f16> {
def VOP_MADAK_F32 : VOP_MADAK <f32>;
class VOP_MADMK <ValueType vt> : VOP_MADK_Base<vt> {
- field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
+ field Operand ImmOpType = !if(!eq(vt.Size, 32), KImmFP32, KImmFP16);
field dag Ins32 = !if(!eq(vt.Size, 32),
(ins VSrc_f32_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1),
(ins VSrc_f16_Deferred:$src0, ImmOpType:$imm, VGPR_32:$src1));
@@ -660,7 +660,7 @@ def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>;
def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> {
let Outs32 = (outs SReg_32:$vdst);
let Outs64 = Outs32;
- let Ins32 = (ins VRegOrLds_32:$src0, SCSrc_b32:$src1);
+ let Ins32 = (ins VRegOrLdsSrc_32:$src0, SCSrc_b32:$src1);
let Ins64 = Ins32;
let Asm32 = " $vdst, $src0, $src1";
let Asm64 = Asm32;
@@ -764,19 +764,20 @@ defm V_SUBREV_U32 : VOP2Inst <"v_subrev_u32", VOP_I32_I32_I32_ARITH, null_frag,
let isConvergent = 1, Uses = []<Register> in {
def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
[(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]>;
-
-let Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
+let IsNeverUniform = 1, Constraints = "$vdst = $vdst_in", DisableEncoding="$vdst_in" in {
def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE,
[(set i32:$vdst, (int_amdgcn_writelane i32:$src0, i32:$src1, i32:$vdst_in))]>;
-} // End $vdst = $vdst_in, DisableEncoding $vdst_in
+} // End IsNeverUniform, $vdst = $vdst_in, DisableEncoding $vdst_in
} // End isConvergent = 1
let isReMaterializable = 1 in {
defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>;
defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32, add_ctpop>;
+let IsNeverUniform = 1 in {
defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>;
defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>;
-defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>;
+} // End IsNeverUniform = 1
+defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, any_fldexp>;
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_V2I16_F32_F32, AMDGPUpknorm_i16_f32>;
@@ -862,9 +863,18 @@ def : divergent_i64_BinOp <xor, V_XOR_B32_e64>;
// 16-Bit Operand Instructions
//===----------------------------------------------------------------------===//
-def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_I32> {
- // The ldexp.f16 intrinsic expects a i32 src1 operand, though the hardware
- // encoding treats src1 as an f16
+// The ldexp.f16 intrinsic expects an integer src1 operand, though the hardware
+// encoding treats src1 as an f16
+def LDEXP_F16_VOPProfile : VOPProfile <[f16, f16, f16, untyped]> {
+ let Src1Mod = Int32InputMods;
+ let Src1ModDPP = IntVRegInputMods;
+ let Src1ModVOP3DPP = IntVRegInputMods;
+ // SDWA sext is the only modifier allowed.
+ let HasSrc1IntMods = 1;
+ let HasSrc1FloatMods = 0;
+ let Src1ModSDWA = Int16SDWAInputMods;
+}
+def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_F16> {
let Src1RC32 = RegisterOperand<VGPR_32_Lo128>;
let Src1DPP = VGPR_32_Lo128;
let Src1ModDPP = IntT16VRegInputMods;
@@ -873,9 +883,9 @@ def LDEXP_F16_VOPProfile_True16 : VOPProfile_True16<VOP_F16_F16_I32> {
let isReMaterializable = 1 in {
let FPDPRounding = 1 in {
let SubtargetPredicate = NotHasTrue16BitInsts, OtherPredicates = [Has16BitInsts] in
- defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
+ defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", LDEXP_F16_VOPProfile>;
let SubtargetPredicate = HasTrue16BitInsts in
- defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16, AMDGPUldexp>;
+ defm V_LDEXP_F16_t16 : VOP2Inst <"v_ldexp_f16_t16", LDEXP_F16_VOPProfile_True16>;
} // End FPDPRounding = 1
// FIXME VOP3 Only instructions. NFC using VOPProfile_True16 for these until a planned change to use a new register class for VOP3 encoded True16 instructions
defm V_LSHLREV_B16 : VOP2Inst_e64_t16 <"v_lshlrev_b16", VOP_I16_I16_I16, clshl_rev_16>;
@@ -898,6 +908,21 @@ defm V_MIN_I16 : VOP2Inst_e64_t16 <"v_min_i16", VOP_I16_I16_I16, smin>;
} // End isCommutable = 1
} // End isReMaterializable = 1
+class LDEXP_F16_Pat <SDPatternOperator op, VOP_Pseudo inst, VOPProfile P = inst.Pfl> : GCNPat <
+ (P.DstVT (op (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+ (i16 (VOP3Mods0 P.Src1VT:$src1, i32:$src1_modifiers)))),
+ (inst $src0_modifiers, $src0,
+ $src1_modifiers, $src1,
+ $clamp, /* clamp */
+ $omod /* omod */)
+>;
+
+let OtherPredicates = [NotHasTrue16BitInsts] in
+def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_e64>;
+
+let OtherPredicates = [HasTrue16BitInsts] in
+def : LDEXP_F16_Pat<any_fldexp, V_LDEXP_F16_t16_e64>;
+
let SubtargetPredicate = isGFX11Plus in {
let isCommutable = 1 in {
defm V_AND_B16_t16 : VOP2Inst_e64 <"v_and_b16_t16", VOPProfile_True16<VOP_I16_I16_I16>, and>;
@@ -1266,13 +1291,13 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
VOP3e_gfx11<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}
multiclass VOP2_Real_dpp_gfx11<bits<6> op> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
def _dpp_gfx11 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX11> {
let DecoderNamespace = "DPPGFX11";
}
}
multiclass VOP2_Real_dpp8_gfx11<bits<6> op> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
def _dpp8_gfx11 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
let DecoderNamespace = "DPP8GFX11";
}
@@ -1302,7 +1327,7 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
multiclass VOP2_Real_dpp_with_name_gfx11<bits<6> op, string opName,
string asmName> {
defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
- foreach _ = BoolToList<ps.Pfl.HasExtDPP>.ret in
+ if ps.Pfl.HasExtDPP then
def _dpp_gfx11 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"),
SIEncodingFamily.GFX11> {
let AsmString = asmName # ps.Pfl.AsmDPP16;
@@ -1312,7 +1337,7 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
multiclass VOP2_Real_dpp8_with_name_gfx11<bits<6> op, string opName,
string asmName> {
defvar ps = !cast<VOP2_Pseudo>(opName#"_e32");
- foreach _ = BoolToList<ps.Pfl.HasExtDPP>.ret in
+ if ps.Pfl.HasExtDPP then
def _dpp8_gfx11 : VOP2_DPP8<op, ps> {
let AsmString = asmName # ps.Pfl.AsmDPP8;
let DecoderNamespace = "DPP8GFX11";
@@ -1329,14 +1354,14 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
}
}
multiclass VOP2be_Real_dpp_gfx11<bits<6> op, string opName, string asmName> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
def _dpp_gfx11 :
VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX11, asmName> {
string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
let AsmString = asmName # !subst(", vcc", "", AsmDPP);
let DecoderNamespace = "DPPGFX11";
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
def _dpp_w32_gfx11 :
Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
@@ -1344,7 +1369,7 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave32;
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
def _dpp_w64_gfx11 :
Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
@@ -1354,14 +1379,14 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
}
}
multiclass VOP2be_Real_dpp8_gfx11<bits<6> op, string opName, string asmName> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
def _dpp8_gfx11 :
VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
let DecoderNamespace = "DPP8GFX11";
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
def _dpp8_w32_gfx11 :
VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
@@ -1369,7 +1394,7 @@ let AssemblerPredicate = isGFX11Only, DecoderNamespace = "GFX11" in {
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave32;
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP>.ret in
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtDPP then
def _dpp8_w64_gfx11 :
VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
@@ -1477,19 +1502,19 @@ defm V_FMAMK_F16_t16 : VOP2Only_Real_MADK_gfx11_with_name<0x037, "v_fmamk_
defm V_FMAAK_F16_t16 : VOP2Only_Real_MADK_gfx11_with_name<0x038, "v_fmaak_f16">;
// VOP3 only.
-defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11<0x25d>;
-defm V_LDEXP_F32 : VOP3Only_Realtriple_gfx11<0x31c>;
-defm V_BFM_B32 : VOP3Only_Realtriple_gfx11<0x31d>;
-defm V_BCNT_U32_B32 : VOP3Only_Realtriple_gfx11<0x31e>;
-defm V_MBCNT_LO_U32_B32 : VOP3Only_Realtriple_gfx11<0x31f>;
-defm V_MBCNT_HI_U32_B32 : VOP3Only_Realtriple_gfx11<0x320>;
-defm V_CVT_PKNORM_I16_F32 : VOP3Only_Realtriple_gfx11<0x321>;
-defm V_CVT_PKNORM_U16_F32 : VOP3Only_Realtriple_gfx11<0x322>;
-defm V_CVT_PK_U16_U32 : VOP3Only_Realtriple_gfx11<0x323>;
-defm V_CVT_PK_I16_I32 : VOP3Only_Realtriple_gfx11<0x324>;
-defm V_ADD_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x300>;
-defm V_SUB_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x301>;
-defm V_SUBREV_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x302>;
+defm V_CNDMASK_B16 : VOP3Only_Realtriple_gfx11<0x25d>;
+defm V_LDEXP_F32 : VOP3Only_Realtriple_gfx11<0x31c>;
+defm V_BFM_B32 : VOP3Only_Realtriple_gfx11<0x31d>;
+defm V_BCNT_U32_B32 : VOP3Only_Realtriple_gfx11<0x31e>;
+defm V_MBCNT_LO_U32_B32 : VOP3Only_Realtriple_gfx11<0x31f>;
+defm V_MBCNT_HI_U32_B32 : VOP3Only_Realtriple_gfx11<0x320>;
+defm V_CVT_PK_NORM_I16_F32 : VOP3Only_Realtriple_with_name_gfx11<0x321, "V_CVT_PKNORM_I16_F32", "v_cvt_pk_norm_i16_f32">;
+defm V_CVT_PK_NORM_U16_F32 : VOP3Only_Realtriple_with_name_gfx11<0x322, "V_CVT_PKNORM_U16_F32", "v_cvt_pk_norm_u16_f32">;
+defm V_CVT_PK_U16_U32 : VOP3Only_Realtriple_gfx11<0x323>;
+defm V_CVT_PK_I16_I32 : VOP3Only_Realtriple_gfx11<0x324>;
+defm V_ADD_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x300>;
+defm V_SUB_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x301>;
+defm V_SUBREV_CO_U32 : VOP3beOnly_Realtriple_gfx11<0x302>;
let SubtargetPredicate = isGFX11Plus in {
defm : VOP2eInstAliases<V_CNDMASK_B32_e32, V_CNDMASK_B32_e32_gfx11>;
@@ -1533,7 +1558,7 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
VOP3e_gfx10<{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
}
multiclass VOP2_Real_sdwa_gfx10<bits<6> op> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
+ if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx10 :
VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
@@ -1541,13 +1566,13 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
}
}
multiclass VOP2_Real_dpp_gfx10<bits<6> op> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in
+ if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX10> {
let DecoderNamespace = "SDWA10";
}
}
multiclass VOP2_Real_dpp8_gfx10<bits<6> op> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP>.ret in
+ if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExt32BitDPP then
def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(NAME#"_e32")> {
let DecoderNamespace = "DPP8";
}
@@ -1576,7 +1601,7 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
let DecoderNamespace = "SDWA10" in {
multiclass VOP2_Real_sdwa_gfx10_with_name<bits<6> op, string opName,
string asmName> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx10 :
VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
@@ -1586,7 +1611,7 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
}
multiclass VOP2_Real_dpp_gfx10_with_name<bits<6> op, string opName,
string asmName> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
def _dpp_gfx10 : VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10> {
VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
let AsmString = asmName # ps.Pfl.AsmDPP16;
@@ -1594,7 +1619,7 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
}
multiclass VOP2_Real_dpp8_gfx10_with_name<bits<6> op, string opName,
string asmName> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
def _dpp8_gfx10 : VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
VOP2_Pseudo ps = !cast<VOP2_Pseudo>(opName#"_e32");
let AsmString = asmName # ps.Pfl.AsmDPP8;
@@ -1622,7 +1647,7 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
}
}
multiclass VOP2be_Real_sdwa_gfx10<bits<6> op, string opName, string asmName> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx10 :
VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
@@ -1630,7 +1655,7 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
let AsmString = asmName # !subst(", vcc", "", Ps.AsmOperands);
let DecoderNamespace = "SDWA10";
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_w32_gfx10 :
Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
@@ -1640,7 +1665,7 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
let DecoderNamespace = "SDWA10";
let WaveSizePredicate = isWave32;
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9>.ret in
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_w64_gfx10 :
Base_VOP_SDWA10_Real<!cast<VOP2_SDWA_Pseudo>(opName#"_sdwa")>,
VOP2_SDWA9Ae<op{5-0}, !cast<VOP2_SDWA_Pseudo>(opName#"_sdwa").Pfl> {
@@ -1652,14 +1677,14 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
}
}
multiclass VOP2be_Real_dpp_gfx10<bits<6> op, string opName, string asmName> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
def _dpp_gfx10 :
VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), SIEncodingFamily.GFX10, asmName> {
string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
let AsmString = asmName # !subst(", vcc", "", AsmDPP);
let DecoderNamespace = "SDWA10";
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
def _dpp_w32_gfx10 :
Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
@@ -1667,7 +1692,7 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave32;
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
def _dpp_w64_gfx10 :
Base_VOP2_DPP16<op, !cast<VOP2_DPP_Pseudo>(opName#"_dpp"), asmName> {
string AsmDPP = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP16;
@@ -1677,14 +1702,14 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
}
}
multiclass VOP2be_Real_dpp8_gfx10<bits<6> op, string opName, string asmName> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
def _dpp8_gfx10 :
VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
let AsmString = asmName # !subst(", vcc", "", AsmDPP8);
let DecoderNamespace = "DPP8";
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
def _dpp8_w32_gfx10 :
VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
@@ -1692,7 +1717,7 @@ let AssemblerPredicate = isGFX10Only, DecoderNamespace = "GFX10" in {
let isAsmParserOnly = 1;
let WaveSizePredicate = isWave32;
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP>.ret in
+ if !cast<VOP2_Pseudo>(opName#"_e32").Pfl.HasExt32BitDPP then
def _dpp8_w64_gfx10 :
VOP2_DPP8<op, !cast<VOP2_Pseudo>(opName#"_e32")> {
string AsmDPP8 = !cast<VOP2_Pseudo>(opName#"_e32").Pfl.AsmDPP8;
@@ -2014,14 +2039,14 @@ multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
} // End AssemblerPredicate = isGFX8GFX9, DecoderNamespace = "GFX8"
multiclass VOP2_SDWA_Real <bits<6> op> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA>.ret in
+ if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA then
def _sdwa_vi :
VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
}
multiclass VOP2_SDWA9_Real <bits<6> op> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
+ if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx9 :
VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
@@ -2044,14 +2069,14 @@ multiclass VOP2be_Real_e32e64_vi_only <bits<6> op, string OpName, string AsmName
let AsmString = AsmName # ps.AsmOperands;
let DecoderNamespace = "GFX8";
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA>.ret in
+ if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA then
def _sdwa_vi :
VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
let AsmString = AsmName # ps.AsmOperands;
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
+ if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP then
def _dpp_vi :
VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.VI>,
VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
@@ -2078,14 +2103,14 @@ multiclass VOP2be_Real_e32e64_gfx9 <bits<6> op, string OpName, string AsmName> {
let AsmString = AsmName # ps.AsmOperands;
let DecoderNamespace = "GFX9";
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA9>.ret in
+ if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx9 :
VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa")>,
VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa").Pfl> {
VOP2_SDWA_Pseudo ps = !cast<VOP2_SDWA_Pseudo>(OpName#"_sdwa");
let AsmString = AsmName # ps.AsmOperands;
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP>.ret in
+ if !cast<VOP2_Pseudo>(OpName#"_e32").Pfl.HasExtDPP then
def _dpp_gfx9 :
VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(OpName#"_dpp"), SIEncodingFamily.GFX9>,
VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(OpName#"_dpp")> {
@@ -2106,12 +2131,12 @@ multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
VOP3e_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
let DecoderNamespace = "GFX9";
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
+ if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx9 :
VOP_SDWA9_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
VOP2_SDWA9Ae <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl> {
}
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
def _dpp_gfx9 :
VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX9>,
VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
@@ -2124,7 +2149,7 @@ multiclass VOP2_Real_e32e64_gfx9 <bits<6> op> {
multiclass VOP2_Real_e32e64_vi <bits<6> op> :
Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op>, VOP2_SDWA9_Real<op> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
def _dpp_vi :
VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.VI>,
VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
@@ -2271,7 +2296,7 @@ let AssemblerPredicate = isGFX90APlus, DecoderNamespace = "GFX90A" in {
multiclass VOP2_Real_e32e64_gfx90a <bits<6> op> :
Base_VOP2_Real_e32e64_gfx90a<op> {
- foreach _ = BoolToList<!cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP>.ret in
+ if !cast<VOP2_Pseudo>(NAME#"_e32").Pfl.HasExtDPP then
def _dpp_gfx90a :
VOP_DPP_Real<!cast<VOP2_DPP_Pseudo>(NAME#"_dpp"), SIEncodingFamily.GFX90A>,
VOP2_DPPe<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")> {
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 848d1ad1f6c7..c0e0ac1b4ec8 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -61,7 +61,7 @@ class VOP3Interp<string OpName, VOPProfile P, list<dag> pattern = []> :
def VOP3_INTERP : VOPProfile<[f32, f32, i32, untyped]> {
let Src0Mod = FPVRegInputMods;
let Ins64 = (ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
- Attr:$attr, AttrChan:$attrchan,
+ InterpAttr:$attr, InterpAttrChan:$attrchan,
clampmod0:$clamp, omod0:$omod);
let Asm64 = "$vdst, $src0_modifiers, $attr$attrchan$clamp$omod";
@@ -69,7 +69,7 @@ def VOP3_INTERP : VOPProfile<[f32, f32, i32, untyped]> {
def VOP3_INTERP_MOV : VOPProfile<[f32, i32, i32, untyped]> {
let Ins64 = (ins InterpSlot:$src0,
- Attr:$attr, AttrChan:$attrchan,
+ InterpAttr:$attr, InterpAttrChan:$attrchan,
clampmod0:$clamp, omod0:$omod);
let Asm64 = "$vdst, $src0, $attr$attrchan$clamp$omod";
@@ -90,16 +90,16 @@ class getInterp16Ins <bit HasSrc2, bit HasOMod,
dag ret = !if(HasSrc2,
!if(HasOMod,
(ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
- Attr:$attr, AttrChan:$attrchan,
+ InterpAttr:$attr, InterpAttrChan:$attrchan,
Src2Mod:$src2_modifiers, VRegSrc_32:$src2,
highmod:$high, clampmod0:$clamp, omod0:$omod),
(ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
- Attr:$attr, AttrChan:$attrchan,
+ InterpAttr:$attr, InterpAttrChan:$attrchan,
Src2Mod:$src2_modifiers, VRegSrc_32:$src2,
highmod:$high, clampmod0:$clamp)
),
(ins Src0Mod:$src0_modifiers, VRegSrc_32:$src0,
- Attr:$attr, AttrChan:$attrchan,
+ InterpAttr:$attr, InterpAttrChan:$attrchan,
highmod:$high, clampmod0:$clamp, omod0:$omod)
);
}
@@ -219,7 +219,7 @@ defm V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", DIV_FIXUP_F32_PROF, AMDGPUdi
let SchedRW = [WriteDoubleAdd], FPDPRounding = 1 in {
defm V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, AMDGPUdiv_fixup>;
- defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp>;
+ defm V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, any_fldexp>;
} // End SchedRW = [WriteDoubleAdd], FPDPRounding = 1
} // End isReMaterializable = 1
@@ -263,7 +263,7 @@ let SchedRW = [Write64Bit] in {
def : GCNPat<
(i32 (DivergentUnaryFrag<sext> i16:$src)),
- (i32 (V_BFE_I32_e64 $src, (S_MOV_B32 (i32 0)), (S_MOV_B32 (i32 0x10))))
+ (i32 (V_BFE_I32_e64 i16:$src, (i32 0), (i32 0x10)))
>;
let isReMaterializable = 1 in {
@@ -308,11 +308,11 @@ let FPDPRounding = 1 in {
defm V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, any_fma>;
} // End Predicates = [Has16BitInsts, isGFX8Only]
- let renamedInGFX9 = 1, Predicates = [Has16BitInsts, isGFX9Plus] in {
+ let renamedInGFX9 = 1, SubtargetPredicate = isGFX9Plus in {
defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst <"v_div_fixup_f16_gfx9",
VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUdiv_fixup>;
defm V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, any_fma>;
- } // End renamedInGFX9 = 1, Predicates = [Has16BitInsts, isGFX9Plus]
+ } // End renamedInGFX9 = 1, SubtargetPredicate = isGFX9Plus
} // End FPDPRounding = 1
let SubtargetPredicate = Has16BitInsts, isCommutable = 1 in {
@@ -381,36 +381,43 @@ def V_INTERP_P2_F32_e64 : VOP3Interp <"v_interp_p2_f32", VOP3_INTERP>;
def V_INTERP_MOV_F32_e64 : VOP3Interp <"v_interp_mov_f32", VOP3_INTERP_MOV>;
} // End SubtargetPredicate = isGFX8Plus, Uses = [MODE, M0, EXEC], OtherPredicates = [isNotGFX90APlus]
-let Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9] in {
+// Note: 16-bit instructions produce a 0 result in the high 16 bits
+// on GFX8 and GFX9 and preserve high 16 bits on GFX10+
+multiclass Arithmetic_i16_0Hi_TernaryPats <SDPatternOperator op, Instruction inst> {
+ def : GCNPat<
+ (i32 (zext (op i16:$src0, i16:$src1, i16:$src2))),
+ (inst VSrc_b16:$src0, VSrc_b16:$src1, VSrc_b16:$src2)
+ >;
+}
-multiclass Ternary_i16_Pats <SDPatternOperator op1, SDPatternOperator op2,
- Instruction inst> {
-def : GCNPat <
- (op2 (op1 i16:$src0, i16:$src1), i16:$src2),
- (inst i16:$src0, i16:$src1, i16:$src2, (i1 0))
->;
+let Predicates = [Has16BitInsts, isGFX8GFX9] in {
+defm : Arithmetic_i16_0Hi_TernaryPats<imad, V_MAD_U16_e64>;
+}
+
+let Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9] in {
+// FIXME: Should be able to just pass imad to the instruction
+// definition pattern, but the implied clamp input interferes.
+multiclass Ternary_i16_Pats <SDPatternOperator op, Instruction inst> {
+ def : GCNPat <
+ (op i16:$src0, i16:$src1, i16:$src2),
+ (inst i16:$src0, i16:$src1, i16:$src2, (i1 0))
+ >;
}
-defm: Ternary_i16_Pats<mul, add, V_MAD_U16_e64>;
-defm: Ternary_i16_Pats<mul, add, V_MAD_I16_e64>;
+defm: Ternary_i16_Pats<imad, V_MAD_U16_e64>;
} // End Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9]
-let Predicates = [Has16BitInsts, isGFX10Plus] in {
-multiclass Ternary_i16_Pats_gfx9<SDPatternOperator op1, SDPatternOperator op2,
- Instruction inst> {
-def : GCNPat <
+class Ternary_i16_Pats_gfx9<SDPatternOperator op1, SDPatternOperator op2,
+ Instruction inst> : GCNPat <
(op2 (op1 i16:$src0, i16:$src1), i16:$src2),
(inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
>;
-}
-
-defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_e64>;
-defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_I16_gfx9_e64>;
-
+let Predicates = [Has16BitInsts, isGFX10Plus] in {
+def: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_e64>;
} // End Predicates = [Has16BitInsts, isGFX10Plus]
class ThreeOpFragSDAG<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
@@ -673,11 +680,19 @@ def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3
let HasExtDPP = 0;
}
+def opsel_i1timm : SDNodeXForm<timm, [{
+ return CurDAG->getTargetConstant(
+ N->getZExtValue() ? SISrcMods::OP_SEL_0 : SISrcMods::NONE,
+ SDLoc(N), MVT::i32);
+}]>;
+def gi_opsel_i1timm : GICustomOperandRenderer<"renderOpSelTImm">,
+ GISDNodeXFormEquiv<opsel_i1timm>;
+
class PermlanePat<SDPatternOperator permlane,
Instruction inst> : GCNPat<
(permlane i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2,
timm:$fi, timm:$bc),
- (inst (as_i1timm $fi), VGPR_32:$src0, (as_i1timm $bc),
+ (inst (opsel_i1timm $fi), VGPR_32:$src0, (opsel_i1timm $bc),
SCSrc_b32:$src1, 0, SCSrc_b32:$src2, VGPR_32:$vdst_in)
>;
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index da12515c817b..71e09611e74e 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -35,7 +35,7 @@ class VOP3P_Mix_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR,
FP16InputMods:$src2_modifiers, VCSrc_f16:$src2);
dag dpp_srcs =
(ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0,
- FPVRegInputMods:$src1_modifiers, VGPRSrc_32:$src1,
+ FPVRegInputMods:$src1_modifiers, VRegSrc_32:$src1,
FP16InputMods:$src2_modifiers, VCSrc_f16:$src2);
// FIXME: clampmod0 misbehaves with the non-default vdst_in
@@ -142,9 +142,34 @@ def : VOP3PSatPat<usubsat, V_PK_SUB_U16>;
def : VOP3PSatPat<ssubsat, V_PK_SUB_I16>;
} // End SubtargetPredicate = HasVOP3PInsts
+// TODO: Make sure we're doing the right thing with denormals. Note
+// that FMA and MAD will differ.
multiclass MadFmaMixPats<SDPatternOperator fma_like,
+ Instruction mix_inst,
Instruction mixlo_inst,
Instruction mixhi_inst> {
+ // At least one of the operands needs to be an fpextend of an f16
+ // for this to be worthwhile, so we need three patterns here.
+ // TODO: Could we use a predicate to inspect src1/2/3 instead?
+ def : GCNPat <
+ (f32 (fma_like (f32 (VOP3PMadMixModsExt f16:$src0, i32:$src0_mods)),
+ (f32 (VOP3PMadMixMods f16:$src1, i32:$src1_mods)),
+ (f32 (VOP3PMadMixMods f16:$src2, i32:$src2_mods)))),
+ (mix_inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2,
+ DSTCLAMP.NONE)>;
+ def : GCNPat <
+ (f32 (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_mods)),
+ (f32 (VOP3PMadMixModsExt f16:$src1, i32:$src1_mods)),
+ (f32 (VOP3PMadMixMods f32:$src2, i32:$src2_mods)))),
+ (mix_inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2,
+ DSTCLAMP.NONE)>;
+ def : GCNPat <
+ (f32 (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_mods)),
+ (f32 (VOP3PMadMixMods f32:$src1, i32:$src1_mods)),
+ (f32 (VOP3PMadMixModsExt f16:$src2, i32:$src2_mods)))),
+ (mix_inst $src0_mods, $src0, $src1_mods, $src1, $src2_mods, $src2,
+ DSTCLAMP.NONE)>;
+
def : GCNPat <
(f16 (fpround (fma_like (f32 (VOP3PMadMixMods f16:$src0, i32:$src0_modifiers)),
(f32 (VOP3PMadMixMods f16:$src1, i32:$src1_modifiers)),
@@ -201,9 +226,29 @@ multiclass MadFmaMixPats<SDPatternOperator fma_like,
DSTCLAMP.ENABLE,
(i32 (IMPLICIT_DEF)))))
>;
+
+ def : GCNPat <
+ (f16 (fpround (fmul (f32 (VOP3PMadMixMods f32:$src0, i32:$src0_modifiers)),
+ (f32 (VOP3PMadMixMods f32:$src1, i32:$src1_modifiers))))),
+ (mixlo_inst $src0_modifiers, $src0,
+ $src1_modifiers, $src1,
+ (i32 0), (i32 0),
+ DSTCLAMP.NONE,
+ (i32 (IMPLICIT_DEF)))
+ >;
+
+ def : GCNPat <
+ (build_vector f16:$elt0, (fpround (fmul (f32 (VOP3PMadMixMods f32:$src0, i32:$src0_modifiers)),
+ (f32 (VOP3PMadMixMods f32:$src1, i32:$src1_modifiers))))),
+ (v2f16 (mixhi_inst $src0_modifiers, $src0,
+ $src1_modifiers, $src1,
+ (i32 0), (i32 0),
+ DSTCLAMP.NONE,
+ VGPR_32:$elt0))
+ >;
}
-let SubtargetPredicate = HasMadMixInsts in {
+let SubtargetPredicate = HasMadMixInsts, OtherPredicates = [NoFP32Denormals] in {
// These are VOP3a-like opcodes which accept no omod.
// Size of src arguments (16/32) is controlled by op_sel.
@@ -222,8 +267,8 @@ defm V_MAD_MIXHI_F16 : VOP3_VOP3PInst<"v_mad_mixhi_f16", VOP3P_Mix_Profile<VOP_F
} // End FPDPRounding = 1
}
-defm : MadFmaMixPats<fmad, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
-} // End SubtargetPredicate = HasMadMixInsts
+defm : MadFmaMixPats<fmad, V_MAD_MIX_F32, V_MAD_MIXLO_F16, V_MAD_MIXHI_F16>;
+} // End SubtargetPredicate = HasMadMixInsts, OtherPredicates = [NoFP32Denormals]
// Essentially the same as the mad_mix versions
@@ -243,7 +288,7 @@ defm V_FMA_MIXHI_F16 : VOP3_VOP3PInst<"v_fma_mixhi_f16", VOP3P_Mix_Profile<VOP_F
} // End FPDPRounding = 1
}
-defm : MadFmaMixPats<fma, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
+defm : MadFmaMixPats<fma, V_FMA_MIX_F32, V_FMA_MIXLO_F16, V_FMA_MIXHI_F16>;
}
// Defines patterns that extract signed 4bit from each Idx[0].
@@ -337,11 +382,12 @@ defm V_DOT2_U32_U16 : VOP3PInst<"v_dot2_u32_u16",
} // End SubtargetPredicate = HasDot2Insts
-let SubtargetPredicate = HasDot7Insts in {
-
+let SubtargetPredicate = HasDot10Insts in
defm V_DOT2_F32_F16 : VOP3PInst<"v_dot2_f32_f16",
VOP3P_Profile<VOP_F32_V2F16_V2F16_F32, VOP3_REGULAR, /*HasDPP*/ 1>,
AMDGPUfdot2, 1/*ExplicitClamp*/>;
+
+let SubtargetPredicate = HasDot7Insts in {
defm V_DOT4_U32_U8 : VOP3PInst<"v_dot4_u32_u8",
VOP3P_Profile<VOP_I32_I32_I32_I32, VOP3_PACKED>, int_amdgcn_udot4, 1>;
defm V_DOT8_U32_U4 : VOP3PInst<"v_dot8_u32_u4",
@@ -581,7 +627,7 @@ multiclass MAIInst<string OpName, string P, SDPatternOperator node,
MFMATable<0, NAME # "_vgprcd_e64">;
}
- foreach _ = BoolToList<NoDstOverlap>.ret in {
+ if NoDstOverlap then {
let Constraints = !if(NoDstOverlap, "$vdst = $src2", ""),
isConvertibleToThreeAddress = NoDstOverlap,
Mnemonic = OpName in {
@@ -989,7 +1035,7 @@ multiclass VOP3P_Real_MFMA_gfx940_aliases<string NameFrom, string NameTo, string
VOPProfile Pfl_ACD = PS_ACD.Pfl,
VOPProfile Pfl_VCD = PS_VCD.Pfl> {
let Predicates = [isGFX940Plus] in {
- foreach _ = BoolToList<!ne(NameFrom, NameTo)>.ret in {
+ if !ne(NameFrom, NameTo) then {
def : InstAlias <NameTo # " " # PS_ACD.AsmOperands,
(!cast<VOP3P_Real>(Op # "_gfx940_acd") Pfl_ACD.DstRC:$vdst,
Pfl_ACD.Src0RC64:$src0, Pfl_ACD.Src1RC64:$src1, Pfl_ACD.Src2RC64:$src2,
@@ -1017,7 +1063,7 @@ multiclass VOP3P_Real_MFMA_gfx940<bits<7> op, string Name = !cast<VOP3_Pseudo>(N
defm : VOP3P_Real_MFMA_gfx940_aliases<Name, PS_ACD.Mnemonic, NAME>;
- foreach _ = BoolToList<!ne(!subst("_1k", "", PS_ACD.Mnemonic), PS_ACD.Mnemonic)>.ret in
+ if !ne(!subst("_1k", "", PS_ACD.Mnemonic), PS_ACD.Mnemonic) then
defm : VOP3P_Real_MFMA_gfx940_aliases<Name, !subst("_1k", "", PS_ACD.Mnemonic), NAME>;
}
@@ -1081,28 +1127,16 @@ defm V_FMA_MIXHI_F16 : VOP3P_Real_vi <0x22>;
}
-let SubtargetPredicate = HasDot2Insts in {
-
defm V_DOT2_I32_I16 : VOP3P_Real_vi <0x26>;
defm V_DOT2_U32_U16 : VOP3P_Real_vi <0x27>;
-} // End SubtargetPredicate = HasDot2Insts
-
-let SubtargetPredicate = HasDot7Insts in {
-
defm V_DOT2_F32_F16 : VOP3P_Real_vi <0x23>;
defm V_DOT4_U32_U8 : VOP3P_Real_vi <0x29>;
defm V_DOT8_U32_U4 : VOP3P_Real_vi <0x2b>;
-} // End SubtargetPredicate = HasDot7Insts
-
-let SubtargetPredicate = HasDot1Insts in {
-
defm V_DOT4_I32_I8 : VOP3P_Real_vi <0x28>;
defm V_DOT8_I32_I4 : VOP3P_Real_vi <0x2a>;
-} // End SubtargetPredicate = HasDot1Insts
-
let SubtargetPredicate = HasMAIInsts in {
defm V_ACCVGPR_READ_B32 : VOP3P_Real_MAI <0x58>;
@@ -1225,24 +1259,12 @@ defm V_FMA_MIX_F32 : VOP3P_Real_gfx10_gfx11_Triple <0x20>;
defm V_FMA_MIXLO_F16 : VOP3P_Real_gfx10_gfx11_Triple <0x21>;
defm V_FMA_MIXHI_F16 : VOP3P_Real_gfx10_gfx11_Triple <0x22>;
-let SubtargetPredicate = HasDot2Insts in {
-
defm V_DOT2_I32_I16 : VOP3P_Real_gfx10 <0x14>;
defm V_DOT2_U32_U16 : VOP3P_Real_gfx10 <0x15>;
-} // End SubtargetPredicate = HasDot2Insts
-
-let SubtargetPredicate = HasDot7Insts in {
-
defm V_DOT2_F32_F16 : VOP3P_Real_gfx10_gfx11_Triple <0x13>;
defm V_DOT4_U32_U8 : VOP3P_Real_gfx10_gfx11 <0x17>;
defm V_DOT8_U32_U4 : VOP3P_Real_gfx10_gfx11 <0x19>;
-} // End SubtargetPredicate = HasDot7Insts
-
-let SubtargetPredicate = HasDot1Insts in {
-
defm V_DOT4_I32_I8 : VOP3P_Real_gfx10 <0x16>;
defm V_DOT8_I32_I4 : VOP3P_Real_gfx10 <0x18>;
-
-} // End SubtargetPredicate = HasDot1Insts
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 439ca40ae3fb..6fc3d0957dce 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -299,7 +299,7 @@ multiclass VOPC_Pseudos <string opName,
let isCommutable = 1;
}
- foreach _ = BoolToList<P.HasExtSDWA>.ret in
+ if P.HasExtSDWA then
def _sdwa : VOPC_SDWA_Pseudo <opName, P> {
let Defs = !if(DefExec, [EXEC], []);
let SchedRW = P.Schedule;
@@ -360,7 +360,7 @@ multiclass VOPCX_Pseudos <string opName,
let IsVCMPX = 1;
}
- foreach _ = BoolToList<P_NoSDst.HasExtSDWA>.ret in
+ if P_NoSDst.HasExtSDWA then
def _nosdst_sdwa : VOPC_SDWA_Pseudo <opName#"_nosdst", P_NoSDst> {
let Defs = [EXEC];
let SchedRW = P_NoSDst.Schedule;
@@ -770,7 +770,7 @@ class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType src0VT, ValueType
// DPP8 forbids modifiers and can inherit from VOPC_Profile
let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- dag InsPartVOP3DPP = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, VGPRSrc_32:$src1);
+ dag InsPartVOP3DPP = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, VRegSrc_32:$src1);
let InsVOP3Base = !con(InsPartVOP3DPP, !if(HasOpSel, (ins op_sel0:$op_sel),
(ins)));
let AsmVOP3Base = "$sdst, $src0_modifiers, $src1";
@@ -831,7 +831,7 @@ class getVOPCClassPat64 <VOPProfile P> {
list<dag> ret =
[(set i1:$sdst,
(AMDGPUfp_class
- (P.Src0VT (VOP3Mods P.Src0VT:$src0, i32:$src0_modifiers)),
+ (P.Src0VT (VOP3ModsNonCanonicalizing P.Src0VT:$src0, i32:$src0_modifiers)),
i32:$src1))];
}
@@ -854,7 +854,7 @@ multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec,
let SchedRW = p.Schedule;
}
- foreach _ = BoolToList<p.HasExtSDWA>.ret in
+ if p.HasExtSDWA then
def _sdwa : VOPC_SDWA_Pseudo <opName, p> {
let Defs = !if(DefExec, !if(DefVcc, [VCC, EXEC], [EXEC]),
!if(DefVcc, [VCC], []));
@@ -902,7 +902,7 @@ multiclass VOPCX_Class_Pseudos <string opName,
let SubtargetPredicate = HasNoSdstCMPX;
}
- foreach _ = BoolToList<P_NoSDst.HasExtSDWA>.ret in
+ if P_NoSDst.HasExtSDWA then
def _nosdst_sdwa : VOPC_SDWA_Pseudo <opName#"_nosdst", P_NoSDst> {
let Defs = [EXEC];
let SchedRW = P_NoSDst.Schedule;
@@ -992,11 +992,18 @@ multiclass ICMP_Pattern <PatFrags cond, Instruction inst, ValueType vt> {
(i64 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_64))
>;
- let WaveSizePredicate = isWave32 in
- def : GCNPat <
- (i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
- (i32 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_32))
- >;
+ let WaveSizePredicate = isWave32 in {
+ def : GCNPat <
+ (i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
+ (i32 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_32))
+ >;
+
+ // Support codegen of i64 setcc in wave32 mode.
+ def : GCNPat <
+ (i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
+ (i64 (REG_SEQUENCE SReg_64, (inst $src0, $src1), sub0, (S_MOV_B32 (i32 0)), sub1))
+ >;
+ }
}
defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
@@ -1056,13 +1063,22 @@ multiclass FCMP_Pattern <PatFrags cond, Instruction inst, ValueType vt> {
DSTCLAMP.NONE), SReg_64))
>;
- let WaveSizePredicate = isWave32 in
- def : GCNPat <
- (i32 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
- (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
- (i32 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
- DSTCLAMP.NONE), SReg_32))
- >;
+ let WaveSizePredicate = isWave32 in {
+ def : GCNPat <
+ (i32 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
+ (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
+ (i32 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
+ DSTCLAMP.NONE), SReg_32))
+ >;
+
+ def : GCNPat <
+ (i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
+ (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
+ (i64 (REG_SEQUENCE SReg_64, (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
+ DSTCLAMP.NONE), sub0,
+ (S_MOV_B32 (i32 0)), sub1))
+ >;
+ }
}
defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64, f32>;
@@ -1320,7 +1336,7 @@ let AssemblerPredicate = isGFX11Only in {
defm : VOPCInstAliases<NAME, "gfx11">;
- foreach _ = BoolToList<ps32.Pfl.HasExtDPP>.ret in {
+ if ps32.Pfl.HasExtDPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(NAME #"_e32" #"_dpp");
defvar AsmDPP = ps32.Pfl.AsmDPP16;
let DecoderNamespace = "DPPGFX11" in {
@@ -1352,7 +1368,7 @@ let AssemblerPredicate = isGFX11Only in {
}
}
}
- foreach _ = BoolToList<ps64.Pfl.HasExtVOP3DPP>.ret in {
+ if ps64.Pfl.HasExtVOP3DPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(NAME #"_e64" #"_dpp");
defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
let DecoderNamespace = "DPPGFX11" in {
@@ -1419,7 +1435,7 @@ let AssemblerPredicate = isGFX11Only in {
defm : VOPCInstAliases<OpName, "gfx11", NAME, asm_name>;
- foreach _ = BoolToList<ps32.Pfl.HasExtDPP>.ret in {
+ if ps32.Pfl.HasExtDPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName #"_e32" #"_dpp");
defvar AsmDPP = ps32.Pfl.AsmDPP16;
let DecoderNamespace = "DPPGFX11" in {
@@ -1456,7 +1472,7 @@ let AssemblerPredicate = isGFX11Only in {
}
}
- foreach _ = BoolToList<ps64.Pfl.HasExtVOP3DPP>.ret in {
+ if ps64.Pfl.HasExtVOP3DPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName #"_e64" #"_dpp");
defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
let DecoderNamespace = "DPPGFX11" in {
@@ -1518,7 +1534,7 @@ let AssemblerPredicate = isGFX11Only in {
defm : VOPCXInstAliases<NAME, "gfx11">;
- foreach _ = BoolToList<ps32.Pfl.HasExtDPP>.ret in {
+ if ps32.Pfl.HasExtDPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(NAME #"_nosdst_e32" #"_dpp");
defvar AsmDPP = ps32.Pfl.AsmDPP16;
let DecoderNamespace = "DPPGFX11" in {
@@ -1535,7 +1551,7 @@ let AssemblerPredicate = isGFX11Only in {
}
}
- foreach _ = BoolToList<ps64.Pfl.HasExtVOP3DPP>.ret in {
+ if ps64.Pfl.HasExtVOP3DPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(NAME #"_nosdst_e64" #"_dpp");
defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
let DecoderNamespace = "DPPGFX11" in {
@@ -1584,7 +1600,7 @@ let AssemblerPredicate = isGFX11Only in {
defm : VOPCXInstAliases<OpName, "gfx11", NAME, asm_name>;
- foreach _ = BoolToList<ps32.Pfl.HasExtDPP>.ret in {
+ if ps32.Pfl.HasExtDPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName#"_nosdst_e32"#"_dpp");
let DecoderNamespace = "DPPGFX11" in {
def _e32_dpp_gfx11 : VOPC_DPP16_SIMC<op{7-0}, psDPP,
@@ -1594,7 +1610,7 @@ let AssemblerPredicate = isGFX11Only in {
def _e32_dpp8_gfx11 : VOPC_DPP8<op{7-0}, ps32, asm_name>;
}
}
- foreach _ = BoolToList<ps64.Pfl.HasExtVOP3DPP>.ret in {
+ if ps64.Pfl.HasExtVOP3DPP then {
defvar psDPP = !cast<VOP_DPP_Pseudo>(OpName#"_nosdst_e64"#"_dpp");
defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16;
let DecoderNamespace = "DPPGFX11" in {
@@ -1821,7 +1837,7 @@ let AssemblerPredicate = isGFX10Only in {
}
} // End DecoderNamespace = "GFX10"
- foreach _ = BoolToList<!cast<VOPC_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
+ if !cast<VOPC_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx10 :
VOP_SDWA10_Real<!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>,
VOPC_SDWA9e<op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
@@ -1847,7 +1863,7 @@ let AssemblerPredicate = isGFX10Only in {
}
} // End DecoderNamespace = "GFX10"
- foreach _ = BoolToList<!cast<VOPC_Pseudo>(NAME#"_nosdst_e32").Pfl.HasExtSDWA9>.ret in
+ if !cast<VOPC_Pseudo>(NAME#"_nosdst_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx10 :
VOP_SDWA10_Real<!cast<VOPC_SDWA_Pseudo>(NAME#"_nosdst_sdwa")>,
VOPC_SDWA9e<op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_nosdst_sdwa").Pfl> {
@@ -2174,12 +2190,12 @@ multiclass VOPC_Real_vi <bits<10> op> {
}
}
- foreach _ = BoolToList<!cast<VOPC_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA>.ret in
+ if !cast<VOPC_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA then
def _sdwa_vi :
VOP_SDWA_Real <!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>,
VOPC_SDWAe <op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
- foreach _ = BoolToList<!cast<VOPC_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9>.ret in
+ if !cast<VOPC_Pseudo>(NAME#"_e32").Pfl.HasExtSDWA9 then
def _sdwa_gfx9 :
VOP_SDWA9_Real <!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>,
VOPC_SDWA9e <op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index d5c662ac0574..3755daf4f9b1 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1268,7 +1268,7 @@ class VOP3InstBase<string OpName, VOPProfile P, SDPatternOperator node = null_fr
multiclass VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag> {
def _e64 : VOP3InstBase<OpName, P, node>;
let SubtargetPredicate = isGFX11Plus in {
- foreach _ = BoolToList<P.HasExtVOP3DPP>.ret in
+ if P.HasExtVOP3DPP then
def _e64_dpp : VOP3_DPP_Pseudo <OpName, P>;
} // end SubtargetPredicate = isGFX11Plus
}
@@ -1329,11 +1329,11 @@ let AssemblerPredicate = isGFX11Only,
bit isSingle = 0> {
defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
let IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
- foreach _ = BoolToList<ps.Pfl.HasOpSel>.ret in
+ if ps.Pfl.HasOpSel then
def _e64_gfx11 :
VOP3_Real<ps, SIEncodingFamily.GFX11>,
VOP3OpSel_gfx11<op, ps.Pfl>;
- foreach _ = BoolToList<!not(ps.Pfl.HasOpSel)>.ret in
+ if !not(ps.Pfl.HasOpSel) then
def _e64_gfx11 :
VOP3_Real<ps, SIEncodingFamily.GFX11>,
VOP3e_gfx11<op, ps.Pfl>;
@@ -1353,11 +1353,11 @@ let AssemblerPredicate = isGFX11Only,
defvar ps = !cast<VOP_Pseudo>(opName#"_e64");
let AsmString = asmName # ps.AsmOperands,
IsSingle = !or(isSingle, ps.Pfl.IsSingle) in {
- foreach _ = BoolToList<ps.Pfl.HasOpSel>.ret in
+ if ps.Pfl.HasOpSel then
def _e64_gfx11 :
VOP3_Real<ps, SIEncodingFamily.GFX11>,
VOP3OpSel_gfx11<op, ps.Pfl>;
- foreach _ = BoolToList<!not(ps.Pfl.HasOpSel)>.ret in
+ if !not(ps.Pfl.HasOpSel) then
def _e64_gfx11 :
VOP3_Real<ps, SIEncodingFamily.GFX11>,
VOP3e_gfx11<op, ps.Pfl>;
@@ -1487,7 +1487,7 @@ include "VOP3PInstructions.td"
include "VOPDInstructions.td"
class ClassPat<Instruction inst, ValueType vt> : GCNPat <
- (is_fpclass (vt (VOP3Mods vt:$src0, i32:$src0_mods)), (i32 timm:$mask)),
+ (is_fpclass (vt (VOP3ModsNonCanonicalizing vt:$src0, i32:$src0_mods)), (i32 timm:$mask)),
(inst i32:$src0_mods, vt:$src0, (V_MOV_B32_e32 timm:$mask))
>;
diff --git a/llvm/lib/Target/ARC/ARCISelLowering.cpp b/llvm/lib/Target/ARC/ARCISelLowering.cpp
index ad63e0c98201..5d9a366f5ed5 100644
--- a/llvm/lib/Target/ARC/ARCISelLowering.cpp
+++ b/llvm/lib/Target/ARC/ARCISelLowering.cpp
@@ -32,7 +32,7 @@
using namespace llvm;
-static SDValue lowerCallResult(SDValue Chain, SDValue InFlag,
+static SDValue lowerCallResult(SDValue Chain, SDValue InGlue,
const SmallVectorImpl<CCValAssign> &RVLocs,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals);
@@ -283,11 +283,11 @@ SDValue ARCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze return values to determine the number of bytes of stack required.
CCState RetCCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
- RetCCInfo.AllocateStack(CCInfo.getNextStackOffset(), Align(4));
+ RetCCInfo.AllocateStack(CCInfo.getStackSize(), Align(4));
RetCCInfo.AnalyzeCallResult(Ins, RetCC_ARC);
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = RetCCInfo.getNextStackOffset();
+ unsigned NumBytes = RetCCInfo.getStackSize();
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
@@ -345,7 +345,7 @@ SDValue ARCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
- // The InFlag in necessary since all emitted instructions must be
+ // The Glue is necessary since all emitted instructions must be
// stuck together.
SDValue Glue;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
@@ -367,7 +367,7 @@ SDValue ARCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Branch + Link = #chain, #target_address, #opt_in_flags...
// = Chain, Callee, Reg#1, Reg#2, ...
//
- // Returns a chain & a flag for retval copy to use.
+ // Returns a chain & a glue for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
Ops.push_back(Chain);
@@ -498,7 +498,7 @@ SDValue ARCTargetLowering::LowerCallArguments(
unsigned StackSlotSize = 4;
if (!IsVarArg)
- AFI->setReturnStackOffset(CCInfo.getNextStackOffset());
+ AFI->setReturnStackOffset(CCInfo.getStackSize());
// All getCopyFromReg ops must precede any getMemcpys to prevent the
// scheduler clobbering a register before it has been copied.
@@ -565,7 +565,7 @@ SDValue ARCTargetLowering::LowerCallArguments(
// There are (std::size(ArgRegs) - FirstVAReg) registers which
// need to be saved.
int VarFI = MFI.CreateFixedObject((std::size(ArgRegs) - FirstVAReg) * 4,
- CCInfo.getNextStackOffset(), true);
+ CCInfo.getStackSize(), true);
AFI->setVarArgsFrameIndex(VarFI);
SDValue FIN = DAG.getFrameIndex(VarFI, MVT::i32);
for (unsigned i = FirstVAReg; i < std::size(ArgRegs); i++) {
@@ -633,7 +633,7 @@ bool ARCTargetLowering::CanLowerReturn(
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
if (!CCInfo.CheckReturn(Outs, RetCC_ARC))
return false;
- if (CCInfo.getNextStackOffset() != 0 && IsVarArg)
+ if (CCInfo.getStackSize() != 0 && IsVarArg)
return false;
return true;
}
@@ -661,7 +661,7 @@ ARCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
CCInfo.AnalyzeReturn(Outs, RetCC_ARC);
- SDValue Flag;
+ SDValue Glue;
SmallVector<SDValue, 4> RetOps(1, Chain);
SmallVector<SDValue, 4> MemOpChains;
// Handle return values that must be copied to memory.
@@ -698,19 +698,19 @@ ARCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (!VA.isRegLoc())
continue;
// Copy the result values into the output registers.
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Glue);
// guarantee that all emitted copies are
// stuck together, avoiding something bad
- Flag = Chain.getValue(1);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
RetOps[0] = Chain; // Update chain.
- // Add the flag if we have it.
- if (Flag.getNode())
- RetOps.push_back(Flag);
+ // Add the glue if we have it.
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
// What to do with the RetOps?
return DAG.getNode(ARCISD::RET, dl, MVT::Other, RetOps);
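
The two renames running through this ARC lowering diff are mechanical API updates: CCState::getNextStackOffset() becomes getStackSize(), and the SelectionDAG "flag" terminology becomes "glue". A minimal sketch of the pattern the Glue variable serves, assuming a generic target's LowerReturn with RVLocs already filled in by AnalyzeReturn (not the actual ARC code):

    // Glue chains the CopyToReg nodes together so the register copies are
    // emitted back to back, and ties the final return node to the last copy.
    SDValue Glue;
    SmallVector<SDValue, 4> RetOps(1, Chain);
    for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
      const CCValAssign &VA = RVLocs[i];
      if (!VA.isRegLoc())
        continue;
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Glue);
      Glue = Chain.getValue(1); // forward the glue to the next copy
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    }
    RetOps[0] = Chain;
    if (Glue.getNode())
      RetOps.push_back(Glue);
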
diff --git a/llvm/lib/Target/ARC/ARCOptAddrMode.cpp b/llvm/lib/Target/ARC/ARCOptAddrMode.cpp
index 358f9039ae22..e7a0b352db8d 100644
--- a/llvm/lib/Target/ARC/ARCOptAddrMode.cpp
+++ b/llvm/lib/Target/ARC/ARCOptAddrMode.cpp
@@ -153,11 +153,10 @@ static bool dominatesAllUsesOf(const MachineInstr *MI, unsigned VReg,
assert(Register::isVirtualRegister(VReg) && "Expected virtual register!");
- for (auto it = MRI->use_nodbg_begin(VReg), end = MRI->use_nodbg_end();
- it != end; ++it) {
- MachineInstr *User = it->getParent();
+ for (const MachineOperand &Use : MRI->use_nodbg_operands(VReg)) {
+ const MachineInstr *User = Use.getParent();
if (User->isPHI()) {
- unsigned BBOperandIdx = User->getOperandNo(&*it) + 1;
+ unsigned BBOperandIdx = Use.getOperandNo() + 1;
MachineBasicBlock *MBB = User->getOperand(BBOperandIdx).getMBB();
if (MBB->empty()) {
const MachineBasicBlock *InstBB = MI->getParent();
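
For reference, the same use-walk written against the range-based API; MachineOperand::getOperandNo() recovers the operand index that the old iterator arithmetic produced. This is a sketch of the pattern, not the exact ARC pass:

    for (const MachineOperand &Use : MRI->use_nodbg_operands(VReg)) {
      const MachineInstr *UserMI = Use.getParent();
      if (UserMI->isPHI()) {
        // PHI operands come in (value, predecessor-block) pairs, so the
        // incoming block sits one slot after the value operand.
        unsigned BBOperandIdx = Use.getOperandNo() + 1;
        const MachineBasicBlock *Pred =
            UserMI->getOperand(BBOperandIdx).getMBB();
        (void)Pred; // the dominance check against Pred would go here
      }
    }
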
diff --git a/llvm/lib/Target/ARC/ARCRegisterInfo.cpp b/llvm/lib/Target/ARC/ARCRegisterInfo.cpp
index 4b818c0b539d..28dca4f4663f 100644
--- a/llvm/lib/Target/ARC/ARCRegisterInfo.cpp
+++ b/llvm/lib/Target/ARC/ARCRegisterInfo.cpp
@@ -63,7 +63,8 @@ static void replaceFrameIndex(MachineBasicBlock::iterator II,
// of the load offset.
const TargetRegisterInfo *TRI =
MBB.getParent()->getSubtarget().getRegisterInfo();
- BaseReg = RS->scavengeRegister(&ARC::GPR32RegClass, II, SPAdj);
+ BaseReg =
+ RS->scavengeRegisterBackwards(ARC::GPR32RegClass, II, false, SPAdj);
assert(BaseReg && "Register scavenging failed.");
LLVM_DEBUG(dbgs() << "Scavenged register " << printReg(BaseReg, TRI)
<< " for FrameReg=" << printReg(FrameReg, TRI)
diff --git a/llvm/lib/Target/ARC/ARCRegisterInfo.h b/llvm/lib/Target/ARC/ARCRegisterInfo.h
index ea82289022eb..fce4b6980450 100644
--- a/llvm/lib/Target/ARC/ARCRegisterInfo.h
+++ b/llvm/lib/Target/ARC/ARCRegisterInfo.h
@@ -39,6 +39,8 @@ public:
bool useFPForScavengingIndex(const MachineFunction &MF) const override;
+ bool supportsBackwardScavenger() const override { return true; }
+
bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
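
Backward scavenging has to be advertised by the target, so the scavengeRegister → scavengeRegisterBackwards switch in ARCRegisterInfo.cpp and the supportsBackwardScavenger() override above go together. A condensed sketch of the pairing, with a placeholder register class name standing in for the target-specific one:

    // In the target's TargetRegisterInfo subclass:
    bool supportsBackwardScavenger() const override { return true; }

    // In eliminateFrameIndex(), when the frame offset does not fit the
    // instruction and a scratch base register is needed:
    Register BaseReg = RS->scavengeRegisterBackwards(
        MyTarget::GPR32RegClass, II, /*RestoreAfter=*/false, SPAdj);
    assert(BaseReg && "Register scavenging failed.");
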
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 5ccc603f6b42..4bb20271d0f2 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -228,10 +228,13 @@ def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true",
def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true",
"CPU fuses literal generation operations">;
-// The way of reading thread pointer.
-// True if read thread pointer from coprocessor register.
-def FeatureReadTp : SubtargetFeature<"read-tp-hard", "IsReadTPHard", "true",
- "Reading thread pointer from register">;
+// Choice of hardware register to use as the thread pointer, if any.
+def FeatureReadTpTPIDRURW : SubtargetFeature<"read-tp-tpidrurw", "IsReadTPTPIDRURW", "true",
+ "Reading thread pointer from TPIDRURW register">;
+def FeatureReadTpTPIDRURO : SubtargetFeature<"read-tp-tpidruro", "IsReadTPTPIDRURO", "true",
+ "Reading thread pointer from TPIDRURO register">;
+def FeatureReadTpTPIDRPRW : SubtargetFeature<"read-tp-tpidrprw", "IsReadTPTPIDRPRW", "true",
+ "Reading thread pointer from TPIDRPRW register">;
// Cyclone can zero VFP registers in 0 cycles.
// True if the instructions "vmov.i32 d0, #0" and "vmov.i32 q0, #0" are
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 5c0c3b30d96e..69df1d12aa8e 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -44,7 +44,6 @@
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -200,6 +199,15 @@ void ARMAsmPrinter::PrintSymbolOperand(const MachineOperand &MO,
O << ":lower16:";
else if (TF & ARMII::MO_HI16)
O << ":upper16:";
+ else if (TF & ARMII::MO_LO_0_7)
+ O << ":lower0_7:";
+ else if (TF & ARMII::MO_LO_8_15)
+ O << ":lower8_15:";
+ else if (TF & ARMII::MO_HI_0_7)
+ O << ":upper0_7:";
+ else if (TF & ARMII::MO_HI_8_15)
+ O << ":upper8_15:";
+
GetARMGVSymbol(MO.getGlobal(), TF)->print(O, MAI);
printOffset(MO.getOffset(), O);
}
@@ -229,6 +237,14 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
O << ":lower16:";
else if (TF == ARMII::MO_HI16)
O << ":upper16:";
+ else if (TF == ARMII::MO_LO_0_7)
+ O << ":lower0_7:";
+ else if (TF == ARMII::MO_LO_8_15)
+ O << ":lower8_15:";
+ else if (TF == ARMII::MO_HI_0_7)
+ O << ":upper0_7:";
+ else if (TF == ARMII::MO_HI_8_15)
+ O << ":upper8_15:";
O << MO.getImm();
break;
}
@@ -287,11 +303,11 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
// Find the 'd' register that has this 's' register as a sub-register,
// and determine the lane number.
- for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) {
- if (!ARM::DPRRegClass.contains(*SR))
+ for (MCPhysReg SR : TRI->superregs(Reg)) {
+ if (!ARM::DPRRegClass.contains(SR))
continue;
- bool Lane0 = TRI->getSubReg(*SR, ARM::ssub_0) == Reg;
- O << ARMInstPrinter::getRegisterName(*SR) << (Lane0 ? "[0]" : "[1]");
+ bool Lane0 = TRI->getSubReg(SR, ARM::ssub_0) == Reg;
+ O << ARMInstPrinter::getRegisterName(SR) << (Lane0 ? "[0]" : "[1]");
return false;
}
}
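
MCSuperRegIterator/MCSubRegIterator loops are replaced by the superregs()/subregs() ranges on the register info classes throughout this import. The lookup above, restated as a sketch (SReg stands for a hypothetical S-register operand):

    for (MCPhysReg DReg : TRI->superregs(SReg)) {
      if (!ARM::DPRRegClass.contains(DReg))
        continue;
      // The S register is lane 0 of the containing D register iff it is ssub_0.
      bool Lane0 = TRI->getSubReg(DReg, ARM::ssub_0) == SReg;
      O << ARMInstPrinter::getRegisterName(DReg) << (Lane0 ? "[0]" : "[1]");
      break;
    }
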
@@ -466,7 +482,7 @@ bool ARMAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
static bool isThumb(const MCSubtargetInfo& STI) {
- return STI.getFeatureBits()[ARM::ModeThumb];
+ return STI.hasFeature(ARM::ModeThumb);
}
void ARMAsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo,
@@ -763,7 +779,7 @@ void ARMAsmPrinter::emitAttributes() {
auto *PACValue = mdconst::extract_or_null<ConstantInt>(
SourceModule->getModuleFlag("sign-return-address"));
- if (PACValue && PACValue->getZExtValue() == 1) {
+ if (PACValue && PACValue->isOne()) {
// If "+pacbti" is used as an architecture extension,
// Tag_PAC_extension is emitted in
// ARMTargetStreamer::emitTargetAttributes().
@@ -776,7 +792,7 @@ void ARMAsmPrinter::emitAttributes() {
auto *BTIValue = mdconst::extract_or_null<ConstantInt>(
SourceModule->getModuleFlag("branch-target-enforcement"));
- if (BTIValue && BTIValue->getZExtValue() == 1) {
+ if (BTIValue && BTIValue->isOne()) {
// If "+pacbti" is used as an architecture extension,
// Tag_BTI_extension is emitted in
// ARMTargetStreamer::emitTargetAttributes().
@@ -1124,10 +1140,24 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
case ARM::tLDRpci:
case ARM::t2MOVi16:
case ARM::t2MOVTi16:
+ case ARM::tMOVi8:
+ case ARM::tADDi8:
+ case ARM::tLSLri:
// special cases:
// 1) for Thumb1 code we sometimes materialize the constant via constpool
// load.
- // 2) for Thumb2 execute only code we materialize the constant via
+ // 2) for Thumb1 execute only code we materialize the constant via the
+ // following pattern:
+ // movs r3, #:upper8_15:<const>
+ // lsls r3, #8
+ // adds r3, #:upper0_7:<const>
+ // lsls r3, #8
+ // adds r3, #:lower8_15:<const>
+ // lsls r3, #8
+ // adds r3, #:lower0_7:<const>
+ // So we need to special-case MOVS, ADDS and LSLS, and keep track of
+ // where we are in the sequence with the simplest of state machines.
+ // 3) for Thumb2 execute only code we materialize the constant via
// immediate constants in 2 separate instructions (MOVW/MOVT).
SrcReg = ~0U;
DstReg = MI->getOperand(0).getReg();
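
Instantiating the Thumb1 execute-only pattern from the comment above for a hypothetical constant 0x12345678 (all four bytes non-zero, so every step of the sequence appears):

    movs r3, #0x12      @ :upper8_15:  r3 = 0x12
    lsls r3, #8         @              r3 = 0x1200
    adds r3, #0x34      @ :upper0_7:   r3 = 0x1234
    lsls r3, #8         @              r3 = 0x123400
    adds r3, #0x56      @ :lower8_15:  r3 = 0x123456
    lsls r3, #8         @              r3 = 0x12345600
    adds r3, #0x78      @ :lower0_7:   r3 = 0x12345678
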
@@ -1318,6 +1348,23 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
Offset = MI->getOperand(2).getImm();
AFI->EHPrologueOffsetInRegs[DstReg] |= (Offset << 16);
break;
+ case ARM::tMOVi8:
+ Offset = MI->getOperand(2).getImm();
+ AFI->EHPrologueOffsetInRegs[DstReg] = Offset;
+ break;
+ case ARM::tLSLri:
+ assert(MI->getOperand(3).getImm() == 8 &&
+ "The shift amount is not equal to 8");
+ assert(MI->getOperand(2).getReg() == MI->getOperand(0).getReg() &&
+ "The source register is not equal to the destination register");
+ AFI->EHPrologueOffsetInRegs[DstReg] <<= 8;
+ break;
+ case ARM::tADDi8:
+ assert(MI->getOperand(2).getReg() == MI->getOperand(0).getReg() &&
+ "The source register is not equal to the destination register");
+ Offset = MI->getOperand(3).getImm();
+ AFI->EHPrologueOffsetInRegs[DstReg] += Offset;
+ break;
case ARM::t2PAC:
case ARM::t2PACBTI:
AFI->EHPrologueRemappedRegs[ARM::R12] = ARM::RA_AUTH_CODE;
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.h b/llvm/lib/Target/ARM/ARMAsmPrinter.h
index f8ff047a1d06..bd2d9c762119 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -120,9 +120,6 @@ private:
// Helpers for emitStartOfAsmFile() and emitEndOfAsmFile()
void emitAttributes();
- // Generic helper used to emit e.g. ARMv5 mul pseudos
- void EmitPatchedInstruction(const MachineInstr *MI, unsigned TargetOpc);
-
void EmitUnwindingInstruction(const MachineInstr *MI);
// emitPseudoExpansionLowering - tblgen'erated.
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index d5127b751761..f903d583d7c6 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -24,7 +24,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -61,6 +60,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -1666,8 +1666,8 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
// Sort the scratch registers into ascending order.
const TargetRegisterInfo &TRI = getRegisterInfo();
SmallVector<unsigned, 6> ScratchRegs;
- for(unsigned I = 5; I < MI->getNumOperands(); ++I)
- ScratchRegs.push_back(MI->getOperand(I).getReg());
+ for (MachineOperand &MO : llvm::drop_begin(MI->operands(), 5))
+ ScratchRegs.push_back(MO.getReg());
llvm::sort(ScratchRegs,
[&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
return TRI.getEncodingValue(Reg1) <
@@ -2499,7 +2499,7 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
while (NumBytes) {
unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
- unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
+ unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(0xFF, RotAmt);
assert(ThisVal && "Didn't extract field correctly");
// We will handle these bits from offset, clear them.
@@ -2680,7 +2680,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// Otherwise, pull as much of the immediate into this ADDri/SUBri
// as possible.
unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
- unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
+ unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(0xFF, RotAmt);
// We will handle these bits from offset, clear them.
Offset &= ~ThisImmVal;
@@ -3327,7 +3327,8 @@ bool ARMBaseInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
MachineRegisterInfo *MRI) const {
// Fold large immediates into add, sub, or, xor.
unsigned DefOpc = DefMI.getOpcode();
- if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
+ if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm &&
+ DefOpc != ARM::tMOVi32imm)
return false;
if (!DefMI.getOperand(1).isImm())
// Could be t2MOVi32imm @xx
@@ -4939,7 +4940,7 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
unsigned int Offset = 0;
if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {
- assert(Subtarget.isReadTPHard() &&
+ assert(!Subtarget.isReadTPSoft() &&
"TLS stack protector requires hardware TLS register");
BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)
@@ -5433,7 +5434,7 @@ void ARMBaseInstrInfo::breakPartialRegDependency(
}
bool ARMBaseInstrInfo::hasNOP() const {
- return Subtarget.getFeatureBits()[ARM::HasV6KOps];
+ return Subtarget.hasFeature(ARM::HasV6KOps);
}
bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
@@ -5538,7 +5539,10 @@ ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
using namespace ARMII;
static const std::pair<unsigned, const char *> TargetFlags[] = {
- {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"}};
+ {MO_LO16, "arm-lo16"}, {MO_HI16, "arm-hi16"},
+ {MO_LO_0_7, "arm-lo-0-7"}, {MO_HI_0_7, "arm-hi-0-7"},
+ {MO_LO_8_15, "arm-lo-8-15"}, {MO_HI_8_15, "arm-hi-8-15"},
+ };
return ArrayRef(TargetFlags);
}
@@ -5869,7 +5873,8 @@ static bool isLRAvailable(const TargetRegisterInfo &TRI,
return !Live;
}
-outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
+std::optional<outliner::OutlinedFunction>
+ARMBaseInstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
unsigned SequenceSize =
@@ -5915,7 +5920,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < 2)
- return outliner::OutlinedFunction();
+ return std::nullopt;
}
// We expect the majority of the outlining candidates to be in consensus with
@@ -5941,7 +5946,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);
if (RepeatedSequenceLocs.size() < 2)
- return outliner::OutlinedFunction();
+ return std::nullopt;
// Likewise, partition the candidates according to PAC-RET enablement.
auto NoPAC =
@@ -5958,7 +5963,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo(
RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC);
if (RepeatedSequenceLocs.size() < 2)
- return outliner::OutlinedFunction();
+ return std::nullopt;
// At this point, we have only "safe" candidates to outline. Figure out
// frame + call instruction information.
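
The outliner hook now reports "no viable outlined function" with std::nullopt rather than a default-constructed outliner::OutlinedFunction. The shape of that change, sketched with toy types instead of the real outliner structures:

    #include <optional>
    #include <vector>

    struct Candidate {};
    struct OutlinedFunction { /* frame and call-site info */ };

    std::optional<OutlinedFunction>
    getCandidateInfo(std::vector<Candidate> &Locs) {
      if (Locs.size() < 2)
        return std::nullopt;      // was: return OutlinedFunction();
      return OutlinedFunction{};  // populated for real in the target hook
    }
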
@@ -6275,24 +6280,11 @@ bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
}
outliner::InstrType
-ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
+ARMBaseInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
unsigned Flags) const {
MachineInstr &MI = *MIT;
const TargetRegisterInfo *TRI = &getRegisterInfo();
- // Be conservative with inline ASM
- if (MI.isInlineAsm())
- return outliner::InstrType::Illegal;
-
- // Don't allow debug values to impact outlining type.
- if (MI.isDebugInstr() || MI.isIndirectDebugValue())
- return outliner::InstrType::Invisible;
-
- // At this point, KILL or IMPLICIT_DEF instructions don't really tell us much
- // so we can go ahead and skip over them.
- if (MI.isKill() || MI.isImplicitDef())
- return outliner::InstrType::Invisible;
-
// PIC instructions contain labels, outlining them would break offset
// computing.
unsigned Opc = MI.getOpcode();
@@ -6318,25 +6310,10 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
return outliner::InstrType::Illegal;
// Is this a terminator for a basic block?
- if (MI.isTerminator()) {
- // Don't outline if the branch is not unconditional.
- if (isPredicated(MI))
- return outliner::InstrType::Illegal;
-
- // Is this the end of a function?
- if (MI.getParent()->succ_empty())
- return outliner::InstrType::Legal;
-
- // It's not, so don't outline it.
- return outliner::InstrType::Illegal;
- }
-
- // Make sure none of the operands are un-outlinable.
- for (const MachineOperand &MOP : MI.operands()) {
- if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
- MOP.isTargetIndex())
- return outliner::InstrType::Illegal;
- }
+ if (MI.isTerminator())
+ // TargetInstrInfo::getOutliningType has already filtered out anything
+ // that would break this, so we can allow it here.
+ return outliner::InstrType::Legal;
// Don't outline if link register or program counter value are used.
if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))
@@ -6441,8 +6418,8 @@ ARMBaseInstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
MI.modifiesRegister(ARM::ITSTATE, TRI))
return outliner::InstrType::Illegal;
- // Don't outline positions.
- if (MI.isPosition())
+ // Don't outline CFI instructions.
+ if (MI.isCFIInstruction())
return outliner::InstrType::Illegal;
return outliner::InstrType::Legal;
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index aa9f9c4db172..5efcc1a0d9fc 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -348,11 +348,11 @@ public:
/// ARM supports the MachineOutliner.
bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
bool OutlineFromLinkOnceODRs) const override;
- outliner::OutlinedFunction getOutliningCandidateInfo(
+ std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
void mergeOutliningCandidateAttributes(
Function &F, std::vector<outliner::Candidate> &Candidates) const override;
- outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MIT,
+ outliner::InstrType getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
unsigned Flags) const override;
bool isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
unsigned &Flags) const override;
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index e6c6ab2efd50..9adf758b46c4 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -222,8 +222,8 @@ getReservedRegs(const MachineFunction &MF) const {
}
const TargetRegisterClass &RC = ARM::GPRPairRegClass;
for (unsigned Reg : RC)
- for (MCSubRegIterator SI(Reg, this); SI.isValid(); ++SI)
- if (Reserved.test(*SI))
+ for (MCPhysReg S : subregs(Reg))
+ if (Reserved.test(S))
markSuperRegs(Reserved, Reg);
// For v8.1m architecture
markSuperRegs(Reserved, ARM::ZR);
@@ -326,9 +326,9 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
// Get the other register in a GPRPair.
static MCPhysReg getPairedGPR(MCPhysReg Reg, bool Odd,
const MCRegisterInfo *RI) {
- for (MCSuperRegIterator Supers(Reg, RI); Supers.isValid(); ++Supers)
- if (ARM::GPRPairRegClass.contains(*Supers))
- return RI->getSubReg(*Supers, Odd ? ARM::gsub_1 : ARM::gsub_0);
+ for (MCPhysReg Super : RI->superregs(Reg))
+ if (ARM::GPRPairRegClass.contains(Super))
+ return RI->getSubReg(Super, Odd ? ARM::gsub_1 : ARM::gsub_0);
return 0;
}
@@ -338,7 +338,7 @@ bool ARMBaseRegisterInfo::getRegAllocationHints(
SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF,
const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const {
const MachineRegisterInfo &MRI = MF.getRegInfo();
- std::pair<Register, Register> Hint = MRI.getRegAllocationHint(VirtReg);
+ std::pair<unsigned, Register> Hint = MRI.getRegAllocationHint(VirtReg);
unsigned Odd;
switch (Hint.first) {
@@ -391,7 +391,7 @@ bool ARMBaseRegisterInfo::getRegAllocationHints(
void ARMBaseRegisterInfo::updateRegAllocHint(Register Reg, Register NewReg,
MachineFunction &MF) const {
MachineRegisterInfo *MRI = &MF.getRegInfo();
- std::pair<Register, Register> Hint = MRI->getRegAllocationHint(Reg);
+ std::pair<unsigned, Register> Hint = MRI->getRegAllocationHint(Reg);
if ((Hint.first == ARMRI::RegPairOdd || Hint.first == ARMRI::RegPairEven) &&
Hint.second.isVirtual()) {
// If 'Reg' is one of the even / odd register pair and it's now changed
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index d1d0254a3de9..926d702b4092 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -134,9 +134,6 @@ protected:
// Can be only subclassed.
explicit ARMBaseRegisterInfo();
- // Return the opcode that implements 'Op', or 0 if no opcode
- unsigned getOpcode(int Op) const;
-
public:
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
diff --git a/llvm/lib/Target/ARM/ARMBasicBlockInfo.h b/llvm/lib/Target/ARM/ARMBasicBlockInfo.h
index 47d9a4049fa0..daf8f9b4b836 100644
--- a/llvm/lib/Target/ARM/ARMBasicBlockInfo.h
+++ b/llvm/lib/Target/ARM/ARMBasicBlockInfo.h
@@ -80,7 +80,7 @@ struct BasicBlockInfo {
// If the block size isn't a multiple of the known bits, assume the
// worst case padding.
if (Size & ((1u << Bits) - 1))
- Bits = countTrailingZeros(Size);
+ Bits = llvm::countr_zero(Size);
return Bits;
}
diff --git a/llvm/lib/Target/ARM/ARMBranchTargets.cpp b/llvm/lib/Target/ARM/ARMBranchTargets.cpp
index 8ba3e627c039..17d0bdd87512 100644
--- a/llvm/lib/Target/ARM/ARMBranchTargets.cpp
+++ b/llvm/lib/Target/ARM/ARMBranchTargets.cpp
@@ -65,31 +65,19 @@ bool ARMBranchTargets::runOnMachineFunction(MachineFunction &MF) {
const ARMInstrInfo &TII =
*static_cast<const ARMInstrInfo *>(MF.getSubtarget().getInstrInfo());
- // LLVM does not consider basic blocks which are the targets of jump tables
- // to be address-taken (the address can't escape anywhere else), but they are
- // used for indirect branches, so need BTI instructions.
- SmallPtrSet<const MachineBasicBlock *, 8> JumpTableTargets;
- if (const MachineJumpTableInfo *JTI = MF.getJumpTableInfo())
- for (const MachineJumpTableEntry &JTE : JTI->getJumpTables())
- for (const MachineBasicBlock *MBB : JTE.MBBs)
- JumpTableTargets.insert(MBB);
-
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
- bool NeedBTI = false;
bool IsFirstBB = &MBB == &MF.front();
// Every function can potentially be called indirectly (even if it has
// static linkage, due to linker-generated veneers).
- if (IsFirstBB)
- NeedBTI = true;
-
// If the block itself is address-taken, or is an exception landing pad, it
// could be indirectly branched to.
- if (MBB.hasAddressTaken() || MBB.isEHPad() || JumpTableTargets.count(&MBB))
- NeedBTI = true;
+ // Jump tables only emit indirect jumps (JUMPTABLE_ADDRS) in ARM or Thumb1
+ // modes. These modes do not support PACBTI. As a result, BTI instructions
+ // are not added in the destination blocks.
- if (NeedBTI) {
+ if (IsFirstBB || MBB.hasAddressTaken() || MBB.isEHPad()) {
addBTI(TII, MBB, IsFirstBB);
MadeChange = true;
}
diff --git a/llvm/lib/Target/ARM/ARMCallLowering.cpp b/llvm/lib/Target/ARM/ARMCallLowering.cpp
index 76b99403a2f8..0383145afdb0 100644
--- a/llvm/lib/Target/ARM/ARMCallLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -30,6 +31,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -40,8 +42,6 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
-#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -528,12 +528,10 @@ bool ARMCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, CallLoweringInfo &
// We now know the size of the stack - update the ADJCALLSTACKDOWN
// accordingly.
- CallSeqStart.addImm(ArgAssigner.StackOffset)
- .addImm(0)
- .add(predOps(ARMCC::AL));
+ CallSeqStart.addImm(ArgAssigner.StackSize).addImm(0).add(predOps(ARMCC::AL));
MIRBuilder.buildInstr(ARM::ADJCALLSTACKUP)
- .addImm(ArgAssigner.StackOffset)
+ .addImm(ArgAssigner.StackSize)
.addImm(-1ULL)
.add(predOps(ARMCC::AL));
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.cpp b/llvm/lib/Target/ARM/ARMCallingConv.cpp
index 32f3a4a632f5..4878c7313894 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.cpp
+++ b/llvm/lib/Target/ARM/ARMCallingConv.cpp
@@ -241,7 +241,7 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned ValNo, MVT ValVT,
// Register allocation failed, we'll be needing the stack
unsigned Size = LocVT.getSizeInBits() / 8;
- if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) {
+ if (LocVT == MVT::i32 && State.getStackSize() == 0) {
// If nothing else has used the stack until this point, a non-HFA aggregate
// can be split between regs and stack.
unsigned RegIdx = State.getFirstUnallocated(RegList);
diff --git a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index 386a74877bc1..a6682f0ca162 100644
--- a/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -277,8 +277,6 @@ namespace {
unsigned &DeadSize, bool &CanDeleteLEA,
bool &BaseRegKill);
bool optimizeThumb2JumpTables();
- void fixupBTI(unsigned JTI, MachineBasicBlock &OldBB,
- MachineBasicBlock &NewBB);
MachineBasicBlock *adjustJTTargetBlockForward(unsigned JTI,
MachineBasicBlock *BB,
MachineBasicBlock *JTBB);
@@ -607,6 +605,10 @@ void ARMConstantIslands::doInitialJumpTablePlacement(
auto MJTI = MF->getJumpTableInfo();
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ // Only inline jump tables are placed in the function.
+ if (MJTI->getEntryKind() != MachineJumpTableInfo::EK_Inline)
+ return;
+
MachineBasicBlock *LastCorrectlyNumberedBB = nullptr;
for (MachineBasicBlock &MBB : *MF) {
auto MI = MBB.getLastNonDebugInstr();
@@ -628,6 +630,11 @@ void ARMConstantIslands::doInitialJumpTablePlacement(
case ARM::tBR_JTr:
case ARM::BR_JTm_i12:
case ARM::BR_JTm_rs:
+ // These instructions are emitted only in ARM or Thumb1 modes which do not
+ // support PACBTI. Hence we don't add BTI instructions in the destination
+ // blocks.
+ assert(!MF->getInfo<ARMFunctionInfo>()->branchTargetEnforcement() &&
+ "Branch protection must not be enabled for Arm or Thumb1 modes");
JTOpcode = ARM::JUMPTABLE_ADDRS;
break;
case ARM::t2BR_JT:
@@ -774,6 +781,11 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// Compute block offsets and known bits.
BBUtils->adjustBBOffsetsAfter(&MF->front());
+ // We only care about jump table instructions when jump tables are inline.
+ MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ bool InlineJumpTables =
+ MJTI && MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline;
+
// Now go back through the instructions and build up our data structures.
for (MachineBasicBlock &MBB : *MF) {
// If this block doesn't fall through into the next MBB, then this is
@@ -796,7 +808,8 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
continue; // Ignore other JT branches
case ARM::t2BR_JT:
case ARM::tBR_JTr:
- T2JumpTables.push_back(&I);
+ if (InlineJumpTables)
+ T2JumpTables.push_back(&I);
continue; // Does not get an entry in ImmBranches
case ARM::Bcc:
isCond = true;
@@ -843,7 +856,8 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
// Scan the instructions for constant pool operands.
for (unsigned op = 0, e = I.getNumOperands(); op != e; ++op)
- if (I.getOperand(op).isCPI() || I.getOperand(op).isJTI()) {
+ if (I.getOperand(op).isCPI() ||
+ (I.getOperand(op).isJTI() && InlineJumpTables)) {
// We found one. The addressing mode tells us the max displacement
// from the PC that this instruction permits.
@@ -2116,8 +2130,7 @@ bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
break;
}
- for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
- const MachineOperand &MO = I->getOperand(K);
+ for (const MachineOperand &MO : I->operands()) {
if (!MO.isReg() || !MO.getReg())
continue;
if (MO.isDef() && MO.getReg() == BaseReg)
@@ -2135,8 +2148,7 @@ bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
// Check the add really is removable, and that nothing else in the block
// clobbers BaseReg.
for (++I; &*I != JumpMI; ++I) {
- for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
- const MachineOperand &MO = I->getOperand(K);
+ for (const MachineOperand &MO : I->operands()) {
if (!MO.isReg() || !MO.getReg())
continue;
if (MO.isDef() && MO.getReg() == BaseReg)
@@ -2195,8 +2207,7 @@ static void RemoveDeadAddBetweenLEAAndJT(MachineInstr *LEAMI,
// Ensure EntryReg is not clobbered or used.
MachineBasicBlock::iterator J(RemovableAdd);
for (++J; &*J != JumpMI; ++J) {
- for (unsigned K = 0, E = J->getNumOperands(); K != E; ++K) {
- const MachineOperand &MO = J->getOperand(K);
+ for (const MachineOperand &MO : J->operands()) {
if (!MO.isReg() || !MO.getReg())
continue;
if (MO.isDef() && MO.getReg() == EntryReg)
@@ -2278,9 +2289,12 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
// %t = tLDRr %base, %idx
Register BaseReg = User.MI->getOperand(0).getReg();
- if (User.MI->getIterator() == User.MI->getParent()->begin())
+ MachineBasicBlock *UserMBB = User.MI->getParent();
+ MachineBasicBlock::iterator Shift = User.MI->getIterator();
+ if (Shift == UserMBB->begin())
continue;
- MachineInstr *Shift = User.MI->getPrevNode();
+
+ Shift = prev_nodbg(Shift, UserMBB->begin());
if (Shift->getOpcode() != ARM::tLSLri ||
Shift->getOperand(3).getImm() != 2 ||
!Shift->getOperand(2).isKill())
@@ -2447,38 +2461,6 @@ bool ARMConstantIslands::reorderThumb2JumpTables() {
return MadeChange;
}
-void ARMConstantIslands::fixupBTI(unsigned JTI, MachineBasicBlock &OldBB,
- MachineBasicBlock &NewBB) {
- assert(isThumb2 && "BTI in Thumb1?");
-
- // Insert a BTI instruction into NewBB
- BuildMI(NewBB, NewBB.begin(), DebugLoc(), TII->get(ARM::t2BTI));
-
- // Update jump table reference counts.
- const MachineJumpTableInfo &MJTI = *MF->getJumpTableInfo();
- const MachineJumpTableEntry &JTE = MJTI.getJumpTables()[JTI];
- for (const MachineBasicBlock *MBB : JTE.MBBs) {
- if (MBB != &OldBB)
- continue;
- --BlockJumpTableRefCount[MBB];
- ++BlockJumpTableRefCount[&NewBB];
- }
-
- // If the old basic block reference count dropped to zero, remove
- // the BTI instruction at its beginning.
- if (BlockJumpTableRefCount[&OldBB] > 0)
- return;
-
- // Skip meta instructions
- auto BTIPos = llvm::find_if_not(OldBB.instrs(), [](const MachineInstr &MI) {
- return MI.isMetaInstruction();
- });
- assert(BTIPos->getOpcode() == ARM::t2BTI &&
- "BasicBlock is mentioned in a jump table but does start with BTI");
- if (BTIPos->getOpcode() == ARM::t2BTI)
- BTIPos->eraseFromParent();
-}
-
MachineBasicBlock *ARMConstantIslands::adjustJTTargetBlockForward(
unsigned JTI, MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// If the destination block is terminated by an unconditional branch,
@@ -2538,9 +2520,6 @@ MachineBasicBlock *ARMConstantIslands::adjustJTTargetBlockForward(
NewBB->addSuccessor(BB);
JTBB->replaceSuccessor(BB, NewBB);
- if (MF->getInfo<ARMFunctionInfo>()->branchTargetEnforcement())
- fixupBTI(JTI, *BB, *NewBB);
-
++NumJTInserted;
return NewBB;
}
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 7edd58e0ae56..2f9236bb977f 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Debug.h"
@@ -59,8 +60,6 @@ namespace {
}
private:
- void TransferImpOps(MachineInstr &OldMI,
- MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
bool ExpandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
@@ -71,6 +70,8 @@ namespace {
void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
unsigned Opc, bool IsExt);
void ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI);
+ void ExpandTMOV32BitImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
void ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
void CMSEClearGPRegs(MachineBasicBlock &MBB,
@@ -120,22 +121,6 @@ namespace {
INITIALIZE_PASS(ARMExpandPseudo, DEBUG_TYPE, ARM_EXPAND_PSEUDO_NAME, false,
false)
-/// TransferImpOps - Transfer implicit operands on the pseudo instruction to
-/// the instructions created from the expansion.
-void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
- MachineInstrBuilder &UseMI,
- MachineInstrBuilder &DefMI) {
- const MCInstrDesc &Desc = OldMI.getDesc();
- for (const MachineOperand &MO :
- llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
- assert(MO.isReg() && MO.getReg());
- if (MO.isUse())
- UseMI.add(MO);
- else
- DefMI.add(MO);
- }
-}
-
namespace {
// Constants for register spacing in NEON load/store instructions.
// For quad-register load-lane and store-lane pseudo instructions, the
@@ -675,7 +660,7 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
}
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
- TransferImpOps(MI, MIB, MIB);
+ MIB.copyImplicitOps(MI);
// Transfer memoperands.
MIB.cloneMemRefs(MI);
@@ -751,7 +736,7 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
MIB->addRegisterKilled(SrcReg, TRI, true);
else if (!SrcIsUndef)
MIB.addReg(SrcReg, RegState::Implicit); // Add implicit uses for src reg.
- TransferImpOps(MI, MIB, MIB);
+ MIB.copyImplicitOps(MI);
// Transfer memoperands.
MIB.cloneMemRefs(MI);
@@ -843,7 +828,7 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
if (TableEntry->IsLoad)
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
- TransferImpOps(MI, MIB, MIB);
+ MIB.copyImplicitOps(MI);
// Transfer memoperands.
MIB.cloneMemRefs(MI);
MI.eraseFromParent();
@@ -883,7 +868,7 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
// Add an implicit kill and use for the super-reg.
MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill));
- TransferImpOps(MI, MIB, MIB);
+ MIB.copyImplicitOps(MI);
MI.eraseFromParent();
LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
}
@@ -920,7 +905,7 @@ void ARMExpandPseudo::ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI) {
if (NewOpc == ARM::VSTMDIA)
MIB.addReg(SrcReg, RegState::Implicit);
- TransferImpOps(MI, MIB, MIB);
+ MIB.copyImplicitOps(MI);
MIB.cloneMemRefs(MI);
MI.eraseFromParent();
}
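
All of the TransferImpOps(MI, MIB, MIB) calls in this file collapse into MachineInstrBuilder::copyImplicitOps(MI), which copies every implicit operand of the pseudo onto the newly built instruction instead of splitting uses and defs between two builders. A typical expansion site then looks roughly like this (REAL_OPC is a hypothetical opcode):

    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MyTarget::REAL_OPC),
                MI.getOperand(0).getReg())
            .add(MI.getOperand(1));
    MIB.copyImplicitOps(MI);   // was: TransferImpOps(MI, MIB, MIB);
    MIB.cloneMemRefs(MI);      // memory operands are still cloned separately
    MI.eraseFromParent();
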
@@ -969,6 +954,111 @@ static MachineOperand makeImplicit(const MachineOperand &MO) {
return NewMO;
}
+static MachineOperand getMovOperand(const MachineOperand &MO,
+ unsigned TargetFlag) {
+ unsigned TF = MO.getTargetFlags() | TargetFlag;
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate: {
+ unsigned Imm = MO.getImm();
+ switch (TargetFlag) {
+ case ARMII::MO_HI_8_15:
+ Imm = (Imm >> 24) & 0xff;
+ break;
+ case ARMII::MO_HI_0_7:
+ Imm = (Imm >> 16) & 0xff;
+ break;
+ case ARMII::MO_LO_8_15:
+ Imm = (Imm >> 8) & 0xff;
+ break;
+ case ARMII::MO_LO_0_7:
+ Imm = Imm & 0xff;
+ break;
+ case ARMII::MO_HI16:
+ Imm = (Imm >> 16) & 0xffff;
+ break;
+ case ARMII::MO_LO16:
+ Imm = Imm & 0xffff;
+ break;
+ default:
+ llvm_unreachable("Only HI/LO target flags are expected");
+ }
+ return MachineOperand::CreateImm(Imm);
+ }
+ case MachineOperand::MO_ExternalSymbol:
+ return MachineOperand::CreateES(MO.getSymbolName(), TF);
+ case MachineOperand::MO_JumpTableIndex:
+ return MachineOperand::CreateJTI(MO.getIndex(), TF);
+ default:
+ return MachineOperand::CreateGA(MO.getGlobal(), MO.getOffset(), TF);
+ }
+}
+
+void ARMExpandPseudo::ExpandTMOV32BitImm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr &MI = *MBBI;
+ Register DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ const MachineOperand &MO = MI.getOperand(1);
+ unsigned MIFlags = MI.getFlags();
+
+ LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
+
+ // Expand the mov into a sequence of mov/add+lsl of the individual bytes. We
+ // want to avoid emitting any zero bytes, as they won't change the result, and
+ // also don't want any pointless shifts, so instead of immediately emitting
+ // the shift for a byte we keep track of how much we will need to shift and do
+ // it before the next nonzero byte.
+ unsigned PendingShift = 0;
+ for (unsigned Byte = 0; Byte < 4; ++Byte) {
+ unsigned Flag = Byte == 0 ? ARMII::MO_HI_8_15
+ : Byte == 1 ? ARMII::MO_HI_0_7
+ : Byte == 2 ? ARMII::MO_LO_8_15
+ : ARMII::MO_LO_0_7;
+ MachineOperand Operand = getMovOperand(MO, Flag);
+ bool ZeroImm = Operand.isImm() && Operand.getImm() == 0;
+ unsigned Op = PendingShift ? ARM::tADDi8 : ARM::tMOVi8;
+
+ // Emit the pending shift if we're going to emit this byte or if we've
+ // reached the end.
+ if (PendingShift && (!ZeroImm || Byte == 3)) {
+ MachineInstr *Lsl =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tLSLri), DstReg)
+ .add(t1CondCodeOp(true))
+ .addReg(DstReg)
+ .addImm(PendingShift)
+ .add(predOps(ARMCC::AL))
+ .setMIFlags(MIFlags);
+ (void)Lsl;
+ LLVM_DEBUG(dbgs() << "And: "; Lsl->dump(););
+ PendingShift = 0;
+ }
+
+ // Emit this byte if it's nonzero.
+ if (!ZeroImm) {
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Op), DstReg)
+ .add(t1CondCodeOp(true));
+ if (Op == ARM::tADDi8)
+ MIB.addReg(DstReg);
+ MIB.add(Operand);
+ MIB.add(predOps(ARMCC::AL));
+ MIB.setMIFlags(MIFlags);
+ LLVM_DEBUG(dbgs() << (Op == ARM::tMOVi8 ? "To: " : "And:") << " ";
+ MIB.getInstr()->dump(););
+ }
+
+ // Don't accumulate the shift value if we've not yet seen a nonzero byte.
+ if (PendingShift || !ZeroImm)
+ PendingShift += 8;
+ }
+
+ // The dest is dead on the last instruction we emitted if it was dead on the
+ // original instruction.
+ (--MBBI)->getOperand(0).setIsDead(DstIsDead);
+
+ MI.eraseFromParent();
+}
+
void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
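
A standalone sketch of the byte slicing performed by ExpandTMOV32BitImm above, useful for checking the shift/add arithmetic by hand; it mirrors the MO_HI_8_15/MO_HI_0_7/MO_LO_8_15/MO_LO_0_7 splits and the rebuild done by the emitted movs/lsls/adds, without the zero-byte skipping and shift coalescing of the real expansion:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t Imm = 0x12345678;                // hypothetical example constant
      uint32_t Bytes[4] = {(Imm >> 24) & 0xff,  // :upper8_15:
                           (Imm >> 16) & 0xff,  // :upper0_7:
                           (Imm >> 8) & 0xff,   // :lower8_15:
                           Imm & 0xff};         // :lower0_7:
      uint32_t R = 0;                           // stands in for the dest register
      for (uint32_t B : Bytes)
        R = (R << 8) + B;                       // lsls #8 ; adds #B (movs first)
      std::printf("0x%08x\n", R);               // prints 0x12345678
      return 0;
    }
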
@@ -1020,7 +1110,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
HI16.addImm(Pred).addReg(PredReg).add(condCodeOp());
if (isCC)
LO16.add(makeImplicit(MI.getOperand(1)));
- TransferImpOps(MI, LO16, HI16);
+ LO16.copyImplicitOps(MI);
+ HI16.copyImplicitOps(MI);
MI.eraseFromParent();
return;
}
@@ -1037,52 +1128,34 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
}
LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
- HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg);
-
LO16.setMIFlags(MIFlags);
- HI16.setMIFlags(MIFlags);
-
- switch (MO.getType()) {
- case MachineOperand::MO_Immediate: {
- unsigned Imm = MO.getImm();
- unsigned Lo16 = Imm & 0xffff;
- unsigned Hi16 = (Imm >> 16) & 0xffff;
- LO16 = LO16.addImm(Lo16);
- HI16 = HI16.addImm(Hi16);
- break;
- }
- case MachineOperand::MO_ExternalSymbol: {
- const char *ES = MO.getSymbolName();
- unsigned TF = MO.getTargetFlags();
- LO16 = LO16.addExternalSymbol(ES, TF | ARMII::MO_LO16);
- HI16 = HI16.addExternalSymbol(ES, TF | ARMII::MO_HI16);
- break;
- }
- default: {
- const GlobalValue *GV = MO.getGlobal();
- unsigned TF = MO.getTargetFlags();
- LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
- HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
- break;
- }
- }
-
+ LO16.add(getMovOperand(MO, ARMII::MO_LO16));
LO16.cloneMemRefs(MI);
- HI16.cloneMemRefs(MI);
LO16.addImm(Pred).addReg(PredReg);
- HI16.addImm(Pred).addReg(PredReg);
+ if (isCC)
+ LO16.add(makeImplicit(MI.getOperand(1)));
+ LO16.copyImplicitOps(MI);
+ LLVM_DEBUG(dbgs() << "To: "; LO16.getInstr()->dump(););
+
+ MachineOperand HIOperand = getMovOperand(MO, ARMII::MO_HI16);
+ if (!(HIOperand.isImm() && HIOperand.getImm() == 0)) {
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+ HI16.setMIFlags(MIFlags);
+ HI16.add(HIOperand);
+ HI16.cloneMemRefs(MI);
+ HI16.addImm(Pred).addReg(PredReg);
+ HI16.copyImplicitOps(MI);
+ LLVM_DEBUG(dbgs() << "And: "; HI16.getInstr()->dump(););
+ } else {
+ LO16->getOperand(0).setIsDead(DstIsDead);
+ }
if (RequiresBundling)
finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator());
- if (isCC)
- LO16.add(makeImplicit(MI.getOperand(1)));
- TransferImpOps(MI, LO16, HI16);
MI.eraseFromParent();
- LLVM_DEBUG(dbgs() << "To: "; LO16.getInstr()->dump(););
- LLVM_DEBUG(dbgs() << "And: "; HI16.getInstr()->dump(););
}
// The size of the area, accessed by that VLSTM/VLLDM
@@ -2171,6 +2244,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Update call site info and delete the pseudo instruction TCRETURN.
if (MI.isCandidateForCallSiteEntry())
MI.getMF()->moveCallSiteInfo(&MI, &*NewMI);
+ // Copy nomerge flag over to new instruction.
+ if (MI.getFlag(MachineInstr::NoMerge))
+ NewMI->setFlag(MachineInstr::NoMerge);
MBB.erase(MBBI);
MBBI = NewMI;
@@ -2454,14 +2530,14 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
return true;
}
- case ARM::MOVsrl_flag:
- case ARM::MOVsra_flag: {
+ case ARM::MOVsrl_glue:
+ case ARM::MOVsra_glue: {
// These are just fancy MOVs instructions.
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
MI.getOperand(0).getReg())
.add(MI.getOperand(1))
.addImm(ARM_AM::getSORegOpc(
- (Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr : ARM_AM::asr), 1))
+ (Opcode == ARM::MOVsrl_glue ? ARM_AM::lsr : ARM_AM::asr), 1))
.add(predOps(ARMCC::AL))
.addReg(ARM::CPSR, RegState::Define);
MI.eraseFromParent();
@@ -2469,14 +2545,13 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
}
case ARM::RRX: {
// This encodes as "MOVs Rd, Rm, rrx
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
- MI.getOperand(0).getReg())
- .add(MI.getOperand(1))
- .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))
- .add(predOps(ARMCC::AL))
- .add(condCodeOp());
- TransferImpOps(MI, MIB, MIB);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1))
+ .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp())
+ .copyImplicitOps(MI);
MI.eraseFromParent();
return true;
}
@@ -2516,7 +2591,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
}
MIB.cloneMemRefs(MI);
- TransferImpOps(MI, MIB, MIB);
+ MIB.copyImplicitOps(MI);
// Update the call site info.
if (MI.isCandidateForCallSiteEntry())
MF->moveCallSiteInfo(&MI, &*MIB);
@@ -2529,17 +2604,16 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
? ARM::tLDRpci : ARM::t2LDRpci;
Register DstReg = MI.getOperand(0).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
- MachineInstrBuilder MIB1 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg)
- .add(MI.getOperand(1))
- .add(predOps(ARMCC::AL));
- MIB1.cloneMemRefs(MI);
- MachineInstrBuilder MIB2 =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
- .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
- .addReg(DstReg)
- .add(MI.getOperand(2));
- TransferImpOps(MI, MIB1, MIB2);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg)
+ .add(MI.getOperand(1))
+ .add(predOps(ARMCC::AL))
+ .cloneMemRefs(MI)
+ .copyImplicitOps(MI);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .add(MI.getOperand(2))
+ .copyImplicitOps(MI);
MI.eraseFromParent();
return true;
}
@@ -2624,15 +2698,16 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned PICAddOpc = isARM
? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
: ARM::tPICADD;
- MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(LO16Opc), DstReg)
- .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF)
- .addImm(LabelId);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg)
+ .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF)
+ .addImm(LabelId)
+ .copyImplicitOps(MI);
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg)
- .addReg(DstReg)
- .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF)
- .addImm(LabelId);
+ .addReg(DstReg)
+ .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF)
+ .addImm(LabelId)
+ .copyImplicitOps(MI);
MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(PICAddOpc))
@@ -2643,7 +2718,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
if (Opcode == ARM::MOV_ga_pcrel_ldr)
MIB3.cloneMemRefs(MI);
}
- TransferImpOps(MI, MIB1, MIB3);
+ MIB3.copyImplicitOps(MI);
MI.eraseFromParent();
return true;
}
@@ -2655,15 +2730,29 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
ExpandMOV32BitImm(MBB, MBBI);
return true;
+ case ARM::tMOVi32imm:
+ ExpandTMOV32BitImm(MBB, MBBI);
+ return true;
+
+ case ARM::tLEApcrelJT:
+ // Inline jump tables are handled in ARMAsmPrinter.
+ if (MI.getMF()->getJumpTableInfo()->getEntryKind() ==
+ MachineJumpTableInfo::EK_Inline)
+ return false;
+
+ // Use a 32-bit immediate move to generate the address of the jump table.
+ assert(STI->isThumb() && "Non-inline jump tables expected only in thumb");
+ ExpandTMOV32BitImm(MBB, MBBI);
+ return true;
+
case ARM::SUBS_PC_LR: {
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
- .addReg(ARM::LR)
- .add(MI.getOperand(0))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .addReg(ARM::CPSR, RegState::Undef);
- TransferImpOps(MI, MIB, MIB);
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
+ .addReg(ARM::LR)
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .addReg(ARM::CPSR, RegState::Undef)
+ .copyImplicitOps(MI);
MI.eraseFromParent();
return true;
}
@@ -2692,7 +2781,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Add an implicit def for the super-register.
MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
- TransferImpOps(MI, MIB, MIB);
+ MIB.copyImplicitOps(MI);
MIB.cloneMemRefs(MI);
MI.eraseFromParent();
return true;
@@ -2725,7 +2814,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
if (SrcIsKill) // Add an implicit kill for the Q register.
MIB->addRegisterKilled(SrcReg, TRI, true);
- TransferImpOps(MI, MIB, MIB);
+ MIB.copyImplicitOps(MI);
MIB.cloneMemRefs(MI);
MI.eraseFromParent();
return true;
diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
index 62a090f4bca8..0b35f134ec7b 100644
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -40,6 +40,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -72,7 +73,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -1842,7 +1842,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
case CallingConv::CXX_FAST_TLS:
// Use target triple & subtarget features to do actual dispatch.
if (Subtarget->isAAPCS_ABI()) {
- if (Subtarget->hasVFP2Base() &&
+ if (Subtarget->hasFPRegs() &&
TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
else
@@ -1928,7 +1928,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
// At the point, we are able to handle the call's arguments in fast isel.
// Get a count of how many bytes are to be pushed on the stack.
- NumBytes = CCInfo.getNextStackOffset();
+ NumBytes = CCInfo.getStackSize();
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 5fa7068c89eb..4496d4928ebe 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -324,8 +324,8 @@ static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
NewInstr.add(MBBI->getOperand(0));
NewInstr.add(t1CondCodeOp(/*isDead=*/true));
- for (unsigned i = 1, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
- NewInstr.add(MBBI->getOperand(i));
+ for (MachineOperand &MO : llvm::drop_begin(MBBI->operands()))
+ NewInstr.add(MO);
MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
MBB->erase(MBBI);
MBBI = NewMBBI;
@@ -357,6 +357,34 @@ static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
.setMIFlags(Flags);
break;
+ case ARM::t2STR_PRE:
+ if (MBBI->getOperand(0).getReg() == ARM::SP &&
+ MBBI->getOperand(2).getReg() == ARM::SP &&
+ MBBI->getOperand(3).getImm() == -4) {
+ unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
+ .addImm(1ULL << Reg)
+ .addImm(/*Wide=*/1)
+ .setMIFlags(Flags);
+ } else {
+ report_fatal_error("No matching SEH Opcode for t2STR_PRE");
+ }
+ break;
+
+ case ARM::t2LDR_POST:
+ if (MBBI->getOperand(1).getReg() == ARM::SP &&
+ MBBI->getOperand(2).getReg() == ARM::SP &&
+ MBBI->getOperand(3).getImm() == 4) {
+ unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
+ MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
+ .addImm(1ULL << Reg)
+ .addImm(/*Wide=*/1)
+ .setMIFlags(Flags);
+ } else {
+ report_fatal_error("No matching SEH Opcode for t2LDR_POST");
+ }
+ break;
+
case ARM::t2LDMIA_RET:
case ARM::t2LDMIA_UPD:
case ARM::t2STMDB_UPD: {
@@ -409,8 +437,7 @@ static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
case ARM::VSTMDDB_UPD:
case ARM::VLDMDIA_UPD: {
int First = -1, Last = 0;
- for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
- const MachineOperand &MO = MBBI->getOperand(i);
+ for (const MachineOperand &MO : llvm::drop_begin(MBBI->operands(), 4)) {
unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
if (First == -1)
First = Reg;
@@ -3141,12 +3168,12 @@ void ARMFrameLowering::adjustForSegmentedStacks(
.addReg(ScratchReg1)
.add(predOps(ARMCC::AL));
- // This jump is taken if StackLimit < SP - stack required.
+ // This jump is taken if StackLimit <= SP - stack required.
Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
- BuildMI(GetMBB, DL, TII.get(Opcode)).addMBB(PostStackMBB)
- .addImm(ARMCC::LO)
- .addReg(ARM::CPSR);
-
+ BuildMI(GetMBB, DL, TII.get(Opcode))
+ .addMBB(PostStackMBB)
+ .addImm(ARMCC::LS)
+ .addReg(ARM::CPSR);
// Calling __morestack(StackSize, Size of stack arguments).
// __morestack knows that the stack size requested is in SR0(r4)
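
Two of the ARMFrameLowering hunks above replace explicit index loops over instruction operands with llvm::drop_begin, which simply iterates a range minus its first N elements. The sketch below shows the same behaviour using only the C++20 standard library (std::views::drop), so it runs without LLVM's ADT headers; the integer vector stands in for the operand list.

// The loops rewritten above iterate over all operands except the first one
// (or the first four). llvm::drop_begin expresses exactly that; this sketch
// shows the same behaviour with the standard library instead of LLVM's ADT.
#include <cstdio>
#include <ranges>
#include <vector>

int main() {
  std::vector<int> Operands{10, 11, 12, 13, 14, 15};

  // Equivalent of: for (auto &MO : llvm::drop_begin(Operands))      // skip 1
  for (int MO : Operands | std::views::drop(1))
    std::printf("%d ", MO);
  std::printf("\n");

  // Equivalent of: for (auto &MO : llvm::drop_begin(Operands, 4))   // skip 4
  for (int MO : Operands | std::views::drop(4))
    std::printf("%d ", MO);
  std::printf("\n");
}
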
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index e880e9d66fc1..a0607cb5662e 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -443,7 +443,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() {
continue;
// Check if the AND mask is an immediate of the form: 000.....1111111100
- unsigned TZ = countTrailingZeros(And_imm);
+ unsigned TZ = llvm::countr_zero(And_imm);
if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. E.g. on
      // Swift, a left shifter operand of 1 / 2 is free but others are not.
@@ -2720,10 +2720,7 @@ void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
}
auto OpIsZero = [N](size_t OpNo) {
- if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo)))
- if (OpConst->getZExtValue() == 0)
- return true;
- return false;
+ return isNullConstant(N->getOperand(OpNo));
};
// If the input accumulator value is not zero, select an instruction with
@@ -3365,7 +3362,7 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
And_imm &= -1U >> Srl_imm;
// Note: The width operand is encoded as width-1.
- unsigned Width = countTrailingOnes(And_imm) - 1;
+ unsigned Width = llvm::countr_one(And_imm) - 1;
unsigned LSB = Srl_imm;
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
@@ -3431,11 +3428,11 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
isShiftedMask_32(And_imm)) {
unsigned Srl_imm = 0;
- unsigned LSB = countTrailingZeros(And_imm);
+ unsigned LSB = llvm::countr_zero(And_imm);
  // Shift must be the same as the AND's LSB
if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
- unsigned MSB = 31 - countLeadingZeros(And_imm);
+ unsigned MSB = llvm::Log2_32(And_imm);
// Note: The width operand is encoded as width-1.
unsigned Width = MSB - LSB;
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
@@ -3540,9 +3537,9 @@ void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
static std::optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
- unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
- unsigned LastOne = A.countTrailingZeros();
- if (A.countPopulation() != (FirstOne - LastOne + 1))
+ unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;
+ unsigned LastOne = A.countr_zero();
+ if (A.popcount() != (FirstOne - LastOne + 1))
return std::nullopt;
return std::make_pair(FirstOne, LastOne);
}
@@ -3704,7 +3701,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
case ISD::Constant: {
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
// If we can't materialize the constant we need to use a literal pool
- if (ConstantMaterializationCost(Val, Subtarget) > 2) {
+ if (ConstantMaterializationCost(Val, Subtarget) > 2 &&
+ !Subtarget->genExecuteOnly()) {
SDValue CPIdx = CurDAG->getTargetConstantPool(
ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
TLI->getPointerTy(CurDAG->getDataLayout()));
@@ -3990,10 +3988,9 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
SDValue SmulLoHi = N->getOperand(1);
SDValue Subc = N->getOperand(2);
- auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
+ SDValue Zero = Subc.getOperand(0);
- if (!Zero || Zero->getZExtValue() != 0 ||
- Subc.getOperand(1) != SmulLoHi.getValue(0) ||
+ if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) ||
N->getOperand(1) != SmulLoHi.getValue(1) ||
N->getOperand(2) != Subc.getValue(1))
break;
@@ -4132,16 +4129,16 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
SDValue N3 = N->getOperand(3);
- SDValue InFlag = N->getOperand(4);
+ SDValue InGlue = N->getOperand(4);
assert(N1.getOpcode() == ISD::BasicBlock);
assert(N2.getOpcode() == ISD::Constant);
assert(N3.getOpcode() == ISD::Register);
unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
- if (InFlag.getOpcode() == ARMISD::CMPZ) {
- if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
- SDValue Int = InFlag.getOperand(0);
+ if (InGlue.getOpcode() == ARMISD::CMPZ) {
+ if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+ SDValue Int = InGlue.getOperand(0);
uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue();
// Handle low-overhead loops.
@@ -4164,15 +4161,15 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
ReplaceUses(N, LoopEnd);
CurDAG->RemoveDeadNode(N);
- CurDAG->RemoveDeadNode(InFlag.getNode());
+ CurDAG->RemoveDeadNode(InGlue.getNode());
CurDAG->RemoveDeadNode(Int.getNode());
return;
}
}
bool SwitchEQNEToPLMI;
- SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
- InFlag = N->getOperand(4);
+ SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
+ InGlue = N->getOperand(4);
if (SwitchEQNEToPLMI) {
switch ((ARMCC::CondCodes)CC) {
@@ -4188,13 +4185,13 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
- SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
+ SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue };
SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
MVT::Glue, Ops);
Chain = SDValue(ResNode, 0);
if (N->getNumValues() == 2) {
- InFlag = SDValue(ResNode, 1);
- ReplaceUses(SDValue(N, 1), InFlag);
+ InGlue = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(N, 1), InGlue);
}
ReplaceUses(SDValue(N, 0),
SDValue(Chain.getNode(), Chain.getResNo()));
@@ -4241,11 +4238,11 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
}
case ARMISD::CMOV: {
- SDValue InFlag = N->getOperand(4);
+ SDValue InGlue = N->getOperand(4);
- if (InFlag.getOpcode() == ARMISD::CMPZ) {
+ if (InGlue.getOpcode() == ARMISD::CMPZ) {
bool SwitchEQNEToPLMI;
- SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
+ SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
if (SwitchEQNEToPLMI) {
SDValue ARMcc = N->getOperand(2);
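
The ARMISelDAGToDAG changes above are mostly mechanical migrations from the old countTrailingZeros/countLeadingZeros/countTrailingOnes/countPopulation helpers to llvm::countr_zero/countl_zero/countr_one and APInt::popcount, plus one substitution of `31 - countLeadingZeros(x)` by llvm::Log2_32(x). The standalone check below uses the equivalent C++20 <bit> functions purely so it runs without LLVM headers; the LLVM helpers have the same semantics for these inputs.

// Sanity check of the bit-counting identities used by the renames above,
// written against C++20 <bit> rather than llvm/ADT/bit.h so it runs standalone.
#include <bit>
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t And_imm = 0x000ffffcu;  // a 000...1111111100 shaped mask

  unsigned TZ  = std::countr_zero(And_imm);  // was countTrailingZeros()
  unsigned LZ  = std::countl_zero(And_imm);  // was countLeadingZeros()
  unsigned Pop = std::popcount(And_imm);     // was countPopulation()

  // For any nonzero 32-bit value, 31 - countl_zero(x) == floor(log2(x)),
  // which is what the switch to llvm::Log2_32() relies on.
  unsigned MSB = 31 - LZ;
  assert(TZ == 2 && MSB == 19 && Pop == 18);

  std::printf("TZ=%u LZ=%u MSB=%u popcount=%u\n", TZ, LZ, MSB, Pop);
}
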
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 2e78b52d0993..5239e5c4d91b 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -37,7 +37,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -53,6 +52,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
@@ -99,11 +99,11 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -1007,6 +1007,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
}
}
+
+ for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
+ MVT::v4i32}) {
+ setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
+ setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
+ }
}
if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
@@ -1124,8 +1132,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
- setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
- setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
+ setOperationAction(ISD::UADDO_CARRY, MVT::i32, Custom);
+ setOperationAction(ISD::USUBO_CARRY, MVT::i32, Custom);
if (Subtarget->hasDSP()) {
setOperationAction(ISD::SADDSAT, MVT::i8, Custom);
setOperationAction(ISD::SSUBSAT, MVT::i8, Custom);
@@ -1682,9 +1690,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(ARMISD::BRCOND)
MAKE_CASE(ARMISD::BR_JT)
MAKE_CASE(ARMISD::BR2_JT)
- MAKE_CASE(ARMISD::RET_FLAG)
- MAKE_CASE(ARMISD::SERET_FLAG)
- MAKE_CASE(ARMISD::INTRET_FLAG)
+ MAKE_CASE(ARMISD::RET_GLUE)
+ MAKE_CASE(ARMISD::SERET_GLUE)
+ MAKE_CASE(ARMISD::INTRET_GLUE)
MAKE_CASE(ARMISD::PIC_ADD)
MAKE_CASE(ARMISD::CMP)
MAKE_CASE(ARMISD::CMN)
@@ -1702,8 +1710,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(ARMISD::ASRL)
MAKE_CASE(ARMISD::LSRL)
MAKE_CASE(ARMISD::LSLL)
- MAKE_CASE(ARMISD::SRL_FLAG)
- MAKE_CASE(ARMISD::SRA_FLAG)
+ MAKE_CASE(ARMISD::SRL_GLUE)
+ MAKE_CASE(ARMISD::SRA_GLUE)
MAKE_CASE(ARMISD::RRX)
MAKE_CASE(ARMISD::ADDC)
MAKE_CASE(ARMISD::ADDE)
@@ -2073,6 +2081,8 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
return CC;
case CallingConv::PreserveMost:
return CallingConv::PreserveMost;
+ case CallingConv::PreserveAll:
+ return CallingConv::PreserveAll;
case CallingConv::ARM_AAPCS_VFP:
case CallingConv::Swift:
case CallingConv::SwiftTail:
@@ -2081,7 +2091,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
case CallingConv::Tail:
if (!Subtarget->isAAPCS_ABI())
return CallingConv::ARM_APCS;
- else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
+ else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
!isVarArg)
return CallingConv::ARM_AAPCS_VFP;
@@ -2131,6 +2141,8 @@ CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
case CallingConv::PreserveMost:
return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
+ case CallingConv::PreserveAll:
+ return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
case CallingConv::CFGuard_Check:
return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
}
@@ -2168,7 +2180,7 @@ SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue ARMTargetLowering::LowerCallResult(
- SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
+ SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
SDValue ThisVal) const {
@@ -2196,14 +2208,14 @@ SDValue ARMTargetLowering::LowerCallResult(
(VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
// Handle f64 or half of a v2f64.
SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
- InFlag);
+ InGlue);
Chain = Lo.getValue(1);
- InFlag = Lo.getValue(2);
+ InGlue = Lo.getValue(2);
VA = RVLocs[++i]; // skip ahead to next loc
SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
- InFlag);
+ InGlue);
Chain = Hi.getValue(1);
- InFlag = Hi.getValue(2);
+ InGlue = Hi.getValue(2);
if (!Subtarget->isLittle())
std::swap (Lo, Hi);
Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
@@ -2214,13 +2226,13 @@ SDValue ARMTargetLowering::LowerCallResult(
DAG.getConstant(0, dl, MVT::i32));
VA = RVLocs[++i]; // skip ahead to next loc
- Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
+ Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
Chain = Lo.getValue(1);
- InFlag = Lo.getValue(2);
+ InGlue = Lo.getValue(2);
VA = RVLocs[++i]; // skip ahead to next loc
- Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
+ Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
Chain = Hi.getValue(1);
- InFlag = Hi.getValue(2);
+ InGlue = Hi.getValue(2);
if (!Subtarget->isLittle())
std::swap (Lo, Hi);
Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
@@ -2229,9 +2241,9 @@ SDValue ARMTargetLowering::LowerCallResult(
}
} else {
Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
- InFlag);
+ InGlue);
Chain = Val.getValue(1);
- InFlag = Val.getValue(2);
+ InGlue = Val.getValue(2);
}
switch (VA.getLocInfo()) {
@@ -2405,7 +2417,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
+ unsigned NumBytes = CCInfo.getStackSize();
// SPDiff is the byte offset of the call's argument area from the callee's.
// Stores to callee stack arguments will be placed in FixedStackSlots offset
@@ -2617,11 +2629,11 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
- SDValue InFlag;
+ SDValue InGlue;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
+ RegsToPass[i].second, InGlue);
+ InGlue = Chain.getValue(1);
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
@@ -2648,13 +2660,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// those, the target's already in a register, so we don't need to do
// anything extra.
if (isa<GlobalAddressSDNode>(Callee)) {
- // When generating execute-only code we use movw movt pair.
- // Currently execute-only is only available for architectures that
- // support movw movt, so we are safe to assume that.
if (Subtarget->genExecuteOnly()) {
- assert(Subtarget->useMovt() &&
- "long-calls with execute-only requires movt and movw!");
- ++NumMovwMovt;
+ if (Subtarget->useMovt())
+ ++NumMovwMovt;
Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
} else {
@@ -2673,13 +2681,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
} else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
- // When generating execute-only code we use movw movt pair.
- // Currently execute-only is only available for architectures that
- // support movw movt, so we are safe to assume that.
if (Subtarget->genExecuteOnly()) {
- assert(Subtarget->useMovt() &&
- "long-calls with execute-only requires movt and movw!");
- ++NumMovwMovt;
+ if (Subtarget->useMovt())
+ ++NumMovwMovt;
Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
} else {
@@ -2801,8 +2805,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// we've carefully laid out the parameters so that when sp is reset they'll be
// in the correct location.
if (isTailCall && !isSibCall) {
- Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InFlag, dl);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, dl);
+ InGlue = Chain.getValue(1);
}
std::vector<SDValue> Ops;
@@ -2838,13 +2842,14 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ if (InGlue.getNode())
+ Ops.push_back(InGlue);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
if (isTailCall) {
MF.getFrameInfo().setHasTailCall();
SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
+ DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
return Ret;
}
@@ -2852,7 +2857,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Returns a chain and a flag for retval copy to use.
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
// If we're guaranteeing tail-calls will be honoured, the callee must
@@ -2862,13 +2867,13 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
uint64_t CalleePopBytes =
canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;
- Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InFlag, dl);
+ Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);
if (!Ins.empty())
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
InVals, isThisReturn,
isThisReturn ? OutVals[0] : SDValue());
}
@@ -2900,7 +2905,7 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
// all remained GPR regs. In that case we can't split parameter, we must
// send it to stack. We also must set NCRN to R4, so waste all
// remained registers.
- const unsigned NSAAOffset = State->getNextStackOffset();
+ const unsigned NSAAOffset = State->getStackSize();
if (NSAAOffset != 0 && Size > Excess) {
while (State->AllocateReg(GPRArgRegs))
;
@@ -3066,7 +3071,7 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
- if (CCInfo.getNextStackOffset()) {
+ if (CCInfo.getStackSize()) {
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -3152,7 +3157,7 @@ static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
RetOps.insert(RetOps.begin() + 1,
DAG.getConstant(LROffset, DL, MVT::i32, false));
- return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
+ return DAG.getNode(ARMISD::INTRET_GLUE, DL, MVT::Other, RetOps);
}
SDValue
@@ -3171,7 +3176,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Analyze outgoing return values.
CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
- SDValue Flag;
+ SDValue Glue;
SmallVector<SDValue, 4> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
bool isLittleEndian = Subtarget->isLittle();
@@ -3260,14 +3265,14 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
Chain =
DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- HalfGPRs.getValue(isLittleEndian ? 0 : 1), Flag);
- Flag = Chain.getValue(1);
+ HalfGPRs.getValue(isLittleEndian ? 0 : 1), Glue);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain =
DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- HalfGPRs.getValue(isLittleEndian ? 1 : 0), Flag);
- Flag = Chain.getValue(1);
+ HalfGPRs.getValue(isLittleEndian ? 1 : 0), Glue);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
@@ -3280,18 +3285,18 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Arg);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- fmrrd.getValue(isLittleEndian ? 0 : 1), Flag);
- Flag = Chain.getValue(1);
+ fmrrd.getValue(isLittleEndian ? 0 : 1), Glue);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- fmrrd.getValue(isLittleEndian ? 1 : 0), Flag);
+ fmrrd.getValue(isLittleEndian ? 1 : 0), Glue);
} else
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
    // Guarantee that all emitted copies are stuck together, so that nothing
    // can be scheduled between them and clobber the outgoing value registers.
- Flag = Chain.getValue(1);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(
VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
}
@@ -3311,8 +3316,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Update chain and glue.
RetOps[0] = Chain;
- if (Flag.getNode())
- RetOps.push_back(Flag);
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
// CPUs which aren't M-class use a special sequence to return from
// exceptions (roughly, any instruction setting pc and cpsr simultaneously,
@@ -3327,8 +3332,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
return LowerInterruptReturn(RetOps, dl, DAG);
}
- ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_FLAG :
- ARMISD::RET_FLAG;
+ ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE :
+ ARMISD::RET_GLUE;
return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
}
@@ -3391,8 +3396,8 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
bool HasRet = false;
for (const SDNode *U : Copy->uses()) {
- if (U->getOpcode() != ARMISD::RET_FLAG &&
- U->getOpcode() != ARMISD::INTRET_FLAG)
+ if (U->getOpcode() != ARMISD::RET_GLUE &&
+ U->getOpcode() != ARMISD::INTRET_GLUE)
return false;
HasRet = true;
}
@@ -3424,10 +3429,8 @@ static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
assert(WriteValue.getValueType() == MVT::i64
&& "LowerWRITE_REGISTER called for non-i64 type argument.");
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
- DAG.getConstant(0, DL, MVT::i32));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
- DAG.getConstant(1, DL, MVT::i32));
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitScalar(WriteValue, DL, MVT::i32, MVT::i32);
SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
}
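
Several hunks in this file fold the two EXTRACT_ELEMENT nodes that pulled out the low and high i32 halves of an i64 into a single DAG.SplitScalar() call. The snippet below is only the value-level behaviour of that split, on plain integers; the splitScalar64 helper is an invention for this example, not an LLVM API.

// Element 0 of the old EXTRACT_ELEMENT pair was the low half, element 1 the
// high half; SplitScalar returns the same two halves as a pair.
#include <cstdint>
#include <cstdio>
#include <utility>

static std::pair<uint32_t, uint32_t> splitScalar64(uint64_t V) {
  uint32_t Lo = static_cast<uint32_t>(V);        // low 32 bits
  uint32_t Hi = static_cast<uint32_t>(V >> 32);  // high 32 bits
  return {Lo, Hi};
}

int main() {
  auto [Lo, Hi] = splitScalar64(0x0123456789abcdefULL);
  // The WIN__DBZCHK hunk later ORs the halves to test "whole value != 0".
  std::printf("Lo=0x%08x Hi=0x%08x NonZero=%d\n", Lo, Hi, (Lo | Hi) != 0);
}
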
@@ -3481,6 +3484,11 @@ SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
}
unsigned ARMTargetLowering::getJumpTableEncoding() const {
+ // If we don't have a 32-bit pc-relative branch instruction then the jump
+ // table consists of block addresses. Usually this is inline, but for
+ // execute-only it must be placed out-of-line.
+ if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())
+ return MachineJumpTableInfo::EK_BlockAddress;
return MachineJumpTableInfo::EK_Inline;
}
@@ -3945,9 +3953,12 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
}
// If we have T2 ops, we can materialize the address directly via movt/movw
- // pair. This is always cheaper.
- if (Subtarget->useMovt()) {
- ++NumMovwMovt;
+ // pair. This is always cheaper. If we need to generate execute-only code, and we
+ // only have Thumb1 available, we can't use a constant pool and are forced to
+ // use immediate relocations.
+ if (Subtarget->useMovt() || Subtarget->genExecuteOnly()) {
+ if (Subtarget->useMovt())
+ ++NumMovwMovt;
// FIXME: Once remat is capable of dealing with instructions with register
// operands, expand this into two nodes.
return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
@@ -4112,11 +4123,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
// else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
const SDValue &Operand = Op.getOperand(1);
const EVT VTy = Op.getValueType();
-
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
- DAG.getConstant(1, dl, VTy));
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VTy, Operand,
- DAG.getConstant(0, dl, VTy));
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitScalar(Operand, dl, VTy, VTy);
SDValue Constant0 = DAG.getConstant(0, dl, VTy);
SDValue Constant1 = DAG.getConstant(1, dl, VTy);
SDValue Constant31 = DAG.getConstant(31, dl, VTy);
@@ -4412,20 +4420,17 @@ void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
// the result of va_next.
  // If there are no regs to be stored, just point the address after the last
  // argument passed via the stack.
- int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
- CCInfo.getInRegsParamsCount(),
- CCInfo.getNextStackOffset(),
- std::max(4U, TotalArgRegsSaveSize));
+ int FrameIndex = StoreByValRegs(
+ CCInfo, DAG, dl, Chain, nullptr, CCInfo.getInRegsParamsCount(),
+ CCInfo.getStackSize(), std::max(4U, TotalArgRegsSaveSize));
AFI->setVarArgsFrameIndex(FrameIndex);
}
bool ARMTargetLowering::splitValueIntoRegisterParts(
SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
- bool IsABIRegCopy = CC.has_value();
EVT ValueVT = Val.getValueType();
- if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
- PartVT == MVT::f32) {
+ if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
unsigned ValueBits = ValueVT.getSizeInBits();
unsigned PartBits = PartVT.getSizeInBits();
Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
@@ -4440,9 +4445,7 @@ bool ARMTargetLowering::splitValueIntoRegisterParts(
SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
- bool IsABIRegCopy = CC.has_value();
- if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
- PartVT == MVT::f32) {
+ if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
unsigned ValueBits = ValueVT.getSizeInBits();
unsigned PartBits = PartVT.getSizeInBits();
SDValue Val = Parts[0];
@@ -4654,7 +4657,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
// varargs
if (isVarArg && MFI.hasVAStart()) {
- VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset(),
+ VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getStackSize(),
TotalArgRegsSaveSize);
if (AFI->isCmseNSEntryFunction()) {
DiagnosticInfoUnsupported Diag(
@@ -4664,7 +4667,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
}
}
- unsigned StackArgSize = CCInfo.getNextStackOffset();
+ unsigned StackArgSize = CCInfo.getStackSize();
bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
if (canGuaranteeTCO(CallConv, TailCallOpt)) {
// The only way to guarantee a tail call is if the callee restores its
@@ -4676,7 +4679,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
}
AFI->setArgumentStackSize(StackArgSize);
- if (CCInfo.getNextStackOffset() > 0 && AFI->isCmseNSEntryFunction()) {
+ if (CCInfo.getStackSize() > 0 && AFI->isCmseNSEntryFunction()) {
DiagnosticInfoUnsupported Diag(
DAG.getMachineFunction().getFunction(),
"secure entry function requires arguments on stack", dl.getDebugLoc());
@@ -4788,7 +4791,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
uint64_t RHSV = RHSC->getZExtValue();
if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
- unsigned ShiftBits = countLeadingZeros(Mask);
+ unsigned ShiftBits = llvm::countl_zero(Mask);
if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
@@ -5049,7 +5052,7 @@ SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
static SDValue LowerADDSUBSAT(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) {
EVT VT = Op.getValueType();
- if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
+ if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->isThumb1Only())
return SDValue();
if (!VT.isSimple())
return SDValue();
@@ -5335,10 +5338,10 @@ static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) {
SDLoc dl(Op);
if (Val1 == ~Val2)
return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
- DAG.getConstant(countTrailingOnes(K), dl, VT));
+ DAG.getConstant(llvm::countr_one(K), dl, VT));
if (NegVal == 0)
return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
- DAG.getConstant(countTrailingOnes(K), dl, VT));
+ DAG.getConstant(llvm::countr_one(K), dl, VT));
return SDValue();
}
@@ -6247,11 +6250,8 @@ SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
// if we can combine the bitcast with its source.
if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
return Val;
-
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
- DAG.getConstant(0, dl, MVT::i32));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
- DAG.getConstant(1, dl, MVT::i32));
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitScalar(Op, dl, MVT::i32, MVT::i32);
return DAG.getNode(ISD::BITCAST, dl, DstVT,
DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
}
@@ -6663,13 +6663,10 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
} else if (ShOpc == ISD::SRA)
ShPartsOpc = ARMISD::ASRL;
- // Lower 32 bits of the destination/source
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
- DAG.getConstant(0, dl, MVT::i32));
- // Upper 32 bits of the destination/source
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
- DAG.getConstant(1, dl, MVT::i32));
-
+ // Split Lower/Upper 32 bits of the destination/source
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) =
+ DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
// Generate the shift operation as computed above
Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
ShAmt);
@@ -6687,14 +6684,12 @@ static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
- DAG.getConstant(0, dl, MVT::i32));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
- DAG.getConstant(1, dl, MVT::i32));
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
- // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
+ // First, build a SRA_GLUE/SRL_GLUE op, which shifts the top part by one and
// captures the result into a carry flag.
- unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
+ unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE;
Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
// The low part is an ARMISD::RRX operand, which shifts the carry in.
@@ -6893,7 +6888,7 @@ static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
- // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we
+ // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we
// have to invert the carry first.
Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
DAG.getConstant(1, DL, MVT::i32), Carry);
@@ -7075,6 +7070,10 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
// Prevent floating-point constants from using literal loads
// when execute-only is enabled.
if (ST->genExecuteOnly()) {
+ // We shouldn't trigger this for v6m execute-only
+ assert((!ST->isThumb1Only() || ST->hasV8MBaselineOps()) &&
+ "Unexpected architecture");
+
// If we can represent the constant as an immediate, don't lower it
if (isFPImmLegal(FPVal, VT))
return Op;
@@ -9796,7 +9795,7 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG,
return N0;
}
-static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
SDNode *N = Op.getNode();
EVT VT = N->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
@@ -9806,7 +9805,7 @@ static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
SDValue Result;
- if (Op.getOpcode() == ISD::ADDCARRY) {
+ if (Op.getOpcode() == ISD::UADDO_CARRY) {
// This converts the boolean value carry into the carry flag.
Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
@@ -9817,7 +9816,7 @@ static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
// Now convert the carry flag into a boolean value.
Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
} else {
- // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we
+ // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we
// have to invert the carry first.
Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
DAG.getConstant(1, DL, MVT::i32), Carry);
@@ -9831,7 +9830,7 @@ static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
// Now convert the carry flag into a boolean value.
Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
// But the carry returned by ARMISD::SUBE is not a borrow as expected
- // by ISD::SUBCARRY, so compute 1 - C.
+ // by ISD::USUBO_CARRY, so compute 1 - C.
Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
DAG.getConstant(1, DL, MVT::i32), Carry);
}
@@ -10012,10 +10011,8 @@ static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InCh
SDValue Op = N->getOperand(1);
if (N->getValueType(0) == MVT::i32)
return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
- DAG.getConstant(0, DL, MVT::i32));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
- DAG.getConstant(1, DL, MVT::i32));
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitScalar(Op, DL, MVT::i32, MVT::i32);
return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
}
@@ -10087,7 +10084,8 @@ void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
- !Subtarget->isThumb1Only() && LD->isVolatile()) {
+ !Subtarget->isThumb1Only() && LD->isVolatile() &&
+ LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
SDLoc dl(N);
SDValue Result = DAG.getMemIntrinsicNode(
ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
@@ -10144,7 +10142,8 @@ static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
- !Subtarget->isThumb1Only() && ST->isVolatile()) {
+ !Subtarget->isThumb1Only() && ST->isVolatile() &&
+ ST->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
SDNode *N = Op.getNode();
SDLoc dl(N);
@@ -10275,6 +10274,80 @@ static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG,
return LowerVecReduce(Op, DAG, ST);
}
+static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ if (!ST->hasNEON())
+ return SDValue();
+
+ SDLoc dl(Op);
+ SDValue Op0 = Op->getOperand(0);
+ EVT VT = Op0.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+
+ unsigned PairwiseIntrinsic = 0;
+ switch (Op->getOpcode()) {
+ default:
+ llvm_unreachable("Expected VECREDUCE opcode");
+ case ISD::VECREDUCE_UMIN:
+ PairwiseIntrinsic = Intrinsic::arm_neon_vpminu;
+ break;
+ case ISD::VECREDUCE_UMAX:
+ PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxu;
+ break;
+ case ISD::VECREDUCE_SMIN:
+ PairwiseIntrinsic = Intrinsic::arm_neon_vpmins;
+ break;
+ case ISD::VECREDUCE_SMAX:
+ PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxs;
+ break;
+ }
+ SDValue PairwiseOp = DAG.getConstant(PairwiseIntrinsic, dl, MVT::i32);
+
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumActiveLanes = NumElts;
+
+ assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
+ NumActiveLanes == 2) &&
+         "Only expected a power of 2 vector size");
+
+ // Split 128-bit vectors, since vpmin/max takes 2 64-bit vectors.
+ if (VT.is128BitVector()) {
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(Op0, dl);
+ VT = Lo.getValueType();
+ Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Lo, Hi});
+ NumActiveLanes /= 2;
+ }
+
+ // Use pairwise reductions until one lane remains
+ while (NumActiveLanes > 1) {
+ Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Op0, Op0});
+ NumActiveLanes /= 2;
+ }
+
+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
+ DAG.getConstant(0, dl, MVT::i32));
+
+ // Result type may be wider than element type.
+ if (EltVT != Op.getValueType()) {
+ unsigned Extend = 0;
+ switch (Op->getOpcode()) {
+ default:
+ llvm_unreachable("Expected VECREDUCE opcode");
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_UMAX:
+ Extend = ISD::ZERO_EXTEND;
+ break;
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_SMAX:
+ Extend = ISD::SIGN_EXTEND;
+ break;
+ }
+ Res = DAG.getNode(Extend, dl, Op.getValueType(), Res);
+ }
+ return Res;
+}
+
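
LowerVecReduceMinMax above reduces a vector with NEON pairwise min/max intrinsics, halving the number of active lanes each step until one remains. The sketch below shows the same shape on a plain array, with unsigned min standing in for vpmin and a power-of-two lane count assumed, just as the assert in the hunk requires.

#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

static unsigned pairwiseReduceMin(std::vector<unsigned> Lanes) {
  // One "vpmin" step per iteration: each output lane is the min of a pair.
  while (Lanes.size() > 1) {
    std::vector<unsigned> Next(Lanes.size() / 2);
    for (size_t I = 0; I < Next.size(); ++I)
      Next[I] = std::min(Lanes[2 * I], Lanes[2 * I + 1]);
    Lanes = std::move(Next);
  }
  return Lanes[0];
}

int main() {
  std::printf("%u\n", pairwiseReduceMin({7, 3, 9, 1, 8, 5, 6, 2})); // prints 1
}

The real lowering keeps everything in NEON registers (splitting 128-bit vectors first, since vpmin/vpmax work on 64-bit halves), but the lane count shrinks in exactly this way.
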
static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering()))
// Acquire/Release load/store is not legal for targets without a dmb or
@@ -10477,8 +10550,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
return LowerDIV_Windows(Op, DAG, /* Signed */ false);
return LowerUDIV(Op, DAG, Subtarget);
- case ISD::ADDCARRY:
- case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
+ case ISD::UADDO_CARRY:
+ case ISD::USUBO_CARRY:
+ return LowerUADDSUBO_CARRY(Op, DAG);
case ISD::SADDO:
case ISD::SSUBO:
return LowerSignedALUO(Op, DAG);
@@ -10506,6 +10580,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VECREDUCE_FMIN:
case ISD::VECREDUCE_FMAX:
return LowerVecReduceF(Op, DAG, Subtarget);
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_SMAX:
+ return LowerVecReduceMinMax(Op, DAG, Subtarget);
case ISD::ATOMIC_LOAD:
case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
@@ -10543,12 +10622,8 @@ static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
return;
SDLoc dl(N);
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- N->getOperand(3),
- DAG.getConstant(0, dl, MVT::i32));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
- N->getOperand(3),
- DAG.getConstant(1, dl, MVT::i32));
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(3), dl, MVT::i32, MVT::i32);
SDValue LongMul = DAG.getNode(Opc, dl,
DAG.getVTList(MVT::i32, MVT::i32),
@@ -11418,18 +11493,12 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
// Load an immediate to varEnd.
Register varEnd = MRI.createVirtualRegister(TRC);
if (Subtarget->useMovt()) {
- unsigned Vtmp = varEnd;
- if ((LoopSize & 0xFFFF0000) != 0)
- Vtmp = MRI.createVirtualRegister(TRC);
- BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16), Vtmp)
- .addImm(LoopSize & 0xFFFF)
- .add(predOps(ARMCC::AL));
-
- if ((LoopSize & 0xFFFF0000) != 0)
- BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16), varEnd)
- .addReg(Vtmp)
- .addImm(LoopSize >> 16)
- .add(predOps(ARMCC::AL));
+ BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi32imm : ARM::MOVi32imm),
+ varEnd)
+ .addImm(LoopSize);
+ } else if (Subtarget->genExecuteOnly()) {
+ assert(IsThumb && "Non-thumb expected to have used movt");
+ BuildMI(BB, dl, TII->get(ARM::tMOVi32imm), varEnd).addImm(LoopSize);
} else {
MachineConstantPool *ConstantPool = MF->getConstantPool();
Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
@@ -13595,11 +13664,9 @@ static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,
NA = DAG.getNode(ISD::ADD, dl, MVT::i64, Inp, NA);
}
- SmallVector<SDValue, 4> Ops;
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, NA,
- DAG.getConstant(0, dl, MVT::i32)));
- Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, NA,
- DAG.getConstant(1, dl, MVT::i32)));
+ SmallVector<SDValue, 4> Ops(2);
+ std::tie(Ops[0], Ops[1]) = DAG.SplitScalar(NA, dl, MVT::i32, MVT::i32);
+
unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;
for (unsigned I = S, E = VecRed.getNumOperands(); I < E; I++)
Ops.push_back(VecRed->getOperand(I));
@@ -13725,6 +13792,11 @@ bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
return false;
}
+bool ARMTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
+ EVT VT) const {
+ return Subtarget->hasMVEIntegerOps() && isTypeLegal(VT);
+}
+
bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
if (!Subtarget->hasNEON()) {
if (Subtarget->isThumb1Only())
@@ -13838,7 +13910,7 @@ static SDValue PerformSHLSimplify(SDNode *N,
// The immediates are encoded as an 8-bit value that can be rotated.
auto LargeImm = [](const APInt &Imm) {
- unsigned Zeros = Imm.countLeadingZeros() + Imm.countTrailingZeros();
+ unsigned Zeros = Imm.countl_zero() + Imm.countr_zero();
return Imm.getBitWidth() - Zeros > 8;
};
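
The LargeImm lambda above treats an immediate as "large" when the span between its lowest and highest set bits is wider than 8 bits, i.e. it could not fit a rotated 8-bit encoding even in the best case. A standalone version of that width test, assuming a fixed 32-bit immediate (the real lambda works on an APInt of the operand's width) and with a zero guard added only for this demo:

#include <bit>
#include <cstdint>
#include <cstdio>

static bool largeImm(uint32_t Imm) {
  if (Imm == 0)
    return false;                               // zero trivially fits
  unsigned Zeros = std::countl_zero(Imm) + std::countr_zero(Imm);
  return 32 - Zeros > 8;                        // span of set bits exceeds 8
}

int main() {
  std::printf("0x00ff0000 large? %d\n", largeImm(0x00ff0000u)); // 0: 8-bit span
  std::printf("0x00ff0001 large? %d\n", largeImm(0x00ff0001u)); // 1: 24-bit span
}
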
@@ -14079,7 +14151,7 @@ static SDValue PerformMULCombine(SDNode *N,
return SDValue();
int64_t MulAmt = C->getSExtValue();
- unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt);
+ unsigned ShiftAmt = llvm::countr_zero<uint64_t>(MulAmt);
ShiftAmt = ShiftAmt & (32 - 1);
SDValue V = N->getOperand(0);
@@ -14089,7 +14161,7 @@ static SDValue PerformMULCombine(SDNode *N,
MulAmt >>= ShiftAmt;
if (MulAmt >= 0) {
- if (isPowerOf2_32(MulAmt - 1)) {
+ if (llvm::has_single_bit<uint32_t>(MulAmt - 1)) {
// (mul x, 2^N + 1) => (add (shl x, N), x)
Res = DAG.getNode(ISD::ADD, DL, VT,
V,
@@ -14097,7 +14169,7 @@ static SDValue PerformMULCombine(SDNode *N,
V,
DAG.getConstant(Log2_32(MulAmt - 1), DL,
MVT::i32)));
- } else if (isPowerOf2_32(MulAmt + 1)) {
+ } else if (llvm::has_single_bit<uint32_t>(MulAmt + 1)) {
// (mul x, 2^N - 1) => (sub (shl x, N), x)
Res = DAG.getNode(ISD::SUB, DL, VT,
DAG.getNode(ISD::SHL, DL, VT,
@@ -14109,7 +14181,7 @@ static SDValue PerformMULCombine(SDNode *N,
return SDValue();
} else {
uint64_t MulAmtAbs = -MulAmt;
- if (isPowerOf2_32(MulAmtAbs + 1)) {
+ if (llvm::has_single_bit<uint32_t>(MulAmtAbs + 1)) {
// (mul x, -(2^N - 1)) => (sub x, (shl x, N))
Res = DAG.getNode(ISD::SUB, DL, VT,
V,
@@ -14117,7 +14189,7 @@ static SDValue PerformMULCombine(SDNode *N,
V,
DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
MVT::i32)));
- } else if (isPowerOf2_32(MulAmtAbs - 1)) {
+ } else if (llvm::has_single_bit<uint32_t>(MulAmtAbs - 1)) {
// (mul x, -(2^N + 1)) => - (add (shl x, N), x)
Res = DAG.getNode(ISD::ADD, DL, VT,
V,
@@ -14192,7 +14264,7 @@ static SDValue CombineANDShift(SDNode *N,
// First pattern: right shift, then mask off leading bits.
// FIXME: Use demanded bits?
if (!LeftShift && isMask_32(C1)) {
- uint32_t C3 = countLeadingZeros(C1);
+ uint32_t C3 = llvm::countl_zero(C1);
if (C2 < C3) {
SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
DAG.getConstant(C3 - C2, DL, MVT::i32));
@@ -14203,7 +14275,7 @@ static SDValue CombineANDShift(SDNode *N,
// First pattern, reversed: left shift, then mask off trailing bits.
if (LeftShift && isMask_32(~C1)) {
- uint32_t C3 = countTrailingZeros(C1);
+ uint32_t C3 = llvm::countr_zero(C1);
if (C2 < C3) {
SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
DAG.getConstant(C3 - C2, DL, MVT::i32));
@@ -14215,8 +14287,8 @@ static SDValue CombineANDShift(SDNode *N,
// Second pattern: left shift, then mask off leading bits.
// FIXME: Use demanded bits?
if (LeftShift && isShiftedMask_32(C1)) {
- uint32_t Trailing = countTrailingZeros(C1);
- uint32_t C3 = countLeadingZeros(C1);
+ uint32_t Trailing = llvm::countr_zero(C1);
+ uint32_t C3 = llvm::countl_zero(C1);
if (Trailing == C2 && C2 + C3 < 32) {
SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
DAG.getConstant(C2 + C3, DL, MVT::i32));
@@ -14228,8 +14300,8 @@ static SDValue CombineANDShift(SDNode *N,
// Second pattern, reversed: right shift, then mask off trailing bits.
// FIXME: Handle other patterns of known/demanded bits.
if (!LeftShift && isShiftedMask_32(C1)) {
- uint32_t Leading = countLeadingZeros(C1);
- uint32_t C3 = countTrailingZeros(C1);
+ uint32_t Leading = llvm::countl_zero(C1);
+ uint32_t C3 = llvm::countr_zero(C1);
if (Leading == C2 && C2 + C3 < 32) {
SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
DAG.getConstant(C2 + C3, DL, MVT::i32));
@@ -14400,7 +14472,7 @@ static SDValue PerformORCombineToBFI(SDNode *N,
return SDValue();
if (ARM::isBitFieldInvertedMask(Mask)) {
- Val >>= countTrailingZeros(~Mask);
+ Val >>= llvm::countr_zero(~Mask);
Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
DAG.getConstant(Val, DL, MVT::i32),
@@ -14428,7 +14500,7 @@ static SDValue PerformORCombineToBFI(SDNode *N,
(Mask == 0xffff || Mask == 0xffff0000))
return SDValue();
// 2a
- unsigned amt = countTrailingZeros(Mask2);
+ unsigned amt = llvm::countr_zero(Mask2);
Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
DAG.getConstant(amt, DL, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
@@ -14445,7 +14517,7 @@ static SDValue PerformORCombineToBFI(SDNode *N,
(Mask2 == 0xffff || Mask2 == 0xffff0000))
return SDValue();
// 2b
- unsigned lsb = countTrailingZeros(Mask);
+ unsigned lsb = llvm::countr_zero(Mask);
Res = DAG.getNode(ISD::SRL, DL, VT, N00,
DAG.getConstant(lsb, DL, MVT::i32));
Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
@@ -14464,7 +14536,7 @@ static SDValue PerformORCombineToBFI(SDNode *N,
// where lsb(mask) == #shamt and masked bits of B are known zero.
SDValue ShAmt = N00.getOperand(1);
unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
- unsigned LSB = countTrailingZeros(Mask);
+ unsigned LSB = llvm::countr_zero(Mask);
if (ShAmtC != LSB)
return SDValue();
@@ -14687,7 +14759,7 @@ static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
SDValue From = N->getOperand(1);
ToMask = ~cast<ConstantSDNode>(N->getOperand(2))->getAPIntValue();
- FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.countPopulation());
+ FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.popcount());
// If the Base came from a SHR #C, we can deduce that it is really testing bit
// #C in the base of the SHR.
@@ -14706,8 +14778,8 @@ static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
//
// Neither A nor B may be zero.
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
- unsigned LastActiveBitInA = A.countTrailingZeros();
- unsigned FirstActiveBitInB = B.getBitWidth() - B.countLeadingZeros() - 1;
+ unsigned LastActiveBitInA = A.countr_zero();
+ unsigned FirstActiveBitInB = B.getBitWidth() - B.countl_zero() - 1;
return LastActiveBitInA - 1 == FirstActiveBitInB;
}
@@ -14753,7 +14825,7 @@ static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
if (!N11C)
return SDValue();
unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
- unsigned LSB = countTrailingZeros(~InvMask);
+ unsigned LSB = llvm::countr_zero(~InvMask);
unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
assert(Width <
static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
@@ -14785,9 +14857,8 @@ static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
SDLoc dl(N);
if (NewFromMask[0] == 0)
- From1 = DAG.getNode(
- ISD::SRL, dl, VT, From1,
- DAG.getConstant(NewFromMask.countTrailingZeros(), dl, VT));
+ From1 = DAG.getNode(ISD::SRL, dl, VT, From1,
+ DAG.getConstant(NewFromMask.countr_zero(), dl, VT));
return DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1,
DAG.getConstant(~NewToMask, dl, VT));
}
@@ -14801,7 +14872,7 @@ static SDValue PerformBFICombine(SDNode *N, SelectionDAG &DAG) {
APInt ToMask2 = ~N0.getConstantOperandAPInt(2);
if (!N0.hasOneUse() || (ToMask1 & ToMask2) != 0 ||
- ToMask1.countLeadingZeros() < ToMask2.countLeadingZeros())
+ ToMask1.countl_zero() < ToMask2.countl_zero())
return SDValue();
EVT VT = N->getValueType(0);
@@ -15005,16 +15076,31 @@ static SDValue PerformVMOVhrCombine(SDNode *N,
// FullFP16: half values are passed in S-registers, and we don't
// need any of the bitcast and moves:
//
- // t2: f32,ch = CopyFromReg t0, Register:f32 %0
+ // t2: f32,ch1,gl1? = CopyFromReg ch, Register:f32 %0, gl?
// t5: i32 = bitcast t2
// t18: f16 = ARMISD::VMOVhr t5
+ // =>
+ // tN: f16,ch2,gl2? = CopyFromReg ch, Register::f32 %0, gl?
if (Op0->getOpcode() == ISD::BITCAST) {
SDValue Copy = Op0->getOperand(0);
if (Copy.getValueType() == MVT::f32 &&
Copy->getOpcode() == ISD::CopyFromReg) {
- SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1)};
+ bool HasGlue = Copy->getNumOperands() == 3;
+ SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
+ HasGlue ? Copy->getOperand(2) : SDValue()};
+ EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};
SDValue NewCopy =
- DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(N), N->getValueType(0), Ops);
+ DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(N),
+ DCI.DAG.getVTList(ArrayRef(OutTys, HasGlue ? 3 : 2)),
+ ArrayRef(Ops, HasGlue ? 3 : 2));
+
+ // Update Users, Chains, and Potential Glue.
+ DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewCopy.getValue(0));
+ DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(1), NewCopy.getValue(1));
+ if (HasGlue)
+ DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(2),
+ NewCopy.getValue(2));
+
return NewCopy;
}
}
@@ -15468,51 +15554,6 @@ static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-// When lowering complex nodes that we recognize, like VQDMULH and MULH, we
-// can end up with shuffle(binop(shuffle, shuffle)), that can be simplified to
-// binop as the shuffles cancel out.
-static SDValue FlattenVectorShuffle(ShuffleVectorSDNode *N, SelectionDAG &DAG) {
- EVT VT = N->getValueType(0);
- if (!N->getOperand(1).isUndef() || N->getOperand(0).getValueType() != VT)
- return SDValue();
- SDValue Op = N->getOperand(0);
-
- // Looking for binary operators that will have been folded from
- // truncates/extends.
- switch (Op.getOpcode()) {
- case ARMISD::VQDMULH:
- case ISD::MULHS:
- case ISD::MULHU:
- case ISD::ABDS:
- case ISD::ABDU:
- case ISD::AVGFLOORS:
- case ISD::AVGFLOORU:
- case ISD::AVGCEILS:
- case ISD::AVGCEILU:
- break;
- default:
- return SDValue();
- }
-
- ShuffleVectorSDNode *Op0 = dyn_cast<ShuffleVectorSDNode>(Op.getOperand(0));
- ShuffleVectorSDNode *Op1 = dyn_cast<ShuffleVectorSDNode>(Op.getOperand(1));
- if (!Op0 || !Op1 || !Op0->getOperand(1).isUndef() ||
- !Op1->getOperand(1).isUndef() || Op0->getMask() != Op1->getMask() ||
- Op0->getOperand(0).getValueType() != VT)
- return SDValue();
-
- // Check the mask turns into an identity shuffle.
- ArrayRef<int> NMask = N->getMask();
- ArrayRef<int> OpMask = Op0->getMask();
- for (int i = 0, e = NMask.size(); i != e; i++) {
- if (NMask[i] > 0 && OpMask[NMask[i]] > 0 && OpMask[NMask[i]] != i)
- return SDValue();
- }
-
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- Op0->getOperand(0), Op1->getOperand(0));
-}
-
static SDValue
PerformInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
SDValue Vec = N->getOperand(0);
@@ -15581,8 +15622,6 @@ static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N,
/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
/// ISD::VECTOR_SHUFFLE.
static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
- if (SDValue R = FlattenVectorShuffle(cast<ShuffleVectorSDNode>(N), DAG))
- return R;
if (SDValue R = PerformShuffleVMOVNCombine(cast<ShuffleVectorSDNode>(N), DAG))
return R;
@@ -16855,6 +16894,46 @@ static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags);
}
+static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ if (!N->getFlags().hasAllowReassociation())
+ return SDValue();
+
+  // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
+ auto ReassocComplex = [&](SDValue A, SDValue B) {
+ if (A.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
+ return SDValue();
+ unsigned Opc = A.getConstantOperandVal(0);
+ if (Opc != Intrinsic::arm_mve_vcmlaq)
+ return SDValue();
+ SDValue VCMLA = DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, VT, A.getOperand(0), A.getOperand(1),
+ DAG.getNode(ISD::FADD, DL, VT, A.getOperand(2), B, N->getFlags()),
+ A.getOperand(3), A.getOperand(4));
+ VCMLA->setFlags(A->getFlags());
+ return VCMLA;
+ };
+ if (SDValue R = ReassocComplex(LHS, RHS))
+ return R;
+ if (SDValue R = ReassocComplex(RHS, LHS))
+ return R;
+
+ return SDValue();
+}
+
+static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ if (SDValue S = PerformFAddVSelectCombine(N, DAG, Subtarget))
+ return S;
+ if (SDValue S = PerformFADDVCMLACombine(N, DAG))
+ return S;
+ return SDValue();
+}
+
/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
/// can replace combinations of VCVT (integer to floating-point) and VDIV
/// when the VDIV has a constant operand that is a power of 2.
@@ -17171,6 +17250,42 @@ static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Looks for vaddv(shuffle) or vmlav(shuffle, shuffle), with a shuffle where all
+// the lanes are used. Due to the reduction being commutative the shuffle can be
+// removed.
+static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG) {
+ unsigned VecOp = N->getOperand(0).getValueType().isVector() ? 0 : 2;
+ auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp));
+ if (!Shuf || !Shuf->getOperand(1).isUndef())
+ return SDValue();
+
+ // Check all elements are used once in the mask.
+ ArrayRef<int> Mask = Shuf->getMask();
+ APInt SetElts(Mask.size(), 0);
+ for (int E : Mask) {
+ if (E < 0 || E >= (int)Mask.size())
+ return SDValue();
+ SetElts.setBit(E);
+ }
+ if (!SetElts.isAllOnes())
+ return SDValue();
+
+ if (N->getNumOperands() != VecOp + 1) {
+ auto *Shuf2 = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp + 1));
+ if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)
+ return SDValue();
+ }
+
+ SmallVector<SDValue> Ops;
+ for (SDValue Op : N->ops()) {
+ if (Op.getValueType().isVector())
+ Ops.push_back(Op.getOperand(0));
+ else
+ Ops.push_back(Op);
+ }
+ return DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops);
+}
+
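
PerformReduceShuffleCombine above only drops the shuffle when its mask is a permutation that touches every lane exactly once. Below is that check on a plain std::vector<int> mask; the UsedLane vector plays the role of the APInt bit-set in the hunk and is named here only for illustration.

#include <cstdio>
#include <vector>

static bool maskUsesEveryLaneOnce(const std::vector<int> &Mask) {
  std::vector<bool> UsedLane(Mask.size(), false);
  for (int E : Mask) {
    if (E < 0 || E >= static_cast<int>(Mask.size()))
      return false;              // out-of-range (or undef) lane index
    if (UsedLane[E])
      return false;              // a lane used twice means another is missing
    UsedLane[E] = true;
  }
  return true;                   // all lanes covered exactly once
}

int main() {
  std::printf("%d\n", maskUsesEveryLaneOnce({3, 1, 0, 2})); // 1: permutation
  std::printf("%d\n", maskUsesEveryLaneOnce({0, 0, 1, 2})); // 0: lane 3 unused
}
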
static SDValue PerformVMOVNCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue Op0 = N->getOperand(0);
@@ -17227,6 +17342,27 @@ static SDValue PerformVQMOVNCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformVQDMULHCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
+ auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
+ // Turn VQDMULH(shuffle, shuffle) -> shuffle(VQDMULH)
+ if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
+ LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
+ (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
+ SDLoc DL(N);
+ SDValue NewBinOp = DCI.DAG.getNode(N->getOpcode(), DL, VT,
+ LHS.getOperand(0), RHS.getOperand(0));
+ SDValue UndefV = LHS.getOperand(1);
+ return DCI.DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
+ }
+ return SDValue();
+}
+
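
The VQDMULH(shuffle, shuffle) -> shuffle(VQDMULH) rewrite above relies on a lane-wise operation commuting with a permutation applied to both operands. A scalar check of that property, with ordinary multiplication standing in for the lane-wise op:

#include <cassert>
#include <cstdio>
#include <vector>

using Vec = std::vector<int>;

static Vec shuffle(const Vec &V, const Vec &Mask) {
  Vec R(Mask.size());
  for (size_t I = 0; I < Mask.size(); ++I)
    R[I] = V[Mask[I]];
  return R;
}

static Vec mulLanes(const Vec &A, const Vec &B) {
  Vec R(A.size());
  for (size_t I = 0; I < A.size(); ++I)
    R[I] = A[I] * B[I];
  return R;
}

int main() {
  Vec A{1, 2, 3, 4}, B{5, 6, 7, 8}, Mask{2, 0, 3, 1};
  // binop(shuffle(A, M), shuffle(B, M)) == shuffle(binop(A, B), M)
  assert(mulLanes(shuffle(A, Mask), shuffle(B, Mask)) ==
         shuffle(mulLanes(A, B), Mask));
  std::printf("shuffle and lane-wise op commute\n");
}

The hunk additionally requires one of the shuffles to have a single use (or both operands to be the same node), so the rewrite never duplicates work.
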
static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
SDValue Op0 = N->getOperand(0);
@@ -17511,7 +17647,7 @@ static SDValue PerformShiftCombine(SDNode *N,
if (AndMask == 255 || AndMask == 65535)
return SDValue();
if (isMask_32(AndMask)) {
- uint32_t MaskedBits = countLeadingZeros(AndMask);
+ uint32_t MaskedBits = llvm::countl_zero(AndMask);
if (MaskedBits > ShiftAmt) {
SDLoc DL(N);
SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
@@ -17726,10 +17862,10 @@ static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG,
SDLoc DL(Op);
if (MinC == ~MaxC)
return DAG.getNode(ARMISD::SSAT, DL, VT, Input,
- DAG.getConstant(MinC.countTrailingOnes(), DL, VT));
+ DAG.getConstant(MinC.countr_one(), DL, VT));
if (MaxC == 0)
return DAG.getNode(ARMISD::USAT, DL, VT, Input,
- DAG.getConstant(MinC.countTrailingOnes(), DL, VT));
+ DAG.getConstant(MinC.countr_one(), DL, VT));
return SDValue();
}
@@ -17905,7 +18041,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
// Now, is it profitable to continue?
APInt OrCI = OrC->getAPIntValue();
unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
- if (OrCI.countPopulation() > Heuristic)
+ if (OrCI.popcount() > Heuristic)
return SDValue();
// Lastly, can we determine that the bits defined by OrCI
@@ -18131,19 +18267,13 @@ ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
// -> (brcond Chain BB CC CPSR Cmp)
if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
- LHS->getOperand(0)->hasOneUse()) {
- auto *LHS00C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(0));
- auto *LHS01C = dyn_cast<ConstantSDNode>(LHS->getOperand(0)->getOperand(1));
- auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
- auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
- if ((LHS00C && LHS00C->getZExtValue() == 0) &&
- (LHS01C && LHS01C->getZExtValue() == 1) &&
- (LHS1C && LHS1C->getZExtValue() == 1) &&
- (RHSC && RHSC->getZExtValue() == 0)) {
- return DAG.getNode(
- ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
- LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
- }
+ LHS->getOperand(0)->hasOneUse() &&
+ isNullConstant(LHS->getOperand(0)->getOperand(0)) &&
+ isOneConstant(LHS->getOperand(0)->getOperand(1)) &&
+ isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {
+ return DAG.getNode(
+ ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
+ LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
}
return SDValue();
@@ -18204,17 +18334,12 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
// (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
// -> (cmov F T CC CPSR Cmp)
- if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse()) {
- auto *LHS0C = dyn_cast<ConstantSDNode>(LHS->getOperand(0));
- auto *LHS1C = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
- auto *RHSC = dyn_cast<ConstantSDNode>(RHS);
- if ((LHS0C && LHS0C->getZExtValue() == 0) &&
- (LHS1C && LHS1C->getZExtValue() == 1) &&
- (RHSC && RHSC->getZExtValue() == 0)) {
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
- LHS->getOperand(2), LHS->getOperand(3),
- LHS->getOperand(4));
- }
+ if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
+ isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
+ isNullConstant(RHS)) {
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
+ LHS->getOperand(2), LHS->getOperand(3),
+ LHS->getOperand(4));
}
if (!VT.isInteger())
@@ -18249,23 +18374,23 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
DAG.getConstant(5, dl, MVT::i32));
} else {
// CMOV 0, 1, ==, (CMPZ x, y) ->
- // (ADDCARRY (SUB x, y), t:0, t:1)
- // where t = (SUBCARRY 0, (SUB x, y), 0)
+ // (UADDO_CARRY (SUB x, y), t:0, t:1)
+ // where t = (USUBO_CARRY 0, (SUB x, y), 0)
//
- // The SUBCARRY computes 0 - (x - y) and this will give a borrow when
+ // The USUBO_CARRY computes 0 - (x - y) and this will give a borrow when
// x != y. In other words, a carry C == 1 when x == y, C == 0
// otherwise.
- // The final ADDCARRY computes
+ // The final UADDO_CARRY computes
// x - y + (0 - (x - y)) + C == C
SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
SDVTList VTs = DAG.getVTList(VT, MVT::i32);
SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
- // ISD::SUBCARRY returns a borrow but we want the carry here
+ // ISD::USUBO_CARRY returns a borrow but we want the carry here
// actually.
SDValue Carry =
DAG.getNode(ISD::SUB, dl, MVT::i32,
DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
- Res = DAG.getNode(ISD::ADDCARRY, dl, VTs, Sub, Neg, Carry);
+ Res = DAG.getNode(ISD::UADDO_CARRY, dl, VTs, Sub, Neg, Carry);
}
} else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
(!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
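The CMPZ-equality comment earlier in this hunk describes the carry trick in prose; as a cross-check, here is the same arithmetic modeled on plain 32-bit unsigned integers. This is a standalone sketch of the identity, not the DAG code, and the function name is invented.

```cpp
#include <cassert>
#include <cstdint>

// Scalar model of the DAG sequence built above:
//   Sub = x - y
//   Neg = 0 - Sub            (USUBO: borrow is set when Sub != 0)
//   C   = 1 - borrow         (convert the borrow into a carry)
//   Res = Sub + Neg + C      (UADDO_CARRY result) == C == (x == y)
static uint32_t isEqualViaCarry(uint32_t X, uint32_t Y) {
  uint32_t Sub = X - Y;
  uint32_t Borrow = Sub != 0;       // 0 - Sub borrows iff Sub != 0
  uint32_t Neg = uint32_t(0) - Sub;
  uint32_t Carry = 1 - Borrow;
  uint32_t Res = Sub + Neg + Carry; // wraps to exactly Carry
  return Res;
}

int main() {
  assert(isEqualViaCarry(5, 5) == 1);
  assert(isEqualViaCarry(5, 7) == 0);
  assert(isEqualViaCarry(0, 0xffffffffu) == 0);
  return 0;
}
```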
@@ -18300,14 +18425,14 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
// (z == 2 ^ K).
// CMOV (SUBS x, y), z, !=, (SUBS x, y):1 ->
// t1 = (USUBO (SUB x, y), 1)
- // t2 = (SUBCARRY (SUB x, y), t1:0, t1:1)
+ // t2 = (USUBO_CARRY (SUB x, y), t1:0, t1:1)
// Result = if K != 0 then (SHL t2:0, K) else t2:0
//
// This also handles the special case of comparing against zero; it's
// essentially the same pattern, except there's no SUBS:
// CMOV x, z, !=, (CMPZ x, 0) ->
// t1 = (USUBO x, 1)
- // t2 = (SUBCARRY x, t1:0, t1:1)
+ // t2 = (USUBO_CARRY x, t1:0, t1:1)
// Result = if K != 0 then (SHL t2:0, K) else t2:0
const APInt *TrueConst;
if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
@@ -18320,7 +18445,8 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
if (ShiftAmount)
TrueVal = DAG.getConstant(1, dl, VT);
SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
- Res = DAG.getNode(ISD::SUBCARRY, dl, VTs, FalseVal, Subc, Subc.getValue(1));
+ Res = DAG.getNode(ISD::USUBO_CARRY, dl, VTs, FalseVal, Subc,
+ Subc.getValue(1));
if (ShiftAmount)
Res = DAG.getNode(ISD::SHL, dl, VT, Res,
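The comment block at the top of this hunk describes the second carry-based rewrite: selecting 2^K when x != y and 0 otherwise, without a branch. Below is a standalone model of that arithmetic on 32-bit unsigned values; the function name, K parameter, and test values are illustrative only.

```cpp
#include <cassert>
#include <cstdint>

// Scalar model of the branchless select built above:
//   result = (x != y) ? (1u << K) : 0
// computed as
//   Sub = x - y
//   T1  = Sub - 1, Borrow = (Sub == 0)   // USUBO
//   T2  = Sub - T1 - Borrow              // USUBO_CARRY: 1 if Sub != 0, else 0
//   Res = T2 << K
static uint32_t selectPow2IfNotEqual(uint32_t X, uint32_t Y, unsigned K) {
  uint32_t Sub = X - Y;
  uint32_t T1 = Sub - 1;
  uint32_t Borrow = (Sub == 0);
  uint32_t T2 = Sub - T1 - Borrow;
  return T2 << K;
}

int main() {
  assert(selectPow2IfNotEqual(3, 3, 4) == 0);
  assert(selectPow2IfNotEqual(3, 9, 4) == 16);
  assert(selectPow2IfNotEqual(0, 1, 0) == 1);
  return 0;
}
```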
@@ -18696,7 +18822,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FP_TO_UINT:
return PerformVCVTCombine(N, DCI.DAG, Subtarget);
case ISD::FADD:
- return PerformFAddVSelectCombine(N, DCI.DAG, Subtarget);
+ return PerformFADDCombine(N, DCI.DAG, Subtarget);
case ISD::FDIV:
return PerformVDIVCombine(N, DCI.DAG, Subtarget);
case ISD::INTRINSIC_WO_CHAIN:
@@ -18750,11 +18876,26 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return PerformVCMPCombine(N, DCI.DAG, Subtarget);
case ISD::VECREDUCE_ADD:
return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
+ case ARMISD::VADDVs:
+ case ARMISD::VADDVu:
+ case ARMISD::VADDLVs:
+ case ARMISD::VADDLVu:
+ case ARMISD::VADDLVAs:
+ case ARMISD::VADDLVAu:
+ case ARMISD::VMLAVs:
+ case ARMISD::VMLAVu:
+ case ARMISD::VMLALVs:
+ case ARMISD::VMLALVu:
+ case ARMISD::VMLALVAs:
+ case ARMISD::VMLALVAu:
+ return PerformReduceShuffleCombine(N, DCI.DAG);
case ARMISD::VMOVN:
return PerformVMOVNCombine(N, DCI);
case ARMISD::VQMOVNs:
case ARMISD::VQMOVNu:
return PerformVQMOVNCombine(N, DCI);
+ case ARMISD::VQDMULH:
+ return PerformVQDMULHCombine(N, DCI);
case ARMISD::ASRL:
case ARMISD::LSRL:
case ARMISD::LSLL:
@@ -19868,7 +20009,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
return;
KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
- Known = KnownBits::commonBits(Known, KnownRHS);
+ Known = Known.intersectWith(KnownRHS);
return;
}
case ISD::INTRINSIC_W_CHAIN: {
@@ -19950,7 +20091,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownOp1 = KnownBits::mul(
KnownOp1, KnownBits::makeConstant(APInt(32, -1)));
- Known = KnownBits::commonBits(KnownOp0, KnownOp1);
+ Known = KnownOp0.intersectWith(KnownOp1);
break;
}
}
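The two hunks above swap KnownBits::commonBits(A, B) for A.intersectWith(B); the semantics are unchanged: a bit stays known only if both inputs agree on it. The sketch below models this with a pair of plain 32-bit masks; the struct is a stand-in invented for the sketch, not LLVM's KnownBits.

```cpp
#include <cassert>
#include <cstdint>

// Minimal stand-in for KnownBits: Zero has a bit set where the value is known
// to be 0, One where it is known to be 1 (the two masks never overlap).
struct KnownBitsModel {
  uint32_t Zero = 0, One = 0;
  // A bit remains known after the merge only if it is known, with the same
  // value, on both sides. This is what computeKnownBitsForTargetNode needs
  // when a node may produce either of two values (e.g. the two CMOV inputs).
  KnownBitsModel intersectWith(const KnownBitsModel &RHS) const {
    return {Zero & RHS.Zero, One & RHS.One};
  }
};

int main() {
  KnownBitsModel A{0xffff0000u, 0x000000ffu}; // low byte ones, top half zeros
  KnownBitsModel B{0xff000000u, 0x0000000fu}; // low nibble ones, top byte zeros
  KnownBitsModel C = A.intersectWith(B);
  assert(C.Zero == 0xff000000u && C.One == 0x0000000fu);
  return 0;
}
```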
@@ -20217,9 +20358,7 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
case 'w':
if (VT == MVT::Other)
break;
- if (VT == MVT::f16 || VT == MVT::bf16)
- return RCPair(0U, &ARM::HPRRegClass);
- if (VT == MVT::f32)
+ if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
return RCPair(0U, &ARM::SPRRegClass);
if (VT.getSizeInBits() == 64)
return RCPair(0U, &ARM::DPRRegClass);
@@ -20229,7 +20368,7 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
case 'x':
if (VT == MVT::Other)
break;
- if (VT == MVT::f32)
+ if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
return RCPair(0U, &ARM::SPR_8RegClass);
if (VT.getSizeInBits() == 64)
return RCPair(0U, &ARM::DPR_8RegClass);
@@ -20239,9 +20378,7 @@ RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
case 't':
if (VT == MVT::Other)
break;
- if (VT == MVT::f16 || VT == MVT::bf16)
- return RCPair(0U, &ARM::HPRRegClass);
- if (VT == MVT::f32 || VT == MVT::i32)
+ if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16)
return RCPair(0U, &ARM::SPRRegClass);
if (VT.getSizeInBits() == 64)
return RCPair(0U, &ARM::DPR_VFP2RegClass);
@@ -20635,12 +20772,12 @@ ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
DAG.getConstant(2, DL, MVT::i32));
- SDValue Flag;
- Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag);
- Flag = Chain.getValue(1);
+ SDValue Glue;
+ Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Glue);
+ Glue = Chain.getValue(1);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag);
+ Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Glue);
SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
Chain = NewSP.getValue(1);
@@ -21923,14 +22060,12 @@ bool ARMTargetLowering::isComplexDeinterleavingOperationSupported(
}
Value *ARMTargetLowering::createComplexDeinterleavingIR(
- Instruction *I, ComplexDeinterleavingOperation OperationType,
+ IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
Value *Accumulator) const {
FixedVectorType *Ty = cast<FixedVectorType>(InputA->getType());
- IRBuilder<> B(I);
-
unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();
assert(TyWidth >= 128 && "Width of vector type must be at least 128 bits");
@@ -21955,9 +22090,9 @@ Value *ARMTargetLowering::createComplexDeinterleavingIR(
}
auto *LowerSplitInt = createComplexDeinterleavingIR(
- I, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
+ B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
auto *UpperSplitInt = createComplexDeinterleavingIR(
- I, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
+ B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());
return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 06da9977f892..2dd54602ef61 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -29,7 +30,6 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/CodeGen.h"
-#include "llvm/Support/MachineValueType.h"
#include <optional>
#include <utility>
@@ -74,9 +74,9 @@ class VectorType;
BRCOND, // Conditional branch.
BR_JT, // Jumptable branch.
BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
- RET_FLAG, // Return with a flag operand.
- SERET_FLAG, // CMSE Entry function return with a flag operand.
- INTRET_FLAG, // Interrupt return with an LR-offset and a flag operand.
+ RET_GLUE, // Return with a glue operand.
+ SERET_GLUE, // CMSE Entry function return with a glue operand.
+ INTRET_GLUE, // Interrupt return with an LR-offset and a glue operand.
PIC_ADD, // Add with a PC operand and a PIC label.
@@ -102,8 +102,8 @@ class VectorType;
BCC_i64,
- SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out.
- SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
+ SRL_GLUE, // V,Glue = srl_glue X -> srl X, 1 + save carry out.
+ SRA_GLUE, // V,Glue = sra_glue X -> sra X, 1 + save carry out.
RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
ADDC, // Add with carry
@@ -244,8 +244,7 @@ class VectorType;
VADDLVAps, // Same as VADDLVp[su] but with a v4i1 predicate mask
VADDLVApu,
VMLAVs, // sign- or zero-extend the elements of two vectors to i32, multiply
- // them
- VMLAVu, // and add the results together, returning an i32 of their sum
+ VMLAVu, // them and add the results together, returning an i32 of their sum
VMLAVps, // Same as VMLAV[su] with a v4i1 predicate mask
VMLAVpu,
VMLALVs, // Same as VMLAV but with i64, returning the low and
@@ -618,6 +617,10 @@ class VectorType;
return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
}
+ bool shouldReassociateReduction(unsigned Opc, EVT VT) const override {
+ return Opc != ISD::VECREDUCE_ADD;
+ }
+
/// Returns true if an argument of type Ty needs to be passed in a
/// contiguous block of registers in calling convention CallConv.
bool functionArgumentNeedsConsecutiveRegisters(
@@ -735,6 +738,9 @@ class VectorType;
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
+ bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
+ EVT VT) const override;
+
bool preferIncOfAddToSubOfNot(EVT VT) const override;
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
@@ -744,7 +750,7 @@ class VectorType;
ComplexDeinterleavingOperation Operation, Type *Ty) const override;
Value *createComplexDeinterleavingIR(
- Instruction *I, ComplexDeinterleavingOperation OperationType,
+ IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
Value *Accumulator = nullptr) const override;
@@ -762,9 +768,6 @@ class VectorType;
const InstrItineraryData *Itins;
- /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
- unsigned ARMPCLabelIndex;
-
// TODO: remove this, and have shouldInsertFencesForAtomic do the proper
// check.
bool InsertFencesForAtomic;
@@ -818,7 +821,6 @@ class VectorType;
TLSModel::Model model) const;
SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddressWindows(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
@@ -874,7 +876,7 @@ class VectorType;
SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
@@ -968,8 +970,6 @@ class VectorType;
void EmitSjLjDispatchBlock(MachineInstr &MI, MachineBasicBlock *MBB) const;
- bool RemapAddSubWithFlags(MachineInstr &MI, MachineBasicBlock *BB) const;
-
MachineBasicBlock *EmitStructByval(MachineInstr &MI,
MachineBasicBlock *MBB) const;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index f5415c5b5895..471b706cc408 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -152,11 +152,11 @@ def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
-def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
+def ARMretglue : SDNode<"ARMISD::RET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def ARMseretflag : SDNode<"ARMISD::SERET_FLAG", SDTNone,
+def ARMseretglue : SDNode<"ARMISD::SERET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall,
+def ARMintretglue : SDNode<"ARMISD::INTRET_GLUE", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
[SDNPInGlue]>;
@@ -192,8 +192,8 @@ def ARMasrl : SDNode<"ARMISD::ASRL", SDT_ARMIntShiftParts, []>;
def ARMlsrl : SDNode<"ARMISD::LSRL", SDT_ARMIntShiftParts, []>;
def ARMlsll : SDNode<"ARMISD::LSLL", SDT_ARMIntShiftParts, []>;
-def ARMsrl_flag : SDNode<"ARMISD::SRL_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
-def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMsrl_glue : SDNode<"ARMISD::SRL_GLUE", SDTIntUnaryOp, [SDNPOutGlue]>;
+def ARMsra_glue : SDNode<"ARMISD::SRA_GLUE", SDTIntUnaryOp, [SDNPOutGlue]>;
def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInGlue ]>;
def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags,
@@ -460,6 +460,11 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
return hasNoVMLxHazardUse(N);
}]>;
+// An 'fadd' node which can be contracted into an fma
+def fadd_contract : PatFrag<(ops node:$lhs, node:$rhs),(fadd node:$lhs, node:$rhs),[{
+ return N->getFlags().hasAllowContract();
+}]>;
+
def imm_even : ImmLeaf<i32, [{ return (Imm & 1) == 0; }]>;
def imm_odd : ImmLeaf<i32, [{ return (Imm & 1) == 1; }]>;
@@ -960,6 +965,19 @@ def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
let ParserMatchClass = Imm0_255AsmOperand;
}
+// imm0_255_expr - For Thumb1 movs/adds - 8-bit immediate that can also reference
+// a relocatable expression.
+def Imm0_255ExprAsmOperand: AsmOperandClass {
+ let Name = "Imm0_255Expr";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticString = "operand must be an immediate in the range [0,255] or a relocatable expression";
+}
+
+def imm0_255_expr : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
+ let EncoderMethod = "getHiLoImmOpValue";
+ let ParserMatchClass = Imm0_255ExprAsmOperand;
+}
+
/// imm0_65535 - An immediate is in the range [0,65535].
def Imm0_65535AsmOperand: ImmAsmOperand<0,65535> { let Name = "Imm0_65535"; }
def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
@@ -985,8 +1003,10 @@ def Imm0_65535ExprAsmOperand: AsmOperandClass {
let DiagnosticString = "operand must be an immediate in the range [0,0xffff] or a relocatable expression";
}
-def imm0_65535_expr : Operand<i32> {
- let EncoderMethod = "getHiLo16ImmOpValue";
+def imm0_65535_expr : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 65536;
+}]> {
+ let EncoderMethod = "getHiLoImmOpValue";
let ParserMatchClass = Imm0_65535ExprAsmOperand;
}
@@ -2450,14 +2470,14 @@ def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd),
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
// ARMV4T and above
def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
- "bx", "\tlr", [(ARMretflag)]>,
+ "bx", "\tlr", [(ARMretglue)]>,
Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> {
let Inst{27-0} = 0b0001001011111111111100011110;
}
// ARMV4 only
def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br,
- "mov", "\tpc, lr", [(ARMretflag)]>,
+ "mov", "\tpc, lr", [(ARMretglue)]>,
Requires<[IsARM, NoV4T]>, Sched<[WriteBr]> {
let Inst{27-0} = 0b0001101000001111000000001110;
}
@@ -2466,7 +2486,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
// the user-space one).
def SUBS_PC_LR : ARMPseudoInst<(outs), (ins i32imm:$offset, pred:$p),
4, IIC_Br,
- [(ARMintretflag imm:$offset)]>;
+ [(ARMintretglue imm:$offset)]>;
}
// Indirect branches
@@ -3709,14 +3729,14 @@ def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi,
Requires<[IsARM]>, Sched<[WriteALU]>;
// These aren't really mov instructions, but we have to define them this way
-// due to flag operands.
+// due to glue operands.
let Defs = [CPSR] in {
-def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
- [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP,
+def MOVsrl_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, (ARMsrl_glue GPR:$src))]>, UnaryDP,
Sched<[WriteALU]>, Requires<[IsARM]>;
-def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
- [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP,
+def MOVsra_glue : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ [(set GPR:$dst, (ARMsra_glue GPR:$src))]>, UnaryDP,
Sched<[WriteALU]>, Requires<[IsARM]>;
}
@@ -5898,8 +5918,12 @@ let isCall = 1,
}
// Reading thread pointer from coprocessor register
+def : ARMPat<(ARMthread_pointer), (MRC 15, 0, 13, 0, 2)>,
+ Requires<[IsARM, IsReadTPTPIDRURW]>;
def : ARMPat<(ARMthread_pointer), (MRC 15, 0, 13, 0, 3)>,
- Requires<[IsARM, IsReadTPHard]>;
+ Requires<[IsARM, IsReadTPTPIDRURO]>;
+def : ARMPat<(ARMthread_pointer), (MRC 15, 0, 13, 0, 4)>,
+ Requires<[IsARM, IsReadTPTPIDRPRW]>;
//===----------------------------------------------------------------------===//
// SJLJ Exception handling intrinsics
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 2cc470fdf474..fa25c571a9bd 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3192,19 +3192,18 @@ defm MVE_VQRSHL_by_vec : mve_shift_by_vec_multi<"vqrshl", 0b1, 0b1>;
defm MVE_VRSHL_by_vec : mve_shift_by_vec_multi<"vrshl", 0b0, 0b1>;
let Predicates = [HasMVEInt] in {
- def : Pat<(v4i32 (ARMvshlu (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
- (v4i32 (MVE_VSHL_by_vecu32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
- def : Pat<(v8i16 (ARMvshlu (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))),
- (v8i16 (MVE_VSHL_by_vecu16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>;
- def : Pat<(v16i8 (ARMvshlu (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
- (v16i8 (MVE_VSHL_by_vecu8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;
-
- def : Pat<(v4i32 (ARMvshls (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn))),
- (v4i32 (MVE_VSHL_by_vecs32 (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)))>;
- def : Pat<(v8i16 (ARMvshls (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn))),
- (v8i16 (MVE_VSHL_by_vecs16 (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)))>;
- def : Pat<(v16i8 (ARMvshls (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn))),
- (v16i8 (MVE_VSHL_by_vecs8 (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)))>;
+ defm : MVE_TwoOpPattern<MVE_v16i8, ARMvshlu, int_arm_mve_vshl_vector_predicated,
+ (? (i32 0), (i32 0), (i32 1)), MVE_VSHL_by_vecu8, null_frag>;
+ defm : MVE_TwoOpPattern<MVE_v8i16, ARMvshlu, int_arm_mve_vshl_vector_predicated,
+ (? (i32 0), (i32 0), (i32 1)), MVE_VSHL_by_vecu16, null_frag>;
+ defm : MVE_TwoOpPattern<MVE_v4i32, ARMvshlu, int_arm_mve_vshl_vector_predicated,
+ (? (i32 0), (i32 0), (i32 1)), MVE_VSHL_by_vecu32, null_frag>;
+ defm : MVE_TwoOpPattern<MVE_v16i8, ARMvshls, int_arm_mve_vshl_vector_predicated,
+ (? (i32 0), (i32 0), (i32 0)), MVE_VSHL_by_vecs8, null_frag>;
+ defm : MVE_TwoOpPattern<MVE_v8i16, ARMvshls, int_arm_mve_vshl_vector_predicated,
+ (? (i32 0), (i32 0), (i32 0)), MVE_VSHL_by_vecs16, null_frag>;
+ defm : MVE_TwoOpPattern<MVE_v4i32, ARMvshls, int_arm_mve_vshl_vector_predicated,
+ (? (i32 0), (i32 0), (i32 0)), MVE_VSHL_by_vecs32, null_frag>;
}
class MVE_shift_with_imm<string iname, string suffix, dag oops, dag iops,
@@ -3683,6 +3682,13 @@ multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI> {
(VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
imm:$rot))>;
+ def: Pat<(VTI.Vec (fadd_contract MQPR:$Qd_src,
+ (int_arm_mve_vcmulq imm:$rot,
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm)))),
+ (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src),
+ (VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
+ imm:$rot))>;
+
def : Pat<(VTI.Vec (int_arm_mve_vcmlaq_predicated
imm:$rot, (VTI.Vec MQPR:$Qd_src),
(VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm),
@@ -3690,7 +3696,6 @@ multiclass MVE_VCMLA_m<MVEVectorVTInfo VTI> {
(VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qn),
(VTI.Vec MQPR:$Qm), imm:$rot,
ARMVCCThen, (VTI.Pred VCCR:$mask), zero_reg))>;
-
}
}
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index 4c8fe4493f9a..32c6843026dd 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -7992,28 +7992,25 @@ multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, strin
(!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
- (!cast<Instruction>("VREV16d8")
- (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ (VREV16d8 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
dsub_0)),
- dsub_0)>,
+ dsub_0)>,
Requires<[HasNEON]>;
def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty)
- (!cast<Instruction>("VREV16d8")
- (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ (VREV16d8 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
dsub_0)),
- dsub_0)>,
+ dsub_0)>,
Requires<[HasNEON]>;
def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy)
(!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)),
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty)
(EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty)
- (!cast<Instruction>("VREV16d8")
- (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
+ (VREV16d8 (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))),
dsub_0)),
- dsub_0)>,
+ dsub_0)>,
Requires<[HasNEON]>;
}
@@ -8066,17 +8063,17 @@ let Predicates = [HasNEON,IsLE] in {
let Predicates = [HasNEON,IsBE] in {
def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)),
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
- (!cast<Instruction>("VREV16d8")
+ (VREV16d8
(VLD1LNd16 addrmode6:$addr,
(f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)),
(VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16
- (!cast<Instruction>("VREV16d8")
+ (VREV16d8
(VLD1LNd16 addrmode6:$addr,
(f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)),
(VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16
- (!cast<Instruction>("VREV16d8")
+ (VREV16d8
(VLD1LNd16 addrmode6:$addr,
(f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>;
}
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
index 8f7039a327b3..df6c129a1857 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -501,11 +501,11 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def tBX_RET : tPseudoExpand<(outs), (ins pred:$p), 2, IIC_Br,
- [(ARMretflag)], (tBX LR, pred:$p)>, Sched<[WriteBr]>;
+ [(ARMretglue)], (tBX LR, pred:$p)>, Sched<[WriteBr]>;
// alternative return for CMSE entry functions
def tBXNS_RET : tPseudoInst<(outs), (ins), 2, IIC_Br,
- [(ARMseretflag)]>, Sched<[WriteBr]>;
+ [(ARMseretglue)]>, Sched<[WriteBr]>;
// Alternative return instruction used by vararg functions.
def tBX_RET_vararg : tPseudoExpand<(outs), (ins tGPR:$Rm, pred:$p),
@@ -984,9 +984,9 @@ let isAdd = 1 in {
def tADDi8 : // A8.6.4 T2
T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn),
- (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
+ (ins tGPR:$Rn, imm0_255_expr:$imm8), IIC_iALUi,
"add", "\t$Rdn, $imm8",
- [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>,
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm0_255_expr:$imm8))]>,
Sched<[WriteALU]>;
// Add register
@@ -995,7 +995,8 @@ let isAdd = 1 in {
T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
IIC_iALUr,
"add", "\t$Rd, $Rn, $Rm",
- [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+ [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>,
+ Sched<[WriteALU]>;
/// Similar to the above except these set the 's' bit so the
/// instruction modifies the CPSR register.
@@ -1018,10 +1019,10 @@ let isAdd = 1 in {
Requires<[IsThumb1Only]>,
Sched<[WriteALU]>;
- def tADDSi8 : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255:$imm8),
+ def tADDSi8 : tPseudoInst<(outs tGPR:$Rdn), (ins tGPR:$Rn, imm0_255_expr:$imm8),
2, IIC_iALUi,
[(set tGPR:$Rdn, CPSR, (ARMaddc tGPR:$Rn,
- imm8_255:$imm8))]>,
+ imm0_255_expr:$imm8))]>,
Requires<[IsThumb1Only]>,
Sched<[WriteALU]>;
@@ -1196,9 +1197,9 @@ def tLSRrr : // A8.6.91
// Move register
let isMoveImm = 1 in
-def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi,
+def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255_expr:$imm8), IIC_iMOVi,
"mov", "\t$Rd, $imm8",
- [(set tGPR:$Rd, imm0_255:$imm8)]>,
+ [(set tGPR:$Rd, imm0_255_expr:$imm8)]>,
T1General<{1,0,0,?,?}>, Sched<[WriteALU]> {
// A8.6.96
bits<3> Rd;
@@ -1208,8 +1209,8 @@ def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi,
}
// Because we have an explicit tMOVSr below, we need an alias to handle
// the immediate "movs" form here. Blech.
-def : tInstAlias <"movs $Rdn, $imm",
- (tMOVi8 tGPR:$Rdn, CPSR, imm0_255:$imm, 14, 0)>;
+def : tInstAlias <"movs $Rdn, $imm8",
+ (tMOVi8 tGPR:$Rdn, CPSR, imm0_255_expr:$imm8, 14, 0)>;
// A7-73: MOV(2) - mov setting flag.
@@ -1602,7 +1603,22 @@ def tLDRLIT_ga_abs : PseudoInst<(outs tGPR:$dst), (ins i32imm:$src),
IIC_iLoad_i,
[(set tGPR:$dst,
(ARMWrapper tglobaladdr:$src))]>,
- Requires<[IsThumb, DontUseMovt]>;
+ Requires<[IsThumb, DontUseMovt, DontGenExecuteOnly]>;
+
+// 32-bit immediate built using mov/add with the four :lower0_7: through
+// :upper8_15: relocations.
+// This is a single pseudo instruction to make it re-materializable.
+// FIXME: Remove this when we can do generalized remat.
+let Defs = [CPSR], isReMaterializable = 1, isMoveImm = 1, Size = 16, hasNoSchedulingInfo = 1 in
+def tMOVi32imm : PseudoInst<(outs rGPR:$dst), (ins i32imm:$src), NoItinerary,
+ [(set rGPR:$dst, (i32 imm:$src))]>,
+ Requires<[IsThumb1Only, GenExecuteOnly, DontUseMovt]>;
+
+def : ARMPat<(ARMWrapper tglobaladdr :$dst), (tMOVi32imm tglobaladdr :$dst)>,
+ Requires<[GenT1ExecuteOnly]>;
+def : ARMPat<(ARMWrapper texternalsym :$dst), (tMOVi32imm texternalsym :$dst)>,
+ Requires<[GenT1ExecuteOnly]>;
+
// TLS globals
def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td
index 521cb0695384..610a71d68ec8 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -1022,9 +1022,9 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, SDNode opnode,
/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
/// for a binary operation that produces a value and use the carry
/// bit. It's not predicable.
-let Defs = [CPSR], Uses = [CPSR] in {
multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, SDNode opnode,
- bit Commutable = 0> {
+ bit Commutable = 0, bit PostISelHook = 0> {
+ let Defs = [CPSR], Uses = [CPSR], hasPostISelHook = PostISelHook in {
// shifted imm
def ri : T2sTwoRegImm<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
@@ -1058,7 +1058,26 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, SDNode opnode,
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
}
-}
+ }
+ // Shortened forms
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $imm"),
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $Rm"),
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", ".w $Rdn, $ShiftedRm"),
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$ShiftedRm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", "$Rdn, $imm"),
+ (!cast<Instruction>(NAME#"ri") rGPR:$Rdn, rGPR:$Rdn, t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", "$Rdn, $Rm"),
+ (!cast<Instruction>(NAME#"rr") rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p,
+ cc_out:$s)>;
+ def : t2InstAlias<!strconcat(opc, "${s}${p}", "$Rdn, $ShiftedRm"),
+ (!cast<Instruction>(NAME#"rs") rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$ShiftedRm, pred:$p,
+ cc_out:$s)>;
}
/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
@@ -1577,6 +1596,46 @@ def t2LDR_PRE_imm : t2AsmPseudo<"ldr${p}.w $Rt, $addr!",
def t2LDR_POST_imm : t2AsmPseudo<"ldr${p}.w $Rt, $Rn, $imm",
(ins GPR:$Rt, addr_offset_none:$Rn, t2am_imm8_offset:$imm, pred:$p)>;
+// A7.7.46 LDRB (immediate) T3
+// .w suffixes; Constraints can't be used on t2InstAlias to describe
+// "$Rn = $Rn_wb" on POST or "$addr.base = $Rn_wb" on PRE.
+def t2LDRB_OFFSET_imm : t2AsmPseudo<"ldrb${p}.w $Rt, $addr",
+ (ins GPR:$Rt, t2addrmode_negimm8:$addr, pred:$p)>;
+def t2LDRB_PRE_imm : t2AsmPseudo<"ldrb${p}.w $Rt, $addr!",
+ (ins GPR:$Rt, t2addrmode_imm8_pre:$addr, pred:$p)>;
+def t2LDRB_POST_imm : t2AsmPseudo<"ldrb${p}.w $Rt, $Rn, $imm",
+ (ins GPR:$Rt, addr_offset_none:$Rn, t2am_imm8_offset:$imm, pred:$p)>;
+
+// A7.7.55 LDRH (immediate) T3
+// .w suffixes; Constraints can't be used on t2InstAlias to describe
+// "$Rn = $Rn_wb" on POST or "$addr.base = $Rn_wb" on PRE.
+def t2LDRH_OFFSET_imm : t2AsmPseudo<"ldrh${p}.w $Rt, $addr",
+ (ins GPR:$Rt, t2addrmode_negimm8:$addr, pred:$p)>;
+def t2LDRH_PRE_imm : t2AsmPseudo<"ldrh${p}.w $Rt, $addr!",
+ (ins GPR:$Rt, t2addrmode_imm8_pre:$addr, pred:$p)>;
+def t2LDRH_POST_imm : t2AsmPseudo<"ldrh${p}.w $Rt, $Rn, $imm",
+ (ins GPR:$Rt, addr_offset_none:$Rn, t2am_imm8_offset:$imm, pred:$p)>;
+
+// A7.7.59 LDRSB (immediate) T2
+// .w suffixes; Constraints can't be used on t2InstAlias to describe
+// "$Rn = $Rn_wb" on POST or "$addr.base = $Rn_wb" on PRE.
+def t2LDRSB_OFFSET_imm : t2AsmPseudo<"ldrsb${p}.w $Rt, $addr",
+ (ins GPR:$Rt, t2addrmode_negimm8:$addr, pred:$p)>;
+def t2LDRSB_PRE_imm : t2AsmPseudo<"ldrsb${p}.w $Rt, $addr!",
+ (ins GPR:$Rt, t2addrmode_imm8_pre:$addr, pred:$p)>;
+def t2LDRSB_POST_imm : t2AsmPseudo<"ldrsb${p}.w $Rt, $Rn, $imm",
+ (ins GPR:$Rt, addr_offset_none:$Rn, t2am_imm8_offset:$imm, pred:$p)>;
+
+// A7.7.63 LDRSH (immediate) T2
+// .w suffixes; Constraints can't be used on t2InstAlias to describe
+// "$Rn = $Rn_wb" on POST or "$addr.base = $Rn_wb" on PRE.
+def t2LDRSH_OFFSET_imm : t2AsmPseudo<"ldrsh${p}.w $Rt, $addr",
+ (ins GPR:$Rt, t2addrmode_negimm8:$addr, pred:$p)>;
+def t2LDRSH_PRE_imm : t2AsmPseudo<"ldrsh${p}.w $Rt, $addr!",
+ (ins GPR:$Rt, t2addrmode_imm8_pre:$addr, pred:$p)>;
+def t2LDRSH_POST_imm : t2AsmPseudo<"ldrsh${p}.w $Rt, $Rn, $imm",
+ (ins GPR:$Rt, addr_offset_none:$Rn, t2am_imm8_offset:$imm, pred:$p)>;
+
// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110).
// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4
class T2IldT<bit signed, bits<2> type, string opc, InstrItinClass ii>
@@ -1743,6 +1802,26 @@ def t2STR_PRE_imm : t2AsmPseudo<"str${p}.w $Rt, $addr!",
def t2STR_POST_imm : t2AsmPseudo<"str${p}.w $Rt, $Rn, $imm",
(ins GPR:$Rt, addr_offset_none:$Rn, t2am_imm8_offset:$imm, pred:$p)>;
+// A7.7.163 STRB (immediate) T3
+// .w suffixes; Constraints can't be used on t2InstAlias to describe
+// "$Rn = $Rn_wb" on POST or "$addr.base = $Rn_wb" on PRE.
+def t2STRB_OFFSET_imm : t2AsmPseudo<"strb${p}.w $Rt, $addr",
+ (ins GPR:$Rt, t2addrmode_negimm8:$addr, pred:$p)>;
+def t2STRB_PRE_imm : t2AsmPseudo<"strb${p}.w $Rt, $addr!",
+ (ins GPR:$Rt, t2addrmode_imm8_pre:$addr, pred:$p)>;
+def t2STRB_POST_imm : t2AsmPseudo<"strb${p}.w $Rt, $Rn, $imm",
+ (ins GPR:$Rt, addr_offset_none:$Rn, t2am_imm8_offset:$imm, pred:$p)>;
+
+// A7.7.170 STRH (immediate) T3
+// .w suffixes; Constraints can't be used on t2InstAlias to describe
+// "$Rn = $Rn_wb" on POST or "$addr.base = $Rn_wb" on PRE.
+def t2STRH_OFFSET_imm : t2AsmPseudo<"strh${p}.w $Rt, $addr",
+ (ins GPR:$Rt, t2addrmode_negimm8:$addr, pred:$p)>;
+def t2STRH_PRE_imm : t2AsmPseudo<"strh${p}.w $Rt, $addr!",
+ (ins GPR:$Rt, t2addrmode_imm8_pre:$addr, pred:$p)>;
+def t2STRH_POST_imm : t2AsmPseudo<"strh${p}.w $Rt, $Rn, $imm",
+ (ins GPR:$Rt, addr_offset_none:$Rn, t2am_imm8_offset:$imm, pred:$p)>;
+
// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly
// only.
// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4
@@ -2365,15 +2444,17 @@ def : T2Pat<(ARMsubs GPRnopc:$Rn, rGPR:$Rm), (t2SUBSrr $Rn, $Rm)>;
def : T2Pat<(ARMsubs GPRnopc:$Rn, t2_so_reg:$ShiftedRm),
(t2SUBSrs $Rn, t2_so_reg:$ShiftedRm)>;
-let hasPostISelHook = 1 in {
-defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", ARMadde, 1>;
-defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", ARMsube>;
-}
+defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", ARMadde, 1, 1>;
+defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", ARMsube, 0, 1>;
def : t2InstSubst<"adc${s}${p} $rd, $rn, $imm",
(t2SBCri rGPR:$rd, rGPR:$rn, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>;
+def : t2InstSubst<"adc${s}${p} $rdn, $imm",
+ (t2SBCri rGPR:$rdn, rGPR:$rdn, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>;
def : t2InstSubst<"sbc${s}${p} $rd, $rn, $imm",
(t2ADCri rGPR:$rd, rGPR:$rn, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>;
+def : t2InstSubst<"sbc${s}${p} $rdn, $imm",
+ (t2ADCri rGPR:$rdn, rGPR:$rdn, t2_so_imm_not:$imm, pred:$p, s_cc_out:$s)>;
def : t2InstSubst<"add${s}${p}.w $rd, $rn, $imm",
(t2SUBri rGPR:$rd, GPRnopc:$rn, t2_so_imm_neg:$imm, pred:$p, s_cc_out:$s)>;
@@ -2726,10 +2807,10 @@ def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
}
let isCodeGenOnly = 1, Defs = [CPSR] in {
-def t2MOVsrl_flag : T2TwoRegShiftImm<
+def t2MOVsrl_glue : T2TwoRegShiftImm<
(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
"lsrs", ".w\t$Rd, $Rm, #1",
- [(set rGPR:$Rd, (ARMsrl_flag rGPR:$Rm))]>,
+ [(set rGPR:$Rd, (ARMsrl_glue rGPR:$Rm))]>,
Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -2741,10 +2822,10 @@ def t2MOVsrl_flag : T2TwoRegShiftImm<
let Inst{14-12} = 0b000;
let Inst{7-6} = 0b01;
}
-def t2MOVsra_flag : T2TwoRegShiftImm<
+def t2MOVsra_glue : T2TwoRegShiftImm<
(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi,
"asrs", ".w\t$Rd, $Rm, #1",
- [(set rGPR:$Rd, (ARMsra_flag rGPR:$Rm))]>,
+ [(set rGPR:$Rd, (ARMsra_glue rGPR:$Rm))]>,
Sched<[WriteALU]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -4193,7 +4274,7 @@ def t2RFEIA : T2RFE<0b111010011001,
let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in
def t2SUBS_PC_LR : T2I <(outs), (ins imm0_255:$imm), NoItinerary,
"subs", "\tpc, lr, $imm",
- [(ARMintretflag imm0_255:$imm)]>,
+ [(ARMintretglue imm0_255:$imm)]>,
Requires<[IsThumb2,IsNotMClass]> {
let Inst{31-8} = 0b111100111101111010001111;
@@ -4699,8 +4780,12 @@ def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
// Reading thread pointer from coprocessor register
+def : T2Pat<(ARMthread_pointer), (t2MRC 15, 0, 13, 0, 2)>,
+ Requires<[IsThumb2, IsReadTPTPIDRURW]>;
def : T2Pat<(ARMthread_pointer), (t2MRC 15, 0, 13, 0, 3)>,
- Requires<[IsThumb2, IsReadTPHard]>;
+ Requires<[IsThumb2, IsReadTPTPIDRURO]>;
+def : T2Pat<(ARMthread_pointer), (t2MRC 15, 0, 13, 0, 4)>,
+ Requires<[IsThumb2, IsReadTPTPIDRPRW]>;
//===----------------------------------------------------------------------===//
// ARMv8.1 Privilege Access Never extension
@@ -4847,6 +4932,11 @@ def : t2InstAlias<"adc${s}${p} $Rd, $Rn, $Rm",
def : t2InstAlias<"adc${s}${p} $Rd, $Rn, $ShiftedRm",
(t2ADCrs rGPR:$Rd, rGPR:$Rn, t2_so_reg:$ShiftedRm,
pred:$p, cc_out:$s)>;
+def : t2InstAlias<"adc${s}${p} $Rdn, $Rm",
+ (t2ADCrr rGPR:$Rdn, rGPR:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>;
+def : t2InstAlias<"adc${s}${p} $Rdn, $ShiftedRm",
+ (t2ADCrs rGPR:$Rdn, rGPR:$Rdn, t2_so_reg:$ShiftedRm,
+ pred:$p, cc_out:$s)>;
// Aliases for SBC without the ".w" optional width specifier.
def : t2InstAlias<"sbc${s}${p} $Rd, $Rn, $Rm",
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index c1fecf3673a0..5d940cc29af8 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -1201,7 +1201,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010,
let D = VFPNeonDomain;
}
} // isMoveReg
-def : Pat<(arm_vmovsr GPR:$Rt), (VMOVSR GPR:$Rt)>, Requires<[HasVFP2, UseVMOVSR]>;
+def : Pat<(arm_vmovsr GPR:$Rt), (VMOVSR GPR:$Rt)>, Requires<[HasFPRegs, UseVMOVSR]>;
let hasSideEffects = 0 in {
def VMOVRRD : AVConv3I<0b11000101, 0b1011,
diff --git a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
index a4ab2f86d046..f391058a7051 100644
--- a/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
+++ b/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -13,8 +13,8 @@
#include "ARMRegisterBankInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/IntrinsicsARM.h"
diff --git a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
index 52b6b6f3bcf7..3ffde86ce1bb 100644
--- a/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -15,7 +15,7 @@
#include "ARMSubtarget.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
-#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 0a38f5633ae3..93db983b92c0 100644
--- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -2172,10 +2172,10 @@ namespace {
unsigned &NewOpc, Register &EvenReg, Register &OddReg,
Register &BaseReg, int &Offset, Register &PredReg,
ARMCC::CondCodes &Pred, bool &isT2);
- bool RescheduleOps(MachineBasicBlock *MBB,
- SmallVectorImpl<MachineInstr *> &Ops,
- unsigned Base, bool isLd,
- DenseMap<MachineInstr*, unsigned> &MI2LocMap);
+ bool RescheduleOps(
+ MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &Ops,
+ unsigned Base, bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
+ SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> &RegisterMap);
bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
bool DistributeIncrements();
bool DistributeIncrements(Register Base);
@@ -2289,10 +2289,7 @@ bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
return false;
Align Alignment = (*Op0->memoperands_begin())->getAlign();
- const Function &Func = MF->getFunction();
- Align ReqAlign =
- STI->hasV6Ops() ? TD->getABITypeAlign(Type::getInt64Ty(Func.getContext()))
- : Align(8); // Pre-v6 need 8-byte align
+ Align ReqAlign = STI->getDualLoadStoreAlignment();
if (Alignment < ReqAlign)
return false;
@@ -2324,10 +2321,10 @@ bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
return true;
}
-bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
- SmallVectorImpl<MachineInstr *> &Ops,
- unsigned Base, bool isLd,
- DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
+bool ARMPreAllocLoadStoreOpt::RescheduleOps(
+ MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &Ops, unsigned Base,
+ bool isLd, DenseMap<MachineInstr *, unsigned> &MI2LocMap,
+ SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> &RegisterMap) {
bool RetVal = false;
// Sort by offset (in reverse order).
@@ -2476,6 +2473,12 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
} else {
for (unsigned i = 0; i != NumMove; ++i) {
MachineInstr *Op = Ops.pop_back_val();
+ if (isLd) {
+ // Populate RegisterMap with all Registers defined by loads.
+ Register Reg = Op->getOperand(0).getReg();
+ RegisterMap[Reg];
+ }
+
MBB->splice(InsertPos, MBB, Op);
}
}
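The bare `RegisterMap[Reg];` statement added above is intentional: subscripting the map default-constructs an empty entry, so the key's presence alone records that the load defining Reg was rescheduled, before any DBG_VALUE is attached to it. Below is a standalone sketch of the same idiom with the standard library, where `unsigned` stands in for Register and `const void *` for MachineInstr pointers.

```cpp
#include <cassert>
#include <unordered_map>
#include <vector>

int main() {
  // operator[] inserts a default-constructed (empty) vector when the key is
  // missing, exactly the behavior relied on by "RegisterMap[Reg];" above.
  std::unordered_map<unsigned, std::vector<const void *>> RegisterMap;
  unsigned Reg = 42;
  RegisterMap[Reg]; // creates an empty entry; no DBG_VALUEs attached yet
  assert(RegisterMap.count(Reg) == 1);
  assert(RegisterMap[Reg].empty());
  return 0;
}
```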
@@ -2489,6 +2492,44 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
return RetVal;
}
+static void forEachDbgRegOperand(MachineInstr *MI,
+ std::function<void(MachineOperand &)> Fn) {
+ if (MI->isNonListDebugValue()) {
+ auto &Op = MI->getOperand(0);
+ if (Op.isReg())
+ Fn(Op);
+ } else {
+ for (unsigned I = 2; I < MI->getNumOperands(); I++) {
+ auto &Op = MI->getOperand(I);
+ if (Op.isReg())
+ Fn(Op);
+ }
+ }
+}
+
+// Update the RegisterMap with the instruction that was moved because a
+// DBG_VALUE_LIST may need to be moved again.
+static void updateRegisterMapForDbgValueListAfterMove(
+ SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> &RegisterMap,
+ MachineInstr *DbgValueListInstr, MachineInstr *InstrToReplace) {
+
+ forEachDbgRegOperand(DbgValueListInstr, [&](MachineOperand &Op) {
+ auto RegIt = RegisterMap.find(Op.getReg());
+ if (RegIt == RegisterMap.end())
+ return;
+ auto &InstrVec = RegIt->getSecond();
+ for (unsigned I = 0; I < InstrVec.size(); I++)
+ if (InstrVec[I] == InstrToReplace)
+ InstrVec[I] = DbgValueListInstr;
+ });
+}
+
+static DebugVariable createDebugVariableFromMachineInstr(MachineInstr *MI) {
+ auto DbgVar = DebugVariable(MI->getDebugVariable(), MI->getDebugExpression(),
+ MI->getDebugLoc()->getInlinedAt());
+ return DbgVar;
+}
+
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
bool RetVal = false;
@@ -2501,6 +2542,10 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
Base2InstMap Base2StsMap;
BaseVec LdBases;
BaseVec StBases;
+ // This map is used to track the relationship between the virtual
+ // register that is the result of a load that is moved and the DBG_VALUE
+ // MachineInstr pointer that uses that virtual register.
+ SmallDenseMap<Register, SmallVector<MachineInstr *>, 8> RegisterMap;
unsigned Loc = 0;
MachineBasicBlock::iterator MBBI = MBB->begin();
@@ -2563,7 +2608,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
unsigned Base = LdBases[i];
SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
if (Lds.size() > 1)
- RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
+ RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap, RegisterMap);
}
// Re-schedule stores.
@@ -2571,7 +2616,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
unsigned Base = StBases[i];
SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
if (Sts.size() > 1)
- RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
+ RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap, RegisterMap);
}
if (MBBI != E) {
@@ -2582,6 +2627,255 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
}
}
+ // Reschedule DBG_VALUEs to match any loads that were moved. When a load is
+ // sunk beyond a DBG_VALUE that is referring to it, the DBG_VALUE becomes a
+ // use-before-def, resulting in a loss of debug info.
+
+ // Example:
+ // Before the Pre Register Allocation Load Store Pass
+ // inst_a
+ // %2 = ld ...
+ // inst_b
+ // DBG_VALUE %2, "x", ...
+ // %3 = ld ...
+
+ // After the Pass:
+ // inst_a
+ // inst_b
+ // DBG_VALUE %2, "x", ...
+ // %2 = ld ...
+ // %3 = ld ...
+
+ // The code below addresses this by moving the DBG_VALUE to the position
+ // immediately after the load.
+
+ // Example:
+ // After the code below:
+ // inst_a
+ // inst_b
+ // %2 = ld ...
+ // DBG_VALUE %2, "x", ...
+ // %3 = ld ...
+
+ // The algorithm works in two phases: first, RescheduleOps() populates the
+ // RegisterMap with the registers that were moved as keys; no values are
+ // inserted yet. In the next phase, every MachineInstr in the basic block is
+ // iterated over. If it is a valid DBG_VALUE or DBG_VALUE_LIST and it uses one
+ // or more registers in the RegisterMap, the RegisterMap and InstrMap are
+ // populated with the MachineInstr. If the DBG_VALUE or DBG_VALUE_LIST
+ // describes debug information for a variable that already exists in the
+ // DbgValueSinkCandidates, the MachineInstr in the DbgValueSinkCandidates must
+ // be set to undef. If the current MachineInstr is a load that was moved,
+ // undef the corresponding DBG_VALUE or DBG_VALUE_LIST and clone it to below
+ // the load.
+
+ // To illustrate the above algorithm visually let's take this example.
+
+ // Before the Pre Register Allocation Load Store Pass:
+ // %2 = ld ...
+ // DBG_VALUE %2, A, .... # X
+ // DBG_VALUE 0, A, ... # Y
+ // %3 = ld ...
+ // DBG_VALUE %3, A, ..., # Z
+ // %4 = ld ...
+
+ // After Pre Register Allocation Load Store Pass:
+ // DBG_VALUE %2, A, .... # X
+ // DBG_VALUE 0, A, ... # Y
+ // DBG_VALUE %3, A, ..., # Z
+ // %2 = ld ...
+ // %3 = ld ...
+ // %4 = ld ...
+
+ // The algorithm below does the following:
+
+ // In the beginning, the RegisterMap will have been populated with the virtual
+ // registers %2 and %3, while the DbgValueSinkCandidates and the InstrMap will
+ // be empty. DbgValueSinkCandidates = {}, RegisterMap = {2 -> {}, 3 -> {}},
+ // InstrMap = {}
+ // -> DBG_VALUE %2, A, .... # X
+ // DBG_VALUE 0, A, ... # Y
+ // DBG_VALUE %3, A, ..., # Z
+ // %2 = ld ...
+ // %3 = ld ...
+ // %4 = ld ...
+
+ // After the first DBG_VALUE (denoted with an X) is processed, the
+ // DbgValueSinkCandidates and InstrMap will be populated and the RegisterMap
+ // entry for %2 will be populated as well. DbgValueSinkCandidates = {A -> X},
+ // RegisterMap = {2 -> {X}, 3 -> {}}, InstrMap {X -> 2}
+ // DBG_VALUE %2, A, .... # X
+ // -> DBG_VALUE 0, A, ... # Y
+ // DBG_VALUE %3, A, ..., # Z
+ // %2 = ld ...
+ // %3 = ld ...
+ // %4 = ld ...
+
+ // After the DBG_VALUE Y is processed, the DbgValueSinkCandidates is updated
+ // to hold Y for A, and the RegisterMap is updated to remove X from %2; this
+ // is because both X and Y describe the same debug variable A. X is also
+ // updated to have $noreg as its first operand.
+ // DbgValueSinkCandidates = {A -> {Y}}, RegisterMap = {2 -> {}, 3 -> {}},
+ // InstrMap = {X-> 2}
+ // DBG_VALUE $noreg, A, .... # X
+ // DBG_VALUE 0, A, ... # Y
+ // -> DBG_VALUE %3, A, ..., # Z
+ // %2 = ld ...
+ // %3 = ld ...
+ // %4 = ld ...
+
+ // After DBG_VALUE Z is processed, the DbgValueSinkCandidates is updated to
+ // hold Z for A, the RegisterMap is updated to hold Z for %3, and the InstrMap
+ // is updated to map Z to %3. This is again because Z describes the debug
+ // variable A. Y is not updated to have $noreg as its first operand because
+ // its first operand is an immediate, not a register.
+ // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
+ // InstrMap = {X -> 2, Z -> 3}
+ // DBG_VALUE $noreg, A, .... # X
+ // DBG_VALUE 0, A, ... # Y
+ // DBG_VALUE %3, A, ..., # Z
+ // -> %2 = ld ...
+ // %3 = ld ...
+ // %4 = ld ...
+
+ // Nothing happens here since the RegisterMap for %2 contains no value.
+ // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
+ // InstrMap = {X -> 2, Z -> 3}
+ // DBG_VALUE $noreg, A, .... # X
+ // DBG_VALUE 0, A, ... # Y
+ // DBG_VALUE %3, A, ..., # Z
+ // %2 = ld ...
+ // -> %3 = ld ...
+ // %4 = ld ...
+
+ // Since the RegisterMap contains Z as a value for %3, the MachineInstr
+ // pointer Z is copied to come after the load for %3, the old Z's first
+ // operand is changed to $noreg, and the basic block iterator is moved to
+ // after DBG_VALUE Z's new position.
+ // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
+ // InstrMap = {X -> 2, Z -> 3}
+ // DBG_VALUE $noreg, A, .... # X
+ // DBG_VALUE 0, A, ... # Y
+ // DBG_VALUE $noreg, A, ..., # Old Z
+ // %2 = ld ...
+ // %3 = ld ...
+ // DBG_VALUE %3, A, ..., # Z
+ // -> %4 = ld ...
+
+ // Nothing happens for %4 and the algorithm exits having processed the entire
+ // Basic Block.
+ // DbgValueSinkCandidates = {A -> {Z}}, RegisterMap = {2 -> {}, 3 -> {Z}},
+ // InstrMap = {X -> 2, Z -> 3}
+ // DBG_VALUE $noreg, A, .... # X
+ // DBG_VALUE 0, A, ... # Y
+ // DBG_VALUE $noreg, A, ..., # Old Z
+ // %2 = ld ...
+ // %3 = ld ...
+ // DBG_VALUE %3, A, ..., # Z
+ // %4 = ld ...
+
+ // This map is used to track the relationship between
+ // a Debug Variable and the DBG_VALUE MachineInstr pointer that describes the
+ // debug information for that Debug Variable.
+ SmallDenseMap<DebugVariable, MachineInstr *, 8> DbgValueSinkCandidates;
+ // This map is used to track the relationship between a DBG_VALUE or
+ // DBG_VALUE_LIST MachineInstr pointer and Registers that it uses.
+ SmallDenseMap<MachineInstr *, SmallVector<Register>, 8> InstrMap;
+ for (MBBI = MBB->begin(), E = MBB->end(); MBBI != E; ++MBBI) {
+ MachineInstr &MI = *MBBI;
+
+ auto PopulateRegisterAndInstrMapForDebugInstr = [&](Register Reg) {
+ auto RegIt = RegisterMap.find(Reg);
+ if (RegIt == RegisterMap.end())
+ return;
+ auto &InstrVec = RegIt->getSecond();
+ InstrVec.push_back(&MI);
+ InstrMap[&MI].push_back(Reg);
+ };
+
+ if (MI.isDebugValue()) {
+ assert(MI.getDebugVariable() &&
+ "DBG_VALUE or DBG_VALUE_LIST must contain a DILocalVariable");
+
+ auto DbgVar = createDebugVariableFromMachineInstr(&MI);
+ // If an operand is a register that exists in the RegisterMap, we know
+ // this is a DBG_VALUE that uses the result of a load that was moved and
+ // is therefore a candidate to also be moved, so add it to the
+ // RegisterMap and InstrMap.
+ forEachDbgRegOperand(&MI, [&](MachineOperand &Op) {
+ PopulateRegisterAndInstrMapForDebugInstr(Op.getReg());
+ });
+
+ // If the current DBG_VALUE describes the same variable as one of the
+ // in-flight DBG_VALUEs, remove the candidate from the list and set it to
+ // undef. Moving one DBG_VALUE past another would result in the variable's
+ // value going back in time when stepping through the block in the
+ // debugger.
+ auto InstrIt = DbgValueSinkCandidates.find(DbgVar);
+ if (InstrIt != DbgValueSinkCandidates.end()) {
+ auto *Instr = InstrIt->getSecond();
+ auto RegIt = InstrMap.find(Instr);
+ if (RegIt != InstrMap.end()) {
+ const auto &RegVec = RegIt->getSecond();
+ // For every Register in the RegVec, remove the MachineInstr in the
+ // RegisterMap that describes the DbgVar.
+ for (auto &Reg : RegVec) {
+ auto RegIt = RegisterMap.find(Reg);
+ if (RegIt == RegisterMap.end())
+ continue;
+ auto &InstrVec = RegIt->getSecond();
+ auto IsDbgVar = [&](MachineInstr *I) -> bool {
+ auto Var = createDebugVariableFromMachineInstr(I);
+ return Var == DbgVar;
+ };
+
+ InstrVec.erase(
+ std::remove_if(InstrVec.begin(), InstrVec.end(), IsDbgVar),
+ InstrVec.end());
+ }
+ forEachDbgRegOperand(Instr,
+ [&](MachineOperand &Op) { Op.setReg(0); });
+ }
+ }
+ DbgValueSinkCandidates[DbgVar] = &MI;
+ } else {
+ // If the register defined by a load has DBG_VALUEs recorded in the
+ // RegisterMap, then move those DBG_VALUEs to below the load.
+ auto Opc = MI.getOpcode();
+ if (!isLoadSingle(Opc))
+ continue;
+ auto Reg = MI.getOperand(0).getReg();
+ auto RegIt = RegisterMap.find(Reg);
+ if (RegIt == RegisterMap.end())
+ continue;
+ auto &DbgInstrVec = RegIt->getSecond();
+ if (!DbgInstrVec.size())
+ continue;
+ for (auto *DbgInstr : DbgInstrVec) {
+ MachineBasicBlock::iterator InsertPos = std::next(MBBI);
+ auto *ClonedMI = MI.getMF()->CloneMachineInstr(DbgInstr);
+ MBB->insert(InsertPos, ClonedMI);
+ MBBI++;
+ // Erase the entry into the DbgValueSinkCandidates for the DBG_VALUE
+ // that was moved.
+ auto DbgVar = createDebugVariableFromMachineInstr(DbgInstr);
+ auto DbgIt = DbgValueSinkCandidates.find(DbgVar);
+ // If the instruction is a DBG_VALUE_LIST, it may have already been
+ // erased from the DbgValueSinkCandidates. Only erase if it exists in
+ // the DbgValueSinkCandidates.
+ if (DbgIt != DbgValueSinkCandidates.end())
+ DbgValueSinkCandidates.erase(DbgIt);
+ // Zero out original dbg instr
+ forEachDbgRegOperand(DbgInstr,
+ [&](MachineOperand &Op) { Op.setReg(0); });
+ // Update RegisterMap with ClonedMI because it might have to be moved
+ // again.
+ if (DbgInstr->isDebugValueList())
+ updateRegisterMapForDbgValueListAfterMove(RegisterMap, ClonedMI,
+ DbgInstr);
+ }
+ }
+ }
return RetVal;
}
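The comment block above walks through the DBG_VALUE re-sinking by example. The toy program below models the same strategy on a flat list of pseudo-instructions: a debug annotation that would become a use-before-def is cloned to just below its defining load and the original is set to undef. Everything here (the Instr struct, the register strings) is invented for the sketch; it is not the pass's data model.

```cpp
#include <cassert>
#include <string>
#include <vector>

// Toy "instruction": either a load defining a register or a debug annotation
// reading one. An empty Reg on a debug entry means "undef" ($noreg).
struct Instr {
  bool IsLoad;
  std::string Reg;
  bool IsClone = false;
};

// After loads have been hoisted, any debug annotation that reads a register
// defined *later* in the block is a use-before-def. Clone it to just after
// its defining load and turn the original into an undef annotation, mirroring
// the "set to $noreg, clone below the load" strategy in the pass.
static void sinkDebugAnnotations(std::vector<Instr> &Block) {
  for (size_t I = 0; I < Block.size(); ++I) {
    if (Block[I].IsLoad || Block[I].IsClone || Block[I].Reg.empty())
      continue;
    for (size_t J = I + 1; J < Block.size(); ++J) {
      if (Block[J].IsLoad && Block[J].Reg == Block[I].Reg) {
        Instr Clone = Block[I];
        Clone.IsClone = true;
        Block[I].Reg.clear();                       // original -> undef
        Block.insert(Block.begin() + J + 1, Clone); // clone below the load
        break;
      }
    }
  }
}

int main() {
  std::vector<Instr> Block = {
      {false, "%2"}, // DBG_VALUE %2 (use-before-def after rescheduling)
      {true, "%2"},  // %2 = ld ...
      {true, "%3"},  // %3 = ld ...
  };
  sinkDebugAnnotations(Block);
  assert(Block[0].Reg.empty());                     // original set to undef
  assert(Block[2].IsClone && Block[2].Reg == "%2"); // clone right after load
  return 0;
}
```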
diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
index bd07cb5c1742..247730c7b9ae 100644
--- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
+++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp
@@ -427,11 +427,6 @@ namespace {
// after tail predication conversion.
bool ValidateLiveOuts();
- // Is it safe to define LR with DLS/WLS?
- // LR can be defined if it is the operand to start, because it's the same
- // value, or if it's going to be equivalent to the operand to Start.
- MachineInstr *isSafeToDefineLR();
-
// Check the branch targets are within range and we satisfy our
// restrictions.
void Validate(ARMBasicBlockUtils *BBUtils);
@@ -907,7 +902,7 @@ static bool producesFalseLanesZero(MachineInstr &MI,
continue;
// Skip the lr predicate reg
int PIdx = llvm::findFirstVPTPredOperandIdx(MI);
- if (PIdx != -1 && (int)MI.getOperandNo(&MO) == PIdx + 2)
+ if (PIdx != -1 && (int)MO.getOperandNo() == PIdx + 2)
continue;
// Check that this instruction will produce zeros in its false lanes:
@@ -1249,7 +1244,7 @@ bool LowOverheadLoop::ValidateMVEInst(MachineInstr *MI) {
const MCInstrDesc &MCID = MI->getDesc();
bool IsUse = false;
unsigned LastOpIdx = MI->getNumOperands() - 1;
- for (auto &Op : enumerate(reverse(MCID.operands()))) {
+ for (const auto &Op : enumerate(reverse(MCID.operands()))) {
const MachineOperand &MO = MI->getOperand(LastOpIdx - Op.index());
if (!MO.isReg() || !MO.isUse() || MO.getReg() != ARM::VPR)
continue;
diff --git a/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/llvm/lib/Target/ARM/ARMMCInstLower.cpp
index a6b68e55e54a..2c2853223ba5 100644
--- a/llvm/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -58,6 +58,22 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext);
Expr = ARMMCExpr::createUpper16(Expr, OutContext);
break;
+ case ARMII::MO_LO_0_7:
+ Expr = MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext);
+ Expr = ARMMCExpr::createLower0_7(Expr, OutContext);
+ break;
+ case ARMII::MO_LO_8_15:
+ Expr = MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext);
+ Expr = ARMMCExpr::createLower8_15(Expr, OutContext);
+ break;
+ case ARMII::MO_HI_0_7:
+ Expr = MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext);
+ Expr = ARMMCExpr::createUpper0_7(Expr, OutContext);
+ break;
+ case ARMII::MO_HI_8_15:
+ Expr = MCSymbolRefExpr::create(Symbol, SymbolVariant, OutContext);
+ Expr = ARMMCExpr::createUpper8_15(Expr, OutContext);
+ break;
}
if (!MO.isJTI() && MO.getOffset())
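The four new MO_LO_0_7 .. MO_HI_8_15 cases above wrap the symbol reference in createLower0_7/createLower8_15/createUpper0_7/createUpper8_15, i.e. one byte of the address at a time, which is what lets an address be materialised with byte-sized immediates on Thumb-1 execute-only targets that lack MOVW/MOVT. Assuming the names map to the byte positions they suggest, the split looks like this on an ordinary 32-bit value (the address constant is made up):

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Addr = 0x1234ABCD; // hypothetical symbol address
  uint8_t Lo0_7  = Addr & 0xFF;         // lower halfword, bits 0-7
  uint8_t Lo8_15 = (Addr >> 8) & 0xFF;  // lower halfword, bits 8-15
  uint8_t Hi0_7  = (Addr >> 16) & 0xFF; // upper halfword, bits 0-7
  uint8_t Hi8_15 = (Addr >> 24) & 0xFF; // upper halfword, bits 8-15
  std::printf("%02x %02x %02x %02x\n", (unsigned)Hi8_15, (unsigned)Hi0_7,
              (unsigned)Lo8_15, (unsigned)Lo0_7);
}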
diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index aba1afec3d48..f7531ce78cca 100644
--- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -38,13 +38,6 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// 'isThumb'.
bool hasThumb2 = false;
- /// StByValParamsPadding - For parameter that is split between
- /// GPRs and memory; while recovering GPRs part, when
- /// StackAlignment > 4, and GPRs-part-size mod StackAlignment != 0,
- /// we need to insert gap before parameter start address. It allows to
- /// "attach" GPR-part to the part that was passed via stack.
- unsigned StByValParamsPadding = 0;
-
/// ArgsRegSaveSize - Size of the register save area for vararg functions or
/// those making guaranteed tail calls that need more stack argument space
/// than is provided by this functions incoming parameters.
@@ -173,9 +166,6 @@ public:
bool isCmseNSEntryFunction() const { return IsCmseNSEntry; }
bool isCmseNSCallFunction() const { return IsCmseNSCall; }
- unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; }
- void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; }
-
unsigned getArgRegsSaveSize() const { return ArgRegsSaveSize; }
void setArgRegsSaveSize(unsigned s) { ArgRegsSaveSize = s; }
diff --git a/llvm/lib/Target/ARM/ARMParallelDSP.cpp b/llvm/lib/Target/ARM/ARMParallelDSP.cpp
index d9b90af4fa99..1efda5d1c937 100644
--- a/llvm/lib/Target/ARM/ARMParallelDSP.cpp
+++ b/llvm/lib/Target/ARM/ARMParallelDSP.cpp
@@ -64,7 +64,6 @@ namespace {
Value* LHS;
Value* RHS;
bool Exchange = false;
- bool ReadOnly = true;
bool Paired = false;
SmallVector<LoadInst*, 2> VecLd; // Container for loads to widen.
@@ -152,10 +151,6 @@ namespace {
MulPairs.push_back(std::make_pair(Mul0, Mul1));
}
- /// Return true if enough mul operations are found that can be executed in
- /// parallel.
- bool CreateParallelPairs();
-
/// Return the add instruction which is the root of the reduction.
Instruction *getRoot() { return Root; }
@@ -765,12 +760,10 @@ LoadInst* ARMParallelDSP::CreateWideLoad(MemInstList &Loads,
IRBuilder<NoFolder> IRB(DomLoad->getParent(),
++BasicBlock::iterator(DomLoad));
- // Bitcast the pointer to a wider type and create the wide load, while making
- // sure to maintain the original alignment as this prevents ldrd from being
- // generated when it could be illegal due to memory alignment.
- const unsigned AddrSpace = DomLoad->getPointerAddressSpace();
- Value *VecPtr = IRB.CreateBitCast(Base->getPointerOperand(),
- LoadTy->getPointerTo(AddrSpace));
+ // Create the wide load, while making sure to maintain the original alignment
+ // as this prevents ldrd from being generated when it could be illegal due to
+ // memory alignment.
+ Value *VecPtr = Base->getPointerOperand();
LoadInst *WideLoad = IRB.CreateAlignedLoad(LoadTy, VecPtr, Base->getAlign());
// Make sure everything is in the correct order in the basic block.
diff --git a/llvm/lib/Target/ARM/ARMPredicates.td b/llvm/lib/Target/ARM/ARMPredicates.td
index 59562efea6b9..aca970d900a8 100644
--- a/llvm/lib/Target/ARM/ARMPredicates.td
+++ b/llvm/lib/Target/ARM/ARMPredicates.td
@@ -170,8 +170,10 @@ def IsNotMachO : Predicate<"!Subtarget->isTargetMachO()">;
def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
def IsNotWindows : Predicate<"!Subtarget->isTargetWindows()">;
-def IsReadTPHard : Predicate<"Subtarget->isReadTPHard()">;
-def IsReadTPSoft : Predicate<"!Subtarget->isReadTPHard()">;
+def IsReadTPTPIDRURW : Predicate<"Subtarget->isReadTPTPIDRURW()">;
+def IsReadTPTPIDRURO : Predicate<"Subtarget->isReadTPTPIDRURO()">;
+def IsReadTPTPIDRPRW : Predicate<"Subtarget->isReadTPTPIDRPRW()">;
+def IsReadTPSoft : Predicate<"Subtarget->isReadTPSoft()">;
def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">,
AssemblerPredicate<(all_of FeatureNaClTrap), "NaCl">;
def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">;
@@ -222,6 +224,10 @@ let RecomputePerFunction = 1 in {
}
def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">;
+def DontGenExecuteOnly : Predicate<"!Subtarget->genExecuteOnly()">;
+def GenT1ExecuteOnly : Predicate<"Subtarget->genExecuteOnly() && "
+ "Subtarget->isThumb1Only() && "
+ "!Subtarget->hasV8MBaselineOps()">;
// Armv8.5-A extensions
def HasSB : Predicate<"Subtarget->hasSB()">,
diff --git a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
index 527fefbd291e..f7977941e895 100644
--- a/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -162,7 +162,8 @@ ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI) {
"Subclass not added?");
assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPROdd_and_tcGPRRegClassID)) &&
"Subclass not added?");
- assert(RBGPR.getSize() == 32 && "GPRs should hold up to 32-bit");
+ assert(getMaximumSize(RBGPR.getID()) == 32 &&
+ "GPRs should hold up to 32-bit");
#ifndef NDEBUG
ARM::checkPartialMappings();
diff --git a/llvm/lib/Target/ARM/ARMSLSHardening.cpp b/llvm/lib/Target/ARM/ARMSLSHardening.cpp
index 3dd9428c7589..09357ae2e3a3 100644
--- a/llvm/lib/Target/ARM/ARMSLSHardening.cpp
+++ b/llvm/lib/Target/ARM/ARMSLSHardening.cpp
@@ -202,7 +202,8 @@ ArmInsertedThunks SLSBLRThunkInserter::insertThunks(MachineModuleInfo &MMI,
const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
for (auto T : SLSBLRThunks)
if (ST->isThumb() == T.isThumb)
- createThunkFunction(MMI, T.Name, ComdatThunks);
+ createThunkFunction(MMI, T.Name, ComdatThunks,
+ T.isThumb ? "+thumb-mode" : "");
return ST->isThumb() ? ThumbThunk : ArmThunk;
}
diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 913724daf0ad..c57825949c1c 100644
--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -65,9 +65,8 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
break;
case RTLIB::MEMSET:
AEABILibcall = AEABI_MEMSET;
- if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
- if (ConstantSrc->getZExtValue() == 0)
- AEABILibcall = AEABI_MEMCLR;
+ if (isNullConstant(Src))
+ AEABILibcall = AEABI_MEMCLR;
break;
default:
return SDValue();
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 79244f634ce3..1505e9214050 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -13,10 +13,10 @@
#include "ARM.h"
#include "ARMCallLowering.h"
-#include "ARMLegalizerInfo.h"
-#include "ARMRegisterBankInfo.h"
#include "ARMFrameLowering.h"
#include "ARMInstrInfo.h"
+#include "ARMLegalizerInfo.h"
+#include "ARMRegisterBankInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
@@ -24,7 +24,6 @@
#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -35,9 +34,9 @@
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ARMTargetParser.h"
-#include "llvm/Support/TargetParser.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/ARMTargetParser.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
@@ -188,10 +187,12 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// Assert this for now to make the change obvious.
assert(hasV6T2Ops() || !hasThumb2());
- // Execute only support requires movt support
if (genExecuteOnly()) {
- NoMovt = false;
- assert(hasV8MBaselineOps() && "Cannot generate execute-only code for this target");
+ // Execute only support for >= v8-M Baseline requires movt support
+ if (hasV8MBaselineOps())
+ NoMovt = false;
+ if (!hasV6MOps())
+ report_fatal_error("Cannot generate execute-only code for this target");
}
// Keep a pointer to static instruction cost data for the specified CPU.
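The reworked execute-only handling above only forces movt on when v8-M Baseline is available, and replaces the old assert with a hard error below v6-M. A small standalone sketch of that decision, with the subtarget queries reduced to plain bools and std::runtime_error standing in for report_fatal_error (the example configurations are hypothetical):

#include <cstdio>
#include <stdexcept>

// Mirrors the policy in initSubtargetFeatures: execute-only code can use
// movt on v8-M Baseline and up, and is rejected outright below v6-M.
bool decideNoMovt(bool GenExecuteOnly, bool HasV8MBaselineOps, bool HasV6MOps,
                  bool NoMovt) {
  if (GenExecuteOnly) {
    if (HasV8MBaselineOps)
      NoMovt = false; // movt lets addresses be built without literal pools
    if (!HasV6MOps)
      throw std::runtime_error(
          "Cannot generate execute-only code for this target");
  }
  return NoMovt;
}

int main() {
  std::printf("%d\n", decideNoMovt(true, true, true, true));    // prints 0
  std::printf("%d\n", decideNoMovt(false, false, false, true)); // prints 1
}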
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 98863e845d00..715b5bee6dc6 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -20,7 +20,6 @@
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSelectionDAGInfo.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
@@ -32,6 +31,7 @@
#include "llvm/MC/MCSchedule.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
#include <memory>
#include <string>
@@ -305,8 +305,6 @@ public:
bool GETTER() const { return ATTRIBUTE; }
#include "ARMGenSubtargetInfo.inc"
- void computeIssueWidth();
-
/// @{
/// These functions are obsolete, please consider adding subtarget features
/// or properties instead of calling them.
@@ -348,7 +346,7 @@ public:
bool useSjLjEH() const { return UseSjLjEH; }
bool hasBaseDSP() const {
if (isThumb())
- return hasDSP();
+ return hasThumb2() && hasDSP();
else
return hasV5TEOps();
}
@@ -391,7 +389,8 @@ public:
}
bool isTargetMuslAEABI() const {
return (TargetTriple.getEnvironment() == Triple::MuslEABI ||
- TargetTriple.getEnvironment() == Triple::MuslEABIHF) &&
+ TargetTriple.getEnvironment() == Triple::MuslEABIHF ||
+ TargetTriple.getEnvironment() == Triple::OpenHOS) &&
!isTargetDarwin() && !isTargetWindows();
}
@@ -403,6 +402,10 @@ public:
bool isTargetHardFloat() const;
+ bool isReadTPSoft() const {
+ return !(isReadTPTPIDRURW() || isReadTPTPIDRURO() || isReadTPTPIDRPRW());
+ }
+
bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
bool isXRaySupported() const override;
@@ -495,6 +498,11 @@ public:
/// function for this subtarget.
Align getStackAlignment() const { return stackAlignment; }
+ // Returns the required alignment for LDRD/STRD instructions
+ Align getDualLoadStoreAlignment() const {
+ return Align(hasV7Ops() || allowsUnalignedMem() ? 4 : 8);
+ }
+
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getPartialUpdateClearance() const { return PartialUpdateClearance; }
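getDualLoadStoreAlignment above condenses the LDRD/STRD requirement into a single branch: 4-byte alignment is enough on v7 or when unaligned accesses are allowed, otherwise 8 bytes are required. A tiny standalone restatement of that branch, returning a plain unsigned instead of llvm::Align (the two sample configurations are made up):

#include <cstdio>

unsigned dualLoadStoreAlignment(bool HasV7Ops, bool AllowsUnalignedMem) {
  // Same condition as the new subtarget helper: relax to word alignment on
  // v7+ or when unaligned memory accesses are permitted.
  return (HasV7Ops || AllowsUnalignedMem) ? 4 : 8;
}

int main() {
  std::printf("pre-v7, strict alignment: %u\n",
              dualLoadStoreAlignment(false, false)); // 8
  std::printf("v7 default:               %u\n",
              dualLoadStoreAlignment(true, false));  // 4
}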
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 775d098fbaed..39d8607818f7 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -20,7 +20,6 @@
#include "TargetInfo/ARMTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ExecutionDomainFix.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
@@ -41,13 +40,14 @@
#include "llvm/IR/Function.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
-#include "llvm/Support/ARMTargetParser.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetParser.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/ARMTargetParser.h"
+#include "llvm/TargetParser/TargetParser.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/CFGuard.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"
@@ -242,7 +242,8 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
if ((TargetTriple.getEnvironment() == Triple::GNUEABI ||
TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
TargetTriple.getEnvironment() == Triple::MuslEABI ||
- TargetTriple.getEnvironment() == Triple::MuslEABIHF) &&
+ TargetTriple.getEnvironment() == Triple::MuslEABIHF ||
+ TargetTriple.getEnvironment() == Triple::OpenHOS) &&
!(TargetTriple.isOSWindows() || TargetTriple.isOSDarwin()))
this->Options.EABIVersion = EABI::GNU;
else
@@ -481,7 +482,7 @@ bool ARMPassConfig::addPreISel() {
}
if (TM->getOptLevel() != CodeGenOpt::None) {
- addPass(createHardwareLoopsPass());
+ addPass(createHardwareLoopsLegacyPass());
addPass(createMVETailPredicationPass());
// FIXME: IR passes can delete address-taken basic blocks, deleting
// corresponding blockaddresses. ARMConstantPoolConstant holds references to
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 048790afb496..c2d7d605fbc2 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -14,6 +14,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
@@ -25,11 +26,10 @@
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -445,7 +445,7 @@ InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
return 0;
// We can convert <= -1 to < 0, which is generally quite cheap.
- if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnesValue()) {
+ if (Inst && Opcode == Instruction::ICmp && Idx == 1 && Imm.isAllOnes()) {
ICmpInst::Predicate Pred = cast<ICmpInst>(Inst)->getPredicate();
if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLE)
return std::min(getIntImmCost(Imm, Ty, CostKind),
@@ -1585,9 +1585,11 @@ InstructionCost ARMTTIImpl::getGatherScatterOpCost(
InstructionCost VectorCost =
NumElems * LT.first * ST->getMVEVectorCostFactor(CostKind);
// The scalarization cost should be a lot higher. We use the number of vector
- // elements plus the scalarization overhead.
+ // elements plus the scalarization overhead. If masking is required then a lot
+ // of little blocks will be needed and potentially a scalarized p0 mask,
+ // greatly increasing the cost.
InstructionCost ScalarCost =
- NumElems * LT.first +
+ NumElems * LT.first + (VariableMask ? NumElems * 5 : 0) +
BaseT::getScalarizationOverhead(VTy, /*Insert*/ true, /*Extract*/ false,
CostKind) +
BaseT::getScalarizationOverhead(VTy, /*Insert*/ false, /*Extract*/ true,
@@ -1689,7 +1691,7 @@ ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
InstructionCost ARMTTIImpl::getExtendedReductionCost(
unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
- std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) {
+ FastMathFlags FMF, TTI::TargetCostKind CostKind) {
EVT ValVT = TLI->getValueType(DL, ValTy);
EVT ResVT = TLI->getValueType(DL, ResTy);
@@ -2238,10 +2240,7 @@ static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
return true;
}
-bool ARMTTIImpl::preferPredicateOverEpilogue(
- Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC,
- TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL,
- InterleavedAccessInfo *IAI) {
+bool ARMTTIImpl::preferPredicateOverEpilogue(TailFoldingInfo *TFI) {
if (!EnableTailPredication) {
LLVM_DEBUG(dbgs() << "Tail-predication not enabled.\n");
return false;
@@ -2253,6 +2252,9 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(
if (!ST->hasMVEIntegerOps())
return false;
+ LoopVectorizationLegality *LVL = TFI->LVL;
+ Loop *L = LVL->getLoop();
+
// For now, restrict this to single block loops.
if (L->getNumBlocks() > 1) {
LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block "
@@ -2262,6 +2264,7 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(
assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected");
+ LoopInfo *LI = LVL->getLoopInfo();
HardwareLoopInfo HWLoopInfo(L);
if (!HWLoopInfo.canAnalyze(*LI)) {
LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
@@ -2269,32 +2272,37 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(
return false;
}
+ AssumptionCache *AC = LVL->getAssumptionCache();
+ ScalarEvolution *SE = LVL->getScalarEvolution();
+
// This checks if we have the low-overhead branch architecture
// extension, and if we will create a hardware-loop:
- if (!isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
+ if (!isHardwareLoopProfitable(L, *SE, *AC, TFI->TLI, HWLoopInfo)) {
LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
"profitable.\n");
return false;
}
- if (!HWLoopInfo.isHardwareLoopCandidate(SE, *LI, *DT)) {
+ DominatorTree *DT = LVL->getDominatorTree();
+ if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT)) {
LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not "
"a candidate.\n");
return false;
}
- return canTailPredicateLoop(L, LI, SE, DL, LVL->getLAI());
+ return canTailPredicateLoop(L, LI, *SE, DL, LVL->getLAI());
}
-PredicationStyle ARMTTIImpl::emitGetActiveLaneMask() const {
+TailFoldingStyle
+ARMTTIImpl::getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const {
if (!ST->hasMVEIntegerOps() || !EnableTailPredication)
- return PredicationStyle::None;
+ return TailFoldingStyle::DataWithoutLaneMask;
// Intrinsic @llvm.get.active.lane.mask is supported.
// It is used in the MVETailPredication pass, which requires the number of
// elements processed by this vector loop to setup the tail-predicated
// loop.
- return PredicationStyle::Data;
+ return TailFoldingStyle::Data;
}
void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
@@ -2441,3 +2449,16 @@ InstructionCost ARMTTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
}
return -1;
}
+
+bool ARMTTIImpl::hasArmWideBranch(bool Thumb) const {
+ if (Thumb) {
+ // B.W is available in any Thumb2-supporting target, and also in every
+ // version of Armv8-M, even Baseline which does not include the rest of
+ // Thumb2.
+ return ST->isThumb2() || ST->hasV8MBaselineOps();
+ } else {
+ // B is available in all versions of the Arm ISA, so the only question is
+ // whether that ISA is available at all.
+ return ST->hasARMOps();
+ }
+}
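The getGatherScatterOpCost hunk earlier in this file adds NumElems * 5 to the scalar estimate whenever a variable mask is in play. A worked example of the updated formula with made-up inputs (NumElems = 4, LT.first = 1, combined insert plus extract scalarization overhead of 8; the real values come from type legalization and the scalarization cost queries):

#include <cstdio>

int main() {
  unsigned NumElems = 4, LTFirst = 1, ScalarizationOverhead = 8; // hypothetical
  bool VariableMask = true;

  // ScalarCost = NumElems * LT.first
  //            + (VariableMask ? NumElems * 5 : 0)
  //            + scalarization overhead (insert + extract)
  unsigned ScalarCost = NumElems * LTFirst +
                        (VariableMask ? NumElems * 5 : 0) +
                        ScalarizationOverhead;

  std::printf("ScalarCost = %u\n", ScalarCost); // 4 + 20 + 8 = 32
}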
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 69b7a31d487c..588704d5b7e5 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -25,7 +25,7 @@
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
-#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include <optional>
namespace llvm {
@@ -178,7 +178,7 @@ public:
llvm_unreachable("Unsupported register kind");
}
- unsigned getMaxInterleaveFactor(unsigned VF) {
+ unsigned getMaxInterleaveFactor(ElementCount VF) {
return ST->getMaxInterleaveFactor();
}
@@ -210,6 +210,10 @@ public:
InstructionCost getMemcpyCost(const Instruction *I);
+ uint64_t getMaxMemIntrinsicInlineSizeThreshold() const {
+ return ST->getMaxInlineSizeThreshold();
+ }
+
int getNumMemOps(const IntrinsicInst *I) const;
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
@@ -280,7 +284,7 @@ public:
TTI::TargetCostKind CostKind);
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
Type *ResTy, VectorType *ValTy,
- std::optional<FastMathFlags> FMF,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind);
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
VectorType *ValTy,
@@ -303,16 +307,13 @@ public:
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo);
- bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
- AssumptionCache &AC, TargetLibraryInfo *TLI,
- DominatorTree *DT,
- LoopVectorizationLegality *LVL,
- InterleavedAccessInfo *IAI);
+ bool preferPredicateOverEpilogue(TailFoldingInfo *TFI);
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE);
- PredicationStyle emitGetActiveLaneMask() const;
+ TailFoldingStyle
+ getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;
void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
TTI::PeelingPreferences &PP);
@@ -325,6 +326,9 @@ public:
return true;
}
+
+ bool hasArmWideBranch(bool Thumb) const;
+
/// @}
};
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 207cf32590cb..ef4c70916eeb 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -24,7 +24,6 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -42,7 +41,6 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/ARMEHABI.h"
@@ -52,8 +50,10 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
-#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/TargetParser/TargetParser.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -70,7 +70,12 @@
using namespace llvm;
namespace llvm {
-extern const MCInstrDesc ARMInsts[];
+struct ARMInstrTable {
+ MCInstrDesc Insts[4445];
+ MCOperandInfo OperandInfo[3026];
+ MCPhysReg ImplicitOps[130];
+};
+extern const ARMInstrTable ARMDescs;
} // end namespace llvm
namespace {
@@ -318,7 +323,7 @@ class ARMAsmParser : public MCTargetAsmParser {
bool inImplicitITBlock() { return inITBlock() && !ITState.IsExplicit; }
bool lastInITBlock() {
- return ITState.CurPosition == 4 - countTrailingZeros(ITState.Mask);
+ return ITState.CurPosition == 4 - (unsigned)llvm::countr_zero(ITState.Mask);
}
void forwardITPosition() {
@@ -326,7 +331,7 @@ class ARMAsmParser : public MCTargetAsmParser {
// Move to the next instruction in the IT block, if there is one. If not,
// mark the block as done, except for implicit IT blocks, which we leave
// open until we find an instruction that can't be added to it.
- unsigned TZ = countTrailingZeros(ITState.Mask);
+ unsigned TZ = llvm::countr_zero(ITState.Mask);
if (++ITState.CurPosition == 5 - TZ && ITState.IsExplicit)
ITState.CurPosition = ~0U; // Done with the IT block after this.
}
@@ -336,7 +341,7 @@ class ARMAsmParser : public MCTargetAsmParser {
assert(inImplicitITBlock());
assert(ITState.CurPosition > 1);
ITState.CurPosition--;
- unsigned TZ = countTrailingZeros(ITState.Mask);
+ unsigned TZ = llvm::countr_zero(ITState.Mask);
unsigned NewMask = 0;
NewMask |= ITState.Mask & (0xC << TZ);
NewMask |= 0x2 << TZ;
@@ -384,7 +389,7 @@ class ARMAsmParser : public MCTargetAsmParser {
assert(!isITBlockFull());
assert(Cond == ITState.Cond ||
Cond == ARMCC::getOppositeCondition(ITState.Cond));
- unsigned TZ = countTrailingZeros(ITState.Mask);
+ unsigned TZ = llvm::countr_zero(ITState.Mask);
unsigned NewMask = 0;
// Keep any existing condition bits.
NewMask |= ITState.Mask & (0xE << TZ);
@@ -423,7 +428,7 @@ class ARMAsmParser : public MCTargetAsmParser {
bool inVPTBlock() { return VPTState.CurPosition != ~0U; }
void forwardVPTPosition() {
if (!inVPTBlock()) return;
- unsigned TZ = countTrailingZeros(VPTState.Mask);
+ unsigned TZ = llvm::countr_zero(VPTState.Mask);
if (++VPTState.CurPosition == 5 - TZ)
VPTState.CurPosition = ~0U;
}
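The countTrailingZeros calls in the IT/VPT block tracking above become llvm::countr_zero, which returns the bit position of the lowest set bit and so gives the same answer for the masks used here. C++20 exposes the same operation as std::countr_zero, so a self-contained equivalent (the mask value is made up) is:

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  // A mask whose lowest set bit, the marker the surrounding code looks for,
  // sits at position 2: 0b1100.
  uint8_t Mask = 0b1100; // hypothetical value
  int TZ = std::countr_zero(Mask); // 2, matching the old countTrailingZeros
  std::printf("trailing zeros = %d\n", TZ);
}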
@@ -516,86 +521,86 @@ class ARMAsmParser : public MCTargetAsmParser {
bool isThumb() const {
// FIXME: Can tablegen auto-generate this?
- return getSTI().getFeatureBits()[ARM::ModeThumb];
+ return getSTI().hasFeature(ARM::ModeThumb);
}
bool isThumbOne() const {
- return isThumb() && !getSTI().getFeatureBits()[ARM::FeatureThumb2];
+ return isThumb() && !getSTI().hasFeature(ARM::FeatureThumb2);
}
bool isThumbTwo() const {
- return isThumb() && getSTI().getFeatureBits()[ARM::FeatureThumb2];
+ return isThumb() && getSTI().hasFeature(ARM::FeatureThumb2);
}
bool hasThumb() const {
- return getSTI().getFeatureBits()[ARM::HasV4TOps];
+ return getSTI().hasFeature(ARM::HasV4TOps);
}
bool hasThumb2() const {
- return getSTI().getFeatureBits()[ARM::FeatureThumb2];
+ return getSTI().hasFeature(ARM::FeatureThumb2);
}
bool hasV6Ops() const {
- return getSTI().getFeatureBits()[ARM::HasV6Ops];
+ return getSTI().hasFeature(ARM::HasV6Ops);
}
bool hasV6T2Ops() const {
- return getSTI().getFeatureBits()[ARM::HasV6T2Ops];
+ return getSTI().hasFeature(ARM::HasV6T2Ops);
}
bool hasV6MOps() const {
- return getSTI().getFeatureBits()[ARM::HasV6MOps];
+ return getSTI().hasFeature(ARM::HasV6MOps);
}
bool hasV7Ops() const {
- return getSTI().getFeatureBits()[ARM::HasV7Ops];
+ return getSTI().hasFeature(ARM::HasV7Ops);
}
bool hasV8Ops() const {
- return getSTI().getFeatureBits()[ARM::HasV8Ops];
+ return getSTI().hasFeature(ARM::HasV8Ops);
}
bool hasV8MBaseline() const {
- return getSTI().getFeatureBits()[ARM::HasV8MBaselineOps];
+ return getSTI().hasFeature(ARM::HasV8MBaselineOps);
}
bool hasV8MMainline() const {
- return getSTI().getFeatureBits()[ARM::HasV8MMainlineOps];
+ return getSTI().hasFeature(ARM::HasV8MMainlineOps);
}
bool hasV8_1MMainline() const {
- return getSTI().getFeatureBits()[ARM::HasV8_1MMainlineOps];
+ return getSTI().hasFeature(ARM::HasV8_1MMainlineOps);
}
bool hasMVE() const {
- return getSTI().getFeatureBits()[ARM::HasMVEIntegerOps];
+ return getSTI().hasFeature(ARM::HasMVEIntegerOps);
}
bool hasMVEFloat() const {
- return getSTI().getFeatureBits()[ARM::HasMVEFloatOps];
+ return getSTI().hasFeature(ARM::HasMVEFloatOps);
}
bool hasCDE() const {
- return getSTI().getFeatureBits()[ARM::HasCDEOps];
+ return getSTI().hasFeature(ARM::HasCDEOps);
}
bool has8MSecExt() const {
- return getSTI().getFeatureBits()[ARM::Feature8MSecExt];
+ return getSTI().hasFeature(ARM::Feature8MSecExt);
}
bool hasARM() const {
- return !getSTI().getFeatureBits()[ARM::FeatureNoARM];
+ return !getSTI().hasFeature(ARM::FeatureNoARM);
}
bool hasDSP() const {
- return getSTI().getFeatureBits()[ARM::FeatureDSP];
+ return getSTI().hasFeature(ARM::FeatureDSP);
}
bool hasD32() const {
- return getSTI().getFeatureBits()[ARM::FeatureD32];
+ return getSTI().hasFeature(ARM::FeatureD32);
}
bool hasV8_1aOps() const {
- return getSTI().getFeatureBits()[ARM::HasV8_1aOps];
+ return getSTI().hasFeature(ARM::HasV8_1aOps);
}
bool hasRAS() const {
- return getSTI().getFeatureBits()[ARM::FeatureRAS];
+ return getSTI().hasFeature(ARM::FeatureRAS);
}
void SwitchMode() {
@@ -607,7 +612,7 @@ class ARMAsmParser : public MCTargetAsmParser {
void FixModeAfterArchChange(bool WasThumb, SMLoc Loc);
bool isMClass() const {
- return getSTI().getFeatureBits()[ARM::FeatureMClass];
+ return getSTI().hasFeature(ARM::FeatureMClass);
}
/// @name Auto-generated Match Functions
@@ -618,35 +623,34 @@ class ARMAsmParser : public MCTargetAsmParser {
/// }
- OperandMatchResultTy parseITCondCode(OperandVector &);
- OperandMatchResultTy parseCoprocNumOperand(OperandVector &);
- OperandMatchResultTy parseCoprocRegOperand(OperandVector &);
- OperandMatchResultTy parseCoprocOptionOperand(OperandVector &);
- OperandMatchResultTy parseMemBarrierOptOperand(OperandVector &);
- OperandMatchResultTy parseTraceSyncBarrierOptOperand(OperandVector &);
- OperandMatchResultTy parseInstSyncBarrierOptOperand(OperandVector &);
- OperandMatchResultTy parseProcIFlagsOperand(OperandVector &);
- OperandMatchResultTy parseMSRMaskOperand(OperandVector &);
- OperandMatchResultTy parseBankedRegOperand(OperandVector &);
- OperandMatchResultTy parsePKHImm(OperandVector &O, StringRef Op, int Low,
- int High);
- OperandMatchResultTy parsePKHLSLImm(OperandVector &O) {
+ ParseStatus parseITCondCode(OperandVector &);
+ ParseStatus parseCoprocNumOperand(OperandVector &);
+ ParseStatus parseCoprocRegOperand(OperandVector &);
+ ParseStatus parseCoprocOptionOperand(OperandVector &);
+ ParseStatus parseMemBarrierOptOperand(OperandVector &);
+ ParseStatus parseTraceSyncBarrierOptOperand(OperandVector &);
+ ParseStatus parseInstSyncBarrierOptOperand(OperandVector &);
+ ParseStatus parseProcIFlagsOperand(OperandVector &);
+ ParseStatus parseMSRMaskOperand(OperandVector &);
+ ParseStatus parseBankedRegOperand(OperandVector &);
+ ParseStatus parsePKHImm(OperandVector &O, StringRef Op, int Low, int High);
+ ParseStatus parsePKHLSLImm(OperandVector &O) {
return parsePKHImm(O, "lsl", 0, 31);
}
- OperandMatchResultTy parsePKHASRImm(OperandVector &O) {
+ ParseStatus parsePKHASRImm(OperandVector &O) {
return parsePKHImm(O, "asr", 1, 32);
}
- OperandMatchResultTy parseSetEndImm(OperandVector &);
- OperandMatchResultTy parseShifterImm(OperandVector &);
- OperandMatchResultTy parseRotImm(OperandVector &);
- OperandMatchResultTy parseModImm(OperandVector &);
- OperandMatchResultTy parseBitfield(OperandVector &);
- OperandMatchResultTy parsePostIdxReg(OperandVector &);
- OperandMatchResultTy parseAM3Offset(OperandVector &);
- OperandMatchResultTy parseFPImm(OperandVector &);
- OperandMatchResultTy parseVectorList(OperandVector &);
- OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index,
- SMLoc &EndLoc);
+ ParseStatus parseSetEndImm(OperandVector &);
+ ParseStatus parseShifterImm(OperandVector &);
+ ParseStatus parseRotImm(OperandVector &);
+ ParseStatus parseModImm(OperandVector &);
+ ParseStatus parseBitfield(OperandVector &);
+ ParseStatus parsePostIdxReg(OperandVector &);
+ ParseStatus parseAM3Offset(OperandVector &);
+ ParseStatus parseFPImm(OperandVector &);
+ ParseStatus parseVectorList(OperandVector &);
+ ParseStatus parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index,
+ SMLoc &EndLoc);
// Asm Match Converter Methods
void cvtThumbMultiply(MCInst &Inst, const OperandVector &);
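Most of this ARMAsmParser diff is a mechanical switch from OperandMatchResultTy to ParseStatus, visible in the declarations just above and in every parse* body below: a parser can now write return Error(Loc, "...") directly, because the bool that Error returns converts to a failure status, and callers test the result with isSuccess()/isNoMatch() instead of comparing enum values. A simplified stand-in for the three-state type (not the real llvm::ParseStatus, only the shape of the pattern):

#include <cassert>
#include <string_view>

struct ParseStatus {
  enum class S { Success, Failure, NoMatch };
  S Status;

  constexpr ParseStatus(S V) : Status(V) {}
  // Implicit conversion from bool: true means a diagnostic was emitted, so
  // "return Error(...);" reads as returning Failure.
  constexpr ParseStatus(bool IsError)
      : Status(IsError ? S::Failure : S::Success) {}

  static const ParseStatus Success;
  static const ParseStatus Failure;
  static const ParseStatus NoMatch;

  constexpr bool isSuccess() const { return Status == S::Success; }
  constexpr bool isFailure() const { return Status == S::Failure; }
  constexpr bool isNoMatch() const { return Status == S::NoMatch; }
};

const ParseStatus ParseStatus::Success(ParseStatus::S::Success);
const ParseStatus ParseStatus::Failure(ParseStatus::S::Failure);
const ParseStatus ParseStatus::NoMatch(ParseStatus::S::NoMatch);

// Stand-in for the parser's Error() helper, which prints a diagnostic and
// returns true; the real one also takes a source location.
static bool Error(std::string_view /*Msg*/) { return true; }

// Hypothetical operand parser showing the three outcomes.
static ParseStatus parseSomething(bool LooksLikeOurs, bool WellFormed) {
  if (!LooksLikeOurs)
    return ParseStatus::NoMatch;       // let another parser try this token
  if (!WellFormed)
    return Error("malformed operand"); // bool true converts to Failure
  return ParseStatus::Success;
}

int main() {
  assert(parseSomething(false, true).isNoMatch());
  assert(parseSomething(true, false).isFailure());
  assert(parseSomething(true, true).isSuccess());
}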
@@ -1227,6 +1231,18 @@ public:
return isImmediate<8, 255>();
}
+ bool isImm0_255Expr() const {
+ if (!isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ // If it's not a constant expression, it'll generate a fixup and be
+ // handled later.
+ if (!CE)
+ return true;
+ int64_t Value = CE->getValue();
+ return isUInt<8>(Value);
+ }
+
bool isImm256_65535Expr() const {
if (!isImm()) return false;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
@@ -1255,34 +1271,6 @@ public:
return isImmediate<1, 33>();
}
- template<int shift>
- bool isExpImmValue(uint64_t Value) const {
- uint64_t mask = (1 << shift) - 1;
- if ((Value & mask) != 0 || (Value >> shift) > 0xff)
- return false;
- return true;
- }
-
- template<int shift>
- bool isExpImm() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
-
- return isExpImmValue<shift>(CE->getValue());
- }
-
- template<int shift, int size>
- bool isInvertedExpImm() const {
- if (!isImm()) return false;
- const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
-
- uint64_t OriginalValue = CE->getValue();
- uint64_t InvertedValue = OriginalValue ^ (((uint64_t)1 << size) - 1);
- return isExpImmValue<shift>(InvertedValue);
- }
-
bool isPKHLSLImm() const {
return isImmediate<0, 32>();
}
@@ -2504,7 +2492,7 @@ public:
} else {
unsigned NextOpIndex = Inst.getNumOperands();
const MCInstrDesc &MCID =
- ARMInsts[ARM::INSTRUCTION_LIST_END - 1 - Inst.getOpcode()];
+ ARMDescs.Insts[ARM::INSTRUCTION_LIST_END - 1 - Inst.getOpcode()];
int TiedOp = MCID.getOperandConstraint(NextOpIndex, MCOI::TIED_TO);
assert(TiedOp >= 0 &&
"Inactive register in vpred_r is not tied to an output!");
@@ -4338,98 +4326,91 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
}
/// parseITCondCode - Try to parse a condition code for an IT instruction.
-OperandMatchResultTy
-ARMAsmParser::parseITCondCode(OperandVector &Operands) {
+ParseStatus ARMAsmParser::parseITCondCode(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
unsigned CC = ARMCondCodeFromString(Tok.getString());
if (CC == ~0U)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Parser.Lex(); // Eat the token.
Operands.push_back(ARMOperand::CreateCondCode(ARMCC::CondCodes(CC), S));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// parseCoprocNumOperand - Try to parse an coprocessor number operand. The
/// token must be an Identifier when called, and if it is a coprocessor
/// number, the token is eaten and the operand is added to the operand list.
-OperandMatchResultTy
-ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) {
+ParseStatus ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
int Num = MatchCoprocessorOperandName(Tok.getString().lower(), 'p');
if (Num == -1)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
if (!isValidCoprocessorNumber(Num, getSTI().getFeatureBits()))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Parser.Lex(); // Eat identifier token.
Operands.push_back(ARMOperand::CreateCoprocNum(Num, S));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// parseCoprocRegOperand - Try to parse an coprocessor register operand. The
/// token must be an Identifier when called, and if it is a coprocessor
/// number, the token is eaten and the operand is added to the operand list.
-OperandMatchResultTy
-ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) {
+ParseStatus ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
int Reg = MatchCoprocessorOperandName(Tok.getString().lower(), 'c');
if (Reg == -1)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Parser.Lex(); // Eat identifier token.
Operands.push_back(ARMOperand::CreateCoprocReg(Reg, S));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// parseCoprocOptionOperand - Try to parse an coprocessor option operand.
/// coproc_option : '{' imm0_255 '}'
-OperandMatchResultTy
-ARMAsmParser::parseCoprocOptionOperand(OperandVector &Operands) {
+ParseStatus ARMAsmParser::parseCoprocOptionOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
// If this isn't a '{', this isn't a coprocessor immediate operand.
if (Parser.getTok().isNot(AsmToken::LCurly))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Parser.Lex(); // Eat the '{'
const MCExpr *Expr;
SMLoc Loc = Parser.getTok().getLoc();
- if (getParser().parseExpression(Expr)) {
- Error(Loc, "illegal expression");
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseExpression(Expr))
+ return Error(Loc, "illegal expression");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr);
- if (!CE || CE->getValue() < 0 || CE->getValue() > 255) {
- Error(Loc, "coprocessor option must be an immediate in range [0, 255]");
- return MatchOperand_ParseFail;
- }
+ if (!CE || CE->getValue() < 0 || CE->getValue() > 255)
+ return Error(Loc,
+ "coprocessor option must be an immediate in range [0, 255]");
int Val = CE->getValue();
// Check for and consume the closing '}'
if (Parser.getTok().isNot(AsmToken::RCurly))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
SMLoc E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the '}'
Operands.push_back(ARMOperand::CreateCoprocOption(Val, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
// For register list parsing, we need to map from raw GPR register numbering
@@ -4643,8 +4624,8 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder,
}
// Helper function to parse the lane index for vector lists.
-OperandMatchResultTy ARMAsmParser::
-parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
+ParseStatus ARMAsmParser::parseVectorLane(VectorLaneTy &LaneKind,
+ unsigned &Index, SMLoc &EndLoc) {
MCAsmParser &Parser = getParser();
Index = 0; // Always return a defined index value.
if (Parser.getTok().is(AsmToken::LBrac)) {
@@ -4654,7 +4635,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
LaneKind = AllLanes;
EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the ']'.
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
// There's an optional '#' token here. Normally there wouldn't be, but
@@ -4664,39 +4645,30 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
const MCExpr *LaneIndex;
SMLoc Loc = Parser.getTok().getLoc();
- if (getParser().parseExpression(LaneIndex)) {
- Error(Loc, "illegal expression");
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseExpression(LaneIndex))
+ return Error(Loc, "illegal expression");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(LaneIndex);
- if (!CE) {
- Error(Loc, "lane index must be empty or an integer");
- return MatchOperand_ParseFail;
- }
- if (Parser.getTok().isNot(AsmToken::RBrac)) {
- Error(Parser.getTok().getLoc(), "']' expected");
- return MatchOperand_ParseFail;
- }
+ if (!CE)
+ return Error(Loc, "lane index must be empty or an integer");
+ if (Parser.getTok().isNot(AsmToken::RBrac))
+ return Error(Parser.getTok().getLoc(), "']' expected");
EndLoc = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat the ']'.
int64_t Val = CE->getValue();
// FIXME: Make this range check context sensitive for .8, .16, .32.
- if (Val < 0 || Val > 7) {
- Error(Parser.getTok().getLoc(), "lane index out of range");
- return MatchOperand_ParseFail;
- }
+ if (Val < 0 || Val > 7)
+ return Error(Parser.getTok().getLoc(), "lane index out of range");
Index = Val;
LaneKind = IndexedLane;
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
LaneKind = NoLanes;
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
// parse a vector register list
-OperandMatchResultTy
-ARMAsmParser::parseVectorList(OperandVector &Operands) {
+ParseStatus ARMAsmParser::parseVectorList(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
VectorLaneTy LaneKind;
unsigned LaneIndex;
@@ -4708,10 +4680,10 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
SMLoc E = Parser.getTok().getEndLoc();
int Reg = tryParseRegister();
if (Reg == -1)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) {
- OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E);
- if (Res != MatchOperand_Success)
+ ParseStatus Res = parseVectorLane(LaneKind, LaneIndex, E);
+ if (!Res.isSuccess())
return Res;
switch (LaneKind) {
case NoLanes:
@@ -4727,12 +4699,12 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
false, S, E));
break;
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
Reg = getDRegFromQReg(Reg);
- OperandMatchResultTy Res = parseVectorLane(LaneKind, LaneIndex, E);
- if (Res != MatchOperand_Success)
+ ParseStatus Res = parseVectorLane(LaneKind, LaneIndex, E);
+ if (!Res.isSuccess())
return Res;
switch (LaneKind) {
case NoLanes:
@@ -4752,31 +4724,27 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
false, S, E));
break;
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- Error(S, "vector register expected");
- return MatchOperand_ParseFail;
+ return Error(S, "vector register expected");
}
if (Parser.getTok().isNot(AsmToken::LCurly))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Parser.Lex(); // Eat '{' token.
SMLoc RegLoc = Parser.getTok().getLoc();
int Reg = tryParseRegister();
- if (Reg == -1) {
- Error(RegLoc, "register expected");
- return MatchOperand_ParseFail;
- }
+ if (Reg == -1)
+ return Error(RegLoc, "register expected");
unsigned Count = 1;
int Spacing = 0;
unsigned FirstReg = Reg;
- if (hasMVE() && !ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(Reg)) {
- Error(Parser.getTok().getLoc(), "vector register in range Q0-Q7 expected");
- return MatchOperand_ParseFail;
- }
+ if (hasMVE() && !ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(Reg))
+ return Error(Parser.getTok().getLoc(),
+ "vector register in range Q0-Q7 expected");
// The list is of D registers, but we also allow Q regs and just interpret
// them as the two D sub-registers.
else if (!hasMVE() && ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
@@ -4788,26 +4756,22 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
}
SMLoc E;
- if (parseVectorLane(LaneKind, LaneIndex, E) != MatchOperand_Success)
- return MatchOperand_ParseFail;
+ if (!parseVectorLane(LaneKind, LaneIndex, E).isSuccess())
+ return ParseStatus::Failure;
while (Parser.getTok().is(AsmToken::Comma) ||
Parser.getTok().is(AsmToken::Minus)) {
if (Parser.getTok().is(AsmToken::Minus)) {
if (!Spacing)
Spacing = 1; // Register range implies a single spaced list.
- else if (Spacing == 2) {
- Error(Parser.getTok().getLoc(),
- "sequential registers in double spaced list");
- return MatchOperand_ParseFail;
- }
+ else if (Spacing == 2)
+ return Error(Parser.getTok().getLoc(),
+ "sequential registers in double spaced list");
Parser.Lex(); // Eat the minus.
SMLoc AfterMinusLoc = Parser.getTok().getLoc();
int EndReg = tryParseRegister();
- if (EndReg == -1) {
- Error(AfterMinusLoc, "register expected");
- return MatchOperand_ParseFail;
- }
+ if (EndReg == -1)
+ return Error(AfterMinusLoc, "register expected");
// Allow Q regs and just interpret them as the two D sub-registers.
if (!hasMVE() && ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg))
EndReg = getDRegFromQReg(EndReg) + 1;
@@ -4819,25 +4783,18 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
if ((hasMVE() &&
!ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(EndReg)) ||
(!hasMVE() &&
- !ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg))) {
- Error(AfterMinusLoc, "invalid register in register list");
- return MatchOperand_ParseFail;
- }
+ !ARMMCRegisterClasses[ARM::DPRRegClassID].contains(EndReg)))
+ return Error(AfterMinusLoc, "invalid register in register list");
// Ranges must go from low to high.
- if (Reg > EndReg) {
- Error(AfterMinusLoc, "bad range in register list");
- return MatchOperand_ParseFail;
- }
+ if (Reg > EndReg)
+ return Error(AfterMinusLoc, "bad range in register list");
// Parse the lane specifier if present.
VectorLaneTy NextLaneKind;
unsigned NextLaneIndex;
- if (parseVectorLane(NextLaneKind, NextLaneIndex, E) !=
- MatchOperand_Success)
- return MatchOperand_ParseFail;
- if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
- Error(AfterMinusLoc, "mismatched lane index in register list");
- return MatchOperand_ParseFail;
- }
+ if (!parseVectorLane(NextLaneKind, NextLaneIndex, E).isSuccess())
+ return ParseStatus::Failure;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex)
+ return Error(AfterMinusLoc, "mismatched lane index in register list");
// Add all the registers in the range to the register list.
Count += EndReg - Reg;
@@ -4848,16 +4805,12 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
RegLoc = Parser.getTok().getLoc();
int OldReg = Reg;
Reg = tryParseRegister();
- if (Reg == -1) {
- Error(RegLoc, "register expected");
- return MatchOperand_ParseFail;
- }
+ if (Reg == -1)
+ return Error(RegLoc, "register expected");
if (hasMVE()) {
- if (!ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(Reg)) {
- Error(RegLoc, "vector register in range Q0-Q7 expected");
- return MatchOperand_ParseFail;
- }
+ if (!ARMMCRegisterClasses[ARM::MQPRRegClassID].contains(Reg))
+ return Error(RegLoc, "vector register in range Q0-Q7 expected");
Spacing = 1;
}
// vector register lists must be contiguous.
@@ -4869,29 +4822,23 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
else if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) {
if (!Spacing)
Spacing = 1; // Register range implies a single spaced list.
- else if (Spacing == 2) {
- Error(RegLoc,
- "invalid register in double-spaced list (must be 'D' register')");
- return MatchOperand_ParseFail;
- }
+ else if (Spacing == 2)
+ return Error(
+ RegLoc,
+ "invalid register in double-spaced list (must be 'D' register')");
Reg = getDRegFromQReg(Reg);
- if (Reg != OldReg + 1) {
- Error(RegLoc, "non-contiguous register range");
- return MatchOperand_ParseFail;
- }
+ if (Reg != OldReg + 1)
+ return Error(RegLoc, "non-contiguous register range");
++Reg;
Count += 2;
// Parse the lane specifier if present.
VectorLaneTy NextLaneKind;
unsigned NextLaneIndex;
SMLoc LaneLoc = Parser.getTok().getLoc();
- if (parseVectorLane(NextLaneKind, NextLaneIndex, E) !=
- MatchOperand_Success)
- return MatchOperand_ParseFail;
- if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
- Error(LaneLoc, "mismatched lane index in register list");
- return MatchOperand_ParseFail;
- }
+ if (!parseVectorLane(NextLaneKind, NextLaneIndex, E).isSuccess())
+ return ParseStatus::Failure;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex)
+ return Error(LaneLoc, "mismatched lane index in register list");
continue;
}
// Normal D register.
@@ -4901,27 +4848,21 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
Spacing = 1 + (Reg == OldReg + 2);
// Just check that it's contiguous and keep going.
- if (Reg != OldReg + Spacing) {
- Error(RegLoc, "non-contiguous register range");
- return MatchOperand_ParseFail;
- }
+ if (Reg != OldReg + Spacing)
+ return Error(RegLoc, "non-contiguous register range");
++Count;
// Parse the lane specifier if present.
VectorLaneTy NextLaneKind;
unsigned NextLaneIndex;
SMLoc EndLoc = Parser.getTok().getLoc();
- if (parseVectorLane(NextLaneKind, NextLaneIndex, E) != MatchOperand_Success)
- return MatchOperand_ParseFail;
- if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex) {
- Error(EndLoc, "mismatched lane index in register list");
- return MatchOperand_ParseFail;
- }
+ if (!parseVectorLane(NextLaneKind, NextLaneIndex, E).isSuccess())
+ return ParseStatus::Failure;
+ if (NextLaneKind != LaneKind || LaneIndex != NextLaneIndex)
+ return Error(EndLoc, "mismatched lane index in register list");
}
- if (Parser.getTok().isNot(AsmToken::RCurly)) {
- Error(Parser.getTok().getLoc(), "'}' expected");
- return MatchOperand_ParseFail;
- }
+ if (Parser.getTok().isNot(AsmToken::RCurly))
+ return Error(Parser.getTok().getLoc(), "'}' expected");
E = Parser.getTok().getEndLoc();
Parser.Lex(); // Eat '}' token.
@@ -4948,12 +4889,11 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
S, E));
break;
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
-OperandMatchResultTy
-ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) {
+ParseStatus ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
@@ -4987,7 +4927,7 @@ ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) {
Opt = ~0U;
if (Opt == ~0U)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Parser.Lex(); // Eat identifier token.
} else if (Tok.is(AsmToken::Hash) ||
@@ -4998,51 +4938,45 @@ ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) {
SMLoc Loc = Parser.getTok().getLoc();
const MCExpr *MemBarrierID;
- if (getParser().parseExpression(MemBarrierID)) {
- Error(Loc, "illegal expression");
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseExpression(MemBarrierID))
+ return Error(Loc, "illegal expression");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(MemBarrierID);
- if (!CE) {
- Error(Loc, "constant expression expected");
- return MatchOperand_ParseFail;
- }
+ if (!CE)
+ return Error(Loc, "constant expression expected");
int Val = CE->getValue();
- if (Val & ~0xf) {
- Error(Loc, "immediate value out of range");
- return MatchOperand_ParseFail;
- }
+ if (Val & ~0xf)
+ return Error(Loc, "immediate value out of range");
Opt = ARM_MB::RESERVED_0 + Val;
} else
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
Operands.push_back(ARMOperand::CreateMemBarrierOpt((ARM_MB::MemBOpt)Opt, S));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
+ParseStatus
ARMAsmParser::parseTraceSyncBarrierOptOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
if (!Tok.getString().equals_insensitive("csync"))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Parser.Lex(); // Eat identifier token.
Operands.push_back(ARMOperand::CreateTraceSyncBarrierOpt(ARM_TSB::CSYNC, S));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options.
-OperandMatchResultTy
+ParseStatus
ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -5055,7 +4989,7 @@ ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) {
if (OptStr.equals_insensitive("sy"))
Opt = ARM_ISB::SY;
else
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Parser.Lex(); // Eat identifier token.
} else if (Tok.is(AsmToken::Hash) ||
@@ -5066,41 +5000,33 @@ ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) {
SMLoc Loc = Parser.getTok().getLoc();
const MCExpr *ISBarrierID;
- if (getParser().parseExpression(ISBarrierID)) {
- Error(Loc, "illegal expression");
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseExpression(ISBarrierID))
+ return Error(Loc, "illegal expression");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ISBarrierID);
- if (!CE) {
- Error(Loc, "constant expression expected");
- return MatchOperand_ParseFail;
- }
+ if (!CE)
+ return Error(Loc, "constant expression expected");
int Val = CE->getValue();
- if (Val & ~0xf) {
- Error(Loc, "immediate value out of range");
- return MatchOperand_ParseFail;
- }
+ if (Val & ~0xf)
+ return Error(Loc, "immediate value out of range");
Opt = ARM_ISB::RESERVED_0 + Val;
} else
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
Operands.push_back(ARMOperand::CreateInstSyncBarrierOpt(
(ARM_ISB::InstSyncBOpt)Opt, S));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-
/// parseProcIFlagsOperand - Try to parse iflags from CPS instruction.
-OperandMatchResultTy
-ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
+ParseStatus ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
StringRef IFlagsStr = Tok.getString();
// An iflags string of "none" is interpreted to mean that none of the AIF
@@ -5117,7 +5043,7 @@ ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
// If some specific iflag is already set, it means that some letter is
// present more than once, this is not acceptable.
if (Flag == ~0U || (IFlags & Flag))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
IFlags |= Flag;
}
@@ -5125,12 +5051,11 @@ ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
Parser.Lex(); // Eat identifier token.
Operands.push_back(ARMOperand::CreateProcIFlags((ARM_PROC::IFlags)IFlags, S));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// parseMSRMaskOperand - Try to parse mask flags from MSR instruction.
-OperandMatchResultTy
-ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
+ParseStatus ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
@@ -5138,28 +5063,28 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
if (Tok.is(AsmToken::Integer)) {
int64_t Val = Tok.getIntVal();
if (Val > 255 || Val < 0) {
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
unsigned SYSmvalue = Val & 0xFF;
Parser.Lex();
Operands.push_back(ARMOperand::CreateMSRMask(SYSmvalue, S));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
if (!Tok.is(AsmToken::Identifier))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
StringRef Mask = Tok.getString();
if (isMClass()) {
auto TheReg = ARMSysReg::lookupMClassSysRegByName(Mask.lower());
if (!TheReg || !TheReg->hasRequiredFeatures(getSTI().getFeatureBits()))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
unsigned SYSmvalue = TheReg->Encoding & 0xFFF;
Parser.Lex(); // Eat identifier token.
Operands.push_back(ARMOperand::CreateMSRMask(SYSmvalue, S));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
// Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf"
@@ -5183,7 +5108,7 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
if (FlagsVal == ~0U) {
if (!Flags.empty())
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
else
FlagsVal = 8; // No flag
}
@@ -5202,11 +5127,11 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
// If some specific flag is already set, it means that some letter is
// present more than once, this is not acceptable.
if (Flag == ~0U || (FlagsVal & Flag))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
FlagsVal |= Flag;
}
} else // No match for special register.
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
// Special register without flags is NOT equivalent to "fc" flags.
// NOTE: This is a divergence from gas' behavior. Uncommenting the following
@@ -5222,102 +5147,83 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
Parser.Lex(); // Eat identifier token.
Operands.push_back(ARMOperand::CreateMSRMask(FlagsVal, S));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// parseBankedRegOperand - Try to parse a banked register (e.g. "lr_irq") for
/// use in the MRS/MSR instructions added to support virtualization.
-OperandMatchResultTy
-ARMAsmParser::parseBankedRegOperand(OperandVector &Operands) {
+ParseStatus ARMAsmParser::parseBankedRegOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
if (!Tok.is(AsmToken::Identifier))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
StringRef RegName = Tok.getString();
auto TheReg = ARMBankedReg::lookupBankedRegByName(RegName.lower());
if (!TheReg)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
unsigned Encoding = TheReg->Encoding;
Parser.Lex(); // Eat identifier token.
Operands.push_back(ARMOperand::CreateBankedReg(Encoding, S));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low,
- int High) {
+ParseStatus ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op,
+ int Low, int High) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
- if (Tok.isNot(AsmToken::Identifier)) {
- Error(Parser.getTok().getLoc(), Op + " operand expected.");
- return MatchOperand_ParseFail;
- }
+ if (Tok.isNot(AsmToken::Identifier))
+ return Error(Parser.getTok().getLoc(), Op + " operand expected.");
StringRef ShiftName = Tok.getString();
std::string LowerOp = Op.lower();
std::string UpperOp = Op.upper();
- if (ShiftName != LowerOp && ShiftName != UpperOp) {
- Error(Parser.getTok().getLoc(), Op + " operand expected.");
- return MatchOperand_ParseFail;
- }
+ if (ShiftName != LowerOp && ShiftName != UpperOp)
+ return Error(Parser.getTok().getLoc(), Op + " operand expected.");
Parser.Lex(); // Eat shift type token.
// There must be a '#' and a shift amount.
if (Parser.getTok().isNot(AsmToken::Hash) &&
- Parser.getTok().isNot(AsmToken::Dollar)) {
- Error(Parser.getTok().getLoc(), "'#' expected");
- return MatchOperand_ParseFail;
- }
+ Parser.getTok().isNot(AsmToken::Dollar))
+ return Error(Parser.getTok().getLoc(), "'#' expected");
Parser.Lex(); // Eat hash token.
const MCExpr *ShiftAmount;
SMLoc Loc = Parser.getTok().getLoc();
SMLoc EndLoc;
- if (getParser().parseExpression(ShiftAmount, EndLoc)) {
- Error(Loc, "illegal expression");
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseExpression(ShiftAmount, EndLoc))
+ return Error(Loc, "illegal expression");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
- if (!CE) {
- Error(Loc, "constant expression expected");
- return MatchOperand_ParseFail;
- }
+ if (!CE)
+ return Error(Loc, "constant expression expected");
int Val = CE->getValue();
- if (Val < Low || Val > High) {
- Error(Loc, "immediate value out of range");
- return MatchOperand_ParseFail;
- }
+ if (Val < Low || Val > High)
+ return Error(Loc, "immediate value out of range");
Operands.push_back(ARMOperand::CreateImm(CE, Loc, EndLoc));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-ARMAsmParser::parseSetEndImm(OperandVector &Operands) {
+ParseStatus ARMAsmParser::parseSetEndImm(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
- if (Tok.isNot(AsmToken::Identifier)) {
- Error(S, "'be' or 'le' operand expected");
- return MatchOperand_ParseFail;
- }
+ if (Tok.isNot(AsmToken::Identifier))
+ return Error(S, "'be' or 'le' operand expected");
int Val = StringSwitch<int>(Tok.getString().lower())
.Case("be", 1)
.Case("le", 0)
.Default(-1);
Parser.Lex(); // Eat the token.
- if (Val == -1) {
- Error(S, "'be' or 'le' operand expected");
- return MatchOperand_ParseFail;
- }
+ if (Val == -1)
+ return Error(S, "'be' or 'le' operand expected");
Operands.push_back(ARMOperand::CreateImm(MCConstantExpr::create(Val,
getContext()),
S, Tok.getEndLoc()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// parseShifterImm - Parse the shifter immediate operand for SSAT/USAT
@@ -5325,126 +5231,99 @@ ARMAsmParser::parseSetEndImm(OperandVector &Operands) {
/// lsl #n 'n' in [0,31]
/// asr #n 'n' in [1,32]
/// n == 32 encoded as n == 0.
-OperandMatchResultTy
-ARMAsmParser::parseShifterImm(OperandVector &Operands) {
+ParseStatus ARMAsmParser::parseShifterImm(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
- if (Tok.isNot(AsmToken::Identifier)) {
- Error(S, "shift operator 'asr' or 'lsl' expected");
- return MatchOperand_ParseFail;
- }
+ if (Tok.isNot(AsmToken::Identifier))
+ return Error(S, "shift operator 'asr' or 'lsl' expected");
StringRef ShiftName = Tok.getString();
bool isASR;
if (ShiftName == "lsl" || ShiftName == "LSL")
isASR = false;
else if (ShiftName == "asr" || ShiftName == "ASR")
isASR = true;
- else {
- Error(S, "shift operator 'asr' or 'lsl' expected");
- return MatchOperand_ParseFail;
- }
+ else
+ return Error(S, "shift operator 'asr' or 'lsl' expected");
Parser.Lex(); // Eat the operator.
// A '#' and a shift amount.
if (Parser.getTok().isNot(AsmToken::Hash) &&
- Parser.getTok().isNot(AsmToken::Dollar)) {
- Error(Parser.getTok().getLoc(), "'#' expected");
- return MatchOperand_ParseFail;
- }
+ Parser.getTok().isNot(AsmToken::Dollar))
+ return Error(Parser.getTok().getLoc(), "'#' expected");
Parser.Lex(); // Eat hash token.
SMLoc ExLoc = Parser.getTok().getLoc();
const MCExpr *ShiftAmount;
SMLoc EndLoc;
- if (getParser().parseExpression(ShiftAmount, EndLoc)) {
- Error(ExLoc, "malformed shift expression");
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseExpression(ShiftAmount, EndLoc))
+ return Error(ExLoc, "malformed shift expression");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
- if (!CE) {
- Error(ExLoc, "shift amount must be an immediate");
- return MatchOperand_ParseFail;
- }
+ if (!CE)
+ return Error(ExLoc, "shift amount must be an immediate");
int64_t Val = CE->getValue();
if (isASR) {
// Shift amount must be in [1,32]
- if (Val < 1 || Val > 32) {
- Error(ExLoc, "'asr' shift amount must be in range [1,32]");
- return MatchOperand_ParseFail;
- }
+ if (Val < 1 || Val > 32)
+ return Error(ExLoc, "'asr' shift amount must be in range [1,32]");
// asr #32 encoded as asr #0, but is not allowed in Thumb2 mode.
- if (isThumb() && Val == 32) {
- Error(ExLoc, "'asr #32' shift amount not allowed in Thumb mode");
- return MatchOperand_ParseFail;
- }
+ if (isThumb() && Val == 32)
+ return Error(ExLoc, "'asr #32' shift amount not allowed in Thumb mode");
if (Val == 32) Val = 0;
} else {
// Shift amount must be in [1,32]
- if (Val < 0 || Val > 31) {
- Error(ExLoc, "'lsr' shift amount must be in range [0,31]");
- return MatchOperand_ParseFail;
- }
+ if (Val < 0 || Val > 31)
+ return Error(ExLoc, "'lsl' shift amount must be in range [0,31]");
}
Operands.push_back(ARMOperand::CreateShifterImm(isASR, Val, S, EndLoc));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
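
As the comment above parseShifterImm notes, the lsl form takes an amount in [0,31] directly while the asr form takes [1,32] with 32 stored as 0. A tiny standalone helper (illustrative only, not part of the patch) expressing that storage rule:

  // Store the SSAT/USAT shifter amount the way the parser above does.
  static unsigned encodeShifterImmAmount(bool IsASR, unsigned Amt) {
    // Precondition, already checked by the parser: lsl in [0,31], asr in [1,32].
    return (IsASR && Amt == 32) ? 0 : Amt;
  }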
/// parseRotImm - Parse the shifter immediate operand for SXTB/UXTB family
/// of instructions. Legal values are:
/// ror #n 'n' in {0, 8, 16, 24}
-OperandMatchResultTy
-ARMAsmParser::parseRotImm(OperandVector &Operands) {
+ParseStatus ARMAsmParser::parseRotImm(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc S = Tok.getLoc();
if (Tok.isNot(AsmToken::Identifier))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
StringRef ShiftName = Tok.getString();
if (ShiftName != "ror" && ShiftName != "ROR")
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Parser.Lex(); // Eat the operator.
// A '#' and a rotate amount.
if (Parser.getTok().isNot(AsmToken::Hash) &&
- Parser.getTok().isNot(AsmToken::Dollar)) {
- Error(Parser.getTok().getLoc(), "'#' expected");
- return MatchOperand_ParseFail;
- }
+ Parser.getTok().isNot(AsmToken::Dollar))
+ return Error(Parser.getTok().getLoc(), "'#' expected");
Parser.Lex(); // Eat hash token.
SMLoc ExLoc = Parser.getTok().getLoc();
const MCExpr *ShiftAmount;
SMLoc EndLoc;
- if (getParser().parseExpression(ShiftAmount, EndLoc)) {
- Error(ExLoc, "malformed rotate expression");
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseExpression(ShiftAmount, EndLoc))
+ return Error(ExLoc, "malformed rotate expression");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftAmount);
- if (!CE) {
- Error(ExLoc, "rotate amount must be an immediate");
- return MatchOperand_ParseFail;
- }
+ if (!CE)
+ return Error(ExLoc, "rotate amount must be an immediate");
int64_t Val = CE->getValue();
// Shift amount must be in {0, 8, 16, 24} (0 is undocumented extension)
// normally, zero is represented in asm by omitting the rotate operand
// entirely.
- if (Val != 8 && Val != 16 && Val != 24 && Val != 0) {
- Error(ExLoc, "'ror' rotate amount must be 8, 16, or 24");
- return MatchOperand_ParseFail;
- }
+ if (Val != 8 && Val != 16 && Val != 24 && Val != 0)
+ return Error(ExLoc, "'ror' rotate amount must be 8, 16, or 24");
Operands.push_back(ARMOperand::CreateRotImm(Val, S, EndLoc));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-ARMAsmParser::parseModImm(OperandVector &Operands) {
+ParseStatus ARMAsmParser::parseModImm(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
MCAsmLexer &Lexer = getLexer();
int64_t Imm1, Imm2;
@@ -5462,14 +5341,14 @@ ARMAsmParser::parseModImm(OperandVector &Operands) {
// mov r0, :lower16:(_foo)
if (Parser.getTok().is(AsmToken::Identifier) ||
Parser.getTok().is(AsmToken::Colon))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
// Hash (dollar) is optional as per the ARMARM
if (Parser.getTok().is(AsmToken::Hash) ||
Parser.getTok().is(AsmToken::Dollar)) {
// Avoid parsing into complex operands (#:)
if (Lexer.peekTok().is(AsmToken::Colon))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
// Eat the hash (dollar)
Parser.Lex();
@@ -5478,10 +5357,8 @@ ARMAsmParser::parseModImm(OperandVector &Operands) {
SMLoc Sx1, Ex1;
Sx1 = Parser.getTok().getLoc();
const MCExpr *Imm1Exp;
- if (getParser().parseExpression(Imm1Exp, Ex1)) {
- Error(Sx1, "malformed expression");
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseExpression(Imm1Exp, Ex1))
+ return Error(Sx1, "malformed expression");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Imm1Exp);
@@ -5494,7 +5371,7 @@ ARMAsmParser::parseModImm(OperandVector &Operands) {
Operands.push_back(ARMOperand::CreateModImm((Enc & 0xFF),
(Enc & 0xF00) >> 7,
Sx1, Ex1));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
// We have parsed an immediate which is not for us, fallback to a plain
@@ -5505,25 +5382,22 @@ ARMAsmParser::parseModImm(OperandVector &Operands) {
// parser method is shared, that's why we have to do this here.
if (Parser.getTok().is(AsmToken::EndOfStatement)) {
Operands.push_back(ARMOperand::CreateImm(Imm1Exp, Sx1, Ex1));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
} else {
// Operands like #(l1 - l2) can only be evaluated at a later stage (via an
// MCFixup). Fallback to a plain immediate.
Operands.push_back(ARMOperand::CreateImm(Imm1Exp, Sx1, Ex1));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
// From this point onward, we expect the input to be a (#bits, #rot) pair
- if (Parser.getTok().isNot(AsmToken::Comma)) {
- Error(Sx1, "expected modified immediate operand: #[0, 255], #even[0-30]");
- return MatchOperand_ParseFail;
- }
+ if (Parser.getTok().isNot(AsmToken::Comma))
+ return Error(Sx1,
+ "expected modified immediate operand: #[0, 255], #even[0-30]");
- if (Imm1 & ~0xFF) {
- Error(Sx1, "immediate operand must a number in the range [0, 255]");
- return MatchOperand_ParseFail;
- }
+ if (Imm1 & ~0xFF)
+ return Error(Sx1, "immediate operand must be a number in the range [0, 255]");
// Eat the comma
Parser.Lex();
@@ -5538,10 +5412,8 @@ ARMAsmParser::parseModImm(OperandVector &Operands) {
Parser.Lex();
const MCExpr *Imm2Exp;
- if (getParser().parseExpression(Imm2Exp, Ex2)) {
- Error(Sx2, "malformed expression");
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseExpression(Imm2Exp, Ex2))
+ return Error(Sx2, "malformed expression");
CE = dyn_cast<MCConstantExpr>(Imm2Exp);
@@ -5550,93 +5422,72 @@ ARMAsmParser::parseModImm(OperandVector &Operands) {
if (!(Imm2 & ~0x1E)) {
// We have a match!
Operands.push_back(ARMOperand::CreateModImm(Imm1, Imm2, S, Ex2));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- Error(Sx2, "immediate operand must an even number in the range [0, 30]");
- return MatchOperand_ParseFail;
+ return Error(Sx2,
+ "immediate operand must an even number in the range [0, 30]");
} else {
- Error(Sx2, "constant expression expected");
- return MatchOperand_ParseFail;
+ return Error(Sx2, "constant expression expected");
}
}
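
parseModImm accepts either a plain constant or an explicit (#bits, #rot) pair, where #bits is an 8-bit value and #rot is an even rotate-right amount in [0, 30]. A standalone sketch of how such a pair decodes (plain C++, no LLVM headers; the value in the trailing comment is just a worked example):

  #include <cstdint>

  // Decode an A32 modified immediate: an 8-bit value rotated right by an
  // even amount in [0, 30].
  static uint32_t decodeModImm(uint32_t Bits8, unsigned Rot) {
    return (Bits8 >> Rot) | (Bits8 << ((32 - Rot) & 31));
  }
  // e.g. "#255, #4" -> decodeModImm(0xFF, 4) == 0xF000000F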
-OperandMatchResultTy
-ARMAsmParser::parseBitfield(OperandVector &Operands) {
+ParseStatus ARMAsmParser::parseBitfield(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
// The bitfield descriptor is really two operands, the LSB and the width.
if (Parser.getTok().isNot(AsmToken::Hash) &&
- Parser.getTok().isNot(AsmToken::Dollar)) {
- Error(Parser.getTok().getLoc(), "'#' expected");
- return MatchOperand_ParseFail;
- }
+ Parser.getTok().isNot(AsmToken::Dollar))
+ return Error(Parser.getTok().getLoc(), "'#' expected");
Parser.Lex(); // Eat hash token.
const MCExpr *LSBExpr;
SMLoc E = Parser.getTok().getLoc();
- if (getParser().parseExpression(LSBExpr)) {
- Error(E, "malformed immediate expression");
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseExpression(LSBExpr))
+ return Error(E, "malformed immediate expression");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(LSBExpr);
- if (!CE) {
- Error(E, "'lsb' operand must be an immediate");
- return MatchOperand_ParseFail;
- }
+ if (!CE)
+ return Error(E, "'lsb' operand must be an immediate");
int64_t LSB = CE->getValue();
// The LSB must be in the range [0,31]
- if (LSB < 0 || LSB > 31) {
- Error(E, "'lsb' operand must be in the range [0,31]");
- return MatchOperand_ParseFail;
- }
+ if (LSB < 0 || LSB > 31)
+ return Error(E, "'lsb' operand must be in the range [0,31]");
E = Parser.getTok().getLoc();
// Expect another immediate operand.
- if (Parser.getTok().isNot(AsmToken::Comma)) {
- Error(Parser.getTok().getLoc(), "too few operands");
- return MatchOperand_ParseFail;
- }
+ if (Parser.getTok().isNot(AsmToken::Comma))
+ return Error(Parser.getTok().getLoc(), "too few operands");
Parser.Lex(); // Eat hash token.
if (Parser.getTok().isNot(AsmToken::Hash) &&
- Parser.getTok().isNot(AsmToken::Dollar)) {
- Error(Parser.getTok().getLoc(), "'#' expected");
- return MatchOperand_ParseFail;
- }
+ Parser.getTok().isNot(AsmToken::Dollar))
+ return Error(Parser.getTok().getLoc(), "'#' expected");
Parser.Lex(); // Eat hash token.
const MCExpr *WidthExpr;
SMLoc EndLoc;
- if (getParser().parseExpression(WidthExpr, EndLoc)) {
- Error(E, "malformed immediate expression");
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseExpression(WidthExpr, EndLoc))
+ return Error(E, "malformed immediate expression");
CE = dyn_cast<MCConstantExpr>(WidthExpr);
- if (!CE) {
- Error(E, "'width' operand must be an immediate");
- return MatchOperand_ParseFail;
- }
+ if (!CE)
+ return Error(E, "'width' operand must be an immediate");
int64_t Width = CE->getValue();
// The LSB must be in the range [1,32-lsb]
- if (Width < 1 || Width > 32 - LSB) {
- Error(E, "'width' operand must be in the range [1,32-lsb]");
- return MatchOperand_ParseFail;
- }
+ if (Width < 1 || Width > 32 - LSB)
+ return Error(E, "'width' operand must be in the range [1,32-lsb]");
Operands.push_back(ARMOperand::CreateBitfield(LSB, Width, S, EndLoc));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
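
The bitfield operand parsed above is really a pair of immediates, and the two range checks reduce to a single invariant: the field described by lsb and width must fit in a 32-bit register. An illustrative predicate (not part of the patch):

  // lsb in [0, 31] and width in [1, 32 - lsb]; e.g. "bfi r0, r1, #8, #16"
  // is accepted, "#24, #16" is not.
  static bool isValidBitfield(int64_t LSB, int64_t Width) {
    return LSB >= 0 && LSB <= 31 && Width >= 1 && Width <= 32 - LSB;
  }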
-OperandMatchResultTy
-ARMAsmParser::parsePostIdxReg(OperandVector &Operands) {
+ParseStatus ARMAsmParser::parsePostIdxReg(OperandVector &Operands) {
// Check for a post-index addressing register operand. Specifically:
// postidx_reg := '+' register {, shift}
// | '-' register {, shift}
// | register {, shift}
- // This method must return MatchOperand_NoMatch without consuming any tokens
+ // This method must return ParseStatus::NoMatch without consuming any tokens
// in the case where there is no match, as other alternatives take other
// parse methods.
MCAsmParser &Parser = getParser();
@@ -5657,9 +5508,8 @@ ARMAsmParser::parsePostIdxReg(OperandVector &Operands) {
int Reg = tryParseRegister();
if (Reg == -1) {
if (!haveEaten)
- return MatchOperand_NoMatch;
- Error(Parser.getTok().getLoc(), "register expected");
- return MatchOperand_ParseFail;
+ return ParseStatus::NoMatch;
+ return Error(Parser.getTok().getLoc(), "register expected");
}
ARM_AM::ShiftOpc ShiftTy = ARM_AM::no_shift;
@@ -5667,7 +5517,7 @@ ARMAsmParser::parsePostIdxReg(OperandVector &Operands) {
if (Parser.getTok().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the ','.
if (parseMemRegOffsetShift(ShiftTy, ShiftImm))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
// FIXME: Only approximates end...may include intervening whitespace.
E = Parser.getTok().getLoc();
@@ -5676,11 +5526,10 @@ ARMAsmParser::parsePostIdxReg(OperandVector &Operands) {
Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ShiftTy,
ShiftImm, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-ARMAsmParser::parseAM3Offset(OperandVector &Operands) {
+ParseStatus ARMAsmParser::parseAM3Offset(OperandVector &Operands) {
// Check for a post-index addressing register operand. Specifically:
// am3offset := '+' register
// | '-' register
@@ -5689,7 +5538,7 @@ ARMAsmParser::parseAM3Offset(OperandVector &Operands) {
// | # + imm
// | # - imm
- // This method must return MatchOperand_NoMatch without consuming any tokens
+ // This method must return ParseStatus::NoMatch without consuming any tokens
// in the case where there is no match, as other alternatives take other
// parse methods.
MCAsmParser &Parser = getParser();
@@ -5706,12 +5555,10 @@ ARMAsmParser::parseAM3Offset(OperandVector &Operands) {
const MCExpr *Offset;
SMLoc E;
if (getParser().parseExpression(Offset, E))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Offset);
- if (!CE) {
- Error(S, "constant expression expected");
- return MatchOperand_ParseFail;
- }
+ if (!CE)
+ return Error(S, "constant expression expected");
// Negative zero is encoded as the flag value
// std::numeric_limits<int32_t>::min().
int32_t Val = CE->getValue();
@@ -5721,7 +5568,7 @@ ARMAsmParser::parseAM3Offset(OperandVector &Operands) {
Operands.push_back(
ARMOperand::CreateImm(MCConstantExpr::create(Val, getContext()), S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
bool haveEaten = false;
@@ -5739,15 +5586,14 @@ ARMAsmParser::parseAM3Offset(OperandVector &Operands) {
int Reg = tryParseRegister();
if (Reg == -1) {
if (!haveEaten)
- return MatchOperand_NoMatch;
- Error(Tok.getLoc(), "register expected");
- return MatchOperand_ParseFail;
+ return ParseStatus::NoMatch;
+ return Error(Tok.getLoc(), "register expected");
}
Operands.push_back(ARMOperand::CreatePostIdxReg(Reg, isAdd, ARM_AM::no_shift,
0, S, Tok.getEndLoc()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
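
One non-obvious detail in parseAM3Offset is the "negative zero" comment: a subtracted zero offset ("#-0") must stay distinguishable from "+0", and the sentinel used for it is std::numeric_limits<int32_t>::min(). A simplified sketch of that convention (the helper name is invented for illustration):

  #include <cstdint>
  #include <limits>

  static int32_t encodeAM3ImmOffset(int32_t ParsedVal, bool SawMinus) {
    // ParsedVal already carries its sign; only "-0" needs the sentinel.
    if (SawMinus && ParsedVal == 0)
      return std::numeric_limits<int32_t>::min();
    return ParsedVal;
  }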
/// Convert parsed operands to MCInst. Needed here because this instruction
@@ -6097,8 +5943,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
}
/// parseFPImm - A floating point immediate expression operand.
-OperandMatchResultTy
-ARMAsmParser::parseFPImm(OperandVector &Operands) {
+ParseStatus ARMAsmParser::parseFPImm(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
// Anything that can accept a floating point constant as an operand
// needs to go through here, as the regular parseExpression is
@@ -6113,7 +5958,7 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) {
if (Parser.getTok().isNot(AsmToken::Hash) &&
Parser.getTok().isNot(AsmToken::Dollar))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
// Disambiguate the VMOV forms that can accept an FP immediate.
// vmov.f32 <sreg>, #imm
@@ -6133,7 +5978,7 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) {
bool isFconst = Mnemonic.isToken() && (Mnemonic.getToken() == "fconstd" ||
Mnemonic.getToken() == "fconsts");
if (!(isVmovf || isFconst))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
Parser.Lex(); // Eat '#' or '$'.
@@ -6154,28 +5999,25 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) {
Operands.push_back(ARMOperand::CreateImm(
MCConstantExpr::create(IntVal, getContext()),
S, Parser.getTok().getLoc()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
// Also handle plain integers. Instructions which allow floating point
// immediates also allow a raw encoded 8-bit value.
if (Tok.is(AsmToken::Integer) && isFconst) {
int64_t Val = Tok.getIntVal();
Parser.Lex(); // Eat the token.
- if (Val > 255 || Val < 0) {
- Error(Loc, "encoded floating point value out of range");
- return MatchOperand_ParseFail;
- }
+ if (Val > 255 || Val < 0)
+ return Error(Loc, "encoded floating point value out of range");
float RealVal = ARM_AM::getFPImmFloat(Val);
Val = APFloat(RealVal).bitcastToAPInt().getZExtValue();
Operands.push_back(ARMOperand::CreateImm(
MCConstantExpr::create(Val, getContext()), S,
Parser.getTok().getLoc()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- Error(Loc, "invalid floating point immediate");
- return MatchOperand_ParseFail;
+ return Error(Loc, "invalid floating point immediate");
}
/// Parse a arm instruction operand. For now this parses the operand regardless
@@ -6186,13 +6028,13 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
- OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
- if (ResTy == MatchOperand_Success)
+ ParseStatus ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ if (ResTy.isSuccess())
return false;
// If there wasn't a custom match, try the generic matcher below. Otherwise,
// there was a match, but an error occurred, in which case, just return that
// the operand parsing failed.
- if (ResTy == MatchOperand_ParseFail)
+ if (ResTy.isFailure())
return true;
switch (getLexer().getKind()) {
@@ -6295,7 +6137,8 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
}
case AsmToken::Colon: {
S = Parser.getTok().getLoc();
- // ":lower16:" and ":upper16:" expression prefixes
+ // ":lower16:", ":upper16:", ":lower0_7:", ":lower8_15:", ":upper0_7:" and
+ // ":upper8_15:", expression prefixes
// FIXME: Check it's an expression prefix,
// e.g. (FOO - :lower16:BAR) isn't legal.
ARMMCExpr::VariantKind RefKind;
@@ -6342,8 +6185,9 @@ bool ARMAsmParser::parseImmExpr(int64_t &Out) {
return false;
}
-// parsePrefix - Parse ARM 16-bit relocations expression prefix, i.e.
-// :lower16: and :upper16:.
+// parsePrefix - Parse ARM 16-bit relocation expression prefixes, i.e.
+// :lower16: and :upper16:, and Thumb 8-bit relocation expression prefixes,
+// i.e. :upper8_15:, :upper0_7:, :lower8_15: and :lower0_7:.
bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
MCAsmParser &Parser = getParser();
RefKind = ARMMCExpr::VK_ARM_None;
@@ -6352,7 +6196,6 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
if (getLexer().is(AsmToken::Hash))
Parser.Lex();
- // :lower16: and :upper16: modifiers
assert(getLexer().is(AsmToken::Colon) && "expected a :");
Parser.Lex(); // Eat ':'
@@ -6372,8 +6215,12 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
ARMMCExpr::VariantKind VariantKind;
uint8_t SupportedFormats;
} PrefixEntries[] = {
- { "lower16", ARMMCExpr::VK_ARM_LO16, COFF | ELF | MACHO },
- { "upper16", ARMMCExpr::VK_ARM_HI16, COFF | ELF | MACHO },
+ {"upper16", ARMMCExpr::VK_ARM_HI16, COFF | ELF | MACHO},
+ {"lower16", ARMMCExpr::VK_ARM_LO16, COFF | ELF | MACHO},
+ {"upper8_15", ARMMCExpr::VK_ARM_HI_8_15, ELF},
+ {"upper0_7", ARMMCExpr::VK_ARM_HI_0_7, ELF},
+ {"lower8_15", ARMMCExpr::VK_ARM_LO_8_15, ELF},
+ {"lower0_7", ARMMCExpr::VK_ARM_LO_0_7, ELF},
};
StringRef IDVal = Parser.getTok().getIdentifier();
@@ -6424,6 +6271,9 @@ bool ARMAsmParser::parsePrefix(ARMMCExpr::VariantKind &RefKind) {
}
Parser.Lex(); // Eat the last ':'
+ // Consume an optional trailing '#' (GNU compatibility).
+ parseOptionalToken(AsmToken::Hash);
+
return false;
}
@@ -6734,6 +6584,27 @@ void ARMAsmParser::tryConvertingToTwoOperandForm(StringRef Mnemonic,
}
}
+// This function returns true if the operand is one of the following
+// relocations: :upper8_15:, :upper0_7:, :lower8_15: or :lower0_7:.
+static bool isThumbI8Relocation(MCParsedAsmOperand &MCOp) {
+ ARMOperand &Op = static_cast<ARMOperand &>(MCOp);
+ if (!Op.isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm());
+ if (CE)
+ return false;
+ const MCExpr *E = dyn_cast<MCExpr>(Op.getImm());
+ if (!E)
+ return false;
+ const ARMMCExpr *ARM16Expr = dyn_cast<ARMMCExpr>(E);
+ if (ARM16Expr && (ARM16Expr->getKind() == ARMMCExpr::VK_ARM_HI_8_15 ||
+ ARM16Expr->getKind() == ARMMCExpr::VK_ARM_HI_0_7 ||
+ ARM16Expr->getKind() == ARMMCExpr::VK_ARM_LO_8_15 ||
+ ARM16Expr->getKind() == ARMMCExpr::VK_ARM_LO_0_7))
+ return true;
+ return false;
+}
+
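
isThumbI8Relocation recognizes the four Thumb 8-bit relocation prefixes added to parsePrefix above. Together with the new fixups in ARMAsmBackend further down, they split a 32-bit symbol value into individual bytes so Thumb-1 code can materialize an address one byte at a time (for example with a movs/lsls/adds sequence). A standalone sketch of the byte partition (plain C++; the helper names are invented for illustration):

  #include <cstdint>

  static uint8_t upper8_15(uint32_t V) { return (V >> 24) & 0xff; }
  static uint8_t upper0_7(uint32_t V)  { return (V >> 16) & 0xff; }
  static uint8_t lower8_15(uint32_t V) { return (V >>  8) & 0xff; }
  static uint8_t lower0_7(uint32_t V)  { return V & 0xff; }
  // For V = 0x12345678 these return 0x12, 0x34, 0x56 and 0x78, matching the
  // shifts applied in ARMAsmBackend::adjustFixupValue below.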
bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
OperandVector &Operands) {
// FIXME: This is all horribly hacky. We really need a better way to deal
@@ -6753,6 +6624,10 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic,
static_cast<ARMOperand &>(*Operands[1]).getReg() == 0)
return true;
+ if (Mnemonic == "movs" && Operands.size() > 3 && isThumb() &&
+ isThumbI8Relocation(*Operands[3]))
+ return true;
+
// Register-register 'add' for thumb does not have a cc_out operand
// when there are only two register operands.
if (isThumb() && Mnemonic == "add" && Operands.size() == 5 &&
@@ -7637,6 +7512,19 @@ static bool isVectorPredicable(const MCInstrDesc &MCID) {
return findFirstVectorPredOperandIdx(MCID) != -1;
}
+static bool isARMMCExpr(MCParsedAsmOperand &MCOp) {
+ ARMOperand &Op = static_cast<ARMOperand &>(MCOp);
+ if (!Op.isImm())
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm());
+ if (CE)
+ return false;
+ const MCExpr *E = dyn_cast<MCExpr>(Op.getImm());
+ if (!E)
+ return false;
+ return true;
+}
+
// FIXME: We would really like to be able to tablegen'erate this.
bool ARMAsmParser::validateInstruction(MCInst &Inst,
const OperandVector &Operands) {
@@ -7831,6 +7719,107 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
}
return false;
}
+
+ case ARM::t2LDRB_OFFSET_imm:
+ case ARM::t2LDRB_PRE_imm:
+ case ARM::t2LDRB_POST_imm:
+ case ARM::t2STRB_OFFSET_imm:
+ case ARM::t2STRB_PRE_imm:
+ case ARM::t2STRB_POST_imm: {
+ if (Inst.getOpcode() == ARM::t2LDRB_POST_imm ||
+ Inst.getOpcode() == ARM::t2STRB_POST_imm ||
+ Inst.getOpcode() == ARM::t2LDRB_PRE_imm ||
+ Inst.getOpcode() == ARM::t2STRB_PRE_imm) {
+ int Imm = Inst.getOperand(2).getImm();
+ if (Imm > 255 || Imm < -255)
+ return Error(Operands[5]->getStartLoc(),
+ "operand must be in range [-255, 255]");
+ } else if (Inst.getOpcode() == ARM::t2LDRB_OFFSET_imm ||
+ Inst.getOpcode() == ARM::t2STRB_OFFSET_imm) {
+ int Imm = Inst.getOperand(2).getImm();
+ if (Imm > 0 || Imm < -255)
+ return Error(Operands[5]->getStartLoc(),
+ "operand must be in range [0, 255] with a negative sign");
+ }
+ if (Inst.getOperand(0).getReg() == ARM::PC) {
+ return Error(Operands[3]->getStartLoc(),
+ "if operand is PC, should call the LDRB (literal)");
+ }
+ return false;
+ }
+
+ case ARM::t2LDRH_OFFSET_imm:
+ case ARM::t2LDRH_PRE_imm:
+ case ARM::t2LDRH_POST_imm:
+ case ARM::t2STRH_OFFSET_imm:
+ case ARM::t2STRH_PRE_imm:
+ case ARM::t2STRH_POST_imm: {
+ if (Inst.getOpcode() == ARM::t2LDRH_POST_imm ||
+ Inst.getOpcode() == ARM::t2STRH_POST_imm ||
+ Inst.getOpcode() == ARM::t2LDRH_PRE_imm ||
+ Inst.getOpcode() == ARM::t2STRH_PRE_imm) {
+ int Imm = Inst.getOperand(2).getImm();
+ if (Imm > 255 || Imm < -255)
+ return Error(Operands[5]->getStartLoc(),
+ "operand must be in range [-255, 255]");
+ } else if (Inst.getOpcode() == ARM::t2LDRH_OFFSET_imm ||
+ Inst.getOpcode() == ARM::t2STRH_OFFSET_imm) {
+ int Imm = Inst.getOperand(2).getImm();
+ if (Imm > 0 || Imm < -255)
+ return Error(Operands[5]->getStartLoc(),
+ "operand must be in range [0, 255] with a negative sign");
+ }
+ if (Inst.getOperand(0).getReg() == ARM::PC) {
+ return Error(Operands[3]->getStartLoc(),
+ "if operand is PC, should call the LDRH (literal)");
+ }
+ return false;
+ }
+
+ case ARM::t2LDRSB_OFFSET_imm:
+ case ARM::t2LDRSB_PRE_imm:
+ case ARM::t2LDRSB_POST_imm: {
+ if (Inst.getOpcode() == ARM::t2LDRSB_POST_imm ||
+ Inst.getOpcode() == ARM::t2LDRSB_PRE_imm) {
+ int Imm = Inst.getOperand(2).getImm();
+ if (Imm > 255 || Imm < -255)
+ return Error(Operands[5]->getStartLoc(),
+ "operand must be in range [-255, 255]");
+ } else if (Inst.getOpcode() == ARM::t2LDRSB_OFFSET_imm) {
+ int Imm = Inst.getOperand(2).getImm();
+ if (Imm > 0 || Imm < -255)
+ return Error(Operands[5]->getStartLoc(),
+ "operand must be in range [0, 255] with a negative sign");
+ }
+ if (Inst.getOperand(0).getReg() == ARM::PC) {
+ return Error(Operands[3]->getStartLoc(),
+ "if operand is PC, should call the LDRH (literal)");
+ }
+ return false;
+ }
+
+ case ARM::t2LDRSH_OFFSET_imm:
+ case ARM::t2LDRSH_PRE_imm:
+ case ARM::t2LDRSH_POST_imm: {
+ if (Inst.getOpcode() == ARM::t2LDRSH_POST_imm ||
+ Inst.getOpcode() == ARM::t2LDRSH_PRE_imm) {
+ int Imm = Inst.getOperand(2).getImm();
+ if (Imm > 255 || Imm < -255)
+ return Error(Operands[5]->getStartLoc(),
+ "operand must be in range [-255, 255]");
+ } else if (Inst.getOpcode() == ARM::t2LDRSH_OFFSET_imm) {
+ int Imm = Inst.getOperand(2).getImm();
+ if (Imm > 0 || Imm < -255)
+ return Error(Operands[5]->getStartLoc(),
+ "operand must be in range [0, 255] with a negative sign");
+ }
+ if (Inst.getOperand(0).getReg() == ARM::PC) {
+ return Error(Operands[3]->getStartLoc(),
+ "if operand is PC, should call the LDRH (literal)");
+ }
+ return false;
+ }
+
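
The four validation blocks above share one shape: the writeback (pre/post-indexed) forms accept an offset in [-255, 255], the plain _OFFSET_imm forms exist only for non-positive offsets and accept [-255, 0], and PC is rejected as the transfer register. An illustrative predicate condensing the range checks (not part of the patch):

  static bool isValidT2Imm8Offset(int Imm, bool IsWriteback) {
    return IsWriteback ? (Imm >= -255 && Imm <= 255)
                       : (Imm >= -255 && Imm <= 0);
  }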
case ARM::LDR_PRE_IMM:
case ARM::LDR_PRE_REG:
case ARM::t2LDR_PRE:
@@ -8145,6 +8134,22 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
"immediate expression for mov requires :lower16: or :upper16");
break;
}
+ case ARM::tADDi8: {
+ MCParsedAsmOperand &Op = *Operands[4];
+ if (isARMMCExpr(Op) && !isThumbI8Relocation(Op))
+ return Error(Op.getStartLoc(),
+ "Immediate expression for Thumb adds requires :lower0_7:,"
+ " :lower8_15:, :upper0_7: or :upper8_15:");
+ break;
+ }
+ case ARM::tMOVi8: {
+ MCParsedAsmOperand &Op = *Operands[2];
+ if (isARMMCExpr(Op) && !isThumbI8Relocation(Op))
+ return Error(Op.getStartLoc(),
+ "Immediate expression for Thumb movs requires :lower0_7:,"
+ " :lower8_15:, :upper0_7: or :upper8_15:");
+ break;
+ }
case ARM::HINT:
case ARM::t2HINT: {
unsigned Imm8 = Inst.getOperand(0).getImm();
@@ -8798,7 +8803,7 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
// before passing it to the ADR instruction.
unsigned Enc = Inst.getOperand(2).getImm();
TmpInst.addOperand(MCOperand::createImm(
- ARM_AM::rotr32(Enc & 0xFF, (Enc & 0xF00) >> 7)));
+ llvm::rotr<uint32_t>(Enc & 0xFF, (Enc & 0xF00) >> 7)));
} else {
// Turn PC-relative expression into absolute expression.
// Reading PC provides the start of the current instruction + 8 and
@@ -8849,6 +8854,156 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
Inst = TmpInst;
return true;
}
+ // Aliases for imm syntax of LDRB instructions.
+ case ARM::t2LDRB_OFFSET_imm: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2LDRBi8);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // imm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2LDRB_PRE_imm:
+ case ARM::t2LDRB_POST_imm: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Inst.getOpcode() == ARM::t2LDRB_PRE_imm
+ ? ARM::t2LDRB_PRE
+ : ARM::t2LDRB_POST);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rt
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt_wb
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // imm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ Inst = TmpInst;
+ return true;
+ }
+ // Aliases for imm syntax of STRB instructions.
+ case ARM::t2STRB_OFFSET_imm: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2STRBi8);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // imm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2STRB_PRE_imm:
+ case ARM::t2STRB_POST_imm: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Inst.getOpcode() == ARM::t2STRB_PRE_imm
+ ? ARM::t2STRB_PRE
+ : ARM::t2STRB_POST);
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt_wb
+ TmpInst.addOperand(Inst.getOperand(0)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // imm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ Inst = TmpInst;
+ return true;
+ }
+ // Aliases for imm syntax of LDRH instructions.
+ case ARM::t2LDRH_OFFSET_imm: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2LDRHi8);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // imm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2LDRH_PRE_imm:
+ case ARM::t2LDRH_POST_imm: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Inst.getOpcode() == ARM::t2LDRH_PRE_imm
+ ? ARM::t2LDRH_PRE
+ : ARM::t2LDRH_POST);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rt
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt_wb
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // imm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ Inst = TmpInst;
+ return true;
+ }
+ // Aliases for imm syntax of STRH instructions.
+ case ARM::t2STRH_OFFSET_imm: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2STRHi8);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // imm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2STRH_PRE_imm:
+ case ARM::t2STRH_POST_imm: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Inst.getOpcode() == ARM::t2STRH_PRE_imm
+ ? ARM::t2STRH_PRE
+ : ARM::t2STRH_POST);
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt_wb
+ TmpInst.addOperand(Inst.getOperand(0)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // imm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ Inst = TmpInst;
+ return true;
+ }
+ // Aliases for imm syntax of LDRSB instructions.
+ case ARM::t2LDRSB_OFFSET_imm: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2LDRSBi8);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // imm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2LDRSB_PRE_imm:
+ case ARM::t2LDRSB_POST_imm: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Inst.getOpcode() == ARM::t2LDRSB_PRE_imm
+ ? ARM::t2LDRSB_PRE
+ : ARM::t2LDRSB_POST);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rt
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt_wb
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // imm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ Inst = TmpInst;
+ return true;
+ }
+ // Aliases for imm syntax of LDRSH instructions.
+ case ARM::t2LDRSH_OFFSET_imm: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(ARM::t2LDRSHi8);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rt
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // imm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ Inst = TmpInst;
+ return true;
+ }
+ case ARM::t2LDRSH_PRE_imm:
+ case ARM::t2LDRSH_POST_imm: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(Inst.getOpcode() == ARM::t2LDRSH_PRE_imm
+ ? ARM::t2LDRSH_PRE
+ : ARM::t2LDRSH_POST);
+ TmpInst.addOperand(Inst.getOperand(0)); // Rt
+ TmpInst.addOperand(Inst.getOperand(4)); // Rt_wb
+ TmpInst.addOperand(Inst.getOperand(1)); // Rn
+ TmpInst.addOperand(Inst.getOperand(2)); // imm
+ TmpInst.addOperand(Inst.getOperand(3)); // CondCode
+ Inst = TmpInst;
+ return true;
+ }
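
All of the new _imm aliases above expand the same way: select the real 8-bit-offset or writeback opcode and re-emit the operands, the only wrinkle being where the writeback result goes, after Rt for loads and before Rt for stores. A hedged sketch of that reordering (the helper is invented for illustration; only the MCInst API is real):

  #include "llvm/MC/MCInst.h"
  using namespace llvm;

  static void expandWritebackAlias(MCInst &Out, const MCInst &In,
                                   unsigned RealOpcode, bool IsStore) {
    Out.setOpcode(RealOpcode);
    if (IsStore)
      Out.addOperand(In.getOperand(4)); // Rt_wb comes first for stores
    Out.addOperand(In.getOperand(0));   // Rt
    if (!IsStore)
      Out.addOperand(In.getOperand(4)); // Rt_wb follows Rt for loads
    Out.addOperand(In.getOperand(1));   // Rn
    Out.addOperand(In.getOperand(2));   // imm
    Out.addOperand(In.getOperand(3));   // CondCode
  }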
// Aliases for alternate PC+imm syntax of LDR instructions.
case ARM::t2LDRpcrel:
// Select the narrow version if the immediate will fit.
@@ -10313,7 +10468,8 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
// explicitly specified. From the ARM ARM: "Encoding T1 is preferred
// to encoding T2 if <Rd> is specified and encoding T2 is preferred
// to encoding T1 if <Rd> is omitted."
- if ((unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) {
+ if (Inst.getOperand(3).isImm() &&
+ (unsigned)Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) {
Inst.setOpcode(ARM::tADDi3);
return true;
}
@@ -10756,7 +10912,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
// Find the optional-def operand (cc_out).
unsigned OpNo;
for (OpNo = 0;
- !MCID.operands()[OpNo].isOptionalDef() && OpNo < MCID.NumOperands;
+ OpNo < MCID.NumOperands && !MCID.operands()[OpNo].isOptionalDef();
++OpNo)
;
// If we're parsing Thumb1, reject it completely.
@@ -11190,6 +11346,7 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
SwitchMode();
getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
+ getParser().getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0);
return false;
}
@@ -11202,6 +11359,7 @@ bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
if (isThumb())
SwitchMode();
getParser().getStreamer().emitAssemblerFlag(MCAF_Code32);
+ getParser().getStreamer().emitCodeAlignment(Align(4), &getSTI(), 0);
return false;
}
@@ -11422,7 +11580,7 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
Tag = CE->getValue();
}
- if (Parser.parseToken(AsmToken::Comma, "comma expected"))
+ if (Parser.parseComma())
return true;
StringRef StringValue = "";
@@ -11456,7 +11614,7 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
}
if (Tag == ARMBuildAttrs::compatibility) {
- if (Parser.parseToken(AsmToken::Comma, "comma expected"))
+ if (Parser.parseComma())
return true;
}
@@ -11515,7 +11673,7 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
SMLoc FPUNameLoc = getTok().getLoc();
StringRef FPU = getParser().parseStringToEndOfStatement().trim();
- unsigned ID = ARM::parseFPU(FPU);
+ ARM::FPUKind ID = ARM::parseFPU(FPU);
std::vector<StringRef> Features;
if (!ARM::getFPUFeatures(ID, Features))
return Error(FPUNameLoc, "Unknown FPU name");
@@ -11668,7 +11826,7 @@ bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) {
int FPReg = tryParseRegister();
if (check(FPReg == -1, FPRegLoc, "frame pointer register expected") ||
- Parser.parseToken(AsmToken::Comma, "comma expected"))
+ Parser.parseComma())
return true;
// Parse spreg
@@ -11701,7 +11859,7 @@ bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) {
Offset = CE->getValue();
}
- if (Parser.parseToken(AsmToken::EndOfStatement))
+ if (Parser.parseEOL())
return true;
getTargetStreamer().emitSetFP(static_cast<unsigned>(FPReg),
@@ -11927,7 +12085,7 @@ bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) {
StackOffset = CE->getValue();
- if (Parser.parseToken(AsmToken::Comma, "expected comma"))
+ if (Parser.parseComma())
return true;
SmallVector<uint8_t, 16> Opcodes;
@@ -12071,7 +12229,7 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) {
StringRef Name;
if (check(Parser.parseIdentifier(Name),
"expected identifier after '.thumb_set'") ||
- parseToken(AsmToken::Comma, "expected comma after name '" + Name + "'"))
+ Parser.parseComma())
return true;
MCSymbol *Sym;
@@ -12519,7 +12677,7 @@ bool ARMAsmParser::enableArchExtFeature(StringRef Name, SMLoc &ExtLoc) {
{ARM::AEK_XSCALE, {}, {}},
};
bool EnableFeature = true;
- if (Name.startswith_insensitive("no")) {
+ if (Name.starts_with_insensitive("no")) {
EnableFeature = false;
Name = Name.substr(2);
}
@@ -12636,71 +12794,41 @@ bool ARMAsmParser::isMnemonicVPTPredicable(StringRef Mnemonic,
if (!hasMVE())
return false;
- return Mnemonic.startswith("vabav") || Mnemonic.startswith("vaddv") ||
- Mnemonic.startswith("vaddlv") || Mnemonic.startswith("vminnmv") ||
- Mnemonic.startswith("vminnmav") || Mnemonic.startswith("vminv") ||
- Mnemonic.startswith("vminav") || Mnemonic.startswith("vmaxnmv") ||
- Mnemonic.startswith("vmaxnmav") || Mnemonic.startswith("vmaxv") ||
- Mnemonic.startswith("vmaxav") || Mnemonic.startswith("vmladav") ||
- Mnemonic.startswith("vrmlaldavh") || Mnemonic.startswith("vrmlalvh") ||
- Mnemonic.startswith("vmlsdav") || Mnemonic.startswith("vmlav") ||
- Mnemonic.startswith("vmlaldav") || Mnemonic.startswith("vmlalv") ||
- Mnemonic.startswith("vmaxnm") || Mnemonic.startswith("vminnm") ||
- Mnemonic.startswith("vmax") || Mnemonic.startswith("vmin") ||
- Mnemonic.startswith("vshlc") || Mnemonic.startswith("vmovlt") ||
- Mnemonic.startswith("vmovlb") || Mnemonic.startswith("vshll") ||
- Mnemonic.startswith("vrshrn") || Mnemonic.startswith("vshrn") ||
- Mnemonic.startswith("vqrshrun") || Mnemonic.startswith("vqshrun") ||
- Mnemonic.startswith("vqrshrn") || Mnemonic.startswith("vqshrn") ||
- Mnemonic.startswith("vbic") || Mnemonic.startswith("vrev64") ||
- Mnemonic.startswith("vrev32") || Mnemonic.startswith("vrev16") ||
- Mnemonic.startswith("vmvn") || Mnemonic.startswith("veor") ||
- Mnemonic.startswith("vorn") || Mnemonic.startswith("vorr") ||
- Mnemonic.startswith("vand") || Mnemonic.startswith("vmul") ||
- Mnemonic.startswith("vqrdmulh") || Mnemonic.startswith("vqdmulh") ||
- Mnemonic.startswith("vsub") || Mnemonic.startswith("vadd") ||
- Mnemonic.startswith("vqsub") || Mnemonic.startswith("vqadd") ||
- Mnemonic.startswith("vabd") || Mnemonic.startswith("vrhadd") ||
- Mnemonic.startswith("vhsub") || Mnemonic.startswith("vhadd") ||
- Mnemonic.startswith("vdup") || Mnemonic.startswith("vcls") ||
- Mnemonic.startswith("vclz") || Mnemonic.startswith("vneg") ||
- Mnemonic.startswith("vabs") || Mnemonic.startswith("vqneg") ||
- Mnemonic.startswith("vqabs") ||
- (Mnemonic.startswith("vrint") && Mnemonic != "vrintr") ||
- Mnemonic.startswith("vcmla") || Mnemonic.startswith("vfma") ||
- Mnemonic.startswith("vfms") || Mnemonic.startswith("vcadd") ||
- Mnemonic.startswith("vadd") || Mnemonic.startswith("vsub") ||
- Mnemonic.startswith("vshl") || Mnemonic.startswith("vqshl") ||
- Mnemonic.startswith("vqrshl") || Mnemonic.startswith("vrshl") ||
- Mnemonic.startswith("vsri") || Mnemonic.startswith("vsli") ||
- Mnemonic.startswith("vrshr") || Mnemonic.startswith("vshr") ||
- Mnemonic.startswith("vpsel") || Mnemonic.startswith("vcmp") ||
- Mnemonic.startswith("vqdmladh") || Mnemonic.startswith("vqrdmladh") ||
- Mnemonic.startswith("vqdmlsdh") || Mnemonic.startswith("vqrdmlsdh") ||
- Mnemonic.startswith("vcmul") || Mnemonic.startswith("vrmulh") ||
- Mnemonic.startswith("vqmovn") || Mnemonic.startswith("vqmovun") ||
- Mnemonic.startswith("vmovnt") || Mnemonic.startswith("vmovnb") ||
- Mnemonic.startswith("vmaxa") || Mnemonic.startswith("vmaxnma") ||
- Mnemonic.startswith("vhcadd") || Mnemonic.startswith("vadc") ||
- Mnemonic.startswith("vsbc") || Mnemonic.startswith("vrshr") ||
- Mnemonic.startswith("vshr") || Mnemonic.startswith("vstrb") ||
- Mnemonic.startswith("vldrb") ||
- (Mnemonic.startswith("vstrh") && Mnemonic != "vstrhi") ||
- (Mnemonic.startswith("vldrh") && Mnemonic != "vldrhi") ||
- Mnemonic.startswith("vstrw") || Mnemonic.startswith("vldrw") ||
- Mnemonic.startswith("vldrd") || Mnemonic.startswith("vstrd") ||
- Mnemonic.startswith("vqdmull") || Mnemonic.startswith("vbrsr") ||
- Mnemonic.startswith("vfmas") || Mnemonic.startswith("vmlas") ||
- Mnemonic.startswith("vmla") || Mnemonic.startswith("vqdmlash") ||
- Mnemonic.startswith("vqdmlah") || Mnemonic.startswith("vqrdmlash") ||
- Mnemonic.startswith("vqrdmlah") || Mnemonic.startswith("viwdup") ||
- Mnemonic.startswith("vdwdup") || Mnemonic.startswith("vidup") ||
- Mnemonic.startswith("vddup") || Mnemonic.startswith("vctp") ||
- Mnemonic.startswith("vpnot") || Mnemonic.startswith("vbic") ||
- Mnemonic.startswith("vrmlsldavh") || Mnemonic.startswith("vmlsldav") ||
- Mnemonic.startswith("vcvt") ||
- MS.isVPTPredicableCDEInstr(Mnemonic) ||
- (Mnemonic.startswith("vmov") &&
- !(ExtraToken == ".f16" || ExtraToken == ".32" ||
- ExtraToken == ".16" || ExtraToken == ".8"));
+ if (MS.isVPTPredicableCDEInstr(Mnemonic) ||
+ (Mnemonic.startswith("vldrh") && Mnemonic != "vldrhi") ||
+ (Mnemonic.startswith("vmov") &&
+ !(ExtraToken == ".f16" || ExtraToken == ".32" || ExtraToken == ".16" ||
+ ExtraToken == ".8")) ||
+ (Mnemonic.startswith("vrint") && Mnemonic != "vrintr") ||
+ (Mnemonic.startswith("vstrh") && Mnemonic != "vstrhi"))
+ return true;
+
+ const char *predicable_prefixes[] = {
+ "vabav", "vabd", "vabs", "vadc", "vadd",
+ "vaddlv", "vaddv", "vand", "vbic", "vbrsr",
+ "vcadd", "vcls", "vclz", "vcmla", "vcmp",
+ "vcmul", "vctp", "vcvt", "vddup", "vdup",
+ "vdwdup", "veor", "vfma", "vfmas", "vfms",
+ "vhadd", "vhcadd", "vhsub", "vidup", "viwdup",
+ "vldrb", "vldrd", "vldrw", "vmax", "vmaxa",
+ "vmaxav", "vmaxnm", "vmaxnma", "vmaxnmav", "vmaxnmv",
+ "vmaxv", "vmin", "vminav", "vminnm", "vminnmav",
+ "vminnmv", "vminv", "vmla", "vmladav", "vmlaldav",
+ "vmlalv", "vmlas", "vmlav", "vmlsdav", "vmlsldav",
+ "vmovlb", "vmovlt", "vmovnb", "vmovnt", "vmul",
+ "vmvn", "vneg", "vorn", "vorr", "vpnot",
+ "vpsel", "vqabs", "vqadd", "vqdmladh", "vqdmlah",
+ "vqdmlash", "vqdmlsdh", "vqdmulh", "vqdmull", "vqmovn",
+ "vqmovun", "vqneg", "vqrdmladh", "vqrdmlah", "vqrdmlash",
+ "vqrdmlsdh", "vqrdmulh", "vqrshl", "vqrshrn", "vqrshrun",
+ "vqshl", "vqshrn", "vqshrun", "vqsub", "vrev16",
+ "vrev32", "vrev64", "vrhadd", "vrmlaldavh", "vrmlalvh",
+ "vrmlsldavh", "vrmulh", "vrshl", "vrshr", "vrshrn",
+ "vsbc", "vshl", "vshlc", "vshll", "vshr",
+ "vshrn", "vsli", "vsri", "vstrb", "vstrd",
+ "vstrw", "vsub"};
+
+ return std::any_of(
+ std::begin(predicable_prefixes), std::end(predicable_prefixes),
+ [&Mnemonic](const char *prefix) { return Mnemonic.startswith(prefix); });
}
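
The rewrite above replaces a long chain of startswith calls with a table of prefixes scanned by std::any_of, keeping the special cases (CDE instructions, vmov, vrint, vldrh/vstrh) in a separate early return. The same scan could also be spelled with LLVM's range helper from llvm/ADT/STLExtras.h; shown only as an alternative, not as what the patch does:

  return llvm::any_of(predicable_prefixes, [&Mnemonic](const char *Prefix) {
    return Mnemonic.startswith(Prefix);
  });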
diff --git a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index fa696d8952e4..ee81bfa65c6b 100644
--- a/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -19,12 +19,12 @@
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -71,7 +71,7 @@ namespace {
// is in the MCOperand format in which 1 means 'else' and 0 'then'.
void setITState(char Firstcond, char Mask) {
// (3 - the number of trailing zeros) is the number of then / else.
- unsigned NumTZ = countTrailingZeros<uint8_t>(Mask);
+ unsigned NumTZ = llvm::countr_zero<uint8_t>(Mask);
unsigned char CCBits = static_cast<unsigned char>(Firstcond & 0xf);
assert(NumTZ <= 3 && "Invalid IT mask!");
// push condition codes onto the stack the correct order for the pops
@@ -110,7 +110,7 @@ namespace {
void setVPTState(char Mask) {
// (3 - the number of trailing zeros) is the number of then / else.
- unsigned NumTZ = countTrailingZeros<uint8_t>(Mask);
+ unsigned NumTZ = llvm::countr_zero<uint8_t>(Mask);
assert(NumTZ <= 3 && "Invalid VPT mask!");
// push predicates onto the stack the correct order for the pops
for (unsigned Pos = NumTZ+1; Pos <= 3; ++Pos) {
@@ -135,7 +135,7 @@ public:
ARMDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
const MCInstrInfo *MCII)
: MCDisassembler(STI, Ctx), MCII(MCII) {
- InstructionEndianness = STI.getFeatureBits()[ARM::ModeBigEndianInstructions]
+ InstructionEndianness = STI.hasFeature(ARM::ModeBigEndianInstructions)
? llvm::support::big
: llvm::support::little;
}
@@ -746,7 +746,7 @@ uint64_t ARMDisassembler::suggestBytesToSkip(ArrayRef<uint8_t> Bytes,
// In Arm state, instructions are always 4 bytes wide, so there's no
// point in skipping any smaller number of bytes if an instruction
// can't be decoded.
- if (!STI.getFeatureBits()[ARM::ModeThumb])
+ if (!STI.hasFeature(ARM::ModeThumb))
return 4;
// In a Thumb instruction stream, a halfword is a standalone 2-byte
@@ -773,7 +773,7 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
raw_ostream &CS) const {
- if (STI.getFeatureBits()[ARM::ModeThumb])
+ if (STI.hasFeature(ARM::ModeThumb))
return getThumbInstruction(MI, Size, Bytes, Address, CS);
return getARMInstruction(MI, Size, Bytes, Address, CS);
}
@@ -784,7 +784,7 @@ DecodeStatus ARMDisassembler::getARMInstruction(MCInst &MI, uint64_t &Size,
raw_ostream &CS) const {
CommentStream = &CS;
- assert(!STI.getFeatureBits()[ARM::ModeThumb] &&
+ assert(!STI.hasFeature(ARM::ModeThumb) &&
"Asked to disassemble an ARM instruction but Subtarget is in Thumb "
"mode!");
@@ -1070,7 +1070,7 @@ DecodeStatus ARMDisassembler::getThumbInstruction(MCInst &MI, uint64_t &Size,
raw_ostream &CS) const {
CommentStream = &CS;
- assert(STI.getFeatureBits()[ARM::ModeThumb] &&
+ assert(STI.hasFeature(ARM::ModeThumb) &&
"Asked to disassemble in Thumb mode but Subtarget is in ARM mode!");
// We want to read exactly 2 bytes of data.
@@ -4910,7 +4910,7 @@ static DecodeStatus DecodeT2SOImm(MCInst &Inst, unsigned Val, uint64_t Address,
} else {
unsigned unrot = fieldFromInstruction(Val, 0, 7) | 0x80;
unsigned rot = fieldFromInstruction(Val, 7, 5);
- unsigned imm = (unrot >> rot) | (unrot << ((32-rot)&31));
+ unsigned imm = llvm::rotr<uint32_t>(unrot, rot);
Inst.addOperand(MCOperand::createImm(imm));
}
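
The disassembler picks up the same bit-manipulation migration as the rest of the patch: countTrailingZeros becomes llvm::countr_zero, and the open-coded rotate in DecodeT2SOImm becomes llvm::rotr<uint32_t>, which likewise replaces ARM_AM::rotr32/rotl32 in the header diff that follows. A quick standalone reminder of what that rotate computes (plain C++, no LLVM headers; llvm::rotr<uint32_t> yields the same values for amounts in [0, 31]):

  #include <cassert>
  #include <cstdint>

  static uint32_t rotr32(uint32_t Val, unsigned Amt) {
    // The expression being replaced; well-defined for Amt in [0, 31].
    return (Val >> Amt) | (Val << ((32 - Amt) & 31));
  }

  int main() {
    assert(rotr32(0x80u, 8) == 0x80000000u); // T2 SOImm: 0x80 ror 8
    assert(rotr32(0xF0u, 4) == 0x0000000Fu);
    return 0;
  }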
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
index 8459b4ff2a14..163360c08ffb 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAddressingModes.h
@@ -81,20 +81,6 @@ namespace ARM_AM {
}
}
- /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits.
- ///
- inline unsigned rotr32(unsigned Val, unsigned Amt) {
- assert(Amt < 32 && "Invalid rotate amount");
- return (Val >> Amt) | (Val << ((32-Amt)&31));
- }
-
- /// rotl32 - Rotate a 32-bit unsigned value left by a specified # bits.
- ///
- inline unsigned rotl32(unsigned Val, unsigned Amt) {
- assert(Amt < 32 && "Invalid rotate amount");
- return (Val << Amt) | (Val >> ((32-Amt)&31));
- }
-
//===--------------------------------------------------------------------===//
// Addressing Mode #1: shift_operand with registers
//===--------------------------------------------------------------------===//
@@ -132,22 +118,22 @@ namespace ARM_AM {
if ((Imm & ~255U) == 0) return 0;
// Use CTZ to compute the rotate amount.
- unsigned TZ = countTrailingZeros(Imm);
+ unsigned TZ = llvm::countr_zero(Imm);
// Rotate amount must be even. Something like 0x200 must be rotated 8 bits,
// not 9.
unsigned RotAmt = TZ & ~1;
// If we can handle this spread, return it.
- if ((rotr32(Imm, RotAmt) & ~255U) == 0)
+ if ((llvm::rotr<uint32_t>(Imm, RotAmt) & ~255U) == 0)
return (32-RotAmt)&31; // HW rotates right, not left.
// For values like 0xF000000F, we should ignore the low 6 bits, then
// retry the hunt.
if (Imm & 63U) {
- unsigned TZ2 = countTrailingZeros(Imm & ~63U);
+ unsigned TZ2 = llvm::countr_zero(Imm & ~63U);
unsigned RotAmt2 = TZ2 & ~1;
- if ((rotr32(Imm, RotAmt2) & ~255U) == 0)
+ if ((llvm::rotr<uint32_t>(Imm, RotAmt2) & ~255U) == 0)
return (32-RotAmt2)&31; // HW rotates right, not left.
}
@@ -168,40 +154,40 @@ namespace ARM_AM {
unsigned RotAmt = getSOImmValRotate(Arg);
// If this cannot be handled with a single shifter_op, bail out.
- if (rotr32(~255U, RotAmt) & Arg)
+ if (llvm::rotr<uint32_t>(~255U, RotAmt) & Arg)
return -1;
// Encode this correctly.
- return rotl32(Arg, RotAmt) | ((RotAmt>>1) << 8);
+ return llvm::rotl<uint32_t>(Arg, RotAmt) | ((RotAmt >> 1) << 8);
}
/// isSOImmTwoPartVal - Return true if the specified value can be obtained by
/// or'ing together two SOImmVal's.
inline bool isSOImmTwoPartVal(unsigned V) {
// If this can be handled with a single shifter_op, bail out.
- V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ V = llvm::rotr<uint32_t>(~255U, getSOImmValRotate(V)) & V;
if (V == 0)
return false;
// If this can be handled with two shifter_op's, accept.
- V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ V = llvm::rotr<uint32_t>(~255U, getSOImmValRotate(V)) & V;
return V == 0;
}
/// getSOImmTwoPartFirst - If V is a value that satisfies isSOImmTwoPartVal,
/// return the first chunk of it.
inline unsigned getSOImmTwoPartFirst(unsigned V) {
- return rotr32(255U, getSOImmValRotate(V)) & V;
+ return llvm::rotr<uint32_t>(255U, getSOImmValRotate(V)) & V;
}
/// getSOImmTwoPartSecond - If V is a value that satisfies isSOImmTwoPartVal,
/// return the second chunk of it.
inline unsigned getSOImmTwoPartSecond(unsigned V) {
// Mask out the first hunk.
- V = rotr32(~255U, getSOImmValRotate(V)) & V;
+ V = llvm::rotr<uint32_t>(~255U, getSOImmValRotate(V)) & V;
// Take what's left.
- assert(V == (rotr32(255U, getSOImmValRotate(V)) & V));
+ assert(V == (llvm::rotr<uint32_t>(255U, getSOImmValRotate(V)) & V));
return V;
}
@@ -216,7 +202,7 @@ namespace ARM_AM {
// Return false if ~(-First) is not a SoImmval.
First = getSOImmTwoPartFirst(-V);
First = ~(-First);
- return !(rotr32(~255U, getSOImmValRotate(First)) & First);
+ return !(llvm::rotr<uint32_t>(~255U, getSOImmValRotate(First)) & First);
}
/// getThumbImmValShift - Try to handle Imm with a 8-bit immediate followed
@@ -227,7 +213,7 @@ namespace ARM_AM {
if ((Imm & ~255U) == 0) return 0;
// Use CTZ to compute the shift amount.
- return countTrailingZeros(Imm);
+ return llvm::countr_zero(Imm);
}
/// isThumbImmShiftedVal - Return true if the specified value can be obtained
@@ -246,7 +232,7 @@ namespace ARM_AM {
if ((Imm & ~65535U) == 0) return 0;
// Use CTZ to compute the shift amount.
- return countTrailingZeros(Imm);
+ return llvm::countr_zero(Imm);
}
/// isThumbImm16ShiftedVal - Return true if the specified value can be
@@ -302,13 +288,14 @@ namespace ARM_AM {
/// encoding is possible.
/// See ARM Reference Manual A6.3.2.
inline int getT2SOImmValRotateVal(unsigned V) {
- unsigned RotAmt = countLeadingZeros(V);
+ unsigned RotAmt = llvm::countl_zero(V);
if (RotAmt >= 24)
return -1;
// If 'Arg' can be handled with a single shifter_op return the value.
- if ((rotr32(0xff000000U, RotAmt) & V) == V)
- return (rotr32(V, 24 - RotAmt) & 0x7f) | ((RotAmt + 8) << 7);
+ if ((llvm::rotr<uint32_t>(0xff000000U, RotAmt) & V) == V)
+ return (llvm::rotr<uint32_t>(V, 24 - RotAmt) & 0x7f) |
+ ((RotAmt + 8) << 7);
return -1;
}
@@ -334,7 +321,7 @@ namespace ARM_AM {
inline unsigned getT2SOImmValRotate(unsigned V) {
if ((V & ~255U) == 0) return 0;
// Use CTZ to compute the rotate amount.
- unsigned RotAmt = countTrailingZeros(V);
+ unsigned RotAmt = llvm::countr_zero(V);
return (32 - RotAmt) & 31;
}
@@ -345,7 +332,7 @@ namespace ARM_AM {
// out. Those should be handled directly, not with a two-part val.
if (getT2SOImmValSplatVal(V) != -1)
return false;
- V = rotr32 (~255U, getT2SOImmValRotate(V)) & V;
+ V = llvm::rotr<uint32_t>(~255U, getT2SOImmValRotate(V)) & V;
if (V == 0)
return false;
@@ -369,7 +356,7 @@ namespace ARM_AM {
assert (isT2SOImmTwoPartVal(Imm) &&
"Immedate cannot be encoded as two part immediate!");
// Try a shifter operand as one part
- unsigned V = rotr32 (~255, getT2SOImmValRotate(Imm)) & Imm;
+ unsigned V = llvm::rotr<uint32_t>(~255, getT2SOImmValRotate(Imm)) & Imm;
// If the rest is encodable as an immediate, then return it.
if (getT2SOImmVal(V) != -1) return V;
@@ -759,4 +746,3 @@ namespace ARM_AM {
} // end namespace llvm
#endif
-
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index dfa305a3b968..701691804620 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -17,6 +17,7 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
@@ -29,12 +30,10 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/MC/MCAsmLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -115,6 +114,10 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_arm_movw_lo16", 0, 20, 0},
{"fixup_t2_movt_hi16", 0, 20, 0},
{"fixup_t2_movw_lo16", 0, 20, 0},
+ {"fixup_arm_thumb_upper_8_15", 0, 8, 0},
+ {"fixup_arm_thumb_upper_0_7", 0, 8, 0},
+ {"fixup_arm_thumb_lower_8_15", 0, 8, 0},
+ {"fixup_arm_thumb_lower_0_7", 0, 8, 0},
{"fixup_arm_mod_imm", 0, 12, 0},
{"fixup_t2_so_imm", 0, 26, 0},
{"fixup_bf_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
@@ -169,6 +172,10 @@ const MCFixupKindInfo &ARMAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"fixup_arm_movw_lo16", 12, 20, 0},
{"fixup_t2_movt_hi16", 12, 20, 0},
{"fixup_t2_movw_lo16", 12, 20, 0},
+ {"fixup_arm_thumb_upper_8_15", 24, 8, 0},
+ {"fixup_arm_thumb_upper_0_7", 24, 8, 0},
+ {"fixup_arm_thumb_lower_8_15", 24, 8, 0},
+ {"fixup_arm_thumb_lower_0_7", 24, 8, 0},
{"fixup_arm_mod_imm", 20, 12, 0},
{"fixup_t2_so_imm", 26, 6, 0},
{"fixup_bf_branch", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
@@ -208,8 +215,8 @@ void ARMAsmBackend::handleAssemblerFlag(MCAssemblerFlag Flag) {
unsigned ARMAsmBackend::getRelaxedOpcode(unsigned Op,
const MCSubtargetInfo &STI) const {
- bool HasThumb2 = STI.getFeatureBits()[ARM::FeatureThumb2];
- bool HasV8MBaselineOps = STI.getFeatureBits()[ARM::HasV8MBaselineOps];
+ bool HasThumb2 = STI.hasFeature(ARM::FeatureThumb2);
+ bool HasV8MBaselineOps = STI.hasFeature(ARM::HasV8MBaselineOps);
switch (Op) {
default:
@@ -449,7 +456,6 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
switch (Kind) {
default:
- Ctx.reportError(Fixup.getLoc(), "bad relocation fixup type");
return 0;
case FK_Data_1:
case FK_Data_2:
@@ -489,6 +495,20 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8);
return swapHalfWords(Value, Endian == support::little);
}
+ case ARM::fixup_arm_thumb_upper_8_15:
+ if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF())
+ return (Value & 0xff000000) >> 24;
+ return Value & 0xff;
+ case ARM::fixup_arm_thumb_upper_0_7:
+ if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF())
+ return (Value & 0x00ff0000) >> 16;
+ return Value & 0xff;
+ case ARM::fixup_arm_thumb_lower_8_15:
+ if (IsResolved || !STI->getTargetTriple().isOSBinFormatELF())
+ return (Value & 0x0000ff00) >> 8;
+ return Value & 0xff;
+ case ARM::fixup_arm_thumb_lower_0_7:
+ return Value & 0x000000ff;
case ARM::fixup_arm_ldst_pcrel_12:
// ARM PC-relative values are offset by 8.
Value -= 4;
@@ -604,9 +624,9 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
}
case ARM::fixup_arm_thumb_bl: {
if (!isInt<25>(Value - 4) ||
- (!STI->getFeatureBits()[ARM::FeatureThumb2] &&
- !STI->getFeatureBits()[ARM::HasV8MBaselineOps] &&
- !STI->getFeatureBits()[ARM::HasV6MOps] &&
+ (!STI->hasFeature(ARM::FeatureThumb2) &&
+ !STI->hasFeature(ARM::HasV8MBaselineOps) &&
+ !STI->hasFeature(ARM::HasV6MOps) &&
!isInt<23>(Value - 4))) {
Ctx.reportError(Fixup.getLoc(), "Relocation out of range");
return 0;
@@ -679,7 +699,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
// On CPUs supporting Thumb2, this will be relaxed to an ldr.w, otherwise we
// could have an error on our hands.
assert(STI != nullptr);
- if (!STI->getFeatureBits()[ARM::FeatureThumb2] && IsResolved) {
+ if (!STI->hasFeature(ARM::FeatureThumb2) && IsResolved) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
@@ -704,8 +724,8 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
case ARM::fixup_arm_thumb_br:
// Offset by 4 and don't encode the lower bit, which is always 0.
assert(STI != nullptr);
- if (!STI->getFeatureBits()[ARM::FeatureThumb2] &&
- !STI->getFeatureBits()[ARM::HasV8MBaselineOps]) {
+ if (!STI->hasFeature(ARM::FeatureThumb2) &&
+ !STI->hasFeature(ARM::HasV8MBaselineOps)) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
@@ -716,7 +736,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCAssembler &Asm,
case ARM::fixup_arm_thumb_bcc:
// Offset by 4 and don't encode the lower bit, which is always 0.
assert(STI != nullptr);
- if (!STI->getFeatureBits()[ARM::FeatureThumb2]) {
+ if (!STI->hasFeature(ARM::FeatureThumb2)) {
const char *FixupDiagnostic = reasonForFixupRelaxation(Fixup, Value);
if (FixupDiagnostic) {
Ctx.reportError(Fixup.getLoc(), FixupDiagnostic);
@@ -933,6 +953,10 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
case ARM::fixup_arm_thumb_bcc:
case ARM::fixup_arm_thumb_cp:
case ARM::fixup_thumb_adr_pcrel_10:
+ case ARM::fixup_arm_thumb_upper_8_15:
+ case ARM::fixup_arm_thumb_upper_0_7:
+ case ARM::fixup_arm_thumb_lower_8_15:
+ case ARM::fixup_arm_thumb_lower_0_7:
return 1;
case FK_Data_2:
@@ -1003,6 +1027,10 @@ static unsigned getFixupKindContainerSizeBytes(unsigned Kind) {
case ARM::fixup_thumb_adr_pcrel_10:
case ARM::fixup_arm_thumb_br:
case ARM::fixup_arm_thumb_cb:
+ case ARM::fixup_arm_thumb_upper_8_15:
+ case ARM::fixup_arm_thumb_upper_0_7:
+ case ARM::fixup_arm_thumb_lower_8_15:
+ case ARM::fixup_arm_thumb_lower_0_7:
// Instruction size is 2 bytes.
return 2;
@@ -1109,14 +1137,19 @@ enum CompactUnwindEncodings {
/// encoded in compact unwind, the method returns UNWIND_ARM_MODE_DWARF which
/// tells the runtime to fallback and unwind using dwarf.
uint32_t ARMAsmBackendDarwin::generateCompactUnwindEncoding(
- ArrayRef<MCCFIInstruction> Instrs) const {
+ const MCDwarfFrameInfo *FI, const MCContext *Ctxt) const {
DEBUG_WITH_TYPE("compact-unwind", llvm::dbgs() << "generateCU()\n");
// Only armv7k uses CFI based unwinding.
if (Subtype != MachO::CPU_SUBTYPE_ARM_V7K)
return 0;
// No .cfi directives means no frame.
+ ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
if (Instrs.empty())
return 0;
+ if (!isDarwinCanonicalPersonality(FI->Personality) &&
+ !Ctxt->emitCompactUnwindNonCanonical())
+ return CU::UNWIND_ARM_MODE_DWARF;
+
// Start off assuming CFA is at SP+0.
unsigned CFARegister = ARM::SP;
int CFARegisterOffset = 0;
@@ -1158,7 +1191,7 @@ uint32_t ARMAsmBackendDarwin::generateCompactUnwindEncoding(
// Directive not convertable to compact unwind, bail out.
DEBUG_WITH_TYPE("compact-unwind",
llvm::dbgs()
- << "CFI directive not compatiable with comact "
+ << "CFI directive not compatible with compact "
"unwind encoding, opcode=" << Inst.getOperation()
<< "\n");
return CU::UNWIND_ARM_MODE_DWARF;
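Each of the four new fixup_arm_thumb_{upper,lower}_{8_15,0_7} kinds carries one byte of a 32-bit value; adjustFixupValue above selects that byte with the masks 0xff000000, 0x00ff0000, 0x0000ff00 and 0x000000ff. A minimal sketch of the same decomposition in plain C++, with no LLVM dependencies (the splitForThumb1 helper and its struct are invented for illustration):

// Sketch: split a 32-bit value into the four byte fields used by the new
// Thumb1 fixups, using the same masks and shifts as adjustFixupValue above.
#include <cassert>
#include <cstdint>

struct ThumbBytes {
  uint8_t Upper8_15; // bits 31..24 -> :upper8_15:
  uint8_t Upper0_7;  // bits 23..16 -> :upper0_7:
  uint8_t Lower8_15; // bits 15..8  -> :lower8_15:
  uint8_t Lower0_7;  // bits 7..0   -> :lower0_7:
};

static ThumbBytes splitForThumb1(uint32_t Value) {
  return {static_cast<uint8_t>((Value & 0xff000000u) >> 24),
          static_cast<uint8_t>((Value & 0x00ff0000u) >> 16),
          static_cast<uint8_t>((Value & 0x0000ff00u) >> 8),
          static_cast<uint8_t>(Value & 0x000000ffu)};
}

int main() {
  ThumbBytes B = splitForThumb1(0x12345678u);
  assert(B.Upper8_15 == 0x12 && B.Upper0_7 == 0x34 &&
         B.Lower8_15 == 0x56 && B.Lower0_7 == 0x78);
}

These four bytes are what a Thumb1 movs/adds/lsls sequence rebuilds at run time for execute-only code, which is why the ELF object writer further below maps each of the fixups to its own R_ARM_THM_ALU_ABS_G* relocation.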
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
index a952823d1aad..64c78d352895 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.h
@@ -28,7 +28,7 @@ public:
}
bool hasNOP(const MCSubtargetInfo *STI) const {
- return STI->getFeatureBits()[ARM::HasV6T2Ops];
+ return STI->hasFeature(ARM::HasV6T2Ops);
}
std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
index 85013b5f099a..ace573c8fa96 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackendDarwin.h
@@ -11,6 +11,7 @@
#include "ARMAsmBackend.h"
#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectWriter.h"
namespace llvm {
@@ -32,8 +33,8 @@ public:
/*Is64Bit=*/false, cantFail(MachO::getCPUType(TT)), Subtype);
}
- uint32_t generateCompactUnwindEncoding(
- ArrayRef<MCCFIInstruction> Instrs) const override;
+ uint32_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
+ const MCContext *Ctxt) const override;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
index f8de0320166a..1e87085d7bf0 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -255,7 +255,7 @@ namespace ARMII {
/// MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects
/// just that part of the flag set.
- MO_OPTION_MASK = 0x3,
+ MO_OPTION_MASK = 0xf03,
/// MO_COFFSTUB - On a symbol operand "FOO", this indicates that the
/// reference is actually to the ".refptr.FOO" symbol. This is used for
@@ -287,11 +287,27 @@ namespace ARMII {
/// example).
MO_NONLAZY = 0x80,
- // It's undefined behaviour if an enum overflows the range between its
- // smallest and largest values, but since these are |ed together, it can
- // happen. Put a sentinel in (values of this enum are stored as "unsigned
- // char").
- MO_UNUSED_MAXIMUM = 0xff
+ /// MO_LO_0_7 - On a symbol operand, this represents a relocation containing
+ /// bits 0 through 7 of the address. Used only with Thumb1 MOV and ADD
+ // instructions.
+ MO_LO_0_7 = 0x100,
+
+ /// MO_LO_8_15 - On a symbol operand, this represents a relocation
+ /// containing
+ /// bits 8 through 15 of the address. Used only with Thumb1 MOV and ADD
+ // instructions.
+ MO_LO_8_15 = 0x200,
+
+ /// MO_HI_0_7 - On a symbol operand, this represents a relocation containing
+ /// bits 16 through 23 of the address. Used only with Thumb1 MOV and ADD
+ // instructions.
+ MO_HI_0_7 = 0x400,
+
+ /// MO_HI_8_15 - On a symbol operand, this represents a relocation
+ /// containing
+ /// bits 24 through 31 of the address. Used only with Thumb1 MOV and ADD
+ // instructions.
+ MO_HI_8_15 = 0x800
};
enum {
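Widening MO_OPTION_MASK from 0x3 to 0xf03 folds the four new per-byte flags into the mutually exclusive "option" part of an operand's target flags, while OR-able bits such as MO_NONLAZY stay outside the mask. A small sketch, assuming the flags travel in a plain unsigned value and using only constants visible in the enum above (the getOption helper is hypothetical):

// Sketch: isolating the mutually exclusive "option" field of a target-flag
// word with the widened MO_OPTION_MASK. Values are copied from the enum above.
#include <cassert>

enum : unsigned {
  MO_OPTION_MASK = 0xf03,
  MO_NONLAZY = 0x80, // OR-able bit, deliberately outside the option field
  MO_LO_0_7 = 0x100,
  MO_LO_8_15 = 0x200,
  MO_HI_0_7 = 0x400,
  MO_HI_8_15 = 0x800,
};

static unsigned getOption(unsigned TargetFlags) {
  return TargetFlags & MO_OPTION_MASK; // strips OR-able bits like MO_NONLAZY
}

int main() {
  unsigned Flags = MO_HI_8_15 | MO_NONLAZY;
  assert(getOption(Flags) == MO_HI_8_15); // 0x800 is kept by the wider mask
  assert((getOption(Flags) & MO_NONLAZY) == 0);
}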
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index df8f54d14a86..caebace2eb78 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -87,7 +87,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
if (IsPCRel) {
switch (Fixup.getTargetKind()) {
default:
- Ctx.reportError(Fixup.getLoc(), "unsupported relocation on symbol");
+ Ctx.reportError(Fixup.getLoc(), "unsupported relocation type");
return ELF::R_ARM_NONE;
case FK_Data_4:
switch (Modifier) {
@@ -137,6 +137,14 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
return ELF::R_ARM_THM_MOVT_PREL;
case ARM::fixup_t2_movw_lo16:
return ELF::R_ARM_THM_MOVW_PREL_NC;
+ case ARM::fixup_arm_thumb_upper_8_15:
+ return ELF::R_ARM_THM_ALU_ABS_G3;
+ case ARM::fixup_arm_thumb_upper_0_7:
+ return ELF::R_ARM_THM_ALU_ABS_G2_NC;
+ case ARM::fixup_arm_thumb_lower_8_15:
+ return ELF::R_ARM_THM_ALU_ABS_G1_NC;
+ case ARM::fixup_arm_thumb_lower_0_7:
+ return ELF::R_ARM_THM_ALU_ABS_G0_NC;
case ARM::fixup_arm_thumb_br:
return ELF::R_ARM_THM_JUMP11;
case ARM::fixup_arm_thumb_bcc:
@@ -159,7 +167,7 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
}
switch (Kind) {
default:
- Ctx.reportError(Fixup.getLoc(), "unsupported relocation on symbol");
+ Ctx.reportError(Fixup.getLoc(), "unsupported relocation type");
return ELF::R_ARM_NONE;
case FK_Data_1:
switch (Modifier) {
@@ -265,6 +273,15 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_ARM_SBREL:
return ELF::R_ARM_THM_MOVW_BREL_NC;
}
+
+ case ARM::fixup_arm_thumb_upper_8_15:
+ return ELF::R_ARM_THM_ALU_ABS_G3;
+ case ARM::fixup_arm_thumb_upper_0_7:
+ return ELF::R_ARM_THM_ALU_ABS_G2_NC;
+ case ARM::fixup_arm_thumb_lower_8_15:
+ return ELF::R_ARM_THM_ALU_ABS_G1_NC;
+ case ARM::fixup_arm_thumb_lower_0_7:
+ return ELF::R_ARM_THM_ALU_ABS_G0_NC;
}
}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 6eeec84b7e26..25bbc4ee7eb5 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -46,7 +46,6 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
@@ -94,7 +93,7 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer {
void emitArch(ARM::ArchKind Arch) override;
void emitArchExtension(uint64_t ArchExt) override;
void emitObjectArch(ARM::ArchKind Arch) override;
- void emitFPU(unsigned FPU) override;
+ void emitFPU(ARM::FPUKind FPU) override;
void emitInst(uint32_t Inst, char Suffix = '\0') override;
void finishAttributeSection() override;
@@ -249,7 +248,7 @@ void ARMTargetAsmStreamer::emitObjectArch(ARM::ArchKind Arch) {
OS << "\t.object_arch\t" << ARM::getArchName(Arch) << '\n';
}
-void ARMTargetAsmStreamer::emitFPU(unsigned FPU) {
+void ARMTargetAsmStreamer::emitFPU(ARM::FPUKind FPU) {
OS << "\t.fpu\t" << ARM::getFPUName(FPU) << "\n";
}
@@ -383,7 +382,7 @@ void ARMTargetAsmStreamer::emitARMWinCFICustom(unsigned Opcode) {
class ARMTargetELFStreamer : public ARMTargetStreamer {
private:
StringRef CurrentVendor;
- unsigned FPU = ARM::FK_INVALID;
+ ARM::FPUKind FPU = ARM::FK_INVALID;
ARM::ArchKind Arch = ARM::ArchKind::INVALID;
ARM::ArchKind EmittedArch = ARM::ArchKind::INVALID;
@@ -415,7 +414,7 @@ private:
StringRef StringValue) override;
void emitArch(ARM::ArchKind Arch) override;
void emitObjectArch(ARM::ArchKind Arch) override;
- void emitFPU(unsigned FPU) override;
+ void emitFPU(ARM::FPUKind FPU) override;
void emitInst(uint32_t Inst, char Suffix = '\0') override;
void finishAttributeSection() override;
void emitLabel(MCSymbol *Symbol) override;
@@ -929,9 +928,7 @@ void ARMTargetELFStreamer::emitArchDefaultAttributes() {
}
}
-void ARMTargetELFStreamer::emitFPU(unsigned Value) {
- FPU = Value;
-}
+void ARMTargetELFStreamer::emitFPU(ARM::FPUKind Value) { FPU = Value; }
void ARMTargetELFStreamer::emitFPUDefaultAttributes() {
ARMELFStreamer &S = getStreamer();
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
index 53258a88c7fa..3bcea577b9b6 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMFixupKinds.h
@@ -99,6 +99,12 @@ enum Fixups {
fixup_t2_movt_hi16, // :upper16:
fixup_t2_movw_lo16, // :lower16:
+ // Fixup for Thumb movs (enc T1) and adds (enc T2) 8-bit immediate field (7-0)
+ fixup_arm_thumb_upper_8_15, // :upper8_15:
+ fixup_arm_thumb_upper_0_7, // :upper0_7:
+ fixup_arm_thumb_lower_8_15, // :lower8_15:
+ fixup_arm_thumb_lower_0_7, // :lower0_7:
+
// Fixup for mod_imm
fixup_arm_mod_imm,
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
index 9f275145adfd..4f5c067abb0c 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMInstPrinter.cpp
@@ -21,11 +21,11 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -739,7 +739,7 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand(const MCInst *MI,
raw_ostream &O) {
const MCOperand &MO = MI->getOperand(OpNum);
uint32_t v = ~MO.getImm();
- int32_t lsb = countTrailingZeros(v);
+ int32_t lsb = llvm::countr_zero(v);
int32_t width = llvm::bit_width(v) - lsb;
assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
O << markup("<imm:") << '#' << lsb << markup(">") << ", " << markup("<imm:")
@@ -750,7 +750,7 @@ void ARMInstPrinter::printMemBOption(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned val = MI->getOperand(OpNum).getImm();
- O << ARM_MB::MemBOptToString(val, STI.getFeatureBits()[ARM::HasV8Ops]);
+ O << ARM_MB::MemBOptToString(val, STI.hasFeature(ARM::HasV8Ops));
}
void ARMInstPrinter::printInstSyncBOption(const MCInst *MI, unsigned OpNum,
@@ -1073,7 +1073,7 @@ void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
// (3 - the number of trailing zeros) is the number of then / else.
unsigned Mask = MI->getOperand(OpNum).getImm();
- unsigned NumTZ = countTrailingZeros(Mask);
+ unsigned NumTZ = llvm::countr_zero(Mask);
assert(NumTZ <= 3 && "Invalid IT mask!");
for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
if ((Mask >> Pos) & 1)
@@ -1386,7 +1386,7 @@ void ARMInstPrinter::printModImmOperand(const MCInst *MI, unsigned OpNum,
break;
}
- int32_t Rotated = ARM_AM::rotr32(Bits, Rot);
+ int32_t Rotated = llvm::rotr<uint32_t>(Bits, Rot);
if (ARM_AM::getSOImmVal(Rotated) == Op.getImm()) {
// #rot has the least possible value
O << "#" << markup("<imm:");
@@ -1657,7 +1657,7 @@ void ARMInstPrinter::printVPTMask(const MCInst *MI, unsigned OpNum,
raw_ostream &O) {
// (3 - the number of trailing zeroes) is the number of then / else.
unsigned Mask = MI->getOperand(OpNum).getImm();
- unsigned NumTZ = countTrailingZeros(Mask);
+ unsigned NumTZ = llvm::countr_zero(Mask);
assert(NumTZ <= 3 && "Invalid VPT mask!");
for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) {
bool T = ((Mask >> Pos) & 1) == 0;
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index febd8ab8bbc0..46b4750e8999 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "ARMMCAsmInfo.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index bafc0f853756..dae323ec24fb 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -18,7 +18,6 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -33,6 +32,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -61,11 +61,11 @@ public:
~ARMMCCodeEmitter() override = default;
bool isThumb(const MCSubtargetInfo &STI) const {
- return STI.getFeatureBits()[ARM::ModeThumb];
+ return STI.hasFeature(ARM::ModeThumb);
}
bool isThumb2(const MCSubtargetInfo &STI) const {
- return isThumb(STI) && STI.getFeatureBits()[ARM::FeatureThumb2];
+ return isThumb(STI) && STI.hasFeature(ARM::FeatureThumb2);
}
bool isTargetMachO(const MCSubtargetInfo &STI) const {
@@ -87,12 +87,13 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- /// getHiLo16ImmOpValue - Return the encoding for the hi / low 16-bit of
- /// the specified operand. This is used for operands with :lower16: and
- /// :upper16: prefixes.
- uint32_t getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ /// getHiLoImmOpValue - Return the encoding for either the hi / low 16-bit, or
+ /// high/middle-high/middle-low/low 8 bits of the specified operand. This is
+ /// used for operands with :lower16:, :upper16:, :lower0_7:, :lower8_15:,
+ /// :upper0_7:, and :upper8_15: prefixes.
+ uint32_t getHiLoImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
bool EncodeAddrModeOpValues(const MCInst &MI, unsigned OpIdx,
unsigned &Reg, unsigned &Imm,
@@ -562,7 +563,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
// the encodings all refer to Q-registers by their literal
// register number.
- if (STI.getFeatureBits()[ARM::HasMVEIntegerOps])
+ if (STI.hasFeature(ARM::HasMVEIntegerOps))
return RegNo;
switch (Reg) {
@@ -1189,18 +1190,18 @@ getT2AddrModeImm0_1020s4OpValue(const MCInst &MI, unsigned OpIdx,
return (Reg << 8) | Imm8;
}
-uint32_t
-ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+uint32_t ARMMCCodeEmitter::getHiLoImmOpValue(const MCInst &MI, unsigned OpIdx,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
// {20-16} = imm{15-12}
// {11-0} = imm{11-0}
const MCOperand &MO = MI.getOperand(OpIdx);
if (MO.isImm())
- // Hi / lo 16 bits already extracted during earlier passes.
+ // Hi / lo bits already extracted during earlier passes.
return static_cast<unsigned>(MO.getImm());
- // Handle :upper16: and :lower16: assembly prefixes.
+ // Handle :upper16:, :lower16:, :upper8_15:, :upper0_7:, :lower8_15:
+ // :lower0_7: assembly prefixes.
const MCExpr *E = MO.getExpr();
MCFixupKind Kind;
if (E->getKind() == MCExpr::Target) {
@@ -1217,6 +1218,16 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
return (int32_t(Value) & 0xffff0000) >> 16;
case ARMMCExpr::VK_ARM_LO16:
return (int32_t(Value) & 0x0000ffff);
+
+ case ARMMCExpr::VK_ARM_HI_8_15:
+ return (int32_t(Value) & 0xff000000) >> 24;
+ case ARMMCExpr::VK_ARM_HI_0_7:
+ return (int32_t(Value) & 0x00ff0000) >> 16;
+ case ARMMCExpr::VK_ARM_LO_8_15:
+ return (int32_t(Value) & 0x0000ff00) >> 8;
+ case ARMMCExpr::VK_ARM_LO_0_7:
+ return (int32_t(Value) & 0x000000ff);
+
default: llvm_unreachable("Unsupported ARMFixup");
}
}
@@ -1231,18 +1242,39 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
Kind = MCFixupKind(isThumb(STI) ? ARM::fixup_t2_movw_lo16
: ARM::fixup_arm_movw_lo16);
break;
+ case ARMMCExpr::VK_ARM_HI_8_15:
+ if (!isThumb(STI))
+ llvm_unreachable(":upper_8_15: not supported in Arm state");
+ Kind = MCFixupKind(ARM::fixup_arm_thumb_upper_8_15);
+ break;
+ case ARMMCExpr::VK_ARM_HI_0_7:
+ if (!isThumb(STI))
+ llvm_unreachable(":upper_0_7: not supported in Arm state");
+ Kind = MCFixupKind(ARM::fixup_arm_thumb_upper_0_7);
+ break;
+ case ARMMCExpr::VK_ARM_LO_8_15:
+ if (!isThumb(STI))
+ llvm_unreachable(":lower_8_15: not supported in Arm state");
+ Kind = MCFixupKind(ARM::fixup_arm_thumb_lower_8_15);
+ break;
+ case ARMMCExpr::VK_ARM_LO_0_7:
+ if (!isThumb(STI))
+ llvm_unreachable(":lower_0_7: not supported in Arm state");
+ Kind = MCFixupKind(ARM::fixup_arm_thumb_lower_0_7);
+ break;
}
Fixups.push_back(MCFixup::create(0, E, Kind, MI.getLoc()));
return 0;
}
- // If the expression doesn't have :upper16: or :lower16: on it,
- // it's just a plain immediate expression, previously those evaluated to
- // the lower 16 bits of the expression regardless of whether
- // we have a movt or a movw, but that led to misleadingly results.
- // This is disallowed in the AsmParser in validateInstruction()
- // so this should never happen.
- llvm_unreachable("expression without :upper16: or :lower16:");
+ // If the expression doesn't have :upper16:, :lower16: on it, it's just a
+ // plain immediate expression, previously those evaluated to the lower 16 bits
+ // of the expression regardless of whether we have a movt or a movw, but that
+ // led to misleading results. This is disallowed in the AsmParser in
+ // validateInstruction() so this should never happen. The same holds for
+ // thumb1 :upper8_15:, :upper0_7:, :lower8_15: or :lower0_7: with movs or adds.
+ llvm_unreachable("expression without :upper16:, :lower16:, :upper8_15:, "
+ ":upper0_7:, :lower8_15: or :lower0_7:");
}
uint32_t ARMMCCodeEmitter::
@@ -1700,8 +1732,8 @@ getBitfieldInvertedMaskOpValue(const MCInst &MI, unsigned Op,
// msb of the mask.
const MCOperand &MO = MI.getOperand(Op);
uint32_t v = ~MO.getImm();
- uint32_t lsb = countTrailingZeros(v);
- uint32_t msb = (32 - countLeadingZeros (v)) - 1;
+ uint32_t lsb = llvm::countr_zero(v);
+ uint32_t msb = llvm::Log2_32(v);
assert(v != 0 && lsb < 32 && msb < 32 && "Illegal bitfield mask!");
return lsb | (msb << 5);
}
@@ -1988,7 +2020,7 @@ getPowerTwoOpValue(const MCInst &MI, unsigned OpIdx,
const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpIdx);
assert(MO.isImm() && "Unexpected operand type!");
- return countTrailingZeros((uint64_t)MO.getImm());
+ return llvm::countr_zero((uint64_t)MO.getImm());
}
template <unsigned start>
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index fbad05fb1759..6be308f4e712 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -22,8 +22,24 @@ ARMMCExpr::create(VariantKind Kind, const MCExpr *Expr,
void ARMMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
switch (Kind) {
default: llvm_unreachable("Invalid kind!");
- case VK_ARM_HI16: OS << ":upper16:"; break;
- case VK_ARM_LO16: OS << ":lower16:"; break;
+ case VK_ARM_HI16:
+ OS << ":upper16:";
+ break;
+ case VK_ARM_LO16:
+ OS << ":lower16:";
+ break;
+ case VK_ARM_HI_8_15:
+ OS << ":upper8_15:";
+ break;
+ case VK_ARM_HI_0_7:
+ OS << ":upper0_7:";
+ break;
+ case VK_ARM_LO_8_15:
+ OS << ":lower8_15:";
+ break;
+ case VK_ARM_LO_0_7:
+ OS << ":lower0_7:";
+ break;
}
const MCExpr *Expr = getSubExpr();
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
index 033a43288f3e..edeff9c4ced2 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h
@@ -17,8 +17,17 @@ class ARMMCExpr : public MCTargetExpr {
public:
enum VariantKind {
VK_ARM_None,
- VK_ARM_HI16, // The R_ARM_MOVT_ABS relocation (:upper16: in the .s file)
- VK_ARM_LO16 // The R_ARM_MOVW_ABS_NC relocation (:lower16: in the .s file)
+ VK_ARM_HI16, // The R_ARM_MOVT_ABS relocation (:upper16: in the .s file)
+ VK_ARM_LO16, // The R_ARM_MOVW_ABS_NC relocation (:lower16: in the .s file)
+
+ VK_ARM_HI_8_15, // The R_ARM_THM_ALU_ABS_G3 relocation (:upper8_15: in
+ // the .s file)
+ VK_ARM_HI_0_7, // The R_ARM_THM_ALU_ABS_G2_NC relocation (:upper0_7: in the
+ // .s file)
+ VK_ARM_LO_8_15, // The R_ARM_THM_ALU_ABS_G1_NC relocation (:lower8_15: in
+ // the .s file)
+ VK_ARM_LO_0_7, // The R_ARM_THM_ALU_ABS_G0_NC relocation (:lower0_7: in the
+ // .s file)
};
private:
@@ -43,6 +52,22 @@ public:
return create(VK_ARM_LO16, Expr, Ctx);
}
+ static const ARMMCExpr *createUpper8_15(const MCExpr *Expr, MCContext &Ctx) {
+ return create(VK_ARM_HI_8_15, Expr, Ctx);
+ }
+
+ static const ARMMCExpr *createUpper0_7(const MCExpr *Expr, MCContext &Ctx) {
+ return create(VK_ARM_HI_0_7, Expr, Ctx);
+ }
+
+ static const ARMMCExpr *createLower8_15(const MCExpr *Expr, MCContext &Ctx) {
+ return create(VK_ARM_LO_8_15, Expr, Ctx);
+ }
+
+ static const ARMMCExpr *createLower0_7(const MCExpr *Expr, MCContext &Ctx) {
+ return create(VK_ARM_LO_0_7, Expr, Ctx);
+ }
+
/// @}
/// @name Accessors
/// @{
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index 48ad7f3a2b28..8d9959a9457d 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -16,7 +16,6 @@
#include "ARMInstPrinter.h"
#include "ARMMCAsmInfo.h"
#include "TargetInfo/ARMTargetInfo.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -29,7 +28,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetParser.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
@@ -38,7 +37,7 @@ using namespace llvm;
static bool getMCRDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
std::string &Info) {
- if (STI.getFeatureBits()[llvm::ARM::HasV7Ops] &&
+ if (STI.hasFeature(llvm::ARM::HasV7Ops) &&
(MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 15) &&
(MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) &&
// Checks for the deprecated CP15ISB encoding:
@@ -65,7 +64,7 @@ static bool getMCRDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
return true;
}
}
- if (STI.getFeatureBits()[llvm::ARM::HasV7Ops] &&
+ if (STI.hasFeature(llvm::ARM::HasV7Ops) &&
((MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 10) ||
(MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 11))) {
Info = "since v7, cp10 and cp11 are reserved for advanced SIMD or floating "
@@ -77,7 +76,7 @@ static bool getMCRDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
static bool getMRCDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
std::string &Info) {
- if (STI.getFeatureBits()[llvm::ARM::HasV7Ops] &&
+ if (STI.hasFeature(llvm::ARM::HasV7Ops) &&
((MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 10) ||
(MI.getOperand(0).isImm() && MI.getOperand(0).getImm() == 11))) {
Info = "since v7, cp10 and cp11 are reserved for advanced SIMD or floating "
@@ -89,7 +88,7 @@ static bool getMRCDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
static bool getARMStoreDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
std::string &Info) {
- assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] &&
+ assert(!STI.hasFeature(llvm::ARM::ModeThumb) &&
"cannot predicate thumb instructions");
assert(MI.getNumOperands() >= 4 && "expected >= 4 arguments");
@@ -105,7 +104,7 @@ static bool getARMStoreDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
static bool getARMLoadDeprecationInfo(MCInst &MI, const MCSubtargetInfo &STI,
std::string &Info) {
- assert(!STI.getFeatureBits()[llvm::ARM::ModeThumb] &&
+ assert(!STI.hasFeature(llvm::ARM::ModeThumb) &&
"cannot predicate thumb instructions");
assert(MI.getNumOperands() >= 4 && "expected >= 4 arguments");
@@ -598,7 +597,7 @@ std::optional<uint64_t> ARMMCInstrAnalysis::evaluateMemoryOperandAddress(
// VLDR* instructions share the same opcode (and thus the same form) for Arm
// and Thumb. Use a bit longer route through STI in that case.
case ARMII::VFPLdStFrm:
- Addr += STI->getFeatureBits()[ARM::ModeThumb] ? 4 : 8;
+ Addr += STI->hasFeature(ARM::ModeThumb) ? 4 : 8;
break;
}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index b26773099a88..4a41fce711f3 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -79,7 +79,7 @@ static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
return true;
case FK_Data_8:
Log2Size = llvm::Log2_32(8);
- return true;
+ return false;
// These fixups are expected to always be resolvable at assembly time and
// have no relocations supported.
@@ -386,8 +386,7 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer,
// relocation type for the fixup kind. This happens when it's a fixup that's
// expected to always be resolvable at assembly time and not have any
// relocations needed.
- Asm.getContext().reportError(Fixup.getLoc(),
- "unsupported relocation on symbol");
+ Asm.getContext().reportError(Fixup.getLoc(), "unsupported relocation type");
return;
}
@@ -428,8 +427,10 @@ void ARMMachObjectWriter::recordRelocation(MachObjectWriter *Writer,
unsigned Type = 0;
const MCSymbol *RelSymbol = nullptr;
- if (Target.isAbsolute()) { // constant
- // FIXME!
+ if (!A) { // constant
+ // FIXME! This is Target.isAbsolute() case as we check SymB above. We check
+ // !A to ensure that null pointer isn't dereferenced and suppress static
+ // analyzer warnings.
report_fatal_error("FIXME: relocations to absolute targets "
"not yet implemented");
} else {
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index 16d1ae62053e..b65d1b24e63d 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -18,7 +18,6 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ARMBuildAttributes.h"
-#include "llvm/Support/TargetParser.h"
using namespace llvm;
@@ -112,7 +111,7 @@ void ARMTargetStreamer::emitIntTextAttribute(unsigned Attribute,
void ARMTargetStreamer::emitArch(ARM::ArchKind Arch) {}
void ARMTargetStreamer::emitArchExtension(uint64_t ArchExt) {}
void ARMTargetStreamer::emitObjectArch(ARM::ArchKind Arch) {}
-void ARMTargetStreamer::emitFPU(unsigned FPU) {}
+void ARMTargetStreamer::emitFPU(ARM::FPUKind FPU) {}
void ARMTargetStreamer::finishAttributeSection() {}
void ARMTargetStreamer::annotateTLSDescriptorSequence(
const MCSymbolRefExpr *SRE) {}
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
index 4686bb0b4509..62404f7add48 100644
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp
@@ -77,7 +77,7 @@ void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) {
// Compute the consecutive registers from r4 to r11.
uint32_t Mask = RegSave & 0xff0u;
- uint32_t Range = countTrailingOnes(Mask >> 5); // Exclude r4.
+ uint32_t Range = llvm::countr_one(Mask >> 5); // Exclude r4.
// Mask off non-consecutive registers. Keep r4.
Mask &= ~(0xffffffe0u << Range);
@@ -111,7 +111,7 @@ void UnwindOpcodeAssembler::EmitVFPRegSave(uint32_t VFPRegSave) {
while (Regs) {
// Now look for a run of set bits. Remember the MSB and LSB of the run.
auto RangeMSB = llvm::bit_width(Regs);
- auto RangeLen = countLeadingOnes(Regs << (32 - RangeMSB));
+ auto RangeLen = llvm::countl_one(Regs << (32 - RangeMSB));
auto RangeLSB = RangeMSB - RangeLen;
int Opcode = RangeLSB >= 16
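EmitVFPRegSave above walks the register mask by repeatedly locating a run of set bits with llvm::bit_width and llvm::countl_one. A hedged standalone sketch of that idiom, using the corresponding C++20 <bit> functions and an invented example mask (it assumes every run is shorter than 32 bits, as register masks are):

// Sketch of the "find a run of set bits" idiom from EmitVFPRegSave above,
// written against the C++20 <bit> equivalents of llvm::bit_width/countl_one.
#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Regs = 0x7c0; // bits 6..10 set
  while (Regs) {
    int RangeMSB = std::bit_width(Regs);                      // one past the top set bit
    int RangeLen = std::countl_one(Regs << (32 - RangeMSB));  // length of the run
    int RangeLSB = RangeMSB - RangeLen;
    std::printf("run covers bits [%d, %d)\n", RangeLSB, RangeMSB);
    Regs &= ~(((1u << RangeLen) - 1) << RangeLSB);            // clear that run
  }
}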
diff --git a/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp b/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp
index 34f9ea107e16..5ac79cbfe6d9 100644
--- a/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp
+++ b/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp
@@ -154,7 +154,6 @@ static bool isProfitableToInterleave(SmallSetVector<Instruction *, 4> &Exts,
static bool tryInterleave(Instruction *Start,
SmallPtrSetImpl<Instruction *> &Visited) {
LLVM_DEBUG(dbgs() << "tryInterleave from " << *Start << "\n");
- auto *VT = cast<FixedVectorType>(Start->getType());
if (!isa<Instruction>(Start->getOperand(0)))
return false;
@@ -165,6 +164,7 @@ static bool tryInterleave(Instruction *Start,
Worklist.push_back(cast<Instruction>(Start->getOperand(0)));
SmallSetVector<Instruction *, 4> Truncs;
+ SmallSetVector<Instruction *, 4> Reducts;
SmallSetVector<Instruction *, 4> Exts;
SmallSetVector<Use *, 4> OtherLeafs;
SmallSetVector<Instruction *, 4> Ops;
@@ -198,6 +198,13 @@ static bool tryInterleave(Instruction *Start,
if (!II)
return false;
+ if (II->getIntrinsicID() == Intrinsic::vector_reduce_add) {
+ if (!Reducts.insert(I))
+ continue;
+ Visited.insert(I);
+ break;
+ }
+
switch (II->getIntrinsicID()) {
case Intrinsic::abs:
case Intrinsic::smin:
@@ -267,21 +274,32 @@ static bool tryInterleave(Instruction *Start,
return false;
LLVM_DEBUG({
- dbgs() << "Found group:\n Exts:";
+ dbgs() << "Found group:\n Exts:\n";
for (auto *I : Exts)
dbgs() << " " << *I << "\n";
- dbgs() << " Ops:";
+ dbgs() << " Ops:\n";
for (auto *I : Ops)
dbgs() << " " << *I << "\n";
- dbgs() << " OtherLeafs:";
+ dbgs() << " OtherLeafs:\n";
for (auto *I : OtherLeafs)
dbgs() << " " << *I->get() << " of " << *I->getUser() << "\n";
- dbgs() << "Truncs:";
+ dbgs() << " Truncs:\n";
for (auto *I : Truncs)
dbgs() << " " << *I << "\n";
+ dbgs() << " Reducts:\n";
+ for (auto *I : Reducts)
+ dbgs() << " " << *I << "\n";
});
- assert(!Truncs.empty() && "Expected some truncs");
+ assert((!Truncs.empty() || !Reducts.empty()) &&
+ "Expected some truncs or reductions");
+ if (Truncs.empty() && Exts.empty())
+ return false;
+
+ auto *VT = !Truncs.empty()
+ ? cast<FixedVectorType>(Truncs[0]->getType())
+ : cast<FixedVectorType>(Exts[0]->getOperand(0)->getType());
+ LLVM_DEBUG(dbgs() << "Using VT:" << *VT << "\n");
// Check types
unsigned NumElts = VT->getNumElements();
@@ -311,6 +329,14 @@ static bool tryInterleave(Instruction *Start,
// Check that it looks beneficial
if (!isProfitableToInterleave(Exts, Truncs))
return false;
+ if (!Reducts.empty() && (Ops.empty() || all_of(Ops, [](Instruction *I) {
+ return I->getOpcode() == Instruction::Mul ||
+ I->getOpcode() == Instruction::Select ||
+ I->getOpcode() == Instruction::ICmp;
+ }))) {
+ LLVM_DEBUG(dbgs() << "Reduction does not look profitable\n");
+ return false;
+ }
// Create new shuffles around the extends / truncs / other leaves.
IRBuilder<> Builder(Start);
@@ -367,6 +393,14 @@ static bool tryInterleave(Instruction *Start,
return true;
}
+// Add reductions are fairly common and associative, meaning we can start the
+// interleaving from them and don't need to emit a shuffle.
+static bool isAddReduction(Instruction &I) {
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ return II->getIntrinsicID() == Intrinsic::vector_reduce_add;
+ return false;
+}
+
bool MVELaneInterleaving::runOnFunction(Function &F) {
if (!EnableInterleave)
return false;
@@ -380,8 +414,10 @@ bool MVELaneInterleaving::runOnFunction(Function &F) {
SmallPtrSet<Instruction *, 16> Visited;
for (Instruction &I : reverse(instructions(F))) {
- if (I.getType()->isVectorTy() &&
- (isa<TruncInst>(I) || isa<FPTruncInst>(I)) && !Visited.count(&I))
+ if (((I.getType()->isVectorTy() &&
+ (isa<TruncInst>(I) || isa<FPTruncInst>(I))) ||
+ isAddReduction(I)) &&
+ !Visited.count(&I))
Changed |= tryInterleave(&I, Visited);
}
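The lane-interleaving pass can now also start a group at a vector_reduce_add, because an integer add reduction is insensitive to the order of its lanes: summing the lanes of the interleaved (shuffled) vector gives the same result as summing the original lanes, so no reordering shuffle has to be emitted at the reduction. A tiny sketch of that invariant in plain C++, standing in for the IR-level reasoning:

// Sketch: an add reduction is invariant under lane permutation, which is why
// interleaving may begin at vector_reduce_add without a compensating shuffle.
#include <array>
#include <cassert>
#include <cstdint>
#include <numeric>

int main() {
  std::array<int32_t, 8> Lanes = {1, 2, 3, 4, 5, 6, 7, 8};
  // An interleaving-style permutation: even lanes first, then odd lanes.
  std::array<int32_t, 8> Shuffled = {1, 3, 5, 7, 2, 4, 6, 8};
  int64_t A = std::accumulate(Lanes.begin(), Lanes.end(), int64_t{0});
  int64_t B = std::accumulate(Shuffled.begin(), Shuffled.end(), int64_t{0});
  assert(A == B); // reduce.add sees the same sum either way
}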
diff --git a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
index 6bad9d61238e..5c113ccfdc15 100644
--- a/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
+++ b/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp
@@ -913,7 +913,7 @@ bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
}
bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
- MachineDominatorTree *DT) {
+ MachineDominatorTree *DT) {
// Scan through the block, looking for instructions that use constants moves
// into VPR that are the negative of one another. These are expected to be
// COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
@@ -965,6 +965,7 @@ bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
DeadInstructions.insert(MRI->getVRegDef(GPR));
}
LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr);
+ VPR = LastVPTReg;
} else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
// We have found the not of a previous constant. Create a VPNot of the
// earlier predicate reg and use it instead of the copy.
diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp
index e1e18347cfd1..9e5488313770 100644
--- a/llvm/lib/Target/ARM/MVETailPredication.cpp
+++ b/llvm/lib/Target/ARM/MVETailPredication.cpp
@@ -39,6 +39,7 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -111,15 +112,10 @@ private:
/// intrinsic. E.g., check that the loop induction variable and the element
/// count are of the form we expect, and also perform overflow checks for
/// the new expressions that are created.
- bool IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount);
+ const SCEV *IsSafeActiveMask(IntrinsicInst *ActiveLaneMask, Value *TripCount);
/// Insert the intrinsic to represent the effect of tail predication.
- void InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask, Value *TripCount);
-
- /// Rematerialize the iteration count in exit blocks, which enables
- /// ARMLowOverheadLoops to better optimise away loop update statements inside
- /// hardware-loops.
- void RematerializeIterCount();
+ void InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask, Value *Start);
};
} // end namespace
@@ -198,8 +194,8 @@ bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
// (((ElementCount + (VectorWidth - 1)) / VectorWidth) - TripCount
// 3) The IV must be an induction phi with an increment equal to the
// vector width.
-bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
- Value *TripCount) {
+const SCEV *MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
+ Value *TripCount) {
bool ForceTailPredication =
EnableTailPredication == TailPredication::ForceEnabledNoReductions ||
EnableTailPredication == TailPredication::ForceEnabled;
@@ -207,7 +203,7 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
Value *ElemCount = ActiveLaneMask->getOperand(1);
bool Changed = false;
if (!L->makeLoopInvariant(ElemCount, Changed))
- return false;
+ return nullptr;
auto *EC= SE->getSCEV(ElemCount);
auto *TC = SE->getSCEV(TripCount);
@@ -215,7 +211,7 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
cast<FixedVectorType>(ActiveLaneMask->getType())->getNumElements();
if (VectorWidth != 2 && VectorWidth != 4 && VectorWidth != 8 &&
VectorWidth != 16)
- return false;
+ return nullptr;
ConstantInt *ConstElemCount = nullptr;
// 1) Smoke tests that the original scalar loop TripCount (TC) belongs to
@@ -223,7 +219,38 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
// processed by the loop, so we will refer to that from this point on.
if (!SE->isLoopInvariant(EC, L)) {
LLVM_DEBUG(dbgs() << "ARM TP: element count must be loop invariant.\n");
- return false;
+ return nullptr;
+ }
+
+ // 2) Find out if IV is an induction phi. Note that we can't use Loop
+ // helpers here to get the induction variable, because the hardware loop is
+ // no longer in loopsimplify form, and also the hwloop intrinsic uses a
+ // different counter. Using SCEV, we check that the induction is of the
+ // form i = i + 4, where the increment must be equal to the VectorWidth.
+ auto *IV = ActiveLaneMask->getOperand(0);
+ auto *IVExpr = SE->getSCEV(IV);
+ auto *AddExpr = dyn_cast<SCEVAddRecExpr>(IVExpr);
+
+ if (!AddExpr) {
+ LLVM_DEBUG(dbgs() << "ARM TP: induction not an add expr: "; IVExpr->dump());
+ return nullptr;
+ }
+ // Check that this AddRec is associated with this loop.
+ if (AddExpr->getLoop() != L) {
+ LLVM_DEBUG(dbgs() << "ARM TP: phi not part of this loop\n");
+ return nullptr;
+ }
+ auto *Step = dyn_cast<SCEVConstant>(AddExpr->getOperand(1));
+ if (!Step) {
+ LLVM_DEBUG(dbgs() << "ARM TP: induction step is not a constant: ";
+ AddExpr->getOperand(1)->dump());
+ return nullptr;
+ }
+ auto StepValue = Step->getValue()->getSExtValue();
+ if (VectorWidth != StepValue) {
+ LLVM_DEBUG(dbgs() << "ARM TP: Step value " << StepValue
+ << " doesn't match vector width " << VectorWidth << "\n");
+ return nullptr;
}
if ((ConstElemCount = dyn_cast<ConstantInt>(ElemCount))) {
@@ -231,7 +258,7 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
if (!TC) {
LLVM_DEBUG(dbgs() << "ARM TP: Constant tripcount expected in "
"set.loop.iterations\n");
- return false;
+ return nullptr;
}
// Calculate 2 tripcount values and check that they are consistent with
@@ -249,10 +276,10 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
LLVM_DEBUG(dbgs() << "ARM TP: inconsistent constant tripcount values: "
<< TC1 << " from set.loop.iterations, and "
<< TC2 << " from get.active.lane.mask\n");
- return false;
+ return nullptr;
}
} else if (!ForceTailPredication) {
- // 2) We need to prove that the sub expression that we create in the
+ // 3) We need to prove that the sub expression that we create in the
// tail-predicated loop body, which calculates the remaining elements to be
// processed, is non-negative, i.e. it doesn't overflow:
//
@@ -266,6 +293,7 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
//
auto *VW = SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth));
// ElementCount + (VW-1):
+ auto *Start = AddExpr->getStart();
auto *ECPlusVWMinus1 = SE->getAddExpr(EC,
SE->getSCEV(ConstantInt::get(TripCount->getType(), VectorWidth - 1)));
@@ -274,18 +302,20 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
// Prevent unused variable warnings with TC
(void)TC;
- LLVM_DEBUG(
+ LLVM_DEBUG({
dbgs() << "ARM TP: Analysing overflow behaviour for:\n";
- dbgs() << "ARM TP: - TripCount = "; TC->dump();
- dbgs() << "ARM TP: - ElemCount = "; EC->dump();
+ dbgs() << "ARM TP: - TripCount = " << *TC << "\n";
+ dbgs() << "ARM TP: - ElemCount = " << *EC << "\n";
+ dbgs() << "ARM TP: - Start = " << *Start << "\n";
+ dbgs() << "ARM TP: - BETC = " << *SE->getBackedgeTakenCount(L) << "\n";
dbgs() << "ARM TP: - VecWidth = " << VectorWidth << "\n";
- dbgs() << "ARM TP: - (ElemCount+VW-1) / VW = "; Ceil->dump();
- );
+ dbgs() << "ARM TP: - (ElemCount+VW-1) / VW = " << *Ceil << "\n";
+ });
// As an example, almost all the tripcount expressions (produced by the
// vectoriser) look like this:
//
- // TC = ((-4 + (4 * ((3 + %N) /u 4))<nuw>) /u 4)
+ // TC = ((-4 + (4 * ((3 + %N) /u 4))<nuw> - start) /u 4)
//
// and "ElementCount + (VW-1) / VW":
//
@@ -294,64 +324,56 @@ bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
// Check for equality of TC and Ceil by calculating SCEV expression
// TC - Ceil and test it for zero.
//
- const SCEV *Sub =
- SE->getMinusSCEV(SE->getBackedgeTakenCount(L),
- SE->getUDivExpr(SE->getAddExpr(SE->getMulExpr(Ceil, VW),
- SE->getNegativeSCEV(VW)),
- VW));
+ const SCEV *Div = SE->getUDivExpr(
+ SE->getAddExpr(SE->getMulExpr(Ceil, VW), SE->getNegativeSCEV(VW),
+ SE->getNegativeSCEV(Start)),
+ VW);
+ const SCEV *Sub = SE->getMinusSCEV(SE->getBackedgeTakenCount(L), Div);
+ LLVM_DEBUG(dbgs() << "ARM TP: - Sub = "; Sub->dump());
// Use context sensitive facts about the path to the loop to refine. This
// comes up as the backedge taken count can incorporate context sensitive
// reasoning, and our RHS just above doesn't.
Sub = SE->applyLoopGuards(Sub, L);
+ LLVM_DEBUG(dbgs() << "ARM TP: - (Guarded) = "; Sub->dump());
if (!Sub->isZero()) {
LLVM_DEBUG(dbgs() << "ARM TP: possible overflow in sub expression.\n");
- return false;
+ return nullptr;
}
}
- // 3) Find out if IV is an induction phi. Note that we can't use Loop
- // helpers here to get the induction variable, because the hardware loop is
- // no longer in loopsimplify form, and also the hwloop intrinsic uses a
- // different counter. Using SCEV, we check that the induction is of the
- // form i = i + 4, where the increment must be equal to the VectorWidth.
- auto *IV = ActiveLaneMask->getOperand(0);
- auto *IVExpr = SE->getSCEV(IV);
- auto *AddExpr = dyn_cast<SCEVAddRecExpr>(IVExpr);
-
- if (!AddExpr) {
- LLVM_DEBUG(dbgs() << "ARM TP: induction not an add expr: "; IVExpr->dump());
- return false;
+ // Check that the start value is a multiple of the VectorWidth.
+ // TODO: This could do with a method to check if the scev is a multiple of
+ // VectorWidth. For the moment we just check for constants, muls and unknowns
+ // (which use MaskedValueIsZero and seem to be the most common).
+ if (auto *BaseC = dyn_cast<SCEVConstant>(AddExpr->getStart())) {
+ if (BaseC->getAPInt().urem(VectorWidth) == 0)
+ return SE->getMinusSCEV(EC, BaseC);
+ } else if (auto *BaseV = dyn_cast<SCEVUnknown>(AddExpr->getStart())) {
+ Type *Ty = BaseV->getType();
+ APInt Mask = APInt::getLowBitsSet(Ty->getPrimitiveSizeInBits(),
+ Log2_64(VectorWidth));
+ if (MaskedValueIsZero(BaseV->getValue(), Mask,
+ L->getHeader()->getModule()->getDataLayout()))
+ return SE->getMinusSCEV(EC, BaseV);
+ } else if (auto *BaseMul = dyn_cast<SCEVMulExpr>(AddExpr->getStart())) {
+ if (auto *BaseC = dyn_cast<SCEVConstant>(BaseMul->getOperand(0)))
+ if (BaseC->getAPInt().urem(VectorWidth) == 0)
+ return SE->getMinusSCEV(EC, BaseC);
+ if (auto *BaseC = dyn_cast<SCEVConstant>(BaseMul->getOperand(1)))
+ if (BaseC->getAPInt().urem(VectorWidth) == 0)
+ return SE->getMinusSCEV(EC, BaseC);
}
- // Check that this AddRec is associated with this loop.
- if (AddExpr->getLoop() != L) {
- LLVM_DEBUG(dbgs() << "ARM TP: phi not part of this loop\n");
- return false;
- }
- auto *Base = dyn_cast<SCEVConstant>(AddExpr->getOperand(0));
- if (!Base || !Base->isZero()) {
- LLVM_DEBUG(dbgs() << "ARM TP: induction base is not 0\n");
- return false;
- }
- auto *Step = dyn_cast<SCEVConstant>(AddExpr->getOperand(1));
- if (!Step) {
- LLVM_DEBUG(dbgs() << "ARM TP: induction step is not a constant: ";
- AddExpr->getOperand(1)->dump());
- return false;
- }
- auto StepValue = Step->getValue()->getSExtValue();
- if (VectorWidth == StepValue)
- return true;
-
- LLVM_DEBUG(dbgs() << "ARM TP: Step value " << StepValue
- << " doesn't match vector width " << VectorWidth << "\n");
- return false;
+ LLVM_DEBUG(
+ dbgs() << "ARM TP: induction base is not know to be a multiple of VF: "
+ << *AddExpr->getOperand(0) << "\n");
+ return nullptr;
}
void MVETailPredication::InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask,
- Value *TripCount) {
+ Value *Start) {
IRBuilder<> Builder(L->getLoopPreheader()->getTerminator());
Module *M = L->getHeader()->getModule();
Type *Ty = IntegerType::get(M->getContext(), 32);
@@ -361,7 +383,7 @@ void MVETailPredication::InsertVCTPIntrinsic(IntrinsicInst *ActiveLaneMask,
// Insert a phi to count the number of elements processed by the loop.
Builder.SetInsertPoint(L->getHeader()->getFirstNonPHI());
PHINode *Processed = Builder.CreatePHI(Ty, 2);
- Processed->addIncoming(ActiveLaneMask->getOperand(1), L->getLoopPreheader());
+ Processed->addIncoming(Start, L->getLoopPreheader());
// Replace @llvm.get.active.mask() with the ARM specific VCTP intrinic, and
// thus represent the effect of tail predication.
@@ -407,12 +429,19 @@ bool MVETailPredication::TryConvertActiveLaneMask(Value *TripCount) {
LLVM_DEBUG(dbgs() << "ARM TP: Found active lane mask: "
<< *ActiveLaneMask << "\n");
- if (!IsSafeActiveMask(ActiveLaneMask, TripCount)) {
+ const SCEV *StartSCEV = IsSafeActiveMask(ActiveLaneMask, TripCount);
+ if (!StartSCEV) {
LLVM_DEBUG(dbgs() << "ARM TP: Not safe to insert VCTP.\n");
return false;
}
- LLVM_DEBUG(dbgs() << "ARM TP: Safe to insert VCTP.\n");
- InsertVCTPIntrinsic(ActiveLaneMask, TripCount);
+ LLVM_DEBUG(dbgs() << "ARM TP: Safe to insert VCTP. Start is " << *StartSCEV
+ << "\n");
+ SCEVExpander Expander(*SE, L->getHeader()->getModule()->getDataLayout(),
+ "start");
+ Instruction *Ins = L->getLoopPreheader()->getTerminator();
+ Value *Start = Expander.expandCodeFor(StartSCEV, StartSCEV->getType(), Ins);
+ LLVM_DEBUG(dbgs() << "ARM TP: Created start value " << *Start << "\n");
+ InsertVCTPIntrinsic(ActiveLaneMask, Start);
}
// Remove dead instructions and now dead phis.
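The rewritten IsSafeActiveMask now returns the SCEV for the number of elements still to be processed and, for the overflow check, compares the loop's backedge-taken count against ((Ceil * VW) - VW - Start) /u VW, where Ceil = (ElemCount + (VW - 1)) / VW. A hedged arithmetic sketch of those quantities with ordinary integers in place of SCEV expressions (the concrete numbers are made up for illustration; Start is a multiple of the vector width, as the pass now requires):

// Sketch of the tail-predication trip-count arithmetic checked above,
// using plain integers instead of SCEV expressions.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t VW = 4;         // vector width: lanes processed per iteration
  const uint64_t ElemCount = 37; // total element count from get.active.lane.mask
  const uint64_t Start = 8;      // induction start value, a multiple of VW

  // Ceil = (ElemCount + (VW - 1)) / VW, as in the comment in the pass.
  const uint64_t Ceil = (ElemCount + VW - 1) / VW;             // 10
  // The expression the pass divides out: ((Ceil * VW) - VW - Start) / VW.
  const uint64_t Div = (Ceil * VW - VW - Start) / VW;          // 7
  // Backedge-taken count of a vector loop whose lane index starts at Start.
  const uint64_t BETC = (ElemCount - Start + VW - 1) / VW - 1; // 7

  assert(Div == BETC); // the "Sub" SCEV formed by the pass would fold to zero
}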
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 9855e48b623e..c2962c4857c3 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -81,8 +81,9 @@ emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>();
if (ST.genExecuteOnly()) {
- BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ScratchReg)
- .addImm(NumBytes).setMIFlags(MIFlags);
+ unsigned XOInstr = ST.useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
+ BuildMI(MBB, MBBI, dl, TII.get(XOInstr), ScratchReg)
+ .addImm(NumBytes).setMIFlags(MIFlags);
} else {
MRI.emitLoadConstPool(MBB, MBBI, dl, ScratchReg, 0, NumBytes, ARMCC::AL,
0, MIFlags);
diff --git a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 48eaa80ebc65..e2f3fad20079 100644
--- a/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -136,14 +136,21 @@ void Thumb1InstrInfo::expandLoadStackGuard(
MachineBasicBlock::iterator MI) const {
MachineFunction &MF = *MI->getParent()->getParent();
const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>();
assert(MF.getFunction().getParent()->getStackProtectorGuard() != "tls" &&
"TLS stack protector not supported for Thumb1 targets");
+ unsigned Instr;
if (TM.isPositionIndependent())
- expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_pcrel, ARM::tLDRi);
+ Instr = ARM::tLDRLIT_ga_pcrel;
+ else if (ST.genExecuteOnly() && ST.hasV8MBaselineOps())
+ Instr = ARM::t2MOVi32imm;
+ else if (ST.genExecuteOnly())
+ Instr = ARM::tMOVi32imm;
else
- expandLoadStackGuardBase(MI, ARM::tLDRLIT_ga_abs, ARM::tLDRi);
+ Instr = ARM::tLDRLIT_ga_abs;
+ expandLoadStackGuardBase(MI, Instr, ARM::tLDRi);
}
bool Thumb1InstrInfo::canCopyGluedNodeDuringSchedule(SDNode *N) const {
diff --git a/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 155555152ced..2945b5eaae3e 100644
--- a/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -98,9 +98,8 @@ static void TrackDefUses(MachineInstr *MI, RegisterSet &Defs, RegisterSet &Uses,
auto InsertUsesDefs = [&](RegList &Regs, RegisterSet &UsesDefs) {
for (unsigned Reg : Regs)
- for (MCSubRegIterator Subreg(Reg, TRI, /*IncludeSelf=*/true);
- Subreg.isValid(); ++Subreg)
- UsesDefs.insert(*Subreg);
+ for (MCPhysReg Subreg : TRI->subregs_inclusive(Reg))
+ UsesDefs.insert(Subreg);
};
InsertUsesDefs(LocalDefs, Defs);
diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index 1b24c289061d..2ea0eaa0aad8 100644
--- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -398,8 +398,8 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
} else {
// Use one T2 instruction to reduce NumBytes
// FIXME: Move this to ARMAddressingModes.h?
- unsigned RotAmt = countLeadingZeros(ThisVal);
- ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt);
+ unsigned RotAmt = llvm::countl_zero(ThisVal);
+ ThisVal = ThisVal & llvm::rotr<uint32_t>(0xff000000U, RotAmt);
NumBytes &= ~ThisVal;
assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 &&
"Bit extraction didn't work?");
@@ -603,8 +603,8 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// Otherwise, extract 8 adjacent bits from the immediate into this
// t2ADDri/t2SUBri.
- unsigned RotAmt = countLeadingZeros<unsigned>(Offset);
- unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xff000000U, RotAmt);
+ unsigned RotAmt = llvm::countl_zero<unsigned>(Offset);
+ unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(0xff000000U, RotAmt);
// We will handle these bits from offset, clear them.
Offset &= ~ThisImmVal;
diff --git a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
index a29095e6b81a..0c010ed1eb34 100644
--- a/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -116,9 +116,10 @@ void ThumbRegisterInfo::emitLoadConstPool(
PredReg, MIFlags);
}
-/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize
-/// a destreg = basereg + immediate in Thumb code. Materialize the immediate
-/// in a register using mov / mvn sequences or load the immediate from a
+/// emitThumbRegPlusImmInReg - Emits a series of instructions to materialize a
+/// destreg = basereg + immediate in Thumb code. Materialize the immediate in a
+/// register using mov / mvn (armv6-M >) sequences, movs / lsls / adds / lsls /
+/// adds / lsls / adds sequences (armv6-M) or load the immediate from a
/// constpool entry.
static void emitThumbRegPlusImmInReg(
MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
@@ -159,7 +160,8 @@ static void emitThumbRegPlusImmInReg(
.addReg(LdReg, RegState::Kill)
.setMIFlags(MIFlags);
} else if (ST.genExecuteOnly()) {
- BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), LdReg)
+ unsigned XOInstr = ST.useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
+ BuildMI(MBB, MBBI, dl, TII.get(XOInstr), LdReg)
.addImm(NumBytes).setMIFlags(MIFlags);
} else
MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes, ARMCC::AL, 0,
diff --git a/llvm/lib/Target/ARM/Utils/ARMBaseInfo.cpp b/llvm/lib/Target/ARM/Utils/ARMBaseInfo.cpp
index 3356d56481e5..43c2c89d259e 100644
--- a/llvm/lib/Target/ARM/Utils/ARMBaseInfo.cpp
+++ b/llvm/lib/Target/ARM/Utils/ARMBaseInfo.cpp
@@ -19,8 +19,7 @@ ARM::PredBlockMask expandPredBlockMask(ARM::PredBlockMask BlockMask,
ARMVCC::VPTCodes Kind) {
using PredBlockMask = ARM::PredBlockMask;
assert(Kind != ARMVCC::None && "Cannot expand a mask with None!");
- assert(countTrailingZeros((unsigned)BlockMask) != 0 &&
- "Mask is already full");
+ assert(llvm::countr_zero((unsigned)BlockMask) != 0 && "Mask is already full");
auto ChooseMask = [&](PredBlockMask AddedThen, PredBlockMask AddedElse) {
return Kind == ARMVCC::Then ? AddedThen : AddedElse;
diff --git a/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h b/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h
index 80b7276adb4e..56a925f09ea7 100644
--- a/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h
+++ b/llvm/lib/Target/ARM/Utils/ARMBaseInfo.h
@@ -18,7 +18,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include "MCTargetDesc/ARMMCTargetDesc.h"
namespace llvm {
diff --git a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
index c62529075108..ceee44ec0f20 100644
--- a/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
+++ b/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -101,56 +101,51 @@ bool AVRAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
const char *ExtraCode, raw_ostream &O) {
// Default asm printer can only deal with some extra codes,
// so try it first.
- bool Error = AsmPrinter::PrintAsmOperand(MI, OpNum, ExtraCode, O);
-
- if (Error && ExtraCode && ExtraCode[0]) {
- if (ExtraCode[1] != 0)
- return true; // Unknown modifier.
+ if (!AsmPrinter::PrintAsmOperand(MI, OpNum, ExtraCode, O))
+ return false;
- if (ExtraCode[0] >= 'A' && ExtraCode[0] <= 'Z') {
- const MachineOperand &RegOp = MI->getOperand(OpNum);
+ const MachineOperand &MO = MI->getOperand(OpNum);
- assert(RegOp.isReg() && "Operand must be a register when you're"
- "using 'A'..'Z' operand extracodes.");
- Register Reg = RegOp.getReg();
+ if (ExtraCode && ExtraCode[0]) {
+ // Unknown extra code.
+ if (ExtraCode[1] != 0 || ExtraCode[0] < 'A' || ExtraCode[0] > 'Z')
+ return true;
- unsigned ByteNumber = ExtraCode[0] - 'A';
+ // Operand must be a register when using 'A' ~ 'Z' extra code.
+ if (!MO.isReg())
+ return true;
- unsigned OpFlags = MI->getOperand(OpNum - 1).getImm();
- unsigned NumOpRegs = InlineAsm::getNumOperandRegisters(OpFlags);
- (void)NumOpRegs;
+ Register Reg = MO.getReg();
- const AVRSubtarget &STI = MF->getSubtarget<AVRSubtarget>();
- const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ unsigned ByteNumber = ExtraCode[0] - 'A';
+ unsigned OpFlags = MI->getOperand(OpNum - 1).getImm();
+ unsigned NumOpRegs = InlineAsm::getNumOperandRegisters(OpFlags);
- const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
- unsigned BytesPerReg = TRI.getRegSizeInBits(*RC) / 8;
- assert(BytesPerReg <= 2 && "Only 8 and 16 bit regs are supported.");
+ const AVRSubtarget &STI = MF->getSubtarget<AVRSubtarget>();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
- unsigned RegIdx = ByteNumber / BytesPerReg;
- if (RegIdx >= NumOpRegs)
- return true;
- Reg = MI->getOperand(OpNum + RegIdx).getReg();
+ const TargetRegisterClass *RC = TRI.getMinimalPhysRegClass(Reg);
+ unsigned BytesPerReg = TRI.getRegSizeInBits(*RC) / 8;
+ assert(BytesPerReg <= 2 && "Only 8 and 16 bit regs are supported.");
- if (BytesPerReg == 2) {
- Reg = TRI.getSubReg(Reg, ByteNumber % BytesPerReg ? AVR::sub_hi
- : AVR::sub_lo);
- }
+ unsigned RegIdx = ByteNumber / BytesPerReg;
+ if (RegIdx >= NumOpRegs)
+ return true;
+ Reg = MI->getOperand(OpNum + RegIdx).getReg();
- O << AVRInstPrinter::getPrettyRegisterName(Reg, MRI);
- return false;
+ if (BytesPerReg == 2) {
+ Reg = TRI.getSubReg(Reg,
+ ByteNumber % BytesPerReg ? AVR::sub_hi : AVR::sub_lo);
}
- }
- // Print global symbols.
- const auto &MO = MI->getOperand(OpNum);
- if (Error && MO.getType() == MachineOperand::MO_GlobalAddress) {
- PrintSymbolOperand(MO, O);
+ O << AVRInstPrinter::getPrettyRegisterName(Reg, MRI);
return false;
}
- if (Error)
- printOperand(MI, OpNum, O);
+ if (MO.getType() == MachineOperand::MO_GlobalAddress)
+ PrintSymbolOperand(MO, O); // Print global symbols.
+ else
+ printOperand(MI, OpNum, O); // Fallback to ordinary cases.
return false;
}
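The restructured PrintAsmOperand keeps the same 'A'..'Z' byte-selection arithmetic: the modifier letter indexes a byte across the operand's register group, division by the register width picks the register, and the remainder picks the low or high half. A hypothetical standalone helper (selectByte and its types are illustrative, not the AVR backend API):

#include <cassert>
#include <cstdio>
#include <utility>

// Returns {register index within the operand group, take-high-byte}.
static std::pair<unsigned, bool> selectByte(char Modifier, unsigned BytesPerReg) {
  assert(Modifier >= 'A' && Modifier <= 'Z' && "modifier must be 'A'..'Z'");
  assert((BytesPerReg == 1 || BytesPerReg == 2) && "only 8- and 16-bit registers");
  unsigned ByteNumber = Modifier - 'A';
  unsigned RegIdx = ByteNumber / BytesPerReg;      // which register of the group
  bool TakeHigh = (ByteNumber % BytesPerReg) != 0; // sub_hi vs sub_lo on 16-bit regs
  return {RegIdx, TakeHigh};
}

int main() {
  auto [Idx, Hi] = selectByte('C', 2); // third byte of a group of 16-bit registers
  std::printf("register index %u, %s byte\n", Idx, Hi ? "high" : "low");
  return 0;
}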
@@ -194,9 +189,8 @@ bool AVRAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
void AVRAsmPrinter::emitInstruction(const MachineInstr *MI) {
- // FIXME: Enable feature predicate checks once all the test pass.
- // AVR_MC::verifyInstructionPredicates(MI->getOpcode(),
- // getSubtargetInfo().getFeatureBits());
+ AVR_MC::verifyInstructionPredicates(MI->getOpcode(),
+ getSubtargetInfo().getFeatureBits());
AVRMCInstLower MCInstLowering(OutContext, *this);
@@ -259,9 +253,9 @@ bool AVRAsmPrinter::doFinalization(Module &M) {
auto *Section = cast<MCSectionELF>(TLOF.SectionForGlobal(&GO, TM));
if (Section->getName().startswith(".data"))
NeedsCopyData = true;
- else if (Section->getName().startswith(".rodata") && SubTM->hasPROGMEM())
- // AVRs that have a separate PROGMEM (that's most AVRs) store .rodata
- // sections in RAM.
+ else if (Section->getName().startswith(".rodata") && SubTM->hasLPM())
+ // AVRs that have a separate program memory (that's most AVRs) store
+ // .rodata sections in RAM.
NeedsCopyData = true;
else if (Section->getName().startswith(".bss"))
NeedsClearBSS = true;
diff --git a/llvm/lib/Target/AVR/AVRDevices.td b/llvm/lib/Target/AVR/AVRDevices.td
index f2c8a2e7a71e..f6b36dba7733 100644
--- a/llvm/lib/Target/AVR/AVRDevices.td
+++ b/llvm/lib/Target/AVR/AVRDevices.td
@@ -65,11 +65,6 @@ def FeatureMOVW : SubtargetFeature<"movw", "m_hasMOVW", "true",
"The device supports the 16-bit MOVW "
"instruction">;
-// The device has a separate flash namespace that must be accessed using special
-// instructions like lpm.
-def FeaturePROGMEM : SubtargetFeature<"progmem", "m_hasPROGMEM", "true",
- "The device has a separate flash namespace">;
-
// The device supports the `LPM` instruction, with implied destination being r0.
def FeatureLPM : SubtargetFeature<"lpm", "m_hasLPM", "true",
"The device supports the `LPM` instruction">;
@@ -125,6 +120,12 @@ def FeatureTinyEncoding
"The device has Tiny core specific "
"instruction encodings">;
+// When writing a 16-bit port or storing a 16-bit word, do the low byte first.
+def FeatureLowByteFirst
+ : SubtargetFeature<"lowbytefirst", "m_hasLowByteFirst", "true",
+ "Do the low byte first when writing a 16-bit port or "
+ "storing a 16-bit word">;
+
// The device has CPU registers mapped in data address space
def FeatureMMR : SubtargetFeature<"memmappedregs", "m_hasMemMappedGPR", "true",
"The device has CPU registers "
@@ -161,7 +162,7 @@ def ELFArchXMEGA7 : ELFArch<"EF_AVR_ARCH_XMEGA7">;
// device should have.
def FamilyAVR0 : Family<"avr0", []>;
-def FamilyAVR1 : Family<"avr1", [FamilyAVR0, FeatureLPM, FeaturePROGMEM, FeatureMMR]>;
+def FamilyAVR1 : Family<"avr1", [FamilyAVR0, FeatureLPM, FeatureMMR]>;
def FamilyAVR2
: Family<"avr2",
@@ -197,17 +198,18 @@ def FamilyTiny
FeatureSmallStack]>;
def FamilyXMEGA3 : Family<"xmega3",
- [FamilyAVR0, FeatureLPM, FeaturePROGMEM, FeatureIJMPCALL,
+ [FamilyAVR0, FeatureLPM, FeatureIJMPCALL,
FeatureADDSUBIW, FeatureSRAM, FeatureJMPCALL,
FeatureMultiplication, FeatureMOVW, FeatureLPMX,
- FeatureBREAK]>;
+ FeatureBREAK, FeatureLowByteFirst]>;
def FamilyXMEGA : Family<"xmega",
- [FamilyAVR0, FeatureLPM, FeaturePROGMEM, FeatureIJMPCALL,
+ [FamilyAVR0, FeatureLPM, FeatureIJMPCALL,
FeatureADDSUBIW, FeatureSRAM, FeatureJMPCALL,
FeatureMultiplication, FeatureMOVW, FeatureLPMX,
FeatureSPM, FeatureBREAK, FeatureEIJMPCALL,
- FeatureSPMX, FeatureDES, FeatureELPM, FeatureELPMX]>;
+ FeatureSPMX, FeatureDES, FeatureELPM, FeatureELPMX,
+ FeatureLowByteFirst]>;
def FamilyXMEGAU : Family<"xmegau", [FamilyXMEGA, FeatureRMW]>;
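The new FeatureLowByteFirst flag only decides which half of a 16-bit store or port write is emitted first: XMEGA-class devices take the low byte first, classic AVR takes the high byte first. A small model of the two orderings (plain C++ structs as an assumption, instead of the MachineInstr builders used later in AVRExpandPseudoInsts.cpp):

#include <array>
#include <cstdint>
#include <cstdio>

struct ByteStore { uint16_t Address; uint8_t Value; };

static std::array<ByteStore, 2> orderWordStore(uint16_t Addr, uint16_t Word,
                                               bool LowByteFirst) {
  ByteStore Lo{Addr, static_cast<uint8_t>(Word & 0xff)};
  ByteStore Hi{static_cast<uint16_t>(Addr + 1), static_cast<uint8_t>(Word >> 8)};
  if (LowByteFirst)
    return {Lo, Hi}; // XMEGA-style: low byte, then high byte
  return {Hi, Lo};   // classic AVR: high byte first
}

int main() {
  for (const ByteStore &S : orderWordStore(0x2000, 0xBEEF, /*LowByteFirst=*/false))
    std::printf("st 0x%04x <- 0x%02x\n", static_cast<unsigned>(S.Address),
                static_cast<unsigned>(S.Value));
  return 0;
}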
diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index 2c97dea0bce0..f257ccea6c50 100644
--- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -70,6 +70,7 @@ private:
bool expandLogic(unsigned Op, Block &MBB, BlockIt MBBI);
bool expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI);
bool isLogicImmOpRedundant(unsigned Op, unsigned ImmVal) const;
+ bool isLogicRegOpUndef(unsigned Op, unsigned ImmVal) const;
template <typename Func> bool expandAtomic(Block &MBB, BlockIt MBBI, Func f);
@@ -97,7 +98,11 @@ private:
bool expandASRW15Rd(Block &MBB, BlockIt MBBI);
// Common implementation of LPMWRdZ and ELPMWRdZ.
- bool expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt);
+ bool expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsELPM);
+ // Common implementation of LPMBRdZ and ELPMBRdZ.
+ bool expandLPMBELPMB(Block &MBB, BlockIt MBBI, bool IsELPM);
+ // Common implementation of ROLBRdR1 and ROLBRdR17.
+ bool expandROLBRd(Block &MBB, BlockIt MBBI);
};
char AVRExpandPseudo::ID = 0;
@@ -224,6 +229,18 @@ bool AVRExpandPseudo::isLogicImmOpRedundant(unsigned Op,
return false;
}
+bool AVRExpandPseudo::isLogicRegOpUndef(unsigned Op, unsigned ImmVal) const {
+ // ANDI Rd, 0x00 clears all input bits.
+ if (Op == AVR::ANDIRdK && ImmVal == 0x00)
+ return true;
+
+ // ORI Rd, 0xff sets all input bits.
+ if (Op == AVR::ORIRdK && ImmVal == 0xff)
+ return true;
+
+ return false;
+}
+
bool AVRExpandPseudo::expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
@@ -245,6 +262,9 @@ bool AVRExpandPseudo::expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI) {
// SREG is always implicitly dead
MIBLO->getOperand(3).setIsDead();
+
+ if (isLogicRegOpUndef(Op, Lo8))
+ MIBLO->getOperand(1).setIsUndef(true);
}
if (!isLogicImmOpRedundant(Op, Hi8)) {
@@ -256,6 +276,9 @@ bool AVRExpandPseudo::expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI) {
if (ImpIsDead)
MIBHI->getOperand(3).setIsDead();
+
+ if (isLogicRegOpUndef(Op, Hi8))
+ MIBHI->getOperand(1).setIsUndef(true);
}
MI.eraseFromParent();
@@ -810,19 +833,21 @@ bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
return true;
}
-bool AVRExpandPseudo::expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt) {
+bool AVRExpandPseudo::expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsELPM) {
MachineInstr &MI = *MBBI;
Register DstLoReg, DstHiReg;
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
+ Register SrcLoReg, SrcHiReg;
bool SrcIsKill = MI.getOperand(1).isKill();
- unsigned OpLo = IsExt ? AVR::ELPMRdZPi : AVR::LPMRdZPi;
- unsigned OpHi = IsExt ? AVR::ELPMRdZ : AVR::LPMRdZ;
+ const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
+ bool IsLPMRn = IsELPM ? STI.hasELPMX() : STI.hasLPMX();
+
TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+ TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
// Set the I/O register RAMPZ for ELPM.
- if (IsExt) {
- const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
+ if (IsELPM) {
Register Bank = MI.getOperand(2).getReg();
// out RAMPZ, rtmp
buildMI(MBB, MBBI, AVR::OUTARr).addImm(STI.getIORegRAMPZ()).addReg(Bank);
@@ -831,18 +856,81 @@ bool AVRExpandPseudo::expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt) {
// This is enforced by the @earlyclobber constraint.
assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
- // Load low byte.
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstLoReg, RegState::Define)
- .addReg(SrcReg);
-
- // Load high byte.
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstHiReg, RegState::Define)
- .addReg(SrcReg, getKillRegState(SrcIsKill));
+ if (IsLPMRn) {
+ unsigned OpLo = IsELPM ? AVR::ELPMRdZPi : AVR::LPMRdZPi;
+ unsigned OpHi = IsELPM ? AVR::ELPMRdZ : AVR::LPMRdZ;
+ // Load low byte.
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define)
+ .addReg(SrcReg);
+ // Load high byte.
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define)
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
+ MIBLO.setMemRefs(MI.memoperands());
+ MIBHI.setMemRefs(MI.memoperands());
+ } else {
+ unsigned Opc = IsELPM ? AVR::ELPM : AVR::LPM;
+ // Load low byte, and copy to the low destination register.
+ auto MIBLO = buildMI(MBB, MBBI, Opc);
+ buildMI(MBB, MBBI, AVR::MOVRdRr)
+ .addReg(DstLoReg, RegState::Define)
+ .addReg(AVR::R0, RegState::Kill);
+ MIBLO.setMemRefs(MI.memoperands());
+ // Increase the Z register by 1.
+ if (STI.hasADDSUBIW()) {
+ // adiw r31:r30, 1
+ auto MIINC = buildMI(MBB, MBBI, AVR::ADIWRdK)
+ .addReg(SrcReg, RegState::Define)
+ .addReg(SrcReg, getKillRegState(SrcIsKill))
+ .addImm(1);
+ MIINC->getOperand(3).setIsDead();
+ } else {
+ // subi r30, 255
+ // sbci r31, 255
+ buildMI(MBB, MBBI, AVR::SUBIRdK)
+ .addReg(SrcLoReg, RegState::Define)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .addImm(255);
+ auto MIZHI = buildMI(MBB, MBBI, AVR::SBCIRdK)
+ .addReg(SrcHiReg, RegState::Define)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .addImm(255);
+ MIZHI->getOperand(3).setIsDead();
+ MIZHI->getOperand(4).setIsKill();
+ }
+ // Load high byte, and copy to the high destination register.
+ auto MIBHI = buildMI(MBB, MBBI, Opc);
+ buildMI(MBB, MBBI, AVR::MOVRdRr)
+ .addReg(DstHiReg, RegState::Define)
+ .addReg(AVR::R0, RegState::Kill);
+ MIBHI.setMemRefs(MI.memoperands());
+ }
- MIBLO.setMemRefs(MI.memoperands());
- MIBHI.setMemRefs(MI.memoperands());
+ // Restore the Z register if it is not killed.
+ if (!SrcIsKill) {
+ if (STI.hasADDSUBIW()) {
+ // sbiw r31:r30, 1
+ auto MIDEC = buildMI(MBB, MBBI, AVR::SBIWRdK)
+ .addReg(SrcReg, RegState::Define)
+ .addReg(SrcReg, getKillRegState(SrcIsKill))
+ .addImm(1);
+ MIDEC->getOperand(3).setIsDead();
+ } else {
+ // subi r30, 1
+ // sbci r31, 0
+ buildMI(MBB, MBBI, AVR::SUBIRdK)
+ .addReg(SrcLoReg, RegState::Define)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .addImm(1);
+ auto MIZHI = buildMI(MBB, MBBI, AVR::SBCIRdK)
+ .addReg(SrcHiReg, RegState::Define)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .addImm(0);
+ MIZHI->getOperand(3).setIsDead();
+ MIZHI->getOperand(4).setIsKill();
+ }
+ }
MI.eraseFromParent();
return true;
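The non-ADIW path above bumps the Z pointer with "subi r30, 255; sbci r31, 255", which is just adding 1 modulo 2^16 written as a subtraction of 0xFFFF. A quick standalone check of that identity (the borrow modelling is an assumption about AVR SUBI/SBCI flag semantics, not MC-layer code):

#include <cstdint>
#include <cstdio>

static uint16_t incViaSubi(uint8_t ZLo, uint8_t ZHi) {
  bool Borrow = ZLo < 255;                                  // SUBI sets carry when K > Rd
  uint8_t NewLo = static_cast<uint8_t>(ZLo - 255);          // subi r30, 255
  uint8_t NewHi = static_cast<uint8_t>(ZHi - 255 - Borrow); // sbci r31, 255
  return static_cast<uint16_t>(NewHi << 8 | NewLo);
}

int main() {
  uint16_t Z = 0x01FF;
  uint16_t Next = incViaSubi(Z & 0xff, Z >> 8);
  std::printf("0x%04x -> 0x%04x\n", static_cast<unsigned>(Z),
              static_cast<unsigned>(Next)); // 0x01ff -> 0x0200
  return 0;
}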
@@ -858,30 +946,53 @@ bool AVRExpandPseudo::expand<AVR::ELPMWRdZ>(Block &MBB, BlockIt MBBI) {
return expandLPMWELPMW(MBB, MBBI, true);
}
-template <>
-bool AVRExpandPseudo::expand<AVR::ELPMBRdZ>(Block &MBB, BlockIt MBBI) {
+bool AVRExpandPseudo::expandLPMBELPMB(Block &MBB, BlockIt MBBI, bool IsELPM) {
MachineInstr &MI = *MBBI;
Register DstReg = MI.getOperand(0).getReg();
Register SrcReg = MI.getOperand(1).getReg();
- Register BankReg = MI.getOperand(2).getReg();
bool SrcIsKill = MI.getOperand(1).isKill();
const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
+ bool IsLPMRn = IsELPM ? STI.hasELPMX() : STI.hasLPMX();
// Set the I/O register RAMPZ for ELPM (out RAMPZ, rtmp).
- buildMI(MBB, MBBI, AVR::OUTARr).addImm(STI.getIORegRAMPZ()).addReg(BankReg);
+ if (IsELPM) {
+ Register BankReg = MI.getOperand(2).getReg();
+ buildMI(MBB, MBBI, AVR::OUTARr).addImm(STI.getIORegRAMPZ()).addReg(BankReg);
+ }
// Load byte.
- auto MILB = buildMI(MBB, MBBI, AVR::ELPMRdZ)
- .addReg(DstReg, RegState::Define)
- .addReg(SrcReg, getKillRegState(SrcIsKill));
-
- MILB.setMemRefs(MI.memoperands());
+ if (IsLPMRn) {
+ unsigned Opc = IsELPM ? AVR::ELPMRdZ : AVR::LPMRdZ;
+ auto MILB = buildMI(MBB, MBBI, Opc)
+ .addReg(DstReg, RegState::Define)
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
+ MILB.setMemRefs(MI.memoperands());
+ } else {
+ // For the basic ELPM/LPM instruction, its operand[0] is the implicit
+ // 'Z' register, and its operand[1] is the implicit 'R0' register.
+ unsigned Opc = IsELPM ? AVR::ELPM : AVR::LPM;
+ auto MILB = buildMI(MBB, MBBI, Opc);
+ buildMI(MBB, MBBI, AVR::MOVRdRr)
+ .addReg(DstReg, RegState::Define)
+ .addReg(AVR::R0, RegState::Kill);
+ MILB.setMemRefs(MI.memoperands());
+ }
MI.eraseFromParent();
return true;
}
template <>
+bool AVRExpandPseudo::expand<AVR::ELPMBRdZ>(Block &MBB, BlockIt MBBI) {
+ return expandLPMBELPMB(MBB, MBBI, true);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::LPMBRdZ>(Block &MBB, BlockIt MBBI) {
+ return expandLPMBELPMB(MBB, MBBI, false);
+}
+
+template <>
bool AVRExpandPseudo::expand<AVR::LPMWRdZPi>(Block &MBB, BlockIt MBBI) {
llvm_unreachable("16-bit LPMPi is unimplemented");
}
@@ -967,18 +1078,15 @@ bool AVRExpandPseudo::expand<AVR::AtomicFence>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::STSWKRr>(Block &MBB, BlockIt MBBI) {
+ const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
MachineInstr &MI = *MBBI;
Register SrcLoReg, SrcHiReg;
Register SrcReg = MI.getOperand(1).getReg();
bool SrcIsKill = MI.getOperand(1).isKill();
- unsigned OpLo = AVR::STSKRr;
- unsigned OpHi = AVR::STSKRr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
- // Write the high byte first in case this address belongs to a special
- // I/O address with a special temporary register.
- auto MIBHI = buildMI(MBB, MBBI, OpHi);
- auto MIBLO = buildMI(MBB, MBBI, OpLo);
+ auto MIB0 = buildMI(MBB, MBBI, AVR::STSKRr);
+ auto MIB1 = buildMI(MBB, MBBI, AVR::STSKRr);
switch (MI.getOperand(0).getType()) {
case MachineOperand::MO_GlobalAddress: {
@@ -986,26 +1094,50 @@ bool AVRExpandPseudo::expand<AVR::STSWKRr>(Block &MBB, BlockIt MBBI) {
int64_t Offs = MI.getOperand(0).getOffset();
unsigned TF = MI.getOperand(0).getTargetFlags();
- MIBLO.addGlobalAddress(GV, Offs, TF);
- MIBHI.addGlobalAddress(GV, Offs + 1, TF);
+ if (STI.hasLowByteFirst()) {
+ // Write the low byte first for XMEGA devices.
+ MIB0.addGlobalAddress(GV, Offs, TF);
+ MIB1.addGlobalAddress(GV, Offs + 1, TF);
+ } else {
+ // Write the high byte first for traditional devices.
+ MIB0.addGlobalAddress(GV, Offs + 1, TF);
+ MIB1.addGlobalAddress(GV, Offs, TF);
+ }
+
break;
}
case MachineOperand::MO_Immediate: {
unsigned Imm = MI.getOperand(0).getImm();
- MIBLO.addImm(Imm);
- MIBHI.addImm(Imm + 1);
+ if (STI.hasLowByteFirst()) {
+ // Write the low byte first for XMEGA devices.
+ MIB0.addImm(Imm);
+ MIB1.addImm(Imm + 1);
+ } else {
+ // Write the high byte first for traditional devices.
+ MIB0.addImm(Imm + 1);
+ MIB1.addImm(Imm);
+ }
+
break;
}
default:
llvm_unreachable("Unknown operand type!");
}
- MIBLO.addReg(SrcLoReg, getKillRegState(SrcIsKill));
- MIBHI.addReg(SrcHiReg, getKillRegState(SrcIsKill));
-
- MIBLO.setMemRefs(MI.memoperands());
- MIBHI.setMemRefs(MI.memoperands());
+ if (STI.hasLowByteFirst()) {
+ // Write the low byte first for XMEGA devices.
+ MIB0.addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .setMemRefs(MI.memoperands());
+ MIB1.addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .setMemRefs(MI.memoperands());
+ } else {
+ // Write the high byte first for traditional devices.
+ MIB0.addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .setMemRefs(MI.memoperands());
+ MIB1.addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .setMemRefs(MI.memoperands());
+ }
MI.eraseFromParent();
return true;
@@ -1036,16 +1168,27 @@ bool AVRExpandPseudo::expand<AVR::STWPtrRr>(Block &MBB, BlockIt MBBI) {
} else {
Register SrcLoReg, SrcHiReg;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
- buildMI(MBB, MBBI, AVR::STPtrRr)
- .addReg(DstReg, getUndefRegState(DstIsUndef))
- .addReg(SrcLoReg, getKillRegState(SrcIsKill))
- .setMemRefs(MI.memoperands());
-
- buildMI(MBB, MBBI, AVR::STDPtrQRr)
- .addReg(DstReg, getUndefRegState(DstIsUndef))
- .addImm(1)
- .addReg(SrcHiReg, getKillRegState(SrcIsKill))
- .setMemRefs(MI.memoperands());
+ if (STI.hasLowByteFirst()) {
+ buildMI(MBB, MBBI, AVR::STPtrRr)
+ .addReg(DstReg, getUndefRegState(DstIsUndef))
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .setMemRefs(MI.memoperands());
+ buildMI(MBB, MBBI, AVR::STDPtrQRr)
+ .addReg(DstReg, getUndefRegState(DstIsUndef))
+ .addImm(1)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .setMemRefs(MI.memoperands());
+ } else {
+ buildMI(MBB, MBBI, AVR::STDPtrQRr)
+ .addReg(DstReg, getUndefRegState(DstIsUndef))
+ .addImm(1)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .setMemRefs(MI.memoperands());
+ buildMI(MBB, MBBI, AVR::STPtrRr)
+ .addReg(DstReg, getUndefRegState(DstIsUndef))
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .setMemRefs(MI.memoperands());
+ }
}
MI.eraseFromParent();
@@ -1162,23 +1305,32 @@ bool AVRExpandPseudo::expand<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
.addImm(Imm + 2);
}
} else {
- unsigned OpLo = AVR::STDPtrQRr;
- unsigned OpHi = AVR::STDPtrQRr;
Register SrcLoReg, SrcHiReg;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addReg(DstReg)
- .addImm(Imm)
- .addReg(SrcLoReg, getKillRegState(SrcIsKill));
-
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addReg(DstReg, getKillRegState(DstIsKill))
- .addImm(Imm + 1)
- .addReg(SrcHiReg, getKillRegState(SrcIsKill));
-
- MIBLO.setMemRefs(MI.memoperands());
- MIBHI.setMemRefs(MI.memoperands());
+ if (STI.hasLowByteFirst()) {
+ buildMI(MBB, MBBI, AVR::STDPtrQRr)
+ .addReg(DstReg)
+ .addImm(Imm)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .setMemRefs(MI.memoperands());
+ buildMI(MBB, MBBI, AVR::STDPtrQRr)
+ .addReg(DstReg, getKillRegState(DstIsKill))
+ .addImm(Imm + 1)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .setMemRefs(MI.memoperands());
+ } else {
+ buildMI(MBB, MBBI, AVR::STDPtrQRr)
+ .addReg(DstReg)
+ .addImm(Imm + 1)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .setMemRefs(MI.memoperands());
+ buildMI(MBB, MBBI, AVR::STDPtrQRr)
+ .addReg(DstReg, getKillRegState(DstIsKill))
+ .addImm(Imm)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .setMemRefs(MI.memoperands());
+ }
}
MI.eraseFromParent();
@@ -1257,27 +1409,28 @@ bool AVRExpandPseudo::expand<AVR::INWRdA>(Block &MBB, BlockIt MBBI) {
template <>
bool AVRExpandPseudo::expand<AVR::OUTWARr>(Block &MBB, BlockIt MBBI) {
+ const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
MachineInstr &MI = *MBBI;
Register SrcLoReg, SrcHiReg;
unsigned Imm = MI.getOperand(0).getImm();
Register SrcReg = MI.getOperand(1).getReg();
bool SrcIsKill = MI.getOperand(1).isKill();
- unsigned OpLo = AVR::OUTARr;
- unsigned OpHi = AVR::OUTARr;
TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
// Since we add 1 to the Imm value for the high byte below, and 63 is the
// highest Imm value allowed for the instruction, 62 is the limit here.
assert(Imm <= 62 && "Address is out of range");
- // 16 bit I/O writes need the high byte first
- auto MIBHI = buildMI(MBB, MBBI, OpHi)
- .addImm(Imm + 1)
- .addReg(SrcHiReg, getKillRegState(SrcIsKill));
-
- auto MIBLO = buildMI(MBB, MBBI, OpLo)
- .addImm(Imm)
- .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+ // 16-bit I/O writes need the high byte first on normal AVR devices, and the
+ // low byte first on the XMEGA/XMEGA3/XMEGAU families.
+ auto MIBHI = buildMI(MBB, MBBI, AVR::OUTARr)
+ .addImm(STI.hasLowByteFirst() ? Imm : Imm + 1)
+ .addReg(STI.hasLowByteFirst() ? SrcLoReg : SrcHiReg,
+ getKillRegState(SrcIsKill));
+ auto MIBLO = buildMI(MBB, MBBI, AVR::OUTARr)
+ .addImm(STI.hasLowByteFirst() ? Imm + 1 : Imm)
+ .addReg(STI.hasLowByteFirst() ? SrcHiReg : SrcLoReg,
+ getKillRegState(SrcIsKill));
MIBLO.setMemRefs(MI.memoperands());
MIBHI.setMemRefs(MI.memoperands());
@@ -1328,8 +1481,7 @@ bool AVRExpandPseudo::expand<AVR::POPWRd>(Block &MBB, BlockIt MBBI) {
return true;
}
-template <>
-bool AVRExpandPseudo::expand<AVR::ROLBRd>(Block &MBB, BlockIt MBBI) {
+bool AVRExpandPseudo::expandROLBRd(Block &MBB, BlockIt MBBI) {
// In AVR, the rotate instructions behave quite unintuitively. They rotate
// bits through the carry bit in SREG, effectively rotating over 9 bits,
// instead of 8. This is useful when we are dealing with numbers over
@@ -1339,7 +1491,7 @@ bool AVRExpandPseudo::expand<AVR::ROLBRd>(Block &MBB, BlockIt MBBI) {
MachineInstr &MI = *MBBI;
unsigned OpShift, OpCarry;
Register DstReg = MI.getOperand(0).getReg();
- Register ZeroReg = MI.getOperand(2).getReg();
+ Register ZeroReg = MI.getOperand(3).getReg();
bool DstIsDead = MI.getOperand(0).isDead();
bool DstIsKill = MI.getOperand(1).isKill();
OpShift = AVR::ADDRdRr;
@@ -1368,6 +1520,16 @@ bool AVRExpandPseudo::expand<AVR::ROLBRd>(Block &MBB, BlockIt MBBI) {
}
template <>
+bool AVRExpandPseudo::expand<AVR::ROLBRdR1>(Block &MBB, BlockIt MBBI) {
+ return expandROLBRd(MBB, MBBI);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ROLBRdR17>(Block &MBB, BlockIt MBBI) {
+ return expandROLBRd(MBB, MBBI);
+}
+
+template <>
bool AVRExpandPseudo::expand<AVR::RORBRd>(Block &MBB, BlockIt MBBI) {
// In AVR, the rotate instructions behave quite unintuitively. They rotate
// bits through the carry bit in SREG, effectively rotating over 9 bits,
@@ -2428,6 +2590,7 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
EXPAND(AVR::LDWRdPtrPd);
case AVR::LDDWRdYQ: //: FIXME: remove this once PR13375 gets fixed
EXPAND(AVR::LDDWRdPtrQ);
+ EXPAND(AVR::LPMBRdZ);
EXPAND(AVR::LPMWRdZ);
EXPAND(AVR::LPMWRdZPi);
EXPAND(AVR::ELPMBRdZ);
@@ -2450,7 +2613,8 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
EXPAND(AVR::OUTWARr);
EXPAND(AVR::PUSHWRr);
EXPAND(AVR::POPWRd);
- EXPAND(AVR::ROLBRd);
+ EXPAND(AVR::ROLBRdR1);
+ EXPAND(AVR::ROLBRdR17);
EXPAND(AVR::RORBRd);
EXPAND(AVR::LSLWRd);
EXPAND(AVR::LSRWRd);
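One more note on the expansions above: the new ROLBRdR1/ROLBRdR17 pseudos rotate an 8-bit value left by one through the carry flag. Assuming (as the surrounding expandROLBRd code suggests) that the expansion pairs "add Rd, Rd" with "adc Rd, Rzero", the net effect is a plain rotate-left by one, which this illustrative exhaustive check confirms:

#include <bit>
#include <cassert>
#include <cstdint>

static uint8_t rolbViaAddAdc(uint8_t Rd) {
  unsigned Sum = unsigned(Rd) + unsigned(Rd);         // add Rd, Rd (carry = old bit 7)
  bool Carry = Sum > 0xff;
  uint8_t Out = static_cast<uint8_t>(Sum);
  return static_cast<uint8_t>(Out + (Carry ? 1 : 0)); // adc Rd, Rzero (Rzero holds 0)
}

int main() {
  for (unsigned V = 0; V < 256; ++V) {
    uint8_t X = static_cast<uint8_t>(V);
    assert(rolbViaAddAdc(X) == std::rotl(X, 1));
  }
  return 0;
}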
diff --git a/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/llvm/lib/Target/AVR/AVRFrameLowering.cpp
index 904cdf8420eb..aff2d5ed7b12 100644
--- a/llvm/lib/Target/AVR/AVRFrameLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRFrameLowering.cpp
@@ -260,6 +260,16 @@ bool AVRFrameLowering::spillCalleeSavedRegisters(
Register Reg = I.getReg();
bool IsNotLiveIn = !MBB.isLiveIn(Reg);
+ // Check if Reg is a sub register of a 16-bit livein register, and then
+ // add it to the livein list.
+ if (IsNotLiveIn)
+ for (const auto &LiveIn : MBB.liveins())
+ if (STI.getRegisterInfo()->isSubRegister(LiveIn.PhysReg, Reg)) {
+ IsNotLiveIn = false;
+ MBB.addLiveIn(Reg);
+ break;
+ }
+
assert(TRI->getRegSizeInBits(*TRI->getMinimalPhysRegClass(Reg)) == 8 &&
"Invalid register size");
diff --git a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
index 5511d53dfa31..bbb1de40be63 100644
--- a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -275,8 +275,7 @@ bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(
}
if (ImmNode->getValueType(0) != MVT::i8) {
- Disp = CurDAG->getTargetConstant(
- ImmNode->getAPIntValue().getZExtValue(), dl, MVT::i8);
+ Disp = CurDAG->getTargetConstant(ImmNode->getZExtValue(), dl, MVT::i8);
} else {
Disp = ImmOp;
}
@@ -366,6 +365,8 @@ template <> bool AVRDAGToDAGISel::select<ISD::LOAD>(SDNode *N) {
int ProgMemBank = AVR::getProgramMemoryBank(LD);
if (ProgMemBank < 0 || ProgMemBank > 5)
report_fatal_error("unexpected program memory bank");
+ if (ProgMemBank > 0 && !Subtarget->hasELPM())
+ report_fatal_error("unexpected program memory bank");
// This is a flash memory load, move the pointer into R31R30 and emit
// the lpm instruction.
@@ -398,8 +399,9 @@ template <> bool AVRDAGToDAGISel::select<ISD::LOAD>(SDNode *N) {
switch (VT.SimpleTy) {
case MVT::i8:
if (ProgMemBank == 0) {
+ unsigned Opc = Subtarget->hasLPMX() ? AVR::LPMRdZ : AVR::LPMBRdZ;
ResNode =
- CurDAG->getMachineNode(AVR::LPMRdZ, DL, MVT::i8, MVT::Other, Ptr);
+ CurDAG->getMachineNode(Opc, DL, MVT::i8, MVT::Other, Ptr);
} else {
// Do not combine the LDI instruction into the ELPM pseudo instruction,
// since it may be reused by other ELPM pseudo instructions.
@@ -438,7 +440,7 @@ template <> bool AVRDAGToDAGISel::select<ISD::LOAD>(SDNode *N) {
}
template <> bool AVRDAGToDAGISel::select<AVRISD::CALL>(SDNode *N) {
- SDValue InFlag;
+ SDValue InGlue;
SDValue Chain = N->getOperand(0);
SDValue Callee = N->getOperand(1);
unsigned LastOpNum = N->getNumOperands() - 1;
@@ -455,7 +457,7 @@ template <> bool AVRDAGToDAGISel::select<AVRISD::CALL>(SDNode *N) {
}
SDLoc DL(N);
- Chain = CurDAG->getCopyToReg(Chain, DL, AVR::R31R30, Callee, InFlag);
+ Chain = CurDAG->getCopyToReg(Chain, DL, AVR::R31R30, Callee, InGlue);
SmallVector<SDValue, 8> Ops;
Ops.push_back(CurDAG->getRegister(AVR::R31R30, MVT::i16));
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp
index ee2c48917971..ee0693cd0103 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -245,8 +245,8 @@ const char *AVRTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default:
return nullptr;
- NODE(RET_FLAG);
- NODE(RETI_FLAG);
+ NODE(RET_GLUE);
+ NODE(RETI_GLUE);
NODE(CALL);
NODE(WRAPPER);
NODE(LSL);
@@ -282,7 +282,7 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
const SDNode *N = Op.getNode();
EVT VT = Op.getValueType();
SDLoc dl(N);
- assert(isPowerOf2_32(VT.getSizeInBits()) &&
+ assert(llvm::has_single_bit<uint32_t>(VT.getSizeInBits()) &&
"Expected power-of-2 shift amount");
if (VT.getSizeInBits() == 32) {
@@ -427,6 +427,33 @@ SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
Victim = DAG.getNode(AVRISD::ASRBN, dl, VT, Victim,
DAG.getConstant(7, dl, VT));
ShiftAmount = 0;
+ } else if (Op.getOpcode() == ISD::ROTL && ShiftAmount == 3) {
+ // Optimize left rotation 3 bits to swap then right rotation 1 bit.
+ Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
+ Victim =
+ DAG.getNode(AVRISD::ROR, dl, VT, Victim, DAG.getConstant(1, dl, VT));
+ ShiftAmount = 0;
+ } else if (Op.getOpcode() == ISD::ROTR && ShiftAmount == 3) {
+ // Optimize right rotation 3 bits to swap then left rotation 1 bit.
+ Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
+ Victim =
+ DAG.getNode(AVRISD::ROL, dl, VT, Victim, DAG.getConstant(1, dl, VT));
+ ShiftAmount = 0;
+ } else if (Op.getOpcode() == ISD::ROTL && ShiftAmount == 7) {
+ // Optimize left rotation 7 bits to right rotation 1 bit.
+ Victim =
+ DAG.getNode(AVRISD::ROR, dl, VT, Victim, DAG.getConstant(1, dl, VT));
+ ShiftAmount = 0;
+ } else if (Op.getOpcode() == ISD::ROTR && ShiftAmount == 7) {
+ // Optimize right rotation 7 bits to left rotation 1 bit.
+ Victim =
+ DAG.getNode(AVRISD::ROL, dl, VT, Victim, DAG.getConstant(1, dl, VT));
+ ShiftAmount = 0;
+ } else if ((Op.getOpcode() == ISD::ROTR || Op.getOpcode() == ISD::ROTL) &&
+ ShiftAmount >= 4) {
+ // Optimize left/right rotation with the SWAP instruction.
+ Victim = DAG.getNode(AVRISD::SWAP, dl, VT, Victim);
+ ShiftAmount -= 4;
}
} else if (VT.getSizeInBits() == 16) {
if (Op.getOpcode() == ISD::SRA)
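The new ROTL/ROTR cases lean on a handful of 8-bit rotate identities, with the SWAP instruction (exchange of the two nibbles) acting as a rotate by 4. An exhaustive standalone check of those identities (illustrative only; std::rotl/std::rotr stand in for the AVRISD::ROL/ROR/SWAP nodes):

#include <bit>
#include <cassert>
#include <cstdint>

static uint8_t swapNibbles(uint8_t X) { return static_cast<uint8_t>(X << 4 | X >> 4); }

int main() {
  for (unsigned V = 0; V < 256; ++V) {
    uint8_t X = static_cast<uint8_t>(V);
    assert(std::rotl(X, 3) == std::rotr(swapNibbles(X), 1)); // rotl 3 == swap + rotr 1
    assert(std::rotr(X, 3) == std::rotl(swapNibbles(X), 1)); // rotr 3 == swap + rotl 1
    assert(std::rotl(X, 7) == std::rotr(X, 1));              // rotl 7 == rotr 1
    assert(std::rotr(X, 7) == std::rotl(X, 1));              // rotr 7 == rotl 1
    for (int N = 4; N < 8; ++N)                              // amounts >= 4: swap, then rest
      assert(std::rotl(X, N) == std::rotl(swapNibbles(X), N - 4));
  }
  return 0;
}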
@@ -634,11 +661,35 @@ SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS,
SDValue Cmp;
if (LHS.getSimpleValueType() == MVT::i16 && isa<ConstantSDNode>(RHS)) {
- // Generate a CPI/CPC pair if RHS is a 16-bit constant.
+ uint64_t Imm = cast<ConstantSDNode>(RHS)->getZExtValue();
+ // Generate a CPI/CPC pair if RHS is a 16-bit constant. Use the zero
+ // register for the constant RHS if its lower or higher byte is zero.
SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
DAG.getIntPtrConstant(0, DL));
SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
DAG.getIntPtrConstant(1, DL));
+ SDValue RHSlo = (Imm & 0xff) == 0
+ ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
+ : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue RHShi = (Imm & 0xff00) == 0
+ ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
+ : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
+ DAG.getIntPtrConstant(1, DL));
+ Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);
+ Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
+ } else if (RHS.getSimpleValueType() == MVT::i16 && isa<ConstantSDNode>(LHS)) {
+ // Generate a CPI/CPC pair if LHS is a 16-bit constant. Use the zero
+ // register for the constant LHS if its lower or higher byte is zero.
+ uint64_t Imm = cast<ConstantSDNode>(LHS)->getZExtValue();
+ SDValue LHSlo = (Imm & 0xff) == 0
+ ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
+ : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue LHShi = (Imm & 0xff00) == 0
+ ? DAG.getRegister(Subtarget.getZeroRegister(), MVT::i8)
+ : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS,
+ DAG.getIntPtrConstant(1, DL));
SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
DAG.getIntPtrConstant(0, DL));
SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, RHS,
@@ -1104,9 +1155,15 @@ bool AVRTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
return false;
} else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
- if (AVR::isProgramMemoryAccess(ST)) {
+ // We cannot store to program memory.
+ if (AVR::isProgramMemoryAccess(ST))
+ return false;
+ // Since the high byte needs to be stored first, we cannot emit an
+ // i16 post-increment store like:
+ // st X+, r24
+ // st X+, r25
+ if (VT == MVT::i16 && !Subtarget.hasLowByteFirst())
return false;
- }
} else {
return false;
}
@@ -1127,6 +1184,12 @@ bool AVRTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
return false;
}
+ // FIXME: We temporarily disable post increment load from program memory,
+ // due to bug https://github.com/llvm/llvm-project/issues/59914.
+ if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
+ if (AVR::isProgramMemoryAccess(LD))
+ return false;
+
Base = Op->getOperand(0);
Offset = DAG.getConstant(RHSC, DL, MVT::i8);
AM = ISD::POST_INC;
@@ -1405,7 +1468,7 @@ SDValue AVRTargetLowering::LowerFormalArguments(
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- unsigned StackSize = CCInfo.getNextStackOffset();
+ unsigned StackSize = CCInfo.getStackSize();
AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
AFI->setVarArgsFrameIndex(MFI.CreateFixedObject(2, StackSize, true));
@@ -1466,7 +1529,7 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
+ unsigned NumBytes = CCInfo.getStackSize();
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
@@ -1542,12 +1605,12 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
// Build a sequence of copy-to-reg nodes chained together with token chain and
- // flag operands which copy the outgoing args into registers. The InFlag in
+ // flag operands which copy the outgoing args into registers. The InGlue is
// necessary since all emitted instructions must be stuck together.
- SDValue InFlag;
+ SDValue InGlue;
for (auto Reg : RegsToPass) {
- Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, InFlag);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, InGlue);
+ InGlue = Chain.getValue(1);
}
// Returns a chain & a flag for retval copy to use.
@@ -1573,23 +1636,23 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
- if (InFlag.getNode()) {
- Ops.push_back(InFlag);
+ if (InGlue.getNode()) {
+ Ops.push_back(InGlue);
}
Chain = DAG.getNode(AVRISD::CALL, DL, NodeTys, Ops);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
// Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InFlag, DL);
+ Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, DL);
if (!Ins.empty()) {
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
}
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, DL, DAG,
+ return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, DL, DAG,
InVals);
}
@@ -1597,7 +1660,7 @@ SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// appropriate copies out of appropriate physical registers.
///
SDValue AVRTargetLowering::LowerCallResult(
- SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
+ SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
@@ -1616,9 +1679,9 @@ SDValue AVRTargetLowering::LowerCallResult(
// Copy all of the result registers out of their specified physreg.
for (CCValAssign const &RVLoc : RVLocs) {
Chain = DAG.getCopyFromReg(Chain, dl, RVLoc.getLocReg(), RVLoc.getValVT(),
- InFlag)
+ InGlue)
.getValue(1);
- InFlag = Chain.getValue(2);
+ InGlue = Chain.getValue(2);
InVals.push_back(Chain.getValue(0));
}
@@ -1664,17 +1727,17 @@ AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
analyzeReturnValues(Outs, CCInfo, Subtarget.hasTinyEncoding());
}
- SDValue Flag;
+ SDValue Glue;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Glue);
// Guarantee that all emitted copies are stuck together with flags.
- Flag = Chain.getValue(1);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
@@ -1695,12 +1758,12 @@ AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
}
unsigned RetOpc =
- AFI->isInterruptOrSignalHandler() ? AVRISD::RETI_FLAG : AVRISD::RET_FLAG;
+ AFI->isInterruptOrSignalHandler() ? AVRISD::RETI_GLUE : AVRISD::RET_GLUE;
RetOps[0] = Chain; // Update chain.
- if (Flag.getNode()) {
- RetOps.push_back(Flag);
+ if (Glue.getNode()) {
+ RetOps.push_back(Glue);
}
return DAG.getNode(RetOpc, dl, MVT::Other, RetOps);
@@ -1711,11 +1774,11 @@ AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
//===----------------------------------------------------------------------===//
MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
- MachineBasicBlock *BB) const {
+ MachineBasicBlock *BB,
+ bool Tiny) const {
unsigned Opc;
const TargetRegisterClass *RC;
bool HasRepeatedOperand = false;
- bool HasZeroOperand = false;
MachineFunction *F = BB->getParent();
MachineRegisterInfo &RI = F->getRegInfo();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
@@ -1750,9 +1813,8 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
RC = &AVR::DREGSRegClass;
break;
case AVR::Rol8:
- Opc = AVR::ROLBRd;
+ Opc = Tiny ? AVR::ROLBRdR17 : AVR::ROLBRdR1;
RC = &AVR::GPR8RegClass;
- HasZeroOperand = true;
break;
case AVR::Rol16:
Opc = AVR::ROLWRd;
@@ -1814,8 +1876,6 @@ MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
auto ShiftMI = BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg);
if (HasRepeatedOperand)
ShiftMI.addReg(ShiftReg);
- if (HasZeroOperand)
- ShiftMI.addReg(Subtarget.getZeroRegister());
// CheckBB:
// ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
@@ -2296,6 +2356,7 @@ MachineBasicBlock *
AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const {
int Opc = MI.getOpcode();
+ const AVRSubtarget &STI = MBB->getParent()->getSubtarget<AVRSubtarget>();
// Pseudo shift instructions with a non constant shift amount are expanded
// into a loop.
@@ -2310,7 +2371,7 @@ AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case AVR::Ror16:
case AVR::Asr8:
case AVR::Asr16:
- return insertShift(MI, MBB);
+ return insertShift(MI, MBB, STI.hasTinyEncoding());
case AVR::Lsl32:
case AVR::Lsr32:
case AVR::Asr32:
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.h b/llvm/lib/Target/AVR/AVRISelLowering.h
index 80d94dc188a5..b696bebe7136 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.h
+++ b/llvm/lib/Target/AVR/AVRISelLowering.h
@@ -26,9 +26,9 @@ enum NodeType {
/// Start the numbering where the builtin ops leave off.
FIRST_NUMBER = ISD::BUILTIN_OP_END,
/// Return from subroutine.
- RET_FLAG,
+ RET_GLUE,
/// Return from ISR.
- RETI_FLAG,
+ RETI_GLUE,
/// Represents an abstract call instruction,
/// which includes a bunch of information.
CALL,
@@ -184,7 +184,7 @@ private:
SmallVectorImpl<SDValue> &InVals) const override;
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
@@ -194,7 +194,8 @@ protected:
const AVRSubtarget &Subtarget;
private:
- MachineBasicBlock *insertShift(MachineInstr &MI, MachineBasicBlock *BB) const;
+ MachineBasicBlock *insertShift(MachineInstr &MI, MachineBasicBlock *BB,
+ bool Tiny) const;
MachineBasicBlock *insertWideShift(MachineInstr &MI,
MachineBasicBlock *BB) const;
MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const;
diff --git a/llvm/lib/Target/AVR/AVRInstrFormats.td b/llvm/lib/Target/AVR/AVRInstrFormats.td
index 96b48a504376..653c7276ba7f 100644
--- a/llvm/lib/Target/AVR/AVRInstrFormats.td
+++ b/llvm/lib/Target/AVR/AVRInstrFormats.td
@@ -432,6 +432,8 @@ class FBRsk<bit f, bits<3> s, dag outs, dag ins, string asmstr,
let Inst{10} = f;
let Inst{9 - 3} = k;
let Inst{2 - 0} = s;
+
+ let DecoderMethod = "decodeCondBranch";
}
//===----------------------------------------------------------------------===//
@@ -561,6 +563,8 @@ class FSK<bit f, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{3} = k{0};
let Inst{2 - 0} = s;
+
+ let DecoderMethod = "decodeCondBranch";
}
class ExtensionPseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
index a1bc865ffb8a..b9d27c78ce8e 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.cpp
@@ -35,8 +35,9 @@
namespace llvm {
-AVRInstrInfo::AVRInstrInfo()
- : AVRGenInstrInfo(AVR::ADJCALLSTACKDOWN, AVR::ADJCALLSTACKUP), RI() {}
+AVRInstrInfo::AVRInstrInfo(AVRSubtarget &STI)
+ : AVRGenInstrInfo(AVR::ADJCALLSTACKDOWN, AVR::ADJCALLSTACKUP), RI(),
+ STI(STI) {}
void AVRInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
@@ -58,16 +59,21 @@ void AVRInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
TRI.splitReg(DestReg, DestLo, DestHi);
TRI.splitReg(SrcReg, SrcLo, SrcHi);
+ // Emit the copies.
+ // The original instruction was for a register pair, of which only one
+ // register might have been live. Add 'undef' to satisfy the machine
+ // verifier, when subreg liveness is enabled.
+ // TODO: Eliminate these unnecessary copies.
if (DestLo == SrcHi) {
BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestHi)
- .addReg(SrcHi, getKillRegState(KillSrc));
+ .addReg(SrcHi, getKillRegState(KillSrc) | RegState::Undef);
BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestLo)
- .addReg(SrcLo, getKillRegState(KillSrc));
+ .addReg(SrcLo, getKillRegState(KillSrc) | RegState::Undef);
} else {
BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestLo)
- .addReg(SrcLo, getKillRegState(KillSrc));
+ .addReg(SrcLo, getKillRegState(KillSrc) | RegState::Undef);
BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestHi)
- .addReg(SrcHi, getKillRegState(KillSrc));
+ .addReg(SrcHi, getKillRegState(KillSrc) | RegState::Undef);
}
}
} else {
@@ -564,7 +570,10 @@ void AVRInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
// insertBranch or some hypothetical "insertDirectBranch".
// See lib/CodeGen/RegisterRelaxation.cpp for details.
// We end up here when a jump is too long for a RJMP instruction.
- BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB);
+ if (STI.hasJMPCALL())
+ BuildMI(&MBB, DL, get(AVR::JMPk)).addMBB(&NewDestBB);
+ else
+ report_fatal_error("cannot create long jump without FeatureJMPCALL");
}
} // end of namespace llvm
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.h b/llvm/lib/Target/AVR/AVRInstrInfo.h
index f84837a92e1e..290177f5eec6 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.h
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.h
@@ -23,6 +23,8 @@
namespace llvm {
+class AVRSubtarget;
+
namespace AVRCC {
/// AVR specific condition codes.
@@ -63,7 +65,7 @@ enum TOF {
/// Utilities related to the AVR instruction set.
class AVRInstrInfo : public AVRGenInstrInfo {
public:
- explicit AVRInstrInfo();
+ explicit AVRInstrInfo(AVRSubtarget &STI);
const AVRRegisterInfo &getRegisterInfo() const { return RI; }
const MCInstrDesc &getBrCond(AVRCC::CondCodes CC) const;
@@ -116,6 +118,9 @@ public:
private:
const AVRRegisterInfo RI;
+
+protected:
+ const AVRSubtarget &STI;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td
index 05ee94be7926..f93248b4940c 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -32,9 +32,9 @@ def SDT_AVRSelectCC
// AVR Specific Node Definitions
//===----------------------------------------------------------------------===//
-def AVRretflag : SDNode<"AVRISD::RET_FLAG", SDTNone,
+def AVRretglue : SDNode<"AVRISD::RET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def AVRretiflag : SDNode<"AVRISD::RETI_FLAG", SDTNone,
+def AVRretiglue : SDNode<"AVRISD::RETI_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AVRcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AVRCallSeqStart,
@@ -1010,9 +1010,9 @@ let isCall = 1 in {
// Return instructions.
//===----------------------------------------------------------------------===//
let isTerminator = 1, isReturn = 1, isBarrier = 1 in {
- def RET : F16<0b1001010100001000, (outs), (ins), "ret", [(AVRretflag)]>;
+ def RET : F16<0b1001010100001000, (outs), (ins), "ret", [(AVRretglue)]>;
- def RETI : F16<0b1001010100011000, (outs), (ins), "reti", [(AVRretiflag)]>;
+ def RETI : F16<0b1001010100011000, (outs), (ins), "reti", [(AVRretiglue)]>;
}
//===----------------------------------------------------------------------===//
@@ -1690,6 +1690,16 @@ let canFoldAsLoad = 1, isReMaterializable = 1, mayLoad = 1,
: F16<0b1001010111001000, (outs), (ins), "lpm", []>,
Requires<[HasLPM]>;
+ // These pseudo instructions are a combination of the OUT and LPM instructions.
+ let Defs = [R0] in {
+ def LPMBRdZ : Pseudo<(outs GPR8:$dst), (ins ZREG:$z), "lpmb\t$dst, $z", []>,
+ Requires<[HasLPM]>;
+
+ let Constraints = "@earlyclobber $dst" in
+ def LPMWRdZ : Pseudo<(outs DREGS:$dst), (ins ZREG:$z), "lpmw\t$dst, $z", []>,
+ Requires<[HasLPM]>;
+ }
+
def LPMRdZ : FLPMX<0, 0,
(outs GPR8
: $rd),
@@ -1708,14 +1718,6 @@ let canFoldAsLoad = 1, isReMaterializable = 1, mayLoad = 1,
"lpm\t$rd, $z+", []>,
Requires<[HasLPMX]>;
- let Constraints = "@earlyclobber $dst" in
- def LPMWRdZ : Pseudo<(outs DREGS
- : $dst),
- (ins ZREG
- : $z),
- "lpmw\t$dst, $z", []>,
- Requires<[HasLPMX]>;
-
def LPMWRdZPi : Pseudo<(outs DREGS
: $dst),
(ins ZREG
@@ -1742,17 +1744,20 @@ let mayLoad = 1, hasSideEffects = 0 in {
Requires<[HasELPMX]>;
}
- // These pseudos are combination of the OUT and ELPM instructions.
- let Defs = [R31R30], hasSideEffects = 1 in {
+ // These pseudo instructions are a combination of the OUT and ELPM instructions.
+ let Defs = [R0] in {
def ELPMBRdZ : Pseudo<(outs GPR8:$dst), (ins ZREG:$z, LD8:$p),
"elpmb\t$dst, $z, $p", []>,
- Requires<[HasELPMX]>;
+ Requires<[HasELPM]>;
let Constraints = "@earlyclobber $dst" in
def ELPMWRdZ : Pseudo<(outs DREGS:$dst), (ins ZREG:$z, LD8:$p),
"elpmw\t$dst, $z, $p", []>,
- Requires<[HasELPMX]>;
+ Requires<[HasELPM]>;
+ }
+ // These pseudos are a combination of the OUT and ELPM instructions.
+ let Defs = [R31R30], hasSideEffects = 1 in {
def ELPMBRdZPi : Pseudo<(outs GPR8:$dst), (ins ZREG:$z, LD8:$p),
"elpmb\t$dst, $z+, $p", []>,
Requires<[HasELPMX]>;
@@ -2023,13 +2028,21 @@ let Constraints = "$src = $rd", Defs = [SREG] in {
def ASRWLoRd : Pseudo<(outs DREGS:$rd), (ins DREGS:$src), "asrwlo\t$rd",
[(set i16:$rd, (AVRasrlo i16:$src)), (implicit SREG)]>;
-
- let hasSideEffects=0 in
- def ROLBRd : Pseudo<(outs GPR8
- : $rd),
- (ins GPR8:$src, GPR8:$zero),
- "rolb\t$rd",
- []>;
+ let Uses = [R1] in
+ def ROLBRdR1 : Pseudo<(outs GPR8:$rd),
+ (ins GPR8:$src),
+ "rolb\t$rd",
+ [(set i8:$rd, (AVRrol i8:$src)),
+ (implicit SREG)]>,
+ Requires<[HasNonTinyEncoding]>;
+
+ let Uses = [R17] in
+ def ROLBRdR17 : Pseudo<(outs GPR8:$rd),
+ (ins GPR8:$src),
+ "rolb\t$rd",
+ [(set i8:$rd, (AVRrol i8:$src)),
+ (implicit SREG)]>,
+ Requires<[HasTinyEncoding]>;
def RORBRd : Pseudo<(outs GPR8
: $rd),
diff --git a/llvm/lib/Target/AVR/AVRShiftExpand.cpp b/llvm/lib/Target/AVR/AVRShiftExpand.cpp
index b7dcd860467d..f549ae62c8b2 100644
--- a/llvm/lib/Target/AVR/AVRShiftExpand.cpp
+++ b/llvm/lib/Target/AVR/AVRShiftExpand.cpp
@@ -7,9 +7,10 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// Expand 32-bit shift instructions (shl, lshr, ashr) to inline loops, just
-/// like avr-gcc. This must be done in IR because otherwise the type legalizer
-/// will turn 32-bit shifts into (non-existing) library calls such as __ashlsi3.
+/// Expand non-8-bit and non-16-bit shift instructions (shl, lshr, ashr) to
+/// inline loops, just like avr-gcc. This must be done in IR because otherwise
+/// the type legalizer will turn 32-bit shifts into (non-existing) library calls
+/// such as __ashlsi3.
//
//===----------------------------------------------------------------------===//
@@ -51,8 +52,9 @@ bool AVRShiftExpand::runOnFunction(Function &F) {
if (!I.isShift())
// Only expand shift instructions (shl, lshr, ashr).
continue;
- if (I.getType() != Type::getInt32Ty(Ctx))
- // Only expand plain i32 types.
+ if (I.getType() == Type::getInt8Ty(Ctx) || I.getType() == Type::getInt16Ty(Ctx))
+ // Only expand non-8-bit and non-16-bit shifts, since those are expanded
+ // directly during isel.
continue;
if (isa<ConstantInt>(I.getOperand(1)))
// Only expand when the shift amount is not known.
@@ -75,7 +77,7 @@ bool AVRShiftExpand::runOnFunction(Function &F) {
void AVRShiftExpand::expand(BinaryOperator *BI) {
auto &Ctx = BI->getContext();
IRBuilder<> Builder(BI);
- Type *Int32Ty = Type::getInt32Ty(Ctx);
+ Type *InputTy = cast<Instruction>(BI)->getType();
Type *Int8Ty = Type::getInt8Ty(Ctx);
Value *Int8Zero = ConstantInt::get(Int8Ty, 0);
@@ -101,7 +103,7 @@ void AVRShiftExpand::expand(BinaryOperator *BI) {
Builder.SetInsertPoint(LoopBB);
PHINode *ShiftAmountPHI = Builder.CreatePHI(Int8Ty, 2);
ShiftAmountPHI->addIncoming(ShiftAmount, BB);
- PHINode *ValuePHI = Builder.CreatePHI(Int32Ty, 2);
+ PHINode *ValuePHI = Builder.CreatePHI(InputTy, 2);
ValuePHI->addIncoming(BI->getOperand(0), BB);
// Subtract the shift amount by one, as we're shifting one this loop
@@ -116,13 +118,13 @@ void AVRShiftExpand::expand(BinaryOperator *BI) {
Value *ValueShifted;
switch (BI->getOpcode()) {
case Instruction::Shl:
- ValueShifted = Builder.CreateShl(ValuePHI, ConstantInt::get(Int32Ty, 1));
+ ValueShifted = Builder.CreateShl(ValuePHI, ConstantInt::get(InputTy, 1));
break;
case Instruction::LShr:
- ValueShifted = Builder.CreateLShr(ValuePHI, ConstantInt::get(Int32Ty, 1));
+ ValueShifted = Builder.CreateLShr(ValuePHI, ConstantInt::get(InputTy, 1));
break;
case Instruction::AShr:
- ValueShifted = Builder.CreateAShr(ValuePHI, ConstantInt::get(Int32Ty, 1));
+ ValueShifted = Builder.CreateAShr(ValuePHI, ConstantInt::get(InputTy, 1));
break;
default:
llvm_unreachable("asked to expand an instruction that is not a shift");
@@ -137,7 +139,7 @@ void AVRShiftExpand::expand(BinaryOperator *BI) {
// Collect the resulting value. This is necessary in the IR but won't produce
// any actual instructions.
Builder.SetInsertPoint(BI);
- PHINode *Result = Builder.CreatePHI(Int32Ty, 2);
+ PHINode *Result = Builder.CreatePHI(InputTy, 2);
Result->addIncoming(BI->getOperand(0), BB);
Result->addIncoming(ValueShifted, LoopBB);
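The pass now expands any shift wider than 16 bits into the same shift-by-one loop it previously emitted only for i32. A hand-written C++ analogue of the generated control flow (illustrative; the real pass builds basic blocks and phi nodes in IR rather than a C loop):

#include <cstdint>
#include <cstdio>

static uint32_t shlByLoop(uint32_t Value, uint8_t Amount) {
  while (Amount != 0) { // loop block: shift one bit per trip
    Value <<= 1;
    --Amount;
  }
  return Value;         // the final phi collects the shifted value
}

int main() {
  std::printf("0x%08x\n", static_cast<unsigned>(shlByLoop(0x00000003, 5))); // 0x00000060
  return 0;
}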
diff --git a/llvm/lib/Target/AVR/AVRSubtarget.cpp b/llvm/lib/Target/AVR/AVRSubtarget.cpp
index c4e8d9afd3a9..8051f9d21714 100644
--- a/llvm/lib/Target/AVR/AVRSubtarget.cpp
+++ b/llvm/lib/Target/AVR/AVRSubtarget.cpp
@@ -29,7 +29,7 @@ namespace llvm {
AVRSubtarget::AVRSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const AVRTargetMachine &TM)
- : AVRGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
+ : AVRGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), InstrInfo(*this),
TLInfo(TM, initializeSubtargetDependencies(CPU, FS, TM)) {
// Parse features string.
ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS);
diff --git a/llvm/lib/Target/AVR/AVRSubtarget.h b/llvm/lib/Target/AVR/AVRSubtarget.h
index 25046e6cf1ea..5c7c600ebbf1 100644
--- a/llvm/lib/Target/AVR/AVRSubtarget.h
+++ b/llvm/lib/Target/AVR/AVRSubtarget.h
@@ -73,7 +73,6 @@ public:
bool hasLPMX() const { return m_hasLPMX; }
bool hasELPM() const { return m_hasELPM; }
bool hasELPMX() const { return m_hasELPMX; }
- bool hasPROGMEM() const { return m_hasPROGMEM; }
bool hasSPM() const { return m_hasSPM; }
bool hasSPMX() const { return m_hasSPMX; }
bool hasDES() const { return m_hasDES; }
@@ -82,9 +81,12 @@ public:
bool hasBREAK() const { return m_hasBREAK; }
bool hasTinyEncoding() const { return m_hasTinyEncoding; }
bool hasMemMappedGPR() const { return m_hasMemMappedGPR; }
+ bool hasLowByteFirst() const { return m_hasLowByteFirst; }
uint8_t getIORegisterOffset() const { return hasMemMappedGPR() ? 0x20 : 0x0; }
+ bool enableSubRegLiveness() const override { return true; }
+
/// Gets the ELF architecture for the e_flags field
/// of an ELF object file.
unsigned getELFArch() const {
@@ -128,7 +130,6 @@ private:
bool m_hasLPMX = false;
bool m_hasELPM = false;
bool m_hasELPMX = false;
- bool m_hasPROGMEM = false;
bool m_hasSPM = false;
bool m_hasSPMX = false;
bool m_hasDES = false;
@@ -136,6 +137,7 @@ private:
bool m_supportsMultiplication = false;
bool m_hasBREAK = false;
bool m_hasTinyEncoding = false;
+ bool m_hasLowByteFirst = false;
bool m_hasMemMappedGPR = false;
// Dummy member, used by FeatureSet's. We cannot have a SubtargetFeature with
diff --git a/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/llvm/lib/Target/AVR/AVRTargetMachine.cpp
index b87664eecef0..e0c0514f62c4 100644
--- a/llvm/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -14,7 +14,6 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/TargetRegistry.h"
diff --git a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
index 8d30b7886040..6c328ffc58a4 100644
--- a/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
+++ b/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -64,7 +64,7 @@ class AVRAsmParser : public MCTargetAsmParser {
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
- bool ParseDirective(AsmToken DirectiveID) override;
+ ParseStatus parseDirective(AsmToken DirectiveID) override;
OperandMatchResultTy parseMemriOperand(OperandVector &Operands);
@@ -90,7 +90,7 @@ class AVRAsmParser : public MCTargetAsmParser {
uint64_t const &ErrorInfo);
bool missingFeature(SMLoc const &Loc, uint64_t const &ErrorInfo);
- bool parseLiteralValues(unsigned SizeInBytes, SMLoc L);
+ ParseStatus parseLiteralValues(unsigned SizeInBytes, SMLoc L);
public:
AVRAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
@@ -674,19 +674,18 @@ bool AVRAsmParser::ParseInstruction(ParseInstructionInfo &Info,
return false;
}
-bool AVRAsmParser::ParseDirective(llvm::AsmToken DirectiveID) {
+ParseStatus AVRAsmParser::parseDirective(llvm::AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
- if (IDVal.lower() == ".long") {
- parseLiteralValues(SIZE_LONG, DirectiveID.getLoc());
- } else if (IDVal.lower() == ".word" || IDVal.lower() == ".short") {
- parseLiteralValues(SIZE_WORD, DirectiveID.getLoc());
- } else if (IDVal.lower() == ".byte") {
- parseLiteralValues(1, DirectiveID.getLoc());
- }
- return true;
+ if (IDVal.lower() == ".long")
+ return parseLiteralValues(SIZE_LONG, DirectiveID.getLoc());
+ if (IDVal.lower() == ".word" || IDVal.lower() == ".short")
+ return parseLiteralValues(SIZE_WORD, DirectiveID.getLoc());
+ if (IDVal.lower() == ".byte")
+ return parseLiteralValues(1, DirectiveID.getLoc());
+ return ParseStatus::NoMatch;
}
-bool AVRAsmParser::parseLiteralValues(unsigned SizeInBytes, SMLoc L) {
+ParseStatus AVRAsmParser::parseLiteralValues(unsigned SizeInBytes, SMLoc L) {
MCAsmParser &Parser = getParser();
AVRMCELFStreamer &AVRStreamer =
static_cast<AVRMCELFStreamer &>(Parser.getStreamer());
@@ -698,7 +697,7 @@ bool AVRAsmParser::parseLiteralValues(unsigned SizeInBytes, SMLoc L) {
MCSymbol *Symbol = getContext().getOrCreateSymbol(".text");
AVRStreamer.emitValueForModiferKind(Symbol, SizeInBytes, L,
AVRMCExpr::VK_AVR_None);
- return false;
+ return ParseStatus::NoMatch;
}
if (Parser.getTok().getKind() == AsmToken::Identifier &&
@@ -715,7 +714,10 @@ bool AVRAsmParser::parseLiteralValues(unsigned SizeInBytes, SMLoc L) {
MCSymbol *Symbol =
getContext().getOrCreateSymbol(Parser.getTok().getString());
AVRStreamer.emitValueForModiferKind(Symbol, SizeInBytes, L, ModifierKind);
- return false;
+ Lex(); // Eat the symbol name.
+ if (parseToken(AsmToken::RParen))
+ return ParseStatus::Failure;
+ return parseEOL();
}
auto parseOne = [&]() -> bool {
diff --git a/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp b/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
index 7674d9e354fa..07121ec29fff 100644
--- a/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
+++ b/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
@@ -16,6 +16,9 @@
#include "MCTargetDesc/AVRMCTargetDesc.h"
#include "TargetInfo/AVRTargetInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoderOps.h"
@@ -126,6 +129,10 @@ static DecodeStatus decodeMemri(MCInst &Inst, unsigned Insn, uint64_t Address,
static DecodeStatus decodeFBRk(MCInst &Inst, unsigned Insn, uint64_t Address,
const MCDisassembler *Decoder);
+static DecodeStatus decodeCondBranch(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+
static DecodeStatus decodeLoadStore(MCInst &Inst, unsigned Insn,
uint64_t Address,
const MCDisassembler *Decoder);
@@ -287,6 +294,40 @@ static DecodeStatus decodeFBRk(MCInst &Inst, unsigned Insn, uint64_t Address,
return MCDisassembler::Success;
}
+static DecodeStatus decodeCondBranch(MCInst &Inst, unsigned Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ // These 8 instructions are not defined as aliases of BRBS/BRBC.
+ DenseMap<unsigned, unsigned> brInsts = {
+ {0x000, AVR::BRLOk}, {0x400, AVR::BRSHk}, {0x001, AVR::BREQk},
+ {0x401, AVR::BRNEk}, {0x002, AVR::BRMIk}, {0x402, AVR::BRPLk},
+ {0x004, AVR::BRLTk}, {0x404, AVR::BRGEk}};
+
+ // Get the relative offset.
+ int16_t Offset = ((int16_t)((Insn & 0x3f8) << 6)) >> 8;
+
+ // Search the instruction pattern.
+ auto NotAlias = [&Insn](const std::pair<unsigned, unsigned> &I) {
+ return (Insn & 0x407) != I.first;
+ };
+ llvm::partition(brInsts, NotAlias);
+ auto It = llvm::partition_point(brInsts, NotAlias);
+
+ // Decode the instruction.
+ if (It != brInsts.end()) {
+ // This instruction is not an alias of BRBC/BRBS.
+ Inst.setOpcode(It->second);
+ Inst.addOperand(MCOperand::createImm(Offset));
+ } else {
+ // Fall back to an ordinary BRBS/BRBC.
+ Inst.setOpcode(Insn & 0x400 ? AVR::BRBCsk : AVR::BRBSsk);
+ Inst.addOperand(MCOperand::createImm(Insn & 7));
+ Inst.addOperand(MCOperand::createImm(Offset));
+ }
+
+ return MCDisassembler::Success;
+}
+
static DecodeStatus decodeLoadStore(MCInst &Inst, unsigned Insn,
uint64_t Address,
const MCDisassembler *Decoder) {
@@ -448,7 +489,7 @@ DecodeStatus AVRDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
return MCDisassembler::Fail;
// Try to decode AVRTiny instructions.
- if (STI.getFeatureBits()[AVR::FeatureTinyEncoding]) {
+ if (STI.hasFeature(AVR::FeatureTinyEncoding)) {
Result = decodeInstruction(DecoderTableAVRTiny16, Instr, Insn, Address,
this, STI);
if (Result != MCDisassembler::Fail)
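The offset arithmetic in decodeCondBranch above packs a signed 7-bit word offset into bits 9..3 of the encoding; shifting left by 6 and then arithmetically right by 8 both sign-extends the field and scales it to a byte offset. A minimal standalone sketch (assumption: plain C++, not part of the LLVM tree, with the example encodings chosen for illustration):

#include <cstdint>
#include <cstdio>

// Mirrors the extraction used in decodeCondBranch: bits 9..3 hold a signed
// 7-bit word offset; <<6 moves its sign bit into bit 15 and the arithmetic
// >>8 sign-extends while leaving the value multiplied by 2 (a byte offset).
static int16_t condBranchOffset(unsigned Insn) {
  return (int16_t)((Insn & 0x3f8) << 6) >> 8;
}

int main() {
  std::printf("%d\n", condBranchOffset(0xF021)); // BREQ .+8 -> prints 8
  std::printf("%d\n", condBranchOffset(0xF3F9)); // BREQ .-2 -> prints -2
  return 0;
}
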
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
index cf87106ec5a3..c94469c8d9f3 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -514,6 +514,12 @@ bool AVRAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
// Fixups which should always be recorded as relocations.
case AVR::fixup_7_pcrel:
case AVR::fixup_13_pcrel:
+ // Do not force relocation for PC-relative branches like 'rjmp .',
+ // 'rcall . - off' and 'breq . + off'.
+ if (const auto *SymA = Target.getSymA())
+ if (SymA->getSymbol().getName().size() == 0)
+ return false;
+ [[fallthrough]];
case AVR::fixup_call:
return true;
}
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
index c4cb595f775a..d6a30e4dfa22 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
@@ -16,8 +16,8 @@
#include "MCTargetDesc/AVRFixupKinds.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
index ade5df18c3b9..4ac54c8876d7 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRELFStreamer.cpp
@@ -3,8 +3,8 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include "AVRMCTargetDesc.h"
@@ -61,6 +61,7 @@ AVRELFStreamer::AVRELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI)
unsigned EFlags = MCA.getELFHeaderEFlags();
EFlags |= getEFlagsForFeatureSet(STI.getFeatureBits());
+ EFlags |= ELF::EF_AVR_LINKRELAX_PREPARED;
MCA.setELFHeaderEFlags(EFlags);
}
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
index c377721b09ba..66786eb3244e 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp
@@ -12,7 +12,7 @@
#include "AVRMCAsmInfo.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
index c8bb410e4882..c08e293d0437 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
@@ -146,7 +146,8 @@ unsigned AVRMCCodeEmitter::encodeMemri(const MCInst &MI, unsigned OpNo,
switch (RegOp.getReg()) {
default:
- llvm_unreachable("Expected either Y or Z register");
+ Ctx.reportError(MI.getLoc(), "Expected either Y or Z register");
+ return 0;
case AVR::R31R30:
RegBit = 0;
break; // Z register
@@ -164,7 +165,7 @@ unsigned AVRMCCodeEmitter::encodeMemri(const MCInst &MI, unsigned OpNo,
Fixups.push_back(MCFixup::create(0, OffsetOp.getExpr(),
MCFixupKind(AVR::fixup_6), MI.getLoc()));
} else {
- llvm_unreachable("invalid value for offset");
+ llvm_unreachable("Invalid value for offset");
}
return (RegBit << 6) | OffsetBits;
@@ -269,18 +270,8 @@ unsigned AVRMCCodeEmitter::getMachineOpValue(const MCInst &MI,
return getExprOpValue(MO.getExpr(), Fixups, STI);
}
-void AVRMCCodeEmitter::emitInstruction(uint64_t Val, unsigned Size,
- const MCSubtargetInfo &STI,
- raw_ostream &OS) const {
- size_t WordCount = Size / 2;
-
- for (int64_t i = WordCount - 1; i >= 0; --i) {
- uint16_t Word = (Val >> (i * 16)) & 0xFFFF;
- support::endian::write(OS, Word, support::endianness::little);
- }
-}
-
-void AVRMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+void AVRMCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
@@ -291,7 +282,11 @@ void AVRMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
assert(Size > 0 && "Instruction size cannot be zero");
uint64_t BinaryOpCode = getBinaryCodeForInstr(MI, Fixups, STI);
- emitInstruction(BinaryOpCode, Size, STI, OS);
+
+ for (int64_t i = Size / 2 - 1; i >= 0; --i) {
+ uint16_t Word = (BinaryOpCode >> (i * 16)) & 0xFFFF;
+ support::endian::write(CB, Word, support::endianness::little);
+ }
}
MCCodeEmitter *createAVRMCCodeEmitter(const MCInstrInfo &MCII,
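With the emitInstruction helper removed above, encodeInstruction writes the words directly: the instruction is split into 16-bit words, emitted most-significant word first, with each word in little-endian byte order. A small sketch (assumption: standalone C++, with std::vector standing in for the SmallVectorImpl<char> buffer):

#include <cstdint>
#include <cstdio>
#include <vector>

// Same loop shape as the new encodeInstruction body: high word first,
// each word written low byte then high byte (little-endian).
static void emitWords(uint64_t Val, unsigned Size, std::vector<char> &CB) {
  for (int64_t i = Size / 2 - 1; i >= 0; --i) {
    uint16_t Word = (Val >> (i * 16)) & 0xFFFF;
    CB.push_back(char(Word & 0xFF));        // low byte of this word
    CB.push_back(char((Word >> 8) & 0xFF)); // high byte of this word
  }
}

int main() {
  std::vector<char> CB;
  emitWords(0xAABBCCDDu, 4, CB); // a 4-byte opcode streams as BB AA DD CC
  for (unsigned char C : CB)
    std::printf("%02X ", C);
  std::printf("\n");
  return 0;
}
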
diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
index 1bfa79f26b27..a00bbb9ae498 100644
--- a/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
+++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
@@ -95,10 +95,7 @@ private:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- void emitInstruction(uint64_t Val, unsigned Size, const MCSubtargetInfo &STI,
- raw_ostream &OS) const;
-
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 137e67bd215b..43edcaace322 100644
--- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -47,8 +47,6 @@ class BPFAsmParser : public MCTargetAsmParser {
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
- bool ParseDirective(AsmToken DirectiveID) override;
-
// "=" is used as assignment operator for assembly statment, so can't be used
// for symbol assignment.
bool equalIsAsmAssignment() override { return false; }
@@ -253,6 +251,14 @@ public:
.Case("ll", true)
.Case("skb", true)
.Case("s", true)
+ .Case("atomic_fetch_add", true)
+ .Case("atomic_fetch_and", true)
+ .Case("atomic_fetch_or", true)
+ .Case("atomic_fetch_xor", true)
+ .Case("xchg_64", true)
+ .Case("xchg32_32", true)
+ .Case("cmpxchg_64", true)
+ .Case("cmpxchg32_32", true)
.Default(false);
}
};
@@ -483,6 +489,11 @@ bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (parseRegister(Operands) == MatchOperand_Success)
continue;
+ if (getLexer().is(AsmToken::Comma)) {
+ getLexer().Lex();
+ continue;
+ }
+
// Attempt to parse token as an immediate
if (parseImmediate(Operands) != MatchOperand_Success) {
SMLoc Loc = getLexer().getLoc();
@@ -503,8 +514,6 @@ bool BPFAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return false;
}
-bool BPFAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
-
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFAsmParser() {
RegisterMCAsmParser<BPFAsmParser> X(getTheBPFTarget());
RegisterMCAsmParser<BPFAsmParser> Y(getTheBPFleTarget());
diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
index b48c122f48b4..9b7bab785ee9 100644
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -18,12 +18,8 @@ namespace llvm {
class BPFTargetMachine;
class PassRegistry;
-ModulePass *createBPFAdjustOpt();
ModulePass *createBPFCheckAndAdjustIR();
-FunctionPass *createBPFAbstractMemberAccess(BPFTargetMachine *TM);
-FunctionPass *createBPFPreserveDIType();
-FunctionPass *createBPFIRPeephole();
FunctionPass *createBPFISelDag(BPFTargetMachine &TM);
FunctionPass *createBPFMISimplifyPatchablePass();
FunctionPass *createBPFMIPeepholePass();
@@ -31,17 +27,13 @@ FunctionPass *createBPFMIPeepholeTruncElimPass();
FunctionPass *createBPFMIPreEmitPeepholePass();
FunctionPass *createBPFMIPreEmitCheckingPass();
-void initializeBPFAbstractMemberAccessLegacyPassPass(PassRegistry &);
-void initializeBPFAdjustOptPass(PassRegistry&);
void initializeBPFCheckAndAdjustIRPass(PassRegistry&);
void initializeBPFDAGToDAGISelPass(PassRegistry &);
-void initializeBPFIRPeepholePass(PassRegistry &);
void initializeBPFMIPeepholePass(PassRegistry&);
void initializeBPFMIPeepholeTruncElimPass(PassRegistry &);
void initializeBPFMIPreEmitCheckingPass(PassRegistry&);
void initializeBPFMIPreEmitPeepholePass(PassRegistry &);
void initializeBPFMISimplifyPatchablePass(PassRegistry &);
-void initializeBPFPreserveDITypePass(PassRegistry &);
class BPFAbstractMemberAccessPass
: public PassInfoMixin<BPFAbstractMemberAccessPass> {
diff --git a/llvm/lib/Target/BPF/BPF.td b/llvm/lib/Target/BPF/BPF.td
index fad966ff5a13..0cc409dfcee1 100644
--- a/llvm/lib/Target/BPF/BPF.td
+++ b/llvm/lib/Target/BPF/BPF.td
@@ -17,12 +17,6 @@ def BPFInstrInfo : InstrInfo;
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
-def : Proc<"generic", []>;
-def : Proc<"v1", []>;
-def : Proc<"v2", []>;
-def : Proc<"v3", []>;
-def : Proc<"probe", []>;
-
def DummyFeature : SubtargetFeature<"dummy", "isDummyMode",
"true", "unused feature">;
@@ -32,6 +26,12 @@ def ALU32 : SubtargetFeature<"alu32", "HasAlu32", "true",
def DwarfRIS: SubtargetFeature<"dwarfris", "UseDwarfRIS", "true",
"Disable MCAsmInfo DwarfUsesRelocationsAcrossSections">;
+def : Proc<"generic", []>;
+def : Proc<"v1", []>;
+def : Proc<"v2", []>;
+def : Proc<"v3", [ALU32]>;
+def : Proc<"probe", []>;
+
def BPFInstPrinter : AsmWriter {
string AsmWriterClassName = "InstPrinter";
bit isMCAsmWriter = 1;
diff --git a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
index a130a9c3e08e..9c99765b60c0 100644
--- a/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
+++ b/llvm/lib/Target/BPF/BPFAbstractMemberAccess.cpp
@@ -188,35 +188,8 @@ private:
};
std::map<std::string, GlobalVariable *> BPFAbstractMemberAccess::GEPGlobals;
-
-class BPFAbstractMemberAccessLegacyPass final : public FunctionPass {
- BPFTargetMachine *TM;
-
- bool runOnFunction(Function &F) override {
- return BPFAbstractMemberAccess(TM).run(F);
- }
-
-public:
- static char ID;
-
- // Add optional BPFTargetMachine parameter so that BPF backend can add the
- // phase with target machine to find out the endianness. The default
- // constructor (without parameters) is used by the pass manager for managing
- // purposes.
- BPFAbstractMemberAccessLegacyPass(BPFTargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM) {}
-};
-
} // End anonymous namespace
-char BPFAbstractMemberAccessLegacyPass::ID = 0;
-INITIALIZE_PASS(BPFAbstractMemberAccessLegacyPass, DEBUG_TYPE,
- "BPF Abstract Member Access", false, false)
-
-FunctionPass *llvm::createBPFAbstractMemberAccess(BPFTargetMachine *TM) {
- return new BPFAbstractMemberAccessLegacyPass(TM);
-}
-
bool BPFAbstractMemberAccess::run(Function &F) {
LLVM_DEBUG(dbgs() << "********** Abstract Member Accesses **********\n");
diff --git a/llvm/lib/Target/BPF/BPFAdjustOpt.cpp b/llvm/lib/Target/BPF/BPFAdjustOpt.cpp
index e109235434e9..4ab0cbcc9247 100644
--- a/llvm/lib/Target/BPF/BPFAdjustOpt.cpp
+++ b/llvm/lib/Target/BPF/BPFAdjustOpt.cpp
@@ -40,15 +40,6 @@ static cl::opt<bool> DisableBPFavoidSpeculation(
cl::init(false));
namespace {
-
-class BPFAdjustOpt final : public ModulePass {
-public:
- static char ID;
-
- BPFAdjustOpt() : ModulePass(ID) {}
- bool runOnModule(Module &M) override;
-};
-
class BPFAdjustOptImpl {
struct PassThroughInfo {
Instruction *Input;
@@ -78,14 +69,6 @@ private:
} // End anonymous namespace
-char BPFAdjustOpt::ID = 0;
-INITIALIZE_PASS(BPFAdjustOpt, "bpf-adjust-opt", "BPF Adjust Optimization",
- false, false)
-
-ModulePass *llvm::createBPFAdjustOpt() { return new BPFAdjustOpt(); }
-
-bool BPFAdjustOpt::runOnModule(Module &M) { return BPFAdjustOptImpl(&M).run(); }
-
bool BPFAdjustOptImpl::run() {
bool Changed = adjustICmpToBuiltin();
diff --git a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
index 6b74e56d6b3e..a3616ae7ebab 100644
--- a/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
+++ b/llvm/lib/Target/BPF/BPFCheckAndAdjustIR.cpp
@@ -18,8 +18,10 @@
#include "BPF.h"
#include "BPFCORE.h"
#include "BPFTargetMachine.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -41,12 +43,14 @@ class BPFCheckAndAdjustIR final : public ModulePass {
public:
static char ID;
BPFCheckAndAdjustIR() : ModulePass(ID) {}
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const override;
private:
void checkIR(Module &M);
bool adjustIR(Module &M);
bool removePassThroughBuiltin(Module &M);
bool removeCompareBuiltin(Module &M);
+ bool sinkMinMax(Module &M);
};
} // End anonymous namespace
@@ -161,9 +165,206 @@ bool BPFCheckAndAdjustIR::removeCompareBuiltin(Module &M) {
return Changed;
}
+struct MinMaxSinkInfo {
+ ICmpInst *ICmp;
+ Value *Other;
+ ICmpInst::Predicate Predicate;
+ CallInst *MinMax;
+ ZExtInst *ZExt;
+ SExtInst *SExt;
+
+ MinMaxSinkInfo(ICmpInst *ICmp, Value *Other, ICmpInst::Predicate Predicate)
+ : ICmp(ICmp), Other(Other), Predicate(Predicate), MinMax(nullptr),
+ ZExt(nullptr), SExt(nullptr) {}
+};
+
+static bool sinkMinMaxInBB(BasicBlock &BB,
+ const std::function<bool(Instruction *)> &Filter) {
+ // Check if V is:
+ // (fn %a %b) or (ext (fn %a %b))
+ // Where:
+ // ext := sext | zext
+ // fn := smin | umin | smax | umax
+ auto IsMinMaxCall = [=](Value *V, MinMaxSinkInfo &Info) {
+ if (auto *ZExt = dyn_cast<ZExtInst>(V)) {
+ V = ZExt->getOperand(0);
+ Info.ZExt = ZExt;
+ } else if (auto *SExt = dyn_cast<SExtInst>(V)) {
+ V = SExt->getOperand(0);
+ Info.SExt = SExt;
+ }
+
+ auto *Call = dyn_cast<CallInst>(V);
+ if (!Call)
+ return false;
+
+ auto *Called = dyn_cast<Function>(Call->getCalledOperand());
+ if (!Called)
+ return false;
+
+ switch (Called->getIntrinsicID()) {
+ case Intrinsic::smin:
+ case Intrinsic::umin:
+ case Intrinsic::smax:
+ case Intrinsic::umax:
+ break;
+ default:
+ return false;
+ }
+
+ if (!Filter(Call))
+ return false;
+
+ Info.MinMax = Call;
+
+ return true;
+ };
+
+ auto ZeroOrSignExtend = [](IRBuilder<> &Builder, Value *V,
+ MinMaxSinkInfo &Info) {
+ if (Info.SExt) {
+ if (Info.SExt->getType() == V->getType())
+ return V;
+ return Builder.CreateSExt(V, Info.SExt->getType());
+ }
+ if (Info.ZExt) {
+ if (Info.ZExt->getType() == V->getType())
+ return V;
+ return Builder.CreateZExt(V, Info.ZExt->getType());
+ }
+ return V;
+ };
+
+ bool Changed = false;
+ SmallVector<MinMaxSinkInfo, 2> SinkList;
+
+ // Check BB for instructions like:
+ // insn := (icmp %a (fn ...)) | (icmp (fn ...) %a)
+ //
+ // Where:
+ // fn := min | max | (sext (min ...)) | (sext (max ...))
+ //
+ // Put such instructions to SinkList.
+ for (Instruction &I : BB) {
+ ICmpInst *ICmp = dyn_cast<ICmpInst>(&I);
+ if (!ICmp)
+ continue;
+ if (!ICmp->isRelational())
+ continue;
+ MinMaxSinkInfo First(ICmp, ICmp->getOperand(1),
+ ICmpInst::getSwappedPredicate(ICmp->getPredicate()));
+ MinMaxSinkInfo Second(ICmp, ICmp->getOperand(0), ICmp->getPredicate());
+ bool FirstMinMax = IsMinMaxCall(ICmp->getOperand(0), First);
+ bool SecondMinMax = IsMinMaxCall(ICmp->getOperand(1), Second);
+ if (!(FirstMinMax ^ SecondMinMax))
+ continue;
+ SinkList.push_back(FirstMinMax ? First : Second);
+ }
+
+ // Iterate SinkList and replace each (icmp ...) with corresponding
+ // `x < a && x < b` or similar expression.
+ for (auto &Info : SinkList) {
+ ICmpInst *ICmp = Info.ICmp;
+ CallInst *MinMax = Info.MinMax;
+ Intrinsic::ID IID = MinMax->getCalledFunction()->getIntrinsicID();
+ ICmpInst::Predicate P = Info.Predicate;
+ if (ICmpInst::isSigned(P) && IID != Intrinsic::smin &&
+ IID != Intrinsic::smax)
+ continue;
+
+ IRBuilder<> Builder(ICmp);
+ Value *X = Info.Other;
+ Value *A = ZeroOrSignExtend(Builder, MinMax->getArgOperand(0), Info);
+ Value *B = ZeroOrSignExtend(Builder, MinMax->getArgOperand(1), Info);
+ bool IsMin = IID == Intrinsic::smin || IID == Intrinsic::umin;
+ bool IsMax = IID == Intrinsic::smax || IID == Intrinsic::umax;
+ bool IsLess = ICmpInst::isLE(P) || ICmpInst::isLT(P);
+ bool IsGreater = ICmpInst::isGE(P) || ICmpInst::isGT(P);
+ assert(IsMin ^ IsMax);
+ assert(IsLess ^ IsGreater);
+
+ Value *Replacement;
+ Value *LHS = Builder.CreateICmp(P, X, A);
+ Value *RHS = Builder.CreateICmp(P, X, B);
+ if ((IsLess && IsMin) || (IsGreater && IsMax))
+ // x < min(a, b) -> x < a && x < b
+ // x > max(a, b) -> x > a && x > b
+ Replacement = Builder.CreateLogicalAnd(LHS, RHS);
+ else
+ // x > min(a, b) -> x > a || x > b
+ // x < max(a, b) -> x < a || x < b
+ Replacement = Builder.CreateLogicalOr(LHS, RHS);
+
+ ICmp->replaceAllUsesWith(Replacement);
+
+ Instruction *ToRemove[] = {ICmp, Info.ZExt, Info.SExt, MinMax};
+ for (Instruction *I : ToRemove)
+ if (I && I->use_empty())
+ I->eraseFromParent();
+
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+// Do the following transformation:
+//
+// x < min(a, b) -> x < a && x < b
+// x > min(a, b) -> x > a || x > b
+// x < max(a, b) -> x < a || x < b
+// x > max(a, b) -> x > a && x > b
+//
+// Such patterns are introduced by LICM.cpp:hoistMinMax()
+// transformation and might lead to BPF verification failures for
+// older kernels.
+//
+// To minimize "collateral" changes, only do it for icmp + min/max
+// calls when icmp is inside a loop and min/max is outside of that
+// loop.
+//
+// Verification failure happens when:
+// - RHS operand of some `icmp LHS, RHS` is replaced by some RHS1;
+// - verifier can recognize RHS as a constant scalar in some context;
+// - verifier can't recognize RHS1 as a constant scalar in the same
+// context;
+//
+// The "constant scalar" is not a compile time constant, but a register
+// that holds a scalar value known to verifier at some point in time
+// during abstract interpretation.
+//
+// See also:
+// https://lore.kernel.org/bpf/20230406164505.1046801-1-yhs@fb.com/
+bool BPFCheckAndAdjustIR::sinkMinMax(Module &M) {
+ bool Changed = false;
+
+ for (Function &F : M) {
+ if (F.isDeclaration())
+ continue;
+
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>(F).getLoopInfo();
+ for (Loop *L : LI)
+ for (BasicBlock *BB : L->blocks()) {
+ // Filter out instructions coming from the same loop
+ Loop *BBLoop = LI.getLoopFor(BB);
+ auto OtherLoopFilter = [&](Instruction *I) {
+ return LI.getLoopFor(I->getParent()) != BBLoop;
+ };
+ Changed |= sinkMinMaxInBB(*BB, OtherLoopFilter);
+ }
+ }
+
+ return Changed;
+}
+
+void BPFCheckAndAdjustIR::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfoWrapperPass>();
+}
+
bool BPFCheckAndAdjustIR::adjustIR(Module &M) {
bool Changed = removePassThroughBuiltin(M);
Changed = removeCompareBuiltin(M) || Changed;
+ Changed = sinkMinMax(M) || Changed;
return Changed;
}
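The sinkMinMax pass added above undoes LICM's min/max hoisting for comparisons inside loops, so the BPF verifier still sees comparisons against values it can track as scalars. A hedged source-level illustration (assumption: simplified C++ rather than the LLVM IR the pass actually rewrites):

#include <algorithm>

// Before: LICM has hoisted the min out of the loop, so the comparison is
// against a value the verifier may not recognize as a constant scalar.
bool beforeSink(long x, long a, long b) {
  long m = std::min(a, b);
  return x < m;
}

// After sinkMinMax: x < min(a, b)  ->  x < a && x < b
// (x > min(a, b) and x < max(a, b) use '||' instead, per the table above).
bool afterSink(long x, long a, long b) {
  return x < a && x < b;
}
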
diff --git a/llvm/lib/Target/BPF/BPFIRPeephole.cpp b/llvm/lib/Target/BPF/BPFIRPeephole.cpp
index 283b3687f7cc..a257f582f2a1 100644
--- a/llvm/lib/Target/BPF/BPFIRPeephole.cpp
+++ b/llvm/lib/Target/BPF/BPFIRPeephole.cpp
@@ -90,23 +90,8 @@ static bool BPFIRPeepholeImpl(Function &F) {
return Changed;
}
-
-class BPFIRPeephole final : public FunctionPass {
- bool runOnFunction(Function &F) override;
-
-public:
- static char ID;
- BPFIRPeephole() : FunctionPass(ID) {}
-};
} // End anonymous namespace
-char BPFIRPeephole::ID = 0;
-INITIALIZE_PASS(BPFIRPeephole, DEBUG_TYPE, "BPF IR Peephole", false, false)
-
-FunctionPass *llvm::createBPFIRPeephole() { return new BPFIRPeephole(); }
-
-bool BPFIRPeephole::runOnFunction(Function &F) { return BPFIRPeepholeImpl(F); }
-
PreservedAnalyses BPFIRPeepholePass::run(Function &F,
FunctionAnalysisManager &AM) {
return BPFIRPeepholeImpl(F) ? PreservedAnalyses::none()
diff --git a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
index e830eb20fec0..fa626a775c83 100644
--- a/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -77,7 +77,6 @@ private:
// Node preprocessing cases
void PreprocessLoad(SDNode *Node, SelectionDAG::allnodes_iterator &I);
- void PreprocessCopyToReg(SDNode *Node);
void PreprocessTrunc(SDNode *Node, SelectionDAG::allnodes_iterator &I);
// Find constants from a constant structure
@@ -172,7 +171,7 @@ bool BPFDAGToDAGISel::SelectInlineAsmMemoryOperand(
}
SDLoc DL(Op);
- SDValue AluOp = CurDAG->getTargetConstant(ISD::ADD, DL, MVT::i32);;
+ SDValue AluOp = CurDAG->getTargetConstant(ISD::ADD, DL, MVT::i32);
OutOps.push_back(Op0);
OutOps.push_back(Op1);
OutOps.push_back(AluOp);
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index c5666b395899..83a4bfb2f758 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -325,7 +325,7 @@ SDValue BPFTargetLowering::LowerFormalArguments(
switch (SimpleTy) {
default: {
errs() << "LowerFormalArguments Unhandled argument type: "
- << RegVT.getEVTString() << '\n';
+ << RegVT << '\n';
llvm_unreachable(nullptr);
}
case MVT::i32:
@@ -396,7 +396,7 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CCInfo.AnalyzeCallOperands(Outs, getHasAlu32() ? CC_BPF32 : CC_BPF64);
- unsigned NumBytes = CCInfo.getNextStackOffset();
+ unsigned NumBytes = CCInfo.getStackSize();
if (Outs.size() > MaxArgs)
fail(CLI.DL, DAG, "too many args to ", Callee);
@@ -445,14 +445,14 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
llvm_unreachable("call arg pass bug");
}
- SDValue InFlag;
+ SDValue InGlue;
// Build a sequence of copy-to-reg nodes chained together with token chain and
- // flag operands which copy the outgoing args into registers. The InFlag in
+ // flag operands which copy the outgoing args into registers. The InGlue is
// necessary since all emitted instructions must be stuck together.
for (auto &Reg : RegsToPass) {
- Chain = DAG.getCopyToReg(Chain, CLI.DL, Reg.first, Reg.second, InFlag);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, CLI.DL, Reg.first, Reg.second, InGlue);
+ InGlue = Chain.getValue(1);
}
// If the callee is a GlobalAddress node (quite common, every direct call is)
@@ -479,19 +479,21 @@ SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
for (auto &Reg : RegsToPass)
Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ if (InGlue.getNode())
+ Ops.push_back(InGlue);
Chain = DAG.getNode(BPFISD::CALL, CLI.DL, NodeTys, Ops);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
+
+ DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
// Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InFlag, CLI.DL);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, CLI.DL);
+ InGlue = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, CLI.DL, DAG,
+ return LowerCallResult(Chain, InGlue, CallConv, IsVarArg, Ins, CLI.DL, DAG,
InVals);
}
@@ -501,7 +503,7 @@ BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &DL, SelectionDAG &DAG) const {
- unsigned Opc = BPFISD::RET_FLAG;
+ unsigned Opc = BPFISD::RET_GLUE;
// CCValAssign - represent the assignment of the return value to a location
SmallVector<CCValAssign, 16> RVLocs;
@@ -518,7 +520,7 @@ BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);
- SDValue Flag;
+ SDValue Glue;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
@@ -526,25 +528,25 @@ BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Flag);
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Glue);
// Guarantee that all emitted copies are stuck together,
// avoiding something bad.
- Flag = Chain.getValue(1);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
RetOps[0] = Chain; // Update chain.
- // Add the flag if we have it.
- if (Flag.getNode())
- RetOps.push_back(Flag);
+ // Add the glue if we have it.
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}
SDValue BPFTargetLowering::LowerCallResult(
- SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg,
+ SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
@@ -557,7 +559,7 @@ SDValue BPFTargetLowering::LowerCallResult(
fail(DL, DAG, "only small returns supported");
for (unsigned i = 0, e = Ins.size(); i != e; ++i)
InVals.push_back(DAG.getConstant(0, DL, Ins[i].VT));
- return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InFlag).getValue(1);
+ return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InGlue).getValue(1);
}
CCInfo.AnalyzeCallResult(Ins, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);
@@ -565,8 +567,8 @@ SDValue BPFTargetLowering::LowerCallResult(
// Copy all of the result registers out of their specified physreg.
for (auto &Val : RVLocs) {
Chain = DAG.getCopyFromReg(Chain, DL, Val.getLocReg(),
- Val.getValVT(), InFlag).getValue(1);
- InFlag = Chain.getValue(2);
+ Val.getValVT(), InGlue).getValue(1);
+ InGlue = Chain.getValue(2);
InVals.push_back(Chain.getValue(0));
}
@@ -624,8 +626,8 @@ const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((BPFISD::NodeType)Opcode) {
case BPFISD::FIRST_NUMBER:
break;
- case BPFISD::RET_FLAG:
- return "BPFISD::RET_FLAG";
+ case BPFISD::RET_GLUE:
+ return "BPFISD::RET_GLUE";
case BPFISD::CALL:
return "BPFISD::CALL";
case BPFISD::SELECT_CC:
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index dcc53019db75..9b6fe8531443 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -23,7 +23,7 @@ class BPFSubtarget;
namespace BPFISD {
enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- RET_FLAG,
+ RET_GLUE,
CALL,
SELECT_CC,
BR_CC,
@@ -77,7 +77,7 @@ private:
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
// Lower the result values of a call, copying them out of physregs into vregs
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &DL, SelectionDAG &DAG,
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 6cac478561b2..27bd87667b84 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -35,7 +35,7 @@ def SDT_BPFMEMCPY : SDTypeProfile<0, 4, [SDTCisVT<0, i64>,
def BPFcall : SDNode<"BPFISD::CALL", SDT_BPFCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
-def BPFretflag : SDNode<"BPFISD::RET_FLAG", SDTNone,
+def BPFretglue : SDNode<"BPFISD::RET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def BPFcallseq_start: SDNode<"ISD::CALLSEQ_START", SDT_BPFCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
@@ -536,7 +536,7 @@ class RET<string OpcodeStr>
(outs),
(ins),
!strconcat(OpcodeStr, ""),
- [(BPFretflag)]> {
+ [(BPFretglue)]> {
let Inst{31-0} = 0;
let BPFClass = BPF_JMP;
}
diff --git a/llvm/lib/Target/BPF/BPFMIChecking.cpp b/llvm/lib/Target/BPF/BPFMIChecking.cpp
index b462f1d1427d..89ac485b1675 100644
--- a/llvm/lib/Target/BPF/BPFMIChecking.cpp
+++ b/llvm/lib/Target/BPF/BPFMIChecking.cpp
@@ -145,8 +145,8 @@ static bool hasLiveDefs(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
// Otherwise, return true if any aliased SuperReg of GPR32 is not dead.
for (auto I : GPR32LiveDefs)
- for (MCSuperRegIterator SR(I, TRI); SR.isValid(); ++SR)
- if (!llvm::is_contained(GPR64DeadDefs, *SR))
+ for (MCPhysReg SR : TRI->superregs(I))
+ if (!llvm::is_contained(GPR64DeadDefs, SR))
return true;
return false;
diff --git a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
index 58d18e66a6aa..ec770eecb2e5 100644
--- a/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
+++ b/llvm/lib/Target/BPF/BPFPreserveDIType.cpp
@@ -125,28 +125,8 @@ static bool BPFPreserveDITypeImpl(Function &F) {
return true;
}
-
-class BPFPreserveDIType final : public FunctionPass {
- bool runOnFunction(Function &F) override;
-
-public:
- static char ID;
- BPFPreserveDIType() : FunctionPass(ID) {}
-};
} // End anonymous namespace
-char BPFPreserveDIType::ID = 0;
-INITIALIZE_PASS(BPFPreserveDIType, DEBUG_TYPE, "BPF Preserve Debuginfo Type",
- false, false)
-
-FunctionPass *llvm::createBPFPreserveDIType() {
- return new BPFPreserveDIType();
-}
-
-bool BPFPreserveDIType::runOnFunction(Function &F) {
- return BPFPreserveDITypeImpl(F);
-}
-
PreservedAnalyses BPFPreserveDITypePass::run(Function &F,
FunctionAnalysisManager &AM) {
return BPFPreserveDITypeImpl(F) ? PreservedAnalyses::none()
diff --git a/llvm/lib/Target/BPF/BPFRegisterInfo.cpp b/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
index 9bd39fd285a0..8761e4aa258c 100644
--- a/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
+++ b/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
@@ -20,12 +20,18 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#define GET_REGINFO_TARGET_DESC
#include "BPFGenRegisterInfo.inc"
using namespace llvm;
+static cl::opt<int>
+ BPFStackSizeOption("bpf-stack-size",
+ cl::desc("Specify the BPF stack size limit"),
+ cl::init(512));
+
BPFRegisterInfo::BPFRegisterInfo()
: BPFGenRegisterInfo(BPF::R0) {}
@@ -43,13 +49,16 @@ BitVector BPFRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
static void WarnSize(int Offset, MachineFunction &MF, DebugLoc& DL)
{
- if (Offset <= -512) {
- const Function &F = MF.getFunction();
- DiagnosticInfoUnsupported DiagStackSize(F,
- "Looks like the BPF stack limit of 512 bytes is exceeded. "
- "Please move large on stack variables into BPF per-cpu array map.\n",
- DL);
- F.getContext().diagnose(DiagStackSize);
+ if (Offset <= -BPFStackSizeOption) {
+ const Function &F = MF.getFunction();
+ DiagnosticInfoUnsupported DiagStackSize(
+ F,
+ "Looks like the BPF stack limit is exceeded. "
+ "Please move large on stack variables into BPF per-cpu array map. For "
+ "non-kernel uses, the stack can be increased using -mllvm "
+ "-bpf-stack-size.\n",
+ DL);
+ F.getContext().diagnose(DiagStackSize);
}
}
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp
index e4d98b85e58b..d66933fef72d 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -13,7 +13,7 @@
#include "BPFSubtarget.h"
#include "BPF.h"
#include "llvm/MC/TargetRegistry.h"
-#include "llvm/Support/Host.h"
+#include "llvm/TargetParser/Host.h"
using namespace llvm;
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.h b/llvm/lib/Target/BPF/BPFSubtarget.h
index 7649e0e92222..8f833b3c75d0 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.h
+++ b/llvm/lib/Target/BPF/BPFSubtarget.h
@@ -38,7 +38,6 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
private:
void initializeEnvironment();
void initSubtargetFeatures(StringRef CPU, StringRef FS);
- bool probeJmpExt();
protected:
// unused
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 320acdbc769f..c47e8274b2e2 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -18,7 +18,6 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PassManager.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
@@ -41,10 +40,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeBPFTarget() {
RegisterTargetMachine<BPFTargetMachine> Z(getTheBPFTarget());
PassRegistry &PR = *PassRegistry::getPassRegistry();
- initializeBPFAbstractMemberAccessLegacyPassPass(PR);
- initializeBPFPreserveDITypePass(PR);
- initializeBPFIRPeepholePass(PR);
- initializeBPFAdjustOptPass(PR);
initializeBPFCheckAndAdjustIRPass(PR);
initializeBPFMIPeepholePass(PR);
initializeBPFMIPeepholeTruncElimPass(PR);
@@ -104,6 +99,15 @@ TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) {
}
void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
+ PB.registerPipelineParsingCallback(
+ [](StringRef PassName, FunctionPassManager &FPM,
+ ArrayRef<PassBuilder::PipelineElement>) {
+ if (PassName == "bpf-ir-peephole") {
+ FPM.addPass(BPFIRPeepholePass());
+ return true;
+ }
+ return false;
+ });
PB.registerPipelineStartEPCallback(
[=](ModulePassManager &MPM, OptimizationLevel) {
FunctionPassManager FPM;
diff --git a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
index ba1cb5699e79..5aa9ec283406 100644
--- a/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
+++ b/llvm/lib/Target/BPF/BPFTargetTransformInfo.h
@@ -77,6 +77,10 @@ public:
return Options;
}
+ unsigned getMaxNumArgs() const {
+ return 5;
+ }
+
};
} // end namespace llvm
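getMaxNumArgs above advertises an argument-count limit of 5, matching the BPF calling convention, which passes call arguments in registers R1-R5 only; the LowerCall hunk earlier in this patch likewise fails calls whose operand count exceeds MaxArgs. A hypothetical illustration (the function names are made up, not from the tree):

// R1..R5 carry the five BPF argument slots; a sixth argument has no
// register to land in, so this backend rejects such calls.
long sum5(long a, long b, long c, long d, long e) {
  return a + b + c + d + e;          // calls to this lower normally
}

long sum6(long a, long b, long c, long d, long e, long f) {
  return a + b + c + d + e + f;      // a call to this is diagnosed as
}                                    // "too many args" by the lowering
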
diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp
index 9a873413db87..485ba88a4654 100644
--- a/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -30,7 +30,7 @@ using namespace llvm;
static const char *BTFKindStr[] = {
#define HANDLE_BTF_KIND(ID, NAME) "BTF_KIND_" #NAME,
-#include "BTF.def"
+#include "llvm/DebugInfo/BTF/BTF.def"
};
/// Emit a BTF common type.
@@ -782,6 +782,17 @@ void BTFDebug::visitCompositeType(const DICompositeType *CTy,
visitEnumType(CTy, TypeId);
}
+bool BTFDebug::IsForwardDeclCandidate(const DIType *Base) {
+ if (const auto *CTy = dyn_cast<DICompositeType>(Base)) {
+ auto CTag = CTy->getTag();
+ if ((CTag == dwarf::DW_TAG_structure_type ||
+ CTag == dwarf::DW_TAG_union_type) &&
+ !CTy->getName().empty() && !CTy->isForwardDecl())
+ return true;
+ }
+ return false;
+}
+
/// Handle pointer, typedef, const, volatile, restrict and member types.
void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId,
bool CheckPointer, bool SeenPointer) {
@@ -796,20 +807,15 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId,
if (CheckPointer && SeenPointer) {
const DIType *Base = DTy->getBaseType();
if (Base) {
- if (const auto *CTy = dyn_cast<DICompositeType>(Base)) {
- auto CTag = CTy->getTag();
- if ((CTag == dwarf::DW_TAG_structure_type ||
- CTag == dwarf::DW_TAG_union_type) &&
- !CTy->getName().empty() && !CTy->isForwardDecl()) {
- /// Find a candidate, generate a fixup. Later on the struct/union
- /// pointee type will be replaced with either a real type or
- /// a forward declaration.
- auto TypeEntry = std::make_unique<BTFTypeDerived>(DTy, Tag, true);
- auto &Fixup = FixupDerivedTypes[CTy];
- Fixup.push_back(std::make_pair(DTy, TypeEntry.get()));
- TypeId = addType(std::move(TypeEntry), DTy);
- return;
- }
+ if (IsForwardDeclCandidate(Base)) {
+ /// Find a candidate, generate a fixup. Later on the struct/union
+ /// pointee type will be replaced with either a real type or
+ /// a forward declaration.
+ auto TypeEntry = std::make_unique<BTFTypeDerived>(DTy, Tag, true);
+ auto &Fixup = FixupDerivedTypes[cast<DICompositeType>(Base)];
+ Fixup.push_back(std::make_pair(DTy, TypeEntry.get()));
+ TypeId = addType(std::move(TypeEntry), DTy);
+ return;
}
}
}
@@ -844,6 +850,13 @@ void BTFDebug::visitDerivedType(const DIDerivedType *DTy, uint32_t &TypeId,
visitTypeEntry(DTy->getBaseType(), TempTypeId, CheckPointer, SeenPointer);
}
+/// Visit a type entry. CheckPointer is true if the type has
+/// one of its predecessors as one struct/union member. SeenPointer
+/// is true if CheckPointer is true and one of its predecessors
+/// is a pointer. The goal of CheckPointer and SeenPointer is to
+/// do pruning for struct/union types so some of these types
+/// will not be emitted in BTF and rather forward declarations
+/// will be generated.
void BTFDebug::visitTypeEntry(const DIType *Ty, uint32_t &TypeId,
bool CheckPointer, bool SeenPointer) {
if (!Ty || DIToIdMap.find(Ty) != DIToIdMap.end()) {
@@ -888,6 +901,11 @@ void BTFDebug::visitTypeEntry(const DIType *Ty, uint32_t &TypeId,
if (DIToIdMap.find(BaseTy) != DIToIdMap.end()) {
DTy = dyn_cast<DIDerivedType>(BaseTy);
} else {
+ if (CheckPointer && DTy->getTag() == dwarf::DW_TAG_pointer_type) {
+ SeenPointer = true;
+ if (IsForwardDeclCandidate(BaseTy))
+ break;
+ }
uint32_t TmpTypeId;
visitTypeEntry(BaseTy, TmpTypeId, CheckPointer, SeenPointer);
break;
@@ -964,7 +982,7 @@ std::string BTFDebug::populateFileContent(const DISubprogram *SP) {
FileName = std::string(File->getFilename());
// No need to populate the contents if it has been populated!
- if (FileContent.find(FileName) != FileContent.end())
+ if (FileContent.contains(FileName))
return FileName;
std::vector<std::string> Content;
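The IsForwardDeclCandidate helper factored out above drives BTF's struct/union pruning: a named, fully defined struct or union that is reached only through a pointer member may be emitted as a forward declaration (with a fixup resolved later to either the real type or the forward declaration). A hypothetical example (assumption: illustrative C++ source, not taken from the tree):

// 'struct bar' is defined, but within 'foo' it is only reachable through a
// pointer, so BTF may emit just a forward declaration (BTF_KIND_FWD) for it
// at that use; 'foo' itself is emitted in full.
struct bar { long id; };
struct foo {
  struct bar *next; // pointer to a named, defined struct: pruning candidate
  int value;        // ordinary member, full type info kept
};
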
diff --git a/llvm/lib/Target/BPF/BTFDebug.h b/llvm/lib/Target/BPF/BTFDebug.h
index aa982babd458..7536006ed21c 100644
--- a/llvm/lib/Target/BPF/BTFDebug.h
+++ b/llvm/lib/Target/BPF/BTFDebug.h
@@ -16,11 +16,11 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/DebugHandlerBase.h"
+#include "llvm/DebugInfo/BTF/BTF.h"
#include <cstdint>
#include <map>
#include <set>
#include <unordered_map>
-#include "BTF.h"
namespace llvm {
@@ -338,6 +338,9 @@ class BTFDebug : public DebugHandlerBase {
void visitMapDefType(const DIType *Ty, uint32_t &TypeId);
/// @}
+ /// Check whether the type is a forward declaration candidate or not.
+ bool IsForwardDeclCandidate(const DIType *Base);
+
/// Get the file content for the subprogram. Certain lines of the file
/// later may be put into string table and referenced by line info.
std::string populateFileContent(const DISubprogram *SP);
diff --git a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
index aa408f8b65f7..2565d8a0d763 100644
--- a/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
+++ b/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -18,9 +18,9 @@
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include <cstdint>
using namespace llvm;
@@ -179,7 +179,7 @@ DecodeStatus BPFDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
if ((InstClass == BPF_LDX || InstClass == BPF_STX) &&
getInstSize(Insn) != BPF_DW &&
(InstMode == BPF_MEM || InstMode == BPF_ATOMIC) &&
- STI.getFeatureBits()[BPF::ALU32])
+ STI.hasFeature(BPF::ALU32))
Result = decodeInstruction(DecoderTableBPFALU3264, Instr, Insn, Address,
this, STI);
else
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index 14f6b367b8c7..7b2168458c93 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -13,8 +13,8 @@
#ifndef LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCASMINFO_H
#define LLVM_LIB_TARGET_BPF_MCTARGETDESC_BPFMCASMINFO_H
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
index cb321906db03..4bc74b54a11d 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -58,7 +58,7 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
};
@@ -107,20 +107,22 @@ static uint8_t SwapBits(uint8_t Val)
return (Val & 0x0F) << 4 | (Val & 0xF0) >> 4;
}
-void BPFMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+void BPFMCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
unsigned Opcode = MI.getOpcode();
+ raw_svector_ostream OS(CB);
support::endian::Writer OSE(OS,
IsLittleEndian ? support::little : support::big);
if (Opcode == BPF::LD_imm64 || Opcode == BPF::LD_pseudo) {
uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI);
- OS << char(Value >> 56);
+ CB.push_back(Value >> 56);
if (IsLittleEndian)
- OS << char((Value >> 48) & 0xff);
+ CB.push_back((Value >> 48) & 0xff);
else
- OS << char(SwapBits((Value >> 48) & 0xff));
+ CB.push_back(SwapBits((Value >> 48) & 0xff));
OSE.write<uint16_t>(0);
OSE.write<uint32_t>(Value & 0xffffFFFF);
@@ -133,11 +135,11 @@ void BPFMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
} else {
// Get instruction encoding and emit it
uint64_t Value = getBinaryCodeForInstr(MI, Fixups, STI);
- OS << char(Value >> 56);
+ CB.push_back(Value >> 56);
if (IsLittleEndian)
- OS << char((Value >> 48) & 0xff);
+ CB.push_back(char((Value >> 48) & 0xff));
else
- OS << char(SwapBits((Value >> 48) & 0xff));
+ CB.push_back(SwapBits((Value >> 48) & 0xff));
OSE.write<uint16_t>((Value >> 32) & 0xffff);
OSE.write<uint32_t>(Value & 0xffffFFFF);
}
diff --git a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index 77db5f99225e..e687650ab886 100644
--- a/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -19,7 +19,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
-#include "llvm/Support/Host.h"
+#include "llvm/TargetParser/Host.h"
#define GET_INSTRINFO_MC_DESC
#define ENABLE_INSTR_PREDICATE_VERIFIER
diff --git a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
index 94ef40e658a3..19f33f38cbfd 100644
--- a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
+++ b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
@@ -13,6 +13,7 @@
#include "TargetInfo/CSKYTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/Register.h"
@@ -29,10 +30,10 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CSKYAttributes.h"
-#include "llvm/Support/CSKYTargetParser.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/TargetParser/CSKYTargetParser.h"
using namespace llvm;
@@ -61,7 +62,8 @@ class CSKYAsmParser : public MCTargetAsmParser {
unsigned Kind) override;
bool generateImmOutOfRangeError(OperandVector &Operands, uint64_t ErrorInfo,
- int64_t Lower, int64_t Upper, Twine Msg);
+ int64_t Lower, int64_t Upper,
+ const Twine &Msg);
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
@@ -76,7 +78,7 @@ class CSKYAsmParser : public MCTargetAsmParser {
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
- bool ParseDirective(AsmToken DirectiveID) override;
+ ParseStatus parseDirective(AsmToken DirectiveID) override;
// Helper to actually emit an instruction to the MCStreamer. Also, when
// possible, compression of the instruction is performed.
@@ -102,15 +104,15 @@ class CSKYAsmParser : public MCTargetAsmParser {
#define GET_ASSEMBLER_HEADER
#include "CSKYGenAsmMatcher.inc"
- OperandMatchResultTy parseImmediate(OperandVector &Operands);
- OperandMatchResultTy parseRegister(OperandVector &Operands);
- OperandMatchResultTy parseBaseRegImm(OperandVector &Operands);
- OperandMatchResultTy parseCSKYSymbol(OperandVector &Operands);
- OperandMatchResultTy parseConstpoolSymbol(OperandVector &Operands);
- OperandMatchResultTy parseDataSymbol(OperandVector &Operands);
- OperandMatchResultTy parsePSRFlag(OperandVector &Operands);
- OperandMatchResultTy parseRegSeq(OperandVector &Operands);
- OperandMatchResultTy parseRegList(OperandVector &Operands);
+ ParseStatus parseImmediate(OperandVector &Operands);
+ ParseStatus parseRegister(OperandVector &Operands);
+ ParseStatus parseBaseRegImm(OperandVector &Operands);
+ ParseStatus parseCSKYSymbol(OperandVector &Operands);
+ ParseStatus parseConstpoolSymbol(OperandVector &Operands);
+ ParseStatus parseDataSymbol(OperandVector &Operands);
+ ParseStatus parsePSRFlag(OperandVector &Operands);
+ ParseStatus parseRegSeq(OperandVector &Operands);
+ ParseStatus parseRegList(OperandVector &Operands);
bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
@@ -650,7 +652,7 @@ static std::string CSKYMnemonicSpellCheck(StringRef S, const FeatureBitset &FBS,
bool CSKYAsmParser::generateImmOutOfRangeError(
OperandVector &Operands, uint64_t ErrorInfo, int64_t Lower, int64_t Upper,
- Twine Msg = "immediate must be an integer in the range") {
+ const Twine &Msg = "immediate must be an integer in the range") {
SMLoc ErrorLoc = ((CSKYOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, Msg + " [" + Twine(Lower) + ", " + Twine(Upper) + "]");
}
@@ -835,7 +837,7 @@ bool CSKYAsmParser::processLRW(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out) {
if (isUInt<8>(Inst.getOperand(1).getImm()) &&
Inst.getOperand(0).getReg() <= CSKY::R7) {
Opcode = CSKY::MOVI16;
- } else if (getSTI().getFeatureBits()[CSKY::HasE2] &&
+ } else if (getSTI().hasFeature(CSKY::HasE2) &&
isUInt<16>(Inst.getOperand(1).getImm())) {
Opcode = CSKY::MOVI32;
} else {
@@ -1021,93 +1023,84 @@ bool CSKYAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
return false;
}
- return MatchOperand_NoMatch;
+ return true;
}
-OperandMatchResultTy CSKYAsmParser::parseRegister(OperandVector &Operands) {
+ParseStatus CSKYAsmParser::parseRegister(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
switch (getLexer().getKind()) {
default:
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
case AsmToken::Identifier: {
StringRef Name = getLexer().getTok().getIdentifier();
MCRegister RegNo;
if (matchRegisterNameHelper(getSTI(), (MCRegister &)RegNo, Name))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
getLexer().Lex();
Operands.push_back(CSKYOperand::createReg(RegNo, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
}
}
-OperandMatchResultTy CSKYAsmParser::parseBaseRegImm(OperandVector &Operands) {
+ParseStatus CSKYAsmParser::parseBaseRegImm(OperandVector &Operands) {
assert(getLexer().is(AsmToken::LParen));
Operands.push_back(CSKYOperand::createToken("(", getLoc()));
auto Tok = getParser().Lex(); // Eat '('
- if (parseRegister(Operands) != MatchOperand_Success) {
+ if (!parseRegister(Operands).isSuccess()) {
getLexer().UnLex(Tok);
Operands.pop_back();
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
if (getLexer().is(AsmToken::RParen)) {
Operands.push_back(CSKYOperand::createToken(")", getLoc()));
getParser().Lex(); // Eat ')'
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- if (getLexer().isNot(AsmToken::Comma)) {
- Error(getLoc(), "expected ','");
- return MatchOperand_ParseFail;
- }
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(getLoc(), "expected ','");
getParser().Lex(); // Eat ','
- if (parseRegister(Operands) == MatchOperand_Success) {
- if (getLexer().isNot(AsmToken::LessLess)) {
- Error(getLoc(), "expected '<<'");
- return MatchOperand_ParseFail;
- }
+ if (parseRegister(Operands).isSuccess()) {
+ if (getLexer().isNot(AsmToken::LessLess))
+ return Error(getLoc(), "expected '<<'");
Operands.push_back(CSKYOperand::createToken("<<", getLoc()));
getParser().Lex(); // Eat '<<'
- if (parseImmediate(Operands) != MatchOperand_Success) {
- Error(getLoc(), "expected imm");
- return MatchOperand_ParseFail;
- }
+ if (!parseImmediate(Operands).isSuccess())
+ return Error(getLoc(), "expected imm");
- } else if (parseImmediate(Operands) != MatchOperand_Success) {
- Error(getLoc(), "expected imm");
- return MatchOperand_ParseFail;
+ } else if (!parseImmediate(Operands).isSuccess()) {
+ return Error(getLoc(), "expected imm");
}
- if (getLexer().isNot(AsmToken::RParen)) {
- Error(getLoc(), "expected ')'");
- return MatchOperand_ParseFail;
- }
+ if (getLexer().isNot(AsmToken::RParen))
+ return Error(getLoc(), "expected ')'");
Operands.push_back(CSKYOperand::createToken(")", getLoc()));
getParser().Lex(); // Eat ')'
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy CSKYAsmParser::parseImmediate(OperandVector &Operands) {
+ParseStatus CSKYAsmParser::parseImmediate(OperandVector &Operands) {
switch (getLexer().getKind()) {
default:
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
case AsmToken::LParen:
case AsmToken::Minus:
case AsmToken::Plus:
@@ -1118,14 +1111,12 @@ OperandMatchResultTy CSKYAsmParser::parseImmediate(OperandVector &Operands) {
const MCExpr *IdVal;
SMLoc S = getLoc();
- if (getParser().parseExpression(IdVal)) {
- Error(getLoc(), "unknown expression");
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseExpression(IdVal))
+ return Error(getLoc(), "unknown expression");
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
Operands.push_back(CSKYOperand::createImm(IdVal, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// Looks at a token type and creates the relevant operand from this
@@ -1134,33 +1125,33 @@ OperandMatchResultTy CSKYAsmParser::parseImmediate(OperandVector &Operands) {
bool CSKYAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
- OperandMatchResultTy Result =
+ ParseStatus Result =
MatchOperandParserImpl(Operands, Mnemonic, /*ParseForAllFeatures=*/true);
- if (Result == MatchOperand_Success)
+ if (Result.isSuccess())
return false;
- if (Result == MatchOperand_ParseFail)
+ if (Result.isFailure())
return true;
// Attempt to parse token as register
auto Res = parseRegister(Operands);
- if (Res == MatchOperand_Success)
+ if (Res.isSuccess())
return false;
- else if (Res == MatchOperand_ParseFail)
+ if (Res.isFailure())
return true;
// Attempt to parse token as (register, imm)
if (getLexer().is(AsmToken::LParen)) {
Res = parseBaseRegImm(Operands);
- if (Res == MatchOperand_Success)
+ if (Res.isSuccess())
return false;
- else if (Res == MatchOperand_ParseFail)
+ if (Res.isFailure())
return true;
}
Res = parseImmediate(Operands);
- if (Res == MatchOperand_Success)
+ if (Res.isSuccess())
return false;
- else if (Res == MatchOperand_ParseFail)
+ if (Res.isFailure())
return true;
// Finally we have exhausted all options and must declare defeat.
@@ -1168,21 +1159,19 @@ bool CSKYAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
return true;
}
-OperandMatchResultTy CSKYAsmParser::parseCSKYSymbol(OperandVector &Operands) {
+ParseStatus CSKYAsmParser::parseCSKYSymbol(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
const MCExpr *Res;
if (getLexer().getKind() != AsmToken::Identifier)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
StringRef Identifier;
AsmToken Tok = getLexer().getTok();
- if (getParser().parseIdentifier(Identifier)) {
- Error(getLoc(), "unknown identifier");
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseIdentifier(Identifier))
+ return Error(getLoc(), "unknown identifier");
CSKYMCExpr::VariantKind Kind = CSKYMCExpr::VK_CSKY_None;
if (Identifier.consume_back("@GOT"))
@@ -1213,8 +1202,7 @@ OperandMatchResultTy CSKYAsmParser::parseCSKYSymbol(OperandVector &Operands) {
const MCExpr *V = Sym->getVariableValue(/*SetUsed=*/false);
if (!isa<MCSymbolRefExpr>(V)) {
getLexer().UnLex(Tok); // Put back if it's not a bare symbol.
- Error(getLoc(), "unknown symbol");
- return MatchOperand_ParseFail;
+ return Error(getLoc(), "unknown symbol");
}
Res = V;
} else
@@ -1227,7 +1215,7 @@ OperandMatchResultTy CSKYAsmParser::parseCSKYSymbol(OperandVector &Operands) {
Res = CSKYMCExpr::create(Res, Kind, getContext());
Operands.push_back(CSKYOperand::createImm(Res, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
case AsmToken::Plus:
Opcode = MCBinaryExpr::Add;
break;
@@ -1239,50 +1227,37 @@ OperandMatchResultTy CSKYAsmParser::parseCSKYSymbol(OperandVector &Operands) {
getLexer().Lex(); // eat + or -
const MCExpr *Expr;
- if (getParser().parseExpression(Expr)) {
- Error(getLoc(), "unknown expression");
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseExpression(Expr))
+ return Error(getLoc(), "unknown expression");
Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext());
Operands.push_back(CSKYOperand::createImm(Res, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy CSKYAsmParser::parseDataSymbol(OperandVector &Operands) {
+ParseStatus CSKYAsmParser::parseDataSymbol(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
const MCExpr *Res;
- if (getLexer().getKind() != AsmToken::LBrac)
- return MatchOperand_NoMatch;
-
- getLexer().Lex(); // Eat '['.
-
+ if (!parseOptionalToken(AsmToken::LBrac))
+ return ParseStatus::NoMatch;
if (getLexer().getKind() != AsmToken::Identifier) {
const MCExpr *Expr;
- if (getParser().parseExpression(Expr)) {
- Error(getLoc(), "unknown expression");
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseExpression(Expr))
+ return Error(getLoc(), "unknown expression");
- if (getLexer().getKind() != AsmToken::RBrac) {
- Error(getLoc(), "expected ]");
- return MatchOperand_ParseFail;
- }
-
- getLexer().Lex(); // Eat ']'.
+ if (parseToken(AsmToken::RBrac, "expected ']'"))
+ return ParseStatus::Failure;
Operands.push_back(CSKYOperand::createConstpoolOp(Expr, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
AsmToken Tok = getLexer().getTok();
StringRef Identifier;
- if (getParser().parseIdentifier(Identifier)) {
- Error(getLoc(), "unknown identifier " + Identifier);
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseIdentifier(Identifier))
+ return Error(getLoc(), "unknown identifier " + Identifier);
CSKYMCExpr::VariantKind Kind = CSKYMCExpr::VK_CSKY_None;
if (Identifier.consume_back("@GOT"))
@@ -1299,8 +1274,7 @@ OperandMatchResultTy CSKYAsmParser::parseDataSymbol(OperandVector &Operands) {
const MCExpr *V = Sym->getVariableValue(/*SetUsed=*/false);
if (!isa<MCSymbolRefExpr>(V)) {
getLexer().UnLex(Tok); // Put back if it's not a bare symbol.
- Error(getLoc(), "unknown symbol");
- return MatchOperand_ParseFail;
+ return Error(getLoc(), "unknown symbol");
}
Res = V;
} else {
@@ -1310,8 +1284,7 @@ OperandMatchResultTy CSKYAsmParser::parseDataSymbol(OperandVector &Operands) {
MCBinaryExpr::Opcode Opcode;
switch (getLexer().getKind()) {
default:
- Error(getLoc(), "unknown symbol");
- return MatchOperand_ParseFail;
+ return Error(getLoc(), "unknown symbol");
case AsmToken::RBrac:
getLexer().Lex(); // Eat ']'.
@@ -1320,7 +1293,7 @@ OperandMatchResultTy CSKYAsmParser::parseDataSymbol(OperandVector &Operands) {
Res = CSKYMCExpr::create(Res, Kind, getContext());
Operands.push_back(CSKYOperand::createConstpoolOp(Res, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
case AsmToken::Plus:
Opcode = MCBinaryExpr::Add;
break;
@@ -1332,59 +1305,40 @@ OperandMatchResultTy CSKYAsmParser::parseDataSymbol(OperandVector &Operands) {
getLexer().Lex(); // eat + or -
const MCExpr *Expr;
- if (getParser().parseExpression(Expr)) {
- Error(getLoc(), "unknown expression");
- return MatchOperand_ParseFail;
- }
-
- if (getLexer().getKind() != AsmToken::RBrac) {
- Error(getLoc(), "expected ']'");
- return MatchOperand_ParseFail;
- }
-
- getLexer().Lex(); // Eat ']'.
+ if (getParser().parseExpression(Expr))
+ return Error(getLoc(), "unknown expression");
+ if (parseToken(AsmToken::RBrac, "expected ']'"))
+ return ParseStatus::Failure;
Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext());
Operands.push_back(CSKYOperand::createConstpoolOp(Res, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-CSKYAsmParser::parseConstpoolSymbol(OperandVector &Operands) {
+ParseStatus CSKYAsmParser::parseConstpoolSymbol(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
const MCExpr *Res;
- if (getLexer().getKind() != AsmToken::LBrac)
- return MatchOperand_NoMatch;
-
- getLexer().Lex(); // Eat '['.
+ if (!parseOptionalToken(AsmToken::LBrac))
+ return ParseStatus::NoMatch;
if (getLexer().getKind() != AsmToken::Identifier) {
const MCExpr *Expr;
- if (getParser().parseExpression(Expr)) {
- Error(getLoc(), "unknown expression");
- return MatchOperand_ParseFail;
- }
-
- if (getLexer().getKind() != AsmToken::RBrac) {
- Error(getLoc(), "expected ']'");
- return MatchOperand_ParseFail;
- }
-
- getLexer().Lex(); // Eat ']'.
+ if (getParser().parseExpression(Expr))
+ return Error(getLoc(), "unknown expression");
+ if (parseToken(AsmToken::RBrac))
+ return ParseStatus::Failure;
Operands.push_back(CSKYOperand::createConstpoolOp(Expr, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
AsmToken Tok = getLexer().getTok();
StringRef Identifier;
- if (getParser().parseIdentifier(Identifier)) {
- Error(getLoc(), "unknown identifier");
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseIdentifier(Identifier))
+ return Error(getLoc(), "unknown identifier");
MCSymbol *Sym = getContext().getInlineAsmLabel(Identifier);
@@ -1395,8 +1349,7 @@ CSKYAsmParser::parseConstpoolSymbol(OperandVector &Operands) {
const MCExpr *V = Sym->getVariableValue(/*SetUsed=*/false);
if (!isa<MCSymbolRefExpr>(V)) {
getLexer().UnLex(Tok); // Put back if it's not a bare symbol.
- Error(getLoc(), "unknown symbol");
- return MatchOperand_ParseFail;
+ return Error(getLoc(), "unknown symbol");
}
Res = V;
} else {
@@ -1406,14 +1359,13 @@ CSKYAsmParser::parseConstpoolSymbol(OperandVector &Operands) {
MCBinaryExpr::Opcode Opcode;
switch (getLexer().getKind()) {
default:
- Error(getLoc(), "unknown symbol");
- return MatchOperand_ParseFail;
+ return Error(getLoc(), "unknown symbol");
case AsmToken::RBrac:
getLexer().Lex(); // Eat ']'.
Operands.push_back(CSKYOperand::createConstpoolOp(Res, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
case AsmToken::Plus:
Opcode = MCBinaryExpr::Add;
break;
@@ -1425,24 +1377,17 @@ CSKYAsmParser::parseConstpoolSymbol(OperandVector &Operands) {
getLexer().Lex(); // eat + or -
const MCExpr *Expr;
- if (getParser().parseExpression(Expr)) {
- Error(getLoc(), "unknown expression");
- return MatchOperand_ParseFail;
- }
-
- if (getLexer().getKind() != AsmToken::RBrac) {
- Error(getLoc(), "expected ']'");
- return MatchOperand_ParseFail;
- }
-
- getLexer().Lex(); // Eat ']'.
+ if (getParser().parseExpression(Expr))
+ return Error(getLoc(), "unknown expression");
+ if (parseToken(AsmToken::RBrac, "expected ']'"))
+ return ParseStatus::Failure;
Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext());
Operands.push_back(CSKYOperand::createConstpoolOp(Res, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy CSKYAsmParser::parsePSRFlag(OperandVector &Operands) {
+ParseStatus CSKYAsmParser::parsePSRFlag(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
@@ -1450,10 +1395,8 @@ OperandMatchResultTy CSKYAsmParser::parsePSRFlag(OperandVector &Operands) {
while (getLexer().isNot(AsmToken::EndOfStatement)) {
StringRef Identifier;
- if (getParser().parseIdentifier(Identifier)) {
- Error(getLoc(), "unknown identifier " + Identifier);
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseIdentifier(Identifier))
+ return Error(getLoc(), "unknown identifier " + Identifier);
if (Identifier == "sie")
Flag = (1 << 4) | Flag;
@@ -1465,77 +1408,58 @@ OperandMatchResultTy CSKYAsmParser::parsePSRFlag(OperandVector &Operands) {
Flag = (1 << 1) | Flag;
else if (Identifier == "af")
Flag = (1 << 0) | Flag;
- else {
- Error(getLoc(), "expected " + Identifier);
- return MatchOperand_ParseFail;
- }
+ else
+ return Error(getLoc(), "expected " + Identifier);
if (getLexer().is(AsmToken::EndOfStatement))
break;
- if (getLexer().is(AsmToken::Comma)) {
- getLexer().Lex(); // eat ','
- } else {
- Error(getLoc(), "expected ,");
- return MatchOperand_ParseFail;
- }
+ if (parseToken(AsmToken::Comma, "expected ','"))
+ return ParseStatus::Failure;
}
Operands.push_back(
CSKYOperand::createImm(MCConstantExpr::create(Flag, getContext()), S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy CSKYAsmParser::parseRegSeq(OperandVector &Operands) {
+ParseStatus CSKYAsmParser::parseRegSeq(OperandVector &Operands) {
SMLoc S = getLoc();
- if (parseRegister(Operands) != MatchOperand_Success)
- return MatchOperand_NoMatch;
+ if (!parseRegister(Operands).isSuccess())
+ return ParseStatus::NoMatch;
auto Ry = Operands.back()->getReg();
Operands.pop_back();
- if (getLexer().isNot(AsmToken::Minus)) {
- Error(getLoc(), "expected '-'");
- return MatchOperand_ParseFail;
- }
-
- getLexer().Lex(); // eat '-'
-
- if (parseRegister(Operands) != MatchOperand_Success) {
- Error(getLoc(), "invalid register");
- return MatchOperand_ParseFail;
- }
+ if (parseToken(AsmToken::Minus, "expected '-'"))
+ return ParseStatus::Failure;
+ if (!parseRegister(Operands).isSuccess())
+ return Error(getLoc(), "invalid register");
auto Rz = Operands.back()->getReg();
Operands.pop_back();
Operands.push_back(CSKYOperand::createRegSeq(Ry, Rz, S));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy CSKYAsmParser::parseRegList(OperandVector &Operands) {
+ParseStatus CSKYAsmParser::parseRegList(OperandVector &Operands) {
SMLoc S = getLoc();
SmallVector<unsigned, 4> reglist;
while (true) {
- if (parseRegister(Operands) != MatchOperand_Success) {
- Error(getLoc(), "invalid register");
- return MatchOperand_ParseFail;
- }
+ if (!parseRegister(Operands).isSuccess())
+ return Error(getLoc(), "invalid register");
auto Ry = Operands.back()->getReg();
Operands.pop_back();
- if (getLexer().is(AsmToken::Minus)) {
- getLexer().Lex(); // eat '-'
-
- if (parseRegister(Operands) != MatchOperand_Success) {
- Error(getLoc(), "invalid register");
- return MatchOperand_ParseFail;
- }
+ if (parseOptionalToken(AsmToken::Minus)) {
+ if (!parseRegister(Operands).isSuccess())
+ return Error(getLoc(), "invalid register");
auto Rz = Operands.back()->getReg();
Operands.pop_back();
@@ -1543,28 +1467,23 @@ OperandMatchResultTy CSKYAsmParser::parseRegList(OperandVector &Operands) {
reglist.push_back(Ry);
reglist.push_back(Rz);
- if (getLexer().is(AsmToken::Comma))
- getLexer().Lex(); // eat ','
- else if (getLexer().is(AsmToken::EndOfStatement))
+ if (getLexer().is(AsmToken::EndOfStatement))
break;
-
- } else if (getLexer().is(AsmToken::Comma)) {
+ (void)parseOptionalToken(AsmToken::Comma);
+ } else if (parseOptionalToken(AsmToken::Comma)) {
reglist.push_back(Ry);
reglist.push_back(Ry);
-
- getLexer().Lex(); // eat ','
} else if (getLexer().is(AsmToken::EndOfStatement)) {
reglist.push_back(Ry);
reglist.push_back(Ry);
break;
} else {
- Error(getLoc(), "invalid register list");
- return MatchOperand_ParseFail;
+ return Error(getLoc(), "invalid register list");
}
}
Operands.push_back(CSKYOperand::createRegList(reglist, S));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
bool CSKYAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
@@ -1581,14 +1500,9 @@ bool CSKYAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return true;
// Parse until end of statement, consuming commas between operands.
- while (getLexer().is(AsmToken::Comma)) {
- // Consume comma token.
- getLexer().Lex();
-
- // Parse next operand.
+ while (parseOptionalToken(AsmToken::Comma))
if (parseOperand(Operands, Name))
return true;
- }
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
@@ -1616,17 +1530,13 @@ OperandMatchResultTy CSKYAsmParser::tryParseRegister(MCRegister &RegNo,
return MatchOperand_Success;
}
-bool CSKYAsmParser::ParseDirective(AsmToken DirectiveID) {
- // This returns false if this function recognizes the directive
- // regardless of whether it is successfully handles or reports an
- // error. Otherwise it returns true to give the generic parser a
- // chance at recognizing it.
+ParseStatus CSKYAsmParser::parseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
if (IDVal == ".csky_attribute")
return parseDirectiveAttribute();
- return true;
+ return ParseStatus::NoMatch;
}
/// parseDirectiveAttribute
@@ -1640,10 +1550,8 @@ bool CSKYAsmParser::parseDirectiveAttribute() {
StringRef Name = Parser.getTok().getIdentifier();
std::optional<unsigned> Ret =
ELFAttrs::attrTypeFromString(Name, CSKYAttrs::getCSKYAttributeTags());
- if (!Ret) {
- Error(TagLoc, "attribute name not recognised: " + Name);
- return false;
- }
+ if (!Ret)
+ return Error(TagLoc, "attribute name not recognised: " + Name);
Tag = *Ret;
Parser.Lex();
} else {
@@ -1654,13 +1562,13 @@ bool CSKYAsmParser::parseDirectiveAttribute() {
return true;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(AttrExpr);
- if (check(!CE, TagLoc, "expected numeric constant"))
- return true;
+ if (!CE)
+ return Error(TagLoc, "expected numeric constant");
Tag = CE->getValue();
}
- if (Parser.parseToken(AsmToken::Comma, "comma expected"))
+ if (Parser.parseComma())
return true;
StringRef StringValue;
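
The conversions above from OperandMatchResultTy to ParseStatus, and the shorter `return Error(...)` forms, rely on ParseStatus being a tri-state that is implicitly constructible from the bool returned by Error(). The following is a minimal standalone sketch of that behaviour, not the real llvm::ParseStatus (which is declared in llvm/MC/MCParser/MCTargetAsmParser.h and may differ in detail):

#include <cassert>

// Sketch only: models the Success/Failure/NoMatch tri-state and the bool
// conversion that makes `return Error(Loc, Msg);` legal in the parsers above.
class ParseStatusSketch {
  enum class StatusTy { Success, Failure, NoMatch } Status;
  constexpr explicit ParseStatusSketch(StatusTy S) : Status(S) {}

public:
  // AsmParser::Error() always returns true, so returning it yields Failure;
  // a plain `return false;` would yield Success.
  constexpr ParseStatusSketch(bool IsError)
      : Status(IsError ? StatusTy::Failure : StatusTy::Success) {}

  static constexpr ParseStatusSketch success() {
    return ParseStatusSketch(StatusTy::Success);
  }
  static constexpr ParseStatusSketch noMatch() {
    return ParseStatusSketch(StatusTy::NoMatch);
  }

  constexpr bool isSuccess() const { return Status == StatusTy::Success; }
  constexpr bool isFailure() const { return Status == StatusTy::Failure; }
  constexpr bool isNoMatch() const { return Status == StatusTy::NoMatch; }
};

int main() {
  ParseStatusSketch Res = /*IsError=*/true; // what `return Error(...)` produces
  assert(Res.isFailure() && !Res.isSuccess() && !Res.isNoMatch());
  assert(ParseStatusSketch::noMatch().isNoMatch());
  return 0;
}
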
diff --git a/llvm/lib/Target/CSKY/CSKY.td b/llvm/lib/Target/CSKY/CSKY.td
index e5ac106c9b59..9809caa8bd8f 100644
--- a/llvm/lib/Target/CSKY/CSKY.td
+++ b/llvm/lib/Target/CSKY/CSKY.td
@@ -40,17 +40,17 @@ def HasFdivdu : Predicate<"Subtarget->hasFdivdu()">,
def FeatureFPUV3_HI
: SubtargetFeature<"fpuv3_hi", "HasFPUv3HalfWord", "true",
- "Enable FPUv3 harf word converting instructions">;
+ "Enable FPUv3 half word converting instructions">;
def HasFPUv3_HI : Predicate<"Subtarget->hasFPUv3HalfWord()">,
AssemblerPredicate<(all_of FeatureFPUV3_HI),
- "Enable FPUv3 harf word converting instructions">;
+ "Enable FPUv3 half word converting instructions">;
def FeatureFPUV3_HF
: SubtargetFeature<"fpuv3_hf", "HasFPUv3HalfFloat", "true",
- "Enable FPUv3 harf precision operate instructions">;
+ "Enable FPUv3 half precision operate instructions">;
def HasFPUv3_HF : Predicate<"Subtarget->hasFPUv3HalfFloat()">,
AssemblerPredicate<(all_of FeatureFPUV3_HF),
- "Enable FPUv3 harf precision operate instructions">;
+ "Enable FPUv3 half precision operate instructions">;
def FeatureFPUV3_SF
: SubtargetFeature<"fpuv3_sf", "HasFPUv3SingleFloat", "true",
diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.h b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
index 5e87594e4fdf..379189512405 100644
--- a/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
+++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
@@ -57,7 +57,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
// We emit constant pools ourselves.
- void emitConstantPool() override{};
+ void emitConstantPool() override {}
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &OS) override;
diff --git a/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp b/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
index 09c2d5161aba..702053e02332 100644
--- a/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
+++ b/llvm/lib/Target/CSKY/CSKYISelDAGToDAG.cpp
@@ -74,10 +74,10 @@ void CSKYDAGToDAGISel::Select(SDNode *N) {
switch (Opcode) {
default:
break;
- case ISD::ADDCARRY:
+ case ISD::UADDO_CARRY:
IsSelected = selectAddCarry(N);
break;
- case ISD::SUBCARRY:
+ case ISD::USUBO_CARRY:
IsSelected = selectSubCarry(N);
break;
case ISD::GLOBAL_OFFSET_TABLE: {
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
index a65a0283777f..5d21aab513dd 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
@@ -51,8 +51,8 @@ CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::f64, &CSKY::FPR64RegClass);
}
- setOperationAction(ISD::ADDCARRY, MVT::i32, Legal);
- setOperationAction(ISD::SUBCARRY, MVT::i32, Legal);
+ setOperationAction(ISD::UADDO_CARRY, MVT::i32, Legal);
+ setOperationAction(ISD::USUBO_CARRY, MVT::i32, Legal);
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
setOperationAction(ISD::SREM, MVT::i32, Expand);
@@ -116,8 +116,9 @@ CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM,
ISD::SETUGE, ISD::SETULT, ISD::SETULE,
};
- ISD::NodeType FPOpToExpand[] = {ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
- ISD::FPOW, ISD::FREM, ISD::FCOPYSIGN};
+ ISD::NodeType FPOpToExpand[] = {
+ ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
+ ISD::FREM, ISD::FCOPYSIGN, ISD::FP16_TO_FP, ISD::FP_TO_FP16};
if (STI.useHardFloat()) {
@@ -136,10 +137,14 @@ CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM,
if (STI.hasFPUv2SingleFloat() || STI.hasFPUv3SingleFloat()) {
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::f16, Expand);
}
if (STI.hasFPUv2DoubleFloat() || STI.hasFPUv3DoubleFloat()) {
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f16, Expand);
}
}
@@ -153,8 +158,7 @@ CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM,
setMaxAtomicSizeInBitsSupported(0);
setStackPointerRegisterToSaveRestore(CSKY::R14);
- const Align FunctionAlignment(2);
- setMinFunctionAlignment(FunctionAlignment);
+ setMinFunctionAlignment(Align(2));
setSchedulingPreference(Sched::Source);
}
@@ -379,7 +383,7 @@ SDValue CSKYTargetLowering::LowerFormalArguments(
// If all registers are allocated, then all varargs must be passed on the
// stack and we don't need to save any argregs.
if (ArgRegs.size() == Idx) {
- VaArgOffset = CCInfo.getNextStackOffset();
+ VaArgOffset = CCInfo.getStackSize();
VarArgsSaveSize = 0;
} else {
VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
@@ -532,7 +536,7 @@ SDValue CSKYTargetLowering::LowerCall(CallLoweringInfo &CLI,
"site marked musttail");
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = ArgCCInfo.getNextStackOffset();
+ unsigned NumBytes = ArgCCInfo.getStackSize();
// Create local copies for byval args
SmallVector<SDValue, 8> ByValArgs;
@@ -1372,3 +1376,28 @@ SDValue CSKYTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
return V;
}
+
+bool CSKYTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
+ SDValue C) const {
+ if (!VT.isScalarInteger())
+ return false;
+
+ // Omit if the data size exceeds the native register width (XLen).
+ if (VT.getSizeInBits() > Subtarget.XLen)
+ return false;
+
+ if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
+ const APInt &Imm = ConstNode->getAPIntValue();
+ // Break MULT into LSLI + ADDU/SUBU.
+ if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
+ (1 - Imm).isPowerOf2())
+ return true;
+ // Only break MULT for subtargets without MULT32, since an extra
+ // instruction is generated relative to the three cases above. We leave it
+ // unchanged on subtargets with MULT32, since it is not clearly better.
+ if (!Subtarget.hasE2() && (-1 - Imm).isPowerOf2())
+ return true;
+ }
+
+ return false;
+}
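
A worked reading of the conditions above (illustration only): for Imm = 9, Imm - 1 = 8 is a power of two, so x * 9 can be rewritten as (x << 3) + x; for Imm = 7, Imm + 1 = 8, giving (x << 3) - x; and for Imm = -7, 1 - Imm = 8, giving x - (x << 3). A standalone model of the test, using plain integers instead of APInt:

#include <cassert>
#include <cstdint>

// Models the power-of-two checks in decomposeMulByConstant above; the
// strength-reduced forms are noted in the comments.
static bool isPowerOf2(int64_t V) { return V > 0 && (V & (V - 1)) == 0; }

static bool decomposable(int64_t Imm) {
  return isPowerOf2(Imm + 1) ||  // x * 7  -> (x << 3) - x
         isPowerOf2(Imm - 1) ||  // x * 9  -> (x << 3) + x
         isPowerOf2(1 - Imm);    // x * -7 -> x - (x << 3)
}

int main() {
  assert(decomposable(7) && decomposable(9) && decomposable(-7));
  assert(!decomposable(10)); // 11, 9, and -9 are not powers of two
  return 0;
}
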
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.h b/llvm/lib/Target/CSKY/CSKYISelLowering.h
index d0abc7e9a7e4..c724882c6042 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.h
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.h
@@ -173,6 +173,9 @@ private:
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg) const;
+
+ bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
+ SDValue C) const override;
};
} // namespace llvm
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.td b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
index b99dbf08f112..549c883c34a7 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
@@ -102,6 +102,14 @@ class oimm<int num> : Operand<i32>,
let DecoderMethod = "decodeOImmOperand<"#num#">";
}
+def imm_neg_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
+class oimm_neg<int num> : Operand<i32>,
+ ImmLeaf<i32, "return isUInt<"#num#">(-Imm - 1);"> {
+}
+
class uimm<int num, int shift = 0> : Operand<i32>,
ImmLeaf<i32, "return isShiftedUInt<"#num#", "#shift#">(Imm);"> {
let EncoderMethod = "getImmOpValue<"#shift#">";
@@ -112,6 +120,10 @@ class uimm<int num, int shift = 0> : Operand<i32>,
let DecoderMethod = "decodeUImmOperand<"#num#", "#shift#">";
}
+class uimm_neg<int num, int shift = 0> : Operand<i32>,
+ ImmLeaf<i32, "return isShiftedUInt<"#num#", "#shift#">(-Imm);"> {
+}
+
class simm<int num, int shift = 0> : Operand<i32>,
ImmLeaf<i32, "return isShiftedInt<"#num#", "#shift#">(Imm);"> {
let EncoderMethod = "getImmOpValue<"#shift#">";
@@ -259,6 +271,23 @@ def oimm16 : oimm<16> {
}];
}
+def oimm8_neg : oimm_neg<8> {
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isUInt<8>(-Imm - 1);
+ return MCOp.isBareSymbolRef();
+ }];
+}
+def oimm12_neg : oimm_neg<12> {
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isUInt<12>(-Imm - 1);
+ return MCOp.isBareSymbolRef();
+ }];
+}
+
def nimm12 : nimm<12>;
def uimm1 : uimm<1>;
@@ -371,6 +400,8 @@ def uimm20 : uimm<20>;
def uimm24 : uimm<24>;
def uimm24_8 : uimm<24, 8>;
+def uimm5_neg : uimm_neg<5>;
+
def simm8_2 : simm<8, 2>;
class RegSeqAsmOperand<string Suffix = ""> : AsmOperandClass {
@@ -518,6 +549,9 @@ let Predicates = [iHasE2] in {
let Size = 8 in
def RSUBI32 : CSKYPseudo<(outs GPR:$rd), (ins GPR:$rx, uimm12:$imm12), "rsubi32 $rd, $rx, $imm12", []>;
+ def : Pat<(add GPR:$rs1, (oimm12_neg:$im)),
+ (SUBI32 GPR:$rs1, (imm_neg_XFORM oimm12_neg:$im))>;
+
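A quick reading of this new pattern (illustration only): oimm12_neg accepts immediates in [-4096, -1], i.e. those for which -Imm - 1 fits in 12 unsigned bits, and imm_neg_XFORM substitutes -Imm, so for example an add of -5 is selected as subi32 of 5. A standalone model of the predicate and transform:

#include <cassert>
#include <cstdint>

// Models the oimm12_neg ImmLeaf predicate and the imm_neg_XFORM transform
// above, with plain integers instead of the SelectionDAG machinery.
static bool isUIntN(unsigned N, uint64_t V) {
  return N >= 64 || V < (UINT64_C(1) << N);
}

// oimm12_neg: true exactly for Imm in [-4096, -1].
static bool isOImm12Neg(int64_t Imm) {
  return isUIntN(12, static_cast<uint64_t>(-Imm - 1));
}

// imm_neg_XFORM: the immediate actually encoded into SUBI32.
static int64_t negateImm(int64_t Imm) { return -Imm; }

int main() {
  assert(isOImm12Neg(-5) && negateImm(-5) == 5);  // add r, -5 -> subi32 r, 5
  assert(isOImm12Neg(-4096) && !isOImm12Neg(-4097));
  assert(!isOImm12Neg(0) && !isOImm12Neg(3));     // non-negative adds stay adds
  return 0;
}
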
def LSL32 : R_YXZ_SP_F1<0x10, 0x1,
BinOpFrag<(shl node:$LHS, node:$RHS)>, "lsl32">;
def LSR32 : R_YXZ_SP_F1<0x10, 0x2,
@@ -885,9 +919,9 @@ let Predicates = [iHasE2] in {
def XTRB3 : R_XZ<0x1C, 0x8, "xtrb3.32">;
def BTSTI32 : I_5_X<0x0A, 0x4, "btsti32", uimm5, []>;
def BCLRI32 : I_5_XZ<0xA, 0x1, "bclri32",
- (outs GPR:$rz), (ins GPR:$rx, uimm5:$imm5), []>;
+ (outs GPR:$rz), (ins GPR:$rx, uimm5:$imm5), []>;
def BSETI32 : I_5_XZ<0xA, 0x2, "bseti32",
- (outs GPR:$rz), (ins GPR:$rx, uimm5:$imm5), []>;
+ (outs GPR:$rz), (ins GPR:$rx, uimm5:$imm5), []>;
}
//===----------------------------------------------------------------------===//
@@ -1131,22 +1165,19 @@ def : Pat<(i32 (load constpool:$src)), (LRW32 (to_tconstpool tconstpool:$src))>;
// Branch Patterns.
let Predicates = [iHasE2] in {
- def : Pat<(brcond CARRY:$ca, bb:$imm16),
+def : Pat<(brcond CARRY:$ca, bb:$imm16),
(BT32 CARRY:$ca, bb:$imm16)>;
- def : Pat<(brcond (i32 (setne GPR:$rs1, uimm16:$rs2)), bb:$imm16),
- (BT32 (CMPNEI32 GPR:$rs1, uimm16:$rs2), bb:$imm16)>;
- def : Pat<(brcond (i32 (seteq GPR:$rs1, uimm16:$rs2)), bb:$imm16),
- (BF32 (CMPNEI32 GPR:$rs1, uimm16:$rs2), bb:$imm16)>;
- def : Pat<(brcond (i32 (setuge GPR:$rs1, oimm16:$rs2)), bb:$imm16),
- (BT32 (CMPHSI32 GPR:$rs1, oimm16:$rs2), bb:$imm16)>;
- def : Pat<(brcond (i32 (setult GPR:$rs1, oimm16:$rs2)), bb:$imm16),
- (BF32 (CMPHSI32 GPR:$rs1, oimm16:$rs2), bb:$imm16)>;
- def : Pat<(brcond (i32 (setlt GPR:$rs1, oimm16:$rs2)), bb:$imm16),
- (BT32 (CMPLTI32 GPR:$rs1, oimm16:$rs2), bb:$imm16)>;
- def : Pat<(brcond (i32 (setge GPR:$rs1, oimm16:$rs2)), bb:$imm16),
- (BF32 (CMPLTI32 GPR:$rs1, oimm16:$rs2), bb:$imm16)>;
+multiclass BTF32Pat0<PatFrag cond0, PatFrag cond1, ImmLeaf imm_ty, Instruction inst> {
+ def : Pat<(brcond (i32 (cond0 GPR:$rs1, uimm16:$rs2)), bb:$imm16),
+ (BT32 (inst GPR:$rs1, imm_ty:$rs2), bb:$imm16)>;
+ def : Pat<(brcond (i32 (cond1 GPR:$rs1, uimm16:$rs2)), bb:$imm16),
+ (BF32 (inst GPR:$rs1, imm_ty:$rs2), bb:$imm16)>;
+}
+defm : BTF32Pat0<setne, seteq, uimm16, CMPNEI32>;
+defm : BTF32Pat0<setuge, setult, oimm16, CMPHSI32>;
+defm : BTF32Pat0<setlt, setge, oimm16, CMPLTI32>;
}
let Predicates = [iHas2E3] in {
@@ -1155,22 +1186,19 @@ def : Pat<(brcond (i32 (setne GPR:$rs1, GPR:$rs2)), bb:$imm16),
(BT32 (CMPNE32 GPR:$rs1, GPR:$rs2), bb:$imm16)>;
def : Pat<(brcond (i32 (seteq GPR:$rs1, GPR:$rs2)), bb:$imm16),
(BF32 (CMPNE32 GPR:$rs1, GPR:$rs2), bb:$imm16)>;
-def : Pat<(brcond (i32 (setuge GPR:$rs1, GPR:$rs2)), bb:$imm16),
- (BT32 (CMPHS32 GPR:$rs1, GPR:$rs2), bb:$imm16)>;
-def : Pat<(brcond (i32 (setule GPR:$rs1, GPR:$rs2)), bb:$imm16),
- (BT32 (CMPHS32 GPR:$rs2, GPR:$rs1), bb:$imm16)>;
-def : Pat<(brcond (i32 (setult GPR:$rs1, GPR:$rs2)), bb:$imm16),
- (BF32 (CMPHS32 GPR:$rs1, GPR:$rs2), bb:$imm16)>;
-def : Pat<(brcond (i32 (setugt GPR:$rs1, GPR:$rs2)), bb:$imm16),
- (BF32 (CMPHS32 GPR:$rs2, GPR:$rs1), bb:$imm16)>;
-def : Pat<(brcond (i32 (setlt GPR:$rs1, GPR:$rs2)), bb:$imm16),
- (BT32 (CMPLT32 GPR:$rs1, GPR:$rs2), bb:$imm16)>;
-def : Pat<(brcond (i32 (setgt GPR:$rs1, GPR:$rs2)), bb:$imm16),
- (BT32 (CMPLT32 GPR:$rs2, GPR:$rs1), bb:$imm16)>;
-def : Pat<(brcond (i32 (setge GPR:$rs1, GPR:$rs2)), bb:$imm16),
- (BF32 (CMPLT32 GPR:$rs1, GPR:$rs2), bb:$imm16)>;
-def : Pat<(brcond (i32 (setle GPR:$rs1, GPR:$rs2)), bb:$imm16),
- (BF32 (CMPLT32 GPR:$rs2, GPR:$rs1), bb:$imm16)>;
+
+multiclass BTF32Pat1<PatFrag cond0, PatFrag cond1, Instruction cmp,
+ Instruction br> {
+ def : Pat<(brcond (i32 (cond0 GPR:$rs1, GPR:$rs2)), bb:$imm16),
+ (br (cmp GPR:$rs1, GPR:$rs2), bb:$imm16)>;
+ def : Pat<(brcond (i32 (cond1 GPR:$rs1, GPR:$rs2)), bb:$imm16),
+ (br (cmp GPR:$rs2, GPR:$rs1), bb:$imm16)>;
+}
+
+defm : BTF32Pat1<setuge, setule, CMPHS32, BT32>;
+defm : BTF32Pat1<setult, setugt, CMPHS32, BF32>;
+defm : BTF32Pat1<setlt, setgt, CMPLT32, BT32>;
+defm : BTF32Pat1<setge, setle, CMPLT32, BF32>;
def : Pat<(brcond (i32 (seteq GPR:$rs1, (i32 0))), bb:$imm16),
(BEZ32 GPR:$rs1, bb:$imm16)>;
@@ -1178,12 +1206,20 @@ def : Pat<(brcond (i32 (setne GPR:$rs1, (i32 0))), bb:$imm16),
(BNEZ32 GPR:$rs1, bb:$imm16)>;
def : Pat<(brcond (i32 (setlt GPR:$rs1, (i32 0))), bb:$imm16),
(BLZ32 GPR:$rs1, bb:$imm16)>;
+def : Pat<(brcond (i32 (setlt GPR:$rs1, (i32 1))), bb:$imm16),
+ (BLSZ32 GPR:$rs1, bb:$imm16)>;
def : Pat<(brcond (i32 (setge GPR:$rs1, (i32 0))), bb:$imm16),
(BHSZ32 GPR:$rs1, bb:$imm16)>;
+def : Pat<(brcond (i32 (setge GPR:$rs1, (i32 1))), bb:$imm16),
+ (BHZ32 GPR:$rs1, bb:$imm16)>;
def : Pat<(brcond (i32 (setgt GPR:$rs1, (i32 0))), bb:$imm16),
(BHZ32 GPR:$rs1, bb:$imm16)>;
+def : Pat<(brcond (i32 (setgt GPR:$rs1, (i32 -1))), bb:$imm16),
+ (BHSZ32 GPR:$rs1, bb:$imm16)>;
def : Pat<(brcond (i32 (setle GPR:$rs1, (i32 0))), bb:$imm16),
(BLSZ32 GPR:$rs1, bb:$imm16)>;
+def : Pat<(brcond (i32 (setle GPR:$rs1, (i32 -1))), bb:$imm16),
+ (BLZ32 GPR:$rs1, bb:$imm16)>;
}
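
Both the swapped-operand multiclass and the new +/-1 patterns rest on simple comparison identities: a <=u b is the same as b >=u a (hence CMPHS32 with the operands swapped), x <s 1 is x <=s 0 (hence BLSZ32), x >=s 1 is x >s 0 (BHZ32), x >s -1 is x >=s 0 (BHSZ32), and x <=s -1 is x <s 0 (BLZ32). A small standalone check of those identities, for illustration:

#include <cassert>
#include <cstdint>

// Checks the comparison identities used by BTF32Pat1 and by the new branch
// patterns that compare against +1/-1 above.
int main() {
  for (int64_t a = -3; a <= 3; ++a) {
    for (int64_t b = -3; b <= 3; ++b) {
      uint32_t ua = static_cast<uint32_t>(a), ub = static_cast<uint32_t>(b);
      assert((ua <= ub) == (ub >= ua)); // setule -> swapped CMPHS32 under BT32
      assert((a > b) == (b < a));       // setgt  -> swapped CMPLT32 under BT32
    }
    int32_t x = static_cast<int32_t>(a);
    assert((x < 1) == (x <= 0));   // setlt x, 1  -> BLSZ32
    assert((x >= 1) == (x > 0));   // setge x, 1  -> BHZ32
    assert((x > -1) == (x >= 0));  // setgt x, -1 -> BHSZ32
    assert((x <= -1) == (x < 0));  // setle x, -1 -> BLZ32
  }
  return 0;
}
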
// Compare Patterns.
@@ -1231,25 +1267,76 @@ let Predicates = [iHasE2] in {
// Select Patterns.
let Predicates = [iHasE2] in {
+def : Pat<(select (i32 (setne GPR:$rs1, uimm16:$rs2)), (add GPR:$rx, uimm5:$imm), GPR:$false),
+ (INCT32 (CMPNEI32 GPR:$rs1, uimm16:$rs2), GPR:$false, GPR:$rx, uimm5:$imm)>;
+def : Pat<(select (i32 (seteq GPR:$rs1, uimm16:$rs2)), (add GPR:$rx, uimm5:$imm), GPR:$false),
+ (INCF32 (CMPNEI32 GPR:$rs1, uimm16:$rs2), GPR:$false, GPR:$rx, uimm5:$imm)>;
+def : Pat<(select (i32 (setne GPR:$rs1, uimm16:$rs2)), (add GPR:$rx, uimm5_neg:$imm), GPR:$false),
+ (DECT32 (CMPNEI32 GPR:$rs1, uimm16:$rs2), GPR:$false, GPR:$rx,
+ (imm_neg_XFORM uimm5_neg:$imm))>;
+def : Pat<(select (i32 (seteq GPR:$rs1, uimm16:$rs2)), (add GPR:$rx, uimm5_neg:$imm), GPR:$false),
+ (DECF32 (CMPNEI32 GPR:$rs1, uimm16:$rs2), GPR:$false, GPR:$rx,
+ (imm_neg_XFORM uimm5:$imm))>;
+
+multiclass INCDECPat<PatFrag cond0, PatFrag cond1, Instruction cmp> {
+ def : Pat<(select (i32 (cond0 GPR:$rs1, oimm16:$rs2)), (add GPR:$rx, uimm5:$imm), GPR:$other),
+ (INCT32 (cmp GPR:$rs1, oimm16:$rs2), GPR:$other, GPR:$rx, uimm5:$imm)>;
+ def : Pat<(select (i32 (cond1 GPR:$rs1, oimm16:$rs2)), (add GPR:$rx, uimm5:$imm), GPR:$other),
+ (INCF32 (cmp GPR:$rs1, oimm16:$rs2), GPR:$other, GPR:$rx, uimm5:$imm)>;
+ def : Pat<(select (i32 (cond0 GPR:$rs1, oimm16:$rs2)), GPR:$other, (add GPR:$rx, uimm5:$imm)),
+ (INCF32 (cmp GPR:$rs1, oimm16:$rs2), GPR:$other, GPR:$rx, uimm5:$imm)>;
+ def : Pat<(select (i32 (cond1 GPR:$rs1, oimm16:$rs2)), GPR:$other, (add GPR:$rx, uimm5:$imm)),
+ (INCT32 (cmp GPR:$rs1, oimm16:$rs2), GPR:$other, GPR:$rx, uimm5:$imm)>;
+ def : Pat<(select (i32 (cond0 GPR:$rs1, oimm16:$rs2)), (add GPR:$rx, uimm5_neg:$imm), GPR:$other),
+ (DECT32 (cmp GPR:$rs1, oimm16:$rs2), GPR:$other, GPR:$rx,
+ (imm_neg_XFORM uimm5_neg:$imm))>;
+ def : Pat<(select (i32 (cond1 GPR:$rs1, oimm16:$rs2)), (add GPR:$rx, uimm5_neg:$imm), GPR:$other),
+ (DECF32 (cmp GPR:$rs1, oimm16:$rs2), GPR:$other, GPR:$rx,
+ (imm_neg_XFORM uimm5_neg:$imm))>;
+ def : Pat<(select (i32 (cond0 GPR:$rs1, oimm16:$rs2)), GPR:$other, (add GPR:$rx, uimm5_neg:$imm)),
+ (DECF32 (cmp GPR:$rs1, oimm16:$rs2), GPR:$other, GPR:$rx,
+ (imm_neg_XFORM uimm5_neg:$imm))>;
+ def : Pat<(select (i32 (cond1 GPR:$rs1, oimm16:$rs2)), GPR:$other, (add GPR:$rx, uimm5_neg:$imm)),
+ (DECT32 (cmp GPR:$rs1, oimm16:$rs2), GPR:$other, GPR:$rx,
+ (imm_neg_XFORM uimm5_neg:$imm))>;
+}
+
+defm : INCDECPat<setuge, setult, CMPHSI32>;
+defm : INCDECPat<setlt, setge, CMPLTI32>;
+
+def : Pat<(select CARRY:$ca, (add GPR:$rx, uimm5:$imm), GPR:$other),
+ (INCT32 CARRY:$ca, GPR:$other, GPR:$rx, uimm5:$imm)>;
+def : Pat<(select CARRY:$ca, GPR:$other, (add GPR:$rx, uimm5:$imm)),
+ (INCF32 CARRY:$ca, GPR:$other, GPR:$rx, uimm5:$imm)>;
+def : Pat<(select (and CARRY:$ca, 1), (add GPR:$rx, uimm5:$imm), GPR:$other),
+ (INCT32 CARRY:$ca, GPR:$other, GPR:$rx, uimm5:$imm)>;
+def : Pat<(select (and CARRY:$ca, 1), GPR:$other, (add GPR:$rx, uimm5:$imm)),
+ (INCF32 CARRY:$ca, GPR:$other, GPR:$rx, uimm5:$imm)>;
+
+def : Pat<(select CARRY:$ca, (add GPR:$rx, uimm5_neg:$imm), GPR:$other),
+ (DECT32 CARRY:$ca, GPR:$other, GPR:$rx, (imm_neg_XFORM uimm5_neg:$imm))>;
+def : Pat<(select CARRY:$ca, GPR:$other, (add GPR:$rx, uimm5_neg:$imm)),
+ (DECF32 CARRY:$ca, GPR:$other, GPR:$rx, (imm_neg_XFORM uimm5_neg:$imm))>;
+def : Pat<(select (and CARRY:$ca, 1), (add GPR:$rx, uimm5_neg:$imm), GPR:$other),
+ (DECT32 CARRY:$ca, GPR:$other, GPR:$rx, (imm_neg_XFORM uimm5_neg:$imm))>;
+def : Pat<(select (and CARRY:$ca, 1), GPR:$other, (add GPR:$rx, uimm5_neg:$imm)),
+ (DECF32 CARRY:$ca, GPR:$other, GPR:$rx, (imm_neg_XFORM uimm5_neg:$imm))>;
+
def : Pat<(select CARRY:$ca, GPR:$rx, GPR:$false),
(MOVT32 CARRY:$ca, GPR:$rx, GPR:$false)>;
def : Pat<(select (and CARRY:$ca, 1), GPR:$rx, GPR:$false),
(MOVT32 CARRY:$ca, GPR:$rx, GPR:$false)>;
-def : Pat<(select (i32 (setne GPR:$rs1, uimm16:$rs2)), GPR:$rx, GPR:$false),
- (MOVT32 (CMPNEI32 GPR:$rs1, uimm16:$rs2), GPR:$rx, GPR:$false)>;
-def : Pat<(select (i32 (seteq GPR:$rs1, uimm16:$rs2)), GPR:$rx, GPR:$false),
- (MOVF32 (CMPNEI32 GPR:$rs1, uimm16:$rs2), GPR:$rx, GPR:$false)>;
-
-def : Pat<(select (i32 (setuge GPR:$rs1, oimm16:$rs2)), GPR:$rx, GPR:$false),
- (MOVT32 (CMPHSI32 GPR:$rs1, oimm16:$rs2), GPR:$rx, GPR:$false)>;
-def : Pat<(select (i32 (setult GPR:$rs1, oimm16:$rs2)), GPR:$rx, GPR:$false),
- (MOVF32 (CMPHSI32 GPR:$rs1, oimm16:$rs2), GPR:$rx, GPR:$false)>;
+multiclass MOVTF32Pat0<PatFrag cond0, PatFrag cond1, ImmLeaf imm_ty, Instruction inst> {
+ def : Pat<(select (i32 (cond0 GPR:$rs1, imm_ty:$rs2)), GPR:$rx, GPR:$false),
+ (MOVT32 (inst GPR:$rs1, imm_ty:$rs2), GPR:$rx, GPR:$false)>;
+ def : Pat<(select (i32 (cond1 GPR:$rs1, imm_ty:$rs2)), GPR:$rx, GPR:$false),
+ (MOVF32 (inst GPR:$rs1, imm_ty:$rs2), GPR:$rx, GPR:$false)>;
+}
-def : Pat<(select (i32 (setlt GPR:$rs1, oimm16:$rs2)), GPR:$rx, GPR:$false),
- (MOVT32 (CMPLTI32 GPR:$rs1, oimm16:$rs2), GPR:$rx, GPR:$false)>;
-def : Pat<(select (i32 (setge GPR:$rs1, oimm16:$rs2)), GPR:$rx, GPR:$false),
- (MOVF32 (CMPLTI32 GPR:$rs1, oimm16:$rs2), GPR:$rx, GPR:$false)>;
+defm : MOVTF32Pat0<setne, seteq, uimm16, CMPNEI32>;
+defm : MOVTF32Pat0<setuge, setult, oimm16, CMPHSI32>;
+defm : MOVTF32Pat0<setlt, setge, oimm16, CMPLTI32>;
def : Pat<(select CARRY:$ca, GPR:$rx, GPR:$false),
(ISEL32 CARRY:$ca, GPR:$rx, GPR:$false)>;
@@ -1259,30 +1346,75 @@ def : Pat<(select (and CARRY:$ca, 1), GPR:$rx, GPR:$false),
let Predicates = [iHas2E3] in {
+def : Pat<(select (i32 (setne GPR:$rs1, GPR:$rs2)), (add GPR:$rx, uimm5:$imm), GPR:$false),
+ (INCT32 (CMPNE32 GPR:$rs1, GPR:$rs2), GPR:$false, GPR:$rx, uimm5:$imm)>;
+def : Pat<(select (i32 (seteq GPR:$rs1, GPR:$rs2)), (add GPR:$rx, uimm5:$imm), GPR:$false),
+ (INCF32 (CMPNE32 GPR:$rs1, GPR:$rs2), GPR:$false, GPR:$rx, uimm5:$imm)>;
+def : Pat<(select (i32 (setne GPR:$rs1, GPR:$rs2)), (add GPR:$rx, uimm5_neg:$imm), GPR:$false),
+ (DECT32 (CMPNE32 GPR:$rs1, GPR:$rs2), GPR:$false, GPR:$rx,
+ (imm_neg_XFORM uimm5_neg:$imm))>;
+def : Pat<(select (i32 (seteq GPR:$rs1, GPR:$rs2)), (add GPR:$rx, uimm5_neg:$imm), GPR:$false),
+ (DECF32 (CMPNE32 GPR:$rs1, GPR:$rs2), GPR:$false, GPR:$rx,
+ (imm_neg_XFORM uimm5_neg:$imm))>;
+
+multiclass INCPat<PatFrag cond0, PatFrag cond1, Instruction cmp, Instruction inc0, Instruction inc1> {
+ def : Pat<(select (i32 (cond0 GPR:$rs1, GPR:$rs2)), (add GPR:$rx, uimm5:$imm), GPR:$other),
+ (inc0 (cmp GPR:$rs1, GPR:$rs2), GPR:$other, GPR:$rx, uimm5:$imm)>;
+ def : Pat<(select (i32 (cond0 GPR:$rs1, GPR:$rs2)), GPR:$other, (add GPR:$rx, uimm5:$imm)),
+ (inc1 (cmp GPR:$rs1, GPR:$rs2), GPR:$other, GPR:$rx, uimm5:$imm)>;
+ def : Pat<(select (i32 (cond1 GPR:$rs1, GPR:$rs2)), (add GPR:$rx, uimm5:$imm), GPR:$other),
+ (inc0 (cmp GPR:$rs2, GPR:$rs1), GPR:$other, GPR:$rx, uimm5:$imm)>;
+ def : Pat<(select (i32 (cond1 GPR:$rs1, GPR:$rs2)), GPR:$other, (add GPR:$rx, uimm5:$imm)),
+ (inc1 (cmp GPR:$rs2, GPR:$rs1), GPR:$other, GPR:$rx, uimm5:$imm)>;
+}
+
+defm : INCPat<setuge, setule, CMPHS32, INCT32, INCF32>;
+defm : INCPat<setult, setugt, CMPHS32, INCF32, INCT32>;
+defm : INCPat<setlt, setgt, CMPLT32, INCT32, INCF32>;
+defm : INCPat<setge, setle, CMPLT32, INCF32, INCT32>;
+
+multiclass DECPat<PatFrag cond0, PatFrag cond1, Instruction cmp, Instruction dec0, Instruction dec1> {
+ def : Pat<(select (i32 (cond0 GPR:$rs1, GPR:$rs2)), (add GPR:$rx, uimm5_neg:$imm), GPR:$other),
+ (dec0 (cmp GPR:$rs1, GPR:$rs2), GPR:$other, GPR:$rx,
+ (imm_neg_XFORM uimm5_neg:$imm))>;
+ def : Pat<(select (i32 (cond0 GPR:$rs1, GPR:$rs2)), GPR:$other, (add GPR:$rx, uimm5_neg:$imm)),
+ (dec1 (cmp GPR:$rs1, GPR:$rs2), GPR:$other, GPR:$rx,
+ (imm_neg_XFORM uimm5_neg:$imm))>;
+ def : Pat<(select (i32 (cond1 GPR:$rs1, GPR:$rs2)), (add GPR:$rx, uimm5_neg:$imm), GPR:$other),
+ (dec0 (cmp GPR:$rs2, GPR:$rs1), GPR:$other, GPR:$rx,
+ (imm_neg_XFORM uimm5_neg:$imm))>;
+ def : Pat<(select (i32 (cond1 GPR:$rs1, GPR:$rs2)), GPR:$other, (add GPR:$rx, uimm5_neg:$imm)),
+ (dec1 (cmp GPR:$rs2, GPR:$rs1), GPR:$other, GPR:$rx,
+ (imm_neg_XFORM uimm5_neg:$imm))>;
+}
+
+defm : DECPat<setuge, setule, CMPHS32, DECT32, DECF32>;
+defm : DECPat<setult, setugt, CMPHS32, DECF32, DECT32>;
+defm : DECPat<setlt, setgt, CMPLT32, DECT32, DECF32>;
+defm : DECPat<setge, setle, CMPLT32, DECF32, DECT32>;
def : Pat<(select (i32 (setne GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
(MOVT32 (CMPNE32 GPR:$rs1, GPR:$rs2), GPR:$rx, GPR:$false)>;
def : Pat<(select (i32 (seteq GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
(MOVF32 (CMPNE32 GPR:$rs1, GPR:$rs2), GPR:$rx, GPR:$false)>;
-def : Pat<(select (i32 (setuge GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
- (MOVT32 (CMPHS32 GPR:$rs1, GPR:$rs2), GPR:$rx, GPR:$false)>;
-def : Pat<(select (i32 (setule GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
- (MOVT32 (CMPHS32 GPR:$rs2, GPR:$rs1), GPR:$rx, GPR:$false)>;
-def : Pat<(select (i32 (setult GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
- (MOVF32 (CMPHS32 GPR:$rs1, GPR:$rs2), GPR:$rx, GPR:$false)>;
-def : Pat<(select (i32 (setugt GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
- (MOVF32 (CMPHS32 GPR:$rs2, GPR:$rs1), GPR:$rx, GPR:$false)>;
+multiclass MOVTF32Pat1<PatFrag cond0, PatFrag cond1, Instruction cmp_inst,
+ Instruction mov_inst> {
+ def : Pat<(select (i32 (cond0 GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
+ (mov_inst (cmp_inst GPR:$rs1, GPR:$rs2), GPR:$rx, GPR:$false)>;
+ def : Pat<(select (i32 (cond1 GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
+ (mov_inst (cmp_inst GPR:$rs2, GPR:$rs1), GPR:$rx, GPR:$false)>;
+}
-def : Pat<(select (i32 (setlt GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
- (MOVT32 (CMPLT32 GPR:$rs1, GPR:$rs2), GPR:$rx, GPR:$false)>;
-def : Pat<(select (i32 (setgt GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
- (MOVT32 (CMPLT32 GPR:$rs2, GPR:$rs1), GPR:$rx, GPR:$false)>;
-def : Pat<(select (i32 (setge GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
- (MOVF32 (CMPLT32 GPR:$rs1, GPR:$rs2), GPR:$rx, GPR:$false)>;
-def : Pat<(select (i32 (setle GPR:$rs1, GPR:$rs2)), GPR:$rx, GPR:$false),
- (MOVF32 (CMPLT32 GPR:$rs2, GPR:$rs1), GPR:$rx, GPR:$false)>;
+defm : MOVTF32Pat1<setuge, setule, CMPHS32, MOVT32>;
+defm : MOVTF32Pat1<setult, setugt, CMPHS32, MOVF32>;
+defm : MOVTF32Pat1<setlt, setgt, CMPLT32, MOVT32>;
+defm : MOVTF32Pat1<setge, setle, CMPLT32, MOVF32>;
+def : Pat<(select CARRY:$ca, (i32 0), GPR:$other),
+ (CLRT32 CARRY:$ca, GPR:$other)>;
+def : Pat<(select CARRY:$ca, GPR:$other, (i32 0)),
+ (CLRF32 CARRY:$ca, GPR:$other)>;
}
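
Roughly, the new patterns let a conditionally selected add or subtract of a small constant fold into the conditional increment/decrement instructions, and a conditionally selected zero fold into CLRT32/CLRF32. The C-level shapes below are my own illustration of what they appear to target; the actual instructions chosen still depend on the subtarget and on instruction selection.

#include <cstdint>

// Hypothetical source shapes matching the new select patterns; the expected
// lowering is noted in the comments, but is not guaranteed for every input.
uint32_t condInc(uint32_t a, uint32_t b, uint32_t x, uint32_t y) {
  return a < b ? x + 3 : y;  // (select (setult a, b), (add x, 3), y) -> INCT32/INCF32 forms
}

uint32_t condDec(uint32_t a, uint32_t b, uint32_t x, uint32_t y) {
  return a < b ? x - 3 : y;  // add of a uimm5_neg immediate -> DECT32/DECF32 forms
}

uint32_t condClear(bool c, uint32_t other) {
  return c ? 0u : other;     // (select c, 0, other) -> CLRT32
}
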
// Constant materialize patterns.
@@ -1290,7 +1422,6 @@ let Predicates = [iHasE2] in
def : Pat<(i32 imm:$imm),
(ORI32 (MOVIH32 (uimm32_hi16 imm:$imm)), (uimm32_lo16 imm:$imm))>;
-
// Other operations.
let Predicates = [iHasE2] in {
def : Pat<(rotl GPR:$rs1, GPR:$rs2),
@@ -1353,9 +1484,6 @@ def JBF_E : CSKYPseudo<(outs), (ins CARRY:$ca, br_symbol:$src1), "!jbf_e\t$src1"
let mayLoad = 1, Size = 2, isCodeGenOnly = 0 in
def PseudoLRW32 : CSKYPseudo<(outs GPR:$rz), (ins bare_symbol:$src), "lrw32 $rz, $src", []>;
-
-
-
let mayLoad = 1, Size = 4, isCodeGenOnly = 0 in
def PseudoJSRI32 : CSKYPseudo<(outs), (ins call_symbol:$src), "jsri32 $src", []>;
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
index 86719d36d23e..3e248019d73f 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo16Instr.td
@@ -97,6 +97,9 @@ let Constraints = "$rZ = $rz", isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def SUBI16 : I16_Z_8<0b101, (ins mGPR:$rZ, oimm8:$imm8), "subi16\t$rz, $imm8">;
}
+def : Pat<(add GPR:$rs1, (oimm8_neg:$im)),
+ (SUBI16 GPR:$rs1, (imm_neg_XFORM oimm8_neg:$im))>;
+
let isAdd = 1 in
def ADDI16ZSP : I16_Z_8<0b011, (ins GPRSP:$sp, uimm8_2:$imm8),
"addi16\t$rz, $sp, $imm8">;
@@ -483,39 +486,34 @@ def : Pat<(brcond CARRY:$ca, bb:$offset),
def : Pat<(br bb:$offset), (BR16 bb:$offset)>;
-def : Pat<(brcond (i32 (setne mGPR:$rs1, uimm5:$rs2)), bb:$offset),
- (BT16 (CMPNEI16 mGPR:$rs1, uimm5:$rs2), bb:$offset)>;
-def : Pat<(brcond (i32 (seteq mGPR:$rs1, uimm5:$rs2)), bb:$offset),
- (BF16 (CMPNEI16 mGPR:$rs1, uimm5:$rs2), bb:$offset)>;
-def : Pat<(brcond (i32 (setuge mGPR:$rs1, oimm5:$rs2)), bb:$offset),
- (BT16 (CMPHSI16 mGPR:$rs1, oimm5:$rs2), bb:$offset)>;
-def : Pat<(brcond (i32 (setult mGPR:$rs1, oimm5:$rs2)), bb:$offset),
- (BF16 (CMPHSI16 mGPR:$rs1, oimm5:$rs2), bb:$offset)>;
-def : Pat<(brcond (i32 (setlt mGPR:$rs1, oimm5:$rs2)), bb:$offset),
- (BT16 (CMPLTI16 mGPR:$rs1, oimm5:$rs2), bb:$offset)>;
-def : Pat<(brcond (i32 (setge mGPR:$rs1, oimm5:$rs2)), bb:$offset),
- (BF16 (CMPLTI16 mGPR:$rs1, oimm5:$rs2), bb:$offset)>;
+multiclass BTF16Pat0<PatFrag cond0, PatFrag cond1, ImmLeaf imm_ty, Instruction inst> {
+ def : Pat<(brcond (i32 (cond0 mGPR:$rs1, imm_ty:$rs2)), bb:$offset),
+ (BT16 (inst mGPR:$rs1, imm_ty:$rs2), bb:$offset)>;
+ def : Pat<(brcond (i32 (cond1 mGPR:$rs1, imm_ty:$rs2)), bb:$offset),
+ (BF16 (inst mGPR:$rs1, imm_ty:$rs2), bb:$offset)>;
+}
+
+defm : BTF16Pat0<setne, seteq, uimm5, CMPNEI16>;
+defm : BTF16Pat0<setuge, setult, oimm5, CMPHSI16>;
+defm : BTF16Pat0<setlt, setge, oimm5, CMPLTI16>;
def : Pat<(brcond (i32 (setne sGPR:$rs1, sGPR:$rs2)), bb:$offset),
(BT16 (CMPNE16 sGPR:$rs1, sGPR:$rs2), bb:$offset)>;
def : Pat<(brcond (i32 (seteq sGPR:$rs1, sGPR:$rs2)), bb:$offset),
(BF16 (CMPNE16 sGPR:$rs1, sGPR:$rs2), bb:$offset)>;
-def : Pat<(brcond (i32 (setuge sGPR:$rs1, sGPR:$rs2)), bb:$offset),
- (BT16 (CMPHS16 sGPR:$rs1, sGPR:$rs2), bb:$offset)>;
-def : Pat<(brcond (i32 (setule sGPR:$rs1, sGPR:$rs2)), bb:$offset),
- (BT16 (CMPHS16 sGPR:$rs2, sGPR:$rs1), bb:$offset)>;
-def : Pat<(brcond (i32 (setult sGPR:$rs1, sGPR:$rs2)), bb:$offset),
- (BF16 (CMPHS16 sGPR:$rs1, sGPR:$rs2), bb:$offset)>;
-def : Pat<(brcond (i32 (setugt sGPR:$rs1, sGPR:$rs2)), bb:$offset),
- (BF16 (CMPHS16 sGPR:$rs2, sGPR:$rs1), bb:$offset)>;
-def : Pat<(brcond (i32 (setlt sGPR:$rs1, sGPR:$rs2)), bb:$offset),
- (BT16 (CMPLT16 sGPR:$rs1, sGPR:$rs2), bb:$offset)>;
-def : Pat<(brcond (i32 (setgt sGPR:$rs1, sGPR:$rs2)), bb:$offset),
- (BT16 (CMPLT16 sGPR:$rs2, sGPR:$rs1), bb:$offset)>;
-def : Pat<(brcond (i32 (setge sGPR:$rs1, sGPR:$rs2)), bb:$offset),
- (BF16 (CMPLT16 sGPR:$rs1, sGPR:$rs2), bb:$offset)>;
-def : Pat<(brcond (i32 (setle sGPR:$rs1, sGPR:$rs2)), bb:$offset),
- (BF16 (CMPLT16 sGPR:$rs2, sGPR:$rs1), bb:$offset)>;
+
+multiclass BTF16Pat1<PatFrag cond0, PatFrag cond1, Instruction cmp,
+ Instruction br> {
+ def : Pat<(brcond (i32 (cond0 sGPR:$rs1, sGPR:$rs2)), bb:$offset),
+ (br (cmp sGPR:$rs1, sGPR:$rs2), bb:$offset)>;
+ def : Pat<(brcond (i32 (cond1 sGPR:$rs1, sGPR:$rs2)), bb:$offset),
+ (br (cmp sGPR:$rs2, sGPR:$rs1), bb:$offset)>;
+}
+
+defm : BTF16Pat1<setuge, setule, CMPHS16, BT16>;
+defm : BTF16Pat1<setult, setugt, CMPHS16, BF16>;
+defm : BTF16Pat1<setlt, setgt, CMPLT16, BT16>;
+defm : BTF16Pat1<setge, setle, CMPLT16, BF16>;
// Compare Patterns.
def : Pat<(setne sGPR:$rs1, sGPR:$rs2),
diff --git a/llvm/lib/Target/CSKY/CSKYSubtarget.h b/llvm/lib/Target/CSKY/CSKYSubtarget.h
index 9e7ad00c0a50..b8be347935ac 100644
--- a/llvm/lib/Target/CSKY/CSKYSubtarget.h
+++ b/llvm/lib/Target/CSKY/CSKYSubtarget.h
@@ -206,6 +206,8 @@ public:
bool isCK810V() const { return CSKYProcFamily == CK810V; }
bool isCK860() const { return CSKYProcFamily == CK860; }
bool isCK860V() const { return CSKYProcFamily == CK860V; }
+
+ const unsigned XLen = 32;
};
} // namespace llvm
diff --git a/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp
index ce0f63b99d68..d78d9acc2aa2 100644
--- a/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp
+++ b/llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp
@@ -496,9 +496,9 @@ static bool decodeFPUV3Instruction(MCInst &MI, uint32_t insn, uint64_t Address,
const MCDisassembler *DisAsm,
const MCSubtargetInfo &STI) {
LLVM_DEBUG(dbgs() << "Trying CSKY 32-bit fpuv3 table :\n");
- if (!STI.getFeatureBits()[CSKY::FeatureFPUV3_HF] &&
- !STI.getFeatureBits()[CSKY::FeatureFPUV3_SF] &&
- !STI.getFeatureBits()[CSKY::FeatureFPUV3_DF])
+ if (!STI.hasFeature(CSKY::FeatureFPUV3_HF) &&
+ !STI.hasFeature(CSKY::FeatureFPUV3_SF) &&
+ !STI.hasFeature(CSKY::FeatureFPUV3_DF))
return false;
DecodeStatus Result =
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
index 4171a97e9000..d53d2e9e00e9 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp
@@ -248,7 +248,7 @@ bool CSKYAsmBackend::mayNeedRelaxation(const MCInst &Inst,
case CSKY::JBT32:
case CSKY::JBF32:
case CSKY::JBSR32:
- if (!STI.getFeatureBits()[CSKY::Has2E3])
+ if (!STI.hasFeature(CSKY::Has2E3))
return false;
return true;
case CSKY::JBR16:
@@ -330,7 +330,7 @@ void CSKYAsmBackend::relaxInstruction(MCInst &Inst,
case CSKY::JBF16:
// ck801
unsigned opcode;
- if (STI.getFeatureBits()[CSKY::HasE2])
+ if (STI.hasFeature(CSKY::HasE2))
opcode = Inst.getOpcode() == CSKY::JBT16 ? CSKY::JBT32 : CSKY::JBF32;
else
opcode = Inst.getOpcode() == CSKY::JBT16 ? CSKY::JBT_E : CSKY::JBF_E;
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp
index d7cc4c8525ee..2548c83770ff 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFObjectWriter.cpp
@@ -117,6 +117,12 @@ unsigned CSKYELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_CKCORE_GOTOFF;
case MCSymbolRefExpr::VK_PLT:
return ELF::R_CKCORE_PLT32;
+ case MCSymbolRefExpr::VK_TLSGD:
+ return ELF::R_CKCORE_TLS_GD32;
+ case MCSymbolRefExpr::VK_TLSLDM:
+ return ELF::R_CKCORE_TLS_LDM32;
+ case MCSymbolRefExpr::VK_TPOFF:
+ return ELF::R_CKCORE_TLS_LE32;
case MCSymbolRefExpr::VK_None:
return ELF::R_CKCORE_ADDR32;
}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp
index 90775c1b70f2..059c3c143c31 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYELFStreamer.cpp
@@ -21,9 +21,9 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/Support/CSKYAttributes.h"
-#include "llvm/Support/CSKYTargetParser.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/TargetParser/CSKYTargetParser.h"
using namespace llvm;
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
index 3e4fdb5e67c3..9af7958112fc 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
@@ -113,7 +113,7 @@ void CSKYInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (Reg == CSKY::C)
O << "";
- else if (STI.getFeatureBits()[CSKY::FeatureJAVA]) {
+ else if (STI.hasFeature(CSKY::FeatureJAVA)) {
if (Reg == CSKY::R23)
O << (useABIName ? "fp" : "r23");
else if (Reg == CSKY::R24)
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp
index 540f901fd479..ea41d53ef30f 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCCodeEmitter.cpp
@@ -82,7 +82,7 @@ void CSKYMCCodeEmitter::expandJBTF(const MCInst &MI, raw_ostream &OS,
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
writeData(Binary, 2, OS);
- if (!STI.getFeatureBits()[CSKY::Has2E3])
+ if (!STI.hasFeature(CSKY::Has2E3))
TmpInst = MCInstBuilder(CSKY::BR32)
.addOperand(MI.getOperand(1))
.addOperand(MI.getOperand(2));
diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
index a75c79a71bcb..56063b487f68 100644
--- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
+++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
@@ -12,6 +12,7 @@
#include "DXILShaderFlags.h"
#include "DirectX.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/DXContainer.h"
#include "llvm/CodeGen/Passes.h"
diff --git a/llvm/lib/Target/DirectX/DXILMetadata.cpp b/llvm/lib/Target/DirectX/DXILMetadata.cpp
index 60dda8c7ca31..db55f25c5077 100644
--- a/llvm/lib/Target/DirectX/DXILMetadata.cpp
+++ b/llvm/lib/Target/DirectX/DXILMetadata.cpp
@@ -11,12 +11,12 @@
//===----------------------------------------------------------------------===//
#include "DXILMetadata.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/VersionTuple.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
using namespace llvm::dxil;
diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp b/llvm/lib/Target/DirectX/DXILPrepare.cpp
index 316c93806d86..660ca415b1a4 100644
--- a/llvm/lib/Target/DirectX/DXILPrepare.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
@@ -126,9 +127,6 @@ public:
I.eraseFromParent();
continue;
}
- // Only insert bitcasts if the IR is using opaque pointers.
- if (M.getContext().supportsTypedPointers())
- continue;
// Emitting no-op bitcast instructions allows the ValueEnumerator to be
// left unmodified, as it reserves instruction IDs during construction.
diff --git a/llvm/lib/Target/DirectX/DXILResource.cpp b/llvm/lib/Target/DirectX/DXILResource.cpp
index 763432911dbf..dde7255e0425 100644
--- a/llvm/lib/Target/DirectX/DXILResource.cpp
+++ b/llvm/lib/Target/DirectX/DXILResource.cpp
@@ -105,6 +105,7 @@ StringRef ResourceBase::getComponentTypeName(ComponentType CompType) {
case ComponentType::PackedU8x32:
return "p32u8";
}
+ llvm_unreachable("All ComponentType enums are handled in switch");
}
void ResourceBase::printComponentType(Kinds Kind, ComponentType CompType,
@@ -172,6 +173,7 @@ StringRef ResourceBase::getKindName(Kinds Kind) {
case Kinds::FeedbackTexture2DArray:
return "fbtex2darray";
}
+ llvm_unreachable("All Kinds enums are handled in switch");
}
void ResourceBase::printKind(Kinds Kind, unsigned Alignment, raw_ostream &OS,
diff --git a/llvm/lib/Target/DirectX/DXILResourceAnalysis.h b/llvm/lib/Target/DirectX/DXILResourceAnalysis.h
index 1b46f8b11e25..8ffa1d7cd9b3 100644
--- a/llvm/lib/Target/DirectX/DXILResourceAnalysis.h
+++ b/llvm/lib/Target/DirectX/DXILResourceAnalysis.h
@@ -10,6 +10,9 @@
///
//===----------------------------------------------------------------------===//
+#ifndef LLVM_TARGET_DIRECTX_DXILRESOURCEANALYSIS_H
+#define LLVM_TARGET_DIRECTX_DXILRESOURCEANALYSIS_H
+
#include "DXILResource.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
@@ -54,3 +57,5 @@ public:
void print(raw_ostream &O, const Module *M = nullptr) const override;
};
} // namespace llvm
+
+#endif // LLVM_TARGET_DIRECTX_DXILRESOURCEANALYSIS_H
diff --git a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
index 4995712b23b4..9c959c66be8b 100644
--- a/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
+++ b/llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp
@@ -14,11 +14,11 @@
#include "DXILShaderFlags.h"
#include "DirectX.h"
#include "llvm/ADT/StringSet.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
using namespace llvm::dxil;
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
index 0aafb08091f3..2c321f4a79af 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.cpp
@@ -14,7 +14,6 @@
#include "DXILValueEnumerator.h"
#include "DirectXIRPasses/PointerTypeAnalysis.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/BitcodeCommon.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
@@ -51,6 +50,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ModRef.h"
#include "llvm/Support/SHA1.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
namespace dxil {
@@ -513,7 +513,7 @@ unsigned DXILBitcodeWriter::getEncodedBinaryOpcode(unsigned Opcode) {
}
unsigned DXILBitcodeWriter::getTypeID(Type *T, const Value *V) {
- if (!T->isOpaquePointerTy() &&
+ if (!T->isPointerTy() &&
// For Constant, always check PointerMap to make sure OpaquePointer in
// things like constant struct/array works.
(!V || !isa<Constant>(V)))
@@ -1070,24 +1070,14 @@ void DXILBitcodeWriter::writeTypeTable() {
break;
}
case Type::PointerTyID: {
- PointerType *PTy = cast<PointerType>(T);
// POINTER: [pointee type, address space]
- Code = bitc::TYPE_CODE_POINTER;
- // Emitting an empty struct type for the opaque pointer's type allows
- // this to be order-independent. Non-struct types must be emitted in
- // bitcode before they can be referenced.
- if (PTy->isOpaquePointerTy()) {
- TypeVals.push_back(false);
- Code = bitc::TYPE_CODE_OPAQUE;
- writeStringRecord(Stream, bitc::TYPE_CODE_STRUCT_NAME,
- "dxilOpaquePtrReservedName", StructNameAbbrev);
- } else {
- TypeVals.push_back(getTypeID(PTy->getNonOpaquePointerElementType()));
- unsigned AddressSpace = PTy->getAddressSpace();
- TypeVals.push_back(AddressSpace);
- if (AddressSpace == 0)
- AbbrevToUse = PtrAbbrev;
- }
+ // Emitting an empty struct type for the pointer's type allows this to be
+ // order-independent. Non-struct types must be emitted in bitcode before
+ // they can be referenced.
+ TypeVals.push_back(false);
+ Code = bitc::TYPE_CODE_OPAQUE;
+ writeStringRecord(Stream, bitc::TYPE_CODE_STRUCT_NAME,
+ "dxilOpaquePtrReservedName", StructNameAbbrev);
break;
}
case Type::FunctionTyID: {
diff --git a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
index 9e26cd6d9738..cbf21485fb01 100644
--- a/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
+++ b/llvm/lib/Target/DirectX/DXILWriter/DXILBitcodeWriter.h
@@ -10,6 +10,9 @@
//
//===----------------------------------------------------------------------===//
+#ifndef LLVM_DXILWRITER_DXILBITCODEWRITER_H
+#define LLVM_DXILWRITER_DXILBITCODEWRITER_H
+
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/MC/StringTableBuilder.h"
@@ -61,3 +64,5 @@ void WriteDXILToFile(const Module &M, raw_ostream &Out);
} // namespace dxil
} // namespace llvm
+
+#endif // LLVM_DXILWRITER_DXILBITCODEWRITER_H
diff --git a/llvm/lib/Target/DirectX/DirectXIRPasses/PointerTypeAnalysis.cpp b/llvm/lib/Target/DirectX/DirectXIRPasses/PointerTypeAnalysis.cpp
index eea89941983b..97b7a41cac6d 100644
--- a/llvm/lib/Target/DirectX/DirectXIRPasses/PointerTypeAnalysis.cpp
+++ b/llvm/lib/Target/DirectX/DirectXIRPasses/PointerTypeAnalysis.cpp
@@ -22,15 +22,15 @@ namespace {
// Classifies the type of the value passed in by walking the value's users to
// find a typed instruction to materialize a type from.
Type *classifyPointerType(const Value *V, PointerTypeMap &Map) {
- assert(V->getType()->isOpaquePointerTy() &&
- "classifyPointerType called with non-opaque pointer");
+ assert(V->getType()->isPointerTy() &&
+ "classifyPointerType called with non-pointer");
auto It = Map.find(V);
if (It != Map.end())
return It->second;
Type *PointeeTy = nullptr;
if (auto *Inst = dyn_cast<GetElementPtrInst>(V)) {
- if (!Inst->getResultElementType()->isOpaquePointerTy())
+ if (!Inst->getResultElementType()->isPointerTy())
PointeeTy = Inst->getResultElementType();
} else if (auto *Inst = dyn_cast<AllocaInst>(V)) {
PointeeTy = Inst->getAllocatedType();
@@ -45,7 +45,7 @@ Type *classifyPointerType(const Value *V, PointerTypeMap &Map) {
} else if (const auto *Inst = dyn_cast<StoreInst>(User)) {
NewPointeeTy = Inst->getValueOperand()->getType();
// When the stored value is a pointer type, we cannot recover more type info.
- if (NewPointeeTy->isOpaquePointerTy())
+ if (NewPointeeTy->isPointerTy())
continue;
} else if (const auto *Inst = dyn_cast<GetElementPtrInst>(User)) {
NewPointeeTy = Inst->getSourceElementType();
@@ -54,7 +54,7 @@ Type *classifyPointerType(const Value *V, PointerTypeMap &Map) {
// HLSL doesn't support pointers, so it is unlikely to get more than one
// or two levels of indirection in the IR. Because of this, recursion is
// pretty safe.
- if (NewPointeeTy->isOpaquePointerTy()) {
+ if (NewPointeeTy->isPointerTy()) {
PointeeTy = classifyPointerType(User, Map);
break;
}
@@ -85,7 +85,7 @@ Type *classifyFunctionType(const Function &F, PointerTypeMap &Map) {
SmallVector<Type *, 8> NewArgs;
Type *RetTy = F.getReturnType();
LLVMContext &Ctx = F.getContext();
- if (RetTy->isOpaquePointerTy()) {
+ if (RetTy->isPointerTy()) {
RetTy = nullptr;
for (const auto &B : F) {
const auto *RetInst = dyn_cast_or_null<ReturnInst>(B.getTerminator());
@@ -106,7 +106,7 @@ Type *classifyFunctionType(const Function &F, PointerTypeMap &Map) {
}
for (auto &A : F.args()) {
Type *ArgTy = A.getType();
- if (ArgTy->isOpaquePointerTy())
+ if (ArgTy->isPointerTy())
ArgTy = classifyPointerType(&A, Map);
NewArgs.push_back(ArgTy);
}
@@ -189,7 +189,7 @@ static void classifyGlobalCtorPointerType(const GlobalVariable &GV,
PointerTypeMap PointerTypeAnalysis::run(const Module &M) {
PointerTypeMap Map;
for (auto &G : M.globals()) {
- if (G.getType()->isOpaquePointerTy())
+ if (G.getType()->isPointerTy())
classifyPointerType(&G, Map);
if (G.getName() == "llvm.global_ctors")
classifyGlobalCtorPointerType(G, Map);
@@ -200,7 +200,7 @@ PointerTypeMap PointerTypeAnalysis::run(const Module &M) {
for (const auto &B : F) {
for (const auto &I : B) {
- if (I.getType()->isOpaquePointerTy())
+ if (I.getType()->isPointerTy())
classifyPointerType(&I, Map);
}
}
diff --git a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp
index 0c97ab62a37b..cb6d4c5cd0a3 100644
--- a/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp
+++ b/llvm/lib/Target/DirectX/MCTargetDesc/DirectXMCTargetDesc.cpp
@@ -14,7 +14,6 @@
#include "DirectXMCTargetDesc.h"
#include "DirectXContainerObjectWriter.h"
#include "TargetInfo/DirectXTargetInfo.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -27,6 +26,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/TargetParser/Triple.h"
#include <memory>
using namespace llvm;
diff --git a/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp b/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp
index 54c577debc34..ae01626e5229 100644
--- a/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp
+++ b/llvm/lib/Target/DirectX/TargetInfo/DirectXTargetInfo.cpp
@@ -11,9 +11,9 @@
///
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
Target &getTheDirectXTarget() {
diff --git a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 248bd3130c25..ce93715d6c42 100644
--- a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -71,7 +71,7 @@ static cl::opt<bool> ErrorMissingParenthesis(
static cl::opt<bool> WarnSignedMismatch(
"mwarn-sign-mismatch",
cl::desc("Warn for mismatching a signed and unsigned value"),
- cl::init(true));
+ cl::init(false));
static cl::opt<bool> WarnNoncontigiousRegister(
"mwarn-noncontigious-register",
cl::desc("Warn for register names that arent contigious"), cl::init(true));
@@ -516,7 +516,7 @@ bool HexagonAsmParser::matchBundleOptions() {
} else if (Option.compare_insensitive("endloop1") == 0) {
HexagonMCInstrInfo::setOuterLoop(MCB);
} else if (Option.compare_insensitive("mem_noshuf") == 0) {
- if (getSTI().getFeatureBits()[Hexagon::FeatureMemNoShuf])
+ if (getSTI().hasFeature(Hexagon::FeatureMemNoShuf))
HexagonMCInstrInfo::setMemReorderDisabled(MCB);
else
return getParser().Error(IDLoc, MemNoShuffMsg);
@@ -813,7 +813,7 @@ bool HexagonAsmParser::ParseDirectiveComm(bool IsLocal, SMLoc Loc) {
// validate register against architecture
bool HexagonAsmParser::RegisterMatchesArch(unsigned MatchNum) const {
if (HexagonMCRegisterClasses[Hexagon::V62RegsRegClassID].contains(MatchNum))
- if (!getSTI().getFeatureBits()[Hexagon::ArchV62])
+ if (!getSTI().hasFeature(Hexagon::ArchV62))
return false;
return true;
}
@@ -1329,7 +1329,7 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
break;
case Hexagon::J2_trap1:
- if (!getSTI().getFeatureBits()[Hexagon::ArchV65]) {
+ if (!getSTI().hasFeature(Hexagon::ArchV65)) {
MCOperand &Rx = Inst.getOperand(0);
MCOperand &Ry = Inst.getOperand(1);
if (Rx.getReg() != Hexagon::R0 || Ry.getReg() != Hexagon::R0) {
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index de6ca0aa9cbb..c7e22d7d308b 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -428,7 +428,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(MCInst &MI, MCInst &MCB,
STI);
if (Result != MCDisassembler::Success &&
- STI.getFeatureBits()[Hexagon::ExtensionHVX])
+ STI.hasFeature(Hexagon::ExtensionHVX))
Result = decodeInstruction(DecoderTableEXT_mmvec32, MI, Instruction,
Address, this, STI);
diff --git a/llvm/lib/Target/Hexagon/Hexagon.td b/llvm/lib/Target/Hexagon/Hexagon.td
index 5fa9f1ad7211..d31597158cc1 100644
--- a/llvm/lib/Target/Hexagon/Hexagon.td
+++ b/llvm/lib/Target/Hexagon/Hexagon.td
@@ -160,8 +160,8 @@ def UseSmallData : Predicate<"HST->useSmallData()">;
def UseCabac : Predicate<"HST->useCabac()">,
AssemblerPredicate<(any_of FeatureCabac)>;
-def Hvx64: HwMode<"+hvx-length64b">;
-def Hvx128: HwMode<"+hvx-length128b">;
+def Hvx64: HwMode<"+hvx-length64b", [UseHVX64B]>;
+def Hvx128: HwMode<"+hvx-length128b", [UseHVX128B]>;
//===----------------------------------------------------------------------===//
// Classes used for relation maps.
diff --git a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index 688fe6861224..4ee67cb05d49 100644
--- a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -66,8 +66,7 @@ void HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI,
inline static unsigned getHexagonRegisterPair(unsigned Reg,
const MCRegisterInfo *RI) {
assert(Hexagon::IntRegsRegClass.contains(Reg));
- MCSuperRegIterator SR(Reg, RI, false);
- unsigned Pair = *SR;
+ unsigned Pair = *RI->superregs(Reg).begin();
assert(Hexagon::DoubleRegsRegClass.contains(Pair));
return Pair;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 5b12fff8e9a0..6024d9f7b154 100644
--- a/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -3119,8 +3119,7 @@ void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
DebugLoc DL = SI->getDebugLoc();
auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR);
- for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) {
- const MachineOperand &Op = SI->getOperand(j);
+ for (const MachineOperand &Op : SI->operands()) {
if (!Op.isReg()) {
MIB.add(Op);
continue;
diff --git a/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
index aab543dde76b..a027f2cedca0 100644
--- a/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -329,7 +329,7 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
int FI = op(1).getIndex();
int Off = op(2).getImm();
unsigned A = MFI.getObjectAlign(FI).value() + std::abs(Off);
- unsigned L = countTrailingZeros(A);
+ unsigned L = llvm::countr_zero(A);
RegisterCell RC = RegisterCell::self(Reg[0].Reg, W0);
RC.fill(0, L, BT::BitValue::Zero);
return rr0(RC, Outputs);
diff --git a/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp b/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp
index 38103cdd30ad..0d3b986b9629 100644
--- a/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp
@@ -269,11 +269,10 @@ HexagonBlockRanges::RegisterSet HexagonBlockRanges::expandToSubRegs(
}
if (R.Reg.isPhysical()) {
- MCSubRegIterator I(R.Reg, &TRI);
- if (!I.isValid())
+ if (TRI.subregs(R.Reg).empty())
SRs.insert({R.Reg, 0});
- for (; I.isValid(); ++I)
- SRs.insert({*I, 0});
+ for (MCPhysReg I : TRI.subregs(R.Reg))
+ SRs.insert({I, 0});
} else {
assert(R.Reg.isVirtual());
auto &RC = *MRI.getRegClass(R.Reg);
@@ -355,7 +354,7 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
// Skip registers that have subregisters. A register is preserved
// iff its bit is set in the regmask, so if R1:0 was preserved, both
// R1 and R0 would also be present.
- if (MCSubRegIterator(PR, &TRI, false).isValid())
+ if (!TRI.subregs(PR).empty())
continue;
if (Reserved[PR])
continue;
@@ -374,8 +373,7 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
// Update maps for defs.
for (RegisterRef S : Defs) {
// Defs should already be expanded into subregs.
- assert(!S.Reg.isPhysical() ||
- !MCSubRegIterator(S.Reg, &TRI, false).isValid());
+ assert(!S.Reg.isPhysical() || TRI.subregs(S.Reg).empty());
if (LastDef[S] != IndexType::None || LastUse[S] != IndexType::None)
closeRange(S);
LastDef[S] = Index;
@@ -383,8 +381,7 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
// Update maps for clobbers.
for (RegisterRef S : Clobbers) {
// Clobbers should already be expanded into subregs.
- assert(!S.Reg.isPhysical() ||
- !MCSubRegIterator(S.Reg, &TRI, false).isValid());
+ assert(!S.Reg.isPhysical() || TRI.subregs(S.Reg).empty());
if (LastDef[S] != IndexType::None || LastUse[S] != IndexType::None)
closeRange(S);
// Create a single-instruction range.
diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index 56fb50cdb09e..400bb6cfc731 100644
--- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -1745,7 +1745,7 @@ bool HCE::replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,
// "alignment" as Diff.
uint32_t UD = Diff;
OffsetRange R = getOffsetRange(MI.getOperand(0));
- uint32_t A = std::min<uint32_t>(R.Align, 1u << countTrailingZeros(UD));
+ uint32_t A = std::min<uint32_t>(R.Align, 1u << llvm::countr_zero(UD));
D &= ~(A-1);
}
BuildMI(MBB, At, dl, HII->get(IdxOpc))
diff --git a/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
index 88517ee663d1..47fbf0a69518 100644
--- a/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -357,7 +357,6 @@ namespace {
bool getCell(const RegisterSubReg &R, const CellMap &Inputs, LatticeCell &RC);
bool constToInt(const Constant *C, APInt &Val) const;
- bool constToFloat(const Constant *C, APFloat &Val) const;
const ConstantInt *intToConst(const APInt &Val) const;
// Compares.
@@ -1687,9 +1686,9 @@ bool MachineConstEvaluator::evaluateCLBi(const APInt &A1, bool Zeros,
return false;
unsigned Count = 0;
if (Zeros && (Count == 0))
- Count = A1.countLeadingZeros();
+ Count = A1.countl_zero();
if (Ones && (Count == 0))
- Count = A1.countLeadingOnes();
+ Count = A1.countl_one();
Result = APInt(BW, static_cast<uint64_t>(Count), false);
return true;
}
@@ -1722,9 +1721,9 @@ bool MachineConstEvaluator::evaluateCTBi(const APInt &A1, bool Zeros,
return false;
unsigned Count = 0;
if (Zeros && (Count == 0))
- Count = A1.countTrailingZeros();
+ Count = A1.countr_zero();
if (Ones && (Count == 0))
- Count = A1.countTrailingOnes();
+ Count = A1.countr_one();
Result = APInt(BW, static_cast<uint64_t>(Count), false);
return true;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index f630bcba379f..033e6737f8bb 100644
--- a/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -436,8 +436,8 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) {
continue;
Register Reg = Op.getReg();
if (Hexagon::DoubleRegsRegClass.contains(Reg)) {
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
- LastDef[*SubRegs] = &MI;
+ for (MCPhysReg SubReg : TRI->subregs(Reg))
+ LastDef[SubReg] = &MI;
} else if (Hexagon::IntRegsRegClass.contains(Reg))
LastDef[Reg] = &MI;
} else if (Op.isRegMask()) {
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index dcb59773ce48..231ac0825ee1 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -252,13 +252,13 @@ static Register getMax32BitSubRegister(Register Reg,
return Reg;
Register RegNo = 0;
- for (MCSubRegIterator SubRegs(Reg, &TRI); SubRegs.isValid(); ++SubRegs) {
+ for (MCPhysReg SubReg : TRI.subregs(Reg)) {
if (hireg) {
- if (*SubRegs > RegNo)
- RegNo = *SubRegs;
+ if (SubReg > RegNo)
+ RegNo = SubReg;
} else {
- if (!RegNo || *SubRegs < RegNo)
- RegNo = *SubRegs;
+ if (!RegNo || SubReg < RegNo)
+ RegNo = SubReg;
}
}
return RegNo;
@@ -307,12 +307,15 @@ static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
return true;
if (MO.isReg()) {
Register R = MO.getReg();
+ // Debug instructions may refer to $noreg.
+ if (!R)
+ continue;
// Virtual registers will need scavenging, which then may require
// a stack slot.
if (R.isVirtual())
return true;
- for (MCSubRegIterator S(R, &HRI, true); S.isValid(); ++S)
- if (CSR[*S])
+ for (MCPhysReg S : HRI.subregs_inclusive(R))
+ if (CSR[S])
return true;
continue;
}
@@ -439,8 +442,8 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
SmallVector<MachineBasicBlock*,16> SFBlocks;
BitVector CSR(Hexagon::NUM_TARGET_REGS);
for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; ++P)
- for (MCSubRegIterator S(*P, &HRI, true); S.isValid(); ++S)
- CSR[*S] = true;
+ for (MCPhysReg S : HRI.subregs_inclusive(*P))
+ CSR[S] = true;
for (auto &I : MF)
if (needsStackFrame(I, CSR, HRI))
@@ -1569,8 +1572,8 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
for (const CalleeSavedInfo &I : CSI) {
Register R = I.getReg();
LLVM_DEBUG(dbgs() << ' ' << printReg(R, TRI));
- for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
- SRegs[*SR] = true;
+ for (MCPhysReg SR : TRI->subregs_inclusive(R))
+ SRegs[SR] = true;
}
LLVM_DEBUG(dbgs() << " }\n");
LLVM_DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI);
@@ -1586,23 +1589,23 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
if (AP.isValid()) {
Reserved[AP] = false;
// Unreserve super-regs if no other subregisters are reserved.
- for (MCSuperRegIterator SP(AP, TRI, false); SP.isValid(); ++SP) {
+ for (MCPhysReg SP : TRI->superregs(AP)) {
bool HasResSub = false;
- for (MCSubRegIterator SB(*SP, TRI, false); SB.isValid(); ++SB) {
- if (!Reserved[*SB])
+ for (MCPhysReg SB : TRI->subregs(SP)) {
+ if (!Reserved[SB])
continue;
HasResSub = true;
break;
}
if (!HasResSub)
- Reserved[*SP] = false;
+ Reserved[SP] = false;
}
}
for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) {
Register R = x;
- for (MCSuperRegIterator SR(R, TRI, true); SR.isValid(); ++SR)
- SRegs[*SR] = false;
+ for (MCPhysReg SR : TRI->superregs_inclusive(R))
+ SRegs[SR] = false;
}
LLVM_DEBUG(dbgs() << "Res: "; dump_registers(Reserved, *TRI);
dbgs() << "\n");
@@ -1616,13 +1619,13 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
BitVector TmpSup(Hexagon::NUM_TARGET_REGS);
for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
Register R = x;
- for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR)
- TmpSup[*SR] = true;
+ for (MCPhysReg SR : TRI->superregs(R))
+ TmpSup[SR] = true;
}
for (int x = TmpSup.find_first(); x >= 0; x = TmpSup.find_next(x)) {
Register R = x;
- for (MCSubRegIterator SR(R, TRI, true); SR.isValid(); ++SR) {
- if (!Reserved[*SR])
+ for (MCPhysReg SR : TRI->subregs_inclusive(R)) {
+ if (!Reserved[SR])
continue;
TmpSup[R] = false;
break;
@@ -1640,8 +1643,8 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
// remove R from SRegs.
for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
Register R = x;
- for (MCSuperRegIterator SR(R, TRI); SR.isValid(); ++SR) {
- if (!SRegs[*SR])
+ for (MCPhysReg SR : TRI->superregs(R)) {
+ if (!SRegs[SR])
continue;
SRegs[R] = false;
break;
diff --git a/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp b/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
index d9307190ae16..3274f9162b54 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
@@ -178,7 +178,7 @@ bool HexagonGenExtract::convert(Instruction *In) {
// CM is the shifted-left mask. Shift it back right to remove the zero
  // bits in the least-significant positions.
APInt M = CM->getValue().lshr(SL);
- uint32_t T = M.countTrailingOnes();
+ uint32_t T = M.countr_one();
  // During the shifts some of the bits will be lost. Calculate how many bits
  // of the original value will remain after the shift right and then left.
diff --git a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
index 9461532ab159..509144ceb6a0 100644
--- a/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -144,8 +144,8 @@ INITIALIZE_PASS(HexagonGenMux, "hexagon-gen-mux",
"Hexagon generate mux instructions", false, false)
void HexagonGenMux::getSubRegs(unsigned Reg, BitVector &SRs) const {
- for (MCSubRegIterator I(Reg, HRI); I.isValid(); ++I)
- SRs[*I] = true;
+ for (MCPhysReg I : HRI->subregs(Reg))
+ SRs[I] = true;
}
void HexagonGenMux::expandReg(unsigned Reg, BitVector &Set) const {
@@ -348,9 +348,9 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
LivePhysRegs LPR(*HRI);
LPR.addLiveOuts(B);
- auto IsLive = [&LPR,this] (unsigned Reg) -> bool {
- for (MCSubRegIterator S(Reg, HRI, true); S.isValid(); ++S)
- if (LPR.contains(*S))
+ auto IsLive = [&LPR, this](unsigned Reg) -> bool {
+ for (MCPhysReg S : HRI->subregs_inclusive(Reg))
+ if (LPR.contains(S))
return true;
return false;
};
diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index becc66a4ee90..6b0315bc1bef 100644
--- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -1059,8 +1059,7 @@ bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
return false;
MachineInstr *OnePhi = I->getParent();
- for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
- const MachineOperand &OPO = OnePhi->getOperand(j);
+ for (const MachineOperand &OPO : OnePhi->operands()) {
if (!OPO.isReg() || !OPO.isDef())
continue;
@@ -1702,8 +1701,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
// operands. Assume that if the compare has a single register use and a
// single immediate operand, then the register is being compared with the
// immediate value.
- for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
- MachineOperand &MO = PredDef->getOperand(i);
+ for (MachineOperand &MO : PredDef->operands()) {
if (MO.isReg()) {
// Skip all implicit references. In one case there was:
// %140 = FCMPUGT32_rr %138, %139, implicit %usr
@@ -1818,8 +1816,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
// Finally, fix the compare instruction.
setImmediate(*CmpImmOp, CmpImm);
- for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
- MachineOperand &MO = PredDef->getOperand(i);
+ for (MachineOperand &MO : PredDef->operands()) {
if (MO.isReg() && MO.getReg() == RB.first) {
MO.setReg(I->first);
return true;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 855c4ac4bca2..5aad71a0a1c9 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -1170,9 +1170,9 @@ void HexagonDAGToDAGISel::ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes) {
continue;
uint32_t Mask = MN->getZExtValue();
// Examine the mask.
- uint32_t TZ = countTrailingZeros(Mask);
- uint32_t M1 = countTrailingOnes(Mask >> TZ);
- uint32_t LZ = countLeadingZeros(Mask);
+ uint32_t TZ = llvm::countr_zero(Mask);
+ uint32_t M1 = llvm::countr_one(Mask >> TZ);
+ uint32_t LZ = llvm::countl_zero(Mask);
// Trailing zeros + middle ones + leading zeros must equal the width.
if (TZ + M1 + LZ != 32)
continue;
@@ -1867,7 +1867,7 @@ static unsigned getPowerOf2Factor(SDValue Val) {
continue;
const APInt &CInt = C->getAPIntValue();
if (CInt.getBoolValue())
- MaxFactor = CInt.countTrailingZeros();
+ MaxFactor = CInt.countr_zero();
}
return MaxFactor;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
index 020fb2d1dd16..57b5f9a28794 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAGHVX.cpp
@@ -1749,40 +1749,72 @@ void HvxSelector::select(SDNode *ISelN) {
// node in the DAG.
assert(ISelN->getOpcode() == HexagonISD::ISEL);
SDNode *N0 = ISelN->getOperand(0).getNode();
- if (N0->isMachineOpcode()) {
- ISel.ReplaceNode(ISelN, N0);
- return;
- }
// There could have been nodes created (i.e. inserted into the DAG)
// that are now dead. Remove them, in case they use any of the nodes
// to select (and make them look shared).
DAG.RemoveDeadNodes();
- SetVector<SDNode*> SubNodes, TmpQ;
- std::map<SDNode*,unsigned> NumOps;
+ SetVector<SDNode *> SubNodes;
+
+ if (!N0->isMachineOpcode()) {
+ // Don't want to select N0 if it's shared with another node, except if
+ // it's shared with other ISELs.
+ auto IsISelN = [](SDNode *T) { return T->getOpcode() == HexagonISD::ISEL; };
+ if (llvm::all_of(N0->uses(), IsISelN))
+ SubNodes.insert(N0);
+ }
+ if (SubNodes.empty()) {
+ ISel.ReplaceNode(ISelN, N0);
+ return;
+ }
+
+ // Need to manually select the nodes that are dominated by the ISEL. Other
+ // nodes are reachable from the rest of the DAG, and so will be selected
+ // by the DAG selection routine.
+ SetVector<SDNode*> Dom, NonDom;
+ Dom.insert(N0);
+
+ auto IsDomRec = [&Dom, &NonDom] (SDNode *T, auto Rec) -> bool {
+ if (Dom.count(T))
+ return true;
+ if (T->use_empty() || NonDom.count(T))
+ return false;
+ for (SDNode *U : T->uses()) {
+ // If T is reachable from a known non-dominated node, then T itself
+ // is non-dominated.
+ if (!Rec(U, Rec)) {
+ NonDom.insert(T);
+ return false;
+ }
+ }
+ Dom.insert(T);
+ return true;
+ };
- // Don't want to select N0 if it's shared with another node, except if
- // it's shared with other ISELs.
- auto IsISelN = [](SDNode *T) { return T->getOpcode() == HexagonISD::ISEL; };
- if (llvm::all_of(N0->uses(), IsISelN))
- SubNodes.insert(N0);
+ auto IsDom = [&IsDomRec] (SDNode *T) { return IsDomRec(T, IsDomRec); };
- auto InSubNodes = [&SubNodes](SDNode *T) { return SubNodes.count(T); };
+ // Add the rest of nodes dominated by ISEL to SubNodes.
for (unsigned I = 0; I != SubNodes.size(); ++I) {
- SDNode *S = SubNodes[I];
- unsigned OpN = 0;
- // Only add subnodes that are only reachable from N0.
- for (SDValue Op : S->ops()) {
+ for (SDValue Op : SubNodes[I]->ops()) {
SDNode *O = Op.getNode();
- if (llvm::all_of(O->uses(), InSubNodes)) {
+ if (IsDom(O))
SubNodes.insert(O);
- ++OpN;
- }
}
- NumOps.insert({S, OpN});
- if (OpN == 0)
- TmpQ.insert(S);
+ }
+
+ // Do a topological sort of nodes from Dom.
+ SetVector<SDNode*> TmpQ;
+
+ std::map<SDNode *, unsigned> OpCount;
+ for (SDNode *T : Dom) {
+ unsigned NumDomOps = llvm::count_if(T->ops(), [&Dom](const SDUse &U) {
+ return Dom.count(U.getNode());
+ });
+
+ OpCount.insert({T, NumDomOps});
+ if (NumDomOps == 0)
+ TmpQ.insert(T);
}
for (unsigned I = 0; I != TmpQ.size(); ++I) {
@@ -1790,8 +1822,8 @@ void HvxSelector::select(SDNode *ISelN) {
for (SDNode *U : S->uses()) {
if (U == ISelN)
continue;
- auto F = NumOps.find(U);
- assert(F != NumOps.end());
+ auto F = OpCount.find(U);
+ assert(F != OpCount.end());
if (F->second > 0 && !--F->second)
TmpQ.insert(F->first);
}
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 202fc473f9e4..db2d2eb9813c 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -216,7 +216,7 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
else
CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
- SDValue Flag;
+ SDValue Glue;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
@@ -244,20 +244,20 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
break;
}
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Val, Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Val, Glue);
// Guarantee that all emitted copies are stuck together with flags.
- Flag = Chain.getValue(1);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
RetOps[0] = Chain; // Update chain.
- // Add the flag if we have it.
- if (Flag.getNode())
- RetOps.push_back(Flag);
+ // Add the glue if we have it.
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
- return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps);
+ return DAG.getNode(HexagonISD::RET_GLUE, dl, MVT::Other, RetOps);
}
bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
@@ -453,7 +453,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
"Not eligible for Tail Call\n"));
}
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
+ unsigned NumBytes = CCInfo.getStackSize();
SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
@@ -907,7 +907,7 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
if (RegSaveAreaSizePlusPadding > 0) {
    // The offset to the saved register area should be 8-byte aligned.
- int RegAreaStart = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
+ int RegAreaStart = HEXAGON_LRFP_SIZE + CCInfo.getStackSize();
if (!(RegAreaStart % 8))
RegAreaStart = (RegAreaStart + 7) & -8;
@@ -922,7 +922,7 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
} else {
// This will point to the next argument passed via stack, when
// there is no saved register area.
- int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
+ int Offset = HEXAGON_LRFP_SIZE + CCInfo.getStackSize();
int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
HMFI.setRegSavedAreaStartFrameIndex(FI);
HMFI.setVarArgsFrameIndex(FI);
@@ -932,7 +932,7 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
if (IsVarArg && !Subtarget.isEnvironmentMusl()) {
// This will point to the next argument passed via stack.
- int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
+ int Offset = HEXAGON_LRFP_SIZE + CCInfo.getStackSize();
int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
HMFI.setVarArgsFrameIndex(FI);
}
@@ -1391,15 +1391,15 @@ HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
SDValue Chain = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
// Copy over the argument to R0
- SDValue InFlag;
- Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag);
- InFlag = Chain.getValue(1);
+ SDValue InGlue;
+ Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InGlue);
+ InGlue = Chain.getValue(1);
unsigned Flags = DAG.getSubtarget<HexagonSubtarget>().useLongCalls()
? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended
: HexagonII::MO_GDPLT;
- return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT,
+ return GetDynamicTLSAddr(DAG, Chain, GA, InGlue, PtrVT,
Hexagon::R0, Flags);
}
@@ -1545,15 +1545,15 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
// but they only operate on i64.
for (MVT VT : MVT::integer_valuetypes()) {
- setOperationAction(ISD::UADDO, VT, Custom);
- setOperationAction(ISD::USUBO, VT, Custom);
- setOperationAction(ISD::SADDO, VT, Expand);
- setOperationAction(ISD::SSUBO, VT, Expand);
- setOperationAction(ISD::ADDCARRY, VT, Expand);
- setOperationAction(ISD::SUBCARRY, VT, Expand);
+ setOperationAction(ISD::UADDO, VT, Custom);
+ setOperationAction(ISD::USUBO, VT, Custom);
+ setOperationAction(ISD::SADDO, VT, Expand);
+ setOperationAction(ISD::SSUBO, VT, Expand);
+ setOperationAction(ISD::UADDO_CARRY, VT, Expand);
+ setOperationAction(ISD::USUBO_CARRY, VT, Expand);
}
- setOperationAction(ISD::ADDCARRY, MVT::i64, Custom);
- setOperationAction(ISD::SUBCARRY, MVT::i64, Custom);
+ setOperationAction(ISD::UADDO_CARRY, MVT::i64, Custom);
+ setOperationAction(ISD::USUBO_CARRY, MVT::i64, Custom);
setOperationAction(ISD::CTLZ, MVT::i8, Promote);
setOperationAction(ISD::CTLZ, MVT::i16, Promote);
@@ -1628,14 +1628,14 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
ISD::UADDO, ISD::SSUBO, ISD::USUBO, ISD::SMUL_LOHI, ISD::UMUL_LOHI,
// Logical/bit:
ISD::AND, ISD::OR, ISD::XOR, ISD::ROTL, ISD::ROTR,
- ISD::CTPOP, ISD::CTLZ, ISD::CTTZ,
+ ISD::CTPOP, ISD::CTLZ, ISD::CTTZ, ISD::BSWAP, ISD::BITREVERSE,
// Floating point arithmetic/math functions:
ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FMA, ISD::FDIV,
ISD::FREM, ISD::FNEG, ISD::FABS, ISD::FSQRT, ISD::FSIN,
ISD::FCOS, ISD::FPOW, ISD::FLOG, ISD::FLOG2,
ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FCEIL, ISD::FTRUNC,
ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR,
- ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS,
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS, ISD::FLDEXP,
// Misc:
ISD::BR_CC, ISD::SELECT_CC, ISD::ConstantPool,
// Vector:
@@ -1701,8 +1701,11 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::OR, NativeVT, Legal);
setOperationAction(ISD::XOR, NativeVT, Legal);
- if (NativeVT.getVectorElementType() != MVT::i1)
+ if (NativeVT.getVectorElementType() != MVT::i1) {
setOperationAction(ISD::SPLAT_VECTOR, NativeVT, Legal);
+ setOperationAction(ISD::BSWAP, NativeVT, Legal);
+ setOperationAction(ISD::BITREVERSE, NativeVT, Legal);
+ }
}
for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32}) {
@@ -1728,6 +1731,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STORE, VT, Custom);
}
+ // Normalize integer compares to EQ/GT/UGT
for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16,
MVT::v2i32}) {
setCondCodeAction(ISD::SETNE, VT, Expand);
@@ -1739,6 +1743,14 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setCondCodeAction(ISD::SETULT, VT, Expand);
}
+ // Normalize boolean compares to [U]LE/[U]LT
+ for (MVT VT : {MVT::i1, MVT::v2i1, MVT::v4i1, MVT::v8i1}) {
+ setCondCodeAction(ISD::SETGE, VT, Expand);
+ setCondCodeAction(ISD::SETGT, VT, Expand);
+ setCondCodeAction(ISD::SETUGE, VT, Expand);
+ setCondCodeAction(ISD::SETUGT, VT, Expand);
+ }
+
// Custom-lower bitcasts from i8 to v8i1.
setOperationAction(ISD::BITCAST, MVT::i8, Custom);
setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
@@ -1896,7 +1908,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU";
case HexagonISD::INSERT: return "HexagonISD::INSERT";
case HexagonISD::JT: return "HexagonISD::JT";
- case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
+ case HexagonISD::RET_GLUE: return "HexagonISD::RET_GLUE";
case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
case HexagonISD::VASL: return "HexagonISD::VASL";
case HexagonISD::VASR: return "HexagonISD::VASR";
@@ -1940,7 +1952,7 @@ HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, Align NeedAlign,
return true;
unsigned Addr = CA->getZExtValue();
Align HaveAlign =
- Addr != 0 ? Align(1ull << countTrailingZeros(Addr)) : NeedAlign;
+ Addr != 0 ? Align(1ull << llvm::countr_zero(Addr)) : NeedAlign;
if (HaveAlign >= NeedAlign)
return true;
@@ -2573,7 +2585,7 @@ HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
}
#ifndef NDEBUG
- dbgs() << "VecTy: " << EVT(VecTy).getEVTString() << '\n';
+ dbgs() << "VecTy: " << VecTy << '\n';
#endif
llvm_unreachable("Unexpected vector element type");
}
@@ -3240,7 +3252,7 @@ HexagonTargetLowering::LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const {
unsigned Opc = Op.getOpcode();
if (CY) {
- uint32_t VY = CY->getZExtValue();
+ uint64_t VY = CY->getZExtValue();
assert(VY != 0 && "This should have been folded");
// X +/- 1
if (VY != 1)
@@ -3263,13 +3275,13 @@ HexagonTargetLowering::LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
-SDValue
-HexagonTargetLowering::LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const {
+SDValue HexagonTargetLowering::LowerUAddSubOCarry(SDValue Op,
+ SelectionDAG &DAG) const {
const SDLoc &dl(Op);
unsigned Opc = Op.getOpcode();
SDValue X = Op.getOperand(0), Y = Op.getOperand(1), C = Op.getOperand(2);
- if (Opc == ISD::ADDCARRY)
+ if (Opc == ISD::UADDO_CARRY)
return DAG.getNode(HexagonISD::ADDC, dl, Op.getNode()->getVTList(),
{ X, Y, C });
@@ -3342,8 +3354,8 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::STORE: return LowerStore(Op, DAG);
case ISD::UADDO:
case ISD::USUBO: return LowerUAddSubO(Op, DAG);
- case ISD::ADDCARRY:
- case ISD::SUBCARRY: return LowerAddSubCarry(Op, DAG);
+ case ISD::UADDO_CARRY:
+ case ISD::USUBO_CARRY: return LowerUAddSubOCarry(Op, DAG);
case ISD::SRA:
case ISD::SHL:
case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG);
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index c922aa30155b..8c7d0b70f385 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -18,12 +18,12 @@
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/InlineAsm.h"
-#include "llvm/Support/MachineValueType.h"
#include <cstdint>
#include <utility>
@@ -47,7 +47,7 @@ enum NodeType : unsigned {
CALLnr, // Function call that does not return.
CALLR,
- RET_FLAG, // Return with a flag operand.
+ RET_GLUE, // Return with a glue operand.
BARRIER, // Memory barrier.
JT, // Jump table.
CP, // Constant pool.
@@ -200,7 +200,7 @@ public:
SDValue LowerStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerUAddSubOCarry(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
@@ -223,13 +223,13 @@ public:
SDValue LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG) const;
SDValue GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
- GlobalAddressSDNode *GA, SDValue InFlag, EVT PtrVT,
- unsigned ReturnReg, unsigned char OperandFlags) const;
+ GlobalAddressSDNode *GA, SDValue InGlue, EVT PtrVT,
+ unsigned ReturnReg, unsigned char OperandGlues) const;
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
@@ -545,7 +545,6 @@ private:
SDValue LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerHvxSetCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerHvxShift(SDValue Op, SelectionDAG &DAG) const;
@@ -565,7 +564,6 @@ private:
SDValue WidenHvxStore(SDValue Op, SelectionDAG &DAG) const;
SDValue WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const;
- SDValue WidenHvxFpIntConv(SDValue Op, SelectionDAG &DAG) const;
SDValue ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG) const;
SDValue EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
index b306e79c0b12..659997036170 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLoweringHVX.cpp
@@ -166,6 +166,7 @@ HexagonTargetLowering::initializeHVXLowering() {
setOperationAction(ISD::FMUL, P, Custom);
setOperationAction(ISD::FMINNUM, P, Custom);
setOperationAction(ISD::FMAXNUM, P, Custom);
+ setOperationAction(ISD::SETCC, P, Custom);
setOperationAction(ISD::VSELECT, P, Custom);
// Custom-lower BUILD_VECTOR. The standard (target-independent)
@@ -1414,9 +1415,9 @@ HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
// would be by HwLen-Idx, but if two words are inserted, it will need to be
// by (HwLen-4)-Idx.
unsigned RolBase = HwLen;
- if (VecTy.getSizeInBits() == 32) {
+ if (SubTy.getSizeInBits() == 32) {
SDValue V = DAG.getBitcast(MVT::i32, SubV);
- SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, V);
+ SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, V);
} else {
SDValue V = DAG.getBitcast(MVT::i64, SubV);
SDValue R0 = LoHalf(V, DAG);
@@ -3625,7 +3626,7 @@ HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
break;
case HexagonISD::VINSERTW0:
if (isUndef(Ops[1]))
- return Ops[0];;
+ return Ops[0];
break;
case HexagonISD::VROR: {
if (Ops[0].getOpcode() == HexagonISD::VROR) {
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index a636f7c4264e..6f0210763bc5 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -33,6 +34,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
@@ -48,7 +50,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -2177,11 +2178,17 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const {
// have 'isExtended' flag set.
assert(MO.isImm() && "Extendable operand must be Immediate type");
- int MinValue = getMinValue(MI);
- int MaxValue = getMaxValue(MI);
- int ImmValue = MO.getImm();
-
- return (ImmValue < MinValue || ImmValue > MaxValue);
+ int64_t Value = MO.getImm();
+ if ((F >> HexagonII::ExtentSignedPos) & HexagonII::ExtentSignedMask) {
+ int32_t SValue = Value;
+ int32_t MinValue = getMinValue(MI);
+ int32_t MaxValue = getMaxValue(MI);
+ return SValue < MinValue || SValue > MaxValue;
+ }
+ uint32_t UValue = Value;
+ uint32_t MinValue = getMinValue(MI);
+ uint32_t MaxValue = getMaxValue(MI);
+ return UValue < MinValue || UValue > MaxValue;
}
bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const {
@@ -2219,15 +2226,11 @@ bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI,
if (RegA == RegB)
return true;
- if (RegA.isPhysical())
- for (MCSubRegIterator SubRegs(RegA, &HRI); SubRegs.isValid(); ++SubRegs)
- if (RegB == *SubRegs)
- return true;
+ if (RegA.isPhysical() && llvm::is_contained(HRI.subregs(RegA), RegB))
+ return true;
- if (RegB.isPhysical())
- for (MCSubRegIterator SubRegs(RegB, &HRI); SubRegs.isValid(); ++SubRegs)
- if (RegA == *SubRegs)
- return true;
+ if (RegB.isPhysical() && llvm::is_contained(HRI.subregs(RegB), RegA))
+ return true;
}
return false;
@@ -3844,7 +3847,7 @@ int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const {
// All Hexagon architectures have prediction bits on dot-new branches,
// but only Hexagon V60+ has prediction bits on dot-old ones. Make sure
// to pick the right opcode when converting back to dot-old.
- if (!Subtarget.getFeatureBits()[Hexagon::ArchV60]) {
+ if (!Subtarget.hasFeature(Hexagon::ArchV60)) {
switch (NewOp) {
case Hexagon::J2_jumptpt:
NewOp = Hexagon::J2_jumpt;
@@ -4304,8 +4307,8 @@ int HexagonInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
if (DefMO.isReg() && DefMO.getReg().isPhysical()) {
if (DefMO.isImplicit()) {
- for (MCSuperRegIterator SR(DefMO.getReg(), &HRI); SR.isValid(); ++SR) {
- int Idx = DefMI.findRegisterDefOperandIdx(*SR, false, false, &HRI);
+ for (MCPhysReg SR : HRI.superregs(DefMO.getReg())) {
+ int Idx = DefMI.findRegisterDefOperandIdx(SR, false, false, &HRI);
if (Idx != -1) {
DefIdx = Idx;
break;
@@ -4315,8 +4318,8 @@ int HexagonInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
const MachineOperand &UseMO = UseMI.getOperand(UseIdx);
if (UseMO.isImplicit()) {
- for (MCSuperRegIterator SR(UseMO.getReg(), &HRI); SR.isValid(); ++SR) {
- int Idx = UseMI.findRegisterUseOperandIdx(*SR, false, &HRI);
+ for (MCPhysReg SR : HRI.superregs(UseMO.getReg())) {
+ int Idx = UseMI.findRegisterUseOperandIdx(SR, false, &HRI);
if (Idx != -1) {
UseIdx = Idx;
break;
diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 7c9a9f7918a1..0bc0877f6e70 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -17,9 +17,9 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/Support/MachineValueType.h"
#include <cstdint>
#include <vector>
diff --git a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
index 370ea5fc83d6..6f20c823df85 100644
--- a/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -356,14 +356,14 @@ defm : T_VVI_inv_pat <V6_valignbi, int_hexagon_V6_vlalignb>;
defm : T_VVR_pat <V6_vlalignb, int_hexagon_V6_vlalignbi>;
def: Pat<(int_hexagon_V6_vd0),
- (V6_vd0)>, Requires<[HasV60, UseHVX64B]>;
+ (V6_vd0)>, Requires<[UseHVXV60, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vd0_128B ),
- (V6_vd0)>, Requires<[HasV60, UseHVX128B]>;
+ (V6_vd0)>, Requires<[UseHVXV60, UseHVX128B]>;
def: Pat<(int_hexagon_V6_vdd0),
- (V6_vdd0)>, Requires<[HasV65, UseHVX64B]>;
+ (V6_vdd0)>, Requires<[UseHVXV65, UseHVX64B]>;
def: Pat<(int_hexagon_V6_vdd0_128B),
- (V6_vdd0)>, Requires<[HasV65, UseHVX128B]>;
+ (V6_vdd0)>, Requires<[UseHVXV65, UseHVX128B]>;
multiclass T_VP_pat<InstHexagon MI, Intrinsic IntID> {
@@ -383,7 +383,7 @@ multiclass T_WVP_pat<InstHexagon MI, Intrinsic IntID> {
}
// These are actually only in V65.
-let Predicates = [HasV65, UseHVX] in {
+let Predicates = [UseHVXV65, UseHVX] in {
defm: T_VP_pat<V6_vrmpyub_rtt, int_hexagon_V6_vrmpyub_rtt>;
defm: T_VP_pat<V6_vrmpybub_rtt, int_hexagon_V6_vrmpybub_rtt>;
@@ -408,7 +408,7 @@ multiclass T_pRM_pat<InstHexagon MI, Intrinsic IntID> {
(MI PredRegs:$P, IntRegs:$R, ModRegs:$M)>;
}
-let Predicates = [HasV62, UseHVX] in {
+let Predicates = [UseHVXV62, UseHVX] in {
defm: T_pRI_pat<V6_vL32b_pred_ai, int_hexagon_V6_vL32b_pred_ai>;
defm: T_pRI_pat<V6_vL32b_npred_ai, int_hexagon_V6_vL32b_npred_ai>;
defm: T_pRI_pat<V6_vL32b_pred_pi, int_hexagon_V6_vL32b_pred_pi>;
@@ -440,7 +440,7 @@ multiclass T_pRMV_pat<InstHexagon MI, Intrinsic IntID> {
(MI PredRegs:$P, IntRegs:$R, ModRegs:$M, HvxVR:$V)>;
}
-let Predicates = [HasV60, UseHVX] in {
+let Predicates = [UseHVXV60, UseHVX] in {
defm: T_pRIV_pat<V6_vS32b_pred_ai, int_hexagon_V6_vS32b_pred_ai>;
defm: T_pRIV_pat<V6_vS32b_npred_ai, int_hexagon_V6_vS32b_npred_ai>;
defm: T_pRIV_pat<V6_vS32b_pred_pi, int_hexagon_V6_vS32b_pred_pi>;
diff --git a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
index 113e63fdb934..d3d12664228b 100644
--- a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
@@ -14,7 +14,6 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
@@ -56,6 +55,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -336,7 +336,7 @@ void Simplifier::Context::initialize(Instruction *Exp) {
while (!Q.empty()) {
Value *V = Q.pop_front_val();
- if (M.find(V) != M.end())
+ if (M.contains(V))
continue;
if (Instruction *U = dyn_cast<Instruction>(V)) {
if (isa<PHINode>(U) || U->getParent() != Block)
@@ -664,7 +664,7 @@ Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
continue;
if (auto *T = dyn_cast<ConstantInt>(IncV))
- if (T->getZExtValue() == 1)
+ if (T->isOne())
return PN;
}
return nullptr;
@@ -1280,7 +1280,7 @@ bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,
// Assume that all inputs to the value have the high bits zero.
// Check if the value itself preserves the zeros in the high bits.
if (auto *C = dyn_cast<ConstantInt>(V))
- return C->getValue().countLeadingZeros() >= IterCount;
+ return C->getValue().countl_zero() >= IterCount;
if (auto *I = dyn_cast<Instruction>(V)) {
switch (I->getOpcode()) {
diff --git a/llvm/lib/Target/Hexagon/HexagonPatterns.td b/llvm/lib/Target/Hexagon/HexagonPatterns.td
index a75ac0e1378e..d03c39d949ff 100644
--- a/llvm/lib/Target/Hexagon/HexagonPatterns.td
+++ b/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -117,8 +117,8 @@ def usat: PatFrag<(ops node:$V, node:$Ty), (HexagonUSAT node:$V, node:$Ty)>;
// Pattern fragments to extract the low and high subregisters from a
// 64-bit value.
-def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>;
-def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_hi)>;
+def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG $Rs, isub_lo)>;
+def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG $Rs, isub_hi)>;
def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{
return isOrEquivalentToAdd(N);
@@ -562,6 +562,20 @@ def: Pat<(v4i8 (trunc V4I16:$Rs)),
def: Pat<(v2i16 (trunc V2I32:$Rs)),
(A2_combine_ll (HiReg $Rs), (LoReg $Rs))>;
+// Truncate to vNi1
+def: Pat<(v2i1 (trunc V2I32:$Rs)),
+ (A4_vcmpweqi (A2_andp V2I32:$Rs, (A2_combineii (i32 1), (i32 1))),
+ (i32 1))>;
+def: Pat<(v4i1 (trunc V4I16:$Rs)),
+ (A4_vcmpheqi (Combinew (A2_andir (HiReg $Rs), (i32 0x00010001)),
+ (A2_andir (LoReg $Rs), (i32 0x00010001))),
+ (i32 1))>;
+def: Pat<(v8i1 (trunc V8I8:$Rs)),
+ (A4_vcmpbeqi (Combinew (A2_andir (HiReg $Rs), (i32 0x01010101)),
+ (A2_andir (LoReg $Rs), (i32 0x01010101))),
+ (i32 1))>;
+
+
// Saturation:
// Note: saturation assumes the same signed-ness for the input and the
// output.
@@ -831,9 +845,32 @@ def: Pat<(i32 (zext (i1 (setne I32:$Rs, anyimm:$s8)))),
def: Pat<(i1 (seteq I1:$Ps, (i1 -1))), (I1:$Ps)>;
def: Pat<(i1 (setne I1:$Ps, (i1 -1))), (C2_not I1:$Ps)>;
-def: Pat<(i1 (seteq I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, (C2_not I1:$Pt))>;
+def: Pat<(i1 (seteq I1:$Ps, I1:$Pt)), (C2_not (C2_xor I1:$Ps, I1:$Pt))>;
def: Pat<(i1 (setne I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>;
+multiclass BoolE_pat<PatFrag OpPred, ValueType ResTy> {
+ def: Pat<(ResTy (seteq OpPred:$Ps, OpPred:$Pt)), (C2_not (C2_xor $Ps, $Pt))>;
+ def: Pat<(ResTy (setne OpPred:$Ps, OpPred:$Pt)), (C2_xor $Ps, $Pt)>;
+}
+
+defm: BoolE_pat<I1, i1>;
+defm: BoolE_pat<V2I1, v2i1>;
+defm: BoolE_pat<V4I1, v4i1>;
+defm: BoolE_pat<V8I1, v8i1>;
+
+multiclass BoolL_pat<PatFrag OpPred, ValueType ResTy> {
+ // Signed "true" == -1
+ def: Pat<(ResTy (setlt OpPred:$Ps, OpPred:$Pt)), (C2_andn $Ps, $Pt)>;
+ def: Pat<(ResTy (setle OpPred:$Ps, OpPred:$Pt)), (C2_orn $Ps, $Pt)>;
+ def: Pat<(ResTy (setult OpPred:$Ps, OpPred:$Pt)), (C2_andn $Pt, $Ps)>;
+ def: Pat<(ResTy (setule OpPred:$Ps, OpPred:$Pt)), (C2_orn $Pt, $Ps)>;
+}
+
+defm: BoolL_pat<I1, i1>;
+defm: BoolL_pat<V2I1, v2i1>;
+defm: BoolL_pat<V4I1, v4i1>;
+defm: BoolL_pat<V8I1, v8i1>;
+
// Floating-point comparisons with checks for ordered/unordered status.
class T3<InstHexagon MI1, InstHexagon MI2, InstHexagon MI3>
@@ -941,10 +978,22 @@ def: Pat<(vselect (pnot V2I1:$Pu), V2I32:$Rs, V2I32:$Rt),
// From LegalizeDAG.cpp: (Pu ? Pv : Pw) <=> (Pu & Pv) | (!Pu & Pw).
-def: Pat<(select I1:$Pu, I1:$Pv, I1:$Pw),
- (C2_or (C2_and I1:$Pu, I1:$Pv),
- (C2_andn I1:$Pw, I1:$Pu))>;
-
+def: Pat<(select I1:$Pu, I1:$Ps, I1:$Pt),
+ (C4_or_andn (C2_and $Ps, $Pu), $Pt, $Pu)>;
+
+def: Pat<(vselect V2I1:$Pu, V2I1:$Ps, V2I1:$Pt),
+ (C4_or_andn (C2_and $Ps, $Pu), $Pt, $Pu)>;
+def: Pat<(vselect V4I1:$Pu, V4I1:$Ps, V4I1:$Pt),
+ (C4_or_andn (C2_and $Ps, $Pu), $Pt, $Pu)>;
+def: Pat<(vselect V8I1:$Pu, V8I1:$Ps, V8I1:$Pt),
+ (C4_or_andn (C2_and $Ps, $Pu), $Pt, $Pu)>;
+
+def: Pat<(select I1:$Pu, V2I1:$Ps, V2I1:$Pt),
+ (C2_tfrrp (C2_mux $Pu, (C2_tfrpr $Ps), (C2_tfrpr $Pt)))>;
+def: Pat<(select I1:$Pu, V4I1:$Ps, V4I1:$Pt),
+ (C2_tfrrp (C2_mux $Pu, (C2_tfrpr $Ps), (C2_tfrpr $Pt)))>;
+def: Pat<(select I1:$Pu, V8I1:$Ps, V8I1:$Pt),
+ (C2_tfrrp (C2_mux $Pu, (C2_tfrpr $Ps), (C2_tfrpr $Pt)))>;
def IsPosHalf : PatLeaf<(i32 IntRegs:$a), [{
return isPositiveHalfWord(N);
@@ -1123,6 +1172,12 @@ def: Pat<(bswap I32:$Rs), (A2_swiz I32:$Rs)>;
def: Pat<(bswap I64:$Rss), (Combinew (A2_swiz (LoReg $Rss)),
(A2_swiz (HiReg $Rss)))>;
+def: Pat<(bswap V2I16:$Rs), (A2_combine_lh (A2_swiz $Rs), (A2_swiz $Rs))>;
+def: Pat<(bswap V2I32:$Rs), (Combinew (A2_swiz (HiReg $Rs)),
+ (A2_swiz (LoReg $Rs)))>;
+def: Pat<(bswap V4I16:$Rs), (A2_orp (S2_lsr_i_vh $Rs, 8),
+ (S2_asl_i_vh $Rs, 8))>;
+
def: Pat<(shl s6_0ImmPred:$s6, I32:$Rt), (S4_lsli imm:$s6, I32:$Rt)>;
def: Pat<(shl I32:$Rs, (i32 16)), (A2_aslh I32:$Rs)>;
def: Pat<(sra I32:$Rs, (i32 16)), (A2_asrh I32:$Rs)>;
@@ -1854,6 +1909,20 @@ def: Pat<(i32 (ctpop I32:$Rs)), (S5_popcountp (A4_combineir 0, I32:$Rs))>;
def: Pat<(bitreverse I32:$Rs), (S2_brev I32:$Rs)>;
def: Pat<(bitreverse I64:$Rss), (S2_brevp I64:$Rss)>;
+def: Pat<(bitreverse V4I8:$Rs), (A2_swiz (S2_brev $Rs))>;
+def: Pat<(bitreverse V8I8:$Rs), (Combinew (A2_swiz (LoReg (S2_brevp $Rs))),
+ (A2_swiz (HiReg (S2_brevp $Rs))))>;
+def: Pat<(bitreverse V2I16:$Rs), (A2_combine_lh (S2_brev $Rs),
+ (S2_brev $Rs))>;
+def: Pat<(bitreverse V4I16:$Rs),
+ (Combinew (A2_combine_lh (LoReg (S2_brevp $Rs)),
+ (LoReg (S2_brevp $Rs))),
+ (A2_combine_lh (HiReg (S2_brevp $Rs)),
+ (HiReg (S2_brevp $Rs))))>;
+def: Pat<(bitreverse V2I32:$Rs),
+ (Combinew (i32 (LoReg (S2_brevp $Rs))),
+ (i32 (HiReg (S2_brevp $Rs))))>;
+
let AddedComplexity = 20 in { // Complexity greater than and/or/xor
def: Pat<(and I32:$Rs, IsNPow2_32:$V),
(S2_clrbit_i IntRegs:$Rs, (LogN2_32 $V))>;
@@ -2735,7 +2804,7 @@ let AddedComplexity = 90 in {
def: Storexr_shl_pat<store, F64, S4_storerd_rr>;
def: Pat<(store I1:$Pu, (add (shl I32:$Rs, u2_0ImmPred:$u2), I32:$Rt)),
- (S4_storerb_ur IntRegs:$Rt, IntRegs:$Rs, imm:$u2, (I1toI32 I1:$Pu))>;
+ (S4_storerb_rr IntRegs:$Rt, IntRegs:$Rs, imm:$u2, (I1toI32 I1:$Pu))>;
}
class SS_<PatFrag F> : SmallStackStore<F>;
@@ -3243,11 +3312,11 @@ def: Pat<(callv3nr I32:$dst), (PS_callr_nr I32:$dst)>;
def: Pat<(callv3nr tglobaladdr:$dst), (PS_call_nr tglobaladdr:$dst)>;
def: Pat<(callv3nr texternalsym:$dst), (PS_call_nr texternalsym:$dst)>;
-def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
+def retglue : SDNode<"HexagonISD::RET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;
-def: Pat<(retflag), (PS_jmpret (i32 R31))>;
+def: Pat<(retglue), (PS_jmpret (i32 R31))>;
def: Pat<(eh_return), (EH_RETURN_JMPR (i32 R31))>;
diff --git a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
index 99aaf1c1b592..7eccbd2cb023 100644
--- a/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
@@ -47,9 +47,11 @@ namespace llvm {
static unsigned RDFCount = 0;
-static cl::opt<unsigned> RDFLimit("rdf-limit",
- cl::init(std::numeric_limits<unsigned>::max()));
-static cl::opt<bool> RDFDump("rdf-dump", cl::init(false));
+static cl::opt<unsigned>
+ RDFLimit("hexagon-rdf-limit",
+ cl::init(std::numeric_limits<unsigned>::max()));
+static cl::opt<bool> RDFDump("hexagon-rdf-dump", cl::Hidden);
+static cl::opt<bool> RDFTrackReserved("hexagon-rdf-track-reserved", cl::Hidden);
namespace {
@@ -303,7 +305,11 @@ bool HexagonRDFOpt::runOnMachineFunction(MachineFunction &MF) {
// Dead phi nodes are necessary for copy propagation: we can add a use
// of a register in a block where it would need a phi node, but which
// was dead (and removed) during the graph build time.
- G.build(BuildOptions::KeepDeadPhis);
+ DataFlowGraph::Config Cfg;
+ Cfg.Options = RDFTrackReserved
+ ? BuildOptions::KeepDeadPhis
+ : BuildOptions::KeepDeadPhis | BuildOptions::OmitReserved;
+ G.build(Cfg);
if (RDFDump)
dbgs() << "Starting copy propagation on: " << MF.getName() << '\n'
@@ -320,8 +326,10 @@ bool HexagonRDFOpt::runOnMachineFunction(MachineFunction &MF) {
Changed |= DCE.run();
if (Changed) {
- if (RDFDump)
- dbgs() << "Starting liveness recomputation on: " << MF.getName() << '\n';
+ if (RDFDump) {
+ dbgs() << "Starting liveness recomputation on: " << MF.getName() << '\n'
+ << PrintNode<FuncNode*>(G.getFunc(), G) << '\n';
+ }
Liveness LV(*MRI, G);
LV.trace(RDFDump);
LV.computeLiveIns();
diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 8a84f55e33cd..a3f31df368c5 100644
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -452,8 +452,3 @@ HexagonRegisterInfo::getPointerRegClass(const MachineFunction &MF,
unsigned Kind) const {
return &Hexagon::IntRegsRegClass;
}
-
-Register HexagonRegisterInfo::getFirstCallerSavedNonParamReg() const {
- return Hexagon::R6;
-}
-
diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
index 4766fb5a8497..72153980236e 100644
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -71,8 +71,6 @@ public:
const MCPhysReg *getCallerSavedRegs(const MachineFunction *MF,
const TargetRegisterClass *RC) const;
- Register getFirstCallerSavedNonParamReg() const;
-
const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF,
unsigned Kind = 0) const override;
diff --git a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
index cb5b6c6e50f5..3a77fcd04e35 100644
--- a/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -117,11 +117,11 @@ let Namespace = "Hexagon" in {
def isub_lo : SubRegIndex<32>;
def isub_hi : SubRegIndex<32, 32>;
- def vsub_lo : SubRegIndex<512>;
- def vsub_hi : SubRegIndex<512, 512>;
- def vsub_fake: SubRegIndex<512>;
- def wsub_lo : SubRegIndex<1024>;
- def wsub_hi : SubRegIndex<1024, 1024>;
+ def vsub_lo : SubRegIndex<-1, -1>;
+ def vsub_hi : SubRegIndex<-1, -1>;
+ def vsub_fake: SubRegIndex<-1, -1>;
+ def wsub_lo : SubRegIndex<-1, -1>;
+ def wsub_hi : SubRegIndex<-1, -1>;
def subreg_overflow : SubRegIndex<1, 0>;
// Integer registers.
diff --git a/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp b/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
index 2e1a8c39887e..58d488064212 100644
--- a/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -253,8 +253,7 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
MachineInstr *UseI = Op.getParent();
if (isFixedInstr(UseI))
continue;
- for (unsigned i = 0, n = UseI->getNumOperands(); i < n; ++i) {
- MachineOperand &MO = UseI->getOperand(i);
+ for (MachineOperand &MO : UseI->operands()) {
// Skip non-registers or registers with subregisters.
if (&MO == &Op || !MO.isReg() || MO.getSubReg())
continue;
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index b52e1c9c7fdc..9654c9be303f 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -23,7 +23,6 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/VLIWMachineScheduler.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
index 1818697476dd..208b47d765c7 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -36,8 +36,6 @@ public:
~HexagonTargetMachine() override;
const HexagonSubtarget *getSubtargetImpl(const Function &F) const override;
- static unsigned getModuleMatchQuality(const Module &M);
-
void registerPassBuilderCallbacks(PassBuilder &PB) override;
TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 979a436756b8..cf4b66f8bf86 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -110,7 +110,7 @@ unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const {
return 32;
}
-unsigned HexagonTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned HexagonTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
return useHVX() ? 2 : 1;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 3d1e51ad3d73..ec0fd454c808 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -82,7 +82,7 @@ public:
/// @{
unsigned getNumberOfRegisters(bool vector) const;
- unsigned getMaxInterleaveFactor(unsigned VF);
+ unsigned getMaxInterleaveFactor(ElementCount VF);
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
unsigned getMinVectorRegisterBitWidth() const;
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
@@ -92,9 +92,7 @@ public:
return true;
}
bool supportsEfficientVectorElementLoadStore() { return false; }
- bool hasBranchDivergence() {
- return false;
- }
+ bool hasBranchDivergence(const Function *F = nullptr) { return false; }
bool enableAggressiveInterleaving(bool LoopHasReductions) {
return false;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index df4b14b70f25..e38c8bacaf2b 100644
--- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -1318,7 +1318,7 @@ bool HexagonPacketizerList::hasDualStoreDependence(const MachineInstr &I,
return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J));
}
-// SUI is the current instruction that is out side of the current packet.
+// SUI is the current instruction that is outside of the current packet.
// SUJ is the current instruction inside the current packet against which that
// SUI will be packetized.
bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index 3ff6e9b46939..b2a55219df06 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -9,6 +9,7 @@
// that assist in vector-based optimizations.
//
// AlignVectors: replace unaligned vector loads and stores with aligned ones.
+// HvxIdioms: recognize various opportunities to generate HVX intrinsic code.
//===----------------------------------------------------------------------===//
#include "llvm/ADT/APInt.h"
@@ -20,6 +21,7 @@
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
@@ -34,6 +36,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
@@ -56,13 +59,23 @@
using namespace llvm;
namespace {
+cl::opt<bool> DumpModule("hvc-dump-module", cl::Hidden);
+cl::opt<bool> VAEnabled("hvc-va", cl::Hidden, cl::init(true)); // Align
+cl::opt<bool> VIEnabled("hvc-vi", cl::Hidden, cl::init(true)); // Idioms
+cl::opt<bool> VADoFullStores("hvc-va-full-stores", cl::Hidden);
+
+cl::opt<unsigned> VAGroupCountLimit("hvc-va-group-count-limit", cl::Hidden,
+ cl::init(~0));
+cl::opt<unsigned> VAGroupSizeLimit("hvc-va-group-size-limit", cl::Hidden,
+ cl::init(~0));
+
class HexagonVectorCombine {
public:
HexagonVectorCombine(Function &F_, AliasAnalysis &AA_, AssumptionCache &AC_,
- DominatorTree &DT_, TargetLibraryInfo &TLI_,
- const TargetMachine &TM_)
+ DominatorTree &DT_, ScalarEvolution &SE_,
+ TargetLibraryInfo &TLI_, const TargetMachine &TM_)
: F(F_), DL(F.getParent()->getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
- TLI(TLI_),
+ SE(SE_), TLI(TLI_),
HST(static_cast<const HexagonSubtarget &>(*TM_.getSubtargetImpl(F))) {}
bool run();
@@ -79,10 +92,14 @@ public:
ConstantInt *getConstInt(int Val, unsigned Width = 32) const;
// Get the integer value of V, if it exists.
std::optional<APInt> getIntValue(const Value *Val) const;
- // Is V a constant 0, or a vector of 0s?
+ // Is Val a constant 0, or a vector of 0s?
bool isZero(const Value *Val) const;
- // Is V an undef value?
+ // Is Val an undef value?
bool isUndef(const Value *Val) const;
+ // Is Val a scalar (i1 true) or a vector of (i1 true)?
+ bool isTrue(const Value *Val) const;
+ // Is Val a scalar (i1 false) or a vector of (i1 false)?
+ bool isFalse(const Value *Val) const;
// Get HVX vector type with the given element type.
VectorType *getHvxTy(Type *ElemTy, bool Pair = false) const;
@@ -125,7 +142,8 @@ public:
Value *createHvxIntrinsic(IRBuilderBase &Builder, Intrinsic::ID IntID,
Type *RetTy, ArrayRef<Value *> Args,
- ArrayRef<Type *> ArgTys = std::nullopt) const;
+ ArrayRef<Type *> ArgTys = std::nullopt,
+ ArrayRef<Value *> MDSources = std::nullopt) const;
SmallVector<Value *> splitVectorElements(IRBuilderBase &Builder, Value *Vec,
unsigned ToWidth) const;
Value *joinVectorElements(IRBuilderBase &Builder, ArrayRef<Value *> Values,
@@ -138,6 +156,8 @@ public:
KnownBits getKnownBits(const Value *V,
const Instruction *CtxI = nullptr) const;
+ bool isSafeToClone(const Instruction &In) const;
+
template <typename T = std::vector<Instruction *>>
bool isSafeToMoveBeforeInBB(const Instruction &In,
BasicBlock::const_iterator To,
@@ -151,6 +171,7 @@ public:
AliasAnalysis &AA;
AssumptionCache &AC;
DominatorTree &DT;
+ ScalarEvolution &SE;
TargetLibraryInfo &TLI;
const HexagonSubtarget &HST;
@@ -160,6 +181,20 @@ private:
};
class AlignVectors {
+ // This code tries to replace unaligned vector loads/stores with aligned
+ // ones.
+ // Consider an unaligned load:
+ // %v = original_load %some_addr, align <bad>
+ // %user = %v
+ // It will generate
+ // = load ..., align <good>
+ // = load ..., align <good>
+ // = valign
+ // etc.
+ // %synthesize = combine/shuffle the loaded data so that it looks
+ // exactly like what "original_load" has loaded.
+ // %user = %synthesize
+ // Similarly for stores.
public:
AlignVectors(const HexagonVectorCombine &HVC_) : HVC(HVC_) {}
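Editorial note: the class comment above describes the rewrite only in IR terms. A minimal scalar model of the same idea, in plain C++ rather than LLVM IR, is sketched below; the function name, the 16-byte sector size, and the assumption that the enclosing aligned window is readable are illustrative, not part of this patch.

#include <cstdint>
#include <cstring>

// Model: an "unaligned load" of 16 bytes is reproduced by two 16-byte
// aligned loads covering the same span, followed by a byte shift (the
// role valign plays for HVX vectors).
void unalignedLoadModel(const uint8_t *Addr, uint8_t Out[16]) {
  uintptr_t A = reinterpret_cast<uintptr_t>(Addr);
  const uint8_t *Lo = reinterpret_cast<const uint8_t *>(A & ~uintptr_t(15));
  const uint8_t *Hi = Lo + 16;          // next aligned sector
  unsigned Shift = unsigned(A & 15);    // runtime misalignment
  uint8_t Buf[32];
  std::memcpy(Buf, Lo, 16);             // aligned load #1
  std::memcpy(Buf + 16, Hi, 16);        // aligned load #2
  std::memcpy(Out, Buf + Shift, 16);    // "valign" by the runtime offset
}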
@@ -167,12 +202,7 @@ public:
private:
using InstList = std::vector<Instruction *>;
-
- struct Segment {
- void *Data;
- int Start;
- int Size;
- };
+ using InstMap = DenseMap<Instruction *, Instruction *>;
struct AddrInfo {
AddrInfo(const AddrInfo &) = default;
@@ -202,16 +232,33 @@ private:
struct MoveGroup {
MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
- : Base(B), Main{AI.Inst}, IsHvx(Hvx), IsLoad(Load) {}
+ : Base(B), Main{AI.Inst}, Clones{}, IsHvx(Hvx), IsLoad(Load) {}
+ MoveGroup() = default;
Instruction *Base; // Base instruction of the parent address group.
InstList Main; // Main group of instructions.
InstList Deps; // List of dependencies.
+ InstMap Clones; // Map from original Deps to cloned ones.
bool IsHvx; // Is this group of HVX instructions?
bool IsLoad; // Is this a load group?
};
using MoveList = std::vector<MoveGroup>;
struct ByteSpan {
+ // A representation of "interesting" bytes within a given span of memory.
+ // These bytes are those that are loaded or stored, and they don't have
+ // to cover the entire span of memory.
+ //
+ // The representation works by picking a contiguous sequence of bytes
+ // from somewhere within a llvm::Value, and placing it at a given offset
+ // within the span.
+ //
+ // The sequence of bytes from llvm::Value is represented by Segment.
+ // Block is Segment, plus where it goes in the span.
+ //
+ // An important feature of ByteSpan is being able to make a "section",
+ // i.e. creating another ByteSpan corresponding to a range of offsets
+ // relative to the source span.
+
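To make the "section" operation mentioned above concrete, here is a small self-contained sketch of plausible semantics: keep only the bytes that fall into [Start, Start+Length), trimming segments at the boundaries. It illustrates the description in the comment; the names are stand-ins and it is not a copy of the pass's actual implementation.

#include <algorithm>
#include <vector>

struct SegmentSketch { int ValId, Begin, Len; };     // stand-in for Value*
struct BlockSketch { SegmentSketch Seg; int Pos; };  // Pos = offset in span

// Illustrative section(): blocks clipped to [Start, Start+Length).
std::vector<BlockSketch> section(const std::vector<BlockSketch> &Blocks,
                                 int Start, int Length) {
  std::vector<BlockSketch> Out;
  for (const BlockSketch &B : Blocks) {
    int L = std::max(B.Pos, Start);
    int R = std::min(B.Pos + B.Seg.Len, Start + Length);
    if (L < R) {
      int Off = L - B.Pos; // how much was trimmed from the front
      Out.push_back({{B.Seg.ValId, B.Seg.Begin + Off, R - L}, L});
    }
  }
  return Out;
}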
struct Segment {
// Segment of a Value: 'Len' bytes starting at byte 'Begin'.
Segment(Value *Val, int Begin, int Len)
@@ -230,7 +277,7 @@ private:
Block(const Block &Blk) = default;
Block &operator=(const Block &Blk) = default;
Segment Seg; // Value segment.
- int Pos; // Position (offset) of the segment in the Block.
+ int Pos; // Position (offset) of the block in the span.
};
int extent() const;
@@ -240,6 +287,7 @@ private:
int size() const { return Blocks.size(); }
Block &operator[](int i) { return Blocks[i]; }
+ const Block &operator[](int i) const { return Blocks[i]; }
std::vector<Block> Blocks;
@@ -252,7 +300,6 @@ private:
};
Align getAlignFromValue(const Value *V) const;
- std::optional<MemoryLocation> getLocation(const Instruction &In) const;
std::optional<AddrInfo> getAddrInfo(Instruction &In) const;
bool isHvx(const AddrInfo &AI) const;
// This function is only used for assertions at the moment.
@@ -263,25 +310,51 @@ private:
Value *getPassThrough(Value *Val) const;
Value *createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy,
- int Adjust) const;
+ int Adjust,
+ const InstMap &CloneMap = InstMap()) const;
Value *createAlignedPointer(IRBuilderBase &Builder, Value *Ptr, Type *ValTy,
- int Alignment) const;
- Value *createAlignedLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
- int Alignment, Value *Mask, Value *PassThru) const;
- Value *createAlignedStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
- int Alignment, Value *Mask) const;
+ int Alignment,
+ const InstMap &CloneMap = InstMap()) const;
+
+ Value *createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
+ Value *Predicate, int Alignment, Value *Mask,
+ Value *PassThru,
+ ArrayRef<Value *> MDSources = std::nullopt) const;
+ Value *createSimpleLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
+ int Alignment,
+ ArrayRef<Value *> MDSources = std::nullopt) const;
+
+ Value *createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
+ Value *Predicate, int Alignment, Value *Mask,
+ ArrayRef<Value *> MDSources = std::nullopt) const;
+ Value *createSimpleStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
+ int Alignment,
+ ArrayRef<Value *> MDSources = std::nullopt) const;
+
+ Value *createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
+ Value *Predicate, int Alignment,
+ ArrayRef<Value *> MDSources = std::nullopt) const;
+ Value *
+ createPredicatedStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
+ Value *Predicate, int Alignment,
+ ArrayRef<Value *> MDSources = std::nullopt) const;
DepList getUpwardDeps(Instruction *In, Instruction *Base) const;
bool createAddressGroups();
MoveList createLoadGroups(const AddrList &Group) const;
MoveList createStoreGroups(const AddrList &Group) const;
- bool move(const MoveGroup &Move) const;
+ bool moveTogether(MoveGroup &Move) const;
+ template <typename T> InstMap cloneBefore(Instruction *To, T &&Insts) const;
+
void realignLoadGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
int ScLen, Value *AlignVal, Value *AlignAddr) const;
void realignStoreGroup(IRBuilderBase &Builder, const ByteSpan &VSpan,
int ScLen, Value *AlignVal, Value *AlignAddr) const;
bool realignGroup(const MoveGroup &Move) const;
+ Value *makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
+ int Alignment) const;
+
friend raw_ostream &operator<<(raw_ostream &OS, const AddrInfo &AI);
friend raw_ostream &operator<<(raw_ostream &OS, const MoveGroup &MG);
friend raw_ostream &operator<<(raw_ostream &OS, const ByteSpan::Block &B);
@@ -304,20 +377,34 @@ raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) {
LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) {
+ OS << "IsLoad:" << (MG.IsLoad ? "yes" : "no");
+ OS << ", IsHvx:" << (MG.IsHvx ? "yes" : "no") << '\n';
OS << "Main\n";
for (Instruction *I : MG.Main)
OS << " " << *I << '\n';
OS << "Deps\n";
for (Instruction *I : MG.Deps)
OS << " " << *I << '\n';
+ OS << "Clones\n";
+ for (auto [K, V] : MG.Clones) {
+ OS << " ";
+ K->printAsOperand(OS, false);
+ OS << "\t-> " << *V << '\n';
+ }
return OS;
}
LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &OS,
const AlignVectors::ByteSpan::Block &B) {
- OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] "
- << *B.Seg.Val;
+ OS << " @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] ";
+ if (B.Seg.Val == reinterpret_cast<const Value *>(&B)) {
+ OS << "(self:" << B.Seg.Val << ')';
+ } else if (B.Seg.Val != nullptr) {
+ OS << *B.Seg.Val;
+ } else {
+ OS << "(null)";
+ }
return OS;
}
@@ -456,6 +543,36 @@ template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
// --- Begin AlignVectors
+// For brevity, only consider loads. We identify a group of loads where we
+// know the relative differences between their addresses, so we know how they
+// are laid out in memory (relative to one another). These loads can overlap,
+// can be shorter or longer than the desired vector length.
+// Ultimately we want to generate a sequence of aligned loads that will load
+// every byte that the original loads loaded, and have the program use these
+// loaded values instead of the original loads.
+// We consider the contiguous memory area spanned by all these loads.
+//
+// Let's say that a single aligned vector load can load 16 bytes at a time.
+// If the program wanted to use a byte at offset 13 from the beginning of the
+// original span, it will be a byte at offset 13+x in the aligned data for
+// some x>=0. This may happen to be in the first aligned load, or in the load
+// following it. Since we generally don't know what that alignment value
+// is at compile time, we proactively do valigns on the aligned loads, so that
+// the byte that was at offset 13 is still at offset 13 after the valigns.
+//
+// This will be the starting point for making the rest of the program use the
+// data loaded by the new loads.
+// For each original load, and its users:
+// %v = load ...
+// ... = %v
+// ... = %v
+// we create
+// %new_v = extract/combine/shuffle data from loaded/valigned vectors so
+// it contains the same value as %v did before
+// then replace all users of %v with %new_v.
+// ... = %new_v
+// ... = %new_v
+
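A small worked check of the offset arithmetic in the comment above (illustrative only, assuming 16-byte sectors and a misalignment x of the span relative to the aligned base): the byte at original offset k sits at offset k + x in the aligned data, and the valign by x brings it back to offset k.

#include <cassert>

void offsetCheck(unsigned x /* 0..15 */, unsigned k /* byte offset in span */) {
  unsigned AlignedOffset = k + x;           // position within the aligned loads
  unsigned Sector = AlignedOffset / 16;     // which aligned load holds the byte
  unsigned AfterValign = AlignedOffset - x; // valign shifts everything back by x
  assert(AfterValign == k);
  (void)Sector;
}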
auto AlignVectors::ByteSpan::extent() const -> int {
if (size() == 0)
return 0;
@@ -564,56 +681,165 @@ auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
}
auto AlignVectors::createAdjustedPointer(IRBuilderBase &Builder, Value *Ptr,
- Type *ValTy, int Adjust) const
+ Type *ValTy, int Adjust,
+ const InstMap &CloneMap) const
-> Value * {
- // The adjustment is in bytes, but if it's a multiple of the type size,
- // we don't need to do pointer casts.
- auto *PtrTy = cast<PointerType>(Ptr->getType());
- if (!PtrTy->isOpaque()) {
- Type *ElemTy = PtrTy->getNonOpaquePointerElementType();
- int ElemSize = HVC.getSizeOf(ElemTy, HVC.Alloc);
- if (Adjust % ElemSize == 0 && Adjust != 0) {
- Value *Tmp0 =
- Builder.CreateGEP(ElemTy, Ptr, HVC.getConstInt(Adjust / ElemSize));
- return Builder.CreatePointerCast(Tmp0, ValTy->getPointerTo());
- }
- }
-
- PointerType *CharPtrTy = Type::getInt8PtrTy(HVC.F.getContext());
- Value *Tmp0 = Builder.CreatePointerCast(Ptr, CharPtrTy);
- Value *Tmp1 = Builder.CreateGEP(Type::getInt8Ty(HVC.F.getContext()), Tmp0,
- HVC.getConstInt(Adjust));
- return Builder.CreatePointerCast(Tmp1, ValTy->getPointerTo());
+ if (auto *I = dyn_cast<Instruction>(Ptr))
+ if (Instruction *New = CloneMap.lookup(I))
+ Ptr = New;
+ return Builder.CreateGEP(Type::getInt8Ty(HVC.F.getContext()), Ptr,
+ HVC.getConstInt(Adjust), "gep");
}
auto AlignVectors::createAlignedPointer(IRBuilderBase &Builder, Value *Ptr,
- Type *ValTy, int Alignment) const
+ Type *ValTy, int Alignment,
+ const InstMap &CloneMap) const
-> Value * {
- Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy());
+ auto remap = [&](Value *V) -> Value * {
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ for (auto [Old, New] : CloneMap)
+ I->replaceUsesOfWith(Old, New);
+ return I;
+ }
+ return V;
+ };
+ Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy(), "pti");
Value *Mask = HVC.getConstInt(-Alignment);
- Value *And = Builder.CreateAnd(AsInt, Mask);
- return Builder.CreateIntToPtr(And, ValTy->getPointerTo());
-}
-
-auto AlignVectors::createAlignedLoad(IRBuilderBase &Builder, Type *ValTy,
- Value *Ptr, int Alignment, Value *Mask,
- Value *PassThru) const -> Value * {
+ Value *And = Builder.CreateAnd(remap(AsInt), Mask, "and");
+ return Builder.CreateIntToPtr(And, ValTy->getPointerTo(), "itp");
+}
+
+auto AlignVectors::createLoad(IRBuilderBase &Builder, Type *ValTy, Value *Ptr,
+ Value *Predicate, int Alignment, Value *Mask,
+ Value *PassThru,
+ ArrayRef<Value *> MDSources) const -> Value * {
+ bool HvxHasPredLoad = HVC.HST.useHVXV62Ops();
+ // Predicate is nullptr if not creating predicated load
+ if (Predicate) {
+ assert(!Predicate->getType()->isVectorTy() &&
+ "Expecting scalar predicate");
+ if (HVC.isFalse(Predicate))
+ return UndefValue::get(ValTy);
+ if (!HVC.isTrue(Predicate) && HvxHasPredLoad) {
+ Value *Load = createPredicatedLoad(Builder, ValTy, Ptr, Predicate,
+ Alignment, MDSources);
+ return Builder.CreateSelect(Mask, Load, PassThru);
+ }
+ // Predicate == true here.
+ }
assert(!HVC.isUndef(Mask)); // Should this be allowed?
if (HVC.isZero(Mask))
return PassThru;
- if (Mask == ConstantInt::getTrue(Mask->getType()))
- return Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment));
- return Builder.CreateMaskedLoad(ValTy, Ptr, Align(Alignment), Mask, PassThru);
+ if (HVC.isTrue(Mask))
+ return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);
+
+ Instruction *Load = Builder.CreateMaskedLoad(ValTy, Ptr, Align(Alignment),
+ Mask, PassThru, "mld");
+ propagateMetadata(Load, MDSources);
+ return Load;
+}
+
+auto AlignVectors::createSimpleLoad(IRBuilderBase &Builder, Type *ValTy,
+ Value *Ptr, int Alignment,
+ ArrayRef<Value *> MDSources) const
+ -> Value * {
+ Instruction *Load =
+ Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment), "ald");
+ propagateMetadata(Load, MDSources);
+ return Load;
}
-auto AlignVectors::createAlignedStore(IRBuilderBase &Builder, Value *Val,
- Value *Ptr, int Alignment,
- Value *Mask) const -> Value * {
+auto AlignVectors::createPredicatedLoad(IRBuilderBase &Builder, Type *ValTy,
+ Value *Ptr, Value *Predicate,
+ int Alignment,
+ ArrayRef<Value *> MDSources) const
+ -> Value * {
+ assert(HVC.HST.isTypeForHVX(ValTy) &&
+ "Predicated 'scalar' vector loads not yet supported");
+ assert(Predicate);
+ assert(!Predicate->getType()->isVectorTy() && "Expecting scalar predicate");
+ assert(HVC.getSizeOf(ValTy, HVC.Alloc) % Alignment == 0);
+ if (HVC.isFalse(Predicate))
+ return UndefValue::get(ValTy);
+ if (HVC.isTrue(Predicate))
+ return createSimpleLoad(Builder, ValTy, Ptr, Alignment, MDSources);
+
+ auto V6_vL32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vL32b_pred_ai);
+ // FIXME: This may not put the offset from Ptr into the vmem offset.
+ return HVC.createHvxIntrinsic(Builder, V6_vL32b_pred_ai, ValTy,
+ {Predicate, Ptr, HVC.getConstInt(0)},
+ std::nullopt, MDSources);
+}
+
+auto AlignVectors::createStore(IRBuilderBase &Builder, Value *Val, Value *Ptr,
+ Value *Predicate, int Alignment, Value *Mask,
+ ArrayRef<Value *> MDSources) const -> Value * {
if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
return UndefValue::get(Val->getType());
- if (Mask == ConstantInt::getTrue(Mask->getType()))
- return Builder.CreateAlignedStore(Val, Ptr, Align(Alignment));
- return Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
+ assert(!Predicate || (!Predicate->getType()->isVectorTy() &&
+ "Expecting scalar predicate"));
+ if (Predicate) {
+ if (HVC.isFalse(Predicate))
+ return UndefValue::get(Val->getType());
+ if (HVC.isTrue(Predicate))
+ Predicate = nullptr;
+ }
+ // Here both Predicate and Mask are true or unknown.
+
+ if (HVC.isTrue(Mask)) {
+ if (Predicate) { // Predicate unknown
+ return createPredicatedStore(Builder, Val, Ptr, Predicate, Alignment,
+ MDSources);
+ }
+ // Predicate is true:
+ return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
+ }
+
+ // Mask is unknown
+ if (!Predicate) {
+ Instruction *Store =
+ Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
+ propagateMetadata(Store, MDSources);
+ return Store;
+ }
+
+ // Both Predicate and Mask are unknown.
+ // Emulate masked store with predicated-load + mux + predicated-store.
+ Value *PredLoad = createPredicatedLoad(Builder, Val->getType(), Ptr,
+ Predicate, Alignment, MDSources);
+ Value *Mux = Builder.CreateSelect(Mask, Val, PredLoad);
+ return createPredicatedStore(Builder, Mux, Ptr, Predicate, Alignment,
+ MDSources);
+}
+
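A lane-by-lane scalar model of the emulation used in createStore above when both the predicate and the mask are unknown (predicated load, mux, predicated store). Names and the int element type are illustrative assumptions, not part of the patch.

#include <cstddef>

// Model of: if (Pred) { tmp = mem; tmp = Mask ? Val : tmp; mem = tmp; }
void maskedStoreModel(int *Mem, const int *Val, const bool *Mask, bool Pred,
                      std::size_t N) {
  if (!Pred)
    return;                              // predicated store writes nothing
  for (std::size_t i = 0; i != N; ++i) {
    int Loaded = Mem[i];                 // predicated load of the destination
    int Mux = Mask[i] ? Val[i] : Loaded; // select new vs. existing data
    Mem[i] = Mux;                        // predicated store of the blend
  }
}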
+auto AlignVectors::createSimpleStore(IRBuilderBase &Builder, Value *Val,
+ Value *Ptr, int Alignment,
+ ArrayRef<Value *> MDSources) const
+ -> Value * {
+ Instruction *Store = Builder.CreateAlignedStore(Val, Ptr, Align(Alignment));
+ propagateMetadata(Store, MDSources);
+ return Store;
+}
+
+auto AlignVectors::createPredicatedStore(IRBuilderBase &Builder, Value *Val,
+ Value *Ptr, Value *Predicate,
+ int Alignment,
+ ArrayRef<Value *> MDSources) const
+ -> Value * {
+ assert(HVC.HST.isTypeForHVX(Val->getType()) &&
+ "Predicated 'scalar' vector stores not yet supported");
+ assert(Predicate);
+ if (HVC.isFalse(Predicate))
+ return UndefValue::get(Val->getType());
+ if (HVC.isTrue(Predicate))
+ return createSimpleStore(Builder, Val, Ptr, Alignment, MDSources);
+
+ assert(HVC.getSizeOf(Val, HVC.Alloc) % Alignment == 0);
+ auto V6_vS32b_pred_ai = HVC.HST.getIntrinsicId(Hexagon::V6_vS32b_pred_ai);
+ // FIXME: This may not put the offset from Ptr into the vmem offset.
+ return HVC.createHvxIntrinsic(Builder, V6_vS32b_pred_ai, nullptr,
+ {Predicate, Ptr, HVC.getConstInt(0), Val},
+ std::nullopt, MDSources);
}
auto AlignVectors::getUpwardDeps(Instruction *In, Instruction *Base) const
@@ -628,7 +854,8 @@ auto AlignVectors::getUpwardDeps(Instruction *In, Instruction *Base) const
while (!WorkQ.empty()) {
Instruction *D = WorkQ.front();
WorkQ.pop_front();
- Deps.insert(D);
+ if (D != In)
+ Deps.insert(D);
for (Value *Op : D->operands()) {
if (auto *I = dyn_cast<Instruction>(Op)) {
if (I->getParent() == Parent && Base->comesBefore(I))
@@ -697,9 +924,14 @@ auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
// Form load groups.
// To avoid complications with moving code across basic blocks, only form
// groups that are contained within a single basic block.
+ unsigned SizeLimit = VAGroupSizeLimit;
+ if (SizeLimit == 0)
+ return {};
auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
assert(!Move.Main.empty() && "Move group should have non-empty Main");
+ if (Move.Main.size() >= SizeLimit)
+ return false;
// Don't mix HVX and non-HVX instructions.
if (Move.IsHvx != isHvx(Info))
return false;
@@ -707,20 +939,18 @@ auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
Instruction *Base = Move.Main.front();
if (Base->getParent() != Info.Inst->getParent())
return false;
-
- auto isSafeToMoveToBase = [&](const Instruction *I) {
- return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator());
+ // Check if it's safe to move the load.
+ if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator()))
+ return false;
+ // And if it's safe to clone the dependencies.
+ auto isSafeToCopyAtBase = [&](const Instruction *I) {
+ return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator()) &&
+ HVC.isSafeToClone(*I);
};
DepList Deps = getUpwardDeps(Info.Inst, Base);
- if (!llvm::all_of(Deps, isSafeToMoveToBase))
+ if (!llvm::all_of(Deps, isSafeToCopyAtBase))
return false;
- // The dependencies will be moved together with the load, so make sure
- // that none of them could be moved independently in another group.
- Deps.erase(Info.Inst);
- auto inAddrMap = [&](Instruction *I) { return AddrGroups.count(I) > 0; };
- if (llvm::any_of(Deps, inAddrMap))
- return false;
Move.Main.push_back(Info.Inst);
llvm::append_range(Move.Deps, Deps);
return true;
@@ -737,6 +967,11 @@ auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
// Erase singleton groups.
erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
+
+ // Erase HVX groups on targets < HvxV62 (due to lack of predicated loads).
+ if (!HVC.HST.useHVXV62Ops())
+ erase_if(LoadGroups, [](const MoveGroup &G) { return G.IsHvx; });
+
return LoadGroups;
}
@@ -744,9 +979,14 @@ auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
// Form store groups.
// To avoid complications with moving code across basic blocks, only form
// groups that are contained within a single basic block.
+ unsigned SizeLimit = VAGroupSizeLimit;
+ if (SizeLimit == 0)
+ return {};
auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
assert(!Move.Main.empty() && "Move group should have non-empty Main");
+ if (Move.Main.size() >= SizeLimit)
+ return false;
// For stores with return values we'd have to collect downward dependencies.
// There are no such stores that we handle at the moment, so omit that.
assert(Info.Inst->getType()->isVoidTy() &&
@@ -778,24 +1018,51 @@ auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
// Erase singleton groups.
erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
+
+ // Erase HVX groups on targets < HvxV62 (due to lack of predicated loads).
+ if (!HVC.HST.useHVXV62Ops())
+ erase_if(StoreGroups, [](const MoveGroup &G) { return G.IsHvx; });
+
+ // Erase groups where every store is a full HVX vector. The reason is that
+ // aligning predicated stores generates complex code that may be less
+ // efficient than a sequence of unaligned vector stores.
+ if (!VADoFullStores) {
+ erase_if(StoreGroups, [this](const MoveGroup &G) {
+ return G.IsHvx && llvm::all_of(G.Main, [this](Instruction *S) {
+ auto MaybeInfo = this->getAddrInfo(*S);
+ assert(MaybeInfo.has_value());
+ return HVC.HST.isHVXVectorType(
+ EVT::getEVT(MaybeInfo->ValTy, false));
+ });
+ });
+ }
+
return StoreGroups;
}
-auto AlignVectors::move(const MoveGroup &Move) const -> bool {
+auto AlignVectors::moveTogether(MoveGroup &Move) const -> bool {
+ // Move all instructions to be adjacent.
assert(!Move.Main.empty() && "Move group should have non-empty Main");
Instruction *Where = Move.Main.front();
if (Move.IsLoad) {
- // Move all deps to before Where, keeping order.
- for (Instruction *D : Move.Deps)
- D->moveBefore(Where);
+ // Move all the loads (and dependencies) to where the first load is.
+ // Clone all deps to before Where, keeping order.
+ Move.Clones = cloneBefore(Where, Move.Deps);
// Move all main instructions to after Where, keeping order.
ArrayRef<Instruction *> Main(Move.Main);
- for (Instruction *M : Main.drop_front(1)) {
- M->moveAfter(Where);
+ for (Instruction *M : Main) {
+ if (M != Where)
+ M->moveAfter(Where);
+ for (auto [Old, New] : Move.Clones)
+ M->replaceUsesOfWith(Old, New);
Where = M;
}
+ // Replace Deps with the clones.
+ for (int i = 0, e = Move.Deps.size(); i != e; ++i)
+ Move.Deps[i] = Move.Clones[Move.Deps[i]];
} else {
+ // Move all the stores to where the last store is.
// NOTE: Deps are empty for "store" groups. If they need to be
// non-empty, decide on the order.
assert(Move.Deps.empty());
@@ -810,10 +1077,29 @@ auto AlignVectors::move(const MoveGroup &Move) const -> bool {
return Move.Main.size() + Move.Deps.size() > 1;
}
+template <typename T>
+auto AlignVectors::cloneBefore(Instruction *To, T &&Insts) const -> InstMap {
+ InstMap Map;
+
+ for (Instruction *I : Insts) {
+ assert(HVC.isSafeToClone(*I));
+ Instruction *C = I->clone();
+ C->setName(Twine("c.") + I->getName() + ".");
+ C->insertBefore(To);
+
+ for (auto [Old, New] : Map)
+ C->replaceUsesOfWith(Old, New);
+ Map.insert(std::make_pair(I, C));
+ }
+ return Map;
+}
+
auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
const ByteSpan &VSpan, int ScLen,
Value *AlignVal, Value *AlignAddr) const
-> void {
+ LLVM_DEBUG(dbgs() << __func__ << "\n");
+
Type *SecTy = HVC.getByteTy(ScLen);
int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
bool DoAlign = !HVC.isZero(AlignVal);
@@ -824,7 +1110,8 @@ auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));
auto *Undef = UndefValue::get(SecTy);
- SmallVector<Instruction *> Loads(NumSectors + DoAlign, nullptr);
+ // The created load does not have to be an Instruction (e.g. it can be "undef").
+ SmallVector<Value *> Loads(NumSectors + DoAlign, nullptr);
// We could create all of the aligned loads, and generate the valigns
// at the location of the first load, but for large load groups, this
@@ -834,12 +1121,16 @@ auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
// In any case we need to have a mapping from the blocks of VSpan (the
// span covered by the pre-existing loads) to ASpan (the span covered
// by the aligned loads). There is a small problem, though: ASpan needs
- // to have pointers to the loads/valigns, but we don't know where to put
- // them yet. We can't use nullptr, because when we create sections of
- // ASpan (corresponding to blocks from VSpan), for each block in the
- // section we need to know which blocks of ASpan they are a part of.
- // To have 1-1 mapping between blocks of ASpan and the temporary value
- // pointers, use the addresses of the blocks themselves.
+ // to have pointers to the loads/valigns, but we don't have these loads
+ // because we don't know where to put them yet. We find out by creating
+ // a section of ASpan that corresponds to values (blocks) from VSpan,
+ // and checking where the new load should be placed. We need to attach
+ // this location information to each block in ASpan somehow, so we put
+ // distinct values for Seg.Val in each ASpan.Blocks[i], and use a map
+ // to store the location for each Seg.Val.
+ // The distinct values happen to be Blocks[i].Seg.Val = &Blocks[i],
+ // which helps with printing ByteSpans without crashing when printing
+ // Segments with these temporary identifiers in place of Val.
// Populate the blocks first, to avoid reallocations of the vector
// interfering with generating the placeholder addresses.
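A minimal sketch of the placeholder trick described above: each block temporarily uses its own address as a distinct key, so a side map can record per-block information before the real loads/valigns exist. Types and names here are simplified stand-ins, not the pass's actual data structures.

#include <map>
#include <vector>

struct BlockStub { const void *Val = nullptr; }; // stand-in for Seg.Val

void placeholderKeys(std::vector<BlockStub> &Blocks,
                     std::map<const void *, int> &EarliestPos) {
  // Populate the vector first so it no longer reallocates, then use each
  // block's own address as a unique temporary identifier.
  for (BlockStub &B : Blocks)
    B.Val = &B;
  int Pos = 0;
  for (const BlockStub &B : Blocks)
    EarliestPos[B.Val] = Pos++; // per-block info keyed by the placeholder
}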
@@ -867,9 +1158,9 @@ auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
for (const Use &U : Uses) {
auto *I = dyn_cast<Instruction>(U.getUser());
assert(I != nullptr && "Load used in a non-instruction?");
- // Make sure we only consider at users in this block, but we need
+ // Make sure we only consider users in this block, but we need
// to remember if there were users outside the block too. This is
- // because if there are no users, aligned loads will not be created.
+ // because if no users are found, aligned loads will not be created.
if (I->getParent() == BaseBlock) {
if (!isa<PHINode>(I))
User = std::min(User, I, isEarlier);
@@ -888,53 +1179,73 @@ auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
}
}
+ LLVM_DEBUG({
+ dbgs() << "ASpan:\n" << ASpan << '\n';
+ dbgs() << "Earliest users of ASpan:\n";
+ for (auto &[Val, User] : EarliestUser) {
+ dbgs() << Val << "\n ->" << *User << '\n';
+ }
+ });
+
auto createLoad = [&](IRBuilderBase &Builder, const ByteSpan &VSpan,
- int Index) {
+ int Index, bool MakePred) {
Value *Ptr =
createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
- // FIXME: generate a predicated load?
- Value *Load = createAlignedLoad(Builder, SecTy, Ptr, ScLen, True, Undef);
+ Value *Predicate =
+ MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
+
// If vector shifting is potentially needed, accumulate metadata
// from source sections of twice the load width.
int Start = (Index - DoAlign) * ScLen;
int Width = (1 + DoAlign) * ScLen;
- propagateMetadata(cast<Instruction>(Load),
- VSpan.section(Start, Width).values());
- return cast<Instruction>(Load);
+ return this->createLoad(Builder, SecTy, Ptr, Predicate, ScLen, True, Undef,
+ VSpan.section(Start, Width).values());
};
auto moveBefore = [this](Instruction *In, Instruction *To) {
// Move In and its upward dependencies to before To.
assert(In->getParent() == To->getParent());
DepList Deps = getUpwardDeps(In, To);
+ In->moveBefore(To);
// DepList is sorted with respect to positions in the basic block.
- for (Instruction *I : Deps)
- I->moveBefore(To);
+ InstMap Map = cloneBefore(In, Deps);
+ for (auto [Old, New] : Map)
+ In->replaceUsesOfWith(Old, New);
};
// Generate necessary loads at appropriate locations.
+ LLVM_DEBUG(dbgs() << "Creating loads for ASpan sectors\n");
for (int Index = 0; Index != NumSectors + 1; ++Index) {
// In ASpan, each block will be either a single aligned load, or a
// valign of a pair of loads. In the latter case, an aligned load j
// will belong to the current valign, and the one in the previous
// block (for j > 0).
+ // Place the load at a location which will dominate the valign, assuming
+ // the valign will be placed right before the earliest user.
Instruction *PrevAt =
DoAlign && Index > 0 ? EarliestUser[&ASpan[Index - 1]] : nullptr;
Instruction *ThisAt =
Index < NumSectors ? EarliestUser[&ASpan[Index]] : nullptr;
if (auto *Where = std::min(PrevAt, ThisAt, isEarlier)) {
Builder.SetInsertPoint(Where);
- Loads[Index] = createLoad(Builder, VSpan, Index);
- // We know it's safe to put the load at BasePos, so if it's not safe
- // to move it from this location to BasePos, then the current location
- // is not valid.
+ Loads[Index] =
+ createLoad(Builder, VSpan, Index, DoAlign && Index == NumSectors);
+ // We know it's safe to put the load at BasePos, but we'd prefer to put
+ // it at "Where". To see if the load is safe to be placed at Where, put
+ // it there first and then check if it's safe to move it to BasePos.
+ // If not, then the load needs to be placed at BasePos.
// We can't do this check proactively because we need the load to exist
// in order to check legality.
- if (!HVC.isSafeToMoveBeforeInBB(*Loads[Index], BasePos))
- moveBefore(Loads[Index], &*BasePos);
+ if (auto *Load = dyn_cast<Instruction>(Loads[Index])) {
+ if (!HVC.isSafeToMoveBeforeInBB(*Load, BasePos))
+ moveBefore(Load, &*BasePos);
+ }
+ LLVM_DEBUG(dbgs() << "Loads[" << Index << "]:" << *Loads[Index] << '\n');
}
}
+
// Generate valigns if needed, and fill in proper values in ASpan
+ LLVM_DEBUG(dbgs() << "Creating values for ASpan sectors\n");
for (int Index = 0; Index != NumSectors; ++Index) {
ASpan[Index].Seg.Val = nullptr;
if (auto *Where = EarliestUser[&ASpan[Index]]) {
@@ -947,6 +1258,7 @@ auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
Val = HVC.vralignb(Builder, Val, NextLoad, AlignVal);
}
ASpan[Index].Seg.Val = Val;
+ LLVM_DEBUG(dbgs() << "ASpan[" << Index << "]:" << *Val << '\n');
}
}
@@ -955,15 +1267,27 @@ auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
Value *Accum = UndefValue::get(HVC.getByteTy(B.Seg.Size));
Builder.SetInsertPoint(cast<Instruction>(B.Seg.Val));
+ // We're generating a reduction, where each instruction depends on
+ // the previous one, so we need to order them according to the position
+ // of their inputs in the code.
+ std::vector<ByteSpan::Block *> ABlocks;
for (ByteSpan::Block &S : ASection) {
- if (S.Seg.Val == nullptr)
- continue;
+ if (S.Seg.Val != nullptr)
+ ABlocks.push_back(&S);
+ }
+ llvm::sort(ABlocks,
+ [&](const ByteSpan::Block *A, const ByteSpan::Block *B) {
+ return isEarlier(cast<Instruction>(A->Seg.Val),
+ cast<Instruction>(B->Seg.Val));
+ });
+ for (ByteSpan::Block *S : ABlocks) {
// The processing of the data loaded by the aligned loads
// needs to be inserted after the data is available.
- Instruction *SegI = cast<Instruction>(S.Seg.Val);
+ Instruction *SegI = cast<Instruction>(S->Seg.Val);
Builder.SetInsertPoint(&*std::next(SegI->getIterator()));
- Value *Pay = HVC.vbytes(Builder, getPayload(S.Seg.Val));
- Accum = HVC.insertb(Builder, Accum, Pay, S.Seg.Start, S.Seg.Size, S.Pos);
+ Value *Pay = HVC.vbytes(Builder, getPayload(S->Seg.Val));
+ Accum =
+ HVC.insertb(Builder, Accum, Pay, S->Seg.Start, S->Seg.Size, S->Pos);
}
// Instead of casting everything to bytes for the vselect, cast to the
// original value type. This will avoid complications with casting masks.
@@ -972,9 +1296,9 @@ auto AlignVectors::realignLoadGroup(IRBuilderBase &Builder,
// but if the mask is not exactly of HVX length, extra handling would be
// needed to make it work.
Type *ValTy = getPayload(B.Seg.Val)->getType();
- Value *Cast = Builder.CreateBitCast(Accum, ValTy);
+ Value *Cast = Builder.CreateBitCast(Accum, ValTy, "cst");
Value *Sel = Builder.CreateSelect(getMask(B.Seg.Val), Cast,
- getPassThrough(B.Seg.Val));
+ getPassThrough(B.Seg.Val), "sel");
B.Seg.Val->replaceAllUsesWith(Sel);
}
}
@@ -983,6 +1307,8 @@ auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,
const ByteSpan &VSpan, int ScLen,
Value *AlignVal, Value *AlignAddr) const
-> void {
+ LLVM_DEBUG(dbgs() << __func__ << "\n");
+
Type *SecTy = HVC.getByteTy(ScLen);
int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
bool DoAlign = !HVC.isZero(AlignVal);
@@ -997,59 +1323,87 @@ auto AlignVectors::realignStoreGroup(IRBuilderBase &Builder,
if (Ty->isVectorTy())
return Val;
auto *VecTy = VectorType::get(Ty, 1, /*Scalable=*/false);
- return Builder.CreateBitCast(Val, VecTy);
+ return Builder.CreateBitCast(Val, VecTy, "cst");
};
// Create an extra "undef" sector at the beginning and at the end.
// They will be used as the left/right filler in the vlalign step.
- for (int i = (DoAlign ? -1 : 0); i != NumSectors + DoAlign; ++i) {
+ for (int Index = (DoAlign ? -1 : 0); Index != NumSectors + DoAlign; ++Index) {
// For stores, the size of each section is an aligned vector length.
// Adjust the store offsets relative to the section start offset.
- ByteSpan VSection = VSpan.section(i * ScLen, ScLen).shift(-i * ScLen);
- Value *AccumV = UndefValue::get(SecTy);
- Value *AccumM = HVC.getNullValue(SecTy);
+ ByteSpan VSection =
+ VSpan.section(Index * ScLen, ScLen).shift(-Index * ScLen);
+ Value *Undef = UndefValue::get(SecTy);
+ Value *Zero = HVC.getNullValue(SecTy);
+ Value *AccumV = Undef;
+ Value *AccumM = Zero;
for (ByteSpan::Block &S : VSection) {
Value *Pay = getPayload(S.Seg.Val);
Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),
Pay->getType(), HVC.getByteTy());
- AccumM = HVC.insertb(Builder, AccumM, HVC.vbytes(Builder, Mask),
- S.Seg.Start, S.Seg.Size, S.Pos);
- AccumV = HVC.insertb(Builder, AccumV, HVC.vbytes(Builder, Pay),
- S.Seg.Start, S.Seg.Size, S.Pos);
+ Value *PartM = HVC.insertb(Builder, Zero, HVC.vbytes(Builder, Mask),
+ S.Seg.Start, S.Seg.Size, S.Pos);
+ AccumM = Builder.CreateOr(AccumM, PartM);
+
+ Value *PartV = HVC.insertb(Builder, Undef, HVC.vbytes(Builder, Pay),
+ S.Seg.Start, S.Seg.Size, S.Pos);
+
+ AccumV = Builder.CreateSelect(
+ Builder.CreateICmp(CmpInst::ICMP_NE, PartM, Zero), PartV, AccumV);
}
- ASpanV.Blocks.emplace_back(AccumV, ScLen, i * ScLen);
- ASpanM.Blocks.emplace_back(AccumM, ScLen, i * ScLen);
+ ASpanV.Blocks.emplace_back(AccumV, ScLen, Index * ScLen);
+ ASpanM.Blocks.emplace_back(AccumM, ScLen, Index * ScLen);
}
+ LLVM_DEBUG({
+ dbgs() << "ASpanV before vlalign:\n" << ASpanV << '\n';
+ dbgs() << "ASpanM before vlalign:\n" << ASpanM << '\n';
+ });
+
// vlalign
if (DoAlign) {
- for (int j = 1; j != NumSectors + 2; ++j) {
- Value *PrevV = ASpanV[j - 1].Seg.Val, *ThisV = ASpanV[j].Seg.Val;
- Value *PrevM = ASpanM[j - 1].Seg.Val, *ThisM = ASpanM[j].Seg.Val;
+ for (int Index = 1; Index != NumSectors + 2; ++Index) {
+ Value *PrevV = ASpanV[Index - 1].Seg.Val, *ThisV = ASpanV[Index].Seg.Val;
+ Value *PrevM = ASpanM[Index - 1].Seg.Val, *ThisM = ASpanM[Index].Seg.Val;
assert(isSectorTy(PrevV->getType()) && isSectorTy(PrevM->getType()));
- ASpanV[j - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
- ASpanM[j - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);
+ ASpanV[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevV, ThisV, AlignVal);
+ ASpanM[Index - 1].Seg.Val = HVC.vlalignb(Builder, PrevM, ThisM, AlignVal);
}
}
- for (int i = 0; i != NumSectors + DoAlign; ++i) {
- Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen);
- Value *Val = ASpanV[i].Seg.Val;
- Value *Mask = ASpanM[i].Seg.Val; // bytes
- if (!HVC.isUndef(Val) && !HVC.isZero(Mask)) {
- Value *Store =
- createAlignedStore(Builder, Val, Ptr, ScLen, HVC.vlsb(Builder, Mask));
- // If vector shifting is potentially needed, accumulate metadata
- // from source sections of twice the store width.
- int Start = (i - DoAlign) * ScLen;
- int Width = (1 + DoAlign) * ScLen;
- propagateMetadata(cast<Instruction>(Store),
- VSpan.section(Start, Width).values());
- }
+ LLVM_DEBUG({
+ dbgs() << "ASpanV after vlalign:\n" << ASpanV << '\n';
+ dbgs() << "ASpanM after vlalign:\n" << ASpanM << '\n';
+ });
+
+ auto createStore = [&](IRBuilderBase &Builder, const ByteSpan &ASpanV,
+ const ByteSpan &ASpanM, int Index, bool MakePred) {
+ Value *Val = ASpanV[Index].Seg.Val;
+ Value *Mask = ASpanM[Index].Seg.Val; // bytes
+ if (HVC.isUndef(Val) || HVC.isZero(Mask))
+ return;
+ Value *Ptr =
+ createAdjustedPointer(Builder, AlignAddr, SecTy, Index * ScLen);
+ Value *Predicate =
+ MakePred ? makeTestIfUnaligned(Builder, AlignVal, ScLen) : nullptr;
+
+ // If vector shifting is potentially needed, accumulate metadata
+ // from source sections of twice the store width.
+ int Start = (Index - DoAlign) * ScLen;
+ int Width = (1 + DoAlign) * ScLen;
+ this->createStore(Builder, Val, Ptr, Predicate, ScLen,
+ HVC.vlsb(Builder, Mask),
+ VSpan.section(Start, Width).values());
+ };
+
+ for (int Index = 0; Index != NumSectors + DoAlign; ++Index) {
+ createStore(Builder, ASpanV, ASpanM, Index, DoAlign && Index == NumSectors);
}
}
auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
+ LLVM_DEBUG(dbgs() << "Realigning group:\n" << Move << '\n');
+
// TODO: Needs support for masked loads/stores of "scalar" vectors.
if (!Move.IsHvx)
return false;
@@ -1122,7 +1476,7 @@ auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
// of potential bitcasts to i8*.
int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
- WithMaxAlign.ValTy, Adjust);
+ WithMaxAlign.ValTy, Adjust, Move.Clones);
int Diff = Start - (OffAtMax + Adjust);
AlignVal = HVC.getConstInt(Diff);
assert(Diff >= 0);
@@ -1135,9 +1489,15 @@ auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
// the alignment amount.
// Do an explicit down-alignment of the address to avoid creating an
// aligned instruction with an address that is not really aligned.
- AlignAddr = createAlignedPointer(Builder, WithMinOffset.Addr,
- WithMinOffset.ValTy, MinNeeded.value());
- AlignVal = Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy());
+ AlignAddr =
+ createAlignedPointer(Builder, WithMinOffset.Addr, WithMinOffset.ValTy,
+ MinNeeded.value(), Move.Clones);
+ AlignVal =
+ Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy(), "pti");
+ if (auto *I = dyn_cast<Instruction>(AlignVal)) {
+ for (auto [Old, New] : Move.Clones)
+ I->replaceUsesOfWith(Old, New);
+ }
}
ByteSpan VSpan;
@@ -1154,6 +1514,13 @@ auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
assert(Move.IsHvx || ScLen == 4 || ScLen == 8);
+ LLVM_DEBUG({
+ dbgs() << "ScLen: " << ScLen << "\n";
+ dbgs() << "AlignVal:" << *AlignVal << "\n";
+ dbgs() << "AlignAddr:" << *AlignAddr << "\n";
+ dbgs() << "VSpan:\n" << VSpan << '\n';
+ });
+
if (Move.IsLoad)
realignLoadGroup(Builder, VSpan, ScLen, AlignVal, AlignAddr);
else
@@ -1165,6 +1532,15 @@ auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
return true;
}
+auto AlignVectors::makeTestIfUnaligned(IRBuilderBase &Builder, Value *AlignVal,
+ int Alignment) const -> Value * {
+ auto *AlignTy = AlignVal->getType();
+ Value *And = Builder.CreateAnd(
+ AlignVal, ConstantInt::get(AlignTy, Alignment - 1), "and");
+ Value *Zero = ConstantInt::get(AlignTy, 0);
+ return Builder.CreateICmpNE(And, Zero, "isz");
+}
+
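A worked check of the test built above, with illustrative values: for a 128-byte sector, an alignment amount of 13 takes the unaligned (predicated) path, while 256 does not.

// AlignVal & (Alignment - 1) != 0  <=>  the address is not sector-aligned.
static_assert((13 & (128 - 1)) != 0, "13 is not a multiple of 128");
static_assert((256 & (128 - 1)) == 0, "256 is a multiple of 128");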
auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
if (!HVC.isByteVecTy(Ty))
return false;
@@ -1175,9 +1551,19 @@ auto AlignVectors::isSectorTy(Type *Ty) const -> bool {
}
auto AlignVectors::run() -> bool {
+ LLVM_DEBUG(dbgs() << "Running HVC::AlignVectors on " << HVC.F.getName()
+ << '\n');
if (!createAddressGroups())
return false;
+ LLVM_DEBUG({
+ dbgs() << "Address groups(" << AddrGroups.size() << "):\n";
+ for (auto &[In, AL] : AddrGroups) {
+ for (const AddrInfo &AI : AL)
+ dbgs() << "---\n" << AI << '\n';
+ }
+ });
+
bool Changed = false;
MoveList LoadGroups, StoreGroups;
@@ -1186,10 +1572,35 @@ auto AlignVectors::run() -> bool {
llvm::append_range(StoreGroups, createStoreGroups(G.second));
}
+ LLVM_DEBUG({
+ dbgs() << "\nLoad groups(" << LoadGroups.size() << "):\n";
+ for (const MoveGroup &G : LoadGroups)
+ dbgs() << G << "\n";
+ dbgs() << "Store groups(" << StoreGroups.size() << "):\n";
+ for (const MoveGroup &G : StoreGroups)
+ dbgs() << G << "\n";
+ });
+
+ // Cumulative limit on the number of groups.
+ unsigned CountLimit = VAGroupCountLimit;
+ if (CountLimit == 0)
+ return false;
+
+ if (LoadGroups.size() > CountLimit) {
+ LoadGroups.resize(CountLimit);
+ StoreGroups.clear();
+ } else {
+ unsigned StoreLimit = CountLimit - LoadGroups.size();
+ if (StoreGroups.size() > StoreLimit)
+ StoreGroups.resize(StoreLimit);
+ }
+
for (auto &M : LoadGroups)
- Changed |= move(M);
+ Changed |= moveTogether(M);
for (auto &M : StoreGroups)
- Changed |= move(M);
+ Changed |= moveTogether(M);
+
+ LLVM_DEBUG(dbgs() << "After moveTogether:\n" << HVC.F);
for (auto &M : LoadGroups)
Changed |= realignGroup(M);
@@ -1356,13 +1767,13 @@ auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
auto *ResizeTy = VectorType::get(HVC.getIntTy(Width), VecTy);
if (Width < ElemWidth) {
- X = Builder.CreateTrunc(X, ResizeTy);
- Y = Builder.CreateTrunc(Y, ResizeTy);
+ X = Builder.CreateTrunc(X, ResizeTy, "trn");
+ Y = Builder.CreateTrunc(Y, ResizeTy, "trn");
} else if (Width > ElemWidth) {
- X = SignX == Signed ? Builder.CreateSExt(X, ResizeTy)
- : Builder.CreateZExt(X, ResizeTy);
- Y = SignY == Signed ? Builder.CreateSExt(Y, ResizeTy)
- : Builder.CreateZExt(Y, ResizeTy);
+ X = SignX == Signed ? Builder.CreateSExt(X, ResizeTy, "sxt")
+ : Builder.CreateZExt(X, ResizeTy, "zxt");
+ Y = SignY == Signed ? Builder.CreateSExt(Y, ResizeTy, "sxt")
+ : Builder.CreateZExt(Y, ResizeTy, "zxt");
};
assert(X->getType() == Y->getType() && X->getType() == ResizeTy);
@@ -1387,8 +1798,8 @@ auto HvxIdioms::processFxpMul(Instruction &In, const FxpOp &Op) const
Value *Cat = HVC.concat(Builder, Results);
Value *Ext = SignX == Signed || SignY == Signed
- ? Builder.CreateSExt(Cat, VecTy)
- : Builder.CreateZExt(Cat, VecTy);
+ ? Builder.CreateSExt(Cat, VecTy, "sxt")
+ : Builder.CreateZExt(Cat, VecTy, "zxt");
return Ext;
}
@@ -1434,14 +1845,14 @@ auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
Value *Prod32 = createMul16(Builder, Op.X, Op.Y);
if (Rounding) {
Value *RoundVal = HVC.getConstSplat(Prod32->getType(), 1 << *Op.RoundAt);
- Prod32 = Builder.CreateAdd(Prod32, RoundVal);
+ Prod32 = Builder.CreateAdd(Prod32, RoundVal, "add");
}
Value *ShiftAmt = HVC.getConstSplat(Prod32->getType(), Op.Frac);
Value *Shifted = Op.X.Sgn == Signed || Op.Y.Sgn == Signed
- ? Builder.CreateAShr(Prod32, ShiftAmt)
- : Builder.CreateLShr(Prod32, ShiftAmt);
- return Builder.CreateTrunc(Shifted, InpTy);
+ ? Builder.CreateAShr(Prod32, ShiftAmt, "asr")
+ : Builder.CreateLShr(Prod32, ShiftAmt, "lsr");
+ return Builder.CreateTrunc(Shifted, InpTy, "trn");
}
// Width >= 32
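For the sub-32-bit path above, a scalar Q-format example may help. This is an illustration under assumed parameters (Frac = 15, RoundAt = 14, i.e. Q15 inputs), not code from the patch: the rounded product of 0.5 and 0.5 comes out as 0.25.

#include <cassert>
#include <cstdint>

int16_t mulQ15(int16_t X, int16_t Y) {
  int32_t Prod32 = int32_t(X) * int32_t(Y); // widen, as createMul16 does
  Prod32 += 1 << 14;                        // rounding term (RoundAt = 14)
  return int16_t(Prod32 >> 15);             // arithmetic shift by Frac = 15
}

void mulQ15Check() {
  assert(mulQ15(0x4000, 0x4000) == 0x2000); // 0.5 * 0.5 == 0.25 in Q15
}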
@@ -1475,10 +1886,11 @@ auto HvxIdioms::processFxpMulChopped(IRBuilderBase &Builder, Instruction &In,
if (Src + 1 < End) {
Value *Hi = WordP[Src + 1];
WordP[Dst] = Builder.CreateIntrinsic(HvxWordTy, Intrinsic::fshr,
- {Hi, Lo, ShiftAmt});
+ {Hi, Lo, ShiftAmt},
+ /*FMFSource*/ nullptr, "int");
} else {
// The shift of the most significant word.
- WordP[Dst] = Builder.CreateAShr(Lo, ShiftAmt);
+ WordP[Dst] = Builder.CreateAShr(Lo, ShiftAmt, "asr");
}
}
if (SkipWords != 0)
@@ -1540,8 +1952,8 @@ auto HvxIdioms::createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
}
Value *Ret = HVC.createHvxIntrinsic(Builder, AddCarry,
/*RetTy=*/nullptr, Args);
- Value *Result = Builder.CreateExtractValue(Ret, {0});
- Value *CarryOut = Builder.CreateExtractValue(Ret, {1});
+ Value *Result = Builder.CreateExtractValue(Ret, {0}, "ext");
+ Value *CarryOut = Builder.CreateExtractValue(Ret, {1}, "ext");
return {Result, CarryOut};
}
@@ -1560,13 +1972,13 @@ auto HvxIdioms::createAddCarry(IRBuilderBase &Builder, Value *X, Value *Y,
Value *ValueIn =
HVC.createHvxIntrinsic(Builder, V6_vandqrt, /*RetTy=*/nullptr,
{CarryIn, HVC.getConstInt(Mask)});
- Result1 = Builder.CreateAdd(X, ValueIn);
+ Result1 = Builder.CreateAdd(X, ValueIn, "add");
}
- Value *CarryOut1 = Builder.CreateCmp(CmpInst::ICMP_ULT, Result1, X);
- Value *Result2 = Builder.CreateAdd(Result1, Y);
- Value *CarryOut2 = Builder.CreateCmp(CmpInst::ICMP_ULT, Result2, Y);
- return {Result2, Builder.CreateOr(CarryOut1, CarryOut2)};
+ Value *CarryOut1 = Builder.CreateCmp(CmpInst::ICMP_ULT, Result1, X, "cmp");
+ Value *Result2 = Builder.CreateAdd(Result1, Y, "add");
+ Value *CarryOut2 = Builder.CreateCmp(CmpInst::ICMP_ULT, Result2, Y, "cmp");
+ return {Result2, Builder.CreateOr(CarryOut1, CarryOut2, "orb")};
}
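The scalar fallback above derives the carry from unsigned comparisons. A minimal stand-alone model of the same trick, using 32-bit lanes (illustrative names, not part of the patch):

#include <cstdint>

// Carry-out of an unsigned add is set exactly when the sum wraps, i.e.
// when the result is smaller than one of the operands.
uint32_t addCarryModel(uint32_t X, uint32_t Y, bool CarryIn, bool &CarryOut) {
  uint32_t R1 = X + (CarryIn ? 1u : 0u);
  bool C1 = R1 < X;  // wrapped while adding the carry-in
  uint32_t R2 = R1 + Y;
  bool C2 = R2 < Y;  // wrapped while adding Y
  CarryOut = C1 || C2;
  return R2;
}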
auto HvxIdioms::createMul16(IRBuilderBase &Builder, SValue X, SValue Y) const
@@ -1603,15 +2015,16 @@ auto HvxIdioms::createMulH16(IRBuilderBase &Builder, SValue X, SValue Y) const
}
Type *HvxP16Ty = HVC.getHvxTy(HVC.getIntTy(16), /*Pair=*/true);
- Value *Pair16 = Builder.CreateBitCast(createMul16(Builder, X, Y), HvxP16Ty);
+ Value *Pair16 =
+ Builder.CreateBitCast(createMul16(Builder, X, Y), HvxP16Ty, "cst");
unsigned Len = HVC.length(HvxP16Ty) / 2;
SmallVector<int, 128> PickOdd(Len);
for (int i = 0; i != static_cast<int>(Len); ++i)
PickOdd[i] = 2 * i + 1;
- return Builder.CreateShuffleVector(HVC.sublo(Builder, Pair16),
- HVC.subhi(Builder, Pair16), PickOdd);
+ return Builder.CreateShuffleVector(
+ HVC.sublo(Builder, Pair16), HVC.subhi(Builder, Pair16), PickOdd, "shf");
}
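createMulH16 above obtains the high halves by shuffling out the odd 16-bit lanes of the full 32-bit products. A scalar model of one lane (illustrative; assumes the little-endian lane layout described by PickOdd):

#include <cassert>
#include <cstdint>

// The high 16 bits of a 16x16->32 product are the "odd" halfword of the
// 32-bit result in a little-endian lane layout.
int16_t mulHigh16(int16_t A, int16_t B) {
  int32_t Prod = int32_t(A) * int32_t(B);
  return int16_t(uint32_t(Prod) >> 16);
}

void mulHigh16Check() {
  assert(mulHigh16(int16_t(0x7FFF), int16_t(0x7FFF)) == 0x3FFF);
}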
auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
@@ -1632,8 +2045,8 @@ auto HvxIdioms::createMul32(IRBuilderBase &Builder, SValue X, SValue Y) const
Value *Parts = HVC.createHvxIntrinsic(Builder, V6_vmpy_parts, nullptr,
{X.Val, Y.Val}, {HvxI32Ty});
- Value *Hi = Builder.CreateExtractValue(Parts, {0});
- Value *Lo = Builder.CreateExtractValue(Parts, {1});
+ Value *Hi = Builder.CreateExtractValue(Parts, {0}, "ext");
+ Value *Lo = Builder.CreateExtractValue(Parts, {1}, "ext");
return {Lo, Hi};
}
@@ -1741,13 +2154,22 @@ auto HvxIdioms::run() -> bool {
// --- End HvxIdioms
auto HexagonVectorCombine::run() -> bool {
- if (!HST.useHVXOps())
- return false;
+ if (DumpModule)
+ dbgs() << "Module before HexagonVectorCombine\n" << *F.getParent();
bool Changed = false;
- Changed |= AlignVectors(*this).run();
- Changed |= HvxIdioms(*this).run();
+ if (HST.useHVXOps()) {
+ if (VAEnabled)
+ Changed |= AlignVectors(*this).run();
+ if (VIEnabled)
+ Changed |= HvxIdioms(*this).run();
+ }
+ if (DumpModule) {
+ dbgs() << "Module " << (Changed ? "(modified)" : "(unchanged)")
+ << " after HexagonVectorCombine\n"
+ << *F.getParent();
+ }
return Changed;
}
@@ -1793,6 +2215,14 @@ auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {
return isa<UndefValue>(Val);
}
+auto HexagonVectorCombine::isTrue(const Value *Val) const -> bool {
+ return Val == ConstantInt::getTrue(Val->getType());
+}
+
+auto HexagonVectorCombine::isFalse(const Value *Val) const -> bool {
+ return isZero(Val);
+}
+
auto HexagonVectorCombine::getHvxTy(Type *ElemTy, bool Pair) const
-> VectorType * {
EVT ETy = EVT::getEVT(ElemTy, false);
@@ -1899,7 +2329,7 @@ auto HexagonVectorCombine::insertb(IRBuilderBase &Builder, Value *Dst,
(Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;
}
- Value *P2Insert = Builder.CreateShuffleVector(P2Dst, P2Src, SMask);
+ Value *P2Insert = Builder.CreateShuffleVector(P2Dst, P2Src, SMask, "shf");
return vresize(Builder, P2Insert, DstLen, Undef);
}
@@ -1922,12 +2352,14 @@ auto HexagonVectorCombine::vlalignb(IRBuilderBase &Builder, Value *Lo,
if (VecLen == 4) {
Value *Pair = concat(Builder, {Lo, Hi});
- Value *Shift = Builder.CreateLShr(Builder.CreateShl(Pair, Amt), 32);
- Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()));
- return Builder.CreateBitCast(Trunc, Hi->getType());
+ Value *Shift =
+ Builder.CreateLShr(Builder.CreateShl(Pair, Amt, "shl"), 32, "lsr");
+ Value *Trunc =
+ Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
+ return Builder.CreateBitCast(Trunc, Hi->getType(), "cst");
}
if (VecLen == 8) {
- Value *Sub = Builder.CreateSub(getConstInt(VecLen), Amt);
+ Value *Sub = Builder.CreateSub(getConstInt(VecLen), Amt, "sub");
return vralignb(Builder, Lo, Hi, Sub);
}
llvm_unreachable("Unexpected vector length");
@@ -1951,18 +2383,19 @@ auto HexagonVectorCombine::vralignb(IRBuilderBase &Builder, Value *Lo,
if (VecLen == 4) {
Value *Pair = concat(Builder, {Lo, Hi});
- Value *Shift = Builder.CreateLShr(Pair, Amt);
- Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()));
- return Builder.CreateBitCast(Trunc, Lo->getType());
+ Value *Shift = Builder.CreateLShr(Pair, Amt, "lsr");
+ Value *Trunc =
+ Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()), "trn");
+ return Builder.CreateBitCast(Trunc, Lo->getType(), "cst");
}
if (VecLen == 8) {
Type *Int64Ty = Type::getInt64Ty(F.getContext());
- Value *Lo64 = Builder.CreateBitCast(Lo, Int64Ty);
- Value *Hi64 = Builder.CreateBitCast(Hi, Int64Ty);
+ Value *Lo64 = Builder.CreateBitCast(Lo, Int64Ty, "cst");
+ Value *Hi64 = Builder.CreateBitCast(Hi, Int64Ty, "cst");
Function *FI = Intrinsic::getDeclaration(F.getParent(),
Intrinsic::hexagon_S2_valignrb);
- Value *Call = Builder.CreateCall(FI, {Hi64, Lo64, Amt});
- return Builder.CreateBitCast(Call, Lo->getType());
+ Value *Call = Builder.CreateCall(FI, {Hi64, Lo64, Amt}, "cup");
+ return Builder.CreateBitCast(Call, Lo->getType(), "cst");
}
llvm_unreachable("Unexpected vector length");
}
@@ -1985,8 +2418,8 @@ auto HexagonVectorCombine::concat(IRBuilderBase &Builder,
if (Work[ThisW].size() % 2 != 0)
Work[ThisW].push_back(UndefValue::get(Ty));
for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
- Value *Joined = Builder.CreateShuffleVector(Work[ThisW][i],
- Work[ThisW][i + 1], SMask);
+ Value *Joined = Builder.CreateShuffleVector(
+ Work[ThisW][i], Work[ThisW][i + 1], SMask, "shf");
Work[OtherW].push_back(Joined);
}
std::swap(ThisW, OtherW);
@@ -1998,7 +2431,7 @@ auto HexagonVectorCombine::concat(IRBuilderBase &Builder,
SMask.resize(Vecs.size() * length(Vecs.front()->getType()));
std::iota(SMask.begin(), SMask.end(), 0);
Value *Total = Work[ThisW].front();
- return Builder.CreateShuffleVector(Total, SMask);
+ return Builder.CreateShuffleVector(Total, SMask, "shf");
}
auto HexagonVectorCombine::vresize(IRBuilderBase &Builder, Value *Val,
@@ -2017,8 +2450,8 @@ auto HexagonVectorCombine::vresize(IRBuilderBase &Builder, Value *Val,
SmallVector<int, 128> SMask(NewSize);
std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);
- Value *PadVec = Builder.CreateVectorSplat(CurSize, Pad);
- return Builder.CreateShuffleVector(Val, PadVec, SMask);
+ Value *PadVec = Builder.CreateVectorSplat(CurSize, Pad, "spt");
+ return Builder.CreateShuffleVector(Val, PadVec, SMask, "shf");
}
auto HexagonVectorCombine::rescale(IRBuilderBase &Builder, Value *Mask,
@@ -2048,11 +2481,11 @@ auto HexagonVectorCombine::rescale(IRBuilderBase &Builder, Value *Mask,
// Mask <N x i1> -> sext to <N x FromTy> -> bitcast to <M x ToTy> ->
// -> trunc to <M x i1>.
Value *Ext = Builder.CreateSExt(
- Mask, VectorType::get(FromITy, FromCount, /*Scalable=*/false));
+ Mask, VectorType::get(FromITy, FromCount, /*Scalable=*/false), "sxt");
Value *Cast = Builder.CreateBitCast(
- Ext, VectorType::get(ToITy, ToCount, /*Scalable=*/false));
+ Ext, VectorType::get(ToITy, ToCount, /*Scalable=*/false), "cst");
return Builder.CreateTrunc(
- Cast, VectorType::get(getBoolTy(), ToCount, /*Scalable=*/false));
+ Cast, VectorType::get(getBoolTy(), ToCount, /*Scalable=*/false), "trn");
}
// Bitcast to bytes, and return least significant bits.
@@ -2064,10 +2497,10 @@ auto HexagonVectorCombine::vlsb(IRBuilderBase &Builder, Value *Val) const
Value *Bytes = vbytes(Builder, Val);
if (auto *VecTy = dyn_cast<VectorType>(Bytes->getType()))
- return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)));
+ return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)), "trn");
// If Bytes is a scalar (i.e. Val was a scalar byte), return i1, not
// <1 x i1>.
- return Builder.CreateTrunc(Bytes, getBoolTy());
+ return Builder.CreateTrunc(Bytes, getBoolTy(), "trn");
}
// Bitcast to bytes for non-bool. For bool, convert i1 -> i8.
@@ -2078,11 +2511,11 @@ auto HexagonVectorCombine::vbytes(IRBuilderBase &Builder, Value *Val) const
return Val;
if (ScalarTy != getBoolTy())
- return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)));
+ return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)), "cst");
// For bool, return a sext from i1 to i8.
if (auto *VecTy = dyn_cast<VectorType>(Val->getType()))
- return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy));
- return Builder.CreateSExt(Val, getByteTy());
+ return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy), "sxt");
+ return Builder.CreateSExt(Val, getByteTy(), "sxt");
}
auto HexagonVectorCombine::subvector(IRBuilderBase &Builder, Value *Val,
@@ -2116,7 +2549,7 @@ auto HexagonVectorCombine::vdeal(IRBuilderBase &Builder, Value *Val0,
Mask[i] = 2 * i; // Even
Mask[i + Len] = 2 * i + 1; // Odd
}
- return Builder.CreateShuffleVector(Val0, Val1, Mask);
+ return Builder.CreateShuffleVector(Val0, Val1, Mask, "shf");
}
auto HexagonVectorCombine::vshuff(IRBuilderBase &Builder, Value *Val0,
@@ -2129,13 +2562,14 @@ auto HexagonVectorCombine::vshuff(IRBuilderBase &Builder, Value *Val0,
Mask[2 * i + 0] = i; // Val0
Mask[2 * i + 1] = i + Len; // Val1
}
- return Builder.CreateShuffleVector(Val0, Val1, Mask);
+ return Builder.CreateShuffleVector(Val0, Val1, Mask, "shf");
}
auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
Intrinsic::ID IntID, Type *RetTy,
ArrayRef<Value *> Args,
- ArrayRef<Type *> ArgTys) const
+ ArrayRef<Type *> ArgTys,
+ ArrayRef<Value *> MDSources) const
-> Value * {
auto getCast = [&](IRBuilderBase &Builder, Value *Val,
Type *DestTy) -> Value * {
@@ -2149,7 +2583,7 @@ auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
Type *BoolTy = Type::getInt1Ty(F.getContext());
if (cast<VectorType>(SrcTy)->getElementType() != BoolTy)
- return Builder.CreateBitCast(Val, DestTy);
+ return Builder.CreateBitCast(Val, DestTy, "cst");
// Predicate HVX vector.
unsigned HwLen = HST.getVectorLength();
@@ -2157,7 +2591,7 @@ auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
: Intrinsic::hexagon_V6_pred_typecast_128B;
Function *FI =
Intrinsic::getDeclaration(F.getParent(), TC, {DestTy, Val->getType()});
- return Builder.CreateCall(FI, {Val});
+ return Builder.CreateCall(FI, {Val}, "cup");
};
Function *IntrFn = Intrinsic::getDeclaration(F.getParent(), IntID, ArgTys);
@@ -2173,7 +2607,12 @@ auto HexagonVectorCombine::createHvxIntrinsic(IRBuilderBase &Builder,
IntrArgs.push_back(A);
}
}
- Value *Call = Builder.CreateCall(IntrFn, IntrArgs);
+ StringRef MaybeName = !IntrTy->getReturnType()->isVoidTy() ? "cup" : "";
+ CallInst *Call = Builder.CreateCall(IntrFn, IntrArgs, MaybeName);
+
+ MemoryEffects ME = Call->getAttributes().getMemoryEffects();
+ if (!ME.doesNotAccessMemory() && !ME.onlyAccessesInaccessibleMem())
+ propagateMetadata(Call, MDSources);
Type *CallTy = Call->getType();
if (RetTy == nullptr || CallTy == RetTy)
@@ -2223,7 +2662,7 @@ auto HexagonVectorCombine::splitVectorElements(IRBuilderBase &Builder,
unsigned Width = Val->getType()->getScalarSizeInBits();
auto *VTy = VectorType::get(getIntTy(Width / 2), 2 * Length, false);
- Value *VVal = Builder.CreateBitCast(Val, VTy);
+ Value *VVal = Builder.CreateBitCast(Val, VTy, "cst");
Value *Res = vdeal(Builder, sublo(Builder, VVal), subhi(Builder, VVal));
@@ -2265,8 +2704,8 @@ auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
// Having too many inputs is ok: drop the high bits (usual wrap-around).
// If there are too few, fill them with the sign bit.
Value *Last = Inputs.back();
- Value *Sign =
- Builder.CreateAShr(Last, getConstSplat(Last->getType(), Width - 1));
+ Value *Sign = Builder.CreateAShr(
+ Last, getConstSplat(Last->getType(), Width - 1), "asr");
Inputs.resize(NeedInputs, Sign);
}
@@ -2275,7 +2714,7 @@ auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
auto *VTy = VectorType::get(getIntTy(Width), Length, false);
for (int i = 0, e = Inputs.size(); i < e; i += 2) {
Value *Res = vshuff(Builder, Inputs[i], Inputs[i + 1]);
- Inputs[i / 2] = Builder.CreateBitCast(Res, VTy);
+ Inputs[i / 2] = Builder.CreateBitCast(Res, VTy, "cst");
}
Inputs.resize(Inputs.size() / 2);
}
@@ -2287,6 +2726,16 @@ auto HexagonVectorCombine::joinVectorElements(IRBuilderBase &Builder,
auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
Value *Ptr1) const
-> std::optional<int> {
+ // Try SCEV first.
+ const SCEV *Scev0 = SE.getSCEV(Ptr0);
+ const SCEV *Scev1 = SE.getSCEV(Ptr1);
+ const SCEV *ScevDiff = SE.getMinusSCEV(Scev0, Scev1);
+ if (auto *Const = dyn_cast<SCEVConstant>(ScevDiff)) {
+ APInt V = Const->getAPInt();
+ if (V.isSignedIntN(8 * sizeof(int)))
+ return static_cast<int>(V.getSExtValue());
+ }
+
struct Builder : IRBuilder<> {
Builder(BasicBlock *B) : IRBuilder<>(B->getTerminator()) {}
~Builder() {
@@ -2385,8 +2834,17 @@ auto HexagonVectorCombine::getNumSignificantBits(const Value *V,
auto HexagonVectorCombine::getKnownBits(const Value *V,
const Instruction *CtxI) const
-> KnownBits {
- return computeKnownBits(V, DL, /*Depth=*/0, &AC, CtxI, &DT, /*ORE=*/nullptr,
- /*UseInstrInfo=*/true);
+ return computeKnownBits(V, DL, /*Depth=*/0, &AC, CtxI, &DT);
+}
+
+auto HexagonVectorCombine::isSafeToClone(const Instruction &In) const -> bool {
+ if (In.mayHaveSideEffects() || In.isAtomic() || In.isVolatile() ||
+ In.isFenceLike() || In.mayReadOrWriteMemory()) {
+ return false;
+ }
+ if (isa<CallBase>(In) || isa<AllocaInst>(In))
+ return false;
+ return true;
}
template <typename T>
@@ -2468,7 +2926,7 @@ auto HexagonVectorCombine::getElementRange(IRBuilderBase &Builder, Value *Lo,
assert(0 <= Start && size_t(Start + Length) < length(Lo) + length(Hi));
SmallVector<int, 128> SMask(Length);
std::iota(SMask.begin(), SMask.end(), Start);
- return Builder.CreateShuffleVector(Lo, Hi, SMask);
+ return Builder.CreateShuffleVector(Lo, Hi, SMask, "shf");
}
// Pass management.
@@ -2492,6 +2950,7 @@ public:
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetPassConfig>();
FunctionPass::getAnalysisUsage(AU);
@@ -2504,10 +2963,11 @@ public:
AssumptionCache &AC =
getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
TargetLibraryInfo &TLI =
getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();
- HexagonVectorCombine HVC(F, AA, AC, DT, TLI, TM);
+ HexagonVectorCombine HVC(F, AA, AC, DT, SE, TLI, TM);
return HVC.run();
}
};
@@ -2520,6 +2980,7 @@ INITIALIZE_PASS_BEGIN(HexagonVectorCombineLegacy, DEBUG_TYPE,
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(HexagonVectorCombineLegacy, DEBUG_TYPE,
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 08ab3dbfee4a..12c84ceb5fd2 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -51,8 +51,7 @@ class HexagonAsmBackend : public MCAsmBackend {
MCInst &HMB) const {
SmallVector<MCFixup, 4> Fixups;
SmallString<256> Code;
- raw_svector_ostream VecOS(Code);
- E.encodeInstruction(HMB, VecOS, Fixups, *RF.getSubtargetInfo());
+ E.encodeInstruction(HMB, Code, Fixups, *RF.getSubtargetInfo());
// Update the fragment.
RF.setInst(HMB);
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
index 6fbe94072e73..ef4c23df5412 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp
@@ -79,9 +79,9 @@ void HexagonMCChecker::initReg(MCInst const &MCI, unsigned R, unsigned &PredReg,
} else
// Note register use. Super-registers are not tracked directly,
// but their components.
- for (MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
- SRI.isValid(); ++SRI)
- if (!MCSubRegIterator(*SRI, &RI).isValid())
+ for (MCRegAliasIterator SRI(R, &RI, RI.subregs(R).empty()); SRI.isValid();
+ ++SRI)
+ if (RI.subregs(*SRI).empty())
// Skip super-registers used indirectly.
Uses.insert(*SRI);
@@ -103,7 +103,7 @@ void HexagonMCChecker::init(MCInst const &MCI) {
const bool IgnoreTmpDst = (HexagonMCInstrInfo::hasTmpDst(MCII, MCI) ||
HexagonMCInstrInfo::hasHvxTmp(MCII, MCI)) &&
- STI.getFeatureBits()[Hexagon::ArchV69];
+ STI.hasFeature(Hexagon::ArchV69);
// Get implicit register definitions.
for (MCPhysReg R : MCID.implicit_defs()) {
@@ -145,9 +145,9 @@ void HexagonMCChecker::init(MCInst const &MCI) {
// Note register definitions, direct ones as well as indirect side-effects.
// Super-registers are not tracked directly, but their components.
- for (MCRegAliasIterator SRI(R, &RI, !MCSubRegIterator(R, &RI).isValid());
- SRI.isValid(); ++SRI) {
- if (MCSubRegIterator(*SRI, &RI).isValid())
+ for (MCRegAliasIterator SRI(R, &RI, RI.subregs(R).empty()); SRI.isValid();
+ ++SRI) {
+ if (!RI.subregs(*SRI).empty())
// Skip super-registers defined indirectly.
continue;
@@ -178,10 +178,6 @@ void HexagonMCChecker::init(MCInst const &MCI) {
// TODO: relies on the impossibility of a current and a temporary loads
// in the same packet.
TmpDefs.insert(*SRI);
- else if (i <= 1 && HexagonMCInstrInfo::hasNewValue2(MCII, MCI))
- // vshuff(Vx, Vy, Rx) <- Vx(0) and Vy(1) are both source and
- // destination registers with this instruction. same for vdeal(Vx,Vy,Rx)
- Uses.insert(*SRI);
else if (!IgnoreTmpDst)
Defs[*SRI].insert(PredSense(PredReg, isTrue));
}
@@ -713,7 +709,7 @@ bool HexagonMCChecker::checkShuffle() {
}
bool HexagonMCChecker::checkValidTmpDst() {
- if (!STI.getFeatureBits()[Hexagon::ArchV69]) {
+ if (!STI.hasFeature(Hexagon::ArchV69)) {
return true;
}
auto HasTmp = [&](MCInst const &I) {
@@ -803,7 +799,7 @@ void HexagonMCChecker::reportWarning(Twine const &Msg) {
}
bool HexagonMCChecker::checkLegalVecRegPair() {
- const bool IsPermitted = STI.getFeatureBits()[Hexagon::ArchV67];
+ const bool IsPermitted = STI.hasFeature(Hexagon::ArchV67);
const bool HasReversePairs = ReversePairs.size() != 0;
if (!IsPermitted && HasReversePairs) {
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index a9167489562d..8bf4d0a41298 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -364,7 +364,8 @@ uint32_t HexagonMCCodeEmitter::parseBits(size_t Last, MCInst const &MCB,
}
/// Emit the bundle.
-void HexagonMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+void HexagonMCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
MCInst &HMB = const_cast<MCInst &>(MI);
@@ -380,7 +381,7 @@ void HexagonMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
for (auto &I : HexagonMCInstrInfo::bundleInstructions(HMB)) {
MCInst &HMI = const_cast<MCInst &>(*I.getInst());
- EncodeSingleInstruction(HMI, OS, Fixups, STI, parseBits(Last, HMB, HMI));
+ encodeSingleInstruction(HMI, CB, Fixups, STI, parseBits(Last, HMB, HMI));
State.Extended = HexagonMCInstrInfo::isImmext(HMI);
State.Addend += HEXAGON_INSTR_SIZE;
++State.Index;
@@ -394,10 +395,10 @@ static bool RegisterMatches(unsigned Consumer, unsigned Producer,
Consumer);
}
-/// EncodeSingleInstruction - Emit a single
-void HexagonMCCodeEmitter::EncodeSingleInstruction(const MCInst &MI,
- raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI, uint32_t Parse) const {
+void HexagonMCCodeEmitter::encodeSingleInstruction(
+ const MCInst &MI, SmallVectorImpl<char> &CB,
+ SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI,
+ uint32_t Parse) const {
assert(!HexagonMCInstrInfo::isBundle(MI));
uint64_t Binary;
@@ -442,7 +443,7 @@ void HexagonMCCodeEmitter::EncodeSingleInstruction(const MCInst &MI,
Binary |= SubBits0 | (SubBits1 << 16);
}
- support::endian::write<uint32_t>(OS, Binary, support::little);
+ support::endian::write<uint32_t>(CB, Binary, support::little);
++MCNumEmitted;
}
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
index 151964bf818b..10607f3bd3c5 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
@@ -17,7 +17,7 @@
#include "MCTargetDesc/HexagonFixupKinds.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include <cstddef>
#include <cstdint>
#include <memory>
@@ -49,11 +49,11 @@ public:
HexagonMCCodeEmitter(MCInstrInfo const &MII, MCContext &MCT)
: MCT(MCT), MCII(MII) {}
- void encodeInstruction(MCInst const &MI, raw_ostream &OS,
+ void encodeInstruction(MCInst const &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
MCSubtargetInfo const &STI) const override;
- void EncodeSingleInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeSingleInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI,
uint32_t Parse) const;
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
index ab5e9eb4eca6..0cfea7749aa8 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.cpp
@@ -99,10 +99,6 @@ void HexagonMCExpr::setS27_2_reloc(bool Val) {
S27_2_reloc = Val;
}
-bool HexagonMCExpr::classof(MCExpr const *E) {
- return E->getKind() == MCExpr::Target;
-}
-
HexagonMCExpr::HexagonMCExpr(MCExpr const *Expr)
: Expr(Expr), MustNotExtend(false), MustExtend(false), S27_2_reloc(false),
SignMismatch(false) {}
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
index e88f46a04dae..6438ac98e38d 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCExpr.h
@@ -21,7 +21,6 @@ public:
void visitUsedExpr(MCStreamer &Streamer) const override;
MCFragment *findAssociatedFragment() const override;
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override;
- static bool classof(MCExpr const *E);
MCExpr const *getExpr() const;
void setMustExtend(bool Val = true);
bool mustExtend() const;
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index ef1ccea6add7..9cf004cf4c9a 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -138,7 +138,7 @@ bool canonicalizePacketImpl(MCInstrInfo const &MCII, MCSubtargetInfo const &STI,
HexagonMCShuffle(Context, false, MCII, STI, MCB);
const SmallVector<DuplexCandidate, 8> possibleDuplexes =
- (STI.getFeatureBits()[Hexagon::FeatureDuplex])
+ (STI.hasFeature(Hexagon::FeatureDuplex))
? HexagonMCInstrInfo::getDuplexPossibilties(MCII, STI, MCB)
: SmallVector<DuplexCandidate, 8>();
@@ -568,12 +568,20 @@ bool HexagonMCInstrInfo::isConstExtended(MCInstrInfo const &MCII,
if (isa<HexagonMCExpr>(MO.getExpr()) &&
HexagonMCInstrInfo::mustNotExtend(*MO.getExpr()))
return false;
+
int64_t Value;
if (!MO.getExpr()->evaluateAsAbsolute(Value))
return true;
- int MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI);
- int MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI);
- return (MinValue > Value || Value > MaxValue);
+ if (HexagonMCInstrInfo::isExtentSigned(MCII, MCI)) {
+ int32_t SValue = Value;
+ int32_t MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI);
+ int32_t MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI);
+ return SValue < MinValue || SValue > MaxValue;
+ }
+ uint32_t UValue = Value;
+ uint32_t MinValue = HexagonMCInstrInfo::getMinValue(MCII, MCI);
+ uint32_t MaxValue = HexagonMCInstrInfo::getMaxValue(MCII, MCI);
+ return UValue < MinValue || UValue > MaxValue;
}
bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) {
@@ -907,7 +915,7 @@ bool HexagonMCInstrInfo::s27_2_reloc(MCExpr const &Expr) {
}
unsigned HexagonMCInstrInfo::packetSizeSlots(MCSubtargetInfo const &STI) {
- const bool IsTiny = STI.getFeatureBits()[Hexagon::ProcTinyCore];
+ const bool IsTiny = STI.hasFeature(Hexagon::ProcTinyCore);
return IsTiny ? (HEXAGON_PACKET_SIZE - 1) : HEXAGON_PACKET_SIZE;
}
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 6c9a3eb4b346..cf6fa78a2005 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -364,12 +364,12 @@ static MCTargetStreamer *createHexagonNullTargetStreamer(MCStreamer &S) {
}
static void LLVM_ATTRIBUTE_UNUSED clearFeature(MCSubtargetInfo* STI, uint64_t F) {
- if (STI->getFeatureBits()[F])
+ if (STI->hasFeature(F))
STI->ToggleFeature(F);
}
static bool LLVM_ATTRIBUTE_UNUSED checkFeature(MCSubtargetInfo* STI, uint64_t F) {
- return STI->getFeatureBits()[F];
+ return STI->hasFeature(F);
}
namespace {
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 5774cad0f102..2bbc2f644f58 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -16,6 +16,7 @@
#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInst.h"
@@ -101,7 +102,7 @@ unsigned HexagonResource::setWeight(unsigned s) {
return Weight = 0;
unsigned Ctpop = llvm::popcount(Units);
- unsigned Cttz = countTrailingZeros(Units);
+ unsigned Cttz = llvm::countr_zero(Units);
Weight = (1u << (SlotWeight * s)) * ((MaskWeight - Ctpop) << Cttz);
return Weight;
}
diff --git a/llvm/lib/Target/Hexagon/RDFCopy.cpp b/llvm/lib/Target/Hexagon/RDFCopy.cpp
index cb31ec068325..b26821cd0171 100644
--- a/llvm/lib/Target/Hexagon/RDFCopy.cpp
+++ b/llvm/lib/Target/Hexagon/RDFCopy.cpp
@@ -51,6 +51,8 @@ bool CopyPropagation::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) {
if (TRI.getMinimalPhysRegClass(DstR.Reg) !=
TRI.getMinimalPhysRegClass(SrcR.Reg))
return false;
+ if (!DFG.isTracked(SrcR) || !DFG.isTracked(DstR))
+ return false;
EM.insert(std::make_pair(DstR, SrcR));
return true;
}
@@ -63,41 +65,67 @@ bool CopyPropagation::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) {
void CopyPropagation::recordCopy(NodeAddr<StmtNode*> SA, EqualityMap &EM) {
CopyMap.insert(std::make_pair(SA.Id, EM));
Copies.push_back(SA.Id);
+
+ for (auto I : EM) {
+ auto FS = DefM.find(I.second.Reg);
+ if (FS == DefM.end() || FS->second.empty())
+ continue; // Undefined source
+ RDefMap[I.second][SA.Id] = FS->second.top()->Id;
+ // Insert DstR into the map.
+ RDefMap[I.first];
+ }
+}
+
+
+void CopyPropagation::updateMap(NodeAddr<InstrNode*> IA) {
+ RegisterSet RRs(DFG.getPRI());
+ for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG))
+ RRs.insert(RA.Addr->getRegRef(DFG));
+ bool Common = false;
+ for (auto &R : RDefMap) {
+ if (!RRs.count(R.first))
+ continue;
+ Common = true;
+ break;
+ }
+ if (!Common)
+ return;
+
+ for (auto &R : RDefMap) {
+ if (!RRs.count(R.first))
+ continue;
+ auto F = DefM.find(R.first.Reg);
+ if (F == DefM.end() || F->second.empty())
+ continue;
+ R.second[IA.Id] = F->second.top()->Id;
+ }
}
bool CopyPropagation::scanBlock(MachineBasicBlock *B) {
bool Changed = false;
NodeAddr<BlockNode*> BA = DFG.findBlock(B);
+ DFG.markBlock(BA.Id, DefM);
for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) {
if (DFG.IsCode<NodeAttrs::Stmt>(IA)) {
NodeAddr<StmtNode*> SA = IA;
- EqualityMap EM;
+ EqualityMap EM(std::less<RegisterRef>(DFG.getPRI()));
if (interpretAsCopy(SA.Addr->getCode(), EM))
recordCopy(SA, EM);
}
+
+ updateMap(IA);
+ DFG.pushAllDefs(IA, DefM);
}
MachineDomTreeNode *N = MDT.getNode(B);
for (auto *I : *N)
Changed |= scanBlock(I->getBlock());
+ DFG.releaseBlock(BA.Id, DefM);
return Changed;
}
-NodeId CopyPropagation::getLocalReachingDef(RegisterRef RefRR,
- NodeAddr<InstrNode*> IA) {
- NodeAddr<RefNode*> RA = L.getNearestAliasedRef(RefRR, IA);
- if (RA.Id != 0) {
- if (RA.Addr->getKind() == NodeAttrs::Def)
- return RA.Id;
- assert(RA.Addr->getKind() == NodeAttrs::Use);
- if (NodeId RD = RA.Addr->getReachingDef())
- return RD;
- }
- return 0;
-}
-
bool CopyPropagation::run() {
scanBlock(&DFG.getMF().front());
@@ -106,9 +134,19 @@ bool CopyPropagation::run() {
for (NodeId I : Copies) {
dbgs() << "Instr: " << *DFG.addr<StmtNode*>(I).Addr->getCode();
dbgs() << " eq: {";
- for (auto J : CopyMap[I])
- dbgs() << ' ' << Print<RegisterRef>(J.first, DFG) << '='
- << Print<RegisterRef>(J.second, DFG);
+ if (CopyMap.count(I)) {
+ for (auto J : CopyMap.at(I))
+ dbgs() << ' ' << Print<RegisterRef>(J.first, DFG) << '='
+ << Print<RegisterRef>(J.second, DFG);
+ }
+ dbgs() << " }\n";
+ }
+ dbgs() << "\nRDef map:\n";
+ for (auto R : RDefMap) {
+ dbgs() << Print<RegisterRef>(R.first, DFG) << " -> {";
+ for (auto &M : R.second)
+ dbgs() << ' ' << Print<NodeId>(M.first, DFG) << ':'
+ << Print<NodeId>(M.second, DFG);
dbgs() << " }\n";
}
}
@@ -130,6 +168,8 @@ bool CopyPropagation::run() {
return 0;
};
+ const PhysicalRegisterInfo &PRI = DFG.getPRI();
+
for (NodeId C : Copies) {
#ifndef NDEBUG
if (HasLimit && CpCount >= CpLimit)
@@ -147,10 +187,11 @@ bool CopyPropagation::run() {
if (FR == EM.end())
continue;
RegisterRef SR = FR->second;
- if (DR == SR)
+ if (PRI.equal_to(DR, SR))
continue;
- NodeId AtCopy = getLocalReachingDef(SR, SA);
+ auto &RDefSR = RDefMap[SR];
+ NodeId RDefSR_SA = RDefSR[SA.Id];
for (NodeId N = DA.Addr->getReachedUse(), NextN; N; N = NextN) {
auto UA = DFG.addr<UseNode*>(N);
@@ -158,13 +199,12 @@ bool CopyPropagation::run() {
uint16_t F = UA.Addr->getFlags();
if ((F & NodeAttrs::PhiRef) || (F & NodeAttrs::Fixed))
continue;
- if (UA.Addr->getRegRef(DFG) != DR)
+ if (!PRI.equal_to(UA.Addr->getRegRef(DFG), DR))
continue;
NodeAddr<InstrNode*> IA = UA.Addr->getOwner(DFG);
assert(DFG.IsCode<NodeAttrs::Stmt>(IA));
- NodeId AtUse = getLocalReachingDef(SR, IA);
- if (AtCopy != AtUse)
+ if (RDefSR[IA.Id] != RDefSR_SA)
continue;
MachineOperand &Op = UA.Addr->getOp();
@@ -180,8 +220,8 @@ bool CopyPropagation::run() {
Op.setReg(NewReg);
Op.setSubReg(0);
DFG.unlinkUse(UA, false);
- if (AtCopy != 0) {
- UA.Addr->linkToDef(UA.Id, DFG.addr<DefNode*>(AtCopy));
+ if (RDefSR_SA != 0) {
+ UA.Addr->linkToDef(UA.Id, DFG.addr<DefNode*>(RDefSR_SA));
} else {
UA.Addr->setReachingDef(0);
UA.Addr->setSibling(0);
@@ -199,7 +239,7 @@ bool CopyPropagation::run() {
// Update the EM map in the copy's entry.
auto &M = FC->second;
for (auto &J : M) {
- if (J.second != DR)
+ if (!PRI.equal_to(J.second, DR))
continue;
J.second = SR;
break;
diff --git a/llvm/lib/Target/Hexagon/RDFCopy.h b/llvm/lib/Target/Hexagon/RDFCopy.h
index 99b18a75d8c2..e4fb89892831 100644
--- a/llvm/lib/Target/Hexagon/RDFCopy.h
+++ b/llvm/lib/Target/Hexagon/RDFCopy.h
@@ -26,7 +26,7 @@ namespace rdf {
struct CopyPropagation {
CopyPropagation(DataFlowGraph &dfg) : MDT(dfg.getDT()), DFG(dfg),
- L(dfg.getMF().getRegInfo(), dfg) {}
+ RDefMap(std::less<RegisterRef>(DFG.getPRI())) {}
virtual ~CopyPropagation() = default;
@@ -36,22 +36,23 @@ namespace rdf {
DataFlowGraph &getDFG() { return DFG; }
using EqualityMap = std::map<RegisterRef, RegisterRef>;
-
virtual bool interpretAsCopy(const MachineInstr *MI, EqualityMap &EM);
private:
const MachineDominatorTree &MDT;
DataFlowGraph &DFG;
- Liveness L;
+ DataFlowGraph::DefStackMap DefM;
bool Trace = false;
+ // map: register -> (map: stmt -> reaching def)
+ std::map<RegisterRef,std::map<NodeId,NodeId>> RDefMap;
// map: statement -> (map: dst reg -> src reg)
std::map<NodeId, EqualityMap> CopyMap;
std::vector<NodeId> Copies;
void recordCopy(NodeAddr<StmtNode*> SA, EqualityMap &EM);
+ void updateMap(NodeAddr<InstrNode*> IA);
bool scanBlock(MachineBasicBlock *B);
- NodeId getLocalReachingDef(RegisterRef RefRR, NodeAddr<InstrNode*> IA);
};
} // end namespace rdf
diff --git a/llvm/lib/Target/Hexagon/RDFDeadCode.cpp b/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
index 894bdf38fe17..d90923d6d3ed 100644
--- a/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
+++ b/llvm/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -55,7 +55,8 @@ private:
// overly conservative (i.e. return "true" for all instructions), but it
// is not safe to return "false" for an instruction that should not be
// considered removable.
-bool DeadCodeElimination::isLiveInstr(const MachineInstr *MI) const {
+bool DeadCodeElimination::isLiveInstr(NodeAddr<StmtNode *> S) const {
+ const MachineInstr *MI = S.Addr->getCode();
if (MI->mayStore() || MI->isBranch() || MI->isCall() || MI->isReturn())
return true;
if (MI->hasOrderedMemoryRef() || MI->hasUnmodeledSideEffects() ||
@@ -83,7 +84,7 @@ void DeadCodeElimination::scanInstr(NodeAddr<InstrNode*> IA,
SetQueue<NodeId> &WorkQ) {
if (!DFG.IsCode<NodeAttrs::Stmt>(IA))
return;
- if (!isLiveInstr(NodeAddr<StmtNode*>(IA).Addr->getCode()))
+ if (!isLiveInstr(IA))
return;
for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG)) {
if (!LiveNodes.count(RA.Id))
@@ -160,7 +161,7 @@ bool DeadCodeElimination::collect() {
if (!LiveNodes.count(RA.Id))
DeadNodes.insert(RA.Id);
if (DFG.IsCode<NodeAttrs::Stmt>(IA))
- if (isLiveInstr(NodeAddr<StmtNode*>(IA).Addr->getCode()))
+ if (isLiveInstr(IA) || DFG.hasUntrackedRef(IA))
continue;
if (IsDead(IA)) {
DeadInstrs.insert(IA.Id);
diff --git a/llvm/lib/Target/Hexagon/RDFDeadCode.h b/llvm/lib/Target/Hexagon/RDFDeadCode.h
index 859c8161d355..16e6c6a39aa1 100644
--- a/llvm/lib/Target/Hexagon/RDFDeadCode.h
+++ b/llvm/lib/Target/Hexagon/RDFDeadCode.h
@@ -55,7 +55,7 @@ namespace rdf {
template<typename T> struct SetQueue;
- bool isLiveInstr(const MachineInstr *MI) const;
+ bool isLiveInstr(NodeAddr<StmtNode*> S) const;
void scanInstr(NodeAddr<InstrNode*> IA, SetQueue<NodeId> &WorkQ);
void processDef(NodeAddr<DefNode*> DA, SetQueue<NodeId> &WorkQ);
void processUse(NodeAddr<UseNode*> UA, SetQueue<NodeId> &WorkQ);
diff --git a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index 2c7013dd492e..6b74423f9bc5 100644
--- a/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -62,8 +62,6 @@ class LanaiAsmParser : public MCTargetAsmParser {
bool parsePrePost(StringRef Type, int *OffsetValue);
- bool ParseDirective(AsmToken DirectiveID) override;
-
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
@@ -649,8 +647,6 @@ public:
} // end anonymous namespace
-bool LanaiAsmParser::ParseDirective(AsmToken /*DirectiveId*/) { return true; }
-
bool LanaiAsmParser::MatchAndEmitInstruction(SMLoc IdLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
diff --git a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
index d5639bd9a329..157f86027433 100644
--- a/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -42,7 +43,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -478,7 +478,7 @@ SDValue LanaiTargetLowering::LowerCCCArguments(
}
default:
LLVM_DEBUG(dbgs() << "LowerFormalArguments Unhandled argument type: "
- << RegVT.getEVTString() << "\n");
+ << RegVT << "\n");
llvm_unreachable("unhandled argument type");
}
} else {
@@ -489,7 +489,7 @@ SDValue LanaiTargetLowering::LowerCCCArguments(
// Check that the argument fits in stack slot
if (ObjSize > 4) {
errs() << "LowerFormalArguments Unhandled argument type: "
- << EVT(VA.getLocVT()).getEVTString() << "\n";
+ << VA.getLocVT() << "\n";
}
// Create the frame index object for this incoming parameter...
int FI = MFI.CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
@@ -519,7 +519,7 @@ SDValue LanaiTargetLowering::LowerCCCArguments(
if (IsVarArg) {
// Record the frame index of the first variable argument
// which is a value necessary to VASTART.
- int FI = MFI.CreateFixedObject(4, CCInfo.getNextStackOffset(), true);
+ int FI = MFI.CreateFixedObject(4, CCInfo.getStackSize(), true);
LanaiMFI->setVarArgsFrameIndex(FI);
}
@@ -551,7 +551,7 @@ LanaiTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Analize return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Lanai32);
- SDValue Flag;
+ SDValue Glue;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
@@ -559,10 +559,10 @@ LanaiTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Flag);
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Glue);
// Guarantee that all emitted copies are stuck together with flags.
- Flag = Chain.getValue(1);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
@@ -579,17 +579,17 @@ LanaiTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue Val =
DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(DAG.getDataLayout()));
- Chain = DAG.getCopyToReg(Chain, DL, Lanai::RV, Val, Flag);
- Flag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, DL, Lanai::RV, Val, Glue);
+ Glue = Chain.getValue(1);
RetOps.push_back(
DAG.getRegister(Lanai::RV, getPointerTy(DAG.getDataLayout())));
}
RetOps[0] = Chain; // Update chain
- unsigned Opc = LanaiISD::RET_FLAG;
- if (Flag.getNode())
- RetOps.push_back(Flag);
+ unsigned Opc = LanaiISD::RET_GLUE;
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
// Return Void
return DAG.getNode(Opc, DL, MVT::Other,
@@ -627,7 +627,7 @@ SDValue LanaiTargetLowering::LowerCCCCallTo(
}
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
+ unsigned NumBytes = CCInfo.getStackSize();
// Create local copies for byval args.
SmallVector<SDValue, 8> ByValArgs;
@@ -711,15 +711,15 @@ SDValue LanaiTargetLowering::LowerCCCCallTo(
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
ArrayRef<SDValue>(&MemOpChains[0], MemOpChains.size()));
- SDValue InFlag;
+ SDValue InGlue;
// Build a sequence of copy-to-reg nodes chained together with token chain and
- // flag operands which copy the outgoing args into registers. The InFlag in
+ // flag operands which copy the outgoing args into registers. The InGlue in
// necessary since all emitted instructions must be stuck together.
for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
- RegsToPass[I].second, InFlag);
- InFlag = Chain.getValue(1);
+ RegsToPass[I].second, InGlue);
+ InGlue = Chain.getValue(1);
}
// If the callee is a GlobalAddress node (quite common, every direct call is)
@@ -753,27 +753,27 @@ SDValue LanaiTargetLowering::LowerCCCCallTo(
Ops.push_back(DAG.getRegister(RegsToPass[I].first,
RegsToPass[I].second.getValueType()));
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ if (InGlue.getNode())
+ Ops.push_back(InGlue);
Chain = DAG.getNode(LanaiISD::CALL, DL, NodeTys,
ArrayRef<SDValue>(&Ops[0], Ops.size()));
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
// Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InFlag, DL);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, DL);
+ InGlue = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
+ return LowerCallResult(Chain, InGlue, CallConv, IsVarArg, Ins, DL, DAG,
InVals);
}
// LowerCallResult - Lower the result values of a call into the
// appropriate copies out of appropriate physical registers.
SDValue LanaiTargetLowering::LowerCallResult(
- SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg,
+ SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
// Assign locations to each value returned by this call.
@@ -786,9 +786,9 @@ SDValue LanaiTargetLowering::LowerCallResult(
// Copy all of the result registers out of their specified physreg.
for (unsigned I = 0; I != RVLocs.size(); ++I) {
Chain = DAG.getCopyFromReg(Chain, DL, RVLocs[I].getLocReg(),
- RVLocs[I].getValVT(), InFlag)
+ RVLocs[I].getValVT(), InGlue)
.getValue(1);
- InFlag = Chain.getValue(2);
+ InGlue = Chain.getValue(2);
InVals.push_back(Chain.getValue(0));
}
@@ -874,11 +874,11 @@ SDValue LanaiTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
LPCC::CondCode CC = IntCondCCodeToICC(Cond, DL, RHS, DAG);
SDValue TargetCC = DAG.getConstant(CC, DL, MVT::i32);
- SDValue Flag =
+ SDValue Glue =
DAG.getNode(LanaiISD::SET_FLAG, DL, MVT::Glue, LHS, RHS, TargetCC);
return DAG.getNode(LanaiISD::BR_CC, DL, Op.getValueType(), Chain, Dest,
- TargetCC, Flag);
+ TargetCC, Glue);
}
SDValue LanaiTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
@@ -974,10 +974,10 @@ SDValue LanaiTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
LPCC::CondCode CC = IntCondCCodeToICC(Cond, DL, RHS, DAG);
SDValue TargetCC = DAG.getConstant(CC, DL, MVT::i32);
- SDValue Flag =
+ SDValue Glue =
DAG.getNode(LanaiISD::SET_FLAG, DL, MVT::Glue, LHS, RHS, TargetCC);
- return DAG.getNode(LanaiISD::SETCC, DL, Op.getValueType(), TargetCC, Flag);
+ return DAG.getNode(LanaiISD::SETCC, DL, Op.getValueType(), TargetCC, Glue);
}
SDValue LanaiTargetLowering::LowerSELECT_CC(SDValue Op,
@@ -991,12 +991,12 @@ SDValue LanaiTargetLowering::LowerSELECT_CC(SDValue Op,
LPCC::CondCode CC = IntCondCCodeToICC(Cond, DL, RHS, DAG);
SDValue TargetCC = DAG.getConstant(CC, DL, MVT::i32);
- SDValue Flag =
+ SDValue Glue =
DAG.getNode(LanaiISD::SET_FLAG, DL, MVT::Glue, LHS, RHS, TargetCC);
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
return DAG.getNode(LanaiISD::SELECT_CC, DL, VTs, TrueV, FalseV, TargetCC,
- Flag);
+ Glue);
}
SDValue LanaiTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
@@ -1095,8 +1095,8 @@ const char *LanaiTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
case LanaiISD::ADJDYNALLOC:
return "LanaiISD::ADJDYNALLOC";
- case LanaiISD::RET_FLAG:
- return "LanaiISD::RET_FLAG";
+ case LanaiISD::RET_GLUE:
+ return "LanaiISD::RET_GLUE";
case LanaiISD::CALL:
return "LanaiISD::CALL";
case LanaiISD::SELECT_CC:
@@ -1499,7 +1499,7 @@ void LanaiTargetLowering::computeKnownBitsForTargetNode(
KnownBits Known2;
Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
break;
}
}
diff --git a/llvm/lib/Target/Lanai/LanaiISelLowering.h b/llvm/lib/Target/Lanai/LanaiISelLowering.h
index 2f58560f4efe..ea1159db9e59 100644
--- a/llvm/lib/Target/Lanai/LanaiISelLowering.h
+++ b/llvm/lib/Target/Lanai/LanaiISelLowering.h
@@ -26,8 +26,8 @@ enum {
ADJDYNALLOC,
- // Return with a flag operand. Operand 0 is the chain operand.
- RET_FLAG,
+ // Return with a glue operand. Operand 0 is the chain operand.
+ RET_GLUE,
// CALL - These operations represent an abstract call instruction, which
// includes a bunch of information.
@@ -130,7 +130,7 @@ private:
const SDLoc &DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &DL, SelectionDAG &DAG,
diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.td b/llvm/lib/Target/Lanai/LanaiInstrInfo.td
index d1fd327722ef..638b3c94d054 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.td
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.td
@@ -39,7 +39,7 @@ def SDT_LanaiAdjDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
def Call : SDNode<"LanaiISD::CALL", SDT_LanaiCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
-def RetFlag : SDNode<"LanaiISD::RET_FLAG", SDTNone,
+def RetGlue : SDNode<"LanaiISD::RET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def CallSeqStart : SDNode<"ISD::CALLSEQ_START", SDT_LanaiCallSeqStart,
[SDNPHasChain, SDNPOutGlue]>;
@@ -732,7 +732,7 @@ let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1,
Uses = [RCA] in {
def RET : InstRM<0b0, (outs), (ins),
"ld\t-4[%fp], %pc ! return",
- [(RetFlag)]> {
+ [(RetGlue)]> {
let Rd = PC.Num;
let Rs1 = FP.Num;
let P = 1;
diff --git a/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp b/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
index 5450bdb30764..3a271cb95c07 100644
--- a/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
+++ b/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
@@ -167,7 +167,7 @@ bool LanaiRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(RS && "Register scavenging must be on");
Register Reg = RS->FindUnusedReg(&Lanai::GPRRegClass);
if (!Reg)
- Reg = RS->scavengeRegister(&Lanai::GPRRegClass, II, SPAdj);
+ Reg = RS->scavengeRegisterBackwards(Lanai::GPRRegClass, II, false, SPAdj);
assert(Reg && "Register scavenger failed");
bool HasNegOffset = false;
@@ -235,10 +235,11 @@ bool LanaiRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
.addReg(FrameReg)
.addImm(-Offset);
MI.eraseFromParent();
- } else {
- MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*isDef=*/false);
- MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ return true;
}
+
+ MI.getOperand(FIOperandNum).ChangeToRegister(FrameReg, /*isDef=*/false);
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
return false;
}
diff --git a/llvm/lib/Target/Lanai/LanaiRegisterInfo.h b/llvm/lib/Target/Lanai/LanaiRegisterInfo.h
index 5168dddd9301..89d9eba7f891 100644
--- a/llvm/lib/Target/Lanai/LanaiRegisterInfo.h
+++ b/llvm/lib/Target/Lanai/LanaiRegisterInfo.h
@@ -34,6 +34,8 @@ struct LanaiRegisterInfo : public LanaiGenRegisterInfo {
bool requiresRegisterScavenging(const MachineFunction &MF) const override;
+ bool supportsBackwardScavenger() const override { return true; }
+
bool eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp
index d8c7bd15aacb..7ae693130da5 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCAsmInfo.cpp
@@ -12,7 +12,7 @@
#include "LanaiMCAsmInfo.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
index ec573a189a70..5f9c2a100223 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
@@ -24,6 +24,7 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
@@ -74,7 +75,7 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &SubtargetInfo) const;
- void encodeInstruction(const MCInst &Inst, raw_ostream &Ostream,
+ void encodeInstruction(const MCInst &Inst, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &SubtargetInfo) const override;
@@ -170,15 +171,14 @@ LanaiMCCodeEmitter::adjustPqBitsSpls(const MCInst &Inst, unsigned Value,
}
void LanaiMCCodeEmitter::encodeInstruction(
- const MCInst &Inst, raw_ostream &Ostream, SmallVectorImpl<MCFixup> &Fixups,
+ const MCInst &Inst, SmallVectorImpl<char> &CB,
+ SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &SubtargetInfo) const {
// Get instruction encoding and emit it
unsigned Value = getBinaryCodeForInstr(Inst, Fixups, SubtargetInfo);
++MCNumEmitted; // Keep track of the number of emitted insns.
- // Emit bytes in big-endian
- for (int i = (4 - 1) * 8; i >= 0; i -= 8)
- Ostream << static_cast<char>((Value >> i) & 0xff);
+ support::endian::write<uint32_t>(CB, Value, support::big);
}
// Encode Lanai Memory Operand
diff --git a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
index 97d33ea2a0ab..8f83c883e822 100644
--- a/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
+++ b/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
@@ -15,7 +15,6 @@
#include "LanaiMCAsmInfo.h"
#include "TargetInfo/LanaiTargetInfo.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -24,6 +23,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TargetParser/Triple.h"
#include <cstdint>
#include <string>
diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
index 9d6d981bb908..94d530306536 100644
--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
+++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
@@ -51,8 +51,6 @@ class LoongArchAsmParser : public MCTargetAsmParser {
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
- bool ParseDirective(AsmToken DirectiveID) override { return true; }
-
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
@@ -64,7 +62,8 @@ class LoongArchAsmParser : public MCTargetAsmParser {
unsigned Kind) override;
bool generateImmOutOfRangeError(OperandVector &Operands, uint64_t ErrorInfo,
- int64_t Lower, int64_t Upper, Twine Msg);
+ int64_t Lower, int64_t Upper,
+ const Twine &Msg);
/// Helper for processing MC instructions that have been successfully matched
/// by MatchAndEmitInstruction.
@@ -75,11 +74,11 @@ class LoongArchAsmParser : public MCTargetAsmParser {
#define GET_ASSEMBLER_HEADER
#include "LoongArchGenAsmMatcher.inc"
- OperandMatchResultTy parseRegister(OperandVector &Operands);
- OperandMatchResultTy parseImmediate(OperandVector &Operands);
- OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands);
- OperandMatchResultTy parseSImm26Operand(OperandVector &Operands);
- OperandMatchResultTy parseAtomicMemOp(OperandVector &Operands);
+ ParseStatus parseRegister(OperandVector &Operands);
+ ParseStatus parseImmediate(OperandVector &Operands);
+ ParseStatus parseOperandWithModifier(OperandVector &Operands);
+ ParseStatus parseSImm26Operand(OperandVector &Operands);
+ ParseStatus parseAtomicMemOp(OperandVector &Operands);
bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
@@ -235,12 +234,24 @@ public:
VK == LoongArchMCExpr::VK_LoongArch_None;
}
+ bool isUImm1() const { return isUImm<1>(); }
bool isUImm2() const { return isUImm<2>(); }
bool isUImm2plus1() const { return isUImm<2, 1>(); }
bool isUImm3() const { return isUImm<3>(); }
+ bool isUImm4() const { return isUImm<4>(); }
+ bool isSImm5() const { return isSImm<5>(); }
bool isUImm5() const { return isUImm<5>(); }
bool isUImm6() const { return isUImm<6>(); }
+ bool isUImm7() const { return isUImm<7>(); }
+ bool isSImm8() const { return isSImm<8>(); }
+ bool isSImm8lsl1() const { return isSImm<8, 1>(); }
+ bool isSImm8lsl2() const { return isSImm<8, 2>(); }
+ bool isSImm8lsl3() const { return isSImm<8, 3>(); }
bool isUImm8() const { return isUImm<8>(); }
+ bool isSImm9lsl3() const { return isSImm<9, 3>(); }
+ bool isSImm10() const { return isSImm<10>(); }
+ bool isSImm10lsl2() const { return isSImm<10, 2>(); }
+ bool isSImm11lsl1() const { return isSImm<11, 1>(); }
bool isSImm12() const { return isSImm<12>(); }
bool isSImm12addlike() const {
@@ -304,6 +315,7 @@ public:
IsValidKind;
}
+ bool isSImm13() const { return isSImm<13>(); }
bool isUImm14() const { return isUImm<14>(); }
bool isUImm15() const { return isUImm<15>(); }
@@ -563,39 +575,34 @@ bool LoongArchAsmParser::classifySymbolRef(const MCExpr *Expr,
return false;
}
-OperandMatchResultTy
-LoongArchAsmParser::parseRegister(OperandVector &Operands) {
- if (getLexer().getTok().isNot(AsmToken::Dollar))
- return MatchOperand_NoMatch;
-
- // Eat the $ prefix.
- getLexer().Lex();
+ParseStatus LoongArchAsmParser::parseRegister(OperandVector &Operands) {
+ if (!parseOptionalToken(AsmToken::Dollar))
+ return ParseStatus::NoMatch;
if (getLexer().getKind() != AsmToken::Identifier)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
StringRef Name = getLexer().getTok().getIdentifier();
MCRegister RegNo;
matchRegisterNameHelper(RegNo, Name);
if (RegNo == LoongArch::NoRegister)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size());
getLexer().Lex();
Operands.push_back(LoongArchOperand::createReg(RegNo, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-LoongArchAsmParser::parseImmediate(OperandVector &Operands) {
+ParseStatus LoongArchAsmParser::parseImmediate(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E;
const MCExpr *Res;
switch (getLexer().getKind()) {
default:
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
case AsmToken::LParen:
case AsmToken::Dot:
case AsmToken::Minus:
@@ -606,59 +613,49 @@ LoongArchAsmParser::parseImmediate(OperandVector &Operands) {
case AsmToken::String:
case AsmToken::Identifier:
if (getParser().parseExpression(Res, E))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
break;
case AsmToken::Percent:
return parseOperandWithModifier(Operands);
}
Operands.push_back(LoongArchOperand::createImm(Res, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
+ParseStatus
LoongArchAsmParser::parseOperandWithModifier(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E;
- if (getLexer().getKind() != AsmToken::Percent) {
- Error(getLoc(), "expected '%' for operand modifier");
- return MatchOperand_ParseFail;
- }
+ if (getLexer().getKind() != AsmToken::Percent)
+ return Error(getLoc(), "expected '%' for operand modifier");
getParser().Lex(); // Eat '%'
- if (getLexer().getKind() != AsmToken::Identifier) {
- Error(getLoc(), "expected valid identifier for operand modifier");
- return MatchOperand_ParseFail;
- }
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return Error(getLoc(), "expected valid identifier for operand modifier");
StringRef Identifier = getParser().getTok().getIdentifier();
LoongArchMCExpr::VariantKind VK =
LoongArchMCExpr::getVariantKindForName(Identifier);
- if (VK == LoongArchMCExpr::VK_LoongArch_Invalid) {
- Error(getLoc(), "unrecognized operand modifier");
- return MatchOperand_ParseFail;
- }
+ if (VK == LoongArchMCExpr::VK_LoongArch_Invalid)
+ return Error(getLoc(), "unrecognized operand modifier");
getParser().Lex(); // Eat the identifier
- if (getLexer().getKind() != AsmToken::LParen) {
- Error(getLoc(), "expected '('");
- return MatchOperand_ParseFail;
- }
+ if (getLexer().getKind() != AsmToken::LParen)
+ return Error(getLoc(), "expected '('");
getParser().Lex(); // Eat '('
const MCExpr *SubExpr;
- if (getParser().parseParenExpression(SubExpr, E)) {
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseParenExpression(SubExpr, E))
+ return ParseStatus::Failure;
const MCExpr *ModExpr = LoongArchMCExpr::create(SubExpr, VK, getContext());
Operands.push_back(LoongArchOperand::createImm(ModExpr, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-LoongArchAsmParser::parseSImm26Operand(OperandVector &Operands) {
+ParseStatus LoongArchAsmParser::parseSImm26Operand(OperandVector &Operands) {
SMLoc S = getLoc();
const MCExpr *Res;
@@ -666,11 +663,11 @@ LoongArchAsmParser::parseSImm26Operand(OperandVector &Operands) {
return parseOperandWithModifier(Operands);
if (getLexer().getKind() != AsmToken::Identifier)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
StringRef Identifier;
if (getParser().parseIdentifier(Identifier))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
SMLoc E = SMLoc::getFromPointer(S.getPointer() + Identifier.size());
@@ -679,30 +676,26 @@ LoongArchAsmParser::parseSImm26Operand(OperandVector &Operands) {
Res = LoongArchMCExpr::create(Res, LoongArchMCExpr::VK_LoongArch_CALL,
getContext());
Operands.push_back(LoongArchOperand::createImm(Res, S, E));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-LoongArchAsmParser::parseAtomicMemOp(OperandVector &Operands) {
+ParseStatus LoongArchAsmParser::parseAtomicMemOp(OperandVector &Operands) {
// Parse "$r*".
- if (parseRegister(Operands) != MatchOperand_Success)
- return MatchOperand_NoMatch;
+ if (!parseRegister(Operands).isSuccess())
+ return ParseStatus::NoMatch;
// If there is a next operand and it is 0, ignore it. Otherwise print a
// diagnostic message.
- if (getLexer().is(AsmToken::Comma)) {
- getLexer().Lex(); // Consume comma token.
+ if (parseOptionalToken(AsmToken::Comma)) {
int64_t ImmVal;
SMLoc ImmStart = getLoc();
if (getParser().parseIntToken(ImmVal, "expected optional integer offset"))
- return MatchOperand_ParseFail;
- if (ImmVal) {
- Error(ImmStart, "optional integer offset must be 0");
- return MatchOperand_ParseFail;
- }
+ return ParseStatus::Failure;
+ if (ImmVal)
+ return Error(ImmStart, "optional integer offset must be 0");
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// Looks at a token type and creates the relevant operand from this
/// information, adding to Operands. Return true upon an error.
@@ -710,20 +703,19 @@ bool LoongArchAsmParser::parseOperand(OperandVector &Operands,
StringRef Mnemonic) {
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
- OperandMatchResultTy Result =
+ ParseStatus Result =
MatchOperandParserImpl(Operands, Mnemonic, /*ParseForAllFeatures=*/true);
- if (Result == MatchOperand_Success)
+ if (Result.isSuccess())
return false;
- if (Result == MatchOperand_ParseFail)
+ if (Result.isFailure())
return true;
- if (parseRegister(Operands) == MatchOperand_Success ||
- parseImmediate(Operands) == MatchOperand_Success)
+ if (parseRegister(Operands).isSuccess() ||
+ parseImmediate(Operands).isSuccess())
return false;
// Finally we have exhausted all options and must declare defeat.
- Error(getLoc(), "unknown operand");
- return true;
+ return Error(getLoc(), "unknown operand");
}
bool LoongArchAsmParser::ParseInstruction(ParseInstructionInfo &Info,
@@ -1193,7 +1185,8 @@ unsigned LoongArchAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
return Match_RequiresLAORdDifferRj;
break;
}
- case LoongArch::CSRXCHG: {
+ case LoongArch::CSRXCHG:
+ case LoongArch::GCSRXCHG: {
unsigned Rj = Inst.getOperand(2).getReg();
if (Rj == LoongArch::R0 || Rj == LoongArch::R1)
return Match_RequiresOpnd2NotR0R1;
@@ -1242,7 +1235,7 @@ LoongArchAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
bool LoongArchAsmParser::generateImmOutOfRangeError(
OperandVector &Operands, uint64_t ErrorInfo, int64_t Lower, int64_t Upper,
- Twine Msg = "immediate must be an integer in the range") {
+ const Twine &Msg = "immediate must be an integer in the range") {
SMLoc ErrorLoc = ((LoongArchOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, Msg + " [" + Twine(Lower) + ", " + Twine(Upper) + "]");
}
@@ -1319,6 +1312,9 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
"$rd must be different from both $rk and $rj");
case Match_RequiresLAORdDifferRj:
return Error(Operands[1]->getStartLoc(), "$rd must be different from $rj");
+ case Match_InvalidUImm1:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0,
+ /*Upper=*/(1 << 1) - 1);
case Match_InvalidUImm2:
return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0,
/*Upper=*/(1 << 2) - 1);
@@ -1328,12 +1324,21 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_InvalidUImm3:
return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0,
/*Upper=*/(1 << 3) - 1);
+ case Match_InvalidUImm4:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0,
+ /*Upper=*/(1 << 4) - 1);
case Match_InvalidUImm5:
return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0,
/*Upper=*/(1 << 5) - 1);
case Match_InvalidUImm6:
return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0,
/*Upper=*/(1 << 6) - 1);
+ case Match_InvalidUImm7:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0,
+ /*Upper=*/(1 << 7) - 1);
+ case Match_InvalidUImm8:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0,
+ /*Upper=*/(1 << 8) - 1);
case Match_InvalidUImm12:
return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0,
/*Upper=*/(1 << 12) - 1);
@@ -1343,9 +1348,45 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
/*Upper=*/(1 << 12) - 1,
"operand must be a symbol with modifier (e.g. %abs_lo12) or an "
"integer in the range");
+ case Match_InvalidUImm14:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0,
+ /*Upper=*/(1 << 14) - 1);
case Match_InvalidUImm15:
return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/0,
/*Upper=*/(1 << 15) - 1);
+ case Match_InvalidSImm5:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 4),
+ /*Upper=*/(1 << 4) - 1);
+ case Match_InvalidSImm8:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 7),
+ /*Upper=*/(1 << 7) - 1);
+ case Match_InvalidSImm8lsl1:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, /*Lower=*/-(1 << 8), /*Upper=*/(1 << 8) - 2,
+ "immediate must be a multiple of 2 in the range");
+ case Match_InvalidSImm8lsl2:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, /*Lower=*/-(1 << 9), /*Upper=*/(1 << 9) - 4,
+ "immediate must be a multiple of 4 in the range");
+ case Match_InvalidSImm10:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 9),
+ /*Upper=*/(1 << 9) - 1);
+ case Match_InvalidSImm8lsl3:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, /*Lower=*/-(1 << 10), /*Upper=*/(1 << 10) - 8,
+ "immediate must be a multiple of 8 in the range");
+ case Match_InvalidSImm9lsl3:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, /*Lower=*/-(1 << 11), /*Upper=*/(1 << 11) - 8,
+ "immediate must be a multiple of 8 in the range");
+ case Match_InvalidSImm10lsl2:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, /*Lower=*/-(1 << 11), /*Upper=*/(1 << 11) - 4,
+ "immediate must be a multiple of 4 in the range");
+ case Match_InvalidSImm11lsl1:
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, /*Lower=*/-(1 << 11), /*Upper=*/(1 << 11) - 2,
+ "immediate must be a multiple of 2 in the range");
case Match_InvalidSImm12:
return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 11),
/*Upper=*/(1 << 11) - 1);
@@ -1361,6 +1402,9 @@ bool LoongArchAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
/*Upper=*/(1 << 11) - 1,
"operand must be a symbol with modifier (e.g. %pc64_hi12) or an "
"integer in the range");
+ case Match_InvalidSImm13:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, /*Lower=*/-(1 << 12),
+ /*Upper=*/(1 << 12) - 1);
case Match_InvalidSImm14lsl2:
return generateImmOutOfRangeError(
Operands, ErrorInfo, /*Lower=*/-(1 << 15), /*Upper=*/(1 << 15) - 4,
diff --git a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp
index 2335152e5ab1..8f61dfe7bb0e 100644
--- a/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp
+++ b/llvm/lib/Target/LoongArch/Disassembler/LoongArchDisassembler.cpp
@@ -100,6 +100,33 @@ static DecodeStatus DecodeFCSRRegisterClass(MCInst &Inst, uint64_t RegNo,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeLSX128RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo >= 32)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createReg(LoongArch::VR0 + RegNo));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLASX256RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo >= 32)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createReg(LoongArch::XR0 + RegNo));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeSCRRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo >= 4)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createReg(LoongArch::SCR0 + RegNo));
+ return MCDisassembler::Success;
+}
+
template <unsigned N, int P = 0>
static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
int64_t Address,
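The three added register-class decoders share one shape: bounds-check the raw field value, then add BaseReg + RegNo as a register operand. A hypothetical generic helper (a sketch only, not code from this patch; it relies on the same LLVM MC APIs the decoders above already use) could express that shape once:

// Hypothetical helper, for illustration; e.g.
//   decodeSimpleRegisterClass(Inst, RegNo, /*NumRegs=*/32, LoongArch::VR0)
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCInst.h"

static llvm::MCDisassembler::DecodeStatus
decodeSimpleRegisterClass(llvm::MCInst &Inst, uint64_t RegNo, uint64_t NumRegs,
                          unsigned BaseReg) {
  if (RegNo >= NumRegs)
    return llvm::MCDisassembler::Fail;
  Inst.addOperand(llvm::MCOperand::createReg(BaseReg + RegNo));
  return llvm::MCDisassembler::Success;
}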
diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td
index 7e5c3563f39a..7241a5d63526 100644
--- a/llvm/lib/Target/LoongArch/LoongArch.td
+++ b/llvm/lib/Target/LoongArch/LoongArch.td
@@ -30,63 +30,45 @@ def IsLA32
"LA32 Basic Integer and Privilege Instruction Set">;
defvar LA32 = DefaultMode;
-def LA64 : HwMode<"+64bit">;
+def LA64 : HwMode<"+64bit", [IsLA64]>;
// Single Precision floating point
def FeatureBasicF
: SubtargetFeature<"f", "HasBasicF", "true",
"'F' (Single-Precision Floating-Point)">;
-def HasBasicF
- : Predicate<"Subtarget->hasBasicF()">,
- AssemblerPredicate<(all_of FeatureBasicF),
- "'F' (Single-Precision Floating-Point)">;
+def HasBasicF : Predicate<"Subtarget->hasBasicF()">;
// Double Precision floating point
def FeatureBasicD
: SubtargetFeature<"d", "HasBasicD", "true",
"'D' (Double-Precision Floating-Point)",
[FeatureBasicF]>;
-def HasBasicD
- : Predicate<"Subtarget->hasBasicD()">,
- AssemblerPredicate<(all_of FeatureBasicD),
- "'D' (Double-Precision Floating-Point)">;
+def HasBasicD : Predicate<"Subtarget->hasBasicD()">;
// Loongson SIMD eXtension (LSX)
def FeatureExtLSX
: SubtargetFeature<"lsx", "HasExtLSX", "true",
"'LSX' (Loongson SIMD Extension)", [FeatureBasicD]>;
-def HasExtLSX
- : Predicate<"Subtarget->hasExtLSX()">,
- AssemblerPredicate<(all_of FeatureExtLSX),
- "'LSX' (Loongson SIMD Extension)">;
+def HasExtLSX : Predicate<"Subtarget->hasExtLSX()">;
// Loongson Advanced SIMD eXtension (LASX)
def FeatureExtLASX
: SubtargetFeature<"lasx", "HasExtLASX", "true",
"'LASX' (Loongson Advanced SIMD Extension)",
[FeatureExtLSX]>;
-def HasExtLASX
- : Predicate<"Subtarget->hasExtLASX()">,
- AssemblerPredicate<(all_of FeatureExtLASX),
- "'LASX' (Loongson Advanced SIMD Extension)">;
+def HasExtLASX : Predicate<"Subtarget->hasExtLASX()">;
// Loongson VirtualiZation (LVZ)
def FeatureExtLVZ
: SubtargetFeature<"lvz", "HasExtLVZ", "true",
"'LVZ' (Loongson Virtualization Extension)">;
-def HasExtLVZ
- : Predicate<"Subtarget->hasExtLVZ()">,
- AssemblerPredicate<(all_of FeatureExtLVZ),
- "'LVZ' (Loongson Virtualization Extension)">;
+def HasExtLVZ : Predicate<"Subtarget->hasExtLVZ()">;
// Loongson Binary Translation (LBT)
def FeatureExtLBT
: SubtargetFeature<"lbt", "HasExtLBT", "true",
"'LBT' (Loongson Binary Translation Extension)">;
-def HasExtLBT
- : Predicate<"Subtarget->hasExtLBT()">,
- AssemblerPredicate<(all_of FeatureExtLBT),
- "'LBT' (Loongson Binary Translation Extension)">;
+def HasExtLBT : Predicate<"Subtarget->hasExtLBT()">;
// Expand la.global as la.pcrel
def LaGlobalWithPcrel
@@ -115,6 +97,11 @@ def HasLaLocalWithAbs
AssemblerPredicate<(all_of LaLocalWithAbs),
"Expand la.local as la.abs">;
+// Unaligned memory access
+def FeatureUAL
+ : SubtargetFeature<"ual", "HasUAL", "true",
+ "Allow memory accesses to be unaligned">;
+
//===----------------------------------------------------------------------===//
// Registers, instruction descriptions ...
//===----------------------------------------------------------------------===//
@@ -128,13 +115,14 @@ include "LoongArchInstrInfo.td"
//===----------------------------------------------------------------------===//
def : ProcessorModel<"generic-la32", NoSchedModel, [Feature32Bit]>;
-def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit]>;
+def : ProcessorModel<"generic-la64", NoSchedModel, [Feature64Bit, FeatureUAL]>;
// Support generic for compatibility with other targets. The triple will be used
// to change to the appropriate la32/la64 version.
def : ProcessorModel<"generic", NoSchedModel, []>;
def : ProcessorModel<"la464", NoSchedModel, [Feature64Bit,
+ FeatureUAL,
FeatureExtLASX,
FeatureExtLVZ,
FeatureExtLBT]>;
@@ -144,9 +132,7 @@ def : ProcessorModel<"la464", NoSchedModel, [Feature64Bit,
//===----------------------------------------------------------------------===//
def LoongArchInstrInfo : InstrInfo {
- // guess mayLoad, mayStore, and hasSideEffects
- // This option is a temporary migration help. It will go away.
- let guessInstructionProperties = 1;
+ let guessInstructionProperties = 0;
}
def LoongArchAsmParser : AsmParser {
diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
index 6d9cb5e174d9..27979a830b10 100644
--- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
@@ -17,6 +17,8 @@
#include "MCTargetDesc/LoongArchInstPrinter.h"
#include "TargetInfo/LoongArchTargetInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/TargetRegistry.h"
using namespace llvm;
@@ -35,6 +37,18 @@ void LoongArchAsmPrinter::emitInstruction(const MachineInstr *MI) {
if (emitPseudoExpansionLowering(*OutStreamer, MI))
return;
+ switch (MI->getOpcode()) {
+ case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
+ LowerPATCHABLE_FUNCTION_ENTER(*MI);
+ return;
+ case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
+ LowerPATCHABLE_FUNCTION_EXIT(*MI);
+ return;
+ case TargetOpcode::PATCHABLE_TAIL_CALL:
+ LowerPATCHABLE_TAIL_CALL(*MI);
+ return;
+ }
+
MCInst TmpInst;
if (!lowerLoongArchMachineInstrToMCInst(MI, TmpInst, *this))
EmitToStreamer(*OutStreamer, TmpInst);
@@ -61,6 +75,20 @@ bool LoongArchAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
return false;
}
break;
+ case 'w': // Print LSX registers.
+ if (MO.getReg().id() >= LoongArch::VR0 &&
+ MO.getReg().id() <= LoongArch::VR31)
+ break;
+ // The modifier is 'w' but the operand is not an LSX register; report an
+ // unknown operand error.
+ return true;
+ case 'u': // Print LASX registers.
+ if (MO.getReg().id() >= LoongArch::XR0 &&
+ MO.getReg().id() <= LoongArch::XR31)
+ break;
+ // The modifier is 'u' but the operand is not an LASX register; report an
+ // unknown operand error.
+ return true;
// TODO: handle other extra codes if any.
}
}
@@ -110,8 +138,59 @@ bool LoongArchAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
+void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(
+ const MachineInstr &MI) {
+ const Function &F = MF->getFunction();
+ if (F.hasFnAttribute("patchable-function-entry")) {
+ unsigned Num;
+ if (F.getFnAttribute("patchable-function-entry")
+ .getValueAsString()
+ .getAsInteger(10, Num))
+ return;
+ emitNops(Num);
+ return;
+ }
+
+ emitSled(MI, SledKind::FUNCTION_ENTER);
+}
+
+void LoongArchAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI) {
+ emitSled(MI, SledKind::FUNCTION_EXIT);
+}
+
+void LoongArchAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI) {
+ emitSled(MI, SledKind::TAIL_CALL);
+}
+
+void LoongArchAsmPrinter::emitSled(const MachineInstr &MI, SledKind Kind) {
+ // For loongarch64 we want to emit the following pattern:
+ //
+ // .Lxray_sled_beginN:
+ // B .Lxray_sled_endN
+ // 11 NOPs (44 bytes)
+ // .Lxray_sled_endN:
+ //
+ // We need the extra bytes because at runtime they may be used for the
+ // actual pattern defined in compiler-rt/lib/xray/xray_loongarch64.cpp.
+ // The count here should be adjusted accordingly if the implementation
+ // changes.
+ const int8_t NoopsInSledCount = 11;
+ OutStreamer->emitCodeAlignment(Align(4), &getSubtargetInfo());
+ MCSymbol *BeginOfSled = OutContext.createTempSymbol("xray_sled_begin");
+ MCSymbol *EndOfSled = OutContext.createTempSymbol("xray_sled_end");
+ OutStreamer->emitLabel(BeginOfSled);
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(LoongArch::B)
+ .addExpr(MCSymbolRefExpr::create(EndOfSled, OutContext)));
+ emitNops(NoopsInSledCount);
+ OutStreamer->emitLabel(EndOfSled);
+ recordSled(BeginOfSled, MI, Kind, 2);
+}
+
bool LoongArchAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
AsmPrinter::runOnMachineFunction(MF);
+ // Emit the XRay table for this function.
+ emitXRayTable();
return true;
}
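One subtlety in LowerPATCHABLE_FUNCTION_ENTER above: StringRef::getAsInteger returns true on parse failure, so a malformed "patchable-function-entry" value makes the function bail out without emitting any NOPs. A standalone sketch of that convention (assumes only LLVM's StringRef API):

// StringRef::getAsInteger returns true on failure and writes the parsed
// value only on success; the early return above relies on this.
#include "llvm/ADT/StringRef.h"
#include <cassert>

int main() {
  unsigned Num = 0;
  assert(!llvm::StringRef("3").getAsInteger(10, Num) && Num == 3); // parsed
  assert(llvm::StringRef("three").getAsInteger(10, Num));          // failed
  return 0;
}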
diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h
index 23e29354743e..693456443c7a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h
+++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h
@@ -41,6 +41,11 @@ public:
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &OS) override;
+ void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
+ void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
+ void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
+ void emitSled(const MachineInstr &MI, SledKind Kind);
+
// tblgen'erated function.
bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
const MachineInstr *MI);
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
index bad39dc3a14f..dd0b2cfde544 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
@@ -19,8 +19,11 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -57,24 +60,39 @@ private:
MachineBasicBlock::iterator &NextMBBI,
unsigned FlagsHi, unsigned SecondOpcode,
unsigned FlagsLo);
+ bool expandLargeAddressLoad(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI,
+ unsigned LastOpcode, unsigned IdentifyingMO);
+ bool expandLargeAddressLoad(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI,
+ unsigned LastOpcode, unsigned IdentifyingMO,
+ const MachineOperand &Symbol, Register DestReg,
+ bool EraseFromParent);
bool expandLoadAddressPcrel(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI);
+ MachineBasicBlock::iterator &NextMBBI,
+ bool Large = false);
bool expandLoadAddressGot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI);
+ MachineBasicBlock::iterator &NextMBBI,
+ bool Large = false);
bool expandLoadAddressTLSLE(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
bool expandLoadAddressTLSIE(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI);
+ MachineBasicBlock::iterator &NextMBBI,
+ bool Large = false);
bool expandLoadAddressTLSLD(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI);
+ MachineBasicBlock::iterator &NextMBBI,
+ bool Large = false);
bool expandLoadAddressTLSGD(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI);
+ MachineBasicBlock::iterator &NextMBBI,
+ bool Large = false);
bool expandFunctionCALL(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI,
@@ -111,16 +129,26 @@ bool LoongArchPreRAExpandPseudo::expandMI(
switch (MBBI->getOpcode()) {
case LoongArch::PseudoLA_PCREL:
return expandLoadAddressPcrel(MBB, MBBI, NextMBBI);
+ case LoongArch::PseudoLA_PCREL_LARGE:
+ return expandLoadAddressPcrel(MBB, MBBI, NextMBBI, /*Large=*/true);
case LoongArch::PseudoLA_GOT:
return expandLoadAddressGot(MBB, MBBI, NextMBBI);
+ case LoongArch::PseudoLA_GOT_LARGE:
+ return expandLoadAddressGot(MBB, MBBI, NextMBBI, /*Large=*/true);
case LoongArch::PseudoLA_TLS_LE:
return expandLoadAddressTLSLE(MBB, MBBI, NextMBBI);
case LoongArch::PseudoLA_TLS_IE:
return expandLoadAddressTLSIE(MBB, MBBI, NextMBBI);
+ case LoongArch::PseudoLA_TLS_IE_LARGE:
+ return expandLoadAddressTLSIE(MBB, MBBI, NextMBBI, /*Large=*/true);
case LoongArch::PseudoLA_TLS_LD:
return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI);
+ case LoongArch::PseudoLA_TLS_LD_LARGE:
+ return expandLoadAddressTLSLD(MBB, MBBI, NextMBBI, /*Large=*/true);
case LoongArch::PseudoLA_TLS_GD:
return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI);
+ case LoongArch::PseudoLA_TLS_GD_LARGE:
+ return expandLoadAddressTLSGD(MBB, MBBI, NextMBBI, /*Large=*/true);
case LoongArch::PseudoCALL:
return expandFunctionCALL(MBB, MBBI, NextMBBI, /*IsTailCall=*/false);
case LoongArch::PseudoTAIL:
@@ -157,9 +185,118 @@ bool LoongArchPreRAExpandPseudo::expandPcalau12iInstPair(
return true;
}
+bool LoongArchPreRAExpandPseudo::expandLargeAddressLoad(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode,
+ unsigned IdentifyingMO) {
+ MachineInstr &MI = *MBBI;
+ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LastOpcode, IdentifyingMO,
+ MI.getOperand(2), MI.getOperand(0).getReg(),
+ true);
+}
+
+bool LoongArchPreRAExpandPseudo::expandLargeAddressLoad(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI, unsigned LastOpcode,
+ unsigned IdentifyingMO, const MachineOperand &Symbol, Register DestReg,
+ bool EraseFromParent) {
+ // Code Sequence:
+ //
+ // Part1: pcalau12i $scratch, %MO1(sym)
+ // Part0: addi.d $dest, $zero, %MO0(sym)
+ // Part2: lu32i.d $dest, %MO2(sym)
+ // Part3: lu52i.d $dest, $dest, %MO3(sym)
+ // Fin: LastOpcode $dest, $dest, $scratch
+
+ unsigned MO0, MO1, MO2, MO3;
+ switch (IdentifyingMO) {
+ default:
+ llvm_unreachable("unsupported identifying MO");
+ case LoongArchII::MO_PCREL_LO:
+ MO0 = IdentifyingMO;
+ MO1 = LoongArchII::MO_PCREL_HI;
+ MO2 = LoongArchII::MO_PCREL64_LO;
+ MO3 = LoongArchII::MO_PCREL64_HI;
+ break;
+ case LoongArchII::MO_GOT_PC_HI:
+ case LoongArchII::MO_LD_PC_HI:
+ case LoongArchII::MO_GD_PC_HI:
+ // These cases relocate just like the GOT case, except for Part1.
+ MO0 = LoongArchII::MO_GOT_PC_LO;
+ MO1 = IdentifyingMO;
+ MO2 = LoongArchII::MO_GOT_PC64_LO;
+ MO3 = LoongArchII::MO_GOT_PC64_HI;
+ break;
+ case LoongArchII::MO_IE_PC_LO:
+ MO0 = IdentifyingMO;
+ MO1 = LoongArchII::MO_IE_PC_HI;
+ MO2 = LoongArchII::MO_IE_PC64_LO;
+ MO3 = LoongArchII::MO_IE_PC64_HI;
+ break;
+ }
+
+ MachineFunction *MF = MBB.getParent();
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+
+ assert(MF->getSubtarget<LoongArchSubtarget>().is64Bit() &&
+ "Large code model requires LA64");
+
+ Register TmpPart1 =
+ MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
+ Register TmpPart0 =
+ DestReg.isVirtual()
+ ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass)
+ : DestReg;
+ Register TmpParts02 =
+ DestReg.isVirtual()
+ ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass)
+ : DestReg;
+ Register TmpParts023 =
+ DestReg.isVirtual()
+ ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass)
+ : DestReg;
+
+ auto Part1 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::PCALAU12I), TmpPart1);
+ auto Part0 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ADDI_D), TmpPart0)
+ .addReg(LoongArch::R0);
+ auto Part2 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), TmpParts02)
+ // "rj" is needed due to InstrInfo pattern requirement.
+ .addReg(TmpPart0, RegState::Kill);
+ auto Part3 = BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), TmpParts023)
+ .addReg(TmpParts02, RegState::Kill);
+ BuildMI(MBB, MBBI, DL, TII->get(LastOpcode), DestReg)
+ .addReg(TmpParts023)
+ .addReg(TmpPart1, RegState::Kill);
+
+ if (Symbol.getType() == MachineOperand::MO_ExternalSymbol) {
+ const char *SymName = Symbol.getSymbolName();
+ Part0.addExternalSymbol(SymName, MO0);
+ Part1.addExternalSymbol(SymName, MO1);
+ Part2.addExternalSymbol(SymName, MO2);
+ Part3.addExternalSymbol(SymName, MO3);
+ } else {
+ Part0.addDisp(Symbol, 0, MO0);
+ Part1.addDisp(Symbol, 0, MO1);
+ Part2.addDisp(Symbol, 0, MO2);
+ Part3.addDisp(Symbol, 0, MO3);
+ }
+
+ if (EraseFromParent)
+ MI.eraseFromParent();
+
+ return true;
+}
+
bool LoongArchPreRAExpandPseudo::expandLoadAddressPcrel(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI) {
+ MachineBasicBlock::iterator &NextMBBI, bool Large) {
+ if (Large)
+ // Emit the 5-insn large address load sequence with the `%pc` family of
+ // relocs.
+ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D,
+ LoongArchII::MO_PCREL_LO);
+
// Code Sequence:
// pcalau12i $rd, %pc_hi20(sym)
// addi.w/d $rd, $rd, %pc_lo12(sym)
@@ -172,7 +309,13 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressPcrel(
bool LoongArchPreRAExpandPseudo::expandLoadAddressGot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI) {
+ MachineBasicBlock::iterator &NextMBBI, bool Large) {
+ if (Large)
+ // Emit the 5-insn large address load sequence with the `%got_pc` family
+ // of relocs, loading the result from the GOT with `ldx.d` at the end.
+ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D,
+ LoongArchII::MO_GOT_PC_HI);
+
// Code Sequence:
// pcalau12i $rd, %got_pc_hi20(sym)
// ld.w/d $rd, $rd, %got_pc_lo12(sym)
@@ -189,29 +332,57 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLE(
// Code Sequence:
// lu12i.w $rd, %le_hi20(sym)
// ori $rd, $rd, %le_lo12(sym)
+ //
+ // And additionally if generating code using the large code model:
+ //
+ // lu32i.d $rd, %le64_lo20(sym)
+ // lu52i.d $rd, $rd, %le64_hi12(sym)
MachineFunction *MF = MBB.getParent();
MachineInstr &MI = *MBBI;
DebugLoc DL = MI.getDebugLoc();
+ bool Large = MF->getTarget().getCodeModel() == CodeModel::Large;
Register DestReg = MI.getOperand(0).getReg();
- Register ScratchReg =
+ Register Parts01 =
+ Large ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass)
+ : DestReg;
+ Register Part1 =
MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
MachineOperand &Symbol = MI.getOperand(1);
- BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU12I_W), ScratchReg)
+ BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU12I_W), Part1)
.addDisp(Symbol, 0, LoongArchII::MO_LE_HI);
- BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ORI), DestReg)
- .addReg(ScratchReg)
+ BuildMI(MBB, MBBI, DL, TII->get(LoongArch::ORI), Parts01)
+ .addReg(Part1, RegState::Kill)
.addDisp(Symbol, 0, LoongArchII::MO_LE_LO);
+ if (Large) {
+ Register Parts012 =
+ MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass);
+
+ BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU32I_D), Parts012)
+ // "rj" is needed due to InstrInfo pattern requirement.
+ .addReg(Parts01, RegState::Kill)
+ .addDisp(Symbol, 0, LoongArchII::MO_LE64_LO);
+ BuildMI(MBB, MBBI, DL, TII->get(LoongArch::LU52I_D), DestReg)
+ .addReg(Parts012, RegState::Kill)
+ .addDisp(Symbol, 0, LoongArchII::MO_LE64_HI);
+ }
+
MI.eraseFromParent();
return true;
}
bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSIE(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI) {
+ MachineBasicBlock::iterator &NextMBBI, bool Large) {
+ if (Large)
+ // Emit the 5-insn large address load sequence with the `%ie_pc` family
+ // of relocs, loading the result with `ldx.d` at the end.
+ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::LDX_D,
+ LoongArchII::MO_IE_PC_LO);
+
// Code Sequence:
// pcalau12i $rd, %ie_pc_hi20(sym)
// ld.w/d $rd, $rd, %ie_pc_lo12(sym)
@@ -224,7 +395,13 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSIE(
bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLD(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI) {
+ MachineBasicBlock::iterator &NextMBBI, bool Large) {
+ if (Large)
+ // Emit the 5-insn large address load sequence with the `%got_pc` family
+ // of relocs, with the `pcalau12i` insn relocated with `%ld_pc_hi20`.
+ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D,
+ LoongArchII::MO_LD_PC_HI);
+
// Code Sequence:
// pcalau12i $rd, %ld_pc_hi20(sym)
// addi.w/d $rd, $rd, %got_pc_lo12(sym)
@@ -237,7 +414,13 @@ bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSLD(
bool LoongArchPreRAExpandPseudo::expandLoadAddressTLSGD(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI) {
+ MachineBasicBlock::iterator &NextMBBI, bool Large) {
+ if (Large)
+ // Emit the 5-insn large address load sequence with the `%got_pc` family
+ // of relocs, with the `pcalau12i` insn relocated with `%gd_pc_hi20`.
+ return expandLargeAddressLoad(MBB, MBBI, NextMBBI, LoongArch::ADD_D,
+ LoongArchII::MO_GD_PC_HI);
+
// Code Sequence:
// pcalau12i $rd, %gd_pc_hi20(sym)
// addi.w/d $rd, $rd, %got_pc_lo12(sym)
@@ -299,6 +482,25 @@ bool LoongArchPreRAExpandPseudo::expandFunctionCALL(
CALL.addGlobalAddress(GV, 0, LoongArchII::MO_PCREL_LO);
break;
}
+ case CodeModel::Large: {
+ // Emit the 5-insn large address load sequence, either directly or
+ // indirectly in case of going through the GOT, then JIRL_TAIL or
+ // JIRL_CALL to $addr.
+ Opcode =
+ IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL;
+ Register AddrReg =
+ IsTailCall
+ ? MF->getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass)
+ : LoongArch::R1;
+
+ bool UseGOT = Func.isGlobal() && !Func.getGlobal()->isDSOLocal();
+ unsigned MO = UseGOT ? LoongArchII::MO_GOT_PC_HI : LoongArchII::MO_PCREL_LO;
+ unsigned LAOpcode = UseGOT ? LoongArch::LDX_D : LoongArch::ADD_D;
+ expandLargeAddressLoad(MBB, MBBI, NextMBBI, LAOpcode, MO, Func, AddrReg,
+ false);
+ CALL = BuildMI(MBB, MBBI, DL, TII->get(Opcode)).addReg(AddrReg).addImm(0);
+ break;
+ }
}
// Transfer implicit operands.
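The 5-insn large-address sequences above split a 64-bit displacement across four operand fields of 12 + 20 + 20 + 12 bits, one per instruction in the Part0..Part3 comment. A rough standalone illustration of that split (the real relocation arithmetic, including any carries, lives in MC/linker code and is not reproduced here):

// Rough illustration only: how 64 bits are apportioned across the four
// operands of the large-code-model sequence. Not the relocation math.
#include <cstdint>

struct LargeAddrFields {
  uint64_t Lo12 : 12; // Part0: addi.d operand    (bits 11:0)
  uint64_t Hi20 : 20; // Part1: pcalau12i operand (bits 31:12)
  uint64_t Lo20 : 20; // Part2: lu32i.d operand   (bits 51:32)
  uint64_t Hi12 : 12; // Part3: lu52i.d operand   (bits 63:52)
};
static_assert(12 + 20 + 20 + 12 == 64,
              "the four fields together span a full 64-bit displacement");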
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index 40e7665fb1f7..eb49ae329ebe 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -1,4 +1,4 @@
-//=-- LoongArchInstrInfoF.td - Single-Precision Float instr --*- tablegen -*-=//
+// LoongArchFloat32InstrInfo.td - Single-Precision Float instr --*- tablegen -*-
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -33,93 +33,91 @@ def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>;
let Predicates = [HasBasicF] in {
// Arithmetic Operation Instructions
-def FADD_S : FP_ALU_3R<0b00000001000000001, "fadd.s", FPR32>;
-def FSUB_S : FP_ALU_3R<0b00000001000000101, "fsub.s", FPR32>;
-def FMUL_S : FP_ALU_3R<0b00000001000001001, "fmul.s", FPR32>;
-def FDIV_S : FP_ALU_3R<0b00000001000001101, "fdiv.s", FPR32>;
-def FMADD_S : FP_ALU_4R<0b000010000001, "fmadd.s", FPR32>;
-def FMSUB_S : FP_ALU_4R<0b000010000101, "fmsub.s", FPR32>;
-def FNMADD_S : FP_ALU_4R<0b000010001001, "fnmadd.s", FPR32>;
-def FNMSUB_S : FP_ALU_4R<0b000010001101, "fnmsub.s", FPR32>;
-def FMAX_S : FP_ALU_3R<0b00000001000010001, "fmax.s", FPR32>;
-def FMIN_S : FP_ALU_3R<0b00000001000010101, "fmin.s", FPR32>;
-def FMAXA_S : FP_ALU_3R<0b00000001000011001, "fmaxa.s", FPR32>;
-def FMINA_S : FP_ALU_3R<0b00000001000011101, "fmina.s", FPR32>;
-def FABS_S : FP_ALU_2R<0b0000000100010100000001, "fabs.s", FPR32>;
-def FNEG_S : FP_ALU_2R<0b0000000100010100000101, "fneg.s", FPR32>;
-def FSQRT_S : FP_ALU_2R<0b0000000100010100010001, "fsqrt.s", FPR32>;
-def FRECIP_S : FP_ALU_2R<0b0000000100010100010101, "frecip.s", FPR32>;
-def FRSQRT_S : FP_ALU_2R<0b0000000100010100011001, "frsqrt.s", FPR32>;
-def FSCALEB_S : FP_ALU_3R<0b00000001000100001, "fscaleb.s", FPR32>;
-def FLOGB_S : FP_ALU_2R<0b0000000100010100001001, "flogb.s", FPR32>;
-def FCOPYSIGN_S : FP_ALU_3R<0b00000001000100101, "fcopysign.s", FPR32>;
-def FCLASS_S : FP_ALU_2R<0b0000000100010100001101, "fclass.s", FPR32>;
+def FADD_S : FP_ALU_3R<0x01008000>;
+def FSUB_S : FP_ALU_3R<0x01028000>;
+def FMUL_S : FP_ALU_3R<0x01048000>;
+def FDIV_S : FP_ALU_3R<0x01068000>;
+def FMADD_S : FP_ALU_4R<0x08100000>;
+def FMSUB_S : FP_ALU_4R<0x08500000>;
+def FNMADD_S : FP_ALU_4R<0x08900000>;
+def FNMSUB_S : FP_ALU_4R<0x08d00000>;
+def FMAX_S : FP_ALU_3R<0x01088000>;
+def FMIN_S : FP_ALU_3R<0x010a8000>;
+def FMAXA_S : FP_ALU_3R<0x010c8000>;
+def FMINA_S : FP_ALU_3R<0x010e8000>;
+def FABS_S : FP_ALU_2R<0x01140400>;
+def FNEG_S : FP_ALU_2R<0x01141400>;
+def FSQRT_S : FP_ALU_2R<0x01144400>;
+def FRECIP_S : FP_ALU_2R<0x01145400>;
+def FRSQRT_S : FP_ALU_2R<0x01146400>;
+def FSCALEB_S : FP_ALU_3R<0x01108000>;
+def FLOGB_S : FP_ALU_2R<0x01142400>;
+def FCOPYSIGN_S : FP_ALU_3R<0x01128000>;
+def FCLASS_S : FP_ALU_2R<0x01143400>;
// Comparison Instructions
-def FCMP_CAF_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CAF, "fcmp.caf.s", FPR32>;
-def FCMP_CUN_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CUN, "fcmp.cun.s", FPR32>;
-def FCMP_CEQ_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CEQ, "fcmp.ceq.s", FPR32>;
-def FCMP_CUEQ_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CUEQ, "fcmp.cueq.s", FPR32>;
-def FCMP_CLT_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CLT, "fcmp.clt.s", FPR32>;
-def FCMP_CULT_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CULT, "fcmp.cult.s", FPR32>;
-def FCMP_CLE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CLE, "fcmp.cle.s", FPR32>;
-def FCMP_CULE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CULE, "fcmp.cule.s", FPR32>;
-def FCMP_CNE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CNE, "fcmp.cne.s", FPR32>;
-def FCMP_COR_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_COR, "fcmp.cor.s", FPR32>;
-def FCMP_CUNE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_CUNE, "fcmp.cune.s", FPR32>;
-def FCMP_SAF_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SAF, "fcmp.saf.s", FPR32>;
-def FCMP_SUN_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SUN, "fcmp.sun.s", FPR32>;
-def FCMP_SEQ_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SEQ, "fcmp.seq.s", FPR32>;
-def FCMP_SUEQ_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SUEQ, "fcmp.sueq.s", FPR32>;
-def FCMP_SLT_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SLT, "fcmp.slt.s", FPR32>;
-def FCMP_SULT_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SULT, "fcmp.sult.s", FPR32>;
-def FCMP_SLE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SLE, "fcmp.sle.s", FPR32>;
-def FCMP_SULE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SULE, "fcmp.sule.s", FPR32>;
-def FCMP_SNE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SNE, "fcmp.sne.s", FPR32>;
-def FCMP_SOR_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SOR, "fcmp.sor.s", FPR32>;
-def FCMP_SUNE_S : FP_CMP<FPCMP_OPC_S, FPCMP_COND_SUNE, "fcmp.sune.s", FPR32>;
+def FCMP_CAF_S : FP_CMP<0x0c100000>;
+def FCMP_CUN_S : FP_CMP<0x0c140000>;
+def FCMP_CEQ_S : FP_CMP<0x0c120000>;
+def FCMP_CUEQ_S : FP_CMP<0x0c160000>;
+def FCMP_CLT_S : FP_CMP<0x0c110000>;
+def FCMP_CULT_S : FP_CMP<0x0c150000>;
+def FCMP_CLE_S : FP_CMP<0x0c130000>;
+def FCMP_CULE_S : FP_CMP<0x0c170000>;
+def FCMP_CNE_S : FP_CMP<0x0c180000>;
+def FCMP_COR_S : FP_CMP<0x0c1a0000>;
+def FCMP_CUNE_S : FP_CMP<0x0c1c0000>;
+def FCMP_SAF_S : FP_CMP<0x0c108000>;
+def FCMP_SUN_S : FP_CMP<0x0c148000>;
+def FCMP_SEQ_S : FP_CMP<0x0c128000>;
+def FCMP_SUEQ_S : FP_CMP<0x0c168000>;
+def FCMP_SLT_S : FP_CMP<0x0c118000>;
+def FCMP_SULT_S : FP_CMP<0x0c158000>;
+def FCMP_SLE_S : FP_CMP<0x0c138000>;
+def FCMP_SULE_S : FP_CMP<0x0c178000>;
+def FCMP_SNE_S : FP_CMP<0x0c188000>;
+def FCMP_SOR_S : FP_CMP<0x0c1a8000>;
+def FCMP_SUNE_S : FP_CMP<0x0c1c8000>;
// Conversion Instructions
-def FFINT_S_W : FP_CONV<0b0000000100011101000100, "ffint.s.w", FPR32, FPR32>;
-def FTINT_W_S : FP_CONV<0b0000000100011011000001, "ftint.w.s", FPR32, FPR32>;
-def FTINTRM_W_S : FP_CONV<0b0000000100011010000001, "ftintrm.w.s", FPR32,
- FPR32>;
-def FTINTRP_W_S : FP_CONV<0b0000000100011010010001, "ftintrp.w.s", FPR32,
- FPR32>;
-def FTINTRZ_W_S : FP_CONV<0b0000000100011010100001, "ftintrz.w.s", FPR32,
- FPR32>;
-def FTINTRNE_W_S : FP_CONV<0b0000000100011010110001, "ftintrne.w.s", FPR32,
- FPR32>;
-def FRINT_S : FP_CONV<0b0000000100011110010001, "frint.s", FPR32, FPR32>;
+def FFINT_S_W : FP_CONV<0x011d1000>;
+def FTINT_W_S : FP_CONV<0x011b0400>;
+def FTINTRM_W_S : FP_CONV<0x011a0400>;
+def FTINTRP_W_S : FP_CONV<0x011a4400>;
+def FTINTRZ_W_S : FP_CONV<0x011a8400>;
+def FTINTRNE_W_S : FP_CONV<0x011ac400>;
+def FRINT_S : FP_CONV<0x011e4400>;
// Move Instructions
-def FSEL_S : FP_SEL<0b00001101000000, "fsel", FPR32>;
-def FMOV_S : FP_MOV<0b0000000100010100100101, "fmov.s", FPR32, FPR32>;
-def MOVGR2FR_W : FP_MOV<0b0000000100010100101001, "movgr2fr.w", FPR32, GPR>;
-def MOVFR2GR_S : FP_MOV<0b0000000100010100101101, "movfr2gr.s", GPR, FPR32>;
-def MOVGR2FCSR : FP_MOV<0b0000000100010100110000, "movgr2fcsr", FCSR, GPR>;
-def MOVFCSR2GR : FP_MOV<0b0000000100010100110010, "movfcsr2gr", GPR, FCSR>;
-def MOVFR2CF_S : FP_MOV<0b0000000100010100110100, "movfr2cf", CFR, FPR32>;
-def MOVCF2FR_S : FP_MOV<0b0000000100010100110101, "movcf2fr", FPR32, CFR>;
-def MOVGR2CF : FP_MOV<0b0000000100010100110110, "movgr2cf", CFR, GPR>;
-def MOVCF2GR : FP_MOV<0b0000000100010100110111, "movcf2gr", GPR, CFR>;
+def FSEL_xS : FP_SEL<0x0d000000>;
+def FMOV_S : FP_MOV<0x01149400>;
+def MOVGR2FR_W : FP_MOV<0x0114a400, FPR32, GPR>;
+def MOVFR2GR_S : FP_MOV<0x0114b400, GPR, FPR32>;
+let hasSideEffects = 1 in {
+def MOVGR2FCSR : FP_MOV<0x0114c000, FCSR, GPR>;
+def MOVFCSR2GR : FP_MOV<0x0114c800, GPR, FCSR>;
+} // hasSideEffects = 1
+def MOVFR2CF_xS : FP_MOV<0x0114d000, CFR, FPR32>;
+def MOVCF2FR_xS : FP_MOV<0x0114d400, FPR32, CFR>;
+def MOVGR2CF : FP_MOV<0x0114d800, CFR, GPR>;
+def MOVCF2GR : FP_MOV<0x0114dc00, GPR, CFR>;
// Branch Instructions
-def BCEQZ : FP_BRANCH<0b01001000, "bceqz">;
-def BCNEZ : FP_BRANCH<0b01001001, "bcnez">;
+def BCEQZ : FP_BRANCH<0x48000000>;
+def BCNEZ : FP_BRANCH<0x48000100>;
// Common Memory Access Instructions
-def FLD_S : FP_LOAD_2RI12<0b0010101100, "fld.s", FPR32>;
-def FST_S : FP_STORE_2RI12<0b0010101101, "fst.s", FPR32>;
-def FLDX_S : FP_LOAD_3R<0b00111000001100000, "fldx.s", FPR32>;
-def FSTX_S : FP_STORE_3R<0b00111000001110000, "fstx.s", FPR32>;
+def FLD_S : FP_LOAD_2RI12<0x2b000000>;
+def FST_S : FP_STORE_2RI12<0x2b400000>;
+def FLDX_S : FP_LOAD_3R<0x38300000>;
+def FSTX_S : FP_STORE_3R<0x38380000>;
// Bound Check Memory Access Instructions
-def FLDGT_S : FP_LOAD_3R<0b00111000011101000, "fldgt.s", FPR32>;
-def FLDLE_S : FP_LOAD_3R<0b00111000011101010, "fldle.s", FPR32>;
-def FSTGT_S : FP_STORE_3R<0b00111000011101100, "fstgt.s", FPR32>;
-def FSTLE_S : FP_STORE_3R<0b00111000011101110, "fstle.s", FPR32>;
+def FLDGT_S : FP_LOAD_3R<0x38740000>;
+def FLDLE_S : FP_LOAD_3R<0x38750000>;
+def FSTGT_S : FP_STORE_3R<0x38760000>;
+def FSTLE_S : FP_STORE_3R<0x38770000>;
// Pseudo instructions for spill/reload CFRs.
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
@@ -223,7 +221,7 @@ def : PatStrictFsetccs<SETLT, FCMP_SLT_S, FPR32>;
/// Select
def : Pat<(select CFR:$cc, FPR32:$fk, FPR32:$fj),
- (FSEL_S FPR32:$fj, FPR32:$fk, CFR:$cc)>;
+ (FSEL_xS FPR32:$fj, FPR32:$fk, CFR:$cc)>;
/// Selectcc
@@ -231,16 +229,16 @@ class PatFPSelectcc<CondCode cc, LAInst CmpInst, LAInst SelInst,
RegisterClass RegTy>
: Pat<(select (GRLenVT (setcc RegTy:$a, RegTy:$b, cc)), RegTy:$t, RegTy:$f),
(SelInst RegTy:$f, RegTy:$t, (CmpInst RegTy:$a, RegTy:$b))>;
-def : PatFPSelectcc<SETOEQ, FCMP_CEQ_S, FSEL_S, FPR32>;
-def : PatFPSelectcc<SETOLT, FCMP_CLT_S, FSEL_S, FPR32>;
-def : PatFPSelectcc<SETOLE, FCMP_CLE_S, FSEL_S, FPR32>;
-def : PatFPSelectcc<SETONE, FCMP_CNE_S, FSEL_S, FPR32>;
-def : PatFPSelectcc<SETO, FCMP_COR_S, FSEL_S, FPR32>;
-def : PatFPSelectcc<SETUEQ, FCMP_CUEQ_S, FSEL_S, FPR32>;
-def : PatFPSelectcc<SETULT, FCMP_CULT_S, FSEL_S, FPR32>;
-def : PatFPSelectcc<SETULE, FCMP_CULE_S, FSEL_S, FPR32>;
-def : PatFPSelectcc<SETUNE, FCMP_CUNE_S, FSEL_S, FPR32>;
-def : PatFPSelectcc<SETUO, FCMP_CUN_S, FSEL_S, FPR32>;
+def : PatFPSelectcc<SETOEQ, FCMP_CEQ_S, FSEL_xS, FPR32>;
+def : PatFPSelectcc<SETOLT, FCMP_CLT_S, FSEL_xS, FPR32>;
+def : PatFPSelectcc<SETOLE, FCMP_CLE_S, FSEL_xS, FPR32>;
+def : PatFPSelectcc<SETONE, FCMP_CNE_S, FSEL_xS, FPR32>;
+def : PatFPSelectcc<SETO, FCMP_COR_S, FSEL_xS, FPR32>;
+def : PatFPSelectcc<SETUEQ, FCMP_CUEQ_S, FSEL_xS, FPR32>;
+def : PatFPSelectcc<SETULT, FCMP_CULT_S, FSEL_xS, FPR32>;
+def : PatFPSelectcc<SETULE, FCMP_CULE_S, FSEL_xS, FPR32>;
+def : PatFPSelectcc<SETUNE, FCMP_CUNE_S, FSEL_xS, FPR32>;
+def : PatFPSelectcc<SETUO, FCMP_CUN_S, FSEL_xS, FPR32>;
/// Loads
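The opcode rewrite in this file is mechanical and can be spot-checked: each old bit-field opcode, shifted back to the Inst bit position it used to occupy, equals the new full-word constant. A quick standalone check (illustration only; values copied from the hunks above):

// FADD_S: old 17-bit opcode lived in Inst{31-15}.
static_assert((0b00000001000000001u << 15) == 0x01008000u, "FADD_S");
// FMADD_S: old 12-bit opcode lived in Inst{31-20}.
static_assert((0b000010000001u << 20) == 0x08100000u, "FMADD_S");
// FABS_S: old 22-bit opcode lived in Inst{31-10}.
static_assert((0b0000000100010100000001u << 10) == 0x01140400u, "FABS_S");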
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index 50d7e9920ea9..5118474725b6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -1,4 +1,4 @@
-//=-- LoongArchInstrInfoD.td - Double-Precision Float instr -*- tablegen -*-==//
+// LoongArchFloat64InstrInfo.td - Double-Precision Float instr --*- tablegen -*-
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -17,123 +17,111 @@
let Predicates = [HasBasicD] in {
// Arithmetic Operation Instructions
-def FADD_D : FP_ALU_3R<0b00000001000000010, "fadd.d", FPR64>;
-def FSUB_D : FP_ALU_3R<0b00000001000000110, "fsub.d", FPR64>;
-def FMUL_D : FP_ALU_3R<0b00000001000001010, "fmul.d", FPR64>;
-def FDIV_D : FP_ALU_3R<0b00000001000001110, "fdiv.d", FPR64>;
-def FMADD_D : FP_ALU_4R<0b000010000010, "fmadd.d", FPR64>;
-def FMSUB_D : FP_ALU_4R<0b000010000110, "fmsub.d", FPR64>;
-def FNMADD_D : FP_ALU_4R<0b000010001010, "fnmadd.d", FPR64>;
-def FNMSUB_D : FP_ALU_4R<0b000010001110, "fnmsub.d", FPR64>;
-def FMAX_D : FP_ALU_3R<0b00000001000010010, "fmax.d", FPR64>;
-def FMIN_D : FP_ALU_3R<0b00000001000010110, "fmin.d", FPR64>;
-def FMAXA_D : FP_ALU_3R<0b00000001000011010, "fmaxa.d", FPR64>;
-def FMINA_D : FP_ALU_3R<0b00000001000011110, "fmina.d", FPR64>;
-def FABS_D : FP_ALU_2R<0b0000000100010100000010, "fabs.d", FPR64>;
-def FNEG_D : FP_ALU_2R<0b0000000100010100000110, "fneg.d", FPR64>;
-def FSQRT_D : FP_ALU_2R<0b0000000100010100010010, "fsqrt.d", FPR64>;
-def FRECIP_D : FP_ALU_2R<0b0000000100010100010110, "frecip.d", FPR64>;
-def FRSQRT_D : FP_ALU_2R<0b0000000100010100011010, "frsqrt.d", FPR64>;
-def FSCALEB_D : FP_ALU_3R<0b00000001000100010, "fscaleb.d", FPR64>;
-def FLOGB_D : FP_ALU_2R<0b0000000100010100001010, "flogb.d", FPR64>;
-def FCOPYSIGN_D : FP_ALU_3R<0b00000001000100110, "fcopysign.d", FPR64>;
-def FCLASS_D : FP_ALU_2R<0b0000000100010100001110, "fclass.d", FPR64>;
+def FADD_D : FP_ALU_3R<0x01010000, FPR64>;
+def FSUB_D : FP_ALU_3R<0x01030000, FPR64>;
+def FMUL_D : FP_ALU_3R<0x01050000, FPR64>;
+def FDIV_D : FP_ALU_3R<0x01070000, FPR64>;
+def FMADD_D : FP_ALU_4R<0x08200000, FPR64>;
+def FMSUB_D : FP_ALU_4R<0x08600000, FPR64>;
+def FNMADD_D : FP_ALU_4R<0x08a00000, FPR64>;
+def FNMSUB_D : FP_ALU_4R<0x08e00000, FPR64>;
+def FMAX_D : FP_ALU_3R<0x01090000, FPR64>;
+def FMIN_D : FP_ALU_3R<0x010b0000, FPR64>;
+def FMAXA_D : FP_ALU_3R<0x010d0000, FPR64>;
+def FMINA_D : FP_ALU_3R<0x010f0000, FPR64>;
+def FABS_D : FP_ALU_2R<0x01140800, FPR64>;
+def FNEG_D : FP_ALU_2R<0x01141800, FPR64>;
+def FSQRT_D : FP_ALU_2R<0x01144800, FPR64>;
+def FRECIP_D : FP_ALU_2R<0x01145800, FPR64>;
+def FRSQRT_D : FP_ALU_2R<0x01146800, FPR64>;
+def FSCALEB_D : FP_ALU_3R<0x01110000, FPR64>;
+def FLOGB_D : FP_ALU_2R<0x01142800, FPR64>;
+def FCOPYSIGN_D : FP_ALU_3R<0x01130000, FPR64>;
+def FCLASS_D : FP_ALU_2R<0x01143800, FPR64>;
// Comparison Instructions
-def FCMP_CAF_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CAF, "fcmp.caf.d", FPR64>;
-def FCMP_CUN_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CUN, "fcmp.cun.d", FPR64>;
-def FCMP_CEQ_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CEQ, "fcmp.ceq.d", FPR64>;
-def FCMP_CUEQ_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CUEQ, "fcmp.cueq.d", FPR64>;
-def FCMP_CLT_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CLT, "fcmp.clt.d", FPR64>;
-def FCMP_CULT_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CULT, "fcmp.cult.d", FPR64>;
-def FCMP_CLE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CLE, "fcmp.cle.d", FPR64>;
-def FCMP_CULE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CULE, "fcmp.cule.d", FPR64>;
-def FCMP_CNE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CNE, "fcmp.cne.d", FPR64>;
-def FCMP_COR_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_COR, "fcmp.cor.d", FPR64>;
-def FCMP_CUNE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_CUNE, "fcmp.cune.d", FPR64>;
-def FCMP_SAF_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SAF, "fcmp.saf.d", FPR64>;
-def FCMP_SUN_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SUN, "fcmp.sun.d", FPR64>;
-def FCMP_SEQ_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SEQ, "fcmp.seq.d", FPR64>;
-def FCMP_SUEQ_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SUEQ, "fcmp.sueq.d", FPR64>;
-def FCMP_SLT_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SLT, "fcmp.slt.d", FPR64>;
-def FCMP_SULT_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SULT, "fcmp.sult.d", FPR64>;
-def FCMP_SLE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SLE, "fcmp.sle.d", FPR64>;
-def FCMP_SULE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SULE, "fcmp.sule.d", FPR64>;
-def FCMP_SNE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SNE, "fcmp.sne.d", FPR64>;
-def FCMP_SOR_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SOR, "fcmp.sor.d", FPR64>;
-def FCMP_SUNE_D : FP_CMP<FPCMP_OPC_D, FPCMP_COND_SUNE, "fcmp.sune.d", FPR64>;
+def FCMP_CAF_D : FP_CMP<0x0c200000, FPR64>;
+def FCMP_CUN_D : FP_CMP<0x0c240000, FPR64>;
+def FCMP_CEQ_D : FP_CMP<0x0c220000, FPR64>;
+def FCMP_CUEQ_D : FP_CMP<0x0c260000, FPR64>;
+def FCMP_CLT_D : FP_CMP<0x0c210000, FPR64>;
+def FCMP_CULT_D : FP_CMP<0x0c250000, FPR64>;
+def FCMP_CLE_D : FP_CMP<0x0c230000, FPR64>;
+def FCMP_CULE_D : FP_CMP<0x0c270000, FPR64>;
+def FCMP_CNE_D : FP_CMP<0x0c280000, FPR64>;
+def FCMP_COR_D : FP_CMP<0x0c2a0000, FPR64>;
+def FCMP_CUNE_D : FP_CMP<0x0c2c0000, FPR64>;
+def FCMP_SAF_D : FP_CMP<0x0c208000, FPR64>;
+def FCMP_SUN_D : FP_CMP<0x0c248000, FPR64>;
+def FCMP_SEQ_D : FP_CMP<0x0c228000, FPR64>;
+def FCMP_SUEQ_D : FP_CMP<0x0c268000, FPR64>;
+def FCMP_SLT_D : FP_CMP<0x0c218000, FPR64>;
+def FCMP_SULT_D : FP_CMP<0x0c258000, FPR64>;
+def FCMP_SLE_D : FP_CMP<0x0c238000, FPR64>;
+def FCMP_SULE_D : FP_CMP<0x0c278000, FPR64>;
+def FCMP_SNE_D : FP_CMP<0x0c288000, FPR64>;
+def FCMP_SOR_D : FP_CMP<0x0c2a8000, FPR64>;
+def FCMP_SUNE_D : FP_CMP<0x0c2c8000, FPR64>;
// Conversion Instructions
-def FFINT_S_L : FP_CONV<0b0000000100011101000110, "ffint.s.l", FPR32, FPR64>;
-def FTINT_L_S : FP_CONV<0b0000000100011011001001, "ftint.l.s", FPR64, FPR32>;
-def FTINTRM_L_S : FP_CONV<0b0000000100011010001001, "ftintrm.l.s", FPR64,
- FPR32>;
-def FTINTRP_L_S : FP_CONV<0b0000000100011010011001, "ftintrp.l.s", FPR64,
- FPR32>;
-def FTINTRZ_L_S : FP_CONV<0b0000000100011010101001, "ftintrz.l.s", FPR64,
- FPR32>;
-def FTINTRNE_L_S : FP_CONV<0b0000000100011010111001, "ftintrne.l.s", FPR64,
- FPR32>;
-def FCVT_S_D : FP_CONV<0b0000000100011001000110, "fcvt.s.d", FPR32, FPR64>;
-def FCVT_D_S : FP_CONV<0b0000000100011001001001, "fcvt.d.s", FPR64, FPR32>;
-def FFINT_D_W : FP_CONV<0b0000000100011101001000, "ffint.d.w", FPR64, FPR32>;
-def FFINT_D_L : FP_CONV<0b0000000100011101001010, "ffint.d.l", FPR64, FPR64>;
-def FTINT_W_D : FP_CONV<0b0000000100011011000010, "ftint.w.d", FPR32, FPR64>;
-def FTINT_L_D : FP_CONV<0b0000000100011011001010, "ftint.l.d", FPR64, FPR64>;
-def FTINTRM_W_D : FP_CONV<0b0000000100011010000010, "ftintrm.w.d", FPR32,
- FPR64>;
-def FTINTRM_L_D : FP_CONV<0b0000000100011010001010, "ftintrm.l.d", FPR64,
- FPR64>;
-def FTINTRP_W_D : FP_CONV<0b0000000100011010010010, "ftintrp.w.d", FPR32,
- FPR64>;
-def FTINTRP_L_D : FP_CONV<0b0000000100011010011010, "ftintrp.l.d", FPR64,
- FPR64>;
-def FTINTRZ_W_D : FP_CONV<0b0000000100011010100010, "ftintrz.w.d", FPR32,
- FPR64>;
-def FTINTRZ_L_D : FP_CONV<0b0000000100011010101010, "ftintrz.l.d", FPR64,
- FPR64>;
-def FTINTRNE_W_D : FP_CONV<0b0000000100011010110010, "ftintrne.w.d", FPR32,
- FPR64>;
-def FTINTRNE_L_D : FP_CONV<0b0000000100011010111010, "ftintrne.l.d", FPR64,
- FPR64>;
-def FRINT_D : FP_CONV<0b0000000100011110010010, "frint.d", FPR64, FPR64>;
+def FFINT_S_L : FP_CONV<0x011d1800, FPR32, FPR64>;
+def FTINT_L_S : FP_CONV<0x011b2400, FPR64, FPR32>;
+def FTINTRM_L_S : FP_CONV<0x011a2400, FPR64, FPR32>;
+def FTINTRP_L_S : FP_CONV<0x011a6400, FPR64, FPR32>;
+def FTINTRZ_L_S : FP_CONV<0x011aa400, FPR64, FPR32>;
+def FTINTRNE_L_S : FP_CONV<0x011ae400, FPR64, FPR32>;
+def FCVT_S_D : FP_CONV<0x01191800, FPR32, FPR64>;
+def FCVT_D_S : FP_CONV<0x01192400, FPR64, FPR32>;
+def FFINT_D_W : FP_CONV<0x011d2000, FPR64, FPR32>;
+def FFINT_D_L : FP_CONV<0x011d2800, FPR64, FPR64>;
+def FTINT_W_D : FP_CONV<0x011b0800, FPR32, FPR64>;
+def FTINT_L_D : FP_CONV<0x011b2800, FPR64, FPR64>;
+def FTINTRM_W_D : FP_CONV<0x011a0800, FPR32, FPR64>;
+def FTINTRM_L_D : FP_CONV<0x011a2800, FPR64, FPR64>;
+def FTINTRP_W_D : FP_CONV<0x011a4800, FPR32, FPR64>;
+def FTINTRP_L_D : FP_CONV<0x011a6800, FPR64, FPR64>;
+def FTINTRZ_W_D : FP_CONV<0x011a8800, FPR32, FPR64>;
+def FTINTRZ_L_D : FP_CONV<0x011aa800, FPR64, FPR64>;
+def FTINTRNE_W_D : FP_CONV<0x011ac800, FPR32, FPR64>;
+def FTINTRNE_L_D : FP_CONV<0x011ae800, FPR64, FPR64>;
+def FRINT_D : FP_CONV<0x011e4800, FPR64, FPR64>;
// Move Instructions
-def FMOV_D : FP_MOV<0b0000000100010100100110, "fmov.d", FPR64, FPR64>;
-def MOVFRH2GR_S : FP_MOV<0b0000000100010100101111, "movfrh2gr.s", GPR, FPR64>;
+def FMOV_D : FP_MOV<0x01149800, FPR64, FPR64>;
+def MOVFRH2GR_S : FP_MOV<0x0114bc00, GPR, FPR64>;
let isCodeGenOnly = 1 in {
-def MOVFR2GR_S_64 : FP_MOV<0b0000000100010100101101, "movfr2gr.s", GPR, FPR64>;
-def FSEL_D : FP_SEL<0b00001101000000, "fsel", FPR64>;
+def MOVFR2GR_S_64 : FP_MOV<0x0114b400, GPR, FPR64>;
+def FSEL_xD : FP_SEL<0x0d000000, FPR64>;
} // isCodeGenOnly = 1
-let Constraints = "$dst = $out" in {
-def MOVGR2FRH_W : FPFmtMOV<0b0000000100010100101011, (outs FPR64:$out),
- (ins FPR64:$dst, GPR:$src), "movgr2frh.w",
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Constraints = "$dst = $out" in {
+def MOVGR2FRH_W : FPFmtMOV<0x0114ac00, (outs FPR64:$out),
+ (ins FPR64:$dst, GPR:$src),
"$dst, $src">;
-} // Constraints = "$dst = $out"
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 0, Constraints = "$dst = $out"
// Common Memory Access Instructions
-def FLD_D : FP_LOAD_2RI12<0b0010101110, "fld.d", FPR64>;
-def FST_D : FP_STORE_2RI12<0b0010101111, "fst.d", FPR64>;
-def FLDX_D : FP_LOAD_3R<0b00111000001101000, "fldx.d", FPR64>;
-def FSTX_D : FP_STORE_3R<0b00111000001111000, "fstx.d", FPR64>;
+def FLD_D : FP_LOAD_2RI12<0x2b800000, FPR64>;
+def FST_D : FP_STORE_2RI12<0x2bc00000, FPR64>;
+def FLDX_D : FP_LOAD_3R<0x38340000, FPR64>;
+def FSTX_D : FP_STORE_3R<0x383c0000, FPR64>;
// Bound Check Memory Access Instructions
-def FLDGT_D : FP_LOAD_3R<0b00111000011101001, "fldgt.d", FPR64>;
-def FLDLE_D : FP_LOAD_3R<0b00111000011101011, "fldle.d", FPR64>;
-def FSTGT_D : FP_STORE_3R<0b00111000011101101, "fstgt.d", FPR64>;
-def FSTLE_D : FP_STORE_3R<0b00111000011101111, "fstle.d", FPR64>;
+def FLDGT_D : FP_LOAD_3R<0x38748000, FPR64>;
+def FLDLE_D : FP_LOAD_3R<0x38758000, FPR64>;
+def FSTGT_D : FP_STORE_3R<0x38768000, FPR64>;
+def FSTLE_D : FP_STORE_3R<0x38778000, FPR64>;
} // Predicates = [HasBasicD]
// Instructions only available on LA64
let Predicates = [HasBasicD, IsLA64] in {
-def MOVGR2FR_D : FP_MOV<0b0000000100010100101010, "movgr2fr.d", FPR64, GPR>;
-def MOVFR2GR_D : FP_MOV<0b0000000100010100101110, "movfr2gr.d", GPR, FPR64>;
+def MOVGR2FR_D : FP_MOV<0x0114a800, FPR64, GPR>;
+def MOVFR2GR_D : FP_MOV<0x0114b800, GPR, FPR64>;
} // Predicates = [HasBasicD, IsLA64]
// Instructions only available on LA32
let Predicates = [HasBasicD, IsLA32], isCodeGenOnly = 1 in {
-def MOVGR2FR_W_64 : FP_MOV<0b0000000100010100101001, "movgr2fr.w", FPR64, GPR>;
+def MOVGR2FR_W_64 : FP_MOV<0x0114a400, FPR64, GPR>;
} // Predicates = [HasBasicD, IsLA32], isCodeGenOnly = 1
//===----------------------------------------------------------------------===//
@@ -213,20 +201,20 @@ def : PatStrictFsetccs<SETLT, FCMP_SLT_D, FPR64>;
/// Select
def : Pat<(select CFR:$cc, FPR64:$fk, FPR64:$fj),
- (FSEL_D FPR64:$fj, FPR64:$fk, CFR:$cc)>;
+ (FSEL_xD FPR64:$fj, FPR64:$fk, CFR:$cc)>;
/// Selectcc
-def : PatFPSelectcc<SETOEQ, FCMP_CEQ_D, FSEL_D, FPR64>;
-def : PatFPSelectcc<SETOLT, FCMP_CLT_D, FSEL_D, FPR64>;
-def : PatFPSelectcc<SETOLE, FCMP_CLE_D, FSEL_D, FPR64>;
-def : PatFPSelectcc<SETONE, FCMP_CNE_D, FSEL_D, FPR64>;
-def : PatFPSelectcc<SETO, FCMP_COR_D, FSEL_D, FPR64>;
-def : PatFPSelectcc<SETUEQ, FCMP_CUEQ_D, FSEL_D, FPR64>;
-def : PatFPSelectcc<SETULT, FCMP_CULT_D, FSEL_D, FPR64>;
-def : PatFPSelectcc<SETULE, FCMP_CULE_D, FSEL_D, FPR64>;
-def : PatFPSelectcc<SETUNE, FCMP_CUNE_D, FSEL_D, FPR64>;
-def : PatFPSelectcc<SETUO, FCMP_CUN_D, FSEL_D, FPR64>;
+def : PatFPSelectcc<SETOEQ, FCMP_CEQ_D, FSEL_xD, FPR64>;
+def : PatFPSelectcc<SETOLT, FCMP_CLT_D, FSEL_xD, FPR64>;
+def : PatFPSelectcc<SETOLE, FCMP_CLE_D, FSEL_xD, FPR64>;
+def : PatFPSelectcc<SETONE, FCMP_CNE_D, FSEL_xD, FPR64>;
+def : PatFPSelectcc<SETO, FCMP_COR_D, FSEL_xD, FPR64>;
+def : PatFPSelectcc<SETUEQ, FCMP_CUEQ_D, FSEL_xD, FPR64>;
+def : PatFPSelectcc<SETULT, FCMP_CULT_D, FSEL_xD, FPR64>;
+def : PatFPSelectcc<SETULE, FCMP_CULE_D, FSEL_xD, FPR64>;
+def : PatFPSelectcc<SETUNE, FCMP_CUNE_D, FSEL_xD, FPR64>;
+def : PatFPSelectcc<SETUO, FCMP_CUN_D, FSEL_xD, FPR64>;
/// Loads
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
index d2ba1fdfffe4..f853fca5c8b6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloatInstrFormats.td
@@ -1,4 +1,4 @@
-//==- LoongArchInstrFormatsF.td - LoongArch FP Instr Formats -*- tablegen -*-=//
+// LoongArchFloatInstrFormats.td - LoongArch FP Instr Formats -*- tablegen -*-//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -16,29 +16,42 @@
//
//===----------------------------------------------------------------------===//
+// Some FP instructions are defined twice, for accepting FPR32 and FPR64, but
+// with the same mnemonic. Also some are codegen-only definitions that
+// nevertheless require a "normal" mnemonic.
+//
+// In order to accommodate these needs, the instruction defs have names
+// suffixed with `_x[SD]` or `_64`, which will get trimmed before the mnemonics
+// are derived.
+class deriveFPInsnMnemonic<string name> {
+ string ret = deriveInsnMnemonic<!subst("_64", "",
+ !subst("_xD", "",
+ !subst("_xS", "", name)))>.ret;
+}
+
// 2R-type
// <opcode | fj | fd>
-class FPFmt2R<bits<22> op, dag outs, dag ins, string opcstr, string opnstr,
+class FPFmt2R<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveFPInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<5> fj;
bits<5> fd;
- let Inst{31-10} = op;
+ let Inst{31-0} = op;
let Inst{9-5} = fj;
let Inst{4-0} = fd;
}
// 3R-type
// <opcode | fk | fj | fd>
-class FPFmt3R<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
+class FPFmt3R<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveFPInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<5> fk;
bits<5> fj;
bits<5> fd;
- let Inst{31-15} = op;
+ let Inst{31-0} = op;
let Inst{14-10} = fk;
let Inst{9-5} = fj;
let Inst{4-0} = fd;
@@ -46,15 +59,15 @@ class FPFmt3R<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
// 4R-type
// <opcode | fa | fk | fj | fd>
-class FPFmt4R<bits<12> op, dag outs, dag ins, string opcstr, string opnstr,
+class FPFmt4R<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveFPInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<5> fa;
bits<5> fk;
bits<5> fj;
bits<5> fd;
- let Inst{31-20} = op;
+ let Inst{31-0} = op;
let Inst{19-15} = fa;
let Inst{14-10} = fk;
let Inst{9-5} = fj;
@@ -63,62 +76,59 @@ class FPFmt4R<bits<12> op, dag outs, dag ins, string opcstr, string opnstr,
// 2RI12-type
// <opcode | I12 | rj | fd>
-class FPFmt2RI12<bits<10> op, dag outs, dag ins, string opcstr, string opnstr,
+class FPFmt2RI12<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveFPInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<12> imm12;
bits<5> rj;
bits<5> fd;
- let Inst{31-22} = op;
+ let Inst{31-0} = op;
let Inst{21-10} = imm12;
let Inst{9-5} = rj;
let Inst{4-0} = fd;
}
// FmtFCMP
-// <opcode | cond | fk | fj | 0b00 | cd>
-class FPFmtFCMP<bits<12> op, bits<5> cond, dag outs, dag ins, string opcstr,
- string opnstr, list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+// <opcode | fk | fj | cd>
+class FPFmtFCMP<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveFPInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<5> fk;
bits<5> fj;
bits<3> cd;
- let Inst{31-20} = op;
- let Inst{19-15} = cond;
+ let Inst{31-0} = op;
let Inst{14-10} = fk;
let Inst{9-5} = fj;
- let Inst{4-3} = 0b00;
let Inst{2-0} = cd;
}
// FPFmtBR
-// <opcode[7:2] | I21[15:0] | opcode[1:0] | cj | I21[20:16]>
-class FPFmtBR<bits<8> opcode, dag outs, dag ins, string opcstr,
- string opnstr, list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+// <opcode | I21[15:0] | cj | I21[20:16]>
+class FPFmtBR<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveFPInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<21> imm21;
bits<3> cj;
- let Inst{31-26} = opcode{7-2};
+ let Inst{31-0} = op;
let Inst{25-10} = imm21{15-0};
- let Inst{9-8} = opcode{1-0};
let Inst{7-5} = cj;
let Inst{4-0} = imm21{20-16};
}
// FmtFSEL
// <opcode | ca | fk | fj | fd>
-class FPFmtFSEL<bits<14> op, dag outs, dag ins, string opcstr, string opnstr,
+class FPFmtFSEL<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveFPInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<3> ca;
bits<5> fk;
bits<5> fj;
bits<5> fd;
- let Inst{31-18} = op;
+ let Inst{31-0} = op;
let Inst{17-15} = ca;
let Inst{14-10} = fk;
let Inst{9-5} = fj;
@@ -127,27 +137,27 @@ class FPFmtFSEL<bits<14> op, dag outs, dag ins, string opcstr, string opnstr,
// FPFmtMOV
// <opcode | src | dst>
-class FPFmtMOV<bits<22> op, dag outs, dag ins, string opcstr, string opnstr,
+class FPFmtMOV<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveFPInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<5> src;
bits<5> dst;
- let Inst{31-10} = op;
+ let Inst{31-0} = op;
let Inst{9-5} = src;
let Inst{4-0} = dst;
}
// FPFmtMEM
// <opcode | rk | rj | fd>
-class FPFmtMEM<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
+class FPFmtMEM<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveFPInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<5> rk;
bits<5> rj;
bits<5> fd;
- let Inst{31-15} = op;
+ let Inst{31-0} = op;
let Inst{14-10} = rk;
let Inst{9-5} = rj;
let Inst{4-0} = fd;
@@ -157,85 +167,54 @@ class FPFmtMEM<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
// Instruction class templates
//===----------------------------------------------------------------------===//
-class FP_ALU_2R<bits<22> op, string opstr, RegisterClass rc>
- : FPFmt2R<op, (outs rc:$fd), (ins rc:$fj), opstr, "$fd, $fj">;
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+class FP_ALU_2R<bits<32> op, RegisterClass rc = FPR32>
+ : FPFmt2R<op, (outs rc:$fd), (ins rc:$fj), "$fd, $fj">;
-class FP_ALU_3R<bits<17> op, string opstr, RegisterClass rc>
- : FPFmt3R<op, (outs rc:$fd), (ins rc:$fj, rc:$fk), opstr, "$fd, $fj, $fk">;
+class FP_ALU_3R<bits<32> op, RegisterClass rc = FPR32>
+ : FPFmt3R<op, (outs rc:$fd), (ins rc:$fj, rc:$fk), "$fd, $fj, $fk">;
-class FP_ALU_4R<bits<12> op, string opstr, RegisterClass rc>
- : FPFmt4R<op, (outs rc:$fd), (ins rc:$fj, rc:$fk, rc:$fa), opstr,
+class FP_ALU_4R<bits<32> op, RegisterClass rc = FPR32>
+ : FPFmt4R<op, (outs rc:$fd), (ins rc:$fj, rc:$fk, rc:$fa),
"$fd, $fj, $fk, $fa">;
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
-class FPCMPOpc<bits<12> value> {
- bits<12> val = value;
-}
-
-class FPCMPCond<bits<5> value> {
- bits<5> val = value;
-}
-
-class FP_CMP<FPCMPOpc op, FPCMPCond cond, string opstr, RegisterClass rc>
- : FPFmtFCMP<op.val, cond.val, (outs CFR:$cd), (ins rc:$fj, rc:$fk), opstr,
- "$cd, $fj, $fk">;
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+class FP_CMP<bits<32> op, RegisterClass rc = FPR32>
+ : FPFmtFCMP<op, (outs CFR:$cd), (ins rc:$fj, rc:$fk), "$cd, $fj, $fk">;
-class FP_CONV<bits<22> op, string opstr, RegisterClass rcd, RegisterClass rcs>
- : FPFmt2R<op, (outs rcd:$fd), (ins rcs:$fj), opstr, "$fd, $fj">;
+class FP_CONV<bits<32> op, RegisterClass rcd = FPR32, RegisterClass rcs = FPR32>
+ : FPFmt2R<op, (outs rcd:$fd), (ins rcs:$fj), "$fd, $fj">;
-class FP_MOV<bits<22> op, string opstr, RegisterClass rcd, RegisterClass rcs>
- : FPFmtMOV<op, (outs rcd:$dst), (ins rcs:$src), opstr, "$dst, $src">;
+class FP_MOV<bits<32> op, RegisterClass rcd = FPR32, RegisterClass rcs = FPR32>
+ : FPFmtMOV<op, (outs rcd:$dst), (ins rcs:$src), "$dst, $src">;
-class FP_SEL<bits<14> op, string opstr, RegisterClass rc>
- : FPFmtFSEL<op, (outs rc:$fd), (ins rc:$fj, rc:$fk, CFR:$ca), opstr,
+class FP_SEL<bits<32> op, RegisterClass rc = FPR32>
+ : FPFmtFSEL<op, (outs rc:$fd), (ins rc:$fj, rc:$fk, CFR:$ca),
"$fd, $fj, $fk, $ca">;
-class FP_BRANCH<bits<8> opcode, string opstr>
- : FPFmtBR<opcode, (outs), (ins CFR:$cj, simm21_lsl2:$imm21), opstr,
+class FP_BRANCH<bits<32> opcode>
+ : FPFmtBR<opcode, (outs), (ins CFR:$cj, simm21_lsl2:$imm21),
"$cj, $imm21"> {
let isBranch = 1;
let isTerminator = 1;
}
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
-let mayLoad = 1 in {
-class FP_LOAD_3R<bits<17> op, string opstr, RegisterClass rc>
- : FPFmtMEM<op, (outs rc:$fd), (ins GPR:$rj, GPR:$rk), opstr,
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+class FP_LOAD_3R<bits<32> op, RegisterClass rc = FPR32>
+ : FPFmtMEM<op, (outs rc:$fd), (ins GPR:$rj, GPR:$rk),
"$fd, $rj, $rk">;
-class FP_LOAD_2RI12<bits<10> op, string opstr, RegisterClass rc>
- : FPFmt2RI12<op, (outs rc:$fd), (ins GPR:$rj, simm12:$imm12), opstr,
+class FP_LOAD_2RI12<bits<32> op, RegisterClass rc = FPR32>
+ : FPFmt2RI12<op, (outs rc:$fd), (ins GPR:$rj, simm12:$imm12),
"$fd, $rj, $imm12">;
-} // mayLoad = 1
+} // hasSideEffects = 0, mayLoad = 1, mayStore = 0
-let mayStore = 1 in {
-class FP_STORE_3R<bits<17> op, string opstr, RegisterClass rc>
- : FPFmtMEM<op, (outs), (ins rc:$fd, GPR:$rj, GPR:$rk), opstr,
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
+class FP_STORE_3R<bits<32> op, RegisterClass rc = FPR32>
+ : FPFmtMEM<op, (outs), (ins rc:$fd, GPR:$rj, GPR:$rk),
"$fd, $rj, $rk">;
-class FP_STORE_2RI12<bits<10> op, string opstr, RegisterClass rc>
- : FPFmt2RI12<op, (outs), (ins rc:$fd, GPR:$rj, simm12:$imm12), opstr,
+class FP_STORE_2RI12<bits<32> op, RegisterClass rc = FPR32>
+ : FPFmt2RI12<op, (outs), (ins rc:$fd, GPR:$rj, simm12:$imm12),
"$fd, $rj, $imm12">;
-} // mayStore = 1
-
-def FPCMP_OPC_S : FPCMPOpc<0b000011000001>;
-def FPCMP_OPC_D : FPCMPOpc<0b000011000010>;
-
-def FPCMP_COND_CAF : FPCMPCond<0x0>;
-def FPCMP_COND_CUN : FPCMPCond<0x8>;
-def FPCMP_COND_CEQ : FPCMPCond<0x4>;
-def FPCMP_COND_CUEQ : FPCMPCond<0xC>;
-def FPCMP_COND_CLT : FPCMPCond<0x2>;
-def FPCMP_COND_CULT : FPCMPCond<0xA>;
-def FPCMP_COND_CLE : FPCMPCond<0x6>;
-def FPCMP_COND_CULE : FPCMPCond<0xE>;
-def FPCMP_COND_CNE : FPCMPCond<0x10>;
-def FPCMP_COND_COR : FPCMPCond<0x14>;
-def FPCMP_COND_CUNE : FPCMPCond<0x18>;
-def FPCMP_COND_SAF : FPCMPCond<0x1>;
-def FPCMP_COND_SUN : FPCMPCond<0x9>;
-def FPCMP_COND_SEQ : FPCMPCond<0x5>;
-def FPCMP_COND_SUEQ : FPCMPCond<0xD>;
-def FPCMP_COND_SLT : FPCMPCond<0x3>;
-def FPCMP_COND_SULT : FPCMPCond<0xB>;
-def FPCMP_COND_SLE : FPCMPCond<0x7>;
-def FPCMP_COND_SULE : FPCMPCond<0xF>;
-def FPCMP_COND_SNE : FPCMPCond<0x11>;
-def FPCMP_COND_SOR : FPCMPCond<0x15>;
-def FPCMP_COND_SUNE : FPCMPCond<0x19>;
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 1
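With the full-word opcodes above, the per-format `let Inst{...}` assignments simply overwrite operand-field bits that the opcode constants leave as zero, so inserting a field is equivalent to OR-ing it in at its position. A standalone sketch for the 2R layout (illustrative; field positions taken from FPFmt2R above):

// 2R encoding: fj in bits 9-5, fd in bits 4-0, opcode bits elsewhere.
#include <cstdint>

constexpr uint32_t encodeFP2R(uint32_t Op, unsigned Fj, unsigned Fd) {
  return Op | (uint32_t(Fj & 0x1f) << 5) | uint32_t(Fd & 0x1f);
}
// fabs.s with fd = 1, fj = 2, using the FABS_S opcode 0x01140400.
static_assert(encodeFP2R(0x01140400, /*Fj=*/2, /*Fd=*/1) == 0x01140441, "");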
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
index 3bba2d658ec5..0d78e39b3828 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp
@@ -519,3 +519,12 @@ StackOffset LoongArchFrameLowering::getFrameIndexReference(
return Offset;
}
+
+bool LoongArchFrameLowering::enableShrinkWrapping(
+ const MachineFunction &MF) const {
+ // Keep the conventional code flow when not optimizing.
+ if (MF.getFunction().hasOptNone())
+ return false;
+
+ return true;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
index 414d671593d0..57d2565c32c0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.h
@@ -55,6 +55,8 @@ public:
uint64_t getFirstSPAdjustAmount(const MachineFunction &MF,
bool IsPrologue = false) const;
+ bool enableShrinkWrapping(const MachineFunction &MF) const override;
+
private:
void determineFrameLayout(MachineFunction &MF) const;
void adjustReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index 9fe7d94acc7e..ae7167cb5ce7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -138,6 +138,25 @@ bool LoongArchDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) {
return true;
}
+// Fold constant addresses.
+bool LoongArchDAGToDAGISel::SelectAddrConstant(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ SDLoc DL(Addr);
+ MVT VT = Addr.getSimpleValueType();
+
+ if (!isa<ConstantSDNode>(Addr))
+ return false;
+
+ // If the constant is a simm12, we can fold the whole constant and use R0 as
+ // the base.
+ int64_t CVal = cast<ConstantSDNode>(Addr)->getSExtValue();
+ if (!isInt<12>(CVal))
+ return false;
+ Base = CurDAG->getRegister(LoongArch::R0, VT);
+ Offset = CurDAG->getTargetConstant(SignExtend64<12>(CVal), DL, VT);
+ return true;
+}
+
bool LoongArchDAGToDAGISel::selectNonFIBaseAddr(SDValue Addr, SDValue &Base) {
// If this is FrameIndex, don't select it.
if (isa<FrameIndexSDNode>(Addr))
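
SelectAddrConstant above folds a literal address into the pair (R0, simm12) when the constant fits a signed 12-bit field, since R0 always reads as zero. A minimal standalone sketch of just that range check, assuming the usual semantics of llvm::isInt<12> (fitsSImm12 is a name invented here, not part of the patch):

#include <cstdint>
#include <cstdio>

// Assumed equivalent of llvm::isInt<12>(): the value fits a signed 12-bit
// immediate, i.e. the range [-2048, 2047].
static bool fitsSImm12(int64_t v) { return v >= -2048 && v <= 2047; }

int main() {
  // 2047 and -2048 can be encoded as (R0 + offset); 2048 cannot.
  printf("%d %d %d\n", fitsSImm12(2047), fitsSImm12(-2048), fitsSImm12(2048));
  return 0;
}
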
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
index 230151b5340e..3099407aea3e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
@@ -42,6 +42,7 @@ public:
std::vector<SDValue> &OutOps) override;
bool SelectBaseAddr(SDValue Addr, SDValue &Base);
+ bool SelectAddrConstant(SDValue Addr, SDValue &Base, SDValue &Offset);
bool selectNonFIBaseAddr(SDValue Addr, SDValue &Base);
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index a4a82bdef5af..db5961fc501a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -20,11 +20,15 @@
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsLoongArch.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
@@ -34,10 +38,9 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
-static cl::opt<bool> ZeroDivCheck(
- "loongarch-check-zero-division", cl::Hidden,
- cl::desc("Trap on integer division by zero."),
- cl::init(false));
+static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
+ cl::desc("Trap on integer division by zero."),
+ cl::init(false));
LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
const LoongArchSubtarget &STI)
@@ -50,6 +53,14 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
if (Subtarget.hasBasicD())
addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
+ if (Subtarget.hasExtLSX())
+ for (auto VT : {MVT::v4f32, MVT::v2f64, MVT::v16i8, MVT::v8i16, MVT::v4i32,
+ MVT::v2i64})
+ addRegisterClass(VT, &LoongArch::LSX128RegClass);
+ if (Subtarget.hasExtLASX())
+ for (auto VT : {MVT::v8f32, MVT::v4f64, MVT::v32i8, MVT::v16i16, MVT::v8i32,
+ MVT::v4i64})
+ addRegisterClass(VT, &LoongArch::LASX256RegClass);
setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
MVT::i1, Promote);
@@ -184,7 +195,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
}
// Compute derived properties from the register classes.
- computeRegisterProperties(STI.getRegisterInfo());
+ computeRegisterProperties(Subtarget.getRegisterInfo());
setStackPointerRegisterToSaveRestore(LoongArch::R3);
@@ -195,8 +206,11 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setMinCmpXchgSizeInBits(32);
// Function alignments.
- const Align FunctionAlignment(4);
- setMinFunctionAlignment(FunctionAlignment);
+ setMinFunctionAlignment(Align(4));
+ // Set preferred alignments.
+ setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
+ setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
+ setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
@@ -469,16 +483,44 @@ SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
- // TODO: Check CodeModel.
- if (IsLocal)
- // This generates the pattern (PseudoLA_PCREL sym), which expands to
- // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
- return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr),
- 0);
- // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
- // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
- return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
+ switch (DAG.getTarget().getCodeModel()) {
+ default:
+ report_fatal_error("Unsupported code model");
+
+ case CodeModel::Large: {
+ assert(Subtarget.is64Bit() && "Large code model requires LA64");
+
+ // This is not actually used, but is necessary for successfully matching
+ // the PseudoLA_*_LARGE nodes.
+ SDValue Tmp = DAG.getConstant(0, DL, Ty);
+ if (IsLocal)
+ // This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), that
+ // eventually becomes the desired 5-insn code sequence.
+ return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
+ Tmp, Addr),
+ 0);
+
+ // This generates the pattern (PseudoLA_GOT_LARGE tmp sym), that eventually
+ // becomes the desired 5-insn code sequence.
+ return SDValue(
+ DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
+ 0);
+ }
+
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ if (IsLocal)
+ // This generates the pattern (PseudoLA_PCREL sym), which expands to
+ // (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
+ return SDValue(
+ DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
+
+ // This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
+ // (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
+ return SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr),
+ 0);
+ }
}
SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
@@ -505,13 +547,19 @@ SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
SelectionDAG &DAG,
- unsigned Opc) const {
+ unsigned Opc,
+ bool Large) const {
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
MVT GRLenVT = Subtarget.getGRLenVT();
+ // This is not actually used, but is necessary for successfully matching the
+ // PseudoLA_*_LARGE nodes.
+ SDValue Tmp = DAG.getConstant(0, DL, Ty);
SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
- SDValue Offset = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
+ SDValue Offset = Large
+ ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
+ : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
// Add the thread pointer.
return DAG.getNode(ISD::ADD, DL, Ty, Offset,
@@ -520,14 +568,20 @@ SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
SelectionDAG &DAG,
- unsigned Opc) const {
+ unsigned Opc,
+ bool Large) const {
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
+ // This is not actually used, but is necessary for successfully matching the
+ // PseudoLA_*_LARGE nodes.
+ SDValue Tmp = DAG.getConstant(0, DL, Ty);
+
// Use a PC-relative addressing mode to access the dynamic GOT address.
SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
- SDValue Load = SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
+ SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
+ : SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
// Prepare argument list to generate call.
ArgListTy Args;
@@ -554,6 +608,9 @@ LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
CallingConv::GHC)
report_fatal_error("In GHC calling convention TLS is not supported");
+ bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
+ assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
+
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
assert(N->getOffset() == 0 && "unexpected offset in global node");
@@ -563,20 +620,31 @@ LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
// In this model, application code calls the dynamic linker function
// __tls_get_addr to locate TLS offsets into the dynamic thread vector at
// runtime.
- Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_GD);
+ Addr = getDynamicTLSAddr(N, DAG,
+ Large ? LoongArch::PseudoLA_TLS_GD_LARGE
+ : LoongArch::PseudoLA_TLS_GD,
+ Large);
break;
case TLSModel::LocalDynamic:
// Same as GeneralDynamic, except for assembly modifiers and relocation
// records.
- Addr = getDynamicTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LD);
+ Addr = getDynamicTLSAddr(N, DAG,
+ Large ? LoongArch::PseudoLA_TLS_LD_LARGE
+ : LoongArch::PseudoLA_TLS_LD,
+ Large);
break;
case TLSModel::InitialExec:
// This model uses the GOT to resolve TLS offsets.
- Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_IE);
+ Addr = getStaticTLSAddr(N, DAG,
+ Large ? LoongArch::PseudoLA_TLS_IE_LARGE
+ : LoongArch::PseudoLA_TLS_IE,
+ Large);
break;
case TLSModel::LocalExec:
// This model is used when static linking as the TLS offsets are resolved
// during program linking.
+ //
+ // This node doesn't need an extra argument for the large code model.
Addr = getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE);
break;
}
@@ -597,13 +665,12 @@ LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
}
}
-// Helper function that emits error message for intrinsics with chain.
+// Helper function that emits an error message for intrinsics with a chain and
+// returns the merged values of an UNDEF and the chain.
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
StringRef ErrorMsg,
SelectionDAG &DAG) {
-
- DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) + "' " +
- ErrorMsg);
+ DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
SDLoc(Op));
}
@@ -613,9 +680,11 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT GRLenVT = Subtarget.getGRLenVT();
- SDValue Op0 = Op.getOperand(0);
- std::string Name = Op->getOperationName(0);
- const StringRef ErrorMsgOOR = "out of range";
+ EVT VT = Op.getValueType();
+ SDValue Chain = Op.getOperand(0);
+ const StringRef ErrorMsgOOR = "argument out of range";
+ const StringRef ErrorMsgReqLA64 = "requires loongarch64";
+ const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
switch (Op.getConstantOperandVal(1)) {
default:
@@ -627,115 +696,76 @@ LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
case Intrinsic::loongarch_crcc_w_b_w:
case Intrinsic::loongarch_crcc_w_h_w:
case Intrinsic::loongarch_crcc_w_w_w:
- case Intrinsic::loongarch_crcc_w_d_w: {
- std::string Name = Op->getOperationName(0);
- DAG.getContext()->emitError(Name + " requires target: loongarch64");
- return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
- }
+ case Intrinsic::loongarch_crcc_w_d_w:
+ return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
case Intrinsic::loongarch_csrrd_w:
case Intrinsic::loongarch_csrrd_d: {
unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
- if (!isUInt<14>(Imm))
- return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
- return DAG.getMergeValues(
- {DAG.getNode(LoongArchISD::CSRRD, DL, GRLenVT, Op0,
- DAG.getConstant(Imm, DL, GRLenVT)),
- Op0},
- DL);
+ return !isUInt<14>(Imm)
+ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
+ : DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
+ {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
}
case Intrinsic::loongarch_csrwr_w:
case Intrinsic::loongarch_csrwr_d: {
unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
- if (!isUInt<14>(Imm))
- return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
- return DAG.getMergeValues(
- {DAG.getNode(LoongArchISD::CSRWR, DL, GRLenVT, Op0, Op.getOperand(2),
- DAG.getConstant(Imm, DL, GRLenVT)),
- Op0},
- DL);
+ return !isUInt<14>(Imm)
+ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
+ : DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
+ {Chain, Op.getOperand(2),
+ DAG.getConstant(Imm, DL, GRLenVT)});
}
case Intrinsic::loongarch_csrxchg_w:
case Intrinsic::loongarch_csrxchg_d: {
unsigned Imm = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
- if (!isUInt<14>(Imm))
- return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG);
- return DAG.getMergeValues(
- {DAG.getNode(LoongArchISD::CSRXCHG, DL, GRLenVT, Op0, Op.getOperand(2),
- Op.getOperand(3), DAG.getConstant(Imm, DL, GRLenVT)),
- Op0},
- DL);
+ return !isUInt<14>(Imm)
+ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
+ : DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
+ {Chain, Op.getOperand(2), Op.getOperand(3),
+ DAG.getConstant(Imm, DL, GRLenVT)});
}
case Intrinsic::loongarch_iocsrrd_d: {
- if (Subtarget.is64Bit())
- return DAG.getMergeValues(
- {DAG.getNode(
- LoongArchISD::IOCSRRD_D, DL, GRLenVT, Op0,
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))),
- Op0},
- DL);
- else {
- DAG.getContext()->emitError(
- "llvm.loongarch.crc.w.d.w requires target: loongarch64");
- return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
- }
+ return DAG.getNode(
+ LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
+ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
}
#define IOCSRRD_CASE(NAME, NODE) \
case Intrinsic::loongarch_##NAME: { \
- return DAG.getMergeValues( \
- {DAG.getNode(LoongArchISD::NODE, DL, GRLenVT, Op0, Op.getOperand(2)), \
- Op0}, \
- DL); \
+ return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
+ {Chain, Op.getOperand(2)}); \
}
IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
case Intrinsic::loongarch_cpucfg: {
- return DAG.getMergeValues(
- {DAG.getNode(LoongArchISD::CPUCFG, DL, GRLenVT, Op0, Op.getOperand(2)),
- Op0},
- DL);
+ return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
+ {Chain, Op.getOperand(2)});
}
case Intrinsic::loongarch_lddir_d: {
unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
- if (!isUInt<8>(Imm)) {
- DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) +
- "' out of range");
- return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op0}, DL);
- }
-
- return Op;
+ return !isUInt<8>(Imm)
+ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
+ : Op;
}
case Intrinsic::loongarch_movfcsr2gr: {
- if (!Subtarget.hasBasicF()) {
- DAG.getContext()->emitError(
- "llvm.loongarch.movfcsr2gr expects basic f target feature");
- return DAG.getMergeValues(
- {DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, SDLoc(Op));
- }
+ if (!Subtarget.hasBasicF())
+ return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
unsigned Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
- if (!isUInt<2>(Imm)) {
- DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) +
- "' " + ErrorMsgOOR);
- return DAG.getMergeValues(
- {DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)}, SDLoc(Op));
- }
- return DAG.getMergeValues(
- {DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, Op.getValueType(),
- DAG.getConstant(Imm, DL, GRLenVT)),
- Op.getOperand(0)},
- DL);
+ return !isUInt<2>(Imm)
+ ? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
+ : DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
+ {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
}
}
}
// Helper function that emits error message for intrinsics with void return
-// value.
+// value and returns the chain.
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
SelectionDAG &DAG) {
- DAG.getContext()->emitError("argument to '" + Op->getOperationName(0) + "' " +
- ErrorMsg);
+ DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
return Op.getOperand(0);
}
@@ -743,10 +773,13 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT GRLenVT = Subtarget.getGRLenVT();
- SDValue Op0 = Op.getOperand(0);
+ SDValue Chain = Op.getOperand(0);
uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
SDValue Op2 = Op.getOperand(2);
- const StringRef ErrorMsgOOR = "out of range";
+ const StringRef ErrorMsgOOR = "argument out of range";
+ const StringRef ErrorMsgReqLA64 = "requires loongarch64";
+ const StringRef ErrorMsgReqLA32 = "requires loongarch32";
+ const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
switch (IntrinsicEnum) {
default:
@@ -754,122 +787,93 @@ SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
return SDValue();
case Intrinsic::loongarch_cacop_d:
case Intrinsic::loongarch_cacop_w: {
- if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit()) {
- DAG.getContext()->emitError(
- "llvm.loongarch.cacop.d requires target: loongarch64");
- return Op.getOperand(0);
- }
- if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit()) {
- DAG.getContext()->emitError(
- "llvm.loongarch.cacop.w requires target: loongarch32");
- return Op.getOperand(0);
- }
+ if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
+ return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
+ if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
+ return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
// call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
unsigned Imm1 = cast<ConstantSDNode>(Op2)->getZExtValue();
- if (!isUInt<5>(Imm1))
+ int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
+ if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
- SDValue Op4 = Op.getOperand(4);
- int Imm2 = cast<ConstantSDNode>(Op4)->getSExtValue();
- if (!isInt<12>(Imm2))
- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
-
return Op;
}
-
case Intrinsic::loongarch_dbar: {
unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
- if (!isUInt<15>(Imm))
- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
-
- return DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Op0,
- DAG.getConstant(Imm, DL, GRLenVT));
+ return !isUInt<15>(Imm)
+ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
+ : DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
+ DAG.getConstant(Imm, DL, GRLenVT));
}
case Intrinsic::loongarch_ibar: {
unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
- if (!isUInt<15>(Imm))
- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
-
- return DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Op0,
- DAG.getConstant(Imm, DL, GRLenVT));
+ return !isUInt<15>(Imm)
+ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
+ : DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
+ DAG.getConstant(Imm, DL, GRLenVT));
}
case Intrinsic::loongarch_break: {
unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
- if (!isUInt<15>(Imm))
- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
-
- return DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Op0,
- DAG.getConstant(Imm, DL, GRLenVT));
+ return !isUInt<15>(Imm)
+ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
+ : DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
+ DAG.getConstant(Imm, DL, GRLenVT));
}
case Intrinsic::loongarch_movgr2fcsr: {
- if (!Subtarget.hasBasicF()) {
- DAG.getContext()->emitError(
- "llvm.loongarch.movgr2fcsr expects basic f target feature");
- return Op0;
- }
+ if (!Subtarget.hasBasicF())
+ return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
- if (!isUInt<2>(Imm))
- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
-
- return DAG.getNode(
- LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Op0,
- DAG.getConstant(Imm, DL, GRLenVT),
- DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Op.getOperand(3)));
+ return !isUInt<2>(Imm)
+ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
+ : DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
+ DAG.getConstant(Imm, DL, GRLenVT),
+ DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
+ Op.getOperand(3)));
}
case Intrinsic::loongarch_syscall: {
unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
- if (!isUInt<15>(Imm))
- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
-
- return DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Op0,
- DAG.getConstant(Imm, DL, GRLenVT));
+ return !isUInt<15>(Imm)
+ ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
+ : DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
+ DAG.getConstant(Imm, DL, GRLenVT));
}
#define IOCSRWR_CASE(NAME, NODE) \
case Intrinsic::loongarch_##NAME: { \
SDValue Op3 = Op.getOperand(3); \
- if (Subtarget.is64Bit()) \
- return DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Op0, \
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)); \
- else \
- return DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Op0, Op2, Op3); \
+ return Subtarget.is64Bit() \
+ ? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
+ : DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
+ Op3); \
}
IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
#undef IOCSRWR_CASE
case Intrinsic::loongarch_iocsrwr_d: {
- if (Subtarget.is64Bit())
- return DAG.getNode(
- LoongArchISD::IOCSRWR_D, DL, MVT::Other, Op0, Op2,
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(3)));
- else {
- DAG.getContext()->emitError(
- "llvm.loongarch.iocsrwr.d requires target: loongarch64");
- return Op.getOperand(0);
- }
+ return !Subtarget.is64Bit()
+ ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
+ : DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
+ Op2,
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
+ Op.getOperand(3)));
}
#define ASRT_LE_GT_CASE(NAME) \
case Intrinsic::loongarch_##NAME: { \
- if (!Subtarget.is64Bit()) { \
- DAG.getContext()->emitError(Op->getOperationName(0) + \
- " requires target: loongarch64"); \
- return Op.getOperand(0); \
- } \
- return Op; \
+ return !Subtarget.is64Bit() \
+ ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
+ : Op; \
}
ASRT_LE_GT_CASE(asrtle_d)
ASRT_LE_GT_CASE(asrtgt_d)
#undef ASRT_LE_GT_CASE
case Intrinsic::loongarch_ldpte_d: {
unsigned Imm = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
- if (!isUInt<8>(Imm))
- return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
- if (!Subtarget.is64Bit()) {
- DAG.getContext()->emitError(Op->getOperationName(0) +
- " requires target: loongarch64");
- return Op.getOperand(0);
- }
- return Op;
+ return !Subtarget.is64Bit()
+ ? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
+ : !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
+ : Op;
}
}
}
@@ -1022,6 +1026,16 @@ static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}
+// Helper function that emits an error message for intrinsics with a chain and
+// pushes an UNDEF and the chain as the replacement results.
+static void emitErrorAndReplaceIntrinsicWithChainResults(
+ SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
+ StringRef ErrorMsg) {
+ DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
+ Results.push_back(DAG.getUNDEF(N->getValueType(0)));
+ Results.push_back(N->getOperand(0));
+}
+
void LoongArchTargetLowering::ReplaceNodeResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
SDLoc DL(N);
@@ -1142,50 +1156,44 @@ void LoongArchTargetLowering::ReplaceNodeResults(
break;
}
case ISD::INTRINSIC_W_CHAIN: {
- SDValue Op0 = N->getOperand(0);
- EVT VT = N->getValueType(0);
- uint64_t Op1 = N->getConstantOperandVal(1);
+ SDValue Chain = N->getOperand(0);
+ SDValue Op2 = N->getOperand(2);
MVT GRLenVT = Subtarget.getGRLenVT();
- if (Op1 == Intrinsic::loongarch_movfcsr2gr) {
+ const StringRef ErrorMsgOOR = "argument out of range";
+ const StringRef ErrorMsgReqLA64 = "requires loongarch64";
+ const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
+
+ switch (N->getConstantOperandVal(1)) {
+ default:
+ llvm_unreachable("Unexpected Intrinsic.");
+ case Intrinsic::loongarch_movfcsr2gr: {
if (!Subtarget.hasBasicF()) {
- DAG.getContext()->emitError(
- "llvm.loongarch.movfcsr2gr expects basic f target feature");
- Results.push_back(DAG.getMergeValues(
- {DAG.getUNDEF(N->getValueType(0)), N->getOperand(0)}, SDLoc(N)));
- Results.push_back(N->getOperand(0));
+ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
+ ErrorMsgReqF);
return;
}
- unsigned Imm = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
if (!isUInt<2>(Imm)) {
- DAG.getContext()->emitError("argument to '" + N->getOperationName(0) +
- "' " + "out of range");
- Results.push_back(DAG.getMergeValues(
- {DAG.getUNDEF(N->getValueType(0)), N->getOperand(0)}, SDLoc(N)));
- Results.push_back(N->getOperand(0));
+ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
+ ErrorMsgOOR);
return;
}
+ SDValue MOVFCSR2GRResults = DAG.getNode(
+ LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
+ {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
Results.push_back(
- DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(LoongArchISD::MOVFCSR2GR, SDLoc(N), MVT::i64,
- DAG.getConstant(Imm, DL, GRLenVT))));
- Results.push_back(N->getOperand(0));
- return;
+ DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
+ Results.push_back(MOVFCSR2GRResults.getValue(1));
+ break;
}
- SDValue Op2 = N->getOperand(2);
- std::string Name = N->getOperationName(0);
-
- switch (Op1) {
- default:
- llvm_unreachable("Unexpected Intrinsic.");
#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
case Intrinsic::loongarch_##NAME: { \
- Results.push_back(DAG.getNode( \
- ISD::TRUNCATE, DL, VT, \
- DAG.getNode( \
- LoongArchISD::NODE, DL, MVT::i64, \
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))))); \
- Results.push_back(N->getOperand(0)); \
+ SDValue NODE = DAG.getNode( \
+ LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
+ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
+ Results.push_back(NODE.getValue(1)); \
break; \
}
CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
@@ -1198,12 +1206,12 @@ void LoongArchTargetLowering::ReplaceNodeResults(
#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
case Intrinsic::loongarch_##NAME: { \
- Results.push_back( \
- DAG.getNode(ISD::TRUNCATE, DL, VT, \
- DAG.getNode(LoongArchISD::NODE, DL, MVT::i64, Op2, \
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, \
- N->getOperand(3))))); \
- Results.push_back(N->getOperand(0)); \
+ SDValue NODE = DAG.getNode( \
+ LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
+ {Chain, Op2, \
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
+ Results.push_back(NODE.getValue(1)); \
break; \
}
CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
@@ -1211,11 +1219,9 @@ void LoongArchTargetLowering::ReplaceNodeResults(
#undef CRC_CASE_EXT_UNARYOP
#define CSR_CASE(ID) \
case Intrinsic::loongarch_##ID: { \
- if (!Subtarget.is64Bit()) { \
- DAG.getContext()->emitError(Name + " requires target: loongarch64"); \
- Results.push_back(DAG.getUNDEF(VT)); \
- Results.push_back(N->getOperand(0)); \
- } \
+ if (!Subtarget.is64Bit()) \
+ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG, \
+ ErrorMsgReqLA64); \
break; \
}
CSR_CASE(csrrd_d);
@@ -1226,62 +1232,59 @@ void LoongArchTargetLowering::ReplaceNodeResults(
case Intrinsic::loongarch_csrrd_w: {
unsigned Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
if (!isUInt<14>(Imm)) {
- DAG.getContext()->emitError("argument to '" + Name + "' out of range");
- Results.push_back(DAG.getUNDEF(VT));
- Results.push_back(N->getOperand(0));
- break;
+ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
+ ErrorMsgOOR);
+ return;
}
-
+ SDValue CSRRDResults =
+ DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
+ {Chain, DAG.getConstant(Imm, DL, GRLenVT)});
Results.push_back(
- DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(LoongArchISD::CSRRD, DL, GRLenVT, Op0,
- DAG.getConstant(Imm, DL, GRLenVT))));
- Results.push_back(N->getOperand(0));
+ DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
+ Results.push_back(CSRRDResults.getValue(1));
break;
}
case Intrinsic::loongarch_csrwr_w: {
unsigned Imm = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
if (!isUInt<14>(Imm)) {
- DAG.getContext()->emitError("argument to '" + Name + "' out of range");
- Results.push_back(DAG.getUNDEF(VT));
- Results.push_back(N->getOperand(0));
- break;
+ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
+ ErrorMsgOOR);
+ return;
}
-
- Results.push_back(DAG.getNode(
- ISD::TRUNCATE, DL, VT,
- DAG.getNode(LoongArchISD::CSRWR, DL, GRLenVT, Op0,
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
- DAG.getConstant(Imm, DL, GRLenVT))));
- Results.push_back(N->getOperand(0));
+ SDValue CSRWRResults =
+ DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
+ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
+ DAG.getConstant(Imm, DL, GRLenVT)});
+ Results.push_back(
+ DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
+ Results.push_back(CSRWRResults.getValue(1));
break;
}
case Intrinsic::loongarch_csrxchg_w: {
unsigned Imm = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue();
if (!isUInt<14>(Imm)) {
- DAG.getContext()->emitError("argument to '" + Name + "' out of range");
- Results.push_back(DAG.getUNDEF(VT));
- Results.push_back(N->getOperand(0));
- break;
+ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
+ ErrorMsgOOR);
+ return;
}
-
- Results.push_back(DAG.getNode(
- ISD::TRUNCATE, DL, VT,
- DAG.getNode(
- LoongArchISD::CSRXCHG, DL, GRLenVT, Op0,
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
- DAG.getConstant(Imm, DL, GRLenVT))));
- Results.push_back(N->getOperand(0));
+ SDValue CSRXCHGResults = DAG.getNode(
+ LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
+ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
+ DAG.getConstant(Imm, DL, GRLenVT)});
+ Results.push_back(
+ DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
+ Results.push_back(CSRXCHGResults.getValue(1));
break;
}
#define IOCSRRD_CASE(NAME, NODE) \
case Intrinsic::loongarch_##NAME: { \
- Results.push_back(DAG.getNode( \
- ISD::TRUNCATE, DL, N->getValueType(0), \
- DAG.getNode(LoongArchISD::NODE, DL, MVT::i64, Op0, \
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)))); \
- Results.push_back(N->getOperand(0)); \
+ SDValue IOCSRRDResults = \
+ DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
+ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
+ Results.push_back( \
+ DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
+ Results.push_back(IOCSRRDResults.getValue(1)); \
break; \
}
IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
@@ -1289,20 +1292,19 @@ void LoongArchTargetLowering::ReplaceNodeResults(
IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
#undef IOCSRRD_CASE
case Intrinsic::loongarch_cpucfg: {
- Results.push_back(DAG.getNode(
- ISD::TRUNCATE, DL, VT,
- DAG.getNode(LoongArchISD::CPUCFG, DL, GRLenVT, Op0,
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2))));
- Results.push_back(Op0);
+ SDValue CPUCFGResults =
+ DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
+ {Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
+ Results.push_back(
+ DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
+ Results.push_back(CPUCFGResults.getValue(1));
break;
}
case Intrinsic::loongarch_lddir_d: {
if (!Subtarget.is64Bit()) {
- DAG.getContext()->emitError(N->getOperationName(0) +
- " requires target: loongarch64");
- Results.push_back(DAG.getUNDEF(VT));
- Results.push_back(Op0);
- break;
+ emitErrorAndReplaceIntrinsicWithChainResults(N, Results, DAG,
+ ErrorMsgReqLA64);
+ return;
}
break;
}
@@ -1373,16 +1375,39 @@ static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
if (CN->getZExtValue() <= 0xfff)
return SDValue();
- // Return if the mask doesn't start at position 0.
- if (SMIdx)
+    // Return if the mask extends beyond the width of the value type.
+ if (SMIdx + SMLen > ValTy.getSizeInBits())
return SDValue();
- lsb = 0;
+ if (SMIdx > 0) {
+      // Omit if the constant has more than 2 uses. This is a conservative
+      // decision. Whether it is a win depends on the HW microarchitecture.
+      // However, it should always be better for 1 and 2 uses.
+ if (CN->use_size() > 2)
+ return SDValue();
+ // Return if the constant can be composed by a single LU12I.W.
+ if ((CN->getZExtValue() & 0xfff) == 0)
+ return SDValue();
+      // Return if the constant can be composed by a single ADDI with
+      // the zero register.
+ if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
+ return SDValue();
+ }
+
+ lsb = SMIdx;
NewOperand = FirstOperand;
}
+
msb = lsb + SMLen - 1;
- return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
- DAG.getConstant(msb, DL, GRLenVT),
+ SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
+ DAG.getConstant(msb, DL, GRLenVT),
+ DAG.getConstant(lsb, DL, GRLenVT));
+ if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
+ return NR0;
+ // Try to optimize to
+ // bstrpick $Rd, $Rs, msb, lsb
+ // slli $Rd, $Rd, lsb
+ return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
DAG.getConstant(lsb, DL, GRLenVT));
}
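
The AND combine above relies on the identity that masking with a contiguous run of SMLen one bits starting at bit lsb is the same as extracting that bit field and shifting it back into place, which maps to a bstrpick followed by an slli when lsb is non-zero. A small self-contained check of the identity (plain C++, names invented here, not part of the patch):

#include <cstdint>
#include <cstdio>

// Model of the rewrite performed above: AND with a contiguous mask at offset
// `lsb` == extract the field (bstrpick-style), then shift it back left (slli).
static uint64_t mask_via_extract(uint64_t x, unsigned lsb, unsigned len) {
  uint64_t field = (x >> lsb) & ((1ULL << len) - 1); // bstrpick x, msb, lsb
  return field << lsb;                               // slli     result, lsb
}

int main() {
  uint64_t x = 0x123456789abcdef0ULL;
  uint64_t mask = 0xff0;                                   // 8 bits at bit 4
  printf("%d\n", (x & mask) == mask_via_extract(x, 4, 8)); // prints 1
  return 0;
}
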
@@ -1764,6 +1789,18 @@ MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
}
}
+bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
+ unsigned *Fast) const {
+ if (!Subtarget.hasUAL())
+ return false;
+
+ // TODO: set reasonable speed number.
+ if (Fast)
+ *Fast = 1;
+ return true;
+}
+
const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((LoongArchISD::NodeType)Opcode) {
case LoongArchISD::FIRST_NUMBER:
@@ -1907,7 +1944,6 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
default:
llvm_unreachable("Unexpected ABI");
case LoongArchABI::ABI_ILP32S:
- case LoongArchABI::ABI_LP64S:
case LoongArchABI::ABI_ILP32F:
case LoongArchABI::ABI_LP64F:
report_fatal_error("Unimplemented ABI");
@@ -1916,6 +1952,8 @@ static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
case LoongArchABI::ABI_LP64D:
UseGPRForFloat = !IsFixed;
break;
+ case LoongArchABI::ABI_LP64S:
+ break;
}
// FPR32 and FPR64 alias each other.
@@ -2048,8 +2086,8 @@ void LoongArchTargetLowering::analyzeInputArgs(
MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
- LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString() << '\n');
+ LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
+ << '\n');
llvm_unreachable("");
}
}
@@ -2066,8 +2104,8 @@ void LoongArchTargetLowering::analyzeOutputArgs(
MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
- LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString() << "\n");
+ LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
+ << "\n");
llvm_unreachable("");
}
}
@@ -2155,14 +2193,15 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
}
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
- CCValAssign::LocInfo LocInfo,
- ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
if (LocVT == MVT::i32 || LocVT == MVT::i64) {
// Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
// s0 s1 s2 s3 s4 s5 s6 s7 s8
static const MCPhysReg GPRList[] = {
- LoongArch::R23, LoongArch::R24, LoongArch::R25, LoongArch::R26, LoongArch::R27,
- LoongArch::R28, LoongArch::R29, LoongArch::R30, LoongArch::R31};
+ LoongArch::R23, LoongArch::R24, LoongArch::R25,
+ LoongArch::R26, LoongArch::R27, LoongArch::R28,
+ LoongArch::R29, LoongArch::R30, LoongArch::R31};
if (unsigned Reg = State.AllocateReg(GPRList)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
@@ -2210,10 +2249,10 @@ SDValue LoongArchTargetLowering::LowerFormalArguments(
case CallingConv::Fast:
break;
case CallingConv::GHC:
- if (!MF.getSubtarget().getFeatureBits()[LoongArch::FeatureBasicF] ||
- !MF.getSubtarget().getFeatureBits()[LoongArch::FeatureBasicD])
+ if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
+ !MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
report_fatal_error(
- "GHC calling convention requires the F and D extensions");
+ "GHC calling convention requires the F and D extensions");
}
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -2276,7 +2315,7 @@ SDValue LoongArchTargetLowering::LowerFormalArguments(
// If all registers are allocated, then all varargs must be passed on the
// stack and we don't need to save any argregs.
if (ArgRegs.size() == Idx) {
- VaArgOffset = CCInfo.getNextStackOffset();
+ VaArgOffset = CCInfo.getStackSize();
VarArgsSaveSize = 0;
} else {
VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
@@ -2330,6 +2369,39 @@ bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return CI->isTailCall();
}
+// Check if the return value is used only as a return value, as otherwise
+// we can't perform a tail call.
+bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
+ SDValue &Chain) const {
+ if (N->getNumValues() != 1)
+ return false;
+ if (!N->hasNUsesOfValue(1, 0))
+ return false;
+
+ SDNode *Copy = *N->use_begin();
+ if (Copy->getOpcode() != ISD::CopyToReg)
+ return false;
+
+ // If the ISD::CopyToReg has a glue operand, we conservatively assume it
+ // isn't safe to perform a tail call.
+ if (Copy->getGluedNode())
+ return false;
+
+ // The copy must be used by a LoongArchISD::RET, and nothing else.
+ bool HasRet = false;
+ for (SDNode *Node : Copy->uses()) {
+ if (Node->getOpcode() != LoongArchISD::RET)
+ return false;
+ HasRet = true;
+ }
+
+ if (!HasRet)
+ return false;
+
+ Chain = Copy->getOperand(0);
+ return true;
+}
+
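
isUsedByReturnOnly reports that a call's result only flows into the function's own return, which lets the backend turn libcall-style calls into tail calls. Purely as an illustration of the source-level shape this enables (not code from this patch; copy_out is an invented name):

#include <cstdio>
#include <cstring>

// Illustrative only: because the memcpy result is returned directly, the
// value is "used by return only" and the call can be emitted as a tail call.
static void *copy_out(void *dst, const void *src, unsigned long n) {
  return memcpy(dst, src, n);
}

int main() {
  char src[4] = "abc", dst[4];
  printf("%s\n", (char *)copy_out(dst, src, sizeof(src)));
  return 0;
}
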
// Check whether the call is eligible for tail call optimization.
bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
@@ -2341,7 +2413,7 @@ bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
auto CallerCC = Caller.getCallingConv();
// Do not tail call opt if the stack is used to pass parameters.
- if (CCInfo.getNextStackOffset() != 0)
+ if (CCInfo.getStackSize() != 0)
return false;
// Do not tail call opt if any parameters need to be passed indirectly.
@@ -2417,7 +2489,7 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
"site marked musttail");
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = ArgCCInfo.getNextStackOffset();
+ unsigned NumBytes = ArgCCInfo.getStackSize();
// Create local copies for byval args.
SmallVector<SDValue> ByValArgs;
@@ -2583,7 +2655,9 @@ LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (IsTailCall) {
MF.getFrameInfo().setHasTailCall();
- return DAG.getNode(LoongArchISD::TAIL, DL, NodeTys, Ops);
+ SDValue Ret = DAG.getNode(LoongArchISD::TAIL, DL, NodeTys, Ops);
+ DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
+ return Ret;
}
Chain = DAG.getNode(LoongArchISD::CALL, DL, NodeTys, Ops);
@@ -2982,6 +3056,12 @@ LoongArchTargetLowering::getRegForInlineAsmConstraint(
return std::make_pair(0U, &LoongArch::FPR32RegClass);
if (Subtarget.hasBasicD() && VT == MVT::f64)
return std::make_pair(0U, &LoongArch::FPR64RegClass);
+ if (Subtarget.hasExtLSX() &&
+ TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
+ return std::make_pair(0U, &LoongArch::LSX128RegClass);
+ if (Subtarget.hasExtLASX() &&
+ TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
+ return std::make_pair(0U, &LoongArch::LASX256RegClass);
break;
default:
break;
@@ -2999,7 +3079,8 @@ LoongArchTargetLowering::getRegForInlineAsmConstraint(
// decode the usage of register name aliases into their official names. And
// AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
// official register names.
- if (Constraint.startswith("{$r") || Constraint.startswith("{$f")) {
+ if (Constraint.startswith("{$r") || Constraint.startswith("{$f") ||
+ Constraint.startswith("{$vr") || Constraint.startswith("{$xr")) {
bool IsFP = Constraint[2] == 'f';
std::pair<StringRef, StringRef> Temp = Constraint.split('$');
std::pair<unsigned, const TargetRegisterClass *> R;
@@ -3099,13 +3180,119 @@ bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
if (VT.getSizeInBits() > Subtarget.getGRLen())
return false;
- // Break MUL into (SLLI + ADD/SUB) or ALSL.
if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
const APInt &Imm = ConstNode->getAPIntValue();
+ // Break MUL into (SLLI + ADD/SUB) or ALSL.
if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
(1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
return true;
+ // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
+ if (ConstNode->hasOneUse() &&
+ ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
+ (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
+ return true;
+    // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)), in which the
+    // immediate has two set bits, or break (MUL x, imm) into
+    // (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate equals
+    // (1 << s0) - (1 << s1).
+ if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
+ unsigned Shifts = Imm.countr_zero();
+      // Reject immediates which can be composed via a single LU12I.W.
+ if (Shifts >= 12)
+ return false;
+      // Reject multiplications that can be optimized to
+ // (SLLI (ALSL x, x, 1/2/3/4), s).
+ APInt ImmPop = Imm.ashr(Shifts);
+ if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
+ return false;
+ // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
+      // since it needs one more instruction than the other 3 cases.
+ APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
+ if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
+ (ImmSmall - Imm).isPowerOf2())
+ return true;
+ }
}
return false;
}
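
decomposeMulByConstant now also accepts the cases described in the comments above: multipliers of the form 2^k +/- 1 (slli plus add/sub, or alsl), immediates with exactly two set bits, and immediates equal to a difference of two powers of two. A quick standalone check of those arithmetic identities (illustrative C++, not part of the patch):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t x = 12345;
  // Imm = 2^k + 1 (e.g. 17): one shift plus an add (alsl-style).
  bool a = x * 17 == (x << 4) + x;
  // Imm with two set bits (e.g. 0x1040 = (1 << 12) + (1 << 6)): shifts + add.
  bool b = x * 0x1040 == (x << 12) + (x << 6);
  // Imm = (1 << s0) - (1 << s1) (e.g. 0xfc0 = (1 << 12) - (1 << 6)): shifts + sub.
  bool c = x * 0xfc0 == (x << 12) - (x << 6);
  printf("%d %d %d\n", a, b, c); // all print 1
  return 0;
}
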
+
+bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM,
+ Type *Ty, unsigned AS,
+ Instruction *I) const {
+ // LoongArch has four basic addressing modes:
+ // 1. reg
+ // 2. reg + 12-bit signed offset
+ // 3. reg + 14-bit signed offset left-shifted by 2
+ // 4. reg1 + reg2
+  // TODO: Add more checks after supporting the vector extensions.
+
+ // No global is ever allowed as a base.
+ if (AM.BaseGV)
+ return false;
+
+ // Require a 12 or 14 bit signed offset.
+ if (!isInt<12>(AM.BaseOffs) || !isShiftedInt<14, 2>(AM.BaseOffs))
+ return false;
+
+ switch (AM.Scale) {
+ case 0:
+ // "i" is not allowed.
+ if (!AM.HasBaseReg)
+ return false;
+ // Otherwise we have "r+i".
+ break;
+ case 1:
+ // "r+r+i" is not allowed.
+ if (AM.HasBaseReg && AM.BaseOffs != 0)
+ return false;
+ // Otherwise we have "r+r" or "r+i".
+ break;
+ case 2:
+ // "2*r+r" or "2*r+i" is not allowed.
+ if (AM.HasBaseReg || AM.BaseOffs)
+ return false;
+ // Otherwise we have "r+r".
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ return isInt<12>(Imm);
+}
+
+bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
+ return isInt<12>(Imm);
+}
+
+bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ // Zexts are free if they can be combined with a load.
+ // Don't advertise i32->i64 zextload as being free for LA64. It interacts
+ // poorly with type legalization of compares preferring sext.
+ if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
+ EVT MemVT = LD->getMemoryVT();
+ if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
+ (LD->getExtensionType() == ISD::NON_EXTLOAD ||
+ LD->getExtensionType() == ISD::ZEXTLOAD))
+ return true;
+ }
+
+ return TargetLowering::isZExtFree(Val, VT2);
+}
+
+bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
+ return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
+}
+
+bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
+ // TODO: Support vectors.
+ if (Y.getValueType().isVector())
+ return false;
+
+ return !isa<ConstantSDNode>(Y);
+}
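
hasAndNotCompare tells the DAG combiner that keeping comparisons in the (x & ~y) form is profitable, since LoongArch can fold the inversion into a single and-not style instruction instead of materializing ~y separately. The classic pattern it favors is a mask subset test; an illustrative sketch (names invented here, not part of the patch):

#include <cstdint>
#include <cstdio>

// Illustrative pattern favored by hasAndNotCompare(): a single and-not feeds
// the comparison, so targets with an and-not instruction avoid computing ~y.
static bool is_subset(uint32_t x, uint32_t y) {
  return (x & ~y) == 0; // true iff every bit set in x is also set in y
}

int main() {
  printf("%d %d\n", is_subset(0x0f, 0xff), is_subset(0x10f, 0xff)); // 1 0
  return 0;
}
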
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 0ddcda66d281..500407493fe5 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -80,7 +80,22 @@ enum NodeType : unsigned {
CRCC_W_D_W,
CSRRD,
+
+ // Write new value to CSR and return old value.
+ // Operand 0: A chain pointer.
+ // Operand 1: The new value to write.
+ // Operand 2: The address of the required CSR.
+ // Result 0: The old value of the CSR.
+ // Result 1: The new chain pointer.
CSRWR,
+
+ // Similar to CSRWR but with a write mask.
+ // Operand 0: A chain pointer.
+ // Operand 1: The new value to write.
+ // Operand 2: The write mask.
+ // Operand 3: The address of the required CSR.
+ // Result 0: The old value of the CSR.
+ // Result 1: The new chain pointer.
CSRXCHG,
// IOCSR access operations
@@ -181,6 +196,26 @@ public:
bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
SDValue C) const override;
+ bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
+
+ bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
+ unsigned AS,
+ Instruction *I = nullptr) const override;
+
+ bool isLegalICmpImmediate(int64_t Imm) const override;
+ bool isLegalAddImmediate(int64_t Imm) const override;
+ bool isZExtFree(SDValue Val, EVT VT2) const override;
+ bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
+
+ bool hasAndNotCompare(SDValue Y) const override;
+
+ bool convertSelectOfConstantsToMath(EVT VT) const override { return true; }
+
+ bool allowsMisalignedMemoryAccesses(
+ EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
+ unsigned *Fast = nullptr) const override;
+
private:
/// Target-specific function used to lower LoongArch calling conventions.
typedef bool LoongArchCCAssignFn(const DataLayout &DL, LoongArchABI::ABI ABI,
@@ -200,9 +235,9 @@ private:
template <class NodeTy>
SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
- unsigned Opc) const;
+ unsigned Opc, bool Large = false) const;
SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
- unsigned Opc) const;
+ unsigned Opc, bool Large = false) const;
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td
index bebc83a861ae..6ffc8823baee 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrFormats.td
@@ -42,29 +42,33 @@ class Pseudo<dag outs, dag ins, list<dag> pattern = [], string opcstr = "",
let isCodeGenOnly = 1;
}
+class deriveInsnMnemonic<string name> {
+ string ret = !tolower(!subst("@", "_", !subst("_", ".", !subst("__", "@", name))));
+}
+
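
deriveInsnMnemonic turns a TableGen record name into the printed mnemonic: a double underscore becomes a single underscore, remaining single underscores become dots, and the result is lowercased; the '@' is only a temporary placeholder so the two substitutions do not interfere. For example, a record named AMSWAP__DB_W would yield amswap_db.w. A standalone C++ re-implementation of the same substitution chain, for illustration only (deriveMnemonic and replaceAll are invented names):

#include <cctype>
#include <cstdio>
#include <string>

// Mirror of deriveInsnMnemonic's !subst/!tolower chain: "__" -> "@" (temporary
// placeholder), then "_" -> ".", then "@" -> "_", then lowercase.
static std::string deriveMnemonic(std::string s) {
  auto replaceAll = [](std::string str, const std::string &from,
                       const std::string &to) {
    for (size_t pos = 0; (pos = str.find(from, pos)) != std::string::npos;
         pos += to.size())
      str.replace(pos, from.size(), to);
    return str;
  };
  s = replaceAll(s, "__", "@");
  s = replaceAll(s, "_", ".");
  s = replaceAll(s, "@", "_");
  for (char &c : s)
    c = (char)std::tolower((unsigned char)c);
  return s;
}

int main() {
  // "ADDI_W" -> "addi.w", "AMSWAP__DB_W" -> "amswap_db.w"
  printf("%s %s\n", deriveMnemonic("ADDI_W").c_str(),
         deriveMnemonic("AMSWAP__DB_W").c_str());
  return 0;
}
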
// 2R-type
// <opcode | rj | rd>
-class Fmt2R<bits<22> op, dag outs, dag ins, string opcstr, string opnstr,
+class Fmt2R<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<5> rj;
bits<5> rd;
- let Inst{31-10} = op;
+ let Inst{31-0} = op;
let Inst{9-5} = rj;
let Inst{4-0} = rd;
}
// 3R-type
// <opcode | rk | rj | rd>
-class Fmt3R<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
+class Fmt3R<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<5> rk;
bits<5> rj;
bits<5> rd;
- let Inst{31-15} = op;
+ let Inst{31-0} = op;
let Inst{14-10} = rk;
let Inst{9-5} = rj;
let Inst{4-0} = rd;
@@ -72,15 +76,15 @@ class Fmt3R<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
// 3RI2-type
// <opcode | I2 | rk | rj | rd>
-class Fmt3RI2<bits<15> op, dag outs, dag ins, string opcstr, string opnstr,
+class Fmt3RI2<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<2> imm2;
bits<5> rk;
bits<5> rj;
bits<5> rd;
- let Inst{31-17} = op;
+ let Inst{31-0} = op;
let Inst{16-15} = imm2;
let Inst{14-10} = rk;
let Inst{9-5} = rj;
@@ -89,15 +93,15 @@ class Fmt3RI2<bits<15> op, dag outs, dag ins, string opcstr, string opnstr,
// 3RI3-type
// <opcode | I3 | rk | rj | rd>
-class Fmt3RI3<bits<14> op, dag outs, dag ins, string opcstr, string opnstr,
+class Fmt3RI3<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<3> imm3;
bits<5> rk;
bits<5> rj;
bits<5> rd;
- let Inst{31-18} = op;
+ let Inst{31-0} = op;
let Inst{17-15} = imm3;
let Inst{14-10} = rk;
let Inst{9-5} = rj;
@@ -106,14 +110,14 @@ class Fmt3RI3<bits<14> op, dag outs, dag ins, string opcstr, string opnstr,
// 2RI5-type
// <opcode | I5 | rj | rd>
-class Fmt2RI5<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
+class Fmt2RI5<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<5> imm5;
bits<5> rj;
bits<5> rd;
- let Inst{31-15} = op;
+ let Inst{31-0} = op;
let Inst{14-10} = imm5;
let Inst{9-5} = rj;
let Inst{4-0} = rd;
@@ -121,14 +125,14 @@ class Fmt2RI5<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
// 2RI6-type
// <opcode | I6 | rj | rd>
-class Fmt2RI6<bits<16> op, dag outs, dag ins, string opcstr, string opnstr,
+class Fmt2RI6<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<6> imm6;
bits<5> rj;
bits<5> rd;
- let Inst{31-16} = op;
+ let Inst{31-0} = op;
let Inst{15-10} = imm6;
let Inst{9-5} = rj;
let Inst{4-0} = rd;
@@ -136,14 +140,14 @@ class Fmt2RI6<bits<16> op, dag outs, dag ins, string opcstr, string opnstr,
// 2RI8-type
// <opcode | I8 | rj | rd>
-class Fmt2RI8<bits<14> op, dag outs, dag ins, string opcstr, string opnstr,
+class Fmt2RI8<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<8> imm8;
bits<5> rj;
bits<5> rd;
- let Inst{31-18} = op;
+ let Inst{31-0} = op;
let Inst{17-10} = imm8;
let Inst{9-5} = rj;
let Inst{4-0} = rd;
@@ -151,14 +155,14 @@ class Fmt2RI8<bits<14> op, dag outs, dag ins, string opcstr, string opnstr,
// 2RI12-type
// <opcode | I12 | rj | rd>
-class Fmt2RI12<bits<10> op, dag outs, dag ins, string opcstr, string opnstr,
+class Fmt2RI12<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<12> imm12;
bits<5> rj;
bits<5> rd;
- let Inst{31-22} = op;
+ let Inst{31-0} = op;
let Inst{21-10} = imm12;
let Inst{9-5} = rj;
let Inst{4-0} = rd;
@@ -166,14 +170,14 @@ class Fmt2RI12<bits<10> op, dag outs, dag ins, string opcstr, string opnstr,
// 2RI14-type
// <opcode | I14 | rj | rd>
-class Fmt2RI14<bits<8> op, dag outs, dag ins, string opcstr, string opnstr,
+class Fmt2RI14<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<14> imm14;
bits<5> rj;
bits<5> rd;
- let Inst{31-24} = op;
+ let Inst{31-0} = op;
let Inst{23-10} = imm14;
let Inst{9-5} = rj;
let Inst{4-0} = rd;
@@ -181,14 +185,14 @@ class Fmt2RI14<bits<8> op, dag outs, dag ins, string opcstr, string opnstr,
// 2RI16-type
// <opcode | I16 | rj | rd>
-class Fmt2RI16<bits<6> op, dag outs, dag ins, string opcstr, string opnstr,
+class Fmt2RI16<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<16> imm16;
bits<5> rj;
bits<5> rd;
- let Inst{31-26} = op;
+ let Inst{31-0} = op;
let Inst{25-10} = imm16;
let Inst{9-5} = rj;
let Inst{4-0} = rd;
@@ -196,26 +200,26 @@ class Fmt2RI16<bits<6> op, dag outs, dag ins, string opcstr, string opnstr,
// 1RI20-type
// <opcode | I20 | rd>
-class Fmt1RI20<bits<7> op, dag outs, dag ins, string opcstr, string opnstr,
+class Fmt1RI20<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<20> imm20;
bits<5> rd;
- let Inst{31-25} = op;
+ let Inst{31-0} = op;
let Inst{24-5} = imm20;
let Inst{4-0} = rd;
}
// 1RI21-type
// <opcode | I21[15:0] | rj | I21[20:16]>
-class Fmt1RI21<bits<6> op, dag outs, dag ins, string opcstr, string opnstr,
+class Fmt1RI21<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<21> imm21;
bits<5> rj;
- let Inst{31-26} = op;
+ let Inst{31-0} = op;
let Inst{25-10} = imm21{15-0};
let Inst{9-5} = rj;
let Inst{4-0} = imm21{20-16};
@@ -223,40 +227,39 @@ class Fmt1RI21<bits<6> op, dag outs, dag ins, string opcstr, string opnstr,
// I15-type
// <opcode | I15>
-class FmtI15<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
+class FmtI15<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<15> imm15;
- let Inst{31-15} = op;
+ let Inst{31-0} = op;
let Inst{14-0} = imm15;
}
// I26-type
// <opcode | I26[15:0] | I26[25:16]>
-class FmtI26<bits<6> op, dag outs, dag ins, string opcstr, string opnstr,
+class FmtI26<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<26> imm26;
- let Inst{31-26} = op;
+ let Inst{31-0} = op;
let Inst{25-10} = imm26{15-0};
let Inst{9-0} = imm26{25-16};
}
// FmtBSTR_W
-// <opcode[11:1] | msbw | opcode[0] | lsbw | rj | rd>
-class FmtBSTR_W<bits<12> op, dag outs, dag ins, string opcstr, string opnstr,
+// <opcode | msbw | lsbw | rj | rd>
+class FmtBSTR_W<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<5> msbw;
bits<5> lsbw;
bits<5> rj;
bits<5> rd;
- let Inst{31-21} = op{11-1};
+ let Inst{31-0} = op;
let Inst{20-16} = msbw;
- let Inst{15} = op{0};
let Inst{14-10} = lsbw;
let Inst{9-5} = rj;
let Inst{4-0} = rd;
@@ -264,15 +267,15 @@ class FmtBSTR_W<bits<12> op, dag outs, dag ins, string opcstr, string opnstr,
// FmtBSTR_D
// <opcode | msbd | lsbd | rj | rd>
-class FmtBSTR_D<bits<10> op, dag outs, dag ins, string opcstr, string opnstr,
+class FmtBSTR_D<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<6> msbd;
bits<6> lsbd;
bits<5> rj;
bits<5> rd;
- let Inst{31-22} = op;
+ let Inst{31-0} = op;
let Inst{21-16} = msbd;
let Inst{15-10} = lsbd;
let Inst{9-5} = rj;
@@ -280,24 +283,22 @@ class FmtBSTR_D<bits<10> op, dag outs, dag ins, string opcstr, string opnstr,
}
// FmtASRT
-// <opcode | rk | rj | 0x0>
-class FmtASRT<bits<17> op, dag outs, dag ins, string opcstr, string opnstr,
+// <opcode | rk | rj>
+class FmtASRT<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<5> rk;
bits<5> rj;
- let Inst{31-15} = op;
+ let Inst{31-0} = op;
let Inst{14-10} = rk;
let Inst{9-5} = rj;
- let Inst{4-0} = 0x0;
}
// FmtPRELD
// < 0b0010101011 | I12 | rj | I5>
-class FmtPRELD<dag outs, dag ins, string opcstr, string opnstr,
- list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+class FmtPRELD<dag outs, dag ins, string opnstr, list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<12> imm12;
bits<5> rj;
bits<5> imm5;
@@ -310,9 +311,8 @@ class FmtPRELD<dag outs, dag ins, string opcstr, string opnstr,
// FmtPRELDX
// < 0b00111000001011000 | rk | rj | I5>
-class FmtPRELDX<dag outs, dag ins, string opcstr, string opnstr,
- list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+class FmtPRELDX<dag outs, dag ins, string opnstr, list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<5> rk;
bits<5> rj;
bits<5> imm5;
@@ -324,29 +324,28 @@ class FmtPRELDX<dag outs, dag ins, string opcstr, string opnstr,
}
// FmtCSR
-// <opcode[12:5] | csr_num | opcode[4:0] | rd>
-class FmtCSR<bits<13> op, dag outs, dag ins, string opcstr, string opnstr,
+// <opcode | csr_num | rd>
+class FmtCSR<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<14> csr_num;
bits<5> rd;
- let Inst{31-24} = op{12-5};
+ let Inst{31-0} = op;
let Inst{23-10} = csr_num;
- let Inst{9-5} = op{4-0};
let Inst{4-0} = rd;
}
// FmtCSRXCHG
// <opcode | csr_num | rj | rd>
-class FmtCSRXCHG<bits<8> op, dag outs, dag ins, string opcstr, string opnstr,
+class FmtCSRXCHG<bits<32> op, dag outs, dag ins, string opnstr,
list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<14> csr_num;
bits<5> rj;
bits<5> rd;
- let Inst{31-24} = op;
+ let Inst{31-0} = op;
let Inst{23-10} = csr_num;
let Inst{9-5} = rj;
let Inst{4-0} = rd;
@@ -354,9 +353,8 @@ class FmtCSRXCHG<bits<8> op, dag outs, dag ins, string opcstr, string opnstr,
// FmtCACOP
// <0b0000011000 | I12 | rj | I5>
-class FmtCACOP<dag outs, dag ins, string opcstr, string opnstr,
- list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+class FmtCACOP<dag outs, dag ins, string opnstr, list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<12> imm12;
bits<5> rj;
bits<5> op;
@@ -369,16 +367,15 @@ class FmtCACOP<dag outs, dag ins, string opcstr, string opnstr,
// FmtIMM32
// <I32>
-class FmtI32<bits<32> op, string opstr, list<dag> pattern = []>
- : LAInst<(outs), (ins), opstr, "", pattern> {
+class FmtI32<bits<32> op, list<dag> pattern = []>
+ : LAInst<(outs), (ins), deriveInsnMnemonic<NAME>.ret, "", pattern> {
let Inst{31-0} = op;
}
// FmtINVTLB
// <0b00000110010010011 | rk | rj | I5>
-class FmtINVTLB<dag outs, dag ins, string opcstr, string opnstr,
- list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+class FmtINVTLB<dag outs, dag ins, string opnstr, list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<5> rk;
bits<5> rj;
bits<5> op;
@@ -391,9 +388,8 @@ class FmtINVTLB<dag outs, dag ins, string opcstr, string opnstr,
// FmtLDPTE
// <0b00000110010001 | seq | rj | 00000>
-class FmtLDPTE<dag outs, dag ins, string opcstr, string opnstr,
- list<dag> pattern = []>
- : LAInst<outs, ins, opcstr, opnstr, pattern> {
+class FmtLDPTE<dag outs, dag ins, string opnstr, list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
bits<8> seq;
bits<5> rj;
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
index fbbb764b8be1..f5e32c452933 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp
@@ -17,6 +17,7 @@
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
#include "MCTargetDesc/LoongArchMatInt.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/MCInstBuilder.h"
using namespace llvm;
@@ -28,6 +29,13 @@ LoongArchInstrInfo::LoongArchInstrInfo(LoongArchSubtarget &STI)
LoongArch::ADJCALLSTACKUP),
STI(STI) {}
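+// Note: `andi $zero, $zero, 0` is the canonical LoongArch nop; with the ANDI
+// opcode 0x03400000 and all-zero operands it encodes as word 0x03400000.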
+MCInst LoongArchInstrInfo::getNop() const {
+ return MCInstBuilder(LoongArch::ANDI)
+ .addReg(LoongArch::R0)
+ .addReg(LoongArch::R0)
+ .addImm(0);
+}
+
void LoongArchInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, MCRegister DstReg,
@@ -73,9 +81,6 @@ void LoongArchInstrInfo::storeRegToStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, Register SrcReg,
bool IsKill, int FI, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI, Register VReg) const {
- DebugLoc DL;
- if (I != MBB.end())
- DL = I->getDebugLoc();
MachineFunction *MF = MBB.getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
@@ -97,7 +102,7 @@ void LoongArchInstrInfo::storeRegToStackSlot(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
- BuildMI(MBB, I, DL, get(Opcode))
+ BuildMI(MBB, I, DebugLoc(), get(Opcode))
.addReg(SrcReg, getKillRegState(IsKill))
.addFrameIndex(FI)
.addImm(0)
@@ -110,9 +115,6 @@ void LoongArchInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI,
Register VReg) const {
- DebugLoc DL;
- if (I != MBB.end())
- DL = I->getDebugLoc();
MachineFunction *MF = MBB.getParent();
MachineFrameInfo &MFI = MF->getFrameInfo();
@@ -134,7 +136,7 @@ void LoongArchInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
- BuildMI(MBB, I, DL, get(Opcode), DstReg)
+ BuildMI(MBB, I, DebugLoc(), get(Opcode), DstReg)
.addFrameIndex(FI)
.addImm(0)
.addMemOperand(MMO);
@@ -476,12 +478,20 @@ LoongArchInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
{MO_CALL_PLT, "loongarch-call-plt"},
{MO_PCREL_HI, "loongarch-pcrel-hi"},
{MO_PCREL_LO, "loongarch-pcrel-lo"},
+ {MO_PCREL64_LO, "loongarch-pcrel64-lo"},
+ {MO_PCREL64_HI, "loongarch-pcrel64-hi"},
{MO_GOT_PC_HI, "loongarch-got-pc-hi"},
{MO_GOT_PC_LO, "loongarch-got-pc-lo"},
+ {MO_GOT_PC64_LO, "loongarch-got-pc64-lo"},
+ {MO_GOT_PC64_HI, "loongarch-got-pc64-hi"},
{MO_LE_HI, "loongarch-le-hi"},
{MO_LE_LO, "loongarch-le-lo"},
+ {MO_LE64_LO, "loongarch-le64-lo"},
+ {MO_LE64_HI, "loongarch-le64-hi"},
{MO_IE_PC_HI, "loongarch-ie-pc-hi"},
{MO_IE_PC_LO, "loongarch-ie-pc-lo"},
+ {MO_IE_PC64_LO, "loongarch-ie-pc64-lo"},
+ {MO_IE_PC64_HI, "loongarch-ie-pc64-hi"},
{MO_LD_PC_HI, "loongarch-ld-pc-hi"},
{MO_GD_PC_HI, "loongarch-gd-pc-hi"}};
return ArrayRef(TargetFlags);
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
index e2b80460fcaf..cf83abf27a1e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.h
@@ -27,6 +27,8 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo {
public:
explicit LoongArchInstrInfo(LoongArchSubtarget &STI);
+ MCInst getNop() const override;
+
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg,
bool KillSrc) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 75b2adc729d0..ac391ef471b1 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -75,21 +75,21 @@ def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>;
def loongarch_rotr_w : SDNode<"LoongArchISD::ROTR_W", SDT_LoongArchIntBinOpW>;
def loongarch_rotl_w : SDNode<"LoongArchISD::ROTL_W", SDT_LoongArchIntBinOpW>;
def loongarch_crc_w_b_w
- : SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW>;
+ : SDNode<"LoongArchISD::CRC_W_B_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>;
def loongarch_crc_w_h_w
- : SDNode<"LoongArchISD::CRC_W_H_W", SDT_LoongArchIntBinOpW>;
+ : SDNode<"LoongArchISD::CRC_W_H_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>;
def loongarch_crc_w_w_w
- : SDNode<"LoongArchISD::CRC_W_W_W", SDT_LoongArchIntBinOpW>;
+ : SDNode<"LoongArchISD::CRC_W_W_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>;
def loongarch_crc_w_d_w
- : SDNode<"LoongArchISD::CRC_W_D_W", SDT_LoongArchIntBinOpW>;
-def loongarch_crcc_w_b_w
- : SDNode<"LoongArchISD::CRCC_W_B_W", SDT_LoongArchIntBinOpW>;
-def loongarch_crcc_w_h_w
- : SDNode<"LoongArchISD::CRCC_W_H_W", SDT_LoongArchIntBinOpW>;
-def loongarch_crcc_w_w_w
- : SDNode<"LoongArchISD::CRCC_W_W_W", SDT_LoongArchIntBinOpW>;
-def loongarch_crcc_w_d_w
- : SDNode<"LoongArchISD::CRCC_W_D_W", SDT_LoongArchIntBinOpW>;
+ : SDNode<"LoongArchISD::CRC_W_D_W", SDT_LoongArchIntBinOpW, [SDNPHasChain]>;
+def loongarch_crcc_w_b_w : SDNode<"LoongArchISD::CRCC_W_B_W",
+ SDT_LoongArchIntBinOpW, [SDNPHasChain]>;
+def loongarch_crcc_w_h_w : SDNode<"LoongArchISD::CRCC_W_H_W",
+ SDT_LoongArchIntBinOpW, [SDNPHasChain]>;
+def loongarch_crcc_w_w_w : SDNode<"LoongArchISD::CRCC_W_W_W",
+ SDT_LoongArchIntBinOpW, [SDNPHasChain]>;
+def loongarch_crcc_w_d_w : SDNode<"LoongArchISD::CRCC_W_D_W",
+ SDT_LoongArchIntBinOpW, [SDNPHasChain]>;
def loongarch_bstrins
: SDNode<"LoongArchISD::BSTRINS", SDT_LoongArchBStrIns>;
def loongarch_bstrpick
@@ -106,9 +106,11 @@ def loongarch_ibar : SDNode<"LoongArchISD::IBAR", SDT_LoongArchVI,
[SDNPHasChain, SDNPSideEffect]>;
def loongarch_break : SDNode<"LoongArchISD::BREAK", SDT_LoongArchVI,
[SDNPHasChain, SDNPSideEffect]>;
-def loongarch_movfcsr2gr : SDNode<"LoongArchISD::MOVFCSR2GR", SDT_LoongArchMovfcsr2gr>;
-def loongarch_movgr2fcsr : SDNode<"LoongArchISD::MOVGR2FCSR", SDT_LoongArchMovgr2fcsr,
- [SDNPHasChain, SDNPSideEffect]>;
+def loongarch_movfcsr2gr : SDNode<"LoongArchISD::MOVFCSR2GR",
+ SDT_LoongArchMovfcsr2gr, [SDNPHasChain]>;
+def loongarch_movgr2fcsr : SDNode<"LoongArchISD::MOVGR2FCSR",
+ SDT_LoongArchMovgr2fcsr,
+ [SDNPHasChain, SDNPSideEffect]>;
def loongarch_syscall : SDNode<"LoongArchISD::SYSCALL", SDT_LoongArchVI,
[SDNPHasChain, SDNPSideEffect]>;
def loongarch_csrrd : SDNode<"LoongArchISD::CSRRD", SDT_LoongArchCsrrd,
@@ -139,7 +141,7 @@ def loongarch_iocsrwr_d : SDNode<"LoongArchISD::IOCSRWR_D",
SDT_LoongArchIocsrwr,
[SDNPHasChain, SDNPSideEffect]>;
def loongarch_cpucfg : SDNode<"LoongArchISD::CPUCFG", SDTUnaryOp,
- [SDNPHasChain, SDNPSideEffect]>;
+ [SDNPHasChain]>;
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
@@ -180,6 +182,10 @@ def imm32 : Operand<GRLenVT> {
let ParserMatchClass = ImmAsmOperand<"", 32, "">;
}
+def uimm1 : Operand<GRLenVT> {
+ let ParserMatchClass = UImmAsmOperand<1>;
+}
+
def uimm2 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<2>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<2>;
}
@@ -195,6 +201,10 @@ def uimm3 : Operand<GRLenVT> {
let ParserMatchClass = UImmAsmOperand<3>;
}
+def uimm4 : Operand<GRLenVT> {
+ let ParserMatchClass = UImmAsmOperand<4>;
+}
+
def uimm5 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<5>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<5>;
}
@@ -203,6 +213,10 @@ def uimm6 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<6>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<6>;
}
+def uimm7 : Operand<GRLenVT> {
+ let ParserMatchClass = UImmAsmOperand<7>;
+}
+
def uimm8 : Operand<GRLenVT>, ImmLeaf<GRLenVT, [{return isUInt<8>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<8>;
}
@@ -230,6 +244,46 @@ def uimm15 : Operand<GRLenVT>,
let ParserMatchClass = UImmAsmOperand<15>;
}
+def simm5 : Operand<GRLenVT> {
+ let ParserMatchClass = SImmAsmOperand<5>;
+ let DecoderMethod = "decodeSImmOperand<5>";
+}
+
+def simm8 : Operand<GRLenVT> {
+ let ParserMatchClass = SImmAsmOperand<8>;
+ let DecoderMethod = "decodeSImmOperand<8>";
+}
+
+foreach I = [1, 2, 3] in {
+def simm8_lsl # I : Operand<GRLenVT> {
+ let ParserMatchClass = SImmAsmOperand<8, "lsl" # I>;
+ let EncoderMethod = "getImmOpValueAsr<" # I # ">";
+ let DecoderMethod = "decodeSImmOperand<8," # I # ">";
+}
+}
+
+def simm9_lsl3 : Operand<GRLenVT> {
+ let ParserMatchClass = SImmAsmOperand<9, "lsl3">;
+ let EncoderMethod = "getImmOpValueAsr<3>";
+ let DecoderMethod = "decodeSImmOperand<9, 3>";
+}
+
+def simm10 : Operand<GRLenVT> {
+ let ParserMatchClass = SImmAsmOperand<10>;
+}
+
+def simm10_lsl2 : Operand<GRLenVT> {
+ let ParserMatchClass = SImmAsmOperand<10, "lsl2">;
+ let EncoderMethod = "getImmOpValueAsr<2>";
+ let DecoderMethod = "decodeSImmOperand<10, 2>";
+}
+
+def simm11_lsl1 : Operand<GRLenVT> {
+ let ParserMatchClass = SImmAsmOperand<11, "lsl1">;
+ let EncoderMethod = "getImmOpValueAsr<1>";
+ let DecoderMethod = "decodeSImmOperand<11, 1>";
+}
+
class SImm12Operand : Operand<GRLenVT>,
ImmLeaf <GRLenVT, [{return isInt<12>(Imm);}]> {
let DecoderMethod = "decodeSImmOperand<12>";
@@ -247,10 +301,15 @@ def simm12_lu52id : SImm12Operand {
let ParserMatchClass = SImmAsmOperand<12, "lu52id">;
}
+def simm13 : Operand<GRLenVT> {
+ let ParserMatchClass = SImmAsmOperand<13>;
+ let DecoderMethod = "decodeSImmOperand<13>";
+}
+
def simm14_lsl2 : Operand<GRLenVT>,
ImmLeaf<GRLenVT, [{return isShiftedInt<14,2>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<14, "lsl2">;
- let EncoderMethod = "getImmOpValueAsr2";
+ let EncoderMethod = "getImmOpValueAsr<2>";
let DecoderMethod = "decodeSImmOperand<14, 2>";
}
@@ -262,13 +321,13 @@ def simm16 : Operand<GRLenVT> {
def simm16_lsl2 : Operand<GRLenVT>,
ImmLeaf<GRLenVT, [{return isInt<16>(Imm>>2);}]> {
let ParserMatchClass = SImmAsmOperand<16, "lsl2">;
- let EncoderMethod = "getImmOpValueAsr2";
+ let EncoderMethod = "getImmOpValueAsr<2>";
let DecoderMethod = "decodeSImmOperand<16, 2>";
}
def simm16_lsl2_br : Operand<OtherVT> {
let ParserMatchClass = SImmAsmOperand<16, "lsl2">;
- let EncoderMethod = "getImmOpValueAsr2";
+ let EncoderMethod = "getImmOpValueAsr<2>";
let DecoderMethod = "decodeSImmOperand<16, 2>";
}
@@ -294,7 +353,7 @@ def simm20_lu32id : SImm20Operand {
def simm21_lsl2 : Operand<OtherVT> {
let ParserMatchClass = SImmAsmOperand<21, "lsl2">;
- let EncoderMethod = "getImmOpValueAsr2";
+ let EncoderMethod = "getImmOpValueAsr<2>";
let DecoderMethod = "decodeSImmOperand<21, 2>";
}
@@ -309,7 +368,7 @@ def SImm26OperandB: AsmOperandClass {
// A symbol or an imm used in B/PseudoBR.
def simm26_b : Operand<OtherVT> {
let ParserMatchClass = SImm26OperandB;
- let EncoderMethod = "getImmOpValueAsr2";
+ let EncoderMethod = "getImmOpValueAsr<2>";
let DecoderMethod = "decodeSImmOperand<26, 2>";
}
@@ -324,10 +383,21 @@ def SImm26OperandBL: AsmOperandClass {
// A symbol or an imm used in BL/PseudoCALL/PseudoTAIL.
def simm26_symbol : Operand<GRLenVT> {
let ParserMatchClass = SImm26OperandBL;
- let EncoderMethod = "getImmOpValueAsr2";
+ let EncoderMethod = "getImmOpValueAsr<2>";
let DecoderMethod = "decodeSImmOperand<26, 2>";
}
+// A 32-bit signed immediate with the lowest 16 bits zeroed, suitable for
+// direct use with `addu16i.d`.
+def simm16_lsl16 : Operand<GRLenVT>,
+ ImmLeaf<GRLenVT, [{return isShiftedInt<16, 16>(Imm);}]>;
+
+// A 32-bit signed immediate expressible with a pair of `addu16i.d + addi` for
+// use in additions.
+def simm32_hi16_lo12: Operand<GRLenVT>, ImmLeaf<GRLenVT, [{
+ return isShiftedInt<16, 16>(Imm - SignExtend64<12>(Imm));
+}]>;
+
def BareSymbol : AsmOperandClass {
let Name = "BareSymbol";
let RenderMethod = "addImmOperands";
@@ -363,7 +433,28 @@ def ImmSubFrom32 : SDNodeXForm<imm, [{
N->getValueType(0));
}]>;
+// Return the lowest 12 bits of the signed immediate.
+def LO12: SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(SignExtend64<12>(N->getSExtValue()),
+ SDLoc(N), N->getValueType(0));
+}]>;
+
+// Return the higher 16 bits of the signed immediate.
+def HI16 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue() >> 16, SDLoc(N),
+ N->getValueType(0));
+}]>;
+
+// Return the higher 16 bits of the signed immediate, adjusted for use within an
+// `addu16i.d + addi` pair.
+def HI16ForAddu16idAddiPair: SDNodeXForm<imm, [{
+ auto Imm = N->getSExtValue();
+ return CurDAG->getTargetConstant((Imm - SignExtend64<12>(Imm)) >> 16,
+ SDLoc(N), N->getValueType(0));
+}]>;
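+// For example, Imm = 0x2007ff gives LO12 = 0x7ff and
+// HI16ForAddu16idAddiPair = 0x20, so such an add is selected below as
+// `addu16i.d $rd, $rj, 32` followed by `addi.d $rd, $rd, 2047`.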
+
def BaseAddr : ComplexPattern<iPTR, 1, "SelectBaseAddr">;
+def AddrConstant : ComplexPattern<iPTR, 2, "SelectAddrConstant">;
def NonFIBaseAddr : ComplexPattern<iPTR, 1, "selectNonFIBaseAddr">;
def fma_nsz : PatFrag<(ops node:$fj, node:$fk, node:$fa),
@@ -371,226 +462,295 @@ def fma_nsz : PatFrag<(ops node:$fj, node:$fk, node:$fa),
return N->getFlags().hasNoSignedZeros();
}]>;
+// Check if (add r, imm) can be optimized to (ADDI (ADDI r, imm0), imm1),
+// in which imm = imm0 + imm1, and both imm0 & imm1 are simm12.
+def AddiPair : PatLeaf<(imm), [{
+ if (!N->hasOneUse())
+ return false;
+ // The immediate operand must be in range [-4096,-2049] or [2048,4094].
+ int64_t Imm = N->getSExtValue();
+ return (-4096 <= Imm && Imm <= -2049) || (2048 <= Imm && Imm <= 4094);
+}]>;
+
+// Return -2048 if immediate is negative or 2047 if positive.
+def AddiPairImmLarge : SDNodeXForm<imm, [{
+ int64_t Imm = N->getSExtValue() < 0 ? -2048 : 2047;
+ return CurDAG->getTargetConstant(Imm, SDLoc(N),
+ N->getValueType(0));
+}]>;
+
+// Return imm - (imm < 0 ? -2048 : 2047).
+def AddiPairImmSmall : SDNodeXForm<imm, [{
+ int64_t Imm = N->getSExtValue();
+ int64_t Adj = Imm < 0 ? -2048 : 2047;
+ return CurDAG->getTargetConstant(Imm - Adj, SDLoc(N),
+ N->getValueType(0));
+}]>;
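+// For example, (add r, 4000) becomes (ADDI (ADDI r, 2047), 1953) and
+// (add r, -3000) becomes (ADDI (ADDI r, -2048), -952).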
+
+// Check if (mul r, imm) can be optimized to (SLLI (ALSL r, r, i0), i1),
+// in which imm = (1 + (1 << i0)) << i1.
+def AlslSlliImm : PatLeaf<(imm), [{
+ if (!N->hasOneUse())
+ return false;
+ uint64_t Imm = N->getZExtValue();
+ unsigned I1 = llvm::countr_zero(Imm);
+ uint64_t Rem = Imm >> I1;
+ return Rem == 3 || Rem == 5 || Rem == 9 || Rem == 17;
+}]>;
+
+def AlslSlliImmI1 : SDNodeXForm<imm, [{
+ uint64_t Imm = N->getZExtValue();
+ unsigned I1 = llvm::countr_zero(Imm);
+ return CurDAG->getTargetConstant(I1, SDLoc(N),
+ N->getValueType(0));
+}]>;
+
+def AlslSlliImmI0 : SDNodeXForm<imm, [{
+ uint64_t Imm = N->getZExtValue();
+ unsigned I1 = llvm::countr_zero(Imm);
+ uint64_t I0;
+ switch (Imm >> I1) {
+ case 3: I0 = 1; break;
+ case 5: I0 = 2; break;
+ case 9: I0 = 3; break;
+ default: I0 = 4; break;
+ }
+ return CurDAG->getTargetConstant(I0, SDLoc(N),
+ N->getValueType(0));
+}]>;
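+// For example, (mul r, 40) has countr_zero(40) = 3 and 40 >> 3 = 5, so it
+// is selected as (SLLI (ALSL r, r, 2), 3): (r + (r << 2)) << 3 == r * 40.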
+
//===----------------------------------------------------------------------===//
// Instruction Formats
//===----------------------------------------------------------------------===//
include "LoongArchInstrFormats.td"
include "LoongArchFloatInstrFormats.td"
+include "LoongArchLSXInstrFormats.td"
+include "LoongArchLASXInstrFormats.td"
+include "LoongArchLBTInstrFormats.td"
//===----------------------------------------------------------------------===//
// Instruction Class Templates
//===----------------------------------------------------------------------===//
-class ALU_3R<bits<17> op, string opstr>
- : Fmt3R<op, (outs GPR:$rd), (ins GPR:$rj, GPR:$rk), opstr, "$rd, $rj, $rk">;
-class ALU_2R<bits<22> op, string opstr>
- : Fmt2R<op, (outs GPR:$rd), (ins GPR:$rj), opstr, "$rd, $rj">;
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+class ALU_3R<bits<32> op>
+ : Fmt3R<op, (outs GPR:$rd), (ins GPR:$rj, GPR:$rk), "$rd, $rj, $rk">;
+class ALU_2R<bits<32> op>
+ : Fmt2R<op, (outs GPR:$rd), (ins GPR:$rj), "$rd, $rj">;
-class ALU_3RI2<bits<15> op, string opstr, Operand ImmOpnd>
- : Fmt3RI2<op, (outs GPR:$rd), (ins GPR:$rj, GPR:$rk, ImmOpnd:$imm2), opstr,
+class ALU_3RI2<bits<32> op, Operand ImmOpnd>
+ : Fmt3RI2<op, (outs GPR:$rd), (ins GPR:$rj, GPR:$rk, ImmOpnd:$imm2),
"$rd, $rj, $rk, $imm2">;
-class ALU_3RI3<bits<14> op, string opstr, Operand ImmOpnd>
- : Fmt3RI3<op, (outs GPR:$rd), (ins GPR:$rj, GPR:$rk, ImmOpnd:$imm3), opstr,
+class ALU_3RI3<bits<32> op, Operand ImmOpnd>
+ : Fmt3RI3<op, (outs GPR:$rd), (ins GPR:$rj, GPR:$rk, ImmOpnd:$imm3),
"$rd, $rj, $rk, $imm3">;
-class ALU_2RI5<bits<17> op, string opstr, Operand ImmOpnd>
- : Fmt2RI5<op, (outs GPR:$rd), (ins GPR:$rj, ImmOpnd:$imm5), opstr,
+class ALU_2RI5<bits<32> op, Operand ImmOpnd>
+ : Fmt2RI5<op, (outs GPR:$rd), (ins GPR:$rj, ImmOpnd:$imm5),
"$rd, $rj, $imm5">;
-class ALU_2RI6<bits<16> op, string opstr, Operand ImmOpnd>
- : Fmt2RI6<op, (outs GPR:$rd), (ins GPR:$rj, ImmOpnd:$imm6), opstr,
+class ALU_2RI6<bits<32> op, Operand ImmOpnd>
+ : Fmt2RI6<op, (outs GPR:$rd), (ins GPR:$rj, ImmOpnd:$imm6),
"$rd, $rj, $imm6">;
-class ALU_2RI12<bits<10> op, string opstr, Operand ImmOpnd>
- : Fmt2RI12<op, (outs GPR:$rd), (ins GPR:$rj, ImmOpnd:$imm12), opstr,
+class ALU_2RI12<bits<32> op, Operand ImmOpnd>
+ : Fmt2RI12<op, (outs GPR:$rd), (ins GPR:$rj, ImmOpnd:$imm12),
"$rd, $rj, $imm12">;
-class ALU_2RI16<bits<6> op, string opstr, Operand ImmOpnd>
- : Fmt2RI16<op, (outs GPR:$rd), (ins GPR:$rj, ImmOpnd:$imm16), opstr,
+class ALU_2RI16<bits<32> op, Operand ImmOpnd>
+ : Fmt2RI16<op, (outs GPR:$rd), (ins GPR:$rj, ImmOpnd:$imm16),
"$rd, $rj, $imm16">;
-class ALU_1RI20<bits<7> op, string opstr, Operand ImmOpnd>
- : Fmt1RI20<op, (outs GPR:$rd), (ins ImmOpnd:$imm20), opstr, "$rd, $imm20">;
+class ALU_1RI20<bits<32> op, Operand ImmOpnd>
+ : Fmt1RI20<op, (outs GPR:$rd), (ins ImmOpnd:$imm20), "$rd, $imm20">;
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
-class MISC_I15<bits<17> op, string opstr>
- : FmtI15<op, (outs), (ins uimm15:$imm15), opstr, "$imm15">;
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
+class MISC_I15<bits<32> op>
+ : FmtI15<op, (outs), (ins uimm15:$imm15), "$imm15">;
-class RDTIME_2R<bits<22> op, string opstr>
- : Fmt2R<op, (outs GPR:$rd, GPR:$rj), (ins), opstr, "$rd, $rj">;
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
+class RDTIME_2R<bits<32> op>
+ : Fmt2R<op, (outs GPR:$rd, GPR:$rj), (ins), "$rd, $rj">;
-class BrCC_2RI16<bits<6> op, string opstr>
- : Fmt2RI16<op, (outs), (ins GPR:$rj, GPR:$rd, simm16_lsl2_br:$imm16), opstr,
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+class BrCC_2RI16<bits<32> op>
+ : Fmt2RI16<op, (outs), (ins GPR:$rj, GPR:$rd, simm16_lsl2_br:$imm16),
"$rj, $rd, $imm16"> {
let isBranch = 1;
let isTerminator = 1;
}
-class BrCCZ_1RI21<bits<6> op, string opstr>
- : Fmt1RI21<op, (outs), (ins GPR:$rj, simm21_lsl2:$imm21), opstr,
+class BrCCZ_1RI21<bits<32> op>
+ : Fmt1RI21<op, (outs), (ins GPR:$rj, simm21_lsl2:$imm21),
"$rj, $imm21"> {
let isBranch = 1;
let isTerminator = 1;
}
-class Br_I26<bits<6> op, string opstr>
- : FmtI26<op, (outs), (ins simm26_b:$imm26), opstr, "$imm26"> {
+class Br_I26<bits<32> op>
+ : FmtI26<op, (outs), (ins simm26_b:$imm26), "$imm26"> {
let isBranch = 1;
let isTerminator = 1;
}
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
-let mayLoad = 1 in {
-class LOAD_3R<bits<17> op, string opstr>
- : Fmt3R<op, (outs GPR:$rd), (ins GPR:$rj, GPR:$rk), opstr, "$rd, $rj, $rk">;
-class LOAD_2RI12<bits<10> op, string opstr>
- : Fmt2RI12<op, (outs GPR:$rd), (ins GPR:$rj, simm12_addlike:$imm12), opstr,
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+class LOAD_3R<bits<32> op>
+ : Fmt3R<op, (outs GPR:$rd), (ins GPR:$rj, GPR:$rk), "$rd, $rj, $rk">;
+class LOAD_2RI12<bits<32> op>
+ : Fmt2RI12<op, (outs GPR:$rd), (ins GPR:$rj, simm12_addlike:$imm12),
"$rd, $rj, $imm12">;
-class LOAD_2RI14<bits<8> op, string opstr>
- : Fmt2RI14<op, (outs GPR:$rd), (ins GPR:$rj, simm14_lsl2:$imm14), opstr,
+class LOAD_2RI14<bits<32> op>
+ : Fmt2RI14<op, (outs GPR:$rd), (ins GPR:$rj, simm14_lsl2:$imm14),
"$rd, $rj, $imm14">;
-} // mayLoad = 1
+} // hasSideEffects = 0, mayLoad = 1, mayStore = 0
-let mayStore = 1 in {
-class STORE_3R<bits<17> op, string opstr>
- : Fmt3R<op, (outs), (ins GPR:$rd, GPR:$rj, GPR:$rk), opstr,
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
+class STORE_3R<bits<32> op>
+ : Fmt3R<op, (outs), (ins GPR:$rd, GPR:$rj, GPR:$rk),
"$rd, $rj, $rk">;
-class STORE_2RI12<bits<10> op, string opstr>
- : Fmt2RI12<op, (outs), (ins GPR:$rd, GPR:$rj, simm12_addlike:$imm12), opstr,
+class STORE_2RI12<bits<32> op>
+ : Fmt2RI12<op, (outs), (ins GPR:$rd, GPR:$rj, simm12_addlike:$imm12),
"$rd, $rj, $imm12">;
-class STORE_2RI14<bits<8> op, string opstr>
- : Fmt2RI14<op, (outs), (ins GPR:$rd, GPR:$rj, simm14_lsl2:$imm14), opstr,
+class STORE_2RI14<bits<32> op>
+ : Fmt2RI14<op, (outs), (ins GPR:$rd, GPR:$rj, simm14_lsl2:$imm14),
"$rd, $rj, $imm14">;
-} // mayStore = 1
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 1
-let mayLoad = 1, mayStore = 1, Constraints = "@earlyclobber $rd" in
-class AM_3R<bits<17> op, string opstr>
- : Fmt3R<op, (outs GPR:$rd), (ins GPR:$rk, GPRMemAtomic:$rj), opstr,
+let hasSideEffects = 0, mayLoad = 1, mayStore = 1, Constraints = "@earlyclobber $rd" in
+class AM_3R<bits<32> op>
+ : Fmt3R<op, (outs GPR:$rd), (ins GPR:$rk, GPRMemAtomic:$rj),
"$rd, $rk, $rj">;
-let mayLoad = 1 in
-class LLBase<bits<8> op, string opstr>
- : Fmt2RI14<op, (outs GPR:$rd), (ins GPR:$rj, simm14_lsl2:$imm14), opstr,
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+class LLBase<bits<32> op>
+ : Fmt2RI14<op, (outs GPR:$rd), (ins GPR:$rj, simm14_lsl2:$imm14),
"$rd, $rj, $imm14">;
-let mayStore = 1, Constraints = "$rd = $dst" in
-class SCBase<bits<8> op, string opstr>
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1, Constraints = "$rd = $dst" in
+class SCBase<bits<32> op>
: Fmt2RI14<op, (outs GPR:$dst), (ins GPR:$rd, GPR:$rj, simm14_lsl2:$imm14),
- opstr, "$rd, $rj, $imm14">;
+ "$rd, $rj, $imm14">;
-class IOCSRRD<bits<22> op, string opstr>
- : Fmt2R<op, (outs GPR:$rd), (ins GPR:$rj), opstr, "$rd, $rj">;
+let hasSideEffects = 1 in
+class IOCSRRD<bits<32> op>
+ : Fmt2R<op, (outs GPR:$rd), (ins GPR:$rj), "$rd, $rj">;
-class IOCSRWR<bits<22> op, string opstr>
- : Fmt2R<op, (outs), (ins GPR:$rd, GPR:$rj), opstr, "$rd, $rj">;
+let hasSideEffects = 1 in
+class IOCSRWR<bits<32> op>
+ : Fmt2R<op, (outs), (ins GPR:$rd, GPR:$rj), "$rd, $rj">;
//===----------------------------------------------------------------------===//
// Basic Integer Instructions
//===----------------------------------------------------------------------===//
// Arithmetic Operation Instructions
-def ADD_W : ALU_3R<0b00000000000100000, "add.w">;
-def SUB_W : ALU_3R<0b00000000000100010, "sub.w">;
-def ADDI_W : ALU_2RI12<0b0000001010, "addi.w", simm12_addlike>;
-def ALSL_W : ALU_3RI2<0b000000000000010, "alsl.w", uimm2_plus1>;
-def LU12I_W : ALU_1RI20<0b0001010, "lu12i.w", simm20_lu12iw>;
-def SLT : ALU_3R<0b00000000000100100, "slt">;
-def SLTU : ALU_3R<0b00000000000100101, "sltu">;
-def SLTI : ALU_2RI12<0b0000001000, "slti", simm12>;
-def SLTUI : ALU_2RI12<0b0000001001, "sltui", simm12>;
-def PCADDI : ALU_1RI20<0b0001100, "pcaddi", simm20>;
-def PCADDU12I : ALU_1RI20<0b0001110, "pcaddu12i", simm20>;
-def PCALAU12I : ALU_1RI20<0b0001101, "pcalau12i", simm20_pcalau12i>;
-def AND : ALU_3R<0b00000000000101001, "and">;
-def OR : ALU_3R<0b00000000000101010, "or">;
-def NOR : ALU_3R<0b00000000000101000, "nor">;
-def XOR : ALU_3R<0b00000000000101011, "xor">;
-def ANDN : ALU_3R<0b00000000000101101, "andn">;
-def ORN : ALU_3R<0b00000000000101100, "orn">;
-def ANDI : ALU_2RI12<0b0000001101, "andi", uimm12>;
-def ORI : ALU_2RI12<0b0000001110, "ori", uimm12_ori>;
-def XORI : ALU_2RI12<0b0000001111, "xori", uimm12>;
-def MUL_W : ALU_3R<0b00000000000111000, "mul.w">;
-def MULH_W : ALU_3R<0b00000000000111001, "mulh.w">;
-def MULH_WU : ALU_3R<0b00000000000111010, "mulh.wu">;
+def ADD_W : ALU_3R<0x00100000>;
+def SUB_W : ALU_3R<0x00110000>;
+def ADDI_W : ALU_2RI12<0x02800000, simm12_addlike>;
+def ALSL_W : ALU_3RI2<0x00040000, uimm2_plus1>;
+def LU12I_W : ALU_1RI20<0x14000000, simm20_lu12iw>;
+def SLT : ALU_3R<0x00120000>;
+def SLTU : ALU_3R<0x00128000>;
+def SLTI : ALU_2RI12<0x02000000, simm12>;
+def SLTUI : ALU_2RI12<0x02400000, simm12>;
+def PCADDI : ALU_1RI20<0x18000000, simm20>;
+def PCADDU12I : ALU_1RI20<0x1c000000, simm20>;
+def PCALAU12I : ALU_1RI20<0x1a000000, simm20_pcalau12i>;
+def AND : ALU_3R<0x00148000>;
+def OR : ALU_3R<0x00150000>;
+def NOR : ALU_3R<0x00140000>;
+def XOR : ALU_3R<0x00158000>;
+def ANDN : ALU_3R<0x00168000>;
+def ORN : ALU_3R<0x00160000>;
+def ANDI : ALU_2RI12<0x03400000, uimm12>;
+def ORI : ALU_2RI12<0x03800000, uimm12_ori>;
+def XORI : ALU_2RI12<0x03c00000, uimm12>;
+def MUL_W : ALU_3R<0x001c0000>;
+def MULH_W : ALU_3R<0x001c8000>;
+def MULH_WU : ALU_3R<0x001d0000>;
let usesCustomInserter = true in {
-def DIV_W : ALU_3R<0b00000000001000000, "div.w">;
-def MOD_W : ALU_3R<0b00000000001000001, "mod.w">;
-def DIV_WU : ALU_3R<0b00000000001000010, "div.wu">;
-def MOD_WU : ALU_3R<0b00000000001000011, "mod.wu">;
+def DIV_W : ALU_3R<0x00200000>;
+def MOD_W : ALU_3R<0x00208000>;
+def DIV_WU : ALU_3R<0x00210000>;
+def MOD_WU : ALU_3R<0x00218000>;
} // usesCustomInserter = true
// Bit-shift Instructions
-def SLL_W : ALU_3R<0b00000000000101110, "sll.w">;
-def SRL_W : ALU_3R<0b00000000000101111, "srl.w">;
-def SRA_W : ALU_3R<0b00000000000110000, "sra.w">;
-def ROTR_W : ALU_3R<0b00000000000110110, "rotr.w">;
+def SLL_W : ALU_3R<0x00170000>;
+def SRL_W : ALU_3R<0x00178000>;
+def SRA_W : ALU_3R<0x00180000>;
+def ROTR_W : ALU_3R<0x001b0000>;
-def SLLI_W : ALU_2RI5<0b00000000010000001, "slli.w", uimm5>;
-def SRLI_W : ALU_2RI5<0b00000000010001001, "srli.w", uimm5>;
-def SRAI_W : ALU_2RI5<0b00000000010010001, "srai.w", uimm5>;
-def ROTRI_W : ALU_2RI5<0b00000000010011001, "rotri.w", uimm5>;
+def SLLI_W : ALU_2RI5<0x00408000, uimm5>;
+def SRLI_W : ALU_2RI5<0x00448000, uimm5>;
+def SRAI_W : ALU_2RI5<0x00488000, uimm5>;
+def ROTRI_W : ALU_2RI5<0x004c8000, uimm5>;
// Bit-manipulation Instructions
-def EXT_W_B : ALU_2R<0b0000000000000000010111, "ext.w.b">;
-def EXT_W_H : ALU_2R<0b0000000000000000010110, "ext.w.h">;
-def CLO_W : ALU_2R<0b0000000000000000000100, "clo.w">;
-def CLZ_W : ALU_2R<0b0000000000000000000101, "clz.w">;
-def CTO_W : ALU_2R<0b0000000000000000000110, "cto.w">;
-def CTZ_W : ALU_2R<0b0000000000000000000111, "ctz.w">;
-def BYTEPICK_W : ALU_3RI2<0b000000000000100, "bytepick.w", uimm2>;
-def REVB_2H : ALU_2R<0b0000000000000000001100, "revb.2h">;
-def BITREV_4B : ALU_2R<0b0000000000000000010010, "bitrev.4b">;
-def BITREV_W : ALU_2R<0b0000000000000000010100, "bitrev.w">;
+def EXT_W_B : ALU_2R<0x00005c00>;
+def EXT_W_H : ALU_2R<0x00005800>;
+def CLO_W : ALU_2R<0x00001000>;
+def CLZ_W : ALU_2R<0x00001400>;
+def CTO_W : ALU_2R<0x00001800>;
+def CTZ_W : ALU_2R<0x00001c00>;
+def BYTEPICK_W : ALU_3RI2<0x00080000, uimm2>;
+def REVB_2H : ALU_2R<0x00003000>;
+def BITREV_4B : ALU_2R<0x00004800>;
+def BITREV_W : ALU_2R<0x00005000>;
let Constraints = "$rd = $dst" in {
-def BSTRINS_W : FmtBSTR_W<0b000000000110, (outs GPR:$dst),
+def BSTRINS_W : FmtBSTR_W<0x00600000, (outs GPR:$dst),
(ins GPR:$rd, GPR:$rj, uimm5:$msbw, uimm5:$lsbw),
- "bstrins.w", "$rd, $rj, $msbw, $lsbw">;
+ "$rd, $rj, $msbw, $lsbw">;
}
-def BSTRPICK_W : FmtBSTR_W<0b000000000111, (outs GPR:$rd),
+def BSTRPICK_W : FmtBSTR_W<0x00608000, (outs GPR:$rd),
(ins GPR:$rj, uimm5:$msbw, uimm5:$lsbw),
- "bstrpick.w", "$rd, $rj, $msbw, $lsbw">;
-def MASKEQZ : ALU_3R<0b00000000000100110, "maskeqz">;
-def MASKNEZ : ALU_3R<0b00000000000100111, "masknez">;
+ "$rd, $rj, $msbw, $lsbw">;
+def MASKEQZ : ALU_3R<0x00130000>;
+def MASKNEZ : ALU_3R<0x00138000>;
// Branch Instructions
-def BEQ : BrCC_2RI16<0b010110, "beq">;
-def BNE : BrCC_2RI16<0b010111, "bne">;
-def BLT : BrCC_2RI16<0b011000, "blt">;
-def BGE : BrCC_2RI16<0b011001, "bge">;
-def BLTU : BrCC_2RI16<0b011010, "bltu">;
-def BGEU : BrCC_2RI16<0b011011, "bgeu">;
-def BEQZ : BrCCZ_1RI21<0b010000, "beqz">;
-def BNEZ : BrCCZ_1RI21<0b010001, "bnez">;
-def B : Br_I26<0b010100, "b">;
-
-let isCall = 1, Defs=[R1] in
-def BL : FmtI26<0b010101, (outs), (ins simm26_symbol:$imm26), "bl", "$imm26">;
-def JIRL : Fmt2RI16<0b010011, (outs GPR:$rd),
- (ins GPR:$rj, simm16_lsl2:$imm16), "jirl",
- "$rd, $rj, $imm16">;
+def BEQ : BrCC_2RI16<0x58000000>;
+def BNE : BrCC_2RI16<0x5c000000>;
+def BLT : BrCC_2RI16<0x60000000>;
+def BGE : BrCC_2RI16<0x64000000>;
+def BLTU : BrCC_2RI16<0x68000000>;
+def BGEU : BrCC_2RI16<0x6c000000>;
+def BEQZ : BrCCZ_1RI21<0x40000000>;
+def BNEZ : BrCCZ_1RI21<0x44000000>;
+def B : Br_I26<0x50000000>;
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCall = 1, Defs=[R1] in
+def BL : FmtI26<0x54000000, (outs), (ins simm26_symbol:$imm26), "$imm26">;
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+def JIRL : Fmt2RI16<0x4c000000, (outs GPR:$rd),
+ (ins GPR:$rj, simm16_lsl2:$imm16), "$rd, $rj, $imm16">;
// Common Memory Access Instructions
-def LD_B : LOAD_2RI12<0b0010100000, "ld.b">;
-def LD_H : LOAD_2RI12<0b0010100001, "ld.h">;
-def LD_W : LOAD_2RI12<0b0010100010, "ld.w">;
-def LD_BU : LOAD_2RI12<0b0010101000, "ld.bu">;
-def LD_HU : LOAD_2RI12<0b0010101001, "ld.hu">;
-def ST_B : STORE_2RI12<0b0010100100, "st.b">;
-def ST_H : STORE_2RI12<0b0010100101, "st.h">;
-def ST_W : STORE_2RI12<0b0010100110, "st.w">;
-def PRELD : FmtPRELD<(outs), (ins uimm5:$imm5, GPR:$rj, simm12:$imm12), "preld",
+def LD_B : LOAD_2RI12<0x28000000>;
+def LD_H : LOAD_2RI12<0x28400000>;
+def LD_W : LOAD_2RI12<0x28800000>;
+def LD_BU : LOAD_2RI12<0x2a000000>;
+def LD_HU : LOAD_2RI12<0x2a400000>;
+def ST_B : STORE_2RI12<0x29000000>;
+def ST_H : STORE_2RI12<0x29400000>;
+def ST_W : STORE_2RI12<0x29800000>;
+let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in
+def PRELD : FmtPRELD<(outs), (ins uimm5:$imm5, GPR:$rj, simm12:$imm12),
"$imm5, $rj, $imm12">;
// Atomic Memory Access Instructions
-def LL_W : LLBase<0b00100000, "ll.w">;
-def SC_W : SCBase<0b00100001, "sc.w">;
+def LL_W : LLBase<0x20000000>;
+def SC_W : SCBase<0x21000000>;
// Barrier Instructions
-def DBAR : MISC_I15<0b00111000011100100, "dbar">;
-def IBAR : MISC_I15<0b00111000011100101, "ibar">;
+def DBAR : MISC_I15<0x38720000>;
+def IBAR : MISC_I15<0x38728000>;
// Other Miscellaneous Instructions
-def SYSCALL : MISC_I15<0b00000000001010110, "syscall">;
-def BREAK : MISC_I15<0b00000000001010100, "break">;
-def RDTIMEL_W : RDTIME_2R<0b0000000000000000011000, "rdtimel.w">;
-def RDTIMEH_W : RDTIME_2R<0b0000000000000000011001, "rdtimeh.w">;
-def CPUCFG : ALU_2R<0b0000000000000000011011, "cpucfg">;
+def SYSCALL : MISC_I15<0x002b0000>;
+def BREAK : MISC_I15<0x002a0000>;
+def RDTIMEL_W : RDTIME_2R<0x00006000>;
+def RDTIMEH_W : RDTIME_2R<0x00006400>;
+def CPUCFG : ALU_2R<0x00006c00>;
// Cache Maintenance Instructions
-def CACOP : FmtCACOP<(outs), (ins uimm5:$op, GPR:$rj, simm12:$imm12), "cacop",
+def CACOP : FmtCACOP<(outs), (ins uimm5:$op, GPR:$rj, simm12:$imm12),
"$op, $rj, $imm12">;
/// LA64 instructions
@@ -598,159 +758,161 @@ def CACOP : FmtCACOP<(outs), (ins uimm5:$op, GPR:$rj, simm12:$imm12), "cacop",
let Predicates = [IsLA64] in {
// Arithmetic Operation Instructions for 64-bits
-def ADD_D : ALU_3R<0b00000000000100001, "add.d">;
-def SUB_D : ALU_3R<0b00000000000100011, "sub.d">;
-def ADDI_D : ALU_2RI12<0b0000001011, "addi.d", simm12_addlike>;
-def ADDU16I_D : ALU_2RI16<0b000100, "addu16i.d", simm16>;
-def ALSL_WU : ALU_3RI2<0b000000000000011, "alsl.wu", uimm2_plus1>;
-def ALSL_D : ALU_3RI2<0b000000000010110, "alsl.d", uimm2_plus1>;
+def ADD_D : ALU_3R<0x00108000>;
+def SUB_D : ALU_3R<0x00118000>;
+def ADDI_D : ALU_2RI12<0x02c00000, simm12_addlike>;
+def ADDU16I_D : ALU_2RI16<0x10000000, simm16>;
+def ALSL_WU : ALU_3RI2<0x00060000, uimm2_plus1>;
+def ALSL_D : ALU_3RI2<0x002c0000, uimm2_plus1>;
let Constraints = "$rd = $dst" in {
-def LU32I_D : Fmt1RI20<0b0001011, (outs GPR:$dst),
- (ins GPR:$rd, simm20_lu32id:$imm20), "lu32i.d",
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+def LU32I_D : Fmt1RI20<0x16000000, (outs GPR:$dst),
+ (ins GPR:$rd, simm20_lu32id:$imm20),
"$rd, $imm20">;
}
-def LU52I_D : ALU_2RI12<0b0000001100, "lu52i.d", simm12_lu52id>;
-def PCADDU18I : ALU_1RI20<0b0001111, "pcaddu18i", simm20>;
-def MUL_D : ALU_3R<0b00000000000111011, "mul.d">;
-def MULH_D : ALU_3R<0b00000000000111100, "mulh.d">;
-def MULH_DU : ALU_3R<0b00000000000111101, "mulh.du">;
-def MULW_D_W : ALU_3R<0b00000000000111110, "mulw.d.w">;
-def MULW_D_WU : ALU_3R<0b00000000000111111, "mulw.d.wu">;
+def LU52I_D : ALU_2RI12<0x03000000, simm12_lu52id>;
+def PCADDU18I : ALU_1RI20<0x1e000000, simm20>;
+def MUL_D : ALU_3R<0x001d8000>;
+def MULH_D : ALU_3R<0x001e0000>;
+def MULH_DU : ALU_3R<0x001e8000>;
+def MULW_D_W : ALU_3R<0x001f0000>;
+def MULW_D_WU : ALU_3R<0x001f8000>;
let usesCustomInserter = true in {
-def DIV_D : ALU_3R<0b00000000001000100, "div.d">;
-def MOD_D : ALU_3R<0b00000000001000101, "mod.d">;
-def DIV_DU : ALU_3R<0b00000000001000110, "div.du">;
-def MOD_DU : ALU_3R<0b00000000001000111, "mod.du">;
+def DIV_D : ALU_3R<0x00220000>;
+def MOD_D : ALU_3R<0x00228000>;
+def DIV_DU : ALU_3R<0x00230000>;
+def MOD_DU : ALU_3R<0x00238000>;
} // usesCustomInserter = true
// Bit-shift Instructions for 64-bits
-def SLL_D : ALU_3R<0b00000000000110001, "sll.d">;
-def SRL_D : ALU_3R<0b00000000000110010, "srl.d">;
-def SRA_D : ALU_3R<0b00000000000110011, "sra.d">;
-def ROTR_D : ALU_3R<0b00000000000110111, "rotr.d">;
-def SLLI_D : ALU_2RI6<0b0000000001000001, "slli.d", uimm6>;
-def SRLI_D : ALU_2RI6<0b0000000001000101, "srli.d", uimm6>;
-def SRAI_D : ALU_2RI6<0b0000000001001001, "srai.d", uimm6>;
-def ROTRI_D : ALU_2RI6<0b0000000001001101, "rotri.d", uimm6>;
+def SLL_D : ALU_3R<0x00188000>;
+def SRL_D : ALU_3R<0x00190000>;
+def SRA_D : ALU_3R<0x00198000>;
+def ROTR_D : ALU_3R<0x001b8000>;
+def SLLI_D : ALU_2RI6<0x00410000, uimm6>;
+def SRLI_D : ALU_2RI6<0x00450000, uimm6>;
+def SRAI_D : ALU_2RI6<0x00490000, uimm6>;
+def ROTRI_D : ALU_2RI6<0x004d0000, uimm6>;
// Bit-manipulation Instructions for 64-bits
-def CLO_D : ALU_2R<0b0000000000000000001000, "clo.d">;
-def CLZ_D : ALU_2R<0b0000000000000000001001, "clz.d">;
-def CTO_D : ALU_2R<0b0000000000000000001010, "cto.d">;
-def CTZ_D : ALU_2R<0b0000000000000000001011, "ctz.d">;
-def BYTEPICK_D : ALU_3RI3<0b00000000000011, "bytepick.d", uimm3>;
-def REVB_4H : ALU_2R<0b0000000000000000001101, "revb.4h">;
-def REVB_2W : ALU_2R<0b0000000000000000001110, "revb.2w">;
-def REVB_D : ALU_2R<0b0000000000000000001111, "revb.d">;
-def REVH_2W : ALU_2R<0b0000000000000000010000, "revh.2w">;
-def REVH_D : ALU_2R<0b0000000000000000010001, "revh.d">;
-def BITREV_8B : ALU_2R<0b0000000000000000010011, "bitrev.8b">;
-def BITREV_D : ALU_2R<0b0000000000000000010101, "bitrev.d">;
+def CLO_D : ALU_2R<0x00002000>;
+def CLZ_D : ALU_2R<0x00002400>;
+def CTO_D : ALU_2R<0x00002800>;
+def CTZ_D : ALU_2R<0x00002c00>;
+def BYTEPICK_D : ALU_3RI3<0x000c0000, uimm3>;
+def REVB_4H : ALU_2R<0x00003400>;
+def REVB_2W : ALU_2R<0x00003800>;
+def REVB_D : ALU_2R<0x00003c00>;
+def REVH_2W : ALU_2R<0x00004000>;
+def REVH_D : ALU_2R<0x00004400>;
+def BITREV_8B : ALU_2R<0x00004c00>;
+def BITREV_D : ALU_2R<0x00005400>;
let Constraints = "$rd = $dst" in {
-def BSTRINS_D : FmtBSTR_D<0b0000000010, (outs GPR:$dst),
+def BSTRINS_D : FmtBSTR_D<0x00800000, (outs GPR:$dst),
(ins GPR:$rd, GPR:$rj, uimm6:$msbd, uimm6:$lsbd),
- "bstrins.d", "$rd, $rj, $msbd, $lsbd">;
+ "$rd, $rj, $msbd, $lsbd">;
}
-def BSTRPICK_D : FmtBSTR_D<0b0000000011, (outs GPR:$rd),
+def BSTRPICK_D : FmtBSTR_D<0x00c00000, (outs GPR:$rd),
(ins GPR:$rj, uimm6:$msbd, uimm6:$lsbd),
- "bstrpick.d", "$rd, $rj, $msbd, $lsbd">;
+ "$rd, $rj, $msbd, $lsbd">;
// Common Memory Access Instructions for 64-bits
-def LD_WU : LOAD_2RI12<0b0010101010, "ld.wu">;
-def LD_D : LOAD_2RI12<0b0010100011, "ld.d">;
-def ST_D : STORE_2RI12<0b0010100111, "st.d">;
-def LDX_B : LOAD_3R<0b00111000000000000, "ldx.b">;
-def LDX_H : LOAD_3R<0b00111000000001000, "ldx.h">;
-def LDX_W : LOAD_3R<0b00111000000010000, "ldx.w">;
-def LDX_D : LOAD_3R<0b00111000000011000, "ldx.d">;
-def LDX_BU : LOAD_3R<0b00111000001000000, "ldx.bu">;
-def LDX_HU : LOAD_3R<0b00111000001001000, "ldx.hu">;
-def LDX_WU : LOAD_3R<0b00111000001010000, "ldx.wu">;
-def STX_B : STORE_3R<0b00111000000100000, "stx.b">;
-def STX_H : STORE_3R<0b00111000000101000, "stx.h">;
-def STX_W : STORE_3R<0b00111000000110000, "stx.w">;
-def STX_D : STORE_3R<0b00111000000111000, "stx.d">;
-def LDPTR_W : LOAD_2RI14<0b00100100, "ldptr.w">;
-def LDPTR_D : LOAD_2RI14<0b00100110, "ldptr.d">;
-def STPTR_W : STORE_2RI14<0b00100101, "stptr.w">;
-def STPTR_D : STORE_2RI14<0b00100111, "stptr.d">;
-def PRELDX : FmtPRELDX<(outs), (ins uimm5:$imm5, GPR:$rj, GPR:$rk), "preldx",
+def LD_WU : LOAD_2RI12<0x2a800000>;
+def LD_D : LOAD_2RI12<0x28c00000>;
+def ST_D : STORE_2RI12<0x29c00000>;
+def LDX_B : LOAD_3R<0x38000000>;
+def LDX_H : LOAD_3R<0x38040000>;
+def LDX_W : LOAD_3R<0x38080000>;
+def LDX_D : LOAD_3R<0x380c0000>;
+def LDX_BU : LOAD_3R<0x38200000>;
+def LDX_HU : LOAD_3R<0x38240000>;
+def LDX_WU : LOAD_3R<0x38280000>;
+def STX_B : STORE_3R<0x38100000>;
+def STX_H : STORE_3R<0x38140000>;
+def STX_W : STORE_3R<0x38180000>;
+def STX_D : STORE_3R<0x381c0000>;
+def LDPTR_W : LOAD_2RI14<0x24000000>;
+def LDPTR_D : LOAD_2RI14<0x26000000>;
+def STPTR_W : STORE_2RI14<0x25000000>;
+def STPTR_D : STORE_2RI14<0x27000000>;
+let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in
+def PRELDX : FmtPRELDX<(outs), (ins uimm5:$imm5, GPR:$rj, GPR:$rk),
"$imm5, $rj, $rk">;
// Bound Check Memory Access Instructions
-def LDGT_B : LOAD_3R<0b00111000011110000, "ldgt.b">;
-def LDGT_H : LOAD_3R<0b00111000011110001, "ldgt.h">;
-def LDGT_W : LOAD_3R<0b00111000011110010, "ldgt.w">;
-def LDGT_D : LOAD_3R<0b00111000011110011, "ldgt.d">;
-def LDLE_B : LOAD_3R<0b00111000011110100, "ldle.b">;
-def LDLE_H : LOAD_3R<0b00111000011110101, "ldle.h">;
-def LDLE_W : LOAD_3R<0b00111000011110110, "ldle.w">;
-def LDLE_D : LOAD_3R<0b00111000011110111, "ldle.d">;
-def STGT_B : STORE_3R<0b00111000011111000, "stgt.b">;
-def STGT_H : STORE_3R<0b00111000011111001, "stgt.h">;
-def STGT_W : STORE_3R<0b00111000011111010, "stgt.w">;
-def STGT_D : STORE_3R<0b00111000011111011, "stgt.d">;
-def STLE_B : STORE_3R<0b00111000011111100, "stle.b">;
-def STLE_H : STORE_3R<0b00111000011111101, "stle.h">;
-def STLE_W : STORE_3R<0b00111000011111110, "stle.w">;
-def STLE_D : STORE_3R<0b00111000011111111, "stle.d">;
+def LDGT_B : LOAD_3R<0x38780000>;
+def LDGT_H : LOAD_3R<0x38788000>;
+def LDGT_W : LOAD_3R<0x38790000>;
+def LDGT_D : LOAD_3R<0x38798000>;
+def LDLE_B : LOAD_3R<0x387a0000>;
+def LDLE_H : LOAD_3R<0x387a8000>;
+def LDLE_W : LOAD_3R<0x387b0000>;
+def LDLE_D : LOAD_3R<0x387b8000>;
+def STGT_B : STORE_3R<0x387c0000>;
+def STGT_H : STORE_3R<0x387c8000>;
+def STGT_W : STORE_3R<0x387d0000>;
+def STGT_D : STORE_3R<0x387d8000>;
+def STLE_B : STORE_3R<0x387e0000>;
+def STLE_H : STORE_3R<0x387e8000>;
+def STLE_W : STORE_3R<0x387f0000>;
+def STLE_D : STORE_3R<0x387f8000>;
// Atomic Memory Access Instructions for 64-bits
-def AMSWAP_W : AM_3R<0b00111000011000000, "amswap.w">;
-def AMSWAP_D : AM_3R<0b00111000011000001, "amswap.d">;
-def AMADD_W : AM_3R<0b00111000011000010, "amadd.w">;
-def AMADD_D : AM_3R<0b00111000011000011, "amadd.d">;
-def AMAND_W : AM_3R<0b00111000011000100, "amand.w">;
-def AMAND_D : AM_3R<0b00111000011000101, "amand.d">;
-def AMOR_W : AM_3R<0b00111000011000110, "amor.w">;
-def AMOR_D : AM_3R<0b00111000011000111, "amor.d">;
-def AMXOR_W : AM_3R<0b00111000011001000, "amxor.w">;
-def AMXOR_D : AM_3R<0b00111000011001001, "amxor.d">;
-def AMMAX_W : AM_3R<0b00111000011001010, "ammax.w">;
-def AMMAX_D : AM_3R<0b00111000011001011, "ammax.d">;
-def AMMIN_W : AM_3R<0b00111000011001100, "ammin.w">;
-def AMMIN_D : AM_3R<0b00111000011001101, "ammin.d">;
-def AMMAX_WU : AM_3R<0b00111000011001110, "ammax.wu">;
-def AMMAX_DU : AM_3R<0b00111000011001111, "ammax.du">;
-def AMMIN_WU : AM_3R<0b00111000011010000, "ammin.wu">;
-def AMMIN_DU : AM_3R<0b00111000011010001, "ammin.du">;
-def AMSWAP_DB_W : AM_3R<0b00111000011010010, "amswap_db.w">;
-def AMSWAP_DB_D : AM_3R<0b00111000011010011, "amswap_db.d">;
-def AMADD_DB_W : AM_3R<0b00111000011010100, "amadd_db.w">;
-def AMADD_DB_D : AM_3R<0b00111000011010101, "amadd_db.d">;
-def AMAND_DB_W : AM_3R<0b00111000011010110, "amand_db.w">;
-def AMAND_DB_D : AM_3R<0b00111000011010111, "amand_db.d">;
-def AMOR_DB_W : AM_3R<0b00111000011011000, "amor_db.w">;
-def AMOR_DB_D : AM_3R<0b00111000011011001, "amor_db.d">;
-def AMXOR_DB_W : AM_3R<0b00111000011011010, "amxor_db.w">;
-def AMXOR_DB_D : AM_3R<0b00111000011011011, "amxor_db.d">;
-def AMMAX_DB_W : AM_3R<0b00111000011011100, "ammax_db.w">;
-def AMMAX_DB_D : AM_3R<0b00111000011011101, "ammax_db.d">;
-def AMMIN_DB_W : AM_3R<0b00111000011011110, "ammin_db.w">;
-def AMMIN_DB_D : AM_3R<0b00111000011011111, "ammin_db.d">;
-def AMMAX_DB_WU : AM_3R<0b00111000011100000, "ammax_db.wu">;
-def AMMAX_DB_DU : AM_3R<0b00111000011100001, "ammax_db.du">;
-def AMMIN_DB_WU : AM_3R<0b00111000011100010, "ammin_db.wu">;
-def AMMIN_DB_DU : AM_3R<0b00111000011100011, "ammin_db.du">;
-def LL_D : LLBase<0b00100010, "ll.d">;
-def SC_D : SCBase<0b00100011, "sc.d">;
+def AMSWAP_W : AM_3R<0x38600000>;
+def AMSWAP_D : AM_3R<0x38608000>;
+def AMADD_W : AM_3R<0x38610000>;
+def AMADD_D : AM_3R<0x38618000>;
+def AMAND_W : AM_3R<0x38620000>;
+def AMAND_D : AM_3R<0x38628000>;
+def AMOR_W : AM_3R<0x38630000>;
+def AMOR_D : AM_3R<0x38638000>;
+def AMXOR_W : AM_3R<0x38640000>;
+def AMXOR_D : AM_3R<0x38648000>;
+def AMMAX_W : AM_3R<0x38650000>;
+def AMMAX_D : AM_3R<0x38658000>;
+def AMMIN_W : AM_3R<0x38660000>;
+def AMMIN_D : AM_3R<0x38668000>;
+def AMMAX_WU : AM_3R<0x38670000>;
+def AMMAX_DU : AM_3R<0x38678000>;
+def AMMIN_WU : AM_3R<0x38680000>;
+def AMMIN_DU : AM_3R<0x38688000>;
+def AMSWAP__DB_W : AM_3R<0x38690000>;
+def AMSWAP__DB_D : AM_3R<0x38698000>;
+def AMADD__DB_W : AM_3R<0x386a0000>;
+def AMADD__DB_D : AM_3R<0x386a8000>;
+def AMAND__DB_W : AM_3R<0x386b0000>;
+def AMAND__DB_D : AM_3R<0x386b8000>;
+def AMOR__DB_W : AM_3R<0x386c0000>;
+def AMOR__DB_D : AM_3R<0x386c8000>;
+def AMXOR__DB_W : AM_3R<0x386d0000>;
+def AMXOR__DB_D : AM_3R<0x386d8000>;
+def AMMAX__DB_W : AM_3R<0x386e0000>;
+def AMMAX__DB_D : AM_3R<0x386e8000>;
+def AMMIN__DB_W : AM_3R<0x386f0000>;
+def AMMIN__DB_D : AM_3R<0x386f8000>;
+def AMMAX__DB_WU : AM_3R<0x38700000>;
+def AMMAX__DB_DU : AM_3R<0x38708000>;
+def AMMIN__DB_WU : AM_3R<0x38710000>;
+def AMMIN__DB_DU : AM_3R<0x38718000>;
+def LL_D : LLBase<0x22000000>;
+def SC_D : SCBase<0x23000000>;
// CRC Check Instructions
-def CRC_W_B_W : ALU_3R<0b00000000001001000, "crc.w.b.w">;
-def CRC_W_H_W : ALU_3R<0b00000000001001001, "crc.w.h.w">;
-def CRC_W_W_W : ALU_3R<0b00000000001001010, "crc.w.w.w">;
-def CRC_W_D_W : ALU_3R<0b00000000001001011, "crc.w.d.w">;
-def CRCC_W_B_W : ALU_3R<0b00000000001001100, "crcc.w.b.w">;
-def CRCC_W_H_W : ALU_3R<0b00000000001001101, "crcc.w.h.w">;
-def CRCC_W_W_W : ALU_3R<0b00000000001001110, "crcc.w.w.w">;
-def CRCC_W_D_W : ALU_3R<0b00000000001001111, "crcc.w.d.w">;
+def CRC_W_B_W : ALU_3R<0x00240000>;
+def CRC_W_H_W : ALU_3R<0x00248000>;
+def CRC_W_W_W : ALU_3R<0x00250000>;
+def CRC_W_D_W : ALU_3R<0x00258000>;
+def CRCC_W_B_W : ALU_3R<0x00260000>;
+def CRCC_W_H_W : ALU_3R<0x00268000>;
+def CRCC_W_W_W : ALU_3R<0x00270000>;
+def CRCC_W_D_W : ALU_3R<0x00278000>;
// Other Miscellaneous Instructions for 64-bits
-def ASRTLE_D : FmtASRT<0b00000000000000010, (outs), (ins GPR:$rj, GPR:$rk),
- "asrtle.d", "$rj, $rk">;
-def ASRTGT_D : FmtASRT<0b00000000000000011, (outs), (ins GPR:$rj, GPR:$rk),
- "asrtgt.d", "$rj, $rk">;
-def RDTIME_D : RDTIME_2R<0b0000000000000000011010, "rdtime.d">;
+def ASRTLE_D : FmtASRT<0x00010000, (outs), (ins GPR:$rj, GPR:$rk),
+ "$rj, $rk">;
+def ASRTGT_D : FmtASRT<0x00018000, (outs), (ins GPR:$rj, GPR:$rk),
+ "$rj, $rk">;
+def RDTIME_D : RDTIME_2R<0x00006800>;
} // Predicates = [IsLA64]
//===----------------------------------------------------------------------===//
@@ -802,6 +964,13 @@ class shiftopw<SDPatternOperator operator>
: PatFrag<(ops node:$val, node:$count),
(operator node:$val, (i64 (shiftMask32 node:$count)))>;
+def mul_const_oneuse : PatFrag<(ops node:$A, node:$B),
+ (mul node:$A, node:$B), [{
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ return N1C->hasOneUse();
+ return false;
+}]>;
+
let Predicates = [IsLA32] in {
def : PatGprGpr<add, ADD_W>;
def : PatGprImm<add, ADDI_W, simm12>;
@@ -815,6 +984,13 @@ def : PatGprGpr<mulhs, MULH_W>;
def : PatGprGpr<mulhu, MULH_WU>;
def : PatGprGpr<rotr, ROTR_W>;
def : PatGprImm<rotr, ROTRI_W, uimm5>;
+
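+// For Idx = 1 the pattern below matches (or (shl $rk, 8), (srl $rj, 24)):
+// the top byte of $rj becomes the low byte of the result and the low three
+// bytes of $rk fill the remaining bytes.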
+foreach Idx = 1...3 in {
+ defvar ShamtA = !mul(8, Idx);
+ defvar ShamtB = !mul(8, !sub(4, Idx));
+ def : Pat<(or (shl GPR:$rk, (i32 ShamtA)), (srl GPR:$rj, (i32 ShamtB))),
+ (BYTEPICK_W GPR:$rj, GPR:$rk, Idx)>;
+}
} // Predicates = [IsLA32]
let Predicates = [IsLA64] in {
@@ -850,6 +1026,109 @@ def : Pat<(i64 (mul (sext_inreg GPR:$rj, i32), (sext_inreg GPR:$rk, i32))),
def : Pat<(i64 (mul (loongarch_bstrpick GPR:$rj, (i64 31), (i64 0)),
(loongarch_bstrpick GPR:$rk, (i64 31), (i64 0)))),
(MULW_D_WU GPR:$rj, GPR:$rk)>;
+
+def : Pat<(add GPR:$rj, simm16_lsl16:$imm),
+ (ADDU16I_D GPR:$rj, (HI16 $imm))>;
+def : Pat<(add GPR:$rj, simm32_hi16_lo12:$imm),
+ (ADDI_D (ADDU16I_D GPR:$rj, (HI16ForAddu16idAddiPair $imm)),
+ (LO12 $imm))>;
+def : Pat<(sext_inreg (add GPR:$rj, simm32_hi16_lo12:$imm), i32),
+ (ADDI_W (ADDU16I_D GPR:$rj, (HI16ForAddu16idAddiPair $imm)),
+ (LO12 $imm))>;
+
+let Predicates = [IsLA32] in {
+def : Pat<(add GPR:$rj, (AddiPair:$im)),
+ (ADDI_W (ADDI_W GPR:$rj, (AddiPairImmLarge AddiPair:$im)),
+ (AddiPairImmSmall AddiPair:$im))>;
+} // Predicates = [IsLA32]
+
+let Predicates = [IsLA64] in {
+def : Pat<(add GPR:$rj, (AddiPair:$im)),
+ (ADDI_D (ADDI_D GPR:$rj, (AddiPairImmLarge AddiPair:$im)),
+ (AddiPairImmSmall AddiPair:$im))>;
+def : Pat<(sext_inreg (add GPR:$rj, (AddiPair:$im)), i32),
+ (ADDI_W (ADDI_W GPR:$rj, (AddiPairImmLarge AddiPair:$im)),
+ (AddiPairImmSmall AddiPair:$im))>;
+} // Predicates = [IsLA64]
+
+let Predicates = [IsLA32] in {
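+// The first loop covers C = 1 + ((1 + 2^Idx0) << Idx1), e.g. Idx0 = 1,
+// Idx1 = 2 gives C = 13: ((r + (r << 1)) << 2) + r == r * 13. The second
+// covers C = (1 + 2^Idx0) * (1 + 2^Idx1), e.g. Idx0 = 2, Idx1 = 1 gives
+// C = 15: with t = r + (r << 2), (t << 1) + t == r * 15.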
+foreach Idx0 = 1...4 in {
+ foreach Idx1 = 1...4 in {
+ defvar CImm = !add(1, !shl(!add(1, !shl(1, Idx0)), Idx1));
+ def : Pat<(mul_const_oneuse GPR:$r, (i32 CImm)),
+ (ALSL_W (ALSL_W GPR:$r, GPR:$r, (i32 Idx0)),
+ GPR:$r, (i32 Idx1))>;
+ }
+}
+foreach Idx0 = 1...4 in {
+ foreach Idx1 = 1...4 in {
+ defvar Cb = !add(1, !shl(1, Idx0));
+ defvar CImm = !add(Cb, !shl(Cb, Idx1));
+ def : Pat<(mul_const_oneuse GPR:$r, (i32 CImm)),
+ (ALSL_W (ALSL_W GPR:$r, GPR:$r, (i32 Idx0)),
+ (ALSL_W GPR:$r, GPR:$r, (i32 Idx0)), (i32 Idx1))>;
+ }
+}
+} // Predicates = [IsLA32]
+
+let Predicates = [IsLA64] in {
+foreach Idx0 = 1...4 in {
+ foreach Idx1 = 1...4 in {
+ defvar CImm = !add(1, !shl(!add(1, !shl(1, Idx0)), Idx1));
+ def : Pat<(sext_inreg (mul_const_oneuse GPR:$r, (i64 CImm)), i32),
+ (ALSL_W (ALSL_W GPR:$r, GPR:$r, (i64 Idx0)),
+ GPR:$r, (i64 Idx1))>;
+ def : Pat<(mul_const_oneuse GPR:$r, (i64 CImm)),
+ (ALSL_D (ALSL_D GPR:$r, GPR:$r, (i64 Idx0)),
+ GPR:$r, (i64 Idx1))>;
+ }
+}
+foreach Idx0 = 1...4 in {
+ foreach Idx1 = 1...4 in {
+ defvar Cb = !add(1, !shl(1, Idx0));
+ defvar CImm = !add(Cb, !shl(Cb, Idx1));
+ def : Pat<(sext_inreg (mul_const_oneuse GPR:$r, (i64 CImm)), i32),
+ (ALSL_W (ALSL_W GPR:$r, GPR:$r, (i64 Idx0)),
+ (ALSL_W GPR:$r, GPR:$r, (i64 Idx0)), (i64 Idx1))>;
+ def : Pat<(mul_const_oneuse GPR:$r, (i64 CImm)),
+ (ALSL_D (ALSL_D GPR:$r, GPR:$r, (i64 Idx0)),
+ (ALSL_D GPR:$r, GPR:$r, (i64 Idx0)), (i64 Idx1))>;
+ }
+}
+} // Predicates = [IsLA64]
+
+let Predicates = [IsLA32] in {
+def : Pat<(mul GPR:$rj, (AlslSlliImm:$im)),
+ (SLLI_W (ALSL_W GPR:$rj, GPR:$rj, (AlslSlliImmI0 AlslSlliImm:$im)),
+ (AlslSlliImmI1 AlslSlliImm:$im))>;
+} // Predicates = [IsLA32]
+
+let Predicates = [IsLA64] in {
+def : Pat<(sext_inreg (mul GPR:$rj, (AlslSlliImm:$im)), i32),
+ (SLLI_W (ALSL_W GPR:$rj, GPR:$rj, (AlslSlliImmI0 AlslSlliImm:$im)),
+ (AlslSlliImmI1 AlslSlliImm:$im))>;
+def : Pat<(mul GPR:$rj, (AlslSlliImm:$im)),
+ (SLLI_D (ALSL_D GPR:$rj, GPR:$rj, (AlslSlliImmI0 AlslSlliImm:$im)),
+ (AlslSlliImmI1 AlslSlliImm:$im))>;
+} // Predicates = [IsLA64]
+
+foreach Idx = 1...7 in {
+ defvar ShamtA = !mul(8, Idx);
+ defvar ShamtB = !mul(8, !sub(8, Idx));
+ def : Pat<(or (shl GPR:$rk, (i64 ShamtA)), (srl GPR:$rj, (i64 ShamtB))),
+ (BYTEPICK_D GPR:$rj, GPR:$rk, Idx)>;
+}
+
+foreach Idx = 1...3 in {
+ defvar ShamtA = !mul(8, Idx);
+ defvar ShamtB = !mul(8, !sub(4, Idx));
+ // NOTE: the srl node would already be transformed into a loongarch_bstrpick
+ // by the time this pattern gets to execute, hence the weird construction.
+ def : Pat<(sext_inreg (or (shl GPR:$rk, (i64 ShamtA)),
+ (loongarch_bstrpick GPR:$rj, (i64 31),
+ (i64 ShamtB))), i32),
+ (BYTEPICK_W GPR:$rj, GPR:$rk, Idx)>;
+}
} // Predicates = [IsLA64]
def : PatGprGpr<and, AND>;
@@ -916,6 +1195,8 @@ def : Pat<(add GPR:$rk, (shl GPR:$rj, uimm2_plus1:$imm2)),
let Predicates = [IsLA64] in {
def : Pat<(add GPR:$rk, (shl GPR:$rj, uimm2_plus1:$imm2)),
(ALSL_D GPR:$rj, GPR:$rk, uimm2_plus1:$imm2)>;
+def : Pat<(sext_inreg (add GPR:$rk, (shl GPR:$rj, uimm2_plus1:$imm2)), i32),
+ (ALSL_W GPR:$rj, GPR:$rk, uimm2_plus1:$imm2)>;
def : Pat<(loongarch_bstrpick (add GPR:$rk, (shl GPR:$rj, uimm2_plus1:$imm2)),
(i64 31), (i64 0)),
(ALSL_WU GPR:$rj, GPR:$rk, uimm2_plus1:$imm2)>;
@@ -991,6 +1272,8 @@ def : Pat<(setle GPR:$rj, GPR:$rk), (XORI (SLT GPR:$rk, GPR:$rj), 1)>;
/// Select
+def : Pat<(select GPR:$cond, GPR:$t, 0), (MASKEQZ GPR:$t, GPR:$cond)>;
+def : Pat<(select GPR:$cond, 0, GPR:$f), (MASKNEZ GPR:$f, GPR:$cond)>;
def : Pat<(select GPR:$cond, GPR:$t, GPR:$f),
(OR (MASKEQZ GPR:$t, GPR:$cond), (MASKNEZ GPR:$f, GPR:$cond))>;
@@ -1050,7 +1333,7 @@ def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rj),
[(loongarch_call GPR:$rj)]>,
PseudoInstExpansion<(JIRL R1, GPR:$rj, 0)>;
-let isCall = 1, Defs = [R1] in
+let isCall = 1, hasSideEffects = 0, mayStore = 0, mayLoad = 0, Defs = [R1] in
def PseudoJIRL_CALL : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16)>,
PseudoInstExpansion<(JIRL R1, GPR:$rj,
simm16_lsl2:$imm16)>;
@@ -1072,11 +1355,13 @@ def PseudoTAILIndirect : Pseudo<(outs), (ins GPRT:$rj),
[(loongarch_tail GPRT:$rj)]>,
PseudoInstExpansion<(JIRL R0, GPR:$rj, 0)>;
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasSideEffects = 0, mayStore = 0, mayLoad = 0, Uses = [R3] in
def PseudoB_TAIL : Pseudo<(outs), (ins simm26_b:$imm26)>,
PseudoInstExpansion<(B simm26_b:$imm26)>;
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
+ hasSideEffects = 0, mayStore = 0, mayLoad = 0, Uses = [R3] in
def PseudoJIRL_TAIL : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16)>,
PseudoInstExpansion<(JIRL R0, GPR:$rj,
simm16_lsl2:$imm16)>;
@@ -1219,6 +1504,8 @@ def : Pat<(bitreverse (bswap GPR:$rj)), (BITREV_8B GPR:$rj)>;
multiclass LdPat<PatFrag LoadOp, LAInst Inst, ValueType vt = GRLenVT> {
def : Pat<(vt (LoadOp BaseAddr:$rj)), (Inst BaseAddr:$rj, 0)>;
+ def : Pat<(vt (LoadOp (AddrConstant GPR:$rj, simm12:$imm12))),
+ (Inst GPR:$rj, simm12:$imm12)>;
def : Pat<(vt (LoadOp (AddLike BaseAddr:$rj, simm12:$imm12))),
(Inst BaseAddr:$rj, simm12:$imm12)>;
}
@@ -1261,6 +1548,8 @@ multiclass StPat<PatFrag StoreOp, LAInst Inst, RegisterClass StTy,
ValueType vt> {
def : Pat<(StoreOp (vt StTy:$rd), BaseAddr:$rj),
(Inst StTy:$rd, BaseAddr:$rj, 0)>;
+ def : Pat<(StoreOp (vt StTy:$rs2), (AddrConstant GPR:$rj, simm12:$imm12)),
+ (Inst StTy:$rs2, GPR:$rj, simm12:$imm12)>;
def : Pat<(StoreOp (vt StTy:$rd), (AddLike BaseAddr:$rj, simm12:$imm12)),
(Inst StTy:$rd, BaseAddr:$rj, simm12:$imm12)>;
}
@@ -1342,7 +1631,7 @@ defm : AtomicStPat<atomic_store_unordered_monotonic_32, ST_W, GPR, i32>,
def PseudoAtomicStoreW
: Pseudo<(outs GPR:$dst), (ins GPR:$rj, GPR:$rk)>,
- PseudoInstExpansion<(AMSWAP_DB_W R0, GPR:$rk, GPRMemAtomic:$rj)>;
+ PseudoInstExpansion<(AMSWAP__DB_W R0, GPR:$rk, GPRMemAtomic:$rj)>;
def : Pat<(atomic_store_release_seqcst_32 GPR:$rj, GPR:$rk),
(PseudoAtomicStoreW GPR:$rj, GPR:$rk)>;
@@ -1350,7 +1639,7 @@ def : Pat<(atomic_store_release_seqcst_32 GPR:$rj, GPR:$rk),
let Predicates = [IsLA64] in {
def PseudoAtomicStoreD
: Pseudo<(outs GPR:$dst), (ins GPR:$rj, GPR:$rk)>,
- PseudoInstExpansion<(AMSWAP_DB_D R0, GPR:$rk, GPRMemAtomic:$rj)>;
+ PseudoInstExpansion<(AMSWAP__DB_D R0, GPR:$rk, GPRMemAtomic:$rj)>;
def : Pat<(atomic_store_release_seqcst_64 GPR:$rj, GPR:$rk),
(PseudoAtomicStoreD GPR:$rj, GPR:$rk)>;
@@ -1477,54 +1766,54 @@ let Predicates = [IsLA64] in {
def : AtomicPat<int_loongarch_masked_atomicrmw_xchg_i64,
PseudoMaskedAtomicSwap32>;
def : Pat<(atomic_swap_32 GPR:$addr, GPR:$incr),
- (AMSWAP_DB_W GPR:$incr, GPR:$addr)>;
+ (AMSWAP__DB_W GPR:$incr, GPR:$addr)>;
def : Pat<(atomic_swap_64 GPR:$addr, GPR:$incr),
- (AMSWAP_DB_D GPR:$incr, GPR:$addr)>;
+ (AMSWAP__DB_D GPR:$incr, GPR:$addr)>;
def : Pat<(atomic_load_add_64 GPR:$rj, GPR:$rk),
- (AMADD_DB_D GPR:$rk, GPR:$rj)>;
+ (AMADD__DB_D GPR:$rk, GPR:$rj)>;
def : AtomicPat<int_loongarch_masked_atomicrmw_add_i64,
PseudoMaskedAtomicLoadAdd32>;
def : Pat<(atomic_load_sub_32 GPR:$rj, GPR:$rk),
- (AMADD_DB_W (SUB_W R0, GPR:$rk), GPR:$rj)>;
+ (AMADD__DB_W (SUB_W R0, GPR:$rk), GPR:$rj)>;
def : Pat<(atomic_load_sub_64 GPR:$rj, GPR:$rk),
- (AMADD_DB_D (SUB_D R0, GPR:$rk), GPR:$rj)>;
+ (AMADD__DB_D (SUB_D R0, GPR:$rk), GPR:$rj)>;
def : AtomicPat<int_loongarch_masked_atomicrmw_sub_i64,
PseudoMaskedAtomicLoadSub32>;
defm : PseudoBinPat<"atomic_load_nand_64", PseudoAtomicLoadNand64>;
def : AtomicPat<int_loongarch_masked_atomicrmw_nand_i64,
PseudoMaskedAtomicLoadNand32>;
def : Pat<(atomic_load_add_32 GPR:$rj, GPR:$rk),
- (AMADD_DB_W GPR:$rk, GPR:$rj)>;
+ (AMADD__DB_W GPR:$rk, GPR:$rj)>;
def : Pat<(atomic_load_and_32 GPR:$rj, GPR:$rk),
- (AMAND_DB_W GPR:$rk, GPR:$rj)>;
+ (AMAND__DB_W GPR:$rk, GPR:$rj)>;
def : Pat<(atomic_load_and_64 GPR:$rj, GPR:$rk),
- (AMAND_DB_D GPR:$rk, GPR:$rj)>;
+ (AMAND__DB_D GPR:$rk, GPR:$rj)>;
def : Pat<(atomic_load_or_32 GPR:$rj, GPR:$rk),
- (AMOR_DB_W GPR:$rk, GPR:$rj)>;
+ (AMOR__DB_W GPR:$rk, GPR:$rj)>;
def : Pat<(atomic_load_or_64 GPR:$rj, GPR:$rk),
- (AMOR_DB_D GPR:$rk, GPR:$rj)>;
+ (AMOR__DB_D GPR:$rk, GPR:$rj)>;
def : Pat<(atomic_load_xor_32 GPR:$rj, GPR:$rk),
- (AMXOR_DB_W GPR:$rk, GPR:$rj)>;
+ (AMXOR__DB_W GPR:$rk, GPR:$rj)>;
def : Pat<(atomic_load_xor_64 GPR:$rj, GPR:$rk),
- (AMXOR_DB_D GPR:$rk, GPR:$rj)>;
+ (AMXOR__DB_D GPR:$rk, GPR:$rj)>;
def : Pat<(atomic_load_umin_32 GPR:$rj, GPR:$rk),
- (AMMIN_DB_WU GPR:$rk, GPR:$rj)>;
+ (AMMIN__DB_WU GPR:$rk, GPR:$rj)>;
def : Pat<(atomic_load_umin_64 GPR:$rj, GPR:$rk),
- (AMMIN_DB_DU GPR:$rk, GPR:$rj)>;
+ (AMMIN__DB_DU GPR:$rk, GPR:$rj)>;
def : Pat<(atomic_load_umax_32 GPR:$rj, GPR:$rk),
- (AMMAX_DB_WU GPR:$rk, GPR:$rj)>;
+ (AMMAX__DB_WU GPR:$rk, GPR:$rj)>;
def : Pat<(atomic_load_umax_64 GPR:$rj, GPR:$rk),
- (AMMAX_DB_DU GPR:$rk, GPR:$rj)>;
+ (AMMAX__DB_DU GPR:$rk, GPR:$rj)>;
def : Pat<(atomic_load_min_32 GPR:$rj, GPR:$rk),
- (AMMIN_DB_W GPR:$rk, GPR:$rj)>;
+ (AMMIN__DB_W GPR:$rk, GPR:$rj)>;
def : Pat<(atomic_load_min_64 GPR:$rj, GPR:$rk),
- (AMMIN_DB_D GPR:$rk, GPR:$rj)>;
+ (AMMIN__DB_D GPR:$rk, GPR:$rj)>;
def : Pat<(atomic_load_max_32 GPR:$rj, GPR:$rk),
- (AMMAX_DB_W GPR:$rk, GPR:$rj)>;
+ (AMMAX__DB_W GPR:$rk, GPR:$rj)>;
def : Pat<(atomic_load_max_64 GPR:$rj, GPR:$rk),
- (AMMAX_DB_D GPR:$rk, GPR:$rj)>;
+ (AMMAX__DB_D GPR:$rk, GPR:$rj)>;
def : AtomicPat<int_loongarch_masked_atomicrmw_umax_i64,
PseudoMaskedAtomicLoadUMax32>;
@@ -1659,48 +1948,52 @@ let Predicates = [HasBasicF], usesCustomInserter = 1 in {
//===----------------------------------------------------------------------===//
// CSR Access Instructions
-def CSRRD : FmtCSR<0b0000010000000, (outs GPR:$rd), (ins uimm14:$csr_num),
- "csrrd", "$rd, $csr_num">;
-let Constraints = "$rd = $dst" in {
-def CSRWR : FmtCSR<0b0000010000001, (outs GPR:$dst),
- (ins GPR:$rd, uimm14:$csr_num), "csrwr", "$rd, $csr_num">;
-def CSRXCHG : FmtCSRXCHG<0b00000100, (outs GPR:$dst),
+let hasSideEffects = 1 in
+def CSRRD : FmtCSR<0x04000000, (outs GPR:$rd), (ins uimm14:$csr_num),
+ "$rd, $csr_num">;
+let hasSideEffects = 1, Constraints = "$rd = $dst" in {
+def CSRWR : FmtCSR<0x04000020, (outs GPR:$dst),
+ (ins GPR:$rd, uimm14:$csr_num), "$rd, $csr_num">;
+def CSRXCHG : FmtCSRXCHG<0x04000000, (outs GPR:$dst),
(ins GPR:$rd, GPR:$rj, uimm14:$csr_num),
- "csrxchg", "$rd, $rj, $csr_num">;
-} // Constraints = "$rd = $dst"
+ "$rd, $rj, $csr_num">;
+} // hasSideEffects = 1, Constraints = "$rd = $dst"
// IOCSR Access Instructions
-def IOCSRRD_B : IOCSRRD<0b0000011001001000000000, "iocsrrd.b">;
-def IOCSRRD_H : IOCSRRD<0b0000011001001000000001, "iocsrrd.h">;
-def IOCSRRD_W : IOCSRRD<0b0000011001001000000010, "iocsrrd.w">;
-def IOCSRWR_B : IOCSRWR<0b0000011001001000000100, "iocsrwr.b">;
-def IOCSRWR_H : IOCSRWR<0b0000011001001000000101, "iocsrwr.h">;
-def IOCSRWR_W : IOCSRWR<0b0000011001001000000110, "iocsrwr.w">;
+def IOCSRRD_B : IOCSRRD<0x06480000>;
+def IOCSRRD_H : IOCSRRD<0x06480400>;
+def IOCSRRD_W : IOCSRRD<0x06480800>;
+def IOCSRWR_B : IOCSRWR<0x06481000>;
+def IOCSRWR_H : IOCSRWR<0x06481400>;
+def IOCSRWR_W : IOCSRWR<0x06481800>;
let Predicates = [IsLA64] in {
-def IOCSRRD_D : IOCSRRD<0b0000011001001000000011, "iocsrrd.d">;
-def IOCSRWR_D : IOCSRWR<0b0000011001001000000111, "iocsrwr.d">;
+def IOCSRRD_D : IOCSRRD<0x06480c00>;
+def IOCSRWR_D : IOCSRWR<0x06481c00>;
} // Predicates = [IsLA64]
// TLB Maintenance Instructions
-def TLBSRCH : FmtI32<0b00000110010010000010100000000000, "tlbsrch">;
-def TLBRD : FmtI32<0b00000110010010000010110000000000, "tlbrd">;
-def TLBWR : FmtI32<0b00000110010010000011000000000000, "tlbwr">;
-def TLBFILL : FmtI32<0b00000110010010000011010000000000, "tlbfill">;
-def TLBCLR : FmtI32<0b00000110010010000010000000000000, "tlbclr">;
-def TLBFLUSH : FmtI32<0b00000110010010000010010000000000, "tlbflush">;
-def INVTLB : FmtINVTLB<(outs), (ins GPR:$rk, GPR:$rj, uimm5:$op), "invtlb",
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
+def TLBSRCH : FmtI32<0x06482800>;
+def TLBRD : FmtI32<0x06482c00>;
+def TLBWR : FmtI32<0x06483000>;
+def TLBFILL : FmtI32<0x06483400>;
+def TLBCLR : FmtI32<0x06482000>;
+def TLBFLUSH : FmtI32<0x06482400>;
+def INVTLB : FmtINVTLB<(outs), (ins GPR:$rk, GPR:$rj, uimm5:$op),
"$op, $rj, $rk">;
+} // hasSideEffects = 1, mayLoad = 0, mayStore = 0
// Software Page Walking Instructions
-def LDDIR : Fmt2RI8<0b00000110010000, (outs GPR:$rd),
- (ins GPR:$rj, uimm8:$imm8), "lddir", "$rd, $rj, $imm8">;
-def LDPTE : FmtLDPTE<(outs), (ins GPR:$rj, uimm8:$seq), "ldpte", "$rj, $seq">;
+def LDDIR : Fmt2RI8<0x06400000, (outs GPR:$rd),
+ (ins GPR:$rj, uimm8:$imm8), "$rd, $rj, $imm8">;
+def LDPTE : FmtLDPTE<(outs), (ins GPR:$rj, uimm8:$seq), "$rj, $seq">;
// Other Miscellaneous Instructions
-def ERTN : FmtI32<0b00000110010010000011100000000000, "ertn">;
-def DBCL : MISC_I15<0b00000000001010101, "dbcl">;
-def IDLE : MISC_I15<0b00000110010010001, "idle">;
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
+def ERTN : FmtI32<0x06483800>;
+def DBCL : MISC_I15<0x002a8000>;
+def IDLE : MISC_I15<0x06488000>;
//===----------------------------------------------------------------------===//
// Privilege Intrinsics
@@ -1734,3 +2027,23 @@ def : Pat<(int_loongarch_lddir_d GPR:$rj, timm:$imm8),
def : Pat<(int_loongarch_ldpte_d GPR:$rj, timm:$imm8),
(LDPTE GPR:$rj, uimm8:$imm8)>;
} // Predicates = [IsLA64]
+
+//===----------------------------------------------------------------------===//
+// LSX Instructions
+//===----------------------------------------------------------------------===//
+include "LoongArchLSXInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// LASX Instructions
+//===----------------------------------------------------------------------===//
+include "LoongArchLASXInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// LVZ Instructions
+//===----------------------------------------------------------------------===//
+include "LoongArchLVZInstrInfo.td"
+
+//===----------------------------------------------------------------------===//
+// LBT Instructions
+//===----------------------------------------------------------------------===//
+include "LoongArchLBTInstrInfo.td"
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrFormats.td
new file mode 100644
index 000000000000..ba21d68b9304
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrFormats.td
@@ -0,0 +1,459 @@
+// LoongArchLASXInstrFormats.td - LoongArch LASX Instr Formats - tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe LoongArch LASX instructions format
+//
+// opcode - operation code.
+// xd/rd/cd - destination register operand.
+// {r/x}{j/k} - source register operand.
+// immN - immediate data operand.
+//
+//===----------------------------------------------------------------------===//
+
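+// For reference, the class-name suffixes used below list the operand kinds in
+// assembly order, destination first: X = LASX register, R = GPR, C = condition
+// flag register, I = immediate. For example, Fmt2R_XR writes $xd and reads
+// GPR $rj, while Fmt2RI2_RXI writes GPR $rd and reads $xj plus a 2-bit
+// immediate.
+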
+// 1RI13-type
+// <opcode | I13 | xd>
+class Fmt1RI13_XI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<13> imm13;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{17-5} = imm13;
+ let Inst{4-0} = xd;
+}
+
+// 2R-type
+// <opcode | xj | xd>
+class Fmt2R_XX<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<5> xj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{9-5} = xj;
+ let Inst{4-0} = xd;
+}
+
+// <opcode | rj | xd>
+class Fmt2R_XR<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<5> rj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = xd;
+}
+
+// <opcode | xj | cd>
+class Fmt2R_CX<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<5> xj;
+ bits<3> cd;
+
+ let Inst{31-0} = op;
+ let Inst{9-5} = xj;
+ let Inst{2-0} = cd;
+}
+
+// 2RI1-type
+// <opcode | I1 | xj | xd>
+class Fmt2RI1_XXI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<1> imm1;
+ bits<5> xj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{10} = imm1;
+ let Inst{9-5} = xj;
+ let Inst{4-0} = xd;
+}
+
+// 2RI2-type
+// <opcode | I2 | xj | xd>
+class Fmt2RI2_XXI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<2> imm2;
+ bits<5> xj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{11-10} = imm2;
+ let Inst{9-5} = xj;
+ let Inst{4-0} = xd;
+}
+
+// <opcode | I2 | rj | xd>
+class Fmt2RI2_XRI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<2> imm2;
+ bits<5> rj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{11-10} = imm2;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = xd;
+}
+
+// <opcode | I2 | xj | rd>
+class Fmt2RI2_RXI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<2> imm2;
+ bits<5> xj;
+ bits<5> rd;
+
+ let Inst{31-0} = op;
+ let Inst{11-10} = imm2;
+ let Inst{9-5} = xj;
+ let Inst{4-0} = rd;
+}
+
+// 2RI3-type
+// <opcode | I3 | xj | xd>
+class Fmt2RI3_XXI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<3> imm3;
+ bits<5> xj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{12-10} = imm3;
+ let Inst{9-5} = xj;
+ let Inst{4-0} = xd;
+}
+
+// <opcode | I3 | rj | xd>
+class Fmt2RI3_XRI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<3> imm3;
+ bits<5> rj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{12-10} = imm3;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = xd;
+}
+
+// <opcode | I3 | xj | rd>
+class Fmt2RI3_RXI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<3> imm3;
+ bits<5> xj;
+ bits<5> rd;
+
+ let Inst{31-0} = op;
+ let Inst{12-10} = imm3;
+ let Inst{9-5} = xj;
+ let Inst{4-0} = rd;
+}
+
+// 2RI4-type
+// <opcode | I4 | xj | xd>
+class Fmt2RI4_XXI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<4> imm4;
+ bits<5> xj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{13-10} = imm4;
+ let Inst{9-5} = xj;
+ let Inst{4-0} = xd;
+}
+
+// <opcode | I4 | rj | xd>
+class Fmt2RI4_XRI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<4> imm4;
+ bits<5> rj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{13-10} = imm4;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = xd;
+}
+
+// <opcode | I4 | xj | rd>
+class Fmt2RI4_RXI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<4> imm4;
+ bits<5> xj;
+ bits<5> rd;
+
+ let Inst{31-0} = op;
+ let Inst{13-10} = imm4;
+ let Inst{9-5} = xj;
+ let Inst{4-0} = rd;
+}
+
+// 2RI5-type
+// <opcode | I5 | xj | xd>
+class Fmt2RI5_XXI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<5> imm5;
+ bits<5> xj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{14-10} = imm5;
+ let Inst{9-5} = xj;
+ let Inst{4-0} = xd;
+}
+
+// 2RI6-type
+// <opcode | I6 | xj | xd>
+class Fmt2RI6_XXI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<6> imm6;
+ bits<5> xj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{15-10} = imm6;
+ let Inst{9-5} = xj;
+ let Inst{4-0} = xd;
+}
+
+// 2RI7-type
+// <opcode | I7 | xj | xd>
+class Fmt2RI7_XXI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<7> imm7;
+ bits<5> xj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{16-10} = imm7;
+ let Inst{9-5} = xj;
+ let Inst{4-0} = xd;
+}
+
+// 2RI8-type
+// <opcode | I8 | xj | xd>
+class Fmt2RI8_XXI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<8> imm8;
+ bits<5> xj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{17-10} = imm8;
+ let Inst{9-5} = xj;
+ let Inst{4-0} = xd;
+}
+
+// 2RI8I2-type
+// <opcode | I2 | I8 | xj | xd>
+class Fmt2RI8I2_XRII<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<2> imm2;
+ bits<8> imm8;
+ bits<5> rj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{19-18} = imm2;
+ let Inst{17-10} = imm8;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = xd;
+}
+
+// 2RI8I3-type
+// <opcode | I3 | I8 | xj | xd>
+class Fmt2RI8I3_XRII<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<3> imm3;
+ bits<8> imm8;
+ bits<5> rj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{20-18} = imm3;
+ let Inst{17-10} = imm8;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = xd;
+}
+
+// 2RI8I4-type
+// <opcode | I4 | I8 | xj | xd>
+class Fmt2RI8I4_XRII<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<4> imm4;
+ bits<8> imm8;
+ bits<5> rj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{21-18} = imm4;
+ let Inst{17-10} = imm8;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = xd;
+}
+
+// 2RI8I5-type
+// <opcode | I5 | I8 | xj | xd>
+class Fmt2RI8I5_XRII<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<5> imm5;
+ bits<8> imm8;
+ bits<5> rj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{22-18} = imm5;
+ let Inst{17-10} = imm8;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = xd;
+}
+
+// 2RI9-type
+// <opcode | I9 | rj | xd>
+class Fmt2RI9_XRI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<9> imm9;
+ bits<5> rj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{18-10} = imm9;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = xd;
+}
+
+// 2RI10-type
+// <opcode | I10 | rj | xd>
+class Fmt2RI10_XRI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<10> imm10;
+ bits<5> rj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{19-10} = imm10;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = xd;
+}
+
+// 2RI11-type
+// <opcode | I11 | rj | xd>
+class Fmt2RI11_XRI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<11> imm11;
+ bits<5> rj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{20-10} = imm11;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = xd;
+}
+
+// 2RI12-type
+// <opcode | I12 | rj | xd>
+class Fmt2RI12_XRI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<12> imm12;
+ bits<5> rj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{21-10} = imm12;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = xd;
+}
+
+// 3R-type
+// <opcode | xk | xj | xd>
+class Fmt3R_XXX<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<5> xk;
+ bits<5> xj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{14-10} = xk;
+ let Inst{9-5} = xj;
+ let Inst{4-0} = xd;
+}
+
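+// Encoding sketch for the 3R layout above: the xk/xj/xd fields overwrite the
+// low 15 bits of op, so an instruction defined with op = 0x740a0000 (XVADD_B
+// in LoongArchLASXInstrInfo.td) is emitted as
+//   0x740a0000 | (xk << 10) | (xj << 5) | xd
+// e.g. "xvadd.b $xr1, $xr2, $xr3" encodes to 0x740a0c41.
+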
+// <opcode | rk | xj | xd>
+class Fmt3R_XXR<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<5> rk;
+ bits<5> xj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{14-10} = rk;
+ let Inst{9-5} = xj;
+ let Inst{4-0} = xd;
+}
+
+// <opcode | rk | rj | xd>
+class Fmt3R_XRR<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<5> rk;
+ bits<5> rj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{14-10} = rk;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = xd;
+}
+
+// 4R-type
+// <opcode | xa | xk | xj | xd>
+class Fmt4R_XXXX<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<5> xa;
+ bits<5> xk;
+ bits<5> xj;
+ bits<5> xd;
+
+ let Inst{31-0} = op;
+ let Inst{19-15} = xa;
+ let Inst{14-10} = xk;
+ let Inst{9-5} = xj;
+ let Inst{4-0} = xd;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
new file mode 100644
index 000000000000..dc37b37b2186
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -0,0 +1,1032 @@
+//=- LoongArchLASXInstrInfo.td - LoongArch LASX instructions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the Advanced SIMD extension instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction class templates
+//===----------------------------------------------------------------------===//
+
+class LASX1RI13_XI<bits<32> op, Operand ImmOpnd = simm13>
+ : Fmt1RI13_XI<op, (outs LASX256:$xd), (ins ImmOpnd:$imm13), "$xd, $imm13">;
+
+class LASX2R_XX<bits<32> op>
+ : Fmt2R_XX<op, (outs LASX256:$xd), (ins LASX256:$xj), "$xd, $xj">;
+
+class LASX2R_XR<bits<32> op>
+ : Fmt2R_XR<op, (outs LASX256:$xd), (ins GPR:$rj), "$xd, $rj">;
+
+class LASX2R_CX<bits<32> op>
+ : Fmt2R_CX<op, (outs CFR:$cd), (ins LASX256:$xj), "$cd, $xj">;
+
+class LASX2RI1_XXI<bits<32> op, Operand ImmOpnd = uimm1>
+ : Fmt2RI1_XXI<op, (outs LASX256:$xd), (ins LASX256:$xj, ImmOpnd:$imm1),
+ "$xd, $xj, $imm1">;
+
+class LASX2RI2_XXI<bits<32> op, Operand ImmOpnd = uimm2>
+ : Fmt2RI2_XXI<op, (outs LASX256:$xd), (ins LASX256:$xj, ImmOpnd:$imm2),
+ "$xd, $xj, $imm2">;
+
+class LASX2RI2_RXI<bits<32> op, Operand ImmOpnd = uimm2>
+ : Fmt2RI2_RXI<op, (outs GPR:$rd), (ins LASX256:$xj, ImmOpnd:$imm2),
+ "$rd, $xj, $imm2">;
+
+class LASX2RI3_XXI<bits<32> op, Operand ImmOpnd = uimm3>
+ : Fmt2RI3_XXI<op, (outs LASX256:$xd), (ins LASX256:$xj, ImmOpnd:$imm3),
+ "$xd, $xj, $imm3">;
+
+class LASX2RI3_RXI<bits<32> op, Operand ImmOpnd = uimm3>
+ : Fmt2RI3_RXI<op, (outs GPR:$rd), (ins LASX256:$xj, ImmOpnd:$imm3),
+ "$rd, $xj, $imm3">;
+
+class LASX2RI4_XXI<bits<32> op, Operand ImmOpnd = uimm4>
+ : Fmt2RI4_XXI<op, (outs LASX256:$xd), (ins LASX256:$xj, ImmOpnd:$imm4),
+ "$xd, $xj, $imm4">;
+
+class LASX2RI4_XRI<bits<32> op, Operand ImmOpnd = uimm4>
+ : Fmt2RI4_XRI<op, (outs LASX256:$xd), (ins GPR:$rj, ImmOpnd:$imm4),
+ "$xd, $rj, $imm4">;
+
+class LASX2RI4_RXI<bits<32> op, Operand ImmOpnd = uimm4>
+ : Fmt2RI4_RXI<op, (outs GPR:$rd), (ins LASX256:$xj, ImmOpnd:$imm4),
+ "$rd, $xj, $imm4">;
+
+class LASX2RI5_XXI<bits<32> op, Operand ImmOpnd = uimm5>
+ : Fmt2RI5_XXI<op, (outs LASX256:$xd), (ins LASX256:$xj, ImmOpnd:$imm5),
+ "$xd, $xj, $imm5">;
+
+class LASX2RI6_XXI<bits<32> op, Operand ImmOpnd = uimm6>
+ : Fmt2RI6_XXI<op, (outs LASX256:$xd), (ins LASX256:$xj, ImmOpnd:$imm6),
+ "$xd, $xj, $imm6">;
+
+class LASX2RI8_XXI<bits<32> op, Operand ImmOpnd = uimm8>
+ : Fmt2RI8_XXI<op, (outs LASX256:$xd), (ins LASX256:$xj, ImmOpnd:$imm8),
+ "$xd, $xj, $imm8">;
+
+class LASX2RI8I2_XRII<bits<32> op, Operand ImmOpnd = simm8,
+ Operand IdxOpnd = uimm2>
+ : Fmt2RI8I2_XRII<op, (outs),
+ (ins LASX256:$xd, GPR:$rj, ImmOpnd:$imm8, IdxOpnd:$imm2),
+ "$xd, $rj, $imm8, $imm2">;
+class LASX2RI8I3_XRII<bits<32> op, Operand ImmOpnd = simm8,
+ Operand IdxOpnd = uimm3>
+ : Fmt2RI8I3_XRII<op, (outs),
+ (ins LASX256:$xd, GPR:$rj, ImmOpnd:$imm8, IdxOpnd:$imm3),
+ "$xd, $rj, $imm8, $imm3">;
+class LASX2RI8I4_XRII<bits<32> op, Operand ImmOpnd = simm8,
+ Operand IdxOpnd = uimm4>
+ : Fmt2RI8I4_XRII<op, (outs),
+ (ins LASX256:$xd, GPR:$rj, ImmOpnd:$imm8, IdxOpnd:$imm4),
+ "$xd, $rj, $imm8, $imm4">;
+class LASX2RI8I5_XRII<bits<32> op, Operand ImmOpnd = simm8,
+ Operand IdxOpnd = uimm5>
+ : Fmt2RI8I5_XRII<op, (outs),
+ (ins LASX256:$xd, GPR:$rj, ImmOpnd:$imm8, IdxOpnd:$imm5),
+ "$xd, $rj, $imm8, $imm5">;
+
+class LASX3R_XXX<bits<32> op>
+ : Fmt3R_XXX<op, (outs LASX256:$xd), (ins LASX256:$xj, LASX256:$xk),
+ "$xd, $xj, $xk">;
+
+class LASX3R_XXR<bits<32> op>
+ : Fmt3R_XXR<op, (outs LASX256:$xd), (ins LASX256:$xj, GPR:$rk),
+ "$xd, $xj, $rk">;
+
+class LASX4R_XXXX<bits<32> op>
+ : Fmt4R_XXXX<op, (outs LASX256:$xd),
+ (ins LASX256:$xj, LASX256:$xk, LASX256:$xa),
+ "$xd, $xj, $xk, $xa">;
+
+let Constraints = "$xd = $dst" in {
+
+class LASX2RI2_XXXI<bits<32> op, Operand ImmOpnd = uimm2>
+ : Fmt2RI2_XXI<op, (outs LASX256:$dst), (ins LASX256:$xd, LASX256:$xj, ImmOpnd:$imm2),
+ "$xd, $xj, $imm2">;
+class LASX2RI3_XXXI<bits<32> op, Operand ImmOpnd = uimm3>
+ : Fmt2RI3_XXI<op, (outs LASX256:$dst), (ins LASX256:$xd, LASX256:$xj, ImmOpnd:$imm3),
+ "$xd, $xj, $imm3">;
+
+class LASX2RI2_XXRI<bits<32> op, Operand ImmOpnd = uimm2>
+ : Fmt2RI2_XRI<op, (outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, ImmOpnd:$imm2),
+ "$xd, $rj, $imm2">;
+class LASX2RI3_XXRI<bits<32> op, Operand ImmOpnd = uimm3>
+ : Fmt2RI3_XRI<op, (outs LASX256:$dst), (ins LASX256:$xd, GPR:$rj, ImmOpnd:$imm3),
+ "$xd, $rj, $imm3">;
+
+class LASX2RI4_XXXI<bits<32> op, Operand ImmOpnd = uimm4>
+ : Fmt2RI4_XXI<op, (outs LASX256:$dst), (ins LASX256:$xd, LASX256:$xj, ImmOpnd:$imm4),
+ "$xd, $xj, $imm4">;
+class LASX2RI5_XXXI<bits<32> op, Operand ImmOpnd = uimm5>
+ : Fmt2RI5_XXI<op, (outs LASX256:$dst), (ins LASX256:$xd, LASX256:$xj, ImmOpnd:$imm5),
+ "$xd, $xj, $imm5">;
+class LASX2RI6_XXXI<bits<32> op, Operand ImmOpnd = uimm6>
+ : Fmt2RI6_XXI<op, (outs LASX256:$dst), (ins LASX256:$xd, LASX256:$xj, ImmOpnd:$imm6),
+ "$xd, $xj, $imm6">;
+class LASX2RI7_XXXI<bits<32> op, Operand ImmOpnd = uimm7>
+ : Fmt2RI7_XXI<op, (outs LASX256:$dst), (ins LASX256:$xd, LASX256:$xj, ImmOpnd:$imm7),
+ "$xd, $xj, $imm7">;
+
+class LASX2RI8_XXXI<bits<32> op, Operand ImmOpnd = uimm8>
+ : Fmt2RI8_XXI<op, (outs LASX256:$dst), (ins LASX256:$xd, LASX256:$xj, ImmOpnd:$imm8),
+ "$xd, $xj, $imm8">;
+
+class LASX3R_XXXX<bits<32> op>
+ : Fmt3R_XXX<op, (outs LASX256:$dst), (ins LASX256:$xd, LASX256:$xj, LASX256:$xk),
+ "$xd, $xj, $xk">;
+
+} // Constraints = "$xd = $dst"
+
+class LASX2RI9_Load<bits<32> op, Operand ImmOpnd = simm9_lsl3>
+ : Fmt2RI9_XRI<op, (outs LASX256:$xd), (ins GPR:$rj, ImmOpnd:$imm9),
+ "$xd, $rj, $imm9">;
+class LASX2RI10_Load<bits<32> op, Operand ImmOpnd = simm10_lsl2>
+ : Fmt2RI10_XRI<op, (outs LASX256:$xd), (ins GPR:$rj, ImmOpnd:$imm10),
+ "$xd, $rj, $imm10">;
+class LASX2RI11_Load<bits<32> op, Operand ImmOpnd = simm11_lsl1>
+ : Fmt2RI11_XRI<op, (outs LASX256:$xd), (ins GPR:$rj, ImmOpnd:$imm11),
+ "$xd, $rj, $imm11">;
+class LASX2RI12_Load<bits<32> op, Operand ImmOpnd = simm12>
+ : Fmt2RI12_XRI<op, (outs LASX256:$xd), (ins GPR:$rj, ImmOpnd:$imm12),
+ "$xd, $rj, $imm12">;
+class LASX2RI12_Store<bits<32> op, Operand ImmOpnd = simm12>
+ : Fmt2RI12_XRI<op, (outs), (ins LASX256:$xd, GPR:$rj, ImmOpnd:$imm12),
+ "$xd, $rj, $imm12">;
+
+class LASX3R_Load<bits<32> op>
+ : Fmt3R_XRR<op, (outs LASX256:$xd), (ins GPR:$rj, GPR:$rk),
+ "$xd, $rj, $rk">;
+class LASX3R_Store<bits<32> op>
+ : Fmt3R_XRR<op, (outs), (ins LASX256:$xd, GPR:$rj, GPR:$rk),
+ "$xd, $rj, $rk">;
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0, Predicates = [HasExtLASX] in {
+
+let mayLoad = 0, mayStore = 0 in {
+def XVADD_B : LASX3R_XXX<0x740a0000>;
+def XVADD_H : LASX3R_XXX<0x740a8000>;
+def XVADD_W : LASX3R_XXX<0x740b0000>;
+def XVADD_D : LASX3R_XXX<0x740b8000>;
+def XVADD_Q : LASX3R_XXX<0x752d0000>;
+
+def XVSUB_B : LASX3R_XXX<0x740c0000>;
+def XVSUB_H : LASX3R_XXX<0x740c8000>;
+def XVSUB_W : LASX3R_XXX<0x740d0000>;
+def XVSUB_D : LASX3R_XXX<0x740d8000>;
+def XVSUB_Q : LASX3R_XXX<0x752d8000>;
+
+def XVADDI_BU : LASX2RI5_XXI<0x768a0000>;
+def XVADDI_HU : LASX2RI5_XXI<0x768a8000>;
+def XVADDI_WU : LASX2RI5_XXI<0x768b0000>;
+def XVADDI_DU : LASX2RI5_XXI<0x768b8000>;
+
+def XVSUBI_BU : LASX2RI5_XXI<0x768c0000>;
+def XVSUBI_HU : LASX2RI5_XXI<0x768c8000>;
+def XVSUBI_WU : LASX2RI5_XXI<0x768d0000>;
+def XVSUBI_DU : LASX2RI5_XXI<0x768d8000>;
+
+def XVNEG_B : LASX2R_XX<0x769c3000>;
+def XVNEG_H : LASX2R_XX<0x769c3400>;
+def XVNEG_W : LASX2R_XX<0x769c3800>;
+def XVNEG_D : LASX2R_XX<0x769c3c00>;
+
+def XVSADD_B : LASX3R_XXX<0x74460000>;
+def XVSADD_H : LASX3R_XXX<0x74468000>;
+def XVSADD_W : LASX3R_XXX<0x74470000>;
+def XVSADD_D : LASX3R_XXX<0x74478000>;
+def XVSADD_BU : LASX3R_XXX<0x744a0000>;
+def XVSADD_HU : LASX3R_XXX<0x744a8000>;
+def XVSADD_WU : LASX3R_XXX<0x744b0000>;
+def XVSADD_DU : LASX3R_XXX<0x744b8000>;
+
+def XVSSUB_B : LASX3R_XXX<0x74480000>;
+def XVSSUB_H : LASX3R_XXX<0x74488000>;
+def XVSSUB_W : LASX3R_XXX<0x74490000>;
+def XVSSUB_D : LASX3R_XXX<0x74498000>;
+def XVSSUB_BU : LASX3R_XXX<0x744c0000>;
+def XVSSUB_HU : LASX3R_XXX<0x744c8000>;
+def XVSSUB_WU : LASX3R_XXX<0x744d0000>;
+def XVSSUB_DU : LASX3R_XXX<0x744d8000>;
+
+def XVHADDW_H_B : LASX3R_XXX<0x74540000>;
+def XVHADDW_W_H : LASX3R_XXX<0x74548000>;
+def XVHADDW_D_W : LASX3R_XXX<0x74550000>;
+def XVHADDW_Q_D : LASX3R_XXX<0x74558000>;
+def XVHADDW_HU_BU : LASX3R_XXX<0x74580000>;
+def XVHADDW_WU_HU : LASX3R_XXX<0x74588000>;
+def XVHADDW_DU_WU : LASX3R_XXX<0x74590000>;
+def XVHADDW_QU_DU : LASX3R_XXX<0x74598000>;
+
+def XVHSUBW_H_B : LASX3R_XXX<0x74560000>;
+def XVHSUBW_W_H : LASX3R_XXX<0x74568000>;
+def XVHSUBW_D_W : LASX3R_XXX<0x74570000>;
+def XVHSUBW_Q_D : LASX3R_XXX<0x74578000>;
+def XVHSUBW_HU_BU : LASX3R_XXX<0x745a0000>;
+def XVHSUBW_WU_HU : LASX3R_XXX<0x745a8000>;
+def XVHSUBW_DU_WU : LASX3R_XXX<0x745b0000>;
+def XVHSUBW_QU_DU : LASX3R_XXX<0x745b8000>;
+
+def XVADDWEV_H_B : LASX3R_XXX<0x741e0000>;
+def XVADDWEV_W_H : LASX3R_XXX<0x741e8000>;
+def XVADDWEV_D_W : LASX3R_XXX<0x741f0000>;
+def XVADDWEV_Q_D : LASX3R_XXX<0x741f8000>;
+def XVADDWOD_H_B : LASX3R_XXX<0x74220000>;
+def XVADDWOD_W_H : LASX3R_XXX<0x74228000>;
+def XVADDWOD_D_W : LASX3R_XXX<0x74230000>;
+def XVADDWOD_Q_D : LASX3R_XXX<0x74238000>;
+
+def XVSUBWEV_H_B : LASX3R_XXX<0x74200000>;
+def XVSUBWEV_W_H : LASX3R_XXX<0x74208000>;
+def XVSUBWEV_D_W : LASX3R_XXX<0x74210000>;
+def XVSUBWEV_Q_D : LASX3R_XXX<0x74218000>;
+def XVSUBWOD_H_B : LASX3R_XXX<0x74240000>;
+def XVSUBWOD_W_H : LASX3R_XXX<0x74248000>;
+def XVSUBWOD_D_W : LASX3R_XXX<0x74250000>;
+def XVSUBWOD_Q_D : LASX3R_XXX<0x74258000>;
+
+def XVADDWEV_H_BU : LASX3R_XXX<0x742e0000>;
+def XVADDWEV_W_HU : LASX3R_XXX<0x742e8000>;
+def XVADDWEV_D_WU : LASX3R_XXX<0x742f0000>;
+def XVADDWEV_Q_DU : LASX3R_XXX<0x742f8000>;
+def XVADDWOD_H_BU : LASX3R_XXX<0x74320000>;
+def XVADDWOD_W_HU : LASX3R_XXX<0x74328000>;
+def XVADDWOD_D_WU : LASX3R_XXX<0x74330000>;
+def XVADDWOD_Q_DU : LASX3R_XXX<0x74338000>;
+
+def XVSUBWEV_H_BU : LASX3R_XXX<0x74300000>;
+def XVSUBWEV_W_HU : LASX3R_XXX<0x74308000>;
+def XVSUBWEV_D_WU : LASX3R_XXX<0x74310000>;
+def XVSUBWEV_Q_DU : LASX3R_XXX<0x74318000>;
+def XVSUBWOD_H_BU : LASX3R_XXX<0x74340000>;
+def XVSUBWOD_W_HU : LASX3R_XXX<0x74348000>;
+def XVSUBWOD_D_WU : LASX3R_XXX<0x74350000>;
+def XVSUBWOD_Q_DU : LASX3R_XXX<0x74358000>;
+
+def XVADDWEV_H_BU_B : LASX3R_XXX<0x743e0000>;
+def XVADDWEV_W_HU_H : LASX3R_XXX<0x743e8000>;
+def XVADDWEV_D_WU_W : LASX3R_XXX<0x743f0000>;
+def XVADDWEV_Q_DU_D : LASX3R_XXX<0x743f8000>;
+def XVADDWOD_H_BU_B : LASX3R_XXX<0x74400000>;
+def XVADDWOD_W_HU_H : LASX3R_XXX<0x74408000>;
+def XVADDWOD_D_WU_W : LASX3R_XXX<0x74410000>;
+def XVADDWOD_Q_DU_D : LASX3R_XXX<0x74418000>;
+
+def XVAVG_B : LASX3R_XXX<0x74640000>;
+def XVAVG_H : LASX3R_XXX<0x74648000>;
+def XVAVG_W : LASX3R_XXX<0x74650000>;
+def XVAVG_D : LASX3R_XXX<0x74658000>;
+def XVAVG_BU : LASX3R_XXX<0x74660000>;
+def XVAVG_HU : LASX3R_XXX<0x74668000>;
+def XVAVG_WU : LASX3R_XXX<0x74670000>;
+def XVAVG_DU : LASX3R_XXX<0x74678000>;
+def XVAVGR_B : LASX3R_XXX<0x74680000>;
+def XVAVGR_H : LASX3R_XXX<0x74688000>;
+def XVAVGR_W : LASX3R_XXX<0x74690000>;
+def XVAVGR_D : LASX3R_XXX<0x74698000>;
+def XVAVGR_BU : LASX3R_XXX<0x746a0000>;
+def XVAVGR_HU : LASX3R_XXX<0x746a8000>;
+def XVAVGR_WU : LASX3R_XXX<0x746b0000>;
+def XVAVGR_DU : LASX3R_XXX<0x746b8000>;
+
+def XVABSD_B : LASX3R_XXX<0x74600000>;
+def XVABSD_H : LASX3R_XXX<0x74608000>;
+def XVABSD_W : LASX3R_XXX<0x74610000>;
+def XVABSD_D : LASX3R_XXX<0x74618000>;
+def XVABSD_BU : LASX3R_XXX<0x74620000>;
+def XVABSD_HU : LASX3R_XXX<0x74628000>;
+def XVABSD_WU : LASX3R_XXX<0x74630000>;
+def XVABSD_DU : LASX3R_XXX<0x74638000>;
+
+def XVADDA_B : LASX3R_XXX<0x745c0000>;
+def XVADDA_H : LASX3R_XXX<0x745c8000>;
+def XVADDA_W : LASX3R_XXX<0x745d0000>;
+def XVADDA_D : LASX3R_XXX<0x745d8000>;
+
+def XVMAX_B : LASX3R_XXX<0x74700000>;
+def XVMAX_H : LASX3R_XXX<0x74708000>;
+def XVMAX_W : LASX3R_XXX<0x74710000>;
+def XVMAX_D : LASX3R_XXX<0x74718000>;
+def XVMAXI_B : LASX2RI5_XXI<0x76900000, simm5>;
+def XVMAXI_H : LASX2RI5_XXI<0x76908000, simm5>;
+def XVMAXI_W : LASX2RI5_XXI<0x76910000, simm5>;
+def XVMAXI_D : LASX2RI5_XXI<0x76918000, simm5>;
+def XVMAX_BU : LASX3R_XXX<0x74740000>;
+def XVMAX_HU : LASX3R_XXX<0x74748000>;
+def XVMAX_WU : LASX3R_XXX<0x74750000>;
+def XVMAX_DU : LASX3R_XXX<0x74758000>;
+def XVMAXI_BU : LASX2RI5_XXI<0x76940000>;
+def XVMAXI_HU : LASX2RI5_XXI<0x76948000>;
+def XVMAXI_WU : LASX2RI5_XXI<0x76950000>;
+def XVMAXI_DU : LASX2RI5_XXI<0x76958000>;
+
+def XVMIN_B : LASX3R_XXX<0x74720000>;
+def XVMIN_H : LASX3R_XXX<0x74728000>;
+def XVMIN_W : LASX3R_XXX<0x74730000>;
+def XVMIN_D : LASX3R_XXX<0x74738000>;
+def XVMINI_B : LASX2RI5_XXI<0x76920000, simm5>;
+def XVMINI_H : LASX2RI5_XXI<0x76928000, simm5>;
+def XVMINI_W : LASX2RI5_XXI<0x76930000, simm5>;
+def XVMINI_D : LASX2RI5_XXI<0x76938000, simm5>;
+def XVMIN_BU : LASX3R_XXX<0x74760000>;
+def XVMIN_HU : LASX3R_XXX<0x74768000>;
+def XVMIN_WU : LASX3R_XXX<0x74770000>;
+def XVMIN_DU : LASX3R_XXX<0x74778000>;
+def XVMINI_BU : LASX2RI5_XXI<0x76960000>;
+def XVMINI_HU : LASX2RI5_XXI<0x76968000>;
+def XVMINI_WU : LASX2RI5_XXI<0x76970000>;
+def XVMINI_DU : LASX2RI5_XXI<0x76978000>;
+
+def XVMUL_B : LASX3R_XXX<0x74840000>;
+def XVMUL_H : LASX3R_XXX<0x74848000>;
+def XVMUL_W : LASX3R_XXX<0x74850000>;
+def XVMUL_D : LASX3R_XXX<0x74858000>;
+
+def XVMUH_B : LASX3R_XXX<0x74860000>;
+def XVMUH_H : LASX3R_XXX<0x74868000>;
+def XVMUH_W : LASX3R_XXX<0x74870000>;
+def XVMUH_D : LASX3R_XXX<0x74878000>;
+def XVMUH_BU : LASX3R_XXX<0x74880000>;
+def XVMUH_HU : LASX3R_XXX<0x74888000>;
+def XVMUH_WU : LASX3R_XXX<0x74890000>;
+def XVMUH_DU : LASX3R_XXX<0x74898000>;
+
+def XVMULWEV_H_B : LASX3R_XXX<0x74900000>;
+def XVMULWEV_W_H : LASX3R_XXX<0x74908000>;
+def XVMULWEV_D_W : LASX3R_XXX<0x74910000>;
+def XVMULWEV_Q_D : LASX3R_XXX<0x74918000>;
+def XVMULWOD_H_B : LASX3R_XXX<0x74920000>;
+def XVMULWOD_W_H : LASX3R_XXX<0x74928000>;
+def XVMULWOD_D_W : LASX3R_XXX<0x74930000>;
+def XVMULWOD_Q_D : LASX3R_XXX<0x74938000>;
+def XVMULWEV_H_BU : LASX3R_XXX<0x74980000>;
+def XVMULWEV_W_HU : LASX3R_XXX<0x74988000>;
+def XVMULWEV_D_WU : LASX3R_XXX<0x74990000>;
+def XVMULWEV_Q_DU : LASX3R_XXX<0x74998000>;
+def XVMULWOD_H_BU : LASX3R_XXX<0x749a0000>;
+def XVMULWOD_W_HU : LASX3R_XXX<0x749a8000>;
+def XVMULWOD_D_WU : LASX3R_XXX<0x749b0000>;
+def XVMULWOD_Q_DU : LASX3R_XXX<0x749b8000>;
+def XVMULWEV_H_BU_B : LASX3R_XXX<0x74a00000>;
+def XVMULWEV_W_HU_H : LASX3R_XXX<0x74a08000>;
+def XVMULWEV_D_WU_W : LASX3R_XXX<0x74a10000>;
+def XVMULWEV_Q_DU_D : LASX3R_XXX<0x74a18000>;
+def XVMULWOD_H_BU_B : LASX3R_XXX<0x74a20000>;
+def XVMULWOD_W_HU_H : LASX3R_XXX<0x74a28000>;
+def XVMULWOD_D_WU_W : LASX3R_XXX<0x74a30000>;
+def XVMULWOD_Q_DU_D : LASX3R_XXX<0x74a38000>;
+
+def XVMADD_B : LASX3R_XXXX<0x74a80000>;
+def XVMADD_H : LASX3R_XXXX<0x74a88000>;
+def XVMADD_W : LASX3R_XXXX<0x74a90000>;
+def XVMADD_D : LASX3R_XXXX<0x74a98000>;
+
+def XVMSUB_B : LASX3R_XXXX<0x74aa0000>;
+def XVMSUB_H : LASX3R_XXXX<0x74aa8000>;
+def XVMSUB_W : LASX3R_XXXX<0x74ab0000>;
+def XVMSUB_D : LASX3R_XXXX<0x74ab8000>;
+
+def XVMADDWEV_H_B : LASX3R_XXXX<0x74ac0000>;
+def XVMADDWEV_W_H : LASX3R_XXXX<0x74ac8000>;
+def XVMADDWEV_D_W : LASX3R_XXXX<0x74ad0000>;
+def XVMADDWEV_Q_D : LASX3R_XXXX<0x74ad8000>;
+def XVMADDWOD_H_B : LASX3R_XXXX<0x74ae0000>;
+def XVMADDWOD_W_H : LASX3R_XXXX<0x74ae8000>;
+def XVMADDWOD_D_W : LASX3R_XXXX<0x74af0000>;
+def XVMADDWOD_Q_D : LASX3R_XXXX<0x74af8000>;
+def XVMADDWEV_H_BU : LASX3R_XXXX<0x74b40000>;
+def XVMADDWEV_W_HU : LASX3R_XXXX<0x74b48000>;
+def XVMADDWEV_D_WU : LASX3R_XXXX<0x74b50000>;
+def XVMADDWEV_Q_DU : LASX3R_XXXX<0x74b58000>;
+def XVMADDWOD_H_BU : LASX3R_XXXX<0x74b60000>;
+def XVMADDWOD_W_HU : LASX3R_XXXX<0x74b68000>;
+def XVMADDWOD_D_WU : LASX3R_XXXX<0x74b70000>;
+def XVMADDWOD_Q_DU : LASX3R_XXXX<0x74b78000>;
+def XVMADDWEV_H_BU_B : LASX3R_XXXX<0x74bc0000>;
+def XVMADDWEV_W_HU_H : LASX3R_XXXX<0x74bc8000>;
+def XVMADDWEV_D_WU_W : LASX3R_XXXX<0x74bd0000>;
+def XVMADDWEV_Q_DU_D : LASX3R_XXXX<0x74bd8000>;
+def XVMADDWOD_H_BU_B : LASX3R_XXXX<0x74be0000>;
+def XVMADDWOD_W_HU_H : LASX3R_XXXX<0x74be8000>;
+def XVMADDWOD_D_WU_W : LASX3R_XXXX<0x74bf0000>;
+def XVMADDWOD_Q_DU_D : LASX3R_XXXX<0x74bf8000>;
+
+def XVDIV_B : LASX3R_XXX<0x74e00000>;
+def XVDIV_H : LASX3R_XXX<0x74e08000>;
+def XVDIV_W : LASX3R_XXX<0x74e10000>;
+def XVDIV_D : LASX3R_XXX<0x74e18000>;
+def XVDIV_BU : LASX3R_XXX<0x74e40000>;
+def XVDIV_HU : LASX3R_XXX<0x74e48000>;
+def XVDIV_WU : LASX3R_XXX<0x74e50000>;
+def XVDIV_DU : LASX3R_XXX<0x74e58000>;
+
+def XVMOD_B : LASX3R_XXX<0x74e20000>;
+def XVMOD_H : LASX3R_XXX<0x74e28000>;
+def XVMOD_W : LASX3R_XXX<0x74e30000>;
+def XVMOD_D : LASX3R_XXX<0x74e38000>;
+def XVMOD_BU : LASX3R_XXX<0x74e60000>;
+def XVMOD_HU : LASX3R_XXX<0x74e68000>;
+def XVMOD_WU : LASX3R_XXX<0x74e70000>;
+def XVMOD_DU : LASX3R_XXX<0x74e78000>;
+
+def XVSAT_B : LASX2RI3_XXI<0x77242000>;
+def XVSAT_H : LASX2RI4_XXI<0x77244000>;
+def XVSAT_W : LASX2RI5_XXI<0x77248000>;
+def XVSAT_D : LASX2RI6_XXI<0x77250000>;
+def XVSAT_BU : LASX2RI3_XXI<0x77282000>;
+def XVSAT_HU : LASX2RI4_XXI<0x77284000>;
+def XVSAT_WU : LASX2RI5_XXI<0x77288000>;
+def XVSAT_DU : LASX2RI6_XXI<0x77290000>;
+
+def XVEXTH_H_B : LASX2R_XX<0x769ee000>;
+def XVEXTH_W_H : LASX2R_XX<0x769ee400>;
+def XVEXTH_D_W : LASX2R_XX<0x769ee800>;
+def XVEXTH_Q_D : LASX2R_XX<0x769eec00>;
+def XVEXTH_HU_BU : LASX2R_XX<0x769ef000>;
+def XVEXTH_WU_HU : LASX2R_XX<0x769ef400>;
+def XVEXTH_DU_WU : LASX2R_XX<0x769ef800>;
+def XVEXTH_QU_DU : LASX2R_XX<0x769efc00>;
+
+def VEXT2XV_H_B : LASX2R_XX<0x769f1000>;
+def VEXT2XV_W_B : LASX2R_XX<0x769f1400>;
+def VEXT2XV_D_B : LASX2R_XX<0x769f1800>;
+def VEXT2XV_W_H : LASX2R_XX<0x769f1c00>;
+def VEXT2XV_D_H : LASX2R_XX<0x769f2000>;
+def VEXT2XV_D_W : LASX2R_XX<0x769f2400>;
+def VEXT2XV_HU_BU : LASX2R_XX<0x769f2800>;
+def VEXT2XV_WU_BU : LASX2R_XX<0x769f2c00>;
+def VEXT2XV_DU_BU : LASX2R_XX<0x769f3000>;
+def VEXT2XV_WU_HU : LASX2R_XX<0x769f3400>;
+def VEXT2XV_DU_HU : LASX2R_XX<0x769f3800>;
+def VEXT2XV_DU_WU : LASX2R_XX<0x769f3c00>;
+
+def XVHSELI_D : LASX2RI5_XXI<0x769f8000>;
+
+def XVSIGNCOV_B : LASX3R_XXX<0x752e0000>;
+def XVSIGNCOV_H : LASX3R_XXX<0x752e8000>;
+def XVSIGNCOV_W : LASX3R_XXX<0x752f0000>;
+def XVSIGNCOV_D : LASX3R_XXX<0x752f8000>;
+
+def XVMSKLTZ_B : LASX2R_XX<0x769c4000>;
+def XVMSKLTZ_H : LASX2R_XX<0x769c4400>;
+def XVMSKLTZ_W : LASX2R_XX<0x769c4800>;
+def XVMSKLTZ_D : LASX2R_XX<0x769c4c00>;
+
+def XVMSKGEZ_B : LASX2R_XX<0x769c5000>;
+
+def XVMSKNZ_B : LASX2R_XX<0x769c6000>;
+
+def XVLDI : LASX1RI13_XI<0x77e00000>;
+
+def XVAND_V : LASX3R_XXX<0x75260000>;
+def XVOR_V : LASX3R_XXX<0x75268000>;
+def XVXOR_V : LASX3R_XXX<0x75270000>;
+def XVNOR_V : LASX3R_XXX<0x75278000>;
+def XVANDN_V : LASX3R_XXX<0x75280000>;
+def XVORN_V : LASX3R_XXX<0x75288000>;
+
+def XVANDI_B : LASX2RI8_XXI<0x77d00000>;
+def XVORI_B : LASX2RI8_XXI<0x77d40000>;
+def XVXORI_B : LASX2RI8_XXI<0x77d80000>;
+def XVNORI_B : LASX2RI8_XXI<0x77dc0000>;
+
+def XVSLL_B : LASX3R_XXX<0x74e80000>;
+def XVSLL_H : LASX3R_XXX<0x74e88000>;
+def XVSLL_W : LASX3R_XXX<0x74e90000>;
+def XVSLL_D : LASX3R_XXX<0x74e98000>;
+def XVSLLI_B : LASX2RI3_XXI<0x772c2000>;
+def XVSLLI_H : LASX2RI4_XXI<0x772c4000>;
+def XVSLLI_W : LASX2RI5_XXI<0x772c8000>;
+def XVSLLI_D : LASX2RI6_XXI<0x772d0000>;
+
+def XVSRL_B : LASX3R_XXX<0x74ea0000>;
+def XVSRL_H : LASX3R_XXX<0x74ea8000>;
+def XVSRL_W : LASX3R_XXX<0x74eb0000>;
+def XVSRL_D : LASX3R_XXX<0x74eb8000>;
+def XVSRLI_B : LASX2RI3_XXI<0x77302000>;
+def XVSRLI_H : LASX2RI4_XXI<0x77304000>;
+def XVSRLI_W : LASX2RI5_XXI<0x77308000>;
+def XVSRLI_D : LASX2RI6_XXI<0x77310000>;
+
+def XVSRA_B : LASX3R_XXX<0x74ec0000>;
+def XVSRA_H : LASX3R_XXX<0x74ec8000>;
+def XVSRA_W : LASX3R_XXX<0x74ed0000>;
+def XVSRA_D : LASX3R_XXX<0x74ed8000>;
+def XVSRAI_B : LASX2RI3_XXI<0x77342000>;
+def XVSRAI_H : LASX2RI4_XXI<0x77344000>;
+def XVSRAI_W : LASX2RI5_XXI<0x77348000>;
+def XVSRAI_D : LASX2RI6_XXI<0x77350000>;
+
+def XVROTR_B : LASX3R_XXX<0x74ee0000>;
+def XVROTR_H : LASX3R_XXX<0x74ee8000>;
+def XVROTR_W : LASX3R_XXX<0x74ef0000>;
+def XVROTR_D : LASX3R_XXX<0x74ef8000>;
+def XVROTRI_B : LASX2RI3_XXI<0x76a02000>;
+def XVROTRI_H : LASX2RI4_XXI<0x76a04000>;
+def XVROTRI_W : LASX2RI5_XXI<0x76a08000>;
+def XVROTRI_D : LASX2RI6_XXI<0x76a10000>;
+
+def XVSLLWIL_H_B : LASX2RI3_XXI<0x77082000>;
+def XVSLLWIL_W_H : LASX2RI4_XXI<0x77084000>;
+def XVSLLWIL_D_W : LASX2RI5_XXI<0x77088000>;
+def XVEXTL_Q_D : LASX2R_XX<0x77090000>;
+def XVSLLWIL_HU_BU : LASX2RI3_XXI<0x770c2000>;
+def XVSLLWIL_WU_HU : LASX2RI4_XXI<0x770c4000>;
+def XVSLLWIL_DU_WU : LASX2RI5_XXI<0x770c8000>;
+def XVEXTL_QU_DU : LASX2R_XX<0x770d0000>;
+
+def XVSRLR_B : LASX3R_XXX<0x74f00000>;
+def XVSRLR_H : LASX3R_XXX<0x74f08000>;
+def XVSRLR_W : LASX3R_XXX<0x74f10000>;
+def XVSRLR_D : LASX3R_XXX<0x74f18000>;
+def XVSRLRI_B : LASX2RI3_XXI<0x76a42000>;
+def XVSRLRI_H : LASX2RI4_XXI<0x76a44000>;
+def XVSRLRI_W : LASX2RI5_XXI<0x76a48000>;
+def XVSRLRI_D : LASX2RI6_XXI<0x76a50000>;
+
+def XVSRAR_B : LASX3R_XXX<0x74f20000>;
+def XVSRAR_H : LASX3R_XXX<0x74f28000>;
+def XVSRAR_W : LASX3R_XXX<0x74f30000>;
+def XVSRAR_D : LASX3R_XXX<0x74f38000>;
+def XVSRARI_B : LASX2RI3_XXI<0x76a82000>;
+def XVSRARI_H : LASX2RI4_XXI<0x76a84000>;
+def XVSRARI_W : LASX2RI5_XXI<0x76a88000>;
+def XVSRARI_D : LASX2RI6_XXI<0x76a90000>;
+
+def XVSRLN_B_H : LASX3R_XXX<0x74f48000>;
+def XVSRLN_H_W : LASX3R_XXX<0x74f50000>;
+def XVSRLN_W_D : LASX3R_XXX<0x74f58000>;
+def XVSRAN_B_H : LASX3R_XXX<0x74f68000>;
+def XVSRAN_H_W : LASX3R_XXX<0x74f70000>;
+def XVSRAN_W_D : LASX3R_XXX<0x74f78000>;
+
+def XVSRLNI_B_H : LASX2RI4_XXXI<0x77404000>;
+def XVSRLNI_H_W : LASX2RI5_XXXI<0x77408000>;
+def XVSRLNI_W_D : LASX2RI6_XXXI<0x77410000>;
+def XVSRLNI_D_Q : LASX2RI7_XXXI<0x77420000>;
+def XVSRANI_B_H : LASX2RI4_XXXI<0x77584000>;
+def XVSRANI_H_W : LASX2RI5_XXXI<0x77588000>;
+def XVSRANI_W_D : LASX2RI6_XXXI<0x77590000>;
+def XVSRANI_D_Q : LASX2RI7_XXXI<0x775a0000>;
+
+def XVSRLRN_B_H : LASX3R_XXX<0x74f88000>;
+def XVSRLRN_H_W : LASX3R_XXX<0x74f90000>;
+def XVSRLRN_W_D : LASX3R_XXX<0x74f98000>;
+def XVSRARN_B_H : LASX3R_XXX<0x74fa8000>;
+def XVSRARN_H_W : LASX3R_XXX<0x74fb0000>;
+def XVSRARN_W_D : LASX3R_XXX<0x74fb8000>;
+
+def XVSRLRNI_B_H : LASX2RI4_XXXI<0x77444000>;
+def XVSRLRNI_H_W : LASX2RI5_XXXI<0x77448000>;
+def XVSRLRNI_W_D : LASX2RI6_XXXI<0x77450000>;
+def XVSRLRNI_D_Q : LASX2RI7_XXXI<0x77460000>;
+def XVSRARNI_B_H : LASX2RI4_XXXI<0x775c4000>;
+def XVSRARNI_H_W : LASX2RI5_XXXI<0x775c8000>;
+def XVSRARNI_W_D : LASX2RI6_XXXI<0x775d0000>;
+def XVSRARNI_D_Q : LASX2RI7_XXXI<0x775e0000>;
+
+def XVSSRLN_B_H : LASX3R_XXX<0x74fc8000>;
+def XVSSRLN_H_W : LASX3R_XXX<0x74fd0000>;
+def XVSSRLN_W_D : LASX3R_XXX<0x74fd8000>;
+def XVSSRAN_B_H : LASX3R_XXX<0x74fe8000>;
+def XVSSRAN_H_W : LASX3R_XXX<0x74ff0000>;
+def XVSSRAN_W_D : LASX3R_XXX<0x74ff8000>;
+def XVSSRLN_BU_H : LASX3R_XXX<0x75048000>;
+def XVSSRLN_HU_W : LASX3R_XXX<0x75050000>;
+def XVSSRLN_WU_D : LASX3R_XXX<0x75058000>;
+def XVSSRAN_BU_H : LASX3R_XXX<0x75068000>;
+def XVSSRAN_HU_W : LASX3R_XXX<0x75070000>;
+def XVSSRAN_WU_D : LASX3R_XXX<0x75078000>;
+
+def XVSSRLNI_B_H : LASX2RI4_XXXI<0x77484000>;
+def XVSSRLNI_H_W : LASX2RI5_XXXI<0x77488000>;
+def XVSSRLNI_W_D : LASX2RI6_XXXI<0x77490000>;
+def XVSSRLNI_D_Q : LASX2RI7_XXXI<0x774a0000>;
+def XVSSRANI_B_H : LASX2RI4_XXXI<0x77604000>;
+def XVSSRANI_H_W : LASX2RI5_XXXI<0x77608000>;
+def XVSSRANI_W_D : LASX2RI6_XXXI<0x77610000>;
+def XVSSRANI_D_Q : LASX2RI7_XXXI<0x77620000>;
+def XVSSRLNI_BU_H : LASX2RI4_XXXI<0x774c4000>;
+def XVSSRLNI_HU_W : LASX2RI5_XXXI<0x774c8000>;
+def XVSSRLNI_WU_D : LASX2RI6_XXXI<0x774d0000>;
+def XVSSRLNI_DU_Q : LASX2RI7_XXXI<0x774e0000>;
+def XVSSRANI_BU_H : LASX2RI4_XXXI<0x77644000>;
+def XVSSRANI_HU_W : LASX2RI5_XXXI<0x77648000>;
+def XVSSRANI_WU_D : LASX2RI6_XXXI<0x77650000>;
+def XVSSRANI_DU_Q : LASX2RI7_XXXI<0x77660000>;
+
+def XVSSRLRN_B_H : LASX3R_XXX<0x75008000>;
+def XVSSRLRN_H_W : LASX3R_XXX<0x75010000>;
+def XVSSRLRN_W_D : LASX3R_XXX<0x75018000>;
+def XVSSRARN_B_H : LASX3R_XXX<0x75028000>;
+def XVSSRARN_H_W : LASX3R_XXX<0x75030000>;
+def XVSSRARN_W_D : LASX3R_XXX<0x75038000>;
+def XVSSRLRN_BU_H : LASX3R_XXX<0x75088000>;
+def XVSSRLRN_HU_W : LASX3R_XXX<0x75090000>;
+def XVSSRLRN_WU_D : LASX3R_XXX<0x75098000>;
+def XVSSRARN_BU_H : LASX3R_XXX<0x750a8000>;
+def XVSSRARN_HU_W : LASX3R_XXX<0x750b0000>;
+def XVSSRARN_WU_D : LASX3R_XXX<0x750b8000>;
+
+def XVSSRLRNI_B_H : LASX2RI4_XXXI<0x77504000>;
+def XVSSRLRNI_H_W : LASX2RI5_XXXI<0x77508000>;
+def XVSSRLRNI_W_D : LASX2RI6_XXXI<0x77510000>;
+def XVSSRLRNI_D_Q : LASX2RI7_XXXI<0x77520000>;
+def XVSSRARNI_B_H : LASX2RI4_XXXI<0x77684000>;
+def XVSSRARNI_H_W : LASX2RI5_XXXI<0x77688000>;
+def XVSSRARNI_W_D : LASX2RI6_XXXI<0x77690000>;
+def XVSSRARNI_D_Q : LASX2RI7_XXXI<0x776a0000>;
+def XVSSRLRNI_BU_H : LASX2RI4_XXXI<0x77544000>;
+def XVSSRLRNI_HU_W : LASX2RI5_XXXI<0x77548000>;
+def XVSSRLRNI_WU_D : LASX2RI6_XXXI<0x77550000>;
+def XVSSRLRNI_DU_Q : LASX2RI7_XXXI<0x77560000>;
+def XVSSRARNI_BU_H : LASX2RI4_XXXI<0x776c4000>;
+def XVSSRARNI_HU_W : LASX2RI5_XXXI<0x776c8000>;
+def XVSSRARNI_WU_D : LASX2RI6_XXXI<0x776d0000>;
+def XVSSRARNI_DU_Q : LASX2RI7_XXXI<0x776e0000>;
+
+def XVCLO_B : LASX2R_XX<0x769c0000>;
+def XVCLO_H : LASX2R_XX<0x769c0400>;
+def XVCLO_W : LASX2R_XX<0x769c0800>;
+def XVCLO_D : LASX2R_XX<0x769c0c00>;
+def XVCLZ_B : LASX2R_XX<0x769c1000>;
+def XVCLZ_H : LASX2R_XX<0x769c1400>;
+def XVCLZ_W : LASX2R_XX<0x769c1800>;
+def XVCLZ_D : LASX2R_XX<0x769c1c00>;
+
+def XVPCNT_B : LASX2R_XX<0x769c2000>;
+def XVPCNT_H : LASX2R_XX<0x769c2400>;
+def XVPCNT_W : LASX2R_XX<0x769c2800>;
+def XVPCNT_D : LASX2R_XX<0x769c2c00>;
+
+def XVBITCLR_B : LASX3R_XXX<0x750c0000>;
+def XVBITCLR_H : LASX3R_XXX<0x750c8000>;
+def XVBITCLR_W : LASX3R_XXX<0x750d0000>;
+def XVBITCLR_D : LASX3R_XXX<0x750d8000>;
+def XVBITCLRI_B : LASX2RI3_XXI<0x77102000>;
+def XVBITCLRI_H : LASX2RI4_XXI<0x77104000>;
+def XVBITCLRI_W : LASX2RI5_XXI<0x77108000>;
+def XVBITCLRI_D : LASX2RI6_XXI<0x77110000>;
+
+def XVBITSET_B : LASX3R_XXX<0x750e0000>;
+def XVBITSET_H : LASX3R_XXX<0x750e8000>;
+def XVBITSET_W : LASX3R_XXX<0x750f0000>;
+def XVBITSET_D : LASX3R_XXX<0x750f8000>;
+def XVBITSETI_B : LASX2RI3_XXI<0x77142000>;
+def XVBITSETI_H : LASX2RI4_XXI<0x77144000>;
+def XVBITSETI_W : LASX2RI5_XXI<0x77148000>;
+def XVBITSETI_D : LASX2RI6_XXI<0x77150000>;
+
+def XVBITREV_B : LASX3R_XXX<0x75100000>;
+def XVBITREV_H : LASX3R_XXX<0x75108000>;
+def XVBITREV_W : LASX3R_XXX<0x75110000>;
+def XVBITREV_D : LASX3R_XXX<0x75118000>;
+def XVBITREVI_B : LASX2RI3_XXI<0x77182000>;
+def XVBITREVI_H : LASX2RI4_XXI<0x77184000>;
+def XVBITREVI_W : LASX2RI5_XXI<0x77188000>;
+def XVBITREVI_D : LASX2RI6_XXI<0x77190000>;
+
+def XVFRSTP_B : LASX3R_XXXX<0x752b0000>;
+def XVFRSTP_H : LASX3R_XXXX<0x752b8000>;
+def XVFRSTPI_B : LASX2RI5_XXXI<0x769a0000>;
+def XVFRSTPI_H : LASX2RI5_XXXI<0x769a8000>;
+
+def XVFADD_S : LASX3R_XXX<0x75308000>;
+def XVFADD_D : LASX3R_XXX<0x75310000>;
+def XVFSUB_S : LASX3R_XXX<0x75328000>;
+def XVFSUB_D : LASX3R_XXX<0x75330000>;
+def XVFMUL_S : LASX3R_XXX<0x75388000>;
+def XVFMUL_D : LASX3R_XXX<0x75390000>;
+def XVFDIV_S : LASX3R_XXX<0x753a8000>;
+def XVFDIV_D : LASX3R_XXX<0x753b0000>;
+
+def XVFMADD_S : LASX4R_XXXX<0x0a100000>;
+def XVFMADD_D : LASX4R_XXXX<0x0a200000>;
+def XVFMSUB_S : LASX4R_XXXX<0x0a500000>;
+def XVFMSUB_D : LASX4R_XXXX<0x0a600000>;
+def XVFNMADD_S : LASX4R_XXXX<0x0a900000>;
+def XVFNMADD_D : LASX4R_XXXX<0x0aa00000>;
+def XVFNMSUB_S : LASX4R_XXXX<0x0ad00000>;
+def XVFNMSUB_D : LASX4R_XXXX<0x0ae00000>;
+
+def XVFMAX_S : LASX3R_XXX<0x753c8000>;
+def XVFMAX_D : LASX3R_XXX<0x753d0000>;
+def XVFMIN_S : LASX3R_XXX<0x753e8000>;
+def XVFMIN_D : LASX3R_XXX<0x753f0000>;
+
+def XVFMAXA_S : LASX3R_XXX<0x75408000>;
+def XVFMAXA_D : LASX3R_XXX<0x75410000>;
+def XVFMINA_S : LASX3R_XXX<0x75428000>;
+def XVFMINA_D : LASX3R_XXX<0x75430000>;
+
+def XVFLOGB_S : LASX2R_XX<0x769cc400>;
+def XVFLOGB_D : LASX2R_XX<0x769cc800>;
+
+def XVFCLASS_S : LASX2R_XX<0x769cd400>;
+def XVFCLASS_D : LASX2R_XX<0x769cd800>;
+
+def XVFSQRT_S : LASX2R_XX<0x769ce400>;
+def XVFSQRT_D : LASX2R_XX<0x769ce800>;
+def XVFRECIP_S : LASX2R_XX<0x769cf400>;
+def XVFRECIP_D : LASX2R_XX<0x769cf800>;
+def XVFRSQRT_S : LASX2R_XX<0x769d0400>;
+def XVFRSQRT_D : LASX2R_XX<0x769d0800>;
+
+def XVFCVTL_S_H : LASX2R_XX<0x769de800>;
+def XVFCVTH_S_H : LASX2R_XX<0x769dec00>;
+def XVFCVTL_D_S : LASX2R_XX<0x769df000>;
+def XVFCVTH_D_S : LASX2R_XX<0x769df400>;
+def XVFCVT_H_S : LASX3R_XXX<0x75460000>;
+def XVFCVT_S_D : LASX3R_XXX<0x75468000>;
+
+def XVFRINTRNE_S : LASX2R_XX<0x769d7400>;
+def XVFRINTRNE_D : LASX2R_XX<0x769d7800>;
+def XVFRINTRZ_S : LASX2R_XX<0x769d6400>;
+def XVFRINTRZ_D : LASX2R_XX<0x769d6800>;
+def XVFRINTRP_S : LASX2R_XX<0x769d5400>;
+def XVFRINTRP_D : LASX2R_XX<0x769d5800>;
+def XVFRINTRM_S : LASX2R_XX<0x769d4400>;
+def XVFRINTRM_D : LASX2R_XX<0x769d4800>;
+def XVFRINT_S : LASX2R_XX<0x769d3400>;
+def XVFRINT_D : LASX2R_XX<0x769d3800>;
+
+def XVFTINTRNE_W_S : LASX2R_XX<0x769e5000>;
+def XVFTINTRNE_L_D : LASX2R_XX<0x769e5400>;
+def XVFTINTRZ_W_S : LASX2R_XX<0x769e4800>;
+def XVFTINTRZ_L_D : LASX2R_XX<0x769e4c00>;
+def XVFTINTRP_W_S : LASX2R_XX<0x769e4000>;
+def XVFTINTRP_L_D : LASX2R_XX<0x769e4400>;
+def XVFTINTRM_W_S : LASX2R_XX<0x769e3800>;
+def XVFTINTRM_L_D : LASX2R_XX<0x769e3c00>;
+def XVFTINT_W_S : LASX2R_XX<0x769e3000>;
+def XVFTINT_L_D : LASX2R_XX<0x769e3400>;
+def XVFTINTRZ_WU_S : LASX2R_XX<0x769e7000>;
+def XVFTINTRZ_LU_D : LASX2R_XX<0x769e7400>;
+def XVFTINT_WU_S : LASX2R_XX<0x769e5800>;
+def XVFTINT_LU_D : LASX2R_XX<0x769e5c00>;
+
+def XVFTINTRNE_W_D : LASX3R_XXX<0x754b8000>;
+def XVFTINTRZ_W_D : LASX3R_XXX<0x754b0000>;
+def XVFTINTRP_W_D : LASX3R_XXX<0x754a8000>;
+def XVFTINTRM_W_D : LASX3R_XXX<0x754a0000>;
+def XVFTINT_W_D : LASX3R_XXX<0x75498000>;
+
+def XVFTINTRNEL_L_S : LASX2R_XX<0x769ea000>;
+def XVFTINTRNEH_L_S : LASX2R_XX<0x769ea400>;
+def XVFTINTRZL_L_S : LASX2R_XX<0x769e9800>;
+def XVFTINTRZH_L_S : LASX2R_XX<0x769e9c00>;
+def XVFTINTRPL_L_S : LASX2R_XX<0x769e9000>;
+def XVFTINTRPH_L_S : LASX2R_XX<0x769e9400>;
+def XVFTINTRML_L_S : LASX2R_XX<0x769e8800>;
+def XVFTINTRMH_L_S : LASX2R_XX<0x769e8c00>;
+def XVFTINTL_L_S : LASX2R_XX<0x769e8000>;
+def XVFTINTH_L_S : LASX2R_XX<0x769e8400>;
+
+def XVFFINT_S_W : LASX2R_XX<0x769e0000>;
+def XVFFINT_D_L : LASX2R_XX<0x769e0800>;
+def XVFFINT_S_WU : LASX2R_XX<0x769e0400>;
+def XVFFINT_D_LU : LASX2R_XX<0x769e0c00>;
+def XVFFINTL_D_W : LASX2R_XX<0x769e1000>;
+def XVFFINTH_D_W : LASX2R_XX<0x769e1400>;
+def XVFFINT_S_L : LASX3R_XXX<0x75480000>;
+
+def XVSEQ_B : LASX3R_XXX<0x74000000>;
+def XVSEQ_H : LASX3R_XXX<0x74008000>;
+def XVSEQ_W : LASX3R_XXX<0x74010000>;
+def XVSEQ_D : LASX3R_XXX<0x74018000>;
+def XVSEQI_B : LASX2RI5_XXI<0x76800000, simm5>;
+def XVSEQI_H : LASX2RI5_XXI<0x76808000, simm5>;
+def XVSEQI_W : LASX2RI5_XXI<0x76810000, simm5>;
+def XVSEQI_D : LASX2RI5_XXI<0x76818000, simm5>;
+
+def XVSLE_B : LASX3R_XXX<0x74020000>;
+def XVSLE_H : LASX3R_XXX<0x74028000>;
+def XVSLE_W : LASX3R_XXX<0x74030000>;
+def XVSLE_D : LASX3R_XXX<0x74038000>;
+def XVSLEI_B : LASX2RI5_XXI<0x76820000, simm5>;
+def XVSLEI_H : LASX2RI5_XXI<0x76828000, simm5>;
+def XVSLEI_W : LASX2RI5_XXI<0x76830000, simm5>;
+def XVSLEI_D : LASX2RI5_XXI<0x76838000, simm5>;
+
+def XVSLE_BU : LASX3R_XXX<0x74040000>;
+def XVSLE_HU : LASX3R_XXX<0x74048000>;
+def XVSLE_WU : LASX3R_XXX<0x74050000>;
+def XVSLE_DU : LASX3R_XXX<0x74058000>;
+def XVSLEI_BU : LASX2RI5_XXI<0x76840000>;
+def XVSLEI_HU : LASX2RI5_XXI<0x76848000>;
+def XVSLEI_WU : LASX2RI5_XXI<0x76850000>;
+def XVSLEI_DU : LASX2RI5_XXI<0x76858000>;
+
+def XVSLT_B : LASX3R_XXX<0x74060000>;
+def XVSLT_H : LASX3R_XXX<0x74068000>;
+def XVSLT_W : LASX3R_XXX<0x74070000>;
+def XVSLT_D : LASX3R_XXX<0x74078000>;
+def XVSLTI_B : LASX2RI5_XXI<0x76860000, simm5>;
+def XVSLTI_H : LASX2RI5_XXI<0x76868000, simm5>;
+def XVSLTI_W : LASX2RI5_XXI<0x76870000, simm5>;
+def XVSLTI_D : LASX2RI5_XXI<0x76878000, simm5>;
+
+def XVSLT_BU : LASX3R_XXX<0x74080000>;
+def XVSLT_HU : LASX3R_XXX<0x74088000>;
+def XVSLT_WU : LASX3R_XXX<0x74090000>;
+def XVSLT_DU : LASX3R_XXX<0x74098000>;
+def XVSLTI_BU : LASX2RI5_XXI<0x76880000>;
+def XVSLTI_HU : LASX2RI5_XXI<0x76888000>;
+def XVSLTI_WU : LASX2RI5_XXI<0x76890000>;
+def XVSLTI_DU : LASX2RI5_XXI<0x76898000>;
+
+def XVFCMP_CAF_S : LASX3R_XXX<0x0c900000>;
+def XVFCMP_SAF_S : LASX3R_XXX<0x0c908000>;
+def XVFCMP_CLT_S : LASX3R_XXX<0x0c910000>;
+def XVFCMP_SLT_S : LASX3R_XXX<0x0c918000>;
+def XVFCMP_CEQ_S : LASX3R_XXX<0x0c920000>;
+def XVFCMP_SEQ_S : LASX3R_XXX<0x0c928000>;
+def XVFCMP_CLE_S : LASX3R_XXX<0x0c930000>;
+def XVFCMP_SLE_S : LASX3R_XXX<0x0c938000>;
+def XVFCMP_CUN_S : LASX3R_XXX<0x0c940000>;
+def XVFCMP_SUN_S : LASX3R_XXX<0x0c948000>;
+def XVFCMP_CULT_S : LASX3R_XXX<0x0c950000>;
+def XVFCMP_SULT_S : LASX3R_XXX<0x0c958000>;
+def XVFCMP_CUEQ_S : LASX3R_XXX<0x0c960000>;
+def XVFCMP_SUEQ_S : LASX3R_XXX<0x0c968000>;
+def XVFCMP_CULE_S : LASX3R_XXX<0x0c970000>;
+def XVFCMP_SULE_S : LASX3R_XXX<0x0c978000>;
+def XVFCMP_CNE_S : LASX3R_XXX<0x0c980000>;
+def XVFCMP_SNE_S : LASX3R_XXX<0x0c988000>;
+def XVFCMP_COR_S : LASX3R_XXX<0x0c9a0000>;
+def XVFCMP_SOR_S : LASX3R_XXX<0x0c9a8000>;
+def XVFCMP_CUNE_S : LASX3R_XXX<0x0c9c0000>;
+def XVFCMP_SUNE_S : LASX3R_XXX<0x0c9c8000>;
+
+def XVFCMP_CAF_D : LASX3R_XXX<0x0ca00000>;
+def XVFCMP_SAF_D : LASX3R_XXX<0x0ca08000>;
+def XVFCMP_CLT_D : LASX3R_XXX<0x0ca10000>;
+def XVFCMP_SLT_D : LASX3R_XXX<0x0ca18000>;
+def XVFCMP_CEQ_D : LASX3R_XXX<0x0ca20000>;
+def XVFCMP_SEQ_D : LASX3R_XXX<0x0ca28000>;
+def XVFCMP_CLE_D : LASX3R_XXX<0x0ca30000>;
+def XVFCMP_SLE_D : LASX3R_XXX<0x0ca38000>;
+def XVFCMP_CUN_D : LASX3R_XXX<0x0ca40000>;
+def XVFCMP_SUN_D : LASX3R_XXX<0x0ca48000>;
+def XVFCMP_CULT_D : LASX3R_XXX<0x0ca50000>;
+def XVFCMP_SULT_D : LASX3R_XXX<0x0ca58000>;
+def XVFCMP_CUEQ_D : LASX3R_XXX<0x0ca60000>;
+def XVFCMP_SUEQ_D : LASX3R_XXX<0x0ca68000>;
+def XVFCMP_CULE_D : LASX3R_XXX<0x0ca70000>;
+def XVFCMP_SULE_D : LASX3R_XXX<0x0ca78000>;
+def XVFCMP_CNE_D : LASX3R_XXX<0x0ca80000>;
+def XVFCMP_SNE_D : LASX3R_XXX<0x0ca88000>;
+def XVFCMP_COR_D : LASX3R_XXX<0x0caa0000>;
+def XVFCMP_SOR_D : LASX3R_XXX<0x0caa8000>;
+def XVFCMP_CUNE_D : LASX3R_XXX<0x0cac0000>;
+def XVFCMP_SUNE_D : LASX3R_XXX<0x0cac8000>;
+
+def XVBITSEL_V : LASX4R_XXXX<0x0d200000>;
+
+def XVBITSELI_B : LASX2RI8_XXXI<0x77c40000>;
+
+def XVSETEQZ_V : LASX2R_CX<0x769c9800>;
+def XVSETNEZ_V : LASX2R_CX<0x769c9c00>;
+def XVSETANYEQZ_B : LASX2R_CX<0x769ca000>;
+def XVSETANYEQZ_H : LASX2R_CX<0x769ca400>;
+def XVSETANYEQZ_W : LASX2R_CX<0x769ca800>;
+def XVSETANYEQZ_D : LASX2R_CX<0x769cac00>;
+def XVSETALLNEZ_B : LASX2R_CX<0x769cb000>;
+def XVSETALLNEZ_H : LASX2R_CX<0x769cb400>;
+def XVSETALLNEZ_W : LASX2R_CX<0x769cb800>;
+def XVSETALLNEZ_D : LASX2R_CX<0x769cbc00>;
+
+def XVINSGR2VR_W : LASX2RI3_XXRI<0x76ebc000>;
+def XVINSGR2VR_D : LASX2RI2_XXRI<0x76ebe000>;
+def XVPICKVE2GR_W : LASX2RI3_RXI<0x76efc000>;
+def XVPICKVE2GR_D : LASX2RI2_RXI<0x76efe000>;
+def XVPICKVE2GR_WU : LASX2RI3_RXI<0x76f3c000>;
+def XVPICKVE2GR_DU : LASX2RI2_RXI<0x76f3e000>;
+
+def XVREPLGR2VR_B : LASX2R_XR<0x769f0000>;
+def XVREPLGR2VR_H : LASX2R_XR<0x769f0400>;
+def XVREPLGR2VR_W : LASX2R_XR<0x769f0800>;
+def XVREPLGR2VR_D : LASX2R_XR<0x769f0c00>;
+
+def XVREPLVE_B : LASX3R_XXR<0x75220000>;
+def XVREPLVE_H : LASX3R_XXR<0x75228000>;
+def XVREPLVE_W : LASX3R_XXR<0x75230000>;
+def XVREPLVE_D : LASX3R_XXR<0x75238000>;
+def XVREPL128VEI_B : LASX2RI4_XXI<0x76f78000>;
+def XVREPL128VEI_H : LASX2RI3_XXI<0x76f7c000>;
+def XVREPL128VEI_W : LASX2RI2_XXI<0x76f7e000>;
+def XVREPL128VEI_D : LASX2RI1_XXI<0x76f7f000>;
+
+def XVREPLVE0_B : LASX2R_XX<0x77070000>;
+def XVREPLVE0_H : LASX2R_XX<0x77078000>;
+def XVREPLVE0_W : LASX2R_XX<0x7707c000>;
+def XVREPLVE0_D : LASX2R_XX<0x7707e000>;
+def XVREPLVE0_Q : LASX2R_XX<0x7707f000>;
+
+def XVINSVE0_W : LASX2RI3_XXXI<0x76ffc000>;
+def XVINSVE0_D : LASX2RI2_XXXI<0x76ffe000>;
+
+def XVPICKVE_W : LASX2RI3_XXI<0x7703c000>;
+def XVPICKVE_D : LASX2RI2_XXI<0x7703e000>;
+
+def XVBSLL_V : LASX2RI5_XXI<0x768e0000>;
+def XVBSRL_V : LASX2RI5_XXI<0x768e8000>;
+
+def XVPACKEV_B : LASX3R_XXX<0x75160000>;
+def XVPACKEV_H : LASX3R_XXX<0x75168000>;
+def XVPACKEV_W : LASX3R_XXX<0x75170000>;
+def XVPACKEV_D : LASX3R_XXX<0x75178000>;
+def XVPACKOD_B : LASX3R_XXX<0x75180000>;
+def XVPACKOD_H : LASX3R_XXX<0x75188000>;
+def XVPACKOD_W : LASX3R_XXX<0x75190000>;
+def XVPACKOD_D : LASX3R_XXX<0x75198000>;
+
+def XVPICKEV_B : LASX3R_XXX<0x751e0000>;
+def XVPICKEV_H : LASX3R_XXX<0x751e8000>;
+def XVPICKEV_W : LASX3R_XXX<0x751f0000>;
+def XVPICKEV_D : LASX3R_XXX<0x751f8000>;
+def XVPICKOD_B : LASX3R_XXX<0x75200000>;
+def XVPICKOD_H : LASX3R_XXX<0x75208000>;
+def XVPICKOD_W : LASX3R_XXX<0x75210000>;
+def XVPICKOD_D : LASX3R_XXX<0x75218000>;
+
+def XVILVL_B : LASX3R_XXX<0x751a0000>;
+def XVILVL_H : LASX3R_XXX<0x751a8000>;
+def XVILVL_W : LASX3R_XXX<0x751b0000>;
+def XVILVL_D : LASX3R_XXX<0x751b8000>;
+def XVILVH_B : LASX3R_XXX<0x751c0000>;
+def XVILVH_H : LASX3R_XXX<0x751c8000>;
+def XVILVH_W : LASX3R_XXX<0x751d0000>;
+def XVILVH_D : LASX3R_XXX<0x751d8000>;
+
+def XVSHUF_B : LASX4R_XXXX<0x0d600000>;
+
+def XVSHUF_H : LASX3R_XXXX<0x757a8000>;
+def XVSHUF_W : LASX3R_XXXX<0x757b0000>;
+def XVSHUF_D : LASX3R_XXXX<0x757b8000>;
+
+def XVPERM_W : LASX3R_XXX<0x757d0000>;
+
+def XVSHUF4I_B : LASX2RI8_XXI<0x77900000>;
+def XVSHUF4I_H : LASX2RI8_XXI<0x77940000>;
+def XVSHUF4I_W : LASX2RI8_XXI<0x77980000>;
+def XVSHUF4I_D : LASX2RI8_XXXI<0x779c0000>;
+
+def XVPERMI_W : LASX2RI8_XXXI<0x77e40000>;
+def XVPERMI_D : LASX2RI8_XXI<0x77e80000>;
+def XVPERMI_Q : LASX2RI8_XXXI<0x77ec0000>;
+
+def XVEXTRINS_D : LASX2RI8_XXXI<0x77800000>;
+def XVEXTRINS_W : LASX2RI8_XXXI<0x77840000>;
+def XVEXTRINS_H : LASX2RI8_XXXI<0x77880000>;
+def XVEXTRINS_B : LASX2RI8_XXXI<0x778c0000>;
+} // mayLoad = 0, mayStore = 0
+
+let mayLoad = 1, mayStore = 0 in {
+def XVLD : LASX2RI12_Load<0x2c800000>;
+def XVLDX : LASX3R_Load<0x38480000>;
+
+def XVLDREPL_B : LASX2RI12_Load<0x32800000>;
+def XVLDREPL_H : LASX2RI11_Load<0x32400000>;
+def XVLDREPL_W : LASX2RI10_Load<0x32200000>;
+def XVLDREPL_D : LASX2RI9_Load<0x32100000>;
+} // mayLoad = 1, mayStore = 0
+
+let mayLoad = 0, mayStore = 1 in {
+def XVST : LASX2RI12_Store<0x2cc00000>;
+def XVSTX : LASX3R_Store<0x384c0000>;
+
+def XVSTELM_B : LASX2RI8I5_XRII<0x33800000>;
+def XVSTELM_H : LASX2RI8I4_XRII<0x33400000, simm8_lsl1>;
+def XVSTELM_W : LASX2RI8I3_XRII<0x33200000, simm8_lsl2>;
+def XVSTELM_D : LASX2RI8I2_XRII<0x33100000, simm8_lsl3>;
+} // mayLoad = 0, mayStore = 1
+
+} // hasSideEffects = 0, Predicates = [HasExtLASX]
+
+/// Pseudo-instructions
+
+let Predicates = [HasExtLASX] in {
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0,
+ isAsmParserOnly = 1 in {
+def PseudoXVREPLI_B : Pseudo<(outs LASX256:$xd), (ins simm10:$imm), [],
+ "xvrepli.b", "$xd, $imm">;
+def PseudoXVREPLI_H : Pseudo<(outs LASX256:$xd), (ins simm10:$imm), [],
+ "xvrepli.h", "$xd, $imm">;
+def PseudoXVREPLI_W : Pseudo<(outs LASX256:$xd), (ins simm10:$imm), [],
+ "xvrepli.w", "$xd, $imm">;
+def PseudoXVREPLI_D : Pseudo<(outs LASX256:$xd), (ins simm10:$imm), [],
+ "xvrepli.d", "$xd, $imm">;
+}
+
+} // Predicates = [HasExtLASX]
diff --git a/llvm/lib/Target/LoongArch/LoongArchLBTInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchLBTInstrFormats.td
new file mode 100644
index 000000000000..2faee056e191
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchLBTInstrFormats.td
@@ -0,0 +1,256 @@
+// LoongArchLBTInstrFormats.td - LoongArch LBT Instr Formats -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe LoongArch LBT instructions format
+//
+// opcode - operation code.
+// rd/sd - destination register operand.
+// rj/rk/sj - source register operand.
+// immN/ptr - immediate data operand.
+//
+// Note: The definition of "NoDstFmt..." conveys the meaning of no explicit
+// output operand. In other words, there will be no output operand in the
+// assembly notation of these instructions. In fact, they always manipulate
+// the "EFLAGS" register.
+// Since these instructions are currently not used for code generation,
+// we do not need to add `let Defs/Uses = [EFLAGS]`.
+//===----------------------------------------------------------------------===//
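+
+// Editor's illustrative note (not part of the upstream commit): as a worked
+// example of how these format classes are used, a definition built on the
+// NoDstFmt1R class below, e.g.
+//   def X86INC_B : NoDstFmt1R<0x00008000>;
+// (see LoongArchLBTInstrInfo.td) has no explicit destination operand; its
+// mnemonic "x86inc.b" comes from the def name via deriveInsnMnemonic, its
+// assembly form is "x86inc.b $rj", and with rj = 5 the instruction word
+// encodes to 0x00008000 | (5 << 5) = 0x000080a0.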
+
+// 1R-type (no outs)
+// <opcode | rj>
+class NoDstFmt1R<bits<32> op>
+ : LAInst<(outs), (ins GPR:$rj),
+ deriveInsnMnemonic<NAME>.ret, "$rj"> {
+ bits<5> rj;
+
+ let Inst{31-0} = op;
+ let Inst{9-5} = rj;
+}
+
+// 1RI3-type (no outs)
+// <opcode | I3 | rj>
+class NoDstFmt1RI3<bits<32> op>
+ : LAInst<(outs), (ins GPR:$rj, uimm3:$imm3),
+ deriveInsnMnemonic<NAME>.ret, "$rj, $imm3"> {
+ bits<3> imm3;
+ bits<5> rj;
+
+ let Inst{31-0} = op;
+ let Inst{12-10} = imm3;
+ let Inst{9-5} = rj;
+}
+
+// 1RI4-type (no outs)
+// <opcode | I4 | rj>
+class NoDstFmt1RI4<bits<32> op>
+ : LAInst<(outs), (ins GPR:$rj, uimm4:$imm4),
+ deriveInsnMnemonic<NAME>.ret, "$rj, $imm4"> {
+ bits<4> imm4;
+ bits<5> rj;
+
+ let Inst{31-0} = op;
+ let Inst{13-10} = imm4;
+ let Inst{9-5} = rj;
+}
+
+// 1RI4-type
+// <opcode | I4 | rd>
+class Fmt1RI4<bits<32> op>
+ : LAInst<(outs GPR:$rd), (ins uimm4:$imm4),
+ deriveInsnMnemonic<NAME>.ret, "$rd, $imm4"> {
+ bits<4> imm4;
+ bits<5> rd;
+
+ let Inst{31-0} = op;
+ let Inst{13-10} = imm4;
+ let Inst{4-0} = rd;
+}
+
+// 1RI5-type (no outs)
+// <opcode | I5 | rj>
+class NoDstFmt1RI5<bits<32> op>
+ : LAInst<(outs), (ins GPR:$rj, uimm5:$imm5),
+ deriveInsnMnemonic<NAME>.ret, "$rj, $imm5"> {
+ bits<5> imm5;
+ bits<5> rj;
+
+ let Inst{31-0} = op;
+ let Inst{14-10} = imm5;
+ let Inst{9-5} = rj;
+}
+
+// 1RI5I4-type (no outs)
+// <opcode | I5 | rj | I4>
+class NoDstFmt1RI5I4<bits<32> op>
+ : LAInst<(outs), (ins GPR:$rj, uimm5:$imm5, uimm4:$imm4),
+ deriveInsnMnemonic<NAME>.ret, "$rj, $imm5, $imm4"> {
+ bits<5> imm5;
+ bits<5> rj;
+ bits<4> imm4;
+
+ let Inst{31-0} = op;
+ let Inst{14-10} = imm5;
+ let Inst{9-5} = rj;
+ let Inst{3-0} = imm4;
+}
+
+// 1RI5I8-type
+// <opcode | I8 | I5 | rd>
+class Fmt1RI5I8<bits<32> op>
+ : LAInst<(outs GPR:$rd), (ins uimm5:$imm5, uimm8:$imm8),
+ deriveInsnMnemonic<NAME>.ret, "$rd, $imm5, $imm8"> {
+ bits<8> imm8;
+ bits<5> imm5;
+ bits<5> rd;
+
+ let Inst{31-0} = op;
+ let Inst{17-10} = imm8;
+ let Inst{9-5} = imm5;
+ let Inst{4-0} = rd;
+}
+
+// 1RI6-type (no outs)
+// <opcode | I6 | rj>
+class NoDstFmt1RI6<bits<32> op>
+ : LAInst<(outs), (ins GPR:$rj, uimm6:$imm6),
+ deriveInsnMnemonic<NAME>.ret, "$rj, $imm6"> {
+ bits<6> imm6;
+ bits<5> rj;
+
+ let Inst{31-0} = op;
+ let Inst{15-10} = imm6;
+ let Inst{9-5} = rj;
+}
+
+// 1RI8-type
+// <opcode | I8 | rd>
+class Fmt1RI8<bits<32> op>
+ : LAInst<(outs GPR:$rd), (ins uimm8:$imm8),
+ deriveInsnMnemonic<NAME>.ret, "$rd, $imm8"> {
+ bits<8> imm8;
+ bits<5> rd;
+
+ let Inst{31-0} = op;
+ let Inst{17-10} = imm8;
+ let Inst{4-0} = rd;
+}
+
+// 2R-type (no outs)
+// <opcode | rk | rj>
+class NoDstFmt2R<bits<32> op>
+ : LAInst<(outs), (ins GPR:$rj, GPR:$rk),
+ deriveInsnMnemonic<NAME>.ret, "$rj, $rk"> {
+ bits<5> rk;
+ bits<5> rj;
+
+ let Inst{31-0} = op;
+ let Inst{14-10} = rk;
+ let Inst{9-5} = rj;
+}
+
+// 2RI4-type (no outs)
+// <opcode | rk | rj | imm4>
+class NoDstFmt2RI4<bits<32> op>
+ : LAInst<(outs), (ins GPR:$rj, GPR:$rk, uimm4:$imm4),
+ deriveInsnMnemonic<NAME>.ret, "$rj, $rk, $imm4"> {
+ bits<4> imm4;
+ bits<5> rk;
+ bits<5> rj;
+
+ let Inst{31-0} = op;
+ let Inst{14-10} = rk;
+ let Inst{9-5} = rj;
+ let Inst{3-0} = imm4;
+}
+
+// 2RI3-type
+// <opcode | I3 | rj | rd>
+class Fmt2RI3<bits<32> op>
+ : LAInst<(outs GPR:$rd), (ins GPR:$rj, uimm3:$imm3),
+ deriveInsnMnemonic<NAME>.ret, "$rd, $rj, $imm3"> {
+ bits<3> imm3;
+ bits<5> rj;
+ bits<5> rd;
+
+ let Inst{31-0} = op;
+ let Inst{12-10} = imm3;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = rd;
+}
+
+// 2RI4-type
+// <opcode | I4 | rj | rd>
+class Fmt2RI4<bits<32> op>
+ : LAInst<(outs GPR:$rd), (ins GPR:$rj, uimm4:$imm4),
+ deriveInsnMnemonic<NAME>.ret, "$rd, $rj, $imm4"> {
+ bits<4> imm4;
+ bits<5> rj;
+ bits<5> rd;
+
+ let Inst{31-0} = op;
+ let Inst{13-10} = imm4;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = rd;
+}
+
+// <opcode | rj | sd>
+class FmtGR2SCR<bits<32> op>
+ : LAInst<(outs SCR:$sd), (ins GPR:$rj), deriveInsnMnemonic<NAME>.ret,
+ "$sd, $rj"> {
+ bits<5> rj;
+ bits<2> sd;
+
+ let Inst{31-0} = op;
+ let Inst{9-5} = rj;
+ let Inst{1-0} = sd;
+}
+
+// <opcode | sj | rd>
+class FmtSCR2GR<bits<32> op>
+ : LAInst<(outs GPR:$rd), (ins SCR:$sj), deriveInsnMnemonic<NAME>.ret,
+ "$rd, $sj"> {
+ bits<2> sj;
+ bits<5> rd;
+
+ let Inst{31-0} = op;
+ let Inst{6-5} = sj;
+ let Inst{4-0} = rd;
+}
+
+// <opcode | I21[15:0] | I21[20:16]>
+class FmtJISCR<bits<32> op>
+ : LAInst<(outs), (ins simm21_lsl2:$imm21), deriveInsnMnemonic<NAME>.ret,
+ "$imm21"> {
+ bits<21> imm21;
+ bits<5> rj;
+
+ let Inst{31-0} = op;
+ let Inst{25-10} = imm21{15-0};
+ let Inst{4-0} = imm21{20-16};
+}
+
+// <opcode | rd>
+class FmtMFTOP<bits<32> op>
+ : LAInst<(outs GPR:$rd), (ins), deriveInsnMnemonic<NAME>.ret,
+ "$rd"> {
+ bits<5> rd;
+
+ let Inst{31-0} = op;
+ let Inst{4-0} = rd;
+}
+
+// <opcode | ptr>
+class FmtMTTOP<bits<32> op>
+ : LAInst<(outs), (ins uimm3:$ptr), deriveInsnMnemonic<NAME>.ret,
+ "$ptr"> {
+ bits<3> ptr;
+
+ let Inst{31-0} = op;
+ let Inst{7-5} = ptr;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchLBTInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLBTInstrInfo.td
new file mode 100644
index 000000000000..76e92701691d
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchLBTInstrInfo.td
@@ -0,0 +1,241 @@
+//===- LoongArchLBTInstrInfo.td - LoongArch LBT instructions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the LBT extension instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Predicates = [HasExtLBT] in {
+
+def MOVGR2SCR : FmtGR2SCR<0x00000800>;
+def MOVSCR2GR : FmtSCR2GR<0x00000c00>;
+
+def JISCR0 : FmtJISCR<0x48000200>;
+def JISCR1 : FmtJISCR<0x48000300>;
+
+def ADDU12I_W : ALU_2RI5<0x00290000, simm5>;
+
+def ADC_B : ALU_3R<0x00300000>;
+def ADC_H : ALU_3R<0x00308000>;
+def ADC_W : ALU_3R<0x00310000>;
+
+def SBC_B : ALU_3R<0x00320000>;
+def SBC_H : ALU_3R<0x00328000>;
+def SBC_W : ALU_3R<0x00330000>;
+
+def ROTR_B : ALU_3R<0x001a0000>;
+def ROTR_H : ALU_3R<0x001a8000>;
+
+def ROTRI_B : Fmt2RI3<0x004c2000>;
+def ROTRI_H : Fmt2RI4<0x004c4000>;
+
+def RCR_B : ALU_3R<0x00340000>;
+def RCR_H : ALU_3R<0x00348000>;
+def RCR_W : ALU_3R<0x00350000>;
+
+def RCRI_B : Fmt2RI3<0x00502000>;
+def RCRI_H : Fmt2RI4<0x00504000>;
+def RCRI_W : ALU_2RI5<0x00508000, uimm5>;
+
+def FCVT_UD_D : FP_CONV<0x0114e400>;
+def FCVT_LD_D : FP_CONV<0x0114e000>;
+def FCVT_D_LD : FP_ALU_3R<0x01150000>;
+
+let mayLoad = 1 in {
+def LDL_W : LOAD_2RI12<0x2e000000>;
+def LDR_W : LOAD_2RI12<0x2e400000>;
+} // mayLoad = 1
+
+let mayStore = 1 in {
+def STL_W : STORE_2RI12<0x2f000000>;
+def STR_W : STORE_2RI12<0x2f400000>;
+} // mayStore = 1
+
+def X86ADC_B : NoDstFmt2R<0x003f000c>;
+def X86ADC_H : NoDstFmt2R<0x003f000d>;
+def X86ADC_W : NoDstFmt2R<0x003f000e>;
+def X86ADD_B : NoDstFmt2R<0x003f0004>;
+def X86ADD_H : NoDstFmt2R<0x003f0005>;
+def X86ADD_W : NoDstFmt2R<0x003f0006>;
+
+def X86INC_B : NoDstFmt1R<0x00008000>;
+def X86INC_H : NoDstFmt1R<0x00008001>;
+def X86INC_W : NoDstFmt1R<0x00008002>;
+
+def X86SBC_B : NoDstFmt2R<0x003f0010>;
+def X86SBC_H : NoDstFmt2R<0x003f0011>;
+def X86SBC_W : NoDstFmt2R<0x003f0012>;
+def X86SUB_B : NoDstFmt2R<0x003f0008>;
+def X86SUB_H : NoDstFmt2R<0x003f0009>;
+def X86SUB_W : NoDstFmt2R<0x003f000a>;
+
+def X86DEC_B : NoDstFmt1R<0x00008004>;
+def X86DEC_H : NoDstFmt1R<0x00008005>;
+def X86DEC_W : NoDstFmt1R<0x00008006>;
+
+def X86AND_B : NoDstFmt2R<0x003f8010>;
+def X86AND_H : NoDstFmt2R<0x003f8011>;
+def X86AND_W : NoDstFmt2R<0x003f8012>;
+
+def X86OR_B : NoDstFmt2R<0x003f8014>;
+def X86OR_H : NoDstFmt2R<0x003f8015>;
+def X86OR_W : NoDstFmt2R<0x003f8016>;
+
+def X86XOR_B : NoDstFmt2R<0x003f8018>;
+def X86XOR_H : NoDstFmt2R<0x003f8019>;
+def X86XOR_W : NoDstFmt2R<0x003f801a>;
+
+def X86MUL_B : NoDstFmt2R<0x003e8000>;
+def X86MUL_H : NoDstFmt2R<0x003e8001>;
+def X86MUL_W : NoDstFmt2R<0x003e8002>;
+def X86MUL_BU : NoDstFmt2R<0x003e8004>;
+def X86MUL_HU : NoDstFmt2R<0x003e8005>;
+
+def X86RCL_B : NoDstFmt2R<0x003f800c>;
+def X86RCL_H : NoDstFmt2R<0x003f800d>;
+def X86RCL_W : NoDstFmt2R<0x003f800e>;
+def X86RCLI_B : NoDstFmt1RI3<0x00542018>;
+def X86RCLI_H : NoDstFmt1RI4<0x00544019>;
+def X86RCLI_W : NoDstFmt1RI5<0x0054801a>;
+
+def X86RCR_B : NoDstFmt2R<0x003f8008>;
+def X86RCR_H : NoDstFmt2R<0x003f8009>;
+def X86RCR_W : NoDstFmt2R<0x003f800a>;
+def X86RCRI_B : NoDstFmt1RI3<0x00542010>;
+def X86RCRI_H : NoDstFmt1RI4<0x00544011>;
+def X86RCRI_W : NoDstFmt1RI5<0x00548012>;
+
+def X86ROTL_B : NoDstFmt2R<0x003f8004>;
+def X86ROTL_H : NoDstFmt2R<0x003f8005>;
+def X86ROTL_W : NoDstFmt2R<0x003f8006>;
+def X86ROTLI_B : NoDstFmt1RI3<0x00542014>;
+def X86ROTLI_H : NoDstFmt1RI4<0x00544015>;
+def X86ROTLI_W : NoDstFmt1RI5<0x00548016>;
+
+def X86ROTR_B : NoDstFmt2R<0x003f8000>;
+def X86ROTR_H : NoDstFmt2R<0x003f8001>;
+def X86ROTR_W : NoDstFmt2R<0x003f8003>;
+def X86ROTRI_B : NoDstFmt1RI3<0x0054200c>;
+def X86ROTRI_H : NoDstFmt1RI4<0x0054400d>;
+def X86ROTRI_W : NoDstFmt1RI5<0x0054800e>;
+
+def X86SLL_B : NoDstFmt2R<0x003f0014>;
+def X86SLL_H : NoDstFmt2R<0x003f0015>;
+def X86SLL_W : NoDstFmt2R<0x003f0016>;
+def X86SLLI_B : NoDstFmt1RI3<0x00542000>;
+def X86SLLI_H : NoDstFmt1RI4<0x00544001>;
+def X86SLLI_W : NoDstFmt1RI5<0x00548002>;
+
+def X86SRL_B : NoDstFmt2R<0x003f0018>;
+def X86SRL_H : NoDstFmt2R<0x003f0019>;
+def X86SRL_W : NoDstFmt2R<0x003f001a>;
+def X86SRLI_B : NoDstFmt1RI3<0x00542004>;
+def X86SRLI_H : NoDstFmt1RI4<0x00544005>;
+def X86SRLI_W : NoDstFmt1RI5<0x00548006>;
+
+def X86SRA_B : NoDstFmt2R<0x003f001c>;
+def X86SRA_H : NoDstFmt2R<0x003f001d>;
+def X86SRA_W : NoDstFmt2R<0x003f001e>;
+def X86SRAI_B : NoDstFmt1RI3<0x00542008>;
+def X86SRAI_H : NoDstFmt1RI4<0x00544009>;
+def X86SRAI_W : NoDstFmt1RI5<0x0054800a>;
+
+def SETX86J : Fmt1RI4<0x00368000>;
+def SETX86LOOPE : ALU_2R<0x00007800>;
+def SETX86LOOPNE : ALU_2R<0x00007c00>;
+def X86MFFLAG : Fmt1RI8<0x005c0000>;
+def X86MTFLAG : Fmt1RI8<0x005c0020>;
+def X86MFTOP : FmtMFTOP<0x00007400>;
+def X86MTTOP : FmtMTTOP<0x00007000>;
+
+def X86INCTOP : FmtI32<0x00008009>;
+def X86DECTOP : FmtI32<0x00008029>;
+def X86SETTM : FmtI32<0x00008008>;
+def X86CLRTM : FmtI32<0x00008028>;
+def X86SETTAG : Fmt1RI5I8<0x00580000>;
+
+def ARMADD_W : NoDstFmt2RI4<0x00370010>;
+def ARMSUB_W : NoDstFmt2RI4<0x00378010>;
+def ARMADC_W : NoDstFmt2RI4<0x00380010>;
+def ARMSBC_W : NoDstFmt2RI4<0x00388010>;
+def ARMAND_W : NoDstFmt2RI4<0x00390010>;
+def ARMOR_W : NoDstFmt2RI4<0x00398010>;
+def ARMXOR_W : NoDstFmt2RI4<0x003a0010>;
+def ARMNOT_W : NoDstFmt1RI4<0x003fc01c>;
+def ARMSLL_W : NoDstFmt2RI4<0x003a8010>;
+def ARMSRL_W : NoDstFmt2RI4<0x003b0010>;
+def ARMSRA_W : NoDstFmt2RI4<0x003b8010>;
+def ARMROTR_W : NoDstFmt2RI4<0x003c0010>;
+def ARMSLLI_W : NoDstFmt1RI5I4<0x003c8010>;
+def ARMSRLI_W : NoDstFmt1RI5I4<0x003d0010>;
+def ARMSRAI_W : NoDstFmt1RI5I4<0x003d8010>;
+def ARMROTRI_W : NoDstFmt1RI5I4<0x003e0010>;
+def ARMRRX_W : NoDstFmt1RI4<0x003fc01f>;
+def ARMMOVE : Fmt2RI4<0x00364000>;
+def ARMMOV_W : NoDstFmt1RI4<0x003fc01d>;
+
+def ARMMFFLAG : Fmt1RI8<0x005c0040>;
+def ARMMTFLAG : Fmt1RI8<0x005c0060>;
+def SETARMJ : Fmt1RI4<0x0036c000>;
+
+let Predicates = [IsLA64] in {
+def ADDU12I_D : ALU_2RI5<0x00298000, simm5>;
+def ADC_D : ALU_3R<0x00318000>;
+def SBC_D : ALU_3R<0x00338000>;
+def RCR_D : ALU_3R<0x00358000>;
+def RCRI_D : ALU_2RI6<0x00510000, uimm6>;
+
+let mayLoad = 1 in {
+def LDL_D : LOAD_2RI12<0x2e800000>;
+def LDR_D : LOAD_2RI12<0x2ec00000>;
+} // mayLoad = 1
+
+let mayStore = 1 in {
+def STL_D : STORE_2RI12<0x2f800000>;
+def STR_D : STORE_2RI12<0x2fc00000>;
+} // mayStore = 1
+
+def X86ADC_D : NoDstFmt2R<0x003f000f>;
+def X86ADD_D : NoDstFmt2R<0x003f0007>;
+def X86ADD_WU : NoDstFmt2R<0x003f0000>;
+def X86ADD_DU : NoDstFmt2R<0x003f0001>;
+def X86INC_D : NoDstFmt1R<0x00008003>;
+def X86SBC_D : NoDstFmt2R<0x003f0013>;
+def X86SUB_WU : NoDstFmt2R<0x003f0002>;
+def X86SUB_D : NoDstFmt2R<0x003f000b>;
+def X86SUB_DU : NoDstFmt2R<0x003f0003>;
+def X86DEC_D : NoDstFmt1R<0x00008007>;
+def X86AND_D : NoDstFmt2R<0x003f8013>;
+def X86OR_D : NoDstFmt2R<0x003f8017>;
+def X86XOR_D : NoDstFmt2R<0x003f801b>;
+def X86MUL_D : NoDstFmt2R<0x003e8003>;
+def X86MUL_WU : NoDstFmt2R<0x003e8006>;
+def X86MUL_DU : NoDstFmt2R<0x003e8007>;
+def X86RCL_D : NoDstFmt2R<0x003f800f>;
+def X86RCLI_D : NoDstFmt1RI6<0x0055001b>;
+def X86RCR_D : NoDstFmt2R<0x003f800b>;
+def X86RCRI_D : NoDstFmt1RI6<0x00550013>;
+def X86ROTL_D : NoDstFmt2R<0x003f8007>;
+def X86ROTLI_D : NoDstFmt1RI6<0x00550017>;
+def X86ROTR_D : NoDstFmt2R<0x003f8002>;
+def X86ROTRI_D : NoDstFmt1RI6<0x0055000f>;
+def X86SLL_D : NoDstFmt2R<0x003f0017>;
+def X86SLLI_D : NoDstFmt1RI6<0x00550003>;
+def X86SRL_D : NoDstFmt2R<0x003f001b>;
+def X86SRLI_D : NoDstFmt1RI6<0x00550007>;
+def X86SRA_D : NoDstFmt2R<0x003f001f>;
+def X86SRAI_D : NoDstFmt1RI6<0x0055000b>;
+def ARMMOV_D : NoDstFmt1RI4<0x003fc01e>;
+
+} // Predicates = [IsLA64]
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 0, Predicates = [HasExtLBT]
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrFormats.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrFormats.td
new file mode 100644
index 000000000000..843f9cbd94e7
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrFormats.td
@@ -0,0 +1,486 @@
+// LoongArchLSXInstrFormats.td - LoongArch LSX Instr Formats -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe LoongArch LSX instructions format
+//
+// opcode - operation code.
+// vd/rd/cd - destination register operand.
+// {r/v}{j/k} - source register operand.
+// immN - immediate data operand.
+//
+//===----------------------------------------------------------------------===//
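+
+// Editor's illustrative note (not part of the upstream commit): as a worked
+// example of how these format classes are used, a definition built on the
+// Fmt2R_VV class below, e.g.
+//   def VNEG_B : LSX2R_VV<0x729c3000>;
+// (see LoongArchLSXInstrInfo.td) renders as "vneg.b $vd, $vj"; with
+// vd = $vr1 and vj = $vr2 the instruction word encodes to
+// 0x729c3000 | (2 << 5) | 1 = 0x729c3041.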
+
+// 1RI13-type
+// <opcode | I13 | vd>
+class Fmt1RI13_VI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<13> imm13;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{17-5} = imm13;
+ let Inst{4-0} = vd;
+}
+
+// 2R-type
+// <opcode | vj | vd>
+class Fmt2R_VV<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<5> vj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{9-5} = vj;
+ let Inst{4-0} = vd;
+}
+
+// <opcode | rj | vd>
+class Fmt2R_VR<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<5> rj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = vd;
+}
+
+// <opcode | vj | cd>
+class Fmt2R_CV<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<5> vj;
+ bits<3> cd;
+
+ let Inst{31-0} = op;
+ let Inst{9-5} = vj;
+ let Inst{2-0} = cd;
+}
+
+// 2RI1-type
+// <opcode | I1 | vj | vd>
+class Fmt2RI1_VVI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<1> imm1;
+ bits<5> vj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{10} = imm1;
+ let Inst{9-5} = vj;
+ let Inst{4-0} = vd;
+}
+
+// <opcode | I1 | rj | vd>
+class Fmt2RI1_VRI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<1> imm1;
+ bits<5> rj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{10} = imm1;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = vd;
+}
+
+// <opcode | I1 | vj | rd>
+class Fmt2RI1_RVI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<1> imm1;
+ bits<5> vj;
+ bits<5> rd;
+
+ let Inst{31-0} = op;
+ let Inst{10} = imm1;
+ let Inst{9-5} = vj;
+ let Inst{4-0} = rd;
+}
+
+// 2RI2-type
+// <opcode | I2 | vj | vd>
+class Fmt2RI2_VVI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<2> imm2;
+ bits<5> vj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{11-10} = imm2;
+ let Inst{9-5} = vj;
+ let Inst{4-0} = vd;
+}
+
+// <opcode | I2 | rj | vd>
+class Fmt2RI2_VRI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<2> imm2;
+ bits<5> rj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{11-10} = imm2;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = vd;
+}
+
+// <opcode | I2 | vj | rd>
+class Fmt2RI2_RVI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<2> imm2;
+ bits<5> vj;
+ bits<5> rd;
+
+ let Inst{31-0} = op;
+ let Inst{11-10} = imm2;
+ let Inst{9-5} = vj;
+ let Inst{4-0} = rd;
+}
+
+// 2RI3-type
+// <opcode | I3 | vj | vd>
+class Fmt2RI3_VVI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<3> imm3;
+ bits<5> vj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{12-10} = imm3;
+ let Inst{9-5} = vj;
+ let Inst{4-0} = vd;
+}
+
+// <opcode | I3 | rj | vd>
+class Fmt2RI3_VRI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<3> imm3;
+ bits<5> rj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{12-10} = imm3;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = vd;
+}
+
+// <opcode | I3 | vj | rd>
+class Fmt2RI3_RVI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<3> imm3;
+ bits<5> vj;
+ bits<5> rd;
+
+ let Inst{31-0} = op;
+ let Inst{12-10} = imm3;
+ let Inst{9-5} = vj;
+ let Inst{4-0} = rd;
+}
+
+// 2RI4-type
+// <opcode | I4 | vj | vd>
+class Fmt2RI4_VVI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<4> imm4;
+ bits<5> vj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{13-10} = imm4;
+ let Inst{9-5} = vj;
+ let Inst{4-0} = vd;
+}
+
+// <opcode | I4 | rj | vd>
+class Fmt2RI4_VRI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<4> imm4;
+ bits<5> rj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{13-10} = imm4;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = vd;
+}
+
+// <opcode | I4 | vj | rd>
+class Fmt2RI4_RVI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<4> imm4;
+ bits<5> vj;
+ bits<5> rd;
+
+ let Inst{31-0} = op;
+ let Inst{13-10} = imm4;
+ let Inst{9-5} = vj;
+ let Inst{4-0} = rd;
+}
+
+// 2RI5-type
+// <opcode | I5 | vj | vd>
+class Fmt2RI5_VVI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<5> imm5;
+ bits<5> vj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{14-10} = imm5;
+ let Inst{9-5} = vj;
+ let Inst{4-0} = vd;
+}
+
+// 2RI6-type
+// <opcode | I6 | vj | vd>
+class Fmt2RI6_VVI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<6> imm6;
+ bits<5> vj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{15-10} = imm6;
+ let Inst{9-5} = vj;
+ let Inst{4-0} = vd;
+}
+
+// 2RI7-type
+// <opcode | I7 | vj | vd>
+class Fmt2RI7_VVI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<7> imm7;
+ bits<5> vj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{16-10} = imm7;
+ let Inst{9-5} = vj;
+ let Inst{4-0} = vd;
+}
+
+// 2RI8-type
+// <opcode | I8 | vj | vd>
+class Fmt2RI8_VVI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<8> imm8;
+ bits<5> vj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{17-10} = imm8;
+ let Inst{9-5} = vj;
+ let Inst{4-0} = vd;
+}
+
+// 2RI8I1-type
+// <opcode | I1 | I8 | rj | vd>
+class Fmt2RI8I1_VRII<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<1> imm1;
+ bits<8> imm8;
+ bits<5> rj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{18} = imm1;
+ let Inst{17-10} = imm8;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = vd;
+}
+
+// 2RI8I2-type
+// <opcode | I2 | I8 | rj | vd>
+class Fmt2RI8I2_VRII<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<2> imm2;
+ bits<8> imm8;
+ bits<5> rj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{19-18} = imm2;
+ let Inst{17-10} = imm8;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = vd;
+}
+
+// 2RI8I3-type
+// <opcode | I3 | I8 | rj | vd>
+class Fmt2RI8I3_VRII<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<3> imm3;
+ bits<8> imm8;
+ bits<5> rj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{20-18} = imm3;
+ let Inst{17-10} = imm8;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = vd;
+}
+
+// 2RI8I4-type
+// <opcode | I4 | I8 | rj | vd>
+class Fmt2RI8I4_VRII<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<4> imm4;
+ bits<8> imm8;
+ bits<5> rj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{21-18} = imm4;
+ let Inst{17-10} = imm8;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = vd;
+}
+
+// 2RI9-type
+// <opcode | I9 | rj | vd>
+class Fmt2RI9_VRI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<9> imm9;
+ bits<5> rj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{18-10} = imm9;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = vd;
+}
+
+// 2RI10-type
+// <opcode | I10 | rj | vd>
+class Fmt2RI10_VRI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<10> imm10;
+ bits<5> rj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{19-10} = imm10;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = vd;
+}
+
+// 2RI11-type
+// <opcode | I11 | rj | vd>
+class Fmt2RI11_VRI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<11> imm11;
+ bits<5> rj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{20-10} = imm11;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = vd;
+}
+
+// 2RI12-type
+// <opcode | I12 | rj | vd>
+class Fmt2RI12_VRI<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<12> imm12;
+ bits<5> rj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{21-10} = imm12;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = vd;
+}
+
+// 3R-type
+// <opcode | vk | vj | vd>
+class Fmt3R_VVV<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<5> vk;
+ bits<5> vj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{14-10} = vk;
+ let Inst{9-5} = vj;
+ let Inst{4-0} = vd;
+}
+
+// <opcode | rk | vj | vd>
+class Fmt3R_VVR<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<5> rk;
+ bits<5> vj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{14-10} = rk;
+ let Inst{9-5} = vj;
+ let Inst{4-0} = vd;
+}
+
+// <opcode | rk | rj | vd>
+class Fmt3R_VRR<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<5> rk;
+ bits<5> rj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{14-10} = rk;
+ let Inst{9-5} = rj;
+ let Inst{4-0} = vd;
+}
+
+// 4R-type
+// <opcode | va | vk | vj | vd>
+class Fmt4R_VVVV<bits<32> op, dag outs, dag ins, string opnstr,
+ list<dag> pattern = []>
+ : LAInst<outs, ins, deriveInsnMnemonic<NAME>.ret, opnstr, pattern> {
+ bits<5> va;
+ bits<5> vk;
+ bits<5> vj;
+ bits<5> vd;
+
+ let Inst{31-0} = op;
+ let Inst{19-15} = va;
+ let Inst{14-10} = vk;
+ let Inst{9-5} = vj;
+ let Inst{4-0} = vd;
+}
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
new file mode 100644
index 000000000000..a8ed285a37cf
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -0,0 +1,1007 @@
+//===- LoongArchLSXInstrInfo.td - LoongArch LSX instructions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the SIMD extension instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction class templates
+//===----------------------------------------------------------------------===//
+
+class LSX1RI13_VI<bits<32> op, Operand ImmOpnd = simm13>
+ : Fmt1RI13_VI<op, (outs LSX128:$vd), (ins ImmOpnd:$imm13), "$vd, $imm13">;
+
+class LSX2R_VV<bits<32> op>
+ : Fmt2R_VV<op, (outs LSX128:$vd), (ins LSX128:$vj), "$vd, $vj">;
+
+class LSX2R_VR<bits<32> op>
+ : Fmt2R_VR<op, (outs LSX128:$vd), (ins GPR:$rj), "$vd, $rj">;
+
+class LSX2R_CV<bits<32> op>
+ : Fmt2R_CV<op, (outs CFR:$cd), (ins LSX128:$vj), "$cd, $vj">;
+
+class LSX2RI1_VVI<bits<32> op, Operand ImmOpnd = uimm1>
+ : Fmt2RI1_VVI<op, (outs LSX128:$vd), (ins LSX128:$vj, ImmOpnd:$imm1),
+ "$vd, $vj, $imm1">;
+
+class LSX2RI1_RVI<bits<32> op, Operand ImmOpnd = uimm1>
+ : Fmt2RI1_RVI<op, (outs GPR:$rd), (ins LSX128:$vj, ImmOpnd:$imm1),
+ "$rd, $vj, $imm1">;
+
+class LSX2RI2_VVI<bits<32> op, Operand ImmOpnd = uimm2>
+ : Fmt2RI2_VVI<op, (outs LSX128:$vd), (ins LSX128:$vj, ImmOpnd:$imm2),
+ "$vd, $vj, $imm2">;
+
+class LSX2RI2_RVI<bits<32> op, Operand ImmOpnd = uimm2>
+ : Fmt2RI2_RVI<op, (outs GPR:$rd), (ins LSX128:$vj, ImmOpnd:$imm2),
+ "$rd, $vj, $imm2">;
+
+class LSX2RI3_VVI<bits<32> op, Operand ImmOpnd = uimm3>
+ : Fmt2RI3_VVI<op, (outs LSX128:$vd), (ins LSX128:$vj, ImmOpnd:$imm3),
+ "$vd, $vj, $imm3">;
+
+class LSX2RI3_RVI<bits<32> op, Operand ImmOpnd = uimm3>
+ : Fmt2RI3_RVI<op, (outs GPR:$rd), (ins LSX128:$vj, ImmOpnd:$imm3),
+ "$rd, $vj, $imm3">;
+
+class LSX2RI4_VVI<bits<32> op, Operand ImmOpnd = uimm4>
+ : Fmt2RI4_VVI<op, (outs LSX128:$vd), (ins LSX128:$vj, ImmOpnd:$imm4),
+ "$vd, $vj, $imm4">;
+
+class LSX2RI4_RVI<bits<32> op, Operand ImmOpnd = uimm4>
+ : Fmt2RI4_RVI<op, (outs GPR:$rd), (ins LSX128:$vj, ImmOpnd:$imm4),
+ "$rd, $vj, $imm4">;
+
+class LSX2RI5_VVI<bits<32> op, Operand ImmOpnd = uimm5>
+ : Fmt2RI5_VVI<op, (outs LSX128:$vd), (ins LSX128:$vj, ImmOpnd:$imm5),
+ "$vd, $vj, $imm5">;
+
+class LSX2RI6_VVI<bits<32> op, Operand ImmOpnd = uimm6>
+ : Fmt2RI6_VVI<op, (outs LSX128:$vd), (ins LSX128:$vj, ImmOpnd:$imm6),
+ "$vd, $vj, $imm6">;
+
+class LSX2RI8_VVI<bits<32> op, Operand ImmOpnd = uimm8>
+ : Fmt2RI8_VVI<op, (outs LSX128:$vd), (ins LSX128:$vj, ImmOpnd:$imm8),
+ "$vd, $vj, $imm8">;
+
+class LSX2RI8I1_VRII<bits<32> op, Operand ImmOpnd = simm8,
+ Operand IdxOpnd = uimm1>
+ : Fmt2RI8I1_VRII<op, (outs),
+ (ins LSX128:$vd, GPR:$rj, ImmOpnd:$imm8, IdxOpnd:$imm1),
+ "$vd, $rj, $imm8, $imm1">;
+class LSX2RI8I2_VRII<bits<32> op, Operand ImmOpnd = simm8,
+ Operand IdxOpnd = uimm2>
+ : Fmt2RI8I2_VRII<op, (outs),
+ (ins LSX128:$vd, GPR:$rj, ImmOpnd:$imm8, IdxOpnd:$imm2),
+ "$vd, $rj, $imm8, $imm2">;
+class LSX2RI8I3_VRII<bits<32> op, Operand ImmOpnd = simm8,
+ Operand IdxOpnd = uimm3>
+ : Fmt2RI8I3_VRII<op, (outs),
+ (ins LSX128:$vd, GPR:$rj, ImmOpnd:$imm8, IdxOpnd:$imm3),
+ "$vd, $rj, $imm8, $imm3">;
+class LSX2RI8I4_VRII<bits<32> op, Operand ImmOpnd = simm8,
+ Operand IdxOpnd = uimm4>
+ : Fmt2RI8I4_VRII<op, (outs),
+ (ins LSX128:$vd, GPR:$rj, ImmOpnd:$imm8, IdxOpnd:$imm4),
+ "$vd, $rj, $imm8, $imm4">;
+
+class LSX3R_VVV<bits<32> op>
+ : Fmt3R_VVV<op, (outs LSX128:$vd), (ins LSX128:$vj, LSX128:$vk),
+ "$vd, $vj, $vk">;
+
+class LSX3R_VVR<bits<32> op>
+ : Fmt3R_VVR<op, (outs LSX128:$vd), (ins LSX128:$vj, GPR:$rk),
+ "$vd, $vj, $rk">;
+
+class LSX4R_VVVV<bits<32> op>
+ : Fmt4R_VVVV<op, (outs LSX128:$vd),
+ (ins LSX128:$vj, LSX128:$vk, LSX128:$va),
+ "$vd, $vj, $vk, $va">;
+
+let Constraints = "$vd = $dst" in {
+
+class LSX2RI1_VVRI<bits<32> op, Operand ImmOpnd = uimm1>
+ : Fmt2RI1_VRI<op, (outs LSX128:$dst), (ins LSX128:$vd, GPR:$rj, ImmOpnd:$imm1),
+ "$vd, $rj, $imm1">;
+class LSX2RI2_VVRI<bits<32> op, Operand ImmOpnd = uimm2>
+ : Fmt2RI2_VRI<op, (outs LSX128:$dst), (ins LSX128:$vd, GPR:$rj, ImmOpnd:$imm2),
+ "$vd, $rj, $imm2">;
+class LSX2RI3_VVRI<bits<32> op, Operand ImmOpnd = uimm3>
+ : Fmt2RI3_VRI<op, (outs LSX128:$dst), (ins LSX128:$vd, GPR:$rj, ImmOpnd:$imm3),
+ "$vd, $rj, $imm3">;
+class LSX2RI4_VVRI<bits<32> op, Operand ImmOpnd = uimm4>
+ : Fmt2RI4_VRI<op, (outs LSX128:$dst), (ins LSX128:$vd, GPR:$rj, ImmOpnd:$imm4),
+ "$vd, $rj, $imm4">;
+
+class LSX2RI4_VVVI<bits<32> op, Operand ImmOpnd = uimm4>
+ : Fmt2RI4_VVI<op, (outs LSX128:$dst), (ins LSX128:$vd, LSX128:$vj, ImmOpnd:$imm4),
+ "$vd, $vj, $imm4">;
+class LSX2RI5_VVVI<bits<32> op, Operand ImmOpnd = uimm5>
+ : Fmt2RI5_VVI<op, (outs LSX128:$dst), (ins LSX128:$vd, LSX128:$vj, ImmOpnd:$imm5),
+ "$vd, $vj, $imm5">;
+class LSX2RI6_VVVI<bits<32> op, Operand ImmOpnd = uimm6>
+ : Fmt2RI6_VVI<op, (outs LSX128:$dst), (ins LSX128:$vd, LSX128:$vj, ImmOpnd:$imm6),
+ "$vd, $vj, $imm6">;
+class LSX2RI7_VVVI<bits<32> op, Operand ImmOpnd = uimm7>
+ : Fmt2RI7_VVI<op, (outs LSX128:$dst), (ins LSX128:$vd, LSX128:$vj, ImmOpnd:$imm7),
+ "$vd, $vj, $imm7">;
+
+class LSX2RI8_VVVI<bits<32> op, Operand ImmOpnd = uimm8>
+ : Fmt2RI8_VVI<op, (outs LSX128:$dst), (ins LSX128:$vd, LSX128:$vj, ImmOpnd:$imm8),
+ "$vd, $vj, $imm8">;
+
+class LSX3R_VVVV<bits<32> op>
+ : Fmt3R_VVV<op, (outs LSX128:$dst), (ins LSX128:$vd, LSX128:$vj, LSX128:$vk),
+ "$vd, $vj, $vk">;
+
+} // Constraints = "$vd = $dst"
+
+class LSX2RI9_Load<bits<32> op, Operand ImmOpnd = simm9_lsl3>
+ : Fmt2RI9_VRI<op, (outs LSX128:$vd), (ins GPR:$rj, ImmOpnd:$imm9),
+ "$vd, $rj, $imm9">;
+class LSX2RI10_Load<bits<32> op, Operand ImmOpnd = simm10_lsl2>
+ : Fmt2RI10_VRI<op, (outs LSX128:$vd), (ins GPR:$rj, ImmOpnd:$imm10),
+ "$vd, $rj, $imm10">;
+class LSX2RI11_Load<bits<32> op, Operand ImmOpnd = simm11_lsl1>
+ : Fmt2RI11_VRI<op, (outs LSX128:$vd), (ins GPR:$rj, ImmOpnd:$imm11),
+ "$vd, $rj, $imm11">;
+class LSX2RI12_Load<bits<32> op, Operand ImmOpnd = simm12>
+ : Fmt2RI12_VRI<op, (outs LSX128:$vd), (ins GPR:$rj, ImmOpnd:$imm12),
+ "$vd, $rj, $imm12">;
+class LSX2RI12_Store<bits<32> op, Operand ImmOpnd = simm12>
+ : Fmt2RI12_VRI<op, (outs), (ins LSX128:$vd, GPR:$rj, ImmOpnd:$imm12),
+ "$vd, $rj, $imm12">;
+
+class LSX3R_Load<bits<32> op>
+ : Fmt3R_VRR<op, (outs LSX128:$vd), (ins GPR:$rj, GPR:$rk),
+ "$vd, $rj, $rk">;
+class LSX3R_Store<bits<32> op>
+ : Fmt3R_VRR<op, (outs), (ins LSX128:$vd, GPR:$rj, GPR:$rk),
+ "$vd, $rj, $rk">;
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0, Predicates = [HasExtLSX] in {
+
+let mayLoad = 0, mayStore = 0 in {
+
+def VADD_B : LSX3R_VVV<0x700a0000>;
+def VADD_H : LSX3R_VVV<0x700a8000>;
+def VADD_W : LSX3R_VVV<0x700b0000>;
+def VADD_D : LSX3R_VVV<0x700b8000>;
+def VADD_Q : LSX3R_VVV<0x712d0000>;
+
+def VSUB_B : LSX3R_VVV<0x700c0000>;
+def VSUB_H : LSX3R_VVV<0x700c8000>;
+def VSUB_W : LSX3R_VVV<0x700d0000>;
+def VSUB_D : LSX3R_VVV<0x700d8000>;
+def VSUB_Q : LSX3R_VVV<0x712d8000>;
+
+def VADDI_BU : LSX2RI5_VVI<0x728a0000>;
+def VADDI_HU : LSX2RI5_VVI<0x728a8000>;
+def VADDI_WU : LSX2RI5_VVI<0x728b0000>;
+def VADDI_DU : LSX2RI5_VVI<0x728b8000>;
+
+def VSUBI_BU : LSX2RI5_VVI<0x728c0000>;
+def VSUBI_HU : LSX2RI5_VVI<0x728c8000>;
+def VSUBI_WU : LSX2RI5_VVI<0x728d0000>;
+def VSUBI_DU : LSX2RI5_VVI<0x728d8000>;
+
+def VNEG_B : LSX2R_VV<0x729c3000>;
+def VNEG_H : LSX2R_VV<0x729c3400>;
+def VNEG_W : LSX2R_VV<0x729c3800>;
+def VNEG_D : LSX2R_VV<0x729c3c00>;
+
+def VSADD_B : LSX3R_VVV<0x70460000>;
+def VSADD_H : LSX3R_VVV<0x70468000>;
+def VSADD_W : LSX3R_VVV<0x70470000>;
+def VSADD_D : LSX3R_VVV<0x70478000>;
+def VSADD_BU : LSX3R_VVV<0x704a0000>;
+def VSADD_HU : LSX3R_VVV<0x704a8000>;
+def VSADD_WU : LSX3R_VVV<0x704b0000>;
+def VSADD_DU : LSX3R_VVV<0x704b8000>;
+
+def VSSUB_B : LSX3R_VVV<0x70480000>;
+def VSSUB_H : LSX3R_VVV<0x70488000>;
+def VSSUB_W : LSX3R_VVV<0x70490000>;
+def VSSUB_D : LSX3R_VVV<0x70498000>;
+def VSSUB_BU : LSX3R_VVV<0x704c0000>;
+def VSSUB_HU : LSX3R_VVV<0x704c8000>;
+def VSSUB_WU : LSX3R_VVV<0x704d0000>;
+def VSSUB_DU : LSX3R_VVV<0x704d8000>;
+
+def VHADDW_H_B : LSX3R_VVV<0x70540000>;
+def VHADDW_W_H : LSX3R_VVV<0x70548000>;
+def VHADDW_D_W : LSX3R_VVV<0x70550000>;
+def VHADDW_Q_D : LSX3R_VVV<0x70558000>;
+def VHADDW_HU_BU : LSX3R_VVV<0x70580000>;
+def VHADDW_WU_HU : LSX3R_VVV<0x70588000>;
+def VHADDW_DU_WU : LSX3R_VVV<0x70590000>;
+def VHADDW_QU_DU : LSX3R_VVV<0x70598000>;
+
+def VHSUBW_H_B : LSX3R_VVV<0x70560000>;
+def VHSUBW_W_H : LSX3R_VVV<0x70568000>;
+def VHSUBW_D_W : LSX3R_VVV<0x70570000>;
+def VHSUBW_Q_D : LSX3R_VVV<0x70578000>;
+def VHSUBW_HU_BU : LSX3R_VVV<0x705a0000>;
+def VHSUBW_WU_HU : LSX3R_VVV<0x705a8000>;
+def VHSUBW_DU_WU : LSX3R_VVV<0x705b0000>;
+def VHSUBW_QU_DU : LSX3R_VVV<0x705b8000>;
+
+def VADDWEV_H_B : LSX3R_VVV<0x701e0000>;
+def VADDWEV_W_H : LSX3R_VVV<0x701e8000>;
+def VADDWEV_D_W : LSX3R_VVV<0x701f0000>;
+def VADDWEV_Q_D : LSX3R_VVV<0x701f8000>;
+def VADDWOD_H_B : LSX3R_VVV<0x70220000>;
+def VADDWOD_W_H : LSX3R_VVV<0x70228000>;
+def VADDWOD_D_W : LSX3R_VVV<0x70230000>;
+def VADDWOD_Q_D : LSX3R_VVV<0x70238000>;
+
+def VSUBWEV_H_B : LSX3R_VVV<0x70200000>;
+def VSUBWEV_W_H : LSX3R_VVV<0x70208000>;
+def VSUBWEV_D_W : LSX3R_VVV<0x70210000>;
+def VSUBWEV_Q_D : LSX3R_VVV<0x70218000>;
+def VSUBWOD_H_B : LSX3R_VVV<0x70240000>;
+def VSUBWOD_W_H : LSX3R_VVV<0x70248000>;
+def VSUBWOD_D_W : LSX3R_VVV<0x70250000>;
+def VSUBWOD_Q_D : LSX3R_VVV<0x70258000>;
+
+def VADDWEV_H_BU : LSX3R_VVV<0x702e0000>;
+def VADDWEV_W_HU : LSX3R_VVV<0x702e8000>;
+def VADDWEV_D_WU : LSX3R_VVV<0x702f0000>;
+def VADDWEV_Q_DU : LSX3R_VVV<0x702f8000>;
+def VADDWOD_H_BU : LSX3R_VVV<0x70320000>;
+def VADDWOD_W_HU : LSX3R_VVV<0x70328000>;
+def VADDWOD_D_WU : LSX3R_VVV<0x70330000>;
+def VADDWOD_Q_DU : LSX3R_VVV<0x70338000>;
+
+def VSUBWEV_H_BU : LSX3R_VVV<0x70300000>;
+def VSUBWEV_W_HU : LSX3R_VVV<0x70308000>;
+def VSUBWEV_D_WU : LSX3R_VVV<0x70310000>;
+def VSUBWEV_Q_DU : LSX3R_VVV<0x70318000>;
+def VSUBWOD_H_BU : LSX3R_VVV<0x70340000>;
+def VSUBWOD_W_HU : LSX3R_VVV<0x70348000>;
+def VSUBWOD_D_WU : LSX3R_VVV<0x70350000>;
+def VSUBWOD_Q_DU : LSX3R_VVV<0x70358000>;
+
+def VADDWEV_H_BU_B : LSX3R_VVV<0x703e0000>;
+def VADDWEV_W_HU_H : LSX3R_VVV<0x703e8000>;
+def VADDWEV_D_WU_W : LSX3R_VVV<0x703f0000>;
+def VADDWEV_Q_DU_D : LSX3R_VVV<0x703f8000>;
+def VADDWOD_H_BU_B : LSX3R_VVV<0x70400000>;
+def VADDWOD_W_HU_H : LSX3R_VVV<0x70408000>;
+def VADDWOD_D_WU_W : LSX3R_VVV<0x70410000>;
+def VADDWOD_Q_DU_D : LSX3R_VVV<0x70418000>;
+
+def VAVG_B : LSX3R_VVV<0x70640000>;
+def VAVG_H : LSX3R_VVV<0x70648000>;
+def VAVG_W : LSX3R_VVV<0x70650000>;
+def VAVG_D : LSX3R_VVV<0x70658000>;
+def VAVG_BU : LSX3R_VVV<0x70660000>;
+def VAVG_HU : LSX3R_VVV<0x70668000>;
+def VAVG_WU : LSX3R_VVV<0x70670000>;
+def VAVG_DU : LSX3R_VVV<0x70678000>;
+def VAVGR_B : LSX3R_VVV<0x70680000>;
+def VAVGR_H : LSX3R_VVV<0x70688000>;
+def VAVGR_W : LSX3R_VVV<0x70690000>;
+def VAVGR_D : LSX3R_VVV<0x70698000>;
+def VAVGR_BU : LSX3R_VVV<0x706a0000>;
+def VAVGR_HU : LSX3R_VVV<0x706a8000>;
+def VAVGR_WU : LSX3R_VVV<0x706b0000>;
+def VAVGR_DU : LSX3R_VVV<0x706b8000>;
+
+def VABSD_B : LSX3R_VVV<0x70600000>;
+def VABSD_H : LSX3R_VVV<0x70608000>;
+def VABSD_W : LSX3R_VVV<0x70610000>;
+def VABSD_D : LSX3R_VVV<0x70618000>;
+def VABSD_BU : LSX3R_VVV<0x70620000>;
+def VABSD_HU : LSX3R_VVV<0x70628000>;
+def VABSD_WU : LSX3R_VVV<0x70630000>;
+def VABSD_DU : LSX3R_VVV<0x70638000>;
+
+def VADDA_B : LSX3R_VVV<0x705c0000>;
+def VADDA_H : LSX3R_VVV<0x705c8000>;
+def VADDA_W : LSX3R_VVV<0x705d0000>;
+def VADDA_D : LSX3R_VVV<0x705d8000>;
+
+def VMAX_B : LSX3R_VVV<0x70700000>;
+def VMAX_H : LSX3R_VVV<0x70708000>;
+def VMAX_W : LSX3R_VVV<0x70710000>;
+def VMAX_D : LSX3R_VVV<0x70718000>;
+def VMAXI_B : LSX2RI5_VVI<0x72900000, simm5>;
+def VMAXI_H : LSX2RI5_VVI<0x72908000, simm5>;
+def VMAXI_W : LSX2RI5_VVI<0x72910000, simm5>;
+def VMAXI_D : LSX2RI5_VVI<0x72918000, simm5>;
+def VMAX_BU : LSX3R_VVV<0x70740000>;
+def VMAX_HU : LSX3R_VVV<0x70748000>;
+def VMAX_WU : LSX3R_VVV<0x70750000>;
+def VMAX_DU : LSX3R_VVV<0x70758000>;
+def VMAXI_BU : LSX2RI5_VVI<0x72940000>;
+def VMAXI_HU : LSX2RI5_VVI<0x72948000>;
+def VMAXI_WU : LSX2RI5_VVI<0x72950000>;
+def VMAXI_DU : LSX2RI5_VVI<0x72958000>;
+
+def VMIN_B : LSX3R_VVV<0x70720000>;
+def VMIN_H : LSX3R_VVV<0x70728000>;
+def VMIN_W : LSX3R_VVV<0x70730000>;
+def VMIN_D : LSX3R_VVV<0x70738000>;
+def VMINI_B : LSX2RI5_VVI<0x72920000, simm5>;
+def VMINI_H : LSX2RI5_VVI<0x72928000, simm5>;
+def VMINI_W : LSX2RI5_VVI<0x72930000, simm5>;
+def VMINI_D : LSX2RI5_VVI<0x72938000, simm5>;
+def VMIN_BU : LSX3R_VVV<0x70760000>;
+def VMIN_HU : LSX3R_VVV<0x70768000>;
+def VMIN_WU : LSX3R_VVV<0x70770000>;
+def VMIN_DU : LSX3R_VVV<0x70778000>;
+def VMINI_BU : LSX2RI5_VVI<0x72960000>;
+def VMINI_HU : LSX2RI5_VVI<0x72968000>;
+def VMINI_WU : LSX2RI5_VVI<0x72970000>;
+def VMINI_DU : LSX2RI5_VVI<0x72978000>;
+
+def VMUL_B : LSX3R_VVV<0x70840000>;
+def VMUL_H : LSX3R_VVV<0x70848000>;
+def VMUL_W : LSX3R_VVV<0x70850000>;
+def VMUL_D : LSX3R_VVV<0x70858000>;
+
+def VMUH_B : LSX3R_VVV<0x70860000>;
+def VMUH_H : LSX3R_VVV<0x70868000>;
+def VMUH_W : LSX3R_VVV<0x70870000>;
+def VMUH_D : LSX3R_VVV<0x70878000>;
+def VMUH_BU : LSX3R_VVV<0x70880000>;
+def VMUH_HU : LSX3R_VVV<0x70888000>;
+def VMUH_WU : LSX3R_VVV<0x70890000>;
+def VMUH_DU : LSX3R_VVV<0x70898000>;
+
+def VMULWEV_H_B : LSX3R_VVV<0x70900000>;
+def VMULWEV_W_H : LSX3R_VVV<0x70908000>;
+def VMULWEV_D_W : LSX3R_VVV<0x70910000>;
+def VMULWEV_Q_D : LSX3R_VVV<0x70918000>;
+def VMULWOD_H_B : LSX3R_VVV<0x70920000>;
+def VMULWOD_W_H : LSX3R_VVV<0x70928000>;
+def VMULWOD_D_W : LSX3R_VVV<0x70930000>;
+def VMULWOD_Q_D : LSX3R_VVV<0x70938000>;
+def VMULWEV_H_BU : LSX3R_VVV<0x70980000>;
+def VMULWEV_W_HU : LSX3R_VVV<0x70988000>;
+def VMULWEV_D_WU : LSX3R_VVV<0x70990000>;
+def VMULWEV_Q_DU : LSX3R_VVV<0x70998000>;
+def VMULWOD_H_BU : LSX3R_VVV<0x709a0000>;
+def VMULWOD_W_HU : LSX3R_VVV<0x709a8000>;
+def VMULWOD_D_WU : LSX3R_VVV<0x709b0000>;
+def VMULWOD_Q_DU : LSX3R_VVV<0x709b8000>;
+def VMULWEV_H_BU_B : LSX3R_VVV<0x70a00000>;
+def VMULWEV_W_HU_H : LSX3R_VVV<0x70a08000>;
+def VMULWEV_D_WU_W : LSX3R_VVV<0x70a10000>;
+def VMULWEV_Q_DU_D : LSX3R_VVV<0x70a18000>;
+def VMULWOD_H_BU_B : LSX3R_VVV<0x70a20000>;
+def VMULWOD_W_HU_H : LSX3R_VVV<0x70a28000>;
+def VMULWOD_D_WU_W : LSX3R_VVV<0x70a30000>;
+def VMULWOD_Q_DU_D : LSX3R_VVV<0x70a38000>;
+
+def VMADD_B : LSX3R_VVVV<0x70a80000>;
+def VMADD_H : LSX3R_VVVV<0x70a88000>;
+def VMADD_W : LSX3R_VVVV<0x70a90000>;
+def VMADD_D : LSX3R_VVVV<0x70a98000>;
+
+def VMSUB_B : LSX3R_VVVV<0x70aa0000>;
+def VMSUB_H : LSX3R_VVVV<0x70aa8000>;
+def VMSUB_W : LSX3R_VVVV<0x70ab0000>;
+def VMSUB_D : LSX3R_VVVV<0x70ab8000>;
+
+def VMADDWEV_H_B : LSX3R_VVVV<0x70ac0000>;
+def VMADDWEV_W_H : LSX3R_VVVV<0x70ac8000>;
+def VMADDWEV_D_W : LSX3R_VVVV<0x70ad0000>;
+def VMADDWEV_Q_D : LSX3R_VVVV<0x70ad8000>;
+def VMADDWOD_H_B : LSX3R_VVVV<0x70ae0000>;
+def VMADDWOD_W_H : LSX3R_VVVV<0x70ae8000>;
+def VMADDWOD_D_W : LSX3R_VVVV<0x70af0000>;
+def VMADDWOD_Q_D : LSX3R_VVVV<0x70af8000>;
+def VMADDWEV_H_BU : LSX3R_VVVV<0x70b40000>;
+def VMADDWEV_W_HU : LSX3R_VVVV<0x70b48000>;
+def VMADDWEV_D_WU : LSX3R_VVVV<0x70b50000>;
+def VMADDWEV_Q_DU : LSX3R_VVVV<0x70b58000>;
+def VMADDWOD_H_BU : LSX3R_VVVV<0x70b60000>;
+def VMADDWOD_W_HU : LSX3R_VVVV<0x70b68000>;
+def VMADDWOD_D_WU : LSX3R_VVVV<0x70b70000>;
+def VMADDWOD_Q_DU : LSX3R_VVVV<0x70b78000>;
+def VMADDWEV_H_BU_B : LSX3R_VVVV<0x70bc0000>;
+def VMADDWEV_W_HU_H : LSX3R_VVVV<0x70bc8000>;
+def VMADDWEV_D_WU_W : LSX3R_VVVV<0x70bd0000>;
+def VMADDWEV_Q_DU_D : LSX3R_VVVV<0x70bd8000>;
+def VMADDWOD_H_BU_B : LSX3R_VVVV<0x70be0000>;
+def VMADDWOD_W_HU_H : LSX3R_VVVV<0x70be8000>;
+def VMADDWOD_D_WU_W : LSX3R_VVVV<0x70bf0000>;
+def VMADDWOD_Q_DU_D : LSX3R_VVVV<0x70bf8000>;
+
+def VDIV_B : LSX3R_VVV<0x70e00000>;
+def VDIV_H : LSX3R_VVV<0x70e08000>;
+def VDIV_W : LSX3R_VVV<0x70e10000>;
+def VDIV_D : LSX3R_VVV<0x70e18000>;
+def VDIV_BU : LSX3R_VVV<0x70e40000>;
+def VDIV_HU : LSX3R_VVV<0x70e48000>;
+def VDIV_WU : LSX3R_VVV<0x70e50000>;
+def VDIV_DU : LSX3R_VVV<0x70e58000>;
+
+def VMOD_B : LSX3R_VVV<0x70e20000>;
+def VMOD_H : LSX3R_VVV<0x70e28000>;
+def VMOD_W : LSX3R_VVV<0x70e30000>;
+def VMOD_D : LSX3R_VVV<0x70e38000>;
+def VMOD_BU : LSX3R_VVV<0x70e60000>;
+def VMOD_HU : LSX3R_VVV<0x70e68000>;
+def VMOD_WU : LSX3R_VVV<0x70e70000>;
+def VMOD_DU : LSX3R_VVV<0x70e78000>;
+
+def VSAT_B : LSX2RI3_VVI<0x73242000>;
+def VSAT_H : LSX2RI4_VVI<0x73244000>;
+def VSAT_W : LSX2RI5_VVI<0x73248000>;
+def VSAT_D : LSX2RI6_VVI<0x73250000>;
+def VSAT_BU : LSX2RI3_VVI<0x73282000>;
+def VSAT_HU : LSX2RI4_VVI<0x73284000>;
+def VSAT_WU : LSX2RI5_VVI<0x73288000>;
+def VSAT_DU : LSX2RI6_VVI<0x73290000>;
+
+def VEXTH_H_B : LSX2R_VV<0x729ee000>;
+def VEXTH_W_H : LSX2R_VV<0x729ee400>;
+def VEXTH_D_W : LSX2R_VV<0x729ee800>;
+def VEXTH_Q_D : LSX2R_VV<0x729eec00>;
+def VEXTH_HU_BU : LSX2R_VV<0x729ef000>;
+def VEXTH_WU_HU : LSX2R_VV<0x729ef400>;
+def VEXTH_DU_WU : LSX2R_VV<0x729ef800>;
+def VEXTH_QU_DU : LSX2R_VV<0x729efc00>;
+
+def VSIGNCOV_B : LSX3R_VVV<0x712e0000>;
+def VSIGNCOV_H : LSX3R_VVV<0x712e8000>;
+def VSIGNCOV_W : LSX3R_VVV<0x712f0000>;
+def VSIGNCOV_D : LSX3R_VVV<0x712f8000>;
+
+def VMSKLTZ_B : LSX2R_VV<0x729c4000>;
+def VMSKLTZ_H : LSX2R_VV<0x729c4400>;
+def VMSKLTZ_W : LSX2R_VV<0x729c4800>;
+def VMSKLTZ_D : LSX2R_VV<0x729c4c00>;
+
+def VMSKGEZ_B : LSX2R_VV<0x729c5000>;
+
+def VMSKNZ_B : LSX2R_VV<0x729c6000>;
+
+def VLDI : LSX1RI13_VI<0x73e00000>;
+
+def VAND_V : LSX3R_VVV<0x71260000>;
+def VOR_V : LSX3R_VVV<0x71268000>;
+def VXOR_V : LSX3R_VVV<0x71270000>;
+def VNOR_V : LSX3R_VVV<0x71278000>;
+def VANDN_V : LSX3R_VVV<0x71280000>;
+def VORN_V : LSX3R_VVV<0x71288000>;
+
+def VANDI_B : LSX2RI8_VVI<0x73d00000>;
+def VORI_B : LSX2RI8_VVI<0x73d40000>;
+def VXORI_B : LSX2RI8_VVI<0x73d80000>;
+def VNORI_B : LSX2RI8_VVI<0x73dc0000>;
+
+def VSLL_B : LSX3R_VVV<0x70e80000>;
+def VSLL_H : LSX3R_VVV<0x70e88000>;
+def VSLL_W : LSX3R_VVV<0x70e90000>;
+def VSLL_D : LSX3R_VVV<0x70e98000>;
+def VSLLI_B : LSX2RI3_VVI<0x732c2000>;
+def VSLLI_H : LSX2RI4_VVI<0x732c4000>;
+def VSLLI_W : LSX2RI5_VVI<0x732c8000>;
+def VSLLI_D : LSX2RI6_VVI<0x732d0000>;
+
+def VSRL_B : LSX3R_VVV<0x70ea0000>;
+def VSRL_H : LSX3R_VVV<0x70ea8000>;
+def VSRL_W : LSX3R_VVV<0x70eb0000>;
+def VSRL_D : LSX3R_VVV<0x70eb8000>;
+def VSRLI_B : LSX2RI3_VVI<0x73302000>;
+def VSRLI_H : LSX2RI4_VVI<0x73304000>;
+def VSRLI_W : LSX2RI5_VVI<0x73308000>;
+def VSRLI_D : LSX2RI6_VVI<0x73310000>;
+
+def VSRA_B : LSX3R_VVV<0x70ec0000>;
+def VSRA_H : LSX3R_VVV<0x70ec8000>;
+def VSRA_W : LSX3R_VVV<0x70ed0000>;
+def VSRA_D : LSX3R_VVV<0x70ed8000>;
+def VSRAI_B : LSX2RI3_VVI<0x73342000>;
+def VSRAI_H : LSX2RI4_VVI<0x73344000>;
+def VSRAI_W : LSX2RI5_VVI<0x73348000>;
+def VSRAI_D : LSX2RI6_VVI<0x73350000>;
+
+def VROTR_B : LSX3R_VVV<0x70ee0000>;
+def VROTR_H : LSX3R_VVV<0x70ee8000>;
+def VROTR_W : LSX3R_VVV<0x70ef0000>;
+def VROTR_D : LSX3R_VVV<0x70ef8000>;
+def VROTRI_B : LSX2RI3_VVI<0x72a02000>;
+def VROTRI_H : LSX2RI4_VVI<0x72a04000>;
+def VROTRI_W : LSX2RI5_VVI<0x72a08000>;
+def VROTRI_D : LSX2RI6_VVI<0x72a10000>;
+
+def VSLLWIL_H_B : LSX2RI3_VVI<0x73082000>;
+def VSLLWIL_W_H : LSX2RI4_VVI<0x73084000>;
+def VSLLWIL_D_W : LSX2RI5_VVI<0x73088000>;
+def VEXTL_Q_D : LSX2R_VV<0x73090000>;
+def VSLLWIL_HU_BU : LSX2RI3_VVI<0x730c2000>;
+def VSLLWIL_WU_HU : LSX2RI4_VVI<0x730c4000>;
+def VSLLWIL_DU_WU : LSX2RI5_VVI<0x730c8000>;
+def VEXTL_QU_DU : LSX2R_VV<0x730d0000>;
+
+def VSRLR_B : LSX3R_VVV<0x70f00000>;
+def VSRLR_H : LSX3R_VVV<0x70f08000>;
+def VSRLR_W : LSX3R_VVV<0x70f10000>;
+def VSRLR_D : LSX3R_VVV<0x70f18000>;
+def VSRLRI_B : LSX2RI3_VVI<0x72a42000>;
+def VSRLRI_H : LSX2RI4_VVI<0x72a44000>;
+def VSRLRI_W : LSX2RI5_VVI<0x72a48000>;
+def VSRLRI_D : LSX2RI6_VVI<0x72a50000>;
+
+def VSRAR_B : LSX3R_VVV<0x70f20000>;
+def VSRAR_H : LSX3R_VVV<0x70f28000>;
+def VSRAR_W : LSX3R_VVV<0x70f30000>;
+def VSRAR_D : LSX3R_VVV<0x70f38000>;
+def VSRARI_B : LSX2RI3_VVI<0x72a82000>;
+def VSRARI_H : LSX2RI4_VVI<0x72a84000>;
+def VSRARI_W : LSX2RI5_VVI<0x72a88000>;
+def VSRARI_D : LSX2RI6_VVI<0x72a90000>;
+
+def VSRLN_B_H : LSX3R_VVV<0x70f48000>;
+def VSRLN_H_W : LSX3R_VVV<0x70f50000>;
+def VSRLN_W_D : LSX3R_VVV<0x70f58000>;
+def VSRAN_B_H : LSX3R_VVV<0x70f68000>;
+def VSRAN_H_W : LSX3R_VVV<0x70f70000>;
+def VSRAN_W_D : LSX3R_VVV<0x70f78000>;
+
+def VSRLNI_B_H : LSX2RI4_VVVI<0x73404000>;
+def VSRLNI_H_W : LSX2RI5_VVVI<0x73408000>;
+def VSRLNI_W_D : LSX2RI6_VVVI<0x73410000>;
+def VSRLNI_D_Q : LSX2RI7_VVVI<0x73420000>;
+def VSRANI_B_H : LSX2RI4_VVVI<0x73584000>;
+def VSRANI_H_W : LSX2RI5_VVVI<0x73588000>;
+def VSRANI_W_D : LSX2RI6_VVVI<0x73590000>;
+def VSRANI_D_Q : LSX2RI7_VVVI<0x735a0000>;
+
+def VSRLRN_B_H : LSX3R_VVV<0x70f88000>;
+def VSRLRN_H_W : LSX3R_VVV<0x70f90000>;
+def VSRLRN_W_D : LSX3R_VVV<0x70f98000>;
+def VSRARN_B_H : LSX3R_VVV<0x70fa8000>;
+def VSRARN_H_W : LSX3R_VVV<0x70fb0000>;
+def VSRARN_W_D : LSX3R_VVV<0x70fb8000>;
+
+def VSRLRNI_B_H : LSX2RI4_VVVI<0x73444000>;
+def VSRLRNI_H_W : LSX2RI5_VVVI<0x73448000>;
+def VSRLRNI_W_D : LSX2RI6_VVVI<0x73450000>;
+def VSRLRNI_D_Q : LSX2RI7_VVVI<0x73460000>;
+def VSRARNI_B_H : LSX2RI4_VVVI<0x735c4000>;
+def VSRARNI_H_W : LSX2RI5_VVVI<0x735c8000>;
+def VSRARNI_W_D : LSX2RI6_VVVI<0x735d0000>;
+def VSRARNI_D_Q : LSX2RI7_VVVI<0x735e0000>;
+
+def VSSRLN_B_H : LSX3R_VVV<0x70fc8000>;
+def VSSRLN_H_W : LSX3R_VVV<0x70fd0000>;
+def VSSRLN_W_D : LSX3R_VVV<0x70fd8000>;
+def VSSRAN_B_H : LSX3R_VVV<0x70fe8000>;
+def VSSRAN_H_W : LSX3R_VVV<0x70ff0000>;
+def VSSRAN_W_D : LSX3R_VVV<0x70ff8000>;
+def VSSRLN_BU_H : LSX3R_VVV<0x71048000>;
+def VSSRLN_HU_W : LSX3R_VVV<0x71050000>;
+def VSSRLN_WU_D : LSX3R_VVV<0x71058000>;
+def VSSRAN_BU_H : LSX3R_VVV<0x71068000>;
+def VSSRAN_HU_W : LSX3R_VVV<0x71070000>;
+def VSSRAN_WU_D : LSX3R_VVV<0x71078000>;
+
+def VSSRLNI_B_H : LSX2RI4_VVVI<0x73484000>;
+def VSSRLNI_H_W : LSX2RI5_VVVI<0x73488000>;
+def VSSRLNI_W_D : LSX2RI6_VVVI<0x73490000>;
+def VSSRLNI_D_Q : LSX2RI7_VVVI<0x734a0000>;
+def VSSRANI_B_H : LSX2RI4_VVVI<0x73604000>;
+def VSSRANI_H_W : LSX2RI5_VVVI<0x73608000>;
+def VSSRANI_W_D : LSX2RI6_VVVI<0x73610000>;
+def VSSRANI_D_Q : LSX2RI7_VVVI<0x73620000>;
+def VSSRLNI_BU_H : LSX2RI4_VVVI<0x734c4000>;
+def VSSRLNI_HU_W : LSX2RI5_VVVI<0x734c8000>;
+def VSSRLNI_WU_D : LSX2RI6_VVVI<0x734d0000>;
+def VSSRLNI_DU_Q : LSX2RI7_VVVI<0x734e0000>;
+def VSSRANI_BU_H : LSX2RI4_VVVI<0x73644000>;
+def VSSRANI_HU_W : LSX2RI5_VVVI<0x73648000>;
+def VSSRANI_WU_D : LSX2RI6_VVVI<0x73650000>;
+def VSSRANI_DU_Q : LSX2RI7_VVVI<0x73660000>;
+
+def VSSRLRN_B_H : LSX3R_VVV<0x71008000>;
+def VSSRLRN_H_W : LSX3R_VVV<0x71010000>;
+def VSSRLRN_W_D : LSX3R_VVV<0x71018000>;
+def VSSRARN_B_H : LSX3R_VVV<0x71028000>;
+def VSSRARN_H_W : LSX3R_VVV<0x71030000>;
+def VSSRARN_W_D : LSX3R_VVV<0x71038000>;
+def VSSRLRN_BU_H : LSX3R_VVV<0x71088000>;
+def VSSRLRN_HU_W : LSX3R_VVV<0x71090000>;
+def VSSRLRN_WU_D : LSX3R_VVV<0x71098000>;
+def VSSRARN_BU_H : LSX3R_VVV<0x710a8000>;
+def VSSRARN_HU_W : LSX3R_VVV<0x710b0000>;
+def VSSRARN_WU_D : LSX3R_VVV<0x710b8000>;
+
+def VSSRLRNI_B_H : LSX2RI4_VVVI<0x73504000>;
+def VSSRLRNI_H_W : LSX2RI5_VVVI<0x73508000>;
+def VSSRLRNI_W_D : LSX2RI6_VVVI<0x73510000>;
+def VSSRLRNI_D_Q : LSX2RI7_VVVI<0x73520000>;
+def VSSRARNI_B_H : LSX2RI4_VVVI<0x73684000>;
+def VSSRARNI_H_W : LSX2RI5_VVVI<0x73688000>;
+def VSSRARNI_W_D : LSX2RI6_VVVI<0x73690000>;
+def VSSRARNI_D_Q : LSX2RI7_VVVI<0x736a0000>;
+def VSSRLRNI_BU_H : LSX2RI4_VVVI<0x73544000>;
+def VSSRLRNI_HU_W : LSX2RI5_VVVI<0x73548000>;
+def VSSRLRNI_WU_D : LSX2RI6_VVVI<0x73550000>;
+def VSSRLRNI_DU_Q : LSX2RI7_VVVI<0x73560000>;
+def VSSRARNI_BU_H : LSX2RI4_VVVI<0x736c4000>;
+def VSSRARNI_HU_W : LSX2RI5_VVVI<0x736c8000>;
+def VSSRARNI_WU_D : LSX2RI6_VVVI<0x736d0000>;
+def VSSRARNI_DU_Q : LSX2RI7_VVVI<0x736e0000>;
+
+def VCLO_B : LSX2R_VV<0x729c0000>;
+def VCLO_H : LSX2R_VV<0x729c0400>;
+def VCLO_W : LSX2R_VV<0x729c0800>;
+def VCLO_D : LSX2R_VV<0x729c0c00>;
+def VCLZ_B : LSX2R_VV<0x729c1000>;
+def VCLZ_H : LSX2R_VV<0x729c1400>;
+def VCLZ_W : LSX2R_VV<0x729c1800>;
+def VCLZ_D : LSX2R_VV<0x729c1c00>;
+
+def VPCNT_B : LSX2R_VV<0x729c2000>;
+def VPCNT_H : LSX2R_VV<0x729c2400>;
+def VPCNT_W : LSX2R_VV<0x729c2800>;
+def VPCNT_D : LSX2R_VV<0x729c2c00>;
+
+def VBITCLR_B : LSX3R_VVV<0x710c0000>;
+def VBITCLR_H : LSX3R_VVV<0x710c8000>;
+def VBITCLR_W : LSX3R_VVV<0x710d0000>;
+def VBITCLR_D : LSX3R_VVV<0x710d8000>;
+def VBITCLRI_B : LSX2RI3_VVI<0x73102000>;
+def VBITCLRI_H : LSX2RI4_VVI<0x73104000>;
+def VBITCLRI_W : LSX2RI5_VVI<0x73108000>;
+def VBITCLRI_D : LSX2RI6_VVI<0x73110000>;
+
+def VBITSET_B : LSX3R_VVV<0x710e0000>;
+def VBITSET_H : LSX3R_VVV<0x710e8000>;
+def VBITSET_W : LSX3R_VVV<0x710f0000>;
+def VBITSET_D : LSX3R_VVV<0x710f8000>;
+def VBITSETI_B : LSX2RI3_VVI<0x73142000>;
+def VBITSETI_H : LSX2RI4_VVI<0x73144000>;
+def VBITSETI_W : LSX2RI5_VVI<0x73148000>;
+def VBITSETI_D : LSX2RI6_VVI<0x73150000>;
+
+def VBITREV_B : LSX3R_VVV<0x71100000>;
+def VBITREV_H : LSX3R_VVV<0x71108000>;
+def VBITREV_W : LSX3R_VVV<0x71110000>;
+def VBITREV_D : LSX3R_VVV<0x71118000>;
+def VBITREVI_B : LSX2RI3_VVI<0x73182000>;
+def VBITREVI_H : LSX2RI4_VVI<0x73184000>;
+def VBITREVI_W : LSX2RI5_VVI<0x73188000>;
+def VBITREVI_D : LSX2RI6_VVI<0x73190000>;
+
+def VFRSTP_B : LSX3R_VVVV<0x712b0000>;
+def VFRSTP_H : LSX3R_VVVV<0x712b8000>;
+def VFRSTPI_B : LSX2RI5_VVVI<0x729a0000>;
+def VFRSTPI_H : LSX2RI5_VVVI<0x729a8000>;
+
+def VFADD_S : LSX3R_VVV<0x71308000>;
+def VFADD_D : LSX3R_VVV<0x71310000>;
+def VFSUB_S : LSX3R_VVV<0x71328000>;
+def VFSUB_D : LSX3R_VVV<0x71330000>;
+def VFMUL_S : LSX3R_VVV<0x71388000>;
+def VFMUL_D : LSX3R_VVV<0x71390000>;
+def VFDIV_S : LSX3R_VVV<0x713a8000>;
+def VFDIV_D : LSX3R_VVV<0x713b0000>;
+
+def VFMADD_S : LSX4R_VVVV<0x09100000>;
+def VFMADD_D : LSX4R_VVVV<0x09200000>;
+def VFMSUB_S : LSX4R_VVVV<0x09500000>;
+def VFMSUB_D : LSX4R_VVVV<0x09600000>;
+def VFNMADD_S : LSX4R_VVVV<0x09900000>;
+def VFNMADD_D : LSX4R_VVVV<0x09a00000>;
+def VFNMSUB_S : LSX4R_VVVV<0x09d00000>;
+def VFNMSUB_D : LSX4R_VVVV<0x09e00000>;
+
+def VFMAX_S : LSX3R_VVV<0x713c8000>;
+def VFMAX_D : LSX3R_VVV<0x713d0000>;
+def VFMIN_S : LSX3R_VVV<0x713e8000>;
+def VFMIN_D : LSX3R_VVV<0x713f0000>;
+
+def VFMAXA_S : LSX3R_VVV<0x71408000>;
+def VFMAXA_D : LSX3R_VVV<0x71410000>;
+def VFMINA_S : LSX3R_VVV<0x71428000>;
+def VFMINA_D : LSX3R_VVV<0x71430000>;
+
+def VFLOGB_S : LSX2R_VV<0x729cc400>;
+def VFLOGB_D : LSX2R_VV<0x729cc800>;
+
+def VFCLASS_S : LSX2R_VV<0x729cd400>;
+def VFCLASS_D : LSX2R_VV<0x729cd800>;
+
+def VFSQRT_S : LSX2R_VV<0x729ce400>;
+def VFSQRT_D : LSX2R_VV<0x729ce800>;
+def VFRECIP_S : LSX2R_VV<0x729cf400>;
+def VFRECIP_D : LSX2R_VV<0x729cf800>;
+def VFRSQRT_S : LSX2R_VV<0x729d0400>;
+def VFRSQRT_D : LSX2R_VV<0x729d0800>;
+
+def VFCVTL_S_H : LSX2R_VV<0x729de800>;
+def VFCVTH_S_H : LSX2R_VV<0x729dec00>;
+def VFCVTL_D_S : LSX2R_VV<0x729df000>;
+def VFCVTH_D_S : LSX2R_VV<0x729df400>;
+def VFCVT_H_S : LSX3R_VVV<0x71460000>;
+def VFCVT_S_D : LSX3R_VVV<0x71468000>;
+
+def VFRINTRNE_S : LSX2R_VV<0x729d7400>;
+def VFRINTRNE_D : LSX2R_VV<0x729d7800>;
+def VFRINTRZ_S : LSX2R_VV<0x729d6400>;
+def VFRINTRZ_D : LSX2R_VV<0x729d6800>;
+def VFRINTRP_S : LSX2R_VV<0x729d5400>;
+def VFRINTRP_D : LSX2R_VV<0x729d5800>;
+def VFRINTRM_S : LSX2R_VV<0x729d4400>;
+def VFRINTRM_D : LSX2R_VV<0x729d4800>;
+def VFRINT_S : LSX2R_VV<0x729d3400>;
+def VFRINT_D : LSX2R_VV<0x729d3800>;
+
+def VFTINTRNE_W_S : LSX2R_VV<0x729e5000>;
+def VFTINTRNE_L_D : LSX2R_VV<0x729e5400>;
+def VFTINTRZ_W_S : LSX2R_VV<0x729e4800>;
+def VFTINTRZ_L_D : LSX2R_VV<0x729e4c00>;
+def VFTINTRP_W_S : LSX2R_VV<0x729e4000>;
+def VFTINTRP_L_D : LSX2R_VV<0x729e4400>;
+def VFTINTRM_W_S : LSX2R_VV<0x729e3800>;
+def VFTINTRM_L_D : LSX2R_VV<0x729e3c00>;
+def VFTINT_W_S : LSX2R_VV<0x729e3000>;
+def VFTINT_L_D : LSX2R_VV<0x729e3400>;
+def VFTINTRZ_WU_S : LSX2R_VV<0x729e7000>;
+def VFTINTRZ_LU_D : LSX2R_VV<0x729e7400>;
+def VFTINT_WU_S : LSX2R_VV<0x729e5800>;
+def VFTINT_LU_D : LSX2R_VV<0x729e5c00>;
+
+def VFTINTRNE_W_D : LSX3R_VVV<0x714b8000>;
+def VFTINTRZ_W_D : LSX3R_VVV<0x714b0000>;
+def VFTINTRP_W_D : LSX3R_VVV<0x714a8000>;
+def VFTINTRM_W_D : LSX3R_VVV<0x714a0000>;
+def VFTINT_W_D : LSX3R_VVV<0x71498000>;
+
+def VFTINTRNEL_L_S : LSX2R_VV<0x729ea000>;
+def VFTINTRNEH_L_S : LSX2R_VV<0x729ea400>;
+def VFTINTRZL_L_S : LSX2R_VV<0x729e9800>;
+def VFTINTRZH_L_S : LSX2R_VV<0x729e9c00>;
+def VFTINTRPL_L_S : LSX2R_VV<0x729e9000>;
+def VFTINTRPH_L_S : LSX2R_VV<0x729e9400>;
+def VFTINTRML_L_S : LSX2R_VV<0x729e8800>;
+def VFTINTRMH_L_S : LSX2R_VV<0x729e8c00>;
+def VFTINTL_L_S : LSX2R_VV<0x729e8000>;
+def VFTINTH_L_S : LSX2R_VV<0x729e8400>;
+
+def VFFINT_S_W : LSX2R_VV<0x729e0000>;
+def VFFINT_D_L : LSX2R_VV<0x729e0800>;
+def VFFINT_S_WU : LSX2R_VV<0x729e0400>;
+def VFFINT_D_LU : LSX2R_VV<0x729e0c00>;
+def VFFINTL_D_W : LSX2R_VV<0x729e1000>;
+def VFFINTH_D_W : LSX2R_VV<0x729e1400>;
+def VFFINT_S_L : LSX3R_VVV<0x71480000>;
+
+def VSEQ_B : LSX3R_VVV<0x70000000>;
+def VSEQ_H : LSX3R_VVV<0x70008000>;
+def VSEQ_W : LSX3R_VVV<0x70010000>;
+def VSEQ_D : LSX3R_VVV<0x70018000>;
+def VSEQI_B : LSX2RI5_VVI<0x72800000, simm5>;
+def VSEQI_H : LSX2RI5_VVI<0x72808000, simm5>;
+def VSEQI_W : LSX2RI5_VVI<0x72810000, simm5>;
+def VSEQI_D : LSX2RI5_VVI<0x72818000, simm5>;
+
+def VSLE_B : LSX3R_VVV<0x70020000>;
+def VSLE_H : LSX3R_VVV<0x70028000>;
+def VSLE_W : LSX3R_VVV<0x70030000>;
+def VSLE_D : LSX3R_VVV<0x70038000>;
+def VSLEI_B : LSX2RI5_VVI<0x72820000, simm5>;
+def VSLEI_H : LSX2RI5_VVI<0x72828000, simm5>;
+def VSLEI_W : LSX2RI5_VVI<0x72830000, simm5>;
+def VSLEI_D : LSX2RI5_VVI<0x72838000, simm5>;
+
+def VSLE_BU : LSX3R_VVV<0x70040000>;
+def VSLE_HU : LSX3R_VVV<0x70048000>;
+def VSLE_WU : LSX3R_VVV<0x70050000>;
+def VSLE_DU : LSX3R_VVV<0x70058000>;
+def VSLEI_BU : LSX2RI5_VVI<0x72840000>;
+def VSLEI_HU : LSX2RI5_VVI<0x72848000>;
+def VSLEI_WU : LSX2RI5_VVI<0x72850000>;
+def VSLEI_DU : LSX2RI5_VVI<0x72858000>;
+
+def VSLT_B : LSX3R_VVV<0x70060000>;
+def VSLT_H : LSX3R_VVV<0x70068000>;
+def VSLT_W : LSX3R_VVV<0x70070000>;
+def VSLT_D : LSX3R_VVV<0x70078000>;
+def VSLTI_B : LSX2RI5_VVI<0x72860000, simm5>;
+def VSLTI_H : LSX2RI5_VVI<0x72868000, simm5>;
+def VSLTI_W : LSX2RI5_VVI<0x72870000, simm5>;
+def VSLTI_D : LSX2RI5_VVI<0x72878000, simm5>;
+
+def VSLT_BU : LSX3R_VVV<0x70080000>;
+def VSLT_HU : LSX3R_VVV<0x70088000>;
+def VSLT_WU : LSX3R_VVV<0x70090000>;
+def VSLT_DU : LSX3R_VVV<0x70098000>;
+def VSLTI_BU : LSX2RI5_VVI<0x72880000>;
+def VSLTI_HU : LSX2RI5_VVI<0x72888000>;
+def VSLTI_WU : LSX2RI5_VVI<0x72890000>;
+def VSLTI_DU : LSX2RI5_VVI<0x72898000>;
+
+def VFCMP_CAF_S : LSX3R_VVV<0x0c500000>;
+def VFCMP_SAF_S : LSX3R_VVV<0x0c508000>;
+def VFCMP_CLT_S : LSX3R_VVV<0x0c510000>;
+def VFCMP_SLT_S : LSX3R_VVV<0x0c518000>;
+def VFCMP_CEQ_S : LSX3R_VVV<0x0c520000>;
+def VFCMP_SEQ_S : LSX3R_VVV<0x0c528000>;
+def VFCMP_CLE_S : LSX3R_VVV<0x0c530000>;
+def VFCMP_SLE_S : LSX3R_VVV<0x0c538000>;
+def VFCMP_CUN_S : LSX3R_VVV<0x0c540000>;
+def VFCMP_SUN_S : LSX3R_VVV<0x0c548000>;
+def VFCMP_CULT_S : LSX3R_VVV<0x0c550000>;
+def VFCMP_SULT_S : LSX3R_VVV<0x0c558000>;
+def VFCMP_CUEQ_S : LSX3R_VVV<0x0c560000>;
+def VFCMP_SUEQ_S : LSX3R_VVV<0x0c568000>;
+def VFCMP_CULE_S : LSX3R_VVV<0x0c570000>;
+def VFCMP_SULE_S : LSX3R_VVV<0x0c578000>;
+def VFCMP_CNE_S : LSX3R_VVV<0x0c580000>;
+def VFCMP_SNE_S : LSX3R_VVV<0x0c588000>;
+def VFCMP_COR_S : LSX3R_VVV<0x0c5a0000>;
+def VFCMP_SOR_S : LSX3R_VVV<0x0c5a8000>;
+def VFCMP_CUNE_S : LSX3R_VVV<0x0c5c0000>;
+def VFCMP_SUNE_S : LSX3R_VVV<0x0c5c8000>;
+
+def VFCMP_CAF_D : LSX3R_VVV<0x0c600000>;
+def VFCMP_SAF_D : LSX3R_VVV<0x0c608000>;
+def VFCMP_CLT_D : LSX3R_VVV<0x0c610000>;
+def VFCMP_SLT_D : LSX3R_VVV<0x0c618000>;
+def VFCMP_CEQ_D : LSX3R_VVV<0x0c620000>;
+def VFCMP_SEQ_D : LSX3R_VVV<0x0c628000>;
+def VFCMP_CLE_D : LSX3R_VVV<0x0c630000>;
+def VFCMP_SLE_D : LSX3R_VVV<0x0c638000>;
+def VFCMP_CUN_D : LSX3R_VVV<0x0c640000>;
+def VFCMP_SUN_D : LSX3R_VVV<0x0c648000>;
+def VFCMP_CULT_D : LSX3R_VVV<0x0c650000>;
+def VFCMP_SULT_D : LSX3R_VVV<0x0c658000>;
+def VFCMP_CUEQ_D : LSX3R_VVV<0x0c660000>;
+def VFCMP_SUEQ_D : LSX3R_VVV<0x0c668000>;
+def VFCMP_CULE_D : LSX3R_VVV<0x0c670000>;
+def VFCMP_SULE_D : LSX3R_VVV<0x0c678000>;
+def VFCMP_CNE_D : LSX3R_VVV<0x0c680000>;
+def VFCMP_SNE_D : LSX3R_VVV<0x0c688000>;
+def VFCMP_COR_D : LSX3R_VVV<0x0c6a0000>;
+def VFCMP_SOR_D : LSX3R_VVV<0x0c6a8000>;
+def VFCMP_CUNE_D : LSX3R_VVV<0x0c6c0000>;
+def VFCMP_SUNE_D : LSX3R_VVV<0x0c6c8000>;
+
+def VBITSEL_V : LSX4R_VVVV<0x0d100000>;
+
+def VBITSELI_B : LSX2RI8_VVVI<0x73c40000>;
+
+def VSETEQZ_V : LSX2R_CV<0x729c9800>;
+def VSETNEZ_V : LSX2R_CV<0x729c9c00>;
+def VSETANYEQZ_B : LSX2R_CV<0x729ca000>;
+def VSETANYEQZ_H : LSX2R_CV<0x729ca400>;
+def VSETANYEQZ_W : LSX2R_CV<0x729ca800>;
+def VSETANYEQZ_D : LSX2R_CV<0x729cac00>;
+def VSETALLNEZ_B : LSX2R_CV<0x729cb000>;
+def VSETALLNEZ_H : LSX2R_CV<0x729cb400>;
+def VSETALLNEZ_W : LSX2R_CV<0x729cb800>;
+def VSETALLNEZ_D : LSX2R_CV<0x729cbc00>;
+
+def VINSGR2VR_B : LSX2RI4_VVRI<0x72eb8000>;
+def VINSGR2VR_H : LSX2RI3_VVRI<0x72ebc000>;
+def VINSGR2VR_W : LSX2RI2_VVRI<0x72ebe000>;
+def VINSGR2VR_D : LSX2RI1_VVRI<0x72ebf000>;
+def VPICKVE2GR_B : LSX2RI4_RVI<0x72ef8000>;
+def VPICKVE2GR_H : LSX2RI3_RVI<0x72efc000>;
+def VPICKVE2GR_W : LSX2RI2_RVI<0x72efe000>;
+def VPICKVE2GR_D : LSX2RI1_RVI<0x72eff000>;
+def VPICKVE2GR_BU : LSX2RI4_RVI<0x72f38000>;
+def VPICKVE2GR_HU : LSX2RI3_RVI<0x72f3c000>;
+def VPICKVE2GR_WU : LSX2RI2_RVI<0x72f3e000>;
+def VPICKVE2GR_DU : LSX2RI1_RVI<0x72f3f000>;
+
+def VREPLGR2VR_B : LSX2R_VR<0x729f0000>;
+def VREPLGR2VR_H : LSX2R_VR<0x729f0400>;
+def VREPLGR2VR_W : LSX2R_VR<0x729f0800>;
+def VREPLGR2VR_D : LSX2R_VR<0x729f0c00>;
+
+def VREPLVE_B : LSX3R_VVR<0x71220000>;
+def VREPLVE_H : LSX3R_VVR<0x71228000>;
+def VREPLVE_W : LSX3R_VVR<0x71230000>;
+def VREPLVE_D : LSX3R_VVR<0x71238000>;
+def VREPLVEI_B : LSX2RI4_VVI<0x72f78000>;
+def VREPLVEI_H : LSX2RI3_VVI<0x72f7c000>;
+def VREPLVEI_W : LSX2RI2_VVI<0x72f7e000>;
+def VREPLVEI_D : LSX2RI1_VVI<0x72f7f000>;
+
+def VBSLL_V : LSX2RI5_VVI<0x728e0000>;
+def VBSRL_V : LSX2RI5_VVI<0x728e8000>;
+
+def VPACKEV_B : LSX3R_VVV<0x71160000>;
+def VPACKEV_H : LSX3R_VVV<0x71168000>;
+def VPACKEV_W : LSX3R_VVV<0x71170000>;
+def VPACKEV_D : LSX3R_VVV<0x71178000>;
+def VPACKOD_B : LSX3R_VVV<0x71180000>;
+def VPACKOD_H : LSX3R_VVV<0x71188000>;
+def VPACKOD_W : LSX3R_VVV<0x71190000>;
+def VPACKOD_D : LSX3R_VVV<0x71198000>;
+
+def VPICKEV_B : LSX3R_VVV<0x711e0000>;
+def VPICKEV_H : LSX3R_VVV<0x711e8000>;
+def VPICKEV_W : LSX3R_VVV<0x711f0000>;
+def VPICKEV_D : LSX3R_VVV<0x711f8000>;
+def VPICKOD_B : LSX3R_VVV<0x71200000>;
+def VPICKOD_H : LSX3R_VVV<0x71208000>;
+def VPICKOD_W : LSX3R_VVV<0x71210000>;
+def VPICKOD_D : LSX3R_VVV<0x71218000>;
+
+def VILVL_B : LSX3R_VVV<0x711a0000>;
+def VILVL_H : LSX3R_VVV<0x711a8000>;
+def VILVL_W : LSX3R_VVV<0x711b0000>;
+def VILVL_D : LSX3R_VVV<0x711b8000>;
+def VILVH_B : LSX3R_VVV<0x711c0000>;
+def VILVH_H : LSX3R_VVV<0x711c8000>;
+def VILVH_W : LSX3R_VVV<0x711d0000>;
+def VILVH_D : LSX3R_VVV<0x711d8000>;
+
+def VSHUF_B : LSX4R_VVVV<0x0d500000>;
+
+def VSHUF_H : LSX3R_VVVV<0x717a8000>;
+def VSHUF_W : LSX3R_VVVV<0x717b0000>;
+def VSHUF_D : LSX3R_VVVV<0x717b8000>;
+
+def VSHUF4I_B : LSX2RI8_VVI<0x73900000>;
+def VSHUF4I_H : LSX2RI8_VVI<0x73940000>;
+def VSHUF4I_W : LSX2RI8_VVI<0x73980000>;
+def VSHUF4I_D : LSX2RI8_VVVI<0x739c0000>;
+
+def VPERMI_W : LSX2RI8_VVVI<0x73e40000>;
+
+def VEXTRINS_D : LSX2RI8_VVVI<0x73800000>;
+def VEXTRINS_W : LSX2RI8_VVVI<0x73840000>;
+def VEXTRINS_H : LSX2RI8_VVVI<0x73880000>;
+def VEXTRINS_B : LSX2RI8_VVVI<0x738c0000>;
+} // mayLoad = 0, mayStore = 0
+
+let mayLoad = 1, mayStore = 0 in {
+def VLD : LSX2RI12_Load<0x2c000000>;
+def VLDX : LSX3R_Load<0x38400000>;
+
+def VLDREPL_B : LSX2RI12_Load<0x30800000>;
+def VLDREPL_H : LSX2RI11_Load<0x30400000>;
+def VLDREPL_W : LSX2RI10_Load<0x30200000>;
+def VLDREPL_D : LSX2RI9_Load<0x30100000>;
+} // mayLoad = 1, mayStore = 0
+
+let mayLoad = 0, mayStore = 1 in {
+def VST : LSX2RI12_Store<0x2c400000>;
+def VSTX : LSX3R_Store<0x38440000>;
+
+def VSTELM_B : LSX2RI8I4_VRII<0x31800000>;
+def VSTELM_H : LSX2RI8I3_VRII<0x31400000, simm8_lsl1>;
+def VSTELM_W : LSX2RI8I2_VRII<0x31200000, simm8_lsl2>;
+def VSTELM_D : LSX2RI8I1_VRII<0x31100000, simm8_lsl3>;
+} // mayLoad = 0, mayStore = 1
+
+} // hasSideEffects = 0, Predicates = [HasExtLSX]
+
+/// Pseudo-instructions
+
+let Predicates = [HasExtLSX] in {
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 0,
+ isAsmParserOnly = 1 in {
+def PseudoVREPLI_B : Pseudo<(outs LSX128:$vd), (ins simm10:$imm), [],
+ "vrepli.b", "$vd, $imm">;
+def PseudoVREPLI_H : Pseudo<(outs LSX128:$vd), (ins simm10:$imm), [],
+ "vrepli.h", "$vd, $imm">;
+def PseudoVREPLI_W : Pseudo<(outs LSX128:$vd), (ins simm10:$imm), [],
+ "vrepli.w", "$vd, $imm">;
+def PseudoVREPLI_D : Pseudo<(outs LSX128:$vd), (ins simm10:$imm), [],
+ "vrepli.d", "$vd, $imm">;
+}
+
+} // Predicates = [HasExtLSX]
diff --git a/llvm/lib/Target/LoongArch/LoongArchLVZInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLVZInstrInfo.td
new file mode 100644
index 000000000000..50a16e2dd56b
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchLVZInstrInfo.td
@@ -0,0 +1,33 @@
+//===- LoongArchLVZInstrInfo.td - LoongArch LVZ instructions -*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the LVZ extension instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0, Predicates = [HasExtLVZ] in {
+
+def GCSRRD : FmtCSR<0x05000000, (outs GPR:$rd), (ins uimm14:$csr_num),
+ "$rd, $csr_num">;
+
+let Constraints = "$rd = $dst" in {
+def GCSRWR : FmtCSR<0x05000020, (outs GPR:$dst),
+ (ins GPR:$rd, uimm14:$csr_num), "$rd, $csr_num">;
+def GCSRXCHG : FmtCSRXCHG<0x05000000, (outs GPR:$dst),
+ (ins GPR:$rd, GPR:$rj, uimm14:$csr_num),
+ "$rd, $rj, $csr_num">;
+} // Constraints = "$rd = $dst"
+
+def GTLBFLUSH : FmtI32<0x06482401>;
+def HVCL : MISC_I15<0x002b8000>;
+
+} // hasSideEffects = 1, mayLoad = 0, mayStore = 0, Predicates = [HasExtLVZ]
diff --git a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
index 64f08e260381..5daa9481c907 100644
--- a/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp
@@ -47,24 +47,48 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym,
case LoongArchII::MO_PCREL_LO:
Kind = LoongArchMCExpr::VK_LoongArch_PCALA_LO12;
break;
+ case LoongArchII::MO_PCREL64_LO:
+ Kind = LoongArchMCExpr::VK_LoongArch_PCALA64_LO20;
+ break;
+ case LoongArchII::MO_PCREL64_HI:
+ Kind = LoongArchMCExpr::VK_LoongArch_PCALA64_HI12;
+ break;
case LoongArchII::MO_GOT_PC_HI:
Kind = LoongArchMCExpr::VK_LoongArch_GOT_PC_HI20;
break;
case LoongArchII::MO_GOT_PC_LO:
Kind = LoongArchMCExpr::VK_LoongArch_GOT_PC_LO12;
break;
+ case LoongArchII::MO_GOT_PC64_LO:
+ Kind = LoongArchMCExpr::VK_LoongArch_GOT64_PC_LO20;
+ break;
+ case LoongArchII::MO_GOT_PC64_HI:
+ Kind = LoongArchMCExpr::VK_LoongArch_GOT64_PC_HI12;
+ break;
case LoongArchII::MO_LE_HI:
Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE_HI20;
break;
case LoongArchII::MO_LE_LO:
Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE_LO12;
break;
+ case LoongArchII::MO_LE64_LO:
+ Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE64_LO20;
+ break;
+ case LoongArchII::MO_LE64_HI:
+ Kind = LoongArchMCExpr::VK_LoongArch_TLS_LE64_HI12;
+ break;
case LoongArchII::MO_IE_PC_HI:
Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_HI20;
break;
case LoongArchII::MO_IE_PC_LO:
Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE_PC_LO12;
break;
+ case LoongArchII::MO_IE_PC64_LO:
+ Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE64_PC_LO20;
+ break;
+ case LoongArchII::MO_IE_PC64_HI:
+ Kind = LoongArchMCExpr::VK_LoongArch_TLS_IE64_PC_HI12;
+ break;
case LoongArchII::MO_LD_PC_HI:
Kind = LoongArchMCExpr::VK_LoongArch_TLS_LD_PC_HI20;
break;
diff --git a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td
index ff914f805e5b..fbca110fd093 100644
--- a/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchRegisterInfo.td
@@ -17,6 +17,13 @@ class LoongArchReg<bits<16> Enc, string n, list<string> alt = []>
let AltNames = alt;
}
+class LoongArchRegWithSubRegs<bits<16> Enc, string n, list<Register> subregs,
+ list<string> alt = []>
+ : RegisterWithSubRegs<n, subregs> {
+ let HWEncoding = Enc;
+ let AltNames = alt;
+}
+
class LoongArchReg32<bits<16> Enc, string n, list<string> alt = []>
: Register<n> {
let HWEncoding = Enc;
@@ -25,12 +32,21 @@ class LoongArchReg32<bits<16> Enc, string n, list<string> alt = []>
def sub_32 : SubRegIndex<32>;
class LoongArchReg64<LoongArchReg32 subreg>
- : Register<""> {
- let HWEncoding = subreg.HWEncoding;
- let SubRegs = [subreg];
+ : LoongArchRegWithSubRegs<subreg.HWEncoding, subreg.AsmName, [subreg],
+ subreg.AltNames> {
let SubRegIndices = [sub_32];
- let AsmName = subreg.AsmName;
- let AltNames = subreg.AltNames;
+}
+
+def sub_64 : SubRegIndex<64>;
+class LoongArchReg128<LoongArchReg64 subreg, string n>
+ : LoongArchRegWithSubRegs<subreg.HWEncoding, n, [subreg]> {
+ let SubRegIndices = [sub_64];
+}
+
+def sub_128 : SubRegIndex<128>;
+class LoongArchReg256<LoongArchReg128 subreg, string n>
+ : LoongArchRegWithSubRegs<subreg.HWEncoding, n, [subreg]> {
+ let SubRegIndices = [sub_128];
}
let FallbackRegAltNameIndex = NoRegAltName in
@@ -170,3 +186,31 @@ def FCSR#I : LoongArchReg<I, "fcsr"#I>;
let isAllocatable = false in
def FCSR : RegisterClass<"LoongArch", [i32], 32, (sequence "FCSR%u", 0, 3)>;
+
+// LSX registers
+
+foreach I = 0-31 in
+def VR#I : LoongArchReg128<!cast<LoongArchReg64>("F"#I#"_64"), "vr"#I>,
+ DwarfRegAlias<!cast<LoongArchReg64>("F"#I#"_64")>;
+
+def LSX128 : RegisterClass<"LoongArch",
+ [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64],
+ 128, (sequence "VR%u", 0, 31)>;
+
+// LASX registers
+
+foreach I = 0-31 in
+def XR#I : LoongArchReg256<!cast<LoongArchReg128>("VR"#I), "xr"#I>,
+ DwarfRegAlias<!cast<LoongArchReg128>("VR"#I)>;
+
+def LASX256 : RegisterClass<"LoongArch",
+ [v8f32, v4f64, v32i8, v16i16, v8i32, v4i64],
+ 256, (sequence "XR%u", 0, 31)>;
+
+// Scratchpad registers
+
+foreach I = 0-3 in
+def SCR#I : LoongArchReg<I, "scr"#I>;
+
+let isAllocatable = false, RegInfos = GRLenRI in
+def SCR : RegisterClass<"LoongArch", [GRLenVT], 32, (sequence "SCR%u", 0, 3)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
index d8850f656d52..ffcde7dd1fa7 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp
@@ -12,6 +12,7 @@
#include "LoongArchSubtarget.h"
#include "LoongArchFrameLowering.h"
+#include "MCTargetDesc/LoongArchBaseInfo.h"
using namespace llvm;
@@ -34,6 +35,7 @@ LoongArchSubtarget &LoongArchSubtarget::initializeSubtargetDependencies(
TuneCPU = CPU;
ParseSubtargetFeatures(CPU, TuneCPU, FS);
+ initializeProperties(TuneCPU);
if (Is64Bit) {
GRLenVT = MVT::i64;
GRLen = 64;
@@ -48,11 +50,37 @@ LoongArchSubtarget &LoongArchSubtarget::initializeSubtargetDependencies(
if (!Is64Bit && HasLA64)
report_fatal_error("Feature 64bit should be used for loongarch64 target.");
- // TODO: ILP32{S,F} LP64{S,F}
- TargetABI = Is64Bit ? LoongArchABI::ABI_LP64D : LoongArchABI::ABI_ILP32D;
+ TargetABI = LoongArchABI::computeTargetABI(TT, ABIName);
+
return *this;
}
+void LoongArchSubtarget::initializeProperties(StringRef TuneCPU) {
+ // Initialize CPU specific properties. We should add a tablegen feature for
+ // this in the future so we can specify it together with the subtarget
+ // features.
+
+ // TODO: Check TuneCPU and override defaults (that are for LA464) once we
+ // support optimizing for more uarchs.
+
+ // Default to the alignment settings empirically confirmed to perform best
+ // on LA464, with 4-wide instruction fetch and decode stages. These settings
+ // can also be overridden in initializeProperties.
+ //
+ // We default to such higher-than-minimum alignments because we assume that:
+ //
+ // * these settings should benefit most existing uarchs/users,
+ // * future general-purpose LoongArch cores are likely to have issue widths
+ // equal to or wider than 4,
+ // * instruction sequences best for LA464 should not pessimize other future
+ // uarchs, and
+ // * narrower cores would not suffer much (aside from slightly increased
+ // ICache footprint maybe), compared to the gains everywhere else.
+ PrefFunctionAlignment = Align(32);
+ PrefLoopAlignment = Align(16);
+ MaxBytesForAlignment = 16;
+}
+
LoongArchSubtarget::LoongArchSubtarget(const Triple &TT, StringRef CPU,
StringRef TuneCPU, StringRef FS,
StringRef ABIName,
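
A small standalone sketch, for illustration only and not taken from the patch: what the LA464-tuned defaults set in initializeProperties above amount to downstream. Functions are placed on 32-byte boundaries, and loop headers are padded to 16-byte boundaries only while the padding stays within MaxBytesForAlignment; the helper name below is made up, and the real work is done by the generic block-placement and emission machinery.

#include <cassert>
#include <cstdint>

static uint64_t loopPadding(uint64_t Offset, uint64_t PrefLoopAlign = 16,
                            uint64_t MaxBytes = 16) {
  uint64_t Pad = (PrefLoopAlign - Offset % PrefLoopAlign) % PrefLoopAlign;
  return Pad <= MaxBytes ? Pad : 0; // skip the alignment when it costs too much
}

int main() {
  assert(loopPadding(0x1234) == 12); // pad 12 bytes up to 0x1240
  assert(loopPadding(0x1240) == 0);  // already 16-byte aligned
  return 0;
}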
diff --git a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
index aa87638e47e9..0fbe23f2f62d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
+++ b/llvm/lib/Target/LoongArch/LoongArchSubtarget.h
@@ -42,6 +42,7 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
bool HasLaGlobalWithPcrel = false;
bool HasLaGlobalWithAbs = false;
bool HasLaLocalWithAbs = false;
+ bool HasUAL = false;
unsigned GRLen = 32;
MVT GRLenVT = MVT::i32;
LoongArchABI::ABI TargetABI = LoongArchABI::ABI_Unknown;
@@ -51,6 +52,10 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
LoongArchTargetLowering TLInfo;
SelectionDAGTargetInfo TSInfo;
+ Align PrefFunctionAlignment;
+ Align PrefLoopAlignment;
+ unsigned MaxBytesForAlignment;
+
/// Initializes using the passed in CPU and feature strings so that we can
/// use initializer lists for subtarget initialization.
LoongArchSubtarget &initializeSubtargetDependencies(const Triple &TT,
@@ -59,6 +64,9 @@ class LoongArchSubtarget : public LoongArchGenSubtargetInfo {
StringRef FS,
StringRef ABIName);
+ /// Initialize properties based on the selected processor family.
+ void initializeProperties(StringRef TuneCPU);
+
public:
// Initializes the data members to match that of the specified triple.
LoongArchSubtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
@@ -91,9 +99,14 @@ public:
bool hasLaGlobalWithPcrel() const { return HasLaGlobalWithPcrel; }
bool hasLaGlobalWithAbs() const { return HasLaGlobalWithAbs; }
bool hasLaLocalWithAbs() const { return HasLaLocalWithAbs; }
+ bool hasUAL() const { return HasUAL; }
MVT getGRLenVT() const { return GRLenVT; }
unsigned getGRLen() const { return GRLen; }
LoongArchABI::ABI getTargetABI() const { return TargetABI; }
+ bool isXRaySupported() const override { return is64Bit(); }
+ Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
+ Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
+ unsigned getMaxBytesForAlignment() const { return MaxBytesForAlignment; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
index 933ba3b40ce4..46e4a06f6bc0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp
@@ -13,12 +13,16 @@
#include "LoongArchTargetMachine.h"
#include "LoongArch.h"
#include "LoongArchMachineFunctionInfo.h"
+#include "LoongArchTargetTransformInfo.h"
#include "MCTargetDesc/LoongArchBaseInfo.h"
#include "TargetInfo/LoongArchTargetInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Transforms/Scalar.h"
#include <optional>
using namespace llvm;
@@ -34,6 +38,11 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTarget() {
initializeLoongArchDAGToDAGISelPass(*PR);
}
+static cl::opt<bool>
+ EnableLoopDataPrefetch("loongarch-enable-loop-data-prefetch", cl::Hidden,
+ cl::desc("Enable the loop data prefetch pass"),
+ cl::init(false));
+
static std::string computeDataLayout(const Triple &TT) {
if (TT.isArch64Bit())
return "e-m:e-p:64:64-i64:64-i128:128-n64-S128";
@@ -46,13 +55,33 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
return RM.value_or(Reloc::Static);
}
+static CodeModel::Model
+getEffectiveLoongArchCodeModel(const Triple &TT,
+ std::optional<CodeModel::Model> CM) {
+ if (!CM)
+ return CodeModel::Small;
+
+ switch (*CM) {
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ return *CM;
+ case CodeModel::Large:
+ if (!TT.isArch64Bit())
+ report_fatal_error("Large code model requires LA64");
+ return *CM;
+ default:
+ report_fatal_error(
+ "Only small, medium and large code models are allowed on LoongArch");
+ }
+}
+
LoongArchTargetMachine::LoongArchTargetMachine(
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, std::optional<Reloc::Model> RM,
std::optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(TT, RM),
- getEffectiveCodeModel(CM, CodeModel::Small), OL),
+ getEffectiveLoongArchCodeModel(TT, CM), OL),
TLOF(std::make_unique<TargetLoweringObjectFileELF>()) {
initAsmInfo();
}
@@ -126,6 +155,12 @@ LoongArchTargetMachine::createPassConfig(PassManagerBase &PM) {
}
void LoongArchPassConfig::addIRPasses() {
+ // Run LoopDataPrefetch
+ //
+ // Run this before LSR to remove the multiplies involved in computing the
+ // pointer values N iterations ahead.
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableLoopDataPrefetch)
+ addPass(createLoopDataPrefetchPass());
addPass(createAtomicExpandPass());
TargetPassConfig::addIRPasses();
@@ -137,6 +172,11 @@ bool LoongArchPassConfig::addInstSelector() {
return false;
}
+TargetTransformInfo
+LoongArchTargetMachine::getTargetTransformInfo(const Function &F) const {
+ return TargetTransformInfo(LoongArchTTIImpl(this, F));
+}
+
void LoongArchPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
void LoongArchPassConfig::addPreEmitPass2() {
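
As an aside, a rough sketch (assumed names, not part of the patch) of the code-model policy that getEffectiveLoongArchCodeModel introduces above, restated as a tiny standalone function; a nullopt result stands in for report_fatal_error in the real code.

#include <cassert>
#include <optional>

enum class CM { Small, Medium, Large, Kernel };

static std::optional<CM> effectiveCM(bool Is64Bit, std::optional<CM> Requested) {
  if (!Requested)
    return CM::Small;      // default remains the small code model
  switch (*Requested) {
  case CM::Small:
  case CM::Medium:
    return *Requested;     // accepted on both LA32 and LA64
  case CM::Large:
    if (!Is64Bit)
      return std::nullopt; // "Large code model requires LA64"
    return *Requested;
  default:
    return std::nullopt;   // only small, medium and large are allowed
  }
}

int main() {
  assert(effectiveCM(true, std::nullopt) == CM::Small);
  assert(effectiveCM(true, CM::Large) == CM::Large); // newly allowed on LA64
  assert(!effectiveCM(false, CM::Large));            // LA32: fatal error
  assert(!effectiveCM(true, CM::Kernel));            // unsupported model
  return 0;
}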
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h
index 4d71be49a5e0..06fcec838ea4 100644
--- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.h
@@ -31,6 +31,7 @@ public:
CodeGenOpt::Level OL, bool JIT);
~LoongArchTargetMachine() override;
+ TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
const LoongArchSubtarget *getSubtargetImpl(const Function &F) const override;
const LoongArchSubtarget *getSubtargetImpl() const = delete;
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
new file mode 100644
index 000000000000..a6de86eea116
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.cpp
@@ -0,0 +1,22 @@
+//===-- LoongArchTargetTransformInfo.cpp - LoongArch specific TTI ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a TargetTransformInfo analysis pass specific to the
+/// LoongArch target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#include "LoongArchTargetTransformInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loongarchtti"
+
+// TODO: Implement more hooks to provide TTI machinery for LoongArch.
diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
new file mode 100644
index 000000000000..9e02f793ba8a
--- /dev/null
+++ b/llvm/lib/Target/LoongArch/LoongArchTargetTransformInfo.h
@@ -0,0 +1,47 @@
+//===- LoongArchTargetTransformInfo.h - LoongArch specific TTI --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines a TargetTransformInfo::Concept conforming object specific to the
+/// LoongArch target machine. It uses the target's detailed information to
+/// provide more precise answers to certain TTI queries, while letting the
+/// target independent and default TTI implementations handle the rest.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETTRANSFORMINFO_H
+#define LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETTRANSFORMINFO_H
+
+#include "LoongArchSubtarget.h"
+#include "LoongArchTargetMachine.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/BasicTTIImpl.h"
+
+namespace llvm {
+
+class LoongArchTTIImpl : public BasicTTIImplBase<LoongArchTTIImpl> {
+ typedef BasicTTIImplBase<LoongArchTTIImpl> BaseT;
+ typedef TargetTransformInfo TTI;
+ friend BaseT;
+
+ const LoongArchSubtarget *ST;
+ const LoongArchTargetLowering *TLI;
+
+ const LoongArchSubtarget *getST() const { return ST; }
+ const LoongArchTargetLowering *getTLI() const { return TLI; }
+
+public:
+ explicit LoongArchTTIImpl(const LoongArchTargetMachine *TM, const Function &F)
+ : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
+ TLI(ST->getTargetLowering()) {}
+
+ // TODO: Implement more hooks to provide TTI machinery for LoongArch.
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_LOONGARCH_LOONGARCHTARGETTRANSFORMINFO_H
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
index ff0804e2a144..ecb68ff401e9 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp
@@ -202,5 +202,5 @@ MCAsmBackend *llvm::createLoongArchAsmBackend(const Target &T,
const MCTargetOptions &Options) {
const Triple &TT = STI.getTargetTriple();
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
- return new LoongArchAsmBackend(STI, OSABI, TT.isArch64Bit());
+ return new LoongArchAsmBackend(STI, OSABI, TT.isArch64Bit(), Options);
}
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
index 0d04cecc4554..ae9bb8af0419 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h
@@ -26,11 +26,13 @@ class LoongArchAsmBackend : public MCAsmBackend {
const MCSubtargetInfo &STI;
uint8_t OSABI;
bool Is64Bit;
+ const MCTargetOptions &TargetOptions;
public:
- LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit)
- : MCAsmBackend(support::little), STI(STI), OSABI(OSABI),
- Is64Bit(Is64Bit) {}
+ LoongArchAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit,
+ const MCTargetOptions &Options)
+ : MCAsmBackend(support::little), STI(STI), OSABI(OSABI), Is64Bit(Is64Bit),
+ TargetOptions(Options) {}
~LoongArchAsmBackend() override {}
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
@@ -63,6 +65,7 @@ public:
std::unique_ptr<MCObjectTargetWriter>
createObjectTargetWriter() const override;
+ const MCTargetOptions &getTargetOptions() const { return TargetOptions; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
index de2ba2833414..928adb03f098 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp
@@ -13,13 +13,73 @@
#include "LoongArchBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
namespace LoongArchABI {
+ABI computeTargetABI(const Triple &TT, StringRef ABIName) {
+ ABI ArgProvidedABI = getTargetABI(ABIName);
+ bool Is64Bit = TT.isArch64Bit();
+ ABI TripleABI;
+
+ // Figure out the ABI explicitly requested via the triple's environment type.
+ switch (TT.getEnvironment()) {
+ case llvm::Triple::EnvironmentType::GNUSF:
+ TripleABI = Is64Bit ? LoongArchABI::ABI_LP64S : LoongArchABI::ABI_ILP32S;
+ break;
+ case llvm::Triple::EnvironmentType::GNUF32:
+ TripleABI = Is64Bit ? LoongArchABI::ABI_LP64F : LoongArchABI::ABI_ILP32F;
+ break;
+
+ // Let the fallback case behave like {ILP32,LP64}D.
+ case llvm::Triple::EnvironmentType::GNUF64:
+ default:
+ TripleABI = Is64Bit ? LoongArchABI::ABI_LP64D : LoongArchABI::ABI_ILP32D;
+ break;
+ }
+
+ switch (ArgProvidedABI) {
+ case LoongArchABI::ABI_Unknown:
+    // Fall back to the triple-implied ABI if the ABI name is not specified
+    // or is invalid.
+ if (!ABIName.empty())
+ errs() << "'" << ABIName
+ << "' is not a recognized ABI for this target, ignoring and using "
+ "triple-implied ABI\n";
+ return TripleABI;
+
+ case LoongArchABI::ABI_ILP32S:
+ case LoongArchABI::ABI_ILP32F:
+ case LoongArchABI::ABI_ILP32D:
+ if (Is64Bit) {
+ errs() << "32-bit ABIs are not supported for 64-bit targets, ignoring "
+ "target-abi and using triple-implied ABI\n";
+ return TripleABI;
+ }
+ break;
+
+ case LoongArchABI::ABI_LP64S:
+ case LoongArchABI::ABI_LP64F:
+ case LoongArchABI::ABI_LP64D:
+ if (!Is64Bit) {
+ errs() << "64-bit ABIs are not supported for 32-bit targets, ignoring "
+ "target-abi and using triple-implied ABI\n";
+ return TripleABI;
+ }
+ break;
+ }
+
+ if (!ABIName.empty() && TT.hasEnvironment() && ArgProvidedABI != TripleABI)
+ errs() << "warning: triple-implied ABI conflicts with provided target-abi '"
+ << ABIName << "', using target-abi\n";
+
+ return ArgProvidedABI;
+}
+
ABI getTargetABI(StringRef ABIName) {
auto TargetABI = StringSwitch<ABI>(ABIName)
.Case("ilp32s", ABI_ILP32S)
@@ -32,7 +92,9 @@ ABI getTargetABI(StringRef ABIName) {
return TargetABI;
}
-// FIXME: other register?
+// To avoid the BP value being clobbered by a function call, we need to choose
+// a callee-saved register to save the value. The last `S` register (s9) is
+// used for FP, so we choose the previous one (s8) as BP.
MCRegister getBPReg() { return LoongArch::R31; }
} // end namespace LoongArchABI
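
For illustration only (not taken from the patch), the precedence rules implemented by computeTargetABI above can be restated as the following standalone sketch; names are made up, and the real function additionally prints the warnings shown in the hunk.

#include <cassert>
#include <string>

enum class ABI { ILP32S, ILP32F, ILP32D, LP64S, LP64F, LP64D };

// Env models the float ABI implied by the triple's environment: "sf", "f32",
// or anything else (treated like GNUF64, the fallback).
static ABI resolveABI(bool Is64Bit, const std::string &Env,
                      const std::string &ABIName) {
  ABI TripleABI = Is64Bit ? ABI::LP64D : ABI::ILP32D;
  if (Env == "sf")  TripleABI = Is64Bit ? ABI::LP64S : ABI::ILP32S;
  if (Env == "f32") TripleABI = Is64Bit ? ABI::LP64F : ABI::ILP32F;

  // A recognized -target-abi of the right bitness wins; anything else falls
  // back to the triple-implied ABI.
  if (Is64Bit) {
    if (ABIName == "lp64s") return ABI::LP64S;
    if (ABIName == "lp64f") return ABI::LP64F;
    if (ABIName == "lp64d") return ABI::LP64D;
  } else {
    if (ABIName == "ilp32s") return ABI::ILP32S;
    if (ABIName == "ilp32f") return ABI::ILP32F;
    if (ABIName == "ilp32d") return ABI::ILP32D;
  }
  return TripleABI;
}

int main() {
  assert(resolveABI(true, "f32", "") == ABI::LP64F);     // triple decides
  assert(resolveABI(true, "sf", "lp64d") == ABI::LP64D); // explicit ABI wins
  assert(resolveABI(true, "", "ilp32d") == ABI::LP64D);  // wrong bitness: ignored
  return 0;
}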
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
index c5f072677999..cee6dad1f095 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h
@@ -18,7 +18,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
namespace llvm {
@@ -31,12 +31,20 @@ enum {
MO_CALL_PLT,
MO_PCREL_HI,
MO_PCREL_LO,
+ MO_PCREL64_LO,
+ MO_PCREL64_HI,
MO_GOT_PC_HI,
MO_GOT_PC_LO,
+ MO_GOT_PC64_LO,
+ MO_GOT_PC64_HI,
MO_LE_HI,
MO_LE_LO,
+ MO_LE64_LO,
+ MO_LE64_HI,
MO_IE_PC_HI,
MO_IE_PC_LO,
+ MO_IE_PC64_LO,
+ MO_IE_PC64_HI,
MO_LD_PC_HI,
MO_GD_PC_HI,
// TODO: Add more flags.
@@ -54,6 +62,7 @@ enum ABI {
ABI_Unknown
};
+ABI computeTargetABI(const Triple &TT, StringRef ABIName);
ABI getTargetABI(StringRef ABIName);
// Returns the register used to hold the stack pointer after realignment.
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
index 57330dd31f71..a6b9c0652639 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp
@@ -59,7 +59,7 @@ unsigned LoongArchELFObjectWriter::getRelocType(MCContext &Ctx,
case FK_Data_4:
return IsPCRel ? ELF::R_LARCH_32_PCREL : ELF::R_LARCH_32;
case FK_Data_8:
- return ELF::R_LARCH_64;
+ return IsPCRel ? ELF::R_LARCH_64_PCREL : ELF::R_LARCH_64;
case LoongArch::fixup_loongarch_b16:
return ELF::R_LARCH_B16;
case LoongArch::fixup_loongarch_b21:
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp
index 3410c8f4277d..a6e15e09463d 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFStreamer.cpp
@@ -12,6 +12,7 @@
#include "LoongArchELFStreamer.h"
#include "LoongArchAsmBackend.h"
+#include "LoongArchBaseInfo.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -23,9 +24,10 @@ using namespace llvm;
LoongArchTargetELFStreamer::LoongArchTargetELFStreamer(
MCStreamer &S, const MCSubtargetInfo &STI)
: LoongArchTargetStreamer(S) {
- // FIXME: select appropriate ABI.
- setTargetABI(STI.getTargetTriple().isArch64Bit() ? LoongArchABI::ABI_LP64D
- : LoongArchABI::ABI_ILP32D);
+ auto &MAB = static_cast<LoongArchAsmBackend &>(
+ getStreamer().getAssembler().getBackend());
+ setTargetABI(LoongArchABI::computeTargetABI(
+ STI.getTargetTriple(), MAB.getTargetOptions().getABIName()));
}
MCELFStreamer &LoongArchTargetELFStreamer::getStreamer() {
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp
index bc946db2f449..9b7fccd0078e 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp
@@ -11,9 +11,9 @@
//===----------------------------------------------------------------------===//
#include "LoongArchMCAsmInfo.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
index 4587d59087f3..03fb9e008ae9 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp
@@ -39,10 +39,15 @@ public:
~LoongArchMCCodeEmitter() override {}
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
+ template <unsigned Opc>
+ void expandToVectorLDI(const MCInst &MI, SmallVectorImpl<char> &CB,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
/// TableGen'erated function for getting the binary encoding for an
/// instruction.
uint64_t getBinaryCodeForInstr(const MCInst &MI,
@@ -65,12 +70,21 @@ public:
/// Return binary encoding of an immediate operand specified by OpNo.
/// The value returned is the value of the immediate shifted right
- // arithmetically by 2.
+ // arithmetically by N.
/// Note that this function is dedicated to specific immediate types,
/// e.g. simm14_lsl2, simm16_lsl2, simm21_lsl2 and simm26_lsl2.
- unsigned getImmOpValueAsr2(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ template <unsigned N>
+ unsigned getImmOpValueAsr(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isImm()) {
+ unsigned Res = MI.getOperand(OpNo).getImm();
+ assert((Res & ((1U << N) - 1U)) == 0 && "lowest N bits are non-zero");
+ return Res >> N;
+ }
+ return getExprOpValue(MI, MO, Fixups, STI);
+ }
unsigned getExprOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
@@ -102,21 +116,6 @@ LoongArchMCCodeEmitter::getImmOpValueSub1(const MCInst &MI, unsigned OpNo,
}
unsigned
-LoongArchMCCodeEmitter::getImmOpValueAsr2(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- const MCOperand &MO = MI.getOperand(OpNo);
-
- if (MO.isImm()) {
- unsigned Res = MI.getOperand(OpNo).getImm();
- assert((Res & 3) == 0 && "lowest 2 bits are non-zero");
- return Res >> 2;
- }
-
- return getExprOpValue(MI, MO, Fixups, STI);
-}
-
-unsigned
LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -273,19 +272,61 @@ LoongArchMCCodeEmitter::getExprOpValue(const MCInst &MI, const MCOperand &MO,
return 0;
}
+template <unsigned Opc>
+void LoongArchMCCodeEmitter::expandToVectorLDI(
+ const MCInst &MI, SmallVectorImpl<char> &CB,
+ SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
+ int64_t Imm = MI.getOperand(1).getImm() & 0x3FF;
+ switch (MI.getOpcode()) {
+ case LoongArch::PseudoVREPLI_B:
+ case LoongArch::PseudoXVREPLI_B:
+ break;
+ case LoongArch::PseudoVREPLI_H:
+ case LoongArch::PseudoXVREPLI_H:
+ Imm |= 0x400;
+ break;
+ case LoongArch::PseudoVREPLI_W:
+ case LoongArch::PseudoXVREPLI_W:
+ Imm |= 0x800;
+ break;
+ case LoongArch::PseudoVREPLI_D:
+ case LoongArch::PseudoXVREPLI_D:
+ Imm |= 0xC00;
+ break;
+ }
+ MCInst TmpInst = MCInstBuilder(Opc).addOperand(MI.getOperand(0)).addImm(Imm);
+ uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
+ support::endian::write(CB, Binary, support::little);
+}
+
void LoongArchMCCodeEmitter::encodeInstruction(
- const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+ const MCInst &MI, SmallVectorImpl<char> &CB,
+ SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
// Get byte count of instruction.
unsigned Size = Desc.getSize();
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case LoongArch::PseudoVREPLI_B:
+ case LoongArch::PseudoVREPLI_H:
+ case LoongArch::PseudoVREPLI_W:
+ case LoongArch::PseudoVREPLI_D:
+ return expandToVectorLDI<LoongArch::VLDI>(MI, CB, Fixups, STI);
+ case LoongArch::PseudoXVREPLI_B:
+ case LoongArch::PseudoXVREPLI_H:
+ case LoongArch::PseudoXVREPLI_W:
+ case LoongArch::PseudoXVREPLI_D:
+ return expandToVectorLDI<LoongArch::XVLDI>(MI, CB, Fixups, STI);
+ }
+
switch (Size) {
default:
llvm_unreachable("Unhandled encodeInstruction length!");
case 4: {
uint32_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
- support::endian::write(OS, Bits, support::little);
+ support::endian::write(CB, Bits, support::little);
break;
}
}
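
A minimal sketch of the immediate packing performed by expandToVectorLDI above, not taken from the patch and with a made-up helper name: a vrepli.{b,h,w,d} pseudo becomes a single vldi/xvldi whose immediate is the 10-bit replicated value ORed with an element-size selector (0x000/0x400/0x800/0xC00).

#include <cassert>
#include <cstdint>

static int64_t packVReplImm(char EltSuffix, int64_t Imm) {
  int64_t Packed = Imm & 0x3FF;     // keep only the simm10 payload
  switch (EltSuffix) {
  case 'b': break;                  // PseudoVREPLI_B / PseudoXVREPLI_B
  case 'h': Packed |= 0x400; break; // PseudoVREPLI_H / PseudoXVREPLI_H
  case 'w': Packed |= 0x800; break; // PseudoVREPLI_W / PseudoXVREPLI_W
  case 'd': Packed |= 0xC00; break; // PseudoVREPLI_D / PseudoXVREPLI_D
  }
  return Packed;
}

int main() {
  assert(packVReplImm('w', 5) == 0x805);  // vrepli.w vd, 5 -> vldi vd, 0x805
  assert(packVReplImm('b', -1) == 0x3FF); // negative simm10 keeps only 10 bits
  return 0;
}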
diff --git a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
index f431aae2f23e..7a0a033c55ad 100644
--- a/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
+++ b/llvm/lib/Target/M68k/AsmParser/M68kAsmParser.cpp
@@ -44,15 +44,15 @@ class M68kAsmParser : public MCTargetAsmParser {
bool missingFeature(const SMLoc &Loc, const uint64_t &ErrorInfo);
bool emit(MCInst &Inst, SMLoc const &Loc, MCStreamer &Out) const;
bool parseRegisterName(MCRegister &RegNo, SMLoc Loc, StringRef RegisterName);
- OperandMatchResultTy parseRegister(MCRegister &RegNo);
+ ParseStatus parseRegister(MCRegister &RegNo);
// Parser functions.
void eatComma();
bool isExpr();
- OperandMatchResultTy parseImm(OperandVector &Operands);
- OperandMatchResultTy parseMemOp(OperandVector &Operands);
- OperandMatchResultTy parseRegOrMoveMask(OperandVector &Operands);
+ ParseStatus parseImm(OperandVector &Operands);
+ ParseStatus parseMemOp(OperandVector &Operands);
+ ParseStatus parseRegOrMoveMask(OperandVector &Operands);
public:
M68kAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
@@ -72,7 +72,6 @@ public:
SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
- bool ParseDirective(AsmToken DirectiveID) override;
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
@@ -133,7 +132,6 @@ class M68kOperand : public MCParsedAsmOperand {
SMLoc Start, End;
union {
StringRef Token;
- int64_t Imm;
const MCExpr *Expr;
M68kMemOp MemOp;
};
@@ -158,6 +156,7 @@ public:
bool isReg() const override;
bool isAReg() const;
bool isDReg() const;
+ bool isFPDReg() const;
unsigned getReg() const override;
void addRegOperands(MCInst &Inst, unsigned N) const;
@@ -177,6 +176,11 @@ public:
static std::unique_ptr<M68kOperand> createImm(const MCExpr *Expr, SMLoc Start,
SMLoc End);
+ // Imm for TRAP instruction
+ bool isTrapImm() const;
+ // Imm for BKPT instruction
+ bool isBkptImm() const;
+
// MoveMask
bool isMoveMask() const;
void addMoveMaskOperands(MCInst &Inst, unsigned N) const;
@@ -223,15 +227,16 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeM68kAsmParser() {
RegisterMCAsmParser<M68kAsmParser> X(getTheM68kTarget());
}
+#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#include "M68kGenAsmMatcher.inc"
static inline unsigned getRegisterByIndex(unsigned RegisterIndex) {
static unsigned RegistersByIndex[] = {
- M68k::D0, M68k::D1, M68k::D2, M68k::D3, M68k::D4, M68k::D5,
- M68k::D6, M68k::D7, M68k::A0, M68k::A1, M68k::A2, M68k::A3,
- M68k::A4, M68k::A5, M68k::A6, M68k::SP,
- };
+ M68k::D0, M68k::D1, M68k::D2, M68k::D3, M68k::D4, M68k::D5,
+ M68k::D6, M68k::D7, M68k::A0, M68k::A1, M68k::A2, M68k::A3,
+ M68k::A4, M68k::A5, M68k::A6, M68k::SP, M68k::FP0, M68k::FP1,
+ M68k::FP2, M68k::FP3, M68k::FP4, M68k::FP5, M68k::FP6, M68k::FP7};
assert(RegisterIndex <=
sizeof(RegistersByIndex) / sizeof(RegistersByIndex[0]));
return RegistersByIndex[RegisterIndex];
@@ -242,6 +247,8 @@ static inline unsigned getRegisterIndex(unsigned Register) {
return Register - M68k::D0;
if (Register >= M68k::A0 && Register <= M68k::A6)
return Register - M68k::A0 + 8;
+ if (Register >= M68k::FP0 && Register <= M68k::FP7)
+ return Register - M68k::FP0 + 16;
switch (Register) {
case M68k::SP:
@@ -350,6 +357,22 @@ std::unique_ptr<M68kOperand> M68kOperand::createImm(const MCExpr *Expr,
return Op;
}
+bool M68kOperand::isTrapImm() const {
+ int64_t Value;
+ if (!isImm() || !Expr->evaluateAsAbsolute(Value))
+ return false;
+
+ return isUInt<4>(Value);
+}
+
+bool M68kOperand::isBkptImm() const {
+ int64_t Value;
+ if (!isImm() || !Expr->evaluateAsAbsolute(Value))
+ return false;
+
+ return isUInt<3>(Value);
+}
+
// MoveMask
bool M68kOperand::isMoveMask() const {
if (!isMemOp())
@@ -466,7 +489,7 @@ void M68kOperand::addPCIOperands(MCInst &Inst, unsigned N) const {
}
static inline bool checkRegisterClass(unsigned RegNo, bool Data, bool Address,
- bool SP) {
+ bool SP, bool FPDR = false) {
switch (RegNo) {
case M68k::A0:
case M68k::A1:
@@ -494,6 +517,16 @@ static inline bool checkRegisterClass(unsigned RegNo, bool Data, bool Address,
case M68k::CCR:
return false;
+ case M68k::FP0:
+ case M68k::FP1:
+ case M68k::FP2:
+ case M68k::FP3:
+ case M68k::FP4:
+ case M68k::FP5:
+ case M68k::FP6:
+ case M68k::FP7:
+ return FPDR;
+
default:
llvm_unreachable("unexpected register type");
return false;
@@ -512,6 +545,13 @@ bool M68kOperand::isDReg() const {
/*Address=*/false, /*SP=*/false);
}
+bool M68kOperand::isFPDReg() const {
+ return isReg() && checkRegisterClass(getReg(),
+ /*Data=*/false,
+ /*Address=*/false, /*SP=*/false,
+ /*FPDR=*/true);
+}
+
unsigned M68kAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) {
M68kOperand &Operand = (M68kOperand &)Op;
@@ -619,12 +659,20 @@ bool M68kAsmParser::parseRegisterName(MCRegister &RegNo, SMLoc Loc,
}
break;
}
+ } else if (StringRef(RegisterNameLower).starts_with("fp") &&
+ RegisterNameLower.size() > 2) {
+ // Floating point data register.
+ auto RegIndex = unsigned(RegisterNameLower[2] - '0');
+ if (RegIndex >= 8 || RegisterNameLower.size() > 3)
+ return false;
+ RegNo = getRegisterByIndex(16 + RegIndex);
+ return true;
}
return false;
}
-OperandMatchResultTy M68kAsmParser::parseRegister(MCRegister &RegNo) {
+ParseStatus M68kAsmParser::parseRegister(MCRegister &RegNo) {
bool HasPercent = false;
AsmToken PercentToken;
@@ -634,14 +682,14 @@ OperandMatchResultTy M68kAsmParser::parseRegister(MCRegister &RegNo) {
HasPercent = true;
PercentToken = Lex();
} else if (!RegisterPrefixOptional.getValue()) {
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
if (!Parser.getTok().is(AsmToken::Identifier)) {
if (HasPercent) {
getLexer().UnLex(PercentToken);
}
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
auto RegisterName = Parser.getTok().getString();
@@ -649,11 +697,11 @@ OperandMatchResultTy M68kAsmParser::parseRegister(MCRegister &RegNo) {
if (HasPercent) {
getLexer().UnLex(PercentToken);
}
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
Parser.Lex();
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
bool M68kAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
@@ -670,7 +718,7 @@ OperandMatchResultTy M68kAsmParser::tryParseRegister(MCRegister &RegNo,
SMLoc &StartLoc,
SMLoc &EndLoc) {
StartLoc = getLexer().getLoc();
- auto Result = parseRegister(RegNo);
+ ParseStatus Result = parseRegister(RegNo);
EndLoc = getLexer().getLoc();
return Result;
}
@@ -688,34 +736,31 @@ bool M68kAsmParser::isExpr() {
}
}
-OperandMatchResultTy M68kAsmParser::parseImm(OperandVector &Operands) {
- if (getLexer().isNot(AsmToken::Hash)) {
- return MatchOperand_NoMatch;
- }
+ParseStatus M68kAsmParser::parseImm(OperandVector &Operands) {
+ if (getLexer().isNot(AsmToken::Hash))
+ return ParseStatus::NoMatch;
SMLoc Start = getLexer().getLoc();
Parser.Lex();
SMLoc End;
const MCExpr *Expr;
- if (getParser().parseExpression(Expr, End)) {
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseExpression(Expr, End))
+ return ParseStatus::Failure;
Operands.push_back(M68kOperand::createImm(Expr, Start, End));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy M68kAsmParser::parseMemOp(OperandVector &Operands) {
+ParseStatus M68kAsmParser::parseMemOp(OperandVector &Operands) {
SMLoc Start = getLexer().getLoc();
bool IsPD = false;
M68kMemOp MemOp;
// Check for a plain register or register mask.
- auto Result = parseRegOrMoveMask(Operands);
- if (Result != llvm::MatchOperand_NoMatch) {
+ ParseStatus Result = parseRegOrMoveMask(Operands);
+ if (!Result.isNoMatch())
return Result;
- }
// Check for pre-decrement & outer displacement.
bool HasDisplacement = false;
@@ -723,9 +768,8 @@ OperandMatchResultTy M68kAsmParser::parseMemOp(OperandVector &Operands) {
IsPD = true;
Parser.Lex();
} else if (isExpr()) {
- if (Parser.parseExpression(MemOp.OuterDisp)) {
- return MatchOperand_ParseFail;
- }
+ if (Parser.parseExpression(MemOp.OuterDisp))
+ return ParseStatus::Failure;
HasDisplacement = true;
}
@@ -734,21 +778,19 @@ OperandMatchResultTy M68kAsmParser::parseMemOp(OperandVector &Operands) {
MemOp.Op = M68kMemOp::Kind::Addr;
Operands.push_back(
M68kOperand::createMemOp(MemOp, Start, getLexer().getLoc()));
- return MatchOperand_Success;
- } else if (IsPD) {
- Error(getLexer().getLoc(), "expected (");
- return MatchOperand_ParseFail;
+ return ParseStatus::Success;
}
+ if (IsPD)
+ return Error(getLexer().getLoc(), "expected (");
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
Parser.Lex();
// Check for constant dereference & MIT-style displacement
if (!HasDisplacement && isExpr()) {
- if (Parser.parseExpression(MemOp.OuterDisp)) {
- return MatchOperand_ParseFail;
- }
+ if (Parser.parseExpression(MemOp.OuterDisp))
+ return ParseStatus::Failure;
HasDisplacement = true;
// If we're not followed by a comma, we're a constant dereference.
@@ -756,21 +798,18 @@ OperandMatchResultTy M68kAsmParser::parseMemOp(OperandVector &Operands) {
MemOp.Op = M68kMemOp::Kind::Addr;
Operands.push_back(
M68kOperand::createMemOp(MemOp, Start, getLexer().getLoc()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
Parser.Lex();
}
Result = parseRegister(MemOp.OuterReg);
- if (Result == MatchOperand_ParseFail) {
- return MatchOperand_ParseFail;
- }
+ if (Result.isFailure())
+ return ParseStatus::Failure;
- if (Result != MatchOperand_Success) {
- Error(getLexer().getLoc(), "expected register");
- return MatchOperand_ParseFail;
- }
+ if (!Result.isSuccess())
+ return Error(getLexer().getLoc(), "expected register");
// Check for Index.
bool HasIndex = false;
@@ -778,14 +817,11 @@ OperandMatchResultTy M68kAsmParser::parseMemOp(OperandVector &Operands) {
Parser.Lex();
Result = parseRegister(MemOp.InnerReg);
- if (Result == MatchOperand_ParseFail) {
+ if (Result.isFailure())
return Result;
- }
- if (Result == MatchOperand_NoMatch) {
- Error(getLexer().getLoc(), "expected register");
- return MatchOperand_ParseFail;
- }
+ if (Result.isNoMatch())
+ return Error(getLexer().getLoc(), "expected register");
// TODO: parse size, scale and inner displacement.
MemOp.Size = 4;
@@ -794,10 +830,8 @@ OperandMatchResultTy M68kAsmParser::parseMemOp(OperandVector &Operands) {
HasIndex = true;
}
- if (Parser.getTok().isNot(AsmToken::RParen)) {
- Error(getLexer().getLoc(), "expected )");
- return MatchOperand_ParseFail;
- }
+ if (Parser.getTok().isNot(AsmToken::RParen))
+ return Error(getLexer().getLoc(), "expected )");
Parser.Lex();
bool IsPI = false;
@@ -809,11 +843,9 @@ OperandMatchResultTy M68kAsmParser::parseMemOp(OperandVector &Operands) {
SMLoc End = getLexer().getLoc();
unsigned OpCount = IsPD + IsPI + (HasIndex || HasDisplacement);
- if (OpCount > 1) {
- Error(Start, "only one of post-increment, pre-decrement or displacement "
- "can be used");
- return MatchOperand_ParseFail;
- }
+ if (OpCount > 1)
+ return Error(Start, "only one of post-increment, pre-decrement or "
+ "displacement can be used");
if (IsPD) {
MemOp.Op = M68kMemOp::Kind::RegPreDecrement;
@@ -828,11 +860,10 @@ OperandMatchResultTy M68kAsmParser::parseMemOp(OperandVector &Operands) {
}
Operands.push_back(M68kOperand::createMemOp(MemOp, Start, End));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-M68kAsmParser::parseRegOrMoveMask(OperandVector &Operands) {
+ParseStatus M68kAsmParser::parseRegOrMoveMask(OperandVector &Operands) {
SMLoc Start = getLexer().getLoc();
M68kMemOp MemOp(M68kMemOp::Kind::RegMask);
MemOp.RegMask = 0;
@@ -842,23 +873,17 @@ M68kAsmParser::parseRegOrMoveMask(OperandVector &Operands) {
(MemOp.Op == M68kMemOp::Kind::RegMask) && (MemOp.RegMask == 0);
MCRegister FirstRegister;
- auto Result = parseRegister(FirstRegister);
- if (IsFirstRegister && (Result == llvm::MatchOperand_NoMatch)) {
- return MatchOperand_NoMatch;
- }
- if (Result != llvm::MatchOperand_Success) {
- Error(getLexer().getLoc(), "expected start register");
- return MatchOperand_ParseFail;
- }
+ ParseStatus Result = parseRegister(FirstRegister);
+ if (IsFirstRegister && Result.isNoMatch())
+ return ParseStatus::NoMatch;
+ if (!Result.isSuccess())
+ return Error(getLexer().getLoc(), "expected start register");
MCRegister LastRegister = FirstRegister;
- if (getLexer().is(AsmToken::Minus)) {
- getLexer().Lex();
+ if (parseOptionalToken(AsmToken::Minus)) {
Result = parseRegister(LastRegister);
- if (Result != llvm::MatchOperand_Success) {
- Error(getLexer().getLoc(), "expected end register");
- return MatchOperand_ParseFail;
- }
+ if (!Result.isSuccess())
+ return Error(getLexer().getLoc(), "expected end register");
}
unsigned FirstRegisterIndex = getRegisterIndex(FirstRegister);
@@ -879,37 +904,28 @@ M68kAsmParser::parseRegOrMoveMask(OperandVector &Operands) {
MemOp.Op = M68kMemOp::Kind::RegMask;
MemOp.RegMask = 1 << getRegisterIndex(MemOp.OuterReg);
- if (MemOp.RegMask == 0) {
- Error(getLexer().getLoc(),
- "special registers cannot be used in register masks");
- return MatchOperand_ParseFail;
- }
+ if (MemOp.RegMask == 0)
+ return Error(getLexer().getLoc(),
+ "special registers cannot be used in register masks");
}
- if ((FirstRegisterIndex >= 16) || (LastRegisterIndex >= 16)) {
- Error(getLexer().getLoc(),
- "special registers cannot be used in register masks");
- return MatchOperand_ParseFail;
- }
+ if ((FirstRegisterIndex >= 16) || (LastRegisterIndex >= 16))
+ return Error(getLexer().getLoc(),
+ "special registers cannot be used in register masks");
- if (NewMaskBits & MemOp.RegMask) {
- Error(getLexer().getLoc(), "conflicting masked registers");
- return MatchOperand_ParseFail;
- }
+ if (NewMaskBits & MemOp.RegMask)
+ return Error(getLexer().getLoc(), "conflicting masked registers");
MemOp.RegMask |= NewMaskBits;
}
- if (getLexer().isNot(AsmToken::Slash)) {
+ if (!parseOptionalToken(AsmToken::Slash))
break;
- }
-
- getLexer().Lex();
}
Operands.push_back(
M68kOperand::createMemOp(MemOp, Start, getLexer().getLoc()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
void M68kAsmParser::eatComma() {
@@ -931,10 +947,9 @@ bool M68kAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
First = false;
}
- auto MatchResult = MatchOperandParserImpl(Operands, Name);
- if (MatchResult == MatchOperand_Success) {
+ ParseStatus MatchResult = MatchOperandParserImpl(Operands, Name);
+ if (MatchResult.isSuccess())
continue;
- }
// Add custom operand formats here...
SMLoc Loc = getLexer().getLoc();
@@ -947,8 +962,6 @@ bool M68kAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return false;
}
-bool M68kAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
-
bool M68kAsmParser::invalidOperand(SMLoc const &Loc,
OperandVector const &Operands,
uint64_t const &ErrorInfo) {
@@ -1019,9 +1032,12 @@ void M68kOperand::print(raw_ostream &OS) const {
OS << "token '" << Token << "'";
break;
- case KindTy::Imm:
- OS << "immediate " << Imm;
+ case KindTy::Imm: {
+ int64_t Value;
+ Expr->evaluateAsAbsolute(Value);
+ OS << "immediate " << Value;
break;
+ }
case KindTy::MemOp:
MemOp.print(OS);
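
A small standalone restatement, for illustration and not part of the patch, of the "fp<N>" spelling check added to parseRegisterName above: only the exact names fp0 through fp7 are accepted as FP data registers. The helper name is made up.

#include <cassert>
#include <string>

static bool isFPDataRegName(const std::string &Lower) {
  if (Lower.size() != 3 || Lower.compare(0, 2, "fp") != 0)
    return false;
  return Lower[2] >= '0' && Lower[2] <= '7';
}

int main() {
  assert(isFPDataRegName("fp3"));
  assert(!isFPDataRegName("fp8"));  // index out of range
  assert(!isFPDataRegName("fp10")); // too long
  assert(!isFPDataRegName("fp"));   // missing index digit
  return 0;
}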
diff --git a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
index ffe4869e8fe5..2124a35cc65a 100644
--- a/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
+++ b/llvm/lib/Target/M68k/Disassembler/M68kDisassembler.cpp
@@ -33,14 +33,14 @@ using namespace llvm;
typedef MCDisassembler::DecodeStatus DecodeStatus;
static const unsigned RegisterDecode[] = {
- M68k::D0, M68k::D1, M68k::D2, M68k::D3, M68k::D4, M68k::D5,
- M68k::D6, M68k::D7, M68k::A0, M68k::A1, M68k::A2, M68k::A3,
- M68k::A4, M68k::A5, M68k::A6, M68k::SP,
-};
+ M68k::D0, M68k::D1, M68k::D2, M68k::D3, M68k::D4, M68k::D5,
+ M68k::D6, M68k::D7, M68k::A0, M68k::A1, M68k::A2, M68k::A3,
+ M68k::A4, M68k::A5, M68k::A6, M68k::SP, M68k::FP0, M68k::FP1,
+ M68k::FP2, M68k::FP3, M68k::FP4, M68k::FP5, M68k::FP6, M68k::FP7};
static DecodeStatus DecodeRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address, const void *Decoder) {
- if (RegNo >= 16)
+ if (RegNo >= 24)
return DecodeStatus::Fail;
Inst.addOperand(MCOperand::createReg(RegisterDecode[RegNo]));
return DecodeStatus::Success;
@@ -88,6 +88,15 @@ static DecodeStatus DecodeXR16RegisterClass(MCInst &Inst, uint64_t RegNo,
return DecodeRegisterClass(Inst, RegNo, Address, Decoder);
}
+static DecodeStatus DecodeFPDRRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return DecodeRegisterClass(Inst, RegNo | 16ULL, Address, Decoder);
+}
+#define DecodeFPDR32RegisterClass DecodeFPDRRegisterClass
+#define DecodeFPDR64RegisterClass DecodeFPDRRegisterClass
+#define DecodeFPDR80RegisterClass DecodeFPDRRegisterClass
+
static DecodeStatus DecodeCCRCRegisterClass(MCInst &Inst, APInt &Insn,
uint64_t Address,
const void *Decoder) {
@@ -102,6 +111,10 @@ static DecodeStatus DecodeImm32(MCInst &Inst, uint64_t Imm, uint64_t Address,
#include "M68kGenDisassemblerTable.inc"
+#undef DecodeFPDR32RegisterClass
+#undef DecodeFPDR64RegisterClass
+#undef DecodeFPDR80RegisterClass
+
/// A disassembler class for M68k.
struct M68kDisassembler : public MCDisassembler {
M68kDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
diff --git a/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp b/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp
index e0aaa9d51cc3..b0ada29d1cea 100644
--- a/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp
+++ b/llvm/lib/Target/M68k/GISel/M68kCallLowering.cpp
@@ -221,10 +221,10 @@ bool M68kCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
return false;
}
- CallSeqStart.addImm(Assigner.StackOffset).addImm(0);
+ CallSeqStart.addImm(Assigner.StackSize).addImm(0);
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
- MIRBuilder.buildInstr(AdjStackUp).addImm(Assigner.StackOffset).addImm(0);
+ MIRBuilder.buildInstr(AdjStackUp).addImm(Assigner.StackSize).addImm(0);
return true;
}
diff --git a/llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp b/llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp
index a627eccd110d..3fddf10aca2e 100644
--- a/llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp
+++ b/llvm/lib/Target/M68k/GISel/M68kInstructionSelector.cpp
@@ -9,8 +9,8 @@
#include "M68kRegisterBankInfo.h"
#include "M68kSubtarget.h"
#include "M68kTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "m68k-isel"
diff --git a/llvm/lib/Target/M68k/M68k.h b/llvm/lib/Target/M68k/M68k.h
index 71c4cf8e3641..1d0f383dc5c9 100644
--- a/llvm/lib/Target/M68k/M68k.h
+++ b/llvm/lib/Target/M68k/M68k.h
@@ -42,18 +42,14 @@ FunctionPass *createM68kGlobalBaseRegPass();
/// emission so that all possible MOVEM are already in place.
FunctionPass *createM68kCollapseMOVEMPass();
-/// Finds MOVE instructions before any conditioanl branch instruction and
-/// replaces them with MOVEM instruction. Motorola's MOVEs do trash(V,C) flags
-/// register which prevents branch from taking the correct route. This pass
-/// has to be run after all pseudo expansions and prologue/epilogue emission
-/// so that all possible MOVEs are present.
-FunctionPass *createM68kConvertMOVToMOVMPass();
-
InstructionSelector *
createM68kInstructionSelector(const M68kTargetMachine &, const M68kSubtarget &,
const M68kRegisterBankInfo &);
void initializeM68kDAGToDAGISelPass(PassRegistry &);
+void initializeM68kExpandPseudoPass(PassRegistry &);
+void initializeM68kGlobalBaseRegPass(PassRegistry &);
+void initializeM68kCollapseMOVEMPass(PassRegistry &);
} // namespace llvm
diff --git a/llvm/lib/Target/M68k/M68k.td b/llvm/lib/Target/M68k/M68k.td
index de7a6c82d110..dab66d102295 100644
--- a/llvm/lib/Target/M68k/M68k.td
+++ b/llvm/lib/Target/M68k/M68k.td
@@ -37,10 +37,19 @@ def FeatureISA30
"Is M68030 ISA supported",
[ FeatureISA20 ]>;
+def FeatureISA881
+ : SubtargetFeature<"isa-68881", "FPUKind", "M881",
+ "Is M68881 (FPU) ISA supported">;
+
+def FeatureISA882
+ : SubtargetFeature<"isa-68882", "FPUKind", "M882",
+ "Is M68882 (FPU) ISA supported",
+ [ FeatureISA881 ]>;
+
def FeatureISA40
: SubtargetFeature<"isa-68040", "SubtargetKind", "M40",
"Is M68040 ISA supported",
- [ FeatureISA30 ]>;
+ [ FeatureISA30, FeatureISA882 ]>;
def FeatureISA60
: SubtargetFeature<"isa-68060", "SubtargetKind", "M60",
diff --git a/llvm/lib/Target/M68k/M68kAsmPrinter.cpp b/llvm/lib/Target/M68k/M68kAsmPrinter.cpp
index 4933d40f3388..f748450c170a 100644
--- a/llvm/lib/Target/M68k/M68kAsmPrinter.cpp
+++ b/llvm/lib/Target/M68k/M68kAsmPrinter.cpp
@@ -76,6 +76,90 @@ bool M68kAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, OS);
}
+void M68kAsmPrinter::printDisp(const MachineInstr *MI, unsigned opNum,
+ raw_ostream &O) {
+ // Print immediate displacement without the '#' prefix
+ const MachineOperand &Op = MI->getOperand(opNum);
+ if (Op.isImm()) {
+ O << Op.getImm();
+ return;
+ }
+ // Displacement is relocatable, so we're pretty permissive about what
+ // can be put here.
+ printOperand(MI, opNum, O);
+}
+
+void M68kAsmPrinter::printAbsMem(const MachineInstr *MI, unsigned OpNum,
+ raw_ostream &O) {
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (MO.isImm())
+ O << format("$%0" PRIx64, (uint64_t)MO.getImm());
+ else
+ PrintAsmMemoryOperand(MI, OpNum, nullptr, O);
+}
+
+bool M68kAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNo, const char *ExtraCode,
+ raw_ostream &OS) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ case MachineOperand::MO_Immediate:
+ // The immediate value here is the addressing mode kind we set
+ // in M68kDAGToDAGISel::SelectInlineAsmMemoryOperand.
+ using namespace M68k;
+ // Skip the addressing mode kind operand.
+ ++OpNo;
+ // Decode MemAddrModeKind.
+ switch (static_cast<MemAddrModeKind>(MO.getImm())) {
+ case MemAddrModeKind::j:
+ printARIMem(MI, OpNo, OS);
+ break;
+ case MemAddrModeKind::o:
+ printARIPIMem(MI, OpNo, OS);
+ break;
+ case MemAddrModeKind::e:
+ printARIPDMem(MI, OpNo, OS);
+ break;
+ case MemAddrModeKind::p:
+ printARIDMem(MI, OpNo, OS);
+ break;
+ case MemAddrModeKind::f:
+ case MemAddrModeKind::F:
+ printARIIMem(MI, OpNo, OS);
+ break;
+ case MemAddrModeKind::k:
+ printPCIMem(MI, 0, OpNo, OS);
+ break;
+ case MemAddrModeKind::q:
+ printPCDMem(MI, 0, OpNo, OS);
+ break;
+ case MemAddrModeKind::b:
+ printAbsMem(MI, OpNo, OS);
+ break;
+ default:
+ llvm_unreachable("Unrecognized memory addressing mode");
+ }
+ return false;
+ case MachineOperand::MO_GlobalAddress:
+ PrintSymbolOperand(MO, OS);
+ return false;
+ case MachineOperand::MO_BlockAddress:
+ GetBlockAddressSymbol(MO.getBlockAddress())->print(OS, MAI);
+ return false;
+ case MachineOperand::MO_Register:
+ // This is a special case where it is treated as a memory reference, with
+ // the register holding the address value. Thus, we print it as ARI here.
+ if (M68kII::isAddressRegister(MO.getReg())) {
+ printARIMem(MI, OpNo, OS);
+ return false;
+ }
+ break;
+ default:
+ break;
+ }
+ return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, ExtraCode, OS);
+}
+
void M68kAsmPrinter::emitInstruction(const MachineInstr *MI) {
M68k_MC::verifyInstructionPredicates(MI->getOpcode(),
getSubtargetInfo().getFeatureBits());
diff --git a/llvm/lib/Target/M68k/M68kAsmPrinter.h b/llvm/lib/Target/M68k/M68kAsmPrinter.h
index 1a76e3bf4e27..7b4dbfef58c5 100644
--- a/llvm/lib/Target/M68k/M68kAsmPrinter.h
+++ b/llvm/lib/Target/M68k/M68kAsmPrinter.h
@@ -16,6 +16,7 @@
#include "M68kMCInstLower.h"
#include "M68kTargetMachine.h"
+#include "MCTargetDesc/M68kMemOperandPrinter.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/MC/MCStreamer.h"
@@ -34,12 +35,19 @@ class raw_ostream;
class M68kSubtarget;
class M68kMachineFunctionInfo;
-class LLVM_LIBRARY_VISIBILITY M68kAsmPrinter : public AsmPrinter {
+class LLVM_LIBRARY_VISIBILITY M68kAsmPrinter
+ : public AsmPrinter,
+ public M68kMemOperandPrinter<M68kAsmPrinter, MachineInstr> {
+
+ friend class M68kMemOperandPrinter;
void EmitInstrWithMacroNoAT(const MachineInstr *MI);
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &OS);
+ void printDisp(const MachineInstr *MI, unsigned OpNum, raw_ostream &OS);
+ void printAbsMem(const MachineInstr *MI, unsigned OpNum, raw_ostream &OS);
+
public:
const M68kSubtarget *Subtarget;
const M68kMachineFunctionInfo *MMFI;
@@ -57,6 +65,8 @@ public:
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
const char *ExtraCode, raw_ostream &OS) override;
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
+ const char *ExtraCode, raw_ostream &OS) override;
void emitInstruction(const MachineInstr *MI) override;
void emitFunctionBodyStart() override;
diff --git a/llvm/lib/Target/M68k/M68kCallingConv.td b/llvm/lib/Target/M68k/M68kCallingConv.td
index 360f2199cf6f..523f08e64615 100644
--- a/llvm/lib/Target/M68k/M68kCallingConv.td
+++ b/llvm/lib/Target/M68k/M68kCallingConv.td
@@ -22,18 +22,13 @@ class CCIfSubtarget<string F, CCAction A>
// Return Value Calling Conventions
//===----------------------------------------------------------------------===//
-/// Return-value conventions common to all M68k CC's.
-def RetCC_M68kCommon : CallingConv<[
-]>;
-
/// M68k C return convention.
-/// TODO: Return via address register
def RetCC_M68k_C : CallingConv<[
+ CCIfPtr<CCAssignToReg<[A0]>>,
CCIfType<[i1], CCPromoteToType<i8>>,
CCIfType<[i8], CCAssignToReg<[BD0, BD1]>>,
CCIfType<[i16], CCAssignToReg<[WD0, WD1]>>,
CCIfType<[i32], CCAssignToReg<[D0, D1]>>,
- CCDelegateTo<RetCC_M68kCommon>
]>;
/// M68k fastcc return convention.
@@ -41,11 +36,11 @@ def RetCC_M68k_C : CallingConv<[
/// split among 16 1-byte values or used for a single 16-byte value.
/// TODO: Verify its functionality and write tests
def RetCC_M68k_Fast : CallingConv<[
+ CCIfPtr<CCAssignToReg<[A0]>>,
CCIfType<[i1], CCPromoteToType<i8>>,
CCIfType<[i8], CCAssignToReg<[BD0, BD1]>>,
CCIfType<[i16], CCAssignToReg<[WD0, WD1, WA0, WA1]>>,
CCIfType<[i32], CCAssignToReg<[D0, D1, A0, A1]>>,
- CCDelegateTo<RetCC_M68kCommon>
]>;
/// This is the root return-value convention for the M68k backend.
diff --git a/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp b/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp
index cbd69f24666e..a40b08ade61d 100644
--- a/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp
+++ b/llvm/lib/Target/M68k/M68kCollapseMOVEMPass.cpp
@@ -20,16 +20,17 @@
#include "M68kMachineFunction.h"
#include "M68kSubtarget.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;
-#define DEBUG_TYPE "M68k-collapse-movem"
+#define DEBUG_TYPE "m68k-collapse-movem"
+#define PASS_NAME "M68k MOVEM collapser pass"
namespace {
@@ -294,13 +295,13 @@ public:
return Modified;
}
-
- StringRef getPassName() const override { return "M68k MOVEM collapser pass"; }
};
char M68kCollapseMOVEM::ID = 0;
} // anonymous namespace.
+INITIALIZE_PASS(M68kCollapseMOVEM, DEBUG_TYPE, PASS_NAME, false, false)
+
/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createM68kCollapseMOVEMPass() {
return new M68kCollapseMOVEM();
diff --git a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
index 51a148f5aa04..2f60fc834a18 100644
--- a/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
+++ b/llvm/lib/Target/M68k/M68kExpandPseudo.cpp
@@ -19,16 +19,17 @@
#include "M68kMachineFunction.h"
#include "M68kSubtarget.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/GlobalValue.h"
using namespace llvm;
-#define DEBUG_TYPE "M68k-expand-pseudos"
+#define DEBUG_TYPE "m68k-expand-pseudo"
+#define PASS_NAME "M68k pseudo instruction expansion pass"
namespace {
class M68kExpandPseudo : public MachineFunctionPass {
@@ -56,10 +57,6 @@ public:
MachineFunctionProperties::Property::NoVRegs);
}
- StringRef getPassName() const override {
- return "M68k pseudo instruction expansion pass";
- }
-
private:
bool ExpandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
bool ExpandMBB(MachineBasicBlock &MBB);
@@ -67,6 +64,8 @@ private:
char M68kExpandPseudo::ID = 0;
} // End anonymous namespace.
+INITIALIZE_PASS(M68kExpandPseudo, DEBUG_TYPE, PASS_NAME, false, false)
+
/// If \p MBBI is a pseudo instruction, this method expands
/// it to the corresponding (sequence of) actual instruction(s).
/// \returns true if \p MBBI has been expanded.
@@ -162,6 +161,16 @@ bool M68kExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
return TII->ExpandMOVSZX_RM(MIB, false, TII->get(M68k::MOV16rf), MVT::i32,
MVT::i16);
+ case M68k::MOVZXd16q8:
+ return TII->ExpandMOVSZX_RM(MIB, false, TII->get(M68k::MOV8dq), MVT::i16,
+ MVT::i8);
+ case M68k::MOVZXd32q8:
+ return TII->ExpandMOVSZX_RM(MIB, false, TII->get(M68k::MOV8dq), MVT::i32,
+ MVT::i8);
+ case M68k::MOVZXd32q16:
+ return TII->ExpandMOVSZX_RM(MIB, false, TII->get(M68k::MOV16dq), MVT::i32,
+ MVT::i16);
+
case M68k::MOV8cd:
return TII->ExpandCCR(MIB, /*IsToCCR=*/true);
case M68k::MOV8dc:
diff --git a/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp b/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp
index f8335f3dcd77..e33654ea3f18 100644
--- a/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp
+++ b/llvm/lib/Target/M68k/M68kISelDAGToDAG.cpp
@@ -227,6 +227,9 @@ private:
bool SelectPCD(SDNode *Parent, SDValue N, SDValue &Imm);
bool SelectPCI(SDNode *Parent, SDValue N, SDValue &Imm, SDValue &Index);
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
+ std::vector<SDValue> &OutOps) override;
+
// If Address Mode represents Frame Index store FI in Disp and
// Displacement bit size in Base. These values are read symmetrically by
// M68kRegisterInfo::eliminateFrameIndex method
@@ -497,6 +500,13 @@ bool M68kDAGToDAGISel::matchAddressRecursively(SDValue N,
return true;
}
break;
+
+ case ISD::TargetGlobalTLSAddress: {
+ GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+ AM.GV = GA->getGlobal();
+ AM.SymbolFlags = GA->getTargetFlags();
+ return true;
+ }
}
return matchAddressBase(N, AM);
@@ -663,6 +673,15 @@ void M68kDAGToDAGISel::Select(SDNode *Node) {
default:
break;
+ case ISD::GLOBAL_OFFSET_TABLE: {
+ SDValue GOT = CurDAG->getTargetExternalSymbol(
+ "_GLOBAL_OFFSET_TABLE_", MVT::i32, M68kII::MO_GOTPCREL);
+ MachineSDNode *Res =
+ CurDAG->getMachineNode(M68k::LEA32q, DL, MVT::i32, GOT);
+ ReplaceNode(Node, Res);
+ return;
+ }
+
case M68kISD::GLOBAL_BASE_REG:
ReplaceNode(Node, getGlobalBaseReg());
return;
@@ -712,6 +731,8 @@ bool M68kDAGToDAGISel::SelectARID(SDNode *Parent, SDValue N, SDValue &Disp,
return false;
}
+ Base = AM.BaseReg;
+
if (getSymbolicDisplacement(AM, SDLoc(N), Disp)) {
assert(!AM.Disp && "Should not be any displacement");
LLVM_DEBUG(dbgs() << "SUCCESS, matched Symbol\n");
@@ -724,7 +745,6 @@ bool M68kDAGToDAGISel::SelectARID(SDNode *Parent, SDValue N, SDValue &Disp,
return false;
}
- Base = AM.BaseReg;
Disp = getI16Imm(AM.Disp, SDLoc(N));
LLVM_DEBUG(dbgs() << "SUCCESS\n");
@@ -931,3 +951,74 @@ bool M68kDAGToDAGISel::SelectARI(SDNode *Parent, SDValue N, SDValue &Base) {
return false;
}
+
+bool M68kDAGToDAGISel::SelectInlineAsmMemoryOperand(
+ const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ // In order to tell AsmPrinter the exact addressing mode we select here, which
+ // might consist of multiple SDValues (hence MachineOperands), a 32-bit
+ // immediate value is prepended to the list of selected SDValues to indicate
+ // the addressing mode kind.
+ using AMK = M68k::MemAddrModeKind;
+ auto addKind = [this](SDValue &Opnd, AMK Kind) -> bool {
+ Opnd = CurDAG->getTargetConstant(unsigned(Kind), SDLoc(), MVT::i32);
+ return true;
+ };
+
+ switch (ConstraintID) {
+ // Generic memory operand.
+ case InlineAsm::Constraint_m: {
+ // Try every supported (memory) addressing mode.
+ SDValue Operands[4];
+
+ // TODO: The ordering of the following SelectXXX is relatively arbitrary;
+ // right now we simply sort them by descending complexity. Maybe we should
+ // adjust this by code model and/or relocation mode in the future.
+ if (SelectARII(nullptr, Op, Operands[1], Operands[2], Operands[3]) &&
+ addKind(Operands[0], AMK::f)) {
+ OutOps.insert(OutOps.end(), &Operands[0], Operands + 4);
+ return false;
+ }
+
+ if ((SelectPCI(nullptr, Op, Operands[1], Operands[2]) &&
+ addKind(Operands[0], AMK::k)) ||
+ (SelectARID(nullptr, Op, Operands[1], Operands[2]) &&
+ addKind(Operands[0], AMK::p))) {
+ OutOps.insert(OutOps.end(), &Operands[0], Operands + 3);
+ return false;
+ }
+
+ if ((SelectPCD(nullptr, Op, Operands[1]) && addKind(Operands[0], AMK::q)) ||
+ (SelectARI(nullptr, Op, Operands[1]) && addKind(Operands[0], AMK::j)) ||
+ (SelectAL(nullptr, Op, Operands[1]) && addKind(Operands[0], AMK::b))) {
+ OutOps.insert(OutOps.end(), {Operands[0], Operands[1]});
+ return false;
+ }
+
+ return true;
+ }
+ // 'Q': Address register indirect addressing.
+ case InlineAsm::Constraint_Q: {
+ SDValue AMKind, Base;
+ // 'j' addressing mode.
+ // TODO: Add support for 'o' and 'e' after their
+ // select functions are implemented.
+ if (SelectARI(nullptr, Op, Base) && addKind(AMKind, AMK::j)) {
+ OutOps.insert(OutOps.end(), {AMKind, Base});
+ return false;
+ }
+ return true;
+ }
+ // 'U': Address register indirect w/ constant offset addressing.
+ case InlineAsm::Constraint_Um: {
+ SDValue AMKind, Base, Offset;
+ // 'p' addressing mode.
+ if (SelectARID(nullptr, Op, Offset, Base) && addKind(AMKind, AMK::p)) {
+ OutOps.insert(OutOps.end(), {AMKind, Offset, Base});
+ return false;
+ }
+ return true;
+ }
+ default:
+ return true;
+ }
+}
diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp
index 87ce56a5b9a9..af3af6760ae1 100644
--- a/llvm/lib/Target/M68k/M68kISelLowering.cpp
+++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp
@@ -73,16 +73,16 @@ M68kTargetLowering::M68kTargetLowering(const M68kTargetMachine &TM,
setTruncStoreAction(MVT::i32, MVT::i8, Expand);
setTruncStoreAction(MVT::i16, MVT::i8, Expand);
- setOperationAction(ISD::MUL, MVT::i8, Promote);
- setOperationAction(ISD::MUL, MVT::i16, Legal);
+ setOperationAction({ISD::MUL, ISD::SDIV, ISD::UDIV}, MVT::i8, Promote);
+ setOperationAction({ISD::MUL, ISD::SDIV, ISD::UDIV}, MVT::i16, Legal);
if (Subtarget.atLeastM68020())
- setOperationAction(ISD::MUL, MVT::i32, Legal);
+ setOperationAction({ISD::MUL, ISD::SDIV, ISD::UDIV}, MVT::i32, Legal);
else
- setOperationAction(ISD::MUL, MVT::i32, LibCall);
+ setOperationAction({ISD::MUL, ISD::SDIV, ISD::UDIV}, MVT::i32, LibCall);
setOperationAction(ISD::MUL, MVT::i64, LibCall);
for (auto OP :
- {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::UDIVREM, ISD::SDIVREM,
+ {ISD::SREM, ISD::UREM, ISD::UDIVREM, ISD::SDIVREM,
ISD::MULHS, ISD::MULHU, ISD::UMUL_LOHI, ISD::SMUL_LOHI}) {
setOperationAction(OP, MVT::i8, Promote);
setOperationAction(OP, MVT::i16, Legal);
@@ -163,6 +163,8 @@ M68kTargetLowering::M68kTargetLowering(const M68kTargetMachine &TM,
setOperationAction(ISD::ATOMIC_CMP_SWAP, {MVT::i8, MVT::i16, MVT::i32},
Subtarget.atLeastM68020() ? Legal : LibCall);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
// M68k does not have native read-modify-write support, so expand all of them
// to `__sync_fetch_*` for target < M68020, otherwise expand to CmpxChg.
// See `shouldExpandAtomicRMWInIR` below.
@@ -178,12 +180,11 @@ M68kTargetLowering::M68kTargetLowering(const M68kTargetMachine &TM,
ISD::ATOMIC_LOAD_MAX,
ISD::ATOMIC_LOAD_UMIN,
ISD::ATOMIC_LOAD_UMAX,
+ ISD::ATOMIC_SWAP,
},
{MVT::i8, MVT::i16, MVT::i32}, LibCall);
- // 2^2 bytes
- // FIXME can it be just 2^1?
- setMinFunctionAlignment(Align::Constant<2>());
+ setMinFunctionAlignment(Align(2));
}
TargetLoweringBase::AtomicExpansionKind
@@ -193,6 +194,24 @@ M68kTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
: TargetLoweringBase::AtomicExpansionKind::None;
}
+Register
+M68kTargetLowering::getExceptionPointerRegister(const Constant *) const {
+ return M68k::D0;
+}
+
+Register
+M68kTargetLowering::getExceptionSelectorRegister(const Constant *) const {
+ return M68k::D1;
+}
+
+unsigned
+M68kTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
+ return StringSwitch<unsigned>(ConstraintCode)
+ .Case("Q", InlineAsm::Constraint_Q)
+ .Case("U", InlineAsm::Constraint_Um) // We borrow Constraint_Um for 'U'.
+ .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
+}
+
EVT M68kTargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext &Context, EVT VT) const {
// M68k SETcc produces either 0x00 or 0xFF
@@ -748,11 +767,11 @@ SDValue M68kTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into registers.
- SDValue InFlag;
+ SDValue InGlue;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
+ RegsToPass[i].second, InGlue);
+ InGlue = Chain.getValue(1);
}
if (Callee->getOpcode() == ISD::GlobalAddress) {
@@ -796,8 +815,8 @@ SDValue M68kTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<SDValue, 8> Ops;
if (!IsSibcall && IsTailCall) {
- Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InFlag, DL);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, DL);
+ InGlue = Chain.getValue(1);
}
Ops.push_back(Chain);
@@ -818,8 +837,8 @@ SDValue M68kTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(DAG.getRegisterMask(Mask));
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ if (InGlue.getNode())
+ Ops.push_back(InGlue);
if (IsTailCall) {
MF.getFrameInfo().setHasTailCall();
@@ -827,7 +846,7 @@ SDValue M68kTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
Chain = DAG.getNode(M68kISD::CALL, DL, NodeTys, Ops);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPop;
@@ -851,18 +870,18 @@ SDValue M68kTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Returns a flag for retval copy to use.
if (!IsSibcall) {
Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
- InFlag, DL);
- InFlag = Chain.getValue(1);
+ InGlue, DL);
+ InGlue = Chain.getValue(1);
}
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
+ return LowerCallResult(Chain, InGlue, CallConv, IsVarArg, Ins, DL, DAG,
InVals);
}
SDValue M68kTargetLowering::LowerCallResult(
- SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg,
+ SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
@@ -878,14 +897,14 @@ SDValue M68kTargetLowering::LowerCallResult(
EVT CopyVT = VA.getLocVT();
/// ??? is this correct?
- Chain = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), CopyVT, InFlag)
+ Chain = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), CopyVT, InGlue)
.getValue(1);
SDValue Val = Chain.getValue(0);
if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1)
Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
- InFlag = Chain.getValue(2);
+ InGlue = Chain.getValue(2);
InVals.push_back(Val);
}
@@ -986,7 +1005,7 @@ SDValue M68kTargetLowering::LowerFormalArguments(
}
}
- unsigned StackSize = CCInfo.getNextStackOffset();
+ unsigned StackSize = CCInfo.getStackSize();
// Align stack specially for tail calls.
if (shouldGuaranteeTCO(CCID, MF.getTarget().Options.GuaranteedTailCallOpt))
StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
@@ -1039,6 +1058,14 @@ SDValue M68kTargetLowering::LowerFormalArguments(
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
+bool M68kTargetLowering::CanLowerReturn(
+ CallingConv::ID CCID, MachineFunction &MF, bool IsVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CCID, IsVarArg, MF, RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_M68k);
+}
+
SDValue
M68kTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CCID,
bool IsVarArg,
@@ -1052,7 +1079,7 @@ M68kTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CCID,
CCState CCInfo(CCID, IsVarArg, MF, RVLocs, *DAG.getContext());
CCInfo.AnalyzeReturn(Outs, RetCC_M68k);
- SDValue Flag;
+ SDValue Glue;
SmallVector<SDValue, 6> RetOps;
// Operand #0 = Chain (updated below)
RetOps.push_back(Chain);
@@ -1080,8 +1107,8 @@ M68kTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CCID,
} else if (VA.getLocInfo() == CCValAssign::BCvt)
ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), ValToCopy, Flag);
- Flag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), ValToCopy, Glue);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
@@ -1123,8 +1150,8 @@ M68kTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CCID,
// ??? How will this work if CC does not use registers for args passing?
// ??? What if I return multiple structs?
unsigned RetValReg = M68k::D0;
- Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
- Flag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Glue);
+ Glue = Chain.getValue(1);
RetOps.push_back(
DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
@@ -1132,9 +1159,9 @@ M68kTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CCID,
RetOps[0] = Chain; // Update chain.
- // Add the flag if we have it.
- if (Flag.getNode())
- RetOps.push_back(Flag);
+ // Add the glue if we have it.
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
return DAG.getNode(M68kISD::RET, DL, MVT::Other, RetOps);
}
@@ -1267,9 +1294,9 @@ bool M68kTargetLowering::IsEligibleForTailCallOptimization(
CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs, C);
CCInfo.AnalyzeCallOperands(Outs, CC_M68k);
- StackArgsSize = CCInfo.getNextStackOffset();
+ StackArgsSize = CCInfo.getStackSize();
- if (CCInfo.getNextStackOffset()) {
+ if (StackArgsSize) {
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -1391,7 +1418,108 @@ SDValue M68kTargetLowering::LowerOperation(SDValue Op,
return LowerShiftRightParts(Op, DAG, true);
case ISD::SRL_PARTS:
return LowerShiftRightParts(Op, DAG, false);
+ case ISD::ATOMIC_FENCE:
+ return LowerATOMICFENCE(Op, DAG);
+ case ISD::GlobalTLSAddress:
+ return LowerGlobalTLSAddress(Op, DAG);
+ }
+}
+
+SDValue M68kTargetLowering::LowerExternalSymbolCall(SelectionDAG &DAG,
+ SDLoc Loc,
+ llvm::StringRef SymbolName,
+ ArgListTy &&ArgList) const {
+ PointerType *PtrTy = PointerType::get(*DAG.getContext(), 0);
+ CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(Loc)
+ .setChain(DAG.getEntryNode())
+ .setLibCallee(CallingConv::C, PtrTy,
+ DAG.getExternalSymbol(SymbolName.data(),
+ getPointerMemTy(DAG.getDataLayout())),
+ std::move(ArgList));
+ return LowerCallTo(CLI).first;
+}
+
+SDValue M68kTargetLowering::getTLSGetAddr(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG,
+ unsigned TargetFlags) const {
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
+ SDValue TGA = DAG.getTargetGlobalAddress(
+ GA->getGlobal(), GA, GA->getValueType(0), GA->getOffset(), TargetFlags);
+ SDValue Arg = DAG.getNode(ISD::ADD, SDLoc(GA), MVT::i32, GOT, TGA);
+
+ PointerType *PtrTy = PointerType::get(*DAG.getContext(), 0);
+
+ ArgListTy Args;
+ ArgListEntry Entry;
+ Entry.Node = Arg;
+ Entry.Ty = PtrTy;
+ Args.push_back(Entry);
+ return LowerExternalSymbolCall(DAG, SDLoc(GA), "__tls_get_addr",
+ std::move(Args));
+}
+
+SDValue M68kTargetLowering::getM68kReadTp(SDLoc Loc, SelectionDAG &DAG) const {
+ return LowerExternalSymbolCall(DAG, Loc, "__m68k_read_tp", ArgListTy());
+}
+
+SDValue M68kTargetLowering::LowerTLSGeneralDynamic(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ return getTLSGetAddr(GA, DAG, M68kII::MO_TLSGD);
+}
+
+SDValue M68kTargetLowering::LowerTLSLocalDynamic(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ SDValue Addr = getTLSGetAddr(GA, DAG, M68kII::MO_TLSLDM);
+ SDValue TGA =
+ DAG.getTargetGlobalAddress(GA->getGlobal(), GA, GA->getValueType(0),
+ GA->getOffset(), M68kII::MO_TLSLD);
+ return DAG.getNode(ISD::ADD, SDLoc(GA), MVT::i32, TGA, Addr);
+}
+
+SDValue M68kTargetLowering::LowerTLSInitialExec(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
+ SDValue Tp = getM68kReadTp(SDLoc(GA), DAG);
+ SDValue TGA =
+ DAG.getTargetGlobalAddress(GA->getGlobal(), GA, GA->getValueType(0),
+ GA->getOffset(), M68kII::MO_TLSIE);
+ SDValue Addr = DAG.getNode(ISD::ADD, SDLoc(GA), MVT::i32, TGA, GOT);
+ SDValue Offset =
+ DAG.getLoad(MVT::i32, SDLoc(GA), DAG.getEntryNode(), Addr,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
+
+ return DAG.getNode(ISD::ADD, SDLoc(GA), MVT::i32, Offset, Tp);
+}
+
+SDValue M68kTargetLowering::LowerTLSLocalExec(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const {
+ SDValue Tp = getM68kReadTp(SDLoc(GA), DAG);
+ SDValue TGA =
+ DAG.getTargetGlobalAddress(GA->getGlobal(), GA, GA->getValueType(0),
+ GA->getOffset(), M68kII::MO_TLSLE);
+ return DAG.getNode(ISD::ADD, SDLoc(GA), MVT::i32, TGA, Tp);
+}
+
+SDValue M68kTargetLowering::LowerGlobalTLSAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget.isTargetELF());
+
+ auto *GA = cast<GlobalAddressSDNode>(Op);
+ TLSModel::Model AccessModel = DAG.getTarget().getTLSModel(GA->getGlobal());
+
+ switch (AccessModel) {
+ case TLSModel::GeneralDynamic:
+ return LowerTLSGeneralDynamic(GA, DAG);
+ case TLSModel::LocalDynamic:
+ return LowerTLSLocalDynamic(GA, DAG);
+ case TLSModel::InitialExec:
+ return LowerTLSInitialExec(GA, DAG);
+ case TLSModel::LocalExec:
+ return LowerTLSLocalExec(GA, DAG);
}
+
+ llvm_unreachable("Unexpected TLS access model type");
}
bool M68kTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
@@ -1553,12 +1681,12 @@ static unsigned TranslateM68kCC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
SelectionDAG &DAG) {
if (!IsFP) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
- if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
+ if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnes()) {
// X > -1 -> X == 0, jump !sign.
RHS = DAG.getConstant(0, DL, RHS.getValueType());
return M68k::COND_PL;
}
- if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
+ if (SetCCOpcode == ISD::SETLT && RHSC->isZero()) {
// X < 0 -> X == 0, jump on sign.
return M68k::COND_MI;
}
@@ -2756,6 +2884,9 @@ M68kTargetLowering::getConstraintType(StringRef Constraint) const {
break;
}
break;
+ case 'Q':
+ case 'U':
+ return C_Memory;
default:
break;
}
@@ -3220,6 +3351,28 @@ SDValue M68kTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachinePointerInfo(SV));
}
+SDValue M68kTargetLowering::LowerATOMICFENCE(SDValue Op,
+ SelectionDAG &DAG) const {
+ // Lower to a memory barrier created from inline asm.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ LLVMContext &Ctx = *DAG.getContext();
+
+ const unsigned Flags = InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore |
+ InlineAsm::Extra_HasSideEffects;
+ const SDValue AsmOperands[4] = {
+ Op.getOperand(0), // Input chain
+ DAG.getTargetExternalSymbol(
+ "", TLI.getProgramPointerTy(
+ DAG.getDataLayout())), // Empty inline asm string
+ DAG.getMDNode(MDNode::get(Ctx, {})), // (empty) srcloc
+ DAG.getTargetConstant(Flags, SDLoc(Op),
+ TLI.getPointerTy(DAG.getDataLayout())), // Flags
+ };
+
+ return DAG.getNode(ISD::INLINEASM, SDLoc(Op),
+ DAG.getVTList(MVT::Other, MVT::Glue), AsmOperands);
+}
+
// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
// Calls to _alloca are needed to probe the stack when allocating more than 4k
// bytes in one go. Touching the stack at 4K increments is necessary to ensure
diff --git a/llvm/lib/Target/M68k/M68kISelLowering.h b/llvm/lib/Target/M68k/M68kISelLowering.h
index ec525bc4c6b3..5f279b3dcbd3 100644
--- a/llvm/lib/Target/M68k/M68kISelLowering.h
+++ b/llvm/lib/Target/M68k/M68kISelLowering.h
@@ -177,6 +177,18 @@ public:
AtomicExpansionKind
shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override;
+ /// If a physical register, this returns the register that receives the
+ /// exception address on entry to an EH pad.
+ Register
+ getExceptionPointerRegister(const Constant *PersonalityFn) const override;
+
+ /// If a physical register, this returns the register that receives the
+ /// exception typeid on entry to a landing pad.
+ Register
+ getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
+
+ unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override;
+
private:
unsigned GetAlignedArgumentStackSize(unsigned StackSize,
SelectionDAG &DAG) const;
@@ -226,11 +238,14 @@ private:
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ SDValue LowerATOMICFENCE(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
/// LowerFormalArguments - transform physical registers into virtual
/// registers and generate load operations for arguments places on the stack.
@@ -243,6 +258,11 @@ private:
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
/// Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue LowerReturn(SDValue Chain, CallingConv::ID CCID, bool IsVarArg,
@@ -250,6 +270,20 @@ private:
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
SelectionDAG &DAG) const override;
+ SDValue LowerExternalSymbolCall(SelectionDAG &DAG, SDLoc loc,
+ llvm::StringRef SymbolName,
+ ArgListTy &&ArgList) const;
+ SDValue getTLSGetAddr(GlobalAddressSDNode *GA, SelectionDAG &DAG,
+ unsigned TargetFlags) const;
+ SDValue getM68kReadTp(SDLoc Loc, SelectionDAG &DAG) const;
+
+ SDValue LowerTLSGeneralDynamic(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const;
+ SDValue LowerTLSLocalDynamic(GlobalAddressSDNode *GA,
+ SelectionDAG &DAG) const;
+ SDValue LowerTLSInitialExec(GlobalAddressSDNode *GA, SelectionDAG &DAG) const;
+ SDValue LowerTLSLocalExec(GlobalAddressSDNode *GA, SelectionDAG &DAG) const;
+
bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
SDValue C) const override;
diff --git a/llvm/lib/Target/M68k/M68kInstrArithmetic.td b/llvm/lib/Target/M68k/M68kInstrArithmetic.td
index 2339e3caa517..15d2049f62cb 100644
--- a/llvm/lib/Target/M68k/M68kInstrArithmetic.td
+++ b/llvm/lib/Target/M68k/M68kInstrArithmetic.td
@@ -148,7 +148,7 @@ class MxBiArOp_R_RI<string MN, SDNode NODE, MxType TYPE, bits<4> CMD>
[(set TYPE.VT:$dst, CCR, (NODE TYPE.VT:$src, TYPE.IPat:$opd))]> {
let Inst = (ascend
(descend 0b0000, CMD,
- !cast<MxNewEncSize>("MxNewEncSize"#TYPE.Size).Value,
+ !cast<MxEncSize>("MxEncSize"#TYPE.Size).Value,
// The destination cannot be address register, so it's always
// the MODE for data register direct mode.
/*MODE*/0b000,
@@ -181,7 +181,7 @@ class MxBiArOp_MI<string MN, MxType TYPE,
MN#"."#TYPE.Prefix#"\t$opd, $dst", []> {
let Inst = (ascend
(descend 0b0000, CMD,
- !cast<MxNewEncSize>("MxNewEncSize"#TYPE.Size).Value,
+ !cast<MxEncSize>("MxEncSize"#TYPE.Size).Value,
DST_ENC.EA),
// Source (i.e. immediate value) encoding
MxEncAddrMode_i<"opd", TYPE.Size>.Supplement,
@@ -293,6 +293,8 @@ multiclass MxBiArOp_AF<string MN, SDNode NODE, bits<4> CMD> {
CMD, MxEncAddrMode_p<"opd">>;
def NAME#"32aj" : MxBiArOp_R_RM<MN, NODE, MxType32a, MxType32.JOp, MxType32.JPat,
CMD, MxEncAddrMode_j<"opd">>;
+ def NAME#"32ab" : MxBiArOp_R_RM<MN, NODE, MxType32a, MxType32.BOp, MxType32.BPat,
+ CMD, MxEncAddrMode_abs<"opd", true>>;
def NAME#"32ai" : MxBiArOp_R_RI_xEA<MN, NODE, MxType32a, CMD>;
def NAME#"32ar" : MxBiArOp_R_RR_xEA<MN, NODE, MxType32a, MxType32r, CMD>;
@@ -310,6 +312,12 @@ defm ADD : MxBiArOp_AF<"adda", MxAdd, 0xD>;
defm SUB : MxBiArOp_DF<"sub", MxSub, 0, 0x9, 0x4>;
defm SUB : MxBiArOp_AF<"suba", MxSub, 0x9>;
+// This pattern is used to enable the instruction selector to select ADD32ab
+// for global values that are allocated in thread-local storage, i.e.:
+// t8: i32 = ISD::ADD GLOBAL_OFFSET_TABLE, TargetGlobalTLSAddress:i32<ptr @myvar>
+// ====>
+// t8: i32,i8 = ADD32ab GLOBAL_OFFSET_TABLE, TargetGlobalTLSAddress:i32<ptr @myvar>
+def : Pat<(add MxARD32:$src, tglobaltlsaddr:$opd), (ADD32ab MxARD32:$src, MxAL32:$opd)>;
let Uses = [CCR], Defs = [CCR] in {
let Constraints = "$src = $dst" in {
@@ -409,7 +417,7 @@ class MxCmp_RI<MxType TYPE>
[(set CCR, (MxCmp TYPE.IPat:$imm, TYPE.VT:$reg))]> {
let Inst = (ascend
(descend 0b00001100,
- !cast<MxNewEncSize>("MxNewEncSize"#TYPE.Size).Value,
+ !cast<MxEncSize>("MxEncSize"#TYPE.Size).Value,
// The destination cannot be address register, so it's always
// the MODE for data register direct mode.
/*MODE*/0b000,
@@ -428,7 +436,7 @@ class MxCmp_MI<MxType TYPE, MxOperand MEMOpd, ComplexPattern MEMPat,
[(set CCR, (MxCmp TYPE.IPat:$imm, (load MEMPat:$mem)))]> {
let Inst = (ascend
(descend 0b00001100,
- !cast<MxNewEncSize>("MxNewEncSize"#TYPE.Size).Value,
+ !cast<MxEncSize>("MxEncSize"#TYPE.Size).Value,
MEM_ENC.EA),
// Source (i.e. immediate value) encoding
MxEncAddrMode_i<"imm", TYPE.Size>.Supplement,
@@ -446,7 +454,7 @@ class MxCmp_BI<MxType TYPE>
defvar AbsEncoding = MxEncAddrMode_abs<"abs", true>;
let Inst = (ascend
(descend 0b00001100,
- !cast<MxNewEncSize>("MxNewEncSize"#TYPE.Size).Value,
+ !cast<MxEncSize>("MxEncSize"#TYPE.Size).Value,
AbsEncoding.EA),
// Source (i.e. immediate value) encoding
MxEncAddrMode_i<"imm", TYPE.Size>.Supplement,
@@ -581,6 +589,22 @@ class MxDiMuOp_DD<string MN, bits<4> CMD, bit SIGNED = false,
);
}
+// $dreg <- $dreg op $dreg
+class MxDiMuOp_DD_Long<string MN, bits<10> CMD, bit SIGNED = false>
+ : MxInst<(outs MxDRD32:$dst), (ins MxDRD32:$src, MxDRD32:$opd), MN#"\t$opd, $dst", []> {
+ let Inst = (ascend
+ (descend CMD,
+ /*MODE*/0b000, /*REGISTER*/(operand "$opd", 3)),
+ (descend 0b0,
+ // REGISTER
+ (operand "$dst", 3),
+ !if(SIGNED, 0b1, 0b0),
+ /*SIZE*/0b0, 0b0000000,
+ // Dr REGISTER
+ 0b000)
+ );
+}
+
// $reg <- $reg op $imm
class MxDiMuOp_DI<string MN, bits<4> CMD, bit SIGNED = false,
MxOperand DST, MxOperand OPD>
@@ -610,6 +634,9 @@ multiclass MxDiMuOp<string MN, bits<4> CMD, bit isComm = 0> {
defm DIV : MxDiMuOp<"div", 0x8>;
+def SDIVd32d32 : MxDiMuOp_DD_Long<"divs.l", 0x131, /*SIGNED*/true>;
+def UDIVd32d32 : MxDiMuOp_DD_Long<"divu.l", 0x131, /*SIGNED*/false>;
+
// This is used to cast immediates to 16-bits for operations which don't
// support smaller immediate sizes.
def as_i16imm : SDNodeXForm<imm, [{
@@ -659,6 +686,12 @@ def : Pat<(urem i16:$dst, i16:$opd),
MxSubRegIndex16Lo)>;
+// RR i32
+def : Pat<(sdiv i32:$dst, i32:$opd), (SDIVd32d32 $dst, $opd)>;
+
+def : Pat<(udiv i32:$dst, i32:$opd), (UDIVd32d32 $dst, $opd)>;
+
+
// RI i8
def : Pat<(sdiv i8:$dst, MximmSExt8:$opd),
(EXTRACT_SUBREG
@@ -704,6 +737,9 @@ def : Pat<(urem i16:$dst, MximmSExt16:$opd),
defm MUL : MxDiMuOp<"mul", 0xC, 1>;
+def SMULd32d32 : MxDiMuOp_DD_Long<"muls.l", 0x130, /*SIGNED*/true>;
+def UMULd32d32 : MxDiMuOp_DD_Long<"mulu.l", 0x130, /*SIGNED*/false>;
+
// RR
def : Pat<(mul i16:$dst, i16:$opd),
(EXTRACT_SUBREG
@@ -720,6 +756,8 @@ def : Pat<(mulhu i16:$dst, i16:$opd),
(LSR32di (LSR32di (UMULd32d16 (MOVXd32d16 $dst), $opd), 8), 8),
MxSubRegIndex16Lo)>;
+def : Pat<(mul i32:$dst, i32:$opd), (SMULd32d32 $dst, $opd)>;
+
// RI
def : Pat<(mul i16:$dst, MximmSExt16:$opd),
@@ -756,7 +794,7 @@ class MxNeg_D<MxType TYPE>
"neg."#TYPE.Prefix#"\t$dst",
[(set TYPE.VT:$dst, (ineg TYPE.VT:$src))]> {
let Inst = (descend 0b01000100,
- /*SIZE*/!cast<MxNewEncSize>("MxNewEncSize"#TYPE.Size).Value,
+ /*SIZE*/!cast<MxEncSize>("MxEncSize"#TYPE.Size).Value,
//MODE without last bit
0b00,
//REGISTER prefixed by D/A bit
@@ -770,7 +808,7 @@ class MxNegX_D<MxType TYPE>
"negx."#TYPE.Prefix#"\t$dst",
[(set TYPE.VT:$dst, (MxSubX 0, TYPE.VT:$src, CCR))]> {
let Inst = (descend 0b01000000,
- /*SIZE*/!cast<MxNewEncSize>("MxNewEncSize"#TYPE.Size).Value,
+ /*SIZE*/!cast<MxEncSize>("MxEncSize"#TYPE.Size).Value,
//MODE without last bit
0b00,
//REGISTER prefixed by D/A bit
@@ -790,6 +828,12 @@ foreach S = [8, 16, 32] in {
def : Pat<(MxSub 0, i8 :$src), (NEG8d MxDRD8 :$src)>;
def : Pat<(MxSub 0, i16:$src), (NEG16d MxDRD16:$src)>;
def : Pat<(MxSub 0, i32:$src), (NEG32d MxDRD32:$src)>;
+// SExt of i1 values.
+// Although we specify `ZeroOrOneBooleanContent` for boolean content,
+// we're still adding an AND here as we don't know the origin of the i1 value.
+def : Pat<(sext_inreg i8:$src, i1), (NEG8d (AND8di MxDRD8:$src, 1))>;
+def : Pat<(sext_inreg i16:$src, i1), (NEG16d (AND16di MxDRD16:$src, 1))>;
+def : Pat<(sext_inreg i32:$src, i1), (NEG32d (AND32di MxDRD32:$src, 1))>;
//===----------------------------------------------------------------------===//
// no-CCR Patterns
@@ -949,3 +993,110 @@ multiclass BitwisePat<string INST, SDNode OP> {
defm : BitwisePat<"AND", and>;
defm : BitwisePat<"OR", or>;
defm : BitwisePat<"XOR", xor>;
+
+//===----------------------------------------------------------------------===//
+// Floating point arithmetic instruction
+//===----------------------------------------------------------------------===//
+
+let Defs = [FPS] in
+class MxFArithBase_FF<dag outs, dag ins, string asm, string rounding,
+ list<dag> patterns>
+ : MxInst<outs, ins, asm, patterns> {
+ let Uses = !if(!eq(rounding, ""), [FPC], []);
+
+ let Predicates = !if(!eq(rounding, ""), [AtLeastM68881], [AtLeastM68040]);
+}
+
+class MxFPOpModeSelector<string rounding, bits<7> single, bits<7> double,
+ bits<7> extended> {
+ bits<7> Mode = !cond(!eq(rounding, "s"): single,
+ !eq(rounding, "d"): double,
+ !eq(rounding, ""): extended);
+}
+
+//===----------------------------------------------------------------------===//
+// Unary floating point instruction
+//===----------------------------------------------------------------------===//
+
+class MxFUnary_FF<MxOpBundle Opnd, string rounding,
+ string mnemonic, bits<7> opmode>
+ : MxFArithBase_FF<(outs Opnd.Op:$dst), (ins Opnd.Op:$src),
+ "f"#rounding#mnemonic#".x\t$src, $dst", rounding, [(null_frag)]> {
+ let Inst = (ascend
+ (descend 0b1111,
+ /*COPROCESSOR ID*/0b001,
+ 0b000,
+ /*MODE+REGISTER*/0b000000),
+ (descend 0b0, /* R/M */ 0b0, 0b0,
+ /*SOURCE SPECIFIER*/
+ (operand "$src", 3),
+ /*DESTINATION*/
+ (operand "$dst", 3),
+ /*OPMODE*/
+ opmode)
+ );
+}
+
+multiclass MxFUnaryOp<string mnemonic, bits<7> single, bits<7> double,
+ bits<7> extended> {
+ foreach rounding = ["", "s", "d"] in {
+ defvar opmode = MxFPOpModeSelector<rounding, single, double, extended>.Mode;
+
+ def F # !toupper(rounding) # !substr(NAME, 1) # "80fp_fp"
+ : MxFUnary_FF<MxOp80AddrMode_fpr, rounding, mnemonic, opmode>;
+
+ let isCodeGenOnly = 1 in
+ foreach size = [32, 64] in
+ def F # !toupper(rounding) # !substr(NAME, 1) # size # "fp_fp"
+ : MxFUnary_FF<!cast<MxOpBundle>("MxOp"#size#"AddrMode_fpr"),
+ rounding, mnemonic, opmode>;
+ }
+}
+
+defm FABS : MxFUnaryOp<"abs", 0b1011000, 0b1011100, 0b0011000>;
+defm FNEG : MxFUnaryOp<"neg", 0b1011010, 0b1011110, 0b0011010>;
+
+//===----------------------------------------------------------------------===//
+// Binary floating point instruction
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src = $dst" in
+class MxFBinary_FF<MxOpBundle Opnd, string rounding,
+ string mnemonic, bits<7> opmode>
+ : MxFArithBase_FF<(outs Opnd.Op:$dst), (ins Opnd.Op:$src, Opnd.Op:$opd),
+ "f"#rounding#mnemonic#".x\t$opd, $dst", rounding, [(null_frag)]> {
+ let Inst = (ascend
+ (descend 0b1111,
+ /*COPROCESSOR ID*/0b001,
+ 0b000,
+ /*MODE+REGISTER*/0b000000),
+ (descend 0b0, /* R/M */ 0b0, 0b0,
+ /*SOURCE SPECIFIER*/
+ (operand "$opd", 3),
+ /*DESTINATION*/
+ (operand "$dst", 3),
+ /*OPMODE*/
+ opmode)
+ );
+}
+
+multiclass MxFBinaryOp<string mnemonic, bits<7> single, bits<7> double,
+ bits<7> extended> {
+ foreach rounding = ["", "s", "d"] in {
+ defvar opmode = MxFPOpModeSelector<rounding, single, double, extended>.Mode;
+
+ def F # !toupper(rounding) # !substr(NAME, 1) # "80fp_fp"
+ : MxFBinary_FF<MxOp80AddrMode_fpr, rounding, mnemonic, opmode>;
+
+ let isCodeGenOnly = 1 in
+ foreach size = [32, 64] in
+ def F # !toupper(rounding) # !substr(NAME, 1) # size # "fp_fp"
+ : MxFBinary_FF<!cast<MxOpBundle>("MxOp"#size#"AddrMode_fpr"),
+ rounding, mnemonic, opmode>;
+ }
+}
+
+defm FADD : MxFBinaryOp<"add", 0b1100010, 0b1100110, 0b0100010>;
+defm FSUB : MxFBinaryOp<"sub", 0b1101000, 0b1101100, 0b0101000>;
+defm FMUL : MxFBinaryOp<"mul", 0b1100011, 0b1100111, 0b0100011>;
+defm FDIV : MxFBinaryOp<"div", 0b1100000, 0b1100100, 0b0100000>;
diff --git a/llvm/lib/Target/M68k/M68kInstrControl.td b/llvm/lib/Target/M68k/M68kInstrControl.td
index d15283c769f6..225f932f3316 100644
--- a/llvm/lib/Target/M68k/M68kInstrControl.td
+++ b/llvm/lib/Target/M68k/M68kInstrControl.td
@@ -12,7 +12,7 @@
///
/// Machine:
///
-/// BRA [x] BSR [ ] Bcc [~] DBcc [ ] FBcc [ ]
+/// BRA [x] BSR [~] Bcc [~] DBcc [ ] FBcc [ ]
/// FDBcc [ ] FNOP [ ] FPn [ ] FScc [ ] FTST [ ]
/// JMP [~] JSR [x] NOP [x] RTD [!] RTR [ ]
/// RTS [x] Scc [~] TST [ ]
@@ -225,6 +225,34 @@ def BRA16 : MxBra<MxBrTarget16, (descend 0b0000, 0b0000),
def : Pat<(br bb:$target), (BRA8 MxBrTarget8:$target)>;
+/// -------------------------------------------------
+/// F E D C B A 9 8 | 7 6 5 4 3 2 1 0
+/// -------------------------------------------------
+/// 0 1 1 0 0 0 0 1 | 8-BIT DISPLACEMENT
+/// -------------------------------------------------
+/// 16-BIT DISPLACEMENT IF 8-BIT DISPLACEMENT = $00
+/// -------------------------------------------------
+/// 32-BIT DISPLACEMENT IF 8-BIT DISPLACEMENT = $FF
+/// -------------------------------------------------
+
+let isBranch = 1, isTerminator = 1 in
+class MxBsr<Operand TARGET, MxType TYPE, dag disp_8, dag disp_16_32>
+ : MxInst<(outs), (ins TARGET:$dst), "bsr."#TYPE.Prefix#"\t$dst"> {
+ let Inst = (ascend
+ (descend 0b0110, 0b0001, disp_8),
+ disp_16_32
+ );
+}
+
+def BSR8 : MxBsr<MxBrTarget8, MxType8,
+ (operand "$dst", 8, (encoder "encodePCRelImm<8>")), (ascend)>;
+
+def BSR16 : MxBsr<MxBrTarget16, MxType16, (descend 0b0000, 0b0000),
+ (operand "$dst", 16, (encoder "encodePCRelImm<16>"))>;
+
+def BSR32 : MxBsr<MxBrTarget32, MxType32, (descend 0b1111, 0b1111),
+ (operand "$dst", 32, (encoder "encodePCRelImm<32>"),
+ (decoder "DecodeImm32"))>;
//===----------------------------------------------------------------------===//
// Call
@@ -293,8 +321,6 @@ def TAILJMPj : MxPseudo<(outs), (ins MxARI32_TC:$dst)>;
// Return
//===----------------------------------------------------------------------===//
-// TODO Implement LINK/UNLK
-
let isTerminator = 1, isReturn = 1, isBarrier = 1, hasCtrlDep = 1 in {
def RTS : MxInst<(outs), (ins), "rts", []> {
@@ -353,3 +379,41 @@ def : Pat<(sub MxDRD8:$op, (i8 (MxSetCC_C MxCONDcs, CCR))),
(ADDX8dd MxDRD8:$op, (MOV8di 0))>;
def : Pat<(sub MxXRD32:$op, (i32 (MxSetCC_C MxCONDcs, CCR))),
(ADDX32dd MxDRD32:$op, (MOV32ri 0))>;
+
+//===------------===//
+// Trap / Breakpoint
+//===------------===//
+
+let RenderMethod = "addImmOperands", ParserMethod = "parseImm" in {
+ def MxTrapImm : AsmOperandClass {
+ let Name = "MxTrapImm";
+ let PredicateMethod = "isTrapImm";
+ }
+
+ def MxBkptImm : AsmOperandClass {
+ let Name = "MxBkptImm";
+ let PredicateMethod = "isBkptImm";
+ }
+}
+
+let ParserMatchClass = MxTrapImm in
+def MxTrapimm : MxOp<i8, MxSize8, "i">;
+
+let ParserMatchClass = MxBkptImm in
+def MxBkptimm : MxOp<i8, MxSize8, "i">;
+
+def TRAP : MxInst<(outs), (ins MxTrapimm:$vect), "trap\t$vect", []> {
+ let Inst = (descend 0b0100, 0b1110, 0b0100, (operand "$vect", 4));
+}
+
+def TRAPV : MxInst<(outs), (ins), "trapv", []> {
+ let Inst = (descend 0b0100, 0b1110, 0b0111, 0b0110);
+}
+
+def BKPT : MxInst<(outs), (ins MxBkptimm:$vect), "bkpt\t$vect", []> {
+ let Inst = (descend 0b0100, 0b1000, 0b0100, 0b1, (operand "$vect", 3));
+}
+
+def ILLEGAL : MxInst<(outs), (ins), "illegal", []> {
+ let Inst = (descend 0b0100, 0b1010, 0b1111, 0b1100);
+}
diff --git a/llvm/lib/Target/M68k/M68kInstrData.td b/llvm/lib/Target/M68k/M68kInstrData.td
index ed37bd1f3dae..e6d4471f7aab 100644
--- a/llvm/lib/Target/M68k/M68kInstrData.td
+++ b/llvm/lib/Target/M68k/M68kInstrData.td
@@ -525,6 +525,10 @@ foreach EXT = ["S", "Z"] in {
def MOV#EXT#Xd32f8 : MxPseudoMove_RM<MxType32d, MxType8.FOp>;
def MOV#EXT#Xd32f16 : MxPseudoMove_RM<MxType32d, MxType16.FOp>;
+ def MOV#EXT#Xd16q8 : MxPseudoMove_RM<MxType16d, MxType8.QOp>;
+ def MOV#EXT#Xd32q8 : MxPseudoMove_RM<MxType32d, MxType8.QOp>;
+ def MOV#EXT#Xd32q16 : MxPseudoMove_RM<MxType32d, MxType16.QOp>;
+
}
}
}
@@ -572,18 +576,21 @@ def: Pat<(MxZExtLoadi16i8 MxCP_ARID:$src),
(EXTRACT_SUBREG (MOVZXd32p8 MxARID8:$src), MxSubRegIndex16Lo)>;
def: Pat<(MxZExtLoadi16i8 MxCP_ARII:$src),
(EXTRACT_SUBREG (MOVZXd32f8 MxARII8:$src), MxSubRegIndex16Lo)>;
+def: Pat<(MxZExtLoadi16i8 MxCP_PCD :$src), (MOVZXd16q8 MxPCD8 :$src)>;
// i32 <- zext i8
def: Pat<(i32 (zext i8:$src)), (MOVZXd32d8 MxDRD8:$src)>;
def: Pat<(MxZExtLoadi32i8 MxCP_ARI :$src), (MOVZXd32j8 MxARI8 :$src)>;
def: Pat<(MxZExtLoadi32i8 MxCP_ARID:$src), (MOVZXd32p8 MxARID8:$src)>;
def: Pat<(MxZExtLoadi32i8 MxCP_ARII:$src), (MOVZXd32f8 MxARII8:$src)>;
+def: Pat<(MxZExtLoadi32i8 MxCP_PCD :$src), (MOVZXd32q8 MxPCD8 :$src)>;
// i32 <- zext i16
def: Pat<(i32 (zext i16:$src)), (MOVZXd32d16 MxDRD16:$src)>;
def: Pat<(MxZExtLoadi32i16 MxCP_ARI :$src), (MOVZXd32j16 MxARI16 :$src)>;
def: Pat<(MxZExtLoadi32i16 MxCP_ARID:$src), (MOVZXd32p16 MxARID16:$src)>;
def: Pat<(MxZExtLoadi32i16 MxCP_ARII:$src), (MOVZXd32f16 MxARII16:$src)>;
+def: Pat<(MxZExtLoadi32i16 MxCP_PCD :$src), (MOVZXd32q16 MxPCD16 :$src)>;
// i16 <- anyext i8
def: Pat<(i16 (anyext i8:$src)),
@@ -614,3 +621,54 @@ def : Pat<(i8 (trunc i32:$src)),
(EXTRACT_SUBREG MxXRD32:$src, MxSubRegIndex8Lo)>;
def : Pat<(i8 (trunc i16:$src)),
(EXTRACT_SUBREG MxXRD16:$src, MxSubRegIndex8Lo)>;
+
+//===----------------------------------------------------------------------===//
+// FMOVE
+//===----------------------------------------------------------------------===//
+
+let Defs = [FPS] in
+class MxFMove<string size, dag outs, dag ins, list<dag> pattern,
+ string rounding = "">
+ : MxInst<outs, ins,
+ "f"#rounding#"move."#size#"\t$src, $dst", pattern> {
+ // Only FMOVE uses FPC
+ let Uses = !if(!eq(rounding, ""), [FPC], []);
+
+ // FSMOVE and FDMOVE are only available after M68040
+ let Predicates = [!if(!eq(rounding, ""), AtLeastM68881, AtLeastM68040)];
+}
+
+// FPDR <- FPDR
+class MxFMove_FF<string rounding, int size,
+ MxOpBundle Opnd = !cast<MxOpBundle>("MxOp"#size#"AddrMode_fpr")>
+ : MxFMove<"x", (outs Opnd.Op:$dst), (ins Opnd.Op:$src),
+ [(null_frag)], rounding> {
+ let Inst = (ascend
+ (descend 0b1111,
+ /*COPROCESSOR ID*/0b001,
+ 0b000,
+ /*MODE + REGISTER*/0b000000
+ ),
+ (descend 0b0, /* R/M */0b0, 0b0,
+ /*SOURCE SPECIFIER*/
+ (operand "$src", 3),
+ /*DESTINATION*/
+ (operand "$dst", 3),
+ /*OPMODE*/
+ !cond(!eq(rounding, "s"): 0b1000000,
+ !eq(rounding, "d"): 0b1000100,
+ true: 0b0000000)
+ )
+ );
+}
+
+foreach rounding = ["", "s", "d"] in {
+ def F # !toupper(rounding) # MOV80fp_fp : MxFMove_FF<rounding, 80>;
+
+ // We don't have `fmove.s` or `fmove.d` because values will be converted to
+ // f80 upon storing into the register, but FMOV32/64fp_fp are still needed
+ // to make codegen easier.
+ let isCodeGenOnly = true in
+ foreach size = [32, 64] in
+ def F # !toupper(rounding) # MOV # size # fp_fp : MxFMove_FF<rounding, size>;
+}
diff --git a/llvm/lib/Target/M68k/M68kInstrFormats.td b/llvm/lib/Target/M68k/M68kInstrFormats.td
index 2343af09788e..38d3127ac6a6 100644
--- a/llvm/lib/Target/M68k/M68kInstrFormats.td
+++ b/llvm/lib/Target/M68k/M68kInstrFormats.td
@@ -69,179 +69,11 @@
// Encoding primitives
//===----------------------------------------------------------------------===//
-class MxBead<bits<4> type, bit b4 = 0, bit b5 = 0, bit b6 = 0, bit b7 = 0> {
- bits<8> Value = 0b00000000;
- let Value{3-0} = type;
- let Value{4} = b4;
- let Value{5} = b5;
- let Value{6} = b6;
- let Value{7} = b7;
-}
-
-/// System beads, allow to control beading flow
-def MxBeadTerm : MxBead<0x0, 0, 0, 0, 0>;
-def MxBeadIgnore : MxBead<0x0, 1, 0, 0, 0>;
-
-/// Add plain bit to the instruction
-class MxBead1Bit <bits<1> b> : MxBead<0x1, b>;
-class MxBead2Bits <bits<2> b> : MxBead<0x2, b{0}, b{1}>;
-class MxBead3Bits <bits<3> b> : MxBead<0x3, b{0}, b{1}, b{2}>;
-class MxBead4Bits <bits<4> b> : MxBead<0x4, b{0}, b{1}, b{2}, b{3}>;
-
-/// bits<3> o - operand number
-/// bit a - use alternative, used to select index register or
-/// outer displacement/immediate
-/// suffix NP means non-padded
-class MxBeadDAReg <bits<3> o, bit a = 0> : MxBead<0x5, o{0}, o{1}, o{2}, a>;
-class MxBeadDA <bits<3> o, bit a = 0> : MxBead<0x6, o{0}, o{1}, o{2}, a>;
-class MxBeadReg <bits<3> o, bit a = 0> : MxBead<0x7, o{0}, o{1}, o{2}, a>;
-class MxBeadDReg <bits<3> o, bit a = 0> : MxBead<0x8, o{0}, o{1}, o{2}, a>;
-class MxBead8Disp <bits<3> o, bit a = 0> : MxBead<0x9, o{0}, o{1}, o{2}, a>;
-
-/// Add Immediate to the instruction. 8-bit version is padded with zeros to fit
-/// the word.
-class MxBead8Imm <bits<3> o, bit a = 0> : MxBead<0xA, o{0}, o{1}, o{2}, a>;
-class MxBead16Imm <bits<3> o, bit a = 0> : MxBead<0xB, o{0}, o{1}, o{2}, a>;
-class MxBead32Imm <bits<3> o, bit a = 0> : MxBead<0xC, o{0}, o{1}, o{2}, a>;
-
-/// Encodes an immediate 0-7(alt. 1-8) into 3 bit field
-class MxBead3Imm <bits<3> o, bit a = 0> : MxBead<0xD, o{0}, o{1}, o{2}, a>;
-
-
-class MxEncoding<MxBead n0 = MxBeadTerm, MxBead n1 = MxBeadTerm,
- MxBead n2 = MxBeadTerm, MxBead n3 = MxBeadTerm,
- MxBead n4 = MxBeadTerm, MxBead n5 = MxBeadTerm,
- MxBead n6 = MxBeadTerm, MxBead n7 = MxBeadTerm,
- MxBead n8 = MxBeadTerm, MxBead n9 = MxBeadTerm,
- MxBead n10 = MxBeadTerm, MxBead n11 = MxBeadTerm,
- MxBead n12 = MxBeadTerm, MxBead n13 = MxBeadTerm,
- MxBead n14 = MxBeadTerm, MxBead n15 = MxBeadTerm,
- MxBead n16 = MxBeadTerm, MxBead n17 = MxBeadTerm,
- MxBead n18 = MxBeadTerm, MxBead n19 = MxBeadTerm,
- MxBead n20 = MxBeadTerm, MxBead n21 = MxBeadTerm,
- MxBead n22 = MxBeadTerm, MxBead n23 = MxBeadTerm> {
- bits <192> Value;
- let Value{7-0} = n0.Value;
- let Value{15-8} = n1.Value;
- let Value{23-16} = n2.Value;
- let Value{31-24} = n3.Value;
- let Value{39-32} = n4.Value;
- let Value{47-40} = n5.Value;
- let Value{55-48} = n6.Value;
- let Value{63-56} = n7.Value;
- let Value{71-64} = n8.Value;
- let Value{79-72} = n9.Value;
- let Value{87-80} = n10.Value;
- let Value{95-88} = n11.Value;
- let Value{103-96} = n12.Value;
- let Value{111-104} = n13.Value;
- let Value{119-112} = n14.Value;
- let Value{127-120} = n15.Value;
- let Value{135-128} = n16.Value;
- let Value{143-136} = n17.Value;
- let Value{151-144} = n18.Value;
- let Value{159-152} = n19.Value;
- let Value{167-160} = n20.Value;
- let Value{175-168} = n21.Value;
- let Value{183-176} = n22.Value;
- let Value{191-184} = n23.Value;
-}
-
-class MxEncFixed<bits<16> value> : MxEncoding {
- let Value{7-0} = MxBead4Bits<value{3-0}>.Value;
- let Value{15-8} = MxBead4Bits<value{7-4}>.Value;
- let Value{23-16} = MxBead4Bits<value{11-8}>.Value;
- let Value{31-24} = MxBead4Bits<value{15-12}>.Value;
-}
-
-//===----------------------------------------------------------------------===//
-// Encoding composites
-//
-// These must be lowered to MxEncoding by instr specific wrappers
-//
-// HERE BE DRAGONS...
-//===----------------------------------------------------------------------===//
-
-class MxEncByte<bits<8> value> : MxEncoding {
- MxBead4Bits LO = MxBead4Bits<value{3-0}>;
- MxBead4Bits HI = MxBead4Bits<value{7-4}>;
-}
-
-def MxEncEmpty : MxEncoding;
-
-
-/// M68k Standard Effective Address layout:
-///
-/// :-------------------:
-/// | 5 4 3 | 2 1 0 |
-/// | mode | reg |
-/// :-------------------:
-///
-/// If the EA is a direct register mode, bits 4 and 5 are 0, and the register
-/// number will be encoded in bit 0 - 3. Since the first address register's
-/// (A0) register number is 8, we can easily tell data registers from
-/// address registers by only inspecting bit 3 (i.e. if bit 3 is set, it's an
-/// address register).
-///
-///
-/// But MOVE instruction uses reversed layout for destination EA:
-///
-/// :-------------------:
-/// | 5 4 3 | 2 1 0 |
-/// | reg | mode |
-/// :-------------------:
-///
-/// And this complicates things a bit because the DA bit is now separated from
-/// the register and we have to encode those separately using MxBeadDA<opN>
-///
-class MxEncEA<MxBead reg, MxBead mode, MxBead da = MxBeadIgnore> {
- MxBead Reg = reg;
- MxBead Mode = mode;
- MxBead DA = da;
-}
-
class MxEncMemOp {
dag EA = (ascend);
dag Supplement = (ascend);
}
-// FIXME: Is there a way to factorize the addressing mode suffix (i.e.
-// 'r', 'd', 'a' etc.) and use something like multiclass to replace?
-def MxEncEAr_0: MxEncEA<MxBeadDAReg<0>, MxBead2Bits<0b00>>;
-def MxEncEAd_0: MxEncEA<MxBeadDReg<0>, MxBead2Bits<0b00>, MxBead1Bit<0>>;
-def MxEncEAa_0: MxEncEA<MxBeadReg<0>, MxBead2Bits<0b00>, MxBead1Bit<1>>;
-def MxEncEAj_0: MxEncEA<MxBeadReg<0>, MxBead2Bits<0b01>, MxBead1Bit<0>>;
-def MxEncEAo_0: MxEncEA<MxBeadReg<0>, MxBead2Bits<0b01>, MxBead1Bit<1>>;
-def MxEncEAe_0: MxEncEA<MxBeadReg<0>, MxBead2Bits<0b10>, MxBead1Bit<0>>;
-def MxEncEAp_0: MxEncEA<MxBeadReg<0>, MxBead2Bits<0b10>, MxBead1Bit<1>>;
-def MxEncEAf_0: MxEncEA<MxBeadReg<0>, MxBead2Bits<0b11>, MxBead1Bit<0>>;
-
-def MxEncEAa_0_reflected : MxEncEA<MxBeadReg<0>, MxBead3Bits<0b001>>;
-def MxEncEAr_0_reflected : MxEncEA<MxBeadReg<0>, MxBead2Bits<0b00>, MxBeadDA<0>>;
-
-def MxEncEAr_1: MxEncEA<MxBeadDAReg<1>, MxBead2Bits<0b00>>;
-def MxEncEAd_1: MxEncEA<MxBeadDReg<1>, MxBead2Bits<0b00>, MxBead1Bit<0>>;
-def MxEncEAa_1: MxEncEA<MxBeadReg<1>, MxBead2Bits<0b00>, MxBead1Bit<1>>;
-def MxEncEAj_1: MxEncEA<MxBeadReg<1>, MxBead2Bits<0b01>, MxBead1Bit<0>>;
-def MxEncEAo_1: MxEncEA<MxBeadReg<1>, MxBead2Bits<0b01>, MxBead1Bit<1>>;
-def MxEncEAe_1: MxEncEA<MxBeadReg<1>, MxBead2Bits<0b10>, MxBead1Bit<0>>;
-def MxEncEAp_1: MxEncEA<MxBeadReg<1>, MxBead2Bits<0b10>, MxBead1Bit<1>>;
-def MxEncEAf_1: MxEncEA<MxBeadReg<1>, MxBead2Bits<0b11>, MxBead1Bit<0>>;
-
-def MxEncEAr_2: MxEncEA<MxBeadDAReg<2>, MxBead2Bits<0b00>>;
-def MxEncEAd_2: MxEncEA<MxBeadDReg<2>, MxBead2Bits<0b00>, MxBead1Bit<0>>;
-def MxEncEAa_2: MxEncEA<MxBeadReg<2>, MxBead2Bits<0b00>, MxBead1Bit<1>>;
-def MxEncEAj_2: MxEncEA<MxBeadReg<2>, MxBead2Bits<0b01>, MxBead1Bit<0>>;
-def MxEncEAo_2: MxEncEA<MxBeadReg<2>, MxBead2Bits<0b01>, MxBead1Bit<1>>;
-def MxEncEAe_2: MxEncEA<MxBeadReg<2>, MxBead2Bits<0b10>, MxBead1Bit<0>>;
-def MxEncEAp_2: MxEncEA<MxBeadReg<2>, MxBead2Bits<0b10>, MxBead1Bit<1>>;
-def MxEncEAf_2: MxEncEA<MxBeadReg<2>, MxBead2Bits<0b11>, MxBead1Bit<0>>;
-
-def MxEncEAb : MxEncEA<MxBead3Bits<0b001>, MxBead2Bits<0b11>, MxBead1Bit<1>>;
-def MxEncEAq : MxEncEA<MxBead3Bits<0b010>, MxBead2Bits<0b11>, MxBead1Bit<1>>;
-def MxEncEAk : MxEncEA<MxBead3Bits<0b011>, MxBead2Bits<0b11>, MxBead1Bit<1>>;
-def MxEncEAi : MxEncEA<MxBead3Bits<0b100>, MxBead2Bits<0b11>, MxBead1Bit<1>>;
-
class MxEncBriefExt<string reg_opnd, string disp_opnd,
bit size_w_l = false, int scale = 1,
string disp_encoder = ""> {
@@ -366,111 +198,14 @@ class MxEncAddrMode_e<string reg_opnd> : MxEncMemOp {
/*REGISTER*/(operand "$"#reg_opnd, 3));
}
-// Allows you to specify each bit of opcode
-class MxEncOpMode<MxBead b0, MxBead b1 = MxBeadIgnore, MxBead b2 = MxBeadIgnore> {
- MxBead B0 = b0;
- MxBead B1 = b1;
- MxBead B2 = b2;
-}
-
-// op EA, Dn
-def MxOpMode8dEA : MxEncOpMode<MxBead3Bits<0b000>>;
-def MxOpMode16dEA : MxEncOpMode<MxBead3Bits<0b001>>;
-def MxOpMode32dEA : MxEncOpMode<MxBead3Bits<0b010>>;
-
-// op EA, An
-def MxOpMode16aEA : MxEncOpMode<MxBead3Bits<0b011>>;
-def MxOpMode32aEA : MxEncOpMode<MxBead3Bits<0b111>>;
-
-// op EA, Rn
-// As you might noticed this guy is special... Since M68k differentiates
-// between Data and Address registers we required to use different OPMODE codes
-// for Address registers DST operands. One way of dealing with it is to use
-// separate tablegen instructions, but in this case it would force Register
-// Allocator to use specific Register Classes and eventually will lead to
-// superfluous moves. Another approach is to use reg-variadic encoding which will
-// change OPMODE base on Register Class used. Luckily, all the bits that differ go
-// from 0 to 1 and can be encoded with MxBeadDA.
-// Basically, if the register used is of Data type these encodings will be
-// the same as MxOpMode{16,32}dEA above and used with regular instructions(e.g. ADD,
-// SUB), but if the register is of Address type the appropriate bits will flip and
-// the instructions become of *A type(e.g ADDA, SUBA).
-def MxOpMode16rEA : MxEncOpMode<MxBead1Bit<1>, MxBeadDA<0>, MxBead1Bit<0>>;
-def MxOpMode32rEA : MxEncOpMode<MxBeadDA<0>, MxBead1Bit<1>, MxBeadDA<0>>;
-
-// op Dn, EA
-def MxOpMode8EAd : MxEncOpMode<MxBead3Bits<0b100>>;
-def MxOpMode16EAd : MxEncOpMode<MxBead3Bits<0b101>>;
-def MxOpMode32EAd : MxEncOpMode<MxBead3Bits<0b110>>;
-
-
-// Represents two types of extension word:
-// - Imm extension word
-// - Brief extension word
-class MxEncExt<MxBead imm = MxBeadIgnore, MxBead b8 = MxBeadIgnore,
- MxBead scale = MxBeadIgnore, MxBead wl = MxBeadIgnore,
- MxBead daReg = MxBeadIgnore> {
- MxBead Imm = imm;
- MxBead B8 = b8;
- MxBead Scale = scale;
- MxBead WL = wl;
- MxBead DAReg = daReg;
+class MxEncSize<bits<2> value> {
+ bits<2> Value = value;
}
-
-def MxExtEmpty : MxEncExt;
-
-// These handle encoding of displacement fields, absolute addresses and
-// immediate values, since encoding for these categories is mainly the same,
-// with exception of some weird immediates.
-def MxExtI8_0 : MxEncExt<MxBead8Imm<0>>;
-def MxExtI16_0 : MxEncExt<MxBead16Imm<0>>;
-def MxExtI32_0 : MxEncExt<MxBead32Imm<0>>;
-
-def MxExtI8_1 : MxEncExt<MxBead8Imm<1>>;
-def MxExtI16_1 : MxEncExt<MxBead16Imm<1>>;
-def MxExtI32_1 : MxEncExt<MxBead32Imm<1>>;
-
-def MxExtI8_2 : MxEncExt<MxBead8Imm<2>>;
-def MxExtI16_2 : MxEncExt<MxBead16Imm<2>>;
-def MxExtI32_2 : MxEncExt<MxBead32Imm<2>>;
-
-// NOTE They are all using Long Xn
-def MxExtBrief_0 : MxEncExt<MxBead8Disp<0>, MxBead1Bit<0b0>,
- MxBead2Bits<0b00>, MxBead1Bit<1>,
- MxBeadDAReg<0, 1>>;
-
-def MxExtBrief_1 : MxEncExt<MxBead8Disp<1>, MxBead1Bit<0b0>,
- MxBead2Bits<0b00>, MxBead1Bit<1>,
- MxBeadDAReg<1, 1>>;
-
-def MxExtBrief_2 : MxEncExt<MxBead8Disp<2>, MxBead1Bit<0b0>,
- MxBead2Bits<0b00>, MxBead1Bit<1>,
- MxBeadDAReg<2, 1>>;
-
-def MxExtBrief_3 : MxEncExt<MxBead8Disp<3>, MxBead1Bit<0b0>,
- MxBead2Bits<0b00>, MxBead1Bit<1>,
- MxBeadDAReg<3, 1>>;
-
-def MxExtBrief_4 : MxEncExt<MxBead8Disp<4>, MxBead1Bit<0b0>,
- MxBead2Bits<0b00>, MxBead1Bit<1>,
- MxBeadDAReg<4, 1>>;
-
-class MxEncSize<bits<2> value> : MxBead2Bits<value>;
def MxEncSize8 : MxEncSize<0b00>;
def MxEncSize16 : MxEncSize<0b01>;
def MxEncSize32 : MxEncSize<0b10>;
def MxEncSize64 : MxEncSize<0b11>;
-// TODO: Remove "New" in the name after the codebead-based
-// representation is deprecated.
-class MxNewEncSize<bits<2> value> {
- bits<2> Value = value;
-}
-def MxNewEncSize8 : MxNewEncSize<0b00>;
-def MxNewEncSize16 : MxNewEncSize<0b01>;
-def MxNewEncSize32 : MxNewEncSize<0b10>;
-def MxNewEncSize64 : MxNewEncSize<0b11>;
-
// M68k INSTRUCTION. Most instructions specify the location of an operand by
// using the effective address field in the operation word. The effective address
// is composed of two 3-bit fields: the mode field and the register field. The
@@ -484,7 +219,6 @@ def MxNewEncSize64 : MxNewEncSize<0b11>;
class MxInst<dag outs, dag ins,
string asmStr = "",
list<dag> pattern = [],
- MxEncoding beads = MxEncEmpty,
InstrItinClass itin = NoItinerary>
: Instruction {
let Namespace = "M68k";
@@ -494,8 +228,6 @@ class MxInst<dag outs, dag ins,
let Pattern = pattern;
let Itinerary = itin;
- // Byte stream
- field bits<192> Beads = beads.Value;
dag Inst = (ascend);
// Number of bytes
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.cpp b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
index dc394f56d0c0..1803a936701f 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.cpp
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.cpp
@@ -609,7 +609,7 @@ bool M68kInstrInfo::isPCRelRegisterOperandLegal(
const MachineInstr *MI = MO.getParent();
const unsigned NameIndices = M68kInstrNameIndices[MI->getOpcode()];
StringRef InstrName(&M68kInstrNameData[NameIndices]);
- const unsigned OperandNo = MI->getOperandNo(&MO);
+ const unsigned OperandNo = MO.getOperandNo();
// If this machine operand is the 2nd operand, then check
// whether the instruction has destination addressing mode 'k'.
@@ -781,7 +781,7 @@ unsigned M68kInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
return GlobalBaseReg;
// Create the register. The code to initialize it is inserted later,
- // by the CGBR pass (below).
+ // by the M68kGlobalBaseReg pass (below).
//
// NOTE
// Normally M68k uses A5 register as global base pointer but this will
@@ -809,15 +809,25 @@ M68kInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
{MO_GOT, "m68k-got"},
{MO_GOTOFF, "m68k-gotoff"},
{MO_GOTPCREL, "m68k-gotpcrel"},
- {MO_PLT, "m68k-plt"}};
+ {MO_PLT, "m68k-plt"},
+ {MO_TLSGD, "m68k-tlsgd"},
+ {MO_TLSLD, "m68k-tlsld"},
+ {MO_TLSLDM, "m68k-tlsldm"},
+ {MO_TLSIE, "m68k-tlsie"},
+ {MO_TLSLE, "m68k-tlsle"}};
return ArrayRef(TargetFlags);
}
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "m68k-create-global-base-reg"
+
+#define PASS_NAME "M68k PIC Global Base Reg Initialization"
+
namespace {
-/// Create Global Base Reg pass. This initializes the PIC global base register
-struct CGBR : public MachineFunctionPass {
+/// This pass initializes the PIC global base register.
+struct M68kGlobalBaseReg : public MachineFunctionPass {
static char ID;
- CGBR() : MachineFunctionPass(ID) {}
+ M68kGlobalBaseReg() : MachineFunctionPass(ID) {}
bool runOnMachineFunction(MachineFunction &MF) override {
const M68kSubtarget &STI = MF.getSubtarget<M68kSubtarget>();
@@ -842,16 +852,16 @@ struct CGBR : public MachineFunctionPass {
return true;
}
- StringRef getPassName() const override {
- return "M68k PIC Global Base Reg Initialization";
- }
-
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
+char M68kGlobalBaseReg::ID = 0;
} // namespace
-char CGBR::ID = 0;
-FunctionPass *llvm::createM68kGlobalBaseRegPass() { return new CGBR(); }
+INITIALIZE_PASS(M68kGlobalBaseReg, DEBUG_TYPE, PASS_NAME, false, false)
+
+FunctionPass *llvm::createM68kGlobalBaseRegPass() {
+ return new M68kGlobalBaseReg();
+}
diff --git a/llvm/lib/Target/M68k/M68kInstrInfo.td b/llvm/lib/Target/M68k/M68kInstrInfo.td
index a9591869f924..6d3370d5ee90 100644
--- a/llvm/lib/Target/M68k/M68kInstrInfo.td
+++ b/llvm/lib/Target/M68k/M68kInstrInfo.td
@@ -164,6 +164,9 @@ class MxSize<int num, string id, string full> {
def MxSize8 : MxSize<8, "b", "byte">;
def MxSize16 : MxSize<16, "w", "word">;
def MxSize32 : MxSize<32, "l", "long">;
+def MxSizeF32 : MxSize<32, "s", "f32">;
+def MxSizeF64 : MxSize<64, "d", "f64">;
+def MxSizeF80 : MxSize<80, "x", "f80">;
class MxOpClass<string name,
list<AsmOperandClass> superClasses = []> : AsmOperandClass {
@@ -181,6 +184,8 @@ def MxRegClass : MxOpClass<"Reg">;
let RenderMethod = "addRegOperands", SuperClasses = [MxRegClass]in {
def MxARegClass : MxOpClass<"AReg">;
def MxDRegClass : MxOpClass<"DReg">;
+
+ def MxFPDRegClass : MxOpClass<"FPDReg">;
}
class MxOperand<ValueType vt, MxSize size, string letter, RegisterClass rc, dag pat = (null_frag)> {
@@ -230,6 +235,13 @@ let ParserMatchClass = MxARegClass in {
def MxARD32_TC : MxRegOp<i32, AR32_TC, MxSize32, "a">;
}
+// FLOATING POINT DATA REGISTER.
+let ParserMatchClass = MxFPDRegClass in {
+ def MxFPR32 : MxRegOp<f32, FPDR32, MxSizeF32, "fp">;
+ def MxFPR64 : MxRegOp<f64, FPDR64, MxSizeF64, "fp">;
+ def MxFPR80 : MxRegOp<f80, FPDR80, MxSizeF80, "fp">;
+}
+
class MxMemOp<dag ops, MxSize size, string letter,
string printMethod = "printOperand",
AsmOperandClass parserMatchClass = ImmAsmOperand>
@@ -430,11 +442,15 @@ def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
def IsPIC : Predicate<"TM.isPositionIndependent()">;
def IsNotPIC : Predicate<"!TM.isPositionIndependent()">;
-def AtLeastM68000 : Predicate<"Subtarget->atLeastM68000()">;
-def AtLeastM68010 : Predicate<"Subtarget->atLeastM68010()">;
-def AtLeastM68020 : Predicate<"Subtarget->atLeastM68020()">;
-def AtLeastM68030 : Predicate<"Subtarget->atLeastM68030()">;
-def AtLeastM68040 : Predicate<"Subtarget->atLeastM68040()">;
+// ISA versions
+foreach i = [0,1,2,4,6] in
+def AtLeastM680 # i # "0" : Predicate<"Subtarget->atLeastM680"#i#"0()">,
+ AssemblerPredicate<(all_of
+ !cast<SubtargetFeature>("FeatureISA"#i#"0"))>;
+def AtLeastM68881 : Predicate<"Subtarget->atLeastM68881()">,
+ AssemblerPredicate<(all_of FeatureISA881)>;
+def AtLeastM68882 : Predicate<"Subtarget->atLeastM68882()">,
+ AssemblerPredicate<(all_of FeatureISA882)>;
//===----------------------------------------------------------------------===//
// Condition Codes
@@ -707,6 +723,10 @@ foreach size = [16, 32] in {
: MxOpBundle<size, !cast<MxOperand>("MxXRD"#size), ?>;
} // foreach size = [16, 32]
+foreach size = [32, 64, 80] in
+def MxOp#size#AddrMode_fpr
+ : MxOpBundle<size, !cast<MxOperand>("MxFPR"#size), ?>;
+
class MxType8Class<string rLet, MxOperand reg>
: MxType<i8, "b", "", rLet, reg,
MxARI8, MxCP_ARI,
diff --git a/llvm/lib/Target/M68k/M68kInstrShiftRotate.td b/llvm/lib/Target/M68k/M68kInstrShiftRotate.td
index b50354597a49..7de994626014 100644
--- a/llvm/lib/Target/M68k/M68kInstrShiftRotate.td
+++ b/llvm/lib/Target/M68k/M68kInstrShiftRotate.td
@@ -41,7 +41,7 @@ defvar MxROOP_RO = 0b11;
/// 1 1 1 0 | REG/IMM | D | SIZE |R/I| OP | REG
/// ------------+---------+---+------+---+------+---------
class MxSREncoding<bit kind, string src_opnd, string dst_opnd,
- bit direction, bits<2> ro_op, MxNewEncSize size> {
+ bit direction, bits<2> ro_op, MxEncSize size> {
dag Value = (descend 0b1110,
// REG/IMM
(operand "$"#src_opnd, 3),
@@ -57,7 +57,7 @@ class MxSR_DD<string MN, MxType TYPE, SDNode NODE, bit RODI, bits<2> ROOP>
MN#"."#TYPE.Prefix#"\t$opd, $dst",
[(set TYPE.VT:$dst, (NODE TYPE.VT:$src, TYPE.VT:$opd))]> {
let Inst = MxSREncoding<MxROKind_R, "opd", "dst", RODI, ROOP,
- !cast<MxNewEncSize>("MxNewEncSize"#TYPE.Size)>.Value;
+ !cast<MxEncSize>("MxEncSize"#TYPE.Size)>.Value;
}
// $reg <- $reg op $imm
@@ -69,7 +69,7 @@ class MxSR_DI<string MN, MxType TYPE, SDNode NODE, bit RODI, bits<2> ROOP>
(NODE TYPE.VT:$src,
!cast<ImmLeaf>("Mximm"#TYPE.Size#"_1to8"):$opd))]> {
let Inst = MxSREncoding<MxROKind_I, "opd", "dst", RODI, ROOP,
- !cast<MxNewEncSize>("MxNewEncSize"#TYPE.Size)>.Value;
+ !cast<MxEncSize>("MxEncSize"#TYPE.Size)>.Value;
}
multiclass MxSROp<string MN, SDNode NODE, bit RODI, bits<2> ROOP> {
diff --git a/llvm/lib/Target/M68k/M68kMCInstLower.cpp b/llvm/lib/Target/M68k/M68kMCInstLower.cpp
index 40844803aead..b24d2d231c45 100644
--- a/llvm/lib/Target/M68k/M68kMCInstLower.cpp
+++ b/llvm/lib/Target/M68k/M68kMCInstLower.cpp
@@ -96,6 +96,21 @@ MCOperand M68kMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case M68kII::MO_PLT:
RefKind = MCSymbolRefExpr::VK_PLT;
break;
+ case M68kII::MO_TLSGD:
+ RefKind = MCSymbolRefExpr::VK_TLSGD;
+ break;
+ case M68kII::MO_TLSLD:
+ RefKind = MCSymbolRefExpr::VK_TLSLD;
+ break;
+ case M68kII::MO_TLSLDM:
+ RefKind = MCSymbolRefExpr::VK_TLSLDM;
+ break;
+ case M68kII::MO_TLSIE:
+ RefKind = MCSymbolRefExpr::VK_GOTTPOFF;
+ break;
+ case M68kII::MO_TLSLE:
+ RefKind = MCSymbolRefExpr::VK_TPOFF;
+ break;
}
if (!Expr) {
diff --git a/llvm/lib/Target/M68k/M68kMachineFunction.h b/llvm/lib/Target/M68k/M68kMachineFunction.h
index 188265079cd1..e40069e7afdb 100644
--- a/llvm/lib/Target/M68k/M68kMachineFunction.h
+++ b/llvm/lib/Target/M68k/M68kMachineFunction.h
@@ -16,7 +16,7 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/Support/MachineValueType.h"
+#include "llvm/CodeGen/MachineValueType.h"
namespace llvm {
diff --git a/llvm/lib/Target/M68k/M68kRegisterInfo.cpp b/llvm/lib/Target/M68k/M68kRegisterInfo.cpp
index d12705bc935c..62fb72ba4fd5 100644
--- a/llvm/lib/Target/M68k/M68kRegisterInfo.cpp
+++ b/llvm/lib/Target/M68k/M68kRegisterInfo.cpp
@@ -75,9 +75,9 @@ M68kRegisterInfo::getRegsForTailCall(const MachineFunction &MF) const {
unsigned
M68kRegisterInfo::getMatchingMegaReg(unsigned Reg,
const TargetRegisterClass *RC) const {
- for (MCSuperRegIterator Super(Reg, this); Super.isValid(); ++Super)
- if (RC->contains(*Super))
- return *Super;
+ for (MCPhysReg Super : superregs(Reg))
+ if (RC->contains(Super))
+ return Super;
return 0;
}
@@ -129,8 +129,8 @@ BitVector M68kRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
for (MCRegAliasIterator I(Reg, this, /* self */ true); I.isValid(); ++I) {
Reserved.set(*I);
}
- for (MCSubRegIterator I(Reg, this, /* self */ true); I.isValid(); ++I) {
- Reserved.set(*I);
+ for (MCPhysReg I : subregs_inclusive(Reg)) {
+ Reserved.set(I);
}
};
@@ -213,7 +213,7 @@ bool M68kRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
bool M68kRegisterInfo::requiresRegisterScavenging(
const MachineFunction &MF) const {
- return true;
+ return false;
}
bool M68kRegisterInfo::trackLivenessAfterRegAlloc(
diff --git a/llvm/lib/Target/M68k/M68kRegisterInfo.td b/llvm/lib/Target/M68k/M68kRegisterInfo.td
index 49874a2b1099..1567bcbb7319 100644
--- a/llvm/lib/Target/M68k/M68kRegisterInfo.td
+++ b/llvm/lib/Target/M68k/M68kRegisterInfo.td
@@ -68,6 +68,19 @@ defm A5 : MxAddressRegister<5, "a5", ["bp"]>;
defm A6 : MxAddressRegister<6, "a6", ["fp"]>;
defm SP : MxAddressRegister<7, "sp", ["usp", "ssp", "isp", "a7"]>;
+// Floating Point Registers
+class MxFPRegister<int INDEX, string REG_NAME, list<string> ALTNAMES = []>
+ : MxReg<REG_NAME, INDEX, /*SUBREGS=*/[], /*SUBIDX=*/[],
+ /*DWREGS=*/[!add(18,INDEX)], ALTNAMES>;
+
+foreach i = {0-7} in
+ def FP#i : MxFPRegister<i, "fp"#i>;
+
+// Unlike their counterparts in integer registers, these
+// control registers can be accessed and modified by instructions.
+def FPC : MxFPRegister<8, "fpcr", ["fpc"]>;
+def FPS : MxFPRegister<9, "fpsr", ["fps"]>;
+def FPIAR : MxFPRegister<10, "fpiar", ["fpi"]>;
// Pseudo Registers
class MxPseudoReg<string N, list<Register> SUBREGS = [], list<SubRegIndex> SUBIDX = []>
@@ -103,9 +116,16 @@ def XR32 : MxRegClass<[i32], 32, (add DR32, AR32)>;
def SPC : MxRegClass<[i32], 32, (add SP)>;
+// Floating Point Data Registers
+def FPDR32 : MxRegClass<[f32], 32, (sequence "FP%u", 0, 7)>;
+def FPDR64 : MxRegClass<[f64], 32, (add FPDR32)>;
+def FPDR80 : MxRegClass<[f80], 32, (add FPDR32)>;
+
let CopyCost = -1 in {
def CCRC : MxRegClass<[i8], 16, (add CCR)>;
def SRC : MxRegClass<[i16], 16, (add SR)>;
+
+ def FPCR : MxRegClass<[i32], 32, (add FPC, FPS, FPIAR)>;
}
let isAllocatable = 0 in {
diff --git a/llvm/lib/Target/M68k/M68kSubtarget.cpp b/llvm/lib/Target/M68k/M68kSubtarget.cpp
index e5a4d0d2811b..86e81cd08ea2 100644
--- a/llvm/lib/Target/M68k/M68kSubtarget.cpp
+++ b/llvm/lib/Target/M68k/M68kSubtarget.cpp
@@ -84,8 +84,6 @@ bool M68kSubtarget::isPositionIndependent() const {
bool M68kSubtarget::isLegalToCallImmediateAddr() const { return true; }
-bool M68kSubtarget::abiUsesSoftFloat() const { return true; }
-
M68kSubtarget &M68kSubtarget::initializeSubtargetDependencies(
StringRef CPU, Triple TT, StringRef FS, const M68kTargetMachine &TM) {
std::string CPUName = selectM68kCPU(TT, CPU).str();
diff --git a/llvm/lib/Target/M68k/M68kSubtarget.h b/llvm/lib/Target/M68k/M68kSubtarget.h
index c8331d3f091e..3fbec2f72fb8 100644
--- a/llvm/lib/Target/M68k/M68kSubtarget.h
+++ b/llvm/lib/Target/M68k/M68kSubtarget.h
@@ -51,6 +51,9 @@ protected:
enum SubtargetEnum { M00, M10, M20, M30, M40, M60 };
SubtargetEnum SubtargetKind = M00;
+ enum FPKindEnum { M881, M882 };
+ std::optional<FPKindEnum> FPUKind;
+
std::bitset<M68k::NUM_TARGET_REGS> UserReservedRegister;
InstrItineraryData InstrItins;
@@ -88,9 +91,12 @@ public:
bool atLeastM68040() const { return SubtargetKind >= M40; }
bool atLeastM68060() const { return SubtargetKind >= M60; }
- bool useSmallSection() const { return UseSmallSection; }
+ /// Floating point support
+ bool hasFPU() const { return FPUKind.has_value(); }
+ bool atLeastM68881() const { return hasFPU() && *FPUKind >= M881; }
+ bool atLeastM68882() const { return hasFPU() && *FPUKind >= M882; }
- bool abiUsesSoftFloat() const;
+ bool useSmallSection() const { return UseSmallSection; }
const Triple &getTargetTriple() const { return TargetTriple; }
diff --git a/llvm/lib/Target/M68k/M68kTargetMachine.cpp b/llvm/lib/Target/M68k/M68kTargetMachine.cpp
index 07453745e951..4e59e27bef8c 100644
--- a/llvm/lib/Target/M68k/M68kTargetMachine.cpp
+++ b/llvm/lib/Target/M68k/M68kTargetMachine.cpp
@@ -23,7 +23,6 @@
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/PassRegistry.h"
@@ -39,6 +38,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeM68kTarget() {
auto *PR = PassRegistry::getPassRegistry();
initializeGlobalISel(*PR);
initializeM68kDAGToDAGISelPass(*PR);
+ initializeM68kExpandPseudoPass(*PR);
+ initializeM68kGlobalBaseRegPass(*PR);
+ initializeM68kCollapseMOVEMPass(*PR);
}
namespace {
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h
index bd2964ab84b1..e52b4961e3c8 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kBaseInfo.h
@@ -48,6 +48,32 @@ enum { MemDisp = 0, MemBase = 1, MemIndex = 2, MemOuter = 3 };
/// ([bd,PC,Xn],od)
enum { PCRelDisp = 0, PCRelIndex = 1, PCRelOuter = 2 };
+enum class MemAddrModeKind : unsigned {
+ j = 1, // (An)
+ o, // (An)+
+ e, // -(An)
+ p, // (d,An)
+ f, // (d,An,Xn.L)
+ F, // (d,An,Xn.W)
+ g, // (d,An,Xn.L,SCALE)
+ G, // (d,An,Xn.W,SCALE)
+ u, // ([bd,An],Xn.L,SCALE,od)
+ U, // ([bd,An],Xn.W,SCALE,od)
+ v, // ([bd,An,Xn.L,SCALE],od)
+ V, // ([bd,An,Xn.W,SCALE],od)
+ b, // abs.L
+ B, // abs.W
+ q, // (d,PC)
+ k, // (d,PC,Xn.L)
+ K, // (d,PC,Xn.W)
+ l, // (d,PC,Xn.L,SCALE)
+ L, // (d,PC,Xn.W,SCALE)
+ x, // ([bd,PC],Xn.L,SCALE,od)
+ X, // ([bd,PC],Xn.W,SCALE,od)
+ y, // ([bd,PC,Xn.L,SCALE],od)
+ Y // ([bd,PC,Xn.W,SCALE],od)
+};
+
// On a LE host:
// MSB LSB MSB LSB
// | 0x12 0x34 | 0xAB 0xCD | -> | 0xAB 0xCD | 0x12 0x34 |
@@ -131,6 +157,37 @@ enum TOF {
///
/// name@PLT
MO_PLT,
+
+ /// On a symbol operand, this indicates that the immediate is the offset to
+ /// the slot in GOT which stores the information for accessing the TLS
+ /// variable. This is used when operating in Global Dynamic mode.
+ /// name@TLSGD
+ MO_TLSGD,
+
+ /// On a symbol operand, this indicates that the immediate is the offset to
+ /// the variable within the thread local storage when operating in Local
+ /// Dynamic mode.
+ /// name@TLSLD
+ MO_TLSLD,
+
+ /// On a symbol operand, this indicates that the immediate is the offset to
+ /// the slot in GOT which stores the information for accessing the TLS
+ /// variable. This is used when operating in Local Dynamic mode.
+ /// name@TLSLDM
+ MO_TLSLDM,
+
+ /// On a symbol operand, this indicates that the immediate is the offset to
+ /// the variable within the thread local storage when operating in Initial
+ /// Exec mode.
+ /// name@TLSIE
+ MO_TLSIE,
+
+ /// On a symbol operand, this indicates that the immediate is the offset to
+ /// the variable within the thread local storage when operating in Local
+ /// Exec mode.
+ /// name@TLSLE
+ MO_TLSLE,
+
}; // enum TOF
/// Return true if the specified TargetFlag operand is a reference to a stub
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp
index 27f1b3a3fac8..cac068e4dddf 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kELFObjectWriter.cpp
@@ -70,6 +70,57 @@ unsigned M68kELFObjectWriter::getRelocType(MCContext &Ctx,
switch (Modifier) {
default:
llvm_unreachable("Unimplemented");
+
+ case MCSymbolRefExpr::VK_TLSGD:
+ switch (Type) {
+ case RT_32:
+ return ELF::R_68K_TLS_GD32;
+ case RT_16:
+ return ELF::R_68K_TLS_GD16;
+ case RT_8:
+ return ELF::R_68K_TLS_GD8;
+ }
+ llvm_unreachable("Unrecognized size");
+ case MCSymbolRefExpr::VK_TLSLDM:
+ switch (Type) {
+ case RT_32:
+ return ELF::R_68K_TLS_LDM32;
+ case RT_16:
+ return ELF::R_68K_TLS_LDM16;
+ case RT_8:
+ return ELF::R_68K_TLS_LDM8;
+ }
+ llvm_unreachable("Unrecognized size");
+ case MCSymbolRefExpr::VK_TLSLD:
+ switch (Type) {
+ case RT_32:
+ return ELF::R_68K_TLS_LDO32;
+ case RT_16:
+ return ELF::R_68K_TLS_LDO16;
+ case RT_8:
+ return ELF::R_68K_TLS_LDO8;
+ }
+ llvm_unreachable("Unrecognized size");
+ case MCSymbolRefExpr::VK_GOTTPOFF:
+ switch (Type) {
+ case RT_32:
+ return ELF::R_68K_TLS_IE32;
+ case RT_16:
+ return ELF::R_68K_TLS_IE16;
+ case RT_8:
+ return ELF::R_68K_TLS_IE8;
+ }
+ llvm_unreachable("Unrecognized size");
+ case MCSymbolRefExpr::VK_TPOFF:
+ switch (Type) {
+ case RT_32:
+ return ELF::R_68K_TLS_LE32;
+ case RT_16:
+ return ELF::R_68K_TLS_LE16;
+ case RT_8:
+ return ELF::R_68K_TLS_LE8;
+ }
+ llvm_unreachable("Unrecognized size");
case MCSymbolRefExpr::VK_None:
switch (Type) {
case RT_32:
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp
index 97a5af45de02..84800fc762cb 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.cpp
@@ -147,47 +147,6 @@ void M68kInstPrinter::printDisp(const MCInst *MI, unsigned opNum,
Op.getExpr()->print(O, &MAI);
}
-void M68kInstPrinter::printARIMem(const MCInst *MI, unsigned opNum,
- raw_ostream &O) {
- O << '(';
- printOperand(MI, opNum, O);
- O << ')';
-}
-
-void M68kInstPrinter::printARIPIMem(const MCInst *MI, unsigned opNum,
- raw_ostream &O) {
- O << "(";
- printOperand(MI, opNum, O);
- O << ")+";
-}
-
-void M68kInstPrinter::printARIPDMem(const MCInst *MI, unsigned opNum,
- raw_ostream &O) {
- O << "-(";
- printOperand(MI, opNum, O);
- O << ")";
-}
-
-void M68kInstPrinter::printARIDMem(const MCInst *MI, unsigned opNum,
- raw_ostream &O) {
- O << '(';
- printDisp(MI, opNum + M68k::MemDisp, O);
- O << ',';
- printOperand(MI, opNum + M68k::MemBase, O);
- O << ')';
-}
-
-void M68kInstPrinter::printARIIMem(const MCInst *MI, unsigned opNum,
- raw_ostream &O) {
- O << '(';
- printDisp(MI, opNum + M68k::MemDisp, O);
- O << ',';
- printOperand(MI, opNum + M68k::MemBase, O);
- O << ',';
- printOperand(MI, opNum + M68k::MemIndex, O);
- O << ')';
-}
-
// NOTE forcing (W,L) size available since M68020 only
void M68kInstPrinter::printAbsMem(const MCInst *MI, unsigned opNum,
raw_ostream &O) {
@@ -201,19 +160,3 @@ void M68kInstPrinter::printAbsMem(const MCInst *MI, unsigned opNum,
assert(MO.isImm() && "absolute memory addressing needs an immediate");
O << format("$%0" PRIx64, (uint64_t)MO.getImm());
}
-
-void M68kInstPrinter::printPCDMem(const MCInst *MI, uint64_t Address,
- unsigned opNum, raw_ostream &O) {
- O << '(';
- printDisp(MI, opNum + M68k::PCRelDisp, O);
- O << ",%pc)";
-}
-
-void M68kInstPrinter::printPCIMem(const MCInst *MI, uint64_t Address,
- unsigned opNum, raw_ostream &O) {
- O << '(';
- printDisp(MI, opNum + M68k::PCRelDisp, O);
- O << ",%pc,";
- printOperand(MI, opNum + M68k::PCRelIndex, O);
- O << ')';
-}
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h
index 5e104856adb1..096317630458 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kInstPrinter.h
@@ -14,13 +14,17 @@
#ifndef LLVM_LIB_TARGET_M68K_INSTPRINTER_M68KINSTPRINTER_H
#define LLVM_LIB_TARGET_M68K_INSTPRINTER_M68KINSTPRINTER_H
+#include "M68kMemOperandPrinter.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
class TargetMachine;
-class M68kInstPrinter : public MCInstPrinter {
+class M68kInstPrinter : public MCInstPrinter,
+ public M68kMemOperandPrinter<M68kInstPrinter, MCInst> {
+ friend class M68kMemOperandPrinter;
+
public:
M68kInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
@@ -48,16 +52,7 @@ private:
/// Print register mask for MOVEM instruction in order A7-A0,D7-D0
void printMoveMaskR(const MCInst *MI, unsigned opNum, raw_ostream &O);
void printDisp(const MCInst *MI, unsigned opNum, raw_ostream &O);
- void printARIMem(const MCInst *MI, unsigned opNum, raw_ostream &O);
- void printARIPIMem(const MCInst *MI, unsigned opNum, raw_ostream &O);
- void printARIPDMem(const MCInst *MI, unsigned opNum, raw_ostream &O);
- void printARIDMem(const MCInst *MI, unsigned opNum, raw_ostream &O);
- void printARIIMem(const MCInst *MI, unsigned opNum, raw_ostream &O);
void printAbsMem(const MCInst *MI, unsigned opNum, raw_ostream &O);
- void printPCDMem(const MCInst *MI, uint64_t Address, unsigned opNum,
- raw_ostream &O);
- void printPCIMem(const MCInst *MI, uint64_t Address, unsigned opNum,
- raw_ostream &O);
//===----------------------------------------------------------------------===//
// Specializations
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.cpp
index 005d2d38f53d..bf5d63c613f2 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCAsmInfo.cpp
@@ -13,7 +13,7 @@
#include "M68kMCAsmInfo.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp
index d4fc0510f944..7fc5395671cf 100644
--- a/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp
@@ -65,7 +65,7 @@ public:
~M68kMCCodeEmitter() override {}
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
};
@@ -199,7 +199,8 @@ void M68kMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &Op,
}
}
-void M68kMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+void M68kMCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
unsigned Opcode = MI.getOpcode();
@@ -216,7 +217,7 @@ void M68kMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
int64_t InstSize = EncodedInst.getBitWidth();
for (uint64_t Word : Data) {
for (int i = 0; i < 4 && InstSize > 0; ++i, InstSize -= 16) {
- support::endian::write<uint16_t>(OS, static_cast<uint16_t>(Word),
+ support::endian::write<uint16_t>(CB, static_cast<uint16_t>(Word),
support::big);
Word >>= 16;
}
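
The hunk above switches encodeInstruction from streaming into a raw_ostream to appending into a SmallVectorImpl<char> byte buffer, still emitting the encoded instruction 16 bits at a time in big-endian order. Below is a minimal standalone sketch of that emission pattern; std::vector<uint8_t> stands in for the buffer and the hand-rolled writeBE16 helper stands in for support::endian::write, so this is an illustration rather than the LLVM code itself.

#include <cstdint>
#include <cstdio>
#include <vector>

// Append one 16-bit word in big-endian order, standing in for
// support::endian::write<uint16_t>(CB, Word, support::big).
static void writeBE16(std::vector<uint8_t> &CB, uint16_t Word) {
  CB.push_back(static_cast<uint8_t>(Word >> 8));
  CB.push_back(static_cast<uint8_t>(Word & 0xFF));
}

int main() {
  // Pretend this is a 48-bit encoded instruction (three 16-bit words).
  uint64_t Encoded = 0x123456789ABCULL;
  int InstSize = 48; // bits left to emit

  std::vector<uint8_t> CB;
  // Emit the low word first and shift the remaining bits down each time,
  // following the emission order used in encodeInstruction above.
  for (uint64_t Word = Encoded; InstSize > 0; InstSize -= 16, Word >>= 16)
    writeBE16(CB, static_cast<uint16_t>(Word));

  for (uint8_t B : CB)
    std::printf("%02x ", static_cast<unsigned>(B));
  std::printf("\n"); // prints: 9a bc 56 78 12 34
  return 0;
}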
diff --git a/llvm/lib/Target/M68k/MCTargetDesc/M68kMemOperandPrinter.h b/llvm/lib/Target/M68k/MCTargetDesc/M68kMemOperandPrinter.h
new file mode 100644
index 000000000000..cc5cc7a37e85
--- /dev/null
+++ b/llvm/lib/Target/M68k/MCTargetDesc/M68kMemOperandPrinter.h
@@ -0,0 +1,80 @@
+//===-- M68kMemOperandPrinter.h - Memory operands printing ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains memory operand printing logic shared between AsmPrinter
+/// and MCInstPrinter.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_M68K_MEMOPERANDPRINTER_M68KINSTPRINTER_H
+#define LLVM_LIB_TARGET_M68K_MEMOPERANDPRINTER_M68KINSTPRINTER_H
+
+#include "M68kBaseInfo.h"
+
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+template <class Derived, typename InstTy> class M68kMemOperandPrinter {
+ Derived &impl() { return *static_cast<Derived *>(this); }
+
+protected:
+ void printARIMem(const InstTy *MI, unsigned OpNum, raw_ostream &O) {
+ O << '(';
+ impl().printOperand(MI, OpNum, O);
+ O << ')';
+ }
+
+ void printARIPIMem(const InstTy *MI, unsigned OpNum, raw_ostream &O) {
+ O << "(";
+ impl().printOperand(MI, OpNum, O);
+ O << ")+";
+ }
+
+ void printARIPDMem(const InstTy *MI, unsigned OpNum, raw_ostream &O) {
+ O << "-(";
+ impl().printOperand(MI, OpNum, O);
+ O << ")";
+ }
+
+ void printARIDMem(const InstTy *MI, unsigned OpNum, raw_ostream &O) {
+ O << '(';
+ impl().printDisp(MI, OpNum + M68k::MemDisp, O);
+ O << ',';
+ impl().printOperand(MI, OpNum + M68k::MemBase, O);
+ O << ')';
+ }
+
+ void printARIIMem(const InstTy *MI, unsigned OpNum, raw_ostream &O) {
+ O << '(';
+ impl().printDisp(MI, OpNum + M68k::MemDisp, O);
+ O << ',';
+ impl().printOperand(MI, OpNum + M68k::MemBase, O);
+ O << ',';
+ impl().printOperand(MI, OpNum + M68k::MemIndex, O);
+ O << ')';
+ }
+
+ void printPCDMem(const InstTy *MI, uint64_t Address, unsigned OpNum,
+ raw_ostream &O) {
+ O << '(';
+ impl().printDisp(MI, OpNum + M68k::PCRelDisp, O);
+ O << ",%pc)";
+ }
+
+ void printPCIMem(const InstTy *MI, uint64_t Address, unsigned OpNum,
+ raw_ostream &O) {
+ O << '(';
+ impl().printDisp(MI, OpNum + M68k::PCRelDisp, O);
+ O << ",%pc,";
+ impl().printOperand(MI, OpNum + M68k::PCRelIndex, O);
+ O << ')';
+ }
+};
+} // end namespace llvm
+#endif
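
The new header above factors the memory-operand printers into a CRTP mixin so the MCInst printer (and, presumably, the AsmPrinter) can share one set of formatters. The following standalone toy illustrates the same pattern with hypothetical names (MemOperandPrinterMixin, ToyPrinter) and std::ostream in place of raw_ostream; it is a sketch of the design, not LLVM code.

#include <iostream>

// The base template calls back into the derived printer via impl(), so the
// shared formatting helpers can use whatever operand accessors the derived
// class provides.
template <class Derived, typename InstTy> class MemOperandPrinterMixin {
  Derived &impl() { return *static_cast<Derived *>(this); }

protected:
  void printARIMem(const InstTy *MI, unsigned OpNum, std::ostream &O) {
    O << '(';
    impl().printOperand(MI, OpNum, O); // dispatch into the derived class
    O << ')';
  }
};

class ToyPrinter : public MemOperandPrinterMixin<ToyPrinter, int> {
  friend class MemOperandPrinterMixin<ToyPrinter, int>;

  // Kept private, as printOperand is in M68kInstPrinter; the friend
  // declaration above lets the mixin reach it.
  void printOperand(const int *, unsigned OpNum, std::ostream &O) {
    O << "%a" << OpNum; // toy operand rendering
  }

public:
  void demo() {
    int Inst = 0;
    printARIMem(&Inst, 2, std::cout); // prints "(%a2)"
    std::cout << '\n';
  }
};

int main() {
  ToyPrinter().demo();
  return 0;
}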
diff --git a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
index 1560f14976dd..f2c90f565863 100644
--- a/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
+++ b/llvm/lib/Target/MSP430/AsmParser/MSP430AsmParser.cpp
@@ -53,7 +53,7 @@ class MSP430AsmParser : public MCTargetAsmParser {
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
- bool ParseDirective(AsmToken DirectiveID) override;
+ ParseStatus parseDirective(AsmToken DirectiveID) override;
bool ParseDirectiveRefSym(AsmToken DirectiveID);
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
@@ -330,7 +330,7 @@ OperandMatchResultTy MSP430AsmParser::tryParseRegister(MCRegister &RegNo,
bool MSP430AsmParser::parseJccInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
- if (!Name.startswith_insensitive("j"))
+ if (!Name.starts_with_insensitive("j"))
return true;
auto CC = Name.drop_front().lower();
@@ -363,8 +363,7 @@ bool MSP430AsmParser::parseJccInstruction(ParseInstructionInfo &Info,
}
// Skip optional '$' sign.
- if (getLexer().getKind() == AsmToken::Dollar)
- getLexer().Lex(); // Eat '$'
+ (void)parseOptionalToken(AsmToken::Dollar);
const MCExpr *Val;
SMLoc ExprLoc = getLexer().getLoc();
@@ -393,7 +392,7 @@ bool MSP430AsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Name, SMLoc NameLoc,
OperandVector &Operands) {
// Drop .w suffix
- if (Name.endswith_insensitive(".w"))
+ if (Name.ends_with_insensitive(".w"))
Name = Name.drop_back(2);
if (!parseJccInstruction(Info, Name, NameLoc, Operands))
@@ -411,11 +410,8 @@ bool MSP430AsmParser::ParseInstruction(ParseInstructionInfo &Info,
return true;
// Parse second operand if any
- if (getLexer().is(AsmToken::Comma)) {
- getLexer().Lex(); // Eat ','
- if (ParseOperand(Operands))
- return true;
- }
+ if (parseOptionalToken(AsmToken::Comma) && ParseOperand(Operands))
+ return true;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
@@ -428,27 +424,26 @@ bool MSP430AsmParser::ParseInstruction(ParseInstructionInfo &Info,
}
bool MSP430AsmParser::ParseDirectiveRefSym(AsmToken DirectiveID) {
- StringRef Name;
- if (getParser().parseIdentifier(Name))
- return TokError("expected identifier in directive");
+ StringRef Name;
+ if (getParser().parseIdentifier(Name))
+ return TokError("expected identifier in directive");
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
- getStreamer().emitSymbolAttribute(Sym, MCSA_Global);
- return false;
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
+ getStreamer().emitSymbolAttribute(Sym, MCSA_Global);
+ return parseEOL();
}
-bool MSP430AsmParser::ParseDirective(AsmToken DirectiveID) {
+ParseStatus MSP430AsmParser::parseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
- if (IDVal.lower() == ".long") {
- ParseLiteralValues(4, DirectiveID.getLoc());
- } else if (IDVal.lower() == ".word" || IDVal.lower() == ".short") {
- ParseLiteralValues(2, DirectiveID.getLoc());
- } else if (IDVal.lower() == ".byte") {
- ParseLiteralValues(1, DirectiveID.getLoc());
- } else if (IDVal.lower() == ".refsym") {
+ if (IDVal.lower() == ".long")
+ return ParseLiteralValues(4, DirectiveID.getLoc());
+ if (IDVal.lower() == ".word" || IDVal.lower() == ".short")
+ return ParseLiteralValues(2, DirectiveID.getLoc());
+ if (IDVal.lower() == ".byte")
+ return ParseLiteralValues(1, DirectiveID.getLoc());
+ if (IDVal.lower() == ".refsym")
return ParseDirectiveRefSym(DirectiveID);
- }
- return true;
+ return ParseStatus::NoMatch;
}
bool MSP430AsmParser::ParseOperand(OperandVector &Operands) {
@@ -474,15 +469,13 @@ bool MSP430AsmParser::ParseOperand(OperandVector &Operands) {
MCRegister RegNo = MSP430::PC;
SMLoc EndLoc = getParser().getTok().getLoc();
// Try (rN)
- if (getLexer().getKind() == AsmToken::LParen) {
- getLexer().Lex(); // Eat '('
+ if (parseOptionalToken(AsmToken::LParen)) {
SMLoc RegStartLoc;
if (parseRegister(RegNo, RegStartLoc, EndLoc))
return true;
- if (getLexer().getKind() != AsmToken::RParen)
- return true;
EndLoc = getParser().getTok().getEndLoc();
- getLexer().Lex(); // Eat ')'
+ if (!parseOptionalToken(AsmToken::RParen))
+ return true;
}
Operands.push_back(MSP430Operand::CreateMem(RegNo, Val, StartLoc,
EndLoc));
@@ -511,9 +504,8 @@ bool MSP430AsmParser::ParseOperand(OperandVector &Operands) {
SMLoc RegStartLoc, EndLoc;
if (parseRegister(RegNo, RegStartLoc, EndLoc))
return true;
- if (getLexer().getKind() == AsmToken::Plus) {
+ if (parseOptionalToken(AsmToken::Plus)) {
Operands.push_back(MSP430Operand::CreatePostIndReg(RegNo, StartLoc, EndLoc));
- getLexer().Lex(); // Eat '+'
return false;
}
if (Operands.size() > 1) // Emulate @rd in destination position as 0(rd)
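
Several hunks above replace explicit "peek the lexer, then Lex()" sequences with parseOptionalToken, which consumes the token only when it matches. The toy sketch below shows that check-and-consume idiom with a hypothetical ToyParser instead of the MC layer, just to make the equivalence of the refactor explicit.

#include <cassert>
#include <vector>

// Not the LLVM API: a minimal stand-in demonstrating the
// parseOptionalToken(Kind) contract of "return true and advance only when
// the next token matches".
struct ToyParser {
  std::vector<char> Toks;
  std::size_t Pos = 0;

  char peek() const { return Pos < Toks.size() ? Toks[Pos] : '\0'; }

  bool parseOptionalToken(char Kind) {
    if (peek() != Kind)
      return false;
    ++Pos; // consume it
    return true;
  }
};

int main() {
  ToyParser P{{'$', '4', '2'}};
  // Skip an optional '$' sign, as in parseJccInstruction.
  (void)P.parseOptionalToken('$');
  assert(P.peek() == '4');
  // No ',' is present, so nothing is consumed and false is returned.
  assert(!P.parseOptionalToken(','));
  return 0;
}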
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp
index 420893f65d5b..3726c600f4a7 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.cpp
@@ -26,6 +26,10 @@ using namespace llvm;
#define PRINT_ALIAS_INSTR
#include "MSP430GenAsmWriter.inc"
+void MSP430InstPrinter::printRegName(raw_ostream &O, MCRegister Reg) const {
+ O << getRegisterName(Reg);
+}
+
void MSP430InstPrinter::printInst(const MCInst *MI, uint64_t Address,
StringRef Annot, const MCSubtargetInfo &STI,
raw_ostream &O) {
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
index 60849d69e04e..40605b92bcb0 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430InstPrinter.h
@@ -22,6 +22,8 @@ namespace llvm {
const MCRegisterInfo &MRI)
: MCInstPrinter(MAI, MII, MRI) {}
+ void printRegName(raw_ostream &O, MCRegister Reg) const override;
+
void printInst(const MCInst *MI, uint64_t Address, StringRef Annot,
const MCSubtargetInfo &STI, raw_ostream &O) override;
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index de07b47096d3..386b5abe3801 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -15,9 +15,13 @@ using namespace llvm;
void MSP430MCAsmInfo::anchor() { }
-MSP430MCAsmInfo::MSP430MCAsmInfo(const Triple &TT,
- const MCTargetOptions &Options) {
- CodePointerSize = CalleeSaveStackSlotSize = 2;
+MSP430MCAsmInfo::MSP430MCAsmInfo(const Triple &TT) {
+ // Since MSP430-GCC already generates 32-bit DWARF information, we will
+ // also store 16-bit pointers as 32-bit pointers in DWARF, because using
+ // 32-bit DWARF pointers is already a working and tested path for LLDB
+ // as well.
+ CodePointerSize = 4;
+ CalleeSaveStackSlotSize = 2;
CommentString = ";";
SeparatorString = "{";
@@ -26,4 +30,6 @@ MSP430MCAsmInfo::MSP430MCAsmInfo(const Triple &TT,
UsesELFSectionDirectiveForBSS = true;
SupportsDebugInformation = true;
+
+ ExceptionsType = ExceptionHandling::DwarfCFI;
}
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index c4ff4a9eefb1..93979df037e6 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
@@ -22,7 +22,7 @@ class MSP430MCAsmInfo : public MCAsmInfoELF {
void anchor() override;
public:
- explicit MSP430MCAsmInfo(const Triple &TT, const MCTargetOptions &Options);
+ explicit MSP430MCAsmInfo(const Triple &TT);
};
} // namespace llvm
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
index 2b16c6234a51..985906a35331 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCCodeEmitter.cpp
@@ -74,12 +74,13 @@ public:
MSP430MCCodeEmitter(MCContext &ctx, MCInstrInfo const &MCII)
: Ctx(ctx), MCII(MCII) {}
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
};
-void MSP430MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+void MSP430MCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
@@ -93,7 +94,7 @@ void MSP430MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
size_t WordCount = Size / 2;
while (WordCount--) {
- support::endian::write(OS, (uint16_t)BinaryOpCode, support::little);
+ support::endian::write(CB, (uint16_t)BinaryOpCode, support::little);
BinaryOpCode >>= 16;
}
}
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index 13a880de68b5..df182a5459ea 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -14,6 +14,7 @@
#include "MSP430InstPrinter.h"
#include "MSP430MCAsmInfo.h"
#include "TargetInfo/MSP430TargetInfo.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
@@ -43,6 +44,27 @@ static MCRegisterInfo *createMSP430MCRegisterInfo(const Triple &TT) {
return X;
}
+static MCAsmInfo *createMSP430MCAsmInfo(const MCRegisterInfo &MRI,
+ const Triple &TT,
+ const MCTargetOptions &Options) {
+ MCAsmInfo *MAI = new MSP430MCAsmInfo(TT);
+
+ // Initialize initial frame state.
+ int stackGrowth = -2;
+
+ // Initial state of the frame pointer is sp+ptr_size.
+ MCCFIInstruction Inst = MCCFIInstruction::cfiDefCfa(
+ nullptr, MRI.getDwarfRegNum(MSP430::SP, true), -stackGrowth);
+ MAI->addInitialFrameState(Inst);
+
+ // Add return address to move list
+ MCCFIInstruction Inst2 = MCCFIInstruction::createOffset(
+ nullptr, MRI.getDwarfRegNum(MSP430::PC, true), stackGrowth);
+ MAI->addInitialFrameState(Inst2);
+
+ return MAI;
+}
+
static MCSubtargetInfo *
createMSP430MCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
return createMSP430MCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
@@ -61,7 +83,7 @@ static MCInstPrinter *createMSP430MCInstPrinter(const Triple &T,
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeMSP430TargetMC() {
Target &T = getTheMSP430Target();
- RegisterMCAsmInfo<MSP430MCAsmInfo> X(T);
+ TargetRegistry::RegisterMCAsmInfo(T, createMSP430MCAsmInfo);
TargetRegistry::RegisterMCInstrInfo(T, createMSP430MCInstrInfo);
TargetRegistry::RegisterMCRegInfo(T, createMSP430MCRegisterInfo);
TargetRegistry::RegisterMCSubtargetInfo(T, createMSP430MCSubtargetInfo);
diff --git a/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
index 6a8dc3502496..176387d71fcb 100644
--- a/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -25,6 +25,11 @@
using namespace llvm;
+MSP430FrameLowering::MSP430FrameLowering(const MSP430Subtarget &STI)
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(2), -2,
+ Align(2)),
+ STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {}
+
bool MSP430FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -37,6 +42,45 @@ bool MSP430FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const
return !MF.getFrameInfo().hasVarSizedObjects();
}
+void MSP430FrameLowering::BuildCFI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL,
+ const MCCFIInstruction &CFIInst,
+ MachineInstr::MIFlag Flag) const {
+ MachineFunction &MF = *MBB.getParent();
+ unsigned CFIIndex = MF.addFrameInst(CFIInst);
+ BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(Flag);
+}
+
+void MSP430FrameLowering::emitCalleeSavedFrameMoves(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, bool IsPrologue) const {
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MachineModuleInfo &MMI = MF.getMMI();
+ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+
+ // Calculate offsets.
+ for (const CalleeSavedInfo &I : CSI) {
+ int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
+ Register Reg = I.getReg();
+ unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
+
+ if (IsPrologue) {
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+ } else {
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createRestore(nullptr, DwarfReg));
+ }
+ }
+}
+
void MSP430FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
@@ -50,6 +94,7 @@ void MSP430FrameLowering::emitPrologue(MachineFunction &MF,
// Get the number of bytes to allocate from the FrameInfo.
uint64_t StackSize = MFI.getStackSize();
+ int stackGrowth = -2;
uint64_t NumBytes = 0;
if (hasFP(MF)) {
@@ -64,23 +109,56 @@ void MSP430FrameLowering::emitPrologue(MachineFunction &MF,
// Save FP into the appropriate stack slot...
BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r))
- .addReg(MSP430::R4, RegState::Kill);
+ .addReg(MSP430::R4, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // Mark the place where FP was saved.
+ // Define the current CFA rule to use the provided offset.
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, -2 * stackGrowth),
+ MachineInstr::FrameSetup);
+
+ // Change the rule for the FramePtr to be an "offset" rule.
+ unsigned DwarfFramePtr = TRI->getDwarfRegNum(MSP430::R4, true);
+ BuildCFI(
+ MBB, MBBI, DL,
+ MCCFIInstruction::createOffset(nullptr, DwarfFramePtr, 2 * stackGrowth),
+ MachineInstr::FrameSetup);
// Update FP with the new base value...
BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::R4)
- .addReg(MSP430::SP);
+ .addReg(MSP430::SP)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // Mark effective beginning of when frame pointer becomes valid.
+ // Define the current CFA to use the FP register.
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
+ MachineInstr::FrameSetup);
// Mark the FramePtr as live-in in every block except the entry.
for (MachineBasicBlock &MBBJ : llvm::drop_begin(MF))
MBBJ.addLiveIn(MSP430::R4);
-
} else
NumBytes = StackSize - MSP430FI->getCalleeSavedFrameSize();
// Skip the callee-saved push instructions.
- while (MBBI != MBB.end() && (MBBI->getOpcode() == MSP430::PUSH16r))
+ int StackOffset = 2 * stackGrowth;
+ while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup) &&
+ (MBBI->getOpcode() == MSP430::PUSH16r)) {
++MBBI;
+ if (!hasFP(MF)) {
+ // Mark callee-saved push instruction.
+ // Define the current CFA rule to use the provided offset.
+ assert(StackSize && "Expected stack frame");
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset),
+ MachineInstr::FrameSetup);
+ StackOffset += stackGrowth;
+ }
+ }
+
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
@@ -94,12 +172,23 @@ void MSP430FrameLowering::emitPrologue(MachineFunction &MF,
if (NumBytes) {
MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SP)
- .addReg(MSP430::SP).addImm(NumBytes);
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SP)
+ .addReg(MSP430::SP)
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameSetup);
// The SRW implicit def is dead.
MI->getOperand(3).setIsDead();
}
+ if (!hasFP(MF)) {
+ // Adjust the previous CFA value if CFA was not redefined by FP
+ BuildCFI(
+ MBB, MBBI, DL,
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
+ MachineInstr::FrameSetup);
+ }
}
+
+ emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
}
void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
@@ -125,24 +214,43 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
unsigned CSSize = MSP430FI->getCalleeSavedFrameSize();
uint64_t NumBytes = 0;
+ MachineBasicBlock::iterator AfterPop = MBBI;
if (hasFP(MF)) {
// Calculate required stack adjustment
uint64_t FrameSize = StackSize - 2;
NumBytes = FrameSize - CSSize;
// pop FP.
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::R4);
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::POP16r), MSP430::R4)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ unsigned DwarfStackPtr = TRI->getDwarfRegNum(MSP430::SP, true);
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 2),
+ MachineInstr::FrameDestroy);
+ --MBBI;
+ if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
+ unsigned DwarfFramePtr = TRI->getDwarfRegNum(MSP430::R4, true);
+ BuildCFI(MBB, AfterPop, DL,
+ MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
+ MachineInstr::FrameDestroy);
+ --MBBI;
+ --AfterPop;
+ }
} else
NumBytes = StackSize - CSSize;
// Skip the callee-saved pop instructions.
+ MachineBasicBlock::iterator FirstCSPop = MBBI;
while (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PI = std::prev(MBBI);
unsigned Opc = PI->getOpcode();
- if (Opc != MSP430::POP16r && !PI->isTerminator())
+ if ((Opc != MSP430::POP16r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
+ !PI->isTerminator())
break;
+ FirstCSPop = PI;
--MBBI;
}
+ MBBI = FirstCSPop;
DL = MBBI->getDebugLoc();
@@ -152,13 +260,15 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
// mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
if (MFI.hasVarSizedObjects()) {
- BuildMI(MBB, MBBI, DL,
- TII.get(MSP430::MOV16rr), MSP430::SP).addReg(MSP430::R4);
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::MOV16rr), MSP430::SP)
+ .addReg(MSP430::R4)
+ .setMIFlag(MachineInstr::FrameDestroy);
if (CSSize) {
MachineInstr *MI =
- BuildMI(MBB, MBBI, DL,
- TII.get(MSP430::SUB16ri), MSP430::SP)
- .addReg(MSP430::SP).addImm(CSSize);
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::SUB16ri), MSP430::SP)
+ .addReg(MSP430::SP)
+ .addImm(CSSize)
+ .setMIFlag(MachineInstr::FrameDestroy);
// The SRW implicit def is dead.
MI->getOperand(3).setIsDead();
}
@@ -166,12 +276,40 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
// adjust stack pointer back: SP += numbytes
if (NumBytes) {
MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SP)
- .addReg(MSP430::SP).addImm(NumBytes);
+ BuildMI(MBB, MBBI, DL, TII.get(MSP430::ADD16ri), MSP430::SP)
+ .addReg(MSP430::SP)
+ .addImm(NumBytes)
+ .setMIFlag(MachineInstr::FrameDestroy);
// The SRW implicit def is dead.
MI->getOperand(3).setIsDead();
+
+ if (!hasFP(MF)) {
+ // Adjust CFA value if it was defined by SP
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, CSSize + 2),
+ MachineInstr::FrameDestroy);
+ }
+ }
+ }
+
+ if (!hasFP(MF)) {
+ MBBI = FirstCSPop;
+ int64_t Offset = -CSSize - 2;
+ // Mark callee-saved pop instruction.
+ // Define the current CFA rule to use the provided offset.
+ while (MBBI != MBB.end()) {
+ MachineBasicBlock::iterator PI = MBBI;
+ unsigned Opc = PI->getOpcode();
+ ++MBBI;
+ if (Opc == MSP430::POP16r) {
+ Offset += 2;
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
+ MachineInstr::FrameDestroy);
+ }
}
}
+ emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
}
// FIXME: Can we eleminate these in favour of generic code?
@@ -189,12 +327,13 @@ bool MSP430FrameLowering::spillCalleeSavedRegisters(
MSP430MachineFunctionInfo *MFI = MF.getInfo<MSP430MachineFunctionInfo>();
MFI->setCalleeSavedFrameSize(CSI.size() * 2);
- for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
+ for (const CalleeSavedInfo &I : CSI) {
Register Reg = I.getReg();
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
BuildMI(MBB, MI, DL, TII.get(MSP430::PUSH16r))
- .addReg(Reg, RegState::Kill);
+ .addReg(Reg, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
}
return true;
}
@@ -211,8 +350,9 @@ bool MSP430FrameLowering::restoreCalleeSavedRegisters(
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- for (const CalleeSavedInfo &I : CSI)
- BuildMI(MBB, MI, DL, TII.get(MSP430::POP16r), I.getReg());
+ for (const CalleeSavedInfo &I : llvm::reverse(CSI))
+ BuildMI(MBB, MI, DL, TII.get(MSP430::POP16r), I.getReg())
+ .setMIFlag(MachineInstr::FrameDestroy);
return true;
}
@@ -269,6 +409,11 @@ MachineBasicBlock::iterator MSP430FrameLowering::eliminateCallFramePseudoInstr(
BuildMI(MF, Old.getDebugLoc(), TII.get(MSP430::SUB16ri), MSP430::SP)
.addReg(MSP430::SP)
.addImm(CalleeAmt);
+ if (!hasFP(MF)) {
+ DebugLoc DL = I->getDebugLoc();
+ BuildCFI(MBB, I, DL,
+ MCCFIInstruction::createAdjustCfaOffset(nullptr, CalleeAmt));
+ }
// The SRW implicit def is dead.
New->getOperand(3).setIsDead();
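
The prologue CFI added above is easiest to follow as slot arithmetic: MSP430 stack slots are 2 bytes and the stack grows down, so stackGrowth is -2, and the initial frame state registered in createMSP430MCAsmInfo already places the CFA at SP+2 to account for the pushed return address. A small worked example of those offsets, assuming that initial state, is sketched below; it is plain arithmetic, not LLVM code.

#include <cassert>

int main() {
  const int stackGrowth = -2; // 2-byte slots, stack grows down

  // After "push r4" the CFA sits two slots above SP ...
  int cfaOffsetAfterPushFP = -2 * stackGrowth; // DW_CFA_def_cfa_offset 4
  assert(cfaOffsetAfterPushFP == 4);

  // ... and the saved frame pointer sits two slots below the CFA.
  int savedFPOffsetFromCFA = 2 * stackGrowth; // DW_CFA_offset r4, -4
  assert(savedFPOffsetFromCFA == -4);

  // After "mov sp, r4" the CFA is re-expressed in terms of R4
  // (DW_CFA_def_cfa_register), so later SP adjustments need no further CFI.
  return 0;
}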
diff --git a/llvm/lib/Target/MSP430/MSP430FrameLowering.h b/llvm/lib/Target/MSP430/MSP430FrameLowering.h
index f6995edf4b0a..5227d3e731ed 100644
--- a/llvm/lib/Target/MSP430/MSP430FrameLowering.h
+++ b/llvm/lib/Target/MSP430/MSP430FrameLowering.h
@@ -17,13 +17,20 @@
#include "llvm/CodeGen/TargetFrameLowering.h"
namespace llvm {
+
+class MSP430Subtarget;
+class MSP430InstrInfo;
+class MSP430RegisterInfo;
+
class MSP430FrameLowering : public TargetFrameLowering {
protected:
public:
- explicit MSP430FrameLowering()
- : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, Align(2), -2,
- Align(2)) {}
+ MSP430FrameLowering(const MSP430Subtarget &STI);
+
+ const MSP430Subtarget &STI;
+ const MSP430InstrInfo &TII;
+ const MSP430RegisterInfo *TRI;
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
@@ -48,6 +55,15 @@ public:
bool hasReservedCallFrame(const MachineFunction &MF) const override;
void processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS = nullptr) const override;
+
+ /// Wraps up getting a CFI index and building a MachineInstr for it.
+ void BuildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, const MCCFIInstruction &CFIInst,
+ MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const;
+
+ void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, bool IsPrologue) const;
};
} // End llvm namespace
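For reference, a minimal sketch of what the BuildCFI helper declared above typically looks like, modeled on the equivalent helper in other backends; the actual definition in MSP430FrameLowering.cpp in this import may differ in detail, and the sketch assumes the surrounding includes of that file:

void MSP430FrameLowering::BuildCFI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   const DebugLoc &DL,
                                   const MCCFIInstruction &CFIInst,
                                   MachineInstr::MIFlag Flag) const {
  // Register the CFI instruction with the function and emit a
  // CFI_INSTRUCTION pseudo that carries its index.
  MachineFunction &MF = *MBB.getParent();
  unsigned CFIIndex = MF.addFrameInst(CFIInst);
  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex)
      .setMIFlag(Flag);
}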
diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index 05fa6c42aaf3..ee7762c296bf 100644
--- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -631,7 +631,7 @@ SDValue MSP430TargetLowering::LowerCCCArguments(
// Create frame index for the start of the first vararg value
if (isVarArg) {
- unsigned Offset = CCInfo.getNextStackOffset();
+ unsigned Offset = CCInfo.getStackSize();
FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, Offset, true));
}
@@ -645,7 +645,7 @@ SDValue MSP430TargetLowering::LowerCCCArguments(
{
#ifndef NDEBUG
errs() << "LowerFormalArguments Unhandled argument type: "
- << RegVT.getEVTString() << "\n";
+ << RegVT << "\n";
#endif
llvm_unreachable(nullptr);
}
@@ -686,8 +686,7 @@ SDValue MSP430TargetLowering::LowerCCCArguments(
unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
if (ObjSize > 2) {
errs() << "LowerFormalArguments Unhandled argument type: "
- << EVT(VA.getLocVT()).getEVTString()
- << "\n";
+ << VA.getLocVT() << "\n";
}
// Create the frame index object for this incoming parameter...
int FI = MFI.CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
@@ -754,7 +753,7 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Analyze return values.
AnalyzeReturnValues(CCInfo, RVLocs, Outs);
- SDValue Flag;
+ SDValue Glue;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
@@ -763,11 +762,11 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- OutVals[i], Flag);
+ OutVals[i], Glue);
// Guarantee that all emitted copies are stuck together,
// avoiding something bad.
- Flag = Chain.getValue(1);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
@@ -783,19 +782,19 @@ MSP430TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
DAG.getCopyFromReg(Chain, dl, Reg, PtrVT);
unsigned R12 = MSP430::R12;
- Chain = DAG.getCopyToReg(Chain, dl, R12, Val, Flag);
- Flag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, R12, Val, Glue);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(R12, PtrVT));
}
unsigned Opc = (CallConv == CallingConv::MSP430_INTR ?
- MSP430ISD::RETI_FLAG : MSP430ISD::RET_FLAG);
+ MSP430ISD::RETI_GLUE : MSP430ISD::RET_GLUE);
RetOps[0] = Chain; // Update chain.
- // Add the flag if we have it.
- if (Flag.getNode())
- RetOps.push_back(Flag);
+ // Add the glue if we have it.
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
return DAG.getNode(Opc, dl, MVT::Other, RetOps);
}
@@ -815,7 +814,7 @@ SDValue MSP430TargetLowering::LowerCCCCallTo(
AnalyzeArguments(CCInfo, ArgLocs, Outs);
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = CCInfo.getNextStackOffset();
+ unsigned NumBytes = CCInfo.getStackSize();
MVT PtrVT = getFrameIndexTy(DAG.getDataLayout());
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
@@ -883,13 +882,13 @@ SDValue MSP430TargetLowering::LowerCCCCallTo(
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
// Build a sequence of copy-to-reg nodes chained together with token chain and
- // flag operands which copy the outgoing args into registers. The InFlag in
+ // flag operands which copy the outgoing args into registers. The InGlue is
// necessary since all emitted instructions must be stuck together.
- SDValue InFlag;
+ SDValue InGlue;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
+ RegsToPass[i].second, InGlue);
+ InGlue = Chain.getValue(1);
}
// If the callee is a GlobalAddress node (quite common, every direct call is)
@@ -912,19 +911,19 @@ SDValue MSP430TargetLowering::LowerCCCCallTo(
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ if (InGlue.getNode())
+ Ops.push_back(InGlue);
Chain = DAG.getNode(MSP430ISD::CALL, dl, NodeTys, Ops);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
// Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InFlag, dl);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
+ InGlue = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl,
+ return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl,
DAG, InVals);
}
@@ -932,7 +931,7 @@ SDValue MSP430TargetLowering::LowerCCCCallTo(
/// appropriate copies out of appropriate physical registers.
///
SDValue MSP430TargetLowering::LowerCallResult(
- SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
+ SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
@@ -946,8 +945,8 @@ SDValue MSP430TargetLowering::LowerCallResult(
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
- RVLocs[i].getValVT(), InFlag).getValue(1);
- InFlag = Chain.getValue(2);
+ RVLocs[i].getValVT(), InGlue).getValue(1);
+ InGlue = Chain.getValue(2);
InVals.push_back(Chain.getValue(0));
}
@@ -1370,8 +1369,8 @@ bool MSP430TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((MSP430ISD::NodeType)Opcode) {
case MSP430ISD::FIRST_NUMBER: break;
- case MSP430ISD::RET_FLAG: return "MSP430ISD::RET_FLAG";
- case MSP430ISD::RETI_FLAG: return "MSP430ISD::RETI_FLAG";
+ case MSP430ISD::RET_GLUE: return "MSP430ISD::RET_GLUE";
+ case MSP430ISD::RETI_GLUE: return "MSP430ISD::RETI_GLUE";
case MSP430ISD::RRA: return "MSP430ISD::RRA";
case MSP430ISD::RLA: return "MSP430ISD::RLA";
case MSP430ISD::RRC: return "MSP430ISD::RRC";
@@ -1413,10 +1412,6 @@ bool MSP430TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
return false && VT1 == MVT::i8 && VT2 == MVT::i16;
}
-bool MSP430TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
- return isZExtFree(Val.getValueType(), VT2);
-}
-
//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.h b/llvm/lib/Target/MSP430/MSP430ISelLowering.h
index f23042a369fd..667ad6033861 100644
--- a/llvm/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.h
@@ -23,11 +23,11 @@ namespace llvm {
enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- /// Return with a flag operand. Operand 0 is the chain operand.
- RET_FLAG,
+ /// Return with a glue operand. Operand 0 is the chain operand.
+ RET_GLUE,
- /// Same as RET_FLAG, but used for returning from ISRs.
- RETI_FLAG,
+ /// Same as RET_GLUE, but used for returning from ISRs.
+ RETI_GLUE,
/// Y = R{R,L}A X, rotate right (left) arithmetically
RRA, RLA,
@@ -126,7 +126,6 @@ namespace llvm {
/// out to 16 bits.
bool isZExtFree(Type *Ty1, Type *Ty2) const override;
bool isZExtFree(EVT VT1, EVT VT2) const override;
- bool isZExtFree(SDValue Val, EVT VT2) const override;
bool isLegalICmpImmediate(int64_t) const override;
bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const override;
@@ -153,7 +152,7 @@ namespace llvm {
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/llvm/lib/Target/MSP430/MSP430InstrInfo.h
index 94cf9f8e1f16..b8d015a21cd1 100644
--- a/llvm/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.h
@@ -33,7 +33,7 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- const TargetRegisterInfo &getRegisterInfo() const { return RI; }
+ const MSP430RegisterInfo &getRegisterInfo() const { return RI; }
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
diff --git a/llvm/lib/Target/MSP430/MSP430InstrInfo.td b/llvm/lib/Target/MSP430/MSP430InstrInfo.td
index e7218ca21147..0ff9763e4c93 100644
--- a/llvm/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/llvm/lib/Target/MSP430/MSP430InstrInfo.td
@@ -40,9 +40,9 @@ def SDT_MSP430DAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
//===----------------------------------------------------------------------===//
// MSP430 Specific Node Definitions.
//===----------------------------------------------------------------------===//
-def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
+def MSP430retglue : SDNode<"MSP430ISD::RET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone,
+def MSP430retiglue : SDNode<"MSP430ISD::RETI_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
@@ -227,13 +227,13 @@ let isCodeGenOnly = 1, usesCustomInserter = 1 in {
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET : IForm16<0b0100, DstReg, SrcPostInc, 2,
- (outs), (ins), "ret", [(MSP430retflag)]> {
+ (outs), (ins), "ret", [(MSP430retglue)]> {
let DecoderNamespace = "Delta";
let rs = 1;
let rd = 0;
}
def RETI : IIForm16<0b110, SrcReg, 2,
- (outs), (ins), "reti", [(MSP430retiflag)]> {
+ (outs), (ins), "reti", [(MSP430retiglue)]> {
let rs = 0;
}
}
diff --git a/llvm/lib/Target/MSP430/MSP430RegisterInfo.h b/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
index 78b02cf8ecc0..51e07f4e8e9e 100644
--- a/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/llvm/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -20,7 +20,7 @@
namespace llvm {
-struct MSP430RegisterInfo : public MSP430GenRegisterInfo {
+class MSP430RegisterInfo : public MSP430GenRegisterInfo {
public:
MSP430RegisterInfo();
diff --git a/llvm/lib/Target/MSP430/MSP430RegisterInfo.td b/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
index 61cc72d494b5..153df285aebd 100644
--- a/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/llvm/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -32,42 +32,42 @@ class MSP430RegWithSubregs<bits<4> num, string n, list<Register> subregs,
// Registers
//===----------------------------------------------------------------------===//
-def PCB : MSP430Reg<0, "r0", ["pc"]>;
-def SPB : MSP430Reg<1, "r1", ["sp"]>;
-def SRB : MSP430Reg<2, "r2", ["sr"]>;
-def CGB : MSP430Reg<3, "r3", ["cg"]>;
-def R4B : MSP430Reg<4, "r4", ["fp"]>;
-def R5B : MSP430Reg<5, "r5">;
-def R6B : MSP430Reg<6, "r6">;
-def R7B : MSP430Reg<7, "r7">;
-def R8B : MSP430Reg<8, "r8">;
-def R9B : MSP430Reg<9, "r9">;
-def R10B : MSP430Reg<10, "r10">;
-def R11B : MSP430Reg<11, "r11">;
-def R12B : MSP430Reg<12, "r12">;
-def R13B : MSP430Reg<13, "r13">;
-def R14B : MSP430Reg<14, "r14">;
-def R15B : MSP430Reg<15, "r15">;
+def PCB : MSP430Reg<0, "r0", ["pc"]>, DwarfRegNum<[16]>;
+def SPB : MSP430Reg<1, "r1", ["sp"]>, DwarfRegNum<[17]>;
+def SRB : MSP430Reg<2, "r2", ["sr"]>, DwarfRegNum<[18]>;
+def CGB : MSP430Reg<3, "r3", ["cg"]>, DwarfRegNum<[19]>;
+def R4B : MSP430Reg<4, "r4", ["fp"]>, DwarfRegNum<[20]>;
+def R5B : MSP430Reg<5, "r5">, DwarfRegNum<[21]>;
+def R6B : MSP430Reg<6, "r6">, DwarfRegNum<[22]>;
+def R7B : MSP430Reg<7, "r7">, DwarfRegNum<[23]>;
+def R8B : MSP430Reg<8, "r8">, DwarfRegNum<[24]>;
+def R9B : MSP430Reg<9, "r9">, DwarfRegNum<[25]>;
+def R10B : MSP430Reg<10, "r10">, DwarfRegNum<[26]>;
+def R11B : MSP430Reg<11, "r11">, DwarfRegNum<[27]>;
+def R12B : MSP430Reg<12, "r12">, DwarfRegNum<[28]>;
+def R13B : MSP430Reg<13, "r13">, DwarfRegNum<[29]>;
+def R14B : MSP430Reg<14, "r14">, DwarfRegNum<[30]>;
+def R15B : MSP430Reg<15, "r15">, DwarfRegNum<[31]>;
def subreg_8bit : SubRegIndex<8> { let Namespace = "MSP430"; }
let SubRegIndices = [subreg_8bit] in {
-def PC : MSP430RegWithSubregs<0, "r0", [PCB], ["pc"]>;
-def SP : MSP430RegWithSubregs<1, "r1", [SPB], ["sp"]>;
-def SR : MSP430RegWithSubregs<2, "r2", [SRB], ["sr"]>;
-def CG : MSP430RegWithSubregs<3, "r3", [CGB], ["cg"]>;
-def R4 : MSP430RegWithSubregs<4, "r4", [R4B], ["fp"]>;
-def R5 : MSP430RegWithSubregs<5, "r5", [R5B]>;
-def R6 : MSP430RegWithSubregs<6, "r6", [R6B]>;
-def R7 : MSP430RegWithSubregs<7, "r7", [R7B]>;
-def R8 : MSP430RegWithSubregs<8, "r8", [R8B]>;
-def R9 : MSP430RegWithSubregs<9, "r9", [R9B]>;
-def R10 : MSP430RegWithSubregs<10, "r10", [R10B]>;
-def R11 : MSP430RegWithSubregs<11, "r11", [R11B]>;
-def R12 : MSP430RegWithSubregs<12, "r12", [R12B]>;
-def R13 : MSP430RegWithSubregs<13, "r13", [R13B]>;
-def R14 : MSP430RegWithSubregs<14, "r14", [R14B]>;
-def R15 : MSP430RegWithSubregs<15, "r15", [R15B]>;
+def PC : MSP430RegWithSubregs<0, "r0", [PCB], ["pc"]>, DwarfRegNum<[0]>;
+def SP : MSP430RegWithSubregs<1, "r1", [SPB], ["sp"]>, DwarfRegNum<[1]>;
+def SR : MSP430RegWithSubregs<2, "r2", [SRB], ["sr"]>, DwarfRegNum<[2]>;
+def CG : MSP430RegWithSubregs<3, "r3", [CGB], ["cg"]>, DwarfRegNum<[3]>;
+def R4 : MSP430RegWithSubregs<4, "r4", [R4B], ["fp"]>, DwarfRegNum<[4]>;
+def R5 : MSP430RegWithSubregs<5, "r5", [R5B]>, DwarfRegNum<[5]>;
+def R6 : MSP430RegWithSubregs<6, "r6", [R6B]>, DwarfRegNum<[6]>;
+def R7 : MSP430RegWithSubregs<7, "r7", [R7B]>, DwarfRegNum<[7]>;
+def R8 : MSP430RegWithSubregs<8, "r8", [R8B]>, DwarfRegNum<[8]>;
+def R9 : MSP430RegWithSubregs<9, "r9", [R9B]>, DwarfRegNum<[9]>;
+def R10 : MSP430RegWithSubregs<10, "r10", [R10B]>, DwarfRegNum<[10]>;
+def R11 : MSP430RegWithSubregs<11, "r11", [R11B]>, DwarfRegNum<[11]>;
+def R12 : MSP430RegWithSubregs<12, "r12", [R12B]>, DwarfRegNum<[12]>;
+def R13 : MSP430RegWithSubregs<13, "r13", [R13B]>, DwarfRegNum<[13]>;
+def R14 : MSP430RegWithSubregs<14, "r14", [R14B]>, DwarfRegNum<[14]>;
+def R15 : MSP430RegWithSubregs<15, "r15", [R15B]>, DwarfRegNum<[15]>;
}
def GR8 : RegisterClass<"MSP430", [i8], 8,
diff --git a/llvm/lib/Target/MSP430/MSP430Subtarget.cpp b/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
index 0604d47597e2..2d208cdf3f05 100644
--- a/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/llvm/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -58,4 +58,5 @@ MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
MSP430Subtarget::MSP430Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM)
: MSP430GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this) {}
+ InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
+ FrameLowering(*this) {}
diff --git a/llvm/lib/Target/MSP430/MSP430Subtarget.h b/llvm/lib/Target/MSP430/MSP430Subtarget.h
index 079af2c75ec1..d99545a2224d 100644
--- a/llvm/lib/Target/MSP430/MSP430Subtarget.h
+++ b/llvm/lib/Target/MSP430/MSP430Subtarget.h
@@ -38,10 +38,10 @@ private:
virtual void anchor();
bool ExtendedInsts = false;
HWMultEnum HWMultMode = NoHWMult;
- MSP430FrameLowering FrameLowering;
MSP430InstrInfo InstrInfo;
MSP430TargetLowering TLInfo;
SelectionDAGTargetInfo TSInfo;
+ MSP430FrameLowering FrameLowering;
public:
/// This constructor initializes the data members to match that
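A note on the reordering above: MSP430FrameLowering now takes the subtarget in its constructor and, per the header change earlier in this diff, caches references to the instruction and register info, so the FrameLowering member has to be declared, and therefore constructed, after InstrInfo; C++ initializes members in declaration order.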
@@ -64,9 +64,10 @@ public:
return &FrameLowering;
}
const MSP430InstrInfo *getInstrInfo() const override { return &InstrInfo; }
- const TargetRegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
+ const MSP430RegisterInfo *getRegisterInfo() const override {
+ return &getInstrInfo()->getRegisterInfo();
}
+
const MSP430TargetLowering *getTargetLowering() const override {
return &TLInfo;
}
diff --git a/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
index c5b654c37e11..2efeeb5ee63d 100644
--- a/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include <optional>
diff --git a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index 45cbddd03d92..b4f99788410b 100644
--- a/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -18,7 +18,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
@@ -39,7 +38,6 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
@@ -51,6 +49,8 @@
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -195,20 +195,17 @@ class MipsAsmParser : public MCTargetAsmParser {
bool ParseDirective(AsmToken DirectiveID) override;
- OperandMatchResultTy parseMemOperand(OperandVector &Operands);
- OperandMatchResultTy
- matchAnyRegisterNameWithoutDollar(OperandVector &Operands,
- StringRef Identifier, SMLoc S);
- OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands,
- const AsmToken &Token,
- SMLoc S);
- OperandMatchResultTy matchAnyRegisterWithoutDollar(OperandVector &Operands,
- SMLoc S);
- OperandMatchResultTy parseAnyRegister(OperandVector &Operands);
- OperandMatchResultTy parseImm(OperandVector &Operands);
- OperandMatchResultTy parseJumpTarget(OperandVector &Operands);
- OperandMatchResultTy parseInvNum(OperandVector &Operands);
- OperandMatchResultTy parseRegisterList(OperandVector &Operands);
+ ParseStatus parseMemOperand(OperandVector &Operands);
+ ParseStatus matchAnyRegisterNameWithoutDollar(OperandVector &Operands,
+ StringRef Identifier, SMLoc S);
+ ParseStatus matchAnyRegisterWithoutDollar(OperandVector &Operands,
+ const AsmToken &Token, SMLoc S);
+ ParseStatus matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S);
+ ParseStatus parseAnyRegister(OperandVector &Operands);
+ ParseStatus parseImm(OperandVector &Operands);
+ ParseStatus parseJumpTarget(OperandVector &Operands);
+ ParseStatus parseInvNum(OperandVector &Operands);
+ ParseStatus parseRegisterList(OperandVector &Operands);
bool searchSymbolAlias(OperandVector &Operands);
@@ -482,7 +479,7 @@ class MipsAsmParser : public MCTargetAsmParser {
}
void setFeatureBits(uint64_t Feature, StringRef FeatureString) {
- if (!(getSTI().getFeatureBits()[Feature])) {
+ if (!(getSTI().hasFeature(Feature))) {
MCSubtargetInfo &STI = copySTI();
setAvailableFeatures(
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
@@ -491,7 +488,7 @@ class MipsAsmParser : public MCTargetAsmParser {
}
void clearFeatureBits(uint64_t Feature, StringRef FeatureString) {
- if (getSTI().getFeatureBits()[Feature]) {
+ if (getSTI().hasFeature(Feature)) {
MCSubtargetInfo &STI = copySTI();
setAvailableFeatures(
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
@@ -576,11 +573,11 @@ public:
bool hasEightFccRegisters() const { return hasMips4() || hasMips32(); }
bool isGP64bit() const {
- return getSTI().getFeatureBits()[Mips::FeatureGP64Bit];
+ return getSTI().hasFeature(Mips::FeatureGP64Bit);
}
bool isFP64bit() const {
- return getSTI().getFeatureBits()[Mips::FeatureFP64Bit];
+ return getSTI().hasFeature(Mips::FeatureFP64Bit);
}
bool isJalrRelocAvailable(const MCExpr *JalExpr) {
@@ -601,99 +598,99 @@ public:
bool isABI_N64() const { return ABI.IsN64(); }
bool isABI_O32() const { return ABI.IsO32(); }
bool isABI_FPXX() const {
- return getSTI().getFeatureBits()[Mips::FeatureFPXX];
+ return getSTI().hasFeature(Mips::FeatureFPXX);
}
bool useOddSPReg() const {
- return !(getSTI().getFeatureBits()[Mips::FeatureNoOddSPReg]);
+ return !(getSTI().hasFeature(Mips::FeatureNoOddSPReg));
}
bool inMicroMipsMode() const {
- return getSTI().getFeatureBits()[Mips::FeatureMicroMips];
+ return getSTI().hasFeature(Mips::FeatureMicroMips);
}
bool hasMips1() const {
- return getSTI().getFeatureBits()[Mips::FeatureMips1];
+ return getSTI().hasFeature(Mips::FeatureMips1);
}
bool hasMips2() const {
- return getSTI().getFeatureBits()[Mips::FeatureMips2];
+ return getSTI().hasFeature(Mips::FeatureMips2);
}
bool hasMips3() const {
- return getSTI().getFeatureBits()[Mips::FeatureMips3];
+ return getSTI().hasFeature(Mips::FeatureMips3);
}
bool hasMips4() const {
- return getSTI().getFeatureBits()[Mips::FeatureMips4];
+ return getSTI().hasFeature(Mips::FeatureMips4);
}
bool hasMips5() const {
- return getSTI().getFeatureBits()[Mips::FeatureMips5];
+ return getSTI().hasFeature(Mips::FeatureMips5);
}
bool hasMips32() const {
- return getSTI().getFeatureBits()[Mips::FeatureMips32];
+ return getSTI().hasFeature(Mips::FeatureMips32);
}
bool hasMips64() const {
- return getSTI().getFeatureBits()[Mips::FeatureMips64];
+ return getSTI().hasFeature(Mips::FeatureMips64);
}
bool hasMips32r2() const {
- return getSTI().getFeatureBits()[Mips::FeatureMips32r2];
+ return getSTI().hasFeature(Mips::FeatureMips32r2);
}
bool hasMips64r2() const {
- return getSTI().getFeatureBits()[Mips::FeatureMips64r2];
+ return getSTI().hasFeature(Mips::FeatureMips64r2);
}
bool hasMips32r3() const {
- return (getSTI().getFeatureBits()[Mips::FeatureMips32r3]);
+ return (getSTI().hasFeature(Mips::FeatureMips32r3));
}
bool hasMips64r3() const {
- return (getSTI().getFeatureBits()[Mips::FeatureMips64r3]);
+ return (getSTI().hasFeature(Mips::FeatureMips64r3));
}
bool hasMips32r5() const {
- return (getSTI().getFeatureBits()[Mips::FeatureMips32r5]);
+ return (getSTI().hasFeature(Mips::FeatureMips32r5));
}
bool hasMips64r5() const {
- return (getSTI().getFeatureBits()[Mips::FeatureMips64r5]);
+ return (getSTI().hasFeature(Mips::FeatureMips64r5));
}
bool hasMips32r6() const {
- return getSTI().getFeatureBits()[Mips::FeatureMips32r6];
+ return getSTI().hasFeature(Mips::FeatureMips32r6);
}
bool hasMips64r6() const {
- return getSTI().getFeatureBits()[Mips::FeatureMips64r6];
+ return getSTI().hasFeature(Mips::FeatureMips64r6);
}
bool hasDSP() const {
- return getSTI().getFeatureBits()[Mips::FeatureDSP];
+ return getSTI().hasFeature(Mips::FeatureDSP);
}
bool hasDSPR2() const {
- return getSTI().getFeatureBits()[Mips::FeatureDSPR2];
+ return getSTI().hasFeature(Mips::FeatureDSPR2);
}
bool hasDSPR3() const {
- return getSTI().getFeatureBits()[Mips::FeatureDSPR3];
+ return getSTI().hasFeature(Mips::FeatureDSPR3);
}
bool hasMSA() const {
- return getSTI().getFeatureBits()[Mips::FeatureMSA];
+ return getSTI().hasFeature(Mips::FeatureMSA);
}
bool hasCnMips() const {
- return (getSTI().getFeatureBits()[Mips::FeatureCnMips]);
+ return (getSTI().hasFeature(Mips::FeatureCnMips));
}
bool hasCnMipsP() const {
- return (getSTI().getFeatureBits()[Mips::FeatureCnMipsP]);
+ return (getSTI().hasFeature(Mips::FeatureCnMipsP));
}
bool inPicMode() {
@@ -701,30 +698,30 @@ public:
}
bool inMips16Mode() const {
- return getSTI().getFeatureBits()[Mips::FeatureMips16];
+ return getSTI().hasFeature(Mips::FeatureMips16);
}
bool useTraps() const {
- return getSTI().getFeatureBits()[Mips::FeatureUseTCCInDIV];
+ return getSTI().hasFeature(Mips::FeatureUseTCCInDIV);
}
bool useSoftFloat() const {
- return getSTI().getFeatureBits()[Mips::FeatureSoftFloat];
+ return getSTI().hasFeature(Mips::FeatureSoftFloat);
}
bool hasMT() const {
- return getSTI().getFeatureBits()[Mips::FeatureMT];
+ return getSTI().hasFeature(Mips::FeatureMT);
}
bool hasCRC() const {
- return getSTI().getFeatureBits()[Mips::FeatureCRC];
+ return getSTI().hasFeature(Mips::FeatureCRC);
}
bool hasVirt() const {
- return getSTI().getFeatureBits()[Mips::FeatureVirt];
+ return getSTI().hasFeature(Mips::FeatureVirt);
}
bool hasGINV() const {
- return getSTI().getFeatureBits()[Mips::FeatureGINV];
+ return getSTI().hasFeature(Mips::FeatureGINV);
}
/// Warn if RegIndex is the same as the current AT.
@@ -2671,9 +2668,7 @@ bool MipsAsmParser::expandJalWithRegs(MCInst &Inst, SMLoc IDLoc,
/// Can the value be represented by an unsigned N-bit value and a shift left?
template <unsigned N> static bool isShiftedUIntAtAnyPosition(uint64_t x) {
- unsigned BitNum = findFirstSet(x);
-
- return (x == x >> BitNum << BitNum) && isUInt<N>(x >> BitNum);
+ return x && isUInt<N>(x >> llvm::countr_zero(x));
}
/// Load (or add) an immediate into a register.
@@ -2798,11 +2793,14 @@ bool MipsAsmParser::loadImmediate(int64_t ImmValue, unsigned DstReg,
return true;
}
+ // We've processed ImmValue satisfying isUInt<16> above, so ImmValue must be
+ // at least 17-bit wide here.
+ unsigned BitWidth = llvm::bit_width((uint64_t)ImmValue);
+ assert(BitWidth >= 17 && "ImmValue must be at least 17-bit wide");
+
// Traditionally, these immediates are shifted as little as possible and as
// such we align the most significant bit to bit 15 of our temporary.
- unsigned FirstSet = findFirstSet((uint64_t)ImmValue);
- unsigned LastSet = findLastSet((uint64_t)ImmValue);
- unsigned ShiftAmount = FirstSet - (15 - (LastSet - FirstSet));
+ unsigned ShiftAmount = BitWidth - 16;
uint16_t Bits = (ImmValue >> ShiftAmount) & 0xffff;
TOut.emitRRI(Mips::ORi, TmpReg, ZeroReg, Bits, IDLoc, STI);
TOut.emitRRI(Mips::DSLL, TmpReg, TmpReg, ShiftAmount, IDLoc, STI);
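The new ShiftAmount is equivalent to the removed computation: with LastSet = bit_width(ImmValue) - 1, the old expression FirstSet - (15 - (LastSet - FirstSet)) simplifies to LastSet - 15, and both forms therefore reduce to bit_width(ImmValue) - 16. For example, a 37-bit immediate yields a shift of 21 either way, leaving the 16 most significant bits of the immediate in Bits.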
@@ -2923,7 +2921,7 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
(Res.getSymA()->getSymbol().isELF() &&
cast<MCSymbolELF>(Res.getSymA()->getSymbol()).getBinding() ==
ELF::STB_LOCAL);
- bool UseXGOT = STI->getFeatureBits()[Mips::FeatureXGOT] && !IsLocalSym;
+ bool UseXGOT = STI->hasFeature(Mips::FeatureXGOT) && !IsLocalSym;
// The case where the result register is $25 is somewhat special. If the
// symbol in the final relocation is external and not modified with a
@@ -3086,6 +3084,7 @@ bool MipsAsmParser::loadAndAddSymbolAddress(const MCExpr *SymExpr,
MipsMCExpr::create(MipsMCExpr::MEK_HIGHER, SymExpr, getContext());
bool RdRegIsRsReg =
+ UseSrcReg &&
getContext().getRegisterInfo()->isSuperOrSubRegisterEq(DstReg, SrcReg);
if (canUseATReg() && UseSrcReg && RdRegIsRsReg) {
@@ -3341,9 +3340,9 @@ static uint64_t convertIntToDoubleImm(uint64_t ImmOp64) {
static uint32_t covertDoubleImmToSingleImm(uint64_t ImmOp64) {
// Conversion of a double in an uint64_t to a float in a uint32_t,
// retaining the bit pattern of a float.
- double DoubleImm = BitsToDouble(ImmOp64);
+ double DoubleImm = llvm::bit_cast<double>(ImmOp64);
float TmpFloat = static_cast<float>(DoubleImm);
- return FloatToBits(TmpFloat);
+ return llvm::bit_cast<uint32_t>(TmpFloat);
}
bool MipsAsmParser::expandLoadSingleImmToGPR(MCInst &Inst, SMLoc IDLoc,
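A small self-contained illustration of the conversion performed by covertDoubleImmToSingleImm above, using std::bit_cast as the standard counterpart of the llvm::bit_cast calls in the diff (the input value is chosen purely for the example):

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  // 3.0 as an IEEE-754 double has the bit pattern 0x4008000000000000;
  // narrowed to float and re-read as bits it becomes 0x40400000.
  uint64_t ImmOp64 = 0x4008000000000000ULL;
  float F = static_cast<float>(std::bit_cast<double>(ImmOp64));
  std::printf("0x%08x\n", std::bit_cast<uint32_t>(F)); // prints 0x40400000
  return 0;
}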
@@ -6331,13 +6330,13 @@ bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
- OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
- if (ResTy == MatchOperand_Success)
+ ParseStatus Res = MatchOperandParserImpl(Operands, Mnemonic);
+ if (Res.isSuccess())
return false;
// If there wasn't a custom match, try the generic matcher below. Otherwise,
// there was a match, but an error occurred, in which case, just return that
// the operand parsing failed.
- if (ResTy == MatchOperand_ParseFail)
+ if (Res.isFailure())
return true;
LLVM_DEBUG(dbgs() << ".. Generic Parser\n");
@@ -6352,7 +6351,7 @@ bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// for div, divu, and similar instructions because it is not an operand
// to the instruction definition but an explicit register. Special case
// this situation for now.
- if (parseAnyRegister(Operands) != MatchOperand_NoMatch)
+ if (!parseAnyRegister(Operands).isNoMatch())
return false;
// Maybe it is a symbol reference.
@@ -6363,10 +6362,10 @@ bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
MCSymbol *Sym = getContext().getOrCreateSymbol("$" + Identifier);
// Otherwise create a symbol reference.
- const MCExpr *Res =
+ const MCExpr *SymRef =
MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
- Operands.push_back(MipsOperand::CreateImm(Res, S, E, *this));
+ Operands.push_back(MipsOperand::CreateImm(SymRef, S, E, *this));
return false;
}
default: {
@@ -6395,8 +6394,8 @@ OperandMatchResultTy MipsAsmParser::tryParseRegister(MCRegister &RegNo,
SMLoc &StartLoc,
SMLoc &EndLoc) {
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
- OperandMatchResultTy ResTy = parseAnyRegister(Operands);
- if (ResTy == MatchOperand_Success) {
+ ParseStatus Res = parseAnyRegister(Operands);
+ if (Res.isSuccess()) {
assert(Operands.size() == 1);
MipsOperand &Operand = static_cast<MipsOperand &>(*Operands.front());
StartLoc = Operand.getStartLoc();
@@ -6427,14 +6426,13 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
return getParser().parseExpression(Res);
}
-OperandMatchResultTy
-MipsAsmParser::parseMemOperand(OperandVector &Operands) {
+ParseStatus MipsAsmParser::parseMemOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
LLVM_DEBUG(dbgs() << "parseMemOperand\n");
const MCExpr *IdVal = nullptr;
SMLoc S;
bool isParenExpr = false;
- OperandMatchResultTy Res = MatchOperand_NoMatch;
+ ParseStatus Res = ParseStatus::NoMatch;
// First operand is the offset.
S = Parser.getTok().getLoc();
@@ -6445,7 +6443,7 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) {
if (getLexer().getKind() != AsmToken::Dollar) {
if (parseMemOffset(IdVal, isParenExpr))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
const AsmToken &Tok = Parser.getTok(); // Get the next token.
if (Tok.isNot(AsmToken::LParen)) {
@@ -6454,7 +6452,7 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) {
SMLoc E =
SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(MipsOperand::CreateImm(IdVal, S, E, *this));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
if (Tok.is(AsmToken::EndOfStatement)) {
SMLoc E =
@@ -6466,7 +6464,7 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) {
0, "0", getContext().getRegisterInfo(), S, E, *this);
Operands.push_back(
MipsOperand::CreateMem(std::move(Base), IdVal, S, E, *this));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
MCBinaryExpr::Opcode Opcode;
// GAS and LLVM treat comparison operators different. GAS will generate -1
@@ -6515,12 +6513,11 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) {
Parser.Lex();
break;
default:
- Error(Parser.getTok().getLoc(), "'(' or expression expected");
- return MatchOperand_ParseFail;
+ return Error(Parser.getTok().getLoc(), "'(' or expression expected");
}
const MCExpr * NextExpr;
if (getParser().parseExpression(NextExpr))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
IdVal = MCBinaryExpr::create(Opcode, IdVal, NextExpr, getContext());
}
@@ -6528,13 +6525,11 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) {
}
Res = parseAnyRegister(Operands);
- if (Res != MatchOperand_Success)
+ if (!Res.isSuccess())
return Res;
- if (Parser.getTok().isNot(AsmToken::RParen)) {
- Error(Parser.getTok().getLoc(), "')' expected");
- return MatchOperand_ParseFail;
- }
+ if (Parser.getTok().isNot(AsmToken::RParen))
+ return Error(Parser.getTok().getLoc(), "')' expected");
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
@@ -6560,7 +6555,7 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) {
}
Operands.push_back(MipsOperand::CreateMem(std::move(op), IdVal, S, E, *this));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
bool MipsAsmParser::searchSymbolAlias(OperandVector &Operands) {
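On the "return Error(...)" pattern used throughout this conversion: Error() reports the diagnostic and returns true, and ParseStatus is constructible from bool with true mapping to ParseStatus::Failure, which is what lets the old two-statement "Error(...); return MatchOperand_ParseFail;" sequences collapse into a single return while keeping the same semantics.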
@@ -6576,14 +6571,14 @@ bool MipsAsmParser::searchSymbolAlias(OperandVector &Operands) {
const MCSymbolRefExpr *Ref = static_cast<const MCSymbolRefExpr *>(Expr);
StringRef DefSymbol = Ref->getSymbol().getName();
if (DefSymbol.startswith("$")) {
- OperandMatchResultTy ResTy =
+ ParseStatus Res =
matchAnyRegisterNameWithoutDollar(Operands, DefSymbol.substr(1), S);
- if (ResTy == MatchOperand_Success) {
+ if (Res.isSuccess()) {
Parser.Lex();
return true;
}
- if (ResTy == MatchOperand_ParseFail)
- llvm_unreachable("Should never ParseFail");
+ if (Res.isFailure())
+ llvm_unreachable("Should never fail");
}
}
} else if (Sym->isUnset()) {
@@ -6592,9 +6587,9 @@ bool MipsAsmParser::searchSymbolAlias(OperandVector &Operands) {
// Lookup in the aliases list.
auto Entry = RegisterSets.find(Sym->getName());
if (Entry != RegisterSets.end()) {
- OperandMatchResultTy ResTy =
+ ParseStatus Res =
matchAnyRegisterWithoutDollar(Operands, Entry->getValue(), S);
- if (ResTy == MatchOperand_Success) {
+ if (Res.isSuccess()) {
Parser.Lex();
return true;
}
@@ -6604,16 +6599,14 @@ bool MipsAsmParser::searchSymbolAlias(OperandVector &Operands) {
return false;
}
-OperandMatchResultTy
-MipsAsmParser::matchAnyRegisterNameWithoutDollar(OperandVector &Operands,
- StringRef Identifier,
- SMLoc S) {
+ParseStatus MipsAsmParser::matchAnyRegisterNameWithoutDollar(
+ OperandVector &Operands, StringRef Identifier, SMLoc S) {
int Index = matchCPURegisterName(Identifier);
if (Index != -1) {
Operands.push_back(MipsOperand::createGPRReg(
Index, Identifier, getContext().getRegisterInfo(), S,
getLexer().getLoc(), *this));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
Index = matchHWRegsRegisterName(Identifier);
@@ -6621,7 +6614,7 @@ MipsAsmParser::matchAnyRegisterNameWithoutDollar(OperandVector &Operands,
Operands.push_back(MipsOperand::createHWRegsReg(
Index, Identifier, getContext().getRegisterInfo(), S,
getLexer().getLoc(), *this));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
Index = matchFPURegisterName(Identifier);
@@ -6629,7 +6622,7 @@ MipsAsmParser::matchAnyRegisterNameWithoutDollar(OperandVector &Operands,
Operands.push_back(MipsOperand::createFGRReg(
Index, Identifier, getContext().getRegisterInfo(), S,
getLexer().getLoc(), *this));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
Index = matchFCCRegisterName(Identifier);
@@ -6637,7 +6630,7 @@ MipsAsmParser::matchAnyRegisterNameWithoutDollar(OperandVector &Operands,
Operands.push_back(MipsOperand::createFCCReg(
Index, Identifier, getContext().getRegisterInfo(), S,
getLexer().getLoc(), *this));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
Index = matchACRegisterName(Identifier);
@@ -6645,7 +6638,7 @@ MipsAsmParser::matchAnyRegisterNameWithoutDollar(OperandVector &Operands,
Operands.push_back(MipsOperand::createACCReg(
Index, Identifier, getContext().getRegisterInfo(), S,
getLexer().getLoc(), *this));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
Index = matchMSA128RegisterName(Identifier);
@@ -6653,7 +6646,7 @@ MipsAsmParser::matchAnyRegisterNameWithoutDollar(OperandVector &Operands,
Operands.push_back(MipsOperand::createMSA128Reg(
Index, Identifier, getContext().getRegisterInfo(), S,
getLexer().getLoc(), *this));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
Index = matchMSA128CtrlRegisterName(Identifier);
@@ -6661,22 +6654,21 @@ MipsAsmParser::matchAnyRegisterNameWithoutDollar(OperandVector &Operands,
Operands.push_back(MipsOperand::createMSACtrlReg(
Index, Identifier, getContext().getRegisterInfo(), S,
getLexer().getLoc(), *this));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
-OperandMatchResultTy
+ParseStatus
MipsAsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands,
const AsmToken &Token, SMLoc S) {
if (Token.is(AsmToken::Identifier)) {
LLVM_DEBUG(dbgs() << ".. identifier\n");
StringRef Identifier = Token.getIdentifier();
- OperandMatchResultTy ResTy =
- matchAnyRegisterNameWithoutDollar(Operands, Identifier, S);
- return ResTy;
- } else if (Token.is(AsmToken::Integer)) {
+ return matchAnyRegisterNameWithoutDollar(Operands, Identifier, S);
+ }
+ if (Token.is(AsmToken::Integer)) {
LLVM_DEBUG(dbgs() << ".. integer\n");
int64_t RegNum = Token.getIntVal();
if (RegNum < 0 || RegNum > 31) {
@@ -6688,22 +6680,21 @@ MipsAsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands,
Operands.push_back(MipsOperand::createNumericReg(
RegNum, Token.getString(), getContext().getRegisterInfo(), S,
Token.getLoc(), *this));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
LLVM_DEBUG(dbgs() << Token.getKind() << "\n");
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
-OperandMatchResultTy
+ParseStatus
MipsAsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) {
auto Token = getLexer().peekTok(false);
return matchAnyRegisterWithoutDollar(Operands, Token, S);
}
-OperandMatchResultTy
-MipsAsmParser::parseAnyRegister(OperandVector &Operands) {
+ParseStatus MipsAsmParser::parseAnyRegister(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
LLVM_DEBUG(dbgs() << "parseAnyRegister\n");
@@ -6715,46 +6706,44 @@ MipsAsmParser::parseAnyRegister(OperandVector &Operands) {
LLVM_DEBUG(dbgs() << ".. !$ -> try sym aliasing\n");
if (Token.is(AsmToken::Identifier)) {
if (searchSymbolAlias(Operands))
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
LLVM_DEBUG(dbgs() << ".. !symalias -> NoMatch\n");
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
LLVM_DEBUG(dbgs() << ".. $\n");
- OperandMatchResultTy ResTy = matchAnyRegisterWithoutDollar(Operands, S);
- if (ResTy == MatchOperand_Success) {
+ ParseStatus Res = matchAnyRegisterWithoutDollar(Operands, S);
+ if (Res.isSuccess()) {
Parser.Lex(); // $
Parser.Lex(); // identifier
}
- return ResTy;
+ return Res;
}
-OperandMatchResultTy
-MipsAsmParser::parseJumpTarget(OperandVector &Operands) {
+ParseStatus MipsAsmParser::parseJumpTarget(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
LLVM_DEBUG(dbgs() << "parseJumpTarget\n");
SMLoc S = getLexer().getLoc();
// Registers are a valid target and have priority over symbols.
- OperandMatchResultTy ResTy = parseAnyRegister(Operands);
- if (ResTy != MatchOperand_NoMatch)
- return ResTy;
+ ParseStatus Res = parseAnyRegister(Operands);
+ if (!Res.isNoMatch())
+ return Res;
// Integers and expressions are acceptable
const MCExpr *Expr = nullptr;
if (Parser.parseExpression(Expr)) {
// We have no way of knowing if a symbol was consumed so we must ParseFail
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
}
Operands.push_back(
MipsOperand::CreateImm(Expr, S, getLexer().getLoc(), *this));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-MipsAsmParser::parseInvNum(OperandVector &Operands) {
+ParseStatus MipsAsmParser::parseInvNum(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const MCExpr *IdVal;
// If the first token is '$' we may have register operand. We have to reject
@@ -6764,23 +6753,22 @@ MipsAsmParser::parseInvNum(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
if (Parser.getTok().is(AsmToken::Dollar)) {
return matchCPURegisterName(Parser.getLexer().peekTok().getString()) == -1
- ? MatchOperand_ParseFail
- : MatchOperand_NoMatch;
+ ? ParseStatus::Failure
+ : ParseStatus::NoMatch;
}
if (getParser().parseExpression(IdVal))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(IdVal);
if (!MCE)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
int64_t Val = MCE->getValue();
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(MipsOperand::CreateImm(
MCConstantExpr::create(0 - Val, getContext()), S, E, *this));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-MipsAsmParser::parseRegisterList(OperandVector &Operands) {
+ParseStatus MipsAsmParser::parseRegisterList(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SmallVector<unsigned, 10> Regs;
unsigned RegNo;
@@ -6789,10 +6777,10 @@ MipsAsmParser::parseRegisterList(OperandVector &Operands) {
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> TmpOperands;
if (Parser.getTok().isNot(AsmToken::Dollar))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
SMLoc S = Parser.getTok().getLoc();
- while (parseAnyRegister(TmpOperands) == MatchOperand_Success) {
+ while (parseAnyRegister(TmpOperands).isSuccess()) {
SMLoc E = getLexer().getLoc();
MipsOperand &Reg = static_cast<MipsOperand &>(*TmpOperands.back());
RegNo = isGP64bit() ? Reg.getGPR64Reg() : Reg.getGPR32Reg();
@@ -6807,10 +6795,8 @@ MipsAsmParser::parseRegisterList(OperandVector &Operands) {
while (TmpReg <= RegNo) {
if ((((TmpReg < Mips::S0) || (TmpReg > Mips::S7)) && !isGP64bit()) ||
(((TmpReg < Mips::S0_64) || (TmpReg > Mips::S7_64)) &&
- isGP64bit())) {
- Error(E, "invalid register operand");
- return MatchOperand_ParseFail;
- }
+ isGP64bit()))
+ return Error(E, "invalid register operand");
PrevReg = TmpReg;
Regs.push_back(TmpReg++);
@@ -6821,24 +6807,19 @@ MipsAsmParser::parseRegisterList(OperandVector &Operands) {
} else {
if ((PrevReg == Mips::NoRegister) &&
((isGP64bit() && (RegNo != Mips::S0_64) && (RegNo != Mips::RA_64)) ||
- (!isGP64bit() && (RegNo != Mips::S0) && (RegNo != Mips::RA)))) {
- Error(E, "$16 or $31 expected");
- return MatchOperand_ParseFail;
- } else if (!(((RegNo == Mips::FP || RegNo == Mips::RA ||
- (RegNo >= Mips::S0 && RegNo <= Mips::S7)) &&
- !isGP64bit()) ||
- ((RegNo == Mips::FP_64 || RegNo == Mips::RA_64 ||
- (RegNo >= Mips::S0_64 && RegNo <= Mips::S7_64)) &&
- isGP64bit()))) {
- Error(E, "invalid register operand");
- return MatchOperand_ParseFail;
- } else if ((PrevReg != Mips::NoRegister) && (RegNo != PrevReg + 1) &&
- ((RegNo != Mips::FP && RegNo != Mips::RA && !isGP64bit()) ||
- (RegNo != Mips::FP_64 && RegNo != Mips::RA_64 &&
- isGP64bit()))) {
- Error(E, "consecutive register numbers expected");
- return MatchOperand_ParseFail;
- }
+ (!isGP64bit() && (RegNo != Mips::S0) && (RegNo != Mips::RA))))
+ return Error(E, "$16 or $31 expected");
+ if (!(((RegNo == Mips::FP || RegNo == Mips::RA ||
+ (RegNo >= Mips::S0 && RegNo <= Mips::S7)) &&
+ !isGP64bit()) ||
+ ((RegNo == Mips::FP_64 || RegNo == Mips::RA_64 ||
+ (RegNo >= Mips::S0_64 && RegNo <= Mips::S7_64)) &&
+ isGP64bit())))
+ return Error(E, "invalid register operand");
+ if ((PrevReg != Mips::NoRegister) && (RegNo != PrevReg + 1) &&
+ ((RegNo != Mips::FP && RegNo != Mips::RA && !isGP64bit()) ||
+ (RegNo != Mips::FP_64 && RegNo != Mips::RA_64 && isGP64bit())))
+ return Error(E, "consecutive register numbers expected");
Regs.push_back(RegNo);
}
@@ -6847,10 +6828,8 @@ MipsAsmParser::parseRegisterList(OperandVector &Operands) {
RegRange = true;
if (!Parser.getTok().isNot(AsmToken::Minus) &&
- !Parser.getTok().isNot(AsmToken::Comma)) {
- Error(E, "',' or '-' expected");
- return MatchOperand_ParseFail;
- }
+ !Parser.getTok().isNot(AsmToken::Comma))
+ return Error(E, "',' or '-' expected");
Lex(); // Consume comma or minus
if (Parser.getTok().isNot(AsmToken::Dollar))
@@ -6862,7 +6841,7 @@ MipsAsmParser::parseRegisterList(OperandVector &Operands) {
SMLoc E = Parser.getTok().getLoc();
Operands.push_back(MipsOperand::CreateRegList(Regs, S, E, *this));
parseMemOperand(Operands);
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
/// Sometimes (i.e. load/stores) the operand may be followed immediately by
@@ -7642,8 +7621,8 @@ bool MipsAsmParser::isPicAndNotNxxAbi() {
bool MipsAsmParser::parseDirectiveCpAdd(SMLoc Loc) {
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Reg;
- OperandMatchResultTy ResTy = parseAnyRegister(Reg);
- if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) {
+ ParseStatus Res = parseAnyRegister(Reg);
+ if (Res.isNoMatch() || Res.isFailure()) {
reportParseError("expected register");
return false;
}
@@ -7675,8 +7654,8 @@ bool MipsAsmParser::parseDirectiveCpLoad(SMLoc Loc) {
}
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Reg;
- OperandMatchResultTy ResTy = parseAnyRegister(Reg);
- if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) {
+ ParseStatus Res = parseAnyRegister(Reg);
+ if (Res.isNoMatch() || Res.isFailure()) {
reportParseError("expected register containing function address");
return false;
}
@@ -7704,8 +7683,8 @@ bool MipsAsmParser::parseDirectiveCpLocal(SMLoc Loc) {
}
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Reg;
- OperandMatchResultTy ResTy = parseAnyRegister(Reg);
- if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) {
+ ParseStatus Res = parseAnyRegister(Reg);
+ if (Res.isNoMatch() || Res.isFailure()) {
reportParseError("expected register containing global pointer");
return false;
}
@@ -7783,8 +7762,8 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
bool SaveIsReg = true;
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> TmpReg;
- OperandMatchResultTy ResTy = parseAnyRegister(TmpReg);
- if (ResTy == MatchOperand_NoMatch) {
+ ParseStatus Res = parseAnyRegister(TmpReg);
+ if (Res.isNoMatch()) {
reportParseError("expected register containing function address");
return false;
}
@@ -7801,8 +7780,8 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
if (!eatComma("unexpected token, expected comma"))
return true;
- ResTy = parseAnyRegister(TmpReg);
- if (ResTy == MatchOperand_NoMatch) {
+ Res = parseAnyRegister(TmpReg);
+ if (Res.isNoMatch()) {
const MCExpr *OffsetExpr;
int64_t OffsetVal;
SMLoc ExprLoc = getLexer().getLoc();
@@ -8675,8 +8654,8 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".frame") {
// .frame $stack_reg, frame_size_in_bytes, $return_reg
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> TmpReg;
- OperandMatchResultTy ResTy = parseAnyRegister(TmpReg);
- if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) {
+ ParseStatus Res = parseAnyRegister(TmpReg);
+ if (Res.isNoMatch() || Res.isFailure()) {
reportParseError("expected stack register");
return false;
}
@@ -8719,8 +8698,8 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
// Parse the return register.
TmpReg.clear();
- ResTy = parseAnyRegister(TmpReg);
- if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) {
+ Res = parseAnyRegister(TmpReg);
+ if (Res.isNoMatch() || Res.isFailure()) {
reportParseError("expected return register");
return false;
}
diff --git a/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index fb92590350c7..cda288a25aed 100644
--- a/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -44,26 +44,26 @@ class MipsDisassembler : public MCDisassembler {
public:
MipsDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, bool IsBigEndian)
: MCDisassembler(STI, Ctx),
- IsMicroMips(STI.getFeatureBits()[Mips::FeatureMicroMips]),
+ IsMicroMips(STI.hasFeature(Mips::FeatureMicroMips)),
IsBigEndian(IsBigEndian) {}
- bool hasMips2() const { return STI.getFeatureBits()[Mips::FeatureMips2]; }
- bool hasMips3() const { return STI.getFeatureBits()[Mips::FeatureMips3]; }
- bool hasMips32() const { return STI.getFeatureBits()[Mips::FeatureMips32]; }
+ bool hasMips2() const { return STI.hasFeature(Mips::FeatureMips2); }
+ bool hasMips3() const { return STI.hasFeature(Mips::FeatureMips3); }
+ bool hasMips32() const { return STI.hasFeature(Mips::FeatureMips32); }
bool hasMips32r6() const {
- return STI.getFeatureBits()[Mips::FeatureMips32r6];
+ return STI.hasFeature(Mips::FeatureMips32r6);
}
- bool isFP64() const { return STI.getFeatureBits()[Mips::FeatureFP64Bit]; }
+ bool isFP64() const { return STI.hasFeature(Mips::FeatureFP64Bit); }
- bool isGP64() const { return STI.getFeatureBits()[Mips::FeatureGP64Bit]; }
+ bool isGP64() const { return STI.hasFeature(Mips::FeatureGP64Bit); }
- bool isPTR64() const { return STI.getFeatureBits()[Mips::FeaturePTR64Bit]; }
+ bool isPTR64() const { return STI.hasFeature(Mips::FeaturePTR64Bit); }
- bool hasCnMips() const { return STI.getFeatureBits()[Mips::FeatureCnMips]; }
+ bool hasCnMips() const { return STI.hasFeature(Mips::FeatureCnMips); }
- bool hasCnMipsP() const { return STI.getFeatureBits()[Mips::FeatureCnMipsP]; }
+ bool hasCnMipsP() const { return STI.hasFeature(Mips::FeatureCnMipsP); }
bool hasCOP3() const {
// Only present in MIPS-I and MIPS-II
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index 933077beb0c2..3e0d51ef887c 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@@ -9,10 +9,10 @@
#include "MipsABIInfo.h"
#include "MipsRegisterInfo.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
index 046cc686b311..41f80771142d 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.h
@@ -9,9 +9,9 @@
#ifndef LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIINFO_H
#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSABIINFO_H
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index 1172a06a3e08..749223a6d01b 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -15,8 +15,8 @@
#define LLVM_LIB_TARGET_MIPS_MCTARGETDESC_MIPSASMBACKEND_H
#include "MCTargetDesc/MipsFixupKinds.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index 9c85a39bc348..f89c78e75d3e 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -12,7 +12,7 @@
#include "MipsMCAsmInfo.h"
#include "MipsABIInfo.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 781f1097176d..1c7440dfbe91 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -116,11 +116,11 @@ void MipsMCCodeEmitter::LowerCompactBranch(MCInst& Inst) const {
}
bool MipsMCCodeEmitter::isMicroMips(const MCSubtargetInfo &STI) const {
- return STI.getFeatureBits()[Mips::FeatureMicroMips];
+ return STI.hasFeature(Mips::FeatureMicroMips);
}
bool MipsMCCodeEmitter::isMips32r6(const MCSubtargetInfo &STI) const {
- return STI.getFeatureBits()[Mips::FeatureMips32r6];
+ return STI.hasFeature(Mips::FeatureMips32r6);
}
void MipsMCCodeEmitter::EmitByte(unsigned char C, raw_ostream &OS) const {
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index 273dcdb0b429..d38b89f9a1f2 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -19,7 +19,6 @@
#include "MipsMCNaCl.h"
#include "MipsTargetStreamer.h"
#include "TargetInfo/MipsTargetInfo.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInstrAnalysis.h"
@@ -32,6 +31,7 @@
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
index 9893c6055f81..d0aa14a1b724 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
@@ -37,11 +37,11 @@ static cl::opt<bool> RoundSectionSizes(
} // end anonymous namespace
static bool isMicroMips(const MCSubtargetInfo *STI) {
- return STI->getFeatureBits()[Mips::FeatureMicroMips];
+ return STI->hasFeature(Mips::FeatureMicroMips);
}
static bool isMips32r6(const MCSubtargetInfo *STI) {
- return STI->getFeatureBits()[Mips::FeatureMips32r6];
+ return STI->hasFeature(Mips::FeatureMips32r6);
}
MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S)
diff --git a/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td b/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td
index 8950de230a01..f7d0105f4d7d 100644
--- a/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td
+++ b/llvm/lib/Target/Mips/MicroMipsDSPInstrInfo.td
@@ -374,7 +374,6 @@ class WRDSP_MM_DESC {
string AsmString = !strconcat("wrdsp", "\t$rt, $mask");
list<dag> Pattern = [(int_mips_wrdsp GPR32Opnd:$rt, timmZExt7:$mask)];
InstrItinClass Itinerary = NoItinerary;
- bit isMoveReg = 1;
}
class BPOSGE32C_MMR3_DESC {
diff --git a/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index ce04124a7b00..c8c9612b75e7 100644
--- a/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -47,16 +47,16 @@ Mips16DAGToDAGISel::selectMULT(SDNode *N, unsigned Opc, const SDLoc &DL, EVT Ty,
SDNode *Lo = nullptr, *Hi = nullptr;
SDNode *Mul = CurDAG->getMachineNode(Opc, DL, MVT::Glue, N->getOperand(0),
N->getOperand(1));
- SDValue InFlag = SDValue(Mul, 0);
+ SDValue InGlue = SDValue(Mul, 0);
if (HasLo) {
unsigned Opcode = Mips::Mflo16;
- Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InFlag);
- InFlag = SDValue(Lo, 1);
+ Lo = CurDAG->getMachineNode(Opcode, DL, Ty, MVT::Glue, InGlue);
+ InGlue = SDValue(Lo, 1);
}
if (HasHi) {
unsigned Opcode = Mips::Mfhi16;
- Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InFlag);
+ Hi = CurDAG->getMachineNode(Opcode, DL, Ty, InGlue);
}
return std::make_pair(Lo, Hi);
}
diff --git a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index bb527dca6b6d..20185e83286d 100644
--- a/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -340,8 +340,8 @@ unsigned Mips16InstrInfo::loadImmediate(unsigned FrameReg, int64_t Imm,
int Reg =0;
int SpReg = 0;
- rs.enterBasicBlock(MBB);
- rs.forward(II);
+ rs.enterBasicBlockEnd(MBB);
+ rs.backward(II);
//
// We need to know which registers can be used, in the case where there
// are not enough free registers. We exclude all registers that
@@ -351,8 +351,7 @@ unsigned Mips16InstrInfo::loadImmediate(unsigned FrameReg, int64_t Imm,
RI.getAllocatableSet
(*II->getParent()->getParent(), &Mips::CPU16RegsRegClass);
// Exclude all the registers being used by the instruction.
- for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = II->getOperand(i);
+ for (MachineOperand &MO : II->operands()) {
if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() &&
!MO.getReg().isVirtual())
Candidates.reset(MO.getReg());
@@ -367,8 +366,7 @@ unsigned Mips16InstrInfo::loadImmediate(unsigned FrameReg, int64_t Imm,
// whether the register is live before the instruction. if it's not
// then we don't need to save it in case there are no free registers.
int DefReg = 0;
- for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = II->getOperand(i);
+ for (MachineOperand &MO : II->operands()) {
if (MO.isReg() && MO.isDef()) {
DefReg = MO.getReg();
break;
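
loadImmediate now seeds the scavenger from the block end and walks backwards, matching the supportsBackwardScavenger() override added to MipsRegisterInfo further down. A minimal sketch of that RegScavenger usage, with MBB and II standing for the basic block and instruction iterator as in the function above:

  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/RegisterScavenging.h"

  // Seed a scavenger at the end of MBB and walk it back to instruction II.
  static void scavengeBackwardTo(llvm::MachineBasicBlock &MBB,
                                 llvm::MachineBasicBlock::iterator II) {
    llvm::RegScavenger RS;
    RS.enterBasicBlockEnd(MBB); // liveness is tracked from the block end
    RS.backward(II);            // step backwards to the instruction of interest
  }
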
diff --git a/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp b/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp
index 33da0ff31be8..ea4a25a62a0b 100644
--- a/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp
+++ b/llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp
@@ -43,7 +43,7 @@ void MipsAnalyzeImmediate::GetInstSeqLsORi(uint64_t Imm, unsigned RemSize,
void MipsAnalyzeImmediate::GetInstSeqLsSLL(uint64_t Imm, unsigned RemSize,
InstSeqLs &SeqLs) {
- unsigned Shamt = countTrailingZeros(Imm);
+ unsigned Shamt = llvm::countr_zero(Imm);
GetInstSeqLs(Imm >> Shamt, RemSize - Shamt, SeqLs);
AddInstr(SeqLs, Inst(SLL, Shamt));
}
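
llvm::countr_zero (llvm/ADT/bit.h) is the renamed replacement for countTrailingZeros. A worked example of the shift-amount computation above:

  #include "llvm/ADT/bit.h"
  #include <cstdint>

  // 0x1000 has 12 trailing zero bits, so the sequence materializes
  // Imm >> 12 == 1 and then shifts left by 12.
  uint64_t Imm = UINT64_C(0x1000);
  unsigned Shamt = llvm::countr_zero(Imm); // == 12
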
diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index dd0b384ff53d..26df40e3b13c 100644
--- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -26,7 +26,6 @@
#include "TargetInfo/MipsTargetInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -58,6 +57,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <cstdint>
#include <map>
@@ -777,14 +777,18 @@ void MipsAsmPrinter::emitStartOfAsmFile(Module &M) {
// around it by re-initializing the PIC state here.
TS.setPic(OutContext.getObjectFileInfo()->isPositionIndependent());
+ // Try to get target-features from the first function.
+ StringRef FS = TM.getTargetFeatureString();
+ Module::iterator F = M.begin();
+ if (FS.empty() && M.size() && F->hasFnAttribute("target-features"))
+ FS = F->getFnAttribute("target-features").getValueAsString();
+
// Compute MIPS architecture attributes based on the default subtarget
- // that we'd have constructed. Module level directives aren't LTO
- // clean anyhow.
+ // that we'd have constructed.
// FIXME: For ifunc related functions we could iterate over and look
// for a feature string that doesn't match the default one.
const Triple &TT = TM.getTargetTriple();
StringRef CPU = MIPS_MC::selectMipsCPU(TT, TM.getTargetCPU());
- StringRef FS = TM.getTargetFeatureString();
const MipsTargetMachine &MTM = static_cast<const MipsTargetMachine &>(TM);
const MipsSubtarget STI(TT, CPU, FS, MTM.isLittleEndian(), MTM, std::nullopt);
diff --git a/llvm/lib/Target/Mips/MipsCallLowering.cpp b/llvm/lib/Target/Mips/MipsCallLowering.cpp
index 044fad6d9e5c..4d6ca5ac2bcc 100644
--- a/llvm/lib/Target/Mips/MipsCallLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsCallLowering.cpp
@@ -412,7 +412,7 @@ bool MipsCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
int VaArgOffset;
unsigned RegSize = 4;
if (ArgRegs.size() == Idx)
- VaArgOffset = alignTo(CCInfo.getNextStackOffset(), RegSize);
+ VaArgOffset = alignTo(CCInfo.getStackSize(), RegSize);
else {
VaArgOffset =
(int)ABI.GetCalleeAllocdArgSizeInBytes(CCInfo.getCallingConv()) -
@@ -524,14 +524,14 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
if (!handleAssignments(ArgHandler, ArgInfos, CCInfo, ArgLocs, MIRBuilder))
return false;
- unsigned NextStackOffset = CCInfo.getNextStackOffset();
+ unsigned StackSize = CCInfo.getStackSize();
unsigned StackAlignment = F.getParent()->getOverrideStackAlignment();
if (!StackAlignment) {
const TargetFrameLowering *TFL = MF.getSubtarget().getFrameLowering();
StackAlignment = TFL->getStackAlignment();
}
- NextStackOffset = alignTo(NextStackOffset, StackAlignment);
- CallSeqStart.addImm(NextStackOffset).addImm(0);
+ StackSize = alignTo(StackSize, StackAlignment);
+ CallSeqStart.addImm(StackSize).addImm(0);
if (IsCalleeGlobalPIC) {
MIRBuilder.buildCopy(
@@ -570,7 +570,7 @@ bool MipsCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
return false;
}
- MIRBuilder.buildInstr(Mips::ADJCALLSTACKUP).addImm(NextStackOffset).addImm(0);
+ MIRBuilder.buildInstr(Mips::ADJCALLSTACKUP).addImm(StackSize).addImm(0);
return true;
}
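
CCState's getNextStackOffset was renamed to getStackSize; the value is still rounded up to the stack alignment before being attached to the call sequence. A small worked example of that rounding (the byte counts are illustrative):

  #include "llvm/Support/MathExtras.h"

  // 20 bytes of outgoing arguments with an 8-byte stack alignment reserve
  // alignTo(20, 8) == 24 bytes for the call sequence.
  uint64_t StackSize = llvm::alignTo(20, 8); // == 24
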
diff --git a/llvm/lib/Target/Mips/MipsCombine.td b/llvm/lib/Target/Mips/MipsCombine.td
index 29550a15d38d..cb1594421cc5 100644
--- a/llvm/lib/Target/Mips/MipsCombine.td
+++ b/llvm/lib/Target/Mips/MipsCombine.td
@@ -8,8 +8,6 @@
include "llvm/Target/GlobalISel/Combine.td"
-def MipsPostLegalizerCombinerHelper: GICombinerHelper<
- "MipsGenPostLegalizerCombinerHelper", []> {
- let DisableRuleOption = "mipspostlegalizercombiner-disable-rule";
+def MipsPostLegalizerCombiner: GICombinerHelper<
+ "MipsPostLegalizerCombinerImpl", []> {
}
-
diff --git a/llvm/lib/Target/Mips/MipsDSPInstrInfo.td b/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
index dd0b48573ef6..9498cd015ba3 100644
--- a/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsDSPInstrInfo.td
@@ -452,7 +452,6 @@ class RDDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
list<dag> Pattern = [(set GPR32Opnd:$rd, (OpNode timmZExt10:$mask))];
InstrItinClass Itinerary = itin;
string BaseOpcode = instr_asm;
- bit isMoveReg = 1;
}
class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
@@ -463,7 +462,6 @@ class WRDSP_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
list<dag> Pattern = [(OpNode GPR32Opnd:$rs, timmZExt10:$mask)];
InstrItinClass Itinerary = itin;
string BaseOpcode = instr_asm;
- bit isMoveReg = 1;
}
class DPA_W_PH_DESC_BASE<string instr_asm, SDPatternOperator OpNode> {
diff --git a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 94053fa2eb7a..8aa5f769c903 100644
--- a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -321,8 +321,7 @@ static void insertDelayFiller(Iter Filler, const BB2BrMap &BrMap) {
/// This function adds registers Filler defines to MBB's live-in register list.
static void addLiveInRegs(Iter Filler, MachineBasicBlock &MBB) {
- for (unsigned I = 0, E = Filler->getNumOperands(); I != E; ++I) {
- const MachineOperand &MO = Filler->getOperand(I);
+ for (const MachineOperand &MO : Filler->operands()) {
unsigned R;
if (!MO.isReg() || !MO.isDef() || !(R = MO.getReg()))
diff --git a/llvm/lib/Target/Mips/MipsFastISel.cpp b/llvm/lib/Target/Mips/MipsFastISel.cpp
index 7533c1273523..7fcf375aa10b 100644
--- a/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -35,6 +35,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -63,7 +64,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -1135,7 +1135,7 @@ bool MipsFastISel::processCallArgs(CallLoweringInfo &CLI,
CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
// Get a count of how many bytes are to be pushed on the stack.
- NumBytes = CCInfo.getNextStackOffset();
+ NumBytes = CCInfo.getStackSize();
// This is the minimum argument area used for A0-A3.
if (NumBytes < 16)
NumBytes = 16;
@@ -1356,7 +1356,7 @@ bool MipsFastISel::fastLowerArguments() {
EVT ArgVT = TLI.getValueType(DL, ArgTy);
LLVM_DEBUG(dbgs() << ".. " << FormalArg.getArgNo() << ": "
- << ArgVT.getEVTString() << "\n");
+ << ArgVT << "\n");
if (!ArgVT.isSimple()) {
LLVM_DEBUG(dbgs() << ".. .. gave up (not a simple type)\n");
return false;
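
The clamp in processCallArgs above reflects the O32 convention that the caller always reserves a 16-byte home area for $a0-$a3, even when the arguments need less. A tiny illustration (the initial byte count is hypothetical):

  #include <algorithm>

  // 4 bytes of outgoing arguments, but O32 still reserves the home area;
  // equivalent to the `if (NumBytes < 16) NumBytes = 16;` clamp above.
  unsigned NumBytes = std::max(4u, 16u); // == 16
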
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp
index 5d1fc69b24e8..18d7773067f1 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -40,6 +40,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -63,7 +64,6 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -102,29 +102,37 @@ MVT MipsTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
if (!VT.isVector())
return getRegisterType(Context, VT);
- return Subtarget.isABI_O32() || VT.getSizeInBits() == 32 ? MVT::i32
- : MVT::i64;
+ if (VT.isPow2VectorType() && VT.getVectorElementType().isRound())
+ return Subtarget.isABI_O32() || VT.getSizeInBits() == 32 ? MVT::i32
+ : MVT::i64;
+ return getRegisterType(Context, VT.getVectorElementType());
}
unsigned MipsTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
- if (VT.isVector())
- return divideCeil(VT.getSizeInBits(), Subtarget.isABI_O32() ? 32 : 64);
+ if (VT.isVector()) {
+ if (VT.isPow2VectorType() && VT.getVectorElementType().isRound())
+ return divideCeil(VT.getSizeInBits(), Subtarget.isABI_O32() ? 32 : 64);
+ return VT.getVectorNumElements() *
+ getNumRegisters(Context, VT.getVectorElementType());
+ }
return MipsTargetLowering::getNumRegisters(Context, VT);
}
unsigned MipsTargetLowering::getVectorTypeBreakdownForCallingConv(
LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
unsigned &NumIntermediates, MVT &RegisterVT) const {
- // Break down vector types to either 2 i64s or 4 i32s.
- RegisterVT = getRegisterTypeForCallingConv(Context, CC, VT);
- IntermediateVT = RegisterVT;
- NumIntermediates =
- VT.getFixedSizeInBits() < RegisterVT.getFixedSizeInBits()
- ? VT.getVectorNumElements()
- : divideCeil(VT.getSizeInBits(), RegisterVT.getSizeInBits());
- return NumIntermediates;
+ if (VT.isPow2VectorType()) {
+ IntermediateVT = getRegisterTypeForCallingConv(Context, CC, VT);
+ RegisterVT = IntermediateVT.getSimpleVT();
+ NumIntermediates = getNumRegistersForCallingConv(Context, CC, VT);
+ return NumIntermediates;
+ }
+ IntermediateVT = VT.getVectorElementType();
+ NumIntermediates = VT.getVectorNumElements();
+ RegisterVT = getRegisterType(Context, IntermediateVT);
+ return NumIntermediates * getNumRegisters(Context, IntermediateVT);
}
SDValue MipsTargetLowering::getGlobalReg(SelectionDAG &DAG, EVT Ty) const {
@@ -493,6 +501,11 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setLibcallName(RTLIB::MULO_I128, nullptr);
}
+ if (Subtarget.isGP64bit())
+ setMaxAtomicSizeInBitsSupported(64);
+ else
+ setMaxAtomicSizeInBitsSupported(32);
+
setMinFunctionAlignment(Subtarget.isGP64bit() ? Align(8) : Align(4));
// The arguments on the stack are defined in terms of 4-byte slots on O32
@@ -1015,16 +1028,11 @@ static SDValue performMADD_MSUBCombine(SDNode *ROOTNode, SelectionDAG &CurDAG,
// Initialize accumulator.
SDLoc DL(ROOTNode);
- SDValue TopHalf;
- SDValue BottomHalf;
- BottomHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand,
- CurDAG.getIntPtrConstant(0, DL));
-
- TopHalf = CurDAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, AddOperand,
- CurDAG.getIntPtrConstant(1, DL));
- SDValue ACCIn = CurDAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped,
- BottomHalf,
- TopHalf);
+ SDValue BottomHalf, TopHalf;
+ std::tie(BottomHalf, TopHalf) =
+ CurDAG.SplitScalar(AddOperand, DL, MVT::i32, MVT::i32);
+ SDValue ACCIn =
+ CurDAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, BottomHalf, TopHalf);
// Create MipsMAdd(u) / MipsMSub(u) node.
bool IsAdd = ROOTNode->getOpcode() == ISD::ADD;
@@ -3058,13 +3066,13 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
- // The InFlag in necessary since all emitted instructions must be
+ // The InGlue is necessary since all emitted instructions must be
// stuck together.
- SDValue InFlag;
+ SDValue InGlue;
for (auto &R : RegsToPass) {
- Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, R.first, R.second, InFlag);
- InFlag = Chain.getValue(1);
+ Chain = CLI.DAG.getCopyToReg(Chain, CLI.DL, R.first, R.second, InGlue);
+ InGlue = Chain.getValue(1);
}
// Add argument registers to the end of the list so that they are
@@ -3088,8 +3096,8 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
}
Ops.push_back(CLI.DAG.getRegisterMask(Mask));
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ if (InGlue.getNode())
+ Ops.push_back(InGlue);
}
void MipsTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
@@ -3219,7 +3227,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ES ? ES->getSymbol() : nullptr);
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NextStackOffset = CCInfo.getNextStackOffset();
+ unsigned StackSize = CCInfo.getStackSize();
// Call site info for function parameters tracking.
MachineFunction::CallSiteInfo CSInfo;
@@ -3229,8 +3237,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool InternalLinkage = false;
if (IsTailCall) {
IsTailCall = isEligibleForTailCallOptimization(
- CCInfo, NextStackOffset, *MF.getInfo<MipsFunctionInfo>());
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ CCInfo, StackSize, *MF.getInfo<MipsFunctionInfo>());
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
InternalLinkage = G->getGlobal()->hasInternalLinkage();
IsTailCall &= (InternalLinkage || G->getGlobal()->hasLocalLinkage() ||
G->getGlobal()->hasPrivateLinkage() ||
@@ -3249,10 +3257,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// ByValChain is the output chain of the last Memcpy node created for copying
// byval arguments to the stack.
unsigned StackAlignment = TFL->getStackAlignment();
- NextStackOffset = alignTo(NextStackOffset, StackAlignment);
+ StackSize = alignTo(StackSize, StackAlignment);
if (!(IsTailCall || MemcpyInByVal))
- Chain = DAG.getCALLSEQ_START(Chain, NextStackOffset, 0, DL);
+ Chain = DAG.getCALLSEQ_START(Chain, StackSize, 0, DL);
SDValue StackPtr =
DAG.getCopyFromReg(Chain, DL, ABI.IsN64() ? Mips::SP_64 : Mips::SP,
@@ -3473,27 +3481,27 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, Ops);
- SDValue InFlag = Chain.getValue(1);
+ SDValue InGlue = Chain.getValue(1);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
// Create the CALLSEQ_END node in the case of where it is not a call to
// memcpy.
if (!(MemcpyInByVal)) {
- Chain = DAG.getCALLSEQ_END(Chain, NextStackOffset, 0, InFlag, DL);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, StackSize, 0, InGlue, DL);
+ InGlue = Chain.getValue(1);
}
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
+ return LowerCallResult(Chain, InGlue, CallConv, IsVarArg, Ins, DL, DAG,
InVals, CLI);
}
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue MipsTargetLowering::LowerCallResult(
- SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg,
+ SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
TargetLowering::CallLoweringInfo &CLI) const {
@@ -3513,9 +3521,9 @@ SDValue MipsTargetLowering::LowerCallResult(
assert(VA.isRegLoc() && "Can only return in registers!");
SDValue Val = DAG.getCopyFromReg(Chain, DL, RVLocs[i].getLocReg(),
- RVLocs[i].getLocVT(), InFlag);
+ RVLocs[i].getLocVT(), InGlue);
Chain = Val.getValue(1);
- InFlag = Val.getValue(2);
+ InGlue = Val.getValue(2);
if (VA.isUpperBitsInLoc()) {
unsigned ValSizeInBits = Ins[i].ArgVT.getSizeInBits();
@@ -3645,7 +3653,7 @@ SDValue MipsTargetLowering::LowerFormalArguments(
"Functions with the interrupt attribute cannot have arguments!");
CCInfo.AnalyzeFormalArguments(Ins, CC_Mips_FixedArg);
- MipsFI->setFormalArgInfo(CCInfo.getNextStackOffset(),
+ MipsFI->setFormalArgInfo(CCInfo.getStackSize(),
CCInfo.getInRegsParamsCount() > 0);
unsigned CurArgIdx = 0;
@@ -3832,7 +3840,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Mips);
- SDValue Flag;
+ SDValue Glue;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
@@ -3878,10 +3886,10 @@ MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
DAG.getConstant(LocSizeInBits - ValSizeInBits, DL, VA.getLocVT()));
}
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag);
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
// Guarantee that all emitted copies are stuck together with flags.
- Flag = Chain.getValue(1);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
@@ -3899,16 +3907,16 @@ MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(DAG.getDataLayout()));
unsigned V0 = ABI.IsN64() ? Mips::V0_64 : Mips::V0;
- Chain = DAG.getCopyToReg(Chain, DL, V0, Val, Flag);
- Flag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, DL, V0, Val, Glue);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(V0, getPointerTy(DAG.getDataLayout())));
}
RetOps[0] = Chain; // Update chain.
- // Add the flag if we have it.
- if (Flag.getNode())
- RetOps.push_back(Flag);
+ // Add the glue if we have it.
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
// ISRs must use "eret".
if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt"))
@@ -4038,7 +4046,7 @@ static std::pair<bool, bool> parsePhysicalReg(StringRef C, StringRef &Prefix,
EVT MipsTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
ISD::NodeType) const {
bool Cond = !Subtarget.isABI_O32() && VT.getSizeInBits() == 32;
- EVT MinVT = getRegisterType(Context, Cond ? MVT::i64 : MVT::i32);
+ EVT MinVT = getRegisterType(Cond ? MVT::i64 : MVT::i32);
return VT.bitsLT(MinVT) ? MinVT : VT;
}
@@ -4507,7 +4515,7 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
int VaArgOffset;
if (ArgRegs.size() == Idx)
- VaArgOffset = alignTo(State.getNextStackOffset(), RegSizeInBytes);
+ VaArgOffset = alignTo(State.getStackSize(), RegSizeInBytes);
else {
VaArgOffset =
(int)ABI.GetCalleeAllocdArgSizeInBytes(State.getCallingConv()) -
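
For the power-of-two vector case, getNumRegistersForCallingConv above reduces to a rounding-up division over the register width. A worked example with llvm::divideCeil (llvm/Support/MathExtras.h):

  #include "llvm/Support/MathExtras.h"

  // A v4i32 argument (128 bits) needs four 32-bit registers under O32 and
  // two 64-bit registers under N32/N64.
  unsigned RegsO32 = llvm::divideCeil(128, 32); // == 4
  unsigned RegsN64 = llvm::divideCeil(128, 64); // == 2
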
diff --git a/llvm/lib/Target/Mips/MipsISelLowering.h b/llvm/lib/Target/Mips/MipsISelLowering.h
index 723be3b31dce..8614c4d3abe5 100644
--- a/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -28,7 +29,6 @@
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Type.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
@@ -524,7 +524,7 @@ class TargetRegisterClass;
unsigned Flag) const;
// Lower Operand helpers
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
diff --git a/llvm/lib/Target/Mips/MipsInstructionSelector.cpp b/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
index 04fe0960998d..4478a574a240 100644
--- a/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
+++ b/llvm/lib/Target/Mips/MipsInstructionSelector.cpp
@@ -15,7 +15,7 @@
#include "MipsMachineFunction.h"
#include "MipsRegisterBankInfo.h"
#include "MipsTargetMachine.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/IR/IntrinsicsMips.h"
diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
index 7ed504325dbf..2738a78e4a86 100644
--- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -445,7 +445,7 @@ bool MipsLegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
MIRBuilder.buildMergeLikeInstr(s64, {Src, C_HiMask.getReg(0)});
MachineInstrBuilder TwoP52FP = MIRBuilder.buildFConstant(
- s64, BitsToDouble(UINT64_C(0x4330000000000000)));
+ s64, llvm::bit_cast<double>(UINT64_C(0x4330000000000000)));
if (DstTy == s64)
MIRBuilder.buildFSub(Dst, Bitcast, TwoP52FP);
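
llvm::bit_cast (llvm/ADT/bit.h) replaces BitsToDouble and reinterprets the bit pattern without going through memory. The constant used above is the IEEE-754 encoding of 2^52, the usual anchor for the unsigned-to-double conversion trick:

  #include "llvm/ADT/bit.h"
  #include <cstdint>

  // 0x4330000000000000 has exponent 1075 (= 1023 + 52) and a zero mantissa,
  // i.e. exactly 2^52 == 4503599627370496.0.
  double TwoP52 = llvm::bit_cast<double>(UINT64_C(0x4330000000000000));
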
diff --git a/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp b/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp
index 96fd9abfa19f..ef847adbebc1 100644
--- a/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp
+++ b/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -33,7 +34,6 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/RecyclingAllocator.h"
#include <cassert>
#include <utility>
diff --git a/llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp b/llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp
index c16869aeb637..4247bf9a4e3a 100644
--- a/llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/Mips/MipsPostLegalizerCombiner.cpp
@@ -18,38 +18,84 @@
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Target/TargetMachine.h"
+#define GET_GICOMBINER_DEPS
+#include "MipsGenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_DEPS
+
#define DEBUG_TYPE "mips-postlegalizer-combiner"
using namespace llvm;
using namespace MIPatternMatch;
-#define MIPSPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+namespace {
+#define GET_GICOMBINER_TYPES
#include "MipsGenPostLegalizeGICombiner.inc"
-#undef MIPSPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
+#undef GET_GICOMBINER_TYPES
-namespace {
-#define MIPSPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+class MipsPostLegalizerCombinerImpl : public GIMatchTableExecutor {
+protected:
+ CombinerHelper &Helper;
+ const MipsPostLegalizerCombinerImplRuleConfig &RuleConfig;
+
+ const MipsSubtarget &STI;
+ GISelChangeObserver &Observer;
+ MachineIRBuilder &B;
+ MachineFunction &MF;
+
+ MachineRegisterInfo &MRI;
+
+public:
+ MipsPostLegalizerCombinerImpl(
+ const MipsPostLegalizerCombinerImplRuleConfig &RuleConfig,
+ const MipsSubtarget &STI, GISelChangeObserver &Observer,
+ MachineIRBuilder &B, CombinerHelper &Helper);
+
+ static const char *getName() { return "MipsPostLegalizerCombiner"; }
+
+ bool tryCombineAll(MachineInstr &I) const;
+
+private:
+#define GET_GICOMBINER_CLASS_MEMBERS
+#include "MipsGenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_CLASS_MEMBERS
+};
+
+#define GET_GICOMBINER_IMPL
#include "MipsGenPostLegalizeGICombiner.inc"
-#undef MIPSPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
+#undef GET_GICOMBINER_IMPL
+
+MipsPostLegalizerCombinerImpl::MipsPostLegalizerCombinerImpl(
+ const MipsPostLegalizerCombinerImplRuleConfig &RuleConfig,
+ const MipsSubtarget &STI, GISelChangeObserver &Observer,
+ MachineIRBuilder &B, CombinerHelper &Helper)
+ : Helper(Helper), RuleConfig(RuleConfig), STI(STI), Observer(Observer),
+ B(B), MF(B.getMF()), MRI(*B.getMRI()),
+#define GET_GICOMBINER_CONSTRUCTOR_INITS
+#include "MipsGenPostLegalizeGICombiner.inc"
+#undef GET_GICOMBINER_CONSTRUCTOR_INITS
+{
+}
class MipsPostLegalizerCombinerInfo final : public CombinerInfo {
GISelKnownBits *KB;
public:
- MipsGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
+ MipsPostLegalizerCombinerImplRuleConfig RuleConfig;
MipsPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
GISelKnownBits *KB, const MipsLegalizerInfo *LI)
: CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
/*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
KB(KB) {
- if (!GeneratedRuleCfg.parseCommandLineOption())
+ if (!RuleConfig.parseCommandLineOption())
report_fatal_error("Invalid rule identifier");
}
@@ -60,17 +106,14 @@ public:
bool MipsPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
-
+ const auto &STI = MI.getMF()->getSubtarget<MipsSubtarget>();
CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ false, KB,
/*DominatorTree*/ nullptr, LInfo);
- MipsGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper);
- return Generated.tryCombineAll(Observer, MI, B, Helper);
+ MipsPostLegalizerCombinerImpl Impl(RuleConfig, STI, Observer, B, Helper);
+ Impl.setupMF(*MI.getMF(), KB);
+ return Impl.tryCombineAll(MI);
}
-#define MIPSPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
-#include "MipsGenPostLegalizeGICombiner.inc"
-#undef MIPSPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
-
// Pass boilerplate
// ================
diff --git a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
index e1fa03379776..3b12cb35b367 100644
--- a/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -239,11 +239,6 @@ getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-bool
-MipsRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
- return true;
-}
-
// FrameIndex represents objects inside an abstract stack.
// We must replace FrameIndex with a stack/frame pointer
// direct reference.
diff --git a/llvm/lib/Target/Mips/MipsRegisterInfo.h b/llvm/lib/Target/Mips/MipsRegisterInfo.h
index 7eaab8d1d206..1463304d35ce 100644
--- a/llvm/lib/Target/Mips/MipsRegisterInfo.h
+++ b/llvm/lib/Target/Mips/MipsRegisterInfo.h
@@ -56,8 +56,6 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
- bool requiresRegisterScavenging(const MachineFunction &MF) const override;
-
/// Stack Frame Processing Methods
bool eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
@@ -72,6 +70,8 @@ public:
/// Return GPR register class.
virtual const TargetRegisterClass *intRegClass(unsigned Size) const = 0;
+ bool supportsBackwardScavenger() const override { return true; }
+
private:
virtual void eliminateFI(MachineBasicBlock::iterator II, unsigned OpNo,
int FrameIndex, uint64_t StackSize,
diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index df357506b34f..138735d44df6 100644
--- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -204,15 +204,15 @@ void MipsSEDAGToDAGISel::processFunctionAfterISel(MachineFunction &MF) {
}
void MipsSEDAGToDAGISel::selectAddE(SDNode *Node, const SDLoc &DL) const {
- SDValue InFlag = Node->getOperand(2);
- unsigned Opc = InFlag.getOpcode();
+ SDValue InGlue = Node->getOperand(2);
+ unsigned Opc = InGlue.getOpcode();
SDValue LHS = Node->getOperand(0), RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
// In the base case, we can rely on the carry bit from the addsc
// instruction.
if (Opc == ISD::ADDC) {
- SDValue Ops[3] = {LHS, RHS, InFlag};
+ SDValue Ops[3] = {LHS, RHS, InGlue};
CurDAG->SelectNodeTo(Node, Mips::ADDWC, VT, MVT::Glue, Ops);
return;
}
@@ -236,7 +236,7 @@ void MipsSEDAGToDAGISel::selectAddE(SDNode *Node, const SDLoc &DL) const {
SDValue OuFlag = CurDAG->getTargetConstant(20, DL, MVT::i32);
SDNode *DSPCtrlField = CurDAG->getMachineNode(Mips::RDDSP, DL, MVT::i32,
- MVT::Glue, CstOne, InFlag);
+ MVT::Glue, CstOne, InGlue);
SDNode *Carry = CurDAG->getMachineNode(
Mips::EXT, DL, MVT::i32, SDValue(DSPCtrlField, 0), OuFlag, CstOne);
@@ -670,8 +670,7 @@ bool MipsSEDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const {
// as the original value.
if (ImmValue == ~(~ImmValue & ~(~ImmValue + 1))) {
- Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N),
- EltTy);
+ Imm = CurDAG->getTargetConstant(ImmValue.popcount() - 1, SDLoc(N), EltTy);
return true;
}
}
@@ -702,8 +701,7 @@ bool MipsSEDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const {
// Extract the run of set bits starting with bit zero, and test that the
// result is the same as the original value
if (ImmValue == (ImmValue & ~(ImmValue + 1))) {
- Imm = CurDAG->getTargetConstant(ImmValue.countPopulation() - 1, SDLoc(N),
- EltTy);
+ Imm = CurDAG->getTargetConstant(ImmValue.popcount() - 1, SDLoc(N), EltTy);
return true;
}
}
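
APInt::popcount() is the renamed countPopulation(). For the trailing-ones mask matched in selectVSplatMaskR, the encoded immediate is the index of the highest set bit; a worked example:

  #include "llvm/ADT/APInt.h"

  // 0x3f is a run of six trailing ones, so the mask test holds and the
  // immediate becomes popcount() - 1 == 5 (index of the top set bit).
  llvm::APInt ImmValue(32, 0x3f);
  bool IsTrailingOnesMask = ImmValue == (ImmValue & ~(ImmValue + 1)); // true
  unsigned Imm = ImmValue.popcount() - 1;                             // == 5
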
diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 2891ff0ef223..b84f304373f6 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -18,7 +18,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -27,6 +26,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -39,9 +39,9 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -1230,10 +1230,9 @@ SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op,
// Bitcast i64 to double.
if (Src == MVT::i64 && Dest == MVT::f64) {
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
- Op.getOperand(0), DAG.getIntPtrConstant(0, DL));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
- Op.getOperand(0), DAG.getIntPtrConstant(1, DL));
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) =
+ DAG.SplitScalar(Op.getOperand(0), DL, MVT::i32, MVT::i32);
return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
@@ -1277,10 +1276,8 @@ SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc,
}
static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) {
- SDValue InLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
- DAG.getConstant(0, DL, MVT::i32));
- SDValue InHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, In,
- DAG.getConstant(1, DL, MVT::i32));
+ SDValue InLo, InHi;
+ std::tie(InLo, InHi) = DAG.SplitScalar(In, DL, MVT::i32, MVT::i32);
return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi);
}
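
SelectionDAG::SplitScalar packages the two hand-built EXTRACT_ELEMENT nodes into one call: the first element of the returned pair is the low half of the input and the second is the high half. A sketch of the i64 case used above (the helper name is illustrative):

  #include <utility>
  #include "llvm/CodeGen/SelectionDAG.h"

  // Split a 64-bit scalar into its 32-bit halves: {bits [31:0], bits [63:32]}.
  static std::pair<llvm::SDValue, llvm::SDValue>
  splitI64(llvm::SelectionDAG &DAG, llvm::SDValue In, const llvm::SDLoc &DL) {
    return DAG.SplitScalar(In, DL, llvm::MVT::i32, llvm::MVT::i32);
  }
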
diff --git a/llvm/lib/Target/Mips/MipsSEISelLowering.h b/llvm/lib/Target/Mips/MipsSEISelLowering.h
index 9714e976a9d3..34f221e2fbf2 100644
--- a/llvm/lib/Target/Mips/MipsSEISelLowering.h
+++ b/llvm/lib/Target/Mips/MipsSEISelLowering.h
@@ -14,8 +14,8 @@
#define LLVM_LIB_TARGET_MIPS_MIPSSEISELLOWERING_H
#include "MipsISelLowering.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
-#include "llvm/Support/MachineValueType.h"
namespace llvm {
diff --git a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
index f752ab2d2549..d76dc0143b23 100644
--- a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -200,44 +200,14 @@ static bool isORCopyInst(const MachineInstr &MI) {
return false;
}
-/// If @MI is WRDSP/RRDSP instruction return true with @isWrite set to true
-/// if it is WRDSP instruction.
-static bool isReadOrWriteToDSPReg(const MachineInstr &MI, bool &isWrite) {
- switch (MI.getOpcode()) {
- default:
- return false;
- case Mips::WRDSP:
- case Mips::WRDSP_MM:
- isWrite = true;
- break;
- case Mips::RDDSP:
- case Mips::RDDSP_MM:
- isWrite = false;
- break;
- }
- return true;
-}
-
/// We check for the common case of 'or', as it's MIPS' preferred instruction
/// for GPRs but we have to check the operands to ensure that is the case.
/// Other move instructions for MIPS are directly identifiable.
std::optional<DestSourcePair>
MipsSEInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
- bool isDSPControlWrite = false;
- // Condition is made to match the creation of WRDSP/RDDSP copy instruction
- // from copyPhysReg function.
- if (isReadOrWriteToDSPReg(MI, isDSPControlWrite)) {
- if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != (1 << 4))
- return std::nullopt;
- else if (isDSPControlWrite) {
- return DestSourcePair{MI.getOperand(2), MI.getOperand(0)};
-
- } else {
- return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
- }
- } else if (MI.isMoveReg() || isORCopyInst(MI)) {
+ if (MI.isMoveReg() || isORCopyInst(MI))
return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
- }
+
return std::nullopt;
}
diff --git a/llvm/lib/Target/Mips/MipsSubtarget.cpp b/llvm/lib/Target/Mips/MipsSubtarget.cpp
index 10530cdafeed..323e611207a2 100644
--- a/llvm/lib/Target/Mips/MipsSubtarget.cpp
+++ b/llvm/lib/Target/Mips/MipsSubtarget.cpp
@@ -104,8 +104,7 @@ MipsSubtarget::MipsSubtarget(const Triple &TT, StringRef CPU, StringRef FS,
report_fatal_error("Code generation for MIPS-V is not implemented", false);
// Check if Architecture and ABI are compatible.
- assert(((!isGP64bit() && isABI_O32()) ||
- (isGP64bit() && (isABI_N32() || isABI_N64()))) &&
+ assert(((!isGP64bit() && isABI_O32()) || isGP64bit()) &&
"Invalid Arch & ABI pair.");
if (hasMSA() && !isFP64bit())
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index 0f4a8176429f..07c56ac79a63 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -60,12 +60,6 @@ void NVPTXInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
case 6:
OS << "%fd";
break;
- case 7:
- OS << "%h";
- break;
- case 8:
- OS << "%hh";
- break;
}
unsigned VReg = Reg.id() & 0x0FFFFFFF;
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index 85ace96eeeaf..b453024ba372 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "NVPTXMCAsmInfo.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index 3bd9a7f08f54..ec32a95dea90 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -38,14 +38,16 @@ enum CondCodes {
FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
llvm::CodeGenOpt::Level OptLevel);
ModulePass *createNVPTXAssignValidGlobalNamesPass();
-ModulePass *createGenericToNVVMPass();
+ModulePass *createGenericToNVVMLegacyPass();
+ModulePass *createNVPTXCtorDtorLoweringLegacyPass();
FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
FunctionPass *createNVVMReflectPass(unsigned int SmVersion);
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
-FunctionPass *createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM);
+FunctionPass *createNVPTXLowerArgsPass();
FunctionPass *createNVPTXLowerAllocaPass();
+FunctionPass *createNVPTXLowerUnreachablePass();
MachineFunctionPass *createNVPTXPeephole();
MachineFunctionPass *createNVPTXProxyRegErasurePass();
@@ -67,6 +69,10 @@ private:
unsigned SmVersion;
};
+struct GenericToNVVMPass : PassInfoMixin<GenericToNVVMPass> {
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
namespace NVPTX {
enum DrvInterface {
NVCL,
diff --git a/llvm/lib/Target/NVPTX/NVPTX.td b/llvm/lib/Target/NVPTX/NVPTX.td
index 4d4203c50376..02fa2a4ee81e 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/llvm/lib/Target/NVPTX/NVPTX.td
@@ -24,89 +24,24 @@ include "NVPTXInstrInfo.td"
// TableGen in NVPTXGenSubtarget.inc.
//===----------------------------------------------------------------------===//
-// SM Versions
-def SM20 : SubtargetFeature<"sm_20", "SmVersion", "20",
- "Target SM 2.0">;
-def SM21 : SubtargetFeature<"sm_21", "SmVersion", "21",
- "Target SM 2.1">;
-def SM30 : SubtargetFeature<"sm_30", "SmVersion", "30",
- "Target SM 3.0">;
-def SM32 : SubtargetFeature<"sm_32", "SmVersion", "32",
- "Target SM 3.2">;
-def SM35 : SubtargetFeature<"sm_35", "SmVersion", "35",
- "Target SM 3.5">;
-def SM37 : SubtargetFeature<"sm_37", "SmVersion", "37",
- "Target SM 3.7">;
-def SM50 : SubtargetFeature<"sm_50", "SmVersion", "50",
- "Target SM 5.0">;
-def SM52 : SubtargetFeature<"sm_52", "SmVersion", "52",
- "Target SM 5.2">;
-def SM53 : SubtargetFeature<"sm_53", "SmVersion", "53",
- "Target SM 5.3">;
-def SM60 : SubtargetFeature<"sm_60", "SmVersion", "60",
- "Target SM 6.0">;
-def SM61 : SubtargetFeature<"sm_61", "SmVersion", "61",
- "Target SM 6.1">;
-def SM62 : SubtargetFeature<"sm_62", "SmVersion", "62",
- "Target SM 6.2">;
-def SM70 : SubtargetFeature<"sm_70", "SmVersion", "70",
- "Target SM 7.0">;
-def SM72 : SubtargetFeature<"sm_72", "SmVersion", "72",
- "Target SM 7.2">;
-def SM75 : SubtargetFeature<"sm_75", "SmVersion", "75",
- "Target SM 7.5">;
-def SM80 : SubtargetFeature<"sm_80", "SmVersion", "80",
- "Target SM 8.0">;
-def SM86 : SubtargetFeature<"sm_86", "SmVersion", "86",
- "Target SM 8.6">;
-def SM87 : SubtargetFeature<"sm_87", "SmVersion", "87",
- "Target SM 8.7">;
-def SM89 : SubtargetFeature<"sm_89", "SmVersion", "89",
- "Target SM 8.9">;
-def SM90 : SubtargetFeature<"sm_90", "SmVersion", "90",
- "Target SM 9.0">;
+class FeatureSM<int version>:
+ SubtargetFeature<"sm_"# version, "SmVersion",
+ "" # version,
+ "Target SM " # version>;
+def SM90a: FeatureSM<90>;
-// PTX Versions
-def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32",
- "Use PTX version 3.2">;
-def PTX40 : SubtargetFeature<"ptx40", "PTXVersion", "40",
- "Use PTX version 4.0">;
-def PTX41 : SubtargetFeature<"ptx41", "PTXVersion", "41",
- "Use PTX version 4.1">;
-def PTX42 : SubtargetFeature<"ptx42", "PTXVersion", "42",
- "Use PTX version 4.2">;
-def PTX43 : SubtargetFeature<"ptx43", "PTXVersion", "43",
- "Use PTX version 4.3">;
-def PTX50 : SubtargetFeature<"ptx50", "PTXVersion", "50",
- "Use PTX version 5.0">;
-def PTX60 : SubtargetFeature<"ptx60", "PTXVersion", "60",
- "Use PTX version 6.0">;
-def PTX61 : SubtargetFeature<"ptx61", "PTXVersion", "61",
- "Use PTX version 6.1">;
-def PTX63 : SubtargetFeature<"ptx63", "PTXVersion", "63",
- "Use PTX version 6.3">;
-def PTX64 : SubtargetFeature<"ptx64", "PTXVersion", "64",
- "Use PTX version 6.4">;
-def PTX65 : SubtargetFeature<"ptx65", "PTXVersion", "65",
- "Use PTX version 6.5">;
-def PTX70 : SubtargetFeature<"ptx70", "PTXVersion", "70",
- "Use PTX version 7.0">;
-def PTX71 : SubtargetFeature<"ptx71", "PTXVersion", "71",
- "Use PTX version 7.1">;
-def PTX72 : SubtargetFeature<"ptx72", "PTXVersion", "72",
- "Use PTX version 7.2">;
-def PTX73 : SubtargetFeature<"ptx73", "PTXVersion", "73",
- "Use PTX version 7.3">;
-def PTX74 : SubtargetFeature<"ptx74", "PTXVersion", "74",
- "Use PTX version 7.4">;
-def PTX75 : SubtargetFeature<"ptx75", "PTXVersion", "75",
- "Use PTX version 7.5">;
-def PTX76 : SubtargetFeature<"ptx76", "PTXVersion", "76",
- "Use PTX version 7.6">;
-def PTX77 : SubtargetFeature<"ptx77", "PTXVersion", "77",
- "Use PTX version 7.7">;
-def PTX78 : SubtargetFeature<"ptx78", "PTXVersion", "78",
- "Use PTX version 7.8">;
+class FeaturePTX<int version>:
+ SubtargetFeature<"ptx"# version, "PTXVersion",
+ "" # version,
+ "Use PTX version " # version>;
+
+foreach version = [20, 21, 30, 32, 35, 37, 50, 52, 53,
+ 60, 61, 62, 70, 72, 75, 80, 86, 87, 89, 90] in
+ def SM#version: FeatureSM<version>;
+
+foreach version = [32, 40, 41, 42, 43, 50, 60, 61, 63, 64, 65,
+ 70, 71, 72, 73, 74, 75, 76, 77, 78, 80, 81] in
+ def PTX#version: FeaturePTX<version>;
//===----------------------------------------------------------------------===//
// NVPTX supported processors.
@@ -135,6 +70,7 @@ def : Proc<"sm_86", [SM86, PTX71]>;
def : Proc<"sm_87", [SM87, PTX74]>;
def : Proc<"sm_89", [SM89, PTX78]>;
def : Proc<"sm_90", [SM90, PTX78]>;
+def : Proc<"sm_90a", [SM90a, PTX80]>;
def NVPTXInstrInfo : InstrInfo {
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
new file mode 100644
index 000000000000..b4f7e78cb107
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.cpp
@@ -0,0 +1,98 @@
+//===--------------------- NVPTXAliasAnalysis.cpp--------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the NVPTX address space based alias analysis pass.
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXAliasAnalysis.h"
+#include "MCTargetDesc/NVPTXBaseInfo.h"
+#include "NVPTX.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "NVPTX-aa"
+
+AnalysisKey NVPTXAA::Key;
+
+char NVPTXAAWrapperPass::ID = 0;
+char NVPTXExternalAAWrapper::ID = 0;
+
+INITIALIZE_PASS(NVPTXAAWrapperPass, "nvptx-aa",
+ "NVPTX Address space based Alias Analysis", false, true)
+
+INITIALIZE_PASS(NVPTXExternalAAWrapper, "nvptx-aa-wrapper",
+ "NVPTX Address space based Alias Analysis Wrapper", false, true)
+
+ImmutablePass *llvm::createNVPTXAAWrapperPass() {
+ return new NVPTXAAWrapperPass();
+}
+
+ImmutablePass *llvm::createNVPTXExternalAAWrapperPass() {
+ return new NVPTXExternalAAWrapper();
+}
+
+NVPTXAAWrapperPass::NVPTXAAWrapperPass() : ImmutablePass(ID) {
+ initializeNVPTXAAWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+void NVPTXAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+}
+
+static AliasResult::Kind getAliasResult(unsigned AS1, unsigned AS2) {
+ if ((AS1 == ADDRESS_SPACE_GENERIC) || (AS2 == ADDRESS_SPACE_GENERIC))
+ return AliasResult::MayAlias;
+
+ // PTX s6.4.1.1. Generic Addressing:
+ // A generic address maps to global memory unless it falls within
+ // the window for const, local, or shared memory. The Kernel
+ // Function Parameters (.param) window is contained within the
+ // .global window.
+ //
+ // Therefore a global pointer may alias with a param pointer on some
+ // GPUs via addrspacecast(param->generic->global) when cvta.param
+ // instruction is used (PTX 7.7+ and SM_70+).
+ //
+ // TODO: cvta.param is not yet supported. We need to change aliasing
+ // rules once it is added.
+
+ return (AS1 == AS2 ? AliasResult::MayAlias : AliasResult::NoAlias);
+}
+
+AliasResult NVPTXAAResult::alias(const MemoryLocation &Loc1,
+ const MemoryLocation &Loc2, AAQueryInfo &AAQI,
+ const Instruction *) {
+ unsigned AS1 = Loc1.Ptr->getType()->getPointerAddressSpace();
+ unsigned AS2 = Loc2.Ptr->getType()->getPointerAddressSpace();
+
+ return getAliasResult(AS1, AS2);
+}
+
+// TODO: .param address space may be writable in presence of cvta.param, but
+// this instruction is currently not supported. NVPTXLowerArgs also does not
+// allow any writes to .param pointers.
+static bool isConstOrParam(unsigned AS) {
+ return AS == AddressSpace::ADDRESS_SPACE_CONST ||
+ AS == AddressSpace::ADDRESS_SPACE_PARAM;
+}
+
+ModRefInfo NVPTXAAResult::getModRefInfoMask(const MemoryLocation &Loc,
+ AAQueryInfo &AAQI,
+ bool IgnoreLocals) {
+ if (isConstOrParam(Loc.Ptr->getType()->getPointerAddressSpace()))
+ return ModRefInfo::NoModRef;
+
+ const Value *Base = getUnderlyingObject(Loc.Ptr);
+ if (isConstOrParam(Base->getType()->getPointerAddressSpace()))
+ return ModRefInfo::NoModRef;
+
+ return AAResultBase::getModRefInfoMask(Loc, AAQI, IgnoreLocals);
+}
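
The rule implemented by the file-local getAliasResult above can be restated as a standalone helper; the address-space numbers in the comment come from MCTargetDesc/NVPTXBaseInfo.h (generic = 0, global = 1, shared = 3), and the sketch is illustrative rather than part of the pass:

  #include "llvm/Analysis/AliasAnalysis.h"

  // generic vs. anything -> MayAlias; same non-generic space -> MayAlias;
  // distinct non-generic spaces -> NoAlias.
  static llvm::AliasResult::Kind addrSpaceAlias(unsigned AS1, unsigned AS2) {
    const unsigned Generic = 0; // ADDRESS_SPACE_GENERIC
    if (AS1 == Generic || AS2 == Generic)
      return llvm::AliasResult::MayAlias; // a generic pointer may point anywhere
    return AS1 == AS2 ? llvm::AliasResult::MayAlias  // same memory window
                      : llvm::AliasResult::NoAlias;  // disjoint windows
  }
  // e.g. addrSpaceAlias(1 /*global*/, 3 /*shared*/) == NoAlias
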
diff --git a/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
new file mode 100644
index 000000000000..2d204979eb6c
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVPTXAliasAnalysis.h
@@ -0,0 +1,101 @@
+//===-------------------- NVPTXAliasAnalysis.h ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This is the NVPTX address space based alias analysis pass.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXALIASANALYSIS_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXALIASANALYSIS_H
+
+#include "llvm/Analysis/AliasAnalysis.h"
+
+namespace llvm {
+
+class MemoryLocation;
+
+class NVPTXAAResult : public AAResultBase {
+public:
+ NVPTXAAResult() {}
+ NVPTXAAResult(NVPTXAAResult &&Arg) : AAResultBase(std::move(Arg)) {}
+
+ /// Handle invalidation events from the new pass manager.
+ ///
+ /// By definition, this result is stateless and so remains valid.
+ bool invalidate(Function &, const PreservedAnalyses &,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ return false;
+ }
+
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI, const Instruction *CtxI = nullptr);
+
+ ModRefInfo getModRefInfoMask(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+ bool IgnoreLocals);
+};
+
+/// Analysis pass providing a never-invalidated alias analysis result.
+class NVPTXAA : public AnalysisInfoMixin<NVPTXAA> {
+ friend AnalysisInfoMixin<NVPTXAA>;
+
+ static AnalysisKey Key;
+
+public:
+ using Result = NVPTXAAResult;
+
+ NVPTXAAResult run(Function &F, AnalysisManager<Function> &AM) {
+ return NVPTXAAResult();
+ }
+};
+
+/// Legacy wrapper pass to provide the NVPTXAAResult object.
+class NVPTXAAWrapperPass : public ImmutablePass {
+ std::unique_ptr<NVPTXAAResult> Result;
+
+public:
+ static char ID;
+
+ NVPTXAAWrapperPass();
+
+ NVPTXAAResult &getResult() { return *Result; }
+ const NVPTXAAResult &getResult() const { return *Result; }
+
+ bool doInitialization(Module &M) override {
+ Result.reset(new NVPTXAAResult());
+ return false;
+ }
+
+ bool doFinalization(Module &M) override {
+ Result.reset();
+ return false;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+};
+
+// Wrapper around ExternalAAWrapperPass so that the default
+// constructor gets the callback.
+class NVPTXExternalAAWrapper : public ExternalAAWrapperPass {
+public:
+ static char ID;
+
+ NVPTXExternalAAWrapper()
+ : ExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
+ if (auto *WrapperPass =
+ P.getAnalysisIfAvailable<NVPTXAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ }) {}
+};
+
+ImmutablePass *createNVPTXAAWrapperPass();
+void initializeNVPTXAAWrapperPassPass(PassRegistry &);
+ImmutablePass *createNVPTXExternalAAWrapperPass();
+void initializeNVPTXExternalAAWrapperPass(PassRegistry &);
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_NVPTX_NVPTXALIASANALYSIS_H
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 9acad670aa7e..71b70766bf9e 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -33,7 +33,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/CodeGen/Analysis.h"
@@ -45,6 +44,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
@@ -75,12 +75,12 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <cassert>
#include <cstdint>
@@ -92,6 +92,11 @@
using namespace llvm;
+static cl::opt<bool>
+ LowerCtorDtor("nvptx-lower-global-ctor-dtor",
+ cl::desc("Lower GPU ctor / dtors to globals on the device."),
+ cl::init(false), cl::Hidden);
+
#define DEPOTNAME "__local_depot"
/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
@@ -267,6 +272,10 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
MCOp = MCOperand::createExpr(
NVPTXFloatMCExpr::createConstantFPHalf(Val, OutContext));
break;
+ case Type::BFloatTyID:
+ MCOp = MCOperand::createExpr(
+ NVPTXFloatMCExpr::createConstantBFPHalf(Val, OutContext));
+ break;
case Type::FloatTyID:
MCOp = MCOperand::createExpr(
NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext));
@@ -304,10 +313,6 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
Ret = (5 << 28);
} else if (RC == &NVPTX::Float64RegsRegClass) {
Ret = (6 << 28);
- } else if (RC == &NVPTX::Float16RegsRegClass) {
- Ret = (7 << 28);
- } else if (RC == &NVPTX::Float16x2RegsRegClass) {
- Ret = (8 << 28);
} else {
report_fatal_error("Bad register class");
}
@@ -329,6 +334,11 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
return MCOperand::createExpr(Expr);
}
+static bool ShouldPassAsArray(Type *Ty) {
+ return Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128) ||
+ Ty->isHalfTy() || Ty->isBFloatTy();
+}
+
void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
const DataLayout &DL = getDataLayout();
const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F);
@@ -340,11 +350,11 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
if (Ty->getTypeID() == Type::VoidTyID)
return;
-
O << " (";
if (isABI) {
- if (Ty->isFloatingPointTy() || (Ty->isIntegerTy() && !Ty->isIntegerTy(128))) {
+ if ((Ty->isFloatingPointTy() || Ty->isIntegerTy()) &&
+ !ShouldPassAsArray(Ty)) {
unsigned size = 0;
if (auto *ITy = dyn_cast<IntegerType>(Ty)) {
size = ITy->getBitWidth();
@@ -352,16 +362,12 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
assert(Ty->isFloatingPointTy() && "Floating point type expected here");
size = Ty->getPrimitiveSizeInBits();
}
- // PTX ABI requires all scalar return values to be at least 32
- // bits in size. fp16 normally uses .b16 as its storage type in
- // PTX, so its size must be adjusted here, too.
size = promoteScalarArgumentSize(size);
-
O << ".param .b" << size << " func_retval0";
} else if (isa<PointerType>(Ty)) {
O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
<< " func_retval0";
- } else if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
+ } else if (ShouldPassAsArray(Ty)) {
unsigned totalsz = DL.getTypeAllocSize(Ty);
unsigned retAlignment = 0;
if (!getAlign(*F, 0, retAlignment))
@@ -429,7 +435,7 @@ bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
if (MDNode *UnrollCountMD =
GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) {
if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1))
- ->getZExtValue() == 1)
+ ->isOne())
return true;
}
}
@@ -466,7 +472,8 @@ void NVPTXAsmPrinter::emitFunctionEntryLabel() {
CurrentFnSym->print(O, MAI);
- emitFunctionParamList(*MF, O);
+ emitFunctionParamList(F, O);
+ O << "\n";
if (isKernelFunction(*F))
emitKernelFunctionDirectives(*F, O);
@@ -617,6 +624,7 @@ void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
getSymbol(F)->print(O, MAI);
O << "\n";
emitFunctionParamList(F, O);
+ O << "\n";
if (shouldEmitPTXNoReturn(F, TM))
O << ".noreturn";
O << ";\n";
@@ -700,7 +708,7 @@ static bool useFuncSeen(const Constant *C,
const Function *caller = bb->getParent();
if (!caller)
continue;
- if (seenMap.find(caller) != seenMap.end())
+ if (seenMap.contains(caller))
return true;
}
}
@@ -753,7 +761,7 @@ void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) {
// If a caller has already been seen, then the caller is
// appearing in the module before the callee. so print out
// a declaration for the callee.
- if (seenMap.find(caller) != seenMap.end()) {
+ if (seenMap.contains(caller)) {
emitDeclaration(&F, O);
break;
}
@@ -784,16 +792,20 @@ void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) {
}
bool NVPTXAsmPrinter::doInitialization(Module &M) {
- if (M.alias_size()) {
- report_fatal_error("Module has aliases, which NVPTX does not support.");
- return true; // error
- }
- if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors"))) {
+ const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
+ const NVPTXSubtarget &STI =
+ *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl());
+ if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
+ report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
+
+ if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")) &&
+ !LowerCtorDtor) {
report_fatal_error(
"Module has a nontrivial global ctor, which NVPTX does not support.");
return true; // error
}
- if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors"))) {
+ if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")) &&
+ !LowerCtorDtor) {
report_fatal_error(
"Module has a nontrivial global dtor, which NVPTX does not support.");
return true; // error
@@ -826,8 +838,7 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) {
for (const GlobalVariable &I : M.globals())
VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting);
- assert(GVVisited.size() == M.getGlobalList().size() &&
- "Missed a global variable");
+ assert(GVVisited.size() == M.global_size() && "Missed a global variable");
assert(GVVisiting.size() == 0 && "Did not fully process a global variable");
const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM);
@@ -843,6 +854,34 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) {
OutStreamer->emitRawText(OS2.str());
}
+void NVPTXAsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
+
+ MCSymbol *Name = getSymbol(&GA);
+ const Function *F = dyn_cast<Function>(GA.getAliasee());
+ if (!F || isKernelFunction(*F))
+ report_fatal_error("NVPTX aliasee must be a non-kernel function");
+
+ if (GA.hasLinkOnceLinkage() || GA.hasWeakLinkage() ||
+ GA.hasAvailableExternallyLinkage() || GA.hasCommonLinkage())
+ report_fatal_error("NVPTX aliasee must not be '.weak'");
+
+ OS << "\n";
+ emitLinkageDirective(F, OS);
+ OS << ".func ";
+ printReturnValStr(F, OS);
+ OS << Name->getName();
+ emitFunctionParamList(F, OS);
+ if (shouldEmitPTXNoReturn(F, TM))
+ OS << "\n.noreturn";
+ OS << ";\n";
+
+ OS << ".alias " << Name->getName() << ", " << F->getName() << ";\n";
+
+ OutStreamer->emitRawText(OS.str());
+}
+
void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O,
const NVPTXSubtarget &STI) {
O << "//\n";
@@ -899,6 +938,16 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) {
GlobalsEmitted = true;
}
+ // If we have any aliases we emit them at the end.
+ SmallVector<GlobalAlias *> AliasesToRemove;
+ for (GlobalAlias &Alias : M.aliases()) {
+ emitGlobalAlias(M, Alias);
+ AliasesToRemove.push_back(&Alias);
+ }
+
+ for (GlobalAlias *A : AliasesToRemove)
+ A->eraseFromParent();
+
// call doFinalization
bool ret = AsmPrinter::doFinalization(M);
@@ -1149,7 +1198,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
}
}
} else {
- unsigned int ElementSize = 0;
+ uint64_t ElementSize = 0;
// Although PTX has direct support for struct type and array type and
// LLVM IR is very similar to PTX, the LLVM CodeGen does not support for
@@ -1353,8 +1402,10 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const {
}
break;
}
+ case Type::BFloatTyID:
case Type::HalfTyID:
- // fp16 is stored as .b16 for compatibility with pre-sm_53 PTX assembly.
+ // fp16 and bf16 are stored as .b16 for compatibility with pre-sm_53
+ // PTX assembly.
return "b16";
case Type::FloatTyID:
return "f32";
@@ -1442,12 +1493,6 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
}
}
-void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
- int paramIndex, raw_ostream &O) {
- getSymbol(I->getParent())->print(O, MAI);
- O << "_param_" << paramIndex;
-}
-
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
const DataLayout &DL = getDataLayout();
const AttributeList &PAL = F->getAttributes();
@@ -1462,7 +1507,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
bool hasImageHandles = STI.hasImageHandles();
if (F->arg_empty() && !F->isVarArg()) {
- O << "()\n";
+ O << "()";
return;
}
@@ -1486,24 +1531,21 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << "\t.param .u64 .ptr .surfref ";
else
O << "\t.param .surfref ";
- CurrentFnSym->print(O, MAI);
- O << "_param_" << paramIndex;
+ O << TLI->getParamName(F, paramIndex);
}
else { // Default image is read_only
if (hasImageHandles)
O << "\t.param .u64 .ptr .texref ";
else
O << "\t.param .texref ";
- CurrentFnSym->print(O, MAI);
- O << "_param_" << paramIndex;
+ O << TLI->getParamName(F, paramIndex);
}
} else {
if (hasImageHandles)
O << "\t.param .u64 .ptr .samplerref ";
else
O << "\t.param .samplerref ";
- CurrentFnSym->print(O, MAI);
- O << "_param_" << paramIndex;
+ O << TLI->getParamName(F, paramIndex);
}
continue;
}
@@ -1517,7 +1559,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
};
if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) {
- if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
+ if (ShouldPassAsArray(Ty)) {
// Just print .param .align <a> .b8 .param[size];
// <a> = optimal alignment for the element type; always multiple of
// PAL.getParamAlignment
@@ -1525,7 +1567,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
Align OptimalAlign = getOptimalAlignForParam(Ty);
O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
- printParamName(I, paramIndex, O);
+ O << TLI->getParamName(F, paramIndex);
O << "[" << DL.getTypeAllocSize(Ty) << "]";
continue;
@@ -1564,7 +1606,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
Align ParamAlign = I->getParamAlign().valueOrOne();
O << ".align " << ParamAlign.value() << " ";
}
- printParamName(I, paramIndex, O);
+ O << TLI->getParamName(F, paramIndex);
continue;
}
@@ -1576,7 +1618,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
else
O << getPTXFundamentalTypeStr(Ty);
O << " ";
- printParamName(I, paramIndex, O);
+ O << TLI->getParamName(F, paramIndex);
continue;
}
// Non-kernel function, just print .param .b<size> for ABI
@@ -1588,18 +1630,13 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
} else if (PTy) {
assert(PTySizeInBits && "Invalid pointer size");
sz = PTySizeInBits;
- } else if (Ty->isHalfTy())
- // PTX ABI requires all scalar parameters to be at least 32
- // bits in size. fp16 normally uses .b16 as its storage type
- // in PTX, so its size must be adjusted here, too.
- sz = 32;
- else
+ } else
sz = Ty->getPrimitiveSizeInBits();
if (isABI)
O << "\t.param .b" << sz << " ";
else
O << "\t.reg .b" << sz << " ";
- printParamName(I, paramIndex, O);
+ O << TLI->getParamName(F, paramIndex);
continue;
}
@@ -1620,7 +1657,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
unsigned sz = DL.getTypeAllocSize(ETy);
O << "\t.param .align " << OptimalAlign.value() << " .b8 ";
- printParamName(I, paramIndex, O);
+ O << TLI->getParamName(F, paramIndex);
O << "[" << sz << "]";
continue;
} else {
@@ -1643,7 +1680,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
if (elemtype.isInteger())
sz = promoteScalarArgumentSize(sz);
O << "\t.reg .b" << sz << " ";
- printParamName(I, paramIndex, O);
+ O << TLI->getParamName(F, paramIndex);
if (j < je - 1)
O << ",\n";
++paramIndex;
@@ -1661,17 +1698,10 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << ",\n";
O << "\t.param .align " << STI.getMaxRequiredAlignment();
O << " .b8 ";
- getSymbol(F)->print(O, MAI);
- O << "_vararg[]";
+ O << TLI->getParamName(F, /* vararg */ -1) << "[]";
}
- O << "\n)\n";
-}
-
-void NVPTXAsmPrinter::emitFunctionParamList(const MachineFunction &MF,
- raw_ostream &O) {
- const Function &F = MF.getFunction();
- emitFunctionParamList(&F, O);
+ O << "\n)";
}
void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
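Note on the alias emission added above: emitGlobalAlias() only accepts an aliasee that is a plain, non-kernel device function with strong linkage, because the PTX ".alias" directive cannot express kernel or weak aliases. Below is a minimal standalone sketch of that validity check; the helper name isEmittableAsPTXAlias is hypothetical, and the kernel test is approximated here with the PTX_Kernel calling convention, whereas the patch itself uses isKernelFunction().

#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/Support/Casting.h"

// Hypothetical helper mirroring the checks in emitGlobalAlias(); returns true
// when the alias can be printed as a PTX ".alias" directive.
static bool isEmittableAsPTXAlias(const llvm::GlobalAlias &GA) {
  const auto *F = llvm::dyn_cast<llvm::Function>(GA.getAliasee());
  // The aliasee must be a function and must not be a kernel entry point
  // (approximation; the in-tree code calls isKernelFunction()).
  if (!F || F->getCallingConv() == llvm::CallingConv::PTX_Kernel)
    return false;
  // PTX ".alias" has no notion of weak symbols, so weak-style linkages are
  // rejected, matching the report_fatal_error paths above.
  return !(GA.hasLinkOnceLinkage() || GA.hasWeakLinkage() ||
           GA.hasAvailableExternallyLinkage() || GA.hasCommonLinkage());
}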
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 710c089e3325..2bd40116e63c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -173,14 +173,12 @@ private:
const char *Modifier = nullptr);
void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O,
bool processDemoted, const NVPTXSubtarget &STI);
- void printParamName(Function::const_arg_iterator I, int paramIndex,
- raw_ostream &O);
void emitGlobals(const Module &M);
+ void emitGlobalAlias(const Module &M, const GlobalAlias &GA);
void emitHeader(Module &M, raw_ostream &O, const NVPTXSubtarget &STI);
void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
void emitVirtualRegister(unsigned int vr, raw_ostream &);
void emitFunctionParamList(const Function *, raw_ostream &O);
- void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
void printReturnValStr(const Function *, raw_ostream &O);
void printReturnValStr(const MachineFunction &MF, raw_ostream &O);
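With printParamName() removed, the asm printer now takes parameter names from TLI->getParamName(F, Idx). The sketch below only illustrates the string shape implied by the replaced printing code, where the symbol name was followed by "_param_<index>" and index -1 is used for the vararg buffer; paramNameSketch is a hypothetical stand-in, not the real NVPTXTargetLowering::getParamName() implementation.

#include <string>
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"

// Hypothetical stand-in showing the names the printer now receives:
// "<fn symbol>_param_<N>" for ordinary parameters, "<fn symbol>_vararg" for
// the trailing vararg buffer printed in emitFunctionParamList().
static std::string paramNameSketch(llvm::StringRef FnSymbol, int Index) {
  if (Index < 0)
    return (FnSymbol + "_vararg").str();
  return (FnSymbol + "_param_").str() + std::to_string(Index);
}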
diff --git a/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp b/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
index 34b9dfe87cc2..789995743861 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAssignValidGlobalNames.cpp
@@ -73,7 +73,7 @@ std::string NVPTXAssignValidGlobalNames::cleanUpName(StringRef Name) {
std::string ValidName;
raw_string_ostream ValidNameStream(ValidName);
for (char C : Name) {
- if (C == '.' || C == '@') {
+ if (C == '.' || C == '@' || C == '<' || C == '>') {
ValidNameStream << "_$_";
} else {
ValidNameStream << C;
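The change above extends the global-name sanitizer so that '<' and '>' (which show up in itanium-mangled template names) are escaped the same way as '.' and '@'. A free-standing copy of the loop for illustration, under the assumption that cleanUpNameSketch is only an illustrative name:

#include <string>
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"

// Characters PTX does not accept in symbol names ('.', '@', and now '<', '>')
// are rewritten to the "_$_" escape sequence; everything else passes through.
static std::string cleanUpNameSketch(llvm::StringRef Name) {
  std::string ValidName;
  llvm::raw_string_ostream ValidNameStream(ValidName);
  for (char C : Name) {
    if (C == '.' || C == '@' || C == '<' || C == '>')
      ValidNameStream << "_$_";
    else
      ValidNameStream << C;
  }
  return ValidNameStream.str();
}

For example, a name like "foo.bar<int>" would come out as "foo_$_bar_$_int_$_", while a name with only ordinary identifier characters is returned unchanged.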
diff --git a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
new file mode 100644
index 000000000000..ed7839cafe3a
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.cpp
@@ -0,0 +1,117 @@
+//===-- NVPTXCtorDtorLowering.cpp - Handle global ctors and dtors --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This pass creates a unified init and fini kernel with the required metadata
+//===----------------------------------------------------------------------===//
+
+#include "NVPTXCtorDtorLowering.h"
+#include "NVPTX.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "nvptx-lower-ctor-dtor"
+
+static cl::opt<std::string>
+ GlobalStr("nvptx-lower-global-ctor-dtor-id",
+ cl::desc("Override unique ID of ctor/dtor globals."),
+ cl::init(""), cl::Hidden);
+
+namespace {
+
+static std::string getHash(StringRef Str) {
+ llvm::MD5 Hasher;
+ llvm::MD5::MD5Result Hash;
+ Hasher.update(Str);
+ Hasher.final(Hash);
+ return llvm::utohexstr(Hash.low(), /*LowerCase=*/true);
+}
+
+static bool createInitOrFiniGlobls(Module &M, StringRef GlobalName,
+ bool IsCtor) {
+ GlobalVariable *GV = M.getGlobalVariable(GlobalName);
+ if (!GV || !GV->hasInitializer())
+ return false;
+ ConstantArray *GA = dyn_cast<ConstantArray>(GV->getInitializer());
+ if (!GA || GA->getNumOperands() == 0)
+ return false;
+
+ // NVPTX has no way to emit variables at specific sections or support for
+ // the traditional constructor sections. Instead, we emit mangled global
+ // names so the runtime can build the list manually.
+ for (Value *V : GA->operands()) {
+ auto *CS = cast<ConstantStruct>(V);
+ auto *F = cast<Constant>(CS->getOperand(1));
+ uint64_t Priority = cast<ConstantInt>(CS->getOperand(0))->getSExtValue();
+ std::string PriorityStr = "." + std::to_string(Priority);
+ // We append a semi-unique hash and the priority to the global name.
+ std::string GlobalID =
+ !GlobalStr.empty() ? GlobalStr : getHash(M.getSourceFileName());
+ std::string NameStr =
+ ((IsCtor ? "__init_array_object_" : "__fini_array_object_") +
+ F->getName() + "_" + GlobalID + "_" + std::to_string(Priority))
+ .str();
+ // PTX does not support exported names with '.' in them.
+ llvm::transform(NameStr, NameStr.begin(),
+ [](char c) { return c == '.' ? '_' : c; });
+
+ auto *GV = new GlobalVariable(M, F->getType(), /*IsConstant=*/true,
+ GlobalValue::ExternalLinkage, F, NameStr,
+ nullptr, GlobalValue::NotThreadLocal,
+ /*AddressSpace=*/4);
+ // This isn't respected by Nvidia, simply put here for clarity.
+ GV->setSection(IsCtor ? ".init_array" + PriorityStr
+ : ".fini_array" + PriorityStr);
+ GV->setVisibility(GlobalVariable::ProtectedVisibility);
+ appendToUsed(M, {GV});
+ }
+
+ GV->eraseFromParent();
+ return true;
+}
+
+static bool lowerCtorsAndDtors(Module &M) {
+ bool Modified = false;
+ Modified |= createInitOrFiniGlobls(M, "llvm.global_ctors", /*IsCtor =*/true);
+ Modified |= createInitOrFiniGlobls(M, "llvm.global_dtors", /*IsCtor =*/false);
+ return Modified;
+}
+
+class NVPTXCtorDtorLoweringLegacy final : public ModulePass {
+public:
+ static char ID;
+ NVPTXCtorDtorLoweringLegacy() : ModulePass(ID) {}
+ bool runOnModule(Module &M) override { return lowerCtorsAndDtors(M); }
+};
+
+} // End anonymous namespace
+
+PreservedAnalyses NVPTXCtorDtorLoweringPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ return lowerCtorsAndDtors(M) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
+
+char NVPTXCtorDtorLoweringLegacy::ID = 0;
+char &llvm::NVPTXCtorDtorLoweringLegacyPassID = NVPTXCtorDtorLoweringLegacy::ID;
+INITIALIZE_PASS(NVPTXCtorDtorLoweringLegacy, DEBUG_TYPE,
+ "Lower ctors and dtors for NVPTX", false, false)
+
+ModulePass *llvm::createNVPTXCtorDtorLoweringLegacyPass() {
+ return new NVPTXCtorDtorLoweringLegacy();
+}
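The core of the new pass is the naming scheme: each ctor/dtor entry becomes an exported global whose name encodes init/fini, the function name, a per-module ID, and the priority, so a runtime can rebuild an ordered list purely from symbol names. The sketch below condenses that scheme; ctorDtorGlobalName is an illustrative helper, and it always hashes the module source name rather than honouring the -nvptx-lower-global-ctor-dtor-id override shown above.

#include <cstdint>
#include <string>
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MD5.h"

// Builds "__init_array_object_<fn>_<id>_<priority>" (or the __fini_ variant),
// with '.' rewritten to '_' because PTX symbols cannot contain dots.
static std::string ctorDtorGlobalName(llvm::StringRef FnName,
                                      llvm::StringRef ModuleSourceName,
                                      uint64_t Priority, bool IsCtor) {
  llvm::MD5 Hasher;
  llvm::MD5::MD5Result Hash;
  Hasher.update(ModuleSourceName);
  Hasher.final(Hash);
  std::string Id = llvm::utohexstr(Hash.low(), /*LowerCase=*/true);
  std::string Name =
      (IsCtor ? "__init_array_object_" : "__fini_array_object_");
  Name += FnName.str() + "_" + Id + "_" + std::to_string(Priority);
  for (char &C : Name)
    if (C == '.')
      C = '_';
  return Name;
}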
diff --git a/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h
new file mode 100644
index 000000000000..c03fe97f1a78
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVPTXCtorDtorLowering.h
@@ -0,0 +1,30 @@
+//===-- NVPTXCtorDtorLowering.h --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXCTORDTORLOWERING_H
+#define LLVM_LIB_TARGET_NVPTX_NVPTXCTORDTORLOWERING_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class Module;
+class PassRegistry;
+
+extern char &NVPTXCtorDtorLoweringLegacyPassID;
+extern void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
+
+/// Lower llvm.global_ctors and llvm.global_dtors to special kernels.
+class NVPTXCtorDtorLoweringPass
+ : public PassInfoMixin<NVPTXCtorDtorLoweringPass> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_NVPTX_NVPTXCTORDTORLOWERING_H
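For reference, the new-PM class declared here can be scheduled like any other module pass. This is only an illustrative usage example (assuming it is built inside the NVPTX target directory where the relative include resolves); in-tree the pass is wired up through the NVPTX target's pass registration, not by hand.

#include "NVPTXCtorDtorLowering.h"
#include "llvm/IR/PassManager.h"

// Add the ctor/dtor lowering to an existing module pipeline.
static void addCtorDtorLowering(llvm::ModulePassManager &MPM) {
  MPM.addPass(llvm::NVPTXCtorDtorLoweringPass());
}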
diff --git a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index d892023c6cb7..4f03e474edb4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -29,19 +29,13 @@
using namespace llvm;
namespace llvm {
-void initializeGenericToNVVMPass(PassRegistry &);
+void initializeGenericToNVVMLegacyPassPass(PassRegistry &);
}
namespace {
-class GenericToNVVM : public ModulePass {
+class GenericToNVVM {
public:
- static char ID;
-
- GenericToNVVM() : ModulePass(ID) {}
-
- bool runOnModule(Module &M) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {}
+ bool runOnModule(Module &M);
private:
Value *remapConstant(Module *M, Function *F, Constant *C,
@@ -59,15 +53,6 @@ private:
};
} // end namespace
-char GenericToNVVM::ID = 0;
-
-ModulePass *llvm::createGenericToNVVMPass() { return new GenericToNVVM(); }
-
-INITIALIZE_PASS(
- GenericToNVVM, "generic-to-nvvm",
- "Ensure that the global variables are in the global address space", false,
- false)
-
bool GenericToNVVM::runOnModule(Module &M) {
// Create a clone of each global variable that has the default address space.
// The clone is created with the global address space specifier, and the pair
@@ -293,3 +278,34 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
llvm_unreachable("GenericToNVVM encountered an unsupported ConstantExpr");
}
}
+
+namespace {
+class GenericToNVVMLegacyPass : public ModulePass {
+public:
+ static char ID;
+
+ GenericToNVVMLegacyPass() : ModulePass(ID) {}
+
+ bool runOnModule(Module &M) override;
+};
+} // namespace
+
+char GenericToNVVMLegacyPass::ID = 0;
+
+ModulePass *llvm::createGenericToNVVMLegacyPass() {
+ return new GenericToNVVMLegacyPass();
+}
+
+INITIALIZE_PASS(
+ GenericToNVVMLegacyPass, "generic-to-nvvm",
+ "Ensure that the global variables are in the global address space", false,
+ false)
+
+bool GenericToNVVMLegacyPass::runOnModule(Module &M) {
+ return GenericToNVVM().runOnModule(M);
+}
+
+PreservedAnalyses GenericToNVVMPass::run(Module &M, ModuleAnalysisManager &AM) {
+ return GenericToNVVM().runOnModule(M) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
+}
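The GenericToNVVM change above follows the usual recipe for supporting both pass managers: the transformation moves into a plain implementation class, and the legacy ModulePass and the new-PM PassInfoMixin pass become thin wrappers around it. A generic sketch of that pattern, with illustrative Foo* names that are not part of the patch:

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"

namespace {
// All of the real work lives here, independent of either pass manager.
class FooImpl {
public:
  bool runOnModule(llvm::Module &M) {
    (void)M; // the actual transformation would go here
    return false;
  }
};

// Legacy-PM wrapper.
class FooLegacyPass : public llvm::ModulePass {
public:
  static char ID;
  FooLegacyPass() : llvm::ModulePass(ID) {}
  bool runOnModule(llvm::Module &M) override {
    return FooImpl().runOnModule(M);
  }
};
char FooLegacyPass::ID = 0;
} // namespace

// New-PM wrapper: report preserved analyses based on whether anything changed.
class FooPass : public llvm::PassInfoMixin<FooPass> {
public:
  llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &) {
    return FooImpl().runOnModule(M) ? llvm::PreservedAnalyses::none()
                                    : llvm::PreservedAnalyses::all();
  }
};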
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index a18787196bb5..99a7fdb9d1e2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -500,7 +500,7 @@ void NVPTXDAGToDAGISel::Select(SDNode *N) {
SelectAddrSpaceCast(N);
return;
case ISD::ConstantFP:
- if (tryConstantFP16(N))
+ if (tryConstantFP(N))
return;
break;
default:
@@ -524,15 +524,17 @@ bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
}
}
-// There's no way to specify FP16 immediates in .f16 ops, so we have to
-// load them into an .f16 register first.
-bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
- if (N->getValueType(0) != MVT::f16)
+// There's no way to specify FP16 and BF16 immediates in .(b)f16 ops, so we
+// have to load them into an .(b)f16 register first.
+bool NVPTXDAGToDAGISel::tryConstantFP(SDNode *N) {
+ if (N->getValueType(0) != MVT::f16 && N->getValueType(0) != MVT::bf16)
return false;
SDValue Val = CurDAG->getTargetConstantFP(
- cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
- SDNode *LoadConstF16 =
- CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
+ cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), N->getValueType(0));
+ SDNode *LoadConstF16 = CurDAG->getMachineNode(
+ (N->getValueType(0) == MVT::f16 ? NVPTX::LOAD_CONST_F16
+ : NVPTX::LOAD_CONST_BF16),
+ SDLoc(N), N->getValueType(0), Val);
ReplaceNode(N, LoadConstF16);
return true;
}
@@ -612,9 +614,9 @@ bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
// We only care about f16x2 as it's the only real vector type we
// need to deal with.
- if (Vector.getSimpleValueType() != MVT::v2f16)
+ MVT VT = Vector.getSimpleValueType();
+ if (!(VT == MVT::v2f16 || VT == MVT::v2bf16))
return false;
-
// Find and record all uses of this vector that extract element 0 or 1.
SmallVector<SDNode *, 4> E0, E1;
for (auto *U : Vector.getNode()->uses()) {
@@ -638,18 +640,11 @@ bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
if (E0.empty() || E1.empty())
return false;
- unsigned Op = NVPTX::SplitF16x2;
- // If the vector has been BITCAST'ed from i32, we can use original
- // value directly and avoid register-to-register move.
- SDValue Source = Vector;
- if (Vector->getOpcode() == ISD::BITCAST) {
- Op = NVPTX::SplitI32toF16x2;
- Source = Vector->getOperand(0);
- }
// Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
// into f16,f16 SplitF16x2(V)
+ MVT EltVT = VT.getVectorElementType();
SDNode *ScatterOp =
- CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source);
+ CurDAG->getMachineNode(NVPTX::I32toV2I16, SDLoc(N), EltVT, EltVT, Vector);
for (auto *Node : E0)
ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
for (auto *Node : E1)
@@ -816,8 +811,7 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
static std::optional<unsigned>
pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8,
unsigned Opcode_i16, unsigned Opcode_i32,
- std::optional<unsigned> Opcode_i64, unsigned Opcode_f16,
- unsigned Opcode_f16x2, unsigned Opcode_f32,
+ std::optional<unsigned> Opcode_i64, unsigned Opcode_f32,
std::optional<unsigned> Opcode_f64) {
switch (VT) {
case MVT::i1:
@@ -831,10 +825,10 @@ pickOpcodeForVT(MVT::SimpleValueType VT, unsigned Opcode_i8,
return Opcode_i64;
case MVT::f16:
case MVT::bf16:
- return Opcode_f16;
+ return Opcode_i16;
case MVT::v2f16:
case MVT::v2bf16:
- return Opcode_f16x2;
+ return Opcode_i32;
case MVT::f32:
return Opcode_f32;
case MVT::f64:
@@ -935,10 +929,9 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
if (SelectDirectAddr(N1, Addr)) {
- Opcode = pickOpcodeForVT(
- TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
- NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
- NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
+ Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar,
+ NVPTX::LD_i32_avar, NVPTX::LD_i64_avar,
+ NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
if (!Opcode)
return false;
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
@@ -948,9 +941,8 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
} else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
: SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
- NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
- NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
- NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
+ NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
+ NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
if (!Opcode)
return false;
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
@@ -960,15 +952,14 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
} else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
: SelectADDRri(N1.getNode(), N1, Base, Offset)) {
if (PointerSize == 64)
- Opcode = pickOpcodeForVT(
- TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
- NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
- NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
+ Opcode =
+ pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
+ NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64,
+ NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
else
- Opcode = pickOpcodeForVT(
- TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
- NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
- NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
+ Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari,
+ NVPTX::LD_i32_ari, NVPTX::LD_i64_ari,
+ NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
if (!Opcode)
return false;
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
@@ -977,16 +968,14 @@ bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
NVPTXLD = CurDAG->getMachineNode(*Opcode, dl, TargetVT, MVT::Other, Ops);
} else {
if (PointerSize == 64)
- Opcode = pickOpcodeForVT(
- TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
- NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
- NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
- NVPTX::LD_f64_areg_64);
+ Opcode =
+ pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
+ NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64,
+ NVPTX::LD_f32_areg_64, NVPTX::LD_f64_areg_64);
else
- Opcode = pickOpcodeForVT(
- TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
- NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
- NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
+ Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg,
+ NVPTX::LD_i32_areg, NVPTX::LD_i64_areg,
+ NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
if (!Opcode)
return false;
SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
@@ -1090,15 +1079,13 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
- NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
break;
case NVPTXISD::LoadV4:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::LDV_i8_v4_avar, NVPTX::LDV_i16_v4_avar,
- NVPTX::LDV_i32_v4_avar, std::nullopt,
- NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
- NVPTX::LDV_f32_v4_avar, std::nullopt);
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
+ NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar,
+ std::nullopt, NVPTX::LDV_f32_v4_avar, std::nullopt);
break;
}
if (!Opcode)
@@ -1117,15 +1104,13 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
- NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
break;
case NVPTXISD::LoadV4:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::LDV_i8_v4_asi, NVPTX::LDV_i16_v4_asi,
- NVPTX::LDV_i32_v4_asi, std::nullopt,
- NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
- NVPTX::LDV_f32_v4_asi, std::nullopt);
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
+ NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi,
+ std::nullopt, NVPTX::LDV_f32_v4_asi, std::nullopt);
break;
}
if (!Opcode)
@@ -1142,18 +1127,16 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
default:
return false;
case NVPTXISD::LoadV2:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
- NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
- NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
- NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
- NVPTX::LDV_f64_v2_ari_64);
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::LDV_i8_v2_ari_64, NVPTX::LDV_i16_v2_ari_64,
+ NVPTX::LDV_i32_v2_ari_64, NVPTX::LDV_i64_v2_ari_64,
+ NVPTX::LDV_f32_v2_ari_64, NVPTX::LDV_f64_v2_ari_64);
break;
case NVPTXISD::LoadV4:
Opcode = pickOpcodeForVT(
EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, std::nullopt,
- NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
NVPTX::LDV_f32_v4_ari_64, std::nullopt);
break;
}
@@ -1165,15 +1148,13 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
- NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
break;
case NVPTXISD::LoadV4:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::LDV_i8_v4_ari, NVPTX::LDV_i16_v4_ari,
- NVPTX::LDV_i32_v4_ari, std::nullopt,
- NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
- NVPTX::LDV_f32_v4_ari, std::nullopt);
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
+ NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari,
+ std::nullopt, NVPTX::LDV_f32_v4_ari, std::nullopt);
break;
}
}
@@ -1193,15 +1174,13 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
Opcode = pickOpcodeForVT(
EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
- NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
- NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
+ NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
NVPTX::LDV_f64_v2_areg_64);
break;
case NVPTXISD::LoadV4:
Opcode = pickOpcodeForVT(
EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, std::nullopt,
- NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
NVPTX::LDV_f32_v4_areg_64, std::nullopt);
break;
}
@@ -1213,16 +1192,14 @@ bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
Opcode =
pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
- NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
- NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
+ NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f32_v2_areg,
NVPTX::LDV_f64_v2_areg);
break;
case NVPTXISD::LoadV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
- NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, std::nullopt,
- NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
- NVPTX::LDV_f32_v4_areg, std::nullopt);
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
+ NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg,
+ std::nullopt, NVPTX::LDV_f32_v4_areg, std::nullopt);
break;
}
}
@@ -1284,10 +1261,11 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NumElts = EltVT.getVectorNumElements();
EltVT = EltVT.getVectorElementType();
// vectors of f16 are loaded/stored as multiples of v2f16 elements.
- if (EltVT == MVT::f16 && N->getValueType(0) == MVT::v2f16) {
- assert(NumElts % 2 == 0 && "Vector must have even number of elements");
- EltVT = MVT::v2f16;
- NumElts /= 2;
+ if ((EltVT == MVT::f16 && N->getValueType(0) == MVT::v2f16) ||
+ (EltVT == MVT::bf16 && N->getValueType(0) == MVT::v2bf16)) {
+ assert(NumElts % 2 == 0 && "Vector must have even number of elements");
+ EltVT = N->getValueType(0);
+ NumElts /= 2;
}
}
@@ -1310,47 +1288,39 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG)
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
- NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
- NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
- NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
- NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
- NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
- NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
- NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
+ NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
+ NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
else
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
- NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
- NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
- NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
- NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
- NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
- NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
- NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
+ NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
+ NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
break;
case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
- NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
- NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
- NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
- NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
- NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
- NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
- NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
+ NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
+ NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
break;
case NVPTXISD::LDUV2:
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
- NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
- NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
- NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
- NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
- NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
- NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
- NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
+ NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
+ NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
break;
case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
@@ -1358,8 +1328,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, std::nullopt,
- NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
- NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, std::nullopt);
break;
case NVPTXISD::LDUV4:
@@ -1367,8 +1335,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, std::nullopt,
- NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
- NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, std::nullopt);
break;
}
@@ -1390,8 +1356,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
- NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
- NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
else
@@ -1400,8 +1364,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
- NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
- NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
break;
@@ -1412,8 +1374,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
- NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
- NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
break;
@@ -1423,8 +1383,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
- NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
- NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
break;
@@ -1434,8 +1392,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, std::nullopt,
- NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
- NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, std::nullopt);
break;
case NVPTXISD::LDUV4:
@@ -1443,8 +1399,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, std::nullopt,
- NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
- NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, std::nullopt);
break;
}
@@ -1456,47 +1410,39 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
case ISD::INTRINSIC_W_CHAIN:
if (IsLDG)
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
- NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
- NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
- NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
- NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
- NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
- NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
- NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
+ NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
+ NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
else
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
- NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
- NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
- NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
- NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
- NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
- NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
- NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
+ NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
+ NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
break;
case NVPTXISD::LoadV2:
case NVPTXISD::LDGV2:
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
+ NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
+ NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
break;
case NVPTXISD::LDUV2:
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
+ NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
+ NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
break;
case NVPTXISD::LoadV4:
case NVPTXISD::LDGV4:
@@ -1504,8 +1450,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, std::nullopt,
- NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
- NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, std::nullopt);
break;
case NVPTXISD::LDUV4:
@@ -1513,8 +1457,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, std::nullopt,
- NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
- NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, std::nullopt);
break;
}
@@ -1536,8 +1478,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
- NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
- NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
else
@@ -1546,8 +1486,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
- NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
- NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
break;
@@ -1558,8 +1496,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
- NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
- NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
break;
@@ -1569,8 +1505,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
- NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
- NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
break;
@@ -1580,8 +1514,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, std::nullopt,
- NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
- NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, std::nullopt);
break;
case NVPTXISD::LDUV4:
@@ -1589,8 +1521,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, std::nullopt,
- NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
- NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, std::nullopt);
break;
}
@@ -1606,8 +1536,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
- NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
- NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
else
@@ -1616,8 +1544,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
- NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
- NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
break;
@@ -1628,8 +1554,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
- NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
- NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
break;
@@ -1639,8 +1563,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
- NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
- NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
break;
@@ -1650,8 +1572,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, std::nullopt,
- NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
- NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, std::nullopt);
break;
case NVPTXISD::LDUV4:
@@ -1659,8 +1579,6 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EltVT.getSimpleVT().SimpleTy, NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, std::nullopt,
- NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
- NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, std::nullopt);
break;
}
@@ -1685,13 +1603,13 @@ bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
EVT OrigType = N->getValueType(0);
LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
- if (OrigType != EltVT && LdNode) {
+ if (OrigType != EltVT &&
+ (LdNode || (OrigType.isFloatingPoint() && EltVT.isFloatingPoint()))) {
// We have an extending-load. The instruction we selected operates on the
// smaller type, but the SDNode we are replacing has the larger type. We
// need to emit a CVT to make the types match.
- bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
- unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
- EltVT.getSimpleVT(), IsSigned);
+ unsigned CvtOpc =
+ GetConvertOpcode(OrigType.getSimpleVT(), EltVT.getSimpleVT(), LdNode);
// For each output value, apply the manual sign/zero-extension and make sure
// all users of the load go through that CVT.
@@ -1781,7 +1699,6 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
if (SelectDirectAddr(BasePtr, Addr)) {
Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
- NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
if (!Opcode)
return false;
@@ -1799,7 +1716,6 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
: SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
- NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
if (!Opcode)
return false;
@@ -1817,14 +1733,13 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
: SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
if (PointerSize == 64)
- Opcode = pickOpcodeForVT(
- SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
- NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
- NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
+ Opcode =
+ pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
+ NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64,
+ NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
else
Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
- NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
if (!Opcode)
return false;
@@ -1844,12 +1759,10 @@ bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
Opcode =
pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
- NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
else
Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
- NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
if (!Opcode)
return false;
@@ -1955,14 +1868,12 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
- NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
break;
case NVPTXISD::StoreV4:
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
NVPTX::STV_i8_v4_avar, NVPTX::STV_i16_v4_avar,
NVPTX::STV_i32_v4_avar, std::nullopt,
- NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
NVPTX::STV_f32_v4_avar, std::nullopt);
break;
}
@@ -1976,15 +1887,13 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
- NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
break;
case NVPTXISD::StoreV4:
- Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
- NVPTX::STV_i8_v4_asi, NVPTX::STV_i16_v4_asi,
- NVPTX::STV_i32_v4_asi, std::nullopt,
- NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
- NVPTX::STV_f32_v4_asi, std::nullopt);
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
+ NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi,
+ std::nullopt, NVPTX::STV_f32_v4_asi, std::nullopt);
break;
}
StOps.push_back(Base);
@@ -1996,18 +1905,16 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
default:
return false;
case NVPTXISD::StoreV2:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
- NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
- NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
- NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
- NVPTX::STV_f64_v2_ari_64);
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
+ NVPTX::STV_i8_v2_ari_64, NVPTX::STV_i16_v2_ari_64,
+ NVPTX::STV_i32_v2_ari_64, NVPTX::STV_i64_v2_ari_64,
+ NVPTX::STV_f32_v2_ari_64, NVPTX::STV_f64_v2_ari_64);
break;
case NVPTXISD::StoreV4:
Opcode = pickOpcodeForVT(
EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, std::nullopt,
- NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
NVPTX::STV_f32_v4_ari_64, std::nullopt);
break;
}
@@ -2019,14 +1926,12 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
- NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
break;
case NVPTXISD::StoreV4:
Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
NVPTX::STV_i8_v4_ari, NVPTX::STV_i16_v4_ari,
NVPTX::STV_i32_v4_ari, std::nullopt,
- NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
NVPTX::STV_f32_v4_ari, std::nullopt);
break;
}
@@ -2042,15 +1947,13 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
Opcode = pickOpcodeForVT(
EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
- NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
- NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
+ NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
NVPTX::STV_f64_v2_areg_64);
break;
case NVPTXISD::StoreV4:
Opcode = pickOpcodeForVT(
EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, std::nullopt,
- NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
NVPTX::STV_f32_v4_areg_64, std::nullopt);
break;
}
@@ -2062,16 +1965,14 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
Opcode =
pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
- NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
- NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
+ NVPTX::STV_i64_v2_areg, NVPTX::STV_f32_v2_areg,
NVPTX::STV_f64_v2_areg);
break;
case NVPTXISD::StoreV4:
- Opcode = pickOpcodeForVT(
- EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
- NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, std::nullopt,
- NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
- NVPTX::STV_f32_v4_areg, std::nullopt);
+ Opcode =
+ pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
+ NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg,
+ std::nullopt, NVPTX::STV_f32_v4_areg, std::nullopt);
break;
}
}
@@ -2095,7 +1996,7 @@ bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
SDValue Chain = Node->getOperand(0);
SDValue Offset = Node->getOperand(2);
- SDValue Flag = Node->getOperand(3);
+ SDValue Glue = Node->getOperand(3);
SDLoc DL(Node);
MemSDNode *Mem = cast<MemSDNode>(Node);
@@ -2126,23 +2027,20 @@ bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
- NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
break;
case 2:
Opcode =
pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
- NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
- NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
+ NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F32,
NVPTX::LoadParamMemV2F64);
break;
case 4:
- Opcode = pickOpcodeForVT(
- MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
- NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, std::nullopt,
- NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
- NVPTX::LoadParamMemV4F32, std::nullopt);
+ Opcode =
+ pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
+ NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32,
+ std::nullopt, NVPTX::LoadParamMemV4F32, std::nullopt);
break;
}
if (!Opcode)
@@ -2163,7 +2061,7 @@ bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
SmallVector<SDValue, 2> Ops;
Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Ops.push_back(Chain);
- Ops.push_back(Flag);
+ Ops.push_back(Glue);
ReplaceNode(Node, CurDAG->getMachineNode(*Opcode, DL, VTs, Ops));
return true;
@@ -2210,21 +2108,18 @@ bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
- NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
break;
case 2:
Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
- NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
break;
case 4:
Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
NVPTX::StoreRetvalV4I32, std::nullopt,
- NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
NVPTX::StoreRetvalV4F32, std::nullopt);
break;
}
@@ -2247,7 +2142,7 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
SDValue Offset = N->getOperand(2);
unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
MemSDNode *Mem = cast<MemSDNode>(N);
- SDValue Flag = N->getOperand(N->getNumOperands() - 1);
+ SDValue Glue = N->getOperand(N->getNumOperands() - 1);
// How many elements do we have?
unsigned NumElts = 1;
@@ -2274,7 +2169,7 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Ops.push_back(Chain);
- Ops.push_back(Flag);
+ Ops.push_back(Glue);
// Determine target opcode
// If we have an i1, use an 8-bit store. The lowering code in
@@ -2289,21 +2184,18 @@ bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
NVPTX::StoreParamI8, NVPTX::StoreParamI16,
NVPTX::StoreParamI32, NVPTX::StoreParamI64,
- NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
NVPTX::StoreParamF32, NVPTX::StoreParamF64);
break;
case 2:
Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
- NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
break;
case 4:
Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
NVPTX::StoreParamV4I32, std::nullopt,
- NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
NVPTX::StoreParamV4F32, std::nullopt);
break;
}
@@ -3405,7 +3297,7 @@ bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
}
// How many bits are in our mask?
- uint64_t NumBits = countTrailingOnes(MaskVal);
+ int64_t NumBits = countr_one(MaskVal);
Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
@@ -3417,7 +3309,7 @@ bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
uint64_t StartVal = StartConst->getZExtValue();
// How many "good" bits do we have left? "good" is defined here as bits
// that exist in the original value, not shifted in.
- uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
+ int64_t GoodBits = Start.getValueSizeInBits() - StartVal;
if (NumBits > GoodBits) {
// Do not handle the case where bits have been shifted in. In theory
// we could handle this, but the cost is likely higher than just
@@ -3469,10 +3361,10 @@ bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
NumZeros = 0;
// The number of bits in the result bitfield will be the number of
// trailing ones (the AND) minus the number of bits we shift off
- NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
+ NumBits = llvm::countr_one(MaskVal) - ShiftAmt;
} else if (isShiftedMask_64(MaskVal)) {
- NumZeros = countTrailingZeros(MaskVal);
- unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
+ NumZeros = llvm::countr_zero(MaskVal);
+ unsigned NumOnes = llvm::countr_one(MaskVal >> NumZeros);
// The number of bits in the result bitfield will be the number of
// trailing zeros plus the number of set bits in the mask minus the
// number of bits we shift off
@@ -3713,7 +3605,8 @@ bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
/// conversion from \p SrcTy to \p DestTy.
unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
- bool IsSigned) {
+ LoadSDNode *LdNode) {
+ bool IsSigned = LdNode && LdNode->getExtensionType() == ISD::SEXTLOAD;
switch (SrcTy.SimpleTy) {
default:
llvm_unreachable("Unhandled source type");
@@ -3761,5 +3654,14 @@ unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
case MVT::i32:
return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
}
+ case MVT::f16:
+ switch (DestTy.SimpleTy) {
+ default:
+ llvm_unreachable("Unhandled dest type");
+ case MVT::f32:
+ return NVPTX::CVT_f32_f16;
+ case MVT::f64:
+ return NVPTX::CVT_f64_f16;
+ }
}
}
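The repeated pickOpcodeForVT() changes above all drop the dedicated f16/f16x2 opcode parameters: scalar f16/bf16 values now reuse the 16-bit integer opcode and v2f16/v2bf16 reuse the 32-bit integer opcode, which matches the register-class change below where these types live in Int16Regs/Int32Regs. A condensed sketch of the resulting mapping (pickOpcodeSketch is an illustrative name, and the unrelated cases are collapsed into the default):

#include <optional>
#include "llvm/CodeGen/MachineValueType.h"

using llvm::MVT;

static std::optional<unsigned>
pickOpcodeSketch(MVT::SimpleValueType VT, unsigned Op_i16, unsigned Op_i32,
                 unsigned Op_f32) {
  switch (VT) {
  case MVT::i16:
  case MVT::f16:
  case MVT::bf16:
    return Op_i16; // 16-bit FP scalars are handled by the .b16 integer form.
  case MVT::i32:
  case MVT::v2f16:
  case MVT::v2bf16:
    return Op_i32; // packed 2 x 16-bit FP is handled by the .b32 integer form.
  case MVT::f32:
    return Op_f32;
  default:
    return std::nullopt;
  }
}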
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 746a9de5a201..25bb73cd5536 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -71,7 +71,7 @@ private:
bool tryTextureIntrinsic(SDNode *N);
bool trySurfaceIntrinsic(SDNode *N);
bool tryBFE(SDNode *N);
- bool tryConstantFP16(SDNode *N);
+ bool tryConstantFP(SDNode *N);
bool SelectSETP_F16X2(SDNode *N);
bool tryEXTRACT_VECTOR_ELEMENT(SDNode *N);
@@ -97,7 +97,7 @@ private:
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
- static unsigned GetConvertOpcode(MVT DestTy, MVT SrcTy, bool IsSigned);
+ static unsigned GetConvertOpcode(MVT DestTy, MVT SrcTy, LoadSDNode *N);
};
} // end namespace llvm
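GetConvertOpcode() now receives the (possibly null) load node instead of a caller-computed flag and derives the signedness itself; the one-liner below restates that rule on its own, with wantsSignedCvt as an illustrative name.

#include "llvm/CodeGen/SelectionDAGNodes.h"

// Only a sign-extending load selects the signed CVT variants; a null node
// (the non-load FP-extension path added above) behaves like zero-extension.
static bool wantsSignedCvt(const llvm::LoadSDNode *LdNode) {
  return LdNode && LdNode->getExtensionType() == llvm::ISD::SEXTLOAD;
}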
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 008a04aa2f63..7823e12d6270 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
@@ -48,7 +49,6 @@
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -89,6 +89,12 @@ static cl::opt<bool> UsePrecSqrtF32(
cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
cl::init(true));
+static cl::opt<bool> ForceMinByValParamAlign(
+ "nvptx-force-min-byval-param-align", cl::Hidden,
+ cl::desc("NVPTX Specific: force 4-byte minimal alignment for byval"
+ " params of device functions."),
+ cl::init(false));
+
int NVPTXTargetLowering::getDivF32Level() const {
if (UsePrecDivF32.getNumOccurrences() > 0) {
// If nvptx-prec-div32=N is used on the command-line, always honor it
@@ -143,6 +149,14 @@ static bool IsPTXVectorType(MVT VT) {
}
}
+static bool Isv2f16Orv2bf16Type(EVT VT) {
+ return (VT == MVT::v2f16 || VT == MVT::v2bf16);
+}
+
+static bool Isf16Orbf16Type(MVT VT) {
+ return (VT.SimpleTy == MVT::f16 || VT.SimpleTy == MVT::bf16);
+}
+
/// ComputePTXValueVTs - For the given Type \p Ty, returns the set of primitive
/// EVTs that compose it. Unlike ComputeValueVTs, this will break apart vectors
/// into their primitive components.
@@ -193,7 +207,7 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
// Vectors with an even number of f16 elements will be passed to
// us as an array of v2f16/v2bf16 elements. We must match this so we
// stay in sync with Ins/Outs.
- if ((EltVT == MVT::f16 || EltVT == MVT::bf16) && NumElts % 2 == 0) {
+ if ((Isf16Orbf16Type(EltVT.getSimpleVT())) && NumElts % 2 == 0) {
EltVT = EltVT == MVT::f16 ? MVT::v2f16 : MVT::v2bf16;
NumElts /= 2;
}
@@ -398,16 +412,31 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(Op, VT, STI.allowFP16Math() ? Action : NoF16Action);
};
+ auto setBF16OperationAction = [&](unsigned Op, MVT VT, LegalizeAction Action,
+ LegalizeAction NoBF16Action) {
+ bool IsOpSupported = STI.hasBF16Math();
+ // A few instructions are available only on sm_90
+ switch(Op) {
+ case ISD::FADD:
+ case ISD::FMUL:
+ case ISD::FSUB:
+ IsOpSupported = STI.getSmVersion() >= 90 && STI.getPTXVersion() >= 78;
+ break;
+ }
+ setOperationAction(
+ Op, VT, IsOpSupported ? Action : NoBF16Action);
+ };
+
addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
- addRegisterClass(MVT::f16, &NVPTX::Float16RegsRegClass);
- addRegisterClass(MVT::v2f16, &NVPTX::Float16x2RegsRegClass);
- addRegisterClass(MVT::bf16, &NVPTX::Float16RegsRegClass);
- addRegisterClass(MVT::v2bf16, &NVPTX::Float16x2RegsRegClass);
+ addRegisterClass(MVT::f16, &NVPTX::Int16RegsRegClass);
+ addRegisterClass(MVT::v2f16, &NVPTX::Int32RegsRegClass);
+ addRegisterClass(MVT::bf16, &NVPTX::Int16RegsRegClass);
+ addRegisterClass(MVT::v2bf16, &NVPTX::Int32RegsRegClass);
// Conversion to/from FP16/FP16x2 is always legal.
setOperationAction(ISD::SINT_TO_FP, MVT::f16, Legal);
@@ -420,9 +449,19 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setFP16OperationAction(ISD::SETCC, MVT::f16, Legal, Promote);
setFP16OperationAction(ISD::SETCC, MVT::v2f16, Legal, Expand);
+ // Conversion to/from BF16/BF16x2 is always legal.
+ setOperationAction(ISD::SINT_TO_FP, MVT::bf16, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::bf16, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2bf16, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2bf16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2bf16, Expand);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2bf16, Expand);
+
+ setBF16OperationAction(ISD::SETCC, MVT::bf16, Legal, Promote);
+ setBF16OperationAction(ISD::SETCC, MVT::v2bf16, Legal, Expand);
// Operations not directly supported by NVPTX.
- for (MVT VT : {MVT::f16, MVT::v2f16, MVT::f32, MVT::f64, MVT::i1, MVT::i8,
- MVT::i16, MVT::i32, MVT::i64}) {
+ for (MVT VT : {MVT::bf16, MVT::f16, MVT::v2bf16, MVT::v2f16, MVT::f32,
+ MVT::f64, MVT::i1, MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::BR_CC, VT, Expand);
}
@@ -476,17 +515,25 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// Turn FP extload into load/fpextend
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2bf16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4bf16, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4bf16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand);
// Turn FP truncstore into trunc + store.
// FIXME: vector types should also be expanded
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setTruncStoreAction(MVT::f32, MVT::bf16, Expand);
+ setTruncStoreAction(MVT::f64, MVT::bf16, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// PTX does not support load / store predicate registers
@@ -563,9 +610,9 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setTargetDAGCombine({ISD::ADD, ISD::AND, ISD::FADD, ISD::MUL, ISD::SHL,
ISD::SREM, ISD::UREM});
- // setcc for f16x2 needs special handling to prevent legalizer's
- // attempt to scalarize it due to v2i1 not being legal.
- if (STI.allowFP16Math())
+ // setcc for f16x2 and bf16x2 needs special handling to prevent
+ // legalizer's attempt to scalarize it due to v2i1 not being legal.
+ if (STI.allowFP16Math() || STI.hasBF16Math())
setTargetDAGCombine(ISD::SETCC);
// Promote fp16 arithmetic if fp16 hardware isn't available or the
@@ -577,6 +624,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
for (const auto &Op : {ISD::FADD, ISD::FMUL, ISD::FSUB, ISD::FMA}) {
setFP16OperationAction(Op, MVT::f16, Legal, Promote);
setFP16OperationAction(Op, MVT::v2f16, Legal, Expand);
+ setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
+ setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand);
+ // bf16 must be promoted to f32.
+ if (getOperationAction(Op, MVT::bf16) == Promote)
+ AddPromotedToType(Op, MVT::bf16, MVT::f32);
}
// f16/f16x2 neg was introduced in PTX 60, SM_53.
@@ -587,19 +639,25 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setOperationAction(ISD::FNEG, VT,
IsFP16FP16x2NegAvailable ? Legal : Expand);
+ setBF16OperationAction(ISD::FNEG, MVT::bf16, Legal, Expand);
+ setBF16OperationAction(ISD::FNEG, MVT::v2bf16, Legal, Expand);
// (would be) Library functions.
// These map to conversion instructions for scalar FP types.
for (const auto &Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,
ISD::FROUNDEVEN, ISD::FTRUNC}) {
+ setOperationAction(Op, MVT::bf16, Legal);
setOperationAction(Op, MVT::f16, Legal);
setOperationAction(Op, MVT::f32, Legal);
setOperationAction(Op, MVT::f64, Legal);
setOperationAction(Op, MVT::v2f16, Expand);
+ setOperationAction(Op, MVT::v2bf16, Expand);
}
setOperationAction(ISD::FROUND, MVT::f16, Promote);
setOperationAction(ISD::FROUND, MVT::v2f16, Expand);
+ setOperationAction(ISD::FROUND, MVT::bf16, Promote);
+ setOperationAction(ISD::FROUND, MVT::v2bf16, Expand);
setOperationAction(ISD::FROUND, MVT::f32, Custom);
setOperationAction(ISD::FROUND, MVT::f64, Custom);
@@ -607,6 +665,8 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// 'Expand' implements FCOPYSIGN without calling an external library.
setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::v2f16, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v2bf16, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
@@ -616,9 +676,11 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
for (const auto &Op :
{ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FABS}) {
setOperationAction(Op, MVT::f16, Promote);
+ setOperationAction(Op, MVT::bf16, Promote);
setOperationAction(Op, MVT::f32, Legal);
setOperationAction(Op, MVT::f64, Legal);
setOperationAction(Op, MVT::v2f16, Expand);
+ setOperationAction(Op, MVT::v2bf16, Expand);
}
// max.f16, max.f16x2 and max.NaN are supported on sm_80+.
auto GetMinMaxAction = [&](LegalizeAction NotSm80Action) {
@@ -627,14 +689,18 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
};
for (const auto &Op : {ISD::FMINNUM, ISD::FMAXNUM}) {
setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Promote), Promote);
+ setBF16OperationAction(Op, MVT::bf16, Legal, Promote);
setOperationAction(Op, MVT::f32, Legal);
setOperationAction(Op, MVT::f64, Legal);
setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
+ setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand);
}
for (const auto &Op : {ISD::FMINIMUM, ISD::FMAXIMUM}) {
setFP16OperationAction(Op, MVT::f16, GetMinMaxAction(Expand), Expand);
+ setFP16OperationAction(Op, MVT::bf16, Legal, Expand);
setOperationAction(Op, MVT::f32, GetMinMaxAction(Expand));
setFP16OperationAction(Op, MVT::v2f16, GetMinMaxAction(Expand), Expand);
+ setBF16OperationAction(Op, MVT::v2bf16, Legal, Expand);
}
// No FEXP2, FLOG2. The PTX ex2 and log2 functions are always approximate.
@@ -653,8 +719,8 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
break;
case NVPTXISD::CALL:
return "NVPTXISD::CALL";
- case NVPTXISD::RET_FLAG:
- return "NVPTXISD::RET_FLAG";
+ case NVPTXISD::RET_GLUE:
+ return "NVPTXISD::RET_GLUE";
case NVPTXISD::LOAD_PARAM:
return "NVPTXISD::LOAD_PARAM";
case NVPTXISD::Wrapper:
@@ -1252,7 +1318,7 @@ NVPTXTargetLowering::getPreferredVectorAction(MVT VT) const {
if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
VT.getScalarType() == MVT::i1)
return TypeSplitVector;
- if (VT == MVT::v2f16)
+ if (Isv2f16Orv2bf16Type(VT))
return TypeLegal;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
@@ -1315,6 +1381,11 @@ NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(NVPTXISD::Wrapper, dl, PtrVT, Op);
}
+static bool IsTypePassedAsArray(const Type *Ty) {
+ return Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128) ||
+ Ty->isHalfTy() || Ty->isBFloatTy();
+}
+
std::string NVPTXTargetLowering::getPrototype(
const DataLayout &DL, Type *retTy, const ArgListTy &Args,
const SmallVectorImpl<ISD::OutputArg> &Outs, MaybeAlign retAlignment,
@@ -1335,7 +1406,8 @@ std::string NVPTXTargetLowering::getPrototype(
O << "()";
} else {
O << "(";
- if (retTy->isFloatingPointTy() || (retTy->isIntegerTy() && !retTy->isIntegerTy(128))) {
+ if ((retTy->isFloatingPointTy() || retTy->isIntegerTy()) &&
+ !IsTypePassedAsArray(retTy)) {
unsigned size = 0;
if (auto *ITy = dyn_cast<IntegerType>(retTy)) {
size = ITy->getBitWidth();
@@ -1352,8 +1424,7 @@ std::string NVPTXTargetLowering::getPrototype(
O << ".param .b" << size << " _";
} else if (isa<PointerType>(retTy)) {
O << ".param .b" << PtrVT.getSizeInBits() << " _";
- } else if (retTy->isAggregateType() || retTy->isVectorTy() ||
- retTy->isIntegerTy(128)) {
+ } else if (IsTypePassedAsArray(retTy)) {
O << ".param .align " << (retAlignment ? retAlignment->value() : 0)
<< " .b8 _[" << DL.getTypeAllocSize(retTy) << "]";
} else {
@@ -1375,7 +1446,7 @@ std::string NVPTXTargetLowering::getPrototype(
first = false;
if (!Outs[OIdx].Flags.isByVal()) {
- if (Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128)) {
+ if (IsTypePassedAsArray(Ty)) {
unsigned ParamAlign = 0;
const CallInst *CallI = cast<CallInst>(&CB);
// +1 because index 0 is reserved for return type alignment
@@ -1402,13 +1473,9 @@ std::string NVPTXTargetLowering::getPrototype(
sz = promoteScalarArgumentSize(sz);
} else if (isa<PointerType>(Ty)) {
sz = PtrVT.getSizeInBits();
- } else if (Ty->isHalfTy())
- // PTX ABI requires all scalar parameters to be at least 32
- // bits in size. fp16 normally uses .b16 as its storage type
- // in PTX, so its size must be adjusted here, too.
- sz = 32;
- else
+ } else {
sz = Ty->getPrimitiveSizeInBits();
+ }
O << ".param .b" << sz << " ";
O << "_";
continue;
@@ -1523,7 +1590,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
unsigned UniqueCallSite = GlobalUniqueCallSite.fetch_add(1);
SDValue TempChain = Chain;
Chain = DAG.getCALLSEQ_START(Chain, UniqueCallSite, 0, dl);
- SDValue InFlag = Chain.getValue(1);
+ SDValue InGlue = Chain.getValue(1);
unsigned ParamCount = 0;
// Args.size() and Outs.size() need not match.
@@ -1571,24 +1638,23 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
bool NeedAlign; // Does argument declaration specify alignment?
+ bool PassAsArray = IsByVal || IsTypePassedAsArray(Ty);
if (IsVAArg) {
if (ParamCount == FirstVAArg) {
SDValue DeclareParamOps[] = {
Chain, DAG.getConstant(STI.getMaxRequiredAlignment(), dl, MVT::i32),
DAG.getConstant(ParamCount, dl, MVT::i32),
- DAG.getConstant(1, dl, MVT::i32), InFlag};
+ DAG.getConstant(1, dl, MVT::i32), InGlue};
VADeclareParam = Chain = DAG.getNode(NVPTXISD::DeclareParam, dl,
DeclareParamVTs, DeclareParamOps);
}
- NeedAlign = IsByVal || Ty->isAggregateType() || Ty->isVectorTy() ||
- Ty->isIntegerTy(128);
- } else if (IsByVal || Ty->isAggregateType() || Ty->isVectorTy() ||
- Ty->isIntegerTy(128)) {
+ NeedAlign = PassAsArray;
+ } else if (PassAsArray) {
// declare .param .align <align> .b8 .param<n>[<size>];
SDValue DeclareParamOps[] = {
Chain, DAG.getConstant(ArgAlign.value(), dl, MVT::i32),
DAG.getConstant(ParamCount, dl, MVT::i32),
- DAG.getConstant(TypeSize, dl, MVT::i32), InFlag};
+ DAG.getConstant(TypeSize, dl, MVT::i32), InGlue};
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
NeedAlign = true;
@@ -1603,12 +1669,12 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue DeclareScalarParamOps[] = {
Chain, DAG.getConstant(ParamCount, dl, MVT::i32),
DAG.getConstant(TypeSize * 8, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), InFlag};
+ DAG.getConstant(0, dl, MVT::i32), InGlue};
Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
DeclareScalarParamOps);
NeedAlign = false;
}
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
// PTX Interoperability Guide 3.3(A): [Integer] Values shorter
// than 32-bits are sign extended or zero extended, depending on
@@ -1689,7 +1755,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
llvm_unreachable("Invalid vector info.");
}
- StoreOperands.push_back(InFlag);
+ StoreOperands.push_back(InGlue);
// Adjust type of the store op if we've extended the scalar
// return value.
@@ -1699,7 +1765,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Op, dl, DAG.getVTList(MVT::Other, MVT::Glue), StoreOperands,
TheStoreType, MachinePointerInfo(), PartAlign,
MachineMemOperand::MOStore);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
// Cleanup.
StoreOperands.clear();
@@ -1733,23 +1799,18 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ComputeValueVTs(*this, DL, RetTy, resvtparts);
// Declare
- // .param .align 16 .b8 retval0[<size-in-bytes>], or
+ // .param .align N .b8 retval0[<size-in-bytes>], or
// .param .b<size-in-bits> retval0
unsigned resultsz = DL.getTypeAllocSizeInBits(RetTy);
- // Emit ".param .b<size-in-bits> retval0" instead of byte arrays only for
- // these three types to match the logic in
- // NVPTXAsmPrinter::printReturnValStr and NVPTXTargetLowering::getPrototype.
- // Plus, this behavior is consistent with nvcc's.
- if (RetTy->isFloatingPointTy() || RetTy->isPointerTy() ||
- (RetTy->isIntegerTy() && !RetTy->isIntegerTy(128))) {
+ if (!IsTypePassedAsArray(RetTy)) {
resultsz = promoteScalarArgumentSize(resultsz);
SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
DAG.getConstant(resultsz, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), InFlag };
+ DAG.getConstant(0, dl, MVT::i32), InGlue };
Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
DeclareRetOps);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
} else {
retAlignment = getArgumentAlignment(Callee, CB, RetTy, 0, DL);
assert(retAlignment && "retAlignment is guaranteed to be set");
@@ -1757,10 +1818,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue DeclareRetOps[] = {
Chain, DAG.getConstant(retAlignment->value(), dl, MVT::i32),
DAG.getConstant(resultsz / 8, dl, MVT::i32),
- DAG.getConstant(0, dl, MVT::i32), InFlag};
+ DAG.getConstant(0, dl, MVT::i32), InGlue};
Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
DeclareRetOps);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
}
}
@@ -1815,15 +1876,15 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue ProtoOps[] = {
Chain,
DAG.getTargetExternalSymbol(ProtoStr, MVT::i32),
- InFlag,
+ InGlue,
};
Chain = DAG.getNode(NVPTXISD::CallPrototype, dl, ProtoVTs, ProtoOps);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
}
// Op to just print "call"
SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue PrintCallOps[] = {
- Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InFlag
+ Chain, DAG.getConstant((Ins.size() == 0) ? 0 : 1, dl, MVT::i32), InGlue
};
// We model convergent calls as separate opcodes.
unsigned Opcode = isIndirectCall ? NVPTXISD::PrintCall : NVPTXISD::PrintCallUni;
@@ -1831,20 +1892,20 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Opcode = Opcode == NVPTXISD::PrintCallUni ? NVPTXISD::PrintConvergentCallUni
: NVPTXISD::PrintConvergentCall;
Chain = DAG.getNode(Opcode, dl, PrintCallVTs, PrintCallOps);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
// Ops to print out the function name
SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CallVoidOps[] = { Chain, Callee, InFlag };
+ SDValue CallVoidOps[] = { Chain, Callee, InGlue };
Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
// Ops to print out the param list
SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue CallArgBeginOps[] = { Chain, InFlag };
+ SDValue CallArgBeginOps[] = { Chain, InGlue };
Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
CallArgBeginOps);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
for (unsigned i = 0, e = std::min(CLI.NumFixedArgs + 1, ParamCount); i != e;
++i) {
@@ -1855,23 +1916,23 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
opcode = NVPTXISD::CallArg;
SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CallArgOps[] = { Chain, DAG.getConstant(1, dl, MVT::i32),
- DAG.getConstant(i, dl, MVT::i32), InFlag };
+ DAG.getConstant(i, dl, MVT::i32), InGlue };
Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
}
SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue CallArgEndOps[] = { Chain,
DAG.getConstant(isIndirectCall ? 0 : 1, dl, MVT::i32),
- InFlag };
+ InGlue };
Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
if (isIndirectCall) {
SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue PrototypeOps[] = {
- Chain, DAG.getConstant(UniqueCallSite, dl, MVT::i32), InFlag};
+ Chain, DAG.getConstant(UniqueCallSite, dl, MVT::i32), InGlue};
Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
}
SmallVector<SDValue, 16> ProxyRegOps;
@@ -1948,7 +2009,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue LoadOperands[] = {
Chain, DAG.getConstant(1, dl, MVT::i32),
- DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InFlag};
+ DAG.getConstant(Offsets[VecIdx], dl, MVT::i32), InGlue};
SDValue RetVal = DAG.getMemIntrinsicNode(
Op, dl, DAG.getVTList(LoadVTs), LoadOperands, TheLoadType,
MachinePointerInfo(), EltAlign,
@@ -1964,7 +2025,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
Chain = RetVal.getValue(NumElts);
- InFlag = RetVal.getValue(NumElts + 1);
+ InGlue = RetVal.getValue(NumElts + 1);
// Cleanup
VecIdx = -1;
@@ -1974,8 +2035,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
Chain =
- DAG.getCALLSEQ_END(Chain, UniqueCallSite, UniqueCallSite + 1, InFlag, dl);
- InFlag = Chain.getValue(1);
+ DAG.getCALLSEQ_END(Chain, UniqueCallSite, UniqueCallSite + 1, InGlue, dl);
+ InGlue = Chain.getValue(1);
// Append ProxyReg instructions to the chain to make sure that `callseq_end`
// will not get lost. Otherwise, during libcalls expansion, the nodes can become
@@ -1984,11 +2045,11 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue Ret = DAG.getNode(
NVPTXISD::ProxyReg, dl,
DAG.getVTList(ProxyRegOps[i].getSimpleValueType(), MVT::Other, MVT::Glue),
- { Chain, ProxyRegOps[i], InFlag }
+ { Chain, ProxyRegOps[i], InGlue }
);
Chain = Ret.getValue(1);
- InFlag = Ret.getValue(2);
+ InGlue = Ret.getValue(2);
if (ProxyRegTruncates[i]) {
Ret = DAG.getNode(ISD::TRUNCATE, dl, *ProxyRegTruncates[i], Ret);
@@ -2037,7 +2098,7 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
// generates good SASS in both cases.
SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
- if (!(Op->getValueType(0) == MVT::v2f16 &&
+ if (!(Isv2f16Orv2bf16Type(Op->getValueType(0)) &&
isa<ConstantFPSDNode>(Op->getOperand(0)) &&
isa<ConstantFPSDNode>(Op->getOperand(1))))
return Op;
@@ -2048,7 +2109,7 @@ SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op,
cast<ConstantFPSDNode>(Op->getOperand(1))->getValueAPF().bitcastToAPInt();
SDValue Const =
DAG.getConstant(E1.zext(32).shl(16) | E0.zext(32), SDLoc(Op), MVT::i32);
- return DAG.getNode(ISD::BITCAST, SDLoc(Op), MVT::v2f16, Const);
+ return DAG.getNode(ISD::BITCAST, SDLoc(Op), Op->getValueType(0), Const);
}
SDValue NVPTXTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
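A small sanity check of the constant packing in the LowerBUILD_VECTOR hunk above (illustrative sketch, not part of the patch): operand 0 lands in the low 16 bits and operand 1 in the high 16 bits, mirroring E1.zext(32).shl(16) | E0.zext(32).

#include <cstdint>

constexpr uint32_t packHalfPair(uint16_t E0, uint16_t E1) {
  return (uint32_t(E1) << 16) | E0;
}
// f16 1.0 is 0x3C00 and f16 2.0 is 0x4000, so a constant v2f16 <1.0, 2.0>
// lowers to the single 32-bit immediate 0x40003C00, which is then bitcast
// back to the original v2f16/v2bf16 type.
static_assert(packHalfPair(0x3C00, 0x4000) == 0x40003C00, "low half is E0");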
@@ -2409,7 +2470,7 @@ SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
// v2f16 is legal, so we can't rely on legalizer to handle unaligned
// loads and have to handle it here.
- if (Op.getValueType() == MVT::v2f16) {
+ if (Isv2f16Orv2bf16Type(Op.getValueType())) {
LoadSDNode *Load = cast<LoadSDNode>(Op);
EVT MemVT = Load->getMemoryVT();
if (!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
@@ -2454,11 +2515,15 @@ SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
// v2f16 is legal, so we can't rely on legalizer to handle unaligned
// stores and have to handle it here.
- if (VT == MVT::v2f16 &&
+ if (Isv2f16Orv2bf16Type(VT) &&
!allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
VT, *Store->getMemOperand()))
return expandUnalignedStore(Store, DAG);
+ // v2f16 and v2bf16 don't need special handling.
+ if (VT == MVT::v2f16 || VT == MVT::v2bf16)
+ return SDValue();
+
if (VT.isVector())
return LowerSTOREVector(Op, DAG);
@@ -2541,7 +2606,7 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
// v8f16 is a special case. PTX doesn't have st.v8.f16
// instruction. Instead, we split the vector into v2f16 chunks and
// store them with st.v4.b32.
- assert((EltVT == MVT::f16 || EltVT == MVT::bf16) &&
+ assert(Isf16Orbf16Type(EltVT.getSimpleVT()) &&
"Wrong type for the vector.");
Opcode = NVPTXISD::StoreV4;
StoreF16x2 = true;
@@ -2557,11 +2622,12 @@ NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
// Combine f16,f16 -> v2f16
NumElts /= 2;
for (unsigned i = 0; i < NumElts; ++i) {
- SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val,
+ SDValue E0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
DAG.getIntPtrConstant(i * 2, DL));
- SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f16, Val,
+ SDValue E1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
DAG.getIntPtrConstant(i * 2 + 1, DL));
- SDValue V2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f16, E0, E1);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, 2);
+ SDValue V2 = DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, E0, E1);
Ops.push_back(V2);
}
} else {
@@ -2614,18 +2680,8 @@ SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
// passing variable arguments.
SDValue NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx,
EVT v) const {
- std::string ParamSym;
- raw_string_ostream ParamStr(ParamSym);
-
- ParamStr << DAG.getMachineFunction().getName();
-
- if (idx < 0)
- ParamStr << "_vararg";
- else
- ParamStr << "_param_" << idx;
-
- StringRef SavedStr =
- nvTM->getStrPool().save(ParamSym);
+ StringRef SavedStr = nvTM->getStrPool().save(
+ getParamName(&DAG.getMachineFunction().getFunction(), idx));
return DAG.getTargetExternalSymbol(SavedStr.data(), v);
}
@@ -2672,11 +2728,13 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
if (theArgs[i]->use_empty()) {
// argument is dead
- if (Ty->isAggregateType() || Ty->isIntegerTy(128)) {
+ if (IsTypePassedAsArray(Ty) && !Ty->isVectorTy()) {
SmallVector<EVT, 16> vtparts;
ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts);
- assert(vtparts.size() > 0 && "empty aggregate type not expected");
+ if (vtparts.empty())
+ report_fatal_error("Empty parameter types are not supported");
+
for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
++parti) {
InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
@@ -2713,7 +2771,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
SmallVector<EVT, 16> VTs;
SmallVector<uint64_t, 16> Offsets;
ComputePTXValueVTs(*this, DL, Ty, VTs, &Offsets, 0);
- assert(VTs.size() > 0 && "Unexpected empty type.");
+ if (VTs.empty())
+ report_fatal_error("Empty parameter types are not supported");
+
auto VectorInfo =
VectorizePTXValueVTs(VTs, Offsets, DL.getABITypeAlign(Ty));
@@ -2733,9 +2793,9 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
EVT LoadVT = EltVT;
if (EltVT == MVT::i1)
LoadVT = MVT::i8;
- else if (EltVT == MVT::v2f16)
+ else if (Isv2f16Orv2bf16Type(EltVT))
// getLoad needs a vector type, but it can't handle
- // vectors which contain v2f16 elements. So we must load
+ // vectors which contain v2f16 or v2bf16 elements. So we must load
// using i32 here and then bitcast back.
LoadVT = MVT::i32;
@@ -2759,8 +2819,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
if (EltVT == MVT::i1)
Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Elt);
// v2f16 was loaded as an i32. Now we must bitcast it back.
- else if (EltVT == MVT::v2f16)
- Elt = DAG.getNode(ISD::BITCAST, dl, MVT::v2f16, Elt);
+ else if (Isv2f16Orv2bf16Type(EltVT))
+ Elt = DAG.getNode(ISD::BITCAST, dl, EltVT, Elt);
// If a promoted integer type is used, truncate down to the original
MVT PromotedVT;
@@ -2914,7 +2974,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
}
}
- return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
+ return DAG.getNode(NVPTXISD::RET_GLUE, dl, MVT::Other, Chain);
}
void NVPTXTargetLowering::LowerAsmOperandForConstraint(
@@ -4508,20 +4568,38 @@ Align NVPTXTargetLowering::getFunctionByValParamAlign(
if (F)
ArgAlign = std::max(ArgAlign, getFunctionParamOptimizedAlign(F, ArgTy, DL));
- // Work around a bug in ptxas. When PTX code takes address of
+ // Old ptxas versions have a bug. When PTX code takes address of
// byval parameter with alignment < 4, ptxas generates code to
// spill argument into memory. Alas on sm_50+ ptxas generates
// SASS code that fails with misaligned access. To work around
// the problem, make sure that we align byval parameters by at
- // least 4.
- // TODO: this will need to be undone when we get to support multi-TU
- // device-side compilation as it breaks ABI compatibility with nvcc.
- // Hopefully ptxas bug is fixed by then.
- ArgAlign = std::max(ArgAlign, Align(4));
+ // least 4. This bug seems to be fixed at least starting from
+ // ptxas > 9.0.
+ // TODO: remove this after verifying the bug is not reproduced
+ // on non-deprecated ptxas versions.
+ if (ForceMinByValParamAlign)
+ ArgAlign = std::max(ArgAlign, Align(4));
return ArgAlign;
}
+// Helper for getting a function parameter name. Name is composed from
+// its index and the function name. Negative index corresponds to special
+// parameter (unsized array) used for passing variable arguments.
+std::string NVPTXTargetLowering::getParamName(const Function *F,
+ int Idx) const {
+ std::string ParamName;
+ raw_string_ostream ParamStr(ParamName);
+
+ ParamStr << getTargetMachine().getSymbol(F)->getName();
+ if (Idx < 0)
+ ParamStr << "_vararg";
+ else
+ ParamStr << "_param_" << Idx;
+
+ return ParamName;
+}
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
/// Used to guide target specific optimizations, like loop strength reduction
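For reference, a minimal standalone sketch of the parameter-symbol naming scheme that getParamName() implements (hypothetical helper; the in-tree code uses the symbol the target machine assigns to the function):

#include <string>

std::string paramSymbolFor(const std::string &FnSymbol, int Idx) {
  // "<function>_param_<index>" for ordinary parameters; a negative index
  // selects the special unsized-array parameter used for varargs.
  return FnSymbol + (Idx < 0 ? std::string("_vararg")
                             : "_param_" + std::to_string(Idx));
}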
@@ -5128,7 +5206,8 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
Align Alignment = LD->getAlign();
auto &TD = DAG.getDataLayout();
- Align PrefAlign = TD.getPrefTypeAlign(ResVT.getTypeForEVT(*DAG.getContext()));
+ Align PrefAlign =
+ TD.getPrefTypeAlign(LD->getMemoryVT().getTypeForEVT(*DAG.getContext()));
if (Alignment < PrefAlign) {
// This load is not sufficiently aligned, so bail out and let this vector
// load be scalarized. Note that we may still be able to emit smaller
@@ -5171,7 +5250,7 @@ static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
// v8f16 is a special case. PTX doesn't have ld.v8.f16
// instruction. Instead, we split the vector into v2f16 chunks and
// load them with ld.v4.b32.
- assert((EltVT == MVT::f16 || EltVT == MVT::bf16) &&
+ assert(Isf16Orbf16Type(EltVT.getSimpleVT()) &&
"Unsupported v8 vector type.");
LoadF16x2 = true;
Opcode = NVPTXISD::LoadV4;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index f48ec1740b0f..ccd80359bf80 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -25,7 +25,7 @@ enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
Wrapper,
CALL,
- RET_FLAG,
+ RET_GLUE,
LOAD_PARAM,
DeclareParam,
DeclareScalarParam,
@@ -466,6 +466,11 @@ public:
Align InitialAlign,
const DataLayout &DL) const;
+ // Helper for getting a function parameter name. Name is composed from
+ // its index and the function name. Negative index corresponds to special
+ // parameter (unsized array) used for passing variable arguments.
+ std::string getParamName(const Function *F, int Idx) const;
+
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type
/// Used to guide target specific optimizations, like loop strength
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 8df6f13aa68e..b0d792b5ee3f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -51,11 +51,6 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else if (DestRC == &NVPTX::Int64RegsRegClass) {
Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64rr
: NVPTX::BITCONVERT_64_F2I);
- } else if (DestRC == &NVPTX::Float16RegsRegClass) {
- Op = (SrcRC == &NVPTX::Float16RegsRegClass ? NVPTX::FMOV16rr
- : NVPTX::BITCONVERT_16_I2F);
- } else if (DestRC == &NVPTX::Float16x2RegsRegClass) {
- Op = NVPTX::IMOV32rr;
} else if (DestRC == &NVPTX::Float32RegsRegClass) {
Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32rr
: NVPTX::BITCONVERT_32_I2F);
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index b6a139411980..b98f76ed4b38 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -19,6 +19,8 @@ let hasSideEffects = false in {
let OperandType = "OPERAND_IMMEDIATE" in {
def f16imm : Operand<f16>;
+ def bf16imm : Operand<bf16>;
+
}
// List of vector specific properties
@@ -145,26 +147,8 @@ def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">;
def True : Predicate<"true">;
-def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;
-def hasPTX42 : Predicate<"Subtarget->getPTXVersion() >= 42">;
-def hasPTX43 : Predicate<"Subtarget->getPTXVersion() >= 43">;
-def hasPTX60 : Predicate<"Subtarget->getPTXVersion() >= 60">;
-def hasPTX61 : Predicate<"Subtarget->getPTXVersion() >= 61">;
-def hasPTX63 : Predicate<"Subtarget->getPTXVersion() >= 63">;
-def hasPTX64 : Predicate<"Subtarget->getPTXVersion() >= 64">;
-def hasPTX65 : Predicate<"Subtarget->getPTXVersion() >= 65">;
-def hasPTX70 : Predicate<"Subtarget->getPTXVersion() >= 70">;
-def hasPTX71 : Predicate<"Subtarget->getPTXVersion() >= 71">;
-def hasPTX72 : Predicate<"Subtarget->getPTXVersion() >= 72">;
-
-def hasSM30 : Predicate<"Subtarget->getSmVersion() >= 30">;
-def hasSM32 : Predicate<"Subtarget->getSmVersion() >= 32">;
-def hasSM53 : Predicate<"Subtarget->getSmVersion() >= 53">;
-def hasSM70 : Predicate<"Subtarget->getSmVersion() >= 70">;
-def hasSM72 : Predicate<"Subtarget->getSmVersion() >= 72">;
-def hasSM75 : Predicate<"Subtarget->getSmVersion() >= 75">;
-def hasSM80 : Predicate<"Subtarget->getSmVersion() >= 80">;
-def hasSM86 : Predicate<"Subtarget->getSmVersion() >= 86">;
+class hasPTX<int version>: Predicate<"Subtarget->getPTXVersion() >= " # version>;
+class hasSM<int version>: Predicate<"Subtarget->getSmVersion() >= " # version>;
// non-sync shfl instructions are not available on sm_70+ in PTX6.4+
def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70"
@@ -172,6 +156,7 @@ def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70"
def useShortPtr : Predicate<"useShortPointers()">;
def useFP16Math: Predicate<"Subtarget->allowFP16Math()">;
+def hasBF16Math: Predicate<"Subtarget->hasBF16Math()">;
// Helper class to aid conversion between ValueType and a matching RegisterClass.
@@ -182,10 +167,10 @@ class ValueToRegClass<ValueType T> {
!eq(name, "i16"): Int16Regs,
!eq(name, "i32"): Int32Regs,
!eq(name, "i64"): Int64Regs,
- !eq(name, "f16"): Float16Regs,
- !eq(name, "v2f16"): Float16x2Regs,
- !eq(name, "bf16"): Float16Regs,
- !eq(name, "v2bf16"): Float16x2Regs,
+ !eq(name, "f16"): Int16Regs,
+ !eq(name, "v2f16"): Int32Regs,
+ !eq(name, "bf16"): Int16Regs,
+ !eq(name, "v2bf16"): Int32Regs,
!eq(name, "f32"): Float32Regs,
!eq(name, "f64"): Float64Regs,
!eq(name, "ai32"): Int32ArgRegs,
@@ -245,12 +230,12 @@ multiclass ADD_SUB_INT_CARRY<string OpcStr, SDNode OpNode> {
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
!strconcat(OpcStr, ".s64 \t$dst, $a, $b;"),
[(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>,
- Requires<[hasPTX43]>;
+ Requires<[hasPTX<43>]>;
def i64ri :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
!strconcat(OpcStr, ".s64 \t$dst, $a, $b;"),
[(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>,
- Requires<[hasPTX43]>;
+ Requires<[hasPTX<43>]>;
}
}
@@ -298,30 +283,55 @@ multiclass F3<string OpcStr, SDNode OpNode> {
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
def f16rr_ftz :
- NVPTXInst<(outs Float16Regs:$dst),
- (ins Float16Regs:$a, Float16Regs:$b),
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
!strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"),
- [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>,
+ [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>,
Requires<[useFP16Math, doF32FTZ]>;
def f16rr :
- NVPTXInst<(outs Float16Regs:$dst),
- (ins Float16Regs:$a, Float16Regs:$b),
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
!strconcat(OpcStr, ".f16 \t$dst, $a, $b;"),
- [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>,
+ [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>,
Requires<[useFP16Math]>;
def f16x2rr_ftz :
- NVPTXInst<(outs Float16x2Regs:$dst),
- (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"),
- [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>,
+ [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>,
Requires<[useFP16Math, doF32FTZ]>;
def f16x2rr :
- NVPTXInst<(outs Float16x2Regs:$dst),
- (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"),
- [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>,
+ [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>,
Requires<[useFP16Math]>;
+ def bf16rr_ftz :
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, ".ftz.bf16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>,
+ Requires<[hasBF16Math, doF32FTZ]>;
+ def bf16rr :
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, ".bf16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>,
+ Requires<[hasBF16Math]>;
+
+ def bf16x2rr_ftz :
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, ".ftz.bf16x2 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>,
+ Requires<[hasBF16Math, doF32FTZ]>;
+ def bf16x2rr :
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, ".bf16x2 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>,
+ Requires<[hasBF16Math]>;
}
// Template for instructions which take three FP args. The
@@ -372,31 +382,55 @@ multiclass F3_fma_component<string OpcStr, SDNode OpNode> {
Requires<[allowFMA]>;
def f16rr_ftz :
- NVPTXInst<(outs Float16Regs:$dst),
- (ins Float16Regs:$a, Float16Regs:$b),
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
!strconcat(OpcStr, ".ftz.f16 \t$dst, $a, $b;"),
- [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>,
+ [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>,
Requires<[useFP16Math, allowFMA, doF32FTZ]>;
def f16rr :
- NVPTXInst<(outs Float16Regs:$dst),
- (ins Float16Regs:$a, Float16Regs:$b),
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
!strconcat(OpcStr, ".f16 \t$dst, $a, $b;"),
- [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>,
+ [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>,
Requires<[useFP16Math, allowFMA]>;
def f16x2rr_ftz :
- NVPTXInst<(outs Float16x2Regs:$dst),
- (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, ".ftz.f16x2 \t$dst, $a, $b;"),
- [(set (v2f16 Float16x2Regs:$dst), (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>,
+ [(set (v2f16 Int32Regs:$dst), (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>,
Requires<[useFP16Math, allowFMA, doF32FTZ]>;
def f16x2rr :
- NVPTXInst<(outs Float16x2Regs:$dst),
- (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, ".f16x2 \t$dst, $a, $b;"),
- [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>,
+ [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>,
Requires<[useFP16Math, allowFMA]>;
-
+ def bf16rr_ftz :
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, ".ftz.bf16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>,
+ Requires<[hasBF16Math, allowFMA, doF32FTZ]>;
+ def bf16rr :
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, ".bf16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>,
+ Requires<[hasBF16Math, allowFMA]>;
+
+ def bf16x2rr_ftz :
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, ".ftz.bf16x2 \t$dst, $a, $b;"),
+ [(set (v2bf16 Int32Regs:$dst), (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>,
+ Requires<[hasBF16Math, allowFMA, doF32FTZ]>;
+ def bf16x2rr :
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, ".bf16x2 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>,
+ Requires<[hasBF16Math, allowFMA]>;
// These have strange names so we don't perturb existing mir tests.
def _rnf64rr :
NVPTXInst<(outs Float64Regs:$dst),
@@ -435,29 +469,53 @@ multiclass F3_fma_component<string OpcStr, SDNode OpNode> {
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
Requires<[noFMA]>;
def _rnf16rr_ftz :
- NVPTXInst<(outs Float16Regs:$dst),
- (ins Float16Regs:$a, Float16Regs:$b),
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
!strconcat(OpcStr, ".rn.ftz.f16 \t$dst, $a, $b;"),
- [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>,
+ [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>,
Requires<[useFP16Math, noFMA, doF32FTZ]>;
def _rnf16rr :
- NVPTXInst<(outs Float16Regs:$dst),
- (ins Float16Regs:$a, Float16Regs:$b),
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
!strconcat(OpcStr, ".rn.f16 \t$dst, $a, $b;"),
- [(set Float16Regs:$dst, (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>,
+ [(set Int16Regs:$dst, (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b)))]>,
Requires<[useFP16Math, noFMA]>;
def _rnf16x2rr_ftz :
- NVPTXInst<(outs Float16x2Regs:$dst),
- (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, ".rn.ftz.f16x2 \t$dst, $a, $b;"),
- [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>,
+ [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>,
Requires<[useFP16Math, noFMA, doF32FTZ]>;
def _rnf16x2rr :
- NVPTXInst<(outs Float16x2Regs:$dst),
- (ins Float16x2Regs:$a, Float16x2Regs:$b),
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, ".rn.f16x2 \t$dst, $a, $b;"),
- [(set Float16x2Regs:$dst, (OpNode (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>,
+ [(set Int32Regs:$dst, (OpNode (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>,
Requires<[useFP16Math, noFMA]>;
+ def _rnbf16rr_ftz :
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, ".rn.ftz.bf16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>,
+ Requires<[hasBF16Math, noFMA, doF32FTZ]>;
+ def _rnbf16rr :
+ NVPTXInst<(outs Int16Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b),
+ !strconcat(OpcStr, ".rn.bf16 \t$dst, $a, $b;"),
+ [(set Int16Regs:$dst, (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)))]>,
+ Requires<[hasBF16Math, noFMA]>;
+ def _rnbf16x2rr_ftz :
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, ".rn.ftz.bf16x2 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>,
+ Requires<[hasBF16Math, noFMA, doF32FTZ]>;
+ def _rnbf16x2rr :
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b),
+ !strconcat(OpcStr, ".rn.bf16x2 \t$dst, $a, $b;"),
+ [(set Int32Regs:$dst, (OpNode (v2bf16 Int32Regs:$a), (v2bf16 Int32Regs:$b)))]>,
+ Requires<[hasBF16Math, noFMA]>;
}
// Template for operations which take two f32 or f64 operands. Provides three
@@ -488,62 +546,86 @@ let hasSideEffects = false in {
// Generate a cvt to the given type from all possible types. Each instance
// takes a CvtMode immediate that defines the conversion mode to use. It can
// be CvtNONE to omit a conversion mode.
- multiclass CVT_FROM_ALL<string FromName, RegisterClass RC> {
+ multiclass CVT_FROM_ALL<string ToType, RegisterClass RC, list<Predicate> Preds = []> {
def _s8 :
NVPTXInst<(outs RC:$dst),
(ins Int16Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".s8 \t$dst, $src;"), []>;
+ ToType, ".s8 \t$dst, $src;"), []>,
+ Requires<Preds>;
def _u8 :
NVPTXInst<(outs RC:$dst),
(ins Int16Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".u8 \t$dst, $src;"), []>;
+ ToType, ".u8 \t$dst, $src;"), []>,
+ Requires<Preds>;
def _s16 :
NVPTXInst<(outs RC:$dst),
(ins Int16Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".s16 \t$dst, $src;"), []>;
+ ToType, ".s16 \t$dst, $src;"), []>,
+ Requires<Preds>;
def _u16 :
NVPTXInst<(outs RC:$dst),
(ins Int16Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".u16 \t$dst, $src;"), []>;
+ ToType, ".u16 \t$dst, $src;"), []>,
+ Requires<Preds>;
def _s32 :
NVPTXInst<(outs RC:$dst),
(ins Int32Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".s32 \t$dst, $src;"), []>;
+ ToType, ".s32 \t$dst, $src;"), []>,
+ Requires<Preds>;
def _u32 :
NVPTXInst<(outs RC:$dst),
(ins Int32Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".u32 \t$dst, $src;"), []>;
+ ToType, ".u32 \t$dst, $src;"), []>,
+ Requires<Preds>;
def _s64 :
NVPTXInst<(outs RC:$dst),
(ins Int64Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".s64 \t$dst, $src;"), []>;
+ ToType, ".s64 \t$dst, $src;"), []>,
+ Requires<Preds>;
def _u64 :
NVPTXInst<(outs RC:$dst),
(ins Int64Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".u64 \t$dst, $src;"), []>;
+ ToType, ".u64 \t$dst, $src;"), []>,
+ Requires<Preds>;
def _f16 :
NVPTXInst<(outs RC:$dst),
- (ins Float16Regs:$src, CvtMode:$mode),
+ (ins Int16Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".f16 \t$dst, $src;"), []>;
+ ToType, ".f16 \t$dst, $src;"), []>,
+ Requires<Preds>;
+ def _bf16 :
+ NVPTXInst<(outs RC:$dst),
+ (ins Int16Regs:$src, CvtMode:$mode),
+ !strconcat("cvt${mode:base}${mode:ftz}${mode:relu}${mode:sat}.",
+ ToType, ".bf16 \t$dst, $src;"), []>,
+ Requires<!if(!eq(ToType, "f32"),
+ // bf16->f32 was introduced early.
+ [hasPTX<71>, hasSM<80>],
+ // bf16->everything else needs sm90/ptx78
+ [hasPTX<78>, hasSM<90>])>;
def _f32 :
NVPTXInst<(outs RC:$dst),
(ins Float32Regs:$src, CvtMode:$mode),
- !strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".f32 \t$dst, $src;"), []>;
+ !strconcat("cvt${mode:base}${mode:ftz}${mode:relu}${mode:sat}.",
+ ToType, ".f32 \t$dst, $src;"), []>,
+ Requires<!if(!eq(ToType, "bf16"),
+ // f32->bf16 was introduced early.
+ [hasPTX<70>, hasSM<80>],
+ Preds)>;
def _f64 :
NVPTXInst<(outs RC:$dst),
(ins Float64Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
- FromName, ".f64 \t$dst, $src;"), []>;
+ ToType, ".f64 \t$dst, $src;"), []>,
+ Requires<Preds>;
}
// Generate cvts from all types to all types.
@@ -555,7 +637,8 @@ let hasSideEffects = false in {
defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>;
defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>;
defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>;
- defm CVT_f16 : CVT_FROM_ALL<"f16", Float16Regs>;
+ defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
+ defm CVT_bf16 : CVT_FROM_ALL<"bf16", Int16Regs, [hasPTX<78>, hasSM<90>]>;
defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>;
defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>;
@@ -574,27 +657,16 @@ let hasSideEffects = false in {
def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
"cvt.s64.s32 \t$dst, $src;", []>;
-multiclass CVT_FROM_FLOAT_SM80<string FromName, RegisterClass RC> {
- def _f32 :
- NVPTXInst<(outs RC:$dst),
- (ins Float32Regs:$src, CvtMode:$mode),
- !strconcat("cvt${mode:base}${mode:relu}.",
- FromName, ".f32 \t$dst, $src;"), []>,
- Requires<[hasPTX70, hasSM80]>;
- }
-
- defm CVT_bf16 : CVT_FROM_FLOAT_SM80<"bf16", Int16Regs>;
-
- multiclass CVT_FROM_FLOAT_V2_SM80<string FromName, RegisterClass RC> {
+ multiclass CVT_FROM_FLOAT_V2_SM80<string FromName, RegisterClass RC> {
def _f32 :
NVPTXInst<(outs RC:$dst),
(ins Float32Regs:$src1, Float32Regs:$src2, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:relu}.",
FromName, ".f32 \t$dst, $src1, $src2;"), []>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
}
- defm CVT_f16x2 : CVT_FROM_FLOAT_V2_SM80<"f16x2", Float16x2Regs>;
+ defm CVT_f16x2 : CVT_FROM_FLOAT_V2_SM80<"f16x2", Int32Regs>;
defm CVT_bf16x2 : CVT_FROM_FLOAT_V2_SM80<"bf16x2", Int32Regs>;
}
@@ -658,21 +730,22 @@ defm SELP_u32 : SELP<"u32", Int32Regs, i32imm>;
defm SELP_b64 : SELP_PATTERN<"b64", i64, Int64Regs, i64imm, imm>;
defm SELP_s64 : SELP<"s64", Int64Regs, i64imm>;
defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
-defm SELP_f16 : SELP_PATTERN<"b16", f16, Float16Regs, f16imm, fpimm>;
+defm SELP_f16 : SELP_PATTERN<"b16", f16, Int16Regs, f16imm, fpimm>;
+defm SELP_bf16 : SELP_PATTERN<"b16", bf16, Int16Regs, bf16imm, fpimm>;
defm SELP_f32 : SELP_PATTERN<"f32", f32, Float32Regs, f32imm, fpimm>;
defm SELP_f64 : SELP_PATTERN<"f64", f64, Float64Regs, f64imm, fpimm>;
// This does not work as tablegen fails to infer the type of 'imm'.
// def v2f16imm : Operand<v2f16>;
-// defm SELP_f16x2 : SELP_PATTERN<"b32", v2f16, Float16x2Regs, v2f16imm, imm>;
+// defm SELP_f16x2 : SELP_PATTERN<"b32", v2f16, Int32Regs, v2f16imm, imm>;
def SELP_f16x2rr :
- NVPTXInst<(outs Float16x2Regs:$dst),
- (ins Float16x2Regs:$a, Float16x2Regs:$b, Int1Regs:$p),
+ NVPTXInst<(outs Int32Regs:$dst),
+ (ins Int32Regs:$a, Int32Regs:$b, Int1Regs:$p),
"selp.b32 \t$dst, $a, $b, $p;",
- [(set Float16x2Regs:$dst,
- (select Int1Regs:$p, (v2f16 Float16x2Regs:$a), (v2f16 Float16x2Regs:$b)))]>;
+ [(set Int32Regs:$dst,
+ (select Int1Regs:$p, (v2f16 Int32Regs:$a), (v2f16 Int32Regs:$b)))]>;
//-----------------------------------
// Test Instructions
@@ -801,26 +874,26 @@ def mul_wide_signed : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;
def mul_wide_unsigned : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;
// Matchers for signed, unsigned mul.wide ISD nodes.
-def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)),
- (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
+def : Pat<(i32 (mul_wide_signed i16:$a, i16:$b)),
+ (MULWIDES32 i16:$a, i16:$b)>,
Requires<[doMulWide]>;
def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)),
(MULWIDES32Imm Int16Regs:$a, imm:$b)>,
Requires<[doMulWide]>;
-def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)),
+def : Pat<(i32 (mul_wide_unsigned i16:$a, i16:$b)),
(MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
Requires<[doMulWide]>;
def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),
(MULWIDEU32Imm Int16Regs:$a, imm:$b)>,
Requires<[doMulWide]>;
-def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)),
+def : Pat<(i64 (mul_wide_signed i32:$a, i32:$b)),
(MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)),
(MULWIDES64Imm Int32Regs:$a, imm:$b)>,
Requires<[doMulWide]>;
-def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)),
+def : Pat<(i64 (mul_wide_unsigned i32:$a, i32:$b)),
(MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
Requires<[doMulWide]>;
def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)),
@@ -1021,9 +1094,11 @@ def DoubleConst1 : PatLeaf<(fpimm), [{
// fp16 immediate values in .f16 instructions. Instead we have to load
// the constant into a register using mov.b16.
def LOAD_CONST_F16 :
- NVPTXInst<(outs Float16Regs:$dst), (ins f16imm:$a),
+ NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$a),
+ "mov.b16 \t$dst, $a;", []>;
+def LOAD_CONST_BF16 :
+ NVPTXInst<(outs Int16Regs:$dst), (ins bf16imm:$a),
"mov.b16 \t$dst, $a;", []>;
-
defm FADD : F3_fma_component<"add", fadd>;
defm FSUB : F3_fma_component<"sub", fsub>;
defm FMUL : F3_fma_component<"mul", fmul>;
@@ -1045,11 +1120,25 @@ class FNEG_F16_F16X2<string OpcStr, ValueType T, RegisterClass RC, Predicate Pre
NVPTXInst<(outs RC:$dst), (ins RC:$src),
!strconcat(OpcStr, " \t$dst, $src;"),
[(set RC:$dst, (fneg (T RC:$src)))]>,
- Requires<[useFP16Math, hasPTX60, hasSM53, Pred]>;
-def FNEG16_ftz : FNEG_F16_F16X2<"neg.ftz.f16", f16, Float16Regs, doF32FTZ>;
-def FNEG16 : FNEG_F16_F16X2<"neg.f16", f16, Float16Regs, True>;
-def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", v2f16, Float16x2Regs, doF32FTZ>;
-def FNEG16x2 : FNEG_F16_F16X2<"neg.f16x2", v2f16, Float16x2Regs, True>;
+ Requires<[useFP16Math, hasPTX<60>, hasSM<53>, Pred]>;
+def FNEG16_ftz : FNEG_F16_F16X2<"neg.ftz.f16", f16, Int16Regs, doF32FTZ>;
+def FNEG16 : FNEG_F16_F16X2<"neg.f16", f16, Int16Regs, True>;
+def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", v2f16, Int32Regs, doF32FTZ>;
+def FNEG16x2 : FNEG_F16_F16X2<"neg.f16x2", v2f16, Int32Regs, True>;
+
+//
+// BF16 NEG
+//
+
+class FNEG_BF16_F16X2<string OpcStr, ValueType T, RegisterClass RC, Predicate Pred> :
+ NVPTXInst<(outs RC:$dst), (ins RC:$src),
+ !strconcat(OpcStr, " \t$dst, $src;"),
+ [(set RC:$dst, (fneg (T RC:$src)))]>,
+ Requires<[hasBF16Math, hasPTX<70>, hasSM<80>, Pred]>;
+def BFNEG16_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16", bf16, Int16Regs, doF32FTZ>;
+def BFNEG16 : FNEG_BF16_F16X2<"neg.bf16", bf16, Int16Regs, True>;
+def BFNEG16x2_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16x2", v2bf16, Int32Regs, doF32FTZ>;
+def BFNEG16x2 : FNEG_BF16_F16X2<"neg.bf16x2", v2bf16, Int32Regs, True>;
//
// F64 division
@@ -1229,13 +1318,24 @@ multiclass FMA_F16<string OpcStr, ValueType T, RegisterClass RC, Predicate Pred>
Requires<[useFP16Math, Pred]>;
}
-defm FMA16_ftz : FMA_F16<"fma.rn.ftz.f16", f16, Float16Regs, doF32FTZ>;
-defm FMA16 : FMA_F16<"fma.rn.f16", f16, Float16Regs, True>;
-defm FMA16x2_ftz : FMA_F16<"fma.rn.ftz.f16x2", v2f16, Float16x2Regs, doF32FTZ>;
-defm FMA16x2 : FMA_F16<"fma.rn.f16x2", v2f16, Float16x2Regs, True>;
-defm FMA32_ftz : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>;
-defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, True>;
-defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, True>;
+multiclass FMA_BF16<string OpcStr, ValueType T, RegisterClass RC, Predicate Pred> {
+ def rrr : NVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c),
+ !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
+ [(set RC:$dst, (fma (T RC:$a), (T RC:$b), (T RC:$c)))]>,
+ Requires<[hasBF16Math, Pred]>;
+}
+
+defm FMA16_ftz : FMA_F16<"fma.rn.ftz.f16", f16, Int16Regs, doF32FTZ>;
+defm FMA16 : FMA_F16<"fma.rn.f16", f16, Int16Regs, True>;
+defm FMA16x2_ftz : FMA_F16<"fma.rn.ftz.f16x2", v2f16, Int32Regs, doF32FTZ>;
+defm FMA16x2 : FMA_F16<"fma.rn.f16x2", v2f16, Int32Regs, True>;
+defm BFMA16_ftz : FMA_BF16<"fma.rn.ftz.bf16", bf16, Int16Regs, doF32FTZ>;
+defm BFMA16 : FMA_BF16<"fma.rn.bf16", bf16, Int16Regs, True>;
+defm BFMA16x2_ftz : FMA_BF16<"fma.rn.ftz.bf16x2", v2bf16, Int32Regs, doF32FTZ>;
+defm BFMA16x2 : FMA_BF16<"fma.rn.bf16x2", v2bf16, Int32Regs, True>;
+defm FMA32_ftz : FMA<"fma.rn.ftz.f32", Float32Regs, f32imm, doF32FTZ>;
+defm FMA32 : FMA<"fma.rn.f32", Float32Regs, f32imm, True>;
+defm FMA64 : FMA<"fma.rn.f64", Float64Regs, f64imm, True>;
// sin/cos
def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
@@ -1669,16 +1769,28 @@ defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>;
defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>;
def SETP_f16rr :
NVPTXInst<(outs Int1Regs:$dst),
- (ins Float16Regs:$a, Float16Regs:$b, CmpMode:$cmp),
+ (ins Int16Regs:$a, Int16Regs:$b, CmpMode:$cmp),
"setp${cmp:base}${cmp:ftz}.f16 \t$dst, $a, $b;",
[]>, Requires<[useFP16Math]>;
def SETP_f16x2rr :
NVPTXInst<(outs Int1Regs:$p, Int1Regs:$q),
- (ins Float16x2Regs:$a, Float16x2Regs:$b, CmpMode:$cmp),
+ (ins Int32Regs:$a, Int32Regs:$b, CmpMode:$cmp),
"setp${cmp:base}${cmp:ftz}.f16x2 \t$p|$q, $a, $b;",
[]>,
Requires<[useFP16Math]>;
+def SETP_bf16rr :
+ NVPTXInst<(outs Int1Regs:$dst),
+ (ins Int16Regs:$a, Int16Regs:$b, CmpMode:$cmp),
+ "setp${cmp:base}${cmp:ftz}.bf16 \t$dst, $a, $b;",
+ []>, Requires<[hasBF16Math]>;
+
+def SETP_bf16x2rr :
+ NVPTXInst<(outs Int1Regs:$p, Int1Regs:$q),
+ (ins Int32Regs:$a, Int32Regs:$b, CmpMode:$cmp),
+ "setp${cmp:base}${cmp:ftz}.bf16x2 \t$p|$q, $a, $b;",
+ []>,
+ Requires<[hasBF16Math]>;
// FIXME: This doesn't appear to be correct. The "set" mnemonic has the form
@@ -1708,7 +1820,8 @@ defm SET_u32 : SET<"u32", Int32Regs, i32imm>;
defm SET_b64 : SET<"b64", Int64Regs, i64imm>;
defm SET_s64 : SET<"s64", Int64Regs, i64imm>;
defm SET_u64 : SET<"u64", Int64Regs, i64imm>;
-defm SET_f16 : SET<"f16", Float16Regs, f16imm>;
+defm SET_f16 : SET<"f16", Int16Regs, f16imm>;
+defm SET_bf16 : SET<"bf16", Int16Regs, bf16imm>;
defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
@@ -1778,7 +1891,14 @@ let IsSimpleMove=1, hasSideEffects=0 in {
def IMOV64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
"mov.u64 \t$dst, $sss;", []>;
- def FMOV16rr : NVPTXInst<(outs Float16Regs:$dst), (ins Float16Regs:$src),
+ def IMOVB16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
+ "mov.b16 \t$dst, $sss;", []>;
+ def IMOVB32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
+ "mov.b32 \t$dst, $sss;", []>;
+ def IMOVB64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
+ "mov.b64 \t$dst, $sss;", []>;
+
+ def FMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
// We have to use .b16 here as there's no mov.f16.
"mov.b16 \t$dst, $src;", []>;
def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
@@ -1800,6 +1920,13 @@ def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
"mov.u64 \t$dst, $src;",
[(set Int64Regs:$dst, imm:$src)]>;
+def IMOVB16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
+ "mov.b16 \t$dst, $src;", []>;
+def IMOVB32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
+ "mov.b32 \t$dst, $src;", []>;
+def IMOVB64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
+ "mov.b64 \t$dst, $src;", []>;
+
def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
"mov.f32 \t$dst, $src;",
[(set Float32Regs:$dst, fpimm:$src)]>;
@@ -1842,14 +1969,14 @@ multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
Instruction set_64ri,
Instruction set_64ir> {
// i16 -> pred
- def : Pat<(i1 (OpNode Int16Regs:$a, Int16Regs:$b)),
+ def : Pat<(i1 (OpNode i16:$a, i16:$b)),
(setp_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
def : Pat<(i1 (OpNode Int16Regs:$a, imm:$b)),
(setp_16ri Int16Regs:$a, imm:$b, Mode)>;
def : Pat<(i1 (OpNode imm:$a, Int16Regs:$b)),
(setp_16ir imm:$a, Int16Regs:$b, Mode)>;
// i32 -> pred
- def : Pat<(i1 (OpNode Int32Regs:$a, Int32Regs:$b)),
+ def : Pat<(i1 (OpNode i32:$a, i32:$b)),
(setp_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
def : Pat<(i1 (OpNode Int32Regs:$a, imm:$b)),
(setp_32ri Int32Regs:$a, imm:$b, Mode)>;
@@ -1864,14 +1991,14 @@ multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
(setp_64ir imm:$a, Int64Regs:$b, Mode)>;
// i16 -> i32
- def : Pat<(i32 (OpNode Int16Regs:$a, Int16Regs:$b)),
+ def : Pat<(i32 (OpNode i16:$a, i16:$b)),
(set_16rr Int16Regs:$a, Int16Regs:$b, Mode)>;
def : Pat<(i32 (OpNode Int16Regs:$a, imm:$b)),
(set_16ri Int16Regs:$a, imm:$b, Mode)>;
def : Pat<(i32 (OpNode imm:$a, Int16Regs:$b)),
(set_16ir imm:$a, Int16Regs:$b, Mode)>;
// i32 -> i32
- def : Pat<(i32 (OpNode Int32Regs:$a, Int32Regs:$b)),
+ def : Pat<(i32 (OpNode i32:$a, i32:$b)),
(set_32rr Int32Regs:$a, Int32Regs:$b, Mode)>;
def : Pat<(i32 (OpNode Int32Regs:$a, imm:$b)),
(set_32ri Int32Regs:$a, imm:$b, Mode)>;
@@ -1944,25 +2071,45 @@ def : Pat<(i32 (setne Int1Regs:$a, Int1Regs:$b)),
multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
// f16 -> pred
- def : Pat<(i1 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))),
- (SETP_f16rr Float16Regs:$a, Float16Regs:$b, ModeFTZ)>,
+ def : Pat<(i1 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
+ (SETP_f16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>,
Requires<[useFP16Math,doF32FTZ]>;
- def : Pat<(i1 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))),
- (SETP_f16rr Float16Regs:$a, Float16Regs:$b, Mode)>,
+ def : Pat<(i1 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
+ (SETP_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
Requires<[useFP16Math]>;
- def : Pat<(i1 (OpNode (f16 Float16Regs:$a), fpimm:$b)),
- (SETP_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
+ def : Pat<(i1 (OpNode (f16 Int16Regs:$a), fpimm:$b)),
+ (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
Requires<[useFP16Math,doF32FTZ]>;
- def : Pat<(i1 (OpNode (f16 Float16Regs:$a), fpimm:$b)),
- (SETP_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
+ def : Pat<(i1 (OpNode (f16 Int16Regs:$a), fpimm:$b)),
+ (SETP_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
Requires<[useFP16Math]>;
- def : Pat<(i1 (OpNode fpimm:$a, (f16 Float16Regs:$b))),
- (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, ModeFTZ)>,
+ def : Pat<(i1 (OpNode fpimm:$a, (f16 Int16Regs:$b))),
+ (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
Requires<[useFP16Math,doF32FTZ]>;
- def : Pat<(i1 (OpNode fpimm:$a, (f16 Float16Regs:$b))),
- (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, Mode)>,
+ def : Pat<(i1 (OpNode fpimm:$a, (f16 Int16Regs:$b))),
+ (SETP_f16rr (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>,
Requires<[useFP16Math]>;
+ // bf16 -> pred
+ def : Pat<(i1 (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))),
+ (SETP_bf16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>,
+ Requires<[hasBF16Math,doF32FTZ]>;
+ def : Pat<(i1 (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))),
+ (SETP_bf16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
+ Requires<[hasBF16Math]>;
+ def : Pat<(i1 (OpNode (bf16 Int16Regs:$a), fpimm:$b)),
+ (SETP_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), ModeFTZ)>,
+ Requires<[hasBF16Math,doF32FTZ]>;
+ def : Pat<(i1 (OpNode (bf16 Int16Regs:$a), fpimm:$b)),
+ (SETP_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), Mode)>,
+ Requires<[hasBF16Math]>;
+ def : Pat<(i1 (OpNode fpimm:$a, (bf16 Int16Regs:$b))),
+ (SETP_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
+ Requires<[hasBF16Math,doF32FTZ]>;
+ def : Pat<(i1 (OpNode fpimm:$a, (bf16 Int16Regs:$b))),
+ (SETP_bf16rr (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, Mode)>,
+ Requires<[hasBF16Math]>;
+
// f32 -> pred
def : Pat<(i1 (OpNode Float32Regs:$a, Float32Regs:$b)),
(SETP_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
@@ -1989,25 +2136,45 @@ multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
(SETP_f64ir fpimm:$a, Float64Regs:$b, Mode)>;
// f16 -> i32
- def : Pat<(i32 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))),
- (SET_f16rr Float16Regs:$a, Float16Regs:$b, ModeFTZ)>,
+ def : Pat<(i32 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
+ (SET_f16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>,
Requires<[useFP16Math, doF32FTZ]>;
- def : Pat<(i32 (OpNode (f16 Float16Regs:$a), (f16 Float16Regs:$b))),
- (SET_f16rr Float16Regs:$a, Float16Regs:$b, Mode)>,
+ def : Pat<(i32 (OpNode (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
+ (SET_f16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
Requires<[useFP16Math]>;
- def : Pat<(i32 (OpNode (f16 Float16Regs:$a), fpimm:$b)),
- (SET_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
+ def : Pat<(i32 (OpNode (f16 Int16Regs:$a), fpimm:$b)),
+ (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), ModeFTZ)>,
Requires<[useFP16Math, doF32FTZ]>;
- def : Pat<(i32 (OpNode (f16 Float16Regs:$a), fpimm:$b)),
- (SET_f16rr Float16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
+ def : Pat<(i32 (OpNode (f16 Int16Regs:$a), fpimm:$b)),
+ (SET_f16rr Int16Regs:$a, (LOAD_CONST_F16 fpimm:$b), Mode)>,
Requires<[useFP16Math]>;
- def : Pat<(i32 (OpNode fpimm:$a, (f16 Float16Regs:$b))),
- (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, ModeFTZ)>,
+ def : Pat<(i32 (OpNode fpimm:$a, (f16 Int16Regs:$b))),
+ (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
Requires<[useFP16Math, doF32FTZ]>;
- def : Pat<(i32 (OpNode fpimm:$a, (f16 Float16Regs:$b))),
- (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Float16Regs:$b, Mode)>,
+ def : Pat<(i32 (OpNode fpimm:$a, (f16 Int16Regs:$b))),
+ (SET_f16ir (LOAD_CONST_F16 fpimm:$a), Int16Regs:$b, Mode)>,
Requires<[useFP16Math]>;
+ // bf16 -> i32
+ def : Pat<(i32 (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))),
+ (SET_bf16rr Int16Regs:$a, Int16Regs:$b, ModeFTZ)>,
+ Requires<[hasBF16Math, doF32FTZ]>;
+ def : Pat<(i32 (OpNode (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))),
+ (SET_bf16rr Int16Regs:$a, Int16Regs:$b, Mode)>,
+ Requires<[hasBF16Math]>;
+ def : Pat<(i32 (OpNode (bf16 Int16Regs:$a), fpimm:$b)),
+ (SET_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), ModeFTZ)>,
+ Requires<[hasBF16Math, doF32FTZ]>;
+ def : Pat<(i32 (OpNode (bf16 Int16Regs:$a), fpimm:$b)),
+ (SET_bf16rr Int16Regs:$a, (LOAD_CONST_BF16 fpimm:$b), Mode)>,
+ Requires<[hasBF16Math]>;
+ def : Pat<(i32 (OpNode fpimm:$a, (bf16 Int16Regs:$b))),
+ (SET_bf16ir (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, ModeFTZ)>,
+ Requires<[hasBF16Math, doF32FTZ]>;
+ def : Pat<(i32 (OpNode fpimm:$a, (bf16 Int16Regs:$b))),
+ (SET_bf16ir (LOAD_CONST_BF16 fpimm:$a), Int16Regs:$b, Mode)>,
+ Requires<[hasBF16Math]>;
+
// f32 -> i32
def : Pat<(i32 (OpNode Float32Regs:$a, Float32Regs:$b)),
(SET_f32rr Float32Regs:$a, Float32Regs:$b, ModeFTZ)>,
@@ -2294,16 +2461,10 @@ def LoadParamMemV2I8 : LoadParamV2MemInst<Int16Regs, ".b8">;
def LoadParamMemV4I32 : LoadParamV4MemInst<Int32Regs, ".b32">;
def LoadParamMemV4I16 : LoadParamV4MemInst<Int16Regs, ".b16">;
def LoadParamMemV4I8 : LoadParamV4MemInst<Int16Regs, ".b8">;
-def LoadParamMemF16 : LoadParamMemInst<Float16Regs, ".b16">;
-def LoadParamMemF16x2 : LoadParamMemInst<Float16x2Regs, ".b32">;
def LoadParamMemF32 : LoadParamMemInst<Float32Regs, ".f32">;
def LoadParamMemF64 : LoadParamMemInst<Float64Regs, ".f64">;
-def LoadParamMemV2F16 : LoadParamV2MemInst<Float16Regs, ".b16">;
-def LoadParamMemV2F16x2: LoadParamV2MemInst<Float16x2Regs, ".b32">;
def LoadParamMemV2F32 : LoadParamV2MemInst<Float32Regs, ".f32">;
def LoadParamMemV2F64 : LoadParamV2MemInst<Float64Regs, ".f64">;
-def LoadParamMemV4F16 : LoadParamV4MemInst<Float16Regs, ".b16">;
-def LoadParamMemV4F16x2: LoadParamV4MemInst<Float16x2Regs, ".b32">;
def LoadParamMemV4F32 : LoadParamV4MemInst<Float32Regs, ".f32">;
def StoreParamI64 : StoreParamInst<Int64Regs, ".b64">;
@@ -2320,16 +2481,10 @@ def StoreParamV4I32 : StoreParamV4Inst<Int32Regs, ".b32">;
def StoreParamV4I16 : StoreParamV4Inst<Int16Regs, ".b16">;
def StoreParamV4I8 : StoreParamV4Inst<Int16Regs, ".b8">;
-def StoreParamF16 : StoreParamInst<Float16Regs, ".b16">;
-def StoreParamF16x2 : StoreParamInst<Float16x2Regs, ".b32">;
def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">;
def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">;
-def StoreParamV2F16 : StoreParamV2Inst<Float16Regs, ".b16">;
-def StoreParamV2F16x2 : StoreParamV2Inst<Float16x2Regs, ".b32">;
def StoreParamV2F32 : StoreParamV2Inst<Float32Regs, ".f32">;
def StoreParamV2F64 : StoreParamV2Inst<Float64Regs, ".f64">;
-def StoreParamV4F16 : StoreParamV4Inst<Float16Regs, ".b16">;
-def StoreParamV4F16x2 : StoreParamV4Inst<Float16x2Regs, ".b32">;
def StoreParamV4F32 : StoreParamV4Inst<Float32Regs, ".f32">;
def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">;
@@ -2346,15 +2501,9 @@ def StoreRetvalV4I8 : StoreRetvalV4Inst<Int16Regs, ".b8">;
def StoreRetvalF64 : StoreRetvalInst<Float64Regs, ".f64">;
def StoreRetvalF32 : StoreRetvalInst<Float32Regs, ".f32">;
-def StoreRetvalF16 : StoreRetvalInst<Float16Regs, ".b16">;
-def StoreRetvalF16x2 : StoreRetvalInst<Float16x2Regs, ".b32">;
def StoreRetvalV2F64 : StoreRetvalV2Inst<Float64Regs, ".f64">;
def StoreRetvalV2F32 : StoreRetvalV2Inst<Float32Regs, ".f32">;
-def StoreRetvalV2F16 : StoreRetvalV2Inst<Float16Regs, ".b16">;
-def StoreRetvalV2F16x2: StoreRetvalV2Inst<Float16x2Regs, ".b32">;
def StoreRetvalV4F32 : StoreRetvalV4Inst<Float32Regs, ".f32">;
-def StoreRetvalV4F16 : StoreRetvalV4Inst<Float16Regs, ".b16">;
-def StoreRetvalV4F16x2: StoreRetvalV4Inst<Float16x2Regs, ".b32">;
def CallArgBeginInst : NVPTXInst<(outs), (ins), "(", [(CallArgBegin)]>;
def CallArgEndInst1 : NVPTXInst<(outs), (ins), ");", [(CallArgEnd (i32 1))]>;
@@ -2365,19 +2514,26 @@ class CallArgInst<NVPTXRegClass regclass> :
NVPTXInst<(outs), (ins regclass:$a), "$a, ",
[(CallArg (i32 0), regclass:$a)]>;
+class CallArgInstVT<NVPTXRegClass regclass, ValueType vt> :
+ NVPTXInst<(outs), (ins regclass:$a), "$a, ",
+ [(CallArg (i32 0), vt:$a)]>;
+
class LastCallArgInst<NVPTXRegClass regclass> :
NVPTXInst<(outs), (ins regclass:$a), "$a",
[(LastCallArg (i32 0), regclass:$a)]>;
+class LastCallArgInstVT<NVPTXRegClass regclass, ValueType vt> :
+ NVPTXInst<(outs), (ins regclass:$a), "$a",
+ [(LastCallArg (i32 0), vt:$a)]>;
def CallArgI64 : CallArgInst<Int64Regs>;
-def CallArgI32 : CallArgInst<Int32Regs>;
-def CallArgI16 : CallArgInst<Int16Regs>;
+def CallArgI32 : CallArgInstVT<Int32Regs, i32>;
+def CallArgI16 : CallArgInstVT<Int16Regs, i16>;
def CallArgF64 : CallArgInst<Float64Regs>;
def CallArgF32 : CallArgInst<Float32Regs>;
def LastCallArgI64 : LastCallArgInst<Int64Regs>;
-def LastCallArgI32 : LastCallArgInst<Int32Regs>;
-def LastCallArgI16 : LastCallArgInst<Int16Regs>;
+def LastCallArgI32 : LastCallArgInstVT<Int32Regs, i32>;
+def LastCallArgI16 : LastCallArgInstVT<Int16Regs, i16>;
def LastCallArgF64 : LastCallArgInst<Float64Regs>;
def LastCallArgF32 : LastCallArgInst<Float32Regs>;
@@ -2394,7 +2550,7 @@ def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), "$addr, ",
[(CallVoid (Wrapper tglobaladdr:$addr))]>;
def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ",
- [(CallVoid Int32Regs:$addr)]>;
+ [(CallVoid i32:$addr)]>;
def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr), "$addr, ",
[(CallVoid Int64Regs:$addr)]>;
def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;",
@@ -2431,53 +2587,54 @@ class MoveParamInst<ValueType T, NVPTXRegClass regclass, string asmstr> :
!strconcat("mov", asmstr, " \t$dst, $src;"),
[(set (T regclass:$dst), (MoveParam (T regclass:$src)))]>;
-class MoveParamSymbolInst<NVPTXRegClass regclass, Operand srcty,
+class MoveParamSymbolInst<NVPTXRegClass regclass, Operand srcty, ValueType vt,
string asmstr> :
NVPTXInst<(outs regclass:$dst), (ins srcty:$src),
!strconcat("mov", asmstr, " \t$dst, $src;"),
- [(set regclass:$dst, (MoveParam texternalsym:$src))]>;
+ [(set vt:$dst, (MoveParam texternalsym:$src))]>;
def MoveParamI64 : MoveParamInst<i64, Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<i32, Int32Regs, ".b32">;
-def MoveParamSymbolI64 : MoveParamSymbolInst<Int64Regs, i64imm, ".b64">;
-def MoveParamSymbolI32 : MoveParamSymbolInst<Int32Regs, i32imm, ".b32">;
+def MoveParamSymbolI64 : MoveParamSymbolInst<Int64Regs, i64imm, i64, ".b64">;
+def MoveParamSymbolI32 : MoveParamSymbolInst<Int32Regs, i32imm, i32, ".b32">;
def MoveParamI16 :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
- "cvt.u16.u32 \t$dst, $src;",
- [(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
+ "cvt.u16.u32 \t$dst, $src;", // ??? Why cvt.u16.u32 ?
+ [(set i16:$dst, (MoveParam i16:$src))]>;
def MoveParamF64 : MoveParamInst<f64, Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<f32, Float32Regs, ".f32">;
-def MoveParamF16 : MoveParamInst<f16, Float16Regs, ".f16">;
-class PseudoUseParamInst<NVPTXRegClass regclass> :
+class PseudoUseParamInst<NVPTXRegClass regclass, ValueType vt> :
NVPTXInst<(outs), (ins regclass:$src),
"// Pseudo use of $src",
- [(PseudoUseParam regclass:$src)]>;
+ [(PseudoUseParam vt:$src)]>;
-def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
-def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
-def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs>;
-def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs>;
-def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
+def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs, i64>;
+def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs, i32>;
+def PseudoUseParamI16 : PseudoUseParamInst<Int16Regs, i16>;
+def PseudoUseParamF64 : PseudoUseParamInst<Float64Regs, f64>;
+def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs, f32>;
class ProxyRegInst<string SzStr, ValueType T, NVPTXRegClass regclass> :
NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
!strconcat("mov.", SzStr, " \t$dst, $src;"),
[(set (T regclass:$dst), (ProxyReg (T regclass:$src)))]>;
-let isCodeGenOnly=1, isPseudo=1 in {
- def ProxyRegI1 : ProxyRegInst<"pred", i1, Int1Regs>;
- def ProxyRegI16 : ProxyRegInst<"b16", i16, Int16Regs>;
- def ProxyRegI32 : ProxyRegInst<"b32", i32, Int32Regs>;
- def ProxyRegI64 : ProxyRegInst<"b64", i64, Int64Regs>;
- def ProxyRegF16 : ProxyRegInst<"b16", f16, Float16Regs>;
- def ProxyRegBF16 : ProxyRegInst<"b16", bf16, Float16Regs>;
- def ProxyRegF32 : ProxyRegInst<"f32", f32, Float32Regs>;
- def ProxyRegF64 : ProxyRegInst<"f64", f64, Float64Regs>;
- def ProxyRegF16x2 : ProxyRegInst<"b32", v2f16, Float16x2Regs>;
- def ProxyRegBF16x2 : ProxyRegInst<"b32", v2bf16, Float16x2Regs>;
+def ProxyRegI1 : ProxyRegInst<"pred", i1, Int1Regs>;
+def ProxyRegI16 : ProxyRegInst<"b16", i16, Int16Regs>;
+def ProxyRegI32 : ProxyRegInst<"b32", i32, Int32Regs>;
+def ProxyRegI64 : ProxyRegInst<"b64", i64, Int64Regs>;
+def ProxyRegF32 : ProxyRegInst<"f32", f32, Float32Regs>;
+def ProxyRegF64 : ProxyRegInst<"f64", f64, Float64Regs>;
+
+foreach vt = [f16, bf16] in {
+ def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI16 Int16Regs:$src)>;
+}
+
+foreach vt = [v2f16, v2bf16] in {
+ def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI32 Int32Regs:$src)>;
}
//
@@ -2527,8 +2684,6 @@ let mayLoad=1, hasSideEffects=0 in {
defm LD_i16 : LD<Int16Regs>;
defm LD_i32 : LD<Int32Regs>;
defm LD_i64 : LD<Int64Regs>;
- defm LD_f16 : LD<Float16Regs>;
- defm LD_f16x2 : LD<Float16x2Regs>;
defm LD_f32 : LD<Float32Regs>;
defm LD_f64 : LD<Float64Regs>;
}
@@ -2577,8 +2732,6 @@ let mayStore=1, hasSideEffects=0 in {
defm ST_i16 : ST<Int16Regs>;
defm ST_i32 : ST<Int32Regs>;
defm ST_i64 : ST<Int64Regs>;
- defm ST_f16 : ST<Float16Regs>;
- defm ST_f16x2 : ST<Float16x2Regs>;
defm ST_f32 : ST<Float32Regs>;
defm ST_f64 : ST<Float64Regs>;
}
@@ -2665,8 +2818,6 @@ let mayLoad=1, hasSideEffects=0 in {
defm LDV_i16 : LD_VEC<Int16Regs>;
defm LDV_i32 : LD_VEC<Int32Regs>;
defm LDV_i64 : LD_VEC<Int64Regs>;
- defm LDV_f16 : LD_VEC<Float16Regs>;
- defm LDV_f16x2 : LD_VEC<Float16x2Regs>;
defm LDV_f32 : LD_VEC<Float32Regs>;
defm LDV_f64 : LD_VEC<Float64Regs>;
}
@@ -2760,8 +2911,6 @@ let mayStore=1, hasSideEffects=0 in {
defm STV_i16 : ST_VEC<Int16Regs>;
defm STV_i32 : ST_VEC<Int32Regs>;
defm STV_i64 : ST_VEC<Int64Regs>;
- defm STV_f16 : ST_VEC<Float16Regs>;
- defm STV_f16x2 : ST_VEC<Float16x2Regs>;
defm STV_f32 : ST_VEC<Float32Regs>;
defm STV_f64 : ST_VEC<Float64Regs>;
}
@@ -2775,22 +2924,29 @@ class F_BITCONVERT<string SzStr, ValueType TIn, ValueType TOut,
!strconcat("mov.b", SzStr, " \t$d, $a;"),
[(set (TOut regclassOut:$d), (bitconvert (TIn regclassIn:$a)))]>;
-def BITCONVERT_16_I2F : F_BITCONVERT<"16", i16, f16>;
-def BITCONVERT_16_F2I : F_BITCONVERT<"16", f16, i16>;
-def BITCONVERT_16_I2BF : F_BITCONVERT<"16", i16, bf16>;
-def BITCONVERT_16_BF2I : F_BITCONVERT<"16", bf16, i16>;
def BITCONVERT_32_I2F : F_BITCONVERT<"32", i32, f32>;
def BITCONVERT_32_F2I : F_BITCONVERT<"32", f32, i32>;
def BITCONVERT_64_I2F : F_BITCONVERT<"64", i64, f64>;
def BITCONVERT_64_F2I : F_BITCONVERT<"64", f64, i64>;
-def BITCONVERT_32_I2F16x2 : F_BITCONVERT<"32", i32, v2f16>;
-def BITCONVERT_32_F16x22I : F_BITCONVERT<"32", v2f16, i32>;
-def BITCONVERT_32_F2F16x2 : F_BITCONVERT<"32", f32, v2f16>;
-def BITCONVERT_32_F16x22F : F_BITCONVERT<"32", v2f16, f32>;
-def BITCONVERT_32_I2BF16x2 : F_BITCONVERT<"32", i32, v2bf16>;
-def BITCONVERT_32_BF16x22I : F_BITCONVERT<"32", v2bf16, i32>;
-def BITCONVERT_32_F2BF16x2 : F_BITCONVERT<"32", f32, v2bf16>;
-def BITCONVERT_32_BF16x22F : F_BITCONVERT<"32", v2bf16, f32>;
+
+foreach vt = [v2f16, v2bf16] in {
+def: Pat<(vt (bitconvert (i32 UInt32Const:$a))),
+ (IMOVB32ri UInt32Const:$a)>;
+def: Pat<(vt (bitconvert (i32 Int32Regs:$a))),
+ (ProxyRegI32 Int32Regs:$a)>;
+def: Pat<(i32 (bitconvert (vt Int32Regs:$a))),
+ (ProxyRegI32 Int32Regs:$a)>;
+def: Pat<(vt (bitconvert (f32 Float32Regs:$a))),
+ (BITCONVERT_32_F2I Float32Regs:$a)>;
+}
+foreach vt = [f16, bf16] in {
+def: Pat<(vt (bitconvert (i16 UInt16Const:$a))),
+ (IMOVB16ri UInt16Const:$a)>;
+def: Pat<(vt (bitconvert (i16 Int16Regs:$a))),
+ (ProxyRegI16 Int16Regs:$a)>;
+def: Pat<(i16 (bitconvert (vt Int16Regs:$a))),
+ (ProxyRegI16 Int16Regs:$a)>;
+}
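// Illustrative expansion of the foreach patterns above (a sketch, not lines
// taken from the patch): with f16/bf16 values now carried in Int16Regs, a
// bitconvert such as
//   (f16 (bitconvert (i16 Int16Regs:$a)))
// selects ProxyRegI16, i.e. a plain "mov.b16 \t$dst, $src;", so the dedicated
// 16-bit BITCONVERT_16_* instructions removed above are no longer needed.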
// NOTE: pred->fp are currently sub-optimal due to an issue in TableGen where
// we cannot specify floating-point literals in isel patterns. Therefore, we
@@ -2816,6 +2972,26 @@ def : Pat<(f16 (uint_to_fp Int32Regs:$a)),
def : Pat<(f16 (uint_to_fp Int64Regs:$a)),
(CVT_f16_u64 Int64Regs:$a, CvtRN)>;
+// sint -> bf16
+def : Pat<(bf16 (sint_to_fp Int1Regs:$a)),
+ (CVT_bf16_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
+def : Pat<(bf16 (sint_to_fp Int16Regs:$a)),
+ (CVT_bf16_s16 Int16Regs:$a, CvtRN)>;
+def : Pat<(bf16 (sint_to_fp Int32Regs:$a)),
+ (CVT_bf16_s32 Int32Regs:$a, CvtRN)>;
+def : Pat<(bf16 (sint_to_fp Int64Regs:$a)),
+ (CVT_bf16_s64 Int64Regs:$a, CvtRN)>;
+
+// uint -> bf16
+def : Pat<(bf16 (uint_to_fp Int1Regs:$a)),
+ (CVT_bf16_u32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
+def : Pat<(bf16 (uint_to_fp Int16Regs:$a)),
+ (CVT_bf16_u16 Int16Regs:$a, CvtRN)>;
+def : Pat<(bf16 (uint_to_fp Int32Regs:$a)),
+ (CVT_bf16_u32 Int32Regs:$a, CvtRN)>;
+def : Pat<(bf16 (uint_to_fp Int64Regs:$a)),
+ (CVT_bf16_u64 Int64Regs:$a, CvtRN)>;
+
// sint -> f32
def : Pat<(f32 (sint_to_fp Int1Regs:$a)),
(CVT_f32_s32 (SELP_u32ii 1, 0, Int1Regs:$a), CvtRN)>;
@@ -2858,25 +3034,44 @@ def : Pat<(f64 (uint_to_fp Int64Regs:$a)),
// f16 -> sint
-def : Pat<(i1 (fp_to_sint (f16 Float16Regs:$a))),
- (SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>;
-def : Pat<(i16 (fp_to_sint (f16 Float16Regs:$a))),
- (CVT_s16_f16 (f16 Float16Regs:$a), CvtRZI)>;
-def : Pat<(i32 (fp_to_sint (f16 Float16Regs:$a))),
- (CVT_s32_f16 (f16 Float16Regs:$a), CvtRZI)>;
-def : Pat<(i64 (fp_to_sint (f16 Float16Regs:$a))),
- (CVT_s64_f16 Float16Regs:$a, CvtRZI)>;
+def : Pat<(i1 (fp_to_sint (f16 Int16Regs:$a))),
+ (SETP_b16ri Int16Regs:$a, 0, CmpEQ)>;
+def : Pat<(i16 (fp_to_sint (f16 Int16Regs:$a))),
+ (CVT_s16_f16 (f16 Int16Regs:$a), CvtRZI)>;
+def : Pat<(i32 (fp_to_sint (f16 Int16Regs:$a))),
+ (CVT_s32_f16 (f16 Int16Regs:$a), CvtRZI)>;
+def : Pat<(i64 (fp_to_sint (f16 Int16Regs:$a))),
+ (CVT_s64_f16 Int16Regs:$a, CvtRZI)>;
// f16 -> uint
-def : Pat<(i1 (fp_to_uint (f16 Float16Regs:$a))),
- (SETP_b16ri (BITCONVERT_16_F2I Float16Regs:$a), 0, CmpEQ)>;
-def : Pat<(i16 (fp_to_uint (f16 Float16Regs:$a))),
- (CVT_u16_f16 Float16Regs:$a, CvtRZI)>;
-def : Pat<(i32 (fp_to_uint (f16 Float16Regs:$a))),
- (CVT_u32_f16 Float16Regs:$a, CvtRZI)>;
-def : Pat<(i64 (fp_to_uint (f16 Float16Regs:$a))),
- (CVT_u64_f16 Float16Regs:$a, CvtRZI)>;
-
+def : Pat<(i1 (fp_to_uint (f16 Int16Regs:$a))),
+ (SETP_b16ri Int16Regs:$a, 0, CmpEQ)>;
+def : Pat<(i16 (fp_to_uint (f16 Int16Regs:$a))),
+ (CVT_u16_f16 Int16Regs:$a, CvtRZI)>;
+def : Pat<(i32 (fp_to_uint (f16 Int16Regs:$a))),
+ (CVT_u32_f16 Int16Regs:$a, CvtRZI)>;
+def : Pat<(i64 (fp_to_uint (f16 Int16Regs:$a))),
+ (CVT_u64_f16 Int16Regs:$a, CvtRZI)>;
+
+// bf16 -> sint
+def : Pat<(i1 (fp_to_sint (bf16 Int16Regs:$a))),
+ (SETP_b16ri Int16Regs:$a, 0, CmpEQ)>;
+def : Pat<(i16 (fp_to_sint (bf16 Int16Regs:$a))),
+ (CVT_s16_bf16 (bf16 Int16Regs:$a), CvtRZI)>;
+def : Pat<(i32 (fp_to_sint (bf16 Int16Regs:$a))),
+ (CVT_s32_bf16 (bf16 Int16Regs:$a), CvtRZI)>;
+def : Pat<(i64 (fp_to_sint (bf16 Int16Regs:$a))),
+ (CVT_s64_bf16 Int16Regs:$a, CvtRZI)>;
+
+// bf16 -> uint
+def : Pat<(i1 (fp_to_uint (bf16 Int16Regs:$a))),
+ (SETP_b16ri Int16Regs:$a, 0, CmpEQ)>;
+def : Pat<(i16 (fp_to_uint (bf16 Int16Regs:$a))),
+ (CVT_u16_bf16 Int16Regs:$a, CvtRZI)>;
+def : Pat<(i32 (fp_to_uint (bf16 Int16Regs:$a))),
+ (CVT_u32_bf16 Int16Regs:$a, CvtRZI)>;
+def : Pat<(i64 (fp_to_uint (bf16 Int16Regs:$a))),
+ (CVT_u64_bf16 Int16Regs:$a, CvtRZI)>;
// f32 -> sint
def : Pat<(i1 (fp_to_sint Float32Regs:$a)),
(SETP_b32ri (BITCONVERT_32_F2I Float32Regs:$a), 0, CmpEQ)>;
@@ -3012,17 +3207,20 @@ def : Pat<(sext_inreg Int64Regs:$a, i32), (CVT_INREG_s64_s32 Int64Regs:$a)>;
// Select instructions with 32-bit predicates
-def : Pat<(select Int32Regs:$pred, Int16Regs:$a, Int16Regs:$b),
+def : Pat<(select Int32Regs:$pred, i16:$a, i16:$b),
(SELP_b16rr Int16Regs:$a, Int16Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
-def : Pat<(select Int32Regs:$pred, Int32Regs:$a, Int32Regs:$b),
+def : Pat<(select Int32Regs:$pred, i32:$a, i32:$b),
(SELP_b32rr Int32Regs:$a, Int32Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select Int32Regs:$pred, Int64Regs:$a, Int64Regs:$b),
(SELP_b64rr Int64Regs:$a, Int64Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
-def : Pat<(select Int32Regs:$pred, (f16 Float16Regs:$a), (f16 Float16Regs:$b)),
- (SELP_f16rr Float16Regs:$a, Float16Regs:$b,
+def : Pat<(select Int32Regs:$pred, (f16 Int16Regs:$a), (f16 Int16Regs:$b)),
+ (SELP_f16rr Int16Regs:$a, Int16Regs:$b,
+ (SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
+def : Pat<(select Int32Regs:$pred, (bf16 Int16Regs:$a), (bf16 Int16Regs:$b)),
+ (SELP_bf16rr Int16Regs:$a, Int16Regs:$b,
(SETP_b32ri (ANDb32ri Int32Regs:$pred, 1), 1, CmpEQ))>;
def : Pat<(select Int32Regs:$pred, Float32Regs:$a, Float32Regs:$b),
(SELP_f32rr Float32Regs:$a, Float32Regs:$b,
@@ -3063,49 +3261,44 @@ let hasSideEffects = false in {
(ins Float64Regs:$s),
"mov.b64 \t{{$d1, $d2}}, $s;", []>;
+ def I32toI16H : NVPTXInst<(outs Int16Regs:$high),
+ (ins Int32Regs:$s),
+ "{{ .reg .b16 tmp; mov.b32 {tmp, $high}, $s; }}",
+ []>;
+ def I32toI16L : NVPTXInst<(outs Int16Regs:$low),
+ (ins Int32Regs:$s),
+ "{{ .reg .b16 tmp; mov.b32 {$low, tmp}, $s; }}",
+ []>;
+ def I64toI32H : NVPTXInst<(outs Int32Regs:$high),
+ (ins Int64Regs:$s),
+ "{{ .reg .b32 tmp; mov.b64 {tmp, $high}, $s; }}",
+ []>;
}
-let hasSideEffects = false in {
- // Extract element of f16x2 register. PTX does not provide any way
- // to access elements of f16x2 vector directly, so we need to
- // extract it using a temporary register.
- def F16x2toF16_0 : NVPTXInst<(outs Float16Regs:$dst),
- (ins Float16x2Regs:$src),
- "{{ .reg .b16 \t%tmp_hi;\n\t"
- " mov.b32 \t{$dst, %tmp_hi}, $src; }}",
- [(set Float16Regs:$dst,
- (extractelt (v2f16 Float16x2Regs:$src), 0))]>;
- def F16x2toF16_1 : NVPTXInst<(outs Float16Regs:$dst),
- (ins Float16x2Regs:$src),
- "{{ .reg .b16 \t%tmp_lo;\n\t"
- " mov.b32 \t{%tmp_lo, $dst}, $src; }}",
- [(set Float16Regs:$dst,
- (extractelt (v2f16 Float16x2Regs:$src), 1))]>;
-
- // Coalesce two f16 registers into f16x2
- def BuildF16x2 : NVPTXInst<(outs Float16x2Regs:$dst),
- (ins Float16Regs:$a, Float16Regs:$b),
- "mov.b32 \t$dst, {{$a, $b}};",
- [(set (v2f16 Float16x2Regs:$dst),
- (build_vector (f16 Float16Regs:$a), (f16 Float16Regs:$b)))]>;
-
-  // Directly initializing the underlying b32 register is one less SASS
-  // instruction than the vector-packing move.
- def BuildF16x2i : NVPTXInst<(outs Float16x2Regs:$dst), (ins i32imm:$src),
- "mov.b32 \t$dst, $src;",
- []>;
-
- // Split f16x2 into two f16 registers.
- def SplitF16x2 : NVPTXInst<(outs Float16Regs:$lo, Float16Regs:$hi),
- (ins Float16x2Regs:$src),
- "mov.b32 \t{{$lo, $hi}}, $src;",
- []>;
- // Split an i32 into two f16
- def SplitI32toF16x2 : NVPTXInst<(outs Float16Regs:$lo, Float16Regs:$hi),
- (ins Int32Regs:$src),
- "mov.b32 \t{{$lo, $hi}}, $src;",
- []>;
-}
+// Using partial vectorized move produces better SASS code for extraction of
+// upper/lower parts of an integer.
+def : Pat<(i16 (trunc (srl Int32Regs:$s, (i32 16)))),
+ (I32toI16H Int32Regs:$s)>;
+def : Pat<(i16 (trunc (sra Int32Regs:$s, (i32 16)))),
+ (I32toI16H Int32Regs:$s)>;
+def : Pat<(i32 (trunc (srl Int64Regs:$s, (i32 32)))),
+ (I64toI32H Int64Regs:$s)>;
+def : Pat<(i32 (trunc (sra Int64Regs:$s, (i32 32)))),
+ (I64toI32H Int64Regs:$s)>;
+
+def : Pat<(f16 (extractelt (v2f16 Int32Regs:$src), 0)),
+ (I32toI16L Int32Regs:$src)>;
+def : Pat<(f16 (extractelt (v2f16 Int32Regs:$src), 1)),
+ (I32toI16H Int32Regs:$src)>;
+def : Pat<(v2f16 (build_vector (f16 Int16Regs:$a), (f16 Int16Regs:$b))),
+ (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>;
+
+def : Pat<(bf16 (extractelt (v2bf16 Int32Regs:$src), 0)),
+ (I32toI16L Int32Regs:$src)>;
+def : Pat<(bf16 (extractelt (v2bf16 Int32Regs:$src), 1)),
+ (I32toI16H Int32Regs:$src)>;
+def : Pat<(v2bf16 (build_vector (bf16 Int16Regs:$a), (bf16 Int16Regs:$b))),
+ (V2I16toI32 Int16Regs:$a, Int16Regs:$b)>;
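// Illustrative expansion (a sketch, not lines taken from the patch): for the
// extractelt patterns above, reading the high half of a v2f16/v2bf16 held in
// a .b32 register selects I32toI16H, which emits
//   { .reg .b16 tmp; mov.b32 {tmp, $high}, $s; }
// i.e. the partial vectorized move referred to above, rather than a shift
// followed by a truncating move.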
// Count leading zeros
let hasSideEffects = false in {
@@ -3174,10 +3367,17 @@ def : Pat<(i32 (zext (i16 (ctpop Int16Regs:$a)))),
def : Pat<(f16 (fpround Float32Regs:$a)),
(CVT_f16_f32 Float32Regs:$a, CvtRN)>;
+// fpround f32 -> bf16
+def : Pat<(bf16 (fpround Float32Regs:$a)),
+ (CVT_bf16_f32 Float32Regs:$a, CvtRN)>;
+
// fpround f64 -> f16
def : Pat<(f16 (fpround Float64Regs:$a)),
(CVT_f16_f64 Float64Regs:$a, CvtRN)>;
+// fpround f64 -> bf16
+def : Pat<(bf16 (fpround Float64Regs:$a)),
+ (CVT_bf16_f64 Float64Regs:$a, CvtRN)>;
// fpround f64 -> f32
def : Pat<(f32 (fpround Float64Regs:$a)),
(CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
@@ -3185,14 +3385,23 @@ def : Pat<(f32 (fpround Float64Regs:$a)),
(CVT_f32_f64 Float64Regs:$a, CvtRN)>;
// fpextend f16 -> f32
-def : Pat<(f32 (fpextend (f16 Float16Regs:$a))),
- (CVT_f32_f16 Float16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(f32 (fpextend (f16 Float16Regs:$a))),
- (CVT_f32_f16 Float16Regs:$a, CvtNONE)>;
+def : Pat<(f32 (fpextend (f16 Int16Regs:$a))),
+ (CVT_f32_f16 Int16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(f32 (fpextend (f16 Int16Regs:$a))),
+ (CVT_f32_f16 Int16Regs:$a, CvtNONE)>;
+// fpextend bf16 -> f32
+def : Pat<(f32 (fpextend (bf16 Int16Regs:$a))),
+ (CVT_f32_bf16 Int16Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(f32 (fpextend (bf16 Int16Regs:$a))),
+ (CVT_f32_bf16 Int16Regs:$a, CvtNONE)>;
// fpextend f16 -> f64
-def : Pat<(f64 (fpextend (f16 Float16Regs:$a))),
- (CVT_f64_f16 Float16Regs:$a, CvtNONE)>;
+def : Pat<(f64 (fpextend (f16 Int16Regs:$a))),
+ (CVT_f64_f16 Int16Regs:$a, CvtNONE)>;
+
+// fpextend bf16 -> f64
+def : Pat<(f64 (fpextend (bf16 Int16Regs:$a))),
+ (CVT_f64_bf16 Int16Regs:$a, CvtNONE)>;
// fpextend f32 -> f64
def : Pat<(f64 (fpextend Float32Regs:$a)),
@@ -3200,14 +3409,16 @@ def : Pat<(f64 (fpextend Float32Regs:$a)),
def : Pat<(f64 (fpextend Float32Regs:$a)),
(CVT_f64_f32 Float32Regs:$a, CvtNONE)>;
-def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
+def retglue : SDNode<"NVPTXISD::RET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
// fceil, ffloor, froundeven, ftrunc.
multiclass CVT_ROUND<SDNode OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
- def : Pat<(OpNode (f16 Float16Regs:$a)),
- (CVT_f16_f16 Float16Regs:$a, Mode)>;
+ def : Pat<(OpNode (f16 Int16Regs:$a)),
+ (CVT_f16_f16 Int16Regs:$a, Mode)>;
+ def : Pat<(OpNode (bf16 Int16Regs:$a)),
+ (CVT_bf16_bf16 Int16Regs:$a, Mode)>;
def : Pat<(OpNode Float32Regs:$a),
(CVT_f32_f32 Float32Regs:$a, ModeFTZ)>, Requires<[doF32FTZ]>;
def : Pat<(OpNode Float32Regs:$a),
@@ -3234,7 +3445,7 @@ defm : CVT_ROUND<frint, CvtRNI, CvtRNI_FTZ>;
let isTerminator=1 in {
let isReturn=1, isBarrier=1 in
- def Return : NVPTXInst<(outs), (ins), "ret;", [(retflag)]>;
+ def Return : NVPTXInst<(outs), (ins), "ret;", [(retglue)]>;
let isBranch=1 in
def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 1192cc078408..f0de0144d410 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -104,34 +104,46 @@ def INT_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync \t$i;",
def INT_BAR_WARP_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "bar.warp.sync \t$i;",
[(int_nvvm_bar_warp_sync imm:$i)]>,
- Requires<[hasPTX60, hasSM30]>;
+ Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BAR_WARP_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "bar.warp.sync \t$i;",
[(int_nvvm_bar_warp_sync Int32Regs:$i)]>,
- Requires<[hasPTX60, hasSM30]>;
+ Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_I : NVPTXInst<(outs), (ins i32imm:$i), "barrier.sync \t$i;",
[(int_nvvm_barrier_sync imm:$i)]>,
- Requires<[hasPTX60, hasSM30]>;
+ Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_R : NVPTXInst<(outs), (ins Int32Regs:$i), "barrier.sync \t$i;",
[(int_nvvm_barrier_sync Int32Regs:$i)]>,
- Requires<[hasPTX60, hasSM30]>;
+ Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_RR : NVPTXInst<(outs), (ins Int32Regs:$id, Int32Regs:$cnt),
"barrier.sync \t$id, $cnt;",
[(int_nvvm_barrier_sync_cnt Int32Regs:$id, Int32Regs:$cnt)]>,
- Requires<[hasPTX60, hasSM30]>;
+ Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_RI : NVPTXInst<(outs), (ins Int32Regs:$id, i32imm:$cnt),
"barrier.sync \t$id, $cnt;",
[(int_nvvm_barrier_sync_cnt Int32Regs:$id, imm:$cnt)]>,
- Requires<[hasPTX60, hasSM30]>;
+ Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_IR : NVPTXInst<(outs), (ins i32imm:$id, Int32Regs:$cnt),
"barrier.sync \t$id, $cnt;",
[(int_nvvm_barrier_sync_cnt imm:$id, Int32Regs:$cnt)]>,
- Requires<[hasPTX60, hasSM30]>;
+ Requires<[hasPTX<60>, hasSM<30>]>;
def INT_BARRIER_SYNC_CNT_II : NVPTXInst<(outs), (ins i32imm:$id, i32imm:$cnt),
"barrier.sync \t$id, $cnt;",
[(int_nvvm_barrier_sync_cnt imm:$id, imm:$cnt)]>,
- Requires<[hasPTX60, hasSM30]>;
+ Requires<[hasPTX<60>, hasSM<30>]>;
+class INT_BARRIER_CLUSTER<string variant, Intrinsic Intr,
+ list<Predicate> Preds = [hasPTX<78>, hasSM<90>]>:
+ NVPTXInst<(outs), (ins), "barrier.cluster."# variant #";", [(Intr)]>,
+ Requires<Preds>;
+
+def barrier_cluster_arrive:
+ INT_BARRIER_CLUSTER<"arrive", int_nvvm_barrier_cluster_arrive>;
+def barrier_cluster_arrive_relaxed:
+ INT_BARRIER_CLUSTER<"arrive.relaxed",
+ int_nvvm_barrier_cluster_arrive_relaxed, [hasPTX<80>, hasSM<90>]>;
+def barrier_cluster_wait:
+ INT_BARRIER_CLUSTER<"wait", int_nvvm_barrier_cluster_wait>;
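// Illustrative expansion (a sketch, not lines taken from the patch): the class
// above concatenates "barrier.cluster." with the variant string, so
// barrier_cluster_arrive_relaxed emits "barrier.cluster.arrive.relaxed;" and
// carries [hasPTX<80>, hasSM<90>], while the arrive/wait variants keep the
// default [hasPTX<78>, hasSM<90>] predicates.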
class SHFL_INSTR<bit sync, string mode, string reg, bit return_pred,
bit offset_imm, bit mask_imm, bit threadmask_imm>
@@ -182,7 +194,7 @@ foreach sync = [false, true] in {
foreach threadmask_imm = THREADMASK_INFO<sync>.ret in {
def : SHFL_INSTR<sync, mode, regclass, return_pred,
offset_imm, mask_imm, threadmask_imm>,
- Requires<!if(sync, [hasSM30, hasPTX60], [hasSM30, hasSHFL])>;
+ Requires<!if(sync, [hasSM<30>, hasPTX<60>], [hasSM<30>, hasSHFL])>;
}
}
}
@@ -196,7 +208,7 @@ multiclass VOTE<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
def : NVPTXInst<(outs regclass:$dest), (ins Int1Regs:$pred),
"vote." # mode # " \t$dest, $pred;",
[(set regclass:$dest, (IntOp Int1Regs:$pred))]>,
- Requires<[hasPTX60, hasSM30]>;
+ Requires<[hasPTX<60>, hasSM<30>]>;
}
defm VOTE_ALL : VOTE<Int1Regs, "all.pred", int_nvvm_vote_all>;
@@ -209,11 +221,11 @@ multiclass VOTE_SYNC<NVPTXRegClass regclass, string mode, Intrinsic IntOp> {
def i : NVPTXInst<(outs regclass:$dest), (ins i32imm:$mask, Int1Regs:$pred),
"vote.sync." # mode # " \t$dest, $pred, $mask;",
[(set regclass:$dest, (IntOp imm:$mask, Int1Regs:$pred))]>,
- Requires<[hasPTX60, hasSM30]>;
+ Requires<[hasPTX<60>, hasSM<30>]>;
def r : NVPTXInst<(outs regclass:$dest), (ins Int32Regs:$mask, Int1Regs:$pred),
"vote.sync." # mode #" \t$dest, $pred, $mask;",
[(set regclass:$dest, (IntOp Int32Regs:$mask, Int1Regs:$pred))]>,
- Requires<[hasPTX60, hasSM30]>;
+ Requires<[hasPTX<60>, hasSM<30>]>;
}
defm VOTE_SYNC_ALL : VOTE_SYNC<Int1Regs, "all.pred", int_nvvm_vote_all_sync>;
@@ -226,19 +238,19 @@ multiclass MATCH_ANY_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic IntO
def ii : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, ImmOp:$value),
"match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
[(set Int32Regs:$dest, (IntOp imm:$mask, imm:$value))]>,
- Requires<[hasPTX60, hasSM70]>;
+ Requires<[hasPTX<60>, hasSM<70>]>;
def ir : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, ImmOp:$value),
"match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
[(set Int32Regs:$dest, (IntOp Int32Regs:$mask, imm:$value))]>,
- Requires<[hasPTX60, hasSM70]>;
+ Requires<[hasPTX<60>, hasSM<70>]>;
def ri : NVPTXInst<(outs Int32Regs:$dest), (ins i32imm:$mask, regclass:$value),
"match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
[(set Int32Regs:$dest, (IntOp imm:$mask, regclass:$value))]>,
- Requires<[hasPTX60, hasSM70]>;
+ Requires<[hasPTX<60>, hasSM<70>]>;
def rr : NVPTXInst<(outs Int32Regs:$dest), (ins Int32Regs:$mask, regclass:$value),
"match.any.sync." # ptxtype # " \t$dest, $value, $mask;",
[(set Int32Regs:$dest, (IntOp Int32Regs:$mask, regclass:$value))]>,
- Requires<[hasPTX60, hasSM70]>;
+ Requires<[hasPTX<60>, hasSM<70>]>;
}
defm MATCH_ANY_SYNC_32 : MATCH_ANY_SYNC<Int32Regs, "b32", int_nvvm_match_any_sync_i32,
@@ -252,22 +264,22 @@ multiclass MATCH_ALLP_SYNC<NVPTXRegClass regclass, string ptxtype, Intrinsic Int
(ins i32imm:$mask, ImmOp:$value),
"match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
[(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, imm:$value))]>,
- Requires<[hasPTX60, hasSM70]>;
+ Requires<[hasPTX<60>, hasSM<70>]>;
def ir : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
(ins Int32Regs:$mask, ImmOp:$value),
"match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
[(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, imm:$value))]>,
- Requires<[hasPTX60, hasSM70]>;
+ Requires<[hasPTX<60>, hasSM<70>]>;
def ri : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
(ins i32imm:$mask, regclass:$value),
"match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
[(set Int32Regs:$dest, Int1Regs:$pred, (IntOp imm:$mask, regclass:$value))]>,
- Requires<[hasPTX60, hasSM70]>;
+ Requires<[hasPTX<60>, hasSM<70>]>;
def rr : NVPTXInst<(outs Int32Regs:$dest, Int1Regs:$pred),
(ins Int32Regs:$mask, regclass:$value),
"match.all.sync." # ptxtype # " \t$dest|$pred, $value, $mask;",
[(set Int32Regs:$dest, Int1Regs:$pred, (IntOp Int32Regs:$mask, regclass:$value))]>,
- Requires<[hasPTX60, hasSM70]>;
+ Requires<[hasPTX<60>, hasSM<70>]>;
}
defm MATCH_ALLP_SYNC_32 : MATCH_ALLP_SYNC<Int32Regs, "b32", int_nvvm_match_all_sync_i32p,
i32imm>;
@@ -278,7 +290,7 @@ multiclass REDUX_SYNC<string BinOp, string PTXType, Intrinsic Intrin> {
def : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$mask),
"redux.sync." # BinOp # "." # PTXType # " $dst, $src, $mask;",
[(set Int32Regs:$dst, (Intrin Int32Regs:$src, Int32Regs:$mask))]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
}
defm REDUX_SYNC_UMIN : REDUX_SYNC<"min", "u32", int_nvvm_redux_sync_umin>;
@@ -303,6 +315,9 @@ def INT_MEMBAR_CTA : MEMBAR<"membar.cta;", int_nvvm_membar_cta>;
def INT_MEMBAR_GL : MEMBAR<"membar.gl;", int_nvvm_membar_gl>;
def INT_MEMBAR_SYS : MEMBAR<"membar.sys;", int_nvvm_membar_sys>;
+def INT_FENCE_SC_CLUSTER:
+ MEMBAR<"fence.sc.cluster;", int_nvvm_fence_sc_cluster>,
+ Requires<[hasPTX<78>, hasSM<90>]>;
//-----------------------------------
// Async Copy Functions
@@ -312,11 +327,11 @@ multiclass CP_ASYNC_MBARRIER_ARRIVE<string NoInc, string AddrSpace, Intrinsic In
def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
!strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
[(Intrin Int32Regs:$addr)]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
!strconcat("cp.async.mbarrier.arrive", NoInc, AddrSpace, ".b64 [$addr];"),
[(Intrin Int64Regs:$addr)]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
}
defm CP_ASYNC_MBARRIER_ARRIVE :
@@ -328,53 +343,63 @@ defm CP_ASYNC_MBARRIER_ARRIVE_NOINC :
defm CP_ASYNC_MBARRIER_ARRIVE_NOINC_SHARED :
CP_ASYNC_MBARRIER_ARRIVE<".noinc", ".shared", int_nvvm_cp_async_mbarrier_arrive_noinc_shared>;
-multiclass CP_ASYNC_CA_SHARED_GLOBAL_I<string cpsize, Intrinsic Intrin> {
+multiclass CP_ASYNC_SHARED_GLOBAL_I<string cc, string cpsize, Intrinsic Intrin, Intrinsic IntrinS> {
def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
- !strconcat("cp.async.ca.shared.global [$dst], [$src], ", cpsize, ";"),
+ !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ";"),
[(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
- !strconcat("cp.async.ca.shared.global [$dst], [$src], ", cpsize, ";"),
+ !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ";"),
[(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
+ // Variant with src_size parameter
+ def _32s : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size),
+ !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
+ [(IntrinS Int32Regs:$dst, Int32Regs:$src, Int32Regs:$src_size)]>,
+ Requires<[hasPTX<70>, hasSM<80>]>;
+ def _32si: NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src, i32imm:$src_size),
+ !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
+ [(IntrinS Int32Regs:$dst, Int32Regs:$src, imm:$src_size)]>,
+ Requires<[hasPTX<70>, hasSM<80>]>;
+ def _64s : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size),
+ !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
+ [(IntrinS Int64Regs:$dst, Int64Regs:$src, Int32Regs:$src_size)]>,
+ Requires<[hasPTX<70>, hasSM<80>]>;
+ def _64si: NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src, i32imm:$src_size),
+ !strconcat("cp.async.", cc, ".shared.global [$dst], [$src], ", cpsize, ", $src_size;"),
+ [(IntrinS Int64Regs:$dst, Int64Regs:$src, imm:$src_size)]>,
+ Requires<[hasPTX<70>, hasSM<80>]>;
}
defm CP_ASYNC_CA_SHARED_GLOBAL_4 :
- CP_ASYNC_CA_SHARED_GLOBAL_I<"4", int_nvvm_cp_async_ca_shared_global_4>;
+ CP_ASYNC_SHARED_GLOBAL_I<"ca", "4", int_nvvm_cp_async_ca_shared_global_4,
+ int_nvvm_cp_async_ca_shared_global_4_s>;
defm CP_ASYNC_CA_SHARED_GLOBAL_8 :
- CP_ASYNC_CA_SHARED_GLOBAL_I<"8", int_nvvm_cp_async_ca_shared_global_8>;
+ CP_ASYNC_SHARED_GLOBAL_I<"ca", "8", int_nvvm_cp_async_ca_shared_global_8,
+ int_nvvm_cp_async_ca_shared_global_8_s>;
defm CP_ASYNC_CA_SHARED_GLOBAL_16 :
- CP_ASYNC_CA_SHARED_GLOBAL_I<"16", int_nvvm_cp_async_ca_shared_global_16>;
-
-multiclass CP_ASYNC_CG_SHARED_GLOBAL<string cpsize, Intrinsic Intrin> {
- def _32 : NVPTXInst<(outs), (ins Int32Regs:$dst, Int32Regs:$src),
- !strconcat("cp.async.cg.shared.global [$dst], [$src], ", cpsize, ";"),
- [(Intrin Int32Regs:$dst, Int32Regs:$src)]>,
- Requires<[hasPTX70, hasSM80]>;
- def _64 : NVPTXInst<(outs), (ins Int64Regs:$dst, Int64Regs:$src),
- !strconcat("cp.async.cg.shared.global [$dst], [$src], ", cpsize, ";"),
- [(Intrin Int64Regs:$dst, Int64Regs:$src)]>,
- Requires<[hasPTX70, hasSM80]>;
-}
+ CP_ASYNC_SHARED_GLOBAL_I<"ca", "16", int_nvvm_cp_async_ca_shared_global_16,
+ int_nvvm_cp_async_ca_shared_global_16_s>;
defm CP_ASYNC_CG_SHARED_GLOBAL_16 :
- CP_ASYNC_CG_SHARED_GLOBAL<"16", int_nvvm_cp_async_cg_shared_global_16>;
+ CP_ASYNC_SHARED_GLOBAL_I<"cg", "16", int_nvvm_cp_async_cg_shared_global_16,
+ int_nvvm_cp_async_cg_shared_global_16_s>;
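// Illustrative expansion (a sketch, not lines taken from the patch): for the
// "ca" variant with cpsize = "4", the new "_32s" flavour above emits
//   cp.async.ca.shared.global [$dst], [$src], 4, $src_size;
// matching int_nvvm_cp_async_ca_shared_global_4_s, i.e. the same copy as the
// plain "_32" form but with an explicit src_size operand.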
def CP_ASYNC_COMMIT_GROUP :
NVPTXInst<(outs), (ins), "cp.async.commit_group;", [(int_nvvm_cp_async_commit_group)]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
def CP_ASYNC_WAIT_GROUP :
NVPTXInst<(outs), (ins i32imm:$n), "cp.async.wait_group $n;",
[(int_nvvm_cp_async_wait_group (i32 timm:$n))]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
def CP_ASYNC_WAIT_ALL :
NVPTXInst<(outs), (ins), "cp.async.wait_all;",
[(int_nvvm_cp_async_wait_all)]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
//-----------------------------------
// MBarrier Functions
@@ -384,11 +409,11 @@ multiclass MBARRIER_INIT<string AddrSpace, Intrinsic Intrin> {
def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr, Int32Regs:$count),
!strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
[(Intrin Int32Regs:$addr, Int32Regs:$count)]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr, Int32Regs:$count),
!strconcat("mbarrier.init", AddrSpace, ".b64 [$addr], $count;"),
[(Intrin Int64Regs:$addr, Int32Regs:$count)]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
}
defm MBARRIER_INIT : MBARRIER_INIT<"", int_nvvm_mbarrier_init>;
@@ -399,11 +424,11 @@ multiclass MBARRIER_INVAL<string AddrSpace, Intrinsic Intrin> {
def _32 : NVPTXInst<(outs), (ins Int32Regs:$addr),
!strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
[(Intrin Int32Regs:$addr)]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs), (ins Int64Regs:$addr),
!strconcat("mbarrier.inval", AddrSpace, ".b64 [$addr];"),
[(Intrin Int64Regs:$addr)]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
}
defm MBARRIER_INVAL : MBARRIER_INVAL<"", int_nvvm_mbarrier_inval>;
@@ -414,11 +439,11 @@ multiclass MBARRIER_ARRIVE<string AddrSpace, Intrinsic Intrin> {
def _32 : NVPTXInst<(outs Int64Regs:$state), (ins Int32Regs:$addr),
!strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
[(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
!strconcat("mbarrier.arrive", AddrSpace, ".b64 $state, [$addr];"),
[(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
}
defm MBARRIER_ARRIVE : MBARRIER_ARRIVE<"", int_nvvm_mbarrier_arrive>;
@@ -431,13 +456,13 @@ multiclass MBARRIER_ARRIVE_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
!strconcat("mbarrier.arrive.noComplete", AddrSpace,
".b64 $state, [$addr], $count;"),
[(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs Int64Regs:$state),
(ins Int64Regs:$addr, Int32Regs:$count),
!strconcat("mbarrier.arrive.noComplete", AddrSpace,
".b64 $state, [$addr], $count;"),
[(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
}
defm MBARRIER_ARRIVE_NOCOMPLETE :
@@ -450,12 +475,12 @@ multiclass MBARRIER_ARRIVE_DROP<string AddrSpace, Intrinsic Intrin> {
!strconcat("mbarrier.arrive_drop", AddrSpace,
".b64 $state, [$addr];"),
[(set Int64Regs:$state, (Intrin Int32Regs:$addr))]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs Int64Regs:$state), (ins Int64Regs:$addr),
!strconcat("mbarrier.arrive_drop", AddrSpace,
".b64 $state, [$addr];"),
[(set Int64Regs:$state, (Intrin Int64Regs:$addr))]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
}
defm MBARRIER_ARRIVE_DROP :
@@ -469,13 +494,13 @@ multiclass MBARRIER_ARRIVE_DROP_NOCOMPLETE<string AddrSpace, Intrinsic Intrin> {
!strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
".b64 $state, [$addr], $count;"),
[(set Int64Regs:$state, (Intrin Int32Regs:$addr, Int32Regs:$count))]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs Int64Regs:$state),
(ins Int64Regs:$addr, Int32Regs:$count),
!strconcat("mbarrier.arrive_drop.noComplete", AddrSpace,
".b64 $state, [$addr], $count;"),
[(set Int64Regs:$state, (Intrin Int64Regs:$addr, Int32Regs:$count))]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
}
defm MBARRIER_ARRIVE_DROP_NOCOMPLETE :
@@ -488,11 +513,11 @@ multiclass MBARRIER_TEST_WAIT<string AddrSpace, Intrinsic Intrin> {
def _32 : NVPTXInst<(outs Int1Regs:$res), (ins Int32Regs:$addr, Int64Regs:$state),
!strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
[(set Int1Regs:$res, (Intrin Int32Regs:$addr, Int64Regs:$state))]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
def _64 : NVPTXInst<(outs Int1Regs:$res), (ins Int64Regs:$addr, Int64Regs:$state),
!strconcat("mbarrier.test_wait", AddrSpace, ".b64 $res, [$addr], $state;"),
[(set Int1Regs:$res, (Intrin Int64Regs:$addr, Int64Regs:$state))]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
}
defm MBARRIER_TEST_WAIT :
@@ -504,7 +529,7 @@ class MBARRIER_PENDING_COUNT<Intrinsic Intrin> :
NVPTXInst<(outs Int32Regs:$res), (ins Int64Regs:$state),
"mbarrier.pending_count.b64 $res, $state;",
[(set Int32Regs:$res, (Intrin Int64Regs:$state))]>,
- Requires<[hasPTX70, hasSM80]>;
+ Requires<[hasPTX<70>, hasSM<80>]>;
def MBARRIER_PENDING_COUNT :
MBARRIER_PENDING_COUNT<int_nvvm_mbarrier_pending_count>;
@@ -593,26 +618,26 @@ def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32 \t$dst, $src0, $src1;",
Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_f>;
def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32 \t$dst, $src0, $src1;",
Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_f,
- [hasPTX70, hasSM80]>;
+ [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32 \t$dst, $src0, $src1;",
Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_f,
- [hasPTX70, hasSM80]>;
+ [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMIN_XORSIGN_ABS_F :
F_MATH_2<"min.xorsign.abs.f32 \t$dst, $src0, $src1;",
Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_xorsign_abs_f,
- [hasPTX72, hasSM86]>;
+ [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_FTZ_XORSIGN_ABS_F :
F_MATH_2<"min.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_xorsign_abs_f,
- [hasPTX72, hasSM86]>;
+ [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_NAN_XORSIGN_ABS_F :
F_MATH_2<"min.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_nan_xorsign_abs_f,
- [hasPTX72, hasSM86]>;
+ [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F :
F_MATH_2<"min.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmin_ftz_nan_xorsign_abs_f,
- [hasPTX72, hasSM86]>;
+ [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_F : F_MATH_2<"max.f32 \t$dst, $src0, $src1;", Float32Regs,
Float32Regs, Float32Regs, int_nvvm_fmax_f>;
@@ -620,26 +645,26 @@ def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32 \t$dst, $src0, $src1;",
Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_f>;
def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32 \t$dst, $src0, $src1;",
Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_f,
- [hasPTX70, hasSM80]>;
+ [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32 \t$dst, $src0, $src1;",
Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_f,
- [hasPTX70, hasSM80]>;
+ [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_FMAX_XORSIGN_ABS_F :
F_MATH_2<"max.xorsign.abs.f32 \t$dst, $src0, $src1;",
Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_xorsign_abs_f,
- [hasPTX72, hasSM86]>;
+ [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_FTZ_XORSIGN_ABS_F :
F_MATH_2<"max.ftz.xorsign.abs.f32 \t$dst, $src0, $src1;",
Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_xorsign_abs_f,
- [hasPTX72, hasSM86]>;
+ [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_NAN_XORSIGN_ABS_F :
F_MATH_2<"max.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_nan_xorsign_abs_f,
- [hasPTX72, hasSM86]>;
+ [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F :
F_MATH_2<"max.ftz.NaN.xorsign.abs.f32 \t$dst, $src0, $src1;",
Float32Regs, Float32Regs, Float32Regs, int_nvvm_fmax_ftz_nan_xorsign_abs_f,
- [hasPTX72, hasSM86]>;
+ [hasPTX<72>, hasSM<86>]>;
def INT_NVVM_FMIN_D : F_MATH_2<"min.f64 \t$dst, $src0, $src1;", Float64Regs,
Float64Regs, Float64Regs, int_nvvm_fmin_d>;
@@ -651,7 +676,7 @@ def INT_NVVM_FMAX_D : F_MATH_2<"max.f64 \t$dst, $src0, $src1;", Float64Regs,
//
class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
- list<Predicate> Preds = [hasPTX70, hasSM80]> {
+ list<Predicate> Preds = [hasPTX<70>, hasSM<80>]> {
string Variant = V;
Intrinsic Intr = I;
NVPTXRegClass RegClass = RC;
@@ -661,67 +686,67 @@ class MIN_MAX_TUPLE<string V, Intrinsic I, NVPTXRegClass RC,
multiclass MIN_MAX<string IntName> {
foreach P = [
MIN_MAX_TUPLE<"_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_f16,
- int_nvvm_fmax_f16), Float16Regs>,
+ int_nvvm_fmax_f16), Int16Regs>,
MIN_MAX_TUPLE<"_ftz_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_ftz_f16,
- int_nvvm_fmax_ftz_f16), Float16Regs>,
+ int_nvvm_fmax_ftz_f16), Int16Regs>,
MIN_MAX_TUPLE<"_NaN_f16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_f16,
- int_nvvm_fmax_nan_f16), Float16Regs>,
+ int_nvvm_fmax_nan_f16), Int16Regs>,
MIN_MAX_TUPLE<"_ftz_NaN_f16", !if(!eq(IntName, "min"),
- int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Float16Regs>,
+ int_nvvm_fmin_ftz_nan_f16, int_nvvm_fmax_ftz_nan_f16), Int16Regs>,
MIN_MAX_TUPLE<"_xorsign_abs_f16", !if(!eq(IntName, "min"),
int_nvvm_fmin_xorsign_abs_f16, int_nvvm_fmax_xorsign_abs_f16),
- Float16Regs, [hasPTX72, hasSM86]>,
+ Int16Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16", !if(!eq(IntName, "min"),
int_nvvm_fmin_ftz_xorsign_abs_f16, int_nvvm_fmax_ftz_xorsign_abs_f16),
- Float16Regs, [hasPTX72, hasSM86]>,
+ Int16Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
int_nvvm_fmin_nan_xorsign_abs_f16, int_nvvm_fmax_nan_xorsign_abs_f16),
- Float16Regs, [hasPTX72, hasSM86]>,
+ Int16Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16", !if(!eq(IntName, "min"),
int_nvvm_fmin_ftz_nan_xorsign_abs_f16,
- int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Float16Regs, [hasPTX72, hasSM86]>,
+ int_nvvm_fmax_ftz_nan_xorsign_abs_f16), Int16Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_f16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_f16x2,
- int_nvvm_fmax_f16x2), Float16x2Regs>,
+ int_nvvm_fmax_f16x2), Int32Regs>,
MIN_MAX_TUPLE<"_ftz_f16x2", !if(!eq(IntName, "min"),
- int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Float16x2Regs>,
+ int_nvvm_fmin_ftz_f16x2, int_nvvm_fmax_ftz_f16x2), Int32Regs>,
MIN_MAX_TUPLE<"_NaN_f16x2", !if(!eq(IntName, "min"),
- int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Float16x2Regs>,
+ int_nvvm_fmin_nan_f16x2, int_nvvm_fmax_nan_f16x2), Int32Regs>,
MIN_MAX_TUPLE<"_ftz_NaN_f16x2", !if(!eq(IntName, "min"),
- int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Float16x2Regs>,
+ int_nvvm_fmin_ftz_nan_f16x2, int_nvvm_fmax_ftz_nan_f16x2), Int32Regs>,
MIN_MAX_TUPLE<"_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
int_nvvm_fmin_xorsign_abs_f16x2, int_nvvm_fmax_xorsign_abs_f16x2),
- Float16x2Regs, [hasPTX72, hasSM86]>,
+ Int32Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_ftz_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
int_nvvm_fmin_ftz_xorsign_abs_f16x2, int_nvvm_fmax_ftz_xorsign_abs_f16x2),
- Float16x2Regs, [hasPTX72, hasSM86]>,
+ Int32Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
int_nvvm_fmin_nan_xorsign_abs_f16x2, int_nvvm_fmax_nan_xorsign_abs_f16x2),
- Float16x2Regs, [hasPTX72, hasSM86]>,
+ Int32Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_ftz_NaN_xorsign_abs_f16x2", !if(!eq(IntName, "min"),
int_nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
int_nvvm_fmax_ftz_nan_xorsign_abs_f16x2),
- Float16x2Regs, [hasPTX72, hasSM86]>,
+ Int32Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_bf16", !if(!eq(IntName, "min"),
int_nvvm_fmin_bf16, int_nvvm_fmax_bf16), Int16Regs>,
MIN_MAX_TUPLE<"_NaN_bf16", !if(!eq(IntName, "min"), int_nvvm_fmin_nan_bf16,
int_nvvm_fmax_nan_bf16), Int16Regs>,
MIN_MAX_TUPLE<"_xorsign_abs_bf16", !if(!eq(IntName, "min"),
int_nvvm_fmin_xorsign_abs_bf16, int_nvvm_fmax_xorsign_abs_bf16),
- Int16Regs, [hasPTX72, hasSM86]>,
+ Int16Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16", !if(!eq(IntName, "min"),
int_nvvm_fmin_nan_xorsign_abs_bf16, int_nvvm_fmax_nan_xorsign_abs_bf16),
- Int16Regs, [hasPTX72, hasSM86]>,
+ Int16Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_bf16x2", !if(!eq(IntName, "min"), int_nvvm_fmin_bf16x2,
int_nvvm_fmax_bf16x2), Int32Regs>,
MIN_MAX_TUPLE<"_NaN_bf16x2", !if(!eq(IntName, "min"),
int_nvvm_fmin_nan_bf16x2, int_nvvm_fmax_nan_bf16x2), Int32Regs>,
MIN_MAX_TUPLE<"_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
int_nvvm_fmin_xorsign_abs_bf16x2, int_nvvm_fmax_xorsign_abs_bf16x2),
- Int32Regs, [hasPTX72, hasSM86]>,
+ Int32Regs, [hasPTX<72>, hasSM<86>]>,
MIN_MAX_TUPLE<"_NaN_xorsign_abs_bf16x2", !if(!eq(IntName, "min"),
int_nvvm_fmin_nan_xorsign_abs_bf16x2,
int_nvvm_fmax_nan_xorsign_abs_bf16x2),
- Int32Regs, [hasPTX72, hasSM86]>] in {
+ Int32Regs, [hasPTX<72>, hasSM<86>]>] in {
def P.Variant : F_MATH_2<!strconcat(
IntName, !subst("_", ".", P.Variant), " \t$dst, $src0, $src1;"),
P.RegClass, P.RegClass, P.RegClass, P.Intr, P.Predicates>;
@@ -856,13 +881,13 @@ def INT_NVVM_FABS_D : F_MATH_1<"abs.f64 \t$dst, $src0;", Float64Regs,
//
def INT_NVVM_ABS_BF16 : F_MATH_1<"abs.bf16 \t$dst, $src0;", Int16Regs,
- Int16Regs, int_nvvm_abs_bf16, [hasPTX70, hasSM80]>;
+ Int16Regs, int_nvvm_abs_bf16, [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_ABS_BF16X2 : F_MATH_1<"abs.bf16x2 \t$dst, $src0;", Int32Regs,
- Int32Regs, int_nvvm_abs_bf16x2, [hasPTX70, hasSM80]>;
+ Int32Regs, int_nvvm_abs_bf16x2, [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_NEG_BF16 : F_MATH_1<"neg.bf16 \t$dst, $src0;", Int16Regs,
- Int16Regs, int_nvvm_neg_bf16, [hasPTX70, hasSM80]>;
+ Int16Regs, int_nvvm_neg_bf16, [hasPTX<70>, hasSM<80>]>;
def INT_NVVM_NEG_BF16X2 : F_MATH_1<"neg.bf16x2 \t$dst, $src0;", Int32Regs,
- Int32Regs, int_nvvm_neg_bf16x2, [hasPTX70, hasSM80]>;
+ Int32Regs, int_nvvm_neg_bf16x2, [hasPTX<70>, hasSM<80>]>;
//
// Round
@@ -908,9 +933,9 @@ def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32 \t$dst, $src0;",
def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64 \t$dst, $src0;",
Float64Regs, Float64Regs, int_nvvm_ex2_approx_d>;
def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16 \t$dst, $src0;",
- Float16Regs, Float16Regs, int_nvvm_ex2_approx_f16, [hasPTX70, hasSM75]>;
+ Int16Regs, Int16Regs, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>;
def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2 \t$dst, $src0;",
- Float16x2Regs, Float16x2Regs, int_nvvm_ex2_approx_f16x2, [hasPTX70, hasSM75]>;
+ Int32Regs, Int32Regs, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>;
def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32 \t$dst, $src0;",
Float32Regs, Float32Regs, int_nvvm_lg2_approx_ftz_f>;
@@ -961,39 +986,46 @@ multiclass FMA_INST {
FMA_TUPLE<"_rp_f32", int_nvvm_fma_rp_f, Float32Regs>,
FMA_TUPLE<"_rp_ftz_f32", int_nvvm_fma_rp_ftz_f, Float32Regs>,
- FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Float16Regs, [hasPTX42, hasSM53]>,
- FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Float16Regs,
- [hasPTX42, hasSM53]>,
- FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Float16Regs,
- [hasPTX42, hasSM53]>,
- FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Float16Regs,
- [hasPTX42, hasSM53]>,
- FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Float16Regs,
- [hasPTX70, hasSM80]>,
- FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Float16Regs,
- [hasPTX70, hasSM80]>,
-
- FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Float16x2Regs,
- [hasPTX42, hasSM53]>,
- FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Float16x2Regs,
- [hasPTX42, hasSM53]>,
- FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Float16x2Regs,
- [hasPTX42, hasSM53]>,
+ FMA_TUPLE<"_rn_f16", int_nvvm_fma_rn_f16, Int16Regs, [hasPTX<42>, hasSM<53>]>,
+ FMA_TUPLE<"_rn_ftz_f16", int_nvvm_fma_rn_ftz_f16, Int16Regs,
+ [hasPTX<42>, hasSM<53>]>,
+ FMA_TUPLE<"_rn_sat_f16", int_nvvm_fma_rn_sat_f16, Int16Regs,
+ [hasPTX<42>, hasSM<53>]>,
+ FMA_TUPLE<"_rn_ftz_sat_f16", int_nvvm_fma_rn_ftz_sat_f16, Int16Regs,
+ [hasPTX<42>, hasSM<53>]>,
+ FMA_TUPLE<"_rn_relu_f16", int_nvvm_fma_rn_relu_f16, Int16Regs,
+ [hasPTX<70>, hasSM<80>]>,
+ FMA_TUPLE<"_rn_ftz_relu_f16", int_nvvm_fma_rn_ftz_relu_f16, Int16Regs,
+ [hasPTX<70>, hasSM<80>]>,
+
+ FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX<70>, hasSM<80>]>,
+ FMA_TUPLE<"_rn_ftz_bf16", int_nvvm_fma_rn_ftz_bf16, Int16Regs,
+ [hasPTX<70>, hasSM<80>]>,
+ FMA_TUPLE<"_rn_sat_bf16", int_nvvm_fma_rn_sat_bf16, Int16Regs,
+ [hasPTX<70>, hasSM<80>]>,
+ FMA_TUPLE<"_rn_ftz_sat_bf16", int_nvvm_fma_rn_ftz_sat_bf16, Int16Regs,
+ [hasPTX<70>, hasSM<80>]>,
+ FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs,
+ [hasPTX<70>, hasSM<80>]>,
+ FMA_TUPLE<"_rn_ftz_relu_bf16", int_nvvm_fma_rn_ftz_relu_bf16, Int16Regs,
+ [hasPTX<70>, hasSM<80>]>,
+
+ FMA_TUPLE<"_rn_f16x2", int_nvvm_fma_rn_f16x2, Int32Regs,
+ [hasPTX<42>, hasSM<53>]>,
+ FMA_TUPLE<"_rn_ftz_f16x2", int_nvvm_fma_rn_ftz_f16x2, Int32Regs,
+ [hasPTX<42>, hasSM<53>]>,
+ FMA_TUPLE<"_rn_sat_f16x2", int_nvvm_fma_rn_sat_f16x2, Int32Regs,
+ [hasPTX<42>, hasSM<53>]>,
FMA_TUPLE<"_rn_ftz_sat_f16x2", int_nvvm_fma_rn_ftz_sat_f16x2,
- Float16x2Regs, [hasPTX42, hasSM53]>,
- FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Float16x2Regs,
- [hasPTX70, hasSM80]>,
+ Int32Regs, [hasPTX<42>, hasSM<53>]>,
+ FMA_TUPLE<"_rn_relu_f16x2", int_nvvm_fma_rn_relu_f16x2, Int32Regs,
+ [hasPTX<70>, hasSM<80>]>,
FMA_TUPLE<"_rn_ftz_relu_f16x2", int_nvvm_fma_rn_ftz_relu_f16x2,
- Float16x2Regs, [hasPTX70, hasSM80]>,
-
- FMA_TUPLE<"_rn_bf16", int_nvvm_fma_rn_bf16, Int16Regs, [hasPTX70, hasSM80]>,
- FMA_TUPLE<"_rn_relu_bf16", int_nvvm_fma_rn_relu_bf16, Int16Regs,
- [hasPTX70, hasSM80]>,
-
+ Int32Regs, [hasPTX<70>, hasSM<80>]>,
FMA_TUPLE<"_rn_bf16x2", int_nvvm_fma_rn_bf16x2, Int32Regs,
- [hasPTX70, hasSM80]>,
+ [hasPTX<70>, hasSM<80>]>,
FMA_TUPLE<"_rn_relu_bf16x2", int_nvvm_fma_rn_relu_bf16x2, Int32Regs,
- [hasPTX70, hasSM80]>
+ [hasPTX<70>, hasSM<80>]>
] in {
def P.Variant :
F_MATH_3<!strconcat("fma",
@@ -1372,9 +1404,9 @@ def : Pat<(int_nvvm_ull2d_rp Int64Regs:$a),
def : Pat<(int_nvvm_f2h_rn_ftz Float32Regs:$a),
- (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ))>;
+ (CVT_f16_f32 Float32Regs:$a, CvtRN_FTZ)>;
def : Pat<(int_nvvm_f2h_rn Float32Regs:$a),
- (BITCONVERT_16_F2I (CVT_f16_f32 Float32Regs:$a, CvtRN))>;
+ (CVT_f16_f32 Float32Regs:$a, CvtRN)>;
//
// Bitcast
@@ -1398,7 +1430,7 @@ class INT_FNS_MBO<dag ins, dag Operands>
: NVPTXInst<(outs Int32Regs:$dst), ins,
"fns.b32 \t$dst, $mask, $base, $offset;",
[(set Int32Regs:$dst, Operands )]>,
- Requires<[hasPTX60, hasSM30]>;
+ Requires<[hasPTX<60>, hasSM<30>]>;
def INT_FNS_rrr : INT_FNS_MBO<(ins Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset),
(int_nvvm_fns Int32Regs:$mask, Int32Regs:$base, Int32Regs:$offset)>;
@@ -1660,13 +1692,13 @@ defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max",
defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".s32", ".max", atomic_load_max_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
- ".max", atomic_load_max_64_g, i64imm, imm, [hasSM32]>;
+ ".max", atomic_load_max_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
- ".max", atomic_load_max_64_s, i64imm, imm, [hasSM32]>;
+ ".max", atomic_load_max_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max",
- atomic_load_max_64_gen, i64imm, imm, [hasSM32]>;
+ atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
- ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, [hasSM32]>;
+ ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
".max", atomic_load_umax_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
@@ -1676,13 +1708,13 @@ defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max",
defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".u32", ".max", atomic_load_umax_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
- ".max", atomic_load_umax_64_g, i64imm, imm, [hasSM32]>;
+ ".max", atomic_load_umax_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
- ".max", atomic_load_umax_64_s, i64imm, imm, [hasSM32]>;
+ ".max", atomic_load_umax_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max",
- atomic_load_umax_64_gen, i64imm, imm, [hasSM32]>;
+ atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
- ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, [hasSM32]>;
+ ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, [hasSM<32>]>;
// atom_min
@@ -1720,13 +1752,13 @@ defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min",
defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".s32", ".min", atomic_load_min_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64",
- ".min", atomic_load_min_64_g, i64imm, imm, [hasSM32]>;
+ ".min", atomic_load_min_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64",
- ".min", atomic_load_min_64_s, i64imm, imm, [hasSM32]>;
+ ".min", atomic_load_min_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min",
- atomic_load_min_64_gen, i64imm, imm, [hasSM32]>;
+ atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
- ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, [hasSM32]>;
+ ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32",
".min", atomic_load_umin_32_g, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32",
@@ -1736,13 +1768,13 @@ defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min",
defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global",
".u32", ".min", atomic_load_umin_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64",
- ".min", atomic_load_umin_64_g, i64imm, imm, [hasSM32]>;
+ ".min", atomic_load_umin_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64",
- ".min", atomic_load_umin_64_s, i64imm, imm, [hasSM32]>;
+ ".min", atomic_load_umin_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min",
- atomic_load_umin_64_gen, i64imm, imm, [hasSM32]>;
+ atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global",
- ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, [hasSM32]>;
+ ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, [hasSM<32>]>;
// atom_inc atom_dec
@@ -1800,13 +1832,13 @@ defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and",
defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
".and", atomic_load_and_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and",
- atomic_load_and_64_g, i64imm, imm, [hasSM32]>;
+ atomic_load_and_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and",
- atomic_load_and_64_s, i64imm, imm, [hasSM32]>;
+ atomic_load_and_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and",
- atomic_load_and_64_gen, i64imm, imm, [hasSM32]>;
+ atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
- ".and", atomic_load_and_64_gen, i64imm, imm, [hasSM32]>;
+ ".and", atomic_load_and_64_gen, i64imm, imm, [hasSM<32>]>;
// atom_or
@@ -1832,13 +1864,13 @@ defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or",
atomic_load_or_32_s, i32imm, imm>;
defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or",
- atomic_load_or_64_g, i64imm, imm, [hasSM32]>;
+ atomic_load_or_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or",
- atomic_load_or_64_gen, i64imm, imm, [hasSM32]>;
+ atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
- ".or", atomic_load_or_64_gen, i64imm, imm, [hasSM32]>;
+ ".or", atomic_load_or_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or",
- atomic_load_or_64_s, i64imm, imm, [hasSM32]>;
+ atomic_load_or_64_s, i64imm, imm, [hasSM<32>]>;
// atom_xor
@@ -1864,13 +1896,13 @@ defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor",
defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32",
".xor", atomic_load_xor_32_gen, i32imm, imm>;
defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor",
- atomic_load_xor_64_g, i64imm, imm, [hasSM32]>;
+ atomic_load_xor_64_g, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor",
- atomic_load_xor_64_s, i64imm, imm, [hasSM32]>;
+ atomic_load_xor_64_s, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor",
- atomic_load_xor_64_gen, i64imm, imm, [hasSM32]>;
+ atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64",
- ".xor", atomic_load_xor_64_gen, i64imm, imm, [hasSM32]>;
+ ".xor", atomic_load_xor_64_gen, i64imm, imm, [hasSM<32>]>;
// atom_cas
@@ -2134,12 +2166,8 @@ defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>;
defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
-defm INT_PTX_LDU_GLOBAL_f16 : LDU_G<"b16 \t$result, [$src];", Float16Regs>;
-defm INT_PTX_LDU_GLOBAL_f16x2 : LDU_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>;
-defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>;
-defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>;
// vector
@@ -2186,10 +2214,6 @@ defm INT_PTX_LDU_G_v2i16_ELE
: VLDU_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDU_G_v2i32_ELE
: VLDU_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
-defm INT_PTX_LDU_G_v2f16_ELE
- : VLDU_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
-defm INT_PTX_LDU_G_v2f16x2_ELE
- : VLDU_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDU_G_v2f32_ELE
: VLDU_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDU_G_v2i64_ELE
@@ -2206,10 +2230,10 @@ defm INT_PTX_LDU_G_v4i32_ELE
Int32Regs>;
defm INT_PTX_LDU_G_v4f16_ELE
: VLDU_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
- Float16Regs>;
+ Int16Regs>;
defm INT_PTX_LDU_G_v4f16x2_ELE
: VLDU_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
- Float16x2Regs>;
+ Int32Regs>;
defm INT_PTX_LDU_G_v4f32_ELE
: VLDU_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];",
Float32Regs>;
@@ -2249,18 +2273,10 @@ defm INT_PTX_LDG_GLOBAL_i32
: LDG_G<"u32 \t$result, [$src];", Int32Regs>;
defm INT_PTX_LDG_GLOBAL_i64
: LDG_G<"u64 \t$result, [$src];", Int64Regs>;
-defm INT_PTX_LDG_GLOBAL_f16
- : LDG_G<"b16 \t$result, [$src];", Float16Regs>;
-defm INT_PTX_LDG_GLOBAL_f16x2
- : LDG_G<"b32 \t$result, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_GLOBAL_f32
: LDG_G<"f32 \t$result, [$src];", Float32Regs>;
defm INT_PTX_LDG_GLOBAL_f64
: LDG_G<"f64 \t$result, [$src];", Float64Regs>;
-defm INT_PTX_LDG_GLOBAL_p32
- : LDG_G<"u32 \t$result, [$src];", Int32Regs>;
-defm INT_PTX_LDG_GLOBAL_p64
- : LDG_G<"u64 \t$result, [$src];", Int64Regs>;
// vector
@@ -2308,10 +2324,6 @@ defm INT_PTX_LDG_G_v2i16_ELE
: VLDG_G_ELE_V2<"v2.u16 \t{{$dst1, $dst2}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v2i32_ELE
: VLDG_G_ELE_V2<"v2.u32 \t{{$dst1, $dst2}}, [$src];", Int32Regs>;
-defm INT_PTX_LDG_G_v2f16_ELE
- : VLDG_G_ELE_V2<"v2.b16 \t{{$dst1, $dst2}}, [$src];", Float16Regs>;
-defm INT_PTX_LDG_G_v2f16x2_ELE
- : VLDG_G_ELE_V2<"v2.b32 \t{{$dst1, $dst2}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v2f32_ELE
: VLDG_G_ELE_V2<"v2.f32 \t{{$dst1, $dst2}}, [$src];", Float32Regs>;
defm INT_PTX_LDG_G_v2i64_ELE
@@ -2324,10 +2336,6 @@ defm INT_PTX_LDG_G_v4i16_ELE
: VLDG_G_ELE_V4<"v4.u16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int16Regs>;
defm INT_PTX_LDG_G_v4i32_ELE
: VLDG_G_ELE_V4<"v4.u32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Int32Regs>;
-defm INT_PTX_LDG_G_v4f16_ELE
- : VLDG_G_ELE_V4<"v4.b16 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16Regs>;
-defm INT_PTX_LDG_G_v4f16x2_ELE
- : VLDG_G_ELE_V4<"v4.b32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float16x2Regs>;
defm INT_PTX_LDG_G_v4f32_ELE
: VLDG_G_ELE_V4<"v4.f32 \t{{$dst1, $dst2, $dst3, $dst4}}, [$src];", Float32Regs>;
@@ -2470,41 +2478,24 @@ def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a),
// isspacep
-def ISSPACEP_CONST_32
- : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
- "isspacep.const \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>,
- Requires<[hasPTX31]>;
-def ISSPACEP_CONST_64
- : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
- "isspacep.const \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>,
- Requires<[hasPTX31]>;
-def ISSPACEP_GLOBAL_32
- : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
- "isspacep.global \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>;
-def ISSPACEP_GLOBAL_64
- : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
- "isspacep.global \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>;
-def ISSPACEP_LOCAL_32
- : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
- "isspacep.local \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>;
-def ISSPACEP_LOCAL_64
- : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
- "isspacep.local \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>;
-def ISSPACEP_SHARED_32
- : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
- "isspacep.shared \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>;
-def ISSPACEP_SHARED_64
- : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
- "isspacep.shared \t$d, $a;",
- [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>;
-
+multiclass ISSPACEP<string suffix, Intrinsic Intr, list<Predicate> Preds = []> {
+ def _32: NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a),
+ "isspacep." # suffix # "\t$d, $a;",
+ [(set Int1Regs:$d, (Intr Int32Regs:$a))]>,
+ Requires<Preds>;
+ def _64: NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a),
+ "isspacep." # suffix # "\t$d, $a;",
+ [(set Int1Regs:$d, (Intr Int64Regs:$a))]>,
+ Requires<Preds>;
+}
+
+defm isspace_const : ISSPACEP<"const", int_nvvm_isspacep_const, [hasPTX<31>]>;
+defm isspace_global : ISSPACEP<"global", int_nvvm_isspacep_global>;
+defm isspace_local : ISSPACEP<"local", int_nvvm_isspacep_local>;
+defm isspace_shared : ISSPACEP<"shared", int_nvvm_isspacep_shared>;
+defm isspace_shared_cluster : ISSPACEP<"shared::cluster",
+ int_nvvm_isspacep_shared_cluster,
+ [hasPTX<78>, hasSM<90>]>;
// Special register reads
def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d),
@@ -6203,35 +6194,51 @@ def : Pat<(int_nvvm_sust_p_3d_v4i32_trap
// Read Special Registers
//-----------------------------------
-class PTX_READ_SREG_R64<string regname, Intrinsic intop>
+class PTX_READ_SREG_R64<string regname, Intrinsic intop, list<Predicate> Preds=[]>
: NVPTXInst<(outs Int64Regs:$d), (ins),
!strconcat("mov.u64 \t$d, %", regname, ";"),
- [(set Int64Regs:$d, (intop))]>;
+ [(set Int64Regs:$d, (intop))]>,
+ Requires<Preds>;
-class PTX_READ_SREG_R32<string regname, Intrinsic intop>
+class PTX_READ_SREG_R32<string regname, Intrinsic intop, list<Predicate> Preds=[]>
: NVPTXInst<(outs Int32Regs:$d), (ins),
!strconcat("mov.u32 \t$d, %", regname, ";"),
- [(set Int32Regs:$d, (intop))]>;
+ [(set Int32Regs:$d, (intop))]>,
+ Requires<Preds>;
+
+multiclass PTX_READ_SREG_R32V4<string regname, list<Predicate> Preds=[]> {
+ foreach suffix = ["x", "y", "z", "w"] in {
+ defvar reg = regname # "." # suffix;
+ defvar intr = !cast<Intrinsic>("int_nvvm_read_ptx_sreg_" # regname # "_" # suffix);
+ def "_"#suffix : PTX_READ_SREG_R32<reg, intr, Preds>;
+ }
+}
// TODO Add read vector-version of special registers
-def INT_PTX_SREG_TID_X :
- PTX_READ_SREG_R32<"tid.x", int_nvvm_read_ptx_sreg_tid_x>;
-def INT_PTX_SREG_TID_Y :
- PTX_READ_SREG_R32<"tid.y", int_nvvm_read_ptx_sreg_tid_y>;
-def INT_PTX_SREG_TID_Z :
- PTX_READ_SREG_R32<"tid.z", int_nvvm_read_ptx_sreg_tid_z>;
-def INT_PTX_SREG_TID_W :
- PTX_READ_SREG_R32<"tid.w", int_nvvm_read_ptx_sreg_tid_w>;
-
-def INT_PTX_SREG_NTID_X :
- PTX_READ_SREG_R32<"ntid.x", int_nvvm_read_ptx_sreg_ntid_x>;
-def INT_PTX_SREG_NTID_Y :
- PTX_READ_SREG_R32<"ntid.y", int_nvvm_read_ptx_sreg_ntid_y>;
-def INT_PTX_SREG_NTID_Z :
- PTX_READ_SREG_R32<"ntid.z", int_nvvm_read_ptx_sreg_ntid_z>;
-def INT_PTX_SREG_NTID_W :
- PTX_READ_SREG_R32<"ntid.w", int_nvvm_read_ptx_sreg_ntid_w>;
+defm INT_PTX_SREG_TID : PTX_READ_SREG_R32V4<"tid">;
+defm INT_PTX_SREG_NTID : PTX_READ_SREG_R32V4<"ntid">;
+defm INT_PTX_SREG_CTAID : PTX_READ_SREG_R32V4<"ctaid">;
+defm INT_PTX_SREG_NCTAID: PTX_READ_SREG_R32V4<"nctaid">;
+
+defm INT_PTX_SREG_CLUSTERID :
+ PTX_READ_SREG_R32V4<"clusterid", [hasSM<90>, hasPTX<78>]>;
+defm INT_PTX_SREG_NCLUSTERID :
+ PTX_READ_SREG_R32V4<"nclusterid", [hasSM<90>, hasPTX<78>]>;
+defm INT_PTX_SREG_CLUSTER_CTAID :
+ PTX_READ_SREG_R32V4<"cluster_ctaid", [hasSM<90>, hasPTX<78>]>;
+defm INT_PTX_SREG_CLUSTER_NCTAID:
+ PTX_READ_SREG_R32V4<"cluster_nctaid", [hasSM<90>, hasPTX<78>]>;
+
+def INT_PTX_SREG_CLUSTER_CTARANK :
+ PTX_READ_SREG_R32<"cluster_ctarank",
+ int_nvvm_read_ptx_sreg_cluster_ctarank,
+ [hasSM<90>, hasPTX<78>]>;
+def INT_PTX_SREG_CLUSTER_NCTARANK:
+ PTX_READ_SREG_R32<"cluster_nctarank",
+ int_nvvm_read_ptx_sreg_cluster_nctarank,
+ [hasSM<90>, hasPTX<78>]>;
+
def INT_PTX_SREG_LANEID :
PTX_READ_SREG_R32<"laneid", int_nvvm_read_ptx_sreg_laneid>;
@@ -6239,25 +6246,6 @@ def INT_PTX_SREG_WARPID :
PTX_READ_SREG_R32<"warpid", int_nvvm_read_ptx_sreg_warpid>;
def INT_PTX_SREG_NWARPID :
PTX_READ_SREG_R32<"nwarpid", int_nvvm_read_ptx_sreg_nwarpid>;
-
-def INT_PTX_SREG_CTAID_X :
- PTX_READ_SREG_R32<"ctaid.x", int_nvvm_read_ptx_sreg_ctaid_x>;
-def INT_PTX_SREG_CTAID_Y :
- PTX_READ_SREG_R32<"ctaid.y", int_nvvm_read_ptx_sreg_ctaid_y>;
-def INT_PTX_SREG_CTAID_Z :
- PTX_READ_SREG_R32<"ctaid.z", int_nvvm_read_ptx_sreg_ctaid_z>;
-def INT_PTX_SREG_CTAID_W :
- PTX_READ_SREG_R32<"ctaid.w", int_nvvm_read_ptx_sreg_ctaid_w>;
-
-def INT_PTX_SREG_NCTAID_X :
- PTX_READ_SREG_R32<"nctaid.x", int_nvvm_read_ptx_sreg_nctaid_x>;
-def INT_PTX_SREG_NCTAID_Y :
- PTX_READ_SREG_R32<"nctaid.y", int_nvvm_read_ptx_sreg_nctaid_y>;
-def INT_PTX_SREG_NCTAID_Z :
- PTX_READ_SREG_R32<"nctaid.z", int_nvvm_read_ptx_sreg_nctaid_z>;
-def INT_PTX_SREG_NCTAID_W :
- PTX_READ_SREG_R32<"nctaid.w", int_nvvm_read_ptx_sreg_nctaid_w>;
-
def INT_PTX_SREG_SMID :
PTX_READ_SREG_R32<"smid", int_nvvm_read_ptx_sreg_smid>;
def INT_PTX_SREG_NSMID :
@@ -6300,7 +6288,7 @@ class WMMA_REGINFO<WMMA_REGS r, string op>
: WMMA_REGS<r.geom, r.frag, r.ptx_elt_type> {
// NVPTX register types used to carry fragment data.
NVPTXRegClass regclass = !cond(
- !eq(ptx_elt_type, "f16") : Float16x2Regs,
+ !eq(ptx_elt_type, "f16") : Int32Regs,
!eq(ptx_elt_type, "f32") : Float32Regs,
!eq(ptx_elt_type, "f64") : Float64Regs,
!eq(ptx_elt_type, "bf16") : Int32Regs,
@@ -6332,16 +6320,16 @@ class WMMA_REGINFO<WMMA_REGS r, string op>
// fp16 -> fp16/fp32 @ m16n16k16
!and(!eq(geom, "m16n16k16"),
!or(!eq(ptx_elt_type, "f16"),
- !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX60],
+ !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<60>],
!and(!eq(geom,"m8n8k4"),
- !eq(ptx_elt_type, "f64")) : [hasSM80, hasPTX70],
+ !eq(ptx_elt_type, "f64")) : [hasSM<80>, hasPTX<70>],
// fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
!and(!or(!eq(geom, "m8n32k16"),
!eq(geom, "m32n8k16")),
!or(!eq(ptx_elt_type, "f16"),
- !eq(ptx_elt_type, "f32"))) : [hasSM70, hasPTX61],
+ !eq(ptx_elt_type, "f32"))) : [hasSM<70>, hasPTX<61>],
// u8/s8 -> s32 @ m16n16k16/m8n32k16/m32n8k16
!and(!or(!eq(geom,"m16n16k16"),
@@ -6349,39 +6337,39 @@ class WMMA_REGINFO<WMMA_REGS r, string op>
!eq(geom,"m32n8k16")),
!or(!eq(ptx_elt_type, "u8"),
!eq(ptx_elt_type, "s8"),
- !eq(ptx_elt_type, "s32"))) : [hasSM72, hasPTX63],
+ !eq(ptx_elt_type, "s32"))) : [hasSM<72>, hasPTX<63>],
!and(!or(!eq(geom,"m16n16k16"),
!eq(geom,"m8n32k16"),
!eq(geom,"m32n8k16")),
- !eq(ptx_elt_type, "bf16")) : [hasSM80, hasPTX70],
+ !eq(ptx_elt_type, "bf16")) : [hasSM<80>, hasPTX<70>],
!and(!eq(geom,"m16n16k8"),
- !eq(ptx_elt_type, "tf32")) : [hasSM80, hasPTX70],
+ !eq(ptx_elt_type, "tf32")) : [hasSM<80>, hasPTX<70>],
!and(!eq(geom,"m16n16k8"),
- !eq(ptx_elt_type, "f32")) : [hasSM80, hasPTX70],
+ !eq(ptx_elt_type, "f32")) : [hasSM<80>, hasPTX<70>],
// b1 -> s32 @ m8n8k128(b1)
!and(!ne(op,"mma"),
- !eq(geom,"m8n8k128")) : [hasSM75, hasPTX63],
+ !eq(geom,"m8n8k128")) : [hasSM<75>, hasPTX<63>],
// u4/s4 -> s32 @ m8n8k32 (u4/s4)
!and(!ne(op,"mma"),
- !eq(geom,"m8n8k32")) : [hasSM75, hasPTX63],
+ !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<63>],
!or(!eq(geom,"m16n8k8"),
- !eq(geom,"m8n8k16")) : [hasSM75, hasPTX65],
+ !eq(geom,"m8n8k16")) : [hasSM<75>, hasPTX<65>],
!and(!ne(ptx_elt_type,"f64"),
- !eq(geom, "m8n8k4")) : [hasSM70, hasPTX64],
+ !eq(geom, "m8n8k4")) : [hasSM<70>, hasPTX<64>],
// mma m8n8k32 requires higher PTX version
!and(!eq(op,"mma"),
- !eq(geom,"m8n8k32")) : [hasSM75, hasPTX65],
+ !eq(geom,"m8n8k32")) : [hasSM<75>, hasPTX<65>],
!and(!eq(ptx_elt_type,"f64"),
- !eq(geom, "m8n8k4")) : [hasSM80, hasPTX70],
+ !eq(geom, "m8n8k4")) : [hasSM<80>, hasPTX<70>],
!and(!eq(op,"mma"),
!or(!eq(geom, "m16n8k16"),
@@ -6390,11 +6378,11 @@ class WMMA_REGINFO<WMMA_REGS r, string op>
!eq(geom, "m16n8k64"),
!eq(geom, "m8n8k128"),
!eq(geom, "m16n8k128"),
- !eq(geom, "m16n8k256"))) : [hasSM80, hasPTX70],
+ !eq(geom, "m16n8k256"))) : [hasSM<80>, hasPTX<70>],
!and(!eq(op,"ldmatrix"),
!eq(ptx_elt_type,"b16"),
- !eq(geom, "m8n8")) : [hasSM75, hasPTX65]);
+ !eq(geom, "m8n8")) : [hasSM<75>, hasPTX<65>]);
// template DAGs for instruction inputs/output.
dag Outs = !dag(outs, ptx_regs, reg_names);
@@ -6535,7 +6523,7 @@ class MMA_OP_PREDICATES<WMMA_REGINFO FragA, string b1op> {
WMMA_REGINFO Frag = FragA;
list<Predicate> ret = !listconcat(
FragA.Predicates,
- !if(!eq(b1op, ".and.popc"), [hasSM80,hasPTX71],[])
+ !if(!eq(b1op, ".and.popc"), [hasSM<80>,hasPTX<71>],[])
);
}
// WMMA.MMA
@@ -6694,3 +6682,45 @@ class MMA_PAT<WMMA_INSTR wi>
// Build intrinsic->instruction patterns for all MMA instructions.
foreach mma = !listconcat(MMAs, WMMAs, MMA_LDSTs, LDMATRIXs) in
def : MMA_PAT<mma>;
+
+multiclass MAPA<string suffix, Intrinsic Intr> {
+ def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, Int32Regs:$b),
+ "mapa" # suffix # ".u32\t$d, $a, $b;",
+ [(set Int32Regs:$d, (Intr Int32Regs:$a, Int32Regs:$b))]>,
+ Requires<[hasSM<90>, hasPTX<78>]>;
+ def _32i: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a, i32imm:$b),
+ "mapa" # suffix # ".u32\t$d, $a, $b;",
+ [(set Int32Regs:$d, (Intr Int32Regs:$a, imm:$b))]>,
+ Requires<[hasSM<90>, hasPTX<78>]>;
+ def _64: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, Int32Regs:$b),
+ "mapa" # suffix # ".u64\t$d, $a, $b;",
+ [(set Int64Regs:$d, (Intr Int64Regs:$a, Int32Regs:$b))]>,
+ Requires<[hasSM<90>, hasPTX<78>]>;
+ def _64i: NVPTXInst<(outs Int64Regs:$d), (ins Int64Regs:$a, i32imm:$b),
+ "mapa" # suffix # ".u64\t$d, $a, $b;",
+ [(set Int64Regs:$d, (Intr Int64Regs:$a, imm:$b))]>,
+ Requires<[hasSM<90>, hasPTX<78>]>;
+}
+
+defm mapa : MAPA<"", int_nvvm_mapa>;
+defm mapa_shared_cluster : MAPA<".shared::cluster", int_nvvm_mapa_shared_cluster>;
+
+
+multiclass GETCTARANK<string suffix, Intrinsic Intr> {
+ def _32: NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
+ "getctarank" # suffix # ".u32\t$d, $a;",
+ [(set Int32Regs:$d, (Intr Int32Regs:$a))]>,
+ Requires<[hasSM<90>, hasPTX<78>]>;
+ def _64: NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
+ "getctarank" # suffix # ".u64\t$d, $a;",
+ [(set Int32Regs:$d, (Intr Int64Regs:$a))]>,
+ Requires<[hasSM<90>, hasPTX<78>]>;
+}
+
+defm getctarank : GETCTARANK<"", int_nvvm_getctarank>;
+defm getctarank_shared_cluster : GETCTARANK<".shared::cluster", int_nvvm_getctarank_shared_cluster>;
+
+def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins),
+ "mov.pred\t$d, %is_explicit_cluster;",
+ [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
+ Requires<[hasSM<90>, hasPTX<78>]>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index f57c2920449b..6ee4b160e90a 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -127,7 +127,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
if (MemCpyInst *Memcpy = dyn_cast<MemCpyInst>(MemCall)) {
expandMemCpyAsLoop(Memcpy, TTI);
} else if (MemMoveInst *Memmove = dyn_cast<MemMoveInst>(MemCall)) {
- expandMemMoveAsLoop(Memmove);
+ expandMemMoveAsLoop(Memmove, TTI);
} else if (MemSetInst *Memset = dyn_cast<MemSetInst>(MemCall)) {
expandMemSetAsLoop(Memset);
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index 306c485b8791..5cd41cc39fd0 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -93,10 +93,12 @@
#include "NVPTXTargetMachine.h"
#include "NVPTXUtilities.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include <numeric>
#include <queue>
@@ -113,11 +115,11 @@ namespace {
class NVPTXLowerArgs : public FunctionPass {
bool runOnFunction(Function &F) override;
- bool runOnKernelFunction(Function &F);
- bool runOnDeviceFunction(Function &F);
+ bool runOnKernelFunction(const NVPTXTargetMachine &TM, Function &F);
+ bool runOnDeviceFunction(const NVPTXTargetMachine &TM, Function &F);
// handle byval parameters
- void handleByValParam(Argument *Arg);
+ void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg);
// Knowing Ptr must point to the global address space, this function
// addrspacecasts Ptr to global and then back to generic. This allows
// NVPTXInferAddressSpaces to fold the global-to-generic cast into
@@ -126,21 +128,23 @@ class NVPTXLowerArgs : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- NVPTXLowerArgs(const NVPTXTargetMachine *TM = nullptr)
- : FunctionPass(ID), TM(TM) {}
+ NVPTXLowerArgs() : FunctionPass(ID) {}
StringRef getPassName() const override {
return "Lower pointer arguments of CUDA kernels";
}
-
-private:
- const NVPTXTargetMachine *TM;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<TargetPassConfig>();
+ }
};
} // namespace
char NVPTXLowerArgs::ID = 1;
-INITIALIZE_PASS(NVPTXLowerArgs, "nvptx-lower-args",
- "Lower arguments (NVPTX)", false, false)
+INITIALIZE_PASS_BEGIN(NVPTXLowerArgs, "nvptx-lower-args",
+ "Lower arguments (NVPTX)", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(NVPTXLowerArgs, "nvptx-lower-args",
+ "Lower arguments (NVPTX)", false, false)
// =============================================================================
// If the function had a byval struct ptr arg, say foo(%struct.x* byval %d),
@@ -186,8 +190,7 @@ static void convertToParamAS(Value *OldUser, Value *Param) {
return NewGEP;
}
if (auto *BC = dyn_cast<BitCastInst>(I.OldInstruction)) {
- auto *NewBCType = PointerType::getWithSamePointeeType(
- cast<PointerType>(BC->getType()), ADDRESS_SPACE_PARAM);
+ auto *NewBCType = PointerType::get(BC->getContext(), ADDRESS_SPACE_PARAM);
return BitCastInst::Create(BC->getOpcode(), I.NewParam, NewBCType,
BC->getName(), BC);
}
@@ -310,7 +313,8 @@ static void adjustByValArgAlignment(Argument *Arg, Value *ArgInParamAS,
}
}
-void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
+void NVPTXLowerArgs::handleByValParam(const NVPTXTargetMachine &TM,
+ Argument *Arg) {
Function *Func = Arg->getParent();
Instruction *FirstInst = &(Func->getEntryBlock().front());
Type *StructType = Arg->getParamByValType();
@@ -354,12 +358,8 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
convertToParamAS(V, ArgInParamAS);
LLVM_DEBUG(dbgs() << "No need to copy " << *Arg << "\n");
- // Further optimizations require target lowering info.
- if (!TM)
- return;
-
const auto *TLI =
- cast<NVPTXTargetLowering>(TM->getSubtargetImpl()->getTargetLowering());
+ cast<NVPTXTargetLowering>(TM.getSubtargetImpl()->getTargetLowering());
adjustByValArgAlignment(Arg, ArgInParamAS, TLI);
@@ -390,7 +390,7 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
}
void NVPTXLowerArgs::markPointerAsGlobal(Value *Ptr) {
- if (Ptr->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL)
+ if (Ptr->getType()->getPointerAddressSpace() != ADDRESS_SPACE_GENERIC)
return;
// Deciding where to emit the addrspacecast pair.
@@ -406,9 +406,7 @@ void NVPTXLowerArgs::markPointerAsGlobal(Value *Ptr) {
}
Instruction *PtrInGlobal = new AddrSpaceCastInst(
- Ptr,
- PointerType::getWithSamePointeeType(cast<PointerType>(Ptr->getType()),
- ADDRESS_SPACE_GLOBAL),
+ Ptr, PointerType::get(Ptr->getContext(), ADDRESS_SPACE_GLOBAL),
Ptr->getName(), &*InsertPt);
Value *PtrInGeneric = new AddrSpaceCastInst(PtrInGlobal, Ptr->getType(),
Ptr->getName(), &*InsertPt);
@@ -420,18 +418,32 @@ void NVPTXLowerArgs::markPointerAsGlobal(Value *Ptr) {
// =============================================================================
// Main function for this pass.
// =============================================================================
-bool NVPTXLowerArgs::runOnKernelFunction(Function &F) {
- if (TM && TM->getDrvInterface() == NVPTX::CUDA) {
+bool NVPTXLowerArgs::runOnKernelFunction(const NVPTXTargetMachine &TM,
+ Function &F) {
+ // Copying of byval aggregates + SROA may result in pointers being loaded as
+  // integers, followed by inttoptr. We may want to mark those as global, too,
+ // but only if the loaded integer is used exclusively for conversion to a
+ // pointer with inttoptr.
+ auto HandleIntToPtr = [this](Value &V) {
+ if (llvm::all_of(V.users(), [](User *U) { return isa<IntToPtrInst>(U); })) {
+ SmallVector<User *, 16> UsersToUpdate(V.users());
+ llvm::for_each(UsersToUpdate, [&](User *U) { markPointerAsGlobal(U); });
+ }
+ };
+ if (TM.getDrvInterface() == NVPTX::CUDA) {
// Mark pointers in byval structs as global.
for (auto &B : F) {
for (auto &I : B) {
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
- if (LI->getType()->isPointerTy()) {
+ if (LI->getType()->isPointerTy() || LI->getType()->isIntegerTy()) {
Value *UO = getUnderlyingObject(LI->getPointerOperand());
if (Argument *Arg = dyn_cast<Argument>(UO)) {
if (Arg->hasByValAttr()) {
// LI is a load from a pointer within a byval kernel parameter.
- markPointerAsGlobal(LI);
+ if (LI->getType()->isPointerTy())
+ markPointerAsGlobal(LI);
+ else
+ HandleIntToPtr(*LI);
}
}
}
@@ -444,28 +456,32 @@ bool NVPTXLowerArgs::runOnKernelFunction(Function &F) {
for (Argument &Arg : F.args()) {
if (Arg.getType()->isPointerTy()) {
if (Arg.hasByValAttr())
- handleByValParam(&Arg);
- else if (TM && TM->getDrvInterface() == NVPTX::CUDA)
+ handleByValParam(TM, &Arg);
+ else if (TM.getDrvInterface() == NVPTX::CUDA)
markPointerAsGlobal(&Arg);
+ } else if (Arg.getType()->isIntegerTy() &&
+ TM.getDrvInterface() == NVPTX::CUDA) {
+ HandleIntToPtr(Arg);
}
}
return true;
}
// Device functions only need to copy byval args into local memory.
-bool NVPTXLowerArgs::runOnDeviceFunction(Function &F) {
+bool NVPTXLowerArgs::runOnDeviceFunction(const NVPTXTargetMachine &TM,
+ Function &F) {
LLVM_DEBUG(dbgs() << "Lowering function args of " << F.getName() << "\n");
for (Argument &Arg : F.args())
if (Arg.getType()->isPointerTy() && Arg.hasByValAttr())
- handleByValParam(&Arg);
+ handleByValParam(TM, &Arg);
return true;
}
bool NVPTXLowerArgs::runOnFunction(Function &F) {
- return isKernelFunction(F) ? runOnKernelFunction(F) : runOnDeviceFunction(F);
-}
+ auto &TM = getAnalysis<TargetPassConfig>().getTM<NVPTXTargetMachine>();
-FunctionPass *
-llvm::createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM) {
- return new NVPTXLowerArgs(TM);
+ return isKernelFunction(F) ? runOnKernelFunction(TM, F)
+ : runOnDeviceFunction(TM, F);
}
+
+FunctionPass *llvm::createNVPTXLowerArgsPass() { return new NVPTXLowerArgs(); }
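
A rough sketch of the effect of the new integer-argument handling in the CUDA kernel path (the struct, value names, and exact cast placement below are illustrative; kernel annotation metadata is omitted): a value loaded from a byval parameter as an integer and used only through inttoptr now receives the same global-then-generic addrspacecast pair that markPointerAsGlobal already applies to directly loaded pointers.

    %struct.S = type { i64 }

    ; before
    define void @kernel(ptr byval(%struct.S) align 8 %s) {
      %slot = getelementptr inbounds %struct.S, ptr %s, i32 0, i32 0
      %bits = load i64, ptr %slot
      %p    = inttoptr i64 %bits to ptr
      %v    = load float, ptr %p
      ret void
    }

    ; after nvptx-lower-args, the inttoptr result is wrapped
    ; (the separate byval-to-param-space rewriting is not shown):
      %p      = inttoptr i64 %bits to ptr
      %p.glob = addrspacecast ptr %p to ptr addrspace(1)
      %p.gen  = addrspacecast ptr addrspace(1) %p.glob to ptr
      %v      = load float, ptr %p.gen

NVPTXInferAddressSpaces can then fold the global-to-generic cast into the load so it becomes a global-space access.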
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerUnreachable.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerUnreachable.cpp
new file mode 100644
index 000000000000..1d312f82e6c0
--- /dev/null
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerUnreachable.cpp
@@ -0,0 +1,126 @@
+//===-- NVPTXLowerUnreachable.cpp - Lower unreachables to exit =====--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// PTX does not have a notion of `unreachable`, which results in emitted basic
+// blocks having an edge to the next block:
+//
+// block1:
+// call @does_not_return();
+// // unreachable
+// block2:
+// // ptxas will create a CFG edge from block1 to block2
+//
+// This may result in significant changes to the control flow graph, e.g., when
+// LLVM moves unreachable blocks to the end of the function. That's a problem
+// in the context of divergent control flow, as `ptxas` uses the CFG to
+// determine divergent regions, and some instructions may not be executed
+// divergently.
+//
+// For example, `bar.sync` is not allowed to be executed divergently on Pascal
+// or earlier. If we start with the following:
+//
+// entry:
+// // start of divergent region
+// @%p0 bra cont;
+// @%p1 bra unlikely;
+// ...
+// bra.uni cont;
+// unlikely:
+// ...
+// // unreachable
+// cont:
+// // end of divergent region
+// bar.sync 0;
+// bra.uni exit;
+// exit:
+// ret;
+//
+// it is transformed by the branch-folder and block-placement passes to:
+//
+// entry:
+// // start of divergent region
+// @%p0 bra cont;
+// @%p1 bra unlikely;
+// ...
+// bra.uni cont;
+// cont:
+// bar.sync 0;
+// bra.uni exit;
+// unlikely:
+// ...
+// // unreachable
+// exit:
+// // end of divergent region
+// ret;
+//
+// After moving the `unlikely` block to the end of the function, it has an edge
+// to the `exit` block, which widens the divergent region and makes the
+// `bar.sync` instruction happen divergently.
+//
+// To work around this, we add an `exit` instruction before every `unreachable`,
+// as `ptxas` understands that exit terminates the CFG. Note that `trap` is not
+// equivalent, and only future versions of `ptxas` will model it like `exit`.
+//
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeNVPTXLowerUnreachablePass(PassRegistry &);
+}
+
+namespace {
+class NVPTXLowerUnreachable : public FunctionPass {
+ bool runOnFunction(Function &F) override;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ NVPTXLowerUnreachable() : FunctionPass(ID) {}
+ StringRef getPassName() const override {
+ return "add an exit instruction before every unreachable";
+ }
+};
+} // namespace
+
+char NVPTXLowerUnreachable::ID = 1;
+
+INITIALIZE_PASS(NVPTXLowerUnreachable, "nvptx-lower-unreachable",
+ "Lower Unreachable", false, false)
+
+// =============================================================================
+// Main function for this pass.
+// =============================================================================
+bool NVPTXLowerUnreachable::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ LLVMContext &C = F.getContext();
+ FunctionType *ExitFTy = FunctionType::get(Type::getVoidTy(C), false);
+ InlineAsm *Exit = InlineAsm::get(ExitFTy, "exit;", "", true);
+
+ bool Changed = false;
+ for (auto &BB : F)
+ for (auto &I : BB) {
+ if (auto unreachableInst = dyn_cast<UnreachableInst>(&I)) {
+ Changed = true;
+ CallInst::Create(ExitFTy, Exit, "", unreachableInst);
+ }
+ }
+ return Changed;
+}
+
+FunctionPass *llvm::createNVPTXLowerUnreachablePass() {
+ return new NVPTXLowerUnreachable();
+}
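
For illustration, a minimal before/after of what this new pass does (the kernel below is hypothetical; the inserted call mirrors the InlineAsm::get(ExitFTy, "exit;", "", /*hasSideEffects=*/true) construction above):

    declare void @does_not_return()

    define void @kernel() {
    entry:
      call void @does_not_return()
      unreachable
    }

    ; after nvptx-lower-unreachable:
    define void @kernel() {
    entry:
      call void @does_not_return()
      call void asm sideeffect "exit;", ""()
      unreachable
    }

Since ptxas treats exit as terminating the CFG, the block no longer acquires a fall-through edge when it is moved to the end of the function.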
diff --git a/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp b/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
index 5ec1b2425e68..95125eb41bc0 100644
--- a/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
@@ -34,6 +34,11 @@ void NVPTXFloatMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
NumHex = 4;
APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
break;
+ case VK_NVPTX_BFLOAT_PREC_FLOAT:
+ OS << "0x";
+ NumHex = 4;
+ APF.convert(APFloat::BFloat(), APFloat::rmNearestTiesToEven, &Ignored);
+ break;
case VK_NVPTX_SINGLE_PREC_FLOAT:
OS << "0f";
NumHex = 8;
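
Worked example for the new bfloat case: 1.0 is 0x3F800000 as an IEEE single, and converting it to APFloat::BFloat() keeps the top sixteen bits, so the constant is printed as 0x3F80, i.e. the "0x" prefix with NumHex = 4, as for half precision, while single precision keeps its "0f" prefix with eight hex digits.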
diff --git a/llvm/lib/Target/NVPTX/NVPTXMCExpr.h b/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
index 440fa1310003..ef99def06c4d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -21,6 +21,7 @@ class NVPTXFloatMCExpr : public MCTargetExpr {
public:
enum VariantKind {
VK_NVPTX_None,
+ VK_NVPTX_BFLOAT_PREC_FLOAT, // FP constant in bfloat-precision
VK_NVPTX_HALF_PREC_FLOAT, // FP constant in half-precision
VK_NVPTX_SINGLE_PREC_FLOAT, // FP constant in single-precision
VK_NVPTX_DOUBLE_PREC_FLOAT // FP constant in double-precision
@@ -40,6 +41,11 @@ public:
static const NVPTXFloatMCExpr *create(VariantKind Kind, const APFloat &Flt,
MCContext &Ctx);
+ static const NVPTXFloatMCExpr *createConstantBFPHalf(const APFloat &Flt,
+ MCContext &Ctx) {
+ return create(VK_NVPTX_BFLOAT_PREC_FLOAT, Flt, Ctx);
+ }
+
static const NVPTXFloatMCExpr *createConstantFPHalf(const APFloat &Flt,
MCContext &Ctx) {
return create(VK_NVPTX_HALF_PREC_FLOAT, Flt, Ctx);
diff --git a/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp b/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
index 869231ff4ffe..258ae97a20d5 100644
--- a/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXProxyRegErasure.cpp
@@ -73,10 +73,6 @@ bool NVPTXProxyRegErasure::runOnMachineFunction(MachineFunction &MF) {
case NVPTX::ProxyRegI16:
case NVPTX::ProxyRegI32:
case NVPTX::ProxyRegI64:
- case NVPTX::ProxyRegF16:
- case NVPTX::ProxyRegF16x2:
- case NVPTX::ProxyRegBF16:
- case NVPTX::ProxyRegBF16x2:
case NVPTX::ProxyRegF32:
case NVPTX::ProxyRegF64:
replaceMachineInstructionUsage(MF, MI);
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 6e4208d27241..f1213f030bba 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -29,14 +29,6 @@ namespace llvm {
std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
if (RC == &NVPTX::Float32RegsRegClass)
return ".f32";
- if (RC == &NVPTX::Float16RegsRegClass)
- // Ideally fp16 registers should be .f16, but this syntax is only
- // supported on sm_53+. On the other hand, .b16 registers are
- // accepted for all supported fp16 instructions on all GPU
- // variants, so we can use them instead.
- return ".b16";
- if (RC == &NVPTX::Float16x2RegsRegClass)
- return ".b32";
if (RC == &NVPTX::Float64RegsRegClass)
return ".f64";
if (RC == &NVPTX::Int64RegsRegClass)
@@ -73,10 +65,6 @@ std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
if (RC == &NVPTX::Float32RegsRegClass)
return "%f";
- if (RC == &NVPTX::Float16RegsRegClass)
- return "%h";
- if (RC == &NVPTX::Float16x2RegsRegClass)
- return "%hh";
if (RC == &NVPTX::Float64RegsRegClass)
return "%fd";
if (RC == &NVPTX::Int64RegsRegClass)
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
index 31d5441e58b3..b62460e8cd31 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td
@@ -57,11 +57,11 @@ foreach i = 0...31 in {
// Register classes
//===----------------------------------------------------------------------===//
def Int1Regs : NVPTXRegClass<[i1], 8, (add (sequence "P%u", 0, 4))>;
-def Int16Regs : NVPTXRegClass<[i16], 16, (add (sequence "RS%u", 0, 4))>;
-def Int32Regs : NVPTXRegClass<[i32], 32, (add (sequence "R%u", 0, 4), VRFrame32, VRFrameLocal32)>;
+def Int16Regs : NVPTXRegClass<[i16, f16, bf16], 16, (add (sequence "RS%u", 0, 4))>;
+def Int32Regs : NVPTXRegClass<[i32, v2f16, v2bf16], 32,
+ (add (sequence "R%u", 0, 4),
+ VRFrame32, VRFrameLocal32)>;
def Int64Regs : NVPTXRegClass<[i64], 64, (add (sequence "RL%u", 0, 4), VRFrame64, VRFrameLocal64)>;
-def Float16Regs : NVPTXRegClass<[f16,bf16], 16, (add (sequence "H%u", 0, 4))>;
-def Float16x2Regs : NVPTXRegClass<[v2f16,v2bf16], 32, (add (sequence "HH%u", 0, 4))>;
def Float32Regs : NVPTXRegClass<[f32], 32, (add (sequence "F%u", 0, 4))>;
def Float64Regs : NVPTXRegClass<[f64], 64, (add (sequence "FL%u", 0, 4))>;
def Int32ArgRegs : NVPTXRegClass<[i32], 32, (add (sequence "ia%u", 0, 4))>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
index 2347f46449d5..7fa64af196b9 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -26,7 +26,6 @@ static cl::opt<bool>
NoF16Math("nvptx-no-f16-math", cl::Hidden,
cl::desc("NVPTX Specific: Disable generation of f16 math ops."),
cl::init(false));
-
// Pin the vtable to this file.
void NVPTXSubtarget::anchor() {}
diff --git a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 920f5bb94689..93af11c258b4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -76,6 +76,7 @@ public:
inline bool hasHWROT32() const { return SmVersion >= 32; }
bool hasImageHandles() const;
bool hasFP16Math() const { return SmVersion >= 53; }
+ bool hasBF16Math() const { return SmVersion >= 80; }
bool allowFP16Math() const;
bool hasMaskOperator() const { return PTXVersion >= 71; }
bool hasNoReturn() const { return SmVersion >= 30 && PTXVersion >= 64; }
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 36814d9f5742..1892f951ee83 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -12,29 +12,30 @@
#include "NVPTXTargetMachine.h"
#include "NVPTX.h"
+#include "NVPTXAliasAnalysis.h"
#include "NVPTXAllocaHoisting.h"
#include "NVPTXAtomicLower.h"
+#include "NVPTXCtorDtorLowering.h"
#include "NVPTXLowerAggrCopies.h"
#include "NVPTXMachineFunctionInfo.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXTargetTransformInfo.h"
#include "TargetInfo/NVPTXTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
-#include "llvm/Transforms/Vectorize.h"
+#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include <cassert>
#include <optional>
#include <string>
@@ -62,18 +63,30 @@ static cl::opt<bool> UseShortPointersOpt(
"Use 32-bit pointers for accessing const/local/shared address spaces."),
cl::init(false), cl::Hidden);
+// FIXME: intended as a temporary debugging aid. Should be removed before it
+// makes it into the LLVM-17 release.
+static cl::opt<bool>
+ ExitOnUnreachable("nvptx-exit-on-unreachable",
+ cl::desc("Lower 'unreachable' as 'exit' instruction."),
+ cl::init(true), cl::Hidden);
+
namespace llvm {
-void initializeGenericToNVVMPass(PassRegistry&);
+void initializeGenericToNVVMLegacyPassPass(PassRegistry &);
void initializeNVPTXAllocaHoistingPass(PassRegistry &);
-void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
+void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry &);
void initializeNVPTXAtomicLowerPass(PassRegistry &);
+void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
void initializeNVPTXLowerAllocaPass(PassRegistry &);
+void initializeNVPTXLowerUnreachablePass(PassRegistry &);
+void initializeNVPTXCtorDtorLoweringLegacyPass(PassRegistry &);
void initializeNVPTXLowerArgsPass(PassRegistry &);
void initializeNVPTXProxyRegErasurePass(PassRegistry &);
void initializeNVVMIntrRangePass(PassRegistry &);
void initializeNVVMReflectPass(PassRegistry &);
+void initializeNVPTXAAWrapperPassPass(PassRegistry &);
+void initializeNVPTXExternalAAWrapperPass(PassRegistry &);
} // end namespace llvm
@@ -87,15 +100,19 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
// but it's very NVPTX-specific.
initializeNVVMReflectPass(PR);
initializeNVVMIntrRangePass(PR);
- initializeGenericToNVVMPass(PR);
+ initializeGenericToNVVMLegacyPassPass(PR);
initializeNVPTXAllocaHoistingPass(PR);
initializeNVPTXAssignValidGlobalNamesPass(PR);
initializeNVPTXAtomicLowerPass(PR);
initializeNVPTXLowerArgsPass(PR);
initializeNVPTXLowerAllocaPass(PR);
+ initializeNVPTXLowerUnreachablePass(PR);
+ initializeNVPTXCtorDtorLoweringLegacyPass(PR);
initializeNVPTXLowerAggrCopiesPass(PR);
initializeNVPTXProxyRegErasurePass(PR);
initializeNVPTXDAGToDAGISelPass(PR);
+ initializeNVPTXAAWrapperPassPass(PR);
+ initializeNVPTXExternalAAWrapperPass(PR);
}
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
@@ -211,6 +228,10 @@ MachineFunctionInfo *NVPTXTargetMachine::createMachineFunctionInfo(
F, STI);
}
+void NVPTXTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) {
+ AAM.registerFunctionAnalysis<NVPTXAA>();
+}
+
void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerPipelineParsingCallback(
[](StringRef PassName, FunctionPassManager &PM,
@@ -226,6 +247,32 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
return false;
});
+ PB.registerAnalysisRegistrationCallback([](FunctionAnalysisManager &FAM) {
+ FAM.registerPass([&] { return NVPTXAA(); });
+ });
+
+ PB.registerParseAACallback([](StringRef AAName, AAManager &AAM) {
+ if (AAName == "nvptx-aa") {
+ AAM.registerFunctionAnalysis<NVPTXAA>();
+ return true;
+ }
+ return false;
+ });
+
+ PB.registerPipelineParsingCallback(
+ [](StringRef PassName, ModulePassManager &PM,
+ ArrayRef<PassBuilder::PipelineElement>) {
+ if (PassName == "nvptx-lower-ctor-dtor") {
+ PM.addPass(NVPTXCtorDtorLoweringPass());
+ return true;
+ }
+ if (PassName == "generic-to-nvvm") {
+ PM.addPass(GenericToNVVMPass());
+ return true;
+ }
+ return false;
+ });
+
PB.registerPipelineStartEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
FunctionPassManager FPM;
@@ -253,6 +300,7 @@ NVPTXTargetMachine::getPredicatedAddrSpace(const Value *V) const {
case Intrinsic::nvvm_isspacep_local:
return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_LOCAL);
case Intrinsic::nvvm_isspacep_shared:
+ case Intrinsic::nvvm_isspacep_shared_cluster:
return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_SHARED);
default:
break;
@@ -312,6 +360,12 @@ void NVPTXPassConfig::addIRPasses() {
disablePass(&PatchableFunctionID);
disablePass(&ShrinkWrapID);
+ addPass(createNVPTXAAWrapperPass());
+ addPass(createExternalAAWrapperPass([](Pass &P, Function &, AAResults &AAR) {
+ if (auto *WrapperPass = P.getAnalysisIfAvailable<NVPTXAAWrapperPass>())
+ AAR.addAAResult(WrapperPass->getResult());
+ }));
+
// NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
// it here does nothing. But since we need it for correctness when lowering
// to NVPTX, run it here too, in case whoever built our pass pipeline didn't
@@ -322,17 +376,18 @@ void NVPTXPassConfig::addIRPasses() {
if (getOptLevel() != CodeGenOpt::None)
addPass(createNVPTXImageOptimizerPass());
addPass(createNVPTXAssignValidGlobalNamesPass());
- addPass(createGenericToNVVMPass());
+ addPass(createGenericToNVVMLegacyPass());
// NVPTXLowerArgs is required for correctness and should be run right
// before the address space inference passes.
- addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine()));
+ addPass(createNVPTXLowerArgsPass());
if (getOptLevel() != CodeGenOpt::None) {
addAddressSpaceInferencePasses();
addStraightLineScalarOptimizationPasses();
}
addPass(createAtomicExpandPass());
+ addPass(createNVPTXCtorDtorLoweringLegacyPass());
// === LSR and other generic IR passes ===
TargetPassConfig::addIRPasses();
@@ -354,6 +409,9 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createLoadStoreVectorizerPass());
addPass(createSROAPass());
}
+
+ if (ExitOnUnreachable)
+ addPass(createNVPTXLowerUnreachablePass());
}
bool NVPTXPassConfig::addInstSelector() {
@@ -406,11 +464,10 @@ void NVPTXPassConfig::addOptimizedRegAlloc() {
if (addPass(&MachineSchedulerID))
printAndVerify("After Machine Scheduling");
-
addPass(&StackSlotColoringID);
// FIXME: Needs physical registers
- //addPass(&MachineLICMID);
+ // addPass(&MachineLICMID);
printAndVerify("After StackSlotColoring");
}
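
With the callbacks added above, the new names should also be reachable from opt's textual pipelines, e.g. something along the lines of `opt -mtriple=nvptx64-nvidia-cuda -passes=generic-to-nvvm,nvptx-lower-ctor-dtor -aa-pipeline=nvptx-aa -S in.ll` (file name illustrative): "nvptx-aa" resolves through registerParseAACallback and the two module passes through registerPipelineParsingCallback.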
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
index 843c3a218e1d..25dfea11aabc 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
@@ -67,6 +67,8 @@ public:
createMachineFunctionInfo(BumpPtrAllocator &Allocator, const Function &F,
const TargetSubtargetInfo *STI) const override;
+ void registerDefaultAliasAnalyses(AAManager &AAM) override;
+
void registerPassBuilderCallbacks(PassBuilder &PB) override;
TargetTransformInfo getTargetTransformInfo(const Function &F) const override;
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index f39934ae13e8..c73721da46e3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -204,6 +204,14 @@ static Instruction *simplifyNvvmIntrinsic(IntrinsicInst *II, InstCombiner &IC) {
return {Intrinsic::fma, FTZ_MustBeOff, true};
case Intrinsic::nvvm_fma_rn_ftz_f16x2:
return {Intrinsic::fma, FTZ_MustBeOn, true};
+ case Intrinsic::nvvm_fma_rn_bf16:
+ return {Intrinsic::fma, FTZ_MustBeOff, true};
+ case Intrinsic::nvvm_fma_rn_ftz_bf16:
+ return {Intrinsic::fma, FTZ_MustBeOn, true};
+ case Intrinsic::nvvm_fma_rn_bf16x2:
+ return {Intrinsic::fma, FTZ_MustBeOff, true};
+ case Intrinsic::nvvm_fma_rn_ftz_bf16x2:
+ return {Intrinsic::fma, FTZ_MustBeOn, true};
case Intrinsic::nvvm_fmax_d:
return {Intrinsic::maxnum, FTZ_Any};
case Intrinsic::nvvm_fmax_f:
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 0b1195ed9c8f..3ce2675560c4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -41,7 +41,7 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()),
TLI(ST->getTargetLowering()) {}
- bool hasBranchDivergence() { return true; }
+ bool hasBranchDivergence(const Function *F = nullptr) { return true; }
bool isSourceOfDivergence(const Value *V);
@@ -90,9 +90,9 @@ public:
return true;
}
- // Increase the inlining cost threshold by a factor of 5, reflecting that
+ // Increase the inlining cost threshold by a factor of 11, reflecting that
// calls are particularly expensive in NVPTX.
- unsigned getInliningThresholdMultiplier() { return 5; }
+ unsigned getInliningThresholdMultiplier() const { return 11; }
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 6de25b87016b..56fdf19a0720 100644
--- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -282,6 +282,11 @@ public:
return (unsigned) Imm.Val;
}
+ unsigned getFpReg() const {
+ assert(isEvenRegNumber() && "Invalid access!");
+ return (unsigned)(Imm.Val >> 1);
+ }
+
unsigned getVSReg() const {
assert(isVSRegNumber() && "Invalid access!");
return (unsigned) Imm.Val;
@@ -334,7 +339,7 @@ public:
unsigned getCRBitMask() const {
assert(isCRBitMask() && "Invalid access!");
- return 7 - countTrailingZeros<uint64_t>(Imm.Val);
+ return 7 - llvm::countr_zero<uint64_t>(Imm.Val);
}
bool isToken() const override { return Kind == Token; }
@@ -441,8 +446,10 @@ public:
bool isEvenRegNumber() const { return isRegNumber() && (getImm() & 1) == 0; }
- bool isCRBitMask() const { return Kind == Immediate && isUInt<8>(getImm()) &&
- isPowerOf2_32(getImm()); }
+ bool isCRBitMask() const {
+ return Kind == Immediate && isUInt<8>(getImm()) &&
+ llvm::has_single_bit<uint32_t>(getImm());
+ }
bool isATBitsAsHint() const { return false; }
bool isMem() const override { return false; }
bool isReg() const override { return false; }
@@ -500,6 +507,11 @@ public:
Inst.addOperand(MCOperand::createReg(FRegs[getReg()]));
}
+ void addRegFpRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(FpRegs[getFpReg()]));
+ }
+
void addRegVFRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(VFRegs[getReg()]));
@@ -1198,7 +1210,7 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
break;
}
case PPC::MFTB: {
- if (getSTI().getFeatureBits()[PPC::FeatureMFTB]) {
+ if (getSTI().hasFeature(PPC::FeatureMFTB)) {
assert(Inst.getNumOperands() == 2 && "Expecting two operands");
Inst.setOpcode(PPC::MFSPR);
}
@@ -1266,40 +1278,40 @@ bool PPCAsmParser::MatchRegisterName(MCRegister &RegNo, int64_t &IntVal) {
} else if (Name.equals_insensitive("vrsave")) {
RegNo = PPC::VRSAVE;
IntVal = 256;
- } else if (Name.startswith_insensitive("r") &&
+ } else if (Name.starts_with_insensitive("r") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = isPPC64() ? XRegs[IntVal] : RRegs[IntVal];
- } else if (Name.startswith_insensitive("f") &&
+ } else if (Name.starts_with_insensitive("f") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = FRegs[IntVal];
- } else if (Name.startswith_insensitive("vs") &&
+ } else if (Name.starts_with_insensitive("vs") &&
!Name.substr(2).getAsInteger(10, IntVal) && IntVal < 64) {
RegNo = VSRegs[IntVal];
- } else if (Name.startswith_insensitive("v") &&
+ } else if (Name.starts_with_insensitive("v") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = VRegs[IntVal];
- } else if (Name.startswith_insensitive("cr") &&
+ } else if (Name.starts_with_insensitive("cr") &&
!Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
RegNo = CRRegs[IntVal];
- } else if (Name.startswith_insensitive("acc") &&
+ } else if (Name.starts_with_insensitive("acc") &&
!Name.substr(3).getAsInteger(10, IntVal) && IntVal < 8) {
RegNo = ACCRegs[IntVal];
- } else if (Name.startswith_insensitive("wacc_hi") &&
+ } else if (Name.starts_with_insensitive("wacc_hi") &&
!Name.substr(7).getAsInteger(10, IntVal) && IntVal < 8) {
RegNo = ACCRegs[IntVal];
- } else if (Name.startswith_insensitive("wacc") &&
+ } else if (Name.starts_with_insensitive("wacc") &&
!Name.substr(4).getAsInteger(10, IntVal) && IntVal < 8) {
RegNo = WACCRegs[IntVal];
- } else if (Name.startswith_insensitive("dmrrowp") &&
+ } else if (Name.starts_with_insensitive("dmrrowp") &&
!Name.substr(7).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = DMRROWpRegs[IntVal];
- } else if (Name.startswith_insensitive("dmrrow") &&
+ } else if (Name.starts_with_insensitive("dmrrow") &&
!Name.substr(6).getAsInteger(10, IntVal) && IntVal < 64) {
RegNo = DMRROWRegs[IntVal];
- } else if (Name.startswith_insensitive("dmrp") &&
+ } else if (Name.starts_with_insensitive("dmrp") &&
!Name.substr(4).getAsInteger(10, IntVal) && IntVal < 4) {
RegNo = DMRROWpRegs[IntVal];
- } else if (Name.startswith_insensitive("dmr") &&
+ } else if (Name.starts_with_insensitive("dmr") &&
!Name.substr(3).getAsInteger(10, IntVal) && IntVal < 8) {
RegNo = DMRRegs[IntVal];
} else
@@ -1536,24 +1548,20 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
if (const MCSymbolRefExpr *Ref = dyn_cast<MCSymbolRefExpr>(EVal))
TLSCall = Ref->getSymbol().getName() == "__tls_get_addr";
- if (TLSCall && getLexer().is(AsmToken::LParen)) {
+ if (TLSCall && parseOptionalToken(AsmToken::LParen)) {
const MCExpr *TLSSym;
-
- Parser.Lex(); // Eat the '('.
S = Parser.getTok().getLoc();
if (ParseExpression(TLSSym))
return Error(S, "invalid TLS call expression");
- if (getLexer().isNot(AsmToken::RParen))
- return Error(Parser.getTok().getLoc(), "missing ')'");
+ if (parseToken(AsmToken::RParen, "expected ')'"))
+ return true;
E = Parser.getTok().getLoc();
- Parser.Lex(); // Eat the ')'.
Operands.push_back(PPCOperand::CreateFromMCExpr(TLSSym, S, E, isPPC64()));
}
// Otherwise, check for D-form memory operands
- if (!TLSCall && getLexer().is(AsmToken::LParen)) {
- Parser.Lex(); // Eat the '('.
+ if (!TLSCall && parseOptionalToken(AsmToken::LParen)) {
S = Parser.getTok().getLoc();
int64_t IntVal;
@@ -1640,7 +1648,7 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// where th can be omitted when it is 0. dcbtst is the same. We take the
// server form to be the default, so swap the operands if we're parsing for
// an embedded core (they'll be swapped again upon printing).
- if (getSTI().getFeatureBits()[PPC::FeatureBookE] &&
+ if (getSTI().hasFeature(PPC::FeatureBookE) &&
Operands.size() == 4 &&
(Name == "dcbt" || Name == "dcbtst")) {
std::swap(Operands[1], Operands[3]);
diff --git a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 21fee2441f32..0c6c17d5a0b6 100644
--- a/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -83,7 +83,8 @@ static DecodeStatus decodeDirectBrTarget(MCInst &Inst, unsigned Imm,
template <std::size_t N>
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
const MCPhysReg (&Regs)[N]) {
- assert(RegNo < N && "Invalid register number");
+ if (RegNo >= N)
+ return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createReg(Regs[RegNo]));
return MCDisassembler::Success;
}
@@ -112,6 +113,14 @@ static DecodeStatus DecodeF8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, FRegs);
}
+static DecodeStatus DecodeFpRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (RegNo > 30 || (RegNo & 1))
+ return MCDisassembler::Fail;
+ return decodeRegisterClass(Inst, RegNo >> 1, FpRegs);
+}
+
static DecodeStatus DecodeVFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
@@ -239,7 +248,8 @@ template <unsigned N>
static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
int64_t Address,
const MCDisassembler *Decoder) {
- assert(isUInt<N>(Imm) && "Invalid immediate");
+ if (!isUInt<N>(Imm))
+ return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(Imm));
return MCDisassembler::Success;
}
@@ -248,7 +258,8 @@ template <unsigned N>
static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
int64_t Address,
const MCDisassembler *Decoder) {
- assert(isUInt<N>(Imm) && "Invalid immediate");
+ if (!isUInt<N>(Imm))
+ return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm)));
return MCDisassembler::Success;
}
@@ -271,171 +282,64 @@ static DecodeStatus decodeVSRpEvenOperands(MCInst &Inst, uint64_t RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus decodeMemRIOperands(MCInst &Inst, uint64_t Imm,
- int64_t Address,
- const MCDisassembler *Decoder) {
- // Decode the memri field (imm, reg), which has the low 16-bits as the
- // displacement and the next 5 bits as the register #.
-
- uint64_t Base = Imm >> 16;
- uint64_t Disp = Imm & 0xFFFF;
-
- assert(Base < 32 && "Invalid base register");
-
- switch (Inst.getOpcode()) {
- default: break;
- case PPC::LBZU:
- case PPC::LHAU:
- case PPC::LHZU:
- case PPC::LWZU:
- case PPC::LFSU:
- case PPC::LFDU:
- // Add the tied output operand.
- Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
- break;
- case PPC::STBU:
- case PPC::STHU:
- case PPC::STWU:
- case PPC::STFSU:
- case PPC::STFDU:
- Inst.insert(Inst.begin(), MCOperand::createReg(RRegsNoR0[Base]));
- break;
- }
-
- Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp)));
- Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus decodeMemRIXOperands(MCInst &Inst, uint64_t Imm,
+static DecodeStatus decodeDispRIXOperand(MCInst &Inst, uint64_t Imm,
int64_t Address,
const MCDisassembler *Decoder) {
- // Decode the memrix field (imm, reg), which has the low 14-bits as the
- // displacement and the next 5 bits as the register #.
-
- uint64_t Base = Imm >> 14;
- uint64_t Disp = Imm & 0x3FFF;
-
- assert(Base < 32 && "Invalid base register");
-
- if (Inst.getOpcode() == PPC::LDU)
- // Add the tied output operand.
- Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
- else if (Inst.getOpcode() == PPC::STDU)
- Inst.insert(Inst.begin(), MCOperand::createReg(RRegsNoR0[Base]));
-
- Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp << 2)));
- Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
+ // The rix displacement is an immediate shifted by 2
+ Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Imm << 2)));
return MCDisassembler::Success;
}
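
A minimal standalone sketch of the DS-form "rix" displacement round trip, assuming only the arithmetic shown in decodeDispRIXOperand above and getDispRIXEncoding later in this import: the encoder keeps the 14 bits above the two zero low bits, the decoder shifts back and sign-extends to 16 bits. The helper names here are illustrative, not LLVM's, and a local signExtend stands in for llvm::SignExtend64.

#include <cassert>
#include <cstdint>

// Local stand-in for llvm::SignExtend64<B>: sign-extend the low B bits of X.
static int64_t signExtend(uint64_t X, unsigned B) {
  return int64_t(X << (64 - B)) >> (64 - B);
}

// Emitter side: keep the 14 bits above the two zero low bits (illustrative).
static uint64_t encodeDispRIX(int64_t Disp) {
  assert(Disp % 4 == 0 && "DS-form displacements are word-aligned");
  return (uint64_t(Disp) >> 2) & 0x3FFF;
}

// Decoder side: restore the alignment, then sign-extend to 16 bits.
static int64_t decodeDispRIX(uint64_t Imm) {
  return signExtend(Imm << 2, 16);
}

int main() {
  for (int64_t D : {-32768, -4, 0, 4, 32764})
    assert(decodeDispRIX(encodeDispRIX(D)) == D);
}
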
-static DecodeStatus decodeMemRIHashOperands(MCInst &Inst, uint64_t Imm,
+static DecodeStatus decodeDispRIHashOperand(MCInst &Inst, uint64_t Imm,
int64_t Address,
const MCDisassembler *Decoder) {
- // Decode the memrix field for a hash store or hash check operation.
- // The field is composed of a register and an immediate value that is 6 bits
+ // Decode the disp field for a hash store or hash check operation.
+ // The field is composed of an immediate value that is 6 bits
// and covers the range -8 to -512. The immediate is always negative and 2s
// complement which is why we sign extend a 7 bit value.
- const uint64_t Base = Imm >> 6;
const int64_t Disp = SignExtend64<7>((Imm & 0x3F) + 64) * 8;
- assert(Base < 32 && "Invalid base register");
-
Inst.addOperand(MCOperand::createImm(Disp));
- Inst.addOperand(MCOperand::createReg(RRegs[Base]));
return MCDisassembler::Success;
}
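
A minimal standalone sketch of the hash displacement arithmetic described above, assuming only what decodeDispRIHashOperand and getDispRIHashEncoding show: the 6-bit DX field always denotes a negative, 8-byte-aligned offset in [-512, -8]. The helper names are illustrative, not LLVM's.

#include <cassert>
#include <cstdint>

// Local stand-in for llvm::SignExtend64<B>: sign-extend the low B bits of X.
static int64_t signExtend(uint64_t X, unsigned B) {
  return int64_t(X << (64 - B)) >> (64 - B);
}

// Emitter side: the DX field is the offset divided by 8, kept to 6 bits.
static uint64_t encodeDispRIHash(int64_t Disp) {
  assert(Disp % 8 == 0 && Disp >= -512 && Disp <= -8 && "offset out of range");
  return (uint64_t(Disp) >> 3) & 0x3F;
}

// Decoder side: (Imm & 0x3F) + 64 is a 7-bit value whose sign bit is always
// set, so sign-extending it always yields a negative value, scaled by 8.
static int64_t decodeDispRIHash(uint64_t Imm) {
  return signExtend((Imm & 0x3F) + 64, 7) * 8;
}

int main() {
  assert(decodeDispRIHash(0x3F) == -8);    // largest (closest to zero) offset
  assert(decodeDispRIHash(0x00) == -512);  // smallest offset
  for (int64_t D = -512; D <= -8; D += 8)
    assert(decodeDispRIHash(encodeDispRIHash(D)) == D);
}
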
-static DecodeStatus decodeMemRIX16Operands(MCInst &Inst, uint64_t Imm,
+static DecodeStatus decodeDispRIX16Operand(MCInst &Inst, uint64_t Imm,
int64_t Address,
const MCDisassembler *Decoder) {
- // Decode the memrix16 field (imm, reg), which has the low 12-bits as the
- // displacement with 16-byte aligned, and the next 5 bits as the register #.
-
- uint64_t Base = Imm >> 12;
- uint64_t Disp = Imm & 0xFFF;
-
- assert(Base < 32 && "Invalid base register");
-
- Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Disp << 4)));
- Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
+ // The rix16 displacement has 12 bits, which are shifted by 4.
+ Inst.addOperand(MCOperand::createImm(SignExtend64<16>(Imm << 4)));
return MCDisassembler::Success;
}
-static DecodeStatus decodeMemRI34PCRelOperands(MCInst &Inst, uint64_t Imm,
- int64_t Address,
- const MCDisassembler *Decoder) {
- // Decode the memri34_pcrel field (imm, reg), which has the low 34-bits as the
- // displacement, and the next 5 bits as an immediate 0.
- uint64_t Base = Imm >> 34;
- uint64_t Disp = Imm & 0x3FFFFFFFFUL;
-
- assert(Base < 32 && "Invalid base register");
-
- Inst.addOperand(MCOperand::createImm(SignExtend64<34>(Disp)));
- return decodeImmZeroOperand(Inst, Base, Address, Decoder);
-}
-
-static DecodeStatus decodeMemRI34Operands(MCInst &Inst, uint64_t Imm,
+static DecodeStatus decodeDispSPE8Operand(MCInst &Inst, uint64_t Imm,
int64_t Address,
const MCDisassembler *Decoder) {
- // Decode the memri34 field (imm, reg), which has the low 34-bits as the
- // displacement, and the next 5 bits as the register #.
- uint64_t Base = Imm >> 34;
- uint64_t Disp = Imm & 0x3FFFFFFFFUL;
-
- assert(Base < 32 && "Invalid base register");
-
- Inst.addOperand(MCOperand::createImm(SignExtend64<34>(Disp)));
- Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
- return MCDisassembler::Success;
-}
+ // Decode the dispSPE8 field, which is 5 bits wide and 8-byte aligned.
-static DecodeStatus decodeSPE8Operands(MCInst &Inst, uint64_t Imm,
- int64_t Address,
- const MCDisassembler *Decoder) {
- // Decode the spe8disp field (imm, reg), which has the low 5-bits as the
- // displacement with 8-byte aligned, and the next 5 bits as the register #.
-
- uint64_t Base = Imm >> 5;
uint64_t Disp = Imm & 0x1F;
- assert(Base < 32 && "Invalid base register");
-
Inst.addOperand(MCOperand::createImm(Disp << 3));
- Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
-static DecodeStatus decodeSPE4Operands(MCInst &Inst, uint64_t Imm,
- int64_t Address,
- const MCDisassembler *Decoder) {
- // Decode the spe4disp field (imm, reg), which has the low 5-bits as the
- // displacement with 4-byte aligned, and the next 5 bits as the register #.
+static DecodeStatus decodeDispSPE4Operand(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const MCDisassembler *Decoder) {
+ // Decode the dispSPE4 field, which is 5 bits wide and 4-byte aligned.
- uint64_t Base = Imm >> 5;
uint64_t Disp = Imm & 0x1F;
- assert(Base < 32 && "Invalid base register");
-
Inst.addOperand(MCOperand::createImm(Disp << 2));
- Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
-static DecodeStatus decodeSPE2Operands(MCInst &Inst, uint64_t Imm,
- int64_t Address,
- const MCDisassembler *Decoder) {
- // Decode the spe2disp field (imm, reg), which has the low 5-bits as the
- // displacement with 2-byte aligned, and the next 5 bits as the register #.
+static DecodeStatus decodeDispSPE2Operand(MCInst &Inst, uint64_t Imm,
+ int64_t Address,
+ const MCDisassembler *Decoder) {
+ // Decode the dispSPE2 field, which is 5 bits wide and 2-byte aligned.
- uint64_t Base = Imm >> 5;
uint64_t Disp = Imm & 0x1F;
-
- assert(Base < 32 && "Invalid base register");
-
Inst.addOperand(MCOperand::createImm(Disp << 1));
- Inst.addOperand(MCOperand::createReg(RRegsNoR0[Base]));
return MCDisassembler::Success;
}
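
The three SPE displacement fields above share one pattern: a 5-bit unsigned count of access-size units, scaled back to a byte offset by 8, 4, or 2. A minimal sketch of that scaling, mirroring the decodeDispSPE{8,4,2}Operand decoders here and the matching getDispSPE{8,4,2}Encoding emitters later in this import; the helper names are illustrative, not LLVM's.

#include <cassert>
#include <cstdint>

// Scale is 8, 4, or 2 depending on the access size of the SPE load/store.
static uint64_t encodeDispSPE(uint64_t ByteOffset, unsigned Scale) {
  assert(ByteOffset % Scale == 0 && ByteOffset / Scale < 32 && "out of range");
  return ByteOffset / Scale;                  // the 5-bit instruction field
}

static uint64_t decodeDispSPE(uint64_t Imm, unsigned Scale) {
  return (Imm & 0x1F) * Scale;                // back to a byte offset
}

int main() {
  assert(decodeDispSPE(encodeDispSPE(248, 8), 8) == 248); // max for 8-byte ops
  assert(decodeDispSPE(encodeDispSPE(124, 4), 4) == 124); // max for 4-byte ops
  assert(decodeDispSPE(encodeDispSPE(62, 2), 2) == 62);   // max for 2-byte ops
}
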
@@ -444,8 +348,9 @@ static DecodeStatus decodeCRBitMOperand(MCInst &Inst, uint64_t Imm,
const MCDisassembler *Decoder) {
// The cr bit encoding is 0x80 >> cr_reg_num.
- unsigned Zeros = countTrailingZeros(Imm);
- assert(Zeros < 8 && "Invalid CR bit value");
+ unsigned Zeros = llvm::countr_zero(Imm);
+ if (Zeros >= 8)
+ return MCDisassembler::Fail;
Inst.addOperand(MCOperand::createReg(CRRegs[7 - Zeros]));
return MCDisassembler::Success;
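
A minimal standalone sketch of the CR-field mask encoding noted above (0x80 >> cr_reg_num) and of how counting trailing zeros recovers the field number, mirroring decodeCRBitMOperand here and getCRBitMask in the asm parser; std::countr_zero stands in for llvm::countr_zero and the function names are illustrative.

#include <bit>
#include <cassert>
#include <cstdint>

static unsigned encodeCRBitMask(unsigned CRNum) {
  assert(CRNum < 8 && "only CR0..CR7 exist");
  return 0x80u >> CRNum;
}

static unsigned decodeCRBitMask(uint64_t Mask) {
  // Exactly one of the low 8 bits must be set, as isCRBitMask() checks.
  assert(std::has_single_bit(Mask) && Mask <= 0x80 && "invalid CR mask");
  return 7 - std::countr_zero(Mask);
}

int main() {
  assert(encodeCRBitMask(3) == 0x10);   // CR3 -> 0b0001'0000
  for (unsigned N = 0; N < 8; ++N)
    assert(decodeCRBitMask(encodeCRBitMask(N)) == N);
}
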
@@ -468,7 +373,7 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// TODO: In this function we call decodeInstruction several times with
// different decoder tables. It may be possible to only call once by
// looking at the top 6 bits of the instruction.
- if (STI.getFeatureBits()[PPC::FeaturePrefixInstrs] && Bytes.size() >= 8) {
+ if (STI.hasFeature(PPC::FeaturePrefixInstrs) && Bytes.size() >= 8) {
uint32_t Prefix = ReadFunc(Bytes.data());
uint32_t BaseInst = ReadFunc(Bytes.data() + 4);
uint64_t Inst = BaseInst | (uint64_t)Prefix << 32;
@@ -490,7 +395,7 @@ DecodeStatus PPCDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
// Read the instruction in the proper endianness.
uint64_t Inst = ReadFunc(Bytes.data());
- if (STI.getFeatureBits()[PPC::FeatureSPE]) {
+ if (STI.hasFeature(PPC::FeatureSPE)) {
DecodeStatus result =
decodeInstruction(DecoderTableSPE32, MI, Inst, Address, this, STI);
if (result != MCDisassembler::Fail)
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
index d737eec570eb..3fd7a1ad9efa 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCInstructionSelector.cpp
@@ -12,12 +12,13 @@
#include "PPC.h"
#include "PPCInstrInfo.h"
+#include "PPCMachineFunctionInfo.h"
#include "PPCRegisterBankInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -54,6 +55,8 @@ private:
bool selectZExt(MachineInstr &I, MachineBasicBlock &MBB,
MachineRegisterInfo &MRI) const;
+ bool selectConstantPool(MachineInstr &I, MachineBasicBlock &MBB,
+ MachineRegisterInfo &MRI) const;
std::optional<bool> selectI64ImmDirect(MachineInstr &I,
MachineBasicBlock &MBB,
@@ -62,6 +65,7 @@ private:
bool selectI64Imm(MachineInstr &I, MachineBasicBlock &MBB,
MachineRegisterInfo &MRI) const;
+ const PPCTargetMachine &TM;
const PPCSubtarget &STI;
const PPCInstrInfo &TII;
const PPCRegisterInfo &TRI;
@@ -85,7 +89,8 @@ private:
PPCInstructionSelector::PPCInstructionSelector(const PPCTargetMachine &TM,
const PPCSubtarget &STI,
const PPCRegisterBankInfo &RBI)
- : STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), RBI(RBI),
+ : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
+ RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "PPCGenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
@@ -108,6 +113,10 @@ static const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank *RB) {
if (Ty.getSizeInBits() == 64)
return &PPC::F8RCRegClass;
}
+ if (RB->getID() == PPC::VECRegBankID) {
+ if (Ty.getSizeInBits() == 128)
+ return &PPC::VSRCRegClass;
+ }
if (RB->getID() == PPC::CRRegBankID) {
if (Ty.getSizeInBits() == 1)
return &PPC::CRBITRCRegClass;
@@ -266,8 +275,8 @@ bool PPCInstructionSelector::selectZExt(MachineInstr &I, MachineBasicBlock &MBB,
// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
// zeros and return the number of bits by the left of these consecutive zeros.
static uint32_t findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
- uint32_t HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm));
- uint32_t LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm));
+ uint32_t HiTZ = llvm::countr_zero<uint32_t>(Hi_32(Imm));
+ uint32_t LoLZ = llvm::countl_zero<uint32_t>(Lo_32(Imm));
if ((HiTZ + LoLZ) >= Num)
return (32 + HiTZ);
return 0;
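
A minimal worked example of the check described in the comment above, mirroring findContiguousZerosAtLeast with the std:: bit helpers standing in for the llvm:: ones: the zero run straddling the 32-bit halves is HiTZ + LoLZ bits long, and the returned value 32 + HiTZ is the bit index just left of that run.

#include <bit>
#include <cassert>
#include <cstdint>

static uint32_t contiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
  uint32_t HiTZ = std::countr_zero(uint32_t(Imm >> 32)); // zeros below Hi bits
  uint32_t LoLZ = std::countl_zero(uint32_t(Imm));       // zeros above Lo bits
  if (HiTZ + LoLZ >= Num)
    return 32 + HiTZ;
  return 0;
}

int main() {
  // 0xFF000000000000FF has a 48-bit zero run spanning bits 8..55, so asking
  // for at least 40 zeros succeeds and reports bit 56, the run's left edge.
  assert(contiguousZerosAtLeast(0xFF000000000000FFULL, 40) == 56);
  // Here the zero run around the 32-bit boundary is only 16 bits, so it fails.
  assert(contiguousZerosAtLeast(0xFFFFFFFF0000FFFFULL, 40) == 0);
}
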
@@ -280,10 +289,10 @@ std::optional<bool> PPCInstructionSelector::selectI64ImmDirect(MachineInstr &I,
MachineRegisterInfo &MRI,
Register Reg,
uint64_t Imm) const {
- unsigned TZ = countTrailingZeros<uint64_t>(Imm);
- unsigned LZ = countLeadingZeros<uint64_t>(Imm);
- unsigned TO = countTrailingOnes<uint64_t>(Imm);
- unsigned LO = countLeadingOnes<uint64_t>(Imm);
+ unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
+ unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
+ unsigned TO = llvm::countr_one<uint64_t>(Imm);
+ unsigned LO = llvm::countl_one<uint64_t>(Imm);
uint32_t Hi32 = Hi_32(Imm);
uint32_t Lo32 = Lo_32(Imm);
uint32_t Shift = 0;
@@ -307,7 +316,7 @@ std::optional<bool> PPCInstructionSelector::selectI64ImmDirect(MachineInstr &I,
assert(LZ < 64 && "Unexpected leading zeros here.");
// Count of ones following the leading zeros.
- unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ);
+ unsigned FO = llvm::countl_one<uint64_t>(Imm << LZ);
// 2-1) Patterns : {zeros}{31-bit value}
// {ones}{31-bit value}
if (isInt<32>(Imm)) {
@@ -636,6 +645,66 @@ bool PPCInstructionSelector::selectI64Imm(MachineInstr &I,
return true;
}
+bool PPCInstructionSelector::selectConstantPool(
+ MachineInstr &I, MachineBasicBlock &MBB, MachineRegisterInfo &MRI) const {
+ const DebugLoc &DbgLoc = I.getDebugLoc();
+ MachineFunction *MF = MBB.getParent();
+
+ // TODO: handle 32-bit.
+ // TODO: Enabling floating point constant pool selection on AIX requires
+ // global isel on big endian target enabled first.
+ // See CallLowering::enableBigEndian().
+ if (!STI.isPPC64() || !STI.isLittleEndian())
+ return false;
+
+ MF->getInfo<PPCFunctionInfo>()->setUsesTOCBasePtr();
+
+ const Register DstReg = I.getOperand(0).getReg();
+ unsigned CPI = I.getOperand(1).getIndex();
+
+ // The address is stored in the TOC entry. How it is loaded depends on the
+ // code model and the ABI in use. For now we only handle 64-bit Linux LE.
+ // PowerPC only supports the small, medium, and large code models.
+ const CodeModel::Model CModel = TM.getCodeModel();
+ assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
+ "PowerPC doesn't support tiny or kernel code models.");
+
+ const MCRegister TOCReg = STI.getTOCPointerRegister();
+ MachineMemOperand *MMO = MF->getMachineMemOperand(
+ MachinePointerInfo::getGOT(*MF), MachineMemOperand::MOLoad,
+ MRI.getType(DstReg), MF->getDataLayout().getPointerABIAlignment(0));
+
+ MachineInstr *MI = nullptr;
+ // For now we only handle 64-bit Linux.
+ if (CModel == CodeModel::Small) {
+ // For small code model, generate LDtocCPT(CPI, X2).
+ MI = BuildMI(MBB, I, DbgLoc, TII.get(PPC::LDtocCPT), DstReg)
+ .addConstantPoolIndex(CPI)
+ .addReg(TOCReg)
+ .addMemOperand(MMO);
+ } else {
+ Register HaAddrReg = MRI.createVirtualRegister(&PPC::G8RCRegClass);
+ BuildMI(MBB, I, DbgLoc, TII.get(PPC::ADDIStocHA8), HaAddrReg)
+ .addReg(TOCReg)
+ .addConstantPoolIndex(CPI);
+
+ if (CModel == CodeModel::Large)
+ // For large code model, generate LDtocL(CPI, ADDIStocHA8(X2, CPI))
+ MI = BuildMI(MBB, I, DbgLoc, TII.get(PPC::LDtocL), DstReg)
+ .addConstantPoolIndex(CPI)
+ .addReg(HaAddrReg)
+ .addMemOperand(MMO);
+ else
+ // For medium code model, generate ADDItocL(CPI, ADDIStocHA8(X2, CPI))
+ MI = BuildMI(MBB, I, DbgLoc, TII.get(PPC::ADDItocL), DstReg)
+ .addReg(HaAddrReg)
+ .addConstantPoolIndex(CPI);
+ }
+
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
+}
+
bool PPCInstructionSelector::select(MachineInstr &I) {
auto &MBB = *I.getParent();
auto &MF = *MBB.getParent();
@@ -704,6 +773,8 @@ bool PPCInstructionSelector::select(MachineInstr &I) {
return selectZExt(I, MBB, MRI);
case TargetOpcode::G_CONSTANT:
return selectI64Imm(I, MBB, MRI);
+ case TargetOpcode::G_CONSTANT_POOL:
+ return selectConstantPool(I, MBB, MRI);
}
return false;
}
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
index 1a25fcde8815..6b24c2a07f68 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCLegalizerInfo.cpp
@@ -10,12 +10,33 @@
//===----------------------------------------------------------------------===//
#include "PPCLegalizerInfo.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "ppc-legalinfo"
using namespace llvm;
using namespace LegalizeActions;
+using namespace LegalizeMutations;
+using namespace LegalityPredicates;
+
+static LegalityPredicate isRegisterType(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ unsigned TypeSize = QueryTy.getSizeInBits();
+
+ if (TypeSize % 32 == 1 || TypeSize > 128)
+ return false;
+
+ // Check if this is a legal PowerPC vector type.
+ if (QueryTy.isVector()) {
+ const int EltSize = QueryTy.getElementType().getSizeInBits();
+ return (EltSize == 8 || EltSize == 16 || EltSize == 32 || EltSize == 64);
+ }
+
+ return true;
+ };
+}
PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) {
using namespace TargetOpcode;
@@ -25,6 +46,10 @@ PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) {
const LLT S16 = LLT::scalar(16);
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
+ const LLT V16S8 = LLT::fixed_vector(16, 8);
+ const LLT V8S16 = LLT::fixed_vector(8, 16);
+ const LLT V4S32 = LLT::fixed_vector(4, 32);
+ const LLT V2S64 = LLT::fixed_vector(2, 64);
getActionDefinitionsBuilder(G_IMPLICIT_DEF).legalFor({S64});
getActionDefinitionsBuilder(G_CONSTANT)
.legalFor({S32, S64})
@@ -33,14 +58,18 @@ PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) {
.legalForCartesianProduct({S64}, {S1, S8, S16, S32})
.clampScalar(0, S64, S64);
getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
- .legalFor({S64})
- .clampScalar(0, S64, S64);
+ .legalFor({S64, V4S32})
+ .clampScalar(0, S64, S64)
+ .bitcastIf(typeIsNot(0, V4S32), changeTo(0, V4S32));
getActionDefinitionsBuilder({G_ADD, G_SUB})
- .legalFor({S64})
+ .legalFor({S64, V16S8, V8S16, V4S32, V2S64})
.clampScalar(0, S64, S64);
+ getActionDefinitionsBuilder(G_BITCAST)
+ .legalIf(all(isRegisterType(0), isRegisterType(1)))
+ .lower();
getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV})
- .legalFor({S32, S64});
+ .legalFor({S32, S64, V4S32, V2S64});
getActionDefinitionsBuilder(G_FCMP).legalForCartesianProduct({S1},
{S32, S64});
@@ -54,5 +83,8 @@ PPCLegalizerInfo::PPCLegalizerInfo(const PPCSubtarget &ST) {
getActionDefinitionsBuilder({G_LOAD, G_STORE})
.legalForTypesWithMemDesc({{S64, P0, S64, 8}, {S32, P0, S32, 4}});
+ getActionDefinitionsBuilder(G_FCONSTANT).lowerFor({S32, S64});
+ getActionDefinitionsBuilder(G_CONSTANT_POOL).legalFor({P0});
+
getLegacyLegalizerInfo().computeTables();
}
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
index ff8bb16ba9c8..25587b39b97f 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp
@@ -48,6 +48,14 @@ PPCRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
case PPC::VSSRCRegClassID:
case PPC::F4RCRegClassID:
return getRegBank(PPC::FPRRegBankID);
+ case PPC::VSRCRegClassID:
+ case PPC::VRRCRegClassID:
+ case PPC::VRRC_with_sub_64_in_SPILLTOVSRRCRegClassID:
+ case PPC::VSRC_with_sub_64_in_SPILLTOVSRRCRegClassID:
+ case PPC::SPILLTOVSRRCRegClassID:
+ case PPC::VSLRCRegClassID:
+ case PPC::VSLRC_with_sub_64_in_SPILLTOVSRRCRegClassID:
+ return getRegBank(PPC::VECRegBankID);
case PPC::CRRCRegClassID:
case PPC::CRBITRCRegClassID:
return getRegBank(PPC::CRRegBankID);
@@ -90,11 +98,21 @@ PPCRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// Extension ops.
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
- case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_ANYEXT: {
assert(NumOperands <= 3 &&
"This code is for instructions with 3 or less operands");
- OperandsMapping = getValueMapping(PMI_GPR64);
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ unsigned Size = Ty.getSizeInBits();
+ switch (Size) {
+ case 128:
+ OperandsMapping = getValueMapping(PMI_VEC128);
+ break;
+ default:
+ OperandsMapping = getValueMapping(PMI_GPR64);
+ break;
+ }
break;
+ }
case TargetOpcode::G_FADD:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMUL:
@@ -102,8 +120,19 @@ PPCRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
Register SrcReg = MI.getOperand(1).getReg();
unsigned Size = getSizeInBits(SrcReg, MRI, TRI);
- assert((Size == 32 || Size == 64) && "Unsupported floating point types!\n");
- OperandsMapping = getValueMapping(Size == 32 ? PMI_FPR32 : PMI_FPR64);
+ assert((Size == 32 || Size == 64 || Size == 128) &&
+ "Unsupported floating point types!\n");
+ switch (Size) {
+ case 32:
+ OperandsMapping = getValueMapping(PMI_FPR32);
+ break;
+ case 64:
+ OperandsMapping = getValueMapping(PMI_FPR64);
+ break;
+ case 128:
+ OperandsMapping = getValueMapping(PMI_VEC128);
+ break;
+ }
break;
}
case TargetOpcode::G_FCMP: {
@@ -118,6 +147,9 @@ PPCRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case TargetOpcode::G_CONSTANT:
OperandsMapping = getOperandsMapping({getValueMapping(PMI_GPR64), nullptr});
break;
+ case TargetOpcode::G_CONSTANT_POOL:
+ OperandsMapping = getOperandsMapping({getValueMapping(PMI_GPR64), nullptr});
+ break;
case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_FPTOSI: {
Register SrcReg = MI.getOperand(1).getReg();
@@ -182,6 +214,23 @@ PPCRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OperandsMapping = getOperandsMapping(OpdsMapping);
break;
}
+ case TargetOpcode::G_BITCAST: {
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ unsigned DstSize = DstTy.getSizeInBits();
+
+ bool DstIsGPR = !DstTy.isVector();
+ bool SrcIsGPR = !SrcTy.isVector();
+ // TODO: Currently, only vector and GPR register banks are handled.
+ // This needs to be extended to handle floating point register
+ // banks in the future.
+ const RegisterBank &DstRB = DstIsGPR ? PPC::GPRRegBank : PPC::VECRegBank;
+ const RegisterBank &SrcRB = SrcIsGPR ? PPC::GPRRegBank : PPC::VECRegBank;
+
+ return getInstructionMapping(
+ MappingID, Cost, getCopyMapping(DstRB.getID(), SrcRB.getID(), DstSize),
+ NumOperands);
+ }
default:
return getInvalidInstructionMapping();
}
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
index 885bdcb75816..c2a16c92ba85 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.h
@@ -32,7 +32,8 @@ protected:
PMI_GPR64 = 2,
PMI_FPR32 = 3,
PMI_FPR64 = 4,
- PMI_CR = 5,
+ PMI_VEC128 = 5,
+ PMI_CR = 6,
PMI_Min = PMI_GPR32,
};
diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td
index 16f3bd8cf4a7..f2237d825cb4 100644
--- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td
+++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBanks.td
@@ -15,5 +15,7 @@
def GPRRegBank : RegisterBank<"GPR", [G8RC, G8RC_NOX0]>;
/// Floating point Registers
def FPRRegBank : RegisterBank<"FPR", [VSSRC]>;
+/// Vector Registers
+def VECRegBank : RegisterBank<"VEC", [VSRC]>;
/// Condition Registers
def CRRegBank : RegisterBank<"CR", [CRRC]>;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 227bd59ba3a6..89d04dbe378e 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
@@ -178,7 +179,10 @@ public:
unsigned Other = S->getOther() << 2;
if ((Other & ELF::STO_PPC64_LOCAL_MASK) != 0)
return true;
- }
+ } else if (const auto *S = dyn_cast<MCSymbolXCOFF>(&A->getSymbol())) {
+ return !Target.isAbsolute() && S->isExternal() &&
+ S->getStorageClass() == XCOFF::C_WEAKEXT;
+ }
}
return false;
}
@@ -238,6 +242,8 @@ public:
createObjectTargetWriter() const override {
return createPPCXCOFFObjectWriter(TT.isArch64Bit());
}
+
+ std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;
};
} // end anonymous namespace
@@ -272,6 +278,13 @@ ELFPPCAsmBackend::getFixupKind(StringRef Name) const {
return std::nullopt;
}
+std::optional<MCFixupKind>
+XCOFFPPCAsmBackend::getFixupKind(StringRef Name) const {
+ return StringSwitch<std::optional<MCFixupKind>>(Name)
+ .Case("R_REF", (MCFixupKind)PPC::fixup_ppc_nofixup)
+ .Default(std::nullopt);
+}
+
MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
const MCSubtargetInfo &STI,
const MCRegisterInfo &MRI,
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
index dfc0409434de..2f03aa37745f 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp
@@ -184,7 +184,7 @@ void PPCELFStreamer::emitGOTToPCRelLabel(const MCInst &Inst) {
emitLabel(LabelSym, Inst.getLoc());
}
-// This funciton checks if the parameter Inst is part of the setup for a link
+// This function checks if the parameter Inst is part of the setup for a link
// time GOT PC Relative optimization. For example in this situation:
// <MCInst PLDpc <MCOperand Reg:282> <MCOperand Expr:(glob_double@got@pcrel)>
// <MCOperand Imm:0> <MCOperand Expr:(.Lpcrel@<<invalid>>)>>
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index df0c666f5b11..9e8ee9f23107 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -48,7 +48,8 @@ enum Fixups {
/// Not a true fixup, but ties a symbol to a call to __tls_get_addr for the
/// TLS general and local dynamic models, or inserts the thread-pointer
- /// register number.
+ /// register number. It can also be used to tie the ref symbol to prevent it
+ /// from being garbage collected on AIX.
fixup_ppc_nofixup,
/// A 16-bit fixup corresponding to lo16(_foo) with implied 3 zero bits for
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
index 064d3d6916db..dbdfb6e906bb 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.cpp
@@ -161,7 +161,7 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
// On AIX, only emit the extended mnemonics for dcbt and dcbtst if
// the "modern assembler" is available.
if ((MI->getOpcode() == PPC::DCBT || MI->getOpcode() == PPC::DCBTST) &&
- (!TT.isOSAIX() || STI.getFeatureBits()[PPC::FeatureModernAIXAs])) {
+ (!TT.isOSAIX() || STI.hasFeature(PPC::FeatureModernAIXAs))) {
unsigned char TH = MI->getOperand(0).getImm();
O << "\tdcbt";
if (MI->getOpcode() == PPC::DCBTST)
@@ -170,7 +170,7 @@ void PPCInstPrinter::printInst(const MCInst *MI, uint64_t Address,
O << "t";
O << " ";
- bool IsBookE = STI.getFeatureBits()[PPC::FeatureBookE];
+ bool IsBookE = STI.hasFeature(PPC::FeatureBookE);
if (IsBookE && TH != 0 && TH != 16)
O << (unsigned int) TH << ", ";
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
index db17383df78c..6ba3eb4c79dc 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCInstPrinter.h
@@ -13,8 +13,8 @@
#ifndef LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCINSTPRINTER_H
#define LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCINSTPRINTER_H
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 21b368e70885..a5dc0b45b13c 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "PPCMCAsmInfo.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/TargetParser/Triple.h"
#include <cassert>
using namespace llvm;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index fa9e69f2e607..da0174ce1982 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -10,12 +10,11 @@
//
//===----------------------------------------------------------------------===//
+#include "PPCMCCodeEmitter.h"
#include "MCTargetDesc/PPCFixupKinds.h"
#include "PPCInstrInfo.h"
-#include "PPCMCCodeEmitter.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -24,6 +23,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <cstdint>
@@ -147,87 +147,70 @@ PPCMCCodeEmitter::getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo,
(MCFixupKind)PPC::fixup_ppc_pcrel34);
}
-unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- // Encode (imm, reg) as a memri, which has the low 16-bits as the
- // displacement and the next 5 bits as the register #.
- assert(MI.getOperand(OpNo+1).isReg());
- unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 16;
-
+unsigned PPCMCCodeEmitter::getDispRIEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm())
- return (getMachineOpValue(MI, MO, Fixups, STI) & 0xFFFF) | RegBits;
+ return getMachineOpValue(MI, MO, Fixups, STI) & 0xFFFF;
// Add a fixup for the displacement field.
Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16));
- return RegBits;
+ return 0;
}
-unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- // Encode (imm, reg) as a memrix, which has the low 14-bits as the
- // displacement and the next 5 bits as the register #.
- assert(MI.getOperand(OpNo+1).isReg());
- unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 14;
-
+unsigned
+PPCMCCodeEmitter::getDispRIXEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm())
- return ((getMachineOpValue(MI, MO, Fixups, STI) >> 2) & 0x3FFF) | RegBits;
+ return ((getMachineOpValue(MI, MO, Fixups, STI) >> 2) & 0x3FFF);
// Add a fixup for the displacement field.
Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16ds));
- return RegBits;
+ return 0;
}
-unsigned PPCMCCodeEmitter::getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
+unsigned
+PPCMCCodeEmitter::getDispRIX16Encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- // Encode (imm, reg) as a memrix16, which has the low 12-bits as the
- // displacement and the next 5 bits as the register #.
- assert(MI.getOperand(OpNo+1).isReg());
- unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 12;
-
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm()) {
assert(!(MO.getImm() % 16) &&
"Expecting an immediate that is a multiple of 16");
- return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF) | RegBits;
+ return ((getMachineOpValue(MI, MO, Fixups, STI) >> 4) & 0xFFF);
}
// Otherwise add a fixup for the displacement field.
Fixups.push_back(MCFixup::create(IsLittleEndian ? 0 : 2, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_half16dq));
- return RegBits;
+ return 0;
}
unsigned
-PPCMCCodeEmitter::getMemRIHashEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- // Encode (imm, reg) for the hash load/store to stack for the ROP Protection
+PPCMCCodeEmitter::getDispRIHashEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Encode imm for the hash load/store to stack for the ROP Protection
// instructions.
- const MCOperand &RegMO = MI.getOperand(OpNo + 1);
const MCOperand &MO = MI.getOperand(OpNo);
- assert(RegMO.isReg() && "Base address must be a register.");
assert(MO.isImm() && "Expecting an immediate operand.");
assert(!(MO.getImm() % 8) && "Expecting offset to be 8 byte aligned.");
- unsigned RegBits = getMachineOpValue(MI, RegMO, Fixups, STI) << 6;
unsigned DX = (MO.getImm() >> 3) & 0x3F;
- return RegBits | DX;
+ return DX;
}
uint64_t
-PPCMCCodeEmitter::getMemRI34PCRelEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- // Encode the PCRelative version of memri34: imm34(r0).
- // In the PC relative version the register for the address must be zero.
+PPCMCCodeEmitter::getDispRI34PCRelEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Encode the displacement part of pc-relative memri34, which is an imm34.
// The 34 bit immediate can fall into one of three cases:
// 1) It is a relocation to be filled in by the linker represented as:
// (MCExpr::SymbolRef)
@@ -235,17 +218,11 @@ PPCMCCodeEmitter::getMemRI34PCRelEncoding(const MCInst &MI, unsigned OpNo,
// (MCExpr::Binary(MCExpr::SymbolRef + MCExpr::Constant))
// 3) It is a known value at compile time.
- // Make sure that the register is a zero as expected.
- assert(MI.getOperand(OpNo + 1).isImm() && "Expecting an immediate.");
- uint64_t RegBits =
- getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI) << 34;
- assert(RegBits == 0 && "Operand must be 0.");
-
// If this is not a MCExpr then we are in case 3) and we are dealing with
// a value known at compile time, not a relocation.
const MCOperand &MO = MI.getOperand(OpNo);
if (!MO.isExpr())
- return ((getMachineOpValue(MI, MO, Fixups, STI)) & 0x3FFFFFFFFUL) | RegBits;
+ return (getMachineOpValue(MI, MO, Fixups, STI)) & 0x3FFFFFFFFUL;
// At this point in the function it is known that MO is of type MCExpr.
// Therefore we are dealing with either case 1) a symbol ref or
@@ -313,61 +290,42 @@ PPCMCCodeEmitter::getMemRI34PCRelEncoding(const MCInst &MI, unsigned OpNo,
}
uint64_t
-PPCMCCodeEmitter::getMemRI34Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- // Encode (imm, reg) as a memri34, which has the low 34-bits as the
- // displacement and the next 5 bits as the register #.
- assert(MI.getOperand(OpNo + 1).isReg() && "Expecting a register.");
- uint64_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo + 1), Fixups, STI)
- << 34;
+PPCMCCodeEmitter::getDispRI34Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Encode the displacement part of a memri34.
const MCOperand &MO = MI.getOperand(OpNo);
- return ((getMachineOpValue(MI, MO, Fixups, STI)) & 0x3FFFFFFFFUL) | RegBits;
+ return (getMachineOpValue(MI, MO, Fixups, STI)) & 0x3FFFFFFFFUL;
}
-unsigned PPCMCCodeEmitter::getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI)
- const {
- // Encode (imm, reg) as a spe8dis, which has the low 5-bits of (imm / 8)
- // as the displacement and the next 5 bits as the register #.
- assert(MI.getOperand(OpNo+1).isReg());
- uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5;
-
+unsigned
+PPCMCCodeEmitter::getDispSPE8Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Encode imm as a dispSPE8, which holds the low 5 bits of (imm / 8).
const MCOperand &MO = MI.getOperand(OpNo);
assert(MO.isImm());
- uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 3;
- return reverseBits(Imm | RegBits) >> 22;
+ return getMachineOpValue(MI, MO, Fixups, STI) >> 3;
}
-unsigned PPCMCCodeEmitter::getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI)
- const {
- // Encode (imm, reg) as a spe4dis, which has the low 5-bits of (imm / 4)
- // as the displacement and the next 5 bits as the register #.
- assert(MI.getOperand(OpNo+1).isReg());
- uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5;
-
+unsigned
+PPCMCCodeEmitter::getDispSPE4Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Encode imm as a dispSPE4, which holds the low 5 bits of (imm / 4).
const MCOperand &MO = MI.getOperand(OpNo);
assert(MO.isImm());
- uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 2;
- return reverseBits(Imm | RegBits) >> 22;
+ return getMachineOpValue(MI, MO, Fixups, STI) >> 2;
}
-unsigned PPCMCCodeEmitter::getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI)
- const {
- // Encode (imm, reg) as a spe2dis, which has the low 5-bits of (imm / 2)
- // as the displacement and the next 5 bits as the register #.
- assert(MI.getOperand(OpNo+1).isReg());
- uint32_t RegBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups, STI) << 5;
-
+unsigned
+PPCMCCodeEmitter::getDispSPE2Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // Encode imm as a dispSPE2, which holds the low 5 bits of (imm / 2).
const MCOperand &MO = MI.getOperand(OpNo);
assert(MO.isImm());
- uint32_t Imm = getMachineOpValue(MI, MO, Fixups, STI) >> 1;
- return reverseBits(Imm | RegBits) >> 22;
+ return getMachineOpValue(MI, MO, Fixups, STI) >> 1;
}
unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
@@ -449,7 +407,8 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
return MO.getImm();
}
-void PPCMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+void PPCMCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
@@ -461,13 +420,13 @@ void PPCMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
case 0:
break;
case 4:
- support::endian::write<uint32_t>(OS, Bits, E);
+ support::endian::write<uint32_t>(CB, Bits, E);
break;
case 8:
// If we emit a pair of instructions, the first one is
// always in the top 32 bits, even on little-endian.
- support::endian::write<uint32_t>(OS, Bits >> 32, E);
- support::endian::write<uint32_t>(OS, Bits, E);
+ support::endian::write<uint32_t>(CB, Bits >> 32, E);
+ support::endian::write<uint32_t>(CB, Bits, E);
break;
default:
llvm_unreachable("Invalid instruction size");
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
index c4d4d35a6665..17a15ef18cb7 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.h
@@ -60,33 +60,33 @@ public:
uint64_t getImm34EncodingPCRel(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
+ unsigned getDispRIEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- unsigned getMemRIX16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getMemRIHashEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint64_t getMemRI34PCRelEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint64_t getMemRI34Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getSPE8DisEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getSPE4DisEncoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- unsigned getSPE2DisEncoding(const MCInst &MI, unsigned OpNo,
+ unsigned getDispRIXEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ unsigned getDispRIX16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getDispRIHashEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint64_t getDispRI34PCRelEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint64_t getDispRI34Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getDispSPE8Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getDispSPE4Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ unsigned getDispSPE2Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -112,7 +112,7 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 0ea50e6b509b..271f7ab757e1 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -19,7 +19,6 @@
#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
@@ -44,6 +43,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
@@ -147,12 +147,11 @@ public:
MCSymbolXCOFF *TCSym =
cast<MCSectionXCOFF>(Streamer.getCurrentSectionOnly())
->getQualNameSymbol();
- // If the variant kind is VK_PPC_AIX_TLSGDM the entry represents the
- // region handle for the symbol, we add the relocation specifier @m.
- // If the variant kind is VK_PPC_AIX_TLSGD the entry represents the
- // variable offset for the symbol, we add the relocation specifier @gd.
+ // On AIX, we have a region handle (symbol@m) and the variable offset
+ // (symbol@{gd|le}) for TLS variables, depending on the TLS model.
if (Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGD ||
- Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM)
+ Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSGDM ||
+ Kind == MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE)
OS << "\t.tc " << TCSym->getName() << "," << XSym->getName() << "@"
<< MCSymbolRefExpr::getVariantKindName(Kind) << '\n';
else
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index e4521aebad7e..86ca1386fed9 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -60,17 +60,17 @@ static inline bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
if (isShiftedMask_32(Val)) {
// look for the first non-zero bit
- MB = countLeadingZeros(Val);
+ MB = llvm::countl_zero(Val);
// look for the first zero bit after the run of ones
- ME = countLeadingZeros((Val - 1) ^ Val);
+ ME = llvm::countl_zero((Val - 1) ^ Val);
return true;
} else {
Val = ~Val; // invert mask
if (isShiftedMask_32(Val)) {
// effectively look for the first zero bit
- ME = countLeadingZeros(Val) - 1;
+ ME = llvm::countl_zero(Val) - 1;
// effectively look for the first one bit after the run of zeros
- MB = countLeadingZeros((Val - 1) ^ Val) + 1;
+ MB = llvm::countl_zero((Val - 1) ^ Val) + 1;
return true;
}
}
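
A minimal standalone check of the MB/ME computation above, mirroring the 32-bit isRunOfOnes with std::countl_zero standing in for llvm::countl_zero and a local shifted-mask test in place of llvm::isShiftedMask_32; MB and ME use PowerPC's numbering, where bit 0 is the MSB.

#include <bit>
#include <cassert>
#include <cstdint>

// True for a single contiguous run of ones, e.g. 0x00FF0000.
static bool isShiftedMask32(uint32_t V) {
  if (V == 0)
    return false;
  uint32_t M = (V - 1) | V;       // fill the zeros below the run of ones
  return ((M + 1) & M) == 0;      // M must now look like 0...01...1
}

// Returns true and sets MB/ME (first and last one bit, MSB-is-bit-0) if Val
// is a run of ones, possibly wrapping around from bit 31 back to bit 0.
static bool isRunOfOnes32(uint32_t Val, unsigned &MB, unsigned &ME) {
  if (isShiftedMask32(Val)) {
    MB = std::countl_zero(Val);                 // first one bit
    ME = std::countl_zero((Val - 1) ^ Val);     // last one bit
    return true;
  }
  Val = ~Val;                                   // look for a wrapped run
  if (isShiftedMask32(Val)) {
    ME = std::countl_zero(Val) - 1;
    MB = std::countl_zero((Val - 1) ^ Val) + 1;
    return true;
  }
  return false;
}

int main() {
  unsigned MB = 0, ME = 0;
  assert(isRunOfOnes32(0x00FF0000, MB, ME) && MB == 8 && ME == 15);
  assert(isRunOfOnes32(0xF000000F, MB, ME) && MB == 28 && ME == 3); // wrapped
  assert(!isRunOfOnes32(0x00F0F000, MB, ME)); // two separate runs: no MB/ME
}
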
@@ -84,17 +84,17 @@ static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) {
if (isShiftedMask_64(Val)) {
// look for the first non-zero bit
- MB = countLeadingZeros(Val);
+ MB = llvm::countl_zero(Val);
// look for the first zero bit after the run of ones
- ME = countLeadingZeros((Val - 1) ^ Val);
+ ME = llvm::countl_zero((Val - 1) ^ Val);
return true;
} else {
Val = ~Val; // invert mask
if (isShiftedMask_64(Val)) {
// effectively look for the first zero bit
- ME = countLeadingZeros(Val) - 1;
+ ME = llvm::countl_zero(Val) - 1;
// effectively look for the first one bit after the run of zeros
- MB = countLeadingZeros((Val - 1) ^ Val) + 1;
+ MB = llvm::countl_zero((Val - 1) ^ Val) + 1;
return true;
}
}
@@ -136,6 +136,12 @@ static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) {
X##22, X##23, X##24, X##25, X##26, X##27, X##28, X##29, X##30, X##31 \
}
+#define PPC_REGS_EVEN0_30(X) \
+ { \
+ X##0, X##2, X##4, X##6, X##8, X##10, X##12, X##14, X##16, X##18, X##20, \
+ X##22, X##24, X##26, X##28, X##30 \
+ }
+
#define PPC_REGS0_63(X) \
{ \
X##0, X##1, X##2, X##3, X##4, X##5, X##6, X##7, X##8, X##9, X##10, X##11, \
@@ -178,41 +184,34 @@ static inline bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME) {
using llvm::MCPhysReg;
-#define DEFINE_PPC_REGCLASSES \
- static const MCPhysReg RRegs[32] = PPC_REGS0_31(PPC::R); \
- static const MCPhysReg XRegs[32] = PPC_REGS0_31(PPC::X); \
- static const MCPhysReg FRegs[32] = PPC_REGS0_31(PPC::F); \
- static const MCPhysReg VSRpRegs[32] = PPC_REGS0_31(PPC::VSRp); \
- static const MCPhysReg SPERegs[32] = PPC_REGS0_31(PPC::S); \
- static const MCPhysReg VFRegs[32] = PPC_REGS0_31(PPC::VF); \
- static const MCPhysReg VRegs[32] = PPC_REGS0_31(PPC::V); \
- static const MCPhysReg RRegsNoR0[32] = \
- PPC_REGS_NO0_31(PPC::ZERO, PPC::R); \
- static const MCPhysReg XRegsNoX0[32] = \
- PPC_REGS_NO0_31(PPC::ZERO8, PPC::X); \
- static const MCPhysReg VSRegs[64] = \
- PPC_REGS_LO_HI(PPC::VSL, PPC::V); \
- static const MCPhysReg VSFRegs[64] = \
- PPC_REGS_LO_HI(PPC::F, PPC::VF); \
- static const MCPhysReg VSSRegs[64] = \
- PPC_REGS_LO_HI(PPC::F, PPC::VF); \
- static const MCPhysReg CRBITRegs[32] = { \
- PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, \
- PPC::CR1LT, PPC::CR1GT, PPC::CR1EQ, PPC::CR1UN, \
- PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, \
- PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, \
- PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, \
- PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, \
- PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, \
- PPC::CR7LT, PPC::CR7GT, PPC::CR7EQ, PPC::CR7UN}; \
- static const MCPhysReg CRRegs[8] = PPC_REGS0_7(PPC::CR); \
- static const MCPhysReg ACCRegs[8] = PPC_REGS0_7(PPC::ACC); \
- static const MCPhysReg WACCRegs[8] = PPC_REGS0_7(PPC::WACC); \
- static const MCPhysReg WACC_HIRegs[8] = PPC_REGS0_7(PPC::WACC_HI); \
- static const MCPhysReg DMRROWpRegs[32] = PPC_REGS0_31(PPC::DMRROWp); \
- static const MCPhysReg DMRROWRegs[64] = PPC_REGS0_63(PPC::DMRROW); \
- static const MCPhysReg DMRRegs[8] = PPC_REGS0_7(PPC::DMR); \
+#define DEFINE_PPC_REGCLASSES \
+ static const MCPhysReg RRegs[32] = PPC_REGS0_31(PPC::R); \
+ static const MCPhysReg XRegs[32] = PPC_REGS0_31(PPC::X); \
+ static const MCPhysReg FRegs[32] = PPC_REGS0_31(PPC::F); \
+ static const MCPhysReg FpRegs[16] = PPC_REGS_EVEN0_30(PPC::Fpair); \
+ static const MCPhysReg VSRpRegs[32] = PPC_REGS0_31(PPC::VSRp); \
+ static const MCPhysReg SPERegs[32] = PPC_REGS0_31(PPC::S); \
+ static const MCPhysReg VFRegs[32] = PPC_REGS0_31(PPC::VF); \
+ static const MCPhysReg VRegs[32] = PPC_REGS0_31(PPC::V); \
+ static const MCPhysReg RRegsNoR0[32] = PPC_REGS_NO0_31(PPC::ZERO, PPC::R); \
+ static const MCPhysReg XRegsNoX0[32] = PPC_REGS_NO0_31(PPC::ZERO8, PPC::X); \
+ static const MCPhysReg VSRegs[64] = PPC_REGS_LO_HI(PPC::VSL, PPC::V); \
+ static const MCPhysReg VSFRegs[64] = PPC_REGS_LO_HI(PPC::F, PPC::VF); \
+ static const MCPhysReg VSSRegs[64] = PPC_REGS_LO_HI(PPC::F, PPC::VF); \
+ static const MCPhysReg CRBITRegs[32] = { \
+ PPC::CR0LT, PPC::CR0GT, PPC::CR0EQ, PPC::CR0UN, PPC::CR1LT, PPC::CR1GT, \
+ PPC::CR1EQ, PPC::CR1UN, PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, \
+ PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, PPC::CR4LT, PPC::CR4GT, \
+ PPC::CR4EQ, PPC::CR4UN, PPC::CR5LT, PPC::CR5GT, PPC::CR5EQ, PPC::CR5UN, \
+ PPC::CR6LT, PPC::CR6GT, PPC::CR6EQ, PPC::CR6UN, PPC::CR7LT, PPC::CR7GT, \
+ PPC::CR7EQ, PPC::CR7UN}; \
+ static const MCPhysReg CRRegs[8] = PPC_REGS0_7(PPC::CR); \
+ static const MCPhysReg ACCRegs[8] = PPC_REGS0_7(PPC::ACC); \
+ static const MCPhysReg WACCRegs[8] = PPC_REGS0_7(PPC::WACC); \
+ static const MCPhysReg WACC_HIRegs[8] = PPC_REGS0_7(PPC::WACC_HI); \
+ static const MCPhysReg DMRROWpRegs[32] = PPC_REGS0_31(PPC::DMRROWp); \
+ static const MCPhysReg DMRROWRegs[64] = PPC_REGS0_63(PPC::DMRROW); \
+ static const MCPhysReg DMRRegs[8] = PPC_REGS0_7(PPC::DMR); \
static const MCPhysReg DMRpRegs[4] = PPC_REGS0_3(PPC::DMRp);
-
#endif // LLVM_LIB_TARGET_POWERPC_MCTARGETDESC_PPCMCTARGETDESC_H
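
The DEFINE_PPC_REGCLASSES macro above expands to plain lookup tables (now including the 16-entry FpRegs table of even floating-point register pairs) that the PowerPC MC-layer components index by the register number encoded in an instruction. The snippet below is a minimal, self-contained sketch of that indexing pattern; the helper name and the empty table initializer are illustrative stand-ins, not LLVM API.

#include <cassert>
#include <cstdint>

using MCPhysReg = uint16_t;                // same width as llvm::MCPhysReg
static const MCPhysReg RRegs[32] = {};     // stands in for PPC_REGS0_31(PPC::R)

// Hypothetical helper: a 5-bit encoded register field selects one table entry.
static MCPhysReg lookupReg(const MCPhysReg (&Table)[32], uint64_t RegNo) {
  assert(RegNo < 32 && "encoded register field out of range");
  return Table[RegNo];
}
// e.g. lookupReg(RRegs, 5) would return the entry standing in for PPC::R5.
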
diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
index 729cb35cbebc..df671f53cbd8 100644
--- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
+++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp
@@ -90,6 +90,12 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
return {XCOFF::RelocationType::R_RBR, EncodedSignednessIndicator | 25};
case PPC::fixup_ppc_br24abs:
return {XCOFF::RelocationType::R_RBA, EncodedSignednessIndicator | 25};
+ case PPC::fixup_ppc_nofixup: {
+ if (Modifier == MCSymbolRefExpr::VK_None)
+ return {XCOFF::RelocationType::R_REF, 0};
+ else
+ llvm_unreachable("Unsupported Modifier");
+ } break;
case FK_Data_4:
case FK_Data_8:
const uint8_t SignAndSizeForFKData =
@@ -102,6 +108,8 @@ std::pair<uint8_t, uint8_t> PPCXCOFFObjectWriter::getRelocTypeAndSignSize(
return {XCOFF::RelocationType::R_TLS, SignAndSizeForFKData};
case MCSymbolRefExpr::VK_PPC_AIX_TLSGDM:
return {XCOFF::RelocationType::R_TLSM, SignAndSizeForFKData};
+ case MCSymbolRefExpr::VK_PPC_AIX_TLSLE:
+ return {XCOFF::RelocationType::R_TLS_LE, SignAndSizeForFKData};
case MCSymbolRefExpr::VK_None:
return {XCOFF::RelocationType::R_POS, SignAndSizeForFKData};
}
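
In the hunk above, the second member of each returned pair is a sign-and-size byte, following the "EncodedSignednessIndicator | 25" pattern for the 26-bit branch forms. The stand-alone sketch below illustrates that encoding under the assumption that the low bits carry the relocated field's bit width minus one and the top bit marks a signed field; the mask values are assumptions mirroring that convention, not values copied from LLVM.

#include <cstdint>
#include <cstdio>

// Assumed layout: top bit = signed, low six bits = bit width minus one.
constexpr uint8_t kAssumedSignBit = 0x80;
constexpr uint8_t SignAndSize = kAssumedSignBit | 25;

int main() {
  std::printf("signed: %d, field width: %u bits\n",
              (SignAndSize & kAssumedSignBit) != 0,
              unsigned(SignAndSize & 0x3f) + 1);  // prints 26 for the br24 forms
  return 0;
}
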
diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index f7d07a06c33e..0827e528a80f 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
// Automatically generated file, do not edit!
//
-// This file defines the itinerary class data for the POWER10 processor.
+// This file defines instruction data for SchedModel of the POWER10 processor.
//
//===----------------------------------------------------------------------===//
// 22 Cycles Binary Floating Point operations, 2 input operands
@@ -307,41 +307,32 @@ def : InstRW<[P10W_BF_7C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
FSELD_rec, FSELS_rec
)>;
-// 2 Cycles Branch operations, 0 input operands
-def : InstRW<[P10W_BR_2C, P10W_DISP_ANY],
- (instrs
- BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, BDZLRp, gBCLR,
- BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, gBCLRL,
- BL, BL8, BL8_NOP, BL8_NOP_RM, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_RM, BL8_NOTOC_TLS, BL8_RM, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_NOP_RM, BL_RM, BL_TLS
-)>;
-
// 2 Cycles Branch operations, 1 input operands
def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read],
(instrs
B, BCC, BCCA, BCCCTR, BCCCTR8, BCCCTRL, BCCCTRL8, BCCL, BCCLA, BCCLR, BCCLRL, CTRL_DEP, TAILB, TAILB8,
BA, TAILBA, TAILBA8,
- BC, BCTR, BCTR8, BCTRL, BCTRL8, BCTRL8_LDinto_toc, BCTRL8_LDinto_toc_RM, BCTRL8_RM, BCTRL_LWZinto_toc, BCTRL_LWZinto_toc_RM, BCTRL_RM, BCn, BDNZ, BDNZ8, BDNZm, BDNZp, BDZ, BDZ8, BDZm, BDZp, TAILBCTR, TAILBCTR8, gBC, gBCat,
- BCL, BCLalways, BCLn, BDNZL, BDNZLm, BDNZLp, BDZL, BDZLm, BDZLp, gBCL, gBCLat,
- BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM
-)>;
-
-// 2 Cycles Branch operations, 3 input operands
-def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read, P10BR_Read],
- (instrs
BCCTR, BCCTR8, BCCTR8n, BCCTRn, gBCCTR,
- BCCTRL, BCCTRL8, BCCTRL8n, BCCTRLn, gBCCTRL
+ BCCTRL, BCCTRL8, BCCTRL8n, BCCTRLn, gBCCTRL,
+ BCLR, BCLRn, BDNZLR, BDNZLR8, BDNZLRm, BDNZLRp, BDZLR, BDZLR8, BDZLRm, BDZLRp, gBCLR,
+ BCLRL, BCLRLn, BDNZLRL, BDNZLRLm, BDNZLRLp, BDZLRL, BDZLRLm, BDZLRLp, gBCLRL,
+ BL, BL8, BL8_NOP, BL8_NOP_RM, BL8_NOP_TLS, BL8_NOTOC, BL8_NOTOC_RM, BL8_NOTOC_TLS, BL8_RM, BL8_TLS, BL8_TLS_, BLR, BLR8, BLRL, BL_NOP, BL_NOP_RM, BL_RM, BL_TLS,
+ BLA, BLA8, BLA8_NOP, BLA8_NOP_RM, BLA8_RM, BLA_RM
)>;
-// 2 Cycles Branch operations, 4 input operands
-def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read, P10BR_Read, P10BR_Read],
+// 2 Cycles Branch operations, 2 input operands
+def : InstRW<[P10W_BR_2C, P10W_DISP_ANY, P10BR_Read, P10BR_Read],
(instrs
+ BC, BCTR, BCTR8, BCTRL, BCTRL8, BCTRL8_LDinto_toc, BCTRL8_LDinto_toc_RM, BCTRL8_RM, BCTRL_LWZinto_toc, BCTRL_LWZinto_toc_RM, BCTRL_RM, BCn, BDNZ, BDNZ8, BDNZm, BDNZp, BDZ, BDZ8, BDZm, BDZp, TAILBCTR, TAILBCTR8, gBC, gBCat,
BDNZA, BDNZAm, BDNZAp, BDZA, BDZAm, BDZAp, gBCA, gBCAat,
+ BCL, BCLalways, BCLn, BDNZL, BDNZLm, BDNZLp, BDZL, BDZLm, BDZLp, gBCL, gBCLat,
BDNZLA, BDNZLAm, BDNZLAp, BDZLA, BDZLAm, BDZLAp, gBCLA, gBCLAat
)>;
// 7 Cycles Crypto operations, 1 input operands
def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read],
(instrs
+ VGNB,
VSBOX
)>;
@@ -358,7 +349,6 @@ def : InstRW<[P10W_CY_7C, P10W_DISP_ANY, P10CY_Read, P10CY_Read],
VCIPHERLAST,
VCLZDM,
VCTZDM,
- VGNB,
VNCIPHER,
VNCIPHERLAST,
VPDEPD,
@@ -384,29 +374,24 @@ def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read],
XSCVSDQP,
XSCVSQQP,
XSCVUDQP,
- XSCVUQQP
+ XSCVUQQP,
+ XSRQPI,
+ XSRQPIX,
+ XSRQPXP
)>;
// 13 Cycles Decimal Floating Point operations, 2 input operands
def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
(instrs
+ BCDSR_rec,
XSADDQP,
XSADDQPO,
XSSUBQP,
XSSUBQPO
)>;
-// 13 Cycles Decimal Floating Point operations, 3 input operands
-def : InstRW<[P10W_DF_13C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read],
- (instrs
- BCDSR_rec,
- XSRQPI,
- XSRQPIX,
- XSRQPXP
-)>;
-
// 2-way crack instructions
-// 13 Cycles Decimal Floating Point operations, and 3 Cycles Store operations, 2 input operands
+// 13 Cycles Decimal Floating Point operations, and 3 Cycles Store operations, 1 input operands
def : InstRW<[P10W_DF_13C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY],
(instrs
HASHST, HASHST8,
@@ -439,8 +424,8 @@ def : InstRW<[P10W_DF_25C, P10W_DISP_ANY, P10DF_Read, P10DF_Read, P10DF_Read],
XSNMSUBQPO
)>;
-// 38 Cycles Decimal Floating Point operations, 2 input operands
-def : InstRW<[P10W_DF_38C, P10W_DISP_ANY, P10DF_Read, P10DF_Read],
+// 38 Cycles Decimal Floating Point operations, 1 input operands
+def : InstRW<[P10W_DF_38C, P10W_DISP_ANY, P10DF_Read],
(instrs
BCDCFSQ_rec
)>;
@@ -594,20 +579,26 @@ def : InstRW<[P10W_DV_83C, P10W_DISP_ANY, P10DV_Read, P10DV_Read],
// 5 Cycles Fixed-Point and BCD operations, 1 input operands
def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read],
(instrs
+ BCDCFN_rec,
+ BCDCFZ_rec,
BCDCTN_rec,
+ BCDCTZ_rec,
+ BCDSETSGN_rec,
VMUL10CUQ,
VMUL10UQ,
- XSXSIGQP
+ XSTSTDCQP,
+ XSXSIGQP,
+ XXGENPCVBM
)>;
// 5 Cycles Fixed-Point and BCD operations, 2 input operands
def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read],
(instrs
- BCDCFN_rec,
- BCDCFZ_rec,
+ BCDADD_rec,
BCDCPSGN_rec,
- BCDCTZ_rec,
- BCDSETSGN_rec,
+ BCDS_rec,
+ BCDSUB_rec,
+ BCDTRUNC_rec,
BCDUS_rec,
BCDUTRUNC_rec,
VADDCUQ,
@@ -623,18 +614,12 @@ def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read],
XSCMPOQP,
XSCMPUQP,
XSMAXCQP,
- XSMINCQP,
- XSTSTDCQP,
- XXGENPCVBM
+ XSMINCQP
)>;
// 5 Cycles Fixed-Point and BCD operations, 3 input operands
def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read, P10DX_Read],
(instrs
- BCDADD_rec,
- BCDS_rec,
- BCDSUB_rec,
- BCDTRUNC_rec,
VADDECUQ,
VADDEUQM,
VSUBECUQ,
@@ -644,12 +629,14 @@ def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read, P10DX_Read],
// 4 Cycles ALU2 operations, 0 input operands
def : InstRW<[P10W_F2_4C, P10W_DISP_ANY],
(instrs
- TRAP, TW
+ MTVSRBMI
)>;
// 4 Cycles ALU2 operations, 1 input operands
def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read],
(instrs
+ CBCDTD, CBCDTD8,
+ CDTBCD, CDTBCD8,
CNTLZD,
CNTLZD_rec,
CNTLZW, CNTLZW8,
@@ -658,9 +645,9 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read],
CNTTZD_rec,
CNTTZW, CNTTZW8,
CNTTZW8_rec, CNTTZW_rec,
+ EXTSWSLI_32_64_rec, EXTSWSLI_rec,
FTSQRT,
MTVSRBM,
- MTVSRBMI,
MTVSRDM,
MTVSRHM,
MTVSRQM,
@@ -668,10 +655,18 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read],
POPCNTB, POPCNTB8,
POPCNTD,
POPCNTW,
+ RLDIC_rec,
+ RLDICL_32_rec, RLDICL_rec,
+ RLDICR_rec,
+ RLWINM8_rec, RLWINM_rec,
VCLZB,
VCLZD,
VCLZH,
VCLZW,
+ VCNTMBB,
+ VCNTMBD,
+ VCNTMBH,
+ VCNTMBW,
VCTZB,
VCTZD,
VCTZH,
@@ -692,27 +687,40 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read],
VPOPCNTW,
VPRTYBD,
VPRTYBW,
+ VSHASIGMAD,
+ VSHASIGMAW,
XSCVHPDP,
XSCVSPDPN,
XSTSQRTDP,
+ XSTSTDCDP,
+ XSTSTDCSP,
XVCVHPSP,
XVTLSBB,
XVTSQRTDP,
- XVTSQRTSP
+ XVTSQRTSP,
+ XVTSTDCDP,
+ XVTSTDCSP
)>;
// 4 Cycles ALU2 operations, 2 input operands
def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
(instrs
CMPEQB,
- EXTSWSLI_32_64_rec, EXTSWSLI_rec,
+ CMPRB, CMPRB8,
FCMPOD, FCMPOS,
FCMPUD, FCMPUS,
FTDIV,
+ RLDCL_rec,
+ RLDCR_rec,
+ RLDIMI_rec,
+ RLWIMI8_rec, RLWIMI_rec,
+ RLWNM8_rec, RLWNM_rec,
SLD_rec,
SLW8_rec, SLW_rec,
SRD_rec,
SRW8_rec, SRW_rec,
+ TDI,
+ TWI,
VABSDUB,
VABSDUH,
VABSDUW,
@@ -763,10 +771,6 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
VCMPNEZW_rec,
VCMPSQ,
VCMPUQ,
- VCNTMBB,
- VCNTMBD,
- VCNTMBH,
- VCNTMBW,
VMAXFP,
VMINFP,
VSUBCUW,
@@ -789,8 +793,6 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
XSMINDP,
XSMINJDP,
XSTDIVDP,
- XSTSTDCDP,
- XSTSTDCSP,
XVCMPEQDP,
XVCMPEQDP_rec,
XVCMPEQSP,
@@ -808,39 +810,24 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
XVMINDP,
XVMINSP,
XVTDIVDP,
- XVTDIVSP,
- XVTSTDCDP,
- XVTSTDCSP
+ XVTDIVSP
)>;
// 4 Cycles ALU2 operations, 3 input operands
def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
(instrs
- CMPRB, CMPRB8,
- RLDCL_rec,
- RLDCR_rec,
- RLDIC_rec,
- RLDICL_32_rec, RLDICL_rec,
- RLDICR_rec,
TD,
- TDI,
- TWI,
- VSHASIGMAD,
- VSHASIGMAW
-)>;
-
-// 4 Cycles ALU2 operations, 4 input operands
-def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read],
- (instrs
- RLDIMI_rec,
- RLWINM8_rec, RLWINM_rec,
- RLWNM8_rec, RLWNM_rec
+ TRAP, TW
)>;
-// 4 Cycles ALU2 operations, 5 input operands
-def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read, P10F2_Read],
+// Single crack instructions
+// 4 Cycles ALU2 operations, 1 input operands
+def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read],
(instrs
- RLWIMI8_rec, RLWIMI_rec
+ SRADI_rec,
+ SRAWI_rec,
+ TABORTDCI,
+ TABORTWCI
)>;
// Single crack instructions
@@ -848,19 +835,9 @@ def : InstRW<[P10W_F2_4C, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read, P10
def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read],
(instrs
SRAD_rec,
- SRADI_rec,
SRAW_rec,
- SRAWI_rec
-)>;
-
-// Single crack instructions
-// 4 Cycles ALU2 operations, 3 input operands
-def : InstRW<[P10W_F2_4C, P10W_DISP_EVEN, P10W_DISP_ANY, P10F2_Read, P10F2_Read, P10F2_Read],
- (instrs
TABORTDC,
- TABORTDCI,
- TABORTWC,
- TABORTWCI
+ TABORTWC
)>;
// 2-way crack instructions
@@ -898,32 +875,34 @@ def : InstRW<[P10W_FX_2C, P10W_DISP_ANY, P10FX_Read],
// 3 Cycles ALU operations, 0 input operands
def : InstRW<[P10W_FX_3C, P10W_DISP_ANY],
(instrs
- CR6SET, CREQV, CRSET,
DSS, DSSALL,
MCRXRX,
MFCTR, MFCTR8,
MFLR, MFLR8,
- NOP, NOP_GT_PWR6, NOP_GT_PWR7, ORI, ORI8,
- VXOR, V_SET0, V_SET0B, V_SET0H,
- XXLEQV, XXLEQVOnes,
- XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz
+ WAIT
)>;
// 3 Cycles ALU operations, 1 input operands
def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
(instrs
- ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, LI, LI8,
- ADDIS, ADDIS8, ADDISdtprelHA32, ADDIStocHA, ADDIStocHA8, LIS, LIS8,
+ ADDI, ADDI8, ADDIdtprelL32, ADDItlsldLADDR32, ADDItocL, LI, LI8,
+ ADDIC, ADDIC8,
+ ADDIS, ADDIS8, ADDISdtprelHA32, ADDIStocHA, ADDIStocHA8, LIS, LIS8,
ADDME, ADDME8,
ADDME8O, ADDMEO,
ADDZE, ADDZE8,
ADDZE8O, ADDZEO,
+ ANDI8_rec, ANDI_rec,
+ ANDIS8_rec, ANDIS_rec,
+ CMPDI, CMPWI,
+ CMPLDI, CMPLWI,
EXTSB, EXTSB8, EXTSB8_32_64,
EXTSB8_rec, EXTSB_rec,
EXTSH, EXTSH8, EXTSH8_32_64,
EXTSH8_rec, EXTSH_rec,
EXTSW, EXTSW_32, EXTSW_32_64,
EXTSW_32_64_rec, EXTSW_rec,
+ EXTSWSLI, EXTSWSLI_32_64,
FABSD, FABSS,
FMR,
FNABSD, FNABSS,
@@ -939,11 +918,20 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
NEG, NEG8,
NEG8_rec, NEG_rec,
NEG8O, NEGO,
+ NOP, NOP_GT_PWR6, NOP_GT_PWR7, ORI, ORI8,
+ ORIS, ORIS8,
+ RLDIC,
+ RLDICL, RLDICL_32, RLDICL_32_64,
+ RLDICR, RLDICR_32,
+ RLWINM, RLWINM8,
SETB, SETB8,
SETBC, SETBC8,
SETBCR, SETBCR8,
SETNBC, SETNBC8,
SETNBCR, SETNBCR8,
+ SRADI, SRADI_32,
+ SRAWI,
+ SUBFIC, SUBFIC8,
SUBFME, SUBFME8,
SUBFME8O, SUBFMEO,
SUBFZE, SUBFZE8,
@@ -956,7 +944,8 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read],
VEXTSW2D, VEXTSW2Ds,
VNEGD,
VNEGW,
- WAIT,
+ XORI, XORI8,
+ XORIS, XORIS8,
XSABSDP,
XSABSQP,
XSNABSDP, XSNABSDPs,
@@ -985,29 +974,27 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
ADD4_rec, ADD8_rec,
ADDE, ADDE8,
ADDE8O, ADDEO,
- ADDIC, ADDIC8,
+ ADDEX, ADDEX8,
ADD4O, ADD8O,
AND, AND8,
AND8_rec, AND_rec,
ANDC, ANDC8,
ANDC8_rec, ANDC_rec,
- ANDI8_rec, ANDI_rec,
- ANDIS8_rec, ANDIS_rec,
CMPD, CMPW,
CMPB, CMPB8,
- CMPDI, CMPWI,
CMPLD, CMPLW,
- CMPLDI, CMPLWI,
CRAND,
CRANDC,
+ CR6SET, CREQV, CRSET,
CRNAND,
CRNOR,
CROR,
CRORC,
CR6UNSET, CRUNSET, CRXOR,
+ DST, DST64, DSTT, DSTT64,
+ DSTST, DSTST64, DSTSTT, DSTSTT64,
EQV, EQV8,
EQV8_rec, EQV_rec,
- EXTSWSLI, EXTSWSLI_32_64,
FCPSGND, FCPSGNS,
NAND, NAND8,
NAND8_rec, NAND_rec,
@@ -1017,20 +1004,21 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
OR8_rec, OR_rec,
ORC, ORC8,
ORC8_rec, ORC_rec,
- ORIS, ORIS8,
+ RLDCL,
+ RLDCR,
+ RLDIMI,
+ RLWIMI, RLWIMI8,
+ RLWNM, RLWNM8,
SLD,
SLW, SLW8,
SRAD,
- SRADI, SRADI_32,
SRAW,
- SRAWI,
SRD,
SRW, SRW8,
SUBF, SUBF8,
SUBF8_rec, SUBF_rec,
SUBFE, SUBFE8,
SUBFE8O, SUBFEO,
- SUBFIC, SUBFIC8,
SUBF8O, SUBFO,
VADDUBM,
VADDUDM,
@@ -1101,10 +1089,9 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
VSUBUDM,
VSUBUHM,
VSUBUWM,
+ VXOR, V_SET0, V_SET0B, V_SET0H,
XOR, XOR8,
XOR8_rec, XOR_rec,
- XORI, XORI8,
- XORIS, XORIS8,
XSCPSGNDP,
XSCPSGNQP,
XSIEXPDP,
@@ -1115,69 +1102,52 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
XVIEXPSP,
XXLAND,
XXLANDC,
+ XXLEQV, XXLEQVOnes,
XXLNAND,
XXLNOR,
XXLOR, XXLORf,
- XXLORC
+ XXLORC,
+ XXLXOR, XXLXORdpz, XXLXORspz, XXLXORz
)>;
// 3 Cycles ALU operations, 3 input operands
def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read],
(instrs
- ADDEX, ADDEX8,
- DST, DST64, DSTT, DSTT64,
- DSTST, DSTST64, DSTSTT, DSTSTT64,
ISEL, ISEL8,
- RLDCL,
- RLDCR,
- RLDIC,
- RLDICL, RLDICL_32, RLDICL_32_64,
- RLDICR, RLDICR_32,
VRLDMI,
VRLWMI,
VSEL,
XXSEL
)>;
-// 3 Cycles ALU operations, 4 input operands
-def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
- (instrs
- RLDIMI,
- RLWINM, RLWINM8,
- RLWNM, RLWNM8
-)>;
-
-// 3 Cycles ALU operations, 5 input operands
-def : InstRW<[P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
- (instrs
- RLWIMI, RLWIMI8
-)>;
-
// Single crack instructions
// 3 Cycles ALU operations, 0 input operands
def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
(instrs
MFFS,
MFFS_rec,
+ MFFSCDRNI,
+ MFFSCRNI,
MFFSL,
MFVSCR,
- TRECHKPT
+ MTFSB0,
+ TBEGIN,
+ TRECHKPT,
+ TSR
)>;
// Single crack instructions
// 3 Cycles ALU operations, 1 input operands
def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read],
(instrs
+ ADDIC_rec,
ADDME8_rec, ADDME_rec,
ADDME8O_rec, ADDMEO_rec,
ADDZE8_rec, ADDZE_rec,
ADDZE8O_rec, ADDZEO_rec,
MCRFS,
MFFSCDRN,
- MFFSCDRNI,
MFFSCRN,
- MFFSCRNI,
- MTFSB0,
MTVSCR,
NEG8O_rec, NEGO_rec,
SUBFME8_rec, SUBFME_rec,
@@ -1185,9 +1155,7 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read],
SUBFZE8_rec, SUBFZE_rec,
SUBFZE8O_rec, SUBFZEO_rec,
TABORT,
- TBEGIN,
- TRECLAIM,
- TSR
+ TRECLAIM
)>;
// Single crack instructions
@@ -1196,7 +1164,6 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read, P10FX_Read]
(instrs
ADDE8_rec, ADDE_rec,
ADDE8O_rec, ADDEO_rec,
- ADDIC_rec,
ADD4O_rec, ADD8O_rec,
SUBFE8_rec, SUBFE_rec,
SUBFE8O_rec, SUBFEO_rec,
@@ -1204,12 +1171,24 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10FX_Read, P10FX_Read]
)>;
// 2-way crack instructions
+// 3 Cycles ALU operations, and 4 Cycles ALU2 operations, 2 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_F2_4C, P10W_DISP_ANY],
+ (instrs
+ ADDG6S, ADDG6S8
+)>;
+
+// 2-way crack instructions
// 3 Cycles ALU operations, and 3 Cycles ALU operations, 0 input operands
def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
(instrs
HRFID,
MFFSCE,
+ MTFSB1,
+ MTFSFI, MTFSFIb,
+ MTFSFI_rec,
+ RFEBB,
RFID,
+ SC,
STOP
)>;
@@ -1221,9 +1200,8 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read]
FMR_rec,
FNABSD_rec, FNABSS_rec,
FNEGD_rec, FNEGS_rec,
- MTFSB1,
- RFEBB,
- SC
+ MTFSF, MTFSFb,
+ MTFSF_rec
)>;
// 2-way crack instructions
@@ -1234,27 +1212,11 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read,
ADDC8_rec, ADDC_rec,
ADDC8O, ADDCO,
FCPSGND_rec, FCPSGNS_rec,
- MTFSF, MTFSFb,
- MTFSFI, MTFSFIb,
SUBFC, SUBFC8,
SUBFC8_rec, SUBFC_rec,
SUBFC8O, SUBFCO
)>;
-// 2-way crack instructions
-// 3 Cycles ALU operations, and 3 Cycles ALU operations, 3 input operands
-def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read],
- (instrs
- MTFSFI_rec
-)>;
-
-// 2-way crack instructions
-// 3 Cycles ALU operations, and 3 Cycles ALU operations, 4 input operands
-def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read, P10FX_Read, P10FX_Read],
- (instrs
- MTFSF_rec
-)>;
-
// 4-way crack instructions
// 3 Cycles ALU operations, 3 Cycles ALU operations, 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10FX_Read, P10FX_Read],
@@ -1274,42 +1236,61 @@ def : InstRW<[P10W_FX_3C, P10W_DISP_EVEN, P10W_PM_4C, P10W_DISP_ANY],
)>;
// 2-way crack instructions
-// 3 Cycles ALU operations, and 3 Cycles ALU operations, 2 input operands
-def : InstRW<[P10W_FX_3C, P10W_DISP_PAIR, P10W_FX_3C, P10FX_Read, P10FX_Read],
+// 3 Cycles ALU operations, and 3 Cycles ALU operations, 1 input operands
+def : InstRW<[P10W_FX_3C, P10W_DISP_PAIR, P10W_FX_3C, P10FX_Read],
(instrs
MTCRF, MTCRF8
)>;
+// 6 Cycles Load operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_ANY],
+ (instrs
+ LBZ, LBZ8,
+ LD, LDtoc, LDtocBA, LDtocCPT, LDtocJTI, LDtocL, SPILLTOVSR_LD,
+ DFLOADf32, DFLOADf64, LFD,
+ LHA, LHA8,
+ LHZ, LHZ8,
+ LWA, LWA_32,
+ LWZ, LWZ8, LWZtoc, LWZtocL,
+ LXSD,
+ LXV
+)>;
+
// 6 Cycles Load operations, 1 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
(instrs
- LBZ, LBZ8,
- LD, LDtoc, LDtocBA, LDtocCPT, LDtocJTI, LDtocL, SPILLTOVSR_LD,
+ LXVL,
+ LXVLL
+)>;
+
+// 6 Cycles Load operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+ (instrs
+ DCBT,
+ DCBTST,
+ ICBT,
+ LBZX, LBZX8, LBZXTLS, LBZXTLS_, LBZXTLS_32,
LDBRX,
- DFLOADf32, DFLOADf64, LFD,
- LFDX, XFLOADf32, XFLOADf64,
+ LDX, LDXTLS, LDXTLS_, SPILLTOVSR_LDX,
+ LFDX, LFDXTLS, LFDXTLS_, XFLOADf32, XFLOADf64,
LFIWAX, LIWAX,
LFIWZX, LIWZX,
- LHA, LHA8,
- LHAX, LHAX8,
+ LHAX, LHAX8, LHAXTLS, LHAXTLS_, LHAXTLS_32,
LHBRX, LHBRX8,
- LHZ, LHZ8,
+ LHZX, LHZX8, LHZXTLS, LHZXTLS_, LHZXTLS_32,
LVEBX,
LVEHX,
LVEWX,
LVX,
LVXL,
- LWA, LWA_32,
- LWAX, LWAX_32,
+ LWAX, LWAXTLS, LWAXTLS_, LWAXTLS_32, LWAX_32,
LWBRX, LWBRX8,
- LWZ, LWZ8, LWZtoc, LWZtocL,
- LXSD,
+ LWZX, LWZX8, LWZXTLS, LWZXTLS_, LWZXTLS_32,
LXSDX,
LXSIBZX,
LXSIHZX,
LXSIWAX,
LXSIWZX,
- LXV,
LXVB16X,
LXVD2X,
LXVDSX,
@@ -1323,22 +1304,8 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
LXVX
)>;
-// 6 Cycles Load operations, 2 input operands
-def : InstRW<[P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
- (instrs
- DCBT,
- DCBTST,
- ICBT,
- LBZX, LBZX8, LBZXTLS, LBZXTLS_, LBZXTLS_32,
- LDX, LDXTLS, LDXTLS_, SPILLTOVSR_LDX,
- LHZX, LHZX8, LHZXTLS, LHZXTLS_, LHZXTLS_32,
- LWZX, LWZX8, LWZXTLS, LWZXTLS_, LWZXTLS_32,
- LXVL,
- LXVLL
-)>;
-
// 2-way crack instructions
-// 6 Cycles Load operations, and 13 Cycles Decimal Floating Point operations, 2 input operands
+// 6 Cycles Load operations, and 13 Cycles Decimal Floating Point operations, 1 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DF_13C, P10W_DISP_ANY],
(instrs
HASHCHK, HASHCHK8,
@@ -1349,6 +1316,7 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DF_13C, P10W_DISP_ANY],
// 6 Cycles Load operations, 0 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY],
(instrs
+ DARN,
SLBIA
)>;
@@ -1356,11 +1324,7 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY],
// 6 Cycles Load operations, 1 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read],
(instrs
- DARN,
- LBARX, LBARXL,
- LDARX, LDARXL,
- LHARX, LHARXL,
- LWARX, LWARXL,
+ MTSPR, MTSPR8, MTSR, MTUDSCR, MTVRSAVE, MTVRSAVEv,
SLBFEE_rec,
SLBIE,
SLBMFEE,
@@ -1371,48 +1335,57 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read],
// 6 Cycles Load operations, 2 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
(instrs
+ LBARX, LBARXL,
LBZCIX,
+ LDARX, LDARXL,
LDCIX,
+ LHARX, LHARXL,
LHZCIX,
- LWZCIX,
- MTSPR, MTSPR8, MTSR, MTUDSCR, MTVRSAVE, MTVRSAVEv
+ LWARX, LWARXL,
+ LWZCIX
)>;
// Expand instructions
-// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 1 input operands
-def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
+// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY],
(instrs
LMW
)>;
// Expand instructions
-// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 2 input operands
-def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read, P10LD_Read],
+// 6 Cycles Load operations, 6 Cycles Load operations, 6 Cycles Load operations, and 6 Cycles Load operations, 1 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10LD_Read],
(instrs
LSWI
)>;
// 2-way crack instructions
-// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 0 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_SX_3C, P10W_DISP_ANY],
(instrs
LBZU, LBZU8,
- LBZUX, LBZUX8,
LDU,
- LDUX,
LFDU,
- LFDUX,
LHAU, LHAU8,
- LHAUX, LHAUX8,
LHZU, LHZU8,
+ LWZU, LWZU8
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_EVEN, P10W_SX_3C, P10W_DISP_ANY],
+ (instrs
+ LBZUX, LBZUX8,
+ LDUX,
+ LFDUX,
+ LHAUX, LHAUX8,
LHZUX, LHZUX8,
LWAUX,
- LWZU, LWZU8,
LWZUX, LWZUX8
)>;
-// 6 Cycles Load operations, 1 input operands
-def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read],
+// 6 Cycles Load operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR],
(instrs
PLBZ, PLBZ8, PLBZ8pc, PLBZpc,
PLD, PLDpc,
@@ -1429,20 +1402,32 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read],
)>;
// 2-way crack instructions
-// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 1 input operands
+// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 0 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C],
(instrs
LFS,
- LFSX,
- LXSSP,
+ LXSSP
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 4 Cycles ALU2 operations, 2 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C],
+ (instrs
+ LFSX, LFSXTLS, LFSXTLS_,
LXSSPX
)>;
// 4-way crack instructions
-// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands
+// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C, P10W_SX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY],
+ (instrs
+ LFSU
+)>;
+
+// 4-way crack instructions
+// 6 Cycles Load operations, 4 Cycles ALU2 operations, 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 2 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_F2_4C, P10W_SX_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY],
(instrs
- LFSU,
LFSUX
)>;
@@ -1461,10 +1446,16 @@ def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_LD_6C, P10W_DISP_PAIR, P10LD_Read
)>;
// 2-way crack instructions
-// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 0 input operands
+def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_SX_3C],
+ (instrs
+ LXVP
+)>;
+
+// 2-way crack instructions
+// 6 Cycles Load operations, and 3 Cycles Simple Fixed-point (SFX) operations, 2 input operands
def : InstRW<[P10W_LD_6C, P10W_DISP_PAIR, P10W_SX_3C],
(instrs
- LXVP,
LXVPX
)>;
@@ -1520,34 +1511,21 @@ def : InstRW<[P10W_MM_10C, P10W_DISP_ANY, P10MM_Read, P10MM_Read, P10MM_Read],
XVI8GER4SPP
)>;
-// 10 Cycles SIMD Matrix Multiply Engine operations, 4 input operands
-def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
- (instrs
- PMXVF32GER,
- PMXVF64GER
-)>;
-
-// 10 Cycles SIMD Matrix Multiply Engine operations, 5 input operands
-def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
+// 10 Cycles SIMD Matrix Multiply Engine operations, 2 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read],
(instrs
PMXVBF16GER2,
PMXVF16GER2,
- PMXVF32GERNN,
- PMXVF32GERNP,
- PMXVF32GERPN,
- PMXVF32GERPP,
- PMXVF64GERNN,
- PMXVF64GERNP,
- PMXVF64GERPN,
- PMXVF64GERPP,
+ PMXVF32GER,
+ PMXVF64GER,
PMXVI16GER2,
PMXVI16GER2S,
PMXVI4GER8,
PMXVI8GER4
)>;
-// 10 Cycles SIMD Matrix Multiply Engine operations, 6 input operands
-def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read, P10MM_Read],
+// 10 Cycles SIMD Matrix Multiply Engine operations, 3 input operands
+def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read],
(instrs
PMXVBF16GER2NN,
PMXVBF16GER2NP,
@@ -1557,6 +1535,14 @@ def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10MM_Read, P10MM_Read, P10MM_Read, P
PMXVF16GER2NP,
PMXVF16GER2PN,
PMXVF16GER2PP,
+ PMXVF32GERNN,
+ PMXVF32GERNP,
+ PMXVF32GERPN,
+ PMXVF32GERPP,
+ PMXVF64GERNN,
+ PMXVF64GERNP,
+ PMXVF64GERPN,
+ PMXVF64GERPP,
PMXVI16GER2PP,
PMXVI16GER2SPP,
PMXVI4GER8PP,
@@ -1578,6 +1564,12 @@ def : InstRW<[P10W_MM_10C, P10W_DISP_PAIR, P10W_FX_3C, P10W_MM_10C, P10W_DISP_PA
XXMFACC
)>;
+// 5 Cycles GPR Multiply operations, 1 input operands
+def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read],
+ (instrs
+ MULLI, MULLI8
+)>;
+
// 5 Cycles GPR Multiply operations, 2 input operands
def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read, P10MU_Read],
(instrs
@@ -1587,7 +1579,6 @@ def : InstRW<[P10W_MU_5C, P10W_DISP_ANY, P10MU_Read, P10MU_Read],
MULHWU,
MULLD,
MULLDO,
- MULLI, MULLI8,
MULLW,
MULLWO,
VMULHSD,
@@ -1620,7 +1611,11 @@ def : InstRW<[P10W_MU_5C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
// 4 Cycles Permute operations, 0 input operands
def : InstRW<[P10W_PM_4C, P10W_DISP_ANY],
(instrs
- VSPLTISW, V_SETALLONES, V_SETALLONESB, V_SETALLONESH
+ LXVKQ,
+ VSPLTISB,
+ VSPLTISH,
+ VSPLTISW, V_SETALLONES, V_SETALLONESB, V_SETALLONESH,
+ XXSPLTIB
)>;
// 4 Cycles Permute operations, 1 input operands
@@ -1629,17 +1624,21 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read],
BRD,
BRH, BRH8,
BRW, BRW8,
- LVSL,
- LVSR,
- LXVKQ,
MFVSRLD,
MTVSRWS,
VCLZLSBB,
VCTZLSBB,
+ VEXTRACTD,
+ VEXTRACTUB,
+ VEXTRACTUH,
+ VEXTRACTUW,
VGBBD,
+ VINSERTD,
+ VINSERTW,
VPRTYBQ,
- VSPLTISB,
- VSPLTISH,
+ VSPLTB, VSPLTBs,
+ VSPLTH, VSPLTHs,
+ VSPLTW,
VSTRIBL,
VSTRIBR,
VSTRIHL,
@@ -1657,30 +1656,34 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read],
XXBRH,
XXBRQ,
XXBRW,
- XXSPLTIB
+ XXEXTRACTUW,
+ XXGENPCVDM,
+ XXGENPCVHM,
+ XXGENPCVWM,
+ XXSPLTW, XXSPLTWs
)>;
// 4 Cycles Permute operations, 2 input operands
def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read],
(instrs
BPERMD,
+ LVSL,
+ LVSR,
MTVSRDD,
VBPERMD,
VBPERMQ,
VCLRLB,
VCLRRB,
- VEXTRACTD,
- VEXTRACTUB,
- VEXTRACTUH,
- VEXTRACTUW,
VEXTUBLX,
VEXTUBRX,
VEXTUHLX,
VEXTUHRX,
VEXTUWLX,
VEXTUWRX,
- VINSERTD,
- VINSERTW,
+ VINSD,
+ VINSERTB,
+ VINSERTH,
+ VINSW,
VMRGHB,
VMRGHH,
VMRGHW,
@@ -1701,23 +1704,19 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read],
VPKUWUM,
VPKUWUS,
VSL,
+ VSLDBI,
+ VSLDOI,
VSLO,
VSLV,
- VSPLTB, VSPLTBs,
- VSPLTH, VSPLTHs,
- VSPLTW,
VSR,
+ VSRDBI,
VSRO,
VSRV,
- XXEXTRACTUW,
- XXGENPCVDM,
- XXGENPCVHM,
- XXGENPCVWM,
+ XXINSERTW,
XXMRGHW,
XXMRGLW,
XXPERMDI, XXPERMDIs,
- XXSLDWI, XXSLDWIs,
- XXSPLTW, XXSPLTWs
+ XXSLDWI, XXSLDWIs
)>;
// 4 Cycles Permute operations, 3 input operands
@@ -1735,16 +1734,12 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read, P10PM_Read],
VINSBRX,
VINSBVLX,
VINSBVRX,
- VINSD,
VINSDLX,
VINSDRX,
- VINSERTB,
- VINSERTH,
VINSHLX,
VINSHRX,
VINSHVLX,
VINSHVRX,
- VINSW,
VINSWLX,
VINSWRX,
VINSWVLX,
@@ -1752,10 +1747,6 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_ANY, P10PM_Read, P10PM_Read, P10PM_Read],
VPERM,
VPERMR,
VPERMXOR,
- VSLDBI,
- VSLDOI,
- VSRDBI,
- XXINSERTW,
XXPERM,
XXPERMR
)>;
@@ -1767,13 +1758,19 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_EVEN, P10W_vMU_7C, P10W_DISP_ANY],
VSUMSWS
)>;
-// 4 Cycles Permute operations, 1 input operands
-def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read],
+// 4 Cycles Permute operations, 0 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR],
(instrs
XXSPLTIDP,
XXSPLTIW
)>;
+// 4 Cycles Permute operations, 1 input operands
+def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read],
+ (instrs
+ XXSPLTI32DX
+)>;
+
// 4 Cycles Permute operations, 3 input operands
def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read],
(instrs
@@ -1781,12 +1778,6 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read],
XXBLENDVD,
XXBLENDVH,
XXBLENDVW,
- XXSPLTI32DX
-)>;
-
-// 4 Cycles Permute operations, 4 input operands
-def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read, P10PM_Read],
- (instrs
XXEVAL,
XXPERMX
)>;
@@ -1794,53 +1785,65 @@ def : InstRW<[P10W_PM_4C, P10W_DISP_PAIR, P10PM_Read, P10PM_Read, P10PM_Read, P1
// 3 Cycles Store operations, 1 input operands
def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read],
(instrs
- DCBST,
- DCBZ,
- ICBI
+ PSTXVP, PSTXVPpc,
+ STB, STB8,
+ STBU, STBU8,
+ SPILLTOVSR_ST, STD,
+ STDU,
+ DFSTOREf32, DFSTOREf64, STFD,
+ STFDU,
+ STFS,
+ STFSU,
+ STH, STH8,
+ STHU, STHU8,
+ STW, STW8,
+ STWU, STWU8,
+ STXSD,
+ STXSSP,
+ STXV
)>;
// 3 Cycles Store operations, 2 input operands
def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
(instrs
+ CP_COPY, CP_COPY8,
DCBF,
- PSTXVP, PSTXVPpc,
- STB, STB8,
- STBU, STBU8,
+ DCBST,
+ DCBZ,
+ ICBI,
+ STXVL,
+ STXVLL
+)>;
+
+// 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
+ (instrs
STBUX, STBUX8,
- SPILLTOVSR_ST, STD,
+ STBX, STBX8, STBXTLS, STBXTLS_, STBXTLS_32,
STDBRX,
- STDU,
STDUX,
- DFSTOREf32, DFSTOREf64, STFD,
- STFDU,
+ SPILLTOVSR_STX, STDX, STDXTLS, STDXTLS_,
STFDUX,
- STFDX,
+ STFDX, STFDXTLS, STFDXTLS_,
STFIWX, STIWX,
- STFS,
- STFSU,
STFSUX,
- STFSX,
- STH, STH8,
+ STFSX, STFSXTLS, STFSXTLS_,
STHBRX,
- STHU, STHU8,
STHUX, STHUX8,
+ STHX, STHX8, STHXTLS, STHXTLS_, STHXTLS_32,
STVEBX,
STVEHX,
STVEWX,
STVX,
STVXL,
- STW, STW8,
STWBRX,
- STWU, STWU8,
STWUX, STWUX8,
- STXSD,
+ STWX, STWX8, STWXTLS, STWXTLS_, STWXTLS_32,
STXSDX,
STXSIBX, STXSIBXv,
STXSIHX, STXSIHXv,
STXSIWX,
- STXSSP,
STXSSPX,
- STXV,
STXVB16X,
STXVD2X,
STXVH8X,
@@ -1852,18 +1855,6 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
STXVX
)>;
-// 3 Cycles Store operations, 3 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
- (instrs
- CP_COPY, CP_COPY8,
- STBX, STBX8, STBXTLS, STBXTLS_, STBXTLS_32,
- SPILLTOVSR_STX, STDX, STDXTLS, STDXTLS_,
- STHX, STHX8, STHXTLS, STHXTLS_, STHXTLS_32,
- STWX, STWX8, STWXTLS, STWXTLS_, STWXTLS_32,
- STXVL,
- STXVLL
-)>;
-
// Single crack instructions
// 3 Cycles Store operations, 0 input operands
def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
@@ -1872,25 +1863,16 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
MSGSYNC,
SLBSYNC,
TCHECK,
+ TEND,
TLBSYNC
)>;
// Single crack instructions
-// 3 Cycles Store operations, 1 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read],
- (instrs
- TEND
-)>;
-
-// Single crack instructions
// 3 Cycles Store operations, 2 input operands
def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
(instrs
+ CP_PASTE8_rec, CP_PASTE_rec,
SLBIEG,
- STBCX,
- STDCX,
- STHCX,
- STWCX,
TLBIE
)>;
@@ -1898,29 +1880,26 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read]
// 3 Cycles Store operations, 3 input operands
def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
(instrs
- CP_PASTE8_rec, CP_PASTE_rec,
STBCIX,
+ STBCX,
STDCIX,
+ STDCX,
STHCIX,
- STWCIX
+ STHCX,
+ STWCIX,
+ STWCX
)>;
// 2-way crack instructions
// 3 Cycles Store operations, and 3 Cycles ALU operations, 0 input operands
def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
(instrs
- ISYNC
-)>;
-
-// 2-way crack instructions
-// 3 Cycles Store operations, and 3 Cycles ALU operations, 1 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
- (instrs
+ ISYNC,
SYNC
)>;
// Expand instructions
-// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 6 Cycles Load operations, and 3 Cycles Store operations, 2 input operands
+// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, 3 Cycles ALU operations, 6 Cycles Load operations, and 3 Cycles Store operations, 1 input operands
def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_FX_3C, P10W_DISP_ANY, P10W_LD_6C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY],
(instrs
LDAT,
@@ -1928,7 +1907,7 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C,
)>;
// 4-way crack instructions
-// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands
+// 3 Cycles Store operations, 3 Cycles ALU operations, 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY],
(instrs
STDAT,
@@ -1936,21 +1915,21 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY, P10W_ST_3C,
)>;
// Expand instructions
-// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
+// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 1 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read],
(instrs
STMW
)>;
// Expand instructions
-// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read, P10ST_Read],
+// 3 Cycles Store operations, 3 Cycles Store operations, 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_EVEN, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10W_ST_3C, P10W_DISP_ANY, P10ST_Read, P10ST_Read],
(instrs
STSWI
)>;
-// 3 Cycles Store operations, 2 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10ST_Read, P10ST_Read],
+// 3 Cycles Store operations, 1 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10ST_Read],
(instrs
PSTB, PSTB8, PSTB8pc, PSTBpc,
PSTD, PSTDpc,
@@ -1964,10 +1943,16 @@ def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10ST_Read, P10ST_Read],
)>;
// 2-way crack instructions
-// 3 Cycles Store operations, and 3 Cycles Store operations, 2 input operands
-def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10W_ST_3C, P10ST_Read, P10ST_Read],
+// 3 Cycles Store operations, and 3 Cycles Store operations, 1 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10W_ST_3C, P10ST_Read],
+ (instrs
+ STXVP
+)>;
+
+// 2-way crack instructions
+// 3 Cycles Store operations, and 3 Cycles Store operations, 3 input operands
+def : InstRW<[P10W_ST_3C, P10W_DISP_PAIR, P10W_ST_3C, P10ST_Read, P10ST_Read, P10ST_Read],
(instrs
- STXVP,
STXVPX
)>;
@@ -2009,27 +1994,21 @@ def : InstRW<[P10W_SX, P10W_DISP_ANY],
def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY],
(instrs
CLRBHRB,
- MFMSR
+ MFBHRBE,
+ MFMSR,
+ MFTB
)>;
// Single crack instructions
// 3 Cycles Simple Fixed-point (SFX) operations, 1 input operands
def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10SX_Read],
(instrs
- MFTB
-)>;
-
-// Single crack instructions
-// 3 Cycles Simple Fixed-point (SFX) operations, 2 input operands
-def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_DISP_ANY, P10SX_Read, P10SX_Read],
- (instrs
- MFBHRBE,
MTMSR,
MTMSRD
)>;
// 2-way crack instructions
-// 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 1 input operands
+// 3 Cycles Simple Fixed-point (SFX) operations, and 3 Cycles ALU operations, 0 input operands
def : InstRW<[P10W_SX_3C, P10W_DISP_EVEN, P10W_FX_3C, P10W_DISP_ANY],
(instrs
ADDPCIS
diff --git a/llvm/lib/Target/PowerPC/P9InstrResources.td b/llvm/lib/Target/PowerPC/P9InstrResources.td
index ac20dd353c84..395999c7242a 100644
--- a/llvm/lib/Target/PowerPC/P9InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -765,6 +765,7 @@ def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
(instrs
LFIWZX,
LFDX,
+ (instregex "LFDXTLS?(_)?$"),
LFD
)>;
@@ -815,9 +816,9 @@ def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
DISP_1C, DISP_1C],
(instrs
- (instregex "LHA(X)?(8)?$"),
+ (instregex "LHA(X)?(TLS)?(8)?(_32)?(_)?$"),
(instregex "CP_PASTE(8)?_rec$"),
- (instregex "LWA(X)?(_32)?$"),
+ (instregex "LWA(X)?(TLS)?(_32)?(_)?$"),
TCHECK
)>;
@@ -850,6 +851,7 @@ def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
DISP_3SLOTS_1C, DISP_3SLOTS_1C],
(instrs
LFSX,
+ (instregex "LFSXTLS?(_)?$"),
LFS
)>;
@@ -891,7 +893,7 @@ def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
// all three dispatches for the superslice.
def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
(instrs
- (instregex "STF(S|D|IWX|SX|DX)$"),
+ (instregex "STF(S|D|IWX|SX|DX|SXTLS|DXTLS|SXTLS_|DXTLS_)$"),
(instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
(instregex "STW(8)?$"),
(instregex "(D|X)FSTORE(f32|f64)$"),
@@ -1430,5 +1432,8 @@ def : InstRW<[],
DCCCI,
ICCCI,
ADDEX,
- ADDEX8
+ ADDEX8,
+ CDTBCD, CDTBCD8,
+ CBCDTD, CBCDTD8,
+ ADDG6S, ADDG6S8
)> { let Unsupported = 1; }
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index 8f84ae7efc24..0d3d71742bfb 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -127,8 +127,9 @@ class ModulePass;
/// General Dynamic model for AIX.
MO_TLSGD_FLAG = 32,
- /// MO_TPREL_FLAG - If this bit is set the symbol reference is relative to
- /// TLS Initial Exec model.
+ /// MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to
+ /// the thread pointer and the symbol can be used for the TLS Initial Exec
+ /// and Local Exec models.
MO_TPREL_FLAG = 64,
/// MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index d05723461103..3ba36f4f01e1 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -670,12 +670,6 @@ include "PPCCallingConv.td"
def PPCInstrInfo : InstrInfo {
let isLittleEndianEncoding = 1;
-
- // FIXME: Unset this when no longer needed!
- let decodePositionallyEncodedOperands = 1;
-
- let noNamedPositionallyEncodedOperands = 1;
- let useDeprecatedPositionallyEncodedOperands = 1;
}
def PPCAsmWriter : AsmWriter {
diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 30e96cff9e2c..a089d61616e1 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -28,8 +28,9 @@
#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -68,6 +69,7 @@
#include "llvm/Support/Process.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
@@ -80,6 +82,17 @@ using namespace llvm::XCOFF;
#define DEBUG_TYPE "asmprinter"
+STATISTIC(NumTOCEntries, "Number of Total TOC Entries Emitted.");
+STATISTIC(NumTOCConstPool, "Number of Constant Pool TOC Entries.");
+STATISTIC(NumTOCGlobalInternal,
+ "Number of Internal Linkage Global TOC Entries.");
+STATISTIC(NumTOCGlobalExternal,
+ "Number of External Linkage Global TOC Entries.");
+STATISTIC(NumTOCJumpTable, "Number of Jump Table TOC Entries.");
+STATISTIC(NumTOCThreadLocal, "Number of Thread Local TOC Entries.");
+STATISTIC(NumTOCBlockAddress, "Number of Block Address TOC Entries.");
+STATISTIC(NumTOCEHBlock, "Number of EH Block TOC Entries.");
+
static cl::opt<bool> EnableSSPCanaryBitInTB(
"aix-ssp-tb-bit", cl::init(false),
cl::desc("Enable Passing SSP Canary info in Trackback on AIX"), cl::Hidden);
@@ -148,7 +161,17 @@ public:
StringRef getPassName() const override { return "PowerPC Assembly Printer"; }
- MCSymbol *lookUpOrCreateTOCEntry(const MCSymbol *Sym,
+ enum TOCEntryType {
+ TOCType_ConstantPool,
+ TOCType_GlobalExternal,
+ TOCType_GlobalInternal,
+ TOCType_JumpTable,
+ TOCType_ThreadLocal,
+ TOCType_BlockAddress,
+ TOCType_EHBlock
+ };
+
+ MCSymbol *lookUpOrCreateTOCEntry(const MCSymbol *Sym, TOCEntryType Type,
MCSymbolRefExpr::VariantKind Kind =
MCSymbolRefExpr::VariantKind::VK_None);
@@ -174,6 +197,7 @@ public:
void LowerSTACKMAP(StackMaps &SM, const MachineInstr &MI);
void LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI);
void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
+ void EmitAIXTlsCallHelper(const MachineInstr *MI);
bool runOnMachineFunction(MachineFunction &MF) override {
Subtarget = &MF.getSubtarget<PPCSubtarget>();
bool Changed = AsmPrinter::runOnMachineFunction(MF);
@@ -266,6 +290,8 @@ public:
bool doFinalization(Module &M) override;
void emitTTypeReference(const GlobalValue *GV, unsigned Encoding) override;
+
+ void emitModuleCommandLines(Module &M) override;
};
} // end anonymous namespace
@@ -412,12 +438,43 @@ bool PPCAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
return false;
}
+static void collectTOCStats(PPCAsmPrinter::TOCEntryType Type) {
+ ++NumTOCEntries;
+ switch (Type) {
+ case PPCAsmPrinter::TOCType_ConstantPool:
+ ++NumTOCConstPool;
+ break;
+ case PPCAsmPrinter::TOCType_GlobalInternal:
+ ++NumTOCGlobalInternal;
+ break;
+ case PPCAsmPrinter::TOCType_GlobalExternal:
+ ++NumTOCGlobalExternal;
+ break;
+ case PPCAsmPrinter::TOCType_JumpTable:
+ ++NumTOCJumpTable;
+ break;
+ case PPCAsmPrinter::TOCType_ThreadLocal:
+ ++NumTOCThreadLocal;
+ break;
+ case PPCAsmPrinter::TOCType_BlockAddress:
+ ++NumTOCBlockAddress;
+ break;
+ case PPCAsmPrinter::TOCType_EHBlock:
+ ++NumTOCEHBlock;
+ break;
+ }
+}
+
/// lookUpOrCreateTOCEntry -- Given a symbol, look up whether a TOC entry
/// exists for it. If not, create one. Then return a symbol that references
/// the TOC entry.
MCSymbol *
-PPCAsmPrinter::lookUpOrCreateTOCEntry(const MCSymbol *Sym,
+PPCAsmPrinter::lookUpOrCreateTOCEntry(const MCSymbol *Sym, TOCEntryType Type,
MCSymbolRefExpr::VariantKind Kind) {
+ // If this is a new TOC entry add statistics about it.
+ if (!TOC.contains({Sym, Kind}))
+ collectTOCStats(Type);
+
MCSymbol *&TOCEntry = TOC[{Sym, Kind}];
if (!TOCEntry)
TOCEntry = createTempSymbol("C");
@@ -558,13 +615,26 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
/// This helper function creates the TlsGetAddr MCSymbol for AIX. We will
/// create the csect and use the qual-name symbol instead of creating just the
/// external symbol.
-static MCSymbol *createMCSymbolForTlsGetAddr(MCContext &Ctx) {
+static MCSymbol *createMCSymbolForTlsGetAddr(MCContext &Ctx, unsigned MIOpc) {
+ StringRef SymName =
+ MIOpc == PPC::GETtlsTpointer32AIX ? ".__get_tpointer" : ".__tls_get_addr";
return Ctx
- .getXCOFFSection(".__tls_get_addr", SectionKind::getText(),
+ .getXCOFFSection(SymName, SectionKind::getText(),
XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER))
->getQualNameSymbol();
}
+void PPCAsmPrinter::EmitAIXTlsCallHelper(const MachineInstr *MI) {
+ assert(Subtarget->isAIXABI() &&
+ "Only expecting to emit calls to get the thread pointer on AIX!");
+
+ MCSymbol *TlsCall = createMCSymbolForTlsGetAddr(OutContext, MI->getOpcode());
+ const MCExpr *TlsRef =
+ MCSymbolRefExpr::create(TlsCall, MCSymbolRefExpr::VK_None, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BLA).addExpr(TlsRef));
+ return;
+}
+
/// EmitTlsCall -- Given a GETtls[ld]ADDR[32] instruction, print a
/// call to __tls_get_addr to the current output stream.
void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
@@ -599,10 +669,7 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
assert(MI->getOperand(2).isReg() &&
MI->getOperand(2).getReg() == VarOffsetReg &&
"GETtls[ld]ADDR[32] must read GPR4");
- MCSymbol *TlsGetAddr = createMCSymbolForTlsGetAddr(OutContext);
- const MCExpr *TlsRef = MCSymbolRefExpr::create(
- TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
- EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::BLA).addExpr(TlsRef));
+ EmitAIXTlsCallHelper(MI);
return;
}
@@ -648,6 +715,48 @@ static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO,
}
}
+static bool hasTLSFlag(const MachineOperand &MO) {
+ unsigned Flags = MO.getTargetFlags();
+ if (Flags & PPCII::MO_TLSGD_FLAG || Flags & PPCII::MO_TPREL_FLAG ||
+ Flags & PPCII::MO_TLSLD_FLAG || Flags & PPCII::MO_TLSGDM_FLAG)
+ return true;
+
+ if (Flags == PPCII::MO_TPREL_LO || Flags == PPCII::MO_TPREL_HA ||
+ Flags == PPCII::MO_DTPREL_LO || Flags == PPCII::MO_TLSLD_LO ||
+ Flags == PPCII::MO_TLS)
+ return true;
+
+ return false;
+}
+
+static PPCAsmPrinter::TOCEntryType
+getTOCEntryTypeForMO(const MachineOperand &MO) {
+ // Use the target flags to determine if this MO is Thread Local.
+ // If we don't do this it comes out as Global.
+ if (hasTLSFlag(MO))
+ return PPCAsmPrinter::TOCType_ThreadLocal;
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GlobalV = MO.getGlobal();
+ GlobalValue::LinkageTypes Linkage = GlobalV->getLinkage();
+ if (Linkage == GlobalValue::ExternalLinkage ||
+ Linkage == GlobalValue::AvailableExternallyLinkage ||
+ Linkage == GlobalValue::ExternalWeakLinkage)
+ return PPCAsmPrinter::TOCType_GlobalExternal;
+
+ return PPCAsmPrinter::TOCType_GlobalInternal;
+ }
+ case MachineOperand::MO_ConstantPoolIndex:
+ return PPCAsmPrinter::TOCType_ConstantPool;
+ case MachineOperand::MO_JumpTableIndex:
+ return PPCAsmPrinter::TOCType_JumpTable;
+ case MachineOperand::MO_BlockAddress:
+ return PPCAsmPrinter::TOCType_BlockAddress;
+ default:
+ llvm_unreachable("Unexpected operand type to get TOC type.");
+ }
+}
/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
/// the current output stream.
///
@@ -718,6 +827,18 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
return Expr;
};
auto GetVKForMO = [&](const MachineOperand &MO) {
+ // For TLS local-exec accesses on AIX, we have one TOC entry for the symbol
+ // (with the variable offset), which is differentiated by MO_TPREL_FLAG.
+ if (MO.getTargetFlags() & PPCII::MO_TPREL_FLAG) {
+ // TODO: Update the query and the comment above to add a check for initial
+ // exec when this TLS model is supported on AIX in the future, as both
+ // local-exec and initial-exec can use MO_TPREL_FLAG.
+ assert(MO.isGlobal() && "Only expecting a global MachineOperand here!\n");
+ TLSModel::Model Model = TM.getTLSModel(MO.getGlobal());
+ if (Model == TLSModel::LocalExec)
+ return MCSymbolRefExpr::VariantKind::VK_PPC_AIX_TLSLE;
+ llvm_unreachable("Only expecting local-exec accesses!");
+ }
// For GD TLS access on AIX, we have two TOC entries for the symbol (one for
// the variable offset and the other for the region handle). They are
// differentiated by MO_TLSGD_FLAG and MO_TLSGDM_FLAG.
@@ -865,7 +986,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
// Otherwise, use the TOC. 'TOCEntry' is a label used to reference the
// storage allocated in the TOC which contains the address of
// 'MOSymbol'. Said TOC entry will be synthesized later.
- MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol, VK);
+ MCSymbol *TOCEntry =
+ lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
const MCExpr *Exp =
MCSymbolRefExpr::create(TOCEntry, MCSymbolRefExpr::VK_None, OutContext);
@@ -942,7 +1064,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
// Map the machine operand to its corresponding MCSymbol, then map the
// global address operand to be a reference to the TOC entry we will
// synthesize later.
- MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol, VK);
+ MCSymbol *TOCEntry =
+ lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
MCSymbolRefExpr::VariantKind VKExpr =
IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC;
@@ -980,7 +1103,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
// to the TOC entry we will synthesize later. 'TOCEntry' is a label used to
// reference the storage allocated in the TOC which contains the address of
// 'MOSymbol'.
- MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol, VK);
+ MCSymbol *TOCEntry =
+ lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry,
MCSymbolRefExpr::VK_PPC_U,
OutContext);
@@ -1012,7 +1136,8 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
// to the TOC entry we will synthesize later. 'TOCEntry' is a label used to
// reference the storage allocated in the TOC which contains the address of
// 'MOSymbol'.
- MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol, VK);
+ MCSymbol *TOCEntry =
+ lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
const MCExpr *Exp = MCSymbolRefExpr::create(TOCEntry,
MCSymbolRefExpr::VK_PPC_L,
OutContext);
@@ -1042,7 +1167,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal());
if (GlobalToc || MO.isJTI() || MO.isBlockAddress() ||
(MO.isCPI() && TM.getCodeModel() == CodeModel::Large))
- MOSymbol = lookUpOrCreateTOCEntry(MOSymbol, VK);
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
VK = IsAIX ? MCSymbolRefExpr::VK_PPC_U : MCSymbolRefExpr::VK_PPC_TOC_HA;
@@ -1084,7 +1209,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::VariantKind VK = GetVKForMO(MO);
if (!MO.isCPI() || TM.getCodeModel() == CodeModel::Large)
- MOSymbol = lookUpOrCreateTOCEntry(MOSymbol, VK);
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol, getTOCEntryTypeForMO(MO), VK);
VK = IsAIX ? MCSymbolRefExpr::VK_PPC_L : MCSymbolRefExpr::VK_PPC_TOC_LO;
const MCExpr *Exp =
@@ -1246,6 +1371,12 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSGD);
return;
}
+ case PPC::GETtlsTpointer32AIX: {
+ // Transform: %r3 = GETtlsTpointer32AIX
+ // Into: BLA .__get_tpointer()
+ EmitAIXTlsCallHelper(MI);
+ return;
+ }
case PPC::ADDIStlsldHA: {
// Transform: %xd = ADDIStlsldHA %x2, @sym
// Into: %xd = ADDIS8 %x2, sym@got@tlsld@ha
@@ -2096,8 +2227,9 @@ void PPCAIXAsmPrinter::emitTracebackTable() {
// Set the 5th byte of mandatory field.
uint32_t SecondHalfOfMandatoryField = 0;
- // Always store back chain.
- SecondHalfOfMandatoryField |= TracebackTable::IsBackChainStoredMask;
+ SecondHalfOfMandatoryField |= MF->getFrameInfo().getStackSize()
+ ? TracebackTable::IsBackChainStoredMask
+ : 0;
uint32_t FPRSaved = 0;
for (unsigned Reg = PPC::F14; Reg <= PPC::F31; ++Reg) {
@@ -2311,7 +2443,7 @@ void PPCAIXAsmPrinter::emitTracebackTable() {
auto &Ctx = OutStreamer->getContext();
MCSymbol *EHInfoSym =
TargetLoweringObjectFileXCOFF::getEHInfoTableSymbol(MF);
- MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(EHInfoSym);
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(EHInfoSym, TOCType_EHBlock);
const MCSymbol *TOCBaseSym =
cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection())
->getQualNameSymbol();
@@ -2516,16 +2648,22 @@ void PPCAIXAsmPrinter::emitPGORefs() {
OutStreamer->switchSection(CntsSection);
if (OutContext.hasXCOFFSection(
"__llvm_prf_data",
- XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD)))
- OutStreamer->emitXCOFFRefDirective("__llvm_prf_data[RW]");
+ XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) {
+ MCSymbol *S = OutContext.getOrCreateSymbol("__llvm_prf_data[RW]");
+ OutStreamer->emitXCOFFRefDirective(S);
+ }
if (OutContext.hasXCOFFSection(
"__llvm_prf_names",
- XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD)))
- OutStreamer->emitXCOFFRefDirective("__llvm_prf_names[RO]");
+ XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD))) {
+ MCSymbol *S = OutContext.getOrCreateSymbol("__llvm_prf_names[RO]");
+ OutStreamer->emitXCOFFRefDirective(S);
+ }
if (OutContext.hasXCOFFSection(
"__llvm_prf_vnds",
- XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD)))
- OutStreamer->emitXCOFFRefDirective("__llvm_prf_vnds[RW]");
+ XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) {
+ MCSymbol *S = OutContext.getOrCreateSymbol("__llvm_prf_vnds[RW]");
+ OutStreamer->emitXCOFFRefDirective(S);
+ }
}
}
@@ -2658,11 +2796,13 @@ void PPCAIXAsmPrinter::emitInstruction(const MachineInstr *MI) {
MMI->hasDebugInfo());
break;
}
+ case PPC::GETtlsTpointer32AIX:
case PPC::GETtlsADDR64AIX:
case PPC::GETtlsADDR32AIX: {
- // The reference to .__tls_get_addr is unknown to the assembler
- // so we need to emit an external symbol reference.
- MCSymbol *TlsGetAddr = createMCSymbolForTlsGetAddr(OutContext);
+ // A reference to .__tls_get_addr/.__get_tpointer is unknown to the
+ // assembler so we need to emit an external symbol reference.
+ MCSymbol *TlsGetAddr =
+ createMCSymbolForTlsGetAddr(OutContext, MI->getOpcode());
ExtSymSDNodeSymbols.insert(TlsGetAddr);
break;
}
@@ -2785,8 +2925,14 @@ void PPCAIXAsmPrinter::emitXXStructorList(const DataLayout &DL,
void PPCAIXAsmPrinter::emitTTypeReference(const GlobalValue *GV,
unsigned Encoding) {
if (GV) {
+ TOCEntryType GlobalType = TOCType_GlobalInternal;
+ GlobalValue::LinkageTypes Linkage = GV->getLinkage();
+ if (Linkage == GlobalValue::ExternalLinkage ||
+ Linkage == GlobalValue::AvailableExternallyLinkage ||
+ Linkage == GlobalValue::ExternalWeakLinkage)
+ GlobalType = TOCType_GlobalExternal;
MCSymbol *TypeInfoSym = TM.getSymbol(GV);
- MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(TypeInfoSym);
+ MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(TypeInfoSym, GlobalType);
const MCSymbol *TOCBaseSym =
cast<MCSectionXCOFF>(getObjFileLowering().getTOCBaseSection())
->getQualNameSymbol();
@@ -2810,6 +2956,26 @@ createPPCAsmPrinterPass(TargetMachine &tm,
return new PPCLinuxAsmPrinter(tm, std::move(Streamer));
}
+void PPCAIXAsmPrinter::emitModuleCommandLines(Module &M) {
+ const NamedMDNode *NMD = M.getNamedMetadata("llvm.commandline");
+ if (!NMD || !NMD->getNumOperands())
+ return;
+
+ std::string S;
+ raw_string_ostream RSOS(S);
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ const MDNode *N = NMD->getOperand(i);
+ assert(N->getNumOperands() == 1 &&
+ "llvm.commandline metadata entry can have only one operand");
+ const MDString *MDS = cast<MDString>(N->getOperand(0));
+ // Add "@(#)" to support retrieving the command line information with the
+ // AIX "what" command
+ RSOS << "@(#)opt " << MDS->getString() << "\n";
+ RSOS.write('\0');
+ }
+ OutStreamer->emitXCOFFCInfoSym(".GCC.command.line", RSOS.str());
+}
+
// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCAsmPrinter() {
TargetRegistry::RegisterAsmPrinter(getThePPC32Target(),
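
The emitModuleCommandLines hook added above records each !llvm.commandline entry as a NUL-terminated "@(#)opt <args>" record so the AIX `what` utility can retrieve the options from the object file. A minimal, standalone sketch of that payload format follows (an assumed illustration, not code from this patch):

#include <string>
#include <vector>

// Build one "@(#)opt <args>" what-string record per command line, each
// followed by a NUL byte, mirroring the payload handed to emitXCOFFCInfoSym.
std::string buildCommandLinePayload(const std::vector<std::string> &CmdLines) {
  std::string Payload;
  for (const std::string &Args : CmdLines) {
    Payload += "@(#)opt " + Args + "\n"; // "@(#)" marks a what-string
    Payload += '\0';                     // records are NUL-separated
  }
  return Payload;
}

Running `what` on the resulting object should then list each recorded command line.
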
diff --git a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
index 1fbe94eb310f..5d97d187b296 100644
--- a/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
+++ b/llvm/lib/Target/PowerPC/PPCBack2BackFusion.def
@@ -50,6 +50,10 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
ANDIS_rec,
ANDI_rec,
AND_rec,
+ CBCDTD,
+ CBCDTD8,
+ CDTBCD,
+ CDTBCD8,
CMPB,
CMPB8,
CNTLZD,
@@ -535,6 +539,10 @@ FUSION_FEATURE(GeneralBack2Back, hasBack2BackFusion, -1,
ANDIS_rec,
ANDI_rec,
AND_rec,
+ CBCDTD,
+ CBCDTD8,
+ CDTBCD,
+ CDTBCD8,
CMPB,
CMPB8,
CMPD,
diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.cpp b/llvm/lib/Target/PowerPC/PPCCallingConv.cpp
index ff792fda8fb2..188fc96bc7c2 100644
--- a/llvm/lib/Target/PowerPC/PPCCallingConv.cpp
+++ b/llvm/lib/Target/PowerPC/PPCCallingConv.cpp
@@ -1,4 +1,4 @@
-//===-- PPCCallingConv.h - --------------------------------------*- C++ -*-===//
+//===-- PPCCallingConv.cpp - ------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -21,6 +21,42 @@ inline bool CC_PPC_AnyReg_Error(unsigned &, MVT &, MVT &,
return false;
}
+// This function handles the shadowing of GPRs for fp and vector types,
+// and implements the algorithm described in the ELFv2 ABI,
+// Section 2.2.4.1: Parameter Passing Register Selection Algorithm.
+inline bool CC_PPC64_ELF_Shadow_GPR_Regs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+
+ // The 64-bit ELFv2 ABI-defined parameter passing general purpose registers.
+ static const MCPhysReg ELF64ArgGPRs[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
+ PPC::X7, PPC::X8, PPC::X9, PPC::X10};
+ const unsigned ELF64NumArgGPRs = std::size(ELF64ArgGPRs);
+
+ unsigned FirstUnallocGPR = State.getFirstUnallocated(ELF64ArgGPRs);
+ if (FirstUnallocGPR == ELF64NumArgGPRs)
+ return false;
+
+ // As described in 2.2.4.1 under the "float" section, shadow a single GPR
+ // for single/double precision. ppcf128 gets broken up into two doubles
+ // and will also shadow GPRs within this section.
+ if (LocVT == MVT::f32 || LocVT == MVT::f64)
+ State.AllocateReg(ELF64ArgGPRs);
+ else if (LocVT.is128BitVector() || (LocVT == MVT::f128)) {
+    // For vector and __float128 (which represents the "vector" section in
+    // 2.2.4.1), shadow two GPRs starting at an even doubleword offset from
+    // X3 (skipping one GPR if the next register in the allocation order sits
+    // at an odd offset). The condition below detects the odd-offset case by
+    // checking whether the allocated register minus X3 is odd.
+ if ((State.AllocateReg(ELF64ArgGPRs) - PPC::X3) % 2 == 1)
+ State.AllocateReg(ELF64ArgGPRs);
+ State.AllocateReg(ELF64ArgGPRs);
+ }
+ return false;
+}
+
static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
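
To make the shadowing rules above concrete, the following standalone program (an assumed sketch, not LLVM code) mimics the allocation for a call such as f(double, vector int, long) on 64-bit ELFv2: the double takes F1 and shadows X3, the vector takes V2 and shadows X5/X6 after skipping the odd-offset X4, and the integer then lands in X7.

#include <cstdio>

enum class Kind { Int, Float, Vector };

int main() {
  // Doubleword slots of the parameter save area map to X3..X10 in order.
  const char *GPRs[] = {"X3", "X4", "X5", "X6", "X7", "X8", "X9", "X10"};
  const Kind Args[] = {Kind::Float, Kind::Vector, Kind::Int};
  unsigned NextGPR = 0; // index into GPRs, i.e. doubleword offset from X3

  for (Kind K : Args) {
    if (NextGPR >= 8)
      break; // remaining arguments would be passed in memory
    if (K == Kind::Int) {
      std::printf("integer arg -> %s\n", GPRs[NextGPR++]);
    } else if (K == Kind::Float) {
      std::printf("float arg   -> next FPR, shadows %s\n", GPRs[NextGPR++]);
    } else { // Vector / f128: two GPRs starting at an even doubleword offset.
      if (NextGPR % 2 == 1)
        ++NextGPR; // skip the odd-offset GPR to keep 16-byte alignment
      if (NextGPR + 1 >= 8)
        break;
      std::printf("vector arg  -> next VR, shadows %s and %s\n",
                  GPRs[NextGPR], GPRs[NextGPR + 1]);
      NextGPR += 2;
    }
  }
  return 0;
}
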
diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.h b/llvm/lib/Target/PowerPC/PPCCallingConv.h
index 03d9be0a73d9..ab61472c72eb 100644
--- a/llvm/lib/Target/PowerPC/PPCCallingConv.h
+++ b/llvm/lib/Target/PowerPC/PPCCallingConv.h
@@ -31,6 +31,9 @@ bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT,
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
CCState &State);
+bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
bool CC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
CCState &State);
diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 9df1b1dbd598..825c1a29ed62 100644
--- a/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -112,10 +112,46 @@ def CC_PPC64_AnyReg : CallingConv<[
CCCustom<"CC_PPC_AnyReg_Error">
]>;
-// Note that we don't currently have calling conventions for 64-bit
-// PowerPC, but handle all the complexities of the ABI in the lowering
-// logic. FIXME: See if the logic can be simplified with use of CCs.
-// This may require some extensions to current table generation.
+// Calling Convention corresponding to the 64-bit PowerPC ELFv2 ABI.
+// This calling convention currently only handles integers, floats and
+// vectors within registers, and handles the shadowing of GPRs when
+// floating-point and vector arguments are used.
+// FIXME: This calling convention needs to be extended to handle all types and
+// complexities of the ABI.
+let Entry = 1 in
+def CC_PPC64_ELF : CallingConv<[
+ CCIfCC<"CallingConv::AnyReg", CCDelegateTo<CC_PPC64_AnyReg>>,
+
+ CCIfType<[i1], CCPromoteToType<i64>>,
+ CCIfType<[i8], CCPromoteToType<i64>>,
+ CCIfType<[i16], CCPromoteToType<i64>>,
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
+
+ // Handle fp types and shadow the corresponding registers as necessary.
+ CCIfType<[f32, f64], CCIfNotVarArg<CCCustom<"CC_PPC64_ELF_Shadow_GPR_Regs">>>,
+ CCIfType<[f32, f64],
+ CCIfNotVarArg<CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8, F9, F10,
+ F11, F12, F13]>>>,
+
+ // f128 is handled through vector registers instead of fp registers.
+ CCIfType<[f128],
+ CCIfSubtarget<"hasAltivec()",
+ CCIfNotVarArg<CCCustom<"CC_PPC64_ELF_Shadow_GPR_Regs">>>>,
+ CCIfType<[f128],
+ CCIfSubtarget<"hasAltivec()",
+ CCIfNotVarArg<CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,
+ V11, V12, V13]>>>>,
+
+  // Handle vector types, and shadow GPRs as necessary.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v1i128],
+ CCIfSubtarget<"hasAltivec()",
+ CCIfNotVarArg<CCCustom<"CC_PPC64_ELF_Shadow_GPR_Regs">>>>,
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v1i128],
+ CCIfSubtarget<"hasAltivec()",
+ CCIfNotVarArg<CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9, V10,
+ V11, V12, V13]>>>>,
+]>;
// Simple calling convention for 64-bit ELF PowerPC fast isel.
// Only handle ints and floats. All ints are promoted to i64.
diff --git a/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index be555ac0edf6..42f5a4e624c4 100644
--- a/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -1404,7 +1404,7 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
}
// Get a count of how many bytes are to be pushed onto the stack.
- NumBytes = CCInfo.getNextStackOffset();
+ NumBytes = CCInfo.getStackSize();
// The prolog code of the callee may store up to 8 GPR argument registers to
// the stack, allowing va_start to index over them in memory if its varargs.
@@ -1555,8 +1555,8 @@ bool PPCFastISel::fastLowerCall(CallLoweringInfo &CLI) {
if (!Callee && !Symbol)
return false;
- // Allow SelectionDAG isel to handle tail calls.
- if (IsTailCall)
+ // Allow SelectionDAG isel to handle tail calls and long calls.
+ if (IsTailCall || Subtarget->useLongCalls())
return false;
// Let SDISel handle vararg functions.
@@ -2155,7 +2155,7 @@ unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
// If the value doesn't fit in 32 bits, see if we can shift it
// so that it fits in 32 bits.
if (!isInt<32>(Imm)) {
- Shift = countTrailingZeros<uint64_t>(Imm);
+ Shift = llvm::countr_zero<uint64_t>(Imm);
int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
if (isInt<32>(ImmSh))
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 3c0aa2390666..d5e4ae34dde7 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -395,8 +395,7 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
for (MachineBasicBlock &MBB : MF)
for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) {
--MBBI;
- for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
- MachineOperand &MO = MBBI->getOperand(I);
+ for (MachineOperand &MO : MBBI->operands()) {
if (!MO.isReg())
continue;
@@ -459,19 +458,19 @@ PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
(!UseAtEnd && (&MBB->getParent()->front() == MBB)))
return true;
- RS.enterBasicBlock(*MBB);
-
- if (UseAtEnd && !MBB->empty()) {
- // The scratch register will be used at the end of the block, so must
- // consider all registers used within the block
-
+ if (UseAtEnd) {
+ // The scratch register will be used before the first terminator (or at the
+ // end of the block if there are no terminators).
MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
- // If no terminator, back iterator up to previous instruction.
- if (MBBI == MBB->end())
- MBBI = std::prev(MBBI);
-
- if (MBBI != MBB->begin())
- RS.forward(MBBI);
+ if (MBBI == MBB->begin()) {
+ RS.enterBasicBlock(*MBB);
+ } else {
+ RS.enterBasicBlockEnd(*MBB);
+ RS.backward(std::prev(MBBI));
+ }
+ } else {
+ // The scratch register will be used at the start of the block.
+ RS.enterBasicBlock(*MBB);
}
// If the two registers are available, we're all good.
@@ -2287,13 +2286,15 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
// slot for dynamic stack allocations.
// The scavenger might be invoked if the frame offset does not fit into
- // the 16-bit immediate. We don't know the complete frame size here
- // because we've not yet computed callee-saved register spills or the
- // needed alignment padding.
+  // the 16-bit immediate (or the 8-bit immediate when targeting SPE).
+ // We don't know the complete frame size here because we've not yet computed
+ // callee-saved register spills or the needed alignment padding.
unsigned StackSize = determineFrameLayout(MF, true);
MachineFrameInfo &MFI = MF.getFrameInfo();
+  bool NeedSpills =
+      Subtarget.hasSPE() ? !isInt<8>(StackSize) : !isInt<16>(StackSize);
+
if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
- (hasSpills(MF) && !isInt<16>(StackSize))) {
+ (hasSpills(MF) && NeedSpills)) {
const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
@@ -2325,6 +2326,35 @@ bool PPCFrameLowering::assignCalleeSavedSpillSlots(
if (CSI.empty())
return true; // Early exit if no callee saved registers are modified!
+ const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ if (Subtarget.hasSPE()) {
+    // In the case of SPE we only have SuperRegs and CRs
+    // in our CalleeSavedInfo vector.
+
+ unsigned Idx = 0;
+ for (auto &CalleeSaveReg : CSI) {
+ const MCPhysReg &Reg = CalleeSaveReg.getReg();
+ const MCPhysReg &Lower = RegInfo->getSubReg(Reg, 1);
+ const MCPhysReg &Higher = RegInfo->getSubReg(Reg, 2);
+
+ // Check only for SuperRegs.
+ if (Lower) {
+ if (MRI.isPhysRegModified(Higher)) {
+ Idx++;
+ continue;
+ } else {
+ // Replace Reg if only lower-32 bits modified
+ CSI.erase(CSI.begin() + Idx);
+ CSI.insert(CSI.begin() + Idx, CalleeSavedInfo(Lower));
+ }
+ }
+ Idx++;
+ }
+ }
+
// Early exit if cannot spill gprs to volatile vector registers.
MachineFrameInfo &MFI = MF.getFrameInfo();
if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
@@ -2333,8 +2363,6 @@ bool PPCFrameLowering::assignCalleeSavedSpillSlots(
// Build a BitVector of VSRs that can be used for spilling GPRs.
BitVector BVAllocatable = TRI->getAllocatableSet(MF);
BitVector BVCalleeSaved(TRI->getNumRegs());
- const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
for (unsigned i = 0; CSRegs[i]; ++i)
BVCalleeSaved.set(CSRegs[i]);
@@ -2342,7 +2370,7 @@ bool PPCFrameLowering::assignCalleeSavedSpillSlots(
// Set to 0 if the register is not a volatile VSX register, or if it is
// used in the function.
if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
- MF.getRegInfo().isPhysRegUsed(Reg))
+ MRI.isPhysRegUsed(Reg))
BVAllocatable.reset(Reg);
}
diff --git a/llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def b/llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def
index f7e79ae71ebd..eff4432206e1 100644
--- a/llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def
+++ b/llvm/lib/Target/PowerPC/PPCGenRegisterBankInfo.def
@@ -22,7 +22,9 @@ RegisterBankInfo::PartialMapping PPCGenRegisterBankInfo::PartMappings[]{
{0, 32, PPC::FPRRegBank},
// 3: FPR 64-bit value
{0, 64, PPC::FPRRegBank},
- // 4: CR 4-bit value
+ // 4: 128-bit vector (VSX, Altivec)
+ {0, 128, PPC::VECRegBank},
+ // 5: CR 4-bit value
{0, 4, PPC::CRRegBank},
};
@@ -57,7 +59,11 @@ RegisterBankInfo::ValueMapping PPCGenRegisterBankInfo::ValMappings[]{
{&PPCGenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
{&PPCGenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
{&PPCGenRegisterBankInfo::PartMappings[PMI_FPR64 - PMI_Min], 1},
- // 13: CR 4-bit value.
+ // 13: 128-bit vector.
+ {&PPCGenRegisterBankInfo::PartMappings[PMI_VEC128 - PMI_Min], 1},
+ {&PPCGenRegisterBankInfo::PartMappings[PMI_VEC128 - PMI_Min], 1},
+ {&PPCGenRegisterBankInfo::PartMappings[PMI_VEC128 - PMI_Min], 1},
+ // 16: CR 4-bit value.
{&PPCGenRegisterBankInfo::PartMappings[PMI_CR - PMI_Min], 1},
};
@@ -71,14 +77,36 @@ PPCGenRegisterBankInfo::getValueMapping(PartialMappingIdx RBIdx) {
return &ValMappings[1 + 3 * ValMappingIdx];
}
+PPCGenRegisterBankInfo::PartialMappingIdx
+ PPCGenRegisterBankInfo::BankIDToCopyMapIdx[]{
+ PMI_None,
+ PMI_FPR64, // FPR
+ PMI_GPR64, // GPR
+ PMI_VEC128, // VEC
+};
+
// TODO Too simple!
const RegisterBankInfo::ValueMapping *
PPCGenRegisterBankInfo::getCopyMapping(unsigned DstBankID, unsigned SrcBankID,
unsigned Size) {
assert(DstBankID < PPC::NumRegisterBanks && "Invalid bank ID");
assert(SrcBankID < PPC::NumRegisterBanks && "Invalid bank ID");
+ PartialMappingIdx DstRBIdx = BankIDToCopyMapIdx[DstBankID];
+ PartialMappingIdx SrcRBIdx = BankIDToCopyMapIdx[SrcBankID];
+ assert(DstRBIdx != PMI_None && "No such mapping");
+ assert(SrcRBIdx != PMI_None && "No such mapping");
+
+ if (DstRBIdx == SrcRBIdx)
+ return getValueMapping(DstRBIdx);
- return &ValMappings[1];
+ assert(Size <= 128 && "Can currently handle types up to 128 bits (vectors)!");
+ // TODO: This function needs to be updated to handle all cases for
+ // GPRs, FPRs and vectors. It currently only handles bitcasting to
+  // the same type and has mainly been tested for bitcasting
+ // between different vector types.
+ unsigned ValMappingIdx = DstRBIdx - PMI_Min;
+
+ return &ValMappings[1 + 3 * ValMappingIdx];
}
} // namespace llvm
diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index d80a33ff6064..0ebfc007b3d7 100644
--- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -19,6 +19,7 @@
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -32,6 +33,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -53,7 +55,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -704,16 +705,90 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
return false;
}
-bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
- SDValue Base = ST->getBasePtr();
- if (Base.getOpcode() != PPCISD::ADD_TLS)
+// isThreadPointerAcquisitionNode - Check if the operands of an ADD_TLS
+// instruction use the thread pointer.
+static bool isThreadPointerAcquisitionNode(SDValue Base, SelectionDAG *CurDAG) {
+ assert(
+ Base.getOpcode() == PPCISD::ADD_TLS &&
+ "Only expecting the ADD_TLS instruction to acquire the thread pointer!");
+ const PPCSubtarget &Subtarget =
+ CurDAG->getMachineFunction().getSubtarget<PPCSubtarget>();
+ SDValue ADDTLSOp1 = Base.getOperand(0);
+ unsigned ADDTLSOp1Opcode = ADDTLSOp1.getOpcode();
+
+ // Account for when ADD_TLS is used for the initial-exec TLS model on Linux.
+ //
+ // Although ADD_TLS does not explicitly use the thread pointer
+  // register when LD_GOT_TPREL_L is one of its operands, the LD_GOT_TPREL_L
+  // instruction will have a relocation specifier, @got@tprel, that is used to
+  // generate a GOT entry. The linker replaces this entry with the offset of a
+  // thread-local variable, which will be relative to the thread pointer.
+ if (ADDTLSOp1Opcode == PPCISD::LD_GOT_TPREL_L)
+ return true;
+ // When using PC-Relative instructions for initial-exec, a MAT_PCREL_ADDR
+ // node is produced instead to represent the aforementioned situation.
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSOp1);
+ if (LD && LD->getBasePtr().getOpcode() == PPCISD::MAT_PCREL_ADDR)
+ return true;
+
+  // A GET_TPOINTER PPCISD node (only produced in AIX 32-bit mode) as an
+  // operand to ADD_TLS represents a call to .__get_tpointer to get the
+  // thread pointer, which is then returned in R3.
+ if (ADDTLSOp1Opcode == PPCISD::GET_TPOINTER)
+ return true;
+
+  // The ADD_TLS node is explicitly acquiring the thread pointer (X13/R13).
+ RegisterSDNode *AddFirstOpReg =
+ dyn_cast_or_null<RegisterSDNode>(ADDTLSOp1.getNode());
+ if (AddFirstOpReg &&
+ AddFirstOpReg->getReg() == Subtarget.getThreadPointerRegister())
+ return true;
+
+ return false;
+}
+
+// canOptimizeTLSDFormToXForm - Optimize TLS accesses when an ADD_TLS
+// instruction is present. An ADD_TLS instruction, followed by a D-Form memory
+// operation, can be optimized to use an X-Form load or store, allowing the
+// ADD_TLS node to be removed completely.
+static bool canOptimizeTLSDFormToXForm(SelectionDAG *CurDAG, SDValue Base) {
+
+ // Do not do this transformation at -O0.
+ if (CurDAG->getTarget().getOptLevel() == CodeGenOpt::None)
return false;
- SDValue Offset = ST->getOffset();
- if (!Offset.isUndef())
+
+ // In order to perform this optimization inside tryTLSXForm[Load|Store],
+ // Base is expected to be an ADD_TLS node.
+ if (Base.getOpcode() != PPCISD::ADD_TLS)
return false;
+ for (auto *ADDTLSUse : Base.getNode()->uses()) {
+    // The optimization to convert the D-Form load/store into its X-Form
+    // counterpart should only occur if the source value offset of the load/
+    // store is 0. This also means the offset operand should always be undef.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSUse)) {
+ if (LD->getSrcValueOffset() != 0 || !LD->getOffset().isUndef())
+ return false;
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(ADDTLSUse)) {
+ if (ST->getSrcValueOffset() != 0 || !ST->getOffset().isUndef())
+ return false;
+ } else // Don't optimize if there are ADD_TLS users that aren't load/stores.
+ return false;
+ }
+
if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
return false;
+ // Does the ADD_TLS node of the load/store use the thread pointer?
+ // If the thread pointer is not used as one of the operands of ADD_TLS,
+ // then this optimization is not valid.
+ return isThreadPointerAcquisitionNode(Base, CurDAG);
+}
+
+bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
+ SDValue Base = ST->getBasePtr();
+ if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
+ return false;
+
SDLoc dl(ST);
EVT MemVT = ST->getMemoryVT();
EVT RegVT = ST->getValue().getValueType();
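
For orientation, here is a rough source-level example of the access pattern this rewrite targets (an assumed, simplified illustration, not code from this patch; exact relocations and register numbers will differ):

// An initial-exec TLS variable accessed with a zero displacement.
extern __thread int Counter __attribute__((tls_model("initial-exec")));

int readCounter() {
  // Schematically, before the rewrite (ELFv2 initial-exec):
  //   ld   r9, <GOT slot with Counter's TP offset>(r2)
  //   add  r9, r9, r13        ; ADD_TLS adds the thread pointer
  //   lwz  r3, 0(r9)          ; D-form load, displacement 0
  // After the rewrite the ADD_TLS node is folded away and an indexed
  // (X-form) load, one of the *XTLS opcodes, is selected instead:
  //   ld   r9, <GOT slot with Counter's TP offset>(r2)
  //   lwzx r3, r13, r9
  return Counter;
}
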
@@ -738,6 +813,14 @@ bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
Opcode = PPC::STDXTLS;
break;
}
+ case MVT::f32: {
+ Opcode = PPC::STFSXTLS;
+ break;
+ }
+ case MVT::f64: {
+ Opcode = PPC::STFDXTLS;
+ break;
+ }
}
SDValue Chain = ST->getChain();
SDVTList VTs = ST->getVTList();
@@ -751,17 +834,13 @@ bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
SDValue Base = LD->getBasePtr();
- if (Base.getOpcode() != PPCISD::ADD_TLS)
- return false;
- SDValue Offset = LD->getOffset();
- if (!Offset.isUndef())
- return false;
- if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
+ if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
return false;
SDLoc dl(LD);
EVT MemVT = LD->getMemoryVT();
EVT RegVT = LD->getValueType(0);
+ bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
unsigned Opcode;
switch (MemVT.getSimpleVT().SimpleTy) {
default:
@@ -771,17 +850,31 @@ bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
break;
}
case MVT::i16: {
- Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
+ if (RegVT == MVT::i32)
+ Opcode = isSExt ? PPC::LHAXTLS_32 : PPC::LHZXTLS_32;
+ else
+ Opcode = isSExt ? PPC::LHAXTLS : PPC::LHZXTLS;
break;
}
case MVT::i32: {
- Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
+ if (RegVT == MVT::i32)
+ Opcode = isSExt ? PPC::LWAXTLS_32 : PPC::LWZXTLS_32;
+ else
+ Opcode = isSExt ? PPC::LWAXTLS : PPC::LWZXTLS;
break;
}
case MVT::i64: {
Opcode = PPC::LDXTLS;
break;
}
+ case MVT::f32: {
+ Opcode = PPC::LFSXTLS;
+ break;
+ }
+ case MVT::f64: {
+ Opcode = PPC::LFDXTLS;
+ break;
+ }
}
SDValue Chain = LD->getChain();
SDVTList VTs = LD->getVTList();
@@ -926,8 +1019,8 @@ static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
// zeros and return the number of bits by the left of these consecutive zeros.
static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
- unsigned HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm));
- unsigned LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm));
+ unsigned HiTZ = llvm::countr_zero<uint32_t>(Hi_32(Imm));
+ unsigned LoLZ = llvm::countl_zero<uint32_t>(Lo_32(Imm));
if ((HiTZ + LoLZ) >= Num)
return (32 + HiTZ);
return 0;
@@ -936,10 +1029,10 @@ static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
// Direct materialization of 64-bit constants by enumerated patterns.
static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
uint64_t Imm, unsigned &InstCnt) {
- unsigned TZ = countTrailingZeros<uint64_t>(Imm);
- unsigned LZ = countLeadingZeros<uint64_t>(Imm);
- unsigned TO = countTrailingOnes<uint64_t>(Imm);
- unsigned LO = countLeadingOnes<uint64_t>(Imm);
+ unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
+ unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
+ unsigned TO = llvm::countr_one<uint64_t>(Imm);
+ unsigned LO = llvm::countl_one<uint64_t>(Imm);
unsigned Hi32 = Hi_32(Imm);
unsigned Lo32 = Lo_32(Imm);
SDNode *Result = nullptr;
@@ -967,7 +1060,7 @@ static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
InstCnt = 2;
assert(LZ < 64 && "Unexpected leading zeros here.");
// Count of ones follwing the leading zeros.
- unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ);
+ unsigned FO = llvm::countl_one<uint64_t>(Imm << LZ);
// 2-1) Patterns : {zeros}{31-bit value}
// {ones}{31-bit value}
if (isInt<32>(Imm)) {
@@ -1165,10 +1258,10 @@ static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
// were selected.
static SDNode *selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl,
uint64_t Imm, unsigned &InstCnt) {
- unsigned TZ = countTrailingZeros<uint64_t>(Imm);
- unsigned LZ = countLeadingZeros<uint64_t>(Imm);
- unsigned TO = countTrailingOnes<uint64_t>(Imm);
- unsigned FO = countLeadingOnes<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));
+ unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
+ unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
+ unsigned TO = llvm::countr_one<uint64_t>(Imm);
+ unsigned FO = llvm::countl_one<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));
unsigned Hi32 = Hi_32(Imm);
unsigned Lo32 = Lo_32(Imm);
@@ -1319,18 +1412,68 @@ static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
auto getI32Imm = [CurDAG, dl](unsigned Imm) {
return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
};
+
+ uint32_t Hi16OfLo32 = (Lo_32(Imm) >> 16) & 0xffff;
+ uint32_t Lo16OfLo32 = Lo_32(Imm) & 0xffff;
+
+  // Try to use 4 instructions to materialize an immediate that is "almost" a
+  // splat of a 32-bit immediate.
+ if (Hi16OfLo32 && Lo16OfLo32) {
+ uint32_t Hi16OfHi32 = (Hi_32(Imm) >> 16) & 0xffff;
+ uint32_t Lo16OfHi32 = Hi_32(Imm) & 0xffff;
+ bool IsSelected = false;
+
+ auto getSplat = [CurDAG, dl, getI32Imm](uint32_t Hi16, uint32_t Lo16) {
+ SDNode *Result =
+ CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi16));
+ Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
+ SDValue(Result, 0), getI32Imm(Lo16));
+ SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
+ getI32Imm(0)};
+ return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
+ };
+
+ if (Hi16OfHi32 == Lo16OfHi32 && Lo16OfHi32 == Lo16OfLo32) {
+ IsSelected = true;
+ Result = getSplat(Hi16OfLo32, Lo16OfLo32);
+ // Modify Hi16OfHi32.
+ SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(48),
+ getI32Imm(0)};
+ Result = CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
+ } else if (Hi16OfHi32 == Hi16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
+ IsSelected = true;
+ Result = getSplat(Hi16OfHi32, Lo16OfHi32);
+ // Modify Lo16OfLo32.
+ SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
+ getI32Imm(16), getI32Imm(31)};
+ Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
+ } else if (Lo16OfHi32 == Lo16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
+ IsSelected = true;
+ Result = getSplat(Hi16OfHi32, Lo16OfHi32);
+ // Modify Hi16OfLo32.
+ SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
+ getI32Imm(0), getI32Imm(15)};
+ Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
+ }
+      if (IsSelected) {
+ if (InstCnt)
+ *InstCnt = 4;
+ return Result;
+ }
+ }
+
// Handle the upper 32 bit value.
Result =
selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
// Add in the last bits as required.
- if (uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff) {
+ if (Hi16OfLo32) {
Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
- SDValue(Result, 0), getI32Imm(Hi16));
+ SDValue(Result, 0), getI32Imm(Hi16OfLo32));
++InstCntDirect;
}
- if (uint32_t Lo16 = Lo_32(Imm) & 0xffff) {
+ if (Lo16OfLo32) {
Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
- getI32Imm(Lo16));
+ getI32Imm(Lo16OfLo32));
++InstCntDirect;
}
if (InstCnt)
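
A standalone sketch of the qualifying test above (an assumed illustration, not LLVM code): the 4-instruction path applies when three of the immediate's four 16-bit chunks are equal and both low chunks are non-zero. For example, 0xABCD1234ABCDABCD is built as LIS8/ORI8 (forming 0xABCD1234), RLDIMI to splat that word into the upper half, and RLWIMI8 to patch the low halfword back to 0xABCD.

#include <cstdint>
#include <cstdio>

// Return true when Imm matches one of the "almost a splat" patterns handled
// by the 4-instruction sequence sketched above.
static bool isAlmostSplat(uint64_t Imm) {
  uint32_t Lo16OfLo32 = Imm & 0xffff;
  uint32_t Hi16OfLo32 = (Imm >> 16) & 0xffff;
  uint32_t Lo16OfHi32 = (Imm >> 32) & 0xffff;
  uint32_t Hi16OfHi32 = (Imm >> 48) & 0xffff;
  if (!Hi16OfLo32 || !Lo16OfLo32)
    return false; // fall back to the generic materialization path
  return (Hi16OfHi32 == Lo16OfHi32 && Lo16OfHi32 == Lo16OfLo32) ||
         (Hi16OfHi32 == Hi16OfLo32 && Hi16OfLo32 == Lo16OfLo32) ||
         (Lo16OfHi32 == Lo16OfLo32 && Hi16OfLo32 == Lo16OfLo32);
}

int main() {
  std::printf("%d\n", isAlmostSplat(0xABCD1234ABCDABCDULL)); // prints 1
  std::printf("%d\n", isAlmostSplat(0x0123456789ABCDEFULL)); // prints 0
  return 0;
}
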
@@ -2796,9 +2939,6 @@ public:
}
};
-static bool isLogicOp(unsigned Opc) {
- return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
-}
// The obvious case for wanting to keep the value in a GPR. Namely, the
// result of the comparison is actually needed in a GPR.
SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
@@ -2808,7 +2948,7 @@ SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
SDValue WideRes;
// If we are zero-extending the result of a logical operation on i1
// values, we can keep the values in GPRs.
- if (isLogicOp(N->getOperand(0).getOpcode()) &&
+ if (ISD::isBitwiseLogicOp(N->getOperand(0).getOpcode()) &&
N->getOperand(0).getValueType() == MVT::i1 &&
N->getOpcode() == ISD::ZERO_EXTEND)
WideRes = computeLogicOpInGPR(N->getOperand(0));
@@ -2844,7 +2984,7 @@ SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
if (N->getValueType(0) != MVT::i1)
return nullptr;
- assert(isLogicOp(N->getOpcode()) &&
+ assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
"Expected a logic operation on setcc results.");
SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
if (!LoweredLogical)
@@ -2924,7 +3064,7 @@ SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
// There is also a special case that is handled (namely a complement operation
// achieved with xor %a, -1).
SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
- assert(isLogicOp(LogicOp.getOpcode()) &&
+ assert(ISD::isBitwiseLogicOp(LogicOp.getOpcode()) &&
"Can only handle logic operations here.");
assert(LogicOp.getValueType() == MVT::i1 &&
"Can only handle logic operations on i1 values here.");
@@ -2949,7 +3089,7 @@ SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
PPC::RLDICL, dl, InVT, InputOp,
S->getI64Imm(0, dl),
S->getI64Imm(63, dl)), 0);
- } else if (isLogicOp(OperandOpcode))
+ } else if (ISD::isBitwiseLogicOp(OperandOpcode))
return computeLogicOpInGPR(Operand);
return SDValue();
};
@@ -3838,7 +3978,7 @@ static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
CompareUse->getOpcode() != ISD::SELECT &&
- !isLogicOp(CompareUse->getOpcode())) {
+ !ISD::isBitwiseLogicOp(CompareUse->getOpcode())) {
OmittedForNonExtendUses++;
return false;
}
@@ -3947,7 +4087,7 @@ bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
if (SRLConst && SRLConst->getSExtValue() == 16)
return false;
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case ISD::ROTL:
case ISD::SHL:
case ISD::AND:
@@ -4457,9 +4597,9 @@ bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
// Force the ccreg into CR7.
SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
- SDValue InFlag; // Null incoming flag value.
+ SDValue InGlue; // Null incoming flag value.
CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
- InFlag).getValue(1);
+ InGlue).getValue(1);
IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
CCReg), 0);
@@ -4872,7 +5012,7 @@ bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
// wrapped run of ones, i.e.
// Change pattern |0001111100000011111111|
// to |1111111100000011111111|.
- unsigned NumOfLeadingZeros = countLeadingZeros(Imm64);
+ unsigned NumOfLeadingZeros = llvm::countl_zero(Imm64);
if (NumOfLeadingZeros != 0)
Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);
@@ -4952,7 +5092,7 @@ bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
return false;
// If this is a 64-bit zero-extension mask, emit rldicl.
- unsigned MB = 64 - countTrailingOnes(Imm64);
+ unsigned MB = 64 - llvm::countr_one(Imm64);
unsigned SH = 0;
unsigned Imm;
SDValue Val = N->getOperand(0);
@@ -5002,7 +5142,7 @@ bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
// If this is a negated 64-bit zero-extension mask,
// i.e. the immediate is a sequence of ones from most significant side
// and all zero for reminder, we should use rldicr.
- unsigned MB = 63 - countTrailingOnes(~Imm64);
+ unsigned MB = 63 - llvm::countr_one(~Imm64);
unsigned SH = 0;
SDLoc dl(N);
SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)};
@@ -5321,9 +5461,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
return;
case PPCISD::MFOCRF: {
- SDValue InFlag = N->getOperand(1);
+ SDValue InGlue = N->getOperand(1);
ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
- N->getOperand(0), InFlag));
+ N->getOperand(0), InGlue));
return;
}
@@ -5358,9 +5498,10 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
case ISD::STORE: {
- // Change TLS initial-exec D-form stores to X-form stores.
+ // Change TLS initial-exec (or TLS local-exec on AIX) D-form stores to
+ // X-form stores.
StoreSDNode *ST = cast<StoreSDNode>(N);
- if (EnableTLSOpt && Subtarget->isELFv2ABI() &&
+ if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()) &&
ST->getAddressingMode() != ISD::PRE_INC)
if (tryTLSXFormStore(ST))
return;
@@ -5373,8 +5514,9 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// Normal loads are handled by code generated from the .td file.
if (LD->getAddressingMode() != ISD::PRE_INC) {
- // Change TLS initial-exec D-form loads to X-form loads.
- if (EnableTLSOpt && Subtarget->isELFv2ABI())
+ // Change TLS initial-exec (or TLS local-exec on AIX) D-form loads to
+ // X-form loads.
+ if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()))
if (tryTLSXFormLoad(LD))
return;
break;
@@ -5582,7 +5724,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
// If the multiplier fits int16, we can handle it with mulli.
int64_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue();
- unsigned Shift = countTrailingZeros<uint64_t>(Imm);
+ unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
if (isInt<16>(Imm) || !Shift)
break;
@@ -5675,21 +5817,18 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
}
// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
- if (!isPPC64)
- if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
- if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
- if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
- if (N1C->isZero() && N3C->isZero() && N2C->getZExtValue() == 1ULL &&
- CC == ISD::SETNE &&
- // FIXME: Implement this optzn for PPC64.
- N->getValueType(0) == MVT::i32) {
- SDNode *Tmp =
- CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
- N->getOperand(0), getI32Imm(~0U, dl));
- CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
- N->getOperand(0), SDValue(Tmp, 1));
- return;
- }
+ if (!isPPC64 && isNullConstant(N->getOperand(1)) &&
+ isOneConstant(N->getOperand(2)) && isNullConstant(N->getOperand(3)) &&
+ CC == ISD::SETNE &&
+ // FIXME: Implement this optzn for PPC64.
+ N->getValueType(0) == MVT::i32) {
+ SDNode *Tmp =
+ CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ N->getOperand(0), getI32Imm(~0U, dl));
+ CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
+ N->getOperand(0), SDValue(Tmp, 1));
+ return;
+ }
SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
@@ -7494,6 +7633,20 @@ void PPCDAGToDAGISel::PeepholePPC64() {
case PPC::ADDItocL:
Flags = PPCII::MO_TOC_LO;
break;
+ case PPC::ADDItoc:
+ case PPC::ADDItoc8:
+ if (RequiresMod4Offset) {
+ if (GlobalAddressSDNode *GA =
+ dyn_cast<GlobalAddressSDNode>(Base.getOperand(0))) {
+ const GlobalValue *GV = GA->getGlobal();
+ Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
+ // XMC_TD global that is underaligned being accessed with a DS form
+ // instruction.
+ if (Alignment < 4)
+ continue;
+ }
+ }
+ break;
}
SDValue ImmOpnd = Base.getOperand(1);
@@ -7588,12 +7741,27 @@ void PPCDAGToDAGISel::PeepholePPC64() {
}
}
+ const unsigned BaseOpcode = Base.getMachineOpcode();
+ // ADDItoc and ADDItoc8 are pseudos used exclusively by AIX small code
+ // model when a global is defined in the TOC.
+ const bool OpcodeIsAIXTocData =
+ BaseOpcode == PPC::ADDItoc || BaseOpcode == PPC::ADDItoc8;
+
if (FirstOp == 1) // Store
- (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
- Base.getOperand(0), N->getOperand(3));
+ if (OpcodeIsAIXTocData)
+ (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0),
+ Base.getOperand(0), Base.getOperand(1),
+ N->getOperand(3));
+ else
+ (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
+ Base.getOperand(0), N->getOperand(3));
else // Load
- (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
- N->getOperand(2));
+ if (OpcodeIsAIXTocData)
+ (void)CurDAG->UpdateNodeOperands(N, Base.getOperand(0),
+ Base.getOperand(1), N->getOperand(2));
+ else
+ (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
+ N->getOperand(2));
if (UpdateHBase)
(void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 77630cf027fa..3ed0a261eb76 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -24,6 +24,7 @@
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
@@ -46,6 +47,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -85,7 +87,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/KnownBits.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -121,11 +122,6 @@ cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
cl::desc("use absolute jump tables on ppc"), cl::Hidden);
-static cl::opt<bool> EnableQuadwordAtomics(
- "ppc-quadword-atomics",
- cl::desc("enable quadword lock-free atomic operations"), cl::init(false),
- cl::Hidden);
-
static cl::opt<bool>
DisablePerfectShuffle("ppc-disable-perfect-shuffle",
cl::desc("disable vector permute decomposition"),
@@ -849,6 +845,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FCEIL, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FLDEXP, VT, Expand);
setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
@@ -1185,6 +1182,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
+ // Test data class instructions store results in CR bits.
+ if (Subtarget.useCRBits()) {
+ setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
+ setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
+ setOperationAction(ISD::IS_FPCLASS, MVT::f128, Custom);
+ }
+
// 128 bit shifts can be accomplished via 3 instructions for SHL and
// SRL, but not for SRA because of the instructions available:
// VS{RL} and VS{RL}O.
@@ -1299,6 +1303,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
+
+ setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
+ setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
+ setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
+ setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
}
if (Subtarget.hasP10Vector()) {
@@ -1357,18 +1366,18 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLibcallName(RTLIB::MULO_I64, nullptr);
}
- if (!isPPC64)
- setMaxAtomicSizeInBitsSupported(32);
- else if (shouldInlineQuadwordAtomics())
+ if (shouldInlineQuadwordAtomics())
setMaxAtomicSizeInBitsSupported(128);
- else
+ else if (isPPC64)
setMaxAtomicSizeInBitsSupported(64);
+ else
+ setMaxAtomicSizeInBitsSupported(32);
setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
// We have target-specific dag combine patterns for the following nodes:
- setTargetDAGCombine({ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL, ISD::MUL,
- ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
+ setTargetDAGCombine({ISD::AND, ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL,
+ ISD::MUL, ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
if (Subtarget.hasFPCVT())
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
@@ -1385,10 +1394,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine({ISD::TRUNCATE, ISD::SETCC, ISD::SELECT_CC});
}
- if (Subtarget.hasP9Altivec()) {
- setTargetDAGCombine({ISD::ABS, ISD::VSELECT});
- }
-
setLibcallName(RTLIB::LOG_F128, "logf128");
setLibcallName(RTLIB::LOG2_F128, "log2f128");
setLibcallName(RTLIB::LOG10_F128, "log10f128");
@@ -1413,6 +1418,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
setLibcallName(RTLIB::FMA_F128, "fmaf128");
+ if (Subtarget.isAIXABI()) {
+ setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
+ setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
+ setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
+ setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
+ }
+
// With 32 condition bits, we don't need to sink (and duplicate) compares
// aggressively in CodeGenPrep.
if (Subtarget.useCRBits()) {
@@ -1627,10 +1639,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
- case PPCISD::FP_TO_UINT_IN_VSR:
- return "PPCISD::FP_TO_UINT_IN_VSR,";
- case PPCISD::FP_TO_SINT_IN_VSR:
- return "PPCISD::FP_TO_SINT_IN_VSR";
case PPCISD::FRE: return "PPCISD::FRE";
case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
case PPCISD::FTSQRT:
@@ -1679,7 +1687,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
return "PPCISD::BCTRL_RM";
case PPCISD::BCTRL_LOAD_TOC_RM:
return "PPCISD::BCTRL_LOAD_TOC_RM";
- case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
+ case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
@@ -1726,6 +1734,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
+ case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
@@ -1743,7 +1752,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::RFEBB: return "PPCISD::RFEBB";
case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
- case PPCISD::VABSD: return "PPCISD::VABSD";
case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
@@ -2550,7 +2558,7 @@ SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
Value = CN->getZExtValue();
} else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
- Value = FloatToBits(CN->getValueAPF().convertToFloat());
+ Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
}
// If the splat value is larger than the element value, then we can never do
@@ -3315,9 +3323,37 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
+ bool Is64Bit = Subtarget.isPPC64();
+ TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
- // The general-dynamic model is the only access model supported for now, so
- // all the GlobalTLSAddress nodes are lowered with this model.
+ if (Model == TLSModel::LocalExec) {
+ SDValue VariableOffsetTGA =
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
+ SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
+ SDValue TLSReg;
+ if (Is64Bit)
+ // For local-exec on AIX (64-bit), the sequence that is generated involves
+ // a load of the variable offset (from the TOC), followed by an add of the
+ // loaded variable offset to R13 (the thread pointer).
+ // This code sequence looks like:
+ // ld reg1,var[TC](2)
+ // add reg2, reg1, r13 // r13 contains the thread pointer
+ TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
+ else
+ // For local-exec on AIX (32-bit), the sequence that is generated involves
+ // loading the variable offset from the TOC, generating a call to
+ // .__get_tpointer to get the thread pointer (which will be in R3), and
+ // adding the two together:
+ // lwz reg1,var[TC](2)
+ // bla .__get_tpointer
+ // add reg2, reg1, r3
+ TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
+ return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
+ }
+
+  // The Local-Exec and General-Dynamic TLS models are currently the only
+  // supported access models. If local-exec is not possible or not specified,
+  // all GlobalTLSAddress nodes are lowered using the general-dynamic model.
// We need to generate two TOC entries, one for the variable offset, one for
// the region handle. The global address for the TOC entry of the region
// handle is created with the MO_TLSGDM_FLAG flag and the global address
@@ -4167,12 +4203,12 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
- CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
+ CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
// Area that is at least reserved in the caller of this function.
- unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
+ unsigned MinReservedArea = CCByValInfo.getStackSize();
MinReservedArea = std::max(MinReservedArea, LinkageSize);
// Set the size that is at least reserved in caller of this function. Tail
@@ -4210,9 +4246,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
- FuncInfo->setVarArgsStackOffset(
- MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
- CCInfo.getNextStackOffset(), true));
+ FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
+ PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
FuncInfo->setVarArgsFrameIndex(
MFI.CreateStackObject(Depth, Align(8), false));
@@ -4672,9 +4707,10 @@ static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
return SPDiff;
}
-static bool isFunctionGlobalAddress(SDValue Callee);
+static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
-static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
+static bool callsShareTOCBase(const Function *Caller,
+ const GlobalValue *CalleeGV,
const TargetMachine &TM) {
// It does not make sense to call callsShareTOCBase() with a caller that
// is PC Relative since PC Relative callers do not have a TOC.
@@ -4688,23 +4724,20 @@ static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
// don't have enough information to determine if the caller and callee share
// the same TOC base, so we have to pessimistically assume they don't for
// correctness.
- GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- if (!G)
+ if (!CalleeGV)
return false;
- const GlobalValue *GV = G->getGlobal();
-
// If the callee is preemptable, then the static linker will use a plt-stub
// which saves the toc to the stack, and needs a nop after the call
// instruction to convert to a toc-restore.
- if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
+ if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), CalleeGV))
return false;
// Functions with PC Relative enabled may clobber the TOC in the same DSO.
// We may need a TOC restore in the situation where the caller requires a
// valid TOC but the callee is PC Relative and does not.
- const Function *F = dyn_cast<Function>(GV);
- const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
+ const Function *F = dyn_cast<Function>(CalleeGV);
+ const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
// If we have an Alias we can try to get the function from there.
if (Alias) {
@@ -4729,7 +4762,7 @@ static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
// replaced by another function at link time. The function that replaces
// it may not share the same TOC as the caller since the callee may be
// replaced by a PC Relative version of the same function.
- if (!GV->isStrongDefinitionForLinker())
+ if (!CalleeGV->isStrongDefinitionForLinker())
return false;
// The medium and large code models are expected to provide a sufficiently
@@ -4742,10 +4775,10 @@ static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
// Any explicitly-specified sections and section prefixes must also match.
// Also, if we're using -ffunction-sections, then each function is always in
// a different section (the same is true for COMDAT functions).
- if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
- GV->getSection() != Caller->getSection())
+ if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
+ Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
return false;
- if (const auto *F = dyn_cast<Function>(GV)) {
+ if (const auto *F = dyn_cast<Function>(CalleeGV)) {
if (F->getSectionPrefix() != Caller->getSectionPrefix())
return false;
}
@@ -4838,9 +4871,11 @@ areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
}
bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
- SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
+ const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
+ CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
+ const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
+ bool isCalleeExternalSymbol) const {
bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
if (DisableSCO && !TailCallOpt) return false;
@@ -4848,9 +4883,8 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
// Variadic argument functions are not supported.
if (isVarArg) return false;
- auto &Caller = DAG.getMachineFunction().getFunction();
// Check that the calling conventions are compatible for tco.
- if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
+ if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
return false;
// Caller contains any byval parameter is not supported.
@@ -4878,8 +4912,7 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
// If callee and caller use different calling conventions, we cannot pass
// parameters on stack since offsets for the parameter area may be different.
- if (Caller.getCallingConv() != CalleeCC &&
- needStackSlotPassParameters(Subtarget, Outs))
+ if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
return false;
// All variants of 64-bit ELF ABIs without PC-Relative addressing require that
@@ -4892,12 +4925,12 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
// applicable so this check is not required.
// Check first for indirect calls.
if (!Subtarget.isUsingPCRelativeCalls() &&
- !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
+ !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
return false;
// Check if we share the TOC base.
if (!Subtarget.isUsingPCRelativeCalls() &&
- !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
+ !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
return false;
// TCO allows altering callee ABI, so we don't have to check further.
@@ -4912,7 +4945,7 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
// PC Relative tail calls may not have a CallBase.
// If there is no CallBase we cannot verify if we have the same argument
// list so assume that we don't have the same argument list.
- if (CB && !hasSameArgumentList(&Caller, *CB) &&
+ if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
needStackSlotPassParameters(Subtarget, Outs))
return false;
else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
@@ -4924,12 +4957,10 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
-bool
-PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
- CallingConv::ID CalleeCC,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const {
+bool PPCTargetLowering::IsEligibleForTailCallOptimization(
+ const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
+ CallingConv::ID CallerCC, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins) const {
if (!getTargetMachine().Options.GuaranteedTailCallOpt)
return false;
@@ -4937,14 +4968,10 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (isVarArg)
return false;
- MachineFunction &MF = DAG.getMachineFunction();
- CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
// Functions containing by val parameters are not supported.
- for (unsigned i = 0; i != Ins.size(); i++) {
- ISD::ArgFlagsTy Flags = Ins[i].Flags;
- if (Flags.isByVal()) return false;
- }
+ if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
+ return false;
// Non-PIC/GOT tail calls are supported.
if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
@@ -4952,9 +4979,9 @@ PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// At the moment we can only do local tail calls (in same module, hidden
// or protected) if we are generating PIC.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- return G->getGlobal()->hasHiddenVisibility()
- || G->getGlobal()->hasProtectedVisibility();
+ if (CalleeGV)
+ return CalleeGV->hasHiddenVisibility() ||
+ CalleeGV->hasProtectedVisibility();
}
return false;
@@ -5104,7 +5131,7 @@ static void LowerMemOpCallTo(
}
static void
-PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
+PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
SDValue FPOp,
SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
@@ -5112,7 +5139,7 @@ PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
// might overwrite each other in case of tail call optimization.
SmallVector<SDValue, 8> MemOpChains2;
// Do not flag preceding copytoreg stuff together with the following stuff.
- InFlag = SDValue();
+ InGlue = SDValue();
StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
MemOpChains2, dl);
if (!MemOpChains2.empty())
@@ -5122,26 +5149,25 @@ PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
// Emit callseq_end just before tailcall node.
- Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InFlag, dl);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
+ InGlue = Chain.getValue(1);
}
// Is this global address that of a function that can be called by name? (as
// opposed to something that must hold a descriptor for an indirect call).
-static bool isFunctionGlobalAddress(SDValue Callee) {
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
- Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
+static bool isFunctionGlobalAddress(const GlobalValue *GV) {
+ if (GV) {
+ if (GV->isThreadLocal())
return false;
- return G->getGlobal()->getValueType()->isFunctionTy();
+ return GV->getValueType()->isFunctionTy();
}
return false;
}
SDValue PPCTargetLowering::LowerCallResult(
- SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
+ SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
SmallVector<CCValAssign, 16> RVLocs;
@@ -5162,22 +5188,22 @@ SDValue PPCTargetLowering::LowerCallResult(
if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
- InFlag);
+ InGlue);
Chain = Lo.getValue(1);
- InFlag = Lo.getValue(2);
+ InGlue = Lo.getValue(2);
VA = RVLocs[++i]; // skip ahead to next loc
SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
- InFlag);
+ InGlue);
Chain = Hi.getValue(1);
- InFlag = Hi.getValue(2);
+ InGlue = Hi.getValue(2);
if (!Subtarget.isLittleEndian())
std::swap (Lo, Hi);
Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
} else {
Val = DAG.getCopyFromReg(Chain, dl,
- VA.getLocReg(), VA.getLocVT(), InFlag);
+ VA.getLocReg(), VA.getLocVT(), InGlue);
Chain = Val.getValue(1);
- InFlag = Val.getValue(2);
+ InGlue = Val.getValue(2);
}
switch (VA.getLocInfo()) {
@@ -5206,11 +5232,14 @@ SDValue PPCTargetLowering::LowerCallResult(
static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
const PPCSubtarget &Subtarget, bool isPatchPoint) {
+ auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ const GlobalValue *GV = G ? G->getGlobal() : nullptr;
+
// PatchPoint calls are not indirect.
if (isPatchPoint)
return false;
- if (isFunctionGlobalAddress(Callee) || isa<ExternalSymbolSDNode>(Callee))
+ if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
return false;
// Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
@@ -5255,7 +5284,7 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
} else if (Subtarget.isUsingPCRelativeCalls()) {
assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
RetOpc = PPCISD::CALL_NOTOC;
- } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
+ } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
// The ABIs that maintain a TOC pointer across calls need to have a nop
// immediately following the call instruction if the caller and callee may
// have different TOC bases. At link time if the linker determines the calls
@@ -5264,9 +5293,11 @@ static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
// TOC pointer at an ABI designated offset in the linkage area and the
// linker will rewrite the nop to be a load of the TOC pointer from the
// linkage area into gpr2.
- RetOpc = callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
- : PPCISD::CALL_NOP;
- else
+ auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ const GlobalValue *GV = G ? G->getGlobal() : nullptr;
+ RetOpc =
+ callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
+ } else
RetOpc = PPCISD::CALL;
if (IsStrictFPCall) {
switch (RetOpc) {
@@ -5326,7 +5357,9 @@ static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
return DAG.getMCSymbol(S, PtrVT);
};
- if (isFunctionGlobalAddress(Callee)) {
+ auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ const GlobalValue *GV = G ? G->getGlobal() : nullptr;
+ if (isFunctionGlobalAddress(GV)) {
const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
if (Subtarget.isAIXABI()) {
@@ -5617,7 +5650,9 @@ SDValue PPCTargetLowering::FinishCall(
assert(CallOpc == PPCISD::TC_RETURN &&
"Unexpected call opcode for a tail call.");
DAG.getMachineFunction().getFrameInfo().setHasTailCall();
- return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
+ SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
+ DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
+ return Ret;
}
std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
@@ -5640,6 +5675,45 @@ SDValue PPCTargetLowering::FinishCall(
DAG, InVals);
}
+bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
+ CallingConv::ID CalleeCC = CB->getCallingConv();
+ const Function *CallerFunc = CB->getCaller();
+ CallingConv::ID CallerCC = CallerFunc->getCallingConv();
+ const Function *CalleeFunc = CB->getCalledFunction();
+ if (!CalleeFunc)
+ return false;
+ const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
+
+ SmallVector<ISD::OutputArg, 2> Outs;
+ SmallVector<ISD::InputArg, 2> Ins;
+
+ GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
+ CalleeFunc->getAttributes(), Outs, *this,
+ CalleeFunc->getParent()->getDataLayout());
+
+ return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
+ CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
+ false /*isCalleeExternalSymbol*/);
+}
+
+bool PPCTargetLowering::isEligibleForTCO(
+ const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
+ CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
+ bool isCalleeExternalSymbol) const {
+ if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
+ return false;
+
+ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
+ return IsEligibleForTailCallOptimization_64SVR4(
+ CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
+ isCalleeExternalSymbol);
+ else
+ return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
+ isVarArg, Ins);
+}
+
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
@@ -5657,14 +5731,15 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
const CallBase *CB = CLI.CB;
if (isTailCall) {
- if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
- isTailCall = false;
- else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
- isTailCall = IsEligibleForTailCallOptimization_64SVR4(
- Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
- else
- isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
- Ins, DAG);
+ MachineFunction &MF = DAG.getMachineFunction();
+ CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
+ auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
+ const GlobalValue *GV = G ? G->getGlobal() : nullptr;
+ bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
+
+ isTailCall =
+ isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
+ &(MF.getFunction()), IsCalleeExternalSymbol);
if (isTailCall) {
++NumTailCalls;
if (!getTargetMachine().Options.GuaranteedTailCallOpt)
@@ -5784,7 +5859,7 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
if (Result) {
#ifndef NDEBUG
errs() << "Call operand #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString() << "\n";
+ << ArgVT << "\n";
#endif
llvm_unreachable(nullptr);
}
@@ -5800,14 +5875,14 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
// Reserve stack space for the allocations in CCInfo.
- CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
+ CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
// Size of the linkage area, parameter list area and the part of the local
// space variable where copies of aggregates which are passed by value are
// stored.
- unsigned NumBytes = CCByValInfo.getNextStackOffset();
+ unsigned NumBytes = CCByValInfo.getStackSize();
// Calculate by how many bytes the stack has to be adjusted in case of tail
// call optimization.
@@ -5927,30 +6002,30 @@ SDValue PPCTargetLowering::LowerCall_32SVR4(
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
- SDValue InFlag;
+ SDValue InGlue;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
+ RegsToPass[i].second, InGlue);
+ InGlue = Chain.getValue(1);
}
// Set CR bit 6 to true if this is a vararg call with floating args passed in
// registers.
if (IsVarArg) {
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue Ops[] = { Chain, InFlag };
+ SDValue Ops[] = { Chain, InGlue };
Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
- VTs, ArrayRef(Ops, InFlag.getNode() ? 2 : 1));
+ VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
}
if (IsTailCall)
- PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
+ PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
TailCallArguments);
- return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
+ return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
@@ -6541,18 +6616,18 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
- SDValue InFlag;
+ SDValue InGlue;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
+ RegsToPass[i].second, InGlue);
+ InGlue = Chain.getValue(1);
}
if (CFlags.IsTailCall && !IsSibCall)
- PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
+ PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
TailCallArguments);
- return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
+ return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
@@ -6628,8 +6703,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
// but needs a MemLoc for a stack slot for the formal arguments side.
if (ByValSize == 0) {
State.addLoc(CCValAssign::getMem(ValNo, MVT::INVALID_SIMPLE_VALUE_TYPE,
- State.getNextStackOffset(), RegVT,
- LocInfo));
+ State.getStackSize(), RegVT, LocInfo));
return false;
}
@@ -7172,8 +7246,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
// On AIX a minimum of 8 words is saved to the parameter save area.
const unsigned MinParameterSaveArea = 8 * PtrByteSize;
// Area that is at least reserved in the caller of this function.
- unsigned CallerReservedArea =
- std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);
+ unsigned CallerReservedArea = std::max<unsigned>(
+ CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
// Set the size that is at least reserved in caller of this function. Tail
// call optimized function's reserved stack space needs to be aligned so
@@ -7185,7 +7259,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
if (isVarArg) {
FuncInfo->setVarArgsFrameIndex(
- MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
+ MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
@@ -7199,7 +7273,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
// VarArgsFrameIndex on the stack so that they may be loaded by
// dereferencing the result of va_next.
for (unsigned GPRIndex =
- (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
+ (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
GPRIndex < NumGPArgRegs; ++GPRIndex) {
const Register VReg =
@@ -7265,8 +7339,8 @@ SDValue PPCTargetLowering::LowerCall_AIX(
// conservatively assume that it is needed. As such, make sure we have at
// least enough stack space for the caller to store the 8 GPRs.
const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
- const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
- CCInfo.getNextStackOffset());
+ const unsigned NumBytes = std::max<unsigned>(
+ LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass.
@@ -7351,7 +7425,7 @@ SDValue PPCTargetLowering::LowerCall_AIX(
"Unexpected register residue for by-value argument.");
SDValue ResidueVal;
for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
- const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
+ const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
const MVT VT =
N == 1 ? MVT::i8
: ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
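
The residue loop above copies the by-value tail in the largest power-of-two chunks first, now using llvm::bit_floor in place of PowerOf2Floor. A minimal standalone C++20 sketch of that chunking, with an illustrative residue size and std::bit_floor as the host-library equivalent:

#include <bit>
#include <cstdio>

int main() {
  unsigned ResidueBytes = 7; // illustrative residue left over after full-register copies
  for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
    unsigned N = std::bit_floor(ResidueBytes - Bytes); // largest 2^k chunk that fits
    std::printf("copy %u byte(s) at offset %u\n", N, Bytes); // prints 4, then 2, then 1
    Bytes += N;
  }
  return 0;
}
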
@@ -7532,14 +7606,14 @@ SDValue PPCTargetLowering::LowerCall_AIX(
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
- SDValue InFlag;
+ SDValue InGlue;
for (auto Reg : RegsToPass) {
- Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
+ InGlue = Chain.getValue(1);
}
const int SPDiff = 0;
- return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
+ return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
Callee, SPDiff, NumBytes, Ins, InVals, CB);
}
@@ -7570,7 +7644,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
? RetCC_PPC_Cold
: RetCC_PPC);
- SDValue Flag;
+ SDValue Glue;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
@@ -7599,26 +7673,26 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
SDValue SVal =
DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
- Flag = Chain.getValue(1);
+ Glue = Chain.getValue(1);
VA = RVLocs[++i]; // skip ahead to next loc
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
} else
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
- Flag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
RetOps[0] = Chain; // Update chain.
- // Add the flag if we have it.
- if (Flag.getNode())
- RetOps.push_back(Flag);
+ // Add the glue if we have it.
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
- return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
+ return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
}
SDValue
@@ -7843,15 +7917,15 @@ SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
EVT EltVT = TrgVT.getVectorElementType();
if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
- !isPowerOf2_32(EltVT.getSizeInBits()))
+ !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
return SDValue();
SDValue N1 = Op.getOperand(0);
EVT SrcVT = N1.getValueType();
unsigned SrcSize = SrcVT.getSizeInBits();
- if (SrcSize > 256 ||
- !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
- !isPowerOf2_32(SrcVT.getVectorElementType().getSizeInBits()))
+ if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
+ !llvm::has_single_bit<uint32_t>(
+ SrcVT.getVectorElementType().getSizeInBits()))
return SDValue();
if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
return SDValue();
@@ -8065,7 +8139,11 @@ static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
// For strict nodes, source is the second operand.
SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
- assert(Src.getValueType().isFloatingPoint());
+ MVT DestTy = Op.getSimpleValueType();
+ assert(Src.getValueType().isFloatingPoint() &&
+ (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
+ DestTy == MVT::i64) &&
+ "Invalid FP_TO_INT types");
if (Src.getValueType() == MVT::f32) {
if (IsStrict) {
Src =
@@ -8075,9 +8153,10 @@ static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
} else
Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
}
- SDValue Conv;
+ if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
+ DestTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
unsigned Opc = ISD::DELETED_NODE;
- switch (Op.getSimpleValueType().SimpleTy) {
+ switch (DestTy.SimpleTy) {
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Opc = IsSigned ? PPCISD::FCTIWZ
@@ -8088,12 +8167,14 @@ static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
"i64 FP_TO_UINT is supported only with FPCVT");
Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
}
+ EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
+ SDValue Conv;
if (IsStrict) {
Opc = getPPCStrictOpcode(Opc);
- Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
- {Chain, Src}, Flags);
+ Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
+ Flags);
} else {
- Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
+ Conv = DAG.getNode(Opc, dl, ConvTy, Src);
}
return Conv;
}
@@ -8180,10 +8261,8 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
if (IsSigned) {
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
- DAG.getIntPtrConstant(0, dl));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
- DAG.getIntPtrConstant(1, dl));
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
// Add the two halves of the long double in round-to-zero mode, and use
// a smaller FP_TO_SINT.
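
For context, ppc_fp128 is the IBM double-double format: a value held as the unevaluated sum of two doubles, which is why SplitScalar into two f64 halves followed by a round-to-zero add is enough before the narrower conversion. A generic illustration of the split (the host long double merely stands in for a double-double value):

#include <cstdio>

int main() {
  // Double-double idea: the value is Hi + Lo, where Hi carries the leading
  // bits and Lo the residual that did not fit into a single double.
  long double V = 1.0L / 3.0L;                           // stand-in value
  double Hi = static_cast<double>(V);                    // leading half
  double Lo = static_cast<double>(V - (long double)Hi);  // residual half
  std::printf("Hi=%.17g Lo=%.17g\n", Hi, Lo);
  return 0;
}
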
@@ -9266,7 +9345,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
// Below cases should also happen for "lfiwzx/lfiwax + LE target + index
// 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
- // 15", but funciton IsValidSplatLoad() now will only return true when
+ // 15", but function IsValidSplatLoad() now will only return true when
// the data at index 0 is not nullptr. So we will not get into trouble for
// these cases.
//
@@ -10178,14 +10257,16 @@ SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
if (isLittleEndian)
std::swap(V1, V2);
- if (Subtarget.isISA3_0() && (V1->hasOneUse() || V2->hasOneUse())) {
+ if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
+ (V1->hasOneUse() || V2->hasOneUse())) {
LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
"XXPERM instead\n");
Opcode = PPCISD::XXPERM;
- // if V2 is dead, then we swap V1 and V2 so we can
- // use V2 as the destination instead.
- if (!V1->hasOneUse() && V2->hasOneUse()) {
+ // The second input to XXPERM is also an output, so if the second input has
+ // multiple uses a copy is required. As a result, we want the single-use
+ // operand to be the second input, to avoid that copy.
+ if (!V2->hasOneUse() && V1->hasOneUse()) {
std::swap(V1, V2);
NeedSwap = !NeedSwap;
}
@@ -10634,7 +10715,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
RetOps.push_back(Extract);
return DAG.getMergeValues(RetOps, dl);
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
case Intrinsic::ppc_vsx_disassemble_pair: {
int NumVecs = 2;
@@ -10655,6 +10736,20 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getMergeValues(RetOps, dl);
}
+ case Intrinsic::ppc_mma_xxmfacc:
+ case Intrinsic::ppc_mma_xxmtacc: {
+ // Allow pre-isa-future subtargets to lower as normal.
+ if (!Subtarget.isISAFuture())
+ return SDValue();
+ // The intrinsics for xxmtacc and xxmfacc take one argument of
+ // type v512i1. For future CPUs the corresponding wacc instruction,
+ // dmxx[inst|extf]dmr512, is always generated for type v512i1, removing
+ // the need to produce the xxm[t|f]acc.
+ SDValue WideVec = Op.getOperand(1);
+ DAG.ReplaceAllUsesWith(Op, WideVec);
+ return SDValue();
+ }
+
case Intrinsic::ppc_unpack_longdouble: {
auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
@@ -10956,6 +11051,153 @@ SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
}
}
+static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
+ SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) {
+ assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
+
+ enum DataClassMask {
+ DC_NAN = 1 << 6,
+ DC_NEG_INF = 1 << 4,
+ DC_POS_INF = 1 << 5,
+ DC_NEG_ZERO = 1 << 2,
+ DC_POS_ZERO = 1 << 3,
+ DC_NEG_SUBNORM = 1,
+ DC_POS_SUBNORM = 1 << 1,
+ };
+
+ EVT VT = Op.getValueType();
+
+ unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
+ : VT == MVT::f64 ? PPC::XSTSTDCDP
+ : PPC::XSTSTDCSP;
+
+ if (Mask == fcAllFlags)
+ return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
+ if (Mask == 0)
+ return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
+
+ // Test the inverted flags when doing so is cheaper or necessary.
+ if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
+ SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
+ return DAG.getNOT(Dl, Rev, MVT::i1);
+ }
+
+ // Power doesn't support testing whether a value is 'normal'. Test the rest
+ // first, and test if it's 'not not-normal' with expected sign.
+ if (Mask & fcNormal) {
+ SDValue Rev(DAG.getMachineNode(
+ TestOp, Dl, MVT::i32,
+ DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
+ DC_NEG_ZERO | DC_POS_ZERO |
+ DC_NEG_SUBNORM | DC_POS_SUBNORM,
+ Dl, MVT::i32),
+ Op),
+ 0);
+ // The sign is stored in CR bit 0 and the result in CR bit 2.
+ SDValue Sign(
+ DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
+ DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
+ 0);
+ SDValue Normal(DAG.getNOT(
+ Dl,
+ SDValue(DAG.getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
+ DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
+ 0),
+ MVT::i1));
+ if (Mask & fcPosNormal)
+ Sign = DAG.getNOT(Dl, Sign, MVT::i1);
+ SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
+ if (Mask == fcPosNormal || Mask == fcNegNormal)
+ return Result;
+
+ return DAG.getNode(
+ ISD::OR, Dl, MVT::i1,
+ getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
+ }
+
+ // The instruction doesn't differentiate between signaling and quiet NaNs.
+ // Test the rest first, then test 'is NaN and is signaling/quiet'.
+ if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
+ bool IsQuiet = Mask & fcQNan;
+ SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
+
+ // Quietness is determined by the most significant bit of the fraction field.
+ uint64_t QuietMask = 0;
+ SDValue HighWord;
+ if (VT == MVT::f128) {
+ HighWord = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
+ DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
+ QuietMask = 0x8000;
+ } else if (VT == MVT::f64) {
+ if (Subtarget.isPPC64()) {
+ HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
+ DAG.getBitcast(MVT::i64, Op),
+ DAG.getConstant(1, Dl, MVT::i32));
+ } else {
+ SDValue Vec = DAG.getBitcast(
+ MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
+ HighWord = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
+ DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
+ }
+ QuietMask = 0x80000;
+ } else if (VT == MVT::f32) {
+ HighWord = DAG.getBitcast(MVT::i32, Op);
+ QuietMask = 0x400000;
+ }
+ SDValue NanRes = DAG.getSetCC(
+ Dl, MVT::i1,
+ DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
+ DAG.getConstant(QuietMask, Dl, MVT::i32)),
+ DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
+ NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
+ if (Mask == fcQNan || Mask == fcSNan)
+ return NanRes;
+
+ return DAG.getNode(ISD::OR, Dl, MVT::i1,
+ getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
+ NanRes);
+ }
+
+ unsigned NativeMask = 0;
+ if ((Mask & fcNan) == fcNan)
+ NativeMask |= DC_NAN;
+ if (Mask & fcNegInf)
+ NativeMask |= DC_NEG_INF;
+ if (Mask & fcPosInf)
+ NativeMask |= DC_POS_INF;
+ if (Mask & fcNegZero)
+ NativeMask |= DC_NEG_ZERO;
+ if (Mask & fcPosZero)
+ NativeMask |= DC_POS_ZERO;
+ if (Mask & fcNegSubnormal)
+ NativeMask |= DC_NEG_SUBNORM;
+ if (Mask & fcPosSubnormal)
+ NativeMask |= DC_POS_SUBNORM;
+ return SDValue(
+ DAG.getMachineNode(
+ TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
+ SDValue(DAG.getMachineNode(
+ TestOp, Dl, MVT::i32,
+ DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
+ 0),
+ DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
+ 0);
+}
+
+SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
+ SDValue LHS = Op.getOperand(0);
+ const auto *RHS = cast<ConstantSDNode>(Op.getOperand(1));
+ SDLoc Dl(Op);
+ FPClassTest Category = static_cast<FPClassTest>(RHS->getZExtValue());
+ return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
+}
+
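
As background on the QuietMask constants in getDataClassTest (0x400000 for f32, 0x80000 in the high word of f64, 0x8000 in the high word of f128): IEEE 754 keeps the quiet/signaling distinction in the most significant fraction bit. A minimal standalone C++20 sketch of the same bit test for f32 and f64, illustrative only and not taken from the LLVM sources:

#include <bit>
#include <cmath>
#include <cstdint>
#include <cstdio>

static bool isQuietNaN32(float F) {
  uint32_t Bits = std::bit_cast<uint32_t>(F);
  bool IsNaN = (Bits & 0x7f800000u) == 0x7f800000u && (Bits & 0x007fffffu) != 0;
  return IsNaN && (Bits & 0x00400000u) != 0;   // fraction MSB = bit 22
}

static bool isQuietNaN64(double D) {
  uint64_t Bits = std::bit_cast<uint64_t>(D);
  uint32_t Hi = static_cast<uint32_t>(Bits >> 32);
  bool IsNaN = (Hi & 0x7ff00000u) == 0x7ff00000u &&
               ((Hi & 0x000fffffu) != 0 || static_cast<uint32_t>(Bits) != 0);
  return IsNaN && (Hi & 0x00080000u) != 0;     // fraction MSB = bit 19 of the high word
}

int main() {
  std::printf("%d %d\n", isQuietNaN32(std::nanf("")), isQuietNaN64(std::nan("")));
  return 0;
}
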
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -11383,6 +11625,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
return LowerATOMIC_CMP_SWAP(Op, DAG);
case ISD::ATOMIC_STORE:
return LowerATOMIC_LOAD_STORE(Op, DAG);
+ case ISD::IS_FPCLASS:
+ return LowerIS_FPCLASS(Op, DAG);
}
}
@@ -11729,6 +11973,7 @@ MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
.addReg(MI.getOperand(3).getReg());
MI.getOperand(3).setReg(ValueReg);
+ incr = ValueReg;
}
// If we support part-word atomic mnemonics, just use them
if (Subtarget.hasPartwordAtomics())
@@ -14835,60 +15080,49 @@ SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
// Handle DAG combine for STORE (FP_TO_INT F).
SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
DAGCombinerInfo &DCI) const {
-
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
unsigned Opcode = N->getOperand(1).getOpcode();
+ (void)Opcode;
+ bool Strict = N->getOperand(1)->isStrictFPOpcode();
- assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
+ assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
+ Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
&& "Not a FP_TO_INT Instruction!");
- SDValue Val = N->getOperand(1).getOperand(0);
+ SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
EVT Op1VT = N->getOperand(1).getValueType();
EVT ResVT = Val.getValueType();
- if (!isTypeLegal(ResVT))
+ if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
return SDValue();
// Only perform combine for conversion to i64/i32 or power9 i16/i8.
bool ValidTypeForStoreFltAsInt =
- (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
+ (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
(Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
- if (ResVT == MVT::f128 && !Subtarget.hasP9Vector())
+ // TODO: Lower conversion from f128 on all VSX targets
+ if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
return SDValue();
- if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
+ if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
return SDValue();
- // Extend f32 values to f64
- if (ResVT.getScalarSizeInBits() == 32) {
- Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
- DCI.AddToWorklist(Val.getNode());
- }
-
- // Set signed or unsigned conversion opcode.
- unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
- PPCISD::FP_TO_SINT_IN_VSR :
- PPCISD::FP_TO_UINT_IN_VSR;
-
- Val = DAG.getNode(ConvOpcode,
- dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
- DCI.AddToWorklist(Val.getNode());
+ Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
// Set number of bytes being converted.
unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
- SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
- DAG.getIntPtrConstant(ByteSize, dl, false),
- DAG.getValueType(Op1VT) };
+ SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
+ DAG.getIntPtrConstant(ByteSize, dl, false),
+ DAG.getValueType(Op1VT)};
Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
DAG.getVTList(MVT::Other), Ops,
cast<StoreSDNode>(N)->getMemoryVT(),
cast<StoreSDNode>(N)->getMemOperand());
- DCI.AddToWorklist(Val.getNode());
return Val;
}
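
For orientation, the STORE(FP_TO_INT f) shape this combine now also matches in its strict form comes from source like the sketch below (hypothetical functions, shown only to illustrate the pattern); the combine emits PPCISD::ST_VSR_SCAL_INT so the converted value can be stored directly from a VSX register.

#include <cstdint>

// Stores fed directly by an fp-to-int conversion. With VSX and FPCVT these
// become a single convert-and-store node instead of convert, move, then
// store. The i16/i8 widths additionally require Power9 vector support.
void store_f2i32(double D, int32_t *P) { *P = static_cast<int32_t>(D); }
void store_f2i16(float F, int16_t *P)  { *P = static_cast<int16_t>(F); }
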
@@ -15271,6 +15505,30 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case ISD::ADD:
return combineADD(N, DCI);
+ case ISD::AND: {
+ // We don't want (and (zext (shift...)), C) if C fits in the width of the
+ // original input as that will prevent us from selecting optimal rotates.
+ // This only matters if the input to the extend is i32 widened to i64.
+ SDValue Op1 = N->getOperand(0);
+ SDValue Op2 = N->getOperand(1);
+ if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
+ Op1.getOpcode() != ISD::ANY_EXTEND) ||
+ !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
+ Op1.getOperand(0).getValueType() != MVT::i32)
+ break;
+ SDValue NarrowOp = Op1.getOperand(0);
+ if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
+ NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
+ break;
+
+ uint64_t Imm = cast<ConstantSDNode>(Op2)->getZExtValue();
+ // Make sure that the constant is narrow enough to fit in the narrow type.
+ if (!isUInt<32>(Imm))
+ break;
+ SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
+ SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
+ return DAG.getAnyExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
+ }
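
The narrowing above rests on a simple identity: when the constant fits in 32 bits, ANDing the zero-extended 32-bit result in 64 bits equals zero-extending the 32-bit AND, so the rotate and mask can stay in 32-bit form where a single rlwinm-style rotate-and-mask covers it. A standalone check of the identity, with the 32-bit rotate written out by hand since the rotate itself is incidental:

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t X, unsigned R) {
  R &= 31;
  return R == 0 ? X : (X << R) | (X >> (32 - R));
}

int main() {
  uint32_t X = 0xdeadbeefu;
  uint64_t C = 0x00ffff00u;                                  // fits in 32 bits (isUInt<32>)
  uint64_t Wide   = (uint64_t)rotl32(X, 12) & C;             // and-after-zext
  uint64_t Narrow = (uint64_t)(rotl32(X, 12) & (uint32_t)C); // zext-after-and
  assert(Wide == Narrow);
  return 0;
}
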
case ISD::SHL:
return combineSHL(N, DCI);
case ISD::SRA:
@@ -15323,8 +15581,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
EVT Op1VT = N->getOperand(1).getValueType();
unsigned Opcode = N->getOperand(1).getOpcode();
- if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
- SDValue Val= combineStoreFPToInt(N, DCI);
+ if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
+ Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
+ SDValue Val = combineStoreFPToInt(N, DCI);
if (Val)
return Val;
}
@@ -15738,16 +15997,37 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::INTRINSIC_W_CHAIN:
- // For little endian, VSX loads require generating lxvd2x/xxswapd.
- // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
- if (Subtarget.needsSwapsForVSXMemOps()) {
- switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
- default:
- break;
- case Intrinsic::ppc_vsx_lxvw4x:
- case Intrinsic::ppc_vsx_lxvd2x:
- return expandVSXLoadForLE(N, DCI);
+ switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+ default:
+ break;
+ case Intrinsic::ppc_altivec_vsum4sbs:
+ case Intrinsic::ppc_altivec_vsum4shs:
+ case Intrinsic::ppc_altivec_vsum4ubs: {
+ // These sum-across intrinsics only have a chain due to the side effect
+ // that they may set the SAT bit. If we know the SAT bit will not be set
+ // for some inputs, we can replace any uses of their chain with the input
+ // chain.
+ if (BuildVectorSDNode *BVN =
+ dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
+ APInt APSplatBits, APSplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+ bool BVNIsConstantSplat = BVN->isConstantSplat(
+ APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
+ !Subtarget.isLittleEndian());
+ // If the constant splat vector is 0, the SAT bit will not be set.
+ if (BVNIsConstantSplat && APSplatBits == 0)
+ DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
}
+ return SDValue();
+ }
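
The zero-splat condition is sufficient because of the value ranges involved: vsum4sbs/vsum4ubs/vsum4shs add at most four byte lanes (or two halfword lanes) into each 32-bit word plus the corresponding word of the second operand, so with a zero second operand the per-word sums stay far inside the 32-bit range and saturation, and hence the SAT bit, cannot occur. A worked bound for the signed-byte case:

#include <cstdint>
#include <cstdio>

int main() {
  // Worst cases for vsum4sbs with a zero accumulator operand: four int8 lanes
  // at their extremes summed into one 32-bit word.
  int32_t MaxSum = 4 * INT8_MAX; // +508
  int32_t MinSum = 4 * INT8_MIN; // -512
  std::printf("bounds: [%d, %d], nowhere near the INT32 limits\n", MinSum, MaxSum);
  return 0;
}
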
+ case Intrinsic::ppc_vsx_lxvw4x:
+ case Intrinsic::ppc_vsx_lxvd2x:
+ // For little endian, VSX loads require generating lxvd2x/xxswapd.
+ // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
+ if (Subtarget.needsSwapsForVSXMemOps())
+ return expandVSXLoadForLE(N, DCI);
+ break;
}
break;
case ISD::INTRINSIC_VOID:
@@ -16008,10 +16288,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::BUILD_VECTOR:
return DAGCombineBuildVector(N, DCI);
- case ISD::ABS:
- return combineABS(N, DCI);
- case ISD::VSELECT:
- return combineVSelect(N, DCI);
}
return SDValue();
@@ -16033,7 +16309,7 @@ PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SDValue N0 = N->getOperand(0);
bool IsNegPow2 = Divisor.isNegatedPowerOf2();
- unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
+ unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
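
BuildSDIVPow2 only needs the shift amount, now taken with countr_zero; the PPCISD::SRA_ADDZE node then provides the round-toward-zero behaviour sdiv requires (on PPC this is conventionally an arithmetic shift followed by add-with-carry). A standalone sketch of the same arithmetic, biasing negative dividends instead of using a carry:

#include <bit>
#include <cassert>
#include <cstdint>

// Signed divide by a power of two with the round-toward-zero behaviour of sdiv.
static int64_t sdivPow2(int64_t X, uint64_t Divisor) {
  unsigned Lg2 = std::countr_zero(Divisor);               // Divisor == 2^Lg2
  int64_t Biased = X < 0 ? X + (int64_t)Divisor - 1 : X;  // bias so the shift truncates toward zero
  return Biased >> Lg2;
}

int main() {
  for (int64_t X : {-9, -8, -7, -1, 0, 1, 7, 8, 9})
    assert(sdivPow2(X, 4) == X / 4);
  return 0;
}
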
@@ -16929,7 +17205,7 @@ bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
// 2. If the multiplier after shifted fits 16 bits, an extra shift
// instruction is needed than case 1, ie. MULLI and RLDICR
int64_t Imm = ConstNode->getSExtValue();
- unsigned Shift = countTrailingZeros<uint64_t>(Imm);
+ unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
Imm >>= Shift;
if (isInt<16>(Imm))
return false;
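
The early return above is a cost decision: when the multiplier, after shifting out its trailing zeros, fits in a signed 16-bit immediate, the multiply is still just MULLI plus one shift, so there is no benefit to decomposing it further. The algebra behind that, as a standalone check:

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  int64_t Imm = 0x50000;                                         // 5 << 16
  unsigned Shift = std::countr_zero(static_cast<uint64_t>(Imm)); // 16
  int64_t Factor = Imm >> Shift;                                 // 5, fits a signed 16-bit immediate
  int64_t X = 12345;
  assert(X * Imm == (X * Factor) << Shift);                      // multiply by Factor, then one shift
  return 0;
}
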
@@ -17437,24 +17713,6 @@ SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
SDLoc dl(N);
SDValue Op0 = N->getOperand(0);
- // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
- if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
- EVT VT = N->getValueType(0);
- if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
- return SDValue();
- SDValue Sub = Op0.getOperand(0);
- if (Sub.getOpcode() == ISD::SUB) {
- SDValue SubOp0 = Sub.getOperand(0);
- SDValue SubOp1 = Sub.getOperand(1);
- if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
- (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
- return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
- SubOp1.getOperand(0),
- DCI.DAG.getTargetConstant(0, dl, MVT::i32));
- }
- }
- }
-
// Looking for a truncate of i128 to i64.
if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
return SDValue();
@@ -17638,15 +17896,6 @@ bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
}
-bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
- if (!Subtarget.hasVSX())
- return false;
- if (Subtarget.hasP9Vector() && VT == MVT::f128)
- return true;
- return VT == MVT::f32 || VT == MVT::f64 ||
- VT == MVT::v4f32 || VT == MVT::v2f64;
-}
-
bool PPCTargetLowering::
isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
const Value *Mask = AndI.getOperand(1);
@@ -17664,112 +17913,6 @@ isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
return true;
}
-// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
-// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
-// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
-// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
-// Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
-SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
- assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
- assert(Subtarget.hasP9Altivec() &&
- "Only combine this when P9 altivec supported!");
- EVT VT = N->getValueType(0);
- if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
- return SDValue();
-
- SelectionDAG &DAG = DCI.DAG;
- SDLoc dl(N);
- if (N->getOperand(0).getOpcode() == ISD::SUB) {
- // Even for signed integers, if it's known to be positive (as signed
- // integer) due to zero-extended inputs.
- unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
- unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
- if ((SubOpcd0 == ISD::ZERO_EXTEND ||
- SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
- (SubOpcd1 == ISD::ZERO_EXTEND ||
- SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
- return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
- N->getOperand(0)->getOperand(0),
- N->getOperand(0)->getOperand(1),
- DAG.getTargetConstant(0, dl, MVT::i32));
- }
-
- // For type v4i32, it can be optimized with xvnegsp + vabsduw
- if (N->getOperand(0).getValueType() == MVT::v4i32 &&
- N->getOperand(0).hasOneUse()) {
- return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
- N->getOperand(0)->getOperand(0),
- N->getOperand(0)->getOperand(1),
- DAG.getTargetConstant(1, dl, MVT::i32));
- }
- }
-
- return SDValue();
-}
-
-// For type v4i32/v8ii16/v16i8, transform
-// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
-// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
-// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
-// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
-SDValue PPCTargetLowering::combineVSelect(SDNode *N,
- DAGCombinerInfo &DCI) const {
- assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
- assert(Subtarget.hasP9Altivec() &&
- "Only combine this when P9 altivec supported!");
-
- SelectionDAG &DAG = DCI.DAG;
- SDLoc dl(N);
- SDValue Cond = N->getOperand(0);
- SDValue TrueOpnd = N->getOperand(1);
- SDValue FalseOpnd = N->getOperand(2);
- EVT VT = N->getOperand(1).getValueType();
-
- if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
- FalseOpnd.getOpcode() != ISD::SUB)
- return SDValue();
-
- // ABSD only available for type v4i32/v8i16/v16i8
- if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
- return SDValue();
-
- // At least to save one more dependent computation
- if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
- return SDValue();
-
- ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
-
- // Can only handle unsigned comparison here
- switch (CC) {
- default:
- return SDValue();
- case ISD::SETUGT:
- case ISD::SETUGE:
- break;
- case ISD::SETULT:
- case ISD::SETULE:
- std::swap(TrueOpnd, FalseOpnd);
- break;
- }
-
- SDValue CmpOpnd1 = Cond.getOperand(0);
- SDValue CmpOpnd2 = Cond.getOperand(1);
-
- // SETCC CmpOpnd1 CmpOpnd2 cond
- // TrueOpnd = CmpOpnd1 - CmpOpnd2
- // FalseOpnd = CmpOpnd2 - CmpOpnd1
- if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
- TrueOpnd.getOperand(1) == CmpOpnd2 &&
- FalseOpnd.getOperand(0) == CmpOpnd2 &&
- FalseOpnd.getOperand(1) == CmpOpnd1) {
- return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
- CmpOpnd1, CmpOpnd2,
- DAG.getTargetConstant(0, dl, MVT::i32));
- }
-
- return SDValue();
-}
-
/// getAddrModeForFlags - Based on the set of address flags, select the most
/// optimal instruction format to match by.
PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
@@ -18220,7 +18363,7 @@ PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
if (Flags & PPC::MOF_RPlusSImm16) {
SDValue Op0 = N.getOperand(0);
SDValue Op1 = N.getOperand(1);
- int16_t Imm = cast<ConstantSDNode>(Op1)->getAPIntValue().getZExtValue();
+ int16_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue();
if (!Align || isAligned(*Align, Imm)) {
Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
Base = Op0;
@@ -18324,18 +18467,14 @@ CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
bool IsVarArg) const {
switch (CC) {
case CallingConv::Cold:
- return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF_FIS);
+ return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
default:
- return CC_PPC64_ELF_FIS;
+ return CC_PPC64_ELF;
}
}
bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
- // TODO: 16-byte atomic type support for AIX is in progress; we should be able
- // to inline 16-byte atomic ops on AIX too in the future.
- return Subtarget.isPPC64() &&
- (EnableQuadwordAtomics || !Subtarget.getTargetTriple().isOSAIX()) &&
- Subtarget.hasQuadwordAtomics();
+ return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
}
TargetLowering::AtomicExpansionKind
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index b80479427c2e..e6ebc68008fb 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -28,7 +29,6 @@
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
-#include "llvm/Support/MachineValueType.h"
#include <optional>
#include <utility>
@@ -78,10 +78,6 @@ namespace llvm {
FCTIDUZ,
FCTIWUZ,
- /// Floating-point-to-integer conversion instructions
- FP_TO_UINT_IN_VSR,
- FP_TO_SINT_IN_VSR,
-
/// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
/// VSFRC that is sign-extended from ByteWidth to a 64-bit integer.
VEXTS,
@@ -210,8 +206,8 @@ namespace llvm {
BCTRL_RM,
BCTRL_LOAD_TOC_RM,
- /// Return with a flag operand, matched by 'blr'
- RET_FLAG,
+ /// Return with a glue operand, matched by 'blr'
+ RET_GLUE,
/// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
/// This copies the bits corresponding to the specified CRREG into the
@@ -336,11 +332,11 @@ namespace llvm {
/// finds the offset of "sym" relative to the thread pointer.
LD_GOT_TPREL_L,
- /// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
- /// model, produces an ADD instruction that adds the contents of
- /// G8RReg to the thread pointer. Symbol contains a relocation
- /// sym\@tls which is to be replaced by the thread pointer and
- /// identifies to the linker that the instruction is part of a
+ /// G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec
+ /// and local-exec TLS models, produces an ADD instruction that adds
+ /// the contents of G8RReg to the thread pointer. Symbol contains a
+ /// relocation sym\@tls which is to be replaced by the thread pointer
+ /// and identifies to the linker that the instruction is part of a
/// TLS sequence.
ADD_TLS,
@@ -360,6 +356,11 @@ namespace llvm {
/// ADDIS_TLSGD_L_ADDR until after register assignment.
GET_TLS_ADDR,
+ /// %x3 = GET_TPOINTER - Used for the local-exec TLS model on 32-bit AIX,
+ /// produces a call to .__get_tpointer to retrieve the thread pointer
+ /// At the end of the call, the thread pointer is found in R3.
+ GET_TPOINTER,
+
/// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
/// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
/// register assignment.
@@ -440,21 +441,6 @@ namespace llvm {
/// and thereby have no chain.
SWAP_NO_CHAIN,
- /// An SDNode for Power9 vector absolute value difference.
- /// operand #0 vector
- /// operand #1 vector
- /// operand #2 constant i32 0 or 1, to indicate whether needs to patch
- /// the most significant bit for signed i32
- ///
- /// Power9 VABSD* instructions are designed to support unsigned integer
- /// vectors (byte/halfword/word), if we want to make use of them for signed
- /// integer vectors, we have to flip their sign bits first. To flip sign bit
- /// for byte/halfword integer vector would become inefficient, but for word
- /// integer vector, we can leverage XVNEGSP to make it efficiently. eg:
- /// abs(sub(a,b)) => VABSDUW(a+0x80000000, b+0x80000000)
- /// => VABSDUW((XVNEGSP a), (XVNEGSP b))
- VABSD,
-
/// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
/// lower (IDX=1) half of v4f32 to v2f64.
FP_EXTEND_HALF,
@@ -1187,6 +1173,7 @@ namespace llvm {
CCAssignFn *ccAssignFnForCall(CallingConv::ID CC, bool Return,
bool IsVarArg) const;
+ bool supportsTailCallFor(const CallBase *CB) const;
private:
struct ReuseLoadInfo {
@@ -1239,17 +1226,25 @@ namespace llvm {
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
SDValue getReturnAddrFrameIndex(SelectionDAG & DAG) const;
- bool
- IsEligibleForTailCallOptimization(SDValue Callee,
- CallingConv::ID CalleeCC,
- bool isVarArg,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SelectionDAG& DAG) const;
+ bool IsEligibleForTailCallOptimization(
+ const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
+ CallingConv::ID CallerCC, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins) const;
bool IsEligibleForTailCallOptimization_64SVR4(
- SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB,
- bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs,
- const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
+ const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
+ CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
+ bool isCalleeExternalSymbol) const;
+
+ bool isEligibleForTCO(const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
+ CallingConv::ID CallerCC, const CallBase *CB,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ const Function *CallerFunc,
+ bool isCalleeExternalSymbol) const;
SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG &DAG, int SPDiff,
SDValue Chain, SDValue &LROpOut,
@@ -1299,6 +1294,7 @@ namespace llvm {
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerToLibCall(const char *LibCallName, SDValue Op,
SelectionDAG &DAG) const;
SDValue lowerLibCallBasedOnType(const char *LibCallFloatName,
@@ -1328,7 +1324,7 @@ namespace llvm {
SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
@@ -1336,7 +1332,7 @@ namespace llvm {
SDValue FinishCall(CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
- SDValue InFlag, SDValue Chain, SDValue CallSeqStart,
+ SDValue InGlue, SDValue Chain, SDValue CallSeqStart,
SDValue &Callee, int SPDiff, unsigned NumBytes,
const SmallVectorImpl<ISD::InputArg> &Ins,
SmallVectorImpl<SDValue> &InVals,
@@ -1422,8 +1418,6 @@ namespace llvm {
SDValue combineFMALike(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineVectorShuffle(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG) const;
SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase,
@@ -1468,7 +1462,6 @@ namespace llvm {
// tail call. This will cause the optimizers to attempt to move, or
// duplicate return instructions to help enable tail call optimizations.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
- bool hasBitPreservingFPLogic(EVT VT) const override;
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
/// getAddrModeForFlags - Based on the set of address flags, select the most
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 4335891cd483..fd44efa1b3f4 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -76,22 +76,22 @@ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, hasSideEffects = 0 in {
let isReturn = 1, isPredicable = 1, Uses = [LR8, RM] in
def BLR8 : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB,
- [(retflag)]>, Requires<[In64BitMode]>;
+ [(retglue)]>, Requires<[In64BitMode]>;
let isBranch = 1, isIndirectBranch = 1, Uses = [CTR8] in {
let isPredicable = 1 in
def BCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
[]>,
Requires<[In64BitMode]>;
- def BCCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
+ def BCCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins (pred $BIBO, $CR):$cond),
"b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB,
[]>,
Requires<[In64BitMode]>;
- def BCCTR8 : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$bi),
- "bcctr 12, $bi, 0", IIC_BrB, []>,
+ def BCCTR8 : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$BI),
+ "bcctr 12, $BI, 0", IIC_BrB, []>,
Requires<[In64BitMode]>;
- def BCCTR8n : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi),
- "bcctr 4, $bi, 0", IIC_BrB, []>,
+ def BCCTR8n : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$BI),
+ "bcctr 4, $BI, 0", IIC_BrB, []>,
Requires<[In64BitMode]>;
}
}
@@ -102,10 +102,10 @@ let Defs = [LR8] in
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, hasSideEffects = 0 in {
let Defs = [CTR8], Uses = [CTR8] in {
- def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdz $dst">;
- def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdnz $dst">;
+ def BDZ8 : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$BD),
+ "bdz $BD">;
+ def BDNZ8 : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$BD),
+ "bdnz $BD">;
}
let isReturn = 1, Defs = [CTR8], Uses = [CTR8, LR8, RM] in {
@@ -121,39 +121,39 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7, hasSideEffe
let isCall = 1, PPC970_Unit = 7, Defs = [LR8], hasSideEffects = 0 in {
// Convenient aliases for call instructions
let Uses = [RM] in {
- def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func),
- "bl $func", IIC_BrB, []>; // See Pat patterns below.
+ def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$LI),
+ "bl $LI", IIC_BrB, []>; // See Pat patterns below.
- def BL8_TLS : IForm<18, 0, 1, (outs), (ins tlscall:$func),
- "bl $func", IIC_BrB, []>;
+ def BL8_TLS : IForm<18, 0, 1, (outs), (ins tlscall:$LI),
+ "bl $LI", IIC_BrB, []>;
- def BLA8 : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
- "bla $func", IIC_BrB, [(PPCcall (i64 imm:$func))]>;
+ def BLA8 : IForm<18, 1, 1, (outs), (ins abscalltarget:$LI),
+ "bla $LI", IIC_BrB, [(PPCcall (i64 imm:$LI))]>;
}
let Uses = [RM], isCodeGenOnly = 1 in {
def BL8_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24,
- (outs), (ins calltarget:$func),
- "bl $func\n\tnop", IIC_BrB, []>;
+ (outs), (ins calltarget:$LI),
+ "bl $LI\n\tnop", IIC_BrB, []>;
def BL8_NOP_TLS : IForm_and_DForm_4_zero<18, 0, 1, 24,
- (outs), (ins tlscall:$func),
- "bl $func\n\tnop", IIC_BrB, []>;
+ (outs), (ins tlscall:$LI),
+ "bl $LI\n\tnop", IIC_BrB, []>;
def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24,
- (outs), (ins abscalltarget:$func),
- "bla $func\n\tnop", IIC_BrB,
- [(PPCcall_nop (i64 imm:$func))]>;
+ (outs), (ins abscalltarget:$LI),
+ "bla $LI\n\tnop", IIC_BrB,
+ [(PPCcall_nop (i64 imm:$LI))]>;
let Predicates = [PCRelativeMemops] in {
// BL8_NOTOC means that the caller does not use the TOC pointer and if
// it does use R2 then it is just a caller saved register. Therefore it is
// safe to emit only the bl and not the nop for this instruction. The
// linker will not try to restore R2 after the call.
def BL8_NOTOC : IForm<18, 0, 1, (outs),
- (ins calltarget:$func),
- "bl $func", IIC_BrB, []>;
+ (ins calltarget:$LI),
+ "bl $LI", IIC_BrB, []>;
def BL8_NOTOC_TLS : IForm<18, 0, 1, (outs),
- (ins tlscall:$func),
- "bl $func", IIC_BrB, []>;
+ (ins tlscall:$LI),
+ "bl $LI", IIC_BrB, []>;
}
}
let Uses = [CTR8, RM] in {
@@ -163,16 +163,16 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8], hasSideEffects = 0 in {
Requires<[In64BitMode]>;
let isCodeGenOnly = 1 in {
- def BCCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
+ def BCCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins (pred $BIBO, $CR):$cond),
"b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB,
[]>,
Requires<[In64BitMode]>;
- def BCCTRL8 : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi),
- "bcctrl 12, $bi, 0", IIC_BrB, []>,
+ def BCCTRL8 : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$BI),
+ "bcctrl 12, $BI, 0", IIC_BrB, []>,
Requires<[In64BitMode]>;
- def BCCTRL8n : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi),
- "bcctrl 4, $bi, 0", IIC_BrB, []>,
+ def BCCTRL8n : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$BI),
+ "bcctrl 4, $BI, 0", IIC_BrB, []>,
Requires<[In64BitMode]>;
}
}
@@ -181,27 +181,27 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8], hasSideEffects = 0 in {
let isCall = 1, PPC970_Unit = 7, Defs = [LR8, RM], hasSideEffects = 0,
isCodeGenOnly = 1, Uses = [RM] in {
// Convenient aliases for call instructions
- def BL8_RM : IForm<18, 0, 1, (outs), (ins calltarget:$func),
- "bl $func", IIC_BrB, []>; // See Pat patterns below.
+ def BL8_RM : IForm<18, 0, 1, (outs), (ins calltarget:$LI),
+ "bl $LI", IIC_BrB, []>; // See Pat patterns below.
- def BLA8_RM : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
- "bla $func", IIC_BrB, [(PPCcall_rm (i64 imm:$func))]>;
+ def BLA8_RM : IForm<18, 1, 1, (outs), (ins abscalltarget:$LI),
+ "bla $LI", IIC_BrB, [(PPCcall_rm (i64 imm:$LI))]>;
def BL8_NOP_RM : IForm_and_DForm_4_zero<18, 0, 1, 24,
- (outs), (ins calltarget:$func),
- "bl $func\n\tnop", IIC_BrB, []>;
+ (outs), (ins calltarget:$LI),
+ "bl $LI\n\tnop", IIC_BrB, []>;
def BLA8_NOP_RM : IForm_and_DForm_4_zero<18, 1, 1, 24,
- (outs), (ins abscalltarget:$func),
- "bla $func\n\tnop", IIC_BrB,
- [(PPCcall_nop_rm (i64 imm:$func))]>;
+ (outs), (ins abscalltarget:$LI),
+ "bla $LI\n\tnop", IIC_BrB,
+ [(PPCcall_nop_rm (i64 imm:$LI))]>;
let Predicates = [PCRelativeMemops] in {
// BL8_NOTOC means that the caller does not use the TOC pointer and if
// it does use R2 then it is just a caller saved register. Therefore it is
// safe to emit only the bl and not the nop for this instruction. The
// linker will not try to restore R2 after the call.
def BL8_NOTOC_RM : IForm<18, 0, 1, (outs),
- (ins calltarget:$func),
- "bl $func", IIC_BrB, []>;
+ (ins calltarget:$LI),
+ "bl $LI", IIC_BrB, []>;
}
let Uses = [CTR8, RM] in {
let isPredicable = 1 in
@@ -215,7 +215,7 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
Defs = [LR8, X2], Uses = [CTR8, RM], RST = 2 in {
def BCTRL8_LDinto_toc :
XLForm_2_ext_and_DSForm_1<19, 528, 20, 0, 1, 58, 0, (outs),
- (ins memrix:$src),
+ (ins (memrix $D, $RA):$src),
"bctrl\n\tld 2, $src", IIC_BrB,
[(PPCbctrl_load_toc iaddrX4:$src)]>,
Requires<[In64BitMode]>;
@@ -225,7 +225,7 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
Defs = [LR8, X2, RM], Uses = [CTR8, RM], RST = 2 in {
def BCTRL8_LDinto_toc_RM :
XLForm_2_ext_and_DSForm_1<19, 528, 20, 0, 1, 58, 0, (outs),
- (ins memrix:$src),
+ (ins (memrix $D, $RA):$src),
"bctrl\n\tld 2, $src", IIC_BrB,
[(PPCbctrl_load_toc_rm iaddrX4:$src)]>,
Requires<[In64BitMode]>;
@@ -238,8 +238,8 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
// conflicts.
let Interpretation64Bit = 1, isAsmParserOnly = 1, hasSideEffects = 0 in
let isCall = 1, PPC970_Unit = 7, Defs = [LR8], Uses = [RM] in
-def BL8_TLS_ : IForm<18, 0, 1, (outs), (ins tlscall:$func),
- "bl $func", IIC_BrB, []>;
+def BL8_TLS_ : IForm<18, 0, 1, (outs), (ins tlscall:$LI),
+ "bl $LI", IIC_BrB, []>;
// Calls
def : Pat<(PPCcall (i64 tglobaladdr:$dst)),
@@ -332,38 +332,38 @@ let Defs = [CR0] in {
// Instructions to support atomic operations
let mayLoad = 1, hasSideEffects = 0 in {
-def LDARX : XForm_1_memOp<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
- "ldarx $rD, $ptr", IIC_LdStLDARX, []>;
+def LDARX : XForm_1_memOp<31, 84, (outs g8rc:$RST), (ins (memrr $RA, $RB):$addr),
+ "ldarx $RST, $addr", IIC_LdStLDARX, []>;
// TODO: Add scheduling info.
let hasNoSchedulingInfo = 1 in
-def LQARX : XForm_1_memOp<31, 276, (outs g8prc:$RTp), (ins memrr:$ptr),
- "lqarx $RTp, $ptr", IIC_LdStLQARX, []>, isPPC64;
+def LQARX : XForm_1_memOp<31, 276, (outs g8prc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lqarx $RST, $addr", IIC_LdStLQARX, []>, isPPC64;
// Instruction to support lock versions of atomics
// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
-def LDARXL : XForm_1<31, 84, (outs g8rc:$rD), (ins memrr:$ptr),
- "ldarx $rD, $ptr, 1", IIC_LdStLDARX, []>, isRecordForm;
+def LDARXL : XForm_1<31, 84, (outs g8rc:$RST), (ins (memrr $RA, $RB):$addr),
+ "ldarx $RST, $addr, 1", IIC_LdStLDARX, []>, isRecordForm;
// TODO: Add scheduling info.
let hasNoSchedulingInfo = 1 in
// FIXME: We have to seek a way to remove isRecordForm since
// LQARXL is not really altering CR0.
-def LQARXL : XForm_1<31, 276, (outs g8prc:$RTp), (ins memrr:$ptr),
- "lqarx $RTp, $ptr, 1", IIC_LdStLQARX, []>,
+def LQARXL : XForm_1<31, 276, (outs g8prc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lqarx $RST, $addr, 1", IIC_LdStLQARX, []>,
isPPC64, isRecordForm;
let hasExtraDefRegAllocReq = 1 in
-def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$rD), (ins g8rc:$rA, u5imm:$FC),
- "ldat $rD, $rA, $FC", IIC_LdStLoad>, isPPC64,
+def LDAT : X_RD5_RS5_IM5<31, 614, (outs g8rc:$RST), (ins g8rc:$RA, u5imm:$RB),
+ "ldat $RST, $RA, $RB", IIC_LdStLoad>, isPPC64,
Requires<[IsISA3_0]>;
}
let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
-def STDCX : XForm_1_memOp<31, 214, (outs), (ins g8rc:$rS, memrr:$dst),
- "stdcx. $rS, $dst", IIC_LdStSTDCX, []>, isRecordForm;
+def STDCX : XForm_1_memOp<31, 214, (outs), (ins g8rc:$RST, (memrr $RA, $RB):$addr),
+ "stdcx. $RST, $addr", IIC_LdStSTDCX, []>, isRecordForm;
// TODO: Add scheduling info.
let hasNoSchedulingInfo = 1 in
-def STQCX : XForm_1_memOp<31, 182, (outs), (ins g8prc:$RSp, memrr:$dst),
- "stqcx. $RSp, $dst", IIC_LdStSTQCX, []>,
+def STQCX : XForm_1_memOp<31, 182, (outs), (ins g8prc:$RST, (memrr $RA, $RB):$addr),
+ "stqcx. $RST, $addr", IIC_LdStSTQCX, []>,
isPPC64, isRecordForm;
}
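As a rough illustration of what these load-and-reserve/store-conditional instructions are used for: a C11 compare-and-swap such as the sketch below is normally lowered on 64-bit PowerPC to a retry loop of ldarx followed by stdcx., re-running the loop whenever the reservation is lost. The function name and types are illustrative, not part of this patch.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* Atomically replace *target with desired if it still holds expected.
 * On ppc64 this typically becomes: loop { ldarx; compare; stdcx.; bne- loop }. */
bool cas_u64(_Atomic uint64_t *target, uint64_t expected, uint64_t desired) {
  return atomic_compare_exchange_strong(target, &expected, desired);
}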
@@ -450,8 +450,8 @@ def : Pat<(int_ppc_cmpxchg_i128 ForceXForm:$ptr,
g8rc:$new_hi))>;
let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
-def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$rS, g8rc:$rA, u5imm:$FC),
- "stdat $rS, $rA, $FC", IIC_LdStStore>, isPPC64,
+def STDAT : X_RD5_RS5_IM5<31, 742, (outs), (ins g8rc:$RST, g8rc:$RA, u5imm:$RB),
+ "stdat $RST, $RA, $RB", IIC_LdStStore>, isPPC64,
Requires<[IsISA3_0]>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
@@ -480,14 +480,14 @@ def TAILBCTR8 : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
-def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
- "b $dst", IIC_BrB,
+def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$LI),
+ "b $LI", IIC_BrB,
[]>;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
-def TAILBA8 : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
- "ba $dst", IIC_BrB,
+def TAILBA8 : IForm<18, 0, 0, (outs), (ins abscalltarget:$LI),
+ "ba $LI", IIC_BrB,
[]>;
}
} // Interpretation64Bit
@@ -509,14 +509,14 @@ let hasSideEffects = 0 in {
// on the cr register selected. Thus, post-ra anti-dep breaking must not
// later change that register assignment.
let hasExtraDefRegAllocReq = 1 in {
-def MTOCRF8: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins g8rc:$ST),
- "mtocrf $FXM, $ST", IIC_BrMCRX>,
+def MTOCRF8: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins g8rc:$RST),
+ "mtocrf $FXM, $RST", IIC_BrMCRX>,
PPC970_DGroup_First, PPC970_Unit_CRU;
// Similarly to mtocrf, the mask for mtcrf must be prepared in a way that
// is dependent on the cr fields being set.
-def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS),
- "mtcrf $FXM, $rS", IIC_BrMCRX>,
+def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$RST),
+ "mtcrf $FXM, $RST", IIC_BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
} // hasExtraDefRegAllocReq = 1
@@ -524,14 +524,14 @@ def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS),
// on the cr register selected. Thus, post-ra anti-dep breaking must not
// later change that register assignment.
let hasExtraSrcRegAllocReq = 1 in {
-def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM),
- "mfocrf $rT, $FXM", IIC_SprMFCRF>,
+def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$RST), (ins crbitm:$FXM),
+ "mfocrf $RST, $FXM", IIC_SprMFCRF>,
PPC970_DGroup_First, PPC970_Unit_CRU;
// Similarly to mfocrf, the mask for mfcrf must be prepared in a way that
// is dependent on the cr fields being copied.
-def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins),
- "mfcr $rT", IIC_SprMFCR>,
+def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$RT), (ins),
+ "mfcr $RT", IIC_SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
} // hasExtraSrcRegAllocReq = 1
} // hasSideEffects = 0
@@ -554,31 +554,31 @@ let hasSideEffects = 1, isBarrier = 1 in {
Requires<[In64BitMode]>;
}
-def MFSPR8 : XFXForm_1<31, 339, (outs g8rc:$RT), (ins i32imm:$SPR),
- "mfspr $RT, $SPR", IIC_SprMFSPR>;
-def MTSPR8 : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, g8rc:$RT),
- "mtspr $SPR, $RT", IIC_SprMTSPR>;
+def MFSPR8 : XFXForm_1<31, 339, (outs g8rc:$RST), (ins i32imm:$SPR),
+ "mfspr $RST, $SPR", IIC_SprMFSPR>;
+def MTSPR8 : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, g8rc:$RST),
+ "mtspr $SPR, $RST", IIC_SprMTSPR>;
//===----------------------------------------------------------------------===//
// 64-bit SPR manipulation instrs.
let Uses = [CTR8] in {
-def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs g8rc:$rT), (ins),
- "mfctr $rT", IIC_SprMFSPR>,
+def MFCTR8 : XFXForm_1_ext<31, 339, 9, (outs g8rc:$RST), (ins),
+ "mfctr $RST", IIC_SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-let Pattern = [(PPCmtctr i64:$rS)], Defs = [CTR8] in {
-def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
- "mtctr $rS", IIC_SprMTSPR>,
+let Pattern = [(PPCmtctr i64:$RST)], Defs = [CTR8] in {
+def MTCTR8 : XFXForm_1_ext<31, 467, 9, (outs), (ins g8rc:$RST),
+ "mtctr $RST", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
// MTCTR[8|]loop must be inside a loop preheader; duplicating
// the loop-preheader block will break this assumption.
let hasSideEffects = 1, isNotDuplicable = 1, Defs = [CTR8] in {
-let Pattern = [(int_set_loop_iterations i64:$rS)] in
-def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
- "mtctr $rS", IIC_SprMTSPR>,
+let Pattern = [(int_set_loop_iterations i64:$RST)] in
+def MTCTR8loop : XFXForm_1_ext<31, 467, 9, (outs), (ins g8rc:$RST),
+ "mtctr $RST", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
@@ -586,9 +586,9 @@ let hasSideEffects = 1, hasNoSchedulingInfo = 1, isNotDuplicable = 1, Uses = [CT
def DecreaseCTR8loop : PPCEmitTimePseudo<(outs crbitrc:$rT), (ins i64imm:$stride),
"#DecreaseCTR8loop", [(set i1:$rT, (int_loop_decrement (i64 imm:$stride)))]>;
-let Pattern = [(set i64:$rT, readcyclecounter)] in
-def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins),
- "mfspr $rT, 268", IIC_SprMFTB>,
+let Pattern = [(set i64:$RST, readcyclecounter)] in
+def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$RST), (ins),
+ "mfspr $RST, 268", IIC_SprMFTB>,
PPC970_DGroup_First, PPC970_Unit_FXU;
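A minimal usage sketch for the readcyclecounter pattern above, assuming a Clang-compiled program: __builtin_readcyclecounter emits llvm.readcyclecounter, which this definition selects to a time-base read via mfspr 268.

#include <stdint.h>
#include <stdio.h>

int main(void) {
  /* Each call reads the time base (SPR 268) on ppc64. */
  uint64_t start = __builtin_readcyclecounter();
  uint64_t end   = __builtin_readcyclecounter();
  printf("time-base ticks elapsed: %llu\n", (unsigned long long)(end - start));
  return 0;
}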
// Note that encoding mftb using mfspr is now the preferred form,
// and has been since at least ISA v2.03. The mftb instruction has
@@ -622,13 +622,13 @@ def PROBED_STACKALLOC_64 : PPCEmitTimePseudo<(outs g8rc:$scratch, g8rc:$temp),
let hasSideEffects = 0 in {
let Defs = [LR8] in {
-def MTLR8 : XFXForm_7_ext<31, 467, 8, (outs), (ins g8rc:$rS),
- "mtlr $rS", IIC_SprMTSPR>,
+def MTLR8 : XFXForm_1_ext<31, 467, 8, (outs), (ins g8rc:$RST),
+ "mtlr $RST", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Uses = [LR8] in {
-def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$rT), (ins),
- "mflr $rT", IIC_SprMFSPR>,
+def MFLR8 : XFXForm_1_ext<31, 339, 8, (outs g8rc:$RST), (ins),
+ "mflr $RST", IIC_SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
} // Interpretation64Bit
@@ -644,181 +644,203 @@ let hasSideEffects = 0 in {
let isCodeGenOnly = 1 in {
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
-def LI8 : DForm_2_r0<14, (outs g8rc:$rD), (ins s16imm64:$imm),
- "li $rD, $imm", IIC_IntSimple,
- [(set i64:$rD, imm64SExt16:$imm)]>, SExt32To64;
-def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins s17imm64:$imm),
- "lis $rD, $imm", IIC_IntSimple,
- [(set i64:$rD, imm16ShiftedSExt:$imm)]>, SExt32To64;
+def LI8 : DForm_2_r0<14, (outs g8rc:$RST), (ins s16imm64:$D),
+ "li $RST, $D", IIC_IntSimple,
+ [(set i64:$RST, imm64SExt16:$D)]>, SExt32To64;
+def LIS8 : DForm_2_r0<15, (outs g8rc:$RST), (ins s17imm64:$D),
+ "lis $RST, $D", IIC_IntSimple,
+ [(set i64:$RST, imm16ShiftedSExt:$D)]>, SExt32To64;
}
// Logical ops.
let isCommutable = 1 in {
-defm NAND8: XForm_6r<31, 476, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "nand", "$rA, $rS, $rB", IIC_IntSimple,
- [(set i64:$rA, (not (and i64:$rS, i64:$rB)))]>;
-defm AND8 : XForm_6r<31, 28, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "and", "$rA, $rS, $rB", IIC_IntSimple,
- [(set i64:$rA, (and i64:$rS, i64:$rB))]>;
+defm NAND8: XForm_6r<31, 476, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
+ "nand", "$RA, $RST, $RB", IIC_IntSimple,
+ [(set i64:$RA, (not (and i64:$RST, i64:$RB)))]>;
+defm AND8 : XForm_6r<31, 28, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
+ "and", "$RA, $RST, $RB", IIC_IntSimple,
+ [(set i64:$RA, (and i64:$RST, i64:$RB))]>;
} // isCommutable
-defm ANDC8: XForm_6r<31, 60, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "andc", "$rA, $rS, $rB", IIC_IntSimple,
- [(set i64:$rA, (and i64:$rS, (not i64:$rB)))]>;
+defm ANDC8: XForm_6r<31, 60, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
+ "andc", "$RA, $RST, $RB", IIC_IntSimple,
+ [(set i64:$RA, (and i64:$RST, (not i64:$RB)))]>;
let isCommutable = 1 in {
-defm OR8 : XForm_6r<31, 444, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "or", "$rA, $rS, $rB", IIC_IntSimple,
- [(set i64:$rA, (or i64:$rS, i64:$rB))]>;
-defm NOR8 : XForm_6r<31, 124, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "nor", "$rA, $rS, $rB", IIC_IntSimple,
- [(set i64:$rA, (not (or i64:$rS, i64:$rB)))]>;
+defm OR8 : XForm_6r<31, 444, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
+ "or", "$RA, $RST, $RB", IIC_IntSimple,
+ [(set i64:$RA, (or i64:$RST, i64:$RB))]>;
+defm NOR8 : XForm_6r<31, 124, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
+ "nor", "$RA, $RST, $RB", IIC_IntSimple,
+ [(set i64:$RA, (not (or i64:$RST, i64:$RB)))]>;
} // isCommutable
-defm ORC8 : XForm_6r<31, 412, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "orc", "$rA, $rS, $rB", IIC_IntSimple,
- [(set i64:$rA, (or i64:$rS, (not i64:$rB)))]>;
+defm ORC8 : XForm_6r<31, 412, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
+ "orc", "$RA, $RST, $RB", IIC_IntSimple,
+ [(set i64:$RA, (or i64:$RST, (not i64:$RB)))]>;
let isCommutable = 1 in {
-defm EQV8 : XForm_6r<31, 284, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "eqv", "$rA, $rS, $rB", IIC_IntSimple,
- [(set i64:$rA, (not (xor i64:$rS, i64:$rB)))]>;
-defm XOR8 : XForm_6r<31, 316, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "xor", "$rA, $rS, $rB", IIC_IntSimple,
- [(set i64:$rA, (xor i64:$rS, i64:$rB))]>;
+defm EQV8 : XForm_6r<31, 284, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
+ "eqv", "$RA, $RST, $RB", IIC_IntSimple,
+ [(set i64:$RA, (not (xor i64:$RST, i64:$RB)))]>;
+defm XOR8 : XForm_6r<31, 316, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
+ "xor", "$RA, $RST, $RB", IIC_IntSimple,
+ [(set i64:$RA, (xor i64:$RST, i64:$RB))]>;
} // let isCommutable = 1
// Logical ops with immediate.
let Defs = [CR0] in {
-def ANDI8_rec : DForm_4<28, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
- "andi. $dst, $src1, $src2", IIC_IntGeneral,
- [(set i64:$dst, (and i64:$src1, immZExt16:$src2))]>,
+def ANDI8_rec : DForm_4<28, (outs g8rc:$RA), (ins g8rc:$RST, u16imm64:$D),
+ "andi. $RA, $RST, $D", IIC_IntGeneral,
+ [(set i64:$RA, (and i64:$RST, immZExt16:$D))]>,
isRecordForm, SExt32To64, ZExt32To64;
-def ANDIS8_rec : DForm_4<29, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
- "andis. $dst, $src1, $src2", IIC_IntGeneral,
- [(set i64:$dst, (and i64:$src1, imm16ShiftedZExt:$src2))]>,
+def ANDIS8_rec : DForm_4<29, (outs g8rc:$RA), (ins g8rc:$RST, u16imm64:$D),
+ "andis. $RA, $RST, $D", IIC_IntGeneral,
+ [(set i64:$RA, (and i64:$RST, imm16ShiftedZExt:$D))]>,
isRecordForm, ZExt32To64;
}
-def ORI8 : DForm_4<24, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
- "ori $dst, $src1, $src2", IIC_IntSimple,
- [(set i64:$dst, (or i64:$src1, immZExt16:$src2))]>;
-def ORIS8 : DForm_4<25, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
- "oris $dst, $src1, $src2", IIC_IntSimple,
- [(set i64:$dst, (or i64:$src1, imm16ShiftedZExt:$src2))]>;
-def XORI8 : DForm_4<26, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
- "xori $dst, $src1, $src2", IIC_IntSimple,
- [(set i64:$dst, (xor i64:$src1, immZExt16:$src2))]>;
-def XORIS8 : DForm_4<27, (outs g8rc:$dst), (ins g8rc:$src1, u16imm64:$src2),
- "xoris $dst, $src1, $src2", IIC_IntSimple,
- [(set i64:$dst, (xor i64:$src1, imm16ShiftedZExt:$src2))]>;
+def ORI8 : DForm_4<24, (outs g8rc:$RA), (ins g8rc:$RST, u16imm64:$D),
+ "ori $RA, $RST, $D", IIC_IntSimple,
+ [(set i64:$RA, (or i64:$RST, immZExt16:$D))]>;
+def ORIS8 : DForm_4<25, (outs g8rc:$RA), (ins g8rc:$RST, u16imm64:$D),
+ "oris $RA, $RST, $D", IIC_IntSimple,
+ [(set i64:$RA, (or i64:$RST, imm16ShiftedZExt:$D))]>;
+def XORI8 : DForm_4<26, (outs g8rc:$RA), (ins g8rc:$RST, u16imm64:$D),
+ "xori $RA, $RST, $D", IIC_IntSimple,
+ [(set i64:$RA, (xor i64:$RST, immZExt16:$D))]>;
+def XORIS8 : DForm_4<27, (outs g8rc:$RA), (ins g8rc:$RST, u16imm64:$D),
+ "xoris $RA, $RST, $D", IIC_IntSimple,
+ [(set i64:$RA, (xor i64:$RST, imm16ShiftedZExt:$D))]>;
let isCommutable = 1 in
-defm ADD8 : XOForm_1rx<31, 266, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "add", "$rT, $rA, $rB", IIC_IntSimple,
- [(set i64:$rT, (add i64:$rA, i64:$rB))]>;
+defm ADD8 : XOForm_1rx<31, 266, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB),
+ "add", "$RT, $RA, $RB", IIC_IntSimple,
+ [(set i64:$RT, (add i64:$RA, i64:$RB))]>;
// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
// initial-exec thread-local storage model. We need to forbid r0 here -
// while it works for add just fine, the linker can relax this to local-exec
// addi, which won't work for r0.
-def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc_nox0:$rA, tlsreg:$rB),
- "add $rT, $rA, $rB", IIC_IntSimple,
- [(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
+def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$RT), (ins g8rc_nox0:$RA, tlsreg:$RB),
+ "add $RT, $RA, $RB", IIC_IntSimple,
+ [(set i64:$RT, (add i64:$RA, tglobaltlsaddr:$RB))]>;
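A hedged sketch of the kind of source that exercises ADD8TLS: an initial-exec thread-local access makes the compiler add the variable's thread-pointer-relative offset to the thread pointer, and that add is the instruction whose RA operand must not be r0 (the variable and function names below are illustrative only).

#include <stdint.h>

/* Illustrative thread-local variable forced to the initial-exec model. */
static __thread uint64_t counter __attribute__((tls_model("initial-exec")));

uint64_t bump_counter(void) {
  /* Roughly: load counter's TP-relative offset from the GOT, then
   *   add rX, rX, counter@tls          (ADD8TLS, RA != r0)
   * The linker may relax this to a local-exec addi, which is why r0
   * must be excluded: addi with RA=0 means literal zero, not r0. */
  return ++counter;
}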
let mayLoad = 1 in {
-def LBZXTLS : XForm_1<31, 87, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
- "lbzx $rD, $rA, $rB", IIC_LdStLoad, []>;
-def LHZXTLS : XForm_1<31, 279, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
- "lhzx $rD, $rA, $rB", IIC_LdStLoad, []>;
-def LWZXTLS : XForm_1<31, 23, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
- "lwzx $rD, $rA, $rB", IIC_LdStLoad, []>;
-def LDXTLS : XForm_1<31, 21, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
- "ldx $rD, $rA, $rB", IIC_LdStLD, []>, isPPC64;
-def LBZXTLS_32 : XForm_1<31, 87, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
- "lbzx $rD, $rA, $rB", IIC_LdStLoad, []>;
-def LHZXTLS_32 : XForm_1<31, 279, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
- "lhzx $rD, $rA, $rB", IIC_LdStLoad, []>;
-def LWZXTLS_32 : XForm_1<31, 23, (outs gprc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
- "lwzx $rD, $rA, $rB", IIC_LdStLoad, []>;
-
+def LBZXTLS : XForm_1<31, 87, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lbzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LHZXTLS : XForm_1<31, 279, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lhzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LHAXTLS : XForm_1<31, 343, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lhax $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LWZXTLS : XForm_1<31, 23, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lwzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LWAXTLS : XForm_1<31, 341, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lwax $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LDXTLS : XForm_1<31, 21, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "ldx $RST, $RA, $RB", IIC_LdStLD, []>, isPPC64;
+def LBZXTLS_32 : XForm_1<31, 87, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lbzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LHZXTLS_32 : XForm_1<31, 279, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lhzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LHAXTLS_32 : XForm_1<31, 343, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lhax $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LWZXTLS_32 : XForm_1<31, 23, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lwzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LWAXTLS_32 : XForm_1<31, 341, (outs gprc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lwax $RST, $RA, $RB", IIC_LdStLoad, []>;
+
+}
+let mayLoad = 1, Predicates = [HasFPU] in {
+def LFSXTLS : XForm_25<31, 535, (outs f4rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lfsx $RST, $RA, $RB", IIC_LdStLFD, []>;
+def LFDXTLS : XForm_25<31, 599, (outs f8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lfdx $RST, $RA, $RB", IIC_LdStLFD, []>;
}
let mayStore = 1 in {
-def STBXTLS : XForm_8<31, 215, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
- "stbx $rS, $rA, $rB", IIC_LdStStore, []>,
+def STBXTLS : XForm_8<31, 215, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "stbx $RST, $RA, $RB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
-def STHXTLS : XForm_8<31, 407, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
- "sthx $rS, $rA, $rB", IIC_LdStStore, []>,
+def STHXTLS : XForm_8<31, 407, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "sthx $RST, $RA, $RB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
-def STWXTLS : XForm_8<31, 151, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
- "stwx $rS, $rA, $rB", IIC_LdStStore, []>,
+def STWXTLS : XForm_8<31, 151, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "stwx $RST, $RA, $RB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
-def STDXTLS : XForm_8<31, 149, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
- "stdx $rS, $rA, $rB", IIC_LdStSTD, []>, isPPC64,
+def STDXTLS : XForm_8<31, 149, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "stdx $RST, $RA, $RB", IIC_LdStSTD, []>, isPPC64,
PPC970_DGroup_Cracked;
-def STBXTLS_32 : XForm_8<31, 215, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
- "stbx $rS, $rA, $rB", IIC_LdStStore, []>,
+def STBXTLS_32 : XForm_8<31, 215, (outs), (ins gprc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "stbx $RST, $RA, $RB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
-def STHXTLS_32 : XForm_8<31, 407, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
- "sthx $rS, $rA, $rB", IIC_LdStStore, []>,
+def STHXTLS_32 : XForm_8<31, 407, (outs), (ins gprc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "sthx $RST, $RA, $RB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
-def STWXTLS_32 : XForm_8<31, 151, (outs), (ins gprc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
- "stwx $rS, $rA, $rB", IIC_LdStStore, []>,
+def STWXTLS_32 : XForm_8<31, 151, (outs), (ins gprc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "stwx $RST, $RA, $RB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
}
+let mayStore = 1, Predicates = [HasFPU] in {
+def STFSXTLS : XForm_8<31, 663, (outs), (ins f4rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "stfsx $RST, $RA, $RB", IIC_LdStSTFD, []>,
+ PPC970_DGroup_Cracked;
+def STFDXTLS : XForm_8<31, 727, (outs), (ins f8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "stfdx $RST, $RA, $RB", IIC_LdStSTFD, []>,
+ PPC970_DGroup_Cracked;
+}
let isCommutable = 1 in
-defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "addc", "$rT, $rA, $rB", IIC_IntGeneral,
- [(set i64:$rT, (addc i64:$rA, i64:$rB))]>,
+defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB),
+ "addc", "$RT, $RA, $RB", IIC_IntGeneral,
+ [(set i64:$RT, (addc i64:$RA, i64:$RB))]>,
PPC970_DGroup_Cracked;
let Defs = [CARRY] in
-def ADDIC8 : DForm_2<12, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
- "addic $rD, $rA, $imm", IIC_IntGeneral,
- [(set i64:$rD, (addc i64:$rA, imm64SExt16:$imm))]>;
-def ADDI8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$imm),
- "addi $rD, $rA, $imm", IIC_IntSimple,
- [(set i64:$rD, (add i64:$rA, imm64SExt16:$imm))]>;
-def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s17imm64:$imm),
- "addis $rD, $rA, $imm", IIC_IntSimple,
- [(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>;
-
-def LA8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$sym),
- "la $rD, $sym($rA)", IIC_IntGeneral,
- [(set i64:$rD, (add i64:$rA,
- (PPClo tglobaladdr:$sym, 0)))]>;
+def ADDIC8 : DForm_2<12, (outs g8rc:$RST), (ins g8rc:$RA, s16imm64:$D),
+ "addic $RST, $RA, $D", IIC_IntGeneral,
+ [(set i64:$RST, (addc i64:$RA, imm64SExt16:$D))]>;
+def ADDI8 : DForm_2<14, (outs g8rc:$RST), (ins g8rc_nox0:$RA, s16imm64:$D),
+ "addi $RST, $RA, $D", IIC_IntSimple,
+ [(set i64:$RST, (add i64:$RA, imm64SExt16:$D))]>;
+def ADDIS8 : DForm_2<15, (outs g8rc:$RST), (ins g8rc_nox0:$RA, s17imm64:$D),
+ "addis $RST, $RA, $D", IIC_IntSimple,
+ [(set i64:$RST, (add i64:$RA, imm16ShiftedSExt:$D))]>;
+
+def LA8 : DForm_2<14, (outs g8rc:$RST), (ins g8rc_nox0:$RA, s16imm64:$D),
+ "la $RST, $D($RA)", IIC_IntGeneral,
+ [(set i64:$RST, (add i64:$RA,
+ (PPClo tglobaladdr:$D, 0)))]>;
let Defs = [CARRY] in {
-def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
- "subfic $rD, $rA, $imm", IIC_IntGeneral,
- [(set i64:$rD, (subc imm64SExt16:$imm, i64:$rA))]>;
+def SUBFIC8: DForm_2< 8, (outs g8rc:$RST), (ins g8rc:$RA, s16imm64:$D),
+ "subfic $RST, $RA, $D", IIC_IntGeneral,
+ [(set i64:$RST, (subc imm64SExt16:$D, i64:$RA))]>;
}
-defm SUBFC8 : XOForm_1rc<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "subfc", "$rT, $rA, $rB", IIC_IntGeneral,
- [(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
+defm SUBFC8 : XOForm_1rc<31, 8, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB),
+ "subfc", "$RT, $RA, $RB", IIC_IntGeneral,
+ [(set i64:$RT, (subc i64:$RB, i64:$RA))]>,
PPC970_DGroup_Cracked;
-defm SUBF8 : XOForm_1rx<31, 40, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "subf", "$rT, $rA, $rB", IIC_IntGeneral,
- [(set i64:$rT, (sub i64:$rB, i64:$rA))]>;
-defm NEG8 : XOForm_3r<31, 104, 0, (outs g8rc:$rT), (ins g8rc:$rA),
- "neg", "$rT, $rA", IIC_IntSimple,
- [(set i64:$rT, (ineg i64:$rA))]>;
+defm SUBF8 : XOForm_1rx<31, 40, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB),
+ "subf", "$RT, $RA, $RB", IIC_IntGeneral,
+ [(set i64:$RT, (sub i64:$RB, i64:$RA))]>;
+defm NEG8 : XOForm_3r<31, 104, 0, (outs g8rc:$RT), (ins g8rc:$RA),
+ "neg", "$RT, $RA", IIC_IntSimple,
+ [(set i64:$RT, (ineg i64:$RA))]>;
let Uses = [CARRY] in {
let isCommutable = 1 in
-defm ADDE8 : XOForm_1rc<31, 138, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "adde", "$rT, $rA, $rB", IIC_IntGeneral,
- [(set i64:$rT, (adde i64:$rA, i64:$rB))]>;
-defm ADDME8 : XOForm_3rc<31, 234, 0, (outs g8rc:$rT), (ins g8rc:$rA),
- "addme", "$rT, $rA", IIC_IntGeneral,
- [(set i64:$rT, (adde i64:$rA, -1))]>;
-defm ADDZE8 : XOForm_3rc<31, 202, 0, (outs g8rc:$rT), (ins g8rc:$rA),
- "addze", "$rT, $rA", IIC_IntGeneral,
- [(set i64:$rT, (adde i64:$rA, 0))]>;
-defm SUBFE8 : XOForm_1rc<31, 136, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "subfe", "$rT, $rA, $rB", IIC_IntGeneral,
- [(set i64:$rT, (sube i64:$rB, i64:$rA))]>;
-defm SUBFME8 : XOForm_3rc<31, 232, 0, (outs g8rc:$rT), (ins g8rc:$rA),
- "subfme", "$rT, $rA", IIC_IntGeneral,
- [(set i64:$rT, (sube -1, i64:$rA))]>;
-defm SUBFZE8 : XOForm_3rc<31, 200, 0, (outs g8rc:$rT), (ins g8rc:$rA),
- "subfze", "$rT, $rA", IIC_IntGeneral,
- [(set i64:$rT, (sube 0, i64:$rA))]>;
+defm ADDE8 : XOForm_1rc<31, 138, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB),
+ "adde", "$RT, $RA, $RB", IIC_IntGeneral,
+ [(set i64:$RT, (adde i64:$RA, i64:$RB))]>;
+defm ADDME8 : XOForm_3rc<31, 234, 0, (outs g8rc:$RT), (ins g8rc:$RA),
+ "addme", "$RT, $RA", IIC_IntGeneral,
+ [(set i64:$RT, (adde i64:$RA, -1))]>;
+defm ADDZE8 : XOForm_3rc<31, 202, 0, (outs g8rc:$RT), (ins g8rc:$RA),
+ "addze", "$RT, $RA", IIC_IntGeneral,
+ [(set i64:$RT, (adde i64:$RA, 0))]>;
+defm SUBFE8 : XOForm_1rc<31, 136, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB),
+ "subfe", "$RT, $RA, $RB", IIC_IntGeneral,
+ [(set i64:$RT, (sube i64:$RB, i64:$RA))]>;
+defm SUBFME8 : XOForm_3rc<31, 232, 0, (outs g8rc:$RT), (ins g8rc:$RA),
+ "subfme", "$RT, $RA", IIC_IntGeneral,
+ [(set i64:$RT, (sube -1, i64:$RA))]>;
+defm SUBFZE8 : XOForm_3rc<31, 200, 0, (outs g8rc:$RT), (ins g8rc:$RA),
+ "subfze", "$RT, $RA", IIC_IntGeneral,
+ [(set i64:$RT, (sube 0, i64:$RA))]>;
}
} // isCodeGenOnly
@@ -826,202 +848,231 @@ defm SUBFZE8 : XOForm_3rc<31, 200, 0, (outs g8rc:$rT), (ins g8rc:$rA),
// previous definition must be marked as CodeGen only to prevent decoding
// conflicts.
let isAsmParserOnly = 1 in {
-def ADD8TLS_ : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB),
- "add $rT, $rA, $rB", IIC_IntSimple, []>;
+def ADD8TLS_ : XOForm_1<31, 266, 0, (outs g8rc:$RT), (ins g8rc:$RA, tlsreg:$RB),
+ "add $RT, $RA, $RB", IIC_IntSimple, []>;
let mayLoad = 1 in {
-def LBZXTLS_ : XForm_1<31, 87, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
- "lbzx $rD, $rA, $rB", IIC_LdStLoad, []>;
-def LHZXTLS_ : XForm_1<31, 279, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
- "lhzx $rD, $rA, $rB", IIC_LdStLoad, []>;
-def LWZXTLS_ : XForm_1<31, 23, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
- "lwzx $rD, $rA, $rB", IIC_LdStLoad, []>;
-def LDXTLS_ : XForm_1<31, 21, (outs g8rc:$rD), (ins ptr_rc_nor0:$rA, tlsreg:$rB),
- "ldx $rD, $rA, $rB", IIC_LdStLD, []>, isPPC64;
+def LBZXTLS_ : XForm_1<31, 87, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lbzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LHZXTLS_ : XForm_1<31, 279, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lhzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LHAXTLS_ : XForm_1<31, 343, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lhax $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LWZXTLS_ : XForm_1<31, 23, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lwzx $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LWAXTLS_ : XForm_1<31, 341, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lwax $RST, $RA, $RB", IIC_LdStLoad, []>;
+def LDXTLS_ : XForm_1<31, 21, (outs g8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "ldx $RST, $RA, $RB", IIC_LdStLD, []>, isPPC64;
+}
+
+let mayLoad = 1, Predicates = [HasFPU] in {
+def LFSXTLS_ : XForm_25<31, 535, (outs f4rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lfsx $RST, $RA, $RB", IIC_LdStLFD, []>;
+def LFDXTLS_ : XForm_25<31, 599, (outs f8rc:$RST), (ins ptr_rc_nor0:$RA, tlsreg:$RB),
+ "lfdx $RST, $RA, $RB", IIC_LdStLFD, []>;
}
let mayStore = 1 in {
-def STBXTLS_ : XForm_8<31, 215, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
- "stbx $rS, $rA, $rB", IIC_LdStStore, []>,
+def STBXTLS_ : XForm_8<31, 215, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "stbx $RST, $RA, $RB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
-def STHXTLS_ : XForm_8<31, 407, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
- "sthx $rS, $rA, $rB", IIC_LdStStore, []>,
+def STHXTLS_ : XForm_8<31, 407, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "sthx $RST, $RA, $RB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
-def STWXTLS_ : XForm_8<31, 151, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
- "stwx $rS, $rA, $rB", IIC_LdStStore, []>,
+def STWXTLS_ : XForm_8<31, 151, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "stwx $RST, $RA, $RB", IIC_LdStStore, []>,
PPC970_DGroup_Cracked;
-def STDXTLS_ : XForm_8<31, 149, (outs), (ins g8rc:$rS, ptr_rc_nor0:$rA, tlsreg:$rB),
- "stdx $rS, $rA, $rB", IIC_LdStSTD, []>, isPPC64,
+def STDXTLS_ : XForm_8<31, 149, (outs), (ins g8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "stdx $RST, $RA, $RB", IIC_LdStSTD, []>, isPPC64,
PPC970_DGroup_Cracked;
}
+
+let mayStore = 1, Predicates = [HasFPU] in {
+def STFSXTLS_ : XForm_8<31, 663, (outs), (ins f4rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "stfsx $RST, $RA, $RB", IIC_LdStSTFD, []>,
+ PPC970_DGroup_Cracked;
+def STFDXTLS_ : XForm_8<31, 727, (outs), (ins f8rc:$RST, ptr_rc_nor0:$RA, tlsreg:$RB),
+ "stfdx $RST, $RA, $RB", IIC_LdStSTFD, []>,
+ PPC970_DGroup_Cracked;
+}
}
let isCommutable = 1 in {
-defm MULHD : XOForm_1r<31, 73, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "mulhd", "$rT, $rA, $rB", IIC_IntMulHW,
- [(set i64:$rT, (mulhs i64:$rA, i64:$rB))]>;
-defm MULHDU : XOForm_1r<31, 9, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "mulhdu", "$rT, $rA, $rB", IIC_IntMulHWU,
- [(set i64:$rT, (mulhu i64:$rA, i64:$rB))]>;
+defm MULHD : XOForm_1r<31, 73, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB),
+ "mulhd", "$RT, $RA, $RB", IIC_IntMulHW,
+ [(set i64:$RT, (mulhs i64:$RA, i64:$RB))]>;
+defm MULHDU : XOForm_1r<31, 9, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB),
+ "mulhdu", "$RT, $RA, $RB", IIC_IntMulHWU,
+ [(set i64:$RT, (mulhu i64:$RA, i64:$RB))]>;
} // isCommutable
}
} // Interpretation64Bit
let isCompare = 1, hasSideEffects = 0 in {
- def CMPD : XForm_16_ext<31, 0, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB),
- "cmpd $crD, $rA, $rB", IIC_IntCompare>, isPPC64;
- def CMPLD : XForm_16_ext<31, 32, (outs crrc:$crD), (ins g8rc:$rA, g8rc:$rB),
- "cmpld $crD, $rA, $rB", IIC_IntCompare>, isPPC64;
- def CMPDI : DForm_5_ext<11, (outs crrc:$crD), (ins g8rc:$rA, s16imm64:$imm),
- "cmpdi $crD, $rA, $imm", IIC_IntCompare>, isPPC64;
- def CMPLDI : DForm_6_ext<10, (outs crrc:$dst), (ins g8rc:$src1, u16imm64:$src2),
- "cmpldi $dst, $src1, $src2",
+ def CMPD : XForm_16_ext<31, 0, (outs crrc:$BF), (ins g8rc:$RA, g8rc:$RB),
+ "cmpd $BF, $RA, $RB", IIC_IntCompare>, isPPC64;
+ def CMPLD : XForm_16_ext<31, 32, (outs crrc:$BF), (ins g8rc:$RA, g8rc:$RB),
+ "cmpld $BF, $RA, $RB", IIC_IntCompare>, isPPC64;
+ def CMPDI : DForm_5_ext<11, (outs crrc:$BF), (ins g8rc:$RA, s16imm64:$D),
+ "cmpdi $BF, $RA, $D", IIC_IntCompare>, isPPC64;
+ def CMPLDI : DForm_6_ext<10, (outs crrc:$BF), (ins g8rc:$RA, u16imm64:$D),
+ "cmpldi $BF, $RA, $D",
IIC_IntCompare>, isPPC64;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
def CMPRB8 : X_BF3_L1_RS5_RS5<31, 192, (outs crrc:$BF),
- (ins u1imm:$L, g8rc:$rA, g8rc:$rB),
- "cmprb $BF, $L, $rA, $rB", IIC_IntCompare, []>,
+ (ins u1imm:$L, g8rc:$RA, g8rc:$RB),
+ "cmprb $BF, $L, $RA, $RB", IIC_IntCompare, []>,
Requires<[IsISA3_0]>;
def CMPEQB : X_BF3_RS5_RS5<31, 224, (outs crrc:$BF),
- (ins g8rc:$rA, g8rc:$rB), "cmpeqb $BF, $rA, $rB",
+ (ins g8rc:$RA, g8rc:$RB), "cmpeqb $BF, $RA, $RB",
IIC_IntCompare, []>, Requires<[IsISA3_0]>;
}
let hasSideEffects = 0 in {
-defm SLD : XForm_6r<31, 27, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
- "sld", "$rA, $rS, $rB", IIC_IntRotateD,
- [(set i64:$rA, (PPCshl i64:$rS, i32:$rB))]>, isPPC64;
-defm SRD : XForm_6r<31, 539, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
- "srd", "$rA, $rS, $rB", IIC_IntRotateD,
- [(set i64:$rA, (PPCsrl i64:$rS, i32:$rB))]>, isPPC64;
-defm SRAD : XForm_6rc<31, 794, (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB),
- "srad", "$rA, $rS, $rB", IIC_IntRotateD,
- [(set i64:$rA, (PPCsra i64:$rS, i32:$rB))]>, isPPC64;
+defm SLD : XForm_6r<31, 27, (outs g8rc:$RA), (ins g8rc:$RST, gprc:$RB),
+ "sld", "$RA, $RST, $RB", IIC_IntRotateD,
+ [(set i64:$RA, (PPCshl i64:$RST, i32:$RB))]>, isPPC64;
+defm SRD : XForm_6r<31, 539, (outs g8rc:$RA), (ins g8rc:$RST, gprc:$RB),
+ "srd", "$RA, $RST, $RB", IIC_IntRotateD,
+ [(set i64:$RA, (PPCsrl i64:$RST, i32:$RB))]>, isPPC64;
+defm SRAD : XForm_6rc<31, 794, (outs g8rc:$RA), (ins g8rc:$RST, gprc:$RB),
+ "srad", "$RA, $RST, $RB", IIC_IntRotateD,
+ [(set i64:$RA, (PPCsra i64:$RST, i32:$RB))]>, isPPC64;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
-defm CNTLZW8 : XForm_11r<31, 26, (outs g8rc:$rA), (ins g8rc:$rS),
- "cntlzw", "$rA, $rS", IIC_IntGeneral, []>,
+defm CNTLZW8 : XForm_11r<31, 26, (outs g8rc:$RA), (ins g8rc:$RST),
+ "cntlzw", "$RA, $RST", IIC_IntGeneral, []>,
ZExt32To64, SExt32To64;
-defm CNTTZW8 : XForm_11r<31, 538, (outs g8rc:$rA), (ins g8rc:$rS),
- "cnttzw", "$rA, $rS", IIC_IntGeneral, []>,
+defm CNTTZW8 : XForm_11r<31, 538, (outs g8rc:$RA), (ins g8rc:$RST),
+ "cnttzw", "$RA, $RST", IIC_IntGeneral, []>,
Requires<[IsISA3_0]>, ZExt32To64, SExt32To64;
-defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$rA), (ins g8rc:$rS),
- "extsb", "$rA, $rS", IIC_IntSimple,
- [(set i64:$rA, (sext_inreg i64:$rS, i8))]>, SExt32To64;
-defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS),
- "extsh", "$rA, $rS", IIC_IntSimple,
- [(set i64:$rA, (sext_inreg i64:$rS, i16))]>, SExt32To64;
-
-defm SLW8 : XForm_6r<31, 24, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "slw", "$rA, $rS, $rB", IIC_IntGeneral, []>, ZExt32To64;
-defm SRW8 : XForm_6r<31, 536, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "srw", "$rA, $rS, $rB", IIC_IntGeneral, []>, ZExt32To64;
+defm EXTSB8 : XForm_11r<31, 954, (outs g8rc:$RA), (ins g8rc:$RST),
+ "extsb", "$RA, $RST", IIC_IntSimple,
+ [(set i64:$RA, (sext_inreg i64:$RST, i8))]>, SExt32To64;
+defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$RA), (ins g8rc:$RST),
+ "extsh", "$RA, $RST", IIC_IntSimple,
+ [(set i64:$RA, (sext_inreg i64:$RST, i16))]>, SExt32To64;
+
+defm SLW8 : XForm_6r<31, 24, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
+ "slw", "$RA, $RST, $RB", IIC_IntGeneral, []>, ZExt32To64;
+defm SRW8 : XForm_6r<31, 536, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
+ "srw", "$RA, $RST, $RB", IIC_IntGeneral, []>, ZExt32To64;
} // Interpretation64Bit
// For fast-isel:
let isCodeGenOnly = 1 in {
-def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$rA), (ins gprc:$rS),
- "extsb $rA, $rS", IIC_IntSimple, []>, isPPC64,
+def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$RA), (ins gprc:$RST),
+ "extsb $RA, $RST", IIC_IntSimple, []>, isPPC64,
SExt32To64;
-def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$rA), (ins gprc:$rS),
- "extsh $rA, $rS", IIC_IntSimple, []>, isPPC64,
+def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$RA), (ins gprc:$RST),
+ "extsh $RA, $RST", IIC_IntSimple, []>, isPPC64,
SExt32To64;
} // isCodeGenOnly for fast-isel
-defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS),
- "extsw", "$rA, $rS", IIC_IntSimple,
- [(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64,
+defm EXTSW : XForm_11r<31, 986, (outs g8rc:$RA), (ins g8rc:$RST),
+ "extsw", "$RA, $RST", IIC_IntSimple,
+ [(set i64:$RA, (sext_inreg i64:$RST, i32))]>, isPPC64,
SExt32To64;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
-defm EXTSW_32_64 : XForm_11r<31, 986, (outs g8rc:$rA), (ins gprc:$rS),
- "extsw", "$rA, $rS", IIC_IntSimple,
- [(set i64:$rA, (sext i32:$rS))]>, isPPC64,
+defm EXTSW_32_64 : XForm_11r<31, 986, (outs g8rc:$RA), (ins gprc:$RST),
+ "extsw", "$RA, $RST", IIC_IntSimple,
+ [(set i64:$RA, (sext i32:$RST))]>, isPPC64,
SExt32To64;
let isCodeGenOnly = 1 in
-def EXTSW_32 : XForm_11<31, 986, (outs gprc:$rA), (ins gprc:$rS),
- "extsw $rA, $rS", IIC_IntSimple,
+def EXTSW_32 : XForm_11<31, 986, (outs gprc:$RA), (ins gprc:$RST),
+ "extsw $RA, $RST", IIC_IntSimple,
[]>, isPPC64;
-defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
- "sradi", "$rA, $rS, $SH", IIC_IntRotateDI,
- [(set i64:$rA, (sra i64:$rS, (i32 imm:$SH)))]>, isPPC64;
+defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$RA), (ins g8rc:$RS, u6imm:$SH),
+ "sradi", "$RA, $RS, $SH", IIC_IntRotateDI,
+ [(set i64:$RA, (sra i64:$RS, (i32 imm:$SH)))]>, isPPC64;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
-defm EXTSWSLI_32_64 : XSForm_1r<31, 445, (outs g8rc:$rA),
- (ins gprc:$rS, u6imm:$SH),
- "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
- [(set i64:$rA,
- (PPCextswsli i32:$rS, (i32 imm:$SH)))]>,
+defm EXTSWSLI_32_64 : XSForm_1r<31, 445, (outs g8rc:$RA),
+ (ins gprc:$RS, u6imm:$SH),
+ "extswsli", "$RA, $RS, $SH", IIC_IntRotateDI,
+ [(set i64:$RA,
+ (PPCextswsli i32:$RS, (i32 imm:$SH)))]>,
isPPC64, Requires<[IsISA3_0]>;
-defm EXTSWSLI : XSForm_1rc<31, 445, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
- "extswsli", "$rA, $rS, $SH", IIC_IntRotateDI,
+defm EXTSWSLI : XSForm_1rc<31, 445, (outs g8rc:$RA), (ins g8rc:$RS, u6imm:$SH),
+ "extswsli", "$RA, $RS, $SH", IIC_IntRotateDI,
[]>, isPPC64, Requires<[IsISA3_0]>;
// For fast-isel:
let isCodeGenOnly = 1, Defs = [CARRY] in
-def SRADI_32 : XSForm_1<31, 413, (outs gprc:$rA), (ins gprc:$rS, u6imm:$SH),
- "sradi $rA, $rS, $SH", IIC_IntRotateDI, []>, isPPC64;
+def SRADI_32 : XSForm_1<31, 413, (outs gprc:$RA), (ins gprc:$RS, u6imm:$SH),
+ "sradi $RA, $RS, $SH", IIC_IntRotateDI, []>, isPPC64;
-defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS),
- "cntlzd", "$rA, $rS", IIC_IntGeneral,
- [(set i64:$rA, (ctlz i64:$rS))]>,
+defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$RA), (ins g8rc:$RST),
+ "cntlzd", "$RA, $RST", IIC_IntGeneral,
+ [(set i64:$RA, (ctlz i64:$RST))]>,
ZExt32To64, SExt32To64;
-defm CNTTZD : XForm_11r<31, 570, (outs g8rc:$rA), (ins g8rc:$rS),
- "cnttzd", "$rA, $rS", IIC_IntGeneral,
- [(set i64:$rA, (cttz i64:$rS))]>, Requires<[IsISA3_0]>,
+defm CNTTZD : XForm_11r<31, 570, (outs g8rc:$RA), (ins g8rc:$RST),
+ "cnttzd", "$RA, $RST", IIC_IntGeneral,
+ [(set i64:$RA, (cttz i64:$RST))]>, Requires<[IsISA3_0]>,
ZExt32To64, SExt32To64;
-def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
- "popcntd $rA, $rS", IIC_IntGeneral,
- [(set i64:$rA, (ctpop i64:$rS))]>,
+def POPCNTD : XForm_11<31, 506, (outs g8rc:$RA), (ins g8rc:$RST),
+ "popcntd $RA, $RST", IIC_IntGeneral,
+ [(set i64:$RA, (ctpop i64:$RST))]>,
ZExt32To64, SExt32To64;
-def BPERMD : XForm_6<31, 252, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "bpermd $rA, $rS, $rB", IIC_IntGeneral,
- [(set i64:$rA, (int_ppc_bpermd g8rc:$rS, g8rc:$rB))]>,
+def BPERMD : XForm_6<31, 252, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
+ "bpermd $RA, $RST, $RB", IIC_IntGeneral,
+ [(set i64:$RA, (int_ppc_bpermd g8rc:$RST, g8rc:$RB))]>,
isPPC64, Requires<[HasBPERMD]>;
let isCodeGenOnly = 1, isCommutable = 1 in
-def CMPB8 : XForm_6<31, 508, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "cmpb $rA, $rS, $rB", IIC_IntGeneral,
- [(set i64:$rA, (PPCcmpb i64:$rS, i64:$rB))]>;
+def CMPB8 : XForm_6<31, 508, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
+ "cmpb $RA, $RST, $RB", IIC_IntGeneral,
+ [(set i64:$RA, (PPCcmpb i64:$RST, i64:$RB))]>;
// popcntw also does a population count on the high 32 bits (storing the
// results in the high 32 bits of the output). We'll ignore that here (which is
// safe because we never separately use the high part of the 64-bit registers).
-def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS),
- "popcntw $rA, $rS", IIC_IntGeneral,
- [(set i32:$rA, (ctpop i32:$rS))]>;
+def POPCNTW : XForm_11<31, 378, (outs gprc:$RA), (ins gprc:$RST),
+ "popcntw $RA, $RST", IIC_IntGeneral,
+ [(set i32:$RA, (ctpop i32:$RST))]>;
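A small C model of the popcntw behaviour described above: each 32-bit half of the register gets the population count of the corresponding input half, and only the low word is consumed by the i32 ctpop pattern. The helper name is illustrative.

#include <stdint.h>

/* Software model of popcntw: per-word population counts. */
uint64_t model_popcntw(uint64_t rs) {
  uint64_t lo_count = (uint64_t)__builtin_popcount((uint32_t)rs);
  uint64_t hi_count = (uint64_t)__builtin_popcount((uint32_t)(rs >> 32));
  return (hi_count << 32) | lo_count;
}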
-let isCodeGenOnly = 1 in
-def POPCNTB8 : XForm_11<31, 122, (outs g8rc:$rA), (ins g8rc:$rS),
- "popcntb $rA, $rS", IIC_IntGeneral,
- [(set i64:$rA, (int_ppc_popcntb i64:$rS))]>;
-
-defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divd", "$rT, $rA, $rB", IIC_IntDivD,
- [(set i64:$rT, (sdiv i64:$rA, i64:$rB))]>, isPPC64;
-defm DIVDU : XOForm_1rcr<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divdu", "$rT, $rA, $rB", IIC_IntDivD,
- [(set i64:$rT, (udiv i64:$rA, i64:$rB))]>, isPPC64;
-defm DIVDE : XOForm_1rcr<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divde", "$rT, $rA, $rB", IIC_IntDivD,
- [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>,
+let isCodeGenOnly = 1 in {
+def POPCNTB8 : XForm_11<31, 122, (outs g8rc:$RA), (ins g8rc:$RST),
+ "popcntb $RA, $RST", IIC_IntGeneral,
+ [(set i64:$RA, (int_ppc_popcntb i64:$RST))]>;
+
+def CDTBCD8 : XForm_11<31, 282, (outs g8rc:$RA), (ins g8rc:$RST),
+ "cdtbcd $RA, $RST", IIC_IntGeneral, []>;
+def CBCDTD8 : XForm_11<31, 314, (outs g8rc:$RA), (ins g8rc:$RST),
+ "cbcdtd $RA, $RST", IIC_IntGeneral, []>;
+
+def ADDG6S8 : XOForm_1<31, 74, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB),
+ "addg6s $RT, $RA, $RB", IIC_IntGeneral, []>;
+}
+
+defm DIVD : XOForm_1rcr<31, 489, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB),
+ "divd", "$RT, $RA, $RB", IIC_IntDivD,
+ [(set i64:$RT, (sdiv i64:$RA, i64:$RB))]>, isPPC64;
+defm DIVDU : XOForm_1rcr<31, 457, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB),
+ "divdu", "$RT, $RA, $RB", IIC_IntDivD,
+ [(set i64:$RT, (udiv i64:$RA, i64:$RB))]>, isPPC64;
+defm DIVDE : XOForm_1rcr<31, 425, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB),
+ "divde", "$RT, $RA, $RB", IIC_IntDivD,
+ [(set i64:$RT, (int_ppc_divde g8rc:$RA, g8rc:$RB))]>,
isPPC64, Requires<[HasExtDiv]>;
let Predicates = [IsISA3_0] in {
-def MADDHD : VAForm_1a<48, (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
+def MADDHD : VAForm_1a<48, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
"maddhd $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
def MADDHDU : VAForm_1a<49,
- (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
+ (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
"maddhdu $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
-def MADDLD : VAForm_1a<51, (outs gprc :$RT), (ins gprc:$RA, gprc:$RB, gprc:$RC),
+def MADDLD : VAForm_1a<51, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB, gprc:$RC),
"maddld $RT, $RA, $RB, $RC", IIC_IntMulHD,
[(set i32:$RT, (add_without_simm16 (mul_without_simm16 i32:$RA, i32:$RB), i32:$RC))]>,
isPPC64;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def MADDLD8 : VAForm_1a<51,
- (outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
+ (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
"maddld $RT, $RA, $RB, $RC", IIC_IntMulHD,
[(set i64:$RT, (add_without_simm16 (mul_without_simm16 i64:$RA, i64:$RB), i64:$RC))]>,
isPPC64;
@@ -1030,26 +1081,26 @@ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
}
def ADDPCIS : DXForm<19, 2, (outs g8rc:$RT), (ins i32imm:$D),
"addpcis $RT, $D", IIC_BrB, []>, isPPC64;
-def MODSD : XForm_8<31, 777, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "modsd $rT, $rA, $rB", IIC_IntDivW,
- [(set i64:$rT, (srem i64:$rA, i64:$rB))]>;
-def MODUD : XForm_8<31, 265, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "modud $rT, $rA, $rB", IIC_IntDivW,
- [(set i64:$rT, (urem i64:$rA, i64:$rB))]>;
+def MODSD : XForm_8<31, 777, (outs g8rc:$RST), (ins g8rc:$RA, g8rc:$RB),
+ "modsd $RST, $RA, $RB", IIC_IntDivW,
+ [(set i64:$RST, (srem i64:$RA, i64:$RB))]>;
+def MODUD : XForm_8<31, 265, (outs g8rc:$RST), (ins g8rc:$RA, g8rc:$RB),
+ "modud $RST, $RA, $RB", IIC_IntDivW,
+ [(set i64:$RST, (urem i64:$RA, i64:$RB))]>;
}
-defm DIVDEU : XOForm_1rcr<31, 393, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "divdeu", "$rT, $rA, $rB", IIC_IntDivD,
- [(set i64:$rT, (int_ppc_divdeu g8rc:$rA, g8rc:$rB))]>,
+defm DIVDEU : XOForm_1rcr<31, 393, 0, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB),
+ "divdeu", "$RT, $RA, $RB", IIC_IntDivD,
+ [(set i64:$RT, (int_ppc_divdeu g8rc:$RA, g8rc:$RB))]>,
isPPC64, Requires<[HasExtDiv]>;
let isCommutable = 1 in
-defm MULLD : XOForm_1rx<31, 233, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
- "mulld", "$rT, $rA, $rB", IIC_IntMulHD,
- [(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
+defm MULLD : XOForm_1rx<31, 233, (outs g8rc:$RT), (ins g8rc:$RA, g8rc:$RB),
+ "mulld", "$RT, $RA, $RB", IIC_IntMulHD,
+ [(set i64:$RT, (mul i64:$RA, i64:$RB))]>, isPPC64;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
-def MULLI8 : DForm_2<7, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
- "mulli $rD, $rA, $imm", IIC_IntMulLI,
- [(set i64:$rD, (mul i64:$rA, imm64SExt16:$imm))]>;
+def MULLI8 : DForm_2<7, (outs g8rc:$RST), (ins g8rc:$RA, s16imm64:$D),
+ "mulli $RST, $RA, $D", IIC_IntMulLI,
+ [(set i64:$RST, (mul i64:$RA, imm64SExt16:$D))]>;
}
let hasSideEffects = 1 in {
@@ -1058,76 +1109,76 @@ def DARN : XForm_45<31, 755, (outs g8rc:$RT), (ins u2imm:$L),
}
let hasSideEffects = 0 in {
-defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$rA),
- (ins g8rc:$rSi, g8rc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldimi", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
- []>, isPPC64, RegConstraint<"$rSi = $rA">,
- NoEncode<"$rSi">;
+defm RLDIMI : MDForm_1r<30, 3, (outs g8rc:$RA),
+ (ins g8rc:$RAi, g8rc:$RS, u6imm:$SH, u6imm:$MBE),
+ "rldimi", "$RA, $RS, $SH, $MBE", IIC_IntRotateDI,
+ []>, isPPC64, RegConstraint<"$RAi = $RA">,
+ NoEncode<"$RAi">;
// Rotate instructions.
defm RLDCL : MDSForm_1r<30, 8,
- (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE),
- "rldcl", "$rA, $rS, $rB, $MBE", IIC_IntRotateD,
+ (outs g8rc:$RA), (ins g8rc:$RS, gprc:$RB, u6imm:$MBE),
+ "rldcl", "$RA, $RS, $RB, $MBE", IIC_IntRotateD,
[]>, isPPC64;
defm RLDCR : MDSForm_1r<30, 9,
- (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE),
- "rldcr", "$rA, $rS, $rB, $MBE", IIC_IntRotateD,
+ (outs g8rc:$RA), (ins g8rc:$RS, gprc:$RB, u6imm:$MBE),
+ "rldcr", "$RA, $RS, $RB, $MBE", IIC_IntRotateD,
[]>, isPPC64;
defm RLDICL : MDForm_1r<30, 0,
- (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldicl", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ (outs g8rc:$RA), (ins g8rc:$RS, u6imm:$SH, u6imm:$MBE),
+ "rldicl", "$RA, $RS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64;
// For fast-isel:
let isCodeGenOnly = 1 in
def RLDICL_32_64 : MDForm_1<30, 0,
- (outs g8rc:$rA),
- (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldicl $rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ (outs g8rc:$RA),
+ (ins gprc:$RS, u6imm:$SH, u6imm:$MBE),
+ "rldicl $RA, $RS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64;
// End fast-isel.
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm RLDICL_32 : MDForm_1r<30, 0,
- (outs gprc:$rA),
- (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldicl", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ (outs gprc:$RA),
+ (ins gprc:$RS, u6imm:$SH, u6imm:$MBE),
+ "rldicl", "$RA, $RS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64;
defm RLDICR : MDForm_1r<30, 1,
- (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldicr", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ (outs g8rc:$RA), (ins g8rc:$RS, u6imm:$SH, u6imm:$MBE),
+ "rldicr", "$RA, $RS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64;
let isCodeGenOnly = 1 in
def RLDICR_32 : MDForm_1<30, 1,
- (outs gprc:$rA), (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldicr $rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ (outs gprc:$RA), (ins gprc:$RS, u6imm:$SH, u6imm:$MBE),
+ "rldicr $RA, $RS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64;
defm RLDIC : MDForm_1r<30, 2,
- (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
- "rldic", "$rA, $rS, $SH, $MBE", IIC_IntRotateDI,
+ (outs g8rc:$RA), (ins g8rc:$RS, u6imm:$SH, u6imm:$MBE),
+ "rldic", "$RA, $RS, $SH, $MBE", IIC_IntRotateDI,
[]>, isPPC64;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
-defm RLWINM8 : MForm_2r<21, (outs g8rc:$rA),
- (ins g8rc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
- "rlwinm", "$rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
+defm RLWINM8 : MForm_2r<21, (outs g8rc:$RA),
+ (ins g8rc:$RS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm", "$RA, $RS, $SH, $MB, $ME", IIC_IntGeneral,
[]>;
-defm RLWNM8 : MForm_2r<23, (outs g8rc:$rA),
- (ins g8rc:$rS, g8rc:$rB, u5imm:$MB, u5imm:$ME),
- "rlwnm", "$rA, $rS, $rB, $MB, $ME", IIC_IntGeneral,
+defm RLWNM8 : MForm_1r<23, (outs g8rc:$RA),
+ (ins g8rc:$RS, g8rc:$RB, u5imm:$MB, u5imm:$ME),
+ "rlwnm", "$RA, $RS, $RB, $MB, $ME", IIC_IntGeneral,
[]>;
// RLWIMI can be commuted if the rotate amount is zero.
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
-defm RLWIMI8 : MForm_2r<20, (outs g8rc:$rA),
- (ins g8rc:$rSi, g8rc:$rS, u5imm:$SH, u5imm:$MB,
- u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME",
+defm RLWIMI8 : MForm_2r<20, (outs g8rc:$RA),
+ (ins g8rc:$RAi, g8rc:$RS, u5imm:$SH, u5imm:$MB,
+ u5imm:$ME), "rlwimi", "$RA, $RS, $SH, $MB, $ME",
IIC_IntRotate, []>, PPC970_DGroup_Cracked,
- RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">;
+ RegConstraint<"$RAi = $RA">, NoEncode<"$RAi">;
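For reference, a C model of the rotate-and-insert operation defined here: rlwimi rotates the source left and inserts it into the destination under the MB..ME mask, which is why the destination is also an input (the tied $RAi operand) and why, with a rotate amount of zero, the two registers can be swapped by inverting the mask. Helper names are illustrative.

#include <stdint.h>

static uint32_t rotl32(uint32_t x, unsigned sh) {
  sh &= 31;
  return sh ? (x << sh) | (x >> (32 - sh)) : x;
}

/* Mask with bits MB..ME set, using PowerPC big-endian bit numbering
 * (bit 0 is the most significant bit); wraps around when MB > ME. */
static uint32_t mask_mb_me(unsigned mb, unsigned me) {
  uint32_t from_mb = 0xffffffffu >> mb;
  uint32_t to_me   = (uint32_t)(0xffffffffull << (31 - me));
  return mb <= me ? (from_mb & to_me) : (from_mb | to_me);
}

/* Software model of: rlwimi ra, rs, sh, mb, me */
uint32_t model_rlwimi(uint32_t ra, uint32_t rs, unsigned sh,
                      unsigned mb, unsigned me) {
  uint32_t m = mask_mb_me(mb, me);
  return (rotl32(rs, sh) & m) | (ra & ~m);
}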
let isSelect = 1 in
def ISEL8 : AForm_4<31, 15,
- (outs g8rc:$rT), (ins g8rc_nox0:$rA, g8rc:$rB, crbitrc:$cond),
- "isel $rT, $rA, $rB, $cond", IIC_IntISEL,
+ (outs g8rc:$RT), (ins g8rc_nox0:$RA, g8rc:$RB, crbitrc:$COND),
+ "isel $RT, $RA, $RB, $COND", IIC_IntISEL,
[]>;
} // Interpretation64Bit
} // hasSideEffects = 0
@@ -1245,53 +1296,53 @@ def : InstAlias<"mtspefscr $Rx", (MTSPR8 512, g8rc:$Rx)>;
// Sign extending loads.
let PPC970_Unit = 2 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
-def LHA8: DForm_1<42, (outs g8rc:$rD), (ins memri:$src),
- "lha $rD, $src", IIC_LdStLHA,
- [(set i64:$rD, (sextloadi16 DForm:$src))]>,
+def LHA8: DForm_1<42, (outs g8rc:$RST), (ins (memri $D, $RA):$addr),
+ "lha $RST, $addr", IIC_LdStLHA,
+ [(set i64:$RST, (sextloadi16 DForm:$addr))]>,
PPC970_DGroup_Cracked, SExt32To64;
-def LWA : DSForm_1<58, 2, (outs g8rc:$rD), (ins memrix:$src),
- "lwa $rD, $src", IIC_LdStLWA,
- [(set i64:$rD,
- (sextloadi32 DSForm:$src))]>, isPPC64,
+def LWA : DSForm_1<58, 2, (outs g8rc:$RST), (ins (memrix $D, $RA):$addr),
+ "lwa $RST, $addr", IIC_LdStLWA,
+ [(set i64:$RST,
+ (sextloadi32 DSForm:$addr))]>, isPPC64,
PPC970_DGroup_Cracked, SExt32To64;
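As a usage sketch (names illustrative), ordinary C code that widens a narrower signed value into a 64-bit result is what these sign-extending-load patterns match; accessors like the following would normally select to lha/lhax and lwa/lwax.

#include <stdint.h>

/* sextloadi16: sign-extending halfword load (lha/lhax). */
int64_t load_s16(const int16_t *p) { return *p; }

/* sextloadi32: sign-extending word load (lwa/lwax). */
int64_t load_s32(const int32_t *p) { return *p; }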
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
-def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$rD), (ins memrr:$src),
- "lhax $rD, $src", IIC_LdStLHA,
- [(set i64:$rD, (sextloadi16 XForm:$src))]>,
+def LHAX8: XForm_1_memOp<31, 343, (outs g8rc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lhax $RST, $addr", IIC_LdStLHA,
+ [(set i64:$RST, (sextloadi16 XForm:$addr))]>,
PPC970_DGroup_Cracked, SExt32To64;
-def LWAX : XForm_1_memOp<31, 341, (outs g8rc:$rD), (ins memrr:$src),
- "lwax $rD, $src", IIC_LdStLHA,
- [(set i64:$rD, (sextloadi32 XForm:$src))]>, isPPC64,
+def LWAX : XForm_1_memOp<31, 341, (outs g8rc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lwax $RST, $addr", IIC_LdStLHA,
+ [(set i64:$RST, (sextloadi32 XForm:$addr))]>, isPPC64,
PPC970_DGroup_Cracked, SExt32To64;
// For fast-isel:
let isCodeGenOnly = 1, mayLoad = 1, hasSideEffects = 0 in {
-def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src),
- "lwa $rD, $src", IIC_LdStLWA, []>, isPPC64,
+def LWA_32 : DSForm_1<58, 2, (outs gprc:$RST), (ins (memrix $D, $RA):$addr),
+ "lwa $RST, $addr", IIC_LdStLWA, []>, isPPC64,
PPC970_DGroup_Cracked, SExt32To64;
-def LWAX_32 : XForm_1_memOp<31, 341, (outs gprc:$rD), (ins memrr:$src),
- "lwax $rD, $src", IIC_LdStLHA, []>, isPPC64,
+def LWAX_32 : XForm_1_memOp<31, 341, (outs gprc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lwax $RST, $addr", IIC_LdStLHA, []>, isPPC64,
PPC970_DGroup_Cracked, SExt32To64;
} // end fast-isel isCodeGenOnly
// Update forms.
let mayLoad = 1, hasSideEffects = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
-def LHAU8 : DForm_1<43, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
- (ins memri:$addr),
- "lhau $rD, $addr", IIC_LdStLHAU,
+def LHAU8 : DForm_1<43, (outs g8rc:$RST, ptr_rc_nor0:$ea_result),
+ (ins (memri $D, $RA):$addr),
+ "lhau $RST, $addr", IIC_LdStLHAU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
// NO LWAU!
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
-def LHAUX8 : XForm_1_memOp<31, 375, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
- (ins memrr:$addr),
- "lhaux $rD, $addr", IIC_LdStLHAUX,
+def LHAUX8 : XForm_1_memOp<31, 375, (outs g8rc:$RST, ptr_rc_nor0:$ea_result),
+ (ins (memrr $RA, $RB):$addr),
+ "lhaux $RST, $addr", IIC_LdStLHAUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWAUX : XForm_1_memOp<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
- (ins memrr:$addr),
- "lwaux $rD, $addr", IIC_LdStLHAUX,
+def LWAUX : XForm_1_memOp<31, 373, (outs g8rc:$RST, ptr_rc_nor0:$ea_result),
+ (ins (memrr $RA, $RB):$addr),
+ "lwaux $RST, $addr", IIC_LdStLHAUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
@@ -1300,64 +1351,64 @@ def LWAUX : XForm_1_memOp<31, 373, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
// Zero extending loads.
let PPC970_Unit = 2 in {
-def LBZ8 : DForm_1<34, (outs g8rc:$rD), (ins memri:$src),
- "lbz $rD, $src", IIC_LdStLoad,
- [(set i64:$rD, (zextloadi8 DForm:$src))]>, ZExt32To64,
+def LBZ8 : DForm_1<34, (outs g8rc:$RST), (ins (memri $D, $RA):$addr),
+ "lbz $RST, $addr", IIC_LdStLoad,
+ [(set i64:$RST, (zextloadi8 DForm:$addr))]>, ZExt32To64,
SExt32To64;
-def LHZ8 : DForm_1<40, (outs g8rc:$rD), (ins memri:$src),
- "lhz $rD, $src", IIC_LdStLoad,
- [(set i64:$rD, (zextloadi16 DForm:$src))]>, ZExt32To64,
+def LHZ8 : DForm_1<40, (outs g8rc:$RST), (ins (memri $D, $RA):$addr),
+ "lhz $RST, $addr", IIC_LdStLoad,
+ [(set i64:$RST, (zextloadi16 DForm:$addr))]>, ZExt32To64,
SExt32To64;
-def LWZ8 : DForm_1<32, (outs g8rc:$rD), (ins memri:$src),
- "lwz $rD, $src", IIC_LdStLoad,
- [(set i64:$rD, (zextloadi32 DForm:$src))]>, isPPC64,
+def LWZ8 : DForm_1<32, (outs g8rc:$RST), (ins (memri $D, $RA):$addr),
+ "lwz $RST, $addr", IIC_LdStLoad,
+ [(set i64:$RST, (zextloadi32 DForm:$addr))]>, isPPC64,
ZExt32To64;
-def LBZX8 : XForm_1_memOp<31, 87, (outs g8rc:$rD), (ins memrr:$src),
- "lbzx $rD, $src", IIC_LdStLoad,
- [(set i64:$rD, (zextloadi8 XForm:$src))]>, ZExt32To64,
+def LBZX8 : XForm_1_memOp<31, 87, (outs g8rc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lbzx $RST, $addr", IIC_LdStLoad,
+ [(set i64:$RST, (zextloadi8 XForm:$addr))]>, ZExt32To64,
SExt32To64;
-def LHZX8 : XForm_1_memOp<31, 279, (outs g8rc:$rD), (ins memrr:$src),
- "lhzx $rD, $src", IIC_LdStLoad,
- [(set i64:$rD, (zextloadi16 XForm:$src))]>,
+def LHZX8 : XForm_1_memOp<31, 279, (outs g8rc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lhzx $RST, $addr", IIC_LdStLoad,
+ [(set i64:$RST, (zextloadi16 XForm:$addr))]>,
ZExt32To64, SExt32To64;
-def LWZX8 : XForm_1_memOp<31, 23, (outs g8rc:$rD), (ins memrr:$src),
- "lwzx $rD, $src", IIC_LdStLoad,
- [(set i64:$rD, (zextloadi32 XForm:$src))]>,
+def LWZX8 : XForm_1_memOp<31, 23, (outs g8rc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lwzx $RST, $addr", IIC_LdStLoad,
+ [(set i64:$RST, (zextloadi32 XForm:$addr))]>,
ZExt32To64;
// Update forms.
let mayLoad = 1, hasSideEffects = 0 in {
-def LBZU8 : DForm_1<35, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
- (ins memri:$addr),
- "lbzu $rD, $addr", IIC_LdStLoadUpd,
+def LBZU8 : DForm_1<35, (outs g8rc:$RST, ptr_rc_nor0:$ea_result),
+ (ins (memri $D, $RA):$addr),
+ "lbzu $RST, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZU8 : DForm_1<41, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
- (ins memri:$addr),
- "lhzu $rD, $addr", IIC_LdStLoadUpd,
+def LHZU8 : DForm_1<41, (outs g8rc:$RST, ptr_rc_nor0:$ea_result),
+ (ins (memri $D, $RA):$addr),
+ "lhzu $RST, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZU8 : DForm_1<33, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
- (ins memri:$addr),
- "lwzu $rD, $addr", IIC_LdStLoadUpd,
+def LWZU8 : DForm_1<33, (outs g8rc:$RST, ptr_rc_nor0:$ea_result),
+ (ins (memri $D, $RA):$addr),
+ "lwzu $RST, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LBZUX8 : XForm_1_memOp<31, 119, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
- (ins memrr:$addr),
- "lbzux $rD, $addr", IIC_LdStLoadUpdX,
+def LBZUX8 : XForm_1_memOp<31, 119, (outs g8rc:$RST, ptr_rc_nor0:$ea_result),
+ (ins (memrr $RA, $RB):$addr),
+ "lbzux $RST, $addr", IIC_LdStLoadUpdX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX8 : XForm_1_memOp<31, 311, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
- (ins memrr:$addr),
- "lhzux $rD, $addr", IIC_LdStLoadUpdX,
+def LHZUX8 : XForm_1_memOp<31, 311, (outs g8rc:$RST, ptr_rc_nor0:$ea_result),
+ (ins (memrr $RA, $RB):$addr),
+ "lhzux $RST, $addr", IIC_LdStLoadUpdX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZUX8 : XForm_1_memOp<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
- (ins memrr:$addr),
- "lwzux $rD, $addr", IIC_LdStLoadUpdX,
+def LWZUX8 : XForm_1_memOp<31, 55, (outs g8rc:$RST, ptr_rc_nor0:$ea_result),
+ (ins (memrr $RA, $RB):$addr),
+ "lwzux $RST, $addr", IIC_LdStLoadUpdX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
}
@@ -1367,9 +1418,9 @@ def LWZUX8 : XForm_1_memOp<31, 55, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
// Full 8-byte loads.
let PPC970_Unit = 2 in {
-def LD : DSForm_1<58, 0, (outs g8rc:$rD), (ins memrix:$src),
- "ld $rD, $src", IIC_LdStLD,
- [(set i64:$rD, (load DSForm:$src))]>, isPPC64;
+def LD : DSForm_1<58, 0, (outs g8rc:$RST), (ins (memrix $D, $RA):$addr),
+ "ld $RST, $addr", IIC_LdStLD,
+ [(set i64:$RST, (load DSForm:$addr))]>, isPPC64;
// The following four definitions are selected for small code model only.
// Otherwise, we need to create two instructions to form a 32-bit offset,
// so we have a custom matcher for TOC_ENTRY in PPCDAGToDAGIsel::Select().
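A concrete sketch of what the comment above means (the global and function names are illustrative): materializing the address of a global goes through the TOC, and only the small code model can reach the TOC entry with a single 16-bit displacement; the default medium/large models form a 32-bit offset with an @toc@ha/@toc@l pair, which is why TOC_ENTRY gets a custom matcher.

#include <stdint.h>

int64_t important_global;  /* reached through the TOC */

int64_t *addr_of_global(void) {
  /* Small code model : one ld with a 16-bit displacement from r2.
   * Medium/large     : addis ... @toc@ha followed by ld/addi ... @toc@l. */
  return &important_global;
}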
@@ -1390,33 +1441,33 @@ def LDtocBA: PPCEmitTimePseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
[(set i64:$rD,
(PPCtoc_entry tblockaddress:$disp, i64:$reg))]>, isPPC64;
-def LDX : XForm_1_memOp<31, 21, (outs g8rc:$rD), (ins memrr:$src),
- "ldx $rD, $src", IIC_LdStLD,
- [(set i64:$rD, (load XForm:$src))]>, isPPC64;
+def LDX : XForm_1_memOp<31, 21, (outs g8rc:$RST), (ins (memrr $RA, $RB):$addr),
+ "ldx $RST, $addr", IIC_LdStLD,
+ [(set i64:$RST, (load XForm:$addr))]>, isPPC64;
let Predicates = [IsISA2_06] in {
-def LDBRX : XForm_1_memOp<31, 532, (outs g8rc:$rD), (ins memrr:$src),
- "ldbrx $rD, $src", IIC_LdStLoad,
- [(set i64:$rD, (PPClbrx ForceXForm:$src, i64))]>, isPPC64;
+def LDBRX : XForm_1_memOp<31, 532, (outs g8rc:$RST), (ins (memrr $RA, $RB):$addr),
+ "ldbrx $RST, $addr", IIC_LdStLoad,
+ [(set i64:$RST, (PPClbrx ForceXForm:$addr, i64))]>, isPPC64;
}
let mayLoad = 1, hasSideEffects = 0, isCodeGenOnly = 1 in {
-def LHBRX8 : XForm_1_memOp<31, 790, (outs g8rc:$rD), (ins memrr:$src),
- "lhbrx $rD, $src", IIC_LdStLoad, []>, ZExt32To64;
-def LWBRX8 : XForm_1_memOp<31, 534, (outs g8rc:$rD), (ins memrr:$src),
- "lwbrx $rD, $src", IIC_LdStLoad, []>, ZExt32To64;
+def LHBRX8 : XForm_1_memOp<31, 790, (outs g8rc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lhbrx $RST, $addr", IIC_LdStLoad, []>, ZExt32To64;
+def LWBRX8 : XForm_1_memOp<31, 534, (outs g8rc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lwbrx $RST, $addr", IIC_LdStLoad, []>, ZExt32To64;
}
let mayLoad = 1, hasSideEffects = 0 in {
-def LDU : DSForm_1<58, 1, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
- (ins memrix:$addr),
- "ldu $rD, $addr", IIC_LdStLDU,
+def LDU : DSForm_1<58, 1, (outs g8rc:$RST, ptr_rc_nor0:$ea_result),
+ (ins (memrix $D, $RA):$addr),
+ "ldu $RST, $addr", IIC_LdStLDU,
[]>, RegConstraint<"$addr.reg = $ea_result">, isPPC64,
NoEncode<"$ea_result">;
-def LDUX : XForm_1_memOp<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
- (ins memrr:$addr),
- "ldux $rD, $addr", IIC_LdStLDUX,
+def LDUX : XForm_1_memOp<31, 53, (outs g8rc:$RST, ptr_rc_nor0:$ea_result),
+ (ins (memrr $RA, $RB):$addr),
+ "ldux $RST, $addr", IIC_LdStLDUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">, isPPC64;
}
@@ -1427,8 +1478,8 @@ let mayLoad = 1, hasNoSchedulingInfo = 1 in {
// TODO: Add scheduling info.
def LQ : DQForm_RTp5_RA17_MEM<56, 0,
(outs g8prc:$RTp),
- (ins memrix16:$src),
- "lq $RTp, $src", IIC_LdStLQ,
+ (ins (memrix16 $DQ, $RA):$addr),
+ "lq $RTp, $addr", IIC_LdStLQ,
[]>,
RegConstraint<"@earlyclobber $RTp">,
isPPC64;
@@ -1604,50 +1655,50 @@ def PADDIdtprel : PPCEmitTimePseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm
let PPC970_Unit = 2 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
// Truncating stores.
-def STB8 : DForm_1<38, (outs), (ins g8rc:$rS, memri:$src),
- "stb $rS, $src", IIC_LdStStore,
- [(truncstorei8 i64:$rS, DForm:$src)]>;
-def STH8 : DForm_1<44, (outs), (ins g8rc:$rS, memri:$src),
- "sth $rS, $src", IIC_LdStStore,
- [(truncstorei16 i64:$rS, DForm:$src)]>;
-def STW8 : DForm_1<36, (outs), (ins g8rc:$rS, memri:$src),
- "stw $rS, $src", IIC_LdStStore,
- [(truncstorei32 i64:$rS, DForm:$src)]>;
-def STBX8 : XForm_8_memOp<31, 215, (outs), (ins g8rc:$rS, memrr:$dst),
- "stbx $rS, $dst", IIC_LdStStore,
- [(truncstorei8 i64:$rS, XForm:$dst)]>,
+def STB8 : DForm_1<38, (outs), (ins g8rc:$RST, (memri $D, $RA):$addr),
+ "stb $RST, $addr", IIC_LdStStore,
+ [(truncstorei8 i64:$RST, DForm:$addr)]>;
+def STH8 : DForm_1<44, (outs), (ins g8rc:$RST, (memri $D, $RA):$addr),
+ "sth $RST, $addr", IIC_LdStStore,
+ [(truncstorei16 i64:$RST, DForm:$addr)]>;
+def STW8 : DForm_1<36, (outs), (ins g8rc:$RST, (memri $D, $RA):$addr),
+ "stw $RST, $addr", IIC_LdStStore,
+ [(truncstorei32 i64:$RST, DForm:$addr)]>;
+def STBX8 : XForm_8_memOp<31, 215, (outs), (ins g8rc:$RST, (memrr $RA, $RB):$addr),
+ "stbx $RST, $addr", IIC_LdStStore,
+ [(truncstorei8 i64:$RST, XForm:$addr)]>,
PPC970_DGroup_Cracked;
-def STHX8 : XForm_8_memOp<31, 407, (outs), (ins g8rc:$rS, memrr:$dst),
- "sthx $rS, $dst", IIC_LdStStore,
- [(truncstorei16 i64:$rS, XForm:$dst)]>,
+def STHX8 : XForm_8_memOp<31, 407, (outs), (ins g8rc:$RST, (memrr $RA, $RB):$addr),
+ "sthx $RST, $addr", IIC_LdStStore,
+ [(truncstorei16 i64:$RST, XForm:$addr)]>,
PPC970_DGroup_Cracked;
-def STWX8 : XForm_8_memOp<31, 151, (outs), (ins g8rc:$rS, memrr:$dst),
- "stwx $rS, $dst", IIC_LdStStore,
- [(truncstorei32 i64:$rS, XForm:$dst)]>,
+def STWX8 : XForm_8_memOp<31, 151, (outs), (ins g8rc:$RST, (memrr $RA, $RB):$addr),
+ "stwx $RST, $addr", IIC_LdStStore,
+ [(truncstorei32 i64:$RST, XForm:$addr)]>,
PPC970_DGroup_Cracked;
} // Interpretation64Bit
// Normal 8-byte stores.
-def STD : DSForm_1<62, 0, (outs), (ins g8rc:$rS, memrix:$dst),
- "std $rS, $dst", IIC_LdStSTD,
- [(store i64:$rS, DSForm:$dst)]>, isPPC64;
-def STDX : XForm_8_memOp<31, 149, (outs), (ins g8rc:$rS, memrr:$dst),
- "stdx $rS, $dst", IIC_LdStSTD,
- [(store i64:$rS, XForm:$dst)]>, isPPC64,
+def STD : DSForm_1<62, 0, (outs), (ins g8rc:$RST, (memrix $D, $RA):$addr),
+ "std $RST, $addr", IIC_LdStSTD,
+ [(store i64:$RST, DSForm:$addr)]>, isPPC64;
+def STDX : XForm_8_memOp<31, 149, (outs), (ins g8rc:$RST, (memrr $RA, $RB):$addr),
+ "stdx $RST, $addr", IIC_LdStSTD,
+ [(store i64:$RST, XForm:$addr)]>, isPPC64,
PPC970_DGroup_Cracked;
let Predicates = [IsISA2_06] in {
-def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$rS, memrr:$dst),
- "stdbrx $rS, $dst", IIC_LdStStore,
- [(PPCstbrx i64:$rS, ForceXForm:$dst, i64)]>, isPPC64,
+def STDBRX: XForm_8_memOp<31, 660, (outs), (ins g8rc:$RST, (memrr $RA, $RB):$addr),
+ "stdbrx $RST, $addr", IIC_LdStStore,
+ [(PPCstbrx i64:$RST, ForceXForm:$addr, i64)]>, isPPC64,
PPC970_DGroup_Cracked;
}
let mayStore = 1, hasNoSchedulingInfo = 1 in {
// Normal 16-byte stores.
// TODO: Add scheduling info.
-def STQ : DSForm_1<62, 2, (outs), (ins g8prc:$RSp, memrix:$dst),
- "stq $RSp, $dst", IIC_LdStSTQ,
+def STQ : DSForm_1<62, 2, (outs), (ins g8prc:$RST, (memrix $D, $RA):$addr),
+ "stq $RST, $addr", IIC_LdStSTQ,
[]>, isPPC64;
def STQX_PSEUDO : PPCCustomInserterPseudo<(outs),
@@ -1674,46 +1725,46 @@ def : Pat<(int_ppc_atomic_store_i128 i64:$lo, i64:$hi, ForceXForm:$dst),
// Stores with Update (pre-inc).
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
-def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "stbu $rS, $dst", IIC_LdStSTU, []>,
- RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "sthu $rS, $dst", IIC_LdStSTU, []>,
- RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$rS, memri:$dst),
- "stwu $rS, $dst", IIC_LdStSTU, []>,
- RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
+def STBU8 : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$RST, (memri $D, $RA):$addr),
+ "stbu $RST, $addr", IIC_LdStSTU, []>,
+ RegConstraint<"$addr.reg = $ea_res">, NoEncode<"$ea_res">;
+def STHU8 : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$RST, (memri $D, $RA):$addr),
+ "sthu $RST, $addr", IIC_LdStSTU, []>,
+ RegConstraint<"$addr.reg = $ea_res">, NoEncode<"$ea_res">;
+def STWU8 : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins g8rc:$RST, (memri $D, $RA):$addr),
+ "stwu $RST, $addr", IIC_LdStSTU, []>,
+ RegConstraint<"$addr.reg = $ea_res">, NoEncode<"$ea_res">;
def STBUX8: XForm_8_memOp<31, 247, (outs ptr_rc_nor0:$ea_res),
- (ins g8rc:$rS, memrr:$dst),
- "stbux $rS, $dst", IIC_LdStSTUX, []>,
- RegConstraint<"$dst.ptrreg = $ea_res">,
+ (ins g8rc:$RST, (memrr $RA, $RB):$addr),
+ "stbux $RST, $addr", IIC_LdStSTUX, []>,
+ RegConstraint<"$addr.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STHUX8: XForm_8_memOp<31, 439, (outs ptr_rc_nor0:$ea_res),
- (ins g8rc:$rS, memrr:$dst),
- "sthux $rS, $dst", IIC_LdStSTUX, []>,
- RegConstraint<"$dst.ptrreg = $ea_res">,
+ (ins g8rc:$RST, (memrr $RA, $RB):$addr),
+ "sthux $RST, $addr", IIC_LdStSTUX, []>,
+ RegConstraint<"$addr.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STWUX8: XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res),
- (ins g8rc:$rS, memrr:$dst),
- "stwux $rS, $dst", IIC_LdStSTUX, []>,
- RegConstraint<"$dst.ptrreg = $ea_res">,
+ (ins g8rc:$RST, (memrr $RA, $RB):$addr),
+ "stwux $RST, $addr", IIC_LdStSTUX, []>,
+ RegConstraint<"$addr.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
} // Interpretation64Bit
def STDU : DSForm_1<62, 1, (outs ptr_rc_nor0:$ea_res),
- (ins g8rc:$rS, memrix:$dst),
- "stdu $rS, $dst", IIC_LdStSTU, []>,
- RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">,
+ (ins g8rc:$RST, (memrix $D, $RA):$addr),
+ "stdu $RST, $addr", IIC_LdStSTU, []>,
+ RegConstraint<"$addr.reg = $ea_res">, NoEncode<"$ea_res">,
isPPC64;
def STDUX : XForm_8_memOp<31, 181, (outs ptr_rc_nor0:$ea_res),
- (ins g8rc:$rS, memrr:$dst),
- "stdux $rS, $dst", IIC_LdStSTUX, []>,
- RegConstraint<"$dst.ptrreg = $ea_res">,
+ (ins g8rc:$RST, (memrr $RA, $RB):$addr),
+ "stdux $RST, $addr", IIC_LdStSTUX, []>,
+ RegConstraint<"$addr.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked, isPPC64;
}
@@ -1747,45 +1798,45 @@ def : Pat<(pre_store i64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
let PPC970_Unit = 3, hasSideEffects = 0, mayRaiseFPException = 1,
Uses = [RM] in { // FPU Operations.
-defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB),
- "fcfid", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCany_fcfid f64:$frB))]>, isPPC64;
-defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctid", "$frD, $frB", IIC_FPGeneral,
+defm FCFID : XForm_26r<63, 846, (outs f8rc:$RST), (ins f8rc:$RB),
+ "fcfid", "$RST, $RB", IIC_FPGeneral,
+ [(set f64:$RST, (PPCany_fcfid f64:$RB))]>, isPPC64;
+defm FCTID : XForm_26r<63, 814, (outs f8rc:$RST), (ins f8rc:$RB),
+ "fctid", "$RST, $RB", IIC_FPGeneral,
[]>, isPPC64;
-defm FCTIDU : XForm_26r<63, 942, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctidu", "$frD, $frB", IIC_FPGeneral,
+defm FCTIDU : XForm_26r<63, 942, (outs f8rc:$RST), (ins f8rc:$RB),
+ "fctidu", "$RST, $RB", IIC_FPGeneral,
[]>, isPPC64;
-defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctidz", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCany_fctidz f64:$frB))]>, isPPC64;
-
-defm FCFIDU : XForm_26r<63, 974, (outs f8rc:$frD), (ins f8rc:$frB),
- "fcfidu", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCany_fcfidu f64:$frB))]>, isPPC64;
-defm FCFIDS : XForm_26r<59, 846, (outs f4rc:$frD), (ins f8rc:$frB),
- "fcfids", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (PPCany_fcfids f64:$frB))]>, isPPC64;
-defm FCFIDUS : XForm_26r<59, 974, (outs f4rc:$frD), (ins f8rc:$frB),
- "fcfidus", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (PPCany_fcfidus f64:$frB))]>, isPPC64;
-defm FCTIDUZ : XForm_26r<63, 943, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiduz", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCany_fctiduz f64:$frB))]>, isPPC64;
-defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiwuz", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCany_fctiwuz f64:$frB))]>, isPPC64;
+defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$RST), (ins f8rc:$RB),
+ "fctidz", "$RST, $RB", IIC_FPGeneral,
+ [(set f64:$RST, (PPCany_fctidz f64:$RB))]>, isPPC64;
+
+defm FCFIDU : XForm_26r<63, 974, (outs f8rc:$RST), (ins f8rc:$RB),
+ "fcfidu", "$RST, $RB", IIC_FPGeneral,
+ [(set f64:$RST, (PPCany_fcfidu f64:$RB))]>, isPPC64;
+defm FCFIDS : XForm_26r<59, 846, (outs f4rc:$RST), (ins f8rc:$RB),
+ "fcfids", "$RST, $RB", IIC_FPGeneral,
+ [(set f32:$RST, (PPCany_fcfids f64:$RB))]>, isPPC64;
+defm FCFIDUS : XForm_26r<59, 974, (outs f4rc:$RST), (ins f8rc:$RB),
+ "fcfidus", "$RST, $RB", IIC_FPGeneral,
+ [(set f32:$RST, (PPCany_fcfidus f64:$RB))]>, isPPC64;
+defm FCTIDUZ : XForm_26r<63, 943, (outs f8rc:$RST), (ins f8rc:$RB),
+ "fctiduz", "$RST, $RB", IIC_FPGeneral,
+ [(set f64:$RST, (PPCany_fctiduz f64:$RB))]>, isPPC64;
+defm FCTIWUZ : XForm_26r<63, 143, (outs f8rc:$RST), (ins f8rc:$RB),
+ "fctiwuz", "$RST, $RB", IIC_FPGeneral,
+ [(set f64:$RST, (PPCany_fctiwuz f64:$RB))]>, isPPC64;
}
// These instructions store a hash computed from the value of the link register
// and the value of the stack pointer.
let mayStore = 1, Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def HASHST8 : XForm_XD6_RA5_RB5<31, 722, (outs),
- (ins g8rc:$RB, memrihash:$D_RA_XD),
- "hashst $RB, $D_RA_XD", IIC_IntGeneral, []>;
+ (ins g8rc:$RB, (memrihash $D, $RA):$addr),
+ "hashst $RB, $addr", IIC_IntGeneral, []>;
def HASHSTP8 : XForm_XD6_RA5_RB5<31, 658, (outs),
- (ins g8rc:$RB, memrihash:$D_RA_XD),
- "hashstp $RB, $D_RA_XD", IIC_IntGeneral, []>;
+ (ins g8rc:$RB, (memrihash $D, $RA):$addr),
+ "hashstp $RB, $addr", IIC_IntGeneral, []>;
}
// These instructions check a hash computed from the value of the link register
@@ -1795,18 +1846,18 @@ def HASHSTP8 : XForm_XD6_RA5_RB5<31, 658, (outs),
let mayLoad = 1, hasSideEffects = 1,
Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def HASHCHK8 : XForm_XD6_RA5_RB5<31, 754, (outs),
- (ins g8rc:$RB, memrihash:$D_RA_XD),
- "hashchk $RB, $D_RA_XD", IIC_IntGeneral, []>;
+ (ins g8rc:$RB, (memrihash $D, $RA):$addr),
+ "hashchk $RB, $addr", IIC_IntGeneral, []>;
def HASHCHKP8 : XForm_XD6_RA5_RB5<31, 690, (outs),
- (ins g8rc:$RB, memrihash:$D_RA_XD),
- "hashchkp $RB, $D_RA_XD", IIC_IntGeneral, []>;
+ (ins g8rc:$RB, (memrihash $D, $RA):$addr),
+ "hashchkp $RB, $addr", IIC_IntGeneral, []>;
}
let Interpretation64Bit = 1, isCodeGenOnly = 1, hasSideEffects = 1 in
-def ADDEX8 : Z23Form_RTAB5_CY2<31, 170, (outs g8rc:$rT),
- (ins g8rc:$rA, g8rc:$rB, u2imm:$CY),
- "addex $rT, $rA, $rB, $CY", IIC_IntGeneral,
- [(set i64:$rT, (int_ppc_addex i64:$rA, i64:$rB,
+def ADDEX8 : Z23Form_RTAB5_CY2<31, 170, (outs g8rc:$RT),
+ (ins g8rc:$RA, g8rc:$RB, u2imm:$CY),
+ "addex $RT, $RA, $RB, $CY", IIC_IntGeneral,
+ [(set i64:$RT, (int_ppc_addex i64:$RA, i64:$RB,
timm:$CY))]>;
//===----------------------------------------------------------------------===//
@@ -1901,9 +1952,20 @@ def : Pat<(add i64:$in, (PPChi tblockaddress:$g, 0)),
(ADDIS8 $in, tblockaddress:$g)>;
// AIX 64-bit small code model TLS access.
+// This is used for global dynamic accesses when loading the region handle and
+// variable offset, and also for local-exec accesses to load the offset of a
+// TLS variable from the TOC, prior to adding it to r13.
def : Pat<(i64 (PPCtoc_entry tglobaltlsaddr:$disp, i64:$reg)),
(i64 (LDtoc tglobaltlsaddr:$disp, i64:$reg))>;
+// The following pattern matches 64-bit local-exec TLS accesses on AIX.
+// PPCaddTls is used in local-exec accesses in order to:
+// - Get the address of a variable (adding the variable offset to the thread
+// pointer in r13).
+// - Create an opportunity to optimize the user of the loaded address.
+def : Pat<(PPCaddTls i64:$in, i64:$addr),
+ (ADD8TLS $in, $addr)>;
+
// 64-bits atomic loads and stores
def : Pat<(atomic_load_64 DSForm:$src), (LD memrix:$src)>;
def : Pat<(atomic_load_64 XForm:$src), (LDX memrr:$src)>;
@@ -1947,15 +2009,15 @@ def : Pat<(int_ppc_darnraw), (DARN 2)>;
class X_RA5_RB5<bits<6> opcode, bits<10> xo, string opc, RegisterOperand ty,
InstrItinClass itin, list<dag> pattern>
- : X_L1_RS5_RS5<opcode, xo, (outs), (ins ty:$rA, ty:$rB, u1imm:$L),
- !strconcat(opc, " $rA, $rB"), itin, pattern>{
+ : X_L1_RS5_RS5<opcode, xo, (outs), (ins ty:$RA, ty:$RB, u1imm:$L),
+ !strconcat(opc, " $RA, $RB"), itin, pattern>{
let L = 1;
}
class X_L1_RA5_RB5<bits<6> opcode, bits<10> xo, string opc, RegisterOperand ty,
InstrItinClass itin, list<dag> pattern>
- : X_L1_RS5_RS5<opcode, xo, (outs), (ins ty:$rA, ty:$rB, u1imm:$L),
- !strconcat(opc, " $rA, $rB, $L"), itin, pattern>;
+ : X_L1_RS5_RS5<opcode, xo, (outs), (ins ty:$RA, ty:$RB, u1imm:$L),
+ !strconcat(opc, " $RA, $RB, $L"), itin, pattern>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def CP_COPY8 : X_RA5_RB5<31, 774, "copy" , g8rc, IIC_LdStCOPY, []>;
@@ -1963,8 +2025,8 @@ def CP_PASTE8_rec : X_L1_RA5_RB5<31, 902, "paste.", g8rc, IIC_LdStPASTE, []>,isR
}
// SLB Invalidate Entry Global
-def SLBIEG : XForm_26<31, 466, (outs), (ins gprc:$RS, gprc:$RB),
- "slbieg $RS, $RB", IIC_SprSLBIEG, []>;
+def SLBIEG : XForm_26<31, 466, (outs), (ins gprc:$RST, gprc:$RB),
+ "slbieg $RST, $RB", IIC_SprSLBIEG, []>;
// SLB Synchronize
def SLBSYNC : XForm_0<31, 338, (outs), (ins), "slbsync", IIC_SprSLBSYNC, []>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index 9236b8fea773..386c94a32499 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -266,72 +266,72 @@ def immEQOneV : PatLeaf<(build_vector), [{
// VA1a_Int_Ty - A VAForm_1a intrinsic definition of specific type.
class VA1a_Int_Ty<bits<6> xo, string opc, Intrinsic IntID, ValueType Ty>
- : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
- !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP,
- [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB, Ty:$vC))]>;
+ : VAForm_1a<xo, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RB, vrrc:$RC),
+ !strconcat(opc, " $RT, $RA, $RB, $RC"), IIC_VecFP,
+ [(set Ty:$RT, (IntID Ty:$RA, Ty:$RB, Ty:$RC))]>;
// VA1a_Int_Ty2 - A VAForm_1a intrinsic definition where the type of the
// inputs doesn't match the type of the output.
class VA1a_Int_Ty2<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType InTy>
- : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
- !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP,
- [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB, InTy:$vC))]>;
+ : VAForm_1a<xo, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RB, vrrc:$RC),
+ !strconcat(opc, " $RT, $RA, $RB, $RC"), IIC_VecFP,
+ [(set OutTy:$RT, (IntID InTy:$RA, InTy:$RB, InTy:$RC))]>;
// VA1a_Int_Ty3 - A VAForm_1a intrinsic definition where there are two
// input types and an output type.
class VA1a_Int_Ty3<bits<6> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType In1Ty, ValueType In2Ty>
- : VAForm_1a<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
- !strconcat(opc, " $vD, $vA, $vB, $vC"), IIC_VecFP,
- [(set OutTy:$vD,
- (IntID In1Ty:$vA, In1Ty:$vB, In2Ty:$vC))]>;
+ : VAForm_1a<xo, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RB, vrrc:$RC),
+ !strconcat(opc, " $RT, $RA, $RB, $RC"), IIC_VecFP,
+ [(set OutTy:$RT,
+ (IntID In1Ty:$RA, In1Ty:$RB, In2Ty:$RC))]>;
// VX1_Int_Ty - A VXForm_1 intrinsic definition of specific type.
class VX1_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
- : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP,
- [(set Ty:$vD, (IntID Ty:$vA, Ty:$vB))]>;
+ : VXForm_1<xo, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ !strconcat(opc, " $VD, $VA, $VB"), IIC_VecFP,
+ [(set Ty:$VD, (IntID Ty:$VA, Ty:$VB))]>;
// VX1_Int_Ty2 - A VXForm_1 intrinsic definition where the type of the
// inputs doesn't match the type of the output.
class VX1_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType InTy>
- : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP,
- [(set OutTy:$vD, (IntID InTy:$vA, InTy:$vB))]>;
+ : VXForm_1<xo, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ !strconcat(opc, " $VD, $VA, $VB"), IIC_VecFP,
+ [(set OutTy:$VD, (IntID InTy:$VA, InTy:$VB))]>;
// VX1_Int_Ty3 - A VXForm_1 intrinsic definition where there are two
// input types and an output type.
class VX1_Int_Ty3<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType In1Ty, ValueType In2Ty>
- : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP,
- [(set OutTy:$vD, (IntID In1Ty:$vA, In2Ty:$vB))]>;
+ : VXForm_1<xo, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ !strconcat(opc, " $VD, $VA, $VB"), IIC_VecFP,
+ [(set OutTy:$VD, (IntID In1Ty:$VA, In2Ty:$VB))]>;
// VX2_Int_SP - A VXForm_2 intrinsic definition of vector single-precision type.
class VX2_Int_SP<bits<11> xo, string opc, Intrinsic IntID>
- : VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB),
- !strconcat(opc, " $vD, $vB"), IIC_VecFP,
- [(set v4f32:$vD, (IntID v4f32:$vB))]>;
+ : VXForm_2<xo, (outs vrrc:$VD), (ins vrrc:$VB),
+ !strconcat(opc, " $VD, $VB"), IIC_VecFP,
+ [(set v4f32:$VD, (IntID v4f32:$VB))]>;
// VX2_Int_Ty2 - A VXForm_2 intrinsic definition where the type of the
// inputs doesn't match the type of the output.
class VX2_Int_Ty2<bits<11> xo, string opc, Intrinsic IntID, ValueType OutTy,
ValueType InTy>
- : VXForm_2<xo, (outs vrrc:$vD), (ins vrrc:$vB),
- !strconcat(opc, " $vD, $vB"), IIC_VecFP,
- [(set OutTy:$vD, (IntID InTy:$vB))]>;
+ : VXForm_2<xo, (outs vrrc:$VD), (ins vrrc:$VB),
+ !strconcat(opc, " $VD, $VB"), IIC_VecFP,
+ [(set OutTy:$VD, (IntID InTy:$VB))]>;
class VXBX_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
- : VXForm_BX<xo, (outs vrrc:$vD), (ins vrrc:$vA),
- !strconcat(opc, " $vD, $vA"), IIC_VecFP,
- [(set Ty:$vD, (IntID Ty:$vA))]>;
+ : VXForm_BX<xo, (outs vrrc:$VD), (ins vrrc:$VA),
+ !strconcat(opc, " $VD, $VA"), IIC_VecFP,
+ [(set Ty:$VD, (IntID Ty:$VA))]>;
class VXCR_Int_Ty<bits<11> xo, string opc, Intrinsic IntID, ValueType Ty>
- : VXForm_CR<xo, (outs vrrc:$vD), (ins vrrc:$vA, u1imm:$ST, u4imm:$SIX),
- !strconcat(opc, " $vD, $vA, $ST, $SIX"), IIC_VecFP,
- [(set Ty:$vD, (IntID Ty:$vA, timm:$ST, timm:$SIX))]>;
+ : VXForm_CR<xo, (outs vrrc:$VD), (ins vrrc:$VA, u1imm:$ST, u4imm:$SIX),
+ !strconcat(opc, " $VD, $VA, $ST, $SIX"), IIC_VecFP,
+ [(set Ty:$VD, (IntID Ty:$VA, timm:$ST, timm:$SIX))]>;
//===----------------------------------------------------------------------===//
// Instruction Definitions.
@@ -342,130 +342,130 @@ let Predicates = [HasAltivec] in {
def DSS : DSS_Form<0, 822, (outs), (ins u5imm:$STRM),
"dss $STRM", IIC_LdStLoad /*FIXME*/, [(int_ppc_altivec_dss imm:$STRM)]>,
Deprecated<DeprecatedDST> {
- let A = 0;
- let B = 0;
+ let RA = 0;
+ let RB = 0;
}
def DSSALL : DSS_Form<1, 822, (outs), (ins),
"dssall", IIC_LdStLoad /*FIXME*/, []>,
Deprecated<DeprecatedDST> {
let STRM = 0;
- let A = 0;
- let B = 0;
+ let RA = 0;
+ let RB = 0;
}
-def DST : DSS_Form<0, 342, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
- [(int_ppc_altivec_dst i32:$rA, i32:$rB, imm:$STRM)]>,
+def DST : DSS_Form<0, 342, (outs), (ins u5imm:$STRM, gprc:$RA, gprc:$RB),
+ "dst $RA, $RB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dst i32:$RA, i32:$RB, imm:$STRM)]>,
Deprecated<DeprecatedDST>;
-def DSTT : DSS_Form<1, 342, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
- [(int_ppc_altivec_dstt i32:$rA, i32:$rB, imm:$STRM)]>,
+def DSTT : DSS_Form<1, 342, (outs), (ins u5imm:$STRM, gprc:$RA, gprc:$RB),
+ "dstt $RA, $RB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstt i32:$RA, i32:$RB, imm:$STRM)]>,
Deprecated<DeprecatedDST>;
-def DSTST : DSS_Form<0, 374, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
- [(int_ppc_altivec_dstst i32:$rA, i32:$rB, imm:$STRM)]>,
+def DSTST : DSS_Form<0, 374, (outs), (ins u5imm:$STRM, gprc:$RA, gprc:$RB),
+ "dstst $RA, $RB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstst i32:$RA, i32:$RB, imm:$STRM)]>,
Deprecated<DeprecatedDST>;
-def DSTSTT : DSS_Form<1, 374, (outs), (ins u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
- [(int_ppc_altivec_dststt i32:$rA, i32:$rB, imm:$STRM)]>,
+def DSTSTT : DSS_Form<1, 374, (outs), (ins u5imm:$STRM, gprc:$RA, gprc:$RB),
+ "dststt $RA, $RB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dststt i32:$RA, i32:$RB, imm:$STRM)]>,
Deprecated<DeprecatedDST>;
let isCodeGenOnly = 1 in {
// The very same instructions as above, but formally matching 64bit registers.
- def DST64 : DSS_Form<0, 342, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
- [(int_ppc_altivec_dst i64:$rA, i32:$rB, imm:$STRM)]>,
+ def DST64 : DSS_Form<0, 342, (outs), (ins u5imm:$STRM, g8rc:$RA, gprc:$RB),
+ "dst $RA, $RB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dst i64:$RA, i32:$RB, imm:$STRM)]>,
Deprecated<DeprecatedDST>;
- def DSTT64 : DSS_Form<1, 342, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dstt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
- [(int_ppc_altivec_dstt i64:$rA, i32:$rB, imm:$STRM)]>,
+ def DSTT64 : DSS_Form<1, 342, (outs), (ins u5imm:$STRM, g8rc:$RA, gprc:$RB),
+ "dstt $RA, $RB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstt i64:$RA, i32:$RB, imm:$STRM)]>,
Deprecated<DeprecatedDST>;
- def DSTST64 : DSS_Form<0, 374, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dstst $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
- [(int_ppc_altivec_dstst i64:$rA, i32:$rB,
+ def DSTST64 : DSS_Form<0, 374, (outs), (ins u5imm:$STRM, g8rc:$RA, gprc:$RB),
+ "dstst $RA, $RB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dstst i64:$RA, i32:$RB,
imm:$STRM)]>,
Deprecated<DeprecatedDST>;
- def DSTSTT64 : DSS_Form<1, 374, (outs), (ins u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dststt $rA, $rB, $STRM", IIC_LdStLoad /*FIXME*/,
- [(int_ppc_altivec_dststt i64:$rA, i32:$rB,
+ def DSTSTT64 : DSS_Form<1, 374, (outs), (ins u5imm:$STRM, g8rc:$RA, gprc:$RB),
+ "dststt $RA, $RB, $STRM", IIC_LdStLoad /*FIXME*/,
+ [(int_ppc_altivec_dststt i64:$RA, i32:$RB,
imm:$STRM)]>,
Deprecated<DeprecatedDST>;
}
let hasSideEffects = 1 in {
- def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
- "mfvscr $vD", IIC_LdStStore,
- [(set v8i16:$vD, (int_ppc_altivec_mfvscr))]>;
- def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$vB),
- "mtvscr $vB", IIC_LdStLoad,
- [(int_ppc_altivec_mtvscr v4i32:$vB)]>;
+ def MFVSCR : VXForm_4<1540, (outs vrrc:$VD), (ins),
+ "mfvscr $VD", IIC_LdStStore,
+ [(set v8i16:$VD, (int_ppc_altivec_mfvscr))]>;
+ def MTVSCR : VXForm_5<1604, (outs), (ins vrrc:$VB),
+ "mtvscr $VB", IIC_LdStLoad,
+ [(int_ppc_altivec_mtvscr v4i32:$VB)]>;
}
let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in { // Loads.
-def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$vD), (ins memrr:$src),
- "lvebx $vD, $src", IIC_LdStLoad,
- [(set v16i8:$vD, (int_ppc_altivec_lvebx ForceXForm:$src))]>;
-def LVEHX: XForm_1_memOp<31, 39, (outs vrrc:$vD), (ins memrr:$src),
- "lvehx $vD, $src", IIC_LdStLoad,
- [(set v8i16:$vD, (int_ppc_altivec_lvehx ForceXForm:$src))]>;
-def LVEWX: XForm_1_memOp<31, 71, (outs vrrc:$vD), (ins memrr:$src),
- "lvewx $vD, $src", IIC_LdStLoad,
- [(set v4i32:$vD, (int_ppc_altivec_lvewx ForceXForm:$src))]>;
-def LVX : XForm_1_memOp<31, 103, (outs vrrc:$vD), (ins memrr:$src),
- "lvx $vD, $src", IIC_LdStLoad,
- [(set v4i32:$vD, (int_ppc_altivec_lvx ForceXForm:$src))]>;
-def LVXL : XForm_1_memOp<31, 359, (outs vrrc:$vD), (ins memrr:$src),
- "lvxl $vD, $src", IIC_LdStLoad,
- [(set v4i32:$vD, (int_ppc_altivec_lvxl ForceXForm:$src))]>;
+def LVEBX: XForm_1_memOp<31, 7, (outs vrrc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lvebx $RST, $addr", IIC_LdStLoad,
+ [(set v16i8:$RST, (int_ppc_altivec_lvebx ForceXForm:$addr))]>;
+def LVEHX: XForm_1_memOp<31, 39, (outs vrrc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lvehx $RST, $addr", IIC_LdStLoad,
+ [(set v8i16:$RST, (int_ppc_altivec_lvehx ForceXForm:$addr))]>;
+def LVEWX: XForm_1_memOp<31, 71, (outs vrrc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lvewx $RST, $addr", IIC_LdStLoad,
+ [(set v4i32:$RST, (int_ppc_altivec_lvewx ForceXForm:$addr))]>;
+def LVX : XForm_1_memOp<31, 103, (outs vrrc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lvx $RST, $addr", IIC_LdStLoad,
+ [(set v4i32:$RST, (int_ppc_altivec_lvx ForceXForm:$addr))]>;
+def LVXL : XForm_1_memOp<31, 359, (outs vrrc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lvxl $RST, $addr", IIC_LdStLoad,
+ [(set v4i32:$RST, (int_ppc_altivec_lvxl ForceXForm:$addr))]>;
}
-def LVSL : XForm_1_memOp<31, 6, (outs vrrc:$vD), (ins memrr:$src),
- "lvsl $vD, $src", IIC_LdStLoad,
- [(set v16i8:$vD, (int_ppc_altivec_lvsl ForceXForm:$src))]>,
+def LVSL : XForm_1_memOp<31, 6, (outs vrrc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lvsl $RST, $addr", IIC_LdStLoad,
+ [(set v16i8:$RST, (int_ppc_altivec_lvsl ForceXForm:$addr))]>,
PPC970_Unit_LSU;
-def LVSR : XForm_1_memOp<31, 38, (outs vrrc:$vD), (ins memrr:$src),
- "lvsr $vD, $src", IIC_LdStLoad,
- [(set v16i8:$vD, (int_ppc_altivec_lvsr ForceXForm:$src))]>,
+def LVSR : XForm_1_memOp<31, 38, (outs vrrc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lvsr $RST, $addr", IIC_LdStLoad,
+ [(set v16i8:$RST, (int_ppc_altivec_lvsr ForceXForm:$addr))]>,
PPC970_Unit_LSU;
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in { // Stores.
-def STVEBX: XForm_8_memOp<31, 135, (outs), (ins vrrc:$rS, memrr:$dst),
- "stvebx $rS, $dst", IIC_LdStStore,
- [(int_ppc_altivec_stvebx v16i8:$rS, ForceXForm:$dst)]>;
-def STVEHX: XForm_8_memOp<31, 167, (outs), (ins vrrc:$rS, memrr:$dst),
- "stvehx $rS, $dst", IIC_LdStStore,
- [(int_ppc_altivec_stvehx v8i16:$rS, ForceXForm:$dst)]>;
-def STVEWX: XForm_8_memOp<31, 199, (outs), (ins vrrc:$rS, memrr:$dst),
- "stvewx $rS, $dst", IIC_LdStStore,
- [(int_ppc_altivec_stvewx v4i32:$rS, ForceXForm:$dst)]>;
-def STVX : XForm_8_memOp<31, 231, (outs), (ins vrrc:$rS, memrr:$dst),
- "stvx $rS, $dst", IIC_LdStStore,
- [(int_ppc_altivec_stvx v4i32:$rS, ForceXForm:$dst)]>;
-def STVXL : XForm_8_memOp<31, 487, (outs), (ins vrrc:$rS, memrr:$dst),
- "stvxl $rS, $dst", IIC_LdStStore,
- [(int_ppc_altivec_stvxl v4i32:$rS, ForceXForm:$dst)]>;
+def STVEBX: XForm_8_memOp<31, 135, (outs), (ins vrrc:$RST, (memrr $RA, $RB):$addr),
+ "stvebx $RST, $addr", IIC_LdStStore,
+ [(int_ppc_altivec_stvebx v16i8:$RST, ForceXForm:$addr)]>;
+def STVEHX: XForm_8_memOp<31, 167, (outs), (ins vrrc:$RST, (memrr $RA, $RB):$addr),
+ "stvehx $RST, $addr", IIC_LdStStore,
+ [(int_ppc_altivec_stvehx v8i16:$RST, ForceXForm:$addr)]>;
+def STVEWX: XForm_8_memOp<31, 199, (outs), (ins vrrc:$RST, (memrr $RA, $RB):$addr),
+ "stvewx $RST, $addr", IIC_LdStStore,
+ [(int_ppc_altivec_stvewx v4i32:$RST, ForceXForm:$addr)]>;
+def STVX : XForm_8_memOp<31, 231, (outs), (ins vrrc:$RST, (memrr $RA, $RB):$addr),
+ "stvx $RST, $addr", IIC_LdStStore,
+ [(int_ppc_altivec_stvx v4i32:$RST, ForceXForm:$addr)]>;
+def STVXL : XForm_8_memOp<31, 487, (outs), (ins vrrc:$RST, (memrr $RA, $RB):$addr),
+ "stvxl $RST, $addr", IIC_LdStStore,
+ [(int_ppc_altivec_stvxl v4i32:$RST, ForceXForm:$addr)]>;
}
let PPC970_Unit = 5 in { // VALU Operations.
// VA-Form instructions. 3-input AltiVec ops.
let isCommutable = 1 in {
-def VMADDFP : VAForm_1<46, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB),
- "vmaddfp $vD, $vA, $vC, $vB", IIC_VecFP,
- [(set v4f32:$vD,
- (fma v4f32:$vA, v4f32:$vC, v4f32:$vB))]>;
+def VMADDFP : VAForm_1<46, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB),
+ "vmaddfp $RT, $RA, $RC, $RB", IIC_VecFP,
+ [(set v4f32:$RT,
+ (fma v4f32:$RA, v4f32:$RC, v4f32:$RB))]>;
// FIXME: The fma+fneg pattern won't match because fneg is not legal.
-def VNMSUBFP: VAForm_1<47, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vC, vrrc:$vB),
- "vnmsubfp $vD, $vA, $vC, $vB", IIC_VecFP,
- [(set v4f32:$vD, (fneg (fma v4f32:$vA, v4f32:$vC,
- (fneg v4f32:$vB))))]>;
+def VNMSUBFP: VAForm_1<47, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RC, vrrc:$RB),
+ "vnmsubfp $RT, $RA, $RC, $RB", IIC_VecFP,
+ [(set v4f32:$RT, (fneg (fma v4f32:$RA, v4f32:$RC,
+ (fneg v4f32:$RB))))]>;
let hasSideEffects = 1 in {
def VMHADDSHS : VA1a_Int_Ty<32, "vmhaddshs", int_ppc_altivec_vmhaddshs, v8i16>;
def VMHRADDSHS : VA1a_Int_Ty<33, "vmhraddshs", int_ppc_altivec_vmhraddshs,
@@ -479,26 +479,26 @@ def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm,
def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>;
// Shuffles.
-def VSLDOI : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u4imm:$SH),
- "vsldoi $vD, $vA, $vB, $SH", IIC_VecFP,
- [(set v16i8:$vD,
- (PPCvecshl v16i8:$vA, v16i8:$vB, imm32SExt16:$SH))]>;
+def VSLDOI : VAForm_2<44, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RB, u4imm:$SH),
+ "vsldoi $RT, $RA, $RB, $SH", IIC_VecFP,
+ [(set v16i8:$RT,
+ (PPCvecshl v16i8:$RA, v16i8:$RB, imm32SExt16:$SH))]>;
// VX-Form instructions. AltiVec arithmetic ops.
let isCommutable = 1 in {
-def VADDFP : VXForm_1<10, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vaddfp $vD, $vA, $vB", IIC_VecFP,
- [(set v4f32:$vD, (fadd v4f32:$vA, v4f32:$vB))]>;
-
-def VADDUBM : VXForm_1<0, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vaddubm $vD, $vA, $vB", IIC_VecGeneral,
- [(set v16i8:$vD, (add v16i8:$vA, v16i8:$vB))]>;
-def VADDUHM : VXForm_1<64, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vadduhm $vD, $vA, $vB", IIC_VecGeneral,
- [(set v8i16:$vD, (add v8i16:$vA, v8i16:$vB))]>;
-def VADDUWM : VXForm_1<128, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vadduwm $vD, $vA, $vB", IIC_VecGeneral,
- [(set v4i32:$vD, (add v4i32:$vA, v4i32:$vB))]>;
+def VADDFP : VXForm_1<10, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vaddfp $VD, $VA, $VB", IIC_VecFP,
+ [(set v4f32:$VD, (fadd v4f32:$VA, v4f32:$VB))]>;
+
+def VADDUBM : VXForm_1<0, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vaddubm $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v16i8:$VD, (add v16i8:$VA, v16i8:$VB))]>;
+def VADDUHM : VXForm_1<64, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vadduhm $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v8i16:$VD, (add v8i16:$VA, v8i16:$VB))]>;
+def VADDUWM : VXForm_1<128, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vadduwm $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD, (add v4i32:$VA, v4i32:$VB))]>;
def VADDCUW : VX1_Int_Ty<384, "vaddcuw", int_ppc_altivec_vaddcuw, v4i32>;
def VADDSBS : VX1_Int_Ty<768, "vaddsbs", int_ppc_altivec_vaddsbs, v16i8>;
@@ -510,51 +510,51 @@ def VADDUWS : VX1_Int_Ty<640, "vadduws", int_ppc_altivec_vadduws, v4i32>;
} // isCommutable
let isCommutable = 1 in
-def VAND : VXForm_1<1028, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vand $vD, $vA, $vB", IIC_VecFP,
- [(set v4i32:$vD, (and v4i32:$vA, v4i32:$vB))]>;
-def VANDC : VXForm_1<1092, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vandc $vD, $vA, $vB", IIC_VecFP,
- [(set v4i32:$vD, (and v4i32:$vA,
- (vnot v4i32:$vB)))]>;
-
-def VCFSX : VXForm_1<842, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vcfsx $vD, $vB, $UIMM", IIC_VecFP,
- [(set v4f32:$vD,
- (int_ppc_altivec_vcfsx v4i32:$vB, timm:$UIMM))]>;
-def VCFUX : VXForm_1<778, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vcfux $vD, $vB, $UIMM", IIC_VecFP,
- [(set v4f32:$vD,
- (int_ppc_altivec_vcfux v4i32:$vB, timm:$UIMM))]>;
-def VCTSXS : VXForm_1<970, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vctsxs $vD, $vB, $UIMM", IIC_VecFP,
- [(set v4i32:$vD,
- (int_ppc_altivec_vctsxs v4f32:$vB, timm:$UIMM))]>;
-def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vctuxs $vD, $vB, $UIMM", IIC_VecFP,
- [(set v4i32:$vD,
- (int_ppc_altivec_vctuxs v4f32:$vB, timm:$UIMM))]>;
+def VAND : VXForm_1<1028, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vand $VD, $VA, $VB", IIC_VecFP,
+ [(set v4i32:$VD, (and v4i32:$VA, v4i32:$VB))]>;
+def VANDC : VXForm_1<1092, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vandc $VD, $VA, $VB", IIC_VecFP,
+ [(set v4i32:$VD, (and v4i32:$VA,
+ (vnot v4i32:$VB)))]>;
+
+def VCFSX : VXForm_1<842, (outs vrrc:$VD), (ins u5imm:$VA, vrrc:$VB),
+ "vcfsx $VD, $VB, $VA", IIC_VecFP,
+ [(set v4f32:$VD,
+ (int_ppc_altivec_vcfsx v4i32:$VB, timm:$VA))]>;
+def VCFUX : VXForm_1<778, (outs vrrc:$VD), (ins u5imm:$VA, vrrc:$VB),
+ "vcfux $VD, $VB, $VA", IIC_VecFP,
+ [(set v4f32:$VD,
+ (int_ppc_altivec_vcfux v4i32:$VB, timm:$VA))]>;
+def VCTSXS : VXForm_1<970, (outs vrrc:$VD), (ins u5imm:$VA, vrrc:$VB),
+ "vctsxs $VD, $VB, $VA", IIC_VecFP,
+ [(set v4i32:$VD,
+ (int_ppc_altivec_vctsxs v4f32:$VB, timm:$VA))]>;
+def VCTUXS : VXForm_1<906, (outs vrrc:$VD), (ins u5imm:$VA, vrrc:$VB),
+ "vctuxs $VD, $VB, $VA", IIC_VecFP,
+ [(set v4i32:$VD,
+ (int_ppc_altivec_vctuxs v4f32:$VB, timm:$VA))]>;
// Defines with the UIM field set to 0 for floating-point
// to integer (fp_to_sint/fp_to_uint) conversions and integer
// to floating-point (sint_to_fp/uint_to_fp) conversions.
let isCodeGenOnly = 1, VA = 0 in {
-def VCFSX_0 : VXForm_1<842, (outs vrrc:$vD), (ins vrrc:$vB),
- "vcfsx $vD, $vB, 0", IIC_VecFP,
- [(set v4f32:$vD,
- (int_ppc_altivec_vcfsx v4i32:$vB, 0))]>;
-def VCTUXS_0 : VXForm_1<906, (outs vrrc:$vD), (ins vrrc:$vB),
- "vctuxs $vD, $vB, 0", IIC_VecFP,
- [(set v4i32:$vD,
- (int_ppc_altivec_vctuxs v4f32:$vB, 0))]>;
-def VCFUX_0 : VXForm_1<778, (outs vrrc:$vD), (ins vrrc:$vB),
- "vcfux $vD, $vB, 0", IIC_VecFP,
- [(set v4f32:$vD,
- (int_ppc_altivec_vcfux v4i32:$vB, 0))]>;
-def VCTSXS_0 : VXForm_1<970, (outs vrrc:$vD), (ins vrrc:$vB),
- "vctsxs $vD, $vB, 0", IIC_VecFP,
- [(set v4i32:$vD,
- (int_ppc_altivec_vctsxs v4f32:$vB, 0))]>;
+def VCFSX_0 : VXForm_1<842, (outs vrrc:$VD), (ins vrrc:$VB),
+ "vcfsx $VD, $VB, 0", IIC_VecFP,
+ [(set v4f32:$VD,
+ (int_ppc_altivec_vcfsx v4i32:$VB, 0))]>;
+def VCTUXS_0 : VXForm_1<906, (outs vrrc:$VD), (ins vrrc:$VB),
+ "vctuxs $VD, $VB, 0", IIC_VecFP,
+ [(set v4i32:$VD,
+ (int_ppc_altivec_vctuxs v4f32:$VB, 0))]>;
+def VCFUX_0 : VXForm_1<778, (outs vrrc:$VD), (ins vrrc:$VB),
+ "vcfux $VD, $VB, 0", IIC_VecFP,
+ [(set v4f32:$VD,
+ (int_ppc_altivec_vcfux v4i32:$VB, 0))]>;
+def VCTSXS_0 : VXForm_1<970, (outs vrrc:$VD), (ins vrrc:$VB),
+ "vctsxs $VD, $VB, 0", IIC_VecFP,
+ [(set v4i32:$VD,
+ (int_ppc_altivec_vctsxs v4f32:$VB, 0))]>;
}
def VEXPTEFP : VX2_Int_SP<394, "vexptefp", int_ppc_altivec_vexptefp>;
def VLOGEFP : VX2_Int_SP<458, "vlogefp", int_ppc_altivec_vlogefp>;
@@ -583,24 +583,24 @@ def VMINUH : VX1_Int_Ty< 578, "vminuh", int_ppc_altivec_vminuh, v8i16>;
def VMINUW : VX1_Int_Ty< 642, "vminuw", int_ppc_altivec_vminuw, v4i32>;
} // isCommutable
-def VMRGHB : VXForm_1< 12, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrghb $vD, $vA, $vB", IIC_VecFP,
- [(set v16i8:$vD, (vmrghb_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VMRGHH : VXForm_1< 76, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrghh $vD, $vA, $vB", IIC_VecFP,
- [(set v16i8:$vD, (vmrghh_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VMRGHW : VXForm_1<140, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrghw $vD, $vA, $vB", IIC_VecFP,
- [(set v16i8:$vD, (vmrghw_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VMRGLB : VXForm_1<268, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrglb $vD, $vA, $vB", IIC_VecFP,
- [(set v16i8:$vD, (vmrglb_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VMRGLH : VXForm_1<332, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrglh $vD, $vA, $vB", IIC_VecFP,
- [(set v16i8:$vD, (vmrglh_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VMRGLW : VXForm_1<396, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrglw $vD, $vA, $vB", IIC_VecFP,
- [(set v16i8:$vD, (vmrglw_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VMRGHB : VXForm_1< 12, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmrghb $VD, $VA, $VB", IIC_VecFP,
+ [(set v16i8:$VD, (vmrghb_shuffle v16i8:$VA, v16i8:$VB))]>;
+def VMRGHH : VXForm_1< 76, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmrghh $VD, $VA, $VB", IIC_VecFP,
+ [(set v16i8:$VD, (vmrghh_shuffle v16i8:$VA, v16i8:$VB))]>;
+def VMRGHW : VXForm_1<140, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmrghw $VD, $VA, $VB", IIC_VecFP,
+ [(set v16i8:$VD, (vmrghw_shuffle v16i8:$VA, v16i8:$VB))]>;
+def VMRGLB : VXForm_1<268, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmrglb $VD, $VA, $VB", IIC_VecFP,
+ [(set v16i8:$VD, (vmrglb_shuffle v16i8:$VA, v16i8:$VB))]>;
+def VMRGLH : VXForm_1<332, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmrglh $VD, $VA, $VB", IIC_VecFP,
+ [(set v16i8:$VD, (vmrglh_shuffle v16i8:$VA, v16i8:$VB))]>;
+def VMRGLW : VXForm_1<396, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmrglw $VD, $VA, $VB", IIC_VecFP,
+ [(set v16i8:$VD, (vmrglw_shuffle v16i8:$VA, v16i8:$VB))]>;
def VMSUMMBM : VA1a_Int_Ty3<37, "vmsummbm", int_ppc_altivec_vmsummbm,
v4i32, v16i8, v4i32>;
@@ -645,18 +645,18 @@ def VRSQRTEFP : VX2_Int_SP<330, "vrsqrtefp", int_ppc_altivec_vrsqrtefp>;
def VSUBCUW : VX1_Int_Ty<1408, "vsubcuw", int_ppc_altivec_vsubcuw, v4i32>;
-def VSUBFP : VXForm_1<74, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsubfp $vD, $vA, $vB", IIC_VecGeneral,
- [(set v4f32:$vD, (fsub v4f32:$vA, v4f32:$vB))]>;
-def VSUBUBM : VXForm_1<1024, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsububm $vD, $vA, $vB", IIC_VecGeneral,
- [(set v16i8:$vD, (sub v16i8:$vA, v16i8:$vB))]>;
-def VSUBUHM : VXForm_1<1088, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsubuhm $vD, $vA, $vB", IIC_VecGeneral,
- [(set v8i16:$vD, (sub v8i16:$vA, v8i16:$vB))]>;
-def VSUBUWM : VXForm_1<1152, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsubuwm $vD, $vA, $vB", IIC_VecGeneral,
- [(set v4i32:$vD, (sub v4i32:$vA, v4i32:$vB))]>;
+def VSUBFP : VXForm_1<74, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vsubfp $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v4f32:$VD, (fsub v4f32:$VA, v4f32:$VB))]>;
+def VSUBUBM : VXForm_1<1024, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vsububm $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v16i8:$VD, (sub v16i8:$VA, v16i8:$VB))]>;
+def VSUBUHM : VXForm_1<1088, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vsubuhm $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v8i16:$VD, (sub v8i16:$VA, v8i16:$VB))]>;
+def VSUBUWM : VXForm_1<1152, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vsubuwm $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD, (sub v4i32:$VA, v4i32:$VB))]>;
def VSUBSBS : VX1_Int_Ty<1792, "vsubsbs" , int_ppc_altivec_vsubsbs, v16i8>;
def VSUBSHS : VX1_Int_Ty<1856, "vsubshs" , int_ppc_altivec_vsubshs, v8i16>;
@@ -677,17 +677,17 @@ let hasSideEffects = 1 in {
v4i32, v16i8, v4i32>;
}
-def VNOR : VXForm_1<1284, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vnor $vD, $vA, $vB", IIC_VecFP,
- [(set v4i32:$vD, (vnot (or v4i32:$vA,
- v4i32:$vB)))]>;
+def VNOR : VXForm_1<1284, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vnor $VD, $VA, $VB", IIC_VecFP,
+ [(set v4i32:$VD, (vnot (or v4i32:$VA,
+ v4i32:$VB)))]>;
let isCommutable = 1 in {
-def VOR : VXForm_1<1156, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vor $vD, $vA, $vB", IIC_VecFP,
- [(set v4i32:$vD, (or v4i32:$vA, v4i32:$vB))]>;
-def VXOR : VXForm_1<1220, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vxor $vD, $vA, $vB", IIC_VecFP,
- [(set v4i32:$vD, (xor v4i32:$vA, v4i32:$vB))]>;
+def VOR : VXForm_1<1156, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vor $VD, $VA, $VB", IIC_VecFP,
+ [(set v4i32:$VD, (or v4i32:$VA, v4i32:$VB))]>;
+def VXOR : VXForm_1<1220, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vxor $VD, $VA, $VB", IIC_VecFP,
+ [(set v4i32:$VD, (xor v4i32:$VA, v4i32:$VB))]>;
} // isCommutable
def VRLB : VX1_Int_Ty< 4, "vrlb", int_ppc_altivec_vrlb, v16i8>;
@@ -701,23 +701,23 @@ def VSLB : VX1_Int_Ty< 260, "vslb", int_ppc_altivec_vslb, v16i8>;
def VSLH : VX1_Int_Ty< 324, "vslh", int_ppc_altivec_vslh, v8i16>;
def VSLW : VX1_Int_Ty< 388, "vslw", int_ppc_altivec_vslw, v4i32>;
-def VSPLTB : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vspltb $vD, $vB, $UIMM", IIC_VecPerm,
- [(set v16i8:$vD,
- (vspltb_shuffle:$UIMM v16i8:$vB, (undef)))]>;
-def VSPLTH : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vsplth $vD, $vB, $UIMM", IIC_VecPerm,
- [(set v16i8:$vD,
- (vsplth_shuffle:$UIMM v16i8:$vB, (undef)))]>;
-def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
- "vspltw $vD, $vB, $UIMM", IIC_VecPerm,
- [(set v16i8:$vD,
- (vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
+def VSPLTB : VXForm_1<524, (outs vrrc:$VD), (ins u5imm:$VA, vrrc:$VB),
+ "vspltb $VD, $VB, $VA", IIC_VecPerm,
+ [(set v16i8:$VD,
+ (vspltb_shuffle:$VA v16i8:$VB, (undef)))]>;
+def VSPLTH : VXForm_1<588, (outs vrrc:$VD), (ins u5imm:$VA, vrrc:$VB),
+ "vsplth $VD, $VB, $VA", IIC_VecPerm,
+ [(set v16i8:$VD,
+ (vsplth_shuffle:$VA v16i8:$VB, (undef)))]>;
+def VSPLTW : VXForm_1<652, (outs vrrc:$VD), (ins u5imm:$VA, vrrc:$VB),
+ "vspltw $VD, $VB, $VA", IIC_VecPerm,
+ [(set v16i8:$VD,
+ (vspltw_shuffle:$VA v16i8:$VB, (undef)))]>;
let isCodeGenOnly = 1, hasSideEffects = 0 in {
- def VSPLTBs : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB),
- "vspltb $vD, $vB, $UIMM", IIC_VecPerm, []>;
- def VSPLTHs : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB),
- "vsplth $vD, $vB, $UIMM", IIC_VecPerm, []>;
+ def VSPLTBs : VXForm_1<524, (outs vrrc:$VD), (ins u5imm:$VA, vfrc:$VB),
+ "vspltb $VD, $VB, $VA", IIC_VecPerm, []>;
+ def VSPLTHs : VXForm_1<588, (outs vrrc:$VD), (ins u5imm:$VA, vfrc:$VB),
+ "vsplth $VD, $VB, $VA", IIC_VecPerm, []>;
}
def VSR : VX1_Int_Ty< 708, "vsr" , int_ppc_altivec_vsr, v4i32>;
@@ -731,15 +731,15 @@ def VSRH : VX1_Int_Ty< 580, "vsrh" , int_ppc_altivec_vsrh , v8i16>;
def VSRW : VX1_Int_Ty< 644, "vsrw" , int_ppc_altivec_vsrw , v4i32>;
-def VSPLTISB : VXForm_3<780, (outs vrrc:$vD), (ins s5imm:$SIMM),
- "vspltisb $vD, $SIMM", IIC_VecPerm,
- [(set v16i8:$vD, (v16i8 vecspltisb:$SIMM))]>;
-def VSPLTISH : VXForm_3<844, (outs vrrc:$vD), (ins s5imm:$SIMM),
- "vspltish $vD, $SIMM", IIC_VecPerm,
- [(set v8i16:$vD, (v8i16 vecspltish:$SIMM))]>;
-def VSPLTISW : VXForm_3<908, (outs vrrc:$vD), (ins s5imm:$SIMM),
- "vspltisw $vD, $SIMM", IIC_VecPerm,
- [(set v4i32:$vD, (v4i32 vecspltisw:$SIMM))]>;
+def VSPLTISB : VXForm_3<780, (outs vrrc:$VD), (ins s5imm:$IMM),
+ "vspltisb $VD, $IMM", IIC_VecPerm,
+ [(set v16i8:$VD, (v16i8 vecspltisb:$IMM))]>;
+def VSPLTISH : VXForm_3<844, (outs vrrc:$VD), (ins s5imm:$IMM),
+ "vspltish $VD, $IMM", IIC_VecPerm,
+ [(set v8i16:$VD, (v8i16 vecspltish:$IMM))]>;
+def VSPLTISW : VXForm_3<908, (outs vrrc:$VD), (ins s5imm:$IMM),
+ "vspltisw $VD, $IMM", IIC_VecPerm,
+ [(set v4i32:$VD, (v4i32 vecspltisw:$IMM))]>;
// Vector Pack.
def VPKPX : VX1_Int_Ty2<782, "vpkpx", int_ppc_altivec_vpkpx,
@@ -758,14 +758,14 @@ let hasSideEffects = 1 in {
def VPKUWUS : VX1_Int_Ty2<206, "vpkuwus", int_ppc_altivec_vpkuwus,
v8i16, v4i32>;
}
-def VPKUHUM : VXForm_1<14, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vpkuhum $vD, $vA, $vB", IIC_VecFP,
- [(set v16i8:$vD,
- (vpkuhum_shuffle v16i8:$vA, v16i8:$vB))]>;
-def VPKUWUM : VXForm_1<78, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vpkuwum $vD, $vA, $vB", IIC_VecFP,
- [(set v16i8:$vD,
- (vpkuwum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUHUM : VXForm_1<14, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vpkuhum $VD, $VA, $VB", IIC_VecFP,
+ [(set v16i8:$VD,
+ (vpkuhum_shuffle v16i8:$VA, v16i8:$VB))]>;
+def VPKUWUM : VXForm_1<78, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vpkuwum $VD, $VA, $VB", IIC_VecFP,
+ [(set v16i8:$VD,
+ (vpkuwum_shuffle v16i8:$VA, v16i8:$VB))]>;
// Vector Unpack.
def VUPKHPX : VX2_Int_Ty2<846, "vupkhpx", int_ppc_altivec_vupkhpx,
@@ -785,74 +785,74 @@ def VUPKLSH : VX2_Int_Ty2<718, "vupklsh", int_ppc_altivec_vupklsh,
// Altivec Comparisons.
class VCMP<bits<10> xo, string asmstr, ValueType Ty>
- : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
+ : VXRForm_1<xo, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB), asmstr,
IIC_VecFPCompare,
- [(set Ty:$vD, (Ty (PPCvcmp Ty:$vA, Ty:$vB, xo)))]>;
+ [(set Ty:$VD, (Ty (PPCvcmp Ty:$VA, Ty:$VB, xo)))]>;
class VCMP_rec<bits<10> xo, string asmstr, ValueType Ty>
- : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
+ : VXRForm_1<xo, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB), asmstr,
IIC_VecFPCompare,
- [(set Ty:$vD, (Ty (PPCvcmp_rec Ty:$vA, Ty:$vB, xo)))]> {
+ [(set Ty:$VD, (Ty (PPCvcmp_rec Ty:$VA, Ty:$VB, xo)))]> {
let Defs = [CR6];
let RC = 1;
}
// f32 element comparisons.
-def VCMPBFP : VCMP <966, "vcmpbfp $vD, $vA, $vB" , v4f32>;
-def VCMPBFP_rec : VCMP_rec<966, "vcmpbfp. $vD, $vA, $vB" , v4f32>;
-def VCMPEQFP : VCMP <198, "vcmpeqfp $vD, $vA, $vB" , v4f32>;
-def VCMPEQFP_rec : VCMP_rec<198, "vcmpeqfp. $vD, $vA, $vB", v4f32>;
-def VCMPGEFP : VCMP <454, "vcmpgefp $vD, $vA, $vB" , v4f32>;
-def VCMPGEFP_rec : VCMP_rec<454, "vcmpgefp. $vD, $vA, $vB", v4f32>;
-def VCMPGTFP : VCMP <710, "vcmpgtfp $vD, $vA, $vB" , v4f32>;
-def VCMPGTFP_rec : VCMP_rec<710, "vcmpgtfp. $vD, $vA, $vB", v4f32>;
+def VCMPBFP : VCMP <966, "vcmpbfp $VD, $VA, $VB" , v4f32>;
+def VCMPBFP_rec : VCMP_rec<966, "vcmpbfp. $VD, $VA, $VB" , v4f32>;
+def VCMPEQFP : VCMP <198, "vcmpeqfp $VD, $VA, $VB" , v4f32>;
+def VCMPEQFP_rec : VCMP_rec<198, "vcmpeqfp. $VD, $VA, $VB", v4f32>;
+def VCMPGEFP : VCMP <454, "vcmpgefp $VD, $VA, $VB" , v4f32>;
+def VCMPGEFP_rec : VCMP_rec<454, "vcmpgefp. $VD, $VA, $VB", v4f32>;
+def VCMPGTFP : VCMP <710, "vcmpgtfp $VD, $VA, $VB" , v4f32>;
+def VCMPGTFP_rec : VCMP_rec<710, "vcmpgtfp. $VD, $VA, $VB", v4f32>;
// i8 element comparisons.
-def VCMPEQUB : VCMP < 6, "vcmpequb $vD, $vA, $vB" , v16i8>;
-def VCMPEQUB_rec : VCMP_rec< 6, "vcmpequb. $vD, $vA, $vB", v16i8>;
-def VCMPGTSB : VCMP <774, "vcmpgtsb $vD, $vA, $vB" , v16i8>;
-def VCMPGTSB_rec : VCMP_rec<774, "vcmpgtsb. $vD, $vA, $vB", v16i8>;
-def VCMPGTUB : VCMP <518, "vcmpgtub $vD, $vA, $vB" , v16i8>;
-def VCMPGTUB_rec : VCMP_rec<518, "vcmpgtub. $vD, $vA, $vB", v16i8>;
+def VCMPEQUB : VCMP < 6, "vcmpequb $VD, $VA, $VB" , v16i8>;
+def VCMPEQUB_rec : VCMP_rec< 6, "vcmpequb. $VD, $VA, $VB", v16i8>;
+def VCMPGTSB : VCMP <774, "vcmpgtsb $VD, $VA, $VB" , v16i8>;
+def VCMPGTSB_rec : VCMP_rec<774, "vcmpgtsb. $VD, $VA, $VB", v16i8>;
+def VCMPGTUB : VCMP <518, "vcmpgtub $VD, $VA, $VB" , v16i8>;
+def VCMPGTUB_rec : VCMP_rec<518, "vcmpgtub. $VD, $VA, $VB", v16i8>;
// i16 element comparisons.
-def VCMPEQUH : VCMP < 70, "vcmpequh $vD, $vA, $vB" , v8i16>;
-def VCMPEQUH_rec : VCMP_rec< 70, "vcmpequh. $vD, $vA, $vB", v8i16>;
-def VCMPGTSH : VCMP <838, "vcmpgtsh $vD, $vA, $vB" , v8i16>;
-def VCMPGTSH_rec : VCMP_rec<838, "vcmpgtsh. $vD, $vA, $vB", v8i16>;
-def VCMPGTUH : VCMP <582, "vcmpgtuh $vD, $vA, $vB" , v8i16>;
-def VCMPGTUH_rec : VCMP_rec<582, "vcmpgtuh. $vD, $vA, $vB", v8i16>;
+def VCMPEQUH : VCMP < 70, "vcmpequh $VD, $VA, $VB" , v8i16>;
+def VCMPEQUH_rec : VCMP_rec< 70, "vcmpequh. $VD, $VA, $VB", v8i16>;
+def VCMPGTSH : VCMP <838, "vcmpgtsh $VD, $VA, $VB" , v8i16>;
+def VCMPGTSH_rec : VCMP_rec<838, "vcmpgtsh. $VD, $VA, $VB", v8i16>;
+def VCMPGTUH : VCMP <582, "vcmpgtuh $VD, $VA, $VB" , v8i16>;
+def VCMPGTUH_rec : VCMP_rec<582, "vcmpgtuh. $VD, $VA, $VB", v8i16>;
// i32 element comparisons.
-def VCMPEQUW : VCMP <134, "vcmpequw $vD, $vA, $vB" , v4i32>;
-def VCMPEQUW_rec : VCMP_rec<134, "vcmpequw. $vD, $vA, $vB", v4i32>;
-def VCMPGTSW : VCMP <902, "vcmpgtsw $vD, $vA, $vB" , v4i32>;
-def VCMPGTSW_rec : VCMP_rec<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
-def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
-def VCMPGTUW_rec : VCMP_rec<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
+def VCMPEQUW : VCMP <134, "vcmpequw $VD, $VA, $VB" , v4i32>;
+def VCMPEQUW_rec : VCMP_rec<134, "vcmpequw. $VD, $VA, $VB", v4i32>;
+def VCMPGTSW : VCMP <902, "vcmpgtsw $VD, $VA, $VB" , v4i32>;
+def VCMPGTSW_rec : VCMP_rec<902, "vcmpgtsw. $VD, $VA, $VB", v4i32>;
+def VCMPGTUW : VCMP <646, "vcmpgtuw $VD, $VA, $VB" , v4i32>;
+def VCMPGTUW_rec : VCMP_rec<646, "vcmpgtuw. $VD, $VA, $VB", v4i32>;
let isCodeGenOnly = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
isReMaterializable = 1 in {
-def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
- "vxor $vD, $vD, $vD", IIC_VecFP,
- [(set v16i8:$vD, (v16i8 immAllZerosV))]>;
-def V_SET0H : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
- "vxor $vD, $vD, $vD", IIC_VecFP,
- [(set v8i16:$vD, (v8i16 immAllZerosV))]>;
-def V_SET0 : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
- "vxor $vD, $vD, $vD", IIC_VecFP,
- [(set v4i32:$vD, (v4i32 immAllZerosV))]>;
+def V_SET0B : VXForm_setzero<1220, (outs vrrc:$VD), (ins),
+ "vxor $VD, $VD, $VD", IIC_VecFP,
+ [(set v16i8:$VD, (v16i8 immAllZerosV))]>;
+def V_SET0H : VXForm_setzero<1220, (outs vrrc:$VD), (ins),
+ "vxor $VD, $VD, $VD", IIC_VecFP,
+ [(set v8i16:$VD, (v8i16 immAllZerosV))]>;
+def V_SET0 : VXForm_setzero<1220, (outs vrrc:$VD), (ins),
+ "vxor $VD, $VD, $VD", IIC_VecFP,
+ [(set v4i32:$VD, (v4i32 immAllZerosV))]>;
let IMM=-1 in {
-def V_SETALLONESB : VXForm_3<908, (outs vrrc:$vD), (ins),
- "vspltisw $vD, -1", IIC_VecFP,
- [(set v16i8:$vD, (v16i8 immAllOnesV))]>;
-def V_SETALLONESH : VXForm_3<908, (outs vrrc:$vD), (ins),
- "vspltisw $vD, -1", IIC_VecFP,
- [(set v8i16:$vD, (v8i16 immAllOnesV))]>;
-def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins),
- "vspltisw $vD, -1", IIC_VecFP,
- [(set v4i32:$vD, (v4i32 immAllOnesV))]>;
+def V_SETALLONESB : VXForm_3<908, (outs vrrc:$VD), (ins),
+ "vspltisw $VD, -1", IIC_VecFP,
+ [(set v16i8:$VD, (v16i8 immAllOnesV))]>;
+def V_SETALLONESH : VXForm_3<908, (outs vrrc:$VD), (ins),
+ "vspltisw $VD, -1", IIC_VecFP,
+ [(set v8i16:$VD, (v8i16 immAllOnesV))]>;
+def V_SETALLONES : VXForm_3<908, (outs vrrc:$VD), (ins),
+ "vspltisw $VD, -1", IIC_VecFP,
+ [(set v4i32:$VD, (v4i32 immAllOnesV))]>;
}
}
} // VALU Operations.
@@ -1161,20 +1161,27 @@ def : Pat<(v8i16 (srl (sub v8i16:$vA, (v8i16 (bitconvert(vnot v4i32:$vB)))),
def : Pat<(v16i8 (srl (sub v16i8:$vA, (v16i8 (bitconvert(vnot v4i32:$vB)))),
(v16i8 (immEQOneV)))), (v16i8 (VAVGUB $vA, $vB))>;
+def : Pat<(v16i8 (shl v16i8:$vA, (v16i8 (immEQOneV)))),
+ (v16i8 (VADDUBM $vA, $vA))>;
+def : Pat<(v8i16 (shl v8i16:$vA, (v8i16 (immEQOneV)))),
+ (v8i16 (VADDUHM $vA, $vA))>;
+def : Pat<(v4i32 (shl v4i32:$vA, (v4i32 (immEQOneV)))),
+ (v4i32 (VADDUWM $vA, $vA))>;
+
} // end HasAltivec
// [PO VRT VRA VRB 1 PS XO], "_o" means CR6 is set.
class VX_VT5_VA5_VB5_PS1_XO9_o<bits<9> xo, string opc, list<dag> pattern>
: VX_RD5_RSp5_PS1_XO9<xo,
- (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u1imm:$PS),
- !strconcat(opc, " $vD, $vA, $vB, $PS"), IIC_VecFP, pattern> {
+ (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB, u1imm:$PS),
+ !strconcat(opc, " $VD, $VA, $VB, $PS"), IIC_VecFP, pattern> {
let Defs = [CR6];
}
// [PO VRT VRA VRB 1 / XO]
class VX_VT5_VA5_VB5_XO9_o<bits<9> xo, string opc, list<dag> pattern>
- : VX_RD5_RSp5_PS1_XO9<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern> {
+ : VX_RD5_RSp5_PS1_XO9<xo, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ !strconcat(opc, " $VD, $VA, $VB"), IIC_VecFP, pattern> {
let Defs = [CR6];
let PS = 0;
}
@@ -1192,9 +1199,9 @@ def VMULOSW : VX1_Int_Ty2<392, "vmulosw", int_ppc_altivec_vmulosw,
v2i64, v4i32>;
def VMULOUW : VX1_Int_Ty2<136, "vmulouw", int_ppc_altivec_vmulouw,
v2i64, v4i32>;
-def VMULUWM : VXForm_1<137, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmuluwm $vD, $vA, $vB", IIC_VecGeneral,
- [(set v4i32:$vD, (mul v4i32:$vA, v4i32:$vB))]>;
+def VMULUWM : VXForm_1<137, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmuluwm $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD, (mul v4i32:$VA, v4i32:$VB))]>;
def VMAXSD : VX1_Int_Ty<450, "vmaxsd", int_ppc_altivec_vmaxsd, v2i64>;
def VMAXUD : VX1_Int_Ty<194, "vmaxud", int_ppc_altivec_vmaxud, v2i64>;
def VMINSD : VX1_Int_Ty<962, "vminsd", int_ppc_altivec_vminsd, v2i64>;
@@ -1202,14 +1209,14 @@ def VMINUD : VX1_Int_Ty<706, "vminud", int_ppc_altivec_vminud, v2i64>;
} // isCommutable
// Vector merge
-def VMRGEW : VXForm_1<1932, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrgew $vD, $vA, $vB", IIC_VecFP,
- [(set v16i8:$vD,
- (v16i8 (vmrgew_shuffle v16i8:$vA, v16i8:$vB)))]>;
-def VMRGOW : VXForm_1<1676, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmrgow $vD, $vA, $vB", IIC_VecFP,
- [(set v16i8:$vD,
- (v16i8 (vmrgow_shuffle v16i8:$vA, v16i8:$vB)))]>;
+def VMRGEW : VXForm_1<1932, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmrgew $VD, $VA, $VB", IIC_VecFP,
+ [(set v16i8:$VD,
+ (v16i8 (vmrgew_shuffle v16i8:$VA, v16i8:$VB)))]>;
+def VMRGOW : VXForm_1<1676, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmrgow $VD, $VA, $VB", IIC_VecFP,
+ [(set v16i8:$VD,
+ (v16i8 (vmrgow_shuffle v16i8:$VA, v16i8:$VB)))]>;
// Match vmrgew(x,x) and vmrgow(x,x)
def:Pat<(vmrgew_unary_shuffle v16i8:$vA, undef),
@@ -1232,12 +1239,12 @@ def : Pat<(v2i64 (rotl v2i64:$vA, v2i64:$vB)),
(v2i64 (VRLD v2i64:$vA, v2i64:$vB))>;
// Vector shifts
-def VSLD : VXForm_1<1476, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsld $vD, $vA, $vB", IIC_VecGeneral, []>;
-def VSRD : VXForm_1<1732, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsrd $vD, $vA, $vB", IIC_VecGeneral, []>;
-def VSRAD : VXForm_1<964, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsrad $vD, $vA, $vB", IIC_VecGeneral, []>;
+def VSLD : VXForm_1<1476, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vsld $VD, $VA, $VB", IIC_VecGeneral, []>;
+def VSRD : VXForm_1<1732, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vsrd $VD, $VA, $VB", IIC_VecGeneral, []>;
+def VSRAD : VXForm_1<964, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vsrad $VD, $VA, $VB", IIC_VecGeneral, []>;
def : Pat<(v2i64 (shl v2i64:$vA, v2i64:$vB)),
(v2i64 (VSLD $vA, $vB))>;
@@ -1254,12 +1261,12 @@ def : Pat<(v2i64 (PPCsra v2i64:$vA, v2i64:$vB)),
// Vector Integer Arithmetic Instructions
let isCommutable = 1 in {
-def VADDUDM : VXForm_1<192, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vaddudm $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (add v2i64:$vA, v2i64:$vB))]>;
-def VADDUQM : VXForm_1<256, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vadduqm $vD, $vA, $vB", IIC_VecGeneral,
- [(set v1i128:$vD, (add v1i128:$vA, v1i128:$vB))]>;
+def VADDUDM : VXForm_1<192, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vaddudm $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD, (add v2i64:$VA, v2i64:$VB))]>;
+def VADDUQM : VXForm_1<256, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vadduqm $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v1i128:$VD, (add v1i128:$VA, v1i128:$VB))]>;
} // isCommutable
// Vector Quadword Add
@@ -1268,45 +1275,45 @@ def VADDCUQ : VX1_Int_Ty<320, "vaddcuq", int_ppc_altivec_vaddcuq, v1i128>;
def VADDECUQ : VA1a_Int_Ty<61, "vaddecuq", int_ppc_altivec_vaddecuq, v1i128>;
// Vector Doubleword Subtract
-def VSUBUDM : VXForm_1<1216, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsubudm $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (sub v2i64:$vA, v2i64:$vB))]>;
+def VSUBUDM : VXForm_1<1216, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vsubudm $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD, (sub v2i64:$VA, v2i64:$VB))]>;
// Vector Quadword Subtract
-def VSUBUQM : VXForm_1<1280, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vsubuqm $vD, $vA, $vB", IIC_VecGeneral,
- [(set v1i128:$vD, (sub v1i128:$vA, v1i128:$vB))]>;
+def VSUBUQM : VXForm_1<1280, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vsubuqm $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v1i128:$VD, (sub v1i128:$VA, v1i128:$VB))]>;
def VSUBEUQM : VA1a_Int_Ty<62, "vsubeuqm", int_ppc_altivec_vsubeuqm, v1i128>;
def VSUBCUQ : VX1_Int_Ty<1344, "vsubcuq", int_ppc_altivec_vsubcuq, v1i128>;
def VSUBECUQ : VA1a_Int_Ty<63, "vsubecuq", int_ppc_altivec_vsubecuq, v1i128>;
// Count Leading Zeros
-def VCLZB : VXForm_2<1794, (outs vrrc:$vD), (ins vrrc:$vB),
- "vclzb $vD, $vB", IIC_VecGeneral,
- [(set v16i8:$vD, (ctlz v16i8:$vB))]>;
-def VCLZH : VXForm_2<1858, (outs vrrc:$vD), (ins vrrc:$vB),
- "vclzh $vD, $vB", IIC_VecGeneral,
- [(set v8i16:$vD, (ctlz v8i16:$vB))]>;
-def VCLZW : VXForm_2<1922, (outs vrrc:$vD), (ins vrrc:$vB),
- "vclzw $vD, $vB", IIC_VecGeneral,
- [(set v4i32:$vD, (ctlz v4i32:$vB))]>;
-def VCLZD : VXForm_2<1986, (outs vrrc:$vD), (ins vrrc:$vB),
- "vclzd $vD, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (ctlz v2i64:$vB))]>;
+def VCLZB : VXForm_2<1794, (outs vrrc:$VD), (ins vrrc:$VB),
+ "vclzb $VD, $VB", IIC_VecGeneral,
+ [(set v16i8:$VD, (ctlz v16i8:$VB))]>;
+def VCLZH : VXForm_2<1858, (outs vrrc:$VD), (ins vrrc:$VB),
+ "vclzh $VD, $VB", IIC_VecGeneral,
+ [(set v8i16:$VD, (ctlz v8i16:$VB))]>;
+def VCLZW : VXForm_2<1922, (outs vrrc:$VD), (ins vrrc:$VB),
+ "vclzw $VD, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD, (ctlz v4i32:$VB))]>;
+def VCLZD : VXForm_2<1986, (outs vrrc:$VD), (ins vrrc:$VB),
+ "vclzd $VD, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD, (ctlz v2i64:$VB))]>;
// Population Count
-def VPOPCNTB : VXForm_2<1795, (outs vrrc:$vD), (ins vrrc:$vB),
- "vpopcntb $vD, $vB", IIC_VecGeneral,
- [(set v16i8:$vD, (ctpop v16i8:$vB))]>;
-def VPOPCNTH : VXForm_2<1859, (outs vrrc:$vD), (ins vrrc:$vB),
- "vpopcnth $vD, $vB", IIC_VecGeneral,
- [(set v8i16:$vD, (ctpop v8i16:$vB))]>;
-def VPOPCNTW : VXForm_2<1923, (outs vrrc:$vD), (ins vrrc:$vB),
- "vpopcntw $vD, $vB", IIC_VecGeneral,
- [(set v4i32:$vD, (ctpop v4i32:$vB))]>;
-def VPOPCNTD : VXForm_2<1987, (outs vrrc:$vD), (ins vrrc:$vB),
- "vpopcntd $vD, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (ctpop v2i64:$vB))]>;
+def VPOPCNTB : VXForm_2<1795, (outs vrrc:$VD), (ins vrrc:$VB),
+ "vpopcntb $VD, $VB", IIC_VecGeneral,
+ [(set v16i8:$VD, (ctpop v16i8:$VB))]>;
+def VPOPCNTH : VXForm_2<1859, (outs vrrc:$VD), (ins vrrc:$VB),
+ "vpopcnth $VD, $VB", IIC_VecGeneral,
+ [(set v8i16:$VD, (ctpop v8i16:$VB))]>;
+def VPOPCNTW : VXForm_2<1923, (outs vrrc:$VD), (ins vrrc:$VB),
+ "vpopcntw $VD, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD, (ctpop v4i32:$VB))]>;
+def VPOPCNTD : VXForm_2<1987, (outs vrrc:$VD), (ins vrrc:$VB),
+ "vpopcntd $VD, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD, (ctpop v2i64:$VB))]>;
let isCommutable = 1 in {
// FIXME: Use AddedComplexity > 400 to ensure these patterns match before the
@@ -1319,26 +1326,26 @@ let isCommutable = 1 in {
// 2. Employ a more disciplined use of AddedComplexity, which would provide
// more fine-grained control than option 1. This would be beneficial
// if we find situations where Altivec is really preferred over VSX.
-def VEQV : VXForm_1<1668, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "veqv $vD, $vA, $vB", IIC_VecGeneral,
- [(set v4i32:$vD, (vnot (xor v4i32:$vA, v4i32:$vB)))]>;
-def VNAND : VXForm_1<1412, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vnand $vD, $vA, $vB", IIC_VecGeneral,
- [(set v4i32:$vD, (vnot (and v4i32:$vA, v4i32:$vB)))]>;
+def VEQV : VXForm_1<1668, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "veqv $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD, (vnot (xor v4i32:$VA, v4i32:$VB)))]>;
+def VNAND : VXForm_1<1412, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vnand $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD, (vnot (and v4i32:$VA, v4i32:$VB)))]>;
} // isCommutable
-def VORC : VXForm_1<1348, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vorc $vD, $vA, $vB", IIC_VecGeneral,
- [(set v4i32:$vD, (or v4i32:$vA,
- (vnot v4i32:$vB)))]>;
+def VORC : VXForm_1<1348, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vorc $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD, (or v4i32:$VA,
+ (vnot v4i32:$VB)))]>;
// i64 element comparisons.
-def VCMPEQUD : VCMP <199, "vcmpequd $vD, $vA, $vB" , v2i64>;
-def VCMPEQUD_rec : VCMP_rec<199, "vcmpequd. $vD, $vA, $vB", v2i64>;
-def VCMPGTSD : VCMP <967, "vcmpgtsd $vD, $vA, $vB" , v2i64>;
-def VCMPGTSD_rec : VCMP_rec<967, "vcmpgtsd. $vD, $vA, $vB", v2i64>;
-def VCMPGTUD : VCMP <711, "vcmpgtud $vD, $vA, $vB" , v2i64>;
-def VCMPGTUD_rec : VCMP_rec<711, "vcmpgtud. $vD, $vA, $vB", v2i64>;
+def VCMPEQUD : VCMP <199, "vcmpequd $VD, $VA, $VB" , v2i64>;
+def VCMPEQUD_rec : VCMP_rec<199, "vcmpequd. $VD, $VA, $VB", v2i64>;
+def VCMPGTSD : VCMP <967, "vcmpgtsd $VD, $VA, $VB" , v2i64>;
+def VCMPGTSD_rec : VCMP_rec<967, "vcmpgtsd. $VD, $VA, $VB", v2i64>;
+def VCMPGTUD : VCMP <711, "vcmpgtud $VD, $VA, $VB" , v2i64>;
+def VCMPGTUD_rec : VCMP_rec<711, "vcmpgtud. $VD, $VA, $VB", v2i64>;
// The cryptography instructions that do not require Category:Vector.Crypto
def VPMSUMB : VX1_Int_Ty<1032, "vpmsumb",
@@ -1349,8 +1356,8 @@ def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw",
int_ppc_altivec_crypto_vpmsumw, v4i32>;
def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd",
int_ppc_altivec_crypto_vpmsumd, v2i64>;
-def VPERMXOR : VAForm_1<45, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB, vrrc:$VC),
- "vpermxor $VD, $VA, $VB, $VC", IIC_VecFP, []>;
+def VPERMXOR : VAForm_1<45, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RB, vrrc:$RC),
+ "vpermxor $RT, $RA, $RB, $RC", IIC_VecFP, []>;
// Vector doubleword integer pack and unpack.
let hasSideEffects = 1 in {
@@ -1361,10 +1368,10 @@ let hasSideEffects = 1 in {
def VPKUDUS : VX1_Int_Ty2<1230, "vpkudus", int_ppc_altivec_vpkudus,
v4i32, v2i64>;
}
-def VPKUDUM : VXForm_1<1102, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vpkudum $vD, $vA, $vB", IIC_VecFP,
- [(set v16i8:$vD,
- (vpkudum_shuffle v16i8:$vA, v16i8:$vB))]>;
+def VPKUDUM : VXForm_1<1102, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vpkudum $VD, $VA, $VB", IIC_VecFP,
+ [(set v16i8:$VD,
+ (vpkudum_shuffle v16i8:$VA, v16i8:$VB))]>;
def VUPKHSW : VX2_Int_Ty2<1614, "vupkhsw", int_ppc_altivec_vupkhsw,
v2i64, v4i32>;
def VUPKLSW : VX2_Int_Ty2<1742, "vupklsw", int_ppc_altivec_vupklsw,
@@ -1414,33 +1421,33 @@ def VMSUMUDM : VA1a_Int_Ty3<35, "vmsumudm", int_ppc_altivec_vmsumudm,
v1i128, v2i64, v1i128>;
// i8 element comparisons.
-def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>;
-def VCMPNEB_rec : VCMP_rec < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
-def VCMPNEZB : VCMP <263, "vcmpnezb $vD, $vA, $vB" , v16i8>;
-def VCMPNEZB_rec : VCMP_rec<263, "vcmpnezb. $vD, $vA, $vB", v16i8>;
+def VCMPNEB : VCMP < 7, "vcmpneb $VD, $VA, $VB" , v16i8>;
+def VCMPNEB_rec : VCMP_rec < 7, "vcmpneb. $VD, $VA, $VB" , v16i8>;
+def VCMPNEZB : VCMP <263, "vcmpnezb $VD, $VA, $VB" , v16i8>;
+def VCMPNEZB_rec : VCMP_rec<263, "vcmpnezb. $VD, $VA, $VB", v16i8>;
// i16 element comparisons.
-def VCMPNEH : VCMP < 71, "vcmpneh $vD, $vA, $vB" , v8i16>;
-def VCMPNEH_rec : VCMP_rec< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>;
-def VCMPNEZH : VCMP <327, "vcmpnezh $vD, $vA, $vB" , v8i16>;
-def VCMPNEZH_rec : VCMP_rec<327, "vcmpnezh. $vD, $vA, $vB", v8i16>;
+def VCMPNEH : VCMP < 71, "vcmpneh $VD, $VA, $VB" , v8i16>;
+def VCMPNEH_rec : VCMP_rec< 71, "vcmpneh. $VD, $VA, $VB" , v8i16>;
+def VCMPNEZH : VCMP <327, "vcmpnezh $VD, $VA, $VB" , v8i16>;
+def VCMPNEZH_rec : VCMP_rec<327, "vcmpnezh. $VD, $VA, $VB", v8i16>;
// i32 element comparisons.
-def VCMPNEW : VCMP <135, "vcmpnew $vD, $vA, $vB" , v4i32>;
-def VCMPNEW_rec : VCMP_rec<135, "vcmpnew. $vD, $vA, $vB" , v4i32>;
-def VCMPNEZW : VCMP <391, "vcmpnezw $vD, $vA, $vB" , v4i32>;
-def VCMPNEZW_rec : VCMP_rec<391, "vcmpnezw. $vD, $vA, $vB", v4i32>;
+def VCMPNEW : VCMP <135, "vcmpnew $VD, $VA, $VB" , v4i32>;
+def VCMPNEW_rec : VCMP_rec<135, "vcmpnew. $VD, $VA, $VB" , v4i32>;
+def VCMPNEZW : VCMP <391, "vcmpnezw $VD, $VA, $VB" , v4i32>;
+def VCMPNEZW_rec : VCMP_rec<391, "vcmpnezw. $VD, $VA, $VB", v4i32>;
// VX-Form: [PO VRT / UIM VRB XO].
// We use VXForm_1 to implement it, that is, we use "VRA" (5 bit) to represent
// "/ UIM" (1 + 4 bit)
class VX1_VT5_UIM5_VB5<bits<11> xo, string opc, list<dag> pattern>
- : VXForm_1<xo, (outs vrrc:$vD), (ins u4imm:$UIMM, vrrc:$vB),
- !strconcat(opc, " $vD, $vB, $UIMM"), IIC_VecGeneral, pattern>;
+ : VXForm_1<xo, (outs vrrc:$VD), (ins u4imm:$VA, vrrc:$VB),
+ !strconcat(opc, " $VD, $VB, $VA"), IIC_VecGeneral, pattern>;
class VX1_RT5_RA5_VB5<bits<11> xo, string opc, list<dag> pattern>
- : VXForm_1<xo, (outs g8rc:$rD), (ins g8rc:$rA, vrrc:$vB),
- !strconcat(opc, " $rD, $rA, $vB"), IIC_VecGeneral, pattern>;
+ : VXForm_1<xo, (outs g8rc:$VD), (ins g8rc:$VA, vrrc:$VB),
+ !strconcat(opc, " $VD, $VA, $VB"), IIC_VecGeneral, pattern>;
// Vector Extract Unsigned
def VEXTRACTUB : VX1_VT5_UIM5_VB5<525, "vextractub", []>;
@@ -1459,58 +1466,58 @@ def VEXTUWRX : VX1_RT5_RA5_VB5<1933, "vextuwrx", []>, ZExt32To64;
}
// Vector Insert Element Instructions
-def VINSERTB : VXForm_1<781, (outs vrrc:$vD),
- (ins vrrc:$vDi, u4imm:$UIM, vrrc:$vB),
- "vinsertb $vD, $vB, $UIM", IIC_VecGeneral,
- [(set v16i8:$vD, (PPCvecinsert v16i8:$vDi, v16i8:$vB,
- imm32SExt16:$UIM))]>,
- RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
-def VINSERTH : VXForm_1<845, (outs vrrc:$vD),
- (ins vrrc:$vDi, u4imm:$UIM, vrrc:$vB),
- "vinserth $vD, $vB, $UIM", IIC_VecGeneral,
- [(set v8i16:$vD, (PPCvecinsert v8i16:$vDi, v8i16:$vB,
- imm32SExt16:$UIM))]>,
- RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+def VINSERTB : VXForm_1<781, (outs vrrc:$VD),
+ (ins vrrc:$VDi, u4imm:$VA, vrrc:$VB),
+ "vinsertb $VD, $VB, $VA", IIC_VecGeneral,
+ [(set v16i8:$VD, (PPCvecinsert v16i8:$VDi, v16i8:$VB,
+ imm32SExt16:$VA))]>,
+ RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">;
+def VINSERTH : VXForm_1<845, (outs vrrc:$VD),
+ (ins vrrc:$VDi, u4imm:$VA, vrrc:$VB),
+ "vinserth $VD, $VB, $VA", IIC_VecGeneral,
+ [(set v8i16:$VD, (PPCvecinsert v8i16:$VDi, v8i16:$VB,
+ imm32SExt16:$VA))]>,
+ RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">;
def VINSERTW : VX1_VT5_UIM5_VB5<909, "vinsertw", []>;
def VINSERTD : VX1_VT5_UIM5_VB5<973, "vinsertd", []>;
class VX_VT5_EO5_VB5<bits<11> xo, bits<5> eo, string opc, list<dag> pattern>
- : VXForm_RD5_XO5_RS5<xo, eo, (outs vrrc:$vD), (ins vrrc:$vB),
- !strconcat(opc, " $vD, $vB"), IIC_VecGeneral, pattern>;
+ : VXForm_RD5_XO5_RS5<xo, eo, (outs vrrc:$VD), (ins vrrc:$VB),
+ !strconcat(opc, " $VD, $VB"), IIC_VecGeneral, pattern>;
class VX_VT5_EO5_VB5s<bits<11> xo, bits<5> eo, string opc, list<dag> pattern>
- : VXForm_RD5_XO5_RS5<xo, eo, (outs vfrc:$vD), (ins vfrc:$vB),
- !strconcat(opc, " $vD, $vB"), IIC_VecGeneral, pattern>;
-
-// Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[rD]
-def VCLZLSBB : VXForm_RD5_XO5_RS5<1538, 0, (outs gprc:$rD), (ins vrrc:$vB),
- "vclzlsbb $rD, $vB", IIC_VecGeneral,
- [(set i32:$rD, (int_ppc_altivec_vclzlsbb
- v16i8:$vB))]>;
-def VCTZLSBB : VXForm_RD5_XO5_RS5<1538, 1, (outs gprc:$rD), (ins vrrc:$vB),
- "vctzlsbb $rD, $vB", IIC_VecGeneral,
- [(set i32:$rD, (int_ppc_altivec_vctzlsbb
- v16i8:$vB))]>;
+ : VXForm_RD5_XO5_RS5<xo, eo, (outs vfrc:$VD), (ins vfrc:$VB),
+ !strconcat(opc, " $VD, $VB"), IIC_VecGeneral, pattern>;
+
+// Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[RD]
+def VCLZLSBB : VXForm_RD5_XO5_RS5<1538, 0, (outs gprc:$VD), (ins vrrc:$VB),
+ "vclzlsbb $VD, $VB", IIC_VecGeneral,
+ [(set i32:$VD, (int_ppc_altivec_vclzlsbb
+ v16i8:$VB))]>;
+def VCTZLSBB : VXForm_RD5_XO5_RS5<1538, 1, (outs gprc:$VD), (ins vrrc:$VB),
+ "vctzlsbb $VD, $VB", IIC_VecGeneral,
+ [(set i32:$VD, (int_ppc_altivec_vctzlsbb
+ v16i8:$VB))]>;
// Vector Count Trailing Zeros
def VCTZB : VX_VT5_EO5_VB5<1538, 28, "vctzb",
- [(set v16i8:$vD, (cttz v16i8:$vB))]>;
+ [(set v16i8:$VD, (cttz v16i8:$VB))]>;
def VCTZH : VX_VT5_EO5_VB5<1538, 29, "vctzh",
- [(set v8i16:$vD, (cttz v8i16:$vB))]>;
+ [(set v8i16:$VD, (cttz v8i16:$VB))]>;
def VCTZW : VX_VT5_EO5_VB5<1538, 30, "vctzw",
- [(set v4i32:$vD, (cttz v4i32:$vB))]>;
+ [(set v4i32:$VD, (cttz v4i32:$VB))]>;
def VCTZD : VX_VT5_EO5_VB5<1538, 31, "vctzd",
- [(set v2i64:$vD, (cttz v2i64:$vB))]>;
+ [(set v2i64:$VD, (cttz v2i64:$VB))]>;
// Vector Extend Sign
def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w",
- [(set v4i32:$vD, (int_ppc_altivec_vextsb2w v16i8:$vB))]>;
+ [(set v4i32:$VD, (int_ppc_altivec_vextsb2w v16i8:$VB))]>;
def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17, "vextsh2w",
- [(set v4i32:$vD, (int_ppc_altivec_vextsh2w v8i16:$vB))]>;
+ [(set v4i32:$VD, (int_ppc_altivec_vextsh2w v8i16:$VB))]>;
def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d",
- [(set v2i64:$vD, (int_ppc_altivec_vextsb2d v16i8:$vB))]>;
+ [(set v2i64:$VD, (int_ppc_altivec_vextsb2d v16i8:$VB))]>;
def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d",
- [(set v2i64:$vD, (int_ppc_altivec_vextsh2d v8i16:$vB))]>;
+ [(set v2i64:$VD, (int_ppc_altivec_vextsh2d v8i16:$VB))]>;
def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d",
- [(set v2i64:$vD, (int_ppc_altivec_vextsw2d v4i32:$vB))]>;
+ [(set v2i64:$VD, (int_ppc_altivec_vextsw2d v4i32:$VB))]>;
let isCodeGenOnly = 1 in {
def VEXTSB2Ws : VX_VT5_EO5_VB5s<1538, 16, "vextsb2w", []>;
def VEXTSH2Ws : VX_VT5_EO5_VB5s<1538, 17, "vextsh2w", []>;
@@ -1527,64 +1534,64 @@ def : Pat<(v2i64 (sext_inreg v2i64:$VRB, v2i32)), (v2i64 (VEXTSW2D $VRB))>;
// Vector Integer Negate
def VNEGW : VX_VT5_EO5_VB5<1538, 6, "vnegw",
- [(set v4i32:$vD,
- (sub (v4i32 immAllZerosV), v4i32:$vB))]>;
+ [(set v4i32:$VD,
+ (sub (v4i32 immAllZerosV), v4i32:$VB))]>;
def VNEGD : VX_VT5_EO5_VB5<1538, 7, "vnegd",
- [(set v2i64:$vD,
- (sub (v2i64 immAllZerosV), v2i64:$vB))]>;
+ [(set v2i64:$VD,
+ (sub (v2i64 immAllZerosV), v2i64:$VB))]>;
// Vector Parity Byte
-def VPRTYBW : VX_VT5_EO5_VB5<1538, 8, "vprtybw", [(set v4i32:$vD,
- (int_ppc_altivec_vprtybw v4i32:$vB))]>;
-def VPRTYBD : VX_VT5_EO5_VB5<1538, 9, "vprtybd", [(set v2i64:$vD,
- (int_ppc_altivec_vprtybd v2i64:$vB))]>;
-def VPRTYBQ : VX_VT5_EO5_VB5<1538, 10, "vprtybq", [(set v1i128:$vD,
- (int_ppc_altivec_vprtybq v1i128:$vB))]>;
+def VPRTYBW : VX_VT5_EO5_VB5<1538, 8, "vprtybw", [(set v4i32:$VD,
+ (int_ppc_altivec_vprtybw v4i32:$VB))]>;
+def VPRTYBD : VX_VT5_EO5_VB5<1538, 9, "vprtybd", [(set v2i64:$VD,
+ (int_ppc_altivec_vprtybd v2i64:$VB))]>;
+def VPRTYBQ : VX_VT5_EO5_VB5<1538, 10, "vprtybq", [(set v1i128:$VD,
+ (int_ppc_altivec_vprtybq v1i128:$VB))]>;
// Vector (Bit) Permute (Right-indexed)
def VBPERMD : VX1_Int_Ty3<1484, "vbpermd", int_ppc_altivec_vbpermd,
v2i64, v2i64, v16i8>;
-def VPERMR : VAForm_1a<59, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
- "vpermr $vD, $vA, $vB, $vC", IIC_VecFP, []>;
+def VPERMR : VAForm_1a<59, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RB, vrrc:$RC),
+ "vpermr $RT, $RA, $RB, $RC", IIC_VecFP, []>;
class VX1_VT5_VA5_VB5<bits<11> xo, string opc, list<dag> pattern>
- : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern>;
+ : VXForm_1<xo, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ !strconcat(opc, " $VD, $VA, $VB"), IIC_VecFP, pattern>;
// Vector Rotate Left Mask/Mask-Insert
def VRLWNM : VX1_VT5_VA5_VB5<389, "vrlwnm",
- [(set v4i32:$vD,
- (int_ppc_altivec_vrlwnm v4i32:$vA,
- v4i32:$vB))]>;
-def VRLWMI : VXForm_1<133, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
- "vrlwmi $vD, $vA, $vB", IIC_VecFP,
- [(set v4i32:$vD,
- (int_ppc_altivec_vrlwmi v4i32:$vA, v4i32:$vB,
- v4i32:$vDi))]>,
- RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+ [(set v4i32:$VD,
+ (int_ppc_altivec_vrlwnm v4i32:$VA,
+ v4i32:$VB))]>;
+def VRLWMI : VXForm_1<133, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB, vrrc:$VDi),
+ "vrlwmi $VD, $VA, $VB", IIC_VecFP,
+ [(set v4i32:$VD,
+ (int_ppc_altivec_vrlwmi v4i32:$VA, v4i32:$VB,
+ v4i32:$VDi))]>,
+ RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">;
def VRLDNM : VX1_VT5_VA5_VB5<453, "vrldnm",
- [(set v2i64:$vD,
- (int_ppc_altivec_vrldnm v2i64:$vA,
- v2i64:$vB))]>;
-def VRLDMI : VXForm_1<197, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
- "vrldmi $vD, $vA, $vB", IIC_VecFP,
- [(set v2i64:$vD,
- (int_ppc_altivec_vrldmi v2i64:$vA, v2i64:$vB,
- v2i64:$vDi))]>,
- RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+ [(set v2i64:$VD,
+ (int_ppc_altivec_vrldnm v2i64:$VA,
+ v2i64:$VB))]>;
+def VRLDMI : VXForm_1<197, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB, vrrc:$VDi),
+ "vrldmi $VD, $VA, $VB", IIC_VecFP,
+ [(set v2i64:$VD,
+ (int_ppc_altivec_vrldmi v2i64:$VA, v2i64:$VB,
+ v2i64:$VDi))]>,
+ RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">;
// Vector Shift Left/Right
def VSLV : VX1_VT5_VA5_VB5<1860, "vslv",
- [(set v16i8 : $vD, (int_ppc_altivec_vslv v16i8 : $vA, v16i8 : $vB))]>;
+ [(set v16i8 : $VD, (int_ppc_altivec_vslv v16i8 : $VA, v16i8 : $VB))]>;
def VSRV : VX1_VT5_VA5_VB5<1796, "vsrv",
- [(set v16i8 : $vD, (int_ppc_altivec_vsrv v16i8 : $vA, v16i8 : $vB))]>;
+ [(set v16i8 : $VD, (int_ppc_altivec_vsrv v16i8 : $VA, v16i8 : $VB))]>;
// Vector Multiply-by-10 (& Write Carry) Unsigned Quadword
-def VMUL10UQ : VXForm_BX<513, (outs vrrc:$vD), (ins vrrc:$vA),
- "vmul10uq $vD, $vA", IIC_VecFP, []>;
-def VMUL10CUQ : VXForm_BX< 1, (outs vrrc:$vD), (ins vrrc:$vA),
- "vmul10cuq $vD, $vA", IIC_VecFP, []>;
+def VMUL10UQ : VXForm_BX<513, (outs vrrc:$VD), (ins vrrc:$VA),
+ "vmul10uq $VD, $VA", IIC_VecFP, []>;
+def VMUL10CUQ : VXForm_BX< 1, (outs vrrc:$VD), (ins vrrc:$VA),
+ "vmul10cuq $VD, $VA", IIC_VecFP, []>;
// Vector Multiply-by-10 Extended (& Write Carry) Unsigned Quadword
def VMUL10EUQ : VX1_VT5_VA5_VB5<577, "vmul10euq" , []>;
@@ -1595,16 +1602,16 @@ def VMUL10ECUQ : VX1_VT5_VA5_VB5< 65, "vmul10ecuq", []>;
// [PO VRT EO VRB 1 PS XO], "_o" means CR6 is set.
class VX_VT5_EO5_VB5_PS1_XO9_o<bits<5> eo, bits<9> xo, string opc,
list<dag> pattern>
- : VX_RD5_EO5_RS5_PS1_XO9<eo, xo, (outs vrrc:$vD), (ins vrrc:$vB, u1imm:$PS),
- !strconcat(opc, " $vD, $vB, $PS"), IIC_VecFP, pattern> {
+ : VX_RD5_EO5_RS5_PS1_XO9<eo, xo, (outs vrrc:$VD), (ins vrrc:$VB, u1imm:$PS),
+ !strconcat(opc, " $VD, $VB, $PS"), IIC_VecFP, pattern> {
let Defs = [CR6];
}
// [PO VRT EO VRB 1 / XO]
class VX_VT5_EO5_VB5_XO9_o<bits<5> eo, bits<9> xo, string opc,
list<dag> pattern>
- : VX_RD5_EO5_RS5_PS1_XO9<eo, xo, (outs vrrc:$vD), (ins vrrc:$vB),
- !strconcat(opc, " $vD, $vB"), IIC_VecFP, pattern> {
+ : VX_RD5_EO5_RS5_PS1_XO9<eo, xo, (outs vrrc:$VD), (ins vrrc:$VB),
+ !strconcat(opc, " $VD, $VB"), IIC_VecFP, pattern> {
let Defs = [CR6];
let PS = 0;
}
@@ -1633,14 +1640,14 @@ def BCDTRUNC_rec : VX_VT5_VA5_VB5_PS1_XO9_o<257, "bcdtrunc." , []>;
def BCDUTRUNC_rec : VX_VT5_VA5_VB5_XO9_o <321, "bcdutrunc.", []>;
// Absolute Difference
-def VABSDUB : VXForm_1<1027, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vabsdub $vD, $vA, $vB", IIC_VecGeneral,
- [(set v16i8:$vD, (int_ppc_altivec_vabsdub v16i8:$vA, v16i8:$vB))]>;
-def VABSDUH : VXForm_1<1091, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vabsduh $vD, $vA, $vB", IIC_VecGeneral,
- [(set v8i16:$vD, (int_ppc_altivec_vabsduh v8i16:$vA, v8i16:$vB))]>;
-def VABSDUW : VXForm_1<1155, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vabsduw $vD, $vA, $vB", IIC_VecGeneral,
- [(set v4i32:$vD, (int_ppc_altivec_vabsduw v4i32:$vA, v4i32:$vB))]>;
+def VABSDUB : VXForm_1<1027, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vabsdub $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v16i8:$VD, (int_ppc_altivec_vabsdub v16i8:$VA, v16i8:$VB))]>;
+def VABSDUH : VXForm_1<1091, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vabsduh $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v8i16:$VD, (int_ppc_altivec_vabsduh v8i16:$VA, v8i16:$VB))]>;
+def VABSDUW : VXForm_1<1155, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vabsduw $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD, (int_ppc_altivec_vabsduw v4i32:$VA, v4i32:$VB))]>;
} // end HasP9Altivec
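The operand renaming above ($vD/$vA/$vB to $VD/$VA/$VB) matters because TableGen binds an instruction's dag operands to the encoding fields of its format class by name. A minimal sketch of that binding, using hypothetical names (SketchVXForm and SKETCHADD are not in-tree definitions; the field layout is assumed to mirror the VX form):

// Minimal sketch, hypothetical names: the format class declares bit fields
// VD/VA/VB, and any instruction whose (outs)/(ins) operands are also named
// $VD/$VA/$VB has those fields filled in by name during encoding emission.
class SketchVXForm<bits<11> xo, dag OOL, dag IOL, string asmstr>
  : I<4, OOL, IOL, asmstr, IIC_VecGeneral> {
  bits<5> VD;
  bits<5> VA;
  bits<5> VB;
  let Inst{6-10}  = VD;
  let Inst{11-15} = VA;
  let Inst{16-20} = VB;
  let Inst{21-31} = xo;
}
def SKETCHADD : SketchVXForm<999, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
                             "sketchadd $VD, $VA, $VB">;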
diff --git a/llvm/lib/Target/PowerPC/PPCInstrDFP.td b/llvm/lib/Target/PowerPC/PPCInstrDFP.td
new file mode 100644
index 000000000000..f4908e325e13
--- /dev/null
+++ b/llvm/lib/Target/PowerPC/PPCInstrDFP.td
@@ -0,0 +1,193 @@
+//===-- PPCInstrDFP.td - PowerPC Decimal Floating Point ----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the PowerPC Decimal Floating Point (DFP) instructions.
+//
+//===----------------------------------------------------------------------===//
+
+// We provide no scheduling info for the DFP instructions.
+// While they are not pseudo instructions, we do not intend to schedule them.
+let hasNoSchedulingInfo = 1 in {
+let mayRaiseFPException = 1, hasSideEffects = 0 in {
+
+let isCommutable = 1 in {
+defm DADD : XForm_28r<59, 2, (outs f8rc:$RST), (ins f8rc:$RA, f8rc:$RB),
+ "dadd", "$RST, $RA, $RB", IIC_FPGeneral, []>;
+
+defm DADDQ : XForm_28r<63, 2, (outs fpairrc:$RST), (ins fpairrc:$RA, fpairrc:$RB),
+ "daddq", "$RST, $RA, $RB", IIC_FPGeneral, []>;
+}
+
+defm DSUB : XForm_28r<59, 514, (outs f8rc:$RST), (ins f8rc:$RA, f8rc:$RB),
+ "dsub", "$RST, $RA, $RB", IIC_FPGeneral, []>;
+
+defm DSUBQ : XForm_28r<63, 514, (outs fpairrc:$RST), (ins fpairrc:$RA, fpairrc:$RB),
+ "dsubq", "$RST, $RA, $RB", IIC_FPGeneral, []>;
+
+let isCommutable = 1 in {
+defm DMUL : XForm_28r<59, 34, (outs f8rc:$RST), (ins f8rc:$RA, f8rc:$RB),
+ "dmul", "$RST, $RA, $RB", IIC_FPGeneral, []>;
+
+defm DMULQ : XForm_28r<63, 34, (outs fpairrc:$RST), (ins fpairrc:$RA, fpairrc:$RB),
+ "dmulq", "$RST, $RA, $RB", IIC_FPGeneral, []>;
+}
+
+defm DDIV : XForm_28r<59, 546, (outs f8rc:$RST), (ins f8rc:$RA, f8rc:$RB),
+ "ddiv", "$RST, $RA, $RB", IIC_FPGeneral, []>;
+
+defm DDIVQ : XForm_28r<63, 546, (outs fpairrc:$RST), (ins fpairrc:$RA, fpairrc:$RB),
+ "ddivq", "$RST, $RA, $RB", IIC_FPGeneral, []>;
+
+let isCompare = 1 in {
+ def DCMPU : XForm_17<59, 642, (outs crrc:$BF), (ins f8rc:$RA, f8rc:$RB),
+ "dcmpu $BF, $RA, $RB", IIC_FPCompare>;
+
+ def DCMPUQ : XForm_17<63, 642, (outs crrc:$BF), (ins fpairrc:$RA, fpairrc:$RB),
+ "dcmpuq $BF, $RA, $RB", IIC_FPCompare>;
+
+ def DCMPO : XForm_17<59, 130, (outs crrc:$BF), (ins f8rc:$RA, f8rc:$RB),
+ "dcmpo $BF, $RA, $RB", IIC_FPCompare>;
+
+ def DCMPOQ : XForm_17<63, 130, (outs crrc:$BF), (ins fpairrc:$RA, fpairrc:$RB),
+ "dcmpoq $BF, $RA, $RB", IIC_FPCompare>;
+}
+
+// 5.6.4 DFP Quantum Adjustment Instructions
+defm DQUAI: Z23Form_TE5_FRTB5_RMC2r<59, 67, (outs f8rc:$FRT),
+ (ins s5imm:$TE, f8rc:$FRB, u2imm:$RMC),
+ "dquai", "$TE, $FRT, $FRB, $RMC", []>;
+defm DQUAIQ: Z23Form_TE5_FRTB5_RMC2r<63, 67, (outs fpairrc:$FRT),
+ (ins s5imm:$TE, fpairrc:$FRB, u2imm:$RMC),
+ "dquaiq", "$TE, $FRT, $FRB, $RMC", []>;
+defm DQUA: Z23Form_FRTAB5_RMC2r<59, 3, (outs f8rc:$FRT),
+ (ins f8rc:$FRA, f8rc:$FRB, u2imm:$RMC),
+ "dqua", "$FRT, $FRA, $FRB, $RMC", []>;
+defm DQUAQ: Z23Form_FRTAB5_RMC2r<63, 3, (outs fpairrc:$FRT),
+ (ins fpairrc:$FRA, fpairrc:$FRB, u2imm:$RMC),
+ "dquaq", "$FRT, $FRA, $FRB, $RMC", []>;
+defm DRRND: Z23Form_FRTAB5_RMC2r<59, 35, (outs f8rc:$FRT),
+ (ins f8rc:$FRA, f8rc:$FRB, u2imm:$RMC),
+ "drrnd", "$FRT, $FRA, $FRB, $RMC", []>;
+defm DRRNDQ: Z23Form_FRTAB5_RMC2r<63, 35, (outs fpairrc:$FRT),
+ (ins f8rc:$FRA, fpairrc:$FRB, u2imm:$RMC),
+ "drrndq", "$FRT, $FRA, $FRB, $RMC", []>;
+defm DRINTX: Z23Form_FRTB5_R1_RMC2r<59, 99, (outs f8rc:$FRT),
+ (ins u1imm:$R, f8rc:$FRB, u2imm:$RMC),
+ "drintx", "$R, $FRT, $FRB, $RMC", []>;
+defm DRINTXQ: Z23Form_FRTB5_R1_RMC2r<63, 99, (outs fpairrc:$FRT),
+ (ins u1imm:$R, fpairrc:$FRB, u2imm:$RMC),
+ "drintxq", "$R, $FRT, $FRB, $RMC", []>;
+defm DRINTN: Z23Form_FRTB5_R1_RMC2r<59, 227, (outs f8rc:$FRT),
+ (ins u1imm:$R, f8rc:$FRB, u2imm:$RMC),
+ "drintn", "$R, $FRT, $FRB, $RMC", []>;
+defm DRINTNQ: Z23Form_FRTB5_R1_RMC2r<63, 227, (outs fpairrc:$FRT),
+ (ins u1imm:$R, fpairrc:$FRB, u2imm:$RMC),
+ "drintnq", "$R, $FRT, $FRB, $RMC", []>;
+
+// 5.6.5 DFP Conversion Instructions
+defm DCTDP: XForm_26r<59, 258, (outs f8rc:$RST), (ins f8rc:$RB),
+ "dctdp", "$RST, $RB", IIC_FPGeneral, []>;
+defm DCTQPQ: XForm_26r<63, 258, (outs fpairrc:$RST), (ins f8rc:$RB),
+ "dctqpq", "$RST, $RB", IIC_FPGeneral, []>;
+defm DRSP: XForm_26r<59, 770, (outs f8rc:$RST), (ins f8rc:$RB),
+ "drsp", "$RST, $RB", IIC_FPGeneral, []>;
+defm DRDPQ: XForm_26r<63, 770, (outs fpairrc:$RST), (ins fpairrc:$RB),
+ "drdpq", "$RST, $RB", IIC_FPGeneral, []>;
+defm DCFFIX: XForm_26r<59, 802, (outs f8rc:$RST), (ins f8rc:$RB),
+ "dcffix", "$RST, $RB", IIC_FPGeneral, []>;
+defm DCFFIXQ: XForm_26r<63, 802, (outs fpairrc:$RST), (ins f8rc:$RB),
+ "dcffixq", "$RST, $RB", IIC_FPGeneral, []>;
+defm DCTFIX: XForm_26r<59, 290, (outs f8rc:$RST), (ins f8rc:$RB),
+ "dctfix", "$RST, $RB", IIC_FPGeneral, []>;
+defm DCTFIXQ: XForm_26r<63, 290, (outs f8rc:$RST), (ins fpairrc:$RB),
+ "dctfixq", "$RST, $RB", IIC_FPGeneral, []>;
+let Predicates = [HasP10Vector] in {
+ def DCFFIXQQ: XForm_26<63, 994, (outs fpairrc:$RST), (ins vrrc:$RB),
+ "dcffixqq $RST, $RB", IIC_FPGeneral, []>;
+let RA = 1 in
+ def DCTFIXQQ: XForm_base_r3xo<63, 994, (outs vrrc:$RST), (ins fpairrc:$RB),
+ "dctfixqq $RST, $RB", IIC_FPGeneral, []>;
+} // HasP10Vector
+
+// 5.6.6 DFP Format Instructions
+defm DENBCD: XForm_S1_FRTB5r<59, 834, (outs f8rc:$FRT),
+ (ins u1imm:$S, f8rc:$FRB),
+ "denbcd", "$S, $FRT, $FRB", []>;
+defm DENBCDQ: XForm_S1_FRTB5r<63, 834, (outs fpairrc:$FRT),
+ (ins u1imm:$S, fpairrc:$FRB),
+ "denbcdq", "$S, $FRT, $FRB", []>;
+} // mayRaiseFPException
+
+// 5.6.6 DFP non-exception-raising format instructions.
+defm DDEDPD: XForm_SP2_FRTB5r<59, 322, (outs f8rc:$FRT),
+ (ins u2imm:$SP, f8rc:$FRB),
+ "ddedpd", "$SP, $FRT, $FRB", []>;
+defm DDEDPDQ: XForm_SP2_FRTB5r<63, 322, (outs fpairrc:$FRT),
+ (ins u2imm:$SP, fpairrc:$FRB),
+ "ddedpdq", "$SP, $FRT, $FRB", []>;
+defm DXEX: XForm_26r<59, 354, (outs f8rc:$RST), (ins f8rc:$RB),
+ "dxex", "$RST, $RB", NoItinerary, []>;
+defm DXEXQ: XForm_26r<63, 354, (outs f8rc:$RST), (ins fpairrc:$RB),
+ "dxexq", "$RST, $RB", NoItinerary, []>;
+defm DIEX: XForm_base_r3xo_r<59, 866, (outs f8rc:$RST),
+ (ins f8rc:$RA, f8rc:$RB),
+ "diex", "$RST, $RA, $RB", []>;
+defm DIEXQ: XForm_base_r3xo_r<63, 866, (outs fpairrc:$RST),
+ (ins f8rc:$RA, fpairrc:$RB),
+ "diexq", "$RST, $RA, $RB", []>;
+defm DSCLI: Z22Form_FRTA5_SH6r<59, 66, (outs f8rc:$FRT),
+ (ins f8rc:$FRA, u6imm:$SH),
+ "dscli", "$FRT, $FRA, $SH", []>;
+defm DSCLIQ: Z22Form_FRTA5_SH6r<63, 66, (outs fpairrc:$FRT),
+ (ins fpairrc:$FRA, u6imm:$SH),
+ "dscliq", "$FRT, $FRA, $SH", []>;
+defm DSCRI: Z22Form_FRTA5_SH6r<59, 98, (outs f8rc:$FRT),
+ (ins f8rc:$FRA, u6imm:$SH),
+ "dscri", "$FRT, $FRA, $SH", []>;
+defm DSCRIQ: Z22Form_FRTA5_SH6r<63, 98, (outs fpairrc:$FRT),
+ (ins fpairrc:$FRA, u6imm:$SH),
+ "dscriq", "$FRT, $FRA, $SH", []>;
+
+// 5.6.3 DFP Test Instructions
+def DTSTDC : Z22Form_BF3_FRA5_DCM6<59, 194, (outs crrc:$BF),
+ (ins f8rc:$FRA, u6imm:$DCM),
+ "dtstdc $BF, $FRA, $DCM", IIC_FPCompare, []>;
+
+def DTSTDCQ : Z22Form_BF3_FRA5_DCM6<63, 194, (outs crrc:$BF),
+ (ins fpairrc:$FRA, u6imm:$DCM),
+ "dtstdcq $BF, $FRA, $DCM", IIC_FPCompare, []>;
+
+def DTSTDG : Z22Form_BF3_FRA5_DCM6<59, 226, (outs crrc:$BF),
+ (ins f8rc:$FRA, u6imm:$DCM),
+ "dtstdg $BF, $FRA, $DCM", IIC_FPCompare, []>;
+
+def DTSTDGQ : Z22Form_BF3_FRA5_DCM6<63, 226, (outs crrc:$BF),
+ (ins fpairrc:$FRA, u6imm:$DCM),
+ "dtstdgq $BF, $FRA, $DCM", IIC_FPCompare, []>;
+
+def DTSTEX : XForm_17<59, 162, (outs crrc:$BF), (ins f8rc:$RA, f8rc:$RB),
+ "dtstex $BF, $RA, $RB", IIC_FPCompare>;
+
+def DTSTEXQ : XForm_17<63, 162, (outs crrc:$BF), (ins fpairrc:$RA, fpairrc:$RB),
+ "dtstexq $BF, $RA, $RB", IIC_FPCompare>;
+
+def DTSTSF : XForm_17<59, 674, (outs crrc:$BF), (ins f8rc:$RA, f8rc:$RB),
+ "dtstsf $BF, $RA, $RB", IIC_FPCompare>;
+
+def DTSTSFQ : XForm_17<63, 674, (outs crrc:$BF), (ins f8rc:$RA, fpairrc:$RB),
+ "dtstsfq $BF, $RA, $RB", IIC_FPCompare>;
+
+def DTSTSFI : XForm_BF3_UIM6_FRB5<59, 675, (outs crrc:$BF),
+ (ins u6imm:$UIM, f8rc:$FRB),
+ "dtstsfi $BF, $UIM, $FRB", IIC_FPCompare, []>;
+
+def DTSTSFIQ : XForm_BF3_UIM6_FRB5<63, 675, (outs crrc:$BF),
+ (ins u6imm:$UIM, fpairrc:$FRB),
+ "dtstsfiq $BF, $UIM, $FRB", IIC_FPCompare, []>;
+
+} // hasNoSchedulingInfo
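The DFP arithmetic above is declared with defm through record-form multiclasses (XForm_28r, XForm_26r, Z22Form_FRTA5_SH6r, and friends) so that each mnemonic also gets its CR-setting dot form. A minimal sketch of that pattern with a hypothetical multiclass (the in-tree *r multiclasses may differ in detail; CR1 as the implicit def is an assumption):

// Minimal sketch, hypothetical multiclass: one defm yields SKETCHOP and
// SKETCHOP_rec, the latter being the record ("sketchop.") form, marked with
// isRecordForm and assumed here to define CR1.
multiclass SketchXFormPair<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
                           string asmbase, string asmstr> {
  def NAME : XForm_base_r3xo<opcode, xo, OOL, IOL, asmbase # " " # asmstr,
                             IIC_FPGeneral, []>;
  let Defs = [CR1] in
  def _rec : XForm_base_r3xo<opcode, xo, OOL, IOL, asmbase # ". " # asmstr,
                             IIC_FPGeneral, []>, isRecordForm;
}
defm SKETCHOP : SketchXFormPair<59, 999, (outs f8rc:$RST),
                                (ins f8rc:$RA, f8rc:$RB),
                                "sketchop", "$RST, $RA, $RB">;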
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index f0f8d6ebcf09..0081c0f5295a 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -205,7 +205,8 @@ class BForm_3_at<bits<6> opcode, bit aa, bit lk,
let Inst{31} = lk;
}
-class BForm_4<bits<6> opcode, bits<5> bo, bit aa, bit lk,
+class
+BForm_4<bits<6> opcode, bits<5> bo, bit aa, bit lk,
dag OOL, dag IOL, string asmstr>
: I<opcode, OOL, IOL, asmstr, IIC_BrB> {
bits<5> BI;
@@ -233,47 +234,24 @@ class SCForm<bits<6> opcode, bits<1> xo,
// 1.7.4 D-Form
class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
+ InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
- bits<5> A;
- bits<5> B;
- bits<16> C;
+ bits<5> RST;
+ bits<5> RA;
+ bits<16> D;
let Pattern = pattern;
-
- let Inst{6-10} = A;
- let Inst{11-15} = B;
- let Inst{16-31} = C;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = RA;
+ let Inst{16-31} = D;
}
class DForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
- : I<opcode, OOL, IOL, asmstr, itin> {
- bits<5> A;
- bits<21> Addr;
-
- let Pattern = pattern;
-
- let Inst{6-10} = A;
- let Inst{11-15} = Addr{20-16}; // Base Reg
- let Inst{16-31} = Addr{15-0}; // Displacement
-}
-
-class DForm_1a<bits<6> opcode, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin, list<dag> pattern>
- : I<opcode, OOL, IOL, asmstr, itin> {
- bits<5> A;
- bits<16> C;
- bits<5> B;
-
- let Pattern = pattern;
-
- let Inst{6-10} = A;
- let Inst{11-15} = B;
- let Inst{16-31} = C;
+ : DForm_base<opcode, OOL, IOL, asmstr, itin, pattern> {
}
-
class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: DForm_base<opcode, OOL, IOL, asmstr, itin, pattern> {
@@ -286,52 +264,54 @@ class DForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
class DForm_2_r0<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
- bits<5> A;
- bits<16> B;
-
+ bits<5> RST;
+ bits<16> D;
+
let Pattern = pattern;
-
- let Inst{6-10} = A;
+
+ let Inst{6-10} = RST;
let Inst{11-15} = 0;
- let Inst{16-31} = B;
+ let Inst{16-31} = D;
}
class DForm_4<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
- bits<5> B;
- bits<5> A;
- bits<16> C;
-
+ bits<5> RA;
+ bits<5> RST;
+ bits<16> D;
+
let Pattern = pattern;
-
- let Inst{6-10} = A;
- let Inst{11-15} = B;
- let Inst{16-31} = C;
+
+ let Inst{6-10} = RST;
+ let Inst{11-15} = RA;
+ let Inst{16-31} = D;
}
-
+
class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: DForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
- let A = 0;
- let Addr = 0;
+ let RST = 0;
+ let RA = 0;
+ let D = 0;
}
class DForm_4_fixedreg_zero<bits<6> opcode, bits<5> R, dag OOL, dag IOL,
string asmstr, InstrItinClass itin,
list<dag> pattern>
: DForm_4<opcode, OOL, IOL, asmstr, itin, pattern> {
- let A = R;
- let B = R;
- let C = 0;
+ let RST = R;
+ let RA = R;
+ let D = 0;
}
class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I2<opcode1, opcode2, OOL, IOL, asmstr, itin> {
- bits<5> A;
- bits<21> Addr;
+ bits<5> RST;
+ bits<5> RA;
+ bits<16> D;
let Pattern = pattern;
bits<24> LI;
@@ -340,9 +320,9 @@ class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
let Inst{30} = aa;
let Inst{31} = lk;
- let Inst{38-42} = A;
- let Inst{43-47} = Addr{20-16}; // Base Reg
- let Inst{48-63} = Addr{15-0}; // Displacement
+ let Inst{38-42} = RST;
+ let Inst{43-47} = RA;
+ let Inst{48-63} = D;
}
// This is used to emit BL8+NOP.
@@ -351,8 +331,9 @@ class IForm_and_DForm_4_zero<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
InstrItinClass itin, list<dag> pattern>
: IForm_and_DForm_1<opcode1, aa, lk, opcode2,
OOL, IOL, asmstr, itin, pattern> {
- let A = 0;
- let Addr = 0;
+ let RST = 0;
+ let RA = 0;
+ let D = 0;
}
class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
@@ -361,13 +342,13 @@ class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
bits<3> BF;
bits<1> L;
bits<5> RA;
- bits<16> I;
+ bits<16> D;
let Inst{6-8} = BF;
let Inst{9} = 0;
let Inst{10} = L;
let Inst{11-15} = RA;
- let Inst{16-31} = I;
+ let Inst{16-31} = D;
}
class DForm_5_ext<bits<6> opcode, dag OOL, dag IOL, string asmstr,
@@ -392,13 +373,14 @@ class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> RST;
- bits<19> DS_RA;
+ bits<5> RA;
+ bits<14> D;
let Pattern = pattern;
-
+
let Inst{6-10} = RST;
- let Inst{11-15} = DS_RA{18-14}; // Register #
- let Inst{16-29} = DS_RA{13-0}; // Displacement.
+ let Inst{11-15} = RA;
+ let Inst{16-29} = D;
let Inst{30-31} = xo;
}
@@ -423,13 +405,14 @@ class DQ_RD6_RS5_DQ12<bits<6> opcode, bits<3> xo, dag OOL, dag IOL,
string asmstr, InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<6> XT;
- bits<17> DS_RA;
+ bits<5> RA;
+ bits<12> DQ;
let Pattern = pattern;
let Inst{6-10} = XT{4-0};
- let Inst{11-15} = DS_RA{16-12}; // Register #
- let Inst{16-27} = DS_RA{11-0}; // Displacement.
+ let Inst{11-15} = RA;
+ let Inst{16-27} = DQ;
let Inst{28} = XT{5};
let Inst{29-31} = xo;
}
@@ -439,12 +422,13 @@ class DQForm_RTp5_RA17_MEM<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> RTp;
- bits<17> DQ_RA;
+ bits<5> RA;
+ bits<12> DQ;
let Pattern = pattern;
let Inst{6-10} = RTp{4-0};
- let Inst{11-15} = DQ_RA{16-12}; // Register #
- let Inst{16-27} = DQ_RA{11-0}; // Displacement.
+ let Inst{11-15} = RA;
+ let Inst{16-27} = DQ;
let Inst{28-31} = xo;
}
@@ -453,16 +437,16 @@ class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asms
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> RST;
- bits<5> A;
- bits<5> B;
+ bits<5> RA;
+ bits<5> RB;
let Pattern = pattern;
bit RC = 0; // set by isRecordForm
let Inst{6-10} = RST;
- let Inst{11-15} = A;
- let Inst{16-20} = B;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
let Inst{21-30} = xo;
let Inst{31} = RC;
}
@@ -489,15 +473,15 @@ class XForm_base_r3xo_swapped
<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
- bits<5> A;
+ bits<5> RA;
bits<5> RST;
- bits<5> B;
+ bits<5> RB;
bit RC = 0; // set by isRecordForm
let Inst{6-10} = RST;
- let Inst{11-15} = A;
- let Inst{16-20} = B;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
let Inst{21-30} = xo;
let Inst{31} = RC;
}
@@ -520,21 +504,21 @@ class XForm_1a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
class XForm_rs<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
- let A = 0;
- let B = 0;
+ let RA = 0;
+ let RB = 0;
}
class XForm_tlbws<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> RST;
- bits<5> A;
+ bits<5> RA;
bits<1> WS;
let Pattern = pattern;
let Inst{6-10} = RST;
- let Inst{11-15} = A;
+ let Inst{11-15} = RA;
let Inst{20} = WS;
let Inst{21-30} = xo;
let Inst{31} = 0;
@@ -563,7 +547,7 @@ class XForm_10<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
class XForm_11<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
- let B = 0;
+ let RB = 0;
let Pattern = pattern;
}
@@ -571,10 +555,10 @@ class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<3> BF;
- bits<1> L;
+ bits<1> L;
bits<5> RA;
bits<5> RB;
-
+
let Inst{6-8} = BF;
let Inst{9} = 0;
let Inst{10} = L;
@@ -651,13 +635,13 @@ class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<3> BF;
- bits<5> FRA;
- bits<5> FRB;
-
+ bits<5> RA;
+ bits<5> RB;
+
let Inst{6-8} = BF;
let Inst{9-10} = 0;
- let Inst{11-15} = FRA;
- let Inst{16-20} = FRB;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
let Inst{21-30} = xo;
let Inst{31} = 0;
}
@@ -665,7 +649,7 @@ class XForm_17<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
class XForm_17a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_17<opcode, xo, OOL, IOL, asmstr, itin > {
- let FRA = 0;
+ let RA = 0;
let Pattern = pattern;
}
@@ -754,7 +738,7 @@ class XForm_25_memOp<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
class XForm_26<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
- let A = 0;
+ let RA = 0;
}
class XForm_28_memOp<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
@@ -889,15 +873,15 @@ class XForm_0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
let RST = 0;
- let A = 0;
- let B = 0;
+ let RA = 0;
+ let RB = 0;
}
class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
let RST = 0;
- let A = 0;
+ let RA = 0;
}
class XForm_htm0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
@@ -996,7 +980,7 @@ class X_BF3_RS5_RS5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
class X_RD5_XO5_RS5<bits<6> opcode, bits<5> xo2, bits<10> xo, dag OOL, dag IOL,
string asmstr, InstrItinClass itin, list<dag> pattern>
: XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
- let A = xo2;
+ let RA = xo2;
}
class X_BF3_DCMX7_RS5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
@@ -1040,8 +1024,8 @@ class X_RD5_RS5_IM5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
class X_BF3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: XForm_17<opcode, xo, OOL, IOL, asmstr, itin> {
- let FRA = 0;
- let FRB = 0;
+ let RA = 0;
+ let RB = 0;
}
// [PO /// L RA RB XO /]
@@ -1060,14 +1044,14 @@ class XX1Form<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<6> XT;
- bits<5> A;
- bits<5> B;
+ bits<5> RA;
+ bits<5> RB;
let Pattern = pattern;
let Inst{6-10} = XT{4-0};
- let Inst{11-15} = A;
- let Inst{16-20} = B;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
let Inst{21-30} = xo;
let Inst{31} = XT{5};
}
@@ -1079,7 +1063,7 @@ class XX1Form_memOp<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
class XX1_RS6_RD5_XO<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
string asmstr, InstrItinClass itin, list<dag> pattern>
: XX1Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
- let B = 0;
+ let RB = 0;
}
class XX2Form<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
@@ -1224,16 +1208,72 @@ class XX2_RD6_DCMX7_RS6<bits<6> opcode, bits<4> xo1, bits<3> xo2,
class XForm_XD6_RA5_RB5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
string asmstr, InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
- bits<11> D_RA_XD;
+ bits<5> RA;
+ bits<6> D;
bits<5> RB;
let Pattern = pattern;
- let Inst{6-10} = D_RA_XD{4-0}; // D
- let Inst{11-15} = D_RA_XD{10-6}; // RA
+ let Inst{6-10} = D{4-0}; // D
+ let Inst{11-15} = RA;
let Inst{16-20} = RB;
let Inst{21-30} = xo;
- let Inst{31} = D_RA_XD{5}; // DX
+ let Inst{31} = D{5}; // DX
+}
+
+class XForm_BF3_UIM6_FRB5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<6> UIM;
+ bits<5> FRB;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = BF;
+ let Inst{9} = 0;
+ let Inst{10-15} = UIM;
+ let Inst{16-20} = FRB;
+ let Inst{21-30} = xo;
+ let Inst{31} = 0;
+}
+
+class XForm_SP2_FRTB5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ list<dag> pattern, InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<2> SP;
+ bits<5> FRT;
+ bits<5> FRB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isRecordForm
+
+ let Inst{6 - 10} = FRT;
+ let Inst{11 - 12} = SP;
+ let Inst{13 - 15} = 0;
+ let Inst{16 - 20} = FRB;
+ let Inst{21 - 30} = xo;
+ let Inst{31} = RC;
+}
+
+class XForm_S1_FRTB5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, list<dag> pattern, InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bit S;
+ bits<5> FRT;
+ bits<5> FRB;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isRecordForm
+
+ let Inst{6 - 10} = FRT;
+ let Inst{11} = S;
+ let Inst{12 - 15} = 0;
+ let Inst{16 - 20} = FRB;
+ let Inst{21 - 30} = xo;
+ let Inst{31} = RC;
}
class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
@@ -1347,14 +1387,14 @@ class XX4Form<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr,
class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<31, OOL, IOL, asmstr, itin> {
- bits<5> A;
- bits<5> B;
+ bits<5> RA;
+ bits<5> RB;
let Pattern = pattern;
let Inst{6-10} = immfield;
- let Inst{11-15} = A;
- let Inst{16-20} = B;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
let Inst{21-30} = xo;
let Inst{31} = 0;
}
@@ -1363,14 +1403,14 @@ class DCB_Form_hint<bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<31, OOL, IOL, asmstr, itin> {
bits<5> TH;
- bits<5> A;
- bits<5> B;
+ bits<5> RA;
+ bits<5> RB;
let Pattern = pattern;
let Inst{6-10} = TH;
- let Inst{11-15} = A;
- let Inst{16-20} = B;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
let Inst{21-30} = xo;
let Inst{31} = 0;
}
@@ -1380,16 +1420,16 @@ class DSS_Form<bits<1> T, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<31, OOL, IOL, asmstr, itin> {
bits<2> STRM;
- bits<5> A;
- bits<5> B;
+ bits<5> RA;
+ bits<5> RB;
let Pattern = pattern;
let Inst{6} = T;
let Inst{7-8} = 0;
let Inst{9-10} = STRM;
- let Inst{11-15} = A;
- let Inst{16-20} = B;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
let Inst{21-30} = xo;
let Inst{31} = 0;
}
@@ -1552,7 +1592,8 @@ class XLForm_2_and_DSForm_1<bits<6> opcode1, bits<10> xo1, bit lk,
bits<2> BH;
bits<5> RST;
- bits<19> DS_RA;
+ bits<5> RA;
+ bits<14> D;
let Pattern = pattern;
@@ -1564,8 +1605,8 @@ class XLForm_2_and_DSForm_1<bits<6> opcode1, bits<10> xo1, bit lk,
let Inst{31} = lk;
let Inst{38-42} = RST;
- let Inst{43-47} = DS_RA{18-14}; // Register #
- let Inst{48-61} = DS_RA{13-0}; // Displacement.
+ let Inst{43-47} = RA;
+ let Inst{48-61} = D;
let Inst{62-63} = xo2;
}
@@ -1588,7 +1629,8 @@ class XLForm_2_ext_and_DForm_1<bits<6> opcode1, bits<10> xo1, bits<5> bo,
: I2<opcode1, opcode2, OOL, IOL, asmstr, itin> {
bits<5> RST;
- bits<21> D_RA;
+ bits<5> RA;
+ bits<16> D;
let Pattern = pattern;
@@ -1600,18 +1642,18 @@ class XLForm_2_ext_and_DForm_1<bits<6> opcode1, bits<10> xo1, bits<5> bo,
let Inst{31} = lk;
let Inst{38-42} = RST;
- let Inst{43-47} = D_RA{20-16}; // Base Register
- let Inst{48-63} = D_RA{15-0}; // Displacement
+ let Inst{43-47} = RA;
+ let Inst{48-63} = D;
}
// 1.7.8 XFX-Form
class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
- bits<5> RT;
+ bits<5> RST;
bits<10> SPR;
- let Inst{6-10} = RT;
+ let Inst{6-10} = RST;
let Inst{11} = SPR{4};
let Inst{12} = SPR{3};
let Inst{13} = SPR{2};
@@ -1647,22 +1689,22 @@ class XFXForm_3p<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> RT;
- bits<10> Entry;
+ bits<10> imm;
let Pattern = pattern;
let Inst{6-10} = RT;
- let Inst{11-20} = Entry;
+ let Inst{11-20} = imm;
let Inst{21-30} = xo;
let Inst{31} = 0;
}
class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin>
+ InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<8> FXM;
- bits<5> rS;
-
- let Inst{6-10} = rS;
+ bits<5> RST;
+
+ let Inst{6-10} = RST;
let Inst{11} = 0;
let Inst{12-19} = FXM;
let Inst{20} = 0;
@@ -1671,12 +1713,12 @@ class XFXForm_5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
}
class XFXForm_5a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin>
+ InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> {
- bits<5> ST;
+ bits<5> RST;
bits<8> FXM;
-
- let Inst{6-10} = ST;
+
+ let Inst{6-10} = RST;
let Inst{11} = 1;
let Inst{12-19} = FXM;
let Inst{20} = 0;
@@ -1684,16 +1726,6 @@ class XFXForm_5a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
-class XFXForm_7<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
- InstrItinClass itin>
- : XFXForm_1<opcode, xo, OOL, IOL, asmstr, itin>;
-
-class XFXForm_7_ext<bits<6> opcode, bits<10> xo, bits<10> spr,
- dag OOL, dag IOL, string asmstr, InstrItinClass itin>
- : XFXForm_7<opcode, xo, OOL, IOL, asmstr, itin> {
- let SPR = spr;
-}
-
// XFL-Form - MTFSF
// This is probably 1.7.9, but I don't have the reference that uses this
// numbering scheme...
@@ -1701,7 +1733,7 @@ class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag>pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<8> FM;
- bits<5> rT;
+ bits<5> RT;
bit RC = 0; // set by isRecordForm
let Pattern = pattern;
@@ -1709,7 +1741,7 @@ class XFLForm<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{6} = 0;
let Inst{7-14} = FM;
let Inst{15} = 0;
- let Inst{16-20} = rT;
+ let Inst{16-20} = RT;
let Inst{21-30} = xo;
let Inst{31} = RC;
}
@@ -1737,7 +1769,7 @@ class XFLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
- bits<5> A;
+ bits<5> RA;
bits<5> RS;
bits<6> SH;
@@ -1745,7 +1777,7 @@ class XSForm_1<bits<6> opcode, bits<9> xo, dag OOL, dag IOL, string asmstr,
let Pattern = pattern;
let Inst{6-10} = RS;
- let Inst{11-15} = A;
+ let Inst{11-15} = RA;
let Inst{16-20} = SH{4,3,2,1,0};
let Inst{21-29} = xo;
let Inst{30} = SH{5};
@@ -1853,7 +1885,23 @@ class MForm_1<bits<6> opcode, dag OOL, dag IOL, string asmstr,
class MForm_2<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
- : MForm_1<opcode, OOL, IOL, asmstr, itin, pattern> {
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RA;
+ bits<5> RS;
+ bits<5> SH;
+ bits<5> MB;
+ bits<5> ME;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isRecordForm
+
+ let Inst{6-10} = RS;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = SH;
+ let Inst{21-25} = MB;
+ let Inst{26-30} = ME;
+ let Inst{31} = RC;
}
// 1.7.14 MD-Form
@@ -1905,17 +1953,17 @@ class MDSForm_1<bits<6> opcode, bits<4> xo, dag OOL, dag IOL, string asmstr,
class VAForm_1<bits<6> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<4, OOL, IOL, asmstr, itin> {
- bits<5> VD;
- bits<5> VA;
- bits<5> VC;
- bits<5> VB;
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RC;
+ bits<5> RB;
let Pattern = pattern;
- let Inst{6-10} = VD;
- let Inst{11-15} = VA;
- let Inst{16-20} = VB;
- let Inst{21-25} = VC;
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-25} = RC;
let Inst{26-31} = xo;
}
@@ -1923,33 +1971,33 @@ class VAForm_1<bits<6> xo, dag OOL, dag IOL, string asmstr,
class VAForm_1a<bits<6> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<4, OOL, IOL, asmstr, itin> {
- bits<5> VD;
- bits<5> VA;
- bits<5> VB;
- bits<5> VC;
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
+ bits<5> RC;
let Pattern = pattern;
- let Inst{6-10} = VD;
- let Inst{11-15} = VA;
- let Inst{16-20} = VB;
- let Inst{21-25} = VC;
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
+ let Inst{21-25} = RC;
let Inst{26-31} = xo;
}
class VAForm_2<bits<6> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<4, OOL, IOL, asmstr, itin> {
- bits<5> VD;
- bits<5> VA;
- bits<5> VB;
+ bits<5> RT;
+ bits<5> RA;
+ bits<5> RB;
bits<4> SH;
let Pattern = pattern;
- let Inst{6-10} = VD;
- let Inst{11-15} = VA;
- let Inst{16-20} = VB;
+ let Inst{6-10} = RT;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
let Inst{21} = 0;
let Inst{22-25} = SH;
let Inst{26-31} = xo;
@@ -2039,12 +2087,12 @@ class VXForm_5<bits<11> xo, dag OOL, dag IOL, string asmstr,
class VXForm_RD5_XO5_RS5<bits<11> xo, bits<5> eo, dag OOL, dag IOL,
string asmstr, InstrItinClass itin, list<dag> pattern>
: I<4, OOL, IOL, asmstr, itin> {
- bits<5> RD;
+ bits<5> VD;
bits<5> VB;
let Pattern = pattern;
- let Inst{6-10} = RD;
+ let Inst{6-10} = VD;
let Inst{11-15} = eo;
let Inst{16-20} = VB;
let Inst{21-31} = xo;
@@ -2139,6 +2187,43 @@ class VX_RD5_RSp5_PS1_XO9<bits<9> xo, dag OOL, dag IOL, string asmstr,
let Inst{23-31} = xo;
}
+class Z22Form_BF3_FRA5_DCM6<bits<6> opcode, bits<9> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<3> BF;
+ bits<5> FRA;
+ bits<6> DCM;
+
+ let Pattern = pattern;
+
+ let Inst{6-8} = BF;
+ let Inst{9-10} = 0;
+ let Inst{11-15} = FRA;
+ let Inst{16-21} = DCM;
+ let Inst{22-30} = xo;
+ let Inst{31} = 0;
+}
+
+class Z22Form_FRTA5_SH6<bits<6> opcode, bits<9> xo, dag OOL, dag IOL,
+ string asmstr, list<dag> pattern, InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<6> SH;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isRecordForm
+
+ let Inst{6 - 10} = FRT;
+ let Inst{11 - 15} = FRA;
+ let Inst{16 - 21} = SH;
+ let Inst{22 - 30} = xo;
+ let Inst{31} = RC;
+}
+
class Z23Form_8<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
@@ -2178,6 +2263,54 @@ class Z23Form_RTAB5_CY2<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
let Inst{31} = 0;
}
+class Z23Form_FRTAB5_RMC2<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+ string asmstr, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, NoItinerary> {
+ bits<5> FRT;
+ bits<5> FRA;
+ bits<5> FRB;
+ bits<2> RMC;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isRecordForm
+
+ let Inst{6 - 10} = FRT;
+ let Inst{11 - 15} = FRA;
+ let Inst{16 - 20} = FRB;
+ let Inst{21 - 22} = RMC;
+ let Inst{23 - 30} = xo;
+ let Inst{31} = RC;
+}
+
+class Z23Form_TE5_FRTB5_RMC2<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+ string asmstr, list<dag> pattern>
+ : Z23Form_FRTAB5_RMC2<opcode, xo, OOL, IOL, asmstr, pattern> {
+ bits<5> TE;
+ let FRA = TE;
+}
+
+class Z23Form_FRTB5_R1_RMC2<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+ string asmstr, list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, NoItinerary> {
+ bits<5> FRT;
+ bits<1> R;
+ bits<5> FRB;
+ bits<2> RMC;
+
+ let Pattern = pattern;
+
+ bit RC = 0; // set by isRecordForm
+
+ let Inst{6 - 10} = FRT;
+ let Inst{11 - 14} = 0;
+ let Inst{15} = R;
+ let Inst{16 - 20} = FRB;
+ let Inst{21 - 22} = RMC;
+ let Inst{23 - 30} = xo;
+ let Inst{31} = RC;
+}
+
//===----------------------------------------------------------------------===//
// EmitTimePseudo won't have encoding information for the [MC]CodeEmitter
// stuff
diff --git a/llvm/lib/Target/PowerPC/PPCInstrFuture.td b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
index a27d5a9741e5..1ac91fadf658 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrFuture.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrFuture.td
@@ -53,36 +53,36 @@ let Predicates = [IsISAFuture] in {
let Predicates = [HasVSX, IsISAFuture] in {
let mayLoad = 1 in {
- def LXVRL : XX1Form_memOp<31, 525, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
- "lxvrl $XT, $src, $rB", IIC_LdStLoad, []>;
+ def LXVRL : XX1Form_memOp<31, 525, (outs vsrc:$XT), (ins memr:$RA, g8rc:$RB),
+ "lxvrl $XT, $RA, $RB", IIC_LdStLoad, []>;
- def LXVRLL : XX1Form_memOp<31, 557, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
- "lxvrll $XT, $src, $rB", IIC_LdStLoad, []>;
+ def LXVRLL : XX1Form_memOp<31, 557, (outs vsrc:$XT), (ins memr:$RA, g8rc:$RB),
+ "lxvrll $XT, $RA, $RB", IIC_LdStLoad, []>;
def LXVPRL : XForm_XTp5_XAB5<31, 589, (outs vsrprc:$XTp),
- (ins memr:$src, g8rc:$rB),
- "lxvprl $XTp, $src, $rB", IIC_LdStLFD, []>;
+ (ins memr:$RA, g8rc:$RB),
+ "lxvprl $XTp, $RA, $RB", IIC_LdStLFD, []>;
def LXVPRLL : XForm_XTp5_XAB5<31, 621, (outs vsrprc:$XTp),
- (ins memr:$src, g8rc:$rB),
- "lxvprll $XTp, $src, $rB", IIC_LdStLFD, []>;
+ (ins memr:$RA, g8rc:$RB),
+ "lxvprll $XTp, $RA, $RB", IIC_LdStLFD, []>;
}
let mayStore = 1 in {
def STXVRL : XX1Form_memOp<31, 653, (outs),
- (ins vsrc:$XT, memr:$dst, g8rc:$rB),
- "stxvrl $XT, $dst, $rB", IIC_LdStLoad, []>;
+ (ins vsrc:$XT, memr:$RA, g8rc:$RB),
+ "stxvrl $XT, $RA, $RB", IIC_LdStLoad, []>;
def STXVRLL : XX1Form_memOp<31, 685, (outs),
- (ins vsrc:$XT, memr:$dst, g8rc:$rB),
- "stxvrll $XT, $dst, $rB", IIC_LdStLoad, []>;
+ (ins vsrc:$XT, memr:$RA, g8rc:$RB),
+ "stxvrll $XT, $RA, $RB", IIC_LdStLoad, []>;
def STXVPRL : XForm_XTp5_XAB5<31, 717, (outs),
- (ins vsrprc:$XTp, memr:$src, g8rc:$rB),
- "stxvprl $XTp, $src, $rB", IIC_LdStLFD, []>;
+ (ins vsrprc:$XTp, memr:$RA, g8rc:$RB),
+ "stxvprl $XTp, $RA, $RB", IIC_LdStLFD, []>;
def STXVPRLL : XForm_XTp5_XAB5<31, 749, (outs),
- (ins vsrprc:$XTp, memr:$src, g8rc:$rB),
- "stxvprll $XTp, $src, $rB", IIC_LdStLFD, []>;
+ (ins vsrprc:$XTp, memr:$RA, g8rc:$RB),
+ "stxvprll $XTp, $RA, $RB", IIC_LdStLFD, []>;
}
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrHTM.td b/llvm/lib/Target/PowerPC/PPCInstrHTM.td
index ec1c397ff57f..8d0ac512b290 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrHTM.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrHTM.td
@@ -35,30 +35,30 @@ def TEND : XForm_htm1 <31, 686,
(outs), (ins u1imm:$A), "tend. $A", IIC_SprMTSPR>;
def TABORT : XForm_base_r3xo <31, 910,
- (outs), (ins gprc:$A), "tabort. $A", IIC_SprMTSPR,
+ (outs), (ins gprc:$RA), "tabort. $RA", IIC_SprMTSPR,
[]>, isRecordForm {
let RST = 0;
- let B = 0;
+ let RB = 0;
}
def TABORTWC : XForm_base_r3xo <31, 782,
- (outs), (ins u5imm:$RTS, gprc:$A, gprc:$B),
- "tabortwc. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ (outs), (ins u5imm:$RST, gprc:$RA, gprc:$RB),
+ "tabortwc. $RST, $RA, $RB", IIC_SprMTSPR, []>,
isRecordForm;
def TABORTWCI : XForm_base_r3xo <31, 846,
- (outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
- "tabortwci. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ (outs), (ins u5imm:$RST, gprc:$RA, u5imm:$RB),
+ "tabortwci. $RST, $RA, $RB", IIC_SprMTSPR, []>,
isRecordForm;
def TABORTDC : XForm_base_r3xo <31, 814,
- (outs), (ins u5imm:$RTS, gprc:$A, gprc:$B),
- "tabortdc. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ (outs), (ins u5imm:$RST, gprc:$RA, gprc:$RB),
+ "tabortdc. $RST, $RA, $RB", IIC_SprMTSPR, []>,
isRecordForm;
def TABORTDCI : XForm_base_r3xo <31, 878,
- (outs), (ins u5imm:$RTS, gprc:$A, u5imm:$B),
- "tabortdci. $RTS, $A, $B", IIC_SprMTSPR, []>,
+ (outs), (ins u5imm:$RST, gprc:$RA, u5imm:$RB),
+ "tabortdci. $RST, $RA, $RB", IIC_SprMTSPR, []>,
isRecordForm;
def TSR : XForm_htm2 <31, 750,
@@ -66,19 +66,19 @@ def TSR : XForm_htm2 <31, 750,
isRecordForm;
def TRECLAIM : XForm_base_r3xo <31, 942,
- (outs), (ins gprc:$A), "treclaim. $A",
+ (outs), (ins gprc:$RA), "treclaim. $RA",
IIC_SprMTSPR, []>,
isRecordForm {
let RST = 0;
- let B = 0;
+ let RB = 0;
}
def TRECHKPT : XForm_base_r3xo <31, 1006,
(outs), (ins), "trechkpt.", IIC_SprMTSPR, []>,
isRecordForm {
let RST = 0;
- let A = 0;
- let B = 0;
+ let RA = 0;
+ let RB = 0;
}
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index fb7316e07459..784953dbc847 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -226,7 +226,7 @@ void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
MachineInstr &NewMI2) const {
// Propagate FP flags from the original instructions.
// But clear poison-generating flags because those may not be valid now.
- uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
+ uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
NewMI1.setFlags(IntersectedFlags);
NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap);
NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap);
@@ -239,7 +239,7 @@ void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
}
void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &MI,
- uint16_t Flags) const {
+ uint32_t Flags) const {
MI.setFlags(Flags);
MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
@@ -841,7 +841,7 @@ void PPCInstrInfo::reassociateFMA(
}
}
- uint16_t IntersectedFlags = 0;
+ uint32_t IntersectedFlags = 0;
if (IsILPReassociate)
IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();
else
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 7c95f3ca2b4c..3dc5e2680c61 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -279,11 +279,9 @@ class PPCInstrInfo : public PPCGenInstrInfo {
SmallVectorImpl<MachineInstr *> &InsInstrs,
SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const;
- bool isLoadFromConstantPool(MachineInstr *I) const;
Register
generateLoadForNewConst(unsigned Idx, MachineInstr *MI, Type *Ty,
SmallVectorImpl<MachineInstr *> &InsInstrs) const;
- const Constant *getConstantFromConstantPool(MachineInstr *I) const;
virtual void anchor();
protected:
@@ -304,6 +302,9 @@ protected:
public:
explicit PPCInstrInfo(PPCSubtarget &STI);
+ bool isLoadFromConstantPool(MachineInstr *I) const;
+ const Constant *getConstantFromConstantPool(MachineInstr *I) const;
+
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
@@ -517,7 +518,7 @@ public:
// PowerPC specific version of setSpecialOperandAttr that copies Flags to MI
// and clears nuw, nsw, and exact flags.
- void setSpecialOperandAttr(MachineInstr &MI, uint16_t Flags) const;
+ void setSpecialOperandAttr(MachineInstr &MI, uint32_t Flags) const;
bool isCoalescableExtInstr(const MachineInstr &MI,
Register &SrcReg, Register &DstReg,
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 1551f3f32841..616f4e48cfb8 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -32,7 +32,7 @@ def SDT_PPCcv_fp_to_int : SDTypeProfile<1, 1, [
SDTCisFP<0>, SDTCisFP<1>
]>;
def SDT_PPCstore_scal_int_from_vsr : SDTypeProfile<0, 3, [
- SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
+ SDTCisFP<0>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
]>;
def SDT_PPCVexts : SDTypeProfile<1, 2, [
SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2>
@@ -164,10 +164,6 @@ def PPCany_fcfidus : PatFrags<(ops node:$op),
[(PPCfcfidus node:$op),
(PPCstrict_fcfidus node:$op)]>;
-def PPCcv_fp_to_uint_in_vsr:
- SDNode<"PPCISD::FP_TO_UINT_IN_VSR", SDT_PPCcv_fp_to_int, []>;
-def PPCcv_fp_to_sint_in_vsr:
- SDNode<"PPCISD::FP_TO_SINT_IN_VSR", SDT_PPCcv_fp_to_int, []>;
def PPCstore_scal_int_from_vsr:
SDNode<"PPCISD::ST_VSR_SCAL_INT", SDT_PPCstore_scal_int_from_vsr,
[SDNPHasChain, SDNPMayStore]>;
@@ -217,6 +213,7 @@ def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
+def PPCgetTpointer : SDNode<"PPCISD::GET_TPOINTER", SDTIntLeaf, []>;
def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
@@ -337,7 +334,7 @@ def PPCbctrl_load_toc_rm : SDNode<"PPCISD::BCTRL_LOAD_TOC_RM",
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
-def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
+def retglue : SDNode<"PPCISD::RET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret,
@@ -733,6 +730,18 @@ def IsNotISAFuture : Predicate<"!Subtarget->isISAFuture()">;
//===----------------------------------------------------------------------===//
// PowerPC Multiclass Definitions.
+multiclass XForm_base_r3xo_r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XForm_base_r3xo<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)),
+ NoItinerary, pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def _rec : XForm_base_r3xo<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)),
+ NoItinerary, []>, isRecordForm, RecFormRel;
+ }
+}
multiclass XForm_6r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
@@ -933,6 +942,20 @@ multiclass XOForm_3rc<bits<6> opcode, bits<9> xo, bit oe, dag OOL, dag IOL,
}
}
+multiclass MForm_1r<bits<6> opcode, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : MForm_1<opcode, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def _rec : MForm_1<opcode, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isRecordForm, RecFormRel;
+ }
+}
+
multiclass MForm_2r<bits<6> opcode, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
@@ -1032,6 +1055,32 @@ multiclass XForm_28r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
}
}
+multiclass XForm_SP2_FRTB5r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XForm_SP2_FRTB5<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)),
+ pattern, NoItinerary>, RecFormRel;
+ let Defs = [CR1] in
+ def _rec : XForm_SP2_FRTB5<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)),
+ [], NoItinerary>, isRecordForm, RecFormRel;
+ }
+}
+
+multiclass XForm_S1_FRTB5r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XForm_S1_FRTB5<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)),
+ pattern, NoItinerary>, RecFormRel;
+ let Defs = [CR1] in
+ def _rec : XForm_S1_FRTB5<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), [],
+ NoItinerary>, isRecordForm, RecFormRel;
+ }
+}
+
multiclass AForm_1r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
@@ -1074,6 +1123,66 @@ multiclass AForm_3r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
}
}
+multiclass
+ Z23Form_TE5_FRTB5_RMC2r<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME
+ : Z23Form_TE5_FRTB5_RMC2<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)),
+ pattern>, RecFormRel;
+ let Defs = [CR0] in
+ def _rec : Z23Form_TE5_FRTB5_RMC2<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)),
+ []>, isRecordForm, RecFormRel;
+ }
+}
+
+multiclass
+ Z23Form_FRTAB5_RMC2r<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : Z23Form_FRTAB5_RMC2<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)),
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def _rec : Z23Form_FRTAB5_RMC2<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)),
+ []>, isRecordForm, RecFormRel;
+ }
+}
+
+multiclass
+ Z23Form_FRTB5_R1_RMC2r<bits<6> opcode, bits<8> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : Z23Form_FRTB5_R1_RMC2<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)),
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def _rec : Z23Form_FRTB5_R1_RMC2<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)),
+ []>, isRecordForm, RecFormRel;
+ }
+}
+
+multiclass Z22Form_FRTA5_SH6r<bits<6> opcode, bits<9> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : Z22Form_FRTA5_SH6<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)),
+ pattern, NoItinerary>, RecFormRel;
+ let Defs = [CR1] in
+ def _rec : Z22Form_FRTA5_SH6<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)),
+ [], NoItinerary>, isRecordForm, RecFormRel;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// END OF MULTICLASS DEFINITIONS
+//===----------------------------------------------------------------------===//
+
//===----------------------------------------------------------------------===//
// PowerPC Instruction Definitions.
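The new *_r multiclasses collected above (XForm_base_r3xo_r, MForm_1r, XForm_SP2_FRTB5r, XForm_S1_FRTB5r and the Z22/Z23 variants) all follow one shape: a single defm yields the base instruction plus a _rec record-form twin whose mnemonic gains a trailing dot, is marked isRecordForm/RecFormRel, and defines CR0 or CR1. Below is a minimal, self-contained sketch of just the naming and asm-string side of that expansion; ToyInst, ToyRecForm and TOYADD are hypothetical, not LLVM's classes, and the CR0/CR1 Defs and RecFormRel adapters of the real multiclasses are omitted.

// Toy version of the base/record-form multiclass pattern.
class ToyInst<string asm> {
  string AsmString = asm;
  bit RC = 0;            // plays the role of the record-form ('.') bit
}
multiclass ToyRecForm<string asmbase, string asmstr> {
  def NAME : ToyInst<!strconcat(asmbase, !strconcat(" ", asmstr))>;
  def _rec : ToyInst<!strconcat(asmbase, !strconcat(". ", asmstr))> {
    let RC = 1;
  }
}
// One defm produces two records: TOYADD ("toyadd $RT, $RA, $RB", RC = 0)
// and TOYADD_rec ("toyadd. $RT, $RA, $RB", RC = 1).
defm TOYADD : ToyRecForm<"toyadd", "$RT, $RA, $RB">;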
@@ -1188,21 +1297,21 @@ def RESTORE_CRBIT : PPCEmitTimePseudo<(outs crbitrc:$cond), (ins memri:$F),
let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, hasSideEffects = 0 in {
let isPredicable = 1, isReturn = 1, Uses = [LR, RM] in
def BLR : XLForm_2_ext<19, 16, 20, 0, 0, (outs), (ins), "blr", IIC_BrB,
- [(retflag)]>, Requires<[In32BitMode]>;
+ [(retglue)]>, Requires<[In32BitMode]>;
let isBranch = 1, isIndirectBranch = 1, Uses = [CTR] in {
let isPredicable = 1 in
def BCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
[]>;
let isCodeGenOnly = 1 in {
- def BCCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
+ def BCCCTR : XLForm_2_br<19, 528, 0, (outs), (ins (pred $BIBO, $CR):$cond),
"b${cond:cc}ctr${cond:pm} ${cond:reg}", IIC_BrB,
[]>;
- def BCCTR : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$bi),
- "bcctr 12, $bi, 0", IIC_BrB, []>;
- def BCCTRn : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$bi),
- "bcctr 4, $bi, 0", IIC_BrB, []>;
+ def BCCTR : XLForm_2_br2<19, 528, 12, 0, (outs), (ins crbitrc:$BI),
+ "bcctr 12, $BI, 0", IIC_BrB, []>;
+ def BCCTRn : XLForm_2_br2<19, 528, 4, 0, (outs), (ins crbitrc:$BI),
+ "bcctr 4, $BI, 0", IIC_BrB, []>;
}
}
}
@@ -1230,48 +1339,48 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
hasSideEffects = 0 in {
let isBarrier = 1 in {
let isPredicable = 1 in
- def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
- "b $dst", IIC_BrB,
- [(br bb:$dst)]>;
- def BA : IForm<18, 1, 0, (outs), (ins absdirectbrtarget:$dst),
- "ba $dst", IIC_BrB, []>;
+ def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$LI),
+ "b $LI", IIC_BrB,
+ [(br bb:$LI)]>;
+ def BA : IForm<18, 1, 0, (outs), (ins absdirectbrtarget:$LI),
+ "ba $LI", IIC_BrB, []>;
}
// BCC represents an arbitrary conditional branch on a predicate.
// FIXME: should be able to write a pattern for PPCcondbranch, but can't use
// a two-value operand where a dag node expects two operands. :(
let isCodeGenOnly = 1 in {
- class BCC_class : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
- "b${cond:cc}${cond:pm} ${cond:reg}, $dst"
- /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>;
+ class BCC_class : BForm<16, 0, 0, (outs), (ins (pred $BIBO, $CR):$cond, condbrtarget:$BD),
+ "b${cond:cc}${cond:pm} ${cond:reg}, $BD"
+ /*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$BD)]*/>;
def BCC : BCC_class;
// The same as BCC, except that it's not a terminator. Used for introducing
// control flow dependency without creating new blocks.
let isTerminator = 0 in def CTRL_DEP : BCC_class;
- def BCCA : BForm<16, 1, 0, (outs), (ins pred:$cond, abscondbrtarget:$dst),
- "b${cond:cc}a${cond:pm} ${cond:reg}, $dst">;
+ def BCCA : BForm<16, 1, 0, (outs), (ins (pred $BIBO, $CR):$cond, abscondbrtarget:$BD),
+ "b${cond:cc}a${cond:pm} ${cond:reg}, $BD">;
let isReturn = 1, Uses = [LR, RM] in
- def BCCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond),
+ def BCCLR : XLForm_2_br<19, 16, 0, (outs), (ins (pred $BIBO, $CR):$cond),
"b${cond:cc}lr${cond:pm} ${cond:reg}", IIC_BrB, []>;
}
let isCodeGenOnly = 1 in {
- let Pattern = [(brcond i1:$bi, bb:$dst)] in
- def BC : BForm_4<16, 12, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst),
- "bc 12, $bi, $dst">;
+ let Pattern = [(brcond i1:$BI, bb:$BD)] in
+ def BC : BForm_4<16, 12, 0, 0, (outs), (ins crbitrc:$BI, condbrtarget:$BD),
+ "bc 12, $BI, $BD">;
- let Pattern = [(brcond (not i1:$bi), bb:$dst)] in
- def BCn : BForm_4<16, 4, 0, 0, (outs), (ins crbitrc:$bi, condbrtarget:$dst),
- "bc 4, $bi, $dst">;
+ let Pattern = [(brcond (not i1:$BI), bb:$BD)] in
+ def BCn : BForm_4<16, 4, 0, 0, (outs), (ins crbitrc:$BI, condbrtarget:$BD),
+ "bc 4, $BI, $BD">;
let isReturn = 1, Uses = [LR, RM] in {
- def BCLR : XLForm_2_br2<19, 16, 12, 0, (outs), (ins crbitrc:$bi),
- "bclr 12, $bi, 0", IIC_BrB, []>;
- def BCLRn : XLForm_2_br2<19, 16, 4, 0, (outs), (ins crbitrc:$bi),
- "bclr 4, $bi, 0", IIC_BrB, []>;
+ def BCLR : XLForm_2_br2<19, 16, 12, 0, (outs), (ins crbitrc:$BI),
+ "bclr 12, $BI, 0", IIC_BrB, []>;
+ def BCLRn : XLForm_2_br2<19, 16, 4, 0, (outs), (ins crbitrc:$BI),
+ "bclr 4, $BI, 0", IIC_BrB, []>;
}
}
@@ -1291,30 +1400,30 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
}
let Defs = [CTR], Uses = [CTR] in {
- def BDZ : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdz $dst">;
- def BDNZ : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdnz $dst">;
- def BDZA : BForm_1<16, 18, 1, 0, (outs), (ins abscondbrtarget:$dst),
- "bdza $dst">;
- def BDNZA : BForm_1<16, 16, 1, 0, (outs), (ins abscondbrtarget:$dst),
- "bdnza $dst">;
- def BDZp : BForm_1<16, 27, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdz+ $dst">;
- def BDNZp: BForm_1<16, 25, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdnz+ $dst">;
- def BDZAp : BForm_1<16, 27, 1, 0, (outs), (ins abscondbrtarget:$dst),
- "bdza+ $dst">;
- def BDNZAp: BForm_1<16, 25, 1, 0, (outs), (ins abscondbrtarget:$dst),
- "bdnza+ $dst">;
- def BDZm : BForm_1<16, 26, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdz- $dst">;
- def BDNZm: BForm_1<16, 24, 0, 0, (outs), (ins condbrtarget:$dst),
- "bdnz- $dst">;
- def BDZAm : BForm_1<16, 26, 1, 0, (outs), (ins abscondbrtarget:$dst),
- "bdza- $dst">;
- def BDNZAm: BForm_1<16, 24, 1, 0, (outs), (ins abscondbrtarget:$dst),
- "bdnza- $dst">;
+ def BDZ : BForm_1<16, 18, 0, 0, (outs), (ins condbrtarget:$BD),
+ "bdz $BD">;
+ def BDNZ : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$BD),
+ "bdnz $BD">;
+ def BDZA : BForm_1<16, 18, 1, 0, (outs), (ins abscondbrtarget:$BD),
+ "bdza $BD">;
+ def BDNZA : BForm_1<16, 16, 1, 0, (outs), (ins abscondbrtarget:$BD),
+ "bdnza $BD">;
+ def BDZp : BForm_1<16, 27, 0, 0, (outs), (ins condbrtarget:$BD),
+ "bdz+ $BD">;
+ def BDNZp: BForm_1<16, 25, 0, 0, (outs), (ins condbrtarget:$BD),
+ "bdnz+ $BD">;
+ def BDZAp : BForm_1<16, 27, 1, 0, (outs), (ins abscondbrtarget:$BD),
+ "bdza+ $BD">;
+ def BDNZAp: BForm_1<16, 25, 1, 0, (outs), (ins abscondbrtarget:$BD),
+ "bdnza+ $BD">;
+ def BDZm : BForm_1<16, 26, 0, 0, (outs), (ins condbrtarget:$BD),
+ "bdz- $BD">;
+ def BDNZm: BForm_1<16, 24, 0, 0, (outs), (ins condbrtarget:$BD),
+ "bdnz- $BD">;
+ def BDZAm : BForm_1<16, 26, 1, 0, (outs), (ins abscondbrtarget:$BD),
+ "bdza- $BD">;
+ def BDNZAm: BForm_1<16, 24, 1, 0, (outs), (ins abscondbrtarget:$BD),
+ "bdnza- $BD">;
}
}
@@ -1322,36 +1431,36 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
let isCall = 1, hasCtrlDep = 1, isCodeGenOnly = 1, PPC970_Unit = 7,
hasSideEffects = 0 in {
let Defs = [LR], Uses = [RM] in {
- def BCLalways : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$dst),
- "bcl 20, 31, $dst">;
+ def BCLalways : BForm_2<16, 20, 31, 0, 1, (outs), (ins condbrtarget:$BD),
+ "bcl 20, 31, $BD">;
}
}
let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
// Convenient aliases for call instructions
let Uses = [RM] in {
- def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func),
- "bl $func", IIC_BrB, []>; // See Pat patterns below.
- def BLA : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
- "bla $func", IIC_BrB, [(PPCcall (i32 imm:$func))]>;
+ def BL : IForm<18, 0, 1, (outs), (ins calltarget:$LI),
+ "bl $LI", IIC_BrB, []>; // See Pat patterns below.
+ def BLA : IForm<18, 1, 1, (outs), (ins abscalltarget:$LI),
+ "bla $LI", IIC_BrB, [(PPCcall (i32 imm:$LI))]>;
let isCodeGenOnly = 1 in {
- def BL_TLS : IForm<18, 0, 1, (outs), (ins tlscall32:$func),
- "bl $func", IIC_BrB, []>;
- def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst),
- "b${cond:cc}l${cond:pm} ${cond:reg}, $dst">;
- def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst),
- "b${cond:cc}la${cond:pm} ${cond:reg}, $dst">;
+ def BL_TLS : IForm<18, 0, 1, (outs), (ins tlscall32:$LI),
+ "bl $LI", IIC_BrB, []>;
+ def BCCL : BForm<16, 0, 1, (outs), (ins (pred $BIBO, $CR):$cond, condbrtarget:$BD),
+ "b${cond:cc}l${cond:pm} ${cond:reg}, $BD">;
+ def BCCLA : BForm<16, 1, 1, (outs), (ins (pred $BIBO, $CR):$cond, abscondbrtarget:$BD),
+ "b${cond:cc}la${cond:pm} ${cond:reg}, $BD">;
def BCL : BForm_4<16, 12, 0, 1, (outs),
- (ins crbitrc:$bi, condbrtarget:$dst),
- "bcl 12, $bi, $dst">;
+ (ins crbitrc:$BI, condbrtarget:$BD),
+ "bcl 12, $BI, $BD">;
def BCLn : BForm_4<16, 4, 0, 1, (outs),
- (ins crbitrc:$bi, condbrtarget:$dst),
- "bcl 4, $bi, $dst">;
+ (ins crbitrc:$BI, condbrtarget:$BD),
+ "bcl 4, $BI, $BD">;
def BL_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24,
- (outs), (ins calltarget:$func),
- "bl $func\n\tnop", IIC_BrB, []>;
+ (outs), (ins calltarget:$LI),
+ "bl $LI\n\tnop", IIC_BrB, []>;
}
}
let Uses = [CTR, RM] in {
@@ -1361,14 +1470,14 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
Requires<[In32BitMode]>;
let isCodeGenOnly = 1 in {
- def BCCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
+ def BCCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins (pred $BIBO, $CR):$cond),
"b${cond:cc}ctrl${cond:pm} ${cond:reg}", IIC_BrB,
[]>;
- def BCCTRL : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$bi),
- "bcctrl 12, $bi, 0", IIC_BrB, []>;
- def BCCTRLn : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$bi),
- "bcctrl 4, $bi, 0", IIC_BrB, []>;
+ def BCCTRL : XLForm_2_br2<19, 528, 12, 1, (outs), (ins crbitrc:$BI),
+ "bcctrl 12, $BI, 0", IIC_BrB, []>;
+ def BCCTRLn : XLForm_2_br2<19, 528, 4, 1, (outs), (ins crbitrc:$BI),
+ "bcctrl 4, $BI, 0", IIC_BrB, []>;
}
}
let Uses = [LR, RM] in {
@@ -1376,41 +1485,41 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
"blrl", IIC_BrB, []>;
let isCodeGenOnly = 1 in {
- def BCCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond),
+ def BCCLRL : XLForm_2_br<19, 16, 1, (outs), (ins (pred $BIBO, $CR):$cond),
"b${cond:cc}lrl${cond:pm} ${cond:reg}", IIC_BrB,
[]>;
- def BCLRL : XLForm_2_br2<19, 16, 12, 1, (outs), (ins crbitrc:$bi),
- "bclrl 12, $bi, 0", IIC_BrB, []>;
- def BCLRLn : XLForm_2_br2<19, 16, 4, 1, (outs), (ins crbitrc:$bi),
- "bclrl 4, $bi, 0", IIC_BrB, []>;
+ def BCLRL : XLForm_2_br2<19, 16, 12, 1, (outs), (ins crbitrc:$BI),
+ "bclrl 12, $BI, 0", IIC_BrB, []>;
+ def BCLRLn : XLForm_2_br2<19, 16, 4, 1, (outs), (ins crbitrc:$BI),
+ "bclrl 4, $BI, 0", IIC_BrB, []>;
}
}
let Defs = [CTR], Uses = [CTR, RM] in {
- def BDZL : BForm_1<16, 18, 0, 1, (outs), (ins condbrtarget:$dst),
- "bdzl $dst">;
- def BDNZL : BForm_1<16, 16, 0, 1, (outs), (ins condbrtarget:$dst),
- "bdnzl $dst">;
- def BDZLA : BForm_1<16, 18, 1, 1, (outs), (ins abscondbrtarget:$dst),
- "bdzla $dst">;
- def BDNZLA : BForm_1<16, 16, 1, 1, (outs), (ins abscondbrtarget:$dst),
- "bdnzla $dst">;
- def BDZLp : BForm_1<16, 27, 0, 1, (outs), (ins condbrtarget:$dst),
- "bdzl+ $dst">;
- def BDNZLp: BForm_1<16, 25, 0, 1, (outs), (ins condbrtarget:$dst),
- "bdnzl+ $dst">;
- def BDZLAp : BForm_1<16, 27, 1, 1, (outs), (ins abscondbrtarget:$dst),
- "bdzla+ $dst">;
- def BDNZLAp: BForm_1<16, 25, 1, 1, (outs), (ins abscondbrtarget:$dst),
- "bdnzla+ $dst">;
- def BDZLm : BForm_1<16, 26, 0, 1, (outs), (ins condbrtarget:$dst),
- "bdzl- $dst">;
- def BDNZLm: BForm_1<16, 24, 0, 1, (outs), (ins condbrtarget:$dst),
- "bdnzl- $dst">;
- def BDZLAm : BForm_1<16, 26, 1, 1, (outs), (ins abscondbrtarget:$dst),
- "bdzla- $dst">;
- def BDNZLAm: BForm_1<16, 24, 1, 1, (outs), (ins abscondbrtarget:$dst),
- "bdnzla- $dst">;
+ def BDZL : BForm_1<16, 18, 0, 1, (outs), (ins condbrtarget:$BD),
+ "bdzl $BD">;
+ def BDNZL : BForm_1<16, 16, 0, 1, (outs), (ins condbrtarget:$BD),
+ "bdnzl $BD">;
+ def BDZLA : BForm_1<16, 18, 1, 1, (outs), (ins abscondbrtarget:$BD),
+ "bdzla $BD">;
+ def BDNZLA : BForm_1<16, 16, 1, 1, (outs), (ins abscondbrtarget:$BD),
+ "bdnzla $BD">;
+ def BDZLp : BForm_1<16, 27, 0, 1, (outs), (ins condbrtarget:$BD),
+ "bdzl+ $BD">;
+ def BDNZLp: BForm_1<16, 25, 0, 1, (outs), (ins condbrtarget:$BD),
+ "bdnzl+ $BD">;
+ def BDZLAp : BForm_1<16, 27, 1, 1, (outs), (ins abscondbrtarget:$BD),
+ "bdzla+ $BD">;
+ def BDNZLAp: BForm_1<16, 25, 1, 1, (outs), (ins abscondbrtarget:$BD),
+ "bdnzla+ $BD">;
+ def BDZLm : BForm_1<16, 26, 0, 1, (outs), (ins condbrtarget:$BD),
+ "bdzl- $BD">;
+ def BDNZLm: BForm_1<16, 24, 0, 1, (outs), (ins condbrtarget:$BD),
+ "bdnzl- $BD">;
+ def BDZLAm : BForm_1<16, 26, 1, 1, (outs), (ins abscondbrtarget:$BD),
+ "bdzla- $BD">;
+ def BDNZLAm: BForm_1<16, 24, 1, 1, (outs), (ins abscondbrtarget:$BD),
+ "bdnzla- $BD">;
}
let Defs = [CTR], Uses = [CTR, LR, RM] in {
def BDZLRL : XLForm_2_ext<19, 16, 18, 0, 1, (outs), (ins),
@@ -1431,14 +1540,14 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
let isCall = 1, PPC970_Unit = 7, Defs = [LR, RM], isCodeGenOnly = 1 in {
// Convenient aliases for call instructions
let Uses = [RM] in {
- def BL_RM : IForm<18, 0, 1, (outs), (ins calltarget:$func),
- "bl $func", IIC_BrB, []>; // See Pat patterns below.
- def BLA_RM : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
- "bla $func", IIC_BrB, [(PPCcall_rm (i32 imm:$func))]>;
+ def BL_RM : IForm<18, 0, 1, (outs), (ins calltarget:$LI),
+ "bl $LI", IIC_BrB, []>; // See Pat patterns below.
+ def BLA_RM : IForm<18, 1, 1, (outs), (ins abscalltarget:$LI),
+ "bla $LI", IIC_BrB, [(PPCcall_rm (i32 imm:$LI))]>;
def BL_NOP_RM : IForm_and_DForm_4_zero<18, 0, 1, 24,
- (outs), (ins calltarget:$func),
- "bl $func\n\tnop", IIC_BrB, []>;
+ (outs), (ins calltarget:$LI),
+ "bl $LI\n\tnop", IIC_BrB, []>;
}
let Uses = [CTR, RM] in {
let isPredicable = 1 in
@@ -1469,8 +1578,8 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
Defs = [LR, R2], Uses = [CTR, RM], RST = 2 in {
def BCTRL_LWZinto_toc:
XLForm_2_ext_and_DForm_1<19, 528, 20, 0, 1, 32, (outs),
- (ins memri:$src), "bctrl\n\tlwz 2, $src", IIC_BrB,
- [(PPCbctrl_load_toc iaddr:$src)]>, Requires<[In32BitMode]>;
+ (ins (memri $D, $RA):$addr), "bctrl\n\tlwz 2, $addr", IIC_BrB,
+ [(PPCbctrl_load_toc iaddr:$addr)]>, Requires<[In32BitMode]>;
}
@@ -1478,8 +1587,8 @@ let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
Defs = [LR, R2, RM], Uses = [CTR, RM], RST = 2 in {
def BCTRL_LWZinto_toc_RM:
XLForm_2_ext_and_DForm_1<19, 528, 20, 0, 1, 32, (outs),
- (ins memri:$src), "bctrl\n\tlwz 2, $src", IIC_BrB,
- [(PPCbctrl_load_toc_rm iaddr:$src)]>, Requires<[In32BitMode]>;
+ (ins (memri $D, $RA):$addr), "bctrl\n\tlwz 2, $addr", IIC_BrB,
+ [(PPCbctrl_load_toc_rm iaddr:$addr)]>, Requires<[In32BitMode]>;
}
@@ -1492,14 +1601,14 @@ def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", IIC_BrB,
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
-def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
- "b $dst", IIC_BrB,
+def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$LI),
+ "b $LI", IIC_BrB,
[]>;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
-def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
- "ba $dst", IIC_BrB,
+def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$LI),
+ "ba $LI", IIC_BrB,
[]>;
}
@@ -1532,8 +1641,8 @@ let isBranch = 1, isTerminator = 1, Size = 0 in {
// System call.
let PPC970_Unit = 7 in {
- def SC : SCForm<17, 1, (outs), (ins i32imm:$lev),
- "sc $lev", IIC_BrB, [(PPCsc (i32 imm:$lev))]>;
+ def SC : SCForm<17, 1, (outs), (ins i32imm:$LEV),
+ "sc $LEV", IIC_BrB, [(PPCsc (i32 imm:$LEV))]>;
}
// Branch history rolling buffer.
@@ -1543,57 +1652,57 @@ def CLRBHRB : XForm_0<31, 430, (outs), (ins), "clrbhrb", IIC_BrB,
// The $dmy argument used for MFBHRBE is not needed; however, including
// it avoids automatic generation of PPCFastISel::fastEmit_i(), which
// interferes with necessary special handling (see PPCFastISel.cpp).
-def MFBHRBE : XFXForm_3p<31, 302, (outs gprc:$rD),
+def MFBHRBE : XFXForm_3p<31, 302, (outs gprc:$RT),
(ins u10imm:$imm, u10imm:$dmy),
- "mfbhrbe $rD, $imm", IIC_BrB,
- [(set i32:$rD,
+ "mfbhrbe $RT, $imm", IIC_BrB,
+ [(set i32:$RT,
(PPCmfbhrbe imm:$imm, imm:$dmy))]>,
PPC970_DGroup_First;
-def RFEBB : XLForm_S<19, 146, (outs), (ins u1imm:$imm), "rfebb $imm",
- IIC_BrB, [(PPCrfebb (i32 imm:$imm))]>,
+def RFEBB : XLForm_S<19, 146, (outs), (ins u1imm:$S), "rfebb $S",
+ IIC_BrB, [(PPCrfebb (i32 imm:$S))]>,
PPC970_DGroup_Single;
def : InstAlias<"rfebb", (RFEBB 1)>;
// DCB* instructions.
-def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst",
- IIC_LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
+def DCBA : DCB_Form<758, 0, (outs), (ins (memrr $RA, $RB):$addr), "dcba $addr",
+ IIC_LdStDCBF, [(int_ppc_dcba xoaddr:$addr)]>,
PPC970_DGroup_Single;
-def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst), "dcbi $dst",
- IIC_LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>,
+def DCBI : DCB_Form<470, 0, (outs), (ins (memrr $RA, $RB):$addr), "dcbi $addr",
+ IIC_LdStDCBF, [(int_ppc_dcbi xoaddr:$addr)]>,
PPC970_DGroup_Single;
-def DCBST : DCB_Form<54, 0, (outs), (ins memrr:$dst), "dcbst $dst",
- IIC_LdStDCBF, [(int_ppc_dcbst xoaddr:$dst)]>,
+def DCBST : DCB_Form<54, 0, (outs), (ins (memrr $RA, $RB):$addr), "dcbst $addr",
+ IIC_LdStDCBF, [(int_ppc_dcbst xoaddr:$addr)]>,
PPC970_DGroup_Single;
-def DCBZ : DCB_Form<1014, 0, (outs), (ins memrr:$dst), "dcbz $dst",
- IIC_LdStDCBF, [(int_ppc_dcbz xoaddr:$dst)]>,
+def DCBZ : DCB_Form<1014, 0, (outs), (ins (memrr $RA, $RB):$addr), "dcbz $addr",
+ IIC_LdStDCBF, [(int_ppc_dcbz xoaddr:$addr)]>,
PPC970_DGroup_Single;
-def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst",
- IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
+def DCBZL : DCB_Form<1014, 1, (outs), (ins (memrr $RA, $RB):$addr), "dcbzl $addr",
+ IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$addr)]>,
PPC970_DGroup_Single;
-def DCBF : DCB_Form_hint<86, (outs), (ins u3imm:$TH, memrr:$dst),
- "dcbf $dst, $TH", IIC_LdStDCBF, []>,
+def DCBF : DCB_Form_hint<86, (outs), (ins u3imm:$TH, (memrr $RA, $RB):$addr),
+ "dcbf $addr, $TH", IIC_LdStDCBF, []>,
PPC970_DGroup_Single;
let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in {
-def DCBT : DCB_Form_hint<278, (outs), (ins u5imm:$TH, memrr:$dst),
- "dcbt $dst, $TH", IIC_LdStDCBF, []>,
+def DCBT : DCB_Form_hint<278, (outs), (ins u5imm:$TH, (memrr $RA, $RB):$addr),
+ "dcbt $addr, $TH", IIC_LdStDCBF, []>,
PPC970_DGroup_Single;
-def DCBTST : DCB_Form_hint<246, (outs), (ins u5imm:$TH, memrr:$dst),
- "dcbtst $dst, $TH", IIC_LdStDCBF, []>,
+def DCBTST : DCB_Form_hint<246, (outs), (ins u5imm:$TH, (memrr $RA, $RB):$addr),
+ "dcbtst $addr, $TH", IIC_LdStDCBF, []>,
PPC970_DGroup_Single;
} // hasSideEffects = 0
-def ICBLC : XForm_icbt<31, 230, (outs), (ins u4imm:$CT, memrr:$src),
- "icblc $CT, $src", IIC_LdStStore>, Requires<[HasICBT]>;
-def ICBLQ : XForm_icbt<31, 198, (outs), (ins u4imm:$CT, memrr:$src),
- "icblq. $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>;
-def ICBT : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, memrr:$src),
- "icbt $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>;
-def ICBTLS : XForm_icbt<31, 486, (outs), (ins u4imm:$CT, memrr:$src),
- "icbtls $CT, $src", IIC_LdStLoad>, Requires<[HasICBT]>;
+def ICBLC : XForm_icbt<31, 230, (outs), (ins u4imm:$CT, (memrr $RA, $RB):$addr),
+ "icblc $CT, $addr", IIC_LdStStore>, Requires<[HasICBT]>;
+def ICBLQ : XForm_icbt<31, 198, (outs), (ins u4imm:$CT, (memrr $RA, $RB):$addr),
+ "icblq. $CT, $addr", IIC_LdStLoad>, Requires<[HasICBT]>;
+def ICBT : XForm_icbt<31, 22, (outs), (ins u4imm:$CT, (memrr $RA, $RB):$addr),
+ "icbt $CT, $addr", IIC_LdStLoad>, Requires<[HasICBT]>;
+def ICBTLS : XForm_icbt<31, 486, (outs), (ins u4imm:$CT, (memrr $RA, $RB):$addr),
+ "icbtls $CT, $addr", IIC_LdStLoad>, Requires<[HasICBT]>;
def : Pat<(int_ppc_dcbt xoaddr:$dst),
(DCBT 0, xoaddr:$dst)>;
@@ -1742,71 +1851,79 @@ def : Pat<(PPCatomicCmpSwap_16 ForceXForm:$ptr, i32:$old, i32:$new),
// Instructions to support atomic operations
let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
-def LBARX : XForm_1_memOp<31, 52, (outs gprc:$rD), (ins memrr:$src),
- "lbarx $rD, $src", IIC_LdStLWARX, []>,
+def LBARX : XForm_1_memOp<31, 52, (outs gprc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lbarx $RST, $addr", IIC_LdStLWARX, []>,
Requires<[HasPartwordAtomics]>;
-def LHARX : XForm_1_memOp<31, 116, (outs gprc:$rD), (ins memrr:$src),
- "lharx $rD, $src", IIC_LdStLWARX, []>,
+def LHARX : XForm_1_memOp<31, 116, (outs gprc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lharx $RST, $addr", IIC_LdStLWARX, []>,
Requires<[HasPartwordAtomics]>;
-def LWARX : XForm_1_memOp<31, 20, (outs gprc:$rD), (ins memrr:$src),
- "lwarx $rD, $src", IIC_LdStLWARX, []>;
+def LWARX : XForm_1_memOp<31, 20, (outs gprc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lwarx $RST, $addr", IIC_LdStLWARX, []>;
// Instructions to support lock versions of atomics
// (EH=1 - see Power ISA 2.07 Book II 4.4.2)
-def LBARXL : XForm_1_memOp<31, 52, (outs gprc:$rD), (ins memrr:$src),
- "lbarx $rD, $src, 1", IIC_LdStLWARX, []>, isRecordForm,
+def LBARXL : XForm_1_memOp<31, 52, (outs gprc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lbarx $RST, $addr, 1", IIC_LdStLWARX, []>, isRecordForm,
Requires<[HasPartwordAtomics]>;
-def LHARXL : XForm_1_memOp<31, 116, (outs gprc:$rD), (ins memrr:$src),
- "lharx $rD, $src, 1", IIC_LdStLWARX, []>, isRecordForm,
+def LHARXL : XForm_1_memOp<31, 116, (outs gprc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lharx $RST, $addr, 1", IIC_LdStLWARX, []>, isRecordForm,
Requires<[HasPartwordAtomics]>;
-def LWARXL : XForm_1_memOp<31, 20, (outs gprc:$rD), (ins memrr:$src),
- "lwarx $rD, $src, 1", IIC_LdStLWARX, []>, isRecordForm;
+def LWARXL : XForm_1_memOp<31, 20, (outs gprc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lwarx $RST, $addr, 1", IIC_LdStLWARX, []>, isRecordForm;
// The atomic instructions use the destination register as well as the next one
// or two registers in order (modulo 31).
let hasExtraSrcRegAllocReq = 1 in
-def LWAT : X_RD5_RS5_IM5<31, 582, (outs gprc:$rD), (ins gprc:$rA, u5imm:$FC),
- "lwat $rD, $rA, $FC", IIC_LdStLoad>,
+def LWAT : X_RD5_RS5_IM5<31, 582, (outs gprc:$RST), (ins gprc:$RA, u5imm:$RB),
+ "lwat $RST, $RA, $RB", IIC_LdStLoad>,
Requires<[IsISA3_0]>;
}
let Defs = [CR0], mayStore = 1, mayLoad = 0, hasSideEffects = 0 in {
-def STBCX : XForm_1_memOp<31, 694, (outs), (ins gprc:$rS, memrr:$dst),
- "stbcx. $rS, $dst", IIC_LdStSTWCX, []>,
+def STBCX : XForm_1_memOp<31, 694, (outs), (ins gprc:$RST, (memrr $RA, $RB):$addr),
+ "stbcx. $RST, $addr", IIC_LdStSTWCX, []>,
isRecordForm, Requires<[HasPartwordAtomics]>;
-def STHCX : XForm_1_memOp<31, 726, (outs), (ins gprc:$rS, memrr:$dst),
- "sthcx. $rS, $dst", IIC_LdStSTWCX, []>,
+def STHCX : XForm_1_memOp<31, 726, (outs), (ins gprc:$RST, (memrr $RA, $RB):$addr),
+ "sthcx. $RST, $addr", IIC_LdStSTWCX, []>,
isRecordForm, Requires<[HasPartwordAtomics]>;
-def STWCX : XForm_1_memOp<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
- "stwcx. $rS, $dst", IIC_LdStSTWCX, []>, isRecordForm;
+def STWCX : XForm_1_memOp<31, 150, (outs), (ins gprc:$RST, (memrr $RA, $RB):$addr),
+ "stwcx. $RST, $addr", IIC_LdStSTWCX, []>, isRecordForm;
}
let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
-def STWAT : X_RD5_RS5_IM5<31, 710, (outs), (ins gprc:$rS, gprc:$rA, u5imm:$FC),
- "stwat $rS, $rA, $FC", IIC_LdStStore>,
+def STWAT : X_RD5_RS5_IM5<31, 710, (outs), (ins gprc:$RST, gprc:$RA, u5imm:$RB),
+ "stwat $RST, $RA, $RB", IIC_LdStStore>,
Requires<[IsISA3_0]>;
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>;
-def TWI : DForm_base<3, (outs), (ins u5imm:$to, gprc:$rA, s16imm:$imm, variable_ops),
- "twi $to, $rA, $imm", IIC_IntTrapW, []>;
-def TW : XForm_1<31, 4, (outs), (ins u5imm:$to, gprc:$rA, gprc:$rB, variable_ops),
- "tw $to, $rA, $rB", IIC_IntTrapW, []>;
-def TDI : DForm_base<2, (outs), (ins u5imm:$to, g8rc:$rA, s16imm:$imm, variable_ops),
- "tdi $to, $rA, $imm", IIC_IntTrapD, []>;
-def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB, variable_ops),
- "td $to, $rA, $rB", IIC_IntTrapD, []>;
+def TWI : DForm_base<3, (outs), (ins u5imm:$RST, gprc:$RA, s16imm:$D, variable_ops),
+ "twi $RST, $RA, $D", IIC_IntTrapW, []>;
+def TW : XForm_1<31, 4, (outs), (ins u5imm:$RST, gprc:$RA, gprc:$RB, variable_ops),
+ "tw $RST, $RA, $RB", IIC_IntTrapW, []>;
+def TDI : DForm_base<2, (outs), (ins u5imm:$RST, g8rc:$RA, s16imm:$D, variable_ops),
+ "tdi $RST, $RA, $D", IIC_IntTrapD, []>;
+def TD : XForm_1<31, 68, (outs), (ins u5imm:$RST, g8rc:$RA, g8rc:$RB, variable_ops),
+ "td $RST, $RA, $RB", IIC_IntTrapD, []>;
+
+def POPCNTB : XForm_11<31, 122, (outs gprc:$RA), (ins gprc:$RST),
+ "popcntb $RA, $RST", IIC_IntGeneral,
+ [(set i32:$RA, (int_ppc_popcntb i32:$RST))]>;
+
+def CDTBCD : XForm_11<31, 282, (outs gprc:$RA), (ins gprc:$RST),
+ "cdtbcd $RA, $RST", IIC_IntGeneral, []>;
+def CBCDTD : XForm_11<31, 314, (outs gprc:$RA), (ins gprc:$RST),
+ "cbcdtd $RA, $RST", IIC_IntGeneral, []>;
-def POPCNTB : XForm_11<31, 122, (outs gprc:$rA), (ins gprc:$rS),
- "popcntb $rA, $rS", IIC_IntGeneral,
- [(set i32:$rA, (int_ppc_popcntb i32:$rS))]>;
+def ADDG6S : XOForm_1<31, 74, 0, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "addg6s $RT, $RA, $RB", IIC_IntGeneral, []>;
//===----------------------------------------------------------------------===//
// PPC32 Load Instructions.
@@ -1814,102 +1931,101 @@ def POPCNTB : XForm_11<31, 122, (outs gprc:$rA), (ins gprc:$rS),
// Unindexed (r+i) Loads.
let PPC970_Unit = 2 in {
-def LBZ : DForm_1<34, (outs gprc:$rD), (ins memri:$src),
- "lbz $rD, $src", IIC_LdStLoad,
- [(set i32:$rD, (zextloadi8 DForm:$src))]>, ZExt32To64,
+def LBZ : DForm_1<34, (outs gprc:$RST), (ins (memri $D, $RA):$addr),
+ "lbz $RST, $addr", IIC_LdStLoad,
+ [(set i32:$RST, (zextloadi8 DForm:$addr))]>, ZExt32To64,
SExt32To64;
-def LHA : DForm_1<42, (outs gprc:$rD), (ins memri:$src),
- "lha $rD, $src", IIC_LdStLHA,
- [(set i32:$rD, (sextloadi16 DForm:$src))]>,
+def LHA : DForm_1<42, (outs gprc:$RST), (ins (memri $D, $RA):$addr),
+ "lha $RST, $addr", IIC_LdStLHA,
+ [(set i32:$RST, (sextloadi16 DForm:$addr))]>,
PPC970_DGroup_Cracked, SExt32To64;
-def LHZ : DForm_1<40, (outs gprc:$rD), (ins memri:$src),
- "lhz $rD, $src", IIC_LdStLoad,
- [(set i32:$rD, (zextloadi16 DForm:$src))]>, ZExt32To64,
+def LHZ : DForm_1<40, (outs gprc:$RST), (ins (memri $D, $RA):$addr),
+ "lhz $RST, $addr", IIC_LdStLoad,
+ [(set i32:$RST, (zextloadi16 DForm:$addr))]>, ZExt32To64,
SExt32To64;
-def LWZ : DForm_1<32, (outs gprc:$rD), (ins memri:$src),
- "lwz $rD, $src", IIC_LdStLoad,
- [(set i32:$rD, (load DForm:$src))]>, ZExt32To64;
+def LWZ : DForm_1<32, (outs gprc:$RST), (ins (memri $D, $RA):$addr),
+ "lwz $RST, $addr", IIC_LdStLoad,
+ [(set i32:$RST, (load DForm:$addr))]>, ZExt32To64;
let Predicates = [HasFPU] in {
-def LFS : DForm_1<48, (outs f4rc:$rD), (ins memri:$src),
- "lfs $rD, $src", IIC_LdStLFD,
- [(set f32:$rD, (load DForm:$src))]>;
-def LFD : DForm_1<50, (outs f8rc:$rD), (ins memri:$src),
- "lfd $rD, $src", IIC_LdStLFD,
- [(set f64:$rD, (load DForm:$src))]>;
+def LFS : DForm_1<48, (outs f4rc:$RST), (ins (memri $D, $RA):$addr),
+ "lfs $RST, $addr", IIC_LdStLFD,
+ [(set f32:$RST, (load DForm:$addr))]>;
+def LFD : DForm_1<50, (outs f8rc:$RST), (ins (memri $D, $RA):$addr),
+ "lfd $RST, $addr", IIC_LdStLFD,
+ [(set f64:$RST, (load DForm:$addr))]>;
}
// Unindexed (r+i) Loads with Update (preinc).
let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
-def LBZU : DForm_1<35, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lbzu $rD, $addr", IIC_LdStLoadUpd,
- []>, RegConstraint<"$addr.reg = $ea_result">,
- NoEncode<"$ea_result">;
+def LBZU : DForm_1<35, (outs gprc:$RST, ptr_rc_nor0:$ea_result), (ins (memri $D, $RA):$addr),
+ "lbzu $RST, $addr", IIC_LdStLoadUpd,
+ []>, RegConstraint<"$RA = $ea_result">;
-def LHAU : DForm_1<43, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lhau $rD, $addr", IIC_LdStLHAU,
+def LHAU : DForm_1<43, (outs gprc:$RST, ptr_rc_nor0:$ea_result), (ins (memri $D, $RA):$addr),
+ "lhau $RST, $addr", IIC_LdStLHAU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZU : DForm_1<41, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lhzu $rD, $addr", IIC_LdStLoadUpd,
+def LHZU : DForm_1<41, (outs gprc:$RST, ptr_rc_nor0:$ea_result), (ins (memri $D, $RA):$addr),
+ "lhzu $RST, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZU : DForm_1<33, (outs gprc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lwzu $rD, $addr", IIC_LdStLoadUpd,
+def LWZU : DForm_1<33, (outs gprc:$RST, ptr_rc_nor0:$ea_result), (ins (memri $D, $RA):$addr),
+ "lwzu $RST, $addr", IIC_LdStLoadUpd,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
let Predicates = [HasFPU] in {
-def LFSU : DForm_1<49, (outs f4rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lfsu $rD, $addr", IIC_LdStLFDU,
+def LFSU : DForm_1<49, (outs f4rc:$RST, ptr_rc_nor0:$ea_result), (ins (memri $D, $RA):$addr),
+ "lfsu $RST, $addr", IIC_LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
-def LFDU : DForm_1<51, (outs f8rc:$rD, ptr_rc_nor0:$ea_result), (ins memri:$addr),
- "lfdu $rD, $addr", IIC_LdStLFDU,
+def LFDU : DForm_1<51, (outs f8rc:$RST, ptr_rc_nor0:$ea_result), (ins (memri $D, $RA):$addr),
+ "lfdu $RST, $addr", IIC_LdStLFDU,
[]>, RegConstraint<"$addr.reg = $ea_result">,
NoEncode<"$ea_result">;
}
// Indexed (r+r) Loads with Update (preinc).
-def LBZUX : XForm_1_memOp<31, 119, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
- (ins memrr:$addr),
- "lbzux $rD, $addr", IIC_LdStLoadUpdX,
+def LBZUX : XForm_1_memOp<31, 119, (outs gprc:$RST, ptr_rc_nor0:$ea_result),
+ (ins (memrr $RA, $RB):$addr),
+ "lbzux $RST, $addr", IIC_LdStLoadUpdX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHAUX : XForm_1_memOp<31, 375, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
- (ins memrr:$addr),
- "lhaux $rD, $addr", IIC_LdStLHAUX,
+def LHAUX : XForm_1_memOp<31, 375, (outs gprc:$RST, ptr_rc_nor0:$ea_result),
+ (ins (memrr $RA, $RB):$addr),
+ "lhaux $RST, $addr", IIC_LdStLHAUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LHZUX : XForm_1_memOp<31, 311, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
- (ins memrr:$addr),
- "lhzux $rD, $addr", IIC_LdStLoadUpdX,
+def LHZUX : XForm_1_memOp<31, 311, (outs gprc:$RST, ptr_rc_nor0:$ea_result),
+ (ins (memrr $RA, $RB):$addr),
+ "lhzux $RST, $addr", IIC_LdStLoadUpdX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LWZUX : XForm_1_memOp<31, 55, (outs gprc:$rD, ptr_rc_nor0:$ea_result),
- (ins memrr:$addr),
- "lwzux $rD, $addr", IIC_LdStLoadUpdX,
+def LWZUX : XForm_1_memOp<31, 55, (outs gprc:$RST, ptr_rc_nor0:$ea_result),
+ (ins (memrr $RA, $RB):$addr),
+ "lwzux $RST, $addr", IIC_LdStLoadUpdX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
let Predicates = [HasFPU] in {
-def LFSUX : XForm_1_memOp<31, 567, (outs f4rc:$rD, ptr_rc_nor0:$ea_result),
- (ins memrr:$addr),
- "lfsux $rD, $addr", IIC_LdStLFDUX,
+def LFSUX : XForm_1_memOp<31, 567, (outs f4rc:$RST, ptr_rc_nor0:$ea_result),
+ (ins (memrr $RA, $RB):$addr),
+ "lfsux $RST, $addr", IIC_LdStLFDUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
-def LFDUX : XForm_1_memOp<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
- (ins memrr:$addr),
- "lfdux $rD, $addr", IIC_LdStLFDUX,
+def LFDUX : XForm_1_memOp<31, 631, (outs f8rc:$RST, ptr_rc_nor0:$ea_result),
+ (ins (memrr $RA, $RB):$addr),
+ "lfdux $RST, $addr", IIC_LdStLFDUX,
[]>, RegConstraint<"$addr.ptrreg = $ea_result">,
NoEncode<"$ea_result">;
}
@@ -1919,49 +2035,49 @@ def LFDUX : XForm_1_memOp<31, 631, (outs f8rc:$rD, ptr_rc_nor0:$ea_result),
// Indexed (r+r) Loads.
//
let PPC970_Unit = 2, mayLoad = 1, mayStore = 0 in {
-def LBZX : XForm_1_memOp<31, 87, (outs gprc:$rD), (ins memrr:$src),
- "lbzx $rD, $src", IIC_LdStLoad,
- [(set i32:$rD, (zextloadi8 XForm:$src))]>, ZExt32To64,
+def LBZX : XForm_1_memOp<31, 87, (outs gprc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lbzx $RST, $addr", IIC_LdStLoad,
+ [(set i32:$RST, (zextloadi8 XForm:$addr))]>, ZExt32To64,
SExt32To64;
-def LHAX : XForm_1_memOp<31, 343, (outs gprc:$rD), (ins memrr:$src),
- "lhax $rD, $src", IIC_LdStLHA,
- [(set i32:$rD, (sextloadi16 XForm:$src))]>,
+def LHAX : XForm_1_memOp<31, 343, (outs gprc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lhax $RST, $addr", IIC_LdStLHA,
+ [(set i32:$RST, (sextloadi16 XForm:$addr))]>,
PPC970_DGroup_Cracked, SExt32To64;
-def LHZX : XForm_1_memOp<31, 279, (outs gprc:$rD), (ins memrr:$src),
- "lhzx $rD, $src", IIC_LdStLoad,
- [(set i32:$rD, (zextloadi16 XForm:$src))]>, ZExt32To64,
+def LHZX : XForm_1_memOp<31, 279, (outs gprc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lhzx $RST, $addr", IIC_LdStLoad,
+ [(set i32:$RST, (zextloadi16 XForm:$addr))]>, ZExt32To64,
SExt32To64;
-def LWZX : XForm_1_memOp<31, 23, (outs gprc:$rD), (ins memrr:$src),
- "lwzx $rD, $src", IIC_LdStLoad,
- [(set i32:$rD, (load XForm:$src))]>, ZExt32To64;
-def LHBRX : XForm_1_memOp<31, 790, (outs gprc:$rD), (ins memrr:$src),
- "lhbrx $rD, $src", IIC_LdStLoad,
- [(set i32:$rD, (PPClbrx ForceXForm:$src, i16))]>, ZExt32To64;
-def LWBRX : XForm_1_memOp<31, 534, (outs gprc:$rD), (ins memrr:$src),
- "lwbrx $rD, $src", IIC_LdStLoad,
- [(set i32:$rD, (PPClbrx ForceXForm:$src, i32))]>, ZExt32To64;
+def LWZX : XForm_1_memOp<31, 23, (outs gprc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lwzx $RST, $addr", IIC_LdStLoad,
+ [(set i32:$RST, (load XForm:$addr))]>, ZExt32To64;
+def LHBRX : XForm_1_memOp<31, 790, (outs gprc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lhbrx $RST, $addr", IIC_LdStLoad,
+ [(set i32:$RST, (PPClbrx ForceXForm:$addr, i16))]>, ZExt32To64;
+def LWBRX : XForm_1_memOp<31, 534, (outs gprc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lwbrx $RST, $addr", IIC_LdStLoad,
+ [(set i32:$RST, (PPClbrx ForceXForm:$addr, i32))]>, ZExt32To64;
let Predicates = [HasFPU] in {
-def LFSX : XForm_25_memOp<31, 535, (outs f4rc:$frD), (ins memrr:$src),
- "lfsx $frD, $src", IIC_LdStLFD,
- [(set f32:$frD, (load XForm:$src))]>;
-def LFDX : XForm_25_memOp<31, 599, (outs f8rc:$frD), (ins memrr:$src),
- "lfdx $frD, $src", IIC_LdStLFD,
- [(set f64:$frD, (load XForm:$src))]>;
+def LFSX : XForm_25_memOp<31, 535, (outs f4rc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lfsx $RST, $addr", IIC_LdStLFD,
+ [(set f32:$RST, (load XForm:$addr))]>;
+def LFDX : XForm_25_memOp<31, 599, (outs f8rc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lfdx $RST, $addr", IIC_LdStLFD,
+ [(set f64:$RST, (load XForm:$addr))]>;
-def LFIWAX : XForm_25_memOp<31, 855, (outs f8rc:$frD), (ins memrr:$src),
- "lfiwax $frD, $src", IIC_LdStLFD,
- [(set f64:$frD, (PPClfiwax ForceXForm:$src))]>;
-def LFIWZX : XForm_25_memOp<31, 887, (outs f8rc:$frD), (ins memrr:$src),
- "lfiwzx $frD, $src", IIC_LdStLFD,
- [(set f64:$frD, (PPClfiwzx ForceXForm:$src))]>;
+def LFIWAX : XForm_25_memOp<31, 855, (outs f8rc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lfiwax $RST, $addr", IIC_LdStLFD,
+ [(set f64:$RST, (PPClfiwax ForceXForm:$addr))]>;
+def LFIWZX : XForm_25_memOp<31, 887, (outs f8rc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lfiwzx $RST, $addr", IIC_LdStLFD,
+ [(set f64:$RST, (PPClfiwzx ForceXForm:$addr))]>;
}
}
// Load Multiple
let mayLoad = 1, mayStore = 0, hasSideEffects = 0 in
-def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
- "lmw $rD, $src", IIC_LdStLMW, []>;
+def LMW : DForm_1<46, (outs gprc:$RST), (ins (memri $D, $RA):$src),
+ "lmw $RST, $src", IIC_LdStLMW, []>;
//===----------------------------------------------------------------------===//
// PPC32 Store Instructions.
@@ -1969,42 +2085,42 @@ def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
// Unindexed (r+i) Stores.
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
-def STB : DForm_1<38, (outs), (ins gprc:$rS, memri:$dst),
- "stb $rS, $dst", IIC_LdStStore,
- [(truncstorei8 i32:$rS, DForm:$dst)]>;
-def STH : DForm_1<44, (outs), (ins gprc:$rS, memri:$dst),
- "sth $rS, $dst", IIC_LdStStore,
- [(truncstorei16 i32:$rS, DForm:$dst)]>;
-def STW : DForm_1<36, (outs), (ins gprc:$rS, memri:$dst),
- "stw $rS, $dst", IIC_LdStStore,
- [(store i32:$rS, DForm:$dst)]>;
+def STB : DForm_1<38, (outs), (ins gprc:$RST, (memri $D, $RA):$dst),
+ "stb $RST, $dst", IIC_LdStStore,
+ [(truncstorei8 i32:$RST, DForm:$dst)]>;
+def STH : DForm_1<44, (outs), (ins gprc:$RST, (memri $D, $RA):$dst),
+ "sth $RST, $dst", IIC_LdStStore,
+ [(truncstorei16 i32:$RST, DForm:$dst)]>;
+def STW : DForm_1<36, (outs), (ins gprc:$RST, (memri $D, $RA):$dst),
+ "stw $RST, $dst", IIC_LdStStore,
+ [(store i32:$RST, DForm:$dst)]>;
let Predicates = [HasFPU] in {
-def STFS : DForm_1<52, (outs), (ins f4rc:$rS, memri:$dst),
- "stfs $rS, $dst", IIC_LdStSTFD,
- [(store f32:$rS, DForm:$dst)]>;
-def STFD : DForm_1<54, (outs), (ins f8rc:$rS, memri:$dst),
- "stfd $rS, $dst", IIC_LdStSTFD,
- [(store f64:$rS, DForm:$dst)]>;
+def STFS : DForm_1<52, (outs), (ins f4rc:$RST, (memri $D, $RA):$dst),
+ "stfs $RST, $dst", IIC_LdStSTFD,
+ [(store f32:$RST, DForm:$dst)]>;
+def STFD : DForm_1<54, (outs), (ins f8rc:$RST, (memri $D, $RA):$dst),
+ "stfd $RST, $dst", IIC_LdStSTFD,
+ [(store f64:$RST, DForm:$dst)]>;
}
}
// Unindexed (r+i) Stores with Update (preinc).
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
-def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "stbu $rS, $dst", IIC_LdStSTU, []>,
+def STBU : DForm_1<39, (outs ptr_rc_nor0:$ea_res), (ins gprc:$RST, (memri $D, $RA):$dst),
+ "stbu $RST, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "sthu $rS, $dst", IIC_LdStSTU, []>,
+def STHU : DForm_1<45, (outs ptr_rc_nor0:$ea_res), (ins gprc:$RST, (memri $D, $RA):$dst),
+ "sthu $RST, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$rS, memri:$dst),
- "stwu $rS, $dst", IIC_LdStSTU, []>,
+def STWU : DForm_1<37, (outs ptr_rc_nor0:$ea_res), (ins gprc:$RST, (memri $D, $RA):$dst),
+ "stwu $RST, $dst", IIC_LdStSTU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
let Predicates = [HasFPU] in {
-def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$rS, memri:$dst),
- "stfsu $rS, $dst", IIC_LdStSTFDU, []>,
+def STFSU : DForm_1<53, (outs ptr_rc_nor0:$ea_res), (ins f4rc:$RST, (memri $D, $RA):$dst),
+ "stfsu $RST, $dst", IIC_LdStSTFDU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
-def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$rS, memri:$dst),
- "stfdu $rS, $dst", IIC_LdStSTFDU, []>,
+def STFDU : DForm_1<55, (outs ptr_rc_nor0:$ea_res), (ins f8rc:$RST, (memri $D, $RA):$dst),
+ "stfdu $RST, $dst", IIC_LdStSTFDU, []>,
RegConstraint<"$dst.reg = $ea_res">, NoEncode<"$ea_res">;
}
}
@@ -2025,73 +2141,73 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iaddroff:$ptroff),
// Indexed (r+r) Stores.
let PPC970_Unit = 2 in {
-def STBX : XForm_8_memOp<31, 215, (outs), (ins gprc:$rS, memrr:$dst),
- "stbx $rS, $dst", IIC_LdStStore,
- [(truncstorei8 i32:$rS, XForm:$dst)]>,
+def STBX : XForm_8_memOp<31, 215, (outs), (ins gprc:$RST, (memrr $RA, $RB):$addr),
+ "stbx $RST, $addr", IIC_LdStStore,
+ [(truncstorei8 i32:$RST, XForm:$addr)]>,
PPC970_DGroup_Cracked;
-def STHX : XForm_8_memOp<31, 407, (outs), (ins gprc:$rS, memrr:$dst),
- "sthx $rS, $dst", IIC_LdStStore,
- [(truncstorei16 i32:$rS, XForm:$dst)]>,
+def STHX : XForm_8_memOp<31, 407, (outs), (ins gprc:$RST, (memrr $RA, $RB):$addr),
+ "sthx $RST, $addr", IIC_LdStStore,
+ [(truncstorei16 i32:$RST, XForm:$addr)]>,
PPC970_DGroup_Cracked;
-def STWX : XForm_8_memOp<31, 151, (outs), (ins gprc:$rS, memrr:$dst),
- "stwx $rS, $dst", IIC_LdStStore,
- [(store i32:$rS, XForm:$dst)]>,
+def STWX : XForm_8_memOp<31, 151, (outs), (ins gprc:$RST, (memrr $RA, $RB):$addr),
+ "stwx $RST, $addr", IIC_LdStStore,
+ [(store i32:$RST, XForm:$addr)]>,
PPC970_DGroup_Cracked;
-def STHBRX: XForm_8_memOp<31, 918, (outs), (ins gprc:$rS, memrr:$dst),
- "sthbrx $rS, $dst", IIC_LdStStore,
- [(PPCstbrx i32:$rS, ForceXForm:$dst, i16)]>,
+def STHBRX: XForm_8_memOp<31, 918, (outs), (ins gprc:$RST, (memrr $RA, $RB):$addr),
+ "sthbrx $RST, $addr", IIC_LdStStore,
+ [(PPCstbrx i32:$RST, ForceXForm:$addr, i16)]>,
PPC970_DGroup_Cracked;
-def STWBRX: XForm_8_memOp<31, 662, (outs), (ins gprc:$rS, memrr:$dst),
- "stwbrx $rS, $dst", IIC_LdStStore,
- [(PPCstbrx i32:$rS, ForceXForm:$dst, i32)]>,
+def STWBRX: XForm_8_memOp<31, 662, (outs), (ins gprc:$RST, (memrr $RA, $RB):$addr),
+ "stwbrx $RST, $addr", IIC_LdStStore,
+ [(PPCstbrx i32:$RST, ForceXForm:$addr, i32)]>,
PPC970_DGroup_Cracked;
let Predicates = [HasFPU] in {
-def STFIWX: XForm_28_memOp<31, 983, (outs), (ins f8rc:$frS, memrr:$dst),
- "stfiwx $frS, $dst", IIC_LdStSTFD,
- [(PPCstfiwx f64:$frS, ForceXForm:$dst)]>;
+def STFIWX: XForm_28_memOp<31, 983, (outs), (ins f8rc:$RST, (memrr $RA, $RB):$addr),
+ "stfiwx $RST, $addr", IIC_LdStSTFD,
+ [(PPCstfiwx f64:$RST, ForceXForm:$addr)]>;
-def STFSX : XForm_28_memOp<31, 663, (outs), (ins f4rc:$frS, memrr:$dst),
- "stfsx $frS, $dst", IIC_LdStSTFD,
- [(store f32:$frS, XForm:$dst)]>;
-def STFDX : XForm_28_memOp<31, 727, (outs), (ins f8rc:$frS, memrr:$dst),
- "stfdx $frS, $dst", IIC_LdStSTFD,
- [(store f64:$frS, XForm:$dst)]>;
+def STFSX : XForm_28_memOp<31, 663, (outs), (ins f4rc:$RST, (memrr $RA, $RB):$addr),
+ "stfsx $RST, $addr", IIC_LdStSTFD,
+ [(store f32:$RST, XForm:$addr)]>;
+def STFDX : XForm_28_memOp<31, 727, (outs), (ins f8rc:$RST, (memrr $RA, $RB):$addr),
+ "stfdx $RST, $addr", IIC_LdStSTFD,
+ [(store f64:$RST, XForm:$addr)]>;
}
}
// Indexed (r+r) Stores with Update (preinc).
let PPC970_Unit = 2, mayStore = 1, mayLoad = 0 in {
def STBUX : XForm_8_memOp<31, 247, (outs ptr_rc_nor0:$ea_res),
- (ins gprc:$rS, memrr:$dst),
- "stbux $rS, $dst", IIC_LdStSTUX, []>,
- RegConstraint<"$dst.ptrreg = $ea_res">,
+ (ins gprc:$RST, (memrr $RA, $RB):$addr),
+ "stbux $RST, $addr", IIC_LdStSTUX, []>,
+ RegConstraint<"$addr.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STHUX : XForm_8_memOp<31, 439, (outs ptr_rc_nor0:$ea_res),
- (ins gprc:$rS, memrr:$dst),
- "sthux $rS, $dst", IIC_LdStSTUX, []>,
- RegConstraint<"$dst.ptrreg = $ea_res">,
+ (ins gprc:$RST, (memrr $RA, $RB):$addr),
+ "sthux $RST, $addr", IIC_LdStSTUX, []>,
+ RegConstraint<"$addr.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STWUX : XForm_8_memOp<31, 183, (outs ptr_rc_nor0:$ea_res),
- (ins gprc:$rS, memrr:$dst),
- "stwux $rS, $dst", IIC_LdStSTUX, []>,
- RegConstraint<"$dst.ptrreg = $ea_res">,
+ (ins gprc:$RST, (memrr $RA, $RB):$addr),
+ "stwux $RST, $addr", IIC_LdStSTUX, []>,
+ RegConstraint<"$addr.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
let Predicates = [HasFPU] in {
def STFSUX: XForm_8_memOp<31, 695, (outs ptr_rc_nor0:$ea_res),
- (ins f4rc:$rS, memrr:$dst),
- "stfsux $rS, $dst", IIC_LdStSTFDU, []>,
- RegConstraint<"$dst.ptrreg = $ea_res">,
+ (ins f4rc:$RST, (memrr $RA, $RB):$addr),
+ "stfsux $RST, $addr", IIC_LdStSTFDU, []>,
+ RegConstraint<"$addr.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
def STFDUX: XForm_8_memOp<31, 759, (outs ptr_rc_nor0:$ea_res),
- (ins f8rc:$rS, memrr:$dst),
- "stfdux $rS, $dst", IIC_LdStSTFDU, []>,
- RegConstraint<"$dst.ptrreg = $ea_res">,
+ (ins f8rc:$RST, (memrr $RA, $RB):$addr),
+ "stfdux $RST, $addr", IIC_LdStSTFDU, []>,
+ RegConstraint<"$addr.ptrreg = $ea_res">,
NoEncode<"$ea_res">,
PPC970_DGroup_Cracked;
}
@@ -2115,8 +2231,8 @@ def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
// Store Multiple
let mayStore = 1, mayLoad = 0, hasSideEffects = 0 in
-def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
- "stmw $rS, $dst", IIC_LdStLMW, []>;
+def STMW : DForm_1<47, (outs), (ins gprc:$RST, (memri $D, $RA):$dst),
+ "stmw $RST, $dst", IIC_LdStLMW, []>;
def SYNC : XForm_24_sync<31, 598, (outs), (ins u2imm:$L),
"sync $L", IIC_LdStSync, []>;
@@ -2151,43 +2267,43 @@ def : Pat<(int_ppc_iospace_eieio), (PseudoEIEIO)>;
//
let PPC970_Unit = 1 in { // FXU Operations.
-def ADDI : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$imm),
- "addi $rD, $rA, $imm", IIC_IntSimple,
- [(set i32:$rD, (add i32:$rA, imm32SExt16:$imm))]>;
+def ADDI : DForm_2<14, (outs gprc:$RST), (ins gprc_nor0:$RA, s16imm:$D),
+ "addi $RST, $RA, $D", IIC_IntSimple,
+ [(set i32:$RST, (add i32:$RA, imm32SExt16:$D))]>;
let BaseName = "addic" in {
let Defs = [CARRY] in
-def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
- "addic $rD, $rA, $imm", IIC_IntGeneral,
- [(set i32:$rD, (addc i32:$rA, imm32SExt16:$imm))]>,
+def ADDIC : DForm_2<12, (outs gprc:$RST), (ins gprc:$RA, s16imm:$D),
+ "addic $RST, $RA, $D", IIC_IntGeneral,
+ [(set i32:$RST, (addc i32:$RA, imm32SExt16:$D))]>,
RecFormRel, PPC970_DGroup_Cracked;
let Defs = [CARRY, CR0] in
-def ADDIC_rec : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
- "addic. $rD, $rA, $imm", IIC_IntGeneral,
+def ADDIC_rec : DForm_2<13, (outs gprc:$RST), (ins gprc:$RA, s16imm:$D),
+ "addic. $RST, $RA, $D", IIC_IntGeneral,
[]>, isRecordForm, RecFormRel;
}
-def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s17imm:$imm),
- "addis $rD, $rA, $imm", IIC_IntSimple,
- [(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>;
+def ADDIS : DForm_2<15, (outs gprc:$RST), (ins gprc_nor0:$RA, s17imm:$D),
+ "addis $RST, $RA, $D", IIC_IntSimple,
+ [(set i32:$RST, (add i32:$RA, imm16ShiftedSExt:$D))]>;
let isCodeGenOnly = 1 in
-def LA : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$sym),
- "la $rD, $sym($rA)", IIC_IntGeneral,
- [(set i32:$rD, (add i32:$rA,
- (PPClo tglobaladdr:$sym, 0)))]>;
-def MULLI : DForm_2< 7, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
- "mulli $rD, $rA, $imm", IIC_IntMulLI,
- [(set i32:$rD, (mul i32:$rA, imm32SExt16:$imm))]>;
+def LA : DForm_2<14, (outs gprc:$RST), (ins gprc_nor0:$RA, s16imm:$D),
+ "la $RST, $D($RA)", IIC_IntGeneral,
+ [(set i32:$RST, (add i32:$RA,
+ (PPClo tglobaladdr:$D, 0)))]>;
+def MULLI : DForm_2< 7, (outs gprc:$RST), (ins gprc:$RA, s16imm:$D),
+ "mulli $RST, $RA, $D", IIC_IntMulLI,
+ [(set i32:$RST, (mul i32:$RA, imm32SExt16:$D))]>;
let Defs = [CARRY] in
-def SUBFIC : DForm_2< 8, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
- "subfic $rD, $rA, $imm", IIC_IntGeneral,
- [(set i32:$rD, (subc imm32SExt16:$imm, i32:$rA))]>;
+def SUBFIC : DForm_2< 8, (outs gprc:$RST), (ins gprc:$RA, s16imm:$D),
+ "subfic $RST, $RA, $D", IIC_IntGeneral,
+ [(set i32:$RST, (subc imm32SExt16:$D, i32:$RA))]>;
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
- def LI : DForm_2_r0<14, (outs gprc:$rD), (ins s16imm:$imm),
- "li $rD, $imm", IIC_IntSimple,
- [(set i32:$rD, imm32SExt16:$imm)]>, SExt32To64;
- def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins s17imm:$imm),
- "lis $rD, $imm", IIC_IntSimple,
- [(set i32:$rD, imm16ShiftedSExt:$imm)]>, SExt32To64;
+ def LI : DForm_2_r0<14, (outs gprc:$RST), (ins s16imm:$D),
+ "li $RST, $D", IIC_IntSimple,
+ [(set i32:$RST, imm32SExt16:$D)]>, SExt32To64;
+ def LIS : DForm_2_r0<15, (outs gprc:$RST), (ins s17imm:$D),
+ "lis $RST, $D", IIC_IntSimple,
+ [(set i32:$RST, imm16ShiftedSExt:$D)]>, SExt32To64;
}
}
@@ -2196,27 +2312,27 @@ def : InstAlias<"lis $rD, $imm", (ADDIS gprc:$rD, ZERO, s17imm:$imm)>;
let PPC970_Unit = 1 in { // FXU Operations.
let Defs = [CR0] in {
-def ANDI_rec : DForm_4<28, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
- "andi. $dst, $src1, $src2", IIC_IntGeneral,
- [(set i32:$dst, (and i32:$src1, immZExt16:$src2))]>,
+def ANDI_rec : DForm_4<28, (outs gprc:$RA), (ins gprc:$RST, u16imm:$D),
+ "andi. $RA, $RST, $D", IIC_IntGeneral,
+ [(set i32:$RA, (and i32:$RST, immZExt16:$D))]>,
isRecordForm, ZExt32To64, SExt32To64;
-def ANDIS_rec : DForm_4<29, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
- "andis. $dst, $src1, $src2", IIC_IntGeneral,
- [(set i32:$dst, (and i32:$src1, imm16ShiftedZExt:$src2))]>,
+def ANDIS_rec : DForm_4<29, (outs gprc:$RA), (ins gprc:$RST, u16imm:$D),
+ "andis. $RA, $RST, $D", IIC_IntGeneral,
+ [(set i32:$RA, (and i32:$RST, imm16ShiftedZExt:$D))]>,
isRecordForm, ZExt32To64;
}
-def ORI : DForm_4<24, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
- "ori $dst, $src1, $src2", IIC_IntSimple,
- [(set i32:$dst, (or i32:$src1, immZExt16:$src2))]>;
-def ORIS : DForm_4<25, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
- "oris $dst, $src1, $src2", IIC_IntSimple,
- [(set i32:$dst, (or i32:$src1, imm16ShiftedZExt:$src2))]>;
-def XORI : DForm_4<26, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
- "xori $dst, $src1, $src2", IIC_IntSimple,
- [(set i32:$dst, (xor i32:$src1, immZExt16:$src2))]>;
-def XORIS : DForm_4<27, (outs gprc:$dst), (ins gprc:$src1, u16imm:$src2),
- "xoris $dst, $src1, $src2", IIC_IntSimple,
- [(set i32:$dst, (xor i32:$src1, imm16ShiftedZExt:$src2))]>;
+def ORI : DForm_4<24, (outs gprc:$RA), (ins gprc:$RST, u16imm:$D),
+ "ori $RA, $RST, $D", IIC_IntSimple,
+ [(set i32:$RA, (or i32:$RST, immZExt16:$D))]>;
+def ORIS : DForm_4<25, (outs gprc:$RA), (ins gprc:$RST, u16imm:$D),
+ "oris $RA, $RST, $D", IIC_IntSimple,
+ [(set i32:$RA, (or i32:$RST, imm16ShiftedZExt:$D))]>;
+def XORI : DForm_4<26, (outs gprc:$RA), (ins gprc:$RST, u16imm:$D),
+ "xori $RA, $RST, $D", IIC_IntSimple,
+ [(set i32:$RA, (xor i32:$RST, immZExt16:$D))]>;
+def XORIS : DForm_4<27, (outs gprc:$RA), (ins gprc:$RST, u16imm:$D),
+ "xoris $RA, $RST, $D", IIC_IntSimple,
+ [(set i32:$RA, (xor i32:$RST, imm16ShiftedZExt:$D))]>;
def NOP : DForm_4_zero<24, (outs), (ins), "nop", IIC_IntSimple,
[]>;
@@ -2229,57 +2345,57 @@ def NOP_GT_PWR7 : DForm_4_fixedreg_zero<24, 2, (outs), (ins),
}
let isCompare = 1, hasSideEffects = 0 in {
- def CMPWI : DForm_5_ext<11, (outs crrc:$crD), (ins gprc:$rA, s16imm:$imm),
- "cmpwi $crD, $rA, $imm", IIC_IntCompare>;
- def CMPLWI : DForm_6_ext<10, (outs crrc:$dst), (ins gprc:$src1, u16imm:$src2),
- "cmplwi $dst, $src1, $src2", IIC_IntCompare>;
+ def CMPWI : DForm_5_ext<11, (outs crrc:$BF), (ins gprc:$RA, s16imm:$D),
+ "cmpwi $BF, $RA, $D", IIC_IntCompare>;
+ def CMPLWI : DForm_6_ext<10, (outs crrc:$BF), (ins gprc:$RA, u16imm:$D),
+ "cmplwi $BF, $RA, $D", IIC_IntCompare>;
def CMPRB : X_BF3_L1_RS5_RS5<31, 192, (outs crrc:$BF),
- (ins u1imm:$L, gprc:$rA, gprc:$rB),
- "cmprb $BF, $L, $rA, $rB", IIC_IntCompare, []>,
+ (ins u1imm:$L, gprc:$RA, gprc:$RB),
+ "cmprb $BF, $L, $RA, $RB", IIC_IntCompare, []>,
Requires<[IsISA3_0]>;
}
}
let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations.
let isCommutable = 1 in {
-defm NAND : XForm_6r<31, 476, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "nand", "$rA, $rS, $rB", IIC_IntSimple,
- [(set i32:$rA, (not (and i32:$rS, i32:$rB)))]>;
-defm AND : XForm_6r<31, 28, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "and", "$rA, $rS, $rB", IIC_IntSimple,
- [(set i32:$rA, (and i32:$rS, i32:$rB))]>;
+defm NAND : XForm_6r<31, 476, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
+ "nand", "$RA, $RST, $RB", IIC_IntSimple,
+ [(set i32:$RA, (not (and i32:$RST, i32:$RB)))]>;
+defm AND : XForm_6r<31, 28, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
+ "and", "$RA, $RST, $RB", IIC_IntSimple,
+ [(set i32:$RA, (and i32:$RST, i32:$RB))]>;
} // isCommutable
-defm ANDC : XForm_6r<31, 60, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "andc", "$rA, $rS, $rB", IIC_IntSimple,
- [(set i32:$rA, (and i32:$rS, (not i32:$rB)))]>;
+defm ANDC : XForm_6r<31, 60, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
+ "andc", "$RA, $RST, $RB", IIC_IntSimple,
+ [(set i32:$RA, (and i32:$RST, (not i32:$RB)))]>;
let isCommutable = 1 in {
-defm OR : XForm_6r<31, 444, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "or", "$rA, $rS, $rB", IIC_IntSimple,
- [(set i32:$rA, (or i32:$rS, i32:$rB))]>;
-defm NOR : XForm_6r<31, 124, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "nor", "$rA, $rS, $rB", IIC_IntSimple,
- [(set i32:$rA, (not (or i32:$rS, i32:$rB)))]>;
+defm OR : XForm_6r<31, 444, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
+ "or", "$RA, $RST, $RB", IIC_IntSimple,
+ [(set i32:$RA, (or i32:$RST, i32:$RB))]>;
+defm NOR : XForm_6r<31, 124, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
+ "nor", "$RA, $RST, $RB", IIC_IntSimple,
+ [(set i32:$RA, (not (or i32:$RST, i32:$RB)))]>;
} // isCommutable
-defm ORC : XForm_6r<31, 412, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "orc", "$rA, $rS, $rB", IIC_IntSimple,
- [(set i32:$rA, (or i32:$rS, (not i32:$rB)))]>;
+defm ORC : XForm_6r<31, 412, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
+ "orc", "$RA, $RST, $RB", IIC_IntSimple,
+ [(set i32:$RA, (or i32:$RST, (not i32:$RB)))]>;
let isCommutable = 1 in {
-defm EQV : XForm_6r<31, 284, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "eqv", "$rA, $rS, $rB", IIC_IntSimple,
- [(set i32:$rA, (not (xor i32:$rS, i32:$rB)))]>;
-defm XOR : XForm_6r<31, 316, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "xor", "$rA, $rS, $rB", IIC_IntSimple,
- [(set i32:$rA, (xor i32:$rS, i32:$rB))]>;
+defm EQV : XForm_6r<31, 284, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
+ "eqv", "$RA, $RST, $RB", IIC_IntSimple,
+ [(set i32:$RA, (not (xor i32:$RST, i32:$RB)))]>;
+defm XOR : XForm_6r<31, 316, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
+ "xor", "$RA, $RST, $RB", IIC_IntSimple,
+ [(set i32:$RA, (xor i32:$RST, i32:$RB))]>;
} // isCommutable
-defm SLW : XForm_6r<31, 24, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "slw", "$rA, $rS, $rB", IIC_IntGeneral,
- [(set i32:$rA, (PPCshl i32:$rS, i32:$rB))]>, ZExt32To64;
-defm SRW : XForm_6r<31, 536, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "srw", "$rA, $rS, $rB", IIC_IntGeneral,
- [(set i32:$rA, (PPCsrl i32:$rS, i32:$rB))]>, ZExt32To64;
-defm SRAW : XForm_6rc<31, 792, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "sraw", "$rA, $rS, $rB", IIC_IntShift,
- [(set i32:$rA, (PPCsra i32:$rS, i32:$rB))]>, SExt32To64;
+defm SLW : XForm_6r<31, 24, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
+ "slw", "$RA, $RST, $RB", IIC_IntGeneral,
+ [(set i32:$RA, (PPCshl i32:$RST, i32:$RB))]>, ZExt32To64;
+defm SRW : XForm_6r<31, 536, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
+ "srw", "$RA, $RST, $RB", IIC_IntGeneral,
+ [(set i32:$RA, (PPCsrl i32:$RST, i32:$RB))]>, ZExt32To64;
+defm SRAW : XForm_6rc<31, 792, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
+ "sraw", "$RA, $RST, $RB", IIC_IntShift,
+ [(set i32:$RA, (PPCsra i32:$RST, i32:$RB))]>, SExt32To64;
}
def : InstAlias<"mr $rA, $rB", (OR gprc:$rA, gprc:$rB, gprc:$rB)>;
@@ -2292,109 +2408,109 @@ def : InstAlias<"nop", (ORI R0, R0, 0)>;
let PPC970_Unit = 1 in { // FXU Operations.
let hasSideEffects = 0 in {
-defm SRAWI : XForm_10rc<31, 824, (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH),
- "srawi", "$rA, $rS, $SH", IIC_IntShift,
- [(set i32:$rA, (sra i32:$rS, (i32 imm:$SH)))]>,
+defm SRAWI : XForm_10rc<31, 824, (outs gprc:$RA), (ins gprc:$RST, u5imm:$RB),
+ "srawi", "$RA, $RST, $RB", IIC_IntShift,
+ [(set i32:$RA, (sra i32:$RST, (i32 imm:$RB)))]>,
SExt32To64;
-defm CNTLZW : XForm_11r<31, 26, (outs gprc:$rA), (ins gprc:$rS),
- "cntlzw", "$rA, $rS", IIC_IntGeneral,
- [(set i32:$rA, (ctlz i32:$rS))]>, ZExt32To64;
-defm CNTTZW : XForm_11r<31, 538, (outs gprc:$rA), (ins gprc:$rS),
- "cnttzw", "$rA, $rS", IIC_IntGeneral,
- [(set i32:$rA, (cttz i32:$rS))]>, Requires<[IsISA3_0]>,
+defm CNTLZW : XForm_11r<31, 26, (outs gprc:$RA), (ins gprc:$RST),
+ "cntlzw", "$RA, $RST", IIC_IntGeneral,
+ [(set i32:$RA, (ctlz i32:$RST))]>, ZExt32To64;
+defm CNTTZW : XForm_11r<31, 538, (outs gprc:$RA), (ins gprc:$RST),
+ "cnttzw", "$RA, $RST", IIC_IntGeneral,
+ [(set i32:$RA, (cttz i32:$RST))]>, Requires<[IsISA3_0]>,
ZExt32To64;
-defm EXTSB : XForm_11r<31, 954, (outs gprc:$rA), (ins gprc:$rS),
- "extsb", "$rA, $rS", IIC_IntSimple,
- [(set i32:$rA, (sext_inreg i32:$rS, i8))]>, SExt32To64;
-defm EXTSH : XForm_11r<31, 922, (outs gprc:$rA), (ins gprc:$rS),
- "extsh", "$rA, $rS", IIC_IntSimple,
- [(set i32:$rA, (sext_inreg i32:$rS, i16))]>, SExt32To64;
+defm EXTSB : XForm_11r<31, 954, (outs gprc:$RA), (ins gprc:$RST),
+ "extsb", "$RA, $RST", IIC_IntSimple,
+ [(set i32:$RA, (sext_inreg i32:$RST, i8))]>, SExt32To64;
+defm EXTSH : XForm_11r<31, 922, (outs gprc:$RA), (ins gprc:$RST),
+ "extsh", "$RA, $RST", IIC_IntSimple,
+ [(set i32:$RA, (sext_inreg i32:$RST, i16))]>, SExt32To64;
let isCommutable = 1 in
-def CMPB : XForm_6<31, 508, (outs gprc:$rA), (ins gprc:$rS, gprc:$rB),
- "cmpb $rA, $rS, $rB", IIC_IntGeneral,
- [(set i32:$rA, (PPCcmpb i32:$rS, i32:$rB))]>;
+def CMPB : XForm_6<31, 508, (outs gprc:$RA), (ins gprc:$RST, gprc:$RB),
+ "cmpb $RA, $RST, $RB", IIC_IntGeneral,
+ [(set i32:$RA, (PPCcmpb i32:$RST, i32:$RB))]>;
}
let isCompare = 1, hasSideEffects = 0 in {
- def CMPW : XForm_16_ext<31, 0, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
- "cmpw $crD, $rA, $rB", IIC_IntCompare>;
- def CMPLW : XForm_16_ext<31, 32, (outs crrc:$crD), (ins gprc:$rA, gprc:$rB),
- "cmplw $crD, $rA, $rB", IIC_IntCompare>;
+ def CMPW : XForm_16_ext<31, 0, (outs crrc:$BF), (ins gprc:$RA, gprc:$RB),
+ "cmpw $BF, $RA, $RB", IIC_IntCompare>;
+ def CMPLW : XForm_16_ext<31, 32, (outs crrc:$BF), (ins gprc:$RA, gprc:$RB),
+ "cmplw $BF, $RA, $RB", IIC_IntCompare>;
}
}
let PPC970_Unit = 3, Predicates = [HasFPU] in { // FPU Operations.
let isCompare = 1, mayRaiseFPException = 1, hasSideEffects = 0 in {
- def FCMPUS : XForm_17<63, 0, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB),
- "fcmpu $crD, $fA, $fB", IIC_FPCompare>;
- def FCMPOS : XForm_17<63, 32, (outs crrc:$crD), (ins f4rc:$fA, f4rc:$fB),
- "fcmpo $crD, $fA, $fB", IIC_FPCompare>;
+ def FCMPUS : XForm_17<63, 0, (outs crrc:$BF), (ins f4rc:$RA, f4rc:$RB),
+ "fcmpu $BF, $RA, $RB", IIC_FPCompare>;
+ def FCMPOS : XForm_17<63, 32, (outs crrc:$BF), (ins f4rc:$RA, f4rc:$RB),
+ "fcmpo $BF, $RA, $RB", IIC_FPCompare>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
- def FCMPUD : XForm_17<63, 0, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
- "fcmpu $crD, $fA, $fB", IIC_FPCompare>;
- def FCMPOD : XForm_17<63, 32, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
- "fcmpo $crD, $fA, $fB", IIC_FPCompare>;
+ def FCMPUD : XForm_17<63, 0, (outs crrc:$BF), (ins f8rc:$RA, f8rc:$RB),
+ "fcmpu $BF, $RA, $RB", IIC_FPCompare>;
+ def FCMPOD : XForm_17<63, 32, (outs crrc:$BF), (ins f8rc:$RA, f8rc:$RB),
+ "fcmpo $BF, $RA, $RB", IIC_FPCompare>;
}
}
-def FTDIV: XForm_17<63, 128, (outs crrc:$crD), (ins f8rc:$fA, f8rc:$fB),
- "ftdiv $crD, $fA, $fB", IIC_FPCompare>;
-def FTSQRT: XForm_17a<63, 160, (outs crrc:$crD), (ins f8rc:$fB),
- "ftsqrt $crD, $fB", IIC_FPCompare,
- [(set i32:$crD, (PPCftsqrt f64:$fB))]>;
+def FTDIV: XForm_17<63, 128, (outs crrc:$BF), (ins f8rc:$RA, f8rc:$RB),
+ "ftdiv $BF, $RA, $RB", IIC_FPCompare>;
+def FTSQRT: XForm_17a<63, 160, (outs crrc:$BF), (ins f8rc:$RB),
+ "ftsqrt $BF, $RB", IIC_FPCompare,
+ [(set i32:$BF, (PPCftsqrt f64:$RB))]>;
let mayRaiseFPException = 1, hasSideEffects = 0 in {
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
- defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
- "frin", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (any_fround f64:$frB))]>;
- defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
- "frin", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (any_fround f32:$frB))]>;
+ defm FRIND : XForm_26r<63, 392, (outs f8rc:$RST), (ins f8rc:$RB),
+ "frin", "$RST, $RB", IIC_FPGeneral,
+ [(set f64:$RST, (any_fround f64:$RB))]>;
+ defm FRINS : XForm_26r<63, 392, (outs f4rc:$RST), (ins f4rc:$RB),
+ "frin", "$RST, $RB", IIC_FPGeneral,
+ [(set f32:$RST, (any_fround f32:$RB))]>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
- defm FRIPD : XForm_26r<63, 456, (outs f8rc:$frD), (ins f8rc:$frB),
- "frip", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (any_fceil f64:$frB))]>;
- defm FRIPS : XForm_26r<63, 456, (outs f4rc:$frD), (ins f4rc:$frB),
- "frip", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (any_fceil f32:$frB))]>;
+ defm FRIPD : XForm_26r<63, 456, (outs f8rc:$RST), (ins f8rc:$RB),
+ "frip", "$RST, $RB", IIC_FPGeneral,
+ [(set f64:$RST, (any_fceil f64:$RB))]>;
+ defm FRIPS : XForm_26r<63, 456, (outs f4rc:$RST), (ins f4rc:$RB),
+ "frip", "$RST, $RB", IIC_FPGeneral,
+ [(set f32:$RST, (any_fceil f32:$RB))]>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
- defm FRIZD : XForm_26r<63, 424, (outs f8rc:$frD), (ins f8rc:$frB),
- "friz", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (any_ftrunc f64:$frB))]>;
- defm FRIZS : XForm_26r<63, 424, (outs f4rc:$frD), (ins f4rc:$frB),
- "friz", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (any_ftrunc f32:$frB))]>;
+ defm FRIZD : XForm_26r<63, 424, (outs f8rc:$RST), (ins f8rc:$RB),
+ "friz", "$RST, $RB", IIC_FPGeneral,
+ [(set f64:$RST, (any_ftrunc f64:$RB))]>;
+ defm FRIZS : XForm_26r<63, 424, (outs f4rc:$RST), (ins f4rc:$RB),
+ "friz", "$RST, $RB", IIC_FPGeneral,
+ [(set f32:$RST, (any_ftrunc f32:$RB))]>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
- defm FRIMD : XForm_26r<63, 488, (outs f8rc:$frD), (ins f8rc:$frB),
- "frim", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (any_ffloor f64:$frB))]>;
- defm FRIMS : XForm_26r<63, 488, (outs f4rc:$frD), (ins f4rc:$frB),
- "frim", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (any_ffloor f32:$frB))]>;
+ defm FRIMD : XForm_26r<63, 488, (outs f8rc:$RST), (ins f8rc:$RB),
+ "frim", "$RST, $RB", IIC_FPGeneral,
+ [(set f64:$RST, (any_ffloor f64:$RB))]>;
+ defm FRIMS : XForm_26r<63, 488, (outs f4rc:$RST), (ins f4rc:$RB),
+ "frim", "$RST, $RB", IIC_FPGeneral,
+ [(set f32:$RST, (any_ffloor f32:$RB))]>;
}
let Uses = [RM], mayRaiseFPException = 1, hasSideEffects = 0 in {
- defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiw", "$frD, $frB", IIC_FPGeneral,
+ defm FCTIW : XForm_26r<63, 14, (outs f8rc:$RST), (ins f8rc:$RB),
+ "fctiw", "$RST, $RB", IIC_FPGeneral,
[]>;
- defm FCTIWU : XForm_26r<63, 142, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiwu", "$frD, $frB", IIC_FPGeneral,
+ defm FCTIWU : XForm_26r<63, 142, (outs f8rc:$RST), (ins f8rc:$RB),
+ "fctiwu", "$RST, $RB", IIC_FPGeneral,
[]>;
- defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
- "fctiwz", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCany_fctiwz f64:$frB))]>;
+ defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$RST), (ins f8rc:$RB),
+ "fctiwz", "$RST, $RB", IIC_FPGeneral,
+ [(set f64:$RST, (PPCany_fctiwz f64:$RB))]>;
- defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB),
- "frsp", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (any_fpround f64:$frB))]>;
+ defm FRSP : XForm_26r<63, 12, (outs f4rc:$RST), (ins f8rc:$RB),
+ "frsp", "$RST, $RB", IIC_FPGeneral,
+ [(set f32:$RST, (any_fpround f64:$RB))]>;
- defm FSQRT : XForm_26r<63, 22, (outs f8rc:$frD), (ins f8rc:$frB),
- "fsqrt", "$frD, $frB", IIC_FPSqrtD,
- [(set f64:$frD, (any_fsqrt f64:$frB))]>;
- defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$frD), (ins f4rc:$frB),
- "fsqrts", "$frD, $frB", IIC_FPSqrtS,
- [(set f32:$frD, (any_fsqrt f32:$frB))]>;
+ defm FSQRT : XForm_26r<63, 22, (outs f8rc:$RST), (ins f8rc:$RB),
+ "fsqrt", "$RST, $RB", IIC_FPSqrtD,
+ [(set f64:$RST, (any_fsqrt f64:$RB))]>;
+ defm FSQRTS : XForm_26r<59, 22, (outs f4rc:$RST), (ins f4rc:$RB),
+ "fsqrts", "$RST, $RB", IIC_FPSqrtS,
+ [(set f32:$RST, (any_fsqrt f32:$RB))]>;
}
}
@@ -2405,57 +2521,57 @@ def : Pat<(PPCfsqrt f64:$frA), (FSQRT $frA)>;
/// that they will fill slots (which could cause the load of a LSU reject to
/// sneak into a d-group with a store).
let hasSideEffects = 0, Predicates = [HasFPU] in
-defm FMR : XForm_26r<63, 72, (outs f4rc:$frD), (ins f4rc:$frB),
- "fmr", "$frD, $frB", IIC_FPGeneral,
- []>, // (set f32:$frD, f32:$frB)
+defm FMR : XForm_26r<63, 72, (outs f4rc:$RST), (ins f4rc:$RB),
+ "fmr", "$RST, $RB", IIC_FPGeneral,
+ []>, // (set f32:$RST, f32:$RB)
PPC970_Unit_Pseudo;
let PPC970_Unit = 3, hasSideEffects = 0, Predicates = [HasFPU] in { // FPU Operations.
// These are artificially split into two different forms, for 4/8 byte FP.
-defm FABSS : XForm_26r<63, 264, (outs f4rc:$frD), (ins f4rc:$frB),
- "fabs", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (fabs f32:$frB))]>;
+defm FABSS : XForm_26r<63, 264, (outs f4rc:$RST), (ins f4rc:$RB),
+ "fabs", "$RST, $RB", IIC_FPGeneral,
+ [(set f32:$RST, (fabs f32:$RB))]>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
-defm FABSD : XForm_26r<63, 264, (outs f8rc:$frD), (ins f8rc:$frB),
- "fabs", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (fabs f64:$frB))]>;
-defm FNABSS : XForm_26r<63, 136, (outs f4rc:$frD), (ins f4rc:$frB),
- "fnabs", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (fneg (fabs f32:$frB)))]>;
+defm FABSD : XForm_26r<63, 264, (outs f8rc:$RST), (ins f8rc:$RB),
+ "fabs", "$RST, $RB", IIC_FPGeneral,
+ [(set f64:$RST, (fabs f64:$RB))]>;
+defm FNABSS : XForm_26r<63, 136, (outs f4rc:$RST), (ins f4rc:$RB),
+ "fnabs", "$RST, $RB", IIC_FPGeneral,
+ [(set f32:$RST, (fneg (fabs f32:$RB)))]>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
-defm FNABSD : XForm_26r<63, 136, (outs f8rc:$frD), (ins f8rc:$frB),
- "fnabs", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (fneg (fabs f64:$frB)))]>;
-defm FNEGS : XForm_26r<63, 40, (outs f4rc:$frD), (ins f4rc:$frB),
- "fneg", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (fneg f32:$frB))]>;
+defm FNABSD : XForm_26r<63, 136, (outs f8rc:$RST), (ins f8rc:$RB),
+ "fnabs", "$RST, $RB", IIC_FPGeneral,
+ [(set f64:$RST, (fneg (fabs f64:$RB)))]>;
+defm FNEGS : XForm_26r<63, 40, (outs f4rc:$RST), (ins f4rc:$RB),
+ "fneg", "$RST, $RB", IIC_FPGeneral,
+ [(set f32:$RST, (fneg f32:$RB))]>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
-defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB),
- "fneg", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (fneg f64:$frB))]>;
+defm FNEGD : XForm_26r<63, 40, (outs f8rc:$RST), (ins f8rc:$RB),
+ "fneg", "$RST, $RB", IIC_FPGeneral,
+ [(set f64:$RST, (fneg f64:$RB))]>;
-defm FCPSGNS : XForm_28r<63, 8, (outs f4rc:$frD), (ins f4rc:$frA, f4rc:$frB),
- "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral,
- [(set f32:$frD, (fcopysign f32:$frB, f32:$frA))]>;
+defm FCPSGNS : XForm_28r<63, 8, (outs f4rc:$RST), (ins f4rc:$RA, f4rc:$RB),
+ "fcpsgn", "$RST, $RA, $RB", IIC_FPGeneral,
+ [(set f32:$RST, (fcopysign f32:$RB, f32:$RA))]>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
-defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB),
- "fcpsgn", "$frD, $frA, $frB", IIC_FPGeneral,
- [(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>;
+defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$RST), (ins f8rc:$RA, f8rc:$RB),
+ "fcpsgn", "$RST, $RA, $RB", IIC_FPGeneral,
+ [(set f64:$RST, (fcopysign f64:$RB, f64:$RA))]>;
// Reciprocal estimates.
let mayRaiseFPException = 1 in {
-defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB),
- "fre", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCfre f64:$frB))]>;
-defm FRES : XForm_26r<59, 24, (outs f4rc:$frD), (ins f4rc:$frB),
- "fres", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (PPCfre f32:$frB))]>;
-defm FRSQRTE : XForm_26r<63, 26, (outs f8rc:$frD), (ins f8rc:$frB),
- "frsqrte", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (PPCfrsqrte f64:$frB))]>;
-defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$frD), (ins f4rc:$frB),
- "frsqrtes", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (PPCfrsqrte f32:$frB))]>;
+defm FRE : XForm_26r<63, 24, (outs f8rc:$RST), (ins f8rc:$RB),
+ "fre", "$RST, $RB", IIC_FPGeneral,
+ [(set f64:$RST, (PPCfre f64:$RB))]>;
+defm FRES : XForm_26r<59, 24, (outs f4rc:$RST), (ins f4rc:$RB),
+ "fres", "$RST, $RB", IIC_FPGeneral,
+ [(set f32:$RST, (PPCfre f32:$RB))]>;
+defm FRSQRTE : XForm_26r<63, 26, (outs f8rc:$RST), (ins f8rc:$RB),
+ "frsqrte", "$RST, $RB", IIC_FPGeneral,
+ [(set f64:$RST, (PPCfrsqrte f64:$RB))]>;
+defm FRSQRTES : XForm_26r<59, 26, (outs f4rc:$RST), (ins f4rc:$RB),
+ "frsqrtes", "$RST, $RB", IIC_FPGeneral,
+ [(set f32:$RST, (PPCfrsqrte f32:$RB))]>;
}
}
@@ -2523,13 +2639,13 @@ def CRORC : XLForm_1<19, 417, (outs crbitrc:$CRD),
let isCodeGenOnly = 1 in {
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
- "creqv $dst, $dst, $dst", IIC_BrCR,
- [(set i1:$dst, 1)]>;
+def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$CRD), (ins),
+ "creqv $CRD, $CRD, $CRD", IIC_BrCR,
+ [(set i1:$CRD, 1)]>;
-def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$dst), (ins),
- "crxor $dst, $dst, $dst", IIC_BrCR,
- [(set i1:$dst, 0)]>;
+def CRUNSET: XLForm_1_ext<19, 193, (outs crbitrc:$CRD), (ins),
+ "crxor $CRD, $CRD, $CRD", IIC_BrCR,
+ [(set i1:$CRD, 0)]>;
}
let Defs = [CR1EQ], CRD = 6 in {
@@ -2546,19 +2662,19 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
// XFX-Form instructions. Instructions that deal with SPRs.
//
-def MFSPR : XFXForm_1<31, 339, (outs gprc:$RT), (ins i32imm:$SPR),
- "mfspr $RT, $SPR", IIC_SprMFSPR>;
-def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT),
- "mtspr $SPR, $RT", IIC_SprMTSPR>;
+def MFSPR : XFXForm_1<31, 339, (outs gprc:$RST), (ins i32imm:$SPR),
+ "mfspr $RST, $SPR", IIC_SprMFSPR>;
+def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RST),
+ "mtspr $SPR, $RST", IIC_SprMTSPR>;
-def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR),
- "mftb $RT, $SPR", IIC_SprMFTB>;
+def MFTB : XFXForm_1<31, 371, (outs gprc:$RST), (ins i32imm:$SPR),
+ "mftb $RST, $SPR", IIC_SprMFTB>;
-def MFPMR : XFXForm_1<31, 334, (outs gprc:$RT), (ins i32imm:$SPR),
- "mfpmr $RT, $SPR", IIC_SprMFPMR>;
+def MFPMR : XFXForm_1<31, 334, (outs gprc:$RST), (ins i32imm:$SPR),
+ "mfpmr $RST, $SPR", IIC_SprMFPMR>;
-def MTPMR : XFXForm_1<31, 462, (outs), (ins i32imm:$SPR, gprc:$RT),
- "mtpmr $SPR, $RT", IIC_SprMTPMR>;
+def MTPMR : XFXForm_1<31, 462, (outs), (ins i32imm:$SPR, gprc:$RST),
+ "mtpmr $SPR, $RST", IIC_SprMTPMR>;
// A pseudo-instruction used to implement the read of the 64-bit cycle counter
@@ -2568,19 +2684,19 @@ def ReadTB : PPCCustomInserterPseudo<(outs gprc:$lo, gprc:$hi), (ins),
"#ReadTB", []>;
let Uses = [CTR] in {
-def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins),
- "mfctr $rT", IIC_SprMFSPR>,
+def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$RST), (ins),
+ "mfctr $RST", IIC_SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-let Defs = [CTR], Pattern = [(PPCmtctr i32:$rS)] in {
-def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
- "mtctr $rS", IIC_SprMTSPR>,
+let Defs = [CTR], Pattern = [(PPCmtctr i32:$RST)] in {
+def MTCTR : XFXForm_1_ext<31, 467, 9, (outs), (ins gprc:$RST),
+ "mtctr $RST", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let hasSideEffects = 1, isCodeGenOnly = 1, isNotDuplicable = 1, Defs = [CTR] in {
-let Pattern = [(int_set_loop_iterations i32:$rS)] in
-def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
- "mtctr $rS", IIC_SprMTSPR>,
+let Pattern = [(int_set_loop_iterations i32:$RST)] in
+def MTCTRloop : XFXForm_1_ext<31, 467, 9, (outs), (ins gprc:$RST),
+ "mtctr $RST", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
@@ -2590,23 +2706,23 @@ def DecreaseCTRloop : PPCEmitTimePseudo<(outs crbitrc:$rT), (ins i32imm:$stride)
let hasSideEffects = 0 in {
let Defs = [LR] in {
-def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS),
- "mtlr $rS", IIC_SprMTSPR>,
+def MTLR : XFXForm_1_ext<31, 467, 8, (outs), (ins gprc:$RST),
+ "mtlr $RST", IIC_SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
let Uses = [LR] in {
-def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins),
- "mflr $rT", IIC_SprMFSPR>,
+def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$RST), (ins),
+ "mflr $RST", IIC_SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
}
let hasSideEffects = 1 in {
- def MTUDSCR : XFXForm_7_ext<31, 467, 3, (outs), (ins gprc:$rX),
- "mtspr 3, $rX", IIC_SprMTSPR>,
+ def MTUDSCR : XFXForm_1_ext<31, 467, 3, (outs), (ins gprc:$RST),
+ "mtspr 3, $RST", IIC_SprMTSPR>,
PPC970_DGroup_Single, PPC970_Unit_FXU;
- def MFUDSCR : XFXForm_1_ext<31, 339, 3, (outs gprc:$rX), (ins),
- "mfspr $rX, 3", IIC_SprMFSPR>,
+ def MFUDSCR : XFXForm_1_ext<31, 339, 3, (outs gprc:$RST), (ins),
+ "mfspr $RST, 3", IIC_SprMFSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
@@ -2621,20 +2737,20 @@ let isCodeGenOnly = 1 in {
// Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed
// like a GPR on the PPC970. As such, copies in and out have the same
// performance characteristics as an OR instruction.
- def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS),
- "mtspr 256, $rS", IIC_IntGeneral>,
+ def MTVRSAVE : XFXForm_1_ext<31, 467, 256, (outs), (ins gprc:$RST),
+ "mtspr 256, $RST", IIC_IntGeneral>,
PPC970_DGroup_Single, PPC970_Unit_FXU;
- def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins),
- "mfspr $rT, 256", IIC_IntGeneral>,
+ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$RST), (ins),
+ "mfspr $RST, 256", IIC_IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
- def MTVRSAVEv : XFXForm_7_ext<31, 467, 256,
- (outs VRSAVERC:$reg), (ins gprc:$rS),
- "mtspr 256, $rS", IIC_IntGeneral>,
+ def MTVRSAVEv : XFXForm_1_ext<31, 467, 256,
+ (outs VRSAVERC:$SPR), (ins gprc:$RST),
+ "mtspr 256, $RST", IIC_IntGeneral>,
PPC970_DGroup_Single, PPC970_Unit_FXU;
- def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT),
- (ins VRSAVERC:$reg),
- "mfspr $rT, 256", IIC_IntGeneral>,
+ def MFVRSAVEv : XFXForm_1_ext<31, 339, 256, (outs gprc:$RST),
+ (ins VRSAVERC:$SPR),
+ "mfspr $RST, 256", IIC_IntGeneral>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
@@ -2647,14 +2763,14 @@ let hasSideEffects = 0 in {
// on the cr register selected. Thus, post-ra anti-dep breaking must not
// later change that register assignment.
let hasExtraDefRegAllocReq = 1 in {
-def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$ST),
- "mtocrf $FXM, $ST", IIC_BrMCRX>,
+def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$RST),
+ "mtocrf $FXM, $RST", IIC_BrMCRX>,
PPC970_DGroup_First, PPC970_Unit_CRU;
// Similarly to mtocrf, the mask for mtcrf must be prepared in a way that
// is dependent on the cr fields being set.
-def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS),
- "mtcrf $FXM, $rS", IIC_BrMCRX>,
+def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$RST),
+ "mtcrf $FXM, $RST", IIC_BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
} // hasExtraDefRegAllocReq = 1
@@ -2662,14 +2778,14 @@ def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS),
// on the cr register selected. Thus, post-ra anti-dep breaking must not
// later change that register assignment.
let hasExtraSrcRegAllocReq = 1 in {
-def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM),
- "mfocrf $rT, $FXM", IIC_SprMFCRF>,
+def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$RST), (ins crbitm:$FXM),
+ "mfocrf $RST, $FXM", IIC_SprMFCRF>,
PPC970_DGroup_First, PPC970_Unit_CRU;
// Similarly to mfocrf, the mask for mfcrf must be prepared in a way that
// is dependent on the cr fields being copied.
-def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins),
- "mfcr $rT", IIC_SprMFCR>,
+def MFCR : XFXForm_3<31, 19, (outs gprc:$RT), (ins),
+ "mfcr $RT", IIC_SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
} // hasExtraSrcRegAllocReq = 1
@@ -2704,136 +2820,136 @@ def MTFSB1 : XForm_43<63, 38, (outs), (ins u5imm:$FM),
let Defs = [RM], hasSideEffects = 1 in {
let isCodeGenOnly = 1 in
- def MTFSFb : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$rT),
- "mtfsf $FM, $rT", IIC_IntMTFSB0,
- [(int_ppc_mtfsf timm:$FM, f64:$rT)]>,
+ def MTFSFb : XFLForm<63, 711, (outs), (ins i32imm:$FM, f8rc:$RT),
+ "mtfsf $FM, $RT", IIC_IntMTFSB0,
+ [(int_ppc_mtfsf timm:$FM, f64:$RT)]>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
let Uses = [RM], hasSideEffects = 1 in {
- def MFFS : XForm_42<63, 583, (outs f8rc:$rT), (ins),
- "mffs $rT", IIC_IntMFFS,
- [(set f64:$rT, (PPCmffs))]>,
+ def MFFS : XForm_42<63, 583, (outs f8rc:$RST), (ins),
+ "mffs $RST", IIC_IntMFFS,
+ [(set f64:$RST, (PPCmffs))]>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
let Defs = [CR1] in
- def MFFS_rec : XForm_42<63, 583, (outs f8rc:$rT), (ins),
- "mffs. $rT", IIC_IntMFFS, []>, isRecordForm;
+ def MFFS_rec : XForm_42<63, 583, (outs f8rc:$RST), (ins),
+ "mffs. $RST", IIC_IntMFFS, []>, isRecordForm;
- def MFFSCE : X_FRT5_XO2_XO3_XO10<63, 0, 1, 583, (outs f8rc:$rT), (ins),
- "mffsce $rT", IIC_IntMFFS, []>,
+ def MFFSCE : X_FRT5_XO2_XO3_XO10<63, 0, 1, 583, (outs f8rc:$RST), (ins),
+ "mffsce $RST", IIC_IntMFFS, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
- def MFFSCDRN : X_FRT5_XO2_XO3_FRB5_XO10<63, 2, 4, 583, (outs f8rc:$rT),
- (ins f8rc:$FRB), "mffscdrn $rT, $FRB",
+ def MFFSCDRN : X_FRT5_XO2_XO3_FRB5_XO10<63, 2, 4, 583, (outs f8rc:$RST),
+ (ins f8rc:$FRB), "mffscdrn $RST, $FRB",
IIC_IntMFFS, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
- def MFFSCDRNI : X_FRT5_XO2_XO3_DRM3_XO10<63, 2, 5, 583, (outs f8rc:$rT),
+ def MFFSCDRNI : X_FRT5_XO2_XO3_DRM3_XO10<63, 2, 5, 583, (outs f8rc:$RST),
(ins u3imm:$DRM),
- "mffscdrni $rT, $DRM",
+ "mffscdrni $RST, $DRM",
IIC_IntMFFS, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
- def MFFSCRN : X_FRT5_XO2_XO3_FRB5_XO10<63, 2, 6, 583, (outs f8rc:$rT),
- (ins f8rc:$FRB), "mffscrn $rT, $FRB",
+ def MFFSCRN : X_FRT5_XO2_XO3_FRB5_XO10<63, 2, 6, 583, (outs f8rc:$RST),
+ (ins f8rc:$FRB), "mffscrn $RST, $FRB",
IIC_IntMFFS, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
- def MFFSCRNI : X_FRT5_XO2_XO3_RM2_X10<63, 2, 7, 583, (outs f8rc:$rT),
- (ins u2imm:$RM), "mffscrni $rT, $RM",
+ def MFFSCRNI : X_FRT5_XO2_XO3_RM2_X10<63, 2, 7, 583, (outs f8rc:$RST),
+ (ins u2imm:$RM), "mffscrni $RST, $RM",
IIC_IntMFFS, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
- def MFFSL : X_FRT5_XO2_XO3_XO10<63, 3, 0, 583, (outs f8rc:$rT), (ins),
- "mffsl $rT", IIC_IntMFFS, []>,
+ def MFFSL : X_FRT5_XO2_XO3_XO10<63, 3, 0, 583, (outs f8rc:$RST), (ins),
+ "mffsl $RST", IIC_IntMFFS, []>,
PPC970_DGroup_Single, PPC970_Unit_FPU;
}
}
let Predicates = [IsISA3_0] in {
-def MODSW : XForm_8<31, 779, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "modsw $rT, $rA, $rB", IIC_IntDivW,
- [(set i32:$rT, (srem i32:$rA, i32:$rB))]>;
-def MODUW : XForm_8<31, 267, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "moduw $rT, $rA, $rB", IIC_IntDivW,
- [(set i32:$rT, (urem i32:$rA, i32:$rB))]>;
+def MODSW : XForm_8<31, 779, (outs gprc:$RST), (ins gprc:$RA, gprc:$RB),
+ "modsw $RST, $RA, $RB", IIC_IntDivW,
+ [(set i32:$RST, (srem i32:$RA, i32:$RB))]>;
+def MODUW : XForm_8<31, 267, (outs gprc:$RST), (ins gprc:$RA, gprc:$RB),
+ "moduw $RST, $RA, $RB", IIC_IntDivW,
+ [(set i32:$RST, (urem i32:$RA, i32:$RB))]>;
let hasSideEffects = 1 in
-def ADDEX : Z23Form_RTAB5_CY2<31, 170, (outs gprc:$rT),
- (ins gprc:$rA, gprc:$rB, u2imm:$CY),
- "addex $rT, $rA, $rB, $CY", IIC_IntGeneral, []>;
+def ADDEX : Z23Form_RTAB5_CY2<31, 170, (outs gprc:$RT),
+ (ins gprc:$RA, gprc:$RB, u2imm:$CY),
+ "addex $RT, $RA, $RB, $CY", IIC_IntGeneral, []>;
}
let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations.
// XO-Form instructions. Arithmetic instructions that can set overflow bit
let isCommutable = 1 in
-defm ADD4 : XOForm_1rx<31, 266, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "add", "$rT, $rA, $rB", IIC_IntSimple,
- [(set i32:$rT, (add i32:$rA, i32:$rB))]>;
+defm ADD4 : XOForm_1rx<31, 266, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "add", "$RT, $RA, $RB", IIC_IntSimple,
+ [(set i32:$RT, (add i32:$RA, i32:$RB))]>;
let isCodeGenOnly = 1 in
-def ADD4TLS : XOForm_1<31, 266, 0, (outs gprc:$rT), (ins gprc:$rA, tlsreg32:$rB),
- "add $rT, $rA, $rB", IIC_IntSimple,
- [(set i32:$rT, (add i32:$rA, tglobaltlsaddr:$rB))]>;
+def ADD4TLS : XOForm_1<31, 266, 0, (outs gprc:$RT), (ins gprc:$RA, tlsreg32:$RB),
+ "add $RT, $RA, $RB", IIC_IntSimple,
+ [(set i32:$RT, (add i32:$RA, tglobaltlsaddr:$RB))]>;
let isCommutable = 1 in
-defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "addc", "$rT, $rA, $rB", IIC_IntGeneral,
- [(set i32:$rT, (addc i32:$rA, i32:$rB))]>,
+defm ADDC : XOForm_1rc<31, 10, 0, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "addc", "$RT, $RA, $RB", IIC_IntGeneral,
+ [(set i32:$RT, (addc i32:$RA, i32:$RB))]>,
PPC970_DGroup_Cracked;
-defm DIVW : XOForm_1rcr<31, 491, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divw", "$rT, $rA, $rB", IIC_IntDivW,
- [(set i32:$rT, (sdiv i32:$rA, i32:$rB))]>;
-defm DIVWU : XOForm_1rcr<31, 459, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divwu", "$rT, $rA, $rB", IIC_IntDivW,
- [(set i32:$rT, (udiv i32:$rA, i32:$rB))]>;
-defm DIVWE : XOForm_1rcr<31, 427, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divwe", "$rT, $rA, $rB", IIC_IntDivW,
- [(set i32:$rT, (int_ppc_divwe gprc:$rA, gprc:$rB))]>,
+defm DIVW : XOForm_1rcr<31, 491, 0, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "divw", "$RT, $RA, $RB", IIC_IntDivW,
+ [(set i32:$RT, (sdiv i32:$RA, i32:$RB))]>;
+defm DIVWU : XOForm_1rcr<31, 459, 0, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "divwu", "$RT, $RA, $RB", IIC_IntDivW,
+ [(set i32:$RT, (udiv i32:$RA, i32:$RB))]>;
+defm DIVWE : XOForm_1rcr<31, 427, 0, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "divwe", "$RT, $RA, $RB", IIC_IntDivW,
+ [(set i32:$RT, (int_ppc_divwe gprc:$RA, gprc:$RB))]>,
Requires<[HasExtDiv]>;
-defm DIVWEU : XOForm_1rcr<31, 395, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "divweu", "$rT, $rA, $rB", IIC_IntDivW,
- [(set i32:$rT, (int_ppc_divweu gprc:$rA, gprc:$rB))]>,
+defm DIVWEU : XOForm_1rcr<31, 395, 0, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "divweu", "$RT, $RA, $RB", IIC_IntDivW,
+ [(set i32:$RT, (int_ppc_divweu gprc:$RA, gprc:$RB))]>,
Requires<[HasExtDiv]>;
let isCommutable = 1 in {
-defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "mulhw", "$rT, $rA, $rB", IIC_IntMulHW,
- [(set i32:$rT, (mulhs i32:$rA, i32:$rB))]>;
-defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "mulhwu", "$rT, $rA, $rB", IIC_IntMulHWU,
- [(set i32:$rT, (mulhu i32:$rA, i32:$rB))]>;
-defm MULLW : XOForm_1rx<31, 235, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "mullw", "$rT, $rA, $rB", IIC_IntMulHW,
- [(set i32:$rT, (mul i32:$rA, i32:$rB))]>;
+defm MULHW : XOForm_1r<31, 75, 0, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "mulhw", "$RT, $RA, $RB", IIC_IntMulHW,
+ [(set i32:$RT, (mulhs i32:$RA, i32:$RB))]>;
+defm MULHWU : XOForm_1r<31, 11, 0, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "mulhwu", "$RT, $RA, $RB", IIC_IntMulHWU,
+ [(set i32:$RT, (mulhu i32:$RA, i32:$RB))]>;
+defm MULLW : XOForm_1rx<31, 235, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "mullw", "$RT, $RA, $RB", IIC_IntMulHW,
+ [(set i32:$RT, (mul i32:$RA, i32:$RB))]>;
} // isCommutable
-defm SUBF : XOForm_1rx<31, 40, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "subf", "$rT, $rA, $rB", IIC_IntGeneral,
- [(set i32:$rT, (sub i32:$rB, i32:$rA))]>;
-defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "subfc", "$rT, $rA, $rB", IIC_IntGeneral,
- [(set i32:$rT, (subc i32:$rB, i32:$rA))]>,
+defm SUBF : XOForm_1rx<31, 40, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "subf", "$RT, $RA, $RB", IIC_IntGeneral,
+ [(set i32:$RT, (sub i32:$RB, i32:$RA))]>;
+defm SUBFC : XOForm_1rc<31, 8, 0, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "subfc", "$RT, $RA, $RB", IIC_IntGeneral,
+ [(set i32:$RT, (subc i32:$RB, i32:$RA))]>,
PPC970_DGroup_Cracked;
-defm NEG : XOForm_3r<31, 104, 0, (outs gprc:$rT), (ins gprc:$rA),
- "neg", "$rT, $rA", IIC_IntSimple,
- [(set i32:$rT, (ineg i32:$rA))]>;
+defm NEG : XOForm_3r<31, 104, 0, (outs gprc:$RT), (ins gprc:$RA),
+ "neg", "$RT, $RA", IIC_IntSimple,
+ [(set i32:$RT, (ineg i32:$RA))]>;
let Uses = [CARRY] in {
let isCommutable = 1 in
-defm ADDE : XOForm_1rc<31, 138, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "adde", "$rT, $rA, $rB", IIC_IntGeneral,
- [(set i32:$rT, (adde i32:$rA, i32:$rB))]>;
-defm ADDME : XOForm_3rc<31, 234, 0, (outs gprc:$rT), (ins gprc:$rA),
- "addme", "$rT, $rA", IIC_IntGeneral,
- [(set i32:$rT, (adde i32:$rA, -1))]>;
-defm ADDZE : XOForm_3rc<31, 202, 0, (outs gprc:$rT), (ins gprc:$rA),
- "addze", "$rT, $rA", IIC_IntGeneral,
- [(set i32:$rT, (adde i32:$rA, 0))]>;
-defm SUBFE : XOForm_1rc<31, 136, 0, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB),
- "subfe", "$rT, $rA, $rB", IIC_IntGeneral,
- [(set i32:$rT, (sube i32:$rB, i32:$rA))]>;
-defm SUBFME : XOForm_3rc<31, 232, 0, (outs gprc:$rT), (ins gprc:$rA),
- "subfme", "$rT, $rA", IIC_IntGeneral,
- [(set i32:$rT, (sube -1, i32:$rA))]>;
-defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$rT), (ins gprc:$rA),
- "subfze", "$rT, $rA", IIC_IntGeneral,
- [(set i32:$rT, (sube 0, i32:$rA))]>;
+defm ADDE : XOForm_1rc<31, 138, 0, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "adde", "$RT, $RA, $RB", IIC_IntGeneral,
+ [(set i32:$RT, (adde i32:$RA, i32:$RB))]>;
+defm ADDME : XOForm_3rc<31, 234, 0, (outs gprc:$RT), (ins gprc:$RA),
+ "addme", "$RT, $RA", IIC_IntGeneral,
+ [(set i32:$RT, (adde i32:$RA, -1))]>;
+defm ADDZE : XOForm_3rc<31, 202, 0, (outs gprc:$RT), (ins gprc:$RA),
+ "addze", "$RT, $RA", IIC_IntGeneral,
+ [(set i32:$RT, (adde i32:$RA, 0))]>;
+defm SUBFE : XOForm_1rc<31, 136, 0, (outs gprc:$RT), (ins gprc:$RA, gprc:$RB),
+ "subfe", "$RT, $RA, $RB", IIC_IntGeneral,
+ [(set i32:$RT, (sube i32:$RB, i32:$RA))]>;
+defm SUBFME : XOForm_3rc<31, 232, 0, (outs gprc:$RT), (ins gprc:$RA),
+ "subfme", "$RT, $RA", IIC_IntGeneral,
+ [(set i32:$RT, (sube -1, i32:$RA))]>;
+defm SUBFZE : XOForm_3rc<31, 200, 0, (outs gprc:$RT), (ins gprc:$RA),
+ "subfze", "$RT, $RA", IIC_IntGeneral,
+ [(set i32:$RT, (sube 0, i32:$RA))]>;
}
}
@@ -2945,8 +3061,8 @@ let hasSideEffects = 0 in {
let PPC970_Unit = 1 in { // FXU Operations.
let isSelect = 1 in
def ISEL : AForm_4<31, 15,
- (outs gprc:$rT), (ins gprc_nor0:$rA, gprc:$rB, crbitrc:$cond),
- "isel $rT, $rA, $rB, $cond", IIC_IntISEL,
+ (outs gprc:$RT), (ins gprc_nor0:$RA, gprc:$RB, crbitrc:$COND),
+ "isel $RT, $RA, $RB, $COND", IIC_IntISEL,
[]>;
}
@@ -2955,26 +3071,26 @@ let PPC970_Unit = 1 in { // FXU Operations.
//
let isCommutable = 1 in {
// RLWIMI can be commuted if the rotate amount is zero.
-defm RLWIMI : MForm_2r<20, (outs gprc:$rA),
- (ins gprc:$rSi, gprc:$rS, u5imm:$SH, u5imm:$MB,
- u5imm:$ME), "rlwimi", "$rA, $rS, $SH, $MB, $ME",
+defm RLWIMI : MForm_2r<20, (outs gprc:$RA),
+ (ins gprc:$RAi, gprc:$RS, u5imm:$SH, u5imm:$MB,
+ u5imm:$ME), "rlwimi", "$RA, $RS, $SH, $MB, $ME",
IIC_IntRotate, []>, PPC970_DGroup_Cracked,
- RegConstraint<"$rSi = $rA">, NoEncode<"$rSi">;
+ RegConstraint<"$RAi = $RA">, NoEncode<"$RAi">;
}
let BaseName = "rlwinm" in {
def RLWINM : MForm_2<21,
- (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
- "rlwinm $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
+ (outs gprc:$RA), (ins gprc:$RS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm $RA, $RS, $SH, $MB, $ME", IIC_IntGeneral,
[]>, RecFormRel;
let Defs = [CR0] in
def RLWINM_rec : MForm_2<21,
- (outs gprc:$rA), (ins gprc:$rS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
- "rlwinm. $rA, $rS, $SH, $MB, $ME", IIC_IntGeneral,
+ (outs gprc:$RA), (ins gprc:$RS, u5imm:$SH, u5imm:$MB, u5imm:$ME),
+ "rlwinm. $RA, $RS, $SH, $MB, $ME", IIC_IntGeneral,
[]>, isRecordForm, RecFormRel, PPC970_DGroup_Cracked;
}
-defm RLWNM : MForm_2r<23, (outs gprc:$rA),
- (ins gprc:$rS, gprc:$rB, u5imm:$MB, u5imm:$ME),
- "rlwnm", "$rA, $rS, $rB, $MB, $ME", IIC_IntGeneral,
+defm RLWNM : MForm_1r<23, (outs gprc:$RA),
+ (ins gprc:$RS, gprc:$RB, u5imm:$MB, u5imm:$ME),
+ "rlwnm", "$RA, $RS, $RB, $MB, $ME", IIC_IntGeneral,
[]>;
}
} // hasSideEffects = 0
@@ -3125,6 +3241,24 @@ def GETtlsADDR32AIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins gprc:$offset, gprc
"GETtlsADDR32AIX",
[(set i32:$rD,
(PPCgetTlsAddr i32:$offset, i32:$handle))]>;
+
+// For local-exec accesses on 32-bit AIX, a call to .__get_tpointer is
+// generated to retrieve the thread pointer. GETtlsTpointer32AIX clobbers both
+// R3 and the LR (link register).
+let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
+ Defs = [R3,LR] in
+def GETtlsTpointer32AIX : PPCEmitTimePseudo<(outs gprc:$rD), (ins),
+ "GETtlsTpointer32AIX",
+ [(set i32:$rD, (PPCgetTpointer))]>;
+
+// The following pattern matches local-exec TLS accesses on 32-bit AIX.
+// PPCaddTls is used in local-exec accesses in order to:
+// - Get the address of a variable (add the variable offset to the thread
+// pointer, retrieved by calling .__get_tpointer).
+// - Create an opportunity to optimize the user of the loaded address.
+def : Pat<(PPCaddTls i32:$in, i32:$addr),
+ (ADD4TLS $in, $addr)>;
+
// Combined op for ADDItlsgdL32 and GETtlsADDR32, late expanded. R3 and LR
// are true defines while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
@@ -3300,6 +3434,7 @@ include "PPCInstrP10.td"
include "PPCInstrFutureMMA.td"
include "PPCInstrFuture.td"
include "PPCInstrMMA.td"
+include "PPCInstrDFP.td"
// Patterns for arithmetic i1 operations.
def : Pat<(add i1:$a, i1:$b),
@@ -3327,9 +3462,9 @@ def : Pat<(i64 (sext i1:$in)),
// FIXME: We should choose either a zext or a sext based on other constants
// already around.
def : Pat<(i32 (anyext i1:$in)),
- (SELECT_I4 crbitrc:$in, (LI 1), (LI 0))>;
+ (SELECT_I4 $in, (LI 1), (LI 0))>;
def : Pat<(i64 (anyext i1:$in)),
- (SELECT_I8 crbitrc:$in, (LI8 1), (LI8 0))>;
+ (SELECT_I8 $in, (LI8 1), (LI8 0))>;
// match setcc on i1 variables.
// CRANDC is:
@@ -3735,34 +3870,34 @@ defm : CRNotPat<(i1 (setcc i64:$s1, i64:$s2, SETNE)),
multiclass FSetCCPat<SDPatternOperator SetCC, ValueType Ty, I FCmp> {
defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUGE)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_lt)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETGE)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_lt)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETULE)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_gt)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETLE)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_gt)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUNE)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_eq)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETNE)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_eq)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
defm : CRNotPat<(i1 (SetCC Ty:$s1, Ty:$s2, SETO)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_un)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>;
def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOLT)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_lt)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETLT)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_lt)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_lt)>;
def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOGT)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_gt)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETGT)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_gt)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_gt)>;
def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETOEQ)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_eq)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETEQ)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_eq)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_eq)>;
def : Pat<(i1 (SetCC Ty:$s1, Ty:$s2, SETUO)),
- (EXTRACT_SUBREG (FCmp Ty:$s1, Ty:$s2), sub_un)>;
+ (EXTRACT_SUBREG (FCmp $s1, $s2), sub_un)>;
}
let Predicates = [HasFPU] in {
@@ -4043,21 +4178,21 @@ def : Pat<(int_ppc_frsqrtes f4rc:$frB), (FRSQRTES $frB)>;
// FIXME: For B=0 or B > 8, the registers following RT are used.
// WARNING: Do not add patterns for this instruction without fixing this.
-def LSWI : XForm_base_r3xo_memOp<31, 597, (outs gprc:$RT),
- (ins gprc:$A, u5imm:$B),
- "lswi $RT, $A, $B", IIC_LdStLoad, []>;
+def LSWI : XForm_base_r3xo_memOp<31, 597, (outs gprc:$RST),
+ (ins gprc:$RA, u5imm:$RB),
+ "lswi $RST, $RA, $RB", IIC_LdStLoad, []>;
// FIXME: For B=0 or B > 8, the registers following RT are used.
// WARNING: Do not add patterns for this instruction without fixing this.
def STSWI : XForm_base_r3xo_memOp<31, 725, (outs),
- (ins gprc:$RT, gprc:$A, u5imm:$B),
- "stswi $RT, $A, $B", IIC_LdStLoad, []>;
+ (ins gprc:$RST, gprc:$RA, u5imm:$RB),
+ "stswi $RST, $RA, $RB", IIC_LdStLoad, []>;
def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
"isync", IIC_SprISYNC, []>;
-def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src),
- "icbi $src", IIC_LdStICBI, []>;
+def ICBI : XForm_1a<31, 982, (outs), (ins (memrr $RA, $RB):$addr),
+ "icbi $addr", IIC_LdStICBI, []>;
def WAIT : XForm_24_sync<31, 30, (outs), (ins u2imm:$L),
"wait $L", IIC_LdStLoad, []>;
@@ -4093,18 +4228,18 @@ def WRTEEI: I<31, (outs), (ins i1imm:$E), "wrteei $E", IIC_SprMTMSR>,
let Inst{21-30} = 163;
}
-def DCCCI : XForm_tlb<454, (outs), (ins gprc:$A, gprc:$B),
- "dccci $A, $B", IIC_LdStLoad>, Requires<[IsPPC4xx]>;
-def ICCCI : XForm_tlb<966, (outs), (ins gprc:$A, gprc:$B),
- "iccci $A, $B", IIC_LdStLoad>, Requires<[IsPPC4xx]>;
+def DCCCI : XForm_tlb<454, (outs), (ins gprc:$RA, gprc:$RB),
+ "dccci $RA, $RB", IIC_LdStLoad>, Requires<[IsPPC4xx]>;
+def ICCCI : XForm_tlb<966, (outs), (ins gprc:$RA, gprc:$RB),
+ "iccci $RA, $RB", IIC_LdStLoad>, Requires<[IsPPC4xx]>;
def : InstAlias<"dci 0", (DCCCI R0, R0)>, Requires<[IsPPC4xx]>;
def : InstAlias<"dccci", (DCCCI R0, R0)>, Requires<[IsPPC4xx]>;
def : InstAlias<"ici 0", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>;
def : InstAlias<"iccci", (ICCCI R0, R0)>, Requires<[IsPPC4xx]>;
-def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins),
- "mfmsr $RT", IIC_SprMFMSR, []>;
+def MFMSR : XForm_rs<31, 83, (outs gprc:$RST), (ins),
+ "mfmsr $RST", IIC_SprMFMSR, []>;
def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, u1imm:$L),
"mtmsrd $RS, $L", IIC_SprMTMSRD>;
@@ -4144,11 +4279,11 @@ def : InstAlias<"mtfsf. $FLM, $FRB", (MTFSF_rec i32imm:$FLM, f8rc:$FRB, 0, 0)>;
def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB),
"slbie $RB", IIC_SprSLBIE, []>;
-def SLBMTE : XForm_26<31, 402, (outs), (ins gprc:$RS, gprc:$RB),
- "slbmte $RS, $RB", IIC_SprSLBMTE, []>;
+def SLBMTE : XForm_26<31, 402, (outs), (ins gprc:$RST, gprc:$RB),
+ "slbmte $RST, $RB", IIC_SprSLBMTE, []>;
-def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB),
- "slbmfee $RT, $RB", IIC_SprSLBMFEE, []>;
+def SLBMFEE : XForm_26<31, 915, (outs gprc:$RST), (ins gprc:$RB),
+ "slbmfee $RST, $RB", IIC_SprSLBMFEE, []>;
def SLBMFEV : XLForm_1_gen<31, 851, (outs gprc:$RT), (ins gprc:$RB),
"slbmfev $RT, $RB", IIC_SprSLBMFEV, []>;
@@ -4156,8 +4291,8 @@ def SLBMFEV : XLForm_1_gen<31, 851, (outs gprc:$RT), (ins gprc:$RB),
def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>;
let Defs = [CR0] in
-def SLBFEE_rec : XForm_26<31, 979, (outs gprc:$RT), (ins gprc:$RB),
- "slbfee. $RT, $RB", IIC_SprSLBFEE, []>, isRecordForm;
+def SLBFEE_rec : XForm_26<31, 979, (outs gprc:$RST), (ins gprc:$RB),
+ "slbfee. $RST, $RB", IIC_SprSLBFEE, []>, isRecordForm;
def TLBIA : XForm_0<31, 370, (outs), (ins),
"tlbia", IIC_SprTLBIA, []>;
@@ -4173,13 +4308,13 @@ def TLBLD : XForm_16b<31, 978, (outs), (ins gprc:$RB),
def TLBLI : XForm_16b<31, 1010, (outs), (ins gprc:$RB),
"tlbli $RB", IIC_LdStLoad, []>, Requires<[IsPPC6xx]>;
-def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB),
- "tlbie $RB,$RS", IIC_SprTLBIE, []>;
+def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RST, gprc:$RB),
+ "tlbie $RB,$RST", IIC_SprTLBIE, []>;
-def TLBSX : XForm_tlb<914, (outs), (ins gprc:$A, gprc:$B), "tlbsx $A, $B",
+def TLBSX : XForm_tlb<914, (outs), (ins gprc:$RA, gprc:$RB), "tlbsx $RA, $RB",
IIC_LdStLoad>, Requires<[IsBookE]>;
-def TLBIVAX : XForm_tlb<786, (outs), (ins gprc:$A, gprc:$B), "tlbivax $A, $B",
+def TLBIVAX : XForm_tlb<786, (outs), (ins gprc:$RA, gprc:$RB), "tlbivax $RA, $RB",
IIC_LdStLoad>, Requires<[IsBookE]>;
def TLBRE : XForm_24_eieio<31, 946, (outs), (ins),
@@ -4188,18 +4323,18 @@ def TLBRE : XForm_24_eieio<31, 946, (outs), (ins),
def TLBWE : XForm_24_eieio<31, 978, (outs), (ins),
"tlbwe", IIC_LdStLoad, []>, Requires<[IsBookE]>;
-def TLBRE2 : XForm_tlbws<31, 946, (outs gprc:$RS), (ins gprc:$A, i1imm:$WS),
- "tlbre $RS, $A, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>;
+def TLBRE2 : XForm_tlbws<31, 946, (outs gprc:$RST), (ins gprc:$RA, i1imm:$WS),
+ "tlbre $RST, $RA, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>;
-def TLBWE2 : XForm_tlbws<31, 978, (outs), (ins gprc:$RS, gprc:$A, i1imm:$WS),
- "tlbwe $RS, $A, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>;
+def TLBWE2 : XForm_tlbws<31, 978, (outs), (ins gprc:$RST, gprc:$RA, i1imm:$WS),
+ "tlbwe $RST, $RA, $WS", IIC_LdStLoad, []>, Requires<[IsPPC4xx]>;
-def TLBSX2 : XForm_base_r3xo<31, 914, (outs), (ins gprc:$RST, gprc:$A, gprc:$B),
- "tlbsx $RST, $A, $B", IIC_LdStLoad, []>,
+def TLBSX2 : XForm_base_r3xo<31, 914, (outs), (ins gprc:$RST, gprc:$RA, gprc:$RB),
+ "tlbsx $RST, $RA, $RB", IIC_LdStLoad, []>,
Requires<[IsPPC4xx]>;
def TLBSX2D : XForm_base_r3xo<31, 914, (outs),
- (ins gprc:$RST, gprc:$A, gprc:$B),
- "tlbsx. $RST, $A, $B", IIC_LdStLoad, []>,
+ (ins gprc:$RST, gprc:$RA, gprc:$RB),
+ "tlbsx. $RST, $RA, $RB", IIC_LdStLoad, []>,
Requires<[IsPPC4xx]>, isRecordForm;
def RFID : XForm_0<19, 18, (outs), (ins), "rfid", IIC_IntRFID, []>;
@@ -4214,10 +4349,10 @@ def RFDI : XForm_0<19, 39, (outs), (ins), "rfdi", IIC_BrB, []>,
def RFMCI : XForm_0<19, 38, (outs), (ins), "rfmci", IIC_BrB, []>,
Requires<[IsE500]>;
-def MFDCR : XFXForm_1<31, 323, (outs gprc:$RT), (ins i32imm:$SPR),
- "mfdcr $RT, $SPR", IIC_SprMFSPR>, Requires<[IsPPC4xx]>;
-def MTDCR : XFXForm_1<31, 451, (outs), (ins gprc:$RT, i32imm:$SPR),
- "mtdcr $SPR, $RT", IIC_SprMTSPR>, Requires<[IsPPC4xx]>;
+def MFDCR : XFXForm_1<31, 323, (outs gprc:$RST), (ins i32imm:$SPR),
+ "mfdcr $RST, $SPR", IIC_SprMFSPR>, Requires<[IsPPC4xx]>;
+def MTDCR : XFXForm_1<31, 451, (outs), (ins gprc:$RST, i32imm:$SPR),
+ "mtdcr $SPR, $RST", IIC_SprMTSPR>, Requires<[IsPPC4xx]>;
def HRFID : XLForm_1_np<19, 274, (outs), (ins), "hrfid", IIC_BrB, []>;
def NAP : XLForm_1_np<19, 434, (outs), (ins), "nap", IIC_BrB, []>;
@@ -4225,86 +4360,86 @@ def NAP : XLForm_1_np<19, 434, (outs), (ins), "nap", IIC_BrB, []>;
def ATTN : XForm_attn<0, 256, (outs), (ins), "attn", IIC_BrB>;
def LBZCIX : XForm_base_r3xo_memOp<31, 853, (outs gprc:$RST),
- (ins gprc:$A, gprc:$B),
- "lbzcix $RST, $A, $B", IIC_LdStLoad, []>;
+ (ins gprc:$RA, gprc:$RB),
+ "lbzcix $RST, $RA, $RB", IIC_LdStLoad, []>;
def LHZCIX : XForm_base_r3xo_memOp<31, 821, (outs gprc:$RST),
- (ins gprc:$A, gprc:$B),
- "lhzcix $RST, $A, $B", IIC_LdStLoad, []>;
+ (ins gprc:$RA, gprc:$RB),
+ "lhzcix $RST, $RA, $RB", IIC_LdStLoad, []>;
def LWZCIX : XForm_base_r3xo_memOp<31, 789, (outs gprc:$RST),
- (ins gprc:$A, gprc:$B),
- "lwzcix $RST, $A, $B", IIC_LdStLoad, []>;
+ (ins gprc:$RA, gprc:$RB),
+ "lwzcix $RST, $RA, $RB", IIC_LdStLoad, []>;
def LDCIX : XForm_base_r3xo_memOp<31, 885, (outs gprc:$RST),
- (ins gprc:$A, gprc:$B),
- "ldcix $RST, $A, $B", IIC_LdStLoad, []>;
+ (ins gprc:$RA, gprc:$RB),
+ "ldcix $RST, $RA, $RB", IIC_LdStLoad, []>;
def STBCIX : XForm_base_r3xo_memOp<31, 981, (outs),
- (ins gprc:$RST, gprc:$A, gprc:$B),
- "stbcix $RST, $A, $B", IIC_LdStLoad, []>;
+ (ins gprc:$RST, gprc:$RA, gprc:$RB),
+ "stbcix $RST, $RA, $RB", IIC_LdStLoad, []>;
def STHCIX : XForm_base_r3xo_memOp<31, 949, (outs),
- (ins gprc:$RST, gprc:$A, gprc:$B),
- "sthcix $RST, $A, $B", IIC_LdStLoad, []>;
+ (ins gprc:$RST, gprc:$RA, gprc:$RB),
+ "sthcix $RST, $RA, $RB", IIC_LdStLoad, []>;
def STWCIX : XForm_base_r3xo_memOp<31, 917, (outs),
- (ins gprc:$RST, gprc:$A, gprc:$B),
- "stwcix $RST, $A, $B", IIC_LdStLoad, []>;
+ (ins gprc:$RST, gprc:$RA, gprc:$RB),
+ "stwcix $RST, $RA, $RB", IIC_LdStLoad, []>;
def STDCIX : XForm_base_r3xo_memOp<31, 1013, (outs),
- (ins gprc:$RST, gprc:$A, gprc:$B),
- "stdcix $RST, $A, $B", IIC_LdStLoad, []>;
+ (ins gprc:$RST, gprc:$RA, gprc:$RB),
+ "stdcix $RST, $RA, $RB", IIC_LdStLoad, []>;
// External PID Load Store Instructions
-def LBEPX : XForm_1<31, 95, (outs gprc:$rD), (ins memrr:$src),
- "lbepx $rD, $src", IIC_LdStLoad, []>,
+def LBEPX : XForm_1<31, 95, (outs gprc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lbepx $RST, $addr", IIC_LdStLoad, []>,
Requires<[IsE500]>;
-def LFDEPX : XForm_25<31, 607, (outs f8rc:$frD), (ins memrr:$src),
- "lfdepx $frD, $src", IIC_LdStLFD, []>,
+def LFDEPX : XForm_25<31, 607, (outs f8rc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lfdepx $RST, $addr", IIC_LdStLFD, []>,
Requires<[IsE500]>;
-def LHEPX : XForm_1<31, 287, (outs gprc:$rD), (ins memrr:$src),
- "lhepx $rD, $src", IIC_LdStLoad, []>,
+def LHEPX : XForm_1<31, 287, (outs gprc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lhepx $RST, $addr", IIC_LdStLoad, []>,
Requires<[IsE500]>;
-def LWEPX : XForm_1<31, 31, (outs gprc:$rD), (ins memrr:$src),
- "lwepx $rD, $src", IIC_LdStLoad, []>,
+def LWEPX : XForm_1<31, 31, (outs gprc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lwepx $RST, $addr", IIC_LdStLoad, []>,
Requires<[IsE500]>;
-def STBEPX : XForm_8<31, 223, (outs), (ins gprc:$rS, memrr:$dst),
- "stbepx $rS, $dst", IIC_LdStStore, []>,
+def STBEPX : XForm_8<31, 223, (outs), (ins gprc:$RST, (memrr $RA, $RB):$addr),
+ "stbepx $RST, $addr", IIC_LdStStore, []>,
Requires<[IsE500]>;
-def STFDEPX : XForm_28_memOp<31, 735, (outs), (ins f8rc:$frS, memrr:$dst),
- "stfdepx $frS, $dst", IIC_LdStSTFD, []>,
+def STFDEPX : XForm_28_memOp<31, 735, (outs), (ins f8rc:$RST, (memrr $RA, $RB):$addr),
+ "stfdepx $RST, $addr", IIC_LdStSTFD, []>,
Requires<[IsE500]>;
-def STHEPX : XForm_8<31, 415, (outs), (ins gprc:$rS, memrr:$dst),
- "sthepx $rS, $dst", IIC_LdStStore, []>,
+def STHEPX : XForm_8<31, 415, (outs), (ins gprc:$RST, (memrr $RA, $RB):$addr),
+ "sthepx $RST, $addr", IIC_LdStStore, []>,
Requires<[IsE500]>;
-def STWEPX : XForm_8<31, 159, (outs), (ins gprc:$rS, memrr:$dst),
- "stwepx $rS, $dst", IIC_LdStStore, []>,
+def STWEPX : XForm_8<31, 159, (outs), (ins gprc:$RST, (memrr $RA, $RB):$addr),
+ "stwepx $RST, $addr", IIC_LdStStore, []>,
Requires<[IsE500]>;
-def DCBFEP : DCB_Form<127, 0, (outs), (ins memrr:$dst), "dcbfep $dst",
+def DCBFEP : DCB_Form<127, 0, (outs), (ins (memrr $RA, $RB):$addr), "dcbfep $addr",
IIC_LdStDCBF, []>, Requires<[IsE500]>;
-def DCBSTEP : DCB_Form<63, 0, (outs), (ins memrr:$dst), "dcbstep $dst",
+def DCBSTEP : DCB_Form<63, 0, (outs), (ins (memrr $RA, $RB):$addr), "dcbstep $addr",
IIC_LdStDCBF, []>, Requires<[IsE500]>;
-def DCBTEP : DCB_Form_hint<319, (outs), (ins memrr:$dst, u5imm:$TH),
- "dcbtep $TH, $dst", IIC_LdStDCBF, []>,
+def DCBTEP : DCB_Form_hint<319, (outs), (ins (memrr $RA, $RB):$addr, u5imm:$TH),
+ "dcbtep $TH, $addr", IIC_LdStDCBF, []>,
Requires<[IsE500]>;
-def DCBTSTEP : DCB_Form_hint<255, (outs), (ins memrr:$dst, u5imm:$TH),
- "dcbtstep $TH, $dst", IIC_LdStDCBF, []>,
+def DCBTSTEP : DCB_Form_hint<255, (outs), (ins (memrr $RA, $RB):$addr, u5imm:$TH),
+ "dcbtstep $TH, $addr", IIC_LdStDCBF, []>,
Requires<[IsE500]>;
-def DCBZEP : DCB_Form<1023, 0, (outs), (ins memrr:$dst), "dcbzep $dst",
+def DCBZEP : DCB_Form<1023, 0, (outs), (ins (memrr $RA, $RB):$addr), "dcbzep $addr",
IIC_LdStDCBF, []>, Requires<[IsE500]>;
-def DCBZLEP : DCB_Form<1023, 1, (outs), (ins memrr:$dst), "dcbzlep $dst",
+def DCBZLEP : DCB_Form<1023, 1, (outs), (ins (memrr $RA, $RB):$addr), "dcbzlep $addr",
IIC_LdStDCBF, []>, Requires<[IsE500]>;
-def ICBIEP : XForm_1a<31, 991, (outs), (ins memrr:$src), "icbiep $src",
+def ICBIEP : XForm_1a<31, 991, (outs), (ins (memrr $RA, $RB):$addr), "icbiep $addr",
IIC_LdStICBI, []>, Requires<[IsE500]>;
//===----------------------------------------------------------------------===//
@@ -4679,56 +4814,56 @@ def RLWNMbm_rec : PPCAsmPseudo<"rlwnm. $rA, $rS, $n, $b",
let PPC970_Unit = 7, isBranch = 1, hasSideEffects = 0 in {
let Defs = [CTR], Uses = [CTR, RM] in {
def gBC : BForm_3<16, 0, 0, (outs),
- (ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst),
- "bc $bo, $bi, $dst">;
+ (ins u5imm:$BO, crbitrc:$BI, condbrtarget:$BD),
+ "bc $BO, $BI, $BD">;
def gBCA : BForm_3<16, 1, 0, (outs),
- (ins u5imm:$bo, crbitrc:$bi, abscondbrtarget:$dst),
- "bca $bo, $bi, $dst">;
+ (ins u5imm:$BO, crbitrc:$BI, abscondbrtarget:$BD),
+ "bca $BO, $BI, $BD">;
let isAsmParserOnly = 1 in {
def gBCat : BForm_3_at<16, 0, 0, (outs),
- (ins u5imm:$bo, atimm:$at, crbitrc:$bi,
- condbrtarget:$dst),
- "bc$at $bo, $bi, $dst">;
+ (ins u5imm:$BO, atimm:$at, crbitrc:$BI,
+ condbrtarget:$BD),
+ "bc$at $BO, $BI, $BD">;
def gBCAat : BForm_3_at<16, 1, 0, (outs),
- (ins u5imm:$bo, atimm:$at, crbitrc:$bi,
- abscondbrtarget:$dst),
- "bca$at $bo, $bi, $dst">;
+ (ins u5imm:$BO, atimm:$at, crbitrc:$BI,
+ abscondbrtarget:$BD),
+ "bca$at $BO, $BI, $BD">;
} // isAsmParserOnly = 1
}
let Defs = [LR, CTR], Uses = [CTR, RM] in {
def gBCL : BForm_3<16, 0, 1, (outs),
- (ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst),
- "bcl $bo, $bi, $dst">;
+ (ins u5imm:$BO, crbitrc:$BI, condbrtarget:$BD),
+ "bcl $BO, $BI, $BD">;
def gBCLA : BForm_3<16, 1, 1, (outs),
- (ins u5imm:$bo, crbitrc:$bi, abscondbrtarget:$dst),
- "bcla $bo, $bi, $dst">;
+ (ins u5imm:$BO, crbitrc:$BI, abscondbrtarget:$BD),
+ "bcla $BO, $BI, $BD">;
let isAsmParserOnly = 1 in {
def gBCLat : BForm_3_at<16, 0, 1, (outs),
- (ins u5imm:$bo, atimm:$at, crbitrc:$bi,
- condbrtarget:$dst),
- "bcl$at $bo, $bi, $dst">;
+ (ins u5imm:$BO, atimm:$at, crbitrc:$BI,
+ condbrtarget:$BD),
+ "bcl$at $BO, $BI, $BD">;
def gBCLAat : BForm_3_at<16, 1, 1, (outs),
- (ins u5imm:$bo, atimm:$at, crbitrc:$bi,
- abscondbrtarget:$dst),
- "bcla$at $bo, $bi, $dst">;
+ (ins u5imm:$BO, atimm:$at, crbitrc:$BI,
+ abscondbrtarget:$BD),
+ "bcla$at $BO, $BI, $BD">;
} // // isAsmParserOnly = 1
}
let Defs = [CTR], Uses = [CTR, LR, RM] in
def gBCLR : XLForm_2<19, 16, 0, (outs),
- (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
- "bclr $bo, $bi, $bh", IIC_BrB, []>;
+ (ins u5imm:$BO, crbitrc:$BI, i32imm:$BH),
+ "bclr $BO, $BI, $BH", IIC_BrB, []>;
let Defs = [LR, CTR], Uses = [CTR, LR, RM] in
def gBCLRL : XLForm_2<19, 16, 1, (outs),
- (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
- "bclrl $bo, $bi, $bh", IIC_BrB, []>;
+ (ins u5imm:$BO, crbitrc:$BI, i32imm:$BH),
+ "bclrl $BO, $BI, $BH", IIC_BrB, []>;
let Defs = [CTR], Uses = [CTR, LR, RM] in
def gBCCTR : XLForm_2<19, 528, 0, (outs),
- (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
- "bcctr $bo, $bi, $bh", IIC_BrB, []>;
+ (ins u5imm:$BO, crbitrc:$BI, i32imm:$BH),
+ "bcctr $BO, $BI, $BH", IIC_BrB, []>;
let Defs = [LR, CTR], Uses = [CTR, LR, RM] in
def gBCCTRL : XLForm_2<19, 528, 1, (outs),
- (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
- "bcctrl $bo, $bi, $bh", IIC_BrB, []>;
+ (ins u5imm:$BO, crbitrc:$BI, i32imm:$BH),
+ "bcctrl $BO, $BI, $BH", IIC_BrB, []>;
}
multiclass BranchSimpleMnemonicAT<string pm, int at> {
@@ -5087,11 +5222,11 @@ def DWBytes3210 {
// and the value of the stack pointer.
let mayStore = 1 in {
def HASHST : XForm_XD6_RA5_RB5<31, 722, (outs),
- (ins gprc:$RB, memrihash:$D_RA_XD),
- "hashst $RB, $D_RA_XD", IIC_IntGeneral, []>;
+ (ins gprc:$RB, (memrihash $D, $RA):$addr),
+ "hashst $RB, $addr", IIC_IntGeneral, []>;
def HASHSTP : XForm_XD6_RA5_RB5<31, 658, (outs),
- (ins gprc:$RB, memrihash:$D_RA_XD),
- "hashstp $RB, $D_RA_XD", IIC_IntGeneral, []>;
+ (ins gprc:$RB, (memrihash $D, $RA):$addr),
+ "hashstp $RB, $addr", IIC_IntGeneral, []>;
}
// These instructions check a hash computed from the value of the link register
@@ -5100,11 +5235,11 @@ def HASHSTP : XForm_XD6_RA5_RB5<31, 658, (outs),
// specified address.
let mayLoad = 1, hasSideEffects = 1 in {
def HASHCHK : XForm_XD6_RA5_RB5<31, 754, (outs),
- (ins gprc:$RB, memrihash:$D_RA_XD),
- "hashchk $RB, $D_RA_XD", IIC_IntGeneral, []>;
+ (ins gprc:$RB, (memrihash $D, $RA):$addr),
+ "hashchk $RB, $addr", IIC_IntGeneral, []>;
def HASHCHKP : XForm_XD6_RA5_RB5<31, 690, (outs),
- (ins gprc:$RB, memrihash:$D_RA_XD),
- "hashchkp $RB, $D_RA_XD", IIC_IntGeneral, []>;
+ (ins gprc:$RB, (memrihash $D, $RA):$addr),
+ "hashchkp $RB, $addr", IIC_IntGeneral, []>;
}
// Now both high word and low word are reversed, next
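
The PPCInstrInfo.td hunks above all apply one mechanical change: composite memory operands (memrr:$dst, memrihash:$D_RA_XD, memri34:$D_RA) become named sub-operand dags such as (memrr $RA, $RB):$addr, and the matching instruction classes trade a packed encoder field like bits<39> D_RA for separate RA and D fields. The fragment below is a hypothetical, self-contained sketch of that convention, not code from the patch: SketchInst, SketchEnc, mem_sk and the other names are invented stand-ins, kept minimal so the file parses with llvm-tblgen on its own.

// Stand-in operand records (the real ones live in Target.td / PPCInstrInfo.td).
def ops;       // dag operator used for operand lists in this sketch
def gpr_sk;    // stand-in register operand
def mem_sk;    // stand-in register+register composite memory operand

class SketchInst<dag ins, string asm> {
  dag InOperandList = ins;
  string AsmString = asm;
}

// Old style: one composite operand; its pieces are packed into a single
// encoder field and sliced apart there.
def OLD_LOAD : SketchInst<(ops mem_sk:$dst), "old_load $dst">;

// New style, as used throughout this diff: each sub-operand is named, and
// the group is still printed as a single $addr in the assembly string.
def NEW_LOAD : SketchInst<(ops (mem_sk $RA, $RB):$addr), "new_load $addr">;

// Companion change in the encoding classes: the packed field is split so
// every sub-operand gets its own slot (compare bits<39> D_RA above with the
// new bits<5> RA and bits<34> D).
class SketchEnc {
  bits<5>  RA;  // register sub-operand, previously D_RA{38-34}
  bits<34> D;   // displacement sub-operand, previously D_RA{33-0}
}
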
diff --git a/llvm/lib/Target/PowerPC/PPCInstrMMA.td b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
index ad2a294c68d4..161d4d3c492f 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrMMA.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrMMA.td
@@ -502,10 +502,10 @@ multiclass ACC_NEG_UM_M42_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
let Predicates = [MMA, IsNotISAFuture] in {
def XXMFACC :
- XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS",
+ XForm_AT3<31, 0, 177, (outs acc:$ATo), (ins acc:$AT), "xxmfacc $AT",
IIC_VecGeneral,
- [(set v512i1:$ASo, (int_ppc_mma_xxmfacc v512i1:$AS))]>,
- RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">;
+ [(set v512i1:$ATo, (int_ppc_mma_xxmfacc v512i1:$AT))]>,
+ RegConstraint<"$ATo = $AT">, NoEncode<"$ATo">;
def XXMTACC :
XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT",
IIC_VecGeneral,
@@ -550,9 +550,9 @@ let Predicates = [MMA, IsISAFuture], isCodeGenOnly = 1 in {
// On top of that Future CPU has a more convenient way to move between vsrs
// and wacc registers using xxextfdmr512 and xxinstdmr512.
def XXMFACCW :
- XForm_AT3<31, 0, 177, (outs wacc:$ASo), (ins wacc:$AS), "xxmfacc $AS",
+ XForm_AT3<31, 0, 177, (outs wacc:$ATo), (ins wacc:$AT), "xxmfacc $AT",
IIC_VecGeneral, []>,
- RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">;
+ RegConstraint<"$ATo = $AT">, NoEncode<"$ATo">;
def XXMTACCW :
XForm_AT3<31, 1, 177, (outs wacc:$AT), (ins wacc:$ATi), "xxmtacc $AT",
IIC_VecGeneral, []>,
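
The XXMFACC/XXMFACCW hunks above only rename the tied accumulator operand from $AS/$ASo to $AT/$ATo; the RegConstraint/NoEncode tie itself is unchanged. As a reminder of what that idiom expresses, here is a hypothetical stand-alone sketch (all names below are invented, not the real PPC or Target.td definitions): the constraint string ties the output register to the input, and DisableEncoding keeps the duplicate operand out of the emitted bits, mirroring what PPC's RegConstraint and NoEncode wrappers set.

// Stand-ins for the generic Instruction fields the real wrappers populate.
def outs_sk;
def ins_sk;
def acc_sk;

class RegConstraintSketch<string c> { string Constraints = c; }
class NoEncodeSketch<string e>      { string DisableEncoding = e; }

class SketchAccInst<dag outs, dag ins, string asm> {
  dag OutOperandList = outs;
  dag InOperandList  = ins;
  string AsmString   = asm;
}

// $ATo (the result) must be allocated to the same register as $AT (the
// source), and $ATo contributes no encoding bits of its own.
def SK_XXMFACC : SketchAccInst<(outs_sk acc_sk:$ATo), (ins_sk acc_sk:$AT),
                               "xxmfacc $AT">,
                 RegConstraintSketch<"$ATo = $AT">, NoEncodeSketch<"$ATo">;
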
diff --git a/llvm/lib/Target/PowerPC/PPCInstrP10.td b/llvm/lib/Target/PowerPC/PPCInstrP10.td
index cb8ab6bf5255..8cb8e4d91db2 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrP10.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrP10.td
@@ -184,8 +184,9 @@ multiclass VXForm_VTB5_RCr<bits<10> xo, bits<5> R, dag OOL, dag IOL,
class MLS_DForm_R_SI34_RTA5_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: PI<1, opcode, OOL, IOL, asmstr, itin> {
- bits<5> FRS;
- bits<39> D_RA;
+ bits<5> RST;
+ bits<5> RA;
+ bits<34> D;
let Pattern = pattern;
@@ -194,12 +195,12 @@ class MLS_DForm_R_SI34_RTA5_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
let Inst{8-10} = 0;
let Inst{11} = PCRel;
let Inst{12-13} = 0;
- let Inst{14-31} = D_RA{33-16}; // d0
+ let Inst{14-31} = D{33-16}; // d0
// The instruction.
- let Inst{38-42} = FRS{4-0};
- let Inst{43-47} = D_RA{38-34}; // RA
- let Inst{48-63} = D_RA{15-0}; // d1
+ let Inst{38-42} = RST{4-0};
+ let Inst{43-47} = RA;
+ let Inst{48-63} = D{15-0}; // d1
}
class MLS_DForm_R_SI34_RTA5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
@@ -257,8 +258,9 @@ multiclass MLS_DForm_R_SI34_RTA5_p<bits<6> opcode, dag OOL, dag IOL,
class 8LS_DForm_R_SI34_RTA5_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: PI<1, opcode, OOL, IOL, asmstr, itin> {
- bits<5> RT;
- bits<39> D_RA;
+ bits<5> RST;
+ bits<5> RA;
+ bits<34> D;
let Pattern = pattern;
@@ -266,12 +268,12 @@ class 8LS_DForm_R_SI34_RTA5_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
let Inst{6-10} = 0;
let Inst{11} = PCRel;
let Inst{12-13} = 0;
- let Inst{14-31} = D_RA{33-16}; // d0
+ let Inst{14-31} = D{33-16}; // d0
// The instruction.
- let Inst{38-42} = RT{4-0};
- let Inst{43-47} = D_RA{38-34}; // RA
- let Inst{48-63} = D_RA{15-0}; // d1
+ let Inst{38-42} = RST{4-0};
+ let Inst{43-47} = RA;
+ let Inst{48-63} = D{15-0}; // d1
}
// 8LS:D-Form: [ 1 0 0 // R // d0
@@ -280,8 +282,9 @@ class 8LS_DForm_R_SI34_XT6_RA5_MEM<bits<5> opcode, dag OOL, dag IOL,
string asmstr, InstrItinClass itin,
list<dag> pattern>
: PI<1, { opcode, ? }, OOL, IOL, asmstr, itin> {
- bits<6> XT;
- bits<39> D_RA;
+ bits<6> XST;
+ bits<5> RA;
+ bits<34> D;
let Pattern = pattern;
@@ -291,13 +294,13 @@ class 8LS_DForm_R_SI34_XT6_RA5_MEM<bits<5> opcode, dag OOL, dag IOL,
let Inst{9-10} = 0; // reserved
let Inst{11} = PCRel;
let Inst{12-13} = 0; // reserved
- let Inst{14-31} = D_RA{33-16}; // d0
+ let Inst{14-31} = D{33-16}; // d0
// The instruction.
- let Inst{37} = XT{5};
- let Inst{38-42} = XT{4-0};
- let Inst{43-47} = D_RA{38-34}; // RA
- let Inst{48-63} = D_RA{15-0}; // d1
+ let Inst{37} = XST{5};
+ let Inst{38-42} = XST{4-0};
+ let Inst{43-47} = RA;
+ let Inst{48-63} = D{15-0}; // d1
}
// X-Form: [PO T IMM VRB XO TX]
@@ -368,16 +371,16 @@ class VXForm_RD5_N3_VB5<bits<11> xo, dag OOL, dag IOL, string asmstr,
// VX-Form: [PO VRT RA VRB XO].
// Destructive (insert) forms are suffixed with _ins.
class VXForm_VTB5_RA5_ins<bits<11> xo, string opc, list<dag> pattern>
- : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, gprc:$rA, vrrc:$vB),
- !strconcat(opc, " $vD, $rA, $vB"), IIC_VecGeneral, pattern>,
- RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+ : VXForm_1<xo, (outs vrrc:$VD), (ins vrrc:$VDi, gprc:$VA, vrrc:$VB),
+ !strconcat(opc, " $VD, $VA, $VB"), IIC_VecGeneral, pattern>,
+ RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">;
// VX-Form: [PO VRT RA RB XO].
// Destructive (insert) forms are suffixed with _ins.
class VXForm_VRT5_RAB5_ins<bits<11> xo, string opc, list<dag> pattern>
- : VXForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vDi, gprc:$rA, gprc:$rB),
- !strconcat(opc, " $vD, $rA, $rB"), IIC_VecGeneral, pattern>,
- RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+ : VXForm_1<xo, (outs vrrc:$VD), (ins vrrc:$VDi, gprc:$VA, gprc:$VB),
+ !strconcat(opc, " $VD, $VA, $VB"), IIC_VecGeneral, pattern>,
+ RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">;
// VX-Form: [ PO BF // VRA VRB XO ]
class VXForm_BF3_VAB5<bits<11> xo, dag OOL, dag IOL, string asmstr,
@@ -566,7 +569,9 @@ class XX2_BF3_XO5_XB6_XO9<bits<6> opcode, bits<5> xo2, bits<9> xo, dag OOL,
class XForm_XT5_BI5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
string asmstr, InstrItinClass itin, list<dag> pattern>
: XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
- let B = 0;
+ bits<5> BI;
+ let RA = BI;
+ let RB = 0;
}
multiclass MLS_DForm_R_SI34_RTA5_MEM_p<bits<6> opcode, dag OOL, dag IOL,
@@ -631,124 +636,124 @@ let Predicates = [PrefixInstrs] in {
let mayLoad = 1, mayStore = 0 in {
defm PLXV :
- 8LS_DForm_R_SI34_XT6_RA5_MEM_p<25, (outs vsrc:$XT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA),
- "plxv $XT, $D_RA", IIC_LdStLFD>;
+ 8LS_DForm_R_SI34_XT6_RA5_MEM_p<25, (outs vsrc:$XST), (ins (memri34 $D, $RA):$addr),
+ (ins (memri34_pcrel $D, $RA):$addr),
+ "plxv $XST, $addr", IIC_LdStLFD>;
defm PLFS :
- MLS_DForm_R_SI34_RTA5_MEM_p<48, (outs f4rc:$FRT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plfs $FRT, $D_RA",
+ MLS_DForm_R_SI34_RTA5_MEM_p<48, (outs f4rc:$RST), (ins (memri34 $D, $RA):$addr),
+ (ins (memri34_pcrel $D, $RA):$addr), "plfs $RST, $addr",
IIC_LdStLFD>;
defm PLFD :
- MLS_DForm_R_SI34_RTA5_MEM_p<50, (outs f8rc:$FRT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plfd $FRT, $D_RA",
+ MLS_DForm_R_SI34_RTA5_MEM_p<50, (outs f8rc:$RST), (ins (memri34 $D, $RA):$addr),
+ (ins (memri34_pcrel $D, $RA):$addr), "plfd $RST, $addr",
IIC_LdStLFD>;
defm PLXSSP :
- 8LS_DForm_R_SI34_RTA5_MEM_p<43, (outs vfrc:$VRT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA),
- "plxssp $VRT, $D_RA", IIC_LdStLFD>;
+ 8LS_DForm_R_SI34_RTA5_MEM_p<43, (outs vfrc:$RST), (ins (memri34 $D, $RA):$addr),
+ (ins (memri34_pcrel $D, $RA):$addr),
+ "plxssp $RST, $addr", IIC_LdStLFD>;
defm PLXSD :
- 8LS_DForm_R_SI34_RTA5_MEM_p<42, (outs vfrc:$VRT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA),
- "plxsd $VRT, $D_RA", IIC_LdStLFD>;
+ 8LS_DForm_R_SI34_RTA5_MEM_p<42, (outs vfrc:$RST), (ins (memri34 $D, $RA):$addr),
+ (ins (memri34_pcrel $D, $RA):$addr),
+ "plxsd $RST, $addr", IIC_LdStLFD>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
defm PLBZ8 :
- MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs g8rc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plbz $RT, $D_RA",
+ MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs g8rc:$RST), (ins (memri34 $D, $RA):$addr),
+ (ins (memri34_pcrel $D, $RA):$addr), "plbz $RST, $addr",
IIC_LdStLFD>;
defm PLHZ8 :
- MLS_DForm_R_SI34_RTA5_MEM_p<40, (outs g8rc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plhz $RT, $D_RA",
+ MLS_DForm_R_SI34_RTA5_MEM_p<40, (outs g8rc:$RST), (ins (memri34 $D, $RA):$addr),
+ (ins (memri34_pcrel $D, $RA):$addr), "plhz $RST, $addr",
IIC_LdStLFD>;
defm PLHA8 :
- MLS_DForm_R_SI34_RTA5_MEM_p<42, (outs g8rc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plha $RT, $D_RA",
+ MLS_DForm_R_SI34_RTA5_MEM_p<42, (outs g8rc:$RST), (ins (memri34 $D, $RA):$addr),
+ (ins (memri34_pcrel $D, $RA):$addr), "plha $RST, $addr",
IIC_LdStLFD>;
defm PLWA8 :
- 8LS_DForm_R_SI34_RTA5_MEM_p<41, (outs g8rc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA),
- "plwa $RT, $D_RA", IIC_LdStLFD>;
+ 8LS_DForm_R_SI34_RTA5_MEM_p<41, (outs g8rc:$RST), (ins (memri34 $D, $RA):$addr),
+ (ins (memri34_pcrel $D, $RA):$addr),
+ "plwa $RST, $addr", IIC_LdStLFD>;
defm PLWZ8 :
- MLS_DForm_R_SI34_RTA5_MEM_p<32, (outs g8rc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plwz $RT, $D_RA",
+ MLS_DForm_R_SI34_RTA5_MEM_p<32, (outs g8rc:$RST), (ins (memri34 $D, $RA):$addr),
+ (ins (memri34_pcrel $D, $RA):$addr), "plwz $RST, $addr",
IIC_LdStLFD>;
}
defm PLBZ :
- MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs gprc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plbz $RT, $D_RA",
+ MLS_DForm_R_SI34_RTA5_MEM_p<34, (outs gprc:$RST), (ins (memri34 $D, $RA):$addr),
+ (ins (memri34_pcrel $D, $RA):$addr), "plbz $RST, $addr",
IIC_LdStLFD>;
defm PLHZ :
- MLS_DForm_R_SI34_RTA5_MEM_p<40, (outs gprc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plhz $RT, $D_RA",
+ MLS_DForm_R_SI34_RTA5_MEM_p<40, (outs gprc:$RST), (ins (memri34 $D, $RA):$addr),
+ (ins (memri34_pcrel $D, $RA):$addr), "plhz $RST, $addr",
IIC_LdStLFD>;
defm PLHA :
- MLS_DForm_R_SI34_RTA5_MEM_p<42, (outs gprc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plha $RT, $D_RA",
+ MLS_DForm_R_SI34_RTA5_MEM_p<42, (outs gprc:$RST), (ins (memri34 $D, $RA):$addr),
+ (ins (memri34_pcrel $D, $RA):$addr), "plha $RST, $addr",
IIC_LdStLFD>;
defm PLWZ :
- MLS_DForm_R_SI34_RTA5_MEM_p<32, (outs gprc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plwz $RT, $D_RA",
+ MLS_DForm_R_SI34_RTA5_MEM_p<32, (outs gprc:$RST), (ins (memri34 $D, $RA):$addr),
+ (ins (memri34_pcrel $D, $RA):$addr), "plwz $RST, $addr",
IIC_LdStLFD>;
defm PLWA :
- 8LS_DForm_R_SI34_RTA5_MEM_p<41, (outs gprc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plwa $RT, $D_RA",
+ 8LS_DForm_R_SI34_RTA5_MEM_p<41, (outs gprc:$RST), (ins (memri34 $D, $RA):$addr),
+ (ins (memri34_pcrel $D, $RA):$addr), "plwa $RST, $addr",
IIC_LdStLFD>;
defm PLD :
- 8LS_DForm_R_SI34_RTA5_MEM_p<57, (outs g8rc:$RT), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "pld $RT, $D_RA",
+ 8LS_DForm_R_SI34_RTA5_MEM_p<57, (outs g8rc:$RST), (ins (memri34 $D, $RA):$addr),
+ (ins (memri34_pcrel $D, $RA):$addr), "pld $RST, $addr",
IIC_LdStLFD>;
}
let mayStore = 1, mayLoad = 0 in {
defm PSTXV :
- 8LS_DForm_R_SI34_XT6_RA5_MEM_p<27, (outs), (ins vsrc:$XS, memri34:$D_RA),
- (ins vsrc:$XS, memri34_pcrel:$D_RA),
- "pstxv $XS, $D_RA", IIC_LdStLFD>;
+ 8LS_DForm_R_SI34_XT6_RA5_MEM_p<27, (outs), (ins vsrc:$XST, (memri34 $D, $RA):$addr),
+ (ins vsrc:$XST, (memri34_pcrel $D, $RA):$addr),
+ "pstxv $XST, $addr", IIC_LdStLFD>;
defm PSTFS :
- MLS_DForm_R_SI34_RTA5_MEM_p<52, (outs), (ins f4rc:$FRS, memri34:$D_RA),
- (ins f4rc:$FRS, memri34_pcrel:$D_RA),
- "pstfs $FRS, $D_RA", IIC_LdStLFD>;
+ MLS_DForm_R_SI34_RTA5_MEM_p<52, (outs), (ins f4rc:$RST, (memri34 $D, $RA):$addr),
+ (ins f4rc:$RST, (memri34_pcrel $D, $RA):$addr),
+ "pstfs $RST, $addr", IIC_LdStLFD>;
defm PSTFD :
- MLS_DForm_R_SI34_RTA5_MEM_p<54, (outs), (ins f8rc:$FRS, memri34:$D_RA),
- (ins f8rc:$FRS, memri34_pcrel:$D_RA),
- "pstfd $FRS, $D_RA", IIC_LdStLFD>;
+ MLS_DForm_R_SI34_RTA5_MEM_p<54, (outs), (ins f8rc:$RST, (memri34 $D, $RA):$addr),
+ (ins f8rc:$RST, (memri34_pcrel $D, $RA):$addr),
+ "pstfd $RST, $addr", IIC_LdStLFD>;
defm PSTXSSP :
- 8LS_DForm_R_SI34_RTA5_MEM_p<47, (outs), (ins vfrc:$VRS, memri34:$D_RA),
- (ins vfrc:$VRS, memri34_pcrel:$D_RA),
- "pstxssp $VRS, $D_RA", IIC_LdStLFD>;
+ 8LS_DForm_R_SI34_RTA5_MEM_p<47, (outs), (ins vfrc:$RST, (memri34 $D, $RA):$addr),
+ (ins vfrc:$RST, (memri34_pcrel $D, $RA):$addr),
+ "pstxssp $RST, $addr", IIC_LdStLFD>;
defm PSTXSD :
- 8LS_DForm_R_SI34_RTA5_MEM_p<46, (outs), (ins vfrc:$VRS, memri34:$D_RA),
- (ins vfrc:$VRS, memri34_pcrel:$D_RA),
- "pstxsd $VRS, $D_RA", IIC_LdStLFD>;
+ 8LS_DForm_R_SI34_RTA5_MEM_p<46, (outs), (ins vfrc:$RST, (memri34 $D, $RA):$addr),
+ (ins vfrc:$RST, (memri34_pcrel $D, $RA):$addr),
+ "pstxsd $RST, $addr", IIC_LdStLFD>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
defm PSTB8 :
- MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins g8rc:$RS, memri34:$D_RA),
- (ins g8rc:$RS, memri34_pcrel:$D_RA),
- "pstb $RS, $D_RA", IIC_LdStLFD>;
+ MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins g8rc:$RST, (memri34 $D, $RA):$addr),
+ (ins g8rc:$RST, (memri34_pcrel $D, $RA):$addr),
+ "pstb $RST, $addr", IIC_LdStLFD>;
defm PSTH8 :
- MLS_DForm_R_SI34_RTA5_MEM_p<44, (outs), (ins g8rc:$RS, memri34:$D_RA),
- (ins g8rc:$RS, memri34_pcrel:$D_RA),
- "psth $RS, $D_RA", IIC_LdStLFD>;
+ MLS_DForm_R_SI34_RTA5_MEM_p<44, (outs), (ins g8rc:$RST, (memri34 $D, $RA):$addr),
+ (ins g8rc:$RST, (memri34_pcrel $D, $RA):$addr),
+ "psth $RST, $addr", IIC_LdStLFD>;
defm PSTW8 :
- MLS_DForm_R_SI34_RTA5_MEM_p<36, (outs), (ins g8rc:$RS, memri34:$D_RA),
- (ins g8rc:$RS, memri34_pcrel:$D_RA),
- "pstw $RS, $D_RA", IIC_LdStLFD>;
+ MLS_DForm_R_SI34_RTA5_MEM_p<36, (outs), (ins g8rc:$RST, (memri34 $D, $RA):$addr),
+ (ins g8rc:$RST, (memri34_pcrel $D, $RA):$addr),
+ "pstw $RST, $addr", IIC_LdStLFD>;
}
defm PSTB :
- MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins gprc:$RS, memri34:$D_RA),
- (ins gprc:$RS, memri34_pcrel:$D_RA),
- "pstb $RS, $D_RA", IIC_LdStLFD>;
+ MLS_DForm_R_SI34_RTA5_MEM_p<38, (outs), (ins gprc:$RST, (memri34 $D, $RA):$addr),
+ (ins gprc:$RST, (memri34_pcrel $D, $RA):$addr),
+ "pstb $RST, $addr", IIC_LdStLFD>;
defm PSTH :
- MLS_DForm_R_SI34_RTA5_MEM_p<44, (outs), (ins gprc:$RS, memri34:$D_RA),
- (ins gprc:$RS, memri34_pcrel:$D_RA),
- "psth $RS, $D_RA", IIC_LdStLFD>;
+ MLS_DForm_R_SI34_RTA5_MEM_p<44, (outs), (ins gprc:$RST, (memri34 $D, $RA):$addr),
+ (ins gprc:$RST, (memri34_pcrel $D, $RA):$addr),
+ "psth $RST, $addr", IIC_LdStLFD>;
defm PSTW :
- MLS_DForm_R_SI34_RTA5_MEM_p<36, (outs), (ins gprc:$RS, memri34:$D_RA),
- (ins gprc:$RS, memri34_pcrel:$D_RA),
- "pstw $RS, $D_RA", IIC_LdStLFD>;
+ MLS_DForm_R_SI34_RTA5_MEM_p<36, (outs), (ins gprc:$RST, (memri34 $D, $RA):$addr),
+ (ins gprc:$RST, (memri34_pcrel $D, $RA):$addr),
+ "pstw $RST, $addr", IIC_LdStLFD>;
defm PSTD :
- 8LS_DForm_R_SI34_RTA5_MEM_p<61, (outs), (ins g8rc:$RS, memri34:$D_RA),
- (ins g8rc:$RS, memri34_pcrel:$D_RA),
- "pstd $RS, $D_RA", IIC_LdStLFD>;
+ 8LS_DForm_R_SI34_RTA5_MEM_p<61, (outs), (ins g8rc:$RST, (memri34 $D, $RA):$addr),
+ (ins g8rc:$RST, (memri34_pcrel $D, $RA):$addr),
+ "pstd $RST, $addr", IIC_LdStLFD>;
}
}
@@ -756,13 +761,15 @@ class DQForm_XTp5_RA17_MEM<bits<6> opcode, bits<4> xo, dag OOL, dag IOL,
string asmstr, InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
bits<5> XTp;
- bits<17> DQ_RA;
+ bits<5> RA;
+ bits<12> DQ;
+
let Pattern = pattern;
let Inst{6-9} = XTp{3-0};
let Inst{10} = XTp{4};
- let Inst{11-15} = DQ_RA{16-12}; // Register #
- let Inst{16-27} = DQ_RA{11-0}; // Displacement.
+ let Inst{11-15} = RA;
+ let Inst{16-27} = DQ;
let Inst{28-31} = xo;
}
@@ -770,14 +777,14 @@ class XForm_XTp5_XAB5<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
string asmstr, InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin>, XFormMemOp {
bits<5> XTp;
- bits<5> A;
- bits<5> B;
+ bits<5> RA;
+ bits<5> RB;
let Pattern = pattern;
let Inst{6-9} = XTp{3-0};
let Inst{10} = XTp{4};
- let Inst{11-15} = A;
- let Inst{16-20} = B;
+ let Inst{11-15} = RA;
+ let Inst{16-20} = RB;
let Inst{21-30} = xo;
let Inst{31} = 0;
}
@@ -786,7 +793,8 @@ class 8LS_DForm_R_XTp5_SI34_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: PI<1, opcode, OOL, IOL, asmstr, itin> {
bits<5> XTp;
- bits<39> D_RA;
+ bits<5> RA;
+ bits<34> D;
let Pattern = pattern;
@@ -794,13 +802,13 @@ class 8LS_DForm_R_XTp5_SI34_MEM<bits<6> opcode, dag OOL, dag IOL, string asmstr,
let Inst{6-10} = 0;
let Inst{11} = PCRel;
let Inst{12-13} = 0;
- let Inst{14-31} = D_RA{33-16}; // Imm18
+ let Inst{14-31} = D{33-16}; // Imm18
// The instruction.
let Inst{38-41} = XTp{3-0};
let Inst{42} = XTp{4};
- let Inst{43-47} = D_RA{38-34}; // Register #
- let Inst{48-63} = D_RA{15-0}; // D
+ let Inst{43-47} = RA;
+ let Inst{48-63} = D{15-0};
}
multiclass 8LS_DForm_R_XTp5_SI34_MEM_p<bits<6> opcode, dag OOL,
@@ -1052,34 +1060,34 @@ let Predicates = [PairedVectorMemops] in {
let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops] in {
def LXVP : DQForm_XTp5_RA17_MEM<6, 0, (outs vsrprc:$XTp),
- (ins memrix16:$DQ_RA), "lxvp $XTp, $DQ_RA",
+ (ins (memrix16 $DQ, $RA):$addr), "lxvp $XTp, $addr",
IIC_LdStLFD, []>;
- def LXVPX : XForm_XTp5_XAB5<31, 333, (outs vsrprc:$XTp), (ins memrr:$src),
- "lxvpx $XTp, $src", IIC_LdStLFD,
+ def LXVPX : XForm_XTp5_XAB5<31, 333, (outs vsrprc:$XTp), (ins (memrr $RA, $RB):$addr),
+ "lxvpx $XTp, $addr", IIC_LdStLFD,
[]>;
}
let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops] in {
def STXVP : DQForm_XTp5_RA17_MEM<6, 1, (outs), (ins vsrprc:$XTp,
- memrix16:$DQ_RA), "stxvp $XTp, $DQ_RA",
+ (memrix16 $DQ, $RA):$addr), "stxvp $XTp, $addr",
IIC_LdStLFD, []>;
- def STXVPX : XForm_XTp5_XAB5<31, 461, (outs), (ins vsrprc:$XTp, memrr:$dst),
- "stxvpx $XTp, $dst", IIC_LdStLFD,
+ def STXVPX : XForm_XTp5_XAB5<31, 461, (outs), (ins vsrprc:$XTp, (memrr $RA, $RB):$addr),
+ "stxvpx $XTp, $addr", IIC_LdStLFD,
[]>;
}
let mayLoad = 1, mayStore = 0, Predicates = [PairedVectorMemops, PrefixInstrs] in {
defm PLXVP :
- 8LS_DForm_R_XTp5_SI34_MEM_p<58, (outs vsrprc:$XTp), (ins memri34:$D_RA),
- (ins memri34_pcrel:$D_RA), "plxvp $XTp, $D_RA",
+ 8LS_DForm_R_XTp5_SI34_MEM_p<58, (outs vsrprc:$XTp), (ins (memri34 $D, $RA):$addr),
+ (ins (memri34_pcrel $D, $RA):$addr), "plxvp $XTp, $addr",
IIC_LdStLFD>;
}
let mayLoad = 0, mayStore = 1, Predicates = [PairedVectorMemops, PrefixInstrs] in {
defm PSTXVP :
- 8LS_DForm_R_XTp5_SI34_MEM_p<62, (outs), (ins vsrprc:$XTp, memri34:$D_RA),
- (ins vsrprc:$XTp, memri34_pcrel:$D_RA),
- "pstxvp $XTp, $D_RA", IIC_LdStLFD>;
+ 8LS_DForm_R_XTp5_SI34_MEM_p<62, (outs), (ins vsrprc:$XTp, (memri34 $D, $RA):$addr),
+ (ins vsrprc:$XTp, (memri34_pcrel $D, $RA):$addr),
+ "pstxvp $XTp, $addr", IIC_LdStLFD>;
}
let Predicates = [PairedVectorMemops] in {
@@ -1244,23 +1252,10 @@ let Predicates = [PCRelativeMemops] in {
(PSTDpc $RS, $ga, 0)>;
// Special Cases For PPCstore_scal_int_from_vsr
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)),
- (PPCmatpcreladdr PCRelForm:$dst), 8),
- (PSTXSDpc (XSCVDPSXDS f64:$src), $dst, 0)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)),
- (PPCmatpcreladdr PCRelForm:$dst), 8),
- (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), $dst, 0)>;
-
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)),
- (PPCmatpcreladdr PCRelForm:$dst), 8),
- (PSTXSDpc (XSCVDPUXDS f64:$src), $dst, 0)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)),
- (PPCmatpcreladdr PCRelForm:$dst), 8),
- (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), $dst, 0)>;
+ def : Pat<(PPCstore_scal_int_from_vsr f64:$src, (PPCmatpcreladdr PCRelForm:$dst), 8),
+ (PSTXSDpc $src, $dst, 0)>;
+ def : Pat<(PPCstore_scal_int_from_vsr f128:$src, (PPCmatpcreladdr PCRelForm:$dst), 8),
+ (PSTXSDpc (COPY_TO_REGCLASS $src, VFRC), $dst, 0)>;
def : Pat<(v4f32 (PPCldvsxlh (PPCmatpcreladdr PCRelForm:$addr))),
(SUBREG_TO_REG (i64 1), (PLFDpc $addr, 0), sub_64)>;
@@ -1280,8 +1275,8 @@ let Predicates = [PCRelativeMemops] in {
let Predicates = [PrefixInstrs] in {
def XXPERMX :
8RR_XX4Form_IMM3_XTABC6<34, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
- vsrc:$XC, u3imm:$UIM),
- "xxpermx $XT, $XA, $XB, $XC, $UIM",
+ vsrc:$XC, u3imm:$IMM),
+ "xxpermx $XT, $XA, $XB, $XC, $IMM",
IIC_VecPerm, []>;
def XXBLENDVB :
8RR_XX4Form_XTABC6<33, 0, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
@@ -1324,349 +1319,349 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1, Predicates = [P
}
let Predicates = [IsISA3_1] in {
- def SETBC : XForm_XT5_BI5<31, 384, (outs gprc:$RT), (ins crbitrc:$BI),
- "setbc $RT, $BI", IIC_IntCompare, []>,
+ def SETBC : XForm_XT5_BI5<31, 384, (outs gprc:$RST), (ins crbitrc:$BI),
+ "setbc $RST, $BI", IIC_IntCompare, []>,
SExt32To64, ZExt32To64;
- def SETBCR : XForm_XT5_BI5<31, 416, (outs gprc:$RT), (ins crbitrc:$BI),
- "setbcr $RT, $BI", IIC_IntCompare, []>,
+ def SETBCR : XForm_XT5_BI5<31, 416, (outs gprc:$RST), (ins crbitrc:$BI),
+ "setbcr $RST, $BI", IIC_IntCompare, []>,
SExt32To64, ZExt32To64;
- def SETNBC : XForm_XT5_BI5<31, 448, (outs gprc:$RT), (ins crbitrc:$BI),
- "setnbc $RT, $BI", IIC_IntCompare, []>,
+ def SETNBC : XForm_XT5_BI5<31, 448, (outs gprc:$RST), (ins crbitrc:$BI),
+ "setnbc $RST, $BI", IIC_IntCompare, []>,
SExt32To64;
- def SETNBCR : XForm_XT5_BI5<31, 480, (outs gprc:$RT), (ins crbitrc:$BI),
- "setnbcr $RT, $BI", IIC_IntCompare, []>,
+ def SETNBCR : XForm_XT5_BI5<31, 480, (outs gprc:$RST), (ins crbitrc:$BI),
+ "setnbcr $RST, $BI", IIC_IntCompare, []>,
SExt32To64;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
- def SETBC8 : XForm_XT5_BI5<31, 384, (outs g8rc:$RT), (ins crbitrc:$BI),
- "setbc $RT, $BI", IIC_IntCompare, []>,
+ def SETBC8 : XForm_XT5_BI5<31, 384, (outs g8rc:$RST), (ins crbitrc:$BI),
+ "setbc $RST, $BI", IIC_IntCompare, []>,
SExt32To64, ZExt32To64;
- def SETBCR8 : XForm_XT5_BI5<31, 416, (outs g8rc:$RT), (ins crbitrc:$BI),
- "setbcr $RT, $BI", IIC_IntCompare, []>,
+ def SETBCR8 : XForm_XT5_BI5<31, 416, (outs g8rc:$RST), (ins crbitrc:$BI),
+ "setbcr $RST, $BI", IIC_IntCompare, []>,
SExt32To64, ZExt32To64;
- def SETNBC8 : XForm_XT5_BI5<31, 448, (outs g8rc:$RT), (ins crbitrc:$BI),
- "setnbc $RT, $BI", IIC_IntCompare, []>,
+ def SETNBC8 : XForm_XT5_BI5<31, 448, (outs g8rc:$RST), (ins crbitrc:$BI),
+ "setnbc $RST, $BI", IIC_IntCompare, []>,
SExt32To64;
- def SETNBCR8 : XForm_XT5_BI5<31, 480, (outs g8rc:$RT), (ins crbitrc:$BI),
- "setnbcr $RT, $BI", IIC_IntCompare, []>,
+ def SETNBCR8 : XForm_XT5_BI5<31, 480, (outs g8rc:$RST), (ins crbitrc:$BI),
+ "setnbcr $RST, $BI", IIC_IntCompare, []>,
SExt32To64;
}
def VSLDBI : VNForm_VTAB5_SD3<22, 0, (outs vrrc:$VRT),
- (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH),
- "vsldbi $VRT, $VRA, $VRB, $SH",
+ (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SD),
+ "vsldbi $VRT, $VRA, $VRB, $SD",
IIC_VecGeneral,
[(set v16i8:$VRT,
(int_ppc_altivec_vsldbi v16i8:$VRA,
v16i8:$VRB,
- timm:$SH))]>;
+ timm:$SD))]>;
def VSRDBI : VNForm_VTAB5_SD3<22, 1, (outs vrrc:$VRT),
- (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH),
- "vsrdbi $VRT, $VRA, $VRB, $SH",
+ (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SD),
+ "vsrdbi $VRT, $VRA, $VRB, $SD",
IIC_VecGeneral,
[(set v16i8:$VRT,
(int_ppc_altivec_vsrdbi v16i8:$VRA,
v16i8:$VRB,
- timm:$SH))]>;
- defm VSTRIBR : VXForm_VTB5_RCr<13, 1, (outs vrrc:$vT), (ins vrrc:$vB),
- "vstribr", "$vT, $vB", IIC_VecGeneral,
- [(set v16i8:$vT,
- (int_ppc_altivec_vstribr v16i8:$vB))]>;
- defm VSTRIBL : VXForm_VTB5_RCr<13, 0, (outs vrrc:$vT), (ins vrrc:$vB),
- "vstribl", "$vT, $vB", IIC_VecGeneral,
- [(set v16i8:$vT,
- (int_ppc_altivec_vstribl v16i8:$vB))]>;
- defm VSTRIHR : VXForm_VTB5_RCr<13, 3, (outs vrrc:$vT), (ins vrrc:$vB),
- "vstrihr", "$vT, $vB", IIC_VecGeneral,
- [(set v8i16:$vT,
- (int_ppc_altivec_vstrihr v8i16:$vB))]>;
- defm VSTRIHL : VXForm_VTB5_RCr<13, 2, (outs vrrc:$vT), (ins vrrc:$vB),
- "vstrihl", "$vT, $vB", IIC_VecGeneral,
- [(set v8i16:$vT,
- (int_ppc_altivec_vstrihl v8i16:$vB))]>;
+ timm:$SD))]>;
+ defm VSTRIBR : VXForm_VTB5_RCr<13, 1, (outs vrrc:$VT), (ins vrrc:$VB),
+ "vstribr", "$VT, $VB", IIC_VecGeneral,
+ [(set v16i8:$VT,
+ (int_ppc_altivec_vstribr v16i8:$VB))]>;
+ defm VSTRIBL : VXForm_VTB5_RCr<13, 0, (outs vrrc:$VT), (ins vrrc:$VB),
+ "vstribl", "$VT, $VB", IIC_VecGeneral,
+ [(set v16i8:$VT,
+ (int_ppc_altivec_vstribl v16i8:$VB))]>;
+ defm VSTRIHR : VXForm_VTB5_RCr<13, 3, (outs vrrc:$VT), (ins vrrc:$VB),
+ "vstrihr", "$VT, $VB", IIC_VecGeneral,
+ [(set v8i16:$VT,
+ (int_ppc_altivec_vstrihr v8i16:$VB))]>;
+ defm VSTRIHL : VXForm_VTB5_RCr<13, 2, (outs vrrc:$VT), (ins vrrc:$VB),
+ "vstrihl", "$VT, $VB", IIC_VecGeneral,
+ [(set v8i16:$VT,
+ (int_ppc_altivec_vstrihl v8i16:$VB))]>;
def VINSW :
- VXForm_1<207, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, gprc:$rB),
- "vinsw $vD, $rB, $UIM", IIC_VecGeneral,
- [(set v4i32:$vD,
- (int_ppc_altivec_vinsw v4i32:$vDi, i32:$rB, timm:$UIM))]>,
- RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+ VXForm_1<207, (outs vrrc:$VD), (ins vrrc:$VDi, u4imm:$VA, gprc:$VB),
+ "vinsw $VD, $VB, $VA", IIC_VecGeneral,
+ [(set v4i32:$VD,
+ (int_ppc_altivec_vinsw v4i32:$VDi, i32:$VB, timm:$VA))]>,
+ RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">;
def VINSD :
- VXForm_1<463, (outs vrrc:$vD), (ins vrrc:$vDi, u4imm:$UIM, g8rc:$rB),
- "vinsd $vD, $rB, $UIM", IIC_VecGeneral,
- [(set v2i64:$vD,
- (int_ppc_altivec_vinsd v2i64:$vDi, i64:$rB, timm:$UIM))]>,
- RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+ VXForm_1<463, (outs vrrc:$VD), (ins vrrc:$VDi, u4imm:$VA, g8rc:$VB),
+ "vinsd $VD, $VB, $VA", IIC_VecGeneral,
+ [(set v2i64:$VD,
+ (int_ppc_altivec_vinsd v2i64:$VDi, i64:$VB, timm:$VA))]>,
+ RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">;
def VINSBVLX :
VXForm_VTB5_RA5_ins<15, "vinsbvlx",
- [(set v16i8:$vD,
- (int_ppc_altivec_vinsbvlx v16i8:$vDi, i32:$rA,
- v16i8:$vB))]>;
+ [(set v16i8:$VD,
+ (int_ppc_altivec_vinsbvlx v16i8:$VDi, i32:$VA,
+ v16i8:$VB))]>;
def VINSBVRX :
VXForm_VTB5_RA5_ins<271, "vinsbvrx",
- [(set v16i8:$vD,
- (int_ppc_altivec_vinsbvrx v16i8:$vDi, i32:$rA,
- v16i8:$vB))]>;
+ [(set v16i8:$VD,
+ (int_ppc_altivec_vinsbvrx v16i8:$VDi, i32:$VA,
+ v16i8:$VB))]>;
def VINSHVLX :
VXForm_VTB5_RA5_ins<79, "vinshvlx",
- [(set v8i16:$vD,
- (int_ppc_altivec_vinshvlx v8i16:$vDi, i32:$rA,
- v8i16:$vB))]>;
+ [(set v8i16:$VD,
+ (int_ppc_altivec_vinshvlx v8i16:$VDi, i32:$VA,
+ v8i16:$VB))]>;
def VINSHVRX :
VXForm_VTB5_RA5_ins<335, "vinshvrx",
- [(set v8i16:$vD,
- (int_ppc_altivec_vinshvrx v8i16:$vDi, i32:$rA,
- v8i16:$vB))]>;
+ [(set v8i16:$VD,
+ (int_ppc_altivec_vinshvrx v8i16:$VDi, i32:$VA,
+ v8i16:$VB))]>;
def VINSWVLX :
VXForm_VTB5_RA5_ins<143, "vinswvlx",
- [(set v4i32:$vD,
- (int_ppc_altivec_vinswvlx v4i32:$vDi, i32:$rA,
- v4i32:$vB))]>;
+ [(set v4i32:$VD,
+ (int_ppc_altivec_vinswvlx v4i32:$VDi, i32:$VA,
+ v4i32:$VB))]>;
def VINSWVRX :
VXForm_VTB5_RA5_ins<399, "vinswvrx",
- [(set v4i32:$vD,
- (int_ppc_altivec_vinswvrx v4i32:$vDi, i32:$rA,
- v4i32:$vB))]>;
+ [(set v4i32:$VD,
+ (int_ppc_altivec_vinswvrx v4i32:$VDi, i32:$VA,
+ v4i32:$VB))]>;
def VINSBLX :
VXForm_VRT5_RAB5_ins<527, "vinsblx",
- [(set v16i8:$vD,
- (int_ppc_altivec_vinsblx v16i8:$vDi, i32:$rA,
- i32:$rB))]>;
+ [(set v16i8:$VD,
+ (int_ppc_altivec_vinsblx v16i8:$VDi, i32:$VA,
+ i32:$VB))]>;
def VINSBRX :
VXForm_VRT5_RAB5_ins<783, "vinsbrx",
- [(set v16i8:$vD,
- (int_ppc_altivec_vinsbrx v16i8:$vDi, i32:$rA,
- i32:$rB))]>;
+ [(set v16i8:$VD,
+ (int_ppc_altivec_vinsbrx v16i8:$VDi, i32:$VA,
+ i32:$VB))]>;
def VINSHLX :
VXForm_VRT5_RAB5_ins<591, "vinshlx",
- [(set v8i16:$vD,
- (int_ppc_altivec_vinshlx v8i16:$vDi, i32:$rA,
- i32:$rB))]>;
+ [(set v8i16:$VD,
+ (int_ppc_altivec_vinshlx v8i16:$VDi, i32:$VA,
+ i32:$VB))]>;
def VINSHRX :
VXForm_VRT5_RAB5_ins<847, "vinshrx",
- [(set v8i16:$vD,
- (int_ppc_altivec_vinshrx v8i16:$vDi, i32:$rA,
- i32:$rB))]>;
+ [(set v8i16:$VD,
+ (int_ppc_altivec_vinshrx v8i16:$VDi, i32:$VA,
+ i32:$VB))]>;
def VINSWLX :
VXForm_VRT5_RAB5_ins<655, "vinswlx",
- [(set v4i32:$vD,
- (int_ppc_altivec_vinswlx v4i32:$vDi, i32:$rA,
- i32:$rB))]>;
+ [(set v4i32:$VD,
+ (int_ppc_altivec_vinswlx v4i32:$VDi, i32:$VA,
+ i32:$VB))]>;
def VINSWRX :
VXForm_VRT5_RAB5_ins<911, "vinswrx",
- [(set v4i32:$vD,
- (int_ppc_altivec_vinswrx v4i32:$vDi, i32:$rA,
- i32:$rB))]>;
+ [(set v4i32:$VD,
+ (int_ppc_altivec_vinswrx v4i32:$VDi, i32:$VA,
+ i32:$VB))]>;
def VINSDLX :
- VXForm_1<719, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB),
- "vinsdlx $vD, $rA, $rB", IIC_VecGeneral,
- [(set v2i64:$vD,
- (int_ppc_altivec_vinsdlx v2i64:$vDi, i64:$rA, i64:$rB))]>,
- RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+ VXForm_1<719, (outs vrrc:$VD), (ins vrrc:$VDi, g8rc:$VA, g8rc:$VB),
+ "vinsdlx $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD,
+ (int_ppc_altivec_vinsdlx v2i64:$VDi, i64:$VA, i64:$VB))]>,
+ RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">;
def VINSDRX :
- VXForm_1<975, (outs vrrc:$vD), (ins vrrc:$vDi, g8rc:$rA, g8rc:$rB),
- "vinsdrx $vD, $rA, $rB", IIC_VecGeneral,
- [(set v2i64:$vD,
- (int_ppc_altivec_vinsdrx v2i64:$vDi, i64:$rA, i64:$rB))]>,
- RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
- def VEXTRACTBM : VXForm_RD5_XO5_RS5<1602, 8, (outs gprc:$rD), (ins vrrc:$vB),
- "vextractbm $rD, $vB", IIC_VecGeneral,
- [(set i32:$rD,
- (int_ppc_altivec_vextractbm v16i8:$vB))]>,
+ VXForm_1<975, (outs vrrc:$VD), (ins vrrc:$VDi, g8rc:$VA, g8rc:$VB),
+ "vinsdrx $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD,
+ (int_ppc_altivec_vinsdrx v2i64:$VDi, i64:$VA, i64:$VB))]>,
+ RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">;
+ def VEXTRACTBM : VXForm_RD5_XO5_RS5<1602, 8, (outs gprc:$VD), (ins vrrc:$VB),
+ "vextractbm $VD, $VB", IIC_VecGeneral,
+ [(set i32:$VD,
+ (int_ppc_altivec_vextractbm v16i8:$VB))]>,
ZExt32To64;
- def VEXTRACTHM : VXForm_RD5_XO5_RS5<1602, 9, (outs gprc:$rD), (ins vrrc:$vB),
- "vextracthm $rD, $vB", IIC_VecGeneral,
- [(set i32:$rD,
- (int_ppc_altivec_vextracthm v8i16:$vB))]>,
+ def VEXTRACTHM : VXForm_RD5_XO5_RS5<1602, 9, (outs gprc:$VD), (ins vrrc:$VB),
+ "vextracthm $VD, $VB", IIC_VecGeneral,
+ [(set i32:$VD,
+ (int_ppc_altivec_vextracthm v8i16:$VB))]>,
ZExt32To64;
- def VEXTRACTWM : VXForm_RD5_XO5_RS5<1602, 10, (outs gprc:$rD), (ins vrrc:$vB),
- "vextractwm $rD, $vB", IIC_VecGeneral,
- [(set i32:$rD,
- (int_ppc_altivec_vextractwm v4i32:$vB))]>,
+ def VEXTRACTWM : VXForm_RD5_XO5_RS5<1602, 10, (outs gprc:$VD), (ins vrrc:$VB),
+ "vextractwm $VD, $VB", IIC_VecGeneral,
+ [(set i32:$VD,
+ (int_ppc_altivec_vextractwm v4i32:$VB))]>,
ZExt32To64;
- def VEXTRACTDM : VXForm_RD5_XO5_RS5<1602, 11, (outs gprc:$rD), (ins vrrc:$vB),
- "vextractdm $rD, $vB", IIC_VecGeneral,
- [(set i32:$rD,
- (int_ppc_altivec_vextractdm v2i64:$vB))]>,
+ def VEXTRACTDM : VXForm_RD5_XO5_RS5<1602, 11, (outs gprc:$VD), (ins vrrc:$VB),
+ "vextractdm $VD, $VB", IIC_VecGeneral,
+ [(set i32:$VD,
+ (int_ppc_altivec_vextractdm v2i64:$VB))]>,
ZExt32To64;
- def VEXTRACTQM : VXForm_RD5_XO5_RS5<1602, 12, (outs gprc:$rD), (ins vrrc:$vB),
- "vextractqm $rD, $vB", IIC_VecGeneral,
- [(set i32:$rD,
- (int_ppc_altivec_vextractqm v1i128:$vB))]>;
- def VEXPANDBM : VXForm_RD5_XO5_RS5<1602, 0, (outs vrrc:$vD), (ins vrrc:$vB),
- "vexpandbm $vD, $vB", IIC_VecGeneral,
- [(set v16i8:$vD, (int_ppc_altivec_vexpandbm
- v16i8:$vB))]>;
- def VEXPANDHM : VXForm_RD5_XO5_RS5<1602, 1, (outs vrrc:$vD), (ins vrrc:$vB),
- "vexpandhm $vD, $vB", IIC_VecGeneral,
- [(set v8i16:$vD, (int_ppc_altivec_vexpandhm
- v8i16:$vB))]>;
- def VEXPANDWM : VXForm_RD5_XO5_RS5<1602, 2, (outs vrrc:$vD), (ins vrrc:$vB),
- "vexpandwm $vD, $vB", IIC_VecGeneral,
- [(set v4i32:$vD, (int_ppc_altivec_vexpandwm
- v4i32:$vB))]>;
- def VEXPANDDM : VXForm_RD5_XO5_RS5<1602, 3, (outs vrrc:$vD), (ins vrrc:$vB),
- "vexpanddm $vD, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (int_ppc_altivec_vexpanddm
- v2i64:$vB))]>;
- def VEXPANDQM : VXForm_RD5_XO5_RS5<1602, 4, (outs vrrc:$vD), (ins vrrc:$vB),
- "vexpandqm $vD, $vB", IIC_VecGeneral,
- [(set v1i128:$vD, (int_ppc_altivec_vexpandqm
- v1i128:$vB))]>;
- def MTVSRBM : VXForm_RD5_XO5_RS5<1602, 16, (outs vrrc:$vD), (ins g8rc:$rB),
- "mtvsrbm $vD, $rB", IIC_VecGeneral,
- [(set v16i8:$vD,
- (int_ppc_altivec_mtvsrbm i64:$rB))]>;
- def MTVSRHM : VXForm_RD5_XO5_RS5<1602, 17, (outs vrrc:$vD), (ins g8rc:$rB),
- "mtvsrhm $vD, $rB", IIC_VecGeneral,
- [(set v8i16:$vD,
- (int_ppc_altivec_mtvsrhm i64:$rB))]>;
- def MTVSRWM : VXForm_RD5_XO5_RS5<1602, 18, (outs vrrc:$vD), (ins g8rc:$rB),
- "mtvsrwm $vD, $rB", IIC_VecGeneral,
- [(set v4i32:$vD,
- (int_ppc_altivec_mtvsrwm i64:$rB))]>;
- def MTVSRDM : VXForm_RD5_XO5_RS5<1602, 19, (outs vrrc:$vD), (ins g8rc:$rB),
- "mtvsrdm $vD, $rB", IIC_VecGeneral,
- [(set v2i64:$vD,
- (int_ppc_altivec_mtvsrdm i64:$rB))]>;
- def MTVSRQM : VXForm_RD5_XO5_RS5<1602, 20, (outs vrrc:$vD), (ins g8rc:$rB),
- "mtvsrqm $vD, $rB", IIC_VecGeneral,
- [(set v1i128:$vD,
- (int_ppc_altivec_mtvsrqm i64:$rB))]>;
- def MTVSRBMI : DXForm<4, 10, (outs vrrc:$vD), (ins u16imm64:$D),
- "mtvsrbmi $vD, $D", IIC_VecGeneral,
- [(set v16i8:$vD,
+ def VEXTRACTQM : VXForm_RD5_XO5_RS5<1602, 12, (outs gprc:$VD), (ins vrrc:$VB),
+ "vextractqm $VD, $VB", IIC_VecGeneral,
+ [(set i32:$VD,
+ (int_ppc_altivec_vextractqm v1i128:$VB))]>;
+ def VEXPANDBM : VXForm_RD5_XO5_RS5<1602, 0, (outs vrrc:$VD), (ins vrrc:$VB),
+ "vexpandbm $VD, $VB", IIC_VecGeneral,
+ [(set v16i8:$VD, (int_ppc_altivec_vexpandbm
+ v16i8:$VB))]>;
+ def VEXPANDHM : VXForm_RD5_XO5_RS5<1602, 1, (outs vrrc:$VD), (ins vrrc:$VB),
+ "vexpandhm $VD, $VB", IIC_VecGeneral,
+ [(set v8i16:$VD, (int_ppc_altivec_vexpandhm
+ v8i16:$VB))]>;
+ def VEXPANDWM : VXForm_RD5_XO5_RS5<1602, 2, (outs vrrc:$VD), (ins vrrc:$VB),
+ "vexpandwm $VD, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD, (int_ppc_altivec_vexpandwm
+ v4i32:$VB))]>;
+ def VEXPANDDM : VXForm_RD5_XO5_RS5<1602, 3, (outs vrrc:$VD), (ins vrrc:$VB),
+ "vexpanddm $VD, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD, (int_ppc_altivec_vexpanddm
+ v2i64:$VB))]>;
+ def VEXPANDQM : VXForm_RD5_XO5_RS5<1602, 4, (outs vrrc:$VD), (ins vrrc:$VB),
+ "vexpandqm $VD, $VB", IIC_VecGeneral,
+ [(set v1i128:$VD, (int_ppc_altivec_vexpandqm
+ v1i128:$VB))]>;
+ def MTVSRBM : VXForm_RD5_XO5_RS5<1602, 16, (outs vrrc:$VD), (ins g8rc:$VB),
+ "mtvsrbm $VD, $VB", IIC_VecGeneral,
+ [(set v16i8:$VD,
+ (int_ppc_altivec_mtvsrbm i64:$VB))]>;
+ def MTVSRHM : VXForm_RD5_XO5_RS5<1602, 17, (outs vrrc:$VD), (ins g8rc:$VB),
+ "mtvsrhm $VD, $VB", IIC_VecGeneral,
+ [(set v8i16:$VD,
+ (int_ppc_altivec_mtvsrhm i64:$VB))]>;
+ def MTVSRWM : VXForm_RD5_XO5_RS5<1602, 18, (outs vrrc:$VD), (ins g8rc:$VB),
+ "mtvsrwm $VD, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD,
+ (int_ppc_altivec_mtvsrwm i64:$VB))]>;
+ def MTVSRDM : VXForm_RD5_XO5_RS5<1602, 19, (outs vrrc:$VD), (ins g8rc:$VB),
+ "mtvsrdm $VD, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD,
+ (int_ppc_altivec_mtvsrdm i64:$VB))]>;
+ def MTVSRQM : VXForm_RD5_XO5_RS5<1602, 20, (outs vrrc:$VD), (ins g8rc:$VB),
+ "mtvsrqm $VD, $VB", IIC_VecGeneral,
+ [(set v1i128:$VD,
+ (int_ppc_altivec_mtvsrqm i64:$VB))]>;
+ def MTVSRBMI : DXForm<4, 10, (outs vrrc:$RT), (ins u16imm64:$D),
+ "mtvsrbmi $RT, $D", IIC_VecGeneral,
+ [(set v16i8:$RT,
(int_ppc_altivec_mtvsrbm imm:$D))]>;
- def VCNTMBB : VXForm_RD5_MP_VB5<1602, 12, (outs g8rc:$rD),
- (ins vrrc:$vB, u1imm:$MP),
- "vcntmbb $rD, $vB, $MP", IIC_VecGeneral,
- [(set i64:$rD, (int_ppc_altivec_vcntmbb
- v16i8:$vB, timm:$MP))]>;
- def VCNTMBH : VXForm_RD5_MP_VB5<1602, 13, (outs g8rc:$rD),
- (ins vrrc:$vB, u1imm:$MP),
- "vcntmbh $rD, $vB, $MP", IIC_VecGeneral,
- [(set i64:$rD, (int_ppc_altivec_vcntmbh
- v8i16:$vB, timm:$MP))]>;
- def VCNTMBW : VXForm_RD5_MP_VB5<1602, 14, (outs g8rc:$rD),
- (ins vrrc:$vB, u1imm:$MP),
- "vcntmbw $rD, $vB, $MP", IIC_VecGeneral,
- [(set i64:$rD, (int_ppc_altivec_vcntmbw
- v4i32:$vB, timm:$MP))]>;
- def VCNTMBD : VXForm_RD5_MP_VB5<1602, 15, (outs g8rc:$rD),
- (ins vrrc:$vB, u1imm:$MP),
- "vcntmbd $rD, $vB, $MP", IIC_VecGeneral,
- [(set i64:$rD, (int_ppc_altivec_vcntmbd
- v2i64:$vB, timm:$MP))]>;
- def VEXTDUBVLX : VAForm_1a<24, (outs vrrc:$vD),
- (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
- "vextdubvlx $vD, $vA, $vB, $rC",
+ def VCNTMBB : VXForm_RD5_MP_VB5<1602, 12, (outs g8rc:$RD),
+ (ins vrrc:$VB, u1imm:$MP),
+ "vcntmbb $RD, $VB, $MP", IIC_VecGeneral,
+ [(set i64:$RD, (int_ppc_altivec_vcntmbb
+ v16i8:$VB, timm:$MP))]>;
+ def VCNTMBH : VXForm_RD5_MP_VB5<1602, 13, (outs g8rc:$RD),
+ (ins vrrc:$VB, u1imm:$MP),
+ "vcntmbh $RD, $VB, $MP", IIC_VecGeneral,
+ [(set i64:$RD, (int_ppc_altivec_vcntmbh
+ v8i16:$VB, timm:$MP))]>;
+ def VCNTMBW : VXForm_RD5_MP_VB5<1602, 14, (outs g8rc:$RD),
+ (ins vrrc:$VB, u1imm:$MP),
+ "vcntmbw $RD, $VB, $MP", IIC_VecGeneral,
+ [(set i64:$RD, (int_ppc_altivec_vcntmbw
+ v4i32:$VB, timm:$MP))]>;
+ def VCNTMBD : VXForm_RD5_MP_VB5<1602, 15, (outs g8rc:$RD),
+ (ins vrrc:$VB, u1imm:$MP),
+ "vcntmbd $RD, $VB, $MP", IIC_VecGeneral,
+ [(set i64:$RD, (int_ppc_altivec_vcntmbd
+ v2i64:$VB, timm:$MP))]>;
+ def VEXTDUBVLX : VAForm_1a<24, (outs vrrc:$RT),
+ (ins vrrc:$RA, vrrc:$RB, gprc:$RC),
+ "vextdubvlx $RT, $RA, $RB, $RC",
IIC_VecGeneral,
- [(set v2i64:$vD,
- (int_ppc_altivec_vextdubvlx v16i8:$vA,
- v16i8:$vB,
- i32:$rC))]>;
- def VEXTDUBVRX : VAForm_1a<25, (outs vrrc:$vD),
- (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
- "vextdubvrx $vD, $vA, $vB, $rC",
+ [(set v2i64:$RT,
+ (int_ppc_altivec_vextdubvlx v16i8:$RA,
+ v16i8:$RB,
+ i32:$RC))]>;
+ def VEXTDUBVRX : VAForm_1a<25, (outs vrrc:$RT),
+ (ins vrrc:$RA, vrrc:$RB, gprc:$RC),
+ "vextdubvrx $RT, $RA, $RB, $RC",
IIC_VecGeneral,
- [(set v2i64:$vD,
- (int_ppc_altivec_vextdubvrx v16i8:$vA,
- v16i8:$vB,
- i32:$rC))]>;
- def VEXTDUHVLX : VAForm_1a<26, (outs vrrc:$vD),
- (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
- "vextduhvlx $vD, $vA, $vB, $rC",
+ [(set v2i64:$RT,
+ (int_ppc_altivec_vextdubvrx v16i8:$RA,
+ v16i8:$RB,
+ i32:$RC))]>;
+ def VEXTDUHVLX : VAForm_1a<26, (outs vrrc:$RT),
+ (ins vrrc:$RA, vrrc:$RB, gprc:$RC),
+ "vextduhvlx $RT, $RA, $RB, $RC",
IIC_VecGeneral,
- [(set v2i64:$vD,
- (int_ppc_altivec_vextduhvlx v8i16:$vA,
- v8i16:$vB,
- i32:$rC))]>;
- def VEXTDUHVRX : VAForm_1a<27, (outs vrrc:$vD),
- (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
- "vextduhvrx $vD, $vA, $vB, $rC",
+ [(set v2i64:$RT,
+ (int_ppc_altivec_vextduhvlx v8i16:$RA,
+ v8i16:$RB,
+ i32:$RC))]>;
+ def VEXTDUHVRX : VAForm_1a<27, (outs vrrc:$RT),
+ (ins vrrc:$RA, vrrc:$RB, gprc:$RC),
+ "vextduhvrx $RT, $RA, $RB, $RC",
IIC_VecGeneral,
- [(set v2i64:$vD,
- (int_ppc_altivec_vextduhvrx v8i16:$vA,
- v8i16:$vB,
- i32:$rC))]>;
- def VEXTDUWVLX : VAForm_1a<28, (outs vrrc:$vD),
- (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
- "vextduwvlx $vD, $vA, $vB, $rC",
+ [(set v2i64:$RT,
+ (int_ppc_altivec_vextduhvrx v8i16:$RA,
+ v8i16:$RB,
+ i32:$RC))]>;
+ def VEXTDUWVLX : VAForm_1a<28, (outs vrrc:$RT),
+ (ins vrrc:$RA, vrrc:$RB, gprc:$RC),
+ "vextduwvlx $RT, $RA, $RB, $RC",
IIC_VecGeneral,
- [(set v2i64:$vD,
- (int_ppc_altivec_vextduwvlx v4i32:$vA,
- v4i32:$vB,
- i32:$rC))]>;
- def VEXTDUWVRX : VAForm_1a<29, (outs vrrc:$vD),
- (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
- "vextduwvrx $vD, $vA, $vB, $rC",
+ [(set v2i64:$RT,
+ (int_ppc_altivec_vextduwvlx v4i32:$RA,
+ v4i32:$RB,
+ i32:$RC))]>;
+ def VEXTDUWVRX : VAForm_1a<29, (outs vrrc:$RT),
+ (ins vrrc:$RA, vrrc:$RB, gprc:$RC),
+ "vextduwvrx $RT, $RA, $RB, $RC",
IIC_VecGeneral,
- [(set v2i64:$vD,
- (int_ppc_altivec_vextduwvrx v4i32:$vA,
- v4i32:$vB,
- i32:$rC))]>;
- def VEXTDDVLX : VAForm_1a<30, (outs vrrc:$vD),
- (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
- "vextddvlx $vD, $vA, $vB, $rC",
+ [(set v2i64:$RT,
+ (int_ppc_altivec_vextduwvrx v4i32:$RA,
+ v4i32:$RB,
+ i32:$RC))]>;
+ def VEXTDDVLX : VAForm_1a<30, (outs vrrc:$RT),
+ (ins vrrc:$RA, vrrc:$RB, gprc:$RC),
+ "vextddvlx $RT, $RA, $RB, $RC",
IIC_VecGeneral,
- [(set v2i64:$vD,
- (int_ppc_altivec_vextddvlx v2i64:$vA,
- v2i64:$vB,
- i32:$rC))]>;
- def VEXTDDVRX : VAForm_1a<31, (outs vrrc:$vD),
- (ins vrrc:$vA, vrrc:$vB, gprc:$rC),
- "vextddvrx $vD, $vA, $vB, $rC",
+ [(set v2i64:$RT,
+ (int_ppc_altivec_vextddvlx v2i64:$RA,
+ v2i64:$RB,
+ i32:$RC))]>;
+ def VEXTDDVRX : VAForm_1a<31, (outs vrrc:$RT),
+ (ins vrrc:$RA, vrrc:$RB, gprc:$RC),
+ "vextddvrx $RT, $RA, $RB, $RC",
IIC_VecGeneral,
- [(set v2i64:$vD,
- (int_ppc_altivec_vextddvrx v2i64:$vA,
- v2i64:$vB,
- i32:$rC))]>;
- def VPDEPD : VXForm_1<1485, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vpdepd $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD,
- (int_ppc_altivec_vpdepd v2i64:$vA, v2i64:$vB))]>;
- def VPEXTD : VXForm_1<1421, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vpextd $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD,
- (int_ppc_altivec_vpextd v2i64:$vA, v2i64:$vB))]>;
- def PDEPD : XForm_6<31, 156, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "pdepd $rA, $rS, $rB", IIC_IntGeneral,
- [(set i64:$rA, (int_ppc_pdepd i64:$rS, i64:$rB))]>;
- def PEXTD : XForm_6<31, 188, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "pextd $rA, $rS, $rB", IIC_IntGeneral,
- [(set i64:$rA, (int_ppc_pextd i64:$rS, i64:$rB))]>;
- def VCFUGED : VXForm_1<1357, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vcfuged $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD,
- (int_ppc_altivec_vcfuged v2i64:$vA, v2i64:$vB))]>;
- def VGNB : VXForm_RD5_N3_VB5<1228, (outs g8rc:$rD), (ins vrrc:$vB, u3imm:$N),
- "vgnb $rD, $vB, $N", IIC_VecGeneral,
- [(set i64:$rD,
- (int_ppc_altivec_vgnb v1i128:$vB, timm:$N))]>;
- def CFUGED : XForm_6<31, 220, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "cfuged $rA, $rS, $rB", IIC_IntGeneral,
- [(set i64:$rA, (int_ppc_cfuged i64:$rS, i64:$rB))]>;
+ [(set v2i64:$RT,
+ (int_ppc_altivec_vextddvrx v2i64:$RA,
+ v2i64:$RB,
+ i32:$RC))]>;
+ def VPDEPD : VXForm_1<1485, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vpdepd $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD,
+ (int_ppc_altivec_vpdepd v2i64:$VA, v2i64:$VB))]>;
+ def VPEXTD : VXForm_1<1421, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vpextd $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD,
+ (int_ppc_altivec_vpextd v2i64:$VA, v2i64:$VB))]>;
+ def PDEPD : XForm_6<31, 156, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
+ "pdepd $RA, $RST, $RB", IIC_IntGeneral,
+ [(set i64:$RA, (int_ppc_pdepd i64:$RST, i64:$RB))]>;
+ def PEXTD : XForm_6<31, 188, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
+ "pextd $RA, $RST, $RB", IIC_IntGeneral,
+ [(set i64:$RA, (int_ppc_pextd i64:$RST, i64:$RB))]>;
+ def VCFUGED : VXForm_1<1357, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vcfuged $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD,
+ (int_ppc_altivec_vcfuged v2i64:$VA, v2i64:$VB))]>;
+ def VGNB : VXForm_RD5_N3_VB5<1228, (outs g8rc:$RD), (ins vrrc:$VB, u3imm:$N),
+ "vgnb $RD, $VB, $N", IIC_VecGeneral,
+ [(set i64:$RD,
+ (int_ppc_altivec_vgnb v1i128:$VB, timm:$N))]>;
+ def CFUGED : XForm_6<31, 220, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
+ "cfuged $RA, $RST, $RB", IIC_IntGeneral,
+ [(set i64:$RA, (int_ppc_cfuged i64:$RST, i64:$RB))]>;
def XXEVAL :
8RR_XX4Form_IMM8_XTAB6<34, 1, (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB,
vsrc:$XC, u8imm:$IMM),
"xxeval $XT, $XA, $XB, $XC, $IMM", IIC_VecGeneral,
[(set v2i64:$XT, (int_ppc_vsx_xxeval v2i64:$XA,
v2i64:$XB, v2i64:$XC, timm:$IMM))]>;
- def VCLZDM : VXForm_1<1924, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vclzdm $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD,
- (int_ppc_altivec_vclzdm v2i64:$vA, v2i64:$vB))]>;
- def VCTZDM : VXForm_1<1988, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vctzdm $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD,
- (int_ppc_altivec_vctzdm v2i64:$vA, v2i64:$vB))]>;
- def CNTLZDM : XForm_6<31, 59, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "cntlzdm $rA, $rS, $rB", IIC_IntGeneral,
- [(set i64:$rA,
- (int_ppc_cntlzdm i64:$rS, i64:$rB))]>;
- def CNTTZDM : XForm_6<31, 571, (outs g8rc:$rA), (ins g8rc:$rS, g8rc:$rB),
- "cnttzdm $rA, $rS, $rB", IIC_IntGeneral,
- [(set i64:$rA,
- (int_ppc_cnttzdm i64:$rS, i64:$rB))]>;
+ def VCLZDM : VXForm_1<1924, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vclzdm $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD,
+ (int_ppc_altivec_vclzdm v2i64:$VA, v2i64:$VB))]>;
+ def VCTZDM : VXForm_1<1988, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vctzdm $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD,
+ (int_ppc_altivec_vctzdm v2i64:$VA, v2i64:$VB))]>;
+ def CNTLZDM : XForm_6<31, 59, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
+ "cntlzdm $RA, $RST, $RB", IIC_IntGeneral,
+ [(set i64:$RA,
+ (int_ppc_cntlzdm i64:$RST, i64:$RB))]>;
+ def CNTTZDM : XForm_6<31, 571, (outs g8rc:$RA), (ins g8rc:$RST, g8rc:$RB),
+ "cnttzdm $RA, $RST, $RB", IIC_IntGeneral,
+ [(set i64:$RA,
+ (int_ppc_cnttzdm i64:$RST, i64:$RB))]>;
def XXGENPCVBM :
XForm_XT6_IMM5_VB5<60, 916, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM),
"xxgenpcvbm $XT, $VRB, $IMM", IIC_VecGeneral, []>;
@@ -1679,85 +1674,85 @@ let Predicates = [IsISA3_1] in {
def XXGENPCVDM :
XForm_XT6_IMM5_VB5<60, 949, (outs vsrc:$XT), (ins vrrc:$VRB, s5imm:$IMM),
"xxgenpcvdm $XT, $VRB, $IMM", IIC_VecGeneral, []>;
- def VCLRLB : VXForm_1<397, (outs vrrc:$vD), (ins vrrc:$vA, gprc:$rB),
- "vclrlb $vD, $vA, $rB", IIC_VecGeneral,
- [(set v16i8:$vD,
- (int_ppc_altivec_vclrlb v16i8:$vA, i32:$rB))]>;
- def VCLRRB : VXForm_1<461, (outs vrrc:$vD), (ins vrrc:$vA, gprc:$rB),
- "vclrrb $vD, $vA, $rB", IIC_VecGeneral,
- [(set v16i8:$vD,
- (int_ppc_altivec_vclrrb v16i8:$vA, i32:$rB))]>;
- def VMULLD : VXForm_1<457, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmulld $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (mul v2i64:$vA, v2i64:$vB))]>;
- def VMULHSW : VXForm_1<905, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmulhsw $vD, $vA, $vB", IIC_VecGeneral,
- [(set v4i32:$vD, (mulhs v4i32:$vA, v4i32:$vB))]>;
- def VMULHUW : VXForm_1<649, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmulhuw $vD, $vA, $vB", IIC_VecGeneral,
- [(set v4i32:$vD, (mulhu v4i32:$vA, v4i32:$vB))]>;
- def VMULHSD : VXForm_1<969, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmulhsd $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (mulhs v2i64:$vA, v2i64:$vB))]>;
- def VMULHUD : VXForm_1<713, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmulhud $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (mulhu v2i64:$vA, v2i64:$vB))]>;
- def VMODSW : VXForm_1<1931, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmodsw $vD, $vA, $vB", IIC_VecGeneral,
- [(set v4i32:$vD, (srem v4i32:$vA, v4i32:$vB))]>;
- def VMODUW : VXForm_1<1675, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmoduw $vD, $vA, $vB", IIC_VecGeneral,
- [(set v4i32:$vD, (urem v4i32:$vA, v4i32:$vB))]>;
- def VMODSD : VXForm_1<1995, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmodsd $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (srem v2i64:$vA, v2i64:$vB))]>;
- def VMODUD : VXForm_1<1739, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmodud $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (urem v2i64:$vA, v2i64:$vB))]>;
- def VDIVSW : VXForm_1<395, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vdivsw $vD, $vA, $vB", IIC_VecGeneral,
- [(set v4i32:$vD, (sdiv v4i32:$vA, v4i32:$vB))]>;
- def VDIVUW : VXForm_1<139, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vdivuw $vD, $vA, $vB", IIC_VecGeneral,
- [(set v4i32:$vD, (udiv v4i32:$vA, v4i32:$vB))]>;
- def VDIVSD : VXForm_1<459, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vdivsd $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (sdiv v2i64:$vA, v2i64:$vB))]>;
- def VDIVUD : VXForm_1<203, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vdivud $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (udiv v2i64:$vA, v2i64:$vB))]>;
- def VDIVESW : VXForm_1<907, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vdivesw $vD, $vA, $vB", IIC_VecGeneral,
- [(set v4i32:$vD, (int_ppc_altivec_vdivesw v4i32:$vA,
- v4i32:$vB))]>;
- def VDIVEUW : VXForm_1<651, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vdiveuw $vD, $vA, $vB", IIC_VecGeneral,
- [(set v4i32:$vD, (int_ppc_altivec_vdiveuw v4i32:$vA,
- v4i32:$vB))]>;
- def VDIVESD : VXForm_1<971, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vdivesd $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (int_ppc_altivec_vdivesd v2i64:$vA,
- v2i64:$vB))]>;
- def VDIVEUD : VXForm_1<715, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vdiveud $vD, $vA, $vB", IIC_VecGeneral,
- [(set v2i64:$vD, (int_ppc_altivec_vdiveud v2i64:$vA,
- v2i64:$vB))]>;
+ def VCLRLB : VXForm_1<397, (outs vrrc:$VD), (ins vrrc:$VA, gprc:$VB),
+ "vclrlb $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v16i8:$VD,
+ (int_ppc_altivec_vclrlb v16i8:$VA, i32:$VB))]>;
+ def VCLRRB : VXForm_1<461, (outs vrrc:$VD), (ins vrrc:$VA, gprc:$VB),
+ "vclrrb $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v16i8:$VD,
+ (int_ppc_altivec_vclrrb v16i8:$VA, i32:$VB))]>;
+ def VMULLD : VXForm_1<457, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmulld $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD, (mul v2i64:$VA, v2i64:$VB))]>;
+ def VMULHSW : VXForm_1<905, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmulhsw $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD, (mulhs v4i32:$VA, v4i32:$VB))]>;
+ def VMULHUW : VXForm_1<649, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmulhuw $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD, (mulhu v4i32:$VA, v4i32:$VB))]>;
+ def VMULHSD : VXForm_1<969, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmulhsd $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD, (mulhs v2i64:$VA, v2i64:$VB))]>;
+ def VMULHUD : VXForm_1<713, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmulhud $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD, (mulhu v2i64:$VA, v2i64:$VB))]>;
+ def VMODSW : VXForm_1<1931, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmodsw $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD, (srem v4i32:$VA, v4i32:$VB))]>;
+ def VMODUW : VXForm_1<1675, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmoduw $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD, (urem v4i32:$VA, v4i32:$VB))]>;
+ def VMODSD : VXForm_1<1995, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmodsd $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD, (srem v2i64:$VA, v2i64:$VB))]>;
+ def VMODUD : VXForm_1<1739, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmodud $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD, (urem v2i64:$VA, v2i64:$VB))]>;
+ def VDIVSW : VXForm_1<395, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vdivsw $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD, (sdiv v4i32:$VA, v4i32:$VB))]>;
+ def VDIVUW : VXForm_1<139, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vdivuw $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD, (udiv v4i32:$VA, v4i32:$VB))]>;
+ def VDIVSD : VXForm_1<459, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vdivsd $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD, (sdiv v2i64:$VA, v2i64:$VB))]>;
+ def VDIVUD : VXForm_1<203, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vdivud $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD, (udiv v2i64:$VA, v2i64:$VB))]>;
+ def VDIVESW : VXForm_1<907, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vdivesw $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD, (int_ppc_altivec_vdivesw v4i32:$VA,
+ v4i32:$VB))]>;
+ def VDIVEUW : VXForm_1<651, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vdiveuw $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v4i32:$VD, (int_ppc_altivec_vdiveuw v4i32:$VA,
+ v4i32:$VB))]>;
+ def VDIVESD : VXForm_1<971, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vdivesd $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD, (int_ppc_altivec_vdivesd v2i64:$VA,
+ v2i64:$VB))]>;
+ def VDIVEUD : VXForm_1<715, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vdiveud $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v2i64:$VD, (int_ppc_altivec_vdiveud v2i64:$VA,
+ v2i64:$VB))]>;
def XVTLSBB : XX2_BF3_XO5_XB6_XO9<60, 2, 475, (outs crrc:$BF), (ins vsrc:$XB),
"xvtlsbb $BF, $XB", IIC_VecGeneral, []>;
- def BRH : XForm_11<31, 219, (outs gprc:$RA), (ins gprc:$RS),
- "brh $RA, $RS", IIC_IntRotate, []>;
- def BRW : XForm_11<31, 155, (outs gprc:$RA), (ins gprc:$RS),
- "brw $RA, $RS", IIC_IntRotate,
- [(set i32:$RA, (bswap i32:$RS))]>;
+ def BRH : XForm_11<31, 219, (outs gprc:$RA), (ins gprc:$RST),
+ "brh $RA, $RST", IIC_IntRotate, []>;
+ def BRW : XForm_11<31, 155, (outs gprc:$RA), (ins gprc:$RST),
+ "brw $RA, $RST", IIC_IntRotate,
+ [(set i32:$RA, (bswap i32:$RST))]>;
let isCodeGenOnly = 1 in {
- def BRH8 : XForm_11<31, 219, (outs g8rc:$RA), (ins g8rc:$RS),
- "brh $RA, $RS", IIC_IntRotate, []>;
- def BRW8 : XForm_11<31, 155, (outs g8rc:$RA), (ins g8rc:$RS),
- "brw $RA, $RS", IIC_IntRotate, []>;
+ def BRH8 : XForm_11<31, 219, (outs g8rc:$RA), (ins g8rc:$RST),
+ "brh $RA, $RST", IIC_IntRotate, []>;
+ def BRW8 : XForm_11<31, 155, (outs g8rc:$RA), (ins g8rc:$RST),
+ "brw $RA, $RST", IIC_IntRotate, []>;
}
- def BRD : XForm_11<31, 187, (outs g8rc:$RA), (ins g8rc:$RS),
- "brd $RA, $RS", IIC_IntRotate,
- [(set i64:$RA, (bswap i64:$RS))]>;
+ def BRD : XForm_11<31, 187, (outs g8rc:$RA), (ins g8rc:$RST),
+ "brd $RA, $RST", IIC_IntRotate,
+ [(set i64:$RA, (bswap i64:$RST))]>;
// The XFormMemOp flag for the following 8 instructions is set on
// the instruction format.
@@ -1775,70 +1770,70 @@ let Predicates = [IsISA3_1] in {
def STXVRDX : X_XS6_RA5_RB5<31, 237, "stxvrdx", vsrc, []>;
}
- def VMULESD : VXForm_1<968, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmulesd $vD, $vA, $vB", IIC_VecGeneral,
- [(set v1i128:$vD, (int_ppc_altivec_vmulesd v2i64:$vA,
- v2i64:$vB))]>;
- def VMULEUD : VXForm_1<712, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmuleud $vD, $vA, $vB", IIC_VecGeneral,
- [(set v1i128:$vD, (int_ppc_altivec_vmuleud v2i64:$vA,
- v2i64:$vB))]>;
- def VMULOSD : VXForm_1<456, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmulosd $vD, $vA, $vB", IIC_VecGeneral,
- [(set v1i128:$vD, (int_ppc_altivec_vmulosd v2i64:$vA,
- v2i64:$vB))]>;
- def VMULOUD : VXForm_1<200, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmuloud $vD, $vA, $vB", IIC_VecGeneral,
- [(set v1i128:$vD, (int_ppc_altivec_vmuloud v2i64:$vA,
- v2i64:$vB))]>;
- def VMSUMCUD : VAForm_1a<23, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vC),
- "vmsumcud $vD, $vA, $vB, $vC", IIC_VecGeneral,
- [(set v1i128:$vD, (int_ppc_altivec_vmsumcud
- v2i64:$vA, v2i64:$vB, v1i128:$vC))]>;
- def VDIVSQ : VXForm_1<267, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vdivsq $vD, $vA, $vB", IIC_VecGeneral,
- [(set v1i128:$vD, (sdiv v1i128:$vA, v1i128:$vB))]>;
- def VDIVUQ : VXForm_1<11, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vdivuq $vD, $vA, $vB", IIC_VecGeneral,
- [(set v1i128:$vD, (udiv v1i128:$vA, v1i128:$vB))]>;
- def VDIVESQ : VXForm_1<779, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vdivesq $vD, $vA, $vB", IIC_VecGeneral,
- [(set v1i128:$vD, (int_ppc_altivec_vdivesq v1i128:$vA,
- v1i128:$vB))]>;
- def VDIVEUQ : VXForm_1<523, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vdiveuq $vD, $vA, $vB", IIC_VecGeneral,
- [(set v1i128:$vD, (int_ppc_altivec_vdiveuq v1i128:$vA,
- v1i128:$vB))]>;
- def VCMPEQUQ : VCMP <455, "vcmpequq $vD, $vA, $vB" , v1i128>;
- def VCMPGTSQ : VCMP <903, "vcmpgtsq $vD, $vA, $vB" , v1i128>;
- def VCMPGTUQ : VCMP <647, "vcmpgtuq $vD, $vA, $vB" , v1i128>;
- def VCMPEQUQ_rec : VCMP_rec <455, "vcmpequq. $vD, $vA, $vB" , v1i128>;
- def VCMPGTSQ_rec : VCMP_rec <903, "vcmpgtsq. $vD, $vA, $vB" , v1i128>;
- def VCMPGTUQ_rec : VCMP_rec <647, "vcmpgtuq. $vD, $vA, $vB" , v1i128>;
- def VMODSQ : VXForm_1<1803, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmodsq $vD, $vA, $vB", IIC_VecGeneral,
- [(set v1i128:$vD, (srem v1i128:$vA, v1i128:$vB))]>;
- def VMODUQ : VXForm_1<1547, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
- "vmoduq $vD, $vA, $vB", IIC_VecGeneral,
- [(set v1i128:$vD, (urem v1i128:$vA, v1i128:$vB))]>;
- def VEXTSD2Q : VXForm_RD5_XO5_RS5<1538, 27, (outs vrrc:$vD), (ins vrrc:$vB),
- "vextsd2q $vD, $vB", IIC_VecGeneral,
- [(set v1i128:$vD, (int_ppc_altivec_vextsd2q v2i64:$vB))]>;
- def VCMPUQ : VXForm_BF3_VAB5<257, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
- "vcmpuq $BF, $vA, $vB", IIC_VecGeneral, []>;
- def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$vA, vrrc:$vB),
- "vcmpsq $BF, $vA, $vB", IIC_VecGeneral, []>;
+ def VMULESD : VXForm_1<968, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmulesd $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v1i128:$VD, (int_ppc_altivec_vmulesd v2i64:$VA,
+ v2i64:$VB))]>;
+ def VMULEUD : VXForm_1<712, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmuleud $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v1i128:$VD, (int_ppc_altivec_vmuleud v2i64:$VA,
+ v2i64:$VB))]>;
+ def VMULOSD : VXForm_1<456, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmulosd $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v1i128:$VD, (int_ppc_altivec_vmulosd v2i64:$VA,
+ v2i64:$VB))]>;
+ def VMULOUD : VXForm_1<200, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmuloud $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v1i128:$VD, (int_ppc_altivec_vmuloud v2i64:$VA,
+ v2i64:$VB))]>;
+ def VMSUMCUD : VAForm_1a<23, (outs vrrc:$RT), (ins vrrc:$RA, vrrc:$RB, vrrc:$RC),
+ "vmsumcud $RT, $RA, $RB, $RC", IIC_VecGeneral,
+ [(set v1i128:$RT, (int_ppc_altivec_vmsumcud
+ v2i64:$RA, v2i64:$RB, v1i128:$RC))]>;
+ def VDIVSQ : VXForm_1<267, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vdivsq $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v1i128:$VD, (sdiv v1i128:$VA, v1i128:$VB))]>;
+ def VDIVUQ : VXForm_1<11, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vdivuq $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v1i128:$VD, (udiv v1i128:$VA, v1i128:$VB))]>;
+ def VDIVESQ : VXForm_1<779, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vdivesq $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v1i128:$VD, (int_ppc_altivec_vdivesq v1i128:$VA,
+ v1i128:$VB))]>;
+ def VDIVEUQ : VXForm_1<523, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vdiveuq $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v1i128:$VD, (int_ppc_altivec_vdiveuq v1i128:$VA,
+ v1i128:$VB))]>;
+ def VCMPEQUQ : VCMP <455, "vcmpequq $VD, $VA, $VB" , v1i128>;
+ def VCMPGTSQ : VCMP <903, "vcmpgtsq $VD, $VA, $VB" , v1i128>;
+ def VCMPGTUQ : VCMP <647, "vcmpgtuq $VD, $VA, $VB" , v1i128>;
+ def VCMPEQUQ_rec : VCMP_rec <455, "vcmpequq. $VD, $VA, $VB" , v1i128>;
+ def VCMPGTSQ_rec : VCMP_rec <903, "vcmpgtsq. $VD, $VA, $VB" , v1i128>;
+ def VCMPGTUQ_rec : VCMP_rec <647, "vcmpgtuq. $VD, $VA, $VB" , v1i128>;
+ def VMODSQ : VXForm_1<1803, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmodsq $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v1i128:$VD, (srem v1i128:$VA, v1i128:$VB))]>;
+ def VMODUQ : VXForm_1<1547, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB),
+ "vmoduq $VD, $VA, $VB", IIC_VecGeneral,
+ [(set v1i128:$VD, (urem v1i128:$VA, v1i128:$VB))]>;
+ def VEXTSD2Q : VXForm_RD5_XO5_RS5<1538, 27, (outs vrrc:$VD), (ins vrrc:$VB),
+ "vextsd2q $VD, $VB", IIC_VecGeneral,
+ [(set v1i128:$VD, (int_ppc_altivec_vextsd2q v2i64:$VB))]>;
+ def VCMPUQ : VXForm_BF3_VAB5<257, (outs crrc:$BF), (ins vrrc:$VA, vrrc:$VB),
+ "vcmpuq $BF, $VA, $VB", IIC_VecGeneral, []>;
+ def VCMPSQ : VXForm_BF3_VAB5<321, (outs crrc:$BF), (ins vrrc:$VA, vrrc:$VB),
+ "vcmpsq $BF, $VA, $VB", IIC_VecGeneral, []>;
def VRLQNM : VX1_VT5_VA5_VB5<325, "vrlqnm",
- [(set v1i128:$vD,
- (int_ppc_altivec_vrlqnm v1i128:$vA,
- v1i128:$vB))]>;
- def VRLQMI : VXForm_1<69, (outs vrrc:$vD),
- (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
- "vrlqmi $vD, $vA, $vB", IIC_VecFP,
- [(set v1i128:$vD,
- (int_ppc_altivec_vrlqmi v1i128:$vA, v1i128:$vB,
- v1i128:$vDi))]>,
- RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+ [(set v1i128:$VD,
+ (int_ppc_altivec_vrlqnm v1i128:$VA,
+ v1i128:$VB))]>;
+ def VRLQMI : VXForm_1<69, (outs vrrc:$VD),
+ (ins vrrc:$VA, vrrc:$VB, vrrc:$VDi),
+ "vrlqmi $VD, $VA, $VB", IIC_VecFP,
+ [(set v1i128:$VD,
+ (int_ppc_altivec_vrlqmi v1i128:$VA, v1i128:$VB,
+ v1i128:$VDi))]>,
+ RegConstraint<"$VDi = $VD">, NoEncode<"$VDi">;
def VSLQ : VX1_VT5_VA5_VB5<261, "vslq", []>;
def VSRAQ : VX1_VT5_VA5_VB5<773, "vsraq", []>;
def VSRQ : VX1_VT5_VA5_VB5<517, "vsrq", []>;
@@ -1855,9 +1850,9 @@ let Predicates = [IsISA3_1, HasVSX] in {
def XVCVSPBF16 : XX2_XT6_XO5_XB6<60, 17, 475, "xvcvspbf16", vsrc, []>;
def XVCVBF16SPN : XX2_XT6_XO5_XB6<60, 16, 475, "xvcvbf16spn", vsrc, []>;
def XSMAXCQP : X_VT5_VA5_VB5<63, 676, "xsmaxcqp",
- [(set f128:$vT, (PPCxsmaxc f128:$vA, f128:$vB))]>;
+ [(set f128:$RST, (PPCxsmaxc f128:$RA, f128:$RB))]>;
def XSMINCQP : X_VT5_VA5_VB5<63, 740, "xsmincqp",
- [(set f128:$vT, (PPCxsminc f128:$vA, f128:$vB))]>;
+ [(set f128:$RST, (PPCxsminc f128:$RA, f128:$RB))]>;
}
// Multiclass defining patterns for Set Boolean Extension Reverse Instructions.
@@ -2201,20 +2196,10 @@ def : Pat<(f64 nzFPImmAsi64:$A),
def : Pat<(store v2f64:$XS, PDForm:$dst), (PSTXV $XS, memri34:$dst)>;
// Cases For PPCstore_scal_int_from_vsr
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), PDForm:$dst, 8),
- (PSTXSD (XSCVDPUXDS f64:$src), PDForm:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), PDForm:$dst, 8),
- (PSTXSD (XSCVDPSXDS f64:$src), PDForm:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), PDForm:$dst, 8),
- (PSTXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
- PDForm:$dst)>;
- def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), PDForm:$dst, 8),
- (PSTXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
- PDForm:$dst)>;
+ def : Pat<(PPCstore_scal_int_from_vsr f64:$src, PDForm:$dst, 8),
+ (PSTXSD $src, PDForm:$dst)>;
+ def : Pat<(PPCstore_scal_int_from_vsr f128:$src, PDForm:$dst, 8),
+ (PSTXSD (COPY_TO_REGCLASS $src, VFRC), PDForm:$dst)>;
}
let Predicates = [PrefixInstrs] in {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/llvm/lib/Target/PowerPC/PPCInstrSPE.td
index 1e0cc7f348b6..5adfbad6ca11 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrSPE.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrSPE.td
@@ -116,22 +116,14 @@ class EVXForm_D<bits<11> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern> :
I<4, OOL, IOL, asmstr, itin> {
bits<5> RT;
- bits<21> D;
+ bits<5> RA;
+ bits<5> D;
let Pattern = pattern;
let Inst{6-10} = RT;
- let Inst{20} = D{0};
- let Inst{19} = D{1};
- let Inst{18} = D{2};
- let Inst{17} = D{3};
- let Inst{16} = D{4};
- let Inst{15} = D{5};
- let Inst{14} = D{6};
- let Inst{13} = D{7};
- let Inst{12} = D{8};
- let Inst{11} = D{9};
- let Inst{11-20} = D{0-9};
+ let Inst{11-15} = RA;
+ let Inst{16-20} = D;
let Inst{21-31} = xo;
}
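
(Editorial aside, not part of the patch.) The EVXForm_D hunk above replaces the bit-by-bit scattering of a 21-bit D operand with three contiguous 5-bit fields. A minimal standalone C++ sketch of the new layout, assuming the usual IBM bit numbering where bit 0 is the most significant bit of the 32-bit word and that the 5-bit D field carries the already-scaled displacement supplied by the spe8dis/spe4dis/spe2dis operands; the function name and demo values are illustrative only, not LLVM code:

    // Sketch of the new EVXForm_D field packing (IBM bit numbering).
    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    static uint32_t encodeEVXFormD(uint32_t XO, uint32_t RT, uint32_t RA,
                                   uint32_t D) {
      assert(RT < 32 && RA < 32 && D < 32 && XO < 2048);
      uint32_t Inst = 4u << 26; // primary opcode 4 in bits 0-5
      Inst |= RT << 21;         // RT in bits 6-10
      Inst |= RA << 16;         // RA in bits 11-15
      Inst |= D << 11;          // D  in bits 16-20 (scaled displacement)
      Inst |= XO;               // xo in bits 21-31
      return Inst;
    }

    int main() {
      // e.g. an evldd-style word (xo = 769) with RT = 5, RA = 3, scaled D = 2.
      std::printf("0x%08x\n", encodeEVXFormD(769, 5, 3, 2));
      return 0;
    }
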
@@ -460,53 +452,53 @@ def EVFSTSTLT : EVXForm_3<669, (outs crrc:$crD), (ins sperc:$RA, sperc:$RB
"evfststlt $crD, $RA, $RB", IIC_VecGeneral, []>;
}
-def EVLDD : EVXForm_D<769, (outs sperc:$RT), (ins spe8dis:$dst),
+def EVLDD : EVXForm_D<769, (outs sperc:$RT), (ins (spe8dis $D, $RA):$dst),
"evldd $RT, $dst", IIC_LdStLoad,
[(set f64:$RT, (load iaddr:$dst))]>;
-def EVLDDX : EVXForm_1<768, (outs sperc:$RT), (ins memrr:$src),
+def EVLDDX : EVXForm_1<768, (outs sperc:$RT), (ins (memrr $RA, $RB):$src),
"evlddx $RT, $src", IIC_LdStLoad,
[(set f64:$RT, (load xaddr:$src))]>;
-def EVLDH : EVXForm_D<773, (outs sperc:$RT), (ins spe8dis:$dst),
+def EVLDH : EVXForm_D<773, (outs sperc:$RT), (ins (spe8dis $D, $RA):$dst),
"evldh $RT, $dst", IIC_LdStLoad, []>;
-def EVLDHX : EVXForm_1<772, (outs sperc:$RT), (ins memrr:$src),
+def EVLDHX : EVXForm_1<772, (outs sperc:$RT), (ins (memrr $RA, $RB):$src),
"evldhx $RT, $src", IIC_LdStLoad, []>;
-def EVLDW : EVXForm_D<771, (outs sperc:$RT), (ins spe8dis:$dst),
+def EVLDW : EVXForm_D<771, (outs sperc:$RT), (ins (spe8dis $D, $RA):$dst),
"evldw $RT, $dst", IIC_LdStLoad,
[]>;
-def EVLDWX : EVXForm_1<770, (outs sperc:$RT), (ins memrr:$src),
+def EVLDWX : EVXForm_1<770, (outs sperc:$RT), (ins (memrr $RA, $RB):$src),
"evldwx $RT, $src", IIC_LdStLoad,
[]>;
-def EVLHHESPLAT : EVXForm_D<777, (outs sperc:$RT), (ins spe2dis:$dst),
+def EVLHHESPLAT : EVXForm_D<777, (outs sperc:$RT), (ins (spe2dis $D, $RA):$dst),
"evlhhesplat $RT, $dst", IIC_LdStLoad, []>;
-def EVLHHESPLATX : EVXForm_1<776, (outs sperc:$RT), (ins memrr:$src),
+def EVLHHESPLATX : EVXForm_1<776, (outs sperc:$RT), (ins (memrr $RA, $RB):$src),
"evlhhesplatx $RT, $src", IIC_LdStLoad, []>;
-def EVLHHOUSPLAT : EVXForm_D<781, (outs sperc:$RT), (ins spe2dis:$dst),
+def EVLHHOUSPLAT : EVXForm_D<781, (outs sperc:$RT), (ins (spe2dis $D, $RA):$dst),
"evlhhousplat $RT, $dst", IIC_LdStLoad, []>;
-def EVLHHOUSPLATX : EVXForm_1<780, (outs sperc:$RT), (ins memrr:$src),
+def EVLHHOUSPLATX : EVXForm_1<780, (outs sperc:$RT), (ins (memrr $RA, $RB):$src),
"evlhhousplatx $RT, $src", IIC_LdStLoad, []>;
-def EVLHHOSSPLAT : EVXForm_D<783, (outs sperc:$RT), (ins spe2dis:$dst),
+def EVLHHOSSPLAT : EVXForm_D<783, (outs sperc:$RT), (ins (spe2dis $D, $RA):$dst),
"evlhhossplat $RT, $dst", IIC_LdStLoad, []>;
-def EVLHHOSSPLATX : EVXForm_1<782, (outs sperc:$RT), (ins memrr:$src),
+def EVLHHOSSPLATX : EVXForm_1<782, (outs sperc:$RT), (ins (memrr $RA, $RB):$src),
"evlhhossplatx $RT, $src", IIC_LdStLoad, []>;
-def EVLWHE : EVXForm_D<785, (outs sperc:$RT), (ins spe4dis:$dst),
+def EVLWHE : EVXForm_D<785, (outs sperc:$RT), (ins (spe4dis $D, $RA):$dst),
"evlwhe $RT, $dst", IIC_LdStLoad, []>;
-def EVLWHEX : EVXForm_1<784, (outs sperc:$RT), (ins memrr:$src),
+def EVLWHEX : EVXForm_1<784, (outs sperc:$RT), (ins (memrr $RA, $RB):$src),
"evlwhex $RT, $src", IIC_LdStLoad, []>;
-def EVLWHOS : EVXForm_D<791, (outs sperc:$RT), (ins spe4dis:$dst),
+def EVLWHOS : EVXForm_D<791, (outs sperc:$RT), (ins (spe4dis $D, $RA):$dst),
"evlwhos $RT, $dst", IIC_LdStLoad, []>;
-def EVLWHOSX : EVXForm_1<790, (outs sperc:$RT), (ins memrr:$src),
+def EVLWHOSX : EVXForm_1<790, (outs sperc:$RT), (ins (memrr $RA, $RB):$src),
"evlwhosx $RT, $src", IIC_LdStLoad, []>;
-def EVLWHOU : EVXForm_D<789, (outs sperc:$RT), (ins spe4dis:$dst),
+def EVLWHOU : EVXForm_D<789, (outs sperc:$RT), (ins (spe4dis $D, $RA):$dst),
"evlwhou $RT, $dst", IIC_LdStLoad, []>;
-def EVLWHOUX : EVXForm_1<788, (outs sperc:$RT), (ins memrr:$src),
+def EVLWHOUX : EVXForm_1<788, (outs sperc:$RT), (ins (memrr $RA, $RB):$src),
"evlwhoux $RT, $src", IIC_LdStLoad, []>;
-def EVLWHSPLAT : EVXForm_D<797, (outs sperc:$RT), (ins spe4dis:$dst),
+def EVLWHSPLAT : EVXForm_D<797, (outs sperc:$RT), (ins (spe4dis $D, $RA):$dst),
"evlwhsplat $RT, $dst", IIC_LdStLoad, []>;
-def EVLWHSPLATX : EVXForm_1<796, (outs sperc:$RT), (ins memrr:$src),
+def EVLWHSPLATX : EVXForm_1<796, (outs sperc:$RT), (ins (memrr $RA, $RB):$src),
"evlwhsplatx $RT, $src", IIC_LdStLoad, []>;
-def EVLWWSPLAT : EVXForm_D<793, (outs sperc:$RT), (ins spe4dis:$dst),
+def EVLWWSPLAT : EVXForm_D<793, (outs sperc:$RT), (ins (spe4dis $D, $RA):$dst),
"evlwwsplat $RT, $dst", IIC_LdStLoad, []>;
-def EVLWWSPLATX : EVXForm_1<792, (outs sperc:$RT), (ins memrr:$src),
+def EVLWWSPLATX : EVXForm_1<792, (outs sperc:$RT), (ins (memrr $RA, $RB):$src),
"evlwwsplatx $RT, $src", IIC_LdStLoad, []>;
def EVMERGEHI : EVXForm_1<556, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
@@ -751,37 +743,37 @@ def EVSRWU : EVXForm_1<544, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB)
"evsrwu $RT, $RA, $RB", IIC_VecGeneral,
[]>;
-def EVSTDD : EVXForm_D<801, (outs), (ins sperc:$RT, spe8dis:$dst),
+def EVSTDD : EVXForm_D<801, (outs), (ins sperc:$RT, (spe8dis $D, $RA):$dst),
"evstdd $RT, $dst", IIC_LdStStore,
[(store f64:$RT, iaddr:$dst)]>;
-def EVSTDDX : EVXForm_1<800, (outs), (ins sperc:$RT, memrr:$dst),
+def EVSTDDX : EVXForm_1<800, (outs), (ins sperc:$RT, (memrr $RA, $RB):$dst),
"evstddx $RT, $dst", IIC_LdStStore,
[(store f64:$RT, xaddr:$dst)]>;
-def EVSTDH : EVXForm_D<805, (outs), (ins sperc:$RT, spe8dis:$dst),
+def EVSTDH : EVXForm_D<805, (outs), (ins sperc:$RT, (spe8dis $D, $RA):$dst),
"evstdh $RT, $dst", IIC_LdStStore, []>;
-def EVSTDHX : EVXForm_1<804, (outs), (ins sperc:$RT, memrr:$dst),
+def EVSTDHX : EVXForm_1<804, (outs), (ins sperc:$RT, (memrr $RA, $RB):$dst),
"evstdhx $RT, $dst", IIC_LdStStore, []>;
-def EVSTDW : EVXForm_D<803, (outs), (ins sperc:$RT, spe8dis:$dst),
+def EVSTDW : EVXForm_D<803, (outs), (ins sperc:$RT, (spe8dis $D, $RA):$dst),
"evstdw $RT, $dst", IIC_LdStStore,
[]>;
-def EVSTDWX : EVXForm_1<802, (outs), (ins sperc:$RT, memrr:$dst),
+def EVSTDWX : EVXForm_1<802, (outs), (ins sperc:$RT, (memrr $RA, $RB):$dst),
"evstdwx $RT, $dst", IIC_LdStStore,
[]>;
-def EVSTWHE : EVXForm_D<817, (outs), (ins sperc:$RT, spe4dis:$dst),
+def EVSTWHE : EVXForm_D<817, (outs), (ins sperc:$RT, (spe4dis $D, $RA):$dst),
"evstwhe $RT, $dst", IIC_LdStStore, []>;
-def EVSTWHEX : EVXForm_1<816, (outs), (ins sperc:$RT, memrr:$dst),
+def EVSTWHEX : EVXForm_1<816, (outs), (ins sperc:$RT, (memrr $RA, $RB):$dst),
"evstwhex $RT, $dst", IIC_LdStStore, []>;
-def EVSTWHO : EVXForm_D<821, (outs), (ins sperc:$RT, spe4dis:$dst),
+def EVSTWHO : EVXForm_D<821, (outs), (ins sperc:$RT, (spe4dis $D, $RA):$dst),
"evstwho $RT, $dst", IIC_LdStStore, []>;
-def EVSTWHOX : EVXForm_1<820, (outs), (ins sperc:$RT, memrr:$dst),
+def EVSTWHOX : EVXForm_1<820, (outs), (ins sperc:$RT, (memrr $RA, $RB):$dst),
"evstwhox $RT, $dst", IIC_LdStStore, []>;
-def EVSTWWE : EVXForm_D<825, (outs), (ins sperc:$RT, spe4dis:$dst),
+def EVSTWWE : EVXForm_D<825, (outs), (ins sperc:$RT, (spe4dis $D, $RA):$dst),
"evstwwe $RT, $dst", IIC_LdStStore, []>;
-def EVSTWWEX : EVXForm_1<824, (outs), (ins sperc:$RT, memrr:$dst),
+def EVSTWWEX : EVXForm_1<824, (outs), (ins sperc:$RT, (memrr $RA, $RB):$dst),
"evstwwex $RT, $dst", IIC_LdStStore, []>;
-def EVSTWWO : EVXForm_D<829, (outs), (ins sperc:$RT, spe4dis:$dst),
+def EVSTWWO : EVXForm_D<829, (outs), (ins sperc:$RT, (spe4dis $D, $RA):$dst),
"evstwwo $RT, $dst", IIC_LdStStore, []>;
-def EVSTWWOX : EVXForm_1<828, (outs), (ins sperc:$RT, memrr:$dst),
+def EVSTWWOX : EVXForm_1<828, (outs), (ins sperc:$RT, (memrr $RA, $RB):$dst),
"evstwwox $RT, $dst", IIC_LdStStore, []>;
def EVSUBFSSIAAW : EVXForm_2<1219, (outs sperc:$RT), (ins sperc:$RA),
@@ -803,18 +795,18 @@ def EVXOR : EVXForm_1<534, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB)
let isAsmParserOnly = 1 in {
// Identical to the integer Load/Stores, but to handle floats
-def SPELWZ : DForm_1<32, (outs spe4rc:$rD), (ins memri:$src),
- "lwz $rD, $src", IIC_LdStLoad,
- [(set f32:$rD, (load iaddr:$src))]>;
-def SPELWZX : XForm_1<31, 23, (outs spe4rc:$rD), (ins memrr:$src),
- "lwzx $rD, $src", IIC_LdStLoad,
- [(set f32:$rD, (load xaddr:$src))]>;
-def SPESTW : DForm_1<36, (outs), (ins spe4rc:$rS, memri:$src),
- "stw $rS, $src", IIC_LdStStore,
- [(store f32:$rS, iaddr:$src)]>;
-def SPESTWX : XForm_8<31, 151, (outs), (ins spe4rc:$rS, memrr:$dst),
- "stwx $rS, $dst", IIC_LdStStore,
- [(store f32:$rS, xaddr:$dst)]>;
+def SPELWZ : DForm_1<32, (outs spe4rc:$RST), (ins (memri $D, $RA):$addr),
+ "lwz $RST, $addr", IIC_LdStLoad,
+ [(set f32:$RST, (load iaddr:$addr))]>;
+def SPELWZX : XForm_1<31, 23, (outs spe4rc:$RST), (ins (memrr $RA, $RB):$addr),
+ "lwzx $RST, $addr", IIC_LdStLoad,
+ [(set f32:$RST, (load xaddr:$addr))]>;
+def SPESTW : DForm_1<36, (outs), (ins spe4rc:$RST, (memri $D, $RA):$addr),
+ "stw $RST, $addr", IIC_LdStStore,
+ [(store f32:$RST, iaddr:$addr)]>;
+def SPESTWX : XForm_8<31, 151, (outs), (ins spe4rc:$RST, (memrr $RA, $RB):$addr),
+ "stwx $RST, $addr", IIC_LdStStore,
+ [(store f32:$RST, xaddr:$addr)]>;
}
} // HasSPE
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 3c742075b111..0e5f6b773bb5 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -76,9 +76,6 @@ def SDT_PPCxxswapd : SDTypeProfile<1, 1, [
def SDTVecConv : SDTypeProfile<1, 2, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisPtrTy<2>
]>;
-def SDTVabsd : SDTypeProfile<1, 3, [
- SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32>
-]>;
def SDT_PPCld_vec_be : SDTypeProfile<1, 1, [
SDTCisVec<0>, SDTCisPtrTy<1>
]>;
@@ -105,7 +102,6 @@ def PPCmtvsrz : SDNode<"PPCISD::MTVSRZ", SDTUnaryOp, []>;
def PPCsvec2fp : SDNode<"PPCISD::SINT_VEC_TO_FP", SDTVecConv, []>;
def PPCuvec2fp: SDNode<"PPCISD::UINT_VEC_TO_FP", SDTVecConv, []>;
def PPCswapNoChain : SDNode<"PPCISD::SWAP_NO_CHAIN", SDT_PPCxxswapd>;
-def PPCvabsd : SDNode<"PPCISD::VABSD", SDTVabsd, []>;
def PPCfpexth : SDNode<"PPCISD::FP_EXTEND_HALF", SDT_PPCfpexth, []>;
def PPCldvsxlh : SDNode<"PPCISD::LD_VSX_LH", SDT_PPCldvsxlh,
@@ -126,12 +122,14 @@ def IsLittleEndian : Predicate<"Subtarget->isLittleEndian()">;
def IsBigEndian : Predicate<"!Subtarget->isLittleEndian()">;
def IsPPC64 : Predicate<"Subtarget->isPPC64()">;
def HasOnlySwappingMemOps : Predicate<"!Subtarget->hasP9Vector()">;
+def NoP8Vector : Predicate<"!Subtarget->hasP8Vector()">;
def HasP8Vector : Predicate<"Subtarget->hasP8Vector()">;
def HasDirectMove : Predicate<"Subtarget->hasDirectMove()">;
def NoP9Vector : Predicate<"!Subtarget->hasP9Vector()">;
def HasP9Vector : Predicate<"Subtarget->hasP9Vector()">;
def NoP9Altivec : Predicate<"!Subtarget->hasP9Altivec()">;
def NoP10Vector: Predicate<"!Subtarget->hasP10Vector()">;
+def HasP10Vector: Predicate<"Subtarget->hasP10Vector()">;
def PPCldsplatAlign16 : PatFrag<(ops node:$ptr), (PPCldsplat node:$ptr), [{
return cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&
@@ -177,8 +175,8 @@ class XX3Form_2s<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
let Predicates = [HasVSX, HasP9Vector] in {
class X_VT5_XO5_VB5<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
list<dag> pattern>
- : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vrrc:$vB),
- !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+ : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$RST), (ins vrrc:$RB),
+ !strconcat(opc, " $RST, $RB"), IIC_VecFP, pattern>;
// [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
class X_VT5_XO5_VB5_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
@@ -189,14 +187,14 @@ class X_VT5_XO5_VB5_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
// So we use different operand class for VRB
class X_VT5_XO5_VB5_TyVB<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
RegisterOperand vbtype, list<dag> pattern>
- : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vbtype:$vB),
- !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+ : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$RST), (ins vbtype:$RB),
+ !strconcat(opc, " $RST, $RB"), IIC_VecFP, pattern>;
// [PO VRT XO VRB XO /]
class X_VT5_XO5_VB5_VSFR<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
list<dag> pattern>
- : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vfrc:$vT), (ins vrrc:$vB),
- !strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+ : X_RD5_XO5_RS5<opcode, xo2, xo, (outs vfrc:$RST), (ins vrrc:$RB),
+ !strconcat(opc, " $RST, $RB"), IIC_VecFP, pattern>;
// [PO VRT XO VRB XO RO], Round to Odd version of [PO VRT XO VRB XO /]
class X_VT5_XO5_VB5_VSFR_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc,
@@ -206,8 +204,8 @@ class X_VT5_XO5_VB5_VSFR_Ro<bits<6> opcode, bits<5> xo2, bits<10> xo, string opc
// [PO T XO B XO BX /]
class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
list<dag> pattern>
- : XX2_RD5_XO5_RS6<opcode, xo2, xo, (outs g8rc:$rT), (ins vsfrc:$XB),
- !strconcat(opc, " $rT, $XB"), IIC_VecFP, pattern>;
+ : XX2_RD5_XO5_RS6<opcode, xo2, xo, (outs g8rc:$RT), (ins vsfrc:$XB),
+ !strconcat(opc, " $RT, $XB"), IIC_VecFP, pattern>;
// [PO T XO B XO BX TX]
class XX2_XT6_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
@@ -225,8 +223,8 @@ class XX3_XT5_XA5_XB5<bits<6> opcode, bits<8> xo, string opc,
// [PO VRT VRA VRB XO /]
class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
list<dag> pattern>
- : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>;
+ : XForm_1<opcode, xo, (outs vrrc:$RST), (ins vrrc:$RA, vrrc:$RB),
+ !strconcat(opc, " $RST, $RA, $RB"), IIC_VecFP, pattern>;
// [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
class X_VT5_VA5_VB5_Ro<bits<6> opcode, bits<10> xo, string opc,
@@ -236,9 +234,9 @@ class X_VT5_VA5_VB5_Ro<bits<6> opcode, bits<10> xo, string opc,
// [PO VRT VRA VRB XO /]
class X_VT5_VA5_VB5_FMA<bits<6> opcode, bits<10> xo, string opc,
list<dag> pattern>
- : XForm_1<opcode, xo, (outs vrrc:$vT), (ins vrrc:$vTi, vrrc:$vA, vrrc:$vB),
- !strconcat(opc, " $vT, $vA, $vB"), IIC_VecFP, pattern>,
- RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">;
+ : XForm_1<opcode, xo, (outs vrrc:$RST), (ins vrrc:$RSTi, vrrc:$RA, vrrc:$RB),
+ !strconcat(opc, " $RST, $RA, $RB"), IIC_VecFP, pattern>,
+ RegConstraint<"$RSTi = $RST">, NoEncode<"$RSTi">;
// [PO VRT VRA VRB XO RO], Round to Odd version of [PO VRT VRA VRB XO /]
class X_VT5_VA5_VB5_FMA_Ro<bits<6> opcode, bits<10> xo, string opc,
@@ -248,16 +246,16 @@ class X_VT5_VA5_VB5_FMA_Ro<bits<6> opcode, bits<10> xo, string opc,
class Z23_VT5_R1_VB5_RMC2_EX1<bits<6> opcode, bits<8> xo, bit ex, string opc,
list<dag> pattern>
: Z23Form_8<opcode, xo,
- (outs vrrc:$vT), (ins u1imm:$r, vrrc:$vB, u2imm:$rmc),
- !strconcat(opc, " $r, $vT, $vB, $rmc"), IIC_VecFP, pattern> {
+ (outs vrrc:$VRT), (ins u1imm:$R, vrrc:$VRB, u2imm:$idx),
+ !strconcat(opc, " $R, $VRT, $VRB, $idx"), IIC_VecFP, pattern> {
let RC = ex;
}
// [PO BF // VRA VRB XO /]
class X_BF3_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
list<dag> pattern>
- : XForm_17<opcode, xo, (outs crrc:$crD), (ins vrrc:$VA, vrrc:$VB),
- !strconcat(opc, " $crD, $VA, $VB"), IIC_FPCompare> {
+ : XForm_17<opcode, xo, (outs crrc:$BF), (ins vrrc:$RA, vrrc:$RB),
+ !strconcat(opc, " $BF, $RA, $RB"), IIC_FPCompare> {
let Pattern = pattern;
}
@@ -265,14 +263,14 @@ class X_BF3_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
// "out" and "in" dag
class X_XT6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
RegisterOperand vtype, list<dag> pattern>
- : XX1Form_memOp<opcode, xo, (outs vtype:$XT), (ins memrr:$src),
- !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>;
+ : XX1Form_memOp<opcode, xo, (outs vtype:$XT), (ins (memrr $RA, $RB):$addr),
+ !strconcat(opc, " $XT, $addr"), IIC_LdStLFD, pattern>;
// [PO S RA RB XO SX]
class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
RegisterOperand vtype, list<dag> pattern>
- : XX1Form_memOp<opcode, xo, (outs), (ins vtype:$XT, memrr:$dst),
- !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>;
+ : XX1Form_memOp<opcode, xo, (outs), (ins vtype:$XT, (memrr $RA, $RB):$addr),
+ !strconcat(opc, " $XT, $addr"), IIC_LdStSTFD, pattern>;
} // Predicates = HasP9Vector
} // AddedComplexity = 400, hasSideEffects = 0
@@ -292,30 +290,30 @@ let hasSideEffects = 0 in {
let mayLoad = 1, mayStore = 0 in {
let CodeSize = 3 in
def LXSDX : XX1Form_memOp<31, 588,
- (outs vsfrc:$XT), (ins memrr:$src),
- "lxsdx $XT, $src", IIC_LdStLFD,
+ (outs vsfrc:$XT), (ins (memrr $RA, $RB):$addr),
+ "lxsdx $XT, $addr", IIC_LdStLFD,
[]>;
// Pseudo instruction XFLOADf64 will be expanded to LXSDX or LFDX later
let CodeSize = 3 in
- def XFLOADf64 : PseudoXFormMemOp<(outs vsfrc:$XT), (ins memrr:$src),
+ def XFLOADf64 : PseudoXFormMemOp<(outs vsfrc:$XT), (ins (memrr $RA, $RB):$addr),
"#XFLOADf64",
- [(set f64:$XT, (load XForm:$src))]>;
+ [(set f64:$XT, (load XForm:$addr))]>;
let Predicates = [HasVSX, HasOnlySwappingMemOps] in
def LXVD2X : XX1Form_memOp<31, 844,
- (outs vsrc:$XT), (ins memrr:$src),
- "lxvd2x $XT, $src", IIC_LdStLFD,
+ (outs vsrc:$XT), (ins (memrr $RA, $RB):$addr),
+ "lxvd2x $XT, $addr", IIC_LdStLFD,
[]>;
def LXVDSX : XX1Form_memOp<31, 332,
- (outs vsrc:$XT), (ins memrr:$src),
- "lxvdsx $XT, $src", IIC_LdStLFD, []>;
+ (outs vsrc:$XT), (ins (memrr $RA, $RB):$addr),
+ "lxvdsx $XT, $addr", IIC_LdStLFD, []>;
let Predicates = [HasVSX, HasOnlySwappingMemOps] in
def LXVW4X : XX1Form_memOp<31, 780,
- (outs vsrc:$XT), (ins memrr:$src),
- "lxvw4x $XT, $src", IIC_LdStLFD,
+ (outs vsrc:$XT), (ins (memrr $RA, $RB):$addr),
+ "lxvw4x $XT, $addr", IIC_LdStLFD,
[]>;
} // mayLoad
@@ -323,27 +321,27 @@ let hasSideEffects = 0 in {
let mayStore = 1, mayLoad = 0 in {
let CodeSize = 3 in
def STXSDX : XX1Form_memOp<31, 716,
- (outs), (ins vsfrc:$XT, memrr:$dst),
- "stxsdx $XT, $dst", IIC_LdStSTFD,
+ (outs), (ins vsfrc:$XT, (memrr $RA, $RB):$addr),
+ "stxsdx $XT, $addr", IIC_LdStSTFD,
[]>;
// Pseudo instruction XFSTOREf64 will be expanded to STXSDX or STFDX later
let CodeSize = 3 in
- def XFSTOREf64 : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, memrr:$dst),
+ def XFSTOREf64 : PseudoXFormMemOp<(outs), (ins vsfrc:$XT, (memrr $RA, $RB):$addr),
"#XFSTOREf64",
- [(store f64:$XT, XForm:$dst)]>;
+ [(store f64:$XT, XForm:$addr)]>;
let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
// The behaviour of this instruction is endianness-specific so we provide no
// pattern to match it without considering endianness.
def STXVD2X : XX1Form_memOp<31, 972,
- (outs), (ins vsrc:$XT, memrr:$dst),
- "stxvd2x $XT, $dst", IIC_LdStSTFD,
+ (outs), (ins vsrc:$XT, (memrr $RA, $RB):$addr),
+ "stxvd2x $XT, $addr", IIC_LdStSTFD,
[]>;
def STXVW4X : XX1Form_memOp<31, 908,
- (outs), (ins vsrc:$XT, memrr:$dst),
- "stxvw4x $XT, $dst", IIC_LdStSTFD,
+ (outs), (ins vsrc:$XT, (memrr $RA, $RB):$addr),
+ "stxvw4x $XT, $addr", IIC_LdStSTFD,
[]>;
}
} // mayStore
@@ -611,27 +609,27 @@ let hasSideEffects = 0 in {
let mayRaiseFPException = 0 in {
def XSTDIVDP : XX3Form_1<60, 61,
- (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
- "xstdivdp $crD, $XA, $XB", IIC_FPCompare, []>;
+ (outs crrc:$CR), (ins vsfrc:$XA, vsfrc:$XB),
+ "xstdivdp $CR, $XA, $XB", IIC_FPCompare, []>;
def XSTSQRTDP : XX2Form_1<60, 106,
- (outs crrc:$crD), (ins vsfrc:$XB),
- "xstsqrtdp $crD, $XB", IIC_FPCompare,
- [(set i32:$crD, (PPCftsqrt f64:$XB))]>;
+ (outs crrc:$CR), (ins vsfrc:$XB),
+ "xstsqrtdp $CR, $XB", IIC_FPCompare,
+ [(set i32:$CR, (PPCftsqrt f64:$XB))]>;
def XVTDIVDP : XX3Form_1<60, 125,
- (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
- "xvtdivdp $crD, $XA, $XB", IIC_FPCompare, []>;
+ (outs crrc:$CR), (ins vsrc:$XA, vsrc:$XB),
+ "xvtdivdp $CR, $XA, $XB", IIC_FPCompare, []>;
def XVTDIVSP : XX3Form_1<60, 93,
- (outs crrc:$crD), (ins vsrc:$XA, vsrc:$XB),
- "xvtdivsp $crD, $XA, $XB", IIC_FPCompare, []>;
+ (outs crrc:$CR), (ins vsrc:$XA, vsrc:$XB),
+ "xvtdivsp $CR, $XA, $XB", IIC_FPCompare, []>;
def XVTSQRTDP : XX2Form_1<60, 234,
- (outs crrc:$crD), (ins vsrc:$XB),
- "xvtsqrtdp $crD, $XB", IIC_FPCompare,
- [(set i32:$crD, (PPCftsqrt v2f64:$XB))]>;
+ (outs crrc:$CR), (ins vsrc:$XB),
+ "xvtsqrtdp $CR, $XB", IIC_FPCompare,
+ [(set i32:$CR, (PPCftsqrt v2f64:$XB))]>;
def XVTSQRTSP : XX2Form_1<60, 170,
- (outs crrc:$crD), (ins vsrc:$XB),
- "xvtsqrtsp $crD, $XB", IIC_FPCompare,
- [(set i32:$crD, (PPCftsqrt v4f32:$XB))]>;
+ (outs crrc:$CR), (ins vsrc:$XB),
+ "xvtsqrtsp $CR, $XB", IIC_FPCompare,
+ [(set i32:$CR, (PPCftsqrt v4f32:$XB))]>;
}
def XVDIVDP : XX3Form<60, 120,
@@ -672,11 +670,11 @@ let hasSideEffects = 0 in {
// Compare Instructions
def XSCMPODP : XX3Form_1<60, 43,
- (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
- "xscmpodp $crD, $XA, $XB", IIC_FPCompare, []>;
+ (outs crrc:$CR), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscmpodp $CR, $XA, $XB", IIC_FPCompare, []>;
def XSCMPUDP : XX3Form_1<60, 35,
- (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
- "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>;
+ (outs crrc:$CR), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscmpudp $CR, $XA, $XB", IIC_FPCompare, []>;
defm XVCMPEQDP : XX3Form_Rcr<60, 99,
"xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare,
@@ -1043,10 +1041,10 @@ let hasSideEffects = 0 in {
"xxmrglw $XT, $XA, $XB", IIC_VecPerm, []>;
def XXPERMDI : XX3Form_2<60, 10,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
- "xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$D),
+ "xxpermdi $XT, $XA, $XB, $D", IIC_VecPerm,
[(set v2i64:$XT, (PPCxxpermdi v2i64:$XA, v2i64:$XB,
- imm32SExt16:$DM))]>;
+ imm32SExt16:$D))]>;
let isCodeGenOnly = 1 in
// Note that the input register class for `$XA` of XXPERMDIs is `vsfrc` which
// is not the same with the input register class(`vsrc`) of XXPERMDI instruction.
@@ -1056,32 +1054,32 @@ let hasSideEffects = 0 in {
// 2: With `vsfrc` register class, in the final assembly, float registers
// like `f0` are used instead of vector scalar register like `vs0`. This
// helps readability.
- def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM),
- "xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>;
+ def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$D),
+ "xxpermdi $XT, $XA, $XA, $D", IIC_VecPerm, []>;
def XXSEL : XX4Form<60, 3,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
"xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>;
def XXSLDWI : XX3Form_2<60, 2,
- (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$SHW),
- "xxsldwi $XT, $XA, $XB, $SHW", IIC_VecPerm,
+ (outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$D),
+ "xxsldwi $XT, $XA, $XB, $D", IIC_VecPerm,
[(set v4i32:$XT, (PPCvecshl v4i32:$XA, v4i32:$XB,
- imm32SExt16:$SHW))]>;
+ imm32SExt16:$D))]>;
let isCodeGenOnly = 1 in
def XXSLDWIs : XX3Form_2s<60, 2,
- (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$SHW),
- "xxsldwi $XT, $XA, $XA, $SHW", IIC_VecPerm, []>;
+ (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$D),
+ "xxsldwi $XT, $XA, $XA, $D", IIC_VecPerm, []>;
def XXSPLTW : XX2Form_2<60, 164,
- (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
- "xxspltw $XT, $XB, $UIM", IIC_VecPerm,
+ (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$D),
+ "xxspltw $XT, $XB, $D", IIC_VecPerm,
[(set v4i32:$XT,
- (PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>;
+ (PPCxxsplt v4i32:$XB, imm32SExt16:$D))]>;
let isCodeGenOnly = 1 in
def XXSPLTWs : XX2Form_2<60, 164,
- (outs vsrc:$XT), (ins vsfrc:$XB, u2imm:$UIM),
- "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
+ (outs vsrc:$XT), (ins vsfrc:$XB, u2imm:$D),
+ "xxspltw $XT, $XB, $D", IIC_VecPerm, []>;
// The following VSX instructions were introduced in Power ISA 2.07
let Predicates = [HasVSX, HasP8Vector] in {
@@ -1111,12 +1109,12 @@ let Predicates = [HasVSX, HasP8Vector] in {
// VSX scalar loads introduced in ISA 2.07
let mayLoad = 1, mayStore = 0 in {
let CodeSize = 3 in
- def LXSSPX : XX1Form_memOp<31, 524, (outs vssrc:$XT), (ins memrr:$src),
- "lxsspx $XT, $src", IIC_LdStLFD, []>;
- def LXSIWAX : XX1Form_memOp<31, 76, (outs vsfrc:$XT), (ins memrr:$src),
- "lxsiwax $XT, $src", IIC_LdStLFD, []>;
- def LXSIWZX : XX1Form_memOp<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
- "lxsiwzx $XT, $src", IIC_LdStLFD, []>;
+ def LXSSPX : XX1Form_memOp<31, 524, (outs vssrc:$XT), (ins (memrr $RA, $RB):$addr),
+ "lxsspx $XT, $addr", IIC_LdStLFD, []>;
+ def LXSIWAX : XX1Form_memOp<31, 76, (outs vsfrc:$XT), (ins (memrr $RA, $RB):$addr),
+ "lxsiwax $XT, $addr", IIC_LdStLFD, []>;
+ def LXSIWZX : XX1Form_memOp<31, 12, (outs vsfrc:$XT), (ins (memrr $RA, $RB):$addr),
+ "lxsiwzx $XT, $addr", IIC_LdStLFD, []>;
// Pseudo instruction XFLOADf32 will be expanded to LXSSPX or LFSX later
let CodeSize = 3 in
@@ -1136,10 +1134,10 @@ let Predicates = [HasVSX, HasP8Vector] in {
// VSX scalar stores introduced in ISA 2.07
let mayStore = 1, mayLoad = 0 in {
let CodeSize = 3 in
- def STXSSPX : XX1Form_memOp<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
- "stxsspx $XT, $dst", IIC_LdStSTFD, []>;
- def STXSIWX : XX1Form_memOp<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
- "stxsiwx $XT, $dst", IIC_LdStSTFD, []>;
+ def STXSSPX : XX1Form_memOp<31, 652, (outs), (ins vssrc:$XT, (memrr $RA, $RB):$addr),
+ "stxsspx $XT, $addr", IIC_LdStSTFD, []>;
+ def STXSIWX : XX1Form_memOp<31, 140, (outs), (ins vsfrc:$XT, (memrr $RA, $RB):$addr),
+ "stxsiwx $XT, $addr", IIC_LdStSTFD, []>;
// Pseudo instruction XFSTOREf32 will be expanded to STXSSPX or STFSX later
let CodeSize = 3 in
@@ -1292,64 +1290,64 @@ let Predicates = [HasVSX, HasP8Vector] in {
let Predicates = [HasVSX, HasDirectMove] in {
// VSX direct move instructions
- def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
- "mfvsrd $rA, $XT", IIC_VecGeneral,
- [(set i64:$rA, (PPCmfvsr f64:$XT))]>,
+ def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$RA), (ins vsfrc:$XT),
+ "mfvsrd $RA, $XT", IIC_VecGeneral,
+ [(set i64:$RA, (PPCmfvsr f64:$XT))]>,
Requires<[In64BitMode]>;
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
let isCodeGenOnly = 1, hasSideEffects = 1 in
- def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsrc:$XT),
- "mfvsrd $rA, $XT", IIC_VecGeneral,
+ def MFVRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$RA), (ins vsrc:$XT),
+ "mfvsrd $RA, $XT", IIC_VecGeneral,
[]>,
Requires<[In64BitMode]>;
- def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsfrc:$XT),
- "mfvsrwz $rA, $XT", IIC_VecGeneral,
- [(set i32:$rA, (PPCmfvsr f64:$XT))]>, ZExt32To64;
+ def MFVSRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$RA), (ins vsfrc:$XT),
+ "mfvsrwz $RA, $XT", IIC_VecGeneral,
+ [(set i32:$RA, (PPCmfvsr f64:$XT))]>, ZExt32To64;
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
let isCodeGenOnly = 1, hasSideEffects = 1 in
- def MFVRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$rA), (ins vsrc:$XT),
- "mfvsrwz $rA, $XT", IIC_VecGeneral,
+ def MFVRWZ : XX1_RS6_RD5_XO<31, 115, (outs gprc:$RA), (ins vsrc:$XT),
+ "mfvsrwz $RA, $XT", IIC_VecGeneral,
[]>;
- def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$rA),
- "mtvsrd $XT, $rA", IIC_VecGeneral,
- [(set f64:$XT, (PPCmtvsra i64:$rA))]>,
+ def MTVSRD : XX1_RS6_RD5_XO<31, 179, (outs vsfrc:$XT), (ins g8rc:$RA),
+ "mtvsrd $XT, $RA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsra i64:$RA))]>,
Requires<[In64BitMode]>;
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
let isCodeGenOnly = 1, hasSideEffects = 1 in
- def MTVRD : XX1_RS6_RD5_XO<31, 179, (outs vsrc:$XT), (ins g8rc:$rA),
- "mtvsrd $XT, $rA", IIC_VecGeneral,
+ def MTVRD : XX1_RS6_RD5_XO<31, 179, (outs vsrc:$XT), (ins g8rc:$RA),
+ "mtvsrd $XT, $RA", IIC_VecGeneral,
[]>,
Requires<[In64BitMode]>;
- def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$rA),
- "mtvsrwa $XT, $rA", IIC_VecGeneral,
- [(set f64:$XT, (PPCmtvsra i32:$rA))]>;
+ def MTVSRWA : XX1_RS6_RD5_XO<31, 211, (outs vsfrc:$XT), (ins gprc:$RA),
+ "mtvsrwa $XT, $RA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsra i32:$RA))]>;
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
let isCodeGenOnly = 1, hasSideEffects = 1 in
- def MTVRWA : XX1_RS6_RD5_XO<31, 211, (outs vsrc:$XT), (ins gprc:$rA),
- "mtvsrwa $XT, $rA", IIC_VecGeneral,
+ def MTVRWA : XX1_RS6_RD5_XO<31, 211, (outs vsrc:$XT), (ins gprc:$RA),
+ "mtvsrwa $XT, $RA", IIC_VecGeneral,
[]>;
- def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$rA),
- "mtvsrwz $XT, $rA", IIC_VecGeneral,
- [(set f64:$XT, (PPCmtvsrz i32:$rA))]>;
+ def MTVSRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsfrc:$XT), (ins gprc:$RA),
+ "mtvsrwz $XT, $RA", IIC_VecGeneral,
+ [(set f64:$XT, (PPCmtvsrz i32:$RA))]>;
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
let isCodeGenOnly = 1, hasSideEffects = 1 in
- def MTVRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsrc:$XT), (ins gprc:$rA),
- "mtvsrwz $XT, $rA", IIC_VecGeneral,
+ def MTVRWZ : XX1_RS6_RD5_XO<31, 243, (outs vsrc:$XT), (ins gprc:$RA),
+ "mtvsrwz $XT, $RA", IIC_VecGeneral,
[]>;
} // HasDirectMove
} // HasVSX, HasP8Vector
let Predicates = [HasVSX, IsISA3_0, HasDirectMove] in {
-def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA),
- "mtvsrws $XT, $rA", IIC_VecGeneral, []>;
+def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$RA),
+ "mtvsrws $XT, $RA", IIC_VecGeneral, []>;
-def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc_nox0:$rA, g8rc:$rB),
- "mtvsrdd $XT, $rA, $rB", IIC_VecGeneral,
+def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc_nox0:$RA, g8rc:$RB),
+ "mtvsrdd $XT, $RA, $RB", IIC_VecGeneral,
[]>, Requires<[In64BitMode]>;
-def MFVSRLD: XX1_RS6_RD5_XO<31, 307, (outs g8rc:$rA), (ins vsrc:$XT),
- "mfvsrld $rA, $XT", IIC_VecGeneral,
+def MFVSRLD: XX1_RS6_RD5_XO<31, 307, (outs g8rc:$RA), (ins vsrc:$XT),
+ "mfvsrld $RA, $XT", IIC_VecGeneral,
[]>, Requires<[In64BitMode]>;
} // HasVSX, IsISA3_0, HasDirectMove
@@ -1358,16 +1356,16 @@ let Predicates = [HasVSX, HasP9Vector] in {
// Quad-Precision Scalar Move Instructions:
// Copy Sign
def XSCPSGNQP : X_VT5_VA5_VB5<63, 100, "xscpsgnqp",
- [(set f128:$vT,
- (fcopysign f128:$vB, f128:$vA))]>;
+ [(set f128:$RST,
+ (fcopysign f128:$RB, f128:$RA))]>;
// Absolute/Negative-Absolute/Negate
def XSABSQP : X_VT5_XO5_VB5<63, 0, 804, "xsabsqp",
- [(set f128:$vT, (fabs f128:$vB))]>;
+ [(set f128:$RST, (fabs f128:$RB))]>;
def XSNABSQP : X_VT5_XO5_VB5<63, 8, 804, "xsnabsqp",
- [(set f128:$vT, (fneg (fabs f128:$vB)))]>;
+ [(set f128:$RST, (fneg (fabs f128:$RB)))]>;
def XSNEGQP : X_VT5_XO5_VB5<63, 16, 804, "xsnegqp",
- [(set f128:$vT, (fneg f128:$vB))]>;
+ [(set f128:$RST, (fneg f128:$RB))]>;
//===--------------------------------------------------------------------===//
// Quad-Precision Scalar Floating-Point Arithmetic Instructions:
@@ -1376,74 +1374,74 @@ let Predicates = [HasVSX, HasP9Vector] in {
let mayRaiseFPException = 1 in {
let isCommutable = 1 in {
def XSADDQP : X_VT5_VA5_VB5 <63, 4, "xsaddqp",
- [(set f128:$vT, (any_fadd f128:$vA, f128:$vB))]>;
+ [(set f128:$RST, (any_fadd f128:$RA, f128:$RB))]>;
def XSMULQP : X_VT5_VA5_VB5 <63, 36, "xsmulqp",
- [(set f128:$vT, (any_fmul f128:$vA, f128:$vB))]>;
+ [(set f128:$RST, (any_fmul f128:$RA, f128:$RB))]>;
}
def XSSUBQP : X_VT5_VA5_VB5 <63, 516, "xssubqp" ,
- [(set f128:$vT, (any_fsub f128:$vA, f128:$vB))]>;
+ [(set f128:$RST, (any_fsub f128:$RA, f128:$RB))]>;
def XSDIVQP : X_VT5_VA5_VB5 <63, 548, "xsdivqp",
- [(set f128:$vT, (any_fdiv f128:$vA, f128:$vB))]>;
+ [(set f128:$RST, (any_fdiv f128:$RA, f128:$RB))]>;
// Square-Root
def XSSQRTQP : X_VT5_XO5_VB5 <63, 27, 804, "xssqrtqp",
- [(set f128:$vT, (any_fsqrt f128:$vB))]>;
+ [(set f128:$RST, (any_fsqrt f128:$RB))]>;
// (Negative) Multiply-{Add/Subtract}
def XSMADDQP : X_VT5_VA5_VB5_FMA <63, 388, "xsmaddqp",
- [(set f128:$vT,
- (any_fma f128:$vA, f128:$vB, f128:$vTi))]>;
+ [(set f128:$RST,
+ (any_fma f128:$RA, f128:$RB, f128:$RSTi))]>;
def XSMSUBQP : X_VT5_VA5_VB5_FMA <63, 420, "xsmsubqp" ,
- [(set f128:$vT,
- (any_fma f128:$vA, f128:$vB,
- (fneg f128:$vTi)))]>;
+ [(set f128:$RST,
+ (any_fma f128:$RA, f128:$RB,
+ (fneg f128:$RSTi)))]>;
def XSNMADDQP : X_VT5_VA5_VB5_FMA <63, 452, "xsnmaddqp",
- [(set f128:$vT,
- (fneg (any_fma f128:$vA, f128:$vB,
- f128:$vTi)))]>;
+ [(set f128:$RST,
+ (fneg (any_fma f128:$RA, f128:$RB,
+ f128:$RSTi)))]>;
def XSNMSUBQP : X_VT5_VA5_VB5_FMA <63, 484, "xsnmsubqp",
- [(set f128:$vT,
- (fneg (any_fma f128:$vA, f128:$vB,
- (fneg f128:$vTi))))]>;
+ [(set f128:$RST,
+ (fneg (any_fma f128:$RA, f128:$RB,
+ (fneg f128:$RSTi))))]>;
let isCommutable = 1 in {
def XSADDQPO : X_VT5_VA5_VB5_Ro<63, 4, "xsaddqpo",
- [(set f128:$vT,
+ [(set f128:$RST,
(int_ppc_addf128_round_to_odd
- f128:$vA, f128:$vB))]>;
+ f128:$RA, f128:$RB))]>;
def XSMULQPO : X_VT5_VA5_VB5_Ro<63, 36, "xsmulqpo",
- [(set f128:$vT,
+ [(set f128:$RST,
(int_ppc_mulf128_round_to_odd
- f128:$vA, f128:$vB))]>;
+ f128:$RA, f128:$RB))]>;
}
def XSSUBQPO : X_VT5_VA5_VB5_Ro<63, 516, "xssubqpo",
- [(set f128:$vT,
+ [(set f128:$RST,
(int_ppc_subf128_round_to_odd
- f128:$vA, f128:$vB))]>;
+ f128:$RA, f128:$RB))]>;
def XSDIVQPO : X_VT5_VA5_VB5_Ro<63, 548, "xsdivqpo",
- [(set f128:$vT,
+ [(set f128:$RST,
(int_ppc_divf128_round_to_odd
- f128:$vA, f128:$vB))]>;
+ f128:$RA, f128:$RB))]>;
def XSSQRTQPO : X_VT5_XO5_VB5_Ro<63, 27, 804, "xssqrtqpo",
- [(set f128:$vT,
- (int_ppc_sqrtf128_round_to_odd f128:$vB))]>;
+ [(set f128:$RST,
+ (int_ppc_sqrtf128_round_to_odd f128:$RB))]>;
def XSMADDQPO : X_VT5_VA5_VB5_FMA_Ro<63, 388, "xsmaddqpo",
- [(set f128:$vT,
+ [(set f128:$RST,
(int_ppc_fmaf128_round_to_odd
- f128:$vA,f128:$vB,f128:$vTi))]>;
+ f128:$RA,f128:$RB,f128:$RSTi))]>;
def XSMSUBQPO : X_VT5_VA5_VB5_FMA_Ro<63, 420, "xsmsubqpo" ,
- [(set f128:$vT,
+ [(set f128:$RST,
(int_ppc_fmaf128_round_to_odd
- f128:$vA, f128:$vB, (fneg f128:$vTi)))]>;
+ f128:$RA, f128:$RB, (fneg f128:$RSTi)))]>;
def XSNMADDQPO: X_VT5_VA5_VB5_FMA_Ro<63, 452, "xsnmaddqpo",
- [(set f128:$vT,
+ [(set f128:$RST,
(fneg (int_ppc_fmaf128_round_to_odd
- f128:$vA, f128:$vB, f128:$vTi)))]>;
+ f128:$RA, f128:$RB, f128:$RSTi)))]>;
def XSNMSUBQPO: X_VT5_VA5_VB5_FMA_Ro<63, 484, "xsnmsubqpo",
- [(set f128:$vT,
+ [(set f128:$RST,
(fneg (int_ppc_fmaf128_round_to_odd
- f128:$vA, f128:$vB, (fneg f128:$vTi))))]>;
+ f128:$RA, f128:$RB, (fneg f128:$RSTi))))]>;
} // mayRaiseFPException
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
@@ -1451,8 +1449,8 @@ let Predicates = [HasVSX, HasP9Vector] in {
let hasSideEffects = 1 in {
// DP/QP Compare Exponents
def XSCMPEXPDP : XX3Form_1<60, 59,
- (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
- "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
+ (outs crrc:$CR), (ins vsfrc:$XA, vsfrc:$XB),
+ "xscmpexpdp $CR, $XA, $XB", IIC_FPCompare, []>;
def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
let mayRaiseFPException = 1 in {
@@ -1477,22 +1475,26 @@ let Predicates = [HasVSX, HasP9Vector] in {
let mayRaiseFPException = 1 in {
// Convert DP -> QP
def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc,
- [(set f128:$vT, (any_fpextend f64:$vB))]>;
+ [(set f128:$RST, (any_fpextend f64:$RB))]>;
// Round & Convert QP -> DP (dword[1] is set to zero)
def XSCVQPDP : X_VT5_XO5_VB5_VSFR<63, 20, 836, "xscvqpdp" , []>;
def XSCVQPDPO : X_VT5_XO5_VB5_VSFR_Ro<63, 20, 836, "xscvqpdpo",
- [(set f64:$vT,
+ [(set f64:$RST,
(int_ppc_truncf128_round_to_odd
- f128:$vB))]>;
+ f128:$RB))]>;
}
// Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero)
let mayRaiseFPException = 1 in {
- def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz", []>;
- def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz", []>;
- def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz", []>;
- def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz", []>;
+ def XSCVQPSDZ : X_VT5_XO5_VB5<63, 25, 836, "xscvqpsdz",
+ [(set f128:$RST, (PPCany_fctidz f128:$RB))]>;
+ def XSCVQPSWZ : X_VT5_XO5_VB5<63, 9, 836, "xscvqpswz",
+ [(set f128:$RST, (PPCany_fctiwz f128:$RB))]>;
+ def XSCVQPUDZ : X_VT5_XO5_VB5<63, 17, 836, "xscvqpudz",
+ [(set f128:$RST, (PPCany_fctiduz f128:$RB))]>;
+ def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz",
+ [(set f128:$RST, (PPCany_fctiwuz f128:$RB))]>;
}
// Convert (Un)Signed DWord -> QP.
@@ -1533,14 +1535,14 @@ let Predicates = [HasVSX, HasP9Vector] in {
// Insert Exponent DP/QP
// XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU
- def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
- "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>;
+ def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$RA, g8rc:$RB),
+ "xsiexpdp $XT, $RA, $RB", IIC_VecFP, []>;
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
let hasSideEffects = 1 in {
// vB NOTE: only vB.dword[0] is used, that's why we don't use
// X_VT5_VA5_VB5 form
- def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
- "xsiexpqp $vT, $vA, $vB", IIC_VecFP, []>;
+ def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$FRT), (ins vrrc:$FRA, vsfrc:$FRB),
+ "xsiexpqp $FRT, $FRA, $FRB", IIC_VecFP, []>;
}
// Extract Exponent/Significand DP/QP
@@ -1557,18 +1559,18 @@ let Predicates = [HasVSX, HasP9Vector] in {
// XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
def XXINSERTW :
XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT),
- (ins vsrc:$XTi, vsrc:$XB, u4imm:$UIM),
- "xxinsertw $XT, $XB, $UIM", IIC_VecFP,
+ (ins vsrc:$XTi, vsrc:$XB, u4imm:$UIM5),
+ "xxinsertw $XT, $XB, $UIM5", IIC_VecFP,
[(set v4i32:$XT, (PPCvecinsert v4i32:$XTi, v4i32:$XB,
- imm32SExt16:$UIM))]>,
+ imm32SExt16:$UIM5))]>,
RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">;
// Vector Extract Unsigned Word
// FIXME: Setting the hasSideEffects flag here to match current behaviour.
let hasSideEffects = 1 in
def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165,
- (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM),
- "xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>;
+ (outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIM5),
+ "xxextractuw $XT, $XB, $UIM5", IIC_VecFP, []>;
// Vector Insert Exponent DP/SP
def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc,
@@ -1600,8 +1602,8 @@ let Predicates = [HasVSX, HasP9Vector] in {
(outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
"xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>;
def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708,
- (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB),
- "xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>;
+ (outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$VB),
+ "xststdcqp $BF, $VB, $DCMX", IIC_VecFP, []>;
}
// Vector Test Data Class SP/DP
@@ -1666,20 +1668,20 @@ let Predicates = [HasVSX, HasP9Vector] in {
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
let mayLoad = 1, mayStore = 0 in {
// Load Vector
- def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
- "lxv $XT, $src", IIC_LdStLFD, []>;
+ def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins (memrix16 $DQ, $RA):$addr),
+ "lxv $XT, $addr", IIC_LdStLFD, []>;
// Load DWord
- def LXSD : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src),
- "lxsd $vD, $src", IIC_LdStLFD, []>;
+ def LXSD : DSForm_1<57, 2, (outs vfrc:$RST), (ins (memrix $D, $RA):$addr),
+ "lxsd $RST, $addr", IIC_LdStLFD, []>;
// Load SP from src, convert it to DP, and place in dword[0]
- def LXSSP : DSForm_1<57, 3, (outs vfrc:$vD), (ins memrix:$src),
- "lxssp $vD, $src", IIC_LdStLFD, []>;
+ def LXSSP : DSForm_1<57, 3, (outs vfrc:$RST), (ins (memrix $D, $RA):$addr),
+ "lxssp $RST, $addr", IIC_LdStLFD, []>;
// Load as Integer Byte/Halfword & Zero Indexed
def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc,
- [(set f64:$XT, (PPClxsizx ForceXForm:$src, 1))]>;
+ [(set f64:$XT, (PPClxsizx ForceXForm:$addr, 1))]>;
def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc,
- [(set f64:$XT, (PPClxsizx ForceXForm:$src, 2))]>;
+ [(set f64:$XT, (PPClxsizx ForceXForm:$addr, 2))]>;
// Load Vector Halfword*8/Byte*16 Indexed
def LXVH8X : X_XT6_RA5_RB5<31, 812, "lxvh8x" , vsrc, []>;
@@ -1687,14 +1689,14 @@ let Predicates = [HasVSX, HasP9Vector] in {
// Load Vector Indexed
def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc,
- [(set v2f64:$XT, (load XForm:$src))]>;
+ [(set v2f64:$XT, (load XForm:$addr))]>;
// Load Vector (Left-justified) with Length
- def LXVL : XX1Form_memOp<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
- "lxvl $XT, $src, $rB", IIC_LdStLoad,
- [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>;
- def LXVLL : XX1Form_memOp<31,301, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
- "lxvll $XT, $src, $rB", IIC_LdStLoad,
- [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$src, i64:$rB))]>;
+ def LXVL : XX1Form_memOp<31, 269, (outs vsrc:$XT), (ins (memr $RA):$addr, g8rc:$RB),
+ "lxvl $XT, $addr, $RB", IIC_LdStLoad,
+ [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$addr, i64:$RB))]>;
+ def LXVLL : XX1Form_memOp<31,301, (outs vsrc:$XT), (ins (memr $RA):$addr, g8rc:$RB),
+ "lxvll $XT, $addr, $RB", IIC_LdStLoad,
+ [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$addr, i64:$RB))]>;
// Load Vector Word & Splat Indexed
def LXVWSX : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>;
@@ -1704,20 +1706,20 @@ let Predicates = [HasVSX, HasP9Vector] in {
// PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
let mayStore = 1, mayLoad = 0 in {
// Store Vector
- def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
- "stxv $XT, $dst", IIC_LdStSTFD, []>;
+ def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, (memrix16 $DQ, $RA):$addr),
+ "stxv $XT, $addr", IIC_LdStSTFD, []>;
// Store DWord
- def STXSD : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst),
- "stxsd $vS, $dst", IIC_LdStSTFD, []>;
+ def STXSD : DSForm_1<61, 2, (outs), (ins vfrc:$RST, (memrix $D, $RA):$addr),
+ "stxsd $RST, $addr", IIC_LdStSTFD, []>;
// Convert DP of dword[0] to SP, and Store to dst
- def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$vS, memrix:$dst),
- "stxssp $vS, $dst", IIC_LdStSTFD, []>;
+ def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$RST, (memrix $D, $RA):$addr),
+ "stxssp $RST, $addr", IIC_LdStSTFD, []>;
// Store as Integer Byte/Halfword Indexed
def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc,
- [(PPCstxsix f64:$XT, ForceXForm:$dst, 1)]>;
+ [(PPCstxsix f64:$XT, ForceXForm:$addr, 1)]>;
def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc,
- [(PPCstxsix f64:$XT, ForceXForm:$dst, 2)]>;
+ [(PPCstxsix f64:$XT, ForceXForm:$addr, 2)]>;
let isCodeGenOnly = 1 in {
def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsrc, []>;
def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsrc, []>;
@@ -1729,19 +1731,19 @@ let Predicates = [HasVSX, HasP9Vector] in {
// Store Vector Indexed
def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc,
- [(store v2f64:$XT, XForm:$dst)]>;
+ [(store v2f64:$XT, XForm:$addr)]>;
// Store Vector (Left-justified) with Length
def STXVL : XX1Form_memOp<31, 397, (outs),
- (ins vsrc:$XT, memr:$dst, g8rc:$rB),
- "stxvl $XT, $dst, $rB", IIC_LdStLoad,
- [(int_ppc_vsx_stxvl v4i32:$XT, addr:$dst,
- i64:$rB)]>;
+ (ins vsrc:$XT, (memr $RA):$addr, g8rc:$RB),
+ "stxvl $XT, $addr, $RB", IIC_LdStLoad,
+ [(int_ppc_vsx_stxvl v4i32:$XT, addr:$addr,
+ i64:$RB)]>;
def STXVLL : XX1Form_memOp<31, 429, (outs),
- (ins vsrc:$XT, memr:$dst, g8rc:$rB),
- "stxvll $XT, $dst, $rB", IIC_LdStLoad,
- [(int_ppc_vsx_stxvll v4i32:$XT, addr:$dst,
- i64:$rB)]>;
+ (ins vsrc:$XT, (memr $RA):$addr, g8rc:$RB),
+ "stxvll $XT, $addr, $RB", IIC_LdStLoad,
+ [(int_ppc_vsx_stxvll v4i32:$XT, addr:$addr,
+ i64:$RB)]>;
} // mayStore
def DFLOADf32 : PPCPostRAExpPseudo<(outs vssrc:$XT), (ins memrix:$src),
@@ -2451,6 +2453,7 @@ def DblwdCmp {
// [HasVSX, NoP9Vector, IsBigEndian]
// [HasVSX, HasOnlySwappingMemOps]
// [HasVSX, HasOnlySwappingMemOps, IsBigEndian]
+// [HasVSX, NoP8Vector]
// [HasVSX, HasP8Vector]
// [HasVSX, HasP8Vector, IsBigEndian]
// [HasVSX, HasP8Vector, IsBigEndian, IsPPC64]
@@ -2504,17 +2507,10 @@ def : Pat<(v4i32 (or (and (vnot v4i32:$C), v4i32:$A),
def : Pat<(f64 (fpimm0neg)),
(f64 (XSNEGDP (XXLXORdpz)))>;
-def : Pat<(f32 (fpimm0neg)),
- (f32 (COPY_TO_REGCLASS (XSNEGDP (XXLXORdpz)), VSSRC))>;
-
def : Pat<(f64 (nzFPImmExactInti5:$A)),
(COPY_TO_REGCLASS (XVCVSXWDP (COPY_TO_REGCLASS
(VSPLTISW (getFPAs5BitExactInt fpimm:$A)), VSRC)), VSFRC)>;
-def : Pat<(f32 (nzFPImmExactInti5:$A)),
- (COPY_TO_REGCLASS (XVCVSXWDP (COPY_TO_REGCLASS
- (VSPLTISW (getFPAs5BitExactInt fpimm:$A)), VSRC)), VSSRC)>;
-
// Additional fnmsub pattern for PPC specific ISD opcode
def : Pat<(PPCfnmsub f64:$A, f64:$B, f64:$C),
(XSNMSUBADP $C, $A, $B)>;
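
(Editorial aside, not part of the patch.) The f32 negative-zero and small-integer constant patterns removed in this hunk reappear later in the diff under the NoP8Vector/HasP8Vector guards with the matching register classes. For context on what the surviving f64 fpimm0neg pattern materializes, here is a small sketch, not LLVM code: xoring a register with itself (XXLXORdpz) yields +0.0, and flipping its sign bit (XSNEGDP) produces the sign-bit-only pattern of -0.0:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main() {
      double Zero = 0.0;      // what the xor-with-itself idiom produces
      double NegZero = -Zero; // sign-bit flip
      uint64_t Bits;
      std::memcpy(&Bits, &NegZero, sizeof Bits);
      std::printf("0x%016llx\n",
                  (unsigned long long)Bits); // prints 0x8000000000000000
      return 0;
    }
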
@@ -2918,6 +2914,10 @@ def:Pat<(vmrglw_swapped_shuffle v16i8:$vA, v16i8:$vB),
def:Pat<(vmrghw_swapped_shuffle v16i8:$vA, v16i8:$vB),
(COPY_TO_REGCLASS (XXMRGHW (COPY_TO_REGCLASS $vB, VSRC),
(COPY_TO_REGCLASS $vA, VSRC)), VRRC)>;
+def : Pat<(PPCstore_scal_int_from_vsr f64:$src, XForm:$dst, 8),
+ (STXSDX $src, XForm:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr f128:$src, XForm:$dst, 8),
+ (STXSDX (COPY_TO_REGCLASS $src, VSFRC), XForm:$dst)>;
} // HasVSX
// Any big endian VSX subtarget.
@@ -2988,7 +2988,7 @@ def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 0))),
(v2f64 (XVCVSPDP $A))>;
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 1))),
(f64 (fpextend (extractelt v4f32:$A, 3))))),
- (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 3)))>;
+ (v2f64 (XVCVSPDP (XXSLDWI $A, $A, 1)))>;
def : Pat<(v2f64 (build_vector (f64 (fpextend (extractelt v4f32:$A, 2))),
(f64 (fpextend (extractelt v4f32:$A, 3))))),
(v2f64 (XVCVSPDP (XXMRGLW $A, $A)))>;
@@ -3160,12 +3160,10 @@ def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 1)),
// Any pre-Power9 VSX subtarget.
let Predicates = [HasVSX, NoP9Vector] in {
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ForceXForm:$dst, 8),
- (STXSDX (XSCVDPSXDS f64:$src), ForceXForm:$dst)>;
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ForceXForm:$dst, 8),
- (STXSDX (XSCVDPUXDS f64:$src), ForceXForm:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr f64:$src, ForceXForm:$dst, 8),
+ (STXSDX $src, ForceXForm:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr f128:$src, ForceXForm:$dst, 8),
+ (STXSDX (COPY_TO_REGCLASS $src, VSFRC), ForceXForm:$dst)>;
// Load-and-splat with fp-to-int conversion (using X-Form VSX/FP loads).
defm : ScalToVecWPermute<
@@ -3245,6 +3243,17 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps, IsBigEndian] in {
(SUBREG_TO_REG (i64 1), (XFLOADf64 ForceXForm:$src), sub_64)>;
} // HasVSX, HasOnlySwappingMemOps, IsBigEndian
+// Target before Power8 with VSX.
+let Predicates = [HasVSX, NoP8Vector] in {
+def : Pat<(f32 (fpimm0neg)),
+ (f32 (COPY_TO_REGCLASS (XSNEGDP (XXLXORdpz)), F4RC))>;
+
+def : Pat<(f32 (nzFPImmExactInti5:$A)),
+ (COPY_TO_REGCLASS (XVCVSXWDP (COPY_TO_REGCLASS
+ (VSPLTISW (getFPAs5BitExactInt fpimm:$A)), VSRC)), F4RC)>;
+
+} // HasVSX, NoP8Vector
+
// Any Power8 VSX subtarget.
let Predicates = [HasVSX, HasP8Vector] in {
def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
@@ -3256,6 +3265,13 @@ def : Pat<(f32 (fpround (f64 (extloadf32 ForceXForm:$src)))),
def : Pat<(f64 (any_fpextend f32:$src)),
(COPY_TO_REGCLASS $src, VSFRC)>;
+def : Pat<(f32 (fpimm0neg)),
+ (f32 (COPY_TO_REGCLASS (XSNEGDP (XXLXORdpz)), VSSRC))>;
+
+def : Pat<(f32 (nzFPImmExactInti5:$A)),
+ (COPY_TO_REGCLASS (XVCVSXWDP (COPY_TO_REGCLASS
+ (VSPLTISW (getFPAs5BitExactInt fpimm:$A)), VSRC)), VSSRC)>;
+
def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
(SELECT_VSSRC (CRANDC $lhs, $rhs), $tval, $fval)>;
def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETULT)),
@@ -3294,12 +3310,15 @@ def : Pat<(f32 (fneg f32:$S)),
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
// Instructions for converting float to i32 feeding a store.
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ForceXForm:$dst, 4),
- (STIWX (XSCVDPSXWS f64:$src), ForceXForm:$dst)>;
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ForceXForm:$dst, 4),
- (STIWX (XSCVDPUXWS f64:$src), ForceXForm:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr f64:$src, ForceXForm:$dst, 4),
+ (STIWX $src, ForceXForm:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr f128:$src, ForceXForm:$dst, 4),
+ (STIWX (COPY_TO_REGCLASS $src, VSFRC), ForceXForm:$dst)>;
+
+def : Pat<(PPCstore_scal_int_from_vsr f64:$src, ForceXForm:$dst, 4),
+ (STXSIWX $src, ForceXForm:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr f128:$src, ForceXForm:$dst, 4),
+ (STXSIWX (COPY_TO_REGCLASS $src, VSFRC), ForceXForm:$dst)>;
def : Pat<(v2i64 (smax v2i64:$src1, v2i64:$src2)),
(v2i64 (VMAXSD (COPY_TO_REGCLASS $src1, VRRC),
@@ -4033,67 +4052,19 @@ def : Pat<(i32 (any_fp_to_uint f128:$src)),
(i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>;
// Instructions for store(fptosi).
-// The 8-byte version is repeated here due to availability of D-Form STXSD.
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), XForm:$dst, 8),
- (STXSDX (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
- XForm:$dst)>;
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), DSForm:$dst, 8),
- (STXSD (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC),
- DSForm:$dst)>;
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), ForceXForm:$dst, 4),
- (STXSIWX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), ForceXForm:$dst)>;
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), ForceXForm:$dst, 2),
- (STXSIHX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), ForceXForm:$dst)>;
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), ForceXForm:$dst, 1),
- (STXSIBX (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC), ForceXForm:$dst)>;
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), XForm:$dst, 8),
- (STXSDX (XSCVDPSXDS f64:$src), XForm:$dst)>;
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), DSForm:$dst, 8),
- (STXSD (XSCVDPSXDS f64:$src), DSForm:$dst)>;
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ForceXForm:$dst, 2),
- (STXSIHX (XSCVDPSXWS f64:$src), ForceXForm:$dst)>;
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), ForceXForm:$dst, 1),
- (STXSIBX (XSCVDPSXWS f64:$src), ForceXForm:$dst)>;
-
-// Instructions for store(fptoui).
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), XForm:$dst, 8),
- (STXSDX (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
- XForm:$dst)>;
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), DSForm:$dst, 8),
- (STXSD (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC),
- DSForm:$dst)>;
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), ForceXForm:$dst, 4),
- (STXSIWX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), ForceXForm:$dst)>;
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), ForceXForm:$dst, 2),
- (STXSIHX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), ForceXForm:$dst)>;
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), ForceXForm:$dst, 1),
- (STXSIBX (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC), ForceXForm:$dst)>;
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), XForm:$dst, 8),
- (STXSDX (XSCVDPUXDS f64:$src), XForm:$dst)>;
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), DSForm:$dst, 8),
- (STXSD (XSCVDPUXDS f64:$src), DSForm:$dst)>;
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ForceXForm:$dst, 2),
- (STXSIHX (XSCVDPUXWS f64:$src), ForceXForm:$dst)>;
-def : Pat<(PPCstore_scal_int_from_vsr
- (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), ForceXForm:$dst, 1),
- (STXSIBX (XSCVDPUXWS f64:$src), ForceXForm:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr f64:$src, DSForm:$dst, 8),
+ (STXSD $src, DSForm:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr f64:$src, ForceXForm:$dst, 2),
+ (STXSIHX $src, ForceXForm:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr f64:$src, ForceXForm:$dst, 1),
+ (STXSIBX $src, ForceXForm:$dst)>;
+
+def : Pat<(PPCstore_scal_int_from_vsr f128:$src, DSForm:$dst, 8),
+ (STXSD (COPY_TO_REGCLASS $src, VFRC), DSForm:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr f128:$src, ForceXForm:$dst, 2),
+ (STXSIHX (COPY_TO_REGCLASS $src, VSFRC), ForceXForm:$dst)>;
+def : Pat<(PPCstore_scal_int_from_vsr f128:$src, ForceXForm:$dst, 1),
+ (STXSIBX (COPY_TO_REGCLASS $src, VSFRC), ForceXForm:$dst)>;
// Round & Convert QP -> DP/SP
def : Pat<(f64 (any_fpround f128:$src)), (f64 (XSCVQPDP $src))>;
@@ -4808,20 +4779,23 @@ def : Pat<(f128 (uint_to_fp (i32 (PPCmfvsr f64:$src)))),
// Any Power9 VSX subtarget that supports Power9 Altivec.
let Predicates = [HasVSX, HasP9Altivec] in {
-// Put this P9Altivec related definition here since it's possible to be
-// selected to VSX instruction xvnegsp, avoid possible undef.
-def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 0))),
+// Unsigned absolute-difference.
+def : Pat<(v4i32 (abdu v4i32:$A, v4i32:$B)),
(v4i32 (VABSDUW $A, $B))>;
-def : Pat<(v8i16 (PPCvabsd v8i16:$A, v8i16:$B, (i32 0))),
+def : Pat<(v8i16 (abdu v8i16:$A, v8i16:$B)),
(v8i16 (VABSDUH $A, $B))>;
-def : Pat<(v16i8 (PPCvabsd v16i8:$A, v16i8:$B, (i32 0))),
+def : Pat<(v16i8 (abdu v16i8:$A, v16i8:$B)),
(v16i8 (VABSDUB $A, $B))>;
-// As PPCVABSD description, the last operand indicates whether do the
-// sign bit flip.
-def : Pat<(v4i32 (PPCvabsd v4i32:$A, v4i32:$B, (i32 1))),
+// Signed absolute-difference.
+// The Power9 VABSD* instructions are designed for unsigned integer
+// vectors (byte/halfword/word); to use them for signed integer vectors,
+// we have to flip the operands' sign bits first. Flipping the sign bits of
+// byte/halfword integer vectors would be inefficient, but for word integer
+// vectors we can do it efficiently with XVNEGSP.
+def : Pat<(v4i32 (abds v4i32:$A, v4i32:$B)),
(v4i32 (VABSDUW (XVNEGSP $A), (XVNEGSP $B)))>;
} // HasVSX, HasP9Altivec
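The sign-bit flip above rests on a per-lane identity: biasing both signed operands by 2^31 (which is exactly what flipping the sign bit of each word does) maps signed order onto unsigned order while preserving differences, so the unsigned absolute difference of the biased values equals the signed absolute difference of the originals. A minimal scalar sketch of that identity, for illustration only (abdu32/abds32 are hypothetical helpers, not LLVM APIs):

#include <cassert>
#include <cstdint>

// Unsigned absolute difference of two 32-bit lanes.
static uint32_t abdu32(uint32_t A, uint32_t B) { return A > B ? A - B : B - A; }

// Signed absolute difference expressed through the unsigned one by flipping
// the sign bit of both operands, mirroring the XVNEGSP-based pattern above.
static uint32_t abds32(int32_t A, int32_t B) {
  return abdu32(static_cast<uint32_t>(A) ^ 0x80000000u,
                static_cast<uint32_t>(B) ^ 0x80000000u);
}

int main() {
  assert(abds32(-3, 4) == 7);
  assert(abds32(INT32_MIN, 0) == 0x80000000u);
  return 0;
}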
diff --git a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
index 999b0b06baa6..f29a7af1bdf1 100644
--- a/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
+++ b/llvm/lib/Target/PowerPC/PPCLoopInstrFormPrep.cpp
@@ -910,7 +910,7 @@ bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
unsigned Remainder = cast<SCEVConstant>(BucketChain.Elements[j].Offset)
->getAPInt()
.urem(Form);
- if (RemainderOffsetInfo.find(Remainder) == RemainderOffsetInfo.end())
+ if (!RemainderOffsetInfo.contains(Remainder))
RemainderOffsetInfo[Remainder] = std::make_pair(j, 1);
else
RemainderOffsetInfo[Remainder].second++;
@@ -933,7 +933,7 @@ bool PPCLoopInstrFormPrep::prepareBaseForDispFormChain(Bucket &BucketChain,
// 1 X form.
unsigned MaxCountRemainder = 0;
for (unsigned j = 0; j < (unsigned)Form; j++)
- if ((RemainderOffsetInfo.find(j) != RemainderOffsetInfo.end()) &&
+ if ((RemainderOffsetInfo.contains(j)) &&
RemainderOffsetInfo[j].second >
RemainderOffsetInfo[MaxCountRemainder].second)
MaxCountRemainder = j;
@@ -1179,6 +1179,8 @@ Value *PPCLoopInstrFormPrep::getNodeForInc(Loop *L, Instruction *MemI,
// Get the incoming value from the loop latch and check if the value has
// the add form with the required increment.
+ if (CurrentPHINode->getBasicBlockIndex(LatchBB) < 0)
+ continue;
if (Instruction *I = dyn_cast<Instruction>(
CurrentPHINode->getIncomingValueForBlock(LatchBB))) {
Value *StrippedBaseI = I;
diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 976effb96adc..1f7dba66db35 100644
--- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -42,6 +42,10 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO,
Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
} else {
const GlobalValue *GV = MO.getGlobal();
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->hasAttribute("toc-data"))
+ return TM.getSymbol(GV);
+
TM.getNameWithPrefix(Name, GV, Mang);
}
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 7d3a8b4ca252..410f4cba97c6 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -193,7 +193,7 @@ static unsigned getKnownLeadingZeroCount(const unsigned Reg,
if (Opcode == PPC::ANDI_rec) {
uint16_t Imm = MI->getOperand(2).getImm();
- return 48 + countLeadingZeros(Imm);
+ return 48 + llvm::countl_zero(Imm);
}
if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
@@ -219,6 +219,20 @@ static unsigned getKnownLeadingZeroCount(const unsigned Reg,
Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
return 56;
+ if (Opcode == PPC::AND || Opcode == PPC::AND8 || Opcode == PPC::AND_rec ||
+ Opcode == PPC::AND8_rec)
+ return std::max(
+ getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
+ getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
+
+ if (Opcode == PPC::OR || Opcode == PPC::OR8 || Opcode == PPC::XOR ||
+ Opcode == PPC::XOR8 || Opcode == PPC::OR_rec ||
+ Opcode == PPC::OR8_rec || Opcode == PPC::XOR_rec ||
+ Opcode == PPC::XOR8_rec)
+ return std::min(
+ getKnownLeadingZeroCount(MI->getOperand(1).getReg(), TII, MRI),
+ getKnownLeadingZeroCount(MI->getOperand(2).getReg(), TII, MRI));
+
if (TII->isZeroExtended(Reg, MRI))
return 32;
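The new AND/OR/XOR cases above follow from simple bit-level bounds: AND can only clear bits, so the result has at least as many leading zeros as the operand with the stronger known bound (hence max), while OR and XOR can set any bit that is set in either operand, so only the weaker bound is safe (hence min). A small standalone check of those bounds, for illustration only (clz64 is a hypothetical helper, not the LLVM API):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Count leading zero bits of a 64-bit value (returns 64 for zero).
static unsigned clz64(uint64_t X) {
  unsigned N = 0;
  for (uint64_t Bit = 1ull << 63; Bit && !(X & Bit); Bit >>= 1)
    ++N;
  return N;
}

int main() {
  uint64_t A = 0x00000000000000ffull; // 56 leading zeros
  uint64_t B = 0x0000000000ffff00ull; // 40 leading zeros
  assert(clz64(A & B) >= std::max(clz64(A), clz64(B))); // AND: take the max
  assert(clz64(A | B) >= std::min(clz64(A), clz64(B))); // OR:  take the min
  assert(clz64(A ^ B) >= std::min(clz64(A), clz64(B))); // XOR: take the min
  return 0;
}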
@@ -641,6 +655,34 @@ bool PPCMIPeephole::simplifyCode() {
DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
LLVM_DEBUG(MI.dump());
+ } else if (Immed == 2 &&
+ (DefOpc == PPC::VSPLTB || DefOpc == PPC::VSPLTH ||
+ DefOpc == PPC::VSPLTW || DefOpc == PPC::XXSPLTW ||
+ DefOpc == PPC::VSPLTISB || DefOpc == PPC::VSPLTISH ||
+ DefOpc == PPC::VSPLTISW)) {
+ // Swap of various vector splats, convert to copy.
+ ToErase = &MI;
+ Simplified = true;
+ LLVM_DEBUG(dbgs() << "Optimizing swap(vsplt(is)?[b|h|w]|xxspltw) => "
+ "copy(vsplt(is)?[b|h|w]|xxspltw): ");
+ LLVM_DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
+ } else if ((Immed == 0 || Immed == 3 || Immed == 2) &&
+ TII->isLoadFromConstantPool(DefMI)) {
+ const Constant *C = TII->getConstantFromConstantPool(DefMI);
+ if (C && C->getType()->isVectorTy() && C->getSplatValue()) {
+ ToErase = &MI;
+ Simplified = true;
+ LLVM_DEBUG(dbgs()
+ << "Optimizing swap(splat pattern from constant-pool) "
+ "=> copy(splat pattern from constant-pool): ");
+ LLVM_DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .add(MI.getOperand(1));
+ }
}
break;
}
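Both new swap-elimination cases above rely on the same observation: if every lane of the source vector holds the same value (a splat, whether from a splat instruction or from a constant-pool load with a splat value), then any lane permutation such as the doubleword swap leaves the vector unchanged, so the swap can be replaced by a plain COPY. A small scalar illustration under that assumption (hypothetical helper, not LLVM code):

#include <array>
#include <cassert>

// Swap the two 64-bit halves of a v4i32, expressed as a lane permutation.
static std::array<int, 4> swapDoublewords(const std::array<int, 4> &V) {
  return {V[2], V[3], V[0], V[1]};
}

int main() {
  std::array<int, 4> Splat = {7, 7, 7, 7}; // result of a splat
  assert(swapDoublewords(Splat) == Splat); // the swap is a no-op -> COPY
  return 0;
}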
@@ -839,7 +881,8 @@ bool PPCMIPeephole::simplifyCode() {
if (SrcMI->getOperand(1).isGlobal()) {
const GlobalObject *GO =
dyn_cast<GlobalObject>(SrcMI->getOperand(1).getGlobal());
- if (GO && GO->getAlign() && *GO->getAlign() >= 4)
+ if (GO && GO->getAlign() && *GO->getAlign() >= 4 &&
+ (SrcMI->getOperand(1).getOffset() % 4 == 0))
IsWordAligned = true;
} else if (SrcMI->getOperand(1).isImm()) {
int64_t Value = SrcMI->getOperand(1).getImm();
@@ -1288,7 +1331,7 @@ static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
// We assume Pred1MBB is the BB containing the compare to be merged and
// Pred2MBB is the BB to which we will append a compare instruction.
- // Hence we can proceed as is.
+ // Proceed as is if Pred1MBB is different from MBB.
}
else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
// We need to swap Pred1MBB and Pred2MBB to canonicalize.
@@ -1296,6 +1339,9 @@ static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
}
else return false;
+ if (Pred1MBB == &MBB)
+ return false;
+
// Here, Pred2MBB is the BB to which we need to append a compare inst.
// We cannot move the compare instruction if operands are not available
// in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 781f6255dc72..11dbbce42f61 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -179,9 +179,12 @@ public:
case 'a':
if (RegName[1] == 'c' && RegName[2] == 'c')
return RegName + 3;
- break;
- case 'r':
+ break;
case 'f':
+ if (RegName[1] == 'p')
+ return RegName + 2;
+ [[fallthrough]];
+ case 'r':
case 'v':
if (RegName[1] == 's') {
if (RegName[2] == 'p')
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 700baa5733b4..6151faf403aa 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -15,11 +15,14 @@ def sub_gt : SubRegIndex<1, 1>;
def sub_eq : SubRegIndex<1, 2>;
def sub_un : SubRegIndex<1, 3>;
def sub_32 : SubRegIndex<32>;
+def sub_32_hi_phony : SubRegIndex<32,32>;
def sub_64 : SubRegIndex<64>;
def sub_vsx0 : SubRegIndex<128>;
def sub_vsx1 : SubRegIndex<128, 128>;
def sub_gp8_x0 : SubRegIndex<64>;
def sub_gp8_x1 : SubRegIndex<64, 64>;
+def sub_fp0 : SubRegIndex<64>;
+def sub_fp1 : SubRegIndex<64, 64>;
}
@@ -41,13 +44,12 @@ class GP8<GPR SubReg, string n> : PPCReg<n> {
let SubRegIndices = [sub_32];
}
-// SPE - One of the 32 64-bit general-purpose registers (SPE)
-class SPE<GPR SubReg, string n> : PPCReg<n> {
- let HWEncoding = SubReg.HWEncoding;
- let SubRegs = [SubReg];
- let SubRegIndices = [sub_32];
+class SPE<string n, bits<5> Enc, list<Register> subregs = []> : PPCReg<n> {
+ let HWEncoding{4-0} = Enc;
+ let SubRegs = subregs;
+ let SubRegIndices = [sub_32, sub_32_hi_phony];
+ let CoveredBySubRegs = 1;
}
-
// SPR - One of the 32-bit special-purpose registers
class SPR<bits<10> num, string n> : PPCReg<n> {
let HWEncoding{9-0} = num;
@@ -58,6 +60,15 @@ class FPR<bits<5> num, string n> : PPCReg<n> {
let HWEncoding{4-0} = num;
}
+// FPPair - A pair of 64-bit floating-point registers.
+class FPPair<string n, bits<5> EvenIndex> : PPCReg<n> {
+ assert !eq(EvenIndex{0}, 0), "Index should be even.";
+ let HWEncoding{4-0} = EvenIndex;
+ let SubRegs = [!cast<FPR>("F"#EvenIndex), !cast<FPR>("F"#!add(EvenIndex, 1))];
+ let DwarfNumbers = [-1, -1];
+ let SubRegIndices = [sub_fp0, sub_fp1];
+}
+
// VF - One of the 32 64-bit floating-point subregisters of the vector
// registers (used by VSX).
class VF<bits<5> num, string n> : PPCReg<n> {
@@ -118,6 +129,12 @@ foreach Index = 0-31 in {
def R#Index : GPR<Index, "r"#Index>, DwarfRegNum<[-2, Index]>;
}
+let isArtificial = 1 in {
+ foreach Index = 0-31 in {
+ def H#Index : GPR<-1,"">;
+ }
+}
+
// 64-bit General-purpose registers
foreach Index = 0-31 in {
def X#Index : GP8<!cast<GPR>("R"#Index), "r"#Index>,
@@ -126,16 +143,23 @@ foreach Index = 0-31 in {
// SPE registers
foreach Index = 0-31 in {
- def S#Index : SPE<!cast<GPR>("R"#Index), "r"#Index>,
+ def S#Index : SPE<"r"#Index, Index, [!cast<GPR>("R"#Index), !cast<GPR>("H"#Index)]>,
DwarfRegNum<[!add(Index, 1200), !add(Index, 1200)]>;
+
}
+
// Floating-point registers
foreach Index = 0-31 in {
def F#Index : FPR<Index, "f"#Index>,
DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
}
+// Floating-point pair registers
+foreach Index = { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 } in {
+ def Fpair#Index : FPPair<"fp"#Index, Index>;
+}
+
// 64-bit Floating-point subregisters of Altivec registers
// Note: the register names are v0-v31 or vs32-vs63 depending on the use.
// Custom C++ code is used to produce the correct name and encoding.
@@ -277,6 +301,11 @@ def CARRY: SPR<1, "xer">, DwarfRegNum<[76]> {
// that do nothing but change RM will not get deleted.
def RM: PPCReg<"**ROUNDING MODE**">;
+let isAllocatable = 0 in
+def GPRC32 : RegisterClass<"PPC", [i32,f32], 32, (add (sequence "H%u", 2, 12),
+ (sequence "H%u", 30, 13),
+ H31, H0, H1)>;
+
/// Register classes
// Allocate volatiles first
// then nonvolatiles in reverse order since stmw/lmw save from rN to r31
@@ -349,6 +378,21 @@ def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
(sequence "F%u", 31, 14))>;
def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
+// Floating point pair registers.
+// Note that the type used for this register class is ppcf128. This is not
+// completely correct. However, since we are not pattern matching any
+// instructions for these registers and we are not register allocating or
+// scheduling any of these instructions it should be safe to do this.
+// The reason we didn't use the correct type (Decimal Floating Point) is that
+// at the time of this implementation the correct type was not available.
+def FpRC :
+ RegisterClass<"PPC", [ppcf128], 128,
+ (add Fpair0, Fpair2, Fpair4, Fpair6, Fpair8, Fpair10, Fpair12,
+ Fpair14, Fpair16, Fpair18, Fpair20, Fpair22, Fpair24,
+ Fpair26, Fpair28, Fpair30)> {
+ let Size = 128;
+}
+
def VRRC : RegisterClass<"PPC",
[v16i8,v8i16,v4i32,v2i64,v1i128,v4f32,v2f64, f128],
128,
@@ -504,6 +548,12 @@ def PPCRegF4RCAsmOperand : AsmOperandClass {
def f4rc : RegisterOperand<F4RC> {
let ParserMatchClass = PPCRegF4RCAsmOperand;
}
+def PPCRegFpRCAsmOperand : AsmOperandClass {
+ let Name = "RegFpRC"; let PredicateMethod = "isEvenRegNumber";
+}
+def fpairrc : RegisterOperand<FpRC> {
+ let ParserMatchClass = PPCRegFpRCAsmOperand;
+}
def PPCRegVRRCAsmOperand : AsmOperandClass {
let Name = "RegVRRC"; let PredicateMethod = "isRegNumber";
}
@@ -548,6 +598,7 @@ def PPCU1ImmAsmOperand : AsmOperandClass {
def u1imm : Operand<i32> {
let PrintMethod = "printU1ImmOperand";
let ParserMatchClass = PPCU1ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<1>";
let OperandType = "OPERAND_IMMEDIATE";
}
@@ -558,6 +609,7 @@ def PPCU2ImmAsmOperand : AsmOperandClass {
def u2imm : Operand<i32> {
let PrintMethod = "printU2ImmOperand";
let ParserMatchClass = PPCU2ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<2>";
let OperandType = "OPERAND_IMMEDIATE";
}
@@ -578,6 +630,7 @@ def PPCU3ImmAsmOperand : AsmOperandClass {
def u3imm : Operand<i32> {
let PrintMethod = "printU3ImmOperand";
let ParserMatchClass = PPCU3ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<3>";
let OperandType = "OPERAND_IMMEDIATE";
}
@@ -588,6 +641,7 @@ def PPCU4ImmAsmOperand : AsmOperandClass {
def u4imm : Operand<i32> {
let PrintMethod = "printU4ImmOperand";
let ParserMatchClass = PPCU4ImmAsmOperand;
+ let DecoderMethod = "decodeUImmOperand<4>";
let OperandType = "OPERAND_IMMEDIATE";
}
def PPCS5ImmAsmOperand : AsmOperandClass {
@@ -799,20 +853,23 @@ def PPCDispRI34Operand : AsmOperandClass {
}
def dispRI34 : Operand<iPTR> {
let ParserMatchClass = PPCDispRI34Operand;
+ let EncoderMethod = "getDispRI34Encoding";
+ let DecoderMethod = "decodeSImmOperand<34>";
+}
+def dispRI34_pcrel : Operand<iPTR> {
+ let ParserMatchClass = PPCDispRI34Operand;
+ let EncoderMethod = "getDispRI34PCRelEncoding";
+ let DecoderMethod = "decodeSImmOperand<34>";
}
def memri34 : Operand<iPTR> { // memri, imm is a 34-bit value.
let PrintMethod = "printMemRegImm34";
let MIOperandInfo = (ops dispRI34:$imm, ptr_rc_nor0:$reg);
- let EncoderMethod = "getMemRI34Encoding";
- let DecoderMethod = "decodeMemRI34Operands";
}
// memri, imm is a 34-bit value for pc-relative instructions where
// base register is set to zero.
def memri34_pcrel : Operand<iPTR> { // memri, imm is a 34-bit value.
let PrintMethod = "printMemRegImm34PCRel";
- let MIOperandInfo = (ops dispRI34:$imm, immZero:$reg);
- let EncoderMethod = "getMemRI34PCRelEncoding";
- let DecoderMethod = "decodeMemRI34PCRelOperands";
+ let MIOperandInfo = (ops dispRI34_pcrel:$imm, immZero:$reg);
}
// A version of ptr_rc usable with the asm parser.
@@ -829,6 +886,7 @@ def PPCDispRIOperand : AsmOperandClass {
}
def dispRI : Operand<iPTR> {
let ParserMatchClass = PPCDispRIOperand;
+ let EncoderMethod = "getDispRIEncoding";
}
def PPCDispRIXOperand : AsmOperandClass {
let Name = "DispRIX"; let PredicateMethod = "isS16ImmX4";
@@ -836,6 +894,8 @@ def PPCDispRIXOperand : AsmOperandClass {
}
def dispRIX : Operand<iPTR> {
let ParserMatchClass = PPCDispRIXOperand;
+ let EncoderMethod = "getDispRIXEncoding";
+ let DecoderMethod = "decodeDispRIXOperand";
}
def PPCDispRIHashOperand : AsmOperandClass {
let Name = "DispRIHash"; let PredicateMethod = "isHashImmX8";
@@ -843,6 +903,8 @@ def PPCDispRIHashOperand : AsmOperandClass {
}
def dispRIHash : Operand<iPTR> {
let ParserMatchClass = PPCDispRIHashOperand;
+ let EncoderMethod = "getDispRIHashEncoding";
+ let DecoderMethod = "decodeDispRIHashOperand";
}
def PPCDispRIX16Operand : AsmOperandClass {
let Name = "DispRIX16"; let PredicateMethod = "isS16ImmX16";
@@ -850,6 +912,9 @@ def PPCDispRIX16Operand : AsmOperandClass {
}
def dispRIX16 : Operand<iPTR> {
let ParserMatchClass = PPCDispRIX16Operand;
+ let EncoderMethod = "getDispRIX16Encoding";
+ let DecoderMethod = "decodeDispRIX16Operand";
+
}
def PPCDispSPE8Operand : AsmOperandClass {
let Name = "DispSPE8"; let PredicateMethod = "isU8ImmX8";
@@ -857,6 +922,8 @@ def PPCDispSPE8Operand : AsmOperandClass {
}
def dispSPE8 : Operand<iPTR> {
let ParserMatchClass = PPCDispSPE8Operand;
+ let DecoderMethod = "decodeDispSPE8Operand";
+ let EncoderMethod = "getDispSPE8Encoding";
}
def PPCDispSPE4Operand : AsmOperandClass {
let Name = "DispSPE4"; let PredicateMethod = "isU7ImmX4";
@@ -864,6 +931,8 @@ def PPCDispSPE4Operand : AsmOperandClass {
}
def dispSPE4 : Operand<iPTR> {
let ParserMatchClass = PPCDispSPE4Operand;
+ let DecoderMethod = "decodeDispSPE4Operand";
+ let EncoderMethod = "getDispSPE4Encoding";
}
def PPCDispSPE2Operand : AsmOperandClass {
let Name = "DispSPE2"; let PredicateMethod = "isU6ImmX2";
@@ -871,13 +940,13 @@ def PPCDispSPE2Operand : AsmOperandClass {
}
def dispSPE2 : Operand<iPTR> {
let ParserMatchClass = PPCDispSPE2Operand;
+ let DecoderMethod = "decodeDispSPE2Operand";
+ let EncoderMethod = "getDispSPE2Encoding";
}
def memri : Operand<iPTR> {
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispRI:$imm, ptr_rc_nor0:$reg);
- let EncoderMethod = "getMemRIEncoding";
- let DecoderMethod = "decodeMemRIOperands";
let OperandType = "OPERAND_MEMORY";
}
def memrr : Operand<iPTR> {
@@ -888,44 +957,32 @@ def memrr : Operand<iPTR> {
def memrix : Operand<iPTR> { // memri where the imm is 4-aligned.
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
- let EncoderMethod = "getMemRIXEncoding";
- let DecoderMethod = "decodeMemRIXOperands";
let OperandType = "OPERAND_MEMORY";
}
def memrihash : Operand<iPTR> {
// memrihash 8-aligned for ROP Protection Instructions.
let PrintMethod = "printMemRegImmHash";
let MIOperandInfo = (ops dispRIHash:$imm, ptr_rc_nor0:$reg);
- let EncoderMethod = "getMemRIHashEncoding";
- let DecoderMethod = "decodeMemRIHashOperands";
let OperandType = "OPERAND_MEMORY";
}
def memrix16 : Operand<iPTR> { // memri, imm is 16-aligned, 12-bit, Inst{16:27}
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispRIX16:$imm, ptr_rc_nor0:$reg);
- let EncoderMethod = "getMemRIX16Encoding";
- let DecoderMethod = "decodeMemRIX16Operands";
let OperandType = "OPERAND_MEMORY";
}
def spe8dis : Operand<iPTR> { // SPE displacement where the imm is 8-aligned.
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispSPE8:$imm, ptr_rc_nor0:$reg);
- let EncoderMethod = "getSPE8DisEncoding";
- let DecoderMethod = "decodeSPE8Operands";
let OperandType = "OPERAND_MEMORY";
}
def spe4dis : Operand<iPTR> { // SPE displacement where the imm is 4-aligned.
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispSPE4:$imm, ptr_rc_nor0:$reg);
- let EncoderMethod = "getSPE4DisEncoding";
- let DecoderMethod = "decodeSPE4Operands";
let OperandType = "OPERAND_MEMORY";
}
def spe2dis : Operand<iPTR> { // SPE displacement where the imm is 2-aligned.
let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispSPE2:$imm, ptr_rc_nor0:$reg);
- let EncoderMethod = "getSPE2DisEncoding";
- let DecoderMethod = "decodeSPE2Operands";
let OperandType = "OPERAND_MEMORY";
}
diff --git a/llvm/lib/Target/PowerPC/PPCScheduleP10.td b/llvm/lib/Target/PowerPC/PPCScheduleP10.td
index f89ef735a367..25be37718af2 100644
--- a/llvm/lib/Target/PowerPC/PPCScheduleP10.td
+++ b/llvm/lib/Target/PowerPC/PPCScheduleP10.td
@@ -25,14 +25,8 @@ def P10vMU_Read : SchedRead;
def P10Model : SchedMachineModel {
let IssueWidth = 8;
-
- // TODO - Need to be updated according to P10 UM.
let MicroOpBufferSize = 44;
-
- // TODO - tune this on real HW once it arrives. For now, we will use the same
- // value as we do on P9.
let LoopMicroOpBufferSize = 60;
-
let CompleteModel = 1;
// Do not support SPE (Signal Processing Engine) on Power 10.
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index e32a2ed9dee7..49400eefe4a9 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -106,9 +106,7 @@ void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU,
if (IsPPC64 && has64BitSupport())
Use64BitRegs = true;
- if ((TargetTriple.isOSFreeBSD() && TargetTriple.getOSMajorVersion() >= 13) ||
- TargetTriple.isOSNetBSD() || TargetTriple.isOSOpenBSD() ||
- TargetTriple.isMusl())
+ if (TargetTriple.isPPC32SecurePlt())
IsSecurePlt = true;
if (HasSPE && IsPPC64)
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 05ab0d4d7198..306a52dca836 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -16,7 +16,6 @@
#include "PPCFrameLowering.h"
#include "PPCISelLowering.h"
#include "PPCInstrInfo.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/RegisterBankInfo.h"
@@ -24,6 +23,7 @@
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/TargetParser/Triple.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -240,6 +240,8 @@ public:
bool enableSubRegLiveness() const override;
+ bool enableSpillageCopyElimination() const override { return true; }
+
/// True if the GV will be accessed via an indirect symbol.
bool isGVIndirectSymbol(const GlobalValue *GV) const;
@@ -274,6 +276,12 @@ public:
return IsPPC64 ? PPC::X2 : PPC::R2;
}
+ MCRegister getThreadPointerRegister() const {
+ assert((is64BitELFABI() || isAIXABI()) &&
+ "Should only be called for targets with a thread pointer register.");
+ return IsPPC64 ? PPC::X13 : PPC::R13;
+ }
+
MCRegister getStackPointerRegister() const {
return IsPPC64 ? PPC::X1 : PPC::R1;
}
diff --git a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 59e8f3ff84a4..9518d5347065 100644
--- a/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -56,13 +56,16 @@ protected:
I != IE;) {
MachineInstr &MI = *I;
IsPCREL = isPCREL(MI);
+ // There are a number of slight differences in code generation
+ // when we call .__get_tpointer (32-bit AIX TLS).
+ bool IsTLSTPRelMI = MI.getOpcode() == PPC::GETtlsTpointer32AIX;
if (MI.getOpcode() != PPC::ADDItlsgdLADDR &&
MI.getOpcode() != PPC::ADDItlsldLADDR &&
MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
MI.getOpcode() != PPC::ADDItlsldLADDR32 &&
MI.getOpcode() != PPC::TLSGDAIX &&
- MI.getOpcode() != PPC::TLSGDAIX8 && !IsPCREL) {
+ MI.getOpcode() != PPC::TLSGDAIX8 && !IsTLSTPRelMI && !IsPCREL) {
// Although we create ADJCALLSTACKDOWN and ADJCALLSTACKUP
// as scheduling fences, we skip creating fences if we already
// have existing ADJCALLSTACKDOWN/UP to avoid nesting,
@@ -82,7 +85,7 @@ protected:
Register InReg = PPC::NoRegister;
Register GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
Register GPR4 = Is64Bit ? PPC::X4 : PPC::R4;
- if (!IsPCREL)
+ if (!IsPCREL && !IsTLSTPRelMI)
InReg = MI.getOperand(1).getReg();
DebugLoc DL = MI.getDebugLoc();
@@ -116,6 +119,12 @@ protected:
// set Opc2 here.
Opc2 = PPC::GETtlsADDR32AIX;
break;
+ case PPC::GETtlsTpointer32AIX:
+ // GETtlsTpointer32AIX is expanded to a call to GET_TPOINTER on AIX
+ // 32-bit mode within PPCAsmPrinter. This instruction does not need
+ // to change, so Opc2 is set to the same instruction opcode.
+ Opc2 = PPC::GETtlsTpointer32AIX;
+ break;
case PPC::PADDI8pc:
assert(IsPCREL && "Expecting General/Local Dynamic PCRel");
Opc1 = PPC::PADDI8pc;
@@ -138,11 +147,17 @@ protected:
if (IsAIX) {
// The variable offset and region handle are copied in r4 and r3. The
// copies are followed by GETtlsADDR32AIX/GETtlsADDR64AIX.
- BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
- .addReg(MI.getOperand(1).getReg());
- BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
- .addReg(MI.getOperand(2).getReg());
- BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3).addReg(GPR4);
+ if (!IsTLSTPRelMI) {
+ BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR4)
+ .addReg(MI.getOperand(1).getReg());
+ BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
+ .addReg(MI.getOperand(2).getReg());
+ BuildMI(MBB, I, DL, TII->get(Opc2), GPR3).addReg(GPR3).addReg(GPR4);
+ } else
+ // The opcode of GETtlsTpointer32AIX does not change, because later
+ // this instruction will be expanded into a call to .__get_tpointer,
+ // which will return the thread pointer into r3.
+ BuildMI(MBB, I, DL, TII->get(Opc2), GPR3);
} else {
MachineInstr *Addi;
if (IsPCREL) {
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 9aea5af8a60a..3858d44e5099 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -22,7 +22,6 @@
#include "TargetInfo/PowerPCTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
@@ -43,6 +42,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Scalar.h"
#include <cassert>
#include <memory>
@@ -161,6 +161,17 @@ static std::string getDataLayoutString(const Triple &T) {
if (!is64Bit || T.getOS() == Triple::Lv2)
Ret += "-p:32:32";
+ // If the target ABI uses function descriptors, then the alignment of function
+ // pointers depends on the alignment used to emit the descriptor. Otherwise,
+ // function pointers are aligned to 32 bits because the instructions must be.
+ if ((T.getArch() == Triple::ppc64 && !T.isPPC64ELFv2ABI())) {
+ Ret += "-Fi64";
+ } else if (T.isOSAIX()) {
+ Ret += is64Bit ? "-Fi64" : "-Fi32";
+ } else {
+ Ret += "-Fn32";
+ }
+
// Note, the alignment values for f64 and i64 on ppc64 in Darwin
// documentation are wrong; these are correct (i.e. "what gcc does").
Ret += "-i64:64";
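For illustration, a simplified sketch of which function-pointer marker the branch above selects for a few representative targets (fnPtrAttr and its three flags are hypothetical stand-ins for the Triple queries, not an LLVM API):

#include <cassert>
#include <string>

// IsPPC64BE models Triple::ppc64 (big-endian 64-bit), IsELFv2 models
// isPPC64ELFv2ABI(), and IsAIX models isOSAIX().
static std::string fnPtrAttr(bool IsPPC64BE, bool IsELFv2, bool IsAIX) {
  if (IsPPC64BE && !IsELFv2)
    return "-Fi64"; // descriptor ABI: pointer alignment follows the descriptor
  if (IsAIX)
    return IsPPC64BE ? "-Fi64" : "-Fi32";
  return "-Fn32"; // pointers aligned to 32 bits like the instructions
}

int main() {
  assert(fnPtrAttr(false, true, false) == "-Fn32"); // e.g. ppc64le ELFv2
  assert(fnPtrAttr(true, false, true) == "-Fi64");  // e.g. 64-bit AIX
  assert(fnPtrAttr(false, false, true) == "-Fi32"); // e.g. 32-bit AIX
  return 0;
}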
@@ -237,7 +248,10 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
case Triple::ppc64le:
return PPCTargetMachine::PPC_ABI_ELFv2;
case Triple::ppc64:
- return PPCTargetMachine::PPC_ABI_ELFv1;
+ if (TT.isPPC64ELFv2ABI())
+ return PPCTargetMachine::PPC_ABI_ELFv2;
+ else
+ return PPCTargetMachine::PPC_ABI_ELFv1;
default:
return PPCTargetMachine::PPC_ABI_UNKNOWN;
}
@@ -474,7 +488,7 @@ bool PPCPassConfig::addPreISel() {
addPass(createPPCLoopInstrFormPrepPass(getPPCTargetMachine()));
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
- addPass(createHardwareLoopsPass());
+ addPass(createHardwareLoopsLegacyPass());
return false;
}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 594ba1816263..8137b61f4982 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -517,7 +517,7 @@ unsigned PPCTTIImpl::getPrefetchDistance() const {
return 300;
}
-unsigned PPCTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned PPCTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
unsigned Directive = ST->getCPUDirective();
// The 440 has no SIMD support, but floating-point instructions
// have a 5-cycle latency, so unroll by 5x for latency hiding.
@@ -1079,17 +1079,5 @@ InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
}
bool PPCTTIImpl::supportsTailCallFor(const CallBase *CB) const {
- // Subtargets using PC-Relative addressing supported.
- if (ST->isUsingPCRelativeCalls())
- return true;
-
- const Function *Callee = CB->getCalledFunction();
- // Indirect calls and variadic argument functions not supported.
- if (!Callee || Callee->isVarArg())
- return false;
-
- const Function *Caller = CB->getCaller();
- // Support if we can share TOC base.
- return ST->getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(),
- Callee);
+ return TLI->supportsTailCallFor(CB);
}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 97377cb9af43..c3ade9968c33 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -34,8 +34,6 @@ class PPCTTIImpl : public BasicTTIImplBase<PPCTTIImpl> {
const PPCSubtarget *getST() const { return ST; }
const PPCTargetLowering *getTLI() const { return TLI; }
- bool mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
- SmallPtrSetImpl<const Value *> &Visited);
public:
explicit PPCTTIImpl(const PPCTargetMachine *TM, const Function &F)
@@ -101,7 +99,7 @@ public:
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
unsigned getCacheLineSize() const override;
unsigned getPrefetchDistance() const override;
- unsigned getMaxInterleaveFactor(unsigned VF);
+ unsigned getMaxInterleaveFactor(ElementCount VF);
InstructionCost vectorCostAdjustmentFactor(unsigned Opcode, Type *Ty1,
Type *Ty2);
InstructionCost getArithmeticInstrCost(
diff --git a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 837812ab85c4..0d8c71f9f2e6 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -314,10 +314,7 @@ protected:
// copy to be removed, or somewhere in between there and here). This
// is necessary only if it is a physical register.
if (!AddendSrcReg.isVirtual())
- for (MCRegUnitIterator Units(AddendSrcReg.asMCReg(), TRI);
- Units.isValid(); ++Units) {
- unsigned Unit = *Units;
-
+ for (MCRegUnit Unit : TRI->regunits(AddendSrcReg.asMCReg())) {
LiveRange &AddendSrcRange = LIS->getRegUnit(Unit);
AddendSrcRange.extendInBlock(LIS->getMBBStartIdx(&MBB),
FMAIdx.getRegSlot());
diff --git a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index 365ba524a757..ea88021eec3d 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -618,7 +618,7 @@ void PPCVSXSwapRemoval::formWebs() {
continue;
MachineInstr* DefMI = MRI->getVRegDef(Reg);
- assert(SwapMap.find(DefMI) != SwapMap.end() &&
+ assert(SwapMap.contains(DefMI) &&
"Inconsistency: def of vector reg not found in swap map!");
int DefIdx = SwapMap[DefMI];
(void)EC->unionSets(SwapVector[DefIdx].VSEId,
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 9752e398bd99..046a208921ae 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVAsmParser.cpp - Parse RISCV assembly to MCInst instructions --===//
+//===-- RISCVAsmParser.cpp - Parse RISC-V assembly to MCInst instructions -===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -19,6 +19,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -35,6 +36,7 @@
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/RISCVAttributes.h"
#include "llvm/Support/RISCVISAInfo.h"
@@ -48,6 +50,9 @@ using namespace llvm;
STATISTIC(RISCVNumInstrsCompressed,
"Number of RISC-V Compressed instructions emitted");
+static cl::opt<bool> AddBuildAttributes("riscv-add-build-attributes",
+ cl::init(false));
+
namespace llvm {
extern const SubtargetFeatureKV RISCVFeatureKV[RISCV::NumSubtargetFeatures];
} // namespace llvm
@@ -60,6 +65,18 @@ struct ParserOptionsSet {
};
class RISCVAsmParser : public MCTargetAsmParser {
+ // This tracks the parsing of the 4 comma-separated operands that make up the
+ // vtype portion of vset(i)vli instructions. Each state names the next
+ // expected operand, with Done meaning no further operands are expected.
+ enum VTypeState {
+ VTypeState_SEW,
+ VTypeState_LMUL,
+ VTypeState_TailPolicy,
+ VTypeState_MaskPolicy,
+ VTypeState_Done,
+ };
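// Illustration: the vtype portion of "vsetvli a0, a1, e32, m2, ta, ma" is
// consumed as SEW ("e32"), then LMUL ("m2"), then the tail policy ("ta"),
// then the mask policy ("ma"), at which point the state is Done.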
+
SmallVector<FeatureBitset, 4> FeatureBitStack;
SmallVector<ParserOptionsSet, 4> ParserOptionsStack;
@@ -67,7 +84,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
bool isRV64() const { return getSTI().hasFeature(RISCV::Feature64Bit); }
- bool isRV32E() const { return getSTI().hasFeature(RISCV::FeatureRV32E); }
+ bool isRVE() const { return getSTI().hasFeature(RISCV::FeatureRVE); }
RISCVTargetStreamer &getTargetStreamer() {
assert(getParser().getStreamer().getTargetStreamer() &&
@@ -78,9 +95,13 @@ class RISCVAsmParser : public MCTargetAsmParser {
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
+ unsigned checkTargetMatchPredicate(MCInst &Inst) override;
bool generateImmOutOfRangeError(OperandVector &Operands, uint64_t ErrorInfo,
- int64_t Lower, int64_t Upper, Twine Msg);
+ int64_t Lower, int64_t Upper,
+ const Twine &Msg);
+ bool generateImmOutOfRangeError(SMLoc ErrorLoc, int64_t Lower, int64_t Upper,
+ const Twine &Msg);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands, MCStreamer &Out,
@@ -95,7 +116,12 @@ class RISCVAsmParser : public MCTargetAsmParser {
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
- bool ParseDirective(AsmToken DirectiveID) override;
+ ParseStatus parseDirective(AsmToken DirectiveID) override;
+
+ bool parseVTypeToken(StringRef Identifier, VTypeState &State, unsigned &Sew,
+ unsigned &Lmul, bool &Fractional, bool &TailAgnostic,
+ bool &MaskAgnostic);
+ bool generateVTypeError(SMLoc ErrorLoc);
// Helper to actually emit an instruction to the MCStreamer. Also, when
// possible, compression of the instruction is performed.
@@ -114,6 +140,9 @@ class RISCVAsmParser : public MCTargetAsmParser {
// Helper to emit pseudo instruction "lla" used in PC-rel addressing.
void emitLoadLocalAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+ // Helper to emit pseudo instruction "lga" used in GOT-rel addressing.
+ void emitLoadGlobalAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
+
// Helper to emit pseudo instruction "la" used in GOT/PC-rel addressing.
void emitLoadAddress(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out);
@@ -156,21 +185,27 @@ class RISCVAsmParser : public MCTargetAsmParser {
#define GET_ASSEMBLER_HEADER
#include "RISCVGenAsmMatcher.inc"
- OperandMatchResultTy parseCSRSystemRegister(OperandVector &Operands);
- OperandMatchResultTy parseImmediate(OperandVector &Operands);
- OperandMatchResultTy parseRegister(OperandVector &Operands,
- bool AllowParens = false);
- OperandMatchResultTy parseMemOpBaseReg(OperandVector &Operands);
- OperandMatchResultTy parseZeroOffsetMemOp(OperandVector &Operands);
- OperandMatchResultTy parseOperandWithModifier(OperandVector &Operands);
- OperandMatchResultTy parseBareSymbol(OperandVector &Operands);
- OperandMatchResultTy parseCallSymbol(OperandVector &Operands);
- OperandMatchResultTy parsePseudoJumpSymbol(OperandVector &Operands);
- OperandMatchResultTy parseJALOffset(OperandVector &Operands);
- OperandMatchResultTy parseVTypeI(OperandVector &Operands);
- OperandMatchResultTy parseMaskReg(OperandVector &Operands);
- OperandMatchResultTy parseInsnDirectiveOpcode(OperandVector &Operands);
- OperandMatchResultTy parseGPRAsFPR(OperandVector &Operands);
+ ParseStatus parseCSRSystemRegister(OperandVector &Operands);
+ ParseStatus parseFPImm(OperandVector &Operands);
+ ParseStatus parseImmediate(OperandVector &Operands);
+ ParseStatus parseRegister(OperandVector &Operands, bool AllowParens = false);
+ ParseStatus parseMemOpBaseReg(OperandVector &Operands);
+ ParseStatus parseZeroOffsetMemOp(OperandVector &Operands);
+ ParseStatus parseOperandWithModifier(OperandVector &Operands);
+ ParseStatus parseBareSymbol(OperandVector &Operands);
+ ParseStatus parseCallSymbol(OperandVector &Operands);
+ ParseStatus parsePseudoJumpSymbol(OperandVector &Operands);
+ ParseStatus parseJALOffset(OperandVector &Operands);
+ ParseStatus parseVTypeI(OperandVector &Operands);
+ ParseStatus parseMaskReg(OperandVector &Operands);
+ ParseStatus parseInsnDirectiveOpcode(OperandVector &Operands);
+ ParseStatus parseInsnCDirectiveOpcode(OperandVector &Operands);
+ ParseStatus parseGPRAsFPR(OperandVector &Operands);
+ ParseStatus parseFRMArg(OperandVector &Operands);
+ ParseStatus parseFenceArg(OperandVector &Operands);
+ ParseStatus parseReglist(OperandVector &Operands);
+ ParseStatus parseRetval(OperandVector &Operands);
+ ParseStatus parseZcmpSpimm(OperandVector &Operands);
bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
@@ -179,20 +214,22 @@ class RISCVAsmParser : public MCTargetAsmParser {
bool parseDirectiveInsn(SMLoc L);
bool parseDirectiveVariantCC();
+ /// Helper to reset target features for a new arch string. It also records
+ /// the new arch string expanded by RISCVISAInfo and reports an error for an
+ /// invalid arch string.
+ bool resetToArch(StringRef Arch, SMLoc Loc, std::string &Result,
+ bool FromOptionDirective);
+
void setFeatureBits(uint64_t Feature, StringRef FeatureString) {
- if (!(getSTI().getFeatureBits()[Feature])) {
+ if (!(getSTI().hasFeature(Feature))) {
MCSubtargetInfo &STI = copySTI();
setAvailableFeatures(
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
}
}
- bool getFeatureBits(uint64_t Feature) {
- return getSTI().getFeatureBits()[Feature];
- }
-
void clearFeatureBits(uint64_t Feature, StringRef FeatureString) {
- if (getSTI().getFeatureBits()[Feature]) {
+ if (getSTI().hasFeature(Feature)) {
MCSubtargetInfo &STI = copySTI();
setAvailableFeatures(
ComputeAvailableFeatures(STI.ToggleFeature(FeatureString)));
@@ -222,10 +259,12 @@ class RISCVAsmParser : public MCTargetAsmParser {
}
std::unique_ptr<RISCVOperand> defaultMaskRegOp() const;
+ std::unique_ptr<RISCVOperand> defaultFRMArgOp() const;
public:
enum RISCVMatchResultTy {
Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY,
+ Match_RequiresEvenGPRs,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "RISCVGenAsmMatcher.inc"
#undef GET_OPERAND_DIAGNOSTIC_TYPES
@@ -233,10 +272,13 @@ public:
static bool classifySymbolRef(const MCExpr *Expr,
RISCVMCExpr::VariantKind &Kind);
+ static bool isSymbolDiff(const MCExpr *Expr);
RISCVAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, STI, MII) {
+ MCAsmParserExtension::Initialize(Parser);
+
Parser.addAliasForDirective(".half", ".2byte");
Parser.addAliasForDirective(".hword", ".2byte");
Parser.addAliasForDirective(".word", ".4byte");
@@ -244,13 +286,12 @@ public:
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
auto ABIName = StringRef(Options.ABIName);
- if (ABIName.endswith("f") &&
- !getSTI().getFeatureBits()[RISCV::FeatureStdExtF]) {
+ if (ABIName.endswith("f") && !getSTI().hasFeature(RISCV::FeatureStdExtF)) {
errs() << "Hard-float 'f' ABI can't be used for a target that "
"doesn't support the F instruction set extension (ignoring "
"target-abi)\n";
} else if (ABIName.endswith("d") &&
- !getSTI().getFeatureBits()[RISCV::FeatureStdExtD]) {
+ !getSTI().hasFeature(RISCV::FeatureStdExtD)) {
errs() << "Hard-float 'd' ABI can't be used for a target that "
"doesn't support the D instruction set extension (ignoring "
"target-abi)\n";
@@ -263,31 +304,41 @@ public:
const MCObjectFileInfo *MOFI = Parser.getContext().getObjectFileInfo();
ParserOptions.IsPicEnabled = MOFI->isPositionIndependent();
+
+ if (AddBuildAttributes)
+ getTargetStreamer().emitTargetAttributes(STI, /*EmitStackAlign*/ false);
}
};
/// RISCVOperand - Instances of this class represent a parsed machine
/// instruction
-struct RISCVOperand : public MCParsedAsmOperand {
+struct RISCVOperand final : public MCParsedAsmOperand {
enum class KindTy {
Token,
Register,
Immediate,
+ FPImmediate,
SystemRegister,
VType,
+ FRM,
+ Fence,
+ Rlist,
+ Spimm,
} Kind;
- bool IsRV64;
-
- bool IsGPRAsFPR;
-
struct RegOp {
MCRegister RegNum;
+ bool IsGPRAsFPR;
};
struct ImmOp {
const MCExpr *Val;
+ bool IsRV64;
+ };
+
+ struct FPImmOp {
+ uint64_t Val;
};
struct SysRegOp {
@@ -302,13 +353,34 @@ struct RISCVOperand : public MCParsedAsmOperand {
unsigned Val;
};
+ struct FRMOp {
+ RISCVFPRndMode::RoundingMode FRM;
+ };
+
+ struct FenceOp {
+ unsigned Val;
+ };
+
+ struct RlistOp {
+ unsigned Val;
+ };
+
+ struct SpimmOp {
+ unsigned Val;
+ };
+
SMLoc StartLoc, EndLoc;
union {
StringRef Tok;
RegOp Reg;
ImmOp Imm;
+ FPImmOp FPImm;
struct SysRegOp SysReg;
struct VTypeOp VType;
+ struct FRMOp FRM;
+ struct FenceOp Fence;
+ struct RlistOp Rlist;
+ struct SpimmOp Spimm;
};
RISCVOperand(KindTy K) : Kind(K) {}
@@ -316,7 +388,6 @@ struct RISCVOperand : public MCParsedAsmOperand {
public:
RISCVOperand(const RISCVOperand &o) : MCParsedAsmOperand() {
Kind = o.Kind;
- IsRV64 = o.IsRV64;
StartLoc = o.StartLoc;
EndLoc = o.EndLoc;
switch (Kind) {
@@ -326,6 +397,9 @@ public:
case KindTy::Immediate:
Imm = o.Imm;
break;
+ case KindTy::FPImmediate:
+ FPImm = o.FPImm;
+ break;
case KindTy::Token:
Tok = o.Tok;
break;
@@ -335,6 +409,18 @@ public:
case KindTy::VType:
VType = o.VType;
break;
+ case KindTy::FRM:
+ FRM = o.FRM;
+ break;
+ case KindTy::Fence:
+ Fence = o.Fence;
+ break;
+ case KindTy::Rlist:
+ Rlist = o.Rlist;
+ break;
+ case KindTy::Spimm:
+ Spimm = o.Spimm;
+ break;
}
}
@@ -343,22 +429,35 @@ public:
bool isV0Reg() const {
return Kind == KindTy::Register && Reg.RegNum == RISCV::V0;
}
+ bool isAnyReg() const {
+ return Kind == KindTy::Register &&
+ (RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(Reg.RegNum) ||
+ RISCVMCRegisterClasses[RISCV::FPR64RegClassID].contains(Reg.RegNum) ||
+ RISCVMCRegisterClasses[RISCV::VRRegClassID].contains(Reg.RegNum));
+ }
+ bool isAnyRegC() const {
+ return Kind == KindTy::Register &&
+ (RISCVMCRegisterClasses[RISCV::GPRCRegClassID].contains(
+ Reg.RegNum) ||
+ RISCVMCRegisterClasses[RISCV::FPR64CRegClassID].contains(
+ Reg.RegNum));
+ }
bool isImm() const override { return Kind == KindTy::Immediate; }
bool isMem() const override { return false; }
bool isSystemRegister() const { return Kind == KindTy::SystemRegister; }
+ bool isRlist() const { return Kind == KindTy::Rlist; }
+ bool isSpimm() const { return Kind == KindTy::Spimm; }
bool isGPR() const {
return Kind == KindTy::Register &&
RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(Reg.RegNum);
}
- bool isGPRAsFPR() const { return isGPR() && IsGPRAsFPR; }
+ bool isGPRAsFPR() const { return isGPR() && Reg.IsGPRAsFPR; }
- bool isGPRF64AsFPR() const { return isGPR() && IsGPRAsFPR && IsRV64; }
+ bool isGPRF64AsFPR() const { return isGPR() && Reg.IsGPRAsFPR; }
- bool isGPRPF64AsFPR() const {
- return isGPR() && IsGPRAsFPR && !IsRV64 && !((Reg.RegNum - RISCV::X0) & 1);
- }
+ bool isGPRPF64AsFPR() const { return isGPR() && Reg.IsGPRAsFPR; }
static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm,
RISCVMCExpr::VariantKind &VK) {
@@ -461,49 +560,23 @@ public:
/// Return true if the operand is a valid for the fence instruction e.g.
/// ('iorw').
- bool isFenceArg() const {
- if (!isImm())
- return false;
-
- int64_t Imm;
- RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
- if (evaluateConstantImm(getImm(), Imm, VK)) {
- // Only accept 0 as a constant immediate.
- return VK == RISCVMCExpr::VK_RISCV_None && Imm == 0;
- }
-
- auto *SVal = dyn_cast<MCSymbolRefExpr>(getImm());
-
- if (!SVal || SVal->getKind() != MCSymbolRefExpr::VK_None)
- return false;
-
- StringRef Str = SVal->getSymbol().getName();
- // Letters must be unique, taken from 'iorw', and in ascending order. This
- // holds as long as each individual character is one of 'iorw' and is
- // greater than the previous character.
- char Prev = '\0';
- for (char c : Str) {
- if (c != 'i' && c != 'o' && c != 'r' && c != 'w')
- return false;
- if (c <= Prev)
- return false;
- Prev = c;
- }
- return true;
- }
+ bool isFenceArg() const { return Kind == KindTy::Fence; }
/// Return true if the operand is a valid floating point rounding mode.
- bool isFRMArg() const {
- if (!isImm())
- return false;
- const MCExpr *Val = getImm();
- auto *SVal = dyn_cast<MCSymbolRefExpr>(Val);
- if (!SVal || SVal->getKind() != MCSymbolRefExpr::VK_None)
+ bool isFRMArg() const { return Kind == KindTy::FRM; }
+ bool isRTZArg() const { return isFRMArg() && FRM.FRM == RISCVFPRndMode::RTZ; }
+
+ /// Return true if the operand is a valid fli.s floating-point immediate.
+ bool isLoadFPImm() const {
+ if (isImm())
+ return isUImm5();
+ if (Kind != KindTy::FPImmediate)
return false;
-
- StringRef Str = SVal->getSymbol().getName();
-
- return RISCVFPRndMode::stringToRoundingMode(Str) != RISCVFPRndMode::Invalid;
+ int Idx = RISCVLoadFPImm::getLoadFPImm(
+ APFloat(APFloat::IEEEdouble(), APInt(64, getFPConst())));
+ // Don't allow decimal version of the minimum value. It is a different value
+ // for each supported data type.
+ return Idx >= 0 && Idx != 1;
}
bool isImmXLenLI() const {
@@ -516,8 +589,23 @@ public:
return true;
// Given only Imm, ensuring that the actually specified constant is either
// a signed or unsigned 64-bit number is unfortunately impossible.
- return IsConstantImm && VK == RISCVMCExpr::VK_RISCV_None &&
- (isRV64() || (isInt<32>(Imm) || isUInt<32>(Imm)));
+ if (IsConstantImm) {
+ return VK == RISCVMCExpr::VK_RISCV_None &&
+ (isRV64Imm() || (isInt<32>(Imm) || isUInt<32>(Imm)));
+ }
+
+ return RISCVAsmParser::isSymbolDiff(getImm());
+ }
+
+ bool isImmXLenLI_Restricted() const {
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ if (!isImm())
+ return false;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ // 'la imm' supports constant immediates only.
+ return IsConstantImm && (VK == RISCVMCExpr::VK_RISCV_None) &&
+ (isRV64Imm() || (isInt<32>(Imm) || isUInt<32>(Imm)));
}
bool isUImmLog2XLen() const {
@@ -528,7 +616,7 @@ public:
if (!evaluateConstantImm(getImm(), Imm, VK) ||
VK != RISCVMCExpr::VK_RISCV_None)
return false;
- return (isRV64() && isUInt<6>(Imm)) || isUInt<5>(Imm);
+ return (isRV64Imm() && isUInt<6>(Imm)) || isUInt<5>(Imm);
}
bool isUImmLog2XLenNonZero() const {
@@ -541,7 +629,7 @@ public:
return false;
if (Imm == 0)
return false;
- return (isRV64() && isUInt<6>(Imm)) || isUInt<5>(Imm);
+ return (isRV64Imm() && isUInt<6>(Imm)) || isUInt<5>(Imm);
}
bool isUImmLog2XLenHalf() const {
@@ -552,7 +640,7 @@ public:
if (!evaluateConstantImm(getImm(), Imm, VK) ||
VK != RISCVMCExpr::VK_RISCV_None)
return false;
- return (isRV64() && isUInt<5>(Imm)) || isUInt<4>(Imm);
+ return (isRV64Imm() && isUInt<5>(Imm)) || isUInt<4>(Imm);
}
template <unsigned N> bool IsUImm() const {
@@ -564,10 +652,24 @@ public:
return IsConstantImm && isUInt<N>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
}
- bool isUImm2() { return IsUImm<2>(); }
- bool isUImm3() { return IsUImm<3>(); }
- bool isUImm5() { return IsUImm<5>(); }
- bool isUImm7() { return IsUImm<7>(); }
+ bool isUImm1() const { return IsUImm<1>(); }
+ bool isUImm2() const { return IsUImm<2>(); }
+ bool isUImm3() const { return IsUImm<3>(); }
+ bool isUImm4() const { return IsUImm<4>(); }
+ bool isUImm5() const { return IsUImm<5>(); }
+ bool isUImm6() const { return IsUImm<6>(); }
+ bool isUImm7() const { return IsUImm<7>(); }
+ bool isUImm8() const { return IsUImm<8>(); }
+
+ bool isUImm8GE32() const {
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ if (!isImm())
+ return false;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isUInt<8>(Imm) && Imm >= 32 &&
+ VK == RISCVMCExpr::VK_RISCV_None;
+ }
bool isRnumArg() const {
int64_t Imm;
@@ -579,13 +681,44 @@ public:
VK == RISCVMCExpr::VK_RISCV_None;
}
+ bool isRnumArg_0_7() const {
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ if (!isImm())
+ return false;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && Imm >= INT64_C(0) && Imm <= INT64_C(7) &&
+ VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
+ bool isRnumArg_1_10() const {
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ if (!isImm())
+ return false;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && Imm >= INT64_C(1) && Imm <= INT64_C(10) &&
+ VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
+ bool isRnumArg_2_14() const {
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ if (!isImm())
+ return false;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && Imm >= INT64_C(2) && Imm <= INT64_C(14) &&
+ VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
bool isSImm5() const {
if (!isImm())
return false;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
int64_t Imm;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
- return IsConstantImm && isInt<5>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
+ return IsConstantImm && isInt<5>(fixImmediateForRV32(Imm, isRV64Imm())) &&
+ VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm6() const {
@@ -594,7 +727,8 @@ public:
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
int64_t Imm;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
- return IsConstantImm && isInt<6>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
+ return IsConstantImm && isInt<6>(fixImmediateForRV32(Imm, isRV64Imm())) &&
+ VK == RISCVMCExpr::VK_RISCV_None;
}
bool isSImm6NonZero() const {
@@ -603,7 +737,8 @@ public:
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
int64_t Imm;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
- return IsConstantImm && isInt<6>(Imm) && (Imm != 0) &&
+ return IsConstantImm && Imm != 0 &&
+ isInt<6>(fixImmediateForRV32(Imm, isRV64Imm())) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
@@ -618,6 +753,16 @@ public:
VK == RISCVMCExpr::VK_RISCV_None;
}
+ bool isUImm2Lsb0() const {
+ if (!isImm())
+ return false;
+ int64_t Imm;
+ RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+ bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+ return IsConstantImm && isShiftedUInt<1, 1>(Imm) &&
+ VK == RISCVMCExpr::VK_RISCV_None;
+ }
+
bool isUImm7Lsb00() const {
if (!isImm())
return false;
@@ -670,6 +815,14 @@ public:
VK == RISCVMCExpr::VK_RISCV_None;
}
+ // If this is RV32 and the immediate is a uimm32, sign extend it to 32 bits.
+ // This allows writing 'addi a0, a0, 0xffffffff'.
+ static int64_t fixImmediateForRV32(int64_t Imm, bool IsRV64Imm) {
+ if (IsRV64Imm || !isUInt<32>(Imm))
+ return Imm;
+ return SignExtend64<32>(Imm);
+ }
+
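A standalone sketch of the fix-up above and the behaviour it enables, for illustration only (this re-implements the helper with equivalent logic rather than using LLVM's SignExtend64):

#include <cassert>
#include <cstdint>

// Mirror of the helper above: on RV32, a value that fits in 32 unsigned bits
// is reinterpreted as a signed 32-bit value.
static int64_t fixImmediateForRV32(int64_t Imm, bool IsRV64Imm) {
  if (IsRV64Imm || Imm < 0 || Imm > 0xffffffffll)
    return Imm;
  return static_cast<int32_t>(static_cast<uint32_t>(Imm));
}

int main() {
  // 'addi a0, a0, 0xffffffff' on RV32: the immediate becomes -1 and then
  // passes the signed 12-bit range check.
  assert(fixImmediateForRV32(0xffffffffll, /*IsRV64Imm=*/false) == -1);
  // On RV64 the value is left alone (and would not fit isInt<12>).
  assert(fixImmediateForRV32(0xffffffffll, /*IsRV64Imm=*/true) == 0xffffffffll);
  assert(fixImmediateForRV32(42, /*IsRV64Imm=*/false) == 42);
  return 0;
}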
bool isSImm12() const {
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
int64_t Imm;
@@ -680,7 +833,7 @@ public:
if (!IsConstantImm)
IsValid = RISCVAsmParser::classifySymbolRef(getImm(), VK);
else
- IsValid = isInt<12>(Imm);
+ IsValid = isInt<12>(fixImmediateForRV32(Imm, isRV64Imm()));
return IsValid && ((IsConstantImm && VK == RISCVMCExpr::VK_RISCV_None) ||
VK == RISCVMCExpr::VK_RISCV_LO ||
VK == RISCVMCExpr::VK_RISCV_PCREL_LO ||
@@ -768,7 +921,8 @@ public:
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
int64_t Imm;
bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
- return IsConstantImm && isInt<5>(Imm - 1) &&
+ return IsConstantImm &&
+ isInt<5>(fixImmediateForRV32(Imm, isRV64Imm()) - 1) &&
VK == RISCVMCExpr::VK_RISCV_None;
}
@@ -777,7 +931,10 @@ public:
/// getEndLoc - Gets location of the last token of this operand
SMLoc getEndLoc() const override { return EndLoc; }
/// True if this operand is for an RV64 instruction
- bool isRV64() const { return IsRV64; }
+ bool isRV64Imm() const {
+ assert(Kind == KindTy::Immediate && "Invalid type access!");
+ return Imm.IsRV64;
+ }
unsigned getReg() const override {
assert(Kind == KindTy::Register && "Invalid type access!");
@@ -794,6 +951,11 @@ public:
return Imm.Val;
}
+ uint64_t getFPConst() const {
+ assert(Kind == KindTy::FPImmediate && "Invalid type access!");
+ return FPImm.Val;
+ }
+
StringRef getToken() const {
assert(Kind == KindTy::Token && "Invalid type access!");
return Tok;
@@ -804,6 +966,16 @@ public:
return VType.Val;
}
+ RISCVFPRndMode::RoundingMode getFRM() const {
+ assert(Kind == KindTy::FRM && "Invalid type access!");
+ return FRM.FRM;
+ }
+
+ unsigned getFence() const {
+ assert(Kind == KindTy::Fence && "Invalid type access!");
+ return Fence.Val;
+ }
+
void print(raw_ostream &OS) const override {
auto RegName = [](MCRegister Reg) {
if (Reg)
@@ -816,6 +988,8 @@ public:
case KindTy::Immediate:
OS << *getImm();
break;
+ case KindTy::FPImmediate:
+ break;
case KindTy::Register:
OS << "<register " << RegName(getReg()) << ">";
break;
@@ -830,28 +1004,44 @@ public:
RISCVVType::printVType(getVType(), OS);
OS << '>';
break;
+ case KindTy::FRM:
+ OS << "<frm: ";
+ roundingModeToString(getFRM());
+ OS << '>';
+ break;
+ case KindTy::Fence:
+ OS << "<fence: ";
+ OS << getFence();
+ OS << '>';
+ break;
+ case KindTy::Rlist:
+ OS << "<rlist: ";
+ RISCVZC::printRlist(Rlist.Val, OS);
+ OS << '>';
+ break;
+ case KindTy::Spimm:
+ OS << "<Spimm: ";
+ RISCVZC::printSpimm(Spimm.Val, OS);
+ OS << '>';
+ break;
}
}
- static std::unique_ptr<RISCVOperand> createToken(StringRef Str, SMLoc S,
- bool IsRV64) {
+ static std::unique_ptr<RISCVOperand> createToken(StringRef Str, SMLoc S) {
auto Op = std::make_unique<RISCVOperand>(KindTy::Token);
Op->Tok = Str;
Op->StartLoc = S;
Op->EndLoc = S;
- Op->IsRV64 = IsRV64;
return Op;
}
- static std::unique_ptr<RISCVOperand> createReg(unsigned RegNo, SMLoc S,
- SMLoc E, bool IsRV64,
- bool IsGPRAsFPR = false) {
+ static std::unique_ptr<RISCVOperand>
+ createReg(unsigned RegNo, SMLoc S, SMLoc E, bool IsGPRAsFPR = false) {
auto Op = std::make_unique<RISCVOperand>(KindTy::Register);
Op->Reg.RegNum = RegNo;
+ Op->Reg.IsGPRAsFPR = IsGPRAsFPR;
Op->StartLoc = S;
Op->EndLoc = E;
- Op->IsRV64 = IsRV64;
- Op->IsGPRAsFPR = IsGPRAsFPR;
return Op;
}
@@ -859,42 +1049,80 @@ public:
SMLoc E, bool IsRV64) {
auto Op = std::make_unique<RISCVOperand>(KindTy::Immediate);
Op->Imm.Val = Val;
+ Op->Imm.IsRV64 = IsRV64;
Op->StartLoc = S;
Op->EndLoc = E;
- Op->IsRV64 = IsRV64;
return Op;
}
- static std::unique_ptr<RISCVOperand>
- createSysReg(StringRef Str, SMLoc S, unsigned Encoding, bool IsRV64) {
+ static std::unique_ptr<RISCVOperand> createFPImm(uint64_t Val, SMLoc S) {
+ auto Op = std::make_unique<RISCVOperand>(KindTy::FPImmediate);
+ Op->FPImm.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<RISCVOperand> createSysReg(StringRef Str, SMLoc S,
+ unsigned Encoding) {
auto Op = std::make_unique<RISCVOperand>(KindTy::SystemRegister);
Op->SysReg.Data = Str.data();
Op->SysReg.Length = Str.size();
Op->SysReg.Encoding = Encoding;
Op->StartLoc = S;
Op->EndLoc = S;
- Op->IsRV64 = IsRV64;
return Op;
}
- static std::unique_ptr<RISCVOperand> createVType(unsigned VTypeI, SMLoc S,
- bool IsRV64) {
+ static std::unique_ptr<RISCVOperand>
+ createFRMArg(RISCVFPRndMode::RoundingMode FRM, SMLoc S) {
+ auto Op = std::make_unique<RISCVOperand>(KindTy::FRM);
+ Op->FRM.FRM = FRM;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<RISCVOperand> createFenceArg(unsigned Val, SMLoc S) {
+ auto Op = std::make_unique<RISCVOperand>(KindTy::Fence);
+ Op->Fence.Val = Val;
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<RISCVOperand> createVType(unsigned VTypeI, SMLoc S) {
auto Op = std::make_unique<RISCVOperand>(KindTy::VType);
Op->VType.Val = VTypeI;
Op->StartLoc = S;
Op->EndLoc = S;
- Op->IsRV64 = IsRV64;
return Op;
}
- void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ static std::unique_ptr<RISCVOperand> createRlist(unsigned RlistEncode,
+ SMLoc S) {
+ auto Op = std::make_unique<RISCVOperand>(KindTy::Rlist);
+ Op->Rlist.Val = RlistEncode;
+ Op->StartLoc = S;
+ return Op;
+ }
+
+ static std::unique_ptr<RISCVOperand> createSpimm(unsigned Spimm, SMLoc S) {
+ auto Op = std::make_unique<RISCVOperand>(KindTy::Spimm);
+ Op->Spimm.Val = Spimm;
+ Op->StartLoc = S;
+ return Op;
+ }
+
+ static void addExpr(MCInst &Inst, const MCExpr *Expr, bool IsRV64Imm) {
assert(Expr && "Expr shouldn't be null!");
int64_t Imm = 0;
RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
bool IsConstant = evaluateConstantImm(Expr, Imm, VK);
if (IsConstant)
- Inst.addOperand(MCOperand::createImm(Imm));
+ Inst.addOperand(
+ MCOperand::createImm(fixImmediateForRV32(Imm, IsRV64Imm)));
else
Inst.addOperand(MCOperand::createExpr(Expr));
}
@@ -907,47 +1135,26 @@ public:
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
+ addExpr(Inst, getImm(), isRV64Imm());
}
- void addFenceArgOperands(MCInst &Inst, unsigned N) const {
+ void addFPImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
-
- int64_t Constant = 0;
- RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
- if (evaluateConstantImm(getImm(), Constant, VK)) {
- if (Constant == 0) {
- Inst.addOperand(MCOperand::createImm(Constant));
- return;
- }
- llvm_unreachable("FenceArg must contain only [iorw] or be 0");
+ if (isImm()) {
+ addExpr(Inst, getImm(), isRV64Imm());
+ return;
}
- // isFenceArg has validated the operand, meaning this cast is safe
- auto SE = cast<MCSymbolRefExpr>(getImm());
-
- unsigned Imm = 0;
- for (char c : SE->getSymbol().getName()) {
- switch (c) {
- default:
- llvm_unreachable("FenceArg must contain only [iorw] or be 0");
- case 'i':
- Imm |= RISCVFenceField::I;
- break;
- case 'o':
- Imm |= RISCVFenceField::O;
- break;
- case 'r':
- Imm |= RISCVFenceField::R;
- break;
- case 'w':
- Imm |= RISCVFenceField::W;
- break;
- }
- }
+ int Imm = RISCVLoadFPImm::getLoadFPImm(
+ APFloat(APFloat::IEEEdouble(), APInt(64, getFPConst())));
Inst.addOperand(MCOperand::createImm(Imm));
}
+ void addFenceArgOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(Fence.Val));
+ }
+
void addCSRSystemRegisterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createImm(SysReg.Encoding));
@@ -970,20 +1177,19 @@ public:
Inst.addOperand(MCOperand::createImm(Imm));
}
- // Returns the rounding mode represented by this RISCVOperand. Should only
- // be called after checking isFRMArg.
- RISCVFPRndMode::RoundingMode getRoundingMode() const {
- // isFRMArg has validated the operand, meaning this cast is safe.
- auto SE = cast<MCSymbolRefExpr>(getImm());
- RISCVFPRndMode::RoundingMode FRM =
- RISCVFPRndMode::stringToRoundingMode(SE->getSymbol().getName());
- assert(FRM != RISCVFPRndMode::Invalid && "Invalid rounding mode");
- return FRM;
+ void addRlistOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(Rlist.Val));
+ }
+
+ void addSpimmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createImm(Spimm.Val));
}
void addFRMArgOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createImm(getRoundingMode()));
+ Inst.addOperand(MCOperand::createImm(getFRM()));
}
};
} // end anonymous namespace.
@@ -1056,11 +1262,34 @@ unsigned RISCVAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
return Match_InvalidOperand;
}
+unsigned RISCVAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+
+ for (unsigned I = 0; I < MCID.NumOperands; ++I) {
+ if (MCID.operands()[I].RegClass == RISCV::GPRPF64RegClassID) {
+ const auto &Op = Inst.getOperand(I);
+ assert(Op.isReg());
+
+ MCRegister Reg = Op.getReg();
+ if (((Reg.id() - RISCV::X0) & 1) != 0)
+ return Match_RequiresEvenGPRs;
+ }
+ }
+
+ return Match_Success;
+}
+
+bool RISCVAsmParser::generateImmOutOfRangeError(
+ SMLoc ErrorLoc, int64_t Lower, int64_t Upper,
+ const Twine &Msg = "immediate must be an integer in the range") {
+ return Error(ErrorLoc, Msg + " [" + Twine(Lower) + ", " + Twine(Upper) + "]");
+}
+
bool RISCVAsmParser::generateImmOutOfRangeError(
OperandVector &Operands, uint64_t ErrorInfo, int64_t Lower, int64_t Upper,
- Twine Msg = "immediate must be an integer in the range") {
+ const Twine &Msg = "immediate must be an integer in the range") {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
- return Error(ErrorLoc, Msg + " [" + Twine(Lower) + ", " + Twine(Upper) + "]");
+ return generateImmOutOfRangeError(ErrorLoc, Lower, Upper, Msg);
}
bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -1125,6 +1354,10 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
switch (Result) {
default:
break;
+ case Match_RequiresEvenGPRs:
+ return Error(IDLoc,
+ "double precision floating point operands must use even "
+ "numbered X register");
case Match_InvalidImmXLenLI:
if (isRV64()) {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
@@ -1133,6 +1366,17 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return generateImmOutOfRangeError(Operands, ErrorInfo,
std::numeric_limits<int32_t>::min(),
std::numeric_limits<uint32_t>::max());
+ case Match_InvalidImmXLenLI_Restricted:
+ if (isRV64()) {
+ SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "operand either must be a constant 64-bit integer "
+ "or a bare symbol name");
+ }
+ return generateImmOutOfRangeError(
+ Operands, ErrorInfo, std::numeric_limits<int32_t>::min(),
+ std::numeric_limits<uint32_t>::max(),
+ "operand either must be a bare symbol name or an immediate integer in "
+ "the range");
case Match_InvalidImmZero: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "immediate must be zero");
@@ -1149,14 +1393,27 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (isRV64())
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1);
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 4) - 1);
+ case Match_InvalidUImm1:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 1) - 1);
case Match_InvalidUImm2:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 2) - 1);
+ case Match_InvalidUImm2Lsb0:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, 2,
+ "immediate must be one of");
case Match_InvalidUImm3:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 3) - 1);
+ case Match_InvalidUImm4:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 4) - 1);
case Match_InvalidUImm5:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 5) - 1);
+ case Match_InvalidUImm6:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 6) - 1);
case Match_InvalidUImm7:
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 7) - 1);
+ case Match_InvalidUImm8:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 0, (1 << 8) - 1);
+ case Match_InvalidUImm8GE32:
+ return generateImmOutOfRangeError(Operands, ErrorInfo, 32, (1 << 8) - 1);
case Match_InvalidSImm5:
return generateImmOutOfRangeError(Operands, ErrorInfo, -(1 << 4),
(1 << 4) - 1);
@@ -1236,16 +1493,9 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
"operand must be a valid system register "
"name or an integer in the range");
}
- case Match_InvalidFenceArg: {
+ case Match_InvalidLoadFPImm: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
- return Error(ErrorLoc, "operand must be formed of letters selected "
- "in-order from 'iorw' or be 0");
- }
- case Match_InvalidFRMArg: {
- SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
- return Error(
- ErrorLoc,
- "operand must be a valid floating point rounding mode mnemonic");
+ return Error(ErrorLoc, "operand must be a valid floating-point constant");
}
case Match_InvalidBareSymbol: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
@@ -1263,12 +1513,13 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
return Error(ErrorLoc, "operand must be a symbol with %tprel_add modifier");
}
+ case Match_InvalidRTZArg: {
+ SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(ErrorLoc, "operand must be 'rtz' floating-point rounding mode");
+ }
case Match_InvalidVTypeI: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
- return Error(
- ErrorLoc,
- "operand must be "
- "e[8|16|32|64|128|256|512|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu]");
+ return generateVTypeError(ErrorLoc);
}
case Match_InvalidVMaskRegister: {
SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
@@ -1279,6 +1530,19 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
(1 << 4),
"immediate must be in the range");
}
+ case Match_InvalidRlist: {
+ SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(
+ ErrorLoc,
+ "operand must be {ra [, s0[-sN]]} or {x1 [, x8[-x9][, x18[-xN]]]}");
+ }
+ case Match_InvalidSpimm: {
+ SMLoc ErrorLoc = ((RISCVOperand &)*Operands[ErrorInfo]).getStartLoc();
+ return Error(
+ ErrorLoc,
+ "stack adjustment is invalid for this instruction and register list; "
+ "refer to Zc spec for a detailed range of stack adjustment");
+ }
case Match_InvalidRnumArg: {
return generateImmOutOfRangeError(Operands, ErrorInfo, 0, 10);
}
@@ -1289,23 +1553,22 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
// Attempts to match Name as a register (either using the default name or
// alternative ABI names), returning the matching register. Upon
-// failure, returns true and sets RegNo to 0. If IsRV32E then registers
-// x16-x31 will be rejected.
-static bool matchRegisterNameHelper(bool IsRV32E, MCRegister &RegNo,
- StringRef Name) {
- RegNo = MatchRegisterName(Name);
+// failure, returns a non-valid MCRegister. If IsRVE, then registers x16-x31
+// will be rejected.
+static MCRegister matchRegisterNameHelper(bool IsRVE, StringRef Name) {
+ MCRegister Reg = MatchRegisterName(Name);
// The 16-/32- and 64-bit FPRs have the same asm name. Check that the initial
// match always matches the 64-bit variant, and not the 16/32-bit one.
- assert(!(RegNo >= RISCV::F0_H && RegNo <= RISCV::F31_H));
- assert(!(RegNo >= RISCV::F0_F && RegNo <= RISCV::F31_F));
+ assert(!(Reg >= RISCV::F0_H && Reg <= RISCV::F31_H));
+ assert(!(Reg >= RISCV::F0_F && Reg <= RISCV::F31_F));
// The default FPR register class is based on the tablegen enum ordering.
static_assert(RISCV::F0_D < RISCV::F0_H, "FPR matching must be updated");
static_assert(RISCV::F0_D < RISCV::F0_F, "FPR matching must be updated");
- if (RegNo == RISCV::NoRegister)
- RegNo = MatchRegisterAltName(Name);
- if (IsRV32E && RegNo >= RISCV::X16 && RegNo <= RISCV::X31)
- RegNo = RISCV::NoRegister;
- return RegNo == RISCV::NoRegister;
+ if (!Reg)
+ Reg = MatchRegisterAltName(Name);
+ if (IsRVE && Reg >= RISCV::X16 && Reg <= RISCV::X31)
+ Reg = RISCV::NoRegister;
+ return Reg;
}
bool RISCVAsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
@@ -1321,18 +1584,18 @@ OperandMatchResultTy RISCVAsmParser::tryParseRegister(MCRegister &RegNo,
const AsmToken &Tok = getParser().getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
- RegNo = 0;
StringRef Name = getLexer().getTok().getIdentifier();
- if (matchRegisterNameHelper(isRV32E(), (MCRegister &)RegNo, Name))
+ RegNo = matchRegisterNameHelper(isRVE(), Name);
+ if (!RegNo)
return MatchOperand_NoMatch;
getParser().Lex(); // Eat identifier token.
return MatchOperand_Success;
}
-OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands,
- bool AllowParens) {
+ParseStatus RISCVAsmParser::parseRegister(OperandVector &Operands,
+ bool AllowParens) {
SMLoc FirstS = getLoc();
bool HadParens = false;
AsmToken LParen;
@@ -1353,42 +1616,40 @@ OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands,
default:
if (HadParens)
getLexer().UnLex(LParen);
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
case AsmToken::Identifier:
StringRef Name = getLexer().getTok().getIdentifier();
- MCRegister RegNo;
- matchRegisterNameHelper(isRV32E(), RegNo, Name);
+ MCRegister RegNo = matchRegisterNameHelper(isRVE(), Name);
- if (RegNo == RISCV::NoRegister) {
+ if (!RegNo) {
if (HadParens)
getLexer().UnLex(LParen);
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
if (HadParens)
- Operands.push_back(RISCVOperand::createToken("(", FirstS, isRV64()));
+ Operands.push_back(RISCVOperand::createToken("(", FirstS));
SMLoc S = getLoc();
SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size());
getLexer().Lex();
- Operands.push_back(RISCVOperand::createReg(RegNo, S, E, isRV64()));
+ Operands.push_back(RISCVOperand::createReg(RegNo, S, E));
}
if (HadParens) {
getParser().Lex(); // Eat ')'
- Operands.push_back(RISCVOperand::createToken(")", getLoc(), isRV64()));
+ Operands.push_back(RISCVOperand::createToken(")", getLoc()));
}
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-RISCVAsmParser::parseInsnDirectiveOpcode(OperandVector &Operands) {
+ParseStatus RISCVAsmParser::parseInsnDirectiveOpcode(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E;
const MCExpr *Res;
switch (getLexer().getKind()) {
default:
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
case AsmToken::LParen:
case AsmToken::Minus:
case AsmToken::Plus:
@@ -1397,58 +1658,112 @@ RISCVAsmParser::parseInsnDirectiveOpcode(OperandVector &Operands) {
case AsmToken::Integer:
case AsmToken::String: {
if (getParser().parseExpression(Res, E))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
auto *CE = dyn_cast<MCConstantExpr>(Res);
if (CE) {
int64_t Imm = CE->getValue();
if (isUInt<7>(Imm)) {
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
}
- Twine Msg = "immediate must be an integer in the range";
- Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 7) - 1) + "]");
- return MatchOperand_ParseFail;
+ break;
}
case AsmToken::Identifier: {
StringRef Identifier;
if (getParser().parseIdentifier(Identifier))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
auto Opcode = RISCVInsnOpcode::lookupRISCVOpcodeByName(Identifier);
if (Opcode) {
+ assert(isUInt<7>(Opcode->Value) && (Opcode->Value & 0x3) == 3 &&
+ "Unexpected opcode");
Res = MCConstantExpr::create(Opcode->Value, getContext());
E = SMLoc::getFromPointer(S.getPointer() + Identifier.size());
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
- Twine Msg = "operand must be a valid opcode name or an "
- "integer in the range";
- Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 7) - 1) + "]");
- return MatchOperand_ParseFail;
+ break;
+ }
+ case AsmToken::Percent:
+ break;
+ }
+
+ return generateImmOutOfRangeError(
+ S, 0, 127,
+ "opcode must be a valid opcode name or an immediate in the range");
+}
+
+ParseStatus RISCVAsmParser::parseInsnCDirectiveOpcode(OperandVector &Operands) {
+ SMLoc S = getLoc();
+ SMLoc E;
+ const MCExpr *Res;
+
+ switch (getLexer().getKind()) {
+ default:
+ return ParseStatus::NoMatch;
+ case AsmToken::LParen:
+ case AsmToken::Minus:
+ case AsmToken::Plus:
+ case AsmToken::Exclaim:
+ case AsmToken::Tilde:
+ case AsmToken::Integer:
+ case AsmToken::String: {
+ if (getParser().parseExpression(Res, E))
+ return ParseStatus::Failure;
+
+ auto *CE = dyn_cast<MCConstantExpr>(Res);
+ if (CE) {
+ int64_t Imm = CE->getValue();
+ if (Imm >= 0 && Imm <= 2) {
+ Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
+ return ParseStatus::Success;
+ }
+ }
+
+ break;
+ }
+ case AsmToken::Identifier: {
+ StringRef Identifier;
+ if (getParser().parseIdentifier(Identifier))
+ return ParseStatus::Failure;
+
+ unsigned Opcode;
+ if (Identifier == "C0")
+ Opcode = 0;
+ else if (Identifier == "C1")
+ Opcode = 1;
+ else if (Identifier == "C2")
+ Opcode = 2;
+ else
+ break;
+
+ Res = MCConstantExpr::create(Opcode, getContext());
+ E = SMLoc::getFromPointer(S.getPointer() + Identifier.size());
+ Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
+ return ParseStatus::Success;
}
case AsmToken::Percent: {
// Discard operand with modifier.
- Twine Msg = "immediate must be an integer in the range";
- Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 7) - 1) + "]");
- return MatchOperand_ParseFail;
+ break;
}
}
- return MatchOperand_NoMatch;
+ return generateImmOutOfRangeError(
+ S, 0, 2,
+ "opcode must be a valid opcode name or an immediate in the range");
}
-OperandMatchResultTy
-RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) {
+ParseStatus RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) {
SMLoc S = getLoc();
const MCExpr *Res;
switch (getLexer().getKind()) {
default:
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
case AsmToken::LParen:
case AsmToken::Minus:
case AsmToken::Plus:
@@ -1457,7 +1772,7 @@ RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) {
case AsmToken::Integer:
case AsmToken::String: {
if (getParser().parseExpression(Res))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
auto *CE = dyn_cast<MCConstantExpr>(Res);
if (CE) {
@@ -1466,64 +1781,148 @@ RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) {
auto SysReg = RISCVSysReg::lookupSysRegByEncoding(Imm);
// Accept an immediate representing a named or un-named Sys Reg
// if the range is valid, regardless of the required features.
- Operands.push_back(RISCVOperand::createSysReg(
- SysReg ? SysReg->Name : "", S, Imm, isRV64()));
- return MatchOperand_Success;
+ Operands.push_back(
+ RISCVOperand::createSysReg(SysReg ? SysReg->Name : "", S, Imm));
+ return ParseStatus::Success;
}
}
- Twine Msg = "immediate must be an integer in the range";
- Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 12) - 1) + "]");
- return MatchOperand_ParseFail;
+ return generateImmOutOfRangeError(S, 0, (1 << 12) - 1);
}
case AsmToken::Identifier: {
StringRef Identifier;
if (getParser().parseIdentifier(Identifier))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
+
+ // Check for CSR name conflicts.
+ // Custom CSR names might conflict with CSR names in privileged spec.
+ // E.g. - SiFive mnscratch(0x350) and privileged spec mnscratch(0x740).
+ auto CheckCSRNameConflict = [&]() {
+ if (!(RISCVSysReg::lookupSysRegByName(Identifier))) {
+ Error(S, "system register use requires an option to be enabled");
+ return true;
+ }
+ return false;
+ };
+
+ // First check for vendor specific CSRs.
+ auto SiFiveReg = RISCVSysReg::lookupSiFiveRegByName(Identifier);
+ if (SiFiveReg) {
+ if (SiFiveReg->haveVendorRequiredFeatures(getSTI().getFeatureBits())) {
+ Operands.push_back(
+ RISCVOperand::createSysReg(Identifier, S, SiFiveReg->Encoding));
+ return ParseStatus::Success;
+ }
+ if (CheckCSRNameConflict())
+ return ParseStatus::Failure;
+ }
auto SysReg = RISCVSysReg::lookupSysRegByName(Identifier);
if (!SysReg)
- SysReg = RISCVSysReg::lookupSysRegByAltName(Identifier);
- if (!SysReg)
if ((SysReg = RISCVSysReg::lookupSysRegByDeprecatedName(Identifier)))
Warning(S, "'" + Identifier + "' is a deprecated alias for '" +
SysReg->Name + "'");
- // Accept a named Sys Reg if the required features are present.
- if (SysReg) {
- if (!SysReg->haveRequiredFeatures(getSTI().getFeatureBits())) {
- Error(S, "system register use requires an option to be enabled");
- return MatchOperand_ParseFail;
+ // Check for CSR encoding conflicts.
+ // Custom CSR encoding might conflict with CSR encoding in privileged spec.
+ // E.g. - SiFive mnscratch(0x350) and privileged spec miselect(0x350).
+ auto CheckCSREncodingConflict = [&]() {
+ auto Reg = RISCVSysReg::lookupSiFiveRegByEncoding(SysReg->Encoding);
+ if (Reg && Reg->haveVendorRequiredFeatures(getSTI().getFeatureBits())) {
+ Warning(S, "'" + Identifier + "' CSR is not available on the current " +
+ "subtarget. Instead '" + Reg->Name +
+ "' CSR will be used.");
+ Operands.push_back(
+ RISCVOperand::createSysReg(Reg->Name, S, Reg->Encoding));
+ return true;
}
- Operands.push_back(RISCVOperand::createSysReg(
- Identifier, S, SysReg->Encoding, isRV64()));
- return MatchOperand_Success;
+ return false;
+ };
+
+ // Accept a named SysReg if the required features are present.
+ if (SysReg) {
+ if (!SysReg->haveRequiredFeatures(getSTI().getFeatureBits()))
+ return Error(S, "system register use requires an option to be enabled");
+ if (CheckCSREncodingConflict())
+ return ParseStatus::Success;
+ Operands.push_back(
+ RISCVOperand::createSysReg(Identifier, S, SysReg->Encoding));
+ return ParseStatus::Success;
}
- Twine Msg = "operand must be a valid system register name "
- "or an integer in the range";
- Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 12) - 1) + "]");
- return MatchOperand_ParseFail;
+ return generateImmOutOfRangeError(S, 0, (1 << 12) - 1,
+ "operand must be a valid system register "
+ "name or an integer in the range");
}
case AsmToken::Percent: {
// Discard operand with modifier.
- Twine Msg = "immediate must be an integer in the range";
- Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 12) - 1) + "]");
- return MatchOperand_ParseFail;
+ return generateImmOutOfRangeError(S, 0, (1 << 12) - 1);
}
}
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
-OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) {
+ParseStatus RISCVAsmParser::parseFPImm(OperandVector &Operands) {
+ SMLoc S = getLoc();
+
+ // Parse the special float representations (inf/nan/min).
+ if (getTok().is(AsmToken::Identifier)) {
+ StringRef Identifier = getTok().getIdentifier();
+ if (Identifier.compare_insensitive("inf") == 0) {
+ Operands.push_back(
+ RISCVOperand::createImm(MCConstantExpr::create(30, getContext()), S,
+ getTok().getEndLoc(), isRV64()));
+ } else if (Identifier.compare_insensitive("nan") == 0) {
+ Operands.push_back(
+ RISCVOperand::createImm(MCConstantExpr::create(31, getContext()), S,
+ getTok().getEndLoc(), isRV64()));
+ } else if (Identifier.compare_insensitive("min") == 0) {
+ Operands.push_back(
+ RISCVOperand::createImm(MCConstantExpr::create(1, getContext()), S,
+ getTok().getEndLoc(), isRV64()));
+ } else {
+ return TokError("invalid floating point literal");
+ }
+
+ Lex(); // Eat the token.
+
+ return ParseStatus::Success;
+ }
+
+ // Handle negation, as that still comes through as a separate token.
+ bool IsNegative = parseOptionalToken(AsmToken::Minus);
+
+ const AsmToken &Tok = getTok();
+ if (!Tok.is(AsmToken::Real))
+ return TokError("invalid floating point immediate");
+
+ // Parse FP representation.
+ APFloat RealVal(APFloat::IEEEdouble());
+ auto StatusOrErr =
+ RealVal.convertFromString(Tok.getString(), APFloat::rmTowardZero);
+ if (errorToBool(StatusOrErr.takeError()))
+ return TokError("invalid floating point representation");
+
+ if (IsNegative)
+ RealVal.changeSign();
+
+ Operands.push_back(RISCVOperand::createFPImm(
+ RealVal.bitcastToAPInt().getZExtValue(), S));
+
+ Lex(); // Eat the token.
+
+ return ParseStatus::Success;
+}
+
+ParseStatus RISCVAsmParser::parseImmediate(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E;
const MCExpr *Res;
switch (getLexer().getKind()) {
default:
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
case AsmToken::LParen:
case AsmToken::Dot:
case AsmToken::Minus:
@@ -1534,75 +1933,60 @@ OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) {
case AsmToken::String:
case AsmToken::Identifier:
if (getParser().parseExpression(Res, E))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
break;
case AsmToken::Percent:
return parseOperandWithModifier(Operands);
}
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-RISCVAsmParser::parseOperandWithModifier(OperandVector &Operands) {
+ParseStatus RISCVAsmParser::parseOperandWithModifier(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E;
- if (getLexer().getKind() != AsmToken::Percent) {
- Error(getLoc(), "expected '%' for operand modifier");
- return MatchOperand_ParseFail;
- }
-
- getParser().Lex(); // Eat '%'
+ if (parseToken(AsmToken::Percent, "expected '%' for operand modifier"))
+ return ParseStatus::Failure;
- if (getLexer().getKind() != AsmToken::Identifier) {
- Error(getLoc(), "expected valid identifier for operand modifier");
- return MatchOperand_ParseFail;
- }
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return Error(getLoc(), "expected valid identifier for operand modifier");
StringRef Identifier = getParser().getTok().getIdentifier();
RISCVMCExpr::VariantKind VK = RISCVMCExpr::getVariantKindForName(Identifier);
- if (VK == RISCVMCExpr::VK_RISCV_Invalid) {
- Error(getLoc(), "unrecognized operand modifier");
- return MatchOperand_ParseFail;
- }
+ if (VK == RISCVMCExpr::VK_RISCV_Invalid)
+ return Error(getLoc(), "unrecognized operand modifier");
getParser().Lex(); // Eat the identifier
- if (getLexer().getKind() != AsmToken::LParen) {
- Error(getLoc(), "expected '('");
- return MatchOperand_ParseFail;
- }
- getParser().Lex(); // Eat '('
+ if (parseToken(AsmToken::LParen, "expected '('"))
+ return ParseStatus::Failure;
const MCExpr *SubExpr;
- if (getParser().parseParenExpression(SubExpr, E)) {
- return MatchOperand_ParseFail;
- }
+ if (getParser().parseParenExpression(SubExpr, E))
+ return ParseStatus::Failure;
const MCExpr *ModExpr = RISCVMCExpr::create(SubExpr, VK, getContext());
Operands.push_back(RISCVOperand::createImm(ModExpr, S, E, isRV64()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
+ParseStatus RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
SMLoc S = getLoc();
const MCExpr *Res;
if (getLexer().getKind() != AsmToken::Identifier)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
StringRef Identifier;
AsmToken Tok = getLexer().getTok();
if (getParser().parseIdentifier(Identifier))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
SMLoc E = SMLoc::getFromPointer(S.getPointer() + Identifier.size());
- if (Identifier.consume_back("@plt")) {
- Error(getLoc(), "'@plt' operand not valid for instruction");
- return MatchOperand_ParseFail;
- }
+ if (Identifier.consume_back("@plt"))
+ return Error(getLoc(), "'@plt' operand not valid for instruction");
MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
@@ -1610,7 +1994,7 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
const MCExpr *V = Sym->getVariableValue(/*SetUsed=*/false);
if (!isa<MCSymbolRefExpr>(V)) {
getLexer().UnLex(Tok); // Put back if it's not a bare symbol.
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
}
Res = V;
} else
@@ -1620,7 +2004,7 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
switch (getLexer().getKind()) {
default:
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
case AsmToken::Plus:
Opcode = MCBinaryExpr::Add;
getLexer().Lex();
@@ -1633,26 +2017,26 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) {
const MCExpr *Expr;
if (getParser().parseExpression(Expr, E))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext());
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy RISCVAsmParser::parseCallSymbol(OperandVector &Operands) {
+ParseStatus RISCVAsmParser::parseCallSymbol(OperandVector &Operands) {
SMLoc S = getLoc();
const MCExpr *Res;
if (getLexer().getKind() != AsmToken::Identifier)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
// Avoid parsing the register in `call rd, foo` as a call symbol.
if (getLexer().peekTok().getKind() != AsmToken::EndOfStatement)
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
StringRef Identifier;
if (getParser().parseIdentifier(Identifier))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
SMLoc E = SMLoc::getFromPointer(S.getPointer() + Identifier.size());
@@ -1664,31 +2048,28 @@ OperandMatchResultTy RISCVAsmParser::parseCallSymbol(OperandVector &Operands) {
Res = MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, getContext());
Res = RISCVMCExpr::create(Res, Kind, getContext());
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-RISCVAsmParser::parsePseudoJumpSymbol(OperandVector &Operands) {
+ParseStatus RISCVAsmParser::parsePseudoJumpSymbol(OperandVector &Operands) {
SMLoc S = getLoc();
SMLoc E;
const MCExpr *Res;
if (getParser().parseExpression(Res, E))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
if (Res->getKind() != MCExpr::ExprKind::SymbolRef ||
cast<MCSymbolRefExpr>(Res)->getKind() ==
- MCSymbolRefExpr::VariantKind::VK_PLT) {
- Error(S, "operand must be a valid jump target");
- return MatchOperand_ParseFail;
- }
+ MCSymbolRefExpr::VariantKind::VK_PLT)
+ return Error(S, "operand must be a valid jump target");
Res = RISCVMCExpr::create(Res, RISCVMCExpr::VK_RISCV_CALL, getContext());
Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64()));
- return MatchOperand_Success;
+ return ParseStatus::Success;
}
-OperandMatchResultTy RISCVAsmParser::parseJALOffset(OperandVector &Operands) {
+ParseStatus RISCVAsmParser::parseJALOffset(OperandVector &Operands) {
// Parsing jal operands is fiddly due to the `jal foo` and `jal ra, foo`
// both being acceptable forms. When parsing `jal ra, foo` this function
// will be called for the `ra` register operand in an attempt to match the
@@ -1700,163 +2081,246 @@ OperandMatchResultTy RISCVAsmParser::parseJALOffset(OperandVector &Operands) {
// is an identifier and is followed by a comma.
if (getLexer().is(AsmToken::Identifier) &&
getLexer().peekTok().is(AsmToken::Comma))
- return MatchOperand_NoMatch;
+ return ParseStatus::NoMatch;
return parseImmediate(Operands);
}
-OperandMatchResultTy RISCVAsmParser::parseVTypeI(OperandVector &Operands) {
- SMLoc S = getLoc();
- if (getLexer().isNot(AsmToken::Identifier))
- return MatchOperand_NoMatch;
-
- SmallVector<AsmToken, 7> VTypeIElements;
- // Put all the tokens for vtypei operand into VTypeIElements vector.
- while (getLexer().isNot(AsmToken::EndOfStatement)) {
- VTypeIElements.push_back(getLexer().getTok());
- getLexer().Lex();
- if (getLexer().is(AsmToken::EndOfStatement))
+bool RISCVAsmParser::parseVTypeToken(StringRef Identifier, VTypeState &State,
+ unsigned &Sew, unsigned &Lmul,
+ bool &Fractional, bool &TailAgnostic,
+ bool &MaskAgnostic) {
+ switch (State) {
+ case VTypeState_SEW:
+ if (!Identifier.consume_front("e"))
+ break;
+ if (Identifier.getAsInteger(10, Sew))
break;
- if (getLexer().isNot(AsmToken::Comma))
- goto MatchFail;
- AsmToken Comma = getLexer().getTok();
- VTypeIElements.push_back(Comma);
- getLexer().Lex();
- }
-
- if (VTypeIElements.size() == 7) {
- // The VTypeIElements layout is:
- // SEW comma LMUL comma TA comma MA
- // 0 1 2 3 4 5 6
- StringRef Name = VTypeIElements[0].getIdentifier();
- if (!Name.consume_front("e"))
- goto MatchFail;
- unsigned Sew;
- if (Name.getAsInteger(10, Sew))
- goto MatchFail;
if (!RISCVVType::isValidSEW(Sew))
- goto MatchFail;
-
- Name = VTypeIElements[2].getIdentifier();
- if (!Name.consume_front("m"))
- goto MatchFail;
- // "m" or "mf"
- bool Fractional = Name.consume_front("f");
- unsigned Lmul;
- if (Name.getAsInteger(10, Lmul))
- goto MatchFail;
+ break;
+ State = VTypeState_LMUL;
+ return false;
+ case VTypeState_LMUL: {
+ if (!Identifier.consume_front("m"))
+ break;
+ Fractional = Identifier.consume_front("f");
+ if (Identifier.getAsInteger(10, Lmul))
+ break;
if (!RISCVVType::isValidLMUL(Lmul, Fractional))
- goto MatchFail;
-
- // ta or tu
- Name = VTypeIElements[4].getIdentifier();
- bool TailAgnostic;
- if (Name == "ta")
+ break;
+ State = VTypeState_TailPolicy;
+ return false;
+ }
+ case VTypeState_TailPolicy:
+ if (Identifier == "ta")
TailAgnostic = true;
- else if (Name == "tu")
+ else if (Identifier == "tu")
TailAgnostic = false;
else
- goto MatchFail;
-
- // ma or mu
- Name = VTypeIElements[6].getIdentifier();
- bool MaskAgnostic;
- if (Name == "ma")
+ break;
+ State = VTypeState_MaskPolicy;
+ return false;
+ case VTypeState_MaskPolicy:
+ if (Identifier == "ma")
MaskAgnostic = true;
- else if (Name == "mu")
+ else if (Identifier == "mu")
MaskAgnostic = false;
else
- goto MatchFail;
+ break;
+ State = VTypeState_Done;
+ return false;
+ case VTypeState_Done:
+ // Extra token?
+ break;
+ }
+
+ return true;
+}
+
+ParseStatus RISCVAsmParser::parseVTypeI(OperandVector &Operands) {
+ SMLoc S = getLoc();
+
+ unsigned Sew = 0;
+ unsigned Lmul = 0;
+ bool Fractional = false;
+ bool TailAgnostic = false;
+ bool MaskAgnostic = false;
+
+ VTypeState State = VTypeState_SEW;
+
+ if (getLexer().isNot(AsmToken::Identifier))
+ return ParseStatus::NoMatch;
+ StringRef Identifier = getTok().getIdentifier();
+
+ if (parseVTypeToken(Identifier, State, Sew, Lmul, Fractional, TailAgnostic,
+ MaskAgnostic))
+ return ParseStatus::NoMatch;
+
+ getLexer().Lex();
+
+ while (parseOptionalToken(AsmToken::Comma)) {
+ if (getLexer().isNot(AsmToken::Identifier))
+ break;
+
+ Identifier = getTok().getIdentifier();
+
+ if (parseVTypeToken(Identifier, State, Sew, Lmul, Fractional, TailAgnostic,
+ MaskAgnostic))
+ break;
+
+ getLexer().Lex();
+ }
+
+ if (getLexer().is(AsmToken::EndOfStatement) && State == VTypeState_Done) {
RISCVII::VLMUL VLMUL = RISCVVType::encodeLMUL(Lmul, Fractional);
unsigned VTypeI =
RISCVVType::encodeVTYPE(VLMUL, Sew, TailAgnostic, MaskAgnostic);
- Operands.push_back(RISCVOperand::createVType(VTypeI, S, isRV64()));
- return MatchOperand_Success;
+ Operands.push_back(RISCVOperand::createVType(VTypeI, S));
+ return ParseStatus::Success;
}
-// If NoMatch, unlex all the tokens that comprise a vtypei operand
-MatchFail:
- while (!VTypeIElements.empty())
- getLexer().UnLex(VTypeIElements.pop_back_val());
- return MatchOperand_NoMatch;
+ return generateVTypeError(S);
}
-OperandMatchResultTy RISCVAsmParser::parseMaskReg(OperandVector &Operands) {
- switch (getLexer().getKind()) {
- default:
- return MatchOperand_NoMatch;
- case AsmToken::Identifier:
- StringRef Name = getLexer().getTok().getIdentifier();
- if (!Name.consume_back(".t")) {
- Error(getLoc(), "expected '.t' suffix");
- return MatchOperand_ParseFail;
- }
- MCRegister RegNo;
- matchRegisterNameHelper(isRV32E(), RegNo, Name);
+bool RISCVAsmParser::generateVTypeError(SMLoc ErrorLoc) {
+ return Error(
+ ErrorLoc,
+ "operand must be "
+ "e[8|16|32|64|128|256|512|1024],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu]");
+}
- if (RegNo == RISCV::NoRegister)
- return MatchOperand_NoMatch;
- if (RegNo != RISCV::V0)
- return MatchOperand_NoMatch;
- SMLoc S = getLoc();
- SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size());
- getLexer().Lex();
- Operands.push_back(RISCVOperand::createReg(RegNo, S, E, isRV64()));
- }
+ParseStatus RISCVAsmParser::parseMaskReg(OperandVector &Operands) {
+ if (getLexer().isNot(AsmToken::Identifier))
+ return ParseStatus::NoMatch;
- return MatchOperand_Success;
+ StringRef Name = getLexer().getTok().getIdentifier();
+ if (!Name.consume_back(".t"))
+ return Error(getLoc(), "expected '.t' suffix");
+ MCRegister RegNo = matchRegisterNameHelper(isRVE(), Name);
+
+ if (!RegNo)
+ return ParseStatus::NoMatch;
+ if (RegNo != RISCV::V0)
+ return ParseStatus::NoMatch;
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size());
+ getLexer().Lex();
+ Operands.push_back(RISCVOperand::createReg(RegNo, S, E));
+ return ParseStatus::Success;
}
-OperandMatchResultTy RISCVAsmParser::parseGPRAsFPR(OperandVector &Operands) {
- switch (getLexer().getKind()) {
- default:
- return MatchOperand_NoMatch;
- case AsmToken::Identifier:
- StringRef Name = getLexer().getTok().getIdentifier();
- MCRegister RegNo;
- matchRegisterNameHelper(isRV32E(), RegNo, Name);
+ParseStatus RISCVAsmParser::parseGPRAsFPR(OperandVector &Operands) {
+ if (getLexer().isNot(AsmToken::Identifier))
+ return ParseStatus::NoMatch;
- if (RegNo == RISCV::NoRegister)
- return MatchOperand_NoMatch;
- SMLoc S = getLoc();
- SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1);
- getLexer().Lex();
- Operands.push_back(RISCVOperand::createReg(
- RegNo, S, E, isRV64(), !getSTI().hasFeature(RISCV::FeatureStdExtF)));
- }
- return MatchOperand_Success;
+ StringRef Name = getLexer().getTok().getIdentifier();
+ MCRegister RegNo = matchRegisterNameHelper(isRVE(), Name);
+
+ if (!RegNo)
+ return ParseStatus::NoMatch;
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size());
+ getLexer().Lex();
+ Operands.push_back(RISCVOperand::createReg(
+ RegNo, S, E, !getSTI().hasFeature(RISCV::FeatureStdExtF)));
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) {
- if (getLexer().isNot(AsmToken::LParen)) {
- Error(getLoc(), "expected '('");
- return MatchOperand_ParseFail;
- }
+ParseStatus RISCVAsmParser::parseFRMArg(OperandVector &Operands) {
+ if (getLexer().isNot(AsmToken::Identifier))
+ return TokError(
+ "operand must be a valid floating point rounding mode mnemonic");
+
+ StringRef Str = getLexer().getTok().getIdentifier();
+ RISCVFPRndMode::RoundingMode FRM = RISCVFPRndMode::stringToRoundingMode(Str);
+
+ if (FRM == RISCVFPRndMode::Invalid)
+ return TokError(
+ "operand must be a valid floating point rounding mode mnemonic");
- getParser().Lex(); // Eat '('
- Operands.push_back(RISCVOperand::createToken("(", getLoc(), isRV64()));
+ Operands.push_back(RISCVOperand::createFRMArg(FRM, getLoc()));
+ Lex(); // Eat identifier token.
+ return ParseStatus::Success;
+}
+
+ParseStatus RISCVAsmParser::parseFenceArg(OperandVector &Operands) {
+ const AsmToken &Tok = getLexer().getTok();
+
+ if (Tok.is(AsmToken::Integer)) {
+ if (Tok.getIntVal() != 0)
+ goto ParseFail;
- if (parseRegister(Operands) != MatchOperand_Success) {
- Error(getLoc(), "expected register");
- return MatchOperand_ParseFail;
+ Operands.push_back(RISCVOperand::createFenceArg(0, getLoc()));
+ Lex();
+ return ParseStatus::Success;
}
- if (getLexer().isNot(AsmToken::RParen)) {
- Error(getLoc(), "expected ')'");
- return MatchOperand_ParseFail;
+ if (Tok.is(AsmToken::Identifier)) {
+ StringRef Str = Tok.getIdentifier();
+
+ // Letters must be unique, taken from 'iorw', and in ascending order. This
+ // holds as long as each individual character is one of 'iorw' and is
+ // greater than the previous character.
+ unsigned Imm = 0;
+ bool Valid = true;
+ char Prev = '\0';
+ for (char c : Str) {
+ switch (c) {
+ default:
+ Valid = false;
+ break;
+ case 'i':
+ Imm |= RISCVFenceField::I;
+ break;
+ case 'o':
+ Imm |= RISCVFenceField::O;
+ break;
+ case 'r':
+ Imm |= RISCVFenceField::R;
+ break;
+ case 'w':
+ Imm |= RISCVFenceField::W;
+ break;
+ }
+
+ if (c <= Prev) {
+ Valid = false;
+ break;
+ }
+ Prev = c;
+ }
+
+ if (!Valid)
+ goto ParseFail;
+
+ Operands.push_back(RISCVOperand::createFenceArg(Imm, getLoc()));
+ Lex();
+ return ParseStatus::Success;
}
- getParser().Lex(); // Eat ')'
- Operands.push_back(RISCVOperand::createToken(")", getLoc(), isRV64()));
+ParseFail:
+ return TokError("operand must be formed of letters selected in-order from "
+ "'iorw' or be 0");
+}
+
+ParseStatus RISCVAsmParser::parseMemOpBaseReg(OperandVector &Operands) {
+ if (parseToken(AsmToken::LParen, "expected '('"))
+ return ParseStatus::Failure;
+ Operands.push_back(RISCVOperand::createToken("(", getLoc()));
- return MatchOperand_Success;
+ if (!parseRegister(Operands).isSuccess())
+ return Error(getLoc(), "expected register");
+
+ if (parseToken(AsmToken::RParen, "expected ')'"))
+ return ParseStatus::Failure;
+ Operands.push_back(RISCVOperand::createToken(")", getLoc()));
+
+ return ParseStatus::Success;
}
-OperandMatchResultTy
-RISCVAsmParser::parseZeroOffsetMemOp(OperandVector &Operands) {
+ParseStatus RISCVAsmParser::parseZeroOffsetMemOp(OperandVector &Operands) {
// Atomic operations such as lr.w, sc.w, and amo*.w accept a "memory operand"
// as one of their register operands, such as `(a0)`. This just denotes that
// the register (in this case `a0`) contains a memory address.
@@ -1885,7 +2349,7 @@ RISCVAsmParser::parseZeroOffsetMemOp(OperandVector &Operands) {
SMLoc ImmStart = getLoc();
if (getParser().parseIntToken(ImmVal,
"expected '(' or optional integer offset"))
- return MatchOperand_ParseFail;
+ return ParseStatus::Failure;
// Create a RISCVOperand for checking later (so the error messages are
// nicer), but we don't add it to Operands.
@@ -1895,32 +2359,136 @@ RISCVAsmParser::parseZeroOffsetMemOp(OperandVector &Operands) {
ImmStart, ImmEnd, isRV64());
}
- if (getLexer().isNot(AsmToken::LParen)) {
- Error(getLoc(), OptionalImmOp ? "expected '(' after optional integer offset"
- : "expected '(' or optional integer offset");
- return MatchOperand_ParseFail;
- }
- getParser().Lex(); // Eat '('
+ if (parseToken(AsmToken::LParen,
+ OptionalImmOp ? "expected '(' after optional integer offset"
+ : "expected '(' or optional integer offset"))
+ return ParseStatus::Failure;
- if (parseRegister(Operands) != MatchOperand_Success) {
- Error(getLoc(), "expected register");
- return MatchOperand_ParseFail;
- }
+ if (!parseRegister(Operands).isSuccess())
+ return Error(getLoc(), "expected register");
- if (getLexer().isNot(AsmToken::RParen)) {
- Error(getLoc(), "expected ')'");
- return MatchOperand_ParseFail;
- }
- getParser().Lex(); // Eat ')'
+ if (parseToken(AsmToken::RParen, "expected ')'"))
+ return ParseStatus::Failure;
// Deferred Handling of non-zero offsets. This makes the error messages nicer.
- if (OptionalImmOp && !OptionalImmOp->isImmZero()) {
- Error(OptionalImmOp->getStartLoc(), "optional integer offset must be 0",
- SMRange(OptionalImmOp->getStartLoc(), OptionalImmOp->getEndLoc()));
- return MatchOperand_ParseFail;
+ if (OptionalImmOp && !OptionalImmOp->isImmZero())
+ return Error(
+ OptionalImmOp->getStartLoc(), "optional integer offset must be 0",
+ SMRange(OptionalImmOp->getStartLoc(), OptionalImmOp->getEndLoc()));
+
+ return ParseStatus::Success;
+}
+
+ParseStatus RISCVAsmParser::parseReglist(OperandVector &Operands) {
+ // Rlist: {ra [, s0[-sN]]}
+ // XRlist: {x1 [, x8[-x9][, x18[-xN]]]}
+ SMLoc S = getLoc();
+
+ if (parseToken(AsmToken::LCurly, "register list must start with '{'"))
+ return ParseStatus::Failure;
+
+ bool IsEABI = isRVE();
+
+ if (getLexer().isNot(AsmToken::Identifier))
+ return Error(getLoc(), "register list must start from 'ra' or 'x1'");
+
+ StringRef RegName = getLexer().getTok().getIdentifier();
+ MCRegister RegStart = matchRegisterNameHelper(IsEABI, RegName);
+ MCRegister RegEnd;
+ if (RegStart != RISCV::X1)
+ return Error(getLoc(), "register list must start from 'ra' or 'x1'");
+ getLexer().Lex();
+
+ // parse case like ,s0
+ if (parseOptionalToken(AsmToken::Comma)) {
+ if (getLexer().isNot(AsmToken::Identifier))
+ return Error(getLoc(), "invalid register");
+ StringRef RegName = getLexer().getTok().getIdentifier();
+ RegStart = matchRegisterNameHelper(IsEABI, RegName);
+ if (!RegStart)
+ return Error(getLoc(), "invalid register");
+ if (RegStart != RISCV::X8)
+ return Error(getLoc(),
+ "continuous register list must start from 's0' or 'x8'");
+ getLexer().Lex(); // eat reg
+ }
+
+ // parse case like -s1
+ if (parseOptionalToken(AsmToken::Minus)) {
+ StringRef EndName = getLexer().getTok().getIdentifier();
+ // FIXME: the register mapping and checks for EABI are wrong
+ RegEnd = matchRegisterNameHelper(IsEABI, EndName);
+ if (!RegEnd)
+ return Error(getLoc(), "invalid register");
+ if (IsEABI && RegEnd != RISCV::X9)
+ return Error(getLoc(), "contiguous register list of EABI can only be "
+ "'s0-s1' or 'x8-x9' pair");
+ getLexer().Lex();
}
- return MatchOperand_Success;
+ if (!IsEABI) {
+ // parse extra part like ', x18[-x20]' for XRegList
+ if (parseOptionalToken(AsmToken::Comma)) {
+ if (RegEnd != RISCV::X9)
+ return Error(
+ getLoc(),
+ "first contiguous registers pair of register list must be 'x8-x9'");
+
+ // parse ', x18' for extra part
+ if (getLexer().isNot(AsmToken::Identifier))
+ return Error(getLoc(), "invalid register");
+ StringRef EndName = getLexer().getTok().getIdentifier();
+ if (MatchRegisterName(EndName) != RISCV::X18)
+ return Error(getLoc(),
+ "second contiguous registers pair of register list "
+ "must start from 'x18'");
+ getLexer().Lex();
+
+ // parse '-x20' for extra part
+ if (parseOptionalToken(AsmToken::Minus)) {
+ if (getLexer().isNot(AsmToken::Identifier))
+ return Error(getLoc(), "invalid register");
+ EndName = getLexer().getTok().getIdentifier();
+ if (MatchRegisterName(EndName) == RISCV::NoRegister)
+ return Error(getLoc(), "invalid register");
+ getLexer().Lex();
+ }
+ RegEnd = MatchRegisterName(EndName);
+ }
+ }
+
+ if (RegEnd == RISCV::X26)
+ return Error(getLoc(), "invalid register list, {ra, s0-s10} or {x1, x8-x9, "
+ "x18-x26} is not supported");
+
+ if (parseToken(AsmToken::RCurly, "register list must end with '}'"))
+ return ParseStatus::Failure;
+
+ if (RegEnd == RISCV::NoRegister)
+ RegEnd = RegStart;
+
+ auto Encode = RISCVZC::encodeRlist(RegEnd, IsEABI);
+ if (Encode == 16)
+ return Error(S, "invalid register list");
+ Operands.push_back(RISCVOperand::createRlist(Encode, S));
+
+ return ParseStatus::Success;
+}
+
+ParseStatus RISCVAsmParser::parseZcmpSpimm(OperandVector &Operands) {
+ (void)parseOptionalToken(AsmToken::Minus);
+
+ SMLoc S = getLoc();
+ int64_t StackAdjustment = getLexer().getTok().getIntVal();
+ unsigned Spimm = 0;
+ unsigned RlistVal = static_cast<RISCVOperand *>(Operands[1].get())->Rlist.Val;
+
+ bool IsEABI = isRVE();
+ if (!RISCVZC::getSpimm(RlistVal, Spimm, StackAdjustment, isRV64(), IsEABI))
+ return ParseStatus::NoMatch;
+ Operands.push_back(RISCVOperand::createSpimm(Spimm << 4, S));
+ getLexer().Lex();
+ return ParseStatus::Success;
}
/// Looks at a token type and creates the relevant operand from this
@@ -1929,22 +2497,22 @@ RISCVAsmParser::parseZeroOffsetMemOp(OperandVector &Operands) {
bool RISCVAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
- OperandMatchResultTy Result =
+ ParseStatus Result =
MatchOperandParserImpl(Operands, Mnemonic, /*ParseForAllFeatures=*/true);
- if (Result == MatchOperand_Success)
+ if (Result.isSuccess())
return false;
- if (Result == MatchOperand_ParseFail)
+ if (Result.isFailure())
return true;
// Attempt to parse token as a register.
- if (parseRegister(Operands, true) == MatchOperand_Success)
+ if (parseRegister(Operands, true).isSuccess())
return false;
// Attempt to parse token as an immediate
- if (parseImmediate(Operands) == MatchOperand_Success) {
+ if (parseImmediate(Operands).isSuccess()) {
// Parse memory base register if present
if (getLexer().is(AsmToken::LParen))
- return parseMemOpBaseReg(Operands) != MatchOperand_Success;
+ return !parseMemOpBaseReg(Operands).isSuccess();
return false;
}
@@ -1962,7 +2530,7 @@ bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
// cause relaxations. Unfortunately instruction processing stage occurs in the
// same pass as relocation emission, so it's too late to set a 'sticky bit'
// for the entire file.
- if (getSTI().getFeatureBits()[RISCV::FeatureRelax]) {
+ if (getSTI().hasFeature(RISCV::FeatureRelax)) {
auto *Assembler = getTargetStreamer().getStreamer().getAssemblerPtr();
if (Assembler != nullptr) {
RISCVAsmBackend &MAB =
@@ -1972,7 +2540,7 @@ bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
}
// First operand is token for instruction
- Operands.push_back(RISCVOperand::createToken(Name, NameLoc, isRV64()));
+ Operands.push_back(RISCVOperand::createToken(Name, NameLoc));
// If there are no more operands, then finish
if (getLexer().is(AsmToken::EndOfStatement)) {
@@ -1985,22 +2553,16 @@ bool RISCVAsmParser::ParseInstruction(ParseInstructionInfo &Info,
return true;
// Parse until end of statement, consuming commas between operands
- while (getLexer().is(AsmToken::Comma)) {
- // Consume comma token
- getLexer().Lex();
-
+ while (parseOptionalToken(AsmToken::Comma)) {
// Parse next operand
if (parseOperand(Operands, Name))
return true;
}
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- SMLoc Loc = getLexer().getLoc();
+ if (getParser().parseEOL("unexpected token")) {
getParser().eatToEndOfStatement();
- return Error(Loc, "unexpected token");
+ return true;
}
-
- getParser().Lex(); // Consume the EndOfStatement.
return false;
}
@@ -2020,11 +2582,17 @@ bool RISCVAsmParser::classifySymbolRef(const MCExpr *Expr,
return false;
}
-bool RISCVAsmParser::ParseDirective(AsmToken DirectiveID) {
- // This returns false if this function recognizes the directive
- // regardless of whether it is successfully handles or reports an
- // error. Otherwise it returns true to give the generic parser a
- // chance at recognizing it.
+bool RISCVAsmParser::isSymbolDiff(const MCExpr *Expr) {
+ MCValue Res;
+ MCFixup Fixup;
+ if (Expr->evaluateAsRelocatable(Res, nullptr, &Fixup)) {
+ return Res.getRefKind() == RISCVMCExpr::VK_RISCV_None && Res.getSymA() &&
+ Res.getSymB();
+ }
+ return false;
+}
+
+ParseStatus RISCVAsmParser::parseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
if (IDVal == ".option")
@@ -2036,124 +2604,232 @@ bool RISCVAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".variant_cc")
return parseDirectiveVariantCC();
- return true;
+ return ParseStatus::NoMatch;
+}
+
+bool RISCVAsmParser::resetToArch(StringRef Arch, SMLoc Loc, std::string &Result,
+ bool FromOptionDirective) {
+ for (auto Feature : RISCVFeatureKV)
+ if (llvm::RISCVISAInfo::isSupportedExtensionFeature(Feature.Key))
+ clearFeatureBits(Feature.Value, Feature.Key);
+
+ auto ParseResult = llvm::RISCVISAInfo::parseArchString(
+ Arch, /*EnableExperimentalExtension=*/true,
+ /*ExperimentalExtensionVersionCheck=*/true);
+ if (!ParseResult) {
+ std::string Buffer;
+ raw_string_ostream OutputErrMsg(Buffer);
+ handleAllErrors(ParseResult.takeError(), [&](llvm::StringError &ErrMsg) {
+ OutputErrMsg << "invalid arch name '" << Arch << "', "
+ << ErrMsg.getMessage();
+ });
+
+ return Error(Loc, OutputErrMsg.str());
+ }
+ auto &ISAInfo = *ParseResult;
+
+ for (auto Feature : RISCVFeatureKV)
+ if (ISAInfo->hasExtension(Feature.Key))
+ setFeatureBits(Feature.Value, Feature.Key);
+
+ if (FromOptionDirective) {
+ if (ISAInfo->getXLen() == 32 && isRV64())
+ return Error(Loc, "bad arch string switching from rv64 to rv32");
+ else if (ISAInfo->getXLen() == 64 && !isRV64())
+ return Error(Loc, "bad arch string switching from rv32 to rv64");
+ }
+
+ if (ISAInfo->getXLen() == 32)
+ clearFeatureBits(RISCV::Feature64Bit, "64bit");
+ else if (ISAInfo->getXLen() == 64)
+ setFeatureBits(RISCV::Feature64Bit, "64bit");
+ else
+ return Error(Loc, "bad arch string " + Arch);
+
+ Result = ISAInfo->toString();
+ return false;
}
bool RISCVAsmParser::parseDirectiveOption() {
MCAsmParser &Parser = getParser();
// Get the option token.
AsmToken Tok = Parser.getTok();
+
// At the moment only identifiers are supported.
- if (Tok.isNot(AsmToken::Identifier))
- return Error(Parser.getTok().getLoc(),
- "unexpected token, expected identifier");
+ if (parseToken(AsmToken::Identifier, "expected identifier"))
+ return true;
StringRef Option = Tok.getIdentifier();
if (Option == "push") {
- getTargetStreamer().emitDirectiveOptionPush();
-
- Parser.Lex();
- if (Parser.getTok().isNot(AsmToken::EndOfStatement))
- return Error(Parser.getTok().getLoc(),
- "unexpected token, expected end of statement");
+ if (Parser.parseEOL())
+ return true;
+ getTargetStreamer().emitDirectiveOptionPush();
pushFeatureBits();
return false;
}
if (Option == "pop") {
SMLoc StartLoc = Parser.getTok().getLoc();
- getTargetStreamer().emitDirectiveOptionPop();
-
- Parser.Lex();
- if (Parser.getTok().isNot(AsmToken::EndOfStatement))
- return Error(Parser.getTok().getLoc(),
- "unexpected token, expected end of statement");
+ if (Parser.parseEOL())
+ return true;
+ getTargetStreamer().emitDirectiveOptionPop();
if (popFeatureBits())
return Error(StartLoc, ".option pop with no .option push");
return false;
}
- if (Option == "rvc") {
- getTargetStreamer().emitDirectiveOptionRVC();
+ if (Option == "arch") {
+ SmallVector<RISCVOptionArchArg> Args;
+ do {
+ if (Parser.parseComma())
+ return true;
+
+ RISCVOptionArchArgType Type;
+ if (parseOptionalToken(AsmToken::Plus))
+ Type = RISCVOptionArchArgType::Plus;
+ else if (parseOptionalToken(AsmToken::Minus))
+ Type = RISCVOptionArchArgType::Minus;
+ else if (!Args.empty())
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected + or -");
+ else
+ Type = RISCVOptionArchArgType::Full;
- Parser.Lex();
- if (Parser.getTok().isNot(AsmToken::EndOfStatement))
- return Error(Parser.getTok().getLoc(),
- "unexpected token, expected end of statement");
+ if (Parser.getTok().isNot(AsmToken::Identifier))
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected identifier");
+
+ StringRef Arch = Parser.getTok().getString();
+ SMLoc Loc = Parser.getTok().getLoc();
+ Parser.Lex();
+
+ if (Type == RISCVOptionArchArgType::Full) {
+ std::string Result;
+ if (resetToArch(Arch, Loc, Result, true))
+ return true;
+
+ Args.emplace_back(Type, Result);
+ break;
+ }
+
+ ArrayRef<SubtargetFeatureKV> KVArray(RISCVFeatureKV);
+ auto Ext = llvm::lower_bound(KVArray, Arch);
+ if (Ext == KVArray.end() || StringRef(Ext->Key) != Arch ||
+ !RISCVISAInfo::isSupportedExtension(Arch)) {
+ if (isDigit(Arch.back()))
+ return Error(
+ Loc,
+ "Extension version number parsing not currently implemented");
+ return Error(Loc, "unknown extension feature");
+ }
+
+ Args.emplace_back(Type, Ext->Key);
+
+ if (Type == RISCVOptionArchArgType::Plus) {
+ FeatureBitset OldFeatureBits = STI->getFeatureBits();
+
+ setFeatureBits(Ext->Value, Ext->Key);
+ auto ParseResult = RISCVFeatures::parseFeatureBits(
+     isRV64(), STI->getFeatureBits());
+ if (!ParseResult) {
+ copySTI().setFeatureBits(OldFeatureBits);
+ setAvailableFeatures(ComputeAvailableFeatures(OldFeatureBits));
+
+ std::string Buffer;
+ raw_string_ostream OutputErrMsg(Buffer);
+ handleAllErrors(ParseResult.takeError(), [&](llvm::StringError &ErrMsg) {
+ OutputErrMsg << ErrMsg.getMessage();
+ });
+
+ return Error(Loc, OutputErrMsg.str());
+ }
+ } else {
+ assert(Type == RISCVOptionArchArgType::Minus);
+ // It is invalid to disable an extension if other enabled extensions
+ // depend on it.
+ // TODO: Make use of RISCVISAInfo to handle this
+ for (auto Feature : KVArray) {
+ if (getSTI().hasFeature(Feature.Value) &&
+ Feature.Implies.test(Ext->Value))
+ return Error(Loc,
+ Twine("Can't disable ") + Ext->Key + " extension, " +
+ Feature.Key + " extension requires " + Ext->Key +
+ " extension be enabled");
+ }
+
+ clearFeatureBits(Ext->Value, Ext->Key);
+ }
+ } while (Parser.getTok().isNot(AsmToken::EndOfStatement));
+
+ if (Parser.parseEOL())
+ return true;
+
+ getTargetStreamer().emitDirectiveOptionArch(Args);
+ return false;
+ }
+
+ if (Option == "rvc") {
+ if (Parser.parseEOL())
+ return true;
+ getTargetStreamer().emitDirectiveOptionRVC();
setFeatureBits(RISCV::FeatureStdExtC, "c");
return false;
}
if (Option == "norvc") {
- getTargetStreamer().emitDirectiveOptionNoRVC();
-
- Parser.Lex();
- if (Parser.getTok().isNot(AsmToken::EndOfStatement))
- return Error(Parser.getTok().getLoc(),
- "unexpected token, expected end of statement");
+ if (Parser.parseEOL())
+ return true;
+ getTargetStreamer().emitDirectiveOptionNoRVC();
clearFeatureBits(RISCV::FeatureStdExtC, "c");
- clearFeatureBits(RISCV::FeatureExtZca, "+experimental-zca");
+ clearFeatureBits(RISCV::FeatureStdExtZca, "+zca");
return false;
}
if (Option == "pic") {
- getTargetStreamer().emitDirectiveOptionPIC();
-
- Parser.Lex();
- if (Parser.getTok().isNot(AsmToken::EndOfStatement))
- return Error(Parser.getTok().getLoc(),
- "unexpected token, expected end of statement");
+ if (Parser.parseEOL())
+ return true;
+ getTargetStreamer().emitDirectiveOptionPIC();
ParserOptions.IsPicEnabled = true;
return false;
}
if (Option == "nopic") {
- getTargetStreamer().emitDirectiveOptionNoPIC();
-
- Parser.Lex();
- if (Parser.getTok().isNot(AsmToken::EndOfStatement))
- return Error(Parser.getTok().getLoc(),
- "unexpected token, expected end of statement");
+ if (Parser.parseEOL())
+ return true;
+ getTargetStreamer().emitDirectiveOptionNoPIC();
ParserOptions.IsPicEnabled = false;
return false;
}
if (Option == "relax") {
- getTargetStreamer().emitDirectiveOptionRelax();
-
- Parser.Lex();
- if (Parser.getTok().isNot(AsmToken::EndOfStatement))
- return Error(Parser.getTok().getLoc(),
- "unexpected token, expected end of statement");
+ if (Parser.parseEOL())
+ return true;
+ getTargetStreamer().emitDirectiveOptionRelax();
setFeatureBits(RISCV::FeatureRelax, "relax");
return false;
}
if (Option == "norelax") {
- getTargetStreamer().emitDirectiveOptionNoRelax();
-
- Parser.Lex();
- if (Parser.getTok().isNot(AsmToken::EndOfStatement))
- return Error(Parser.getTok().getLoc(),
- "unexpected token, expected end of statement");
+ if (Parser.parseEOL())
+ return true;
+ getTargetStreamer().emitDirectiveOptionNoRelax();
clearFeatureBits(RISCV::FeatureRelax, "relax");
return false;
}
// Unknown option.
- Warning(Parser.getTok().getLoc(),
- "unknown option, expected 'push', 'pop', 'rvc', 'norvc', 'relax' or "
- "'norelax'");
+ Warning(Parser.getTok().getLoc(), "unknown option, expected 'push', 'pop', "
+ "'rvc', 'norvc', 'arch', 'relax' or "
+ "'norelax'");
Parser.eatToEndOfStatement();
return false;
}
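The `.option arch` handling above classifies each comma-separated argument as either a full architecture string or a single extension prefixed with `+` or `-`, and only allows the full form as the sole argument. A minimal dependency-free sketch of that classification, using hypothetical names rather than the LLVM types:

#include <cassert>
#include <string>

// Hypothetical mirror of RISCVOptionArchArgType.
enum class ArchArgType { Full, Plus, Minus };

// Classify one `.option arch` argument the way the parser above does: a
// leading '+' or '-' selects Plus/Minus, anything else is a full arch string,
// and a full string is only legal as the first (and only) argument.
ArchArgType classifyArchArg(const std::string &Arg, bool IsFirst,
                            std::string &Name) {
  if (!Arg.empty() && (Arg[0] == '+' || Arg[0] == '-')) {
    Name = Arg.substr(1);
    return Arg[0] == '+' ? ArchArgType::Plus : ArchArgType::Minus;
  }
  assert(IsFirst && "a full arch string must be the only argument");
  Name = Arg;
  return ArchArgType::Full;
}

int main() {
  std::string Name;
  assert(classifyArchArg("+zba", true, Name) == ArchArgType::Plus && Name == "zba");
  assert(classifyArchArg("-c", false, Name) == ArchArgType::Minus && Name == "c");
  assert(classifyArchArg("rv64imac", true, Name) == ArchArgType::Full);
}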
@@ -2170,10 +2846,8 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
StringRef Name = Parser.getTok().getIdentifier();
std::optional<unsigned> Ret =
ELFAttrs::attrTypeFromString(Name, RISCVAttrs::getRISCVAttributeTags());
- if (!Ret) {
- Error(TagLoc, "attribute name not recognised: " + Name);
- return false;
- }
+ if (!Ret)
+ return Error(TagLoc, "attribute name not recognised: " + Name);
Tag = *Ret;
Parser.Lex();
} else {
@@ -2190,7 +2864,7 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
Tag = CE->getValue();
}
- if (Parser.parseToken(AsmToken::Comma, "comma expected"))
+ if (Parser.parseComma())
return true;
StringRef StringValue;
@@ -2228,44 +2902,24 @@ bool RISCVAsmParser::parseDirectiveAttribute() {
else if (Tag != RISCVAttrs::ARCH)
getTargetStreamer().emitTextAttribute(Tag, StringValue);
else {
- StringRef Arch = StringValue;
- for (auto Feature : RISCVFeatureKV)
- if (llvm::RISCVISAInfo::isSupportedExtensionFeature(Feature.Key))
- clearFeatureBits(Feature.Value, Feature.Key);
-
- auto ParseResult = llvm::RISCVISAInfo::parseArchString(
- StringValue, /*EnableExperimentalExtension=*/true,
- /*ExperimentalExtensionVersionCheck=*/true);
- if (!ParseResult) {
- std::string Buffer;
- raw_string_ostream OutputErrMsg(Buffer);
- handleAllErrors(ParseResult.takeError(), [&](llvm::StringError &ErrMsg) {
- OutputErrMsg << "invalid arch name '" << Arch << "', "
- << ErrMsg.getMessage();
- });
-
- return Error(ValueExprLoc, OutputErrMsg.str());
- }
- auto &ISAInfo = *ParseResult;
-
- for (auto Feature : RISCVFeatureKV)
- if (ISAInfo->hasExtension(Feature.Key))
- setFeatureBits(Feature.Value, Feature.Key);
-
- if (ISAInfo->getXLen() == 32)
- clearFeatureBits(RISCV::Feature64Bit, "64bit");
- else if (ISAInfo->getXLen() == 64)
- setFeatureBits(RISCV::Feature64Bit, "64bit");
- else
- return Error(ValueExprLoc, "bad arch string " + Arch);
+ std::string Result;
+ if (resetToArch(StringValue, ValueExprLoc, Result, false))
+ return true;
// Then emit the arch string.
- getTargetStreamer().emitTextAttribute(Tag, ISAInfo->toString());
+ getTargetStreamer().emitTextAttribute(Tag, Result);
}
return false;
}
+bool isValidInsnFormat(StringRef Format, bool AllowC) {
+ return StringSwitch<bool>(Format)
+ .Cases("r", "r4", "i", "b", "sb", "u", "j", "uj", "s", true)
+ .Cases("cr", "ci", "ciw", "css", "cl", "cs", "ca", "cb", "cj", AllowC)
+ .Default(false);
+}
+
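The new isValidInsnFormat helper above accepts the base `.insn` formats unconditionally and the compressed `c*` formats only when C or Zca is enabled. A standalone sketch of the same check, written without the LLVM StringSwitch (names are illustrative):

#include <cassert>
#include <set>
#include <string>

// Illustrative stand-in for the StringSwitch-based check above.
bool isValidInsnFormatSketch(const std::string &Format, bool AllowC) {
  static const std::set<std::string> Base = {"r", "r4", "i", "b", "sb",
                                             "u", "j", "uj", "s"};
  static const std::set<std::string> Compressed = {
      "cr", "ci", "ciw", "css", "cl", "cs", "ca", "cb", "cj"};
  if (Base.count(Format))
    return true;
  return AllowC && Compressed.count(Format) != 0;
}

int main() {
  assert(isValidInsnFormatSketch("r", /*AllowC=*/false));
  assert(!isValidInsnFormatSketch("cb", /*AllowC=*/false)); // needs C or Zca
  assert(isValidInsnFormatSketch("cb", /*AllowC=*/true));
}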
/// parseDirectiveInsn
/// ::= .insn [ format encoding, (operands (, operands)*) ]
bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
@@ -2277,9 +2931,9 @@ bool RISCVAsmParser::parseDirectiveInsn(SMLoc L) {
if (Parser.parseIdentifier(Format))
return Error(ErrorLoc, "expected instruction format");
- if (Format != "r" && Format != "r4" && Format != "i" && Format != "b" &&
- Format != "sb" && Format != "u" && Format != "j" && Format != "uj" &&
- Format != "s")
+ bool AllowC = getSTI().hasFeature(RISCV::FeatureStdExtC) ||
+ getSTI().hasFeature(RISCV::FeatureStdExtZca);
+ if (!isValidInsnFormat(Format, AllowC))
return Error(ErrorLoc, "invalid instruction format");
std::string FormatName = (".insn_" + Format).str();
@@ -2304,7 +2958,7 @@ bool RISCVAsmParser::parseDirectiveVariantCC() {
if (getParser().parseIdentifier(Name))
return TokError("expected symbol name");
if (parseEOL())
- return false;
+ return true;
getTargetStreamer().emitDirectiveVariantCC(
*getContext().getOrCreateSymbol(Name));
return false;
@@ -2324,7 +2978,7 @@ void RISCVAsmParser::emitLoadImm(MCRegister DestReg, int64_t Value,
RISCVMatInt::generateInstSeq(Value, getSTI().getFeatureBits());
MCRegister SrcReg = RISCV::X0;
- for (RISCVMatInt::Inst &Inst : Seq) {
+ for (const RISCVMatInt::Inst &Inst : Seq) {
switch (Inst.getOpndKind()) {
case RISCVMatInt::Imm:
emitToStreamer(Out,
@@ -2393,29 +3047,34 @@ void RISCVAsmParser::emitLoadLocalAddress(MCInst &Inst, SMLoc IDLoc,
RISCV::ADDI, IDLoc, Out);
}
+void RISCVAsmParser::emitLoadGlobalAddress(MCInst &Inst, SMLoc IDLoc,
+ MCStreamer &Out) {
+ // The load global address pseudo-instruction "lga" is used in GOT-indirect
+ // addressing of global symbols:
+ // lga rdest, symbol
+ // expands to
+ // TmpLabel: AUIPC rdest, %got_pcrel_hi(symbol)
+ // Lx rdest, %pcrel_lo(TmpLabel)(rdest)
+ MCOperand DestReg = Inst.getOperand(0);
+ const MCExpr *Symbol = Inst.getOperand(1).getExpr();
+ unsigned SecondOpcode = isRV64() ? RISCV::LD : RISCV::LW;
+ emitAuipcInstPair(DestReg, DestReg, Symbol, RISCVMCExpr::VK_RISCV_GOT_HI,
+ SecondOpcode, IDLoc, Out);
+}
+
void RISCVAsmParser::emitLoadAddress(MCInst &Inst, SMLoc IDLoc,
MCStreamer &Out) {
// The load address pseudo-instruction "la" is used in PC-relative and
// GOT-indirect addressing of global symbols:
// la rdest, symbol
- // expands to either (for non-PIC)
- // TmpLabel: AUIPC rdest, %pcrel_hi(symbol)
- // ADDI rdest, rdest, %pcrel_lo(TmpLabel)
+ // is an alias for either (for non-PIC)
+ // lla rdest, symbol
// or (for PIC)
- // TmpLabel: AUIPC rdest, %got_pcrel_hi(symbol)
- // Lx rdest, %pcrel_lo(TmpLabel)(rdest)
- MCOperand DestReg = Inst.getOperand(0);
- const MCExpr *Symbol = Inst.getOperand(1).getExpr();
- unsigned SecondOpcode;
- RISCVMCExpr::VariantKind VKHi;
- if (ParserOptions.IsPicEnabled) {
- SecondOpcode = isRV64() ? RISCV::LD : RISCV::LW;
- VKHi = RISCVMCExpr::VK_RISCV_GOT_HI;
- } else {
- SecondOpcode = RISCV::ADDI;
- VKHi = RISCVMCExpr::VK_RISCV_PCREL_HI;
- }
- emitAuipcInstPair(DestReg, DestReg, Symbol, VKHi, SecondOpcode, IDLoc, Out);
+ // lga rdest, symbol
+ if (ParserOptions.IsPicEnabled)
+ emitLoadGlobalAddress(Inst, IDLoc, Out);
+ else
+ emitLoadLocalAddress(Inst, IDLoc, Out);
}
void RISCVAsmParser::emitLoadTLSIEAddress(MCInst &Inst, SMLoc IDLoc,
@@ -2589,13 +3248,20 @@ bool RISCVAsmParser::checkPseudoAddTPRel(MCInst &Inst,
std::unique_ptr<RISCVOperand> RISCVAsmParser::defaultMaskRegOp() const {
return RISCVOperand::createReg(RISCV::NoRegister, llvm::SMLoc(),
- llvm::SMLoc(), isRV64());
+ llvm::SMLoc());
+}
+
+std::unique_ptr<RISCVOperand> RISCVAsmParser::defaultFRMArgOp() const {
+ return RISCVOperand::createFRMArg(RISCVFPRndMode::RoundingMode::DYN,
+ llvm::SMLoc());
}
bool RISCVAsmParser::validateInstruction(MCInst &Inst,
OperandVector &Operands) {
- if (Inst.getOpcode() == RISCV::PseudoVMSGEU_VX_M_T ||
- Inst.getOpcode() == RISCV::PseudoVMSGE_VX_M_T) {
+ unsigned Opcode = Inst.getOpcode();
+
+ if (Opcode == RISCV::PseudoVMSGEU_VX_M_T ||
+ Opcode == RISCV::PseudoVMSGE_VX_M_T) {
unsigned DestReg = Inst.getOperand(0).getReg();
unsigned TempReg = Inst.getOperand(1).getReg();
if (DestReg == TempReg) {
@@ -2605,30 +3271,104 @@ bool RISCVAsmParser::validateInstruction(MCInst &Inst,
}
}
- const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
- RISCVII::VConstraintType Constraints = RISCVII::getConstraint(MCID.TSFlags);
- if (Constraints == RISCVII::NoConstraint)
+ if (Opcode == RISCV::TH_LDD || Opcode == RISCV::TH_LWUD ||
+ Opcode == RISCV::TH_LWD) {
+ unsigned Rd1 = Inst.getOperand(0).getReg();
+ unsigned Rd2 = Inst.getOperand(1).getReg();
+ unsigned Rs1 = Inst.getOperand(2).getReg();
+ // The encoding with rd1 == rd2 == rs1 is reserved for XTHead load pair.
+ if (Rs1 == Rd1 && Rs1 == Rd2) {
+ SMLoc Loc = Operands[1]->getStartLoc();
+ return Error(Loc, "The source register and destination registers "
+ "cannot be equal.");
+ }
+ }
+
+ if (Opcode == RISCV::CM_MVSA01) {
+ unsigned Rd1 = Inst.getOperand(0).getReg();
+ unsigned Rd2 = Inst.getOperand(1).getReg();
+ if (Rd1 == Rd2) {
+ SMLoc Loc = Operands[1]->getStartLoc();
+ return Error(Loc, "'rs1' and 'rs2' must be different.");
+ }
+ }
+
+ bool IsTHeadMemPair32 = (Opcode == RISCV::TH_LWD ||
+ Opcode == RISCV::TH_LWUD || Opcode == RISCV::TH_SWD);
+ bool IsTHeadMemPair64 = (Opcode == RISCV::TH_LDD || Opcode == RISCV::TH_SDD);
+ // The last operand of XTHeadMemPair instructions must be constant 3 or 4
+ // depending on the data width.
+ if (IsTHeadMemPair32 && Inst.getOperand(4).getImm() != 3) {
+ SMLoc Loc = Operands.back()->getStartLoc();
+ return Error(Loc, "Operand must be constant 3.");
+ } else if (IsTHeadMemPair64 && Inst.getOperand(4).getImm() != 4) {
+ SMLoc Loc = Operands.back()->getStartLoc();
+ return Error(Loc, "Operand must be constant 4.");
+ }
+
+ bool IsAMOCAS_D = Opcode == RISCV::AMOCAS_D || Opcode == RISCV::AMOCAS_D_AQ ||
+ Opcode == RISCV::AMOCAS_D_RL ||
+ Opcode == RISCV::AMOCAS_D_AQ_RL;
+ bool IsAMOCAS_Q = Opcode == RISCV::AMOCAS_Q || Opcode == RISCV::AMOCAS_Q_AQ ||
+ Opcode == RISCV::AMOCAS_Q_RL ||
+ Opcode == RISCV::AMOCAS_Q_AQ_RL;
+ if ((!isRV64() && IsAMOCAS_D) || IsAMOCAS_Q) {
+ unsigned Rd = Inst.getOperand(0).getReg();
+ unsigned Rs2 = Inst.getOperand(2).getReg();
+ assert(Rd >= RISCV::X0 && Rd <= RISCV::X31);
+ if ((Rd - RISCV::X0) % 2 != 0) {
+ SMLoc Loc = Operands[1]->getStartLoc();
+ return Error(Loc, "The destination register must be even.");
+ }
+ assert(Rs2 >= RISCV::X0 && Rs2 <= RISCV::X31);
+ if ((Rs2 - RISCV::X0) % 2 != 0) {
+ SMLoc Loc = Operands[2]->getStartLoc();
+ return Error(Loc, "The source register must be even.");
+ }
+ }
+
+ const MCInstrDesc &MCID = MII.get(Opcode);
+ if (!(MCID.TSFlags & RISCVII::ConstraintMask))
return false;
+ if (Opcode == RISCV::VC_V_XVW || Opcode == RISCV::VC_V_IVW ||
+ Opcode == RISCV::VC_V_FVW || Opcode == RISCV::VC_V_VVW) {
+    // Operand order for VC_V_XVW: Opcode, Dst, uimm, Dst, Rs2, Rs1.
+ unsigned VCIXDst = Inst.getOperand(0).getReg();
+ SMLoc VCIXDstLoc = Operands[2]->getStartLoc();
+ if (MCID.TSFlags & RISCVII::VS1Constraint) {
+ unsigned VCIXRs1 = Inst.getOperand(Inst.getNumOperands() - 1).getReg();
+ if (VCIXDst == VCIXRs1)
+ return Error(VCIXDstLoc, "The destination vector register group cannot"
+ " overlap the source vector register group.");
+ }
+ if (MCID.TSFlags & RISCVII::VS2Constraint) {
+ unsigned VCIXRs2 = Inst.getOperand(Inst.getNumOperands() - 2).getReg();
+ if (VCIXDst == VCIXRs2)
+ return Error(VCIXDstLoc, "The destination vector register group cannot"
+ " overlap the source vector register group.");
+ }
+ return false;
+ }
+
unsigned DestReg = Inst.getOperand(0).getReg();
// Operands[1] will be the first operand, DestReg.
SMLoc Loc = Operands[1]->getStartLoc();
- if (Constraints & RISCVII::VS2Constraint) {
+ if (MCID.TSFlags & RISCVII::VS2Constraint) {
unsigned CheckReg = Inst.getOperand(1).getReg();
if (DestReg == CheckReg)
return Error(Loc, "The destination vector register group cannot overlap"
" the source vector register group.");
}
- if ((Constraints & RISCVII::VS1Constraint) && (Inst.getOperand(2).isReg())) {
+ if ((MCID.TSFlags & RISCVII::VS1Constraint) && (Inst.getOperand(2).isReg())) {
unsigned CheckReg = Inst.getOperand(2).getReg();
if (DestReg == CheckReg)
return Error(Loc, "The destination vector register group cannot overlap"
" the source vector register group.");
}
- if ((Constraints & RISCVII::VMConstraint) && (DestReg == RISCV::V0)) {
+ if ((MCID.TSFlags & RISCVII::VMConstraint) && (DestReg == RISCV::V0)) {
// vadc, vsbc are special cases. These instructions have no mask register.
// The destination register could not be V0.
- unsigned Opcode = Inst.getOpcode();
if (Opcode == RISCV::VADC_VVM || Opcode == RISCV::VADC_VXM ||
Opcode == RISCV::VADC_VIM || Opcode == RISCV::VSBC_VVM ||
Opcode == RISCV::VSBC_VXM || Opcode == RISCV::VFMERGE_VFM ||
@@ -2659,6 +3399,8 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
switch (Inst.getOpcode()) {
default:
break;
+ case RISCV::PseudoLLAImm:
+ case RISCV::PseudoLAImm:
case RISCV::PseudoLI: {
MCRegister Reg = Inst.getOperand(0).getReg();
const MCOperand &Op1 = Inst.getOperand(1);
@@ -2683,6 +3425,9 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
case RISCV::PseudoLLA:
emitLoadLocalAddress(Inst, IDLoc, Out);
return false;
+ case RISCV::PseudoLGA:
+ emitLoadGlobalAddress(Inst, IDLoc, Out);
+ return false;
case RISCV::PseudoLA:
emitLoadAddress(Inst, IDLoc, Out);
return false;
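One detail worth calling out from the validateInstruction changes above: rv32 amocas.d and amocas.q operate on register pairs, so the destination and source registers must be even-numbered. A tiny standalone illustration of that constraint (not LLVM code):

#include <cassert>

// Register pairs used by rv32 amocas.d / amocas.q must start at an
// even-numbered X register; XRegNo is the architectural number (0 for x0).
bool isValidPairStart(unsigned XRegNo) { return XRegNo % 2 == 0; }

int main() {
  assert(isValidPairStart(10));  // x10/x11 form a legal pair
  assert(!isValidPairStart(11)); // x11 cannot start a pair
}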
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index 42cdd755b5b4..e6ea6baa72ff 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVDisassembler.cpp - Disassembler for RISCV --------------------===//
+//===-- RISCVDisassembler.cpp - Disassembler for RISC-V -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -41,6 +41,9 @@ public:
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const override;
+
+private:
+ void addSPOperands(MCInst &MI) const;
};
} // end anonymous namespace
@@ -58,14 +61,12 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVDisassembler() {
createRISCVDisassembler);
}
-static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo,
+static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
- const FeatureBitset &FeatureBits =
- Decoder->getSubtargetInfo().getFeatureBits();
- bool IsRV32E = FeatureBits[RISCV::FeatureRV32E];
+ bool IsRVE = Decoder->getSubtargetInfo().hasFeature(RISCV::FeatureRVE);
- if (RegNo >= 32 || (IsRV32E && RegNo >= 16))
+ if (RegNo >= 32 || (IsRVE && RegNo >= 16))
return MCDisassembler::Fail;
MCRegister Reg = RISCV::X0 + RegNo;
@@ -73,7 +74,7 @@ static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint64_t RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, uint64_t RegNo,
+static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
if (RegNo >= 32)
@@ -84,7 +85,7 @@ static DecodeStatus DecodeFPR16RegisterClass(MCInst &Inst, uint64_t RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo,
+static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
if (RegNo >= 32)
@@ -95,7 +96,7 @@ static DecodeStatus DecodeFPR32RegisterClass(MCInst &Inst, uint64_t RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeFPR32CRegisterClass(MCInst &Inst, uint64_t RegNo,
+static DecodeStatus DecodeFPR32CRegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
if (RegNo >= 8) {
@@ -106,7 +107,7 @@ static DecodeStatus DecodeFPR32CRegisterClass(MCInst &Inst, uint64_t RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo,
+static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
if (RegNo >= 32)
@@ -117,7 +118,7 @@ static DecodeStatus DecodeFPR64RegisterClass(MCInst &Inst, uint64_t RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeFPR64CRegisterClass(MCInst &Inst, uint64_t RegNo,
+static DecodeStatus DecodeFPR64CRegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
if (RegNo >= 8) {
@@ -128,7 +129,7 @@ static DecodeStatus DecodeFPR64CRegisterClass(MCInst &Inst, uint64_t RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst &Inst, uint64_t RegNo,
+static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
if (RegNo == 0) {
@@ -139,7 +140,7 @@ static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst &Inst, uint64_t RegNo,
}
static DecodeStatus
-DecodeGPRNoX0X2RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address,
+DecodeGPRNoX0X2RegisterClass(MCInst &Inst, uint64_t RegNo, uint32_t Address,
const MCDisassembler *Decoder) {
if (RegNo == 2) {
return MCDisassembler::Fail;
@@ -148,7 +149,7 @@ DecodeGPRNoX0X2RegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address,
return DecodeGPRNoX0RegisterClass(Inst, RegNo, Address, Decoder);
}
-static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
if (RegNo >= 8)
@@ -159,7 +160,7 @@ static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint64_t RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeGPRPF64RegisterClass(MCInst &Inst, uint64_t RegNo,
+static DecodeStatus DecodeGPRPF64RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
if (RegNo >= 32 || RegNo & 1)
@@ -170,7 +171,18 @@ static DecodeStatus DecodeGPRPF64RegisterClass(MCInst &Inst, uint64_t RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeVRRegisterClass(MCInst &Inst, uint64_t RegNo,
+static DecodeStatus DecodeSR07RegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ if (RegNo >= 8)
+ return MCDisassembler::Fail;
+
+ MCRegister Reg = (RegNo < 2) ? (RegNo + RISCV::X8) : (RegNo - 2 + RISCV::X18);
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeVRRegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
if (RegNo >= 32)
@@ -181,7 +193,7 @@ static DecodeStatus DecodeVRRegisterClass(MCInst &Inst, uint64_t RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeVRM2RegisterClass(MCInst &Inst, uint64_t RegNo,
+static DecodeStatus DecodeVRM2RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
if (RegNo >= 32)
@@ -201,7 +213,7 @@ static DecodeStatus DecodeVRM2RegisterClass(MCInst &Inst, uint64_t RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeVRM4RegisterClass(MCInst &Inst, uint64_t RegNo,
+static DecodeStatus DecodeVRM4RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
if (RegNo >= 32)
@@ -221,7 +233,7 @@ static DecodeStatus DecodeVRM4RegisterClass(MCInst &Inst, uint64_t RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeVRM8RegisterClass(MCInst &Inst, uint64_t RegNo,
+static DecodeStatus DecodeVRM8RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
if (RegNo >= 32)
@@ -258,37 +270,17 @@ static DecodeStatus decodeVMaskReg(MCInst &Inst, uint64_t RegNo,
return MCDisassembler::Success;
}
-// Add implied SP operand for instructions *SP compressed instructions. The SP
-// operand isn't explicitly encoded in the instruction.
-static void addImplySP(MCInst &Inst, int64_t Address,
- const MCDisassembler *Decoder) {
- if (Inst.getOpcode() == RISCV::C_LWSP || Inst.getOpcode() == RISCV::C_SWSP ||
- Inst.getOpcode() == RISCV::C_LDSP || Inst.getOpcode() == RISCV::C_SDSP ||
- Inst.getOpcode() == RISCV::C_FLWSP ||
- Inst.getOpcode() == RISCV::C_FSWSP ||
- Inst.getOpcode() == RISCV::C_FLDSP ||
- Inst.getOpcode() == RISCV::C_FSDSP ||
- Inst.getOpcode() == RISCV::C_ADDI4SPN) {
- DecodeGPRRegisterClass(Inst, 2, Address, Decoder);
- }
- if (Inst.getOpcode() == RISCV::C_ADDI16SP) {
- DecodeGPRRegisterClass(Inst, 2, Address, Decoder);
- DecodeGPRRegisterClass(Inst, 2, Address, Decoder);
- }
-}
-
template <unsigned N>
-static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm,
+static DecodeStatus decodeUImmOperand(MCInst &Inst, uint32_t Imm,
int64_t Address,
const MCDisassembler *Decoder) {
assert(isUInt<N>(Imm) && "Invalid immediate");
- addImplySP(Inst, Address, Decoder);
Inst.addOperand(MCOperand::createImm(Imm));
return MCDisassembler::Success;
}
template <unsigned N>
-static DecodeStatus decodeUImmNonZeroOperand(MCInst &Inst, uint64_t Imm,
+static DecodeStatus decodeUImmNonZeroOperand(MCInst &Inst, uint32_t Imm,
int64_t Address,
const MCDisassembler *Decoder) {
if (Imm == 0)
@@ -297,18 +289,17 @@ static DecodeStatus decodeUImmNonZeroOperand(MCInst &Inst, uint64_t Imm,
}
template <unsigned N>
-static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm,
+static DecodeStatus decodeSImmOperand(MCInst &Inst, uint32_t Imm,
int64_t Address,
const MCDisassembler *Decoder) {
assert(isUInt<N>(Imm) && "Invalid immediate");
- addImplySP(Inst, Address, Decoder);
// Sign-extend the number in the bottom N bits of Imm
Inst.addOperand(MCOperand::createImm(SignExtend64<N>(Imm)));
return MCDisassembler::Success;
}
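decodeSImmOperand above sign-extends the low N bits of the encoded field with SignExtend64<N>. A minimal equivalent of that operation, assuming the input already fits in N bits:

#include <cassert>
#include <cstdint>

// Sign-extend the low N bits of Imm to 64 bits (the same idea as
// llvm::SignExtend64<N>): shift the sign bit up to bit 63, then shift back
// down arithmetically.
template <unsigned N> int64_t signExtend64(uint64_t Imm) {
  static_assert(N > 0 && N < 64, "invalid bit width");
  return static_cast<int64_t>(Imm << (64 - N)) >> (64 - N);
}

int main() {
  assert(signExtend64<6>(0x3F) == -1);      // 0b111111 -> -1
  assert(signExtend64<6>(0x1F) == 31);      // 0b011111 -> +31
  assert(signExtend64<12>(0x800) == -2048);
}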
template <unsigned N>
-static DecodeStatus decodeSImmNonZeroOperand(MCInst &Inst, uint64_t Imm,
+static DecodeStatus decodeSImmNonZeroOperand(MCInst &Inst, uint32_t Imm,
int64_t Address,
const MCDisassembler *Decoder) {
if (Imm == 0)
@@ -317,7 +308,7 @@ static DecodeStatus decodeSImmNonZeroOperand(MCInst &Inst, uint64_t Imm,
}
template <unsigned N>
-static DecodeStatus decodeSImmOperandAndLsl1(MCInst &Inst, uint64_t Imm,
+static DecodeStatus decodeSImmOperandAndLsl1(MCInst &Inst, uint32_t Imm,
int64_t Address,
const MCDisassembler *Decoder) {
assert(isUInt<N>(Imm) && "Invalid immediate");
@@ -328,7 +319,7 @@ static DecodeStatus decodeSImmOperandAndLsl1(MCInst &Inst, uint64_t Imm,
return MCDisassembler::Success;
}
-static DecodeStatus decodeCLUIImmOperand(MCInst &Inst, uint64_t Imm,
+static DecodeStatus decodeCLUIImmOperand(MCInst &Inst, uint32_t Imm,
int64_t Address,
const MCDisassembler *Decoder) {
assert(isUInt<6>(Imm) && "Invalid immediate");
@@ -339,7 +330,7 @@ static DecodeStatus decodeCLUIImmOperand(MCInst &Inst, uint64_t Imm,
return MCDisassembler::Success;
}
-static DecodeStatus decodeFRMArg(MCInst &Inst, uint64_t Imm, int64_t Address,
+static DecodeStatus decodeFRMArg(MCInst &Inst, uint32_t Imm, int64_t Address,
const MCDisassembler *Decoder) {
assert(isUInt<3>(Imm) && "Invalid immediate");
if (!llvm::RISCVFPRndMode::isValidRoundingMode(Imm))
@@ -349,44 +340,55 @@ static DecodeStatus decodeFRMArg(MCInst &Inst, uint64_t Imm, int64_t Address,
return MCDisassembler::Success;
}
-static DecodeStatus decodeRVCInstrSImm(MCInst &Inst, unsigned Insn,
- uint64_t Address,
- const MCDisassembler *Decoder);
+static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
-static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, unsigned Insn,
+static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, uint32_t Insn,
uint64_t Address,
const MCDisassembler *Decoder);
-static DecodeStatus decodeRVCInstrRdRs1UImm(MCInst &Inst, unsigned Insn,
+static DecodeStatus decodeRVCInstrRdRs1UImm(MCInst &Inst, uint32_t Insn,
uint64_t Address,
const MCDisassembler *Decoder);
-static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, unsigned Insn,
+static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, uint32_t Insn,
uint64_t Address,
const MCDisassembler *Decoder);
-static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, unsigned Insn,
+static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, uint32_t Insn,
uint64_t Address,
const MCDisassembler *Decoder);
+static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder);
+
+static DecodeStatus decodeZcmpRlist(MCInst &Inst, unsigned Imm,
+ uint64_t Address, const void *Decoder);
+
+static DecodeStatus decodeZcmpSpimm(MCInst &Inst, unsigned Imm,
+ uint64_t Address, const void *Decoder);
+
#include "RISCVGenDisassemblerTables.inc"
-static DecodeStatus decodeRVCInstrSImm(MCInst &Inst, unsigned Insn,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- uint64_t SImm6 =
- fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5);
- DecodeStatus Result = decodeSImmOperand<6>(Inst, SImm6, Address, Decoder);
+static DecodeStatus decodeRVCInstrRdRs1ImmZero(MCInst &Inst, uint32_t Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ uint32_t Rd = fieldFromInstruction(Insn, 7, 5);
+ DecodeStatus Result = DecodeGPRNoX0RegisterClass(Inst, Rd, Address, Decoder);
(void)Result;
- assert(Result == MCDisassembler::Success && "Invalid immediate");
+ assert(Result == MCDisassembler::Success && "Invalid register");
+ Inst.addOperand(Inst.getOperand(0));
+ Inst.addOperand(MCOperand::createImm(0));
return MCDisassembler::Success;
}
-static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, unsigned Insn,
+static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, uint32_t Insn,
uint64_t Address,
const MCDisassembler *Decoder) {
- DecodeGPRRegisterClass(Inst, 0, Address, Decoder);
- uint64_t SImm6 =
+ Inst.addOperand(MCOperand::createReg(RISCV::X0));
+ uint32_t SImm6 =
fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5);
DecodeStatus Result = decodeSImmOperand<6>(Inst, SImm6, Address, Decoder);
(void)Result;
@@ -394,12 +396,12 @@ static DecodeStatus decodeRVCInstrRdSImm(MCInst &Inst, unsigned Insn,
return MCDisassembler::Success;
}
-static DecodeStatus decodeRVCInstrRdRs1UImm(MCInst &Inst, unsigned Insn,
+static DecodeStatus decodeRVCInstrRdRs1UImm(MCInst &Inst, uint32_t Insn,
uint64_t Address,
const MCDisassembler *Decoder) {
- DecodeGPRRegisterClass(Inst, 0, Address, Decoder);
+ Inst.addOperand(MCOperand::createReg(RISCV::X0));
Inst.addOperand(Inst.getOperand(0));
- uint64_t UImm6 =
+ uint32_t UImm6 =
fieldFromInstruction(Insn, 12, 1) << 5 | fieldFromInstruction(Insn, 2, 5);
DecodeStatus Result = decodeUImmOperand<6>(Inst, UImm6, Address, Decoder);
(void)Result;
@@ -407,27 +409,78 @@ static DecodeStatus decodeRVCInstrRdRs1UImm(MCInst &Inst, unsigned Insn,
return MCDisassembler::Success;
}
-static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, unsigned Insn,
+static DecodeStatus decodeRVCInstrRdRs2(MCInst &Inst, uint32_t Insn,
uint64_t Address,
const MCDisassembler *Decoder) {
- unsigned Rd = fieldFromInstruction(Insn, 7, 5);
- unsigned Rs2 = fieldFromInstruction(Insn, 2, 5);
+ uint32_t Rd = fieldFromInstruction(Insn, 7, 5);
+ uint32_t Rs2 = fieldFromInstruction(Insn, 2, 5);
DecodeGPRRegisterClass(Inst, Rd, Address, Decoder);
DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, unsigned Insn,
+static DecodeStatus decodeRVCInstrRdRs1Rs2(MCInst &Inst, uint32_t Insn,
uint64_t Address,
const MCDisassembler *Decoder) {
- unsigned Rd = fieldFromInstruction(Insn, 7, 5);
- unsigned Rs2 = fieldFromInstruction(Insn, 2, 5);
+ uint32_t Rd = fieldFromInstruction(Insn, 7, 5);
+ uint32_t Rs2 = fieldFromInstruction(Insn, 2, 5);
DecodeGPRRegisterClass(Inst, Rd, Address, Decoder);
Inst.addOperand(Inst.getOperand(0));
DecodeGPRRegisterClass(Inst, Rs2, Address, Decoder);
return MCDisassembler::Success;
}
+static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ uint32_t Rd1 = fieldFromInstruction(Insn, 7, 5);
+ uint32_t Rs1 = fieldFromInstruction(Insn, 15, 5);
+ uint32_t Rd2 = fieldFromInstruction(Insn, 20, 5);
+ uint32_t UImm2 = fieldFromInstruction(Insn, 25, 2);
+ DecodeGPRRegisterClass(Inst, Rd1, Address, Decoder);
+ DecodeGPRRegisterClass(Inst, Rd2, Address, Decoder);
+ DecodeGPRRegisterClass(Inst, Rs1, Address, Decoder);
+ DecodeStatus Result = decodeUImmOperand<2>(Inst, UImm2, Address, Decoder);
+ (void)Result;
+ assert(Result == MCDisassembler::Success && "Invalid immediate");
+
+ // Disassemble the final operand which is implicit.
+ unsigned Opcode = Inst.getOpcode();
+ bool IsWordOp = (Opcode == RISCV::TH_LWD || Opcode == RISCV::TH_LWUD ||
+ Opcode == RISCV::TH_SWD);
+ if (IsWordOp)
+ Inst.addOperand(MCOperand::createImm(3));
+ else
+ Inst.addOperand(MCOperand::createImm(4));
+
+ return MCDisassembler::Success;
+}
+
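decodeXTHeadMemPair above slices the register and immediate fields out of the 32-bit encoding with fieldFromInstruction. A self-contained sketch of that kind of field extraction, using the same field positions as the calls above but an invented bit pattern:

#include <cassert>
#include <cstdint>

// Extract NumBits bits of Insn starting at StartBit, like the
// fieldFromInstruction calls in the decoder above.
uint32_t extractField(uint32_t Insn, unsigned StartBit, unsigned NumBits) {
  assert(NumBits > 0 && NumBits < 32 && StartBit + NumBits <= 32);
  return (Insn >> StartBit) & ((1u << NumBits) - 1);
}

int main() {
  // Build a dummy word with rd1=12 at [11:7], rs1=11 at [19:15],
  // rd2=10 at [24:20] and uimm2=3 at [26:25]; this is not a real encoding.
  uint32_t Insn = (3u << 25) | (10u << 20) | (11u << 15) | (12u << 7) | 0x0B;
  assert(extractField(Insn, 7, 5) == 12);
  assert(extractField(Insn, 15, 5) == 11);
  assert(extractField(Insn, 20, 5) == 10);
  assert(extractField(Insn, 25, 2) == 3);
}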
+static DecodeStatus decodeZcmpRlist(MCInst &Inst, unsigned Imm,
+ uint64_t Address, const void *Decoder) {
+ if (Imm <= 3)
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(Imm));
+ return MCDisassembler::Success;
+}
+
+// spimm is based on rlist now.
+static DecodeStatus decodeZcmpSpimm(MCInst &Inst, unsigned Imm,
+ uint64_t Address, const void *Decoder) {
+ // TODO: check if spimm matches rlist
+ Inst.addOperand(MCOperand::createImm(Imm));
+ return MCDisassembler::Success;
+}
+
+// Add implied SP operand for C.*SP compressed instructions. The SP operand
+// isn't explicitly encoded in the instruction.
+void RISCVDisassembler::addSPOperands(MCInst &MI) const {
+ const MCInstrDesc &MCID = MCII->get(MI.getOpcode());
+ for (unsigned i = 0; i < MCID.getNumOperands(); i++)
+ if (MCID.operands()[i].RegClass == RISCV::SPRegClassID)
+ MI.insert(MI.begin() + i, MCOperand::createReg(RISCV::X2));
+}
+
DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
@@ -437,80 +490,103 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
uint32_t Insn;
DecodeStatus Result;
+#define TRY_TO_DECODE_WITH_ADDITIONAL_OPERATION(FEATURE_CHECKS, DECODER_TABLE, \
+ DESC, ADDITIONAL_OPERATION) \
+ do { \
+ if (FEATURE_CHECKS) { \
+ LLVM_DEBUG(dbgs() << "Trying " DESC ":\n"); \
+ Result = decodeInstruction(DECODER_TABLE, MI, Insn, Address, this, STI); \
+ if (Result != MCDisassembler::Fail) { \
+ ADDITIONAL_OPERATION; \
+ return Result; \
+ } \
+ } \
+ } while (false)
+#define TRY_TO_DECODE_AND_ADD_SP(FEATURE_CHECKS, DECODER_TABLE, DESC) \
+ TRY_TO_DECODE_WITH_ADDITIONAL_OPERATION(FEATURE_CHECKS, DECODER_TABLE, DESC, \
+ addSPOperands(MI))
+#define TRY_TO_DECODE(FEATURE_CHECKS, DECODER_TABLE, DESC) \
+ TRY_TO_DECODE_WITH_ADDITIONAL_OPERATION(FEATURE_CHECKS, DECODER_TABLE, DESC, \
+ (void)nullptr)
+#define TRY_TO_DECODE_FEATURE(FEATURE, DECODER_TABLE, DESC) \
+ TRY_TO_DECODE(STI.hasFeature(FEATURE), DECODER_TABLE, DESC)
+
// It's a 32 bit instruction if bit 0 and 1 are 1.
if ((Bytes[0] & 0x3) == 0x3) {
if (Bytes.size() < 4) {
Size = 0;
return MCDisassembler::Fail;
}
+ Size = 4;
+
Insn = support::endian::read32le(Bytes.data());
- if (STI.getFeatureBits()[RISCV::FeatureStdExtZdinx] &&
- !STI.getFeatureBits()[RISCV::Feature64Bit]) {
- LLVM_DEBUG(dbgs() << "Trying RV32Zdinx table (Double in Integer and"
- "rv32)\n");
- Result = decodeInstruction(DecoderTableRV32Zdinx32, MI, Insn, Address,
- this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- return Result;
- }
- }
- if (STI.getFeatureBits()[RISCV::FeatureStdExtZfinx]) {
- LLVM_DEBUG(dbgs() << "Trying RVZfinx table (Float in Integer):\n");
- Result = decodeInstruction(DecoderTableRVZfinx32, MI, Insn, Address, this,
- STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- return Result;
- }
- }
- if (STI.getFeatureBits()[RISCV::FeatureVendorXVentanaCondOps]) {
- LLVM_DEBUG(dbgs() << "Trying Ventana custom opcode table:\n");
- Result = decodeInstruction(DecoderTableVentana32, MI, Insn, Address, this,
- STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- return Result;
- }
- }
- if (STI.getFeatureBits()[RISCV::FeatureVendorXTHeadVdot]) {
- LLVM_DEBUG(dbgs() << "Trying T-Head custom opcode table:\n");
- Result =
- decodeInstruction(DecoderTableTHeadV32, MI, Insn, Address, this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 4;
- return Result;
- }
- }
+ TRY_TO_DECODE(STI.hasFeature(RISCV::FeatureStdExtZdinx) &&
+ !STI.hasFeature(RISCV::Feature64Bit),
+ DecoderTableRV32Zdinx32,
+ "RV32Zdinx table (Double in Integer and rv32)");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZfinx, DecoderTableRVZfinx32,
+ "RVZfinx table (Float in Integer)");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXVentanaCondOps,
+ DecoderTableVentana32, "Ventana custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBa, DecoderTableTHeadBa32,
+ "XTHeadBa custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBb, DecoderTableTHeadBb32,
+ "XTHeadBb custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadBs, DecoderTableTHeadBs32,
+ "XTHeadBs custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadCondMov,
+ DecoderTableTHeadCondMov32,
+ "XTHeadCondMov custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadCmo, DecoderTableTHeadCmo32,
+ "XTHeadCmo custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadFMemIdx,
+ DecoderTableTHeadFMemIdx32,
+ "XTHeadFMemIdx custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadMac, DecoderTableTHeadMac32,
+ "XTHeadMac custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadMemIdx,
+ DecoderTableTHeadMemIdx32,
+ "XTHeadMemIdx custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadMemPair,
+ DecoderTableTHeadMemPair32,
+ "XTHeadMemPair custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadSync,
+ DecoderTableTHeadSync32,
+ "XTHeadSync custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXTHeadVdot, DecoderTableTHeadV32,
+ "XTHeadVdot custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXSfvcp, DecoderTableXSfvcp32,
+ "SiFive VCIX custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXSfcie, DecoderTableXSfcie32,
+ "Sifive CIE custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVbitmanip,
+ DecoderTableXCVbitmanip32,
+ "CORE-V Bit Manipulation custom opcode table");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVmac, DecoderTableXCVmac32,
+ "CORE-V MAC custom opcode table");
+ TRY_TO_DECODE(true, DecoderTable32, "RISCV32 table");
- LLVM_DEBUG(dbgs() << "Trying RISCV32 table :\n");
- Result = decodeInstruction(DecoderTable32, MI, Insn, Address, this, STI);
- Size = 4;
- } else {
- if (Bytes.size() < 2) {
- Size = 0;
- return MCDisassembler::Fail;
- }
- Insn = support::endian::read16le(Bytes.data());
-
- if (!STI.getFeatureBits()[RISCV::Feature64Bit]) {
- LLVM_DEBUG(
- dbgs() << "Trying RISCV32Only_16 table (16-bit Instruction):\n");
- // Calling the auto-generated decoder function.
- Result = decodeInstruction(DecoderTableRISCV32Only_16, MI, Insn, Address,
- this, STI);
- if (Result != MCDisassembler::Fail) {
- Size = 2;
- return Result;
- }
- }
-
- LLVM_DEBUG(dbgs() << "Trying RISCV_C table (16-bit Instruction):\n");
- // Calling the auto-generated decoder function.
- Result = decodeInstruction(DecoderTable16, MI, Insn, Address, this, STI);
- Size = 2;
+ return MCDisassembler::Fail;
}
- return Result;
+ if (Bytes.size() < 2) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+ Size = 2;
+
+ Insn = support::endian::read16le(Bytes.data());
+ TRY_TO_DECODE_AND_ADD_SP(!STI.hasFeature(RISCV::Feature64Bit),
+ DecoderTableRISCV32Only_16,
+ "RISCV32Only_16 table (16-bit Instruction)");
+ TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZcmt, DecoderTableRVZcmt16,
+ "Zcmt table (16-bit Table Jump Instructions)");
+ TRY_TO_DECODE_FEATURE(
+ RISCV::FeatureStdExtZcmp, DecoderTableRVZcmp16,
+ "Zcmp table (16-bit Push/Pop & Double Move Instructions)");
+ TRY_TO_DECODE_AND_ADD_SP(true, DecoderTable16,
+ "RISCV_C table (16-bit Instruction)");
+
+ return MCDisassembler::Fail;
}
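The rewritten getInstruction is now a cascade of TRY_TO_DECODE* macro invocations: each one checks a feature predicate, tries a decoder table, and returns on the first successful decode. A rough model of that control flow, with no LLVM dependencies:

#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

// Illustrative model of the TRY_TO_DECODE cascade above: walk a list of
// (feature check, decoder) pairs and return the first successful decode.
struct TableEntry {
  std::function<bool()> FeatureCheck;
  std::function<bool(uint32_t)> Decode; // returns true on success
  const char *Desc;
};

bool decodeWithTables(uint32_t Insn, const std::vector<TableEntry> &Tables) {
  for (const auto &T : Tables) {
    if (!T.FeatureCheck())
      continue;
    std::cout << "Trying " << T.Desc << "\n";
    if (T.Decode(Insn))
      return true; // first hit wins, exactly like the macro expansion
  }
  return false; // the MCDisassembler::Fail analogue
}

int main() {
  std::vector<TableEntry> Tables = {
      {[] { return false; }, [](uint32_t) { return true; }, "vendor table"},
      {[] { return true; }, [](uint32_t I) { return (I & 0x3) == 0x3; },
       "standard 32-bit table"}};
  return decodeWithTables(0x00000013, Tables) ? 0 : 1; // addi x0, x0, 0
}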
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index d265f3a12b7f..5505f89a32f2 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -14,22 +14,184 @@
#include "RISCVCallLowering.h"
#include "RISCVISelLowering.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
using namespace llvm;
+namespace {
+
+struct RISCVOutgoingValueAssigner : public CallLowering::OutgoingValueAssigner {
+private:
+ // The function used internally to assign args - we ignore the AssignFn stored
+ // by OutgoingValueAssigner since RISC-V implements its CC using a custom
+ // function with a different signature.
+ RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn;
+
+ // Whether this is assigning args for a return.
+ bool IsRet;
+
+public:
+ RISCVOutgoingValueAssigner(
+ RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet)
+ : CallLowering::OutgoingValueAssigner(nullptr),
+ RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet) {}
+
+ bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
+ CCState &State) override {
+ MachineFunction &MF = State.getMachineFunction();
+ const DataLayout &DL = MF.getDataLayout();
+ const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+
+ return RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT,
+ LocInfo, Flags, State, /*IsFixed=*/true, IsRet,
+ Info.Ty, *Subtarget.getTargetLowering(),
+ /*FirstMaskArgument=*/std::nullopt);
+ }
+};
+
+struct RISCVOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
+ RISCVOutgoingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ MachineInstrBuilder MIB)
+ : OutgoingValueHandler(B, MRI), MIB(MIB) {}
+
+ MachineInstrBuilder MIB;
+
+ Register getStackAddress(uint64_t MemSize, int64_t Offset,
+ MachinePointerInfo &MPO,
+ ISD::ArgFlagsTy Flags) override {
+ llvm_unreachable("not implemented");
+ }
+
+ void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
+ MachinePointerInfo &MPO, CCValAssign &VA) override {
+ llvm_unreachable("not implemented");
+ }
+
+ void assignValueToReg(Register ValVReg, Register PhysReg,
+ CCValAssign VA) override {
+ Register ExtReg = extendRegister(ValVReg, VA);
+ MIRBuilder.buildCopy(PhysReg, ExtReg);
+ MIB.addUse(PhysReg, RegState::Implicit);
+ }
+};
+
+struct RISCVIncomingValueAssigner : public CallLowering::IncomingValueAssigner {
+private:
+ // The function used internally to assign args - we ignore the AssignFn stored
+ // by IncomingValueAssigner since RISC-V implements its CC using a custom
+ // function with a different signature.
+ RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn;
+
+ // Whether this is assigning args from a return.
+ bool IsRet;
+
+public:
+ RISCVIncomingValueAssigner(
+ RISCVTargetLowering::RISCVCCAssignFn *RISCVAssignFn_, bool IsRet)
+ : CallLowering::IncomingValueAssigner(nullptr),
+ RISCVAssignFn(RISCVAssignFn_), IsRet(IsRet) {}
+
+ bool assignArg(unsigned ValNo, EVT OrigVT, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
+ CCState &State) override {
+ MachineFunction &MF = State.getMachineFunction();
+ const DataLayout &DL = MF.getDataLayout();
+ const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+
+ return RISCVAssignFn(DL, Subtarget.getTargetABI(), ValNo, ValVT, LocVT,
+ LocInfo, Flags, State, /*IsFixed=*/true, IsRet,
+ Info.Ty, *Subtarget.getTargetLowering(),
+ /*FirstMaskArgument=*/std::nullopt);
+ }
+};
+
+struct RISCVIncomingValueHandler : public CallLowering::IncomingValueHandler {
+ RISCVIncomingValueHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI)
+ : IncomingValueHandler(B, MRI) {}
+
+ Register getStackAddress(uint64_t MemSize, int64_t Offset,
+ MachinePointerInfo &MPO,
+ ISD::ArgFlagsTy Flags) override {
+ llvm_unreachable("not implemented");
+ }
+
+ void assignValueToAddress(Register ValVReg, Register Addr, LLT MemTy,
+ MachinePointerInfo &MPO, CCValAssign &VA) override {
+ llvm_unreachable("not implemented");
+ }
+
+ void assignValueToReg(Register ValVReg, Register PhysReg,
+ CCValAssign VA) override {
+ // Copy argument received in physical register to desired VReg.
+ MIRBuilder.getMBB().addLiveIn(PhysReg);
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
+ }
+};
+
+struct RISCVCallReturnHandler : public RISCVIncomingValueHandler {
+ RISCVCallReturnHandler(MachineIRBuilder &B, MachineRegisterInfo &MRI,
+ MachineInstrBuilder &MIB)
+ : RISCVIncomingValueHandler(B, MRI), MIB(MIB) {}
+
+ MachineInstrBuilder MIB;
+
+ void assignValueToReg(Register ValVReg, Register PhysReg,
+ CCValAssign VA) override {
+ // Copy argument received in physical register to desired VReg.
+ MIB.addDef(PhysReg, RegState::Implicit);
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
+ }
+};
+
+} // namespace
+
RISCVCallLowering::RISCVCallLowering(const RISCVTargetLowering &TLI)
: CallLowering(&TLI) {}
+bool RISCVCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
+ const Value *Val,
+ ArrayRef<Register> VRegs,
+ MachineInstrBuilder &Ret) const {
+ if (!Val)
+ return true;
+
+ // TODO: Only integer, pointer and aggregate types are supported now.
+ if (!Val->getType()->isIntOrPtrTy() && !Val->getType()->isAggregateType())
+ return false;
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ const DataLayout &DL = MF.getDataLayout();
+ const Function &F = MF.getFunction();
+ CallingConv::ID CC = F.getCallingConv();
+
+ ArgInfo OrigRetInfo(VRegs, Val->getType(), 0);
+ setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
+
+ SmallVector<ArgInfo, 4> SplitRetInfos;
+ splitToValueTypes(OrigRetInfo, SplitRetInfos, DL, CC);
+
+ RISCVOutgoingValueAssigner Assigner(
+ CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
+ /*IsRet=*/true);
+ RISCVOutgoingValueHandler Handler(MIRBuilder, MF.getRegInfo(), Ret);
+ return determineAndHandleAssignments(Handler, Assigner, SplitRetInfos,
+ MIRBuilder, CC, F.isVarArg());
+}
+
bool RISCVCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
const Value *Val, ArrayRef<Register> VRegs,
FunctionLoweringInfo &FLI) const {
-
+ assert(!Val == VRegs.empty() && "Return value without a vreg");
MachineInstrBuilder Ret = MIRBuilder.buildInstrNoInsert(RISCV::PseudoRET);
- if (Val != nullptr) {
+ if (!lowerReturnVal(MIRBuilder, Val, VRegs, Ret))
return false;
- }
+
MIRBuilder.insertInstr(Ret);
return true;
}
@@ -38,14 +200,130 @@ bool RISCVCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
const Function &F,
ArrayRef<ArrayRef<Register>> VRegs,
FunctionLoweringInfo &FLI) const {
-
+ // Early exit if there are no arguments.
if (F.arg_empty())
return true;
- return false;
+ // TODO: Support vararg functions.
+ if (F.isVarArg())
+ return false;
+
+ // TODO: Support all argument types.
+ for (auto &Arg : F.args()) {
+ if (Arg.getType()->isIntegerTy())
+ continue;
+ if (Arg.getType()->isPointerTy())
+ continue;
+ return false;
+ }
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ const DataLayout &DL = MF.getDataLayout();
+ CallingConv::ID CC = F.getCallingConv();
+
+ SmallVector<ArgInfo, 32> SplitArgInfos;
+ unsigned Index = 0;
+ for (auto &Arg : F.args()) {
+ // Construct the ArgInfo object from destination register and argument type.
+ ArgInfo AInfo(VRegs[Index], Arg.getType(), Index);
+ setArgFlags(AInfo, Index + AttributeList::FirstArgIndex, DL, F);
+
+ // Handle any required merging from split value types from physical
+ // registers into the desired VReg. ArgInfo objects are constructed
+ // correspondingly and appended to SplitArgInfos.
+ splitToValueTypes(AInfo, SplitArgInfos, DL, CC);
+
+ ++Index;
+ }
+
+ RISCVIncomingValueAssigner Assigner(
+ CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
+ /*IsRet=*/false);
+ RISCVIncomingValueHandler Handler(MIRBuilder, MF.getRegInfo());
+
+ return determineAndHandleAssignments(Handler, Assigner, SplitArgInfos,
+ MIRBuilder, CC, F.isVarArg());
}
bool RISCVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const {
- return false;
+ MachineFunction &MF = MIRBuilder.getMF();
+ const DataLayout &DL = MF.getDataLayout();
+ const Function &F = MF.getFunction();
+ CallingConv::ID CC = F.getCallingConv();
+
+ // TODO: Support vararg functions.
+ if (Info.IsVarArg)
+ return false;
+
+ // TODO: Support all argument types.
+ for (auto &AInfo : Info.OrigArgs) {
+ if (AInfo.Ty->isIntegerTy())
+ continue;
+ if (AInfo.Ty->isPointerTy())
+ continue;
+ if (AInfo.Ty->isFloatingPointTy())
+ continue;
+ return false;
+ }
+
+ SmallVector<ArgInfo, 32> SplitArgInfos;
+ SmallVector<ISD::OutputArg, 8> Outs;
+ for (auto &AInfo : Info.OrigArgs) {
+ // Handle any required unmerging of split value types from a given VReg into
+ // physical registers. ArgInfo objects are constructed correspondingly and
+ // appended to SplitArgInfos.
+ splitToValueTypes(AInfo, SplitArgInfos, DL, CC);
+ }
+
+ // TODO: Support tail calls.
+ Info.IsTailCall = false;
+
+ if (!Info.Callee.isReg())
+ Info.Callee.setTargetFlags(RISCVII::MO_CALL);
+
+ MachineInstrBuilder Call =
+ MIRBuilder
+ .buildInstrNoInsert(Info.Callee.isReg() ? RISCV::PseudoCALLIndirect
+ : RISCV::PseudoCALL)
+ .add(Info.Callee);
+
+ RISCVOutgoingValueAssigner ArgAssigner(
+ CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
+ /*IsRet=*/false);
+ RISCVOutgoingValueHandler ArgHandler(MIRBuilder, MF.getRegInfo(), Call);
+ if (!determineAndHandleAssignments(ArgHandler, ArgAssigner, SplitArgInfos,
+ MIRBuilder, CC, Info.IsVarArg))
+ return false;
+
+ MIRBuilder.insertInstr(Call);
+
+ if (Info.OrigRet.Ty->isVoidTy())
+ return true;
+
+ // TODO: Only integer, pointer and aggregate types are supported now.
+ if (!Info.OrigRet.Ty->isIntOrPtrTy() && !Info.OrigRet.Ty->isAggregateType())
+ return false;
+
+ SmallVector<ArgInfo, 4> SplitRetInfos;
+ splitToValueTypes(Info.OrigRet, SplitRetInfos, DL, CC);
+
+ // Assignments should be handled *before* the merging of values takes place.
+ // To ensure this, the insert point is temporarily adjusted to just after the
+ // call instruction.
+ MachineBasicBlock::iterator CallInsertPt = Call;
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), std::next(CallInsertPt));
+
+ RISCVIncomingValueAssigner RetAssigner(
+ CC == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV,
+ /*IsRet=*/true);
+ RISCVCallReturnHandler RetHandler(MIRBuilder, MF.getRegInfo(), Call);
+ if (!determineAndHandleAssignments(RetHandler, RetAssigner, SplitRetInfos,
+ MIRBuilder, CC, Info.IsVarArg))
+ return false;
+
+ // Readjust insert point to end of basic block.
+ MIRBuilder.setMBB(MIRBuilder.getMBB());
+
+ return true;
}
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h
index cd7fc4c76123..d80a666f3489 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.h
@@ -16,10 +16,11 @@
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
-#include "llvm/CodeGen/ValueTypes.h"
namespace llvm {
+class MachineInstrBuilder;
+class MachineIRBuilder;
class RISCVTargetLowering;
class RISCVCallLowering : public CallLowering {
@@ -37,6 +38,10 @@ public:
bool lowerCall(MachineIRBuilder &MIRBuilder,
CallLoweringInfo &Info) const override;
+
+private:
+ bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
+ ArrayRef<Register> VRegs, MachineInstrBuilder &Ret) const;
};
} // end namespace llvm
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 8dfd71ac0b6b..691439b3a18b 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -7,15 +7,15 @@
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
-/// RISCV.
+/// RISC-V.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
#include "RISCVRegisterBankInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index f6256defe5d3..3f829cc2e677 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -6,11 +6,12 @@
//
//===----------------------------------------------------------------------===//
/// \file
-/// This file implements the targeting of the Machinelegalizer class for RISCV.
+/// This file implements the targeting of the Machinelegalizer class for RISC-V.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
#include "RISCVLegalizerInfo.h"
+#include "RISCVSubtarget.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
@@ -19,5 +20,14 @@
using namespace llvm;
RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) {
+ const unsigned XLen = ST.getXLen();
+ const LLT XLenLLT = LLT::scalar(XLen);
+
+ using namespace TargetOpcode;
+
+ getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
+ .legalFor({XLenLLT})
+ .clampScalar(0, XLenLLT, XLenLLT);
+
getLegacyLegalizerInfo().computeTables();
}
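The new legalizer rules declare the XLen-wide scalar type legal for G_ADD, G_SUB, G_AND, G_OR and G_XOR and clamp every other scalar width to XLen. As a rough intuition for what clampScalar(0, XLenLLT, XLenLLT) means for operand widths (an illustration only, not the GlobalISel API):

#include <algorithm>
#include <cassert>

// Clamp a scalar bit width into [Min, Max]; with Min == Max == XLen this
// forces the scalar operands of the listed opcodes to be widened or narrowed
// to exactly XLen bits during legalization.
unsigned clampScalarWidth(unsigned Width, unsigned Min, unsigned Max) {
  return std::clamp(Width, Min, Max);
}

int main() {
  const unsigned XLen = 64; // e.g. RV64
  assert(clampScalarWidth(32, XLen, XLen) == 64);  // narrower scalars widen
  assert(clampScalarWidth(128, XLen, XLen) == 64); // wider scalars narrow
  assert(clampScalarWidth(64, XLen, XLen) == 64);
}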
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
index f2c2b9a3fd46..960410ead62c 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
/// \file
-/// This file declares the targeting of the Machinelegalizer class for RISCV.
+/// This file declares the targeting of the Machinelegalizer class for RISC-V.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
index 5371b790a148..9b601902ad20 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
/// \file
-/// This file implements the targeting of the RegisterBankInfo class for RISCV.
+/// This file implements the targeting of the RegisterBankInfo class for RISC-V.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
@@ -22,4 +22,5 @@
using namespace llvm;
-RISCVRegisterBankInfo::RISCVRegisterBankInfo(const TargetRegisterInfo &TRI) {}
+RISCVRegisterBankInfo::RISCVRegisterBankInfo(unsigned HwMode)
+ : RISCVGenRegisterBankInfo(HwMode) {}
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h
index 194a1548af24..ee6d4db27880 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h
+++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.h
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
/// \file
-/// This file declares the targeting of the RegisterBankInfo class for RISCV.
+/// This file declares the targeting of the RegisterBankInfo class for RISC-V.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//
@@ -31,7 +31,7 @@ protected:
/// This class provides the information for the target register banks.
class RISCVRegisterBankInfo final : public RISCVGenRegisterBankInfo {
public:
- RISCVRegisterBankInfo(const TargetRegisterInfo &TRI);
+ RISCVRegisterBankInfo(unsigned HwMode);
};
} // end namespace llvm
#endif
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td
index 400b65a1bf9a..b49f8259e382 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td
+++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBanks.td
@@ -1,4 +1,4 @@
-//=-- RISCVRegisterBank.td - Describe the RISCV Banks --------*- tablegen -*-=//
+//=-- RISCVRegisterBank.td - Describe the RISC-V Banks -------*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
index 277b976b313a..8f8684e30b3a 100644
--- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
+++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
@@ -13,6 +13,7 @@
#include "RISCVCustomBehaviour.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "RISCV.h"
#include "RISCVInstrInfo.h"
#include "TargetInfo/RISCVTargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
@@ -31,6 +32,7 @@ struct PseudoInfo {
uint16_t Pseudo;
uint16_t BaseInstr;
uint8_t VLMul;
+ uint8_t SEW;
};
#define GET_RISCVVInversePseudosTable_IMPL
@@ -56,7 +58,7 @@ uint8_t RISCVLMULInstrument::getLMUL() const {
// below
assert(isDataValid(getData()) &&
"Cannot get LMUL because invalid Data value");
- // These are the LMUL values that are used in RISCV tablegen
+ // These are the LMUL values that are used in RISC-V tablegen
return StringSwitch<uint8_t>(getData())
.Case("M1", 0b000)
.Case("M2", 0b001)
@@ -67,64 +69,178 @@ uint8_t RISCVLMULInstrument::getLMUL() const {
.Case("MF8", 0b111);
}
+const llvm::StringRef RISCVSEWInstrument::DESC_NAME = "RISCV-SEW";
+
+bool RISCVSEWInstrument::isDataValid(llvm::StringRef Data) {
+ // Return true if not one of the valid SEW strings
+  // Return true if Data is one of the valid SEW strings.
+ .Cases("E8", "E16", "E32", "E64", true)
+ .Default(false);
+}
+
+uint8_t RISCVSEWInstrument::getSEW() const {
+ // assertion prevents us from needing llvm_unreachable in the StringSwitch
+ // below
+ assert(isDataValid(getData()) && "Cannot get SEW because invalid Data value");
+  // These are the SEW values that are used in RISC-V tablegen
+ return StringSwitch<uint8_t>(getData())
+ .Case("E8", 8)
+ .Case("E16", 16)
+ .Case("E32", 32)
+ .Case("E64", 64);
+}
+
bool RISCVInstrumentManager::supportsInstrumentType(
llvm::StringRef Type) const {
- // Currently, only support for RISCVLMULInstrument type
- return Type == RISCVLMULInstrument::DESC_NAME;
+ return Type == RISCVLMULInstrument::DESC_NAME ||
+ Type == RISCVSEWInstrument::DESC_NAME;
}
-SharedInstrument
+UniqueInstrument
RISCVInstrumentManager::createInstrument(llvm::StringRef Desc,
llvm::StringRef Data) {
- if (Desc != RISCVLMULInstrument::DESC_NAME) {
- LLVM_DEBUG(dbgs() << "RVCB: Unknown instrumentation Desc: " << Desc
- << '\n');
- return nullptr;
+ if (Desc == RISCVLMULInstrument::DESC_NAME) {
+ if (!RISCVLMULInstrument::isDataValid(Data)) {
+ LLVM_DEBUG(dbgs() << "RVCB: Bad data for instrument kind " << Desc << ": "
+ << Data << '\n');
+ return nullptr;
+ }
+ return std::make_unique<RISCVLMULInstrument>(Data);
+ }
+
+ if (Desc == RISCVSEWInstrument::DESC_NAME) {
+ if (!RISCVSEWInstrument::isDataValid(Data)) {
+ LLVM_DEBUG(dbgs() << "RVCB: Bad data for instrument kind " << Desc << ": "
+ << Data << '\n');
+ return nullptr;
+ }
+ return std::make_unique<RISCVSEWInstrument>(Data);
}
- if (RISCVLMULInstrument::isDataValid(Data)) {
- LLVM_DEBUG(dbgs() << "RVCB: Bad data for instrument kind " << Desc << ": "
- << Data << '\n');
- return nullptr;
+
+ LLVM_DEBUG(dbgs() << "RVCB: Unknown instrumentation Desc: " << Desc << '\n');
+ return nullptr;
+}
+
+SmallVector<UniqueInstrument>
+RISCVInstrumentManager::createInstruments(const MCInst &Inst) {
+ if (Inst.getOpcode() == RISCV::VSETVLI ||
+ Inst.getOpcode() == RISCV::VSETIVLI) {
+    LLVM_DEBUG(dbgs() << "RVCB: Found vsetvli/vsetivli; creating instruments: "
+ << Inst << "\n");
+ unsigned VTypeI = Inst.getOperand(2).getImm();
+ RISCVII::VLMUL VLMUL = RISCVVType::getVLMUL(VTypeI);
+
+ StringRef LMUL;
+ switch (VLMUL) {
+ case RISCVII::LMUL_1:
+ LMUL = "M1";
+ break;
+ case RISCVII::LMUL_2:
+ LMUL = "M2";
+ break;
+ case RISCVII::LMUL_4:
+ LMUL = "M4";
+ break;
+ case RISCVII::LMUL_8:
+ LMUL = "M8";
+ break;
+ case RISCVII::LMUL_F2:
+ LMUL = "MF2";
+ break;
+ case RISCVII::LMUL_F4:
+ LMUL = "MF4";
+ break;
+ case RISCVII::LMUL_F8:
+ LMUL = "MF8";
+ break;
+ case RISCVII::LMUL_RESERVED:
+ llvm_unreachable("Cannot create instrument for LMUL_RESERVED");
+ }
+ SmallVector<UniqueInstrument> Instruments;
+ Instruments.emplace_back(
+ createInstrument(RISCVLMULInstrument::DESC_NAME, LMUL));
+
+ unsigned SEW = RISCVVType::getSEW(VTypeI);
+ StringRef SEWStr;
+ switch (SEW) {
+ case 8:
+ SEWStr = "E8";
+ break;
+ case 16:
+ SEWStr = "E16";
+ break;
+ case 32:
+ SEWStr = "E32";
+ break;
+ case 64:
+ SEWStr = "E64";
+ break;
+ default:
+ llvm_unreachable("Cannot create instrument for SEW");
+ }
+ Instruments.emplace_back(
+ createInstrument(RISCVSEWInstrument::DESC_NAME, SEWStr));
+
+ return Instruments;
}
- return std::make_shared<RISCVLMULInstrument>(Data);
+ return SmallVector<UniqueInstrument>();
}
unsigned RISCVInstrumentManager::getSchedClassID(
const MCInstrInfo &MCII, const MCInst &MCI,
- const llvm::SmallVector<SharedInstrument> &IVec) const {
+ const llvm::SmallVector<Instrument *> &IVec) const {
unsigned short Opcode = MCI.getOpcode();
unsigned SchedClassID = MCII.get(Opcode).getSchedClass();
- for (const auto &I : IVec) {
- // Unknown Instrument kind
- if (I->getDesc() == RISCVLMULInstrument::DESC_NAME) {
- uint8_t LMUL = static_cast<RISCVLMULInstrument *>(I.get())->getLMUL();
- const RISCVVInversePseudosTable::PseudoInfo *RVV =
- RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL);
- // Not a RVV instr
- if (!RVV) {
- LLVM_DEBUG(
- dbgs()
- << "RVCB: Could not find PseudoInstruction for Opcode "
- << MCII.getName(Opcode) << ", LMUL=" << I->getData()
- << ". Ignoring instrumentation and using original SchedClassID="
- << SchedClassID << '\n');
- return SchedClassID;
- }
-
- // Override using pseudo
- LLVM_DEBUG(dbgs() << "RVCB: Found Pseudo Instruction for Opcode "
- << MCII.getName(Opcode) << ", LMUL=" << I->getData()
- << ". Overriding original SchedClassID=" << SchedClassID
- << " with " << MCII.getName(RVV->Pseudo) << '\n');
- return MCII.get(RVV->Pseudo).getSchedClass();
- }
+ // Unpack all possible RISCV instruments from IVec.
+ RISCVLMULInstrument *LI = nullptr;
+ RISCVSEWInstrument *SI = nullptr;
+ for (auto &I : IVec) {
+ if (I->getDesc() == RISCVLMULInstrument::DESC_NAME)
+ LI = static_cast<RISCVLMULInstrument *>(I);
+ else if (I->getDesc() == RISCVSEWInstrument::DESC_NAME)
+ SI = static_cast<RISCVSEWInstrument *>(I);
+ }
+
+  // We need LMUL (or LMUL and SEW) to override the opcode. If no LMUL
+  // instrument was provided, there is nothing to override.
+ if (!LI) {
+ LLVM_DEBUG(
+ dbgs() << "RVCB: Did not use instrumentation to override Opcode.\n");
+ return SchedClassID;
+ }
+ uint8_t LMUL = LI->getLMUL();
+
+  // getBaseInfo is queried with (Opcode, LMUL, SEW) when a SEW instrument is
+  // active and the pseudo depends on both LMUL and SEW, and with
+  // (Opcode, LMUL, 0) when there is no SEW instrument or the pseudo does not
+  // depend on SEW.
+ uint8_t SEW = SI ? SI->getSEW() : 0;
+ // Check if it depends on LMUL and SEW
+ const RISCVVInversePseudosTable::PseudoInfo *RVV =
+ RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, SEW);
+ // Check if it depends only on LMUL
+ if (!RVV)
+ RVV = RISCVVInversePseudosTable::getBaseInfo(Opcode, LMUL, 0);
+
+ // Not a RVV instr
+ if (!RVV) {
+ LLVM_DEBUG(
+ dbgs() << "RVCB: Could not find PseudoInstruction for Opcode "
+ << MCII.getName(Opcode)
+ << ", LMUL=" << (LI ? LI->getData() : "Unspecified")
+ << ", SEW=" << (SI ? SI->getData() : "Unspecified")
+ << ". Ignoring instrumentation and using original SchedClassID="
+ << SchedClassID << '\n');
+ return SchedClassID;
}
- // Unknown Instrument kind
- LLVM_DEBUG(
- dbgs() << "RVCB: Did not use instrumentation to override Opcode.\n");
- return SchedClassID;
+ // Override using pseudo
+ LLVM_DEBUG(dbgs() << "RVCB: Found Pseudo Instruction for Opcode "
+ << MCII.getName(Opcode) << ", LMUL=" << LI->getData()
+ << ", SEW=" << (SI ? SI->getData() : "Unspecified")
+ << ". Overriding original SchedClassID=" << SchedClassID
+ << " with " << MCII.getName(RVV->Pseudo) << '\n');
+ return MCII.get(RVV->Pseudo).getSchedClass();
}
} // namespace mca
@@ -139,7 +255,7 @@ createRISCVInstrumentManager(const MCSubtargetInfo &STI,
return new RISCVInstrumentManager(STI, MCII);
}
-/// Extern function to initialize the targets for the RISCV backend
+/// Extern function to initialize the targets for the RISC-V backend
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTargetMCA() {
TargetRegistry::RegisterInstrumentManager(getTheRISCV32Target(),
createRISCVInstrumentManager);
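
For readers following the new instrument flow above: a minimal sketch, not part of the patch, of how a vtype immediate maps to the LMUL/SEW data strings that createInstruments produces. It assumes the RISCVVType helpers declared in RISCVBaseInfo.h (getVLMUL, decodeVLMUL, getSEW); the function name decodeVTypeForInstruments is illustrative only.

#include "MCTargetDesc/RISCVBaseInfo.h"
#include <string>
#include <utility>

using namespace llvm;

// Decode a vtype immediate (e.g. operand 2 of VSETVLI/VSETIVLI) into the
// strings used as Instrument data, mirroring createInstruments above.
static std::pair<std::string, std::string>
decodeVTypeForInstruments(unsigned VTypeI) {
  RISCVII::VLMUL VLMUL = RISCVVType::getVLMUL(VTypeI);
  auto [Mul, Fractional] = RISCVVType::decodeVLMUL(VLMUL);
  // "M1".."M8" or "MF2".."MF8".
  std::string LMUL = (Fractional ? "MF" : "M") + std::to_string(Mul);
  // "E8".."E64".
  std::string SEW = "E" + std::to_string(RISCVVType::getSEW(VTypeI));
  return {LMUL, SEW};
}
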
diff --git a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.h b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.h
index b3737c98b092..34efa0b2ebad 100644
--- a/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.h
+++ b/llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.h
@@ -31,13 +31,25 @@ public:
static const StringRef DESC_NAME;
static bool isDataValid(StringRef Data);
- RISCVLMULInstrument(StringRef Data) : Instrument(DESC_NAME, Data) {}
+ explicit RISCVLMULInstrument(StringRef Data) : Instrument(DESC_NAME, Data) {}
~RISCVLMULInstrument() = default;
uint8_t getLMUL() const;
};
+class RISCVSEWInstrument : public Instrument {
+public:
+ static const StringRef DESC_NAME;
+ static bool isDataValid(StringRef Data);
+
+ explicit RISCVSEWInstrument(StringRef Data) : Instrument(DESC_NAME, Data) {}
+
+ ~RISCVSEWInstrument() = default;
+
+ uint8_t getSEW() const;
+};
+
class RISCVInstrumentManager : public InstrumentManager {
public:
RISCVInstrumentManager(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
@@ -46,14 +58,16 @@ public:
bool shouldIgnoreInstruments() const override { return false; }
bool supportsInstrumentType(StringRef Type) const override;
- /// Create a Instrument for RISCV target
- SharedInstrument createInstrument(StringRef Desc, StringRef Data) override;
+ /// Create a Instrument for RISC-V target
+ UniqueInstrument createInstrument(StringRef Desc, StringRef Data) override;
+
+ SmallVector<UniqueInstrument> createInstruments(const MCInst &Inst) override;
/// Using the Instrument, returns a SchedClassID to use instead of
/// the SchedClassID that belongs to the MCI or the original SchedClassID.
unsigned
getSchedClassID(const MCInstrInfo &MCII, const MCInst &MCI,
- const SmallVector<SharedInstrument> &IVec) const override;
+ const SmallVector<Instrument *> &IVec) const override;
};
} // namespace mca
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 892c406f1e68..1b890fbe041a 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -53,6 +53,7 @@ RISCVAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
// name offset bits flags
{"fixup_riscv_hi20", 12, 20, 0},
{"fixup_riscv_lo12_i", 20, 12, 0},
+ {"fixup_riscv_12_i", 20, 12, 0},
{"fixup_riscv_lo12_s", 0, 32, 0},
{"fixup_riscv_pcrel_hi20", 12, 20,
MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsTarget},
@@ -134,7 +135,7 @@ bool RISCVAsmBackend::shouldForceRelocation(const MCAssembler &Asm,
return true;
}
- return STI.getFeatureBits()[RISCV::FeatureRelax] || ForceRelocs;
+ return STI.hasFeature(RISCV::FeatureRelax) || ForceRelocs;
}
bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
@@ -143,6 +144,15 @@ bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
const MCRelaxableFragment *DF,
const MCAsmLayout &Layout,
const bool WasForced) const {
+ int64_t Offset = int64_t(Value);
+ unsigned Kind = Fixup.getTargetKind();
+
+ // We only do conditional branch relaxation when the symbol is resolved.
+ // For conditional branch, the immediate must be in the range
+ // [-4096, 4094].
+ if (Kind == RISCV::fixup_riscv_branch)
+ return Resolved && !isInt<13>(Offset);
+
// Return true if the symbol is actually unresolved.
// Resolved could be always false when shouldForceRelocation return true.
// We use !WasForced to indicate that the symbol is unresolved and not forced
@@ -150,8 +160,7 @@ bool RISCVAsmBackend::fixupNeedsRelaxationAdvanced(const MCFixup &Fixup,
if (!Resolved && !WasForced)
return true;
- int64_t Offset = int64_t(Value);
- switch (Fixup.getTargetKind()) {
+ switch (Kind) {
default:
return false;
case RISCV::fixup_riscv_rvc_branch:
@@ -174,12 +183,24 @@ void RISCVAsmBackend::relaxInstruction(MCInst &Inst,
case RISCV::C_BEQZ:
case RISCV::C_BNEZ:
case RISCV::C_J:
- case RISCV::C_JAL:
+ case RISCV::C_JAL: {
bool Success = RISCVRVC::uncompress(Res, Inst, STI);
assert(Success && "Can't uncompress instruction");
(void)Success;
break;
}
+ case RISCV::BEQ:
+ case RISCV::BNE:
+ case RISCV::BLT:
+ case RISCV::BGE:
+ case RISCV::BLTU:
+ case RISCV::BGEU:
+ Res.setOpcode(getRelaxedOpcode(Inst.getOpcode()));
+ Res.addOperand(Inst.getOperand(0));
+ Res.addOperand(Inst.getOperand(1));
+ Res.addOperand(Inst.getOperand(2));
+ break;
+ }
Inst = std::move(Res);
}
@@ -210,7 +231,7 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCDwarfLineAddrFragment &DF,
}
unsigned Offset;
- std::pair<unsigned, unsigned> Fixup;
+ std::pair<MCFixupKind, MCFixupKind> Fixup;
// According to the DWARF specification, the `DW_LNS_fixed_advance_pc` opcode
// takes a single unsigned half (unencoded) operand. The maximum encodable
@@ -223,23 +244,19 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCDwarfLineAddrFragment &DF,
OS << uint8_t(dwarf::DW_LNE_set_address);
Offset = OS.tell();
- Fixup = PtrSize == 4 ? std::make_pair(RISCV::fixup_riscv_add_32,
- RISCV::fixup_riscv_sub_32)
- : std::make_pair(RISCV::fixup_riscv_add_64,
- RISCV::fixup_riscv_sub_64);
+ assert((PtrSize == 4 || PtrSize == 8) && "Unexpected pointer size");
+ Fixup = RISCV::getRelocPairForSize(PtrSize);
OS.write_zeros(PtrSize);
} else {
OS << uint8_t(dwarf::DW_LNS_fixed_advance_pc);
Offset = OS.tell();
- Fixup = {RISCV::fixup_riscv_add_16, RISCV::fixup_riscv_sub_16};
+ Fixup = RISCV::getRelocPairForSize(2);
support::endian::write<uint16_t>(OS, 0, support::little);
}
const MCBinaryExpr &MBE = cast<MCBinaryExpr>(AddrDelta);
- Fixups.push_back(MCFixup::create(
- Offset, MBE.getLHS(), static_cast<MCFixupKind>(std::get<0>(Fixup))));
- Fixups.push_back(MCFixup::create(
- Offset, MBE.getRHS(), static_cast<MCFixupKind>(std::get<1>(Fixup))));
+ Fixups.push_back(MCFixup::create(Offset, MBE.getLHS(), std::get<0>(Fixup)));
+ Fixups.push_back(MCFixup::create(Offset, MBE.getRHS(), std::get<1>(Fixup)));
if (LineDelta == INT64_MAX) {
OS << uint8_t(dwarf::DW_LNS_extended_op);
@@ -256,13 +273,14 @@ bool RISCVAsmBackend::relaxDwarfLineAddr(MCDwarfLineAddrFragment &DF,
bool RISCVAsmBackend::relaxDwarfCFA(MCDwarfCallFrameFragment &DF,
MCAsmLayout &Layout,
bool &WasRelaxed) const {
-
const MCExpr &AddrDelta = DF.getAddrDelta();
SmallVectorImpl<char> &Data = DF.getContents();
SmallVectorImpl<MCFixup> &Fixups = DF.getFixups();
size_t OldSize = Data.size();
int64_t Value;
+ if (AddrDelta.evaluateAsAbsolute(Value, Layout.getAssembler()))
+ return false;
bool IsAbsolute = AddrDelta.evaluateKnownAbsolute(Value, Layout);
assert(IsAbsolute && "CFA with invalid expression");
(void)IsAbsolute;
@@ -325,6 +343,18 @@ unsigned RISCVAsmBackend::getRelaxedOpcode(unsigned Op) const {
case RISCV::C_J:
case RISCV::C_JAL: // fall through.
return RISCV::JAL;
+ case RISCV::BEQ:
+ return RISCV::PseudoLongBEQ;
+ case RISCV::BNE:
+ return RISCV::PseudoLongBNE;
+ case RISCV::BLT:
+ return RISCV::PseudoLongBLT;
+ case RISCV::BGE:
+ return RISCV::PseudoLongBGE;
+ case RISCV::BLTU:
+ return RISCV::PseudoLongBLTU;
+ case RISCV::BGEU:
+ return RISCV::PseudoLongBGEU;
}
}
@@ -346,11 +376,11 @@ bool RISCVAsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
Count -= 1;
}
- bool HasStdExtC = STI->getFeatureBits()[RISCV::FeatureStdExtC];
- bool HasStdExtZca = STI->getFeatureBits()[RISCV::FeatureExtZca];
+ bool UseCompressedNop = STI->hasFeature(RISCV::FeatureStdExtC) ||
+ STI->hasFeature(RISCV::FeatureStdExtZca);
// The canonical nop on RVC is c.nop.
if (Count % 4 == 2) {
- OS.write((HasStdExtC || HasStdExtZca) ? "\x01\0" : "\0\0", 2);
+ OS.write(UseCompressedNop ? "\x01\0" : "\0\0", 2);
Count -= 2;
}
@@ -393,6 +423,12 @@ static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case RISCV::fixup_riscv_pcrel_lo12_i:
case RISCV::fixup_riscv_tprel_lo12_i:
return Value & 0xfff;
+ case RISCV::fixup_riscv_12_i:
+ if (!isInt<12>(Value)) {
+ Ctx.reportError(Fixup.getLoc(),
+ "operand must be a constant 12-bit integer");
+ }
+ return Value & 0xfff;
case RISCV::fixup_riscv_lo12_s:
case RISCV::fixup_riscv_pcrel_lo12_s:
case RISCV::fixup_riscv_tprel_lo12_s:
@@ -536,6 +572,48 @@ bool RISCVAsmBackend::evaluateTargetFixup(
return true;
}
+bool RISCVAsmBackend::handleAddSubRelocations(const MCAsmLayout &Layout,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ const MCValue &Target,
+ uint64_t &FixedValue) const {
+ uint64_t FixedValueA, FixedValueB;
+ unsigned TA = 0, TB = 0;
+ switch (Fixup.getKind()) {
+ case llvm::FK_Data_1:
+ TA = ELF::R_RISCV_ADD8;
+ TB = ELF::R_RISCV_SUB8;
+ break;
+ case llvm::FK_Data_2:
+ TA = ELF::R_RISCV_ADD16;
+ TB = ELF::R_RISCV_SUB16;
+ break;
+ case llvm::FK_Data_4:
+ TA = ELF::R_RISCV_ADD32;
+ TB = ELF::R_RISCV_SUB32;
+ break;
+ case llvm::FK_Data_8:
+ TA = ELF::R_RISCV_ADD64;
+ TB = ELF::R_RISCV_SUB64;
+ break;
+ default:
+ llvm_unreachable("unsupported fixup size");
+ }
+ MCValue A = MCValue::get(Target.getSymA(), nullptr, Target.getConstant());
+ MCValue B = MCValue::get(Target.getSymB());
+ auto FA = MCFixup::create(
+ Fixup.getOffset(), nullptr,
+ static_cast<MCFixupKind>(FirstLiteralRelocationKind + TA));
+ auto FB = MCFixup::create(
+ Fixup.getOffset(), nullptr,
+ static_cast<MCFixupKind>(FirstLiteralRelocationKind + TB));
+ auto &Asm = Layout.getAssembler();
+ Asm.getWriter().recordRelocation(Asm, Layout, &F, FA, A, FixedValueA);
+ Asm.getWriter().recordRelocation(Asm, Layout, &F, FB, B, FixedValueB);
+ FixedValue = FixedValueA - FixedValueB;
+ return true;
+}
+
void RISCVAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target,
MutableArrayRef<char> Data, uint64_t Value,
@@ -574,11 +652,11 @@ bool RISCVAsmBackend::shouldInsertExtraNopBytesForCodeAlign(
const MCAlignFragment &AF, unsigned &Size) {
// Calculate Nops Size only when linker relaxation enabled.
const MCSubtargetInfo *STI = AF.getSubtargetInfo();
- if (!STI->getFeatureBits()[RISCV::FeatureRelax])
+ if (!STI->hasFeature(RISCV::FeatureRelax))
return false;
- bool UseCompressedNop = STI->getFeatureBits()[RISCV::FeatureStdExtC] ||
- STI->getFeatureBits()[RISCV::FeatureExtZca];
+ bool UseCompressedNop = STI->hasFeature(RISCV::FeatureStdExtC) ||
+ STI->hasFeature(RISCV::FeatureStdExtZca);
unsigned MinNopLen = UseCompressedNop ? 2 : 4;
if (AF.getAlignment() <= MinNopLen) {
@@ -599,7 +677,7 @@ bool RISCVAsmBackend::shouldInsertFixupForCodeAlign(MCAssembler &Asm,
MCAlignFragment &AF) {
// Insert the fixup only when linker relaxation enabled.
const MCSubtargetInfo *STI = AF.getSubtargetInfo();
- if (!STI->getFeatureBits()[RISCV::FeatureRelax])
+ if (!STI->hasFeature(RISCV::FeatureRelax))
return false;
// Calculate total Nops we need to insert. If there are none to insert
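
As a side note on the branch-relaxation change above: a small sketch, not part of the patch, of the range test it introduces. B-type conditional branches encode a 13-bit signed, 2-byte-aligned offset, so resolved targets outside [-4096, 4094] force relaxation to the PseudoLongBxx forms; branchNeedsRelaxation is an illustrative name.

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Returns true when a resolved conditional branch must be relaxed to the
// long-branch pseudo (same condition as fixupNeedsRelaxationAdvanced above).
static bool branchNeedsRelaxation(int64_t Offset, bool Resolved) {
  return Resolved && !llvm::isInt<13>(Offset);
}
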
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
index 7e380ab44da4..0ea1f32e8296 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.h
@@ -1,4 +1,4 @@
-//===-- RISCVAsmBackend.h - RISCV Assembler Backend -----------------------===//
+//===-- RISCVAsmBackend.h - RISC-V Assembler Backend ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -31,8 +31,8 @@ class RISCVAsmBackend : public MCAsmBackend {
public:
RISCVAsmBackend(const MCSubtargetInfo &STI, uint8_t OSABI, bool Is64Bit,
const MCTargetOptions &Options)
- : MCAsmBackend(support::little), STI(STI), OSABI(OSABI), Is64Bit(Is64Bit),
- TargetOptions(Options) {
+ : MCAsmBackend(support::little, RISCV::fixup_riscv_relax), STI(STI),
+ OSABI(OSABI), Is64Bit(Is64Bit), TargetOptions(Options) {
RISCVFeatures::validate(STI.getTargetTriple(), STI.getFeatureBits());
}
~RISCVAsmBackend() override = default;
@@ -53,6 +53,10 @@ public:
const MCValue &Target, uint64_t &Value,
bool &WasForced) override;
+ bool handleAddSubRelocations(const MCAsmLayout &Layout, const MCFragment &F,
+ const MCFixup &Fixup, const MCValue &Target,
+ uint64_t &FixedValue) const override;
+
void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
const MCValue &Target, MutableArrayRef<char> Data,
uint64_t Value, bool IsResolved,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
index 3292df6a966a..0a42c6faee29 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVBaseInfo.cpp - Top level definitions for RISCV MC ------------===//
+//===-- RISCVBaseInfo.cpp - Top level definitions for RISC-V MC -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,20 +6,20 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains small standalone enum definitions for the RISCV target
+// This file contains small standalone enum definitions for the RISC-V target
// useful for the compiler back-end and the MC libraries.
//
//===----------------------------------------------------------------------===//
#include "RISCVBaseInfo.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/RISCVISAInfo.h"
-#include "llvm/Support/TargetParser.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/TargetParser.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
@@ -27,6 +27,7 @@ extern const SubtargetFeatureKV RISCVFeatureKV[RISCV::NumSubtargetFeatures];
namespace RISCVSysReg {
#define GET_SysRegsList_IMPL
+#define GET_SiFiveRegsList_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace RISCVSysReg
@@ -36,11 +37,11 @@ namespace RISCVInsnOpcode {
} // namespace RISCVInsnOpcode
namespace RISCVABI {
-ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
+ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits,
StringRef ABIName) {
auto TargetABI = getTargetABI(ABIName);
bool IsRV64 = TT.isArch64Bit();
- bool IsRV32E = FeatureBits[RISCV::FeatureRV32E];
+ bool IsRVE = FeatureBits[RISCV::FeatureRVE];
if (!ABIName.empty() && TargetABI == ABI_Unknown) {
errs()
@@ -54,11 +55,18 @@ ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
errs() << "64-bit ABIs are not supported for 32-bit targets (ignoring "
"target-abi)\n";
TargetABI = ABI_Unknown;
- } else if (IsRV32E && TargetABI != ABI_ILP32E && TargetABI != ABI_Unknown) {
+ } else if (!IsRV64 && IsRVE && TargetABI != ABI_ILP32E &&
+ TargetABI != ABI_Unknown) {
// TODO: move this checking to RISCVTargetLowering and RISCVAsmParser
errs()
<< "Only the ilp32e ABI is supported for RV32E (ignoring target-abi)\n";
TargetABI = ABI_Unknown;
+ } else if (IsRV64 && IsRVE && TargetABI != ABI_LP64E &&
+ TargetABI != ABI_Unknown) {
+ // TODO: move this checking to RISCVTargetLowering and RISCVAsmParser
+ errs()
+ << "Only the lp64e ABI is supported for RV64E (ignoring target-abi)\n";
+ TargetABI = ABI_Unknown;
}
if (TargetABI != ABI_Unknown)
@@ -80,6 +88,7 @@ ABI getTargetABI(StringRef ABIName) {
.Case("lp64", ABI_LP64)
.Case("lp64f", ABI_LP64F)
.Case("lp64d", ABI_LP64D)
+ .Case("lp64e", ABI_LP64E)
.Default(ABI_Unknown);
return TargetABI;
}
@@ -90,7 +99,7 @@ ABI getTargetABI(StringRef ABIName) {
MCRegister getBPReg() { return RISCV::X9; }
// Returns the register holding shadow call stack pointer.
-MCRegister getSCSPReg() { return RISCV::X18; }
+MCRegister getSCSPReg() { return RISCV::X3; }
} // namespace RISCVABI
@@ -101,8 +110,6 @@ void validate(const Triple &TT, const FeatureBitset &FeatureBits) {
report_fatal_error("RV64 target requires an RV64 CPU");
if (!TT.isArch64Bit() && !FeatureBits[RISCV::Feature32Bit])
report_fatal_error("RV32 target requires an RV32 CPU");
- if (TT.isArch64Bit() && FeatureBits[RISCV::FeatureRV32E])
- report_fatal_error("RV32E can't be enabled for an RV64 target");
if (FeatureBits[RISCV::Feature32Bit] &&
FeatureBits[RISCV::Feature64Bit])
report_fatal_error("RV32 and RV64 can't be combined");
@@ -214,4 +221,93 @@ bool RISCVRVC::uncompress(MCInst &OutInst, const MCInst &MI,
return uncompressInst(OutInst, MI, STI);
}
+// Lookup table for fli.s for entries 2-31.
+static constexpr std::pair<uint8_t, uint8_t> LoadFP32ImmArr[] = {
+ {0b01101111, 0b00}, {0b01110000, 0b00}, {0b01110111, 0b00},
+ {0b01111000, 0b00}, {0b01111011, 0b00}, {0b01111100, 0b00},
+ {0b01111101, 0b00}, {0b01111101, 0b01}, {0b01111101, 0b10},
+ {0b01111101, 0b11}, {0b01111110, 0b00}, {0b01111110, 0b01},
+ {0b01111110, 0b10}, {0b01111110, 0b11}, {0b01111111, 0b00},
+ {0b01111111, 0b01}, {0b01111111, 0b10}, {0b01111111, 0b11},
+ {0b10000000, 0b00}, {0b10000000, 0b01}, {0b10000000, 0b10},
+ {0b10000001, 0b00}, {0b10000010, 0b00}, {0b10000011, 0b00},
+ {0b10000110, 0b00}, {0b10000111, 0b00}, {0b10001110, 0b00},
+ {0b10001111, 0b00}, {0b11111111, 0b00}, {0b11111111, 0b10},
+};
+
+int RISCVLoadFPImm::getLoadFPImm(APFloat FPImm) {
+ assert((&FPImm.getSemantics() == &APFloat::IEEEsingle() ||
+ &FPImm.getSemantics() == &APFloat::IEEEdouble() ||
+ &FPImm.getSemantics() == &APFloat::IEEEhalf()) &&
+ "Unexpected semantics");
+
+ // Handle the minimum normalized value which is different for each type.
+ if (FPImm.isSmallestNormalized())
+ return 1;
+
+ // Convert to single precision to use its lookup table.
+ bool LosesInfo;
+ APFloat::opStatus Status = FPImm.convert(
+ APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo);
+ if (Status != APFloat::opOK || LosesInfo)
+ return -1;
+
+ APInt Imm = FPImm.bitcastToAPInt();
+
+ if (Imm.extractBitsAsZExtValue(21, 0) != 0)
+ return -1;
+
+ bool Sign = Imm.extractBitsAsZExtValue(1, 31);
+ uint8_t Mantissa = Imm.extractBitsAsZExtValue(2, 21);
+ uint8_t Exp = Imm.extractBitsAsZExtValue(8, 23);
+
+ auto EMI = llvm::lower_bound(LoadFP32ImmArr, std::make_pair(Exp, Mantissa));
+ if (EMI == std::end(LoadFP32ImmArr) || EMI->first != Exp ||
+ EMI->second != Mantissa)
+ return -1;
+
+ // Table doesn't have entry 0 or 1.
+ int Entry = std::distance(std::begin(LoadFP32ImmArr), EMI) + 2;
+
+ // The only legal negative value is -1.0(entry 0). 1.0 is entry 16.
+ if (Sign) {
+ if (Entry == 16)
+ return 0;
+    return -1;
+ }
+
+ return Entry;
+}
+
+float RISCVLoadFPImm::getFPImm(unsigned Imm) {
+ assert(Imm != 1 && Imm != 30 && Imm != 31 && "Unsupported immediate");
+
+ // Entry 0 is -1.0, the only negative value. Entry 16 is 1.0.
+ uint32_t Sign = 0;
+ if (Imm == 0) {
+ Sign = 0b1;
+ Imm = 16;
+ }
+
+ uint32_t Exp = LoadFP32ImmArr[Imm - 2].first;
+ uint32_t Mantissa = LoadFP32ImmArr[Imm - 2].second;
+
+ uint32_t I = Sign << 31 | Exp << 23 | Mantissa << 21;
+ return bit_cast<float>(I);
+}
+
+void RISCVZC::printRlist(unsigned SlistEncode, raw_ostream &OS) {
+ OS << "{ra";
+ if (SlistEncode > 4) {
+ OS << ", s0";
+ if (SlistEncode == 15)
+ OS << "-s11";
+ else if (SlistEncode > 5 && SlistEncode <= 14)
+ OS << "-s" << (SlistEncode - 5);
+ }
+ OS << "}";
+}
+
+void RISCVZC::printSpimm(int64_t Spimm, raw_ostream &OS) { OS << Spimm; }
+
} // namespace llvm
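
A brief sketch, not part of the patch, showing the intended round trip through the fli immediate helpers added above; roundTripsAsFLImm is an illustrative name. Entries 1, 30 and 31 (minimum normal, infinity, NaN) are special-cased by getFPImm, so they are skipped here.

#include "MCTargetDesc/RISCVBaseInfo.h"
#include "llvm/ADT/APFloat.h"

using namespace llvm;

// True if F has an fli encoding in the shared table and decodes back to the
// same single-precision value.
static bool roundTripsAsFLImm(float F) {
  int Entry = RISCVLoadFPImm::getLoadFPImm(APFloat(F));
  if (Entry < 0 || Entry == 1 || Entry == 30 || Entry == 31)
    return false; // Not encodable, or one of the special-cased entries.
  return RISCVLoadFPImm::getFPImm(Entry) == F;
}
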
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 2cf2045c1719..f86419319dd3 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -1,4 +1,4 @@
-//===-- RISCVBaseInfo.h - Top level definitions for RISCV MC ----*- C++ -*-===//
+//===-- RISCVBaseInfo.h - Top level definitions for RISC-V MC ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains small standalone enum definitions for the RISCV target
+// This file contains small standalone enum definitions for the RISC-V target
// useful for the compiler back-end and the MC libraries.
//
//===----------------------------------------------------------------------===//
@@ -14,11 +14,13 @@
#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVBASEINFO_H
#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCInstrDesc.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/RISCVISAInfo.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
namespace llvm {
@@ -43,34 +45,37 @@ enum {
InstFormatCA = 14,
InstFormatCB = 15,
InstFormatCJ = 16,
- InstFormatOther = 17,
+ InstFormatCU = 17,
+ InstFormatCLB = 18,
+ InstFormatCLH = 19,
+ InstFormatCSB = 20,
+ InstFormatCSH = 21,
+ InstFormatOther = 22,
InstFormatMask = 31,
InstFormatShift = 0,
ConstraintShift = InstFormatShift + 5,
+ VS2Constraint = 0b001 << ConstraintShift,
+ VS1Constraint = 0b010 << ConstraintShift,
+ VMConstraint = 0b100 << ConstraintShift,
ConstraintMask = 0b111 << ConstraintShift,
VLMulShift = ConstraintShift + 3,
VLMulMask = 0b111 << VLMulShift,
- // Do we need to add a dummy mask op when converting RVV Pseudo to MCInst.
- HasDummyMaskOpShift = VLMulShift + 3,
- HasDummyMaskOpMask = 1 << HasDummyMaskOpShift,
-
// Force a tail agnostic policy even this instruction has a tied destination.
- ForceTailAgnosticShift = HasDummyMaskOpShift + 1,
+ ForceTailAgnosticShift = VLMulShift + 3,
ForceTailAgnosticMask = 1 << ForceTailAgnosticShift,
- // Does this instruction have a merge operand that must be removed when
- // converting to MCInst. It will be the first explicit use operand. Used by
- // RVV Pseudos.
- HasMergeOpShift = ForceTailAgnosticShift + 1,
- HasMergeOpMask = 1 << HasMergeOpShift,
+  // Is this a _TIED vector pseudo instruction? For these instructions we
+  // shouldn't skip the tied operand when converting to MC instructions.
+ IsTiedPseudoShift = ForceTailAgnosticShift + 1,
+ IsTiedPseudoMask = 1 << IsTiedPseudoShift,
// Does this instruction have a SEW operand. It will be the last explicit
// operand unless there is a vector policy operand. Used by RVV Pseudos.
- HasSEWOpShift = HasMergeOpShift + 1,
+ HasSEWOpShift = IsTiedPseudoShift + 1,
HasSEWOpMask = 1 << HasSEWOpShift,
// Does this instruction have a VL operand. It will be the second to last
@@ -102,14 +107,12 @@ enum {
// in bits 63:31. Used by the SExtWRemoval pass.
IsSignExtendingOpWShift = UsesMaskPolicyShift + 1,
IsSignExtendingOpWMask = 1ULL << IsSignExtendingOpWShift,
-};
-// Match with the definitions in RISCVInstrFormats.td
-enum VConstraintType {
- NoConstraint = 0,
- VS2Constraint = 0b001,
- VS1Constraint = 0b010,
- VMConstraint = 0b100,
+ HasRoundModeOpShift = IsSignExtendingOpWShift + 1,
+ HasRoundModeOpMask = 1 << HasRoundModeOpShift,
+
+ UsesVXRMShift = HasRoundModeOpShift + 1,
+ UsesVXRMMask = 1 << UsesVXRMShift,
};
enum VLMUL : uint8_t {
@@ -134,26 +137,17 @@ enum {
static inline unsigned getFormat(uint64_t TSFlags) {
return (TSFlags & InstFormatMask) >> InstFormatShift;
}
-/// \returns the constraint for the instruction.
-static inline VConstraintType getConstraint(uint64_t TSFlags) {
- return static_cast<VConstraintType>((TSFlags & ConstraintMask) >>
- ConstraintShift);
-}
/// \returns the LMUL for the instruction.
static inline VLMUL getLMul(uint64_t TSFlags) {
return static_cast<VLMUL>((TSFlags & VLMulMask) >> VLMulShift);
}
-/// \returns true if there is a dummy mask operand for the instruction.
-static inline bool hasDummyMaskOp(uint64_t TSFlags) {
- return TSFlags & HasDummyMaskOpMask;
-}
/// \returns true if tail agnostic is enforced for the instruction.
static inline bool doesForceTailAgnostic(uint64_t TSFlags) {
return TSFlags & ForceTailAgnosticMask;
}
-/// \returns true if there is a merge operand for the instruction.
-static inline bool hasMergeOp(uint64_t TSFlags) {
- return TSFlags & HasMergeOpMask;
+/// \returns true if this is a _TIED pseudo.
+static inline bool isTiedPseudo(uint64_t TSFlags) {
+ return TSFlags & IsTiedPseudoMask;
}
/// \returns true if there is a SEW operand for the instruction.
static inline bool hasSEWOp(uint64_t TSFlags) {
@@ -176,12 +170,14 @@ static inline bool usesMaskPolicy(uint64_t TSFlags) {
return TSFlags & UsesMaskPolicyMask;
}
-static inline unsigned getMergeOpNum(const MCInstrDesc &Desc) {
- assert(hasMergeOp(Desc.TSFlags));
- assert(!Desc.isVariadic());
- return Desc.getNumDefs();
+/// \returns true if there is a rounding mode operand for this instruction
+static inline bool hasRoundModeOp(uint64_t TSFlags) {
+ return TSFlags & HasRoundModeOpMask;
}
+/// \returns true if this instruction uses vxrm
+static inline bool usesVXRM(uint64_t TSFlags) { return TSFlags & UsesVXRMMask; }
+
static inline unsigned getVLOpNum(const MCInstrDesc &Desc) {
const uint64_t TSFlags = Desc.TSFlags;
// This method is only called if we expect to have a VL operand, and all
@@ -207,6 +203,15 @@ static inline unsigned getVecPolicyOpNum(const MCInstrDesc &Desc) {
return Desc.getNumOperands() - 1;
}
+// Is the first def operand tied to the first use operand. This is true for
+// vector pseudo instructions that have a merge operand for tail/mask
+// undisturbed. It's also true for vector FMA instructions where one of the
+// operands is also the destination register.
+static inline bool isFirstDefTiedToFirstUse(const MCInstrDesc &Desc) {
+ return Desc.getNumDefs() < Desc.getNumOperands() &&
+ Desc.getOperandConstraint(Desc.getNumDefs(), MCOI::TIED_TO) == 0;
+}
+
// RISC-V Specific Machine Operand Flags
enum {
MO_None = 0,
@@ -233,14 +238,21 @@ enum {
namespace RISCVOp {
enum OperandType : unsigned {
OPERAND_FIRST_RISCV_IMM = MCOI::OPERAND_FIRST_TARGET,
- OPERAND_UIMM2 = OPERAND_FIRST_RISCV_IMM,
+ OPERAND_UIMM1 = OPERAND_FIRST_RISCV_IMM,
+ OPERAND_UIMM2,
+ OPERAND_UIMM2_LSB0,
OPERAND_UIMM3,
OPERAND_UIMM4,
OPERAND_UIMM5,
+ OPERAND_UIMM6,
OPERAND_UIMM7,
OPERAND_UIMM7_LSB00,
OPERAND_UIMM8_LSB00,
+ OPERAND_UIMM8,
OPERAND_UIMM8_LSB000,
+ OPERAND_UIMM8_GE32,
+ OPERAND_UIMM9_LSB000,
+ OPERAND_UIMM10_LSB00_NONZERO,
OPERAND_UIMM12,
OPERAND_ZERO,
OPERAND_SIMM5,
@@ -253,11 +265,14 @@ enum OperandType : unsigned {
OPERAND_UIMM20,
OPERAND_UIMMLOG2XLEN,
OPERAND_UIMMLOG2XLEN_NONZERO,
- OPERAND_UIMM_SHFL,
+ OPERAND_CLUI_IMM,
OPERAND_VTYPEI10,
OPERAND_VTYPEI11,
OPERAND_RVKRNUM,
- OPERAND_LAST_RISCV_IMM = OPERAND_RVKRNUM,
+ OPERAND_RVKRNUM_0_7,
+ OPERAND_RVKRNUM_1_10,
+ OPERAND_RVKRNUM_2_14,
+ OPERAND_LAST_RISCV_IMM = OPERAND_RVKRNUM_2_14,
// Operand is either a register or uimm5; this is used by V extension pseudo
// instructions to represent a value that can be passed as AVL to either
// vsetvli or vsetivli.
@@ -332,10 +347,22 @@ inline static bool isValidRoundingMode(unsigned Mode) {
}
} // namespace RISCVFPRndMode
+//===----------------------------------------------------------------------===//
+// Floating-point Immediates
+//
+
+namespace RISCVLoadFPImm {
+float getFPImm(unsigned Imm);
+
+/// getLoadFPImm - Return a 5-bit binary encoding of the floating-point
+/// immediate value. If the value cannot be represented as a 5-bit binary
+/// encoding, then return -1.
+int getLoadFPImm(APFloat FPImm);
+} // namespace RISCVLoadFPImm
+
namespace RISCVSysReg {
struct SysReg {
const char *Name;
- const char *AltName;
const char *DeprecatedName;
unsigned Encoding;
// FIXME: add these additional fields when needed.
@@ -359,9 +386,22 @@ struct SysReg {
return true;
return (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
}
+
+ bool haveVendorRequiredFeatures(const FeatureBitset &ActiveFeatures) const {
+ // Not in 32-bit mode.
+ if (isRV32Only && ActiveFeatures[RISCV::Feature64Bit])
+ return false;
+ // No required feature associated with the system register.
+ if (FeaturesRequired.none())
+ return false;
+ return (FeaturesRequired & ActiveFeatures) == FeaturesRequired;
+ }
};
+struct SiFiveReg : SysReg {};
+
#define GET_SysRegsList_DECL
+#define GET_SiFiveRegsList_DECL
#include "RISCVGenSearchableTables.inc"
} // end namespace RISCVSysReg
@@ -385,12 +425,13 @@ enum ABI {
ABI_LP64,
ABI_LP64F,
ABI_LP64D,
+ ABI_LP64E,
ABI_Unknown
};
// Returns the target ABI, or else a StringError if the requested ABIName is
// not supported for the given TT and FeatureBits combination.
-ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits,
+ABI computeTargetABI(const Triple &TT, const FeatureBitset &FeatureBits,
StringRef ABIName);
ABI getTargetABI(StringRef ABIName);
@@ -472,6 +513,124 @@ bool compress(MCInst &OutInst, const MCInst &MI, const MCSubtargetInfo &STI);
bool uncompress(MCInst &OutInst, const MCInst &MI, const MCSubtargetInfo &STI);
} // namespace RISCVRVC
+namespace RISCVZC {
+enum RLISTENCODE {
+ RA = 4,
+ RA_S0,
+ RA_S0_S1,
+ RA_S0_S2,
+ RA_S0_S3,
+ RA_S0_S4,
+ RA_S0_S5,
+ RA_S0_S6,
+ RA_S0_S7,
+ RA_S0_S8,
+ RA_S0_S9,
+  // Note: to include s10, s11 must also be included.
+ RA_S0_S11,
+ INVALID_RLIST,
+};
+
+inline unsigned encodeRlist(MCRegister EndReg, bool IsRV32E = false) {
+ assert((!IsRV32E || EndReg <= RISCV::X9) && "Invalid Rlist for RV32E");
+ switch (EndReg) {
+ case RISCV::X1:
+ return RLISTENCODE::RA;
+ case RISCV::X8:
+ return RLISTENCODE::RA_S0;
+ case RISCV::X9:
+ return RLISTENCODE::RA_S0_S1;
+ case RISCV::X18:
+ return RLISTENCODE::RA_S0_S2;
+ case RISCV::X19:
+ return RLISTENCODE::RA_S0_S3;
+ case RISCV::X20:
+ return RLISTENCODE::RA_S0_S4;
+ case RISCV::X21:
+ return RLISTENCODE::RA_S0_S5;
+ case RISCV::X22:
+ return RLISTENCODE::RA_S0_S6;
+ case RISCV::X23:
+ return RLISTENCODE::RA_S0_S7;
+ case RISCV::X24:
+ return RLISTENCODE::RA_S0_S8;
+ case RISCV::X25:
+ return RLISTENCODE::RA_S0_S9;
+ case RISCV::X26:
+ return RLISTENCODE::INVALID_RLIST;
+ case RISCV::X27:
+ return RLISTENCODE::RA_S0_S11;
+ default:
+ llvm_unreachable("Undefined input.");
+ }
+}
+
+inline static unsigned getStackAdjBase(unsigned RlistVal, bool IsRV64,
+ bool IsEABI) {
+ assert(RlistVal != RLISTENCODE::INVALID_RLIST &&
+ "{ra, s0-s10} is not supported, s11 must be included.");
+ if (IsEABI)
+ return 16;
+ if (!IsRV64) {
+ switch (RlistVal) {
+ case RLISTENCODE::RA:
+ case RLISTENCODE::RA_S0:
+ case RLISTENCODE::RA_S0_S1:
+ case RLISTENCODE::RA_S0_S2:
+ return 16;
+ case RLISTENCODE::RA_S0_S3:
+ case RLISTENCODE::RA_S0_S4:
+ case RLISTENCODE::RA_S0_S5:
+ case RLISTENCODE::RA_S0_S6:
+ return 32;
+ case RLISTENCODE::RA_S0_S7:
+ case RLISTENCODE::RA_S0_S8:
+ case RLISTENCODE::RA_S0_S9:
+ return 48;
+ case RLISTENCODE::RA_S0_S11:
+ return 64;
+ }
+ } else {
+ switch (RlistVal) {
+ case RLISTENCODE::RA:
+ case RLISTENCODE::RA_S0:
+ return 16;
+ case RLISTENCODE::RA_S0_S1:
+ case RLISTENCODE::RA_S0_S2:
+ return 32;
+ case RLISTENCODE::RA_S0_S3:
+ case RLISTENCODE::RA_S0_S4:
+ return 48;
+ case RLISTENCODE::RA_S0_S5:
+ case RLISTENCODE::RA_S0_S6:
+ return 64;
+ case RLISTENCODE::RA_S0_S7:
+ case RLISTENCODE::RA_S0_S8:
+ return 80;
+ case RLISTENCODE::RA_S0_S9:
+ return 96;
+ case RLISTENCODE::RA_S0_S11:
+ return 112;
+ }
+ }
+ llvm_unreachable("Unexpected RlistVal");
+}
+
+inline static bool getSpimm(unsigned RlistVal, unsigned &SpimmVal,
+ int64_t StackAdjustment, bool IsRV64, bool IsEABI) {
+ if (RlistVal == RLISTENCODE::INVALID_RLIST)
+ return false;
+ unsigned stackAdj = getStackAdjBase(RlistVal, IsRV64, IsEABI);
+ SpimmVal = (StackAdjustment - stackAdj) / 16;
+ if (SpimmVal > 3)
+ return false;
+ return true;
+}
+
+void printRlist(unsigned SlistEncode, raw_ostream &OS);
+void printSpimm(int64_t Spimm, raw_ostream &OS);
+} // namespace RISCVZC
+
} // namespace llvm
#endif
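
To make the Zc stack-adjustment encoding above concrete: a small sketch, not part of the patch, using getStackAdjBase/getSpimm; zcmpSpimmExample and the chosen operands are illustrative values only.

#include "MCTargetDesc/RISCVBaseInfo.h"
#include <cassert>

using namespace llvm;

// cm.push {ra, s0-s2}, -32 on RV32 (non-EABI): this register list needs a
// 16-byte base, and the extra 16 bytes are encoded as spimm = 16 / 16 = 1.
static void zcmpSpimmExample() {
  unsigned Spimm = 0;
  bool OK = RISCVZC::getSpimm(RISCVZC::RA_S0_S2, Spimm,
                              /*StackAdjustment=*/32, /*IsRV64=*/false,
                              /*IsEABI=*/false);
  assert(OK && Spimm == 1 && "32 == 16 (base) + 1 * 16");
  (void)OK;
}
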
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
index b4f7e8658c73..db7dc1aed7fc 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVELFObjectWriter.cpp - RISCV ELF Writer -----------------------===//
+//===-- RISCVELFObjectWriter.cpp - RISC-V ELF Writer ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -13,6 +13,7 @@
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -57,11 +58,13 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
if (IsPCRel) {
switch (Kind) {
default:
- Ctx.reportError(Fixup.getLoc(), "Unsupported relocation type");
+ Ctx.reportError(Fixup.getLoc(), "unsupported relocation type");
return ELF::R_RISCV_NONE;
case FK_Data_4:
case FK_PCRel_4:
- return ELF::R_RISCV_32_PCREL;
+ return Target.getAccessVariant() == MCSymbolRefExpr::VK_PLT
+ ? ELF::R_RISCV_PLT32
+ : ELF::R_RISCV_32_PCREL;
case RISCV::fixup_riscv_pcrel_hi20:
return ELF::R_RISCV_PCREL_HI20;
case RISCV::fixup_riscv_pcrel_lo12_i:
@@ -107,7 +110,7 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
switch (Kind) {
default:
- Ctx.reportError(Fixup.getLoc(), "Unsupported relocation type");
+ Ctx.reportError(Fixup.getLoc(), "unsupported relocation type");
return ELF::R_RISCV_NONE;
case FK_Data_1:
Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported");
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index 379aaa713a00..e43cb8b40d83 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVELFStreamer.cpp - RISCV ELF Target Streamer Methods ----------===//
+//===-- RISCVELFStreamer.cpp - RISC-V ELF Target Streamer Methods ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file provides RISCV specific target streamer methods.
+// This file provides RISC-V specific target streamer methods.
//
//===----------------------------------------------------------------------===//
@@ -53,98 +53,28 @@ void RISCVTargetELFStreamer::emitDirectiveOptionRelax() {}
void RISCVTargetELFStreamer::emitDirectiveOptionNoRelax() {}
void RISCVTargetELFStreamer::emitAttribute(unsigned Attribute, unsigned Value) {
- setAttributeItem(Attribute, Value, /*OverwriteExisting=*/true);
+ getStreamer().setAttributeItem(Attribute, Value, /*OverwriteExisting=*/true);
}
void RISCVTargetELFStreamer::emitTextAttribute(unsigned Attribute,
StringRef String) {
- setAttributeItem(Attribute, String, /*OverwriteExisting=*/true);
+ getStreamer().setAttributeItem(Attribute, String, /*OverwriteExisting=*/true);
}
void RISCVTargetELFStreamer::emitIntTextAttribute(unsigned Attribute,
unsigned IntValue,
StringRef StringValue) {
- setAttributeItems(Attribute, IntValue, StringValue,
- /*OverwriteExisting=*/true);
+ getStreamer().setAttributeItems(Attribute, IntValue, StringValue,
+ /*OverwriteExisting=*/true);
}
void RISCVTargetELFStreamer::finishAttributeSection() {
- if (Contents.empty())
+ RISCVELFStreamer &S = getStreamer();
+ if (S.Contents.empty())
return;
- if (AttributeSection) {
- Streamer.switchSection(AttributeSection);
- } else {
- MCAssembler &MCA = getStreamer().getAssembler();
- AttributeSection = MCA.getContext().getELFSection(
- ".riscv.attributes", ELF::SHT_RISCV_ATTRIBUTES, 0);
- Streamer.switchSection(AttributeSection);
-
- Streamer.emitInt8(ELFAttrs::Format_Version);
- }
-
- // Vendor size + Vendor name + '\0'
- const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1;
-
- // Tag + Tag Size
- const size_t TagHeaderSize = 1 + 4;
-
- const size_t ContentsSize = calculateContentSize();
-
- Streamer.emitInt32(VendorHeaderSize + TagHeaderSize + ContentsSize);
- Streamer.emitBytes(CurrentVendor);
- Streamer.emitInt8(0); // '\0'
-
- Streamer.emitInt8(ELFAttrs::File);
- Streamer.emitInt32(TagHeaderSize + ContentsSize);
-
- // Size should have been accounted for already, now
- // emit each field as its type (ULEB or String).
- for (AttributeItem item : Contents) {
- Streamer.emitULEB128IntValue(item.Tag);
- switch (item.Type) {
- default:
- llvm_unreachable("Invalid attribute type");
- case AttributeType::Numeric:
- Streamer.emitULEB128IntValue(item.IntValue);
- break;
- case AttributeType::Text:
- Streamer.emitBytes(item.StringValue);
- Streamer.emitInt8(0); // '\0'
- break;
- case AttributeType::NumericAndText:
- Streamer.emitULEB128IntValue(item.IntValue);
- Streamer.emitBytes(item.StringValue);
- Streamer.emitInt8(0); // '\0'
- break;
- }
- }
-
- Contents.clear();
-}
-
-size_t RISCVTargetELFStreamer::calculateContentSize() const {
- size_t Result = 0;
- for (AttributeItem item : Contents) {
- switch (item.Type) {
- case AttributeType::Hidden:
- break;
- case AttributeType::Numeric:
- Result += getULEB128Size(item.Tag);
- Result += getULEB128Size(item.IntValue);
- break;
- case AttributeType::Text:
- Result += getULEB128Size(item.Tag);
- Result += item.StringValue.size() + 1; // string + '\0'
- break;
- case AttributeType::NumericAndText:
- Result += getULEB128Size(item.Tag);
- Result += getULEB128Size(item.IntValue);
- Result += item.StringValue.size() + 1; // string + '\0';
- break;
- }
- }
- return Result;
+ S.emitAttributesSection(CurrentVendor, ".riscv.attributes",
+ ELF::SHT_RISCV_ATTRIBUTES, AttributeSection);
}
void RISCVTargetELFStreamer::finish() {
@@ -173,6 +103,7 @@ void RISCVTargetELFStreamer::finish() {
EFlags |= ELF::EF_RISCV_FLOAT_ABI_DOUBLE;
break;
case RISCVABI::ABI_ILP32E:
+ case RISCVABI::ABI_LP64E:
EFlags |= ELF::EF_RISCV_RVE;
break;
case RISCVABI::ABI_Unknown:
@@ -184,7 +115,6 @@ void RISCVTargetELFStreamer::finish() {
void RISCVTargetELFStreamer::reset() {
AttributeSection = nullptr;
- Contents.clear();
}
void RISCVTargetELFStreamer::emitDirectiveVariantCC(MCSymbol &Symbol) {
@@ -192,86 +122,11 @@ void RISCVTargetELFStreamer::emitDirectiveVariantCC(MCSymbol &Symbol) {
cast<MCSymbolELF>(Symbol).setOther(ELF::STO_RISCV_VARIANT_CC);
}
-std::pair<unsigned, unsigned>
-RISCVELFStreamer::getRelocPairForSize(unsigned Size) {
- switch (Size) {
- default:
- llvm_unreachable("unsupported fixup size");
- case 1:
- return std::make_pair(RISCV::fixup_riscv_add_8, RISCV::fixup_riscv_sub_8);
- case 2:
- return std::make_pair(RISCV::fixup_riscv_add_16, RISCV::fixup_riscv_sub_16);
- case 4:
- return std::make_pair(RISCV::fixup_riscv_add_32, RISCV::fixup_riscv_sub_32);
- case 8:
- return std::make_pair(RISCV::fixup_riscv_add_64, RISCV::fixup_riscv_sub_64);
- }
-}
-
-bool RISCVELFStreamer::requiresFixups(MCContext &C, const MCExpr *Value,
- const MCExpr *&LHS, const MCExpr *&RHS) {
- const auto *MBE = dyn_cast<MCBinaryExpr>(Value);
- if (MBE == nullptr)
- return false;
-
- MCValue E;
- if (!Value->evaluateAsRelocatable(E, nullptr, nullptr))
- return false;
- if (E.getSymA() == nullptr || E.getSymB() == nullptr)
- return false;
-
- const auto &A = E.getSymA()->getSymbol();
- const auto &B = E.getSymB()->getSymbol();
-
- LHS = MCBinaryExpr::create(MCBinaryExpr::Add, MCSymbolRefExpr::create(&A, C),
- MCConstantExpr::create(E.getConstant(), C), C);
- RHS = E.getSymB();
-
- // If either symbol is in a text section, we need to delay the relocation
- // evaluation as relaxation may alter the size of the symbol.
- //
- // Unfortunately, we cannot identify if the symbol was built with relaxation
- // as we do not track the state per symbol or section. However, BFD will
- // always emit the relocation and so we follow suit which avoids the need to
- // track that information.
- if (A.isInSection() && A.getSection().getKind().isText())
- return true;
- if (B.isInSection() && B.getSection().getKind().isText())
- return true;
-
- // Support cross-section symbolic differences ...
- return A.isInSection() && B.isInSection() &&
- A.getSection().getName() != B.getSection().getName();
-}
-
void RISCVELFStreamer::reset() {
static_cast<RISCVTargetStreamer *>(getTargetStreamer())->reset();
MCELFStreamer::reset();
}
-void RISCVELFStreamer::emitValueImpl(const MCExpr *Value, unsigned Size,
- SMLoc Loc) {
- const MCExpr *A, *B;
- if (!requiresFixups(getContext(), Value, A, B))
- return MCELFStreamer::emitValueImpl(Value, Size, Loc);
-
- MCStreamer::emitValueImpl(Value, Size, Loc);
-
- MCDataFragment *DF = getOrCreateDataFragment();
- flushPendingLabels(DF, DF->getContents().size());
- MCDwarfLineEntry::make(this, getCurrentSectionOnly());
-
- unsigned Add, Sub;
- std::tie(Add, Sub) = getRelocPairForSize(Size);
-
- DF->getFixups().push_back(MCFixup::create(
- DF->getContents().size(), A, static_cast<MCFixupKind>(Add), Loc));
- DF->getFixups().push_back(MCFixup::create(
- DF->getContents().size(), B, static_cast<MCFixupKind>(Sub), Loc));
-
- DF->getContents().resize(DF->getContents().size() + Size, 0);
-}
-
namespace llvm {
MCELFStreamer *createRISCVELFStreamer(MCContext &C,
std::unique_ptr<MCAsmBackend> MAB,
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
index 7331894f637d..e68f70261146 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
@@ -1,4 +1,4 @@
-//===-- RISCVELFStreamer.h - RISCV ELF Target Streamer ---------*- C++ -*--===//
+//===-- RISCVELFStreamer.h - RISC-V ELF Target Streamer ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -15,9 +15,6 @@
using namespace llvm;
class RISCVELFStreamer : public MCELFStreamer {
- static std::pair<unsigned, unsigned> getRelocPairForSize(unsigned Size);
- static bool requiresFixups(MCContext &C, const MCExpr *Value,
- const MCExpr *&LHS, const MCExpr *&RHS);
void reset() override;
public:
@@ -25,89 +22,22 @@ public:
std::unique_ptr<MCObjectWriter> MOW,
std::unique_ptr<MCCodeEmitter> MCE)
: MCELFStreamer(C, std::move(MAB), std::move(MOW), std::move(MCE)) {}
-
- void emitValueImpl(const MCExpr *Value, unsigned Size, SMLoc Loc) override;
};
namespace llvm {
class RISCVTargetELFStreamer : public RISCVTargetStreamer {
private:
- enum class AttributeType { Hidden, Numeric, Text, NumericAndText };
-
- struct AttributeItem {
- AttributeType Type;
- unsigned Tag;
- unsigned IntValue;
- std::string StringValue;
- };
-
StringRef CurrentVendor;
- SmallVector<AttributeItem, 64> Contents;
MCSection *AttributeSection = nullptr;
const MCSubtargetInfo &STI;
- AttributeItem *getAttributeItem(unsigned Attribute) {
- for (size_t i = 0; i < Contents.size(); ++i)
- if (Contents[i].Tag == Attribute)
- return &Contents[i];
- return nullptr;
- }
-
- void setAttributeItem(unsigned Attribute, unsigned Value,
- bool OverwriteExisting) {
- // Look for existing attribute item.
- if (AttributeItem *Item = getAttributeItem(Attribute)) {
- if (!OverwriteExisting)
- return;
- Item->Type = AttributeType::Numeric;
- Item->IntValue = Value;
- return;
- }
-
- // Create new attribute item.
- Contents.push_back({AttributeType::Numeric, Attribute, Value, ""});
- }
-
- void setAttributeItem(unsigned Attribute, StringRef Value,
- bool OverwriteExisting) {
- // Look for existing attribute item.
- if (AttributeItem *Item = getAttributeItem(Attribute)) {
- if (!OverwriteExisting)
- return;
- Item->Type = AttributeType::Text;
- Item->StringValue = std::string(Value);
- return;
- }
-
- // Create new attribute item.
- Contents.push_back({AttributeType::Text, Attribute, 0, std::string(Value)});
- }
-
- void setAttributeItems(unsigned Attribute, unsigned IntValue,
- StringRef StringValue, bool OverwriteExisting) {
- // Look for existing attribute item.
- if (AttributeItem *Item = getAttributeItem(Attribute)) {
- if (!OverwriteExisting)
- return;
- Item->Type = AttributeType::NumericAndText;
- Item->IntValue = IntValue;
- Item->StringValue = std::string(StringValue);
- return;
- }
-
- // Create new attribute item.
- Contents.push_back({AttributeType::NumericAndText, Attribute, IntValue,
- std::string(StringValue)});
- }
-
void emitAttribute(unsigned Attribute, unsigned Value) override;
void emitTextAttribute(unsigned Attribute, StringRef String) override;
void emitIntTextAttribute(unsigned Attribute, unsigned IntValue,
StringRef StringValue) override;
void finishAttributeSection() override;
- size_t calculateContentSize() const;
void reset() override;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
index 67841d2c8f8c..5727aab3cd4c 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVFixupKinds.h
@@ -10,6 +10,7 @@
#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVFIXUPKINDS_H
#include "llvm/MC/MCFixup.h"
+#include <utility>
#undef RISCV
@@ -19,6 +20,8 @@ enum Fixups {
fixup_riscv_hi20 = FirstTargetFixupKind,
// 12-bit fixup corresponding to %lo(foo) for instructions like addi
fixup_riscv_lo12_i,
+ // 12-bit fixup corresponding to foo-bar for instructions like addi
+ fixup_riscv_12_i,
// 12-bit fixup corresponding to %lo(foo) for the S-type store instructions
fixup_riscv_lo12_s,
// 20-bit fixup corresponding to %pcrel_hi(foo) for instructions like auipc
@@ -108,6 +111,27 @@ enum Fixups {
fixup_riscv_invalid,
NumTargetFixupKinds = fixup_riscv_invalid - FirstTargetFixupKind
};
+
+static inline std::pair<MCFixupKind, MCFixupKind>
+getRelocPairForSize(unsigned Size) {
+ switch (Size) {
+ default:
+ llvm_unreachable("unsupported fixup size");
+ case 1:
+ return std::make_pair(MCFixupKind(RISCV::fixup_riscv_add_8),
+ MCFixupKind(RISCV::fixup_riscv_sub_8));
+ case 2:
+ return std::make_pair(MCFixupKind(RISCV::fixup_riscv_add_16),
+ MCFixupKind(RISCV::fixup_riscv_sub_16));
+ case 4:
+ return std::make_pair(MCFixupKind(RISCV::fixup_riscv_add_32),
+ MCFixupKind(RISCV::fixup_riscv_sub_32));
+ case 8:
+ return std::make_pair(MCFixupKind(RISCV::fixup_riscv_add_64),
+ MCFixupKind(RISCV::fixup_riscv_sub_64));
+ }
+}
+
} // end namespace llvm::RISCV
#endif
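
A short sketch, not part of the patch, of how the helper above is meant to be used to attach paired add/sub fixups for an `a - b` expression (as relaxDwarfLineAddr now does); addPairedFixups is an illustrative name, and the caller is assumed to supply the expressions and offset.

#include "MCTargetDesc/RISCVFixupKinds.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"

using namespace llvm;

// Emit one ADD and one SUB fixup at the same offset so the linker computes
// LHS - RHS for a Size-byte field.
static void addPairedFixups(SmallVectorImpl<MCFixup> &Fixups, unsigned Offset,
                            const MCExpr *LHS, const MCExpr *RHS,
                            unsigned Size) {
  auto [AddKind, SubKind] = RISCV::getRelocPairForSize(Size);
  Fixups.push_back(MCFixup::create(Offset, LHS, AddKind));
  Fixups.push_back(MCFixup::create(Offset, RHS, SubKind));
}
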
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index a4fbba7ae1e9..8e98abd65aab 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVInstPrinter.cpp - Convert RISCV MCInst to asm syntax ---------===//
+//===-- RISCVInstPrinter.cpp - Convert RISC-V MCInst to asm syntax --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This class prints an RISCV MCInst to a .s file.
+// This class prints an RISC-V MCInst to a .s file.
//
//===----------------------------------------------------------------------===//
@@ -120,8 +120,11 @@ void RISCVInstPrinter::printCSRSystemRegister(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
+ auto SiFiveReg = RISCVSysReg::lookupSiFiveRegByEncoding(Imm);
auto SysReg = RISCVSysReg::lookupSysRegByEncoding(Imm);
- if (SysReg && SysReg->haveRequiredFeatures(STI.getFeatureBits()))
+ if (SiFiveReg && SiFiveReg->haveVendorRequiredFeatures(STI.getFeatureBits()))
+ O << SiFiveReg->Name;
+ else if (SysReg && SysReg->haveRequiredFeatures(STI.getFeatureBits()))
O << SysReg->Name;
else
O << Imm;
@@ -149,7 +152,32 @@ void RISCVInstPrinter::printFRMArg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
auto FRMArg =
static_cast<RISCVFPRndMode::RoundingMode>(MI->getOperand(OpNo).getImm());
- O << RISCVFPRndMode::roundingModeToString(FRMArg);
+ if (PrintAliases && !NoAliases && FRMArg == RISCVFPRndMode::RoundingMode::DYN)
+ return;
+ O << ", " << RISCVFPRndMode::roundingModeToString(FRMArg);
+}
+
+void RISCVInstPrinter::printFPImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNo).getImm();
+ if (Imm == 1) {
+ O << "min";
+ } else if (Imm == 30) {
+ O << "inf";
+ } else if (Imm == 31) {
+ O << "nan";
+ } else {
+ float FPVal = RISCVLoadFPImm::getFPImm(Imm);
+    // If the value is an integer, print a .0 fraction. Otherwise, use %g,
+    // which does not print trailing zeros and switches to scientific notation
+    // when that is shorter than the decimal form. The smallest value requires
+    // 12 digits of precision including the decimal point.
+ if (FPVal == (int)(FPVal))
+ O << format("%.1f", FPVal);
+ else
+ O << format("%.12g", FPVal);
+ }
}
void RISCVInstPrinter::printZeroOffsetMemOp(const MCInst *MI, unsigned OpNo,
@@ -177,6 +205,60 @@ void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo,
RISCVVType::printVType(Imm, O);
}
+void RISCVInstPrinter::printRlist(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ unsigned Imm = MI->getOperand(OpNo).getImm();
+ O << "{";
+ switch (Imm) {
+ case RISCVZC::RLISTENCODE::RA:
+ O << (ArchRegNames ? "x1" : "ra");
+ break;
+ case RISCVZC::RLISTENCODE::RA_S0:
+ O << (ArchRegNames ? "x1, x8" : "ra, s0");
+ break;
+ case RISCVZC::RLISTENCODE::RA_S0_S1:
+ O << (ArchRegNames ? "x1, x8-x9" : "ra, s0-s1");
+ break;
+ case RISCVZC::RLISTENCODE::RA_S0_S2:
+ O << (ArchRegNames ? "x1, x8-x9, x18" : "ra, s0-s2");
+ break;
+ case RISCVZC::RLISTENCODE::RA_S0_S3:
+ case RISCVZC::RLISTENCODE::RA_S0_S4:
+ case RISCVZC::RLISTENCODE::RA_S0_S5:
+ case RISCVZC::RLISTENCODE::RA_S0_S6:
+ case RISCVZC::RLISTENCODE::RA_S0_S7:
+ case RISCVZC::RLISTENCODE::RA_S0_S8:
+ case RISCVZC::RLISTENCODE::RA_S0_S9:
+ O << (ArchRegNames ? "x1, x8-x9, x18-" : "ra, s0-")
+ << getRegisterName(RISCV::X19 + (Imm - RISCVZC::RLISTENCODE::RA_S0_S3));
+ break;
+ case RISCVZC::RLISTENCODE::RA_S0_S11:
+ O << (ArchRegNames ? "x1, x8-x9, x18-x27" : "ra, s0-s11");
+ break;
+ default:
+ llvm_unreachable("invalid register list");
+ }
+ O << "}";
+}
+
+void RISCVInstPrinter::printSpimm(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O) {
+ int64_t Imm = MI->getOperand(OpNo).getImm();
+ unsigned Opcode = MI->getOpcode();
+ bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
+ bool IsEABI = STI.hasFeature(RISCV::FeatureRVE);
+ int64_t Spimm = 0;
+ auto RlistVal = MI->getOperand(0).getImm();
+ assert(RlistVal != 16 && "Incorrect rlist.");
+ auto Base = RISCVZC::getStackAdjBase(RlistVal, IsRV64, IsEABI);
+ Spimm = Imm + Base;
+ assert((Spimm >= Base && Spimm <= Base + 48) && "Incorrect spimm");
+ if (Opcode == RISCV::CM_PUSH)
+ Spimm = -Spimm;
+
+ RISCVZC::printSpimm(Spimm, O);
+}
+
void RISCVInstPrinter::printVMaskReg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
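
For reference, a standalone sketch, not part of the patch, of the formatting rule printFPImmOperand applies once the table value is known; printFLIValue is an illustrative name.

#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

// Integral values get an explicit ".0"; everything else uses %g with 12
// significant digits so the smallest table entry prints exactly.
static void printFLIValue(float FPVal, llvm::raw_ostream &OS) {
  if (FPVal == (int)FPVal)
    OS << llvm::format("%.1f", FPVal);
  else
    OS << llvm::format("%.12g", FPVal);
}
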
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
index d7d93842e80c..20f12af13008 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.h
@@ -1,4 +1,4 @@
-//===-- RISCVInstPrinter.h - Convert RISCV MCInst to asm syntax ---*- C++ -*--//
+//===-- RISCVInstPrinter.h - Convert RISC-V MCInst to asm syntax --*- C++ -*--//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This class prints a RISCV MCInst to a .s file.
+// This class prints a RISC-V MCInst to a .s file.
//
//===----------------------------------------------------------------------===//
@@ -40,12 +40,18 @@ public:
const MCSubtargetInfo &STI, raw_ostream &O);
void printFRMArg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
+ void printFPImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printZeroOffsetMemOp(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
void printVTypeI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
raw_ostream &O);
void printVMaskReg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O);
+ void printRlist(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printSpimm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
// Autogenerated by tblgen.
std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
index 089a2def4c21..7b927522d395 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVMCAsmInfo.cpp - RISCV Asm properties -------------------------===//
+//===-- RISCVMCAsmInfo.cpp - RISC-V Asm properties ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -12,9 +12,9 @@
#include "RISCVMCAsmInfo.h"
#include "MCTargetDesc/RISCVMCExpr.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
void RISCVMCAsmInfo::anchor() {}
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
index 6824baf699aa..bceeb1256471 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
@@ -1,4 +1,4 @@
-//===-- RISCVMCAsmInfo.h - RISCV Asm Info ----------------------*- C++ -*--===//
+//===-- RISCVMCAsmInfo.h - RISC-V Asm Info ---------------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index a335b2d23394..b63a5cea823e 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVMCCodeEmitter.cpp - Convert RISCV code to machine code -------===//
+//===-- RISCVMCCodeEmitter.cpp - Convert RISC-V code to machine code ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -49,18 +49,22 @@ public:
~RISCVMCCodeEmitter() override = default;
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
- void expandFunctionCall(const MCInst &MI, raw_ostream &OS,
+ void expandFunctionCall(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- void expandAddTPRel(const MCInst &MI, raw_ostream &OS,
+ void expandAddTPRel(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ void expandLongCondBr(const MCInst &MI, SmallVectorImpl<char> &CB,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
/// TableGen'erated function for getting the binary encoding for an
/// instruction.
uint64_t getBinaryCodeForInstr(const MCInst &MI,
@@ -84,6 +88,10 @@ public:
unsigned getVMaskReg(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+
+ unsigned getRlistOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
};
} // end anonymous namespace
@@ -94,13 +102,14 @@ MCCodeEmitter *llvm::createRISCVMCCodeEmitter(const MCInstrInfo &MCII,
// Expand PseudoCALL(Reg), PseudoTAIL and PseudoJump to AUIPC and JALR with
// relocation types. We expand those pseudo-instructions while encoding them,
-// meaning AUIPC and JALR won't go through RISCV MC to MC compressed
+// meaning AUIPC and JALR won't go through RISC-V MC to MC compressed
// instruction transformation. This is acceptable because AUIPC has no 16-bit
// form and C_JALR has no immediate operand field. We let linker relaxation
// deal with it. When linker relaxation is enabled, AUIPC and JALR have a
// chance to relax to JAL.
// If the C extension is enabled, JAL has a chance to relax to C_JAL.
-void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS,
+void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
MCInst TmpInst;
@@ -128,7 +137,7 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS,
// Emit AUIPC Ra, Func with R_RISCV_CALL relocation type.
TmpInst = MCInstBuilder(RISCV::AUIPC).addReg(Ra).addExpr(CallExpr);
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- support::endian::write(OS, Binary, support::little);
+ support::endian::write(CB, Binary, support::little);
if (MI.getOpcode() == RISCV::PseudoTAIL ||
MI.getOpcode() == RISCV::PseudoJump)
@@ -138,11 +147,12 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI, raw_ostream &OS,
// Emit JALR Ra, Ra, 0
TmpInst = MCInstBuilder(RISCV::JALR).addReg(Ra).addReg(Ra).addImm(0);
Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- support::endian::write(OS, Binary, support::little);
+ support::endian::write(CB, Binary, support::little);
}
// Expand PseudoAddTPRel to a simple ADD with the correct relocation.
-void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI, raw_ostream &OS,
+void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
MCOperand DestReg = MI.getOperand(0);
@@ -164,7 +174,7 @@ void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI, raw_ostream &OS,
0, Expr, MCFixupKind(RISCV::fixup_riscv_tprel_add), MI.getLoc()));
// Emit fixup_riscv_relax for tprel_add where the relax feature is enabled.
- if (STI.getFeatureBits()[RISCV::FeatureRelax]) {
+ if (STI.hasFeature(RISCV::FeatureRelax)) {
const MCConstantExpr *Dummy = MCConstantExpr::create(0, Ctx);
Fixups.push_back(MCFixup::create(
0, Dummy, MCFixupKind(RISCV::fixup_riscv_relax), MI.getLoc()));
@@ -176,10 +186,87 @@ void RISCVMCCodeEmitter::expandAddTPRel(const MCInst &MI, raw_ostream &OS,
.addOperand(SrcReg)
.addOperand(TPReg);
uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
- support::endian::write(OS, Binary, support::little);
+ support::endian::write(CB, Binary, support::little);
}
-void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+static unsigned getInvertedBranchOp(unsigned BrOp) {
+ switch (BrOp) {
+ default:
+ llvm_unreachable("Unexpected branch opcode!");
+ case RISCV::PseudoLongBEQ:
+ return RISCV::BNE;
+ case RISCV::PseudoLongBNE:
+ return RISCV::BEQ;
+ case RISCV::PseudoLongBLT:
+ return RISCV::BGE;
+ case RISCV::PseudoLongBGE:
+ return RISCV::BLT;
+ case RISCV::PseudoLongBLTU:
+ return RISCV::BGEU;
+ case RISCV::PseudoLongBGEU:
+ return RISCV::BLTU;
+ }
+}
+
+// Expand PseudoLongBxx to an inverted conditional branch and an unconditional
+// jump.
+void RISCVMCCodeEmitter::expandLongCondBr(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ MCRegister SrcReg1 = MI.getOperand(0).getReg();
+ MCRegister SrcReg2 = MI.getOperand(1).getReg();
+ MCOperand SrcSymbol = MI.getOperand(2);
+ unsigned Opcode = MI.getOpcode();
+ bool IsEqTest =
+ Opcode == RISCV::PseudoLongBNE || Opcode == RISCV::PseudoLongBEQ;
+
+ bool UseCompressedBr = false;
+ if (IsEqTest && (STI.hasFeature(RISCV::FeatureStdExtC) ||
+ STI.hasFeature(RISCV::FeatureStdExtZca))) {
+ if (RISCV::X8 <= SrcReg1.id() && SrcReg1.id() <= RISCV::X15 &&
+ SrcReg2.id() == RISCV::X0) {
+ UseCompressedBr = true;
+ } else if (RISCV::X8 <= SrcReg2.id() && SrcReg2.id() <= RISCV::X15 &&
+ SrcReg1.id() == RISCV::X0) {
+ std::swap(SrcReg1, SrcReg2);
+ UseCompressedBr = true;
+ }
+ }
+
+ uint32_t Offset;
+ if (UseCompressedBr) {
+ unsigned InvOpc =
+ Opcode == RISCV::PseudoLongBNE ? RISCV::C_BEQZ : RISCV::C_BNEZ;
+ MCInst TmpInst = MCInstBuilder(InvOpc).addReg(SrcReg1).addImm(6);
+ uint16_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
+ support::endian::write<uint16_t>(CB, Binary, support::little);
+ Offset = 2;
+ } else {
+ unsigned InvOpc = getInvertedBranchOp(Opcode);
+ MCInst TmpInst =
+ MCInstBuilder(InvOpc).addReg(SrcReg1).addReg(SrcReg2).addImm(8);
+ uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
+ support::endian::write(CB, Binary, support::little);
+ Offset = 4;
+ }
+
+ // Emit an unconditional jump to the destination.
+ MCInst TmpInst =
+ MCInstBuilder(RISCV::JAL).addReg(RISCV::X0).addOperand(SrcSymbol);
+ uint32_t Binary = getBinaryCodeForInstr(TmpInst, Fixups, STI);
+ support::endian::write(CB, Binary, support::little);
+
+ Fixups.clear();
+ if (SrcSymbol.isExpr()) {
+ Fixups.push_back(MCFixup::create(Offset, SrcSymbol.getExpr(),
+ MCFixupKind(RISCV::fixup_riscv_jal),
+ MI.getLoc()));
+ }
+}
+
+void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
@@ -189,19 +276,29 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
// RISCVInstrInfo::getInstSizeInBytes expects that the total size of the
// expanded instructions for each pseudo is correct in the Size field of the
// tablegen definition for the pseudo.
- if (MI.getOpcode() == RISCV::PseudoCALLReg ||
- MI.getOpcode() == RISCV::PseudoCALL ||
- MI.getOpcode() == RISCV::PseudoTAIL ||
- MI.getOpcode() == RISCV::PseudoJump) {
- expandFunctionCall(MI, OS, Fixups, STI);
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case RISCV::PseudoCALLReg:
+ case RISCV::PseudoCALL:
+ case RISCV::PseudoTAIL:
+ case RISCV::PseudoJump:
+ expandFunctionCall(MI, CB, Fixups, STI);
MCNumEmitted += 2;
return;
- }
-
- if (MI.getOpcode() == RISCV::PseudoAddTPRel) {
- expandAddTPRel(MI, OS, Fixups, STI);
+ case RISCV::PseudoAddTPRel:
+ expandAddTPRel(MI, CB, Fixups, STI);
MCNumEmitted += 1;
return;
+ case RISCV::PseudoLongBEQ:
+ case RISCV::PseudoLongBNE:
+ case RISCV::PseudoLongBLT:
+ case RISCV::PseudoLongBGE:
+ case RISCV::PseudoLongBLTU:
+ case RISCV::PseudoLongBGEU:
+ expandLongCondBr(MI, CB, Fixups, STI);
+ MCNumEmitted += 2;
+ return;
}
switch (Size) {
@@ -209,12 +306,12 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
llvm_unreachable("Unhandled encodeInstruction length!");
case 2: {
uint16_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
- support::endian::write<uint16_t>(OS, Bits, support::little);
+ support::endian::write<uint16_t>(CB, Bits, support::little);
break;
}
case 4: {
uint32_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
- support::endian::write(OS, Bits, support::little);
+ support::endian::write(CB, Bits, support::little);
break;
}
}
@@ -255,7 +352,7 @@ RISCVMCCodeEmitter::getImmOpValueAsr1(const MCInst &MI, unsigned OpNo,
unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- bool EnableRelax = STI.getFeatureBits()[RISCV::FeatureRelax];
+ bool EnableRelax = STI.hasFeature(RISCV::FeatureRelax);
const MCOperand &MO = MI.getOperand(OpNo);
MCInstrDesc const &Desc = MCII.get(MI.getOpcode());
@@ -355,6 +452,8 @@ unsigned RISCVMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNo,
FixupKind = RISCV::fixup_riscv_rvc_jump;
} else if (MIFrm == RISCVII::InstFormatCB) {
FixupKind = RISCV::fixup_riscv_rvc_branch;
+ } else if (MIFrm == RISCVII::InstFormatI) {
+ FixupKind = RISCV::fixup_riscv_12_i;
}
}
@@ -394,4 +493,14 @@ unsigned RISCVMCCodeEmitter::getVMaskReg(const MCInst &MI, unsigned OpNo,
}
}
+unsigned RISCVMCCodeEmitter::getRlistOpValue(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ assert(MO.isImm() && "Rlist operand must be immediate");
+ auto Imm = MO.getImm();
+ assert(Imm >= 4 && "EABI is currently not implemented");
+ return Imm;
+}
+
#include "RISCVGenMCCodeEmitter.inc"
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
index 336289cf107b..d67351102bc1 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVMCExpr.cpp - RISCV specific MC expression classes ------------===//
+//===-- RISCVMCExpr.cpp - RISC-V specific MC expression classes -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the assembly expression modifiers
-// accepted by the RISCV architecture (e.g. ":lo12:", ":gottprel_g1:", ...).
+// accepted by the RISC-V architecture (e.g. ":lo12:", ":gottprel_g1:", ...).
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
index 2e752c138ecf..ee83bf0208ef 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCExpr.h
@@ -1,4 +1,4 @@
-//===-- RISCVMCExpr.h - RISCV specific MC expression classes ----*- C++ -*-===//
+//===-- RISCVMCExpr.h - RISC-V specific MC expression classes----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file describes RISCV-specific MCExprs, used for modifiers like
+// This file describes RISC-V specific MCExprs, used for modifiers like
// "%hi" or "%lo" etc.,
//
//===----------------------------------------------------------------------===//
@@ -80,8 +80,6 @@ public:
return E->getKind() == MCExpr::Target;
}
- static bool classof(const RISCVMCExpr *) { return true; }
-
static VariantKind getVariantKindForName(StringRef name);
static StringRef getVariantKindName(VariantKind Kind);
};
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp
index ef2ec87338d7..ac7d3b785ab1 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVMCObjectFileInfo.cpp - RISCV object file properties ----------===//
+//===-- RISCVMCObjectFileInfo.cpp - RISC-V object file properties ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -17,10 +17,13 @@
using namespace llvm;
+unsigned
+RISCVMCObjectFileInfo::getTextSectionAlignment(const MCSubtargetInfo &STI) {
+ bool RVC = STI.hasFeature(RISCV::FeatureStdExtC) ||
+ STI.hasFeature(RISCV::FeatureStdExtZca);
+ return RVC ? 2 : 4;
+}
+
unsigned RISCVMCObjectFileInfo::getTextSectionAlignment() const {
- const MCSubtargetInfo *STI = getContext().getSubtargetInfo();
- return (STI->hasFeature(RISCV::FeatureStdExtC) ||
- STI->hasFeature(RISCV::FeatureExtZca))
- ? 2
- : 4;
+ return getTextSectionAlignment(*getContext().getSubtargetInfo());
}
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.h
index 2f6b10229864..c2ef160c5107 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCObjectFileInfo.h
@@ -1,4 +1,4 @@
-//===-- RISCVMCObjectFileInfo.h - RISCV object file Info -------*- C++ -*--===//
+//===-- RISCVMCObjectFileInfo.h - RISC-V object file Info ------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -14,11 +14,13 @@
#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCOBJECTFILEINFO_H
#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
class RISCVMCObjectFileInfo : public MCObjectFileInfo {
public:
+ static unsigned getTextSectionAlignment(const MCSubtargetInfo &STI);
unsigned getTextSectionAlignment() const override;
};
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
index c63e0c8e737d..75af5c2de094 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVMCTargetDesc.cpp - RISCV Target Descriptions -----------------===//
+//===-- RISCVMCTargetDesc.cpp - RISC-V Target Descriptions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
///
-/// This file provides RISCV-specific target descriptions.
+/// This file provides RISC-V specific target descriptions.
///
//===----------------------------------------------------------------------===//
@@ -142,6 +142,96 @@ public:
return false;
}
+
+ bool isTerminator(const MCInst &Inst) const override {
+ if (MCInstrAnalysis::isTerminator(Inst))
+ return true;
+
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case RISCV::JAL:
+ case RISCV::JALR:
+ return Inst.getOperand(0).getReg() == RISCV::X0;
+ }
+ }
+
+ bool isCall(const MCInst &Inst) const override {
+ if (MCInstrAnalysis::isCall(Inst))
+ return true;
+
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case RISCV::JAL:
+ case RISCV::JALR:
+ return Inst.getOperand(0).getReg() != RISCV::X0;
+ }
+ }
+
+ bool isReturn(const MCInst &Inst) const override {
+ if (MCInstrAnalysis::isReturn(Inst))
+ return true;
+
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case RISCV::JALR:
+ return Inst.getOperand(0).getReg() == RISCV::X0 &&
+ maybeReturnAddress(Inst.getOperand(1).getReg());
+ case RISCV::C_JR:
+ return maybeReturnAddress(Inst.getOperand(0).getReg());
+ }
+ }
+
+ bool isBranch(const MCInst &Inst) const override {
+ if (MCInstrAnalysis::isBranch(Inst))
+ return true;
+
+ return isBranchImpl(Inst);
+ }
+
+ bool isUnconditionalBranch(const MCInst &Inst) const override {
+ if (MCInstrAnalysis::isUnconditionalBranch(Inst))
+ return true;
+
+ return isBranchImpl(Inst);
+ }
+
+ bool isIndirectBranch(const MCInst &Inst) const override {
+ if (MCInstrAnalysis::isIndirectBranch(Inst))
+ return true;
+
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case RISCV::JALR:
+ return Inst.getOperand(0).getReg() == RISCV::X0 &&
+ !maybeReturnAddress(Inst.getOperand(1).getReg());
+ case RISCV::C_JR:
+ return !maybeReturnAddress(Inst.getOperand(0).getReg());
+ }
+ }
+
+private:
+ static bool maybeReturnAddress(unsigned Reg) {
+ // X1 is used for normal returns, X5 for returns from outlined functions.
+ return Reg == RISCV::X1 || Reg == RISCV::X5;
+ }
+
+ static bool isBranchImpl(const MCInst &Inst) {
+ switch (Inst.getOpcode()) {
+ default:
+ return false;
+ case RISCV::JAL:
+ return Inst.getOperand(0).getReg() == RISCV::X0;
+ case RISCV::JALR:
+ return Inst.getOperand(0).getReg() == RISCV::X0 &&
+ !maybeReturnAddress(Inst.getOperand(1).getReg());
+ case RISCV::C_JR:
+ return !maybeReturnAddress(Inst.getOperand(0).getReg());
+ }
+ }
};
} // end anonymous namespace
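
The MCInstrAnalysis overrides added above all reduce to how JAL and JALR use their operands: a nonzero rd links a return address and is treated as a call, rd == x0 makes a plain (direct or indirect) branch, and an indirect jump through x1 or x5 is taken to be a return. A compact sketch of that decision table, with raw register numbers standing in for MCRegister values:

enum class Kind { Call, Return, IndirectBranch, DirectBranch };

// x1 (ra) is the normal link register; x5 (t0) is used for returns from
// outlined functions, matching maybeReturnAddress above.
static bool maybeReturnAddress(unsigned Reg) { return Reg == 1 || Reg == 5; }

static Kind classifyJal(unsigned Rd) {
  return Rd == 0 ? Kind::DirectBranch : Kind::Call;
}

static Kind classifyJalr(unsigned Rd, unsigned Rs1) {
  if (Rd != 0)
    return Kind::Call;
  return maybeReturnAddress(Rs1) ? Kind::Return : Kind::IndirectBranch;
}
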
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
index d157257d976c..3cfddb530cdf 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
@@ -1,4 +1,4 @@
-//===-- RISCVMCTargetDesc.h - RISCV Target Descriptions ---------*- C++ -*-===//
+//===-- RISCVMCTargetDesc.h - RISC-V Target Descriptions --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file provides RISCV specific target descriptions.
+// This file provides RISC-V specific target descriptions.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index 0c2bf9dad795..f659779e9772 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -155,15 +155,15 @@ static void generateInstSeqImpl(int64_t Val,
static unsigned extractRotateInfo(int64_t Val) {
// for case: 0b111..1..xxxxxx1..1..
- unsigned LeadingOnes = countLeadingOnes((uint64_t)Val);
- unsigned TrailingOnes = countTrailingOnes((uint64_t)Val);
+ unsigned LeadingOnes = llvm::countl_one((uint64_t)Val);
+ unsigned TrailingOnes = llvm::countr_one((uint64_t)Val);
if (TrailingOnes > 0 && TrailingOnes < 64 &&
(LeadingOnes + TrailingOnes) > (64 - 12))
return 64 - TrailingOnes;
// for case: 0bxxx1..1..1...xxx
- unsigned UpperTrailingOnes = countTrailingOnes(Hi_32(Val));
- unsigned LowerLeadingOnes = countLeadingOnes(Lo_32(Val));
+ unsigned UpperTrailingOnes = llvm::countr_one(Hi_32(Val));
+ unsigned LowerLeadingOnes = llvm::countl_one(Lo_32(Val));
if (UpperTrailingOnes < 32 &&
(UpperTrailingOnes + LowerLeadingOnes) > (64 - 12))
return 32 - UpperTrailingOnes;
@@ -180,7 +180,7 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
// or ADDIW. If there are trailing zeros, try generating a sign extended
// constant with no trailing zeros and use a final SLLI to restore them.
if ((Val & 0xfff) != 0 && (Val & 1) == 0 && Res.size() >= 2) {
- unsigned TrailingZeros = countTrailingZeros((uint64_t)Val);
+ unsigned TrailingZeros = llvm::countr_zero((uint64_t)Val);
int64_t ShiftedVal = Val >> TrailingZeros;
// If we can use C.LI+C.SLLI instead of LUI+ADDI(W) prefer that since
// its more compressible. But only if LUI+ADDI(W) isn't fusable.
@@ -190,19 +190,27 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
isInt<6>(ShiftedVal) && !ActiveFeatures[RISCV::TuneLUIADDIFusion];
RISCVMatInt::InstSeq TmpSeq;
generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
- TmpSeq.emplace_back(RISCV::SLLI, TrailingZeros);
// Keep the new sequence if it is an improvement.
- if (TmpSeq.size() < Res.size() || IsShiftedCompressible)
+ if ((TmpSeq.size() + 1) < Res.size() || IsShiftedCompressible) {
+ TmpSeq.emplace_back(RISCV::SLLI, TrailingZeros);
Res = TmpSeq;
+ }
}
+ // If we have a 1 or 2 instruction sequence this is the best we can do. This
+ // will always be true for RV32 and will often be true for RV64.
+ if (Res.size() <= 2)
+ return Res;
+
+ assert(ActiveFeatures[RISCV::Feature64Bit] &&
+ "Expected RV32 to only need 2 instructions");
+
// If the constant is positive we might be able to generate a shifted constant
// with no leading zeros and use a final SRLI to restore them.
- if (Val > 0 && Res.size() > 2) {
- assert(ActiveFeatures[RISCV::Feature64Bit] &&
- "Expected RV32 to only need 2 instructions");
- unsigned LeadingZeros = countLeadingZeros((uint64_t)Val);
+ if (Val > 0) {
+ assert(Res.size() > 2 && "Expected longer sequence");
+ unsigned LeadingZeros = llvm::countl_zero((uint64_t)Val);
uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
// Fill in the bits that will be shifted out with 1s. An example where this
// helps is trailing one masks with 32 or more ones. This will generate
@@ -211,21 +219,23 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
RISCVMatInt::InstSeq TmpSeq;
generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
- TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
// Keep the new sequence if it is an improvement.
- if (TmpSeq.size() < Res.size())
+ if ((TmpSeq.size() + 1) < Res.size()) {
+ TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
Res = TmpSeq;
+ }
// Some cases can benefit from filling the lower bits with zeros instead.
ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
TmpSeq.clear();
generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
- TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
// Keep the new sequence if it is an improvement.
- if (TmpSeq.size() < Res.size())
+ if ((TmpSeq.size() + 1) < Res.size()) {
+ TmpSeq.emplace_back(RISCV::SRLI, LeadingZeros);
Res = TmpSeq;
+ }
// If we have exactly 32 leading zeros and Zba, we can try using zext.w at
// the end of the sequence.
@@ -234,19 +244,33 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
TmpSeq.clear();
generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
- TmpSeq.emplace_back(RISCV::ADD_UW, 0);
// Keep the new sequence if it is an improvement.
- if (TmpSeq.size() < Res.size())
+ if ((TmpSeq.size() + 1) < Res.size()) {
+ TmpSeq.emplace_back(RISCV::ADD_UW, 0);
+ Res = TmpSeq;
+ }
+ }
+ }
+
+ // If the Low and High halves are the same, use pack. The pack instruction
+ // packs the XLEN/2-bit lower halves of rs1 and rs2 into rd, with rs1 in the
+ // lower half and rs2 in the upper half.
+ if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbkb]) {
+ int64_t LoVal = SignExtend64<32>(Val);
+ int64_t HiVal = SignExtend64<32>(Val >> 32);
+ if (LoVal == HiVal) {
+ RISCVMatInt::InstSeq TmpSeq;
+ generateInstSeqImpl(LoVal, ActiveFeatures, TmpSeq);
+ if ((TmpSeq.size() + 1) < Res.size()) {
+ TmpSeq.emplace_back(RISCV::PACK, 0);
Res = TmpSeq;
+ }
}
}
// Perform optimization with BCLRI/BSETI in the Zbs extension.
if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbs]) {
- assert(ActiveFeatures[RISCV::Feature64Bit] &&
- "Expected RV32 to only need 2 instructions");
-
// 1. For values in range 0xffffffff 7fffffff ~ 0xffffffff 00000000,
// call generateInstSeqImpl with Val|0x80000000 (which is expected to be
// an int32), then emit (BCLRI r, 31).
@@ -265,9 +289,10 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
if (isInt<32>(NewVal)) {
RISCVMatInt::InstSeq TmpSeq;
generateInstSeqImpl(NewVal, ActiveFeatures, TmpSeq);
- TmpSeq.emplace_back(Opc, 31);
- if (TmpSeq.size() < Res.size())
+ if ((TmpSeq.size() + 1) < Res.size()) {
+ TmpSeq.emplace_back(Opc, 31);
Res = TmpSeq;
+ }
}
// Try to use BCLRI for upper 32 bits if the original lower 32 bits are
@@ -299,8 +324,6 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
// Perform optimization with SH*ADD in the Zba extension.
if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
- assert(ActiveFeatures[RISCV::Feature64Bit] &&
- "Expected RV32 to only need 2 instructions");
int64_t Div = 0;
unsigned Opc = 0;
RISCVMatInt::InstSeq TmpSeq;
@@ -318,9 +341,10 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
// Build the new instruction sequence.
if (Div > 0) {
generateInstSeqImpl(Val / Div, ActiveFeatures, TmpSeq);
- TmpSeq.emplace_back(Opc, 0);
- if (TmpSeq.size() < Res.size())
+ if ((TmpSeq.size() + 1) < Res.size()) {
+ TmpSeq.emplace_back(Opc, 0);
Res = TmpSeq;
+ }
} else {
// Try to use LUI+SH*ADD+ADDI.
int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull;
@@ -344,23 +368,28 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
"unexpected instruction sequence for immediate materialisation");
assert(TmpSeq.empty() && "Expected empty TmpSeq");
generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq);
- TmpSeq.emplace_back(Opc, 0);
- TmpSeq.emplace_back(RISCV::ADDI, Lo12);
- if (TmpSeq.size() < Res.size())
+ if ((TmpSeq.size() + 2) < Res.size()) {
+ TmpSeq.emplace_back(Opc, 0);
+ TmpSeq.emplace_back(RISCV::ADDI, Lo12);
Res = TmpSeq;
+ }
}
}
}
- // Perform optimization with rori in the Zbb extension.
- if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbb]) {
+ // Perform optimization with rori in the Zbb extension and th.srri in the
+ // XTheadBb extension.
+ if (Res.size() > 2 && (ActiveFeatures[RISCV::FeatureStdExtZbb] ||
+ ActiveFeatures[RISCV::FeatureVendorXTHeadBb])) {
if (unsigned Rotate = extractRotateInfo(Val)) {
RISCVMatInt::InstSeq TmpSeq;
- uint64_t NegImm12 =
- ((uint64_t)Val >> (64 - Rotate)) | ((uint64_t)Val << Rotate);
+ uint64_t NegImm12 = llvm::rotl<uint64_t>(Val, Rotate);
assert(isInt<12>(NegImm12));
TmpSeq.emplace_back(RISCV::ADDI, NegImm12);
- TmpSeq.emplace_back(RISCV::RORI, Rotate);
+ TmpSeq.emplace_back(ActiveFeatures[RISCV::FeatureStdExtZbb]
+ ? RISCV::RORI
+ : RISCV::TH_SRRI,
+ Rotate);
Res = TmpSeq;
}
}
@@ -371,7 +400,7 @@ int getIntMatCost(const APInt &Val, unsigned Size,
const FeatureBitset &ActiveFeatures, bool CompressionCost) {
bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
bool HasRVC = CompressionCost && (ActiveFeatures[RISCV::FeatureStdExtC] ||
- ActiveFeatures[RISCV::FeatureExtZca]);
+ ActiveFeatures[RISCV::FeatureStdExtZca]);
int PlatRegSize = IsRV64 ? 64 : 32;
// Split the constant into platform register sized chunks, and calculate cost
@@ -396,6 +425,7 @@ OpndKind Inst::getOpndKind() const {
case RISCV::SH1ADD:
case RISCV::SH2ADD:
case RISCV::SH3ADD:
+ case RISCV::PACK:
return RISCVMatInt::RegReg;
case RISCV::ADDI:
case RISCV::ADDIW:
@@ -405,6 +435,7 @@ OpndKind Inst::getOpndKind() const {
case RISCV::RORI:
case RISCV::BSETI:
case RISCV::BCLRI:
+ case RISCV::TH_SRRI:
return RISCVMatInt::RegImm;
}
}
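
The recurring change in generateInstSeq above is easy to miss: each candidate sequence is now compared against the current best before its final SLLI/SRLI/ADD_UW/PACK is appended, so a candidate that would merely tie no longer replaces an equally short result (the shift-compressible special case keeps its extra OR condition). A small sketch of the pattern, assuming a trivial Inst stand-in for RISCVMatInt::Inst:

#include <cstdint>
#include <utility>
#include <vector>

struct Inst { unsigned Opc; int64_t Imm; };

// Adopt TmpSeq plus its final instruction only if the extended sequence is
// strictly shorter than the best one found so far.
static void keepIfStrictlyShorter(std::vector<Inst> &Res,
                                  std::vector<Inst> TmpSeq, Inst Tail) {
  if (TmpSeq.size() + 1 < Res.size()) {
    TmpSeq.push_back(Tail);
    Res = std::move(TmpSeq);
  }
}
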
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
index 8d71e0a22350..ae7b8d402184 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
@@ -10,7 +10,7 @@
#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_MATINT_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include <cstdint>
namespace llvm {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
index 756cc14a8701..29ffc3224b52 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVTargetStreamer.cpp - RISCV Target Streamer Methods -----------===//
+//===-- RISCVTargetStreamer.cpp - RISC-V Target Streamer Methods ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file provides RISCV specific target streamer methods.
+// This file provides RISC-V specific target streamer methods.
//
//===----------------------------------------------------------------------===//
@@ -33,6 +33,8 @@ void RISCVTargetStreamer::emitDirectiveOptionRVC() {}
void RISCVTargetStreamer::emitDirectiveOptionNoRVC() {}
void RISCVTargetStreamer::emitDirectiveOptionRelax() {}
void RISCVTargetStreamer::emitDirectiveOptionNoRelax() {}
+void RISCVTargetStreamer::emitDirectiveOptionArch(
+ ArrayRef<RISCVOptionArchArg> Args) {}
void RISCVTargetStreamer::emitDirectiveVariantCC(MCSymbol &Symbol) {}
void RISCVTargetStreamer::emitAttribute(unsigned Attribute, unsigned Value) {}
void RISCVTargetStreamer::finishAttributeSection() {}
@@ -46,10 +48,12 @@ void RISCVTargetStreamer::setTargetABI(RISCVABI::ABI ABI) {
TargetABI = ABI;
}
-void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) {
- if (STI.hasFeature(RISCV::FeatureRV32E))
- emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_4);
- else
+void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI,
+ bool EmitStackAlign) {
+ if (STI.hasFeature(RISCV::FeatureRVE))
+ report_fatal_error("Codegen not yet implemented for RVE");
+
+ if (EmitStackAlign)
emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_16);
auto ParseResult = RISCVFeatures::parseFeatureBits(
@@ -99,6 +103,26 @@ void RISCVTargetAsmStreamer::emitDirectiveOptionNoRelax() {
OS << "\t.option\tnorelax\n";
}
+void RISCVTargetAsmStreamer::emitDirectiveOptionArch(
+ ArrayRef<RISCVOptionArchArg> Args) {
+ OS << "\t.option\tarch";
+ for (const auto &Arg : Args) {
+ OS << ", ";
+ switch (Arg.Type) {
+ case RISCVOptionArchArgType::Full:
+ break;
+ case RISCVOptionArchArgType::Plus:
+ OS << "+";
+ break;
+ case RISCVOptionArchArgType::Minus:
+ OS << "-";
+ break;
+ }
+ OS << Arg.Value;
+ }
+ OS << "\n";
+}
+
void RISCVTargetAsmStreamer::emitDirectiveVariantCC(MCSymbol &Symbol) {
OS << "\t.variant_cc\t" << Symbol.getName() << "\n";
}
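
The new emitDirectiveOptionArch above writes a single ".option arch" line, prefixing each argument with "+" or "-" unless the full-architecture form is used; RISCVOptionArchArg itself is declared further down in RISCVTargetStreamer.h. A minimal sketch of the text it produces for a hypothetical argument list {+zba, -c}:

#include <iostream>
#include <string>
#include <vector>

enum class ArchArgType { Full, Plus, Minus };
struct ArchArg { ArchArgType Type; std::string Value; };

// Build the directive the same way the asm streamer writes it out.
static std::string renderOptionArch(const std::vector<ArchArg> &Args) {
  std::string S = "\t.option\tarch";
  for (const ArchArg &A : Args) {
    S += ", ";
    if (A.Type == ArchArgType::Plus)
      S += "+";
    else if (A.Type == ArchArgType::Minus)
      S += "-";
    S += A.Value;
  }
  return S + "\n";
}

int main() {
  // Writes "\t.option\tarch, +zba, -c\n" to stdout.
  std::cout << renderOptionArch({{ArchArgType::Plus, "zba"},
                                 {ArchArgType::Minus, "c"}});
}
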
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
index 6c8a1bc7344c..070e72fb157a 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h
@@ -1,4 +1,4 @@
-//===-- RISCVTargetStreamer.h - RISCV Target Streamer ----------*- C++ -*--===//
+//===-- RISCVTargetStreamer.h - RISC-V Target Streamer ---------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -17,6 +17,20 @@ namespace llvm {
class formatted_raw_ostream;
+enum class RISCVOptionArchArgType {
+ Full,
+ Plus,
+ Minus,
+};
+
+struct RISCVOptionArchArg {
+ RISCVOptionArchArgType Type;
+ std::string Value;
+
+ RISCVOptionArchArg(RISCVOptionArchArgType Type, std::string Value)
+ : Type(Type), Value(Value) {}
+};
+
class RISCVTargetStreamer : public MCTargetStreamer {
RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
@@ -33,6 +47,7 @@ public:
virtual void emitDirectiveOptionNoRVC();
virtual void emitDirectiveOptionRelax();
virtual void emitDirectiveOptionNoRelax();
+ virtual void emitDirectiveOptionArch(ArrayRef<RISCVOptionArchArg> Args);
virtual void emitDirectiveVariantCC(MCSymbol &Symbol);
virtual void emitAttribute(unsigned Attribute, unsigned Value);
virtual void finishAttributeSection();
@@ -40,7 +55,7 @@ public:
virtual void emitIntTextAttribute(unsigned Attribute, unsigned IntValue,
StringRef StringValue);
- void emitTargetAttributes(const MCSubtargetInfo &STI);
+ void emitTargetAttributes(const MCSubtargetInfo &STI, bool EmitStackAlign);
void setTargetABI(RISCVABI::ABI ABI);
RISCVABI::ABI getTargetABI() const { return TargetABI; }
};
@@ -66,6 +81,7 @@ public:
void emitDirectiveOptionNoRVC() override;
void emitDirectiveOptionRelax() override;
void emitDirectiveOptionNoRelax() override;
+ void emitDirectiveOptionArch(ArrayRef<RISCVOptionArchArg> Args) override;
void emitDirectiveVariantCC(MCSymbol &Symbol) override;
};
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index c42fb070aade..107ca51520b7 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -1,4 +1,4 @@
-//===-- RISCV.h - Top-level interface for RISCV -----------------*- C++ -*-===//
+//===-- RISCV.h - Top-level interface for RISC-V ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -33,11 +33,6 @@ class RISCVTargetMachine;
FunctionPass *createRISCVCodeGenPreparePass();
void initializeRISCVCodeGenPreparePass(PassRegistry &);
-bool lowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
- AsmPrinter &AP);
-bool lowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
- MCOperand &MCOp, const AsmPrinter &AP);
-
FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM,
CodeGenOpt::Level OptLevel);
@@ -47,11 +42,8 @@ void initializeRISCVMakeCompressibleOptPass(PassRegistry &);
FunctionPass *createRISCVGatherScatterLoweringPass();
void initializeRISCVGatherScatterLoweringPass(PassRegistry &);
-FunctionPass *createRISCVSExtWRemovalPass();
-void initializeRISCVSExtWRemovalPass(PassRegistry &);
-
-FunctionPass *createRISCVStripWSuffixPass();
-void initializeRISCVStripWSuffixPass(PassRegistry &);
+FunctionPass *createRISCVOptWInstrsPass();
+void initializeRISCVOptWInstrsPass(PassRegistry &);
FunctionPass *createRISCVMergeBaseOffsetOptPass();
void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &);
@@ -68,9 +60,22 @@ void initializeRISCVExpandAtomicPseudoPass(PassRegistry &);
FunctionPass *createRISCVInsertVSETVLIPass();
void initializeRISCVInsertVSETVLIPass(PassRegistry &);
+FunctionPass *createRISCVInsertReadWriteCSRPass();
+void initializeRISCVInsertReadWriteCSRPass(PassRegistry &);
+
FunctionPass *createRISCVRedundantCopyEliminationPass();
void initializeRISCVRedundantCopyEliminationPass(PassRegistry &);
+FunctionPass *createRISCVInitUndefPass();
+void initializeRISCVInitUndefPass(PassRegistry &);
+extern char &RISCVInitUndefID;
+
+FunctionPass *createRISCVMoveMergePass();
+void initializeRISCVMoveMergePass(PassRegistry &);
+
+FunctionPass *createRISCVPushPopOptimizationPass();
+void initializeRISCVPushPopOptPass(PassRegistry &);
+
InstructionSelector *createRISCVInstructionSelector(const RISCVTargetMachine &,
RISCVSubtarget &,
RISCVRegisterBankInfo &);
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 671f75c04baa..be93d5933d33 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -1,4 +1,4 @@
-//===-- RISCV.td - Describe the RISCV Target Machine -------*- tablegen -*-===//
+//===-- RISCV.td - Describe the RISC-V Target Machine ------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index a4b999e6aa3b..f7d11e921c7d 100644
--- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVAsmPrinter.cpp - RISCV LLVM assembly writer ------------------===//
+//===-- RISCVAsmPrinter.cpp - RISC-V LLVM assembly writer -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -7,10 +7,11 @@
//===----------------------------------------------------------------------===//
//
// This file contains a printer that converts from our internal representation
-// of machine-dependent LLVM code to the RISCV assembly language.
+// of machine-dependent LLVM code to the RISC-V assembly language.
//
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVInstPrinter.h"
#include "MCTargetDesc/RISCVMCExpr.h"
#include "MCTargetDesc/RISCVTargetStreamer.h"
@@ -18,6 +19,7 @@
#include "RISCVMachineFunctionInfo.h"
#include "RISCVTargetMachine.h"
#include "TargetInfo/RISCVTargetInfo.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -53,7 +55,7 @@ public:
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {}
- StringRef getPassName() const override { return "RISCV Assembly Printer"; }
+ StringRef getPassName() const override { return "RISC-V Assembly Printer"; }
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -71,12 +73,11 @@ public:
typedef std::tuple<unsigned, uint32_t> HwasanMemaccessTuple;
std::map<HwasanMemaccessTuple, MCSymbol *> HwasanMemaccessSymbols;
void LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI);
+ void LowerKCFI_CHECK(const MachineInstr &MI);
void EmitHwasanMemaccessSymbols(Module &M);
// Wrapper needed for tblgenned pseudo lowering.
- bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const {
- return lowerRISCVMachineOperandToMCOperand(MO, MCOp, *this);
- }
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
void emitStartOfAsmFile(Module &M) override;
void emitEndOfAsmFile(Module &M) override;
@@ -85,6 +86,10 @@ public:
private:
void emitAttributes();
+
+ void emitNTLHint(const MachineInstr *MI);
+
+ bool lowerToMCInst(const MachineInstr *MI, MCInst &OutMI);
};
}
@@ -100,10 +105,44 @@ void RISCVAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
// instructions) auto-generated.
#include "RISCVGenMCPseudoLowering.inc"
+// If the target supports Zihintntl and the instruction has a nontemporal
+// MachineMemOperand, emit an NTLH hint instruction before it.
+void RISCVAsmPrinter::emitNTLHint(const MachineInstr *MI) {
+ if (!STI->hasStdExtZihintntl())
+ return;
+
+ if (MI->memoperands_empty())
+ return;
+
+ MachineMemOperand *MMO = *(MI->memoperands_begin());
+ if (!MMO->isNonTemporal())
+ return;
+
+ unsigned NontemporalMode = 0;
+ if (MMO->getFlags() & MONontemporalBit0)
+ NontemporalMode += 0b1;
+ if (MMO->getFlags() & MONontemporalBit1)
+ NontemporalMode += 0b10;
+
+ MCInst Hint;
+ if (STI->hasStdExtCOrZca() && STI->enableRVCHintInstrs())
+ Hint.setOpcode(RISCV::C_ADD_HINT);
+ else
+ Hint.setOpcode(RISCV::ADD);
+
+ Hint.addOperand(MCOperand::createReg(RISCV::X0));
+ Hint.addOperand(MCOperand::createReg(RISCV::X0));
+ Hint.addOperand(MCOperand::createReg(RISCV::X2 + NontemporalMode));
+
+ EmitToStreamer(*OutStreamer, Hint);
+}
+
void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) {
RISCV_MC::verifyInstructionPredicates(MI->getOpcode(),
getSubtargetInfo().getFeatureBits());
+ emitNTLHint(MI);
+
// Do any auto-generated pseudo lowerings.
if (emitPseudoExpansionLowering(*OutStreamer, MI))
return;
@@ -113,11 +152,19 @@ void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) {
case RISCV::HWASAN_CHECK_MEMACCESS_SHORTGRANULES:
LowerHWASAN_CHECK_MEMACCESS(*MI);
return;
+ case RISCV::KCFI_CHECK:
+ LowerKCFI_CHECK(*MI);
+ return;
+ case RISCV::PseudoRVVInitUndefM1:
+ case RISCV::PseudoRVVInitUndefM2:
+ case RISCV::PseudoRVVInitUndefM4:
+ case RISCV::PseudoRVVInitUndefM8:
+ return;
}
- MCInst TmpInst;
- if (!lowerRISCVMachineInstrToMCInst(MI, TmpInst, *this))
- EmitToStreamer(*OutStreamer, TmpInst);
+ MCInst OutInst;
+ if (!lowerToMCInst(MI, OutInst))
+ EmitToStreamer(*OutStreamer, OutInst);
}
bool RISCVAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -173,18 +220,22 @@ bool RISCVAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
unsigned OpNo,
const char *ExtraCode,
raw_ostream &OS) {
- if (!ExtraCode) {
- const MachineOperand &MO = MI->getOperand(OpNo);
- // For now, we only support register memory operands in registers and
- // assume there is no addend
- if (!MO.isReg())
- return true;
-
- OS << "0(" << RISCVInstPrinter::getRegisterName(MO.getReg()) << ")";
- return false;
- }
-
- return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, ExtraCode, OS);
+ if (ExtraCode)
+ return AsmPrinter::PrintAsmMemoryOperand(MI, OpNo, ExtraCode, OS);
+
+ const MachineOperand &AddrReg = MI->getOperand(OpNo);
+ assert(MI->getNumOperands() > OpNo + 1 && "Expected additional operand");
+ const MachineOperand &DispImm = MI->getOperand(OpNo + 1);
+ // All memory operands should have a register and an immediate operand (see
+ // RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand).
+ if (!AddrReg.isReg())
+ return true;
+ if (!DispImm.isImm())
+ return true;
+
+ OS << DispImm.getImm() << "("
+ << RISCVInstPrinter::getRegisterName(AddrReg.getReg()) << ")";
+ return false;
}
bool RISCVAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
@@ -220,7 +271,7 @@ void RISCVAsmPrinter::emitAttributes() {
// Use MCSubtargetInfo from TargetMachine. Individual functions may have
// attributes that differ from other functions in the module and we have no
// way to know which function is correct.
- RTS.emitTargetAttributes(*TM.getMCSubtargetInfo());
+ RTS.emitTargetAttributes(*TM.getMCSubtargetInfo(), /*EmitStackAlign*/ true);
}
void RISCVAsmPrinter::emitFunctionEntryLabel() {
@@ -259,6 +310,92 @@ void RISCVAsmPrinter::LowerHWASAN_CHECK_MEMACCESS(const MachineInstr &MI) {
EmitToStreamer(*OutStreamer, MCInstBuilder(RISCV::PseudoCALL).addExpr(Expr));
}
+void RISCVAsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) {
+ Register AddrReg = MI.getOperand(0).getReg();
+ assert(std::next(MI.getIterator())->isCall() &&
+ "KCFI_CHECK not followed by a call instruction");
+ assert(std::next(MI.getIterator())->getOperand(0).getReg() == AddrReg &&
+ "KCFI_CHECK call target doesn't match call operand");
+
+ // Temporary registers for comparing the hashes. If a register is used
+ // for the call target, or reserved by the user, we can clobber another
+ // temporary register as the check is immediately followed by the
+ // call. The check defaults to X6/X7, but can fall back to X28-X31 if
+ // needed.
+ unsigned ScratchRegs[] = {RISCV::X6, RISCV::X7};
+ unsigned NextReg = RISCV::X28;
+ auto isRegAvailable = [&](unsigned Reg) {
+ return Reg != AddrReg && !STI->isRegisterReservedByUser(Reg);
+ };
+ for (auto &Reg : ScratchRegs) {
+ if (isRegAvailable(Reg))
+ continue;
+ while (!isRegAvailable(NextReg))
+ ++NextReg;
+ Reg = NextReg++;
+ if (Reg > RISCV::X31)
+ report_fatal_error("Unable to find scratch registers for KCFI_CHECK");
+ }
+
+ if (AddrReg == RISCV::X0) {
+ // Checking X0 makes no sense. Instead of emitting a load, zero
+ // ScratchRegs[0].
+ EmitToStreamer(*OutStreamer, MCInstBuilder(RISCV::ADDI)
+ .addReg(ScratchRegs[0])
+ .addReg(RISCV::X0)
+ .addImm(0));
+ } else {
+ // Adjust the offset for patchable-function-prefix. This assumes that
+ // patchable-function-prefix is the same for all functions.
+ int NopSize = STI->hasStdExtCOrZca() ? 2 : 4;
+ int64_t PrefixNops = 0;
+ (void)MI.getMF()
+ ->getFunction()
+ .getFnAttribute("patchable-function-prefix")
+ .getValueAsString()
+ .getAsInteger(10, PrefixNops);
+
+ // Load the target function type hash.
+ EmitToStreamer(*OutStreamer, MCInstBuilder(RISCV::LW)
+ .addReg(ScratchRegs[0])
+ .addReg(AddrReg)
+ .addImm(-(PrefixNops * NopSize + 4)));
+ }
+
+ // Load the expected 32-bit type hash.
+ const int64_t Type = MI.getOperand(1).getImm();
+ const int64_t Hi20 = ((Type + 0x800) >> 12) & 0xFFFFF;
+ const int64_t Lo12 = SignExtend64<12>(Type);
+ if (Hi20) {
+ EmitToStreamer(
+ *OutStreamer,
+ MCInstBuilder(RISCV::LUI).addReg(ScratchRegs[1]).addImm(Hi20));
+ }
+ if (Lo12 || Hi20 == 0) {
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder((STI->hasFeature(RISCV::Feature64Bit) && Hi20)
+ ? RISCV::ADDIW
+ : RISCV::ADDI)
+ .addReg(ScratchRegs[1])
+ .addReg(ScratchRegs[1])
+ .addImm(Lo12));
+ }
+
+ // Compare the hashes and trap if there's a mismatch.
+ MCSymbol *Pass = OutContext.createTempSymbol();
+ EmitToStreamer(*OutStreamer,
+ MCInstBuilder(RISCV::BEQ)
+ .addReg(ScratchRegs[0])
+ .addReg(ScratchRegs[1])
+ .addExpr(MCSymbolRefExpr::create(Pass, OutContext)));
+
+ MCSymbol *Trap = OutContext.createTempSymbol();
+ OutStreamer->emitLabel(Trap);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(RISCV::EBREAK));
+ emitKCFITrapEntry(*MI.getMF(), Trap);
+ OutStreamer->emitLabel(Pass);
+}
+
void RISCVAsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
if (HwasanMemaccessSymbols.empty())
return;
@@ -465,3 +602,249 @@ void RISCVAsmPrinter::EmitHwasanMemaccessSymbols(Module &M) {
MCSTI);
}
}
+
+static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym,
+ const AsmPrinter &AP) {
+ MCContext &Ctx = AP.OutContext;
+ RISCVMCExpr::VariantKind Kind;
+
+ switch (MO.getTargetFlags()) {
+ default:
+ llvm_unreachable("Unknown target flag on GV operand");
+ case RISCVII::MO_None:
+ Kind = RISCVMCExpr::VK_RISCV_None;
+ break;
+ case RISCVII::MO_CALL:
+ Kind = RISCVMCExpr::VK_RISCV_CALL;
+ break;
+ case RISCVII::MO_PLT:
+ Kind = RISCVMCExpr::VK_RISCV_CALL_PLT;
+ break;
+ case RISCVII::MO_LO:
+ Kind = RISCVMCExpr::VK_RISCV_LO;
+ break;
+ case RISCVII::MO_HI:
+ Kind = RISCVMCExpr::VK_RISCV_HI;
+ break;
+ case RISCVII::MO_PCREL_LO:
+ Kind = RISCVMCExpr::VK_RISCV_PCREL_LO;
+ break;
+ case RISCVII::MO_PCREL_HI:
+ Kind = RISCVMCExpr::VK_RISCV_PCREL_HI;
+ break;
+ case RISCVII::MO_GOT_HI:
+ Kind = RISCVMCExpr::VK_RISCV_GOT_HI;
+ break;
+ case RISCVII::MO_TPREL_LO:
+ Kind = RISCVMCExpr::VK_RISCV_TPREL_LO;
+ break;
+ case RISCVII::MO_TPREL_HI:
+ Kind = RISCVMCExpr::VK_RISCV_TPREL_HI;
+ break;
+ case RISCVII::MO_TPREL_ADD:
+ Kind = RISCVMCExpr::VK_RISCV_TPREL_ADD;
+ break;
+ case RISCVII::MO_TLS_GOT_HI:
+ Kind = RISCVMCExpr::VK_RISCV_TLS_GOT_HI;
+ break;
+ case RISCVII::MO_TLS_GD_HI:
+ Kind = RISCVMCExpr::VK_RISCV_TLS_GD_HI;
+ break;
+ }
+
+ const MCExpr *ME =
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx);
+
+ if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
+ ME = MCBinaryExpr::createAdd(
+ ME, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
+
+ if (Kind != RISCVMCExpr::VK_RISCV_None)
+ ME = RISCVMCExpr::create(ME, Kind, Ctx);
+ return MCOperand::createExpr(ME);
+}
+
+bool RISCVAsmPrinter::lowerOperand(const MachineOperand &MO,
+ MCOperand &MCOp) const {
+ switch (MO.getType()) {
+ default:
+ report_fatal_error("lowerOperand: unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit())
+ return false;
+ MCOp = MCOperand::createReg(MO.getReg());
+ break;
+ case MachineOperand::MO_RegisterMask:
+ // Regmasks are like implicit defs.
+ return false;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::createImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = lowerSymbolOperand(MO, MO.getMBB()->getSymbol(), *this);
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = lowerSymbolOperand(MO, getSymbolPreferLocal(*MO.getGlobal()), *this);
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress()),
+ *this);
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO.getSymbolName()),
+ *this);
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = lowerSymbolOperand(MO, GetCPISymbol(MO.getIndex()), *this);
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = lowerSymbolOperand(MO, GetJTISymbol(MO.getIndex()), *this);
+ break;
+ case MachineOperand::MO_MCSymbol:
+ MCOp = lowerSymbolOperand(MO, MO.getMCSymbol(), *this);
+ break;
+ }
+ return true;
+}
+
+static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI,
+ MCInst &OutMI) {
+ const RISCVVPseudosTable::PseudoInfo *RVV =
+ RISCVVPseudosTable::getPseudoInfo(MI->getOpcode());
+ if (!RVV)
+ return false;
+
+ OutMI.setOpcode(RVV->BaseInstr);
+
+ const MachineBasicBlock *MBB = MI->getParent();
+ assert(MBB && "MI expected to be in a basic block");
+ const MachineFunction *MF = MBB->getParent();
+ assert(MF && "MBB expected to be in a machine function");
+
+ const RISCVSubtarget &Subtarget = MF->getSubtarget<RISCVSubtarget>();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ assert(TRI && "TargetRegisterInfo expected");
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ uint64_t TSFlags = MCID.TSFlags;
+ unsigned NumOps = MI->getNumExplicitOperands();
+
+ // Skip policy, VL and SEW operands which are the last operands if present.
+ if (RISCVII::hasVecPolicyOp(TSFlags))
+ --NumOps;
+ if (RISCVII::hasVLOp(TSFlags))
+ --NumOps;
+ if (RISCVII::hasSEWOp(TSFlags))
+ --NumOps;
+ if (RISCVII::hasRoundModeOp(TSFlags))
+ --NumOps;
+
+ bool hasVLOutput = RISCV::isFaultFirstLoad(*MI);
+ for (unsigned OpNo = 0; OpNo != NumOps; ++OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ // Skip vl output. It should be the second output.
+ if (hasVLOutput && OpNo == 1)
+ continue;
+
+ // Skip merge op. It should be the first operand after the defs.
+ if (OpNo == MI->getNumExplicitDefs() && MO.isReg() && MO.isTied()) {
+ assert(MCID.getOperandConstraint(OpNo, MCOI::TIED_TO) == 0 &&
+ "Expected tied to first def.");
+ const MCInstrDesc &OutMCID = TII->get(OutMI.getOpcode());
+ // Skip if the next operand in OutMI is not supposed to be tied. Unless it
+ // is a _TIED instruction.
+ if (OutMCID.getOperandConstraint(OutMI.getNumOperands(), MCOI::TIED_TO) <
+ 0 &&
+ !RISCVII::isTiedPseudo(TSFlags))
+ continue;
+ }
+
+ MCOperand MCOp;
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("Unknown operand type");
+ case MachineOperand::MO_Register: {
+ Register Reg = MO.getReg();
+
+ if (RISCV::VRM2RegClass.contains(Reg) ||
+ RISCV::VRM4RegClass.contains(Reg) ||
+ RISCV::VRM8RegClass.contains(Reg)) {
+ Reg = TRI->getSubReg(Reg, RISCV::sub_vrm1_0);
+ assert(Reg && "Subregister does not exist");
+ } else if (RISCV::FPR16RegClass.contains(Reg)) {
+ Reg =
+ TRI->getMatchingSuperReg(Reg, RISCV::sub_16, &RISCV::FPR32RegClass);
+ assert(Reg && "Subregister does not exist");
+ } else if (RISCV::FPR64RegClass.contains(Reg)) {
+ Reg = TRI->getSubReg(Reg, RISCV::sub_32);
+ assert(Reg && "Superregister does not exist");
+ } else if (RISCV::VRN2M1RegClass.contains(Reg) ||
+ RISCV::VRN2M2RegClass.contains(Reg) ||
+ RISCV::VRN2M4RegClass.contains(Reg) ||
+ RISCV::VRN3M1RegClass.contains(Reg) ||
+ RISCV::VRN3M2RegClass.contains(Reg) ||
+ RISCV::VRN4M1RegClass.contains(Reg) ||
+ RISCV::VRN4M2RegClass.contains(Reg) ||
+ RISCV::VRN5M1RegClass.contains(Reg) ||
+ RISCV::VRN6M1RegClass.contains(Reg) ||
+ RISCV::VRN7M1RegClass.contains(Reg) ||
+ RISCV::VRN8M1RegClass.contains(Reg)) {
+ Reg = TRI->getSubReg(Reg, RISCV::sub_vrm1_0);
+ assert(Reg && "Subregister does not exist");
+ }
+
+ MCOp = MCOperand::createReg(Reg);
+ break;
+ }
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::createImm(MO.getImm());
+ break;
+ }
+ OutMI.addOperand(MCOp);
+ }
+
+ // Unmasked pseudo instructions need to append a dummy mask operand to
+ // V instructions. All V instructions are modeled as the masked version.
+ const MCInstrDesc &OutMCID = TII->get(OutMI.getOpcode());
+ if (OutMI.getNumOperands() < OutMCID.getNumOperands()) {
+ assert(OutMCID.operands()[OutMI.getNumOperands()].RegClass ==
+ RISCV::VMV0RegClassID &&
+ "Expected only mask operand to be missing");
+ OutMI.addOperand(MCOperand::createReg(RISCV::NoRegister));
+ }
+
+ assert(OutMI.getNumOperands() == OutMCID.getNumOperands());
+ return true;
+}
+
+bool RISCVAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
+ if (lowerRISCVVMachineInstrToMCInst(MI, OutMI))
+ return false;
+
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (const MachineOperand &MO : MI->operands()) {
+ MCOperand MCOp;
+ if (lowerOperand(MO, MCOp))
+ OutMI.addOperand(MCOp);
+ }
+
+ switch (OutMI.getOpcode()) {
+ case TargetOpcode::PATCHABLE_FUNCTION_ENTER: {
+ const Function &F = MI->getParent()->getParent()->getFunction();
+ if (F.hasFnAttribute("patchable-function-entry")) {
+ unsigned Num;
+ if (F.getFnAttribute("patchable-function-entry")
+ .getValueAsString()
+ .getAsInteger(10, Num))
+ return false;
+ emitNops(Num);
+ return true;
+ }
+ break;
+ }
+ }
+ return false;
+}
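
LowerKCFI_CHECK above materializes the expected 32-bit type hash with LUI plus ADDI (ADDIW on RV64), splitting it into Hi20 and Lo12 in the usual hi/lo fashion; adding 0x800 before taking the upper bits compensates for the low 12 bits being sign-extended when they are added back. A self-contained sketch of that split, checking that the two pieces recombine to the original hash (32-bit unsigned arithmetic stands in for the register):

#include <cassert>
#include <cstdint>
#include <initializer_list>

// Sign-extend the low 12 bits, as SignExtend64<12> does in the patch.
static int64_t signExtend12(int64_t V) {
  V &= 0xFFF;
  return (V & 0x800) ? V - 0x1000 : V;
}

// Recombine Hi20/Lo12 the way LUI followed by ADDI(W) would.
static uint32_t materialize(int64_t Type) {
  int64_t Hi20 = ((Type + 0x800) >> 12) & 0xFFFFF;
  int64_t Lo12 = signExtend12(Type);
  uint32_t Reg = static_cast<uint32_t>(Hi20) << 12;          // LUI Hi20
  Reg += static_cast<uint32_t>(static_cast<int32_t>(Lo12));  // ADDI(W) Lo12
  return Reg;
}

int main() {
  for (uint32_t Hash : {0x12345678u, 0x7ffff7ffu, 0x00000fffu, 0xdeadbeefu})
    assert(materialize(Hash) == Hash);
}
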
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.td b/llvm/lib/Target/RISCV/RISCVCallingConv.td
index 025454f8fcca..130a6ecc143d 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.td
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.td
@@ -1,4 +1,4 @@
-//===-- RISCVCallingConv.td - Calling Conventions RISCV ----*- tablegen -*-===//
+//===-- RISCVCallingConv.td - Calling Conventions RISC-V ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This describes the calling conventions for the RISCV architecture.
+// This describes the calling conventions for the RISC-V architecture.
//
//===----------------------------------------------------------------------===//
@@ -29,37 +29,12 @@ def CSR_NoRegs : CalleeSavedRegs<(add)>;
// Interrupt handler needs to save/restore all registers that are used,
// both Caller and Callee saved registers.
-def CSR_Interrupt : CalleeSavedRegs<(add X1,
- (sequence "X%u", 3, 9),
- (sequence "X%u", 10, 11),
- (sequence "X%u", 12, 17),
- (sequence "X%u", 18, 27),
- (sequence "X%u", 28, 31))>;
+def CSR_Interrupt : CalleeSavedRegs<(add X1, (sequence "X%u", 3, 31))>;
// Same as CSR_Interrupt, but including all 32-bit FP registers.
-def CSR_XLEN_F32_Interrupt: CalleeSavedRegs<(add X1,
- (sequence "X%u", 3, 9),
- (sequence "X%u", 10, 11),
- (sequence "X%u", 12, 17),
- (sequence "X%u", 18, 27),
- (sequence "X%u", 28, 31),
- (sequence "F%u_F", 0, 7),
- (sequence "F%u_F", 10, 11),
- (sequence "F%u_F", 12, 17),
- (sequence "F%u_F", 28, 31),
- (sequence "F%u_F", 8, 9),
- (sequence "F%u_F", 18, 27))>;
+def CSR_XLEN_F32_Interrupt: CalleeSavedRegs<(add CSR_Interrupt,
+ (sequence "F%u_F", 0, 31))>;
// Same as CSR_Interrupt, but including all 64-bit FP registers.
-def CSR_XLEN_F64_Interrupt: CalleeSavedRegs<(add X1,
- (sequence "X%u", 3, 9),
- (sequence "X%u", 10, 11),
- (sequence "X%u", 12, 17),
- (sequence "X%u", 18, 27),
- (sequence "X%u", 28, 31),
- (sequence "F%u_D", 0, 7),
- (sequence "F%u_D", 10, 11),
- (sequence "F%u_D", 12, 17),
- (sequence "F%u_D", 28, 31),
- (sequence "F%u_D", 8, 9),
- (sequence "F%u_D", 18, 27))>;
+def CSR_XLEN_F64_Interrupt: CalleeSavedRegs<(add CSR_Interrupt,
+ (sequence "F%u_D", 0, 31))>;
diff --git a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
index 5c12d3304557..2fcd9a40588a 100644
--- a/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This is a RISCV specific version of CodeGenPrepare.
+// This is a RISC-V specific version of CodeGenPrepare.
// It munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach.
@@ -26,7 +26,7 @@
using namespace llvm;
#define DEBUG_TYPE "riscv-codegenprepare"
-#define PASS_NAME "RISCV CodeGenPrepare"
+#define PASS_NAME "RISC-V CodeGenPrepare"
STATISTIC(NumZExtToSExt, "Number of SExt instructions converted to ZExt");
diff --git a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
index 58ae28d57bd8..59f1e8319ae7 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp
@@ -24,7 +24,7 @@
using namespace llvm;
#define RISCV_EXPAND_ATOMIC_PSEUDO_NAME \
- "RISCV atomic pseudo instruction expansion pass"
+ "RISC-V atomic pseudo instruction expansion pass"
namespace {
@@ -58,15 +58,34 @@ private:
bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, bool IsMasked,
int Width, MachineBasicBlock::iterator &NextMBBI);
+#ifndef NDEBUG
+ unsigned getInstSizeInBytes(const MachineFunction &MF) const {
+ unsigned Size = 0;
+ for (auto &MBB : MF)
+ for (auto &MI : MBB)
+ Size += TII->getInstSizeInBytes(MI);
+ return Size;
+ }
+#endif
};
char RISCVExpandAtomicPseudo::ID = 0;
bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
+
+#ifndef NDEBUG
+ const unsigned OldSize = getInstSizeInBytes(MF);
+#endif
+
bool Modified = false;
for (auto &MBB : MF)
Modified |= expandMBB(MBB);
+
+#ifndef NDEBUG
+ const unsigned NewSize = getInstSizeInBytes(MF);
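+  // The Size recorded on each pseudo in its tablegen definition is expected to
+  // be an upper bound on its expansion, so expanding must not grow the
+  // function.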
+ assert(OldSize >= NewSize);
+#endif
return Modified;
}
@@ -159,7 +178,7 @@ static unsigned getSCForRMW32(AtomicOrdering Ordering) {
case AtomicOrdering::AcquireRelease:
return RISCV::SC_W_RL;
case AtomicOrdering::SequentiallyConsistent:
- return RISCV::SC_W_AQ_RL;
+ return RISCV::SC_W_RL;
}
}
@@ -193,7 +212,7 @@ static unsigned getSCForRMW64(AtomicOrdering Ordering) {
case AtomicOrdering::AcquireRelease:
return RISCV::SC_D_RL;
case AtomicOrdering::SequentiallyConsistent:
- return RISCV::SC_D_AQ_RL;
+ return RISCV::SC_D_RL;
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index c4d85dc8ddc9..58896ee1b388 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -23,13 +23,14 @@
using namespace llvm;
-#define RISCV_EXPAND_PSEUDO_NAME "RISCV pseudo instruction expansion pass"
-#define RISCV_PRERA_EXPAND_PSEUDO_NAME "RISCV Pre-RA pseudo instruction expansion pass"
+#define RISCV_EXPAND_PSEUDO_NAME "RISC-V pseudo instruction expansion pass"
+#define RISCV_PRERA_EXPAND_PSEUDO_NAME "RISC-V Pre-RA pseudo instruction expansion pass"
namespace {
class RISCVExpandPseudo : public MachineFunctionPass {
public:
+ const RISCVSubtarget *STI;
const RISCVInstrInfo *TII;
static char ID;
@@ -50,15 +51,39 @@ private:
bool expandVSetVL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
bool expandVMSET_VMCLR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, unsigned Opcode);
+ bool expandRV32ZdinxStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ bool expandRV32ZdinxLoad(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+#ifndef NDEBUG
+ unsigned getInstSizeInBytes(const MachineFunction &MF) const {
+ unsigned Size = 0;
+ for (auto &MBB : MF)
+ for (auto &MI : MBB)
+ Size += TII->getInstSizeInBytes(MI);
+ return Size;
+ }
+#endif
};
char RISCVExpandPseudo::ID = 0;
bool RISCVExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ STI = &MF.getSubtarget<RISCVSubtarget>();
+ TII = STI->getInstrInfo();
+
+#ifndef NDEBUG
+ const unsigned OldSize = getInstSizeInBytes(MF);
+#endif
+
bool Modified = false;
for (auto &MBB : MF)
Modified |= expandMBB(MBB);
+
+#ifndef NDEBUG
+ const unsigned NewSize = getInstSizeInBytes(MF);
+ assert(OldSize >= NewSize);
+#endif
return Modified;
}
@@ -82,6 +107,10 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
// expanded instructions for each pseudo is correct in the Size field of the
// tablegen definition for the pseudo.
switch (MBBI->getOpcode()) {
+ case RISCV::PseudoRV32ZdinxSD:
+ return expandRV32ZdinxStore(MBB, MBBI);
+ case RISCV::PseudoRV32ZdinxLD:
+ return expandRV32ZdinxLoad(MBB, MBBI);
case RISCV::PseudoCCMOVGPR:
case RISCV::PseudoCCADD:
case RISCV::PseudoCCSUB:
@@ -232,8 +261,89 @@ bool RISCVExpandPseudo::expandVMSET_VMCLR(MachineBasicBlock &MBB,
return true;
}
+// This function expands PseudoRV32ZdinxSD by generating an equivalent RV32
+// instruction sequence that stores a double-precision floating-point value
+// to memory.
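+// For example (illustrative), storing a double held in the x10/x11 pair at
+// offset 0 from x12 expands to roughly:
+//   sw x10, 0(x12)
+//   sw x11, 4(x12)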
+bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ DebugLoc DL = MBBI->getDebugLoc();
+ const TargetRegisterInfo *TRI = STI->getRegisterInfo();
+ Register Lo = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32);
+ Register Hi = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32_hi);
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW))
+ .addReg(Lo, getKillRegState(MBBI->getOperand(0).isKill()))
+ .addReg(MBBI->getOperand(1).getReg())
+ .add(MBBI->getOperand(2));
+ if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) {
+    // FIXME: Zdinx RV32 cannot work on unaligned scalar memory.
+ assert(!STI->enableUnalignedScalarMem());
+
+ assert(MBBI->getOperand(2).getOffset() % 8 == 0);
+ MBBI->getOperand(2).setOffset(MBBI->getOperand(2).getOffset() + 4);
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW))
+ .addReg(Hi, getKillRegState(MBBI->getOperand(0).isKill()))
+ .add(MBBI->getOperand(1))
+ .add(MBBI->getOperand(2));
+ } else {
+ assert(isInt<12>(MBBI->getOperand(2).getImm() + 4));
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW))
+ .addReg(Hi, getKillRegState(MBBI->getOperand(0).isKill()))
+ .add(MBBI->getOperand(1))
+ .addImm(MBBI->getOperand(2).getImm() + 4);
+ }
+ MBBI->eraseFromParent();
+ return true;
+}
+
+// This function expands PseudoRV32ZdinxLD by generating an equivalent RV32
+// instruction sequence that loads a double-precision floating-point value
+// from memory.
+bool RISCVExpandPseudo::expandRV32ZdinxLoad(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ DebugLoc DL = MBBI->getDebugLoc();
+ const TargetRegisterInfo *TRI = STI->getRegisterInfo();
+ Register Lo = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32);
+ Register Hi = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32_hi);
+
+  // If the base address register (operand 1) is the same as the Lo destination
+  // register, swap the order of the Lo and Hi loads so the base address is not
+  // clobbered before the second load.
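+  // For example (illustrative), if the pair is x10/x11 and the base address is
+  // in x10, Hi is loaded first (lw x11, 4(x10)) and Lo last (lw x10, 0(x10)).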
+ bool IsOp1EqualToLo = Lo == MBBI->getOperand(1).getReg();
+ // Order: Lo, Hi
+ if (!IsOp1EqualToLo) {
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Lo)
+ .addReg(MBBI->getOperand(1).getReg())
+ .add(MBBI->getOperand(2));
+ }
+
+ if (MBBI->getOperand(2).isGlobal() || MBBI->getOperand(2).isCPI()) {
+ auto Offset = MBBI->getOperand(2).getOffset();
+ assert(MBBI->getOperand(2).getOffset() % 8 == 0);
+ MBBI->getOperand(2).setOffset(Offset + 4);
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Hi)
+ .addReg(MBBI->getOperand(1).getReg())
+ .add(MBBI->getOperand(2));
+ MBBI->getOperand(2).setOffset(Offset);
+ } else {
+ assert(isInt<12>(MBBI->getOperand(2).getImm() + 4));
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Hi)
+ .addReg(MBBI->getOperand(1).getReg())
+ .addImm(MBBI->getOperand(2).getImm() + 4);
+ }
+
+ // Order: Hi, Lo
+ if (IsOp1EqualToLo) {
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::LW), Lo)
+ .addReg(MBBI->getOperand(1).getReg())
+ .add(MBBI->getOperand(2));
+ }
+
+ MBBI->eraseFromParent();
+ return true;
+}
+
class RISCVPreRAExpandPseudo : public MachineFunctionPass {
public:
+ const RISCVSubtarget *STI;
const RISCVInstrInfo *TII;
static char ID;
@@ -262,24 +372,44 @@ private:
bool expandLoadLocalAddress(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
- bool expandLoadAddress(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- MachineBasicBlock::iterator &NextMBBI);
+ bool expandLoadGlobalAddress(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
bool expandLoadTLSIEAddress(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
bool expandLoadTLSGDAddress(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI);
+#ifndef NDEBUG
+ unsigned getInstSizeInBytes(const MachineFunction &MF) const {
+ unsigned Size = 0;
+ for (auto &MBB : MF)
+ for (auto &MI : MBB)
+ Size += TII->getInstSizeInBytes(MI);
+ return Size;
+ }
+#endif
};
char RISCVPreRAExpandPseudo::ID = 0;
bool RISCVPreRAExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
- TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
+ STI = &MF.getSubtarget<RISCVSubtarget>();
+ TII = STI->getInstrInfo();
+
+#ifndef NDEBUG
+ const unsigned OldSize = getInstSizeInBytes(MF);
+#endif
+
bool Modified = false;
for (auto &MBB : MF)
Modified |= expandMBB(MBB);
+
+#ifndef NDEBUG
+ const unsigned NewSize = getInstSizeInBytes(MF);
+ assert(OldSize >= NewSize);
+#endif
return Modified;
}
@@ -303,8 +433,8 @@ bool RISCVPreRAExpandPseudo::expandMI(MachineBasicBlock &MBB,
switch (MBBI->getOpcode()) {
case RISCV::PseudoLLA:
return expandLoadLocalAddress(MBB, MBBI, NextMBBI);
- case RISCV::PseudoLA:
- return expandLoadAddress(MBB, MBBI, NextMBBI);
+ case RISCV::PseudoLGA:
+ return expandLoadGlobalAddress(MBB, MBBI, NextMBBI);
case RISCV::PseudoLA_TLS_IE:
return expandLoadTLSIEAddress(MBB, MBBI, NextMBBI);
case RISCV::PseudoLA_TLS_GD:
@@ -352,18 +482,10 @@ bool RISCVPreRAExpandPseudo::expandLoadLocalAddress(
RISCV::ADDI);
}
-bool RISCVPreRAExpandPseudo::expandLoadAddress(
+bool RISCVPreRAExpandPseudo::expandLoadGlobalAddress(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
- MachineFunction *MF = MBB.getParent();
-
- const auto &STI = MF->getSubtarget<RISCVSubtarget>();
- // When HWASAN is used and tagging of global variables is enabled
- // they should be accessed via the GOT, since the tagged address of a global
- // is incompatible with existing code models. This also applies to non-pic
- // mode.
- assert(MF->getTarget().isPositionIndependent() || STI.allowTaggedGlobals());
- unsigned SecondOpcode = STI.is64Bit() ? RISCV::LD : RISCV::LW;
+ unsigned SecondOpcode = STI->is64Bit() ? RISCV::LD : RISCV::LW;
return expandAuipcInstPair(MBB, MBBI, NextMBBI, RISCVII::MO_GOT_HI,
SecondOpcode);
}
@@ -371,10 +493,7 @@ bool RISCVPreRAExpandPseudo::expandLoadAddress(
bool RISCVPreRAExpandPseudo::expandLoadTLSIEAddress(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MachineBasicBlock::iterator &NextMBBI) {
- MachineFunction *MF = MBB.getParent();
-
- const auto &STI = MF->getSubtarget<RISCVSubtarget>();
- unsigned SecondOpcode = STI.is64Bit() ? RISCV::LD : RISCV::LW;
+ unsigned SecondOpcode = STI->is64Bit() ? RISCV::LD : RISCV::LW;
return expandAuipcInstPair(MBB, MBBI, NextMBBI, RISCVII::MO_TLS_GOT_HI,
SecondOpcode);
}
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index be8834fd4c2f..4ce9c41eaf5c 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1,4 +1,4 @@
-//===-- RISCVFeatures.td - RISCV Features and Extensions ---*- tablegen -*-===//
+//===-- RISCVFeatures.td - RISC-V Features and Extensions --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -10,6 +10,13 @@
// RISC-V subtarget features and instruction predicates.
//===----------------------------------------------------------------------===//
+def FeatureStdExtZicsr
+ : SubtargetFeature<"zicsr", "HasStdExtZicsr", "true",
+ "'zicsr' (CSRs)">;
+def HasStdExtZicsr : Predicate<"Subtarget->hasStdExtZicsr()">,
+ AssemblerPredicate<(all_of FeatureStdExtZicsr),
+ "'Zicsr' (CSRs)">;
+
def FeatureStdExtM
: SubtargetFeature<"m", "HasStdExtM", "true",
"'M' (Integer Multiplication and Division)">;
@@ -36,7 +43,8 @@ def HasStdExtA : Predicate<"Subtarget->hasStdExtA()">,
def FeatureStdExtF
: SubtargetFeature<"f", "HasStdExtF", "true",
- "'F' (Single-Precision Floating-Point)">;
+ "'F' (Single-Precision Floating-Point)",
+ [FeatureStdExtZicsr]>;
def HasStdExtF : Predicate<"Subtarget->hasStdExtF()">,
AssemblerPredicate<(all_of FeatureStdExtF),
"'F' (Single-Precision Floating-Point)">;
@@ -59,18 +67,35 @@ def HasStdExtH : Predicate<"Subtarget->hasStdExtH()">,
def FeatureStdExtZihintpause
: SubtargetFeature<"zihintpause", "HasStdExtZihintpause", "true",
- "'zihintpause' (Pause Hint)">;
+ "'Zihintpause' (Pause Hint)">;
def HasStdExtZihintpause : Predicate<"Subtarget->hasStdExtZihintpause()">,
AssemblerPredicate<(all_of FeatureStdExtZihintpause),
"'Zihintpause' (Pause Hint)">;
def FeatureStdExtZihintntl
: SubtargetFeature<"experimental-zihintntl", "HasStdExtZihintntl", "true",
- "'zihintntl' (Non-Temporal Locality Hints)">;
+ "'Zihintntl' (Non-Temporal Locality Hints)">;
def HasStdExtZihintntl : Predicate<"Subtarget->hasStdExtZihintntl()">,
AssemblerPredicate<(all_of FeatureStdExtZihintntl),
"'Zihintntl' (Non-Temporal Locality Hints)">;
+def FeatureStdExtZifencei
+ : SubtargetFeature<"zifencei", "HasStdExtZifencei", "true",
+ "'Zifencei' (fence.i)">;
+def HasStdExtZifencei : Predicate<"Subtarget->hasStdExtZifencei()">,
+ AssemblerPredicate<(all_of FeatureStdExtZifencei),
+ "'Zifencei' (fence.i)">;
+
+def FeatureStdExtZicntr
+ : SubtargetFeature<"zicntr", "HasStdExtZicntr", "true",
+ "'Zicntr' (Base Counters and Timers)",
+ [FeatureStdExtZicsr]>;
+
+def FeatureStdExtZihpm
+ : SubtargetFeature<"zihpm", "HasStdExtZihpm", "true",
+ "'Zihpm' (Hardware Performance Counters)",
+ [FeatureStdExtZicsr]>;
+
def FeatureStdExtZfhmin
: SubtargetFeature<"zfhmin", "HasStdExtZfhmin", "true",
"'Zfhmin' (Half-Precision Floating-Point Minimal)",
@@ -89,14 +114,15 @@ def HasStdExtZfh : Predicate<"Subtarget->hasStdExtZfh()">,
def NoStdExtZfh : Predicate<"!Subtarget->hasStdExtZfh()">;
def HasStdExtZfhOrZfhmin
- : Predicate<"Subtarget->hasStdExtZfh() || Subtarget->hasStdExtZfhmin()">,
+ : Predicate<"Subtarget->hasStdExtZfhOrZfhmin()">,
AssemblerPredicate<(any_of FeatureStdExtZfh, FeatureStdExtZfhmin),
"'Zfh' (Half-Precision Floating-Point) or "
"'Zfhmin' (Half-Precision Floating-Point Minimal)">;
def FeatureStdExtZfinx
: SubtargetFeature<"zfinx", "HasStdExtZfinx", "true",
- "'Zfinx' (Float in Integer)">;
+ "'Zfinx' (Float in Integer)",
+ [FeatureStdExtZicsr]>;
def HasStdExtZfinx : Predicate<"Subtarget->hasStdExtZfinx()">,
AssemblerPredicate<(all_of FeatureStdExtZfinx),
"'Zfinx' (Float in Integer)">;
@@ -124,6 +150,7 @@ def FeatureStdExtZhinx
def HasStdExtZhinx : Predicate<"Subtarget->hasStdExtZhinx()">,
AssemblerPredicate<(all_of FeatureStdExtZhinx),
"'Zhinx' (Half Float in Integer)">;
+def NoStdExtZhinx : Predicate<"!Subtarget->hasStdExtZhinx()">;
def HasStdExtZhinxOrZhinxmin
: Predicate<"Subtarget->hasStdExtZhinx() || Subtarget->hasStdExtZhinxmin()">,
@@ -131,6 +158,14 @@ def HasStdExtZhinxOrZhinxmin
"'Zhinx' (Half Float in Integer) or "
"'Zhinxmin' (Half Float in Integer Minimal)">;
+def FeatureStdExtZfa
+ : SubtargetFeature<"experimental-zfa", "HasStdExtZfa", "true",
+ "'Zfa' (Additional Floating-Point)",
+ [FeatureStdExtF]>;
+def HasStdExtZfa : Predicate<"Subtarget->hasStdExtZfa()">,
+ AssemblerPredicate<(all_of FeatureStdExtZfa),
+ "'Zfa' (Additional Floating-Point)">;
+
def FeatureStdExtC
: SubtargetFeature<"c", "HasStdExtC", "true",
"'C' (Compressed Instructions)">;
@@ -287,35 +322,69 @@ def FeatureStdExtZk
FeatureStdExtZkr,
FeatureStdExtZkt]>;
-def FeatureExtZca
- : SubtargetFeature<"experimental-zca", "HasStdExtZca", "true",
+def FeatureStdExtZca
+ : SubtargetFeature<"zca", "HasStdExtZca", "true",
"'Zca' (part of the C extension, excluding compressed "
"floating point loads/stores)">;
def HasStdExtCOrZca
- : Predicate<"Subtarget->hasStdExtC() || Subtarget->hasStdExtZca()">,
- AssemblerPredicate<(any_of FeatureStdExtC, FeatureExtZca),
+ : Predicate<"Subtarget->hasStdExtCOrZca()">,
+ AssemblerPredicate<(any_of FeatureStdExtC, FeatureStdExtZca),
"'C' (Compressed Instructions) or "
"'Zca' (part of the C extension, excluding "
"compressed floating point loads/stores)">;
-def FeatureExtZcd
- : SubtargetFeature<"experimental-zcd", "HasStdExtZcd", "true",
- "'Zcd' (Compressed Double-Precision Floating-Point Instructions)">;
+def FeatureStdExtZcb
+ : SubtargetFeature<"zcb", "HasStdExtZcb", "true",
+ "'Zcb' (Compressed basic bit manipulation instructions)",
+ [FeatureStdExtZca]>;
+def HasStdExtZcb : Predicate<"Subtarget->hasStdExtZcb()">,
+ AssemblerPredicate<(all_of FeatureStdExtZcb),
+ "'Zcb' (Compressed basic bit manipulation instructions)">;
+
+def FeatureStdExtZcd
+ : SubtargetFeature<"zcd", "HasStdExtZcd", "true",
+ "'Zcd' (Compressed Double-Precision Floating-Point Instructions)",
+ [FeatureStdExtZca]>;
def HasStdExtCOrZcd
: Predicate<"Subtarget->hasStdExtC() || Subtarget->hasStdExtZcd()">,
- AssemblerPredicate<(any_of FeatureStdExtC, FeatureExtZcd),
+ AssemblerPredicate<(any_of FeatureStdExtC, FeatureStdExtZcd),
"'C' (Compressed Instructions) or "
"'Zcd' (Compressed Double-Precision Floating-Point Instructions)">;
-def FeatureExtZcf
- : SubtargetFeature<"experimental-zcf", "HasStdExtZcf", "true",
- "'Zcf' (Compressed Single-Precision Floating-Point Instructions)">;
-
-def HasStdExtCOrZcf
- : Predicate<"Subtarget->hasStdExtC() || Subtarget->hasStdExtZcf()">,
- AssemblerPredicate<(any_of FeatureStdExtC, FeatureExtZcf),
+def FeatureStdExtZcf
+ : SubtargetFeature<"zcf", "HasStdExtZcf", "true",
+ "'Zcf' (Compressed Single-Precision Floating-Point Instructions)",
+ [FeatureStdExtZca]>;
+
+def FeatureStdExtZcmp
+ : SubtargetFeature<"zcmp", "HasStdExtZcmp", "true",
+                     "'Zcmp' (sequenced instructions for code-size reduction)",
+ [FeatureStdExtZca]>;
+def HasStdExtZcmp : Predicate<"Subtarget->hasStdExtZcmp() && !Subtarget->hasStdExtC()">,
+ AssemblerPredicate<(all_of FeatureStdExtZcmp),
+                         "'Zcmp' (sequenced instructions for code-size reduction)">;
+
+def FeatureStdExtZcmt
+ : SubtargetFeature<"zcmt", "HasStdExtZcmt", "true",
+                     "'Zcmt' (table jump instructions for code-size reduction)",
+ [FeatureStdExtZca, FeatureStdExtZicsr]>;
+def HasStdExtZcmt : Predicate<"Subtarget->hasStdExtZcmt()">,
+ AssemblerPredicate<(all_of FeatureStdExtZcmt),
+                         "'Zcmt' (table jump instructions for code-size reduction)">;
+
+def FeatureStdExtZce
+ : SubtargetFeature<"zce", "HasStdExtZce", "true",
+ "'Zce' (Compressed extensions for microcontrollers)",
+ [FeatureStdExtZca, FeatureStdExtZcb, FeatureStdExtZcmp,
+ FeatureStdExtZcmt]>;
+
+def HasStdExtCOrZcfOrZce
+ : Predicate<"Subtarget->hasStdExtC() || Subtarget->hasStdExtZcf() "
+ "Subtarget->hasStdExtZce()">,
+ AssemblerPredicate<(any_of FeatureStdExtC, FeatureStdExtZcf,
+ FeatureStdExtZce),
"'C' (Compressed Instructions) or "
"'Zcf' (Compressed Single-Precision Floating-Point Instructions)">;
@@ -341,13 +410,13 @@ def FeatureStdExtZve32x
: SubtargetFeature<"zve32x", "HasStdExtZve32x", "true",
"'Zve32x' (Vector Extensions for Embedded Processors "
"with maximal 32 EEW)",
- [FeatureStdExtZvl32b]>;
+ [FeatureStdExtZicsr, FeatureStdExtZvl32b]>;
def FeatureStdExtZve32f
: SubtargetFeature<"zve32f", "HasStdExtZve32f", "true",
"'Zve32f' (Vector Extensions for Embedded Processors "
"with maximal 32 EEW and F extension)",
- [FeatureStdExtZve32x]>;
+ [FeatureStdExtZve32x, FeatureStdExtF]>;
def FeatureStdExtZve64x
: SubtargetFeature<"zve64x", "HasStdExtZve64x", "true",
@@ -365,13 +434,12 @@ def FeatureStdExtZve64d
: SubtargetFeature<"zve64d", "HasStdExtZve64d", "true",
"'Zve64d' (Vector Extensions for Embedded Processors "
"with maximal 64 EEW, F and D extension)",
- [FeatureStdExtZve64f]>;
+ [FeatureStdExtZve64f, FeatureStdExtD]>;
def FeatureStdExtV
: SubtargetFeature<"v", "HasStdExtV", "true",
"'V' (Vector Extension for Application Processors)",
- [FeatureStdExtZvl128b, FeatureStdExtZve64d,
- FeatureStdExtF, FeatureStdExtD]>;
+ [FeatureStdExtZvl128b, FeatureStdExtZve64d]>;
def HasVInstructions : Predicate<"Subtarget->hasVInstructions()">,
AssemblerPredicate<
@@ -389,10 +457,38 @@ def HasVInstructionsAnyF : Predicate<"Subtarget->hasVInstructionsAnyF()">,
"'V' (Vector Extension for Application Processors), 'Zve32f', "
"'Zve64f' or 'Zve64d' (Vector Extensions for Embedded Processors)">;
+def HasVInstructionsF64 : Predicate<"Subtarget->hasVInstructionsF64()">;
+
+def HasVInstructionsFullMultiply : Predicate<"Subtarget->hasVInstructionsFullMultiply()">;
+
+def FeatureStdExtZvfbfmin
+ : SubtargetFeature<"experimental-zvfbfmin", "HasStdExtZvfbfmin", "true",
+                         "'Zvfbfmin' (Vector BF16 Converts)",
+ [FeatureStdExtZve32f]>;
+def HasStdExtZvfbfmin : Predicate<"Subtarget->hasStdExtZvfbfmin()">,
+ AssemblerPredicate<(all_of FeatureStdExtZvfbfmin),
+ "'Zvfbfmin' (Vector BF16 Converts)">;
+
+def FeatureStdExtZvfbfwma
+ : SubtargetFeature<"experimental-zvfbfwma", "HasStdExtZvfbfwma", "true",
+ "'Zvfbfwma' (Vector BF16 widening mul-add)",
+ [FeatureStdExtZve32f]>;
+def HasStdExtZvfbfwma : Predicate<"Subtarget->hasStdExtZvfbfwma()">,
+ AssemblerPredicate<(all_of FeatureStdExtZvfbfwma),
+ "'Zvfbfwma' (Vector BF16 widening mul-add)">;
+
def FeatureStdExtZvfh
- : SubtargetFeature<"experimental-zvfh", "HasStdExtZvfh", "true",
+ : SubtargetFeature<"zvfh", "HasStdExtZvfh", "true",
"'Zvfh' (Vector Half-Precision Floating-Point)",
- [FeatureStdExtZve32f]>;
+ [FeatureStdExtZve32f, FeatureStdExtZfhmin]>;
+
+def HasVInstructionsF16 : Predicate<"Subtarget->hasVInstructionsF16()">;
+
+def HasStdExtZfhOrZvfh
+ : Predicate<"Subtarget->hasStdExtZfh() || Subtarget->hasStdExtZvfh()">,
+ AssemblerPredicate<(any_of FeatureStdExtZfh, FeatureStdExtZvfh),
+ "'Zfh' (Half-Precision Floating-Point) or "
+ "'Zvfh' (Vector Half-Precision Floating-Point)">;
def FeatureStdExtZicbom
: SubtargetFeature<"zicbom", "HasStdExtZicbom", "true",
@@ -437,13 +533,144 @@ def HasStdExtZtso : Predicate<"Subtarget->hasStdExtZTso()">,
AssemblerPredicate<(all_of FeatureStdExtZtso),
"'Ztso' (Memory Model - Total Store Order)">;
-def FeatureStdExtZawrs
- : SubtargetFeature<"experimental-zawrs", "HasStdExtZawrs", "true",
- "'Zawrs' (Wait on Reservation Set)">;
+def FeatureStdExtZawrs : SubtargetFeature<"zawrs", "HasStdExtZawrs", "true",
+ "'Zawrs' (Wait on Reservation Set)">;
def HasStdExtZawrs : Predicate<"Subtarget->hasStdExtZawrs()">,
AssemblerPredicate<(all_of FeatureStdExtZawrs),
"'Zawrs' (Wait on Reservation Set)">;
+def FeatureStdExtZvbb
+ : SubtargetFeature<"experimental-zvbb", "HasStdExtZvbb", "true",
+ "'Zvbb' (Vector Bit-manipulation used in Cryptography)">;
+def HasStdExtZvbb : Predicate<"Subtarget->hasStdExtZvbb()">,
+ AssemblerPredicate<(all_of FeatureStdExtZvbb),
+ "'Zvbb' (Vector Bit-manipulation used in Cryptography)">;
+
+def FeatureStdExtZvbc
+ : SubtargetFeature<"experimental-zvbc", "HasStdExtZvbc", "true",
+ "'Zvbc' (Vector Carryless Multiplication)">;
+def HasStdExtZvbc : Predicate<"Subtarget->hasStdExtZvbc()">,
+ AssemblerPredicate<(all_of FeatureStdExtZvbc),
+ "'Zvbc' (Vector Carryless Multiplication)">;
+
+def FeatureStdExtZvkg
+ : SubtargetFeature<"experimental-zvkg", "HasStdExtZvkg", "true",
+ "'Zvkg' (Vector GCM instructions for Cryptography)">;
+def HasStdExtZvkg : Predicate<"Subtarget->hasStdExtZvkg()">,
+ AssemblerPredicate<(all_of FeatureStdExtZvkg),
+ "'Zvkg' (Vector GCM instructions for Cryptography)">;
+
+def FeatureStdExtZvkn
+ : SubtargetFeature<"experimental-zvkn", "HasStdExtZvkn", "true",
+ "This extension is shorthand for the following set of "
+ "other extensions: Zvkned, Zvknhb, Zvbb, Zvbc, and Zvkt.">;
+
+def FeatureStdExtZvknc
+ : SubtargetFeature<"experimental-zvknc", "HasStdExtZvknc", "true",
+ "This extension is shorthand for the following set of "
+ "other extensions: Zvkn and Zvbc.">;
+
+def FeatureStdExtZvkned
+ : SubtargetFeature<"experimental-zvkned", "HasStdExtZvkned", "true",
+ "'Zvkned' (Vector AES Encryption & Decryption (Single Round))">;
+def HasStdExtZvkned : Predicate<"Subtarget->hasStdExtZvkned()">,
+ AssemblerPredicate<(all_of FeatureStdExtZvkned),
+ "'Zvkned' (Vector AES Encryption & Decryption (Single Round))">;
+
+def FeatureStdExtZvkng
+ : SubtargetFeature<"experimental-zvkng", "HasStdExtZvkng", "true",
+ "This extension is shorthand for the following set of "
+ "other extensions: Zvkn and Zvkg.">;
+
+def FeatureStdExtZvknha
+ : SubtargetFeature<"experimental-zvknha", "HasStdExtZvknha", "true",
+ "'Zvknha' (Vector SHA-2 (SHA-256 only))">;
+
+def FeatureStdExtZvknhb
+ : SubtargetFeature<"experimental-zvknhb", "HasStdExtZvknhb", "true",
+ "'Zvknhb' (Vector SHA-2 (SHA-256 and SHA-512))",
+ [FeatureStdExtZvknha]>;
+def HasStdExtZvknha : Predicate<"Subtarget->hasStdExtZvknha()">,
+ AssemblerPredicate<(all_of FeatureStdExtZvknha),
+ "'Zvknha' (Vector SHA-2 (SHA-256 only))">;
+
+def FeatureStdExtZvks
+ : SubtargetFeature<"experimental-zvks", "HasStdExtZvks", "true",
+ "This extension is shorthand for the following set of "
+ "other extensions: Zvksed, Zvksh, Zvbb, Zvbc, and Zvkt.">;
+
+def FeatureStdExtZvksc
+ : SubtargetFeature<"experimental-zvksc", "HasStdExtZvksc", "true",
+ "This extension is shorthand for the following set of "
+ "other extensions: Zvks and Zvbc.">;
+
+def FeatureStdExtZvksed
+ : SubtargetFeature<"experimental-zvksed", "HasStdExtZvksed", "true",
+ "'Zvksed' (SM4 Block Cipher Instructions)">;
+def HasStdExtZvksed : Predicate<"Subtarget->hasStdExtZvksed()">,
+ AssemblerPredicate<(all_of FeatureStdExtZvksed),
+ "'Zvksed' (SM4 Block Cipher Instructions)">;
+
+def FeatureStdExtZvksg
+ : SubtargetFeature<"experimental-zvksg", "HasStdExtZvksg", "true",
+ "This extension is shorthand for the following set of "
+ "other extensions: Zvks and Zvkg.">;
+
+def FeatureStdExtZvksh
+ : SubtargetFeature<"experimental-zvksh", "HasStdExtZvksh", "true",
+ "'Zvksh' (SM3 Hash Function Instructions)">;
+def HasStdExtZvksh : Predicate<"Subtarget->hasStdExtZvksh()">,
+ AssemblerPredicate<(all_of FeatureStdExtZvksh),
+ "'Zvksh' (SM3 Hash Function Instructions)">;
+
+def FeatureStdExtZvkt
+ : SubtargetFeature<"experimental-zvkt", "HasStdExtZvkt", "true",
+ "'Zvkt' (Vector Data-Independent Execution Latency)">;
+
+def FeatureStdExtZicond
+ : SubtargetFeature<"experimental-zicond", "HasStdExtZicond", "true",
+ "'Zicond' (Integer Conditional Operations)">;
+def HasStdExtZicond : Predicate<"Subtarget->hasStdExtZicond()">,
+ AssemblerPredicate<(all_of FeatureStdExtZicond),
+ "'Zicond' (Integer Conditional Operations)">;
+
+def FeatureStdExtSmaia
+ : SubtargetFeature<"experimental-smaia", "HasStdExtSmaia", "true",
+ "'Smaia' (Smaia encompasses all added CSRs and all "
+ "modifications to interrupt response behavior that the "
+ "AIA specifies for a hart, over all privilege levels.)",
+ []>;
+
+def FeatureStdExtSsaia
+ : SubtargetFeature<"experimental-ssaia", "HasStdExtSsaia", "true",
+ "'Ssaia' (Ssaia is essentially the same as Smaia except "
+ "excluding the machine-level CSRs and behavior not "
+ "directly visible to supervisor level.)", []>;
+
+def FeatureStdExtZfbfmin
+ : SubtargetFeature<"experimental-zfbfmin", "HasStdExtZfbfmin", "true",
+ "'Zfbfmin' (Scalar BF16 Converts)",
+ [FeatureStdExtF]>;
+def HasStdExtZfbfmin : Predicate<"Subtarget->hasStdExtZfbfmin()">,
+ AssemblerPredicate<(all_of FeatureStdExtZfbfmin),
+ "'Zfbfmin' (Scalar BF16 Converts)">;
+
+def HasHalfFPLoadStoreMove
+ : Predicate<"Subtarget->hasHalfFPLoadStoreMove()">,
+ AssemblerPredicate<(any_of FeatureStdExtZfh, FeatureStdExtZfhmin,
+ FeatureStdExtZfbfmin, FeatureStdExtZvfbfwma),
+ "'Zfh' (Half-Precision Floating-Point) or "
+ "'Zfhmin' (Half-Precision Floating-Point Minimal) or "
+ "'Zfbfmin' (Scalar BF16 Converts) or "
+ "'Zvfbfwma' (Vector BF16 widening mul-add)">;
+
+def FeatureStdExtZacas
+ : SubtargetFeature<"experimental-zacas", "HasStdExtZacas", "true",
+ "'Zacas' (Atomic Compare-And-Swap Instructions)">;
+def HasStdExtZacas : Predicate<"Subtarget->hasStdExtZacas()">,
+ AssemblerPredicate<(all_of FeatureStdExtZacas),
+ "'Zacas' (Atomic Compare-And-Swap Instructions)">;
+
//===----------------------------------------------------------------------===//
// Vendor extensions
//===----------------------------------------------------------------------===//
@@ -455,6 +682,77 @@ def HasVendorXVentanaCondOps : Predicate<"Subtarget->hasVendorXVentanaCondOps()"
AssemblerPredicate<(all_of FeatureVendorXVentanaCondOps),
"'XVentanaCondOps' (Ventana Conditional Ops)">;
+def FeatureVendorXTHeadBa
+ : SubtargetFeature<"xtheadba", "HasVendorXTHeadBa", "true",
+ "'xtheadba' (T-Head address calculation instructions)">;
+def HasVendorXTHeadBa : Predicate<"Subtarget->hasVendorXTHeadBa()">,
+ AssemblerPredicate<(all_of FeatureVendorXTHeadBa),
+ "'xtheadba' (T-Head address calculation instructions)">;
+
+def FeatureVendorXTHeadBb
+ : SubtargetFeature<"xtheadbb", "HasVendorXTHeadBb", "true",
+ "'xtheadbb' (T-Head basic bit-manipulation instructions)">;
+def HasVendorXTHeadBb : Predicate<"Subtarget->hasVendorXTHeadBb()">,
+ AssemblerPredicate<(all_of FeatureVendorXTHeadBb),
+ "'xtheadbb' (T-Head basic bit-manipulation instructions)">;
+
+def FeatureVendorXTHeadBs
+ : SubtargetFeature<"xtheadbs", "HasVendorXTHeadBs", "true",
+ "'xtheadbs' (T-Head single-bit instructions)">;
+def HasVendorXTHeadBs : Predicate<"Subtarget->hasVendorXTHeadBs()">,
+ AssemblerPredicate<(all_of FeatureVendorXTHeadBs),
+ "'xtheadbs' (T-Head single-bit instructions)">;
+
+def FeatureVendorXTHeadCondMov
+ : SubtargetFeature<"xtheadcondmov", "HasVendorXTHeadCondMov", "true",
+ "'xtheadcondmov' (T-Head conditional move instructions)">;
+def HasVendorXTHeadCondMov : Predicate<"Subtarget->hasVendorXTHeadCondMov()">,
+ AssemblerPredicate<(all_of FeatureVendorXTHeadCondMov),
+ "'xtheadcondmov' (T-Head conditional move instructions)">;
+
+def FeatureVendorXTHeadCmo
+ : SubtargetFeature<"xtheadcmo", "HasVendorXTHeadCmo", "true",
+ "'xtheadcmo' (T-Head cache management instructions)">;
+def HasVendorXTHeadCmo : Predicate<"Subtarget->hasVendorXTHeadCmo()">,
+ AssemblerPredicate<(all_of FeatureVendorXTHeadCmo),
+ "'xtheadcmo' (T-Head cache management instructions)">;
+
+def FeatureVendorXTHeadFMemIdx
+ : SubtargetFeature<"xtheadfmemidx", "HasVendorXTHeadFMemIdx", "true",
+ "'xtheadfmemidx' (T-Head FP Indexed Memory Operations)",
+ [FeatureStdExtF]>;
+def HasVendorXTHeadFMemIdx : Predicate<"Subtarget->hasVendorXTHeadFMemIdx()">,
+ AssemblerPredicate<(all_of FeatureVendorXTHeadFMemIdx),
+ "'xtheadfmemidx' (T-Head FP Indexed Memory Operations)">;
+
+def FeatureVendorXTHeadMac
+ : SubtargetFeature<"xtheadmac", "HasVendorXTHeadMac", "true",
+ "'xtheadmac' (T-Head Multiply-Accumulate Instructions)">;
+def HasVendorXTHeadMac : Predicate<"Subtarget->hasVendorXTHeadMac()">,
+ AssemblerPredicate<(all_of FeatureVendorXTHeadMac),
+ "'xtheadmac' (T-Head Multiply-Accumulate Instructions)">;
+
+def FeatureVendorXTHeadMemIdx
+ : SubtargetFeature<"xtheadmemidx", "HasVendorXTHeadMemIdx", "true",
+ "'xtheadmemidx' (T-Head Indexed Memory Operations)">;
+def HasVendorXTHeadMemIdx : Predicate<"Subtarget->hasVendorXTHeadMemIdx()">,
+ AssemblerPredicate<(all_of FeatureVendorXTHeadMemIdx),
+ "'xtheadmemidx' (T-Head Indexed Memory Operations)">;
+
+def FeatureVendorXTHeadMemPair
+ : SubtargetFeature<"xtheadmempair", "HasVendorXTHeadMemPair", "true",
+ "'xtheadmempair' (T-Head two-GPR Memory Operations)">;
+def HasVendorXTHeadMemPair : Predicate<"Subtarget->hasVendorXTHeadMemPair()">,
+ AssemblerPredicate<(all_of FeatureVendorXTHeadMemPair),
+ "'xtheadmempair' (T-Head two-GPR Memory Operations)">;
+
+def FeatureVendorXTHeadSync
+ : SubtargetFeature<"xtheadsync", "HasVendorXTHeadSync", "true",
+ "'xtheadsync' (T-Head multicore synchronization instructions)">;
+def HasVendorXTHeadSync : Predicate<"Subtarget->hasVendorXTHeadSync()">,
+ AssemblerPredicate<(all_of FeatureVendorXTHeadSync),
+ "'xtheadsync' (T-Head multicore synchronization instructions)">;
+
def FeatureVendorXTHeadVdot
: SubtargetFeature<"xtheadvdot", "HasVendorXTHeadVdot", "true",
"'xtheadvdot' (T-Head Vector Extensions for Dot)",
@@ -463,6 +761,35 @@ def HasVendorXTHeadVdot : Predicate<"Subtarget->hasVendorXTHeadVdot()">,
AssemblerPredicate<(all_of FeatureVendorXTHeadVdot),
"'xtheadvdot' (T-Head Vector Extensions for Dot)">;
+def FeatureVendorXSfvcp
+ : SubtargetFeature<"xsfvcp", "HasVendorXSfvcp", "true",
+ "'XSfvcp' (SiFive Custom Vector Coprocessor Interface Instructions)",
+ [FeatureStdExtZve32x]>;
+def HasVendorXSfvcp : Predicate<"Subtarget->hasVendorXSfvcp()">,
+ AssemblerPredicate<(all_of FeatureVendorXSfvcp),
+ "'XSfvcp' (SiFive Custom Vector Coprocessor Interface Instructions)">;
+
+def FeatureVendorXSfcie
+ : SubtargetFeature<"xsfcie", "HasVendorXSfcie", "true",
+ "'XSfcie' (SiFive Custom Instruction Extension SCIE.)">;
+def HasVendorXSfcie : Predicate<"Subtarget->hasVendorXSfcie()">,
+ AssemblerPredicate<(all_of FeatureVendorXSfcie),
+ "'XSfcie' (SiFive Custom Instruction Extension SCIE.)">;
+
+def FeatureVendorXCVbitmanip
+ : SubtargetFeature<"xcvbitmanip", "HasVendorXCVbitmanip", "true",
+ "'XCVbitmanip' (CORE-V Bit Manipulation)">;
+def HasVendorXCVbitmanip : Predicate<"Subtarget->hasVendorXCVbitmanip()">,
+ AssemblerPredicate<(all_of FeatureVendorXCVbitmanip),
+ "'XCVbitmanip' (CORE-V Bit Manipulation)">;
+
+def FeatureVendorXCVmac
+ : SubtargetFeature<"xcvmac", "HasVendorXCVmac", "true",
+ "'XCVmac' (CORE-V Multiply-Accumulate)">;
+def HasVendorXCVmac : Predicate<"Subtarget->hasVendorXCVmac()">,
+ AssemblerPredicate<(all_of FeatureVendorXCVmac),
+ "'XCVmac' (CORE-V Multiply-Accumulate)">;
+
//===----------------------------------------------------------------------===//
// LLVM specific features and extensions
//===----------------------------------------------------------------------===//
@@ -470,9 +797,9 @@ def HasVendorXTHeadVdot : Predicate<"Subtarget->hasVendorXTHeadVdot()">,
// Feature32Bit exists to mark CPUs that support RV32 to distinguish them from
// tuning CPU names.
def Feature32Bit
- : SubtargetFeature<"32bit", "HasRV32", "true", "Implements RV32">;
+ : SubtargetFeature<"32bit", "IsRV32", "true", "Implements RV32">;
def Feature64Bit
- : SubtargetFeature<"64bit", "HasRV64", "true", "Implements RV64">;
+ : SubtargetFeature<"64bit", "IsRV64", "true", "Implements RV64">;
def IsRV64 : Predicate<"Subtarget->is64Bit()">,
AssemblerPredicate<(all_of Feature64Bit),
"RV64I Base Instruction Set">;
@@ -481,13 +808,13 @@ def IsRV32 : Predicate<"!Subtarget->is64Bit()">,
"RV32I Base Instruction Set">;
defvar RV32 = DefaultMode;
-def RV64 : HwMode<"+64bit">;
+def RV64 : HwMode<"+64bit", [IsRV64]>;
-def FeatureRV32E
- : SubtargetFeature<"e", "IsRV32E", "true",
- "Implements RV32E (provides 16 rather than 32 GPRs)">;
-def IsRV32E : Predicate<"Subtarget->isRV32E()">,
- AssemblerPredicate<(all_of FeatureRV32E)>;
+def FeatureRVE
+ : SubtargetFeature<"e", "IsRVE", "true",
+ "Implements RV{32,64}E (provides 16 rather than 32 GPRs)">;
+def IsRVE : Predicate<"Subtarget->isRVE()">,
+ AssemblerPredicate<(all_of FeatureRVE)>;
def FeatureRelax
: SubtargetFeature<"relax", "EnableLinkerRelax", "true",
@@ -501,16 +828,33 @@ foreach i = {1-31} in
def FeatureSaveRestore : SubtargetFeature<"save-restore", "EnableSaveRestore",
"true", "Enable save/restore.">;
+def FeatureTrailingSeqCstFence : SubtargetFeature<"seq-cst-trailing-fence",
+ "EnableSeqCstTrailingFence",
+ "true",
+ "Enable trailing fence for seq-cst store.">;
+
def FeatureUnalignedScalarMem
: SubtargetFeature<"unaligned-scalar-mem", "EnableUnalignedScalarMem",
"true", "Has reasonably performant unaligned scalar "
"loads and stores">;
+def FeatureUnalignedVectorMem
+ : SubtargetFeature<"unaligned-vector-mem", "EnableUnalignedVectorMem",
+ "true", "Has reasonably performant unaligned vector "
+ "loads and stores">;
+
def TuneNoOptimizedZeroStrideLoad
: SubtargetFeature<"no-optimized-zero-stride-load", "HasOptimizedZeroStrideLoad",
"false", "Hasn't optimized (perform fewer memory operations)"
"zero-stride vector load">;
+// Some vector hardware implementations do not process all VLEN bits in
+// parallel and instead split the work over multiple cycles. DLEN refers to
+// the datapath width that is processed in parallel.
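+// For example, an implementation with VLEN=128 and DLEN=64 would typically
+// take two passes through the datapath for a whole-register operation.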
+def TuneDLenFactor2
+ : SubtargetFeature<"dlen-factor-2", "DLenFactor2", "true",
+ "Vector unit DLEN(data path width) is half of VLEN">;
+
def TuneLUIADDIFusion
: SubtargetFeature<"lui-addi-fusion", "HasLUIADDIFusion",
"true", "Enable LUI+ADDI macrofusion">;
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index bb55c16bf135..ca2d9474d1ed 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVFrameLowering.cpp - RISCV Frame Information ------------------===//
+//===-- RISCVFrameLowering.cpp - RISC-V Frame Information -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the RISCV implementation of TargetFrameLowering class.
+// This file contains the RISC-V implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//
@@ -27,8 +27,13 @@
using namespace llvm;
-// For now we use x18, a.k.a s2, as pointer to shadow call stack.
-// User should explicitly set -ffixed-x18 and not use x18 in their asm.
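+// Registers that Zcmp CM.PUSH/CM.POP save and restore, in rlist order:
+// ra, s0, s1, s2-s11 (x1, x8, x9, x18-x27).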
+static const Register AllPopRegs[] = {
+ RISCV::X1, RISCV::X8, RISCV::X9, RISCV::X18, RISCV::X19,
+ RISCV::X20, RISCV::X21, RISCV::X22, RISCV::X23, RISCV::X24,
+ RISCV::X25, RISCV::X26, RISCV::X27};
+
+// For now we use x3, a.k.a. gp, as the pointer to the shadow call stack.
+// Users should not use x3 in their asm.
static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL) {
@@ -36,7 +41,8 @@ static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
return;
const auto &STI = MF.getSubtarget<RISCVSubtarget>();
- Register RAReg = STI.getRegisterInfo()->getRARegister();
+ const llvm::RISCVRegisterInfo *TRI = STI.getRegisterInfo();
+ Register RAReg = TRI->getRARegister();
// Do not save RA to the SCS if it's not saved to the regular stack,
// i.e. RA is not at risk of being overwritten.
@@ -47,37 +53,42 @@ static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
Register SCSPReg = RISCVABI::getSCSPReg();
- auto &Ctx = MF.getFunction().getContext();
- if (!STI.isRegisterReservedByUser(SCSPReg)) {
- Ctx.diagnose(DiagnosticInfoUnsupported{
- MF.getFunction(), "x18 not reserved by user for Shadow Call Stack."});
- return;
- }
-
- const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
- if (RVFI->useSaveRestoreLibCalls(MF)) {
- Ctx.diagnose(DiagnosticInfoUnsupported{
- MF.getFunction(),
- "Shadow Call Stack cannot be combined with Save/Restore LibCalls."});
- return;
- }
-
const RISCVInstrInfo *TII = STI.getInstrInfo();
bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
int64_t SlotSize = STI.getXLen() / 8;
// Store return address to shadow call stack
- // s[w|d] ra, 0(s2)
- // addi s2, s2, [4|8]
- BuildMI(MBB, MI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
- .addReg(RAReg)
- .addReg(SCSPReg)
- .addImm(0)
- .setMIFlag(MachineInstr::FrameSetup);
+ // addi gp, gp, [4|8]
+ // s[w|d] ra, -[4|8](gp)
BuildMI(MBB, MI, DL, TII->get(RISCV::ADDI))
.addReg(SCSPReg, RegState::Define)
.addReg(SCSPReg)
.addImm(SlotSize)
.setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MI, DL, TII->get(IsRV64 ? RISCV::SD : RISCV::SW))
+ .addReg(RAReg)
+ .addReg(SCSPReg)
+ .addImm(-SlotSize)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // Emit a CFI instruction that causes SlotSize to be subtracted from the value
+ // of the shadow stack pointer when unwinding past this frame.
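+  // The escape below encodes DW_CFA_val_expression gp, { DW_OP_breg gp,
+  // -SlotSize }: the caller's shadow-call-stack pointer is the current gp
+  // value minus the slot just allocated.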
+ char DwarfSCSReg = TRI->getDwarfRegNum(SCSPReg, /*IsEH*/ true);
+ assert(DwarfSCSReg < 32 && "SCS Register should be < 32 (X3).");
+
+ char Offset = static_cast<char>(-SlotSize) & 0x7f;
+ const char CFIInst[] = {
+ dwarf::DW_CFA_val_expression,
+ DwarfSCSReg, // register
+ 2, // length
+ static_cast<char>(unsigned(dwarf::DW_OP_breg0 + DwarfSCSReg)),
+ Offset, // addend (sleb128)
+ };
+
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
+ nullptr, StringRef(CFIInst, sizeof(CFIInst))));
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlag(MachineInstr::FrameSetup);
}
static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -97,27 +108,12 @@ static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB,
Register SCSPReg = RISCVABI::getSCSPReg();
- auto &Ctx = MF.getFunction().getContext();
- if (!STI.isRegisterReservedByUser(SCSPReg)) {
- Ctx.diagnose(DiagnosticInfoUnsupported{
- MF.getFunction(), "x18 not reserved by user for Shadow Call Stack."});
- return;
- }
-
- const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
- if (RVFI->useSaveRestoreLibCalls(MF)) {
- Ctx.diagnose(DiagnosticInfoUnsupported{
- MF.getFunction(),
- "Shadow Call Stack cannot be combined with Save/Restore LibCalls."});
- return;
- }
-
const RISCVInstrInfo *TII = STI.getInstrInfo();
bool IsRV64 = STI.hasFeature(RISCV::Feature64Bit);
int64_t SlotSize = STI.getXLen() / 8;
// Load return address from shadow call stack
- // l[w|d] ra, -[4|8](s2)
- // addi s2, s2, -[4|8]
+ // l[w|d] ra, -[4|8](gp)
+ // addi gp, gp, -[4|8]
BuildMI(MBB, MI, DL, TII->get(IsRV64 ? RISCV::LD : RISCV::LW))
.addReg(RAReg, RegState::Define)
.addReg(SCSPReg)
@@ -128,6 +124,12 @@ static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB,
.addReg(SCSPReg)
.addImm(-SlotSize)
.setMIFlag(MachineInstr::FrameDestroy);
+ // Restore the SCS pointer
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestore(
+ nullptr, STI.getRegisterInfo()->getDwarfRegNum(SCSPReg, /*IsEH*/ true)));
+ BuildMI(MBB, MI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameDestroy);
}
// Get the ID of the libcall used for spilling and restoring callee saved
@@ -224,6 +226,71 @@ getRestoreLibCallName(const MachineFunction &MF,
return RestoreLibCalls[LibCallID];
}
+// Return the encoded rlist value for a CM.PUSH/CM.POP instruction,
+// representing the registers to store/load.
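+// For example, if s2 (X18) is the highest callee-saved register in the list,
+// the returned encoding represents {ra, s0-s2}.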
+static unsigned getPushPopEncoding(const Register MaxReg) {
+ switch (MaxReg) {
+ default:
+ llvm_unreachable("Unexpected Reg for Push/Pop Inst");
+ case RISCV::X27: /*s11*/
+ case RISCV::X26: /*s10*/
+ return llvm::RISCVZC::RLISTENCODE::RA_S0_S11;
+ case RISCV::X25: /*s9*/
+ return llvm::RISCVZC::RLISTENCODE::RA_S0_S9;
+ case RISCV::X24: /*s8*/
+ return llvm::RISCVZC::RLISTENCODE::RA_S0_S8;
+ case RISCV::X23: /*s7*/
+ return llvm::RISCVZC::RLISTENCODE::RA_S0_S7;
+ case RISCV::X22: /*s6*/
+ return llvm::RISCVZC::RLISTENCODE::RA_S0_S6;
+ case RISCV::X21: /*s5*/
+ return llvm::RISCVZC::RLISTENCODE::RA_S0_S5;
+ case RISCV::X20: /*s4*/
+ return llvm::RISCVZC::RLISTENCODE::RA_S0_S4;
+ case RISCV::X19: /*s3*/
+ return llvm::RISCVZC::RLISTENCODE::RA_S0_S3;
+ case RISCV::X18: /*s2*/
+ return llvm::RISCVZC::RLISTENCODE::RA_S0_S2;
+ case RISCV::X9: /*s1*/
+ return llvm::RISCVZC::RLISTENCODE::RA_S0_S1;
+ case RISCV::X8: /*s0*/
+ return llvm::RISCVZC::RLISTENCODE::RA_S0;
+ case RISCV::X1: /*ra*/
+ return llvm::RISCVZC::RLISTENCODE::RA;
+ }
+}
+
+// Get the highest callee-saved register to be covered by Push/Pop, and count
+// the registers in the rlist.
+static Register getMaxPushPopReg(const MachineFunction &MF,
+ const std::vector<CalleeSavedInfo> &CSI,
+ unsigned &PushPopRegs) {
+ Register MaxPushPopReg = RISCV::NoRegister;
+ PushPopRegs = 0;
+ for (auto &CS : CSI) {
+ Register Reg = CS.getReg();
+ if (RISCV::PGPRRegClass.contains(Reg)) {
+ MaxPushPopReg = std::max(MaxPushPopReg.id(), Reg.id());
+ PushPopRegs += 1;
+ }
+ }
+  // If the rlist is {ra, s0-s10}, then s11 is also included.
+ if (MaxPushPopReg == RISCV::X26) {
+ MaxPushPopReg = RISCV::X27;
+ PushPopRegs = 13;
+ }
+ return MaxPushPopReg;
+}
+
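+// Fold as much of the required SP adjustment as possible (at most 48 bytes)
+// into the spimm field of the CM.PUSH/CM.POP at MBBI; any remainder, aligned
+// to 16 bytes, is returned for the caller to adjust separately.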
+static uint64_t adjSPInPushPop(MachineBasicBlock::iterator MBBI,
+ unsigned RequiredStack, unsigned FreePushStack,
+ bool IsPop) {
+ if (FreePushStack > RequiredStack)
+ RequiredStack = 0;
+ unsigned Spimm = std::min(RequiredStack, 48u);
+ MBBI->getOperand(1).setImm(Spimm);
+ return alignTo(RequiredStack - Spimm, 16);
+}
+
// Return true if the specified function should have a dedicated frame
// pointer register. This is true if frame pointer elimination is
// disabled, if it needs dynamic stack realignment, if the function has
@@ -298,8 +365,8 @@ static Register getFPReg(const RISCVSubtarget &STI) { return RISCV::X8; }
static Register getSPReg(const RISCVSubtarget &STI) { return RISCV::X2; }
static SmallVector<CalleeSavedInfo, 8>
-getNonLibcallCSI(const MachineFunction &MF,
- const std::vector<CalleeSavedInfo> &CSI) {
+getUnmanagedCSI(const MachineFunction &MF,
+ const std::vector<CalleeSavedInfo> &CSI) {
const MachineFrameInfo &MFI = MF.getFrameInfo();
SmallVector<CalleeSavedInfo, 8> NonLibcallCSI;
@@ -387,7 +454,7 @@ static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
DefCfaExpr.append(Expr.str());
- return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(),
+ return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
Comment.str());
}
@@ -415,6 +482,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// Emit prologue for shadow call stack.
emitSCSPrologue(MF, MBB, MBBI, DL);
+ auto FirstFrameSetup = MBBI;
+
// Since spillCalleeSavedRegisters may have inserted a libcall, skip past
// any instructions marked as FrameSetup
while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
@@ -451,7 +520,8 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// FIXME (note copied from Lanai): This appears to be overallocating. Needs
// investigation. Get the number of bytes to allocate from the FrameInfo.
uint64_t StackSize = getStackSizeWithRVVPadding(MF);
- uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize();
+ uint64_t RealStackSize =
+ StackSize + RVFI->getLibCallStackSize() + RVFI->getRVPushStackSize();
uint64_t RVVStackSize = RVFI->getRVVStackSize();
// Early exit if there is no need to allocate on the stack
@@ -471,9 +541,21 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
RealStackSize = FirstSPAdjustAmount;
}
- // Allocate space on the stack if necessary.
- RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(-StackSize),
- MachineInstr::FrameSetup, getStackAlign());
+ if (RVFI->isPushable(MF) && FirstFrameSetup->getOpcode() == RISCV::CM_PUSH) {
+ // Use available stack adjustment in push instruction to allocate additional
+ // stack space.
+ unsigned PushStack = RVFI->getRVPushRegs() * (STI.getXLen() / 8);
+ unsigned SpImmBase = RVFI->getRVPushStackSize();
+ StackSize = adjSPInPushPop(FirstFrameSetup, StackSize,
+ (SpImmBase - PushStack), true);
+ }
+
+ if (StackSize != 0) {
+ // Allocate space on the stack if necessary.
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg,
+ StackOffset::getFixed(-StackSize), MachineInstr::FrameSetup,
+ getStackAlign());
+ }
// Emit ".cfi_def_cfa_offset RealStackSize"
unsigned CFIIndex = MF.addFrameInst(
@@ -490,7 +572,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// to the stack, not before.
// FIXME: assumes exactly one instruction is used to save each callee-saved
// register.
- std::advance(MBBI, getNonLibcallCSI(MF, CSI).size());
+ std::advance(MBBI, getUnmanagedCSI(MF, CSI).size());
// Iterate over list of callee-saved registers and emit .cfi_offset
// directives.
@@ -637,7 +719,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
--MBBI;
}
- const auto &CSI = getNonLibcallCSI(MF, MFI.getCalleeSavedInfo());
+ const auto &CSI = getUnmanagedCSI(MF, MFI.getCalleeSavedInfo());
// Skip to before the restores of callee-saved registers
// FIXME: assumes exactly one instruction is used to restore each
@@ -647,7 +729,8 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
LastFrameDestroy = std::prev(MBBI, CSI.size());
uint64_t StackSize = getStackSizeWithRVVPadding(MF);
- uint64_t RealStackSize = StackSize + RVFI->getLibCallStackSize();
+ uint64_t RealStackSize =
+ StackSize + RVFI->getLibCallStackSize() + RVFI->getRVPushStackSize();
uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize();
uint64_t RVVStackSize = RVFI->getRVVStackSize();
@@ -688,9 +771,19 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
if (FirstSPAdjustAmount)
StackSize = FirstSPAdjustAmount;
+ if (RVFI->isPushable(MF) && MBBI->getOpcode() == RISCV::CM_POP) {
+ // Use available stack adjustment in pop instruction to deallocate stack
+ // space.
+ unsigned PushStack = RVFI->getRVPushRegs() * (STI.getXLen() / 8);
+ unsigned SpImmBase = RVFI->getRVPushStackSize();
+ StackSize = adjSPInPushPop(MBBI, StackSize, (SpImmBase - PushStack), true);
+ }
+
// Deallocate stack
- RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize),
- MachineInstr::FrameDestroy, getStackAlign());
+ if (StackSize != 0) {
+ RI->adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed(StackSize),
+ MachineInstr::FrameDestroy, getStackAlign());
+ }
// Emit epilogue for shadow call stack.
emitSCSEpilogue(MF, MBB, MBBI, DL);
@@ -706,7 +799,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
// Callee-saved registers should be referenced relative to the stack
// pointer (positive offset), otherwise use the frame pointer (negative
// offset).
- const auto &CSI = getNonLibcallCSI(MF, MFI.getCalleeSavedInfo());
+ const auto &CSI = getUnmanagedCSI(MF, MFI.getCalleeSavedInfo());
int MinCSFI = 0;
int MaxCSFI = -1;
StackOffset Offset;
@@ -855,7 +948,8 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
assert(!RI->hasStackRealignment(MF) &&
"Can't index across variable sized realign");
Offset += StackOffset::get(getStackSizeWithRVVPadding(MF) +
- RVFI->getLibCallStackSize(),
+ RVFI->getLibCallStackSize() +
+ RVFI->getRVPushStackSize(),
RVFI->getRVVStackSize());
} else {
Offset += StackOffset::getFixed(MFI.getStackSize());
@@ -863,9 +957,10 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
} else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
// Ensure the base of the RVV stack is correctly aligned: add on the
// alignment padding.
- int ScalarLocalVarSize =
- MFI.getStackSize() - RVFI->getCalleeSavedStackSize() -
- RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding();
+ int ScalarLocalVarSize = MFI.getStackSize() -
+ RVFI->getCalleeSavedStackSize() -
+ RVFI->getRVPushStackSize() -
+ RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding();
Offset += StackOffset::get(ScalarLocalVarSize, RVFI->getRVVStackSize());
}
return Offset;
@@ -1121,7 +1216,8 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
RVFI->setBranchRelaxationScratchFrameIndex(FI);
}
- if (MFI.getCalleeSavedInfo().empty() || RVFI->useSaveRestoreLibCalls(MF)) {
+ if (MFI.getCalleeSavedInfo().empty() || RVFI->useSaveRestoreLibCalls(MF) ||
+ RVFI->isPushable(MF)) {
RVFI->setCalleeSavedStackSize(0);
return;
}
@@ -1197,7 +1293,7 @@ RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const {
// Disable SplitSPAdjust if save-restore libcall is used. The callee-saved
// registers will be pushed by the save-restore libcalls, so we don't have to
// split the SP adjustment in this case.
- if (RVFI->getLibCallStackSize())
+ if (RVFI->getLibCallStackSize() || RVFI->getRVPushStackSize())
return 0;
// Return the FirstSPAdjustAmount if the StackSize can not fit in a signed
@@ -1226,8 +1322,28 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
if (MI != MBB.end() && !MI->isDebugInstr())
DL = MI->getDebugLoc();
- const char *SpillLibCall = getSpillLibCallName(*MF, CSI);
- if (SpillLibCall) {
+  // Emit CM.PUSH with a base spimm of zero and record the push stack size; the
+  // prologue may later fold part of the SP adjustment into the spimm field.
+ RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
+ if (RVFI->isPushable(*MF)) {
+ unsigned PushPopRegs = 0;
+ Register MaxReg = getMaxPushPopReg(*MF, CSI, PushPopRegs);
+ RVFI->setRVPushRegs(PushPopRegs);
+ RVFI->setRVPushStackSize(alignTo((STI.getXLen() / 8) * PushPopRegs, 16));
+
+ if (MaxReg != RISCV::NoRegister) {
+ // Use encoded number to represent registers to spill.
+ unsigned RegEnc = getPushPopEncoding(MaxReg);
+ RVFI->setRVPushRlist(RegEnc);
+ MachineInstrBuilder PushBuilder =
+ BuildMI(MBB, MI, DL, TII.get(RISCV::CM_PUSH))
+ .setMIFlag(MachineInstr::FrameSetup);
+ PushBuilder.addImm((int64_t)RegEnc);
+ PushBuilder.addImm(0);
+
+ for (unsigned i = 0; i < PushPopRegs; i++)
+ PushBuilder.addUse(AllPopRegs[i], RegState::Implicit);
+ }
+ } else if (const char *SpillLibCall = getSpillLibCallName(*MF, CSI)) {
// Add spill libcall via non-callee-saved register t0.
BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoCALLReg), RISCV::X5)
.addExternalSymbol(SpillLibCall, RISCVII::MO_CALL)
@@ -1238,9 +1354,9 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
MBB.addLiveIn(CS.getReg());
}
- // Manually spill values not spilled by libcall.
- const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI);
- for (auto &CS : NonLibcallCSI) {
+  // Manually spill values not spilled by the libcall or by Push/Pop.
+ const auto &UnmanagedCSI = getUnmanagedCSI(*MF, CSI);
+ for (auto &CS : UnmanagedCSI) {
// Insert the spill to the stack frame.
Register Reg = CS.getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
@@ -1263,14 +1379,14 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters(
if (MI != MBB.end() && !MI->isDebugInstr())
DL = MI->getDebugLoc();
- // Manually restore values not restored by libcall.
+  // Manually restore values not restored by the libcall or by Push/Pop.
// Keep the same order as in the prologue. There is no need to reverse the
// order in the epilogue. In addition, the return address will be restored
// first in the epilogue. It increases the opportunity to avoid the
// load-to-use data hazard between loading RA and return by RA.
// loadRegFromStackSlot can insert multiple instructions.
- const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI);
- for (auto &CS : NonLibcallCSI) {
+ const auto &UnmanagedCSI = getUnmanagedCSI(*MF, CSI);
+ for (auto &CS : UnmanagedCSI) {
Register Reg = CS.getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI,
@@ -1278,22 +1394,37 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters(
assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
}
- const char *RestoreLibCall = getRestoreLibCallName(*MF, CSI);
- if (RestoreLibCall) {
- // Add restore libcall via tail call.
- MachineBasicBlock::iterator NewMI =
- BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoTAIL))
- .addExternalSymbol(RestoreLibCall, RISCVII::MO_CALL)
- .setMIFlag(MachineInstr::FrameDestroy);
-
- // Remove trailing returns, since the terminator is now a tail call to the
- // restore function.
- if (MI != MBB.end() && MI->getOpcode() == RISCV::PseudoRET) {
- NewMI->copyImplicitOps(*MF, *MI);
- MI->eraseFromParent();
+ RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
+ if (RVFI->isPushable(*MF)) {
+ int RegEnc = RVFI->getRVPushRlist();
+ if (RegEnc != llvm::RISCVZC::RLISTENCODE::INVALID_RLIST) {
+ MachineInstrBuilder PopBuilder =
+ BuildMI(MBB, MI, DL, TII.get(RISCV::CM_POP))
+ .setMIFlag(MachineInstr::FrameDestroy);
+ // Use encoded number to represent registers to restore.
+ PopBuilder.addImm(RegEnc);
+ PopBuilder.addImm(0);
+
+ for (unsigned i = 0; i < RVFI->getRVPushRegs(); i++)
+ PopBuilder.addDef(AllPopRegs[i], RegState::ImplicitDefine);
+ }
+ } else {
+ const char *RestoreLibCall = getRestoreLibCallName(*MF, CSI);
+ if (RestoreLibCall) {
+ // Add restore libcall via tail call.
+ MachineBasicBlock::iterator NewMI =
+ BuildMI(MBB, MI, DL, TII.get(RISCV::PseudoTAIL))
+ .addExternalSymbol(RestoreLibCall, RISCVII::MO_CALL)
+ .setMIFlag(MachineInstr::FrameDestroy);
+
+ // Remove trailing returns, since the terminator is now a tail call to the
+ // restore function.
+ if (MI != MBB.end() && MI->getOpcode() == RISCV::PseudoRET) {
+ NewMI->copyImplicitOps(*MF, *MI);
+ MI->eraseFromParent();
+ }
}
}
-
return true;
}
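
Aside: the Zcmp push path above sizes the save area as alignTo((STI.getXLen() / 8) * PushPopRegs, 16). A minimal standalone sketch of that arithmetic (plain C++, not the LLVM helper; the function name is made up for illustration):

#include <cstdint>

// Push area = one XLEN/8-byte slot per pushed register, rounded up to the
// 16-byte stack alignment required by the RISC-V calling convention.
static uint64_t pushStackSizeBytes(unsigned XLen, unsigned PushPopRegs) {
  uint64_t RawBytes = static_cast<uint64_t>(XLen / 8) * PushPopRegs;
  return (RawBytes + 15) & ~uint64_t(15);
}
// Example: pushStackSizeBytes(64, 5) == 48 (five 8-byte slots -> 40 -> 48).
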
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index bf6c1a652629..79adc83e8d65 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -1,4 +1,4 @@
-//===-- RISCVFrameLowering.h - Define frame lowering for RISCV -*- C++ -*--===//
+//===-- RISCVFrameLowering.h - Define frame lowering for RISC-V -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This class implements RISCV-specific bits of TargetFrameLowering class.
+// This class implements RISC-V specific bits of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//
@@ -74,7 +74,7 @@ public:
TargetStackID::Value getStackIDForScalableVectors() const override;
bool isStackIdSafeForLocalArea(unsigned StackId) const override {
- // We don't support putting RISCV Vector objects into the pre-allocated
+ // We don't support putting RISC-V Vector objects into the pre-allocated
// local frame block at the moment.
return StackId != TargetStackID::ScalableVector;
}
diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
index de627983b538..b9c69a966b4a 100644
--- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -7,12 +7,13 @@
//===----------------------------------------------------------------------===//
//
// This pass custom lowers llvm.gather and llvm.scatter instructions to
-// RISCV intrinsics.
+// RISC-V intrinsics.
//
//===----------------------------------------------------------------------===//
#include "RISCV.h"
#include "RISCVTargetMachine.h"
+#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
@@ -59,21 +60,19 @@ public:
}
StringRef getPassName() const override {
- return "RISCV gather/scatter lowering";
+ return "RISC-V gather/scatter lowering";
}
private:
- bool isLegalTypeAndAlignment(Type *DataType, Value *AlignOp);
-
bool tryCreateStridedLoadStore(IntrinsicInst *II, Type *DataType, Value *Ptr,
Value *AlignOp);
std::pair<Value *, Value *> determineBaseAndStride(GetElementPtrInst *GEP,
- IRBuilder<> &Builder);
+ IRBuilderBase &Builder);
bool matchStridedRecurrence(Value *Index, Loop *L, Value *&Stride,
PHINode *&BasePtr, BinaryOperator *&Inc,
- IRBuilder<> &Builder);
+ IRBuilderBase &Builder);
};
} // end anonymous namespace
@@ -81,32 +80,17 @@ private:
char RISCVGatherScatterLowering::ID = 0;
INITIALIZE_PASS(RISCVGatherScatterLowering, DEBUG_TYPE,
- "RISCV gather/scatter lowering pass", false, false)
+ "RISC-V gather/scatter lowering pass", false, false)
FunctionPass *llvm::createRISCVGatherScatterLoweringPass() {
return new RISCVGatherScatterLowering();
}
-bool RISCVGatherScatterLowering::isLegalTypeAndAlignment(Type *DataType,
- Value *AlignOp) {
- Type *ScalarType = DataType->getScalarType();
- if (!TLI->isLegalElementTypeForRVV(ScalarType))
- return false;
-
- MaybeAlign MA = cast<ConstantInt>(AlignOp)->getMaybeAlignValue();
- if (MA && MA->value() < DL->getTypeStoreSize(ScalarType).getFixedValue())
- return false;
-
- // FIXME: Let the backend type legalize by splitting/widening?
- EVT DataVT = TLI->getValueType(*DL, DataType);
- if (!TLI->isTypeLegal(DataVT))
- return false;
-
- return true;
-}
-
// TODO: Should we consider the mask when looking for a stride?
static std::pair<Value *, Value *> matchStridedConstant(Constant *StartC) {
+ if (!isa<FixedVectorType>(StartC->getType()))
+ return std::make_pair(nullptr, nullptr);
+
unsigned NumElts = cast<FixedVectorType>(StartC->getType())->getNumElements();
// Check that the start value is a strided constant.
@@ -136,7 +120,7 @@ static std::pair<Value *, Value *> matchStridedConstant(Constant *StartC) {
}
static std::pair<Value *, Value *> matchStridedStart(Value *Start,
- IRBuilder<> &Builder) {
+ IRBuilderBase &Builder) {
// Base case, start is a strided constant.
auto *StartC = dyn_cast<Constant>(Start);
if (StartC)
@@ -148,17 +132,20 @@ static std::pair<Value *, Value *> matchStridedStart(Value *Start,
return std::make_pair(ConstantInt::get(Ty, 0), ConstantInt::get(Ty, 1));
}
- // Not a constant, maybe it's a strided constant with a splat added to it.
+ // Not a constant, maybe it's a strided constant with a splat added or
+ // multiplied.
auto *BO = dyn_cast<BinaryOperator>(Start);
- if (!BO || BO->getOpcode() != Instruction::Add)
+ if (!BO || (BO->getOpcode() != Instruction::Add &&
+ BO->getOpcode() != Instruction::Shl &&
+ BO->getOpcode() != Instruction::Mul))
return std::make_pair(nullptr, nullptr);
// Look for an operand that is splatted.
- unsigned OtherIndex = 1;
- Value *Splat = getSplatValue(BO->getOperand(0));
- if (!Splat) {
- Splat = getSplatValue(BO->getOperand(1));
- OtherIndex = 0;
+ unsigned OtherIndex = 0;
+ Value *Splat = getSplatValue(BO->getOperand(1));
+ if (!Splat && Instruction::isCommutative(BO->getOpcode())) {
+ Splat = getSplatValue(BO->getOperand(0));
+ OtherIndex = 1;
}
if (!Splat)
return std::make_pair(nullptr, nullptr);
@@ -169,10 +156,26 @@ static std::pair<Value *, Value *> matchStridedStart(Value *Start,
if (!Start)
return std::make_pair(nullptr, nullptr);
- // Add the splat value to the start.
Builder.SetInsertPoint(BO);
Builder.SetCurrentDebugLocation(DebugLoc());
- Start = Builder.CreateAdd(Start, Splat);
+ // Add the splat value to the start or multiply the start and stride by the
+ // splat.
+ switch (BO->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case Instruction::Add:
+ Start = Builder.CreateAdd(Start, Splat);
+ break;
+ case Instruction::Mul:
+ Start = Builder.CreateMul(Start, Splat);
+ Stride = Builder.CreateMul(Stride, Splat);
+ break;
+ case Instruction::Shl:
+ Start = Builder.CreateShl(Start, Splat);
+ Stride = Builder.CreateShl(Stride, Splat);
+ break;
+ }
+
return std::make_pair(Start, Stride);
}
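
Aside: a toy model of the splat rewrite in this hunk. A strided value is <Start, Start+Stride, Start+2*Stride, ...>; adding a splat only shifts Start, while multiplying (or shifting left, i.e. multiplying by a power of two) scales both Start and Stride. Plain C++ sketch, no LLVM types; all names are illustrative:

#include <cstdint>
#include <vector>

struct Strided {
  int64_t Start;
  int64_t Stride;
};

// Adding a splat moves the whole sequence; the step is unchanged.
static Strided addSplat(Strided S, int64_t Splat) {
  return {S.Start + Splat, S.Stride};
}

// Multiplying by a splat scales both the first element and the step.
static Strided mulSplat(Strided S, int64_t Splat) {
  return {S.Start * Splat, S.Stride * Splat};
}

static std::vector<int64_t> materialize(Strided S, unsigned N) {
  std::vector<int64_t> V;
  for (unsigned I = 0; I < N; ++I)
    V.push_back(S.Start + static_cast<int64_t>(I) * S.Stride);
  return V;
}
// materialize(mulSplat({0, 1}, 4), 4) == {0, 4, 8, 12}, i.e. <0,1,2,3>
// scaled by splat(4), matching the Mul/Shl cases above.
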
@@ -184,7 +187,7 @@ bool RISCVGatherScatterLowering::matchStridedRecurrence(Value *Index, Loop *L,
Value *&Stride,
PHINode *&BasePtr,
BinaryOperator *&Inc,
- IRBuilder<> &Builder) {
+ IRBuilderBase &Builder) {
// Our base case is a Phi.
if (auto *Phi = dyn_cast<PHINode>(Index)) {
// A phi node we want to perform this function on should be from the
@@ -233,20 +236,21 @@ bool RISCVGatherScatterLowering::matchStridedRecurrence(Value *Index, Loop *L,
if (!BO)
return false;
- if (BO->getOpcode() != Instruction::Add &&
- BO->getOpcode() != Instruction::Or &&
- BO->getOpcode() != Instruction::Mul &&
- BO->getOpcode() != Instruction::Shl)
- return false;
-
- // Only support shift by constant.
- if (BO->getOpcode() == Instruction::Shl && !isa<Constant>(BO->getOperand(1)))
- return false;
-
- // We need to be able to treat Or as Add.
- if (BO->getOpcode() == Instruction::Or &&
- !haveNoCommonBitsSet(BO->getOperand(0), BO->getOperand(1), *DL))
+ switch (BO->getOpcode()) {
+ default:
return false;
+ case Instruction::Or:
+ // We need to be able to treat Or as Add.
+ if (!haveNoCommonBitsSet(BO->getOperand(0), BO->getOperand(1), *DL))
+ return false;
+ break;
+ case Instruction::Add:
+ break;
+ case Instruction::Shl:
+ break;
+ case Instruction::Mul:
+ break;
+ }
// We should have one operand in the loop and one splat.
Value *OtherOp;
@@ -255,7 +259,8 @@ bool RISCVGatherScatterLowering::matchStridedRecurrence(Value *Index, Loop *L,
Index = cast<Instruction>(BO->getOperand(0));
OtherOp = BO->getOperand(1);
} else if (isa<Instruction>(BO->getOperand(1)) &&
- L->contains(cast<Instruction>(BO->getOperand(1)))) {
+ L->contains(cast<Instruction>(BO->getOperand(1))) &&
+ Instruction::isCommutative(BO->getOpcode())) {
Index = cast<Instruction>(BO->getOperand(1));
OtherOp = BO->getOperand(0);
} else {
@@ -293,49 +298,31 @@ bool RISCVGatherScatterLowering::matchStridedRecurrence(Value *Index, Loop *L,
case Instruction::Or: {
// An add only affects the start value. It's ok to do this for Or because
// we already checked that there are no common set bits.
-
- // If the start value is Zero, just take the SplatOp.
- if (isa<ConstantInt>(Start) && cast<ConstantInt>(Start)->isZero())
- Start = SplatOp;
- else
- Start = Builder.CreateAdd(Start, SplatOp, "start");
- BasePtr->setIncomingValue(StartBlock, Start);
+ Start = Builder.CreateAdd(Start, SplatOp, "start");
break;
}
case Instruction::Mul: {
- // If the start is zero we don't need to multiply.
- if (!isa<ConstantInt>(Start) || !cast<ConstantInt>(Start)->isZero())
- Start = Builder.CreateMul(Start, SplatOp, "start");
-
+ Start = Builder.CreateMul(Start, SplatOp, "start");
Step = Builder.CreateMul(Step, SplatOp, "step");
-
- // If the Stride is 1 just take the SplatOpt.
- if (isa<ConstantInt>(Stride) && cast<ConstantInt>(Stride)->isOne())
- Stride = SplatOp;
- else
- Stride = Builder.CreateMul(Stride, SplatOp, "stride");
- Inc->setOperand(StepIndex, Step);
- BasePtr->setIncomingValue(StartBlock, Start);
+ Stride = Builder.CreateMul(Stride, SplatOp, "stride");
break;
}
case Instruction::Shl: {
- // If the start is zero we don't need to shift.
- if (!isa<ConstantInt>(Start) || !cast<ConstantInt>(Start)->isZero())
- Start = Builder.CreateShl(Start, SplatOp, "start");
+ Start = Builder.CreateShl(Start, SplatOp, "start");
Step = Builder.CreateShl(Step, SplatOp, "step");
Stride = Builder.CreateShl(Stride, SplatOp, "stride");
- Inc->setOperand(StepIndex, Step);
- BasePtr->setIncomingValue(StartBlock, Start);
break;
}
}
+ Inc->setOperand(StepIndex, Step);
+ BasePtr->setIncomingValue(StartBlock, Start);
return true;
}
std::pair<Value *, Value *>
RISCVGatherScatterLowering::determineBaseAndStride(GetElementPtrInst *GEP,
- IRBuilder<> &Builder) {
+ IRBuilderBase &Builder) {
auto I = StridedAddrs.find(GEP);
if (I != StridedAddrs.end())
@@ -452,7 +439,13 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II,
Value *Ptr,
Value *AlignOp) {
// Make sure the operation will be supported by the backend.
- if (!isLegalTypeAndAlignment(DataType, AlignOp))
+ MaybeAlign MA = cast<ConstantInt>(AlignOp)->getMaybeAlignValue();
+ EVT DataTypeVT = TLI->getValueType(*DL, DataType);
+ if (!MA || !TLI->isLegalStridedLoadStore(DataTypeVT, *MA))
+ return false;
+
+ // FIXME: Let the backend type legalize by splitting/widening?
+ if (!TLI->isTypeLegal(DataTypeVT))
return false;
// Pointer should be a GEP.
@@ -460,7 +453,9 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II,
if (!GEP)
return false;
- IRBuilder<> Builder(GEP);
+ LLVMContext &Ctx = GEP->getContext();
+ IRBuilder<InstSimplifyFolder> Builder(Ctx, *DL);
+ Builder.SetInsertPoint(GEP);
Value *BasePtr, *Stride;
std::tie(BasePtr, Stride) = determineBaseAndStride(GEP, Builder);
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 28244728f656..cafce628cf6a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISCV ------===//
+//===-- RISCVISelDAGToDAG.cpp - A dag to dag inst selector for RISC-V -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,11 +6,12 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines an instruction selector for the RISCV target.
+// This file defines an instruction selector for the RISC-V target.
//
//===----------------------------------------------------------------------===//
#include "RISCVISelDAGToDAG.h"
+#include "MCTargetDesc/RISCVBaseInfo.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVISelLowering.h"
@@ -26,7 +27,7 @@
using namespace llvm;
#define DEBUG_TYPE "riscv-isel"
-#define PASS_NAME "RISCV DAG->DAG Pattern Instruction Selection"
+#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
namespace llvm::RISCV {
#define GET_RISCVVSSEGTable_IMPL
@@ -41,22 +42,6 @@ namespace llvm::RISCV {
#include "RISCVGenSearchableTables.inc"
} // namespace llvm::RISCV
-static unsigned getLastNonGlueOrChainOpIdx(const SDNode *Node) {
- assert(Node->getNumOperands() > 0 && "Node with no operands");
- unsigned LastOpIdx = Node->getNumOperands() - 1;
- if (Node->getOperand(LastOpIdx).getValueType() == MVT::Glue)
- --LastOpIdx;
- if (Node->getOperand(LastOpIdx).getValueType() == MVT::Other)
- --LastOpIdx;
- return LastOpIdx;
-}
-
-static unsigned getVecPolicyOpIdx(const SDNode *Node, const MCInstrDesc &MCID) {
- assert(RISCVII::hasVecPolicyOp(MCID.TSFlags));
- (void)MCID;
- return getLastNonGlueOrChainOpIdx(Node);
-}
-
void RISCVDAGToDAGISel::PreprocessISelDAG() {
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
@@ -94,17 +79,13 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
"Unexpected VTs!");
MachineFunction &MF = CurDAG->getMachineFunction();
- RISCVMachineFunctionInfo *FuncInfo =
- MF.getInfo<RISCVMachineFunctionInfo>();
SDLoc DL(N);
- // We use the same frame index we use for moving two i32s into 64-bit FPR.
- // This is an analogous operation.
- int FI = FuncInfo->getMoveF64FrameIndex(MF);
- MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
- const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
+ // Create temporary stack for each expanding node.
SDValue StackSlot =
- CurDAG->getFrameIndex(FI, TLI.getPointerTy(CurDAG->getDataLayout()));
+ CurDAG->CreateStackTemporary(TypeSize::Fixed(8), Align(4));
+ int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
+ MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
SDValue Chain = CurDAG->getEntryNode();
Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
@@ -134,7 +115,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
}
if (Result) {
- LLVM_DEBUG(dbgs() << "RISCV DAG preprocessing replacing:\nOld: ");
+ LLVM_DEBUG(dbgs() << "RISC-V DAG preprocessing replacing:\nOld: ");
LLVM_DEBUG(N->dump(CurDAG));
LLVM_DEBUG(dbgs() << "\nNew: ");
LLVM_DEBUG(Result->dump(CurDAG));
@@ -172,12 +153,12 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
CurDAG->RemoveDeadNodes();
}
-static SDNode *selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
+static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
RISCVMatInt::InstSeq &Seq) {
- SDNode *Result = nullptr;
SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
- for (RISCVMatInt::Inst &Inst : Seq) {
+ for (const RISCVMatInt::Inst &Inst : Seq) {
SDValue SDImm = CurDAG->getTargetConstant(Inst.getImm(), DL, VT);
+ SDNode *Result = nullptr;
switch (Inst.getOpndKind()) {
case RISCVMatInt::Imm:
Result = CurDAG->getMachineNode(Inst.getOpcode(), DL, VT, SDImm);
@@ -198,14 +179,37 @@ static SDNode *selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
SrcReg = SDValue(Result, 0);
}
- return Result;
+ return SrcReg;
}
-static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
+static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
int64_t Imm, const RISCVSubtarget &Subtarget) {
RISCVMatInt::InstSeq Seq =
RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
+ // See if we can create this constant as (ADD (SLLI X, 32), X) where X is at
+ // worst an LUI+ADDIW. This will require an extra register, but avoids a
+ // constant pool.
+ if (Seq.size() > 3) {
+ int64_t LoVal = SignExtend64<32>(Imm);
+ int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32);
+ if (LoVal == HiVal) {
+ RISCVMatInt::InstSeq SeqLo =
+ RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits());
+ if ((SeqLo.size() + 2) < Seq.size()) {
+ SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
+
+ SDValue SLLI = SDValue(
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
+ CurDAG->getTargetConstant(32, DL, VT)),
+ 0);
+ return SDValue(CurDAG->getMachineNode(RISCV::ADD, DL, VT, Lo, SLLI),
+ 0);
+ }
+ }
+ }
+
+ // Otherwise, use the original sequence.
return selectImmSeq(CurDAG, DL, VT, Seq);
}
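
Aside: the Seq.size() > 3 path above splits a 64-bit constant whose sign-extended halves match into (ADD (SLLI X, 32), X). A standalone check of that condition (plain C++, assuming two's-complement integers; not the LLVM code itself):

#include <cstdint>

// Sign-extend the low 32 bits of V to 64 bits.
static int64_t signExtend32(uint64_t V) {
  uint64_t Low = V & 0xFFFFFFFFULL;
  return static_cast<int64_t>(Low ^ 0x80000000ULL) - 0x80000000LL;
}

// True if Imm can be rebuilt as X + (X << 32), where X materializes the
// sign-extended low half (at worst an LUI+ADDIW).
static bool splitsIntoRepeatedHalves(int64_t Imm) {
  int64_t LoVal = signExtend32(static_cast<uint64_t>(Imm));
  int64_t HiVal = signExtend32(
      (static_cast<uint64_t>(Imm) - static_cast<uint64_t>(LoVal)) >> 32);
  return LoVal == HiVal;
}
// Example: 0x1234567812345678 splits (both halves sign-extend to 0x12345678).
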
@@ -293,10 +297,13 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
Operands.push_back(SEWOp);
- // Masked load has the tail policy argument.
- if (IsMasked && IsLoad) {
- // Policy must be a constant.
- uint64_t Policy = Node->getConstantOperandVal(CurOp++);
+ // At the IR layer, all the masked load intrinsics have policy operands;
+ // none of the others do. All have passthru operands. For our pseudos,
+ // all loads have policy operands.
+ if (IsLoad) {
+ uint64_t Policy = RISCVII::MASK_AGNOSTIC;
+ if (IsMasked)
+ Policy = Node->getConstantOperandVal(CurOp++);
SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
Operands.push_back(PolicyOp);
}
@@ -306,10 +313,6 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
Operands.push_back(Glue);
}
-static bool isAllUndef(ArrayRef<SDValue> Values) {
- return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); });
-}
-
void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
bool IsStrided) {
SDLoc DL(Node);
@@ -323,18 +326,15 @@ void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
Node->op_begin() + CurOp + NF);
- bool IsTU = IsMasked || !isAllUndef(Regs);
- if (IsTU) {
- SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
- Operands.push_back(Merge);
- }
+ SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
+ Operands.push_back(Merge);
CurOp += NF;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
Operands, /*IsLoad=*/true);
const RISCV::VLSEGPseudo *P =
- RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
+ RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
static_cast<unsigned>(LMUL));
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
@@ -366,11 +366,8 @@ void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
Node->op_begin() + CurOp + NF);
- bool IsTU = IsMasked || !isAllUndef(Regs);
- if (IsTU) {
- SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
- Operands.push_back(MaskedOff);
- }
+ SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
+ Operands.push_back(MaskedOff);
CurOp += NF;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
@@ -378,7 +375,7 @@ void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
/*IsLoad=*/true);
const RISCV::VLSEGPseudo *P =
- RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
+ RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
Log2SEW, static_cast<unsigned>(LMUL));
MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
XLenVT, MVT::Other, Operands);
@@ -411,11 +408,8 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
Node->op_begin() + CurOp + NF);
- bool IsTU = IsMasked || !isAllUndef(Regs);
- if (IsTU) {
- SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
- Operands.push_back(MaskedOff);
- }
+ SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
+ Operands.push_back(MaskedOff);
CurOp += NF;
MVT IndexVT;
@@ -433,7 +427,7 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
"values when XLEN=32");
}
const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
- NF, IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
+ NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
static_cast<unsigned>(IndexLMUL));
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
@@ -530,26 +524,19 @@ void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
if (!Subtarget->hasVInstructions())
return;
- assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
- Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) &&
- "Unexpected opcode");
+ assert(Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Unexpected opcode");
SDLoc DL(Node);
MVT XLenVT = Subtarget->getXLenVT();
- bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN;
- unsigned IntNoOffset = HasChain ? 1 : 0;
- unsigned IntNo = Node->getConstantOperandVal(IntNoOffset);
+ unsigned IntNo = Node->getConstantOperandVal(0);
assert((IntNo == Intrinsic::riscv_vsetvli ||
- IntNo == Intrinsic::riscv_vsetvlimax ||
- IntNo == Intrinsic::riscv_vsetvli_opt ||
- IntNo == Intrinsic::riscv_vsetvlimax_opt) &&
+ IntNo == Intrinsic::riscv_vsetvlimax) &&
"Unexpected vsetvli intrinsic");
- bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax ||
- IntNo == Intrinsic::riscv_vsetvlimax_opt;
- unsigned Offset = IntNoOffset + (VLMax ? 1 : 2);
+ bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
+ unsigned Offset = (VLMax ? 1 : 2);
assert(Node->getNumOperands() == Offset + 2 &&
"Unexpected number of operands");
@@ -560,40 +547,30 @@ void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
Node->getConstantOperandVal(Offset + 1) & 0x7);
unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
- /*MaskAgnostic*/ false);
+ /*MaskAgnostic*/ true);
SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
- SmallVector<EVT, 2> VTs = {XLenVT};
- if (HasChain)
- VTs.push_back(MVT::Other);
-
SDValue VLOperand;
unsigned Opcode = RISCV::PseudoVSETVLI;
- if (VLMax) {
+ if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
Opcode = RISCV::PseudoVSETVLIX0;
} else {
- VLOperand = Node->getOperand(IntNoOffset + 1);
+ VLOperand = Node->getOperand(1);
if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
uint64_t AVL = C->getZExtValue();
if (isUInt<5>(AVL)) {
SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
- SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp};
- if (HasChain)
- Ops.push_back(Node->getOperand(0));
- ReplaceNode(
- Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops));
+ ReplaceNode(Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL,
+ XLenVT, VLImm, VTypeIOp));
return;
}
}
}
- SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp};
- if (HasChain)
- Ops.push_back(Node->getOperand(0));
-
- ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops));
+ ReplaceNode(Node,
+ CurDAG->getMachineNode(Opcode, DL, XLenVT, VLOperand, VTypeIOp));
}
bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
@@ -674,6 +651,144 @@ bool RISCVDAGToDAGISel::tryShrinkShlLogicImm(SDNode *Node) {
return true;
}
+bool RISCVDAGToDAGISel::trySignedBitfieldExtract(SDNode *Node) {
+ // Only supported with XTHeadBb at the moment.
+ if (!Subtarget->hasVendorXTHeadBb())
+ return false;
+
+ auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+ if (!N1C)
+ return false;
+
+ SDValue N0 = Node->getOperand(0);
+ if (!N0.hasOneUse())
+ return false;
+
+ auto BitfieldExtract = [&](SDValue N0, unsigned Msb, unsigned Lsb, SDLoc DL,
+ MVT VT) {
+ return CurDAG->getMachineNode(RISCV::TH_EXT, DL, VT, N0.getOperand(0),
+ CurDAG->getTargetConstant(Msb, DL, VT),
+ CurDAG->getTargetConstant(Lsb, DL, VT));
+ };
+
+ SDLoc DL(Node);
+ MVT VT = Node->getSimpleValueType(0);
+ const unsigned RightShAmt = N1C->getZExtValue();
+
+ // Transform (sra (shl X, C1) C2) with C1 < C2
+ // -> (TH.EXT X, msb, lsb)
+ if (N0.getOpcode() == ISD::SHL) {
+ auto *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (!N01C)
+ return false;
+
+ const unsigned LeftShAmt = N01C->getZExtValue();
+ // Make sure that this is a bitfield extraction (i.e., the shift-right
+ // amount cannot be less than the left-shift).
+ if (LeftShAmt > RightShAmt)
+ return false;
+
+ const unsigned MsbPlusOne = VT.getSizeInBits() - LeftShAmt;
+ const unsigned Msb = MsbPlusOne - 1;
+ const unsigned Lsb = RightShAmt - LeftShAmt;
+
+ SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
+ ReplaceNode(Node, TH_EXT);
+ return true;
+ }
+
+ // Transform (sra (sext_inreg X, _), C) ->
+ // (TH.EXT X, msb, lsb)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ unsigned ExtSize =
+ cast<VTSDNode>(N0.getOperand(1))->getVT().getSizeInBits();
+
+ // ExtSize of 32 should use sraiw via tablegen pattern.
+ if (ExtSize == 32)
+ return false;
+
+ const unsigned Msb = ExtSize - 1;
+ const unsigned Lsb = RightShAmt;
+
+ SDNode *TH_EXT = BitfieldExtract(N0, Msb, Lsb, DL, VT);
+ ReplaceNode(Node, TH_EXT);
+ return true;
+ }
+
+ return false;
+}
+
+bool RISCVDAGToDAGISel::tryIndexedLoad(SDNode *Node) {
+ // Target does not support indexed loads.
+ if (!Subtarget->hasVendorXTHeadMemIdx())
+ return false;
+
+ LoadSDNode *Ld = cast<LoadSDNode>(Node);
+ ISD::MemIndexedMode AM = Ld->getAddressingMode();
+ if (AM == ISD::UNINDEXED)
+ return false;
+
+ const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Ld->getOffset());
+ if (!C)
+ return false;
+
+ EVT LoadVT = Ld->getMemoryVT();
+ bool IsPre = (AM == ISD::PRE_INC || AM == ISD::PRE_DEC);
+ bool IsPost = (AM == ISD::POST_INC || AM == ISD::POST_DEC);
+ int64_t Offset = C->getSExtValue();
+
+ // Convert decrements to increments by a negative quantity.
+ if (AM == ISD::PRE_DEC || AM == ISD::POST_DEC)
+ Offset = -Offset;
+
+ // The constants that can be encoded in the THeadMemIdx instructions
+ // are of the form (sign_extend(imm5) << imm2).
+ int64_t Shift;
+ for (Shift = 0; Shift < 4; Shift++)
+ if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
+ break;
+
+ // Constant cannot be encoded.
+ if (Shift == 4)
+ return false;
+
+ bool IsZExt = (Ld->getExtensionType() == ISD::ZEXTLOAD);
+ unsigned Opcode;
+ if (LoadVT == MVT::i8 && IsPre)
+ Opcode = IsZExt ? RISCV::TH_LBUIB : RISCV::TH_LBIB;
+ else if (LoadVT == MVT::i8 && IsPost)
+ Opcode = IsZExt ? RISCV::TH_LBUIA : RISCV::TH_LBIA;
+ else if (LoadVT == MVT::i16 && IsPre)
+ Opcode = IsZExt ? RISCV::TH_LHUIB : RISCV::TH_LHIB;
+ else if (LoadVT == MVT::i16 && IsPost)
+ Opcode = IsZExt ? RISCV::TH_LHUIA : RISCV::TH_LHIA;
+ else if (LoadVT == MVT::i32 && IsPre)
+ Opcode = IsZExt ? RISCV::TH_LWUIB : RISCV::TH_LWIB;
+ else if (LoadVT == MVT::i32 && IsPost)
+ Opcode = IsZExt ? RISCV::TH_LWUIA : RISCV::TH_LWIA;
+ else if (LoadVT == MVT::i64 && IsPre)
+ Opcode = RISCV::TH_LDIB;
+ else if (LoadVT == MVT::i64 && IsPost)
+ Opcode = RISCV::TH_LDIA;
+ else
+ return false;
+
+ EVT Ty = Ld->getOffset().getValueType();
+ SDValue Ops[] = {Ld->getBasePtr(),
+ CurDAG->getTargetConstant(Offset >> Shift, SDLoc(Node), Ty),
+ CurDAG->getTargetConstant(Shift, SDLoc(Node), Ty),
+ Ld->getChain()};
+ SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(Node), Ld->getValueType(0),
+ Ld->getValueType(1), MVT::Other, Ops);
+
+ MachineMemOperand *MemOp = cast<MemSDNode>(Node)->getMemOperand();
+ CurDAG->setNodeMemRefs(cast<MachineSDNode>(New), {MemOp});
+
+ ReplaceNode(Node, New);
+
+ return true;
+}
+
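
Aside: both tryIndexedLoad above and selectSimm5Shl2 added further down rely on the XTHeadMemIdx increment being encodable as sign_extend(imm5) << imm2 with imm2 in [0, 3]. A standalone sketch of that check (plain C++; assumes arithmetic right shift for negative values, which holds on the targets of interest):

#include <cstdint>

static bool findSimm5Shl2(int64_t Offset, int64_t &Simm5, unsigned &Shl2) {
  for (unsigned Shift = 0; Shift < 4; ++Shift) {
    int64_t Shifted = Offset >> Shift; // arithmetic shift for negatives
    bool FitsSimm5 = Shifted >= -16 && Shifted <= 15;
    bool NoLostBits = Offset % (int64_t(1) << Shift) == 0;
    if (FitsSimm5 && NoLostBits) {
      Simm5 = Shifted;
      Shl2 = Shift;
      return true;
    }
  }
  return false; // constant cannot be encoded
}
// Example: Offset = 40 -> Simm5 = 10, Shl2 = 2; Offset = 17 -> not encodable.
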
void RISCVDAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we have already selected.
if (Node->isMachineOpcode()) {
@@ -689,12 +804,15 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
SDLoc DL(Node);
MVT VT = Node->getSimpleValueType(0);
+ bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
+
switch (Opcode) {
case ISD::Constant: {
+ assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
auto *ConstNode = cast<ConstantSDNode>(Node);
- if (VT == XLenVT && ConstNode->isZero()) {
+ if (ConstNode->isZero()) {
SDValue New =
- CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, XLenVT);
+ CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, RISCV::X0, VT);
ReplaceNode(Node, New.getNode());
return;
}
@@ -709,7 +827,97 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
Imm = SignExtend64<32>(Imm);
- ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
+ ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
+ return;
+ }
+ case ISD::ConstantFP: {
+ const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
+ int FPImm = static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(
+ APF, VT);
+ if (FPImm >= 0) {
+ unsigned Opc;
+ switch (VT.SimpleTy) {
+ default:
+ llvm_unreachable("Unexpected size");
+ case MVT::f16:
+ Opc = RISCV::FLI_H;
+ break;
+ case MVT::f32:
+ Opc = RISCV::FLI_S;
+ break;
+ case MVT::f64:
+ Opc = RISCV::FLI_D;
+ break;
+ }
+
+ SDNode *Res = CurDAG->getMachineNode(
+ Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
+ ReplaceNode(Node, Res);
+ return;
+ }
+
+ bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64;
+ SDValue Imm;
+ // For +0.0 or f64 -0.0 we need to start from X0. For all others, we will
+ // create an integer immediate.
+ if (APF.isPosZero() || NegZeroF64)
+ Imm = CurDAG->getRegister(RISCV::X0, XLenVT);
+ else
+ Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
+ *Subtarget);
+
+ unsigned Opc;
+ switch (VT.SimpleTy) {
+ default:
+ llvm_unreachable("Unexpected size");
+ case MVT::f16:
+ Opc =
+ Subtarget->hasStdExtZhinxOrZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
+ break;
+ case MVT::f32:
+ Opc = Subtarget->hasStdExtZfinx() ? RISCV::COPY : RISCV::FMV_W_X;
+ break;
+ case MVT::f64:
+ // For RV32, we can't move from a GPR; we need to convert instead. This
+ // should only happen for +0.0 and -0.0.
+ assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
+ bool HasZdinx = Subtarget->hasStdExtZdinx();
+ if (Subtarget->is64Bit())
+ Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
+ else
+ Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
+ break;
+ }
+
+ SDNode *Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
+
+ // For f64 -0.0, we need to insert a fneg.d idiom.
+ if (NegZeroF64)
+ Res = CurDAG->getMachineNode(RISCV::FSGNJN_D, DL, VT, SDValue(Res, 0),
+ SDValue(Res, 0));
+
+ ReplaceNode(Node, Res);
+ return;
+ }
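
Aside on the +/-0.0 special case just above: on RV32 a 64-bit FP bit pattern cannot come from a single GPR move, and -0.0 sets only bit 63, so the code builds +0.0 from X0 and then flips the sign with the FSGNJN.D (fneg.d) idiom. A tiny standalone illustration of the two bit patterns (C++20 for std::bit_cast):

#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  // IEEE-754 double: +0.0 is all zeros, -0.0 has only the sign bit set.
  std::printf("+0.0 -> %016llx\n",
              static_cast<unsigned long long>(std::bit_cast<uint64_t>(0.0)));
  std::printf("-0.0 -> %016llx\n",
              static_cast<unsigned long long>(std::bit_cast<uint64_t>(-0.0)));
  return 0;
}
// Prints 0000000000000000 and 8000000000000000 respectively.
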
+ case RISCVISD::SplitF64: {
+ if (!Subtarget->hasStdExtZfa())
+ break;
+ assert(Subtarget->hasStdExtD() && !Subtarget->is64Bit() &&
+ "Unexpected subtarget");
+
+ // With Zfa, lower to fmv.x.w and fmvh.x.d.
+ if (!SDValue(Node, 0).use_empty()) {
+ SDNode *Lo = CurDAG->getMachineNode(RISCV::FMV_X_W_FPR64, DL, VT,
+ Node->getOperand(0));
+ ReplaceUses(SDValue(Node, 0), SDValue(Lo, 0));
+ }
+ if (!SDValue(Node, 1).use_empty()) {
+ SDNode *Hi = CurDAG->getMachineNode(RISCV::FMVH_X_D, DL, VT,
+ Node->getOperand(0));
+ ReplaceUses(SDValue(Node, 1), SDValue(Hi, 0));
+ }
+
+ CurDAG->RemoveDeadNode(Node);
return;
}
case ISD::SHL: {
@@ -728,7 +936,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
unsigned XLen = Subtarget->getXLen();
unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
- unsigned TrailingZeros = countTrailingZeros(Mask);
+ unsigned TrailingZeros = llvm::countr_zero(Mask);
if (TrailingZeros > 0 && LeadingZeros == 32) {
SDNode *SRLIW = CurDAG->getMachineNode(
RISCV::SRLIW, DL, VT, N0->getOperand(0),
@@ -757,7 +965,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (isShiftedMask_64(Mask) && N0.hasOneUse()) {
unsigned XLen = Subtarget->getXLen();
unsigned LeadingZeros = XLen - llvm::bit_width(Mask);
- unsigned TrailingZeros = countTrailingZeros(Mask);
+ unsigned TrailingZeros = llvm::countr_zero(Mask);
if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
SDNode *SRLIW = CurDAG->getMachineNode(
RISCV::SRLIW, DL, VT, N0->getOperand(0),
@@ -780,7 +988,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
Mask |= maskTrailingOnes<uint64_t>(ShAmt);
if (!isMask_64(Mask))
break;
- unsigned TrailingOnes = countTrailingOnes(Mask);
+ unsigned TrailingOnes = llvm::countr_one(Mask);
if (ShAmt >= TrailingOnes)
break;
// If the mask has 32 trailing ones, use SRLIW.
@@ -792,18 +1000,19 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
return;
}
- // Only do the remaining transforms if the shift has one use.
+ // Only do the remaining transforms if the AND has one use.
if (!N0.hasOneUse())
break;
- // If C2 is (1 << ShAmt) use bexti if possible.
- if (Subtarget->hasStdExtZbs() && ShAmt + 1 == TrailingOnes) {
- SDNode *BEXTI =
- CurDAG->getMachineNode(RISCV::BEXTI, DL, VT, N0->getOperand(0),
- CurDAG->getTargetConstant(ShAmt, DL, VT));
+ // If C2 is (1 << ShAmt) use bexti or th.tst if possible.
+ if (HasBitTest && ShAmt + 1 == TrailingOnes) {
+ SDNode *BEXTI = CurDAG->getMachineNode(
+ Subtarget->hasStdExtZbs() ? RISCV::BEXTI : RISCV::TH_TST, DL, VT,
+ N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
ReplaceNode(Node, BEXTI);
return;
}
+
unsigned LShAmt = Subtarget->getXLen() - TrailingOnes;
SDNode *SLLI =
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0->getOperand(0),
@@ -815,6 +1024,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
return;
}
case ISD::SRA: {
+ if (trySignedBitfieldExtract(Node))
+ return;
+
// Optimize (sra (sext_inreg X, i16), C) ->
// (srai (slli X, (XLen-16), (XLen-16) + C)
// And (sra (sext_inreg X, i8), C) ->
@@ -855,9 +1067,25 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
if (!N1C)
break;
+ uint64_t C1 = N1C->getZExtValue();
+ const bool isC1Mask = isMask_64(C1);
+ const bool isC1ANDI = isInt<12>(C1);
SDValue N0 = Node->getOperand(0);
+ auto tryUnsignedBitfieldExtract = [&](SDNode *Node, SDLoc DL, MVT VT,
+ SDValue X, unsigned Msb,
+ unsigned Lsb) {
+ if (!Subtarget->hasVendorXTHeadBb())
+ return false;
+
+ SDNode *TH_EXTU = CurDAG->getMachineNode(
+ RISCV::TH_EXTU, DL, VT, X, CurDAG->getTargetConstant(Msb, DL, VT),
+ CurDAG->getTargetConstant(Lsb, DL, VT));
+ ReplaceNode(Node, TH_EXTU);
+ return true;
+ };
+
bool LeftShift = N0.getOpcode() == ISD::SHL;
if (LeftShift || N0.getOpcode() == ISD::SRL) {
auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
@@ -867,8 +1095,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
unsigned XLen = Subtarget->getXLen();
assert((C2 > 0 && C2 < XLen) && "Unexpected shift amount!");
- uint64_t C1 = N1C->getZExtValue();
-
// Keep track of whether this is a c.andi. If we can't use c.andi, the
// shift pair might offer more compression opportunities.
// TODO: We could check for C extension here, but we don't have many lit
@@ -891,7 +1117,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// Turn (and (srl x, c2) c1) -> (srli (slli x, c3-c2), c3) if c1 is a mask
// with c3 leading zeros.
- if (!LeftShift && isMask_64(C1)) {
+ if (!LeftShift && isC1Mask) {
unsigned Leading = XLen - llvm::bit_width(C1);
if (C2 < Leading) {
// If the number of leading zeros is C2+32 this can be SRLIW.
@@ -920,13 +1146,25 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
return;
}
+ // Try to use an unsigned bitfield extract (e.g., th.extu) if
+ // available.
+ // Transform (and (srl x, C2), C1)
+ // -> (<bfextract> x, msb, lsb)
+ //
+ // Make sure to keep this below the SRLIW cases, as we always want to
+ // prefer the more common instruction.
+ const unsigned Msb = llvm::bit_width(C1) + C2 - 1;
+ const unsigned Lsb = C2;
+ if (tryUnsignedBitfieldExtract(Node, DL, VT, X, Msb, Lsb))
+ return;
+
// (srli (slli x, c3-c2), c3).
// Skip if we could use (zext.w (sraiw X, C2)).
bool Skip = Subtarget->hasStdExtZba() && Leading == 32 &&
X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
- // Also Skip if we can use bexti.
- Skip |= Subtarget->hasStdExtZbs() && Leading == XLen - 1;
+ // Also Skip if we can use bexti or th.tst.
+ Skip |= HasBitTest && Leading == XLen - 1;
if (OneUseOrZExtW && !Skip) {
SDNode *SLLI = CurDAG->getMachineNode(
RISCV::SLLI, DL, VT, X,
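
Aside: the unsigned bitfield extract rewrite in this hunk turns (and (srl x, C2), C1), with C1 a mask, into an extract of bits [Msb:Lsb] where Msb = bit_width(C1) + C2 - 1 and Lsb = C2. A standalone check of that identity (plain C++; the helper name is made up):

#include <cstdint>

static uint64_t extractBits(uint64_t X, unsigned Msb, unsigned Lsb) {
  unsigned Width = Msb - Lsb + 1;
  uint64_t Mask = Width >= 64 ? ~uint64_t(0) : ((uint64_t(1) << Width) - 1);
  return (X >> Lsb) & Mask;
}
// With C1 = 0xFF (bit_width 8) and C2 = 4: Msb = 11, Lsb = 4, and
// extractBits(X, 11, 4) == (X >> 4) & 0xFF for every X.
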
@@ -974,7 +1212,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// shifted mask with c2 leading zeros and c3 trailing zeros.
if (!LeftShift && isShiftedMask_64(C1)) {
unsigned Leading = XLen - llvm::bit_width(C1);
- unsigned Trailing = countTrailingZeros(C1);
+ unsigned Trailing = llvm::countr_zero(C1);
if (Leading == C2 && C2 + Trailing < XLen && OneUseOrZExtW &&
!IsCANDI) {
unsigned SrliOpc = RISCV::SRLI;
@@ -1012,7 +1250,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// shifted mask with no leading zeros and c3 trailing zeros.
if (LeftShift && isShiftedMask_64(C1)) {
unsigned Leading = XLen - llvm::bit_width(C1);
- unsigned Trailing = countTrailingZeros(C1);
+ unsigned Trailing = llvm::countr_zero(C1);
if (Leading == 0 && C2 < Trailing && OneUseOrZExtW && !IsCANDI) {
SDNode *SRLI = CurDAG->getMachineNode(
RISCV::SRLI, DL, VT, X,
@@ -1037,6 +1275,17 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
}
+ // If C1 masks off the upper bits only (but can't be formed as an
+ // ANDI), use an unsigned bitfield extract (e.g., th.extu), if
+ // available.
+ // Transform (and x, C1)
+ // -> (<bfextract> x, msb, lsb)
+ if (isC1Mask && !isC1ANDI) {
+ const unsigned Msb = llvm::bit_width(C1) - 1;
+ if (tryUnsignedBitfieldExtract(Node, DL, VT, N0, Msb, 0))
+ return;
+ }
+
if (tryShrinkShlLogicImm(Node))
return;
@@ -1065,17 +1314,25 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (!isMask_64(C2))
break;
- // If this can be an ANDI, ZEXT.H or ZEXT.W, don't do this if the ANDI/ZEXT
- // has multiple users or the constant is a simm12. This prevents inserting
- // a shift and still have uses of the AND/ZEXT. Shifting a simm12 will
- // likely make it more costly to materialize. Otherwise, using a SLLI
- // might allow it to be compressed.
+ // If this can be an ANDI or ZEXT.H, don't do this if the ANDI/ZEXT has
+ // multiple users or the constant is a simm12. This prevents inserting a
+ // shift and still have uses of the AND/ZEXT. Shifting a simm12 will likely
+ // make it more costly to materialize. Otherwise, using a SLLI might allow
+ // it to be compressed.
bool IsANDIOrZExt =
isInt<12>(C2) ||
- (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb()) ||
- (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba());
+ (C2 == UINT64_C(0xFFFF) && Subtarget->hasStdExtZbb());
+ // With XTHeadBb, we can use TH.EXTU.
+ IsANDIOrZExt |= C2 == UINT64_C(0xFFFF) && Subtarget->hasVendorXTHeadBb();
if (IsANDIOrZExt && (isInt<12>(N1C->getSExtValue()) || !N0.hasOneUse()))
break;
+ // If this can be a ZEXT.w, don't do this if the ZEXT has multiple users or
+ // the constant is a simm32.
+ bool IsZExtW = C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba();
+ // With XTHeadBb, we can use TH.EXTU.
+ IsZExtW |= C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasVendorXTHeadBb();
+ if (IsZExtW && (isInt<32>(N1C->getSExtValue()) || !N0.hasOneUse()))
+ break;
// We need to shift left the AND input and C1 by a total of XLen bits.
@@ -1096,7 +1353,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
ShiftedC1 = SignExtend64<32>(ShiftedC1);
// Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
- SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget);
+ SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget).getNode();
SDNode *SLLI =
CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0),
CurDAG->getTargetConstant(LeadingZeros, DL, VT));
@@ -1105,6 +1362,11 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, MULHU);
return;
}
+ case ISD::LOAD: {
+ if (tryIndexedLoad(Node))
+ return;
+ break;
+ }
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo = Node->getConstantOperandVal(0);
switch (IntNo) {
@@ -1283,8 +1545,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
{Cmp, Mask, VL, MaskSEW}));
return;
}
- case Intrinsic::riscv_vsetvli_opt:
- case Intrinsic::riscv_vsetvlimax_opt:
+ case Intrinsic::riscv_vsetvli:
+ case Intrinsic::riscv_vsetvlimax:
return selectVSETVLI(Node);
}
break;
@@ -1295,9 +1557,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// By default we do not custom select any intrinsic.
default:
break;
- case Intrinsic::riscv_vsetvli:
- case Intrinsic::riscv_vsetvlimax:
- return selectVSETVLI(Node);
case Intrinsic::riscv_vlseg2:
case Intrinsic::riscv_vlseg3:
case Intrinsic::riscv_vlseg4:
@@ -1407,14 +1666,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
unsigned CurOp = 2;
- // Masked intrinsic only have TU version pseduo instructions.
- bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef();
SmallVector<SDValue, 8> Operands;
- if (IsTU)
- Operands.push_back(Node->getOperand(CurOp++));
- else
- // Skip the undef passthru operand for nomask TA version pseudo
- CurOp++;
+ Operands.push_back(Node->getOperand(CurOp++));
MVT IndexVT;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
@@ -1432,7 +1685,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
"values when XLEN=32");
}
const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
- IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
+ IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
static_cast<unsigned>(IndexLMUL));
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
@@ -1456,25 +1709,30 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
MVT VT = Node->getSimpleValueType(0);
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
- unsigned CurOp = 2;
- // The riscv_vlm intrinsic are always tail agnostic and no passthru operand.
+ // The riscv_vlm intrinsic is always tail agnostic and has no passthru
+ // operand at the IR level. In pseudos, it has both policy and passthru
+ // operands. The passthru operand is needed to track the "tail undefined"
+ // state, and the policy is there just for consistency - it will always be
+ // "don't care" for the unmasked form.
bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
- // Masked intrinsic only have TU version pseduo instructions.
- bool IsTU = HasPassthruOperand &&
- (IsMasked || !Node->getOperand(CurOp).isUndef());
+ unsigned CurOp = 2;
SmallVector<SDValue, 8> Operands;
- if (IsTU)
+ if (HasPassthruOperand)
Operands.push_back(Node->getOperand(CurOp++));
- else if (HasPassthruOperand)
- // Skip the undef passthru operand for nomask TA version pseudo
- CurOp++;
-
+ else {
+ // We eagerly lower to implicit_def (instead of undef), as we
+ // otherwise fail to select nodes such as: nxv1i1 = undef
+ SDNode *Passthru =
+ CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+ Operands.push_back(SDValue(Passthru, 0));
+ }
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
Operands, /*IsLoad=*/true);
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
const RISCV::VLEPseudo *P =
- RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
+ RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
static_cast<unsigned>(LMUL));
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
@@ -1493,22 +1751,15 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
unsigned CurOp = 2;
- // Masked intrinsic only have TU version pseduo instructions.
- bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef();
SmallVector<SDValue, 7> Operands;
- if (IsTU)
- Operands.push_back(Node->getOperand(CurOp++));
- else
- // Skip the undef passthru operand for nomask TA version pseudo
- CurOp++;
-
+ Operands.push_back(Node->getOperand(CurOp++));
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
/*IsStridedOrIndexed*/ false, Operands,
/*IsLoad=*/true);
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
const RISCV::VLEPseudo *P =
- RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
+ RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true,
Log2SEW, static_cast<unsigned>(LMUL));
MachineSDNode *Load = CurDAG->getMachineNode(
P->Pseudo, DL, Node->getVTList(), Operands);
@@ -1632,7 +1883,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
"values when XLEN=32");
}
const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
- IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW,
+ IsMasked, IsOrdered, IndexLog2EEW,
static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
MachineSDNode *Store =
CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
@@ -1791,10 +2042,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
case RISCVISD::VFMV_S_F_VL:
case RISCVISD::VMV_V_X_VL:
case RISCVISD::VFMV_V_F_VL: {
- // Only if we have optimized zero-stride vector load.
- if (!Subtarget->hasOptimizedZeroStrideLoad())
- break;
-
// Try to match splat of a scalar load to a strided load with stride of x0.
bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
@@ -1802,7 +2049,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
break;
SDValue Src = Node->getOperand(1);
auto *Ld = dyn_cast<LoadSDNode>(Src);
- if (!Ld)
+ // Can't fold an indexed load: its second output (the updated address) is
+ // still in use, so the load node can't be removed.
+ if (!Ld || Ld->isIndexed())
break;
EVT MemVT = Ld->getMemoryVT();
// The memory VT should be the same size as the element type.
@@ -1825,13 +2074,25 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
- SDValue Operands[] = {Ld->getBasePtr(),
- CurDAG->getRegister(RISCV::X0, XLenVT), VL, SEW,
- Ld->getChain()};
+ // If VL=1, then we don't need to do a strided load and can just do a
+ // regular load.
+ bool IsStrided = !isOneConstant(VL);
+
+ // Only do a strided load if we have optimized zero-stride vector load.
+ if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
+ break;
+
+ SmallVector<SDValue> Operands =
+ {CurDAG->getUNDEF(VT), Ld->getBasePtr()};
+ if (IsStrided)
+ Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
+ uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;
+ SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT);
+ Operands.append({VL, SEW, PolicyOp, Ld->getChain()});
RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
- /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false,
+ /*IsMasked*/ false, IsStrided, /*FF*/ false,
Log2SEW, static_cast<unsigned>(LMUL));
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands);
@@ -1843,6 +2104,36 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
ReplaceNode(Node, Load);
return;
}
+ case ISD::PREFETCH:
+ unsigned Locality = Node->getConstantOperandVal(3);
+ if (Locality > 2)
+ break;
+
+ if (auto *LoadStoreMem = dyn_cast<MemSDNode>(Node)) {
+ MachineMemOperand *MMO = LoadStoreMem->getMemOperand();
+ MMO->setFlags(MachineMemOperand::MONonTemporal);
+
+ int NontemporalLevel = 0;
+ switch (Locality) {
+ case 0:
+ NontemporalLevel = 3; // NTL.ALL
+ break;
+ case 1:
+ NontemporalLevel = 1; // NTL.PALL
+ break;
+ case 2:
+ NontemporalLevel = 0; // NTL.P1
+ break;
+ default:
+ llvm_unreachable("unexpected locality value.");
+ }
+
+ if (NontemporalLevel & 0b1)
+ MMO->setFlags(MONontemporalBit0);
+ if (NontemporalLevel & 0b10)
+ MMO->setFlags(MONontemporalBit1);
+ }
+ break;
}
// Select the default instruction.
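
Aside: the ISD::PREFETCH handling above maps the intrinsic's locality argument onto a nontemporal level whose two low bits drive MONontemporalBit0/MONontemporalBit1. A standalone restatement of that mapping (plain C++; the function name is illustrative):

static int nontemporalLevel(unsigned Locality) {
  switch (Locality) {
  case 0: return 3;   // NTL.ALL
  case 1: return 1;   // NTL.PALL
  case 2: return 0;   // NTL.P1
  default: return -1; // Locality > 2: the code above breaks out, no hint set.
  }
}
// Bit 0 of the level sets MONontemporalBit0, bit 1 sets MONontemporalBit1.
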
@@ -1851,17 +2142,27 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ // Always produce a register and immediate operand, as expected by
+ // RISCVAsmPrinter::PrintAsmMemoryOperand.
switch (ConstraintID) {
- case InlineAsm::Constraint_m:
- // We just support simple memory operands that have a single address
- // operand and need no special handling.
- OutOps.push_back(Op);
+ case InlineAsm::Constraint_o:
+ case InlineAsm::Constraint_m: {
+ SDValue Op0, Op1;
+ bool Found = SelectAddrRegImm(Op, Op0, Op1);
+ assert(Found && "SelectAddrRegImm should always succeed");
+ (void)Found;
+ OutOps.push_back(Op0);
+ OutOps.push_back(Op1);
return false;
+ }
case InlineAsm::Constraint_A:
OutOps.push_back(Op);
+ OutOps.push_back(
+ CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
return false;
default:
- break;
+ report_fatal_error("Unexpected asm memory constraint " +
+ InlineAsm::getMemConstraintName(ConstraintID));
}
return true;
@@ -1943,7 +2244,7 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
Seq.pop_back();
assert(!Seq.empty() && "Expected more instructions in sequence");
- Base = SDValue(selectImmSeq(CurDAG, DL, VT, Seq), 0);
+ Base = selectImmSeq(CurDAG, DL, VT, Seq);
Offset = CurDAG->getTargetConstant(Lo12, DL, VT);
return true;
}
@@ -1972,8 +2273,62 @@ static bool isWorthFoldingAdd(SDValue Add) {
return true;
}
+bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
+ unsigned MaxShiftAmount,
+ SDValue &Base, SDValue &Index,
+ SDValue &Scale) {
+ EVT VT = Addr.getSimpleValueType();
+ auto UnwrapShl = [this, VT, MaxShiftAmount](SDValue N, SDValue &Index,
+ SDValue &Shift) {
+ uint64_t ShiftAmt = 0;
+ Index = N;
+
+ if (N.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N.getOperand(1))) {
+ // Only match shifts by a value in range [0, MaxShiftAmount].
+ if (N.getConstantOperandVal(1) <= MaxShiftAmount) {
+ Index = N.getOperand(0);
+ ShiftAmt = N.getConstantOperandVal(1);
+ }
+ }
+
+ Shift = CurDAG->getTargetConstant(ShiftAmt, SDLoc(N), VT);
+ return ShiftAmt != 0;
+ };
+
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (auto *C1 = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ SDValue AddrB = Addr.getOperand(0);
+ if (AddrB.getOpcode() == ISD::ADD &&
+ UnwrapShl(AddrB.getOperand(0), Index, Scale) &&
+ !isa<ConstantSDNode>(AddrB.getOperand(1)) &&
+ isInt<12>(C1->getSExtValue())) {
+ // (add (add (shl A C2) B) C1) -> (add (add B C1) (shl A C2))
+ SDValue C1Val =
+ CurDAG->getTargetConstant(C1->getZExtValue(), SDLoc(Addr), VT);
+ Base = SDValue(CurDAG->getMachineNode(RISCV::ADDI, SDLoc(Addr), VT,
+ AddrB.getOperand(1), C1Val),
+ 0);
+ return true;
+ }
+ } else if (UnwrapShl(Addr.getOperand(0), Index, Scale)) {
+ Base = Addr.getOperand(1);
+ return true;
+ } else {
+ UnwrapShl(Addr.getOperand(1), Index, Scale);
+ Base = Addr.getOperand(0);
+ return true;
+ }
+ } else if (UnwrapShl(Addr, Index, Scale)) {
+ EVT VT = Addr.getValueType();
+ Base = CurDAG->getRegister(RISCV::X0, VT);
+ return true;
+ }
+
+ return false;
+}
+
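
Aside: the (add (add (shl A, C2), B), C1) -> (add (add B, C1), (shl A, C2)) rewrite in SelectAddrRegRegScale is plain reassociation; it lets the simm12 constant fold into the base via one ADDI while the shifted term becomes the scaled index. A quick standalone check that both shapes compute the same address (plain C++):

#include <cstdint>

static uint64_t originalForm(uint64_t A, uint64_t B, uint64_t C1, unsigned C2) {
  return ((A << C2) + B) + C1;
}
static uint64_t rewrittenForm(uint64_t A, uint64_t B, uint64_t C1, unsigned C2) {
  return (B + C1) + (A << C2); // same value modulo 2^64 for all inputs
}
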
bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
- SDValue &Offset) {
+ SDValue &Offset, bool IsINX) {
if (SelectAddrFrameIndex(Addr, Base, Offset))
return true;
@@ -1986,9 +2341,10 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
return true;
}
+ int64_t RV32ZdinxRange = IsINX ? 4 : 0;
if (CurDAG->isBaseWithConstantOffset(Addr)) {
int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
- if (isInt<12>(CVal)) {
+ if (isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) {
Base = Addr.getOperand(0);
if (Base.getOpcode() == RISCVISD::ADD_LO) {
SDValue LoOperand = Base.getOperand(1);
@@ -2022,7 +2378,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
// Handle ADD with large immediates.
if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
- assert(!isInt<12>(CVal) && "simm12 not already handled?");
+ assert(!(isInt<12>(CVal) && isInt<12>(CVal + RV32ZdinxRange)) &&
+ "simm12 not already handled?");
// Handle immediates in the range [-4096,-2049] or [2048, 4094]. We can use
// an ADDI for part of the offset and fold the rest into the load/store.
@@ -2066,7 +2423,7 @@ bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
SDValue &ShAmt) {
ShAmt = N;
- // Shift instructions on RISCV only read the lower 5 or 6 bits of the shift
+ // Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
// amount. If there is an AND on the shift amount, we can bypass it if it
// doesn't affect any of those bits.
if (ShAmt.getOpcode() == ISD::AND && isa<ConstantSDNode>(ShAmt.getOperand(1))) {
@@ -2129,15 +2486,93 @@ bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
return true;
}
-bool RISCVDAGToDAGISel::selectSExti32(SDValue N, SDValue &Val) {
+/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
+/// check for equality with 0. This function emits instructions that convert the
+/// seteq/setne into something that can be compared with 0.
+/// \p ExpectedCCVal indicates the condition code to attempt to match (e.g.
+/// ISD::SETNE).
+bool RISCVDAGToDAGISel::selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
+ SDValue &Val) {
+ assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
+ "Unexpected condition code!");
+
+ // We're looking for a setcc.
+ if (N->getOpcode() != ISD::SETCC)
+ return false;
+
+ // Must be an equality comparison.
+ ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ if (CCVal != ExpectedCCVal)
+ return false;
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ if (!LHS.getValueType().isScalarInteger())
+ return false;
+
+ // If the RHS is 0, we don't need any extra instructions; return the LHS.
+ if (isNullConstant(RHS)) {
+ Val = LHS;
+ return true;
+ }
+
+ SDLoc DL(N);
+
+ if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
+ int64_t CVal = C->getSExtValue();
+ // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
+ // non-zero otherwise.
+ if (CVal == -2048) {
+ Val =
+ SDValue(CurDAG->getMachineNode(
+ RISCV::XORI, DL, N->getValueType(0), LHS,
+ CurDAG->getTargetConstant(CVal, DL, N->getValueType(0))),
+ 0);
+ return true;
+ }
+ // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
+ // LHS is equal to the RHS and non-zero otherwise.
+ if (isInt<12>(CVal) || CVal == 2048) {
+ Val =
+ SDValue(CurDAG->getMachineNode(
+ RISCV::ADDI, DL, N->getValueType(0), LHS,
+ CurDAG->getTargetConstant(-CVal, DL, N->getValueType(0))),
+ 0);
+ return true;
+ }
+ }
+
+ // If nothing else we can XOR the LHS and RHS to produce zero if they are
+ // equal and a non-zero value if they aren't.
+ Val = SDValue(
+ CurDAG->getMachineNode(RISCV::XOR, DL, N->getValueType(0), LHS, RHS), 0);
+ return true;
+}
+
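
Aside: a scalar model of the selectSETCC rewrite above. The idea is to pick an operation whose result is zero exactly when LHS == C, so the result can then be tested against zero (e.g. with seqz/snez or beqz/bnez). Plain C++ sketch with wrapping arithmetic modelled via uint64_t; names are illustrative:

#include <cstdint>

static uint64_t equalityToZeroForm(uint64_t LHS, int64_t C) {
  if (C == 0)
    return LHS;                              // already comparable with zero
  if (C == -2048)
    return LHS ^ static_cast<uint64_t>(C);   // xori; +2048 is not a simm12
  if (C >= -2047 && C <= 2048)
    return LHS + static_cast<uint64_t>(-C);  // addi with -C, a valid simm12
  return LHS ^ static_cast<uint64_t>(C);     // general case: xor with RHS reg
}
// In every branch the result is 0 iff LHS == C (mod 2^64).
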
+bool RISCVDAGToDAGISel::selectSExtBits(SDValue N, unsigned Bits, SDValue &Val) {
if (N.getOpcode() == ISD::SIGN_EXTEND_INREG &&
- cast<VTSDNode>(N.getOperand(1))->getVT() == MVT::i32) {
+ cast<VTSDNode>(N.getOperand(1))->getVT().getSizeInBits() == Bits) {
Val = N.getOperand(0);
return true;
}
+
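+ // Peel off a (sra (shl X, ShiftAmt), ShiftAmt) pair: when N is already known
+ // to be sign-extended from Bits bits, the explicit shift pair is redundant
+ // and the pre-shift value can be used instead. E.g. with Bits == 16 on RV64
+ // (ShiftAmt == 48), (sra (shl X, 48), 48) is unwrapped to X.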
+ auto UnwrapShlSra = [](SDValue N, unsigned ShiftAmt) {
+ if (N.getOpcode() != ISD::SRA || !isa<ConstantSDNode>(N.getOperand(1)))
+ return N;
+
+ SDValue N0 = N.getOperand(0);
+ if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N.getConstantOperandVal(1) == ShiftAmt &&
+ N0.getConstantOperandVal(1) == ShiftAmt)
+ return N0.getOperand(0);
+
+ return N;
+ };
+
MVT VT = N.getSimpleValueType();
- if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - 32)) {
- Val = N;
+ if (CurDAG->ComputeNumSignBits(N) > (VT.getSizeInBits() - Bits)) {
+ Val = UnwrapShlSra(N, VT.getSizeInBits() - Bits);
return true;
}
@@ -2187,7 +2622,7 @@ bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
// followed by a SHXADD with c3 for the X amount.
if (isShiftedMask_64(Mask)) {
unsigned Leading = XLen - llvm::bit_width(Mask);
- unsigned Trailing = countTrailingZeros(Mask);
+ unsigned Trailing = llvm::countr_zero(Mask);
if (LeftShift && Leading == 0 && C2 < Trailing && Trailing == ShAmt) {
SDLoc DL(N);
EVT VT = N.getValueType();
@@ -2225,7 +2660,7 @@ bool RISCVDAGToDAGISel::selectSHXADDOp(SDValue N, unsigned ShAmt,
unsigned C1 = N.getConstantOperandVal(1);
unsigned XLen = Subtarget->getXLen();
unsigned Leading = XLen - llvm::bit_width(Mask);
- unsigned Trailing = countTrailingZeros(Mask);
+ unsigned Trailing = llvm::countr_zero(Mask);
// Look for (shl (and X, Mask), C1) where Mask has 32 leading zeros and
// C3 trailing zeros. If C1+C3==ShAmt we can use SRLIW+SHXADD.
if (LeftShift && Leading == 32 && Trailing > 0 &&
@@ -2276,8 +2711,8 @@ bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
// 32-ShAmt leading zeros and c2 trailing zeros. We can use SLLI by
// c2-ShAmt followed by SHXADD_UW with ShAmt for the X amount.
if (isShiftedMask_64(Mask)) {
- unsigned Leading = countLeadingZeros(Mask);
- unsigned Trailing = countTrailingZeros(Mask);
+ unsigned Leading = llvm::countl_zero(Mask);
+ unsigned Trailing = llvm::countr_zero(Mask);
if (Leading == 32 - ShAmt && Trailing == C2 && Trailing > ShAmt) {
SDLoc DL(N);
EVT VT = N.getValueType();
@@ -2354,6 +2789,8 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
case RISCV::FCVT_S_WU:
case RISCV::FCVT_D_W:
case RISCV::FCVT_D_WU:
+ case RISCV::TH_REVW:
+ case RISCV::TH_SRRIW:
if (Bits < 32)
return false;
break;
@@ -2451,6 +2888,29 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
return true;
}
+// Select a constant that can be represented as (sign_extend(imm5) << imm2).
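+// For example, 96 (= 12 << 3) and -64 (= -16 << 2) are representable, while
+// 100 is not: no shift amount in [0, 3] leaves a quotient that fits in a
+// signed 5-bit immediate.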
+bool RISCVDAGToDAGISel::selectSimm5Shl2(SDValue N, SDValue &Simm5,
+ SDValue &Shl2) {
+ if (auto *C = dyn_cast<ConstantSDNode>(N)) {
+ int64_t Offset = C->getSExtValue();
+ int64_t Shift;
+ for (Shift = 0; Shift < 4; Shift++)
+ if (isInt<5>(Offset >> Shift) && ((Offset % (1LL << Shift)) == 0))
+ break;
+
+ // Constant cannot be encoded.
+ if (Shift == 4)
+ return false;
+
+ EVT Ty = N->getValueType(0);
+ Simm5 = CurDAG->getTargetConstant(Offset >> Shift, SDLoc(N), Ty);
+ Shl2 = CurDAG->getTargetConstant(Shift, SDLoc(N), Ty);
+ return true;
+ }
+
+ return false;
+}
+
// Select VL as a 5 bit immediate or a value that will become a register. This
// allows us to choose between VSETIVLI or VSETVLI later.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
@@ -2458,7 +2918,7 @@ bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
if (C && isUInt<5>(C->getZExtValue())) {
VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
N->getValueType(0));
- } else if (C && C->isAllOnesValue()) {
+ } else if (C && C->isAllOnes()) {
// Treat all ones as VLMax.
VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
N->getValueType(0));
@@ -2539,7 +2999,8 @@ bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
});
}
-bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
+bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
+ SDValue &SplatVal) {
if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
!isa<ConstantSDNode>(N.getOperand(1)))
return false;
@@ -2547,7 +3008,7 @@ bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
int64_t SplatImm =
cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
- if (!isUInt<5>(SplatImm))
+ if (!isUIntN(Bits, SplatImm))
return false;
SplatVal =
@@ -2556,6 +3017,42 @@ bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
return true;
}
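+// Match a splat whose value is fed through a single-use sign_extend or
+// zero_extend: the extension is peeled off so the splat underneath can be
+// matched by selectVSplat.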
+bool RISCVDAGToDAGISel::selectExtOneUseVSplat(SDValue N, SDValue &SplatVal) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND) {
+ if (!N.hasOneUse())
+ return false;
+ N = N->getOperand(0);
+ }
+ return selectVSplat(N, SplatVal);
+}
+
+bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
+ ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
+ if (!CFP)
+ return false;
+ const APFloat &APF = CFP->getValueAPF();
+ // td can handle +0.0 already.
+ if (APF.isPosZero())
+ return false;
+
+ MVT VT = CFP->getSimpleValueType(0);
+
+ if (static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
+ VT) >= 0)
+ return false;
+
+ MVT XLenVT = Subtarget->getXLenVT();
+ if (VT == MVT::f64 && !Subtarget->is64Bit()) {
+ assert(APF.isNegZero() && "Unexpected constant.");
+ return false;
+ }
+ SDLoc DL(N);
+ Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
+ *Subtarget);
+ return true;
+}
+
bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
SDValue &Imm) {
if (auto *C = dyn_cast<ConstantSDNode>(N)) {
@@ -2624,6 +3121,10 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
case RISCV::MULW:
case RISCV::SLLIW:
case RISCV::PACKW:
+ case RISCV::TH_MULAW:
+ case RISCV::TH_MULAH:
+ case RISCV::TH_MULSW:
+ case RISCV::TH_MULSH:
// Result is already sign extended just remove the sext.w.
// NOTE: We only handle the nodes that are selected with hasAllWUsers.
ReplaceUses(N, N0.getNode());
@@ -2633,15 +3134,14 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
return false;
}
-// Return true if we can make sure mask of N is all-ones mask.
-static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
+static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
// Check that we're using V0 as a mask register.
- if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) ||
- cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0)
+ if (!isa<RegisterSDNode>(MaskOp) ||
+ cast<RegisterSDNode>(MaskOp)->getReg() != RISCV::V0)
return false;
// The glued user defines V0.
- const auto *Glued = N->getGluedNode();
+ const auto *Glued = GlueOp.getNode();
if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
return false;
@@ -2668,6 +3168,17 @@ static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
IsVMSet(MaskSetter.getMachineOpcode());
}
+// Return true if we can make sure mask of N is all-ones mask.
+static bool usesAllOnesMask(SDNode *N, unsigned MaskOpIdx) {
+ return usesAllOnesMask(N->getOperand(MaskOpIdx),
+ N->getOperand(N->getNumOperands() - 1));
+}
+
+static bool isImplicitDef(SDValue V) {
+ return V.isMachineOpcode() &&
+ V.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF;
+}
+
// Optimize masked RVV pseudo instructions with a known all-ones mask to their
// corresponding "unmasked" pseudo versions. The mask we're interested in will
// take the form of a V0 physical register operand, with a glued
@@ -2679,49 +3190,29 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
return false;
unsigned MaskOpIdx = I->MaskOpIdx;
-
if (!usesAllOnesMask(N, MaskOpIdx))
return false;
- // Retrieve the tail policy operand index, if any.
- std::optional<unsigned> TailPolicyOpIdx;
- const RISCVInstrInfo &TII = *Subtarget->getInstrInfo();
- const MCInstrDesc &MaskedMCID = TII.get(N->getMachineOpcode());
-
- bool IsTA = true;
- if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) {
- TailPolicyOpIdx = getVecPolicyOpIdx(N, MaskedMCID);
- if (!(N->getConstantOperandVal(*TailPolicyOpIdx) &
- RISCVII::TAIL_AGNOSTIC)) {
- // Keep the true-masked instruction when there is no unmasked TU
- // instruction
- if (I->UnmaskedTUPseudo == I->MaskedPseudo && !N->getOperand(0).isUndef())
- return false;
- // We can't use TA if the tie-operand is not IMPLICIT_DEF
- if (!N->getOperand(0).isUndef())
- IsTA = false;
- }
- }
-
- unsigned Opc = IsTA ? I->UnmaskedPseudo : I->UnmaskedTUPseudo;
-
- // Check that we're dropping the mask operand and any policy operand
- // when we transform to this unmasked pseudo. Additionally, if this insturtion
- // is tail agnostic, the unmasked instruction should not have a merge op.
- uint64_t TSFlags = TII.get(Opc).TSFlags;
- assert((IsTA != RISCVII::hasMergeOp(TSFlags)) &&
- RISCVII::hasDummyMaskOp(TSFlags) &&
- !RISCVII::hasVecPolicyOp(TSFlags) &&
- "Unexpected pseudo to transform to");
- (void)TSFlags;
+ // There are two classes of pseudos in the table - compares and
+ // everything else. See the comment on RISCVMaskedPseudo for details.
+ const unsigned Opc = I->UnmaskedPseudo;
+ const MCInstrDesc &MCID = TII->get(Opc);
+ const bool UseTUPseudo = RISCVII::hasVecPolicyOp(MCID.TSFlags);
+#ifndef NDEBUG
+ const MCInstrDesc &MaskedMCID = TII->get(N->getMachineOpcode());
+ assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) ==
+ RISCVII::hasVecPolicyOp(MCID.TSFlags) &&
+ "Masked and unmasked pseudos are inconsistent");
+ const bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(MCID);
+ assert(UseTUPseudo == HasTiedDest && "Unexpected pseudo structure");
+#endif
SmallVector<SDValue, 8> Ops;
- // Skip the merge operand at index 0 if IsTA
- for (unsigned I = IsTA, E = N->getNumOperands(); I != E; I++) {
- // Skip the mask, the policy, and the Glue.
+ // Skip the merge operand at index 0 if !UseTUPseudo.
+ for (unsigned I = !UseTUPseudo, E = N->getNumOperands(); I != E; I++) {
+ // Skip the mask, and the Glue.
SDValue Op = N->getOperand(I);
- if (I == MaskOpIdx || I == TailPolicyOpIdx ||
- Op.getValueType() == MVT::Glue)
+ if (I == MaskOpIdx || Op.getValueType() == MVT::Glue)
continue;
Ops.push_back(Op);
}
@@ -2738,18 +3229,86 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
return true;
}
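// For instance, a PseudoVADD_VV_M1_MASK whose V0 operand is provably all ones
// is rewritten to the plain PseudoVADD_VV_M1 pseudo: the mask operand and its
// glue are dropped, the merge operand is kept only when the unmasked pseudo
// still carries a policy operand, and the remaining operands are copied over.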
-// Try to fold VMERGE_VVM with unmasked intrinsic to masked intrinsic. The
-// peephole only deals with VMERGE_VVM which is TU and has false operand same as
-// its true operand now. E.g. (VMERGE_VVM_M1_TU False, False, (VADD_M1 ...),
-// ...) -> (VADD_VV_M1_MASK)
-bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N, bool IsTA) {
- unsigned Offset = IsTA ? 0 : 1;
- uint64_t Policy = IsTA ? RISCVII::TAIL_AGNOSTIC : /*TUMU*/ 0;
+static bool IsVMerge(SDNode *N) {
+ unsigned Opc = N->getMachineOpcode();
+ return Opc == RISCV::PseudoVMERGE_VVM_MF8 ||
+ Opc == RISCV::PseudoVMERGE_VVM_MF4 ||
+ Opc == RISCV::PseudoVMERGE_VVM_MF2 ||
+ Opc == RISCV::PseudoVMERGE_VVM_M1 ||
+ Opc == RISCV::PseudoVMERGE_VVM_M2 ||
+ Opc == RISCV::PseudoVMERGE_VVM_M4 || Opc == RISCV::PseudoVMERGE_VVM_M8;
+}
+
+static bool IsVMv(SDNode *N) {
+ unsigned Opc = N->getMachineOpcode();
+ return Opc == RISCV::PseudoVMV_V_V_MF8 || Opc == RISCV::PseudoVMV_V_V_MF4 ||
+ Opc == RISCV::PseudoVMV_V_V_MF2 || Opc == RISCV::PseudoVMV_V_V_M1 ||
+ Opc == RISCV::PseudoVMV_V_V_M2 || Opc == RISCV::PseudoVMV_V_V_M4 ||
+ Opc == RISCV::PseudoVMV_V_V_M8;
+}
- SDValue False = N->getOperand(0 + Offset);
- SDValue True = N->getOperand(1 + Offset);
- SDValue Mask = N->getOperand(2 + Offset);
- SDValue VL = N->getOperand(3 + Offset);
+static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
+ switch (LMUL) {
+ case RISCVII::LMUL_F8:
+ return RISCV::PseudoVMSET_M_B1;
+ case RISCVII::LMUL_F4:
+ return RISCV::PseudoVMSET_M_B2;
+ case RISCVII::LMUL_F2:
+ return RISCV::PseudoVMSET_M_B4;
+ case RISCVII::LMUL_1:
+ return RISCV::PseudoVMSET_M_B8;
+ case RISCVII::LMUL_2:
+ return RISCV::PseudoVMSET_M_B16;
+ case RISCVII::LMUL_4:
+ return RISCV::PseudoVMSET_M_B32;
+ case RISCVII::LMUL_8:
+ return RISCV::PseudoVMSET_M_B64;
+ case RISCVII::LMUL_RESERVED:
+ llvm_unreachable("Unexpected LMUL");
+ }
+ llvm_unreachable("Unknown VLMUL enum");
+}
+
+// Try to fold away VMERGE_VVM instructions. We handle these cases:
+// -Masked TU VMERGE_VVM combined with an unmasked TA instruction folds to a
+// masked TU instruction. The VMERGE_VVM must have its merge operand the same
+// as its false operand.
+// -Masked TA VMERGE_VVM combined with an unmasked TA instruction fold to a
+// masked TA instruction.
+// -Unmasked TU VMERGE_VVM combined with a masked MU TA instruction folds to a
+// masked TU instruction. Both instructions must have the same merge operand,
+// and the VMERGE_VVM must have its merge operand the same as its false
+// operand.
+// Note: The VMERGE_VVM forms above (TA and TU) refer to the policy implied,
+// not the pseudo name. That is, a TA VMERGE_VVM can be either the _TU pseudo
+// form with an IMPLICIT_DEF passthrough operand or the unsuffixed (TA) pseudo
+// form.
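+// For example (illustrative), (PseudoVMERGE_VVM_M1 undef, False,
+// (PseudoVADD_VV_M1 ...), V0, VL, SEW) can be folded to
+// (PseudoVADD_VV_M1_MASK False, ..., V0, VL, SEW, TAIL_AGNOSTIC), reusing the
+// vmerge's mask and the smaller of the two vector lengths.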
+bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
+ SDValue Merge, False, True, VL, Mask, Glue;
+ // A vmv.v.v is equivalent to a vmerge with an all-ones mask.
+ if (IsVMv(N)) {
+ Merge = N->getOperand(0);
+ False = N->getOperand(0);
+ True = N->getOperand(1);
+ VL = N->getOperand(2);
+ // A vmv.v.v won't have a Mask or Glue; instead we'll construct an all-ones
+ // mask below.
+ } else {
+ assert(IsVMerge(N));
+ Merge = N->getOperand(0);
+ False = N->getOperand(1);
+ True = N->getOperand(2);
+ Mask = N->getOperand(3);
+ VL = N->getOperand(4);
+ // We always have a glue node for the mask at v0.
+ Glue = N->getOperand(N->getNumOperands() - 1);
+ }
+ assert(!Mask || cast<RegisterSDNode>(Mask)->getReg() == RISCV::V0);
+ assert(!Glue || Glue.getValueType() == MVT::Glue);
+
+ // We require either that merge and false are the same, or that merge is
+ // undefined.
+ if (Merge != False && !isImplicitDef(Merge))
+ return false;
assert(True.getResNo() == 0 &&
"Expect True is the first output of an instruction.");
@@ -2762,27 +3321,60 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N, bool IsTA) {
return false;
unsigned TrueOpc = True.getMachineOpcode();
+ const MCInstrDesc &TrueMCID = TII->get(TrueOpc);
+ uint64_t TrueTSFlags = TrueMCID.TSFlags;
+ bool HasTiedDest = RISCVII::isFirstDefTiedToFirstUse(TrueMCID);
- // Skip if True has merge operand.
- // TODO: Deal with True having same merge operand with N.
- if (RISCVII::hasMergeOp(TII->get(TrueOpc).TSFlags))
+ bool IsMasked = false;
+ const RISCV::RISCVMaskedPseudoInfo *Info =
+ RISCV::lookupMaskedIntrinsicByUnmasked(TrueOpc);
+ if (!Info && HasTiedDest) {
+ Info = RISCV::getMaskedPseudoInfo(TrueOpc);
+ IsMasked = true;
+ }
+
+ if (!Info)
return false;
+ if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
+ // The vmerge instruction must be TU.
+ // FIXME: This could be relaxed, but we need to handle the policy for the
+ // resulting op correctly.
+ if (isImplicitDef(Merge))
+ return false;
+ SDValue MergeOpTrue = True->getOperand(0);
+ // Both the vmerge instruction and the True instruction must have the same
+ // merge operand.
+ if (False != MergeOpTrue)
+ return false;
+ }
+
+ if (IsMasked) {
+ assert(HasTiedDest && "Expected tied dest");
+ // The vmerge instruction must be TU.
+ if (isImplicitDef(Merge))
+ return false;
+ // The vmerge instruction must have an all 1s mask since we're going to keep
+ // the mask from the True instruction.
+ // FIXME: Support mask agnostic True instruction which would have an
+ // undef merge operand.
+ if (Mask && !usesAllOnesMask(Mask, Glue))
+ return false;
+ }
+
// Skip if True has side effect.
- // TODO: Support velff and vlsegff.
+ // TODO: Support vleff and vlsegff.
if (TII->get(TrueOpc).hasUnmodeledSideEffects())
return false;
- // Only deal with True when True is unmasked intrinsic now.
- const RISCV::RISCVMaskedPseudoInfo *Info =
- RISCV::lookupMaskedIntrinsicByUnmaskedTA(TrueOpc);
+ // The last operand of a masked instruction may be glued.
+ bool HasGlueOp = True->getGluedNode() != nullptr;
- if (!Info)
- return false;
-
- // The last operand of unmasked intrinsic should be sew or chain.
+ // The chain operand may exist either before the glued operands or in the last
+ // position.
+ unsigned TrueChainOpIdx = True.getNumOperands() - HasGlueOp - 1;
bool HasChainOp =
- True.getOperand(True.getNumOperands() - 1).getValueType() == MVT::Other;
+ True.getOperand(TrueChainOpIdx).getValueType() == MVT::Other;
if (HasChainOp) {
// Avoid creating cycles in the DAG. We must ensure that none of the other
@@ -2790,49 +3382,115 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N, bool IsTA) {
SmallVector<const SDNode *, 4> LoopWorklist;
SmallPtrSet<const SDNode *, 16> Visited;
LoopWorklist.push_back(False.getNode());
- LoopWorklist.push_back(Mask.getNode());
+ if (Mask)
+ LoopWorklist.push_back(Mask.getNode());
LoopWorklist.push_back(VL.getNode());
- if (SDNode *Glued = N->getGluedNode())
- LoopWorklist.push_back(Glued);
+ if (Glue)
+ LoopWorklist.push_back(Glue.getNode());
if (SDNode::hasPredecessorHelper(True.getNode(), Visited, LoopWorklist))
return false;
}
- // Need True has same VL with N.
- unsigned TrueVLIndex = True.getNumOperands() - HasChainOp - 2;
+ // The vector policy operand may be present for masked intrinsics
+ bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TrueTSFlags);
+ unsigned TrueVLIndex =
+ True.getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
SDValue TrueVL = True.getOperand(TrueVLIndex);
-
- auto IsNoFPExcept = [this](SDValue N) {
- return !this->mayRaiseFPException(N.getNode()) ||
- N->getFlags().hasNoFPExcept();
+ SDValue SEW = True.getOperand(TrueVLIndex + 1);
+
+ auto GetMinVL = [](SDValue LHS, SDValue RHS) {
+ if (LHS == RHS)
+ return LHS;
+ if (isAllOnesConstant(LHS))
+ return RHS;
+ if (isAllOnesConstant(RHS))
+ return LHS;
+ auto *CLHS = dyn_cast<ConstantSDNode>(LHS);
+ auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
+ if (!CLHS || !CRHS)
+ return SDValue();
+ return CLHS->getZExtValue() <= CRHS->getZExtValue() ? LHS : RHS;
};
- // Allow the peephole for non-exception True with VLMAX vector length, since
- // all the values after VL of N are dependent on Merge. VLMAX should be
- // lowered to (XLenVT -1).
- if (TrueVL != VL && !(IsNoFPExcept(True) && isAllOnesConstant(TrueVL)))
+ // Because N and True must have the same merge operand (or True's merge
+ // operand is implicit_def), the "effective" body is the minimum of their VLs.
+ VL = GetMinVL(TrueVL, VL);
+ if (!VL)
return false;
+ // If we end up changing the VL or mask of True, then we need to make sure it
+ // doesn't raise any observable fp exceptions, since changing the active
+ // elements will affect how fflags is set.
+ if (TrueVL != VL || !IsMasked)
+ if (mayRaiseFPException(True.getNode()) &&
+ !True->getFlags().hasNoFPExcept())
+ return false;
+
SDLoc DL(N);
+
+ // From the preconditions we checked above, we know the mask and thus glue
+ // for the result node will be taken from True.
+ if (IsMasked) {
+ Mask = True->getOperand(Info->MaskOpIdx);
+ Glue = True->getOperand(True->getNumOperands() - 1);
+ assert(Glue.getValueType() == MVT::Glue);
+ }
+ // If we end up using the vmerge's mask but the vmerge is actually a vmv.v.v
+ // (which has no mask of its own), create an all-ones mask to use.
+ else if (IsVMv(N)) {
+ unsigned TSFlags = TII->get(N->getMachineOpcode()).TSFlags;
+ unsigned VMSetOpc = GetVMSetForLMul(RISCVII::getLMul(TSFlags));
+ ElementCount EC = N->getValueType(0).getVectorElementCount();
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, EC);
+
+ SDValue AllOnesMask =
+ SDValue(CurDAG->getMachineNode(VMSetOpc, DL, MaskVT, VL, SEW), 0);
+ SDValue MaskCopy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
+ RISCV::V0, AllOnesMask, SDValue());
+ Mask = CurDAG->getRegister(RISCV::V0, MaskVT);
+ Glue = MaskCopy.getValue(1);
+ }
+
unsigned MaskedOpc = Info->MaskedPseudo;
- assert(RISCVII::hasVecPolicyOp(TII->get(MaskedOpc).TSFlags) &&
+#ifndef NDEBUG
+ const MCInstrDesc &MaskedMCID = TII->get(MaskedOpc);
+ assert(RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags) &&
"Expected instructions with mask have policy operand.");
- assert(RISCVII::hasMergeOp(TII->get(MaskedOpc).TSFlags) &&
- "Expected instructions with mask have merge operand.");
+ assert(MaskedMCID.getOperandConstraint(MaskedMCID.getNumDefs(),
+ MCOI::TIED_TO) == 0 &&
+ "Expected instructions with mask have a tied dest.");
+#endif
+
+ uint64_t Policy = isImplicitDef(Merge) ? RISCVII::TAIL_AGNOSTIC : /*TUMU*/ 0;
+ SDValue PolicyOp =
+ CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
+
SmallVector<SDValue, 8> Ops;
Ops.push_back(False);
- Ops.append(True->op_begin(), True->op_begin() + TrueVLIndex);
- Ops.append({Mask, VL, /* SEW */ True.getOperand(TrueVLIndex + 1)});
- Ops.push_back(CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT()));
+
+ const bool HasRoundingMode = RISCVII::hasRoundModeOp(TrueTSFlags);
+ const unsigned NormalOpsEnd = TrueVLIndex - IsMasked - HasRoundingMode;
+ assert(!IsMasked || NormalOpsEnd == Info->MaskOpIdx);
+ Ops.append(True->op_begin() + HasTiedDest, True->op_begin() + NormalOpsEnd);
+
+ Ops.push_back(Mask);
+
+ // For an unmasked "VOp" with a rounding mode operand, i.e. one whose operand
+ // list looks like (..., rm, vl) or (..., rm, vl, policy), the masked version
+ // is (..., vm, rm, vl, policy). See the rounding mode pseudo nodes in
+ // RISCVInstrInfoVPseudos.td.
+ if (HasRoundingMode)
+ Ops.push_back(True->getOperand(TrueVLIndex - 1));
+
+ Ops.append({VL, SEW, PolicyOp});
// Result node should have chain operand of True.
if (HasChainOp)
- Ops.push_back(True.getOperand(True.getNumOperands() - 1));
+ Ops.push_back(True.getOperand(TrueChainOpIdx));
- // Result node should take over glued node of N.
- if (N->getGluedNode())
- Ops.push_back(N->getOperand(N->getNumOperands() - 1));
+ // Add the glue for the CopyToReg of mask->v0.
+ Ops.push_back(Glue);
SDNode *Result =
CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
@@ -2850,45 +3508,36 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N, bool IsTA) {
return true;
}
-// Transform (VMERGE_VVM_<LMUL>_TU false, false, true, allones, vl, sew) to
-// (VADD_VI_<LMUL>_TU false, true, 0, vl, sew). It may decrease uses of VMSET.
-bool RISCVDAGToDAGISel::performVMergeToVAdd(SDNode *N) {
+// Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
+// (VMV_V_V_<LMUL> false, true, vl, sew, policy). It may decrease uses of
+// VMSET.
+bool RISCVDAGToDAGISel::performVMergeToVMv(SDNode *N) {
+#define CASE_VMERGE_TO_VMV(lmul) \
+ case RISCV::PseudoVMERGE_VVM_##lmul: \
+ NewOpc = RISCV::PseudoVMV_V_V_##lmul; \
+ break;
unsigned NewOpc;
switch (N->getMachineOpcode()) {
default:
- llvm_unreachable("Expected VMERGE_VVM_<LMUL>_TU instruction.");
- case RISCV::PseudoVMERGE_VVM_MF8_TU:
- NewOpc = RISCV::PseudoVADD_VI_MF8_TU;
- break;
- case RISCV::PseudoVMERGE_VVM_MF4_TU:
- NewOpc = RISCV::PseudoVADD_VI_MF4_TU;
- break;
- case RISCV::PseudoVMERGE_VVM_MF2_TU:
- NewOpc = RISCV::PseudoVADD_VI_MF2_TU;
- break;
- case RISCV::PseudoVMERGE_VVM_M1_TU:
- NewOpc = RISCV::PseudoVADD_VI_M1_TU;
- break;
- case RISCV::PseudoVMERGE_VVM_M2_TU:
- NewOpc = RISCV::PseudoVADD_VI_M2_TU;
- break;
- case RISCV::PseudoVMERGE_VVM_M4_TU:
- NewOpc = RISCV::PseudoVADD_VI_M4_TU;
- break;
- case RISCV::PseudoVMERGE_VVM_M8_TU:
- NewOpc = RISCV::PseudoVADD_VI_M8_TU;
- break;
+ llvm_unreachable("Expected VMERGE_VVM_<LMUL> instruction.");
+ CASE_VMERGE_TO_VMV(MF8)
+ CASE_VMERGE_TO_VMV(MF4)
+ CASE_VMERGE_TO_VMV(MF2)
+ CASE_VMERGE_TO_VMV(M1)
+ CASE_VMERGE_TO_VMV(M2)
+ CASE_VMERGE_TO_VMV(M4)
+ CASE_VMERGE_TO_VMV(M8)
}
if (!usesAllOnesMask(N, /* MaskOpIdx */ 3))
return false;
SDLoc DL(N);
- EVT VT = N->getValueType(0);
- SDValue Ops[] = {N->getOperand(1), N->getOperand(2),
- CurDAG->getTargetConstant(0, DL, Subtarget->getXLenVT()),
- N->getOperand(4), N->getOperand(5)};
- SDNode *Result = CurDAG->getMachineNode(NewOpc, DL, VT, Ops);
+ SDValue PolicyOp =
+ CurDAG->getTargetConstant(/*TUMU*/ 0, DL, Subtarget->getXLenVT());
+ SDNode *Result = CurDAG->getMachineNode(
+ NewOpc, DL, N->getValueType(0),
+ {N->getOperand(1), N->getOperand(2), N->getOperand(4), N->getOperand(5),
+ PolicyOp});
ReplaceUses(N, Result);
return true;
}
@@ -2902,34 +3551,10 @@ bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
if (N->use_empty() || !N->isMachineOpcode())
continue;
- auto IsVMergeTU = [](unsigned Opcode) {
- return Opcode == RISCV::PseudoVMERGE_VVM_MF8_TU ||
- Opcode == RISCV::PseudoVMERGE_VVM_MF4_TU ||
- Opcode == RISCV::PseudoVMERGE_VVM_MF2_TU ||
- Opcode == RISCV::PseudoVMERGE_VVM_M1_TU ||
- Opcode == RISCV::PseudoVMERGE_VVM_M2_TU ||
- Opcode == RISCV::PseudoVMERGE_VVM_M4_TU ||
- Opcode == RISCV::PseudoVMERGE_VVM_M8_TU;
- };
-
- auto IsVMergeTA = [](unsigned Opcode) {
- return Opcode == RISCV::PseudoVMERGE_VVM_MF8 ||
- Opcode == RISCV::PseudoVMERGE_VVM_MF4 ||
- Opcode == RISCV::PseudoVMERGE_VVM_MF2 ||
- Opcode == RISCV::PseudoVMERGE_VVM_M1 ||
- Opcode == RISCV::PseudoVMERGE_VVM_M2 ||
- Opcode == RISCV::PseudoVMERGE_VVM_M4 ||
- Opcode == RISCV::PseudoVMERGE_VVM_M8;
- };
-
- unsigned Opc = N->getMachineOpcode();
- // The following optimizations require that the merge operand of N is same
- // as the false operand of N.
- if ((IsVMergeTU(Opc) && N->getOperand(0) == N->getOperand(1)) ||
- IsVMergeTA(Opc))
- MadeChange |= performCombineVMergeAndVOps(N, IsVMergeTA(Opc));
- if (IsVMergeTU(Opc) && N->getOperand(0) == N->getOperand(1))
- MadeChange |= performVMergeToVAdd(N);
+ if (IsVMerge(N) || IsVMv(N))
+ MadeChange |= performCombineVMergeAndVOps(N);
+ if (IsVMerge(N) && N->getOperand(0) == N->getOperand(1))
+ MadeChange |= performVMergeToVMv(N);
}
return MadeChange;
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index 17205b8ba3d3..281719c12e70 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -1,4 +1,4 @@
-//===---- RISCVISelDAGToDAG.h - A dag to dag inst selector for RISCV ------===//
+//===---- RISCVISelDAGToDAG.h - A dag to dag inst selector for RISC-V -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines an instruction selector for the RISCV target.
+// This file defines an instruction selector for the RISC-V target.
//
//===----------------------------------------------------------------------===//
@@ -18,7 +18,7 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/KnownBits.h"
-// RISCV-specific code to select RISCV machine instructions for
+// RISC-V specific code to select RISC-V machine instructions for
// SelectionDAG operations.
namespace llvm {
class RISCVDAGToDAGISel : public SelectionDAGISel {
@@ -48,9 +48,39 @@ public:
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectFrameAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
- bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset,
+ bool IsINX = false);
+ bool SelectAddrRegImmINX(SDValue Addr, SDValue &Base, SDValue &Offset) {
+ return SelectAddrRegImm(Addr, Base, Offset, true);
+ }
+
+ bool SelectAddrRegRegScale(SDValue Addr, unsigned MaxShiftAmount,
+ SDValue &Base, SDValue &Index, SDValue &Scale);
+
+ template <unsigned MaxShift>
+ bool SelectAddrRegRegScale(SDValue Addr, SDValue &Base, SDValue &Index,
+ SDValue &Scale) {
+ return SelectAddrRegRegScale(Addr, MaxShift, Base, Index, Scale);
+ }
+
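+ // Like SelectAddrRegRegScale, but also strips an explicit zero-extension of
+ // the index: an index of the form (and X, (1 << Bits) - 1), e.g.
+ // (and X, 0xffffffff) for Bits == 32, is accepted as X.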
+ template <unsigned MaxShift, unsigned Bits>
+ bool SelectAddrRegZextRegScale(SDValue Addr, SDValue &Base, SDValue &Index,
+ SDValue &Scale) {
+ if (SelectAddrRegRegScale(Addr, MaxShift, Base, Index, Scale)) {
+ if (Index.getOpcode() == ISD::AND) {
+ auto *C = dyn_cast<ConstantSDNode>(Index.getOperand(1));
+ if (C && C->getZExtValue() == maskTrailingOnes<uint64_t>(Bits)) {
+ Index = Index.getOperand(0);
+ return true;
+ }
+ }
+ }
+ return false;
+ }
bool tryShrinkShlLogicImm(SDNode *Node);
+ bool trySignedBitfieldExtract(SDNode *Node);
+ bool tryIndexedLoad(SDNode *Node);
bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
bool selectShiftMaskXLen(SDValue N, SDValue &ShAmt) {
@@ -60,7 +90,18 @@ public:
return selectShiftMask(N, 32, ShAmt);
}
- bool selectSExti32(SDValue N, SDValue &Val);
+ bool selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal, SDValue &Val);
+ bool selectSETNE(SDValue N, SDValue &Val) {
+ return selectSETCC(N, ISD::SETNE, Val);
+ }
+ bool selectSETEQ(SDValue N, SDValue &Val) {
+ return selectSETCC(N, ISD::SETEQ, Val);
+ }
+
+ bool selectSExtBits(SDValue N, unsigned Bits, SDValue &Val);
+ template <unsigned Bits> bool selectSExtBits(SDValue N, SDValue &Val) {
+ return selectSExtBits(N, Bits, Val);
+ }
bool selectZExtBits(SDValue N, unsigned Bits, SDValue &Val);
template <unsigned Bits> bool selectZExtBits(SDValue N, SDValue &Val) {
return selectZExtBits(N, Bits, Val);
@@ -81,13 +122,20 @@ public:
bool hasAllHUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 16); }
bool hasAllWUsers(SDNode *Node) const { return hasAllNBitUsers(Node, 32); }
+ bool selectSimm5Shl2(SDValue N, SDValue &Simm5, SDValue &Shl2);
+
bool selectVLOp(SDValue N, SDValue &VL);
bool selectVSplat(SDValue N, SDValue &SplatVal);
bool selectVSplatSimm5(SDValue N, SDValue &SplatVal);
- bool selectVSplatUimm5(SDValue N, SDValue &SplatVal);
+ bool selectVSplatUimm(SDValue N, unsigned Bits, SDValue &SplatVal);
+ template <unsigned Bits> bool selectVSplatUimmBits(SDValue N, SDValue &Val) {
+ return selectVSplatUimm(N, Bits, Val);
+ }
bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal);
bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal);
+ bool selectExtOneUseVSplat(SDValue N, SDValue &SplatVal);
+ bool selectFPImm(SDValue N, SDValue &Imm);
bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm);
template <unsigned Width> bool selectRVVSimm5(SDValue N, SDValue &Imm) {
@@ -137,15 +185,14 @@ private:
bool doPeepholeSExtW(SDNode *Node);
bool doPeepholeMaskedRVV(SDNode *Node);
bool doPeepholeMergeVVMFold();
- bool performVMergeToVAdd(SDNode *N);
- bool performCombineVMergeAndVOps(SDNode *N, bool IsTA);
+ bool performVMergeToVMv(SDNode *N);
+ bool performCombineVMergeAndVOps(SDNode *N);
};
namespace RISCV {
struct VLSEGPseudo {
uint16_t NF : 4;
uint16_t Masked : 1;
- uint16_t IsTU : 1;
uint16_t Strided : 1;
uint16_t FF : 1;
uint16_t Log2SEW : 3;
@@ -156,7 +203,6 @@ struct VLSEGPseudo {
struct VLXSEGPseudo {
uint16_t NF : 4;
uint16_t Masked : 1;
- uint16_t IsTU : 1;
uint16_t Ordered : 1;
uint16_t Log2SEW : 3;
uint16_t LMUL : 3;
@@ -185,7 +231,6 @@ struct VSXSEGPseudo {
struct VLEPseudo {
uint16_t Masked : 1;
- uint16_t IsTU : 1;
uint16_t Strided : 1;
uint16_t FF : 1;
uint16_t Log2SEW : 3;
@@ -203,7 +248,6 @@ struct VSEPseudo {
struct VLX_VSXPseudo {
uint16_t Masked : 1;
- uint16_t IsTU : 1;
uint16_t Ordered : 1;
uint16_t Log2SEW : 3;
uint16_t LMUL : 3;
@@ -214,7 +258,6 @@ struct VLX_VSXPseudo {
struct RISCVMaskedPseudoInfo {
uint16_t MaskedPseudo;
uint16_t UnmaskedPseudo;
- uint16_t UnmaskedTUPseudo;
uint8_t MaskOpIdx;
};
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a8720d070acb..f49c5011607f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation --------===//
+//===-- RISCVISelLowering.cpp - RISC-V DAG Lowering Implementation -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the interfaces that RISCV uses to lower LLVM code into a
+// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
@@ -21,6 +21,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -31,6 +32,7 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
@@ -65,12 +67,18 @@ static cl::opt<unsigned> NumRepeatedDivisors(
"transformation to multiplications by the reciprocal"),
cl::init(2));
+static cl::opt<int>
+ FPImmCost(DEBUG_TYPE "-fpimm-cost", cl::Hidden,
+ cl::desc("Give the maximum number of instructions that we will "
+ "use for creating a floating-point immediate value"),
+ cl::init(2));
+
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
- if (Subtarget.isRV32E())
- report_fatal_error("Codegen not yet implemented for RV32E");
+ if (Subtarget.isRVE())
+ report_fatal_error("Codegen not yet implemented for RVE");
RISCVABI::ABI ABI = Subtarget.getTargetABI();
assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
@@ -108,10 +116,22 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasStdExtZfhOrZfhmin())
addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
+ if (Subtarget.hasStdExtZfbfmin())
+ addRegisterClass(MVT::bf16, &RISCV::FPR16RegClass);
if (Subtarget.hasStdExtF())
addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
if (Subtarget.hasStdExtD())
addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);
+ if (Subtarget.hasStdExtZhinxOrZhinxmin())
+ addRegisterClass(MVT::f16, &RISCV::GPRF16RegClass);
+ if (Subtarget.hasStdExtZfinx())
+ addRegisterClass(MVT::f32, &RISCV::GPRF32RegClass);
+ if (Subtarget.hasStdExtZdinx()) {
+ if (Subtarget.is64Bit())
+ addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
+ else
+ addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass);
+ }
static const MVT::SimpleValueType BoolVecVTs[] = {
MVT::nxv1i1, MVT::nxv2i1, MVT::nxv4i1, MVT::nxv8i1,
@@ -227,7 +247,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
- if (!Subtarget.hasStdExtZbb())
+ if (!Subtarget.hasStdExtZbb() && !Subtarget.hasVendorXTHeadBb())
setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
if (Subtarget.is64Bit()) {
@@ -238,6 +258,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
MVT::i32, Custom);
+ setOperationAction(ISD::SADDO, MVT::i32, Custom);
setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
MVT::i32, Custom);
} else {
@@ -247,25 +268,19 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::MULO_I64, nullptr);
}
- if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul()) {
+ if (!Subtarget.hasStdExtM() && !Subtarget.hasStdExtZmmul())
setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU}, XLenVT, Expand);
- } else {
- if (Subtarget.is64Bit()) {
- setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom);
- } else {
- setOperationAction(ISD::MUL, MVT::i64, Custom);
- }
- }
+ else if (Subtarget.is64Bit())
+ setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom);
+ else
+ setOperationAction(ISD::MUL, MVT::i64, Custom);
- if (!Subtarget.hasStdExtM()) {
+ if (!Subtarget.hasStdExtM())
setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM},
XLenVT, Expand);
- } else {
- if (Subtarget.is64Bit()) {
- setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
- {MVT::i8, MVT::i16, MVT::i32}, Custom);
- }
- }
+ else if (Subtarget.is64Bit())
+ setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
+ {MVT::i8, MVT::i16, MVT::i32}, Custom);
setOperationAction(
{ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
@@ -277,6 +292,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) {
if (Subtarget.is64Bit())
setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
+ } else if (Subtarget.hasVendorXTHeadBb()) {
+ if (Subtarget.is64Bit())
+ setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
+ setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Custom);
} else {
setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
}
@@ -284,7 +303,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
// pattern match it directly in isel.
setOperationAction(ISD::BSWAP, XLenVT,
- (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())
+ (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
+ Subtarget.hasVendorXTHeadBb())
? Legal
: Expand);
// Zbkb can use rev8+brev8 to implement bitreverse.
@@ -303,10 +323,19 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, XLenVT, Expand);
}
+ if (Subtarget.hasVendorXTHeadBb()) {
+ setOperationAction(ISD::CTLZ, XLenVT, Legal);
+
+ // We need the custom lowering to make sure that the resulting sequence
+ // for the 32bit case is efficient on 64bit targets.
+ if (Subtarget.is64Bit())
+ setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, MVT::i32, Custom);
+ }
+
if (Subtarget.is64Bit())
setOperationAction(ISD::ABS, MVT::i32, Custom);
- if (!Subtarget.hasVendorXVentanaCondOps())
+ if (!Subtarget.hasVendorXTHeadCondMov())
setOperationAction(ISD::SELECT, XLenVT, Custom);
static const unsigned FPLegalNodeTypes[] = {
@@ -324,20 +353,31 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
static const unsigned FPOpToExpand[] = {
ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW,
- ISD::FREM, ISD::FP16_TO_FP, ISD::FP_TO_FP16};
+ ISD::FREM};
static const unsigned FPRndMode[] = {
ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
ISD::FROUNDEVEN};
- if (Subtarget.hasStdExtZfhOrZfhmin())
+ if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
+ setOperationAction(ISD::BITCAST, MVT::i16, Custom);
+
+ if (Subtarget.hasStdExtZfbfmin()) {
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
+ setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
+ setOperationAction(ISD::FP_ROUND, MVT::bf16, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f32, Custom);
+ setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
+ setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
+ }
- if (Subtarget.hasStdExtZfhOrZfhmin()) {
- if (Subtarget.hasStdExtZfh()) {
+ if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
+ if (Subtarget.hasStdExtZfhOrZhinx()) {
setOperationAction(FPLegalNodeTypes, MVT::f16, Legal);
- setOperationAction(FPRndMode, MVT::f16, Custom);
+ setOperationAction(FPRndMode, MVT::f16,
+ Subtarget.hasStdExtZfa() ? Legal : Custom);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
+ setOperationAction(ISD::IS_FPCLASS, MVT::f16, Custom);
} else {
static const unsigned ZfhminPromoteOps[] = {
ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD,
@@ -365,7 +405,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
- setOperationAction({ISD::FREM, ISD::FNEARBYINT, ISD::FPOW, ISD::FPOWI,
+ setOperationAction(ISD::FNEARBYINT, MVT::f16,
+ Subtarget.hasStdExtZfa() ? Legal : Promote);
+ setOperationAction({ISD::FREM, ISD::FPOW, ISD::FPOWI,
ISD::FCOS, ISD::FSIN, ISD::FSINCOS, ISD::FEXP,
ISD::FEXP2, ISD::FLOG, ISD::FLOG2, ISD::FLOG10},
MVT::f16, Promote);
@@ -381,11 +423,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// We need to custom promote this.
if (Subtarget.is64Bit())
setOperationAction(ISD::FPOWI, MVT::i32, Custom);
+
+ if (!Subtarget.hasStdExtZfa())
+ setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f16, Custom);
}
- if (Subtarget.hasStdExtF()) {
+ if (Subtarget.hasStdExtFOrZfinx()) {
setOperationAction(FPLegalNodeTypes, MVT::f32, Legal);
- setOperationAction(FPRndMode, MVT::f32, Custom);
+ setOperationAction(FPRndMode, MVT::f32,
+ Subtarget.hasStdExtZfa() ? Legal : Custom);
setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
@@ -393,16 +439,37 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(FPOpToExpand, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
+ setOperationAction(ISD::IS_FPCLASS, MVT::f32, Custom);
+ setOperationAction(ISD::BF16_TO_FP, MVT::f32, Custom);
+ setOperationAction(ISD::FP_TO_BF16, MVT::f32,
+ Subtarget.isSoftFPABI() ? LibCall : Custom);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f32, Custom);
+
+ if (Subtarget.hasStdExtZfa())
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ else
+ setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f32, Custom);
}
- if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
+ if (Subtarget.hasStdExtFOrZfinx() && Subtarget.is64Bit())
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
- if (Subtarget.hasStdExtD()) {
+ if (Subtarget.hasStdExtDOrZdinx()) {
setOperationAction(FPLegalNodeTypes, MVT::f64, Legal);
- if (Subtarget.is64Bit()) {
- setOperationAction(FPRndMode, MVT::f64, Custom);
+
+ if (Subtarget.hasStdExtZfa()) {
+ setOperationAction(FPRndMode, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+ setOperationAction(ISD::BITCAST, MVT::i64, Custom);
+ setOperationAction(ISD::BITCAST, MVT::f64, Custom);
+ } else {
+ if (Subtarget.is64Bit())
+ setOperationAction(FPRndMode, MVT::f64, Custom);
+
+ setOperationAction({ISD::FMAXIMUM, ISD::FMINIMUM}, MVT::f64, Custom);
}
+
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
@@ -414,14 +481,22 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(FPOpToExpand, MVT::f64, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
+ setOperationAction(ISD::IS_FPCLASS, MVT::f64, Custom);
+ setOperationAction(ISD::BF16_TO_FP, MVT::f64, Custom);
+ setOperationAction(ISD::FP_TO_BF16, MVT::f64,
+ Subtarget.isSoftFPABI() ? LibCall : Custom);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
+ setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
}
- if (Subtarget.is64Bit())
+ if (Subtarget.is64Bit()) {
setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
MVT::i32, Custom);
+ setOperationAction(ISD::LROUND, MVT::i32, Custom);
+ }
- if (Subtarget.hasStdExtF()) {
+ if (Subtarget.hasStdExtFOrZfinx()) {
setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
Custom);
@@ -452,6 +527,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.is64Bit())
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
+ if (Subtarget.hasStdExtZicbop()) {
+ setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
+ }
+
if (Subtarget.hasStdExtA()) {
setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
setMinCmpXchgSizeInBits(32);
@@ -472,10 +551,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// RVV intrinsics may have illegal operands.
// We also need to custom legalize vmv.x.s.
- setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
+ setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN,
+ ISD::INTRINSIC_VOID},
{MVT::i8, MVT::i16}, Custom);
if (Subtarget.is64Bit())
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
+ setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
+ MVT::i32, Custom);
else
setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
MVT::i64, Custom);
@@ -541,7 +622,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Mask VTs are custom-expanded into a series of standard nodes
setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
- ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
+ ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
+ ISD::SCALAR_TO_VECTOR},
VT, Custom);
setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
@@ -566,9 +648,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// element type sizes are within one power-of-two of each other. Any
// wider distances between type sizes have to be lowered as sequences
// which progressively narrow the gap in stages.
- setOperationAction(
- {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
- VT, Custom);
+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
+ ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
+ ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
+ ISD::STRICT_FP_TO_UINT},
+ VT, Custom);
setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
Custom);
@@ -583,6 +667,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::VP_FP_TO_SINT, ISD::VP_FP_TO_UINT,
ISD::VP_TRUNCATE, ISD::VP_SETCC},
VT, Custom);
+
+ setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
+ setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
+
setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
setOperationPromotedToType(
@@ -607,16 +695,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
Legal);
- setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
-
- setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, VT, Expand);
-
- setOperationAction(ISD::BSWAP, VT, Expand);
- setOperationAction({ISD::VP_BSWAP, ISD::VP_BITREVERSE}, VT, Expand);
setOperationAction({ISD::VP_FSHL, ISD::VP_FSHR}, VT, Expand);
- setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
- ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
- VT, Expand);
// Custom-lower extensions and truncations from/to mask types.
setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
@@ -626,9 +705,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// element type sizes are within one power-of-two of each other. Any
// wider distances between type sizes have to be lowered as sequences
// which progressively narrow the gap in stages.
- setOperationAction(
- {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
- VT, Custom);
+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
+ ISD::FP_TO_UINT, ISD::STRICT_SINT_TO_FP,
+ ISD::STRICT_UINT_TO_FP, ISD::STRICT_FP_TO_SINT,
+ ISD::STRICT_FP_TO_UINT},
+ VT, Custom);
setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
Custom);
@@ -659,9 +740,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
VT, Custom);
- setOperationAction(
- {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
- VT, Custom);
+ setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+ ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
+ VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -674,16 +755,37 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
VT, Expand);
}
+ setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
+ setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
+
// Splice
setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
- // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the range
- // of f32.
- EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
- if (isTypeLegal(FloatVT)) {
- setOperationAction(
- {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
- Custom);
+ if (Subtarget.hasStdExtZvbb()) {
+ setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Legal);
+ setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Custom);
+ setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
+ ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
+ VT, Custom);
+ } else {
+ setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, VT, Expand);
+ setOperationAction({ISD::VP_BITREVERSE, ISD::VP_BSWAP}, VT, Expand);
+ setOperationAction({ISD::CTLZ, ISD::CTTZ, ISD::CTPOP}, VT, Expand);
+ setOperationAction({ISD::VP_CTLZ, ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ,
+ ISD::VP_CTTZ_ZERO_UNDEF, ISD::VP_CTPOP},
+ VT, Expand);
+
+ // Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if element of VT in the
+ // range of f32.
+ EVT FloatVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ if (isTypeLegal(FloatVT)) {
+ setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF,
+ ISD::CTTZ_ZERO_UNDEF, ISD::VP_CTLZ,
+ ISD::VP_CTLZ_ZERO_UNDEF, ISD::VP_CTTZ_ZERO_UNDEF},
+ VT, Custom);
+ }
+
+ setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
}
}
@@ -716,9 +818,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
- setOperationAction(
- {ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN},
- VT, Custom);
+ setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
+ ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT,
+ ISD::IS_FPCLASS},
+ VT, Custom);
setOperationAction(FloatingPointVecReduceOps, VT, Custom);
@@ -733,8 +836,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FLOG, VT, Expand);
setOperationAction(ISD::FLOG2, VT, Expand);
setOperationAction(ISD::FLOG10, VT, Expand);
- setOperationAction(ISD::FRINT, VT, Expand);
- setOperationAction(ISD::FNEARBYINT, VT, Expand);
setOperationAction(ISD::FCOPYSIGN, VT, Legal);
@@ -751,13 +852,27 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
- setOperationAction(
- {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
- VT, Custom);
+ setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+ ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
+ VT, Custom);
+
+ setOperationAction(ISD::VECTOR_DEINTERLEAVE, VT, Custom);
+ setOperationAction(ISD::VECTOR_INTERLEAVE, VT, Custom);
setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);
setOperationAction(FloatingPointVPOps, VT, Custom);
+
+ setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
+ Custom);
+ setOperationAction({ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA},
+ VT, Legal);
+ setOperationAction({ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS,
+ ISD::STRICT_FTRUNC, ISD::STRICT_FCEIL,
+ ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
+ ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
+ VT, Custom);
};
// Sets common extload/truncstore actions on RVV floating-point vector
@@ -811,6 +926,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
OtherVT, VT, Expand);
}
+ // Custom lower fixed vector undefs to scalable vector undefs to avoid
+ // expansion to a build_vector of 0s.
+ setOperationAction(ISD::UNDEF, VT, Custom);
+
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
Custom);
@@ -821,6 +940,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
VT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
+
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
@@ -839,12 +960,23 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
{ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
Custom);
- setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
- ISD::FP_TO_UINT},
- VT, Custom);
+ setOperationAction(
+ {
+ ISD::SINT_TO_FP,
+ ISD::UINT_TO_FP,
+ ISD::FP_TO_SINT,
+ ISD::FP_TO_UINT,
+ ISD::STRICT_SINT_TO_FP,
+ ISD::STRICT_UINT_TO_FP,
+ ISD::STRICT_FP_TO_SINT,
+ ISD::STRICT_FP_TO_UINT,
+ },
+ VT, Custom);
setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+
// Operations below are different for between masks and other vectors.
if (VT.getVectorElementType() == MVT::i1) {
setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
@@ -867,8 +999,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
}
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
-
setOperationAction(
{ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
@@ -919,16 +1049,22 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
+ // There are no extending loads or truncating stores.
+ for (MVT InnerVT : MVT::fp_fixedlen_vector_valuetypes()) {
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
+ setTruncStoreAction(VT, InnerVT, Expand);
+ }
+
if (!useRVVForFixedLengthVectorVT(VT))
continue;
// By default everything must be expanded.
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);
- for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
- setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
- setTruncStoreAction(VT, OtherVT, Expand);
- }
+
+ // Custom lower fixed vector undefs to scalable vector undefs to avoid
+ // expansion to a build_vector of 0s.
+ setOperationAction(ISD::UNDEF, VT, Custom);
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
@@ -951,17 +1087,19 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
- ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM},
+ ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM,
+ ISD::IS_FPCLASS},
VT, Custom);
setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND,
- ISD::FROUNDEVEN},
+ ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT},
VT, Custom);
setCondCodeAction(VFPCCToExpand, VT, Expand);
+ setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -970,16 +1108,26 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(FloatingPointVecReduceOps, VT, Custom);
setOperationAction(FloatingPointVPOps, VT, Custom);
+
+ setOperationAction({ISD::STRICT_FP_EXTEND, ISD::STRICT_FP_ROUND}, VT,
+ Custom);
+ setOperationAction(
+ {ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
+ ISD::STRICT_FDIV, ISD::STRICT_FSQRT, ISD::STRICT_FMA,
+ ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS, ISD::STRICT_FTRUNC,
+ ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
+ ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
+ VT, Custom);
}
// Custom-legalize bitcasts from fixed-length vectors to scalar types.
setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
Custom);
- if (Subtarget.hasStdExtZfhOrZfhmin())
+ if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
- if (Subtarget.hasStdExtF())
+ if (Subtarget.hasStdExtFOrZfinx())
setOperationAction(ISD::BITCAST, MVT::f32, Custom);
- if (Subtarget.hasStdExtD())
+ if (Subtarget.hasStdExtDOrZdinx())
setOperationAction(ISD::BITCAST, MVT::f64, Custom);
}
}
@@ -994,22 +1142,42 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
XLenVT, Expand);
}
+ if (Subtarget.hasVendorXTHeadMemIdx()) {
+ for (unsigned im = (unsigned)ISD::PRE_INC; im != (unsigned)ISD::POST_DEC;
+ ++im) {
+ setIndexedLoadAction(im, MVT::i8, Legal);
+ setIndexedStoreAction(im, MVT::i8, Legal);
+ setIndexedLoadAction(im, MVT::i16, Legal);
+ setIndexedStoreAction(im, MVT::i16, Legal);
+ setIndexedLoadAction(im, MVT::i32, Legal);
+ setIndexedStoreAction(im, MVT::i32, Legal);
+
+ if (Subtarget.is64Bit()) {
+ setIndexedLoadAction(im, MVT::i64, Legal);
+ setIndexedStoreAction(im, MVT::i64, Legal);
+ }
+ }
+ }
+
// Function alignments.
const Align FunctionAlignment(Subtarget.hasStdExtCOrZca() ? 2 : 4);
setMinFunctionAlignment(FunctionAlignment);
- setPrefFunctionAlignment(FunctionAlignment);
+ // Set preferred alignments.
+ setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
+ setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
setMinimumJumpTableEntries(5);
// Jumps are expensive, compared to logic
setJumpIsExpensive();
- setTargetDAGCombine({ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
+ setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
+ ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
if (Subtarget.is64Bit())
setTargetDAGCombine(ISD::SRA);
- if (Subtarget.hasStdExtF())
+ if (Subtarget.hasStdExtFOrZfinx())
setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});
if (Subtarget.hasStdExtZbb())
@@ -1020,20 +1188,26 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasStdExtZbkb())
setTargetDAGCombine(ISD::BITREVERSE);
- if (Subtarget.hasStdExtZfhOrZfhmin())
+ if (Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
- if (Subtarget.hasStdExtF())
+ if (Subtarget.hasStdExtFOrZfinx())
setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
if (Subtarget.hasVInstructions())
setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
- ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR});
+ ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
+ ISD::CONCAT_VECTORS});
+ if (Subtarget.hasVendorXTHeadMemPair())
+ setTargetDAGCombine({ISD::LOAD, ISD::STORE});
if (Subtarget.useRVVForFixedLengthVectors())
setTargetDAGCombine(ISD::BITCAST);
setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
+
+ // Disable strict node mutation.
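+ // (keep the STRICT_* nodes so they reach our custom lowering instead of
+ // being mutated into their non-strict equivalents).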
+ IsStrictFPEnabled = true;
}
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
@@ -1051,11 +1225,68 @@ MVT RISCVTargetLowering::getVPExplicitVectorLengthTy() const {
return Subtarget.getXLenVT();
}
+// Return false if we can lower get_vector_length to a vsetvli intrinsic.
+bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT,
+ unsigned VF,
+ bool IsScalable) const {
+ if (!Subtarget.hasVInstructions())
+ return true;
+
+ if (!IsScalable)
+ return true;
+
+ if (TripCountVT != MVT::i32 && TripCountVT != Subtarget.getXLenVT())
+ return true;
+
+ // Don't allow VF=1 if those types aren't legal.
+ if (VF < RISCV::RVVBitsPerBlock / Subtarget.getELEN())
+ return true;
+
+ // VLEN=32 support is incomplete.
+ if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
+ return true;
+
+ // The maximum VF is for the smallest element width with LMUL=8.
+ // VF must be a power of 2.
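+ // With RVVBitsPerBlock = 64 this gives MaxVF = (64 / 8) * 8 = 64.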
+ unsigned MaxVF = (RISCV::RVVBitsPerBlock / 8) * 8;
+ return VF > MaxVF || !isPowerOf2_32(VF);
+}
+
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
const CallInst &I,
MachineFunction &MF,
unsigned Intrinsic) const {
auto &DL = I.getModule()->getDataLayout();
+
+ auto SetRVVLoadStoreInfo = [&](unsigned PtrOp, bool IsStore,
+ bool IsUnitStrided) {
+ Info.opc = IsStore ? ISD::INTRINSIC_VOID : ISD::INTRINSIC_W_CHAIN;
+ Info.ptrVal = I.getArgOperand(PtrOp);
+ Type *MemTy;
+ if (IsStore) {
+ // Store value is the first operand.
+ MemTy = I.getArgOperand(0)->getType();
+ } else {
+ // Use the return type. If it's a segment load, the return type is a struct.
+ MemTy = I.getType();
+ if (MemTy->isStructTy())
+ MemTy = MemTy->getStructElementType(0);
+ }
+ if (!IsUnitStrided)
+ MemTy = MemTy->getScalarType();
+
+ Info.memVT = getValueType(DL, MemTy);
+ Info.align = Align(DL.getTypeSizeInBits(MemTy->getScalarType()) / 8);
+ Info.size = MemoryLocation::UnknownSize;
+ Info.flags |=
+ IsStore ? MachineMemOperand::MOStore : MachineMemOperand::MOLoad;
+ return true;
+ };
+
+ if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
+ Info.flags |= MachineMemOperand::MONonTemporal;
+
+ Info.flags |= RISCVTargetLowering::getTargetMMOFlags(I);
switch (Intrinsic) {
default:
return false;
@@ -1077,24 +1308,11 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
MachineMemOperand::MOVolatile;
return true;
case Intrinsic::riscv_masked_strided_load:
- Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.ptrVal = I.getArgOperand(1);
- Info.memVT = getValueType(DL, I.getType()->getScalarType());
- Info.align = Align(DL.getTypeSizeInBits(I.getType()->getScalarType()) / 8);
- Info.size = MemoryLocation::UnknownSize;
- Info.flags |= MachineMemOperand::MOLoad;
- return true;
+ return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ false,
+ /*IsUnitStrided*/ false);
case Intrinsic::riscv_masked_strided_store:
- Info.opc = ISD::INTRINSIC_VOID;
- Info.ptrVal = I.getArgOperand(1);
- Info.memVT =
- getValueType(DL, I.getArgOperand(0)->getType()->getScalarType());
- Info.align = Align(
- DL.getTypeSizeInBits(I.getArgOperand(0)->getType()->getScalarType()) /
- 8);
- Info.size = MemoryLocation::UnknownSize;
- Info.flags |= MachineMemOperand::MOStore;
- return true;
+ return SetRVVLoadStoreInfo(/*PtrOp*/ 1, /*IsStore*/ true,
+ /*IsUnitStrided*/ false);
case Intrinsic::riscv_seg2_load:
case Intrinsic::riscv_seg3_load:
case Intrinsic::riscv_seg4_load:
@@ -1102,17 +1320,199 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::riscv_seg6_load:
case Intrinsic::riscv_seg7_load:
case Intrinsic::riscv_seg8_load:
- Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.ptrVal = I.getArgOperand(0);
- Info.memVT =
- getValueType(DL, I.getType()->getStructElementType(0)->getScalarType());
- Info.align =
- Align(DL.getTypeSizeInBits(
- I.getType()->getStructElementType(0)->getScalarType()) /
- 8);
- Info.size = MemoryLocation::UnknownSize;
- Info.flags |= MachineMemOperand::MOLoad;
- return true;
+ return SetRVVLoadStoreInfo(/*PtrOp*/ 0, /*IsStore*/ false,
+ /*IsUnitStrided*/ false);
+ case Intrinsic::riscv_seg2_store:
+ case Intrinsic::riscv_seg3_store:
+ case Intrinsic::riscv_seg4_store:
+ case Intrinsic::riscv_seg5_store:
+ case Intrinsic::riscv_seg6_store:
+ case Intrinsic::riscv_seg7_store:
+ case Intrinsic::riscv_seg8_store:
+ // Operands are (vec, ..., vec, ptr, vl)
+ return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
+ /*IsStore*/ true,
+ /*IsUnitStrided*/ false);
+ case Intrinsic::riscv_vle:
+ case Intrinsic::riscv_vle_mask:
+ case Intrinsic::riscv_vleff:
+ case Intrinsic::riscv_vleff_mask:
+ return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
+ /*IsStore*/ false,
+ /*IsUnitStrided*/ true);
+ case Intrinsic::riscv_vse:
+ case Intrinsic::riscv_vse_mask:
+ return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
+ /*IsStore*/ true,
+ /*IsUnitStrided*/ true);
+ case Intrinsic::riscv_vlse:
+ case Intrinsic::riscv_vlse_mask:
+ case Intrinsic::riscv_vloxei:
+ case Intrinsic::riscv_vloxei_mask:
+ case Intrinsic::riscv_vluxei:
+ case Intrinsic::riscv_vluxei_mask:
+ return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
+ /*IsStore*/ false,
+ /*IsUnitStrided*/ false);
+ case Intrinsic::riscv_vsse:
+ case Intrinsic::riscv_vsse_mask:
+ case Intrinsic::riscv_vsoxei:
+ case Intrinsic::riscv_vsoxei_mask:
+ case Intrinsic::riscv_vsuxei:
+ case Intrinsic::riscv_vsuxei_mask:
+ return SetRVVLoadStoreInfo(/*PtrOp*/ 1,
+ /*IsStore*/ true,
+ /*IsUnitStrided*/ false);
+ case Intrinsic::riscv_vlseg2:
+ case Intrinsic::riscv_vlseg3:
+ case Intrinsic::riscv_vlseg4:
+ case Intrinsic::riscv_vlseg5:
+ case Intrinsic::riscv_vlseg6:
+ case Intrinsic::riscv_vlseg7:
+ case Intrinsic::riscv_vlseg8:
+ case Intrinsic::riscv_vlseg2ff:
+ case Intrinsic::riscv_vlseg3ff:
+ case Intrinsic::riscv_vlseg4ff:
+ case Intrinsic::riscv_vlseg5ff:
+ case Intrinsic::riscv_vlseg6ff:
+ case Intrinsic::riscv_vlseg7ff:
+ case Intrinsic::riscv_vlseg8ff:
+ return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
+ /*IsStore*/ false,
+ /*IsUnitStrided*/ false);
+ case Intrinsic::riscv_vlseg2_mask:
+ case Intrinsic::riscv_vlseg3_mask:
+ case Intrinsic::riscv_vlseg4_mask:
+ case Intrinsic::riscv_vlseg5_mask:
+ case Intrinsic::riscv_vlseg6_mask:
+ case Intrinsic::riscv_vlseg7_mask:
+ case Intrinsic::riscv_vlseg8_mask:
+ case Intrinsic::riscv_vlseg2ff_mask:
+ case Intrinsic::riscv_vlseg3ff_mask:
+ case Intrinsic::riscv_vlseg4ff_mask:
+ case Intrinsic::riscv_vlseg5ff_mask:
+ case Intrinsic::riscv_vlseg6ff_mask:
+ case Intrinsic::riscv_vlseg7ff_mask:
+ case Intrinsic::riscv_vlseg8ff_mask:
+ return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
+ /*IsStore*/ false,
+ /*IsUnitStrided*/ false);
+ case Intrinsic::riscv_vlsseg2:
+ case Intrinsic::riscv_vlsseg3:
+ case Intrinsic::riscv_vlsseg4:
+ case Intrinsic::riscv_vlsseg5:
+ case Intrinsic::riscv_vlsseg6:
+ case Intrinsic::riscv_vlsseg7:
+ case Intrinsic::riscv_vlsseg8:
+ case Intrinsic::riscv_vloxseg2:
+ case Intrinsic::riscv_vloxseg3:
+ case Intrinsic::riscv_vloxseg4:
+ case Intrinsic::riscv_vloxseg5:
+ case Intrinsic::riscv_vloxseg6:
+ case Intrinsic::riscv_vloxseg7:
+ case Intrinsic::riscv_vloxseg8:
+ case Intrinsic::riscv_vluxseg2:
+ case Intrinsic::riscv_vluxseg3:
+ case Intrinsic::riscv_vluxseg4:
+ case Intrinsic::riscv_vluxseg5:
+ case Intrinsic::riscv_vluxseg6:
+ case Intrinsic::riscv_vluxseg7:
+ case Intrinsic::riscv_vluxseg8:
+ return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
+ /*IsStore*/ false,
+ /*IsUnitStrided*/ false);
+ case Intrinsic::riscv_vlsseg2_mask:
+ case Intrinsic::riscv_vlsseg3_mask:
+ case Intrinsic::riscv_vlsseg4_mask:
+ case Intrinsic::riscv_vlsseg5_mask:
+ case Intrinsic::riscv_vlsseg6_mask:
+ case Intrinsic::riscv_vlsseg7_mask:
+ case Intrinsic::riscv_vlsseg8_mask:
+ case Intrinsic::riscv_vloxseg2_mask:
+ case Intrinsic::riscv_vloxseg3_mask:
+ case Intrinsic::riscv_vloxseg4_mask:
+ case Intrinsic::riscv_vloxseg5_mask:
+ case Intrinsic::riscv_vloxseg6_mask:
+ case Intrinsic::riscv_vloxseg7_mask:
+ case Intrinsic::riscv_vloxseg8_mask:
+ case Intrinsic::riscv_vluxseg2_mask:
+ case Intrinsic::riscv_vluxseg3_mask:
+ case Intrinsic::riscv_vluxseg4_mask:
+ case Intrinsic::riscv_vluxseg5_mask:
+ case Intrinsic::riscv_vluxseg6_mask:
+ case Intrinsic::riscv_vluxseg7_mask:
+ case Intrinsic::riscv_vluxseg8_mask:
+ return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 5,
+ /*IsStore*/ false,
+ /*IsUnitStrided*/ false);
+ case Intrinsic::riscv_vsseg2:
+ case Intrinsic::riscv_vsseg3:
+ case Intrinsic::riscv_vsseg4:
+ case Intrinsic::riscv_vsseg5:
+ case Intrinsic::riscv_vsseg6:
+ case Intrinsic::riscv_vsseg7:
+ case Intrinsic::riscv_vsseg8:
+ return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 2,
+ /*IsStore*/ true,
+ /*IsUnitStrided*/ false);
+ case Intrinsic::riscv_vsseg2_mask:
+ case Intrinsic::riscv_vsseg3_mask:
+ case Intrinsic::riscv_vsseg4_mask:
+ case Intrinsic::riscv_vsseg5_mask:
+ case Intrinsic::riscv_vsseg6_mask:
+ case Intrinsic::riscv_vsseg7_mask:
+ case Intrinsic::riscv_vsseg8_mask:
+ return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
+ /*IsStore*/ true,
+ /*IsUnitStrided*/ false);
+ case Intrinsic::riscv_vssseg2:
+ case Intrinsic::riscv_vssseg3:
+ case Intrinsic::riscv_vssseg4:
+ case Intrinsic::riscv_vssseg5:
+ case Intrinsic::riscv_vssseg6:
+ case Intrinsic::riscv_vssseg7:
+ case Intrinsic::riscv_vssseg8:
+ case Intrinsic::riscv_vsoxseg2:
+ case Intrinsic::riscv_vsoxseg3:
+ case Intrinsic::riscv_vsoxseg4:
+ case Intrinsic::riscv_vsoxseg5:
+ case Intrinsic::riscv_vsoxseg6:
+ case Intrinsic::riscv_vsoxseg7:
+ case Intrinsic::riscv_vsoxseg8:
+ case Intrinsic::riscv_vsuxseg2:
+ case Intrinsic::riscv_vsuxseg3:
+ case Intrinsic::riscv_vsuxseg4:
+ case Intrinsic::riscv_vsuxseg5:
+ case Intrinsic::riscv_vsuxseg6:
+ case Intrinsic::riscv_vsuxseg7:
+ case Intrinsic::riscv_vsuxseg8:
+ return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 3,
+ /*IsStore*/ true,
+ /*IsUnitStrided*/ false);
+ case Intrinsic::riscv_vssseg2_mask:
+ case Intrinsic::riscv_vssseg3_mask:
+ case Intrinsic::riscv_vssseg4_mask:
+ case Intrinsic::riscv_vssseg5_mask:
+ case Intrinsic::riscv_vssseg6_mask:
+ case Intrinsic::riscv_vssseg7_mask:
+ case Intrinsic::riscv_vssseg8_mask:
+ case Intrinsic::riscv_vsoxseg2_mask:
+ case Intrinsic::riscv_vsoxseg3_mask:
+ case Intrinsic::riscv_vsoxseg4_mask:
+ case Intrinsic::riscv_vsoxseg5_mask:
+ case Intrinsic::riscv_vsoxseg6_mask:
+ case Intrinsic::riscv_vsoxseg7_mask:
+ case Intrinsic::riscv_vsoxseg8_mask:
+ case Intrinsic::riscv_vsuxseg2_mask:
+ case Intrinsic::riscv_vsuxseg3_mask:
+ case Intrinsic::riscv_vsuxseg4_mask:
+ case Intrinsic::riscv_vsuxseg5_mask:
+ case Intrinsic::riscv_vsuxseg6_mask:
+ case Intrinsic::riscv_vsuxseg7_mask:
+ case Intrinsic::riscv_vsuxseg8_mask:
+ return SetRVVLoadStoreInfo(/*PtrOp*/ I.arg_size() - 4,
+ /*IsStore*/ true,
+ /*IsUnitStrided*/ false);
}
}
@@ -1206,7 +1606,7 @@ bool RISCVTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
}
bool RISCVTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
- return Subtarget.hasStdExtZbb();
+ return Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb();
}
bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
@@ -1217,7 +1617,7 @@ bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
// on the basis that it's possible the sinking+duplication of the AND in
// CodeGenPrepare triggered by this hook wouldn't decrease the instruction
// count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
- if (!Subtarget.hasStdExtZbs())
+ if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
return false;
ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
if (!Mask)
@@ -1240,8 +1640,11 @@ bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
// Zbs provides BEXT[_I], which can be used with SEQZ/SNEZ as a bit test.
if (Subtarget.hasStdExtZbs())
return X.getValueType().isScalarInteger();
- // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
auto *C = dyn_cast<ConstantSDNode>(Y);
+ // XTheadBs provides th.tst (similar to bexti) if Y is a constant.
+ if (Subtarget.hasVendorXTHeadBs())
+ return C != nullptr;
+ // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
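+ // The bound of 10 keeps the (1 << Y) mask within ANDI's 12-bit signed
+ // immediate range (1 << 11 would not fit).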
return C && C->getAPIntValue().ule(10);
}
@@ -1372,6 +1775,8 @@ bool RISCVTargetLowering::canSplatOperand(Instruction *I, int Operand) const {
case Intrinsic::vp_xor:
case Intrinsic::vp_fadd:
case Intrinsic::vp_fmul:
+ case Intrinsic::vp_icmp:
+ case Intrinsic::vp_fcmp:
// These intrinsics have 'vr' versions.
case Intrinsic::vp_sub:
case Intrinsic::vp_fsub:
@@ -1406,6 +1811,10 @@ bool RISCVTargetLowering::shouldSinkOperands(
m_Undef(), m_ZeroMask())))
continue;
+ // Don't sink i1 splats.
+ if (cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(1))
+ continue;
+
// All uses of the shuffle should be sunk to avoid duplicating it across gpr
// and vector registers
for (Use &U : Op->uses()) {
@@ -1448,15 +1857,58 @@ bool RISCVTargetLowering::isOffsetFoldingLegal(
return false;
}
+// Returns 0-31 if the fli instruction is available for the type and this is a
+// legal FP immediate for the type. Returns -1 otherwise.
+int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const {
+ if (!Subtarget.hasStdExtZfa())
+ return -1;
+
+ bool IsSupportedVT = false;
+ if (VT == MVT::f16) {
+ IsSupportedVT = Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZvfh();
+ } else if (VT == MVT::f32) {
+ IsSupportedVT = true;
+ } else if (VT == MVT::f64) {
+ assert(Subtarget.hasStdExtD() && "Expect D extension");
+ IsSupportedVT = true;
+ }
+
+ if (!IsSupportedVT)
+ return -1;
+
+ return RISCVLoadFPImm::getLoadFPImm(Imm);
+}
+
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
- if (VT == MVT::f16 && !Subtarget.hasStdExtZfhOrZfhmin())
- return false;
- if (VT == MVT::f32 && !Subtarget.hasStdExtF())
- return false;
- if (VT == MVT::f64 && !Subtarget.hasStdExtD())
+ bool IsLegalVT = false;
+ if (VT == MVT::f16)
+ IsLegalVT = Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin();
+ else if (VT == MVT::f32)
+ IsLegalVT = Subtarget.hasStdExtFOrZfinx();
+ else if (VT == MVT::f64)
+ IsLegalVT = Subtarget.hasStdExtDOrZdinx();
+
+ if (!IsLegalVT)
return false;
- return Imm.isZero();
+
+ if (getLegalZfaFPImm(Imm, VT) >= 0)
+ return true;
+
+ // Cannot create a 64 bit floating-point immediate value for rv32.
+ if (Subtarget.getXLen() < VT.getScalarSizeInBits()) {
+ // td can handle +0.0 or -0.0 already.
+ // -0.0 can be created by fmv + fneg.
+ return Imm.isZero();
+ }
+ // Special case: the cost for -0.0 is 1.
+ int Cost = Imm.isNegZero()
+ ? 1
+ : RISCVMatInt::getIntMatCost(Imm.bitcastToAPInt(),
+ Subtarget.getXLen(),
+ Subtarget.getFeatureBits());
+ // If the constantpool data is already in cache, only Cost 1 is cheaper.
+ return Cost < FPImmCost;
}
// TODO: This is very conservative.
@@ -1492,19 +1944,13 @@ bool RISCVTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
return Index == 0 || Index == ResElts;
}
-bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
- return (VT == MVT::f16 && Subtarget.hasStdExtZfhOrZfhmin()) ||
- (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
- (VT == MVT::f64 && Subtarget.hasStdExtD());
-}
-
MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
CallingConv::ID CC,
EVT VT) const {
// Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
// We might still end up using a GPR but that will be decided based on ABI.
- if (VT == MVT::f16 && Subtarget.hasStdExtF() &&
- !Subtarget.hasStdExtZfhOrZfhmin())
+ if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
+ !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
return MVT::f32;
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
@@ -1515,8 +1961,8 @@ unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context
EVT VT) const {
// Use f32 to pass f16 if it is legal and Zfh/Zfhmin is not enabled.
// We might still end up using a GPR but that will be decided based on ABI.
- if (VT == MVT::f16 && Subtarget.hasStdExtF() &&
- !Subtarget.hasStdExtZfhOrZfhmin())
+ if (VT == MVT::f16 && Subtarget.hasStdExtFOrZfinx() &&
+ !Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin())
return 1;
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
@@ -1704,27 +2150,30 @@ bool RISCVTargetLowering::mergeStoresAfterLegalization(EVT VT) const {
(VT.isFixedLengthVector() && VT.getVectorElementType() == MVT::i1);
}
-bool RISCVTargetLowering::isLegalElementTypeForRVV(Type *ScalarTy) const {
- if (ScalarTy->isPointerTy())
- return true;
-
- if (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
- ScalarTy->isIntegerTy(32))
+bool RISCVTargetLowering::isLegalElementTypeForRVV(EVT ScalarTy) const {
+ if (!ScalarTy.isSimple())
+ return false;
+ switch (ScalarTy.getSimpleVT().SimpleTy) {
+ case MVT::iPTR:
+ return Subtarget.is64Bit() ? Subtarget.hasVInstructionsI64() : true;
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
return true;
-
- if (ScalarTy->isIntegerTy(64))
+ case MVT::i64:
return Subtarget.hasVInstructionsI64();
-
- if (ScalarTy->isHalfTy())
+ case MVT::f16:
return Subtarget.hasVInstructionsF16();
- if (ScalarTy->isFloatTy())
+ case MVT::f32:
return Subtarget.hasVInstructionsF32();
- if (ScalarTy->isDoubleTy())
+ case MVT::f64:
return Subtarget.hasVInstructionsF64();
-
- return false;
+ default:
+ return false;
+ }
}
+
unsigned RISCVTargetLowering::combineRepeatedFPDivisors() const {
return NumRepeatedDivisors;
}
@@ -1893,20 +2342,20 @@ static MVT getMaskTypeFor(MVT VecVT) {
/// Creates an all ones mask suitable for masking a vector of type VecTy with
/// vector length VL.
-static SDValue getAllOnesMask(MVT VecVT, SDValue VL, SDLoc DL,
+static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
SelectionDAG &DAG) {
MVT MaskVT = getMaskTypeFor(VecVT);
return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
}
-static SDValue getVLOp(uint64_t NumElts, SDLoc DL, SelectionDAG &DAG,
+static SDValue getVLOp(uint64_t NumElts, const SDLoc &DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
}
static std::pair<SDValue, SDValue>
-getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
+ SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
SDValue VL = getVLOp(NumElts, DL, DAG, Subtarget);
SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
@@ -1918,7 +2367,7 @@ getDefaultVLOps(uint64_t NumElts, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
// the vector type that the fixed-length vector is contained in. Otherwise if
// VecVT is scalable, then ContainerVT should be the same as VecVT.
static std::pair<SDValue, SDValue>
-getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
+getDefaultVLOps(MVT VecVT, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
if (VecVT.isFixedLengthVector())
return getDefaultVLOps(VecVT.getVectorNumElements(), ContainerVT, DL, DAG,
@@ -1932,12 +2381,19 @@ getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
// As above but assuming the given type is a scalable vector type.
static std::pair<SDValue, SDValue>
-getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
+getDefaultScalableVLOps(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(VecVT.isScalableVector() && "Expecting a scalable vector");
return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
}
+SDValue RISCVTargetLowering::computeVLMax(MVT VecVT, const SDLoc &DL,
+ SelectionDAG &DAG) const {
+ assert(VecVT.isScalableVector() && "Expected scalable vector");
+ return DAG.getElementCount(DL, Subtarget.getXLenVT(),
+ VecVT.getVectorElementCount());
+}
+
// The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
// of either is (currently) supported. This can get us into an infinite loop
// where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
@@ -1953,7 +2409,7 @@ bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
- // RISCV FP-to-int conversions saturate to the destination register size, but
+ // RISC-V FP-to-int conversions saturate to the destination register size, but
// don't produce 0 for nan. We can use a conversion instruction and fix the
// nan case with a compare and a select.
SDValue Src = Op.getOperand(0);
@@ -1965,7 +2421,8 @@ static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
if (!DstVT.isVector()) {
// In absence of Zfh, promote f16 to f32, then saturate the result.
- if (Src.getSimpleValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) {
+ if (Src.getSimpleValueType() == MVT::f16 &&
+ !Subtarget.hasStdExtZfhOrZhinx()) {
Src = DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, Src);
}
@@ -2053,18 +2510,23 @@ static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
static RISCVFPRndMode::RoundingMode matchRoundingOp(unsigned Opc) {
switch (Opc) {
case ISD::FROUNDEVEN:
+ case ISD::STRICT_FROUNDEVEN:
case ISD::VP_FROUNDEVEN:
return RISCVFPRndMode::RNE;
case ISD::FTRUNC:
+ case ISD::STRICT_FTRUNC:
case ISD::VP_FROUNDTOZERO:
return RISCVFPRndMode::RTZ;
case ISD::FFLOOR:
+ case ISD::STRICT_FFLOOR:
case ISD::VP_FFLOOR:
return RISCVFPRndMode::RDN;
case ISD::FCEIL:
+ case ISD::STRICT_FCEIL:
case ISD::VP_FCEIL:
return RISCVFPRndMode::RUP;
case ISD::FROUND:
+ case ISD::STRICT_FROUND:
case ISD::VP_FROUND:
return RISCVFPRndMode::RMM;
case ISD::FRINT:
@@ -2097,6 +2559,9 @@ lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
SDValue Mask, VL;
if (Op->isVPOpcode()) {
Mask = Op.getOperand(1);
+ if (VT.isFixedLengthVector())
+ Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
+ Subtarget);
VL = Op.getOperand(2);
} else {
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
@@ -2155,9 +2620,11 @@ lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
Truncated = DAG.getNode(RISCVISD::VFCVT_RTZ_X_F_VL, DL, IntVT, Src,
Mask, VL);
break;
+ case ISD::FRINT:
case ISD::VP_FRINT:
Truncated = DAG.getNode(RISCVISD::VFCVT_X_F_VL, DL, IntVT, Src, Mask, VL);
break;
+ case ISD::FNEARBYINT:
case ISD::VP_FNEARBYINT:
Truncated = DAG.getNode(RISCVISD::VFROUND_NOEXCEPT_VL, DL, ContainerVT, Src,
Mask, VL);
@@ -2165,7 +2632,7 @@ lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
}
// VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
- if (Op.getOpcode() != ISD::VP_FNEARBYINT)
+ if (Truncated.getOpcode() != RISCVISD::VFROUND_NOEXCEPT_VL)
Truncated = DAG.getNode(RISCVISD::SINT_TO_FP_VL, DL, ContainerVT, Truncated,
Mask, VL);
@@ -2179,6 +2646,110 @@ lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VT, Truncated, DAG, Subtarget);
}
+// Expand vector STRICT_FTRUNC, STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND,
+// STRICT_FROUNDEVEN and STRICT_FNEARBYINT by converting sNaN in the source to
+// qNaN and converting the new source to integer and back to FP.
+static SDValue
+lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ SDValue Chain = Op.getOperand(0);
+ SDValue Src = Op.getOperand(1);
+
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+ }
+
+ auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+ // Freeze the source since we are increasing the number of uses.
+ Src = DAG.getFreeze(Src);
+
+ // Convert sNaN to qNaN by executing x + x for each unordered element x in Src.
+ MVT MaskVT = Mask.getSimpleValueType();
+ SDValue Unorder = DAG.getNode(RISCVISD::STRICT_FSETCC_VL, DL,
+ DAG.getVTList(MaskVT, MVT::Other),
+ {Chain, Src, Src, DAG.getCondCode(ISD::SETUNE),
+ DAG.getUNDEF(MaskVT), Mask, VL});
+ Chain = Unorder.getValue(1);
+ Src = DAG.getNode(RISCVISD::STRICT_FADD_VL, DL,
+ DAG.getVTList(ContainerVT, MVT::Other),
+ {Chain, Src, Src, DAG.getUNDEF(ContainerVT), Unorder, VL});
+ Chain = Src.getValue(1);
+
+ // We do the conversion on the absolute value and fix the sign at the end.
+ SDValue Abs = DAG.getNode(RISCVISD::FABS_VL, DL, ContainerVT, Src, Mask, VL);
+
+ // Determine the largest integer that can be represented exactly. This and
+ // values larger than it don't have any fractional bits so don't need to
+ // be converted.
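+ // That bound is 2^(precision - 1): 2^10 for f16, 2^23 for f32 and 2^52 for
+ // f64.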
+ const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(ContainerVT);
+ unsigned Precision = APFloat::semanticsPrecision(FltSem);
+ APFloat MaxVal = APFloat(FltSem);
+ MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
+ /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
+ SDValue MaxValNode =
+ DAG.getConstantFP(MaxVal, DL, ContainerVT.getVectorElementType());
+ SDValue MaxValSplat = DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), MaxValNode, VL);
+
+ // If abs(Src) was larger than MaxVal or nan, keep it.
+ Mask = DAG.getNode(
+ RISCVISD::SETCC_VL, DL, MaskVT,
+ {Abs, MaxValSplat, DAG.getCondCode(ISD::SETOLT), Mask, Mask, VL});
+
+ // Truncate to integer and convert back to FP.
+ MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
+ MVT XLenVT = Subtarget.getXLenVT();
+ SDValue Truncated;
+
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FROUNDEVEN: {
+ RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Op.getOpcode());
+ assert(FRM != RISCVFPRndMode::Invalid);
+ Truncated = DAG.getNode(
+ RISCVISD::STRICT_VFCVT_RM_X_F_VL, DL, DAG.getVTList(IntVT, MVT::Other),
+ {Chain, Src, Mask, DAG.getTargetConstant(FRM, DL, XLenVT), VL});
+ break;
+ }
+ case ISD::STRICT_FTRUNC:
+ Truncated =
+ DAG.getNode(RISCVISD::STRICT_VFCVT_RTZ_X_F_VL, DL,
+ DAG.getVTList(IntVT, MVT::Other), Chain, Src, Mask, VL);
+ break;
+ case ISD::STRICT_FNEARBYINT:
+ Truncated = DAG.getNode(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL, DL,
+ DAG.getVTList(ContainerVT, MVT::Other), Chain, Src,
+ Mask, VL);
+ break;
+ }
+ Chain = Truncated.getValue(1);
+
+ // VFROUND_NOEXCEPT_VL includes SINT_TO_FP_VL.
+ if (Op.getOpcode() != ISD::STRICT_FNEARBYINT) {
+ Truncated = DAG.getNode(RISCVISD::STRICT_SINT_TO_FP_VL, DL,
+ DAG.getVTList(ContainerVT, MVT::Other), Chain,
+ Truncated, Mask, VL);
+ Chain = Truncated.getValue(1);
+ }
+
+ // Restore the original sign so that -0.0 is preserved.
+ Truncated = DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Truncated,
+ Src, Src, Mask, VL);
+
+ if (VT.isFixedLengthVector())
+ Truncated = convertFromScalableVector(VT, Truncated, DAG, Subtarget);
+ return DAG.getMergeValues({Truncated, Chain}, DL);
+}
+
static SDValue
lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
@@ -2207,6 +2778,30 @@ lowerFTRUNC_FCEIL_FFLOOR_FROUND(SDValue Op, SelectionDAG &DAG,
DAG.getTargetConstant(FRM, DL, Subtarget.getXLenVT()));
}
+static SDValue
+getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget,
+ const SDLoc &DL, EVT VT, SDValue Merge, SDValue Op,
+ SDValue Offset, SDValue Mask, SDValue VL,
+ unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
+ if (Merge.isUndef())
+ Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
+ SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
+ SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
+ return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
+}
+
+static SDValue
+getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const SDLoc &DL,
+ EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
+ SDValue VL,
+ unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
+ if (Merge.isUndef())
+ Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
+ SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
+ SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
+ return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
+}
+
struct VIDSequence {
int64_t StepNumerator;
unsigned StepDenominator;
@@ -2412,8 +3007,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// according to the size of the final vector - use i8 chunks rather than
// XLenVT if we're producing a v8i1. This results in more consistent
// codegen across RV32 and RV64.
- unsigned NumViaIntegerBits =
- std::min(std::max(NumElts, 8u), Subtarget.getXLen());
+ unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
// If we have to use more than one INSERT_VECTOR_ELT then this
@@ -2423,39 +3017,36 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return SDValue();
// Now we can create our integer vector type. Note that it may be larger
// than the resulting mask type: v4i1 would use v1i8 as its integer type.
+ unsigned IntegerViaVecElts = divideCeil(NumElts, NumViaIntegerBits);
MVT IntegerViaVecVT =
MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
- divideCeil(NumElts, NumViaIntegerBits));
+ IntegerViaVecElts);
uint64_t Bits = 0;
unsigned BitPos = 0, IntegerEltIdx = 0;
- SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
+ SmallVector<SDValue, 8> Elts(IntegerViaVecElts);
- for (unsigned I = 0; I < NumElts; I++, BitPos++) {
- // Once we accumulate enough bits to fill our scalar type, insert into
- // our vector and clear our accumulated data.
- if (I != 0 && I % NumViaIntegerBits == 0) {
+ for (unsigned I = 0; I < NumElts;) {
+ SDValue V = Op.getOperand(I);
+ bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
+ Bits |= ((uint64_t)BitValue << BitPos);
+ ++BitPos;
+ ++I;
+
+ // Once we accumulate enough bits to fill our scalar type or process the
+ // last element, insert into our vector and clear our accumulated data.
+ if (I % NumViaIntegerBits == 0 || I == NumElts) {
if (NumViaIntegerBits <= 32)
Bits = SignExtend64<32>(Bits);
SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
- Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
- Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
+ Elts[IntegerEltIdx] = Elt;
Bits = 0;
BitPos = 0;
IntegerEltIdx++;
}
- SDValue V = Op.getOperand(I);
- bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
- Bits |= ((uint64_t)BitValue << BitPos);
}
- // Insert the (remaining) scalar value into position in our integer
- // vector type.
- if (NumViaIntegerBits <= 32)
- Bits = SignExtend64<32>(Bits);
- SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
- Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
- DAG.getConstant(IntegerEltIdx, DL, XLenVT));
+ SDValue Vec = DAG.getBuildVector(IntegerViaVecVT, DL, Elts);
if (NumElts < NumViaIntegerBits) {
// If we're producing a smaller vector than our minimum legal integer
@@ -2658,9 +3249,9 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
ValueCounts.insert(std::make_pair(V, 0));
unsigned &Count = ValueCounts[V];
-
- if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
- NumScalarLoads += !CFP->isExactlyValue(+0.0);
+ if (0 == Count)
+ if (auto *CFP = dyn_cast<ConstantFPSDNode>(V))
+ NumScalarLoads += !CFP->isExactlyValue(+0.0);
// Is this value dominant? In case of a tie, prefer the highest element as
// it's cheaper to insert near the beginning of a vector than it is at the
@@ -2678,6 +3269,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// Don't perform this optimization when optimizing for size, since
// materializing elements and inserting them tends to cause code bloat.
if (!DAG.shouldOptForSize() && NumScalarLoads < NumElts &&
+ (NumElts != 2 || ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) &&
((MostCommonCount > DominantValueCountThreshold) ||
(ValueCounts.size() <= Log2_32(NumDefElts)))) {
// Start by splatting the most common element.
@@ -2709,7 +3301,43 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return Vec;
}
- return SDValue();
+ // For constant vectors, use generic constant pool lowering. Otherwise,
+ // we'd have to materialize constants in GPRs just to move them into the
+ // vector.
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
+ return SDValue();
+
+ assert((!VT.isFloatingPoint() ||
+ VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
+ "Illegal type which will result in reserved encoding");
+
+ const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
+
+ SDValue Vec = DAG.getUNDEF(ContainerVT);
+ unsigned UndefCount = 0;
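+ // Build the vector by sliding in one defined element at a time with
+ // vslide1down; a run of undef elements is skipped with a single vslidedown
+ // by the length of the run.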
+ for (const SDValue &V : Op->ops()) {
+ if (V.isUndef()) {
+ UndefCount++;
+ continue;
+ }
+ if (UndefCount) {
+ const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
+ Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
+ Vec, Offset, Mask, VL, Policy);
+ UndefCount = 0;
+ }
+ auto OpCode =
+ VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
+ Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
+ V, Mask, VL);
+ }
+ if (UndefCount) {
+ const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
+ Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
+ Vec, Offset, Mask, VL, Policy);
+ }
+ return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
@@ -2727,8 +3355,7 @@ static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
// If vl is equal to XLEN_MAX and Hi constant is equal to Lo, we could use
// vmv.v.x whose EEW = 32 to lower it.
- auto *Const = dyn_cast<ConstantSDNode>(VL);
- if (LoC == HiC && Const && Const->isAllOnesValue()) {
+ if (LoC == HiC && isAllOnesConstant(VL)) {
MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
// TODO: if vl <= min(VLMAX), we can also do this. But we could not
// access the subtarget here now.
@@ -2751,10 +3378,8 @@ static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
SDValue Scalar, SDValue VL,
SelectionDAG &DAG) {
assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
- DAG.getConstant(0, DL, MVT::i32));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
- DAG.getConstant(1, DL, MVT::i32));
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitScalar(Scalar, DL, MVT::i32, MVT::i32);
return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
}
@@ -2762,7 +3387,7 @@ static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
// length VL. It ensures the final sequence is type legal, which is useful when
// lowering a splat after type legalization.
static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
- MVT VT, SDLoc DL, SelectionDAG &DAG,
+ MVT VT, const SDLoc &DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
bool HasPassthru = Passthru && !Passthru.isUndef();
if (!HasPassthru && !Passthru)
@@ -2817,8 +3442,8 @@ static MVT getLMUL1VT(MVT VT) {
// 0 of the vector regardless of the value of VL. The contents of the
// remaining lanes of the result vector are unspecified. VL is assumed
// to be non-zero.
-static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL,
- MVT VT, SDLoc DL, SelectionDAG &DAG,
+static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
+ const SDLoc &DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
const MVT XLenVT = Subtarget.getXLenVT();
@@ -2870,49 +3495,90 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL,
DAG.getUNDEF(VT),
Result, DAG.getConstant(0, DL, XLenVT));
return Result;
-
}
-static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
- const RISCVSubtarget &Subtarget) {
- // We need to be able to widen elements to the next larger integer type.
+// Is this a shuffle that extracts either the even or odd elements of a vector?
+// That is, specifically, either (a) or (b) below.
+// t34: v8i8 = extract_subvector t11, Constant:i64<0>
+// t33: v8i8 = extract_subvector t11, Constant:i64<8>
+// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
+// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
+// Returns {Src Vector, Even Elements} on success.
+static bool isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const RISCVSubtarget &Subtarget) {
+ // Need to be able to widen the vector.
if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
return false;
- int Size = Mask.size();
- assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
+ // Both input must be extracts.
+ if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return false;
- int Srcs[] = {-1, -1};
- for (int i = 0; i != Size; ++i) {
- // Ignore undef elements.
- if (Mask[i] < 0)
- continue;
+ // Extracting from the same source.
+ SDValue Src = V1.getOperand(0);
+ if (Src != V2.getOperand(0))
+ return false;
- // Is this an even or odd element.
- int Pol = i % 2;
+ // Src needs to have twice the number of elements.
+ if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
+ return false;
- // Ensure we consistently use the same source for this element polarity.
- int Src = Mask[i] / Size;
- if (Srcs[Pol] < 0)
- Srcs[Pol] = Src;
- if (Srcs[Pol] != Src)
- return false;
+ // The extracts must extract the two halves of the source.
+ if (V1.getConstantOperandVal(1) != 0 ||
+ V2.getConstantOperandVal(1) != Mask.size())
+ return false;
+
+ // First index must be the first even or odd element from V1.
+ if (Mask[0] != 0 && Mask[0] != 1)
+ return false;
- // Make sure the element within the source is appropriate for this element
- // in the destination.
- int Elt = Mask[i] % Size;
- if (Elt != i / 2)
+ // The others must increase by 2 each time.
+ // TODO: Support undef elements?
+ for (unsigned i = 1; i != Mask.size(); ++i)
+ if (Mask[i] != Mask[i - 1] + 2)
return false;
- }
- // We need to find a source for each polarity and they can't be the same.
- if (Srcs[0] < 0 || Srcs[1] < 0 || Srcs[0] == Srcs[1])
+ return true;
+}
+
+/// Is this a shuffle that interleaves contiguous elements from one vector into
+/// the even elements and contiguous elements from another vector into the odd
+/// elements? \p EvenSrc will contain the index of the element that should be in
+/// the first even element. \p OddSrc will contain the index of the element that
+/// should be in the first odd element. These indices can be the first element
+/// of a source or the element half way through the source.
+static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
+ int &OddSrc, const RISCVSubtarget &Subtarget) {
+ // We need to be able to widen elements to the next larger integer type.
+ if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
return false;
- // Swap the sources if the second source was in the even polarity.
- SwapSources = Srcs[0] > Srcs[1];
+ int Size = Mask.size();
+ int NumElts = VT.getVectorNumElements();
+ assert(Size == (int)NumElts && "Unexpected mask size");
- return true;
+ SmallVector<unsigned, 2> StartIndexes;
+ if (!ShuffleVectorInst::isInterleaveMask(Mask, 2, Size * 2, StartIndexes))
+ return false;
+
+ EvenSrc = StartIndexes[0];
+ OddSrc = StartIndexes[1];
+
+ // One source should be the low half of the first vector.
+ if (EvenSrc != 0 && OddSrc != 0)
+ return false;
+
+ // Subvectors will be extracted from either the start of the two input
+ // vectors, or from the start and middle of the first vector if it's a unary
+ // interleave.
+ // In both cases, HalfNumElts elements will be extracted.
+ // We need to ensure that the extract indices are 0 or HalfNumElts, otherwise
+ // we'll create an illegal extract_subvector.
+ // FIXME: We could support other values using a slidedown first.
+ int HalfNumElts = NumElts / 2;
+ return ((EvenSrc % HalfNumElts) == 0) && ((OddSrc % HalfNumElts) == 0);
}
/// Match shuffles that concatenate two vectors, rotate the concatenation,
@@ -2988,74 +3654,44 @@ static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
return Rotation;
}
-// Lower the following shuffles to vnsrl.
-// t34: v8i8 = extract_subvector t11, Constant:i64<0>
-// t33: v8i8 = extract_subvector t11, Constant:i64<8>
-// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
-// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
-static SDValue lowerVECTOR_SHUFFLEAsVNSRL(const SDLoc &DL, MVT VT,
- MVT ContainerVT, SDValue V1,
- SDValue V2, SDValue TrueMask,
- SDValue VL, ArrayRef<int> Mask,
- const RISCVSubtarget &Subtarget,
- SelectionDAG &DAG) {
- // Need to be able to widen the vector.
- if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
- return SDValue();
-
- // Both input must be extracts.
- if (V1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
- V2.getOpcode() != ISD::EXTRACT_SUBVECTOR)
- return SDValue();
-
- // Extracting from the same source.
- SDValue Src = V1.getOperand(0);
- if (Src != V2.getOperand(0))
- return SDValue();
-
- // Src needs to have twice the number of elements.
- if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
- return SDValue();
-
- // The extracts must extract the two halves of the source.
- if (V1.getConstantOperandVal(1) != 0 ||
- V2.getConstantOperandVal(1) != Mask.size())
- return SDValue();
-
- // First index must be the first even or odd element from V1.
- if (Mask[0] != 0 && Mask[0] != 1)
- return SDValue();
-
- // The others must increase by 2 each time.
- // TODO: Support undef elements?
- for (unsigned i = 1; i != Mask.size(); ++i)
- if (Mask[i] != Mask[i - 1] + 2)
- return SDValue();
+// Lower a deinterleave shuffle to vnsrl.
+// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
+// -> [p, q, r, s] (EvenElts == false)
+// VT is the type of the vector to return, <[vscale x ]n x ty>
+// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
+static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
+ bool EvenElts,
+ const RISCVSubtarget &Subtarget,
+ SelectionDAG &DAG) {
+ // The result is a vector of type <m x n x ty>
+ MVT ContainerVT = VT;
+ // Convert fixed vectors to scalable if needed
+ if (ContainerVT.isFixedLengthVector()) {
+ assert(Src.getSimpleValueType().isFixedLengthVector());
+ ContainerVT = getContainerForFixedLengthVector(DAG, ContainerVT, Subtarget);
+
+ // The source is a vector of type <m x n*2 x ty>
+ MVT SrcContainerVT =
+ MVT::getVectorVT(ContainerVT.getVectorElementType(),
+ ContainerVT.getVectorElementCount() * 2);
+ Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
+ }
- // Convert the source using a container type with twice the elements. Since
- // source VT is legal and twice this VT, we know VT isn't LMUL=8 so it is
- // safe to double.
- MVT DoubleContainerVT =
- MVT::getVectorVT(ContainerVT.getVectorElementType(),
- ContainerVT.getVectorElementCount() * 2);
- Src = convertToScalableVector(DoubleContainerVT, Src, DAG, Subtarget);
+ auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
- // Convert the vector to a wider integer type with the original element
- // count. This also converts FP to int.
+ // Bitcast the source vector from <m x n*2 x ty> -> <m x n x ty*2>
+ // This also converts FP to int.
unsigned EltBits = ContainerVT.getScalarSizeInBits();
- MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2);
- MVT WideIntContainerVT =
- MVT::getVectorVT(WideIntEltVT, ContainerVT.getVectorElementCount());
- Src = DAG.getBitcast(WideIntContainerVT, Src);
+ MVT WideSrcContainerVT = MVT::getVectorVT(
+ MVT::getIntegerVT(EltBits * 2), ContainerVT.getVectorElementCount());
+ Src = DAG.getBitcast(WideSrcContainerVT, Src);
- // Convert to the integer version of the container type.
- MVT IntEltVT = MVT::getIntegerVT(EltBits);
- MVT IntContainerVT =
- MVT::getVectorVT(IntEltVT, ContainerVT.getVectorElementCount());
+ // The integer version of the container type.
+ MVT IntContainerVT = ContainerVT.changeVectorElementTypeToInteger();
// If we want even elements, then the shift amount is 0. Otherwise, shift by
// the original element size.
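+ // On our little-endian layout the even element sits in the low half of each
+ // widened lane and the odd element in the high half, so the narrowing shift
+ // selects between them.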
- unsigned Shift = Mask[0] == 0 ? 0 : EltBits;
+ unsigned Shift = EvenElts ? 0 : EltBits;
SDValue SplatShift = DAG.getNode(
RISCVISD::VMV_V_X_VL, DL, IntContainerVT, DAG.getUNDEF(ContainerVT),
DAG.getConstant(Shift, DL, Subtarget.getXLenVT()), VL);
@@ -3065,31 +3701,9 @@ static SDValue lowerVECTOR_SHUFFLEAsVNSRL(const SDLoc &DL, MVT VT,
// Cast back to FP if needed.
Res = DAG.getBitcast(ContainerVT, Res);
- return convertFromScalableVector(VT, Res, DAG, Subtarget);
-}
-
-static SDValue
-getVSlidedown(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, SDLoc DL,
- EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
- SDValue VL,
- unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
- if (Merge.isUndef())
- Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
- SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
- SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
- return DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VT, Ops);
-}
-
-static SDValue
-getVSlideup(SelectionDAG &DAG, const RISCVSubtarget &Subtarget, SDLoc DL,
- EVT VT, SDValue Merge, SDValue Op, SDValue Offset, SDValue Mask,
- SDValue VL,
- unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED) {
- if (Merge.isUndef())
- Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
- SDValue PolicyOp = DAG.getTargetConstant(Policy, DL, Subtarget.getXLenVT());
- SDValue Ops[] = {Merge, Op, Offset, Mask, VL, PolicyOp};
- return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VT, Ops);
+ if (VT.isFixedLengthVector())
+ Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
+ return Res;
}
// Lower the following shuffle to vslidedown.
@@ -3169,6 +3783,183 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlidedown(const SDLoc &DL, MVT VT,
DAG.getConstant(0, DL, XLenVT));
}
+// Because vslideup leaves the destination elements at the start intact, we can
+// use it to perform shuffles that insert subvectors:
+//
+// vector_shuffle v8:v8i8, v9:v8i8, <0, 1, 2, 3, 8, 9, 10, 11>
+// ->
+// vsetvli zero, 8, e8, mf2, ta, ma
+// vslideup.vi v8, v9, 4
+//
+// vector_shuffle v8:v8i8, v9:v8i8 <0, 1, 8, 9, 10, 5, 6, 7>
+// ->
+// vsetvli zero, 5, e8, mf2, tu, ma
+// vslideup.vi v8, v9, 2
+static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ const RISCVSubtarget &Subtarget,
+ SelectionDAG &DAG) {
+ unsigned NumElts = VT.getVectorNumElements();
+ int NumSubElts, Index;
+ if (!ShuffleVectorInst::isInsertSubvectorMask(Mask, NumElts, NumSubElts,
+ Index))
+ return SDValue();
+
+ bool OpsSwapped = Mask[Index] < (int)NumElts;
+ SDValue InPlace = OpsSwapped ? V2 : V1;
+ SDValue ToInsert = OpsSwapped ? V1 : V2;
+
+ MVT XLenVT = Subtarget.getXLenVT();
+ MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
+ // We slide up by the index that the subvector is being inserted at, and set
+ // VL to the index + the number of elements being inserted.
+ unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
+ // If we're adding a suffix to the in-place vector, i.e. inserting right
+ // up to the very end of it, then we don't actually care about the tail.
+ if (NumSubElts + Index >= (int)NumElts)
+ Policy |= RISCVII::TAIL_AGNOSTIC;
+
+ InPlace = convertToScalableVector(ContainerVT, InPlace, DAG, Subtarget);
+ ToInsert = convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget);
+ SDValue VL = DAG.getConstant(NumSubElts + Index, DL, XLenVT);
+
+ SDValue Res;
+ // If we're inserting into the lowest elements, use a tail undisturbed
+ // vmv.v.v.
+ if (Index == 0)
+ Res = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, InPlace, ToInsert,
+ VL);
+ else
+ Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, InPlace, ToInsert,
+ DAG.getConstant(Index, DL, XLenVT), TrueMask, VL, Policy);
+ return convertFromScalableVector(VT, Res, DAG, Subtarget);
+}
+
+/// Match v(f)slide1up/down idioms. These operations involve sliding
+/// N-1 elements to make room for an inserted scalar at one end.
+static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ const RISCVSubtarget &Subtarget,
+ SelectionDAG &DAG) {
+ bool OpsSwapped = false;
+ if (!isa<BuildVectorSDNode>(V1)) {
+ if (!isa<BuildVectorSDNode>(V2))
+ return SDValue();
+ std::swap(V1, V2);
+ OpsSwapped = true;
+ }
+ SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
+ if (!Splat)
+ return SDValue();
+
+ // Return true if the mask could describe a slide of Mask.size() - 1
+ // elements from concat_vector(V1, V2)[Base:] to [Offset:].
+ auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
+ const unsigned S = (Offset > 0) ? 0 : -Offset;
+ const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
+ for (unsigned i = S; i != E; ++i)
+ if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
+ return false;
+ return true;
+ };
+
+ const unsigned NumElts = VT.getVectorNumElements();
+ bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
+ if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
+ return SDValue();
+
+ const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
+ // The inserted lane must come from the splat; an undef scalar is legal but
+ // not profitable.
+ if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
+ return SDValue();
+
+ MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+ auto OpCode = IsVSlidedown ?
+ (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
+ (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
+ auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT),
+ convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
+ Splat, TrueMask, VL);
+ return convertFromScalableVector(VT, Vec, DAG, Subtarget);
+}
+
+// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
+// to create an interleaved vector of <[vscale x] n*2 x ty>.
+// This requires that the size of ty is less than the subtarget's maximum ELEN.
+static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
+ const SDLoc &DL, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ MVT VecVT = EvenV.getSimpleValueType();
+ MVT VecContainerVT = VecVT; // <vscale x n x ty>
+ // Convert fixed vectors to scalable if needed
+ if (VecContainerVT.isFixedLengthVector()) {
+ VecContainerVT = getContainerForFixedLengthVector(DAG, VecVT, Subtarget);
+ EvenV = convertToScalableVector(VecContainerVT, EvenV, DAG, Subtarget);
+ OddV = convertToScalableVector(VecContainerVT, OddV, DAG, Subtarget);
+ }
+
+ assert(VecVT.getScalarSizeInBits() < Subtarget.getELEN());
+
+ // We're working with a vector of the same size as the resulting
+ // interleaved vector, but with half the number of elements and
+ // twice the SEW (hence the restriction on not using the maximum
+ // ELEN).
+ MVT WideVT =
+ MVT::getVectorVT(MVT::getIntegerVT(VecVT.getScalarSizeInBits() * 2),
+ VecVT.getVectorElementCount());
+ MVT WideContainerVT = WideVT; // <vscale x n x ty*2>
+ if (WideContainerVT.isFixedLengthVector())
+ WideContainerVT = getContainerForFixedLengthVector(DAG, WideVT, Subtarget);
+
+ // Bitcast the input vectors to integers in case they are FP
+ VecContainerVT = VecContainerVT.changeTypeToInteger();
+ EvenV = DAG.getBitcast(VecContainerVT, EvenV);
+ OddV = DAG.getBitcast(VecContainerVT, OddV);
+
+ auto [Mask, VL] = getDefaultVLOps(VecVT, VecContainerVT, DL, DAG, Subtarget);
+ SDValue Passthru = DAG.getUNDEF(WideContainerVT);
+
+ // Widen EvenV and OddV with 0s and add one copy of OddV to EvenV with
+ // vwaddu.vv
+ SDValue Interleaved = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideContainerVT,
+ EvenV, OddV, Passthru, Mask, VL);
+
+ // Then multiply OddV by (2^VecVT.getScalarSizeInBits() - 1), i.e. all ones.
+ SDValue AllOnesVec = DAG.getSplatVector(
+ VecContainerVT, DL, DAG.getAllOnesConstant(DL, Subtarget.getXLenVT()));
+ SDValue OddsMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideContainerVT, OddV,
+ AllOnesVec, Passthru, Mask, VL);
+
+ // Add the two together so we get
+ // (OddV * 0xff...ff) + (OddV + EvenV)
+ // = (OddV * 0x100...00) + EvenV
+ // = (OddV << VecVT.getScalarSizeInBits()) + EvenV
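+ // (e.g. for SEW=8: OddV * 255 + (OddV + EvenV) = OddV * 256 + EvenV, leaving
+ // EvenV in the low byte and OddV in the high byte of each widened lane)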
+ // Note that the ADD_VL and VWMULU_VL should get selected as vwmaccu.vx.
+ Interleaved = DAG.getNode(RISCVISD::ADD_VL, DL, WideContainerVT, Interleaved,
+ OddsMul, Passthru, Mask, VL);
+
+ // Bitcast from <vscale x n x ty*2> to <vscale x 2*n x ty>
+ MVT ResultContainerVT = MVT::getVectorVT(
+ VecVT.getVectorElementType(), // Make sure to use original type
+ VecContainerVT.getVectorElementCount().multiplyCoefficientBy(2));
+ Interleaved = DAG.getBitcast(ResultContainerVT, Interleaved);
+
+ // Convert back to a fixed vector if needed
+ MVT ResultVT =
+ MVT::getVectorVT(VecVT.getVectorElementType(),
+ VecVT.getVectorElementCount().multiplyCoefficientBy(2));
+ if (ResultVT.isFixedLengthVector())
+ Interleaved =
+ convertFromScalableVector(ResultVT, Interleaved, DAG, Subtarget);
+
+ return Interleaved;
+}
+
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue V1 = Op.getOperand(0);
@@ -3179,6 +3970,17 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
unsigned NumElts = VT.getVectorNumElements();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
+ // Promote i1 shuffle to i8 shuffle.
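+ // The zero-extended elements are 0 or 1, so comparing the shuffled i8
+ // vector against zero with SETNE recovers the i1 result.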
+ if (VT.getVectorElementType() == MVT::i1) {
+ MVT WidenVT = MVT::getVectorVT(MVT::i8, VT.getVectorElementCount());
+ V1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V1);
+ V2 = V2.isUndef() ? DAG.getUNDEF(WidenVT)
+ : DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, V2);
+ SDValue Shuffled = DAG.getVectorShuffle(WidenVT, DL, V1, V2, SVN->getMask());
+ return DAG.getSetCC(DL, VT, Shuffled, DAG.getConstant(0, DL, WidenVT),
+ ISD::SETNE);
+ }
+
MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
@@ -3261,6 +4063,10 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
ArrayRef<int> Mask = SVN->getMask();
if (SDValue V =
+ lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
+ return V;
+
+ if (SDValue V =
lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
return V;
@@ -3285,16 +4091,10 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
SDValue Res = DAG.getUNDEF(ContainerVT);
if (HiV) {
- // If we are doing a SLIDEDOWN+SLIDEUP, reduce the VL for the SLIDEDOWN.
- // FIXME: If we are only doing a SLIDEDOWN, don't reduce the VL as it
- // causes multiple vsetvlis in some test cases such as lowering
- // reduce.mul
- SDValue DownVL = VL;
- if (LoV)
- DownVL = DAG.getConstant(InvRotate, DL, XLenVT);
+    // Even though we could use a smaller VL, don't, to avoid a vsetivli
+    // toggle.
Res = getVSlidedown(DAG, Subtarget, DL, ContainerVT, Res, HiV,
- DAG.getConstant(Rotation, DL, XLenVT), TrueMask,
- DownVL);
+ DAG.getConstant(Rotation, DL, XLenVT), TrueMask, VL);
}
if (LoV)
Res = getVSlideup(DAG, Subtarget, DL, ContainerVT, Res, LoV,
@@ -3304,80 +4104,37 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VT, Res, DAG, Subtarget);
}
- if (SDValue V = lowerVECTOR_SHUFFLEAsVNSRL(
- DL, VT, ContainerVT, V1, V2, TrueMask, VL, Mask, Subtarget, DAG))
+ // If this is a deinterleave and we can widen the vector, then we can use
+ // vnsrl to deinterleave.
+ if (isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget)) {
+ return getDeinterleaveViaVNSRL(DL, VT, V1.getOperand(0), Mask[0] == 0,
+ Subtarget, DAG);
+ }
+
+ if (SDValue V =
+ lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
return V;
// Detect an interleave shuffle and lower to
// (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
- bool SwapSources;
- if (isInterleaveShuffle(Mask, VT, SwapSources, Subtarget)) {
- // Swap sources if needed.
- if (SwapSources)
- std::swap(V1, V2);
-
- // Extract the lower half of the vectors.
+ int EvenSrc, OddSrc;
+ if (isInterleaveShuffle(Mask, VT, EvenSrc, OddSrc, Subtarget)) {
+ // Extract the halves of the vectors.
MVT HalfVT = VT.getHalfNumVectorElementsVT();
- V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
- DAG.getConstant(0, DL, XLenVT));
- V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2,
- DAG.getConstant(0, DL, XLenVT));
- // Double the element width and halve the number of elements in an int type.
- unsigned EltBits = VT.getScalarSizeInBits();
- MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2);
- MVT WideIntVT =
- MVT::getVectorVT(WideIntEltVT, VT.getVectorNumElements() / 2);
- // Convert this to a scalable vector. We need to base this on the
- // destination size to ensure there's always a type with a smaller LMUL.
- MVT WideIntContainerVT =
- getContainerForFixedLengthVector(DAG, WideIntVT, Subtarget);
-
- // Convert sources to scalable vectors with the same element count as the
- // larger type.
- MVT HalfContainerVT = MVT::getVectorVT(
- VT.getVectorElementType(), WideIntContainerVT.getVectorElementCount());
- V1 = convertToScalableVector(HalfContainerVT, V1, DAG, Subtarget);
- V2 = convertToScalableVector(HalfContainerVT, V2, DAG, Subtarget);
-
- // Cast sources to integer.
- MVT IntEltVT = MVT::getIntegerVT(EltBits);
- MVT IntHalfVT =
- MVT::getVectorVT(IntEltVT, HalfContainerVT.getVectorElementCount());
- V1 = DAG.getBitcast(IntHalfVT, V1);
- V2 = DAG.getBitcast(IntHalfVT, V2);
-
- // Freeze V2 since we use it twice and we need to be sure that the add and
- // multiply see the same value.
- V2 = DAG.getFreeze(V2);
-
- // Recreate TrueMask using the widened type's element count.
- TrueMask = getAllOnesMask(HalfContainerVT, VL, DL, DAG);
-
- // Widen V1 and V2 with 0s and add one copy of V2 to V1.
- SDValue Add =
- DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1, V2,
- DAG.getUNDEF(WideIntContainerVT), TrueMask, VL);
- // Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer.
- SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT,
- DAG.getUNDEF(IntHalfVT),
- DAG.getAllOnesConstant(DL, XLenVT), VL);
- SDValue WidenMul =
- DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT, V2, Multiplier,
- DAG.getUNDEF(WideIntContainerVT), TrueMask, VL);
- // Add the new copies to our previous addition giving us 2^eltbits copies of
- // V2. This is equivalent to shifting V2 left by eltbits. This should
- // combine with the vwmulu.vv above to form vwmaccu.vv.
- Add = DAG.getNode(RISCVISD::ADD_VL, DL, WideIntContainerVT, Add, WidenMul,
- DAG.getUNDEF(WideIntContainerVT), TrueMask, VL);
- // Cast back to ContainerVT. We need to re-create a new ContainerVT in case
- // WideIntContainerVT is a larger fractional LMUL than implied by the fixed
- // vector VT.
- ContainerVT =
- MVT::getVectorVT(VT.getVectorElementType(),
- WideIntContainerVT.getVectorElementCount() * 2);
- Add = DAG.getBitcast(ContainerVT, Add);
- return convertFromScalableVector(VT, Add, DAG, Subtarget);
+ int Size = Mask.size();
+ SDValue EvenV, OddV;
+ assert(EvenSrc >= 0 && "Undef source?");
+ EvenV = (EvenSrc / Size) == 0 ? V1 : V2;
+ EvenV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, EvenV,
+ DAG.getConstant(EvenSrc % Size, DL, XLenVT));
+
+ assert(OddSrc >= 0 && "Undef source?");
+ OddV = (OddSrc / Size) == 0 ? V1 : V2;
+ OddV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, OddV,
+ DAG.getConstant(OddSrc % Size, DL, XLenVT));
+
+ return getWideningInterleave(EvenV, OddV, DL, DAG, Subtarget);
}
// Detect shuffles which can be re-expressed as vector selects; these are
@@ -3527,10 +4284,13 @@ bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
MVT SVT = VT.getSimpleVT();
- bool SwapSources;
- int LoSrc, HiSrc;
- return (isElementRotate(LoSrc, HiSrc, M) > 0) ||
- isInterleaveShuffle(M, SVT, SwapSources, Subtarget);
+ // Not for i1 vectors.
+ if (SVT.getScalarType() == MVT::i1)
+ return false;
+
+ int Dummy1, Dummy2;
+ return (isElementRotate(Dummy1, Dummy2, M) > 0) ||
+ isInterleaveShuffle(M, SVT, Dummy1, Dummy2, Subtarget);
}
// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
@@ -3542,6 +4302,16 @@ RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
unsigned EltSize = VT.getScalarSizeInBits();
SDValue Src = Op.getOperand(0);
SDLoc DL(Op);
+ MVT ContainerVT = VT;
+
+ SDValue Mask, VL;
+ if (Op->isVPOpcode()) {
+ Mask = Op.getOperand(1);
+ if (VT.isFixedLengthVector())
+ Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
+ Subtarget);
+ VL = Op.getOperand(2);
+ }
  // We choose an FP type that can represent the value if possible. Otherwise,
  // we use a rounding-to-zero conversion to get the correct exponent of the
  // result.
@@ -3562,21 +4332,27 @@ RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
SDValue Neg = DAG.getNegative(Src, DL, VT);
Src = DAG.getNode(ISD::AND, DL, VT, Src, Neg);
+ } else if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF) {
+ SDValue Neg = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ Src, Mask, VL);
+ Src = DAG.getNode(ISD::VP_AND, DL, VT, Src, Neg, Mask, VL);
}
// We have a legal FP type, convert to it.
SDValue FloatVal;
if (FloatVT.bitsGT(VT)) {
- FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
+ if (Op->isVPOpcode())
+ FloatVal = DAG.getNode(ISD::VP_UINT_TO_FP, DL, FloatVT, Src, Mask, VL);
+ else
+ FloatVal = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVT, Src);
} else {
// Use RTZ to avoid rounding influencing exponent of FloatVal.
- MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VT);
Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
}
-
- auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+ if (!Op->isVPOpcode())
+ std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
SDValue RTZRM =
DAG.getTargetConstant(RISCVFPRndMode::RTZ, DL, Subtarget.getXLenVT());
MVT ContainerFloatVT =
@@ -3590,30 +4366,49 @@ RISCVTargetLowering::lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op,
EVT IntVT = FloatVT.changeVectorElementTypeToInteger();
SDValue Bitcast = DAG.getBitcast(IntVT, FloatVal);
unsigned ShiftAmt = FloatEltVT == MVT::f64 ? 52 : 23;
- SDValue Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
- DAG.getConstant(ShiftAmt, DL, IntVT));
+
+ SDValue Exp;
  // Restore to the original type. Truncation after SRL is to generate vnsrl.
- if (IntVT.bitsLT(VT))
- Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
- else if (IntVT.bitsGT(VT))
- Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
+ if (Op->isVPOpcode()) {
+ Exp = DAG.getNode(ISD::VP_LSHR, DL, IntVT, Bitcast,
+ DAG.getConstant(ShiftAmt, DL, IntVT), Mask, VL);
+ Exp = DAG.getVPZExtOrTrunc(DL, VT, Exp, Mask, VL);
+ } else {
+ Exp = DAG.getNode(ISD::SRL, DL, IntVT, Bitcast,
+ DAG.getConstant(ShiftAmt, DL, IntVT));
+ if (IntVT.bitsLT(VT))
+ Exp = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Exp);
+ else if (IntVT.bitsGT(VT))
+ Exp = DAG.getNode(ISD::TRUNCATE, DL, VT, Exp);
+ }
+
// The exponent contains log2 of the value in biased form.
unsigned ExponentBias = FloatEltVT == MVT::f64 ? 1023 : 127;
-
// For trailing zeros, we just need to subtract the bias.
if (Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF)
return DAG.getNode(ISD::SUB, DL, VT, Exp,
DAG.getConstant(ExponentBias, DL, VT));
+ if (Op.getOpcode() == ISD::VP_CTTZ_ZERO_UNDEF)
+ return DAG.getNode(ISD::VP_SUB, DL, VT, Exp,
+ DAG.getConstant(ExponentBias, DL, VT), Mask, VL);
// For leading zeros, we need to remove the bias and convert from log2 to
// leading zeros. We can do this by subtracting from (Bias + (EltSize - 1)).
unsigned Adjust = ExponentBias + (EltSize - 1);
- SDValue Res =
- DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
+ SDValue Res;
+ if (Op->isVPOpcode())
+ Res = DAG.getNode(ISD::VP_SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp,
+ Mask, VL);
+ else
+ Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(Adjust, DL, VT), Exp);
+
// The above result with zero input equals to Adjust which is greater than
// EltSize. Hence, we can do min(Res, EltSize) for CTLZ.
if (Op.getOpcode() == ISD::CTLZ)
Res = DAG.getNode(ISD::UMIN, DL, VT, Res, DAG.getConstant(EltSize, DL, VT));
+ else if (Op.getOpcode() == ISD::VP_CTLZ)
+ Res = DAG.getNode(ISD::VP_UMIN, DL, VT, Res,
+ DAG.getConstant(EltSize, DL, VT), Mask, VL);
return Res;
}
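
The exponent trick used by lowerCTLZ_CTTZ_ZERO_UNDEF can be modelled on scalars. A minimal sketch for a 16-bit element converted through f32 (the exact-conversion path, so no RTZ rounding is needed); ctlz16ViaFloat and the use of the GCC/Clang __builtin_clz as a reference oracle are illustrative assumptions, not code from the tree:

#include <cassert>
#include <cstdint>
#include <cstring>

// CTLZ via the biased exponent of a float conversion: for f32 the mantissa
// shift is 23 and the bias is 127, so Adjust = 127 + (EltSize - 1).
static unsigned ctlz16ViaFloat(uint16_t X) {
  assert(X != 0 && "zero is undef for this lowering");
  float F = static_cast<float>(X); // exact: f32 represents every u16 value
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  unsigned Exp = Bits >> 23;       // biased exponent
  return (127 + 15) - Exp;         // Adjust - Exp
}

int main() {
  for (uint32_t X = 1; X <= 0xFFFF; ++X)
    assert(ctlz16ViaFloat(static_cast<uint16_t>(X)) ==
           static_cast<unsigned>(__builtin_clz(X)) - 16);
  return 0;
}
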
@@ -3699,15 +4494,43 @@ static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
return Op;
+  // Special case. See if we can build the constant as (ADD (SLLI X, 32), X);
+  // do that if it will avoid a constant pool. It will require an extra
+  // temporary register though.
+ if (!DAG.shouldOptForSize()) {
+ int64_t LoVal = SignExtend64<32>(Imm);
+ int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32);
+ if (LoVal == HiVal) {
+ RISCVMatInt::InstSeq SeqLo =
+ RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits());
+ if ((SeqLo.size() + 2) <= Subtarget.getMaxBuildIntsCost())
+ return Op;
+ }
+ }
+
// Expand to a constant pool using the default expansion code.
return SDValue();
}
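
A scalar sketch of the (ADD (SLLI X, 32), X) check above: the immediate qualifies when its sign-extended low half equals its adjusted high half. canUseShiftAddPair and the sample immediates are illustrative only:

#include <cassert>
#include <cstdint>

// Mirrors the LoVal/HiVal comparison: when they agree, Imm can be rebuilt
// from a single 32-bit value X as (X << 32) + X.
static bool canUseShiftAddPair(int64_t Imm, int64_t &X) {
  int64_t LoVal = static_cast<int32_t>(Imm); // SignExtend64<32>(Imm)
  int64_t HiVal = static_cast<int32_t>(
      (static_cast<uint64_t>(Imm) - static_cast<uint64_t>(LoVal)) >> 32);
  if (LoVal != HiVal)
    return false;
  X = LoVal;
  return true;
}

int main() {
  int64_t X;
  assert(canUseShiftAddPair(0x0000000100000001LL, X) && X == 1);
  assert((static_cast<uint64_t>(X) << 32) + static_cast<uint64_t>(X) ==
         0x0000000100000001ULL);
  assert(!canUseShiftAddPair(0x0000000100000002LL, X));
  return 0;
}
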
-static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
SDLoc dl(Op);
+ AtomicOrdering FenceOrdering =
+ static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
SyncScope::ID FenceSSID =
static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
+ if (Subtarget.hasStdExtZtso()) {
+ // The only fence that needs an instruction is a sequentially-consistent
+ // cross-thread fence.
+ if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
+ FenceSSID == SyncScope::System)
+ return Op;
+
+ // MEMBARRIER is a compiler barrier; it codegens to a no-op.
+ return DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+ }
+
// singlethread fences only synchronize with signal handlers on the same
// thread and thus only need to preserve instruction order, not actually
// enforce memory ordering.
@@ -3718,13 +4541,230 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) {
return Op;
}
+SDValue RISCVTargetLowering::LowerIS_FPCLASS(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ MVT XLenVT = Subtarget.getXLenVT();
+ auto CNode = cast<ConstantSDNode>(Op.getOperand(1));
+ unsigned Check = CNode->getZExtValue();
+ unsigned TDCMask = 0;
+ if (Check & fcSNan)
+ TDCMask |= RISCV::FPMASK_Signaling_NaN;
+ if (Check & fcQNan)
+ TDCMask |= RISCV::FPMASK_Quiet_NaN;
+ if (Check & fcPosInf)
+ TDCMask |= RISCV::FPMASK_Positive_Infinity;
+ if (Check & fcNegInf)
+ TDCMask |= RISCV::FPMASK_Negative_Infinity;
+ if (Check & fcPosNormal)
+ TDCMask |= RISCV::FPMASK_Positive_Normal;
+ if (Check & fcNegNormal)
+ TDCMask |= RISCV::FPMASK_Negative_Normal;
+ if (Check & fcPosSubnormal)
+ TDCMask |= RISCV::FPMASK_Positive_Subnormal;
+ if (Check & fcNegSubnormal)
+ TDCMask |= RISCV::FPMASK_Negative_Subnormal;
+ if (Check & fcPosZero)
+ TDCMask |= RISCV::FPMASK_Positive_Zero;
+ if (Check & fcNegZero)
+ TDCMask |= RISCV::FPMASK_Negative_Zero;
+
+ bool IsOneBitMask = isPowerOf2_32(TDCMask);
+
+ SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, XLenVT);
+
+ if (VT.isVector()) {
+ SDValue Op0 = Op.getOperand(0);
+ MVT VT0 = Op.getOperand(0).getSimpleValueType();
+
+ if (VT.isScalableVector()) {
+ MVT DstVT = VT0.changeVectorElementTypeToInteger();
+ auto [Mask, VL] = getDefaultScalableVLOps(VT0, DL, DAG, Subtarget);
+ SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, DstVT, Op0, Mask,
+ VL, Op->getFlags());
+ if (IsOneBitMask)
+ return DAG.getSetCC(DL, VT, FPCLASS,
+ DAG.getConstant(TDCMask, DL, DstVT),
+ ISD::CondCode::SETEQ);
+ SDValue AND = DAG.getNode(ISD::AND, DL, DstVT, FPCLASS,
+ DAG.getConstant(TDCMask, DL, DstVT));
+ return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, DstVT),
+ ISD::SETNE);
+ }
+
+ MVT ContainerVT0 = getContainerForFixedLengthVector(VT0);
+ MVT ContainerVT = getContainerForFixedLengthVector(VT);
+ MVT ContainerDstVT = ContainerVT0.changeVectorElementTypeToInteger();
+ auto [Mask, VL] = getDefaultVLOps(VT0, ContainerVT0, DL, DAG, Subtarget);
+
+ Op0 = convertToScalableVector(ContainerVT0, Op0, DAG, Subtarget);
+
+ SDValue FPCLASS = DAG.getNode(RISCVISD::FCLASS_VL, DL, ContainerDstVT, Op0,
+ Mask, VL, Op->getFlags());
+
+ TDCMaskV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
+ DAG.getUNDEF(ContainerDstVT), TDCMaskV, VL);
+ if (IsOneBitMask) {
+ SDValue VMSEQ =
+ DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
+ {FPCLASS, TDCMaskV, DAG.getCondCode(ISD::SETEQ),
+ DAG.getUNDEF(ContainerVT), Mask, VL});
+ return convertFromScalableVector(VT, VMSEQ, DAG, Subtarget);
+ }
+ SDValue AND = DAG.getNode(RISCVISD::AND_VL, DL, ContainerDstVT, FPCLASS,
+ TDCMaskV, DAG.getUNDEF(ContainerDstVT), Mask, VL);
+
+ SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
+ SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerDstVT,
+ DAG.getUNDEF(ContainerDstVT), SplatZero, VL);
+
+ SDValue VMSNE = DAG.getNode(RISCVISD::SETCC_VL, DL, ContainerVT,
+ {AND, SplatZero, DAG.getCondCode(ISD::SETNE),
+ DAG.getUNDEF(ContainerVT), Mask, VL});
+ return convertFromScalableVector(VT, VMSNE, DAG, Subtarget);
+ }
+
+ SDValue FPCLASS = DAG.getNode(RISCVISD::FPCLASS, DL, VT, Op.getOperand(0));
+ SDValue AND = DAG.getNode(ISD::AND, DL, VT, FPCLASS, TDCMaskV);
+ return DAG.getSetCC(DL, VT, AND, DAG.getConstant(0, DL, XLenVT),
+ ISD::CondCode::SETNE);
+}
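
The TDC mask built in LowerIS_FPCLASS is consumed as (fclass & mask) != 0. A minimal scalar sketch assuming the fclass result layout from the RISC-V F extension specification; the enum and helper below are illustrative, not the RISCV::FPMASK_* definitions themselves:

#include <cassert>

// One-hot class bits as laid out by fclass.{s,d} (F/D spec ordering).
enum : unsigned {
  NegInf = 1u << 0, NegNormal = 1u << 1, NegSubnormal = 1u << 2,
  NegZero = 1u << 3, PosZero = 1u << 4, PosSubnormal = 1u << 5,
  PosNormal = 1u << 6, PosInf = 1u << 7, SigNaN = 1u << 8, QuietNaN = 1u << 9
};

// is.fpclass(x, nan) lowers to: (fclass(x) & (SigNaN | QuietNaN)) != 0.
static bool isNaNFromFClass(unsigned FClassBits) {
  unsigned TDCMask = SigNaN | QuietNaN;
  return (FClassBits & TDCMask) != 0;
}

int main() {
  assert(isNaNFromFClass(QuietNaN));
  assert(isNaNFromFClass(SigNaN));
  assert(!isNaNFromFClass(PosNormal));
  return 0;
}
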
+
+// Lower fmaximum and fminimum. Unlike our fmax and fmin instructions, these
+// operations propagate nans.
+static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ SDValue X = Op.getOperand(0);
+ SDValue Y = Op.getOperand(1);
+
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ // If X is a nan, replace Y with X. If Y is a nan, replace X with Y. This
+  // ensures that when one input is a nan, the other will also be a nan,
+  // allowing the nan to propagate. If both inputs are nan, this will swap the
+  // inputs, which is harmless.
+ // FIXME: Handle nonans FMF and use isKnownNeverNaN.
+ SDValue XIsNonNan = DAG.getSetCC(DL, XLenVT, X, X, ISD::SETOEQ);
+ SDValue NewY = DAG.getSelect(DL, VT, XIsNonNan, Y, X);
+
+ SDValue YIsNonNan = DAG.getSetCC(DL, XLenVT, Y, Y, ISD::SETOEQ);
+ SDValue NewX = DAG.getSelect(DL, VT, YIsNonNan, X, Y);
+
+ unsigned Opc =
+ Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::FMAX : RISCVISD::FMIN;
+ return DAG.getNode(Opc, DL, VT, NewX, NewY);
+}
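
The NaN-propagation swap in lowerFMAXIMUM_FMINIMUM can be modelled on scalars, with std::fmax standing in for the RISCVISD::FMAX / fmax.s behaviour of preferring the non-NaN operand. A minimal sketch; the helper name is illustrative:

#include <cassert>
#include <cmath>

// fmaximum must propagate NaNs; swapping a NaN operand onto both sides first
// makes the underlying non-propagating max return the NaN anyway.
static double fmaximumSketch(double X, double Y) {
  double NewY = (X == X) ? Y : X; // X is NaN -> both operands become X
  double NewX = (Y == Y) ? X : Y; // Y is NaN -> both operands become Y
  return std::fmax(NewX, NewY);   // stand-in for fmax.s semantics
}

int main() {
  assert(fmaximumSketch(1.0, 2.0) == 2.0);
  assert(std::isnan(fmaximumSketch(std::nan(""), 2.0)));
  assert(std::isnan(fmaximumSketch(1.0, std::nan(""))));
  return 0;
}
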
+
+/// Get the RISC-V target-specific VL op for a given SDNode.
+static unsigned getRISCVVLOp(SDValue Op) {
+#define OP_CASE(NODE) \
+ case ISD::NODE: \
+ return RISCVISD::NODE##_VL;
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("don't have RISC-V specified VL op for this SDNode");
+ // clang-format off
+ OP_CASE(ADD)
+ OP_CASE(SUB)
+ OP_CASE(MUL)
+ OP_CASE(MULHS)
+ OP_CASE(MULHU)
+ OP_CASE(SDIV)
+ OP_CASE(SREM)
+ OP_CASE(UDIV)
+ OP_CASE(UREM)
+ OP_CASE(SHL)
+ OP_CASE(SRA)
+ OP_CASE(SRL)
+ OP_CASE(SADDSAT)
+ OP_CASE(UADDSAT)
+ OP_CASE(SSUBSAT)
+ OP_CASE(USUBSAT)
+ OP_CASE(FADD)
+ OP_CASE(FSUB)
+ OP_CASE(FMUL)
+ OP_CASE(FDIV)
+ OP_CASE(FNEG)
+ OP_CASE(FABS)
+ OP_CASE(FSQRT)
+ OP_CASE(SMIN)
+ OP_CASE(SMAX)
+ OP_CASE(UMIN)
+ OP_CASE(UMAX)
+ OP_CASE(FMINNUM)
+ OP_CASE(FMAXNUM)
+ OP_CASE(STRICT_FADD)
+ OP_CASE(STRICT_FSUB)
+ OP_CASE(STRICT_FMUL)
+ OP_CASE(STRICT_FDIV)
+ OP_CASE(STRICT_FSQRT)
+ // clang-format on
+#undef OP_CASE
+ case ISD::FMA:
+ return RISCVISD::VFMADD_VL;
+ case ISD::STRICT_FMA:
+ return RISCVISD::STRICT_VFMADD_VL;
+ case ISD::AND:
+ if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
+ return RISCVISD::VMAND_VL;
+ return RISCVISD::AND_VL;
+ case ISD::OR:
+ if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
+ return RISCVISD::VMOR_VL;
+ return RISCVISD::OR_VL;
+ case ISD::XOR:
+ if (Op.getSimpleValueType().getVectorElementType() == MVT::i1)
+ return RISCVISD::VMXOR_VL;
+ return RISCVISD::XOR_VL;
+ }
+}
+
+/// Return true if a RISC-V target-specific op has a merge operand.
+static bool hasMergeOp(unsigned Opcode) {
+ assert(Opcode > RISCVISD::FIRST_NUMBER &&
+ Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL &&
+ "not a RISC-V target specific op");
+ assert(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL - RISCVISD::FIRST_NUMBER == 421 &&
+ "adding target specific op should update this function");
+ if (Opcode >= RISCVISD::ADD_VL && Opcode <= RISCVISD::FMAXNUM_VL)
+ return true;
+ if (Opcode == RISCVISD::FCOPYSIGN_VL)
+ return true;
+ if (Opcode >= RISCVISD::VWMUL_VL && Opcode <= RISCVISD::VFWSUB_W_VL)
+ return true;
+ if (Opcode >= RISCVISD::STRICT_FADD_VL && Opcode <= RISCVISD::STRICT_FDIV_VL)
+ return true;
+ return false;
+}
+
+/// Return true if a RISC-V target-specific op has a mask operand.
+static bool hasMaskOp(unsigned Opcode) {
+ assert(Opcode > RISCVISD::FIRST_NUMBER &&
+ Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL &&
+ "not a RISC-V target specific op");
+ assert(RISCVISD::STRICT_VFROUND_NOEXCEPT_VL - RISCVISD::FIRST_NUMBER == 421 &&
+ "adding target specific op should update this function");
+ if (Opcode >= RISCVISD::TRUNCATE_VECTOR_VL && Opcode <= RISCVISD::SETCC_VL)
+ return true;
+ if (Opcode >= RISCVISD::VRGATHER_VX_VL && Opcode <= RISCVISD::VFIRST_VL)
+ return true;
+ if (Opcode >= RISCVISD::STRICT_FADD_VL &&
+ Opcode <= RISCVISD::STRICT_VFROUND_NOEXCEPT_VL)
+ return true;
+ return false;
+}
+
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
report_fatal_error("unimplemented operand");
case ISD::ATOMIC_FENCE:
- return LowerATOMIC_FENCE(Op, DAG);
+ return LowerATOMIC_FENCE(Op, DAG, Subtarget);
case ISD::GlobalAddress:
return lowerGlobalAddress(Op, DAG);
case ISD::BlockAddress:
@@ -3753,6 +4793,15 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerShiftRightParts(Op, DAG, true);
case ISD::SRL_PARTS:
return lowerShiftRightParts(Op, DAG, false);
+ case ISD::ROTL:
+ case ISD::ROTR:
+ assert(Subtarget.hasVendorXTHeadBb() &&
+ !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()) &&
+ "Unexpected custom legalization");
+ // XTHeadBb only supports rotate by constant.
+ if (!isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
+ return Op;
case ISD::BITCAST: {
SDLoc DL(Op);
EVT VT = Op.getValueType();
@@ -3760,18 +4809,32 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
EVT Op0VT = Op0.getValueType();
MVT XLenVT = Subtarget.getXLenVT();
if (VT == MVT::f16 && Op0VT == MVT::i16 &&
- Subtarget.hasStdExtZfhOrZfhmin()) {
+ Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
return FPConv;
}
+ if (VT == MVT::bf16 && Op0VT == MVT::i16 &&
+ Subtarget.hasStdExtZfbfmin()) {
+ SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
+ SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::bf16, NewOp0);
+ return FPConv;
+ }
if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
- Subtarget.hasStdExtF()) {
+ Subtarget.hasStdExtFOrZfinx()) {
SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
SDValue FPConv =
DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
return FPConv;
}
+ if (VT == MVT::f64 && Op0VT == MVT::i64 && XLenVT == MVT::i32 &&
+ Subtarget.hasStdExtZfa()) {
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitScalar(Op0, DL, MVT::i32, MVT::i32);
+ SDValue RetReg =
+ DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
+ return RetReg;
+ }
// Consider other scalar<->scalar casts as legal if the types are legal.
// Otherwise expand them.
@@ -3821,6 +4884,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::INTRINSIC_VOID:
return LowerINTRINSIC_VOID(Op, DAG);
+ case ISD::IS_FPCLASS:
+ return LowerIS_FPCLASS(Op, DAG);
case ISD::BITREVERSE: {
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
@@ -3852,6 +4917,25 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
+ case ISD::SCALAR_TO_VECTOR: {
+ MVT VT = Op.getSimpleValueType();
+ SDLoc DL(Op);
+ SDValue Scalar = Op.getOperand(0);
+ if (VT.getVectorElementType() == MVT::i1) {
+ MVT WideVT = VT.changeVectorElementType(MVT::i8);
+ SDValue V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, WideVT, Scalar);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
+ }
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector())
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+ SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), Scalar, VL);
+ if (VT.isFixedLengthVector())
+ V = convertFromScalableVector(VT, V, DAG, Subtarget);
+ return V;
+ }
case ISD::VSCALE: {
MVT VT = Op.getSimpleValueType();
SDLoc DL(Op);
@@ -3899,15 +4983,56 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
return SDValue();
}
- case ISD::FP_EXTEND:
- case ISD::FP_ROUND:
+ case ISD::FMAXIMUM:
+ case ISD::FMINIMUM:
+ return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
+ case ISD::FP_EXTEND: {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue Op0 = Op.getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+ if (VT == MVT::f32 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin())
+ return DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
+ if (VT == MVT::f64 && Op0VT == MVT::bf16 && Subtarget.hasStdExtZfbfmin()) {
+ SDValue FloatVal =
+ DAG.getNode(RISCVISD::FP_EXTEND_BF16, DL, MVT::f32, Op0);
+ return DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, FloatVal);
+ }
+
+ if (!Op.getValueType().isVector())
+ return Op;
+ return lowerVectorFPExtendOrRoundLike(Op, DAG);
+ }
+ case ISD::FP_ROUND: {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue Op0 = Op.getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+ if (VT == MVT::bf16 && Op0VT == MVT::f32 && Subtarget.hasStdExtZfbfmin())
+ return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, Op0);
+ if (VT == MVT::bf16 && Op0VT == MVT::f64 && Subtarget.hasStdExtZfbfmin() &&
+ Subtarget.hasStdExtDOrZdinx()) {
+ SDValue FloatVal =
+ DAG.getNode(ISD::FP_ROUND, DL, MVT::f32, Op0,
+ DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+ return DAG.getNode(RISCVISD::FP_ROUND_BF16, DL, MVT::bf16, FloatVal);
+ }
+
if (!Op.getValueType().isVector())
return Op;
return lowerVectorFPExtendOrRoundLike(Op, DAG);
+ }
+ case ISD::STRICT_FP_ROUND:
+ case ISD::STRICT_FP_EXTEND:
+ return lowerStrictFPExtendOrRoundLike(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP: {
+ case ISD::UINT_TO_FP:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ case ISD::STRICT_SINT_TO_FP:
+ case ISD::STRICT_UINT_TO_FP: {
// RVV can only do fp<->int conversions to types half/double the size as
// the source. We custom-lower any conversions that do two hops into
// sequences.
@@ -3915,7 +5040,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
if (!VT.isVector())
return Op;
SDLoc DL(Op);
- SDValue Src = Op.getOperand(0);
+ bool IsStrict = Op->isStrictFPOpcode();
+ SDValue Src = Op.getOperand(0 + IsStrict);
MVT EltVT = VT.getVectorElementType();
MVT SrcVT = Src.getSimpleValueType();
MVT SrcEltVT = SrcVT.getVectorElementType();
@@ -3931,10 +5057,14 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
// Do a regular integer sign/zero extension then convert to float.
MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize / 2),
VT.getVectorElementCount());
- unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
+ unsigned ExtOpcode = (Op.getOpcode() == ISD::UINT_TO_FP ||
+ Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
? ISD::ZERO_EXTEND
: ISD::SIGN_EXTEND;
SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
+ if (IsStrict)
+ return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(),
+ Op.getOperand(0), Ext);
return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
}
// FP2Int
@@ -3942,6 +5072,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
// Do one doubling fp_extend then complete the operation by converting
// to int.
MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ if (IsStrict) {
+ auto [FExt, Chain] =
+ DAG.getStrictFPExtendOrRound(Src, Op.getOperand(0), DL, InterimFVT);
+ return DAG.getNode(Op.getOpcode(), DL, Op->getVTList(), Chain, FExt);
+ }
SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
}
@@ -3952,6 +5087,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
// One narrowing int_to_fp, then an fp_round.
assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ if (IsStrict) {
+ SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL,
+ DAG.getVTList(InterimFVT, MVT::Other),
+ Op.getOperand(0), Src);
+ SDValue Chain = Int2FP.getValue(1);
+ return DAG.getStrictFPExtendOrRound(Int2FP, Chain, DL, VT).first;
+ }
SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
return DAG.getFPExtendOrRound(Int2FP, DL, VT);
}
@@ -3960,6 +5102,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
// representable by the integer, the result is poison.
MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
VT.getVectorElementCount());
+ if (IsStrict) {
+ SDValue FP2Int =
+ DAG.getNode(Op.getOpcode(), DL, DAG.getVTList(IVecVT, MVT::Other),
+ Op.getOperand(0), Src);
+ SDValue Res = DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
+ return DAG.getMergeValues({Res, FP2Int.getValue(1)}, DL);
+ }
SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
}
@@ -3986,6 +5135,18 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::UINT_TO_FP:
RVVOpc = RISCVISD::UINT_TO_FP_VL;
break;
+ case ISD::STRICT_FP_TO_SINT:
+ RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_X_F_VL;
+ break;
+ case ISD::STRICT_FP_TO_UINT:
+ RVVOpc = RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL;
+ break;
+ case ISD::STRICT_SINT_TO_FP:
+ RVVOpc = RISCVISD::STRICT_SINT_TO_FP_VL;
+ break;
+ case ISD::STRICT_UINT_TO_FP:
+ RVVOpc = RISCVISD::STRICT_UINT_TO_FP_VL;
+ break;
}
MVT ContainerVT = getContainerForFixedLengthVector(VT);
@@ -3996,15 +5157,80 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
+ if (IsStrict) {
+ Src = DAG.getNode(RVVOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
+ Op.getOperand(0), Src, Mask, VL);
+ SDValue SubVec = convertFromScalableVector(VT, Src, DAG, Subtarget);
+ return DAG.getMergeValues({SubVec, Src.getValue(1)}, DL);
+ }
Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
return convertFromScalableVector(VT, Src, DAG, Subtarget);
}
case ISD::FP_TO_SINT_SAT:
case ISD::FP_TO_UINT_SAT:
return lowerFP_TO_INT_SAT(Op, DAG, Subtarget);
+ case ISD::FP_TO_BF16: {
+ // Custom lower to ensure the libcall return is passed in an FPR on hard
+ // float ABIs.
+ assert(!Subtarget.isSoftFPABI() && "Unexpected custom legalization");
+ SDLoc DL(Op);
+ MakeLibCallOptions CallOptions;
+ RTLIB::Libcall LC =
+ RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::bf16);
+ SDValue Res =
+ makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
+ if (Subtarget.is64Bit())
+ return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
+ return DAG.getBitcast(MVT::i32, Res);
+ }
+ case ISD::BF16_TO_FP: {
+ assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalization");
+ MVT VT = Op.getSimpleValueType();
+ SDLoc DL(Op);
+ Op = DAG.getNode(
+ ISD::SHL, DL, Op.getOperand(0).getValueType(), Op.getOperand(0),
+ DAG.getShiftAmountConstant(16, Op.getOperand(0).getValueType(), DL));
+ SDValue Res = Subtarget.is64Bit()
+ ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Op)
+ : DAG.getBitcast(MVT::f32, Op);
+ // fp_extend if the target VT is bigger than f32.
+ if (VT != MVT::f32)
+ return DAG.getNode(ISD::FP_EXTEND, DL, VT, Res);
+ return Res;
+ }
+ case ISD::FP_TO_FP16: {
+ // Custom lower to ensure the libcall return is passed in an FPR on hard
+ // float ABIs.
+ assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
+ SDLoc DL(Op);
+ MakeLibCallOptions CallOptions;
+ RTLIB::Libcall LC =
+ RTLIB::getFPROUND(Op.getOperand(0).getValueType(), MVT::f16);
+ SDValue Res =
+ makeLibCall(DAG, LC, MVT::f32, Op.getOperand(0), CallOptions, DL).first;
+ if (Subtarget.is64Bit())
+ return DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Res);
+ return DAG.getBitcast(MVT::i32, Res);
+ }
+ case ISD::FP16_TO_FP: {
+ // Custom lower to ensure the libcall argument is passed in an FPR on hard
+ // float ABIs.
+ assert(Subtarget.hasStdExtFOrZfinx() && "Unexpected custom legalisation");
+ SDLoc DL(Op);
+ MakeLibCallOptions CallOptions;
+ SDValue Arg = Subtarget.is64Bit()
+ ? DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32,
+ Op.getOperand(0))
+ : DAG.getBitcast(MVT::f32, Op.getOperand(0));
+ SDValue Res =
+ makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Arg, CallOptions, DL)
+ .first;
+ return Res;
+ }
case ISD::FTRUNC:
case ISD::FCEIL:
case ISD::FFLOOR:
+ case ISD::FNEARBYINT:
case ISD::FRINT:
case ISD::FROUND:
case ISD::FROUNDEVEN:
@@ -4042,10 +5268,19 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
if (Op.getOperand(1).getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskVecReduction(Op, DAG, /*IsVP*/ true);
return lowerVPREDUCE(Op, DAG);
+ case ISD::UNDEF: {
+ MVT ContainerVT = getContainerForFixedLengthVector(Op.getSimpleValueType());
+ return convertFromScalableVector(Op.getSimpleValueType(),
+ DAG.getUNDEF(ContainerVT), DAG, Subtarget);
+ }
case ISD::INSERT_SUBVECTOR:
return lowerINSERT_SUBVECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return lowerEXTRACT_SUBVECTOR(Op, DAG);
+ case ISD::VECTOR_DEINTERLEAVE:
+ return lowerVECTOR_DEINTERLEAVE(Op, DAG);
+ case ISD::VECTOR_INTERLEAVE:
+ return lowerVECTOR_INTERLEAVE(Op, DAG);
case ISD::STEP_VECTOR:
return lowerSTEP_VECTOR(Op, DAG);
case ISD::VECTOR_REVERSE:
@@ -4099,7 +5334,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::SELECT_CC: {
// This occurs because we custom legalize SETGT and SETUGT for setcc. That
// causes LegalizeDAG to think we need to custom legalize select_cc. Expand
- // into separate SETCC+SELECT_CC just like LegalizeDAG.
+ // into separate SETCC+SELECT just like LegalizeDAG.
SDValue Tmp1 = Op.getOperand(0);
SDValue Tmp2 = Op.getOperand(1);
SDValue True = Op.getOperand(2);
@@ -4153,83 +5388,46 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerFixedLengthVectorSetccToRVV(Op, DAG);
}
case ISD::ADD:
- return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL, /*HasMergeOp*/ true);
case ISD::SUB:
- return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL, /*HasMergeOp*/ true);
case ISD::MUL:
- return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL, /*HasMergeOp*/ true);
case ISD::MULHS:
- return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL, /*HasMergeOp*/ true);
case ISD::MULHU:
- return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL, /*HasMergeOp*/ true);
case ISD::AND:
- return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
- RISCVISD::AND_VL);
case ISD::OR:
- return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
- RISCVISD::OR_VL);
case ISD::XOR:
- return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
- RISCVISD::XOR_VL);
case ISD::SDIV:
- return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL, /*HasMergeOp*/ true);
case ISD::SREM:
- return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL, /*HasMergeOp*/ true);
case ISD::UDIV:
- return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL, /*HasMergeOp*/ true);
case ISD::UREM:
- return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL, /*HasMergeOp*/ true);
+ return lowerToScalableOp(Op, DAG);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
if (Op.getSimpleValueType().isFixedLengthVector())
- return lowerFixedLengthVectorShiftToRVV(Op, DAG);
+ return lowerToScalableOp(Op, DAG);
// This can be called for an i32 shift amount that needs to be promoted.
assert(Op.getOperand(1).getValueType() == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
return SDValue();
case ISD::SADDSAT:
- return lowerToScalableOp(Op, DAG, RISCVISD::SADDSAT_VL,
- /*HasMergeOp*/ true);
case ISD::UADDSAT:
- return lowerToScalableOp(Op, DAG, RISCVISD::UADDSAT_VL,
- /*HasMergeOp*/ true);
case ISD::SSUBSAT:
- return lowerToScalableOp(Op, DAG, RISCVISD::SSUBSAT_VL,
- /*HasMergeOp*/ true);
case ISD::USUBSAT:
- return lowerToScalableOp(Op, DAG, RISCVISD::USUBSAT_VL,
- /*HasMergeOp*/ true);
case ISD::FADD:
- return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL, /*HasMergeOp*/ true);
case ISD::FSUB:
- return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL, /*HasMergeOp*/ true);
case ISD::FMUL:
- return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL, /*HasMergeOp*/ true);
case ISD::FDIV:
- return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL, /*HasMergeOp*/ true);
case ISD::FNEG:
- return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
case ISD::FABS:
- return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
case ISD::FSQRT:
- return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
case ISD::FMA:
- return lowerToScalableOp(Op, DAG, RISCVISD::VFMADD_VL);
case ISD::SMIN:
- return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL, /*HasMergeOp*/ true);
case ISD::SMAX:
- return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL, /*HasMergeOp*/ true);
case ISD::UMIN:
- return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL, /*HasMergeOp*/ true);
case ISD::UMAX:
- return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL, /*HasMergeOp*/ true);
case ISD::FMINNUM:
- return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL,
- /*HasMergeOp*/ true);
case ISD::FMAXNUM:
- return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL,
- /*HasMergeOp*/ true);
+ return lowerToScalableOp(Op, DAG);
case ISD::ABS:
case ISD::VP_ABS:
return lowerABS(Op, DAG);
@@ -4241,6 +5439,24 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerFixedLengthVectorSelectToRVV(Op, DAG);
case ISD::FCOPYSIGN:
return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
+ case ISD::STRICT_FADD:
+ case ISD::STRICT_FSUB:
+ case ISD::STRICT_FMUL:
+ case ISD::STRICT_FDIV:
+ case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
+ return lowerToScalableOp(Op, DAG);
+ case ISD::STRICT_FSETCC:
+ case ISD::STRICT_FSETCCS:
+ return lowerVectorStrictFSetcc(Op, DAG);
+ case ISD::STRICT_FCEIL:
+ case ISD::STRICT_FRINT:
+ case ISD::STRICT_FFLOOR:
+ case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FNEARBYINT:
+ case ISD::STRICT_FROUND:
+ case ISD::STRICT_FROUNDEVEN:
+ return lowerVectorStrictFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
case ISD::MGATHER:
case ISD::VP_GATHER:
return lowerMaskedGather(Op, DAG);
@@ -4338,6 +5554,22 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerVPOp(Op, DAG, RISCVISD::UMIN_VL, /*HasMergeOp*/ true);
case ISD::VP_UMAX:
return lowerVPOp(Op, DAG, RISCVISD::UMAX_VL, /*HasMergeOp*/ true);
+ case ISD::VP_BITREVERSE:
+ return lowerVPOp(Op, DAG, RISCVISD::BITREVERSE_VL, /*HasMergeOp*/ true);
+ case ISD::VP_BSWAP:
+ return lowerVPOp(Op, DAG, RISCVISD::BSWAP_VL, /*HasMergeOp*/ true);
+ case ISD::VP_CTLZ:
+ case ISD::VP_CTLZ_ZERO_UNDEF:
+ if (Subtarget.hasStdExtZvbb())
+ return lowerVPOp(Op, DAG, RISCVISD::CTLZ_VL, /*HasMergeOp*/ true);
+ return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
+ case ISD::VP_CTTZ:
+ case ISD::VP_CTTZ_ZERO_UNDEF:
+ if (Subtarget.hasStdExtZvbb())
+ return lowerVPOp(Op, DAG, RISCVISD::CTTZ_VL, /*HasMergeOp*/ true);
+ return lowerCTLZ_CTTZ_ZERO_UNDEF(Op, DAG);
+ case ISD::VP_CTPOP:
+ return lowerVPOp(Op, DAG, RISCVISD::CTPOP_VL, /*HasMergeOp*/ true);
case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
return lowerVPStridedLoad(Op, DAG);
case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
@@ -4353,31 +5585,31 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
}
-static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
+static SDValue getTargetNode(GlobalAddressSDNode *N, const SDLoc &DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}
-static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
+static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
Flags);
}
-static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
+static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
N->getOffset(), Flags);
}
-static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
+static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
SelectionDAG &DAG, unsigned Flags) {
return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}
template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
- bool IsLocal) const {
+ bool IsLocal, bool IsExternWeak) const {
SDLoc DL(N);
EVT Ty = getPointerTy(DAG.getDataLayout());
@@ -4394,7 +5626,7 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
// Use PC-relative addressing to access the GOT for this symbol, then load
- // the address from the GOT. This generates the pattern (PseudoLA sym),
+ // the address from the GOT. This generates the pattern (PseudoLGA sym),
// which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
MachineFunction &MF = DAG.getMachineFunction();
MachineMemOperand *MemOp = MF.getMachineMemOperand(
@@ -4403,7 +5635,7 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
MachineMemOperand::MOInvariant,
LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
SDValue Load =
- DAG.getMemIntrinsicNode(RISCVISD::LA, DL, DAG.getVTList(Ty, MVT::Other),
+ DAG.getMemIntrinsicNode(RISCVISD::LGA, DL, DAG.getVTList(Ty, MVT::Other),
{DAG.getEntryNode(), Addr}, Ty, MemOp);
return Load;
}
@@ -4420,10 +5652,28 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
}
case CodeModel::Medium: {
+ SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
+ if (IsExternWeak) {
+ // An extern weak symbol may be undefined, i.e. have value 0, which may
+ // not be within 2GiB of PC, so use GOT-indirect addressing to access the
+ // symbol. This generates the pattern (PseudoLGA sym), which expands to
+ // (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MemOp = MF.getMachineMemOperand(
+ MachinePointerInfo::getGOT(MF),
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
+ SDValue Load =
+ DAG.getMemIntrinsicNode(RISCVISD::LGA, DL,
+ DAG.getVTList(Ty, MVT::Other),
+ {DAG.getEntryNode(), Addr}, Ty, MemOp);
+ return Load;
+ }
+
// Generate a sequence for accessing addresses within any 2GiB range within
// the address space. This generates the pattern (PseudoLLA sym), which
// expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
- SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
}
}
@@ -4433,7 +5683,8 @@ SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
assert(N->getOffset() == 0 && "unexpected offset in global node");
- return getAddr(N, DAG, N->getGlobal()->isDSOLocal());
+ const GlobalValue *GV = N->getGlobal();
+ return getAddr(N, DAG, GV->isDSOLocal(), GV->hasExternalWeakLinkage());
}
SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
@@ -4540,6 +5791,9 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
assert(N->getOffset() == 0 && "unexpected offset in global node");
+ if (DAG.getTarget().useEmulatedTLS())
+ return LowerToTLSEmulatedModel(N, DAG);
+
TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
if (DAG.getMachineFunction().getFunction().getCallingConv() ==
@@ -4563,21 +5817,40 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
return Addr;
}
-SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
- SDValue CondV = Op.getOperand(0);
- SDValue TrueV = Op.getOperand(1);
- SDValue FalseV = Op.getOperand(2);
- SDLoc DL(Op);
- MVT VT = Op.getSimpleValueType();
- MVT XLenVT = Subtarget.getXLenVT();
+// Return true if Val is equal to (setcc LHS, RHS, CC).
+// Return false if Val is the inverse of (setcc LHS, RHS, CC).
+// Otherwise, return std::nullopt.
+static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, SDValue Val) {
+ assert(Val->getOpcode() == ISD::SETCC);
+ SDValue LHS2 = Val.getOperand(0);
+ SDValue RHS2 = Val.getOperand(1);
+ ISD::CondCode CC2 = cast<CondCodeSDNode>(Val.getOperand(2))->get();
- // Lower vector SELECTs to VSELECTs by splatting the condition.
- if (VT.isVector()) {
- MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
- SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
- return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
+ if (LHS == LHS2 && RHS == RHS2) {
+ if (CC == CC2)
+ return true;
+ if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
+ return false;
+ } else if (LHS == RHS2 && RHS == LHS2) {
+ CC2 = ISD::getSetCCSwappedOperands(CC2);
+ if (CC == CC2)
+ return true;
+ if (CC == ISD::getSetCCInverse(CC2, LHS2.getValueType()))
+ return false;
}
+ return std::nullopt;
+}
+
+static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDValue CondV = N->getOperand(0);
+ SDValue TrueV = N->getOperand(1);
+ SDValue FalseV = N->getOperand(2);
+ MVT VT = N->getSimpleValueType(0);
+ SDLoc DL(N);
+
if (!Subtarget.hasShortForwardBranchOpt()) {
// (select c, -1, y) -> -c | y
if (isAllOnesConstant(TrueV)) {
@@ -4604,6 +5877,252 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
+ // Try to fold (select (setcc lhs, rhs, cc), truev, falsev) into bitwise ops
+ // when both truev and falsev are also setcc.
+ if (CondV.getOpcode() == ISD::SETCC && TrueV.getOpcode() == ISD::SETCC &&
+ FalseV.getOpcode() == ISD::SETCC) {
+ SDValue LHS = CondV.getOperand(0);
+ SDValue RHS = CondV.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(CondV.getOperand(2))->get();
+
+ // (select x, x, y) -> x | y
+ // (select !x, x, y) -> x & y
+ if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, TrueV)) {
+ return DAG.getNode(*MatchResult ? ISD::OR : ISD::AND, DL, VT, TrueV,
+ FalseV);
+ }
+ // (select x, y, x) -> x & y
+ // (select !x, y, x) -> x | y
+ if (std::optional<bool> MatchResult = matchSetCC(LHS, RHS, CC, FalseV)) {
+ return DAG.getNode(*MatchResult ? ISD::AND : ISD::OR, DL, VT, TrueV,
+ FalseV);
+ }
+ }
+
+ return SDValue();
+}
+
+/// RISC-V doesn't have general instructions for integer setne/seteq, but we can
+/// check for equality with 0. This function emits nodes that convert the
+/// seteq/setne into something that can be compared with 0.
+/// Based on RISCVDAGToDAGISel::selectSETCC but modified to produce
+/// target-independent SelectionDAG nodes rather than machine nodes.
+static SDValue selectSETCC(SDValue N, ISD::CondCode ExpectedCCVal,
+ SelectionDAG &DAG) {
+ assert(ISD::isIntEqualitySetCC(ExpectedCCVal) &&
+ "Unexpected condition code!");
+
+ // We're looking for a setcc.
+ if (N->getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ // Must be an equality comparison.
+ ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ if (CCVal != ExpectedCCVal)
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ if (!LHS.getValueType().isScalarInteger())
+ return SDValue();
+
+ // If the RHS side is 0, we don't need any extra instructions, return the LHS.
+ if (isNullConstant(RHS))
+ return LHS;
+
+ SDLoc DL(N);
+
+ if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
+ int64_t CVal = C->getSExtValue();
+ // If the RHS is -2048, we can use xori to produce 0 if the LHS is -2048 and
+ // non-zero otherwise.
+ if (CVal == -2048)
+ return DAG.getNode(ISD::XOR, DL, N->getValueType(0), LHS,
+ DAG.getConstant(CVal, DL, N->getValueType(0)));
+ // If the RHS is [-2047,2048], we can use addi with -RHS to produce 0 if the
+ // LHS is equal to the RHS and non-zero otherwise.
+ if (isInt<12>(CVal) || CVal == 2048)
+ return DAG.getNode(ISD::ADD, DL, N->getValueType(0), LHS,
+ DAG.getConstant(-CVal, DL, N->getValueType(0)));
+ }
+
+ // If nothing else we can XOR the LHS and RHS to produce zero if they are
+ // equal and a non-zero value if they aren't.
+ return DAG.getNode(ISD::XOR, DL, N->getValueType(0), LHS, RHS);
+}
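
A scalar model of the rewrite selectSETCC performs: fold the RHS constant into the LHS so an integer equality becomes a test against zero, which the czero/branch forms can consume directly. foldEqualityRHS and the sample constants are illustrative only:

#include <cassert>
#include <cstdint>

// Returns a value that is zero iff LHS == CVal, using the same immediate
// tricks as above (xori for -2048, addi for [-2047, 2048], xor otherwise).
static int64_t foldEqualityRHS(int64_t LHS, int64_t CVal) {
  if (CVal == 0)
    return LHS;            // already a compare-with-zero
  if (CVal == -2048)
    return LHS ^ CVal;     // xori keeps the immediate in range
  if (CVal >= -2047 && CVal <= 2048)
    return LHS + (-CVal);  // addi with the negated immediate
  return LHS ^ CVal;       // general case
}

int main() {
  for (int64_t C : {0, 5, -2048, 2048, 123456})
    for (int64_t X : {C, C + 1, -C, static_cast<int64_t>(0)})
      assert((foldEqualityRHS(X, C) == 0) == (X == C));
  return 0;
}
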
+
+// Transform `binOp (select cond, x, c0), c1` where `c0` and `c1` are constants
+// into `select cond, binOp(x, c1), binOp(c0, c1)` if profitable.
+// For now we only consider transformation profitable if `binOp(c0, c1)` ends up
+// being `0` or `-1`. In such cases we can replace `select` with `and`.
+// TODO: Should we also do this if `binOp(c0, c1)` is cheaper to materialize
+// than `c0`?
+static SDValue
+foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (Subtarget.hasShortForwardBranchOpt())
+ return SDValue();
+
+ unsigned SelOpNo = 0;
+ SDValue Sel = BO->getOperand(0);
+ if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
+ SelOpNo = 1;
+ Sel = BO->getOperand(1);
+ }
+
+ if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
+ return SDValue();
+
+ unsigned ConstSelOpNo = 1;
+ unsigned OtherSelOpNo = 2;
+ if (!dyn_cast<ConstantSDNode>(Sel->getOperand(ConstSelOpNo))) {
+ ConstSelOpNo = 2;
+ OtherSelOpNo = 1;
+ }
+ SDValue ConstSelOp = Sel->getOperand(ConstSelOpNo);
+ ConstantSDNode *ConstSelOpNode = dyn_cast<ConstantSDNode>(ConstSelOp);
+ if (!ConstSelOpNode || ConstSelOpNode->isOpaque())
+ return SDValue();
+
+ SDValue ConstBinOp = BO->getOperand(SelOpNo ^ 1);
+ ConstantSDNode *ConstBinOpNode = dyn_cast<ConstantSDNode>(ConstBinOp);
+ if (!ConstBinOpNode || ConstBinOpNode->isOpaque())
+ return SDValue();
+
+ SDLoc DL(Sel);
+ EVT VT = BO->getValueType(0);
+
+ SDValue NewConstOps[2] = {ConstSelOp, ConstBinOp};
+ if (SelOpNo == 1)
+ std::swap(NewConstOps[0], NewConstOps[1]);
+
+ SDValue NewConstOp =
+ DAG.FoldConstantArithmetic(BO->getOpcode(), DL, VT, NewConstOps);
+ if (!NewConstOp)
+ return SDValue();
+
+ const APInt &NewConstAPInt =
+ cast<ConstantSDNode>(NewConstOp)->getAPIntValue();
+ if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
+ return SDValue();
+
+ SDValue OtherSelOp = Sel->getOperand(OtherSelOpNo);
+ SDValue NewNonConstOps[2] = {OtherSelOp, ConstBinOp};
+ if (SelOpNo == 1)
+ std::swap(NewNonConstOps[0], NewNonConstOps[1]);
+ SDValue NewNonConstOp = DAG.getNode(BO->getOpcode(), DL, VT, NewNonConstOps);
+
+ SDValue NewT = (ConstSelOpNo == 1) ? NewConstOp : NewNonConstOp;
+ SDValue NewF = (ConstSelOpNo == 1) ? NewNonConstOp : NewConstOp;
+ return DAG.getSelect(DL, VT, Sel.getOperand(0), NewT, NewF);
+}
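
A scalar before/after model of the fold above, taking AND as the binary op with c0 = 0 and c1 = 0xff, so the folded constant arm is 0 and the select can later become a branchless czero form; the constants and helper names are illustrative:

#include <cassert>
#include <cstdint>

// before: and (select c, x, 0), 0xff
static uint64_t before(bool C, uint64_t X) { return (C ? X : 0) & 0xff; }
// after:  select c, (and x, 0xff), (and 0, 0xff)
static uint64_t after(bool C, uint64_t X) { return C ? (X & 0xff) : (0 & 0xff); }

int main() {
  for (uint64_t X : {0ULL, 1ULL, 0x1234ULL, ~0ULL})
    for (bool C : {false, true})
      assert(before(C, X) == after(C, X));
  return 0;
}
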
+
+SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDValue CondV = Op.getOperand(0);
+ SDValue TrueV = Op.getOperand(1);
+ SDValue FalseV = Op.getOperand(2);
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ // Lower vector SELECTs to VSELECTs by splatting the condition.
+ if (VT.isVector()) {
+ MVT SplatCondVT = VT.changeVectorElementType(MVT::i1);
+ SDValue CondSplat = DAG.getSplat(SplatCondVT, DL, CondV);
+ return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
+ }
+
+ // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
+ // nodes to implement the SELECT. Performing the lowering here allows for
+ // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
+ // sequence or RISCVISD::SELECT_CC node (branch-based select).
+ if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
+ VT.isScalarInteger()) {
+ if (SDValue NewCondV = selectSETCC(CondV, ISD::SETNE, DAG)) {
+ // (select (riscv_setne c), t, 0) -> (czero_eqz t, c)
+ if (isNullConstant(FalseV))
+ return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, NewCondV);
+ // (select (riscv_setne c), 0, f) -> (czero_nez f, c)
+ if (isNullConstant(TrueV))
+ return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, NewCondV);
+      // (select (riscv_setne c), t, f) -> (or (czero_eqz t, c), (czero_nez f,
+      // c))
+ return DAG.getNode(
+ ISD::OR, DL, VT,
+ DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, NewCondV),
+ DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, NewCondV));
+ }
+ if (SDValue NewCondV = selectSETCC(CondV, ISD::SETEQ, DAG)) {
+ // (select (riscv_seteq c), t, 0) -> (czero_nez t, c)
+ if (isNullConstant(FalseV))
+ return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, TrueV, NewCondV);
+ // (select (riscv_seteq c), 0, f) -> (czero_eqz f, c)
+ if (isNullConstant(TrueV))
+ return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, FalseV, NewCondV);
+      // (select (riscv_seteq c), t, f) -> (or (czero_eqz f, c), (czero_nez t,
+      // c))
+ return DAG.getNode(
+ ISD::OR, DL, VT,
+ DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, FalseV, NewCondV),
+ DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, TrueV, NewCondV));
+ }
+
+ // (select c, t, 0) -> (czero_eqz t, c)
+ if (isNullConstant(FalseV))
+ return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
+ // (select c, 0, f) -> (czero_nez f, c)
+ if (isNullConstant(TrueV))
+ return DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV);
+
+ // (select c, (and f, x), f) -> (or (and f, x), (czero_nez f, c))
+ if (TrueV.getOpcode() == ISD::AND &&
+ (TrueV.getOperand(0) == FalseV || TrueV.getOperand(1) == FalseV))
+ return DAG.getNode(
+ ISD::OR, DL, VT, TrueV,
+ DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
+ // (select c, t, (and t, x)) -> (or (czero_eqz t, c), (and t, x))
+ if (FalseV.getOpcode() == ISD::AND &&
+ (FalseV.getOperand(0) == TrueV || FalseV.getOperand(1) == TrueV))
+ return DAG.getNode(
+ ISD::OR, DL, VT, FalseV,
+ DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
+
+ // (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
+ return DAG.getNode(ISD::OR, DL, VT,
+ DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
+ DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
+ }
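
A scalar sketch of the czero.eqz/czero.nez semantics (Zicond / XVentanaCondOps) assumed above, and of the OR-based select they combine into; the helper names are illustrative:

#include <cassert>
#include <cstdint>

// czero.eqz rd, rs1, rs2: rd = (rs2 == 0) ? 0 : rs1
static uint64_t czero_eqz(uint64_t Val, uint64_t Cond) { return Cond ? Val : 0; }
// czero.nez rd, rs1, rs2: rd = (rs2 != 0) ? 0 : rs1
static uint64_t czero_nez(uint64_t Val, uint64_t Cond) { return Cond ? 0 : Val; }

// (select c, t, f) -> (or (czero_eqz t, c), (czero_nez f, c))
static uint64_t selectViaCZero(uint64_t Cond, uint64_t T, uint64_t F) {
  return czero_eqz(T, Cond) | czero_nez(F, Cond);
}

int main() {
  assert(selectViaCZero(1, 0xAA, 0xBB) == 0xAA);
  assert(selectViaCZero(0, 0xAA, 0xBB) == 0xBB);
  return 0;
}
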
+
+ if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
+ return V;
+
+ if (Op.hasOneUse()) {
+ unsigned UseOpc = Op->use_begin()->getOpcode();
+ if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
+ SDNode *BinOp = *Op->use_begin();
+ if (SDValue NewSel = foldBinOpIntoSelectIfProfitable(*Op->use_begin(),
+ DAG, Subtarget)) {
+ DAG.ReplaceAllUsesWith(BinOp, &NewSel);
+ return lowerSELECT(NewSel, DAG);
+ }
+ }
+ }
+
+ // (select cc, 1.0, 0.0) -> (sint_to_fp (zext cc))
+ // (select cc, 0.0, 1.0) -> (sint_to_fp (zext (xor cc, 1)))
+ const ConstantFPSDNode *FPTV = dyn_cast<ConstantFPSDNode>(TrueV);
+ const ConstantFPSDNode *FPFV = dyn_cast<ConstantFPSDNode>(FalseV);
+ if (FPTV && FPFV) {
+ if (FPTV->isExactlyValue(1.0) && FPFV->isExactlyValue(0.0))
+ return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV);
+ if (FPTV->isExactlyValue(0.0) && FPFV->isExactlyValue(1.0)) {
+ SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV,
+ DAG.getConstant(1, DL, XLenVT));
+ return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR);
+ }
+ }
+
// If the condition is not an integer SETCC which operates on XLenVT, we need
// to emit a RISCVISD::SELECT_CC comparing the condition to zero. i.e.:
// (select condv, truev, falsev)
@@ -5118,6 +6637,53 @@ SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
}
SDValue
+RISCVTargetLowering::lowerStrictFPExtendOrRoundLike(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Src = Op.getOperand(1);
+ MVT VT = Op.getSimpleValueType();
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
+ ContainerVT =
+ SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
+ Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
+ }
+
+ auto [Mask, VL] = getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
+
+  // RVV can only widen/truncate fp to types double/half the size of the source.
+ if ((VT.getVectorElementType() == MVT::f64 &&
+ SrcVT.getVectorElementType() == MVT::f16) ||
+ (VT.getVectorElementType() == MVT::f16 &&
+ SrcVT.getVectorElementType() == MVT::f64)) {
+ // For double rounding, the intermediate rounding should be round-to-odd.
+ unsigned InterConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
+ ? RISCVISD::STRICT_FP_EXTEND_VL
+ : RISCVISD::STRICT_VFNCVT_ROD_VL;
+ MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
+ Src = DAG.getNode(InterConvOpc, DL, DAG.getVTList(InterVT, MVT::Other),
+ Chain, Src, Mask, VL);
+ Chain = Src.getValue(1);
+ }
+
+ unsigned ConvOpc = Op.getOpcode() == ISD::STRICT_FP_EXTEND
+ ? RISCVISD::STRICT_FP_EXTEND_VL
+ : RISCVISD::STRICT_FP_ROUND_VL;
+ SDValue Res = DAG.getNode(ConvOpc, DL, DAG.getVTList(ContainerVT, MVT::Other),
+ Chain, Src, Mask, VL);
+ if (VT.isFixedLengthVector()) {
+    // StrictFP operations have two result values. Their lowered result should
+    // have the same result count.
+ SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
+ Res = DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
+ }
+ return Res;
+}
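
The round-to-odd intermediate conversion (STRICT_VFNCVT_ROD_VL) exists to avoid double rounding on the f64-to-f16 path. A minimal scalar demonstration of the hazard it prevents, assuming a compiler that provides _Float16 (recent Clang does for common targets); the chosen constant is illustrative:

#include <cassert>

int main() {
  // 1 + 2^-11 + 2^-30: direct f64->f16 rounds up to 1 + 2^-10, but rounding
  // to f32 first (round-to-nearest) drops the 2^-30 sticky bit and the second
  // rounding then ties back down to 1.0.
  double D = 1.0 + 0x1.0p-11 + 0x1.0p-30;
  _Float16 Direct = static_cast<_Float16>(D);
  _Float16 DoubleRounded = static_cast<_Float16>(static_cast<float>(D));
  assert(static_cast<double>(Direct) == 1.0 + 0x1.0p-10);
  assert(static_cast<double>(DoubleRounded) == 1.0);
  return 0;
}
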
+
+SDValue
RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
SelectionDAG &DAG) const {
bool IsVP =
@@ -5218,7 +6784,6 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
MVT XLenVT = Subtarget.getXLenVT();
- SDValue Zero = DAG.getConstant(0, DL, XLenVT);
bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
// Even i64-element vectors on RV32 can be lowered without scalar
// legalization if the most-significant 32 bits of the value are not affected
@@ -5251,9 +6816,8 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
// value at element 0, by using two vslide1down instructions in sequence on
// the i32 split lo/hi value. Use an equivalently-sized i32 vector for
// this.
- SDValue One = DAG.getConstant(1, DL, XLenVT);
- SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
- SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
+ SDValue ValLo, ValHi;
+ std::tie(ValLo, ValHi) = DAG.SplitScalar(Val, DL, MVT::i32, MVT::i32);
MVT I32ContainerVT =
MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
SDValue I32Mask =
@@ -5411,15 +6975,18 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
// promoted or expanded.
static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
- assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ assert((Op.getOpcode() == ISD::INTRINSIC_VOID ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
"Unexpected opcode");
if (!Subtarget.hasVInstructions())
return SDValue();
- bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
+ bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
+
SDLoc DL(Op);
const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
@@ -5483,11 +7050,9 @@ static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
// Convert the vector source to the equivalent nxvXi32 vector.
MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
-
- SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, ScalarOp,
- DAG.getConstant(0, DL, XLenVT));
- SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, ScalarOp,
- DAG.getConstant(1, DL, XLenVT));
+ SDValue ScalarLo, ScalarHi;
+ std::tie(ScalarLo, ScalarHi) =
+ DAG.SplitScalar(ScalarOp, DL, MVT::i32, MVT::i32);
// Double the VL since we halved SEW.
SDValue AVL = getVLOperand(Op);
@@ -5516,7 +7081,7 @@ static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
SDValue SETVLMAX = DAG.getTargetConstant(
- Intrinsic::riscv_vsetvlimax_opt, DL, MVT::i32);
+ Intrinsic::riscv_vsetvlimax, DL, MVT::i32);
I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
LMUL);
} else {
@@ -5531,7 +7096,7 @@ static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
SDValue SETVL =
- DAG.getTargetConstant(Intrinsic::riscv_vsetvli_opt, DL, MVT::i32);
+ DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, MVT::i32);
// Use the vsetvli instruction to get the actually-used length, which is
// related to the hardware implementation.
SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
@@ -5595,6 +7160,49 @@ static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}
+// Lower the llvm.get.vector.length intrinsic to vsetvli. We only support
+// scalable vector llvm.get.vector.length for now.
+//
+// We need to convert from a scalable VF to a vsetvli with VLMax equal to
+// (vscale * VF). The vscale and VF are independent of element width. We use
+// SEW=8 for the vsetvli because it is the only element width that supports all
+// fractional LMULs. The LMUL is chosen so that with SEW=8 the VLMax is
+// (vscale * VF), where vscale is defined as VLEN/RVVBitsPerBlock. The
+// InsertVSETVLI pass can fix up the vtype of the vsetvli if a different
+// SEW and LMUL are better for the surrounding vector instructions.
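+// For example (illustrative, with RVVBitsPerBlock = 64): for VF = 4, LMul1VF
+// is 8, so we pick the fractional LMUL = 1/2; a vsetvli with SEW=8, LMUL=1/2
+// gives VLMax = VLEN/16 = (VLEN/64) * 4 = vscale * VF, as required.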
+static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ // The smallest LMUL is only valid for the smallest element width.
+ const unsigned ElementWidth = 8;
+
+ // Determine the VF that corresponds to LMUL 1 for ElementWidth.
+ unsigned LMul1VF = RISCV::RVVBitsPerBlock / ElementWidth;
+ // We don't support VF==1 with ELEN==32.
+ unsigned MinVF = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
+
+ unsigned VF = N->getConstantOperandVal(2);
+ assert(VF >= MinVF && VF <= (LMul1VF * 8) && isPowerOf2_32(VF) &&
+ "Unexpected VF");
+ (void)MinVF;
+
+ bool Fractional = VF < LMul1VF;
+ unsigned LMulVal = Fractional ? LMul1VF / VF : VF / LMul1VF;
+ unsigned VLMUL = (unsigned)RISCVVType::encodeLMUL(LMulVal, Fractional);
+ unsigned VSEW = RISCVVType::encodeSEW(ElementWidth);
+
+ SDLoc DL(N);
+
+ SDValue LMul = DAG.getTargetConstant(VLMUL, DL, XLenVT);
+ SDValue Sew = DAG.getTargetConstant(VSEW, DL, XLenVT);
+
+ SDValue AVL = DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
+
+ SDValue ID = DAG.getTargetConstant(Intrinsic::riscv_vsetvli, DL, XLenVT);
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, ID, AVL, Sew, LMul);
+}
+
SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = Op.getConstantOperandVal(0);
@@ -5609,17 +7217,51 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getRegister(RISCV::X4, PtrVT);
}
case Intrinsic::riscv_orc_b:
- case Intrinsic::riscv_brev8: {
- unsigned Opc =
- IntNo == Intrinsic::riscv_brev8 ? RISCVISD::BREV8 : RISCVISD::ORC_B;
+ case Intrinsic::riscv_brev8:
+ case Intrinsic::riscv_sha256sig0:
+ case Intrinsic::riscv_sha256sig1:
+ case Intrinsic::riscv_sha256sum0:
+ case Intrinsic::riscv_sha256sum1:
+ case Intrinsic::riscv_sm3p0:
+ case Intrinsic::riscv_sm3p1: {
+ unsigned Opc;
+ switch (IntNo) {
+ case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
+ case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
+ case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
+ case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
+ case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
+ case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
+ case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
+ case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
+ }
+
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
}
+ case Intrinsic::riscv_sm4ks:
+ case Intrinsic::riscv_sm4ed: {
+ unsigned Opc =
+ IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
+ return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2),
+ Op.getOperand(3));
+ }
case Intrinsic::riscv_zip:
case Intrinsic::riscv_unzip: {
unsigned Opc =
IntNo == Intrinsic::riscv_zip ? RISCVISD::ZIP : RISCVISD::UNZIP;
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
}
+ case Intrinsic::riscv_clmul:
+ return DAG.getNode(RISCVISD::CLMUL, DL, XLenVT, Op.getOperand(1),
+ Op.getOperand(2));
+ case Intrinsic::riscv_clmulh:
+ return DAG.getNode(RISCVISD::CLMULH, DL, XLenVT, Op.getOperand(1),
+ Op.getOperand(2));
+ case Intrinsic::riscv_clmulr:
+ return DAG.getNode(RISCVISD::CLMULR, DL, XLenVT, Op.getOperand(1),
+ Op.getOperand(2));
+ case Intrinsic::experimental_get_vector_length:
+ return lowerGetVectorLength(Op.getNode(), DAG, Subtarget);
case Intrinsic::riscv_vmv_x_s:
assert(Op.getValueType() == XLenVT && "Unexpected VT!");
return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
@@ -5853,9 +7495,44 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Ops, Store->getMemoryVT(),
Store->getMemOperand());
}
+ case Intrinsic::riscv_seg2_store:
+ case Intrinsic::riscv_seg3_store:
+ case Intrinsic::riscv_seg4_store:
+ case Intrinsic::riscv_seg5_store:
+ case Intrinsic::riscv_seg6_store:
+ case Intrinsic::riscv_seg7_store:
+ case Intrinsic::riscv_seg8_store: {
+ SDLoc DL(Op);
+ static const Intrinsic::ID VssegInts[] = {
+ Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
+ Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
+ Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
+ Intrinsic::riscv_vsseg8};
+ // Operands are (chain, int_id, vec*, ptr, vl)
+ unsigned NF = Op->getNumOperands() - 4;
+ assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
+ MVT XLenVT = Subtarget.getXLenVT();
+ MVT VT = Op->getOperand(2).getSimpleValueType();
+ MVT ContainerVT = getContainerForFixedLengthVector(VT);
+
+ SDValue VL = getVLOp(VT.getVectorNumElements(), DL, DAG, Subtarget);
+ SDValue IntID = DAG.getTargetConstant(VssegInts[NF - 2], DL, XLenVT);
+ SDValue Ptr = Op->getOperand(NF + 2);
+
+ auto *FixedIntrinsic = cast<MemIntrinsicSDNode>(Op);
+ SmallVector<SDValue, 12> Ops = {FixedIntrinsic->getChain(), IntID};
+ for (unsigned i = 0; i < NF; i++)
+ Ops.push_back(convertToScalableVector(
+ ContainerVT, FixedIntrinsic->getOperand(2 + i), DAG, Subtarget));
+ Ops.append({Ptr, VL});
+
+ return DAG.getMemIntrinsicNode(
+ ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Ops,
+ FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
+ }
}
- return SDValue();
+ return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
static unsigned getRVVReductionOp(unsigned ISDOpcode) {
@@ -5964,7 +7641,7 @@ SDValue RISCVTargetLowering::lowerVectorMaskVecReduction(SDValue Op,
return DAG.getNode(BaseOpc, DL, XLenVT, SetCC, Op.getOperand(0));
}
-static bool hasNonZeroAVL(SDValue AVL) {
+static bool isNonZeroAVL(SDValue AVL) {
auto *RegisterAVL = dyn_cast<RegisterSDNode>(AVL);
auto *ImmAVL = dyn_cast<ConstantSDNode>(AVL);
return (RegisterAVL && RegisterAVL->getReg() == RISCV::X0) ||
@@ -5975,12 +7652,12 @@ static bool hasNonZeroAVL(SDValue AVL) {
/// scalar = reduce_op vec, scalar_start
static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
SDValue StartValue, SDValue Vec, SDValue Mask,
- SDValue VL, SDLoc DL, SelectionDAG &DAG,
+ SDValue VL, const SDLoc &DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
const MVT VecVT = Vec.getSimpleValueType();
const MVT M1VT = getLMUL1VT(VecVT);
const MVT XLenVT = Subtarget.getXLenVT();
- const bool NonZeroAVL = hasNonZeroAVL(VL);
+ const bool NonZeroAVL = isNonZeroAVL(VL);
// The reduction needs an LMUL1 input; do the splat at either LMUL1
// or the original VT if fractional.
@@ -5996,8 +7673,9 @@ static SDValue lowerReductionSeq(unsigned RVVOpcode, MVT ResVT,
DAG.getUNDEF(M1VT),
InitialValue, DAG.getConstant(0, DL, XLenVT));
SDValue PassThru = NonZeroAVL ? DAG.getUNDEF(M1VT) : InitialValue;
- SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, PassThru, Vec,
- InitialValue, Mask, VL);
+ SDValue Policy = DAG.getTargetConstant(RISCVII::TAIL_AGNOSTIC, DL, XLenVT);
+ SDValue Ops[] = {PassThru, Vec, InitialValue, Mask, VL, Policy};
+ SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, Ops);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
DAG.getConstant(0, DL, XLenVT));
}
@@ -6223,20 +7901,28 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
// Set the vector length to only the number of elements we care about. Note
// that for slideup this includes the offset.
- SDValue VL =
- getVLOp(OrigIdx + SubVecVT.getVectorNumElements(), DL, DAG, Subtarget);
- SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
+ unsigned EndIndex = OrigIdx + SubVecVT.getVectorNumElements();
+ SDValue VL = getVLOp(EndIndex, DL, DAG, Subtarget);
- // Use tail agnostic policy if OrigIdx is the last index of Vec.
+ // Use tail agnostic policy if we're inserting over Vec's tail.
unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
- if (VecVT.isFixedLengthVector() &&
- OrigIdx + 1 == VecVT.getVectorNumElements())
+ if (VecVT.isFixedLengthVector() && EndIndex == VecVT.getVectorNumElements())
Policy = RISCVII::TAIL_AGNOSTIC;
- SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
- SlideupAmt, Mask, VL, Policy);
+
+ // If we're inserting into the lowest elements, use a tail undisturbed
+ // vmv.v.v.
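+ // A vmv.v.v with VL = EndIndex and Vec as the passthru writes just the low
+ // EndIndex elements and leaves the remaining elements of Vec undisturbed.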
+ if (OrigIdx == 0) {
+ SubVec =
+ DAG.getNode(RISCVISD::VMV_V_V_VL, DL, ContainerVT, Vec, SubVec, VL);
+ } else {
+ SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
+ SubVec = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, SubVec,
+ SlideupAmt, Mask, VL, Policy);
+ }
+
if (VecVT.isFixedLengthVector())
- Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
- return DAG.getBitcast(Op.getValueType(), Slideup);
+ SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
+ return DAG.getBitcast(Op.getValueType(), SubVec);
}
unsigned SubRegIdx, RemIdx;
@@ -6280,33 +7966,39 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
DAG.getConstant(AlignedIdx, DL, XLenVT));
}
- SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
- // For scalable vectors this must be further multiplied by vscale.
- SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt);
+ SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
+ DAG.getUNDEF(InterSubVT), SubVec,
+ DAG.getConstant(0, DL, XLenVT));
auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
- // Construct the vector length corresponding to RemIdx + length(SubVecVT).
- VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT);
- VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL);
- VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
+ VL = computeVLMax(SubVecVT, DL, DAG);
- SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
- DAG.getUNDEF(InterSubVT), SubVec,
- DAG.getConstant(0, DL, XLenVT));
+ // If we're inserting into the lowest elements, use a tail undisturbed
+ // vmv.v.v.
+ if (RemIdx == 0) {
+ SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
+ SubVec, VL);
+ } else {
+ SDValue SlideupAmt =
+ DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
+
+ // Construct the vector length corresponding to RemIdx + length(SubVecVT).
+ VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
- SDValue Slideup = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract,
- SubVec, SlideupAmt, Mask, VL);
+ SubVec = getVSlideup(DAG, Subtarget, DL, InterSubVT, AlignedExtract, SubVec,
+ SlideupAmt, Mask, VL);
+ }
// If required, insert this subvector back into the correct vector register.
// This should resolve to an INSERT_SUBREG instruction.
if (VecVT.bitsGT(InterSubVT))
- Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup,
- DAG.getConstant(AlignedIdx, DL, XLenVT));
+ SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
+ DAG.getConstant(AlignedIdx, DL, XLenVT));
// We might have bitcast from a mask type: cast back to the original type if
// required.
- return DAG.getBitcast(Op.getSimpleValueType(), Slideup);
+ return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
}
SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
@@ -6413,9 +8105,8 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
// Slide this vector register down by the desired number of elements in order
// to place the desired subvector starting at element 0.
- SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
- // For scalable vectors this must be further multiplied by vscale.
- SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);
+ SDValue SlidedownAmt =
+ DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
auto [Mask, VL] = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
SDValue Slidedown =
@@ -6432,6 +8123,202 @@ SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
}
+// Widen a vector's operands to i8, then truncate its results back to the
+// original type, typically i1. All operand and result types must be the same.
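+// e.g. a VECTOR_DEINTERLEAVE of nxv16i1 operands is performed on nxv16i8 and
+// each result is converted back to i1 with a setcc against zero.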
+static SDValue widenVectorOpsToi8(SDValue N, const SDLoc &DL,
+ SelectionDAG &DAG) {
+ MVT VT = N.getSimpleValueType();
+ MVT WideVT = VT.changeVectorElementType(MVT::i8);
+ SmallVector<SDValue, 4> WideOps;
+ for (SDValue Op : N->ops()) {
+ assert(Op.getSimpleValueType() == VT &&
+ "Operands and result must be same type");
+ WideOps.push_back(DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Op));
+ }
+
+ unsigned NumVals = N->getNumValues();
+
+ SDVTList VTs = DAG.getVTList(SmallVector<EVT, 4>(
+ NumVals, N.getValueType().changeVectorElementType(MVT::i8)));
+ SDValue WideN = DAG.getNode(N.getOpcode(), DL, VTs, WideOps);
+ SmallVector<SDValue, 4> TruncVals;
+ for (unsigned I = 0; I < NumVals; I++) {
+ TruncVals.push_back(
+ DAG.getSetCC(DL, N->getSimpleValueType(I), WideN.getValue(I),
+ DAG.getConstant(0, DL, WideVT), ISD::SETNE));
+ }
+
+ if (TruncVals.size() > 1)
+ return DAG.getMergeValues(TruncVals, DL);
+ return TruncVals.front();
+}
+
+SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT VecVT = Op.getSimpleValueType();
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ assert(VecVT.isScalableVector() &&
+ "vector_interleave on non-scalable vector!");
+
+ // i1 element vectors need to be widened to i8
+ if (VecVT.getVectorElementType() == MVT::i1)
+ return widenVectorOpsToi8(Op, DL, DAG);
+
+ // If the VT is LMUL=8, we need to split and reassemble.
+ if (VecVT.getSizeInBits().getKnownMinValue() ==
+ (8 * RISCV::RVVBitsPerBlock)) {
+ auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
+ auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
+ EVT SplitVT = Op0Lo.getValueType();
+
+ SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
+ DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op0Hi);
+ SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL,
+ DAG.getVTList(SplitVT, SplitVT), Op1Lo, Op1Hi);
+
+ SDValue Even = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
+ ResLo.getValue(0), ResHi.getValue(0));
+ SDValue Odd = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, ResLo.getValue(1),
+ ResHi.getValue(1));
+ return DAG.getMergeValues({Even, Odd}, DL);
+ }
+
+ // Concatenate the two vectors as one vector to deinterleave
+ MVT ConcatVT =
+ MVT::getVectorVT(VecVT.getVectorElementType(),
+ VecVT.getVectorElementCount().multiplyCoefficientBy(2));
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
+ Op.getOperand(0), Op.getOperand(1));
+
+ // We want to operate on all lanes, so get the mask and VL for it
+ auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
+ SDValue Passthru = DAG.getUNDEF(ConcatVT);
+
+ // We can deinterleave through vnsrl.wi if the element type is smaller than
+ // ELEN
+ if (VecVT.getScalarSizeInBits() < Subtarget.getELEN()) {
+ SDValue Even =
+ getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, Subtarget, DAG);
+ SDValue Odd =
+ getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, Subtarget, DAG);
+ return DAG.getMergeValues({Even, Odd}, DL);
+ }
+
+ // For the indices, use the same SEW to avoid an extra vsetvli
+ MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
+ // Create a vector of even indices {0, 2, 4, ...}
+ SDValue EvenIdx =
+ DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
+ // Create a vector of odd indices {1, 3, 5, ... }
+ SDValue OddIdx =
+ DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));
+
+ // Gather the even and odd elements into two separate vectors
+ SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
+ Concat, EvenIdx, Passthru, Mask, VL);
+ SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
+ Concat, OddIdx, Passthru, Mask, VL);
+
+ // Extract the result half of the gather for even and odd
+ SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
+ DAG.getConstant(0, DL, XLenVT));
+ SDValue Odd = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, OddWide,
+ DAG.getConstant(0, DL, XLenVT));
+
+ return DAG.getMergeValues({Even, Odd}, DL);
+}
+
+SDValue RISCVTargetLowering::lowerVECTOR_INTERLEAVE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT VecVT = Op.getSimpleValueType();
+
+ assert(VecVT.isScalableVector() &&
+ "vector_interleave on non-scalable vector!");
+
+ // i1 vectors need to be widened to i8
+ if (VecVT.getVectorElementType() == MVT::i1)
+ return widenVectorOpsToi8(Op, DL, DAG);
+
+ MVT XLenVT = Subtarget.getXLenVT();
+ SDValue VL = DAG.getRegister(RISCV::X0, XLenVT);
+
+ // If the VT is LMUL=8, we need to split and reassemble.
+ if (VecVT.getSizeInBits().getKnownMinValue() == (8 * RISCV::RVVBitsPerBlock)) {
+ auto [Op0Lo, Op0Hi] = DAG.SplitVectorOperand(Op.getNode(), 0);
+ auto [Op1Lo, Op1Hi] = DAG.SplitVectorOperand(Op.getNode(), 1);
+ EVT SplitVT = Op0Lo.getValueType();
+
+ SDValue ResLo = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
+ DAG.getVTList(SplitVT, SplitVT), Op0Lo, Op1Lo);
+ SDValue ResHi = DAG.getNode(ISD::VECTOR_INTERLEAVE, DL,
+ DAG.getVTList(SplitVT, SplitVT), Op0Hi, Op1Hi);
+
+ SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
+ ResLo.getValue(0), ResLo.getValue(1));
+ SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT,
+ ResHi.getValue(0), ResHi.getValue(1));
+ return DAG.getMergeValues({Lo, Hi}, DL);
+ }
+
+ SDValue Interleaved;
+
+ // If the element type is smaller than ELEN, then we can interleave with
+ // vwaddu.vv and vwmaccu.vx
+ if (VecVT.getScalarSizeInBits() < Subtarget.getELEN()) {
+ Interleaved = getWideningInterleave(Op.getOperand(0), Op.getOperand(1), DL,
+ DAG, Subtarget);
+ } else {
+ // Otherwise, fall back to using vrgatherei16.vv
+ MVT ConcatVT =
+ MVT::getVectorVT(VecVT.getVectorElementType(),
+ VecVT.getVectorElementCount().multiplyCoefficientBy(2));
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
+ Op.getOperand(0), Op.getOperand(1));
+
+ MVT IdxVT = ConcatVT.changeVectorElementType(MVT::i16);
+
+ // 0 1 2 3 4 5 6 7 ...
+ SDValue StepVec = DAG.getStepVector(DL, IdxVT);
+
+ // 1 1 1 1 1 1 1 1 ...
+ SDValue Ones = DAG.getSplatVector(IdxVT, DL, DAG.getConstant(1, DL, XLenVT));
+
+ // 1 0 1 0 1 0 1 0 ...
+ SDValue OddMask = DAG.getNode(ISD::AND, DL, IdxVT, StepVec, Ones);
+ OddMask = DAG.getSetCC(
+ DL, IdxVT.changeVectorElementType(MVT::i1), OddMask,
+ DAG.getSplatVector(IdxVT, DL, DAG.getConstant(0, DL, XLenVT)),
+ ISD::CondCode::SETNE);
+
+ SDValue VLMax = DAG.getSplatVector(IdxVT, DL, computeVLMax(VecVT, DL, DAG));
+
+ // Build up the index vector for interleaving the concatenated vector
+ // 0 0 1 1 2 2 3 3 ...
+ SDValue Idx = DAG.getNode(ISD::SRL, DL, IdxVT, StepVec, Ones);
+ // 0 n 1 n+1 2 n+2 3 n+3 ...
+ Idx =
+ DAG.getNode(RISCVISD::ADD_VL, DL, IdxVT, Idx, VLMax, Idx, OddMask, VL);
+
+ // Then perform the interleave
+ // v[0] v[n] v[1] v[n+1] v[2] v[n+2] v[3] v[n+3] ...
+ SDValue TrueMask = getAllOnesMask(IdxVT, VL, DL, DAG);
+ Interleaved = DAG.getNode(RISCVISD::VRGATHEREI16_VV_VL, DL, ConcatVT,
+ Concat, Idx, DAG.getUNDEF(ConcatVT), TrueMask, VL);
+ }
+
+ // Extract the two halves from the interleaved result
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
+ DAG.getVectorIdxConstant(0, DL));
+ SDValue Hi = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, VecVT, Interleaved,
+ DAG.getVectorIdxConstant(VecVT.getVectorMinNumElements(), DL));
+
+ return DAG.getMergeValues({Lo, Hi}, DL);
+}
+
// Lower step_vector to the vid instruction. Any non-identity step value must
// be accounted for by manual expansion.
SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
@@ -6516,11 +8403,9 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
// Calculate VLMAX-1 for the desired SEW.
- unsigned MinElts = VecVT.getVectorMinNumElements();
- SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
- getVLOp(MinElts, DL, DAG, Subtarget));
- SDValue VLMinus1 =
- DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT));
+ SDValue VLMinus1 = DAG.getNode(ISD::SUB, DL, XLenVT,
+ computeVLMax(VecVT, DL, DAG),
+ DAG.getConstant(1, DL, XLenVT));
// Splat VLMAX-1 taking care to handle SEW==64 on RV32.
bool IsRV32E64 =
@@ -6548,9 +8433,7 @@ SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
MVT XLenVT = Subtarget.getXLenVT();
MVT VecVT = Op.getSimpleValueType();
- unsigned MinElts = VecVT.getVectorMinNumElements();
- SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
- getVLOp(MinElts, DL, DAG, Subtarget));
+ SDValue VLMax = computeVLMax(VecVT, DL, DAG);
int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
SDValue DownOffset, UpOffset;
@@ -6792,29 +8675,92 @@ RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
}
-SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
- SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
+SDValue RISCVTargetLowering::lowerVectorStrictFSetcc(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned Opc = Op.getOpcode();
+ SDLoc DL(Op);
+ SDValue Chain = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ SDValue CC = Op.getOperand(3);
+ ISD::CondCode CCVal = cast<CondCodeSDNode>(CC)->get();
MVT VT = Op.getSimpleValueType();
+ MVT InVT = Op1.getSimpleValueType();
+
+ // RVV VMFEQ/VMFNE ignore qNaN, so we expand strict_fsetccs with OEQ/UNE
+ // condition codes.
+ if (Opc == ISD::STRICT_FSETCCS) {
+ // Expand strict_fsetccs(x, y, oeq) to
+ // (and (strict_fsetccs x, y, ole), (strict_fsetccs y, x, ole))
+ SDVTList VTList = Op->getVTList();
+ if (CCVal == ISD::SETEQ || CCVal == ISD::SETOEQ) {
+ SDValue OLECCVal = DAG.getCondCode(ISD::SETOLE);
+ SDValue Tmp1 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
+ Op2, OLECCVal);
+ SDValue Tmp2 = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op2,
+ Op1, OLECCVal);
+ SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ Tmp1.getValue(1), Tmp2.getValue(1));
+ // Tmp1 and Tmp2 might be the same node.
+ if (Tmp1 != Tmp2)
+ Tmp1 = DAG.getNode(ISD::AND, DL, VT, Tmp1, Tmp2);
+ return DAG.getMergeValues({Tmp1, OutChain}, DL);
+ }
- if (VT.getVectorElementType() == MVT::i1)
- return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMergeOp*/ false,
- /*HasMask*/ false);
-
- return lowerToScalableOp(Op, DAG, VecOpc, /*HasMergeOp*/ true);
-}
+ // Expand (strict_fsetccs x, y, une) to (not (strict_fsetccs x, y, oeq))
+ if (CCVal == ISD::SETNE || CCVal == ISD::SETUNE) {
+ SDValue OEQCCVal = DAG.getCondCode(ISD::SETOEQ);
+ SDValue OEQ = DAG.getNode(ISD::STRICT_FSETCCS, DL, VTList, Chain, Op1,
+ Op2, OEQCCVal);
+ SDValue Res = DAG.getNOT(DL, OEQ, VT);
+ return DAG.getMergeValues({Res, OEQ.getValue(1)}, DL);
+ }
+ }
-SDValue
-RISCVTargetLowering::lowerFixedLengthVectorShiftToRVV(SDValue Op,
- SelectionDAG &DAG) const {
- unsigned Opc;
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Unexpected opcode!");
- case ISD::SHL: Opc = RISCVISD::SHL_VL; break;
- case ISD::SRA: Opc = RISCVISD::SRA_VL; break;
- case ISD::SRL: Opc = RISCVISD::SRL_VL; break;
+ MVT ContainerInVT = InVT;
+ if (InVT.isFixedLengthVector()) {
+ ContainerInVT = getContainerForFixedLengthVector(InVT);
+ Op1 = convertToScalableVector(ContainerInVT, Op1, DAG, Subtarget);
+ Op2 = convertToScalableVector(ContainerInVT, Op2, DAG, Subtarget);
+ }
+ MVT MaskVT = getMaskTypeFor(ContainerInVT);
+
+ auto [Mask, VL] = getDefaultVLOps(InVT, ContainerInVT, DL, DAG, Subtarget);
+
+ SDValue Res;
+ if (Opc == ISD::STRICT_FSETCC &&
+ (CCVal == ISD::SETLT || CCVal == ISD::SETOLT || CCVal == ISD::SETLE ||
+ CCVal == ISD::SETOLE)) {
+ // VMFLT/VMFLE/VMFGT/VMFGE raise an exception for qNaN. Generate a mask that
+ // is only active when both input elements are ordered.
+ SDValue True = getAllOnesMask(ContainerInVT, VL, DL, DAG);
+ SDValue OrderMask1 = DAG.getNode(
+ RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
+ {Chain, Op1, Op1, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
+ True, VL});
+ SDValue OrderMask2 = DAG.getNode(
+ RISCVISD::STRICT_FSETCC_VL, DL, DAG.getVTList(MaskVT, MVT::Other),
+ {Chain, Op2, Op2, DAG.getCondCode(ISD::SETOEQ), DAG.getUNDEF(MaskVT),
+ True, VL});
+ Mask =
+ DAG.getNode(RISCVISD::VMAND_VL, DL, MaskVT, OrderMask1, OrderMask2, VL);
+ // Use Mask as the merge operand to let the result be 0 if either of the
+ // inputs is unordered.
+ Res = DAG.getNode(RISCVISD::STRICT_FSETCCS_VL, DL,
+ DAG.getVTList(MaskVT, MVT::Other),
+ {Chain, Op1, Op2, CC, Mask, Mask, VL});
+ } else {
+ unsigned RVVOpc = Opc == ISD::STRICT_FSETCC ? RISCVISD::STRICT_FSETCC_VL
+ : RISCVISD::STRICT_FSETCCS_VL;
+ Res = DAG.getNode(RVVOpc, DL, DAG.getVTList(MaskVT, MVT::Other),
+ {Chain, Op1, Op2, CC, DAG.getUNDEF(MaskVT), Mask, VL});
}
- return lowerToScalableOp(Op, DAG, Opc, /*HasMergeOp*/ true);
+ if (VT.isFixedLengthVector()) {
+ SDValue SubVec = convertFromScalableVector(VT, Res, DAG, Subtarget);
+ return DAG.getMergeValues({SubVec, Res.getValue(1)}, DL);
+ }
+ return Res;
}
// Lower vector ABS to smax(X, sub(0, X)).
@@ -6835,6 +8781,9 @@ SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
SDValue Mask, VL;
if (Op->getOpcode() == ISD::VP_ABS) {
Mask = Op->getOperand(1);
+ if (VT.isFixedLengthVector())
+ Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
+ Subtarget);
VL = Op->getOperand(2);
} else
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
@@ -6897,9 +8846,12 @@ SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
return convertFromScalableVector(VT, Select, DAG, Subtarget);
}
-SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
- unsigned NewOpc, bool HasMergeOp,
- bool HasMask) const {
+SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned NewOpc = getRISCVVLOp(Op);
+ bool HasMergeOp = hasMergeOp(NewOpc);
+ bool HasMask = hasMaskOp(NewOpc);
+
MVT VT = Op.getSimpleValueType();
MVT ContainerVT = getContainerForFixedLengthVector(VT);
@@ -6928,6 +8880,16 @@ SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
Ops.push_back(Mask);
Ops.push_back(VL);
+ // StrictFP operations have two result values. Their lowered result should
+ // have the same result count.
+ if (Op->isStrictFPOpcode()) {
+ SDValue ScalableRes =
+ DAG.getNode(NewOpc, DL, DAG.getVTList(ContainerVT, MVT::Other), Ops,
+ Op->getFlags());
+ SDValue SubVec = convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
+ return DAG.getMergeValues({SubVec, ScalableRes.getValue(1)}, DL);
+ }
+
SDValue ScalableRes =
DAG.getNode(NewOpc, DL, ContainerVT, Ops, Op->getFlags());
return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
@@ -7534,8 +9496,8 @@ SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
- // Encoding used for rounding mode in RISCV differs from that used in
- // FLT_ROUNDS. To convert it the RISCV rounding mode is used as an index in a
+ // Encoding used for rounding mode in RISC-V differs from that used in
+ // FLT_ROUNDS. To convert it, the RISC-V rounding mode is used as an index in a
// table, which consists of a sequence of 4-bit fields, each representing
// corresponding FLT_ROUNDS mode.
static const int Table =
@@ -7564,10 +9526,10 @@ SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
SDValue SysRegNo = DAG.getTargetConstant(
RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
- // Encoding used for rounding mode in RISCV differs from that used in
+ // Encoding used for rounding mode in RISC-V differs from that used in
// FLT_ROUNDS. To convert it, the C rounding mode is used as an index in
// a table, which consists of a sequence of 4-bit fields, each representing
- // corresponding RISCV mode.
+ // corresponding RISC-V mode.
static const unsigned Table =
(RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
(RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
@@ -7673,7 +9635,8 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
if (IsStrict) {
SDValue Chain = N->getOperand(0);
// In absence of Zfh, promote f16 to f32, then convert.
- if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh()) {
+ if (Op0.getValueType() == MVT::f16 &&
+ !Subtarget.hasStdExtZfhOrZhinx()) {
Op0 = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
{Chain, Op0});
Chain = Op0.getValue(1);
@@ -7689,7 +9652,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
// In absence of Zfh, promote f16 to f32, then convert.
- if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
+ if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
@@ -7720,6 +9683,37 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Chain);
break;
}
+ case ISD::LROUND: {
+ SDValue Op0 = N->getOperand(0);
+ EVT Op0VT = Op0.getValueType();
+ if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
+ TargetLowering::TypeSoftenFloat) {
+ if (!isTypeLegal(Op0VT))
+ return;
+
+ // In absence of Zfh, promote f16 to f32, then convert.
+ if (Op0.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfhOrZhinx())
+ Op0 = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Op0);
+
+ SDValue Res =
+ DAG.getNode(RISCVISD::FCVT_W_RV64, DL, MVT::i64, Op0,
+ DAG.getTargetConstant(RISCVFPRndMode::RMM, DL, MVT::i64));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+ return;
+ }
+ // If the FP type needs to be softened, emit a library call to lround. We'll
+ // need to truncate the result. We assume any value that doesn't fit in i32
+ // is allowed to return an unspecified value.
+ RTLIB::Libcall LC =
+ Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
+ MakeLibCallOptions CallOptions;
+ EVT OpVT = Op0.getValueType();
+ CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
+ SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
+ Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
+ Results.push_back(Result);
+ break;
+ }
case ISD::READCYCLECOUNTER: {
assert(!Subtarget.is64Bit() &&
"READCYCLECOUNTER only has custom type legalization on riscv32");
@@ -7828,6 +9822,12 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::ROTR:
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
+ assert((Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb() ||
+ Subtarget.hasVendorXTHeadBb()) &&
+ "Unexpected custom legalization");
+ if (!isa<ConstantSDNode>(N->getOperand(1)) &&
+ !(Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb()))
+ return;
Results.push_back(customLegalizeToWOp(N, DAG));
break;
case ISD::CTTZ:
@@ -7871,6 +9871,39 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
break;
}
+ case ISD::SADDO: {
+ assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ "Unexpected custom legalisation");
+
+ // If the RHS is a constant, we can simplify ConditionRHS below. Otherwise
+ // use the default legalization.
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ return;
+
+ SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
+ SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
+ SDValue Res = DAG.getNode(ISD::ADD, DL, MVT::i64, LHS, RHS);
+ Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
+ DAG.getValueType(MVT::i32));
+
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
+
+ // For an addition, the result should be less than one of the operands (LHS)
+ // if and only if the other operand (RHS) is negative, otherwise there will
+ // be overflow.
+ // For a subtraction, the result should be less than one of the operands
+ // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
+ // otherwise there will be overflow.
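+ // e.g. for i32, 0x7fffffff + 1 gives Res = 0xffffffff80000000 after the
+ // sign_extend_inreg; Res is then less than LHS while RHS is non-negative,
+ // so Overflow is set.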
+ EVT OType = N->getValueType(1);
+ SDValue ResultLowerThanLHS = DAG.getSetCC(DL, OType, Res, LHS, ISD::SETLT);
+ SDValue ConditionRHS = DAG.getSetCC(DL, OType, RHS, Zero, ISD::SETLT);
+
+ SDValue Overflow =
+ DAG.getNode(ISD::XOR, DL, OType, ConditionRHS, ResultLowerThanLHS);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+ Results.push_back(Overflow);
+ return;
+ }
case ISD::UADDO:
case ISD::USUBO: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
@@ -7893,6 +9926,10 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
// no compare with constant and branch instructions.
Overflow = DAG.getSetCC(DL, N->getValueType(1), Res,
DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
+ } else if (IsAdd && isAllOnesConstant(RHS)) {
+ // Special case: uaddo X, -1 overflows if and only if X != 0.
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), N->getOperand(0),
+ DAG.getConstant(0, DL, MVT::i32), ISD::SETNE);
} else {
// Sign extend the LHS and perform an unsigned compare with the ADDW
// result. Since the inputs are sign extended from i32, this is equivalent
@@ -7972,14 +10009,25 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
EVT Op0VT = Op0.getValueType();
MVT XLenVT = Subtarget.getXLenVT();
if (VT == MVT::i16 && Op0VT == MVT::f16 &&
- Subtarget.hasStdExtZfhOrZfhmin()) {
+ Subtarget.hasStdExtZfhOrZfhminOrZhinxOrZhinxmin()) {
+ SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
+ } else if (VT == MVT::i16 && Op0VT == MVT::bf16 &&
+ Subtarget.hasStdExtZfbfmin()) {
SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
} else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
- Subtarget.hasStdExtF()) {
+ Subtarget.hasStdExtFOrZfinx()) {
SDValue FPConv =
DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
+ } else if (VT == MVT::i64 && Op0VT == MVT::f64 && XLenVT == MVT::i32 &&
+ Subtarget.hasStdExtZfa()) {
+ SDValue NewReg = DAG.getNode(RISCVISD::SplitF64, DL,
+ DAG.getVTList(MVT::i32, MVT::i32), Op0);
+ SDValue RetReg = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64,
+ NewReg.getValue(0), NewReg.getValue(1));
+ Results.push_back(RetReg);
} else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
isTypeLegal(Op0VT)) {
// Custom-legalize bitcasts from fixed-length vector types to illegal
@@ -8072,10 +10120,91 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
default:
llvm_unreachable(
"Don't know how to custom type legalize this intrinsic!");
- case Intrinsic::riscv_orc_b: {
+ case Intrinsic::experimental_get_vector_length: {
+ SDValue Res = lowerGetVectorLength(N, DAG, Subtarget);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+ return;
+ }
+ case Intrinsic::riscv_orc_b:
+ case Intrinsic::riscv_brev8:
+ case Intrinsic::riscv_sha256sig0:
+ case Intrinsic::riscv_sha256sig1:
+ case Intrinsic::riscv_sha256sum0:
+ case Intrinsic::riscv_sha256sum1:
+ case Intrinsic::riscv_sm3p0:
+ case Intrinsic::riscv_sm3p1: {
+ if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
+ return;
+ unsigned Opc;
+ switch (IntNo) {
+ case Intrinsic::riscv_orc_b: Opc = RISCVISD::ORC_B; break;
+ case Intrinsic::riscv_brev8: Opc = RISCVISD::BREV8; break;
+ case Intrinsic::riscv_sha256sig0: Opc = RISCVISD::SHA256SIG0; break;
+ case Intrinsic::riscv_sha256sig1: Opc = RISCVISD::SHA256SIG1; break;
+ case Intrinsic::riscv_sha256sum0: Opc = RISCVISD::SHA256SUM0; break;
+ case Intrinsic::riscv_sha256sum1: Opc = RISCVISD::SHA256SUM1; break;
+ case Intrinsic::riscv_sm3p0: Opc = RISCVISD::SM3P0; break;
+ case Intrinsic::riscv_sm3p1: Opc = RISCVISD::SM3P1; break;
+ }
+
SDValue NewOp =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
- SDValue Res = DAG.getNode(RISCVISD::ORC_B, DL, MVT::i64, NewOp);
+ SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+ return;
+ }
+ case Intrinsic::riscv_sm4ks:
+ case Intrinsic::riscv_sm4ed: {
+ unsigned Opc =
+ IntNo == Intrinsic::riscv_sm4ks ? RISCVISD::SM4KS : RISCVISD::SM4ED;
+ SDValue NewOp0 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+ SDValue NewOp1 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
+ SDValue Res =
+ DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, N->getOperand(3));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+ return;
+ }
+ case Intrinsic::riscv_clmul: {
+ if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
+ return;
+
+ SDValue NewOp0 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+ SDValue NewOp1 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
+ SDValue Res = DAG.getNode(RISCVISD::CLMUL, DL, MVT::i64, NewOp0, NewOp1);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+ return;
+ }
+ case Intrinsic::riscv_clmulh:
+ case Intrinsic::riscv_clmulr: {
+ if (!Subtarget.is64Bit() || N->getValueType(0) != MVT::i32)
+ return;
+
+ // Extend inputs to XLen, and shift by 32. This will add 64 trailing zeros
+ // to the full 128-bit clmul result of multiplying two xlen values.
+ // Perform clmulr or clmulh on the shifted values. Finally, extract the
+ // upper 32 bits.
+ //
+ // The alternative is to mask the inputs to 32 bits and use clmul, but
+ // that requires two shifts to mask each input without zext.w.
+ // FIXME: If the inputs are known zero extended or could be freely
+ // zero extended, the mask form would be better.
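+ // e.g. for clmulh: the 128-bit product of (a << 32) and (b << 32) is just
+ // the 64-bit clmul(a, b) shifted left by 64, so the XLen clmulh of the
+ // shifted inputs returns clmul(a, b) itself and the final srl by 32 extracts
+ // its upper 32 bits.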
+ SDValue NewOp0 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+ SDValue NewOp1 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
+ NewOp0 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp0,
+ DAG.getConstant(32, DL, MVT::i64));
+ NewOp1 = DAG.getNode(ISD::SHL, DL, MVT::i64, NewOp1,
+ DAG.getConstant(32, DL, MVT::i64));
+ unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? RISCVISD::CLMULH
+ : RISCVISD::CLMULR;
+ SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1);
+ Res = DAG.getNode(ISD::SRL, DL, MVT::i64, Res,
+ DAG.getConstant(32, DL, MVT::i64));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
}
@@ -8203,13 +10332,14 @@ static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
SDValue Extract = N->getOperand(ReduceIdx);
SDValue Reduce = Extract.getOperand(0);
- if (!Reduce.hasOneUse())
+ if (!Extract.hasOneUse() || !Reduce.hasOneUse())
return SDValue();
SDValue ScalarV = Reduce.getOperand(2);
EVT ScalarVT = ScalarV.getValueType();
if (ScalarV.getOpcode() == ISD::INSERT_SUBVECTOR &&
- ScalarV.getOperand(0)->isUndef())
+ ScalarV.getOperand(0)->isUndef() &&
+ isNullConstant(ScalarV.getOperand(2)))
ScalarV = ScalarV.getOperand(1);
// Make sure that ScalarV is a splat with VL=1.
@@ -8218,7 +10348,7 @@ static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
return SDValue();
- if (!hasNonZeroAVL(ScalarV.getOperand(2)))
+ if (!isNonZeroAVL(ScalarV.getOperand(2)))
return SDValue();
// Check that the scalar of ScalarV is the neutral element.
@@ -8227,7 +10357,9 @@ static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
0))
return SDValue();
- if (!ScalarV.hasOneUse())
+ // If the AVL is zero, operand 0 will be returned. So it's not safe to fold.
+ // FIXME: We might be able to improve this if operand 0 is undef.
+ if (!isNonZeroAVL(Reduce.getOperand(5)))
return SDValue();
SDValue NewStart = N->getOperand(1 - ReduceIdx);
@@ -8243,10 +10375,11 @@ static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG,
DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ScalarVT, DAG.getUNDEF(ScalarVT),
NewScalarV, DAG.getConstant(0, DL, Subtarget.getXLenVT()));
+ SDValue Ops[] = {Reduce.getOperand(0), Reduce.getOperand(1),
+ NewScalarV, Reduce.getOperand(3),
+ Reduce.getOperand(4), Reduce.getOperand(5)};
SDValue NewReduce =
- DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(),
- Reduce.getOperand(0), Reduce.getOperand(1), NewScalarV,
- Reduce.getOperand(3), Reduce.getOperand(4));
+ DAG.getNode(Reduce.getOpcode(), DL, Reduce.getValueType(), Ops);
return DAG.getNode(Extract.getOpcode(), DL, Extract.getValueType(), NewReduce,
Extract.getOperand(1));
}
@@ -8438,8 +10571,31 @@ static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, DL, VT, New1, DAG.getConstant(CB, DL, VT));
}
+// Try to turn (add (xor (setcc X, Y), 1), -1) into (neg (setcc X, Y)).
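+// Since the setcc produces 0 or 1, ((s ^ 1) - 1) equals 0 - s, i.e. the negation.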
+static SDValue combineAddOfBooleanXor(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // RHS should be -1.
+ if (!isAllOnesConstant(N1))
+ return SDValue();
+
+ // Look for an (xor (setcc X, Y), 1).
+ if (N0.getOpcode() != ISD::XOR || !isOneConstant(N0.getOperand(1)) ||
+ N0.getOperand(0).getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ // Emit a negate of the setcc.
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ N0.getOperand(0));
+}
+
static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
+ if (SDValue V = combineAddOfBooleanXor(N, DAG))
+ return V;
if (SDValue V = transformAddImmMulImm(N, DAG, Subtarget))
return V;
if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
@@ -8497,10 +10653,23 @@ static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineSubOfBoolean(N, DAG))
return V;
- // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
- // (select lhs, rhs, cc, x, (sub x, y))
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
+ // fold (sub 0, (setcc x, 0, setlt)) -> (sra x, xlen - 1)
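+ // The setcc is 1 exactly when x is negative, so negating it yields all-ones
+ // for negative x and zero otherwise, which is what (sra x, xlen - 1) computes.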
+ if (isNullConstant(N0) && N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
+ isNullConstant(N1.getOperand(1))) {
+ ISD::CondCode CCVal = cast<CondCodeSDNode>(N1.getOperand(2))->get();
+ if (CCVal == ISD::SETLT) {
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+ unsigned ShAmt = N0.getValueSizeInBits() - 1;
+ return DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0),
+ DAG.getConstant(ShAmt, DL, VT));
+ }
+ }
+
+ // fold (sub x, (select lhs, rhs, cc, 0, y)) ->
+ // (select lhs, rhs, cc, x, (sub x, y))
return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false, Subtarget);
}
@@ -8574,228 +10743,9 @@ static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-namespace {
-// Helper class contains information about comparison operation.
-// The first two operands of this operation are compared values and the
-// last one is the operation.
-// Compared values are stored in Ops.
-// Comparison operation is stored in CCode.
-class CmpOpInfo {
- static unsigned constexpr Size = 2u;
-
- // Type for storing operands of compare operation.
- using OpsArray = std::array<SDValue, Size>;
- OpsArray Ops;
-
- using const_iterator = OpsArray::const_iterator;
- const_iterator begin() const { return Ops.begin(); }
- const_iterator end() const { return Ops.end(); }
-
- ISD::CondCode CCode;
-
- unsigned CommonPos{Size};
- unsigned DifferPos{Size};
-
- // Sets CommonPos and DifferPos based on incoming position
- // of common operand CPos.
- void setPositions(const_iterator CPos) {
- assert(CPos != Ops.end() && "Common operand has to be in OpsArray.\n");
- CommonPos = CPos == Ops.begin() ? 0 : 1;
- DifferPos = 1 - CommonPos;
- assert((DifferPos == 0 || DifferPos == 1) &&
- "Positions can be only 0 or 1.");
- }
-
- // Private constructor of comparison info based on comparison operator.
- // It is private because CmpOpInfo only reasonable relative to other
- // comparison operator. Therefore, infos about comparison operation
- // have to be collected simultaneously via CmpOpInfo::getInfoAbout().
- CmpOpInfo(const SDValue &CmpOp)
- : Ops{CmpOp.getOperand(0), CmpOp.getOperand(1)},
- CCode{cast<CondCodeSDNode>(CmpOp.getOperand(2))->get()} {}
-
- // Finds common operand of Op1 and Op2 and finishes filling CmpOpInfos.
- // Returns true if common operand is found. Otherwise - false.
- static bool establishCorrespondence(CmpOpInfo &Op1, CmpOpInfo &Op2) {
- const auto CommonOpIt1 =
- std::find_first_of(Op1.begin(), Op1.end(), Op2.begin(), Op2.end());
- if (CommonOpIt1 == Op1.end())
- return false;
-
- const auto CommonOpIt2 = std::find(Op2.begin(), Op2.end(), *CommonOpIt1);
- assert(CommonOpIt2 != Op2.end() &&
- "Cannot find common operand in the second comparison operation.");
-
- Op1.setPositions(CommonOpIt1);
- Op2.setPositions(CommonOpIt2);
-
- return true;
- }
-
-public:
- CmpOpInfo(const CmpOpInfo &) = default;
- CmpOpInfo(CmpOpInfo &&) = default;
-
- SDValue const &operator[](unsigned Pos) const {
- assert(Pos < Size && "Out of range\n");
- return Ops[Pos];
- }
-
- // Creates infos about comparison operations CmpOp0 and CmpOp1.
- // If there is no common operand returns None. Otherwise, returns
- // correspondence info about comparison operations.
- static std::optional<std::pair<CmpOpInfo, CmpOpInfo>>
- getInfoAbout(SDValue const &CmpOp0, SDValue const &CmpOp1) {
- CmpOpInfo Op0{CmpOp0};
- CmpOpInfo Op1{CmpOp1};
- if (!establishCorrespondence(Op0, Op1))
- return std::nullopt;
- return std::make_pair(Op0, Op1);
- }
-
- // Returns position of common operand.
- unsigned getCPos() const { return CommonPos; }
-
- // Returns position of differ operand.
- unsigned getDPos() const { return DifferPos; }
-
- // Returns common operand.
- SDValue const &getCOp() const { return operator[](CommonPos); }
-
- // Returns differ operand.
- SDValue const &getDOp() const { return operator[](DifferPos); }
-
- // Returns consition code of comparison operation.
- ISD::CondCode getCondCode() const { return CCode; }
-};
-} // namespace
-
-// Verifies conditions to apply an optimization.
-// Returns Reference comparison code and three operands A, B, C.
-// Conditions for optimization:
-// One operand of the compasions has to be common.
-// This operand is written to C.
-// Two others operands are differend. They are written to A and B.
-// Comparisons has to be similar with respect to common operand C.
-// e.g. A < C; C > B are similar
-// but A < C; B > C are not.
-// Reference comparison code is the comparison code if
-// common operand is right placed.
-// e.g. C > A will be swapped to A < C.
-static std::optional<std::tuple<ISD::CondCode, SDValue, SDValue, SDValue>>
-verifyCompareConds(SDNode *N, SelectionDAG &DAG) {
- LLVM_DEBUG(
- dbgs() << "Checking conditions for comparison operation combining.\n";);
-
- SDValue V0 = N->getOperand(0);
- SDValue V1 = N->getOperand(1);
- assert(V0.getValueType() == V1.getValueType() &&
- "Operations must have the same value type.");
-
- // Condition 1. Operations have to be used only in logic operation.
- if (!V0.hasOneUse() || !V1.hasOneUse())
- return std::nullopt;
-
- // Condition 2. Operands have to be comparison operations.
- if (V0.getOpcode() != ISD::SETCC || V1.getOpcode() != ISD::SETCC)
- return std::nullopt;
-
- // Condition 3.1. Operations only with integers.
- if (!V0.getOperand(0).getValueType().isInteger())
- return std::nullopt;
-
- const auto ComparisonInfo = CmpOpInfo::getInfoAbout(V0, V1);
- // Condition 3.2. Common operand has to be in comparison.
- if (!ComparisonInfo)
- return std::nullopt;
-
- const auto [Op0, Op1] = ComparisonInfo.value();
-
- LLVM_DEBUG(dbgs() << "Shared operands are on positions: " << Op0.getCPos()
- << " and " << Op1.getCPos() << '\n';);
- // If common operand at the first position then swap operation to convert to
- // strict pattern. Common operand has to be right hand side.
- ISD::CondCode RefCond = Op0.getCondCode();
- ISD::CondCode AssistCode = Op1.getCondCode();
- if (!Op0.getCPos())
- RefCond = ISD::getSetCCSwappedOperands(RefCond);
- if (!Op1.getCPos())
- AssistCode = ISD::getSetCCSwappedOperands(AssistCode);
- LLVM_DEBUG(dbgs() << "Reference condition is: " << RefCond << '\n';);
- // If there are different comparison operations then do not perform an
- // optimization. a < c; c < b -> will be changed to b > c.
- if (RefCond != AssistCode)
- return std::nullopt;
-
- // Conditions can be only similar to Less or Greater. (>, >=, <, <=)
- // Applying this mask to the operation will determine Less and Greater
- // operations.
- const unsigned CmpMask = 0b110;
- const unsigned MaskedOpcode = CmpMask & RefCond;
- // If masking gave 0b110, then this is an operation NE, O or TRUE.
- if (MaskedOpcode == CmpMask)
- return std::nullopt;
- // If masking gave 00000, then this is an operation E, O or FALSE.
- if (MaskedOpcode == 0)
- return std::nullopt;
- // Everything else is similar to Less or Greater.
-
- SDValue A = Op0.getDOp();
- SDValue B = Op1.getDOp();
- SDValue C = Op0.getCOp();
-
- LLVM_DEBUG(
- dbgs() << "The conditions for combining comparisons are satisfied.\n";);
- return std::make_tuple(RefCond, A, B, C);
-}
-
-static ISD::NodeType getSelectionCode(bool IsUnsigned, bool IsAnd,
- bool IsGreaterOp) {
- // Codes of selection operation. The first index selects signed or unsigned,
- // the second index selects MIN/MAX.
- static constexpr ISD::NodeType SelectionCodes[2][2] = {
- {ISD::SMIN, ISD::SMAX}, {ISD::UMIN, ISD::UMAX}};
- const bool ChooseSelCode = IsAnd ^ IsGreaterOp;
- return SelectionCodes[IsUnsigned][ChooseSelCode];
-}
-
// Combines two comparison operations and a logic operation into one selection
// operation (min, max) and a logic operation. Returns the newly constructed node
// if the conditions for the optimization are satisfied.
-static SDValue combineCmpOp(SDNode *N, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
- if (!Subtarget.hasStdExtZbb())
- return SDValue();
-
- const unsigned BitOpcode = N->getOpcode();
- assert((BitOpcode == ISD::AND || BitOpcode == ISD::OR) &&
- "This optimization can be used only with AND/OR operations");
-
- const auto Props = verifyCompareConds(N, DAG);
- // If conditions are invalidated then do not perform an optimization.
- if (!Props)
- return SDValue();
-
- const auto [RefOpcode, A, B, C] = Props.value();
- const EVT CmpOpVT = A.getValueType();
-
- const bool IsGreaterOp = RefOpcode & 0b10;
- const bool IsUnsigned = ISD::isUnsignedIntSetCC(RefOpcode);
- assert((IsUnsigned || ISD::isSignedIntSetCC(RefOpcode)) &&
- "Operation neither with signed or unsigned integers.");
-
- const bool IsAnd = BitOpcode == ISD::AND;
- const ISD::NodeType PickCode =
- getSelectionCode(IsUnsigned, IsAnd, IsGreaterOp);
-
- SDLoc DL(N);
- SDValue Pick = DAG.getNode(PickCode, DL, CmpOpVT, A, B);
- SDValue Cmp =
- DAG.getSetCC(DL, N->getOperand(0).getValueType(), Pick, C, RefOpcode);
-
- return Cmp;
-}
-
static SDValue performANDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
@@ -8820,9 +10770,6 @@ static SDValue performANDCombine(SDNode *N,
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
}
- if (SDValue V = combineCmpOp(N, DAG, Subtarget))
- return V;
-
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
@@ -8839,9 +10786,6 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
SelectionDAG &DAG = DCI.DAG;
- if (SDValue V = combineCmpOp(N, DAG, Subtarget))
- return V;
-
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
@@ -8870,6 +10814,20 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
}
+ // Fold (xor (setcc constant, y, setlt), 1) -> (setcc y, constant + 1, setlt)
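+ // (c < y) ^ 1 == !(c < y) == (y <= c) == (y < c + 1); the fold is only done
+ // below when c + 1 still fits in a simm12.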
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SETCC && isOneConstant(N1)) {
+ auto *ConstN00 = dyn_cast<ConstantSDNode>(N0.getOperand(0));
+ ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ if (ConstN00 && CC == ISD::SETLT) {
+ EVT VT = N0.getValueType();
+ SDLoc DL(N0);
+ const APInt &Imm = ConstN00->getAPIntValue();
+ if ((Imm + 1).isSignedIntN(12))
+ return DAG.getSetCC(DL, VT, N0.getOperand(1),
+ DAG.getConstant(Imm + 1, DL, VT), CC);
+ }
+ }
+
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
// fold (xor (select cond, 0, y), x) ->
@@ -8877,6 +10835,46 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false, Subtarget);
}
+// Because indexed load/store instructions zero-extend their indices, \p
+// narrowIndex tries to narrow the type of the index operand if it matches the
+// pattern (shl (zext x to ty), C) and bits(x) + C < bits(ty).
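+// e.g. an index of the form (shl (zext nxv4i8 %x to nxv4i64), splat 2) needs
+// at most 10 bits, so it can be rebuilt as (shl (zext %x to nxv4i16), splat 2).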
+static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) {
+ if (N.getOpcode() != ISD::SHL || !N->hasOneUse())
+ return SDValue();
+
+ SDValue N0 = N.getOperand(0);
+ if (N0.getOpcode() != ISD::ZERO_EXTEND &&
+ N0.getOpcode() != RISCVISD::VZEXT_VL)
+ return SDValue();
+ if (!N0->hasOneUse())
+ return SDValue();
+
+ APInt ShAmt;
+ SDValue N1 = N.getOperand(1);
+ if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ unsigned SrcElen = SrcVT.getScalarSizeInBits();
+ unsigned ShAmtV = ShAmt.getZExtValue();
+ unsigned NewElen = PowerOf2Ceil(SrcElen + ShAmtV);
+ NewElen = std::max(NewElen, 8U);
+
+ // Skip if NewElen is not narrower than the original extended type.
+ if (NewElen >= N0.getValueType().getScalarSizeInBits())
+ return SDValue();
+
+ EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
+ EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
+
+ SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
+ SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
+ return DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
+}
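
As a standalone sketch (not part of the patch; standard library only, names are mine): the width math above rounds bits(x) + C up to the next power of two and clamps it to 8, so a zext-from-i8 index shifted left by 2 can be narrowed to 16-bit elements.

// Sketch of the index-narrowing width arithmetic used by narrowIndex().
#include <algorithm>
#include <bit>
#include <cstdio>

static unsigned newIndexElen(unsigned SrcElen, unsigned ShAmt) {
  // Smallest power of two that can hold the shifted, zero-extended value,
  // clamped to a minimum element width of 8 bits.
  unsigned NewElen = std::bit_ceil(SrcElen + ShAmt);
  return std::max(NewElen, 8u);
}

int main() {
  // e.g. (shl (zext i8 x to i64), 2): 8 + 2 = 10 bits -> round up to 16,
  // which is narrower than the original 64-bit extended type.
  std::printf("%u\n", newIndexElen(8, 2)); // prints 16
}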
+
// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
@@ -9010,7 +11008,7 @@ struct NodeExtensionHelper {
}
/// Get or create a value that can feed \p Root with the given extension \p
- /// SExt. If \p SExt is None, this returns the source of this operand.
+ /// SExt. If \p SExt is std::nullopt, this returns the source of this operand.
/// \see ::getSource().
SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG,
std::optional<bool> SExt) const {
@@ -9543,6 +11541,147 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
return InputRootReplacement;
}
+// Helper function for performMemPairCombine.
+// Try to combine the memory loads/stores LSNode1 and LSNode2
+// into a single memory pair operation.
+static SDValue tryMemPairCombine(SelectionDAG &DAG, LSBaseSDNode *LSNode1,
+ LSBaseSDNode *LSNode2, SDValue BasePtr,
+ uint64_t Imm) {
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 8> Worklist = {LSNode1, LSNode2};
+
+ if (SDNode::hasPredecessorHelper(LSNode1, Visited, Worklist) ||
+ SDNode::hasPredecessorHelper(LSNode2, Visited, Worklist))
+ return SDValue();
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+
+ // The new operation has twice the width.
+ MVT XLenVT = Subtarget.getXLenVT();
+ EVT MemVT = LSNode1->getMemoryVT();
+ EVT NewMemVT = (MemVT == MVT::i32) ? MVT::i64 : MVT::i128;
+ MachineMemOperand *MMO = LSNode1->getMemOperand();
+ MachineMemOperand *NewMMO = MF.getMachineMemOperand(
+ MMO, MMO->getPointerInfo(), MemVT == MVT::i32 ? 8 : 16);
+
+ if (LSNode1->getOpcode() == ISD::LOAD) {
+ auto Ext = cast<LoadSDNode>(LSNode1)->getExtensionType();
+ unsigned Opcode;
+ if (MemVT == MVT::i32)
+ Opcode = (Ext == ISD::ZEXTLOAD) ? RISCVISD::TH_LWUD : RISCVISD::TH_LWD;
+ else
+ Opcode = RISCVISD::TH_LDD;
+
+ SDValue Res = DAG.getMemIntrinsicNode(
+ Opcode, SDLoc(LSNode1), DAG.getVTList({XLenVT, XLenVT, MVT::Other}),
+ {LSNode1->getChain(), BasePtr,
+ DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
+ NewMemVT, NewMMO);
+
+ SDValue Node1 =
+ DAG.getMergeValues({Res.getValue(0), Res.getValue(2)}, SDLoc(LSNode1));
+ SDValue Node2 =
+ DAG.getMergeValues({Res.getValue(1), Res.getValue(2)}, SDLoc(LSNode2));
+
+ DAG.ReplaceAllUsesWith(LSNode2, Node2.getNode());
+ return Node1;
+ } else {
+ unsigned Opcode = (MemVT == MVT::i32) ? RISCVISD::TH_SWD : RISCVISD::TH_SDD;
+
+ SDValue Res = DAG.getMemIntrinsicNode(
+ Opcode, SDLoc(LSNode1), DAG.getVTList(MVT::Other),
+ {LSNode1->getChain(), LSNode1->getOperand(1), LSNode2->getOperand(1),
+ BasePtr, DAG.getConstant(Imm, SDLoc(LSNode1), XLenVT)},
+ NewMemVT, NewMMO);
+
+ DAG.ReplaceAllUsesWith(LSNode2, Res.getNode());
+ return Res;
+ }
+}
+
+// Try to combine two adjacent loads/stores to a single pair instruction from
+// the XTHeadMemPair vendor extension.
+static SDValue performMemPairCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ MachineFunction &MF = DAG.getMachineFunction();
+ const RISCVSubtarget &Subtarget = MF.getSubtarget<RISCVSubtarget>();
+
+ // Target does not support load/store pair.
+ if (!Subtarget.hasVendorXTHeadMemPair())
+ return SDValue();
+
+ LSBaseSDNode *LSNode1 = cast<LSBaseSDNode>(N);
+ EVT MemVT = LSNode1->getMemoryVT();
+ unsigned OpNum = LSNode1->getOpcode() == ISD::LOAD ? 1 : 2;
+
+ // No volatile, indexed or atomic loads/stores.
+ if (!LSNode1->isSimple() || LSNode1->isIndexed())
+ return SDValue();
+
+ // Function to get a base + constant representation from a memory value.
+ auto ExtractBaseAndOffset = [](SDValue Ptr) -> std::pair<SDValue, uint64_t> {
+ if (Ptr->getOpcode() == ISD::ADD)
+ if (auto *C1 = dyn_cast<ConstantSDNode>(Ptr->getOperand(1)))
+ return {Ptr->getOperand(0), C1->getZExtValue()};
+ return {Ptr, 0};
+ };
+
+ auto [Base1, Offset1] = ExtractBaseAndOffset(LSNode1->getOperand(OpNum));
+
+ SDValue Chain = N->getOperand(0);
+ for (SDNode::use_iterator UI = Chain->use_begin(), UE = Chain->use_end();
+ UI != UE; ++UI) {
+ SDUse &Use = UI.getUse();
+ if (Use.getUser() != N && Use.getResNo() == 0 &&
+ Use.getUser()->getOpcode() == N->getOpcode()) {
+ LSBaseSDNode *LSNode2 = cast<LSBaseSDNode>(Use.getUser());
+
+ // No volatile, indexed or atomic loads/stores.
+ if (!LSNode2->isSimple() || LSNode2->isIndexed())
+ continue;
+
+ // Check if LSNode1 and LSNode2 have the same type and extension.
+ if (LSNode1->getOpcode() == ISD::LOAD)
+ if (cast<LoadSDNode>(LSNode2)->getExtensionType() !=
+ cast<LoadSDNode>(LSNode1)->getExtensionType())
+ continue;
+
+ if (LSNode1->getMemoryVT() != LSNode2->getMemoryVT())
+ continue;
+
+ auto [Base2, Offset2] = ExtractBaseAndOffset(LSNode2->getOperand(OpNum));
+
+ // Check if the base pointer is the same for both instructions.
+ if (Base1 != Base2)
+ continue;
+
+ // Check if the offsets match the XTHeadMemPair encoding constraints.
+ bool Valid = false;
+ if (MemVT == MVT::i32) {
+ // Check for adjacent i32 values and a 2-bit index.
+ if ((Offset1 + 4 == Offset2) && isShiftedUInt<2, 3>(Offset1))
+ Valid = true;
+ } else if (MemVT == MVT::i64) {
+ // Check for adjacent i64 values and a 2-bit index.
+ if ((Offset1 + 8 == Offset2) && isShiftedUInt<2, 4>(Offset1))
+ Valid = true;
+ }
+
+ if (!Valid)
+ continue;
+
+ // Try to combine.
+ if (SDValue Res =
+ tryMemPairCombine(DAG, LSNode1, LSNode2, Base1, Offset1))
+ return Res;
+ }
+ }
+
+ return SDValue();
+}
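
For reference, isShiftedUInt<2, 3>(Offset1) accepts a 2-bit index shifted left by 3, so for i32 pairs the first offset must be one of 0, 8, 16 or 24 and the second must trail it by exactly 4. Below is a hedged restatement of that predicate with plain bit tests (illustrative only, not the LLVM helper itself):

// Illustrative check of the XTHeadMemPair offset constraint for i32 pairs:
// Offset1 must be a 2-bit value shifted left by 3, and Offset2 = Offset1 + 4.
#include <cstdint>
#include <cstdio>

static bool isValidI32PairOffsets(uint64_t Offset1, uint64_t Offset2) {
  // Only bits 3 and 4 may be set, i.e. Offset1 is in {0, 8, 16, 24}.
  bool IsShiftedUInt2x3 = (Offset1 & ~uint64_t(0b11000)) == 0;
  return IsShiftedUInt2x3 && Offset2 == Offset1 + 4;
}

int main() {
  std::printf("%d %d %d\n",
              isValidI32PairOffsets(0, 4),   // 1: valid
              isValidI32PairOffsets(8, 12),  // 1: valid
              isValidI32PairOffsets(4, 8));  // 0: 4 is not a 2-bit index << 3
}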
+
// Fold
// (fp_to_int (froundeven X)) -> fcvt X, rne
// (fp_to_int (ftrunc X)) -> fcvt X, rtz
@@ -9558,6 +11697,10 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
SDValue Src = N->getOperand(0);
+ // Don't do this for strict-fp Src.
+ if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
+ return SDValue();
+
// Ensure the FP type is legal.
if (!TLI.isTypeLegal(Src.getValueType()))
return SDValue();
@@ -9657,6 +11800,10 @@ static SDValue performFP_TO_INT_SATCombine(SDNode *N,
SDValue Src = N->getOperand(0);
+ // Don't do this for strict-fp Src.
+ if (Src->isStrictFPOpcode() || Src->isTargetStrictFPOpcode())
+ return SDValue();
+
// Ensure the FP type is also legal.
if (!TLI.isTypeLegal(Src.getValueType()))
return SDValue();
@@ -9693,7 +11840,7 @@ static SDValue performFP_TO_INT_SATCombine(SDNode *N,
if (Opc == RISCVISD::FCVT_WU_RV64)
FpToInt = DAG.getZeroExtendInReg(FpToInt, DL, MVT::i32);
- // RISCV FP-to-int conversions saturate to the destination register size, but
+ // RISC-V FP-to-int conversions saturate to the destination register size, but
// don't produce 0 for nan.
SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
@@ -9711,7 +11858,7 @@ static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
- !isPowerOf2_32(VT.getSizeInBits()))
+ !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
return SDValue();
SDLoc DL(N);
@@ -9722,8 +11869,6 @@ static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
// multiply result and/or the accumulator.
// NOTE: Only supports RVV operations with VL.
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
- assert((NegMul || NegAcc) && "Not negating anything?");
-
// Negating the multiply result changes ADD<->SUB and toggles 'N'.
if (NegMul) {
// clang-format off
@@ -9733,6 +11878,10 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
+ case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
+ case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
+ case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
+ case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
}
// clang-format on
}
@@ -9746,6 +11895,10 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
+ case RISCVISD::STRICT_VFMADD_VL: Opcode = RISCVISD::STRICT_VFMSUB_VL; break;
+ case RISCVISD::STRICT_VFMSUB_VL: Opcode = RISCVISD::STRICT_VFMADD_VL; break;
+ case RISCVISD::STRICT_VFNMADD_VL: Opcode = RISCVISD::STRICT_VFNMSUB_VL; break;
+ case RISCVISD::STRICT_VFNMSUB_VL: Opcode = RISCVISD::STRICT_VFNMADD_VL; break;
}
// clang-format on
}
@@ -9753,6 +11906,182 @@ static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
return Opcode;
}
+static SDValue combineVFMADD_VLWithVFNEG_VL(SDNode *N, SelectionDAG &DAG) {
+ // Fold FNEG_VL into FMA opcodes.
+ // The first operand of strict-fp is chain.
+ unsigned Offset = N->isTargetStrictFPOpcode();
+ SDValue A = N->getOperand(0 + Offset);
+ SDValue B = N->getOperand(1 + Offset);
+ SDValue C = N->getOperand(2 + Offset);
+ SDValue Mask = N->getOperand(3 + Offset);
+ SDValue VL = N->getOperand(4 + Offset);
+
+ auto invertIfNegative = [&Mask, &VL](SDValue &V) {
+ if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
+ V.getOperand(2) == VL) {
+ // Return the negated input.
+ V = V.getOperand(0);
+ return true;
+ }
+
+ return false;
+ };
+
+ bool NegA = invertIfNegative(A);
+ bool NegB = invertIfNegative(B);
+ bool NegC = invertIfNegative(C);
+
+ // If no operands are negated, we're done.
+ if (!NegA && !NegB && !NegC)
+ return SDValue();
+
+ unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
+ if (N->isTargetStrictFPOpcode())
+ return DAG.getNode(NewOpcode, SDLoc(N), N->getVTList(),
+ {N->getOperand(0), A, B, C, Mask, VL});
+ return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
+ VL);
+}
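
The NegA != NegB argument passed to negateFMAOpcode reflects that negating both multiplicands cancels out: only an odd number of negated multiply operands flips the product sign, while a negated addend toggles ADD<->SUB independently. A minimal scalar sketch of that bookkeeping (illustrative types, not RISCVISD opcodes):

// Sign bookkeeping of the fneg-into-FMA combine, with plain booleans.
#include <cstdio>

struct FMAKind { bool NegMul; bool NegAcc; }; // (-1)^NegMul * a*b + (-1)^NegAcc * c

static FMAKind foldNegations(bool NegA, bool NegB, bool NegC) {
  // Negating both multiplicands cancels; only an odd count flips the product.
  return {NegA != NegB, NegC};
}

int main() {
  FMAKind K = foldNegations(/*NegA=*/true, /*NegB=*/true, /*NegC=*/true);
  std::printf("NegMul=%d NegAcc=%d\n", K.NegMul, K.NegAcc); // NegMul=0 NegAcc=1 -> fmsub
}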
+
+static SDValue performVFMADD_VLCombine(SDNode *N, SelectionDAG &DAG) {
+ if (SDValue V = combineVFMADD_VLWithVFNEG_VL(N, DAG))
+ return V;
+
+ // FIXME: Ignore strict opcodes for now.
+ if (N->isTargetStrictFPOpcode())
+ return SDValue();
+
+ // Try to form widening FMA.
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Mask = N->getOperand(3);
+ SDValue VL = N->getOperand(4);
+
+ if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
+ Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
+ return SDValue();
+
+ // TODO: Refactor to handle more complex cases similar to
+ // combineBinOp_VLToVWBinOp_VL.
+ if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
+ (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
+ return SDValue();
+
+ // Check the mask and VL are the same.
+ if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
+ Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
+ return SDValue();
+
+ unsigned NewOpc;
+ switch (N->getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ case RISCVISD::VFMADD_VL:
+ NewOpc = RISCVISD::VFWMADD_VL;
+ break;
+ case RISCVISD::VFNMSUB_VL:
+ NewOpc = RISCVISD::VFWNMSUB_VL;
+ break;
+ case RISCVISD::VFNMADD_VL:
+ NewOpc = RISCVISD::VFWNMADD_VL;
+ break;
+ case RISCVISD::VFMSUB_VL:
+ NewOpc = RISCVISD::VFWMSUB_VL;
+ break;
+ }
+
+ Op0 = Op0.getOperand(0);
+ Op1 = Op1.getOperand(0);
+
+ return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0), Op0, Op1,
+ N->getOperand(2), Mask, VL);
+}
+
+static SDValue performVFMUL_VLCombine(SDNode *N, SelectionDAG &DAG) {
+ // FIXME: Ignore strict opcodes for now.
+ assert(!N->isTargetStrictFPOpcode() && "Unexpected opcode");
+
+ // Try to form widening multiply.
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Merge = N->getOperand(2);
+ SDValue Mask = N->getOperand(3);
+ SDValue VL = N->getOperand(4);
+
+ if (Op0.getOpcode() != RISCVISD::FP_EXTEND_VL ||
+ Op1.getOpcode() != RISCVISD::FP_EXTEND_VL)
+ return SDValue();
+
+ // TODO: Refactor to handle more complex cases similar to
+ // combineBinOp_VLToVWBinOp_VL.
+ if ((!Op0.hasOneUse() || !Op1.hasOneUse()) &&
+ (Op0 != Op1 || !Op0->hasNUsesOfValue(2, 0)))
+ return SDValue();
+
+ // Check the mask and VL are the same.
+ if (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL ||
+ Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL)
+ return SDValue();
+
+ Op0 = Op0.getOperand(0);
+ Op1 = Op1.getOperand(0);
+
+ return DAG.getNode(RISCVISD::VFWMUL_VL, SDLoc(N), N->getValueType(0), Op0,
+ Op1, Merge, Mask, VL);
+}
+
+static SDValue performFADDSUB_VLCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Merge = N->getOperand(2);
+ SDValue Mask = N->getOperand(3);
+ SDValue VL = N->getOperand(4);
+
+ bool IsAdd = N->getOpcode() == RISCVISD::FADD_VL;
+
+ // Look for foldable FP_EXTENDS.
+ bool Op0IsExtend =
+ Op0.getOpcode() == RISCVISD::FP_EXTEND_VL &&
+ (Op0.hasOneUse() || (Op0 == Op1 && Op0->hasNUsesOfValue(2, 0)));
+ bool Op1IsExtend =
+ (Op0 == Op1 && Op0IsExtend) ||
+ (Op1.getOpcode() == RISCVISD::FP_EXTEND_VL && Op1.hasOneUse());
+
+ // Check the mask and VL.
+ if (Op0IsExtend && (Op0.getOperand(1) != Mask || Op0.getOperand(2) != VL))
+ Op0IsExtend = false;
+ if (Op1IsExtend && (Op1.getOperand(1) != Mask || Op1.getOperand(2) != VL))
+ Op1IsExtend = false;
+
+ // Canonicalize.
+ if (!Op1IsExtend) {
+ // Sub requires at least operand 1 to be an extend.
+ if (!IsAdd)
+ return SDValue();
+
+ // Add is commutable, if the other operand is foldable, swap them.
+ if (!Op0IsExtend)
+ return SDValue();
+
+ std::swap(Op0, Op1);
+ std::swap(Op0IsExtend, Op1IsExtend);
+ }
+
+ // Op1 is a foldable extend. Op0 might be foldable.
+ Op1 = Op1.getOperand(0);
+ if (Op0IsExtend)
+ Op0 = Op0.getOperand(0);
+
+ unsigned Opc;
+ if (IsAdd)
+ Opc = Op0IsExtend ? RISCVISD::VFWADD_VL : RISCVISD::VFWADD_W_VL;
+ else
+ Opc = Op0IsExtend ? RISCVISD::VFWSUB_VL : RISCVISD::VFWSUB_W_VL;
+
+ return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op0, Op1, Merge, Mask,
+ VL);
+}
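
The canonicalization above relies on FADD being commutative while FSUB is not: a foldable extend sitting in operand 0 may be swapped into the operand-1 slot only for adds, which is why the sub path gives up when operand 1 is not an extend. A minimal scalar analogue under that assumption (illustrative names only):

// Scalar analogue of the operand canonicalization: addition may swap a
// "foldable" operand into slot 1, subtraction may not.
#include <optional>
#include <utility>

struct Operands { int Op0; int Op1; bool Op0Foldable; bool Op1Foldable; };

static std::optional<Operands> canonicalize(Operands O, bool IsAdd) {
  if (!O.Op1Foldable) {
    // Sub needs operand 1 foldable; add needs at least one foldable operand.
    if (!IsAdd || !O.Op0Foldable)
      return std::nullopt;
    std::swap(O.Op0, O.Op1);
    std::swap(O.Op0Foldable, O.Op1Foldable);
  }
  return O;
}

int main() {
  // Add with a foldable operand 0: swapped and accepted (exit code 0).
  return canonicalize({1, 2, true, false}, /*IsAdd=*/true).has_value() ? 0 : 1;
}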
+
static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
@@ -9806,7 +12135,7 @@ static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
// AddC needs to have at least 32 trailing zeros.
- if (AddC->getAPIntValue().countTrailingZeros() < 32)
+ if (AddC->getAPIntValue().countr_zero() < 32)
return SDValue();
// All users should be a shift by constant less than or equal to 32. This
@@ -10057,13 +12386,73 @@ static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(TrueVal.getOpcode(), DL, VT, FalseVal, NewSel);
}
+// This tries to get rid of `select` and `icmp` that are being used to handle
+// targets that do not support `cttz(0)`/`ctlz(0)`.
+static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
+ SDValue Cond = N->getOperand(0);
+
+ // This represents either CTTZ or CTLZ instruction.
+ SDValue CountZeroes;
+
+ SDValue ValOnZero;
+
+ if (Cond.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ if (!isNullConstant(Cond->getOperand(1)))
+ return SDValue();
+
+ ISD::CondCode CCVal = cast<CondCodeSDNode>(Cond->getOperand(2))->get();
+ if (CCVal == ISD::CondCode::SETEQ) {
+ CountZeroes = N->getOperand(2);
+ ValOnZero = N->getOperand(1);
+ } else if (CCVal == ISD::CondCode::SETNE) {
+ CountZeroes = N->getOperand(1);
+ ValOnZero = N->getOperand(2);
+ } else {
+ return SDValue();
+ }
+
+ if (CountZeroes.getOpcode() == ISD::TRUNCATE ||
+ CountZeroes.getOpcode() == ISD::ZERO_EXTEND)
+ CountZeroes = CountZeroes.getOperand(0);
+
+ if (CountZeroes.getOpcode() != ISD::CTTZ &&
+ CountZeroes.getOpcode() != ISD::CTTZ_ZERO_UNDEF &&
+ CountZeroes.getOpcode() != ISD::CTLZ &&
+ CountZeroes.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
+ return SDValue();
+
+ if (!isNullConstant(ValOnZero))
+ return SDValue();
+
+ SDValue CountZeroesArgument = CountZeroes->getOperand(0);
+ if (Cond->getOperand(0) != CountZeroesArgument)
+ return SDValue();
+
+ if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
+ CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
+ CountZeroes.getValueType(), CountZeroesArgument);
+ } else if (CountZeroes.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
+ CountZeroes = DAG.getNode(ISD::CTLZ, SDLoc(CountZeroes),
+ CountZeroes.getValueType(), CountZeroesArgument);
+ }
+
+ unsigned BitWidth = CountZeroes.getValueSizeInBits();
+ SDValue BitWidthMinusOne =
+ DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
+
+ auto AndNode = DAG.getNode(ISD::AND, SDLoc(N), CountZeroes.getValueType(),
+ CountZeroes, BitWidthMinusOne);
+ return DAG.getZExtOrTrunc(AndNode, SDLoc(N), N->getValueType(0));
+}
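
The fold works because, for a power-of-two bit width, the non-ZERO_UNDEF cttz/ctlz of 0 return the bit width itself, and masking with bitwidth - 1 maps that result to 0, exactly the value the select would have produced. A hedged scalar analogue using std::countr_zero, which is defined for zero:

// Scalar illustration of the select(cttz) fold: for x == 0, countr_zero
// returns the bit width (32), and 32 & 31 == 0, matching the selected value.
#include <bit>
#include <cstdint>
#include <cstdio>

static uint32_t selectForm(uint32_t x) {
  return x == 0 ? 0u : static_cast<uint32_t>(std::countr_zero(x));
}

static uint32_t foldedForm(uint32_t x) {
  return static_cast<uint32_t>(std::countr_zero(x)) & 31u;
}

int main() {
  for (uint32_t x : {0u, 1u, 8u, 0x80000000u})
    std::printf("%u: %u %u\n", x, selectForm(x), foldedForm(x)); // columns agree
}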
+
static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
- if (Subtarget.hasShortForwardBranchOpt())
- return SDValue();
+ if (SDValue Folded = foldSelectOfCTTZOrCTLZ(N, DAG))
+ return Folded;
- // Only support XLenVT.
- if (N->getValueType(0) != Subtarget.getXLenVT())
+ if (Subtarget.hasShortForwardBranchOpt())
return SDValue();
SDValue TrueVal = N->getOperand(1);
@@ -10073,6 +12462,191 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
}
+// If we're concatenating a series of vector loads like
+// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
+// Then we can turn this into a strided load by widening the vector elements
+// vlse32 p, stride=n
+static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget,
+ const RISCVTargetLowering &TLI) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // Only perform this combine on legal MVTs.
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
+ // TODO: Potentially extend this to scalable vectors
+ if (VT.isScalableVector())
+ return SDValue();
+
+ auto *BaseLd = dyn_cast<LoadSDNode>(N->getOperand(0));
+ if (!BaseLd || !BaseLd->isSimple() || !ISD::isNormalLoad(BaseLd) ||
+ !SDValue(BaseLd, 0).hasOneUse())
+ return SDValue();
+
+ EVT BaseLdVT = BaseLd->getValueType(0);
+ SDValue BasePtr = BaseLd->getBasePtr();
+
+ // Go through the loads and check that they're strided
+ SDValue CurPtr = BasePtr;
+ SDValue Stride;
+ Align Align = BaseLd->getAlign();
+
+ for (SDValue Op : N->ops().drop_front()) {
+ auto *Ld = dyn_cast<LoadSDNode>(Op);
+ if (!Ld || !Ld->isSimple() || !Op.hasOneUse() ||
+ Ld->getChain() != BaseLd->getChain() || !ISD::isNormalLoad(Ld) ||
+ Ld->getValueType(0) != BaseLdVT)
+ return SDValue();
+
+ SDValue Ptr = Ld->getBasePtr();
+ // Check that each load's pointer is (add CurPtr, Stride)
+ if (Ptr.getOpcode() != ISD::ADD || Ptr.getOperand(0) != CurPtr)
+ return SDValue();
+ SDValue Offset = Ptr.getOperand(1);
+ if (!Stride)
+ Stride = Offset;
+ else if (Offset != Stride)
+ return SDValue();
+
+ // The common alignment is the most restrictive (smallest) of all the loads
+ Align = std::min(Align, Ld->getAlign());
+
+ CurPtr = Ptr;
+ }
+
+ // A special case is if the stride is exactly the width of one of the loads,
+ // in which case it's contiguous and can be combined into a regular vle
+ // without changing the element size
+ if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride);
+ ConstStride &&
+ ConstStride->getZExtValue() == BaseLdVT.getFixedSizeInBits() / 8) {
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(),
+ VT.getStoreSize(), Align);
+ // Can't do the combine if the load isn't naturally aligned with the element
+ // type
+ if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(),
+ DAG.getDataLayout(), VT, *MMO))
+ return SDValue();
+
+ SDValue WideLoad = DAG.getLoad(VT, DL, BaseLd->getChain(), BasePtr, MMO);
+ for (SDValue Ld : N->ops())
+ DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), WideLoad);
+ return WideLoad;
+ }
+
+ // Get the widened scalar type, e.g. v4i8 -> i64
+ unsigned WideScalarBitWidth =
+ BaseLdVT.getScalarSizeInBits() * BaseLdVT.getVectorNumElements();
+ MVT WideScalarVT = MVT::getIntegerVT(WideScalarBitWidth);
+
+ // Get the vector type for the strided load, e.g. 4 x v4i8 -> v4i64
+ MVT WideVecVT = MVT::getVectorVT(WideScalarVT, N->getNumOperands());
+ if (!TLI.isTypeLegal(WideVecVT))
+ return SDValue();
+
+ // Check that the operation is legal
+ if (!TLI.isLegalStridedLoadStore(WideVecVT, Align))
+ return SDValue();
+
+ MVT ContainerVT = TLI.getContainerForFixedLengthVector(WideVecVT);
+ SDValue VL =
+ getDefaultVLOps(WideVecVT, ContainerVT, DL, DAG, Subtarget).second;
+ SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
+ SDValue IntID =
+ DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, Subtarget.getXLenVT());
+ SDValue Ops[] = {BaseLd->getChain(),
+ IntID,
+ DAG.getUNDEF(ContainerVT),
+ BasePtr,
+ Stride,
+ VL};
+
+ uint64_t MemSize;
+ if (auto *ConstStride = dyn_cast<ConstantSDNode>(Stride))
+ // total size = (elsize * n) + (stride - elsize) * (n-1)
+ // = elsize + stride * (n-1)
+ MemSize = WideScalarVT.getSizeInBits() +
+ ConstStride->getSExtValue() * (N->getNumOperands() - 1);
+ else
+ // If Stride isn't constant, then we can't know how much it will load
+ MemSize = MemoryLocation::UnknownSize;
+
+ MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+ BaseLd->getPointerInfo(), BaseLd->getMemOperand()->getFlags(), MemSize,
+ Align);
+
+ SDValue StridedLoad = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs,
+ Ops, WideVecVT, MMO);
+ for (SDValue Ld : N->ops())
+ DAG.makeEquivalentMemoryOrdering(cast<LoadSDNode>(Ld), StridedLoad);
+
+ // Note: Perform the bitcast before the convertFromScalableVector so we have
+ // balanced pairs of convertFromScalable/convertToScalable
+ SDValue Res = DAG.getBitcast(
+ TLI.getContainerForFixedLengthVector(VT.getSimpleVT()), StridedLoad);
+ return convertFromScalableVector(VT, Res, DAG, Subtarget);
+}
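
A quick check of the algebraic identity in the MemSize comment above: n elements of size elsize separated by stride occupy elsize * n + (stride - elsize) * (n - 1), which simplifies to elsize + stride * (n - 1). The sketch below (standard library only, sample values are mine) asserts the equivalence:

// Verifies the strided-load footprint identity for a few sample values.
#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t elsize : {32u, 64u})
    for (uint64_t stride : {64u, 128u, 256u})
      for (uint64_t n : {2u, 3u, 4u, 8u})
        assert(elsize * n + (stride - elsize) * (n - 1) ==
               elsize + stride * (n - 1));
}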
+
+static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(N->getOpcode() == RISCVISD::ADD_VL);
+ SDValue Addend = N->getOperand(0);
+ SDValue MulOp = N->getOperand(1);
+ SDValue AddMergeOp = N->getOperand(2);
+
+ if (!AddMergeOp.isUndef())
+ return SDValue();
+
+ auto IsVWMulOpc = [](unsigned Opc) {
+ switch (Opc) {
+ case RISCVISD::VWMUL_VL:
+ case RISCVISD::VWMULU_VL:
+ case RISCVISD::VWMULSU_VL:
+ return true;
+ default:
+ return false;
+ }
+ };
+
+ if (!IsVWMulOpc(MulOp.getOpcode()))
+ std::swap(Addend, MulOp);
+
+ if (!IsVWMulOpc(MulOp.getOpcode()))
+ return SDValue();
+
+ SDValue MulMergeOp = MulOp.getOperand(2);
+
+ if (!MulMergeOp.isUndef())
+ return SDValue();
+
+ SDValue AddMask = N->getOperand(3);
+ SDValue AddVL = N->getOperand(4);
+ SDValue MulMask = MulOp.getOperand(3);
+ SDValue MulVL = MulOp.getOperand(4);
+
+ if (AddMask != MulMask || AddVL != MulVL)
+ return SDValue();
+
+ unsigned Opc = RISCVISD::VWMACC_VL + MulOp.getOpcode() - RISCVISD::VWMUL_VL;
+ static_assert(RISCVISD::VWMACC_VL + 1 == RISCVISD::VWMACCU_VL,
+ "Unexpected opcode after VWMACC_VL");
+ static_assert(RISCVISD::VWMACC_VL + 2 == RISCVISD::VWMACCSU_VL,
+ "Unexpected opcode after VWMACC_VL!");
+ static_assert(RISCVISD::VWMUL_VL + 1 == RISCVISD::VWMULU_VL,
+ "Unexpected opcode after VWMUL_VL!");
+ static_assert(RISCVISD::VWMUL_VL + 2 == RISCVISD::VWMULSU_VL,
+ "Unexpected opcode after VWMUL_VL!");
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue Ops[] = {MulOp.getOperand(0), MulOp.getOperand(1), Addend, AddMask,
+ AddVL};
+ return DAG.getNode(Opc, DL, VT, Ops);
+}
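
The Opc computation above leans on VWMACC_VL/VWMACCU_VL/VWMACCSU_VL being laid out in the same relative order as VWMUL_VL/VWMULU_VL/VWMULSU_VL, so the translation is a constant offset; the static_asserts document that layout assumption. A tiny sketch of the same trick with illustrative enums (not the RISCVISD values):

// Enum-offset translation: as long as both families keep the same relative
// order, mapping a widening multiply to its multiply-accumulate is one add.
#include <cstdio>

enum Opcode { VWMUL, VWMULU, VWMULSU, VWMACC, VWMACCU, VWMACCSU };

static Opcode toMacc(Opcode MulOpc) {
  static_assert(VWMACC + 1 == VWMACCU && VWMACC + 2 == VWMACCSU, "order");
  static_assert(VWMUL + 1 == VWMULU && VWMUL + 2 == VWMULSU, "order");
  return static_cast<Opcode>(VWMACC + (MulOpc - VWMUL));
}

int main() { std::printf("%d\n", toMacc(VWMULSU) == VWMACCSU); } // prints 1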
+
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -10161,6 +12735,15 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue(N, 0);
break;
}
+ case RISCVISD::FMV_W_X_RV64: {
+ // If the input to FMV_W_X_RV64 is just FMV_X_ANYEXTW_RV64, then the
+ // conversion is unnecessary and can be replaced with the
+ // FMV_X_ANYEXTW_RV64 operand.
+ SDValue Op0 = N->getOperand(0);
+ if (Op0.getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64)
+ return Op0.getOperand(0);
+ break;
+ }
case RISCVISD::FMV_X_ANYEXTH:
case RISCVISD::FMV_X_ANYEXTW_RV64: {
SDLoc DL(N);
@@ -10244,6 +12827,24 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return performTRUNCATECombine(N, DAG, Subtarget);
case ISD::SELECT:
return performSELECTCombine(N, DAG, Subtarget);
+ case RISCVISD::CZERO_EQZ:
+ case RISCVISD::CZERO_NEZ:
+ // czero_eq X, (xor Y, 1) -> czero_ne X, Y if Y is 0 or 1.
+ // czero_ne X, (xor Y, 1) -> czero_eq X, Y if Y is 0 or 1.
+ if (N->getOperand(1).getOpcode() == ISD::XOR &&
+ isOneConstant(N->getOperand(1).getOperand(1))) {
+ SDValue Cond = N->getOperand(1).getOperand(0);
+ APInt Mask = APInt::getBitsSetFrom(Cond.getValueSizeInBits(), 1);
+ if (DAG.MaskedValueIsZero(Cond, Mask)) {
+ unsigned NewOpc = N->getOpcode() == RISCVISD::CZERO_EQZ
+ ? RISCVISD::CZERO_NEZ
+ : RISCVISD::CZERO_EQZ;
+ return DAG.getNode(NewOpc, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), Cond);
+ }
+ }
+ return SDValue();
+
case RISCVISD::SELECT_CC: {
// Transform
SDValue LHS = N->getOperand(0);
@@ -10318,6 +12919,18 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Neg = DAG.getNegative(C, DL, VT);
return DAG.getNode(ISD::AND, DL, VT, Neg, TrueV);
}
+ // (riscvisd::select_cc x, 0, ne, x, 1) -> (add x, (setcc x, 0, eq))
+ // (riscvisd::select_cc x, 0, eq, 1, x) -> (add x, (setcc x, 0, eq))
+ if (((isOneConstant(FalseV) && LHS == TrueV &&
+ CCVal == ISD::CondCode::SETNE) ||
+ (isOneConstant(TrueV) && LHS == FalseV &&
+ CCVal == ISD::CondCode::SETEQ)) &&
+ isNullConstant(RHS)) {
+ // freeze it to be safe.
+ LHS = DAG.getFreeze(LHS);
+ SDValue C = DAG.getSetCC(DL, VT, LHS, RHS, ISD::CondCode::SETEQ);
+ return DAG.getNode(ISD::ADD, DL, VT, LHS, C);
+ }
}
return SDValue();
@@ -10391,7 +13004,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
EVT IndexVT = Index.getValueType();
MVT XLenVT = Subtarget.getXLenVT();
- // RISCV indexed loads only support the "unsigned unscaled" addressing
+ // RISC-V indexed loads only support the "unsigned unscaled" addressing
// mode, so anything else must be manually legalized.
bool NeedsIdxLegalization =
(IsIndexSigned && IndexVT.getVectorElementType().bitsLT(XLenVT));
@@ -10471,6 +13084,9 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
case RISCVISD::ADD_VL:
+ if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI))
+ return V;
+ return combineToVWMACC(N, DAG, Subtarget);
case RISCVISD::SUB_VL:
case RISCVISD::VWADD_W_VL:
case RISCVISD::VWADDU_W_VL:
@@ -10481,40 +13097,94 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case RISCVISD::VFMADD_VL:
case RISCVISD::VFNMADD_VL:
case RISCVISD::VFMSUB_VL:
- case RISCVISD::VFNMSUB_VL: {
- // Fold FNEG_VL into FMA opcodes.
- SDValue A = N->getOperand(0);
- SDValue B = N->getOperand(1);
- SDValue C = N->getOperand(2);
- SDValue Mask = N->getOperand(3);
- SDValue VL = N->getOperand(4);
-
- auto invertIfNegative = [&Mask, &VL](SDValue &V) {
- if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
- V.getOperand(2) == VL) {
- // Return the negated input.
- V = V.getOperand(0);
- return true;
- }
-
- return false;
- };
-
- bool NegA = invertIfNegative(A);
- bool NegB = invertIfNegative(B);
- bool NegC = invertIfNegative(C);
+ case RISCVISD::VFNMSUB_VL:
+ case RISCVISD::STRICT_VFMADD_VL:
+ case RISCVISD::STRICT_VFNMADD_VL:
+ case RISCVISD::STRICT_VFMSUB_VL:
+ case RISCVISD::STRICT_VFNMSUB_VL:
+ return performVFMADD_VLCombine(N, DAG);
+ case RISCVISD::FMUL_VL:
+ return performVFMUL_VLCombine(N, DAG);
+ case RISCVISD::FADD_VL:
+ case RISCVISD::FSUB_VL:
+ return performFADDSUB_VLCombine(N, DAG);
+ case ISD::LOAD:
+ case ISD::STORE: {
+ if (DCI.isAfterLegalizeDAG())
+ if (SDValue V = performMemPairCombine(N, DCI))
+ return V;
- // If no operands are negated, we're done.
- if (!NegA && !NegB && !NegC)
- return SDValue();
+ if (N->getOpcode() != ISD::STORE)
+ break;
- unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
- return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
- VL);
- }
- case ISD::STORE: {
auto *Store = cast<StoreSDNode>(N);
+ SDValue Chain = Store->getChain();
+ EVT MemVT = Store->getMemoryVT();
SDValue Val = Store->getValue();
+ SDLoc DL(N);
+
+ bool IsScalarizable =
+ MemVT.isFixedLengthVector() && ISD::isNormalStore(Store) &&
+ Store->isSimple() &&
+ MemVT.getVectorElementType().bitsLE(Subtarget.getXLenVT()) &&
+ isPowerOf2_64(MemVT.getSizeInBits()) &&
+ MemVT.getSizeInBits() <= Subtarget.getXLen();
+
+ // If sufficiently aligned we can scalarize stores of constant vectors of
+ // any power-of-two size up to XLen bits, provided that they aren't too
+ // expensive to materialize.
+ // vsetivli zero, 2, e8, m1, ta, ma
+ // vmv.v.i v8, 4
+ // vse64.v v8, (a0)
+ // ->
+ // li a1, 1028
+ // sh a1, 0(a0)
+ if (DCI.isBeforeLegalize() && IsScalarizable &&
+ ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
+ // Get the constant vector bits
+ APInt NewC(Val.getValueSizeInBits(), 0);
+ for (unsigned i = 0; i < Val.getNumOperands(); i++) {
+ if (Val.getOperand(i).isUndef())
+ continue;
+ NewC.insertBits(Val.getConstantOperandAPInt(i),
+ i * Val.getScalarValueSizeInBits());
+ }
+ MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
+
+ if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(),
+ Subtarget.getFeatureBits(), true) <= 2 &&
+ allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ NewVT, *Store->getMemOperand())) {
+ SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
+ return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
+ Store->getPointerInfo(), Store->getOriginalAlign(),
+ Store->getMemOperand()->getFlags());
+ }
+ }
+
+ // Similarly, if sufficiently aligned we can scalarize vector copies, e.g.
+ // vsetivli zero, 2, e16, m1, ta, ma
+ // vle16.v v8, (a0)
+ // vse16.v v8, (a1)
+ if (auto *L = dyn_cast<LoadSDNode>(Val);
+ L && DCI.isBeforeLegalize() && IsScalarizable && L->isSimple() &&
+ L->hasNUsesOfValue(1, 0) && L->hasNUsesOfValue(1, 1) &&
+ Store->getChain() == SDValue(L, 1) && ISD::isNormalLoad(L) &&
+ L->getMemoryVT() == MemVT) {
+ MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
+ if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ NewVT, *Store->getMemOperand()) &&
+ allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ NewVT, *L->getMemOperand())) {
+ SDValue NewL = DAG.getLoad(NewVT, DL, L->getChain(), L->getBasePtr(),
+ L->getPointerInfo(), L->getOriginalAlign(),
+ L->getMemOperand()->getFlags());
+ return DAG.getStore(Chain, DL, NewL, Store->getBasePtr(),
+ Store->getPointerInfo(), Store->getOriginalAlign(),
+ Store->getMemOperand()->getFlags());
+ }
+ }
+
// Combine store of vmv.x.s/vfmv.f.s to vse with VL of 1.
// vfmv.f.s is represented as extract element from 0. Match it late to avoid
// any illegal types.
@@ -10524,7 +13194,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
isNullConstant(Val.getOperand(1)))) {
SDValue Src = Val.getOperand(0);
MVT VecVT = Src.getSimpleValueType();
- EVT MemVT = Store->getMemoryVT();
// VecVT should be scalable and memory VT should match the element type.
if (VecVT.isScalableVector() &&
MemVT == VecVT.getVectorElementType()) {
@@ -10551,6 +13220,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return Gather;
break;
}
+ case ISD::CONCAT_VECTORS:
+ if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
+ return V;
+ break;
case RISCVISD::VMV_V_X_VL: {
// Tail agnostic VMV.V.X only demands the vector element bitwidth from the
// scalar input.
@@ -10581,8 +13254,11 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
+ case ISD::INTRINSIC_VOID:
+ case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_WO_CHAIN: {
- unsigned IntNo = N->getConstantOperandVal(0);
+ unsigned IntOpNo = N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
+ unsigned IntNo = N->getConstantOperandVal(IntOpNo);
switch (IntNo) {
// By default we do not combine any intrinsic.
default:
@@ -10605,6 +13281,23 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getConstant(-1, DL, VT);
return DAG.getConstant(0, DL, VT);
}
+ case Intrinsic::riscv_vloxei:
+ case Intrinsic::riscv_vloxei_mask:
+ case Intrinsic::riscv_vluxei:
+ case Intrinsic::riscv_vluxei_mask:
+ case Intrinsic::riscv_vsoxei:
+ case Intrinsic::riscv_vsoxei_mask:
+ case Intrinsic::riscv_vsuxei:
+ case Intrinsic::riscv_vsuxei_mask:
+ if (SDValue V = narrowIndex(N->getOperand(4), DAG)) {
+ SmallVector<SDValue, 8> Ops(N->ops());
+ Ops[4] = V;
+ const auto *MemSD = cast<MemIntrinsicSDNode>(N);
+ return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(),
+ Ops, MemSD->getMemoryVT(),
+ MemSD->getMemOperand());
+ }
+ return SDValue();
}
}
case ISD::BITCAST: {
@@ -10632,6 +13325,25 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
+bool RISCVTargetLowering::shouldTransformSignedTruncationCheck(
+ EVT XVT, unsigned KeptBits) const {
+ // For vectors, we don't have a preference.
+ if (XVT.isVector())
+ return false;
+
+ if (XVT != MVT::i32 && XVT != MVT::i64)
+ return false;
+
+ // We can use sext.w for RV64 or an srai 31 on RV32.
+ if (KeptBits == 32 || KeptBits == 64)
+ return true;
+
+ // With Zbb we can use sext.h/sext.b.
+ return Subtarget.hasStdExtZbb() &&
+ ((KeptBits == 8 && XVT == MVT::i64 && !Subtarget.is64Bit()) ||
+ KeptBits == 16);
+}
+
bool RISCVTargetLowering::isDesirableToCommuteWithShift(
const SDNode *N, CombineLevel Level) const {
assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
@@ -10656,13 +13368,13 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
// We can materialise `c1 << c2` into an add immediate, so it's "free",
// and the combine should happen, to potentially allow further combines
// later.
- if (ShiftedC1Int.getMinSignedBits() <= 64 &&
+ if (ShiftedC1Int.getSignificantBits() <= 64 &&
isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
return true;
// We can materialise `c1` in an add immediate, so it's "free", and the
// combine should be prevented.
- if (C1Int.getMinSignedBits() <= 64 &&
+ if (C1Int.getSignificantBits() <= 64 &&
isLegalAddImmediate(C1Int.getSExtValue()))
return false;
@@ -10752,7 +13464,7 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant(
return false;
// What is the fewest number of bits we need to represent the negative number.
- unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
+ unsigned MinSignedBits = ExpandedMask.getSignificantBits();
// Try to make a 12 bit negative immediate. If that fails try to make a 32
// bit negative immediate unless the shrunk immediate already fits in 32 bits.
@@ -10814,9 +13526,16 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
// Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
break;
}
+ case RISCVISD::CZERO_EQZ:
+ case RISCVISD::CZERO_NEZ:
+ Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
+ // Result is either all zero or operand 0. We can propagate zeros, but not
+ // ones.
+ Known.One.clearAllBits();
+ break;
case RISCVISD::REMUW: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
@@ -10875,6 +13594,11 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.One.setBit(Log2_32(MinVLenB));
break;
}
+ case RISCVISD::FPCLASS: {
+ // fclass will only set one of the low 10 bits.
+ Known.Zero.setBitsFrom(10);
+ break;
+ }
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_WO_CHAIN: {
unsigned IntNo =
@@ -10885,12 +13609,10 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
case Intrinsic::riscv_vsetvli:
case Intrinsic::riscv_vsetvlimax:
- case Intrinsic::riscv_vsetvli_opt:
- case Intrinsic::riscv_vsetvlimax_opt:
- // Assume that VL output is positive and would fit in an int32_t.
- // TODO: VLEN might be capped at 16 bits in a future V spec update.
- if (BitWidth >= 32)
- Known.Zero.setBitsFrom(31);
+ // Assume that the VL output is at most 65536, i.e. it fits in 17 bits.
+ // TODO: Take SEW and LMUL into account.
+ if (BitWidth > 17)
+ Known.Zero.setBitsFrom(17);
break;
}
break;
@@ -10912,6 +13634,11 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
DAG.ComputeNumSignBits(Op.getOperand(4), DemandedElts, Depth + 1);
return std::min(Tmp, Tmp2);
}
+ case RISCVISD::CZERO_EQZ:
+ case RISCVISD::CZERO_NEZ:
+ // Output is either all zero or operand 0. We can propagate sign bit count
+ // from operand 0.
+ return DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
case RISCVISD::ABSW: {
// We expand this at isel to negw+max. The result will have 33 sign bits
// if the input has at least 33 sign bits.
@@ -11086,8 +13813,11 @@ static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
}
static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
- MachineBasicBlock *BB) {
- assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
+ MachineBasicBlock *BB,
+ const RISCVSubtarget &Subtarget) {
+ assert((MI.getOpcode() == RISCV::SplitF64Pseudo ||
+ MI.getOpcode() == RISCV::SplitF64Pseudo_INX) &&
+ "Unexpected instruction");
MachineFunction &MF = *BB->getParent();
DebugLoc DL = MI.getDebugLoc();
@@ -11096,7 +13826,10 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
Register LoReg = MI.getOperand(0).getReg();
Register HiReg = MI.getOperand(1).getReg();
Register SrcReg = MI.getOperand(2).getReg();
- const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
+
+ const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX
+ ? &RISCV::GPRPF64RegClass
+ : &RISCV::FPR64RegClass;
int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
@@ -11119,8 +13852,10 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
}
static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
- MachineBasicBlock *BB) {
- assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
+ MachineBasicBlock *BB,
+ const RISCVSubtarget &Subtarget) {
+ assert((MI.getOpcode() == RISCV::BuildPairF64Pseudo ||
+ MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX) &&
"Unexpected instruction");
MachineFunction &MF = *BB->getParent();
@@ -11130,7 +13865,10 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
Register DstReg = MI.getOperand(0).getReg();
Register LoReg = MI.getOperand(1).getReg();
Register HiReg = MI.getOperand(2).getReg();
- const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
+
+ const TargetRegisterClass *DstRC =
+ MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPF64RegClass
+ : &RISCV::FPR64RegClass;
int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
@@ -11159,8 +13897,12 @@ static bool isSelectPseudo(MachineInstr &MI) {
return false;
case RISCV::Select_GPR_Using_CC_GPR:
case RISCV::Select_FPR16_Using_CC_GPR:
+ case RISCV::Select_FPR16INX_Using_CC_GPR:
case RISCV::Select_FPR32_Using_CC_GPR:
+ case RISCV::Select_FPR32INX_Using_CC_GPR:
case RISCV::Select_FPR64_Using_CC_GPR:
+ case RISCV::Select_FPR64INX_Using_CC_GPR:
+ case RISCV::Select_FPR64IN32X_Using_CC_GPR:
return true;
}
}
@@ -11439,8 +14181,8 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
return TailMBB;
}
-static MachineBasicBlock *
-emitVFCVT_RM_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode) {
+static MachineBasicBlock *emitVFCVT_RM(MachineInstr &MI, MachineBasicBlock *BB,
+ unsigned Opcode) {
DebugLoc DL = MI.getDebugLoc();
const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
@@ -11448,20 +14190,26 @@ emitVFCVT_RM_MASK(MachineInstr &MI, MachineBasicBlock *BB, unsigned Opcode) {
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
Register SavedFRM = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ assert(MI.getNumOperands() == 8 || MI.getNumOperands() == 7);
+ unsigned FRMIdx = MI.getNumOperands() == 8 ? 4 : 3;
+
// Update FRM and save the old value.
BuildMI(*BB, MI, DL, TII.get(RISCV::SwapFRMImm), SavedFRM)
- .addImm(MI.getOperand(4).getImm());
-
- // Emit an VFCVT without the FRM operand.
- assert(MI.getNumOperands() == 8);
- auto MIB = BuildMI(*BB, MI, DL, TII.get(Opcode))
- .add(MI.getOperand(0))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(5))
- .add(MI.getOperand(6))
- .add(MI.getOperand(7));
+ .addImm(MI.getOperand(FRMIdx).getImm());
+
+ // Emit a VFCVT with FRM == DYN.
+ auto MIB = BuildMI(*BB, MI, DL, TII.get(Opcode));
+
+ for (unsigned I = 0; I < MI.getNumOperands(); I++)
+ if (I != FRMIdx)
+ MIB = MIB.add(MI.getOperand(I));
+ else
+ MIB = MIB.add(MachineOperand::CreateImm(7)); // frm = DYN
+
+ MIB.add(MachineOperand::CreateReg(RISCV::FRM,
+ /*IsDef*/ false,
+ /*IsImp*/ true));
+
if (MI.getFlag(MachineInstr::MIFlag::NoFPExcept))
MIB->setFlag(MachineInstr::MIFlag::NoFPExcept);
@@ -11499,9 +14247,13 @@ static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
.add(MI.getOperand(1))
.add(MI.getOperand(2))
.add(MI.getOperand(3))
+ .add(MachineOperand::CreateImm(7)) // frm = DYN
.add(MI.getOperand(4))
.add(MI.getOperand(5))
- .add(MI.getOperand(6));
+ .add(MI.getOperand(6))
+ .add(MachineOperand::CreateReg(RISCV::FRM,
+ /*IsDef*/ false,
+ /*IsImp*/ true));
// Emit a VFCVT_F_X
BuildMI(*BB, MI, DL, TII.get(CVTFOpc))
@@ -11509,9 +14261,13 @@ static MachineBasicBlock *emitVFROUND_NOEXCEPT_MASK(MachineInstr &MI,
.add(MI.getOperand(1))
.addReg(Tmp)
.add(MI.getOperand(3))
+ .add(MachineOperand::CreateImm(7)) // frm = DYN
.add(MI.getOperand(4))
.add(MI.getOperand(5))
- .add(MI.getOperand(6));
+ .add(MI.getOperand(6))
+ .add(MachineOperand::CreateReg(RISCV::FRM,
+ /*IsDef*/ false,
+ /*IsImp*/ true));
// Restore FFLAGS.
BuildMI(*BB, MI, DL, TII.get(RISCV::WriteFFLAGS))
@@ -11537,6 +14293,14 @@ static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
FSGNJXOpc = RISCV::FSGNJX_H;
RC = &RISCV::FPR16RegClass;
break;
+ case RISCV::PseudoFROUND_H_INX:
+ CmpOpc = RISCV::FLT_H_INX;
+ F2IOpc = RISCV::FCVT_W_H_INX;
+ I2FOpc = RISCV::FCVT_H_W_INX;
+ FSGNJOpc = RISCV::FSGNJ_H_INX;
+ FSGNJXOpc = RISCV::FSGNJX_H_INX;
+ RC = &RISCV::GPRF16RegClass;
+ break;
case RISCV::PseudoFROUND_S:
CmpOpc = RISCV::FLT_S;
F2IOpc = RISCV::FCVT_W_S;
@@ -11545,6 +14309,14 @@ static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
FSGNJXOpc = RISCV::FSGNJX_S;
RC = &RISCV::FPR32RegClass;
break;
+ case RISCV::PseudoFROUND_S_INX:
+ CmpOpc = RISCV::FLT_S_INX;
+ F2IOpc = RISCV::FCVT_W_S_INX;
+ I2FOpc = RISCV::FCVT_S_W_INX;
+ FSGNJOpc = RISCV::FSGNJ_S_INX;
+ FSGNJXOpc = RISCV::FSGNJX_S_INX;
+ RC = &RISCV::GPRF32RegClass;
+ break;
case RISCV::PseudoFROUND_D:
assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
CmpOpc = RISCV::FLT_D;
@@ -11554,6 +14326,15 @@ static MachineBasicBlock *emitFROUND(MachineInstr &MI, MachineBasicBlock *MBB,
FSGNJXOpc = RISCV::FSGNJX_D;
RC = &RISCV::FPR64RegClass;
break;
+ case RISCV::PseudoFROUND_D_INX:
+ assert(Subtarget.is64Bit() && "Expected 64-bit GPR.");
+ CmpOpc = RISCV::FLT_D_INX;
+ F2IOpc = RISCV::FCVT_L_D_INX;
+ I2FOpc = RISCV::FCVT_D_L_INX;
+ FSGNJOpc = RISCV::FSGNJ_D_INX;
+ FSGNJXOpc = RISCV::FSGNJX_D_INX;
+ RC = &RISCV::GPRRegClass;
+ break;
}
const BasicBlock *BB = MBB->getBasicBlock();
@@ -11641,185 +14422,86 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitReadCycleWidePseudo(MI, BB);
case RISCV::Select_GPR_Using_CC_GPR:
case RISCV::Select_FPR16_Using_CC_GPR:
+ case RISCV::Select_FPR16INX_Using_CC_GPR:
case RISCV::Select_FPR32_Using_CC_GPR:
+ case RISCV::Select_FPR32INX_Using_CC_GPR:
case RISCV::Select_FPR64_Using_CC_GPR:
+ case RISCV::Select_FPR64INX_Using_CC_GPR:
+ case RISCV::Select_FPR64IN32X_Using_CC_GPR:
return emitSelectPseudo(MI, BB, Subtarget);
case RISCV::BuildPairF64Pseudo:
- return emitBuildPairF64Pseudo(MI, BB);
+ case RISCV::BuildPairF64Pseudo_INX:
+ return emitBuildPairF64Pseudo(MI, BB, Subtarget);
case RISCV::SplitF64Pseudo:
- return emitSplitF64Pseudo(MI, BB);
+ case RISCV::SplitF64Pseudo_INX:
+ return emitSplitF64Pseudo(MI, BB, Subtarget);
case RISCV::PseudoQuietFLE_H:
return emitQuietFCMP(MI, BB, RISCV::FLE_H, RISCV::FEQ_H, Subtarget);
+ case RISCV::PseudoQuietFLE_H_INX:
+ return emitQuietFCMP(MI, BB, RISCV::FLE_H_INX, RISCV::FEQ_H_INX, Subtarget);
case RISCV::PseudoQuietFLT_H:
return emitQuietFCMP(MI, BB, RISCV::FLT_H, RISCV::FEQ_H, Subtarget);
+ case RISCV::PseudoQuietFLT_H_INX:
+ return emitQuietFCMP(MI, BB, RISCV::FLT_H_INX, RISCV::FEQ_H_INX, Subtarget);
case RISCV::PseudoQuietFLE_S:
return emitQuietFCMP(MI, BB, RISCV::FLE_S, RISCV::FEQ_S, Subtarget);
+ case RISCV::PseudoQuietFLE_S_INX:
+ return emitQuietFCMP(MI, BB, RISCV::FLE_S_INX, RISCV::FEQ_S_INX, Subtarget);
case RISCV::PseudoQuietFLT_S:
return emitQuietFCMP(MI, BB, RISCV::FLT_S, RISCV::FEQ_S, Subtarget);
+ case RISCV::PseudoQuietFLT_S_INX:
+ return emitQuietFCMP(MI, BB, RISCV::FLT_S_INX, RISCV::FEQ_S_INX, Subtarget);
case RISCV::PseudoQuietFLE_D:
return emitQuietFCMP(MI, BB, RISCV::FLE_D, RISCV::FEQ_D, Subtarget);
+ case RISCV::PseudoQuietFLE_D_INX:
+ return emitQuietFCMP(MI, BB, RISCV::FLE_D_INX, RISCV::FEQ_D_INX, Subtarget);
+ case RISCV::PseudoQuietFLE_D_IN32X:
+ return emitQuietFCMP(MI, BB, RISCV::FLE_D_IN32X, RISCV::FEQ_D_IN32X,
+ Subtarget);
case RISCV::PseudoQuietFLT_D:
return emitQuietFCMP(MI, BB, RISCV::FLT_D, RISCV::FEQ_D, Subtarget);
-
- // =========================================================================
- // VFCVT
- // =========================================================================
-
- case RISCV::PseudoVFCVT_RM_X_F_V_M1_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK);
- case RISCV::PseudoVFCVT_RM_X_F_V_M2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M2_MASK);
- case RISCV::PseudoVFCVT_RM_X_F_V_M4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M4_MASK);
- case RISCV::PseudoVFCVT_RM_X_F_V_M8_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M8_MASK);
- case RISCV::PseudoVFCVT_RM_X_F_V_MF2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF2_MASK);
- case RISCV::PseudoVFCVT_RM_X_F_V_MF4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK);
-
- case RISCV::PseudoVFCVT_RM_XU_F_V_M1_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M1_MASK);
- case RISCV::PseudoVFCVT_RM_XU_F_V_M2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M2_MASK);
- case RISCV::PseudoVFCVT_RM_XU_F_V_M4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M4_MASK);
- case RISCV::PseudoVFCVT_RM_XU_F_V_M8_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_M8_MASK);
- case RISCV::PseudoVFCVT_RM_XU_F_V_MF2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_MF2_MASK);
- case RISCV::PseudoVFCVT_RM_XU_F_V_MF4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_XU_F_V_MF4_MASK);
-
- case RISCV::PseudoVFCVT_RM_F_XU_V_M1_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M1_MASK);
- case RISCV::PseudoVFCVT_RM_F_XU_V_M2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M2_MASK);
- case RISCV::PseudoVFCVT_RM_F_XU_V_M4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M4_MASK);
- case RISCV::PseudoVFCVT_RM_F_XU_V_M8_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_M8_MASK);
- case RISCV::PseudoVFCVT_RM_F_XU_V_MF2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_MF2_MASK);
- case RISCV::PseudoVFCVT_RM_F_XU_V_MF4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_XU_V_MF4_MASK);
-
- case RISCV::PseudoVFCVT_RM_F_X_V_M1_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_M1_MASK);
- case RISCV::PseudoVFCVT_RM_F_X_V_M2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_M2_MASK);
- case RISCV::PseudoVFCVT_RM_F_X_V_M4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_M4_MASK);
- case RISCV::PseudoVFCVT_RM_F_X_V_M8_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_M8_MASK);
- case RISCV::PseudoVFCVT_RM_F_X_V_MF2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_MF2_MASK);
- case RISCV::PseudoVFCVT_RM_F_X_V_MF4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
-
- // =========================================================================
- // VFWCVT
- // =========================================================================
-
- case RISCV::PseudoVFWCVT_RM_XU_F_V_M1_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_M1_MASK);
- case RISCV::PseudoVFWCVT_RM_XU_F_V_M2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_M2_MASK);
- case RISCV::PseudoVFWCVT_RM_XU_F_V_M4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_M4_MASK);
- case RISCV::PseudoVFWCVT_RM_XU_F_V_MF2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_MF2_MASK);
- case RISCV::PseudoVFWCVT_RM_XU_F_V_MF4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_MF4_MASK);
-
- case RISCV::PseudoVFWCVT_RM_X_F_V_M1_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_M1_MASK);
- case RISCV::PseudoVFWCVT_RM_X_F_V_M2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_M2_MASK);
- case RISCV::PseudoVFWCVT_RM_X_F_V_M4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_M4_MASK);
- case RISCV::PseudoVFWCVT_RM_X_F_V_MF2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_MF2_MASK);
- case RISCV::PseudoVFWCVT_RM_X_F_V_MF4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_X_F_V_MF4_MASK);
-
- case RISCV::PseudoVFWCVT_RM_F_XU_V_M1_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_M1_MASK);
- case RISCV::PseudoVFWCVT_RM_F_XU_V_M2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_M2_MASK);
- case RISCV::PseudoVFWCVT_RM_F_XU_V_M4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_M4_MASK);
- case RISCV::PseudoVFWCVT_RM_F_XU_V_MF2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_MF2_MASK);
- case RISCV::PseudoVFWCVT_RM_F_XU_V_MF4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_MF4_MASK);
- case RISCV::PseudoVFWCVT_RM_F_XU_V_MF8_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_MF8_MASK);
-
- case RISCV::PseudoVFWCVT_RM_F_X_V_M1_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_M1_MASK);
- case RISCV::PseudoVFWCVT_RM_F_X_V_M2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_M2_MASK);
- case RISCV::PseudoVFWCVT_RM_F_X_V_M4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_M4_MASK);
- case RISCV::PseudoVFWCVT_RM_F_X_V_MF2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_MF2_MASK);
- case RISCV::PseudoVFWCVT_RM_F_X_V_MF4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_MF4_MASK);
- case RISCV::PseudoVFWCVT_RM_F_X_V_MF8_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFWCVT_F_XU_V_MF8_MASK);
-
- // =========================================================================
- // VFNCVT
- // =========================================================================
-
- case RISCV::PseudoVFNCVT_RM_XU_F_W_M1_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_M1_MASK);
- case RISCV::PseudoVFNCVT_RM_XU_F_W_M2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_M2_MASK);
- case RISCV::PseudoVFNCVT_RM_XU_F_W_M4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_M4_MASK);
- case RISCV::PseudoVFNCVT_RM_XU_F_W_MF2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_MF2_MASK);
- case RISCV::PseudoVFNCVT_RM_XU_F_W_MF4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_MF4_MASK);
- case RISCV::PseudoVFNCVT_RM_XU_F_W_MF8_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_XU_F_W_MF8_MASK);
-
- case RISCV::PseudoVFNCVT_RM_X_F_W_M1_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_M1_MASK);
- case RISCV::PseudoVFNCVT_RM_X_F_W_M2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_M2_MASK);
- case RISCV::PseudoVFNCVT_RM_X_F_W_M4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_M4_MASK);
- case RISCV::PseudoVFNCVT_RM_X_F_W_MF2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_MF2_MASK);
- case RISCV::PseudoVFNCVT_RM_X_F_W_MF4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_MF4_MASK);
- case RISCV::PseudoVFNCVT_RM_X_F_W_MF8_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_X_F_W_MF8_MASK);
-
- case RISCV::PseudoVFNCVT_RM_F_XU_W_M1_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_M1_MASK);
- case RISCV::PseudoVFNCVT_RM_F_XU_W_M2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_M2_MASK);
- case RISCV::PseudoVFNCVT_RM_F_XU_W_M4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_M4_MASK);
- case RISCV::PseudoVFNCVT_RM_F_XU_W_MF2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_MF2_MASK);
- case RISCV::PseudoVFNCVT_RM_F_XU_W_MF4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_MF4_MASK);
-
- case RISCV::PseudoVFNCVT_RM_F_X_W_M1_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_M1_MASK);
- case RISCV::PseudoVFNCVT_RM_F_X_W_M2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_M2_MASK);
- case RISCV::PseudoVFNCVT_RM_F_X_W_M4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_M4_MASK);
- case RISCV::PseudoVFNCVT_RM_F_X_W_MF2_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_MF2_MASK);
- case RISCV::PseudoVFNCVT_RM_F_X_W_MF4_MASK:
- return emitVFCVT_RM_MASK(MI, BB, RISCV::PseudoVFNCVT_F_XU_W_MF4_MASK);
+ case RISCV::PseudoQuietFLT_D_INX:
+ return emitQuietFCMP(MI, BB, RISCV::FLT_D_INX, RISCV::FEQ_D_INX, Subtarget);
+ case RISCV::PseudoQuietFLT_D_IN32X:
+ return emitQuietFCMP(MI, BB, RISCV::FLT_D_IN32X, RISCV::FEQ_D_IN32X,
+ Subtarget);
+
+#define PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, LMUL) \
+ case RISCV::RMOpc##_##LMUL: \
+ return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL); \
+ case RISCV::RMOpc##_##LMUL##_MASK: \
+ return emitVFCVT_RM(MI, BB, RISCV::Opc##_##LMUL##_MASK);
+
+#define PseudoVFCVT_RM_CASE(RMOpc, Opc) \
+ PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M1) \
+ PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M2) \
+ PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M4) \
+ PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF2) \
+ PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF4)
+
+#define PseudoVFCVT_RM_CASE_M8(RMOpc, Opc) \
+ PseudoVFCVT_RM_CASE(RMOpc, Opc) \
+ PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, M8)
+
+#define PseudoVFCVT_RM_CASE_MF8(RMOpc, Opc) \
+ PseudoVFCVT_RM_CASE(RMOpc, Opc) \
+ PseudoVFCVT_RM_LMUL_CASE(RMOpc, Opc, MF8)
+
+ // VFCVT
+ PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_X_F_V, PseudoVFCVT_X_F_V)
+ PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_XU_F_V, PseudoVFCVT_XU_F_V)
+ PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_XU_V, PseudoVFCVT_F_XU_V)
+ PseudoVFCVT_RM_CASE_M8(PseudoVFCVT_RM_F_X_V, PseudoVFCVT_F_X_V)
+
+ // VFWCVT
+ PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_XU_F_V, PseudoVFWCVT_XU_F_V);
+ PseudoVFCVT_RM_CASE(PseudoVFWCVT_RM_X_F_V, PseudoVFWCVT_X_F_V);
+
+ // VFNCVT
+ PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_XU_F_W, PseudoVFNCVT_XU_F_W);
+ PseudoVFCVT_RM_CASE_MF8(PseudoVFNCVT_RM_X_F_W, PseudoVFNCVT_X_F_W);
+ PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_XU_W, PseudoVFNCVT_F_XU_W);
+ PseudoVFCVT_RM_CASE(PseudoVFNCVT_RM_F_X_W, PseudoVFNCVT_F_X_W);
case RISCV::PseudoVFROUND_NOEXCEPT_V_M1_MASK:
return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_M1_MASK,
@@ -11840,14 +14522,43 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitVFROUND_NOEXCEPT_MASK(MI, BB, RISCV::PseudoVFCVT_X_F_V_MF4_MASK,
RISCV::PseudoVFCVT_F_X_V_MF4_MASK);
case RISCV::PseudoFROUND_H:
+ case RISCV::PseudoFROUND_H_INX:
case RISCV::PseudoFROUND_S:
+ case RISCV::PseudoFROUND_S_INX:
case RISCV::PseudoFROUND_D:
+ case RISCV::PseudoFROUND_D_INX:
+ case RISCV::PseudoFROUND_D_IN32X:
return emitFROUND(MI, BB, Subtarget);
}
}
+// Returns the index of the rounding mode immediate value if any, otherwise
+// returns std::nullopt.
+static std::optional<unsigned> getRoundModeIdx(const MachineInstr &MI) {
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+ if (!RISCVII::hasRoundModeOp(TSFlags))
+ return std::nullopt;
+
+ // The operand order
+ // -------------------------------------
+ // | n-1 (if any) | n-2 | n-3 | n-4 |
+ // | policy | sew | vl | rm |
+ // -------------------------------------
+ return MI.getNumExplicitOperands() - RISCVII::hasVecPolicyOp(TSFlags) - 3;
+}
+
void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const {
+ // Add FRM dependency to vector floating-point instructions with dynamic
+ // rounding mode.
+ if (auto RoundModeIdx = getRoundModeIdx(MI)) {
+ unsigned FRMImm = MI.getOperand(*RoundModeIdx).getImm();
+ if (FRMImm == RISCVFPRndMode::DYN && !MI.readsRegister(RISCV::FRM)) {
+ MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false,
+ /*isImp*/ true));
+ }
+ }
+
// Add FRM dependency to any instructions with dynamic rounding mode.
unsigned Opc = MI.getOpcode();
auto Idx = RISCV::getNamedOperandIdx(Opc, RISCV::OpName::frm);
@@ -11976,7 +14687,7 @@ static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
}
// Implements the RISC-V calling convention. Returns true upon failure.
-static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
+bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
@@ -12033,7 +14744,8 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
// similar local variables rather than directly checking against the target
// ABI.
- if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
+ if (UseGPRForF16_F32 &&
+ (ValVT == MVT::f16 || ValVT == MVT::bf16 || ValVT == MVT::f32)) {
LocVT = XLenVT;
LocInfo = CCValAssign::BCvt;
} else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
@@ -12126,7 +14838,7 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
unsigned StoreSizeBytes = XLen / 8;
Align StackAlign = Align(XLen / 8);
- if (ValVT == MVT::f16 && !UseGPRForF16_F32)
+ if ((ValVT == MVT::f16 || ValVT == MVT::bf16) && !UseGPRForF16_F32)
Reg = State.AllocateReg(ArgFPR16s);
else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
Reg = State.AllocateReg(ArgFPR32s);
@@ -12191,9 +14903,10 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
return false;
}
- // When a floating-point value is passed on the stack, no bit-conversion is
- // needed.
- if (ValVT.isFloatingPoint()) {
+ // When a scalar floating-point value is passed on the stack, no
+ // bit-conversion is needed.
+ if (ValVT.isFloatingPoint() && LocInfo != CCValAssign::Indirect) {
+ assert(!ValVT.isVector());
LocVT = ValVT;
LocInfo = CCValAssign::Full;
}
@@ -12237,7 +14950,7 @@ void RISCVTargetLowering::analyzeInputArgs(
ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
FirstMaskArgument)) {
LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString() << '\n');
+ << ArgVT << '\n');
llvm_unreachable(nullptr);
}
}
@@ -12263,7 +14976,7 @@ void RISCVTargetLowering::analyzeOutputArgs(
ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
FirstMaskArgument)) {
LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString() << "\n");
+ << ArgVT << "\n");
llvm_unreachable(nullptr);
}
}
@@ -12282,8 +14995,9 @@ static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
break;
case CCValAssign::BCvt:
- if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
- Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
+ if (VA.getLocVT().isInteger() &&
+ (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
+ Val = DAG.getNode(RISCVISD::FMV_H_X, DL, VA.getValVT(), Val);
else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
else
@@ -12341,7 +15055,8 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
break;
case CCValAssign::BCvt:
- if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
+ if (VA.getLocVT().isInteger() &&
+ (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
@@ -12428,7 +15143,7 @@ static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
// FastCC has less than 1% performance improvement for some particular
// benchmark. But theoretically, it may have benefits for some cases.
-static bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
+bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State,
@@ -12449,7 +15164,10 @@ static bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
}
}
- if (LocVT == MVT::f16) {
+ const RISCVSubtarget &Subtarget = TLI.getSubtarget();
+
+ if (LocVT == MVT::f16 &&
+ (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
static const MCPhysReg FPR16List[] = {
RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H, RISCV::F1_H,
@@ -12461,7 +15179,7 @@ static bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
}
}
- if (LocVT == MVT::f32) {
+ if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
static const MCPhysReg FPR32List[] = {
RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F, RISCV::F1_F,
@@ -12473,7 +15191,7 @@ static bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
}
}
- if (LocVT == MVT::f64) {
+ if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
static const MCPhysReg FPR64List[] = {
RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D, RISCV::F1_D,
@@ -12485,6 +15203,24 @@ static bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
}
}
+ // Check if there is an available GPR before hitting the stack.
+ if ((LocVT == MVT::f16 &&
+ (Subtarget.hasStdExtZhinx() || Subtarget.hasStdExtZhinxmin())) ||
+ (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
+ (LocVT == MVT::f64 && Subtarget.is64Bit() &&
+ Subtarget.hasStdExtZdinx())) {
+ if (unsigned Reg = State.AllocateReg(GPRList)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
+ if (LocVT == MVT::f16) {
+ unsigned Offset2 = State.AllocateStack(2, Align(2));
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
+ return false;
+ }
+
if (LocVT == MVT::i32 || LocVT == MVT::f32) {
unsigned Offset4 = State.AllocateStack(4, Align(4));
State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
@@ -12530,28 +15266,31 @@ static bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
return true; // CC didn't match.
}
-static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
+bool RISCV::CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
-
if (ArgFlags.isNest()) {
report_fatal_error(
"Attribute 'nest' is not supported in GHC calling convention");
}
+ static const MCPhysReg GPRList[] = {
+ RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
+ RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
+
if (LocVT == MVT::i32 || LocVT == MVT::i64) {
// Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
// s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11
- static const MCPhysReg GPRList[] = {
- RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
- RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
if (unsigned Reg = State.AllocateReg(GPRList)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
}
- if (LocVT == MVT::f32) {
+ const RISCVSubtarget &Subtarget =
+ State.getMachineFunction().getSubtarget<RISCVSubtarget>();
+
+ if (LocVT == MVT::f32 && Subtarget.hasStdExtF()) {
// Pass in STG registers: F1, ..., F6
// fs0 ... fs5
static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
@@ -12563,7 +15302,7 @@ static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}
- if (LocVT == MVT::f64) {
+ if (LocVT == MVT::f64 && Subtarget.hasStdExtD()) {
// Pass in STG registers: D1, ..., D6
// fs6 ... fs11
static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
@@ -12575,6 +15314,15 @@ static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
}
}
+ if ((LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
+ (LocVT == MVT::f64 && Subtarget.hasStdExtZdinx() &&
+ Subtarget.is64Bit())) {
+ if (unsigned Reg = State.AllocateReg(GPRList)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
+ }
+
report_fatal_error("No registers left in GHC calling convention");
return true;
}
@@ -12594,10 +15342,9 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
case CallingConv::Fast:
break;
case CallingConv::GHC:
- if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
- !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
- report_fatal_error(
- "GHC calling convention requires the F and D instruction set extensions");
+ if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
+ report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
+ "(Zdinx/D) instruction set extensions");
}
const Function &Func = MF.getFunction();
@@ -12625,11 +15372,11 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
if (CallConv == CallingConv::GHC)
- CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
+ CCInfo.AnalyzeFormalArguments(Ins, RISCV::CC_RISCV_GHC);
else
analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false,
- CallConv == CallingConv::Fast ? CC_RISCV_FastCC
- : CC_RISCV);
+ CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
+ : RISCV::CC_RISCV);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -12690,7 +15437,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
// If all registers are allocated, then all varargs must be passed on the
// stack and we don't need to save any argregs.
if (ArgRegs.size() == Idx) {
- VaArgOffset = CCInfo.getNextStackOffset();
+ VaArgOffset = CCInfo.getStackSize();
VarArgsSaveSize = 0;
} else {
VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
@@ -12746,7 +15493,6 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
const SmallVector<CCValAssign, 16> &ArgLocs) const {
- auto &Callee = CLI.Callee;
auto CalleeCC = CLI.CallConv;
auto &Outs = CLI.Outs;
auto &Caller = MF.getFunction();
@@ -12761,7 +15507,7 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
return false;
// Do not tail call opt if the stack is used to pass parameters.
- if (CCInfo.getNextStackOffset() != 0)
+ if (CCInfo.getStackSize() != 0)
return false;
// Do not tail call opt if any parameters need to be passed indirectly.
@@ -12783,16 +15529,6 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
if (IsCallerStructRet || IsCalleeStructRet)
return false;
- // Externally-defined functions with weak linkage should not be
- // tail-called. The behaviour of branch instructions in this situation (as
- // used for tail calls) is implementation-defined, so we cannot rely on the
- // linker replacing the tail call with a return.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- const GlobalValue *GV = G->getGlobal();
- if (GV->hasExternalWeakLinkage())
- return false;
- }
-
// The callee has to preserve all registers the caller needs to preserve.
const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
@@ -12841,11 +15577,11 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
if (CallConv == CallingConv::GHC)
- ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
+ ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
else
analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
- CallConv == CallingConv::Fast ? CC_RISCV_FastCC
- : CC_RISCV);
+ CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
+ : RISCV::CC_RISCV);
// Check if it's really possible to do a tail call.
if (IsTailCall)
@@ -12858,7 +15594,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
"site marked musttail");
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = ArgCCInfo.getNextStackOffset();
+ unsigned NumBytes = ArgCCInfo.getStackSize();
// Create local copies for byval args
SmallVector<SDValue, 8> ByValArgs;
@@ -13068,15 +15804,24 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
if (Glue.getNode())
Ops.push_back(Glue);
+ assert((!CLI.CFIType || CLI.CB->isIndirectCall()) &&
+ "Unexpected CFI type for a direct call");
+
// Emit the call.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
if (IsTailCall) {
MF.getFrameInfo().setHasTailCall();
- return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
+ SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
+ if (CLI.CFIType)
+ Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
+ DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
+ return Ret;
}
Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
+ if (CLI.CFIType)
+ Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
Glue = Chain.getValue(1);
@@ -13087,7 +15832,7 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
- analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_RISCV);
+ analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV);
// Copy all of the result registers out of their specified physreg.
for (auto &VA : RVLocs) {
@@ -13130,7 +15875,7 @@ bool RISCVTargetLowering::CanLowerReturn(
MVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
- if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
+ if (RISCV::CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
*this, FirstMaskArgument))
return false;
@@ -13155,7 +15900,7 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
*DAG.getContext());
analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
- nullptr, CC_RISCV);
+ nullptr, RISCV::CC_RISCV);
if (CallConv == CallingConv::GHC && !RVLocs.empty())
report_fatal_error("GHC functions return void only");
@@ -13219,7 +15964,7 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
[](CCValAssign &VA) { return VA.getLocVT().isScalableVector(); }))
MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
- unsigned RetOpc = RISCVISD::RET_FLAG;
+ unsigned RetOpc = RISCVISD::RET_GLUE;
// Interrupt service routines use different return instructions.
const Function &Func = DAG.getMachineFunction().getFunction();
if (Func.hasFnAttribute("interrupt")) {
@@ -13231,12 +15976,10 @@ RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
StringRef Kind =
MF.getFunction().getFnAttribute("interrupt").getValueAsString();
- if (Kind == "user")
- RetOpc = RISCVISD::URET_FLAG;
- else if (Kind == "supervisor")
- RetOpc = RISCVISD::SRET_FLAG;
+ if (Kind == "supervisor")
+ RetOpc = RISCVISD::SRET_GLUE;
else
- RetOpc = RISCVISD::MRET_FLAG;
+ RetOpc = RISCVISD::MRET_GLUE;
}
return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
@@ -13264,6 +16007,11 @@ bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
return false;
SDNode *Copy = *N->use_begin();
+
+ if (Copy->getOpcode() == ISD::BITCAST) {
+ return isUsedByReturnOnly(Copy, Chain);
+ }
+
// TODO: Handle additional opcodes in order to support tail-calling libcalls
// with soft float ABIs.
if (Copy->getOpcode() != ISD::CopyToReg) {
@@ -13275,10 +16023,10 @@ bool RISCVTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() == MVT::Glue)
return false;
- // The copy must be used by a RISCVISD::RET_FLAG, and nothing else.
+ // The copy must be used by a RISCVISD::RET_GLUE, and nothing else.
bool HasRet = false;
for (SDNode *Node : Copy->uses()) {
- if (Node->getOpcode() != RISCVISD::RET_FLAG)
+ if (Node->getOpcode() != RISCVISD::RET_GLUE)
return false;
HasRet = true;
}
@@ -13301,10 +16049,9 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((RISCVISD::NodeType)Opcode) {
case RISCVISD::FIRST_NUMBER:
break;
- NODE_NAME_CASE(RET_FLAG)
- NODE_NAME_CASE(URET_FLAG)
- NODE_NAME_CASE(SRET_FLAG)
- NODE_NAME_CASE(MRET_FLAG)
+ NODE_NAME_CASE(RET_GLUE)
+ NODE_NAME_CASE(SRET_GLUE)
+ NODE_NAME_CASE(MRET_GLUE)
NODE_NAME_CASE(CALL)
NODE_NAME_CASE(SELECT_CC)
NODE_NAME_CASE(BR_CC)
@@ -13314,8 +16061,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(ADD_LO)
NODE_NAME_CASE(HI)
NODE_NAME_CASE(LLA)
+ NODE_NAME_CASE(LGA)
NODE_NAME_CASE(ADD_TPREL)
- NODE_NAME_CASE(LA)
NODE_NAME_CASE(LA_TLS_IE)
NODE_NAME_CASE(LA_TLS_GD)
NODE_NAME_CASE(MULHSU)
@@ -13341,12 +16088,34 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FCVT_WU_RV64)
NODE_NAME_CASE(STRICT_FCVT_W_RV64)
NODE_NAME_CASE(STRICT_FCVT_WU_RV64)
+ NODE_NAME_CASE(FP_ROUND_BF16)
+ NODE_NAME_CASE(FP_EXTEND_BF16)
NODE_NAME_CASE(FROUND)
+ NODE_NAME_CASE(FPCLASS)
+ NODE_NAME_CASE(FMAX)
+ NODE_NAME_CASE(FMIN)
NODE_NAME_CASE(READ_CYCLE_WIDE)
NODE_NAME_CASE(BREV8)
NODE_NAME_CASE(ORC_B)
NODE_NAME_CASE(ZIP)
NODE_NAME_CASE(UNZIP)
+ NODE_NAME_CASE(CLMUL)
+ NODE_NAME_CASE(CLMULH)
+ NODE_NAME_CASE(CLMULR)
+ NODE_NAME_CASE(SHA256SIG0)
+ NODE_NAME_CASE(SHA256SIG1)
+ NODE_NAME_CASE(SHA256SUM0)
+ NODE_NAME_CASE(SHA256SUM1)
+ NODE_NAME_CASE(SM4KS)
+ NODE_NAME_CASE(SM4ED)
+ NODE_NAME_CASE(SM3P0)
+ NODE_NAME_CASE(SM3P1)
+ NODE_NAME_CASE(TH_LWD)
+ NODE_NAME_CASE(TH_LWUD)
+ NODE_NAME_CASE(TH_LDD)
+ NODE_NAME_CASE(TH_SWD)
+ NODE_NAME_CASE(TH_SDD)
+ NODE_NAME_CASE(VMV_V_V_VL)
NODE_NAME_CASE(VMV_V_X_VL)
NODE_NAME_CASE(VFMV_V_F_VL)
NODE_NAME_CASE(VMV_X_S)
@@ -13359,6 +16128,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VSLIDE1UP_VL)
NODE_NAME_CASE(VSLIDEDOWN_VL)
NODE_NAME_CASE(VSLIDE1DOWN_VL)
+ NODE_NAME_CASE(VFSLIDE1UP_VL)
+ NODE_NAME_CASE(VFSLIDE1DOWN_VL)
NODE_NAME_CASE(VID_VL)
NODE_NAME_CASE(VFNCVT_ROD_VL)
NODE_NAME_CASE(VECREDUCE_ADD_VL)
@@ -13397,15 +16168,25 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FNEG_VL)
NODE_NAME_CASE(FABS_VL)
NODE_NAME_CASE(FSQRT_VL)
+ NODE_NAME_CASE(FCLASS_VL)
NODE_NAME_CASE(VFMADD_VL)
NODE_NAME_CASE(VFNMADD_VL)
NODE_NAME_CASE(VFMSUB_VL)
NODE_NAME_CASE(VFNMSUB_VL)
+ NODE_NAME_CASE(VFWMADD_VL)
+ NODE_NAME_CASE(VFWNMADD_VL)
+ NODE_NAME_CASE(VFWMSUB_VL)
+ NODE_NAME_CASE(VFWNMSUB_VL)
NODE_NAME_CASE(FCOPYSIGN_VL)
NODE_NAME_CASE(SMIN_VL)
NODE_NAME_CASE(SMAX_VL)
NODE_NAME_CASE(UMIN_VL)
NODE_NAME_CASE(UMAX_VL)
+ NODE_NAME_CASE(BITREVERSE_VL)
+ NODE_NAME_CASE(BSWAP_VL)
+ NODE_NAME_CASE(CTLZ_VL)
+ NODE_NAME_CASE(CTTZ_VL)
+ NODE_NAME_CASE(CTPOP_VL)
NODE_NAME_CASE(FMINNUM_VL)
NODE_NAME_CASE(FMAXNUM_VL)
NODE_NAME_CASE(MULHS_VL)
@@ -13423,6 +16204,26 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VFCVT_RM_F_X_VL)
NODE_NAME_CASE(FP_EXTEND_VL)
NODE_NAME_CASE(FP_ROUND_VL)
+ NODE_NAME_CASE(STRICT_FADD_VL)
+ NODE_NAME_CASE(STRICT_FSUB_VL)
+ NODE_NAME_CASE(STRICT_FMUL_VL)
+ NODE_NAME_CASE(STRICT_FDIV_VL)
+ NODE_NAME_CASE(STRICT_FSQRT_VL)
+ NODE_NAME_CASE(STRICT_VFMADD_VL)
+ NODE_NAME_CASE(STRICT_VFNMADD_VL)
+ NODE_NAME_CASE(STRICT_VFMSUB_VL)
+ NODE_NAME_CASE(STRICT_VFNMSUB_VL)
+ NODE_NAME_CASE(STRICT_FP_ROUND_VL)
+ NODE_NAME_CASE(STRICT_FP_EXTEND_VL)
+ NODE_NAME_CASE(STRICT_VFNCVT_ROD_VL)
+ NODE_NAME_CASE(STRICT_SINT_TO_FP_VL)
+ NODE_NAME_CASE(STRICT_UINT_TO_FP_VL)
+ NODE_NAME_CASE(STRICT_VFCVT_RM_X_F_VL)
+ NODE_NAME_CASE(STRICT_VFCVT_RTZ_X_F_VL)
+ NODE_NAME_CASE(STRICT_VFCVT_RTZ_XU_F_VL)
+ NODE_NAME_CASE(STRICT_FSETCC_VL)
+ NODE_NAME_CASE(STRICT_FSETCCS_VL)
+ NODE_NAME_CASE(STRICT_VFROUND_NOEXCEPT_VL)
NODE_NAME_CASE(VWMUL_VL)
NODE_NAME_CASE(VWMULU_VL)
NODE_NAME_CASE(VWMULSU_VL)
@@ -13434,6 +16235,14 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VWADDU_W_VL)
NODE_NAME_CASE(VWSUB_W_VL)
NODE_NAME_CASE(VWSUBU_W_VL)
+ NODE_NAME_CASE(VFWMUL_VL)
+ NODE_NAME_CASE(VFWADD_VL)
+ NODE_NAME_CASE(VFWSUB_VL)
+ NODE_NAME_CASE(VFWADD_W_VL)
+ NODE_NAME_CASE(VFWSUB_W_VL)
+ NODE_NAME_CASE(VWMACC_VL)
+ NODE_NAME_CASE(VWMACCU_VL)
+ NODE_NAME_CASE(VWMACCSU_VL)
NODE_NAME_CASE(VNSRL_VL)
NODE_NAME_CASE(SETCC_VL)
NODE_NAME_CASE(VSELECT_VL)
@@ -13453,6 +16262,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(READ_CSR)
NODE_NAME_CASE(WRITE_CSR)
NODE_NAME_CASE(SWAP_CSR)
+ NODE_NAME_CASE(CZERO_EQZ)
+ NODE_NAME_CASE(CZERO_NEZ)
}
// clang-format on
return nullptr;
@@ -13489,15 +16300,15 @@ std::pair<unsigned, const TargetRegisterClass *>
RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
MVT VT) const {
- // First, see if this is a constraint that directly corresponds to a
- // RISCV register class.
+ // First, see if this is a constraint that directly corresponds to a RISC-V
+ // register class.
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
// TODO: Support fixed vectors up to XLen for P extension?
if (VT.isVector())
break;
- return std::make_pair(0U, &RISCV::GPRRegClass);
+ return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
case 'f':
if (Subtarget.hasStdExtZfhOrZfhmin() && VT == MVT::f16)
return std::make_pair(0U, &RISCV::FPR16RegClass);
@@ -13678,7 +16489,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// Subtarget into account.
if (Res.second == &RISCV::GPRF16RegClass ||
Res.second == &RISCV::GPRF32RegClass ||
- Res.second == &RISCV::GPRF64RegClass)
+ Res.second == &RISCV::GPRPF64RegClass)
return std::make_pair(Res.first, &RISCV::GPRRegClass);
return Res;
@@ -13716,10 +16527,9 @@ void RISCVTargetLowering::LowerAsmOperandForConstraint(
return;
case 'J':
// Validate & create an integer zero operand.
- if (auto *C = dyn_cast<ConstantSDNode>(Op))
- if (C->getZExtValue() == 0)
- Ops.push_back(
- DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
+ if (isNullConstant(Op))
+ Ops.push_back(
+ DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
return;
case 'K':
// Validate & create a 5-bit unsigned immediate operand.
@@ -13749,6 +16559,12 @@ void RISCVTargetLowering::LowerAsmOperandForConstraint(
Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
+ if (Subtarget.hasStdExtZtso()) {
+ if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
+ return Builder.CreateFence(Ord);
+ return nullptr;
+ }
+
if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
return Builder.CreateFence(Ord);
if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
@@ -13759,8 +16575,14 @@ Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
Instruction *Inst,
AtomicOrdering Ord) const {
+ if (Subtarget.hasStdExtZtso())
+ return nullptr;
+
if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
return Builder.CreateFence(AtomicOrdering::Acquire);
+ if (Subtarget.enableSeqCstTrailingFence() && isa<StoreInst>(Inst) &&
+ Ord == AtomicOrdering::SequentiallyConsistent)
+ return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
return nullptr;
}
@@ -13965,6 +16787,95 @@ bool RISCVTargetLowering::isVScaleKnownToBeAPowerOfTwo() const {
return true;
}
+bool RISCVTargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ bool &IsInc,
+ SelectionDAG &DAG) const {
+ // Target does not support indexed loads.
+ if (!Subtarget.hasVendorXTHeadMemIdx())
+ return false;
+
+ if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
+ return false;
+
+ Base = Op->getOperand(0);
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
+ int64_t RHSC = RHS->getSExtValue();
+ if (Op->getOpcode() == ISD::SUB)
+ RHSC = -(uint64_t)RHSC;
+
+ // The constants that can be encoded in the THeadMemIdx instructions
+ // are of the form (sign_extend(imm5) << imm2).
+ bool isLegalIndexedOffset = false;
+ for (unsigned i = 0; i < 4; i++)
+ if (isInt<5>(RHSC >> i) && ((RHSC % (1LL << i)) == 0)) {
+ isLegalIndexedOffset = true;
+ break;
+ }
+
+ if (!isLegalIndexedOffset)
+ return false;
+
+ IsInc = (Op->getOpcode() == ISD::ADD);
+ Offset = Op->getOperand(1);
+ return true;
+ }
+
+ return false;
+}
+
+bool RISCVTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ EVT VT;
+ SDValue Ptr;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ Ptr = ST->getBasePtr();
+ } else
+ return false;
+
+ bool IsInc;
+ if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
+ return false;
+
+ AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
+ return true;
+}
+
+bool RISCVTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ EVT VT;
+ SDValue Ptr;
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ Ptr = LD->getBasePtr();
+ } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ Ptr = ST->getBasePtr();
+ } else
+ return false;
+
+ bool IsInc;
+ if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
+ return false;
+ // Post-indexing updates the base, so it's not a valid transform
+ // if that's not the same as the load's pointer.
+ if (Ptr != Base)
+ return false;
+
+ AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
+ return true;
+}
+
bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const {
EVT SVT = VT.getScalarType();
@@ -13975,11 +16886,11 @@ bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
switch (SVT.getSimpleVT().SimpleTy) {
case MVT::f16:
return VT.isVector() ? Subtarget.hasVInstructionsF16()
- : Subtarget.hasStdExtZfh();
+ : Subtarget.hasStdExtZfhOrZhinx();
case MVT::f32:
- return Subtarget.hasStdExtF();
+ return Subtarget.hasStdExtFOrZfinx();
case MVT::f64:
- return Subtarget.hasStdExtD();
+ return Subtarget.hasStdExtDOrZdinx();
default:
break;
}
@@ -13999,9 +16910,9 @@ Register RISCVTargetLowering::getExceptionSelectorRegister(
bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
// Return false to suppress the unnecessary extensions if the LibCall
- // arguments or return value is f32 type for LP64 ABI.
- RISCVABI::ABI ABI = Subtarget.getTargetABI();
- if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
+ // arguments or return value is a float narrower than XLEN on a soft FP ABI.
+ if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
+ Type.getSizeInBits() < Subtarget.getXLen()))
return false;
return true;
@@ -14019,34 +16930,35 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
// Check integral scalar types.
const bool HasExtMOrZmmul =
Subtarget.hasStdExtM() || Subtarget.hasStdExtZmmul();
- if (VT.isScalarInteger()) {
- // Omit the optimization if the sub target has the M extension and the data
- // size exceeds XLen.
- if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
- return false;
- if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
- // Break the MUL to a SLLI and an ADD/SUB.
- const APInt &Imm = ConstNode->getAPIntValue();
- if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
- (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
- return true;
- // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
- if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
- ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
- (Imm - 8).isPowerOf2()))
+ if (!VT.isScalarInteger())
+ return false;
+
+ // Omit the optimization if the sub target has the M extension and the data
+ // size exceeds XLen.
+ if (HasExtMOrZmmul && VT.getSizeInBits() > Subtarget.getXLen())
+ return false;
+
+ if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
+ // Break the MUL to a SLLI and an ADD/SUB.
+ const APInt &Imm = ConstNode->getAPIntValue();
+ if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
+ (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
+ return true;
+
+ // Optimize the MUL to (SH*ADD x, (SLLI x, bits)) if Imm is not simm12.
+ if (Subtarget.hasStdExtZba() && !Imm.isSignedIntN(12) &&
+ ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
+ (Imm - 8).isPowerOf2()))
+ return true;
+
+ // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
+ // a pair of LUI/ADDI.
+ if (!Imm.isSignedIntN(12) && Imm.countr_zero() < 12 &&
+ ConstNode->hasOneUse()) {
+ APInt ImmS = Imm.ashr(Imm.countr_zero());
+ if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
+ (1 - ImmS).isPowerOf2())
return true;
- // Omit the following optimization if the sub target has the M extension
- // and the data size >= XLen.
- if (HasExtMOrZmmul && VT.getSizeInBits() >= Subtarget.getXLen())
- return false;
- // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
- // a pair of LUI/ADDI.
- if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
- APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
- if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
- (1 - ImmS).isPowerOf2())
- return true;
- }
}
}
@@ -14081,7 +16993,7 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
unsigned *Fast) const {
if (!VT.isVector()) {
if (Fast)
- *Fast = 0;
+ *Fast = Subtarget.enableUnalignedScalarMem();
return Subtarget.enableUnalignedScalarMem();
}
@@ -14093,7 +17005,13 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
return true;
}
- return false;
+ // Note: We lower an unmasked unaligned vector access to an equally sized
+ // e8 element type access. Given this, we effectively support all unmasked
+  // misaligned accesses. TODO: Work through the codegen implications of
+  // allowing such accesses to be formed and of considering them fast.
+ if (Fast)
+ *Fast = Subtarget.enableUnalignedVectorMem();
+ return Subtarget.enableUnalignedVectorMem();
}
bool RISCVTargetLowering::splitValueIntoRegisterParts(
@@ -14101,9 +17019,10 @@ bool RISCVTargetLowering::splitValueIntoRegisterParts(
unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
bool IsABIRegCopy = CC.has_value();
EVT ValueVT = Val.getValueType();
- if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
- // Cast the f16 to i16, extend to i32, pad with ones to make a float nan,
- // and cast to f32.
+ if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
+ PartVT == MVT::f32) {
+ // Cast the [b]f16 to i16, extend to i32, pad with ones to make a float
+ // nan, and cast to f32.
Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
@@ -14154,13 +17073,14 @@ SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
bool IsABIRegCopy = CC.has_value();
- if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
+ if (IsABIRegCopy && (ValueVT == MVT::f16 || ValueVT == MVT::bf16) &&
+ PartVT == MVT::f32) {
SDValue Val = Parts[0];
- // Cast the f32 to i32, truncate to i16, and cast back to f16.
+ // Cast the f32 to i32, truncate to i16, and cast back to [b]f16.
Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
- Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
return Val;
}
@@ -14203,14 +17123,310 @@ bool RISCVTargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
return OptSize && !VT.isVector();
}
-bool RISCVTargetLowering::preferScalarizeSplat(unsigned Opc) const {
+bool RISCVTargetLowering::preferScalarizeSplat(SDNode *N) const {
// Scalarizing zero_ext and sign_ext might prevent matching a widening
// instruction in some situations.
+ unsigned Opc = N->getOpcode();
if (Opc == ISD::ZERO_EXTEND || Opc == ISD::SIGN_EXTEND)
return false;
return true;
}
+static Value *useTpOffset(IRBuilderBase &IRB, unsigned Offset) {
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ Function *ThreadPointerFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
+ return IRB.CreatePointerCast(
+ IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
+ IRB.CreateCall(ThreadPointerFunc), Offset),
+ IRB.getInt8PtrTy()->getPointerTo(0));
+}
+
+Value *RISCVTargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
+ // Fuchsia provides a fixed TLS slot for the stack cookie.
+ // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
+ if (Subtarget.isTargetFuchsia())
+ return useTpOffset(IRB, -0x10);
+
+ return TargetLowering::getIRStackGuard(IRB);
+}
+
+bool RISCVTargetLowering::isLegalInterleavedAccessType(
+ VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
+ const DataLayout &DL) const {
+ EVT VT = getValueType(DL, VTy);
+ // Don't lower vlseg/vsseg for vector types that can't be split.
+ if (!isTypeLegal(VT))
+ return false;
+
+ if (!isLegalElementTypeForRVV(VT.getScalarType()) ||
+ !allowsMemoryAccessForAlignment(VTy->getContext(), DL, VT, AddrSpace,
+ Alignment))
+ return false;
+
+ MVT ContainerVT = VT.getSimpleVT();
+
+ if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
+ if (!Subtarget.useRVVForFixedLengthVectors())
+ return false;
+ // Sometimes the interleaved access pass picks up splats as interleaves of
+ // one element. Don't lower these.
+ if (FVTy->getNumElements() < 2)
+ return false;
+
+ ContainerVT = getContainerForFixedLengthVector(VT.getSimpleVT());
+ }
+
+ // Need to make sure that EMUL * NFIELDS ≤ 8
+ auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(getLMUL(ContainerVT));
+ if (Fractional)
+ return true;
+ return Factor * LMUL <= 8;
+}
+
+bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
+ Align Alignment) const {
+ if (!Subtarget.hasVInstructions())
+ return false;
+
+ // Only support fixed vectors if we know the minimum vector size.
+ if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
+ return false;
+
+ EVT ScalarType = DataType.getScalarType();
+ if (!isLegalElementTypeForRVV(ScalarType))
+ return false;
+
+ if (!Subtarget.enableUnalignedVectorMem() &&
+ Alignment < ScalarType.getStoreSize())
+ return false;
+
+ return true;
+}
+
+static const Intrinsic::ID FixedVlsegIntrIds[] = {
+ Intrinsic::riscv_seg2_load, Intrinsic::riscv_seg3_load,
+ Intrinsic::riscv_seg4_load, Intrinsic::riscv_seg5_load,
+ Intrinsic::riscv_seg6_load, Intrinsic::riscv_seg7_load,
+ Intrinsic::riscv_seg8_load};
+
+/// Lower an interleaved load into a vlsegN intrinsic.
+///
+/// E.g. Lower an interleaved load (Factor = 2):
+/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
+/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
+/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
+///
+/// Into:
+/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
+/// %ptr, i64 4)
+/// %vec0 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 0
+/// %vec1 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 1
+bool RISCVTargetLowering::lowerInterleavedLoad(
+ LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+ ArrayRef<unsigned> Indices, unsigned Factor) const {
+ IRBuilder<> Builder(LI);
+
+ auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
+ if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
+ LI->getPointerAddressSpace(),
+ LI->getModule()->getDataLayout()))
+ return false;
+
+ auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
+
+ Function *VlsegNFunc =
+ Intrinsic::getDeclaration(LI->getModule(), FixedVlsegIntrIds[Factor - 2],
+ {VTy, LI->getPointerOperandType(), XLenTy});
+
+ Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
+
+ CallInst *VlsegN =
+ Builder.CreateCall(VlsegNFunc, {LI->getPointerOperand(), VL});
+
+ for (unsigned i = 0; i < Shuffles.size(); i++) {
+ Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
+ Shuffles[i]->replaceAllUsesWith(SubVec);
+ }
+
+ return true;
+}
+
+static const Intrinsic::ID FixedVssegIntrIds[] = {
+ Intrinsic::riscv_seg2_store, Intrinsic::riscv_seg3_store,
+ Intrinsic::riscv_seg4_store, Intrinsic::riscv_seg5_store,
+ Intrinsic::riscv_seg6_store, Intrinsic::riscv_seg7_store,
+ Intrinsic::riscv_seg8_store};
+
+/// Lower an interleaved store into a vssegN intrinsic.
+///
+/// E.g. Lower an interleaved store (Factor = 3):
+/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
+/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
+/// store <12 x i32> %i.vec, <12 x i32>* %ptr
+///
+/// Into:
+/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
+/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
+/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
+/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
+/// %ptr, i32 4)
+///
+/// Note that the new shufflevectors will be removed and we'll only generate one
+/// vsseg3 instruction in CodeGen.
+bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
+ ShuffleVectorInst *SVI,
+ unsigned Factor) const {
+ IRBuilder<> Builder(SI);
+ auto *ShuffleVTy = cast<FixedVectorType>(SVI->getType());
+ // Given SVI : <n*factor x ty>, then VTy : <n x ty>
+ auto *VTy = FixedVectorType::get(ShuffleVTy->getElementType(),
+ ShuffleVTy->getNumElements() / Factor);
+ if (!isLegalInterleavedAccessType(VTy, Factor, SI->getAlign(),
+ SI->getPointerAddressSpace(),
+ SI->getModule()->getDataLayout()))
+ return false;
+
+ auto *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
+
+ Function *VssegNFunc =
+ Intrinsic::getDeclaration(SI->getModule(), FixedVssegIntrIds[Factor - 2],
+ {VTy, SI->getPointerOperandType(), XLenTy});
+
+ auto Mask = SVI->getShuffleMask();
+ SmallVector<Value *, 10> Ops;
+
+ for (unsigned i = 0; i < Factor; i++) {
+ Value *Shuffle = Builder.CreateShuffleVector(
+ SVI->getOperand(0), SVI->getOperand(1),
+ createSequentialMask(Mask[i], VTy->getNumElements(), 0));
+ Ops.push_back(Shuffle);
+ }
+ // This VL should be OK (should be executable in one vsseg instruction,
+ // potentially under larger LMULs) because we checked that the fixed vector
+ // type fits in isLegalInterleavedAccessType
+ Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
+ Ops.append({SI->getPointerOperand(), VL});
+
+ Builder.CreateCall(VssegNFunc, Ops);
+
+ return true;
+}
+
+bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
+ LoadInst *LI) const {
+ assert(LI->isSimple());
+ IRBuilder<> Builder(LI);
+
+ // Only deinterleave2 supported at present.
+ if (DI->getIntrinsicID() != Intrinsic::experimental_vector_deinterleave2)
+ return false;
+
+ unsigned Factor = 2;
+
+ VectorType *VTy = cast<VectorType>(DI->getOperand(0)->getType());
+ VectorType *ResVTy = cast<VectorType>(DI->getType()->getContainedType(0));
+
+ if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
+ LI->getPointerAddressSpace(),
+ LI->getModule()->getDataLayout()))
+ return false;
+
+ Function *VlsegNFunc;
+ Value *VL;
+ Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
+ SmallVector<Value *, 10> Ops;
+
+ if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
+ VlsegNFunc = Intrinsic::getDeclaration(
+ LI->getModule(), FixedVlsegIntrIds[Factor - 2],
+ {ResVTy, LI->getPointerOperandType(), XLenTy});
+ VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
+ } else {
+ static const Intrinsic::ID IntrIds[] = {
+ Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
+ Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
+ Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
+ Intrinsic::riscv_vlseg8};
+
+ VlsegNFunc = Intrinsic::getDeclaration(LI->getModule(), IntrIds[Factor - 2],
+ {ResVTy, XLenTy});
+ VL = Constant::getAllOnesValue(XLenTy);
+ Ops.append(Factor, PoisonValue::get(ResVTy));
+ }
+
+ Ops.append({LI->getPointerOperand(), VL});
+
+ Value *Vlseg = Builder.CreateCall(VlsegNFunc, Ops);
+ DI->replaceAllUsesWith(Vlseg);
+
+ return true;
+}
+
+bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
+ StoreInst *SI) const {
+ assert(SI->isSimple());
+ IRBuilder<> Builder(SI);
+
+ // Only interleave2 supported at present.
+ if (II->getIntrinsicID() != Intrinsic::experimental_vector_interleave2)
+ return false;
+
+ unsigned Factor = 2;
+
+ VectorType *VTy = cast<VectorType>(II->getType());
+ VectorType *InVTy = cast<VectorType>(II->getOperand(0)->getType());
+
+ if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
+ SI->getPointerAddressSpace(),
+ SI->getModule()->getDataLayout()))
+ return false;
+
+ Function *VssegNFunc;
+ Value *VL;
+ Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
+
+ if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
+ VssegNFunc = Intrinsic::getDeclaration(
+ SI->getModule(), FixedVssegIntrIds[Factor - 2],
+ {InVTy, SI->getPointerOperandType(), XLenTy});
+ VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
+ } else {
+ static const Intrinsic::ID IntrIds[] = {
+ Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
+ Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
+ Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
+ Intrinsic::riscv_vsseg8};
+
+ VssegNFunc = Intrinsic::getDeclaration(SI->getModule(), IntrIds[Factor - 2],
+ {InVTy, XLenTy});
+ VL = Constant::getAllOnesValue(XLenTy);
+ }
+
+ Builder.CreateCall(VssegNFunc, {II->getOperand(0), II->getOperand(1),
+ SI->getPointerOperand(), VL});
+
+ return true;
+}
+
+MachineInstr *
+RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator &MBBI,
+ const TargetInstrInfo *TII) const {
+ assert(MBBI->isCall() && MBBI->getCFIType() &&
+ "Invalid call instruction for a KCFI check");
+ assert(is_contained({RISCV::PseudoCALLIndirect, RISCV::PseudoTAILIndirect},
+ MBBI->getOpcode()));
+
+ MachineOperand &Target = MBBI->getOperand(0);
+ Target.setIsRenamable(false);
+
+ return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(RISCV::KCFI_CHECK))
+ .addReg(Target.getReg())
+ .addImm(MBBI->getCFIType())
+ .getInstr();
+}
+
#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"
@@ -14230,6 +17446,57 @@ RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
return Reg;
}
+MachineMemOperand::Flags
+RISCVTargetLowering::getTargetMMOFlags(const Instruction &I) const {
+ const MDNode *NontemporalInfo = I.getMetadata(LLVMContext::MD_nontemporal);
+
+ if (NontemporalInfo == nullptr)
+ return MachineMemOperand::MONone;
+
+  // 1 is the default value and works as __RISCV_NTLH_ALL
+ // 2 -> __RISCV_NTLH_INNERMOST_PRIVATE
+ // 3 -> __RISCV_NTLH_ALL_PRIVATE
+ // 4 -> __RISCV_NTLH_INNERMOST_SHARED
+ // 5 -> __RISCV_NTLH_ALL
+ int NontemporalLevel = 5;
+ const MDNode *RISCVNontemporalInfo =
+ I.getMetadata("riscv-nontemporal-domain");
+ if (RISCVNontemporalInfo != nullptr)
+ NontemporalLevel =
+ cast<ConstantInt>(
+ cast<ConstantAsMetadata>(RISCVNontemporalInfo->getOperand(0))
+ ->getValue())
+ ->getZExtValue();
+
+ assert((1 <= NontemporalLevel && NontemporalLevel <= 5) &&
+ "RISC-V target doesn't support this non-temporal domain.");
+
+ NontemporalLevel -= 2;
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
+ if (NontemporalLevel & 0b1)
+ Flags |= MONontemporalBit0;
+ if (NontemporalLevel & 0b10)
+ Flags |= MONontemporalBit1;
+
+ return Flags;
+}
+
+MachineMemOperand::Flags
+RISCVTargetLowering::getTargetMMOFlags(const MemSDNode &Node) const {
+
+ MachineMemOperand::Flags NodeFlags = Node.getMemOperand()->getFlags();
+ MachineMemOperand::Flags TargetFlags = MachineMemOperand::MONone;
+ TargetFlags |= (NodeFlags & MONontemporalBit0);
+ TargetFlags |= (NodeFlags & MONontemporalBit1);
+
+ return TargetFlags;
+}
+
+bool RISCVTargetLowering::areTwoSDNodeTargetMMOFlagsMergeable(
+ const MemSDNode &NodeX, const MemSDNode &NodeY) const {
+ return getTargetMMOFlags(NodeX) == getTargetMMOFlags(NodeY);
+}
+
namespace llvm::RISCVVIntrinsicsTable {
#define GET_RISCVVIntrinsicsTable_IMPL
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index acf92cab3598..164ded95a1b5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -1,4 +1,4 @@
-//===-- RISCVISelLowering.h - RISCV DAG Lowering Interface ------*- C++ -*-===//
+//===-- RISCVISelLowering.h - RISC-V DAG Lowering Interface -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the interfaces that RISCV uses to lower LLVM code into a
+// This file defines the interfaces that RISC-V uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
@@ -27,10 +27,9 @@ struct RISCVRegisterInfo;
namespace RISCVISD {
enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- RET_FLAG,
- URET_FLAG,
- SRET_FLAG,
- MRET_FLAG,
+ RET_GLUE,
+ SRET_GLUE,
+ MRET_GLUE,
CALL,
/// Select with condition operator - This selects between a true value and
/// a false value (ops #3 and #4) based on the boolean result of comparing
@@ -112,6 +111,9 @@ enum NodeType : unsigned {
FCVT_W_RV64,
FCVT_WU_RV64,
+ FP_ROUND_BF16,
+ FP_EXTEND_BF16,
+
// Rounds an FP value to its corresponding integer in the same FP format.
// First operand is the value to round, the second operand is the largest
// integer that can be represented exactly in the FP format. This will be
@@ -119,6 +121,11 @@ enum NodeType : unsigned {
// inserter.
FROUND,
+ FPCLASS,
+
+ // Floating point fmax and fmin matching the RISC-V instruction semantics.
+ FMAX, FMIN,
+
// READ_CYCLE_WIDE - A read of the 64-bit cycle CSR on a 32-bit target
// (returns (Lo, Hi)). It takes a chain operand.
READ_CYCLE_WIDE,
@@ -128,7 +135,18 @@ enum NodeType : unsigned {
ORC_B,
ZIP,
UNZIP,
+
+ // Scalar cryptography
+ CLMUL, CLMULH, CLMULR,
+ SHA256SIG0, SHA256SIG1, SHA256SUM0, SHA256SUM1,
+ SM4KS, SM4ED,
+ SM3P0, SM3P1,
+
// Vector Extension
+ // VMV_V_V_VL matches the semantics of vmv.v.v but includes an extra operand
+ // for the VL value to be used for the operation. The first operand is
+ // passthru operand.
+ VMV_V_V_VL,
// VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
// for the VL value to be used for the operation. The first operand is
// passthru operand.
@@ -164,6 +182,12 @@ enum NodeType : unsigned {
// value. The fourth and fifth operands are the mask and VL operands.
VSLIDE1UP_VL,
VSLIDE1DOWN_VL,
+ // Matches the semantics of vfslide1up/vfslide1down. The first operand is
+ // passthru operand, the second is source vector, third is a scalar value
+ // whose type matches the element type of the vectors. The fourth and fifth
+ // operands are the mask and VL operands.
+ VFSLIDE1UP_VL,
+ VFSLIDE1DOWN_VL,
// Matches the semantics of the vid.v instruction, with a mask and VL
// operand.
VID_VL,
@@ -217,6 +241,12 @@ enum NodeType : unsigned {
UMIN_VL,
UMAX_VL,
+ BITREVERSE_VL,
+ BSWAP_VL,
+ CTLZ_VL,
+ CTTZ_VL,
+ CTPOP_VL,
+
SADDSAT_VL,
UADDSAT_VL,
SSUBSAT_VL,
@@ -235,17 +265,18 @@ enum NodeType : unsigned {
FNEG_VL,
FABS_VL,
FSQRT_VL,
+ FCLASS_VL,
FCOPYSIGN_VL, // Has a merge operand
VFCVT_RTZ_X_F_VL,
VFCVT_RTZ_XU_F_VL,
VFCVT_X_F_VL,
VFCVT_XU_F_VL,
VFROUND_NOEXCEPT_VL,
- VFCVT_RM_X_F_VL, // Has a rounding mode operand.
+ VFCVT_RM_X_F_VL, // Has a rounding mode operand.
VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
SINT_TO_FP_VL,
UINT_TO_FP_VL,
- VFCVT_RM_F_X_VL, // Has a rounding mode operand.
+ VFCVT_RM_F_X_VL, // Has a rounding mode operand.
VFCVT_RM_F_XU_VL, // Has a rounding mode operand.
FP_ROUND_VL,
FP_EXTEND_VL,
@@ -256,6 +287,13 @@ enum NodeType : unsigned {
VFMSUB_VL,
VFNMSUB_VL,
+ // Vector widening FMA ops with a mask as a fourth operand and VL as a fifth
+ // operand.
+ VFWMADD_VL,
+ VFWNMADD_VL,
+ VFWMSUB_VL,
+ VFWNMSUB_VL,
+
// Widening instructions with a merge value as a third operand, a mask as a
// fourth operand, and VL as a fifth operand.
VWMUL_VL,
@@ -270,6 +308,20 @@ enum NodeType : unsigned {
VWSUB_W_VL,
VWSUBU_W_VL,
+ VFWMUL_VL,
+ VFWADD_VL,
+ VFWSUB_VL,
+ VFWADD_W_VL,
+ VFWSUB_W_VL,
+
+ // Widening ternary operations with a mask as the fourth operand and VL as the
+ // fifth operand.
+ VWMACC_VL,
+ VWMACCU_VL,
+ VWMACCSU_VL,
+
+ // Narrowing logical shift right.
+ // Operands are (source, shift, passthru, mask, vl)
VNSRL_VL,
// Vector compare producing a mask. Fourth operand is input mask. Fifth
@@ -324,18 +376,50 @@ enum NodeType : unsigned {
// the value read before the modification and the new chain pointer.
SWAP_CSR,
+ // Branchless select operations, matching the semantics of the instructions
+ // defined in Zicond or XVentanaCondOps.
+ CZERO_EQZ, // vt.maskc for XVentanaCondOps.
+ CZERO_NEZ, // vt.maskcn for XVentanaCondOps.
+
// FP to 32 bit int conversions for RV64. These are used to keep track of the
// result being sign extended to 64 bit. These saturate out of range inputs.
STRICT_FCVT_W_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE,
STRICT_FCVT_WU_RV64,
+ STRICT_FADD_VL,
+ STRICT_FSUB_VL,
+ STRICT_FMUL_VL,
+ STRICT_FDIV_VL,
+ STRICT_FSQRT_VL,
+ STRICT_VFMADD_VL,
+ STRICT_VFNMADD_VL,
+ STRICT_VFMSUB_VL,
+ STRICT_VFNMSUB_VL,
+ STRICT_FP_ROUND_VL,
+ STRICT_FP_EXTEND_VL,
+ STRICT_VFNCVT_ROD_VL,
+ STRICT_SINT_TO_FP_VL,
+ STRICT_UINT_TO_FP_VL,
+ STRICT_VFCVT_RM_X_F_VL,
+ STRICT_VFCVT_RTZ_X_F_VL,
+ STRICT_VFCVT_RTZ_XU_F_VL,
+ STRICT_FSETCC_VL,
+ STRICT_FSETCCS_VL,
+ STRICT_VFROUND_NOEXCEPT_VL,
// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
// opcodes will be thought as target memory ops!
- // Load address.
- LA = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ // Represents an AUIPC+L[WD] pair. Selected to PseudoLGA.
+ LGA = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ // Load initial exec thread-local address.
LA_TLS_IE,
+
+ TH_LWD,
+ TH_LWUD,
+ TH_LDD,
+ TH_SWD,
+ TH_SDD,
};
} // namespace RISCVISD
@@ -380,6 +464,7 @@ public:
SmallVectorImpl<Use *> &Ops) const override;
bool shouldScalarizeBinop(SDValue VecOp) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
@@ -387,7 +472,7 @@ public:
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
- bool preferScalarizeSplat(unsigned Opc) const override;
+ bool preferScalarizeSplat(SDNode *N) const override;
bool softPromoteHalfType() const override { return true; }
@@ -409,7 +494,22 @@ public:
/// should be stack expanded.
bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
- bool hasBitPreservingFPLogic(EVT VT) const override;
+ bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
+ // If the pair to store is a mixture of float and int values, we will
+ // save two bitwise instructions and one float-to-int instruction and
+    // add one store instruction. There is potentially a more
+    // significant benefit because it avoids the float->int domain switch
+    // for the input value, so it is more likely a win.
+ if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
+ (LTy.isInteger() && HTy.isFloatingPoint()))
+ return true;
+    // If the pair only contains int values, we will save two bitwise
+    // instructions and add one store instruction (costing one more
+    // store buffer). Since the benefit is less clear, we leave such a pair
+    // out until we have a testcase to prove it is a win.
+ return false;
+ }
+
bool
shouldExpandBuildVectorWithShuffles(EVT VT,
unsigned DefinedValues) const override;
@@ -440,6 +540,16 @@ public:
// This method returns the name of a target specific DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
+ MachineMemOperand::Flags
+ getTargetMMOFlags(const Instruction &I) const override;
+
+ MachineMemOperand::Flags
+ getTargetMMOFlags(const MemSDNode &Node) const override;
+
+ bool
+ areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX,
+ const MemSDNode &NodeY) const override;
+
ConstraintType getConstraintType(StringRef Constraint) const override;
unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const override;
@@ -462,11 +572,28 @@ public:
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
+ bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
+ bool MathUsed) const override {
+ if (VT == MVT::i8 || VT == MVT::i16)
+ return false;
+
+ return TargetLowering::shouldFormOverflowOp(Opcode, VT, MathUsed);
+ }
+
+ bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
+ unsigned AddrSpace) const override {
+ // If we can replace 4 or more scalar stores, there will be a reduction
+ // in instructions even after we add a vector constant load.
+ return NumElem >= 4;
+ }
+
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
return VT.isScalarInteger();
}
bool convertSelectOfConstantsToMath(EVT VT) const override { return true; }
+ bool preferZeroCompareBranch() const override { return true; }
+
bool shouldInsertFencesForAtomic(const Instruction *I) const override {
return isa<LoadInst>(I) || isa<StoreInst>(I);
}
@@ -486,6 +613,9 @@ public:
return ISD::SIGN_EXTEND;
}
+ bool shouldTransformSignedTruncationCheck(EVT XVT,
+ unsigned KeptBits) const override;
+
TargetLowering::ShiftLegalizationStrategy
preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
unsigned ExpansionFactor) const override {
@@ -578,6 +708,9 @@ public:
unsigned NumParts, MVT PartVT, EVT ValueVT,
std::optional<CallingConv::ID> CC) const override;
+ // Return the value of VLMax for the given vector type (i.e. SEW and LMUL)
+ SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const;
+
static RISCVII::VLMUL getLMUL(MVT VT);
inline static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize,
unsigned MinSize) {
@@ -599,7 +732,7 @@ public:
bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
- bool isLegalElementTypeForRVV(Type *ScalarTy) const;
+ bool isLegalElementTypeForRVV(EVT ScalarTy) const;
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
@@ -612,13 +745,58 @@ public:
bool isVScaleKnownToBeAPowerOfTwo() const override;
+ bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM, bool &IsInc,
+ SelectionDAG &DAG) const;
+ bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const override;
+ bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
+ SDValue &Offset, ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const override;
+
bool isLegalScaleForGatherScatter(uint64_t Scale,
uint64_t ElemSize) const override {
// Scaled addressing not supported on indexed load/stores
return Scale == 1;
}
-private:
+ /// If the target has a standard location for the stack protector cookie,
+ /// returns the address of that location. Otherwise, returns nullptr.
+ Value *getIRStackGuard(IRBuilderBase &IRB) const override;
+
+ /// Returns whether or not generating an interleaved load/store intrinsic for
+ /// this type will be legal.
+ bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
+ Align Alignment, unsigned AddrSpace,
+ const DataLayout &) const;
+
+ /// Return true if a strided load/store of the given result type and
+ /// alignment is legal.
+ bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;
+
+ unsigned getMaxSupportedInterleaveFactor() const override { return 8; }
+
+ bool lowerInterleavedLoad(LoadInst *LI,
+ ArrayRef<ShuffleVectorInst *> Shuffles,
+ ArrayRef<unsigned> Indices,
+ unsigned Factor) const override;
+
+ bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
+ unsigned Factor) const override;
+
+ bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II,
+ LoadInst *LI) const override;
+
+ bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
+ StoreInst *SI) const override;
+
+ bool supportKCFIBundles() const override { return true; }
+
+ MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator &MBBI,
+ const TargetInstrInfo *TII) const override;
+
/// RISCVCCAssignFn - This target-specific function extends the default
/// CCValAssign with additional information used to lower RISC-V calling
/// conventions.
@@ -630,6 +808,7 @@ private:
const RISCVTargetLowering &TLI,
std::optional<unsigned> FirstMaskArgument);
+private:
void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
RISCVCCAssignFn Fn) const;
@@ -639,7 +818,8 @@ private:
RISCVCCAssignFn Fn) const;
template <class NodeTy>
- SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true) const;
+ SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true,
+ bool IsExternWeak = false) const;
SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
bool UseGOT) const;
SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
@@ -675,6 +855,8 @@ private:
SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
@@ -688,14 +870,10 @@ private:
SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerFixedLengthVectorLogicOpToRVV(SDValue Op, SelectionDAG &DAG,
- unsigned MaskOpc,
- unsigned VecOpc) const;
- SDValue lowerFixedLengthVectorShiftToRVV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op,
SelectionDAG &DAG) const;
- SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG, unsigned NewOpc,
- bool HasMergeOp = false, bool HasMask = true) const;
+ SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG, unsigned RISCVISDOpc,
bool HasMergeOp = false) const;
SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, unsigned MaskOpc,
@@ -714,6 +892,10 @@ private:
SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerStrictFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue lowerVectorStrictFSetcc(SDValue Op, SelectionDAG &DAG) const;
+
SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
@@ -731,6 +913,9 @@ private:
MVT getVPExplicitVectorLengthTy() const override;
+ bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF,
+ bool IsScalable) const override;
+
/// RVV code generation for fixed length vectors does not lower all
/// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
/// merge. However, merging them creates a BUILD_VECTOR that is just as
@@ -742,7 +927,7 @@ private:
/// Disable normalizing
/// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
/// select(N0|N1, X, Y) => select(N0, select(N1, X, Y), Y)
- /// RISCV doesn't have flags so it's better to perform the and/or in a GPR.
+ /// RISC-V doesn't have flags so it's better to perform the and/or in a GPR.
bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override {
return false;
};
@@ -751,6 +936,26 @@ private:
/// faster than two FDIVs.
unsigned combineRepeatedFPDivisors() const override;
};
+
+namespace RISCV {
+
+bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
+ MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
+ bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
+ std::optional<unsigned> FirstMaskArgument);
+
+bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
+ MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
+ bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
+ std::optional<unsigned> FirstMaskArgument);
+
+bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
+ CCState &State);
+} // end namespace RISCV
+
namespace RISCVVIntrinsicsTable {
struct RISCVVIntrinsicInfo {
@@ -771,6 +976,7 @@ using namespace RISCV;
#define GET_RISCVVIntrinsicsTable_DECL
#include "RISCVGenSearchableTables.inc"
+#undef GET_RISCVVIntrinsicsTable_DECL
} // end namespace RISCVVIntrinsicsTable
diff --git a/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp b/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
new file mode 100644
index 000000000000..4b26c27bb4f8
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInsertReadWriteCSR.cpp
@@ -0,0 +1,135 @@
+//===-- RISCVInsertReadWriteCSR.cpp - Insert Read/Write of RISC-V CSR -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file implements the machine function pass that inserts reads/writes of
+// the CSRs needed by RISC-V instructions.
+//
+// Currently the pass implements naive insertion of a write to vxrm before an
+// RVV fixed-point instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/RISCVBaseInfo.h"
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-insert-read-write-csr"
+#define RISCV_INSERT_READ_WRITE_CSR_NAME "RISC-V Insert Read/Write CSR Pass"
+
+namespace {
+
+class RISCVInsertReadWriteCSR : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+
+public:
+ static char ID;
+
+ RISCVInsertReadWriteCSR() : MachineFunctionPass(ID) {
+ initializeRISCVInsertReadWriteCSRPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override {
+ return RISCV_INSERT_READ_WRITE_CSR_NAME;
+ }
+
+private:
+ bool emitWriteRoundingMode(MachineBasicBlock &MBB);
+};
+
+} // end anonymous namespace
+
+char RISCVInsertReadWriteCSR::ID = 0;
+
+INITIALIZE_PASS(RISCVInsertReadWriteCSR, DEBUG_TYPE,
+ RISCV_INSERT_READ_WRITE_CSR_NAME, false, false)
+
+// Returns the index of the rounding mode immediate operand, if any; otherwise
+// returns std::nullopt.
+static std::optional<unsigned> getRoundModeIdx(const MachineInstr &MI) {
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+ if (!RISCVII::hasRoundModeOp(TSFlags))
+ return std::nullopt;
+
+ // The operand order
+ // -------------------------------------
+ // | n-1 (if any) | n-2 | n-3 | n-4 |
+ // | policy | sew | vl | rm |
+ // -------------------------------------
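+ // Illustrative example (operand layout assumed): for a pseudo with explicit
+ // operands (vd, vs2, vs1, rm, vl, sew, policy), n = 7 and hasVecPolicyOp is
+ // true, so the result is 7 - 1 - 3 = 3, which is the rm operand.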
+ return MI.getNumExplicitOperands() - RISCVII::hasVecPolicyOp(TSFlags) - 3;
+}
+
+// This function inserts a write to vxrm when encountering an RVV fixed-point
+// instruction, and saves/restores frm around RVV floating-point instructions
+// that use a static rounding mode.
+bool RISCVInsertReadWriteCSR::emitWriteRoundingMode(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineInstr &MI : MBB) {
+ if (auto RoundModeIdx = getRoundModeIdx(MI)) {
+ if (RISCVII::usesVXRM(MI.getDesc().TSFlags)) {
+ unsigned VXRMImm = MI.getOperand(*RoundModeIdx).getImm();
+
+ Changed = true;
+
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::WriteVXRMImm))
+ .addImm(VXRMImm);
+ MI.addOperand(MachineOperand::CreateReg(RISCV::VXRM, /*IsDef*/ false,
+ /*IsImp*/ true));
+ } else { // FRM
+ unsigned FRMImm = MI.getOperand(*RoundModeIdx).getImm();
+
+ // The DYN rounding mode is a hint to this pass not to alter the frm value.
+ if (FRMImm == RISCVFPRndMode::DYN)
+ continue;
+
+ Changed = true;
+
+ // Save
+ MachineRegisterInfo *MRI = &MBB.getParent()->getRegInfo();
+ Register SavedFRM = MRI->createVirtualRegister(&RISCV::GPRRegClass);
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(RISCV::SwapFRMImm),
+ SavedFRM)
+ .addImm(FRMImm);
+ MI.addOperand(MachineOperand::CreateReg(RISCV::FRM, /*IsDef*/ false,
+ /*IsImp*/ true));
+ // Restore
+ MachineInstrBuilder MIB =
+ BuildMI(*MBB.getParent(), {}, TII->get(RISCV::WriteFRM))
+ .addReg(SavedFRM);
+ MBB.insertAfter(MI, MIB);
+ }
+ }
+ }
+ return Changed;
+}
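+// For a statically-rounded FP vector instruction the emitted sequence is,
+// roughly:
+//   %saved = SwapFRMImm <rm>   ; write the static rounding mode, save old frm
+//   <the RVV floating-point instruction>
+//   WriteFRM %saved            ; restore the previous frm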
+
+bool RISCVInsertReadWriteCSR::runOnMachineFunction(MachineFunction &MF) {
+ // Skip if the vector extension is not enabled.
+ const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+ if (!ST.hasVInstructions())
+ return false;
+
+ TII = ST.getInstrInfo();
+
+ bool Changed = false;
+
+ for (MachineBasicBlock &MBB : MF)
+ Changed |= emitWriteRoundingMode(MBB);
+
+ return Changed;
+}
+
+FunctionPass *llvm::createRISCVInsertReadWriteCSRPass() {
+ return new RISCVInsertReadWriteCSR();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 115c9622219f..f1ebe63cfa14 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -32,7 +32,7 @@
using namespace llvm;
#define DEBUG_TYPE "riscv-insert-vsetvli"
-#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"
+#define RISCV_INSERT_VSETVLI_NAME "RISC-V Insert VSETVLI pass"
static cl::opt<bool> DisableInsertVSETVLPHIOpt(
"riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
@@ -85,6 +85,29 @@ static bool isScalarMoveInstr(const MachineInstr &MI) {
}
}
+static bool isScalarSplatInstr(const MachineInstr &MI) {
+ switch (getRVVMCOpcode(MI.getOpcode())) {
+ default:
+ return false;
+ case RISCV::VMV_V_I:
+ case RISCV::VMV_V_X:
+ case RISCV::VFMV_V_F:
+ return true;
+ }
+}
+
+static bool isVSlideInstr(const MachineInstr &MI) {
+ switch (getRVVMCOpcode(MI.getOpcode())) {
+ default:
+ return false;
+ case RISCV::VSLIDEDOWN_VX:
+ case RISCV::VSLIDEDOWN_VI:
+ case RISCV::VSLIDEUP_VX:
+ case RISCV::VSLIDEUP_VI:
+ return true;
+ }
+}
+
/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
@@ -124,6 +147,38 @@ static bool isMaskRegOp(const MachineInstr &MI) {
return Log2SEW == 0;
}
+/// Return true if the inactive elements in the result are entirely undefined.
+/// Note that this is different from "agnostic" as defined by the vector
+/// specification. Agnostic requires each lane to either be undisturbed, or
+/// take the value -1; no other value is allowed.
+static bool hasUndefinedMergeOp(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) {
+
+ unsigned UseOpIdx;
+ if (!MI.isRegTiedToUseOperand(0, &UseOpIdx))
+ // If there is no passthrough operand, then the passthrough
+ // lanes are undefined.
+ return true;
+
+ // If the tied operand is an IMPLICIT_DEF (or a REG_SEQUENCE whose operands
+ // are solely IMPLICIT_DEFs), the passthrough lanes are undefined.
+ const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
+ if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) {
+ if (UseMI->isImplicitDef())
+ return true;
+
+ if (UseMI->isRegSequence()) {
+ for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) {
+ MachineInstr *SourceMI = MRI.getVRegDef(UseMI->getOperand(i).getReg());
+ if (!SourceMI || !SourceMI->isImplicitDef())
+ return false;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
// Some unknown property of VL is used. If demanded, must preserve entire
@@ -131,7 +186,13 @@ struct DemandedFields {
bool VLAny = false;
// Only zero vs non-zero is used. If demanded, can change non-zero values.
bool VLZeroness = false;
- bool SEW = false;
+ // What properties of SEW we need to preserve.
+ enum : uint8_t {
+ SEWEqual = 2, // The exact value of SEW needs to be preserved.
+ SEWGreaterThanOrEqual = 1, // SEW can be changed as long as it's greater
+ // than or equal to the original value.
+ SEWNone = 0 // We don't need to preserve SEW at all.
+ } SEW = SEWNone;
bool LMUL = false;
bool SEWLMULRatio = false;
bool TailPolicy = false;
@@ -149,7 +210,7 @@ struct DemandedFields {
// Mark all VTYPE subfields and properties as demanded
void demandVTYPE() {
- SEW = true;
+ SEW = SEWEqual;
LMUL = true;
SEWLMULRatio = true;
TailPolicy = true;
@@ -174,7 +235,19 @@ struct DemandedFields {
OS << "{";
OS << "VLAny=" << VLAny << ", ";
OS << "VLZeroness=" << VLZeroness << ", ";
- OS << "SEW=" << SEW << ", ";
+ OS << "SEW=";
+ switch (SEW) {
+ case SEWEqual:
+ OS << "SEWEqual";
+ break;
+ case SEWGreaterThanOrEqual:
+ OS << "SEWGreaterThanOrEqual";
+ break;
+ case SEWNone:
+ OS << "SEWNone";
+ break;
+ };
+ OS << ", ";
OS << "LMUL=" << LMUL << ", ";
OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
OS << "TailPolicy=" << TailPolicy << ", ";
@@ -192,41 +265,44 @@ inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
}
#endif
-
-/// Return true if the two values of the VTYPE register provided are
-/// indistinguishable from the perspective of an instruction (or set of
-/// instructions) which use only the Used subfields and properties.
-static bool areCompatibleVTYPEs(uint64_t VType1,
- uint64_t VType2,
+/// Return true if moving from CurVType to NewVType is
+/// indistinguishable from the perspective of an instruction (or set
+/// of instructions) which use only the Used subfields and properties.
+static bool areCompatibleVTYPEs(uint64_t CurVType, uint64_t NewVType,
const DemandedFields &Used) {
- if (Used.SEW &&
- RISCVVType::getSEW(VType1) != RISCVVType::getSEW(VType2))
+ if (Used.SEW == DemandedFields::SEWEqual &&
+ RISCVVType::getSEW(CurVType) != RISCVVType::getSEW(NewVType))
+ return false;
+
+ if (Used.SEW == DemandedFields::SEWGreaterThanOrEqual &&
+ RISCVVType::getSEW(NewVType) < RISCVVType::getSEW(CurVType))
return false;
if (Used.LMUL &&
- RISCVVType::getVLMUL(VType1) != RISCVVType::getVLMUL(VType2))
+ RISCVVType::getVLMUL(CurVType) != RISCVVType::getVLMUL(NewVType))
return false;
if (Used.SEWLMULRatio) {
- auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(VType1),
- RISCVVType::getVLMUL(VType1));
- auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(VType2),
- RISCVVType::getVLMUL(VType2));
+ auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(CurVType),
+ RISCVVType::getVLMUL(CurVType));
+ auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(NewVType),
+ RISCVVType::getVLMUL(NewVType));
if (Ratio1 != Ratio2)
return false;
}
- if (Used.TailPolicy &&
- RISCVVType::isTailAgnostic(VType1) != RISCVVType::isTailAgnostic(VType2))
+ if (Used.TailPolicy && RISCVVType::isTailAgnostic(CurVType) !=
+ RISCVVType::isTailAgnostic(NewVType))
return false;
- if (Used.MaskPolicy &&
- RISCVVType::isMaskAgnostic(VType1) != RISCVVType::isMaskAgnostic(VType2))
+ if (Used.MaskPolicy && RISCVVType::isMaskAgnostic(CurVType) !=
+ RISCVVType::isMaskAgnostic(NewVType))
return false;
return true;
}
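// Example of the ratio-only case: (SEW=32, LMUL=2) and (SEW=16, LMUL=1) both
// have SEW/LMUL ratio 16, so they yield the same VLMAX and are compatible when
// only SEWLMULRatio is demanded.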
/// Return the fields and properties demanded by the provided instruction.
-static DemandedFields getDemanded(const MachineInstr &MI) {
+DemandedFields getDemanded(const MachineInstr &MI,
+ const MachineRegisterInfo *MRI) {
// Warning: This function has to work on both the lowered (i.e. post
// emitVSETVLIs) and pre-lowering forms. The main implication of this is
// that it can't use the value of a SEW, VL, or Policy operand as they might
@@ -236,7 +312,7 @@ static DemandedFields getDemanded(const MachineInstr &MI) {
DemandedFields Res;
// Start conservative if registers are used
if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
- Res.demandVL();;
+ Res.demandVL();
if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
Res.demandVTYPE();
// Start conservative on the unlowered form too
@@ -258,7 +334,7 @@ static DemandedFields getDemanded(const MachineInstr &MI) {
// Note: We assume that the instructions initial SEW is the EEW encoded
// in the opcode. This is asserted when constructing the VSETVLIInfo.
if (getEEWForLoadStore(MI)) {
- Res.SEW = false;
+ Res.SEW = DemandedFields::SEWNone;
Res.LMUL = false;
}
@@ -273,7 +349,7 @@ static DemandedFields getDemanded(const MachineInstr &MI) {
// * Probably ok if available VLMax is larger than demanded
// * The policy bits can probably be ignored..
if (isMaskRegOp(MI)) {
- Res.SEW = false;
+ Res.SEW = DemandedFields::SEWNone;
Res.LMUL = false;
}
@@ -282,6 +358,16 @@ static DemandedFields getDemanded(const MachineInstr &MI) {
Res.LMUL = false;
Res.SEWLMULRatio = false;
Res.VLAny = false;
+ // For vmv.s.x and vfmv.s.f, if the merge operand is *undefined*, we don't
+ // need to preserve any other bits and are thus compatible with any larger
+ // etype, and can disregard policy bits. Warning: It's tempting to try doing
+ // this for any tail agnostic operation, but we can't as TA requires
+ // tail lanes to either be the original value or -1. We are writing
+ // unknown bits to the lanes here.
+ if (hasUndefinedMergeOp(MI, *MRI)) {
+ Res.SEW = DemandedFields::SEWGreaterThanOrEqual;
+ Res.TailPolicy = false;
+ }
}
return Res;
@@ -348,18 +434,28 @@ public:
unsigned getSEW() const { return SEW; }
RISCVII::VLMUL getVLMUL() const { return VLMul; }
- bool hasNonZeroAVL() const {
+ bool hasNonZeroAVL(const MachineRegisterInfo &MRI) const {
if (hasAVLImm())
return getAVLImm() > 0;
- if (hasAVLReg())
- return getAVLReg() == RISCV::X0;
+ if (hasAVLReg()) {
+ if (getAVLReg() == RISCV::X0)
+ return true;
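+ // An AVL defined by `ADDI rd, x0, imm` with a non-zero immediate is also a
+ // known non-zero value, even though it is a register AVL.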
+ if (MachineInstr *MI = MRI.getVRegDef(getAVLReg());
+ MI && MI->getOpcode() == RISCV::ADDI &&
+ MI->getOperand(1).isReg() && MI->getOperand(2).isImm() &&
+ MI->getOperand(1).getReg() == RISCV::X0 &&
+ MI->getOperand(2).getImm() != 0)
+ return true;
+ return false;
+ }
return false;
}
- bool hasEquallyZeroAVL(const VSETVLIInfo &Other) const {
+ bool hasEquallyZeroAVL(const VSETVLIInfo &Other,
+ const MachineRegisterInfo &MRI) const {
if (hasSameAVL(Other))
return true;
- return (hasNonZeroAVL() && Other.hasNonZeroAVL());
+ return (hasNonZeroAVL(MRI) && Other.hasNonZeroAVL(MRI));
}
bool hasSameAVL(const VSETVLIInfo &Other) const {
@@ -429,13 +525,14 @@ public:
bool hasCompatibleVTYPE(const DemandedFields &Used,
const VSETVLIInfo &Require) const {
- return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
+ return areCompatibleVTYPEs(Require.encodeVTYPE(), encodeVTYPE(), Used);
}
// Determine whether the vector instructions requirements represented by
// Require are compatible with the previous vsetvli instruction represented
// by this. MI is the instruction whose requirements we're considering.
- bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require) const {
+ bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require,
+ const MachineRegisterInfo &MRI) const {
assert(isValid() && Require.isValid() &&
"Can't compare invalid VSETVLIInfos");
assert(!Require.SEWLMULRatioOnly &&
@@ -457,10 +554,10 @@ public:
if (Used.VLAny && !hasSameAVL(Require))
return false;
- if (Used.VLZeroness && !hasEquallyZeroAVL(Require))
+ if (Used.VLZeroness && !hasEquallyZeroAVL(Require, MRI))
return false;
- return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
+ return hasCompatibleVTYPE(Used, Require);
}
bool operator==(const VSETVLIInfo &Other) const {
@@ -636,9 +733,9 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
const MachineRegisterInfo *MRI) {
VSETVLIInfo InstrInfo;
- bool TailAgnostic, MaskAgnostic;
- unsigned UseOpIdx;
- if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
+ bool TailAgnostic = true;
+ bool MaskAgnostic = true;
+ if (!hasUndefinedMergeOp(MI, *MRI)) {
// Start with undisturbed.
TailAgnostic = false;
MaskAgnostic = false;
@@ -653,14 +750,6 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
}
- // If the tied operand is an IMPLICIT_DEF we can use TailAgnostic and
- // MaskAgnostic.
- const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
- MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
- if (UseMI && UseMI->isImplicitDef()) {
- TailAgnostic = true;
- MaskAgnostic = true;
- }
// Some pseudo instructions force a tail agnostic policy despite having a
// tied def.
if (RISCVII::doesForceTailAgnostic(TSFlags))
@@ -668,12 +757,6 @@ static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
if (!RISCVII::usesMaskPolicy(TSFlags))
MaskAgnostic = true;
- } else {
- // If there is no tied operand,, there shouldn't be a policy operand.
- assert(!RISCVII::hasVecPolicyOp(TSFlags) && "Unexpected policy operand");
- // No tied operand use agnostic policies.
- TailAgnostic = true;
- MaskAgnostic = true;
}
RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
@@ -715,20 +798,60 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}
+// Return a VSETVLIInfo representing the changes made by this VSETVLI or
+// VSETIVLI instruction.
+static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
+ VSETVLIInfo NewInfo;
+ if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
+ NewInfo.setAVLImm(MI.getOperand(1).getImm());
+ } else {
+ assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
+ MI.getOpcode() == RISCV::PseudoVSETVLIX0);
+ Register AVLReg = MI.getOperand(1).getReg();
+ assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
+ "Can't handle X0, X0 vsetvli yet");
+ NewInfo.setAVLReg(AVLReg);
+ }
+ NewInfo.setVTYPE(MI.getOperand(2).getImm());
+
+ return NewInfo;
+}
+
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPt, DebugLoc DL,
const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo) {
- // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
- // VLMAX.
- if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
- Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
- BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
- .addReg(RISCV::X0, RegState::Define | RegState::Dead)
- .addReg(RISCV::X0, RegState::Kill)
- .addImm(Info.encodeVTYPE())
- .addReg(RISCV::VL, RegState::Implicit);
- return;
+ if (PrevInfo.isValid() && !PrevInfo.isUnknown()) {
+ // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
+ // VLMAX.
+ if (Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
+ BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addReg(RISCV::X0, RegState::Kill)
+ .addImm(Info.encodeVTYPE())
+ .addReg(RISCV::VL, RegState::Implicit);
+ return;
+ }
+
+ // If our AVL is a virtual register, it might be defined by a VSET(I)VLI. If
+ // it has the same VLMAX we want and the last VL/VTYPE we observed is the
+ // same, we can use the X0, X0 form.
+ if (Info.hasSameVLMAX(PrevInfo) && Info.hasAVLReg() &&
+ Info.getAVLReg().isVirtual()) {
+ if (MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg())) {
+ if (isVectorConfigInstr(*DefMI)) {
+ VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
+ if (DefInfo.hasSameAVL(PrevInfo) && DefInfo.hasSameVLMAX(PrevInfo)) {
+ BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
+ .addReg(RISCV::X0, RegState::Define | RegState::Dead)
+ .addReg(RISCV::X0, RegState::Kill)
+ .addImm(Info.encodeVTYPE())
+ .addReg(RISCV::VL, RegState::Implicit);
+ return;
+ }
+ }
+ }
+ }
}
if (Info.hasAVLImm()) {
@@ -778,23 +901,9 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
.addImm(Info.encodeVTYPE());
}
-// Return a VSETVLIInfo representing the changes made by this VSETVLI or
-// VSETIVLI instruction.
-static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
- VSETVLIInfo NewInfo;
- if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
- NewInfo.setAVLImm(MI.getOperand(1).getImm());
- } else {
- assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
- MI.getOpcode() == RISCV::PseudoVSETVLIX0);
- Register AVLReg = MI.getOperand(1).getReg();
- assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
- "Can't handle X0, X0 vsetvli yet");
- NewInfo.setAVLReg(AVLReg);
- }
- NewInfo.setVTYPE(MI.getOperand(2).getImm());
-
- return NewInfo;
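+// Return true for LMUL <= 1, i.e. the fractional LMULs (mf8/mf4/mf2) and m1;
+// m2, m4 and m8 return false.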
+static bool isLMUL1OrSmaller(RISCVII::VLMUL LMUL) {
+ auto [LMul, Fractional] = RISCVVType::decodeVLMUL(LMUL);
+ return Fractional || LMul == 1;
}
/// Return true if a VSETVLI is required to transition from CurInfo to Require
@@ -807,24 +916,39 @@ bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
return true;
- DemandedFields Used = getDemanded(MI);
-
- if (isScalarMoveInstr(MI)) {
- // For vmv.s.x and vfmv.s.f, if writing to an implicit_def operand, we don't
- // need to preserve any other bits and are thus compatible with any larger,
- // etype and can disregard policy bits. Warning: It's tempting to try doing
- // this for any tail agnostic operation, but we can't as TA requires
- // tail lanes to either be the original value or -1. We are writing
- // unknown bits to the lanes here.
- auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
- if (VRegDef && VRegDef->isImplicitDef() &&
- CurInfo.getSEW() >= Require.getSEW()) {
- Used.SEW = false;
- Used.TailPolicy = false;
- }
- }
-
- if (CurInfo.isCompatible(Used, Require))
+ DemandedFields Used = getDemanded(MI, MRI);
+
+ // A slidedown/slideup with an *undefined* merge op can freely clobber
+ // elements not copied from the source vector (e.g. masked off, tail, or
+ // slideup's prefix). Notes:
+ // * We can't modify SEW here since the slide amount is in units of SEW.
+ // * VL=1 is special only because we have existing support for zero vs
+ // non-zero VL. We could generalize this if we had a VL > C predicate.
+ // * The LMUL1 restriction is for machines whose latency may depend on VL.
+ // * As above, this is only legal for tail "undefined" not "agnostic".
+ if (isVSlideInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
+ isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
+ Used.VLAny = false;
+ Used.VLZeroness = true;
+ Used.LMUL = false;
+ Used.TailPolicy = false;
+ }
+
+ // A tail-undefined vmv.v.i/x or vfmv.v.f with VL=1 can be treated semantically
+ // the same as vmv.s.x. This is particularly useful since we don't have an
+ // immediate form of vmv.s.x, and thus frequently use vmv.v.i in its place.
+ // Since a splat is not constant-time in LMUL, we do need to be careful not to
+ // increase the number of active vector registers (unlike for vmv.s.x).
+ if (isScalarSplatInstr(MI) && Require.hasAVLImm() && Require.getAVLImm() == 1 &&
+ isLMUL1OrSmaller(CurInfo.getVLMUL()) && hasUndefinedMergeOp(MI, *MRI)) {
+ Used.LMUL = false;
+ Used.SEWLMULRatio = false;
+ Used.VLAny = false;
+ Used.SEW = DemandedFields::SEWGreaterThanOrEqual;
+ Used.TailPolicy = false;
+ }
+
+ if (CurInfo.isCompatible(Used, Require, *MRI))
return false;
// We didn't find a compatible value. If our AVL is a virtual register,
@@ -872,7 +996,7 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &M
// prevent extending live range of an avl register operand.
// TODO: We can probably relax this for immediates.
if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
- PrevInfo.hasEquallyZeroAVL(Info) &&
+ PrevInfo.hasEquallyZeroAVL(Info, *MRI) &&
Info.hasSameVLMAX(PrevInfo)) {
if (PrevInfo.hasAVLImm())
Info.setAVLImm(PrevInfo.getAVLImm());
@@ -1188,27 +1312,40 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
if (!hasFixedResult(AvailableInfo, ST))
return;
- // Does it actually let us remove an implicit transition in MBB?
- bool Found = false;
- for (auto &MI : MBB) {
- if (isVectorConfigInstr(MI))
- return;
-
- const uint64_t TSFlags = MI.getDesc().TSFlags;
- if (RISCVII::hasSEWOp(TSFlags)) {
- if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI))
- return;
- Found = true;
+ // Model the effect of changing the input state of the block MBB to
+ // AvailableInfo. We're looking for two issues here; one legality,
+ // one profitability.
+ // 1) If the block doesn't use some of the fields from VL or VTYPE, we
+ // may hit the end of the block with a different end state. We can
+ // not make this change without reflowing later blocks as well.
+ // 2) If we don't actually remove a transition, inserting a vsetvli
+ // into the predecessor block would be correct, but unprofitable.
+ VSETVLIInfo OldInfo = BlockInfo[MBB.getNumber()].Pred;
+ VSETVLIInfo CurInfo = AvailableInfo;
+ int TransitionsRemoved = 0;
+ for (const MachineInstr &MI : MBB) {
+ const VSETVLIInfo LastInfo = CurInfo;
+ const VSETVLIInfo LastOldInfo = OldInfo;
+ transferBefore(CurInfo, MI);
+ transferBefore(OldInfo, MI);
+ if (CurInfo == LastInfo)
+ TransitionsRemoved++;
+ if (LastOldInfo == OldInfo)
+ TransitionsRemoved--;
+ transferAfter(CurInfo, MI);
+ transferAfter(OldInfo, MI);
+ if (CurInfo == OldInfo)
+ // Convergence. All transitions after this must match by construction.
break;
- }
}
- if (!Found)
+ if (CurInfo != OldInfo || TransitionsRemoved <= 0)
+ // Issues 1 and 2 above
return;
// Finally, update both data flow state and insert the actual vsetvli.
// Doing both keeps the code in sync with the dataflow results, which
// is critical for correctness of phase 3.
- auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit;
+ auto OldExit = BlockInfo[UnavailablePred->getNumber()].Exit;
LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
<< UnavailablePred->getName() << " with state "
<< AvailableInfo << "\n");
@@ -1220,13 +1357,13 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
auto InsertPt = UnavailablePred->getFirstInstrTerminator();
insertVSETVLI(*UnavailablePred, InsertPt,
UnavailablePred->findDebugLoc(InsertPt),
- AvailableInfo, OldInfo);
+ AvailableInfo, OldExit);
}
static void doUnion(DemandedFields &A, DemandedFields B) {
A.VLAny |= B.VLAny;
A.VLZeroness |= B.VLZeroness;
- A.SEW |= B.SEW;
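+ // std::max works as a union here because the SEW enum is ordered by
+ // strictness: SEWEqual (2) > SEWGreaterThanOrEqual (1) > SEWNone (0).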
+ A.SEW = std::max(A.SEW, B.SEW);
A.LMUL |= B.LMUL;
A.SEWLMULRatio |= B.SEWLMULRatio;
A.TailPolicy |= B.TailPolicy;
@@ -1296,7 +1433,7 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {
if (!isVectorConfigInstr(MI)) {
- doUnion(Used, getDemanded(MI));
+ doUnion(Used, getDemanded(MI, MRI));
continue;
}
@@ -1319,12 +1456,16 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
MI.setDesc(NextMI->getDesc());
}
MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
- ToDelete.push_back(NextMI);
+ // Don't delete a vsetvli if its result might be used.
+ Register NextVRefDef = NextMI->getOperand(0).getReg();
+ if (NextVRefDef == RISCV::X0 ||
+ (NextVRefDef.isVirtual() && MRI->use_nodbg_empty(NextVRefDef)))
+ ToDelete.push_back(NextMI);
// fallthrough
}
}
NextMI = &MI;
- Used = getDemanded(MI);
+ Used = getDemanded(MI, MRI);
}
for (auto *MI : ToDelete)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
index 3a494a5e3b58..504952b6bd2f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -1,4 +1,4 @@
-//===-- RISCVInstrFormats.td - RISCV Instruction Formats ---*- tablegen -*-===//
+//===-- RISCVInstrFormats.td - RISC-V Instruction Formats --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -47,7 +47,12 @@ def InstFormatCS : InstFormat<13>;
def InstFormatCA : InstFormat<14>;
def InstFormatCB : InstFormat<15>;
def InstFormatCJ : InstFormat<16>;
-def InstFormatOther : InstFormat<17>;
+def InstFormatCU : InstFormat<17>;
+def InstFormatCLB : InstFormat<18>;
+def InstFormatCLH : InstFormat<19>;
+def InstFormatCSB : InstFormat<20>;
+def InstFormatCSH : InstFormat<21>;
+def InstFormatOther : InstFormat<22>;
class RISCVVConstraint<bits<3> val> {
bits<3> Value = val;
@@ -148,28 +153,16 @@ def OPC_BRANCH : RISCVOpcode<"BRANCH", 0b1100011>;
def OPC_JALR : RISCVOpcode<"JALR", 0b1100111>;
def OPC_JAL : RISCVOpcode<"JAL", 0b1101111>;
def OPC_SYSTEM : RISCVOpcode<"SYSTEM", 0b1110011>;
+def OPC_OP_P : RISCVOpcode<"OP_P", 0b1110111>;
def OPC_CUSTOM_3 : RISCVOpcode<"CUSTOM_3", 0b1111011>;
-class RVInst<dag outs, dag ins, string opcodestr, string argstr,
- list<dag> pattern, InstFormat format>
- : Instruction {
- field bits<32> Inst;
- // SoftFail is a field the disassembler can use to provide a way for
- // instructions to not match without killing the whole decode process. It is
- // mainly used for ARM, but Tablegen expects this field to exist or it fails
- // to build the decode table.
- field bits<32> SoftFail = 0;
- let Size = 4;
-
- bits<7> Opcode = 0;
-
- let Inst{6-0} = Opcode;
-
+class RVInstCommon<dag outs, dag ins, string opcodestr, string argstr,
+ list<dag> pattern, InstFormat format> : Instruction {
let Namespace = "RISCV";
dag OutOperandList = outs;
dag InOperandList = ins;
- let AsmString = opcodestr # "\t" # argstr;
+ let AsmString = opcodestr # !if(!empty(argstr), "", "\t" # argstr);
let Pattern = pattern;
let TSFlags{4-0} = format.Value;
@@ -181,36 +174,56 @@ class RVInst<dag outs, dag ins, string opcodestr, string argstr,
bits<3> VLMul = 0;
let TSFlags{10-8} = VLMul;
- bit HasDummyMask = 0;
- let TSFlags{11} = HasDummyMask;
-
bit ForceTailAgnostic = false;
- let TSFlags{12} = ForceTailAgnostic;
+ let TSFlags{11} = ForceTailAgnostic;
- bit HasMergeOp = 0;
- let TSFlags{13} = HasMergeOp;
+ bit IsTiedPseudo = 0;
+ let TSFlags{12} = IsTiedPseudo;
bit HasSEWOp = 0;
- let TSFlags{14} = HasSEWOp;
+ let TSFlags{13} = HasSEWOp;
bit HasVLOp = 0;
- let TSFlags{15} = HasVLOp;
+ let TSFlags{14} = HasVLOp;
bit HasVecPolicyOp = 0;
- let TSFlags{16} = HasVecPolicyOp;
+ let TSFlags{15} = HasVecPolicyOp;
bit IsRVVWideningReduction = 0;
- let TSFlags{17} = IsRVVWideningReduction;
+ let TSFlags{16} = IsRVVWideningReduction;
bit UsesMaskPolicy = 0;
- let TSFlags{18} = UsesMaskPolicy;
+ let TSFlags{17} = UsesMaskPolicy;
// Indicates that the result can be considered sign extended from bit 31. Some
// instructions with this flag aren't W instructions, but are either sign
// extended from a smaller size, always outputs a small integer, or put zeros
// in bits 63:31. Used by the SExtWRemoval pass.
bit IsSignExtendingOpW = 0;
- let TSFlags{19} = IsSignExtendingOpW;
+ let TSFlags{18} = IsSignExtendingOpW;
+
+ bit HasRoundModeOp = 0;
+ let TSFlags{19} = HasRoundModeOp;
+
+ // This is only valid when HasRoundModeOp is set to 1. HasRoundModeOp is set
+ // to 1 for vector fixed-point or floating-point intrinsics. This bit is
+ // processed by the 'RISCVInsertReadWriteCSR' pass to distinguish between
+ // fixed-point / floating-point instructions and emit the appropriate
+ // read/write to the correct CSR.
+ bit UsesVXRM = 0;
+ let TSFlags{20} = UsesVXRM;
+}
+
+class RVInst<dag outs, dag ins, string opcodestr, string argstr,
+ list<dag> pattern, InstFormat format>
+ : RVInstCommon<outs, ins, opcodestr, argstr, pattern, format> {
+ field bits<32> Inst;
+ // SoftFail is a field the disassembler can use to provide a way for
+ // instructions to not match without killing the whole decode process. It is
+ // mainly used for ARM, but Tablegen expects this field to exist or it fails
+ // to build the decode table.
+ field bits<32> SoftFail = 0;
+ let Size = 4;
}
// Pseudo instructions
@@ -220,7 +233,7 @@ class Pseudo<dag outs, dag ins, list<dag> pattern, string opcodestr = "", string
let isCodeGenOnly = 1;
}
-class PseudoQuietFCMP<RegisterClass Ty>
+class PseudoQuietFCMP<DAGOperand Ty>
: Pseudo<(outs GPR:$rd), (ins Ty:$rs1, Ty:$rs2), []> {
let hasSideEffects = 1;
let mayLoad = 0;
@@ -272,7 +285,7 @@ class RVInstR<bits<7> funct7, bits<3> funct3, RISCVOpcode opcode, dag outs,
let Inst{19-15} = rs1;
let Inst{14-12} = funct3;
let Inst{11-7} = rd;
- let Opcode = opcode.Value;
+ let Inst{6-0} = opcode.Value;
}
class RVInstR4<bits<2> funct2, bits<3> funct3, RISCVOpcode opcode, dag outs,
@@ -289,7 +302,7 @@ class RVInstR4<bits<2> funct2, bits<3> funct3, RISCVOpcode opcode, dag outs,
let Inst{19-15} = rs1;
let Inst{14-12} = funct3;
let Inst{11-7} = rd;
- let Opcode = opcode.Value;
+ let Inst{6-0} = opcode.Value;
}
class RVInstR4Frm<bits<2> funct2, RISCVOpcode opcode, dag outs, dag ins,
@@ -307,7 +320,7 @@ class RVInstR4Frm<bits<2> funct2, RISCVOpcode opcode, dag outs, dag ins,
let Inst{19-15} = rs1;
let Inst{14-12} = frm;
let Inst{11-7} = rd;
- let Opcode = opcode.Value;
+ let Inst{6-0} = opcode.Value;
}
class RVInstRAtomic<bits<5> funct5, bit aq, bit rl, bits<3> funct3,
@@ -325,7 +338,7 @@ class RVInstRAtomic<bits<5> funct5, bit aq, bit rl, bits<3> funct3,
let Inst{19-15} = rs1;
let Inst{14-12} = funct3;
let Inst{11-7} = rd;
- let Opcode = opcode.Value;
+ let Inst{6-0} = opcode.Value;
}
class RVInstRFrm<bits<7> funct7, RISCVOpcode opcode, dag outs, dag ins,
@@ -341,7 +354,7 @@ class RVInstRFrm<bits<7> funct7, RISCVOpcode opcode, dag outs, dag ins,
let Inst{19-15} = rs1;
let Inst{14-12} = frm;
let Inst{11-7} = rd;
- let Opcode = opcode.Value;
+ let Inst{6-0} = opcode.Value;
}
class RVInstI<bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins,
@@ -355,7 +368,7 @@ class RVInstI<bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins,
let Inst{19-15} = rs1;
let Inst{14-12} = funct3;
let Inst{11-7} = rd;
- let Opcode = opcode.Value;
+ let Inst{6-0} = opcode.Value;
}
class RVInstIShift<bits<5> imm11_7, bits<3> funct3, RISCVOpcode opcode,
@@ -371,7 +384,7 @@ class RVInstIShift<bits<5> imm11_7, bits<3> funct3, RISCVOpcode opcode,
let Inst{19-15} = rs1;
let Inst{14-12} = funct3;
let Inst{11-7} = rd;
- let Opcode = opcode.Value;
+ let Inst{6-0} = opcode.Value;
}
class RVInstIShiftW<bits<7> imm11_5, bits<3> funct3, RISCVOpcode opcode,
@@ -386,7 +399,7 @@ class RVInstIShiftW<bits<7> imm11_5, bits<3> funct3, RISCVOpcode opcode,
let Inst{19-15} = rs1;
let Inst{14-12} = funct3;
let Inst{11-7} = rd;
- let Opcode = opcode.Value;
+ let Inst{6-0} = opcode.Value;
}
class RVInstS<bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins,
@@ -401,7 +414,7 @@ class RVInstS<bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins,
let Inst{19-15} = rs1;
let Inst{14-12} = funct3;
let Inst{11-7} = imm12{4-0};
- let Opcode = opcode.Value;
+ let Inst{6-0} = opcode.Value;
}
class RVInstB<bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins,
@@ -418,7 +431,7 @@ class RVInstB<bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins,
let Inst{14-12} = funct3;
let Inst{11-8} = imm12{3-0};
let Inst{7} = imm12{10};
- let Opcode = opcode.Value;
+ let Inst{6-0} = opcode.Value;
}
class RVInstU<RISCVOpcode opcode, dag outs, dag ins, string opcodestr,
@@ -429,7 +442,7 @@ class RVInstU<RISCVOpcode opcode, dag outs, dag ins, string opcodestr,
let Inst{31-12} = imm20;
let Inst{11-7} = rd;
- let Opcode = opcode.Value;
+ let Inst{6-0} = opcode.Value;
}
class RVInstJ<RISCVOpcode opcode, dag outs, dag ins, string opcodestr,
@@ -443,7 +456,7 @@ class RVInstJ<RISCVOpcode opcode, dag outs, dag ins, string opcodestr,
let Inst{20} = imm20{10};
let Inst{19-12} = imm20{18-11};
let Inst{11-7} = rd;
- let Opcode = opcode.Value;
+ let Inst{6-0} = opcode.Value;
}
//===----------------------------------------------------------------------===//
@@ -465,7 +478,7 @@ class DirectiveInsnR<dag outs, dag ins, string argstr>
let Inst{19-15} = rs1;
let Inst{14-12} = funct3;
let Inst{11-7} = rd;
- let Opcode = opcode;
+ let Inst{6-0} = opcode;
let AsmString = ".insn r " # argstr;
}
@@ -487,7 +500,7 @@ class DirectiveInsnR4<dag outs, dag ins, string argstr>
let Inst{19-15} = rs1;
let Inst{14-12} = funct3;
let Inst{11-7} = rd;
- let Opcode = opcode;
+ let Inst{6-0} = opcode;
let AsmString = ".insn r4 " # argstr;
}
@@ -505,7 +518,7 @@ class DirectiveInsnI<dag outs, dag ins, string argstr>
let Inst{19-15} = rs1;
let Inst{14-12} = funct3;
let Inst{11-7} = rd;
- let Opcode = opcode;
+ let Inst{6-0} = opcode;
let AsmString = ".insn i " # argstr;
}
@@ -524,7 +537,7 @@ class DirectiveInsnS<dag outs, dag ins, string argstr>
let Inst{19-15} = rs1;
let Inst{14-12} = funct3;
let Inst{11-7} = imm12{4-0};
- let Opcode = opcode;
+ let Inst{6-0} = opcode;
let AsmString = ".insn s " # argstr;
}
@@ -545,7 +558,7 @@ class DirectiveInsnB<dag outs, dag ins, string argstr>
let Inst{14-12} = funct3;
let Inst{11-8} = imm12{3-0};
let Inst{7} = imm12{10};
- let Opcode = opcode;
+ let Inst{6-0} = opcode;
let AsmString = ".insn b " # argstr;
}
@@ -559,7 +572,7 @@ class DirectiveInsnU<dag outs, dag ins, string argstr>
let Inst{31-12} = imm20;
let Inst{11-7} = rd;
- let Opcode = opcode;
+ let Inst{6-0} = opcode;
let AsmString = ".insn u " # argstr;
}
@@ -573,7 +586,7 @@ class DirectiveInsnJ<dag outs, dag ins, string argstr>
let Inst{31-12} = imm20;
let Inst{11-7} = rd;
- let Opcode = opcode;
+ let Inst{6-0} = opcode;
let AsmString = ".insn j " # argstr;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td b/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
index 690bec5181e2..e14be7dac08e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
@@ -1,4 +1,4 @@
-//===-- RISCVInstrFormatsC.td - RISCV C Instruction Formats --*- tablegen -*-=//
+//===-- RISCVInstrFormatsC.td - RISC-V C Instruction Formats -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -12,7 +12,7 @@
class RVInst16<dag outs, dag ins, string opcodestr, string argstr,
list<dag> pattern, InstFormat format>
- : Instruction {
+ : RVInstCommon<outs, ins, opcodestr, argstr, pattern, format> {
field bits<16> Inst;
// SoftFail is a field the disassembler can use to provide a way for
// instructions to not match without killing the whole decode process. It is
@@ -20,17 +20,6 @@ class RVInst16<dag outs, dag ins, string opcodestr, string argstr,
// to build the decode table.
field bits<16> SoftFail = 0;
let Size = 2;
-
- bits<2> Opcode = 0;
-
- let Namespace = "RISCV";
-
- dag OutOperandList = outs;
- dag InOperandList = ins;
- let AsmString = opcodestr # "\t" # argstr;
- let Pattern = pattern;
-
- let TSFlags{4-0} = format.Value;
}
class RVInst16CR<bits<4> funct4, bits<2> opcode, dag outs, dag ins,
@@ -53,7 +42,6 @@ class RVInst16CI<bits<3> funct3, bits<2> opcode, dag outs, dag ins,
: RVInst16<outs, ins, opcodestr, argstr, [], InstFormatCI> {
bits<10> imm;
bits<5> rd;
- bits<5> rs1;
let Inst{15-13} = funct3;
let Inst{12} = imm{5};
@@ -157,3 +145,242 @@ class RVInst16CJ<bits<3> funct3, bits<2> opcode, dag outs, dag ins,
let Inst{2} = offset{4};
let Inst{1-0} = opcode;
}
+
+class RVInst16CU<bits<6> funct6, bits<5> funct5, bits<2> opcode, dag outs,
+ dag ins, string opcodestr, string argstr>
+ : RVInst16<outs, ins, opcodestr, argstr, [], InstFormatCU>{
+ bits<3> rd;
+
+ let Inst{15-10} = funct6;
+ let Inst{9-7} = rd;
+ let Inst{6-2} = funct5;
+ let Inst{1-0} = opcode;
+}
+
+// The immediate value encoding differs for each instruction, so each subclass
+// is responsible for setting the appropriate bits in the Inst field.
+// The bits Inst{6-5} must be set for each instruction.
+class RVInst16CLB<bits<6> funct6, bits<2> opcode, dag outs, dag ins,
+ string opcodestr, string argstr>
+ : RVInst16<outs, ins, opcodestr, argstr, [], InstFormatCLB> {
+ bits<3> rd;
+ bits<3> rs1;
+
+ let Inst{15-10} = funct6;
+ let Inst{9-7} = rs1;
+ let Inst{4-2} = rd;
+ let Inst{1-0} = opcode;
+}
+
+// The immediate value encoding differs for each instruction, so each subclass
+// is responsible for setting the appropriate bits in the Inst field.
+// The bits Inst{5} must be set for each instruction.
+class RVInst16CLH<bits<6> funct6, bit funct1, bits<2> opcode, dag outs,
+ dag ins, string opcodestr, string argstr>
+ : RVInst16<outs, ins, opcodestr, argstr, [], InstFormatCLH> {
+ bits<3> rd;
+ bits<3> rs1;
+
+ let Inst{15-10} = funct6;
+ let Inst{9-7} = rs1;
+ let Inst{6} = funct1;
+ let Inst{4-2} = rd;
+ let Inst{1-0} = opcode;
+}
+
+// The immediate value encoding differs for each instruction, so each subclass
+// is responsible for setting the appropriate bits in the Inst field.
+// The bits Inst{6-5} must be set for each instruction.
+class RVInst16CSB<bits<6> funct6, bits<2> opcode, dag outs, dag ins,
+ string opcodestr, string argstr>
+ : RVInst16<outs, ins, opcodestr, argstr, [], InstFormatCSB> {
+ bits<3> rs2;
+ bits<3> rs1;
+
+ let Inst{15-10} = funct6;
+ let Inst{9-7} = rs1;
+ let Inst{4-2} = rs2;
+ let Inst{1-0} = opcode;
+}
+
+// The immediate value encoding differs for each instruction, so each subclass
+// is responsible for setting the appropriate bits in the Inst field.
+// The bits Inst{5} must be set for each instruction.
+class RVInst16CSH<bits<6> funct6, bit funct1, bits<2> opcode, dag outs,
+ dag ins, string opcodestr, string argstr>
+ : RVInst16<outs, ins, opcodestr, argstr, [], InstFormatCSH> {
+ bits<3> rs2;
+ bits<3> rs1;
+
+ let Inst{15-10} = funct6;
+ let Inst{9-7} = rs1;
+ let Inst{6} = funct1;
+ let Inst{4-2} = rs2;
+ let Inst{1-0} = opcode;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction classes for .insn directives
+//===----------------------------------------------------------------------===//
+
+class DirectiveInsnCR<dag outs, dag ins, string argstr>
+ : RVInst16<outs, ins, "", "", [], InstFormatCR> {
+ bits<2> opcode;
+ bits<4> funct4;
+
+ bits<5> rs2;
+ bits<5> rd;
+
+ let Inst{15-12} = funct4;
+ let Inst{11-7} = rd;
+ let Inst{6-2} = rs2;
+ let Inst{1-0} = opcode;
+
+ let AsmString = ".insn cr " # argstr;
+}
+
+class DirectiveInsnCI<dag outs, dag ins, string argstr>
+ : RVInst16<outs, ins, "", "", [], InstFormatCI> {
+ bits<2> opcode;
+ bits<3> funct3;
+
+ bits<6> imm6;
+ bits<5> rd;
+
+ let Inst{15-13} = funct3;
+ let Inst{12} = imm6{5};
+ let Inst{11-7} = rd;
+ let Inst{6-2} = imm6{4-0};
+ let Inst{1-0} = opcode;
+
+ let AsmString = ".insn ci " # argstr;
+}
+
+class DirectiveInsnCIW<dag outs, dag ins, string argstr>
+ : RVInst16<outs, ins, "", "", [], InstFormatCIW> {
+ bits<2> opcode;
+ bits<3> funct3;
+
+ bits<8> imm8;
+ bits<3> rd;
+
+ let Inst{15-13} = funct3;
+ let Inst{12-5} = imm8;
+ let Inst{4-2} = rd;
+ let Inst{1-0} = opcode;
+
+ let AsmString = ".insn ciw " # argstr;
+}
+
+class DirectiveInsnCSS<dag outs, dag ins, string argstr>
+ : RVInst16<outs, ins, "", "", [], InstFormatCSS> {
+ bits<2> opcode;
+ bits<3> funct3;
+
+ bits<6> imm6;
+ bits<5> rs2;
+
+ let Inst{15-13} = funct3;
+ let Inst{12-7} = imm6;
+ let Inst{6-2} = rs2;
+ let Inst{1-0} = opcode;
+
+ let AsmString = ".insn css " # argstr;
+}
+
+class DirectiveInsnCL<dag outs, dag ins, string argstr>
+ : RVInst16<outs, ins, "", "", [], InstFormatCL> {
+ bits<2> opcode;
+ bits<3> funct3;
+
+ bits<5> imm5;
+ bits<3> rd;
+ bits<3> rs1;
+
+ let Inst{15-13} = funct3;
+ let Inst{12-10} = imm5{4-2};
+ let Inst{9-7} = rs1;
+ let Inst{6-5} = imm5{1-0};
+ let Inst{4-2} = rd;
+ let Inst{1-0} = opcode;
+
+ let AsmString = ".insn cl " # argstr;
+}
+
+class DirectiveInsnCS<dag outs, dag ins, string argstr>
+ : RVInst16<outs, ins, "", "", [], InstFormatCS> {
+ bits<2> opcode;
+ bits<3> funct3;
+
+ bits<5> imm5;
+ bits<3> rs2;
+ bits<3> rs1;
+
+ let Inst{15-13} = funct3;
+ let Inst{12-10} = imm5{4-2};
+ let Inst{9-7} = rs1;
+ let Inst{6-5} = imm5{1-0};
+ let Inst{4-2} = rs2;
+ let Inst{1-0} = opcode;
+
+ let AsmString = ".insn cs " # argstr;
+}
+
+class DirectiveInsnCA<dag outs, dag ins, string argstr>
+ : RVInst16<outs, ins, "", "", [], InstFormatCA> {
+ bits<2> opcode;
+ bits<6> funct6;
+ bits<2> funct2;
+
+ bits<3> rd;
+ bits<3> rs2;
+
+ let Inst{15-10} = funct6;
+ let Inst{9-7} = rd;
+ let Inst{6-5} = funct2;
+ let Inst{4-2} = rs2;
+ let Inst{1-0} = opcode;
+
+ let AsmString = ".insn ca " # argstr;
+}
+
+class DirectiveInsnCB<dag outs, dag ins, string argstr>
+ : RVInst16<outs, ins, "", "", [], InstFormatCB> {
+ bits<2> opcode;
+ bits<3> funct3;
+
+ bits<8> imm8;
+ bits<3> rs1;
+
+ let Inst{15-13} = funct3;
+ let Inst{12} = imm8{7};
+ let Inst{11-10} = imm8{3-2};
+ let Inst{9-7} = rs1;
+ let Inst{6-5} = imm8{6-5};
+ let Inst{4-3} = imm8{1-0};
+ let Inst{2} = imm8{4};
+ let Inst{1-0} = opcode;
+
+ let AsmString = ".insn cb " # argstr;
+}
+
+class DirectiveInsnCJ<dag outs, dag ins, string argstr>
+ : RVInst16<outs, ins, "", "", [], InstFormatCJ> {
+ bits<2> opcode;
+ bits<3> funct3;
+
+ bits<11> imm11;
+
+ let Inst{15-13} = funct3;
+ let Inst{12} = imm11{10};
+ let Inst{11} = imm11{3};
+ let Inst{10-9} = imm11{8-7};
+ let Inst{8} = imm11{9};
+ let Inst{7} = imm11{5};
+ let Inst{6} = imm11{6};
+ let Inst{5-3} = imm11{2-0};
+ let Inst{2} = imm11{4};
+ let Inst{1-0} = opcode;
+
+ let AsmString = ".insn cj " # argstr;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td b/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
index 5a5e4c454453..6f27c98dd618 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td
@@ -1,4 +1,4 @@
-//===-- RISCVInstrFormatsV.td - RISCV V Instruction Formats --*- tablegen -*-=//
+//===-- RISCVInstrFormatsV.td - RISC-V V Instruction Formats -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -66,7 +66,7 @@ class RVInstSetiVLi<dag outs, dag ins, string opcodestr, string argstr>
let Inst{19-15} = uimm;
let Inst{14-12} = OPCFG.Value;
let Inst{11-7} = rd;
- let Opcode = OPC_OP_V.Value;
+ let Inst{6-0} = OPC_OP_V.Value;
let Defs = [VTYPE, VL];
}
@@ -82,7 +82,7 @@ class RVInstSetVLi<dag outs, dag ins, string opcodestr, string argstr>
let Inst{19-15} = rs1;
let Inst{14-12} = OPCFG.Value;
let Inst{11-7} = rd;
- let Opcode = OPC_OP_V.Value;
+ let Inst{6-0} = OPC_OP_V.Value;
let Defs = [VTYPE, VL];
}
@@ -99,7 +99,7 @@ class RVInstSetVL<dag outs, dag ins, string opcodestr, string argstr>
let Inst{19-15} = rs1;
let Inst{14-12} = OPCFG.Value;
let Inst{11-7} = rd;
- let Opcode = OPC_OP_V.Value;
+ let Inst{6-0} = OPC_OP_V.Value;
let Defs = [VTYPE, VL];
}
@@ -118,7 +118,7 @@ class RVInstVV<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
let Inst{19-15} = vs1;
let Inst{14-12} = opv.Value;
let Inst{11-7} = vd;
- let Opcode = OPC_OP_V.Value;
+ let Inst{6-0} = OPC_OP_V.Value;
let Uses = [VTYPE, VL];
let RVVConstraint = VMConstraint;
@@ -138,7 +138,7 @@ class RVInstVX<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
let Inst{19-15} = rs1;
let Inst{14-12} = opv.Value;
let Inst{11-7} = vd;
- let Opcode = OPC_OP_V.Value;
+ let Inst{6-0} = OPC_OP_V.Value;
let Uses = [VTYPE, VL];
let RVVConstraint = VMConstraint;
@@ -157,7 +157,7 @@ class RVInstV2<bits<6> funct6, bits<5> vs2, RISCVVFormat opv, dag outs, dag ins,
let Inst{19-15} = rs1;
let Inst{14-12} = opv.Value;
let Inst{11-7} = vd;
- let Opcode = OPC_OP_V.Value;
+ let Inst{6-0} = OPC_OP_V.Value;
let Uses = [VTYPE, VL];
let RVVConstraint = VMConstraint;
@@ -177,7 +177,7 @@ class RVInstIVI<bits<6> funct6, dag outs, dag ins, string opcodestr,
let Inst{19-15} = imm;
let Inst{14-12} = OPIVI.Value;
let Inst{11-7} = vd;
- let Opcode = OPC_OP_V.Value;
+ let Inst{6-0} = OPC_OP_V.Value;
let Uses = [VTYPE, VL];
let RVVConstraint = VMConstraint;
@@ -196,7 +196,7 @@ class RVInstV<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, dag outs,
let Inst{19-15} = vs1;
let Inst{14-12} = opv.Value;
let Inst{11-7} = vd;
- let Opcode = OPC_OP_V.Value;
+ let Inst{6-0} = OPC_OP_V.Value;
let Uses = [VTYPE, VL];
let RVVConstraint = VMConstraint;
@@ -218,7 +218,7 @@ class RVInstVLU<bits<3> nf, bit mew, RISCVLSUMOP lumop,
let Inst{19-15} = rs1;
let Inst{14-12} = width;
let Inst{11-7} = vd;
- let Opcode = OPC_LOAD_FP.Value;
+ let Inst{6-0} = OPC_LOAD_FP.Value;
let Uses = [VTYPE, VL];
let RVVConstraint = VMConstraint;
@@ -240,7 +240,7 @@ class RVInstVLS<bits<3> nf, bit mew, bits<3> width,
let Inst{19-15} = rs1;
let Inst{14-12} = width;
let Inst{11-7} = vd;
- let Opcode = OPC_LOAD_FP.Value;
+ let Inst{6-0} = OPC_LOAD_FP.Value;
let Uses = [VTYPE, VL];
let RVVConstraint = VMConstraint;
@@ -262,7 +262,7 @@ class RVInstVLX<bits<3> nf, bit mew, RISCVMOP mop, bits<3> width,
let Inst{19-15} = rs1;
let Inst{14-12} = width;
let Inst{11-7} = vd;
- let Opcode = OPC_LOAD_FP.Value;
+ let Inst{6-0} = OPC_LOAD_FP.Value;
let Uses = [VTYPE, VL];
let RVVConstraint = VMConstraint;
@@ -284,7 +284,7 @@ class RVInstVSU<bits<3> nf, bit mew, RISCVLSUMOP sumop,
let Inst{19-15} = rs1;
let Inst{14-12} = width;
let Inst{11-7} = vs3;
- let Opcode = OPC_STORE_FP.Value;
+ let Inst{6-0} = OPC_STORE_FP.Value;
let Uses = [VTYPE, VL];
}
@@ -305,7 +305,7 @@ class RVInstVSS<bits<3> nf, bit mew, bits<3> width,
let Inst{19-15} = rs1;
let Inst{14-12} = width;
let Inst{11-7} = vs3;
- let Opcode = OPC_STORE_FP.Value;
+ let Inst{6-0} = OPC_STORE_FP.Value;
let Uses = [VTYPE, VL];
}
@@ -326,7 +326,7 @@ class RVInstVSX<bits<3> nf, bit mew, RISCVMOP mop, bits<3> width,
let Inst{19-15} = rs1;
let Inst{14-12} = width;
let Inst{11-7} = vs3;
- let Opcode = OPC_STORE_FP.Value;
+ let Inst{6-0} = OPC_STORE_FP.Value;
let Uses = [VTYPE, VL];
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 6494c9a2cd9d..e0cbca6dc1c2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVInstrInfo.cpp - RISCV Instruction Information ------*- C++ -*-===//
+//===-- RISCVInstrInfo.cpp - RISC-V Instruction Information -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the RISCV implementation of the TargetInstrInfo class.
+// This file contains the RISC-V implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/MC/MCInstBuilder.h"
@@ -44,6 +45,16 @@ static cl::opt<bool> PreferWholeRegisterMove(
"riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
cl::desc("Prefer whole register move for vector registers."));
+static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(
+ "riscv-force-machine-combiner-strategy", cl::Hidden,
+ cl::desc("Force machine combiner to use a specific strategy for machine "
+ "trace metrics evaluation."),
+ cl::init(MachineTraceStrategy::TS_NumStrategies),
+ cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local",
+ "Local strategy."),
+ clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr",
+ "MinInstrCount strategy.")));
+
namespace llvm::RISCVVPseudosTable {
using namespace RISCV;
@@ -68,19 +79,33 @@ MCInst RISCVInstrInfo::getNop() const {
unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
+ unsigned Dummy;
+ return isLoadFromStackSlot(MI, FrameIndex, Dummy);
+}
+
+unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
+ int &FrameIndex,
+ unsigned &MemBytes) const {
switch (MI.getOpcode()) {
default:
return 0;
case RISCV::LB:
case RISCV::LBU:
+ MemBytes = 1;
+ break;
case RISCV::LH:
case RISCV::LHU:
case RISCV::FLH:
+ MemBytes = 2;
+ break;
case RISCV::LW:
case RISCV::FLW:
case RISCV::LWU:
+ MemBytes = 4;
+ break;
case RISCV::LD:
case RISCV::FLD:
+ MemBytes = 8;
break;
}
@@ -95,16 +120,30 @@ unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const {
+ unsigned Dummy;
+ return isStoreToStackSlot(MI, FrameIndex, Dummy);
+}
+
+unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+ int &FrameIndex,
+ unsigned &MemBytes) const {
switch (MI.getOpcode()) {
default:
return 0;
case RISCV::SB:
+ MemBytes = 1;
+ break;
case RISCV::SH:
- case RISCV::SW:
case RISCV::FSH:
+ MemBytes = 2;
+ break;
+ case RISCV::SW:
case RISCV::FSW:
+ MemBytes = 4;
+ break;
case RISCV::SD:
case RISCV::FSD:
+ MemBytes = 8;
break;
}
@@ -206,7 +245,7 @@ static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
for (const MachineOperand &MO : MBBI->explicit_operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- if (!FoundDef && TRI->isSubRegisterEq(MO.getReg(), SrcReg)) {
+ if (!FoundDef && TRI->regsOverlap(MO.getReg(), SrcReg)) {
// We only permit the source of the COPY to have the same LMUL as the defined
// operand.
// There are cases where we need to keep the whole register copy if the LMUL
@@ -258,6 +297,13 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, MCRegister DstReg,
MCRegister SrcReg, bool KillSrc) const {
+ const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+ if (RISCV::GPRPF64RegClass.contains(DstReg))
+ DstReg = TRI->getSubReg(DstReg, RISCV::sub_32);
+ if (RISCV::GPRPF64RegClass.contains(SrcReg))
+ SrcReg = TRI->getSubReg(SrcReg, RISCV::sub_32);
+
if (RISCV::GPRRegClass.contains(DstReg, SrcReg)) {
BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
.addReg(SrcReg, getKillRegState(KillSrc))
@@ -268,10 +314,9 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
// Handle copy from csr
if (RISCV::VCSRRegClass.contains(SrcReg) &&
RISCV::GPRRegClass.contains(DstReg)) {
- const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
BuildMI(MBB, MBBI, DL, get(RISCV::CSRRS), DstReg)
- .addImm(RISCVSysReg::lookupSysRegByName(TRI.getName(SrcReg))->Encoding)
- .addReg(RISCV::X0);
+ .addImm(RISCVSysReg::lookupSysRegByName(TRI->getName(SrcReg))->Encoding)
+ .addReg(RISCV::X0);
return;
}
@@ -282,16 +327,17 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
RISCVII::VLMUL LMul = RISCVII::LMUL_1;
unsigned SubRegIdx = RISCV::sub_vrm1_0;
if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
- if (!STI.hasStdExtZfh() && STI.hasStdExtZfhmin()) {
+ if (STI.hasStdExtZfh()) {
+ Opc = RISCV::FSGNJ_H;
+ } else {
+ assert(STI.hasStdExtF() && STI.hasStdExtZfhmin() &&
+ "Unexpected extensions");
// The Zfhmin subset doesn't have FSGNJ_H, so replace FSGNJ_H with FSGNJ_S.
- const TargetRegisterInfo *TRI = STI.getRegisterInfo();
DstReg = TRI->getMatchingSuperReg(DstReg, RISCV::sub_16,
&RISCV::FPR32RegClass);
SrcReg = TRI->getMatchingSuperReg(SrcReg, RISCV::sub_16,
&RISCV::FPR32RegClass);
Opc = RISCV::FSGNJ_S;
- } else {
- Opc = RISCV::FSGNJ_H;
}
IsScalableVector = false;
} else if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) {
@@ -373,12 +419,13 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (IsScalableVector) {
bool UseVMV_V_V = false;
+ bool UseVMV_V_I = false;
MachineBasicBlock::const_iterator DefMBBI;
- unsigned VIOpc;
if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
UseVMV_V_V = true;
// We only need to handle LMUL = 1/2/4/8 here because we only define
// vector register classes for LMUL = 1/2/4/8.
+ unsigned VIOpc;
switch (LMul) {
default:
llvm_unreachable("Impossible LMUL for vector register copy.");
@@ -399,30 +446,30 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
VIOpc = RISCV::PseudoVMV_V_I_M8;
break;
}
- }
- bool UseVMV_V_I = false;
- if (UseVMV_V_V && (DefMBBI->getOpcode() == VIOpc)) {
- UseVMV_V_I = true;
- Opc = VIOpc;
+ if (DefMBBI->getOpcode() == VIOpc) {
+ UseVMV_V_I = true;
+ Opc = VIOpc;
+ }
}
if (NF == 1) {
auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg);
+ if (UseVMV_V_V)
+ MIB.addReg(DstReg, RegState::Undef);
if (UseVMV_V_I)
- MIB = MIB.add(DefMBBI->getOperand(1));
+ MIB = MIB.add(DefMBBI->getOperand(2));
else
MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc));
if (UseVMV_V_V) {
const MCInstrDesc &Desc = DefMBBI->getDesc();
MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
+ MIB.addImm(0); // tu, mu
MIB.addReg(RISCV::VL, RegState::Implicit);
MIB.addReg(RISCV::VTYPE, RegState::Implicit);
}
} else {
- const TargetRegisterInfo *TRI = STI.getRegisterInfo();
-
int I = 0, End = NF, Incr = 1;
unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
unsigned DstEncoding = TRI->getEncodingValue(DstReg);
@@ -439,8 +486,11 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
for (; I != End; I += Incr) {
auto MIB = BuildMI(MBB, MBBI, DL, get(Opc),
TRI->getSubReg(DstReg, SubRegIdx + I));
+ if (UseVMV_V_V)
+ MIB.addReg(TRI->getSubReg(DstReg, SubRegIdx + I),
+ RegState::Undef);
if (UseVMV_V_I)
- MIB = MIB.add(DefMBBI->getOperand(1));
+ MIB = MIB.add(DefMBBI->getOperand(2));
else
MIB = MIB.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
getKillRegState(KillSrc));
@@ -448,6 +498,7 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
const MCInstrDesc &Desc = DefMBBI->getDesc();
MIB.add(DefMBBI->getOperand(RISCVII::getVLOpNum(Desc))); // AVL
MIB.add(DefMBBI->getOperand(RISCVII::getSEWOpNum(Desc))); // SEW
+ MIB.addImm(0); // tu, mu
MIB.addReg(RISCV::VL, RegState::Implicit);
MIB.addReg(RISCV::VTYPE, RegState::Implicit);
}
@@ -479,6 +530,9 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::SW : RISCV::SD;
IsScalableVector = false;
+ } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) {
+ Opcode = RISCV::PseudoRV32ZdinxSD;
+ IsScalableVector = false;
} else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FSH;
IsScalableVector = false;
@@ -563,6 +617,9 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::LW : RISCV::LD;
IsScalableVector = false;
+ } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) {
+ Opcode = RISCV::PseudoRV32ZdinxLD;
+ IsScalableVector = false;
} else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FLH;
IsScalableVector = false;
@@ -697,7 +754,7 @@ void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
RISCVMatInt::generateInstSeq(Val, STI.getFeatureBits());
assert(!Seq.empty());
- for (RISCVMatInt::Inst &Inst : Seq) {
+ for (const RISCVMatInt::Inst &Inst : Seq) {
switch (Inst.getOpndKind()) {
case RISCVMatInt::Imm:
BuildMI(MBB, MBBI, DL, get(Inst.getOpcode()), DstReg)
@@ -912,7 +969,7 @@ unsigned RISCVInstrInfo::insertBranch(
// Shouldn't be a fall through.
assert(TBB && "insertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 3 || Cond.size() == 0) &&
- "RISCV branch conditions have two components!");
+ "RISC-V branch conditions have two components!");
// Unconditional branch.
if (Cond.empty()) {
@@ -1079,8 +1136,7 @@ static MachineInstr *canFoldAsPredicatedOp(Register Reg,
if (getPredicatedOpcode(MI->getOpcode()) == RISCV::INSTRUCTION_LIST_END)
return nullptr;
// Check if MI has any other defs or physreg uses.
- for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
// Reject frame index operands, PEI can't handle the predicated pseudos.
if (MO.isFI() || MO.isCPI() || MO.isJTI())
return nullptr;
@@ -1204,6 +1260,23 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
*TM.getMCAsmInfo());
}
+ if (!MI.memoperands_empty()) {
+ MachineMemOperand *MMO = *(MI.memoperands_begin());
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const auto &ST = MF.getSubtarget<RISCVSubtarget>();
+ if (ST.hasStdExtZihintntl() && MMO->isNonTemporal()) {
+ if (ST.hasStdExtCOrZca() && ST.enableRVCHintInstrs()) {
+ if (isCompressibleInst(MI, STI))
+ return 4; // c.ntl.all + c.load/c.store
+ return 6; // c.ntl.all + load/store
+ }
+ return 8; // ntl.all + load/store
+ }
+ }
+
+ if (Opcode == TargetOpcode::BUNDLE)
+ return getInstBundleLength(MI);
+
if (MI.getParent() && MI.getParent()->getParent()) {
if (isCompressibleInst(MI, STI))
return 2;
@@ -1211,6 +1284,17 @@ unsigned RISCVInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
return get(Opcode).getSize();
}
+unsigned RISCVInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
+ unsigned Size = 0;
+ MachineBasicBlock::const_instr_iterator I = MI.getIterator();
+ MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
+ while (++I != E && I->isInsideBundle()) {
+ assert(!I->isBundle() && "No nested bundle!");
+ Size += getInstSizeInBytes(*I);
+ }
+ return Size;
+}
+
bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
const unsigned Opcode = MI.getOpcode();
switch (Opcode) {
@@ -1219,6 +1303,10 @@ bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
case RISCV::FSGNJ_D:
case RISCV::FSGNJ_S:
case RISCV::FSGNJ_H:
+ case RISCV::FSGNJ_D_INX:
+ case RISCV::FSGNJ_D_IN32X:
+ case RISCV::FSGNJ_S_INX:
+ case RISCV::FSGNJ_H_INX:
// The canonical floating-point move is fsgnj rd, rs, rs.
return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
MI.getOperand(1).getReg() == MI.getOperand(2).getReg();
@@ -1248,6 +1336,10 @@ RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
case RISCV::FSGNJ_D:
case RISCV::FSGNJ_S:
case RISCV::FSGNJ_H:
+ case RISCV::FSGNJ_D_INX:
+ case RISCV::FSGNJ_D_IN32X:
+ case RISCV::FSGNJ_S_INX:
+ case RISCV::FSGNJ_H_INX:
// The canonical floating-point move is fsgnj rd, rs, rs.
if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() &&
MI.getOperand(1).getReg() == MI.getOperand(2).getReg())
@@ -1257,11 +1349,25 @@ RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
return std::nullopt;
}
+MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const {
+ if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) {
+ // The option is unused. Choose the Local strategy only for in-order cores.
+ // When the scheduling model is unspecified, use the MinInstrCount strategy
+ // as the more generic one.
+ const auto &SchedModel = STI.getSchedModel();
+ return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder())
+ ? MachineTraceStrategy::TS_MinInstrCount
+ : MachineTraceStrategy::TS_Local;
+ }
+ // The strategy was forced by the option.
+ return ForceMachineCombinerStrategy;
+}
+
void RISCVInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
MachineInstr &OldMI2,
MachineInstr &NewMI1,
MachineInstr &NewMI2) const {
- uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
+ uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
NewMI1.setFlags(IntersectedFlags);
NewMI2.setFlags(IntersectedFlags);
}
@@ -1547,21 +1653,27 @@ static void combineFPFusedMultiply(MachineInstr &Root, MachineInstr &Prev,
Register DstReg = Dst.getReg();
unsigned FusedOpc = getFPFusedMultiplyOpcode(Root.getOpcode(), Pattern);
- auto IntersectedFlags = Root.getFlags() & Prev.getFlags();
+ uint32_t IntersectedFlags = Root.getFlags() & Prev.getFlags();
DebugLoc MergedLoc =
DILocation::getMergedLocation(Root.getDebugLoc(), Prev.getDebugLoc());
+ bool Mul1IsKill = Mul1.isKill();
+ bool Mul2IsKill = Mul2.isKill();
+ bool AddendIsKill = Addend.isKill();
+
+ // We need to clear kill flags since we may be extending the live range past
+ // a kill. If the mul had kill flags, we can preserve those since we know
+ // where the previous range stopped.
+ MRI.clearKillFlags(Mul1.getReg());
+ MRI.clearKillFlags(Mul2.getReg());
+
MachineInstrBuilder MIB =
BuildMI(*MF, MergedLoc, TII->get(FusedOpc), DstReg)
- .addReg(Mul1.getReg(), getKillRegState(Mul1.isKill()))
- .addReg(Mul2.getReg(), getKillRegState(Mul2.isKill()))
- .addReg(Addend.getReg(), getKillRegState(Addend.isKill()))
+ .addReg(Mul1.getReg(), getKillRegState(Mul1IsKill))
+ .addReg(Mul2.getReg(), getKillRegState(Mul2IsKill))
+ .addReg(Addend.getReg(), getKillRegState(AddendIsKill))
.setMIFlags(IntersectedFlags);
- // Mul operands are not killed anymore.
- Mul1.setIsKill(false);
- Mul2.setIsKill(false);
-
InsInstrs.push_back(MIB);
if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg()))
DelInstrs.push_back(&Prev);
@@ -1598,11 +1710,11 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const {
MCInstrDesc const &Desc = MI.getDesc();
- for (auto &OI : enumerate(Desc.operands())) {
- unsigned OpType = OI.value().OperandType;
+ for (const auto &[Index, Operand] : enumerate(Desc.operands())) {
+ unsigned OpType = Operand.OperandType;
if (OpType >= RISCVOp::OPERAND_FIRST_RISCV_IMM &&
OpType <= RISCVOp::OPERAND_LAST_RISCV_IMM) {
- const MachineOperand &MO = MI.getOperand(OI.index());
+ const MachineOperand &MO = MI.getOperand(Index);
if (MO.isImm()) {
int64_t Imm = MO.getImm();
bool Ok;
@@ -1615,11 +1727,20 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
case RISCVOp::OPERAND_UIMM##NUM: \
Ok = isUInt<NUM>(Imm); \
break;
+ CASE_OPERAND_UIMM(1)
CASE_OPERAND_UIMM(2)
CASE_OPERAND_UIMM(3)
CASE_OPERAND_UIMM(4)
CASE_OPERAND_UIMM(5)
+ CASE_OPERAND_UIMM(6)
CASE_OPERAND_UIMM(7)
+ CASE_OPERAND_UIMM(8)
+ CASE_OPERAND_UIMM(12)
+ CASE_OPERAND_UIMM(20)
+ // clang-format on
+ case RISCVOp::OPERAND_UIMM2_LSB0:
+ Ok = isShiftedUInt<1, 1>(Imm);
+ break;
case RISCVOp::OPERAND_UIMM7_LSB00:
Ok = isShiftedUInt<5, 2>(Imm);
break;
@@ -1629,12 +1750,18 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
case RISCVOp::OPERAND_UIMM8_LSB000:
Ok = isShiftedUInt<5, 3>(Imm);
break;
- CASE_OPERAND_UIMM(12)
- CASE_OPERAND_UIMM(20)
- // clang-format on
+ case RISCVOp::OPERAND_UIMM8_GE32:
+ Ok = isUInt<8>(Imm) && Imm >= 32;
+ break;
+ case RISCVOp::OPERAND_UIMM9_LSB000:
+ Ok = isShiftedUInt<6, 3>(Imm);
+ break;
case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:
Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0);
break;
+ case RISCVOp::OPERAND_UIMM10_LSB00_NONZERO:
+ Ok = isShiftedUInt<8, 2>(Imm) && (Imm != 0);
+ break;
case RISCVOp::OPERAND_ZERO:
Ok = Imm == 0;
break;
@@ -1669,12 +1796,22 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
Ok = STI.is64Bit() ? isUInt<6>(Imm) : isUInt<5>(Imm);
Ok = Ok && Imm != 0;
break;
- case RISCVOp::OPERAND_UIMM_SHFL:
- Ok = STI.is64Bit() ? isUInt<5>(Imm) : isUInt<4>(Imm);
+ case RISCVOp::OPERAND_CLUI_IMM:
+ Ok = (isUInt<5>(Imm) && Imm != 0) ||
+ (Imm >= 0xfffe0 && Imm <= 0xfffff);
break;
case RISCVOp::OPERAND_RVKRNUM:
Ok = Imm >= 0 && Imm <= 10;
break;
+ case RISCVOp::OPERAND_RVKRNUM_0_7:
+ Ok = Imm >= 0 && Imm <= 7;
+ break;
+ case RISCVOp::OPERAND_RVKRNUM_1_10:
+ Ok = Imm >= 1 && Imm <= 10;
+ break;
+ case RISCVOp::OPERAND_RVKRNUM_2_14:
+ Ok = Imm >= 2 && Imm <= 14;
+ break;
}
if (!Ok) {
ErrInfo = "Invalid immediate";
@@ -1685,13 +1822,6 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
}
const uint64_t TSFlags = Desc.TSFlags;
- if (RISCVII::hasMergeOp(TSFlags)) {
- unsigned OpIdx = RISCVII::getMergeOpNum(Desc);
- if (MI.findTiedOperandIdx(0) != OpIdx) {
- ErrInfo = "Merge op improperly tied";
- return false;
- }
- }
if (RISCVII::hasVLOp(TSFlags)) {
const MachineOperand &Op = MI.getOperand(RISCVII::getVLOpNum(Desc));
if (!Op.isImm() && !Op.isReg()) {
@@ -1713,6 +1843,10 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
}
if (RISCVII::hasSEWOp(TSFlags)) {
unsigned OpIdx = RISCVII::getSEWOpNum(Desc);
+ if (!MI.getOperand(OpIdx).isImm()) {
+ ErrInfo = "SEW value expected to be an immediate";
+ return false;
+ }
uint64_t Log2SEW = MI.getOperand(OpIdx).getImm();
if (Log2SEW > 31) {
ErrInfo = "Unexpected SEW value";
@@ -1726,6 +1860,10 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
}
if (RISCVII::hasVecPolicyOp(TSFlags)) {
unsigned OpIdx = RISCVII::getVecPolicyOpNum(Desc);
+ if (!MI.getOperand(OpIdx).isImm()) {
+ ErrInfo = "Policy operand expected to be an immediate";
+ return false;
+ }
uint64_t Policy = MI.getOperand(OpIdx).getImm();
if (Policy > (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC)) {
ErrInfo = "Invalid Policy Value";
@@ -1862,7 +2000,8 @@ bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
return MF.getFunction().hasMinSize();
}
-outliner::OutlinedFunction RISCVInstrInfo::getOutliningCandidateInfo(
+std::optional<outliner::OutlinedFunction>
+RISCVInstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
// First we need to filter out candidates where the X5 register (i.e. t0) can't
@@ -1876,7 +2015,7 @@ outliner::OutlinedFunction RISCVInstrInfo::getOutliningCandidateInfo(
// If the sequence doesn't have enough candidates left, then we're done.
if (RepeatedSequenceLocs.size() < 2)
- return outliner::OutlinedFunction();
+ return std::nullopt;
unsigned SequenceSize = 0;
@@ -1903,7 +2042,7 @@ outliner::OutlinedFunction RISCVInstrInfo::getOutliningCandidateInfo(
}
outliner::InstrType
-RISCVInstrInfo::getOutliningType(MachineBasicBlock::iterator &MBBI,
+RISCVInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,
unsigned Flags) const {
MachineInstr &MI = *MBBI;
MachineBasicBlock *MBB = MI.getParent();
@@ -1911,26 +2050,13 @@ RISCVInstrInfo::getOutliningType(MachineBasicBlock::iterator &MBBI,
MBB->getParent()->getSubtarget().getRegisterInfo();
const auto &F = MI.getMF()->getFunction();
- // Positions generally can't safely be outlined.
- if (MI.isPosition()) {
- // We can manually strip out CFI instructions later.
- if (MI.isCFIInstruction())
- // If current function has exception handling code, we can't outline &
- // strip these CFI instructions since it may break .eh_frame section
- // needed in unwinding.
- return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
- : outliner::InstrType::Invisible;
-
- return outliner::InstrType::Illegal;
- }
-
- // Don't trust the user to write safe inline assembly.
- if (MI.isInlineAsm())
- return outliner::InstrType::Illegal;
-
- // We can't outline branches to other basic blocks.
- if (MI.isTerminator() && !MBB->succ_empty())
- return outliner::InstrType::Illegal;
+ // We can manually strip out CFI instructions later.
+ if (MI.isCFIInstruction())
+ // If the current function has exception handling code, we can't outline &
+ // strip these CFI instructions since that may break the .eh_frame section
+ // needed for unwinding.
+ return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
+ : outliner::InstrType::Invisible;
// We need support for tail calls to outlined functions before return
// statements can be allowed.
@@ -1945,8 +2071,6 @@ RISCVInstrInfo::getOutliningType(MachineBasicBlock::iterator &MBBI,
// Make sure the operands don't reference something unsafe.
for (const auto &MO : MI.operands()) {
- if (MO.isMBB() || MO.isBlockAddress() || MO.isCPI() || MO.isJTI())
- return outliner::InstrType::Illegal;
// pcrel-hi and pcrel-lo can't be put in separate sections, so filter that out
// if at all possible.
@@ -1956,11 +2080,6 @@ RISCVInstrInfo::getOutliningType(MachineBasicBlock::iterator &MBBI,
return outliner::InstrType::Illegal;
}
- // Don't allow instructions which won't be materialized to impact outlining
- // analysis.
- if (MI.isMetaInstruction())
- return outliner::InstrType::Invisible;
-
return outliner::InstrType::Legal;
}
@@ -2087,6 +2206,23 @@ bool RISCVInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
return false;
switch (MI.getOpcode()) {
+ case RISCV::TH_MVEQZ:
+ case RISCV::TH_MVNEZ:
+ // We can't commute operands if operand 2 (i.e., rs1 in
+ // mveqz/mvnez rd,rs1,rs2) is the zero-register (as it is
+ // not valid as the in/out-operand 1).
+ if (MI.getOperand(2).getReg() == RISCV::X0)
+ return false;
+ // Operands 1 and 2 are commutable, if we switch the opcode.
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 1, 2);
+ case RISCV::TH_MULA:
+ case RISCV::TH_MULAW:
+ case RISCV::TH_MULAH:
+ case RISCV::TH_MULS:
+ case RISCV::TH_MULSW:
+ case RISCV::TH_MULSH:
+ // Operands 2 and 3 are commutable.
+ return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);
case RISCV::PseudoCCMOVGPR:
// Operands 4 and 5 are commutable.
return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 4, 5);
@@ -2235,6 +2371,14 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
};
switch (MI.getOpcode()) {
+ case RISCV::TH_MVEQZ:
+ case RISCV::TH_MVNEZ: {
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.setDesc(get(MI.getOpcode() == RISCV::TH_MVEQZ ? RISCV::TH_MVNEZ
+ : RISCV::TH_MVEQZ));
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, false, OpIdx1,
+ OpIdx2);
+ }
case RISCV::PseudoCCMOVGPR: {
// CCMOV can be commuted by inverting the condition.
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
@@ -2372,11 +2516,41 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
LiveVariables *LV,
LiveIntervals *LIS) const {
+ MachineInstrBuilder MIB;
switch (MI.getOpcode()) {
default:
- break;
+ return nullptr;
case CASE_WIDEOP_OPCODE_LMULS_MF4(FWADD_WV):
- case CASE_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV):
+ case CASE_WIDEOP_OPCODE_LMULS_MF4(FWSUB_WV): {
+ assert(RISCVII::hasVecPolicyOp(MI.getDesc().TSFlags) &&
+ MI.getNumExplicitOperands() == 7 &&
+ "Expect 7 explicit operands rd, rs2, rs1, rm, vl, sew, policy");
+ // If the tail policy is undisturbed we can't convert.
+ if ((MI.getOperand(RISCVII::getVecPolicyOpNum(MI.getDesc())).getImm() &
+ 1) == 0)
+ return nullptr;
+ // clang-format off
+ unsigned NewOpc;
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV)
+ CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV)
+ }
+ // clang-format on
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ .add(MI.getOperand(0))
+ .addReg(MI.getOperand(0).getReg(), RegState::Undef)
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5))
+ .add(MI.getOperand(6));
+ break;
+ }
case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
@@ -2392,8 +2566,6 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
switch (MI.getOpcode()) {
default:
llvm_unreachable("Unexpected opcode");
- CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWADD_WV)
- CASE_WIDEOP_CHANGE_OPCODE_LMULS_MF4(FWSUB_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV)
CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
@@ -2402,42 +2574,42 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
// clang-format on
MachineBasicBlock &MBB = *MI.getParent();
- MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
- .add(MI.getOperand(0))
- .add(MI.getOperand(1))
- .add(MI.getOperand(2))
- .add(MI.getOperand(3))
- .add(MI.getOperand(4));
- MIB.copyImplicitOps(MI);
-
- if (LV) {
- unsigned NumOps = MI.getNumOperands();
- for (unsigned I = 1; I < NumOps; ++I) {
- MachineOperand &Op = MI.getOperand(I);
- if (Op.isReg() && Op.isKill())
- LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
- }
+ MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ .add(MI.getOperand(0))
+ .addReg(MI.getOperand(0).getReg(), RegState::Undef)
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5));
+ }
+ }
+ MIB.copyImplicitOps(MI);
+
+ if (LV) {
+ unsigned NumOps = MI.getNumOperands();
+ for (unsigned I = 1; I < NumOps; ++I) {
+ MachineOperand &Op = MI.getOperand(I);
+ if (Op.isReg() && Op.isKill())
+ LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
}
+ }
- if (LIS) {
- SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
-
- if (MI.getOperand(0).isEarlyClobber()) {
- // Use operand 1 was tied to early-clobber def operand 0, so its live
- // interval could have ended at an early-clobber slot. Now they are not
- // tied we need to update it to the normal register slot.
- LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg());
- LiveRange::Segment *S = LI.getSegmentContaining(Idx);
- if (S->end == Idx.getRegSlot(true))
- S->end = Idx.getRegSlot();
- }
- }
+ if (LIS) {
+ SlotIndex Idx = LIS->ReplaceMachineInstrInMaps(MI, *MIB);
- return MIB;
- }
+ if (MI.getOperand(0).isEarlyClobber()) {
+ // Use operand 1 was tied to early-clobber def operand 0, so its live
+ // interval could have ended at an early-clobber slot. Now they are not
+ // tied we need to update it to the normal register slot.
+ LiveInterval &LI = LIS->getInterval(MI.getOperand(1).getReg());
+ LiveRange::Segment *S = LI.getSegmentContaining(Idx);
+ if (S->end == Idx.getRegSlot(true))
+ S->end = Idx.getRegSlot();
+ }
}
- return nullptr;
+ return MIB;
}
#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
@@ -2461,7 +2633,7 @@ void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
BuildMI(MBB, II, DL, get(RISCV::PseudoReadVLENB), DestReg).setMIFlag(Flag);
assert(isInt<32>(NumOfVReg) &&
"Expect the number of vector registers within 32-bits.");
- if (isPowerOf2_32(NumOfVReg)) {
+ if (llvm::has_single_bit<uint32_t>(NumOfVReg)) {
uint32_t ShiftAmount = Log2_32(NumOfVReg);
if (ShiftAmount == 0)
return;
@@ -2497,7 +2669,7 @@ void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
.addReg(DestReg, RegState::Kill)
.addReg(DestReg)
.setMIFlag(Flag);
- } else if (isPowerOf2_32(NumOfVReg - 1)) {
+ } else if (llvm::has_single_bit<uint32_t>(NumOfVReg - 1)) {
Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
uint32_t ShiftAmount = Log2_32(NumOfVReg - 1);
BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
@@ -2508,7 +2680,7 @@ void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
.addReg(ScaledRegister, RegState::Kill)
.addReg(DestReg, RegState::Kill)
.setMIFlag(Flag);
- } else if (isPowerOf2_32(NumOfVReg + 1)) {
+ } else if (llvm::has_single_bit<uint32_t>(NumOfVReg + 1)) {
Register ScaledRegister = MRI.createVirtualRegister(&RISCV::GPRRegClass);
uint32_t ShiftAmount = Log2_32(NumOfVReg + 1);
BuildMI(MBB, II, DL, get(RISCV::SLLI), ScaledRegister)
@@ -2534,224 +2706,12 @@ void RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
}
}
-// Checks if all users only demand the lower \p OrigBits of the original
-// instruction's result.
-// TODO: handle multiple interdependent transformations
-bool RISCVInstrInfo::hasAllNBitUsers(const MachineInstr &OrigMI,
- const MachineRegisterInfo &MRI,
- unsigned OrigBits) const {
-
- SmallSet<std::pair<const MachineInstr *, unsigned>, 4> Visited;
- SmallVector<std::pair<const MachineInstr *, unsigned>, 4> Worklist;
-
- Worklist.push_back(std::make_pair(&OrigMI, OrigBits));
-
- while (!Worklist.empty()) {
- auto P = Worklist.pop_back_val();
- const MachineInstr *MI = P.first;
- unsigned Bits = P.second;
-
- if (!Visited.insert(P).second)
- continue;
-
- // Only handle instructions with one def.
- if (MI->getNumExplicitDefs() != 1)
- return false;
-
- for (auto &UserOp : MRI.use_operands(MI->getOperand(0).getReg())) {
- const MachineInstr *UserMI = UserOp.getParent();
- unsigned OpIdx = UserMI->getOperandNo(&UserOp);
-
- switch (UserMI->getOpcode()) {
- default:
- return false;
-
- case RISCV::ADDIW:
- case RISCV::ADDW:
- case RISCV::DIVUW:
- case RISCV::DIVW:
- case RISCV::MULW:
- case RISCV::REMUW:
- case RISCV::REMW:
- case RISCV::SLLIW:
- case RISCV::SLLW:
- case RISCV::SRAIW:
- case RISCV::SRAW:
- case RISCV::SRLIW:
- case RISCV::SRLW:
- case RISCV::SUBW:
- case RISCV::ROLW:
- case RISCV::RORW:
- case RISCV::RORIW:
- case RISCV::CLZW:
- case RISCV::CTZW:
- case RISCV::CPOPW:
- case RISCV::SLLI_UW:
- case RISCV::FMV_W_X:
- case RISCV::FCVT_H_W:
- case RISCV::FCVT_H_WU:
- case RISCV::FCVT_S_W:
- case RISCV::FCVT_S_WU:
- case RISCV::FCVT_D_W:
- case RISCV::FCVT_D_WU:
- if (Bits >= 32)
- break;
- return false;
- case RISCV::SEXT_B:
- case RISCV::PACKH:
- if (Bits >= 8)
- break;
- return false;
- case RISCV::SEXT_H:
- case RISCV::FMV_H_X:
- case RISCV::ZEXT_H_RV32:
- case RISCV::ZEXT_H_RV64:
- case RISCV::PACKW:
- if (Bits >= 16)
- break;
- return false;
-
- case RISCV::PACK:
- if (Bits >= (STI.getXLen() / 2))
- break;
- return false;
-
- case RISCV::SRLI: {
- // If we are shifting right by less than Bits, and users don't demand
- // any bits that were shifted into [Bits-1:0], then we can consider this
- // as an N-Bit user.
- unsigned ShAmt = UserMI->getOperand(2).getImm();
- if (Bits > ShAmt) {
- Worklist.push_back(std::make_pair(UserMI, Bits - ShAmt));
- break;
- }
- return false;
- }
-
- // these overwrite higher input bits, otherwise the lower word of output
- // depends only on the lower word of input. So check their uses read W.
- case RISCV::SLLI:
- if (Bits >= (STI.getXLen() - UserMI->getOperand(2).getImm()))
- break;
- Worklist.push_back(std::make_pair(UserMI, Bits));
- break;
- case RISCV::ANDI: {
- uint64_t Imm = UserMI->getOperand(2).getImm();
- if (Bits >= (unsigned)llvm::bit_width(Imm))
- break;
- Worklist.push_back(std::make_pair(UserMI, Bits));
- break;
- }
- case RISCV::ORI: {
- uint64_t Imm = UserMI->getOperand(2).getImm();
- if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
- break;
- Worklist.push_back(std::make_pair(UserMI, Bits));
- break;
- }
-
- case RISCV::SLL:
- case RISCV::BSET:
- case RISCV::BCLR:
- case RISCV::BINV:
- // Operand 2 is the shift amount which uses log2(xlen) bits.
- if (OpIdx == 2) {
- if (Bits >= Log2_32(STI.getXLen()))
- break;
- return false;
- }
- Worklist.push_back(std::make_pair(UserMI, Bits));
- break;
-
- case RISCV::SRA:
- case RISCV::SRL:
- case RISCV::ROL:
- case RISCV::ROR:
- // Operand 2 is the shift amount which uses 6 bits.
- if (OpIdx == 2 && Bits >= Log2_32(STI.getXLen()))
- break;
- return false;
-
- case RISCV::ADD_UW:
- case RISCV::SH1ADD_UW:
- case RISCV::SH2ADD_UW:
- case RISCV::SH3ADD_UW:
- // Operand 1 is implicitly zero extended.
- if (OpIdx == 1 && Bits >= 32)
- break;
- Worklist.push_back(std::make_pair(UserMI, Bits));
- break;
-
- case RISCV::BEXTI:
- if (UserMI->getOperand(2).getImm() >= Bits)
- return false;
- break;
-
- case RISCV::SB:
- // The first argument is the value to store.
- if (OpIdx == 0 && Bits >= 8)
- break;
- return false;
- case RISCV::SH:
- // The first argument is the value to store.
- if (OpIdx == 0 && Bits >= 16)
- break;
- return false;
- case RISCV::SW:
- // The first argument is the value to store.
- if (OpIdx == 0 && Bits >= 32)
- break;
- return false;
-
- // For these, lower word of output in these operations, depends only on
- // the lower word of input. So, we check all uses only read lower word.
- case RISCV::COPY:
- case RISCV::PHI:
-
- case RISCV::ADD:
- case RISCV::ADDI:
- case RISCV::AND:
- case RISCV::MUL:
- case RISCV::OR:
- case RISCV::SUB:
- case RISCV::XOR:
- case RISCV::XORI:
-
- case RISCV::ANDN:
- case RISCV::BREV8:
- case RISCV::CLMUL:
- case RISCV::ORC_B:
- case RISCV::ORN:
- case RISCV::SH1ADD:
- case RISCV::SH2ADD:
- case RISCV::SH3ADD:
- case RISCV::XNOR:
- case RISCV::BSETI:
- case RISCV::BCLRI:
- case RISCV::BINVI:
- Worklist.push_back(std::make_pair(UserMI, Bits));
- break;
-
- case RISCV::PseudoCCMOVGPR:
- // Either operand 4 or operand 5 is returned by this instruction. If
- // only the lower word of the result is used, then only the lower word
- // of operand 4 and 5 is used.
- if (OpIdx != 4 && OpIdx != 5)
- return false;
- Worklist.push_back(std::make_pair(UserMI, Bits));
- break;
-
- case RISCV::VT_MASKC:
- case RISCV::VT_MASKCN:
- if (OpIdx != 1)
- return false;
- Worklist.push_back(std::make_pair(UserMI, Bits));
- break;
- }
- }
- }
-
- return true;
+ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
+RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
+ static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
+ {{MONontemporalBit0, "riscv-nontemporal-domain-bit-0"},
+ {MONontemporalBit1, "riscv-nontemporal-domain-bit-1"}};
+ return ArrayRef(TargetFlags);
}
// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index c663af75a557..99c907a98121 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -1,4 +1,4 @@
-//===-- RISCVInstrInfo.h - RISCV Instruction Information --------*- C++ -*-===//
+//===-- RISCVInstrInfo.h - RISC-V Instruction Information -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the RISCV implementation of the TargetInstrInfo class.
+// This file contains the RISC-V implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
@@ -25,6 +25,11 @@ namespace llvm {
class RISCVSubtarget;
+static const MachineMemOperand::Flags MONontemporalBit0 =
+ MachineMemOperand::MOTargetFlag1;
+static const MachineMemOperand::Flags MONontemporalBit1 =
+ MachineMemOperand::MOTargetFlag2;
+
namespace RISCVCC {
enum CondCode {
@@ -51,8 +56,12 @@ public:
unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
+ unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex,
+ unsigned &MemBytes) const override;
unsigned isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
+ unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex,
+ unsigned &MemBytes) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg,
@@ -155,12 +164,13 @@ public:
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override;
// Calculate target-specific information for a set of outlining candidates.
- outliner::OutlinedFunction getOutliningCandidateInfo(
+ std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
// Return if/how a given MachineInstr should be outlined.
- outliner::InstrType getOutliningType(MachineBasicBlock::iterator &MBBI,
- unsigned Flags) const override;
+ virtual outliner::InstrType
+ getOutliningTypeImpl(MachineBasicBlock::iterator &MBBI,
+ unsigned Flags) const override;
// Insert a custom frame for outlined functions.
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
@@ -194,6 +204,8 @@ public:
bool useMachineCombiner() const override { return true; }
+ MachineTraceStrategy getMachineCombinerTraceStrategy() const override;
+
void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2,
MachineInstr &NewMI1,
MachineInstr &NewMI2) const override;
@@ -220,19 +232,14 @@ public:
std::optional<unsigned> getInverseOpcode(unsigned Opcode) const override;
- // Returns true if all uses of OrigMI only depend on the lower \p NBits bits
- // of its output.
- bool hasAllNBitUsers(const MachineInstr &MI, const MachineRegisterInfo &MRI,
- unsigned NBits) const;
- // Returns true if all uses of OrigMI only depend on the lower word of its
- // output, so we can transform OrigMI to the corresponding W-version.
- bool hasAllWUsers(const MachineInstr &MI,
- const MachineRegisterInfo &MRI) const {
- return hasAllNBitUsers(MI, MRI, 32);
- }
+ ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
+ getSerializableMachineMemOperandTargetFlags() const override;
protected:
const RISCVSubtarget &STI;
+
+private:
+ unsigned getInstBundleLength(const MachineInstr &MI) const;
};
namespace RISCV {
@@ -261,6 +268,17 @@ bool hasEqualFRM(const MachineInstr &MI1, const MachineInstr &MI2);
// Special immediate for AVL operand of V pseudo instructions to indicate VLMax.
static constexpr int64_t VLMaxSentinel = -1LL;
+// Mask assignments for floating-point
+static constexpr unsigned FPMASK_Negative_Infinity = 0x001;
+static constexpr unsigned FPMASK_Negative_Normal = 0x002;
+static constexpr unsigned FPMASK_Negative_Subnormal = 0x004;
+static constexpr unsigned FPMASK_Negative_Zero = 0x008;
+static constexpr unsigned FPMASK_Positive_Zero = 0x010;
+static constexpr unsigned FPMASK_Positive_Subnormal = 0x020;
+static constexpr unsigned FPMASK_Positive_Normal = 0x040;
+static constexpr unsigned FPMASK_Positive_Infinity = 0x080;
+static constexpr unsigned FPMASK_Signaling_NaN = 0x100;
+static constexpr unsigned FPMASK_Quiet_NaN = 0x200;
} // namespace RISCV
namespace RISCVVPseudosTable {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index c699a94943d8..e58e3412aea3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1,4 +1,4 @@
-//===-- RISCVInstrInfo.td - Target Description for RISCV ---*- tablegen -*-===//
+//===-- RISCVInstrInfo.td - Target Description for RISC-V --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -55,13 +55,11 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd,
def riscv_call : SDNode<"RISCVISD::CALL", SDT_RISCVCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
-def riscv_ret_flag : SDNode<"RISCVISD::RET_FLAG", SDTNone,
+def riscv_ret_glue : SDNode<"RISCVISD::RET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def riscv_uret_flag : SDNode<"RISCVISD::URET_FLAG", SDTNone,
+def riscv_sret_glue : SDNode<"RISCVISD::SRET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
-def riscv_sret_flag : SDNode<"RISCVISD::SRET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
-def riscv_mret_flag : SDNode<"RISCVISD::MRET_FLAG", SDTNone,
+def riscv_mret_glue : SDNode<"RISCVISD::MRET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
def riscv_selectcc : SDNode<"RISCVISD::SELECT_CC", SDT_RISCVSelectCC>;
def riscv_brcc : SDNode<"RISCVISD::BR_CC", SDT_RISCVBrCC,
@@ -86,14 +84,14 @@ def riscv_read_cycle_wide : SDNode<"RISCVISD::READ_CYCLE_WIDE",
def riscv_add_lo : SDNode<"RISCVISD::ADD_LO", SDTIntBinOp>;
def riscv_hi : SDNode<"RISCVISD::HI", SDTIntUnaryOp>;
def riscv_lla : SDNode<"RISCVISD::LLA", SDTIntUnaryOp>;
+def riscv_lga : SDNode<"RISCVISD::LGA", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def riscv_add_tprel : SDNode<"RISCVISD::ADD_TPREL",
SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
SDTCisInt<0>]>>;
-def riscv_la : SDNode<"RISCVISD::LA", SDTLoad,
- [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def riscv_la_tls_ie : SDNode<"RISCVISD::LA_TLS_IE", SDTLoad,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def riscv_la_tls_gd : SDNode<"RISCVISD::LA_TLS_GD", SDTIntUnaryOp>;
@@ -154,7 +152,7 @@ class UImmAsmOperand<int width, string suffix = "">
def FenceArg : AsmOperandClass {
let Name = "FenceArg";
let RenderMethod = "addFenceArgOperands";
- let DiagnosticType = "InvalidFenceArg";
+ let ParserMethod = "parseFenceArg";
}
def fencearg : Operand<XLenVT> {
@@ -191,11 +189,24 @@ def uimmlog2xlen : Operand<XLenVT>, ImmLeaf<XLenVT, [{
let OperandNamespace = "RISCVOp";
}
-def uimm2 : Operand<XLenVT> {
+def uimm1 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<1>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<1>;
+ let DecoderMethod = "decodeUImmOperand<1>";
+ let OperandType = "OPERAND_UIMM1";
+ let OperandNamespace = "RISCVOp";
+}
+
+def uimm2 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<2>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<2>;
let DecoderMethod = "decodeUImmOperand<2>";
let OperandType = "OPERAND_UIMM2";
let OperandNamespace = "RISCVOp";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isUInt<2>(Imm);
+ }];
}
def uimm3 : Operand<XLenVT> {
@@ -205,6 +216,13 @@ def uimm3 : Operand<XLenVT> {
let OperandNamespace = "RISCVOp";
}
+def uimm4 : Operand<XLenVT> {
+ let ParserMatchClass = UImmAsmOperand<4>;
+ let DecoderMethod = "decodeUImmOperand<4>";
+ let OperandType = "OPERAND_UIMM4";
+ let OperandNamespace = "RISCVOp";
+}
+
def uimm5 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<5>;
let DecoderMethod = "decodeUImmOperand<5>";
@@ -219,6 +237,13 @@ def InsnDirectiveOpcode : AsmOperandClass {
let PredicateMethod = "isImm";
}
+def uimm6 : Operand<XLenVT> {
+ let ParserMatchClass = UImmAsmOperand<6>;
+ let DecoderMethod = "decodeUImmOperand<6>";
+ let OperandType = "OPERAND_UIMM6";
+ let OperandNamespace = "RISCVOp";
+}
+
def uimm7_opcode : Operand<XLenVT> {
let ParserMatchClass = InsnDirectiveOpcode;
let DecoderMethod = "decodeUImmOperand<7>";
@@ -233,6 +258,13 @@ def uimm7 : Operand<XLenVT> {
let OperandNamespace = "RISCVOp";
}
+def uimm8 : Operand<XLenVT> {
+ let ParserMatchClass = UImmAsmOperand<8>;
+ let DecoderMethod = "decodeUImmOperand<8>";
+ let OperandType = "OPERAND_UIMM8";
+ let OperandNamespace = "RISCVOp";
+}
+
def simm12 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isInt<12>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<12>;
let EncoderMethod = "getImmOpValue";
@@ -375,11 +407,12 @@ def ixlenimm_li : Operand<XLenVT> {
let ParserMatchClass = ImmXLenAsmOperand<"", "LI">;
}
-// Standalone (codegen-only) immleaf patterns.
+// Accepts a subset of LI operands, used by LAImm and LLAImm
+def ixlenimm_li_restricted : Operand<XLenVT> {
+ let ParserMatchClass = ImmXLenAsmOperand<"", "LI_Restricted">;
+}
-// A 12-bit signed immediate plus one where the imm range will be [-2047, 2048].
-def simm12_plus1 : ImmLeaf<XLenVT,
- [{return (isInt<12>(Imm) && Imm != -2048) || Imm == 2048;}]>;
+// Standalone (codegen-only) immleaf patterns.
// A 6-bit constant greater than 32.
def uimm6gt32 : ImmLeaf<XLenVT, [{
@@ -446,13 +479,13 @@ def AddiPairImmLarge : SDNodeXForm<imm, [{
}]>;
def TrailingZeros : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(countTrailingZeros(N->getZExtValue()),
+ return CurDAG->getTargetConstant(llvm::countr_zero(N->getZExtValue()),
SDLoc(N), N->getValueType(0));
}]>;
def XLenSubTrailingOnes : SDNodeXForm<imm, [{
uint64_t XLen = Subtarget->getXLen();
- uint64_t TrailingOnes = countTrailingOnes(N->getZExtValue());
+ uint64_t TrailingOnes = llvm::countr_one(N->getZExtValue());
return CurDAG->getTargetConstant(XLen - TrailingOnes, SDLoc(N),
N->getValueType(0));
}]>;
@@ -607,11 +640,9 @@ def LUI : RVInstU<OPC_LUI, (outs GPR:$rd), (ins uimm20_lui:$imm20),
def AUIPC : RVInstU<OPC_AUIPC, (outs GPR:$rd), (ins uimm20_auipc:$imm20),
"auipc", "$rd, $imm20">, Sched<[WriteIALU]>;
-let isCall = 1 in
def JAL : RVInstJ<OPC_JAL, (outs GPR:$rd), (ins simm21_lsb0_jal:$imm20),
"jal", "$rd, $imm20">, Sched<[WriteJal]>;
-let isCall = 1 in
def JALR : RVInstI<0b000, OPC_JALR, (outs GPR:$rd),
(ins GPR:$rs1, simm12:$imm12),
"jalr", "$rd, ${imm12}(${rs1})">,
@@ -658,7 +689,7 @@ def SLLI : Shift_ri<0b00000, 0b001, "slli">;
def SRLI : Shift_ri<0b00000, 0b101, "srli">;
def SRAI : Shift_ri<0b01000, 0b101, "srai">;
-def ADD : ALU_rr<0b0000000, 0b000, "add", /*Commutable*/1>,
+def ADD : ALU_rr<0b0000000, 0b000, "add", Commutable=1>,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
def SUB : ALU_rr<0b0100000, 0b000, "sub">,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
@@ -670,15 +701,15 @@ def SLT : ALU_rr<0b0000000, 0b010, "slt">,
def SLTU : ALU_rr<0b0000000, 0b011, "sltu">,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
}
-def XOR : ALU_rr<0b0000000, 0b100, "xor", /*Commutable*/1>,
+def XOR : ALU_rr<0b0000000, 0b100, "xor", Commutable=1>,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
def SRL : ALU_rr<0b0000000, 0b101, "srl">,
Sched<[WriteShiftReg, ReadShiftReg, ReadShiftReg]>;
def SRA : ALU_rr<0b0100000, 0b101, "sra">,
Sched<[WriteShiftReg, ReadShiftReg, ReadShiftReg]>;
-def OR : ALU_rr<0b0000000, 0b110, "or", /*Commutable*/1>,
+def OR : ALU_rr<0b0000000, 0b110, "or", Commutable=1>,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def AND : ALU_rr<0b0000000, 0b111, "and", /*Commutable*/1>,
+def AND : ALU_rr<0b0000000, 0b111, "and", Commutable=1>,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
@@ -772,7 +803,7 @@ def SLLIW : ShiftW_ri<0b0000000, 0b001, "slliw">;
def SRLIW : ShiftW_ri<0b0000000, 0b101, "srliw">;
def SRAIW : ShiftW_ri<0b0100000, 0b101, "sraiw">;
-def ADDW : ALUW_rr<0b0000000, 0b000, "addw", /*Commutable*/1>,
+def ADDW : ALUW_rr<0b0000000, 0b000, "addw", Commutable=1>,
Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
def SUBW : ALUW_rr<0b0100000, 0b000, "subw">,
Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
@@ -790,12 +821,6 @@ def SRAW : ALUW_rr<0b0100000, 0b101, "sraw">,
//===----------------------------------------------------------------------===//
let isBarrier = 1, isReturn = 1, isTerminator = 1 in {
-def URET : Priv<"uret", 0b0000000>, Sched<[]> {
- let rd = 0;
- let rs1 = 0;
- let rs2 = 0b00010;
-}
-
def SRET : Priv<"sret", 0b0001000>, Sched<[]> {
let rd = 0;
let rs1 = 0;
@@ -1084,6 +1109,17 @@ def : InstAlias<"zext.b $rd, $rs", (ANDI GPR:$rd, GPR:$rs, 0xFF), 0>;
// .insn directive instructions
//===----------------------------------------------------------------------===//
+def AnyRegOperand : AsmOperandClass {
+ let Name = "AnyRegOperand";
+ let RenderMethod = "addRegOperands";
+ let PredicateMethod = "isAnyReg";
+}
+
+def AnyReg : Operand<XLenVT> {
+ let OperandType = "OPERAND_REGISTER";
+ let ParserMatchClass = AnyRegOperand;
+}
+
// isCodeGenOnly = 1 to hide them from the tablegened assembly parser.
let isCodeGenOnly = 1, hasSideEffects = 1, mayLoad = 1, mayStore = 1,
hasNoSchedulingInfo = 1 in {
@@ -1171,10 +1207,10 @@ def : InstAlias<".insn_s $opcode, $funct3, $rs2, ${imm12}(${rs1})",
/// Generic pattern classes
-class PatGpr<SDPatternOperator OpNode, RVInst Inst>
- : Pat<(OpNode GPR:$rs1), (Inst GPR:$rs1)>;
-class PatGprGpr<SDPatternOperator OpNode, RVInst Inst>
- : Pat<(OpNode GPR:$rs1, GPR:$rs2), (Inst GPR:$rs1, GPR:$rs2)>;
+class PatGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
+ : Pat<(vt (OpNode (vt GPR:$rs1))), (Inst GPR:$rs1)>;
+class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
+ : Pat<(vt (OpNode (vt GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>;
class PatGprImm<SDPatternOperator OpNode, RVInst Inst, ImmLeaf ImmType>
: Pat<(XLenVT (OpNode (XLenVT GPR:$rs1), ImmType:$imm)),
@@ -1189,7 +1225,8 @@ class PatGprUimmLog2XLen<SDPatternOperator OpNode, RVInstIShift Inst>
def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{
return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLE(MVT::i32);
}]>;
-def sexti32 : ComplexPattern<i64, 1, "selectSExti32">;
+def sexti16 : ComplexPattern<XLenVT, 1, "selectSExtBits<16>">;
+def sexti32 : ComplexPattern<i64, 1, "selectSExtBits<32>">;
def assertzexti32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLE(MVT::i32);
}]>;
@@ -1197,6 +1234,8 @@ def zexti32 : ComplexPattern<i64, 1, "selectZExtBits<32>">;
def zexti16 : ComplexPattern<XLenVT, 1, "selectZExtBits<16>">;
def zexti8 : ComplexPattern<XLenVT, 1, "selectZExtBits<8>">;
+def ext : PatFrags<(ops node:$A), [(sext node:$A), (zext node:$A)]>;
+
class binop_oneuse<SDPatternOperator operator>
: PatFrag<(ops node:$A, node:$B),
(operator node:$A, node:$B), [{
@@ -1204,14 +1243,12 @@ class binop_oneuse<SDPatternOperator operator>
}]>;
def and_oneuse : binop_oneuse<and>;
-def add_oneuse : binop_oneuse<add>;
def mul_oneuse : binop_oneuse<mul>;
def mul_const_oneuse : PatFrag<(ops node:$A, node:$B),
(mul node:$A, node:$B), [{
if (auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
- if (N1C->hasOneUse())
- return true;
+ return N1C->hasOneUse();
return false;
}]>;
@@ -1224,6 +1261,7 @@ class unop_oneuse<SDPatternOperator operator>
def sext_oneuse : unop_oneuse<sext>;
def zext_oneuse : unop_oneuse<zext>;
def anyext_oneuse : unop_oneuse<anyext>;
+def ext_oneuse : unop_oneuse<ext>;
def fpext_oneuse : unop_oneuse<any_fpextend>;
/// Simple arithmetic operations
@@ -1291,7 +1329,7 @@ def PseudoAddTPRel : Pseudo<(outs GPR:$rd),
/// FrameIndex calculations
-def : Pat<(FrameAddrRegImm GPR:$rs1, simm12:$imm12),
+def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12),
(ADDI GPR:$rs1, simm12:$imm12)>;
/// HI and ADD_LO address nodes.
@@ -1325,21 +1363,18 @@ def : PatGprSimm12<setlt, SLTI>;
def : PatGprGpr<setult, SLTU>;
def : PatGprSimm12<setult, SLTIU>;
+// RISC-V doesn't have general instructions for integer setne/seteq, but we can
+// check for equality with 0. These ComplexPatterns rewrite the setne/seteq into
+// something that can be compared with 0.
+// These ComplexPatterns must be used in pairs.
+def riscv_setne : ComplexPattern<XLenVT, 1, "selectSETNE", [setcc]>;
+def riscv_seteq : ComplexPattern<XLenVT, 1, "selectSETEQ", [setcc]>;
+
// Define pattern expansions for setcc operations that aren't directly
// handled by a RISC-V instruction.
-def : Pat<(seteq GPR:$rs1, 0), (SLTIU GPR:$rs1, 1)>;
-def : Pat<(seteq GPR:$rs1, GPR:$rs2), (SLTIU (XOR GPR:$rs1, GPR:$rs2), 1)>;
-def : Pat<(seteq GPR:$rs1, simm12_plus1:$imm12),
- (SLTIU (ADDI GPR:$rs1, (NegImm simm12_plus1:$imm12)), 1)>;
-def : Pat<(seteq GPR:$rs1, -2048),
- (SLTIU (XORI GPR:$rs1, -2048), 1)>;
-def : Pat<(setne GPR:$rs1, 0), (SLTU X0, GPR:$rs1)>;
-def : Pat<(setne GPR:$rs1, GPR:$rs2), (SLTU X0, (XOR GPR:$rs1, GPR:$rs2))>;
-def : Pat<(setne GPR:$rs1, simm12_plus1:$imm12),
- (SLTU X0, (ADDI GPR:$rs1, (NegImm simm12_plus1:$imm12)))>;
-def : Pat<(setne GPR:$rs1, -2048),
- (SLTU X0, (XORI GPR:$rs1, -2048))>;
-def : Pat<(setne GPR:$rs1, -1), (SLTIU GPR:$rs1, -1)>;
+def : Pat<(riscv_seteq (XLenVT GPR:$rs1)), (SLTIU GPR:$rs1, 1)>;
+def : Pat<(riscv_setne (XLenVT GPR:$rs1)), (SLTU (XLenVT X0), GPR:$rs1)>;
+def : Pat<(XLenVT (setne (XLenVT GPR:$rs1), -1)), (SLTIU GPR:$rs1, -1)>;
def IntCCtoRISCVCC : SDNodeXForm<riscv_selectcc, [{
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
@@ -1362,10 +1397,12 @@ def PseudoCCMOVGPR : Pseudo<(outs GPR:$dst),
(ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
GPR:$falsev, GPR:$truev),
[(set GPR:$dst,
- (riscv_selectcc_frag:$cc GPR:$lhs, GPR:$rhs,
- cond, GPR:$truev,
+ (riscv_selectcc_frag:$cc (XLenVT GPR:$lhs),
+ GPR:$rhs, cond,
+ (XLenVT GPR:$truev),
GPR:$falsev))]>,
- Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
+ ReadSFBALU, ReadSFBALU]>;
}
// Conditional binops that update $dst to (op rs1, rs2) when condition
@@ -1377,57 +1414,65 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8,
def PseudoCCADD : Pseudo<(outs GPR:$dst),
(ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
+ ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
def PseudoCCSUB : Pseudo<(outs GPR:$dst),
(ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
+ ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
def PseudoCCAND : Pseudo<(outs GPR:$dst),
(ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
+ ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
def PseudoCCOR : Pseudo<(outs GPR:$dst),
(ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
+ ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
def PseudoCCXOR : Pseudo<(outs GPR:$dst),
(ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
+ ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
// RV64I instructions
def PseudoCCADDW : Pseudo<(outs GPR:$dst),
(ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
+ ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
def PseudoCCSUBW : Pseudo<(outs GPR:$dst),
(ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB, ReadSFB]>;
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
+ ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
}
-multiclass SelectCC_GPR_rrirr<RegisterClass valty> {
+multiclass SelectCC_GPR_rrirr<DAGOperand valty, ValueType vt> {
let usesCustomInserter = 1 in
def _Using_CC_GPR : Pseudo<(outs valty:$dst),
(ins GPR:$lhs, GPR:$rhs, ixlenimm:$cc,
valty:$truev, valty:$falsev),
[(set valty:$dst,
- (riscv_selectcc_frag:$cc GPR:$lhs, GPR:$rhs, cond,
- valty:$truev, valty:$falsev))]>;
+ (riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), GPR:$rhs, cond,
+ (vt valty:$truev), valty:$falsev))]>;
// Explicitly select 0 in the condition to X0. The register coalescer doesn't
// always do it.
- def : Pat<(riscv_selectcc_frag:$cc GPR:$lhs, 0, cond, valty:$truev,
+ def : Pat<(riscv_selectcc_frag:$cc (XLenVT GPR:$lhs), 0, cond, (vt valty:$truev),
valty:$falsev),
- (!cast<Instruction>(NAME#"_Using_CC_GPR") GPR:$lhs, X0,
+ (!cast<Instruction>(NAME#"_Using_CC_GPR") GPR:$lhs, (XLenVT X0),
(IntCCtoRISCVCC $cc), valty:$truev, valty:$falsev)>;
}
let Predicates = [NoShortForwardBranchOpt] in
-defm Select_GPR : SelectCC_GPR_rrirr<GPR>;
+defm Select_GPR : SelectCC_GPR_rrirr<GPR, XLenVT>;
-class SelectCompressOpt<CondCode Cond>: Pat<(riscv_selectcc_frag:$select GPR:$lhs, simm12_no6:$Constant, Cond,
- GPR:$truev, GPR:$falsev),
- (Select_GPR_Using_CC_GPR (ADDI GPR:$lhs, (NegImm simm12:$Constant)), X0,
+class SelectCompressOpt<CondCode Cond>
+ : Pat<(riscv_selectcc_frag:$select (XLenVT GPR:$lhs), simm12_no6:$Constant, Cond,
+ (XLenVT GPR:$truev), GPR:$falsev),
+ (Select_GPR_Using_CC_GPR (ADDI GPR:$lhs, (NegImm simm12:$Constant)), (XLenVT X0),
(IntCCtoRISCVCC $select), GPR:$truev, GPR:$falsev)>;
def OptForMinSize : Predicate<"MF ? MF->getFunction().hasMinSize() : false">;
@@ -1441,15 +1486,16 @@ let Predicates = [HasStdExtC, OptForMinSize] in {
// Match `riscv_brcc` and lower to the appropriate RISC-V branch instruction.
multiclass BccPat<CondCode Cond, RVInstB Inst> {
- def : Pat<(riscv_brcc GPR:$rs1, GPR:$rs2, Cond, bb:$imm12),
+ def : Pat<(riscv_brcc (XLenVT GPR:$rs1), GPR:$rs2, Cond, bb:$imm12),
(Inst GPR:$rs1, GPR:$rs2, simm13_lsb0:$imm12)>;
// Explicitly select 0 to X0. The register coalescer doesn't always do it.
- def : Pat<(riscv_brcc GPR:$rs1, 0, Cond, bb:$imm12),
- (Inst GPR:$rs1, X0, simm13_lsb0:$imm12)>;
+ def : Pat<(riscv_brcc (XLenVT GPR:$rs1), 0, Cond, bb:$imm12),
+ (Inst GPR:$rs1, (XLenVT X0), simm13_lsb0:$imm12)>;
}
-class BrccCompessOpt<CondCode Cond, RVInstB Inst> : Pat<(riscv_brcc GPR:$lhs, simm12_no6:$Constant, Cond, bb:$place),
- (Inst (ADDI GPR:$lhs, (NegImm simm12:$Constant)), X0, bb:$place)>;
+class BrccCompressOpt<CondCode Cond, RVInstB Inst>
+ : Pat<(riscv_brcc GPR:$lhs, simm12_no6:$Constant, Cond, bb:$place),
+ (Inst (ADDI GPR:$lhs, (NegImm simm12:$Constant)), (XLenVT X0), bb:$place)>;
defm : BccPat<SETEQ, BEQ>;
defm : BccPat<SETNE, BNE>;
@@ -1459,10 +1505,30 @@ defm : BccPat<SETULT, BLTU>;
defm : BccPat<SETUGE, BGEU>;
let Predicates = [HasStdExtC, OptForMinSize] in {
- def : BrccCompessOpt<SETEQ, BEQ>;
- def : BrccCompessOpt<SETNE, BNE>;
+ def : BrccCompressOpt<SETEQ, BEQ>;
+ def : BrccCompressOpt<SETNE, BNE>;
}
+class LongBccPseudo : Pseudo<(outs),
+ (ins GPR:$rs1, GPR:$rs2, simm21_lsb0_jal:$imm20),
+ []> {
+ let Size = 8;
+ let isBarrier = 1;
+ let isBranch = 1;
+ let hasSideEffects = 0;
+ let mayStore = 0;
+ let mayLoad = 0;
+ let isAsmParserOnly = 1;
+ let hasNoSchedulingInfo = 1;
+}
+
+def PseudoLongBEQ : LongBccPseudo;
+def PseudoLongBNE : LongBccPseudo;
+def PseudoLongBLT : LongBccPseudo;
+def PseudoLongBGE : LongBccPseudo;
+def PseudoLongBLTU : LongBccPseudo;
+def PseudoLongBGEU : LongBccPseudo;
+
let isBarrier = 1, isBranch = 1, isTerminator = 1 in
def PseudoBR : Pseudo<(outs), (ins simm21_lsb0_jal:$imm20), [(br bb:$imm20)]>,
PseudoInstExpansion<(JAL X0, simm21_lsb0_jal:$imm20)>;
@@ -1482,10 +1548,9 @@ def : Pat<(brind (add GPRJALR:$rs1, simm12:$imm12)),
// Define isCodeGenOnly = 0 to support parsing assembly "call" instruction.
let isCall = 1, isBarrier = 1, isCodeGenOnly = 0, Size = 8, hasSideEffects = 0,
mayStore = 0, mayLoad = 0 in
-def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), []>,
- Sched<[WriteIALU, WriteJalr, ReadJalr]> {
- let AsmString = "call\t$rd, $func";
-}
+def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), [],
+ "call", "$rd, $func">,
+ Sched<[WriteIALU, WriteJalr, ReadJalr]>;
// PseudoCALL is a pseudo instruction which will eventually expand to auipc
// and jalr while encoding. This is desirable, as an auipc+jalr pair with
@@ -1494,17 +1559,15 @@ def PseudoCALLReg : Pseudo<(outs GPR:$rd), (ins call_symbol:$func), []>,
// Define AsmString to print "call" when compiling with the -S flag.
// Define isCodeGenOnly = 0 to support parsing assembly "call" instruction.
let isCall = 1, Defs = [X1], isCodeGenOnly = 0, Size = 8 in
-def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), []>,
- Sched<[WriteIALU, WriteJalr, ReadJalr]> {
- let AsmString = "call\t$func";
-}
+def PseudoCALL : Pseudo<(outs), (ins call_symbol:$func), [],
+ "call", "$func">,
+ Sched<[WriteIALU, WriteJalr, ReadJalr]>;
def : Pat<(riscv_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>;
def : Pat<(riscv_call texternalsym:$func), (PseudoCALL texternalsym:$func)>;
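As the comments above note, "call" stays a single 8-byte pseudo and only becomes real instructions at encoding time. A sketch of the eventual expansion (the symbol foo and the scratch label .Lpcrel_hi0 are illustrative, not taken from this patch):

        call    foo                             # single 8-byte pseudo until encoding
        # ... is emitted as:
.Lpcrel_hi0:
        auipc   ra, %pcrel_hi(foo)
        jalr    ra, %pcrel_lo(.Lpcrel_hi0)(ra)

PseudoTAIL further down follows the same scheme, but it goes through a temporary (conventionally t1) and does not write ra.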
-def : Pat<(riscv_uret_flag), (URET X0, X0)>;
-def : Pat<(riscv_sret_flag), (SRET X0, X0)>;
-def : Pat<(riscv_mret_flag), (MRET X0, X0)>;
+def : Pat<(riscv_sret_glue), (SRET (XLenVT X0), (XLenVT X0))>;
+def : Pat<(riscv_mret_glue), (MRET (XLenVT X0), (XLenVT X0))>;
let isCall = 1, Defs = [X1] in
def PseudoCALLIndirect : Pseudo<(outs), (ins GPRJALR:$rs1),
@@ -1512,7 +1575,7 @@ def PseudoCALLIndirect : Pseudo<(outs), (ins GPRJALR:$rs1),
PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>;
let isBarrier = 1, isReturn = 1, isTerminator = 1 in
-def PseudoRET : Pseudo<(outs), (ins), [(riscv_ret_flag)]>,
+def PseudoRET : Pseudo<(outs), (ins), [(riscv_ret_glue)]>,
PseudoInstExpansion<(JALR X0, X1, 0)>;
// PseudoTAIL is a pseudo instruction similar to PseudoCALL and will eventually
@@ -1520,10 +1583,9 @@ def PseudoRET : Pseudo<(outs), (ins), [(riscv_ret_flag)]>,
// Define AsmString to print "tail" when compiling with the -S flag.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [X2],
Size = 8, isCodeGenOnly = 0 in
-def PseudoTAIL : Pseudo<(outs), (ins call_symbol:$dst), []>,
- Sched<[WriteIALU, WriteJalr, ReadJalr]> {
- let AsmString = "tail\t$dst";
-}
+def PseudoTAIL : Pseudo<(outs), (ins call_symbol:$dst), [],
+ "tail", "$dst">,
+ Sched<[WriteIALU, WriteJalr, ReadJalr]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [X2] in
def PseudoTAILIndirect : Pseudo<(outs), (ins GPRTC:$rs1),
@@ -1537,16 +1599,20 @@ def : Pat<(riscv_tail (iPTR texternalsym:$dst)),
let isCall = 0, isBarrier = 1, isBranch = 1, isTerminator = 1, Size = 8,
isCodeGenOnly = 0, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in
-def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), []>,
- Sched<[WriteIALU, WriteJalr, ReadJalr]> {
- let AsmString = "jump\t$target, $rd";
-}
+def PseudoJump : Pseudo<(outs GPR:$rd), (ins pseudo_jump_symbol:$target), [],
+ "jump", "$target, $rd">,
+ Sched<[WriteIALU, WriteJalr, ReadJalr]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"lla", "$dst, $src">;
+// Refer to comment on PseudoLI for explanation of Size=32
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 32, isCodeGenOnly = 0,
+ isAsmParserOnly = 1 in
+def PseudoLLAImm : Pseudo<(outs GPR:$dst), (ins ixlenimm_li_restricted:$imm), [],
+ "lla", "$dst, $imm">;
def : Pat<(riscv_lla tglobaladdr:$in), (PseudoLLA tglobaladdr:$in)>;
def : Pat<(riscv_lla tblockaddress:$in), (PseudoLLA tblockaddress:$in)>;
def : Pat<(riscv_lla tjumptable:$in), (PseudoLLA tjumptable:$in)>;
@@ -1554,17 +1620,28 @@ def : Pat<(riscv_lla tconstpool:$in), (PseudoLLA tconstpool:$in)>;
let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
+def PseudoLGA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
+ "lga", "$dst, $src">;
+
+def : Pat<(iPTR (riscv_lga tglobaladdr:$in)), (PseudoLGA tglobaladdr:$in)>;
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
+ isAsmParserOnly = 1 in
def PseudoLA : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la", "$dst, $src">;
-def : Pat<(riscv_la tglobaladdr:$in), (PseudoLA tglobaladdr:$in)>;
+// Refer to comment on PseudoLI for explanation of Size=32
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 32,
+ isCodeGenOnly = 0, isAsmParserOnly = 1 in
+def PseudoLAImm : Pseudo<(outs GPR:$rd), (ins ixlenimm_li_restricted:$imm), [],
+ "la", "$rd, $imm">;
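The immediate forms above (PseudoLLAImm, PseudoLAImm) use Size = 32 to cover the worst case of materializing an arbitrary XLEN-bit constant: on RV64 an li-style expansion can need up to eight 4-byte instructions. A shape-only sketch (operands elided; the exact shift/addend split depends on the constant):

        lui     a0, ...
        addiw   a0, a0, ...
        slli    a0, a0, 12
        addi    a0, a0, ...
        slli    a0, a0, 12
        addi    a0, a0, ...
        slli    a0, a0, 12
        addi    a0, a0, ...                # eight instructions = 32 bytes in the worst case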
let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 0,
isAsmParserOnly = 1 in
def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
"la.tls.ie", "$dst, $src">;
-def : Pat<(riscv_la_tls_ie tglobaltlsaddr:$in),
+def : Pat<(iPTR (riscv_la_tls_ie tglobaltlsaddr:$in)),
(PseudoLA_TLS_IE tglobaltlsaddr:$in)>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, isCodeGenOnly = 0,
@@ -1595,30 +1672,29 @@ def PseudoZEXT_W : Pseudo<(outs GPR:$rd), (ins GPR:$rs), [], "zext.w", "$rd, $rs
/// Loads
-multiclass LdPat<PatFrag LoadOp, RVInst Inst, ValueType vt = XLenVT> {
- def : Pat<(vt (LoadOp (AddrRegImm GPR:$rs1, simm12:$imm12))),
- (Inst GPR:$rs1, simm12:$imm12)>;
-}
+class LdPat<PatFrag LoadOp, RVInst Inst, ValueType vt = XLenVT>
+ : Pat<(vt (LoadOp (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12))),
+ (Inst GPR:$rs1, simm12:$imm12)>;
-defm : LdPat<sextloadi8, LB>;
-defm : LdPat<extloadi8, LB>;
-defm : LdPat<sextloadi16, LH>;
-defm : LdPat<extloadi16, LH>;
-defm : LdPat<load, LW, i32>, Requires<[IsRV32]>;
-defm : LdPat<zextloadi8, LBU>;
-defm : LdPat<zextloadi16, LHU>;
+def : LdPat<sextloadi8, LB>;
+def : LdPat<extloadi8, LBU>; // Prefer unsigned due to no c.lb in Zcb.
+def : LdPat<sextloadi16, LH>;
+def : LdPat<extloadi16, LH>;
+def : LdPat<load, LW, i32>, Requires<[IsRV32]>;
+def : LdPat<zextloadi8, LBU>;
+def : LdPat<zextloadi16, LHU>;
/// Stores
-multiclass StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
- ValueType vt> {
- def : Pat<(StoreOp (vt StTy:$rs2), (AddrRegImm GPR:$rs1, simm12:$imm12)),
- (Inst StTy:$rs2, GPR:$rs1, simm12:$imm12)>;
-}
+class StPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
+ ValueType vt>
+ : Pat<(StoreOp (vt StTy:$rs2), (AddrRegImm (XLenVT GPR:$rs1),
+ simm12:$imm12)),
+ (Inst StTy:$rs2, GPR:$rs1, simm12:$imm12)>;
-defm : StPat<truncstorei8, SB, GPR, XLenVT>;
-defm : StPat<truncstorei16, SH, GPR, XLenVT>;
-defm : StPat<store, SW, GPR, i32>, Requires<[IsRV32]>;
+def : StPat<truncstorei8, SB, GPR, XLenVT>;
+def : StPat<truncstorei16, SH, GPR, XLenVT>;
+def : StPat<store, SW, GPR, i32>, Requires<[IsRV32]>;
/// Fences
@@ -1648,7 +1724,7 @@ def : Pat<(atomic_fence (XLenVT 7), (timm)), (FENCE 0b11, 0b11)>;
class ReadSysReg<SysReg SR, list<Register> Regs>
: Pseudo<(outs GPR:$rd), (ins),
- [(set GPR:$rd, (riscv_read_csr (XLenVT SR.Encoding)))]>,
+ [(set GPR:$rd, (XLenVT (riscv_read_csr (XLenVT SR.Encoding))))]>,
PseudoInstExpansion<(CSRRS GPR:$rd, SR.Encoding, X0)> {
let hasSideEffects = 0;
let Uses = Regs;
@@ -1656,7 +1732,7 @@ class ReadSysReg<SysReg SR, list<Register> Regs>
class WriteSysReg<SysReg SR, list<Register> Regs>
: Pseudo<(outs), (ins GPR:$val),
- [(riscv_write_csr (XLenVT SR.Encoding), GPR:$val)]>,
+ [(riscv_write_csr (XLenVT SR.Encoding), (XLenVT GPR:$val))]>,
PseudoInstExpansion<(CSRRW X0, SR.Encoding, GPR:$val)> {
let hasSideEffects = 0;
let Defs = Regs;
@@ -1672,7 +1748,7 @@ class WriteSysRegImm<SysReg SR, list<Register> Regs>
class SwapSysReg<SysReg SR, list<Register> Regs>
: Pseudo<(outs GPR:$rd), (ins GPR:$val),
- [(set GPR:$rd, (riscv_swap_csr (XLenVT SR.Encoding), GPR:$val))]>,
+ [(set GPR:$rd, (riscv_swap_csr (XLenVT SR.Encoding), (XLenVT GPR:$val)))]>,
PseudoInstExpansion<(CSRRW GPR:$rd, SR.Encoding, GPR:$val)> {
let hasSideEffects = 0;
let Uses = Regs;
@@ -1681,7 +1757,7 @@ class SwapSysReg<SysReg SR, list<Register> Regs>
class SwapSysRegImm<SysReg SR, list<Register> Regs>
: Pseudo<(outs GPR:$rd), (ins uimm5:$val),
- [(set GPR:$rd, (riscv_swap_csr (XLenVT SR.Encoding), uimm5:$val))]>,
+ [(set GPR:$rd, (XLenVT (riscv_swap_csr (XLenVT SR.Encoding), uimm5:$val)))]>,
PseudoInstExpansion<(CSRRWI GPR:$rd, SR.Encoding, uimm5:$val)> {
let hasSideEffects = 0;
let Uses = Regs;
@@ -1693,6 +1769,8 @@ def WriteFRM : WriteSysReg<SysRegFRM, [FRM]>;
def WriteFRMImm : WriteSysRegImm<SysRegFRM, [FRM]>;
def SwapFRMImm : SwapSysRegImm<SysRegFRM, [FRM]>;
+def WriteVXRMImm : WriteSysRegImm<SysRegVXRM, [VXRM]>;
+
let hasSideEffects = true in {
def ReadFFLAGS : ReadSysReg<SysRegFFLAGS, [FFLAGS]>;
def WriteFFLAGS : WriteSysReg<SysRegFFLAGS, [FFLAGS]>;
@@ -1792,21 +1870,21 @@ def : Pat<(binop_allwusers<xor> GPR:$rs1, u32simm12:$imm),
(XORI GPR:$rs1, u32simm12:$imm)>;
/// Loads
-defm : LdPat<sextloadi32, LW, i64>;
-defm : LdPat<extloadi32, LW, i64>;
-defm : LdPat<zextloadi32, LWU, i64>;
-defm : LdPat<load, LD, i64>;
+def : LdPat<sextloadi32, LW, i64>;
+def : LdPat<extloadi32, LW, i64>;
+def : LdPat<zextloadi32, LWU, i64>;
+def : LdPat<load, LD, i64>;
/// Stores
-defm : StPat<truncstorei32, SW, GPR, i64>;
-defm : StPat<store, SD, GPR, i64>;
+def : StPat<truncstorei32, SW, GPR, i64>;
+def : StPat<store, SD, GPR, i64>;
} // Predicates = [IsRV64]
/// readcyclecounter
// On RV64, we can directly read the 64-bit "cycle" CSR.
let Predicates = [IsRV64] in
-def : Pat<(i64 (readcyclecounter)), (CSRRS CYCLE.Encoding, X0)>;
+def : Pat<(i64 (readcyclecounter)), (CSRRS CYCLE.Encoding, (XLenVT X0))>;
// On RV32, ReadCycleWide will be expanded to the suggested loop reading both
// halves of the 64-bit "cycle" CSR.
let Predicates = [IsRV32], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in
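The "suggested loop" referred to above is the ISA manual's sequence for reading a 64-bit counter on RV32 without tearing between the two halves; a sketch (register choices are illustrative):

1:      rdcycleh t0                 # high half
        rdcycle  t1                 # low half
        rdcycleh t2                 # high half again
        bne      t0, t2, 1b         # retry if the high half changed in between
                                    # 64-bit result is t0:t1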
@@ -1831,8 +1909,15 @@ def HWASAN_CHECK_MEMACCESS_SHORTGRANULES
[(int_hwasan_check_memaccess_shortgranules X5, GPRJALR:$ptr,
(i32 timm:$accessinfo))]>;
+// This gets lowered into a 20-byte instruction sequence (at most)
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0,
+ Defs = [ X6, X7, X28, X29, X30, X31 ], Size = 20 in {
+def KCFI_CHECK
+ : Pseudo<(outs), (ins GPRJALR:$ptr, i32imm:$type), []>, Sched<[]>;
+}
+
/// Simple optimization
-def : Pat<(add GPR:$rs1, (AddiPair:$rs2)),
+def : Pat<(XLenVT (add GPR:$rs1, (AddiPair:$rs2))),
(ADDI (ADDI GPR:$rs1, (AddiPairImmLarge AddiPair:$rs2)),
(AddiPairImmSmall GPR:$rs2))>;
@@ -1847,16 +1932,35 @@ def : Pat<(binop_allwusers<add> GPR:$rs1, (AddiPair:$rs2)),
// Standard extensions
//===----------------------------------------------------------------------===//
+// Multiply and Division
include "RISCVInstrInfoM.td"
+
+// Atomic
include "RISCVInstrInfoA.td"
+
+// Scalar FP
include "RISCVInstrInfoF.td"
include "RISCVInstrInfoD.td"
-include "RISCVInstrInfoC.td"
+include "RISCVInstrInfoZfh.td"
+include "RISCVInstrInfoZfbfmin.td"
+include "RISCVInstrInfoZfa.td"
+
+// Scalar bitmanip and cryptography
include "RISCVInstrInfoZb.td"
include "RISCVInstrInfoZk.td"
+
+// Vector
include "RISCVInstrInfoV.td"
-include "RISCVInstrInfoZfh.td"
+include "RISCVInstrInfoZvfbf.td"
+include "RISCVInstrInfoZvk.td"
+
+// Integer
include "RISCVInstrInfoZicbo.td"
+include "RISCVInstrInfoZicond.td"
+
+// Compressed
+include "RISCVInstrInfoC.td"
+include "RISCVInstrInfoZc.td"
//===----------------------------------------------------------------------===//
// Vendor extensions
@@ -1864,3 +1968,5 @@ include "RISCVInstrInfoZicbo.td"
include "RISCVInstrInfoXVentana.td"
include "RISCVInstrInfoXTHead.td"
+include "RISCVInstrInfoXSf.td"
+include "RISCVInstrInfoXCV.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 5227acc1e504..8421109b8514 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -7,7 +7,8 @@
//===----------------------------------------------------------------------===//
//
// This file describes the RISC-V instructions from the standard 'A', Atomic
-// Instructions extension.
+// Instructions extension as well as the experimental 'Zacas' (Atomic
+// Compare-and-Swap) extension.
//
//===----------------------------------------------------------------------===//
@@ -43,11 +44,11 @@ multiclass AMO_rr_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr> {
def _AQ_RL : AMO_rr<funct5, 1, 1, funct3, opcodestr # ".aqrl">;
}
-multiclass AtomicStPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
- ValueType vt = XLenVT> {
- def : Pat<(StoreOp (AddrRegImm GPR:$rs1, simm12:$imm12), (vt StTy:$rs2)),
- (Inst StTy:$rs2, GPR:$rs1, simm12:$imm12)>;
-}
+class AtomicStPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
+ ValueType vt = XLenVT>
+ : Pat<(StoreOp (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12),
+ (vt StTy:$rs2)),
+ (Inst StTy:$rs2, GPR:$rs1, simm12:$imm12)>;
//===----------------------------------------------------------------------===//
// Instructions
@@ -101,6 +102,15 @@ defm AMOMAXU_D : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">,
Sched<[WriteAtomicD, ReadAtomicDA, ReadAtomicDD]>;
} // Predicates = [HasStdExtA, IsRV64]
+let Predicates = [HasStdExtZacas] in {
+defm AMOCAS_W : AMO_rr_aq_rl<0b00101, 0b010, "amocas.w">;
+defm AMOCAS_D : AMO_rr_aq_rl<0b00101, 0b011, "amocas.d">;
+} // Predicates = [HasStdExtZacas]
+
+let Predicates = [HasStdExtZacas, IsRV64] in {
+defm AMOCAS_Q : AMO_rr_aq_rl<0b00101, 0b100, "amocas.q">;
+} // Predicates = [HasStdExtZacas, IsRV64]
+
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
//===----------------------------------------------------------------------===//
@@ -109,35 +119,35 @@ defm AMOMAXU_D : AMO_rr_aq_rl<0b11100, 0b011, "amomaxu.d">,
// Fences will be inserted for atomic load/stores according to the logic in
// RISCVTargetLowering::{emitLeadingFence,emitTrailingFence}.
let Predicates = [HasAtomicLdSt] in {
- defm : LdPat<atomic_load_8, LB>;
- defm : LdPat<atomic_load_16, LH>;
- defm : LdPat<atomic_load_32, LW>;
+ def : LdPat<atomic_load_8, LB>;
+ def : LdPat<atomic_load_16, LH>;
+ def : LdPat<atomic_load_32, LW>;
- defm : AtomicStPat<atomic_store_8, SB, GPR>;
- defm : AtomicStPat<atomic_store_16, SH, GPR>;
- defm : AtomicStPat<atomic_store_32, SW, GPR>;
+ def : AtomicStPat<atomic_store_8, SB, GPR>;
+ def : AtomicStPat<atomic_store_16, SH, GPR>;
+ def : AtomicStPat<atomic_store_32, SW, GPR>;
}
let Predicates = [HasAtomicLdSt, IsRV64] in {
- defm : LdPat<atomic_load_64, LD, i64>;
- defm : AtomicStPat<atomic_store_64, SD, GPR, i64>;
+ def : LdPat<atomic_load_64, LD, i64>;
+ def : AtomicStPat<atomic_store_64, SD, GPR, i64>;
}
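The patterns above intentionally select plain loads and stores; the atomic ordering comes from the fences that RISCVTargetLowering's emitLeadingFence/emitTrailingFence hooks wrap around them. A sketch of the classic mapping for an i32 access (registers illustrative):

        # seq_cst load:
        fence   rw, rw
        lw      a0, 0(a1)
        fence   r, rw
        # acquire load:
        lw      a0, 0(a1)
        fence   r, rw
        # release store:
        fence   rw, w
        sw      a0, 0(a1)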
let Predicates = [HasStdExtA] in {
/// AMOs
-multiclass AMOPat<string AtomicOp, string BaseInst> {
+multiclass AMOPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT> {
def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_monotonic"),
- !cast<RVInst>(BaseInst)>;
+ !cast<RVInst>(BaseInst), vt>;
def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acquire"),
- !cast<RVInst>(BaseInst#"_AQ")>;
+ !cast<RVInst>(BaseInst#"_AQ"), vt>;
def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_release"),
- !cast<RVInst>(BaseInst#"_RL")>;
+ !cast<RVInst>(BaseInst#"_RL"), vt>;
def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_acq_rel"),
- !cast<RVInst>(BaseInst#"_AQ_RL")>;
+ !cast<RVInst>(BaseInst#"_AQ_RL"), vt>;
def : PatGprGpr<!cast<PatFrag>(AtomicOp#"_seq_cst"),
- !cast<RVInst>(BaseInst#"_AQ_RL")>;
+ !cast<RVInst>(BaseInst#"_AQ_RL"), vt>;
}
defm : AMOPat<"atomic_swap_32", "AMOSWAP_W">;
@@ -150,16 +160,16 @@ defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">;
defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">;
defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">;
-def : Pat<(atomic_load_sub_32_monotonic GPR:$addr, GPR:$incr),
- (AMOADD_W GPR:$addr, (SUB X0, GPR:$incr))>;
-def : Pat<(atomic_load_sub_32_acquire GPR:$addr, GPR:$incr),
- (AMOADD_W_AQ GPR:$addr, (SUB X0, GPR:$incr))>;
-def : Pat<(atomic_load_sub_32_release GPR:$addr, GPR:$incr),
- (AMOADD_W_RL GPR:$addr, (SUB X0, GPR:$incr))>;
-def : Pat<(atomic_load_sub_32_acq_rel GPR:$addr, GPR:$incr),
- (AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
-def : Pat<(atomic_load_sub_32_seq_cst GPR:$addr, GPR:$incr),
- (AMOADD_W_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+def : Pat<(XLenVT (atomic_load_sub_32_monotonic GPR:$addr, GPR:$incr)),
+ (AMOADD_W GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
+def : Pat<(XLenVT (atomic_load_sub_32_acquire GPR:$addr, GPR:$incr)),
+ (AMOADD_W_AQ GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
+def : Pat<(XLenVT (atomic_load_sub_32_release GPR:$addr, GPR:$incr)),
+ (AMOADD_W_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
+def : Pat<(XLenVT (atomic_load_sub_32_acq_rel GPR:$addr, GPR:$incr)),
+ (AMOADD_W_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
+def : Pat<(XLenVT (atomic_load_sub_32_seq_cst GPR:$addr, GPR:$incr)),
+ (AMOADD_W_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
/// Pseudo AMOs
@@ -175,15 +185,15 @@ let Size = 20 in
def PseudoAtomicLoadNand32 : PseudoAMO;
// Ordering constants must be kept in sync with the AtomicOrdering enum in
// AtomicOrdering.h.
-def : Pat<(atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr),
+def : Pat<(XLenVT (atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr)),
(PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>;
-def : Pat<(atomic_load_nand_32_acquire GPR:$addr, GPR:$incr),
+def : Pat<(XLenVT (atomic_load_nand_32_acquire GPR:$addr, GPR:$incr)),
(PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>;
-def : Pat<(atomic_load_nand_32_release GPR:$addr, GPR:$incr),
+def : Pat<(XLenVT (atomic_load_nand_32_release GPR:$addr, GPR:$incr)),
(PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>;
-def : Pat<(atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr),
+def : Pat<(XLenVT (atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr)),
(PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>;
-def : Pat<(atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr),
+def : Pat<(XLenVT (atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr)),
(PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>;
class PseudoMaskedAMO
@@ -273,16 +283,17 @@ class PseudoCmpXchg
// Ordering constants must be kept in sync with the AtomicOrdering enum in
// AtomicOrdering.h.
-multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst> {
- def : Pat<(!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new),
+multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
+ ValueType vt = XLenVT> {
+ def : Pat<(vt (!cast<PatFrag>(Op#"_monotonic") GPR:$addr, GPR:$cmp, GPR:$new)),
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 2)>;
- def : Pat<(!cast<PatFrag>(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new),
+ def : Pat<(vt (!cast<PatFrag>(Op#"_acquire") GPR:$addr, GPR:$cmp, GPR:$new)),
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 4)>;
- def : Pat<(!cast<PatFrag>(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new),
+ def : Pat<(vt (!cast<PatFrag>(Op#"_release") GPR:$addr, GPR:$cmp, GPR:$new)),
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 5)>;
- def : Pat<(!cast<PatFrag>(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new),
+ def : Pat<(vt (!cast<PatFrag>(Op#"_acq_rel") GPR:$addr, GPR:$cmp, GPR:$new)),
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 6)>;
- def : Pat<(!cast<PatFrag>(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new),
+ def : Pat<(vt (!cast<PatFrag>(Op#"_seq_cst") GPR:$addr, GPR:$cmp, GPR:$new)),
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
}
@@ -309,28 +320,28 @@ def : Pat<(int_riscv_masked_cmpxchg_i32
let Predicates = [HasStdExtA, IsRV64] in {
-defm : AMOPat<"atomic_swap_64", "AMOSWAP_D">;
-defm : AMOPat<"atomic_load_add_64", "AMOADD_D">;
-defm : AMOPat<"atomic_load_and_64", "AMOAND_D">;
-defm : AMOPat<"atomic_load_or_64", "AMOOR_D">;
-defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D">;
-defm : AMOPat<"atomic_load_max_64", "AMOMAX_D">;
-defm : AMOPat<"atomic_load_min_64", "AMOMIN_D">;
-defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D">;
-defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D">;
+defm : AMOPat<"atomic_swap_64", "AMOSWAP_D", i64>;
+defm : AMOPat<"atomic_load_add_64", "AMOADD_D", i64>;
+defm : AMOPat<"atomic_load_and_64", "AMOAND_D", i64>;
+defm : AMOPat<"atomic_load_or_64", "AMOOR_D", i64>;
+defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D", i64>;
+defm : AMOPat<"atomic_load_max_64", "AMOMAX_D", i64>;
+defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64>;
+defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64>;
+defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64>;
/// 64-bit AMOs
def : Pat<(i64 (atomic_load_sub_64_monotonic GPR:$addr, GPR:$incr)),
- (AMOADD_D GPR:$addr, (SUB X0, GPR:$incr))>;
+ (AMOADD_D GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
def : Pat<(i64 (atomic_load_sub_64_acquire GPR:$addr, GPR:$incr)),
- (AMOADD_D_AQ GPR:$addr, (SUB X0, GPR:$incr))>;
+ (AMOADD_D_AQ GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
def : Pat<(i64 (atomic_load_sub_64_release GPR:$addr, GPR:$incr)),
- (AMOADD_D_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+ (AMOADD_D_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
def : Pat<(i64 (atomic_load_sub_64_acq_rel GPR:$addr, GPR:$incr)),
- (AMOADD_D_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+ (AMOADD_D_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
def : Pat<(i64 (atomic_load_sub_64_seq_cst GPR:$addr, GPR:$incr)),
- (AMOADD_D_AQ_RL GPR:$addr, (SUB X0, GPR:$incr))>;
+ (AMOADD_D_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
/// 64-bit pseudo AMOs
@@ -369,7 +380,7 @@ def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i64,
/// 64-bit compare and exchange
def PseudoCmpXchg64 : PseudoCmpXchg;
-defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64>;
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>;
def : Pat<(int_riscv_masked_cmpxchg_i64
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index 26a16d099e86..74439bb67c61 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -1,4 +1,4 @@
-//===- RISCVInstrInfoC.td - Compressed RISCV instructions -*- tblgen-*-----===//
+//===- RISCVInstrInfoC.td - Compressed RISC-V instructions -*- tblgen-*----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -25,7 +25,7 @@ def uimmlog2xlennonzero : Operand<XLenVT>, ImmLeaf<XLenVT, [{
}]> {
let ParserMatchClass = UImmLog2XLenNonZeroAsmOperand;
// TODO: should ensure invalid shamt is rejected when decoding.
- let DecoderMethod = "decodeUImmOperand<6>";
+ let DecoderMethod = "decodeUImmNonZeroOperand<6>";
let OperandType = "OPERAND_UIMMLOG2XLEN_NONZERO";
let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
@@ -56,7 +56,7 @@ def simm6nonzero : Operand<XLenVT>,
ImmLeaf<XLenVT, [{return (Imm != 0) && isInt<6>(Imm);}]> {
let ParserMatchClass = SImmAsmOperand<6, "NonZero">;
let EncoderMethod = "getImmOpValue";
- let DecoderMethod = "decodeSImmOperand<6>";
+ let DecoderMethod = "decodeSImmNonZeroOperand<6>";
let OperandType = "OPERAND_SIMM6_NONZERO";
let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
@@ -93,6 +93,8 @@ def c_lui_imm : Operand<XLenVT>,
let ParserMatchClass = CLUIImmAsmOperand;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeCLUIImmOperand";
+ let OperandType = "OPERAND_CLUI_IMM";
+ let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (MCOp.evaluateAsConstantImm(Imm))
@@ -173,6 +175,8 @@ def uimm9_lsb000 : Operand<XLenVT>,
let ParserMatchClass = UImmAsmOperand<9, "Lsb000">;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmOperand<9>";
+ let OperandType = "OPERAND_UIMM9_LSB000";
+ let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -189,6 +193,8 @@ def uimm10_lsb00nonzero : Operand<XLenVT>,
let ParserMatchClass = UImmAsmOperand<10, "Lsb00NonZero">;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeUImmNonZeroOperand<10>";
+ let OperandType = "OPERAND_UIMM10_LSB00_NONZERO";
+ let OperandNamespace = "RISCVOp";
let MCOperandPredicate = [{
int64_t Imm;
if (!MCOp.evaluateAsConstantImm(Imm))
@@ -230,6 +236,20 @@ def simm12_lsb0 : Operand<XLenVT>,
let OperandType = "OPERAND_PCREL";
}
+def InsnCDirectiveOpcode : AsmOperandClass {
+ let Name = "InsnCDirectiveOpcode";
+ let ParserMethod = "parseInsnCDirectiveOpcode";
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isImm";
+}
+
+def uimm2_opcode : Operand<XLenVT> {
+ let ParserMatchClass = InsnCDirectiveOpcode;
+ let DecoderMethod = "decodeUImmOperand<2>";
+ let OperandType = "OPERAND_UIMM2";
+ let OperandNamespace = "RISCVOp";
+}
+
//===----------------------------------------------------------------------===//
// Instruction Class Templates
//===----------------------------------------------------------------------===//
@@ -284,7 +304,7 @@ class Shift_right<bits<2> funct2, string OpcodeStr, RegisterClass cls,
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class CS_ALU<bits<6> funct6, bits<2> funct2, string OpcodeStr,
+class CA_ALU<bits<6> funct6, bits<2> funct2, string OpcodeStr,
RegisterClass cls>
: RVInst16CA<funct6, funct2, 0b01, (outs cls:$rd_wb), (ins cls:$rd, cls:$rs2),
OpcodeStr, "$rd, $rs2"> {
@@ -328,7 +348,7 @@ def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00>,
}
let DecoderNamespace = "RISCV32Only_",
- Predicates = [HasStdExtCOrZcf, HasStdExtF, IsRV32] in
+ Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00>,
Sched<[WriteFLD32, ReadMemBase]> {
bits<7> imm;
@@ -362,7 +382,7 @@ def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00>,
}
let DecoderNamespace = "RISCV32Only_",
- Predicates = [HasStdExtCOrZcf, HasStdExtF, IsRV32] in
+ Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00>,
Sched<[WriteFST32, ReadStoreData, ReadMemBase]> {
bits<7> imm;
@@ -381,8 +401,7 @@ def C_SD : CStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000>,
let rd = 0, imm = 0, hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_NOP : RVInst16CI<0b000, 0b01, (outs), (ins), "c.nop", "">,
- Sched<[WriteNop]>
-{
+ Sched<[WriteNop]> {
let Inst{6-2} = 0;
}
@@ -465,19 +484,19 @@ def C_ANDI : RVInst16CB<0b100, 0b01, (outs GPRC:$rs1_wb), (ins GPRC:$rs1, simm6:
let Inst{6-2} = imm{4-0};
}
-def C_SUB : CS_ALU<0b100011, 0b00, "c.sub", GPRC>,
+def C_SUB : CA_ALU<0b100011, 0b00, "c.sub", GPRC>,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def C_XOR : CS_ALU<0b100011, 0b01, "c.xor", GPRC>,
+def C_XOR : CA_ALU<0b100011, 0b01, "c.xor", GPRC>,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def C_OR : CS_ALU<0b100011, 0b10, "c.or" , GPRC>,
+def C_OR : CA_ALU<0b100011, 0b10, "c.or" , GPRC>,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def C_AND : CS_ALU<0b100011, 0b11, "c.and", GPRC>,
+def C_AND : CA_ALU<0b100011, 0b11, "c.and", GPRC>,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
let Predicates = [HasStdExtCOrZca, IsRV64] in {
-def C_SUBW : CS_ALU<0b100111, 0b00, "c.subw", GPRC>,
+def C_SUBW : CA_ALU<0b100111, 0b00, "c.subw", GPRC>,
Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
-def C_ADDW : CS_ALU<0b100111, 0b01, "c.addw", GPRC>,
+def C_ADDW : CA_ALU<0b100111, 0b01, "c.addw", GPRC>,
Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
}
@@ -489,8 +508,8 @@ def C_J : RVInst16CJ<0b101, 0b01, (outs), (ins simm12_lsb0:$offset),
let isBarrier=1;
}
-def C_BEQZ : Bcz<0b110, "c.beqz", GPRC>, Sched<[WriteJmp]>;
-def C_BNEZ : Bcz<0b111, "c.bnez", GPRC>, Sched<[WriteJmp]>;
+def C_BEQZ : Bcz<0b110, "c.beqz", GPRC>, Sched<[WriteJmp, ReadJmp]>;
+def C_BNEZ : Bcz<0b111, "c.bnez", GPRC>, Sched<[WriteJmp, ReadJmp]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPRNoX0:$rd_wb),
@@ -515,7 +534,7 @@ def C_LWSP : CStackLoad<0b010, "c.lwsp", GPRNoX0, uimm8_lsb00>,
}
let DecoderNamespace = "RISCV32Only_",
- Predicates = [HasStdExtCOrZcf, HasStdExtF, IsRV32] in
+ Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00>,
Sched<[WriteFLD32, ReadMemBase]> {
let Inst{6-4} = imm{4-2};
@@ -531,11 +550,9 @@ def C_LDSP : CStackLoad<0b011, "c.ldsp", GPRNoX0, uimm9_lsb000>,
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def C_JR : RVInst16CR<0b1000, 0b10, (outs), (ins GPRNoX0:$rs1),
- "c.jr", "$rs1">, Sched<[WriteJmpReg]> {
- let isBranch = 1;
+ "c.jr", "$rs1">, Sched<[WriteJalr, ReadJalr]> {
let isBarrier = 1;
let isTerminator = 1;
- let isIndirectBranch = 1;
let rs2 = 0;
}
@@ -575,7 +592,7 @@ def C_SWSP : CStackStore<0b110, "c.swsp", GPR, uimm8_lsb00>,
}
let DecoderNamespace = "RISCV32Only_",
- Predicates = [HasStdExtCOrZcf, HasStdExtF, IsRV32] in
+ Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00>,
Sched<[WriteFST32, ReadStoreData, ReadMemBase]> {
let Inst{12-9} = imm{5-2};
@@ -604,24 +621,12 @@ def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther>,
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtCOrZca, HasRVCHints], hasSideEffects = 0, mayLoad = 0,
- mayStore = 0 in
-{
+ mayStore = 0 in {
let rd = 0 in
def C_NOP_HINT : RVInst16CI<0b000, 0b01, (outs), (ins simm6nonzero:$imm),
"c.nop", "$imm">, Sched<[WriteNop]> {
let Inst{6-2} = imm{4-0};
- let DecoderMethod = "decodeRVCInstrSImm";
-}
-
-// Just a different syntax for the c.nop hint: c.addi x0, simm6 vs c.nop simm6.
-def C_ADDI_HINT_X0 : RVInst16CI<0b000, 0b01, (outs GPRX0:$rd_wb),
- (ins GPRX0:$rd, simm6nonzero:$imm),
- "c.addi", "$rd, $imm">,
- Sched<[WriteIALU, ReadIALU]> {
- let Constraints = "$rd = $rd_wb";
- let Inst{6-2} = imm{4-0};
- let isAsmParserOnly = 1;
}
def C_ADDI_HINT_IMM_ZERO : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb),
@@ -629,8 +634,9 @@ def C_ADDI_HINT_IMM_ZERO : RVInst16CI<0b000, 0b01, (outs GPRNoX0:$rd_wb),
"c.addi", "$rd, $imm">,
Sched<[WriteIALU, ReadIALU]> {
let Constraints = "$rd = $rd_wb";
+ let Inst{12} = 0;
let Inst{6-2} = 0;
- let isAsmParserOnly = 1;
+ let DecoderMethod = "decodeRVCInstrRdRs1ImmZero";
}
def C_LI_HINT : RVInst16CI<0b010, 0b01, (outs GPRX0:$rd), (ins simm6:$imm),
@@ -651,8 +657,7 @@ def C_LUI_HINT : RVInst16CI<0b011, 0b01, (outs GPRX0:$rd),
}
def C_MV_HINT : RVInst16CR<0b1000, 0b10, (outs GPRX0:$rs1), (ins GPRNoX0:$rs2),
- "c.mv", "$rs1, $rs2">, Sched<[WriteIALU, ReadIALU]>
-{
+ "c.mv", "$rs1, $rs2">, Sched<[WriteIALU, ReadIALU]> {
let Inst{11-7} = 0;
let DecoderMethod = "decodeRVCInstrRdRs2";
}
@@ -711,6 +716,11 @@ def C_SRAI64_HINT : RVInst16CI<0b100, 0b01, (outs GPRC:$rd_wb),
// Assembler Pseudo Instructions
//===----------------------------------------------------------------------===//
+let Predicates = [HasStdExtCOrZca, HasRVCHints] in {
+// Just a different syntax for the c.nop hint: c.addi x0, simm6 vs c.nop simm6.
+def : InstAlias<"c.addi x0, $imm", (C_NOP_HINT simm6nonzero:$imm), 0>;
+}
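A one-line illustration of the alias above (immediate chosen arbitrarily):

        c.addi  x0, 13              # accepted via the alias; assembles as the c.nop 13 hint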
+
let Predicates = [HasStdExtC, HasRVCHints, HasStdExtZihintntl] in {
def : InstAlias<"c.ntl.p1", (C_ADD_HINT X0, X2)>;
def : InstAlias<"c.ntl.pall", (C_ADD_HINT X0, X3)>;
@@ -733,7 +743,7 @@ def : InstAlias<"c.ldsp $rd, (${rs1})", (C_LDSP GPRC:$rd, SPMem:$rs1, 0)>;
def : InstAlias<"c.sdsp $rs2, (${rs1})", (C_SDSP GPRC:$rs2, SPMem:$rs1, 0)>;
}
-let Predicates = [HasStdExtCOrZcf, HasStdExtF, IsRV32] in {
+let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
def : InstAlias<"c.flw $rd, (${rs1})", (C_FLW FPR32C:$rd, GPRCMem:$rs1, 0)>;
def : InstAlias<"c.fsw $rs2, (${rs1})", (C_FSW FPR32C:$rs2, GPRCMem:$rs1, 0)>;
def : InstAlias<"c.flwsp $rd, (${rs1})", (C_FLWSP FPR32C:$rd, SPMem:$rs1, 0)>;
@@ -748,6 +758,100 @@ def : InstAlias<"c.fsdsp $rs2, (${rs1})", (C_FSDSP FPR64C:$rs2, SPMem:$rs1, 0)>;
}
} // EmitPriority = 0
+//===----------------------------------------------------------------------===//
+// .insn directive instructions
+//===----------------------------------------------------------------------===//
+
+def AnyRegCOperand : AsmOperandClass {
+ let Name = "AnyRegCOperand";
+ let RenderMethod = "addRegOperands";
+ let PredicateMethod = "isAnyRegC";
+}
+
+def AnyRegC : Operand<XLenVT> {
+ let OperandType = "OPERAND_REGISTER";
+ let ParserMatchClass = AnyRegCOperand;
+}
+
+// isCodeGenOnly = 1 to hide them from the tablegened assembly parser.
+let isCodeGenOnly = 1, hasSideEffects = 1, mayLoad = 1, mayStore = 1,
+ hasNoSchedulingInfo = 1, Predicates = [HasStdExtCOrZca] in {
+def InsnCR : DirectiveInsnCR<(outs AnyReg:$rd), (ins uimm2_opcode:$opcode,
+ uimm4:$funct4,
+ AnyReg:$rs2),
+ "$opcode, $funct4, $rd, $rs2">;
+def InsnCI : DirectiveInsnCI<(outs AnyRegC:$rd), (ins uimm2_opcode:$opcode,
+ uimm3:$funct3,
+ simm6:$imm6),
+ "$opcode, $funct3, $rd, $imm6">;
+def InsnCIW : DirectiveInsnCIW<(outs AnyRegC:$rd), (ins uimm2_opcode:$opcode,
+ uimm3:$funct3,
+ uimm8:$imm8),
+ "$opcode, $funct3, $rd, $imm8">;
+def InsnCSS : DirectiveInsnCSS<(outs), (ins uimm2_opcode:$opcode,
+ uimm3:$funct3,
+ AnyReg:$rs2,
+ uimm6:$imm6),
+ "$opcode, $funct3, $rs2, $imm6">;
+def InsnCL : DirectiveInsnCL<(outs AnyRegC:$rd), (ins uimm2_opcode:$opcode,
+ uimm3:$funct3,
+ AnyRegC:$rs1,
+ uimm5:$imm5),
+ "$opcode, $funct3, $rd, ${imm5}(${rs1})">;
+def InsnCS : DirectiveInsnCS<(outs), (ins uimm2_opcode:$opcode,
+ uimm3:$funct3,
+ AnyRegC:$rs2,
+ AnyRegC:$rs1,
+ uimm5:$imm5),
+ "$opcode, $funct3, $rs2, ${imm5}(${rs1})">;
+def InsnCA : DirectiveInsnCA<(outs AnyRegC:$rd), (ins uimm2_opcode:$opcode,
+ uimm6:$funct6,
+ uimm2:$funct2,
+ AnyRegC:$rs2),
+ "$opcode, $funct6, $funct2, $rd, $rs2">;
+def InsnCB : DirectiveInsnCB<(outs), (ins uimm2_opcode:$opcode, uimm3:$funct3,
+ AnyRegC:$rs1,
+ simm9_lsb0:$imm8),
+ "$opcode, $funct3, $rs1, $imm8">;
+def InsnCJ : DirectiveInsnCJ<(outs), (ins uimm2_opcode:$opcode,
+ uimm3:$funct3,
+ simm12_lsb0:$imm11),
+ "$opcode, $funct3, $imm11">;
+}
+
+// Use InstAliases to match these so that we can combine the insn and format
+// into a mnemonic to use as the key for the tablegened asm matcher table. The
+// parser will take care of creating these fake mnemonics and will only do it
+// for known formats.
+let EmitPriority = 0, Predicates = [HasStdExtCOrZca] in {
+def : InstAlias<".insn_cr $opcode, $funct4, $rd, $rs2",
+ (InsnCR AnyReg:$rd, uimm2_opcode:$opcode, uimm4:$funct4,
+ AnyReg:$rs2)>;
+def : InstAlias<".insn_ci $opcode, $funct3, $rd, $imm6",
+ (InsnCI AnyRegC:$rd, uimm2_opcode:$opcode, uimm3:$funct3,
+ simm6:$imm6)>;
+def : InstAlias<".insn_ciw $opcode, $funct3, $rd, $imm8",
+ (InsnCIW AnyRegC:$rd, uimm2_opcode:$opcode, uimm3:$funct3,
+ uimm8:$imm8)>;
+def : InstAlias<".insn_css $opcode, $funct3, $rs2, $imm6",
+ (InsnCSS uimm2_opcode:$opcode, uimm3:$funct3, AnyReg:$rs2,
+ uimm6:$imm6)>;
+def : InstAlias<".insn_cl $opcode, $funct3, $rd, ${imm5}(${rs1})",
+ (InsnCL AnyRegC:$rd, uimm2_opcode:$opcode, uimm3:$funct3,
+ AnyRegC:$rs1, uimm5:$imm5)>;
+def : InstAlias<".insn_cs $opcode, $funct3, $rs2, ${imm5}(${rs1})",
+ (InsnCS uimm2_opcode:$opcode, uimm3:$funct3, AnyRegC:$rs2,
+ AnyRegC:$rs1, uimm5:$imm5)>;
+def : InstAlias<".insn_ca $opcode, $funct6, $funct2, $rd, $rs2",
+ (InsnCA AnyRegC:$rd, uimm2_opcode:$opcode, uimm6:$funct6,
+ uimm2:$funct2, AnyRegC:$rs2)>;
+def : InstAlias<".insn_cb $opcode, $funct3, $rs1, $imm8",
+ (InsnCB uimm2_opcode:$opcode, uimm3:$funct3, AnyRegC:$rs1,
+ simm9_lsb0:$imm8)>;
+def : InstAlias<".insn_cj $opcode, $funct3, $imm11",
+ (InsnCJ uimm2_opcode:$opcode, uimm3:$funct3, simm12_lsb0:$imm11)>;
+}
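A usage sketch for the aliases above: the assembly parser rewrites ".insn <format> ..." into the fake ".insn_<format>" mnemonic these aliases key on. For the CR format (values chosen for illustration; they happen to encode c.add a0, a1):

        .insn cr 2, 9, a0, a1       # opcode=0b10, funct4=0b1001, rd=a0, rs2=a1
                                    # matched through the ".insn_cr" alias above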
+
//===----------------------------------------------------------------------===//
// Compress Instruction tablegen backend.
//===----------------------------------------------------------------------===//
@@ -771,7 +875,7 @@ def : CompressPat<(LW GPRC:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm),
(C_LW GPRC:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
} // Predicates = [HasStdExtCOrZca]
-let Predicates = [HasStdExtCOrZcf, HasStdExtF, IsRV32] in {
+let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
def : CompressPat<(FLW FPR32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm),
(C_FLW FPR32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32]
@@ -791,7 +895,7 @@ def : CompressPat<(SW GPRC:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm),
(C_SW GPRC:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
} // Predicates = [HasStdExtCOrZca]
-let Predicates = [HasStdExtCOrZcf, HasStdExtF, IsRV32] in {
+let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
def : CompressPat<(FSW FPR32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm),
(C_FSW FPR32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32]
@@ -888,7 +992,7 @@ def : CompressPat<(LW GPRNoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm),
(C_LWSP GPRNoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>;
} // Predicates = [HasStdExtCOrZca]
-let Predicates = [HasStdExtCOrZcf, HasStdExtF, IsRV32] in {
+let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
def : CompressPat<(FLW FPR32:$rd, SPMem:$rs1, uimm8_lsb00:$imm),
(C_FLWSP FPR32:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>;
} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32]
@@ -930,7 +1034,7 @@ def : CompressPat<(SW GPR:$rs2, SPMem:$rs1, uimm8_lsb00:$imm),
(C_SWSP GPR:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>;
} // Predicates = [HasStdExtCOrZca]
-let Predicates = [HasStdExtCOrZcf, HasStdExtF, IsRV32] in {
+let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
def : CompressPat<(FSW FPR32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm),
(C_FSWSP FPR32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>;
} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 7863120dc16e..7a79e3ca6a2f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -25,6 +25,8 @@ def SDT_RISCVSplitF64 : SDTypeProfile<2, 1, [SDTCisVT<0, i32>,
def RISCVBuildPairF64 : SDNode<"RISCVISD::BuildPairF64", SDT_RISCVBuildPairF64>;
def RISCVSplitF64 : SDNode<"RISCVISD::SplitF64", SDT_RISCVSplitF64>;
+def AddrRegImmINX : ComplexPattern<iPTR, 2, "SelectAddrRegImmINX">;
+
//===----------------------------------------------------------------------===//
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
@@ -43,7 +45,7 @@ def GPRF64AsFPR : AsmOperandClass {
let RenderMethod = "addRegOperands";
}
-def FPR64INX : RegisterOperand<GPRF64> {
+def FPR64INX : RegisterOperand<GPR> {
let ParserMatchClass = GPRF64AsFPR;
let DecoderMethod = "DecodeGPRRegisterClass";
}
@@ -52,41 +54,15 @@ def FPR64IN32X : RegisterOperand<GPRPF64> {
let ParserMatchClass = GPRPF64AsFPR;
}
-def DExt : ExtInfo<0, [HasStdExtD]>;
-def D64Ext : ExtInfo<0, [HasStdExtD, IsRV64]>;
-def ZdinxExt : ExtInfo<1, [HasStdExtZdinx, IsRV64]>;
-def Zdinx32Ext : ExtInfo<2, [HasStdExtZdinx, IsRV32]>;
-
-def D : ExtInfo_r<DExt, FPR64>;
-def D_INX : ExtInfo_r<ZdinxExt, FPR64INX>;
-def D_IN32X : ExtInfo_r<Zdinx32Ext, FPR64IN32X>;
-
-def DD : ExtInfo_rr<DExt, FPR64, FPR64>;
-def DD_INX : ExtInfo_rr<ZdinxExt, FPR64INX, FPR64INX>;
-def DD_IN32X : ExtInfo_rr<Zdinx32Ext, FPR64IN32X, FPR64IN32X>;
-def DF : ExtInfo_rr<DExt, FPR64, FPR32>;
-def DF_INX : ExtInfo_rr<ZdinxExt, FPR64INX, FPR32INX>;
-def DF_IN32X : ExtInfo_rr<Zdinx32Ext, FPR64IN32X, FPR32INX>;
-def DX : ExtInfo_rr<DExt, FPR64, GPR>;
-def DX_INX : ExtInfo_rr<ZdinxExt, FPR64INX, GPR>;
-def DX_IN32X : ExtInfo_rr<Zdinx32Ext, FPR64IN32X, GPR>;
-def DX_64 : ExtInfo_rr<D64Ext, FPR64, GPR>;
-def FD : ExtInfo_rr<DExt, FPR32, FPR64>;
-def FD_INX : ExtInfo_rr<ZdinxExt, FPR32INX, FPR64INX>;
-def FD_IN32X : ExtInfo_rr<Zdinx32Ext, FPR32INX, FPR64IN32X>;
-def XD : ExtInfo_rr<DExt, GPR, FPR64>;
-def XD_INX : ExtInfo_rr<ZdinxExt, GPR, FPR64INX>;
-def XD_IN32X : ExtInfo_rr<Zdinx32Ext, GPR, FPR64IN32X>;
-def XD_64 : ExtInfo_rr<D64Ext, GPR, FPR64>;
-
-defvar DINX = [D, D_INX, D_IN32X];
-defvar DDINX = [DD, DD_INX, DD_IN32X];
-defvar DXINX = [DX, DX_INX, DX_IN32X];
-defvar DFINX = [DF, DF_INX, DF_IN32X];
-defvar FDINX = [FD, FD_INX, FD_IN32X];
-defvar XDINX = [XD, XD_INX, XD_IN32X];
-defvar DXIN64X = [DX_64, DX_INX];
-defvar XDIN64X = [XD_64, XD_INX];
+def DExt : ExtInfo<"", "", [HasStdExtD], f64, FPR64, FPR32, FPR64, ?>;
+
+def ZdinxExt : ExtInfo<"_INX", "RVZfinx", [HasStdExtZdinx, IsRV64],
+ f64, FPR64INX, FPR32INX, FPR64INX, ?>;
+def Zdinx32Ext : ExtInfo<"_IN32X", "RV32Zdinx", [HasStdExtZdinx, IsRV32],
+ f64, FPR64IN32X, FPR32INX, FPR64IN32X, ?>;
+
+defvar DExts = [DExt, ZdinxExt, Zdinx32Ext];
+defvar DExtsRV64 = [DExt, ZdinxExt];
//===----------------------------------------------------------------------===//
// Instructions
@@ -101,101 +77,100 @@ def FLD : FPLoad_r<0b011, "fld", FPR64, WriteFLD64>;
def FSD : FPStore_r<0b011, "fsd", FPR64, WriteFST64>;
} // Predicates = [HasStdExtD]
-let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64] in {
-defm FMADD_D : FPFMA_rrr_frm_m<OPC_MADD, 0b01, "fmadd.d", DINX>;
-defm FMSUB_D : FPFMA_rrr_frm_m<OPC_MSUB, 0b01, "fmsub.d", DINX>;
-defm FNMSUB_D : FPFMA_rrr_frm_m<OPC_NMSUB, 0b01, "fnmsub.d", DINX>;
-defm FNMADD_D : FPFMA_rrr_frm_m<OPC_NMADD, 0b01, "fnmadd.d", DINX>;
-}
-
-defm : FPFMADynFrmAlias_m<FMADD_D, "fmadd.d", DINX>;
-defm : FPFMADynFrmAlias_m<FMSUB_D, "fmsub.d", DINX>;
-defm : FPFMADynFrmAlias_m<FNMSUB_D, "fnmsub.d", DINX>;
-defm : FPFMADynFrmAlias_m<FNMADD_D, "fnmadd.d", DINX>;
-
-let SchedRW = [WriteFAdd64, ReadFAdd64, ReadFAdd64] in {
-defm FADD_D : FPALU_rr_frm_m<0b0000001, "fadd.d", DINX, /*Commutable*/1>;
-defm FSUB_D : FPALU_rr_frm_m<0b0000101, "fsub.d", DINX>;
-}
-let SchedRW = [WriteFMul64, ReadFMul64, ReadFMul64] in
-defm FMUL_D : FPALU_rr_frm_m<0b0001001, "fmul.d", DINX, /*Commutable*/1>;
-
-let SchedRW = [WriteFDiv64, ReadFDiv64, ReadFDiv64] in
-defm FDIV_D : FPALU_rr_frm_m<0b0001101, "fdiv.d", DINX>;
-
-defm : FPALUDynFrmAlias_m<FADD_D, "fadd.d", DINX>;
-defm : FPALUDynFrmAlias_m<FSUB_D, "fsub.d", DINX>;
-defm : FPALUDynFrmAlias_m<FMUL_D, "fmul.d", DINX>;
-defm : FPALUDynFrmAlias_m<FDIV_D, "fdiv.d", DINX>;
-
-defm FSQRT_D : FPUnaryOp_r_frm_m<0b0101101, 0b00000, DDINX, "fsqrt.d">,
- Sched<[WriteFSqrt64, ReadFSqrt64]>;
-defm : FPUnaryOpDynFrmAlias_m<FSQRT_D, "fsqrt.d", DDINX>;
-
-let SchedRW = [WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64],
- mayRaiseFPException = 0 in {
-defm FSGNJ_D : FPALU_rr_m<0b0010001, 0b000, "fsgnj.d", DINX>;
-defm FSGNJN_D : FPALU_rr_m<0b0010001, 0b001, "fsgnjn.d", DINX>;
-defm FSGNJX_D : FPALU_rr_m<0b0010001, 0b010, "fsgnjx.d", DINX>;
-}
-
-let SchedRW = [WriteFMinMax64, ReadFMinMax64, ReadFMinMax64] in {
-defm FMIN_D : FPALU_rr_m<0b0010101, 0b000, "fmin.d", DINX, /*Commutable*/1>;
-defm FMAX_D : FPALU_rr_m<0b0010101, 0b001, "fmax.d", DINX, /*Commutable*/1>;
-}
-
-defm FCVT_S_D : FPUnaryOp_r_frm_m<0b0100000, 0b00001, FDINX, "fcvt.s.d">,
- Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_S_D, "fcvt.s.d", FDINX>;
+foreach Ext = DExts in {
+ let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64] in {
+ defm FMADD_D : FPFMA_rrr_frm_m<OPC_MADD, 0b01, "fmadd.d", Ext>;
+ defm FMSUB_D : FPFMA_rrr_frm_m<OPC_MSUB, 0b01, "fmsub.d", Ext>;
+ defm FNMSUB_D : FPFMA_rrr_frm_m<OPC_NMSUB, 0b01, "fnmsub.d", Ext>;
+ defm FNMADD_D : FPFMA_rrr_frm_m<OPC_NMADD, 0b01, "fnmadd.d", Ext>;
+ }
+
+ let SchedRW = [WriteFAdd64, ReadFAdd64, ReadFAdd64] in {
+ defm FADD_D : FPALU_rr_frm_m<0b0000001, "fadd.d", Ext, Commutable=1>;
+ defm FSUB_D : FPALU_rr_frm_m<0b0000101, "fsub.d", Ext>;
+ }
+ let SchedRW = [WriteFMul64, ReadFMul64, ReadFMul64] in
+ defm FMUL_D : FPALU_rr_frm_m<0b0001001, "fmul.d", Ext, Commutable=1>;
+
+ let SchedRW = [WriteFDiv64, ReadFDiv64, ReadFDiv64] in
+ defm FDIV_D : FPALU_rr_frm_m<0b0001101, "fdiv.d", Ext>;
+
+ defm FSQRT_D : FPUnaryOp_r_frm_m<0b0101101, 0b00000, Ext, Ext.PrimaryTy,
+ Ext.PrimaryTy, "fsqrt.d">,
+ Sched<[WriteFSqrt64, ReadFSqrt64]>;
+
+ let SchedRW = [WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64],
+ mayRaiseFPException = 0 in {
+ defm FSGNJ_D : FPALU_rr_m<0b0010001, 0b000, "fsgnj.d", Ext>;
+ defm FSGNJN_D : FPALU_rr_m<0b0010001, 0b001, "fsgnjn.d", Ext>;
+ defm FSGNJX_D : FPALU_rr_m<0b0010001, 0b010, "fsgnjx.d", Ext>;
+ }
+
+ let SchedRW = [WriteFMinMax64, ReadFMinMax64, ReadFMinMax64] in {
+ defm FMIN_D : FPALU_rr_m<0b0010101, 0b000, "fmin.d", Ext, Commutable=1>;
+ defm FMAX_D : FPALU_rr_m<0b0010101, 0b001, "fmax.d", Ext, Commutable=1>;
+ }
+
+ defm FCVT_S_D : FPUnaryOp_r_frm_m<0b0100000, 0b00001, Ext, Ext.F32Ty,
+ Ext.PrimaryTy, "fcvt.s.d">,
+ Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]>;
+
+ defm FCVT_D_S : FPUnaryOp_r_m<0b0100001, 0b00000, 0b000, Ext, Ext.PrimaryTy,
+ Ext.F32Ty, "fcvt.d.s">,
+ Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]>;
+
+ let SchedRW = [WriteFCmp64, ReadFCmp64, ReadFCmp64] in {
+ defm FEQ_D : FPCmp_rr_m<0b1010001, 0b010, "feq.d", Ext, Commutable=1>;
+ defm FLT_D : FPCmp_rr_m<0b1010001, 0b001, "flt.d", Ext>;
+ defm FLE_D : FPCmp_rr_m<0b1010001, 0b000, "fle.d", Ext>;
+ }
+
+ let mayRaiseFPException = 0 in
+ defm FCLASS_D : FPUnaryOp_r_m<0b1110001, 0b00000, 0b001, Ext, GPR, Ext.PrimaryTy,
+ "fclass.d">,
+ Sched<[WriteFClass64, ReadFClass64]>;
+
+ let IsSignExtendingOpW = 1 in
+ defm FCVT_W_D : FPUnaryOp_r_frm_m<0b1100001, 0b00000, Ext, GPR, Ext.PrimaryTy,
+ "fcvt.w.d">,
+ Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>;
-defm FCVT_D_S : FPUnaryOp_r_m<0b0100001, 0b00000, 0b000, DFINX, "fcvt.d.s">,
- Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]>;
+ let IsSignExtendingOpW = 1 in
+ defm FCVT_WU_D : FPUnaryOp_r_frm_m<0b1100001, 0b00001, Ext, GPR, Ext.PrimaryTy,
+ "fcvt.wu.d">,
+ Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>;
-let SchedRW = [WriteFCmp64, ReadFCmp64, ReadFCmp64] in {
-defm FEQ_D : FPCmp_rr_m<0b1010001, 0b010, "feq.d", DINX, /*Commutable*/1>;
-defm FLT_D : FPCmp_rr_m<0b1010001, 0b001, "flt.d", DINX>;
-defm FLE_D : FPCmp_rr_m<0b1010001, 0b000, "fle.d", DINX>;
-}
+ defm FCVT_D_W : FPUnaryOp_r_m<0b1101001, 0b00000, 0b000, Ext, Ext.PrimaryTy, GPR,
+ "fcvt.d.w">,
+ Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>;
-defm FCLASS_D : FPUnaryOp_r_m<0b1110001, 0b00000, 0b001, XDINX, "fclass.d">,
- Sched<[WriteFClass64, ReadFClass64]>;
+ defm FCVT_D_WU : FPUnaryOp_r_m<0b1101001, 0b00001, 0b000, Ext, Ext.PrimaryTy, GPR,
+ "fcvt.d.wu">,
+ Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>;
+} // foreach Ext = DExts
-let IsSignExtendingOpW = 1 in
-defm FCVT_W_D : FPUnaryOp_r_frm_m<0b1100001, 0b00000, XDINX, "fcvt.w.d">,
- Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_W_D, "fcvt.w.d", XDINX>;
-
-let IsSignExtendingOpW = 1 in
-defm FCVT_WU_D : FPUnaryOp_r_frm_m<0b1100001, 0b00001, XDINX, "fcvt.wu.d">,
- Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_WU_D, "fcvt.wu.d", XDINX>;
+foreach Ext = DExtsRV64 in {
+ defm FCVT_L_D : FPUnaryOp_r_frm_m<0b1100001, 0b00010, Ext, GPR, Ext.PrimaryTy,
+ "fcvt.l.d", [IsRV64]>,
+ Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]>;
-defm FCVT_D_W : FPUnaryOp_r_m<0b1101001, 0b00000, 0b000, DXINX, "fcvt.d.w">,
- Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>;
+ defm FCVT_LU_D : FPUnaryOp_r_frm_m<0b1100001, 0b00011, Ext, GPR, Ext.PrimaryTy,
+ "fcvt.lu.d", [IsRV64]>,
+ Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]>;
-defm FCVT_D_WU : FPUnaryOp_r_m<0b1101001, 0b00001, 0b000, DXINX, "fcvt.d.wu">,
- Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>;
+ defm FCVT_D_L : FPUnaryOp_r_frm_m<0b1101001, 0b00010, Ext, Ext.PrimaryTy, GPR,
+ "fcvt.d.l", [IsRV64]>,
+ Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]>;
-defm FCVT_L_D : FPUnaryOp_r_frm_m<0b1100001, 0b00010, XDIN64X, "fcvt.l.d">,
- Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_L_D, "fcvt.l.d", XDIN64X>;
-
-defm FCVT_LU_D : FPUnaryOp_r_frm_m<0b1100001, 0b00011, XDIN64X, "fcvt.lu.d">,
- Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_LU_D, "fcvt.lu.d", XDIN64X>;
+ defm FCVT_D_LU : FPUnaryOp_r_frm_m<0b1101001, 0b00011, Ext, Ext.PrimaryTy, GPR,
+ "fcvt.d.lu", [IsRV64]>,
+ Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]>;
+} // foreach Ext = DExtsRV64
let Predicates = [HasStdExtD, IsRV64], mayRaiseFPException = 0 in
def FMV_X_D : FPUnaryOp_r<0b1110001, 0b00000, 0b000, GPR, FPR64, "fmv.x.d">,
Sched<[WriteFMovF64ToI64, ReadFMovF64ToI64]>;
-defm FCVT_D_L : FPUnaryOp_r_frm_m<0b1101001, 0b00010, DXIN64X, "fcvt.d.l">,
- Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_D_L, "fcvt.d.l", DXIN64X>;
-
-defm FCVT_D_LU : FPUnaryOp_r_frm_m<0b1101001, 0b00011, DXIN64X, "fcvt.d.lu">,
- Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_D_LU, "fcvt.d.lu", DXIN64X>;
-
let Predicates = [HasStdExtD, IsRV64], mayRaiseFPException = 0 in
def FMV_D_X : FPUnaryOp_r<0b1111001, 0b00000, 0b000, FPR64, GPR, "fmv.d.x">,
Sched<[WriteFMovI64ToF64, ReadFMovI64ToF64]>;
@@ -235,6 +210,10 @@ def : InstAlias<"fgt.d $rd, $rs, $rt",
(FLT_D_INX GPR:$rd, FPR64INX:$rt, FPR64INX:$rs), 0>;
def : InstAlias<"fge.d $rd, $rs, $rt",
(FLE_D_INX GPR:$rd, FPR64INX:$rt, FPR64INX:$rs), 0>;
+let usesCustomInserter = 1 in {
+def PseudoQuietFLE_D_INX : PseudoQuietFCMP<FPR64INX>;
+def PseudoQuietFLT_D_INX : PseudoQuietFCMP<FPR64INX>;
+}
} // Predicates = [HasStdExtZdinx, IsRV64]
let Predicates = [HasStdExtZdinx, IsRV32] in {
@@ -245,6 +224,10 @@ def : InstAlias<"fgt.d $rd, $rs, $rt",
(FLT_D_IN32X GPR:$rd, FPR64IN32X:$rt, FPR64IN32X:$rs), 0>;
def : InstAlias<"fge.d $rd, $rs, $rt",
(FLE_D_IN32X GPR:$rd, FPR64IN32X:$rt, FPR64IN32X:$rs), 0>;
+let usesCustomInserter = 1 in {
+def PseudoQuietFLE_D_IN32X : PseudoQuietFCMP<FPR64IN32X>;
+def PseudoQuietFLT_D_IN32X : PseudoQuietFCMP<FPR64IN32X>;
+}
} // Predicates = [HasStdExtZdinx, IsRV32]
//===----------------------------------------------------------------------===//
@@ -256,97 +239,243 @@ let Predicates = [HasStdExtD] in {
/// Float conversion operations
// f64 -> f32, f32 -> f64
-def : Pat<(any_fpround FPR64:$rs1), (FCVT_S_D FPR64:$rs1, 0b111)>;
+def : Pat<(any_fpround FPR64:$rs1), (FCVT_S_D FPR64:$rs1, FRM_DYN)>;
def : Pat<(any_fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>;
+} // Predicates = [HasStdExtD]
+
+let Predicates = [HasStdExtZdinx, IsRV64] in {
+/// Float conversion operations
+
+// f64 -> f32, f32 -> f64
+def : Pat<(any_fpround FPR64INX:$rs1), (FCVT_S_D_INX FPR64INX:$rs1, FRM_DYN)>;
+def : Pat<(any_fpextend FPR32INX:$rs1), (FCVT_D_S_INX FPR32INX:$rs1)>;
+} // Predicates = [HasStdExtZdinx, IsRV64]
+
+let Predicates = [HasStdExtZdinx, IsRV32] in {
+/// Float conversion operations
+
+// f64 -> f32, f32 -> f64
+def : Pat<(any_fpround FPR64IN32X:$rs1), (FCVT_S_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>;
+def : Pat<(any_fpextend FPR32INX:$rs1), (FCVT_D_S_IN32X FPR32INX:$rs1)>;
+} // Predicates = [HasStdExtZdinx, IsRV32]
// [u]int<->double conversion patterns must be gated on IsRV32 or IsRV64, so
// are defined later.
/// Float arithmetic operations
-def : PatFprFprDynFrm<any_fadd, FADD_D, FPR64>;
-def : PatFprFprDynFrm<any_fsub, FSUB_D, FPR64>;
-def : PatFprFprDynFrm<any_fmul, FMUL_D, FPR64>;
-def : PatFprFprDynFrm<any_fdiv, FDIV_D, FPR64>;
+foreach Ext = DExts in {
+ defm : PatFprFprDynFrm_m<any_fadd, FADD_D, Ext>;
+ defm : PatFprFprDynFrm_m<any_fsub, FSUB_D, Ext>;
+ defm : PatFprFprDynFrm_m<any_fmul, FMUL_D, Ext>;
+ defm : PatFprFprDynFrm_m<any_fdiv, FDIV_D, Ext>;
+}
-def : Pat<(any_fsqrt FPR64:$rs1), (FSQRT_D FPR64:$rs1, 0b111)>;
+let Predicates = [HasStdExtD] in {
+def : Pat<(any_fsqrt FPR64:$rs1), (FSQRT_D FPR64:$rs1, FRM_DYN)>;
def : Pat<(fneg FPR64:$rs1), (FSGNJN_D $rs1, $rs1)>;
def : Pat<(fabs FPR64:$rs1), (FSGNJX_D $rs1, $rs1)>;
-def : PatFprFpr<fcopysign, FSGNJ_D, FPR64>;
+def : Pat<(riscv_fpclass FPR64:$rs1), (FCLASS_D $rs1)>;
+
+def : PatFprFpr<fcopysign, FSGNJ_D, FPR64, f64>;
def : Pat<(fcopysign FPR64:$rs1, (fneg FPR64:$rs2)), (FSGNJN_D $rs1, $rs2)>;
def : Pat<(fcopysign FPR64:$rs1, FPR32:$rs2), (FSGNJ_D $rs1, (FCVT_D_S $rs2))>;
def : Pat<(fcopysign FPR32:$rs1, FPR64:$rs2), (FSGNJ_S $rs1, (FCVT_S_D $rs2,
- 0b111))>;
+ FRM_DYN))>;
// fmadd: rs1 * rs2 + rs3
def : Pat<(any_fma FPR64:$rs1, FPR64:$rs2, FPR64:$rs3),
- (FMADD_D $rs1, $rs2, $rs3, 0b111)>;
+ (FMADD_D $rs1, $rs2, $rs3, FRM_DYN)>;
// fmsub: rs1 * rs2 - rs3
def : Pat<(any_fma FPR64:$rs1, FPR64:$rs2, (fneg FPR64:$rs3)),
- (FMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
+ (FMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, FRM_DYN)>;
// fnmsub: -rs1 * rs2 + rs3
def : Pat<(any_fma (fneg FPR64:$rs1), FPR64:$rs2, FPR64:$rs3),
- (FNMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
+ (FNMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, FRM_DYN)>;
// fnmadd: -rs1 * rs2 - rs3
def : Pat<(any_fma (fneg FPR64:$rs1), FPR64:$rs2, (fneg FPR64:$rs3)),
- (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
+ (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, FRM_DYN)>;
// fnmadd: -(rs1 * rs2 + rs3) (the nsz flag on the FMA)
def : Pat<(fneg (any_fma_nsz FPR64:$rs1, FPR64:$rs2, FPR64:$rs3)),
- (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>;
+ (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, FRM_DYN)>;
+} // Predicates = [HasStdExtD]
+
+let Predicates = [HasStdExtZdinx, IsRV64] in {
+def : Pat<(any_fsqrt FPR64INX:$rs1), (FSQRT_D_INX FPR64INX:$rs1, FRM_DYN)>;
+
+def : Pat<(fneg FPR64INX:$rs1), (FSGNJN_D_INX $rs1, $rs1)>;
+def : Pat<(fabs FPR64INX:$rs1), (FSGNJX_D_INX $rs1, $rs1)>;
+
+def : Pat<(riscv_fpclass FPR64INX:$rs1), (FCLASS_D_INX $rs1)>;
+
+def : PatFprFpr<fcopysign, FSGNJ_D_INX, FPR64INX, f64>;
+def : Pat<(fcopysign FPR64INX:$rs1, (fneg FPR64INX:$rs2)),
+ (FSGNJN_D_INX $rs1, $rs2)>;
+def : Pat<(fcopysign FPR64INX:$rs1, FPR32INX:$rs2),
+ (FSGNJ_D_INX $rs1, (FCVT_D_S_INX $rs2))>;
+def : Pat<(fcopysign FPR32INX:$rs1, FPR64INX:$rs2),
+ (FSGNJ_S_INX $rs1, (FCVT_S_D_INX $rs2, FRM_DYN))>;
+
+// fmadd: rs1 * rs2 + rs3
+def : Pat<(any_fma FPR64INX:$rs1, FPR64INX:$rs2, FPR64INX:$rs3),
+ (FMADD_D_INX $rs1, $rs2, $rs3, FRM_DYN)>;
+
+// fmsub: rs1 * rs2 - rs3
+def : Pat<(any_fma FPR64INX:$rs1, FPR64INX:$rs2, (fneg FPR64INX:$rs3)),
+ (FMSUB_D_INX FPR64INX:$rs1, FPR64INX:$rs2, FPR64INX:$rs3, FRM_DYN)>;
+
+// fnmsub: -rs1 * rs2 + rs3
+def : Pat<(any_fma (fneg FPR64INX:$rs1), FPR64INX:$rs2, FPR64INX:$rs3),
+ (FNMSUB_D_INX FPR64INX:$rs1, FPR64INX:$rs2, FPR64INX:$rs3, FRM_DYN)>;
+
+// fnmadd: -rs1 * rs2 - rs3
+def : Pat<(any_fma (fneg FPR64INX:$rs1), FPR64INX:$rs2, (fneg FPR64INX:$rs3)),
+ (FNMADD_D_INX FPR64INX:$rs1, FPR64INX:$rs2, FPR64INX:$rs3, FRM_DYN)>;
+
+// fnmadd: -(rs1 * rs2 + rs3) (the nsz flag on the FMA)
+def : Pat<(fneg (any_fma_nsz FPR64INX:$rs1, FPR64INX:$rs2, FPR64INX:$rs3)),
+ (FNMADD_D_INX FPR64INX:$rs1, FPR64INX:$rs2, FPR64INX:$rs3, FRM_DYN)>;
+} // Predicates = [HasStdExtZdinx, IsRV64]
+
+let Predicates = [HasStdExtZdinx, IsRV32] in {
+def : Pat<(any_fsqrt FPR64IN32X:$rs1), (FSQRT_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>;
+
+def : Pat<(fneg FPR64IN32X:$rs1), (FSGNJN_D_IN32X $rs1, $rs1)>;
+def : Pat<(fabs FPR64IN32X:$rs1), (FSGNJX_D_IN32X $rs1, $rs1)>;
+
+def : Pat<(riscv_fpclass FPR64IN32X:$rs1), (FCLASS_D_IN32X $rs1)>;
+
+def : PatFprFpr<fcopysign, FSGNJ_D_IN32X, FPR64IN32X, f64>;
+def : Pat<(fcopysign FPR64IN32X:$rs1, (fneg FPR64IN32X:$rs2)),
+ (FSGNJN_D_IN32X $rs1, $rs2)>;
+def : Pat<(fcopysign FPR64IN32X:$rs1, FPR32INX:$rs2),
+ (FSGNJ_D_IN32X $rs1, (FCVT_D_S_INX $rs2))>;
+def : Pat<(fcopysign FPR32INX:$rs1, FPR64IN32X:$rs2),
+ (FSGNJ_S_INX $rs1, (FCVT_S_D_IN32X $rs2, FRM_DYN))>;
+
+// fmadd: rs1 * rs2 + rs3
+def : Pat<(any_fma FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3),
+ (FMADD_D_IN32X $rs1, $rs2, $rs3, FRM_DYN)>;
+
+// fmsub: rs1 * rs2 - rs3
+def : Pat<(any_fma FPR64IN32X:$rs1, FPR64IN32X:$rs2, (fneg FPR64IN32X:$rs3)),
+ (FMSUB_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, FRM_DYN)>;
+
+// fnmsub: -rs1 * rs2 + rs3
+def : Pat<(any_fma (fneg FPR64IN32X:$rs1), FPR64IN32X:$rs2, FPR64IN32X:$rs3),
+ (FNMSUB_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, FRM_DYN)>;
+
+// fnmadd: -rs1 * rs2 - rs3
+def : Pat<(any_fma (fneg FPR64IN32X:$rs1), FPR64IN32X:$rs2, (fneg FPR64IN32X:$rs3)),
+ (FNMADD_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, FRM_DYN)>;
+
+// fnmadd: -(rs1 * rs2 + rs3) (the nsz flag on the FMA)
+def : Pat<(fneg (any_fma_nsz FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3)),
+ (FNMADD_D_IN32X FPR64IN32X:$rs1, FPR64IN32X:$rs2, FPR64IN32X:$rs3, FRM_DYN)>;
+} // Predicates = [HasStdExtZdinx, IsRV32]
// The ratified 20191213 ISA spec defines fmin and fmax in a way that matches
// LLVM's fminnum and fmaxnum.
// <https://github.com/riscv/riscv-isa-manual/commit/cd20cee7efd9bac7c5aa127ec3b451749d2b3cce>.
-def : PatFprFpr<fminnum, FMIN_D, FPR64>;
-def : PatFprFpr<fmaxnum, FMAX_D, FPR64>;
+foreach Ext = DExts in {
+ defm : PatFprFpr_m<fminnum, FMIN_D, Ext>;
+ defm : PatFprFpr_m<fmaxnum, FMAX_D, Ext>;
+ defm : PatFprFpr_m<riscv_fmin, FMIN_D, Ext>;
+ defm : PatFprFpr_m<riscv_fmax, FMAX_D, Ext>;
+}
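For illustration, one instantiation above expands roughly as follows for the plain-D entry of DExts (DExts is defined earlier in this file and is assumed here to hold ExtInfo records for D and the Zdinx variants):

// Rough expansion of `defm : PatFprFpr_m<fminnum, FMIN_D, Ext>` when Ext is
// the plain-D record (empty suffix, PrimaryTy = FPR64, PrimaryVT = f64):
let Predicates = [HasStdExtD] in
def : Pat<(fminnum (f64 FPR64:$rs1), (f64 FPR64:$rs2)),
          (FMIN_D $rs1, $rs2)>;
// The Zdinx iterations emit the same pattern with FPR64INX/FPR64IN32X operands
// and FMIN_D_INX/FMIN_D_IN32X as the selected instruction.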
/// Setcc
// FIXME: SETEQ/SETLT/SETLE imply nonans, can we pick better instructions for
// strict versions of those.
// Match non-signaling FEQ_D
-def : PatSetCC<FPR64, any_fsetcc, SETEQ, FEQ_D>;
-def : PatSetCC<FPR64, any_fsetcc, SETOEQ, FEQ_D>;
-def : PatSetCC<FPR64, strict_fsetcc, SETLT, PseudoQuietFLT_D>;
-def : PatSetCC<FPR64, strict_fsetcc, SETOLT, PseudoQuietFLT_D>;
-def : PatSetCC<FPR64, strict_fsetcc, SETLE, PseudoQuietFLE_D>;
-def : PatSetCC<FPR64, strict_fsetcc, SETOLE, PseudoQuietFLE_D>;
+foreach Ext = DExts in {
+ defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_D, Ext, f64>;
+ defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_D, Ext, f64>;
+ defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_D, Ext, f64>;
+ defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_D, Ext, f64>;
+ defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_D, Ext, f64>;
+ defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_D, Ext, f64>;
+}
+let Predicates = [HasStdExtD] in {
// Match signaling FEQ_D
-def : Pat<(strict_fsetccs FPR64:$rs1, FPR64:$rs2, SETEQ),
+def : Pat<(XLenVT (strict_fsetccs FPR64:$rs1, FPR64:$rs2, SETEQ)),
(AND (FLE_D $rs1, $rs2),
(FLE_D $rs2, $rs1))>;
-def : Pat<(strict_fsetccs FPR64:$rs1, FPR64:$rs2, SETOEQ),
+def : Pat<(XLenVT (strict_fsetccs FPR64:$rs1, FPR64:$rs2, SETOEQ)),
(AND (FLE_D $rs1, $rs2),
(FLE_D $rs2, $rs1))>;
// If both operands are the same, use a single FLE.
-def : Pat<(strict_fsetccs FPR64:$rs1, FPR64:$rs1, SETEQ),
+def : Pat<(XLenVT (strict_fsetccs FPR64:$rs1, FPR64:$rs1, SETEQ)),
(FLE_D $rs1, $rs1)>;
-def : Pat<(strict_fsetccs FPR64:$rs1, FPR64:$rs1, SETOEQ),
+def : Pat<(XLenVT (strict_fsetccs FPR64:$rs1, FPR64:$rs1, SETOEQ)),
(FLE_D $rs1, $rs1)>;
-def : PatSetCC<FPR64, any_fsetccs, SETLT, FLT_D>;
-def : PatSetCC<FPR64, any_fsetccs, SETOLT, FLT_D>;
-def : PatSetCC<FPR64, any_fsetccs, SETLE, FLE_D>;
-def : PatSetCC<FPR64, any_fsetccs, SETOLE, FLE_D>;
+def : PatSetCC<FPR64, any_fsetccs, SETLT, FLT_D, f64>;
+def : PatSetCC<FPR64, any_fsetccs, SETOLT, FLT_D, f64>;
+def : PatSetCC<FPR64, any_fsetccs, SETLE, FLE_D, f64>;
+def : PatSetCC<FPR64, any_fsetccs, SETOLE, FLE_D, f64>;
+} // Predicates = [HasStdExtD]
-defm Select_FPR64 : SelectCC_GPR_rrirr<FPR64>;
+let Predicates = [HasStdExtZdinx, IsRV64] in {
+// Match signaling FEQ_D
+def : Pat<(XLenVT (strict_fsetccs (f64 FPR64INX:$rs1), FPR64INX:$rs2, SETEQ)),
+ (AND (FLE_D_INX $rs1, $rs2),
+ (FLE_D_INX $rs2, $rs1))>;
+def : Pat<(XLenVT (strict_fsetccs (f64 FPR64INX:$rs1), FPR64INX:$rs2, SETOEQ)),
+ (AND (FLE_D_INX $rs1, $rs2),
+ (FLE_D_INX $rs2, $rs1))>;
+// If both operands are the same, use a single FLE.
+def : Pat<(XLenVT (strict_fsetccs (f64 FPR64INX:$rs1), FPR64INX:$rs1, SETEQ)),
+ (FLE_D_INX $rs1, $rs1)>;
+def : Pat<(XLenVT (strict_fsetccs (f64 FPR64INX:$rs1), FPR64INX:$rs1, SETOEQ)),
+ (FLE_D_INX $rs1, $rs1)>;
+
+def : PatSetCC<FPR64INX, any_fsetccs, SETLT, FLT_D_INX, f64>;
+def : PatSetCC<FPR64INX, any_fsetccs, SETOLT, FLT_D_INX, f64>;
+def : PatSetCC<FPR64INX, any_fsetccs, SETLE, FLE_D_INX, f64>;
+def : PatSetCC<FPR64INX, any_fsetccs, SETOLE, FLE_D_INX, f64>;
+} // Predicates = [HasStdExtZdinx, IsRV64]
-def PseudoFROUND_D : PseudoFROUND<FPR64>;
+let Predicates = [HasStdExtZdinx, IsRV32] in {
+// Match signaling FEQ_D
+def : Pat<(XLenVT (strict_fsetccs FPR64IN32X:$rs1, FPR64IN32X:$rs2, SETEQ)),
+ (AND (FLE_D_IN32X $rs1, $rs2),
+ (FLE_D_IN32X $rs2, $rs1))>;
+def : Pat<(XLenVT (strict_fsetccs FPR64IN32X:$rs1, FPR64IN32X:$rs2, SETOEQ)),
+ (AND (FLE_D_IN32X $rs1, $rs2),
+ (FLE_D_IN32X $rs2, $rs1))>;
+// If both operands are the same, use a single FLE.
+def : Pat<(XLenVT (strict_fsetccs FPR64IN32X:$rs1, FPR64IN32X:$rs1, SETEQ)),
+ (FLE_D_IN32X $rs1, $rs1)>;
+def : Pat<(XLenVT (strict_fsetccs FPR64IN32X:$rs1, FPR64IN32X:$rs1, SETOEQ)),
+ (FLE_D_IN32X $rs1, $rs1)>;
+
+def : PatSetCC<FPR64IN32X, any_fsetccs, SETLT, FLT_D_IN32X, f64>;
+def : PatSetCC<FPR64IN32X, any_fsetccs, SETOLT, FLT_D_IN32X, f64>;
+def : PatSetCC<FPR64IN32X, any_fsetccs, SETLE, FLE_D_IN32X, f64>;
+def : PatSetCC<FPR64IN32X, any_fsetccs, SETOLE, FLE_D_IN32X, f64>;
+} // Predicates = [HasStdExtZdinx, IsRV32]
+
+let Predicates = [HasStdExtD] in {
+defm Select_FPR64 : SelectCC_GPR_rrirr<FPR64, f64>;
+
+def PseudoFROUND_D : PseudoFROUND<FPR64, f64>;
/// Loads
-defm : LdPat<load, FLD, f64>;
+def : LdPat<load, FLD, f64>;
/// Stores
-defm : StPat<store, FSD, FPR64, f64>;
+def : StPat<store, FSD, FPR64, f64>;
/// Pseudo-instructions needed for the soft-float ABI with RV32D
@@ -364,38 +493,93 @@ def SplitF64Pseudo
} // Predicates = [HasStdExtD]
-let Predicates = [HasStdExtD, IsRV32] in {
+let Predicates = [HasStdExtZdinx, IsRV64] in {
+defm Select_FPR64INX : SelectCC_GPR_rrirr<FPR64INX, f64>;
-/// Float constants
-def : Pat<(f64 (fpimm0)), (FCVT_D_W (i32 X0))>;
-def : Pat<(f64 (fpimmneg0)), (FSGNJN_D (FCVT_D_W (i32 X0)),
- (FCVT_D_W (i32 X0)))>;
+def PseudoFROUND_D_INX : PseudoFROUND<FPR64INX, f64>;
+
+/// Loads
+def : LdPat<load, LD, f64>;
+
+/// Stores
+def : StPat<store, SD, GPR, f64>;
+} // Predicates = [HasStdExtZdinx, IsRV64]
+
+let Predicates = [HasStdExtZdinx, IsRV32] in {
+defm Select_FPR64IN32X : SelectCC_GPR_rrirr<FPR64IN32X, f64>;
+
+def PseudoFROUND_D_IN32X : PseudoFROUND<FPR64IN32X, f64>;
+
+/// Loads
+let isCall = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 1 in
+def PseudoRV32ZdinxLD : Pseudo<(outs GPRPF64:$dst), (ins GPR:$rs1, simm12:$imm12), []>;
+def : Pat<(f64 (load (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12))),
+ (PseudoRV32ZdinxLD GPR:$rs1, simm12:$imm12)>;
+
+/// Stores
+let isCall = 0, mayLoad = 0, mayStore = 1, Size = 8, isCodeGenOnly = 1 in
+def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRPF64:$rs2, GPRNoX0:$rs1, simm12:$imm12), []>;
+def : Pat<(store (f64 GPRPF64:$rs2), (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12)),
+ (PseudoRV32ZdinxSD GPRPF64:$rs2, GPR:$rs1, simm12:$imm12)>;
+
+/// Pseudo-instructions needed for the soft-float ABI with RV32D
+
+// Moves two GPRs to an FPR.
+let usesCustomInserter = 1 in
+def BuildPairF64Pseudo_INX
+ : Pseudo<(outs FPR64IN32X:$dst), (ins GPR:$src1, GPR:$src2),
+ [(set FPR64IN32X:$dst, (RISCVBuildPairF64 GPR:$src1, GPR:$src2))]>;
+
+// Moves an FPR to two GPRs.
+let usesCustomInserter = 1 in
+def SplitF64Pseudo_INX
+ : Pseudo<(outs GPR:$dst1, GPR:$dst2), (ins FPR64IN32X:$src),
+ [(set GPR:$dst1, GPR:$dst2, (RISCVSplitF64 FPR64IN32X:$src))]>;
+} // Predicates = [HasStdExtZdinx, IsRV32]
+
+let Predicates = [HasStdExtD, IsRV32] in {
// double->[u]int. Round-to-zero must be used.
-def : Pat<(i32 (any_fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, 0b001)>;
-def : Pat<(i32 (any_fp_to_uint FPR64:$rs1)), (FCVT_WU_D FPR64:$rs1, 0b001)>;
+def : Pat<(i32 (any_fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, FRM_RTZ)>;
+def : Pat<(i32 (any_fp_to_uint FPR64:$rs1)), (FCVT_WU_D FPR64:$rs1, FRM_RTZ)>;
// Saturating double->[u]int32.
def : Pat<(i32 (riscv_fcvt_x FPR64:$rs1, timm:$frm)), (FCVT_W_D $rs1, timm:$frm)>;
def : Pat<(i32 (riscv_fcvt_xu FPR64:$rs1, timm:$frm)), (FCVT_WU_D $rs1, timm:$frm)>;
// float->int32 with current rounding mode.
-def : Pat<(i32 (any_lrint FPR64:$rs1)), (FCVT_W_D $rs1, 0b111)>;
+def : Pat<(i32 (any_lrint FPR64:$rs1)), (FCVT_W_D $rs1, FRM_DYN)>;
// float->int32 rounded to nearest with ties rounded away from zero.
-def : Pat<(i32 (any_lround FPR64:$rs1)), (FCVT_W_D $rs1, 0b100)>;
+def : Pat<(i32 (any_lround FPR64:$rs1)), (FCVT_W_D $rs1, FRM_RMM)>;
// [u]int->double.
def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1)>;
def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1)>;
} // Predicates = [HasStdExtD, IsRV32]
-let Predicates = [HasStdExtD, IsRV64] in {
+let Predicates = [HasStdExtZdinx, IsRV32] in {
+
+// double->[u]int. Round-to-zero must be used.
+def : Pat<(i32 (any_fp_to_sint FPR64IN32X:$rs1)), (FCVT_W_D_IN32X FPR64IN32X:$rs1, FRM_RTZ)>;
+def : Pat<(i32 (any_fp_to_uint FPR64IN32X:$rs1)), (FCVT_WU_D_IN32X FPR64IN32X:$rs1, FRM_RTZ)>;
+
+// Saturating double->[u]int32.
+def : Pat<(i32 (riscv_fcvt_x FPR64IN32X:$rs1, timm:$frm)), (FCVT_W_D_IN32X $rs1, timm:$frm)>;
+def : Pat<(i32 (riscv_fcvt_xu FPR64IN32X:$rs1, timm:$frm)), (FCVT_WU_D_IN32X $rs1, timm:$frm)>;
+
+// float->int32 with current rounding mode.
+def : Pat<(i32 (any_lrint FPR64IN32X:$rs1)), (FCVT_W_D_IN32X $rs1, FRM_DYN)>;
-/// Float constants
-def : Pat<(f64 (fpimm0)), (FMV_D_X (i64 X0))>;
-def : Pat<(f64 (fpimmneg0)), (FSGNJN_D (FMV_D_X (i64 X0)),
- (FMV_D_X (i64 X0)))>;
+// float->int32 rounded to nearest with ties rounded away from zero.
+def : Pat<(i32 (any_lround FPR64IN32X:$rs1)), (FCVT_W_D_IN32X $rs1, FRM_RMM)>;
+
+// [u]int->double.
+def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W_IN32X GPR:$rs1)>;
+def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU_IN32X GPR:$rs1)>;
+} // Predicates = [HasStdExtZdinx, IsRV32]
+
+let Predicates = [HasStdExtD, IsRV64] in {
// Moves (no conversion)
def : Pat<(bitconvert (i64 GPR:$rs1)), (FMV_D_X GPR:$rs1)>;
@@ -416,18 +600,55 @@ def : Pat<(i64 (riscv_fcvt_x FPR64:$rs1, timm:$frm)), (FCVT_L_D $rs1, timm:$frm)
def : Pat<(i64 (riscv_fcvt_xu FPR64:$rs1, timm:$frm)), (FCVT_LU_D $rs1, timm:$frm)>;
// double->[u]int64. Round-to-zero must be used.
-def : Pat<(i64 (any_fp_to_sint FPR64:$rs1)), (FCVT_L_D FPR64:$rs1, 0b001)>;
-def : Pat<(i64 (any_fp_to_uint FPR64:$rs1)), (FCVT_LU_D FPR64:$rs1, 0b001)>;
+def : Pat<(i64 (any_fp_to_sint FPR64:$rs1)), (FCVT_L_D FPR64:$rs1, FRM_RTZ)>;
+def : Pat<(i64 (any_fp_to_uint FPR64:$rs1)), (FCVT_LU_D FPR64:$rs1, FRM_RTZ)>;
// double->int64 with current rounding mode.
-def : Pat<(i64 (any_lrint FPR64:$rs1)), (FCVT_L_D $rs1, 0b111)>;
-def : Pat<(i64 (any_llrint FPR64:$rs1)), (FCVT_L_D $rs1, 0b111)>;
+def : Pat<(i64 (any_lrint FPR64:$rs1)), (FCVT_L_D $rs1, FRM_DYN)>;
+def : Pat<(i64 (any_llrint FPR64:$rs1)), (FCVT_L_D $rs1, FRM_DYN)>;
// double->int64 rounded to nearest with ties rounded away from zero.
-def : Pat<(i64 (any_lround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>;
-def : Pat<(i64 (any_llround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>;
+def : Pat<(i64 (any_lround FPR64:$rs1)), (FCVT_L_D $rs1, FRM_RMM)>;
+def : Pat<(i64 (any_llround FPR64:$rs1)), (FCVT_L_D $rs1, FRM_RMM)>;
// [u]int64->fp. Match GCC and default to using dynamic rounding mode.
-def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_D_L GPR:$rs1, 0b111)>;
-def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_D_LU GPR:$rs1, 0b111)>;
+def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_D_L GPR:$rs1, FRM_DYN)>;
+def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_D_LU GPR:$rs1, FRM_DYN)>;
} // Predicates = [HasStdExtD, IsRV64]
+
+let Predicates = [HasStdExtZdinx, IsRV64] in {
+
+// Moves (no conversion)
+def : Pat<(f64 (bitconvert (i64 GPR:$rs1))), (COPY_TO_REGCLASS GPR:$rs1, GPR)>;
+def : Pat<(i64 (bitconvert (f64 GPR:$rs1))), (COPY_TO_REGCLASS GPR:$rs1, GPR)>;
+
+// Use target specific isd nodes to help us remember the result is sign
+// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
+// duplicated if it has another user that didn't need the sign_extend.
+def : Pat<(riscv_any_fcvt_w_rv64 FPR64INX:$rs1, timm:$frm), (FCVT_W_D_INX $rs1, timm:$frm)>;
+def : Pat<(riscv_any_fcvt_wu_rv64 FPR64INX:$rs1, timm:$frm), (FCVT_WU_D_INX $rs1, timm:$frm)>;
+
+// [u]int32->fp
+def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W_INX $rs1)>;
+def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU_INX $rs1)>;
+
+// Saturating double->[u]int64.
+def : Pat<(i64 (riscv_fcvt_x FPR64INX:$rs1, timm:$frm)), (FCVT_L_D_INX $rs1, timm:$frm)>;
+def : Pat<(i64 (riscv_fcvt_xu FPR64INX:$rs1, timm:$frm)), (FCVT_LU_D_INX $rs1, timm:$frm)>;
+
+// double->[u]int64. Round-to-zero must be used.
+def : Pat<(i64 (any_fp_to_sint FPR64INX:$rs1)), (FCVT_L_D_INX FPR64INX:$rs1, FRM_RTZ)>;
+def : Pat<(i64 (any_fp_to_uint FPR64INX:$rs1)), (FCVT_LU_D_INX FPR64INX:$rs1, FRM_RTZ)>;
+
+// double->int64 with current rounding mode.
+def : Pat<(i64 (any_lrint FPR64INX:$rs1)), (FCVT_L_D_INX $rs1, FRM_DYN)>;
+def : Pat<(i64 (any_llrint FPR64INX:$rs1)), (FCVT_L_D_INX $rs1, FRM_DYN)>;
+
+// double->int64 rounded to nearest with ties rounded away from zero.
+def : Pat<(i64 (any_lround FPR64INX:$rs1)), (FCVT_L_D_INX $rs1, FRM_RMM)>;
+def : Pat<(i64 (any_llround FPR64INX:$rs1)), (FCVT_L_D_INX $rs1, FRM_RMM)>;
+
+// [u]int64->fp. Match GCC and default to using dynamic rounding mode.
+def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_D_L_INX GPR:$rs1, FRM_DYN)>;
+def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_D_LU_INX GPR:$rs1, FRM_DYN)>;
+} // Predicates = [HasStdExtZdinx, IsRV64]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
index 92d8a2bab4c0..290c03defc5f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td
@@ -29,6 +29,11 @@ def SDT_RISCVFCVT_X
def SDT_RISCVFROUND
: SDTypeProfile<1, 3, [SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
SDTCisVT<3, XLenVT>]>;
+def SDT_RISCVFPCLASS
+ : SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisFP<1>]>;
+
+def riscv_fpclass
+ : SDNode<"RISCVISD::FPCLASS", SDT_RISCVFPCLASS>;
def riscv_fround
: SDNode<"RISCVISD::FROUND", SDT_RISCVFROUND>;
@@ -46,6 +51,9 @@ def riscv_fcvt_x
def riscv_fcvt_xu
: SDNode<"RISCVISD::FCVT_XU", SDT_RISCVFCVT_X>;
+def riscv_fmin : SDNode<"RISCVISD::FMIN", SDTFPBinOp>;
+def riscv_fmax : SDNode<"RISCVISD::FMAX", SDTFPBinOp>;
+
def riscv_strict_fcvt_w_rv64
: SDNode<"RISCVISD::STRICT_FCVT_W_RV64", SDT_RISCVFCVT_W_RV64,
[SDNPHasChain]>;
@@ -81,66 +89,41 @@ def FPR32INX : RegisterOperand<GPRF32> {
let DecoderMethod = "DecodeGPRRegisterClass";
}
-// inx = 0 : f, d, zfh, zfhmin
-// = 1 : zfinx, zdinx, zhinx, zhinxmin
-// = 2 : zdinx_rv32
-class ExtInfo<bits<2> inx, list<Predicate> pres> {
- string Suffix = !cond(!eq(inx, 0): "",
- !eq(inx, 1): "_INX",
- !eq(inx, 2): "_IN32X");
- list<Predicate> Predicates = pres;
- string Space = !cond(!eq(inx, 0): "",
- !eq(inx, 1): "RVZfinx",
- !eq(inx, 2): "RV32Zdinx");
+// Describes a combination of predicates from F/D/Zfh/Zfhmin or
+// Zfinx/Zdinx/Zhinx/Zhinxmin that are applied to a scalar FP instruction.
+// Contains the DAGOperand for the primary type for the predicates. The primary
+// type may be unset for combinations of predicates like Zfh+D.
+// Also contains the DAGOperand for f16/f32/f64, instruction suffix, and
+// decoder namespace that go with an instruction given those predicates.
+//
+// The DAGOperand can be unset if the predicates are not enough to define it.
+class ExtInfo<string suffix, string space, list<Predicate> predicates,
+ ValueType primaryvt, DAGOperand primaryty, DAGOperand f32ty,
+ DAGOperand f64ty, DAGOperand f16ty> {
+ list<Predicate> Predicates = predicates;
+ string Suffix = suffix;
+ string Space = space;
+ DAGOperand PrimaryTy = primaryty;
+ DAGOperand F16Ty = f16ty;
+ DAGOperand F32Ty = f32ty;
+ DAGOperand F64Ty = f64ty;
+ ValueType PrimaryVT = primaryvt;
}
-class ExtInfo_r<ExtInfo ext, DAGOperand reg> {
- string Suffix = ext.Suffix;
- list<Predicate> Predicates = ext.Predicates;
- string Space = ext.Space;
- DAGOperand Reg = reg;
-}
+def FExt : ExtInfo<"", "", [HasStdExtF], f32, FPR32, FPR32, ?, ?>;
-class ExtInfo_rr<ExtInfo ext, DAGOperand rdty, DAGOperand rs1ty> {
- string Suffix = ext.Suffix;
- list<Predicate> Predicates = ext.Predicates;
- string Space = ext.Space;
- DAGOperand RdTy = rdty;
- DAGOperand Rs1Ty = rs1ty;
-}
+def ZfinxExt : ExtInfo<"_INX", "RVZfinx", [HasStdExtZfinx], f32, FPR32INX, FPR32INX, ?, ?>;
-def FExt : ExtInfo<0, [HasStdExtF]>;
-def F64Ext : ExtInfo<0, [HasStdExtF, IsRV64]>;
-def ZfinxExt : ExtInfo<1, [HasStdExtZfinx]>;
-def Zfinx64Ext : ExtInfo<1, [HasStdExtZfinx, IsRV64]>;
-
-def F : ExtInfo_r<FExt, FPR32>;
-def F_INX : ExtInfo_r<ZfinxExt, FPR32INX>;
-
-def FF : ExtInfo_rr<FExt, FPR32, FPR32>;
-def FF_INX : ExtInfo_rr<ZfinxExt, FPR32INX, FPR32INX>;
-def FX : ExtInfo_rr<FExt, FPR32, GPR>;
-def FX_INX : ExtInfo_rr<ZfinxExt, FPR32INX, GPR>;
-def FX_64 : ExtInfo_rr<F64Ext, FPR32, GPR>;
-def FX_INX_64 : ExtInfo_rr<Zfinx64Ext, FPR32INX, GPR>;
-def XF : ExtInfo_rr<FExt, GPR, FPR32>;
-def XF_64 : ExtInfo_rr<F64Ext, GPR, FPR32>;
-def XF_INX : ExtInfo_rr<ZfinxExt, GPR, FPR32INX>;
-def XF_INX_64 : ExtInfo_rr<Zfinx64Ext, GPR, FPR32INX>;
-
-defvar FINX = [F, F_INX];
-defvar FFINX = [FF, FF_INX];
-defvar FXINX = [FX, FX_INX];
-defvar XFINX = [XF, XF_INX];
-defvar XFIN64X = [XF_64, XF_INX_64];
-defvar FXIN64X = [FX_64, FX_INX_64];
+defvar FExts = [FExt, ZfinxExt];
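As a sketch only (the real D/Zdinx records live in RISCVInstrInfoD.td and are not shown in this hunk), a double-precision ExtInfo would be populated along these lines, with F32Ty also set so that cross-precision fcvt patterns can name both operand classes:

// Hypothetical illustration of the field layout; the name and values are not
// part of this change.
def DExtSketch : ExtInfo<"", "", [HasStdExtD], f64, FPR64, FPR32, FPR64, ?>;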
// Floating-point rounding mode
def FRMArg : AsmOperandClass {
let Name = "FRMArg";
let RenderMethod = "addFRMArgOperands";
- let DiagnosticType = "InvalidFRMArg";
+ let ParserMethod = "parseFRMArg";
+ let IsOptional = 1;
+ let DefaultMethod = "defaultFRMArgOp";
}
def frmarg : Operand<XLenVT> {
@@ -154,7 +137,7 @@ def frmarg : Operand<XLenVT> {
//===----------------------------------------------------------------------===//
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
-class FPLoad_r<bits<3> funct3, string opcodestr, RegisterClass rty,
+class FPLoad_r<bits<3> funct3, string opcodestr, DAGOperand rty,
SchedWrite sw>
: RVInstI<funct3, OPC_LOAD_FP, (outs rty:$rd),
(ins GPRMem:$rs1, simm12:$imm12),
@@ -162,7 +145,7 @@ class FPLoad_r<bits<3> funct3, string opcodestr, RegisterClass rty,
Sched<[sw, ReadFMemBase]>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
-class FPStore_r<bits<3> funct3, string opcodestr, RegisterClass rty,
+class FPStore_r<bits<3> funct3, string opcodestr, DAGOperand rty,
SchedWrite sw>
: RVInstS<funct3, OPC_STORE_FP, (outs),
(ins rty:$rs2, GPRMem:$rs1, simm12:$imm12),
@@ -175,25 +158,12 @@ class FPFMA_rrr_frm<RISCVOpcode opcode, bits<2> funct2, string opcodestr,
DAGOperand rty>
: RVInstR4Frm<funct2, opcode, (outs rty:$rd),
(ins rty:$rs1, rty:$rs2, rty:$rs3, frmarg:$frm),
- opcodestr, "$rd, $rs1, $rs2, $rs3, $frm">;
+ opcodestr, "$rd, $rs1, $rs2, $rs3$frm">;
multiclass FPFMA_rrr_frm_m<RISCVOpcode opcode, bits<2> funct2,
- string opcodestr, list<ExtInfo_r> Exts> {
- foreach Ext = Exts in
- let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
- def Ext.Suffix : FPFMA_rrr_frm<opcode, funct2, opcodestr, Ext.Reg>;
-}
-
-class FPFMADynFrmAlias<FPFMA_rrr_frm Inst, string OpcodeStr,
- DAGOperand rty>
- : InstAlias<OpcodeStr#" $rd, $rs1, $rs2, $rs3",
- (Inst rty:$rd, rty:$rs1, rty:$rs2, rty:$rs3, 0b111)>;
-multiclass FPFMADynFrmAlias_m<FPFMA_rrr_frm Inst, string OpcodeStr,
- list<ExtInfo_r> Exts> {
- foreach Ext = Exts in
- let Predicates = Ext.Predicates in
- def : FPFMADynFrmAlias<!cast<FPFMA_rrr_frm>(Inst#Ext.Suffix), OpcodeStr,
- Ext.Reg>;
+ string opcodestr, ExtInfo Ext> {
+ let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
+ def Ext.Suffix : FPFMA_rrr_frm<opcode, funct2, opcodestr, Ext.PrimaryTy>;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in
@@ -204,10 +174,9 @@ class FPALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr,
let isCommutable = Commutable;
}
multiclass FPALU_rr_m<bits<7> funct7, bits<3> funct3, string opcodestr,
- list<ExtInfo_r> Exts, bit Commutable = 0> {
- foreach Ext = Exts in
- let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
- def Ext.Suffix : FPALU_rr<funct7, funct3, opcodestr, Ext.Reg, Commutable>;
+ ExtInfo Ext, bit Commutable = 0> {
+ let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
+ def Ext.Suffix : FPALU_rr<funct7, funct3, opcodestr, Ext.PrimaryTy, Commutable>;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
@@ -216,26 +185,13 @@ class FPALU_rr_frm<bits<7> funct7, string opcodestr, DAGOperand rty,
bit Commutable>
: RVInstRFrm<funct7, OPC_OP_FP, (outs rty:$rd),
(ins rty:$rs1, rty:$rs2, frmarg:$frm), opcodestr,
- "$rd, $rs1, $rs2, $frm"> {
+ "$rd, $rs1, $rs2$frm"> {
let isCommutable = Commutable;
}
multiclass FPALU_rr_frm_m<bits<7> funct7, string opcodestr,
- list<ExtInfo_r> Exts, bit Commutable = 0> {
- foreach Ext = Exts in
- let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
- def Ext.Suffix : FPALU_rr_frm<funct7, opcodestr, Ext.Reg, Commutable>;
-}
-
-class FPALUDynFrmAlias<FPALU_rr_frm Inst, string OpcodeStr,
- DAGOperand rty>
- : InstAlias<OpcodeStr#" $rd, $rs1, $rs2",
- (Inst rty:$rd, rty:$rs1, rty:$rs2, 0b111)>;
-multiclass FPALUDynFrmAlias_m<FPALU_rr_frm Inst, string OpcodeStr,
- list<ExtInfo_r> Exts> {
- foreach Ext = Exts in
- let Predicates = Ext.Predicates in
- def : FPALUDynFrmAlias<!cast<FPALU_rr_frm>(Inst#Ext.Suffix), OpcodeStr,
- Ext.Reg>;
+ ExtInfo Ext, bit Commutable = 0> {
+ let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
+ def Ext.Suffix : FPALU_rr_frm<funct7, opcodestr, Ext.PrimaryTy, Commutable>;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in
@@ -246,11 +202,10 @@ class FPUnaryOp_r<bits<7> funct7, bits<5> rs2val, bits<3> funct3,
let rs2 = rs2val;
}
multiclass FPUnaryOp_r_m<bits<7> funct7, bits<5> rs2val, bits<3> funct3,
- list<ExtInfo_rr> Exts, string opcodestr> {
- foreach Ext = Exts in
- let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
- def Ext.Suffix : FPUnaryOp_r<funct7, rs2val, funct3, Ext.RdTy, Ext.Rs1Ty,
- opcodestr>;
+ ExtInfo Ext, DAGOperand rdty, DAGOperand rs1ty,
+ string opcodestr> {
+ let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
+ def Ext.Suffix : FPUnaryOp_r<funct7, rs2val, funct3, rdty, rs1ty, opcodestr>;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
@@ -259,47 +214,35 @@ class FPUnaryOp_r_frm<bits<7> funct7, bits<5> rs2val, DAGOperand rdty,
DAGOperand rs1ty, string opcodestr>
: RVInstRFrm<funct7, OPC_OP_FP, (outs rdty:$rd),
(ins rs1ty:$rs1, frmarg:$frm), opcodestr,
- "$rd, $rs1, $frm"> {
+ "$rd, $rs1$frm"> {
let rs2 = rs2val;
}
multiclass FPUnaryOp_r_frm_m<bits<7> funct7, bits<5> rs2val,
- list<ExtInfo_rr> Exts, string opcodestr> {
- foreach Ext = Exts in
- let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
- def Ext.Suffix : FPUnaryOp_r_frm<funct7, rs2val, Ext.RdTy, Ext.Rs1Ty,
- opcodestr>;
-}
-
-class FPUnaryOpDynFrmAlias<FPUnaryOp_r_frm Inst, string OpcodeStr,
- DAGOperand rdty, DAGOperand rs1ty>
- : InstAlias<OpcodeStr#" $rd, $rs1",
- (Inst rdty:$rd, rs1ty:$rs1, 0b111)>;
-multiclass FPUnaryOpDynFrmAlias_m<FPUnaryOp_r_frm Inst, string OpcodeStr,
- list<ExtInfo_rr> Exts> {
- foreach Ext = Exts in
- let Predicates = Ext.Predicates in
- def : FPUnaryOpDynFrmAlias<!cast<FPUnaryOp_r_frm>(Inst#Ext.Suffix),
- OpcodeStr, Ext.RdTy, Ext.Rs1Ty>;
+ ExtInfo Ext, DAGOperand rdty, DAGOperand rs1ty,
+ string opcodestr, list<Predicate> ExtraPreds = []> {
+ let Predicates = !listconcat(Ext.Predicates, ExtraPreds),
+ DecoderNamespace = Ext.Space in
+ def Ext.Suffix : FPUnaryOp_r_frm<funct7, rs2val, rdty, rs1ty,
+ opcodestr>;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
IsSignExtendingOpW = 1 in
class FPCmp_rr<bits<7> funct7, bits<3> funct3, string opcodestr,
- DAGOperand rty, bit Commutable>
+ DAGOperand rty, bit Commutable = 0>
: RVInstR<funct7, funct3, OPC_OP_FP, (outs GPR:$rd),
(ins rty:$rs1, rty:$rs2), opcodestr, "$rd, $rs1, $rs2"> {
let isCommutable = Commutable;
}
multiclass FPCmp_rr_m<bits<7> funct7, bits<3> funct3, string opcodestr,
- list<ExtInfo_r> Exts, bit Commutable = 0> {
- foreach Ext = Exts in
- let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
- def Ext.Suffix : FPCmp_rr<funct7, funct3, opcodestr, Ext.Reg, Commutable>;
+ ExtInfo Ext, bit Commutable = 0> {
+ let Predicates = Ext.Predicates, DecoderNamespace = Ext.Space in
+ def Ext.Suffix : FPCmp_rr<funct7, funct3, opcodestr, Ext.PrimaryTy, Commutable>;
}
-class PseudoFROUND<RegisterClass Ty>
+class PseudoFROUND<DAGOperand Ty, ValueType vt>
: Pseudo<(outs Ty:$rd), (ins Ty:$rs1, Ty:$rs2, ixlenimm:$rm),
- [(set Ty:$rd, (riscv_fround Ty:$rs1, Ty:$rs2, timm:$rm))]> {
+ [(set Ty:$rd, (vt (riscv_fround Ty:$rs1, Ty:$rs2, timm:$rm)))]> {
let hasSideEffects = 0;
let mayLoad = 0;
let mayStore = 0;
@@ -320,102 +263,96 @@ def FLW : FPLoad_r<0b010, "flw", FPR32, WriteFLD32>;
def FSW : FPStore_r<0b010, "fsw", FPR32, WriteFST32>;
} // Predicates = [HasStdExtF]
-let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32] in {
-defm FMADD_S : FPFMA_rrr_frm_m<OPC_MADD, 0b00, "fmadd.s", FINX>;
-defm FMSUB_S : FPFMA_rrr_frm_m<OPC_MSUB, 0b00, "fmsub.s", FINX>;
-defm FNMSUB_S : FPFMA_rrr_frm_m<OPC_NMSUB, 0b00, "fnmsub.s", FINX>;
-defm FNMADD_S : FPFMA_rrr_frm_m<OPC_NMADD, 0b00, "fnmadd.s", FINX>;
-}
-
-defm : FPFMADynFrmAlias_m<FMADD_S, "fmadd.s", FINX>;
-defm : FPFMADynFrmAlias_m<FMSUB_S, "fmsub.s", FINX>;
-defm : FPFMADynFrmAlias_m<FNMSUB_S, "fnmsub.s", FINX>;
-defm : FPFMADynFrmAlias_m<FNMADD_S, "fnmadd.s", FINX>;
-
-let SchedRW = [WriteFAdd32, ReadFAdd32, ReadFAdd32] in {
-defm FADD_S : FPALU_rr_frm_m<0b0000000, "fadd.s", FINX, /*Commutable*/1>;
-defm FSUB_S : FPALU_rr_frm_m<0b0000100, "fsub.s", FINX>;
-}
-let SchedRW = [WriteFMul32, ReadFMul32, ReadFMul32] in
-defm FMUL_S : FPALU_rr_frm_m<0b0001000, "fmul.s", FINX, /*Commutable*/1>;
-
-let SchedRW = [WriteFDiv32, ReadFDiv32, ReadFDiv32] in
-defm FDIV_S : FPALU_rr_frm_m<0b0001100, "fdiv.s", FINX>;
-
-defm : FPALUDynFrmAlias_m<FADD_S, "fadd.s", FINX>;
-defm : FPALUDynFrmAlias_m<FSUB_S, "fsub.s", FINX>;
-defm : FPALUDynFrmAlias_m<FMUL_S, "fmul.s", FINX>;
-defm : FPALUDynFrmAlias_m<FDIV_S, "fdiv.s", FINX>;
-
-defm FSQRT_S : FPUnaryOp_r_frm_m<0b0101100, 0b00000, FFINX, "fsqrt.s">,
- Sched<[WriteFSqrt32, ReadFSqrt32]>;
-defm : FPUnaryOpDynFrmAlias_m<FSQRT_S, "fsqrt.s", FFINX>;
-
-let SchedRW = [WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32],
- mayRaiseFPException = 0 in {
-defm FSGNJ_S : FPALU_rr_m<0b0010000, 0b000, "fsgnj.s", FINX>;
-defm FSGNJN_S : FPALU_rr_m<0b0010000, 0b001, "fsgnjn.s", FINX>;
-defm FSGNJX_S : FPALU_rr_m<0b0010000, 0b010, "fsgnjx.s", FINX>;
-}
-
-let SchedRW = [WriteFMinMax32, ReadFMinMax32, ReadFMinMax32] in {
-defm FMIN_S : FPALU_rr_m<0b0010100, 0b000, "fmin.s", FINX, /*Commutable*/1>;
-defm FMAX_S : FPALU_rr_m<0b0010100, 0b001, "fmax.s", FINX, /*Commutable*/1>;
-}
-
-let IsSignExtendingOpW = 1 in
-defm FCVT_W_S : FPUnaryOp_r_frm_m<0b1100000, 0b00000, XFINX, "fcvt.w.s">,
- Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_W_S, "fcvt.w.s", XFINX>;
-
-let IsSignExtendingOpW = 1 in
-defm FCVT_WU_S : FPUnaryOp_r_frm_m<0b1100000, 0b00001, XFINX, "fcvt.wu.s">,
- Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_WU_S, "fcvt.wu.s", XFINX>;
+foreach Ext = FExts in {
+ let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32] in {
+ defm FMADD_S : FPFMA_rrr_frm_m<OPC_MADD, 0b00, "fmadd.s", Ext>;
+ defm FMSUB_S : FPFMA_rrr_frm_m<OPC_MSUB, 0b00, "fmsub.s", Ext>;
+ defm FNMSUB_S : FPFMA_rrr_frm_m<OPC_NMSUB, 0b00, "fnmsub.s", Ext>;
+ defm FNMADD_S : FPFMA_rrr_frm_m<OPC_NMADD, 0b00, "fnmadd.s", Ext>;
+ }
+
+ let SchedRW = [WriteFAdd32, ReadFAdd32, ReadFAdd32] in {
+ defm FADD_S : FPALU_rr_frm_m<0b0000000, "fadd.s", Ext, Commutable=1>;
+ defm FSUB_S : FPALU_rr_frm_m<0b0000100, "fsub.s", Ext>;
+ }
+
+ let SchedRW = [WriteFMul32, ReadFMul32, ReadFMul32] in
+ defm FMUL_S : FPALU_rr_frm_m<0b0001000, "fmul.s", Ext, Commutable=1>;
+
+ let SchedRW = [WriteFDiv32, ReadFDiv32, ReadFDiv32] in
+ defm FDIV_S : FPALU_rr_frm_m<0b0001100, "fdiv.s", Ext>;
+
+ defm FSQRT_S : FPUnaryOp_r_frm_m<0b0101100, 0b00000, Ext, Ext.PrimaryTy,
+ Ext.PrimaryTy, "fsqrt.s">,
+ Sched<[WriteFSqrt32, ReadFSqrt32]>;
+
+ let SchedRW = [WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32],
+ mayRaiseFPException = 0 in {
+ defm FSGNJ_S : FPALU_rr_m<0b0010000, 0b000, "fsgnj.s", Ext>;
+ defm FSGNJN_S : FPALU_rr_m<0b0010000, 0b001, "fsgnjn.s", Ext>;
+ defm FSGNJX_S : FPALU_rr_m<0b0010000, 0b010, "fsgnjx.s", Ext>;
+ }
+
+ let SchedRW = [WriteFMinMax32, ReadFMinMax32, ReadFMinMax32] in {
+ defm FMIN_S : FPALU_rr_m<0b0010100, 0b000, "fmin.s", Ext, Commutable=1>;
+ defm FMAX_S : FPALU_rr_m<0b0010100, 0b001, "fmax.s", Ext, Commutable=1>;
+ }
+
+ let IsSignExtendingOpW = 1 in
+ defm FCVT_W_S : FPUnaryOp_r_frm_m<0b1100000, 0b00000, Ext, GPR, Ext.PrimaryTy,
+ "fcvt.w.s">,
+ Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]>;
+
+ let IsSignExtendingOpW = 1 in
+ defm FCVT_WU_S : FPUnaryOp_r_frm_m<0b1100000, 0b00001, Ext, GPR, Ext.PrimaryTy,
+ "fcvt.wu.s">,
+ Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]>;
+
+ let SchedRW = [WriteFCmp32, ReadFCmp32, ReadFCmp32] in {
+ defm FEQ_S : FPCmp_rr_m<0b1010000, 0b010, "feq.s", Ext, Commutable=1>;
+ defm FLT_S : FPCmp_rr_m<0b1010000, 0b001, "flt.s", Ext>;
+ defm FLE_S : FPCmp_rr_m<0b1010000, 0b000, "fle.s", Ext>;
+ }
+
+ let mayRaiseFPException = 0 in
+ defm FCLASS_S : FPUnaryOp_r_m<0b1110000, 0b00000, 0b001, Ext, GPR, Ext.PrimaryTy,
+ "fclass.s">,
+ Sched<[WriteFClass32, ReadFClass32]>;
+
+ defm FCVT_S_W : FPUnaryOp_r_frm_m<0b1101000, 0b00000, Ext, Ext.PrimaryTy, GPR,
+ "fcvt.s.w">,
+ Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]>;
+
+ defm FCVT_S_WU : FPUnaryOp_r_frm_m<0b1101000, 0b00001, Ext, Ext.PrimaryTy, GPR,
+ "fcvt.s.wu">,
+ Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]>;
+
+ defm FCVT_L_S : FPUnaryOp_r_frm_m<0b1100000, 0b00010, Ext, GPR, Ext.PrimaryTy,
+ "fcvt.l.s", [IsRV64]>,
+ Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]>;
+
+ defm FCVT_LU_S : FPUnaryOp_r_frm_m<0b1100000, 0b00011, Ext, GPR, Ext.PrimaryTy,
+ "fcvt.lu.s", [IsRV64]>,
+ Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]>;
+
+ defm FCVT_S_L : FPUnaryOp_r_frm_m<0b1101000, 0b00010, Ext, Ext.PrimaryTy, GPR,
+ "fcvt.s.l", [IsRV64]>,
+ Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]>;
+
+ defm FCVT_S_LU : FPUnaryOp_r_frm_m<0b1101000, 0b00011, Ext, Ext.PrimaryTy, GPR,
+ "fcvt.s.lu", [IsRV64]>,
+ Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]>;
+} // foreach Ext = FExts
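For illustration, one `defm` inside this loop produces two instruction records through the `def Ext.Suffix` concatenation, roughly as below (SchedRW omitted; the F iteration actually gets an empty DecoderNamespace):

// defm FADD_S : FPALU_rr_frm_m<0b0000000, "fadd.s", Ext, Commutable=1> yields:
let Predicates = [HasStdExtF] in
def FADD_S     : FPALU_rr_frm<0b0000000, "fadd.s", FPR32, 1>;
let Predicates = [HasStdExtZfinx], DecoderNamespace = "RVZfinx" in
def FADD_S_INX : FPALU_rr_frm<0b0000000, "fadd.s", FPR32INX, 1>;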
let Predicates = [HasStdExtF], mayRaiseFPException = 0,
IsSignExtendingOpW = 1 in
def FMV_X_W : FPUnaryOp_r<0b1110000, 0b00000, 0b000, GPR, FPR32, "fmv.x.w">,
Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]>;
-let SchedRW = [WriteFCmp32, ReadFCmp32, ReadFCmp32] in {
-defm FEQ_S : FPCmp_rr_m<0b1010000, 0b010, "feq.s", FINX, /*Commutable*/1>;
-defm FLT_S : FPCmp_rr_m<0b1010000, 0b001, "flt.s", FINX>;
-defm FLE_S : FPCmp_rr_m<0b1010000, 0b000, "fle.s", FINX>;
-}
-
-let mayRaiseFPException = 0 in
-defm FCLASS_S : FPUnaryOp_r_m<0b1110000, 0b00000, 0b001, XFINX, "fclass.s">,
- Sched<[WriteFClass32, ReadFClass32]>;
-
-defm FCVT_S_W : FPUnaryOp_r_frm_m<0b1101000, 0b00000, FXINX, "fcvt.s.w">,
- Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_S_W, "fcvt.s.w", FXINX>;
-
-defm FCVT_S_WU : FPUnaryOp_r_frm_m<0b1101000, 0b00001, FXINX, "fcvt.s.wu">,
- Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_S_WU, "fcvt.s.wu", FXINX>;
-
let Predicates = [HasStdExtF], mayRaiseFPException = 0 in
def FMV_W_X : FPUnaryOp_r<0b1111000, 0b00000, 0b000, FPR32, GPR, "fmv.w.x">,
Sched<[WriteFMovI32ToF32, ReadFMovI32ToF32]>;
-defm FCVT_L_S : FPUnaryOp_r_frm_m<0b1100000, 0b00010, XFIN64X, "fcvt.l.s">,
- Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_L_S, "fcvt.l.s", XFIN64X>;
-
-defm FCVT_LU_S : FPUnaryOp_r_frm_m<0b1100000, 0b00011, XFIN64X, "fcvt.lu.s">,
- Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_LU_S, "fcvt.lu.s", XFIN64X>;
-
-defm FCVT_S_L : FPUnaryOp_r_frm_m<0b1101000, 0b00010, FXIN64X, "fcvt.s.l">,
- Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_S_L, "fcvt.s.l", FXIN64X>;
-
-defm FCVT_S_LU : FPUnaryOp_r_frm_m<0b1101000, 0b00011, FXIN64X, "fcvt.s.lu">,
- Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_S_LU, "fcvt.s.lu", FXIN64X>;
-
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20)
//===----------------------------------------------------------------------===//
@@ -481,33 +418,57 @@ def : InstAlias<"fgt.s $rd, $rs, $rt",
(FLT_S_INX GPR:$rd, FPR32INX:$rt, FPR32INX:$rs), 0>;
def : InstAlias<"fge.s $rd, $rs, $rt",
(FLE_S_INX GPR:$rd, FPR32INX:$rt, FPR32INX:$rs), 0>;
+let usesCustomInserter = 1 in {
+def PseudoQuietFLE_S_INX : PseudoQuietFCMP<FPR32INX>;
+def PseudoQuietFLT_S_INX : PseudoQuietFCMP<FPR32INX>;
+}
} // Predicates = [HasStdExtZfinx]
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
//===----------------------------------------------------------------------===//
+defvar FRM_RNE = 0b000;
+defvar FRM_RTZ = 0b001;
+defvar FRM_RDN = 0b010;
+defvar FRM_RUP = 0b011;
+defvar FRM_RMM = 0b100;
+defvar FRM_DYN = 0b111;
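These values mirror the frm field encodings from the RISC-V F extension; annotated here for reference:

// FRM_RNE = 0b000  round to nearest, ties to even
// FRM_RTZ = 0b001  round towards zero
// FRM_RDN = 0b010  round down (towards -infinity)
// FRM_RUP = 0b011  round up (towards +infinity)
// FRM_RMM = 0b100  round to nearest, ties to max magnitude (away from zero)
// FRM_DYN = 0b111  dynamic: use the rounding mode currently held in fcsr.frm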
+
/// Floating point constants
def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
-def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>;
/// Generic pattern classes
-class PatSetCC<RegisterClass Ty, SDPatternOperator OpNode, CondCode Cond, RVInst Inst>
- : Pat<(OpNode Ty:$rs1, Ty:$rs2, Cond), (Inst $rs1, $rs2)>;
+class PatSetCC<DAGOperand Ty, SDPatternOperator OpNode, CondCode Cond,
+ RVInst Inst, ValueType vt>
+ : Pat<(XLenVT (OpNode (vt Ty:$rs1), Ty:$rs2, Cond)), (Inst $rs1, $rs2)>;
+multiclass PatSetCC_m<SDPatternOperator OpNode, CondCode Cond,
+ RVInst Inst, ExtInfo Ext, ValueType vt> {
+ let Predicates = Ext.Predicates in
+ def Ext.Suffix : PatSetCC<Ext.PrimaryTy, OpNode, Cond,
+ !cast<RVInst>(Inst#Ext.Suffix), vt>;
+}
class PatFprFpr<SDPatternOperator OpNode, RVInstR Inst,
- RegisterClass RegTy>
- : Pat<(OpNode RegTy:$rs1, RegTy:$rs2), (Inst $rs1, $rs2)>;
+ DAGOperand RegTy, ValueType vt>
+ : Pat<(OpNode (vt RegTy:$rs1), (vt RegTy:$rs2)), (Inst $rs1, $rs2)>;
+multiclass PatFprFpr_m<SDPatternOperator OpNode, RVInstR Inst,
+ ExtInfo Ext> {
+ let Predicates = Ext.Predicates in
+ def Ext.Suffix : PatFprFpr<OpNode, !cast<RVInstR>(Inst#Ext.Suffix),
+ Ext.PrimaryTy, Ext.PrimaryVT>;
+}
class PatFprFprDynFrm<SDPatternOperator OpNode, RVInstRFrm Inst,
- RegisterClass RegTy>
- : Pat<(OpNode RegTy:$rs1, RegTy:$rs2), (Inst $rs1, $rs2, 0b111)>;
-
-let Predicates = [HasStdExtF] in {
-
-/// Float constants
-def : Pat<(f32 (fpimm0)), (FMV_W_X X0)>;
-def : Pat<(f32 (fpimmneg0)), (FSGNJN_S (FMV_W_X X0), (FMV_W_X X0))>;
+ DAGOperand RegTy, ValueType vt>
+ : Pat<(OpNode (vt RegTy:$rs1), (vt RegTy:$rs2)), (Inst $rs1, $rs2, FRM_DYN)>;
+multiclass PatFprFprDynFrm_m<SDPatternOperator OpNode, RVInstRFrm Inst,
+ ExtInfo Ext> {
+ let Predicates = Ext.Predicates in
+ def Ext.Suffix : PatFprFprDynFrm<OpNode,
+ !cast<RVInstRFrm>(Inst#Ext.Suffix),
+ Ext.PrimaryTy, Ext.PrimaryVT>;
+}
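To make the `!cast<RVInst>(Inst#Ext.Suffix)` plumbing concrete, a later use such as `defm : PatSetCC_m<any_fsetccs, SETLT, FLT_S, FExt, f32>` boils down roughly to the pattern below; the Zfinx iteration produces the same pattern with FPR32INX, FLT_S_INX and HasStdExtZfinx:

let Predicates = [HasStdExtF] in
def : Pat<(XLenVT (any_fsetccs (f32 FPR32:$rs1), FPR32:$rs2, SETLT)),
          (FLT_S $rs1, $rs2)>;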
/// Float conversion operations
@@ -515,120 +476,228 @@ def : Pat<(f32 (fpimmneg0)), (FSGNJN_S (FMV_W_X X0), (FMV_W_X X0))>;
// are defined later.
/// Float arithmetic operations
+foreach Ext = FExts in {
+ defm : PatFprFprDynFrm_m<any_fadd, FADD_S, Ext>;
+ defm : PatFprFprDynFrm_m<any_fsub, FSUB_S, Ext>;
+ defm : PatFprFprDynFrm_m<any_fmul, FMUL_S, Ext>;
+ defm : PatFprFprDynFrm_m<any_fdiv, FDIV_S, Ext>;
+}
-def : PatFprFprDynFrm<any_fadd, FADD_S, FPR32>;
-def : PatFprFprDynFrm<any_fsub, FSUB_S, FPR32>;
-def : PatFprFprDynFrm<any_fmul, FMUL_S, FPR32>;
-def : PatFprFprDynFrm<any_fdiv, FDIV_S, FPR32>;
-
-def : Pat<(any_fsqrt FPR32:$rs1), (FSQRT_S FPR32:$rs1, 0b111)>;
+let Predicates = [HasStdExtF] in {
+def : Pat<(any_fsqrt FPR32:$rs1), (FSQRT_S FPR32:$rs1, FRM_DYN)>;
def : Pat<(fneg FPR32:$rs1), (FSGNJN_S $rs1, $rs1)>;
def : Pat<(fabs FPR32:$rs1), (FSGNJX_S $rs1, $rs1)>;
-def : PatFprFpr<fcopysign, FSGNJ_S, FPR32>;
+def : Pat<(riscv_fpclass FPR32:$rs1), (FCLASS_S $rs1)>;
+} // Predicates = [HasStdExtF]
+
+let Predicates = [HasStdExtZfinx] in {
+def : Pat<(any_fsqrt FPR32INX:$rs1), (FSQRT_S_INX FPR32INX:$rs1, FRM_DYN)>;
+
+def : Pat<(fneg FPR32INX:$rs1), (FSGNJN_S_INX $rs1, $rs1)>;
+def : Pat<(fabs FPR32INX:$rs1), (FSGNJX_S_INX $rs1, $rs1)>;
+
+def : Pat<(riscv_fpclass FPR32INX:$rs1), (FCLASS_S_INX $rs1)>;
+} // Predicates = [HasStdExtZfinx]
+
+foreach Ext = FExts in
+defm : PatFprFpr_m<fcopysign, FSGNJ_S, Ext>;
+
+let Predicates = [HasStdExtF] in {
def : Pat<(fcopysign FPR32:$rs1, (fneg FPR32:$rs2)), (FSGNJN_S $rs1, $rs2)>;
// fmadd: rs1 * rs2 + rs3
def : Pat<(any_fma FPR32:$rs1, FPR32:$rs2, FPR32:$rs3),
- (FMADD_S $rs1, $rs2, $rs3, 0b111)>;
+ (FMADD_S $rs1, $rs2, $rs3, FRM_DYN)>;
// fmsub: rs1 * rs2 - rs3
def : Pat<(any_fma FPR32:$rs1, FPR32:$rs2, (fneg FPR32:$rs3)),
- (FMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+ (FMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, FRM_DYN)>;
// fnmsub: -rs1 * rs2 + rs3
def : Pat<(any_fma (fneg FPR32:$rs1), FPR32:$rs2, FPR32:$rs3),
- (FNMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+ (FNMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, FRM_DYN)>;
// fnmadd: -rs1 * rs2 - rs3
def : Pat<(any_fma (fneg FPR32:$rs1), FPR32:$rs2, (fneg FPR32:$rs3)),
- (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+ (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, FRM_DYN)>;
// fnmadd: -(rs1 * rs2 + rs3) (the nsz flag on the FMA)
def : Pat<(fneg (any_fma_nsz FPR32:$rs1, FPR32:$rs2, FPR32:$rs3)),
- (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>;
+ (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, FRM_DYN)>;
+} // Predicates = [HasStdExtF]
+
+let Predicates = [HasStdExtZfinx] in {
+def : Pat<(fcopysign FPR32INX:$rs1, (fneg FPR32INX:$rs2)), (FSGNJN_S_INX $rs1, $rs2)>;
+
+// fmadd: rs1 * rs2 + rs3
+def : Pat<(any_fma FPR32INX:$rs1, FPR32INX:$rs2, FPR32INX:$rs3),
+ (FMADD_S_INX $rs1, $rs2, $rs3, FRM_DYN)>;
+
+// fmsub: rs1 * rs2 - rs3
+def : Pat<(any_fma FPR32INX:$rs1, FPR32INX:$rs2, (fneg FPR32INX:$rs3)),
+ (FMSUB_S_INX FPR32INX:$rs1, FPR32INX:$rs2, FPR32INX:$rs3, FRM_DYN)>;
+
+// fnmsub: -rs1 * rs2 + rs3
+def : Pat<(any_fma (fneg FPR32INX:$rs1), FPR32INX:$rs2, FPR32INX:$rs3),
+ (FNMSUB_S_INX FPR32INX:$rs1, FPR32INX:$rs2, FPR32INX:$rs3, FRM_DYN)>;
+
+// fnmadd: -rs1 * rs2 - rs3
+def : Pat<(any_fma (fneg FPR32INX:$rs1), FPR32INX:$rs2, (fneg FPR32INX:$rs3)),
+ (FNMADD_S_INX FPR32INX:$rs1, FPR32INX:$rs2, FPR32INX:$rs3, FRM_DYN)>;
+
+// fnmadd: -(rs1 * rs2 + rs3) (the nsz flag on the FMA)
+def : Pat<(fneg (any_fma_nsz FPR32INX:$rs1, FPR32INX:$rs2, FPR32INX:$rs3)),
+ (FNMADD_S_INX FPR32INX:$rs1, FPR32INX:$rs2, FPR32INX:$rs3, FRM_DYN)>;
+} // Predicates = [HasStdExtZfinx]
// The ratified 20191213 ISA spec defines fmin and fmax in a way that matches
// LLVM's fminnum and fmaxnum
// <https://github.com/riscv/riscv-isa-manual/commit/cd20cee7efd9bac7c5aa127ec3b451749d2b3cce>.
-def : PatFprFpr<fminnum, FMIN_S, FPR32>;
-def : PatFprFpr<fmaxnum, FMAX_S, FPR32>;
+foreach Ext = FExts in {
+ defm : PatFprFpr_m<fminnum, FMIN_S, Ext>;
+ defm : PatFprFpr_m<fmaxnum, FMAX_S, Ext>;
+ defm : PatFprFpr_m<riscv_fmin, FMIN_S, Ext>;
+ defm : PatFprFpr_m<riscv_fmax, FMAX_S, Ext>;
+}
/// Setcc
// FIXME: SETEQ/SETLT/SETLE imply nonans, can we pick better instructions for
// strict versions of those.
// Match non-signaling FEQ_S
-def : PatSetCC<FPR32, any_fsetcc, SETEQ, FEQ_S>;
-def : PatSetCC<FPR32, any_fsetcc, SETOEQ, FEQ_S>;
-def : PatSetCC<FPR32, strict_fsetcc, SETLT, PseudoQuietFLT_S>;
-def : PatSetCC<FPR32, strict_fsetcc, SETOLT, PseudoQuietFLT_S>;
-def : PatSetCC<FPR32, strict_fsetcc, SETLE, PseudoQuietFLE_S>;
-def : PatSetCC<FPR32, strict_fsetcc, SETOLE, PseudoQuietFLE_S>;
+foreach Ext = FExts in {
+ defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_S, Ext, f32>;
+ defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_S, Ext, f32>;
+ defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_S, Ext, f32>;
+ defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_S, Ext, f32>;
+ defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_S, Ext, f32>;
+ defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_S, Ext, f32>;
+}
+let Predicates = [HasStdExtF] in {
// Match signaling FEQ_S
-def : Pat<(strict_fsetccs FPR32:$rs1, FPR32:$rs2, SETEQ),
+def : Pat<(XLenVT (strict_fsetccs FPR32:$rs1, FPR32:$rs2, SETEQ)),
(AND (FLE_S $rs1, $rs2),
(FLE_S $rs2, $rs1))>;
-def : Pat<(strict_fsetccs FPR32:$rs1, FPR32:$rs2, SETOEQ),
+def : Pat<(XLenVT (strict_fsetccs FPR32:$rs1, FPR32:$rs2, SETOEQ)),
(AND (FLE_S $rs1, $rs2),
(FLE_S $rs2, $rs1))>;
// If both operands are the same, use a single FLE.
-def : Pat<(strict_fsetccs FPR32:$rs1, FPR32:$rs1, SETEQ),
+def : Pat<(XLenVT (strict_fsetccs FPR32:$rs1, FPR32:$rs1, SETEQ)),
(FLE_S $rs1, $rs1)>;
-def : Pat<(strict_fsetccs FPR32:$rs1, FPR32:$rs1, SETOEQ),
+def : Pat<(XLenVT (strict_fsetccs FPR32:$rs1, FPR32:$rs1, SETOEQ)),
(FLE_S $rs1, $rs1)>;
+} // Predicates = [HasStdExtF]
-def : PatSetCC<FPR32, any_fsetccs, SETLT, FLT_S>;
-def : PatSetCC<FPR32, any_fsetccs, SETOLT, FLT_S>;
-def : PatSetCC<FPR32, any_fsetccs, SETLE, FLE_S>;
-def : PatSetCC<FPR32, any_fsetccs, SETOLE, FLE_S>;
+let Predicates = [HasStdExtZfinx] in {
+// Match signaling FEQ_S
+def : Pat<(XLenVT (strict_fsetccs FPR32INX:$rs1, FPR32INX:$rs2, SETEQ)),
+ (AND (FLE_S_INX $rs1, $rs2),
+ (FLE_S_INX $rs2, $rs1))>;
+def : Pat<(XLenVT (strict_fsetccs FPR32INX:$rs1, FPR32INX:$rs2, SETOEQ)),
+ (AND (FLE_S_INX $rs1, $rs2),
+ (FLE_S_INX $rs2, $rs1))>;
+// If both operands are the same, use a single FLE.
+def : Pat<(XLenVT (strict_fsetccs FPR32INX:$rs1, FPR32INX:$rs1, SETEQ)),
+ (FLE_S_INX $rs1, $rs1)>;
+def : Pat<(XLenVT (strict_fsetccs FPR32INX:$rs1, FPR32INX:$rs1, SETOEQ)),
+ (FLE_S_INX $rs1, $rs1)>;
+} // Predicates = [HasStdExtZfinx]
+
+foreach Ext = FExts in {
+ defm : PatSetCC_m<any_fsetccs, SETLT, FLT_S, Ext, f32>;
+ defm : PatSetCC_m<any_fsetccs, SETOLT, FLT_S, Ext, f32>;
+ defm : PatSetCC_m<any_fsetccs, SETLE, FLE_S, Ext, f32>;
+ defm : PatSetCC_m<any_fsetccs, SETOLE, FLE_S, Ext, f32>;
+}
-defm Select_FPR32 : SelectCC_GPR_rrirr<FPR32>;
+let Predicates = [HasStdExtF] in {
+defm Select_FPR32 : SelectCC_GPR_rrirr<FPR32, f32>;
-def PseudoFROUND_S : PseudoFROUND<FPR32>;
+def PseudoFROUND_S : PseudoFROUND<FPR32, f32>;
/// Loads
-defm : LdPat<load, FLW, f32>;
+def : LdPat<load, FLW, f32>;
/// Stores
-defm : StPat<store, FSW, FPR32, f32>;
+def : StPat<store, FSW, FPR32, f32>;
} // Predicates = [HasStdExtF]
+let Predicates = [HasStdExtZfinx] in {
+defm Select_FPR32INX : SelectCC_GPR_rrirr<FPR32INX, f32>;
+
+def PseudoFROUND_S_INX : PseudoFROUND<FPR32INX, f32>;
+
+/// Loads
+def : Pat<(f32 (load (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12))),
+ (COPY_TO_REGCLASS (LW GPR:$rs1, simm12:$imm12), GPRF32)>;
+
+/// Stores
+def : Pat<(store (f32 FPR32INX:$rs2), (AddrRegImm (XLenVT GPR:$rs1), simm12:$imm12)),
+ (SW (COPY_TO_REGCLASS FPR32INX:$rs2, GPR), GPR:$rs1, simm12:$imm12)>;
+} // Predicates = [HasStdExtZfinx]
+
let Predicates = [HasStdExtF, IsRV32] in {
// Moves (no conversion)
def : Pat<(bitconvert (i32 GPR:$rs1)), (FMV_W_X GPR:$rs1)>;
def : Pat<(i32 (bitconvert FPR32:$rs1)), (FMV_X_W FPR32:$rs1)>;
+} // Predicates = [HasStdExtF, IsRV32]
+let Predicates = [HasStdExtZfinx, IsRV32] in {
+// Moves (no conversion)
+def : Pat<(f32 (bitconvert (i32 GPR:$rs1))), (COPY_TO_REGCLASS GPR:$rs1, GPRF32)>;
+def : Pat<(i32 (bitconvert FPR32INX:$rs1)), (COPY_TO_REGCLASS FPR32INX:$rs1, GPR)>;
+} // Predicates = [HasStdExtZfinx, IsRV32]
+
+let Predicates = [HasStdExtF, IsRV32] in {
// float->[u]int. Round-to-zero must be used.
-def : Pat<(i32 (any_fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>;
-def : Pat<(i32 (any_fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>;
+def : Pat<(i32 (any_fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, FRM_RTZ)>;
+def : Pat<(i32 (any_fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, FRM_RTZ)>;
// Saturating float->[u]int32.
def : Pat<(i32 (riscv_fcvt_x FPR32:$rs1, timm:$frm)), (FCVT_W_S $rs1, timm:$frm)>;
def : Pat<(i32 (riscv_fcvt_xu FPR32:$rs1, timm:$frm)), (FCVT_WU_S $rs1, timm:$frm)>;
// float->int32 with current rounding mode.
-def : Pat<(i32 (any_lrint FPR32:$rs1)), (FCVT_W_S $rs1, 0b111)>;
+def : Pat<(i32 (any_lrint FPR32:$rs1)), (FCVT_W_S $rs1, FRM_DYN)>;
// float->int32 rounded to nearest with ties rounded away from zero.
-def : Pat<(i32 (any_lround FPR32:$rs1)), (FCVT_W_S $rs1, 0b100)>;
+def : Pat<(i32 (any_lround FPR32:$rs1)), (FCVT_W_S $rs1, FRM_RMM)>;
// [u]int->float. Match GCC and default to using dynamic rounding mode.
-def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, 0b111)>;
-def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU $rs1, 0b111)>;
+def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, FRM_DYN)>;
+def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU $rs1, FRM_DYN)>;
} // Predicates = [HasStdExtF, IsRV32]
+let Predicates = [HasStdExtZfinx, IsRV32] in {
+// float->[u]int. Round-to-zero must be used.
+def : Pat<(i32 (any_fp_to_sint FPR32INX:$rs1)), (FCVT_W_S_INX $rs1, FRM_RTZ)>;
+def : Pat<(i32 (any_fp_to_uint FPR32INX:$rs1)), (FCVT_WU_S_INX $rs1, FRM_RTZ)>;
+
+// Saturating float->[u]int32.
+def : Pat<(i32 (riscv_fcvt_x FPR32INX:$rs1, timm:$frm)), (FCVT_W_S_INX $rs1, timm:$frm)>;
+def : Pat<(i32 (riscv_fcvt_xu FPR32INX:$rs1, timm:$frm)), (FCVT_WU_S_INX $rs1, timm:$frm)>;
+
+// float->int32 with current rounding mode.
+def : Pat<(i32 (any_lrint FPR32INX:$rs1)), (FCVT_W_S_INX $rs1, FRM_DYN)>;
+
+// float->int32 rounded to nearest with ties rounded away from zero.
+def : Pat<(i32 (any_lround FPR32INX:$rs1)), (FCVT_W_S_INX $rs1, FRM_RMM)>;
+
+// [u]int->float. Match GCC and default to using dynamic rounding mode.
+def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W_INX $rs1, FRM_DYN)>;
+def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU_INX $rs1, FRM_DYN)>;
+} // Predicates = [HasStdExtZfinx, IsRV32]
+
let Predicates = [HasStdExtF, IsRV64] in {
// Moves (no conversion)
def : Pat<(riscv_fmv_w_x_rv64 GPR:$src), (FMV_W_X GPR:$src)>;
def : Pat<(riscv_fmv_x_anyextw_rv64 FPR32:$src), (FMV_X_W FPR32:$src)>;
-def : Pat<(sext_inreg (riscv_fmv_x_anyextw_rv64 FPR32:$src), i32),
- (FMV_X_W FPR32:$src)>;
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
@@ -637,24 +706,58 @@ def : Pat<(riscv_any_fcvt_w_rv64 FPR32:$rs1, timm:$frm), (FCVT_W_S $rs1, timm:$
def : Pat<(riscv_any_fcvt_wu_rv64 FPR32:$rs1, timm:$frm), (FCVT_WU_S $rs1, timm:$frm)>;
// float->[u]int64. Round-to-zero must be used.
-def : Pat<(i64 (any_fp_to_sint FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>;
-def : Pat<(i64 (any_fp_to_uint FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>;
+def : Pat<(i64 (any_fp_to_sint FPR32:$rs1)), (FCVT_L_S $rs1, FRM_RTZ)>;
+def : Pat<(i64 (any_fp_to_uint FPR32:$rs1)), (FCVT_LU_S $rs1, FRM_RTZ)>;
// Saturating float->[u]int64.
def : Pat<(i64 (riscv_fcvt_x FPR32:$rs1, timm:$frm)), (FCVT_L_S $rs1, timm:$frm)>;
def : Pat<(i64 (riscv_fcvt_xu FPR32:$rs1, timm:$frm)), (FCVT_LU_S $rs1, timm:$frm)>;
// float->int64 with current rounding mode.
-def : Pat<(i64 (any_lrint FPR32:$rs1)), (FCVT_L_S $rs1, 0b111)>;
-def : Pat<(i64 (any_llrint FPR32:$rs1)), (FCVT_L_S $rs1, 0b111)>;
+def : Pat<(i64 (any_lrint FPR32:$rs1)), (FCVT_L_S $rs1, FRM_DYN)>;
+def : Pat<(i64 (any_llrint FPR32:$rs1)), (FCVT_L_S $rs1, FRM_DYN)>;
// float->int64 rounded to nearest with ties rounded away from zero.
-def : Pat<(i64 (any_lround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>;
-def : Pat<(i64 (any_llround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>;
+def : Pat<(i64 (any_lround FPR32:$rs1)), (FCVT_L_S $rs1, FRM_RMM)>;
+def : Pat<(i64 (any_llround FPR32:$rs1)), (FCVT_L_S $rs1, FRM_RMM)>;
// [u]int->fp. Match GCC and default to using dynamic rounding mode.
-def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_S_W $rs1, 0b111)>;
-def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_S_WU $rs1, 0b111)>;
-def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_S_L $rs1, 0b111)>;
-def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_S_LU $rs1, 0b111)>;
+def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_S_W $rs1, FRM_DYN)>;
+def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_S_WU $rs1, FRM_DYN)>;
+def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_S_L $rs1, FRM_DYN)>;
+def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_S_LU $rs1, FRM_DYN)>;
} // Predicates = [HasStdExtF, IsRV64]
+
+let Predicates = [HasStdExtZfinx, IsRV64] in {
+// Moves (no conversion)
+def : Pat<(riscv_fmv_w_x_rv64 GPR:$src), (COPY_TO_REGCLASS GPR:$src, GPRF32)>;
+def : Pat<(riscv_fmv_x_anyextw_rv64 GPRF32:$src), (COPY_TO_REGCLASS GPRF32:$src, GPR)>;
+
+// Use target specific isd nodes to help us remember the result is sign
+// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
+// duplicated if it has another user that didn't need the sign_extend.
+def : Pat<(riscv_any_fcvt_w_rv64 FPR32INX:$rs1, timm:$frm), (FCVT_W_S_INX $rs1, timm:$frm)>;
+def : Pat<(riscv_any_fcvt_wu_rv64 FPR32INX:$rs1, timm:$frm), (FCVT_WU_S_INX $rs1, timm:$frm)>;
+
+// float->[u]int64. Round-to-zero must be used.
+def : Pat<(i64 (any_fp_to_sint FPR32INX:$rs1)), (FCVT_L_S_INX $rs1, FRM_RTZ)>;
+def : Pat<(i64 (any_fp_to_uint FPR32INX:$rs1)), (FCVT_LU_S_INX $rs1, FRM_RTZ)>;
+
+// Saturating float->[u]int64.
+def : Pat<(i64 (riscv_fcvt_x FPR32INX:$rs1, timm:$frm)), (FCVT_L_S_INX $rs1, timm:$frm)>;
+def : Pat<(i64 (riscv_fcvt_xu FPR32INX:$rs1, timm:$frm)), (FCVT_LU_S_INX $rs1, timm:$frm)>;
+
+// float->int64 with current rounding mode.
+def : Pat<(i64 (any_lrint FPR32INX:$rs1)), (FCVT_L_S_INX $rs1, FRM_DYN)>;
+def : Pat<(i64 (any_llrint FPR32INX:$rs1)), (FCVT_L_S_INX $rs1, FRM_DYN)>;
+
+// float->int64 rounded to nearest with ties rounded away from zero.
+def : Pat<(i64 (any_lround FPR32INX:$rs1)), (FCVT_L_S_INX $rs1, FRM_RMM)>;
+def : Pat<(i64 (any_llround FPR32INX:$rs1)), (FCVT_L_S_INX $rs1, FRM_RMM)>;
+
+// [u]int->fp. Match GCC and default to using dynamic rounding mode.
+def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_S_W_INX $rs1, FRM_DYN)>;
+def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_S_WU_INX $rs1, FRM_DYN)>;
+def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_S_L_INX $rs1, FRM_DYN)>;
+def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_S_LU_INX $rs1, FRM_DYN)>;
+} // Predicates = [HasStdExtZfinx, IsRV64]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
index 73bb9c1fc5be..6c3c9a771d94 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td
@@ -25,13 +25,13 @@ def riscv_remuw : SDNode<"RISCVISD::REMUW", SDT_RISCVIntBinOpW>;
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtMOrZmmul] in {
-def MUL : ALU_rr<0b0000001, 0b000, "mul", /*Commutable*/1>,
+def MUL : ALU_rr<0b0000001, 0b000, "mul", Commutable=1>,
Sched<[WriteIMul, ReadIMul, ReadIMul]>;
-def MULH : ALU_rr<0b0000001, 0b001, "mulh", /*Commutable*/1>,
+def MULH : ALU_rr<0b0000001, 0b001, "mulh", Commutable=1>,
Sched<[WriteIMul, ReadIMul, ReadIMul]>;
def MULHSU : ALU_rr<0b0000001, 0b010, "mulhsu">,
Sched<[WriteIMul, ReadIMul, ReadIMul]>;
-def MULHU : ALU_rr<0b0000001, 0b011, "mulhu", /*Commutable*/1>,
+def MULHU : ALU_rr<0b0000001, 0b011, "mulhu", Commutable=1>,
Sched<[WriteIMul, ReadIMul, ReadIMul]>;
} // Predicates = [HasStdExtMOrZmmul]
@@ -47,7 +47,7 @@ def REMU : ALU_rr<0b0000001, 0b111, "remu">,
} // Predicates = [HasStdExtM]
let Predicates = [HasStdExtMOrZmmul, IsRV64], IsSignExtendingOpW = 1 in {
-def MULW : ALUW_rr<0b0000001, 0b000, "mulw", /*Commutable*/1>,
+def MULW : ALUW_rr<0b0000001, 0b000, "mulw", Commutable=1>,
Sched<[WriteIMul32, ReadIMul32, ReadIMul32]>;
} // Predicates = [HasStdExtMOrZmmul, IsRV64]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index eec697361e3b..6e5ee8043e92 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -103,85 +103,82 @@ class VMVRSched<int n> : Sched<[
!cast<SchedReadWrite>("ReadVMov" #n #"V")
]>;
-class VLESched<string mx> : Sched<[
- !cast<SchedReadWrite>("WriteVLDE_" #mx),
- !cast<SchedReadWrite>("ReadVLDX_" #mx), ReadVMask
+class VLESched<string lmul = "WorstCase"> : Sched<[
+ !cast<SchedReadWrite>("WriteVLDE_" #lmul),
+ ReadVLDX, ReadVMask
]>;
-class VSESched<string mx> : Sched<[
- !cast<SchedReadWrite>("WriteVSTE_" #mx),
- !cast<SchedReadWrite>("ReadVSTEV_" #mx),
- !cast<SchedReadWrite>("ReadVSTX_" #mx), ReadVMask
+class VSESched<string lmul = "WorstCase"> : Sched<[
+ !cast<SchedReadWrite>("WriteVSTE_" #lmul),
+ !cast<SchedReadWrite>("ReadVSTEV_" #lmul),
+ ReadVSTX, ReadVMask
]>;
-class VLSSched<int n, string mx> : Sched<[
- !cast<SchedReadWrite>("WriteVLDS" #n #"_" #mx),
- !cast<SchedReadWrite>("ReadVLDX_" #mx),
- !cast<SchedReadWrite>("ReadVLDSX_" #mx), ReadVMask
+class VLSSched<int eew, string emul = "WorstCase"> : Sched<[
+ !cast<SchedReadWrite>("WriteVLDS" #eew #"_" #emul),
+ ReadVLDX, ReadVLDSX, ReadVMask
]>;
-class VSSSched<int n, string mx> : Sched<[
- !cast<SchedReadWrite>("WriteVSTS" #n #"_" #mx),
- !cast<SchedReadWrite>("ReadVSTS" #n #"V_" #mx),
- !cast<SchedReadWrite>("ReadVSTX_" #mx),
- !cast<SchedReadWrite>("ReadVSTSX_" #mx), ReadVMask
+class VSSSched<int eew, string emul = "WorstCase"> : Sched<[
+ !cast<SchedReadWrite>("WriteVSTS" #eew #"_" #emul),
+ !cast<SchedReadWrite>("ReadVSTS" #eew #"V_" #emul),
+ ReadVSTX, ReadVSTSX, ReadVMask
]>;
-class VLXSched<int n, string o, string mx> : Sched<[
- !cast<SchedReadWrite>("WriteVLD" #o #"X" #n #"_" #mx),
- !cast<SchedReadWrite>("ReadVLDX_" #mx),
- !cast<SchedReadWrite>("ReadVLD" #o #"XV_" #mx), ReadVMask
+class VLXSched<int dataEEW, string isOrdered,
+ string dataEMUL = "WorstCase",
+ string idxEMUL = "WorstCase"> : Sched<[
+ !cast<SchedReadWrite>("WriteVLD" #isOrdered #"X" #dataEEW #"_" #dataEMUL),
+ ReadVLDX,
+ !cast<SchedReadWrite>("ReadVLD" #isOrdered #"XV_" #idxEMUL), ReadVMask
]>;
-class VSXSched<int n, string o, string mx> : Sched<[
- !cast<SchedReadWrite>("WriteVST" #o #"X" #n #"_" #mx),
- !cast<SchedReadWrite>("ReadVST" #o #"X" #n #"_" #mx),
- !cast<SchedReadWrite>("ReadVSTX_" #mx),
- !cast<SchedReadWrite>("ReadVST" #o #"XV_" #mx), ReadVMask
+class VSXSched<int dataEEW, string isOrdered,
+ string dataEMUL = "WorstCase",
+ string idxEMUL = "WorstCase"> : Sched<[
+ !cast<SchedReadWrite>("WriteVST" #isOrdered #"X" #dataEEW #"_" #dataEMUL),
+ !cast<SchedReadWrite>("ReadVST" #isOrdered #"X" #dataEEW #"_" #dataEMUL),
+ ReadVSTX, !cast<SchedReadWrite>("ReadVST" #isOrdered #"XV_" #idxEMUL), ReadVMask
]>;
-class VLFSched<string mx> : Sched<[
- !cast<SchedReadWrite>("WriteVLDFF_" #mx),
- !cast<SchedReadWrite>("ReadVLDX_" #mx), ReadVMask
+class VLFSched<string lmul = "WorstCase"> : Sched<[
+ !cast<SchedReadWrite>("WriteVLDFF_" #lmul),
+ ReadVLDX, ReadVMask
]>;
// Unit-Stride Segment Loads and Stores
-class VLSEGSched<int nf, int eew, string mx> : Sched<[
- !cast<SchedReadWrite>("WriteVLSEG" #nf #"e" #eew #"_" #mx),
- !cast<SchedReadWrite>("ReadVLDX_" #mx), ReadVMask
+class VLSEGSched<int nf, int eew, string emul = "WorstCase"> : Sched<[
+ !cast<SchedReadWrite>("WriteVLSEG" #nf #"e" #eew #"_" #emul),
+ ReadVLDX, ReadVMask
]>;
-class VSSEGSched<int nf, int eew, string mx> : Sched<[
- !cast<SchedReadWrite>("WriteVSSEG" #nf #"e" #eew #"_" #mx),
- !cast<SchedReadWrite>("ReadVSTEV_" #mx),
- !cast<SchedReadWrite>("ReadVSTX_" #mx), ReadVMask
+class VSSEGSched<int nf, int eew, string emul = "WorstCase"> : Sched<[
+ !cast<SchedReadWrite>("WriteVSSEG" #nf #"e" #eew #"_" #emul),
+ !cast<SchedReadWrite>("ReadVSTEV_" #emul),
+ ReadVSTX, ReadVMask
]>;
-class VLSEGFFSched<int nf, int eew, string mx> : Sched<[
- !cast<SchedReadWrite>("WriteVLSEGFF" #nf #"e" #eew #"_" #mx),
- !cast<SchedReadWrite>("ReadVLDX_" #mx), ReadVMask
+class VLSEGFFSched<int nf, int eew, string emul = "WorstCase"> : Sched<[
+ !cast<SchedReadWrite>("WriteVLSEGFF" #nf #"e" #eew #"_" #emul),
+ ReadVLDX, ReadVMask
]>;
// Strided Segment Loads and Stores
-class VLSSEGSched<int nf, int eew, string mx> : Sched<[
- !cast<SchedReadWrite>("WriteVLSSEG" #nf #"e" #eew #"_" #mx),
- !cast<SchedReadWrite>("ReadVLDX_" #mx),
- !cast<SchedReadWrite>("ReadVLDSX_" #mx), ReadVMask
+class VLSSEGSched<int nf, int eew, string emul = "WorstCase"> : Sched<[
+ !cast<SchedReadWrite>("WriteVLSSEG" #nf #"e" #eew #"_" #emul),
+ ReadVLDX, ReadVLDSX, ReadVMask
]>;
-class VSSSEGSched<int nf, int eew, string mx> : Sched<[
- !cast<SchedReadWrite>("WriteVSSSEG" #nf #"e" #eew #"_" #mx),
- !cast<SchedReadWrite>("ReadVSTS" #eew #"V" #"_" #mx),
- !cast<SchedReadWrite>("ReadVSTX_" #mx),
- !cast<SchedReadWrite>("ReadVSTSX_" #mx), ReadVMask
+class VSSSEGSched<int nf, int eew, string emul = "WorstCase"> : Sched<[
+ !cast<SchedReadWrite>("WriteVSSSEG" #nf #"e" #eew #"_" #emul),
+ !cast<SchedReadWrite>("ReadVSTS" #eew #"V_" #emul),
+ ReadVSTX, ReadVSTSX, ReadVMask
]>;
// Indexed Segment Loads and Stores
-class VLXSEGSched<int nf, int eew, string o, string mx> : Sched<[
- !cast<SchedReadWrite>("WriteVL" #o #"XSEG" #nf #"e" #eew #"_" #mx),
- !cast<SchedReadWrite>("ReadVLDX_" #mx),
- !cast<SchedReadWrite>("ReadVLD" #o #"XV" #"_" #mx), ReadVMask
+class VLXSEGSched<int nf, int eew, string isOrdered, string emul = "WorstCase"> : Sched<[
+ !cast<SchedReadWrite>("WriteVL" #isOrdered #"XSEG" #nf #"e" #eew #"_" #emul),
+ ReadVLDX, !cast<SchedReadWrite>("ReadVLD" #isOrdered #"XV_" #emul), ReadVMask
]>;
-class VSXSEGSched<int nf, int eew, string o, string mx> : Sched<[
- !cast<SchedReadWrite>("WriteVS" #o #"XSEG" #nf #"e" #eew #"_" #mx),
- !cast<SchedReadWrite>("ReadVST" #o #"X" #eew # "_" # mx),
- !cast<SchedReadWrite>("ReadVSTX_" #mx),
- !cast<SchedReadWrite>("ReadVST" #o #"XV" # "_" # mx), ReadVMask
+class VSXSEGSched<int nf, int eew, string isOrdered, string emul = "WorstCase"> : Sched<[
+ !cast<SchedReadWrite>("WriteVS" #isOrdered #"XSEG" #nf #"e" #eew #"_" #emul),
+ !cast<SchedReadWrite>("ReadVST" #isOrdered #"X" #eew #"_" #emul),
+ ReadVSTX, !cast<SchedReadWrite>("ReadVST" #isOrdered #"XV_" #emul), ReadVMask
]>;
//===----------------------------------------------------------------------===//
@@ -193,14 +190,14 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
class VUnitStrideLoad<RISCVWidth width, string opcodestr>
: RVInstVLU<0b000, width.Value{3}, LUMOPUnitStride, width.Value{2-0},
(outs VR:$vd),
- (ins GPRMem:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">;
+ (ins GPRMemZeroOffset:$rs1, VMaskOp:$vm), opcodestr, "$vd, ${rs1}$vm">;
let vm = 1, RVVConstraint = NoConstraint in {
// unit-stride whole register load vl<nf>r.v vd, (rs1)
class VWholeLoad<bits<3> nf, RISCVWidth width, string opcodestr, RegisterClass VRC>
: RVInstVLU<nf, width.Value{3}, LUMOPUnitStrideWholeReg,
- width.Value{2-0}, (outs VRC:$vd), (ins GPRMem:$rs1),
- opcodestr, "$vd, (${rs1})"> {
+ width.Value{2-0}, (outs VRC:$vd), (ins GPRMemZeroOffset:$rs1),
+ opcodestr, "$vd, $rs1"> {
let Uses = [];
}
@@ -208,110 +205,110 @@ class VWholeLoad<bits<3> nf, RISCVWidth width, string opcodestr, RegisterClass V
class VUnitStrideLoadMask<string opcodestr>
: RVInstVLU<0b000, LSWidth8.Value{3}, LUMOPUnitStrideMask, LSWidth8.Value{2-0},
(outs VR:$vd),
- (ins GPRMem:$rs1), opcodestr, "$vd, (${rs1})">;
+ (ins GPRMemZeroOffset:$rs1), opcodestr, "$vd, $rs1">;
} // vm = 1, RVVConstraint = NoConstraint
// unit-stride fault-only-first load vd, (rs1), vm
class VUnitStrideLoadFF<RISCVWidth width, string opcodestr>
: RVInstVLU<0b000, width.Value{3}, LUMOPUnitStrideFF, width.Value{2-0},
(outs VR:$vd),
- (ins GPRMem:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">;
+ (ins GPRMemZeroOffset:$rs1, VMaskOp:$vm), opcodestr, "$vd, ${rs1}$vm">;
// strided load vd, (rs1), rs2, vm
class VStridedLoad<RISCVWidth width, string opcodestr>
: RVInstVLS<0b000, width.Value{3}, width.Value{2-0},
(outs VR:$vd),
- (ins GPRMem:$rs1, GPR:$rs2, VMaskOp:$vm), opcodestr,
- "$vd, (${rs1}), $rs2$vm">;
+ (ins GPRMemZeroOffset:$rs1, GPR:$rs2, VMaskOp:$vm), opcodestr,
+ "$vd, $rs1, $rs2$vm">;
// indexed load vd, (rs1), vs2, vm
class VIndexedLoad<RISCVMOP mop, RISCVWidth width, string opcodestr>
: RVInstVLX<0b000, width.Value{3}, mop, width.Value{2-0},
(outs VR:$vd),
- (ins GPRMem:$rs1, VR:$vs2, VMaskOp:$vm), opcodestr,
- "$vd, (${rs1}), $vs2$vm">;
+ (ins GPRMemZeroOffset:$rs1, VR:$vs2, VMaskOp:$vm), opcodestr,
+ "$vd, $rs1, $vs2$vm">;
// unit-stride segment load vd, (rs1), vm
class VUnitStrideSegmentLoad<bits<3> nf, RISCVWidth width, string opcodestr>
: RVInstVLU<nf, width.Value{3}, LUMOPUnitStride, width.Value{2-0},
(outs VR:$vd),
- (ins GPRMem:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">;
+ (ins GPRMemZeroOffset:$rs1, VMaskOp:$vm), opcodestr, "$vd, ${rs1}$vm">;
// segment fault-only-first load vd, (rs1), vm
class VUnitStrideSegmentLoadFF<bits<3> nf, RISCVWidth width, string opcodestr>
: RVInstVLU<nf, width.Value{3}, LUMOPUnitStrideFF, width.Value{2-0},
(outs VR:$vd),
- (ins GPRMem:$rs1, VMaskOp:$vm), opcodestr, "$vd, (${rs1})$vm">;
+ (ins GPRMemZeroOffset:$rs1, VMaskOp:$vm), opcodestr, "$vd, ${rs1}$vm">;
// strided segment load vd, (rs1), rs2, vm
class VStridedSegmentLoad<bits<3> nf, RISCVWidth width, string opcodestr>
: RVInstVLS<nf, width.Value{3}, width.Value{2-0},
(outs VR:$vd),
- (ins GPRMem:$rs1, GPR:$rs2, VMaskOp:$vm), opcodestr,
- "$vd, (${rs1}), $rs2$vm">;
+ (ins GPRMemZeroOffset:$rs1, GPR:$rs2, VMaskOp:$vm), opcodestr,
+ "$vd, $rs1, $rs2$vm">;
// indexed segment load vd, (rs1), vs2, vm
class VIndexedSegmentLoad<bits<3> nf, RISCVMOP mop, RISCVWidth width,
string opcodestr>
: RVInstVLX<nf, width.Value{3}, mop, width.Value{2-0},
(outs VR:$vd),
- (ins GPRMem:$rs1, VR:$vs2, VMaskOp:$vm), opcodestr,
- "$vd, (${rs1}), $vs2$vm">;
+ (ins GPRMemZeroOffset:$rs1, VR:$vs2, VMaskOp:$vm), opcodestr,
+ "$vd, $rs1, $vs2$vm">;
} // hasSideEffects = 0, mayLoad = 1, mayStore = 0
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
// unit-stride store vd, vs3, (rs1), vm
class VUnitStrideStore<RISCVWidth width, string opcodestr>
: RVInstVSU<0b000, width.Value{3}, SUMOPUnitStride, width.Value{2-0},
- (outs), (ins VR:$vs3, GPRMem:$rs1, VMaskOp:$vm), opcodestr,
- "$vs3, (${rs1})$vm">;
+ (outs), (ins VR:$vs3, GPRMemZeroOffset:$rs1, VMaskOp:$vm), opcodestr,
+ "$vs3, ${rs1}$vm">;
let vm = 1 in {
// vs<nf>r.v vd, (rs1)
class VWholeStore<bits<3> nf, string opcodestr, RegisterClass VRC>
: RVInstVSU<nf, 0, SUMOPUnitStrideWholeReg,
- 0b000, (outs), (ins VRC:$vs3, GPRMem:$rs1),
- opcodestr, "$vs3, (${rs1})"> {
+ 0b000, (outs), (ins VRC:$vs3, GPRMemZeroOffset:$rs1),
+ opcodestr, "$vs3, $rs1"> {
let Uses = [];
}
// unit-stride mask store vd, vs3, (rs1)
class VUnitStrideStoreMask<string opcodestr>
: RVInstVSU<0b000, LSWidth8.Value{3}, SUMOPUnitStrideMask, LSWidth8.Value{2-0},
- (outs), (ins VR:$vs3, GPRMem:$rs1), opcodestr,
- "$vs3, (${rs1})">;
+ (outs), (ins VR:$vs3, GPRMemZeroOffset:$rs1), opcodestr,
+ "$vs3, $rs1">;
} // vm = 1
// strided store vd, vs3, (rs1), rs2, vm
class VStridedStore<RISCVWidth width, string opcodestr>
: RVInstVSS<0b000, width.Value{3}, width.Value{2-0}, (outs),
- (ins VR:$vs3, GPRMem:$rs1, GPR:$rs2, VMaskOp:$vm),
- opcodestr, "$vs3, (${rs1}), $rs2$vm">;
+ (ins VR:$vs3, GPRMemZeroOffset:$rs1, GPR:$rs2, VMaskOp:$vm),
+ opcodestr, "$vs3, $rs1, $rs2$vm">;
// indexed store vd, vs3, (rs1), vs2, vm
class VIndexedStore<RISCVMOP mop, RISCVWidth width, string opcodestr>
: RVInstVSX<0b000, width.Value{3}, mop, width.Value{2-0}, (outs),
- (ins VR:$vs3, GPRMem:$rs1, VR:$vs2, VMaskOp:$vm),
- opcodestr, "$vs3, (${rs1}), $vs2$vm">;
+ (ins VR:$vs3, GPRMemZeroOffset:$rs1, VR:$vs2, VMaskOp:$vm),
+ opcodestr, "$vs3, $rs1, $vs2$vm">;
// segment store vd, vs3, (rs1), vm
class VUnitStrideSegmentStore<bits<3> nf, RISCVWidth width, string opcodestr>
: RVInstVSU<nf, width.Value{3}, SUMOPUnitStride, width.Value{2-0},
- (outs), (ins VR:$vs3, GPRMem:$rs1, VMaskOp:$vm), opcodestr,
- "$vs3, (${rs1})$vm">;
+ (outs), (ins VR:$vs3, GPRMemZeroOffset:$rs1, VMaskOp:$vm), opcodestr,
+ "$vs3, ${rs1}$vm">;
// segment store vd, vs3, (rs1), rs2, vm
class VStridedSegmentStore<bits<3> nf, RISCVWidth width, string opcodestr>
: RVInstVSS<nf, width.Value{3}, width.Value{2-0}, (outs),
- (ins VR:$vs3, GPRMem:$rs1, GPR:$rs2, VMaskOp:$vm),
- opcodestr, "$vs3, (${rs1}), $rs2$vm">;
+ (ins VR:$vs3, GPRMemZeroOffset:$rs1, GPR:$rs2, VMaskOp:$vm),
+ opcodestr, "$vs3, $rs1, $rs2$vm">;
// segment store vd, vs3, (rs1), vs2, vm
class VIndexedSegmentStore<bits<3> nf, RISCVMOP mop, RISCVWidth width,
string opcodestr>
: RVInstVSX<nf, width.Value{3}, mop, width.Value{2-0}, (outs),
- (ins VR:$vs3, GPRMem:$rs1, VR:$vs2, VMaskOp:$vm),
- opcodestr, "$vs3, (${rs1}), $vs2$vm">;
+ (ins VR:$vs3, GPRMemZeroOffset:$rs1, VR:$vs2, VMaskOp:$vm),
+ opcodestr, "$vs3, $rs1, $vs2$vm">;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 1
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
@@ -410,6 +407,14 @@ class VALUVs2<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, string opcodestr>
: RVInstV<funct6, vs1, opv, (outs VR:$vd),
(ins VR:$vs2, VMaskOp:$vm),
opcodestr, "$vd, $vs2$vm">;
+
+// op vd, vs2 (use vs1 as instruction encoding)
+class VALUVs2NoVm<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, string opcodestr>
+ : RVInstV<funct6, vs1, opv, (outs VR:$vd),
+ (ins VR:$vs2), opcodestr,
+ "$vd, $vs2"> {
+ let vm = 1;
+}
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
//===----------------------------------------------------------------------===//
@@ -423,533 +428,548 @@ multiclass VIndexLoadStore<list<int> EEWList> {
def VLUXEI # n # _V :
VIndexedLoad<MOPLDIndexedUnord, w, "vluxei" # n # ".v">,
- VLXSched<n, "U", UpperBoundLMUL>;
+ VLXSched<n, "U">;
def VLOXEI # n # _V :
VIndexedLoad<MOPLDIndexedOrder, w, "vloxei" # n # ".v">,
- VLXSched<n, "O", UpperBoundLMUL>;
+ VLXSched<n, "O">;
def VSUXEI # n # _V :
VIndexedStore<MOPSTIndexedUnord, w, "vsuxei" # n # ".v">,
- VSXSched<n, "U", UpperBoundLMUL>;
+ VSXSched<n, "U">;
def VSOXEI # n # _V :
VIndexedStore<MOPSTIndexedOrder, w, "vsoxei" # n # ".v">,
- VSXSched<n, "O", UpperBoundLMUL>;
+ VSXSched<n, "O">;
}
}
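// A rough sketch of what one EEWList element now expands to, assuming the
// elided foreach binds n to each EEW and w to the matching LSWidth record
// (e.g. n = 16, w = LSWidth16):
//   def VLUXEI16_V : VIndexedLoad<MOPLDIndexedUnord, LSWidth16, "vluxei16.v">,
//                    VLXSched<16, "U">;  // WorstCase data and index EMUL
//   def VLOXEI16_V : VIndexedLoad<MOPLDIndexedOrder, LSWidth16, "vloxei16.v">,
//                    VLXSched<16, "O">;
// and likewise VSUXEI16_V / VSOXEI16_V with VSXSched<16, "U"> / <16, "O">.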
-multiclass VALU_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
- def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVIALUV_UpperBound, ReadVIALUV_UpperBound,
- ReadVIALUV_UpperBound, ReadVMask]>;
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVIALUX_UpperBound, ReadVIALUV_UpperBound,
- ReadVIALUX_UpperBound, ReadVMask]>;
- def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
- Sched<[WriteVIALUI_UpperBound, ReadVIALUV_UpperBound,
- ReadVMask]>;
+multiclass VALU_IV_V<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
+ Sched<[WriteVIALUV_WorstCase, ReadVIALUV_WorstCase,
+ ReadVIALUV_WorstCase, ReadVMask]>;
}
-multiclass VALU_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVIALUV_UpperBound, ReadVIALUV_UpperBound,
- ReadVIALUV_UpperBound, ReadVMask]>;
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVIALUX_UpperBound, ReadVIALUV_UpperBound,
- ReadVIALUX_UpperBound, ReadVMask]>;
+multiclass VALU_IV_X<string opcodestr, bits<6> funct6> {
+ def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
+ Sched<[WriteVIALUX_WorstCase, ReadVIALUV_WorstCase,
+ ReadVIALUX_WorstCase, ReadVMask]>;
}
-multiclass VALU_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVIALUV_UpperBound, ReadVIALUV_UpperBound,
- ReadVIALUX_UpperBound, ReadVMask]>;
- def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
- Sched<[WriteVIALUI_UpperBound, ReadVIALUV_UpperBound,
+multiclass VALU_IV_I<string opcodestr, bits<6> funct6> {
+ def I : VALUVI<funct6, opcodestr # ".vi", simm5>,
+ Sched<[WriteVIALUI_WorstCase, ReadVIALUV_WorstCase,
ReadVMask]>;
}
-multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
+multiclass VALU_IV_V_X_I<string opcodestr, bits<6> funct6>
+ : VALU_IV_V<opcodestr, funct6>,
+ VALU_IV_X<opcodestr, funct6>,
+ VALU_IV_I<opcodestr, funct6>;
+
+multiclass VALU_IV_V_X<string opcodestr, bits<6> funct6>
+ : VALU_IV_V<opcodestr, funct6>,
+ VALU_IV_X<opcodestr, funct6>;
+
+multiclass VALU_IV_X_I<string opcodestr, bits<6> funct6>
+ : VALU_IV_X<opcodestr, funct6>,
+ VALU_IV_I<opcodestr, funct6>;
+
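// A brief sketch of the composition pattern above, using a hypothetical user
// "vfoo" (funct6 left symbolic): a single
//   defm VFOO_V : VALU_IV_V_X_I<"vfoo", funct6>;
// now yields VFOO_VV ("vfoo.vv"), VFOO_VX ("vfoo.vx") and VFOO_VI ("vfoo.vi")
// via the smaller VALU_IV_V / VALU_IV_X / VALU_IV_I multiclasses, each tied to
// the corresponding WriteVIALU{V,X,I}_WorstCase scheduling resources.
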
+multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw> {
def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVIWALUV_UpperBound, ReadVIWALUV_UpperBound,
- ReadVIWALUV_UpperBound, ReadVMask]>;
+ Sched<[WriteVIWALUV_WorstCase, ReadVIWALUV_WorstCase,
+ ReadVIWALUV_WorstCase, ReadVMask]>;
def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVIWALUX_UpperBound, ReadVIWALUV_UpperBound,
- ReadVIWALUX_UpperBound, ReadVMask]>;
+ Sched<[WriteVIWALUX_WorstCase, ReadVIWALUV_WorstCase,
+ ReadVIWALUX_WorstCase, ReadVMask]>;
}
-multiclass VMAC_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUrVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVIMulAddV_UpperBound, ReadVIMulAddV_UpperBound,
- ReadVIMulAddV_UpperBound, ReadVMask]>;
- def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVIMulAddX_UpperBound, ReadVIMulAddV_UpperBound,
- ReadVIMulAddX_UpperBound, ReadVMask]>;
+multiclass VMAC_MV_V_X<string opcodestr, bits<6> funct6> {
+ def V : VALUrVV<funct6, OPMVV, opcodestr # ".vv">,
+ Sched<[WriteVIMulAddV_WorstCase, ReadVIMulAddV_WorstCase,
+ ReadVIMulAddV_WorstCase, ReadVMask]>;
+ def X : VALUrVX<funct6, OPMVX, opcodestr # ".vx">,
+ Sched<[WriteVIMulAddX_WorstCase, ReadVIMulAddV_WorstCase,
+ ReadVIMulAddX_WorstCase, ReadVMask]>;
}
-multiclass VWMAC_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUrVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVIWMulAddV_UpperBound, ReadVIWMulAddV_UpperBound,
- ReadVIWMulAddV_UpperBound, ReadVMask]>;
- def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVIWMulAddX_UpperBound, ReadVIWMulAddV_UpperBound,
- ReadVIWMulAddX_UpperBound, ReadVMask]>;
+multiclass VWMAC_MV_X<string opcodestr, bits<6> funct6> {
+ def X : VALUrVX<funct6, OPMVX, opcodestr # ".vx">,
+ Sched<[WriteVIWMulAddX_WorstCase, ReadVIWMulAddV_WorstCase,
+ ReadVIWMulAddX_WorstCase, ReadVMask]>;
}
-multiclass VWMAC_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def X : VALUrVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVIWMulAddX_UpperBound, ReadVIWMulAddV_UpperBound,
- ReadVIWMulAddX_UpperBound, ReadVMask]>;
+multiclass VWMAC_MV_V_X<string opcodestr, bits<6> funct6>
+ : VWMAC_MV_X<opcodestr, funct6> {
+ def V : VALUrVV<funct6, OPMVV, opcodestr # ".vv">,
+ Sched<[WriteVIWMulAddV_WorstCase, ReadVIWMulAddV_WorstCase,
+ ReadVIWMulAddV_WorstCase, ReadVMask]>;
}
multiclass VALU_MV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>,
- Sched<[WriteVExtV_UpperBound, ReadVExtV_UpperBound, ReadVMask]>;
-}
-
-multiclass VALUm_IV_V_X_I<string opcodestr, bits<6> funct6> {
- def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">,
- Sched<[WriteVICALUV_UpperBound, ReadVICALUV_UpperBound,
- ReadVICALUV_UpperBound, ReadVMask]>;
- def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">,
- Sched<[WriteVICALUX_UpperBound, ReadVICALUV_UpperBound,
- ReadVICALUX_UpperBound, ReadVMask]>;
- def IM : VALUmVI<funct6, opcodestr # ".vim">,
- Sched<[WriteVICALUI_UpperBound, ReadVICALUV_UpperBound,
- ReadVMask]>;
+ Sched<[WriteVExtV_WorstCase, ReadVExtV_WorstCase, ReadVMask]>;
}
multiclass VMRG_IV_V_X_I<string opcodestr, bits<6> funct6> {
def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">,
- Sched<[WriteVIMergeV_UpperBound, ReadVIMergeV_UpperBound,
- ReadVIMergeV_UpperBound, ReadVMask]>;
+ Sched<[WriteVIMergeV_WorstCase, ReadVIMergeV_WorstCase,
+ ReadVIMergeV_WorstCase, ReadVMask]>;
def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">,
- Sched<[WriteVIMergeX_UpperBound, ReadVIMergeV_UpperBound,
- ReadVIMergeX_UpperBound, ReadVMask]>;
+ Sched<[WriteVIMergeX_WorstCase, ReadVIMergeV_WorstCase,
+ ReadVIMergeX_WorstCase, ReadVMask]>;
def IM : VALUmVI<funct6, opcodestr # ".vim">,
- Sched<[WriteVIMergeI_UpperBound, ReadVIMergeV_UpperBound,
+ Sched<[WriteVIMergeI_WorstCase, ReadVIMergeV_WorstCase,
ReadVMask]>;
}
multiclass VALUm_IV_V_X<string opcodestr, bits<6> funct6> {
def VM : VALUmVV<funct6, OPIVV, opcodestr # ".vvm">,
- Sched<[WriteVICALUV_UpperBound, ReadVICALUV_UpperBound,
- ReadVICALUV_UpperBound, ReadVMask]>;
+ Sched<[WriteVICALUV_WorstCase, ReadVICALUV_WorstCase,
+ ReadVICALUV_WorstCase, ReadVMask]>;
def XM : VALUmVX<funct6, OPIVX, opcodestr # ".vxm">,
- Sched<[WriteVICALUX_UpperBound, ReadVICALUV_UpperBound,
- ReadVICALUX_UpperBound, ReadVMask]>;
+ Sched<[WriteVICALUX_WorstCase, ReadVICALUV_WorstCase,
+ ReadVICALUX_WorstCase, ReadVMask]>;
}
-multiclass VALUNoVm_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5> {
- def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVICALUV_UpperBound, ReadVICALUV_UpperBound,
- ReadVICALUV_UpperBound]>;
- def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVICALUX_UpperBound, ReadVICALUV_UpperBound,
- ReadVICALUX_UpperBound]>;
- def I : VALUVINoVm<funct6, opcodestr # ".vi", optype>,
- Sched<[WriteVICALUI_UpperBound, ReadVICALUV_UpperBound]>;
+multiclass VALUm_IV_V_X_I<string opcodestr, bits<6> funct6>
+ : VALUm_IV_V_X<opcodestr, funct6> {
+ def IM : VALUmVI<funct6, opcodestr # ".vim">,
+ Sched<[WriteVICALUI_WorstCase, ReadVICALUV_WorstCase,
+ ReadVMask]>;
}
multiclass VALUNoVm_IV_V_X<string opcodestr, bits<6> funct6> {
def V : VALUVVNoVm<funct6, OPIVV, opcodestr # ".vv">,
- Sched<[WriteVICALUV_UpperBound, ReadVICALUV_UpperBound,
- ReadVICALUV_UpperBound]>;
+ Sched<[WriteVICALUV_WorstCase, ReadVICALUV_WorstCase,
+ ReadVICALUV_WorstCase]>;
def X : VALUVXNoVm<funct6, OPIVX, opcodestr # ".vx">,
- Sched<[WriteVICALUX_UpperBound, ReadVICALUV_UpperBound,
- ReadVICALUX_UpperBound]>;
+ Sched<[WriteVICALUX_WorstCase, ReadVICALUV_WorstCase,
+ ReadVICALUX_WorstCase]>;
}
-multiclass VALU_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVFALUV_UpperBound, ReadVFALUV_UpperBound,
- ReadVFALUV_UpperBound, ReadVMask]>;
- def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
- Sched<[WriteVFALUF_UpperBound, ReadVFALUV_UpperBound,
- ReadVFALUF_UpperBound, ReadVMask]>;
+multiclass VALUNoVm_IV_V_X_I<string opcodestr, bits<6> funct6>
+ : VALUNoVm_IV_V_X<opcodestr, funct6> {
+ def I : VALUVINoVm<funct6, opcodestr # ".vi", simm5>,
+ Sched<[WriteVICALUI_WorstCase, ReadVICALUV_WorstCase]>;
}
-multiclass VALU_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
- Sched<[WriteVFALUF_UpperBound, ReadVFALUV_UpperBound,
- ReadVFALUF_UpperBound, ReadVMask]>;
+multiclass VALU_FV_F<string opcodestr, bits<6> funct6> {
+ def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
+ Sched<[WriteVFALUF_WorstCase, ReadVFALUV_WorstCase,
+ ReadVFALUF_WorstCase, ReadVMask]>;
}
-multiclass VWALU_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVFWALUV_UpperBound, ReadVFWALUV_UpperBound,
- ReadVFWALUV_UpperBound, ReadVMask]>;
- def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
- Sched<[WriteVFWALUF_UpperBound, ReadVFWALUV_UpperBound,
- ReadVFWALUF_UpperBound, ReadVMask]>;
+multiclass VALU_FV_V_F<string opcodestr, bits<6> funct6>
+ : VALU_FV_F<opcodestr, funct6> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
+ Sched<[WriteVFALUV_WorstCase, ReadVFALUV_WorstCase,
+ ReadVFALUV_WorstCase, ReadVMask]>;
}
-multiclass VMUL_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
+multiclass VWALU_FV_V_F<string opcodestr, bits<6> funct6, string vw> {
def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVFMulV_UpperBound, ReadVFMulV_UpperBound,
- ReadVFMulV_UpperBound, ReadVMask]>;
+ Sched<[WriteVFWALUV_WorstCase, ReadVFWALUV_WorstCase,
+ ReadVFWALUV_WorstCase, ReadVMask]>;
def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
- Sched<[WriteVFMulF_UpperBound, ReadVFMulV_UpperBound,
- ReadVFMulF_UpperBound, ReadVMask]>;
+ Sched<[WriteVFWALUF_WorstCase, ReadVFWALUV_WorstCase,
+ ReadVFWALUF_WorstCase, ReadVMask]>;
}
-multiclass VDIV_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVFDivV_UpperBound, ReadVFDivV_UpperBound,
- ReadVFDivV_UpperBound, ReadVMask]>;
- def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
- Sched<[WriteVFDivF_UpperBound, ReadVFDivV_UpperBound,
- ReadVFDivF_UpperBound, ReadVMask]>;
+multiclass VMUL_FV_V_F<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
+ Sched<[WriteVFMulV_WorstCase, ReadVFMulV_WorstCase,
+ ReadVFMulV_WorstCase, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
+ Sched<[WriteVFMulF_WorstCase, ReadVFMulV_WorstCase,
+ ReadVFMulF_WorstCase, ReadVMask]>;
}
-multiclass VRDIV_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
- Sched<[WriteVFDivF_UpperBound, ReadVFDivV_UpperBound,
- ReadVFDivF_UpperBound, ReadVMask]>;
+multiclass VDIV_FV_F<string opcodestr, bits<6> funct6> {
+ def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
+ Sched<[WriteVFDivF_WorstCase, ReadVFDivV_WorstCase,
+ ReadVFDivF_WorstCase, ReadVMask]>;
}
-multiclass VWMUL_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVFWMulV_UpperBound, ReadVFWMulV_UpperBound,
- ReadVFWMulV_UpperBound, ReadVMask]>;
- def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
- Sched<[WriteVFWMulF_UpperBound, ReadVFWMulV_UpperBound,
- ReadVFWMulF_UpperBound, ReadVMask]>;
+multiclass VDIV_FV_V_F<string opcodestr, bits<6> funct6>
+ : VDIV_FV_F<opcodestr, funct6> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
+ Sched<[WriteVFDivV_WorstCase, ReadVFDivV_WorstCase,
+ ReadVFDivV_WorstCase, ReadVMask]>;
+}
+
+multiclass VWMUL_FV_V_F<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
+ Sched<[WriteVFWMulV_WorstCase, ReadVFWMulV_WorstCase,
+ ReadVFWMulV_WorstCase, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
+ Sched<[WriteVFWMulF_WorstCase, ReadVFWMulV_WorstCase,
+ ReadVFWMulF_WorstCase, ReadVMask]>;
}
-multiclass VMAC_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUrVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVFMulAddV_UpperBound, ReadVFMulAddV_UpperBound,
- ReadVFMulAddV_UpperBound, ReadVMask]>;
- def F : VALUrVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
- Sched<[WriteVFMulAddF_UpperBound, ReadVFMulAddV_UpperBound,
- ReadVFMulAddF_UpperBound, ReadVMask]>;
+multiclass VMAC_FV_V_F<string opcodestr, bits<6> funct6> {
+ def V : VALUrVV<funct6, OPFVV, opcodestr # ".vv">,
+ Sched<[WriteVFMulAddV_WorstCase, ReadVFMulAddV_WorstCase,
+ ReadVFMulAddV_WorstCase, ReadVMask]>;
+ def F : VALUrVF<funct6, OPFVF, opcodestr # ".vf">,
+ Sched<[WriteVFMulAddF_WorstCase, ReadVFMulAddV_WorstCase,
+ ReadVFMulAddF_WorstCase, ReadVMask]>;
}
-multiclass VWMAC_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUrVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVFWMulAddV_UpperBound, ReadVFWMulAddV_UpperBound,
- ReadVFWMulAddV_UpperBound, ReadVMask]>;
- def F : VALUrVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
- Sched<[WriteVFWMulAddF_UpperBound, ReadVFWMulAddV_UpperBound,
- ReadVFWMulAddF_UpperBound, ReadVMask]>;
+multiclass VWMAC_FV_V_F<string opcodestr, bits<6> funct6> {
+ def V : VALUrVV<funct6, OPFVV, opcodestr # ".vv">,
+ Sched<[WriteVFWMulAddV_WorstCase, ReadVFWMulAddV_WorstCase,
+ ReadVFWMulAddV_WorstCase, ReadVMask]>;
+ def F : VALUrVF<funct6, OPFVF, opcodestr # ".vf">,
+ Sched<[WriteVFWMulAddF_WorstCase, ReadVFWMulAddV_WorstCase,
+ ReadVFWMulAddF_WorstCase, ReadVMask]>;
}
multiclass VSQR_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFSqrtV_UpperBound, ReadVFSqrtV_UpperBound,
+ Sched<[WriteVFSqrtV_WorstCase, ReadVFSqrtV_WorstCase,
ReadVMask]>;
}
multiclass VRCP_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFRecpV_UpperBound, ReadVFRecpV_UpperBound,
+ Sched<[WriteVFRecpV_WorstCase, ReadVFRecpV_WorstCase,
ReadVMask]>;
}
-multiclass VCMP_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVFCmpV_UpperBound, ReadVFCmpV_UpperBound,
- ReadVFCmpV_UpperBound, ReadVMask]>;
- def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
- Sched<[WriteVFCmpF_UpperBound, ReadVFCmpV_UpperBound,
- ReadVFCmpF_UpperBound, ReadVMask]>;
+multiclass VMINMAX_FV_V_F<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
+ Sched<[WriteVFMinMaxV_WorstCase, ReadVFMinMaxV_WorstCase,
+ ReadVFMinMaxV_WorstCase, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
+ Sched<[WriteVFMinMaxF_WorstCase, ReadVFMinMaxV_WorstCase,
+ ReadVFMinMaxF_WorstCase, ReadVMask]>;
}
-multiclass VCMP_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
- Sched<[WriteVFCmpF_UpperBound, ReadVFCmpV_UpperBound,
- ReadVFCmpF_UpperBound, ReadVMask]>;
+multiclass VCMP_FV_F<string opcodestr, bits<6> funct6> {
+ def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
+ Sched<[WriteVFCmpF_WorstCase, ReadVFCmpV_WorstCase,
+ ReadVFCmpF_WorstCase, ReadVMask]>;
}
-multiclass VSGNJ_FV_V_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPFVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVFSgnjV_UpperBound, ReadVFSgnjV_UpperBound,
- ReadVFSgnjV_UpperBound, ReadVMask]>;
- def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
- Sched<[WriteVFSgnjF_UpperBound, ReadVFSgnjV_UpperBound,
- ReadVFSgnjF_UpperBound, ReadVMask]>;
+multiclass VCMP_FV_V_F<string opcodestr, bits<6> funct6>
+ : VCMP_FV_F<opcodestr, funct6> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
+ Sched<[WriteVFCmpV_WorstCase, ReadVFCmpV_WorstCase,
+ ReadVFCmpV_WorstCase, ReadVMask]>;
+}
+
+multiclass VSGNJ_FV_V_F<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPFVV, opcodestr # ".vv">,
+ Sched<[WriteVFSgnjV_WorstCase, ReadVFSgnjV_WorstCase,
+ ReadVFSgnjV_WorstCase, ReadVMask]>;
+ def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
+ Sched<[WriteVFSgnjF_WorstCase, ReadVFSgnjV_WorstCase,
+ ReadVFSgnjF_WorstCase, ReadVMask]>;
}
multiclass VCLS_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFClassV_UpperBound, ReadVFClassV_UpperBound,
+ Sched<[WriteVFClassV_WorstCase, ReadVFClassV_WorstCase,
ReadVMask]>;
}
multiclass VCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFCvtIToFV_UpperBound, ReadVFCvtIToFV_UpperBound,
+ Sched<[WriteVFCvtIToFV_WorstCase, ReadVFCvtIToFV_WorstCase,
ReadVMask]>;
}
multiclass VCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFCvtFToIV_UpperBound, ReadVFCvtFToIV_UpperBound,
+ Sched<[WriteVFCvtFToIV_WorstCase, ReadVFCvtFToIV_WorstCase,
ReadVMask]>;
}
multiclass VWCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFWCvtIToFV_UpperBound, ReadVFWCvtIToFV_UpperBound,
+ Sched<[WriteVFWCvtIToFV_WorstCase, ReadVFWCvtIToFV_WorstCase,
ReadVMask]>;
}
multiclass VWCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFWCvtFToIV_UpperBound, ReadVFWCvtFToIV_UpperBound,
+ Sched<[WriteVFWCvtFToIV_WorstCase, ReadVFWCvtFToIV_WorstCase,
ReadVMask]>;
}
multiclass VWCVTF_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFWCvtFToFV_UpperBound, ReadVFWCvtFToFV_UpperBound,
+ Sched<[WriteVFWCvtFToFV_WorstCase, ReadVFWCvtFToFV_WorstCase,
ReadVMask]>;
}
multiclass VNCVTF_IV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFNCvtIToFV_UpperBound, ReadVFNCvtIToFV_UpperBound,
+ Sched<[WriteVFNCvtIToFV_WorstCase, ReadVFNCvtIToFV_WorstCase,
ReadVMask]>;
}
multiclass VNCVTI_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFNCvtFToIV_UpperBound, ReadVFNCvtFToIV_UpperBound,
+ Sched<[WriteVFNCvtFToIV_WorstCase, ReadVFNCvtFToIV_WorstCase,
ReadVMask]>;
}
multiclass VNCVTF_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPFVV, opcodestr>,
- Sched<[WriteVFNCvtFToFV_UpperBound, ReadVFNCvtFToFV_UpperBound,
+ Sched<[WriteVFNCvtFToFV_WorstCase, ReadVFNCvtFToFV_WorstCase,
ReadVMask]>;
}
multiclass VRED_MV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">,
- Sched<[WriteVIRedV, ReadVIRedV, ReadVIRedV0, ReadVMask]>;
+ Sched<[WriteVIRedV_From_WorstCase, ReadVIRedV, ReadVIRedV0,
+ ReadVMask]>;
+}
+
+multiclass VREDMINMAX_MV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPMVV, opcodestr # ".vs">,
+ Sched<[WriteVIRedMinMaxV_From_WorstCase, ReadVIRedV, ReadVIRedV0,
+ ReadVMask]>;
}
multiclass VWRED_IV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPIVV, opcodestr # ".vs">,
- Sched<[WriteVIWRedV, ReadVIWRedV, ReadVIWRedV0, ReadVMask]>;
+ Sched<[WriteVIWRedV_From_WorstCase, ReadVIWRedV, ReadVIWRedV0,
+ ReadVMask]>;
}
multiclass VRED_FV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
- Sched<[WriteVFRedV, ReadVFRedV, ReadVFRedV0, ReadVMask]>;
+ Sched<[WriteVFRedV_From_WorstCase, ReadVFRedV, ReadVFRedV0,
+ ReadVMask]>;
+}
+
+multiclass VREDMINMAX_FV_V<string opcodestr, bits<6> funct6> {
+ def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
+ Sched<[WriteVFRedMinMaxV_From_WorstCase, ReadVFRedV, ReadVFRedV0,
+ ReadVMask]>;
}
multiclass VREDO_FV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
- Sched<[WriteVFRedOV, ReadVFRedOV, ReadVFRedOV0, ReadVMask]>;
+ Sched<[WriteVFRedOV_From_WorstCase, ReadVFRedOV, ReadVFRedOV0,
+ ReadVMask]>;
}
multiclass VWRED_FV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
- Sched<[WriteVFWRedV, ReadVFWRedV, ReadVFWRedV0, ReadVMask]>;
+ Sched<[WriteVFWRedV_From_WorstCase, ReadVFWRedV, ReadVFWRedV0,
+ ReadVMask]>;
}
multiclass VWREDO_FV_V<string opcodestr, bits<6> funct6> {
def _VS : VALUVV<funct6, OPFVV, opcodestr # ".vs">,
- Sched<[WriteVFWRedOV, ReadVFWRedOV, ReadVFWRedOV0, ReadVMask]>;
+ Sched<[WriteVFWRedOV_From_WorstCase, ReadVFWRedOV, ReadVFWRedOV0,
+ ReadVMask]>;
}
multiclass VMALU_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> {
def M : VALUVVNoVm<funct6, OPMVV, opcodestr #"." #vm #"m">,
- Sched<[WriteVMALUV_UpperBound, ReadVMALUV_UpperBound,
- ReadVMALUV_UpperBound]>;
+ Sched<[WriteVMALUV_WorstCase, ReadVMALUV_WorstCase,
+ ReadVMALUV_WorstCase]>;
}
multiclass VMSFS_MV_V<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>,
- Sched<[WriteVMSFSV_UpperBound, ReadVMSFSV_UpperBound, ReadVMask]>;
+ Sched<[WriteVMSFSV_WorstCase, ReadVMSFSV_WorstCase, ReadVMask]>;
}
multiclass VMIOT_MV_V<string opcodestr, bits<6> funct6, bits<5> vs1> {
def "" : VALUVs2<funct6, vs1, OPMVV, opcodestr>,
- Sched<[WriteVMIotV_UpperBound, ReadVMIotV_UpperBound, ReadVMask]>;
-}
-
-multiclass VSHT_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
- def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVShiftV_UpperBound, ReadVShiftV_UpperBound,
- ReadVShiftV_UpperBound, ReadVMask]>;
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVShiftX_UpperBound, ReadVShiftV_UpperBound,
- ReadVShiftX_UpperBound, ReadVMask]>;
- def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
- Sched<[WriteVShiftI_UpperBound, ReadVShiftV_UpperBound,
+ Sched<[WriteVMIotV_WorstCase, ReadVMIotV_WorstCase, ReadVMask]>;
+}
+
+multiclass VSHT_IV_V_X_I<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
+ Sched<[WriteVShiftV_WorstCase, ReadVShiftV_WorstCase,
+ ReadVShiftV_WorstCase, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
+ Sched<[WriteVShiftX_WorstCase, ReadVShiftV_WorstCase,
+ ReadVShiftX_WorstCase, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # ".vi", uimm5>,
+ Sched<[WriteVShiftI_WorstCase, ReadVShiftV_WorstCase,
ReadVMask]>;
}
-multiclass VNSHT_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
- def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVNShiftV_UpperBound, ReadVNShiftV_UpperBound,
- ReadVNShiftV_UpperBound, ReadVMask]>;
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVNShiftX_UpperBound, ReadVNShiftV_UpperBound,
- ReadVNShiftX_UpperBound, ReadVMask]>;
- def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
- Sched<[WriteVNShiftI_UpperBound, ReadVNShiftV_UpperBound,
+multiclass VNSHT_IV_V_X_I<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # ".wv">,
+ Sched<[WriteVNShiftV_WorstCase, ReadVNShiftV_WorstCase,
+ ReadVNShiftV_WorstCase, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # ".wx">,
+ Sched<[WriteVNShiftX_WorstCase, ReadVNShiftV_WorstCase,
+ ReadVNShiftX_WorstCase, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # ".wi", uimm5>,
+ Sched<[WriteVNShiftI_WorstCase, ReadVNShiftV_WorstCase,
ReadVMask]>;
}
-multiclass VCMP_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
- def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVICmpV_UpperBound, ReadVICmpV_UpperBound,
- ReadVICmpV_UpperBound, ReadVMask]>;
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVICmpX_UpperBound, ReadVICmpV_UpperBound,
- ReadVICmpX_UpperBound, ReadVMask]>;
- def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
- Sched<[WriteVICmpI_UpperBound, ReadVICmpV_UpperBound,
- ReadVMask]>;
+multiclass VMINMAX_IV_V_X<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
+ Sched<[WriteVIMinMaxV_WorstCase, ReadVIMinMaxV_WorstCase,
+ ReadVIMinMaxV_WorstCase, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
+ Sched<[WriteVIMinMaxX_WorstCase, ReadVIMinMaxV_WorstCase,
+ ReadVIMinMaxX_WorstCase, ReadVMask]>;
+}
+
+multiclass VCMP_IV_V<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
+ Sched<[WriteVICmpV_WorstCase, ReadVICmpV_WorstCase,
+ ReadVICmpV_WorstCase, ReadVMask]>;
}
-multiclass VCMP_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVICmpV_UpperBound, ReadVICmpV_UpperBound,
- ReadVICmpX_UpperBound, ReadVMask]>;
- def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
- Sched<[WriteVICmpI_UpperBound, ReadVICmpV_UpperBound,
+multiclass VCMP_IV_X<string opcodestr, bits<6> funct6> {
+ def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
+ Sched<[WriteVICmpX_WorstCase, ReadVICmpV_WorstCase,
+ ReadVICmpX_WorstCase, ReadVMask]>;
+}
+
+multiclass VCMP_IV_I<string opcodestr, bits<6> funct6> {
+ def I : VALUVI<funct6, opcodestr # ".vi", simm5>,
+ Sched<[WriteVICmpI_WorstCase, ReadVICmpV_WorstCase,
ReadVMask]>;
}
-multiclass VCMP_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVICmpV_UpperBound, ReadVICmpV_UpperBound,
- ReadVICmpV_UpperBound, ReadVMask]>;
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVICmpX_UpperBound, ReadVICmpV_UpperBound,
- ReadVICmpX_UpperBound, ReadVMask]>;
+multiclass VCMP_IV_V_X_I<string opcodestr, bits<6> funct6>
+ : VCMP_IV_V<opcodestr, funct6>,
+ VCMP_IV_X<opcodestr, funct6>,
+ VCMP_IV_I<opcodestr, funct6>;
+
+multiclass VCMP_IV_X_I<string opcodestr, bits<6> funct6>
+ : VCMP_IV_X<opcodestr, funct6>,
+ VCMP_IV_I<opcodestr, funct6>;
+
+multiclass VCMP_IV_V_X<string opcodestr, bits<6> funct6>
+ : VCMP_IV_V<opcodestr, funct6>,
+ VCMP_IV_X<opcodestr, funct6>;
+
+multiclass VMUL_MV_V_X<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # ".vv">,
+ Sched<[WriteVIMulV_WorstCase, ReadVIMulV_WorstCase,
+ ReadVIMulV_WorstCase, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">,
+ Sched<[WriteVIMulX_WorstCase, ReadVIMulV_WorstCase,
+ ReadVIMulX_WorstCase, ReadVMask]>;
}
-multiclass VMUL_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVIMulV_UpperBound, ReadVIMulV_UpperBound,
- ReadVIMulV_UpperBound, ReadVMask]>;
- def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVIMulX_UpperBound, ReadVIMulV_UpperBound,
- ReadVIMulX_UpperBound, ReadVMask]>;
+multiclass VWMUL_MV_V_X<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # ".vv">,
+ Sched<[WriteVIWMulV_WorstCase, ReadVIWMulV_WorstCase,
+ ReadVIWMulV_WorstCase, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">,
+ Sched<[WriteVIWMulX_WorstCase, ReadVIWMulV_WorstCase,
+ ReadVIWMulX_WorstCase, ReadVMask]>;
}
-multiclass VWMUL_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVIWMulV_UpperBound, ReadVIWMulV_UpperBound,
- ReadVIWMulV_UpperBound, ReadVMask]>;
- def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVIWMulX_UpperBound, ReadVIWMulV_UpperBound,
- ReadVIWMulX_UpperBound, ReadVMask]>;
+multiclass VDIV_MV_V_X<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # ".vv">,
+ Sched<[WriteVIDivV_WorstCase, ReadVIDivV_WorstCase,
+ ReadVIDivV_WorstCase, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">,
+ Sched<[WriteVIDivX_WorstCase, ReadVIDivV_WorstCase,
+ ReadVIDivX_WorstCase, ReadVMask]>;
}
-multiclass VDIV_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVIDivV_UpperBound, ReadVIDivV_UpperBound,
- ReadVIDivV_UpperBound, ReadVMask]>;
- def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVIDivX_UpperBound, ReadVIDivV_UpperBound,
- ReadVIDivX_UpperBound, ReadVMask]>;
-}
-
-multiclass VSALU_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
- def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVSALUV_UpperBound, ReadVSALUV_UpperBound,
- ReadVSALUV_UpperBound, ReadVMask]>;
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVSALUX_UpperBound, ReadVSALUV_UpperBound,
- ReadVSALUX_UpperBound, ReadVMask]>;
- def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
- Sched<[WriteVSALUI_UpperBound, ReadVSALUV_UpperBound,
- ReadVMask]>;
+multiclass VSALU_IV_V_X<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
+ Sched<[WriteVSALUV_WorstCase, ReadVSALUV_WorstCase,
+ ReadVSALUV_WorstCase, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
+ Sched<[WriteVSALUX_WorstCase, ReadVSALUV_WorstCase,
+ ReadVSALUX_WorstCase, ReadVMask]>;
}
-multiclass VSALU_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVSALUV_UpperBound, ReadVSALUV_UpperBound,
- ReadVSALUV_UpperBound, ReadVMask]>;
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVSALUX_UpperBound, ReadVSALUV_UpperBound,
- ReadVSALUX_UpperBound, ReadVMask]>;
+multiclass VSALU_IV_V_X_I<string opcodestr, bits<6> funct6>
+ : VSALU_IV_V_X<opcodestr, funct6> {
+ def I : VALUVI<funct6, opcodestr # ".vi", simm5>,
+ Sched<[WriteVSALUI_WorstCase, ReadVSALUV_WorstCase,
+ ReadVMask]>;
}
-multiclass VAALU_MV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPMVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVAALUV_UpperBound, ReadVAALUV_UpperBound,
- ReadVAALUV_UpperBound, ReadVMask]>;
- def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVAALUX_UpperBound, ReadVAALUV_UpperBound,
- ReadVAALUX_UpperBound, ReadVMask]>;
-}
-
-multiclass VSMUL_IV_V_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVSMulV_UpperBound, ReadVSMulV_UpperBound,
- ReadVSMulV_UpperBound, ReadVMask]>;
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVSMulX_UpperBound, ReadVSMulV_UpperBound,
- ReadVSMulX_UpperBound, ReadVMask]>;
-}
-
-multiclass VSSHF_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
- def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVSShiftV_UpperBound, ReadVSShiftV_UpperBound,
- ReadVSShiftV_UpperBound, ReadVMask]>;
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVSShiftX_UpperBound, ReadVSShiftV_UpperBound,
- ReadVSShiftX_UpperBound, ReadVMask]>;
- def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
- Sched<[WriteVSShiftI_UpperBound, ReadVSShiftV_UpperBound,
+multiclass VAALU_MV_V_X<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # ".vv">,
+ Sched<[WriteVAALUV_WorstCase, ReadVAALUV_WorstCase,
+ ReadVAALUV_WorstCase, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">,
+ Sched<[WriteVAALUX_WorstCase, ReadVAALUV_WorstCase,
+ ReadVAALUX_WorstCase, ReadVMask]>;
+}
+
+multiclass VSMUL_IV_V_X<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
+ Sched<[WriteVSMulV_WorstCase, ReadVSMulV_WorstCase,
+ ReadVSMulV_WorstCase, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
+ Sched<[WriteVSMulX_WorstCase, ReadVSMulV_WorstCase,
+ ReadVSMulX_WorstCase, ReadVMask]>;
+}
+
+multiclass VSSHF_IV_V_X_I<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
+ Sched<[WriteVSShiftV_WorstCase, ReadVSShiftV_WorstCase,
+ ReadVSShiftV_WorstCase, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
+ Sched<[WriteVSShiftX_WorstCase, ReadVSShiftV_WorstCase,
+ ReadVSShiftX_WorstCase, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # ".vi", uimm5>,
+ Sched<[WriteVSShiftI_WorstCase, ReadVSShiftV_WorstCase,
ReadVMask]>;
}
-multiclass VNCLP_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
- def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVNClipV_UpperBound, ReadVNClipV_UpperBound,
- ReadVNClipV_UpperBound, ReadVMask]>;
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVNClipX_UpperBound, ReadVNClipV_UpperBound,
- ReadVNClipX_UpperBound, ReadVMask]>;
- def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
- Sched<[WriteVNClipI_UpperBound, ReadVNClipV_UpperBound,
+multiclass VNCLP_IV_V_X_I<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # ".wv">,
+ Sched<[WriteVNClipV_WorstCase, ReadVNClipV_WorstCase,
+ ReadVNClipV_WorstCase, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # ".wx">,
+ Sched<[WriteVNClipX_WorstCase, ReadVNClipV_WorstCase,
+ ReadVNClipX_WorstCase, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # ".wi", uimm5>,
+ Sched<[WriteVNClipI_WorstCase, ReadVNClipV_WorstCase,
ReadVMask]>;
}
-multiclass VSLD_IV_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVISlideX_UpperBound, ReadVISlideV_UpperBound,
- ReadVISlideX_UpperBound, ReadVMask]>;
- def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
- Sched<[WriteVISlideI_UpperBound, ReadVISlideV_UpperBound,
+multiclass VSLD_IV_X_I<string opcodestr, bits<6> funct6> {
+ def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
+ Sched<[WriteVISlideX_WorstCase, ReadVISlideV_WorstCase,
+ ReadVISlideX_WorstCase, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # ".vi", uimm5>,
+ Sched<[WriteVISlideI_WorstCase, ReadVISlideV_WorstCase,
ReadVMask]>;
}
-multiclass VSLD1_MV_X<string opcodestr, bits<6> funct6, string vw = "v"> {
- def X : VALUVX<funct6, OPMVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVISlide1X_UpperBound, ReadVISlideV_UpperBound,
- ReadVISlideX_UpperBound, ReadVMask]>;
+multiclass VSLD1_MV_X<string opcodestr, bits<6> funct6> {
+ def X : VALUVX<funct6, OPMVX, opcodestr # ".vx">,
+ Sched<[WriteVISlide1X_WorstCase, ReadVISlideV_WorstCase,
+ ReadVISlideX_WorstCase, ReadVMask]>;
}
-multiclass VSLD1_FV_F<string opcodestr, bits<6> funct6, string vw = "v"> {
- def F : VALUVF<funct6, OPFVF, opcodestr # "." # vw # "f">,
- Sched<[WriteVFSlide1F_UpperBound, ReadVFSlideV_UpperBound,
- ReadVFSlideF_UpperBound, ReadVMask]>;
-}
-
-multiclass VGTR_IV_V_X_I<string opcodestr, bits<6> funct6, Operand optype = simm5, string vw = "v"> {
- def V : VALUVV<funct6, OPIVV, opcodestr # "." # vw # "v">,
- Sched<[WriteVGatherV_UpperBound, ReadVGatherV_UpperBound,
- ReadVGatherV_UpperBound, ReadVMask]>;
- def X : VALUVX<funct6, OPIVX, opcodestr # "." # vw # "x">,
- Sched<[WriteVGatherX_UpperBound, ReadVGatherV_UpperBound,
- ReadVGatherX_UpperBound, ReadVMask]>;
- def I : VALUVI<funct6, opcodestr # "." # vw # "i", optype>,
- Sched<[WriteVGatherI_UpperBound, ReadVGatherV_UpperBound,
+multiclass VSLD1_FV_F<string opcodestr, bits<6> funct6> {
+ def F : VALUVF<funct6, OPFVF, opcodestr # ".vf">,
+ Sched<[WriteVFSlide1F_WorstCase, ReadVFSlideV_WorstCase,
+ ReadVFSlideF_WorstCase, ReadVMask]>;
+}
+
+multiclass VGTR_IV_V_X_I<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPIVV, opcodestr # ".vv">,
+ Sched<[WriteVRGatherVV_WorstCase, ReadVRGatherVV_data_WorstCase,
+ ReadVRGatherVV_index_WorstCase, ReadVMask]>;
+ def X : VALUVX<funct6, OPIVX, opcodestr # ".vx">,
+ Sched<[WriteVRGatherVX_WorstCase, ReadVRGatherVX_data_WorstCase,
+ ReadVRGatherVX_index_WorstCase, ReadVMask]>;
+ def I : VALUVI<funct6, opcodestr # ".vi", uimm5>,
+ Sched<[WriteVRGatherVI_WorstCase, ReadVRGatherVI_data_WorstCase,
ReadVMask]>;
}
multiclass VCPR_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> {
def M : VALUVVNoVm<funct6, OPMVV, opcodestr # "." # vm # "m">,
- Sched<[WriteVCompressV_UpperBound, ReadVCompressV_UpperBound,
- ReadVCompressV_UpperBound]>;
+ Sched<[WriteVCompressV_WorstCase, ReadVCompressV_WorstCase,
+ ReadVCompressV_WorstCase]>;
}
multiclass VWholeLoadN<bits<3> nf, string opcodestr, RegisterClass VRC> {
@@ -958,12 +978,12 @@ multiclass VWholeLoadN<bits<3> nf, string opcodestr, RegisterClass VRC> {
defvar s = !cast<SchedWrite>("WriteVLD" # !add(nf, 1) # "R");
def E # l # _V : VWholeLoad<nf, w, opcodestr # "e" # l # ".v", VRC>,
- Sched<[s, ReadVLDX_UpperBound]>;
+ Sched<[s, ReadVLDX]>;
}
}
multiclass VWholeLoadEEW64<bits<3> nf, string opcodestr, RegisterClass VRC, SchedReadWrite schedrw> {
def E64_V : VWholeLoad<nf, LSWidth64, opcodestr # "e64.v", VRC>,
- Sched<[schedrw, ReadVLDX_UpperBound]>;
+ Sched<[schedrw, ReadVLDX]>;
}
//===----------------------------------------------------------------------===//
@@ -987,15 +1007,15 @@ foreach eew = [8, 16, 32] in {
defvar w = !cast<RISCVWidth>("LSWidth" # eew);
// Vector Unit-Stride Instructions
- def VLE#eew#_V : VUnitStrideLoad<w, "vle"#eew#".v">, VLESched<UpperBoundLMUL>;
- def VSE#eew#_V : VUnitStrideStore<w, "vse"#eew#".v">, VSESched<UpperBoundLMUL>;
+ def VLE#eew#_V : VUnitStrideLoad<w, "vle"#eew#".v">, VLESched;
+ def VSE#eew#_V : VUnitStrideStore<w, "vse"#eew#".v">, VSESched;
// Vector Unit-Stride Fault-only-First Loads
- def VLE#eew#FF_V : VUnitStrideLoadFF<w, "vle"#eew#"ff.v">, VLFSched<UpperBoundLMUL>;
+ def VLE#eew#FF_V : VUnitStrideLoadFF<w, "vle"#eew#"ff.v">, VLFSched;
// Vector Strided Instructions
- def VLSE#eew#_V : VStridedLoad<w, "vlse"#eew#".v">, VLSSched<eew, UpperBoundLMUL>;
- def VSSE#eew#_V : VStridedStore<w, "vsse"#eew#".v">, VSSSched<eew, UpperBoundLMUL>;
+ def VLSE#eew#_V : VStridedLoad<w, "vlse"#eew#".v">, VLSSched<eew>;
+ def VSSE#eew#_V : VStridedStore<w, "vsse"#eew#".v">, VSSSched<eew>;
}
defm "" : VIndexLoadStore<[8, 16, 32]>;
@@ -1003,9 +1023,9 @@ defm "" : VIndexLoadStore<[8, 16, 32]>;
let Predicates = [HasVInstructions] in {
def VLM_V : VUnitStrideLoadMask<"vlm.v">,
- Sched<[WriteVLDM_UpperBound, ReadVLDX_UpperBound]>;
+ Sched<[WriteVLDM_WorstCase, ReadVLDX]>;
def VSM_V : VUnitStrideStoreMask<"vsm.v">,
- Sched<[WriteVSTM_UpperBound, ReadVSTM_UpperBound, ReadVSTX_UpperBound]>;
+ Sched<[WriteVSTM_WorstCase, ReadVSTM_WorstCase, ReadVSTX]>;
def : InstAlias<"vle1.v $vd, (${rs1})",
(VLM_V VR:$vd, GPR:$rs1), 0>;
def : InstAlias<"vse1.v $vs3, (${rs1})",
@@ -1017,13 +1037,13 @@ defm VL4R : VWholeLoadN<3, "vl4r", VRM4>;
defm VL8R : VWholeLoadN<7, "vl8r", VRM8>;
def VS1R_V : VWholeStore<0, "vs1r.v", VR>,
- Sched<[WriteVST1R, ReadVST1R, ReadVSTX_UpperBound]>;
+ Sched<[WriteVST1R, ReadVST1R, ReadVSTX]>;
def VS2R_V : VWholeStore<1, "vs2r.v", VRM2>,
- Sched<[WriteVST2R, ReadVST2R, ReadVSTX_UpperBound]>;
+ Sched<[WriteVST2R, ReadVST2R, ReadVSTX]>;
def VS4R_V : VWholeStore<3, "vs4r.v", VRM4>,
- Sched<[WriteVST4R, ReadVST4R, ReadVSTX_UpperBound]>;
+ Sched<[WriteVST4R, ReadVST4R, ReadVSTX]>;
def VS8R_V : VWholeStore<7, "vs8r.v", VRM8>,
- Sched<[WriteVST8R, ReadVST8R, ReadVSTX_UpperBound]>;
+ Sched<[WriteVST8R, ReadVST8R, ReadVSTX]>;
def : InstAlias<"vl1r.v $vd, (${rs1})", (VL1RE8_V VR:$vd, GPR:$rs1)>;
def : InstAlias<"vl2r.v $vd, (${rs1})", (VL2RE8_V VRM2:$vd, GPR:$rs1)>;
@@ -1034,19 +1054,19 @@ def : InstAlias<"vl8r.v $vd, (${rs1})", (VL8RE8_V VRM8:$vd, GPR:$rs1)>;
let Predicates = [HasVInstructionsI64] in {
// Vector Unit-Stride Instructions
def VLE64_V : VUnitStrideLoad<LSWidth64, "vle64.v">,
- VLESched<UpperBoundLMUL>;
+ VLESched;
def VLE64FF_V : VUnitStrideLoadFF<LSWidth64, "vle64ff.v">,
- VLFSched<UpperBoundLMUL>;
+ VLFSched;
def VSE64_V : VUnitStrideStore<LSWidth64, "vse64.v">,
- VSESched<UpperBoundLMUL>;
+ VSESched;
// Vector Strided Instructions
def VLSE64_V : VStridedLoad<LSWidth64, "vlse64.v">,
- VLSSched<32, UpperBoundLMUL>;
+ VLSSched<32>;
def VSSE64_V : VStridedStore<LSWidth64, "vsse64.v">,
- VSSSched<64, UpperBoundLMUL>;
+ VSSSched<64>;
defm VL1R: VWholeLoadEEW64<0, "vl1r", VR, WriteVLD1R>;
defm VL2R: VWholeLoadEEW64<1, "vl2r", VRM2, WriteVLD2R>;
@@ -1074,10 +1094,10 @@ def : InstAlias<"vneg.v $vd, $vs", (VRSUB_VX VR:$vd, VR:$vs, X0, zero_reg)>;
// if masked), otherwise an illegal instruction exception is raised.
let Constraints = "@earlyclobber $vd" in {
let RVVConstraint = WidenV in {
-defm VWADDU_V : VALU_MV_V_X<"vwaddu", 0b110000>;
-defm VWSUBU_V : VALU_MV_V_X<"vwsubu", 0b110010>;
-defm VWADD_V : VALU_MV_V_X<"vwadd", 0b110001>;
-defm VWSUB_V : VALU_MV_V_X<"vwsub", 0b110011>;
+defm VWADDU_V : VALU_MV_V_X<"vwaddu", 0b110000, "v">;
+defm VWSUBU_V : VALU_MV_V_X<"vwsubu", 0b110010, "v">;
+defm VWADD_V : VALU_MV_V_X<"vwadd", 0b110001, "v">;
+defm VWSUB_V : VALU_MV_V_X<"vwsub", 0b110011, "v">;
} // RVVConstraint = WidenV
// Set earlyclobber for following instructions for second and mask operands.
// This has the downside that the earlyclobber constraint is too coarse and
@@ -1131,9 +1151,9 @@ def : InstAlias<"vnot.v $vd, $vs",
(VXOR_VI VR:$vd, VR:$vs, -1, zero_reg)>;
// Vector Single-Width Bit Shift Instructions
-defm VSLL_V : VSHT_IV_V_X_I<"vsll", 0b100101, uimm5>;
-defm VSRL_V : VSHT_IV_V_X_I<"vsrl", 0b101000, uimm5>;
-defm VSRA_V : VSHT_IV_V_X_I<"vsra", 0b101001, uimm5>;
+defm VSLL_V : VSHT_IV_V_X_I<"vsll", 0b100101>;
+defm VSRL_V : VSHT_IV_V_X_I<"vsrl", 0b101000>;
+defm VSRA_V : VSHT_IV_V_X_I<"vsra", 0b101001>;
// Vector Narrowing Integer Right Shift Instructions
// Refer to 11.3. Narrowing Vector Arithmetic Instructions
@@ -1141,8 +1161,8 @@ defm VSRA_V : VSHT_IV_V_X_I<"vsra", 0b101001, uimm5>;
// vector register group (specified by vs2). The destination vector register
// group cannot overlap the mask register if used, unless LMUL=1.
let Constraints = "@earlyclobber $vd" in {
-defm VNSRL_W : VNSHT_IV_V_X_I<"vnsrl", 0b101100, uimm5, "w">;
-defm VNSRA_W : VNSHT_IV_V_X_I<"vnsra", 0b101101, uimm5, "w">;
+defm VNSRL_W : VNSHT_IV_V_X_I<"vnsrl", 0b101100>;
+defm VNSRA_W : VNSHT_IV_V_X_I<"vnsra", 0b101101>;
} // Constraints = "@earlyclobber $vd"
def : InstAlias<"vncvt.x.x.w $vd, $vs$vm",
@@ -1217,10 +1237,10 @@ def PseudoVMSGE_VX_M_T : Pseudo<(outs VR:$vd, VRNoV0:$scratch),
}
// Vector Integer Min/Max Instructions
-defm VMINU_V : VCMP_IV_V_X<"vminu", 0b000100>;
-defm VMIN_V : VCMP_IV_V_X<"vmin", 0b000101>;
-defm VMAXU_V : VCMP_IV_V_X<"vmaxu", 0b000110>;
-defm VMAX_V : VCMP_IV_V_X<"vmax", 0b000111>;
+defm VMINU_V : VMINMAX_IV_V_X<"vminu", 0b000100>;
+defm VMIN_V : VMINMAX_IV_V_X<"vmin", 0b000101>;
+defm VMAXU_V : VMINMAX_IV_V_X<"vmaxu", 0b000110>;
+defm VMAX_V : VMINMAX_IV_V_X<"vmax", 0b000111>;
// Vector Single-Width Integer Multiply Instructions
defm VMUL_V : VMUL_MV_V_X<"vmul", 0b100101>;
@@ -1264,15 +1284,15 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vs2 = 0, vm = 1,
// op vd, vs1
def VMV_V_V : RVInstVV<0b010111, OPIVV, (outs VR:$vd),
(ins VR:$vs1), "vmv.v.v", "$vd, $vs1">,
- Sched<[WriteVIMovV_UpperBound, ReadVIMovV_UpperBound]>;
+ Sched<[WriteVIMovV_WorstCase, ReadVIMovV_WorstCase]>;
// op vd, rs1
def VMV_V_X : RVInstVX<0b010111, OPIVX, (outs VR:$vd),
(ins GPR:$rs1), "vmv.v.x", "$vd, $rs1">,
- Sched<[WriteVIMovX_UpperBound, ReadVIMovX_UpperBound]>;
+ Sched<[WriteVIMovX_WorstCase, ReadVIMovX_WorstCase]>;
// op vd, imm
def VMV_V_I : RVInstIVI<0b010111, (outs VR:$vd),
(ins simm5:$imm), "vmv.v.i", "$vd, $imm">,
- Sched<[WriteVIMovI_UpperBound]>;
+ Sched<[WriteVIMovI_WorstCase]>;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
// Vector Fixed-Point Arithmetic Instructions
@@ -1291,13 +1311,13 @@ defm VASUB_V : VAALU_MV_V_X<"vasub", 0b001011>;
defm VSMUL_V : VSMUL_IV_V_X<"vsmul", 0b100111>;
// Vector Single-Width Scaling Shift Instructions
-defm VSSRL_V : VSSHF_IV_V_X_I<"vssrl", 0b101010, uimm5>;
-defm VSSRA_V : VSSHF_IV_V_X_I<"vssra", 0b101011, uimm5>;
+defm VSSRL_V : VSSHF_IV_V_X_I<"vssrl", 0b101010>;
+defm VSSRA_V : VSSHF_IV_V_X_I<"vssra", 0b101011>;
// Vector Narrowing Fixed-Point Clip Instructions
let Constraints = "@earlyclobber $vd" in {
-defm VNCLIPU_W : VNCLP_IV_V_X_I<"vnclipu", 0b101110, uimm5, "w">;
-defm VNCLIP_W : VNCLP_IV_V_X_I<"vnclip", 0b101111, uimm5, "w">;
+defm VNCLIPU_W : VNCLP_IV_V_X_I<"vnclipu", 0b101110>;
+defm VNCLIP_W : VNCLP_IV_V_X_I<"vnclip", 0b101111>;
} // Constraints = "@earlyclobber $vd"
} // Predicates = [HasVInstructions]
@@ -1314,8 +1334,8 @@ let Constraints = "@earlyclobber $vd",
Uses = [FRM],
mayRaiseFPException = true in {
let RVVConstraint = WidenV in {
-defm VFWADD_V : VWALU_FV_V_F<"vfwadd", 0b110000>;
-defm VFWSUB_V : VWALU_FV_V_F<"vfwsub", 0b110010>;
+defm VFWADD_V : VWALU_FV_V_F<"vfwadd", 0b110000, "v">;
+defm VFWSUB_V : VWALU_FV_V_F<"vfwsub", 0b110010, "v">;
} // RVVConstraint = WidenV
// Set earlyclobber for following instructions for second and mask operands.
// This has the downside that the earlyclobber constraint is too coarse and
@@ -1331,7 +1351,7 @@ defm VFWSUB_W : VWALU_FV_V_F<"vfwsub", 0b110110, "w">;
let Uses = [FRM], mayRaiseFPException = true in {
defm VFMUL_V : VMUL_FV_V_F<"vfmul", 0b100100>;
defm VFDIV_V : VDIV_FV_V_F<"vfdiv", 0b100000>;
-defm VFRDIV_V : VRDIV_FV_F<"vfrdiv", 0b100001>;
+defm VFRDIV_V : VDIV_FV_F<"vfrdiv", 0b100001>;
}
// Vector Widening Floating-Point Multiply
@@ -1372,8 +1392,8 @@ defm VFRSQRT7_V : VRCP_FV_VS2<"vfrsqrt7.v", 0b010011, 0b00100>;
// Vector Floating-Point MIN/MAX Instructions
let mayRaiseFPException = true in {
-defm VFMIN_V : VCMP_FV_V_F<"vfmin", 0b000100>;
-defm VFMAX_V : VCMP_FV_V_F<"vfmax", 0b000110>;
+defm VFMIN_V : VMINMAX_FV_V_F<"vfmin", 0b000100>;
+defm VFMAX_V : VMINMAX_FV_V_F<"vfmax", 0b000110>;
}
// Vector Floating-Point Sign-Injection Instructions
@@ -1415,15 +1435,15 @@ let vm = 0 in
def VFMERGE_VFM : RVInstVX<0b010111, OPFVF, (outs VR:$vd),
(ins VR:$vs2, FPR32:$rs1, VMV0:$v0),
"vfmerge.vfm", "$vd, $vs2, $rs1, v0">,
- Sched<[WriteVFMergeV_UpperBound, ReadVFMergeV_UpperBound,
- ReadVFMergeF_UpperBound, ReadVMask]>;
+ Sched<[WriteVFMergeV_WorstCase, ReadVFMergeV_WorstCase,
+ ReadVFMergeF_WorstCase, ReadVMask]>;
// Vector Floating-Point Move Instruction
let RVVConstraint = NoConstraint in
let vm = 1, vs2 = 0 in
def VFMV_V_F : RVInstVX<0b010111, OPFVF, (outs VR:$vd),
(ins FPR32:$rs1), "vfmv.v.f", "$vd, $rs1">,
- Sched<[WriteVFMovV_UpperBound, ReadVFMovF_UpperBound]>;
+ Sched<[WriteVFMovV_WorstCase, ReadVFMovF_WorstCase]>;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
@@ -1476,14 +1496,14 @@ let Predicates = [HasVInstructions] in {
// Vector Single-Width Integer Reduction Instructions
let RVVConstraint = NoConstraint in {
-defm VREDSUM : VRED_MV_V<"vredsum", 0b000000>;
-defm VREDMAXU : VRED_MV_V<"vredmaxu", 0b000110>;
-defm VREDMAX : VRED_MV_V<"vredmax", 0b000111>;
-defm VREDMINU : VRED_MV_V<"vredminu", 0b000100>;
-defm VREDMIN : VRED_MV_V<"vredmin", 0b000101>;
-defm VREDAND : VRED_MV_V<"vredand", 0b000001>;
-defm VREDOR : VRED_MV_V<"vredor", 0b000010>;
-defm VREDXOR : VRED_MV_V<"vredxor", 0b000011>;
+defm VREDSUM : VRED_MV_V<"vredsum", 0b000000>;
+defm VREDMAXU : VREDMINMAX_MV_V<"vredmaxu", 0b000110>;
+defm VREDMAX : VREDMINMAX_MV_V<"vredmax", 0b000111>;
+defm VREDMINU : VREDMINMAX_MV_V<"vredminu", 0b000100>;
+defm VREDMIN : VREDMINMAX_MV_V<"vredmin", 0b000101>;
+defm VREDAND : VRED_MV_V<"vredand", 0b000001>;
+defm VREDOR : VRED_MV_V<"vredor", 0b000010>;
+defm VREDXOR : VRED_MV_V<"vredxor", 0b000011>;
} // RVVConstraint = NoConstraint
// Vector Widening Integer Reduction Instructions
@@ -1506,8 +1526,8 @@ defm VFREDOSUM : VREDO_FV_V<"vfredosum", 0b000011>;
defm VFREDUSUM : VRED_FV_V<"vfredusum", 0b000001>;
}
let mayRaiseFPException = true in {
-defm VFREDMAX : VRED_FV_V<"vfredmax", 0b000111>;
-defm VFREDMIN : VRED_FV_V<"vfredmin", 0b000101>;
+defm VFREDMAX : VREDMINMAX_FV_V<"vfredmax", 0b000111>;
+defm VFREDMIN : VREDMINMAX_FV_V<"vfredmin", 0b000101>;
}
} // RVVConstraint = NoConstraint
@@ -1564,14 +1584,14 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
def VCPOP_M : RVInstV<0b010000, 0b10000, OPMVV, (outs GPR:$vd),
(ins VR:$vs2, VMaskOp:$vm),
"vcpop.m", "$vd, $vs2$vm">,
- Sched<[WriteVMPopV_UpperBound, ReadVMPopV_UpperBound,
+ Sched<[WriteVMPopV_WorstCase, ReadVMPopV_WorstCase,
ReadVMask]>;
// vfirst find-first-set mask bit
def VFIRST_M : RVInstV<0b010000, 0b10001, OPMVV, (outs GPR:$vd),
(ins VR:$vs2, VMaskOp:$vm),
"vfirst.m", "$vd, $vs2$vm">,
- Sched<[WriteVMFFSV_UpperBound, ReadVMFFSV_UpperBound,
+ Sched<[WriteVMFFSV_WorstCase, ReadVMFFSV_WorstCase,
ReadVMask]>;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
@@ -1598,18 +1618,17 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
let vs2 = 0 in
def VID_V : RVInstV<0b010100, 0b10001, OPMVV, (outs VR:$vd),
(ins VMaskOp:$vm), "vid.v", "$vd$vm">,
- Sched<[WriteVMIdxV_UpperBound, ReadVMask]>;
+ Sched<[WriteVMIdxV_WorstCase, ReadVMask]>;
// Integer Scalar Move Instructions
let vm = 1, RVVConstraint = NoConstraint in {
def VMV_X_S : RVInstV<0b010000, 0b00000, OPMVV, (outs GPR:$vd),
(ins VR:$vs2), "vmv.x.s", "$vd, $vs2">,
- Sched<[WriteVIMovVX_UpperBound, ReadVIMovVX_UpperBound]>;
+ Sched<[WriteVIMovVX, ReadVIMovVX]>;
let Constraints = "$vd = $vd_wb" in
def VMV_S_X : RVInstV2<0b010000, 0b00000, OPMVX, (outs VR:$vd_wb),
(ins VR:$vd, GPR:$rs1), "vmv.s.x", "$vd, $rs1">,
- Sched<[WriteVIMovXV_UpperBound, ReadVIMovXV_UpperBound,
- ReadVIMovXX_UpperBound]>;
+ Sched<[WriteVIMovXV, ReadVIMovXV, ReadVIMovXX]>;
}
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
@@ -1623,12 +1642,11 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1,
// Floating-Point Scalar Move Instructions
def VFMV_F_S : RVInstV<0b010000, 0b00000, OPFVV, (outs FPR32:$vd),
(ins VR:$vs2), "vfmv.f.s", "$vd, $vs2">,
- Sched<[WriteVFMovVF_UpperBound, ReadVFMovVF_UpperBound]>;
+ Sched<[WriteVFMovVF, ReadVFMovVF]>;
let Constraints = "$vd = $vd_wb" in
def VFMV_S_F : RVInstV2<0b010000, 0b00000, OPFVF, (outs VR:$vd_wb),
(ins VR:$vd, FPR32:$rs1), "vfmv.s.f", "$vd, $rs1">,
- Sched<[WriteVFMovFV_UpperBound, ReadVFMovFV_UpperBound,
- ReadVFMovFX_UpperBound]>;
+ Sched<[WriteVFMovFV, ReadVFMovFV, ReadVFMovFX]>;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0, vm = 1
@@ -1637,10 +1655,10 @@ def VFMV_S_F : RVInstV2<0b010000, 0b00000, OPFVF, (outs VR:$vd_wb),
let Predicates = [HasVInstructions] in {
// Vector Slide Instructions
let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in {
-defm VSLIDEUP_V : VSLD_IV_X_I<"vslideup", 0b001110, uimm5>;
+defm VSLIDEUP_V : VSLD_IV_X_I<"vslideup", 0b001110>;
defm VSLIDE1UP_V : VSLD1_MV_X<"vslide1up", 0b001110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
-defm VSLIDEDOWN_V : VSLD_IV_X_I<"vslidedown", 0b001111, uimm5>;
+defm VSLIDEDOWN_V : VSLD_IV_X_I<"vslidedown", 0b001111>;
defm VSLIDE1DOWN_V : VSLD1_MV_X<"vslide1down", 0b001111>;
} // Predicates = [HasVInstructions]
@@ -1654,10 +1672,10 @@ defm VFSLIDE1DOWN_V : VSLD1_FV_F<"vfslide1down", 0b001111>;
let Predicates = [HasVInstructions] in {
// Vector Register Gather Instruction
let Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather in {
-defm VRGATHER_V : VGTR_IV_V_X_I<"vrgather", 0b001100, uimm5>;
+defm VRGATHER_V : VGTR_IV_V_X_I<"vrgather", 0b001100>;
def VRGATHEREI16_VV : VALUVV<0b001110, OPIVV, "vrgatherei16.vv">,
- Sched<[WriteVGatherV_UpperBound, ReadVGatherV_UpperBound,
- ReadVGatherV_UpperBound]>;
+ Sched<[WriteVRGatherVV_WorstCase, ReadVRGatherVV_data_WorstCase,
+ ReadVRGatherVV_index_WorstCase]>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = Vrgather
// Vector Compress Instruction
@@ -1665,7 +1683,7 @@ let Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress in {
defm VCOMPRESS_V : VCPR_MV_Mask<"vcompress", 0b010111>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = Vcompress
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isMoveReg = 1,
RVVConstraint = NoConstraint in {
// A future extension may relax the vector register alignment restrictions.
foreach n = [1, 2, 4, 8] in {
@@ -1687,38 +1705,38 @@ let Predicates = [HasVInstructions] in {
def VLSEG#nf#E#eew#_V :
VUnitStrideSegmentLoad<!add(nf, -1), w, "vlseg"#nf#"e"#eew#".v">,
- VLSEGSched<nf, eew, UpperBoundLMUL>;
+ VLSEGSched<nf, eew>;
def VLSEG#nf#E#eew#FF_V :
VUnitStrideSegmentLoadFF<!add(nf, -1), w, "vlseg"#nf#"e"#eew#"ff.v">,
- VLSEGFFSched<nf, eew, UpperBoundLMUL>;
+ VLSEGFFSched<nf, eew>;
def VSSEG#nf#E#eew#_V :
VUnitStrideSegmentStore<!add(nf, -1), w, "vsseg"#nf#"e"#eew#".v">,
- VSSEGSched<nf, eew, UpperBoundLMUL>;
+ VSSEGSched<nf, eew>;
// Vector Strided Instructions
def VLSSEG#nf#E#eew#_V :
VStridedSegmentLoad<!add(nf, -1), w, "vlsseg"#nf#"e"#eew#".v">,
- VLSSEGSched<nf, eew, UpperBoundLMUL>;
+ VLSSEGSched<nf, eew>;
def VSSSEG#nf#E#eew#_V :
VStridedSegmentStore<!add(nf, -1), w, "vssseg"#nf#"e"#eew#".v">,
- VSSSEGSched<nf, eew, UpperBoundLMUL>;
+ VSSSEGSched<nf, eew>;
// Vector Indexed Instructions
def VLUXSEG#nf#EI#eew#_V :
VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, w,
"vluxseg"#nf#"ei"#eew#".v">,
- VLXSEGSched<nf, eew, "U", UpperBoundLMUL>;
+ VLXSEGSched<nf, eew, "U">;
def VLOXSEG#nf#EI#eew#_V :
VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, w,
"vloxseg"#nf#"ei"#eew#".v">,
- VLXSEGSched<nf, eew, "O", UpperBoundLMUL>;
+ VLXSEGSched<nf, eew, "O">;
def VSUXSEG#nf#EI#eew#_V :
VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, w,
"vsuxseg"#nf#"ei"#eew#".v">,
- VSXSEGSched<nf, eew, "U", UpperBoundLMUL>;
+ VSXSEGSched<nf, eew, "U">;
def VSOXSEG#nf#EI#eew#_V :
VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, w,
"vsoxseg"#nf#"ei"#eew#".v">,
- VSXSEGSched<nf, eew, "O", UpperBoundLMUL>;
+ VSXSEGSched<nf, eew, "O">;
}
}
} // Predicates = [HasVInstructions]
@@ -1728,21 +1746,21 @@ let Predicates = [HasVInstructionsI64] in {
// Vector Unit-strided Segment Instructions
def VLSEG#nf#E64_V :
VUnitStrideSegmentLoad<!add(nf, -1), LSWidth64, "vlseg"#nf#"e64.v">,
- VLSEGSched<nf, 64, UpperBoundLMUL>;
+ VLSEGSched<nf, 64>;
def VLSEG#nf#E64FF_V :
VUnitStrideSegmentLoadFF<!add(nf, -1), LSWidth64, "vlseg"#nf#"e64ff.v">,
- VLSEGFFSched<nf, 64, UpperBoundLMUL>;
+ VLSEGFFSched<nf, 64>;
def VSSEG#nf#E64_V :
VUnitStrideSegmentStore<!add(nf, -1), LSWidth64, "vsseg"#nf#"e64.v">,
- VSSEGSched<nf, 64, UpperBoundLMUL>;
+ VSSEGSched<nf, 64>;
// Vector Strided Segment Instructions
def VLSSEG#nf#E64_V :
VStridedSegmentLoad<!add(nf, -1), LSWidth64, "vlsseg"#nf#"e64.v">,
- VLSSEGSched<nf, 64, UpperBoundLMUL>;
+ VLSSEGSched<nf, 64>;
def VSSSEG#nf#E64_V :
VStridedSegmentStore<!add(nf, -1), LSWidth64, "vssseg"#nf#"e64.v">,
- VSSSEGSched<nf, 64, UpperBoundLMUL>;
+ VSSSEGSched<nf, 64>;
}
} // Predicates = [HasVInstructionsI64]
let Predicates = [HasVInstructionsI64, IsRV64] in {
@@ -1751,19 +1769,19 @@ let Predicates = [HasVInstructionsI64, IsRV64] in {
def VLUXSEG #nf #EI64_V
: VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedUnord, LSWidth64,
"vluxseg" #nf #"ei64.v">,
- VLXSEGSched<nf, 64, "U", UpperBoundLMUL>;
+ VLXSEGSched<nf, 64, "U">;
def VLOXSEG #nf #EI64_V
: VIndexedSegmentLoad<!add(nf, -1), MOPLDIndexedOrder, LSWidth64,
"vloxseg" #nf #"ei64.v">,
- VLXSEGSched<nf, 64, "O", UpperBoundLMUL>;
+ VLXSEGSched<nf, 64, "O">;
def VSUXSEG #nf #EI64_V
: VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedUnord, LSWidth64,
"vsuxseg" #nf #"ei64.v">,
- VSXSEGSched<nf, 64, "U", UpperBoundLMUL>;
+ VSXSEGSched<nf, 64, "U">;
def VSOXSEG #nf #EI64_V
: VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, LSWidth64,
"vsoxseg" #nf #"ei64.v">,
- VSXSEGSched<nf, 64, "O", UpperBoundLMUL>;
+ VSXSEGSched<nf, 64, "O">;
}
} // Predicates = [HasVInstructionsI64, IsRV64]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 3f69b5e41cf1..f8b7e32fe34c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -11,6 +11,68 @@
///
/// This file is included from RISCVInstrInfoV.td
///
+/// Overview of our vector instruction pseudos. Many of the instructions
+/// have behavior which depends on the value of VTYPE. Several core aspects of
+/// the compiler - e.g. register allocation - depend on fields in this
+/// configuration register. The details of which fields matter differ by the
+/// specific instruction, but the common dimensions are:
+///
+/// LMUL/EMUL - Most instructions can write to differently sized register groups
+/// depending on LMUL.
+///
+/// Masked vs Unmasked - Many instructions which allow a mask disallow register
+/// overlap. As a result, masked vs unmasked require different register
+/// allocation constraints.
+///
+/// Policy - For each of mask and tail policy, there are three options:
+/// * "Undisturbed" - As defined in the specification, required to preserve the
+/// exact bit pattern of inactive lanes.
+/// * "Agnostic" - As defined in the specification, required to either preserve
+/// the exact bit pattern of inactive lanes, or produce the bit pattern -1 for
+/// those lanes. Note that each lane can make this choice independently.
+/// Instructions which produce masks (and only those instructions) also have the
+/// option of producing a result as if VL had been VLMAX.
+/// * "Undefined" - The bit pattern of the inactive lanes is unspecified, and
+/// can be changed without impacting the semantics of the program. Note that
+/// this concept does not exist in the specification, and requires source
+/// knowledge to be preserved.
+///
+/// SEW - Some instructions have semantics which depend on SEW. This is
+/// relatively rare, and mostly impacts scheduling and cost estimation.
+///
+/// We have two techniques we use to represent the impact of these fields:
+/// * For fields which don't impact register classes, we largely use
+/// dummy operands on the pseudo instructions which convey information
+/// about the value of VTYPE.
+/// * For fields which do impact register classes (and a few bits of
+/// legacy - see policy discussion below), we define a family of pseudo
+/// instructions for each actual instruction. Said differently, we encode
+/// each of the preceding fields which are relevant for a given instruction
+/// in the opcode space.
+///
+/// Currently, the policy is represented via the following intrinsic families:
+/// * _MASK - Can represent all three policy states for both tail and mask. If
+/// passthrough is IMPLICIT_DEF, then represents "undefined". Otherwise,
+/// policy operand and tablegen flags drive the interpretation. (If policy
+///     operand is not present - there are a couple, though we're rapidly
+///     removing them - a non-undefined policy defaults to "tail agnostic" and
+///     "mask undisturbed".)  Since this is the only variant with a mask, all
+/// other variants are "mask undefined".
+/// * Unsuffixed w/ both passthrough and policy operand. Can represent all
+/// three policy states. If passthrough is IMPLICIT_DEF, then represents
+/// "undefined". Otherwise, policy operand and tablegen flags drive the
+/// interpretation.
+/// * Unsuffixed w/o passthrough or policy operand -- Does not have a
+/// passthrough operand, and thus represents the "undefined" state. Note
+/// that terminology in code frequently refers to these as "TA" which is
+/// confusing. We're in the process of migrating away from this
+/// representation.
+/// * _TU w/o policy operand -- Has a passthrough operand, and always
+/// represents the tail undisturbed state.
+/// * _TU w/policy operand - Can represent all three policy states. If
+/// passthrough is IMPLICIT_DEF, then represents "undefined". Otherwise,
+/// policy operand and tablegen flags drive the interpretation.
+///
//===----------------------------------------------------------------------===//
def riscv_vmv_x_s : SDNode<"RISCVISD::VMV_X_S",
@@ -39,8 +101,8 @@ def DecImm : SDNodeXForm<imm, [{
N->getValueType(0));
}]>;
-defvar TAIL_UNDISTURBED_MASK_UNDISTURBED = 0;
defvar TAIL_AGNOSTIC = 1;
+defvar TU_MU = 0;
defvar TA_MA = 3;
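A quick note on the three policy defvars above: the policy immediate is a two-bit value whose low bit selects tail agnostic and whose next bit selects mask agnostic, which is why TU_MU is 0, TAIL_AGNOSTIC is 1 and TA_MA is 3. A minimal standalone C++ sketch of that decoding follows; the struct and helper names are illustrative, not in-tree APIs.

// Decodes the two-bit policy immediate carried by the pseudos' policy operand:
// bit 0 = tail agnostic, bit 1 = mask agnostic (TU_MU = 0, TAIL_AGNOSTIC = 1,
// TA_MA = 3).
#include <cstdint>
#include <cstdio>

struct VPolicy {
  bool TailAgnostic;
  bool MaskAgnostic;
};

static VPolicy decodePolicy(uint64_t Imm) {
  return {/*TailAgnostic=*/(Imm & 1) != 0, /*MaskAgnostic=*/(Imm & 2) != 0};
}

int main() {
  for (unsigned Imm : {0u, 1u, 3u}) { // TU_MU, TAIL_AGNOSTIC, TA_MA
    VPolicy P = decodePolicy(Imm);
    std::printf("policy %u: tail %s, mask %s\n", Imm,
                P.TailAgnostic ? "agnostic" : "undisturbed",
                P.MaskAgnostic ? "agnostic" : "undisturbed");
  }
}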
//===----------------------------------------------------------------------===//
@@ -48,27 +110,41 @@ defvar TA_MA = 3;
//===----------------------------------------------------------------------===//
class PseudoToVInst<string PseudoInst> {
- string VInst = !subst("_M8", "",
- !subst("_M4", "",
- !subst("_M2", "",
- !subst("_M1", "",
- !subst("_MF2", "",
- !subst("_MF4", "",
- !subst("_MF8", "",
- !subst("_B1", "",
- !subst("_B2", "",
- !subst("_B4", "",
- !subst("_B8", "",
- !subst("_B16", "",
- !subst("_B32", "",
- !subst("_B64", "",
- !subst("_MASK", "",
- !subst("_TIED", "",
- !subst("_TU", "",
- !subst("F16", "F",
- !subst("F32", "F",
- !subst("F64", "F",
- !subst("Pseudo", "", PseudoInst)))))))))))))))))))));
+ defvar AffixSubsts = [["Pseudo", ""],
+ ["_E64", ""],
+ ["_E32", ""],
+ ["_E16", ""],
+ ["_E8", ""],
+ ["_F64", "_F"],
+ ["_F32", "_F"],
+ ["_F16", "_F"],
+ ["_VF64", "_VF"],
+ ["_VF32", "_VF"],
+ ["_VF16", "_VF"],
+ ["_WF64", "_WF"],
+ ["_WF32", "_WF"],
+ ["_WF16", "_WF"],
+ ["_TU", ""],
+ ["_TIED", ""],
+ ["_MASK", ""],
+ ["_B64", ""],
+ ["_B32", ""],
+ ["_B16", ""],
+ ["_B8", ""],
+ ["_B4", ""],
+ ["_B2", ""],
+ ["_B1", ""],
+ ["_MF8", ""],
+ ["_MF4", ""],
+ ["_MF2", ""],
+ ["_M1", ""],
+ ["_M2", ""],
+ ["_M4", ""],
+ ["_M8", ""],
+ ["_SE", ""]
+ ];
+ string VInst = !foldl(PseudoInst, AffixSubsts, Acc, AffixSubst,
+ !subst(AffixSubst[0], AffixSubst[1], Acc));
}
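The rewritten PseudoToVInst folds a list of (affix, replacement) pairs over the pseudo name instead of a long chain of nested !subst calls. A standalone C++ sketch of the same idea, using an abbreviated substitution list (the full list and its ordering are in AffixSubsts above):

// Strip pseudo-name affixes to recover the base mnemonic, applying each
// substitution to every occurrence, e.g. "PseudoVADD_VV_M1_MASK" -> "VADD_VV".
#include <cassert>
#include <string>
#include <utility>
#include <vector>

static std::string stripAffixes(std::string Name) {
  static const std::vector<std::pair<std::string, std::string>> Substs = {
      {"Pseudo", ""}, {"_MASK", ""}, {"_TIED", ""}, {"_TU", ""},
      {"_MF8", ""},   {"_MF4", ""},  {"_MF2", ""},  {"_M1", ""},
      {"_M2", ""},    {"_M4", ""},   {"_M8", ""}};
  for (const auto &[From, To] : Substs)
    for (size_t Pos; (Pos = Name.find(From)) != std::string::npos;)
      Name.replace(Pos, From.size(), To);
  return Name;
}

int main() {
  assert(stripAffixes("PseudoVADD_VV_M1_MASK") == "VADD_VV");
  assert(stripAffixes("PseudoVADD_VX_MF2") == "VADD_VX");
  return 0;
}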
// This class describes information associated to the LMUL.
@@ -101,8 +177,14 @@ defvar MxListF = [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8];
// Used for widening and narrowing instructions as it doesn't contain M8.
defvar MxListW = [V_MF8, V_MF4, V_MF2, V_M1, V_M2, V_M4];
+// Used for widening reductions. It can contain M8 because wider operands are
+// scalar operands.
+defvar MxListWRed = MxList;
// For floating point which don't need MF8.
defvar MxListFW = [V_MF4, V_MF2, V_M1, V_M2, V_M4];
+// For widening floating-point Reduction as it doesn't contain MF8. It can
+// contain M8 because wider operands are scalar operands.
+defvar MxListFWRed = [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8];
// Use for zext/sext.vf2
defvar MxListVF2 = [V_MF4, V_MF2, V_M1, V_M2, V_M4, V_M8];
@@ -120,17 +202,17 @@ class MxSet<int eew> {
!eq(eew, 64) : [V_M1, V_M2, V_M4, V_M8]);
}
-class FPR_Info<RegisterClass regclass, string fx, list<LMULInfo> mxlist,
- list<LMULInfo> mxlistfw> {
- RegisterClass fprclass = regclass;
- string FX = fx;
- list<LMULInfo> MxList = mxlist;
- list<LMULInfo> MxListFW = mxlistfw;
+class FPR_Info<int sew> {
+ RegisterClass fprclass = !cast<RegisterClass>("FPR" # sew);
+ string FX = "F" # sew;
+ int SEW = sew;
+ list<LMULInfo> MxList = MxSet<sew>.m;
+ list<LMULInfo> MxListFW = !if(!eq(sew, 64), [], !listremove(MxList, [V_M8]));
}
-def SCALAR_F16 : FPR_Info<FPR16, "F16", MxSet<16>.m, [V_MF4, V_MF2, V_M1, V_M2, V_M4]>;
-def SCALAR_F32 : FPR_Info<FPR32, "F32", MxSet<32>.m, [V_MF2, V_M1, V_M2, V_M4]>;
-def SCALAR_F64 : FPR_Info<FPR64, "F64", MxSet<64>.m, []>;
+def SCALAR_F16 : FPR_Info<16>;
+def SCALAR_F32 : FPR_Info<32>;
+def SCALAR_F64 : FPR_Info<64>;
defvar FPList = [SCALAR_F16, SCALAR_F32, SCALAR_F64];
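The new FPR_Info<int sew> derives both LMUL lists from SEW rather than taking them as parameters. A small standalone C++ sketch, assuming the usual MxSet contents for SEW 16/32/64, checking that the derived MxListFW reproduces the old hard-coded SCALAR_F16/F32/F64 lists:

// MxListFW is MxList minus M8, and empty for SEW=64, matching the removed
// hard-coded definitions.
#include <cassert>
#include <string>
#include <vector>

using LMULList = std::vector<std::string>;

static LMULList mxSet(int Sew) { // assumed MxSet<sew>.m contents
  switch (Sew) {
  case 16: return {"MF4", "MF2", "M1", "M2", "M4", "M8"};
  case 32: return {"MF2", "M1", "M2", "M4", "M8"};
  case 64: return {"M1", "M2", "M4", "M8"};
  default: return {};
  }
}

static LMULList mxListFW(int Sew) {
  if (Sew == 64)
    return {};
  LMULList L = mxSet(Sew);
  L.pop_back(); // drop M8, i.e. !listremove(MxList, [V_M8])
  return L;
}

int main() {
  assert(mxListFW(16) == LMULList({"MF4", "MF2", "M1", "M2", "M4"}));
  assert(mxListFW(32) == LMULList({"MF2", "M1", "M2", "M4"}));
  assert(mxListFW(64).empty());
}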
@@ -144,19 +226,14 @@ class NFSet<LMULInfo m> {
true: [2, 3, 4, 5, 6, 7, 8]);
}
-class log2<int num> {
- int val = !if(!eq(num, 1), 0, !add(1, log2<!srl(num, 1)>.val));
-}
-
class octuple_to_str<int octuple> {
- string ret = !if(!eq(octuple, 1), "MF8",
- !if(!eq(octuple, 2), "MF4",
- !if(!eq(octuple, 4), "MF2",
- !if(!eq(octuple, 8), "M1",
- !if(!eq(octuple, 16), "M2",
- !if(!eq(octuple, 32), "M4",
- !if(!eq(octuple, 64), "M8",
- "NoDef")))))));
+ string ret = !cond(!eq(octuple, 1): "MF8",
+ !eq(octuple, 2): "MF4",
+ !eq(octuple, 4): "MF2",
+ !eq(octuple, 8): "M1",
+ !eq(octuple, 16): "M2",
+ !eq(octuple, 32): "M4",
+ !eq(octuple, 64): "M8");
}
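For readers unfamiliar with the naming: the "octuple" here is LMUL scaled by 8, so fractional LMULs become small integers. A tiny standalone C++ equivalent of octuple_to_str, for illustration only:

#include <cassert>
#include <string>

// MF8 -> 1, MF4 -> 2, MF2 -> 4, M1 -> 8, M2 -> 16, M4 -> 32, M8 -> 64.
static std::string octupleToString(int Octuple) {
  switch (Octuple) {
  case 1:  return "MF8";
  case 2:  return "MF4";
  case 4:  return "MF2";
  case 8:  return "M1";
  case 16: return "M2";
  case 32: return "M4";
  case 64: return "M8";
  default: return ""; // the TableGen !cond has no fallback case
  }
}

int main() {
  assert(octupleToString(8 * 1) == "M1");  // LMUL = 1
  assert(octupleToString(8 / 2) == "MF2"); // LMUL = 1/2
}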
def VLOpFrag : PatFrag<(ops), (XLenVT (VLOp (XLenVT AVL:$vl)))>;
@@ -181,12 +258,11 @@ class SegRegClass<LMULInfo m, int nf> {
//===----------------------------------------------------------------------===//
class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg, LMULInfo M,
- ValueType Scal = XLenVT, RegisterClass ScalarReg = GPR>
-{
+ ValueType Scal = XLenVT, RegisterClass ScalarReg = GPR> {
ValueType Vector = Vec;
ValueType Mask = Mas;
int SEW = Sew;
- int Log2SEW = log2<Sew>.val;
+ int Log2SEW = !logtwo(Sew);
VReg RegClass = Reg;
LMULInfo LMul = M;
ValueType Scalar = Scal;
@@ -204,8 +280,7 @@ class VTypeInfo<ValueType Vec, ValueType Mas, int Sew, VReg Reg, LMULInfo M,
class GroupVTypeInfo<ValueType Vec, ValueType VecM1, ValueType Mas, int Sew,
VReg Reg, LMULInfo M, ValueType Scal = XLenVT,
RegisterClass ScalarReg = GPR>
- : VTypeInfo<Vec, Mas, Sew, Reg, M, Scal, ScalarReg>
-{
+ : VTypeInfo<Vec, Mas, Sew, Reg, M, Scal, ScalarReg> {
ValueType VectorM1 = VecM1;
}
@@ -283,8 +358,7 @@ defset list<VTypeInfo> AllVectors = {
// This functor is used to obtain the int vector type that has the same SEW and
// multiplier as the input parameter type
-class GetIntVTypeInfo<VTypeInfo vti>
-{
+class GetIntVTypeInfo<VTypeInfo vti> {
// Equivalent integer vector type. Eg.
// VI8M1 → VI8M1 (identity)
// VF64M4 → VI64M4
@@ -317,14 +391,12 @@ defset list<MTypeInfo> AllMasks = {
def : MTypeInfo<vbool1_t, V_M8, "B64">;
}
-class VTypeInfoToWide<VTypeInfo vti, VTypeInfo wti>
-{
+class VTypeInfoToWide<VTypeInfo vti, VTypeInfo wti> {
VTypeInfo Vti = vti;
VTypeInfo Wti = wti;
}
-class VTypeInfoToFraction<VTypeInfo vti, VTypeInfo fti>
-{
+class VTypeInfoToFraction<VTypeInfo vti, VTypeInfo fti> {
VTypeInfo Vti = vti;
VTypeInfo Fti = fti;
}
@@ -422,14 +494,12 @@ defset list<VTypeInfoToWide> AllWidenableIntToFloatVectors = {
// This class holds the record of the RISCVVPseudoTable below.
// This represents the information we need in codegen for each pseudo.
// The definition should be consistent with `struct PseudoInfo` in
-// RISCVBaseInfo.h.
-class CONST8b<bits<8> val> {
- bits<8> V = val;
-}
-def InvalidIndex : CONST8b<0x80>;
+// RISCVInstrInfo.h.
class RISCVVPseudo {
Pseudo Pseudo = !cast<Pseudo>(NAME); // Used as a key.
Instruction BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+ // SEW = 0 is used to denote that the Pseudo is not SEW specific (or unknown).
+ bits<8> SEW = 0;
}
// The actual table.
@@ -445,8 +515,8 @@ def RISCVVPseudosTable : GenericTable {
def RISCVVInversePseudosTable : GenericTable {
let FilterClass = "RISCVVPseudo";
let CppTypeName = "PseudoInfo";
- let Fields = [ "Pseudo", "BaseInstr", "VLMul" ];
- let PrimaryKey = [ "BaseInstr", "VLMul" ];
+ let Fields = [ "Pseudo", "BaseInstr", "VLMul", "SEW"];
+ let PrimaryKey = [ "BaseInstr", "VLMul", "SEW"];
let PrimaryKeyName = "getBaseInfo";
let PrimaryKeyEarlyOut = true;
}
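With the added SEW key, the inverse table can now disambiguate SEW-specific pseudos (SEW = 0 still meaning "not SEW specific"). A rough standalone C++ sketch of the row type and a linear-lookup stand-in for the generated accessor; the field widths and the helper itself are approximations, not the generated code:

#include <cstdint>
#include <optional>
#include <vector>

struct PseudoInfo {
  uint16_t Pseudo;    // pseudo opcode
  uint16_t BaseInstr; // underlying instruction opcode
  uint8_t VLMul;
  uint8_t SEW;        // 0 = not SEW specific / unknown
};

// Illustrative lookup keyed on (BaseInstr, VLMul, SEW), mirroring PrimaryKey.
static std::optional<PseudoInfo>
getBaseInfo(const std::vector<PseudoInfo> &Table, uint16_t BaseInstr,
            uint8_t VLMul, uint8_t SEW) {
  for (const PseudoInfo &Row : Table)
    if (Row.BaseInstr == BaseInstr && Row.VLMul == VLMul && Row.SEW == SEW)
      return Row;
  return std::nullopt;
}

int main() {
  std::vector<PseudoInfo> Table; // populated from the generated .inc in-tree
  return getBaseInfo(Table, /*BaseInstr=*/0, /*VLMul=*/0, /*SEW=*/0) ? 1 : 0;
}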
@@ -459,24 +529,27 @@ def RISCVVIntrinsicsTable : GenericTable {
let PrimaryKeyName = "getRISCVVIntrinsicInfo";
}
-class RISCVMaskedPseudo<bits<4> MaskIdx, bit HasTU = true> {
+// Describes the relation of a masked pseudo to the unmasked variants.
+// Note that all masked variants (in this table) have exactly one
+// unmasked variant. For all but compares, both the masked and
+// unmasked variant have a passthru and policy operand. For compares,
+// neither has a policy op, and only the masked version has a passthru.
+class RISCVMaskedPseudo<bits<4> MaskIdx> {
Pseudo MaskedPseudo = !cast<Pseudo>(NAME);
Pseudo UnmaskedPseudo = !cast<Pseudo>(!subst("_MASK", "", NAME));
- Pseudo UnmaskedTUPseudo = !if(HasTU, !cast<Pseudo>(!subst("_MASK", "", NAME # "_TU")), MaskedPseudo);
bits<4> MaskOpIdx = MaskIdx;
}
def RISCVMaskedPseudosTable : GenericTable {
let FilterClass = "RISCVMaskedPseudo";
let CppTypeName = "RISCVMaskedPseudoInfo";
- let Fields = ["MaskedPseudo", "UnmaskedPseudo", "UnmaskedTUPseudo", "MaskOpIdx"];
+ let Fields = ["MaskedPseudo", "UnmaskedPseudo", "MaskOpIdx"];
let PrimaryKey = ["MaskedPseudo"];
let PrimaryKeyName = "getMaskedPseudoInfo";
}
-class RISCVVLE<bit M, bit TU, bit Str, bit F, bits<3> S, bits<3> L> {
+class RISCVVLE<bit M, bit Str, bit F, bits<3> S, bits<3> L> {
bits<1> Masked = M;
- bits<1> IsTU = TU;
bits<1> Strided = Str;
bits<1> FF = F;
bits<3> Log2SEW = S;
@@ -484,7 +557,7 @@ class RISCVVLE<bit M, bit TU, bit Str, bit F, bits<3> S, bits<3> L> {
Pseudo Pseudo = !cast<Pseudo>(NAME);
}
-def lookupMaskedIntrinsicByUnmaskedTA : SearchIndex {
+def lookupMaskedIntrinsicByUnmasked : SearchIndex {
let Table = RISCVMaskedPseudosTable;
let Key = ["UnmaskedPseudo"];
}
@@ -492,8 +565,8 @@ def lookupMaskedIntrinsicByUnmaskedTA : SearchIndex {
def RISCVVLETable : GenericTable {
let FilterClass = "RISCVVLE";
let CppTypeName = "VLEPseudo";
- let Fields = ["Masked", "IsTU", "Strided", "FF", "Log2SEW", "LMUL", "Pseudo"];
- let PrimaryKey = ["Masked", "IsTU", "Strided", "FF", "Log2SEW", "LMUL"];
+ let Fields = ["Masked", "Strided", "FF", "Log2SEW", "LMUL", "Pseudo"];
+ let PrimaryKey = ["Masked", "Strided", "FF", "Log2SEW", "LMUL"];
let PrimaryKeyName = "getVLEPseudo";
}
@@ -513,9 +586,8 @@ def RISCVVSETable : GenericTable {
let PrimaryKeyName = "getVSEPseudo";
}
-class RISCVVLX_VSX<bit M, bit TU, bit O, bits<3> S, bits<3> L, bits<3> IL> {
+class RISCVVLX_VSX<bit M, bit O, bits<3> S, bits<3> L, bits<3> IL> {
bits<1> Masked = M;
- bits<1> IsTU = TU;
bits<1> Ordered = O;
bits<3> Log2SEW = S;
bits<3> LMUL = L;
@@ -523,15 +595,15 @@ class RISCVVLX_VSX<bit M, bit TU, bit O, bits<3> S, bits<3> L, bits<3> IL> {
Pseudo Pseudo = !cast<Pseudo>(NAME);
}
-class RISCVVLX<bit M, bit TU, bit O, bits<3> S, bits<3> L, bits<3> IL> :
- RISCVVLX_VSX<M, TU, O, S, L, IL>;
+class RISCVVLX<bit M, bit O, bits<3> S, bits<3> L, bits<3> IL> :
+ RISCVVLX_VSX<M, O, S, L, IL>;
class RISCVVSX<bit M, bit O, bits<3> S, bits<3> L, bits<3> IL> :
- RISCVVLX_VSX<M, /*TU*/0, O, S, L, IL>;
+ RISCVVLX_VSX<M, O, S, L, IL>;
class RISCVVLX_VSXTable : GenericTable {
let CppTypeName = "VLX_VSXPseudo";
- let Fields = ["Masked", "IsTU", "Ordered", "Log2SEW", "LMUL", "IndexLMUL", "Pseudo"];
- let PrimaryKey = ["Masked", "IsTU", "Ordered", "Log2SEW", "LMUL", "IndexLMUL"];
+ let Fields = ["Masked", "Ordered", "Log2SEW", "LMUL", "IndexLMUL", "Pseudo"];
+ let PrimaryKey = ["Masked", "Ordered", "Log2SEW", "LMUL", "IndexLMUL"];
}
def RISCVVLXTable : RISCVVLX_VSXTable {
@@ -544,10 +616,9 @@ def RISCVVSXTable : RISCVVLX_VSXTable {
let PrimaryKeyName = "getVSXPseudo";
}
-class RISCVVLSEG<bits<4> N, bit M, bit TU, bit Str, bit F, bits<3> S, bits<3> L> {
+class RISCVVLSEG<bits<4> N, bit M, bit Str, bit F, bits<3> S, bits<3> L> {
bits<4> NF = N;
bits<1> Masked = M;
- bits<1> IsTU = TU;
bits<1> Strided = Str;
bits<1> FF = F;
bits<3> Log2SEW = S;
@@ -558,15 +629,14 @@ class RISCVVLSEG<bits<4> N, bit M, bit TU, bit Str, bit F, bits<3> S, bits<3> L>
def RISCVVLSEGTable : GenericTable {
let FilterClass = "RISCVVLSEG";
let CppTypeName = "VLSEGPseudo";
- let Fields = ["NF", "Masked", "IsTU", "Strided", "FF", "Log2SEW", "LMUL", "Pseudo"];
- let PrimaryKey = ["NF", "Masked", "IsTU", "Strided", "FF", "Log2SEW", "LMUL"];
+ let Fields = ["NF", "Masked", "Strided", "FF", "Log2SEW", "LMUL", "Pseudo"];
+ let PrimaryKey = ["NF", "Masked", "Strided", "FF", "Log2SEW", "LMUL"];
let PrimaryKeyName = "getVLSEGPseudo";
}
-class RISCVVLXSEG<bits<4> N, bit M, bit TU, bit O, bits<3> S, bits<3> L, bits<3> IL> {
+class RISCVVLXSEG<bits<4> N, bit M, bit O, bits<3> S, bits<3> L, bits<3> IL> {
bits<4> NF = N;
bits<1> Masked = M;
- bits<1> IsTU = TU;
bits<1> Ordered = O;
bits<3> Log2SEW = S;
bits<3> LMUL = L;
@@ -577,8 +647,8 @@ class RISCVVLXSEG<bits<4> N, bit M, bit TU, bit O, bits<3> S, bits<3> L, bits<3>
def RISCVVLXSEGTable : GenericTable {
let FilterClass = "RISCVVLXSEG";
let CppTypeName = "VLXSEGPseudo";
- let Fields = ["NF", "Masked", "IsTU", "Ordered", "Log2SEW", "LMUL", "IndexLMUL", "Pseudo"];
- let PrimaryKey = ["NF", "Masked", "IsTU", "Ordered", "Log2SEW", "LMUL", "IndexLMUL"];
+ let Fields = ["NF", "Masked", "Ordered", "Log2SEW", "LMUL", "IndexLMUL", "Pseudo"];
+ let PrimaryKey = ["NF", "Masked", "Ordered", "Log2SEW", "LMUL", "IndexLMUL"];
let PrimaryKeyName = "getVLXSEGPseudo";
}
@@ -644,47 +714,33 @@ class GetVRegNoV0<VReg VRegClass> {
true : VRegClass);
}
-// Join strings in list using separator and ignoring empty elements
-class Join<list<string> strings, string separator> {
- string ret = !foldl(!head(strings), !tail(strings), a, b,
- !cond(
- !and(!empty(a), !empty(b)) : "",
- !empty(a) : b,
- !empty(b) : a,
- 1 : a#separator#b));
-}
-
-class VPseudo<Instruction instr, LMULInfo m, dag outs, dag ins> :
+class VPseudo<Instruction instr, LMULInfo m, dag outs, dag ins, int sew = 0> :
Pseudo<outs, ins, []>, RISCVVPseudo {
let BaseInstr = instr;
let VLMul = m.value;
+ let SEW = sew;
}
-class VPseudoUSLoadNoMask<VReg RetClass, int EEW, bit DummyMask = 1> :
- Pseudo<(outs RetClass:$rd),
- (ins GPRMem:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
- RISCVVPseudo,
- RISCVVLE</*Masked*/0, /*TU*/0, /*Strided*/0, /*FF*/0, log2<EEW>.val, VLMul> {
- let mayLoad = 1;
- let mayStore = 0;
- let hasSideEffects = 0;
- let HasVLOp = 1;
- let HasSEWOp = 1;
- let HasDummyMask = DummyMask;
+class GetVTypePredicates<VTypeInfo vti> {
+ list<Predicate> Predicates = !cond(!eq(vti.Scalar, f16) : [HasVInstructionsF16],
+ !eq(vti.Scalar, f32) : [HasVInstructionsAnyF],
+ !eq(vti.Scalar, f64) : [HasVInstructionsF64],
+ !eq(vti.SEW, 64) : [HasVInstructionsI64],
+ true : [HasVInstructions]);
}
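GetVTypePredicates centralizes which subtarget predicate guards each element type. A standalone C++ sketch of the same selection; the predicate names are taken from the class above, everything else is illustrative:

#include <cstdio>
#include <string>

enum class ScalarKind { Int, F16, F32, F64 };

// FP element types need the matching FP vector feature, SEW=64 integers need
// HasVInstructionsI64, and everything else only needs HasVInstructions.
static std::string requiredPredicate(ScalarKind Scalar, unsigned SEW) {
  if (Scalar == ScalarKind::F16) return "HasVInstructionsF16";
  if (Scalar == ScalarKind::F32) return "HasVInstructionsAnyF";
  if (Scalar == ScalarKind::F64) return "HasVInstructionsF64";
  if (SEW == 64)                 return "HasVInstructionsI64";
  return "HasVInstructions";
}

int main() {
  std::printf("%s\n", requiredPredicate(ScalarKind::Int, 64).c_str());
  std::printf("%s\n", requiredPredicate(ScalarKind::F32, 32).c_str());
}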
-class VPseudoUSLoadNoMaskTU<VReg RetClass, int EEW> :
+class VPseudoUSLoadNoMask<VReg RetClass, int EEW> :
Pseudo<(outs RetClass:$rd),
- (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew,
+ ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLE</*Masked*/0, /*TU*/1, /*Strided*/0, /*FF*/0, log2<EEW>.val, VLMul> {
+ RISCVVLE</*Masked*/0, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
- let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let Constraints = "$rd = $dest";
}
@@ -694,43 +750,29 @@ class VPseudoUSLoadMask<VReg RetClass, int EEW> :
GPRMem:$rs1,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLE</*Masked*/1, /*TU*/1, /*Strided*/0, /*FF*/0, log2<EEW>.val, VLMul> {
+ RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = "$rd = $merge";
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
}
-class VPseudoUSLoadFFNoMask<VReg RetClass, int EEW, bit DummyMask = 1> :
- Pseudo<(outs RetClass:$rd, GPR:$vl),
- (ins GPRMem:$rs1, AVL:$avl, ixlenimm:$sew),[]>,
- RISCVVPseudo,
- RISCVVLE</*Masked*/0, /*TU*/0, /*Strided*/0, /*FF*/1, log2<EEW>.val, VLMul> {
- let mayLoad = 1;
- let mayStore = 0;
- let hasSideEffects = 0;
- let HasVLOp = 1;
- let HasSEWOp = 1;
- let HasDummyMask = DummyMask;
-}
-
-class VPseudoUSLoadFFNoMaskTU<VReg RetClass, int EEW> :
+class VPseudoUSLoadFFNoMask<VReg RetClass, int EEW> :
Pseudo<(outs RetClass:$rd, GPR:$vl),
- (ins RetClass:$dest, GPRMem:$rs1, AVL:$avl, ixlenimm:$sew),[]>,
+ (ins RetClass:$dest, GPRMem:$rs1, AVL:$avl,
+ ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLE</*Masked*/0, /*TU*/1, /*Strided*/0, /*FF*/1, log2<EEW>.val, VLMul> {
+ RISCVVLE</*Masked*/0, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
- let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let Constraints = "$rd = $dest";
}
@@ -740,43 +782,29 @@ class VPseudoUSLoadFFMask<VReg RetClass, int EEW> :
GPRMem:$rs1,
VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLE</*Masked*/1, /*TU*/1, /*Strided*/0, /*FF*/1, log2<EEW>.val, VLMul> {
+ RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = "$rd = $merge";
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
}
class VPseudoSLoadNoMask<VReg RetClass, int EEW>:
Pseudo<(outs RetClass:$rd),
- (ins GPRMem:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins RetClass:$dest, GPRMem:$rs1, GPR:$rs2, AVL:$vl,
+ ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLE</*Masked*/0, /*TU*/0, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
+ RISCVVLE</*Masked*/0, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
-}
-
-class VPseudoSLoadNoMaskTU<VReg RetClass, int EEW>:
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$dest, GPRMem:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
- RISCVVPseudo,
- RISCVVLE</*Masked*/0, /*TU*/1, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
- let mayLoad = 1;
- let mayStore = 0;
- let hasSideEffects = 0;
- let HasVLOp = 1;
- let HasSEWOp = 1;
- let HasDummyMask = 1;
- let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let Constraints = "$rd = $dest";
}
@@ -786,14 +814,13 @@ class VPseudoSLoadMask<VReg RetClass, int EEW>:
GPRMem:$rs1, GPR:$rs2,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLE</*Masked*/1, /*TU*/1, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
+ RISCVVLE</*Masked*/1, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = "$rd = $merge";
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
}
@@ -801,33 +828,16 @@ class VPseudoSLoadMask<VReg RetClass, int EEW>:
class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
bit Ordered, bit EarlyClobber>:
Pseudo<(outs RetClass:$rd),
- (ins GPRMem:$rs1, IdxClass:$rs2, AVL:$vl,
- ixlenimm:$sew),[]>,
- RISCVVPseudo,
- RISCVVLX</*Masked*/0, /*TU*/0, Ordered, log2<EEW>.val, VLMul, LMUL> {
- let mayLoad = 1;
- let mayStore = 0;
- let hasSideEffects = 0;
- let HasVLOp = 1;
- let HasSEWOp = 1;
- let HasDummyMask = 1;
- let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd", "");
-}
-
-class VPseudoILoadNoMaskTU<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
- bit Ordered, bit EarlyClobber>:
- Pseudo<(outs RetClass:$rd),
(ins RetClass:$dest, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl,
- ixlenimm:$sew),[]>,
+ ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLX</*Masked*/0, /*TU*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
+ RISCVVLX</*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
- let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $dest", "$rd = $dest");
}
@@ -838,36 +848,34 @@ class VPseudoILoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
GPRMem:$rs1, IdxClass:$rs2,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLX</*Masked*/1, /*TU*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
+ RISCVVLX</*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $merge", "$rd = $merge");
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
}
-class VPseudoUSStoreNoMask<VReg StClass, int EEW, bit DummyMask = 1>:
+class VPseudoUSStoreNoMask<VReg StClass, int EEW>:
Pseudo<(outs),
(ins StClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVSE</*Masked*/0, /*Strided*/0, log2<EEW>.val, VLMul> {
+ RISCVVSE</*Masked*/0, /*Strided*/0, !logtwo(EEW), VLMul> {
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = DummyMask;
}
class VPseudoUSStoreMask<VReg StClass, int EEW>:
Pseudo<(outs),
(ins StClass:$rd, GPRMem:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVSE</*Masked*/1, /*Strided*/0, log2<EEW>.val, VLMul> {
+ RISCVVSE</*Masked*/1, /*Strided*/0, !logtwo(EEW), VLMul> {
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
@@ -879,20 +887,19 @@ class VPseudoSStoreNoMask<VReg StClass, int EEW>:
Pseudo<(outs),
(ins StClass:$rd, GPRMem:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVSE</*Masked*/0, /*Strided*/1, log2<EEW>.val, VLMul> {
+ RISCVVSE</*Masked*/0, /*Strided*/1, !logtwo(EEW), VLMul> {
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
}
class VPseudoSStoreMask<VReg StClass, int EEW>:
Pseudo<(outs),
(ins StClass:$rd, GPRMem:$rs1, GPR:$rs2, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVSE</*Masked*/1, /*Strided*/1, log2<EEW>.val, VLMul> {
+ RISCVVSE</*Masked*/1, /*Strided*/1, !logtwo(EEW), VLMul> {
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
@@ -900,57 +907,17 @@ class VPseudoSStoreMask<VReg StClass, int EEW>:
let HasSEWOp = 1;
}
-// Unary instruction that is never masked so HasDummyMask=0.
-class VPseudoUnaryNoDummyMask<VReg RetClass,
- DAGOperand Op2Class> :
- Pseudo<(outs RetClass:$rd),
- (ins Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let HasVLOp = 1;
- let HasSEWOp = 1;
-}
-
-class VPseudoUnaryNoDummyMaskTU<VReg RetClass,
- DAGOperand Op2Class> :
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$dest, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
- RISCVVPseudo {
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let HasVLOp = 1;
- let HasSEWOp = 1;
- let HasMergeOp = 1;
- let Constraints = "$rd = $dest";
-}
-
class VPseudoNullaryNoMask<VReg RegClass>:
Pseudo<(outs RegClass:$rd),
- (ins AVL:$vl, ixlenimm:$sew),
- []>, RISCVVPseudo {
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let HasVLOp = 1;
- let HasSEWOp = 1;
- let HasDummyMask = 1;
-}
-
-class VPseudoNullaryNoMaskTU<VReg RegClass>:
- Pseudo<(outs RegClass:$rd),
- (ins RegClass:$merge, AVL:$vl, ixlenimm:$sew),
- []>, RISCVVPseudo {
+ (ins RegClass:$merge, AVL:$vl, ixlenimm:$sew,
+ ixlenimm:$policy), []>, RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = "$rd = $merge";
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
- let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
}
class VPseudoNullaryMask<VReg RegClass>:
@@ -963,7 +930,6 @@ class VPseudoNullaryMask<VReg RegClass>:
let Constraints ="$rd = $merge";
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
let UsesMaskPolicy = 1;
let HasVecPolicyOp = 1;
}
@@ -983,100 +949,127 @@ class VPseudoNullaryPseudoM<string BaseInst>
let BaseInstr = !cast<Instruction>(BaseInst);
}
-// RetClass could be GPR or VReg.
-class VPseudoUnaryNoMask<DAGOperand RetClass, VReg OpClass, string Constraint = ""> :
- Pseudo<(outs RetClass:$rd),
- (ins OpClass:$rs2, AVL:$vl, ixlenimm:$sew), []>,
+class VPseudoUnaryNoMask<DAGOperand RetClass, DAGOperand OpClass,
+ string Constraint = ""> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge, OpClass:$rs2, AVL:$vl, ixlenimm:$sew,
+ ixlenimm:$policy), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let Constraints = Constraint;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
+ let HasVecPolicyOp = 1;
}
-// RetClass could be GPR or VReg.
-class VPseudoUnaryNoMaskTU<DAGOperand RetClass, VReg OpClass, string Constraint = ""> :
+class VPseudoUnaryNoMaskRoundingMode<DAGOperand RetClass, DAGOperand OpClass,
+ string Constraint = ""> :
Pseudo<(outs RetClass:$rd),
- (ins RetClass:$merge, OpClass:$rs2, AVL:$vl, ixlenimm:$sew), []>,
+ (ins RetClass:$merge, OpClass:$rs2, ixlenimm:$rm, AVL:$vl, ixlenimm:$sew,
+ ixlenimm:$policy), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
- let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
+ let HasRoundModeOp = 1;
+ let UsesVXRM = 0;
}
class VPseudoUnaryMask<VReg RetClass, VReg OpClass, string Constraint = ""> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
}
-class VPseudoUnaryMaskTA<VReg RetClass, VReg OpClass, string Constraint = ""> :
+class VPseudoUnaryMaskRoundingMode<VReg RetClass, VReg OpClass, string Constraint = ""> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
- VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ VMaskOp:$vm, ixlenimm:$rm,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
+ let HasRoundModeOp = 1;
+ let UsesVXRM = 0;
}
-class VPseudoUnaryMaskTA_NoExcept<VReg RetClass, VReg OpClass, string Constraint = ""> :
+class VPseudoUnaryMask_NoExcept<VReg RetClass, VReg OpClass, string Constraint = ""> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2, VMaskOp:$vm,
AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
let usesCustomInserter = 1;
}
-class VPseudoUnaryMaskTA_FRM<VReg RetClass, VReg OpClass, string Constraint = ""> :
+class VPseudoUnaryNoMask_FRM<VReg RetClass, VReg OpClass, string Constraint = ""> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge, OpClass:$rs2, ixlenimm:$frm, AVL:$vl,
+ ixlenimm:$sew, ixlenimm:$policy), []> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasVecPolicyOp = 1;
+ let usesCustomInserter = 1;
+}
+
+class VPseudoUnaryMask_FRM<VReg RetClass, VReg OpClass, string Constraint = ""> :
Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
(ins GetVRegNoV0<RetClass>.R:$merge, OpClass:$rs2,
VMaskOp:$vm, ixlenimm:$frm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []> {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
let usesCustomInserter = 1;
}
-// mask unary operation without maskedoff
-class VPseudoMaskUnarySOutMask:
+class VPseudoUnaryNoMaskGPROut :
+ Pseudo<(outs GPR:$rd),
+ (ins VR:$rs2, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+}
+
+class VPseudoUnaryMaskGPROut:
Pseudo<(outs GPR:$rd),
(ins VR:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo {
@@ -1102,14 +1095,12 @@ class VPseudoUnaryAnyMask<VReg RetClass,
let Constraints = "@earlyclobber $rd, $rd = $merge";
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
}
class VPseudoBinaryNoMask<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
- string Constraint,
- int DummyMask = 1> :
+ string Constraint> :
Pseudo<(outs RetClass:$rd),
(ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
RISCVVPseudo {
@@ -1119,7 +1110,6 @@ class VPseudoBinaryNoMask<VReg RetClass,
let Constraints = Constraint;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = DummyMask;
}
class VPseudoBinaryNoMaskTU<VReg RetClass,
@@ -1127,16 +1117,57 @@ class VPseudoBinaryNoMaskTU<VReg RetClass,
DAGOperand Op2Class,
string Constraint> :
Pseudo<(outs RetClass:$rd),
- (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
+ (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
RISCVVPseudo {
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasVecPolicyOp = 1;
+}
+
+class VPseudoBinaryNoMaskRoundingMode<VReg RetClass,
+ VReg Op1Class,
+ DAGOperand Op2Class,
+ string Constraint,
+ int UsesVXRM_ = 1> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, ixlenimm:$rm,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasVecPolicyOp = 1;
+ let HasRoundModeOp = 1;
+ let UsesVXRM = UsesVXRM_;
+}
+
+class VPseudoBinaryMaskPolicyRoundingMode<VReg RetClass,
+ RegisterClass Op1Class,
+ DAGOperand Op2Class,
+ string Constraint,
+ int UsesVXRM_> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op1Class:$rs2, Op2Class:$rs1,
+ VMaskOp:$vm, ixlenimm:$rm, AVL:$vl,
+ ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
- let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
+ let UsesMaskPolicy = 1;
+ let HasRoundModeOp = 1;
+ let UsesVXRM = UsesVXRM_;
}
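The *RoundingMode pseudo classes introduced throughout this patch add an explicit $rm operand, and UsesVXRM records whether that operand models the fixed-point VXRM CSR (for instructions such as vsmul/vssra) or the floating-point FRM CSR. A minimal standalone C++ sketch of that distinction; the enum and helper are illustrative, not in-tree APIs:

#include <cstdio>

enum class RoundingCSR { FRM, VXRM };

// Pick which rounding-mode CSR the $rm operand stands for, based on the two
// tablegen flags carried by the pseudo class.
static RoundingCSR roundingCSRFor(bool HasRoundModeOp, bool UsesVXRM) {
  (void)HasRoundModeOp; // only meaningful when a round-mode operand exists
  return UsesVXRM ? RoundingCSR::VXRM : RoundingCSR::FRM;
}

int main() {
  std::printf("%d %d\n",
              static_cast<int>(roundingCSRFor(true, /*UsesVXRM=*/true)),   // VXRM
              static_cast<int>(roundingCSRFor(true, /*UsesVXRM=*/false))); // FRM
}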
// Special version of VPseudoBinaryNoMask where we pretend the first source is
@@ -1152,12 +1183,34 @@ class VPseudoTiedBinaryNoMask<VReg RetClass,
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let Constraints = Join<[Constraint, "$rd = $rs2"], ",">.ret;
+ let Constraints = !interleave([Constraint, "$rd = $rs2"], ",");
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
let HasVecPolicyOp = 1;
let isConvertibleToThreeAddress = 1;
+ let IsTiedPseudo = 1;
+}
+
+class VPseudoTiedBinaryNoMaskRoundingMode<VReg RetClass,
+ DAGOperand Op2Class,
+ string Constraint> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$rs2, Op2Class:$rs1,
+ ixlenimm:$rm,
+ AVL:$vl, ixlenimm:$sew,
+ ixlenimm:$policy), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = !interleave([Constraint, "$rd = $rs2"], ",");
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasVecPolicyOp = 1;
+ let isConvertibleToThreeAddress = 1;
+ let IsTiedPseudo = 1;
+ let HasRoundModeOp = 1;
+ let UsesVXRM = 0;
}
class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
@@ -1165,13 +1218,12 @@ class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
Pseudo<(outs),
(ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVSX</*Masked*/0, Ordered, log2<EEW>.val, VLMul, LMUL> {
+ RISCVVSX</*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
}
class VPseudoIStoreMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
@@ -1179,7 +1231,7 @@ class VPseudoIStoreMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
Pseudo<(outs),
(ins StClass:$rd, GPRMem:$rs1, IdxClass:$rs2, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVSX</*Masked*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
+ RISCVVSX</*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
@@ -1199,10 +1251,9 @@ class VPseudoBinaryMask<VReg RetClass,
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
}
class VPseudoBinaryMaskPolicy<VReg RetClass,
@@ -1217,14 +1268,69 @@ class VPseudoBinaryMaskPolicy<VReg RetClass,
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
}
+class VPseudoTernaryMaskPolicy<VReg RetClass,
+ RegisterClass Op1Class,
+ DAGOperand Op2Class,
+ string Constraint> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op1Class:$rs2, Op2Class:$rs1,
+ VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasVecPolicyOp = 1;
+}
+
+class VPseudoTernaryMaskPolicyRoundingMode<VReg RetClass,
+ RegisterClass Op1Class,
+ DAGOperand Op2Class,
+ string Constraint> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op1Class:$rs2, Op2Class:$rs1,
+ VMaskOp:$vm,
+ ixlenimm:$rm,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasVecPolicyOp = 1;
+ let HasRoundModeOp = 1;
+ let UsesVXRM = 0;
+}
+
+// Like VPseudoBinaryNoMask, but output can be V0.
+class VPseudoBinaryMOutNoMask<VReg RetClass,
+ VReg Op1Class,
+ DAGOperand Op2Class,
+ string Constraint> :
+ Pseudo<(outs RetClass:$rd),
+ (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = Constraint;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+}
+
// Like VPseudoBinaryMask, but output can be V0.
class VPseudoBinaryMOutMask<VReg RetClass,
RegisterClass Op1Class,
@@ -1238,10 +1344,9 @@ class VPseudoBinaryMOutMask<VReg RetClass,
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
let UsesMaskPolicy = 1;
}
@@ -1259,12 +1364,35 @@ class VPseudoTiedBinaryMask<VReg RetClass,
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasVecPolicyOp = 1;
+ let UsesMaskPolicy = 1;
+ let IsTiedPseudo = 1;
+}
+
+class VPseudoTiedBinaryMaskRoundingMode<VReg RetClass,
+ DAGOperand Op2Class,
+ string Constraint> :
+ Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
+ (ins GetVRegNoV0<RetClass>.R:$merge,
+ Op2Class:$rs1,
+ VMaskOp:$vm,
+ ixlenimm:$rm,
+ AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 0; // Merge is also rs2.
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
+ let IsTiedPseudo = 1;
+ let HasRoundModeOp = 1;
+ let UsesVXRM = 0;
}
class VPseudoBinaryCarryIn<VReg RetClass,
@@ -1285,7 +1413,6 @@ class VPseudoBinaryCarryIn<VReg RetClass,
let Constraints = Constraint;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 0;
let VLMul = MInfo.value;
}
@@ -1304,10 +1431,9 @@ class VPseudoTiedBinaryCarryIn<VReg RetClass,
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+ let Constraints = !interleave([Constraint, "$rd = $merge"], ",");
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
let HasVecPolicyOp = 0;
let VLMul = MInfo.value;
}
@@ -1324,11 +1450,9 @@ class VPseudoTernaryNoMask<VReg RetClass,
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let Constraints = Join<[Constraint, "$rd = $rs3"], ",">.ret;
+ let Constraints = !interleave([Constraint, "$rd = $rs3"], ",");
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
- let HasDummyMask = 1;
}
class VPseudoTernaryNoMaskWithPolicy<VReg RetClass,
@@ -1343,39 +1467,44 @@ class VPseudoTernaryNoMaskWithPolicy<VReg RetClass,
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let Constraints = Join<[Constraint, "$rd = $rs3"], ",">.ret;
+ let Constraints = !interleave([Constraint, "$rd = $rs3"], ",");
let HasVecPolicyOp = 1;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
- let HasDummyMask = 1;
}
-class VPseudoUSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>:
- Pseudo<(outs RetClass:$rd),
- (ins GPRMem:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
- RISCVVPseudo,
- RISCVVLSEG<NF, /*Masked*/0, /*TU*/0, /*Strided*/0, /*FF*/0, log2<EEW>.val, VLMul> {
- let mayLoad = 1;
+class VPseudoTernaryNoMaskWithPolicyRoundingMode<VReg RetClass,
+ RegisterClass Op1Class,
+ DAGOperand Op2Class,
+ string Constraint> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$rs3, Op1Class:$rs1, Op2Class:$rs2,
+ ixlenimm:$rm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),
+ []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
+ let Constraints = !interleave([Constraint, "$rd = $rs3"], ",");
+ let HasVecPolicyOp = 1;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
+ let HasRoundModeOp = 1;
+ let UsesVXRM = 0;
}
-class VPseudoUSSegLoadNoMaskTU<VReg RetClass, int EEW, bits<4> NF>:
+class VPseudoUSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>:
Pseudo<(outs RetClass:$rd),
- (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl,
+ ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLSEG<NF, /*Masked*/0, /*TU*/1, /*Strided*/0, /*FF*/0, log2<EEW>.val, VLMul> {
+ RISCVVLSEG<NF, /*Masked*/0, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
- let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let Constraints = "$rd = $dest";
}
@@ -1384,43 +1513,29 @@ class VPseudoUSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
(ins GetVRegNoV0<RetClass>.R:$merge, GPRMem:$rs1,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLSEG<NF, /*Masked*/1, /*TU*/1, /*Strided*/0, /*FF*/0, log2<EEW>.val, VLMul> {
+ RISCVVLSEG<NF, /*Masked*/1, /*Strided*/0, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = "$rd = $merge";
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
}
class VPseudoUSSegLoadFFNoMask<VReg RetClass, int EEW, bits<4> NF>:
Pseudo<(outs RetClass:$rd, GPR:$vl),
- (ins GPRMem:$rs1, AVL:$avl, ixlenimm:$sew),[]>,
+ (ins RetClass:$dest, GPRMem:$rs1, AVL:$avl,
+ ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLSEG<NF, /*Masked*/0, /*TU*/0, /*Strided*/0, /*FF*/1, log2<EEW>.val, VLMul> {
+ RISCVVLSEG<NF, /*Masked*/0, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
-}
-
-class VPseudoUSSegLoadFFNoMaskTU<VReg RetClass, int EEW, bits<4> NF>:
- Pseudo<(outs RetClass:$rd, GPR:$vl),
- (ins RetClass:$dest, GPRMem:$rs1, AVL:$avl, ixlenimm:$sew),[]>,
- RISCVVPseudo,
- RISCVVLSEG<NF, /*Masked*/0, /*TU*/1, /*Strided*/0, /*FF*/1, log2<EEW>.val, VLMul> {
- let mayLoad = 1;
- let mayStore = 0;
- let hasSideEffects = 0;
- let HasVLOp = 1;
- let HasSEWOp = 1;
- let HasDummyMask = 1;
- let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let Constraints = "$rd = $dest";
}
@@ -1429,43 +1544,29 @@ class VPseudoUSSegLoadFFMask<VReg RetClass, int EEW, bits<4> NF>:
(ins GetVRegNoV0<RetClass>.R:$merge, GPRMem:$rs1,
VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLSEG<NF, /*Masked*/1, /*TU*/1, /*Strided*/0, /*FF*/1, log2<EEW>.val, VLMul> {
+ RISCVVLSEG<NF, /*Masked*/1, /*Strided*/0, /*FF*/1, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = "$rd = $merge";
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
}
class VPseudoSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF>:
Pseudo<(outs RetClass:$rd),
- (ins GPRMem:$rs1, GPR:$offset, AVL:$vl, ixlenimm:$sew),[]>,
- RISCVVPseudo,
- RISCVVLSEG<NF, /*Masked*/0, /*TU*/0, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
- let mayLoad = 1;
- let mayStore = 0;
- let hasSideEffects = 0;
- let HasVLOp = 1;
- let HasSEWOp = 1;
- let HasDummyMask = 1;
-}
-
-class VPseudoSSegLoadNoMaskTU<VReg RetClass, int EEW, bits<4> NF>:
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$merge, GPRMem:$rs1, GPR:$offset, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins RetClass:$merge, GPRMem:$rs1, GPR:$offset, AVL:$vl,
+ ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLSEG<NF, /*Masked*/0, /*TU*/1, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
+ RISCVVLSEG<NF, /*Masked*/0, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
- let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
let Constraints = "$rd = $merge";
}
@@ -1475,14 +1576,13 @@ class VPseudoSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
GPR:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew,
ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLSEG<NF, /*Masked*/1, /*TU*/1, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
+ RISCVVLSEG<NF, /*Masked*/1, /*Strided*/1, /*FF*/0, !logtwo(EEW), VLMul> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
let Constraints = "$rd = $merge";
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
}
@@ -1490,26 +1590,10 @@ class VPseudoSSegLoadMask<VReg RetClass, int EEW, bits<4> NF>:
class VPseudoISegLoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
bits<4> NF, bit Ordered>:
Pseudo<(outs RetClass:$rd),
- (ins GPRMem:$rs1, IdxClass:$offset, AVL:$vl, ixlenimm:$sew),[]>,
+ (ins RetClass:$merge, GPRMem:$rs1, IdxClass:$offset, AVL:$vl,
+ ixlenimm:$sew, ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLXSEG<NF, /*Masked*/0, /*TU*/0, Ordered, log2<EEW>.val, VLMul, LMUL> {
- let mayLoad = 1;
- let mayStore = 0;
- let hasSideEffects = 0;
- // For vector indexed segment loads, the destination vector register groups
- // cannot overlap the source vector register group
- let Constraints = "@earlyclobber $rd";
- let HasVLOp = 1;
- let HasSEWOp = 1;
- let HasDummyMask = 1;
-}
-
-class VPseudoISegLoadNoMaskTU<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
- bits<4> NF, bit Ordered>:
- Pseudo<(outs RetClass:$rd),
- (ins RetClass:$merge, GPRMem:$rs1, IdxClass:$offset, AVL:$vl, ixlenimm:$sew),[]>,
- RISCVVPseudo,
- RISCVVLXSEG<NF, /*Masked*/0, /*TU*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
+ RISCVVLXSEG<NF, /*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1518,8 +1602,7 @@ class VPseudoISegLoadNoMaskTU<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMU
let Constraints = "@earlyclobber $rd, $rd = $merge";
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
- let HasMergeOp = 1;
+ let HasVecPolicyOp = 1;
}
class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
@@ -1529,7 +1612,7 @@ class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
IdxClass:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew,
ixlenimm:$policy),[]>,
RISCVVPseudo,
- RISCVVLXSEG<NF, /*Masked*/1, /*TU*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
+ RISCVVLXSEG<NF, /*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 1;
let mayStore = 0;
let hasSideEffects = 0;
@@ -1538,7 +1621,6 @@ class VPseudoISegLoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
let Constraints = "@earlyclobber $rd, $rd = $merge";
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasMergeOp = 1;
let HasVecPolicyOp = 1;
let UsesMaskPolicy = 1;
}
@@ -1547,13 +1629,12 @@ class VPseudoUSSegStoreNoMask<VReg ValClass, int EEW, bits<4> NF>:
Pseudo<(outs),
(ins ValClass:$rd, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVSSEG<NF, /*Masked*/0, /*Strided*/0, log2<EEW>.val, VLMul> {
+ RISCVVSSEG<NF, /*Masked*/0, /*Strided*/0, !logtwo(EEW), VLMul> {
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
}
class VPseudoUSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>:
@@ -1561,7 +1642,7 @@ class VPseudoUSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>:
(ins ValClass:$rd, GPRMem:$rs1,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVSSEG<NF, /*Masked*/1, /*Strided*/0, log2<EEW>.val, VLMul> {
+ RISCVVSSEG<NF, /*Masked*/1, /*Strided*/0, !logtwo(EEW), VLMul> {
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
@@ -1573,13 +1654,12 @@ class VPseudoSSegStoreNoMask<VReg ValClass, int EEW, bits<4> NF>:
Pseudo<(outs),
(ins ValClass:$rd, GPRMem:$rs1, GPR: $offset, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVSSEG<NF, /*Masked*/0, /*Strided*/1, log2<EEW>.val, VLMul> {
+ RISCVVSSEG<NF, /*Masked*/0, /*Strided*/1, !logtwo(EEW), VLMul> {
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
}
class VPseudoSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>:
@@ -1587,7 +1667,7 @@ class VPseudoSSegStoreMask<VReg ValClass, int EEW, bits<4> NF>:
(ins ValClass:$rd, GPRMem:$rs1, GPR: $offset,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVSSEG<NF, /*Masked*/1, /*Strided*/1, log2<EEW>.val, VLMul> {
+ RISCVVSSEG<NF, /*Masked*/1, /*Strided*/1, !logtwo(EEW), VLMul> {
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
@@ -1601,13 +1681,12 @@ class VPseudoISegStoreNoMask<VReg ValClass, VReg IdxClass, int EEW, bits<3> LMUL
(ins ValClass:$rd, GPRMem:$rs1, IdxClass: $index,
AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVSXSEG<NF, /*Masked*/0, Ordered, log2<EEW>.val, VLMul, LMUL> {
+ RISCVVSXSEG<NF, /*Masked*/0, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let HasDummyMask = 1;
}
class VPseudoISegStoreMask<VReg ValClass, VReg IdxClass, int EEW, bits<3> LMUL,
@@ -1616,7 +1695,7 @@ class VPseudoISegStoreMask<VReg ValClass, VReg IdxClass, int EEW, bits<3> LMUL,
(ins ValClass:$rd, GPRMem:$rs1, IdxClass: $index,
VMaskOp:$vm, AVL:$vl, ixlenimm:$sew),[]>,
RISCVVPseudo,
- RISCVVSXSEG<NF, /*Masked*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
+ RISCVVSXSEG<NF, /*Masked*/1, Ordered, !logtwo(EEW), VLMul, LMUL> {
let mayLoad = 0;
let mayStore = 1;
let hasSideEffects = 0;
@@ -1629,16 +1708,13 @@ multiclass VPseudoUSLoad {
foreach lmul = MxSet<eew>.m in {
defvar LInfo = lmul.MX;
defvar vreg = lmul.vrclass;
- let VLMul = lmul.value in {
+ let VLMul = lmul.value, SEW=eew in {
def "E" # eew # "_V_" # LInfo :
VPseudoUSLoadNoMask<vreg, eew>,
VLESched<LInfo>;
- def "E" # eew # "_V_" # LInfo # "_TU":
- VPseudoUSLoadNoMaskTU<vreg, eew>,
- VLESched<LInfo>;
def "E" # eew # "_V_" # LInfo # "_MASK" :
VPseudoUSLoadMask<vreg, eew>,
- RISCVMaskedPseudo</*MaskOpIdx*/ 2>,
+ RISCVMaskedPseudo<MaskIdx=2>,
VLESched<LInfo>;
}
}
@@ -1650,16 +1726,13 @@ multiclass VPseudoFFLoad {
foreach lmul = MxSet<eew>.m in {
defvar LInfo = lmul.MX;
defvar vreg = lmul.vrclass;
- let VLMul = lmul.value in {
+ let VLMul = lmul.value, SEW=eew in {
def "E" # eew # "FF_V_" # LInfo:
VPseudoUSLoadFFNoMask<vreg, eew>,
VLFSched<LInfo>;
- def "E" # eew # "FF_V_" # LInfo # "_TU":
- VPseudoUSLoadFFNoMaskTU<vreg, eew>,
- VLFSched<LInfo>;
def "E" # eew # "FF_V_" # LInfo # "_MASK":
VPseudoUSLoadFFMask<vreg, eew>,
- RISCVMaskedPseudo</*MaskOpIdx*/ 2>,
+ RISCVMaskedPseudo<MaskIdx=2>,
VLFSched<LInfo>;
}
}
@@ -1670,10 +1743,9 @@ multiclass VPseudoLoadMask {
foreach mti = AllMasks in {
defvar mx = mti.LMul.MX;
defvar WriteVLDM_MX = !cast<SchedWrite>("WriteVLDM_" # mx);
- defvar ReadVLDX_MX = !cast<SchedRead>("ReadVLDX_" # mx);
let VLMul = mti.LMul.value in {
- def "_V_" # mti.BX : VPseudoUSLoadNoMask<VR, /*EEW*/1, /*DummyMask*/0>,
- Sched<[WriteVLDM_MX, ReadVLDX_MX]>;
+ def "_V_" # mti.BX : VPseudoUSLoadNoMask<VR, EEW=1>,
+ Sched<[WriteVLDM_MX, ReadVLDX]>;
}
}
}
@@ -1683,14 +1755,12 @@ multiclass VPseudoSLoad {
foreach lmul = MxSet<eew>.m in {
defvar LInfo = lmul.MX;
defvar vreg = lmul.vrclass;
- let VLMul = lmul.value in {
+ let VLMul = lmul.value, SEW=eew in {
def "E" # eew # "_V_" # LInfo : VPseudoSLoadNoMask<vreg, eew>,
VLSSched<eew, LInfo>;
- def "E" # eew # "_V_" # LInfo # "_TU": VPseudoSLoadNoMaskTU<vreg, eew>,
- VLSSched<eew, LInfo>;
def "E" # eew # "_V_" # LInfo # "_MASK" :
VPseudoSLoadMask<vreg, eew>,
- RISCVMaskedPseudo</*MaskOpIdx*/ 3>,
+ RISCVMaskedPseudo<MaskIdx=3>,
VLSSched<eew, LInfo>;
}
}
@@ -1698,31 +1768,29 @@ multiclass VPseudoSLoad {
}
multiclass VPseudoILoad<bit Ordered> {
- foreach eew = EEWList in {
- foreach sew = EEWList in {
- foreach lmul = MxSet<sew>.m in {
- defvar octuple_lmul = lmul.octuple;
+ foreach idxEEW = EEWList in {
+ foreach dataEEW = EEWList in {
+ foreach dataEMUL = MxSet<dataEEW>.m in {
+ defvar dataEMULOctuple = dataEMUL.octuple;
// Calculate emul = eew * lmul / sew
- defvar octuple_emul = !srl(!mul(eew, octuple_lmul), log2<sew>.val);
- if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
- defvar LInfo = lmul.MX;
- defvar IdxLInfo = octuple_to_str<octuple_emul>.ret;
- defvar idx_lmul = !cast<LMULInfo>("V_" # IdxLInfo);
- defvar Vreg = lmul.vrclass;
- defvar IdxVreg = idx_lmul.vrclass;
- defvar HasConstraint = !ne(sew, eew);
+ defvar idxEMULOctuple =
+ !srl(!mul(idxEEW, dataEMULOctuple), !logtwo(dataEEW));
+ if !and(!ge(idxEMULOctuple, 1), !le(idxEMULOctuple, 64)) then {
+ defvar DataLInfo = dataEMUL.MX;
+ defvar IdxLInfo = octuple_to_str<idxEMULOctuple>.ret;
+ defvar idxEMUL = !cast<LMULInfo>("V_" # IdxLInfo);
+ defvar Vreg = dataEMUL.vrclass;
+ defvar IdxVreg = idxEMUL.vrclass;
+ defvar HasConstraint = !ne(dataEEW, idxEEW);
defvar Order = !if(Ordered, "O", "U");
- let VLMul = lmul.value in {
- def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo :
- VPseudoILoadNoMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>,
- VLXSched<eew, Order, LInfo>;
- def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo # "_TU":
- VPseudoILoadNoMaskTU<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>,
- VLXSched<eew, Order, LInfo>;
- def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo # "_MASK" :
- VPseudoILoadMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>,
- RISCVMaskedPseudo</*MaskOpIdx*/ 3>,
- VLXSched<eew, Order, LInfo>;
+ let VLMul = dataEMUL.value in {
+ def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo :
+ VPseudoILoadNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered, HasConstraint>,
+ VLXSched<dataEEW, Order, DataLInfo, IdxLInfo>;
+ def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" :
+ VPseudoILoadMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered, HasConstraint>,
+ RISCVMaskedPseudo<MaskIdx=3>,
+ VLXSched<dataEEW, Order, DataLInfo, IdxLInfo>;
}
}
}
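// Illustrative note (not part of the upstream patch): a worked example of the octuple EMUL
// computation above, assuming index EEW = 32, data EEW = 8 and data LMUL = 1 (octuple 8).
// Only a sketch of the !srl/!mul/!logtwo form, not code from the patch:
//   defvar dataEMULOctuple = 8;    // LMUL = 1, in eighths of a register group
//   defvar idxEEW          = 32;
//   defvar dataEEW         = 8;
//   // (32 * 8) >> log2(8) = 256 >> 3 = 32, i.e. index EMUL = 4 = (idxEEW / dataEEW) * LMUL
//   defvar idxEMULOctuple  = !srl(!mul(idxEEW, dataEMULOctuple), !logtwo(dataEEW));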
@@ -1735,7 +1803,7 @@ multiclass VPseudoUSStore {
foreach lmul = MxSet<eew>.m in {
defvar LInfo = lmul.MX;
defvar vreg = lmul.vrclass;
- let VLMul = lmul.value in {
+ let VLMul = lmul.value, SEW=eew in {
def "E" # eew # "_V_" # LInfo : VPseudoUSStoreNoMask<vreg, eew>,
VSESched<LInfo>;
def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoUSStoreMask<vreg, eew>,
@@ -1749,10 +1817,9 @@ multiclass VPseudoStoreMask {
foreach mti = AllMasks in {
defvar mx = mti.LMul.MX;
defvar WriteVSTM_MX = !cast<SchedWrite>("WriteVSTM_" # mx);
- defvar ReadVSTX_MX = !cast<SchedRead>("ReadVSTX_" # mx);
let VLMul = mti.LMul.value in {
- def "_V_" # mti.BX : VPseudoUSStoreNoMask<VR, /*EEW*/1, /*DummyMask*/0>,
- Sched<[WriteVSTM_MX, ReadVSTX_MX]>;
+ def "_V_" # mti.BX : VPseudoUSStoreNoMask<VR, EEW=1>,
+ Sched<[WriteVSTM_MX, ReadVSTX]>;
}
}
}
@@ -1762,7 +1829,7 @@ multiclass VPseudoSStore {
foreach lmul = MxSet<eew>.m in {
defvar LInfo = lmul.MX;
defvar vreg = lmul.vrclass;
- let VLMul = lmul.value in {
+ let VLMul = lmul.value, SEW=eew in {
def "E" # eew # "_V_" # LInfo : VPseudoSStoreNoMask<vreg, eew>,
VSSSched<eew, LInfo>;
def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSStoreMask<vreg, eew>,
@@ -1773,26 +1840,27 @@ multiclass VPseudoSStore {
}
multiclass VPseudoIStore<bit Ordered> {
- foreach eew = EEWList in {
- foreach sew = EEWList in {
- foreach lmul = MxSet<sew>.m in {
- defvar octuple_lmul = lmul.octuple;
+ foreach idxEEW = EEWList in {
+ foreach dataEEW = EEWList in {
+ foreach dataEMUL = MxSet<dataEEW>.m in {
+ defvar dataEMULOctuple = dataEMUL.octuple;
// Calculate emul = eew * lmul / sew
- defvar octuple_emul = !srl(!mul(eew, octuple_lmul), log2<sew>.val);
- if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
- defvar LInfo = lmul.MX;
- defvar IdxLInfo = octuple_to_str<octuple_emul>.ret;
- defvar idx_lmul = !cast<LMULInfo>("V_" # IdxLInfo);
- defvar Vreg = lmul.vrclass;
- defvar IdxVreg = idx_lmul.vrclass;
+ defvar idxEMULOctuple =
+ !srl(!mul(idxEEW, dataEMULOctuple), !logtwo(dataEEW));
+ if !and(!ge(idxEMULOctuple, 1), !le(idxEMULOctuple, 64)) then {
+ defvar DataLInfo = dataEMUL.MX;
+ defvar IdxLInfo = octuple_to_str<idxEMULOctuple>.ret;
+ defvar idxEMUL = !cast<LMULInfo>("V_" # IdxLInfo);
+ defvar Vreg = dataEMUL.vrclass;
+ defvar IdxVreg = idxEMUL.vrclass;
defvar Order = !if(Ordered, "O", "U");
- let VLMul = lmul.value in {
- def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo :
- VPseudoIStoreNoMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered>,
- VSXSched<eew, Order, LInfo>;
- def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo # "_MASK" :
- VPseudoIStoreMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered>,
- VSXSched<eew, Order, LInfo>;
+ let VLMul = dataEMUL.value in {
+ def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo :
+ VPseudoIStoreNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered>,
+ VSXSched<dataEEW, Order, DataLInfo, IdxLInfo>;
+ def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" :
+ VPseudoIStoreMask<Vreg, IdxVreg, idxEEW, idxEMUL.value, Ordered>,
+ VSXSched<dataEEW, Order, DataLInfo, IdxLInfo>;
}
}
}
@@ -1801,30 +1869,28 @@ multiclass VPseudoIStore<bit Ordered> {
}
multiclass VPseudoVPOP_M {
- foreach mti = AllMasks in
- {
+ foreach mti = AllMasks in {
defvar mx = mti.LMul.MX;
defvar WriteVMPopV_MX = !cast<SchedWrite>("WriteVMPopV_" # mx);
defvar ReadVMPopV_MX = !cast<SchedRead>("ReadVMPopV_" # mx);
let VLMul = mti.LMul.value in {
- def "_M_" # mti.BX : VPseudoUnaryNoMask<GPR, VR>,
+ def "_M_" # mti.BX : VPseudoUnaryNoMaskGPROut,
Sched<[WriteVMPopV_MX, ReadVMPopV_MX, ReadVMPopV_MX]>;
- def "_M_" # mti.BX # "_MASK" : VPseudoMaskUnarySOutMask,
+ def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMaskGPROut,
Sched<[WriteVMPopV_MX, ReadVMPopV_MX, ReadVMPopV_MX]>;
}
}
}
multiclass VPseudoV1ST_M {
- foreach mti = AllMasks in
- {
+ foreach mti = AllMasks in {
defvar mx = mti.LMul.MX;
defvar WriteVMFFSV_MX = !cast<SchedWrite>("WriteVMFFSV_" # mx);
defvar ReadVMFFSV_MX = !cast<SchedRead>("ReadVMFFSV_" # mx);
let VLMul = mti.LMul.value in {
- def "_M_" # mti.BX : VPseudoUnaryNoMask<GPR, VR>,
+ def "_M_" # mti.BX : VPseudoUnaryNoMaskGPROut,
Sched<[WriteVMFFSV_MX, ReadVMFFSV_MX, ReadVMFFSV_MX]>;
- def "_M_" # mti.BX # "_MASK" : VPseudoMaskUnarySOutMask,
+ def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMaskGPROut,
Sched<[WriteVMFFSV_MX, ReadVMFFSV_MX, ReadVMFFSV_MX]>;
}
}
@@ -1832,8 +1898,7 @@ multiclass VPseudoV1ST_M {
multiclass VPseudoVSFS_M {
defvar constraint = "@earlyclobber $rd";
- foreach mti = AllMasks in
- {
+ foreach mti = AllMasks in {
defvar mx = mti.LMul.MX;
defvar WriteVMSFSV_MX = !cast<SchedWrite>("WriteVMSFSV_" # mx);
defvar ReadVMSFSV_MX = !cast<SchedRead>("ReadVMSFSV_" # mx);
@@ -1855,10 +1920,8 @@ multiclass VPseudoVID_V {
let VLMul = m.value in {
def "_V_" # m.MX : VPseudoNullaryNoMask<m.vrclass>,
Sched<[WriteVMIdxV_MX, ReadVMask]>;
- def "_V_" # m.MX # "_TU": VPseudoNullaryNoMaskTU<m.vrclass>,
- Sched<[WriteVMIdxV_MX, ReadVMask]>;
def "_V_" # m.MX # "_MASK" : VPseudoNullaryMask<m.vrclass>,
- RISCVMaskedPseudo</*MaskOpIdx*/ 1>,
+ RISCVMaskedPseudo<MaskIdx=1>,
Sched<[WriteVMIdxV_MX, ReadVMask]>;
}
}
@@ -1886,10 +1949,8 @@ multiclass VPseudoVIOT_M {
let VLMul = m.value in {
def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, VR, constraint>,
Sched<[WriteVMIotV_MX, ReadVMIotV_MX, ReadVMask]>;
- def "_" # m.MX # "_TU" : VPseudoUnaryNoMaskTU<m.vrclass, VR, constraint>,
- Sched<[WriteVMIotV_MX, ReadVMIotV_MX, ReadVMask]>;
- def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, VR, constraint>,
- RISCVMaskedPseudo</*MaskOpIdx*/ 2>,
+ def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, VR, constraint>,
+ RISCVMaskedPseudo<MaskIdx=2>,
Sched<[WriteVMIotV_MX, ReadVMIotV_MX, ReadVMask]>;
}
}
@@ -1898,12 +1959,17 @@ multiclass VPseudoVIOT_M {
multiclass VPseudoVCPR_V {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVCompressV_MX = !cast<SchedWrite>("WriteVCompressV_" # mx);
- defvar ReadVCompressV_MX = !cast<SchedRead>("ReadVCompressV_" # mx);
-
+ defvar sews = SchedSEWSet<mx>.val;
let VLMul = m.value in
- def _VM # "_" # m.MX : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>,
- Sched<[WriteVCompressV_MX, ReadVCompressV_MX, ReadVCompressV_MX]>;
+ foreach e = sews in {
+ defvar suffix = "_" # m.MX # "_E" # e;
+ defvar WriteVCompressV_MX_E = !cast<SchedWrite>("WriteVCompressV" # suffix);
+ defvar ReadVCompressV_MX_E = !cast<SchedRead>("ReadVCompressV" # suffix);
+
+ let SEW = e in
+ def _VM # suffix : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>,
+ Sched<[WriteVCompressV_MX_E, ReadVCompressV_MX_E, ReadVCompressV_MX_E]>;
+ }
}
}
@@ -1911,30 +1977,51 @@ multiclass VPseudoBinary<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
LMULInfo MInfo,
- string Constraint = ""> {
- let VLMul = MInfo.value in {
- def "_" # MInfo.MX : VPseudoBinaryNoMask<RetClass, Op1Class, Op2Class,
- Constraint>;
- def "_" # MInfo.MX # "_TU" : VPseudoBinaryNoMaskTU<RetClass, Op1Class, Op2Class,
- Constraint>;
- def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class,
- Constraint>,
- RISCVMaskedPseudo</*MaskOpIdx*/ 3>;
+ string Constraint = "",
+ int sew = 0> {
+ let VLMul = MInfo.value, SEW=sew in {
+ defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
+ def suffix : VPseudoBinaryNoMaskTU<RetClass, Op1Class, Op2Class,
+ Constraint>;
+ def suffix # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class,
+ Constraint>,
+ RISCVMaskedPseudo<MaskIdx=3>;
}
}
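// Illustrative note (not part of the upstream patch): effect of the new sew parameter above.
// With sew = 0 (the default) a pseudo keeps its old "_<MX>" name; a nonzero sew appends
// "_E<sew>", e.g. a hypothetical "..._VV_M1" versus "..._VV_M1_E32". This is what lets
// SEW-split users such as VPseudoVDIV_VV_VX and VPseudoVGTR_VV_EEW below attach per-SEW
// SchedWrite/SchedRead resources from SchedSEWSet.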
+multiclass VPseudoBinaryRoundingMode<VReg RetClass,
+ VReg Op1Class,
+ DAGOperand Op2Class,
+ LMULInfo MInfo,
+ string Constraint = "",
+ int sew = 0,
+ int UsesVXRM = 1> {
+ let VLMul = MInfo.value, SEW=sew in {
+ defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX);
+ def suffix : VPseudoBinaryNoMaskRoundingMode<RetClass, Op1Class, Op2Class,
+ Constraint, UsesVXRM>;
+ def suffix # "_MASK" : VPseudoBinaryMaskPolicyRoundingMode<RetClass,
+ Op1Class,
+ Op2Class,
+ Constraint,
+ UsesVXRM>,
+ RISCVMaskedPseudo<MaskIdx=3>;
+ }
+}
+
+
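// Illustrative note (not part of the upstream patch), inferred only from how these hunks use
// the parameter: UsesVXRM defaults to 1 for the fixed-point "_RM" multiclasses, while the
// floating-point "_RM" variants below pass UsesVXRM=0, which appears to select the FP
// rounding-mode (frm) operand rather than the fixed-point vxrm one.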
multiclass VPseudoBinaryM<VReg RetClass,
VReg Op1Class,
DAGOperand Op2Class,
LMULInfo MInfo,
string Constraint = ""> {
let VLMul = MInfo.value in {
- def "_" # MInfo.MX : VPseudoBinaryNoMask<RetClass, Op1Class, Op2Class,
- Constraint>;
+ def "_" # MInfo.MX : VPseudoBinaryMOutNoMask<RetClass, Op1Class, Op2Class,
+ Constraint>;
let ForceTailAgnostic = true in
def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMOutMask<RetClass, Op1Class,
Op2Class, Constraint>,
- RISCVMaskedPseudo</*MaskOpIdx*/ 3, /*HasTU*/ false>;
+ RISCVMaskedPseudo<MaskIdx=3>;
}
}
@@ -1943,15 +2030,15 @@ multiclass VPseudoBinaryEmul<VReg RetClass,
DAGOperand Op2Class,
LMULInfo lmul,
LMULInfo emul,
- string Constraint = ""> {
- let VLMul = lmul.value in {
- def "_" # lmul.MX # "_" # emul.MX : VPseudoBinaryNoMask<RetClass, Op1Class, Op2Class,
- Constraint>;
- def "_" # lmul.MX # "_" # emul.MX # "_TU": VPseudoBinaryNoMaskTU<RetClass, Op1Class, Op2Class,
- Constraint>;
- def "_" # lmul.MX # "_" # emul.MX # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class,
+ string Constraint = "",
+ int sew = 0> {
+ let VLMul = lmul.value, SEW=sew in {
+ defvar suffix = !if(sew, "_" # lmul.MX # "_E" # sew, "_" # lmul.MX);
+ def suffix # "_" # emul.MX : VPseudoBinaryNoMaskTU<RetClass, Op1Class, Op2Class,
+ Constraint>;
+ def suffix # "_" # emul.MX # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class,
Constraint>,
- RISCVMaskedPseudo</*MaskOpIdx*/ 3>;
+ RISCVMaskedPseudo<MaskIdx=3>;
}
}
@@ -1967,37 +2054,67 @@ multiclass VPseudoTiedBinary<VReg RetClass,
}
}
-multiclass VPseudoBinaryV_VV<LMULInfo m, string Constraint = ""> {
- defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint>;
+multiclass VPseudoTiedBinaryRoundingMode<VReg RetClass,
+ DAGOperand Op2Class,
+ LMULInfo MInfo,
+ string Constraint = ""> {
+ let VLMul = MInfo.value in {
+ def "_" # MInfo.MX # "_TIED":
+ VPseudoTiedBinaryNoMaskRoundingMode<RetClass, Op2Class, Constraint>;
+ def "_" # MInfo.MX # "_MASK_TIED" :
+ VPseudoTiedBinaryMaskRoundingMode<RetClass, Op2Class, Constraint>;
+ }
+}
+
+
+multiclass VPseudoBinaryV_VV<LMULInfo m, string Constraint = "", int sew = 0> {
+ defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint, sew>;
+}
+
+multiclass VPseudoBinaryV_VV_RM<LMULInfo m, string Constraint = ""> {
+ defm _VV : VPseudoBinaryRoundingMode<m.vrclass, m.vrclass, m.vrclass, m, Constraint>;
}
// Similar to VPseudoBinaryV_VV, but uses MxListF.
-multiclass VPseudoBinaryFV_VV<LMULInfo m, string Constraint = ""> {
- defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint>;
+multiclass VPseudoBinaryFV_VV<LMULInfo m, string Constraint = "", int sew = 0> {
+ defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint, sew>;
+}
+
+multiclass VPseudoBinaryFV_VV_RM<LMULInfo m, string Constraint = "", int sew = 0> {
+ defm _VV : VPseudoBinaryRoundingMode<m.vrclass, m.vrclass, m.vrclass, m,
+ Constraint, sew,
+ UsesVXRM=0>;
}
multiclass VPseudoVGTR_VV_EEW<int eew, string Constraint = ""> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVGatherV_MX = !cast<SchedWrite>("WriteVGatherV_" # mx);
- defvar ReadVGatherV_MX = !cast<SchedRead>("ReadVGatherV_" # mx);
-
foreach sew = EEWList in {
- defvar octuple_lmul = m.octuple;
+ defvar dataEMULOctuple = m.octuple;
// emul = lmul * eew / sew
- defvar octuple_emul = !srl(!mul(octuple_lmul, eew), log2<sew>.val);
- if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
- defvar emulMX = octuple_to_str<octuple_emul>.ret;
+ defvar idxEMULOctuple = !srl(!mul(dataEMULOctuple, eew), !logtwo(sew));
+ if !and(!ge(idxEMULOctuple, 1), !le(idxEMULOctuple, 64)) then {
+ defvar emulMX = octuple_to_str<idxEMULOctuple>.ret;
defvar emul = !cast<LMULInfo>("V_" # emulMX);
- defm _VV : VPseudoBinaryEmul<m.vrclass, m.vrclass, emul.vrclass, m, emul, Constraint>,
- Sched<[WriteVGatherV_MX, ReadVGatherV_MX, ReadVGatherV_MX]>;
+ defvar sews = SchedSEWSet<mx>.val;
+ foreach e = sews in {
+ defvar WriteVRGatherVV_MX_E = !cast<SchedWrite>("WriteVRGatherVV_" # mx # "_E" # e);
+ defvar ReadVRGatherVV_data_MX_E = !cast<SchedRead>("ReadVRGatherVV_data_" # mx # "_E" # e);
+ defvar ReadVRGatherVV_index_MX_E = !cast<SchedRead>("ReadVRGatherVV_index_" # mx # "_E" # e);
+ defm _VV : VPseudoBinaryEmul<m.vrclass, m.vrclass, emul.vrclass, m, emul, Constraint, e>,
+ Sched<[WriteVRGatherVV_MX_E, ReadVRGatherVV_data_MX_E, ReadVRGatherVV_index_MX_E]>;
+ }
}
}
}
}
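// Illustrative note (not part of the upstream patch): the !ge(idxEMULOctuple, 1) /
// !le(idxEMULOctuple, 64) guard above keeps the index EMUL inside the legal 1/8..8 range
// (octuple 1..64). For example, assuming eew = 16, data SEW = 8 and LMUL = 8 (octuple 64):
// (64 * 16) >> log2(8) = 128 > 64, so that combination (EMUL = 16) is skipped.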
-multiclass VPseudoBinaryV_VX<LMULInfo m, string Constraint = ""> {
- defm "_VX" : VPseudoBinary<m.vrclass, m.vrclass, GPR, m, Constraint>;
+multiclass VPseudoBinaryV_VX<LMULInfo m, string Constraint = "", int sew = 0> {
+ defm "_VX" : VPseudoBinary<m.vrclass, m.vrclass, GPR, m, Constraint, sew>;
+}
+
+multiclass VPseudoBinaryV_VX_RM<LMULInfo m, string Constraint = ""> {
+ defm "_VX" : VPseudoBinaryRoundingMode<m.vrclass, m.vrclass, GPR, m, Constraint>;
}
multiclass VPseudoVSLD1_VX<string Constraint = ""> {
@@ -2012,9 +2129,15 @@ multiclass VPseudoVSLD1_VX<string Constraint = ""> {
}
}
-multiclass VPseudoBinaryV_VF<LMULInfo m, FPR_Info f, string Constraint = ""> {
+multiclass VPseudoBinaryV_VF<LMULInfo m, FPR_Info f, string Constraint = "", int sew = 0> {
defm "_V" # f.FX : VPseudoBinary<m.vrclass, m.vrclass,
- f.fprclass, m, Constraint>;
+ f.fprclass, m, Constraint, sew>;
+}
+
+multiclass VPseudoBinaryV_VF_RM<LMULInfo m, FPR_Info f, string Constraint = "", int sew = 0> {
+ defm "_V" # f.FX : VPseudoBinaryRoundingMode<m.vrclass, m.vrclass,
+ f.fprclass, m, Constraint, sew,
+ UsesVXRM=0>;
}
multiclass VPseudoVSLD1_VF<string Constraint = ""> {
@@ -2036,6 +2159,10 @@ multiclass VPseudoBinaryV_VI<Operand ImmType = simm5, LMULInfo m, string Constra
defm _VI : VPseudoBinary<m.vrclass, m.vrclass, ImmType, m, Constraint>;
}
+multiclass VPseudoBinaryV_VI_RM<Operand ImmType = simm5, LMULInfo m, string Constraint = ""> {
+ defm _VI : VPseudoBinaryRoundingMode<m.vrclass, m.vrclass, ImmType, m, Constraint>;
+}
+
multiclass VPseudoVALU_MM {
foreach m = MxList in {
defvar mx = m.MX;
@@ -2043,7 +2170,7 @@ multiclass VPseudoVALU_MM {
defvar ReadVMALUV_MX = !cast<SchedRead>("ReadVMALUV_" # mx);
let VLMul = m.value in {
- def "_MM_" # mx : VPseudoBinaryNoMask<VR, VR, VR, "", /*DummyMask*/0>,
+ def "_MM_" # mx : VPseudoBinaryNoMask<VR, VR, VR, "">,
Sched<[WriteVMALUV_MX, ReadVMALUV_MX, ReadVMALUV_MX]>;
}
}
@@ -2061,6 +2188,11 @@ multiclass VPseudoBinaryW_VV<LMULInfo m> {
"@earlyclobber $rd">;
}
+multiclass VPseudoBinaryW_VV_RM<LMULInfo m> {
+ defm _VV : VPseudoBinaryRoundingMode<m.wvrclass, m.vrclass, m.vrclass, m,
+ "@earlyclobber $rd", UsesVXRM=0>;
+}
+
multiclass VPseudoBinaryW_VX<LMULInfo m> {
defm "_VX" : VPseudoBinary<m.wvrclass, m.vrclass, GPR, m,
"@earlyclobber $rd">;
@@ -2072,6 +2204,13 @@ multiclass VPseudoBinaryW_VF<LMULInfo m, FPR_Info f> {
"@earlyclobber $rd">;
}
+multiclass VPseudoBinaryW_VF_RM<LMULInfo m, FPR_Info f> {
+ defm "_V" # f.FX : VPseudoBinaryRoundingMode<m.wvrclass, m.vrclass,
+ f.fprclass, m,
+ "@earlyclobber $rd",
+ UsesVXRM=0>;
+}
+
multiclass VPseudoBinaryW_WV<LMULInfo m> {
defm _WV : VPseudoBinary<m.wvrclass, m.wvrclass, m.vrclass, m,
"@earlyclobber $rd">;
@@ -2079,6 +2218,13 @@ multiclass VPseudoBinaryW_WV<LMULInfo m> {
"@earlyclobber $rd">;
}
+multiclass VPseudoBinaryW_WV_RM<LMULInfo m> {
+ defm _WV : VPseudoBinaryRoundingMode<m.wvrclass, m.wvrclass, m.vrclass, m,
+ "@earlyclobber $rd", UsesVXRM=0>;
+ defm _WV : VPseudoTiedBinaryRoundingMode<m.wvrclass, m.vrclass, m,
+ "@earlyclobber $rd">;
+}
+
multiclass VPseudoBinaryW_WX<LMULInfo m> {
defm "_WX" : VPseudoBinary<m.wvrclass, m.wvrclass, GPR, m>;
}
@@ -2088,6 +2234,12 @@ multiclass VPseudoBinaryW_WF<LMULInfo m, FPR_Info f> {
f.fprclass, m>;
}
+multiclass VPseudoBinaryW_WF_RM<LMULInfo m, FPR_Info f> {
+ defm "_W" # f.FX : VPseudoBinaryRoundingMode<m.wvrclass, m.wvrclass,
+ f.fprclass, m,
+ UsesVXRM=0>;
+}
+
// Narrowing instructions like vnsrl/vnsra/vnclip(u) don't need @earlyclobber
// if the source and destination have an LMUL<=1. This matches this overlap
// exception from the spec.
@@ -2098,16 +2250,34 @@ multiclass VPseudoBinaryV_WV<LMULInfo m> {
!if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>;
}
+multiclass VPseudoBinaryV_WV_RM<LMULInfo m> {
+ defm _WV : VPseudoBinaryRoundingMode<m.vrclass, m.wvrclass, m.vrclass, m,
+ !if(!ge(m.octuple, 8),
+ "@earlyclobber $rd", "")>;
+}
+
multiclass VPseudoBinaryV_WX<LMULInfo m> {
defm _WX : VPseudoBinary<m.vrclass, m.wvrclass, GPR, m,
!if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>;
}
+multiclass VPseudoBinaryV_WX_RM<LMULInfo m> {
+ defm _WX : VPseudoBinaryRoundingMode<m.vrclass, m.wvrclass, GPR, m,
+ !if(!ge(m.octuple, 8),
+ "@earlyclobber $rd", "")>;
+}
+
multiclass VPseudoBinaryV_WI<LMULInfo m> {
defm _WI : VPseudoBinary<m.vrclass, m.wvrclass, uimm5, m,
!if(!ge(m.octuple, 8), "@earlyclobber $rd", "")>;
}
+multiclass VPseudoBinaryV_WI_RM<LMULInfo m> {
+ defm _WI : VPseudoBinaryRoundingMode<m.vrclass, m.wvrclass, uimm5, m,
+ !if(!ge(m.octuple, 8),
+ "@earlyclobber $rd", "")>;
+}
+
// For vadc and vsbc, the instruction encoding is reserved if the destination
// vector register is v0.
// For vadc and vsbc, CarryIn == 1 and CarryOut == 0
@@ -2120,13 +2290,10 @@ multiclass VPseudoBinaryV_VM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
m.vrclass, m.vrclass, m, CarryIn, Constraint>;
}
-multiclass VPseudoTiedBinaryV_VM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
- string Constraint = ""> {
- def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU" :
- VPseudoTiedBinaryCarryIn<!if(CarryOut, VR,
- !if(!and(CarryIn, !not(CarryOut)),
- GetVRegNoV0<m.vrclass>.R, m.vrclass)),
- m.vrclass, m.vrclass, m, CarryIn, Constraint>;
+multiclass VPseudoTiedBinaryV_VM<LMULInfo m> {
+ def "_VVM" # "_" # m.MX:
+ VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
+ m.vrclass, m.vrclass, m, 1, "">;
}
multiclass VPseudoBinaryV_XM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
@@ -2138,13 +2305,10 @@ multiclass VPseudoBinaryV_XM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
m.vrclass, GPR, m, CarryIn, Constraint>;
}
-multiclass VPseudoTiedBinaryV_XM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
- string Constraint = ""> {
- def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU":
- VPseudoTiedBinaryCarryIn<!if(CarryOut, VR,
- !if(!and(CarryIn, !not(CarryOut)),
- GetVRegNoV0<m.vrclass>.R, m.vrclass)),
- m.vrclass, GPR, m, CarryIn, Constraint>;
+multiclass VPseudoTiedBinaryV_XM<LMULInfo m> {
+ def "_VXM" # "_" # m.MX:
+ VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
+ m.vrclass, GPR, m, 1, "">;
}
multiclass VPseudoVMRG_FM {
@@ -2155,14 +2319,9 @@ multiclass VPseudoVMRG_FM {
defvar ReadVFMergeV_MX = !cast<SchedRead>("ReadVFMergeV_" # mx);
defvar ReadVFMergeF_MX = !cast<SchedRead>("ReadVFMergeF_" # mx);
- def "_V" # f.FX # "M_" # mx :
- VPseudoBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
- m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">,
- Sched<[WriteVFMergeV_MX, ReadVFMergeV_MX, ReadVFMergeF_MX, ReadVMask]>;
- // Tied version to allow codegen control over the tail elements
- def "_V" # f.FX # "M_" # mx # "_TU":
+ def "_V" # f.FX # "M_" # mx:
VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
- m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">,
+ m.vrclass, f.fprclass, m, CarryIn=1, Constraint="">,
Sched<[WriteVFMergeV_MX, ReadVFMergeV_MX, ReadVFMergeF_MX, ReadVMask]>;
}
}
@@ -2177,13 +2336,10 @@ multiclass VPseudoBinaryV_IM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
m.vrclass, simm5, m, CarryIn, Constraint>;
}
-multiclass VPseudoTiedBinaryV_IM<LMULInfo m, bit CarryOut = 0, bit CarryIn = 1,
- string Constraint = ""> {
- def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU":
- VPseudoTiedBinaryCarryIn<!if(CarryOut, VR,
- !if(!and(CarryIn, !not(CarryOut)),
- GetVRegNoV0<m.vrclass>.R, m.vrclass)),
- m.vrclass, simm5, m, CarryIn, Constraint>;
+multiclass VPseudoTiedBinaryV_IM<LMULInfo m> {
+ def "_VIM" # "_" # m.MX:
+ VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
+ m.vrclass, simm5, m, 1, "">;
}
multiclass VPseudoUnaryVMV_V_X_I {
@@ -2197,17 +2353,11 @@ multiclass VPseudoUnaryVMV_V_X_I {
defvar ReadVIMovX_MX = !cast<SchedRead>("ReadVIMovX_" # mx);
let VLMul = m.value in {
- def "_V_" # mx : VPseudoUnaryNoDummyMask<m.vrclass, m.vrclass>,
+ def "_V_" # mx : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
Sched<[WriteVIMovV_MX, ReadVIMovV_MX]>;
- def "_X_" # mx : VPseudoUnaryNoDummyMask<m.vrclass, GPR>,
+ def "_X_" # mx : VPseudoUnaryNoMask<m.vrclass, GPR>,
Sched<[WriteVIMovX_MX, ReadVIMovX_MX]>;
- def "_I_" # mx : VPseudoUnaryNoDummyMask<m.vrclass, simm5>,
- Sched<[WriteVIMovI_MX]>;
- def "_V_" # mx # "_TU": VPseudoUnaryNoDummyMaskTU<m.vrclass, m.vrclass>,
- Sched<[WriteVIMovV_MX, ReadVIMovV_MX]>;
- def "_X_" # mx # "_TU": VPseudoUnaryNoDummyMaskTU<m.vrclass, GPR>,
- Sched<[WriteVIMovX_MX, ReadVIMovX_MX]>;
- def "_I_" # mx # "_TU": VPseudoUnaryNoDummyMaskTU<m.vrclass, simm5>,
+ def "_I_" # mx : VPseudoUnaryNoMask<m.vrclass, simm5>,
Sched<[WriteVIMovI_MX]>;
}
}
@@ -2223,10 +2373,7 @@ multiclass VPseudoVMV_F {
let VLMul = m.value in {
def "_" # f.FX # "_" # mx :
- VPseudoUnaryNoDummyMask<m.vrclass, f.fprclass>,
- Sched<[WriteVFMovV_MX, ReadVFMovF_MX]>;
- def "_" # f.FX # "_" # mx # "_TU":
- VPseudoUnaryNoDummyMaskTU<m.vrclass, f.fprclass>,
+ VPseudoUnaryNoMask<m.vrclass, f.fprclass>,
Sched<[WriteVFMovV_MX, ReadVFMovF_MX]>;
}
}
@@ -2242,46 +2389,64 @@ multiclass VPseudoVCLS_V {
let VLMul = m.value in {
def "_V_" # mx : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
Sched<[WriteVFClassV_MX, ReadVFClassV_MX, ReadVMask]>;
- def "_V_" # mx # "_TU": VPseudoUnaryNoMaskTU<m.vrclass, m.vrclass>,
- Sched<[WriteVFClassV_MX, ReadVFClassV_MX, ReadVMask]>;
- def "_V_" # mx # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.vrclass>,
- RISCVMaskedPseudo</*MaskOpIdx*/ 2>,
+ def "_V_" # mx # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx=2>,
Sched<[WriteVFClassV_MX, ReadVFClassV_MX, ReadVMask]>;
}
}
}
-multiclass VPseudoVSQR_V {
+multiclass VPseudoVSQR_V_RM {
+ foreach m = MxListF in {
+ defvar mx = m.MX;
+ defvar sews = SchedSEWSet<m.MX, isF=1>.val;
+
+ let VLMul = m.value in
+ foreach e = sews in {
+ defvar suffix = "_" # mx # "_E" # e;
+ defvar WriteVFSqrtV_MX_E = !cast<SchedWrite>("WriteVFSqrtV" # suffix);
+ defvar ReadVFSqrtV_MX_E = !cast<SchedRead>("ReadVFSqrtV" # suffix);
+
+ let SEW = e in {
+ def "_V" # suffix : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.vrclass>,
+ Sched<[WriteVFSqrtV_MX_E, ReadVFSqrtV_MX_E,
+ ReadVMask]>;
+ def "_V" # suffix # "_MASK" : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx=2>,
+ Sched<[WriteVFSqrtV_MX_E, ReadVFSqrtV_MX_E,
+ ReadVMask]>;
+ }
+ }
+ }
+}
+
+multiclass VPseudoVRCP_V {
foreach m = MxListF in {
defvar mx = m.MX;
- defvar WriteVFSqrtV_MX = !cast<SchedWrite>("WriteVFSqrtV_" # mx);
- defvar ReadVFSqrtV_MX = !cast<SchedRead>("ReadVFSqrtV_" # mx);
+ defvar WriteVFRecpV_MX = !cast<SchedWrite>("WriteVFRecpV_" # mx);
+ defvar ReadVFRecpV_MX = !cast<SchedRead>("ReadVFRecpV_" # mx);
let VLMul = m.value in {
def "_V_" # mx : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
- Sched<[WriteVFSqrtV_MX, ReadVFSqrtV_MX, ReadVMask]>;
- def "_V_" # mx # "_TU": VPseudoUnaryNoMaskTU<m.vrclass, m.vrclass>,
- Sched<[WriteVFSqrtV_MX, ReadVFSqrtV_MX, ReadVMask]>;
- def "_V_" # mx # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.vrclass>,
- RISCVMaskedPseudo</*MaskOpIdx*/ 2>,
- Sched<[WriteVFSqrtV_MX, ReadVFSqrtV_MX, ReadVMask]>;
+ Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>;
+ def "_V_" # mx # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx=2>,
+ Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>;
}
}
}
-multiclass VPseudoVRCP_V {
+multiclass VPseudoVRCP_V_RM {
foreach m = MxListF in {
defvar mx = m.MX;
defvar WriteVFRecpV_MX = !cast<SchedWrite>("WriteVFRecpV_" # mx);
defvar ReadVFRecpV_MX = !cast<SchedRead>("ReadVFRecpV_" # mx);
let VLMul = m.value in {
- def "_V_" # mx : VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
+ def "_V_" # mx : VPseudoUnaryNoMaskRoundingMode<m.vrclass, m.vrclass>,
Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>;
- def "_V_" # mx # "_TU": VPseudoUnaryNoMaskTU<m.vrclass, m.vrclass>,
- Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>;
- def "_V_" # mx # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.vrclass>,
- RISCVMaskedPseudo</*MaskOpIdx*/ 2>,
+ def "_V_" # mx # "_MASK" : VPseudoUnaryMaskRoundingMode<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx=2>,
Sched<[WriteVFRecpV_MX, ReadVFRecpV_MX, ReadVMask]>;
}
}
@@ -2289,8 +2454,7 @@ multiclass VPseudoVRCP_V {
multiclass PseudoVEXT_VF2 {
defvar constraints = "@earlyclobber $rd";
- foreach m = MxListVF2 in
- {
+ foreach m = MxListVF2 in {
defvar mx = m.MX;
defvar WriteVExtV_MX = !cast<SchedWrite>("WriteVExtV_" # mx);
defvar ReadVExtV_MX = !cast<SchedRead>("ReadVExtV_" # mx);
@@ -2298,11 +2462,9 @@ multiclass PseudoVEXT_VF2 {
let VLMul = m.value in {
def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints>,
Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
- def "_" # mx # "_TU": VPseudoUnaryNoMaskTU<m.vrclass, m.f2vrclass, constraints>,
- Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
def "_" # mx # "_MASK" :
- VPseudoUnaryMaskTA<m.vrclass, m.f2vrclass, constraints>,
- RISCVMaskedPseudo</*MaskOpIdx*/ 2>,
+ VPseudoUnaryMask<m.vrclass, m.f2vrclass, constraints>,
+ RISCVMaskedPseudo<MaskIdx=2>,
Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
}
}
@@ -2310,8 +2472,7 @@ multiclass PseudoVEXT_VF2 {
multiclass PseudoVEXT_VF4 {
defvar constraints = "@earlyclobber $rd";
- foreach m = MxListVF4 in
- {
+ foreach m = MxListVF4 in {
defvar mx = m.MX;
defvar WriteVExtV_MX = !cast<SchedWrite>("WriteVExtV_" # mx);
defvar ReadVExtV_MX = !cast<SchedRead>("ReadVExtV_" # mx);
@@ -2319,11 +2480,9 @@ multiclass PseudoVEXT_VF4 {
let VLMul = m.value in {
def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints>,
Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
- def "_" # mx # "_TU": VPseudoUnaryNoMaskTU<m.vrclass, m.f4vrclass, constraints>,
- Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
def "_" # mx # "_MASK" :
- VPseudoUnaryMaskTA<m.vrclass, m.f4vrclass, constraints>,
- RISCVMaskedPseudo</*MaskOpIdx*/ 2>,
+ VPseudoUnaryMask<m.vrclass, m.f4vrclass, constraints>,
+ RISCVMaskedPseudo<MaskIdx=2>,
Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
}
}
@@ -2331,8 +2490,7 @@ multiclass PseudoVEXT_VF4 {
multiclass PseudoVEXT_VF8 {
defvar constraints = "@earlyclobber $rd";
- foreach m = MxListVF8 in
- {
+ foreach m = MxListVF8 in {
defvar mx = m.MX;
defvar WriteVExtV_MX = !cast<SchedWrite>("WriteVExtV_" # mx);
defvar ReadVExtV_MX = !cast<SchedRead>("ReadVExtV_" # mx);
@@ -2340,11 +2498,9 @@ multiclass PseudoVEXT_VF8 {
let VLMul = m.value in {
def "_" # mx : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints>,
Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
- def "_" # mx # "_TU": VPseudoUnaryNoMaskTU<m.vrclass, m.f8vrclass, constraints>,
- Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
def "_" # mx # "_MASK" :
- VPseudoUnaryMaskTA<m.vrclass, m.f8vrclass, constraints>,
- RISCVMaskedPseudo</*MaskOpIdx*/ 2>,
+ VPseudoUnaryMask<m.vrclass, m.f8vrclass, constraints>,
+ RISCVMaskedPseudo<MaskIdx=2>,
Sched<[WriteVExtV_MX, ReadVExtV_MX, ReadVMask]>;
}
}
@@ -2386,18 +2542,27 @@ multiclass VPseudoBinaryM_VI<LMULInfo m> {
multiclass VPseudoVGTR_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVGatherV_MX = !cast<SchedWrite>("WriteVGatherV_" # mx);
- defvar WriteVGatherX_MX = !cast<SchedWrite>("WriteVGatherX_" # mx);
- defvar WriteVGatherI_MX = !cast<SchedWrite>("WriteVGatherI_" # mx);
- defvar ReadVGatherV_MX = !cast<SchedRead>("ReadVGatherV_" # mx);
- defvar ReadVGatherX_MX = !cast<SchedRead>("ReadVGatherX_" # mx);
+ defvar WriteVRGatherVX_MX = !cast<SchedWrite>("WriteVRGatherVX_" # mx);
+ defvar WriteVRGatherVI_MX = !cast<SchedWrite>("WriteVRGatherVI_" # mx);
+ defvar ReadVRGatherVX_data_MX = !cast<SchedRead>("ReadVRGatherVX_data_" # mx);
+ defvar ReadVRGatherVX_index_MX = !cast<SchedRead>("ReadVRGatherVX_index_" # mx);
+ defvar ReadVRGatherVI_data_MX = !cast<SchedRead>("ReadVRGatherVI_data_" # mx);
- defm "" : VPseudoBinaryV_VV<m, Constraint>,
- Sched<[WriteVGatherV_MX, ReadVGatherV_MX, ReadVGatherV_MX, ReadVMask]>;
defm "" : VPseudoBinaryV_VX<m, Constraint>,
- Sched<[WriteVGatherX_MX, ReadVGatherV_MX, ReadVGatherX_MX, ReadVMask]>;
+ Sched<[WriteVRGatherVX_MX, ReadVRGatherVX_data_MX,
+ ReadVRGatherVX_index_MX, ReadVMask]>;
defm "" : VPseudoBinaryV_VI<ImmType, m, Constraint>,
- Sched<[WriteVGatherI_MX, ReadVGatherV_MX, ReadVMask]>;
+ Sched<[WriteVRGatherVI_MX, ReadVRGatherVI_data_MX, ReadVMask]>;
+
+ defvar sews = SchedSEWSet<mx>.val;
+ foreach e = sews in {
+ defvar WriteVRGatherVV_MX_E = !cast<SchedWrite>("WriteVRGatherVV_" # mx # "_E" # e);
+ defvar ReadVRGatherVV_data_MX_E = !cast<SchedRead>("ReadVRGatherVV_data_" # mx # "_E" # e);
+ defvar ReadVRGatherVV_index_MX_E = !cast<SchedRead>("ReadVRGatherVV_index_" # mx # "_E" # e);
+ defm "" : VPseudoBinaryV_VV<m, Constraint, e>,
+ Sched<[WriteVRGatherVV_MX_E, ReadVRGatherVV_data_MX_E,
+ ReadVRGatherVV_index_MX_E, ReadVMask]>;
+ }
}
}
@@ -2438,7 +2603,7 @@ multiclass VPseudoVSHT_VV_VX_VI<Operand ImmType = simm5, string Constraint = "">
}
}
-multiclass VPseudoVSSHT_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
+multiclass VPseudoVSSHT_VV_VX_VI_RM<Operand ImmType = simm5, string Constraint = ""> {
foreach m = MxList in {
defvar mx = m.MX;
defvar WriteVSShiftV_MX = !cast<SchedWrite>("WriteVSShiftV_" # mx);
@@ -2447,11 +2612,11 @@ multiclass VPseudoVSSHT_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""
defvar ReadVSShiftV_MX = !cast<SchedRead>("ReadVSShiftV_" # mx);
defvar ReadVSShiftX_MX = !cast<SchedRead>("ReadVSShiftX_" # mx);
- defm "" : VPseudoBinaryV_VV<m, Constraint>,
+ defm "" : VPseudoBinaryV_VV_RM<m, Constraint>,
Sched<[WriteVSShiftV_MX, ReadVSShiftV_MX, ReadVSShiftV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_VX<m, Constraint>,
+ defm "" : VPseudoBinaryV_VX_RM<m, Constraint>,
Sched<[WriteVSShiftX_MX, ReadVSShiftV_MX, ReadVSShiftX_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_VI<ImmType, m, Constraint>,
+ defm "" : VPseudoBinaryV_VI_RM<ImmType, m, Constraint>,
Sched<[WriteVSShiftI_MX, ReadVSShiftV_MX, ReadVMask]>;
}
}
@@ -2489,7 +2654,7 @@ multiclass VPseudoVSALU_VV_VX {
}
}
-multiclass VPseudoVSMUL_VV_VX {
+multiclass VPseudoVSMUL_VV_VX_RM {
foreach m = MxList in {
defvar mx = m.MX;
defvar WriteVSMulV_MX = !cast<SchedWrite>("WriteVSMulV_" # mx);
@@ -2497,14 +2662,14 @@ multiclass VPseudoVSMUL_VV_VX {
defvar ReadVSMulV_MX = !cast<SchedRead>("ReadVSMulV_" # mx);
defvar ReadVSMulX_MX = !cast<SchedRead>("ReadVSMulX_" # mx);
- defm "" : VPseudoBinaryV_VV<m>,
+ defm "" : VPseudoBinaryV_VV_RM<m>,
Sched<[WriteVSMulV_MX, ReadVSMulV_MX, ReadVSMulV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_VX<m>,
+ defm "" : VPseudoBinaryV_VX_RM<m>,
Sched<[WriteVSMulX_MX, ReadVSMulV_MX, ReadVSMulX_MX, ReadVMask]>;
}
}
-multiclass VPseudoVAALU_VV_VX {
+multiclass VPseudoVAALU_VV_VX_RM {
foreach m = MxList in {
defvar mx = m.MX;
defvar WriteVAALUV_MX = !cast<SchedWrite>("WriteVAALUV_" # mx);
@@ -2512,9 +2677,9 @@ multiclass VPseudoVAALU_VV_VX {
defvar ReadVAALUV_MX = !cast<SchedRead>("ReadVAALUV_" # mx);
defvar ReadVAALUX_MX = !cast<SchedRead>("ReadVAALUX_" # mx);
- defm "" : VPseudoBinaryV_VV<m>,
+ defm "" : VPseudoBinaryV_VV_RM<m>,
Sched<[WriteVAALUV_MX, ReadVAALUV_MX, ReadVAALUV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_VX<m>,
+ defm "" : VPseudoBinaryV_VX_RM<m>,
Sched<[WriteVAALUX_MX, ReadVAALUV_MX, ReadVAALUX_MX, ReadVMask]>;
}
}
@@ -2522,15 +2687,15 @@ multiclass VPseudoVAALU_VV_VX {
multiclass VPseudoVMINMAX_VV_VX {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVICmpV_MX = !cast<SchedWrite>("WriteVICmpV_" # mx);
- defvar WriteVICmpX_MX = !cast<SchedWrite>("WriteVICmpX_" # mx);
- defvar ReadVICmpV_MX = !cast<SchedRead>("ReadVICmpV_" # mx);
- defvar ReadVICmpX_MX = !cast<SchedRead>("ReadVICmpX_" # mx);
+ defvar WriteVIMinMaxV_MX = !cast<SchedWrite>("WriteVIMinMaxV_" # mx);
+ defvar WriteVIMinMaxX_MX = !cast<SchedWrite>("WriteVIMinMaxX_" # mx);
+ defvar ReadVIMinMaxV_MX = !cast<SchedRead>("ReadVIMinMaxV_" # mx);
+ defvar ReadVIMinMaxX_MX = !cast<SchedRead>("ReadVIMinMaxX_" # mx);
defm "" : VPseudoBinaryV_VV<m>,
- Sched<[WriteVICmpV_MX, ReadVICmpV_MX, ReadVICmpV_MX, ReadVMask]>;
+ Sched<[WriteVIMinMaxV_MX, ReadVIMinMaxV_MX, ReadVIMinMaxV_MX, ReadVMask]>;
defm "" : VPseudoBinaryV_VX<m>,
- Sched<[WriteVICmpX_MX, ReadVICmpV_MX, ReadVICmpX_MX, ReadVMask]>;
+ Sched<[WriteVIMinMaxX_MX, ReadVIMinMaxV_MX, ReadVIMinMaxX_MX, ReadVMask]>;
}
}
@@ -2552,25 +2717,28 @@ multiclass VPseudoVMUL_VV_VX {
multiclass VPseudoVDIV_VV_VX {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVIDivV_MX = !cast<SchedWrite>("WriteVIDivV_" # mx);
- defvar WriteVIDivX_MX = !cast<SchedWrite>("WriteVIDivX_" # mx);
- defvar ReadVIDivV_MX = !cast<SchedRead>("ReadVIDivV_" # mx);
- defvar ReadVIDivX_MX = !cast<SchedRead>("ReadVIDivX_" # mx);
-
- defm "" : VPseudoBinaryV_VV<m>,
- Sched<[WriteVIDivV_MX, ReadVIDivV_MX, ReadVIDivV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_VX<m>,
- Sched<[WriteVIDivX_MX, ReadVIDivV_MX, ReadVIDivX_MX, ReadVMask]>;
+ defvar sews = SchedSEWSet<mx>.val;
+ foreach e = sews in {
+ defvar WriteVIDivV_MX_E = !cast<SchedWrite>("WriteVIDivV_" # mx # "_E" # e);
+ defvar WriteVIDivX_MX_E = !cast<SchedWrite>("WriteVIDivX_" # mx # "_E" # e);
+ defvar ReadVIDivV_MX_E = !cast<SchedRead>("ReadVIDivV_" # mx # "_E" # e);
+ defvar ReadVIDivX_MX_E = !cast<SchedRead>("ReadVIDivX_" # mx # "_E" # e);
+
+ defm "" : VPseudoBinaryV_VV<m, "", e>,
+ Sched<[WriteVIDivV_MX_E, ReadVIDivV_MX_E, ReadVIDivV_MX_E, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VX<m, "", e>,
+ Sched<[WriteVIDivX_MX_E, ReadVIDivV_MX_E, ReadVIDivX_MX_E, ReadVMask]>;
+ }
}
}
-multiclass VPseudoVFMUL_VV_VF {
+multiclass VPseudoVFMUL_VV_VF_RM {
foreach m = MxListF in {
defvar mx = m.MX;
defvar WriteVFMulV_MX = !cast<SchedWrite>("WriteVFMulV_" # mx);
defvar ReadVFMulV_MX = !cast<SchedRead>("ReadVFMulV_" # mx);
- defm "" : VPseudoBinaryFV_VV<m>,
+ defm "" : VPseudoBinaryFV_VV_RM<m>,
Sched<[WriteVFMulV_MX, ReadVFMulV_MX, ReadVFMulV_MX, ReadVMask]>;
}
@@ -2581,44 +2749,48 @@ multiclass VPseudoVFMUL_VV_VF {
defvar ReadVFMulV_MX = !cast<SchedRead>("ReadVFMulV_" # mx);
defvar ReadVFMulF_MX = !cast<SchedRead>("ReadVFMulF_" # mx);
- defm "" : VPseudoBinaryV_VF<m, f>,
+ defm "" : VPseudoBinaryV_VF_RM<m, f>,
Sched<[WriteVFMulF_MX, ReadVFMulV_MX, ReadVFMulF_MX, ReadVMask]>;
}
}
}
-multiclass VPseudoVFDIV_VV_VF {
+multiclass VPseudoVFDIV_VV_VF_RM {
foreach m = MxListF in {
defvar mx = m.MX;
- defvar WriteVFDivV_MX = !cast<SchedWrite>("WriteVFDivV_" # mx);
- defvar ReadVFDivV_MX = !cast<SchedRead>("ReadVFDivV_" # mx);
+ defvar sews = SchedSEWSet<mx, isF=1>.val;
+ foreach e = sews in {
+ defvar WriteVFDivV_MX_E = !cast<SchedWrite>("WriteVFDivV_" # mx # "_E" # e);
+ defvar ReadVFDivV_MX_E = !cast<SchedRead>("ReadVFDivV_" # mx # "_E" # e);
- defm "" : VPseudoBinaryFV_VV<m>,
- Sched<[WriteVFDivV_MX, ReadVFDivV_MX, ReadVFDivV_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryFV_VV_RM<m, "", e>,
+ Sched<[WriteVFDivV_MX_E, ReadVFDivV_MX_E, ReadVFDivV_MX_E, ReadVMask]>;
+ }
}
foreach f = FPList in {
foreach m = f.MxList in {
defvar mx = m.MX;
- defvar WriteVFDivF_MX = !cast<SchedWrite>("WriteVFDivF_" # mx);
- defvar ReadVFDivV_MX = !cast<SchedRead>("ReadVFDivV_" # mx);
- defvar ReadVFDivF_MX = !cast<SchedRead>("ReadVFDivF_" # mx);
+ defvar WriteVFDivF_MX_E = !cast<SchedWrite>("WriteVFDivF_" # mx # "_E" # f.SEW);
+ defvar ReadVFDivV_MX_E = !cast<SchedRead>("ReadVFDivV_" # mx # "_E" # f.SEW);
+ defvar ReadVFDivF_MX_E = !cast<SchedRead>("ReadVFDivF_" # mx # "_E" # f.SEW);
- defm "" : VPseudoBinaryV_VF<m, f>,
- Sched<[WriteVFDivF_MX, ReadVFDivV_MX, ReadVFDivF_MX, ReadVMask]>;
+ defm "" : VPseudoBinaryV_VF_RM<m, f, "", f.SEW>,
+ Sched<[WriteVFDivF_MX_E, ReadVFDivV_MX_E, ReadVFDivF_MX_E, ReadVMask]>;
}
}
}
-multiclass VPseudoVFRDIV_VF {
+multiclass VPseudoVFRDIV_VF_RM {
foreach f = FPList in {
foreach m = f.MxList in {
defvar mx = m.MX;
- defvar WriteVFDivF_MX = !cast<SchedWrite>("WriteVFDivF_" # mx);
- defvar ReadVFDivV_MX = !cast<SchedRead>("ReadVFDivV_" # mx);
- defvar ReadVFDivF_MX = !cast<SchedRead>("ReadVFDivF_" # mx);
- defm "" : VPseudoBinaryV_VF<m, f>,
- Sched<[WriteVFDivF_MX, ReadVFDivV_MX, ReadVFDivF_MX, ReadVMask]>;
+ defvar WriteVFDivF_MX_E = !cast<SchedWrite>("WriteVFDivF_" # mx # "_E" # f.SEW);
+ defvar ReadVFDivV_MX_E = !cast<SchedRead>("ReadVFDivV_" # mx # "_E" # f.SEW);
+ defvar ReadVFDivF_MX_E = !cast<SchedRead>("ReadVFDivF_" # mx # "_E" # f.SEW);
+
+ defm "" : VPseudoBinaryV_VF_RM<m, f, "", f.SEW>,
+ Sched<[WriteVFDivF_MX_E, ReadVFDivV_MX_E, ReadVFDivF_MX_E, ReadVMask]>;
}
}
}
@@ -2664,22 +2836,22 @@ multiclass VPseudoVSGNJ_VV_VF {
multiclass VPseudoVMAX_VV_VF {
foreach m = MxListF in {
defvar mx = m.MX;
- defvar WriteVFCmpV_MX = !cast<SchedWrite>("WriteVFCmpV_" # mx);
- defvar ReadVFCmpV_MX = !cast<SchedRead>("ReadVFCmpV_" # mx);
+ defvar WriteVFMinMaxV_MX = !cast<SchedWrite>("WriteVFMinMaxV_" # mx);
+ defvar ReadVFMinMaxV_MX = !cast<SchedRead>("ReadVFMinMaxV_" # mx);
defm "" : VPseudoBinaryFV_VV<m>,
- Sched<[WriteVFCmpV_MX, ReadVFCmpV_MX, ReadVFCmpV_MX, ReadVMask]>;
+ Sched<[WriteVFMinMaxV_MX, ReadVFMinMaxV_MX, ReadVFMinMaxV_MX, ReadVMask]>;
}
foreach f = FPList in {
foreach m = f.MxList in {
defvar mx = m.MX;
- defvar WriteVFCmpF_MX = !cast<SchedWrite>("WriteVFCmpF_" # mx);
- defvar ReadVFCmpV_MX = !cast<SchedRead>("ReadVFCmpV_" # mx);
- defvar ReadVFCmpF_MX = !cast<SchedRead>("ReadVFCmpF_" # mx);
+ defvar WriteVFMinMaxF_MX = !cast<SchedWrite>("WriteVFMinMaxF_" # mx);
+ defvar ReadVFMinMaxV_MX = !cast<SchedRead>("ReadVFMinMaxV_" # mx);
+ defvar ReadVFMinMaxF_MX = !cast<SchedRead>("ReadVFMinMaxF_" # mx);
defm "" : VPseudoBinaryV_VF<m, f>,
- Sched<[WriteVFCmpF_MX, ReadVFCmpV_MX, ReadVFCmpF_MX, ReadVMask]>;
+ Sched<[WriteVFMinMaxF_MX, ReadVFMinMaxV_MX, ReadVFMinMaxF_MX, ReadVMask]>;
}
}
}
@@ -2706,6 +2878,28 @@ multiclass VPseudoVALU_VV_VF {
}
}
+multiclass VPseudoVALU_VV_VF_RM {
+ foreach m = MxListF in {
+ defvar mx = m.MX;
+ defvar WriteVFALUV_MX = !cast<SchedWrite>("WriteVFALUV_" # mx);
+ defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx);
+
+ defm "" : VPseudoBinaryFV_VV_RM<m>,
+ Sched<[WriteVFALUV_MX, ReadVFALUV_MX, ReadVFALUV_MX, ReadVMask]>;
+ }
+
+ foreach f = FPList in {
+ foreach m = f.MxList in {
+ defvar mx = m.MX;
+ defvar WriteVFALUF_MX = !cast<SchedWrite>("WriteVFALUF_" # mx);
+ defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx);
+ defvar ReadVFALUF_MX = !cast<SchedRead>("ReadVFALUF_" # mx);
+ defm "" : VPseudoBinaryV_VF_RM<m, f>,
+ Sched<[WriteVFALUF_MX, ReadVFALUV_MX, ReadVFALUF_MX, ReadVMask]>;
+ }
+ }
+}
+
multiclass VPseudoVALU_VF {
foreach f = FPList in {
foreach m = f.MxList in {
@@ -2720,6 +2914,20 @@ multiclass VPseudoVALU_VF {
}
}
+multiclass VPseudoVALU_VF_RM {
+ foreach f = FPList in {
+ foreach m = f.MxList in {
+ defvar mx = m.MX;
+ defvar WriteVFALUF_MX = !cast<SchedWrite>("WriteVFALUF_" # mx);
+ defvar ReadVFALUV_MX = !cast<SchedRead>("ReadVFALUV_" # mx);
+ defvar ReadVFALUF_MX = !cast<SchedRead>("ReadVFALUF_" # mx);
+
+ defm "" : VPseudoBinaryV_VF_RM<m, f>,
+ Sched<[WriteVFALUF_MX, ReadVFALUV_MX, ReadVFALUF_MX, ReadVMask]>;
+ }
+ }
+}
+
multiclass VPseudoVALU_VX_VI<Operand ImmType = simm5> {
foreach m = MxList in {
defvar mx = m.MX;
@@ -2765,13 +2973,13 @@ multiclass VPseudoVWMUL_VV_VX {
}
}
-multiclass VPseudoVWMUL_VV_VF {
+multiclass VPseudoVWMUL_VV_VF_RM {
foreach m = MxListFW in {
defvar mx = m.MX;
defvar WriteVFWMulV_MX = !cast<SchedWrite>("WriteVFWMulV_" # mx);
defvar ReadVFWMulV_MX = !cast<SchedRead>("ReadVFWMulV_" # mx);
- defm "" : VPseudoBinaryW_VV<m>,
+ defm "" : VPseudoBinaryW_VV_RM<m>,
Sched<[WriteVFWMulV_MX, ReadVFWMulV_MX, ReadVFWMulV_MX, ReadVMask]>;
}
@@ -2782,7 +2990,7 @@ multiclass VPseudoVWMUL_VV_VF {
defvar ReadVFWMulV_MX = !cast<SchedRead>("ReadVFWMulV_" # mx);
defvar ReadVFWMulF_MX = !cast<SchedRead>("ReadVFWMulF_" # mx);
- defm "" : VPseudoBinaryW_VF<m, f>,
+ defm "" : VPseudoBinaryW_VF_RM<m, f>,
Sched<[WriteVFWMulF_MX, ReadVFWMulV_MX, ReadVFWMulF_MX, ReadVMask]>;
}
}
@@ -2803,13 +3011,13 @@ multiclass VPseudoVWALU_WV_WX {
}
}
-multiclass VPseudoVFWALU_VV_VF {
+multiclass VPseudoVFWALU_VV_VF_RM {
foreach m = MxListFW in {
defvar mx = m.MX;
defvar WriteVFWALUV_MX = !cast<SchedWrite>("WriteVFWALUV_" # mx);
defvar ReadVFWALUV_MX = !cast<SchedRead>("ReadVFWALUV_" # mx);
- defm "" : VPseudoBinaryW_VV<m>,
+ defm "" : VPseudoBinaryW_VV_RM<m>,
Sched<[WriteVFWALUV_MX, ReadVFWALUV_MX, ReadVFWALUV_MX, ReadVMask]>;
}
@@ -2820,19 +3028,19 @@ multiclass VPseudoVFWALU_VV_VF {
defvar ReadVFWALUV_MX = !cast<SchedRead>("ReadVFWALUV_" # mx);
defvar ReadVFWALUF_MX = !cast<SchedRead>("ReadVFWALUF_" # mx);
- defm "" : VPseudoBinaryW_VF<m, f>,
+ defm "" : VPseudoBinaryW_VF_RM<m, f>,
Sched<[WriteVFWALUF_MX, ReadVFWALUV_MX, ReadVFWALUF_MX, ReadVMask]>;
}
}
}
-multiclass VPseudoVFWALU_WV_WF {
+multiclass VPseudoVFWALU_WV_WF_RM {
foreach m = MxListFW in {
defvar mx = m.MX;
defvar WriteVFWALUV_MX = !cast<SchedWrite>("WriteVFWALUV_" # mx);
defvar ReadVFWALUV_MX = !cast<SchedRead>("ReadVFWALUV_" # mx);
- defm "" : VPseudoBinaryW_WV<m>,
+ defm "" : VPseudoBinaryW_WV_RM<m>,
Sched<[WriteVFWALUV_MX, ReadVFWALUV_MX, ReadVFWALUV_MX, ReadVMask]>;
}
foreach f = FPListW in {
@@ -2842,7 +3050,7 @@ multiclass VPseudoVFWALU_WV_WF {
defvar ReadVFWALUV_MX = !cast<SchedRead>("ReadVFWALUV_" # mx);
defvar ReadVFWALUF_MX = !cast<SchedRead>("ReadVFWALUF_" # mx);
- defm "" : VPseudoBinaryW_WF<m, f>,
+ defm "" : VPseudoBinaryW_WF_RM<m, f>,
Sched<[WriteVFWALUF_MX, ReadVFWALUV_MX, ReadVFWALUF_MX, ReadVMask]>;
}
}
@@ -2857,19 +3065,18 @@ multiclass VPseudoVMRG_VM_XM_IM {
defvar ReadVIMergeV_MX = !cast<SchedRead>("ReadVIMergeV_" # mx);
defvar ReadVIMergeX_MX = !cast<SchedRead>("ReadVIMergeX_" # mx);
- defm "" : VPseudoBinaryV_VM<m>,
- Sched<[WriteVIMergeV_MX, ReadVIMergeV_MX, ReadVIMergeV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_XM<m>,
- Sched<[WriteVIMergeX_MX, ReadVIMergeV_MX, ReadVIMergeX_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_IM<m>,
- Sched<[WriteVIMergeI_MX, ReadVIMergeV_MX, ReadVMask]>;
- // Tied versions to allow codegen control over the tail elements
- defm "" : VPseudoTiedBinaryV_VM<m>,
- Sched<[WriteVIMergeV_MX, ReadVIMergeV_MX, ReadVIMergeV_MX, ReadVMask]>;
- defm "" : VPseudoTiedBinaryV_XM<m>,
- Sched<[WriteVIMergeX_MX, ReadVIMergeV_MX, ReadVIMergeX_MX, ReadVMask]>;
- defm "" : VPseudoTiedBinaryV_IM<m>,
- Sched<[WriteVIMergeI_MX, ReadVIMergeV_MX, ReadVMask]>;
+ def "_VVM" # "_" # m.MX:
+ VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
+ m.vrclass, m.vrclass, m, 1, "">,
+ Sched<[WriteVIMergeV_MX, ReadVIMergeV_MX, ReadVIMergeV_MX, ReadVMask]>;
+ def "_VXM" # "_" # m.MX:
+ VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
+ m.vrclass, GPR, m, 1, "">,
+ Sched<[WriteVIMergeX_MX, ReadVIMergeV_MX, ReadVIMergeX_MX, ReadVMask]>;
+ def "_VIM" # "_" # m.MX:
+ VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
+ m.vrclass, simm5, m, 1, "">,
+ Sched<[WriteVIMergeI_MX, ReadVIMergeV_MX, ReadVMask]>;
}
}
@@ -2882,13 +3089,6 @@ multiclass VPseudoVCALU_VM_XM_IM {
defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx);
defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx);
- defm "" : VPseudoBinaryV_VM<m>,
- Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_XM<m>,
- Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_IM<m>,
- Sched<[WriteVICALUI_MX, ReadVICALUV_MX, ReadVMask]>;
- // Tied versions to allow codegen control over the tail elements
defm "" : VPseudoTiedBinaryV_VM<m>,
Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>;
defm "" : VPseudoTiedBinaryV_XM<m>,
@@ -2906,11 +3106,6 @@ multiclass VPseudoVCALU_VM_XM {
defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx);
defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx);
- defm "" : VPseudoBinaryV_VM<m>,
- Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_XM<m>,
- Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>;
- // Tied versions to allow codegen control over the tail elements
defm "" : VPseudoTiedBinaryV_VM<m>,
Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>;
defm "" : VPseudoTiedBinaryV_XM<m>,
@@ -2927,11 +3122,11 @@ multiclass VPseudoVCALUM_VM_XM_IM<string Constraint> {
defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx);
defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx);
- defm "" : VPseudoBinaryV_VM<m, /*CarryOut=*/1, /*CarryIn=*/1, Constraint>,
+ defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>,
Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_XM<m, /*CarryOut=*/1, /*CarryIn=*/1, Constraint>,
+ defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>,
Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_IM<m, /*CarryOut=*/1, /*CarryIn=*/1, Constraint>,
+ defm "" : VPseudoBinaryV_IM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>,
Sched<[WriteVICALUI_MX, ReadVICALUV_MX, ReadVMask]>;
}
}
@@ -2944,9 +3139,9 @@ multiclass VPseudoVCALUM_VM_XM<string Constraint> {
defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx);
defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx);
- defm "" : VPseudoBinaryV_VM<m, /*CarryOut=*/1, /*CarryIn=*/1, Constraint>,
+ defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>,
Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_XM<m, /*CarryOut=*/1, /*CarryIn=*/1, Constraint>,
+ defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=1, Constraint=Constraint>,
Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX, ReadVMask]>;
}
}
@@ -2960,11 +3155,11 @@ multiclass VPseudoVCALUM_V_X_I<string Constraint> {
defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx);
defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx);
- defm "" : VPseudoBinaryV_VM<m, /*CarryOut=*/1, /*CarryIn=*/0, Constraint>,
+ defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>,
Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX]>;
- defm "" : VPseudoBinaryV_XM<m, /*CarryOut=*/1, /*CarryIn=*/0, Constraint>,
+ defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>,
Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX]>;
- defm "" : VPseudoBinaryV_IM<m, /*CarryOut=*/1, /*CarryIn=*/0, Constraint>,
+ defm "" : VPseudoBinaryV_IM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>,
Sched<[WriteVICALUI_MX, ReadVICALUV_MX]>;
}
}
@@ -2977,14 +3172,14 @@ multiclass VPseudoVCALUM_V_X<string Constraint> {
defvar ReadVICALUV_MX = !cast<SchedRead>("ReadVICALUV_" # mx);
defvar ReadVICALUX_MX = !cast<SchedRead>("ReadVICALUX_" # mx);
- defm "" : VPseudoBinaryV_VM<m, /*CarryOut=*/1, /*CarryIn=*/0, Constraint>,
+ defm "" : VPseudoBinaryV_VM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>,
Sched<[WriteVICALUV_MX, ReadVICALUV_MX, ReadVICALUV_MX]>;
- defm "" : VPseudoBinaryV_XM<m, /*CarryOut=*/1, /*CarryIn=*/0, Constraint>,
+ defm "" : VPseudoBinaryV_XM<m, CarryOut=1, CarryIn=0, Constraint=Constraint>,
Sched<[WriteVICALUX_MX, ReadVICALUV_MX, ReadVICALUX_MX]>;
}
}
-multiclass VPseudoVNCLP_WV_WX_WI {
+multiclass VPseudoVNCLP_WV_WX_WI_RM {
foreach m = MxListW in {
defvar mx = m.MX;
defvar WriteVNClipV_MX = !cast<SchedWrite>("WriteVNClipV_" # mx);
@@ -2993,11 +3188,11 @@ multiclass VPseudoVNCLP_WV_WX_WI {
defvar ReadVNClipV_MX = !cast<SchedRead>("ReadVNClipV_" # mx);
defvar ReadVNClipX_MX = !cast<SchedRead>("ReadVNClipX_" # mx);
- defm "" : VPseudoBinaryV_WV<m>,
+ defm "" : VPseudoBinaryV_WV_RM<m>,
Sched<[WriteVNClipV_MX, ReadVNClipV_MX, ReadVNClipV_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_WX<m>,
+ defm "" : VPseudoBinaryV_WX_RM<m>,
Sched<[WriteVNClipX_MX, ReadVNClipV_MX, ReadVNClipX_MX, ReadVMask]>;
- defm "" : VPseudoBinaryV_WI<m>,
+ defm "" : VPseudoBinaryV_WI_RM<m>,
Sched<[WriteVNClipI_MX, ReadVNClipV_MX, ReadVMask]>;
}
}
@@ -3020,27 +3215,37 @@ multiclass VPseudoVNSHT_WV_WX_WI {
}
}
-multiclass VPseudoTernary<VReg RetClass,
- RegisterClass Op1Class,
- DAGOperand Op2Class,
- LMULInfo MInfo,
- string Constraint = ""> {
+multiclass VPseudoTernaryWithTailPolicy<VReg RetClass,
+ RegisterClass Op1Class,
+ DAGOperand Op2Class,
+ LMULInfo MInfo,
+ int sew,
+ string Constraint = "",
+ bit Commutable = 0> {
let VLMul = MInfo.value in {
- def "_" # MInfo.MX : VPseudoTernaryNoMask<RetClass, Op1Class, Op2Class, Constraint>;
- def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMask<RetClass, Op1Class, Op2Class, Constraint>;
+ defvar mx = MInfo.MX;
+ let isCommutable = Commutable in
+ def "_" # mx # "_E" # sew : VPseudoTernaryNoMaskWithPolicy<RetClass, Op1Class, Op2Class, Constraint>;
+ def "_" # mx # "_E" # sew # "_MASK" : VPseudoTernaryMaskPolicy<RetClass, Op1Class, Op2Class, Constraint>;
}
}
-multiclass VPseudoTernaryNoMaskNoPolicy<VReg RetClass,
- RegisterClass Op1Class,
- DAGOperand Op2Class,
- LMULInfo MInfo,
- string Constraint = ""> {
+multiclass VPseudoTernaryWithTailPolicyRoundingMode<VReg RetClass,
+ RegisterClass Op1Class,
+ DAGOperand Op2Class,
+ LMULInfo MInfo,
+ int sew,
+ string Constraint = "",
+ bit Commutable = 0> {
let VLMul = MInfo.value in {
- def "_" # MInfo.MX : VPseudoTernaryNoMask<RetClass, Op1Class, Op2Class, Constraint>;
- def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class,
- Constraint>;
-
+ defvar mx = MInfo.MX;
+ let isCommutable = Commutable in
+ def "_" # mx # "_E" # sew
+ : VPseudoTernaryNoMaskWithPolicyRoundingMode<RetClass, Op1Class,
+ Op2Class, Constraint>;
+ def "_" # mx # "_E" # sew # "_MASK"
+ : VPseudoTernaryMaskPolicyRoundingMode<RetClass, Op1Class,
+ Op2Class, Constraint>;
}
}
@@ -3053,28 +3258,55 @@ multiclass VPseudoTernaryWithPolicy<VReg RetClass,
let VLMul = MInfo.value in {
let isCommutable = Commutable in
def "_" # MInfo.MX : VPseudoTernaryNoMaskWithPolicy<RetClass, Op1Class, Op2Class, Constraint>;
- def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class, Constraint>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class, Constraint>,
+ RISCVMaskedPseudo<MaskIdx=3>;
+ }
+}
+
+multiclass VPseudoTernaryWithPolicyRoundingMode<VReg RetClass,
+ RegisterClass Op1Class,
+ DAGOperand Op2Class,
+ LMULInfo MInfo,
+ string Constraint = "",
+ bit Commutable = 0> {
+ let VLMul = MInfo.value in {
+ let isCommutable = Commutable in
+ def "_" # MInfo.MX :
+ VPseudoTernaryNoMaskWithPolicyRoundingMode<RetClass, Op1Class,
+ Op2Class, Constraint>;
+ def "_" # MInfo.MX # "_MASK" :
+ VPseudoBinaryMaskPolicyRoundingMode<RetClass, Op1Class,
+ Op2Class, Constraint,
+ UsesVXRM_=0>,
+ RISCVMaskedPseudo<MaskIdx=3>;
}
}
multiclass VPseudoTernaryV_VV_AAXA<LMULInfo m, string Constraint = ""> {
defm _VV : VPseudoTernaryWithPolicy<m.vrclass, m.vrclass, m.vrclass, m,
- Constraint, /*Commutable*/1>;
+ Constraint, Commutable=1>;
}
-multiclass VPseudoVSLDV_VX<LMULInfo m, string Constraint = ""> {
- defm _VX : VPseudoTernaryWithPolicy<m.vrclass, m.vrclass, GPR, m, Constraint>;
+multiclass VPseudoTernaryV_VV_AAXA_RM<LMULInfo m, string Constraint = ""> {
+ defm _VV : VPseudoTernaryWithPolicyRoundingMode<m.vrclass, m.vrclass, m.vrclass, m,
+ Constraint, Commutable=1>;
}
multiclass VPseudoTernaryV_VX_AAXA<LMULInfo m, string Constraint = ""> {
defm "_VX" : VPseudoTernaryWithPolicy<m.vrclass, GPR, m.vrclass, m,
- Constraint, /*Commutable*/1>;
+ Constraint, Commutable=1>;
}
multiclass VPseudoTernaryV_VF_AAXA<LMULInfo m, FPR_Info f, string Constraint = ""> {
defm "_V" # f.FX : VPseudoTernaryWithPolicy<m.vrclass, f.fprclass,
m.vrclass, m, Constraint,
- /*Commutable*/1>;
+ Commutable=1>;
+}
+
+multiclass VPseudoTernaryV_VF_AAXA_RM<LMULInfo m, FPR_Info f, string Constraint = ""> {
+ defm "_V" # f.FX : VPseudoTernaryWithPolicyRoundingMode<m.vrclass, f.fprclass,
+ m.vrclass, m, Constraint,
+ Commutable=1>;
}
multiclass VPseudoTernaryW_VV<LMULInfo m> {
@@ -3083,6 +3315,12 @@ multiclass VPseudoTernaryW_VV<LMULInfo m> {
constraint>;
}
+multiclass VPseudoTernaryW_VV_RM<LMULInfo m> {
+ defvar constraint = "@earlyclobber $rd";
+ defm _VV : VPseudoTernaryWithPolicyRoundingMode<m.wvrclass, m.vrclass, m.vrclass, m,
+ constraint>;
+}
+
multiclass VPseudoTernaryW_VX<LMULInfo m> {
defvar constraint = "@earlyclobber $rd";
defm "_VX" : VPseudoTernaryWithPolicy<m.wvrclass, GPR, m.vrclass, m,
@@ -3095,8 +3333,30 @@ multiclass VPseudoTernaryW_VF<LMULInfo m, FPR_Info f> {
m.vrclass, m, constraint>;
}
+multiclass VPseudoTernaryW_VF_RM<LMULInfo m, FPR_Info f> {
+ defvar constraint = "@earlyclobber $rd";
+ defm "_V" # f.FX : VPseudoTernaryWithPolicyRoundingMode<m.wvrclass, f.fprclass,
+ m.vrclass, m, constraint>;
+}
+
+multiclass VPseudoVSLDVWithPolicy<VReg RetClass,
+ RegisterClass Op1Class,
+ DAGOperand Op2Class,
+ LMULInfo MInfo,
+ string Constraint = ""> {
+ let VLMul = MInfo.value in {
+ def "_" # MInfo.MX : VPseudoTernaryNoMaskWithPolicy<RetClass, Op1Class, Op2Class, Constraint>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMaskPolicy<RetClass, Op1Class, Op2Class, Constraint>,
+ RISCVMaskedPseudo<MaskIdx=3>;
+ }
+}
+
+multiclass VPseudoVSLDV_VX<LMULInfo m, string Constraint = ""> {
+ defm _VX : VPseudoVSLDVWithPolicy<m.vrclass, m.vrclass, GPR, m, Constraint>;
+}
+
multiclass VPseudoVSLDV_VI<Operand ImmType = simm5, LMULInfo m, string Constraint = ""> {
- defm _VI : VPseudoTernaryWithPolicy<m.vrclass, m.vrclass, ImmType, m, Constraint>;
+ defm _VI : VPseudoVSLDVWithPolicy<m.vrclass, m.vrclass, ImmType, m, Constraint>;
}
multiclass VPseudoVMAC_VV_VX_AAXA<string Constraint = ""> {
@@ -3139,6 +3399,29 @@ multiclass VPseudoVMAC_VV_VF_AAXA<string Constraint = ""> {
}
}
+multiclass VPseudoVMAC_VV_VF_AAXA_RM<string Constraint = ""> {
+ foreach m = MxListF in {
+ defvar mx = m.MX;
+ defvar WriteVFMulAddV_MX = !cast<SchedWrite>("WriteVFMulAddV_" # mx);
+ defvar ReadVFMulAddV_MX = !cast<SchedRead>("ReadVFMulAddV_" # mx);
+
+ defm "" : VPseudoTernaryV_VV_AAXA_RM<m, Constraint>,
+ Sched<[WriteVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVMask]>;
+ }
+
+ foreach f = FPList in {
+ foreach m = f.MxList in {
+ defvar mx = m.MX;
+ defvar WriteVFMulAddF_MX = !cast<SchedWrite>("WriteVFMulAddF_" # mx);
+ defvar ReadVFMulAddV_MX = !cast<SchedRead>("ReadVFMulAddV_" # mx);
+ defvar ReadVFMulAddF_MX = !cast<SchedRead>("ReadVFMulAddF_" # mx);
+
+ defm "" : VPseudoTernaryV_VF_AAXA_RM<m, f, Constraint>,
+ Sched<[WriteVFMulAddF_MX, ReadVFMulAddV_MX, ReadVFMulAddV_MX, ReadVFMulAddF_MX, ReadVMask]>;
+ }
+ }
+}
+
multiclass VPseudoVSLD_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
foreach m = MxList in {
defvar mx = m.MX;
@@ -3185,13 +3468,13 @@ multiclass VPseudoVWMAC_VX {
}
}
-multiclass VPseudoVWMAC_VV_VF {
+multiclass VPseudoVWMAC_VV_VF_RM {
foreach m = MxListFW in {
defvar mx = m.MX;
defvar WriteVFWMulAddV_MX = !cast<SchedWrite>("WriteVFWMulAddV_" # mx);
defvar ReadVFWMulAddV_MX = !cast<SchedRead>("ReadVFWMulAddV_" # mx);
- defm "" : VPseudoTernaryW_VV<m>,
+ defm "" : VPseudoTernaryW_VV_RM<m>,
Sched<[WriteVFWMulAddV_MX, ReadVFWMulAddV_MX,
ReadVFWMulAddV_MX, ReadVFWMulAddV_MX, ReadVMask]>;
}
@@ -3203,7 +3486,7 @@ multiclass VPseudoVWMAC_VV_VF {
defvar ReadVFWMulAddV_MX = !cast<SchedRead>("ReadVFWMulAddV_" # mx);
defvar ReadVFWMulAddF_MX = !cast<SchedRead>("ReadVFWMulAddF_" # mx);
- defm "" : VPseudoTernaryW_VF<m, f>,
+ defm "" : VPseudoTernaryW_VF_RM<m, f>,
Sched<[WriteVFWMulAddF_MX, ReadVFWMulAddV_MX,
ReadVFWMulAddV_MX, ReadVFWMulAddF_MX, ReadVMask]>;
}
@@ -3297,36 +3580,90 @@ multiclass VPseudoVCMPM_VX_VI {
multiclass VPseudoVRED_VS {
foreach m = MxList in {
- defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
- Sched<[WriteVIRedV, ReadVIRedV, ReadVIRedV, ReadVIRedV, ReadVMask]>;
+ defvar mx = m.MX;
+ foreach e = SchedSEWSet<mx>.val in {
+ defvar WriteVIRedV_From_MX_E = !cast<SchedWrite>("WriteVIRedV_From_" # mx # "_E" # e);
+ defm _VS : VPseudoTernaryWithTailPolicy<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>,
+ Sched<[WriteVIRedV_From_MX_E, ReadVIRedV, ReadVIRedV, ReadVIRedV,
+ ReadVMask]>;
+ }
}
}
-multiclass VPseudoVWRED_VS {
+multiclass VPseudoVREDMINMAX_VS {
foreach m = MxList in {
- defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
- Sched<[WriteVIWRedV, ReadVIWRedV, ReadVIWRedV, ReadVIWRedV, ReadVMask]>;
+ defvar mx = m.MX;
+ foreach e = SchedSEWSet<mx>.val in {
+ defvar WriteVIRedMinMaxV_From_MX_E = !cast<SchedWrite>("WriteVIRedMinMaxV_From_" # mx # "_E" # e);
+ defm _VS : VPseudoTernaryWithTailPolicy<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>,
+ Sched<[WriteVIRedMinMaxV_From_MX_E, ReadVIRedV, ReadVIRedV,
+ ReadVIRedV, ReadVMask]>;
+ }
+ }
+}
+
+multiclass VPseudoVWRED_VS {
+ foreach m = MxListWRed in {
+ defvar mx = m.MX;
+ foreach e = SchedSEWSet<mx, isWidening=1>.val in {
+ defvar WriteVIWRedV_From_MX_E = !cast<SchedWrite>("WriteVIWRedV_From_" # mx # "_E" # e);
+ defm _VS : VPseudoTernaryWithTailPolicy<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>,
+ Sched<[WriteVIWRedV_From_MX_E, ReadVIWRedV, ReadVIWRedV,
+ ReadVIWRedV, ReadVMask]>;
+ }
}
}
-multiclass VPseudoVFRED_VS {
+multiclass VPseudoVFRED_VS_RM {
foreach m = MxListF in {
- defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
- Sched<[WriteVFRedV, ReadVFRedV, ReadVFRedV, ReadVFRedV, ReadVMask]>;
+ defvar mx = m.MX;
+ foreach e = SchedSEWSet<mx, isF=1>.val in {
+ defvar WriteVFRedV_From_MX_E = !cast<SchedWrite>("WriteVFRedV_From_" # mx # "_E" # e);
+ defm _VS
+ : VPseudoTernaryWithTailPolicyRoundingMode<V_M1.vrclass, m.vrclass,
+ V_M1.vrclass, m, e>,
+ Sched<[WriteVFRedV_From_MX_E, ReadVFRedV, ReadVFRedV, ReadVFRedV,
+ ReadVMask]>;
+ }
}
}
-multiclass VPseudoVFREDO_VS {
+multiclass VPseudoVFREDMINMAX_VS {
foreach m = MxListF in {
- defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
- Sched<[WriteVFRedOV, ReadVFRedOV, ReadVFRedOV, ReadVFRedOV, ReadVMask]>;
+ defvar mx = m.MX;
+ foreach e = SchedSEWSet<mx, isF=1>.val in {
+ defvar WriteVFRedMinMaxV_From_MX_E = !cast<SchedWrite>("WriteVFRedMinMaxV_From_" # mx # "_E" # e);
+ defm _VS : VPseudoTernaryWithTailPolicy<V_M1.vrclass, m.vrclass, V_M1.vrclass, m, e>,
+ Sched<[WriteVFRedMinMaxV_From_MX_E, ReadVFRedV, ReadVFRedV, ReadVFRedV,
+ ReadVMask]>;
+ }
}
}
-multiclass VPseudoVFWRED_VS {
+multiclass VPseudoVFREDO_VS_RM {
foreach m = MxListF in {
- defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
- Sched<[WriteVFWRedV, ReadVFWRedV, ReadVFWRedV, ReadVFWRedV, ReadVMask]>;
+ defvar mx = m.MX;
+ foreach e = SchedSEWSet<mx, isF=1>.val in {
+ defvar WriteVFRedOV_From_MX_E = !cast<SchedWrite>("WriteVFRedOV_From_" # mx # "_E" # e);
+ defm _VS : VPseudoTernaryWithTailPolicyRoundingMode<V_M1.vrclass, m.vrclass,
+ V_M1.vrclass, m, e>,
+ Sched<[WriteVFRedOV_From_MX_E, ReadVFRedOV, ReadVFRedOV,
+ ReadVFRedOV, ReadVMask]>;
+ }
+ }
+}
+
+multiclass VPseudoVFWRED_VS_RM {
+ foreach m = MxListFWRed in {
+ defvar mx = m.MX;
+ foreach e = SchedSEWSet<mx, isF=1, isWidening=1>.val in {
+ defvar WriteVFWRedV_From_MX_E = !cast<SchedWrite>("WriteVFWRedV_From_" # mx # "_E" # e);
+ defm _VS
+ : VPseudoTernaryWithTailPolicyRoundingMode<V_M1.vrclass, m.vrclass,
+ V_M1.vrclass, m, e>,
+ Sched<[WriteVFWRedV_From_MX_E, ReadVFWRedV, ReadVFWRedV,
+ ReadVFWRedV, ReadVMask]>;
+ }
}
}
@@ -3336,20 +3673,35 @@ multiclass VPseudoConversion<VReg RetClass,
string Constraint = ""> {
let VLMul = MInfo.value in {
def "_" # MInfo.MX : VPseudoUnaryNoMask<RetClass, Op1Class, Constraint>;
- def "_" # MInfo.MX # "_TU": VPseudoUnaryNoMaskTU<RetClass, Op1Class, Constraint>;
- def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskTA<RetClass, Op1Class,
- Constraint>,
- RISCVMaskedPseudo</*MaskOpIdx*/ 2>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask<RetClass, Op1Class,
+ Constraint>,
+ RISCVMaskedPseudo<MaskIdx=2>;
+ }
+}
+
+multiclass VPseudoConversionRoundingMode<VReg RetClass,
+ VReg Op1Class,
+ LMULInfo MInfo,
+ string Constraint = ""> {
+ let VLMul = MInfo.value in {
+ def "_" # MInfo.MX : VPseudoUnaryNoMaskRoundingMode<RetClass, Op1Class, Constraint>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskRoundingMode<RetClass, Op1Class,
+ Constraint>,
+ RISCVMaskedPseudo<MaskIdx=2>;
}
}
+
multiclass VPseudoConversionRM<VReg RetClass,
VReg Op1Class,
LMULInfo MInfo,
string Constraint = ""> {
let VLMul = MInfo.value in {
- def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskTA_FRM<RetClass, Op1Class,
- Constraint>;
+ def "_" # MInfo.MX : VPseudoUnaryNoMask_FRM<RetClass, Op1Class,
+ Constraint>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask_FRM<RetClass, Op1Class,
+ Constraint>,
+ RISCVMaskedPseudo<MaskIdx=2>;
}
}
@@ -3358,7 +3710,7 @@ multiclass VPseudoConversionNoExcept<VReg RetClass,
LMULInfo MInfo,
string Constraint = ""> {
let VLMul = MInfo.value in {
- def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMaskTA_NoExcept<RetClass, Op1Class, Constraint>;
+ def "_" # MInfo.MX # "_MASK" : VPseudoUnaryMask_NoExcept<RetClass, Op1Class, Constraint>;
}
}
@@ -3373,6 +3725,17 @@ multiclass VPseudoVCVTI_V {
}
}
+multiclass VPseudoVCVTI_V_RM {
+ foreach m = MxListF in {
+ defvar mx = m.MX;
+ defvar WriteVFCvtFToIV_MX = !cast<SchedWrite>("WriteVFCvtFToIV_" # mx);
+ defvar ReadVFCvtFToIV_MX = !cast<SchedRead>("ReadVFCvtFToIV_" # mx);
+
+ defm _V : VPseudoConversionRoundingMode<m.vrclass, m.vrclass, m>,
+ Sched<[WriteVFCvtFToIV_MX, ReadVFCvtFToIV_MX, ReadVMask]>;
+ }
+}
+
multiclass VPseudoVCVTI_RM_V {
foreach m = MxListF in {
defvar mx = m.MX;
@@ -3395,13 +3758,13 @@ multiclass VPseudoVFROUND_NOEXCEPT_V {
}
}
-multiclass VPseudoVCVTF_V {
+multiclass VPseudoVCVTF_V_RM {
foreach m = MxListF in {
defvar mx = m.MX;
defvar WriteVFCvtIToFV_MX = !cast<SchedWrite>("WriteVFCvtIToFV_" # mx);
defvar ReadVFCvtIToFV_MX = !cast<SchedRead>("ReadVFCvtIToFV_" # mx);
- defm _V : VPseudoConversion<m.vrclass, m.vrclass, m>,
+ defm _V : VPseudoConversionRoundingMode<m.vrclass, m.vrclass, m>,
Sched<[WriteVFCvtIToFV_MX, ReadVFCvtIToFV_MX, ReadVMask]>;
}
}
@@ -3417,12 +3780,6 @@ multiclass VPseudoVCVTF_RM_V {
}
}
-multiclass VPseudoConversionW_V {
- defvar constraint = "@earlyclobber $rd";
- foreach m = MxListW in
- defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>;
-}
-
multiclass VPseudoVWCVTI_V {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
@@ -3435,38 +3792,38 @@ multiclass VPseudoVWCVTI_V {
}
}
-multiclass VPseudoVWCVTI_RM_V {
+multiclass VPseudoVWCVTI_V_RM {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
defvar mx = m.MX;
defvar WriteVFWCvtFToIV_MX = !cast<SchedWrite>("WriteVFWCvtFToIV_" # mx);
defvar ReadVFWCvtFToIV_MX = !cast<SchedRead>("ReadVFWCvtFToIV_" # mx);
- defm _V : VPseudoConversionRM<m.wvrclass, m.vrclass, m, constraint>,
+ defm _V : VPseudoConversionRoundingMode<m.wvrclass, m.vrclass, m, constraint>,
Sched<[WriteVFWCvtFToIV_MX, ReadVFWCvtFToIV_MX, ReadVMask]>;
}
}
-multiclass VPseudoVWCVTF_V {
+multiclass VPseudoVWCVTI_RM_V {
defvar constraint = "@earlyclobber $rd";
- foreach m = MxListW in {
+ foreach m = MxListFW in {
defvar mx = m.MX;
- defvar WriteVFWCvtIToFV_MX = !cast<SchedWrite>("WriteVFWCvtIToFV_" # mx);
- defvar ReadVFWCvtIToFV_MX = !cast<SchedRead>("ReadVFWCvtIToFV_" # mx);
+ defvar WriteVFWCvtFToIV_MX = !cast<SchedWrite>("WriteVFWCvtFToIV_" # mx);
+ defvar ReadVFWCvtFToIV_MX = !cast<SchedRead>("ReadVFWCvtFToIV_" # mx);
- defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>,
- Sched<[WriteVFWCvtIToFV_MX, ReadVFWCvtIToFV_MX, ReadVMask]>;
+ defm _V : VPseudoConversionRM<m.wvrclass, m.vrclass, m, constraint>,
+ Sched<[WriteVFWCvtFToIV_MX, ReadVFWCvtFToIV_MX, ReadVMask]>;
}
}
-multiclass VPseudoVWCVTF_RM_V {
+multiclass VPseudoVWCVTF_V {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListW in {
defvar mx = m.MX;
defvar WriteVFWCvtIToFV_MX = !cast<SchedWrite>("WriteVFWCvtIToFV_" # mx);
defvar ReadVFWCvtIToFV_MX = !cast<SchedRead>("ReadVFWCvtIToFV_" # mx);
- defm _V : VPseudoConversionRM<m.wvrclass, m.vrclass, m, constraint>,
+ defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>,
Sched<[WriteVFWCvtIToFV_MX, ReadVFWCvtIToFV_MX, ReadVMask]>;
}
}
@@ -3495,6 +3852,18 @@ multiclass VPseudoVNCVTI_W {
}
}
+multiclass VPseudoVNCVTI_W_RM {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListW in {
+ defvar mx = m.MX;
+ defvar WriteVFNCvtFToIV_MX = !cast<SchedWrite>("WriteVFNCvtFToIV_" # mx);
+ defvar ReadVFNCvtFToIV_MX = !cast<SchedRead>("ReadVFNCvtFToIV_" # mx);
+
+ defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint>,
+ Sched<[WriteVFNCvtFToIV_MX, ReadVFNCvtFToIV_MX, ReadVMask]>;
+ }
+}
+
multiclass VPseudoVNCVTI_RM_W {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListW in {
@@ -3507,14 +3876,14 @@ multiclass VPseudoVNCVTI_RM_W {
}
}
-multiclass VPseudoVNCVTF_W {
+multiclass VPseudoVNCVTF_W_RM {
defvar constraint = "@earlyclobber $rd";
foreach m = MxListFW in {
defvar mx = m.MX;
defvar WriteVFNCvtIToFV_MX = !cast<SchedWrite>("WriteVFNCvtIToFV_" # mx);
defvar ReadVFNCvtIToFV_MX = !cast<SchedRead>("ReadVFNCvtIToFV_" # mx);
- defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>,
+ defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint>,
Sched<[WriteVFNCvtIToFV_MX, ReadVFNCvtIToFV_MX, ReadVMask]>;
}
}
@@ -3543,17 +3912,27 @@ multiclass VPseudoVNCVTD_W {
}
}
+multiclass VPseudoVNCVTD_W_RM {
+ defvar constraint = "@earlyclobber $rd";
+ foreach m = MxListFW in {
+ defvar mx = m.MX;
+ defvar WriteVFNCvtFToFV_MX = !cast<SchedWrite>("WriteVFNCvtFToFV_" # mx);
+ defvar ReadVFNCvtFToFV_MX = !cast<SchedRead>("ReadVFNCvtFToFV_" # mx);
+
+ defm _W : VPseudoConversionRoundingMode<m.vrclass, m.wvrclass, m, constraint>,
+ Sched<[WriteVFNCvtFToFV_MX, ReadVFNCvtFToFV_MX, ReadVMask]>;
+ }
+}
+
multiclass VPseudoUSSegLoad {
foreach eew = EEWList in {
foreach lmul = MxSet<eew>.m in {
defvar LInfo = lmul.MX;
- let VLMul = lmul.value in {
+ let VLMul = lmul.value, SEW=eew in {
foreach nf = NFSet<lmul>.L in {
defvar vreg = SegRegClass<lmul, nf>.RC;
def nf # "E" # eew # "_V_" # LInfo :
VPseudoUSSegLoadNoMask<vreg, eew, nf>, VLSEGSched<nf, eew, LInfo>;
- def nf # "E" # eew # "_V_" # LInfo # "_TU" :
- VPseudoUSSegLoadNoMaskTU<vreg, eew, nf>, VLSEGSched<nf, eew, LInfo>;
def nf # "E" # eew # "_V_" # LInfo # "_MASK" :
VPseudoUSSegLoadMask<vreg, eew, nf>, VLSEGSched<nf, eew, LInfo>;
}
@@ -3566,13 +3945,11 @@ multiclass VPseudoUSSegLoadFF {
foreach eew = EEWList in {
foreach lmul = MxSet<eew>.m in {
defvar LInfo = lmul.MX;
- let VLMul = lmul.value in {
+ let VLMul = lmul.value, SEW=eew in {
foreach nf = NFSet<lmul>.L in {
defvar vreg = SegRegClass<lmul, nf>.RC;
def nf # "E" # eew # "FF_V_" # LInfo :
VPseudoUSSegLoadFFNoMask<vreg, eew, nf>, VLSEGFFSched<nf, eew, LInfo>;
- def nf # "E" # eew # "FF_V_" # LInfo # "_TU" :
- VPseudoUSSegLoadFFNoMaskTU<vreg, eew, nf>, VLSEGFFSched<nf, eew, LInfo>;
def nf # "E" # eew # "FF_V_" # LInfo # "_MASK" :
VPseudoUSSegLoadFFMask<vreg, eew, nf>, VLSEGFFSched<nf, eew, LInfo>;
}
@@ -3585,13 +3962,11 @@ multiclass VPseudoSSegLoad {
foreach eew = EEWList in {
foreach lmul = MxSet<eew>.m in {
defvar LInfo = lmul.MX;
- let VLMul = lmul.value in {
+ let VLMul = lmul.value, SEW=eew in {
foreach nf = NFSet<lmul>.L in {
defvar vreg = SegRegClass<lmul, nf>.RC;
def nf # "E" # eew # "_V_" # LInfo : VPseudoSSegLoadNoMask<vreg, eew, nf>,
VLSSEGSched<nf, eew, LInfo>;
- def nf # "E" # eew # "_V_" # LInfo # "_TU" : VPseudoSSegLoadNoMaskTU<vreg, eew, nf>,
- VLSSEGSched<nf, eew, LInfo>;
def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSSegLoadMask<vreg, eew, nf>,
VLSSEGSched<nf, eew, LInfo>;
}
@@ -3601,34 +3976,30 @@ multiclass VPseudoSSegLoad {
}
multiclass VPseudoISegLoad<bit Ordered> {
- foreach idx_eew = EEWList in {
- foreach sew = EEWList in {
- foreach val_lmul = MxSet<sew>.m in {
- defvar octuple_lmul = val_lmul.octuple;
+ foreach idxEEW = EEWList in {
+ foreach dataEEW = EEWList in {
+ foreach dataEMUL = MxSet<dataEEW>.m in {
+ defvar dataEMULOctuple = dataEMUL.octuple;
// Calculate emul = eew * lmul / sew
- defvar octuple_emul = !srl(!mul(idx_eew, octuple_lmul), log2<sew>.val);
- if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
- defvar ValLInfo = val_lmul.MX;
- defvar IdxLInfo = octuple_to_str<octuple_emul>.ret;
- defvar idx_lmul = !cast<LMULInfo>("V_" # IdxLInfo);
- defvar Vreg = val_lmul.vrclass;
- defvar IdxVreg = idx_lmul.vrclass;
+ defvar idxEMULOctuple = !srl(!mul(idxEEW, dataEMULOctuple), !logtwo(dataEEW));
+ if !and(!ge(idxEMULOctuple, 1), !le(idxEMULOctuple, 64)) then {
+ defvar DataLInfo = dataEMUL.MX;
+ defvar IdxLInfo = octuple_to_str<idxEMULOctuple>.ret;
+ defvar idxEMUL = !cast<LMULInfo>("V_" # IdxLInfo);
+ defvar DataVreg = dataEMUL.vrclass;
+ defvar IdxVreg = idxEMUL.vrclass;
defvar Order = !if(Ordered, "O", "U");
- let VLMul = val_lmul.value in {
- foreach nf = NFSet<val_lmul>.L in {
- defvar ValVreg = SegRegClass<val_lmul, nf>.RC;
- def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo :
- VPseudoISegLoadNoMask<ValVreg, IdxVreg, idx_eew, idx_lmul.value,
+ let VLMul = dataEMUL.value in {
+ foreach nf = NFSet<dataEMUL>.L in {
+ defvar Vreg = SegRegClass<dataEMUL, nf>.RC;
+ def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo :
+ VPseudoISegLoadNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value,
nf, Ordered>,
- VLXSEGSched<nf, idx_eew, Order, ValLInfo>;
- def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo # "_TU" :
- VPseudoISegLoadNoMaskTU<ValVreg, IdxVreg, idx_eew, idx_lmul.value,
- nf, Ordered>,
- VLXSEGSched<nf, idx_eew, Order, ValLInfo>;
- def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo # "_MASK" :
- VPseudoISegLoadMask<ValVreg, IdxVreg, idx_eew, idx_lmul.value,
+ VLXSEGSched<nf, dataEEW, Order, DataLInfo>;
+ def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" :
+ VPseudoISegLoadMask<Vreg, IdxVreg, idxEEW, idxEMUL.value,
nf, Ordered>,
- VLXSEGSched<nf, idx_eew, Order, ValLInfo>;
+ VLXSEGSched<nf, dataEEW, Order, DataLInfo>;
}
}
}
@@ -3641,7 +4012,7 @@ multiclass VPseudoUSSegStore {
foreach eew = EEWList in {
foreach lmul = MxSet<eew>.m in {
defvar LInfo = lmul.MX;
- let VLMul = lmul.value in {
+ let VLMul = lmul.value, SEW=eew in {
foreach nf = NFSet<lmul>.L in {
defvar vreg = SegRegClass<lmul, nf>.RC;
def nf # "E" # eew # "_V_" # LInfo : VPseudoUSSegStoreNoMask<vreg, eew, nf>,
@@ -3658,7 +4029,7 @@ multiclass VPseudoSSegStore {
foreach eew = EEWList in {
foreach lmul = MxSet<eew>.m in {
defvar LInfo = lmul.MX;
- let VLMul = lmul.value in {
+ let VLMul = lmul.value, SEW=eew in {
foreach nf = NFSet<lmul>.L in {
defvar vreg = SegRegClass<lmul, nf>.RC;
def nf # "E" # eew # "_V_" # LInfo : VPseudoSSegStoreNoMask<vreg, eew, nf>,
@@ -3672,30 +4043,30 @@ multiclass VPseudoSSegStore {
}
multiclass VPseudoISegStore<bit Ordered> {
- foreach idx_eew = EEWList in {
- foreach sew = EEWList in {
- foreach val_lmul = MxSet<sew>.m in {
- defvar octuple_lmul = val_lmul.octuple;
+ foreach idxEEW = EEWList in {
+ foreach dataEEW = EEWList in {
+ foreach dataEMUL = MxSet<dataEEW>.m in {
+ defvar dataEMULOctuple = dataEMUL.octuple;
// Calculate emul = eew * lmul / sew
- defvar octuple_emul = !srl(!mul(idx_eew, octuple_lmul), log2<sew>.val);
- if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
- defvar ValLInfo = val_lmul.MX;
- defvar IdxLInfo = octuple_to_str<octuple_emul>.ret;
- defvar idx_lmul = !cast<LMULInfo>("V_" # IdxLInfo);
- defvar Vreg = val_lmul.vrclass;
- defvar IdxVreg = idx_lmul.vrclass;
+ defvar idxEMULOctuple = !srl(!mul(idxEEW, dataEMULOctuple), !logtwo(dataEEW));
+ if !and(!ge(idxEMULOctuple, 1), !le(idxEMULOctuple, 64)) then {
+ defvar DataLInfo = dataEMUL.MX;
+ defvar IdxLInfo = octuple_to_str<idxEMULOctuple>.ret;
+ defvar idxEMUL = !cast<LMULInfo>("V_" # IdxLInfo);
+ defvar DataVreg = dataEMUL.vrclass;
+ defvar IdxVreg = idxEMUL.vrclass;
defvar Order = !if(Ordered, "O", "U");
- let VLMul = val_lmul.value in {
- foreach nf = NFSet<val_lmul>.L in {
- defvar ValVreg = SegRegClass<val_lmul, nf>.RC;
- def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo :
- VPseudoISegStoreNoMask<ValVreg, IdxVreg, idx_eew, idx_lmul.value,
+ let VLMul = dataEMUL.value in {
+ foreach nf = NFSet<dataEMUL>.L in {
+ defvar Vreg = SegRegClass<dataEMUL, nf>.RC;
+ def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo :
+ VPseudoISegStoreNoMask<Vreg, IdxVreg, idxEEW, idxEMUL.value,
nf, Ordered>,
- VSXSEGSched<nf, idx_eew, Order, ValLInfo>;
- def nf # "EI" # idx_eew # "_V_" # IdxLInfo # "_" # ValLInfo # "_MASK" :
- VPseudoISegStoreMask<ValVreg, IdxVreg, idx_eew, idx_lmul.value,
+ VSXSEGSched<nf, idxEEW, Order, DataLInfo>;
+ def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" :
+ VPseudoISegStoreMask<Vreg, IdxVreg, idxEEW, idxEMUL.value,
nf, Ordered>,
- VSXSEGSched<nf, idx_eew, Order, ValLInfo>;
+ VSXSEGSched<nf, idxEEW, Order, DataLInfo>;
}
}
}
@@ -3713,34 +4084,47 @@ class VPatUnaryNoMask<string intrinsic_name,
string kind,
ValueType result_type,
ValueType op2_type,
- int sew,
+ int log2sew,
LMULInfo vlmul,
- VReg op2_reg_class> :
+ VReg result_reg_class,
+ VReg op2_reg_class,
+ bit isSEWAware = 0> :
Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
- (result_type undef),
+ (result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
VLOpFrag)),
- (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew),
+ inst#"_"#kind#"_"#vlmul.MX))
+ (result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
- GPR:$vl, sew)>;
+ GPR:$vl, log2sew, TU_MU)>;
-class VPatUnaryNoMaskTU<string intrinsic_name,
- string inst,
- string kind,
- ValueType result_type,
- ValueType op2_type,
- int sew,
- LMULInfo vlmul,
- VReg result_reg_class,
- VReg op2_reg_class> :
+class VPatUnaryNoMaskRoundingMode<string intrinsic_name,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op2_type,
+ int log2sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op2_reg_class,
+ bit isSEWAware = 0> :
Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
+ (XLenVT timm:$round),
VLOpFrag)),
- (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_TU")
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew),
+ inst#"_"#kind#"_"#vlmul.MX))
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
- GPR:$vl, sew)>;
+ (XLenVT timm:$round),
+ GPR:$vl, log2sew, TU_MU)>;
+
class VPatUnaryMask<string intrinsic_name,
string inst,
@@ -3748,39 +4132,51 @@ class VPatUnaryMask<string intrinsic_name,
ValueType result_type,
ValueType op2_type,
ValueType mask_type,
- int sew,
+ int log2sew,
LMULInfo vlmul,
VReg result_reg_class,
- VReg op2_reg_class> :
+ VReg op2_reg_class,
+ bit isSEWAware = 0> :
Pat<(result_type (!cast<Intrinsic>(intrinsic_name#"_mask")
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
(mask_type V0),
- VLOpFrag)),
- (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_MASK")
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew)#"_MASK",
+ inst#"_"#kind#"_"#vlmul.MX#"_MASK"))
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
- (mask_type V0), GPR:$vl, sew)>;
-
-class VPatUnaryMaskTA<string intrinsic_name,
- string inst,
- string kind,
- ValueType result_type,
- ValueType op2_type,
- ValueType mask_type,
- int sew,
- LMULInfo vlmul,
- VReg result_reg_class,
- VReg op2_reg_class> :
+ (mask_type V0), GPR:$vl, log2sew, (XLenVT timm:$policy))>;
+
+class VPatUnaryMaskRoundingMode<string intrinsic_name,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int log2sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op2_reg_class,
+ bit isSEWAware = 0> :
Pat<(result_type (!cast<Intrinsic>(intrinsic_name#"_mask")
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
(mask_type V0),
+ (XLenVT timm:$round),
VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_MASK")
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew)#"_MASK",
+ inst#"_"#kind#"_"#vlmul.MX#"_MASK"))
(result_type result_reg_class:$merge),
(op2_type op2_reg_class:$rs2),
- (mask_type V0), GPR:$vl, sew, (XLenVT timm:$policy))>;
+ (mask_type V0),
+ (XLenVT timm:$round),
+ GPR:$vl, log2sew, (XLenVT timm:$policy))>;
+
class VPatMaskUnaryNoMask<string intrinsic_name,
string inst,
@@ -3789,8 +4185,9 @@ class VPatMaskUnaryNoMask<string intrinsic_name,
(mti.Mask VR:$rs2),
VLOpFrag)),
(!cast<Instruction>(inst#"_M_"#mti.BX)
+ (mti.Mask (IMPLICIT_DEF)),
(mti.Mask VR:$rs2),
- GPR:$vl, mti.Log2SEW)>;
+ GPR:$vl, mti.Log2SEW, TU_MU)>;
class VPatMaskUnaryMask<string intrinsic_name,
string inst,
@@ -3803,7 +4200,7 @@ class VPatMaskUnaryMask<string intrinsic_name,
(!cast<Instruction>(inst#"_M_"#mti.BX#"_MASK")
(mti.Mask VR:$merge),
(mti.Mask VR:$rs2),
- (mti.Mask V0), GPR:$vl, mti.Log2SEW)>;
+ (mti.Mask V0), GPR:$vl, mti.Log2SEW, TU_MU)>;
class VPatUnaryAnyMask<string intrinsic,
string inst,
@@ -3811,7 +4208,7 @@ class VPatUnaryAnyMask<string intrinsic,
ValueType result_type,
ValueType op1_type,
ValueType mask_type,
- int sew,
+ int log2sew,
LMULInfo vlmul,
VReg result_reg_class,
VReg op1_reg_class> :
@@ -3820,11 +4217,11 @@ class VPatUnaryAnyMask<string intrinsic,
(op1_type op1_reg_class:$rs1),
(mask_type VR:$rs2),
VLOpFrag)),
- (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew))
(result_type result_reg_class:$merge),
(op1_type op1_reg_class:$rs1),
(mask_type VR:$rs2),
- GPR:$vl, sew)>;
+ GPR:$vl, log2sew)>;
class VPatBinaryM<string intrinsic_name,
string inst,
@@ -3843,43 +4240,69 @@ class VPatBinaryM<string intrinsic_name,
(op2_type op2_kind:$rs2),
GPR:$vl, sew)>;
-class VPatBinaryNoMaskTA<string intrinsic_name,
+class VPatBinaryNoMaskTU<string intrinsic_name,
string inst,
ValueType result_type,
ValueType op1_type,
ValueType op2_type,
int sew,
+ VReg result_reg_class,
VReg op1_reg_class,
DAGOperand op2_kind> :
Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (result_type result_reg_class:$merge),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ VLOpFrag)),
+ (!cast<Instruction>(inst)
+ (result_type result_reg_class:$merge),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ GPR:$vl, sew, TU_MU)>;
+
+class VPatBinaryNoMaskRoundingMode<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ int sew,
+ VReg op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
(result_type (undef)),
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
+ (XLenVT timm:$round),
VLOpFrag)),
(!cast<Instruction>(inst)
+ (result_type (IMPLICIT_DEF)),
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
- GPR:$vl, sew)>;
+ (XLenVT timm:$round),
+ GPR:$vl, sew, TA_MA)>;
-class VPatBinaryNoMaskTU<string intrinsic_name,
- string inst,
- ValueType result_type,
- ValueType op1_type,
- ValueType op2_type,
- int sew,
- VReg result_reg_class,
- VReg op1_reg_class,
- DAGOperand op2_kind> :
+class VPatBinaryNoMaskTURoundingMode<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ int sew,
+ VReg result_reg_class,
+ VReg op1_reg_class,
+ DAGOperand op2_kind> :
Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
(result_type result_reg_class:$merge),
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
+ (XLenVT timm:$round),
VLOpFrag)),
- (!cast<Instruction>(inst#"_TU")
+ (!cast<Instruction>(inst)
(result_type result_reg_class:$merge),
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
- GPR:$vl, sew)>;
+ (XLenVT timm:$round),
+ GPR:$vl, sew, TU_MU)>;
+
// Same as above but source operands are swapped.
class VPatBinaryNoMaskSwapped<string intrinsic_name,
@@ -3943,6 +4366,31 @@ class VPatBinaryMaskTA<string intrinsic_name,
(op2_type op2_kind:$rs2),
(mask_type V0), GPR:$vl, sew, (XLenVT timm:$policy))>;
+class VPatBinaryMaskTARoundingMode<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ VReg result_reg_class,
+ VReg op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name#"_mask")
+ (result_type result_reg_class:$merge),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0),
+ (XLenVT timm:$round),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(inst#"_MASK")
+ (result_type result_reg_class:$merge),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0),
+ (XLenVT timm:$round),
+ GPR:$vl, sew, (XLenVT timm:$policy))>;
+
// Same as above but source operands are swapped.
class VPatBinaryMaskSwapped<string intrinsic_name,
string inst,
@@ -3983,6 +4431,25 @@ class VPatTiedBinaryNoMask<string intrinsic_name,
(op2_type op2_kind:$rs2),
GPR:$vl, sew, TAIL_AGNOSTIC)>;
+class VPatTiedBinaryNoMaskRoundingMode<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op2_type,
+ int sew,
+ VReg result_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (result_type (undef)),
+ (result_type result_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (XLenVT timm:$round),
+ VLOpFrag)),
+ (!cast<Instruction>(inst#"_TIED")
+ (result_type result_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (XLenVT timm:$round),
+ GPR:$vl, sew, TAIL_AGNOSTIC)>;
+
class VPatTiedBinaryNoMaskTU<string intrinsic_name,
string inst,
ValueType result_type,
@@ -3998,7 +4465,26 @@ class VPatTiedBinaryNoMaskTU<string intrinsic_name,
(!cast<Instruction>(inst#"_TIED")
(result_type result_reg_class:$merge),
(op2_type op2_kind:$rs2),
- GPR:$vl, sew, TAIL_UNDISTURBED_MASK_UNDISTURBED)>;
+ GPR:$vl, sew, TU_MU)>;
+
+class VPatTiedBinaryNoMaskTURoundingMode<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op2_type,
+ int sew,
+ VReg result_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (result_type result_reg_class:$merge),
+ (result_type result_reg_class:$merge),
+ (op2_type op2_kind:$rs2),
+ (XLenVT timm:$round),
+ VLOpFrag)),
+ (!cast<Instruction>(inst#"_TIED")
+ (result_type result_reg_class:$merge),
+ (op2_type op2_kind:$rs2),
+ (XLenVT timm:$round),
+ GPR:$vl, sew, TU_MU)>;
class VPatTiedBinaryMask<string intrinsic_name,
string inst,
@@ -4019,6 +4505,28 @@ class VPatTiedBinaryMask<string intrinsic_name,
(op2_type op2_kind:$rs2),
(mask_type V0), GPR:$vl, sew, (XLenVT timm:$policy))>;
+class VPatTiedBinaryMaskRoundingMode<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ VReg result_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name#"_mask")
+ (result_type result_reg_class:$merge),
+ (result_type result_reg_class:$merge),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0),
+ (XLenVT timm:$round),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(inst#"_MASK_TIED")
+ (result_type result_reg_class:$merge),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0),
+ (XLenVT timm:$round),
+ GPR:$vl, sew, (XLenVT timm:$policy))>;
+
class VPatTernaryNoMask<string intrinsic,
string inst,
string kind,
@@ -4041,6 +4549,52 @@ class VPatTernaryNoMask<string intrinsic,
op2_kind:$rs2,
GPR:$vl, sew)>;
+class VPatTernaryNoMaskTA<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ int log2sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ RegisterClass op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic)
+ (result_type result_reg_class:$rs3),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ VLOpFrag)),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew))
+ result_reg_class:$rs3,
+ (op1_type op1_reg_class:$rs1),
+ op2_kind:$rs2,
+ GPR:$vl, log2sew, TAIL_AGNOSTIC)>;
+
+class VPatTernaryNoMaskTARoundingMode<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ int log2sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ RegisterClass op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic)
+ (result_type result_reg_class:$rs3),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (XLenVT timm:$round),
+ VLOpFrag)),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew))
+ result_reg_class:$rs3,
+ (op1_type op1_reg_class:$rs1),
+ op2_kind:$rs2,
+ (XLenVT timm:$round),
+ GPR:$vl, log2sew, TAIL_AGNOSTIC)>;
+
class VPatTernaryNoMaskWithPolicy<string intrinsic,
string inst,
string kind,
@@ -4063,6 +4617,30 @@ class VPatTernaryNoMaskWithPolicy<string intrinsic,
op2_kind:$rs2,
GPR:$vl, sew, (XLenVT timm:$policy))>;
+class VPatTernaryNoMaskWithPolicyRoundingMode<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ RegisterClass op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic)
+ (result_type result_reg_class:$rs3),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (XLenVT timm:$round),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
+ result_reg_class:$rs3,
+ (op1_type op1_reg_class:$rs1),
+ op2_kind:$rs2,
+ (XLenVT timm:$round),
+ GPR:$vl, sew, (XLenVT timm:$policy))>;
+
class VPatTernaryMask<string intrinsic,
string inst,
string kind,
@@ -4113,9 +4691,87 @@ class VPatTernaryMaskPolicy<string intrinsic,
(mask_type V0),
GPR:$vl, sew, (XLenVT timm:$policy))>;
+class VPatTernaryMaskPolicyRoundingMode<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ RegisterClass op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic#"_mask")
+ (result_type result_reg_class:$rs3),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0),
+ (XLenVT timm:$round),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX # "_MASK")
+ result_reg_class:$rs3,
+ (op1_type op1_reg_class:$rs1),
+ op2_kind:$rs2,
+ (mask_type V0),
+ (XLenVT timm:$round),
+ GPR:$vl, sew, (XLenVT timm:$policy))>;
+
+class VPatTernaryMaskTA<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int log2sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ RegisterClass op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic#"_mask")
+ (result_type result_reg_class:$rs3),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0),
+ VLOpFrag)),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew)# "_MASK")
+ result_reg_class:$rs3,
+ (op1_type op1_reg_class:$rs1),
+ op2_kind:$rs2,
+ (mask_type V0),
+ GPR:$vl, log2sew, TAIL_AGNOSTIC)>;
+
+class VPatTernaryMaskTARoundingMode<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int log2sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ RegisterClass op1_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic#"_mask")
+ (result_type result_reg_class:$rs3),
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ (mask_type V0),
+ (XLenVT timm:$round),
+ VLOpFrag)),
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_E"#!shl(1, log2sew)# "_MASK")
+ result_reg_class:$rs3,
+ (op1_type op1_reg_class:$rs1),
+ op2_kind:$rs2,
+ (mask_type V0),
+ (XLenVT timm:$round),
+ GPR:$vl, log2sew, TAIL_AGNOSTIC)>;
+
multiclass VPatUnaryS_M<string intrinsic_name,
- string inst>
-{
+ string inst> {
foreach mti = AllMasks in {
def : Pat<(XLenVT (!cast<Intrinsic>(intrinsic_name)
(mti.Mask VR:$rs1), VLOpFrag)),
@@ -4131,87 +4787,92 @@ multiclass VPatUnaryS_M<string intrinsic_name,
multiclass VPatUnaryV_V_AnyMask<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in
def : VPatUnaryAnyMask<intrinsic, instruction, "VM",
vti.Vector, vti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.RegClass,
- vti.RegClass>;
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass>;
}
}
multiclass VPatUnaryM_M<string intrinsic,
- string inst>
-{
+ string inst> {
foreach mti = AllMasks in {
def : VPatMaskUnaryNoMask<intrinsic, inst, mti>;
def : VPatMaskUnaryMask<intrinsic, inst, mti>;
}
}
-multiclass VPatUnaryV_M<string intrinsic, string instruction>
-{
+multiclass VPatUnaryV_M<string intrinsic, string instruction> {
foreach vti = AllIntegerVectors in {
- def : VPatUnaryNoMask<intrinsic, instruction, "M", vti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, VR>;
- def : VPatUnaryNoMaskTU<intrinsic, instruction, "M", vti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.RegClass,VR>;
- def : VPatUnaryMaskTA<intrinsic, instruction, "M", vti.Vector, vti.Mask,
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : VPatUnaryNoMask<intrinsic, instruction, "M", vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, VR>;
+ def : VPatUnaryMask<intrinsic, instruction, "M", vti.Vector, vti.Mask,
vti.Mask, vti.Log2SEW, vti.LMul, vti.RegClass, VR>;
+ }
}
}
multiclass VPatUnaryV_VF<string intrinsic, string instruction, string suffix,
- list<VTypeInfoToFraction> fractionList>
-{
- foreach vtiTofti = fractionList in
- {
+ list<VTypeInfoToFraction> fractionList> {
+ foreach vtiTofti = fractionList in {
defvar vti = vtiTofti.Vti;
defvar fti = vtiTofti.Fti;
- def : VPatUnaryNoMask<intrinsic, instruction, suffix,
- vti.Vector, fti.Vector,
- vti.Log2SEW, vti.LMul, fti.RegClass>;
- def : VPatUnaryNoMaskTU<intrinsic, instruction, suffix,
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fti>.Predicates) in {
+ def : VPatUnaryNoMask<intrinsic, instruction, suffix,
vti.Vector, fti.Vector,
vti.Log2SEW, vti.LMul, vti.RegClass, fti.RegClass>;
- def : VPatUnaryMaskTA<intrinsic, instruction, suffix,
+ def : VPatUnaryMask<intrinsic, instruction, suffix,
vti.Vector, fti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, vti.RegClass, fti.RegClass>;
- }
+ }
+ }
}
multiclass VPatUnaryV_V<string intrinsic, string instruction,
- list<VTypeInfo> vtilist> {
+ list<VTypeInfo> vtilist, bit isSEWAware = 0> {
foreach vti = vtilist in {
- def : VPatUnaryNoMask<intrinsic, instruction, "V",
- vti.Vector, vti.Vector,
- vti.Log2SEW, vti.LMul, vti.RegClass>;
- def : VPatUnaryNoMaskTU<intrinsic, instruction, "V",
- vti.Vector, vti.Vector,
- vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass>;
- def : VPatUnaryMaskTA<intrinsic, instruction, "V",
- vti.Vector, vti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : VPatUnaryNoMask<intrinsic, instruction, "V",
+ vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, vti.RegClass, vti.RegClass, isSEWAware>;
+ def : VPatUnaryMask<intrinsic, instruction, "V",
+ vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, vti.RegClass, isSEWAware>;
+ }
+ }
+}
+
+multiclass VPatUnaryV_V_RM<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, bit isSEWAware = 0> {
+ foreach vti = vtilist in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : VPatUnaryNoMaskRoundingMode<intrinsic, instruction, "V",
+ vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, vti.RegClass, vti.RegClass, isSEWAware>;
+ def : VPatUnaryMaskRoundingMode<intrinsic, instruction, "V",
+ vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, vti.RegClass, isSEWAware>;
+ }
}
}
-multiclass VPatNullaryV<string intrinsic, string instruction>
-{
+multiclass VPatNullaryV<string intrinsic, string instruction> {
foreach vti = AllIntegerVectors in {
- def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic)
- (vti.Vector undef),
- VLOpFrag)),
- (!cast<Instruction>(instruction#"_V_" # vti.LMul.MX)
- GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic)
- (vti.Vector vti.RegClass:$merge),
- VLOpFrag)),
- (!cast<Instruction>(instruction#"_V_" # vti.LMul.MX # "_TU")
- vti.RegClass:$merge, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic # "_mask")
- (vti.Vector vti.RegClass:$merge),
- (vti.Mask V0), VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>(instruction#"_V_" # vti.LMul.MX # "_MASK")
- vti.RegClass:$merge, (vti.Mask V0),
- GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic)
+ (vti.Vector vti.RegClass:$merge),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction#"_V_" # vti.LMul.MX)
+ vti.RegClass:$merge, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ def : Pat<(vti.Vector (!cast<Intrinsic>(intrinsic # "_mask")
+ (vti.Vector vti.RegClass:$merge),
+ (vti.Mask V0), VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>(instruction#"_V_" # vti.LMul.MX # "_MASK")
+ vti.RegClass:$merge, (vti.Mask V0),
+ GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
+ }
}
}
@@ -4232,8 +4893,7 @@ multiclass VPatBinaryM<string intrinsic,
int sew,
VReg result_reg_class,
VReg op1_reg_class,
- DAGOperand op2_kind>
-{
+ DAGOperand op2_kind> {
def : VPatBinaryM<intrinsic, inst, result_type, op1_type, op2_type,
sew, op1_reg_class, op2_kind>;
def : VPatBinaryMask<intrinsic, inst, result_type, op1_type, op2_type,
@@ -4241,19 +4901,16 @@ multiclass VPatBinaryM<string intrinsic,
op2_kind>;
}
-multiclass VPatBinaryTA<string intrinsic,
- string inst,
- ValueType result_type,
- ValueType op1_type,
- ValueType op2_type,
- ValueType mask_type,
- int sew,
- VReg result_reg_class,
- VReg op1_reg_class,
- DAGOperand op2_kind>
-{
- def : VPatBinaryNoMaskTA<intrinsic, inst, result_type, op1_type, op2_type,
- sew, op1_reg_class, op2_kind>;
+multiclass VPatBinary<string intrinsic,
+ string inst,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ VReg result_reg_class,
+ VReg op1_reg_class,
+ DAGOperand op2_kind> {
def : VPatBinaryNoMaskTU<intrinsic, inst, result_type, op1_type, op2_type,
sew, result_reg_class, op1_reg_class, op2_kind>;
def : VPatBinaryMaskTA<intrinsic, inst, result_type, op1_type, op2_type,
@@ -4261,6 +4918,25 @@ multiclass VPatBinaryTA<string intrinsic,
op2_kind>;
}
+multiclass VPatBinaryRoundingMode<string intrinsic,
+ string inst,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ VReg result_reg_class,
+ VReg op1_reg_class,
+ DAGOperand op2_kind> {
+ def : VPatBinaryNoMaskRoundingMode<intrinsic, inst, result_type, op1_type, op2_type,
+ sew, op1_reg_class, op2_kind>;
+ def : VPatBinaryNoMaskTURoundingMode<intrinsic, inst, result_type, op1_type, op2_type,
+ sew, result_reg_class, op1_reg_class, op2_kind>;
+ def : VPatBinaryMaskTARoundingMode<intrinsic, inst, result_type, op1_type, op2_type,
+ mask_type, sew, result_reg_class, op1_reg_class,
+ op2_kind>;
+}
+
multiclass VPatBinarySwapped<string intrinsic,
string inst,
ValueType result_type,
@@ -4270,8 +4946,7 @@ multiclass VPatBinarySwapped<string intrinsic,
int sew,
VReg result_reg_class,
VReg op1_reg_class,
- DAGOperand op2_kind>
-{
+ DAGOperand op2_kind> {
def : VPatBinaryNoMaskSwapped<intrinsic, inst, result_type, op1_type, op2_type,
sew, op1_reg_class, op2_kind>;
def : VPatBinaryMaskSwapped<intrinsic, inst, result_type, op1_type, op2_type,
@@ -4290,25 +4965,14 @@ multiclass VPatBinaryCarryInTAIL<string intrinsic,
LMULInfo vlmul,
VReg result_reg_class,
VReg op1_reg_class,
- DAGOperand op2_kind>
-{
- def : Pat<(result_type (!cast<Intrinsic>(intrinsic)
- (result_type undef),
- (op1_type op1_reg_class:$rs1),
- (op2_type op2_kind:$rs2),
- (mask_type V0),
- VLOpFrag)),
- (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
- (op1_type op1_reg_class:$rs1),
- (op2_type op2_kind:$rs2),
- (mask_type V0), GPR:$vl, sew)>;
+ DAGOperand op2_kind> {
def : Pat<(result_type (!cast<Intrinsic>(intrinsic)
(result_type result_reg_class:$merge),
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
(mask_type V0),
VLOpFrag)),
- (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX#"_TU")
+ (!cast<Instruction>(inst#"_"#kind#"_"#vlmul.MX)
(result_type result_reg_class:$merge),
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
@@ -4325,8 +4989,7 @@ multiclass VPatBinaryCarryIn<string intrinsic,
int sew,
LMULInfo vlmul,
VReg op1_reg_class,
- DAGOperand op2_kind>
-{
+ DAGOperand op2_kind> {
def : Pat<(result_type (!cast<Intrinsic>(intrinsic)
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
@@ -4347,8 +5010,7 @@ multiclass VPatBinaryMaskOut<string intrinsic,
int sew,
LMULInfo vlmul,
VReg op1_reg_class,
- DAGOperand op2_kind>
-{
+ DAGOperand op2_kind> {
def : Pat<(result_type (!cast<Intrinsic>(intrinsic)
(op1_type op1_reg_class:$rs1),
(op2_type op2_kind:$rs2),
@@ -4368,33 +5030,65 @@ multiclass VPatConversionTA<string intrinsic,
int sew,
LMULInfo vlmul,
VReg result_reg_class,
- VReg op1_reg_class>
-{
+ VReg op1_reg_class> {
def : VPatUnaryNoMask<intrinsic, inst, kind, result_type, op1_type,
- sew, vlmul, op1_reg_class>;
- def : VPatUnaryNoMaskTU<intrinsic, inst, kind, result_type, op1_type,
- sew, vlmul, result_reg_class, op1_reg_class>;
- def : VPatUnaryMaskTA<intrinsic, inst, kind, result_type, op1_type,
- mask_type, sew, vlmul, result_reg_class, op1_reg_class>;
+ sew, vlmul, result_reg_class, op1_reg_class>;
+ def : VPatUnaryMask<intrinsic, inst, kind, result_type, op1_type,
+ mask_type, sew, vlmul, result_reg_class, op1_reg_class>;
+}
+
+multiclass VPatConversionTARoundingMode<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op1_reg_class> {
+ def : VPatUnaryNoMaskRoundingMode<intrinsic, inst, kind, result_type, op1_type,
+ sew, vlmul, result_reg_class, op1_reg_class>;
+ def : VPatUnaryMaskRoundingMode<intrinsic, inst, kind, result_type, op1_type,
+ mask_type, sew, vlmul, result_reg_class, op1_reg_class>;
}
multiclass VPatBinaryV_VV<string intrinsic, string instruction,
- list<VTypeInfo> vtilist> {
+ list<VTypeInfo> vtilist, bit isSEWAware = 0> {
foreach vti = vtilist in
- defm : VPatBinaryTA<intrinsic, instruction # "_VV_" # vti.LMul.MX,
- vti.Vector, vti.Vector, vti.Vector,vti.Mask,
- vti.Log2SEW, vti.RegClass,
- vti.RegClass, vti.RegClass>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatBinary<intrinsic,
+ !if(isSEWAware,
+ instruction # "_VV_" # vti.LMul.MX # "_E" # vti.SEW,
+ instruction # "_VV_" # vti.LMul.MX),
+ vti.Vector, vti.Vector, vti.Vector,vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, vti.RegClass>;
+}
+
+multiclass VPatBinaryV_VV_RM<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, bit isSEWAware = 0> {
+ foreach vti = vtilist in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatBinaryRoundingMode<intrinsic,
+ !if(isSEWAware,
+ instruction # "_VV_" # vti.LMul.MX # "_E" # vti.SEW,
+ instruction # "_VV_" # vti.LMul.MX),
+ vti.Vector, vti.Vector, vti.Vector,vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, vti.RegClass>;
}
multiclass VPatBinaryV_VV_INT<string intrinsic, string instruction,
- list<VTypeInfo> vtilist> {
+ list<VTypeInfo> vtilist> {
foreach vti = vtilist in {
defvar ivti = GetIntVTypeInfo<vti>.Vti;
- defm : VPatBinaryTA<intrinsic, instruction # "_VV_" # vti.LMul.MX,
- vti.Vector, vti.Vector, ivti.Vector, vti.Mask,
- vti.Log2SEW, vti.RegClass,
- vti.RegClass, vti.RegClass>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatBinary<intrinsic,
+ instruction # "_VV_" # vti.LMul.MX # "_E" # vti.SEW,
+ vti.Vector, vti.Vector, ivti.Vector, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, vti.RegClass>;
}
}
@@ -4408,46 +5102,82 @@ multiclass VPatBinaryV_VV_INT_EEW<string intrinsic, string instruction,
if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
defvar emul_str = octuple_to_str<octuple_emul>.ret;
defvar ivti = !cast<VTypeInfo>("VI" # eew # emul_str);
- defvar inst = instruction # "_VV_" # vti.LMul.MX # "_" # emul_str;
- defm : VPatBinaryTA<intrinsic, inst,
- vti.Vector, vti.Vector, ivti.Vector, vti.Mask,
- vti.Log2SEW, vti.RegClass,
- vti.RegClass, ivti.RegClass>;
+ defvar inst = instruction # "_VV_" # vti.LMul.MX # "_E" # vti.SEW # "_" # emul_str;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<ivti>.Predicates) in
+ defm : VPatBinary<intrinsic, inst,
+ vti.Vector, vti.Vector, ivti.Vector, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, ivti.RegClass>;
}
}
}
multiclass VPatBinaryV_VX<string intrinsic, string instruction,
- list<VTypeInfo> vtilist> {
+ list<VTypeInfo> vtilist, bit isSEWAware = 0> {
foreach vti = vtilist in {
defvar kind = "V"#vti.ScalarSuffix;
- defm : VPatBinaryTA<intrinsic, instruction#"_"#kind#"_"#vti.LMul.MX,
- vti.Vector, vti.Vector, vti.Scalar, vti.Mask,
- vti.Log2SEW, vti.RegClass,
- vti.RegClass, vti.ScalarRegClass>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatBinary<intrinsic,
+ !if(isSEWAware,
+ instruction#"_"#kind#"_"#vti.LMul.MX#"_E"#vti.SEW,
+ instruction#"_"#kind#"_"#vti.LMul.MX),
+ vti.Vector, vti.Vector, vti.Scalar, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, vti.ScalarRegClass>;
+ }
+}
+
+multiclass VPatBinaryV_VX_RM<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, bit isSEWAware = 0> {
+ foreach vti = vtilist in {
+ defvar kind = "V"#vti.ScalarSuffix;
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatBinaryRoundingMode<intrinsic,
+ !if(isSEWAware,
+ instruction#"_"#kind#"_"#vti.LMul.MX#"_E"#vti.SEW,
+ instruction#"_"#kind#"_"#vti.LMul.MX),
+ vti.Vector, vti.Vector, vti.Scalar, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, vti.ScalarRegClass>;
}
}
multiclass VPatBinaryV_VX_INT<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in
- defm : VPatBinaryTA<intrinsic, instruction # "_VX_" # vti.LMul.MX,
- vti.Vector, vti.Vector, XLenVT, vti.Mask,
- vti.Log2SEW, vti.RegClass,
- vti.RegClass, GPR>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatBinary<intrinsic, instruction # "_VX_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, XLenVT, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, GPR>;
}
multiclass VPatBinaryV_VI<string intrinsic, string instruction,
list<VTypeInfo> vtilist, Operand imm_type> {
foreach vti = vtilist in
- defm : VPatBinaryTA<intrinsic, instruction # "_VI_" # vti.LMul.MX,
- vti.Vector, vti.Vector, XLenVT, vti.Mask,
- vti.Log2SEW, vti.RegClass,
- vti.RegClass, imm_type>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatBinary<intrinsic, instruction # "_VI_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, XLenVT, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, imm_type>;
+}
+
+multiclass VPatBinaryV_VI_RM<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist,
+ Operand imm_type> {
+ foreach vti = vtilist in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatBinaryRoundingMode<intrinsic,
+ instruction # "_VI_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, XLenVT, vti.Mask,
+ vti.Log2SEW, vti.RegClass,
+ vti.RegClass, imm_type>;
}
multiclass VPatBinaryM_MM<string intrinsic, string instruction> {
foreach mti = AllMasks in
+ let Predicates = [HasVInstructions] in
def : VPatBinaryM<intrinsic, instruction # "_MM_" # mti.LMul.MX,
mti.Mask, mti.Mask, mti.Mask,
mti.Log2SEW, VR, VR>;
@@ -4458,10 +5188,26 @@ multiclass VPatBinaryW_VV<string intrinsic, string instruction,
foreach VtiToWti = vtilist in {
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
- defm : VPatBinaryTA<intrinsic, instruction # "_VV_" # Vti.LMul.MX,
- Wti.Vector, Vti.Vector, Vti.Vector, Vti.Mask,
- Vti.Log2SEW, Wti.RegClass,
- Vti.RegClass, Vti.RegClass>;
+ let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
+ GetVTypePredicates<Wti>.Predicates) in
+ defm : VPatBinary<intrinsic, instruction # "_VV_" # Vti.LMul.MX,
+ Wti.Vector, Vti.Vector, Vti.Vector, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Vti.RegClass, Vti.RegClass>;
+ }
+}
+
+multiclass VPatBinaryW_VV_RM<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach VtiToWti = vtilist in {
+ defvar Vti = VtiToWti.Vti;
+ defvar Wti = VtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
+ GetVTypePredicates<Wti>.Predicates) in
+ defm : VPatBinaryRoundingMode<intrinsic, instruction # "_VV_" # Vti.LMul.MX,
+ Wti.Vector, Vti.Vector, Vti.Vector, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Vti.RegClass, Vti.RegClass>;
}
}
@@ -4471,10 +5217,27 @@ multiclass VPatBinaryW_VX<string intrinsic, string instruction,
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
defvar kind = "V"#Vti.ScalarSuffix;
- defm : VPatBinaryTA<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
- Wti.Vector, Vti.Vector, Vti.Scalar, Vti.Mask,
- Vti.Log2SEW, Wti.RegClass,
- Vti.RegClass, Vti.ScalarRegClass>;
+ let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
+ GetVTypePredicates<Wti>.Predicates) in
+ defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
+ Wti.Vector, Vti.Vector, Vti.Scalar, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Vti.RegClass, Vti.ScalarRegClass>;
+ }
+}
+
+multiclass VPatBinaryW_VX_RM<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach VtiToWti = vtilist in {
+ defvar Vti = VtiToWti.Vti;
+ defvar Wti = VtiToWti.Wti;
+ defvar kind = "V"#Vti.ScalarSuffix;
+ let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
+ GetVTypePredicates<Wti>.Predicates) in
+ defm : VPatBinaryRoundingMode<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
+ Wti.Vector, Vti.Vector, Vti.Scalar, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Vti.RegClass, Vti.ScalarRegClass>;
}
}
@@ -4483,24 +5246,56 @@ multiclass VPatBinaryW_WV<string intrinsic, string instruction,
foreach VtiToWti = vtilist in {
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
- def : VPatTiedBinaryNoMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
- Wti.Vector, Vti.Vector,
- Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
- def : VPatBinaryNoMaskTU<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
- Wti.Vector, Wti.Vector, Vti.Vector, Vti.Log2SEW,
- Wti.RegClass, Wti.RegClass, Vti.RegClass>;
- let AddedComplexity = 1 in {
- def : VPatTiedBinaryNoMaskTU<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
+ GetVTypePredicates<Wti>.Predicates) in {
+ def : VPatTiedBinaryNoMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
Wti.Vector, Vti.Vector,
Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
- def : VPatTiedBinaryMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
- Wti.Vector, Vti.Vector, Vti.Mask,
- Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
+ def : VPatBinaryNoMaskTU<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Wti.Vector, Wti.Vector, Vti.Vector, Vti.Log2SEW,
+ Wti.RegClass, Wti.RegClass, Vti.RegClass>;
+ let AddedComplexity = 1 in {
+ def : VPatTiedBinaryNoMaskTU<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Wti.Vector, Vti.Vector,
+ Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
+ def : VPatTiedBinaryMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Wti.Vector, Vti.Vector, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
+ }
+ def : VPatBinaryMaskTA<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Wti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Wti.RegClass, Vti.RegClass>;
+ }
+ }
+}
+
+multiclass VPatBinaryW_WV_RM<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach VtiToWti = vtilist in {
+ defvar Vti = VtiToWti.Vti;
+ defvar Wti = VtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
+ GetVTypePredicates<Wti>.Predicates) in {
+ def : VPatTiedBinaryNoMaskRoundingMode<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Wti.Vector, Vti.Vector,
+ Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
+ def : VPatBinaryNoMaskTURoundingMode<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Wti.Vector, Wti.Vector, Vti.Vector, Vti.Log2SEW,
+ Wti.RegClass, Wti.RegClass, Vti.RegClass>;
+ let AddedComplexity = 1 in {
+ def : VPatTiedBinaryNoMaskTURoundingMode<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Wti.Vector, Vti.Vector,
+ Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
+ def : VPatTiedBinaryMaskRoundingMode<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Wti.Vector, Vti.Vector, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
+ }
+ def : VPatBinaryMaskTARoundingMode<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Wti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Wti.RegClass, Vti.RegClass>;
}
- def : VPatBinaryMaskTA<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
- Wti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
- Vti.Log2SEW, Wti.RegClass,
- Wti.RegClass, Vti.RegClass>;
}
}
@@ -4510,10 +5305,27 @@ multiclass VPatBinaryW_WX<string intrinsic, string instruction,
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
defvar kind = "W"#Vti.ScalarSuffix;
- defm : VPatBinaryTA<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
- Wti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask,
- Vti.Log2SEW, Wti.RegClass,
- Wti.RegClass, Vti.ScalarRegClass>;
+ let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
+ GetVTypePredicates<Wti>.Predicates) in
+ defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
+ Wti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Wti.RegClass, Vti.ScalarRegClass>;
+ }
+}
+
+multiclass VPatBinaryW_WX_RM<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach VtiToWti = vtilist in {
+ defvar Vti = VtiToWti.Vti;
+ defvar Wti = VtiToWti.Wti;
+ defvar kind = "W"#Vti.ScalarSuffix;
+ let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
+ GetVTypePredicates<Wti>.Predicates) in
+ defm : VPatBinaryRoundingMode<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
+ Wti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Wti.RegClass, Vti.ScalarRegClass>;
}
}
@@ -4522,10 +5334,27 @@ multiclass VPatBinaryV_WV<string intrinsic, string instruction,
foreach VtiToWti = vtilist in {
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
- defm : VPatBinaryTA<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
- Vti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
- Vti.Log2SEW, Vti.RegClass,
- Wti.RegClass, Vti.RegClass>;
+ let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
+ GetVTypePredicates<Wti>.Predicates) in
+ defm : VPatBinary<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Vti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
+ Vti.Log2SEW, Vti.RegClass,
+ Wti.RegClass, Vti.RegClass>;
+ }
+}
+
+multiclass VPatBinaryV_WV_RM<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach VtiToWti = vtilist in {
+ defvar Vti = VtiToWti.Vti;
+ defvar Wti = VtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
+ GetVTypePredicates<Wti>.Predicates) in
+ defm : VPatBinaryRoundingMode<intrinsic,
+ instruction # "_WV_" # Vti.LMul.MX,
+ Vti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
+ Vti.Log2SEW, Vti.RegClass,
+ Wti.RegClass, Vti.RegClass>;
}
}
@@ -4535,22 +5364,58 @@ multiclass VPatBinaryV_WX<string intrinsic, string instruction,
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
defvar kind = "W"#Vti.ScalarSuffix;
- defm : VPatBinaryTA<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
- Vti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask,
- Vti.Log2SEW, Vti.RegClass,
- Wti.RegClass, Vti.ScalarRegClass>;
+ let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
+ GetVTypePredicates<Wti>.Predicates) in
+ defm : VPatBinary<intrinsic, instruction#"_"#kind#"_"#Vti.LMul.MX,
+ Vti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask,
+ Vti.Log2SEW, Vti.RegClass,
+ Wti.RegClass, Vti.ScalarRegClass>;
+ }
+}
+
+multiclass VPatBinaryV_WX_RM<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach VtiToWti = vtilist in {
+ defvar Vti = VtiToWti.Vti;
+ defvar Wti = VtiToWti.Wti;
+ defvar kind = "W"#Vti.ScalarSuffix;
+ let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
+ GetVTypePredicates<Wti>.Predicates) in
+ defm : VPatBinaryRoundingMode<intrinsic,
+ instruction#"_"#kind#"_"#Vti.LMul.MX,
+ Vti.Vector, Wti.Vector, Vti.Scalar, Vti.Mask,
+ Vti.Log2SEW, Vti.RegClass,
+ Wti.RegClass, Vti.ScalarRegClass>;
}
}
+
multiclass VPatBinaryV_WI<string intrinsic, string instruction,
list<VTypeInfoToWide> vtilist> {
foreach VtiToWti = vtilist in {
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
- defm : VPatBinaryTA<intrinsic, instruction # "_WI_" # Vti.LMul.MX,
- Vti.Vector, Wti.Vector, XLenVT, Vti.Mask,
- Vti.Log2SEW, Vti.RegClass,
- Wti.RegClass, uimm5>;
+ let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
+ GetVTypePredicates<Wti>.Predicates) in
+ defm : VPatBinary<intrinsic, instruction # "_WI_" # Vti.LMul.MX,
+ Vti.Vector, Wti.Vector, XLenVT, Vti.Mask,
+ Vti.Log2SEW, Vti.RegClass,
+ Wti.RegClass, uimm5>;
+ }
+}
+
+multiclass VPatBinaryV_WI_RM<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach VtiToWti = vtilist in {
+ defvar Vti = VtiToWti.Vti;
+ defvar Wti = VtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<Vti>.Predicates,
+ GetVTypePredicates<Wti>.Predicates) in
+ defm : VPatBinaryRoundingMode<intrinsic,
+ instruction # "_WI_" # Vti.LMul.MX,
+ Vti.Vector, Wti.Vector, XLenVT, Vti.Mask,
+ Vti.Log2SEW, Vti.RegClass,
+ Wti.RegClass, uimm5>;
}
}
@@ -4558,6 +5423,7 @@ multiclass VPatBinaryV_VM<string intrinsic, string instruction,
bit CarryOut = 0,
list<VTypeInfo> vtilist = AllIntegerVectors> {
foreach vti = vtilist in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatBinaryCarryIn<intrinsic, instruction, "VVM",
!if(CarryOut, vti.Mask, vti.Vector),
vti.Vector, vti.Vector, vti.Mask,
@@ -4569,6 +5435,7 @@ multiclass VPatBinaryV_XM<string intrinsic, string instruction,
bit CarryOut = 0,
list<VTypeInfo> vtilist = AllIntegerVectors> {
foreach vti = vtilist in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatBinaryCarryIn<intrinsic, instruction,
"V"#vti.ScalarSuffix#"M",
!if(CarryOut, vti.Mask, vti.Vector),
@@ -4580,6 +5447,7 @@ multiclass VPatBinaryV_XM<string intrinsic, string instruction,
multiclass VPatBinaryV_IM<string intrinsic, string instruction,
bit CarryOut = 0> {
foreach vti = AllIntegerVectors in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatBinaryCarryIn<intrinsic, instruction, "VIM",
!if(CarryOut, vti.Mask, vti.Vector),
vti.Vector, XLenVT, vti.Mask,
@@ -4587,34 +5455,32 @@ multiclass VPatBinaryV_IM<string intrinsic, string instruction,
vti.RegClass, simm5>;
}
-multiclass VPatBinaryV_VM_TAIL<string intrinsic, string instruction,
- bit CarryOut = 0,
- list<VTypeInfo> vtilist = AllIntegerVectors> {
- foreach vti = vtilist in
+multiclass VPatBinaryV_VM_TAIL<string intrinsic, string instruction> {
+ foreach vti = AllIntegerVectors in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatBinaryCarryInTAIL<intrinsic, instruction, "VVM",
- !if(CarryOut, vti.Mask, vti.Vector),
+ vti.Vector,
vti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, vti.RegClass,
vti.RegClass, vti.RegClass>;
}
-multiclass VPatBinaryV_XM_TAIL<string intrinsic, string instruction,
- bit CarryOut = 0,
- list<VTypeInfo> vtilist = AllIntegerVectors> {
- foreach vti = vtilist in
+multiclass VPatBinaryV_XM_TAIL<string intrinsic, string instruction> {
+ foreach vti = AllIntegerVectors in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatBinaryCarryInTAIL<intrinsic, instruction,
"V"#vti.ScalarSuffix#"M",
- !if(CarryOut, vti.Mask, vti.Vector),
+ vti.Vector,
vti.Vector, vti.Scalar, vti.Mask,
vti.Log2SEW, vti.LMul, vti.RegClass,
vti.RegClass, vti.ScalarRegClass>;
}
-multiclass VPatBinaryV_IM_TAIL<string intrinsic, string instruction,
- bit CarryOut = 0> {
+multiclass VPatBinaryV_IM_TAIL<string intrinsic, string instruction> {
foreach vti = AllIntegerVectors in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatBinaryCarryInTAIL<intrinsic, instruction, "VIM",
- !if(CarryOut, vti.Mask, vti.Vector),
+ vti.Vector,
vti.Vector, XLenVT, vti.Mask,
vti.Log2SEW, vti.LMul,
vti.RegClass, vti.RegClass, simm5>;
@@ -4622,6 +5488,7 @@ multiclass VPatBinaryV_IM_TAIL<string intrinsic, string instruction,
multiclass VPatBinaryV_V<string intrinsic, string instruction> {
foreach vti = AllIntegerVectors in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatBinaryMaskOut<intrinsic, instruction, "VV",
vti.Mask, vti.Vector, vti.Vector,
vti.Log2SEW, vti.LMul,
@@ -4630,6 +5497,7 @@ multiclass VPatBinaryV_V<string intrinsic, string instruction> {
multiclass VPatBinaryV_X<string intrinsic, string instruction> {
foreach vti = AllIntegerVectors in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatBinaryMaskOut<intrinsic, instruction, "VX",
vti.Mask, vti.Vector, XLenVT,
vti.Log2SEW, vti.LMul,
@@ -4638,6 +5506,7 @@ multiclass VPatBinaryV_X<string intrinsic, string instruction> {
multiclass VPatBinaryV_I<string intrinsic, string instruction> {
foreach vti = AllIntegerVectors in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatBinaryMaskOut<intrinsic, instruction, "VI",
vti.Mask, vti.Vector, XLenVT,
vti.Log2SEW, vti.LMul,
@@ -4647,6 +5516,7 @@ multiclass VPatBinaryV_I<string intrinsic, string instruction> {
multiclass VPatBinaryM_VV<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatBinaryM<intrinsic, instruction # "_VV_" # vti.LMul.MX,
vti.Mask, vti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, VR,
@@ -4656,6 +5526,7 @@ multiclass VPatBinaryM_VV<string intrinsic, string instruction,
multiclass VPatBinarySwappedM_VV<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatBinarySwapped<intrinsic, instruction # "_VV_" # vti.LMul.MX,
vti.Mask, vti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, VR,
@@ -4666,6 +5537,7 @@ multiclass VPatBinaryM_VX<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in {
defvar kind = "V"#vti.ScalarSuffix;
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatBinaryM<intrinsic, instruction#"_"#kind#"_"#vti.LMul.MX,
vti.Mask, vti.Vector, vti.Scalar, vti.Mask,
vti.Log2SEW, VR,
@@ -4676,6 +5548,7 @@ multiclass VPatBinaryM_VX<string intrinsic, string instruction,
multiclass VPatBinaryM_VI<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatBinaryM<intrinsic, instruction # "_VI_" # vti.LMul.MX,
vti.Mask, vti.Vector, XLenVT, vti.Mask,
vti.Log2SEW, VR,
@@ -4688,10 +5561,21 @@ multiclass VPatBinaryV_VV_VX_VI<string intrinsic, string instruction,
VPatBinaryV_VX<intrinsic, instruction, vtilist>,
VPatBinaryV_VI<intrinsic, instruction, vtilist, ImmType>;
+multiclass VPatBinaryV_VV_VX_VI_RM<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, Operand ImmType = simm5>
+ : VPatBinaryV_VV_RM<intrinsic, instruction, vtilist>,
+ VPatBinaryV_VX_RM<intrinsic, instruction, vtilist>,
+ VPatBinaryV_VI_RM<intrinsic, instruction, vtilist, ImmType>;
+
multiclass VPatBinaryV_VV_VX<string intrinsic, string instruction,
- list<VTypeInfo> vtilist>
- : VPatBinaryV_VV<intrinsic, instruction, vtilist>,
- VPatBinaryV_VX<intrinsic, instruction, vtilist>;
+ list<VTypeInfo> vtilist, bit isSEWAware = 0>
+ : VPatBinaryV_VV<intrinsic, instruction, vtilist, isSEWAware>,
+ VPatBinaryV_VX<intrinsic, instruction, vtilist, isSEWAware>;
+
+multiclass VPatBinaryV_VV_VX_RM<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist, bit isSEWAware = 0>
+ : VPatBinaryV_VV_RM<intrinsic, instruction, vtilist, isSEWAware>,
+ VPatBinaryV_VX_RM<intrinsic, instruction, vtilist, isSEWAware>;
multiclass VPatBinaryV_VX_VI<string intrinsic, string instruction,
list<VTypeInfo> vtilist>
@@ -4703,26 +5587,42 @@ multiclass VPatBinaryW_VV_VX<string intrinsic, string instruction,
: VPatBinaryW_VV<intrinsic, instruction, vtilist>,
VPatBinaryW_VX<intrinsic, instruction, vtilist>;
+multiclass VPatBinaryW_VV_VX_RM<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist>
+ : VPatBinaryW_VV_RM<intrinsic, instruction, vtilist>,
+ VPatBinaryW_VX_RM<intrinsic, instruction, vtilist>;
+
multiclass VPatBinaryW_WV_WX<string intrinsic, string instruction,
list<VTypeInfoToWide> vtilist>
: VPatBinaryW_WV<intrinsic, instruction, vtilist>,
VPatBinaryW_WX<intrinsic, instruction, vtilist>;
+multiclass VPatBinaryW_WV_WX_RM<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist>
+ : VPatBinaryW_WV_RM<intrinsic, instruction, vtilist>,
+ VPatBinaryW_WX_RM<intrinsic, instruction, vtilist>;
+
multiclass VPatBinaryV_WV_WX_WI<string intrinsic, string instruction,
list<VTypeInfoToWide> vtilist>
: VPatBinaryV_WV<intrinsic, instruction, vtilist>,
VPatBinaryV_WX<intrinsic, instruction, vtilist>,
VPatBinaryV_WI<intrinsic, instruction, vtilist>;
+multiclass VPatBinaryV_WV_WX_WI_RM<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist>
+ : VPatBinaryV_WV_RM<intrinsic, instruction, vtilist>,
+ VPatBinaryV_WX_RM<intrinsic, instruction, vtilist>,
+ VPatBinaryV_WI_RM<intrinsic, instruction, vtilist>;
+
multiclass VPatBinaryV_VM_XM_IM<string intrinsic, string instruction>
: VPatBinaryV_VM_TAIL<intrinsic, instruction>,
VPatBinaryV_XM_TAIL<intrinsic, instruction>,
VPatBinaryV_IM_TAIL<intrinsic, instruction>;
multiclass VPatBinaryM_VM_XM_IM<string intrinsic, string instruction>
- : VPatBinaryV_VM<intrinsic, instruction, /*CarryOut=*/1>,
- VPatBinaryV_XM<intrinsic, instruction, /*CarryOut=*/1>,
- VPatBinaryV_IM<intrinsic, instruction, /*CarryOut=*/1>;
+ : VPatBinaryV_VM<intrinsic, instruction, CarryOut=1>,
+ VPatBinaryV_XM<intrinsic, instruction, CarryOut=1>,
+ VPatBinaryV_IM<intrinsic, instruction, CarryOut=1>;
multiclass VPatBinaryM_V_X_I<string intrinsic, string instruction>
: VPatBinaryV_V<intrinsic, instruction>,
@@ -4734,8 +5634,8 @@ multiclass VPatBinaryV_VM_XM<string intrinsic, string instruction>
VPatBinaryV_XM_TAIL<intrinsic, instruction>;
multiclass VPatBinaryM_VM_XM<string intrinsic, string instruction>
- : VPatBinaryV_VM<intrinsic, instruction, /*CarryOut=*/1>,
- VPatBinaryV_XM<intrinsic, instruction, /*CarryOut=*/1>;
+ : VPatBinaryV_VM<intrinsic, instruction, CarryOut=1>,
+ VPatBinaryV_XM<intrinsic, instruction, CarryOut=1>;
multiclass VPatBinaryM_V_X<string intrinsic, string instruction>
: VPatBinaryV_V<intrinsic, instruction>,
@@ -4801,18 +5701,92 @@ multiclass VPatTernaryWithPolicy<string intrinsic,
op2_kind>;
}
+multiclass VPatTernaryWithPolicyRoundingMode<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ RegisterClass op1_reg_class,
+ DAGOperand op2_kind> {
+ def : VPatTernaryNoMaskWithPolicyRoundingMode<intrinsic, inst, kind, result_type,
+ op1_type, op2_type, sew, vlmul,
+ result_reg_class, op1_reg_class,
+ op2_kind>;
+ def : VPatTernaryMaskPolicyRoundingMode<intrinsic, inst, kind, result_type, op1_type,
+ op2_type, mask_type, sew, vlmul,
+ result_reg_class, op1_reg_class,
+ op2_kind>;
+}
+
+multiclass VPatTernaryTA<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int log2sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ RegisterClass op1_reg_class,
+ DAGOperand op2_kind> {
+ def : VPatTernaryNoMaskTA<intrinsic, inst, kind, result_type, op1_type,
+ op2_type, log2sew, vlmul, result_reg_class,
+ op1_reg_class, op2_kind>;
+ def : VPatTernaryMaskTA<intrinsic, inst, kind, result_type, op1_type,
+ op2_type, mask_type, log2sew, vlmul,
+ result_reg_class, op1_reg_class, op2_kind>;
+}
+
+multiclass VPatTernaryTARoundingMode<string intrinsic,
+ string inst,
+ string kind,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int log2sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ RegisterClass op1_reg_class,
+ DAGOperand op2_kind> {
+ def : VPatTernaryNoMaskTARoundingMode<intrinsic, inst, kind, result_type, op1_type,
+ op2_type, log2sew, vlmul, result_reg_class,
+ op1_reg_class, op2_kind>;
+ def : VPatTernaryMaskTARoundingMode<intrinsic, inst, kind, result_type, op1_type,
+ op2_type, mask_type, log2sew, vlmul,
+ result_reg_class, op1_reg_class, op2_kind>;
+}
+
multiclass VPatTernaryV_VV_AAXA<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatTernaryWithPolicy<intrinsic, instruction, "VV",
vti.Vector, vti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, vti.RegClass,
vti.RegClass, vti.RegClass>;
}
+multiclass VPatTernaryV_VV_AAXA_RM<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatTernaryWithPolicyRoundingMode<intrinsic, instruction, "VV",
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, vti.RegClass>;
+}
+
multiclass VPatTernaryV_VX<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatTernaryWithPolicy<intrinsic, instruction, "VX",
vti.Vector, vti.Vector, XLenVT, vti.Mask,
vti.Log2SEW, vti.LMul, vti.RegClass,
@@ -4822,6 +5796,7 @@ multiclass VPatTernaryV_VX<string intrinsic, string instruction,
multiclass VPatTernaryV_VX_AAXA<string intrinsic, string instruction,
list<VTypeInfo> vtilist> {
foreach vti = vtilist in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatTernaryWithPolicy<intrinsic, instruction,
"V"#vti.ScalarSuffix,
vti.Vector, vti.Scalar, vti.Vector, vti.Mask,
@@ -4829,9 +5804,21 @@ multiclass VPatTernaryV_VX_AAXA<string intrinsic, string instruction,
vti.ScalarRegClass, vti.RegClass>;
}
+multiclass VPatTernaryV_VX_AAXA_RM<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatTernaryWithPolicyRoundingMode<intrinsic, instruction,
+ "V"#vti.ScalarSuffix,
+ vti.Vector, vti.Scalar, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.ScalarRegClass, vti.RegClass>;
+}
+
multiclass VPatTernaryV_VI<string intrinsic, string instruction,
list<VTypeInfo> vtilist, Operand Imm_type> {
foreach vti = vtilist in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatTernaryWithPolicy<intrinsic, instruction, "VI",
vti.Vector, vti.Vector, XLenVT, vti.Mask,
vti.Log2SEW, vti.LMul, vti.RegClass,
@@ -4843,6 +5830,8 @@ multiclass VPatTernaryW_VV<string intrinsic, string instruction,
foreach vtiToWti = vtilist in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in
defm : VPatTernaryWithPolicy<intrinsic, instruction, "VV",
wti.Vector, vti.Vector, vti.Vector,
vti.Mask, vti.Log2SEW, vti.LMul,
@@ -4850,11 +5839,27 @@ multiclass VPatTernaryW_VV<string intrinsic, string instruction,
}
}
+multiclass VPatTernaryW_VV_RM<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach vtiToWti = vtilist in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in
+ defm : VPatTernaryWithPolicyRoundingMode<intrinsic, instruction, "VV",
+ wti.Vector, vti.Vector, vti.Vector,
+ vti.Mask, vti.Log2SEW, vti.LMul,
+ wti.RegClass, vti.RegClass, vti.RegClass>;
+ }
+}
+
multiclass VPatTernaryW_VX<string intrinsic, string instruction,
list<VTypeInfoToWide> vtilist> {
foreach vtiToWti = vtilist in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in
defm : VPatTernaryWithPolicy<intrinsic, instruction,
"V"#vti.ScalarSuffix,
wti.Vector, vti.Scalar, vti.Vector,
@@ -4863,11 +5868,32 @@ multiclass VPatTernaryW_VX<string intrinsic, string instruction,
}
}
+multiclass VPatTernaryW_VX_RM<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist> {
+ foreach vtiToWti = vtilist in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in
+ defm : VPatTernaryWithPolicyRoundingMode<intrinsic, instruction,
+ "V"#vti.ScalarSuffix,
+ wti.Vector, vti.Scalar, vti.Vector,
+ vti.Mask, vti.Log2SEW, vti.LMul,
+ wti.RegClass, vti.ScalarRegClass,
+ vti.RegClass>;
+ }
+}
+
multiclass VPatTernaryV_VV_VX_AAXA<string intrinsic, string instruction,
list<VTypeInfo> vtilist>
: VPatTernaryV_VV_AAXA<intrinsic, instruction, vtilist>,
VPatTernaryV_VX_AAXA<intrinsic, instruction, vtilist>;
+multiclass VPatTernaryV_VV_VX_AAXA_RM<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist>
+ : VPatTernaryV_VV_AAXA_RM<intrinsic, instruction, vtilist>,
+ VPatTernaryV_VX_AAXA_RM<intrinsic, instruction, vtilist>;
+
multiclass VPatTernaryV_VX_VI<string intrinsic, string instruction,
list<VTypeInfo> vtilist, Operand Imm_type = simm5>
: VPatTernaryV_VX<intrinsic, instruction, vtilist>,
@@ -4885,6 +5911,11 @@ multiclass VPatTernaryW_VV_VX<string intrinsic, string instruction,
: VPatTernaryW_VV<intrinsic, instruction, vtilist>,
VPatTernaryW_VX<intrinsic, instruction, vtilist>;
+multiclass VPatTernaryW_VV_VX_RM<string intrinsic, string instruction,
+ list<VTypeInfoToWide> vtilist>
+ : VPatTernaryW_VV_RM<intrinsic, instruction, vtilist>,
+ VPatTernaryW_VX_RM<intrinsic, instruction, vtilist>;
+
multiclass VPatBinaryM_VV_VX<string intrinsic, string instruction,
list<VTypeInfo> vtilist>
: VPatBinaryM_VV<intrinsic, instruction, vtilist>,
@@ -4902,97 +5933,155 @@ multiclass VPatBinaryV_VV_VX_VI_INT<string intrinsic, string instruction,
VPatBinaryV_VI<intrinsic#"_vx", instruction, vtilist, ImmType>;
multiclass VPatReductionV_VS<string intrinsic, string instruction, bit IsFloat = 0> {
- foreach vti = !if(IsFloat, NoGroupFloatVectors, NoGroupIntegerVectors) in
- {
+ foreach vti = !if(IsFloat, NoGroupFloatVectors, NoGroupIntegerVectors) in {
+ defvar vectorM1 = !cast<VTypeInfo>(!if(IsFloat, "VF", "VI") # vti.SEW # "M1");
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatTernaryTA<intrinsic, instruction, "VS",
+ vectorM1.Vector, vti.Vector,
+ vectorM1.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul,
+ VR, vti.RegClass, VR>;
+ }
+ foreach gvti = !if(IsFloat, GroupFloatVectors, GroupIntegerVectors) in {
+ let Predicates = GetVTypePredicates<gvti>.Predicates in
+ defm : VPatTernaryTA<intrinsic, instruction, "VS",
+ gvti.VectorM1, gvti.Vector,
+ gvti.VectorM1, gvti.Mask,
+ gvti.Log2SEW, gvti.LMul,
+ VR, gvti.RegClass, VR>;
+ }
+}
+
+multiclass VPatReductionV_VS_RM<string intrinsic, string instruction, bit IsFloat = 0> {
+ foreach vti = !if(IsFloat, NoGroupFloatVectors, NoGroupIntegerVectors) in {
defvar vectorM1 = !cast<VTypeInfo>(!if(IsFloat, "VF", "VI") # vti.SEW # "M1");
- defm : VPatTernary<intrinsic, instruction, "VS",
- vectorM1.Vector, vti.Vector,
- vectorM1.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul,
- VR, vti.RegClass, VR>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatTernaryTARoundingMode<intrinsic, instruction, "VS",
+ vectorM1.Vector, vti.Vector,
+ vectorM1.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul,
+ VR, vti.RegClass, VR>;
}
- foreach gvti = !if(IsFloat, GroupFloatVectors, GroupIntegerVectors) in
- {
- defm : VPatTernary<intrinsic, instruction, "VS",
- gvti.VectorM1, gvti.Vector,
- gvti.VectorM1, gvti.Mask,
- gvti.Log2SEW, gvti.LMul,
- VR, gvti.RegClass, VR>;
+ foreach gvti = !if(IsFloat, GroupFloatVectors, GroupIntegerVectors) in {
+ let Predicates = GetVTypePredicates<gvti>.Predicates in
+ defm : VPatTernaryTARoundingMode<intrinsic, instruction, "VS",
+ gvti.VectorM1, gvti.Vector,
+ gvti.VectorM1, gvti.Mask,
+ gvti.Log2SEW, gvti.LMul,
+ VR, gvti.RegClass, VR>;
}
}
multiclass VPatReductionW_VS<string intrinsic, string instruction, bit IsFloat = 0> {
- foreach vti = !if(IsFloat, AllFloatVectors, AllIntegerVectors) in
- {
+ foreach vti = !if(IsFloat, AllFloatVectors, AllIntegerVectors) in {
defvar wtiSEW = !mul(vti.SEW, 2);
if !le(wtiSEW, 64) then {
defvar wtiM1 = !cast<VTypeInfo>(!if(IsFloat, "VF", "VI") # wtiSEW # "M1");
- defm : VPatTernary<intrinsic, instruction, "VS",
- wtiM1.Vector, vti.Vector,
- wtiM1.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul,
- wtiM1.RegClass, vti.RegClass,
- wtiM1.RegClass>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatTernaryTA<intrinsic, instruction, "VS",
+ wtiM1.Vector, vti.Vector,
+ wtiM1.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul,
+ wtiM1.RegClass, vti.RegClass,
+ wtiM1.RegClass>;
+ }
+ }
+}
+
+multiclass VPatReductionW_VS_RM<string intrinsic, string instruction, bit IsFloat = 0> {
+ foreach vti = !if(IsFloat, AllFloatVectors, AllIntegerVectors) in {
+ defvar wtiSEW = !mul(vti.SEW, 2);
+ if !le(wtiSEW, 64) then {
+ defvar wtiM1 = !cast<VTypeInfo>(!if(IsFloat, "VF", "VI") # wtiSEW # "M1");
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ defm : VPatTernaryTARoundingMode<intrinsic, instruction, "VS",
+ wtiM1.Vector, vti.Vector,
+ wtiM1.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul,
+ wtiM1.RegClass, vti.RegClass,
+ wtiM1.RegClass>;
}
}
}
multiclass VPatConversionVI_VF<string intrinsic,
- string instruction>
-{
- foreach fvti = AllFloatVectors in
- {
+ string instruction> {
+ foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
-
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<ivti>.Predicates) in
defm : VPatConversionTA<intrinsic, instruction, "V",
ivti.Vector, fvti.Vector, ivti.Mask, fvti.Log2SEW,
fvti.LMul, ivti.RegClass, fvti.RegClass>;
}
}
-multiclass VPatConversionVF_VI<string intrinsic,
- string instruction>
-{
- foreach fvti = AllFloatVectors in
- {
+multiclass VPatConversionVI_VF_RM<string intrinsic,
+ string instruction> {
+ foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<ivti>.Predicates) in
+ defm : VPatConversionTARoundingMode<intrinsic, instruction, "V",
+ ivti.Vector, fvti.Vector, ivti.Mask, fvti.Log2SEW,
+ fvti.LMul, ivti.RegClass, fvti.RegClass>;
+ }
+}
- defm : VPatConversionTA<intrinsic, instruction, "V",
- fvti.Vector, ivti.Vector, fvti.Mask, ivti.Log2SEW,
- ivti.LMul, fvti.RegClass, ivti.RegClass>;
+multiclass VPatConversionVF_VI_RM<string intrinsic,
+ string instruction> {
+ foreach fvti = AllFloatVectors in {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<ivti>.Predicates) in
+ defm : VPatConversionTARoundingMode<intrinsic, instruction, "V",
+ fvti.Vector, ivti.Vector, fvti.Mask, ivti.Log2SEW,
+ ivti.LMul, fvti.RegClass, ivti.RegClass>;
}
}
multiclass VPatConversionWI_VF<string intrinsic, string instruction> {
- foreach fvtiToFWti = AllWidenableFloatVectors in
- {
+ foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
-
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<iwti>.Predicates) in
defm : VPatConversionTA<intrinsic, instruction, "V",
iwti.Vector, fvti.Vector, iwti.Mask, fvti.Log2SEW,
fvti.LMul, iwti.RegClass, fvti.RegClass>;
}
}
+multiclass VPatConversionWI_VF_RM<string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<iwti>.Predicates) in
+ defm : VPatConversionTARoundingMode<intrinsic, instruction, "V",
+ iwti.Vector, fvti.Vector, iwti.Mask, fvti.Log2SEW,
+ fvti.LMul, iwti.RegClass, fvti.RegClass>;
+ }
+}
+
multiclass VPatConversionWF_VI<string intrinsic, string instruction> {
- foreach vtiToWti = AllWidenableIntToFloatVectors in
- {
+ foreach vtiToWti = AllWidenableIntToFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
-
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
defm : VPatConversionTA<intrinsic, instruction, "V",
fwti.Vector, vti.Vector, fwti.Mask, vti.Log2SEW,
vti.LMul, fwti.RegClass, vti.RegClass>;
}
}
-multiclass VPatConversionWF_VF <string intrinsic, string instruction> {
- foreach fvtiToFWti = AllWidenableFloatVectors in
- {
+multiclass VPatConversionWF_VF<string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
-
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
defm : VPatConversionTA<intrinsic, instruction, "V",
fwti.Vector, fvti.Vector, fwti.Mask, fvti.Log2SEW,
fvti.LMul, fwti.RegClass, fvti.RegClass>;
@@ -5000,46 +6089,71 @@ multiclass VPatConversionWF_VF <string intrinsic, string instruction> {
}
multiclass VPatConversionVI_WF <string intrinsic, string instruction> {
- foreach vtiToWti = AllWidenableIntToFloatVectors in
- {
+ foreach vtiToWti = AllWidenableIntToFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
-
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
defm : VPatConversionTA<intrinsic, instruction, "W",
vti.Vector, fwti.Vector, vti.Mask, vti.Log2SEW,
vti.LMul, vti.RegClass, fwti.RegClass>;
}
}
-multiclass VPatConversionVF_WI <string intrinsic, string instruction> {
- foreach fvtiToFWti = AllWidenableFloatVectors in
- {
+multiclass VPatConversionVI_WF_RM <string intrinsic, string instruction> {
+ foreach vtiToWti = AllWidenableIntToFloatVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar fwti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversionTARoundingMode<intrinsic, instruction, "W",
+ vti.Vector, fwti.Vector, vti.Mask, vti.Log2SEW,
+ vti.LMul, vti.RegClass, fwti.RegClass>;
+ }
+}
+
+multiclass VPatConversionVF_WI_RM <string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
-
- defm : VPatConversionTA<intrinsic, instruction, "W",
- fvti.Vector, iwti.Vector, fvti.Mask, fvti.Log2SEW,
- fvti.LMul, fvti.RegClass, iwti.RegClass>;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<iwti>.Predicates) in
+ defm : VPatConversionTARoundingMode<intrinsic, instruction, "W",
+ fvti.Vector, iwti.Vector, fvti.Mask, fvti.Log2SEW,
+ fvti.LMul, fvti.RegClass, iwti.RegClass>;
}
}
multiclass VPatConversionVF_WF <string intrinsic, string instruction> {
- foreach fvtiToFWti = AllWidenableFloatVectors in
- {
+ foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
-
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
defm : VPatConversionTA<intrinsic, instruction, "W",
fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
fvti.LMul, fvti.RegClass, fwti.RegClass>;
}
}
+multiclass VPatConversionVF_WF_RM <string intrinsic, string instruction> {
+ foreach fvtiToFWti = AllWidenableFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ defm : VPatConversionTARoundingMode<intrinsic, instruction, "W",
+ fvti.Vector, fwti.Vector, fvti.Mask, fvti.Log2SEW,
+ fvti.LMul, fvti.RegClass, fwti.RegClass>;
+ }
+}
+
multiclass VPatCompare_VI<string intrinsic, string inst,
ImmLeaf ImmType> {
foreach vti = AllIntegerVectors in {
defvar Intr = !cast<Intrinsic>(intrinsic);
defvar Pseudo = !cast<Instruction>(inst#"_VI_"#vti.LMul.MX);
+ let Predicates = GetVTypePredicates<vti>.Predicates in
def : Pat<(vti.Mask (Intr (vti.Vector vti.RegClass:$rs1),
(vti.Scalar ImmType:$rs2),
VLOpFrag)),
@@ -5047,6 +6161,7 @@ multiclass VPatCompare_VI<string intrinsic, string inst,
GPR:$vl, vti.Log2SEW)>;
defvar IntrMask = !cast<Intrinsic>(intrinsic # "_mask");
defvar PseudoMask = !cast<Instruction>(inst#"_VI_"#vti.LMul.MX#"_MASK");
+ let Predicates = GetVTypePredicates<vti>.Predicates in
def : Pat<(vti.Mask (IntrMask (vti.Mask VR:$merge),
(vti.Vector vti.RegClass:$rs1),
(vti.Scalar ImmType:$rs2),
@@ -5070,12 +6185,14 @@ let Predicates = [HasVInstructions] in {
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in {
def PseudoReadVLENB : Pseudo<(outs GPR:$rd), (ins),
[(set GPR:$rd, (riscv_read_vlenb))]>,
+ PseudoInstExpansion<(CSRRS GPR:$rd, SysRegVLENB.Encoding, X0)>,
Sched<[WriteRdVLENB]>;
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1,
Uses = [VL] in
-def PseudoReadVL : Pseudo<(outs GPR:$rd), (ins), []>;
+def PseudoReadVL : Pseudo<(outs GPR:$rd), (ins), []>,
+ PseudoInstExpansion<(CSRRS GPR:$rd, SysRegVL.Encoding, X0)>;
foreach lmul = MxList in {
foreach nf = NFSet<lmul>.L in {
@@ -5093,6 +6210,15 @@ foreach lmul = MxList in {
}
}
+/// Empty pseudo for RISCVInitUndefPass
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 0,
+ isCodeGenOnly = 1 in {
+ def PseudoRVVInitUndefM1 : Pseudo<(outs VR:$vd), (ins), [], "">;
+ def PseudoRVVInitUndefM2 : Pseudo<(outs VRM2:$vd), (ins), [], "">;
+ def PseudoRVVInitUndefM4 : Pseudo<(outs VRM4:$vd), (ins), [], "">;
+ def PseudoRVVInitUndefM8 : Pseudo<(outs VRM8:$vd), (ins), [], "">;
+}
+
//===----------------------------------------------------------------------===//
// 6. Configuration-Setting Instructions
//===----------------------------------------------------------------------===//
@@ -5139,10 +6265,10 @@ defm PseudoVSS : VPseudoSStore;
//===----------------------------------------------------------------------===//
// Vector Indexed Loads and Stores
-defm PseudoVLUX : VPseudoILoad</*Ordered=*/false>;
-defm PseudoVLOX : VPseudoILoad</*Ordered=*/true>;
-defm PseudoVSOX : VPseudoIStore</*Ordered=*/true>;
-defm PseudoVSUX : VPseudoIStore</*Ordered=*/false>;
+defm PseudoVLUX : VPseudoILoad<Ordered=false>;
+defm PseudoVLOX : VPseudoILoad<Ordered=true>;
+defm PseudoVSOX : VPseudoIStore<Ordered=true>;
+defm PseudoVSUX : VPseudoIStore<Ordered=false>;
//===----------------------------------------------------------------------===//
// 7.7. Unit-stride Fault-Only-First Loads
@@ -5157,12 +6283,12 @@ defm PseudoVL : VPseudoFFLoad;
//===----------------------------------------------------------------------===//
defm PseudoVLSEG : VPseudoUSSegLoad;
defm PseudoVLSSEG : VPseudoSSegLoad;
-defm PseudoVLOXSEG : VPseudoISegLoad</*Ordered=*/true>;
-defm PseudoVLUXSEG : VPseudoISegLoad</*Ordered=*/false>;
+defm PseudoVLOXSEG : VPseudoISegLoad<Ordered=true>;
+defm PseudoVLUXSEG : VPseudoISegLoad<Ordered=false>;
defm PseudoVSSEG : VPseudoUSSegStore;
defm PseudoVSSSEG : VPseudoSSegStore;
-defm PseudoVSOXSEG : VPseudoISegStore</*Ordered=*/true>;
-defm PseudoVSUXSEG : VPseudoISegStore</*Ordered=*/false>;
+defm PseudoVSOXSEG : VPseudoISegStore<Ordered=true>;
+defm PseudoVSUXSEG : VPseudoISegStore<Ordered=false>;
// vlseg<nf>e<eew>ff.v may update VL register
let hasSideEffects = 1, Defs = [VL] in {
@@ -5185,62 +6311,57 @@ foreach vti = AllIntegerVectors in {
// Occurs when legalizing vrsub.vx intrinsics for i64 on RV32 since we need
// to use a more complex splat sequence. Add the pattern for all VTs for
// consistency.
- def : Pat<(vti.Vector (int_riscv_vrsub (vti.Vector (undef)),
- (vti.Vector vti.RegClass:$rs2),
- (vti.Vector vti.RegClass:$rs1),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVSUB_VV_"#vti.LMul.MX) vti.RegClass:$rs1,
- vti.RegClass:$rs2,
- GPR:$vl,
- vti.Log2SEW)>;
- def : Pat<(vti.Vector (int_riscv_vrsub (vti.Vector vti.RegClass:$merge),
- (vti.Vector vti.RegClass:$rs2),
- (vti.Vector vti.RegClass:$rs1),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVSUB_VV_"#vti.LMul.MX#"_TU")
- vti.RegClass:$merge,
- vti.RegClass:$rs1,
- vti.RegClass:$rs2,
- GPR:$vl,
- vti.Log2SEW)>;
- def : Pat<(vti.Vector (int_riscv_vrsub_mask (vti.Vector vti.RegClass:$merge),
- (vti.Vector vti.RegClass:$rs2),
- (vti.Vector vti.RegClass:$rs1),
- (vti.Mask V0),
- VLOpFrag,
- (XLenVT timm:$policy))),
- (!cast<Instruction>("PseudoVSUB_VV_"#vti.LMul.MX#"_MASK")
- vti.RegClass:$merge,
- vti.RegClass:$rs1,
- vti.RegClass:$rs2,
- (vti.Mask V0),
- GPR:$vl,
- vti.Log2SEW,
- (XLenVT timm:$policy))>;
-
- // Match VSUB with a small immediate to vadd.vi by negating the immediate.
- def : Pat<(vti.Vector (int_riscv_vsub (vti.Vector (undef)),
- (vti.Vector vti.RegClass:$rs1),
- (vti.Scalar simm5_plus1:$rs2),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVADD_VI_"#vti.LMul.MX) vti.RegClass:$rs1,
- (NegImm simm5_plus1:$rs2),
- GPR:$vl,
- vti.Log2SEW)>;
- def : Pat<(vti.Vector (int_riscv_vsub_mask (vti.Vector vti.RegClass:$merge),
- (vti.Vector vti.RegClass:$rs1),
- (vti.Scalar simm5_plus1:$rs2),
- (vti.Mask V0),
- VLOpFrag,
- (XLenVT timm:$policy))),
- (!cast<Instruction>("PseudoVADD_VI_"#vti.LMul.MX#"_MASK")
- vti.RegClass:$merge,
- vti.RegClass:$rs1,
- (NegImm simm5_plus1:$rs2),
- (vti.Mask V0),
- GPR:$vl,
- vti.Log2SEW,
- (XLenVT timm:$policy))>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (int_riscv_vrsub (vti.Vector vti.RegClass:$merge),
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Vector vti.RegClass:$rs1),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVSUB_VV_"#vti.LMul.MX)
+ vti.RegClass:$merge,
+ vti.RegClass:$rs1,
+ vti.RegClass:$rs2,
+ GPR:$vl,
+ vti.Log2SEW, TU_MU)>;
+ def : Pat<(vti.Vector (int_riscv_vrsub_mask (vti.Vector vti.RegClass:$merge),
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask V0),
+ VLOpFrag,
+ (XLenVT timm:$policy))),
+ (!cast<Instruction>("PseudoVSUB_VV_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge,
+ vti.RegClass:$rs1,
+ vti.RegClass:$rs2,
+ (vti.Mask V0),
+ GPR:$vl,
+ vti.Log2SEW,
+ (XLenVT timm:$policy))>;
+
+ // Match VSUB with a small immediate to vadd.vi by negating the immediate.
+ def : Pat<(vti.Vector (int_riscv_vsub (vti.Vector (undef)),
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Scalar simm5_plus1:$rs2),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVADD_VI_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1,
+ (NegImm simm5_plus1:$rs2),
+ GPR:$vl,
+ vti.Log2SEW, TU_MU)>;
+ def : Pat<(vti.Vector (int_riscv_vsub_mask (vti.Vector vti.RegClass:$merge),
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Scalar simm5_plus1:$rs2),
+ (vti.Mask V0),
+ VLOpFrag,
+ (XLenVT timm:$policy))),
+ (!cast<Instruction>("PseudoVADD_VI_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge,
+ vti.RegClass:$rs1,
+ (NegImm simm5_plus1:$rs2),
+ (vti.Mask V0),
+ GPR:$vl,
+ vti.Log2SEW,
+ (XLenVT timm:$policy))>;
+ }
}
//===----------------------------------------------------------------------===//
@@ -5382,34 +6503,30 @@ let Defs = [VXSAT], hasSideEffects = 1 in {
//===----------------------------------------------------------------------===//
// 12.2. Vector Single-Width Averaging Add and Subtract
//===----------------------------------------------------------------------===//
-let Uses = [VXRM], hasSideEffects = 1 in {
- defm PseudoVAADDU : VPseudoVAALU_VV_VX;
- defm PseudoVAADD : VPseudoVAALU_VV_VX;
- defm PseudoVASUBU : VPseudoVAALU_VV_VX;
- defm PseudoVASUB : VPseudoVAALU_VV_VX;
-}
+defm PseudoVAADDU : VPseudoVAALU_VV_VX_RM;
+defm PseudoVAADD : VPseudoVAALU_VV_VX_RM;
+defm PseudoVASUBU : VPseudoVAALU_VV_VX_RM;
+defm PseudoVASUB : VPseudoVAALU_VV_VX_RM;
//===----------------------------------------------------------------------===//
// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
//===----------------------------------------------------------------------===//
-let Uses = [VXRM], Defs = [VXSAT], hasSideEffects = 1 in {
- defm PseudoVSMUL : VPseudoVSMUL_VV_VX;
+let Defs = [VXSAT], hasSideEffects = 1 in {
+ defm PseudoVSMUL : VPseudoVSMUL_VV_VX_RM;
}
//===----------------------------------------------------------------------===//
// 12.4. Vector Single-Width Scaling Shift Instructions
//===----------------------------------------------------------------------===//
-let Uses = [VXRM], hasSideEffects = 1 in {
- defm PseudoVSSRL : VPseudoVSSHT_VV_VX_VI<uimm5>;
- defm PseudoVSSRA : VPseudoVSSHT_VV_VX_VI<uimm5>;
-}
+defm PseudoVSSRL : VPseudoVSSHT_VV_VX_VI_RM<uimm5>;
+defm PseudoVSSRA : VPseudoVSSHT_VV_VX_VI_RM<uimm5>;
//===----------------------------------------------------------------------===//
// 12.5. Vector Narrowing Fixed-Point Clip Instructions
//===----------------------------------------------------------------------===//
-let Uses = [VXRM], Defs = [VXSAT], hasSideEffects = 1 in {
- defm PseudoVNCLIP : VPseudoVNCLP_WV_WX_WI;
- defm PseudoVNCLIPU : VPseudoVNCLP_WV_WX_WI;
+let Defs = [VXSAT], hasSideEffects = 1 in {
+ defm PseudoVNCLIP : VPseudoVNCLP_WV_WX_WI_RM;
+ defm PseudoVNCLIPU : VPseudoVNCLP_WV_WX_WI_RM;
}
} // Predicates = [HasVInstructions]
@@ -5422,67 +6539,67 @@ let Predicates = [HasVInstructionsAnyF] in {
//===----------------------------------------------------------------------===//
// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
//===----------------------------------------------------------------------===//
-let Uses = [FRM], mayRaiseFPException = true in {
-defm PseudoVFADD : VPseudoVALU_VV_VF;
-defm PseudoVFSUB : VPseudoVALU_VV_VF;
-defm PseudoVFRSUB : VPseudoVALU_VF;
+let mayRaiseFPException = true, hasPostISelHook = 1 in {
+defm PseudoVFADD : VPseudoVALU_VV_VF_RM;
+defm PseudoVFSUB : VPseudoVALU_VV_VF_RM;
+defm PseudoVFRSUB : VPseudoVALU_VF_RM;
}
//===----------------------------------------------------------------------===//
// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
//===----------------------------------------------------------------------===//
-let Uses = [FRM], mayRaiseFPException = true in {
-defm PseudoVFWADD : VPseudoVFWALU_VV_VF;
-defm PseudoVFWSUB : VPseudoVFWALU_VV_VF;
-defm PseudoVFWADD : VPseudoVFWALU_WV_WF;
-defm PseudoVFWSUB : VPseudoVFWALU_WV_WF;
+let mayRaiseFPException = true, hasSideEffects = 0, hasPostISelHook = 1 in {
+defm PseudoVFWADD : VPseudoVFWALU_VV_VF_RM;
+defm PseudoVFWSUB : VPseudoVFWALU_VV_VF_RM;
+defm PseudoVFWADD : VPseudoVFWALU_WV_WF_RM;
+defm PseudoVFWSUB : VPseudoVFWALU_WV_WF_RM;
}
//===----------------------------------------------------------------------===//
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
//===----------------------------------------------------------------------===//
-let Uses = [FRM], mayRaiseFPException = true in {
-defm PseudoVFMUL : VPseudoVFMUL_VV_VF;
-defm PseudoVFDIV : VPseudoVFDIV_VV_VF;
-defm PseudoVFRDIV : VPseudoVFRDIV_VF;
+let mayRaiseFPException = true, hasSideEffects = 0, hasPostISelHook = 1 in {
+defm PseudoVFMUL : VPseudoVFMUL_VV_VF_RM;
+defm PseudoVFDIV : VPseudoVFDIV_VV_VF_RM;
+defm PseudoVFRDIV : VPseudoVFRDIV_VF_RM;
}
//===----------------------------------------------------------------------===//
// 13.5. Vector Widening Floating-Point Multiply
//===----------------------------------------------------------------------===//
-let Uses = [FRM], mayRaiseFPException = true in {
-defm PseudoVFWMUL : VPseudoVWMUL_VV_VF;
+let mayRaiseFPException = true, hasSideEffects = 0 in {
+defm PseudoVFWMUL : VPseudoVWMUL_VV_VF_RM;
}
//===----------------------------------------------------------------------===//
// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
//===----------------------------------------------------------------------===//
-let Uses = [FRM], mayRaiseFPException = true in {
-defm PseudoVFMACC : VPseudoVMAC_VV_VF_AAXA;
-defm PseudoVFNMACC : VPseudoVMAC_VV_VF_AAXA;
-defm PseudoVFMSAC : VPseudoVMAC_VV_VF_AAXA;
-defm PseudoVFNMSAC : VPseudoVMAC_VV_VF_AAXA;
-defm PseudoVFMADD : VPseudoVMAC_VV_VF_AAXA;
-defm PseudoVFNMADD : VPseudoVMAC_VV_VF_AAXA;
-defm PseudoVFMSUB : VPseudoVMAC_VV_VF_AAXA;
-defm PseudoVFNMSUB : VPseudoVMAC_VV_VF_AAXA;
+let mayRaiseFPException = true, hasSideEffects = 0, hasPostISelHook = 1 in {
+defm PseudoVFMACC : VPseudoVMAC_VV_VF_AAXA_RM;
+defm PseudoVFNMACC : VPseudoVMAC_VV_VF_AAXA_RM;
+defm PseudoVFMSAC : VPseudoVMAC_VV_VF_AAXA_RM;
+defm PseudoVFNMSAC : VPseudoVMAC_VV_VF_AAXA_RM;
+defm PseudoVFMADD : VPseudoVMAC_VV_VF_AAXA_RM;
+defm PseudoVFNMADD : VPseudoVMAC_VV_VF_AAXA_RM;
+defm PseudoVFMSUB : VPseudoVMAC_VV_VF_AAXA_RM;
+defm PseudoVFNMSUB : VPseudoVMAC_VV_VF_AAXA_RM;
}
//===----------------------------------------------------------------------===//
// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
//===----------------------------------------------------------------------===//
-let Uses = [FRM], mayRaiseFPException = true in {
-defm PseudoVFWMACC : VPseudoVWMAC_VV_VF;
-defm PseudoVFWNMACC : VPseudoVWMAC_VV_VF;
-defm PseudoVFWMSAC : VPseudoVWMAC_VV_VF;
-defm PseudoVFWNMSAC : VPseudoVWMAC_VV_VF;
+let mayRaiseFPException = true, hasSideEffects = 0, hasPostISelHook = 1 in {
+defm PseudoVFWMACC : VPseudoVWMAC_VV_VF_RM;
+defm PseudoVFWNMACC : VPseudoVWMAC_VV_VF_RM;
+defm PseudoVFWMSAC : VPseudoVWMAC_VV_VF_RM;
+defm PseudoVFWNMSAC : VPseudoVWMAC_VV_VF_RM;
}
//===----------------------------------------------------------------------===//
// 13.8. Vector Floating-Point Square-Root Instruction
//===----------------------------------------------------------------------===//
-let Uses = [FRM], mayRaiseFPException = true in
-defm PseudoVFSQRT : VPseudoVSQR_V;
+let mayRaiseFPException = true, hasSideEffects = 0 in
+defm PseudoVFSQRT : VPseudoVSQR_V_RM;
//===----------------------------------------------------------------------===//
// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
@@ -5493,8 +6610,8 @@ defm PseudoVFRSQRT7 : VPseudoVRCP_V;
//===----------------------------------------------------------------------===//
// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
//===----------------------------------------------------------------------===//
-let Uses = [FRM], mayRaiseFPException = true in
-defm PseudoVFREC7 : VPseudoVRCP_V;
+let mayRaiseFPException = true, hasSideEffects = 0 in
+defm PseudoVFREC7 : VPseudoVRCP_V_RM;
//===----------------------------------------------------------------------===//
// 13.11. Vector Floating-Point Min/Max Instructions
@@ -5542,9 +6659,9 @@ defm PseudoVFMV_V : VPseudoVMV_F;
// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions
//===----------------------------------------------------------------------===//
let mayRaiseFPException = true in {
-let Uses = [FRM] in {
-defm PseudoVFCVT_XU_F : VPseudoVCVTI_V;
-defm PseudoVFCVT_X_F : VPseudoVCVTI_V;
+let hasSideEffects = 0, hasPostISelHook = 1 in {
+defm PseudoVFCVT_XU_F : VPseudoVCVTI_V_RM;
+defm PseudoVFCVT_X_F : VPseudoVCVTI_V_RM;
}
defm PseudoVFCVT_RM_XU_F : VPseudoVCVTI_RM_V;
@@ -5554,9 +6671,9 @@ defm PseudoVFCVT_RTZ_XU_F : VPseudoVCVTI_V;
defm PseudoVFCVT_RTZ_X_F : VPseudoVCVTI_V;
defm PseudoVFROUND_NOEXCEPT : VPseudoVFROUND_NOEXCEPT_V;
-let Uses = [FRM] in {
-defm PseudoVFCVT_F_XU : VPseudoVCVTF_V;
-defm PseudoVFCVT_F_X : VPseudoVCVTF_V;
+let hasSideEffects = 0, hasPostISelHook = 1 in {
+defm PseudoVFCVT_F_XU : VPseudoVCVTF_V_RM;
+defm PseudoVFCVT_F_X : VPseudoVCVTF_V_RM;
}
defm PseudoVFCVT_RM_F_XU : VPseudoVCVTF_RM_V;
defm PseudoVFCVT_RM_F_X : VPseudoVCVTF_RM_V;
@@ -5566,9 +6683,9 @@ defm PseudoVFCVT_RM_F_X : VPseudoVCVTF_RM_V;
// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
//===----------------------------------------------------------------------===//
let mayRaiseFPException = true in {
-let Uses = [FRM] in {
-defm PseudoVFWCVT_XU_F : VPseudoVWCVTI_V;
-defm PseudoVFWCVT_X_F : VPseudoVWCVTI_V;
+let hasSideEffects = 0, hasPostISelHook = 1 in {
+defm PseudoVFWCVT_XU_F : VPseudoVWCVTI_V_RM;
+defm PseudoVFWCVT_X_F : VPseudoVWCVTI_V_RM;
}
defm PseudoVFWCVT_RM_XU_F : VPseudoVWCVTI_RM_V;
defm PseudoVFWCVT_RM_X_F : VPseudoVWCVTI_RM_V;
@@ -5576,12 +6693,8 @@ defm PseudoVFWCVT_RM_X_F : VPseudoVWCVTI_RM_V;
defm PseudoVFWCVT_RTZ_XU_F : VPseudoVWCVTI_V;
defm PseudoVFWCVT_RTZ_X_F : VPseudoVWCVTI_V;
-let Uses = [FRM] in {
defm PseudoVFWCVT_F_XU : VPseudoVWCVTF_V;
defm PseudoVFWCVT_F_X : VPseudoVWCVTF_V;
-}
-defm PseudoVFWCVT_RM_F_XU : VPseudoVWCVTF_RM_V;
-defm PseudoVFWCVT_RM_F_X : VPseudoVWCVTF_RM_V;
defm PseudoVFWCVT_F_F : VPseudoVWCVTD_V;
} // mayRaiseFPException = true
@@ -5590,9 +6703,9 @@ defm PseudoVFWCVT_F_F : VPseudoVWCVTD_V;
// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
//===----------------------------------------------------------------------===//
let mayRaiseFPException = true in {
-let Uses = [FRM] in {
-defm PseudoVFNCVT_XU_F : VPseudoVNCVTI_W;
-defm PseudoVFNCVT_X_F : VPseudoVNCVTI_W;
+let hasSideEffects = 0, hasPostISelHook = 1 in {
+defm PseudoVFNCVT_XU_F : VPseudoVNCVTI_W_RM;
+defm PseudoVFNCVT_X_F : VPseudoVNCVTI_W_RM;
}
defm PseudoVFNCVT_RM_XU_F : VPseudoVNCVTI_RM_W;
defm PseudoVFNCVT_RM_X_F : VPseudoVNCVTI_RM_W;
@@ -5600,15 +6713,15 @@ defm PseudoVFNCVT_RM_X_F : VPseudoVNCVTI_RM_W;
defm PseudoVFNCVT_RTZ_XU_F : VPseudoVNCVTI_W;
defm PseudoVFNCVT_RTZ_X_F : VPseudoVNCVTI_W;
-let Uses = [FRM] in {
-defm PseudoVFNCVT_F_XU : VPseudoVNCVTF_W;
-defm PseudoVFNCVT_F_X : VPseudoVNCVTF_W;
+let hasSideEffects = 0, hasPostISelHook = 1 in {
+defm PseudoVFNCVT_F_XU : VPseudoVNCVTF_W_RM;
+defm PseudoVFNCVT_F_X : VPseudoVNCVTF_W_RM;
}
defm PseudoVFNCVT_RM_F_XU : VPseudoVNCVTF_RM_W;
defm PseudoVFNCVT_RM_F_X : VPseudoVNCVTF_RM_W;
-let Uses = [FRM] in
-defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W;
+let hasSideEffects = 0, hasPostISelHook = 1 in
+defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W_RM;
defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W;
} // mayRaiseFPException = true
@@ -5626,10 +6739,10 @@ defm PseudoVREDSUM : VPseudoVRED_VS;
defm PseudoVREDAND : VPseudoVRED_VS;
defm PseudoVREDOR : VPseudoVRED_VS;
defm PseudoVREDXOR : VPseudoVRED_VS;
-defm PseudoVREDMINU : VPseudoVRED_VS;
-defm PseudoVREDMIN : VPseudoVRED_VS;
-defm PseudoVREDMAXU : VPseudoVRED_VS;
-defm PseudoVREDMAX : VPseudoVRED_VS;
+defm PseudoVREDMINU : VPseudoVREDMINMAX_VS;
+defm PseudoVREDMIN : VPseudoVREDMINMAX_VS;
+defm PseudoVREDMAXU : VPseudoVREDMINMAX_VS;
+defm PseudoVREDMAX : VPseudoVREDMINMAX_VS;
//===----------------------------------------------------------------------===//
// 14.2. Vector Widening Integer Reduction Instructions
@@ -5644,23 +6757,24 @@ let Predicates = [HasVInstructionsAnyF] in {
//===----------------------------------------------------------------------===//
// 14.3. Vector Single-Width Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
-let Uses = [FRM], mayRaiseFPException = true in {
-defm PseudoVFREDOSUM : VPseudoVFREDO_VS;
-defm PseudoVFREDUSUM : VPseudoVFRED_VS;
+let mayRaiseFPException = true,
+ hasSideEffects = 0 in {
+defm PseudoVFREDOSUM : VPseudoVFREDO_VS_RM;
+defm PseudoVFREDUSUM : VPseudoVFRED_VS_RM;
}
let mayRaiseFPException = true in {
-defm PseudoVFREDMIN : VPseudoVFRED_VS;
-defm PseudoVFREDMAX : VPseudoVFRED_VS;
+defm PseudoVFREDMIN : VPseudoVFREDMINMAX_VS;
+defm PseudoVFREDMAX : VPseudoVFREDMINMAX_VS;
}
//===----------------------------------------------------------------------===//
// 14.4. Vector Widening Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
let IsRVVWideningReduction = 1,
- Uses = [FRM],
+ hasSideEffects = 0,
mayRaiseFPException = true in {
-defm PseudoVFWREDUSUM : VPseudoVFWRED_VS;
-defm PseudoVFWREDOSUM : VPseudoVFWRED_VS;
+defm PseudoVFWREDUSUM : VPseudoVFWRED_VS_RM;
+defm PseudoVFWREDOSUM : VPseudoVFWRED_VS_RM;
}
} // Predicates = [HasVInstructionsAnyF]
@@ -5735,16 +6849,11 @@ let Predicates = [HasVInstructions] in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
foreach m = MxList in {
defvar mx = m.MX;
- defvar WriteVIMovVX_MX = !cast<SchedWrite>("WriteVIMovVX_" # mx);
- defvar WriteVIMovXV_MX = !cast<SchedWrite>("WriteVIMovXV_" # mx);
- defvar ReadVIMovVX_MX = !cast<SchedRead>("ReadVIMovVX_" # mx);
- defvar ReadVIMovXV_MX = !cast<SchedRead>("ReadVIMovXV_" # mx);
- defvar ReadVIMovXX_MX = !cast<SchedRead>("ReadVIMovXX_" # mx);
let VLMul = m.value in {
let HasSEWOp = 1, BaseInstr = VMV_X_S in
def PseudoVMV_X_S # "_" # mx:
Pseudo<(outs GPR:$rd), (ins m.vrclass:$rs2, ixlenimm:$sew), []>,
- Sched<[WriteVIMovVX_MX, ReadVIMovVX_MX]>,
+ Sched<[WriteVIMovVX, ReadVIMovVX]>,
RISCVVPseudo;
let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VMV_S_X,
Constraints = "$rd = $rs1" in
@@ -5752,7 +6861,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
(ins m.vrclass:$rs1, GPR:$rs2,
AVL:$vl, ixlenimm:$sew),
[]>,
- Sched<[WriteVIMovXV_MX, ReadVIMovXV_MX, ReadVIMovXX_MX]>,
+ Sched<[WriteVIMovXV, ReadVIMovXV, ReadVIMovXX]>,
RISCVVPseudo;
}
}
@@ -5768,17 +6877,12 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
foreach f = FPList in {
foreach m = f.MxList in {
defvar mx = m.MX;
- defvar WriteVFMovVF_MX = !cast<SchedWrite>("WriteVFMovVF_" # mx);
- defvar WriteVFMovFV_MX = !cast<SchedWrite>("WriteVFMovFV_" # mx);
- defvar ReadVFMovVF_MX = !cast<SchedRead>("ReadVFMovVF_" # mx);
- defvar ReadVFMovFV_MX = !cast<SchedRead>("ReadVFMovFV_" # mx);
- defvar ReadVFMovFX_MX = !cast<SchedRead>("ReadVFMovFX_" # mx);
let VLMul = m.value in {
let HasSEWOp = 1, BaseInstr = VFMV_F_S in
def "PseudoVFMV_" # f.FX # "_S_" # mx :
Pseudo<(outs f.fprclass:$rd),
(ins m.vrclass:$rs2, ixlenimm:$sew), []>,
- Sched<[WriteVFMovVF_MX, ReadVFMovVF_MX]>,
+ Sched<[WriteVFMovVF, ReadVFMovVF]>,
RISCVVPseudo;
let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F,
Constraints = "$rd = $rs1" in
@@ -5787,7 +6891,7 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
(ins m.vrclass:$rs1, f.fprclass:$rs2,
AVL:$vl, ixlenimm:$sew),
[]>,
- Sched<[WriteVFMovFV_MX, ReadVFMovFV_MX, ReadVFMovFX_MX]>,
+ Sched<[WriteVFMovFV, ReadVFMovFV, ReadVFMovFX]>,
RISCVVPseudo;
}
}
@@ -5814,7 +6918,8 @@ let Predicates = [HasVInstructionsAnyF] in {
// 16.4. Vector Register Gather Instructions
//===----------------------------------------------------------------------===//
defm PseudoVRGATHER : VPseudoVGTR_VV_VX_VI<uimm5, "@earlyclobber $rd">;
-defm PseudoVRGATHEREI16 : VPseudoVGTR_VV_EEW</* eew */ 16, "@earlyclobber $rd">;
+defm PseudoVRGATHEREI16 : VPseudoVGTR_VV_EEW<eew=16,
+ Constraint="@earlyclobber $rd">;
//===----------------------------------------------------------------------===//
// 16.5. Vector Compress Instruction
@@ -5829,7 +6934,6 @@ defm PseudoVCOMPRESS : VPseudoVCPR_V;
// 11. Vector Integer Arithmetic Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructions] in {
//===----------------------------------------------------------------------===//
// 11.1. Vector Single-Width Integer Add and Subtract
//===----------------------------------------------------------------------===//
@@ -5895,27 +6999,28 @@ defm : VPatBinaryV_VV_VX_VI<"int_riscv_vsra", "PseudoVSRA", AllIntegerVectors,
foreach vti = AllIntegerVectors in {
// Emit shift by 1 as an add since it might be faster.
- def : Pat<(vti.Vector (int_riscv_vsll (vti.Vector undef),
- (vti.Vector vti.RegClass:$rs1),
- (XLenVT 1), VLOpFrag)),
- (!cast<Instruction>("PseudoVADD_VV_"#vti.LMul.MX) vti.RegClass:$rs1,
- vti.RegClass:$rs1,
- GPR:$vl,
- vti.Log2SEW)>;
- def : Pat<(vti.Vector (int_riscv_vsll_mask (vti.Vector vti.RegClass:$merge),
- (vti.Vector vti.RegClass:$rs1),
- (XLenVT 1),
- (vti.Mask V0),
- VLOpFrag,
- (XLenVT timm:$policy))),
- (!cast<Instruction>("PseudoVADD_VV_"#vti.LMul.MX#"_MASK")
- vti.RegClass:$merge,
- vti.RegClass:$rs1,
- vti.RegClass:$rs1,
- (vti.Mask V0),
- GPR:$vl,
- vti.Log2SEW,
- (XLenVT timm:$policy))>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (int_riscv_vsll (vti.Vector undef),
+ (vti.Vector vti.RegClass:$rs1),
+ (XLenVT 1), VLOpFrag)),
+ (!cast<Instruction>("PseudoVADD_VV_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ def : Pat<(vti.Vector (int_riscv_vsll_mask (vti.Vector vti.RegClass:$merge),
+ (vti.Vector vti.RegClass:$rs1),
+ (XLenVT 1),
+ (vti.Mask V0),
+ VLOpFrag,
+ (XLenVT timm:$policy))),
+ (!cast<Instruction>("PseudoVADD_VV_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge,
+ vti.RegClass:$rs1,
+ vti.RegClass:$rs1,
+ (vti.Mask V0),
+ GPR:$vl,
+ vti.Log2SEW,
+ (XLenVT timm:$policy))>;
+ }
}
//===----------------------------------------------------------------------===//
@@ -5967,17 +7072,34 @@ defm : VPatBinaryV_VV_VX<"int_riscv_vmax", "PseudoVMAX", AllIntegerVectors>;
// 11.10. Vector Single-Width Integer Multiply Instructions
//===----------------------------------------------------------------------===//
defm : VPatBinaryV_VV_VX<"int_riscv_vmul", "PseudoVMUL", AllIntegerVectors>;
-defm : VPatBinaryV_VV_VX<"int_riscv_vmulh", "PseudoVMULH", AllIntegerVectors>;
-defm : VPatBinaryV_VV_VX<"int_riscv_vmulhu", "PseudoVMULHU", AllIntegerVectors>;
-defm : VPatBinaryV_VV_VX<"int_riscv_vmulhsu", "PseudoVMULHSU", AllIntegerVectors>;
+
+defvar IntegerVectorsExceptI64 = !filter(vti, AllIntegerVectors,
+ !ne(vti.SEW, 64));
+defm : VPatBinaryV_VV_VX<"int_riscv_vmulh", "PseudoVMULH",
+ IntegerVectorsExceptI64>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vmulhu", "PseudoVMULHU",
+ IntegerVectorsExceptI64>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vmulhsu", "PseudoVMULHSU",
+ IntegerVectorsExceptI64>;
+
+// vmulh, vmulhu, vmulhsu are not included for EEW=64 in Zve64*.
+defvar I64IntegerVectors = !filter(vti, AllIntegerVectors, !eq(vti.SEW, 64));
+let Predicates = [HasVInstructionsFullMultiply] in {
+ defm : VPatBinaryV_VV_VX<"int_riscv_vmulh", "PseudoVMULH",
+ I64IntegerVectors>;
+ defm : VPatBinaryV_VV_VX<"int_riscv_vmulhu", "PseudoVMULHU",
+ I64IntegerVectors>;
+ defm : VPatBinaryV_VV_VX<"int_riscv_vmulhsu", "PseudoVMULHSU",
+ I64IntegerVectors>;
+}
//===----------------------------------------------------------------------===//
// 11.11. Vector Integer Divide Instructions
//===----------------------------------------------------------------------===//
-defm : VPatBinaryV_VV_VX<"int_riscv_vdivu", "PseudoVDIVU", AllIntegerVectors>;
-defm : VPatBinaryV_VV_VX<"int_riscv_vdiv", "PseudoVDIV", AllIntegerVectors>;
-defm : VPatBinaryV_VV_VX<"int_riscv_vremu", "PseudoVREMU", AllIntegerVectors>;
-defm : VPatBinaryV_VV_VX<"int_riscv_vrem", "PseudoVREM", AllIntegerVectors>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vdivu", "PseudoVDIVU", AllIntegerVectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vdiv", "PseudoVDIV", AllIntegerVectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vremu", "PseudoVREMU", AllIntegerVectors, isSEWAware=1>;
+defm : VPatBinaryV_VV_VX<"int_riscv_vrem", "PseudoVREM", AllIntegerVectors, isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 11.12. Vector Widening Integer Multiply Instructions
@@ -6011,18 +7133,15 @@ defm : VPatBinaryV_VM_XM_IM<"int_riscv_vmerge", "PseudoVMERGE">;
// 11.16. Vector Integer Move Instructions
//===----------------------------------------------------------------------===//
foreach vti = AllVectors in {
- def : Pat<(vti.Vector (int_riscv_vmv_v_v (vti.Vector undef),
- (vti.Vector vti.RegClass:$rs1),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX)
- $rs1, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (int_riscv_vmv_v_v (vti.Vector vti.RegClass:$passthru),
- (vti.Vector vti.RegClass:$rs1),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX#"_TU")
- $passthru, $rs1, GPR:$vl, vti.Log2SEW)>;
-
- // vmv.v.x/vmv.v.i are handled in RISCInstrVInstrInfoVVLPatterns.td
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (int_riscv_vmv_v_v (vti.Vector vti.RegClass:$passthru),
+ (vti.Vector vti.RegClass:$rs1),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX)
+ $passthru, $rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
+
+    // vmv.v.x/vmv.v.i are handled in RISCVInstrInfoVVLPatterns.td
+ }
}
//===----------------------------------------------------------------------===//
@@ -6040,88 +7159,110 @@ defm : VPatBinaryV_VV_VX<"int_riscv_vssub", "PseudoVSSUB", AllIntegerVectors>;
//===----------------------------------------------------------------------===//
// 12.2. Vector Single-Width Averaging Add and Subtract
//===----------------------------------------------------------------------===//
-defm : VPatBinaryV_VV_VX<"int_riscv_vaaddu", "PseudoVAADDU", AllIntegerVectors>;
-defm : VPatBinaryV_VV_VX<"int_riscv_vaadd", "PseudoVAADD", AllIntegerVectors>;
-defm : VPatBinaryV_VV_VX<"int_riscv_vasubu", "PseudoVASUBU", AllIntegerVectors>;
-defm : VPatBinaryV_VV_VX<"int_riscv_vasub", "PseudoVASUB", AllIntegerVectors>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vaaddu", "PseudoVAADDU",
+ AllIntegerVectors>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vasubu", "PseudoVASUBU",
+ AllIntegerVectors>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vasub", "PseudoVASUB",
+ AllIntegerVectors>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vaadd", "PseudoVAADD",
+ AllIntegerVectors>;
//===----------------------------------------------------------------------===//
// 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
//===----------------------------------------------------------------------===//
-defm : VPatBinaryV_VV_VX<"int_riscv_vsmul", "PseudoVSMUL", AllIntegerVectors>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vsmul", "PseudoVSMUL",
+ IntegerVectorsExceptI64>;
+// vsmul.vv and vsmul.vx are not included for EEW=64 in Zve64*.
+let Predicates = [HasVInstructionsFullMultiply] in
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vsmul", "PseudoVSMUL",
+ I64IntegerVectors>;
//===----------------------------------------------------------------------===//
// 12.4. Vector Single-Width Scaling Shift Instructions
//===----------------------------------------------------------------------===//
-defm : VPatBinaryV_VV_VX_VI<"int_riscv_vssrl", "PseudoVSSRL", AllIntegerVectors,
- uimm5>;
-defm : VPatBinaryV_VV_VX_VI<"int_riscv_vssra", "PseudoVSSRA", AllIntegerVectors,
- uimm5>;
+defm : VPatBinaryV_VV_VX_VI_RM<"int_riscv_vssrl", "PseudoVSSRL",
+ AllIntegerVectors, uimm5>;
+defm : VPatBinaryV_VV_VX_VI_RM<"int_riscv_vssra", "PseudoVSSRA",
+ AllIntegerVectors, uimm5>;
//===----------------------------------------------------------------------===//
// 12.5. Vector Narrowing Fixed-Point Clip Instructions
//===----------------------------------------------------------------------===//
-defm : VPatBinaryV_WV_WX_WI<"int_riscv_vnclipu", "PseudoVNCLIPU", AllWidenableIntVectors>;
-defm : VPatBinaryV_WV_WX_WI<"int_riscv_vnclip", "PseudoVNCLIP", AllWidenableIntVectors>;
-
-} // Predicates = [HasVInstructions]
+defm : VPatBinaryV_WV_WX_WI_RM<"int_riscv_vnclipu", "PseudoVNCLIPU",
+ AllWidenableIntVectors>;
+defm : VPatBinaryV_WV_WX_WI_RM<"int_riscv_vnclip", "PseudoVNCLIP",
+ AllWidenableIntVectors>;
//===----------------------------------------------------------------------===//
// 13. Vector Floating-Point Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructionsAnyF] in {
//===----------------------------------------------------------------------===//
// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
//===----------------------------------------------------------------------===//
-defm : VPatBinaryV_VV_VX<"int_riscv_vfadd", "PseudoVFADD", AllFloatVectors>;
-defm : VPatBinaryV_VV_VX<"int_riscv_vfsub", "PseudoVFSUB", AllFloatVectors>;
-defm : VPatBinaryV_VX<"int_riscv_vfrsub", "PseudoVFRSUB", AllFloatVectors>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfadd", "PseudoVFADD",
+ AllFloatVectors>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfsub", "PseudoVFSUB",
+ AllFloatVectors>;
+defm : VPatBinaryV_VX_RM<"int_riscv_vfrsub", "PseudoVFRSUB", AllFloatVectors>;
//===----------------------------------------------------------------------===//
// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
//===----------------------------------------------------------------------===//
-defm : VPatBinaryW_VV_VX<"int_riscv_vfwadd", "PseudoVFWADD", AllWidenableFloatVectors>;
-defm : VPatBinaryW_VV_VX<"int_riscv_vfwsub", "PseudoVFWSUB", AllWidenableFloatVectors>;
-defm : VPatBinaryW_WV_WX<"int_riscv_vfwadd_w", "PseudoVFWADD", AllWidenableFloatVectors>;
-defm : VPatBinaryW_WV_WX<"int_riscv_vfwsub_w", "PseudoVFWSUB", AllWidenableFloatVectors>;
+defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwadd", "PseudoVFWADD",
+ AllWidenableFloatVectors>;
+defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwsub", "PseudoVFWSUB",
+ AllWidenableFloatVectors>;
+defm : VPatBinaryW_WV_WX_RM<"int_riscv_vfwadd_w", "PseudoVFWADD",
+ AllWidenableFloatVectors>;
+defm : VPatBinaryW_WV_WX_RM<"int_riscv_vfwsub_w", "PseudoVFWSUB",
+ AllWidenableFloatVectors>;
//===----------------------------------------------------------------------===//
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
//===----------------------------------------------------------------------===//
-defm : VPatBinaryV_VV_VX<"int_riscv_vfmul", "PseudoVFMUL", AllFloatVectors>;
-defm : VPatBinaryV_VV_VX<"int_riscv_vfdiv", "PseudoVFDIV", AllFloatVectors>;
-defm : VPatBinaryV_VX<"int_riscv_vfrdiv", "PseudoVFRDIV", AllFloatVectors>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfmul", "PseudoVFMUL",
+ AllFloatVectors>;
+defm : VPatBinaryV_VV_VX_RM<"int_riscv_vfdiv", "PseudoVFDIV",
+ AllFloatVectors, isSEWAware=1>;
+defm : VPatBinaryV_VX_RM<"int_riscv_vfrdiv", "PseudoVFRDIV",
+ AllFloatVectors, isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 13.5. Vector Widening Floating-Point Multiply
//===----------------------------------------------------------------------===//
-defm : VPatBinaryW_VV_VX<"int_riscv_vfwmul", "PseudoVFWMUL", AllWidenableFloatVectors>;
+defm : VPatBinaryW_VV_VX_RM<"int_riscv_vfwmul", "PseudoVFWMUL",
+ AllWidenableFloatVectors>;
//===----------------------------------------------------------------------===//
// 13.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
//===----------------------------------------------------------------------===//
-defm : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfmacc", "PseudoVFMACC", AllFloatVectors>;
-defm : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmacc", "PseudoVFNMACC", AllFloatVectors>;
-defm : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfmsac", "PseudoVFMSAC", AllFloatVectors>;
-defm : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmsac", "PseudoVFNMSAC", AllFloatVectors>;
-defm : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfmadd", "PseudoVFMADD", AllFloatVectors>;
-defm : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmadd", "PseudoVFNMADD", AllFloatVectors>;
-defm : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfmsub", "PseudoVFMSUB", AllFloatVectors>;
-defm : VPatTernaryV_VV_VX_AAXA<"int_riscv_vfnmsub", "PseudoVFNMSUB", AllFloatVectors>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmacc", "PseudoVFMACC", AllFloatVectors>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmacc", "PseudoVFNMACC", AllFloatVectors>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsac", "PseudoVFMSAC", AllFloatVectors>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsac", "PseudoVFNMSAC", AllFloatVectors>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmadd", "PseudoVFMADD", AllFloatVectors>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmadd", "PseudoVFNMADD", AllFloatVectors>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfmsub", "PseudoVFMSUB", AllFloatVectors>;
+defm : VPatTernaryV_VV_VX_AAXA_RM<"int_riscv_vfnmsub", "PseudoVFNMSUB", AllFloatVectors>;
//===----------------------------------------------------------------------===//
// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
//===----------------------------------------------------------------------===//
-defm : VPatTernaryW_VV_VX<"int_riscv_vfwmacc", "PseudoVFWMACC", AllWidenableFloatVectors>;
-defm : VPatTernaryW_VV_VX<"int_riscv_vfwnmacc", "PseudoVFWNMACC", AllWidenableFloatVectors>;
-defm : VPatTernaryW_VV_VX<"int_riscv_vfwmsac", "PseudoVFWMSAC", AllWidenableFloatVectors>;
-defm : VPatTernaryW_VV_VX<"int_riscv_vfwnmsac", "PseudoVFWNMSAC", AllWidenableFloatVectors>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmacc", "PseudoVFWMACC",
+ AllWidenableFloatVectors>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmacc", "PseudoVFWNMACC",
+ AllWidenableFloatVectors>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwmsac", "PseudoVFWMSAC",
+ AllWidenableFloatVectors>;
+defm : VPatTernaryW_VV_VX_RM<"int_riscv_vfwnmsac", "PseudoVFWNMSAC",
+ AllWidenableFloatVectors>;
//===----------------------------------------------------------------------===//
// 13.8. Vector Floating-Point Square-Root Instruction
//===----------------------------------------------------------------------===//
-defm : VPatUnaryV_V<"int_riscv_vfsqrt", "PseudoVFSQRT", AllFloatVectors>;
+defm : VPatUnaryV_V_RM<"int_riscv_vfsqrt", "PseudoVFSQRT", AllFloatVectors, isSEWAware=1>;
//===----------------------------------------------------------------------===//
// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
@@ -6131,7 +7272,7 @@ defm : VPatUnaryV_V<"int_riscv_vfrsqrt7", "PseudoVFRSQRT7", AllFloatVectors>;
//===----------------------------------------------------------------------===//
// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
//===----------------------------------------------------------------------===//
-defm : VPatUnaryV_V<"int_riscv_vfrec7", "PseudoVFREC7", AllFloatVectors>;
+defm : VPatUnaryV_V_RM<"int_riscv_vfrec7", "PseudoVFREC7", AllFloatVectors>;
//===----------------------------------------------------------------------===//
// 13.11. Vector Floating-Point Min/Max Instructions
@@ -6169,44 +7310,53 @@ defm : VPatConversionVI_VF<"int_riscv_vfclass", "PseudoVFCLASS">;
// We can use vmerge.vvm to support vector-vector vfmerge.
// NOTE: Clang previously used int_riscv_vfmerge for vector-vector, but now uses
// int_riscv_vmerge. Support both for compatibility.
-defm : VPatBinaryV_VM_TAIL<"int_riscv_vmerge", "PseudoVMERGE",
- /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
-defm : VPatBinaryV_VM_TAIL<"int_riscv_vfmerge", "PseudoVMERGE",
- /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
-defm : VPatBinaryV_XM_TAIL<"int_riscv_vfmerge", "PseudoVFMERGE",
- /*CarryOut = */0, /*vtilist=*/AllFloatVectors>;
+foreach vti = AllFloatVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ defm : VPatBinaryCarryInTAIL<"int_riscv_vmerge", "PseudoVMERGE", "VVM",
+ vti.Vector,
+ vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, vti.RegClass>;
+ defm : VPatBinaryCarryInTAIL<"int_riscv_vfmerge", "PseudoVMERGE", "VVM",
+ vti.Vector,
+ vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, vti.RegClass>;
+ defm : VPatBinaryCarryInTAIL<"int_riscv_vfmerge", "PseudoVFMERGE",
+ "V"#vti.ScalarSuffix#"M",
+ vti.Vector,
+ vti.Vector, vti.Scalar, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass,
+ vti.RegClass, vti.ScalarRegClass>;
+ }
+}
foreach fvti = AllFloatVectors in {
defvar instr = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX);
- def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector undef),
- (fvti.Vector fvti.RegClass:$rs2),
- (fvti.Scalar (fpimm0)),
- (fvti.Mask V0), VLOpFrag)),
- (instr fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
- defvar instr_tu = !cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX#"_TU");
+ let Predicates = GetVTypePredicates<fvti>.Predicates in
def : Pat<(fvti.Vector (int_riscv_vfmerge (fvti.Vector fvti.RegClass:$merge),
(fvti.Vector fvti.RegClass:$rs2),
(fvti.Scalar (fpimm0)),
(fvti.Mask V0), VLOpFrag)),
- (instr_tu fvti.RegClass:$merge, fvti.RegClass:$rs2, 0,
- (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
+ (instr fvti.RegClass:$merge, fvti.RegClass:$rs2, 0,
+ (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
}
//===----------------------------------------------------------------------===//
// 13.17. Single-Width Floating-Point/Integer Type-Convert Instructions
//===----------------------------------------------------------------------===//
-defm : VPatConversionVI_VF<"int_riscv_vfcvt_xu_f_v", "PseudoVFCVT_XU_F">;
+defm : VPatConversionVI_VF_RM<"int_riscv_vfcvt_x_f_v", "PseudoVFCVT_X_F">;
+defm : VPatConversionVI_VF_RM<"int_riscv_vfcvt_xu_f_v", "PseudoVFCVT_XU_F">;
defm : VPatConversionVI_VF<"int_riscv_vfcvt_rtz_xu_f_v", "PseudoVFCVT_RTZ_XU_F">;
-defm : VPatConversionVI_VF<"int_riscv_vfcvt_x_f_v", "PseudoVFCVT_X_F">;
defm : VPatConversionVI_VF<"int_riscv_vfcvt_rtz_x_f_v", "PseudoVFCVT_RTZ_X_F">;
-defm : VPatConversionVF_VI<"int_riscv_vfcvt_f_x_v", "PseudoVFCVT_F_X">;
-defm : VPatConversionVF_VI<"int_riscv_vfcvt_f_xu_v", "PseudoVFCVT_F_XU">;
+defm : VPatConversionVF_VI_RM<"int_riscv_vfcvt_f_x_v", "PseudoVFCVT_F_X">;
+defm : VPatConversionVF_VI_RM<"int_riscv_vfcvt_f_xu_v", "PseudoVFCVT_F_XU">;
//===----------------------------------------------------------------------===//
// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
//===----------------------------------------------------------------------===//
-defm : VPatConversionWI_VF<"int_riscv_vfwcvt_xu_f_v", "PseudoVFWCVT_XU_F">;
-defm : VPatConversionWI_VF<"int_riscv_vfwcvt_x_f_v", "PseudoVFWCVT_X_F">;
+defm : VPatConversionWI_VF_RM<"int_riscv_vfwcvt_xu_f_v", "PseudoVFWCVT_XU_F">;
+defm : VPatConversionWI_VF_RM<"int_riscv_vfwcvt_x_f_v", "PseudoVFWCVT_X_F">;
defm : VPatConversionWI_VF<"int_riscv_vfwcvt_rtz_xu_f_v", "PseudoVFWCVT_RTZ_XU_F">;
defm : VPatConversionWI_VF<"int_riscv_vfwcvt_rtz_x_f_v", "PseudoVFWCVT_RTZ_X_F">;
defm : VPatConversionWF_VI<"int_riscv_vfwcvt_f_xu_v", "PseudoVFWCVT_F_XU">;
@@ -6216,21 +7366,19 @@ defm : VPatConversionWF_VF<"int_riscv_vfwcvt_f_f_v", "PseudoVFWCVT_F_F">;
//===----------------------------------------------------------------------===//
// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
//===----------------------------------------------------------------------===//
-defm : VPatConversionVI_WF<"int_riscv_vfncvt_xu_f_w", "PseudoVFNCVT_XU_F">;
-defm : VPatConversionVI_WF<"int_riscv_vfncvt_x_f_w", "PseudoVFNCVT_X_F">;
+defm : VPatConversionVI_WF_RM<"int_riscv_vfncvt_xu_f_w", "PseudoVFNCVT_XU_F">;
+defm : VPatConversionVI_WF_RM<"int_riscv_vfncvt_x_f_w", "PseudoVFNCVT_X_F">;
defm : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_xu_f_w", "PseudoVFNCVT_RTZ_XU_F">;
defm : VPatConversionVI_WF<"int_riscv_vfncvt_rtz_x_f_w", "PseudoVFNCVT_RTZ_X_F">;
-defm : VPatConversionVF_WI <"int_riscv_vfncvt_f_xu_w", "PseudoVFNCVT_F_XU">;
-defm : VPatConversionVF_WI <"int_riscv_vfncvt_f_x_w", "PseudoVFNCVT_F_X">;
-defm : VPatConversionVF_WF<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F">;
+defm : VPatConversionVF_WI_RM <"int_riscv_vfncvt_f_xu_w", "PseudoVFNCVT_F_XU">;
+defm : VPatConversionVF_WI_RM <"int_riscv_vfncvt_f_x_w", "PseudoVFNCVT_F_X">;
+defm : VPatConversionVF_WF_RM<"int_riscv_vfncvt_f_f_w", "PseudoVFNCVT_F_F">;
defm : VPatConversionVF_WF<"int_riscv_vfncvt_rod_f_f_w", "PseudoVFNCVT_ROD_F_F">;
-} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 14. Vector Reduction Operations
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructions] in {
//===----------------------------------------------------------------------===//
// 14.1. Vector Single-Width Integer Reduction Instructions
//===----------------------------------------------------------------------===//
@@ -6248,30 +7396,25 @@ defm : VPatReductionV_VS<"int_riscv_vredmax", "PseudoVREDMAX">;
//===----------------------------------------------------------------------===//
defm : VPatReductionW_VS<"int_riscv_vwredsumu", "PseudoVWREDSUMU">;
defm : VPatReductionW_VS<"int_riscv_vwredsum", "PseudoVWREDSUM">;
-} // Predicates = [HasVInstructions]
-let Predicates = [HasVInstructionsAnyF] in {
//===----------------------------------------------------------------------===//
// 14.3. Vector Single-Width Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
-defm : VPatReductionV_VS<"int_riscv_vfredosum", "PseudoVFREDOSUM", /*IsFloat=*/1>;
-defm : VPatReductionV_VS<"int_riscv_vfredusum", "PseudoVFREDUSUM", /*IsFloat=*/1>;
-defm : VPatReductionV_VS<"int_riscv_vfredmin", "PseudoVFREDMIN", /*IsFloat=*/1>;
-defm : VPatReductionV_VS<"int_riscv_vfredmax", "PseudoVFREDMAX", /*IsFloat=*/1>;
+defm : VPatReductionV_VS_RM<"int_riscv_vfredosum", "PseudoVFREDOSUM", IsFloat=1>;
+defm : VPatReductionV_VS_RM<"int_riscv_vfredusum", "PseudoVFREDUSUM", IsFloat=1>;
+defm : VPatReductionV_VS<"int_riscv_vfredmin", "PseudoVFREDMIN", IsFloat=1>;
+defm : VPatReductionV_VS<"int_riscv_vfredmax", "PseudoVFREDMAX", IsFloat=1>;
//===----------------------------------------------------------------------===//
// 14.4. Vector Widening Floating-Point Reduction Instructions
//===----------------------------------------------------------------------===//
-defm : VPatReductionW_VS<"int_riscv_vfwredusum", "PseudoVFWREDUSUM", /*IsFloat=*/1>;
-defm : VPatReductionW_VS<"int_riscv_vfwredosum", "PseudoVFWREDOSUM", /*IsFloat=*/1>;
-
-} // Predicates = [HasVInstructionsAnyF]
+defm : VPatReductionW_VS_RM<"int_riscv_vfwredusum", "PseudoVFWREDUSUM", IsFloat=1>;
+defm : VPatReductionW_VS_RM<"int_riscv_vfwredosum", "PseudoVFWREDOSUM", IsFloat=1>;
//===----------------------------------------------------------------------===//
// 15. Vector Mask Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructions] in {
//===----------------------------------------------------------------------===//
// 15.1 Vector Mask-Register Logical Instructions
//===----------------------------------------------------------------------===//
@@ -6323,7 +7466,6 @@ defm : VPatUnaryV_M<"int_riscv_viota", "PseudoVIOTA">;
//===----------------------------------------------------------------------===//
defm : VPatNullaryV<"int_riscv_vid", "PseudoVID">;
-} // Predicates = [HasVInstructions]
//===----------------------------------------------------------------------===//
// 16. Vector Permutation Instructions
@@ -6333,79 +7475,68 @@ defm : VPatNullaryV<"int_riscv_vid", "PseudoVID">;
// 16.1. Integer Scalar Move Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructions] in {
foreach vti = AllIntegerVectors in {
- def : Pat<(riscv_vmv_x_s (vti.Vector vti.RegClass:$rs2)),
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ def : Pat<(XLenVT (riscv_vmv_x_s (vti.Vector vti.RegClass:$rs2))),
(!cast<Instruction>("PseudoVMV_X_S_" # vti.LMul.MX) $rs2, vti.Log2SEW)>;
// vmv.s.x is handled with a custom node in RISCVInstrInfoVVLPatterns.td
}
-} // Predicates = [HasVInstructions]
//===----------------------------------------------------------------------===//
// 16.2. Floating-Point Scalar Move Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructionsAnyF] in {
foreach fvti = AllFloatVectors in {
- def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector fvti.RegClass:$rs1),
- (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)),
- (!cast<Instruction>("PseudoVFMV_S_"#fvti.ScalarSuffix#"_" #
- fvti.LMul.MX)
- (fvti.Vector $rs1),
- (fvti.Scalar fvti.ScalarRegClass:$rs2),
- GPR:$vl, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector fvti.RegClass:$rs1),
- (fvti.Scalar (fpimm0)), VLOpFrag)),
- (!cast<Instruction>("PseudoVMV_S_X_" # fvti.LMul.MX)
- (fvti.Vector $rs1), X0, GPR:$vl, fvti.Log2SEW)>;
+ let Predicates = GetVTypePredicates<fvti>.Predicates in {
+ def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)),
+ (!cast<Instruction>("PseudoVFMV_S_"#fvti.ScalarSuffix#"_" #
+ fvti.LMul.MX)
+ (fvti.Vector $rs1),
+ (fvti.Scalar fvti.ScalarRegClass:$rs2),
+ GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Scalar (fpimm0)), VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_S_X_" # fvti.LMul.MX)
+ (fvti.Vector $rs1), (XLenVT X0), GPR:$vl, fvti.Log2SEW)>;
+ }
}
-} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// 16.3. Vector Slide Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructions] in {
- defm : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllIntegerVectors, uimm5>;
- defm : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllIntegerVectors, uimm5>;
- defm : VPatBinaryV_VX<"int_riscv_vslide1up", "PseudoVSLIDE1UP", AllIntegerVectors>;
- defm : VPatBinaryV_VX<"int_riscv_vslide1down", "PseudoVSLIDE1DOWN", AllIntegerVectors>;
-} // Predicates = [HasVInstructions]
+defm : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllIntegerVectors, uimm5>;
+defm : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllIntegerVectors, uimm5>;
+defm : VPatBinaryV_VX<"int_riscv_vslide1up", "PseudoVSLIDE1UP", AllIntegerVectors>;
+defm : VPatBinaryV_VX<"int_riscv_vslide1down", "PseudoVSLIDE1DOWN", AllIntegerVectors>;
-let Predicates = [HasVInstructionsAnyF] in {
- defm : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllFloatVectors, uimm5>;
- defm : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllFloatVectors, uimm5>;
- defm : VPatBinaryV_VX<"int_riscv_vfslide1up", "PseudoVFSLIDE1UP", AllFloatVectors>;
- defm : VPatBinaryV_VX<"int_riscv_vfslide1down", "PseudoVFSLIDE1DOWN", AllFloatVectors>;
-} // Predicates = [HasVInstructionsAnyF]
+defm : VPatTernaryV_VX_VI<"int_riscv_vslideup", "PseudoVSLIDEUP", AllFloatVectors, uimm5>;
+defm : VPatTernaryV_VX_VI<"int_riscv_vslidedown", "PseudoVSLIDEDOWN", AllFloatVectors, uimm5>;
+defm : VPatBinaryV_VX<"int_riscv_vfslide1up", "PseudoVFSLIDE1UP", AllFloatVectors>;
+defm : VPatBinaryV_VX<"int_riscv_vfslide1down", "PseudoVFSLIDE1DOWN", AllFloatVectors>;
//===----------------------------------------------------------------------===//
// 16.4. Vector Register Gather Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructions] in {
- defm : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER",
- AllIntegerVectors, uimm5>;
- defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16",
- /* eew */ 16, AllIntegerVectors>;
-} // Predicates = [HasVInstructions]
-
-let Predicates = [HasVInstructionsAnyF] in {
- defm : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER",
- AllFloatVectors, uimm5>;
- defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16",
- /* eew */ 16, AllFloatVectors>;
-} // Predicates = [HasVInstructionsAnyF]
+defm : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER",
+ AllIntegerVectors, uimm5>;
+defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16",
+ eew=16, vtilist=AllIntegerVectors>;
+defm : VPatBinaryV_VV_VX_VI_INT<"int_riscv_vrgather", "PseudoVRGATHER",
+ AllFloatVectors, uimm5>;
+defm : VPatBinaryV_VV_INT_EEW<"int_riscv_vrgatherei16_vv", "PseudoVRGATHEREI16",
+ eew=16, vtilist=AllFloatVectors>;
//===----------------------------------------------------------------------===//
// 16.5. Vector Compress Instruction
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructions] in {
- defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>;
-} // Predicates = [HasVInstructions]
-
-let Predicates = [HasVInstructionsAnyF] in {
- defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>;
-} // Predicates = [HasVInstructionsAnyF]
+defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllIntegerVectors>;
+defm : VPatUnaryV_V_AnyMask<"int_riscv_vcompress", "PseudoVCOMPRESS", AllFloatVectors>;
// Include the non-intrinsic ISel patterns
include "RISCVInstrInfoVVLPatterns.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index c07bb775c796..4141c7698bb4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -29,13 +29,13 @@ multiclass VPatUSLoadStoreSDNode<ValueType type,
LMULInfo vlmul,
OutPatFrag avl,
VReg reg_class,
- int sew = !shl(1, log2sew)>
-{
+ int sew = !shl(1, log2sew)> {
defvar load_instr = !cast<Instruction>("PseudoVLE"#sew#"_V_"#vlmul.MX);
defvar store_instr = !cast<Instruction>("PseudoVSE"#sew#"_V_"#vlmul.MX);
// Load
def : Pat<(type (load GPR:$rs1)),
- (load_instr GPR:$rs1, avl, log2sew)>;
+ (load_instr (type (IMPLICIT_DEF)), GPR:$rs1, avl,
+ log2sew, TU_MU)>;
// Store
def : Pat<(store type:$rs2, GPR:$rs1),
(store_instr reg_class:$rs2, GPR:$rs1, avl, log2sew)>;
@@ -45,8 +45,7 @@ multiclass VPatUSLoadStoreWholeVRSDNode<ValueType type,
int log2sew,
LMULInfo vlmul,
VReg reg_class,
- int sew = !shl(1, log2sew)>
-{
+ int sew = !shl(1, log2sew)> {
defvar load_instr =
!cast<Instruction>("VL"#!substr(vlmul.MX, 1)#"RE"#sew#"_V");
defvar store_instr =
@@ -60,69 +59,108 @@ multiclass VPatUSLoadStoreWholeVRSDNode<ValueType type,
(store_instr reg_class:$rs2, GPR:$rs1)>;
}
-multiclass VPatUSLoadStoreMaskSDNode<MTypeInfo m>
-{
+multiclass VPatUSLoadStoreMaskSDNode<MTypeInfo m> {
defvar load_instr = !cast<Instruction>("PseudoVLM_V_"#m.BX);
defvar store_instr = !cast<Instruction>("PseudoVSM_V_"#m.BX);
// Load
def : Pat<(m.Mask (load GPR:$rs1)),
- (load_instr GPR:$rs1, m.AVL, m.Log2SEW)>;
+ (load_instr (m.Mask (IMPLICIT_DEF)), GPR:$rs1, m.AVL,
+ m.Log2SEW, TA_MA)>;
// Store
def : Pat<(store m.Mask:$rs2, GPR:$rs1),
(store_instr VR:$rs2, GPR:$rs1, m.AVL, m.Log2SEW)>;
}
-class VPatBinarySDNode_VV<SDNode vop,
+class VPatBinarySDNode_VV<SDPatternOperator vop,
string instruction_name,
ValueType result_type,
ValueType op_type,
- int sew,
+ int log2sew,
LMULInfo vlmul,
OutPatFrag avl,
- VReg op_reg_class> :
+ VReg op_reg_class,
+ bit isSEWAware = 0> :
Pat<(result_type (vop
(op_type op_reg_class:$rs1),
(op_type op_reg_class:$rs2))),
- (!cast<Instruction>(instruction_name#"_VV_"# vlmul.MX)
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ instruction_name#"_VV_"# vlmul.MX#"_E"#!shl(1, log2sew),
+ instruction_name#"_VV_"# vlmul.MX))
+ (result_type (IMPLICIT_DEF)),
op_reg_class:$rs1,
op_reg_class:$rs2,
- avl, sew)>;
+ avl, log2sew, TA_MA)>;
+
+class VPatBinarySDNode_VV_RM<SDPatternOperator vop,
+ string instruction_name,
+ ValueType result_type,
+ ValueType op_type,
+ int log2sew,
+ LMULInfo vlmul,
+ OutPatFrag avl,
+ VReg op_reg_class,
+ bit isSEWAware = 0> :
+ Pat<(result_type (vop
+ (op_type op_reg_class:$rs1),
+ (op_type op_reg_class:$rs2))),
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ instruction_name#"_VV_"# vlmul.MX#"_E"#!shl(1, log2sew),
+ instruction_name#"_VV_"# vlmul.MX))
+ (result_type (IMPLICIT_DEF)),
+ op_reg_class:$rs1,
+ op_reg_class:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ avl, log2sew, TA_MA)>;
-class VPatBinarySDNode_XI<SDNode vop,
+class VPatBinarySDNode_XI<SDPatternOperator vop,
string instruction_name,
string suffix,
ValueType result_type,
ValueType vop_type,
- int sew,
+ int log2sew,
LMULInfo vlmul,
OutPatFrag avl,
VReg vop_reg_class,
ComplexPattern SplatPatKind,
- DAGOperand xop_kind> :
+ DAGOperand xop_kind,
+ bit isSEWAware = 0> :
Pat<(result_type (vop
(vop_type vop_reg_class:$rs1),
- (vop_type (SplatPatKind xop_kind:$rs2)))),
- (!cast<Instruction>(instruction_name#_#suffix#_# vlmul.MX)
+ (vop_type (SplatPatKind (XLenVT xop_kind:$rs2))))),
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ instruction_name#_#suffix#_# vlmul.MX#"_E"#!shl(1, log2sew),
+ instruction_name#_#suffix#_# vlmul.MX))
+ (result_type (IMPLICIT_DEF)),
vop_reg_class:$rs1,
xop_kind:$rs2,
- avl, sew)>;
-
-multiclass VPatBinarySDNode_VV_VX<SDNode vop, string instruction_name> {
- foreach vti = AllIntegerVectors in {
- def : VPatBinarySDNode_VV<vop, instruction_name,
- vti.Vector, vti.Vector, vti.Log2SEW,
- vti.LMul, vti.AVL, vti.RegClass>;
- def : VPatBinarySDNode_XI<vop, instruction_name, "VX",
- vti.Vector, vti.Vector, vti.Log2SEW,
- vti.LMul, vti.AVL, vti.RegClass,
- SplatPat, GPR>;
+ avl, log2sew, TA_MA)>;
+
+multiclass VPatBinarySDNode_VV_VX<SDPatternOperator vop, string instruction_name,
+ list<VTypeInfo> vtilist = AllIntegerVectors,
+ bit isSEWAware = 0> {
+ foreach vti = vtilist in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : VPatBinarySDNode_VV<vop, instruction_name,
+ vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, vti.AVL, vti.RegClass, isSEWAware>;
+ def : VPatBinarySDNode_XI<vop, instruction_name, "VX",
+ vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, vti.AVL, vti.RegClass,
+ SplatPat, GPR, isSEWAware>;
+ }
}
}
-multiclass VPatBinarySDNode_VV_VX_VI<SDNode vop, string instruction_name,
+multiclass VPatBinarySDNode_VV_VX_VI<SDPatternOperator vop, string instruction_name,
Operand ImmType = simm5>
: VPatBinarySDNode_VV_VX<vop, instruction_name> {
foreach vti = AllIntegerVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in
def : VPatBinarySDNode_XI<vop, instruction_name, "VI",
vti.Vector, vti.Vector, vti.Log2SEW,
vti.LMul, vti.AVL, vti.RegClass,
@@ -131,49 +169,123 @@ multiclass VPatBinarySDNode_VV_VX_VI<SDNode vop, string instruction_name,
}
}
-class VPatBinarySDNode_VF<SDNode vop,
+class VPatBinarySDNode_VF<SDPatternOperator vop,
string instruction_name,
ValueType result_type,
ValueType vop_type,
ValueType xop_type,
- int sew,
+ int log2sew,
LMULInfo vlmul,
OutPatFrag avl,
VReg vop_reg_class,
- DAGOperand xop_kind> :
+ DAGOperand xop_kind,
+ bit isSEWAware = 0> :
Pat<(result_type (vop (vop_type vop_reg_class:$rs1),
(vop_type (SplatFPOp xop_kind:$rs2)))),
- (!cast<Instruction>(instruction_name#"_"#vlmul.MX)
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ instruction_name#"_"#vlmul.MX#"_E"#!shl(1, log2sew),
+ instruction_name#"_"#vlmul.MX))
+ (result_type (IMPLICIT_DEF)),
vop_reg_class:$rs1,
(xop_type xop_kind:$rs2),
- avl, sew)>;
+ avl, log2sew, TA_MA)>;
+
+class VPatBinarySDNode_VF_RM<SDPatternOperator vop,
+ string instruction_name,
+ ValueType result_type,
+ ValueType vop_type,
+ ValueType xop_type,
+ int log2sew,
+ LMULInfo vlmul,
+ OutPatFrag avl,
+ VReg vop_reg_class,
+ DAGOperand xop_kind,
+ bit isSEWAware = 0> :
+ Pat<(result_type (vop (vop_type vop_reg_class:$rs1),
+ (vop_type (SplatFPOp xop_kind:$rs2)))),
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ instruction_name#"_"#vlmul.MX#"_E"#!shl(1, log2sew),
+ instruction_name#"_"#vlmul.MX))
+ (result_type (IMPLICIT_DEF)),
+ vop_reg_class:$rs1,
+ (xop_type xop_kind:$rs2),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ avl, log2sew, TA_MA)>;
-multiclass VPatBinaryFPSDNode_VV_VF<SDNode vop, string instruction_name> {
+multiclass VPatBinaryFPSDNode_VV_VF<SDPatternOperator vop, string instruction_name,
+ bit isSEWAware = 0> {
foreach vti = AllFloatVectors in {
- def : VPatBinarySDNode_VV<vop, instruction_name,
- vti.Vector, vti.Vector, vti.Log2SEW,
- vti.LMul, vti.AVL, vti.RegClass>;
- def : VPatBinarySDNode_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
- vti.Vector, vti.Vector, vti.Scalar,
- vti.Log2SEW, vti.LMul, vti.AVL, vti.RegClass,
- vti.ScalarRegClass>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : VPatBinarySDNode_VV<vop, instruction_name,
+ vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, vti.AVL, vti.RegClass, isSEWAware>;
+ def : VPatBinarySDNode_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
+ vti.Vector, vti.Vector, vti.Scalar,
+ vti.Log2SEW, vti.LMul, vti.AVL, vti.RegClass,
+ vti.ScalarRegClass, isSEWAware>;
+ }
}
}
-multiclass VPatBinaryFPSDNode_R_VF<SDNode vop, string instruction_name> {
+multiclass VPatBinaryFPSDNode_VV_VF_RM<SDPatternOperator vop, string instruction_name,
+ bit isSEWAware = 0> {
+ foreach vti = AllFloatVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : VPatBinarySDNode_VV_RM<vop, instruction_name,
+ vti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, vti.AVL, vti.RegClass, isSEWAware>;
+ def : VPatBinarySDNode_VF_RM<vop, instruction_name#"_V"#vti.ScalarSuffix,
+ vti.Vector, vti.Vector, vti.Scalar,
+ vti.Log2SEW, vti.LMul, vti.AVL, vti.RegClass,
+ vti.ScalarRegClass, isSEWAware>;
+ }
+ }
+}
+
+multiclass VPatBinaryFPSDNode_R_VF<SDPatternOperator vop, string instruction_name,
+ bit isSEWAware = 0> {
+ foreach fvti = AllFloatVectors in
+ let Predicates = GetVTypePredicates<fvti>.Predicates in
+ def : Pat<(fvti.Vector (vop (fvti.Vector (SplatFPOp fvti.Scalar:$rs2)),
+ (fvti.Vector fvti.RegClass:$rs1))),
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_E"#fvti.SEW,
+ instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX))
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs1,
+ (fvti.Scalar fvti.ScalarRegClass:$rs2),
+ fvti.AVL, fvti.Log2SEW, TA_MA)>;
+}
+
+multiclass VPatBinaryFPSDNode_R_VF_RM<SDPatternOperator vop, string instruction_name,
+ bit isSEWAware = 0> {
foreach fvti = AllFloatVectors in
+ let Predicates = GetVTypePredicates<fvti>.Predicates in
def : Pat<(fvti.Vector (vop (fvti.Vector (SplatFPOp fvti.Scalar:$rs2)),
(fvti.Vector fvti.RegClass:$rs1))),
- (!cast<Instruction>(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_E"#fvti.SEW,
+ instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX))
+ (fvti.Vector (IMPLICIT_DEF)),
fvti.RegClass:$rs1,
(fvti.Scalar fvti.ScalarRegClass:$rs2),
- fvti.AVL, fvti.Log2SEW)>;
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
multiclass VPatIntegerSetCCSDNode_VV<string instruction_name,
CondCode cc> {
foreach vti = AllIntegerVectors in {
defvar instruction = !cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX);
+ let Predicates = GetVTypePredicates<vti>.Predicates in
def : Pat<(vti.Mask (setcc (vti.Vector vti.RegClass:$rs1),
(vti.Vector vti.RegClass:$rs2), cc)),
(instruction vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL,
@@ -186,6 +298,7 @@ multiclass VPatIntegerSetCCSDNode_VV_Swappable<string instruction_name,
: VPatIntegerSetCCSDNode_VV<instruction_name, cc> {
foreach vti = AllIntegerVectors in {
defvar instruction = !cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX);
+ let Predicates = GetVTypePredicates<vti>.Predicates in
def : Pat<(vti.Mask (setcc (vti.Vector vti.RegClass:$rs2),
(vti.Vector vti.RegClass:$rs1), invcc)),
(instruction vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL,
@@ -201,8 +314,9 @@ multiclass VPatIntegerSetCCSDNode_XI<
DAGOperand xop_kind> {
foreach vti = AllIntegerVectors in {
defvar instruction = !cast<Instruction>(instruction_name#_#kind#_#vti.LMul.MX);
+ let Predicates = GetVTypePredicates<vti>.Predicates in
def : Pat<(vti.Mask (setcc (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (SplatPatKind xop_kind:$rs2)), cc)),
+ (vti.Vector (SplatPatKind (XLenVT xop_kind:$rs2))), cc)),
(instruction vti.RegClass:$rs1, xop_kind:$rs2, vti.AVL, vti.Log2SEW)>;
}
}
@@ -216,12 +330,14 @@ multiclass VPatIntegerSetCCSDNode_XI_Swappable<string instruction_name,
xop_kind> {
foreach vti = AllIntegerVectors in {
defvar instruction = !cast<Instruction>(instruction_name#_#kind#_#vti.LMul.MX);
- def : Pat<(vti.Mask (setcc (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (SplatPatKind xop_kind:$rs2)), cc)),
- (instruction vti.RegClass:$rs1, xop_kind:$rs2, vti.AVL, vti.Log2SEW)>;
- def : Pat<(vti.Mask (setcc (vti.Vector (SplatPatKind xop_kind:$rs2)),
- (vti.Vector vti.RegClass:$rs1), invcc)),
- (instruction vti.RegClass:$rs1, xop_kind:$rs2, vti.AVL, vti.Log2SEW)>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Mask (setcc (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatPatKind (XLenVT xop_kind:$rs2))), cc)),
+ (instruction vti.RegClass:$rs1, xop_kind:$rs2, vti.AVL, vti.Log2SEW)>;
+ def : Pat<(vti.Mask (setcc (vti.Vector (SplatPatKind (XLenVT xop_kind:$rs2))),
+ (vti.Vector vti.RegClass:$rs1), invcc)),
+ (instruction vti.RegClass:$rs1, xop_kind:$rs2, vti.AVL, vti.Log2SEW)>;
+ }
}
}
@@ -237,6 +353,7 @@ multiclass VPatIntegerSetCCSDNode_VIPlus1<string instruction_name, CondCode cc,
ComplexPattern splatpat_kind> {
foreach vti = AllIntegerVectors in {
defvar instruction = !cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX);
+ let Predicates = GetVTypePredicates<vti>.Predicates in
def : Pat<(vti.Mask (setcc (vti.Vector vti.RegClass:$rs1),
(vti.Vector (splatpat_kind simm5:$rs2)),
cc)),
@@ -249,23 +366,25 @@ multiclass VPatFPSetCCSDNode_VV_VF_FV<CondCode cc,
string inst_name,
string swapped_op_inst_name> {
foreach fvti = AllFloatVectors in {
- def : Pat<(fvti.Mask (setcc (fvti.Vector fvti.RegClass:$rs1),
- (fvti.Vector fvti.RegClass:$rs2),
- cc)),
- (!cast<Instruction>(inst_name#"_VV_"#fvti.LMul.MX)
- fvti.RegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.Log2SEW)>;
- def : Pat<(fvti.Mask (setcc (fvti.Vector fvti.RegClass:$rs1),
- (SplatFPOp fvti.ScalarRegClass:$rs2),
- cc)),
- (!cast<Instruction>(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
- fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
- def : Pat<(fvti.Mask (setcc (SplatFPOp fvti.ScalarRegClass:$rs2),
- (fvti.Vector fvti.RegClass:$rs1),
- cc)),
- (!cast<Instruction>(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
- fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2,
- fvti.AVL, fvti.Log2SEW)>;
+ let Predicates = GetVTypePredicates<fvti>.Predicates in {
+ def : Pat<(fvti.Mask (setcc (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Vector fvti.RegClass:$rs2),
+ cc)),
+ (!cast<Instruction>(inst_name#"_VV_"#fvti.LMul.MX)
+ fvti.RegClass:$rs1, fvti.RegClass:$rs2, fvti.AVL, fvti.Log2SEW)>;
+ def : Pat<(fvti.Mask (setcc (fvti.Vector fvti.RegClass:$rs1),
+ (SplatFPOp fvti.ScalarRegClass:$rs2),
+ cc)),
+ (!cast<Instruction>(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
+ fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2,
+ fvti.AVL, fvti.Log2SEW)>;
+ def : Pat<(fvti.Mask (setcc (SplatFPOp fvti.ScalarRegClass:$rs2),
+ (fvti.Vector fvti.RegClass:$rs1),
+ cc)),
+ (!cast<Instruction>(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
+ fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2,
+ fvti.AVL, fvti.Log2SEW)>;
+ }
}
}
@@ -275,67 +394,103 @@ multiclass VPatExtendSDNode_V<list<SDNode> ops, string inst_name, string suffix,
defvar vti = vtiTofti.Vti;
defvar fti = vtiTofti.Fti;
foreach op = ops in
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fti>.Predicates) in
def : Pat<(vti.Vector (op (fti.Vector fti.RegClass:$rs2))),
(!cast<Instruction>(inst_name#"_"#suffix#"_"#vti.LMul.MX)
- fti.RegClass:$rs2, fti.AVL, vti.Log2SEW)>;
+ (vti.Vector (IMPLICIT_DEF)),
+ fti.RegClass:$rs2, fti.AVL, vti.Log2SEW, TU_MU)>;
}
}
-multiclass VPatConvertI2FPSDNode_V<SDNode vop, string instruction_name> {
+multiclass VPatConvertI2FPSDNode_V_RM<SDPatternOperator vop,
+ string instruction_name> {
foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<ivti>.Predicates) in
def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX)
- ivti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW)>;
+ (fvti.Vector (IMPLICIT_DEF)),
+ ivti.RegClass:$rs1,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ fvti.AVL, fvti.Log2SEW, TU_MU)>;
}
}
-multiclass VPatConvertFP2ISDNode_V<SDNode vop, string instruction_name> {
+multiclass VPatConvertFP2ISDNode_V<SDPatternOperator vop,
+ string instruction_name> {
foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<ivti>.Predicates) in
def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX)
- fvti.RegClass:$rs1, ivti.AVL, ivti.Log2SEW)>;
+ (ivti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs1, ivti.AVL, ivti.Log2SEW, TU_MU)>;
}
}
-multiclass VPatWConvertI2FPSDNode_V<SDNode vop, string instruction_name> {
+multiclass VPatWConvertI2FPSDNode_V<SDPatternOperator vop,
+ string instruction_name> {
foreach vtiToWti = AllWidenableIntToFloatVectors in {
defvar ivti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<ivti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX)
- ivti.RegClass:$rs1, ivti.AVL, ivti.Log2SEW)>;
+ (fwti.Vector (IMPLICIT_DEF)),
+ ivti.RegClass:$rs1,
+ ivti.AVL, ivti.Log2SEW, TU_MU)>;
}
}
-multiclass VPatWConvertFP2ISDNode_V<SDNode vop, string instruction_name> {
+multiclass VPatWConvertFP2ISDNode_V<SDPatternOperator vop,
+ string instruction_name> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<iwti>.Predicates) in
def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX)
- fvti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW)>;
+ (iwti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW, TU_MU)>;
}
}
-multiclass VPatNConvertI2FPSDNode_V<SDNode vop, string instruction_name> {
+multiclass VPatNConvertI2FPSDNode_W_RM<SDPatternOperator vop,
+ string instruction_name> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<iwti>.Predicates) in
def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX)
- iwti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW)>;
+ (fvti.Vector (IMPLICIT_DEF)),
+ iwti.RegClass:$rs1,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ fvti.AVL, fvti.Log2SEW, TU_MU)>;
}
}
-multiclass VPatNConvertFP2ISDNode_V<SDNode vop, string instruction_name> {
+multiclass VPatNConvertFP2ISDNode_W<SDPatternOperator vop,
+ string instruction_name> {
foreach vtiToWti = AllWidenableIntToFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1))),
(!cast<Instruction>(instruction_name#"_"#vti.LMul.MX)
- fwti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>;
+ (vti.Vector (IMPLICIT_DEF)),
+ fwti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
}
}
@@ -344,14 +499,19 @@ multiclass VPatWidenBinarySDNode_VV_VX<SDNode op, PatFrags extop1, PatFrags exto
foreach vtiToWti = AllWidenableIntVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
- def : Pat<(op (wti.Vector (extop1 (vti.Vector vti.RegClass:$rs2))),
- (wti.Vector (extop2 (vti.Vector vti.RegClass:$rs1)))),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
- vti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>;
- def : Pat<(op (wti.Vector (extop1 (vti.Vector vti.RegClass:$rs2))),
- (wti.Vector (extop2 (vti.Vector (SplatPat GPR:$rs1))))),
- (!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX)
- vti.RegClass:$rs2, GPR:$rs1, vti.AVL, vti.Log2SEW)>;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(op (wti.Vector (extop1 (vti.Vector vti.RegClass:$rs2))),
+ (wti.Vector (extop2 (vti.Vector vti.RegClass:$rs1)))),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
+ vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ def : Pat<(op (wti.Vector (extop1 (vti.Vector vti.RegClass:$rs2))),
+ (wti.Vector (extop2 (vti.Vector (SplatPat (XLenVT GPR:$rs1)))))),
+ (!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
+ GPR:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ }
}
}
@@ -360,28 +520,33 @@ multiclass VPatWidenBinarySDNode_WV_WX<SDNode op, PatFrags extop,
foreach vtiToWti = AllWidenableIntVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
- def : Pat<(op (wti.Vector wti.RegClass:$rs2),
- (wti.Vector (extop (vti.Vector vti.RegClass:$rs1)))),
- (!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX#"_TIED")
- wti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW,
- TAIL_AGNOSTIC)>;
- def : Pat<(op (wti.Vector wti.RegClass:$rs2),
- (wti.Vector (extop (vti.Vector (SplatPat GPR:$rs1))))),
- (!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
- wti.RegClass:$rs2, GPR:$rs1, vti.AVL, vti.Log2SEW)>;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(op (wti.Vector wti.RegClass:$rs2),
+ (wti.Vector (extop (vti.Vector vti.RegClass:$rs1)))),
+ (!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX#"_TIED")
+ wti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+ def : Pat<(op (wti.Vector wti.RegClass:$rs2),
+ (wti.Vector (extop (vti.Vector (SplatPat (XLenVT GPR:$rs1)))))),
+ (!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2, GPR:$rs1,
+ vti.AVL, vti.Log2SEW, TU_MU)>;
+ }
}
}
multiclass VPatWidenBinarySDNode_VV_VX_WV_WX<SDNode op, PatFrags extop,
- string instruction_name> {
- defm : VPatWidenBinarySDNode_VV_VX<op, extop, extop, instruction_name>;
- defm : VPatWidenBinarySDNode_WV_WX<op, extop, instruction_name>;
-}
+ string instruction_name>
+ : VPatWidenBinarySDNode_VV_VX<op, extop, extop, instruction_name>,
+ VPatWidenBinarySDNode_WV_WX<op, extop, instruction_name>;
multiclass VPatWidenMulAddSDNode_VV<PatFrags extop1, PatFrags extop2, string instruction_name> {
foreach vtiToWti = AllWidenableIntVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in
def : Pat<
(add (wti.Vector wti.RegClass:$rd),
(mul_oneuse (wti.Vector (extop1 (vti.Vector vti.RegClass:$rs1))),
@@ -396,9 +561,11 @@ multiclass VPatWidenMulAddSDNode_VX<PatFrags extop1, PatFrags extop2, string ins
foreach vtiToWti = AllWidenableIntVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in
def : Pat<
(add (wti.Vector wti.RegClass:$rd),
- (mul_oneuse (wti.Vector (extop1 (vti.Vector (SplatPat GPR:$rs1)))),
+ (mul_oneuse (wti.Vector (extop1 (vti.Vector (SplatPat (XLenVT GPR:$rs1))))),
(wti.Vector (extop2 (vti.Vector vti.RegClass:$rs2))))),
(!cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX)
wti.RegClass:$rd, GPR:$rs1, vti.RegClass:$rs2,
@@ -411,188 +578,302 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF<SDNode op, string instruction_name> {
foreach vtiToWti = AllWidenableFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
- def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), (XLenVT srcvalue))),
- (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector vti.RegClass:$rs1),
- (vti.Mask true_mask), (XLenVT srcvalue)))),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
- vti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>;
- def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), (XLenVT srcvalue))),
- (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
- (vti.Mask true_mask), (XLenVT srcvalue)))),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
- vti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>;
- def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), (XLenVT srcvalue))),
- (wti.Vector (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1)))),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
- vti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
+ vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
+ vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (wti.Vector (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1)))),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
+ vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
+ }
}
}
-multiclass VPatWidenBinaryFPSDNode_WV_WF<SDNode op, string instruction_name> {
+multiclass VPatWidenBinaryFPSDNode_VV_VF_RM<SDNode op, string instruction_name> {
foreach vtiToWti = AllWidenableFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
- def : Pat<(op (wti.Vector wti.RegClass:$rs2),
- (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector vti.RegClass:$rs1),
- (vti.Mask true_mask), (XLenVT srcvalue)))),
- (!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX#"_TIED")
- wti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW,
- TAIL_AGNOSTIC)>;
- def : Pat<(op (wti.Vector wti.RegClass:$rs2),
- (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
- (vti.Mask true_mask), (XLenVT srcvalue)))),
- (!cast<Instruction>(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX)
- wti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>;
- def : Pat<(op (wti.Vector wti.RegClass:$rs2),
- (wti.Vector (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1)))),
- (!cast<Instruction>(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX)
- wti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
+ vti.RegClass:$rs1,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ vti.AVL, vti.Log2SEW, TU_MU)>;
+ def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector (SplatFPOp (vti.Scalar vti.ScalarRegClass:$rs1))),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
+ vti.ScalarRegClass:$rs1,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ vti.AVL, vti.Log2SEW, TU_MU)>;
+ def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (wti.Vector (SplatFPOp (fpext_oneuse (vti.Scalar vti.ScalarRegClass:$rs1))))),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
+ vti.ScalarRegClass:$rs1,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ vti.AVL, vti.Log2SEW, TU_MU)>;
+ }
}
}
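In the rewritten _RM widening forms above, the binary FP op is matched when its operands come from single-use fp-extends of the narrower element type, and the widening pseudo is selected with FRM_DYN so the dynamic rounding mode is left untouched. As a rough per-element C sketch, with float/double standing in for the narrow/wide SEW pair and illustrative helper names:

  /* Widening FP binary ops: extend both narrow inputs, then operate in the
     wider precision. This is what vfwadd.vv / vfwsub.vv compute per element. */
  static double fwadd(float a, float b) { return (double)a + (double)b; }
  static double fwsub(float a, float b) { return (double)a - (double)b; }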
-multiclass VPatWidenBinaryFPSDNode_VV_VF_WV_WF<SDNode op, string instruction_name> {
- defm : VPatWidenBinaryFPSDNode_VV_VF<op, instruction_name>;
- defm : VPatWidenBinaryFPSDNode_WV_WF<op, instruction_name>;
+multiclass VPatWidenBinaryFPSDNode_WV_WF_RM<SDNode op, string instruction_name> {
+ foreach vtiToWti = AllWidenableFloatVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(op (wti.Vector wti.RegClass:$rs2),
+ (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ (!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX#"_TIED")
+ wti.RegClass:$rs2, vti.RegClass:$rs1,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ vti.AVL, vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+ def : Pat<(op (wti.Vector wti.RegClass:$rs2),
+ (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ (!cast<Instruction>(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2,
+ vti.ScalarRegClass:$rs1,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ vti.AVL, vti.Log2SEW, TU_MU)>;
+ def : Pat<(op (wti.Vector wti.RegClass:$rs2),
+ (wti.Vector (SplatFPOp (fpext_oneuse (vti.Scalar vti.ScalarRegClass:$rs1))))),
+ (!cast<Instruction>(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2,
+ vti.ScalarRegClass:$rs1,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ vti.AVL, vti.Log2SEW, TU_MU)>;
+ }
+ }
}
-multiclass VPatWidenFPMulAccSDNode_VV_VF<string instruction_name> {
+multiclass VPatWidenBinaryFPSDNode_VV_VF_WV_WF_RM<SDNode op,
+ string instruction_name>
+ : VPatWidenBinaryFPSDNode_VV_VF_RM<op, instruction_name>,
+ VPatWidenBinaryFPSDNode_WV_WF_RM<op, instruction_name>;
+
+multiclass VPatWidenFPMulAccSDNode_VV_VF_RM<string instruction_name> {
foreach vtiToWti = AllWidenableFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
- def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector vti.RegClass:$rs1),
- (vti.Mask true_mask), (XLenVT srcvalue))),
- (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), (XLenVT srcvalue))),
- (wti.Vector wti.RegClass:$rd)),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
- wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fma (wti.Vector (SplatFPOp
- (fpext_oneuse vti.ScalarRegClass:$rs1))),
- (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), (XLenVT srcvalue))),
- (wti.Vector wti.RegClass:$rd)),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
- wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (wti.Vector wti.RegClass:$rd)),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fma (wti.Vector (SplatFPOp
+ (fpext_oneuse (vti.Scalar vti.ScalarRegClass:$rs1)))),
+ (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (wti.Vector wti.RegClass:$rd)),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
}
}
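VPatWidenFPMulAccSDNode_VV_VF_RM matches an fma whose two multiplicands are single-use extends of the narrow type and whose addend is already wide, which is the shape vfwmacc computes. A per-element C sketch (float widening to double, helper name illustrative):

  #include <math.h>

  /* vfwmacc-style accumulate: widen the two narrow factors, then a single
     fused multiply-add into the wide accumulator. */
  static double fwmacc(double acc, float rs1, float rs2) {
    return fma((double)rs1, (double)rs2, acc);
  }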
-multiclass VPatWidenFPNegMulAccSDNode_VV_VF<string instruction_name> {
+multiclass VPatWidenFPNegMulAccSDNode_VV_VF_RM<string instruction_name> {
foreach vtiToWti = AllWidenableFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
- def : Pat<(fma (fneg (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector vti.RegClass:$rs1),
- (vti.Mask true_mask), (XLenVT srcvalue)))),
- (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), (XLenVT srcvalue)),
- (fneg wti.RegClass:$rd)),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
- wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fma (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1)),
- (fneg (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), (XLenVT srcvalue)))),
- (fneg wti.RegClass:$rd)),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
- wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fma (fneg (wti.Vector (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1)))),
- (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), (XLenVT srcvalue)),
- (fneg wti.RegClass:$rd)),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
- wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(fma (fneg (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue)),
+ (fneg wti.RegClass:$rd)),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fma (SplatFPOp (fpext_oneuse (vti.Scalar vti.ScalarRegClass:$rs1))),
+ (fneg (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ (fneg wti.RegClass:$rd)),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fma (fneg (wti.Vector (SplatFPOp (fpext_oneuse (vti.Scalar vti.ScalarRegClass:$rs1))))),
+ (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue)),
+ (fneg wti.RegClass:$rd)),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
}
}
-multiclass VPatWidenFPMulSacSDNode_VV_VF<string instruction_name> {
+multiclass VPatWidenFPMulSacSDNode_VV_VF_RM<string instruction_name> {
foreach vtiToWti = AllWidenableFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
- def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector vti.RegClass:$rs1),
- (vti.Mask true_mask), (XLenVT srcvalue))),
- (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), (XLenVT srcvalue)),
- (fneg wti.RegClass:$rd)),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
- wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fma (wti.Vector (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1))),
- (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), (XLenVT srcvalue)),
- (fneg wti.RegClass:$rd)),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
- wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(fma (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask true_mask), (XLenVT srcvalue))),
+ (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue)),
+ (fneg wti.RegClass:$rd)),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fma (wti.Vector (SplatFPOp (fpext_oneuse (vti.Scalar vti.ScalarRegClass:$rs1)))),
+ (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue)),
+ (fneg wti.RegClass:$rd)),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
}
}
-multiclass VPatWidenFPNegMulSacSDNode_VV_VF<string instruction_name> {
+multiclass VPatWidenFPNegMulSacSDNode_VV_VF_RM<string instruction_name> {
foreach vtiToWti = AllWidenableFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
- def : Pat<(fma (fneg (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector vti.RegClass:$rs1),
- (vti.Mask true_mask), (XLenVT srcvalue)))),
- (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), (XLenVT srcvalue)),
- wti.RegClass:$rd),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
- wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fma (wti.Vector (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1))),
- (fneg (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), (XLenVT srcvalue)))),
- wti.RegClass:$rd),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
- wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fma (fneg (wti.Vector (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1)))),
- (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), (XLenVT srcvalue)),
- wti.RegClass:$rd),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
- wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(fma (fneg (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue)),
+ wti.RegClass:$rd),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fma (wti.Vector (SplatFPOp (fpext_oneuse (vti.Scalar vti.ScalarRegClass:$rs1)))),
+ (fneg (wti.Vector (riscv_fpextend_vl_oneuse
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue)))),
+ wti.RegClass:$rd),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fma (fneg (wti.Vector (SplatFPOp (fpext_oneuse (vti.Scalar vti.ScalarRegClass:$rs1))))),
+ (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), (XLenVT srcvalue)),
+ wti.RegClass:$rd),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
}
}
multiclass VPatMultiplyAddSDNode_VV_VX<SDNode op, string instruction_name> {
foreach vti = AllIntegerVectors in {
defvar suffix = vti.LMul.MX;
- // NOTE: We choose VMADD because it has the most commuting freedom. So it
- // works best with how TwoAddressInstructionPass tries commuting.
- def : Pat<(vti.Vector (op vti.RegClass:$rs2,
- (mul_oneuse vti.RegClass:$rs1, vti.RegClass:$rd))),
- (!cast<Instruction>(instruction_name#"_VV_"# suffix)
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
- // The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally
- // commutable.
- def : Pat<(vti.Vector (op vti.RegClass:$rs2,
- (mul_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rd))),
- (!cast<Instruction>(instruction_name#"_VX_" # suffix)
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ // NOTE: We choose VMADD because it has the most commuting freedom. So it
+ // works best with how TwoAddressInstructionPass tries commuting.
+ def : Pat<(vti.Vector (op vti.RegClass:$rs2,
+ (mul_oneuse vti.RegClass:$rs1, vti.RegClass:$rd))),
+ (!cast<Instruction>(instruction_name#"_VV_"# suffix)
+ vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ // The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally
+ // commutable.
+ def : Pat<(vti.Vector (op vti.RegClass:$rs2,
+ (mul_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rd))),
+ (!cast<Instruction>(instruction_name#"_VX_" # suffix)
+ vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ vti.AVL, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
}
}
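The multiply-add patterns tie one multiplicand to the destination register, so VMADD (vd = vd * vs1 + vs2) is the natural pseudo, as the NOTE above explains. Per element, in C (illustrative helper, u32 elements chosen arbitrarily):

  #include <stdint.h>

  /* What the VV pattern matches: rs2 + (rs1 * rd), with rd also being the
     tied destination that vmadd.vv overwrites. Wraps modulo 2^32, matching
     element arithmetic. */
  static uint32_t vmadd_elem(uint32_t rd, uint32_t rs1, uint32_t rs2) {
    return rd * rs1 + rs2;
  }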
@@ -600,20 +881,22 @@ multiclass VPatMultiplyAddSDNode_VV_VX<SDNode op, string instruction_name> {
// Patterns.
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructions] in {
-
// 7.4. Vector Unit-Stride Instructions
foreach vti = !listconcat(FractionalGroupIntegerVectors,
FractionalGroupFloatVectors) in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatUSLoadStoreSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
vti.AVL, vti.RegClass>;
foreach vti = [VI8M1, VI16M1, VI32M1, VI64M1, VF16M1, VF32M1, VF64M1] in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
vti.RegClass>;
foreach vti = !listconcat(GroupIntegerVectors, GroupFloatVectors) in
+ let Predicates = GetVTypePredicates<vti>.Predicates in
defm : VPatUSLoadStoreWholeVRSDNode<vti.Vector, vti.Log2SEW, vti.LMul,
vti.RegClass>;
foreach mti = AllMasks in
+ let Predicates = [HasVInstructions] in
defm : VPatUSLoadStoreMaskSDNode<mti>;
// 11. Vector Integer Arithmetic Instructions
@@ -624,14 +907,22 @@ defm : VPatBinarySDNode_VV_VX<sub, "PseudoVSUB">;
// Handle VRSUB specially since it's the only integer binary op with reversed
// pattern operands
foreach vti = AllIntegerVectors in {
- def : Pat<(sub (vti.Vector (SplatPat GPR:$rs2)),
- (vti.Vector vti.RegClass:$rs1)),
- (!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX)
- vti.RegClass:$rs1, GPR:$rs2, vti.AVL, vti.Log2SEW)>;
- def : Pat<(sub (vti.Vector (SplatPat_simm5 simm5:$rs2)),
- (vti.Vector vti.RegClass:$rs1)),
- (!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX)
- vti.RegClass:$rs1, simm5:$rs2, vti.AVL, vti.Log2SEW)>;
+ // FIXME: The AddedComplexity here is covering up a missing matcher for
+ // widening vwsub.vx which can recognize an extend folded into the
+ // scalar of the splat.
+ let AddedComplexity = 20 in
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(sub (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
+ (vti.Vector vti.RegClass:$rs1)),
+ (!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
+ vti.AVL, vti.Log2SEW, TU_MU)>;
+ def : Pat<(sub (vti.Vector (SplatPat_simm5 simm5:$rs2)),
+ (vti.Vector vti.RegClass:$rs1)),
+ (!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ simm5:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>;
+ }
}
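VRSUB keeps the vector operand as the subtrahend: the matched DAG is splat(scalar) - v rather than v - splat(scalar). Per element (illustrative helper):

  #include <stdint.h>

  /* vrsub.vx / vrsub.vi: the scalar or immediate is the minuend,
     the vector element is subtracted from it. */
  static uint32_t vrsub_elem(uint32_t elem, uint32_t scalar) {
    return scalar - elem;
  }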
// 11.2. Vector Widening Integer Add and Subtract
@@ -643,6 +934,30 @@ defm : VPatWidenBinarySDNode_VV_VX_WV_WX<sub, sext_oneuse, "PseudoVWSUB">;
defm : VPatWidenBinarySDNode_VV_VX_WV_WX<sub, zext_oneuse, "PseudoVWSUBU">;
defm : VPatWidenBinarySDNode_VV_VX_WV_WX<sub, anyext_oneuse, "PseudoVWSUBU">;
+// shl (ext v, splat 1) is a special case of widening add.
+foreach vtiToWti = AllWidenableIntVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(shl (wti.Vector (sext_oneuse (vti.Vector vti.RegClass:$rs1))),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))),
+ (!cast<Instruction>("PseudoVWADD_VV_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1,
+ vti.AVL, vti.Log2SEW, TU_MU)>;
+ def : Pat<(shl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs1))),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))),
+ (!cast<Instruction>("PseudoVWADDU_VV_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1,
+ vti.AVL, vti.Log2SEW, TU_MU)>;
+ def : Pat<(shl (wti.Vector (anyext_oneuse (vti.Vector vti.RegClass:$rs1))),
+ (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))),
+ (!cast<Instruction>("PseudoVWADDU_VV_"#vti.LMul.MX)
+ (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1,
+ vti.AVL, vti.Log2SEW, TU_MU)>;
+ }
+}
+
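The block above folds shl of a single-use extend by a splat of 1 into a widening add of the source with itself: once the element width has been doubled, x << 1 and x + x are the same value and cannot overflow. A C check of the identity (i16 widening to i32, names illustrative):

  #include <stdint.h>

  /* (int32_t)x << 1 equals the widening add x + x for every int16_t x,
     which is why vwadd.vv/vwaddu.vv vd, vs1, vs1 can replace the shift. */
  static int32_t  wshl1_s(int16_t x)  { return (int32_t)x + (int32_t)x; }
  static uint32_t wshl1_u(uint16_t x) { return (uint32_t)x + (uint32_t)x; }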
// 11.3. Vector Integer Extension
defm : VPatExtendSDNode_V<[zext, anyext], "PseudoVZEXT", "VF2",
AllFractionableVF2IntVectors>;
@@ -669,10 +984,12 @@ defm : VPatBinarySDNode_VV_VX_VI<sra, "PseudoVSRA", uimm5>;
foreach vti = AllIntegerVectors in {
// Emit shift by 1 as an add since it might be faster.
+ let Predicates = GetVTypePredicates<vti>.Predicates in
def : Pat<(shl (vti.Vector vti.RegClass:$rs1),
(vti.Vector (riscv_vmv_v_x_vl (vti.Vector undef), 1, (XLenVT srcvalue)))),
(!cast<Instruction>("PseudoVADD_VV_"# vti.LMul.MX)
- vti.RegClass:$rs1, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>;
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>;
}
@@ -703,7 +1020,7 @@ defm : VPatIntegerSetCCSDNode_VI<"PseudoVMSGT", SETGT>;
defm : VPatIntegerSetCCSDNode_VI<"PseudoVMSGTU", SETUGT>;
defm : VPatIntegerSetCCSDNode_VIPlus1<"PseudoVMSLE", SETLT,
- SplatPat_simm5_plus1_nonzero>;
+ SplatPat_simm5_plus1>;
defm : VPatIntegerSetCCSDNode_VIPlus1<"PseudoVMSLEU", SETULT,
SplatPat_simm5_plus1_nonzero>;
defm : VPatIntegerSetCCSDNode_VIPlus1<"PseudoVMSGT", SETGE,
@@ -719,14 +1036,20 @@ defm : VPatBinarySDNode_VV_VX<smax, "PseudoVMAX">;
// 11.10. Vector Single-Width Integer Multiply Instructions
defm : VPatBinarySDNode_VV_VX<mul, "PseudoVMUL">;
-defm : VPatBinarySDNode_VV_VX<mulhs, "PseudoVMULH">;
-defm : VPatBinarySDNode_VV_VX<mulhu, "PseudoVMULHU">;
+
+defm : VPatBinarySDNode_VV_VX<mulhs, "PseudoVMULH", IntegerVectorsExceptI64>;
+defm : VPatBinarySDNode_VV_VX<mulhu, "PseudoVMULHU", IntegerVectorsExceptI64>;
+
+let Predicates = [HasVInstructionsFullMultiply] in {
+ defm : VPatBinarySDNode_VV_VX<mulhs, "PseudoVMULH", I64IntegerVectors>;
+ defm : VPatBinarySDNode_VV_VX<mulhu, "PseudoVMULHU", I64IntegerVectors>;
+}
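vmulh/vmulhu return the upper half of the double-width product; the SEW=64 forms, whose product needs 128 bits, are guarded separately by HasVInstructionsFullMultiply. Scalar model at SEW=32 (helper names illustrative, arithmetic right shift on signed assumed, as on typical targets):

  #include <stdint.h>

  /* High half of the widened product, the operation vmulh/vmulhu perform. */
  static int32_t  mulh32 (int32_t a, int32_t b)   { return (int32_t)(((int64_t)a * b) >> 32); }
  static uint32_t mulhu32(uint32_t a, uint32_t b) { return (uint32_t)(((uint64_t)a * b) >> 32); }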
// 11.11. Vector Integer Divide Instructions
-defm : VPatBinarySDNode_VV_VX<udiv, "PseudoVDIVU">;
-defm : VPatBinarySDNode_VV_VX<sdiv, "PseudoVDIV">;
-defm : VPatBinarySDNode_VV_VX<urem, "PseudoVREMU">;
-defm : VPatBinarySDNode_VV_VX<srem, "PseudoVREM">;
+defm : VPatBinarySDNode_VV_VX<udiv, "PseudoVDIVU", isSEWAware=1>;
+defm : VPatBinarySDNode_VV_VX<sdiv, "PseudoVDIV", isSEWAware=1>;
+defm : VPatBinarySDNode_VV_VX<urem, "PseudoVREMU", isSEWAware=1>;
+defm : VPatBinarySDNode_VV_VX<srem, "PseudoVREM", isSEWAware=1>;
// 11.12. Vector Widening Integer Multiply Instructions
defm : VPatWidenBinarySDNode_VV_VX<mul, sext_oneuse, sext_oneuse,
@@ -757,21 +1080,26 @@ defm : VPatWidenMulAddSDNode_VX<zext_oneuse, sext_oneuse, "PseudoVWMACCUS">;
// 11.15. Vector Integer Merge Instructions
foreach vti = AllIntegerVectors in {
- def : Pat<(vti.Vector (vselect (vti.Mask V0), vti.RegClass:$rs1,
- vti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX)
- vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask V0),
- vti.AVL, vti.Log2SEW)>;
-
- def : Pat<(vti.Vector (vselect (vti.Mask V0), (SplatPat XLenVT:$rs1),
- vti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX)
- vti.RegClass:$rs2, GPR:$rs1, (vti.Mask V0), vti.AVL, vti.Log2SEW)>;
-
- def : Pat<(vti.Vector (vselect (vti.Mask V0), (SplatPat_simm5 simm5:$rs1),
- vti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX)
- vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), vti.AVL, vti.Log2SEW)>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (vselect (vti.Mask V0), vti.RegClass:$rs1,
+ vti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask V0),
+ vti.AVL, vti.Log2SEW)>;
+
+ def : Pat<(vti.Vector (vselect (vti.Mask V0), (SplatPat XLenVT:$rs1),
+ vti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs2, GPR:$rs1, (vti.Mask V0), vti.AVL, vti.Log2SEW)>;
+
+ def : Pat<(vti.Vector (vselect (vti.Mask V0), (SplatPat_simm5 simm5:$rs1),
+ vti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), vti.AVL, vti.Log2SEW)>;
+ }
}
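vselect with the mask in V0 is selected to vmerge, which takes the first (true) operand where the mask bit is set and the second where it is clear. Per element (illustrative helper):

  #include <stdbool.h>
  #include <stdint.h>

  /* vmerge.vvm / .vxm / .vim: the mask bit chooses between the two sources. */
  static int32_t vmerge_elem(bool mask_bit, int32_t rs1_true, int32_t rs2_false) {
    return mask_bit ? rs1_true : rs2_false;
  }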
// 12. Vector Fixed-Point Arithmetic Instructions
@@ -786,163 +1114,206 @@ defm : VPatBinarySDNode_VV_VX<usubsat, "PseudoVSSUBU">;
// 15.1. Vector Mask-Register Logical Instructions
foreach mti = AllMasks in {
- def : Pat<(mti.Mask (and VR:$rs1, VR:$rs2)),
- (!cast<Instruction>("PseudoVMAND_MM_"#mti.LMul.MX)
- VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
- def : Pat<(mti.Mask (or VR:$rs1, VR:$rs2)),
- (!cast<Instruction>("PseudoVMOR_MM_"#mti.LMul.MX)
- VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
- def : Pat<(mti.Mask (xor VR:$rs1, VR:$rs2)),
- (!cast<Instruction>("PseudoVMXOR_MM_"#mti.LMul.MX)
- VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
-
- def : Pat<(mti.Mask (rvv_vnot (and VR:$rs1, VR:$rs2))),
- (!cast<Instruction>("PseudoVMNAND_MM_"#mti.LMul.MX)
- VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
- def : Pat<(mti.Mask (rvv_vnot (or VR:$rs1, VR:$rs2))),
- (!cast<Instruction>("PseudoVMNOR_MM_"#mti.LMul.MX)
- VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
- def : Pat<(mti.Mask (rvv_vnot (xor VR:$rs1, VR:$rs2))),
- (!cast<Instruction>("PseudoVMXNOR_MM_"#mti.LMul.MX)
- VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
-
- def : Pat<(mti.Mask (and VR:$rs1, (rvv_vnot VR:$rs2))),
- (!cast<Instruction>("PseudoVMANDN_MM_"#mti.LMul.MX)
- VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
- def : Pat<(mti.Mask (or VR:$rs1, (rvv_vnot VR:$rs2))),
- (!cast<Instruction>("PseudoVMORN_MM_"#mti.LMul.MX)
- VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
-
- // Handle rvv_vnot the same as the vmnot.m pseudoinstruction.
- def : Pat<(mti.Mask (rvv_vnot VR:$rs)),
- (!cast<Instruction>("PseudoVMNAND_MM_"#mti.LMul.MX)
- VR:$rs, VR:$rs, mti.AVL, mti.Log2SEW)>;
+ let Predicates = [HasVInstructions] in {
+ def : Pat<(mti.Mask (and VR:$rs1, VR:$rs2)),
+ (!cast<Instruction>("PseudoVMAND_MM_"#mti.LMul.MX)
+ VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
+ def : Pat<(mti.Mask (or VR:$rs1, VR:$rs2)),
+ (!cast<Instruction>("PseudoVMOR_MM_"#mti.LMul.MX)
+ VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
+ def : Pat<(mti.Mask (xor VR:$rs1, VR:$rs2)),
+ (!cast<Instruction>("PseudoVMXOR_MM_"#mti.LMul.MX)
+ VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
+
+ def : Pat<(mti.Mask (rvv_vnot (and VR:$rs1, VR:$rs2))),
+ (!cast<Instruction>("PseudoVMNAND_MM_"#mti.LMul.MX)
+ VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
+ def : Pat<(mti.Mask (rvv_vnot (or VR:$rs1, VR:$rs2))),
+ (!cast<Instruction>("PseudoVMNOR_MM_"#mti.LMul.MX)
+ VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
+ def : Pat<(mti.Mask (rvv_vnot (xor VR:$rs1, VR:$rs2))),
+ (!cast<Instruction>("PseudoVMXNOR_MM_"#mti.LMul.MX)
+ VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
+
+ def : Pat<(mti.Mask (and VR:$rs1, (rvv_vnot VR:$rs2))),
+ (!cast<Instruction>("PseudoVMANDN_MM_"#mti.LMul.MX)
+ VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
+ def : Pat<(mti.Mask (or VR:$rs1, (rvv_vnot VR:$rs2))),
+ (!cast<Instruction>("PseudoVMORN_MM_"#mti.LMul.MX)
+ VR:$rs1, VR:$rs2, mti.AVL, mti.Log2SEW)>;
+
+ // Handle rvv_vnot the same as the vmnot.m pseudoinstruction.
+ def : Pat<(mti.Mask (rvv_vnot VR:$rs)),
+ (!cast<Instruction>("PseudoVMNAND_MM_"#mti.LMul.MX)
+ VR:$rs, VR:$rs, mti.AVL, mti.Log2SEW)>;
+ }
}
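Mask-register logic maps directly onto the vmand/vmor/vmxor family; plain rvv_vnot has no dedicated encoding and, like the vmnot.m pseudoinstruction, is emitted as vmnand.mm of a register with itself. Per mask bit (illustrative helpers):

  #include <stdbool.h>

  /* ~(x & x) == ~x, so NOT is a self-NAND; ANDN/ORN fold the inner NOT. */
  static bool vmnot_bit (bool x)         { return !(x && x); }
  static bool vmandn_bit(bool a, bool b) { return a && !b;   }
  static bool vmorn_bit (bool a, bool b) { return a || !b;   }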
-} // Predicates = [HasVInstructions]
-
// 13. Vector Floating-Point Instructions
-let Predicates = [HasVInstructionsAnyF] in {
-
// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
-defm : VPatBinaryFPSDNode_VV_VF<fadd, "PseudoVFADD">;
-defm : VPatBinaryFPSDNode_VV_VF<fsub, "PseudoVFSUB">;
-defm : VPatBinaryFPSDNode_R_VF<fsub, "PseudoVFRSUB">;
+defm : VPatBinaryFPSDNode_VV_VF_RM<any_fadd, "PseudoVFADD">;
+defm : VPatBinaryFPSDNode_VV_VF_RM<any_fsub, "PseudoVFSUB">;
+defm : VPatBinaryFPSDNode_R_VF_RM<any_fsub, "PseudoVFRSUB">;
// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
-defm : VPatWidenBinaryFPSDNode_VV_VF_WV_WF<fadd, "PseudoVFWADD">;
-defm : VPatWidenBinaryFPSDNode_VV_VF_WV_WF<fsub, "PseudoVFWSUB">;
+defm : VPatWidenBinaryFPSDNode_VV_VF_WV_WF_RM<fadd, "PseudoVFWADD">;
+defm : VPatWidenBinaryFPSDNode_VV_VF_WV_WF_RM<fsub, "PseudoVFWSUB">;
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
-defm : VPatBinaryFPSDNode_VV_VF<fmul, "PseudoVFMUL">;
-defm : VPatBinaryFPSDNode_VV_VF<fdiv, "PseudoVFDIV">;
-defm : VPatBinaryFPSDNode_R_VF<fdiv, "PseudoVFRDIV">;
+defm : VPatBinaryFPSDNode_VV_VF_RM<any_fmul, "PseudoVFMUL">;
+defm : VPatBinaryFPSDNode_VV_VF_RM<any_fdiv, "PseudoVFDIV", isSEWAware=1>;
+defm : VPatBinaryFPSDNode_R_VF_RM<any_fdiv, "PseudoVFRDIV", isSEWAware=1>;
// 13.5. Vector Widening Floating-Point Multiply Instructions
-defm : VPatWidenBinaryFPSDNode_VV_VF<fmul, "PseudoVFWMUL">;
+defm : VPatWidenBinaryFPSDNode_VV_VF_RM<fmul, "PseudoVFWMUL">;
// 13.6 Vector Single-Width Floating-Point Fused Multiply-Add Instructions.
foreach fvti = AllFloatVectors in {
// NOTE: We choose VFMADD because it has the most commuting freedom. So it
// works best with how TwoAddressInstructionPass tries commuting.
defvar suffix = fvti.LMul.MX;
- def : Pat<(fvti.Vector (fma fvti.RegClass:$rs1, fvti.RegClass:$rd,
- fvti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVFMADD_VV_"# suffix)
- fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fvti.Vector (fma fvti.RegClass:$rs1, fvti.RegClass:$rd,
- (fneg fvti.RegClass:$rs2))),
- (!cast<Instruction>("PseudoVFMSUB_VV_"# suffix)
- fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fvti.Vector (fma (fneg fvti.RegClass:$rs1), fvti.RegClass:$rd,
- (fneg fvti.RegClass:$rs2))),
- (!cast<Instruction>("PseudoVFNMADD_VV_"# suffix)
- fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fvti.Vector (fma (fneg fvti.RegClass:$rs1), fvti.RegClass:$rd,
- fvti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVFNMSUB_VV_"# suffix)
- fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
-
- // The choice of VFMADD here is arbitrary, vfmadd.vf and vfmacc.vf are equally
- // commutable.
- def : Pat<(fvti.Vector (fma (SplatFPOp fvti.ScalarRegClass:$rs1),
- fvti.RegClass:$rd, fvti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVFMADD_V" # fvti.ScalarSuffix # "_" # suffix)
- fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fvti.Vector (fma (SplatFPOp fvti.ScalarRegClass:$rs1),
- fvti.RegClass:$rd, (fneg fvti.RegClass:$rs2))),
- (!cast<Instruction>("PseudoVFMSUB_V" # fvti.ScalarSuffix # "_" # suffix)
- fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
-
- def : Pat<(fvti.Vector (fma (SplatFPOp fvti.ScalarRegClass:$rs1),
- (fneg fvti.RegClass:$rd), (fneg fvti.RegClass:$rs2))),
- (!cast<Instruction>("PseudoVFNMADD_V" # fvti.ScalarSuffix # "_" # suffix)
- fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fvti.Vector (fma (SplatFPOp fvti.ScalarRegClass:$rs1),
- (fneg fvti.RegClass:$rd), fvti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVFNMSUB_V" # fvti.ScalarSuffix # "_" # suffix)
- fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
-
- // The splat might be negated.
- def : Pat<(fvti.Vector (fma (fneg (SplatFPOp fvti.ScalarRegClass:$rs1)),
- fvti.RegClass:$rd, (fneg fvti.RegClass:$rs2))),
- (!cast<Instruction>("PseudoVFNMADD_V" # fvti.ScalarSuffix # "_" # suffix)
- fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fvti.Vector (fma (fneg (SplatFPOp fvti.ScalarRegClass:$rs1)),
- fvti.RegClass:$rd, fvti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVFNMSUB_V" # fvti.ScalarSuffix # "_" # suffix)
- fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
- fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ let Predicates = GetVTypePredicates<fvti>.Predicates in {
+ def : Pat<(fvti.Vector (any_fma fvti.RegClass:$rs1, fvti.RegClass:$rd,
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVFMADD_VV_"# suffix)
+ fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fvti.Vector (any_fma fvti.RegClass:$rs1, fvti.RegClass:$rd,
+ (fneg fvti.RegClass:$rs2))),
+ (!cast<Instruction>("PseudoVFMSUB_VV_"# suffix)
+ fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fvti.Vector (any_fma (fneg fvti.RegClass:$rs1), fvti.RegClass:$rd,
+ (fneg fvti.RegClass:$rs2))),
+ (!cast<Instruction>("PseudoVFNMADD_VV_"# suffix)
+ fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fvti.Vector (any_fma (fneg fvti.RegClass:$rs1), fvti.RegClass:$rd,
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVFNMSUB_VV_"# suffix)
+ fvti.RegClass:$rd, fvti.RegClass:$rs1, fvti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ // The choice of VFMADD here is arbitrary, vfmadd.vf and vfmacc.vf are equally
+ // commutable.
+ def : Pat<(fvti.Vector (any_fma (SplatFPOp fvti.ScalarRegClass:$rs1),
+ fvti.RegClass:$rd, fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVFMADD_V" # fvti.ScalarSuffix # "_" # suffix)
+ fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fvti.Vector (any_fma (SplatFPOp fvti.ScalarRegClass:$rs1),
+ fvti.RegClass:$rd, (fneg fvti.RegClass:$rs2))),
+ (!cast<Instruction>("PseudoVFMSUB_V" # fvti.ScalarSuffix # "_" # suffix)
+ fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ def : Pat<(fvti.Vector (any_fma (SplatFPOp fvti.ScalarRegClass:$rs1),
+ (fneg fvti.RegClass:$rd), (fneg fvti.RegClass:$rs2))),
+ (!cast<Instruction>("PseudoVFNMADD_V" # fvti.ScalarSuffix # "_" # suffix)
+ fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fvti.Vector (any_fma (SplatFPOp fvti.ScalarRegClass:$rs1),
+ (fneg fvti.RegClass:$rd), fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVFNMSUB_V" # fvti.ScalarSuffix # "_" # suffix)
+ fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+
+ // The splat might be negated.
+ def : Pat<(fvti.Vector (any_fma (fneg (SplatFPOp fvti.ScalarRegClass:$rs1)),
+ fvti.RegClass:$rd, (fneg fvti.RegClass:$rs2))),
+ (!cast<Instruction>("PseudoVFNMADD_V" # fvti.ScalarSuffix # "_" # suffix)
+ fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(fvti.Vector (any_fma (fneg (SplatFPOp fvti.ScalarRegClass:$rs1)),
+ fvti.RegClass:$rd, fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVFNMSUB_V" # fvti.ScalarSuffix # "_" # suffix)
+ fvti.RegClass:$rd, fvti.ScalarRegClass:$rs1, fvti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ fvti.AVL, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
}
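The single-width FP FMA patterns canonicalize every placement of fneg (on a factor, on the addend, or on a negated splat) onto one of four pseudos, all with one multiplicand tied to the destination. In scalar C, using fused fma for one element (helper names illustrative):

  #include <math.h>

  /* vfmadd : vd =  (vd * vs1) + vs2      vfmsub : vd =  (vd * vs1) - vs2
     vfnmadd: vd = -(vd * vs1) - vs2      vfnmsub: vd = -(vd * vs1) + vs2 */
  static double vfmadd_e (double vd, double vs1, double vs2) { return fma( vd, vs1,  vs2); }
  static double vfmsub_e (double vd, double vs1, double vs2) { return fma( vd, vs1, -vs2); }
  static double vfnmadd_e(double vd, double vs1, double vs2) { return fma(-vd, vs1, -vs2); }
  static double vfnmsub_e(double vd, double vs1, double vs2) { return fma(-vd, vs1,  vs2); }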
// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
-defm : VPatWidenFPMulAccSDNode_VV_VF<"PseudoVFWMACC">;
-defm : VPatWidenFPNegMulAccSDNode_VV_VF<"PseudoVFWNMACC">;
-defm : VPatWidenFPMulSacSDNode_VV_VF<"PseudoVFWMSAC">;
-defm : VPatWidenFPNegMulSacSDNode_VV_VF<"PseudoVFWNMSAC">;
+defm : VPatWidenFPMulAccSDNode_VV_VF_RM<"PseudoVFWMACC">;
+defm : VPatWidenFPNegMulAccSDNode_VV_VF_RM<"PseudoVFWNMACC">;
+defm : VPatWidenFPMulSacSDNode_VV_VF_RM<"PseudoVFWMSAC">;
+defm : VPatWidenFPNegMulSacSDNode_VV_VF_RM<"PseudoVFWNMSAC">;
foreach vti = AllFloatVectors in {
- // 13.8. Vector Floating-Point Square-Root Instruction
- def : Pat<(fsqrt (vti.Vector vti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVFSQRT_V_"# vti.LMul.MX)
- vti.RegClass:$rs2, vti.AVL, vti.Log2SEW)>;
-
- // 13.12. Vector Floating-Point Sign-Injection Instructions
- def : Pat<(fabs (vti.Vector vti.RegClass:$rs)),
- (!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX)
- vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW)>;
- // Handle fneg with VFSGNJN using the same input for both operands.
- def : Pat<(fneg (vti.Vector vti.RegClass:$rs)),
- (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
- vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW)>;
-
- def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
- (vti.Vector vti.RegClass:$rs2))),
- (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX)
- vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW)>;
- def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))),
- (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
- vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW)>;
-
- def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (fneg vti.RegClass:$rs2)))),
- (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
- vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW)>;
- def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))),
- (!cast<Instruction>("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
- vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW)>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ // 13.8. Vector Floating-Point Square-Root Instruction
+ def : Pat<(any_fsqrt (vti.Vector vti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVFSQRT_V_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ vti.AVL, vti.Log2SEW, TU_MU)>;
+
+ // 13.12. Vector Floating-Point Sign-Injection Instructions
+ def : Pat<(fabs (vti.Vector vti.RegClass:$rs)),
+ (!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TU_MU)>;
+ // Handle fneg with VFSGNJN using the same input for both operands.
+ def : Pat<(fneg (vti.Vector vti.RegClass:$rs)),
+ (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TU_MU)>;
+
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2))),
+ (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>;
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))),
+ (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>;
+
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (fneg vti.RegClass:$rs2)))),
+ (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>;
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))),
+ (!cast<Instruction>("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>;
+ }
}
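fabs, fneg and fcopysign all lower to the sign-injection instructions: with both sources equal, vfsgnjx clears the sign (sign xor itself is 0) and vfsgnjn flips it, while vfsgnj/vfsgnjn cover copysign of a plain or negated source. Bit-level sketch for one float element (function name and mode encoding are illustrative):

  #include <stdint.h>
  #include <string.h>

  /* Magnitude of a, sign taken from b ('j'), from ~b ('n'), or a^b ('x'). */
  static float fsgnj_elem(float a, float b, char mode) {
    uint32_t ua, ub, sign, ur;
    memcpy(&ua, &a, sizeof ua);
    memcpy(&ub, &b, sizeof ub);
    sign = (mode == 'j') ?  (ub & 0x80000000u)
         : (mode == 'n') ? (~ub & 0x80000000u)
         :                 ((ua ^ ub) & 0x80000000u);
    ur = (ua & 0x7fffffffu) | sign;
    float r;
    memcpy(&r, &ur, sizeof r);
    return r;
  }
  /* fabs(x) == fsgnj_elem(x, x, 'x');  -x == fsgnj_elem(x, x, 'n'). */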
// 13.11. Vector Floating-Point MIN/MAX Instructions
@@ -966,74 +1337,86 @@ defm : VPatFPSetCCSDNode_VV_VF_FV<SETOLE, "PseudoVMFLE", "PseudoVMFGE">;
// 11.15. Vector Integer Merge Instructions
// 13.15. Vector Floating-Point Merge Instruction
foreach fvti = AllFloatVectors in {
- def : Pat<(fvti.Vector (vselect (fvti.Mask V0), fvti.RegClass:$rs1,
- fvti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0),
- fvti.AVL, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
- (SplatFPOp fvti.ScalarRegClass:$rs1),
- fvti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
- fvti.RegClass:$rs2,
- (fvti.Scalar fvti.ScalarRegClass:$rs1),
- (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
- (SplatFPOp (fvti.Scalar fpimm0)),
- fvti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
+ let Predicates = GetVTypePredicates<fvti>.Predicates in {
+ def : Pat<(fvti.Vector (vselect (fvti.Mask V0), fvti.RegClass:$rs1,
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0),
+ fvti.AVL, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
+ (SplatFPOp fvti.ScalarRegClass:$rs1),
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2,
+ (fvti.Scalar fvti.ScalarRegClass:$rs1),
+ (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (vselect (fvti.Mask V0),
+ (SplatFPOp (fvti.Scalar fpimm0)),
+ fvti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>;
+ }
}
// 13.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
-defm : VPatConvertFP2ISDNode_V<fp_to_sint, "PseudoVFCVT_RTZ_X_F_V">;
-defm : VPatConvertFP2ISDNode_V<fp_to_uint, "PseudoVFCVT_RTZ_XU_F_V">;
-defm : VPatConvertI2FPSDNode_V<sint_to_fp, "PseudoVFCVT_F_X_V">;
-defm : VPatConvertI2FPSDNode_V<uint_to_fp, "PseudoVFCVT_F_XU_V">;
+defm : VPatConvertFP2ISDNode_V<any_fp_to_sint, "PseudoVFCVT_RTZ_X_F_V">;
+defm : VPatConvertFP2ISDNode_V<any_fp_to_uint, "PseudoVFCVT_RTZ_XU_F_V">;
+defm : VPatConvertI2FPSDNode_V_RM<any_sint_to_fp, "PseudoVFCVT_F_X_V">;
+defm : VPatConvertI2FPSDNode_V_RM<any_uint_to_fp, "PseudoVFCVT_F_XU_V">;
// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
-defm : VPatWConvertFP2ISDNode_V<fp_to_sint, "PseudoVFWCVT_RTZ_X_F_V">;
-defm : VPatWConvertFP2ISDNode_V<fp_to_uint, "PseudoVFWCVT_RTZ_XU_F_V">;
-defm : VPatWConvertI2FPSDNode_V<sint_to_fp, "PseudoVFWCVT_F_X_V">;
-defm : VPatWConvertI2FPSDNode_V<uint_to_fp, "PseudoVFWCVT_F_XU_V">;
+defm : VPatWConvertFP2ISDNode_V<any_fp_to_sint, "PseudoVFWCVT_RTZ_X_F_V">;
+defm : VPatWConvertFP2ISDNode_V<any_fp_to_uint, "PseudoVFWCVT_RTZ_XU_F_V">;
+defm : VPatWConvertI2FPSDNode_V<any_sint_to_fp, "PseudoVFWCVT_F_X_V">;
+defm : VPatWConvertI2FPSDNode_V<any_uint_to_fp, "PseudoVFWCVT_F_XU_V">;
// 13.19. Narrowing Floating-Point/Integer Type-Convert Instructions
-defm : VPatNConvertFP2ISDNode_V<fp_to_sint, "PseudoVFNCVT_RTZ_X_F_W">;
-defm : VPatNConvertFP2ISDNode_V<fp_to_uint, "PseudoVFNCVT_RTZ_XU_F_W">;
-defm : VPatNConvertI2FPSDNode_V<sint_to_fp, "PseudoVFNCVT_F_X_W">;
-defm : VPatNConvertI2FPSDNode_V<uint_to_fp, "PseudoVFNCVT_F_XU_W">;
+defm : VPatNConvertFP2ISDNode_W<any_fp_to_sint, "PseudoVFNCVT_RTZ_X_F_W">;
+defm : VPatNConvertFP2ISDNode_W<any_fp_to_uint, "PseudoVFNCVT_RTZ_XU_F_W">;
+defm : VPatNConvertI2FPSDNode_W_RM<any_sint_to_fp, "PseudoVFNCVT_F_X_W">;
+defm : VPatNConvertI2FPSDNode_W_RM<any_uint_to_fp, "PseudoVFNCVT_F_XU_W">;
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar fwti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))),
(!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX)
- fwti.RegClass:$rs1, fvti.AVL, fvti.Log2SEW)>;
+ (fvti.Vector (IMPLICIT_DEF)),
+ fwti.RegClass:$rs1,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ fvti.AVL, fvti.Log2SEW, TU_MU)>;
}
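The narrowing convert pattern selects vfncvt.f.f.w with FRM_DYN, i.e. a wide-to-narrow FP conversion in the current dynamic rounding mode; per element this is just the narrowing cast (double/float pair assumed for illustration):

  /* Scalar model of vfncvt.f.f.w: round the wide source to the narrow type. */
  static float fncvt_f_f_w(double x) { return (float)x; }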
-} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// Vector Splats
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructionsAnyF] in {
foreach fvti = AllFloatVectors in {
- def : Pat<(fvti.Vector (SplatFPOp fvti.ScalarRegClass:$rs1)),
- (!cast<Instruction>("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
- (fvti.Scalar fvti.ScalarRegClass:$rs1),
- fvti.AVL, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (SplatFPOp (fvti.Scalar fpimm0))),
- (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
- 0, fvti.AVL, fvti.Log2SEW)>;
+ let Predicates = GetVTypePredicates<fvti>.Predicates in {
+ def : Pat<(fvti.Vector (SplatFPOp fvti.ScalarRegClass:$rs1)),
+ (!cast<Instruction>("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ (fvti.Scalar fvti.ScalarRegClass:$rs1),
+ fvti.AVL, fvti.Log2SEW, TU_MU)>;
+
+ def : Pat<(fvti.Vector (SplatFPOp (fvti.Scalar fpimm0))),
+ (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ 0, fvti.AVL, fvti.Log2SEW, TU_MU)>;
+ }
}
-} // Predicates = [HasVInstructionsAnyF]
//===----------------------------------------------------------------------===//
// Vector Element Extracts
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructionsAnyF] in
foreach vti = AllFloatVectors in {
defvar vmv_f_s_inst = !cast<Instruction>(!strconcat("PseudoVFMV_",
vti.ScalarSuffix,
@@ -1041,6 +1424,7 @@ foreach vti = AllFloatVectors in {
// Only pattern-match extract-element operations where the index is 0. Any
// other index will have been custom-lowered to slide the vector correctly
// into place.
+ let Predicates = GetVTypePredicates<vti>.Predicates in
def : Pat<(vti.Scalar (extractelt (vti.Vector vti.RegClass:$rs2), 0)),
(vmv_f_s_inst vti.RegClass:$rs2, vti.Log2SEW)>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 451962daeada..900f9dd1be05 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -21,6 +21,13 @@
// Helpers to define the VL patterns.
//===----------------------------------------------------------------------===//
+def SDT_RISCVIntUnOp_VL : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisVec<0>, SDTCisInt<0>,
+ SDTCVecEltisVT<3, i1>,
+ SDTCisSameNumEltsAs<0, 3>,
+ SDTCisVT<4, XLenVT>]>;
+
def SDT_RISCVIntBinOp_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisVec<0>, SDTCisInt<0>,
@@ -50,6 +57,11 @@ def SDT_RISCVCopySign_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
SDTCisSameNumEltsAs<0, 4>,
SDTCisVT<5, XLenVT>]>;
+def riscv_vmv_v_v_vl : SDNode<"RISCVISD::VMV_V_V_VL",
+ SDTypeProfile<1, 3, [SDTCisVec<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisVT<3, XLenVT>]>>;
def riscv_vmv_v_x_vl : SDNode<"RISCVISD::VMV_V_X_VL",
SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<0>,
SDTCisSameAs<0, 1>,
@@ -91,6 +103,12 @@ def riscv_smax_vl : SDNode<"RISCVISD::SMAX_VL", SDT_RISCVIntBinOp_VL, [SDNPCom
def riscv_umin_vl : SDNode<"RISCVISD::UMIN_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_umax_vl : SDNode<"RISCVISD::UMAX_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
+def riscv_bitreverse_vl : SDNode<"RISCVISD::BITREVERSE_VL", SDT_RISCVIntUnOp_VL>;
+def riscv_bswap_vl : SDNode<"RISCVISD::BSWAP_VL", SDT_RISCVIntUnOp_VL>;
+def riscv_ctlz_vl : SDNode<"RISCVISD::CTLZ_VL", SDT_RISCVIntUnOp_VL>;
+def riscv_cttz_vl : SDNode<"RISCVISD::CTTZ_VL", SDT_RISCVIntUnOp_VL>;
+def riscv_ctpop_vl : SDNode<"RISCVISD::CTPOP_VL", SDT_RISCVIntUnOp_VL>;
+
def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_ssubsat_vl : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>;
@@ -107,6 +125,37 @@ def riscv_fcopysign_vl : SDNode<"RISCVISD::FCOPYSIGN_VL", SDT_RISCVCopySign_VL>;
def riscv_fminnum_vl : SDNode<"RISCVISD::FMINNUM_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
def riscv_fmaxnum_vl : SDNode<"RISCVISD::FMAXNUM_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative]>;
+def riscv_strict_fadd_vl : SDNode<"RISCVISD::STRICT_FADD_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative, SDNPHasChain]>;
+def riscv_strict_fsub_vl : SDNode<"RISCVISD::STRICT_FSUB_VL", SDT_RISCVFPBinOp_VL, [SDNPHasChain]>;
+def riscv_strict_fmul_vl : SDNode<"RISCVISD::STRICT_FMUL_VL", SDT_RISCVFPBinOp_VL, [SDNPCommutative, SDNPHasChain]>;
+def riscv_strict_fdiv_vl : SDNode<"RISCVISD::STRICT_FDIV_VL", SDT_RISCVFPBinOp_VL, [SDNPHasChain]>;
+def riscv_strict_fsqrt_vl : SDNode<"RISCVISD::STRICT_FSQRT_VL", SDT_RISCVFPUnOp_VL, [SDNPHasChain]>;
+
+def any_riscv_fadd_vl : PatFrags<(ops node:$lhs, node:$rhs, node:$merge, node:$mask, node:$vl),
+ [(riscv_fadd_vl node:$lhs, node:$rhs, node:$merge, node:$mask, node:$vl),
+ (riscv_strict_fadd_vl node:$lhs, node:$rhs, node:$merge, node:$mask, node:$vl)]>;
+def any_riscv_fsub_vl : PatFrags<(ops node:$lhs, node:$rhs, node:$merge, node:$mask, node:$vl),
+ [(riscv_fsub_vl node:$lhs, node:$rhs, node:$merge, node:$mask, node:$vl),
+ (riscv_strict_fsub_vl node:$lhs, node:$rhs, node:$merge, node:$mask, node:$vl)]>;
+def any_riscv_fmul_vl : PatFrags<(ops node:$lhs, node:$rhs, node:$merge, node:$mask, node:$vl),
+ [(riscv_fmul_vl node:$lhs, node:$rhs, node:$merge, node:$mask, node:$vl),
+ (riscv_strict_fmul_vl node:$lhs, node:$rhs, node:$merge, node:$mask, node:$vl)]>;
+def any_riscv_fdiv_vl : PatFrags<(ops node:$lhs, node:$rhs, node:$merge, node:$mask, node:$vl),
+ [(riscv_fdiv_vl node:$lhs, node:$rhs, node:$merge, node:$mask, node:$vl),
+ (riscv_strict_fdiv_vl node:$lhs, node:$rhs, node:$merge, node:$mask, node:$vl)]>;
+def any_riscv_fsqrt_vl : PatFrags<(ops node:$src, node:$mask, node:$vl),
+ [(riscv_fsqrt_vl node:$src, node:$mask, node:$vl),
+ (riscv_strict_fsqrt_vl node:$src, node:$mask, node:$vl)]>;
+
+def riscv_fclass_vl : SDNode<"RISCVISD::FCLASS_VL",
+ SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisVec<0>,
+ SDTCisFP<1>, SDTCisVec<1>,
+ SDTCisSameSizeAs<0, 1>,
+ SDTCisSameNumEltsAs<0, 1>,
+ SDTCVecEltisVT<2, i1>,
+ SDTCisSameNumEltsAs<0, 2>,
+ SDTCisVT<3, XLenVT>]>>;
+
def SDT_RISCVVecFMA_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>,
@@ -114,11 +163,43 @@ def SDT_RISCVVecFMA_VL : SDTypeProfile<1, 5, [SDTCisSameAs<0, 1>,
SDTCVecEltisVT<4, i1>,
SDTCisSameNumEltsAs<0, 4>,
SDTCisVT<5, XLenVT>]>;
-def riscv_vfmadd_vl : SDNode<"RISCVISD::VFMADD_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
+def riscv_vfmadd_vl : SDNode<"RISCVISD::VFMADD_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
def riscv_vfnmadd_vl : SDNode<"RISCVISD::VFNMADD_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
-def riscv_vfmsub_vl : SDNode<"RISCVISD::VFMSUB_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
+def riscv_vfmsub_vl : SDNode<"RISCVISD::VFMSUB_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
def riscv_vfnmsub_vl : SDNode<"RISCVISD::VFNMSUB_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative]>;
+def SDT_RISCVWVecFMA_VL : SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisFP<0>,
+ SDTCisVec<1>, SDTCisFP<1>,
+ SDTCisOpSmallerThanOp<1, 0>,
+ SDTCisSameNumEltsAs<0, 1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCVecEltisVT<4, i1>,
+ SDTCisSameNumEltsAs<0, 4>,
+ SDTCisVT<5, XLenVT>]>;
+def riscv_vfwmadd_vl : SDNode<"RISCVISD::VFWMADD_VL", SDT_RISCVWVecFMA_VL, [SDNPCommutative]>;
+def riscv_vfwnmadd_vl : SDNode<"RISCVISD::VFWNMADD_VL", SDT_RISCVWVecFMA_VL, [SDNPCommutative]>;
+def riscv_vfwmsub_vl : SDNode<"RISCVISD::VFWMSUB_VL", SDT_RISCVWVecFMA_VL, [SDNPCommutative]>;
+def riscv_vfwnmsub_vl : SDNode<"RISCVISD::VFWNMSUB_VL", SDT_RISCVWVecFMA_VL, [SDNPCommutative]>;
+
+def riscv_strict_vfmadd_vl : SDNode<"RISCVISD::STRICT_VFMADD_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative, SDNPHasChain]>;
+def riscv_strict_vfnmadd_vl : SDNode<"RISCVISD::STRICT_VFNMADD_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative, SDNPHasChain]>;
+def riscv_strict_vfmsub_vl : SDNode<"RISCVISD::STRICT_VFMSUB_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative, SDNPHasChain]>;
+def riscv_strict_vfnmsub_vl : SDNode<"RISCVISD::STRICT_VFNMSUB_VL", SDT_RISCVVecFMA_VL, [SDNPCommutative, SDNPHasChain]>;
+
+def any_riscv_vfmadd_vl : PatFrags<(ops node:$rs1, node:$rs2, node:$rs3, node:$mask, node:$vl),
+ [(riscv_vfmadd_vl node:$rs1, node:$rs2, node:$rs3, node:$mask, node:$vl),
+ (riscv_strict_vfmadd_vl node:$rs1, node:$rs2, node:$rs3, node:$mask, node:$vl)]>;
+def any_riscv_vfnmadd_vl : PatFrags<(ops node:$rs1, node:$rs2, node:$rs3, node:$mask, node:$vl),
+ [(riscv_vfnmadd_vl node:$rs1, node:$rs2, node:$rs3, node:$mask, node:$vl),
+ (riscv_strict_vfnmadd_vl node:$rs1, node:$rs2, node:$rs3, node:$mask, node:$vl)]>;
+def any_riscv_vfmsub_vl : PatFrags<(ops node:$rs1, node:$rs2, node:$rs3, node:$mask, node:$vl),
+ [(riscv_vfmsub_vl node:$rs1, node:$rs2, node:$rs3, node:$mask, node:$vl),
+ (riscv_strict_vfmsub_vl node:$rs1, node:$rs2, node:$rs3, node:$mask, node:$vl)]>;
+def any_riscv_vfnmsub_vl : PatFrags<(ops node:$rs1, node:$rs2, node:$rs3, node:$mask, node:$vl),
+ [(riscv_vfnmsub_vl node:$rs1, node:$rs2, node:$rs3, node:$mask, node:$vl),
+ (riscv_strict_vfnmsub_vl node:$rs1, node:$rs2, node:$rs3, node:$mask, node:$vl)]>;
+
def SDT_RISCVFPRoundOp_VL : SDTypeProfile<1, 3, [
SDTCisFP<0>, SDTCisFP<1>, SDTCisOpSmallerThanOp<0, 1>, SDTCisSameNumEltsAs<0, 1>,
SDTCVecEltisVT<2, i1>, SDTCisSameNumEltsAs<1, 2>, SDTCisVT<3, XLenVT>
@@ -129,8 +210,21 @@ def SDT_RISCVFPExtendOp_VL : SDTypeProfile<1, 3, [
]>;
def riscv_fpround_vl : SDNode<"RISCVISD::FP_ROUND_VL", SDT_RISCVFPRoundOp_VL>;
+def riscv_strict_fpround_vl : SDNode<"RISCVISD::STRICT_FP_ROUND_VL", SDT_RISCVFPRoundOp_VL, [SDNPHasChain]>;
def riscv_fpextend_vl : SDNode<"RISCVISD::FP_EXTEND_VL", SDT_RISCVFPExtendOp_VL>;
+def riscv_strict_fpextend_vl : SDNode<"RISCVISD::STRICT_FP_EXTEND_VL", SDT_RISCVFPExtendOp_VL, [SDNPHasChain]>;
def riscv_fncvt_rod_vl : SDNode<"RISCVISD::VFNCVT_ROD_VL", SDT_RISCVFPRoundOp_VL>;
+def riscv_strict_fncvt_rod_vl : SDNode<"RISCVISD::STRICT_VFNCVT_ROD_VL", SDT_RISCVFPRoundOp_VL, [SDNPHasChain]>;
+
+def any_riscv_fpround_vl : PatFrags<(ops node:$src, node:$mask, node:$vl),
+ [(riscv_fpround_vl node:$src, node:$mask, node:$vl),
+ (riscv_strict_fpround_vl node:$src, node:$mask, node:$vl)]>;
+def any_riscv_fpextend_vl : PatFrags<(ops node:$src, node:$mask, node:$vl),
+ [(riscv_fpextend_vl node:$src, node:$mask, node:$vl),
+ (riscv_strict_fpextend_vl node:$src, node:$mask, node:$vl)]>;
+def any_riscv_fncvt_rod_vl : PatFrags<(ops node:$src, node:$mask, node:$vl),
+ [(riscv_fncvt_rod_vl node:$src, node:$mask, node:$vl),
+ (riscv_strict_fncvt_rod_vl node:$src, node:$mask, node:$vl)]>;
def SDT_RISCVFP2IOp_VL : SDTypeProfile<1, 3, [
SDTCisInt<0>, SDTCisFP<1>, SDTCisSameNumEltsAs<0, 1>,
@@ -152,6 +246,11 @@ def SDT_RISCVI2FPOp_RM_VL : SDTypeProfile<1, 4, [
SDTCisVT<4, XLenVT> // Rounding mode
]>;
+def SDT_RISCVSETCCOP_VL : SDTypeProfile<1, 6, [
+ SDTCVecEltisVT<0, i1>, SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>,
+ SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>, SDTCisSameAs<0, 4>,
+ SDTCisSameAs<0, 5>, SDTCisVT<6, XLenVT>]>;
+
// Float -> Int
def riscv_vfcvt_xu_f_vl : SDNode<"RISCVISD::VFCVT_XU_F_VL", SDT_RISCVFP2IOp_VL>;
def riscv_vfcvt_x_f_vl : SDNode<"RISCVISD::VFCVT_X_F_VL", SDT_RISCVFP2IOp_VL>;
@@ -161,24 +260,52 @@ def riscv_vfcvt_rm_x_f_vl : SDNode<"RISCVISD::VFCVT_RM_X_F_VL", SDT_RISCVFP2IOp_
def riscv_vfcvt_rtz_xu_f_vl : SDNode<"RISCVISD::VFCVT_RTZ_XU_F_VL", SDT_RISCVFP2IOp_VL>;
def riscv_vfcvt_rtz_x_f_vl : SDNode<"RISCVISD::VFCVT_RTZ_X_F_VL", SDT_RISCVFP2IOp_VL>;
+def riscv_strict_vfcvt_rm_x_f_vl : SDNode<"RISCVISD::STRICT_VFCVT_RM_X_F_VL", SDT_RISCVFP2IOp_RM_VL, [SDNPHasChain]>;
+def riscv_strict_vfcvt_rtz_xu_f_vl : SDNode<"RISCVISD::STRICT_VFCVT_RTZ_XU_F_VL", SDT_RISCVFP2IOp_VL, [SDNPHasChain]>;
+def riscv_strict_vfcvt_rtz_x_f_vl : SDNode<"RISCVISD::STRICT_VFCVT_RTZ_X_F_VL", SDT_RISCVFP2IOp_VL, [SDNPHasChain]>;
+
+def any_riscv_vfcvt_rm_x_f_vl : PatFrags<(ops node:$src, node:$mask, node:$vl, node:$rm),
+ [(riscv_vfcvt_rm_x_f_vl node:$src, node:$mask, node:$vl, node:$rm),
+ (riscv_strict_vfcvt_rm_x_f_vl node:$src, node:$mask, node:$vl, node:$rm)]>;
+def any_riscv_vfcvt_rtz_xu_f_vl : PatFrags<(ops node:$src, node:$mask, node:$vl),
+ [(riscv_vfcvt_rtz_xu_f_vl node:$src, node:$mask, node:$vl),
+ (riscv_strict_vfcvt_rtz_xu_f_vl node:$src, node:$mask, node:$vl)]>;
+def any_riscv_vfcvt_rtz_x_f_vl : PatFrags<(ops node:$src, node:$mask, node:$vl),
+ [(riscv_vfcvt_rtz_x_f_vl node:$src, node:$mask, node:$vl),
+ (riscv_strict_vfcvt_rtz_x_f_vl node:$src, node:$mask, node:$vl)]>;
+
// Int -> Float
def riscv_sint_to_fp_vl : SDNode<"RISCVISD::SINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
def riscv_uint_to_fp_vl : SDNode<"RISCVISD::UINT_TO_FP_VL", SDT_RISCVI2FPOp_VL>;
def riscv_vfcvt_rm_f_xu_vl : SDNode<"RISCVISD::VFCVT_RM_F_XU_VL", SDT_RISCVI2FPOp_RM_VL>;
def riscv_vfcvt_rm_f_x_vl : SDNode<"RISCVISD::VFCVT_RM_F_X_VL", SDT_RISCVI2FPOp_RM_VL>;
+def riscv_strict_sint_to_fp_vl : SDNode<"RISCVISD::STRICT_SINT_TO_FP_VL", SDT_RISCVI2FPOp_VL, [SDNPHasChain]>;
+def riscv_strict_uint_to_fp_vl : SDNode<"RISCVISD::STRICT_UINT_TO_FP_VL", SDT_RISCVI2FPOp_VL, [SDNPHasChain]>;
-def riscv_vfround_noexcept_vl: SDNode<"RISCVISD::VFROUND_NOEXCEPT_VL", SDT_RISCVFPUnOp_VL>;
+def any_riscv_sint_to_fp_vl : PatFrags<(ops node:$src, node:$mask, node:$vl),
+ [(riscv_sint_to_fp_vl node:$src, node:$mask, node:$vl),
+ (riscv_strict_sint_to_fp_vl node:$src, node:$mask, node:$vl)]>;
+def any_riscv_uint_to_fp_vl : PatFrags<(ops node:$src, node:$mask, node:$vl),
+ [(riscv_uint_to_fp_vl node:$src, node:$mask, node:$vl),
+ (riscv_strict_uint_to_fp_vl node:$src, node:$mask, node:$vl)]>;
-def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL",
- SDTypeProfile<1, 6, [SDTCVecEltisVT<0, i1>,
- SDTCisVec<1>,
- SDTCisSameNumEltsAs<0, 1>,
- SDTCisSameAs<1, 2>,
- SDTCisVT<3, OtherVT>,
- SDTCisSameAs<0, 4>,
- SDTCisSameAs<0, 5>,
- SDTCisVT<6, XLenVT>]>>;
+def riscv_vfround_noexcept_vl: SDNode<"RISCVISD::VFROUND_NOEXCEPT_VL", SDT_RISCVFPUnOp_VL>;
+def riscv_strict_vfround_noexcept_vl: SDNode<"RISCVISD::STRICT_VFROUND_NOEXCEPT_VL", SDT_RISCVFPUnOp_VL, [SDNPHasChain]>;
+
+def any_riscv_vfround_noexcept_vl : PatFrags<(ops node:$src, node:$mask, node:$vl),
+ [(riscv_vfround_noexcept_vl node:$src, node:$mask, node:$vl),
+ (riscv_strict_vfround_noexcept_vl node:$src, node:$mask, node:$vl)]>;
+
+def riscv_setcc_vl : SDNode<"RISCVISD::SETCC_VL", SDT_RISCVSETCCOP_VL>;
+def riscv_strict_fsetcc_vl : SDNode<"RISCVISD::STRICT_FSETCC_VL", SDT_RISCVSETCCOP_VL, [SDNPHasChain]>;
+def riscv_strict_fsetccs_vl : SDNode<"RISCVISD::STRICT_FSETCCS_VL", SDT_RISCVSETCCOP_VL, [SDNPHasChain]>;
+def any_riscv_fsetcc_vl : PatFrags<(ops node:$lhs, node:$rhs, node:$cc, node:$merge, node:$mask, node:$vl),
+ [(riscv_setcc_vl node:$lhs, node:$rhs, node:$cc, node:$merge, node:$mask, node:$vl),
+ (riscv_strict_fsetcc_vl node:$lhs, node:$rhs, node:$cc, node:$merge, node:$mask, node:$vl)]>;
+def any_riscv_fsetccs_vl : PatFrags<(ops node:$lhs, node:$rhs, node:$cc, node:$merge, node:$mask, node:$vl),
+ [(riscv_setcc_vl node:$lhs, node:$rhs, node:$cc, node:$merge, node:$mask, node:$vl),
+ (riscv_strict_fsetccs_vl node:$lhs, node:$rhs, node:$cc, node:$merge, node:$mask, node:$vl)]>;
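+// Both fragments above include the plain SETCC_VL node; only the strict
+// forms distinguish quiet (FSETCC) from signaling (FSETCCS) comparisons.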
def riscv_vrgather_vx_vl : SDNode<"RISCVISD::VRGATHER_VX_VL",
SDTypeProfile<1, 5, [SDTCisVec<0>,
@@ -264,47 +391,91 @@ def riscv_trunc_vector_vl : SDNode<"RISCVISD::TRUNCATE_VECTOR_VL",
SDTCVecEltisVT<2, i1>,
SDTCisVT<3, XLenVT>]>>;
-def SDT_RISCVVWBinOp_VL : SDTypeProfile<1, 5, [SDTCisVec<0>,
- SDTCisSameNumEltsAs<0, 1>,
- SDTCisSameAs<1, 2>,
- SDTCisSameAs<0, 3>,
- SDTCisSameNumEltsAs<1, 4>,
- SDTCVecEltisVT<4, i1>,
- SDTCisVT<5, XLenVT>]>;
-def riscv_vwmul_vl : SDNode<"RISCVISD::VWMUL_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
-def riscv_vwmulu_vl : SDNode<"RISCVISD::VWMULU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
-def riscv_vwmulsu_vl : SDNode<"RISCVISD::VWMULSU_VL", SDT_RISCVVWBinOp_VL>;
-def riscv_vwadd_vl : SDNode<"RISCVISD::VWADD_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
-def riscv_vwaddu_vl : SDNode<"RISCVISD::VWADDU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
-def riscv_vwsub_vl : SDNode<"RISCVISD::VWSUB_VL", SDT_RISCVVWBinOp_VL, []>;
-def riscv_vwsubu_vl : SDNode<"RISCVISD::VWSUBU_VL", SDT_RISCVVWBinOp_VL, []>;
-
-def SDT_RISCVVNBinOp_VL : SDTypeProfile<1, 5, [SDTCisVec<0>,
- SDTCisSameNumEltsAs<0, 1>,
- SDTCisOpSmallerThanOp<0, 1>,
- SDTCisSameAs<0, 2>,
- SDTCisSameAs<0, 3>,
- SDTCisSameNumEltsAs<0, 4>,
- SDTCVecEltisVT<4, i1>,
- SDTCisVT<5, XLenVT>]>;
-def riscv_vnsrl_vl : SDNode<"RISCVISD::VNSRL_VL", SDT_RISCVVNBinOp_VL>;
-
-def SDT_RISCVVWBinOpW_VL : SDTypeProfile<1, 5, [SDTCisVec<0>,
- SDTCisSameAs<0, 1>,
- SDTCisSameNumEltsAs<1, 2>,
- SDTCisOpSmallerThanOp<2, 1>,
- SDTCisSameAs<0, 3>,
- SDTCisSameNumEltsAs<1, 4>,
- SDTCVecEltisVT<4, i1>,
- SDTCisVT<5, XLenVT>]>;
-def riscv_vwadd_w_vl : SDNode<"RISCVISD::VWADD_W_VL", SDT_RISCVVWBinOpW_VL>;
-def riscv_vwaddu_w_vl : SDNode<"RISCVISD::VWADDU_W_VL", SDT_RISCVVWBinOpW_VL>;
-def riscv_vwsub_w_vl : SDNode<"RISCVISD::VWSUB_W_VL", SDT_RISCVVWBinOpW_VL>;
-def riscv_vwsubu_w_vl : SDNode<"RISCVISD::VWSUBU_W_VL", SDT_RISCVVWBinOpW_VL>;
-
-def SDTRVVVecReduce : SDTypeProfile<1, 5, [
+def SDT_RISCVVWIntBinOp_VL : SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisInt<0>,
+ SDTCisInt<1>,
+ SDTCisSameNumEltsAs<0, 1>,
+ SDTCisOpSmallerThanOp<1, 0>,
+ SDTCisSameAs<1, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisSameNumEltsAs<1, 4>,
+ SDTCVecEltisVT<4, i1>,
+ SDTCisVT<5, XLenVT>]>;
+def riscv_vwmul_vl : SDNode<"RISCVISD::VWMUL_VL", SDT_RISCVVWIntBinOp_VL, [SDNPCommutative]>;
+def riscv_vwmulu_vl : SDNode<"RISCVISD::VWMULU_VL", SDT_RISCVVWIntBinOp_VL, [SDNPCommutative]>;
+def riscv_vwmulsu_vl : SDNode<"RISCVISD::VWMULSU_VL", SDT_RISCVVWIntBinOp_VL>;
+def riscv_vwadd_vl : SDNode<"RISCVISD::VWADD_VL", SDT_RISCVVWIntBinOp_VL, [SDNPCommutative]>;
+def riscv_vwaddu_vl : SDNode<"RISCVISD::VWADDU_VL", SDT_RISCVVWIntBinOp_VL, [SDNPCommutative]>;
+def riscv_vwsub_vl : SDNode<"RISCVISD::VWSUB_VL", SDT_RISCVVWIntBinOp_VL, []>;
+def riscv_vwsubu_vl : SDNode<"RISCVISD::VWSUBU_VL", SDT_RISCVVWIntBinOp_VL, []>;
+
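+// Widening integer multiply-accumulate: the multiplicands (operands 1 and 2)
+// use the narrow element type, the accumulator (operand 3) the wide result type.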
+def SDT_RISCVVWIntTernOp_VL : SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisInt<0>,
+ SDTCisInt<1>,
+ SDTCisSameNumEltsAs<0, 1>,
+ SDTCisOpSmallerThanOp<1, 0>,
+ SDTCisSameAs<1, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisSameNumEltsAs<1, 4>,
+ SDTCVecEltisVT<4, i1>,
+ SDTCisVT<5, XLenVT>]>;
+def riscv_vwmacc_vl : SDNode<"RISCVISD::VWMACC_VL", SDT_RISCVVWIntTernOp_VL, [SDNPCommutative]>;
+def riscv_vwmaccu_vl : SDNode<"RISCVISD::VWMACCU_VL", SDT_RISCVVWIntTernOp_VL, [SDNPCommutative]>;
+def riscv_vwmaccsu_vl : SDNode<"RISCVISD::VWMACCSU_VL", SDT_RISCVVWIntTernOp_VL, []>;
+
+def SDT_RISCVVWFPBinOp_VL : SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisFP<0>,
+ SDTCisFP<1>,
+ SDTCisSameNumEltsAs<0, 1>,
+ SDTCisOpSmallerThanOp<1, 0>,
+ SDTCisSameAs<1, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisSameNumEltsAs<1, 4>,
+ SDTCVecEltisVT<4, i1>,
+ SDTCisVT<5, XLenVT>]>;
+def riscv_vfwmul_vl : SDNode<"RISCVISD::VFWMUL_VL", SDT_RISCVVWFPBinOp_VL, [SDNPCommutative]>;
+def riscv_vfwadd_vl : SDNode<"RISCVISD::VFWADD_VL", SDT_RISCVVWFPBinOp_VL, [SDNPCommutative]>;
+def riscv_vfwsub_vl : SDNode<"RISCVISD::VFWSUB_VL", SDT_RISCVVWFPBinOp_VL, []>;
+
+def SDT_RISCVVNIntBinOp_VL : SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisInt<0>,
+ SDTCisInt<1>,
+ SDTCisSameNumEltsAs<0, 1>,
+ SDTCisOpSmallerThanOp<0, 1>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisSameNumEltsAs<0, 4>,
+ SDTCVecEltisVT<4, i1>,
+ SDTCisVT<5, XLenVT>]>;
+def riscv_vnsrl_vl : SDNode<"RISCVISD::VNSRL_VL", SDT_RISCVVNIntBinOp_VL>;
+
+def SDT_RISCVVWIntBinOpW_VL : SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisInt<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisInt<2>,
+ SDTCisSameNumEltsAs<1, 2>,
+ SDTCisOpSmallerThanOp<2, 1>,
+ SDTCisSameAs<0, 3>,
+ SDTCisSameNumEltsAs<1, 4>,
+ SDTCVecEltisVT<4, i1>,
+ SDTCisVT<5, XLenVT>]>;
+def riscv_vwadd_w_vl : SDNode<"RISCVISD::VWADD_W_VL", SDT_RISCVVWIntBinOpW_VL>;
+def riscv_vwaddu_w_vl : SDNode<"RISCVISD::VWADDU_W_VL", SDT_RISCVVWIntBinOpW_VL>;
+def riscv_vwsub_w_vl : SDNode<"RISCVISD::VWSUB_W_VL", SDT_RISCVVWIntBinOpW_VL>;
+def riscv_vwsubu_w_vl : SDNode<"RISCVISD::VWSUBU_W_VL", SDT_RISCVVWIntBinOpW_VL>;
+
+def SDT_RISCVVWFPBinOpW_VL : SDTypeProfile<1, 5, [SDTCisVec<0>, SDTCisFP<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisFP<2>,
+ SDTCisSameNumEltsAs<1, 2>,
+ SDTCisOpSmallerThanOp<2, 1>,
+ SDTCisSameAs<0, 3>,
+ SDTCisSameNumEltsAs<1, 4>,
+ SDTCVecEltisVT<4, i1>,
+ SDTCisVT<5, XLenVT>]>;
+
+def riscv_vfwadd_w_vl : SDNode<"RISCVISD::VFWADD_W_VL", SDT_RISCVVWFPBinOpW_VL>;
+def riscv_vfwsub_w_vl : SDNode<"RISCVISD::VFWSUB_W_VL", SDT_RISCVVWFPBinOpW_VL>;
+
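+// The reduction profile gains an extra XLenVT operand carrying the tail/mask
+// policy; it is matched as timm:$policy in the VPatReductionVL patterns below.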
+def SDTRVVVecReduce : SDTypeProfile<1, 6, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>,
- SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<2, 4>, SDTCisVT<5, XLenVT>
+ SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<2, 4>, SDTCisVT<5, XLenVT>,
+ SDTCisVT<6, XLenVT>
]>;
def riscv_add_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C, node:$D,
@@ -398,12 +569,18 @@ foreach kind = ["ADD", "UMAX", "SMAX", "UMIN", "SMIN", "AND", "OR", "XOR",
// Give explicit Complexity to prefer simm5/uimm5.
def SplatPat : ComplexPattern<vAny, 1, "selectVSplat", [], [], 1>;
-def SplatPat_simm5 : ComplexPattern<vAny, 1, "selectVSplatSimm5", [], [], 2>;
-def SplatPat_uimm5 : ComplexPattern<vAny, 1, "selectVSplatUimm5", [], [], 2>;
+def SplatPat_simm5 : ComplexPattern<vAny, 1, "selectVSplatSimm5", [], [], 3>;
+def SplatPat_uimm5 : ComplexPattern<vAny, 1, "selectVSplatUimmBits<5>", [], [], 3>;
+def SplatPat_uimm6 : ComplexPattern<vAny, 1, "selectVSplatUimmBits<6>", [], [], 3>;
def SplatPat_simm5_plus1
- : ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1", [], [], 2>;
+ : ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1", [], [], 3>;
def SplatPat_simm5_plus1_nonzero
- : ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1NonZero", [], [], 2>;
+ : ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1NonZero", [], [], 3>;
+
+def ext_oneuse_SplatPat
+ : ComplexPattern<vAny, 1, "selectExtOneUseVSplat", [], [], 2>;
+
+def SelectFPImm : ComplexPattern<fAny, 1, "selectFPImm", [], [], 1>;
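+// In each ComplexPattern the quoted string names the C++ selector routine and
+// the final integer is the complexity used to prefer the more specific forms.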
// Ignore the vl operand.
def SplatFPOp : PatFrag<(ops node:$op),
@@ -414,30 +591,65 @@ def sew16simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<16>", []>;
def sew32simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<32>", []>;
def sew64simm5 : ComplexPattern<XLenVT, 1, "selectRVVSimm5<64>", []>;
-multiclass VPatBinaryVL_V<SDNode vop,
- string instruction_name,
- string suffix,
- ValueType result_type,
- ValueType op1_type,
- ValueType op2_type,
- ValueType mask_type,
- int sew,
- LMULInfo vlmul,
- VReg result_reg_class,
- VReg op1_reg_class,
- VReg op2_reg_class> {
- def : Pat<(result_type (vop
- (op1_type op1_reg_class:$rs1),
- (op2_type op2_reg_class:$rs2),
- (result_type result_reg_class:$merge),
- (mask_type V0),
- VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX#"_MASK")
- result_reg_class:$merge,
- op1_reg_class:$rs1,
- op2_reg_class:$rs2,
- (mask_type V0), GPR:$vl, sew, TAIL_AGNOSTIC)>;
-}
+class VPatBinaryVL_V<SDPatternOperator vop,
+ string instruction_name,
+ string suffix,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int log2sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op1_reg_class,
+ VReg op2_reg_class,
+ bit isSEWAware = 0>
+ : Pat<(result_type (vop
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_reg_class:$rs2),
+ (result_type result_reg_class:$merge),
+ (mask_type V0),
+ VLOpFrag)),
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ instruction_name#"_"#suffix#"_"#vlmul.MX#"_E"#!shl(1, log2sew)#"_MASK",
+ instruction_name#"_"#suffix#"_"#vlmul.MX#"_MASK"))
+ result_reg_class:$merge,
+ op1_reg_class:$rs1,
+ op2_reg_class:$rs2,
+ (mask_type V0), GPR:$vl, log2sew, TAIL_AGNOSTIC)>;
+
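+// The _RM variants below carry an explicit frm operand. FRM_DYN means the
+// pattern requests no rounding-mode change; RISCVInsertReadWriteCSR uses it
+// when deciding whether to write the frm CSR. isSEWAware selects the
+// SEW-qualified (_E<sew>) instruction names.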
+class VPatBinaryVL_V_RM<SDPatternOperator vop,
+ string instruction_name,
+ string suffix,
+ ValueType result_type,
+ ValueType op1_type,
+ ValueType op2_type,
+ ValueType mask_type,
+ int log2sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op1_reg_class,
+ VReg op2_reg_class,
+ bit isSEWAware = 0>
+ : Pat<(result_type (vop
+ (op1_type op1_reg_class:$rs1),
+ (op2_type op2_reg_class:$rs2),
+ (result_type result_reg_class:$merge),
+ (mask_type V0),
+ VLOpFrag)),
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ instruction_name#"_"#suffix#"_"#vlmul.MX#"_E"#!shl(1, log2sew)#"_MASK",
+ instruction_name#"_"#suffix#"_"#vlmul.MX#"_MASK"))
+ result_reg_class:$merge,
+ op1_reg_class:$rs1,
+ op2_reg_class:$rs2,
+ (mask_type V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, log2sew, TAIL_AGNOSTIC)>;
multiclass VPatTiedBinaryNoMaskVL_V<SDNode vop,
string instruction_name,
@@ -470,163 +682,302 @@ multiclass VPatTiedBinaryNoMaskVL_V<SDNode vop,
(!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX#"_TIED")
result_reg_class:$rs1,
op2_reg_class:$rs2,
- GPR:$vl, sew, TAIL_UNDISTURBED_MASK_UNDISTURBED)>;
-}
-
-multiclass VPatBinaryVL_XI<SDNode vop,
- string instruction_name,
- string suffix,
- ValueType result_type,
- ValueType vop1_type,
- ValueType vop2_type,
- ValueType mask_type,
- int sew,
- LMULInfo vlmul,
- VReg result_reg_class,
- VReg vop_reg_class,
- ComplexPattern SplatPatKind,
- DAGOperand xop_kind> {
+ GPR:$vl, sew, TU_MU)>;
+}
+
+multiclass VPatTiedBinaryNoMaskVL_V_RM<SDNode vop,
+ string instruction_name,
+ string suffix,
+ ValueType result_type,
+ ValueType op2_type,
+ int sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg op2_reg_class> {
def : Pat<(result_type (vop
- (vop1_type vop_reg_class:$rs1),
- (vop2_type (SplatPatKind (XLenVT xop_kind:$rs2))),
- (result_type result_reg_class:$merge),
- (mask_type V0),
- VLOpFrag)),
- (!cast<Instruction>(instruction_name#_#suffix#_# vlmul.MX#"_MASK")
- result_reg_class:$merge,
- vop_reg_class:$rs1,
- xop_kind:$rs2,
- (mask_type V0), GPR:$vl, sew, TAIL_AGNOSTIC)>;
-}
-
-multiclass VPatBinaryVL_VV_VX<SDNode vop, string instruction_name> {
- foreach vti = AllIntegerVectors in {
- defm : VPatBinaryVL_V<vop, instruction_name, "VV",
- vti.Vector, vti.Vector, vti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
- vti.RegClass>;
- defm : VPatBinaryVL_XI<vop, instruction_name, "VX",
+ (result_type result_reg_class:$rs1),
+ (op2_type op2_reg_class:$rs2),
+ srcvalue,
+ true_mask,
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX#"_TIED")
+ result_reg_class:$rs1,
+ op2_reg_class:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, sew, TAIL_AGNOSTIC)>;
+ // Tail undisturbed
+ def : Pat<(riscv_vp_merge_vl true_mask,
+ (result_type (vop
+ result_reg_class:$rs1,
+ (op2_type op2_reg_class:$rs2),
+ srcvalue,
+ true_mask,
+ VLOpFrag)),
+ result_reg_class:$rs1, VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX#"_TIED")
+ result_reg_class:$rs1,
+ op2_reg_class:$rs2,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, sew, TU_MU)>;
+}
+
+class VPatBinaryVL_XI<SDPatternOperator vop,
+ string instruction_name,
+ string suffix,
+ ValueType result_type,
+ ValueType vop1_type,
+ ValueType vop2_type,
+ ValueType mask_type,
+ int log2sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg vop_reg_class,
+ ComplexPattern SplatPatKind,
+ DAGOperand xop_kind,
+ bit isSEWAware = 0>
+ : Pat<(result_type (vop
+ (vop1_type vop_reg_class:$rs1),
+ (vop2_type (SplatPatKind (XLenVT xop_kind:$rs2))),
+ (result_type result_reg_class:$merge),
+ (mask_type V0),
+ VLOpFrag)),
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ instruction_name#_#suffix#_#vlmul.MX#"_E"#!shl(1, log2sew)#"_MASK",
+ instruction_name#_#suffix#_#vlmul.MX#"_MASK"))
+ result_reg_class:$merge,
+ vop_reg_class:$rs1,
+ xop_kind:$rs2,
+ (mask_type V0), GPR:$vl, log2sew, TAIL_AGNOSTIC)>;
+
+multiclass VPatBinaryVL_VV_VX<SDPatternOperator vop, string instruction_name,
+ list<VTypeInfo> vtilist = AllIntegerVectors,
+ bit isSEWAware = 0> {
+ foreach vti = vtilist in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : VPatBinaryVL_V<vop, instruction_name, "VV",
vti.Vector, vti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
- SplatPat, GPR>;
+ vti.RegClass, isSEWAware>;
+ def : VPatBinaryVL_XI<vop, instruction_name, "VX",
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
+ SplatPat, GPR, isSEWAware>;
+ }
}
}
-multiclass VPatBinaryVL_VV_VX_VI<SDNode vop, string instruction_name,
+multiclass VPatBinaryVL_VV_VX_VI<SDPatternOperator vop, string instruction_name,
Operand ImmType = simm5>
: VPatBinaryVL_VV_VX<vop, instruction_name> {
foreach vti = AllIntegerVectors in {
- defm : VPatBinaryVL_XI<vop, instruction_name, "VI",
- vti.Vector, vti.Vector, vti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
- !cast<ComplexPattern>(SplatPat#_#ImmType),
- ImmType>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in
+ def : VPatBinaryVL_XI<vop, instruction_name, "VI",
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
+ !cast<ComplexPattern>(SplatPat#_#ImmType),
+ ImmType>;
}
}
-multiclass VPatBinaryWVL_VV_VX<SDNode vop, string instruction_name> {
+multiclass VPatBinaryWVL_VV_VX<SDPatternOperator vop, string instruction_name> {
foreach VtiToWti = AllWidenableIntVectors in {
defvar vti = VtiToWti.Vti;
defvar wti = VtiToWti.Wti;
- defm : VPatBinaryVL_V<vop, instruction_name, "VV",
- wti.Vector, vti.Vector, vti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, wti.RegClass, vti.RegClass,
- vti.RegClass>;
- defm : VPatBinaryVL_XI<vop, instruction_name, "VX",
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : VPatBinaryVL_V<vop, instruction_name, "VV",
wti.Vector, vti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, wti.RegClass, vti.RegClass,
- SplatPat, GPR>;
+ vti.RegClass>;
+ def : VPatBinaryVL_XI<vop, instruction_name, "VX",
+ wti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, wti.RegClass, vti.RegClass,
+ SplatPat, GPR>;
+ }
}
}
-multiclass VPatBinaryWVL_VV_VX_WV_WX<SDNode vop, SDNode vop_w,
+
+multiclass VPatBinaryWVL_VV_VX_WV_WX<SDPatternOperator vop, SDNode vop_w,
string instruction_name>
: VPatBinaryWVL_VV_VX<vop, instruction_name> {
foreach VtiToWti = AllWidenableIntVectors in {
defvar vti = VtiToWti.Vti;
defvar wti = VtiToWti.Wti;
- defm : VPatTiedBinaryNoMaskVL_V<vop_w, instruction_name, "WV",
- wti.Vector, vti.Vector, vti.Log2SEW,
- vti.LMul, wti.RegClass, vti.RegClass>;
- defm : VPatBinaryVL_V<vop_w, instruction_name, "WV",
- wti.Vector, wti.Vector, vti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, wti.RegClass, wti.RegClass,
- vti.RegClass>;
- defm : VPatBinaryVL_XI<vop_w, instruction_name, "WX",
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ defm : VPatTiedBinaryNoMaskVL_V<vop_w, instruction_name, "WV",
+ wti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, wti.RegClass, vti.RegClass>;
+ def : VPatBinaryVL_V<vop_w, instruction_name, "WV",
wti.Vector, wti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, wti.RegClass, wti.RegClass,
- SplatPat, GPR>;
+ vti.RegClass>;
+ def : VPatBinaryVL_XI<vop_w, instruction_name, "WX",
+ wti.Vector, wti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, wti.RegClass, wti.RegClass,
+ SplatPat, GPR>;
+ }
}
}
-multiclass VPatBinaryNVL_WV_WX_WI<SDNode vop, string instruction_name> {
+multiclass VPatBinaryNVL_WV_WX_WI<SDPatternOperator vop, string instruction_name> {
foreach VtiToWti = AllWidenableIntVectors in {
defvar vti = VtiToWti.Vti;
defvar wti = VtiToWti.Wti;
- defm : VPatBinaryVL_V<vop, instruction_name, "WV",
- vti.Vector, wti.Vector, vti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.RegClass, wti.RegClass,
- vti.RegClass>;
- defm : VPatBinaryVL_XI<vop, instruction_name, "WX",
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : VPatBinaryVL_V<vop, instruction_name, "WV",
vti.Vector, wti.Vector, vti.Vector, vti.Mask,
vti.Log2SEW, vti.LMul, vti.RegClass, wti.RegClass,
- SplatPat, GPR>;
- defm : VPatBinaryVL_XI<vop, instruction_name, "WI",
- vti.Vector, wti.Vector, vti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.RegClass, wti.RegClass,
- !cast<ComplexPattern>(SplatPat#_#uimm5),
- uimm5>;
- }
-}
-
-multiclass VPatBinaryVL_VF<SDNode vop,
- string instruction_name,
- ValueType result_type,
- ValueType vop_type,
- ValueType mask_type,
- int sew,
- LMULInfo vlmul,
- VReg result_reg_class,
- VReg vop_reg_class,
- RegisterClass scalar_reg_class> {
- def : Pat<(result_type (vop (vop_type vop_reg_class:$rs1),
- (vop_type (SplatFPOp scalar_reg_class:$rs2)),
- (result_type result_reg_class:$merge),
- (mask_type V0),
- VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#vlmul.MX#"_MASK")
- result_reg_class:$merge,
- vop_reg_class:$rs1,
- scalar_reg_class:$rs2,
- (mask_type V0), GPR:$vl, sew, TAIL_AGNOSTIC)>;
+ vti.RegClass>;
+ def : VPatBinaryVL_XI<vop, instruction_name, "WX",
+ vti.Vector, wti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, wti.RegClass,
+ SplatPat, GPR>;
+ def : VPatBinaryVL_XI<vop, instruction_name, "WI",
+ vti.Vector, wti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, wti.RegClass,
+ !cast<ComplexPattern>(SplatPat#_#uimm5),
+ uimm5>;
+ }
+ }
}
-multiclass VPatBinaryFPVL_VV_VF<SDNode vop, string instruction_name> {
+class VPatBinaryVL_VF<SDPatternOperator vop,
+ string instruction_name,
+ ValueType result_type,
+ ValueType vop1_type,
+ ValueType vop2_type,
+ ValueType mask_type,
+ int log2sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg vop_reg_class,
+ RegisterClass scalar_reg_class,
+ bit isSEWAware = 0>
+ : Pat<(result_type (vop (vop1_type vop_reg_class:$rs1),
+ (vop2_type (SplatFPOp scalar_reg_class:$rs2)),
+ (result_type result_reg_class:$merge),
+ (mask_type V0),
+ VLOpFrag)),
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ instruction_name#"_"#vlmul.MX#"_E"#!shl(1, log2sew)#"_MASK",
+ instruction_name#"_"#vlmul.MX#"_MASK"))
+ result_reg_class:$merge,
+ vop_reg_class:$rs1,
+ scalar_reg_class:$rs2,
+ (mask_type V0), GPR:$vl, log2sew, TAIL_AGNOSTIC)>;
+
+class VPatBinaryVL_VF_RM<SDPatternOperator vop,
+ string instruction_name,
+ ValueType result_type,
+ ValueType vop1_type,
+ ValueType vop2_type,
+ ValueType mask_type,
+ int log2sew,
+ LMULInfo vlmul,
+ VReg result_reg_class,
+ VReg vop_reg_class,
+ RegisterClass scalar_reg_class,
+ bit isSEWAware = 0>
+ : Pat<(result_type (vop (vop1_type vop_reg_class:$rs1),
+ (vop2_type (SplatFPOp scalar_reg_class:$rs2)),
+ (result_type result_reg_class:$merge),
+ (mask_type V0),
+ VLOpFrag)),
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ instruction_name#"_"#vlmul.MX#"_E"#!shl(1, log2sew)#"_MASK",
+ instruction_name#"_"#vlmul.MX#"_MASK"))
+ result_reg_class:$merge,
+ vop_reg_class:$rs1,
+ scalar_reg_class:$rs2,
+ (mask_type V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, log2sew, TAIL_AGNOSTIC)>;
+
+multiclass VPatBinaryFPVL_VV_VF<SDPatternOperator vop, string instruction_name,
+ bit isSEWAware = 0> {
foreach vti = AllFloatVectors in {
- defm : VPatBinaryVL_V<vop, instruction_name, "VV",
- vti.Vector, vti.Vector, vti.Vector, vti.Mask,
- vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
- vti.RegClass>;
- defm : VPatBinaryVL_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
- vti.Vector, vti.Vector, vti.Mask, vti.Log2SEW,
- vti.LMul, vti.RegClass, vti.RegClass,
- vti.ScalarRegClass>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : VPatBinaryVL_V<vop, instruction_name, "VV",
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
+ vti.RegClass, isSEWAware>;
+ def : VPatBinaryVL_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
+ vti.ScalarRegClass, isSEWAware>;
+ }
+ }
+}
+
+multiclass VPatBinaryFPVL_VV_VF_RM<SDPatternOperator vop, string instruction_name,
+ bit isSEWAware = 0> {
+ foreach vti = AllFloatVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : VPatBinaryVL_V_RM<vop, instruction_name, "VV",
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
+ vti.RegClass, isSEWAware>;
+ def : VPatBinaryVL_VF_RM<vop, instruction_name#"_V"#vti.ScalarSuffix,
+ vti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, vti.RegClass, vti.RegClass,
+ vti.ScalarRegClass, isSEWAware>;
+ }
}
}
-multiclass VPatBinaryFPVL_R_VF<SDNode vop, string instruction_name> {
+multiclass VPatBinaryFPVL_R_VF<SDPatternOperator vop, string instruction_name,
+ bit isSEWAware = 0> {
foreach fvti = AllFloatVectors in {
+ let Predicates = GetVTypePredicates<fvti>.Predicates in
def : Pat<(fvti.Vector (vop (SplatFPOp fvti.ScalarRegClass:$rs2),
fvti.RegClass:$rs1,
(fvti.Vector fvti.RegClass:$merge),
(fvti.Mask V0),
VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK",
+ instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK"))
fvti.RegClass:$merge,
fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2,
(fvti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
}
}
+multiclass VPatBinaryFPVL_R_VF_RM<SDPatternOperator vop, string instruction_name,
+ bit isSEWAware = 0> {
+ foreach fvti = AllFloatVectors in {
+ let Predicates = GetVTypePredicates<fvti>.Predicates in
+ def : Pat<(fvti.Vector (vop (SplatFPOp fvti.ScalarRegClass:$rs2),
+ fvti.RegClass:$rs1,
+ (fvti.Vector fvti.RegClass:$merge),
+ (fvti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>(
+ !if(isSEWAware,
+ instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_MASK",
+ instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK"))
+ fvti.RegClass:$merge,
+ fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2,
+ (fvti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+}
+
multiclass VPatIntegerSetCCVL_VV<VTypeInfo vti, string instruction_name,
CondCode cc> {
def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
@@ -722,40 +1073,42 @@ multiclass VPatIntegerSetCCVL_VIPlus1_Swappable<VTypeInfo vti,
vti.Log2SEW)>;
}
-multiclass VPatFPSetCCVL_VV_VF_FV<CondCode cc,
+multiclass VPatFPSetCCVL_VV_VF_FV<SDPatternOperator vop, CondCode cc,
string inst_name,
string swapped_op_inst_name> {
foreach fvti = AllFloatVectors in {
- def : Pat<(fvti.Mask (riscv_setcc_vl (fvti.Vector fvti.RegClass:$rs1),
- fvti.RegClass:$rs2,
- cc,
- VR:$merge,
- (fvti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>(inst_name#"_VV_"#fvti.LMul.MX#"_MASK")
- VR:$merge, fvti.RegClass:$rs1,
- fvti.RegClass:$rs2, (fvti.Mask V0),
- GPR:$vl, fvti.Log2SEW)>;
- def : Pat<(fvti.Mask (riscv_setcc_vl (fvti.Vector fvti.RegClass:$rs1),
- (SplatFPOp fvti.ScalarRegClass:$rs2),
- cc,
- VR:$merge,
- (fvti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
- VR:$merge, fvti.RegClass:$rs1,
- fvti.ScalarRegClass:$rs2, (fvti.Mask V0),
- GPR:$vl, fvti.Log2SEW)>;
- def : Pat<(fvti.Mask (riscv_setcc_vl (SplatFPOp fvti.ScalarRegClass:$rs2),
- (fvti.Vector fvti.RegClass:$rs1),
- cc,
- VR:$merge,
- (fvti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
- VR:$merge, fvti.RegClass:$rs1,
- fvti.ScalarRegClass:$rs2, (fvti.Mask V0),
- GPR:$vl, fvti.Log2SEW)>;
+ let Predicates = GetVTypePredicates<fvti>.Predicates in {
+ def : Pat<(fvti.Mask (vop (fvti.Vector fvti.RegClass:$rs1),
+ fvti.RegClass:$rs2,
+ cc,
+ VR:$merge,
+ (fvti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>(inst_name#"_VV_"#fvti.LMul.MX#"_MASK")
+ VR:$merge, fvti.RegClass:$rs1,
+ fvti.RegClass:$rs2, (fvti.Mask V0),
+ GPR:$vl, fvti.Log2SEW)>;
+ def : Pat<(fvti.Mask (vop (fvti.Vector fvti.RegClass:$rs1),
+ (SplatFPOp fvti.ScalarRegClass:$rs2),
+ cc,
+ VR:$merge,
+ (fvti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>(inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
+ VR:$merge, fvti.RegClass:$rs1,
+ fvti.ScalarRegClass:$rs2, (fvti.Mask V0),
+ GPR:$vl, fvti.Log2SEW)>;
+ def : Pat<(fvti.Mask (vop (SplatFPOp fvti.ScalarRegClass:$rs2),
+ (fvti.Vector fvti.RegClass:$rs1),
+ cc,
+ VR:$merge,
+ (fvti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>(swapped_op_inst_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
+ VR:$merge, fvti.RegClass:$rs1,
+ fvti.ScalarRegClass:$rs2, (fvti.Mask V0),
+ GPR:$vl, fvti.Log2SEW)>;
+ }
}
}
@@ -764,6 +1117,8 @@ multiclass VPatExtendVL_V<SDNode vop, string inst_name, string suffix,
foreach vtiTofti = fraction_list in {
defvar vti = vtiTofti.Vti;
defvar fti = vtiTofti.Fti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fti>.Predicates) in
def : Pat<(vti.Vector (vop (fti.Vector fti.RegClass:$rs2),
(fti.Mask V0), VLOpFrag)),
(!cast<Instruction>(inst_name#"_"#suffix#"_"#vti.LMul.MX#"_MASK")
@@ -775,9 +1130,11 @@ multiclass VPatExtendVL_V<SDNode vop, string inst_name, string suffix,
// Single width converting
-multiclass VPatConvertFP2IVL_V<SDNode vop, string instruction_name> {
+multiclass VPatConvertFP2IVL_V<SDPatternOperator vop, string instruction_name> {
foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<ivti>.Predicates) in
def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1),
(fvti.Mask V0),
VLOpFrag)),
@@ -787,9 +1144,30 @@ multiclass VPatConvertFP2IVL_V<SDNode vop, string instruction_name> {
}
}
-multiclass VPatConvertFP2I_RM_VL_V<SDNode vop, string instruction_name> {
+multiclass VPatConvertFP2IVL_V_RM<SDPatternOperator vop, string instruction_name> {
+ foreach fvti = AllFloatVectors in {
+ defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<ivti>.Predicates) in
+ def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_MASK")
+ (ivti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+ (fvti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, ivti.Log2SEW, TA_MA)>;
+ }
+}
+
+
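+// Unlike the *_RM multiclasses above, which pass FRM_DYN, the *_RM_VL_*
+// variants take a static rounding mode from the node and forward it as
+// timm:$frm.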
+multiclass VPatConvertFP2I_RM_VL_V<SDPatternOperator vop, string instruction_name> {
foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<ivti>.Predicates) in
def : Pat<(ivti.Vector (vop (fvti.Vector fvti.RegClass:$rs1),
(fvti.Mask V0), (XLenVT timm:$frm),
VLOpFrag)),
@@ -800,21 +1178,29 @@ multiclass VPatConvertFP2I_RM_VL_V<SDNode vop, string instruction_name> {
}
}
-multiclass VPatConvertI2FPVL_V<SDNode vop, string instruction_name> {
+multiclass VPatConvertI2FPVL_V_RM<SDPatternOperator vop, string instruction_name> {
foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<ivti>.Predicates) in
def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1),
(ivti.Mask V0),
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
(fvti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1,
- (ivti.Mask V0), GPR:$vl, fvti.Log2SEW, TA_MA)>;
+ (ivti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, fvti.Log2SEW, TA_MA)>;
}
}
multiclass VPatConvertI2FP_RM_VL_V<SDNode vop, string instruction_name> {
foreach fvti = AllFloatVectors in {
defvar ivti = GetIntVTypeInfo<fvti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<ivti>.Predicates) in
def : Pat<(fvti.Vector (vop (ivti.Vector ivti.RegClass:$rs1),
(ivti.Mask V0), (XLenVT timm:$frm),
VLOpFrag)),
@@ -826,10 +1212,12 @@ multiclass VPatConvertI2FP_RM_VL_V<SDNode vop, string instruction_name> {
// Widening converting
-multiclass VPatWConvertFP2IVL_V<SDNode vop, string instruction_name> {
+multiclass VPatWConvertFP2IVL_V<SDPatternOperator vop, string instruction_name> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<iwti>.Predicates) in
def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1),
(fvti.Mask V0),
VLOpFrag)),
@@ -839,10 +1227,32 @@ multiclass VPatWConvertFP2IVL_V<SDNode vop, string instruction_name> {
}
}
+multiclass VPatWConvertFP2IVL_V_RM<SDPatternOperator vop, string instruction_name> {
+ foreach fvtiToFWti = AllWidenableFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<iwti>.Predicates) in
+ def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
+ (iwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+ (fvti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, fvti.Log2SEW, TA_MA)>;
+ }
+}
+
+
multiclass VPatWConvertFP2I_RM_VL_V<SDNode vop, string instruction_name> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<iwti>.Predicates) in
def : Pat<(iwti.Vector (vop (fvti.Vector fvti.RegClass:$rs1),
(fvti.Mask V0), (XLenVT timm:$frm),
VLOpFrag)),
@@ -852,53 +1262,71 @@ multiclass VPatWConvertFP2I_RM_VL_V<SDNode vop, string instruction_name> {
}
}
-multiclass VPatWConvertI2FPVL_V<SDNode vop, string instruction_name> {
+multiclass VPatWConvertI2FPVL_V<SDPatternOperator vop,
+ string instruction_name> {
foreach vtiToWti = AllWidenableIntToFloatVectors in {
defvar ivti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<ivti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1),
(ivti.Mask V0),
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_MASK")
(fwti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1,
- (ivti.Mask V0), GPR:$vl, ivti.Log2SEW, TA_MA)>;
+ (ivti.Mask V0),
+ GPR:$vl, ivti.Log2SEW, TA_MA)>;
}
}
-multiclass VPatWConvertI2FP_RM_VL_V<SDNode vop, string instruction_name> {
+// Narrowing converting
+
+multiclass VPatNConvertFP2IVL_W<SDPatternOperator vop,
+ string instruction_name> {
+ // Reuse the same list of types used in the widening nodes, but just swap the
+ // direction of types around so we're converting from Wti -> Vti
foreach vtiToWti = AllWidenableIntToFloatVectors in {
- defvar ivti = vtiToWti.Vti;
+ defvar vti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
- def : Pat<(fwti.Vector (vop (ivti.Vector ivti.RegClass:$rs1),
- (ivti.Mask V0), (XLenVT timm:$frm),
- VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_"#ivti.LMul.MX#"_MASK")
- (fwti.Vector (IMPLICIT_DEF)), ivti.RegClass:$rs1,
- (ivti.Mask V0), timm:$frm, GPR:$vl, ivti.Log2SEW, TA_MA)>;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1),
+ (fwti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_"#vti.LMul.MX#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
+ (fwti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
}
}
-// Narrowing converting
-
-multiclass VPatNConvertFP2IVL_V<SDNode vop, string instruction_name> {
+multiclass VPatNConvertFP2IVL_W_RM<SDPatternOperator vop,
+ string instruction_name> {
// Reuse the same list of types used in the widening nodes, but just swap the
// direction of types around so we're converting from Wti -> Vti
foreach vtiToWti = AllWidenableIntToFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1),
(fwti.Mask V0),
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_"#vti.LMul.MX#"_MASK")
(vti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
- (fwti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
+ (fwti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, vti.Log2SEW, TA_MA)>;
}
}
-multiclass VPatNConvertFP2I_RM_VL_V<SDNode vop, string instruction_name> {
+multiclass VPatNConvertFP2I_RM_VL_W<SDNode vop, string instruction_name> {
foreach vtiToWti = AllWidenableIntToFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar fwti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
def : Pat<(vti.Vector (vop (fwti.Vector fwti.RegClass:$rs1),
(fwti.Mask V0), (XLenVT timm:$frm),
VLOpFrag)),
@@ -908,23 +1336,32 @@ multiclass VPatNConvertFP2I_RM_VL_V<SDNode vop, string instruction_name> {
}
}
-multiclass VPatNConvertI2FPVL_V<SDNode vop, string instruction_name> {
+multiclass VPatNConvertI2FPVL_W_RM<SDPatternOperator vop,
+ string instruction_name> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<iwti>.Predicates) in
def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1),
(iwti.Mask V0),
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_"#fvti.LMul.MX#"_MASK")
(fvti.Vector (IMPLICIT_DEF)), iwti.RegClass:$rs1,
- (iwti.Mask V0), GPR:$vl, fvti.Log2SEW, TA_MA)>;
+ (iwti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, fvti.Log2SEW, TA_MA)>;
}
}
-multiclass VPatNConvertI2FP_RM_VL_V<SDNode vop, string instruction_name> {
+multiclass VPatNConvertI2FP_RM_VL_W<SDNode vop, string instruction_name> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
defvar fvti = fvtiToFWti.Vti;
defvar iwti = GetIntVTypeInfo<fvtiToFWti.Wti>.Vti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<iwti>.Predicates) in
def : Pat<(fvti.Vector (vop (iwti.Vector iwti.RegClass:$rs1),
(iwti.Mask V0), (XLenVT timm:$frm),
VLOpFrag)),
@@ -937,84 +1374,164 @@ multiclass VPatNConvertI2FP_RM_VL_V<SDNode vop, string instruction_name> {
multiclass VPatReductionVL<SDNode vop, string instruction_name, bit is_float> {
foreach vti = !if(is_float, AllFloatVectors, AllIntegerVectors) in {
defvar vti_m1 = !cast<VTypeInfo>(!if(is_float, "VF", "VI") # vti.SEW # "M1");
- def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), VR:$rs2,
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX)
- (vti_m1.Vector VR:$merge),
- (vti.Vector vti.RegClass:$rs1),
- (vti_m1.Vector VR:$rs2),
- GPR:$vl, vti.Log2SEW)>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge),
+ (vti.Vector vti.RegClass:$rs1), VR:$rs2,
+ (vti.Mask true_mask), VLOpFrag,
+ (XLenVT timm:$policy))),
+ (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW)
+ (vti_m1.Vector VR:$merge),
+ (vti.Vector vti.RegClass:$rs1),
+ (vti_m1.Vector VR:$rs2),
+ GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
+
+ def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge),
+ (vti.Vector vti.RegClass:$rs1), VR:$rs2,
+ (vti.Mask V0), VLOpFrag,
+ (XLenVT timm:$policy))),
+ (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+ (vti_m1.Vector VR:$merge),
+ (vti.Vector vti.RegClass:$rs1),
+ (vti_m1.Vector VR:$rs2),
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
+ }
+ }
+}
- def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1), VR:$rs2,
- (vti.Mask V0), VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_MASK")
- (vti_m1.Vector VR:$merge),
- (vti.Vector vti.RegClass:$rs1),
- (vti_m1.Vector VR:$rs2),
- (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+multiclass VPatReductionVL_RM<SDNode vop, string instruction_name, bit is_float> {
+ foreach vti = !if(is_float, AllFloatVectors, AllIntegerVectors) in {
+ defvar vti_m1 = !cast<VTypeInfo>(!if(is_float, "VF", "VI") # vti.SEW # "M1");
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge),
+ (vti.Vector vti.RegClass:$rs1), VR:$rs2,
+ (vti.Mask true_mask), VLOpFrag,
+ (XLenVT timm:$policy))),
+ (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW)
+ (vti_m1.Vector VR:$merge),
+ (vti.Vector vti.RegClass:$rs1),
+ (vti_m1.Vector VR:$rs2),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
+
+ def: Pat<(vti_m1.Vector (vop (vti_m1.Vector VR:$merge),
+ (vti.Vector vti.RegClass:$rs1), VR:$rs2,
+ (vti.Mask V0), VLOpFrag,
+ (XLenVT timm:$policy))),
+ (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+ (vti_m1.Vector VR:$merge),
+ (vti.Vector vti.RegClass:$rs1),
+ (vti_m1.Vector VR:$rs2),
+ (vti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
+ }
}
}
-multiclass VPatBinaryExtVL_WV_WX<SDNode op, PatFrags extop, string instruction_name> {
+multiclass VPatBinaryVL_WV_WX_WI<SDNode op, string instruction_name> {
foreach vtiToWti = AllWidenableIntVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
- def : Pat<
- (vti.Vector
- (riscv_trunc_vector_vl
- (op (wti.Vector wti.RegClass:$rs2),
- (wti.Vector (extop (vti.Vector vti.RegClass:$rs1)))),
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX)
- wti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>;
- def : Pat<
- (vti.Vector
- (riscv_trunc_vector_vl
- (op (wti.Vector wti.RegClass:$rs2),
- (wti.Vector (extop (vti.Vector (SplatPat GPR:$rs1))))),
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
- wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW)>;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<
+ (vti.Vector
+ (riscv_trunc_vector_vl
+ (op (wti.Vector wti.RegClass:$rs2),
+ (wti.Vector (ext_oneuse (vti.Vector vti.RegClass:$rs1)))),
+ (vti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ wti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
+
+ def : Pat<
+ (vti.Vector
+ (riscv_trunc_vector_vl
+ (op (wti.Vector wti.RegClass:$rs2),
+ (wti.Vector (ext_oneuse_SplatPat (XLenVT GPR:$rs1)))),
+ (vti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
+
+ def : Pat<
+ (vti.Vector
+ (riscv_trunc_vector_vl
+ (op (wti.Vector wti.RegClass:$rs2),
+ (wti.Vector (SplatPat_uimm5 uimm5:$rs1))), (vti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_WI_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ wti.RegClass:$rs2, uimm5:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ }
}
}
-multiclass VPatBinaryVL_WV_WX_WI<SDNode op, string instruction_name> {
- defm : VPatBinaryExtVL_WV_WX<op, sext_oneuse, instruction_name>;
- defm : VPatBinaryExtVL_WV_WX<op, zext_oneuse, instruction_name>;
- foreach vtiToWti = AllWidenableIntVectors in {
+multiclass VPatWidenReductionVL<SDNode vop, PatFrags extop, string instruction_name, bit is_float> {
+ foreach vtiToWti = !if(is_float, AllWidenableFloatVectors, AllWidenableIntVectors) in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
- def : Pat<
- (vti.Vector
- (riscv_trunc_vector_vl
- (op (wti.Vector wti.RegClass:$rs2),
- (wti.Vector (SplatPat_uimm5 uimm5:$rs1))), (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_WI_"#vti.LMul.MX)
- wti.RegClass:$rs2, uimm5:$rs1, GPR:$vl, vti.Log2SEW)>;
+ defvar wti_m1 = !cast<VTypeInfo>(!if(is_float, "VF", "VI") # wti.SEW # "M1");
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
+ (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))),
+ VR:$rs2, (vti.Mask true_mask), VLOpFrag,
+ (XLenVT timm:$policy))),
+ (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW)
+ (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
+ (wti_m1.Vector VR:$rs2), GPR:$vl, vti.Log2SEW,
+ (XLenVT timm:$policy))>;
+ def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
+ (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))),
+ VR:$rs2, (vti.Mask V0), VLOpFrag,
+ (XLenVT timm:$policy))),
+ (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+ (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
+ (wti_m1.Vector VR:$rs2), (vti.Mask V0), GPR:$vl, vti.Log2SEW,
+ (XLenVT timm:$policy))>;
+ }
}
}
-multiclass VPatWidenReductionVL<SDNode vop, PatFrags extop, string instruction_name, bit is_float> {
+multiclass VPatWidenReductionVL_RM<SDNode vop, PatFrags extop, string instruction_name, bit is_float> {
foreach vtiToWti = !if(is_float, AllWidenableFloatVectors, AllWidenableIntVectors) in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
defvar wti_m1 = !cast<VTypeInfo>(!if(is_float, "VF", "VI") # wti.SEW # "M1");
- def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
- (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))),
- VR:$rs2, (vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX)
- (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
- (wti_m1.Vector VR:$rs2), GPR:$vl, vti.Log2SEW)>;
- def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
- (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))),
- VR:$rs2, (vti.Mask V0), VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_MASK")
- (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
- (wti_m1.Vector VR:$rs2), (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
+ (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))),
+ VR:$rs2, (vti.Mask true_mask), VLOpFrag,
+ (XLenVT timm:$policy))),
+ (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW)
+ (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
+ (wti_m1.Vector VR:$rs2),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, vti.Log2SEW,
+ (XLenVT timm:$policy))>;
+ def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
+ (wti.Vector (extop (vti.Vector vti.RegClass:$rs1))),
+ VR:$rs2, (vti.Mask V0), VLOpFrag,
+ (XLenVT timm:$policy))),
+ (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+ (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
+ (wti_m1.Vector VR:$rs2), (vti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, vti.Log2SEW,
+ (XLenVT timm:$policy))>;
+ }
}
}
@@ -1023,197 +1540,284 @@ multiclass VPatWidenReductionVL_Ext_VL<SDNode vop, PatFrags extop, string instru
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
defvar wti_m1 = !cast<VTypeInfo>(!if(is_float, "VF", "VI") # wti.SEW # "M1");
- def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
- (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
- VR:$rs2, (vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX)
- (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
- (wti_m1.Vector VR:$rs2), GPR:$vl, vti.Log2SEW)>;
- def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
- (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
- VR:$rs2, (vti.Mask V0), VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_MASK")
- (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
- (wti_m1.Vector VR:$rs2), (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
- }
-}
-
-multiclass VPatWidenBinaryFPVL_VV_VF<SDNode op, PatFrags extop, string instruction_name> {
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
+ (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
+ VR:$rs2, (vti.Mask true_mask), VLOpFrag,
+ (XLenVT timm:$policy))),
+ (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW)
+ (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
+ (wti_m1.Vector VR:$rs2), GPR:$vl, vti.Log2SEW,
+ (XLenVT timm:$policy))>;
+ def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
+ (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
+ VR:$rs2, (vti.Mask V0), VLOpFrag,
+ (XLenVT timm:$policy))),
+ (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+ (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
+ (wti_m1.Vector VR:$rs2), (vti.Mask V0), GPR:$vl, vti.Log2SEW,
+ (XLenVT timm:$policy))>;
+ }
+ }
+}
+
+multiclass VPatWidenReductionVL_Ext_VL_RM<SDNode vop, PatFrags extop, string instruction_name, bit is_float> {
+ foreach vtiToWti = !if(is_float, AllWidenableFloatVectors, AllWidenableIntVectors) in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ defvar wti_m1 = !cast<VTypeInfo>(!if(is_float, "VF", "VI") # wti.SEW # "M1");
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
+ (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
+ VR:$rs2, (vti.Mask true_mask), VLOpFrag,
+ (XLenVT timm:$policy))),
+ (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW)
+ (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
+ (wti_m1.Vector VR:$rs2),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, vti.Log2SEW,
+ (XLenVT timm:$policy))>;
+ def: Pat<(wti_m1.Vector (vop (wti_m1.Vector VR:$merge),
+ (wti.Vector (extop (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), VLOpFrag)),
+ VR:$rs2, (vti.Mask V0), VLOpFrag,
+ (XLenVT timm:$policy))),
+ (!cast<Instruction>(instruction_name#"_VS_"#vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+ (wti_m1.Vector VR:$merge), (vti.Vector vti.RegClass:$rs1),
+ (wti_m1.Vector VR:$rs2), (vti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, vti.Log2SEW,
+ (XLenVT timm:$policy))>;
+ }
+ }
+}
+
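+// FP widening binary ops reuse the VPatBinaryVL_V/VPatBinaryVL_VF classes with
+// the wide type (wti) as the result and the narrow type (vti) for the operands
+// and for the SEW/LMUL encoded in the instruction name.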
+multiclass VPatBinaryFPWVL_VV_VF<SDNode vop, string instruction_name> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
- defvar fvti = fvtiToFWti.Vti;
- defvar fwti = fvtiToFWti.Wti;
- def : Pat<(fwti.Vector (op (fwti.Vector (extop (fvti.Vector fvti.RegClass:$rs2),
- (fvti.Mask true_mask), VLOpFrag)),
- (fwti.Vector (extop (fvti.Vector fvti.RegClass:$rs1),
- (fvti.Mask true_mask), VLOpFrag)),
- srcvalue, (fwti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VV_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, fvti.RegClass:$rs1,
- GPR:$vl, fvti.Log2SEW)>;
- def : Pat<(fwti.Vector (op (fwti.Vector (extop (fvti.Vector fvti.RegClass:$rs2),
- (fvti.Mask true_mask), VLOpFrag)),
- (fwti.Vector (extop (fvti.Vector (SplatFPOp fvti.ScalarRegClass:$rs1)),
- (fvti.Mask true_mask), VLOpFrag)),
- srcvalue, (fwti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, fvti.ScalarRegClass:$rs1,
- GPR:$vl, fvti.Log2SEW)>;
- }
-}
-
-multiclass VPatWidenBinaryFPVL_WV_WF<SDNode op, PatFrags extop, string instruction_name> {
+ defvar vti = fvtiToFWti.Vti;
+ defvar wti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : VPatBinaryVL_V<vop, instruction_name, "VV",
+ wti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, wti.RegClass, vti.RegClass,
+ vti.RegClass>;
+ def : VPatBinaryVL_VF<vop, instruction_name#"_V"#vti.ScalarSuffix,
+ wti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, wti.RegClass, vti.RegClass,
+ vti.ScalarRegClass>;
+ }
+ }
+}
+
+multiclass VPatBinaryFPWVL_VV_VF_RM<SDNode vop, string instruction_name> {
foreach fvtiToFWti = AllWidenableFloatVectors in {
- defvar fvti = fvtiToFWti.Vti;
- defvar fwti = fvtiToFWti.Wti;
- def : Pat<(fwti.Vector (op (fwti.Vector fwti.RegClass:$rs2),
- (fwti.Vector (extop (fvti.Vector fvti.RegClass:$rs1),
- (fvti.Mask true_mask), VLOpFrag)),
- srcvalue, (fwti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_WV_"#fvti.LMul.MX#"_TIED")
- fwti.RegClass:$rs2, fvti.RegClass:$rs1,
- GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(fwti.Vector (op (fwti.Vector fwti.RegClass:$rs2),
- (fwti.Vector (extop (fvti.Vector (SplatFPOp fvti.ScalarRegClass:$rs1)),
- (fvti.Mask true_mask), VLOpFrag)),
- srcvalue, (fwti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_W"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
- fwti.RegClass:$rs2, fvti.ScalarRegClass:$rs1,
- GPR:$vl, fvti.Log2SEW)>;
+ defvar vti = fvtiToFWti.Vti;
+ defvar wti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : VPatBinaryVL_V_RM<vop, instruction_name, "VV",
+ wti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, wti.RegClass, vti.RegClass,
+ vti.RegClass>;
+ def : VPatBinaryVL_VF_RM<vop, instruction_name#"_V"#vti.ScalarSuffix,
+ wti.Vector, vti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, wti.RegClass, vti.RegClass,
+ vti.ScalarRegClass>;
+ }
+ }
+}
+
+multiclass VPatBinaryFPWVL_VV_VF_WV_WF<SDNode vop, SDNode vop_w, string instruction_name>
+ : VPatBinaryFPWVL_VV_VF<vop, instruction_name> {
+ foreach fvtiToFWti = AllWidenableFloatVectors in {
+ defvar vti = fvtiToFWti.Vti;
+ defvar wti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ defm : VPatTiedBinaryNoMaskVL_V<vop_w, instruction_name, "WV",
+ wti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, wti.RegClass, vti.RegClass>;
+ def : VPatBinaryVL_V<vop_w, instruction_name, "WV",
+ wti.Vector, wti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, wti.RegClass, wti.RegClass,
+ vti.RegClass>;
+ def : VPatBinaryVL_VF<vop_w, instruction_name#"_W"#vti.ScalarSuffix,
+ wti.Vector, wti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, wti.RegClass, wti.RegClass,
+ vti.ScalarRegClass>;
+ }
}
}
-multiclass VPatWidenBinaryFPVL_VV_VF_WV_WF<SDNode op, string instruction_name> {
- defm : VPatWidenBinaryFPVL_VV_VF<op, riscv_fpextend_vl_oneuse, instruction_name>;
- defm : VPatWidenBinaryFPVL_WV_WF<op, riscv_fpextend_vl_oneuse, instruction_name>;
+multiclass VPatBinaryFPWVL_VV_VF_WV_WF_RM<SDNode vop, SDNode vop_w, string instruction_name>
+ : VPatBinaryFPWVL_VV_VF_RM<vop, instruction_name> {
+ foreach fvtiToFWti = AllWidenableFloatVectors in {
+ defvar vti = fvtiToFWti.Vti;
+ defvar wti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ defm : VPatTiedBinaryNoMaskVL_V_RM<vop_w, instruction_name, "WV",
+ wti.Vector, vti.Vector, vti.Log2SEW,
+ vti.LMul, wti.RegClass, vti.RegClass>;
+ def : VPatBinaryVL_V_RM<vop_w, instruction_name, "WV",
+ wti.Vector, wti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, wti.RegClass, wti.RegClass,
+ vti.RegClass>;
+ def : VPatBinaryVL_VF_RM<vop_w, instruction_name#"_W"#vti.ScalarSuffix,
+ wti.Vector, wti.Vector, vti.Vector, vti.Mask,
+ vti.Log2SEW, vti.LMul, wti.RegClass, wti.RegClass,
+ vti.ScalarRegClass>;
+ }
+ }
}
multiclass VPatNarrowShiftSplatExt_WX<SDNode op, PatFrags extop, string instruction_name> {
foreach vtiToWti = AllWidenableIntVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in
def : Pat<
(vti.Vector
(riscv_trunc_vector_vl
(op (wti.Vector wti.RegClass:$rs2),
- (wti.Vector (extop (vti.Vector (SplatPat GPR:$rs1)),
+ (wti.Vector (extop (vti.Vector (SplatPat (XLenVT GPR:$rs1))),
(vti.Mask true_mask), VLOpFrag)),
srcvalue, (wti.Mask true_mask), VLOpFrag),
(vti.Mask true_mask), VLOpFrag)),
(!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
- wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW)>;
+ (vti.Vector (IMPLICIT_DEF)),
+ wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
}
}
+multiclass VPatNarrowShiftExtVL_WV<SDNode op, PatFrags extop, string instruction_name> {
+ foreach vtiToWti = AllWidenableIntVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in
+ def : Pat<
+ (vti.Vector
+ (riscv_trunc_vector_vl
+ (op (wti.Vector wti.RegClass:$rs2),
+ (wti.Vector (extop (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask true_mask), VLOpFrag)),
+ srcvalue, (vti.Mask true_mask), VLOpFrag),
+ (vti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2, vti.RegClass:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>;
+ }
+}
+
+multiclass VPatNarrowShiftVL_WV<SDNode op, string instruction_name> {
+ defm : VPatNarrowShiftExtVL_WV<op, riscv_sext_vl_oneuse, instruction_name>;
+ defm : VPatNarrowShiftExtVL_WV<op, riscv_zext_vl_oneuse, instruction_name>;
+}
+
multiclass VPatMultiplyAddVL_VV_VX<SDNode op, string instruction_name> {
foreach vti = AllIntegerVectors in {
defvar suffix = vti.LMul.MX;
- // NOTE: We choose VMADD because it has the most commuting freedom. So it
- // works best with how TwoAddressInstructionPass tries commuting.
- def : Pat<(vti.Vector
- (op vti.RegClass:$rs2,
- (riscv_mul_vl_oneuse vti.RegClass:$rs1,
- vti.RegClass:$rd,
- srcvalue, (vti.Mask true_mask), VLOpFrag),
- srcvalue, (vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VV_"# suffix)
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- // The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally
- // commutable.
- def : Pat<(vti.Vector
- (op vti.RegClass:$rs2,
- (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1),
- vti.RegClass:$rd,
- srcvalue, (vti.Mask true_mask), VLOpFrag),
- srcvalue, (vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VX_" # suffix)
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ // NOTE: We choose VMADD because it has the most commuting freedom. So it
+ // works best with how TwoAddressInstructionPass tries commuting.
+ def : Pat<(vti.Vector
+ (op vti.RegClass:$rs2,
+ (riscv_mul_vl_oneuse vti.RegClass:$rs1,
+ vti.RegClass:$rd,
+ srcvalue, (vti.Mask true_mask), VLOpFrag),
+ srcvalue, (vti.Mask true_mask), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VV_"# suffix)
+ vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ // The choice of VMADD here is arbitrary, vmadd.vx and vmacc.vx are equally
+ // commutable.
+ def : Pat<(vti.Vector
+ (op vti.RegClass:$rs2,
+ (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1),
+ vti.RegClass:$rd,
+ srcvalue, (vti.Mask true_mask), VLOpFrag),
+ srcvalue, (vti.Mask true_mask), VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VX_" # suffix)
+ vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
}
}
multiclass VPatMultiplyAccVL_VV_VX<PatFrag op, string instruction_name> {
foreach vti = AllIntegerVectors in {
defvar suffix = vti.LMul.MX;
- def : Pat<(riscv_vp_merge_vl (vti.Mask true_mask),
- (vti.Vector (op vti.RegClass:$rd,
- (riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2,
- srcvalue, (vti.Mask true_mask), VLOpFrag),
- srcvalue, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_VV_"# suffix)
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>;
- def : Pat<(riscv_vp_merge_vl (vti.Mask V0),
- (vti.Vector (op vti.RegClass:$rd,
- (riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2,
- srcvalue, (vti.Mask true_mask), VLOpFrag),
- srcvalue, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>;
- def : Pat<(riscv_vp_merge_vl (vti.Mask true_mask),
- (vti.Vector (op vti.RegClass:$rd,
- (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rs2,
- srcvalue, (vti.Mask true_mask), VLOpFrag),
- srcvalue, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_VX_"# suffix)
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>;
- def : Pat<(riscv_vp_merge_vl (vti.Mask V0),
- (vti.Vector (op vti.RegClass:$rd,
- (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rs2,
- srcvalue, (vti.Mask true_mask), VLOpFrag),
- srcvalue, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_VX_"# suffix #"_MASK")
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>;
- def : Pat<(riscv_vselect_vl (vti.Mask V0),
- (vti.Vector (op vti.RegClass:$rd,
- (riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2,
- srcvalue, (vti.Mask true_mask), VLOpFrag),
- srcvalue, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(riscv_vselect_vl (vti.Mask V0),
- (vti.Vector (op vti.RegClass:$rd,
- (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rs2,
- srcvalue, (vti.Mask true_mask), VLOpFrag),
- srcvalue, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_VX_"# suffix #"_MASK")
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- }
-}
-
-multiclass VPatWidenMultiplyAddVL_VV_VX<PatFrag op1, string instruction_name> {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(riscv_vp_merge_vl (vti.Mask V0),
+ (vti.Vector (op vti.RegClass:$rd,
+ (riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2,
+ srcvalue, (vti.Mask true_mask), VLOpFrag),
+ srcvalue, (vti.Mask true_mask), VLOpFrag)),
+ vti.RegClass:$rd, VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
+ vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>;
+ def : Pat<(riscv_vp_merge_vl (vti.Mask V0),
+ (vti.Vector (op vti.RegClass:$rd,
+ (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rs2,
+ srcvalue, (vti.Mask true_mask), VLOpFrag),
+ srcvalue, (vti.Mask true_mask), VLOpFrag)),
+ vti.RegClass:$rd, VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_VX_"# suffix #"_MASK")
+ vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>;
+ def : Pat<(riscv_vselect_vl (vti.Mask V0),
+ (vti.Vector (op vti.RegClass:$rd,
+ (riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2,
+ srcvalue, (vti.Mask true_mask), VLOpFrag),
+ srcvalue, (vti.Mask true_mask), VLOpFrag)),
+ vti.RegClass:$rd, VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
+ vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(riscv_vselect_vl (vti.Mask V0),
+ (vti.Vector (op vti.RegClass:$rd,
+ (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rs2,
+ srcvalue, (vti.Mask true_mask), VLOpFrag),
+ srcvalue, (vti.Mask true_mask), VLOpFrag)),
+ vti.RegClass:$rd, VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_VX_"# suffix #"_MASK")
+ vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+ }
+}
+
+multiclass VPatWidenMultiplyAddVL_VV_VX<SDNode vwmacc_op, string instr_name> {
foreach vtiTowti = AllWidenableIntVectors in {
defvar vti = vtiTowti.Vti;
defvar wti = vtiTowti.Wti;
- def : Pat<(wti.Vector
- (riscv_add_vl wti.RegClass:$rd,
- (op1 vti.RegClass:$rs1,
- (vti.Vector vti.RegClass:$rs2),
- srcvalue, (vti.Mask true_mask), VLOpFrag),
- srcvalue, (vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VV_" # vti.LMul.MX)
- wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(wti.Vector
- (riscv_add_vl wti.RegClass:$rd,
- (op1 (SplatPat XLenVT:$rs1),
- (vti.Vector vti.RegClass:$rs2),
- srcvalue, (vti.Mask true_mask), VLOpFrag),
- srcvalue, (vti.Mask true_mask), VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VX_" # vti.LMul.MX)
- wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(vwmacc_op (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2),
+ (wti.Vector wti.RegClass:$rd),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>(instr_name#"_VV_"#vti.LMul.MX#"_MASK")
+ wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(vwmacc_op (SplatPat XLenVT:$rs1),
+ (vti.Vector vti.RegClass:$rs2),
+ (wti.Vector wti.RegClass:$rd),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>(instr_name#"_VX_"#vti.LMul.MX#"_MASK")
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs1,
+ vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+ }
}
}
@@ -1221,97 +1825,161 @@ multiclass VPatNarrowShiftSplat_WX_WI<SDNode op, string instruction_name> {
foreach vtiTowti = AllWidenableIntVectors in {
defvar vti = vtiTowti.Vti;
defvar wti = vtiTowti.Wti;
- def : Pat<(vti.Vector (riscv_trunc_vector_vl
- (wti.Vector (op wti.RegClass:$rs1, (SplatPat XLenVT:$rs2),
- srcvalue, true_mask, VLOpFrag)), true_mask, VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
- wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_trunc_vector_vl
- (wti.Vector (op wti.RegClass:$rs1, (SplatPat_uimm5 uimm5:$rs2),
- srcvalue, true_mask, VLOpFrag)), true_mask, VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_WI_"#vti.LMul.MX)
- wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW)>;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(vti.Vector (riscv_trunc_vector_vl
+ (wti.Vector (op wti.RegClass:$rs1, (SplatPat XLenVT:$rs2),
+ srcvalue, true_mask, VLOpFrag)), true_mask, VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ def : Pat<(vti.Vector (riscv_trunc_vector_vl
+ (wti.Vector (op wti.RegClass:$rs1, (SplatPat_uimm5 uimm5:$rs2),
+ srcvalue, true_mask, VLOpFrag)), true_mask, VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_WI_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ }
}
}
-multiclass VPatFPMulAddVL_VV_VF<SDNode vop, string instruction_name> {
+multiclass VPatFPMulAddVL_VV_VF<SDPatternOperator vop, string instruction_name> {
foreach vti = AllFloatVectors in {
defvar suffix = vti.LMul.MX;
- def : Pat<(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rd,
- vti.RegClass:$rs2, (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VV_"# suffix)
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rd,
- vti.RegClass:$rs2, (vti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rd,
+ vti.RegClass:$rs2, (vti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
+ vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
- def : Pat<(vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1),
- vti.RegClass:$rd, vti.RegClass:$rs2,
- (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix)
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1),
- vti.RegClass:$rd, vti.RegClass:$rs2,
- (vti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1),
+ vti.RegClass:$rd, vti.RegClass:$rs2,
+ (vti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
+ vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
+ }
+ }
+}
+
+multiclass VPatFPMulAddVL_VV_VF_RM<SDPatternOperator vop, string instruction_name> {
+ foreach vti = AllFloatVectors in {
+ defvar suffix = vti.LMul.MX;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rd,
+ vti.RegClass:$rs2, (vti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
+ vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, vti.Log2SEW, TA_MA)>;
+
+ def : Pat<(vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1),
+ vti.RegClass:$rd, vti.RegClass:$rs2,
+ (vti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
+ vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, vti.Log2SEW, TA_MA)>;
+ }
}
}
multiclass VPatFPMulAccVL_VV_VF<PatFrag vop, string instruction_name> {
foreach vti = AllFloatVectors in {
defvar suffix = vti.LMul.MX;
- def : Pat<(riscv_vp_merge_vl (vti.Mask true_mask),
- (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2,
- vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_VV_"# suffix)
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>;
- def : Pat<(riscv_vp_merge_vl (vti.Mask V0),
- (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2,
- vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>;
- def : Pat<(riscv_vp_merge_vl (vti.Mask true_mask),
- (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2,
- vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix)
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>;
- def : Pat<(riscv_vp_merge_vl (vti.Mask V0),
- (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2,
- vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_UNDISTURBED_MASK_UNDISTURBED)>;
- def : Pat<(riscv_vselect_vl (vti.Mask V0),
- (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2,
- vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(riscv_vselect_vl (vti.Mask V0),
- (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2,
- vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(riscv_vp_merge_vl (vti.Mask V0),
+ (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2,
+ vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
+ vti.RegClass:$rd, VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
+ vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>;
+ def : Pat<(riscv_vp_merge_vl (vti.Mask V0),
+ (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2,
+ vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
+ vti.RegClass:$rd, VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
+ vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TU_MU)>;
+ def : Pat<(riscv_vselect_vl (vti.Mask V0),
+ (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2,
+ vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
+ vti.RegClass:$rd, VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
+ vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(riscv_vselect_vl (vti.Mask V0),
+ (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2,
+ vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
+ vti.RegClass:$rd, VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
+ vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+ }
+}
+
+multiclass VPatFPMulAccVL_VV_VF_RM<PatFrag vop, string instruction_name> {
+ foreach vti = AllFloatVectors in {
+ defvar suffix = vti.LMul.MX;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(riscv_vp_merge_vl (vti.Mask V0),
+ (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2,
+ vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
+ vti.RegClass:$rd, VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
+ vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, vti.Log2SEW, TU_MU)>;
+ def : Pat<(riscv_vp_merge_vl (vti.Mask V0),
+ (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2,
+ vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
+ vti.RegClass:$rd, VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
+ vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, vti.Log2SEW, TU_MU)>;
+ def : Pat<(riscv_vselect_vl (vti.Mask V0),
+ (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2,
+ vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
+ vti.RegClass:$rd, VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
+ vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(riscv_vselect_vl (vti.Mask V0),
+ (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2,
+ vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
+ vti.RegClass:$rd, VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
+ vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
}
}
@@ -1319,30 +1987,55 @@ multiclass VPatWidenFPMulAccVL_VV_VF<SDNode vop, string instruction_name> {
foreach vtiToWti = AllWidenableFloatVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
- def : Pat<(vop
- (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector vti.RegClass:$rs1),
- (vti.Mask true_mask), VLOpFrag)),
- (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), VLOpFrag)),
- (wti.Vector wti.RegClass:$rd), (vti.Mask true_mask),
- VLOpFrag),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
- wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(vop
- (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
- (vti.Mask true_mask), VLOpFrag)),
- (wti.Vector (riscv_fpextend_vl_oneuse
- (vti.Vector vti.RegClass:$rs2),
- (vti.Mask true_mask), VLOpFrag)),
- (wti.Vector wti.RegClass:$rd), (vti.Mask true_mask),
- VLOpFrag),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
- wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2),
+ (wti.Vector wti.RegClass:$rd), (vti.Mask V0),
+ VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX #"_MASK")
+ wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
+ def : Pat<(vop (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
+ (vti.Vector vti.RegClass:$rs2),
+ (wti.Vector wti.RegClass:$rd), (vti.Mask V0),
+ VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX #"_MASK")
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
+ }
+ }
+}
+
+multiclass VPatWidenFPMulAccVL_VV_VF_RM<SDNode vop, string instruction_name> {
+ foreach vtiToWti = AllWidenableFloatVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2),
+ (wti.Vector wti.RegClass:$rd), (vti.Mask V0),
+ VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX #"_MASK")
+ wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, vti.Log2SEW, TA_MA)>;
+ def : Pat<(vop (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
+ (vti.Vector vti.RegClass:$rs2),
+ (wti.Vector wti.RegClass:$rd), (vti.Mask V0),
+ VLOpFrag),
+ (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX #"_MASK")
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, vti.Log2SEW, TA_MA)>;
+ }
}
}
@@ -1350,8 +2043,6 @@ multiclass VPatWidenFPMulAccVL_VV_VF<SDNode vop, string instruction_name> {
// Patterns.
//===----------------------------------------------------------------------===//
-let Predicates = [HasVInstructions] in {
-
// 11. Vector Integer Arithmetic Instructions
// 11.1. Vector Single-Width Integer Add and Subtract
@@ -1360,18 +2051,20 @@ defm : VPatBinaryVL_VV_VX<riscv_sub_vl, "PseudoVSUB">;
// Handle VRSUB specially since it's the only integer binary op with reversed
// pattern operands
foreach vti = AllIntegerVectors in {
- def : Pat<(riscv_sub_vl (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
- (vti.Vector vti.RegClass:$rs1),
- vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
- (!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)),
- (vti.Vector vti.RegClass:$rs1),
- vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
- (!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs1, simm5:$rs2,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(riscv_sub_vl (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
+ (vti.Vector vti.RegClass:$rs1),
+ vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVRSUB_VX_"# vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(riscv_sub_vl (vti.Vector (SplatPat_simm5 simm5:$rs2)),
+ (vti.Vector vti.RegClass:$rs1),
+ vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVRSUB_VI_"# vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs1, simm5:$rs2,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
}
// 11.2. Vector Widening Integer Add/Subtract
@@ -1380,6 +2073,33 @@ defm : VPatBinaryWVL_VV_VX_WV_WX<riscv_vwaddu_vl, riscv_vwaddu_w_vl, "PseudoVWAD
defm : VPatBinaryWVL_VV_VX_WV_WX<riscv_vwsub_vl, riscv_vwsub_w_vl, "PseudoVWSUB">;
defm : VPatBinaryWVL_VV_VX_WV_WX<riscv_vwsubu_vl, riscv_vwsubu_w_vl, "PseudoVWSUBU">;
+// shl_vl (ext_vl v, splat 1) is a special case of widening add.
+foreach vtiToWti = AllWidenableIntVectors in {
+ defvar vti = vtiToWti.Vti;
+ defvar wti = vtiToWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in {
+ def : Pat<(riscv_shl_vl (wti.Vector (riscv_sext_vl_oneuse
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask V0), VLOpFrag)),
+ (wti.Vector (riscv_vmv_v_x_vl
+ (wti.Vector undef), 1, VLOpFrag)),
+ wti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVWADD_VV_"#vti.LMul.MX#"_MASK")
+ wti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(riscv_shl_vl (wti.Vector (riscv_zext_vl_oneuse
+ (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask V0), VLOpFrag)),
+ (wti.Vector (riscv_vmv_v_x_vl
+ (wti.Vector undef), 1, VLOpFrag)),
+ wti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVWADDU_VV_"#vti.LMul.MX#"_MASK")
+ wti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+}
+
// 11.3. Vector Integer Extension
defm : VPatExtendVL_V<riscv_zext_vl, "PseudoVZEXT", "VF2",
AllFractionableVF2IntVectors>;
@@ -1406,11 +2126,13 @@ defm : VPatBinaryVL_VV_VX_VI<riscv_sra_vl, "PseudoVSRA", uimm5>;
foreach vti = AllIntegerVectors in {
// Emit shift by 1 as an add since it might be faster.
+ let Predicates = GetVTypePredicates<vti>.Predicates in
def : Pat<(riscv_shl_vl (vti.Vector vti.RegClass:$rs1),
(riscv_vmv_v_x_vl (vti.Vector undef), 1, (XLenVT srcvalue)),
srcvalue, (vti.Mask true_mask), VLOpFrag),
(!cast<Instruction>("PseudoVADD_VV_"# vti.LMul.MX)
- vti.RegClass:$rs1, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>;
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
}
// 11.7. Vector Narrowing Integer Right Shift Instructions
@@ -1424,11 +2146,16 @@ defm : VPatNarrowShiftSplatExt_WX<riscv_sra_vl, riscv_zext_vl_oneuse, "PseudoVNS
defm : VPatNarrowShiftSplatExt_WX<riscv_srl_vl, riscv_sext_vl_oneuse, "PseudoVNSRL">;
defm : VPatNarrowShiftSplatExt_WX<riscv_srl_vl, riscv_zext_vl_oneuse, "PseudoVNSRL">;
+defm : VPatNarrowShiftVL_WV<riscv_srl_vl, "PseudoVNSRL">;
+defm : VPatNarrowShiftVL_WV<riscv_sra_vl, "PseudoVNSRA">;
+
defm : VPatBinaryNVL_WV_WX_WI<riscv_vnsrl_vl, "PseudoVNSRL">;
foreach vtiTowti = AllWidenableIntVectors in {
defvar vti = vtiTowti.Vti;
defvar wti = vtiTowti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in
def : Pat<(vti.Vector (riscv_trunc_vector_vl (wti.Vector wti.RegClass:$rs1),
(vti.Mask V0),
VLOpFrag)),
@@ -1439,39 +2166,41 @@ foreach vtiTowti = AllWidenableIntVectors in {
// 11.8. Vector Integer Comparison Instructions
foreach vti = AllIntegerVectors in {
- defm : VPatIntegerSetCCVL_VV<vti, "PseudoVMSEQ", SETEQ>;
- defm : VPatIntegerSetCCVL_VV<vti, "PseudoVMSNE", SETNE>;
-
- defm : VPatIntegerSetCCVL_VV_Swappable<vti, "PseudoVMSLT", SETLT, SETGT>;
- defm : VPatIntegerSetCCVL_VV_Swappable<vti, "PseudoVMSLTU", SETULT, SETUGT>;
- defm : VPatIntegerSetCCVL_VV_Swappable<vti, "PseudoVMSLE", SETLE, SETGE>;
- defm : VPatIntegerSetCCVL_VV_Swappable<vti, "PseudoVMSLEU", SETULE, SETUGE>;
-
- defm : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSEQ", SETEQ, SETEQ>;
- defm : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSNE", SETNE, SETNE>;
- defm : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSLT", SETLT, SETGT>;
- defm : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSLTU", SETULT, SETUGT>;
- defm : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSLE", SETLE, SETGE>;
- defm : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSLEU", SETULE, SETUGE>;
- defm : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSGT", SETGT, SETLT>;
- defm : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSGTU", SETUGT, SETULT>;
- // There is no VMSGE(U)_VX instruction
-
- defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSEQ", SETEQ, SETEQ>;
- defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSNE", SETNE, SETNE>;
- defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSLE", SETLE, SETGE>;
- defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSLEU", SETULE, SETUGE>;
- defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSGT", SETGT, SETLT>;
- defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSGTU", SETUGT, SETULT>;
-
- defm : VPatIntegerSetCCVL_VIPlus1_Swappable<vti, "PseudoVMSLE", SETLT, SETGT,
- SplatPat_simm5_plus1_nonzero>;
- defm : VPatIntegerSetCCVL_VIPlus1_Swappable<vti, "PseudoVMSLEU", SETULT, SETUGT,
- SplatPat_simm5_plus1_nonzero>;
- defm : VPatIntegerSetCCVL_VIPlus1_Swappable<vti, "PseudoVMSGT", SETGE, SETLE,
- SplatPat_simm5_plus1>;
- defm : VPatIntegerSetCCVL_VIPlus1_Swappable<vti, "PseudoVMSGTU", SETUGE, SETULE,
- SplatPat_simm5_plus1_nonzero>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ defm : VPatIntegerSetCCVL_VV<vti, "PseudoVMSEQ", SETEQ>;
+ defm : VPatIntegerSetCCVL_VV<vti, "PseudoVMSNE", SETNE>;
+
+ defm : VPatIntegerSetCCVL_VV_Swappable<vti, "PseudoVMSLT", SETLT, SETGT>;
+ defm : VPatIntegerSetCCVL_VV_Swappable<vti, "PseudoVMSLTU", SETULT, SETUGT>;
+ defm : VPatIntegerSetCCVL_VV_Swappable<vti, "PseudoVMSLE", SETLE, SETGE>;
+ defm : VPatIntegerSetCCVL_VV_Swappable<vti, "PseudoVMSLEU", SETULE, SETUGE>;
+
+ defm : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSEQ", SETEQ, SETEQ>;
+ defm : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSNE", SETNE, SETNE>;
+ defm : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSLT", SETLT, SETGT>;
+ defm : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSLTU", SETULT, SETUGT>;
+ defm : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSLE", SETLE, SETGE>;
+ defm : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSLEU", SETULE, SETUGE>;
+ defm : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSGT", SETGT, SETLT>;
+ defm : VPatIntegerSetCCVL_VX_Swappable<vti, "PseudoVMSGTU", SETUGT, SETULT>;
+ // There is no VMSGE(U)_VX instruction
+
+ defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSEQ", SETEQ, SETEQ>;
+ defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSNE", SETNE, SETNE>;
+ defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSLE", SETLE, SETGE>;
+ defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSLEU", SETULE, SETUGE>;
+ defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSGT", SETGT, SETLT>;
+ defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSGTU", SETUGT, SETULT>;
+
+ defm : VPatIntegerSetCCVL_VIPlus1_Swappable<vti, "PseudoVMSLE", SETLT, SETGT,
+ SplatPat_simm5_plus1>;
+ defm : VPatIntegerSetCCVL_VIPlus1_Swappable<vti, "PseudoVMSLEU", SETULT, SETUGT,
+ SplatPat_simm5_plus1_nonzero>;
+ defm : VPatIntegerSetCCVL_VIPlus1_Swappable<vti, "PseudoVMSGT", SETGE, SETLE,
+ SplatPat_simm5_plus1>;
+ defm : VPatIntegerSetCCVL_VIPlus1_Swappable<vti, "PseudoVMSGTU", SETUGE, SETULE,
+ SplatPat_simm5_plus1_nonzero>;
+ }
} // foreach vti = AllIntegerVectors
// 11.9. Vector Integer Min/Max Instructions
@@ -1482,14 +2211,19 @@ defm : VPatBinaryVL_VV_VX<riscv_smax_vl, "PseudoVMAX">;
// 11.10. Vector Single-Width Integer Multiply Instructions
defm : VPatBinaryVL_VV_VX<riscv_mul_vl, "PseudoVMUL">;
-defm : VPatBinaryVL_VV_VX<riscv_mulhs_vl, "PseudoVMULH">;
-defm : VPatBinaryVL_VV_VX<riscv_mulhu_vl, "PseudoVMULHU">;
+defm : VPatBinaryVL_VV_VX<riscv_mulhs_vl, "PseudoVMULH", IntegerVectorsExceptI64>;
+defm : VPatBinaryVL_VV_VX<riscv_mulhu_vl, "PseudoVMULHU", IntegerVectorsExceptI64>;
+// vsmul.vv and vsmul.vx are not included in EEW=64 in Zve64*.
+let Predicates = [HasVInstructionsFullMultiply] in {
+ defm : VPatBinaryVL_VV_VX<riscv_mulhs_vl, "PseudoVMULH", I64IntegerVectors>;
+ defm : VPatBinaryVL_VV_VX<riscv_mulhu_vl, "PseudoVMULHU", I64IntegerVectors>;
+}
// 11.11. Vector Integer Divide Instructions
-defm : VPatBinaryVL_VV_VX<riscv_udiv_vl, "PseudoVDIVU">;
-defm : VPatBinaryVL_VV_VX<riscv_sdiv_vl, "PseudoVDIV">;
-defm : VPatBinaryVL_VV_VX<riscv_urem_vl, "PseudoVREMU">;
-defm : VPatBinaryVL_VV_VX<riscv_srem_vl, "PseudoVREM">;
+defm : VPatBinaryVL_VV_VX<riscv_udiv_vl, "PseudoVDIVU", isSEWAware=1>;
+defm : VPatBinaryVL_VV_VX<riscv_sdiv_vl, "PseudoVDIV", isSEWAware=1>;
+defm : VPatBinaryVL_VV_VX<riscv_urem_vl, "PseudoVREMU", isSEWAware=1>;
+defm : VPatBinaryVL_VV_VX<riscv_srem_vl, "PseudoVREM", isSEWAware=1>;
// 11.12. Vector Widening Integer Multiply Instructions
defm : VPatBinaryWVL_VV_VX<riscv_vwmul_vl, "PseudoVWMUL">;
@@ -1503,91 +2237,96 @@ defm : VPatMultiplyAccVL_VV_VX<riscv_add_vl_oneuse, "PseudoVMACC">;
defm : VPatMultiplyAccVL_VV_VX<riscv_sub_vl_oneuse, "PseudoVNMSAC">;
// 11.14. Vector Widening Integer Multiply-Add Instructions
-defm : VPatWidenMultiplyAddVL_VV_VX<riscv_vwmul_vl_oneuse, "PseudoVWMACC">;
-defm : VPatWidenMultiplyAddVL_VV_VX<riscv_vwmulu_vl_oneuse, "PseudoVWMACCU">;
-defm : VPatWidenMultiplyAddVL_VV_VX<riscv_vwmulsu_vl_oneuse, "PseudoVWMACCSU">;
+defm : VPatWidenMultiplyAddVL_VV_VX<riscv_vwmacc_vl, "PseudoVWMACC">;
+defm : VPatWidenMultiplyAddVL_VV_VX<riscv_vwmaccu_vl, "PseudoVWMACCU">;
+defm : VPatWidenMultiplyAddVL_VV_VX<riscv_vwmaccsu_vl, "PseudoVWMACCSU">;
foreach vtiTowti = AllWidenableIntVectors in {
defvar vti = vtiTowti.Vti;
defvar wti = vtiTowti.Wti;
- def : Pat<(wti.Vector
- (riscv_add_vl wti.RegClass:$rd,
- (riscv_vwmulsu_vl_oneuse (vti.Vector vti.RegClass:$rs1),
- (SplatPat XLenVT:$rs2),
- srcvalue,
- (vti.Mask true_mask),
- VLOpFrag),
- srcvalue, (vti.Mask true_mask),VLOpFrag)),
- (!cast<Instruction>("PseudoVWMACCUS_VX_" # vti.LMul.MX)
- wti.RegClass:$rd, vti.ScalarRegClass:$rs2, vti.RegClass:$rs1,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<wti>.Predicates) in
+ def : Pat<(riscv_vwmaccsu_vl (vti.Vector vti.RegClass:$rs1),
+ (SplatPat XLenVT:$rs2),
+ (wti.Vector wti.RegClass:$rd),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVWMACCUS_VX_"#vti.LMul.MX#"_MASK")
+ wti.RegClass:$rd, vti.ScalarRegClass:$rs2, vti.RegClass:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
}
// 11.15. Vector Integer Merge Instructions
foreach vti = AllIntegerVectors in {
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
- vti.RegClass:$rs1,
- vti.RegClass:$rs2,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX)
- vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask V0),
- GPR:$vl, vti.Log2SEW)>;
-
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
- (SplatPat XLenVT:$rs1),
- vti.RegClass:$rs2,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX)
- vti.RegClass:$rs2, GPR:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
-
- def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
- (SplatPat_simm5 simm5:$rs1),
- vti.RegClass:$rs2,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX)
- vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
-
- def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0),
- vti.RegClass:$rs1,
- vti.RegClass:$rs2,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX#"_TU")
- vti.RegClass:$rs2, vti.RegClass:$rs2, vti.RegClass:$rs1,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
-
- def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0),
- (SplatPat XLenVT:$rs1),
- vti.RegClass:$rs2,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX#"_TU")
- vti.RegClass:$rs2, vti.RegClass:$rs2, GPR:$rs1,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
-
- def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0),
- (SplatPat_simm5 simm5:$rs1),
- vti.RegClass:$rs2,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX#"_TU")
- vti.RegClass:$rs2, vti.RegClass:$rs2, simm5:$rs1,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
+ vti.RegClass:$rs1,
+ vti.RegClass:$rs2,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs2, vti.RegClass:$rs1, (vti.Mask V0),
+ GPR:$vl, vti.Log2SEW)>;
+
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
+ (SplatPat XLenVT:$rs1),
+ vti.RegClass:$rs2,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs2, GPR:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+
+ def : Pat<(vti.Vector (riscv_vselect_vl (vti.Mask V0),
+ (SplatPat_simm5 simm5:$rs1),
+ vti.RegClass:$rs2,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+
+ def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0),
+ vti.RegClass:$rs1,
+ vti.RegClass:$rs2,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX)
+ vti.RegClass:$rs2, vti.RegClass:$rs2, vti.RegClass:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+
+ def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0),
+ (SplatPat XLenVT:$rs1),
+ vti.RegClass:$rs2,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX)
+ vti.RegClass:$rs2, vti.RegClass:$rs2, GPR:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+
+ def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0),
+ (SplatPat_simm5 simm5:$rs1),
+ vti.RegClass:$rs2,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX)
+ vti.RegClass:$rs2, vti.RegClass:$rs2, simm5:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+ }
}
// 11.16. Vector Integer Move Instructions
+foreach vti = AllVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (riscv_vmv_v_v_vl vti.RegClass:$passthru,
+ vti.RegClass:$rs2, VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_V_V_"#vti.LMul.MX)
+ vti.RegClass:$passthru, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+}
+
foreach vti = AllIntegerVectors in {
- def : Pat<(vti.Vector (riscv_vmv_v_x_vl (vti.Vector undef), GPR:$rs2, VLOpFrag)),
- (!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX)
- $rs2, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vmv_v_x_vl vti.Vector:$passthru, GPR:$rs2, VLOpFrag)),
- (!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX#"_TU")
- $passthru, $rs2, GPR:$vl, vti.Log2SEW)>;
- defvar ImmPat = !cast<ComplexPattern>("sew"#vti.SEW#"simm5");
- def : Pat<(vti.Vector (riscv_vmv_v_x_vl (vti.Vector undef), (ImmPat XLenVT:$imm5),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX)
- XLenVT:$imm5, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vmv_v_x_vl vti.Vector:$passthru, (ImmPat XLenVT:$imm5),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX#"_TU")
- $passthru, XLenVT:$imm5, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_vmv_v_x_vl vti.RegClass:$passthru, GPR:$rs2, VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_V_X_"#vti.LMul.MX)
+ vti.RegClass:$passthru, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ defvar ImmPat = !cast<ComplexPattern>("sew"#vti.SEW#"simm5");
+ def : Pat<(vti.Vector (riscv_vmv_v_x_vl vti.RegClass:$passthru, (ImmPat simm5:$imm5),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_V_I_"#vti.LMul.MX)
+ vti.RegClass:$passthru, simm5:$imm5, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ }
}
// 12. Vector Fixed-Point Arithmetic Instructions
@@ -1598,430 +2337,460 @@ defm : VPatBinaryVL_VV_VX_VI<riscv_uaddsat_vl, "PseudoVSADDU">;
defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">;
defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">;
-} // Predicates = [HasVInstructions]
-
// 13. Vector Floating-Point Instructions
-let Predicates = [HasVInstructionsAnyF] in {
-
// 13.2. Vector Single-Width Floating-Point Add/Subtract Instructions
-defm : VPatBinaryFPVL_VV_VF<riscv_fadd_vl, "PseudoVFADD">;
-defm : VPatBinaryFPVL_VV_VF<riscv_fsub_vl, "PseudoVFSUB">;
-defm : VPatBinaryFPVL_R_VF<riscv_fsub_vl, "PseudoVFRSUB">;
+defm : VPatBinaryFPVL_VV_VF_RM<any_riscv_fadd_vl, "PseudoVFADD">;
+defm : VPatBinaryFPVL_VV_VF_RM<any_riscv_fsub_vl, "PseudoVFSUB">;
+defm : VPatBinaryFPVL_R_VF_RM<any_riscv_fsub_vl, "PseudoVFRSUB">;
// 13.3. Vector Widening Floating-Point Add/Subtract Instructions
-defm : VPatWidenBinaryFPVL_VV_VF_WV_WF<riscv_fadd_vl, "PseudoVFWADD">;
-defm : VPatWidenBinaryFPVL_VV_VF_WV_WF<riscv_fsub_vl, "PseudoVFWSUB">;
+defm : VPatBinaryFPWVL_VV_VF_WV_WF_RM<riscv_vfwadd_vl, riscv_vfwadd_w_vl, "PseudoVFWADD">;
+defm : VPatBinaryFPWVL_VV_VF_WV_WF_RM<riscv_vfwsub_vl, riscv_vfwsub_w_vl, "PseudoVFWSUB">;
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
-defm : VPatBinaryFPVL_VV_VF<riscv_fmul_vl, "PseudoVFMUL">;
-defm : VPatBinaryFPVL_VV_VF<riscv_fdiv_vl, "PseudoVFDIV">;
-defm : VPatBinaryFPVL_R_VF<riscv_fdiv_vl, "PseudoVFRDIV">;
+defm : VPatBinaryFPVL_VV_VF_RM<any_riscv_fmul_vl, "PseudoVFMUL">;
+defm : VPatBinaryFPVL_VV_VF_RM<any_riscv_fdiv_vl, "PseudoVFDIV", isSEWAware=1>;
+defm : VPatBinaryFPVL_R_VF_RM<any_riscv_fdiv_vl, "PseudoVFRDIV", isSEWAware=1>;
// 13.5. Vector Widening Floating-Point Multiply Instructions
-defm : VPatWidenBinaryFPVL_VV_VF<riscv_fmul_vl, riscv_fpextend_vl_oneuse, "PseudoVFWMUL">;
+defm : VPatBinaryFPWVL_VV_VF_RM<riscv_vfwmul_vl, "PseudoVFWMUL">;
// 13.6 Vector Single-Width Floating-Point Fused Multiply-Add Instructions.
-defm : VPatFPMulAddVL_VV_VF<riscv_vfmadd_vl, "PseudoVFMADD">;
-defm : VPatFPMulAddVL_VV_VF<riscv_vfmsub_vl, "PseudoVFMSUB">;
-defm : VPatFPMulAddVL_VV_VF<riscv_vfnmadd_vl, "PseudoVFNMADD">;
-defm : VPatFPMulAddVL_VV_VF<riscv_vfnmsub_vl, "PseudoVFNMSUB">;
-defm : VPatFPMulAccVL_VV_VF<riscv_vfmadd_vl_oneuse, "PseudoVFMACC">;
-defm : VPatFPMulAccVL_VV_VF<riscv_vfmsub_vl_oneuse, "PseudoVFMSAC">;
-defm : VPatFPMulAccVL_VV_VF<riscv_vfnmadd_vl_oneuse, "PseudoVFNMACC">;
-defm : VPatFPMulAccVL_VV_VF<riscv_vfnmsub_vl_oneuse, "PseudoVFNMSAC">;
+defm : VPatFPMulAddVL_VV_VF_RM<any_riscv_vfmadd_vl, "PseudoVFMADD">;
+defm : VPatFPMulAddVL_VV_VF_RM<any_riscv_vfmsub_vl, "PseudoVFMSUB">;
+defm : VPatFPMulAddVL_VV_VF_RM<any_riscv_vfnmadd_vl, "PseudoVFNMADD">;
+defm : VPatFPMulAddVL_VV_VF_RM<any_riscv_vfnmsub_vl, "PseudoVFNMSUB">;
+defm : VPatFPMulAccVL_VV_VF_RM<riscv_vfmadd_vl_oneuse, "PseudoVFMACC">;
+defm : VPatFPMulAccVL_VV_VF_RM<riscv_vfmsub_vl_oneuse, "PseudoVFMSAC">;
+defm : VPatFPMulAccVL_VV_VF_RM<riscv_vfnmadd_vl_oneuse, "PseudoVFNMACC">;
+defm : VPatFPMulAccVL_VV_VF_RM<riscv_vfnmsub_vl_oneuse, "PseudoVFNMSAC">;
// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
-defm : VPatWidenFPMulAccVL_VV_VF<riscv_vfmadd_vl, "PseudoVFWMACC">;
-defm : VPatWidenFPMulAccVL_VV_VF<riscv_vfnmadd_vl, "PseudoVFWNMACC">;
-defm : VPatWidenFPMulAccVL_VV_VF<riscv_vfmsub_vl, "PseudoVFWMSAC">;
-defm : VPatWidenFPMulAccVL_VV_VF<riscv_vfnmsub_vl, "PseudoVFWNMSAC">;
+defm : VPatWidenFPMulAccVL_VV_VF_RM<riscv_vfwmadd_vl, "PseudoVFWMACC">;
+defm : VPatWidenFPMulAccVL_VV_VF_RM<riscv_vfwnmadd_vl, "PseudoVFWNMACC">;
+defm : VPatWidenFPMulAccVL_VV_VF_RM<riscv_vfwmsub_vl, "PseudoVFWMSAC">;
+defm : VPatWidenFPMulAccVL_VV_VF_RM<riscv_vfwnmsub_vl, "PseudoVFWNMSAC">;
// 13.11. Vector Floating-Point MIN/MAX Instructions
defm : VPatBinaryFPVL_VV_VF<riscv_fminnum_vl, "PseudoVFMIN">;
defm : VPatBinaryFPVL_VV_VF<riscv_fmaxnum_vl, "PseudoVFMAX">;
// 13.13. Vector Floating-Point Compare Instructions
-defm : VPatFPSetCCVL_VV_VF_FV<SETEQ, "PseudoVMFEQ", "PseudoVMFEQ">;
-defm : VPatFPSetCCVL_VV_VF_FV<SETOEQ, "PseudoVMFEQ", "PseudoVMFEQ">;
-
-defm : VPatFPSetCCVL_VV_VF_FV<SETNE, "PseudoVMFNE", "PseudoVMFNE">;
-defm : VPatFPSetCCVL_VV_VF_FV<SETUNE, "PseudoVMFNE", "PseudoVMFNE">;
-
-defm : VPatFPSetCCVL_VV_VF_FV<SETLT, "PseudoVMFLT", "PseudoVMFGT">;
-defm : VPatFPSetCCVL_VV_VF_FV<SETOLT, "PseudoVMFLT", "PseudoVMFGT">;
-
-defm : VPatFPSetCCVL_VV_VF_FV<SETLE, "PseudoVMFLE", "PseudoVMFGE">;
-defm : VPatFPSetCCVL_VV_VF_FV<SETOLE, "PseudoVMFLE", "PseudoVMFGE">;
+defm : VPatFPSetCCVL_VV_VF_FV<any_riscv_fsetcc_vl, SETEQ,
+ "PseudoVMFEQ", "PseudoVMFEQ">;
+defm : VPatFPSetCCVL_VV_VF_FV<any_riscv_fsetcc_vl, SETOEQ,
+ "PseudoVMFEQ", "PseudoVMFEQ">;
+defm : VPatFPSetCCVL_VV_VF_FV<any_riscv_fsetcc_vl, SETNE,
+ "PseudoVMFNE", "PseudoVMFNE">;
+defm : VPatFPSetCCVL_VV_VF_FV<any_riscv_fsetcc_vl, SETUNE,
+ "PseudoVMFNE", "PseudoVMFNE">;
+defm : VPatFPSetCCVL_VV_VF_FV<any_riscv_fsetccs_vl, SETLT,
+ "PseudoVMFLT", "PseudoVMFGT">;
+defm : VPatFPSetCCVL_VV_VF_FV<any_riscv_fsetccs_vl, SETOLT,
+ "PseudoVMFLT", "PseudoVMFGT">;
+defm : VPatFPSetCCVL_VV_VF_FV<any_riscv_fsetccs_vl, SETLE,
+ "PseudoVMFLE", "PseudoVMFGE">;
+defm : VPatFPSetCCVL_VV_VF_FV<any_riscv_fsetccs_vl, SETOLE,
+ "PseudoVMFLE", "PseudoVMFGE">;
foreach vti = AllFloatVectors in {
- // 13.8. Vector Floating-Point Square-Root Instruction
- def : Pat<(riscv_fsqrt_vl (vti.Vector vti.RegClass:$rs2), (vti.Mask V0),
- VLOpFrag),
- (!cast<Instruction>("PseudoVFSQRT_V_"# vti.LMul.MX #"_MASK")
- (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
-
- // 13.12. Vector Floating-Point Sign-Injection Instructions
- def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs), (vti.Mask V0),
- VLOpFrag),
- (!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX #"_MASK")
- (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
- vti.RegClass:$rs, (vti.Mask V0), GPR:$vl, vti.Log2SEW,
- TA_MA)>;
- // Handle fneg with VFSGNJN using the same input for both operands.
- def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask V0),
- VLOpFrag),
- (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX #"_MASK")
- (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
- vti.RegClass:$rs, (vti.Mask V0), GPR:$vl, vti.Log2SEW,
- TA_MA)>;
-
- def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
- (vti.Vector vti.RegClass:$rs2),
- vti.RegClass:$merge,
- (vti.Mask V0),
- VLOpFrag),
- (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs1,
- vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW,
- TAIL_AGNOSTIC)>;
-
- def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
- (riscv_fneg_vl vti.RegClass:$rs2,
- (vti.Mask true_mask),
- VLOpFrag),
- srcvalue,
- (vti.Mask true_mask),
- VLOpFrag),
- (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
- vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
-
- def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
- (SplatFPOp vti.ScalarRegClass:$rs2),
- vti.RegClass:$merge,
- (vti.Mask V0),
- VLOpFrag),
- (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs1,
- vti.ScalarRegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW,
- TAIL_AGNOSTIC)>;
-
- // Rounding without exception to implement nearbyint.
- def : Pat<(riscv_vfround_noexcept_vl (vti.Vector vti.RegClass:$rs1),
- (vti.Mask V0), VLOpFrag),
- (!cast<Instruction>("PseudoVFROUND_NOEXCEPT_V_" # vti.LMul.MX #"_MASK")
- (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ // 13.8. Vector Floating-Point Square-Root Instruction
+ def : Pat<(any_riscv_fsqrt_vl (vti.Vector vti.RegClass:$rs2), (vti.Mask V0),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSQRT_V_"# vti.LMul.MX # "_E" # vti.SEW # "_MASK")
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2,
+ (vti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, vti.Log2SEW, TA_MA)>;
+
+ // 13.12. Vector Floating-Point Sign-Injection Instructions
+ def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs), (vti.Mask V0),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJX_VV_"# vti.LMul.MX #"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
+ vti.RegClass:$rs, (vti.Mask V0), GPR:$vl, vti.Log2SEW,
+ TA_MA)>;
+ // Handle fneg with VFSGNJN using the same input for both operands.
+ def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask V0),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX #"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
+ vti.RegClass:$rs, (vti.Mask V0), GPR:$vl, vti.Log2SEW,
+ TA_MA)>;
+
+ def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2),
+ vti.RegClass:$merge,
+ (vti.Mask V0),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJ_VV_"# vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs1,
+ vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+
+ def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+ (riscv_fneg_vl vti.RegClass:$rs2,
+ (vti.Mask true_mask),
+ VLOpFrag),
+ srcvalue,
+ (vti.Mask true_mask),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJN_VV_"# vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+
+ def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+ (SplatFPOp vti.ScalarRegClass:$rs2),
+ vti.RegClass:$merge,
+ (vti.Mask V0),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs1,
+ vti.ScalarRegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+
+ // Rounding without exception to implement nearbyint.
+ def : Pat<(any_riscv_vfround_noexcept_vl (vti.Vector vti.RegClass:$rs1),
+ (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVFROUND_NOEXCEPT_V_" # vti.LMul.MX #"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TA_MA)>;
+
+ // 14.14. Vector Floating-Point Classify Instruction
+ def : Pat<(riscv_fclass_vl (vti.Vector vti.RegClass:$rs2),
+ (vti.Mask true_mask), VLOpFrag),
+ (!cast<Instruction>("PseudoVFCLASS_V_"# vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ }
}
foreach fvti = AllFloatVectors in {
// Floating-point vselects:
// 11.15. Vector Integer Merge Instructions
// 13.15. Vector Floating-Point Merge Instruction
- def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
- fvti.RegClass:$rs1,
- fvti.RegClass:$rs2,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0),
- GPR:$vl, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
- (SplatFPOp fvti.ScalarRegClass:$rs1),
- fvti.RegClass:$rs2,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
- fvti.RegClass:$rs2,
- (fvti.Scalar fvti.ScalarRegClass:$rs1),
- (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
- (SplatFPOp (fvti.Scalar fpimm0)),
- fvti.RegClass:$rs2,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
- fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
- fvti.RegClass:$rs1,
- fvti.RegClass:$rs2,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX#"_TU")
- fvti.RegClass:$rs2, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0),
- GPR:$vl, fvti.Log2SEW)>;
+ let Predicates = GetVTypePredicates<fvti>.Predicates in {
+ def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
+ fvti.RegClass:$rs1,
+ fvti.RegClass:$rs2,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0),
+ GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
+ (SplatFPOp fvti.ScalarRegClass:$rs1),
+ fvti.RegClass:$rs2,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2,
+ (fvti.Scalar fvti.ScalarRegClass:$rs1),
+ (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
+ (SplatFPOp (SelectFPImm (XLenVT GPR:$imm))),
+ fvti.RegClass:$rs2,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VXM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2,
+ GPR:$imm,
+ (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vselect_vl (fvti.Mask V0),
+ (SplatFPOp (fvti.Scalar fpimm0)),
+ fvti.RegClass:$rs2,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
- def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
- (SplatFPOp fvti.ScalarRegClass:$rs1),
- fvti.RegClass:$rs2,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX#"_TU")
- fvti.RegClass:$rs2, fvti.RegClass:$rs2,
- (fvti.Scalar fvti.ScalarRegClass:$rs1),
- (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
- (SplatFPOp (fvti.Scalar fpimm0)),
- fvti.RegClass:$rs2,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX#"_TU")
- fvti.RegClass:$rs2, fvti.RegClass:$rs2, 0, (fvti.Mask V0),
- GPR:$vl, fvti.Log2SEW)>;
-
- // 13.16. Vector Floating-Point Move Instruction
- // If we're splatting fpimm0, use vmv.v.x vd, x0.
- def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
- (fvti.Vector undef), (fvti.Scalar (fpimm0)), VLOpFrag)),
- (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
- 0, GPR:$vl, fvti.Log2SEW)>;
- def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
- fvti.Vector:$passthru, (fvti.Scalar (fpimm0)), VLOpFrag)),
- (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX#"_TU")
- $passthru, 0, GPR:$vl, fvti.Log2SEW)>;
-
- def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
- (fvti.Vector undef), (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)),
- (!cast<Instruction>("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" #
- fvti.LMul.MX)
- (fvti.Scalar fvti.ScalarRegClass:$rs2),
- GPR:$vl, fvti.Log2SEW)>;
- def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
- fvti.Vector:$passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)),
- (!cast<Instruction>("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" #
- fvti.LMul.MX # "_TU")
- $passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2),
- GPR:$vl, fvti.Log2SEW)>;
-
- // 13.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
- defm : VPatConvertFP2IVL_V<riscv_vfcvt_xu_f_vl, "PseudoVFCVT_XU_F_V">;
- defm : VPatConvertFP2IVL_V<riscv_vfcvt_x_f_vl, "PseudoVFCVT_X_F_V">;
- defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_rm_xu_f_vl, "PseudoVFCVT_RM_XU_F_V">;
- defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_rm_x_f_vl, "PseudoVFCVT_RM_X_F_V">;
-
- defm : VPatConvertFP2IVL_V<riscv_vfcvt_rtz_xu_f_vl, "PseudoVFCVT_RTZ_XU_F_V">;
- defm : VPatConvertFP2IVL_V<riscv_vfcvt_rtz_x_f_vl, "PseudoVFCVT_RTZ_X_F_V">;
-
- defm : VPatConvertI2FPVL_V<riscv_uint_to_fp_vl, "PseudoVFCVT_F_XU_V">;
- defm : VPatConvertI2FPVL_V<riscv_sint_to_fp_vl, "PseudoVFCVT_F_X_V">;
-
- defm : VPatConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_xu_vl, "PseudoVFCVT_RM_F_XU_V">;
- defm : VPatConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_x_vl, "PseudoVFCVT_RM_F_X_V">;
-
- // 13.18. Widening Floating-Point/Integer Type-Convert Instructions
- defm : VPatWConvertFP2IVL_V<riscv_vfcvt_xu_f_vl, "PseudoVFWCVT_XU_F_V">;
- defm : VPatWConvertFP2IVL_V<riscv_vfcvt_x_f_vl, "PseudoVFWCVT_X_F_V">;
- defm : VPatWConvertFP2I_RM_VL_V<riscv_vfcvt_rm_xu_f_vl, "PseudoVFWCVT_RM_XU_F_V">;
- defm : VPatWConvertFP2I_RM_VL_V<riscv_vfcvt_rm_x_f_vl, "PseudoVFWCVT_RM_X_F_V">;
-
- defm : VPatWConvertFP2IVL_V<riscv_vfcvt_rtz_xu_f_vl, "PseudoVFWCVT_RTZ_XU_F_V">;
- defm : VPatWConvertFP2IVL_V<riscv_vfcvt_rtz_x_f_vl, "PseudoVFWCVT_RTZ_X_F_V">;
-
- defm : VPatWConvertI2FPVL_V<riscv_uint_to_fp_vl, "PseudoVFWCVT_F_XU_V">;
- defm : VPatWConvertI2FPVL_V<riscv_sint_to_fp_vl, "PseudoVFWCVT_F_X_V">;
-
- defm : VPatWConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_xu_vl, "PseudoVFWCVT_RM_F_XU_V">;
- defm : VPatWConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_x_vl, "PseudoVFWCVT_RM_F_X_V">;
+ def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
+ fvti.RegClass:$rs1,
+ fvti.RegClass:$rs2,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX)
+ fvti.RegClass:$rs2, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0),
+ GPR:$vl, fvti.Log2SEW)>;
- foreach fvtiToFWti = AllWidenableFloatVectors in {
- defvar fvti = fvtiToFWti.Vti;
- defvar fwti = fvtiToFWti.Wti;
- def : Pat<(fwti.Vector (riscv_fpextend_vl (fvti.Vector fvti.RegClass:$rs1),
- (fvti.Mask V0),
+ def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
+ (SplatFPOp fvti.ScalarRegClass:$rs1),
+ fvti.RegClass:$rs2,
VLOpFrag)),
- (!cast<Instruction>("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX#"_MASK")
- (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
- (fvti.Mask V0), GPR:$vl, fvti.Log2SEW, TA_MA)>;
+ (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX)
+ fvti.RegClass:$rs2, fvti.RegClass:$rs2,
+ (fvti.Scalar fvti.ScalarRegClass:$rs1),
+ (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
+
+ def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
+ (SplatFPOp (fvti.Scalar fpimm0)),
+ fvti.RegClass:$rs2,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
+ fvti.RegClass:$rs2, fvti.RegClass:$rs2, 0, (fvti.Mask V0),
+ GPR:$vl, fvti.Log2SEW)>;
+
+ // 13.16. Vector Floating-Point Move Instruction
+ // If we're splatting fpimm0, use vmv.v.x vd, x0.
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
+ fvti.Vector:$passthru, (fvti.Scalar (fpimm0)), VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_V_I_"#fvti.LMul.MX)
+ $passthru, 0, GPR:$vl, fvti.Log2SEW, TU_MU)>;
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
+ fvti.Vector:$passthru, (fvti.Scalar (SelectFPImm (XLenVT GPR:$imm))), VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_V_X_"#fvti.LMul.MX)
+ $passthru, GPR:$imm, GPR:$vl, fvti.Log2SEW, TU_MU)>;
+
+ def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
+ fvti.Vector:$passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)),
+ (!cast<Instruction>("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" #
+ fvti.LMul.MX)
+ $passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2),
+ GPR:$vl, fvti.Log2SEW, TU_MU)>;
}
+}
- // 13.19 Narrowing Floating-Point/Integer Type-Convert Instructions
- defm : VPatNConvertFP2IVL_V<riscv_vfcvt_xu_f_vl, "PseudoVFNCVT_XU_F_W">;
- defm : VPatNConvertFP2IVL_V<riscv_vfcvt_x_f_vl, "PseudoVFNCVT_X_F_W">;
- defm : VPatNConvertFP2I_RM_VL_V<riscv_vfcvt_rm_xu_f_vl, "PseudoVFNCVT_RM_XU_F_W">;
- defm : VPatNConvertFP2I_RM_VL_V<riscv_vfcvt_rm_x_f_vl, "PseudoVFNCVT_RM_X_F_W">;
-
- defm : VPatNConvertFP2IVL_V<riscv_vfcvt_rtz_xu_f_vl, "PseudoVFNCVT_RTZ_XU_F_W">;
- defm : VPatNConvertFP2IVL_V<riscv_vfcvt_rtz_x_f_vl, "PseudoVFNCVT_RTZ_X_F_W">;
-
- defm : VPatNConvertI2FPVL_V<riscv_uint_to_fp_vl, "PseudoVFNCVT_F_XU_W">;
- defm : VPatNConvertI2FPVL_V<riscv_sint_to_fp_vl, "PseudoVFNCVT_F_X_W">;
-
- defm : VPatNConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_xu_vl, "PseudoVFNCVT_RM_F_XU_W">;
- defm : VPatNConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_x_vl, "PseudoVFNCVT_RM_F_X_W">;
+// 13.17. Vector Single-Width Floating-Point/Integer Type-Convert Instructions
+defm : VPatConvertFP2IVL_V_RM<riscv_vfcvt_xu_f_vl, "PseudoVFCVT_XU_F_V">;
+defm : VPatConvertFP2IVL_V_RM<riscv_vfcvt_x_f_vl, "PseudoVFCVT_X_F_V">;
+defm : VPatConvertFP2I_RM_VL_V<riscv_vfcvt_rm_xu_f_vl, "PseudoVFCVT_RM_XU_F_V">;
+defm : VPatConvertFP2I_RM_VL_V<any_riscv_vfcvt_rm_x_f_vl, "PseudoVFCVT_RM_X_F_V">;
+
+defm : VPatConvertFP2IVL_V<any_riscv_vfcvt_rtz_xu_f_vl, "PseudoVFCVT_RTZ_XU_F_V">;
+defm : VPatConvertFP2IVL_V<any_riscv_vfcvt_rtz_x_f_vl, "PseudoVFCVT_RTZ_X_F_V">;
+
+defm : VPatConvertI2FPVL_V_RM<any_riscv_uint_to_fp_vl, "PseudoVFCVT_F_XU_V">;
+defm : VPatConvertI2FPVL_V_RM<any_riscv_sint_to_fp_vl, "PseudoVFCVT_F_X_V">;
+
+defm : VPatConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_xu_vl, "PseudoVFCVT_RM_F_XU_V">;
+defm : VPatConvertI2FP_RM_VL_V<riscv_vfcvt_rm_f_x_vl, "PseudoVFCVT_RM_F_X_V">;
+
+// 13.18. Widening Floating-Point/Integer Type-Convert Instructions
+defm : VPatWConvertFP2IVL_V_RM<riscv_vfcvt_xu_f_vl, "PseudoVFWCVT_XU_F_V">;
+defm : VPatWConvertFP2IVL_V_RM<riscv_vfcvt_x_f_vl, "PseudoVFWCVT_X_F_V">;
+defm : VPatWConvertFP2I_RM_VL_V<riscv_vfcvt_rm_xu_f_vl, "PseudoVFWCVT_RM_XU_F_V">;
+defm : VPatWConvertFP2I_RM_VL_V<riscv_vfcvt_rm_x_f_vl, "PseudoVFWCVT_RM_X_F_V">;
+
+defm : VPatWConvertFP2IVL_V<any_riscv_vfcvt_rtz_xu_f_vl, "PseudoVFWCVT_RTZ_XU_F_V">;
+defm : VPatWConvertFP2IVL_V<any_riscv_vfcvt_rtz_x_f_vl, "PseudoVFWCVT_RTZ_X_F_V">;
+
+defm : VPatWConvertI2FPVL_V<any_riscv_uint_to_fp_vl, "PseudoVFWCVT_F_XU_V">;
+defm : VPatWConvertI2FPVL_V<any_riscv_sint_to_fp_vl, "PseudoVFWCVT_F_X_V">;
+
+foreach fvtiToFWti = AllWidenableFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in
+ def : Pat<(fwti.Vector (any_riscv_fpextend_vl
+ (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFWCVT_F_F_V_"#fvti.LMul.MX#"_MASK")
+ (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+ (fvti.Mask V0),
+ GPR:$vl, fvti.Log2SEW, TA_MA)>;
+}
- foreach fvtiToFWti = AllWidenableFloatVectors in {
- defvar fvti = fvtiToFWti.Vti;
- defvar fwti = fvtiToFWti.Wti;
- def : Pat<(fvti.Vector (riscv_fpround_vl (fwti.Vector fwti.RegClass:$rs1),
- (fwti.Mask V0),
- VLOpFrag)),
+// 13.19 Narrowing Floating-Point/Integer Type-Convert Instructions
+defm : VPatNConvertFP2IVL_W_RM<riscv_vfcvt_xu_f_vl, "PseudoVFNCVT_XU_F_W">;
+defm : VPatNConvertFP2IVL_W_RM<riscv_vfcvt_x_f_vl, "PseudoVFNCVT_X_F_W">;
+defm : VPatNConvertFP2I_RM_VL_W<riscv_vfcvt_rm_xu_f_vl, "PseudoVFNCVT_RM_XU_F_W">;
+defm : VPatNConvertFP2I_RM_VL_W<riscv_vfcvt_rm_x_f_vl, "PseudoVFNCVT_RM_X_F_W">;
+
+defm : VPatNConvertFP2IVL_W<any_riscv_vfcvt_rtz_xu_f_vl, "PseudoVFNCVT_RTZ_XU_F_W">;
+defm : VPatNConvertFP2IVL_W<any_riscv_vfcvt_rtz_x_f_vl, "PseudoVFNCVT_RTZ_X_F_W">;
+
+defm : VPatNConvertI2FPVL_W_RM<any_riscv_uint_to_fp_vl, "PseudoVFNCVT_F_XU_W">;
+defm : VPatNConvertI2FPVL_W_RM<any_riscv_sint_to_fp_vl, "PseudoVFNCVT_F_X_W">;
+
+defm : VPatNConvertI2FP_RM_VL_W<riscv_vfcvt_rm_f_xu_vl, "PseudoVFNCVT_RM_F_XU_W">;
+defm : VPatNConvertI2FP_RM_VL_W<riscv_vfcvt_rm_f_x_vl, "PseudoVFNCVT_RM_F_X_W">;
+
+foreach fvtiToFWti = AllWidenableFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = !listconcat(GetVTypePredicates<fvti>.Predicates,
+ GetVTypePredicates<fwti>.Predicates) in {
+ def : Pat<(fvti.Vector (any_riscv_fpround_vl
+ (fwti.Vector fwti.RegClass:$rs1),
+ (fwti.Mask V0), VLOpFrag)),
(!cast<Instruction>("PseudoVFNCVT_F_F_W_"#fvti.LMul.MX#"_MASK")
(fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
- (fwti.Mask V0), GPR:$vl, fvti.Log2SEW, TA_MA)>;
-
- def : Pat<(fvti.Vector (riscv_fncvt_rod_vl (fwti.Vector fwti.RegClass:$rs1),
- (fwti.Mask V0),
- VLOpFrag)),
+ (fwti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, fvti.Log2SEW, TA_MA)>;
+
+ def : Pat<(fvti.Vector (any_riscv_fncvt_rod_vl
+ (fwti.Vector fwti.RegClass:$rs1),
+ (fwti.Mask V0), VLOpFrag)),
(!cast<Instruction>("PseudoVFNCVT_ROD_F_F_W_"#fvti.LMul.MX#"_MASK")
(fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
(fwti.Mask V0), GPR:$vl, fvti.Log2SEW, TA_MA)>;
}
}
-} // Predicates = [HasVInstructionsAnyF]
-
// 14. Vector Reduction Operations
// 14.1. Vector Single-Width Integer Reduction Instructions
-let Predicates = [HasVInstructions] in {
-defm : VPatReductionVL<rvv_vecreduce_ADD_vl, "PseudoVREDSUM", /*is_float*/0>;
-defm : VPatReductionVL<rvv_vecreduce_UMAX_vl, "PseudoVREDMAXU", /*is_float*/0>;
-defm : VPatReductionVL<rvv_vecreduce_SMAX_vl, "PseudoVREDMAX", /*is_float*/0>;
-defm : VPatReductionVL<rvv_vecreduce_UMIN_vl, "PseudoVREDMINU", /*is_float*/0>;
-defm : VPatReductionVL<rvv_vecreduce_SMIN_vl, "PseudoVREDMIN", /*is_float*/0>;
-defm : VPatReductionVL<rvv_vecreduce_AND_vl, "PseudoVREDAND", /*is_float*/0>;
-defm : VPatReductionVL<rvv_vecreduce_OR_vl, "PseudoVREDOR", /*is_float*/0>;
-defm : VPatReductionVL<rvv_vecreduce_XOR_vl, "PseudoVREDXOR", /*is_float*/0>;
+defm : VPatReductionVL<rvv_vecreduce_ADD_vl, "PseudoVREDSUM", is_float=0>;
+defm : VPatReductionVL<rvv_vecreduce_UMAX_vl, "PseudoVREDMAXU", is_float=0>;
+defm : VPatReductionVL<rvv_vecreduce_SMAX_vl, "PseudoVREDMAX", is_float=0>;
+defm : VPatReductionVL<rvv_vecreduce_UMIN_vl, "PseudoVREDMINU", is_float=0>;
+defm : VPatReductionVL<rvv_vecreduce_SMIN_vl, "PseudoVREDMIN", is_float=0>;
+defm : VPatReductionVL<rvv_vecreduce_AND_vl, "PseudoVREDAND", is_float=0>;
+defm : VPatReductionVL<rvv_vecreduce_OR_vl, "PseudoVREDOR", is_float=0>;
+defm : VPatReductionVL<rvv_vecreduce_XOR_vl, "PseudoVREDXOR", is_float=0>;
// 14.2. Vector Widening Integer Reduction Instructions
-defm : VPatWidenReductionVL<rvv_vecreduce_ADD_vl, anyext_oneuse, "PseudoVWREDSUMU", /*is_float*/0>;
-defm : VPatWidenReductionVL<rvv_vecreduce_ADD_vl, zext_oneuse, "PseudoVWREDSUMU", /*is_float*/0>;
-defm : VPatWidenReductionVL_Ext_VL<rvv_vecreduce_ADD_vl, riscv_zext_vl_oneuse, "PseudoVWREDSUMU", /*is_float*/0>;
-defm : VPatWidenReductionVL<rvv_vecreduce_ADD_vl, sext_oneuse, "PseudoVWREDSUM", /*is_float*/0>;
-defm : VPatWidenReductionVL_Ext_VL<rvv_vecreduce_ADD_vl, riscv_sext_vl_oneuse, "PseudoVWREDSUM", /*is_float*/0>;
-} // Predicates = [HasVInstructions]
+defm : VPatWidenReductionVL<rvv_vecreduce_ADD_vl, anyext_oneuse, "PseudoVWREDSUMU", is_float=0>;
+defm : VPatWidenReductionVL<rvv_vecreduce_ADD_vl, zext_oneuse, "PseudoVWREDSUMU", is_float=0>;
+defm : VPatWidenReductionVL_Ext_VL<rvv_vecreduce_ADD_vl, riscv_zext_vl_oneuse, "PseudoVWREDSUMU", is_float=0>;
+defm : VPatWidenReductionVL<rvv_vecreduce_ADD_vl, sext_oneuse, "PseudoVWREDSUM", is_float=0>;
+defm : VPatWidenReductionVL_Ext_VL<rvv_vecreduce_ADD_vl, riscv_sext_vl_oneuse, "PseudoVWREDSUM", is_float=0>;
// 14.3. Vector Single-Width Floating-Point Reduction Instructions
-let Predicates = [HasVInstructionsAnyF] in {
-defm : VPatReductionVL<rvv_vecreduce_SEQ_FADD_vl, "PseudoVFREDOSUM", /*is_float*/1>;
-defm : VPatReductionVL<rvv_vecreduce_FADD_vl, "PseudoVFREDUSUM", /*is_float*/1>;
-defm : VPatReductionVL<rvv_vecreduce_FMIN_vl, "PseudoVFREDMIN", /*is_float*/1>;
-defm : VPatReductionVL<rvv_vecreduce_FMAX_vl, "PseudoVFREDMAX", /*is_float*/1>;
+defm : VPatReductionVL_RM<rvv_vecreduce_SEQ_FADD_vl, "PseudoVFREDOSUM", is_float=1>;
+defm : VPatReductionVL_RM<rvv_vecreduce_FADD_vl, "PseudoVFREDUSUM", is_float=1>;
+defm : VPatReductionVL<rvv_vecreduce_FMIN_vl, "PseudoVFREDMIN", is_float=1>;
+defm : VPatReductionVL<rvv_vecreduce_FMAX_vl, "PseudoVFREDMAX", is_float=1>;
// 14.4. Vector Widening Floating-Point Reduction Instructions
-defm : VPatWidenReductionVL<rvv_vecreduce_SEQ_FADD_vl, fpext_oneuse, "PseudoVFWREDOSUM", /*is_float*/1>;
-defm : VPatWidenReductionVL_Ext_VL<rvv_vecreduce_SEQ_FADD_vl, riscv_fpextend_vl_oneuse, "PseudoVFWREDOSUM", /*is_float*/1>;
-defm : VPatWidenReductionVL<rvv_vecreduce_FADD_vl, fpext_oneuse, "PseudoVFWREDUSUM", /*is_float*/1>;
-defm : VPatWidenReductionVL_Ext_VL<rvv_vecreduce_FADD_vl, riscv_fpextend_vl_oneuse, "PseudoVFWREDUSUM", /*is_float*/1>;
-} // Predicates = [HasVInstructionsAnyF]
+defm : VPatWidenReductionVL_RM<rvv_vecreduce_SEQ_FADD_vl, fpext_oneuse,
+ "PseudoVFWREDOSUM", is_float=1>;
+defm : VPatWidenReductionVL_Ext_VL_RM<rvv_vecreduce_SEQ_FADD_vl,
+ riscv_fpextend_vl_oneuse,
+ "PseudoVFWREDOSUM", is_float=1>;
+defm : VPatWidenReductionVL_RM<rvv_vecreduce_FADD_vl, fpext_oneuse,
+ "PseudoVFWREDUSUM", is_float=1>;
+defm : VPatWidenReductionVL_Ext_VL_RM<rvv_vecreduce_FADD_vl,
+ riscv_fpextend_vl_oneuse,
+ "PseudoVFWREDUSUM", is_float=1>;
// 15. Vector Mask Instructions
-let Predicates = [HasVInstructions] in {
-
foreach mti = AllMasks in {
- // 15.1 Vector Mask-Register Logical Instructions
- def : Pat<(mti.Mask (riscv_vmset_vl VLOpFrag)),
- (!cast<Instruction>("PseudoVMSET_M_" # mti.BX) GPR:$vl, mti.Log2SEW)>;
- def : Pat<(mti.Mask (riscv_vmclr_vl VLOpFrag)),
- (!cast<Instruction>("PseudoVMCLR_M_" # mti.BX) GPR:$vl, mti.Log2SEW)>;
-
- def : Pat<(mti.Mask (riscv_vmand_vl VR:$rs1, VR:$rs2, VLOpFrag)),
- (!cast<Instruction>("PseudoVMAND_MM_" # mti.LMul.MX)
- VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
- def : Pat<(mti.Mask (riscv_vmor_vl VR:$rs1, VR:$rs2, VLOpFrag)),
- (!cast<Instruction>("PseudoVMOR_MM_" # mti.LMul.MX)
- VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
- def : Pat<(mti.Mask (riscv_vmxor_vl VR:$rs1, VR:$rs2, VLOpFrag)),
- (!cast<Instruction>("PseudoVMXOR_MM_" # mti.LMul.MX)
- VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
-
- def : Pat<(mti.Mask (riscv_vmand_vl VR:$rs1,
- (riscv_vmnot_vl VR:$rs2, VLOpFrag),
+ let Predicates = [HasVInstructions] in {
+ // 15.1 Vector Mask-Register Logical Instructions
+ def : Pat<(mti.Mask (riscv_vmset_vl VLOpFrag)),
+ (!cast<Instruction>("PseudoVMSET_M_" # mti.BX) GPR:$vl, mti.Log2SEW)>;
+ def : Pat<(mti.Mask (riscv_vmclr_vl VLOpFrag)),
+ (!cast<Instruction>("PseudoVMCLR_M_" # mti.BX) GPR:$vl, mti.Log2SEW)>;
+
+ def : Pat<(mti.Mask (riscv_vmand_vl VR:$rs1, VR:$rs2, VLOpFrag)),
+ (!cast<Instruction>("PseudoVMAND_MM_" # mti.LMul.MX)
+ VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
+ def : Pat<(mti.Mask (riscv_vmor_vl VR:$rs1, VR:$rs2, VLOpFrag)),
+ (!cast<Instruction>("PseudoVMOR_MM_" # mti.LMul.MX)
+ VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
+ def : Pat<(mti.Mask (riscv_vmxor_vl VR:$rs1, VR:$rs2, VLOpFrag)),
+ (!cast<Instruction>("PseudoVMXOR_MM_" # mti.LMul.MX)
+ VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
+
+ def : Pat<(mti.Mask (riscv_vmand_vl VR:$rs1,
+ (riscv_vmnot_vl VR:$rs2, VLOpFrag),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMANDN_MM_" # mti.LMul.MX)
+ VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
+ def : Pat<(mti.Mask (riscv_vmor_vl VR:$rs1,
+ (riscv_vmnot_vl VR:$rs2, VLOpFrag),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMORN_MM_" # mti.LMul.MX)
+ VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
+ // XOR is associative so we need 2 patterns for VMXNOR.
+ def : Pat<(mti.Mask (riscv_vmxor_vl (riscv_vmnot_vl VR:$rs1,
+ VLOpFrag),
+ VR:$rs2, VLOpFrag)),
+ (!cast<Instruction>("PseudoVMXNOR_MM_" # mti.LMul.MX)
+ VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
+
+ def : Pat<(mti.Mask (riscv_vmnot_vl (riscv_vmand_vl VR:$rs1, VR:$rs2,
+ VLOpFrag),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMNAND_MM_" # mti.LMul.MX)
+ VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
+ def : Pat<(mti.Mask (riscv_vmnot_vl (riscv_vmor_vl VR:$rs1, VR:$rs2,
+ VLOpFrag),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMNOR_MM_" # mti.LMul.MX)
+ VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
+ def : Pat<(mti.Mask (riscv_vmnot_vl (riscv_vmxor_vl VR:$rs1, VR:$rs2,
+ VLOpFrag),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMXNOR_MM_" # mti.LMul.MX)
+ VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
+
+ // Match the not idiom to the vmnot.m pseudo.
+ def : Pat<(mti.Mask (riscv_vmnot_vl VR:$rs, VLOpFrag)),
+ (!cast<Instruction>("PseudoVMNAND_MM_" # mti.LMul.MX)
+ VR:$rs, VR:$rs, GPR:$vl, mti.Log2SEW)>;
+
+ // 15.2 Vector count population in mask vcpop.m
+ def : Pat<(XLenVT (riscv_vcpop_vl (mti.Mask VR:$rs2), (mti.Mask true_mask),
VLOpFrag)),
- (!cast<Instruction>("PseudoVMANDN_MM_" # mti.LMul.MX)
- VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
- def : Pat<(mti.Mask (riscv_vmor_vl VR:$rs1,
- (riscv_vmnot_vl VR:$rs2, VLOpFrag),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMORN_MM_" # mti.LMul.MX)
- VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
- // XOR is associative so we need 2 patterns for VMXNOR.
- def : Pat<(mti.Mask (riscv_vmxor_vl (riscv_vmnot_vl VR:$rs1,
- VLOpFrag),
- VR:$rs2, VLOpFrag)),
- (!cast<Instruction>("PseudoVMXNOR_MM_" # mti.LMul.MX)
- VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
-
- def : Pat<(mti.Mask (riscv_vmnot_vl (riscv_vmand_vl VR:$rs1, VR:$rs2,
- VLOpFrag),
+ (!cast<Instruction>("PseudoVCPOP_M_" # mti.BX)
+ VR:$rs2, GPR:$vl, mti.Log2SEW)>;
+ def : Pat<(XLenVT (riscv_vcpop_vl (mti.Mask VR:$rs2), (mti.Mask V0),
VLOpFrag)),
- (!cast<Instruction>("PseudoVMNAND_MM_" # mti.LMul.MX)
- VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
- def : Pat<(mti.Mask (riscv_vmnot_vl (riscv_vmor_vl VR:$rs1, VR:$rs2,
- VLOpFrag),
+ (!cast<Instruction>("PseudoVCPOP_M_" # mti.BX # "_MASK")
+ VR:$rs2, (mti.Mask V0), GPR:$vl, mti.Log2SEW)>;
+
+ // 15.3 vfirst find-first-set mask bit
+ def : Pat<(XLenVT (riscv_vfirst_vl (mti.Mask VR:$rs2), (mti.Mask true_mask),
VLOpFrag)),
- (!cast<Instruction>("PseudoVMNOR_MM_" # mti.LMul.MX)
- VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
- def : Pat<(mti.Mask (riscv_vmnot_vl (riscv_vmxor_vl VR:$rs1, VR:$rs2,
- VLOpFrag),
+ (!cast<Instruction>("PseudoVFIRST_M_" # mti.BX)
+ VR:$rs2, GPR:$vl, mti.Log2SEW)>;
+ def : Pat<(XLenVT (riscv_vfirst_vl (mti.Mask VR:$rs2), (mti.Mask V0),
VLOpFrag)),
- (!cast<Instruction>("PseudoVMXNOR_MM_" # mti.LMul.MX)
- VR:$rs1, VR:$rs2, GPR:$vl, mti.Log2SEW)>;
-
- // Match the not idiom to the vmnot.m pseudo.
- def : Pat<(mti.Mask (riscv_vmnot_vl VR:$rs, VLOpFrag)),
- (!cast<Instruction>("PseudoVMNAND_MM_" # mti.LMul.MX)
- VR:$rs, VR:$rs, GPR:$vl, mti.Log2SEW)>;
-
- // 15.2 Vector count population in mask vcpop.m
- def : Pat<(XLenVT (riscv_vcpop_vl (mti.Mask VR:$rs2), (mti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVCPOP_M_" # mti.BX)
- VR:$rs2, GPR:$vl, mti.Log2SEW)>;
- def : Pat<(XLenVT (riscv_vcpop_vl (mti.Mask VR:$rs2), (mti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVCPOP_M_" # mti.BX # "_MASK")
- VR:$rs2, (mti.Mask V0), GPR:$vl, mti.Log2SEW)>;
-
- // 15.3 vfirst find-first-set mask bit
- def : Pat<(XLenVT (riscv_vfirst_vl (mti.Mask VR:$rs2), (mti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFIRST_M_" # mti.BX)
- VR:$rs2, GPR:$vl, mti.Log2SEW)>;
- def : Pat<(XLenVT (riscv_vfirst_vl (mti.Mask VR:$rs2), (mti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFIRST_M_" # mti.BX # "_MASK")
- VR:$rs2, (mti.Mask V0), GPR:$vl, mti.Log2SEW)>;
-}
-
-} // Predicates = [HasVInstructions]
+ (!cast<Instruction>("PseudoVFIRST_M_" # mti.BX # "_MASK")
+ VR:$rs2, (mti.Mask V0), GPR:$vl, mti.Log2SEW)>;
+ }
+}
// 16. Vector Permutation Instructions
-let Predicates = [HasVInstructions] in {
// 16.1. Integer Scalar Move Instructions
// 16.4. Vector Register Gather Instruction
foreach vti = AllIntegerVectors in {
- def : Pat<(vti.Vector (riscv_vmv_s_x_vl (vti.Vector vti.RegClass:$merge),
- vti.ScalarRegClass:$rs1,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMV_S_X_"#vti.LMul.MX)
- vti.RegClass:$merge,
- (vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>;
-
- def : Pat<(vti.Vector (riscv_vrgather_vv_vl vti.RegClass:$rs2,
- vti.RegClass:$rs1,
- vti.RegClass:$merge,
- (vti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVRGATHER_VV_"# vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1,
- vti.RegClass:$merge,
- (vti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVRGATHER_VX_"# vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2,
- uimm5:$imm,
- vti.RegClass:$merge,
- (vti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$imm,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (riscv_vmv_s_x_vl (vti.Vector vti.RegClass:$merge),
+ vti.ScalarRegClass:$rs1,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_S_X_"#vti.LMul.MX)
+ vti.RegClass:$merge,
+ (vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>;
+
+ def : Pat<(vti.Vector (riscv_vrgather_vv_vl vti.RegClass:$rs2,
+ vti.RegClass:$rs1,
+ vti.RegClass:$merge,
+ (vti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VV_"# vti.LMul.MX#"_E"# vti.SEW#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1,
+ vti.RegClass:$merge,
+ (vti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VX_"# vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2,
+ uimm5:$imm,
+ vti.RegClass:$merge,
+ (vti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$imm,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
// emul = lmul * 16 / sew
defvar vlmul = vti.LMul;
@@ -2030,8 +2799,8 @@ foreach vti = AllIntegerVectors in {
if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
defvar emul_str = octuple_to_str<octuple_emul>.ret;
defvar ivti = !cast<VTypeInfo>("VI16" # emul_str);
- defvar inst = "PseudoVRGATHEREI16_VV_" # vti.LMul.MX # "_" # emul_str;
-
+ defvar inst = "PseudoVRGATHEREI16_VV_" # vti.LMul.MX # "_E" # vti.SEW # "_" # emul_str;
+ let Predicates = GetVTypePredicates<vti>.Predicates in
def : Pat<(vti.Vector
(riscv_vrgatherei16_vv_vl vti.RegClass:$rs2,
(ivti.Vector ivti.RegClass:$rs1),
@@ -2044,50 +2813,55 @@ foreach vti = AllIntegerVectors in {
}
}
-} // Predicates = [HasVInstructions]
-
-let Predicates = [HasVInstructionsAnyF] in {
-
// 16.2. Floating-Point Scalar Move Instructions
foreach vti = AllFloatVectors in {
- def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$merge),
- (vti.Scalar (fpimm0)),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVMV_S_X_"#vti.LMul.MX)
- vti.RegClass:$merge, X0, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$merge),
- vti.ScalarRegClass:$rs1,
- VLOpFrag)),
- (!cast<Instruction>("PseudoVFMV_S_"#vti.ScalarSuffix#"_"#vti.LMul.MX)
- vti.RegClass:$merge,
- (vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>;
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$merge),
+ (vti.Scalar (fpimm0)),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_S_X_"#vti.LMul.MX)
+ vti.RegClass:$merge, (XLenVT X0), GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$merge),
+ (vti.Scalar (SelectFPImm (XLenVT GPR:$imm))),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVMV_S_X_"#vti.LMul.MX)
+ vti.RegClass:$merge, GPR:$imm, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$merge),
+ vti.ScalarRegClass:$rs1,
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFMV_S_"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ vti.RegClass:$merge,
+ (vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>;
+ }
defvar ivti = GetIntVTypeInfo<vti>.Vti;
-
- def : Pat<(vti.Vector
- (riscv_vrgather_vv_vl vti.RegClass:$rs2,
- (ivti.Vector vti.RegClass:$rs1),
- vti.RegClass:$merge,
- (vti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVRGATHER_VV_"# vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1,
- vti.RegClass:$merge,
- (vti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVRGATHER_VX_"# vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(vti.Vector
- (riscv_vrgather_vx_vl vti.RegClass:$rs2,
- uimm5:$imm,
- vti.RegClass:$merge,
- (vti.Mask V0),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$imm,
- (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<ivti>.Predicates) in {
+ def : Pat<(vti.Vector
+ (riscv_vrgather_vv_vl vti.RegClass:$rs2,
+ (ivti.Vector vti.RegClass:$rs1),
+ vti.RegClass:$merge,
+ (vti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VV_"# vti.LMul.MX#"_E"# vti.SEW#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(vti.Vector (riscv_vrgather_vx_vl vti.RegClass:$rs2, GPR:$rs1,
+ vti.RegClass:$merge,
+ (vti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VX_"# vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(vti.Vector
+ (riscv_vrgather_vx_vl vti.RegClass:$rs2,
+ uimm5:$imm,
+ vti.RegClass:$merge,
+ (vti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVRGATHER_VI_"# vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$imm,
+ (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
defvar vlmul = vti.LMul;
defvar octuple_lmul = vlmul.octuple;
@@ -2095,8 +2869,9 @@ foreach vti = AllFloatVectors in {
if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
defvar emul_str = octuple_to_str<octuple_emul>.ret;
defvar ivti = !cast<VTypeInfo>("VI16" # emul_str);
- defvar inst = "PseudoVRGATHEREI16_VV_" # vti.LMul.MX # "_" # emul_str;
-
+ defvar inst = "PseudoVRGATHEREI16_VV_" # vti.LMul.MX # "_E" # vti.SEW # "_" # emul_str;
+ let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
+ GetVTypePredicates<ivti>.Predicates) in
def : Pat<(vti.Vector
(riscv_vrgatherei16_vv_vl vti.RegClass:$rs2,
(ivti.Vector ivti.RegClass:$rs1),
@@ -2109,8 +2884,6 @@ foreach vti = AllFloatVectors in {
}
}
-} // Predicates = [HasVInstructionsAnyF]
-
//===----------------------------------------------------------------------===//
// Miscellaneous RISCVISD SDNodes
//===----------------------------------------------------------------------===//
@@ -2129,77 +2902,90 @@ def SDTRVVSlide1 : SDTypeProfile<1, 5, [
SDTCisVT<3, XLenVT>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>,
SDTCisVT<5, XLenVT>
]>;
+def SDTRVVFSlide1 : SDTypeProfile<1, 5, [
+ SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisFP<0>,
+ SDTCisEltOfVec<3, 0>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>,
+ SDTCisVT<5, XLenVT>
+]>;
def riscv_slideup_vl : SDNode<"RISCVISD::VSLIDEUP_VL", SDTRVVSlide, []>;
def riscv_slide1up_vl : SDNode<"RISCVISD::VSLIDE1UP_VL", SDTRVVSlide1, []>;
def riscv_slidedown_vl : SDNode<"RISCVISD::VSLIDEDOWN_VL", SDTRVVSlide, []>;
def riscv_slide1down_vl : SDNode<"RISCVISD::VSLIDE1DOWN_VL", SDTRVVSlide1, []>;
-
-let Predicates = [HasVInstructions] in {
+def riscv_fslide1up_vl : SDNode<"RISCVISD::VFSLIDE1UP_VL", SDTRVVFSlide1, []>;
+def riscv_fslide1down_vl : SDNode<"RISCVISD::VFSLIDE1DOWN_VL", SDTRVVFSlide1, []>;
foreach vti = AllIntegerVectors in {
- def : Pat<(vti.Vector (riscv_vid_vl (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVID_V_"#vti.LMul.MX) GPR:$vl, vti.Log2SEW)>;
-
- def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector undef),
- (vti.Vector vti.RegClass:$rs1),
- GPR:$rs2, (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVSLIDE1UP_VX_"#vti.LMul.MX)
- vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector vti.RegClass:$rd),
- (vti.Vector vti.RegClass:$rs1),
- GPR:$rs2, (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVSLIDE1UP_VX_"#vti.LMul.MX#"_TU")
- vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector undef),
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (riscv_vid_vl (vti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVID_V_"#vti.LMul.MX#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), (vti.Mask V0), GPR:$vl, vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+ def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector vti.RegClass:$rd),
(vti.Vector vti.RegClass:$rs1),
GPR:$rs2, (vti.Mask true_mask),
VLOpFrag)),
- (!cast<Instruction>("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX)
- vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
- def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector vti.RegClass:$rd),
- (vti.Vector vti.RegClass:$rs1),
- GPR:$rs2, (vti.Mask true_mask),
- VLOpFrag)),
- (!cast<Instruction>("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX#"_TU")
- vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
-}
-
-foreach vti = !listconcat(AllIntegerVectors, AllFloatVectors) in {
- def : Pat<(vti.Vector (riscv_slideup_vl (vti.Vector vti.RegClass:$rs3),
- (vti.Vector vti.RegClass:$rs1),
- uimm5:$rs2, (vti.Mask true_mask),
- VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>("PseudoVSLIDEUP_VI_"#vti.LMul.MX)
- vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2,
- GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
-
- def : Pat<(vti.Vector (riscv_slideup_vl (vti.Vector vti.RegClass:$rs3),
- (vti.Vector vti.RegClass:$rs1),
- GPR:$rs2, (vti.Mask true_mask),
- VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>("PseudoVSLIDEUP_VX_"#vti.LMul.MX)
- vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2,
- GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
-
- def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector vti.RegClass:$rs3),
+ (!cast<Instruction>("PseudoVSLIDE1UP_VX_"#vti.LMul.MX)
+ vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector vti.RegClass:$rd),
+ (vti.Vector vti.RegClass:$rs1),
+ GPR:$rs2, (vti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX)
+ vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ }
+}
+
+foreach vti = AllFloatVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (riscv_fslide1up_vl (vti.Vector vti.RegClass:$rd),
+ (vti.Vector vti.RegClass:$rs1),
+ vti.Scalar:$rs2, (vti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFSLIDE1UP_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ vti.RegClass:$rd, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ def : Pat<(vti.Vector (riscv_fslide1down_vl (vti.Vector vti.RegClass:$rd),
+ (vti.Vector vti.RegClass:$rs1),
+ vti.Scalar:$rs2, (vti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFSLIDE1DOWN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ vti.RegClass:$rd, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>;
+ }
+}
+
+foreach vti = AllVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (riscv_slideup_vl (vti.Vector vti.RegClass:$rs3),
(vti.Vector vti.RegClass:$rs1),
uimm5:$rs2, (vti.Mask true_mask),
VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>("PseudoVSLIDEDOWN_VI_"#vti.LMul.MX)
- vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2,
- GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
+ (!cast<Instruction>("PseudoVSLIDEUP_VI_"#vti.LMul.MX)
+ vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2,
+ GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
- def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector vti.RegClass:$rs3),
+ def : Pat<(vti.Vector (riscv_slideup_vl (vti.Vector vti.RegClass:$rs3),
(vti.Vector vti.RegClass:$rs1),
GPR:$rs2, (vti.Mask true_mask),
VLOpFrag, (XLenVT timm:$policy))),
- (!cast<Instruction>("PseudoVSLIDEDOWN_VX_"#vti.LMul.MX)
- vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2,
- GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
+ (!cast<Instruction>("PseudoVSLIDEUP_VX_"#vti.LMul.MX)
+ vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2,
+ GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
+
+ def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector vti.RegClass:$rs3),
+ (vti.Vector vti.RegClass:$rs1),
+ uimm5:$rs2, (vti.Mask true_mask),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>("PseudoVSLIDEDOWN_VI_"#vti.LMul.MX)
+ vti.RegClass:$rs3, vti.RegClass:$rs1, uimm5:$rs2,
+ GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
+
+ def : Pat<(vti.Vector (riscv_slidedown_vl (vti.Vector vti.RegClass:$rs3),
+ (vti.Vector vti.RegClass:$rs1),
+ GPR:$rs2, (vti.Mask true_mask),
+ VLOpFrag, (XLenVT timm:$policy))),
+ (!cast<Instruction>("PseudoVSLIDEDOWN_VX_"#vti.LMul.MX)
+ vti.RegClass:$rs3, vti.RegClass:$rs1, GPR:$rs2,
+ GPR:$vl, vti.Log2SEW, (XLenVT timm:$policy))>;
+ }
}
-
-} // Predicates = [HasVInstructions]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
new file mode 100644
index 000000000000..4ba052b25e42
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
@@ -0,0 +1,205 @@
+//===-- RISCVInstrInfoXCV.td - CORE-V instructions ---------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the vendor extensions defined by Core-V extensions.
+//
+//===----------------------------------------------------------------------===//
+
+let DecoderNamespace = "XCVbitmanip" in {
+ class CVInstBitManipRII<bits<2> funct2, bits<3> funct3, dag outs, dag ins,
+ string opcodestr, string argstr>
+ : RVInstI<funct3, OPC_CUSTOM_2, outs, ins, opcodestr, argstr> {
+ bits<5> is3;
+ bits<5> is2;
+ let imm12 = {funct2, is3, is2};
+ }
+
+ class CVBitManipRII<bits<2> funct2, bits<3> funct3, string opcodestr,
+ Operand i3type = uimm5>
+ : CVInstBitManipRII<funct2, funct3, (outs GPR:$rd),
+ (ins GPR:$rs1, i3type:$is3, uimm5:$is2),
+ opcodestr, "$rd, $rs1, $is3, $is2">;
+
+ class CVBitManipRR<bits<7> funct7, string opcodestr>
+ : RVInstR<funct7, 0b011, OPC_CUSTOM_1, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+
+ class CVBitManipR<bits<7> funct7, string opcodestr>
+ : RVInstR<funct7, 0b011, OPC_CUSTOM_1, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1"> {
+ let rs2 = 0b00000;
+ }
+}
+
+let Predicates = [HasVendorXCVbitmanip, IsRV32],
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+ def CV_EXTRACT : CVBitManipRII<0b00, 0b000, "cv.extract">;
+ def CV_EXTRACTU : CVBitManipRII<0b01, 0b000, "cv.extractu">;
+
+ def CV_BCLR : CVBitManipRII<0b00, 0b001, "cv.bclr">;
+ def CV_BSET : CVBitManipRII<0b01, 0b001, "cv.bset">;
+ def CV_BITREV : CVBitManipRII<0b11, 0b001, "cv.bitrev", uimm2>;
+
+ def CV_EXTRACTR : CVBitManipRR<0b0011000, "cv.extractr">;
+ def CV_EXTRACTUR : CVBitManipRR<0b0011001, "cv.extractur">;
+
+ let Constraints = "$rd = $rd_wb" in {
+ def CV_INSERT : CVInstBitManipRII<0b10, 0b000, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, uimm5:$is3, uimm5:$is2),
+ "cv.insert", "$rd, $rs1, $is3, $is2">;
+ def CV_INSERTR : RVInstR<0b0011010, 0b011, OPC_CUSTOM_1, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2),
+ "cv.insertr", "$rd, $rs1, $rs2">;
+ }
+
+ def CV_BCLRR : CVBitManipRR<0b0011100, "cv.bclrr">;
+ def CV_BSETR : CVBitManipRR<0b0011101, "cv.bsetr">;
+
+ def CV_ROR : CVBitManipRR<0b0100000, "cv.ror">;
+ def CV_FF1 : CVBitManipR<0b0100001, "cv.ff1">;
+ def CV_FL1 : CVBitManipR<0b0100010, "cv.fl1">;
+ def CV_CLB : CVBitManipR<0b0100011, "cv.clb">;
+ def CV_CNT : CVBitManipR<0b0100100, "cv.cnt">;
+}
+
+class CVInstMac<bits<7> funct7, bits<3> funct3, dag outs, dag ins,
+ string opcodestr, string argstr, list<dag> pattern>
+ : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatOther> {
+ bits<5> rs2;
+ bits<5> rs1;
+ bits<5> rd;
+
+ let Inst{31-25} = funct7;
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rd;
+ let Inst{6-0} = OPC_CUSTOM_1.Value;
+ let DecoderNamespace = "XCVmac";
+}
+
+class CVInstMac16I<bits<2> funct2, bits<3> funct3, dag outs, dag ins,
+ string opcodestr, string argstr, list<dag> pattern>
+ : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatOther> {
+ bits<5> imm5;
+ bits<5> rs2;
+ bits<5> rs1;
+ bits<5> rd;
+
+ let Inst{31-30} = funct2;
+ let Inst{29-25} = imm5;
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rd;
+ let Inst{6-0} = OPC_CUSTOM_2.Value;
+ let DecoderNamespace = "XCVmac";
+}
+
+let Predicates = [HasVendorXCVmac, IsRV32], hasSideEffects = 0, mayLoad = 0,
+ mayStore = 0, Constraints = "$rd = $rd_wb" in {
+ // 32x32 bit macs
+ def CV_MAC : CVInstMac<0b1001000, 0b011, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2),
+ "cv.mac", "$rd, $rs1, $rs2", []>,
+ Sched<[]>;
+ def CV_MSU : CVInstMac<0b1001001, 0b011, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2),
+ "cv.msu", "$rd, $rs1, $rs2", []>,
+ Sched<[]>;
+
+ // Signed 16x16 bit macs with imm
+ def CV_MACSN : CVInstMac16I<0b00, 0b110, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
+ "cv.macsn", "$rd, $rs1, $rs2, $imm5", []>,
+ Sched<[]>;
+ def CV_MACHHSN : CVInstMac16I<0b01, 0b110, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
+ "cv.machhsn", "$rd, $rs1, $rs2, $imm5", []>,
+ Sched<[]>;
+ def CV_MACSRN : CVInstMac16I<0b10, 0b110, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
+ "cv.macsrn", "$rd, $rs1, $rs2, $imm5", []>,
+ Sched<[]>;
+ def CV_MACHHSRN : CVInstMac16I<0b11, 0b110, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
+ "cv.machhsrn", "$rd, $rs1, $rs2, $imm5", []>,
+ Sched<[]>;
+
+ // Unsigned 16x16 bit macs with imm
+ def CV_MACUN : CVInstMac16I<0b00, 0b111, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
+ "cv.macun", "$rd, $rs1, $rs2, $imm5", []>,
+ Sched<[]>;
+ def CV_MACHHUN : CVInstMac16I<0b01, 0b111, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
+ "cv.machhun", "$rd, $rs1, $rs2, $imm5", []>,
+ Sched<[]>;
+ def CV_MACURN : CVInstMac16I<0b10, 0b111, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
+ "cv.macurn", "$rd, $rs1, $rs2, $imm5", []>,
+ Sched<[]>;
+ def CV_MACHHURN : CVInstMac16I<0b11, 0b111, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2, uimm5:$imm5),
+ "cv.machhurn", "$rd, $rs1, $rs2, $imm5", []>,
+ Sched<[]>;
+} // Predicates = [HasVendorXCVmac, IsRV32], hasSideEffects = 0, mayLoad = 0...
+
+let Predicates = [HasVendorXCVmac, IsRV32], hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+ // Signed 16x16 bit muls with imm
+ def CV_MULSN : CVInstMac16I<0b00, 0b100, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
+ "cv.mulsn", "$rd, $rs1, $rs2, $imm5", []>,
+ Sched<[]>;
+ def CV_MULHHSN : CVInstMac16I<0b01, 0b100, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
+ "cv.mulhhsn", "$rd, $rs1, $rs2, $imm5", []>,
+ Sched<[]>;
+ def CV_MULSRN : CVInstMac16I<0b10, 0b100, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
+ "cv.mulsrn", "$rd, $rs1, $rs2, $imm5", []>,
+ Sched<[]>;
+ def CV_MULHHSRN : CVInstMac16I<0b11, 0b100, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
+ "cv.mulhhsrn", "$rd, $rs1, $rs2, $imm5", []>,
+ Sched<[]>;
+
+
+ // Unsigned 16x16 bit muls with imm
+ def CV_MULUN : CVInstMac16I<0b00, 0b101, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
+ "cv.mulun", "$rd, $rs1, $rs2, $imm5", []>,
+ Sched<[]>;
+ def CV_MULHHUN : CVInstMac16I<0b01, 0b101, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
+ "cv.mulhhun", "$rd, $rs1, $rs2, $imm5", []>,
+ Sched<[]>;
+ def CV_MULURN : CVInstMac16I<0b10, 0b101, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
+ "cv.mulurn", "$rd, $rs1, $rs2, $imm5", []>,
+ Sched<[]>;
+ def CV_MULHHURN : CVInstMac16I<0b11, 0b101, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, uimm5:$imm5),
+ "cv.mulhhurn", "$rd, $rs1, $rs2, $imm5", []>,
+ Sched<[]>;
+} // Predicates = [HasVendorXCVmac, IsRV32], hasSideEffects = 0, mayLoad = 0...
+
+let Predicates = [HasVendorXCVmac, IsRV32] in {
+ // Xcvmac Pseudo Instructions
+ // Signed 16x16 bit muls
+ def : InstAlias<"cv.muls $rd1, $rs1, $rs2",
+ (CV_MULSN GPR:$rd1, GPR:$rs1, GPR:$rs2, 0)>;
+ def : InstAlias<"cv.mulhhs $rd1, $rs1, $rs2",
+ (CV_MULHHSN GPR:$rd1, GPR:$rs1, GPR:$rs2, 0)>;
+
+ // Unsigned 16x16 bit muls
+ def : InstAlias<"cv.mulu $rd1, $rs1, $rs2",
+ (CV_MULUN GPR:$rd1, GPR:$rs1, GPR:$rs2, 0)>;
+ def : InstAlias<"cv.mulhhu $rd1, $rs1, $rs2",
+ (CV_MULHHUN GPR:$rd1, GPR:$rs1, GPR:$rs2, 0)>;
+} // Predicates = [HasVendorXCVmac, IsRV32]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
new file mode 100644
index 000000000000..03ed501ba6a3
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -0,0 +1,530 @@
+//===-- RISCVInstrInfoXsf.td - SiFive custom instructions --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the vendor extensions defined by SiFive.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// XSFVCP extension instructions.
+//===----------------------------------------------------------------------===//
+
+def VCIXVS2 : RISCVVConstraint<VS2Constraint.Value>;
+def VCIXVS2VS1 : RISCVVConstraint<!or(VS2Constraint.Value,
+ VS1Constraint.Value)>;
+
+class VCIXType<bits<4> val> {
+ bits<4> Val = val;
+}
+
+def VCIX_X : VCIXType<0b0000>;
+def VCIX_XV : VCIXType<0b0010>;
+def VCIX_XVV : VCIXType<0b1010>;
+def VCIX_XVW : VCIXType<0b1111>;
+
+// The payload and timm5 operands are all marked as ImmArg in the IR
+// intrinsic and will be target constant, so use TImmLeaf rather than ImmLeaf.
+def payload1 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isUInt<1>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<1>;
+ let DecoderMethod = "decodeUImmOperand<1>";
+ let OperandType = "OPERAND_UIMM1";
+ let OperandNamespace = "RISCVOp";
+}
+
+def payload2 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isUInt<2>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<2>;
+ let DecoderMethod = "decodeUImmOperand<2>";
+ let OperandType = "OPERAND_UIMM2";
+ let OperandNamespace = "RISCVOp";
+}
+
+def payload5 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<5>;
+ let DecoderMethod = "decodeUImmOperand<5>";
+ let OperandType = "OPERAND_UIMM5";
+ let OperandNamespace = "RISCVOp";
+}
+
+def timm5 : Operand<XLenVT>, TImmLeaf<XLenVT, [{return isInt<5>(Imm);}]> {
+ let ParserMatchClass = SImmAsmOperand<5>;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeSImmOperand<5>";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (MCOp.evaluateAsConstantImm(Imm))
+ return isInt<5>(Imm);
+ return MCOp.isBareSymbolRef();
+ }];
+}
+
+class SwapVCIXIns<dag funct6, dag rd, dag rs2, dag rs1, bit swap> {
+ dag Ins = !con(funct6, !if(swap, rs2, rd), !if(swap, rd, rs2), rs1);
+}
+
+class RVInstVCCustom2<bits<4> funct6_hi4, bits<3> funct3, dag outs, dag ins,
+ string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+ bits<5> rs2;
+ bits<5> rs1;
+ bits<5> rd;
+ bits<2> funct6_lo2;
+ bit vm;
+
+ let Inst{31-28} = funct6_hi4;
+ let Inst{27-26} = funct6_lo2;
+ let Inst{25} = vm;
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rd;
+ let Inst{6-0} = OPC_CUSTOM_2.Value;
+
+ let Uses = [VTYPE, VL];
+ let RVVConstraint = NoConstraint;
+}
+
+class RVInstVCFCustom2<bits<4> funct6_hi4, bits<3> funct3, dag outs, dag ins,
+ string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+ bits<5> rs2;
+ bits<5> rs1;
+ bits<5> rd;
+ bit funct6_lo1;
+ bit vm;
+
+ let Inst{31-28} = funct6_hi4;
+ let Inst{27} = 1;
+ let Inst{26} = funct6_lo1;
+ let Inst{25} = vm;
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rd;
+ let Inst{6-0} = OPC_CUSTOM_2.Value;
+
+ let Uses = [VTYPE, VL];
+ let RVVConstraint = NoConstraint;
+}
+
+class GetFTypeInfo<int sew> {
+ ValueType Scalar = !cond(!eq(sew, 16): f16,
+ !eq(sew, 32): f32,
+ !eq(sew, 64): f64);
+ RegisterClass ScalarRegClass = !cond(!eq(sew, 16): FPR16,
+ !eq(sew, 32): FPR32,
+ !eq(sew, 64): FPR64);
+}
+
+class VCIXInfo<string suffix, VCIXType type, DAGOperand TyRd,
+ DAGOperand TyRs2, DAGOperand TyRs1, bit HaveOutputDst> {
+ string OpcodeStr = !if(HaveOutputDst, "sf.vc.v." # suffix,
+ "sf.vc." # suffix);
+ bits<4> Funct6_hi4 = type.Val;
+ bits<3> Funct3 = !cond(!eq(TyRs1, VR): 0b000,
+ !eq(TyRs1, GPR): 0b100,
+ !eq(TyRs1, FPR32): 0b101,
+ !eq(TyRs1, simm5): 0b011);
+ dag Outs = !if(!not(HaveOutputDst), (outs),
+ !if(!or(!eq(type, VCIX_XVV), !eq(type, VCIX_XVW)),
+ (outs TyRd:$rd_wb), (outs TyRd:$rd)));
+ dag Ins = SwapVCIXIns<!if(!ne(TyRs1, FPR32), (ins uimm2:$funct6_lo2),
+ (ins uimm1:$funct6_lo1)),
+ !if(!and(HaveOutputDst, !or(!eq(type, VCIX_X),
+ !eq(type, VCIX_XV))),
+ (ins), (ins TyRd:$rd)),
+ (ins TyRs2:$rs2),
+ (ins TyRs1:$rs1),
+ !if(!eq(type, VCIX_X), 1, 0)>.Ins;
+ string Prototype = !if(!eq(type, VCIX_X), "$funct6_lo2, $rs2, $rd, $rs1",
+ !if(!ne(TyRs1, FPR32), "$funct6_lo2, $rd, $rs2, $rs1",
+ "$funct6_lo1, $rd, $rs2, $rs1"));
+ string Constraints = !if(!not(HaveOutputDst), "",
+ !if(!or(!eq(type, VCIX_XVV),
+ !eq(type, VCIX_XVW)), "$rd = $rd_wb", ""));
+ RISCVVConstraint RVVConstraint = !if(!or(!not(HaveOutputDst),
+ !ne(type, VCIX_XVW)), NoConstraint,
+ !if(!eq(TyRs1, VR), VCIXVS2VS1, VCIXVS2));
+}
+
+class CustomSiFiveVCIX<VCIXInfo info>
+ : RVInstVCCustom2<info.Funct6_hi4, info.Funct3, info.Outs,
+ info.Ins, info.OpcodeStr, info.Prototype> {
+ let Constraints = info.Constraints;
+ let RVVConstraint = info.RVVConstraint;
+}
+
+class CustomSiFiveVCIF<VCIXInfo info>
+ : RVInstVCFCustom2<info.Funct6_hi4, info.Funct3, info.Outs,
+ info.Ins, info.OpcodeStr, info.Prototype> {
+ let Constraints = info.Constraints;
+ let RVVConstraint = info.RVVConstraint;
+}
+
+multiclass CustomSiFiveVCIXorVCIF<string suffix, VCIXType type,
+ DAGOperand TyRd, DAGOperand TyRs2,
+ DAGOperand TyRs1, bit HaveOutputDst> {
+ defvar info = VCIXInfo<suffix, type, TyRd, TyRs2, TyRs1, HaveOutputDst>;
+ if !eq(TyRs1, FPR32) then {
+ def NAME : CustomSiFiveVCIF<info>;
+ } else {
+ def NAME : CustomSiFiveVCIX<info>;
+ }
+}
+
+multiclass CustomSiFiveVCIX<string suffix, VCIXType type,
+ DAGOperand InTyRd, DAGOperand InTyRs2,
+ DAGOperand InTyRs1> {
+ let vm = 1 in
+ defm VC_ # NAME : CustomSiFiveVCIXorVCIF<suffix, type, InTyRd, InTyRs2,
+ InTyRs1, 0>;
+ let vm = 0 in
+ defm VC_V_ # NAME : CustomSiFiveVCIXorVCIF<suffix, type, VR, InTyRs2,
+ InTyRs1, 1>;
+}
+
+let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0,
+ hasSideEffects = 1, hasNoSchedulingInfo = 1, DecoderNamespace = "XSfvcp" in {
+ defm X : CustomSiFiveVCIX<"x", VCIX_X, uimm5, uimm5, GPR>, Sched<[]>;
+ defm I : CustomSiFiveVCIX<"i", VCIX_X, uimm5, uimm5, simm5>, Sched<[]>;
+ defm XV : CustomSiFiveVCIX<"xv", VCIX_XV, uimm5, VR, GPR>, Sched<[]>;
+ defm IV : CustomSiFiveVCIX<"iv", VCIX_XV, uimm5, VR, simm5>, Sched<[]>;
+ defm VV : CustomSiFiveVCIX<"vv", VCIX_XV, uimm5, VR, VR>, Sched<[]>;
+ defm FV : CustomSiFiveVCIX<"fv", VCIX_XV, uimm5, VR, FPR32>, Sched<[]>;
+ defm XVV : CustomSiFiveVCIX<"xvv", VCIX_XVV, VR, VR, GPR>, Sched<[]>;
+ defm IVV : CustomSiFiveVCIX<"ivv", VCIX_XVV, VR, VR, simm5>, Sched<[]>;
+ defm VVV : CustomSiFiveVCIX<"vvv", VCIX_XVV, VR, VR, VR>, Sched<[]>;
+ defm FVV : CustomSiFiveVCIX<"fvv", VCIX_XVV, VR, VR, FPR32>, Sched<[]>;
+ defm XVW : CustomSiFiveVCIX<"xvw", VCIX_XVW, VR, VR, GPR>, Sched<[]>;
+ defm IVW : CustomSiFiveVCIX<"ivw", VCIX_XVW, VR, VR, simm5>, Sched<[]>;
+ defm VVW : CustomSiFiveVCIX<"vvw", VCIX_XVW, VR, VR, VR>, Sched<[]>;
+ defm FVW : CustomSiFiveVCIX<"fvw", VCIX_XVW, VR, VR, FPR32>, Sched<[]>;
+}
+
+class VPseudoVC_X<Operand OpClass, DAGOperand RS1Class,
+ bit HasSideEffect = 1> :
+ Pseudo<(outs),
+ (ins OpClass:$op1, payload5:$rs2, payload5:$rd, RS1Class:$r1,
+ AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let hasSideEffects = HasSideEffect;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoVC_XV<Operand OpClass, VReg RS2Class, DAGOperand RS1Class,
+ bit HasSideEffect = 1> :
+ Pseudo<(outs),
+ (ins OpClass:$op1, payload5:$rd, RS2Class:$rs2, RS1Class:$r1,
+ AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let hasSideEffects = HasSideEffect;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoVC_XVV<Operand OpClass, VReg RDClass, VReg RS2Class,
+ DAGOperand RS1Class, bit HasSideEffect = 1> :
+ Pseudo<(outs),
+ (ins OpClass:$op1, RDClass:$rd, RS2Class:$rs2, RS1Class:$r1,
+ AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let hasSideEffects = HasSideEffect;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoVC_V_X<Operand OpClass, VReg RDClass, DAGOperand RS1Class,
+ bit HasSideEffect = 1> :
+ Pseudo<(outs RDClass:$rd),
+ (ins OpClass:$op1, payload5:$rs2, RS1Class:$r1,
+ AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let hasSideEffects = HasSideEffect;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoVC_V_XV<Operand OpClass, VReg RDClass, VReg RS2Class,
+ DAGOperand RS1Class, bit HasSideEffect = 1> :
+ Pseudo<(outs RDClass:$rd),
+ (ins OpClass:$op1, RS2Class:$rs2, RS1Class:$r1,
+ AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let hasSideEffects = HasSideEffect;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoVC_V_XVV<Operand OpClass, VReg RDClass, VReg RS2Class,
+ DAGOperand RS1Class, bit HasSideEffect = 1> :
+ Pseudo<(outs RDClass:$rd),
+ (ins OpClass:$op1, RDClass:$rs3, RS2Class:$rs2, RS1Class:$r1,
+ AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let hasSideEffects = HasSideEffect;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+multiclass VPseudoVC_X<LMULInfo m, DAGOperand RS1Class,
+ Operand OpClass = payload2> {
+ let VLMul = m.value in {
+ def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_X<OpClass, RS1Class>;
+ def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>;
+ def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class, 0>;
+ }
+}
+
+multiclass VPseudoVC_XV<LMULInfo m, DAGOperand RS1Class,
+ Operand OpClass = payload2> {
+ let VLMul = m.value in {
+ def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XV<OpClass, m.vrclass, RS1Class>;
+ def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+ def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class, 0>;
+ }
+}
+
+multiclass VPseudoVC_XVV<LMULInfo m, DAGOperand RS1Class,
+ Operand OpClass = payload2> {
+ let VLMul = m.value in {
+ def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+ def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+ def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class, 0>;
+ }
+}
+
+multiclass VPseudoVC_XVW<LMULInfo m, DAGOperand RS1Class,
+ Operand OpClass = payload2> {
+ let VLMul = m.value in {
+ def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>;
+ let Constraints = "@earlyclobber $rd, $rd = $rs3" in {
+ def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>;
+ def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class, 0>;
+ }
+ }
+}
+
+let Predicates = [HasVendorXSfvcp] in {
+ foreach m = MxList in {
+ defm X : VPseudoVC_X<m, GPR>;
+ defm I : VPseudoVC_X<m, timm5>;
+ defm XV : VPseudoVC_XV<m, GPR>;
+ defm IV : VPseudoVC_XV<m, timm5>;
+ defm VV : VPseudoVC_XV<m, m.vrclass>;
+ defm XVV : VPseudoVC_XVV<m, GPR>;
+ defm IVV : VPseudoVC_XVV<m, timm5>;
+ defm VVV : VPseudoVC_XVV<m, m.vrclass>;
+ }
+ foreach f = FPList in {
+ foreach m = f.MxList in {
+ defm f.FX # "V" : VPseudoVC_XV<m, f.fprclass, payload1>;
+ defm f.FX # "VV" : VPseudoVC_XVV<m, f.fprclass, payload1>;
+ }
+ }
+ foreach m = MxListW in {
+ defm XVW : VPseudoVC_XVW<m, GPR>;
+ defm IVW : VPseudoVC_XVW<m, timm5>;
+ defm VVW : VPseudoVC_XVW<m, m.vrclass>;
+ }
+ foreach f = FPListW in {
+ foreach m = f.MxList in
+ defm f.FX # "VW" : VPseudoVC_XVW<m, f.fprclass, payload1>;
+ }
+}
+
+class VPatVC_OP4<string intrinsic_name,
+ string inst,
+ ValueType op2_type,
+ ValueType op3_type,
+ ValueType op4_type,
+ int sew,
+ DAGOperand op2_kind,
+ DAGOperand op3_kind,
+ DAGOperand op4_kind,
+ Operand op1_kind = payload2> :
+ Pat<(!cast<Intrinsic>(intrinsic_name)
+ (XLenVT op1_kind:$op1),
+ (op2_type op2_kind:$op2),
+ (op3_type op3_kind:$op3),
+ (op4_type op4_kind:$op4),
+ VLOpFrag),
+ (!cast<Instruction>(inst)
+ (XLenVT op1_kind:$op1),
+ (op2_type op2_kind:$op2),
+ (op3_type op3_kind:$op3),
+ (op4_type op4_kind:$op4),
+ GPR:$vl, sew)>;
+
+class VPatVC_V_OP4<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op2_type,
+ ValueType op3_type,
+ ValueType op4_type,
+ int sew,
+ DAGOperand op2_kind,
+ DAGOperand op3_kind,
+ DAGOperand op4_kind,
+ Operand op1_kind = payload2> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (XLenVT op1_kind:$op1),
+ (op2_type op2_kind:$op2),
+ (op3_type op3_kind:$op3),
+ (op4_type op4_kind:$op4),
+ VLOpFrag)),
+ (!cast<Instruction>(inst)
+ (XLenVT op1_kind:$op1),
+ (op2_type op2_kind:$op2),
+ (op3_type op3_kind:$op3),
+ (op4_type op4_kind:$op4),
+ GPR:$vl, sew)>;
+
+class VPatVC_V_OP3<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op2_type,
+ ValueType op3_type,
+ int sew,
+ DAGOperand op2_kind,
+ DAGOperand op3_kind,
+ Operand op1_kind = payload2> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (XLenVT op1_kind:$op1),
+ (op2_type op2_kind:$op2),
+ (op3_type op3_kind:$op3),
+ VLOpFrag)),
+ (!cast<Instruction>(inst)
+ (XLenVT op1_kind:$op1),
+ (op2_type op2_kind:$op2),
+ (op3_type op3_kind:$op3),
+ GPR:$vl, sew)>;
+
+multiclass VPatVC_X<string intrinsic_suffix, string instruction_suffix,
+ VTypeInfo vti, ValueType type, DAGOperand kind> {
+ def : VPatVC_OP4<"int_riscv_sf_vc_" # intrinsic_suffix # "_se_e" # vti.SEW # !tolower(vti.LMul.MX),
+ "PseudoVC_" # instruction_suffix # "_SE_" # vti.LMul.MX,
+ XLenVT, XLenVT, type, vti.Log2SEW,
+ payload5, payload5, kind>;
+ def : VPatVC_V_OP3<"int_riscv_sf_vc_v_" # intrinsic_suffix # "_se",
+ "PseudoVC_V_" # instruction_suffix # "_SE_" # vti.LMul.MX,
+ vti.Vector, XLenVT, type, vti.Log2SEW,
+ payload5, kind>;
+ def : VPatVC_V_OP3<"int_riscv_sf_vc_v_" # intrinsic_suffix,
+ "PseudoVC_V_" # instruction_suffix # "_" # vti.LMul.MX,
+ vti.Vector, XLenVT, type, vti.Log2SEW,
+ payload5, kind>;
+}
+
+multiclass VPatVC_XV<string intrinsic_suffix, string instruction_suffix,
+ VTypeInfo vti, ValueType type, DAGOperand kind,
+ Operand op1_kind = payload2> {
+ def : VPatVC_OP4<"int_riscv_sf_vc_" # intrinsic_suffix # "_se",
+ "PseudoVC_" # instruction_suffix # "_SE_" # vti.LMul.MX,
+ XLenVT, vti.Vector, type, vti.Log2SEW,
+ payload5, vti.RegClass, kind, op1_kind>;
+ def : VPatVC_V_OP3<"int_riscv_sf_vc_v_" # intrinsic_suffix # "_se",
+ "PseudoVC_V_" # instruction_suffix # "_SE_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, type, vti.Log2SEW,
+ vti.RegClass, kind, op1_kind>;
+ def : VPatVC_V_OP3<"int_riscv_sf_vc_v_" # intrinsic_suffix,
+ "PseudoVC_V_" # instruction_suffix # "_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, type, vti.Log2SEW,
+ vti.RegClass, kind, op1_kind>;
+}
+
+multiclass VPatVC_XVV<string intrinsic_suffix, string instruction_suffix,
+ VTypeInfo wti, VTypeInfo vti, ValueType type, DAGOperand kind,
+ Operand op1_kind = payload2> {
+ def : VPatVC_OP4<"int_riscv_sf_vc_" # intrinsic_suffix # "_se",
+ "PseudoVC_" # instruction_suffix # "_SE_" # vti.LMul.MX,
+ wti.Vector, vti.Vector, type, vti.Log2SEW,
+ wti.RegClass, vti.RegClass, kind, op1_kind>;
+ def : VPatVC_V_OP4<"int_riscv_sf_vc_v_" # intrinsic_suffix # "_se",
+ "PseudoVC_V_" # instruction_suffix # "_SE_" # vti.LMul.MX,
+ wti.Vector, wti.Vector, vti.Vector, type, vti.Log2SEW,
+ wti.RegClass, vti.RegClass, kind, op1_kind>;
+ def : VPatVC_V_OP4<"int_riscv_sf_vc_v_" # intrinsic_suffix,
+ "PseudoVC_V_" # instruction_suffix # "_" # vti.LMul.MX,
+ wti.Vector, wti.Vector, vti.Vector, type, vti.Log2SEW,
+ wti.RegClass, vti.RegClass, kind, op1_kind>;
+}
+
+let Predicates = [HasVendorXSfvcp] in {
+ foreach vti = AllIntegerVectors in {
+ defm : VPatVC_X<"x", "X", vti, vti.Scalar, vti.ScalarRegClass>;
+ defm : VPatVC_X<"i", "I", vti, XLenVT, timm5>;
+ defm : VPatVC_XV<"xv", "XV", vti, vti.Scalar, vti.ScalarRegClass>;
+ defm : VPatVC_XV<"iv", "IV", vti, XLenVT, timm5>;
+ defm : VPatVC_XV<"vv", "VV", vti, vti.Vector, vti.RegClass>;
+ defm : VPatVC_XVV<"xvv", "XVV", vti, vti, vti.Scalar, vti.ScalarRegClass>;
+ defm : VPatVC_XVV<"ivv", "IVV", vti, vti, XLenVT, timm5>;
+ defm : VPatVC_XVV<"vvv", "VVV", vti, vti, vti.Vector, vti.RegClass>;
+ if !ge(vti.SEW, 16) then {
+ defm : VPatVC_XV<"fv", "F" # vti.SEW # "V", vti,
+ GetFTypeInfo<vti.SEW>.Scalar,
+ GetFTypeInfo<vti.SEW>.ScalarRegClass, payload1>;
+ defm : VPatVC_XVV<"fvv", "F" # vti.SEW # "VV", vti, vti,
+ GetFTypeInfo<vti.SEW>.Scalar,
+ GetFTypeInfo<vti.SEW>.ScalarRegClass, payload1>;
+ }
+ }
+ foreach VtiToWti = AllWidenableIntVectors in {
+ defvar vti = VtiToWti.Vti;
+ defvar wti = VtiToWti.Wti;
+ defm : VPatVC_XVV<"xvw", "XVW", wti, vti, vti.Scalar, vti.ScalarRegClass>;
+ defm : VPatVC_XVV<"ivw", "IVW", wti, vti, XLenVT, timm5>;
+ defm : VPatVC_XVV<"vvw", "VVW", wti, vti, vti.Vector, vti.RegClass>;
+ if !ge(vti.SEW, 16) then {
+ defm : VPatVC_XVV<"fvw", "F" # vti.SEW # "VW", wti, vti,
+ GetFTypeInfo<vti.SEW>.Scalar,
+ GetFTypeInfo<vti.SEW>.ScalarRegClass, payload1>;
+ }
+ }
+}
+
+let Predicates = [HasVendorXSfcie] in {
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0, DecoderNamespace = "XSfcie" in {
+def SF_CFLUSH_D_L1 : RVInstI<0b000, OPC_SYSTEM, (outs), (ins GPR:$rs1), "cflush.d.l1","$rs1">,
+ Sched<[]> {
+ let rd = 0;
+ let imm12 = {0b1111,0b1100,0b0000};
+}
+
+def SF_CDISCARD_D_L1 : RVInstI<0b000, OPC_SYSTEM, (outs), (ins GPR:$rs1), "cdiscard.d.l1","$rs1">,
+ Sched<[]> {
+ let rd = 0;
+ let imm12 = {0b1111,0b1100,0b0010};
+}
+
+def SF_CEASE : RVInstI<0b000, OPC_SYSTEM, (outs), (ins), "cease","">, Sched<[]> {
+ let rs1 = 0;
+ let rd = 0;
+ let imm12 = {0b0011,0b0000,0b0101};
+}
+}
+def : InstAlias<"cflush.d.l1", (SF_CFLUSH_D_L1 X0)>;
+def : InstAlias<"cdiscard.d.l1", (SF_CDISCARD_D_L1 X0)>;
+} // Predicates = [HasVendorXSfcie]
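For reference, the three XSfcie instructions above are ordinary I-type SYSTEM encodings with a hard-wired imm12, so their machine words follow the standard RISC-V I-type field layout. A minimal C++ sketch (illustrative only, not part of the patch; it assumes OPC_SYSTEM encodes as 0x73 and the usual imm12/rs1/funct3/rd/opcode bit positions):

  #include <cstdint>
  #include <cstdio>

  // Compose a standard RISC-V I-type word: imm12 | rs1 | funct3 | rd | opcode.
  static uint32_t encodeIType(uint32_t imm12, uint32_t rs1, uint32_t funct3,
                              uint32_t rd, uint32_t opcode) {
    return (imm12 & 0xFFF) << 20 | (rs1 & 0x1F) << 15 | (funct3 & 0x7) << 12 |
           (rd & 0x1F) << 7 | (opcode & 0x7F);
  }

  int main() {
    // SF_CFLUSH_D_L1 with rs1 = x0 (the "cflush.d.l1" alias above):
    // imm12 = 0xFC0, funct3 = 0, rd = 0, OPC_SYSTEM = 0x73.
    std::printf("0x%08X\n", encodeIType(0xFC0, 0, 0, 0, 0x73)); // prints 0xFC000073
    return 0;
  }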
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
index fbf7db7a333a..e840dfddd8d9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td
@@ -11,13 +11,33 @@
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
+// T-HEAD specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDT_LoadPair : SDTypeProfile<2, 2,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 3>, SDTCisPtrTy<2>, SDTCisVT<3, XLenVT>]>;
+def SDT_StorePair : SDTypeProfile<0, 4,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 3>, SDTCisPtrTy<2>, SDTCisVT<3, XLenVT>]>;
+
+def th_lwud : SDNode<"RISCVISD::TH_LWUD", SDT_LoadPair,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def th_lwd : SDNode<"RISCVISD::TH_LWD", SDT_LoadPair,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def th_ldd : SDNode<"RISCVISD::TH_LDD", SDT_LoadPair,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def th_swd : SDNode<"RISCVISD::TH_SWD", SDT_StorePair,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def th_sdd : SDNode<"RISCVISD::TH_SDD", SDT_StorePair,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+//===----------------------------------------------------------------------===//
// Instruction class templates
//===----------------------------------------------------------------------===//
class THInstVdotVV<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
string opcodestr, string argstr>
: RVInstVV<funct6, opv, outs, ins, opcodestr, argstr> {
let Inst{26} = 0;
- let Opcode = OPC_CUSTOM_0.Value;
+ let Inst{6-0} = OPC_CUSTOM_0.Value;
let DecoderNamespace = "THeadV";
}
@@ -25,7 +45,7 @@ class THInstVdotVX<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
string opcodestr, string argstr>
: RVInstVX<funct6, opv, outs, ins, opcodestr, argstr> {
let Inst{26} = 1;
- let Opcode = OPC_CUSTOM_0.Value;
+ let Inst{6-0} = OPC_CUSTOM_0.Value;
let DecoderNamespace = "THeadV";
}
@@ -43,6 +63,159 @@ class THVdotALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
opcodestr, "$vd, $rs1, $vs2$vm">;
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+let Predicates = [HasVendorXTHeadBa], DecoderNamespace = "THeadBa",
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class THShiftALU_rri<bits<3> funct3, string opcodestr>
+ : RVInstR<0, funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2, uimm2:$uimm2),
+ opcodestr, "$rd, $rs1, $rs2, $uimm2"> {
+ bits<2> uimm2;
+ let Inst{31-27} = 0;
+ let Inst{26-25} = uimm2;
+}
+
+let Predicates = [HasVendorXTHeadBb], DecoderNamespace = "THeadBb",
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+class THShift_ri<bits<5> funct5, bits<3> funct3, string opcodestr>
+ : RVInstIShift<funct5, funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+ (ins GPR:$rs1, uimmlog2xlen:$shamt),
+ opcodestr, "$rd, $rs1, $shamt">;
+
+class THBitfieldExtract_rii<bits<3> funct3, string opcodestr>
+ : RVInstI<funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+ (ins GPR:$rs1, uimmlog2xlen:$msb, uimmlog2xlen:$lsb),
+ opcodestr, "$rd, $rs1, $msb, $lsb"> {
+ bits<6> msb;
+ bits<6> lsb;
+ let Inst{31-26} = msb;
+ let Inst{25-20} = lsb;
+}
+
+class THRev_r<bits<5> funct5, bits<2> funct2, string opcodestr>
+ : RVInstR4<funct2, 0b001, OPC_CUSTOM_0, (outs GPR:$rd), (ins GPR:$rs1),
+ opcodestr, "$rd, $rs1"> {
+ let rs3 = funct5;
+ let rs2 = 0;
+}
+}
+
+let Predicates = [HasVendorXTHeadBb, IsRV64], DecoderNamespace = "THeadBb",
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class THShiftW_ri<bits<7> funct7, bits<3> funct3, string opcodestr>
+ : RVInstIShiftW<funct7, funct3, OPC_CUSTOM_0, (outs GPR:$rd),
+ (ins GPR:$rs1, uimm5:$shamt),
+ opcodestr, "$rd, $rs1, $shamt">;
+
+let Predicates = [HasVendorXTHeadCondMov], DecoderNamespace = "THeadCondMov",
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCommutable = 1 in
+class THCondMov_rr<bits<7> funct7, string opcodestr>
+ : RVInstR<funct7, 0b001, OPC_CUSTOM_0, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2),
+ opcodestr, "$rd, $rs1, $rs2"> {
+ let Constraints = "$rd_wb = $rd";
+}
+
+let Predicates = [HasVendorXTHeadMac], DecoderNamespace = "THeadMac",
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCommutable = 1 in
+class THMulAccumulate_rr<bits<7> funct7, string opcodestr>
+ : RVInstR<funct7, 0b001, OPC_CUSTOM_0, (outs GPR:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2),
+ opcodestr, "$rd, $rs1, $rs2"> {
+ let Constraints = "$rd_wb = $rd";
+}
+
+let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "THeadMemPair",
+ hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+class THLoadPair<bits<5> funct5, string opcodestr>
+ : RVInstR<!shl(funct5, 2), 0b100, OPC_CUSTOM_0,
+ (outs GPR:$rd, GPR:$rs2),
+ (ins GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4),
+ opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> {
+ bits<2> uimm2;
+ let Inst{26-25} = uimm2;
+ let DecoderMethod = "decodeXTHeadMemPair";
+ let Constraints = "@earlyclobber $rd,@earlyclobber $rs2";
+}
+
+let Predicates = [HasVendorXTHeadMemPair], DecoderNamespace = "THeadMemPair",
+ hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+class THStorePair<bits<5> funct5, string opcodestr>
+ : RVInstR<!shl(funct5, 2), 0b101, OPC_CUSTOM_0,
+ (outs),
+ (ins GPR:$rd, GPR:$rs2, GPR:$rs1, uimm2:$uimm2, uimm7:$const3or4),
+ opcodestr, "$rd, $rs2, (${rs1}), $uimm2, $const3or4"> {
+ bits<2> uimm2;
+ let Inst{26-25} = uimm2;
+ let DecoderMethod = "decodeXTHeadMemPair";
+}
+
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
+class THCacheInst_r<bits<5> funct5, string opcodestr>
+ : RVInstR<0b0000001, 0, OPC_CUSTOM_0, (outs), (ins GPR:$rs1),
+ opcodestr, "$rs1"> {
+ let rd = 0;
+ let rs2 = funct5;
+}
+
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
+class THCacheInst_rr<bits<7> funct7, string opcodestr>
+ : RVInstR<funct7, 0, OPC_CUSTOM_0, (outs), (ins GPR:$rs1, GPR:$rs2),
+ opcodestr, "$rs1, $rs2"> {
+ let rd = 0;
+}
+
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
+class THCacheInst_void<bits<5> funct5, string opcodestr>
+ : RVInstR<0b0000000, 0, OPC_CUSTOM_0, (outs), (ins), opcodestr, ""> {
+ let rd = 0;
+ let rs1 = 0;
+ let rs2 = funct5;
+}
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+class THLoadIndexed<RegisterClass Ty, bits<5> funct5, string opcodestr>
+ : RVInstR<!shl(funct5, 2), !if(!eq(Ty, GPR), 0b100, 0b110), OPC_CUSTOM_0,
+ (outs Ty:$rd), (ins GPR:$rs1, GPR:$rs2, uimm2:$uimm2),
+ opcodestr, "$rd, $rs1, $rs2, $uimm2"> {
+ bits<2> uimm2;
+ let Inst{26-25} = uimm2;
+}
+
+class THLoadUpdate<bits<5> funct5, string opcodestr>
+ : RVInstI<0b100, OPC_CUSTOM_0, (outs GPR:$rd, GPR:$rs1_wb),
+ (ins GPR:$rs1, simm5:$simm5, uimm2:$uimm2),
+ opcodestr, "$rd, (${rs1}), $simm5, $uimm2"> {
+ bits<5> simm5;
+ bits<2> uimm2;
+ let imm12{11-7} = funct5;
+ let imm12{6-5} = uimm2;
+ let imm12{4-0} = simm5;
+ let Constraints = "@earlyclobber $rd, $rs1_wb = $rs1";
+}
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
+class THStoreIndexed<RegisterClass StTy, bits<5> funct5, string opcodestr>
+ : RVInstR<!shl(funct5, 2), !if(!eq(StTy, GPR), 0b101, 0b111), OPC_CUSTOM_0,
+ (outs), (ins StTy:$rd, GPR:$rs1, GPR:$rs2, uimm2:$uimm2),
+ opcodestr, "$rd, $rs1, $rs2, $uimm2"> {
+ bits<2> uimm2;
+ let Inst{26-25} = uimm2;
+}
+
+class THStoreUpdate<bits<5> funct5, string opcodestr>
+ : RVInstI<0b101, OPC_CUSTOM_0, (outs GPR:$rs1_up),
+ (ins GPR:$rd, GPR:$rs1, simm5:$simm5, uimm2:$uimm2),
+ opcodestr, "$rd, (${rs1}), $simm5, $uimm2"> {
+ bits<5> simm5;
+ bits<2> uimm2;
+ let imm12{11-7} = funct5;
+ let imm12{6-5} = uimm2;
+ let imm12{4-0} = simm5;
+ let Constraints = "$rs1_up = $rs1";
+}
+}
+
//===----------------------------------------------------------------------===//
// Combination of instruction classes.
// Use these multiclasses to define instructions more easily.
@@ -59,6 +232,217 @@ multiclass THVdotVMAQA<string opcodestr, bits<6> funct6> {
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
+let Predicates = [HasVendorXTHeadBa] in {
+def TH_ADDSL : THShiftALU_rri<0b001, "th.addsl">,
+ Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>;
+} // Predicates = [HasVendorXTHeadBa]
+
+let Predicates = [HasVendorXTHeadBb] in {
+def TH_SRRI : THShift_ri<0b00010, 0b001, "th.srri">;
+def TH_EXT : THBitfieldExtract_rii<0b010, "th.ext">;
+def TH_EXTU : THBitfieldExtract_rii<0b011, "th.extu">;
+def TH_FF0 : THRev_r<0b10000, 0b10, "th.ff0">;
+def TH_FF1 : THRev_r<0b10000, 0b11, "th.ff1">;
+def TH_REV : THRev_r<0b10000, 0b01, "th.rev">;
+def TH_TSTNBZ : THRev_r<0b10000, 0b00, "th.tstnbz">;
+} // Predicates = [HasVendorXTHeadBb]
+
+let Predicates = [HasVendorXTHeadBb, IsRV64], IsSignExtendingOpW = 1 in {
+def TH_SRRIW : THShiftW_ri<0b0001010, 0b001, "th.srriw">;
+def TH_REVW : THRev_r<0b10010, 0b00, "th.revw">;
+} // Predicates = [HasVendorXTHeadBb, IsRV64]
+
+let Predicates = [HasVendorXTHeadBs], DecoderNamespace = "THeadBs" in {
+let IsSignExtendingOpW = 1 in
+def TH_TST : RVBShift_ri<0b10001, 0b001, OPC_CUSTOM_0, "th.tst">,
+ Sched<[WriteSingleBitImm, ReadSingleBitImm]>;
+} // Predicates = [HasVendorXTHeadBs]
+
+let Predicates = [HasVendorXTHeadCondMov] in {
+def TH_MVEQZ : THCondMov_rr<0b0100000, "th.mveqz">;
+def TH_MVNEZ : THCondMov_rr<0b0100001, "th.mvnez">;
+} // Predicates = [HasVendorXTHeadCondMov]
+
+let Predicates = [HasVendorXTHeadMac] in {
+def TH_MULA : THMulAccumulate_rr<0b0010000, "th.mula">;
+def TH_MULS : THMulAccumulate_rr<0b0010001, "th.muls">;
+} // Predicates = [HasVendorXTHeadMac]
+
+let Predicates = [HasVendorXTHeadMac], IsSignExtendingOpW = 1 in {
+def TH_MULAH : THMulAccumulate_rr<0b0010100, "th.mulah">;
+def TH_MULSH : THMulAccumulate_rr<0b0010101, "th.mulsh">;
+} // Predicates = [HasVendorXTHeadMac], IsSignExtendingOpW = 1
+
+let Predicates = [HasVendorXTHeadMac, IsRV64], IsSignExtendingOpW = 1 in {
+def TH_MULAW : THMulAccumulate_rr<0b0010010, "th.mulaw">;
+def TH_MULSW : THMulAccumulate_rr<0b0010011, "th.mulsw">;
+} // Predicates = [HasVendorXTHeadMac, IsRV64]
+
+let Predicates = [HasVendorXTHeadMemPair] in {
+def TH_LWUD : THLoadPair<0b11110, "th.lwud">,
+ Sched<[WriteLDW, WriteLDW, ReadMemBase]>;
+def TH_SWD : THStorePair<0b11100, "th.swd">,
+ Sched<[WriteSTW, WriteSTW, ReadStoreData, ReadMemBase]>;
+let IsSignExtendingOpW = 1 in
+def TH_LWD : THLoadPair<0b11100, "th.lwd">,
+ Sched<[WriteLDW, WriteLDW, ReadMemBase]>;
+}
+
+let Predicates = [HasVendorXTHeadMemPair, IsRV64] in {
+def TH_LDD : THLoadPair<0b11111, "th.ldd">,
+ Sched<[WriteLDD, WriteLDD, ReadMemBase]>;
+def TH_SDD : THStorePair<0b11111, "th.sdd">,
+ Sched<[WriteSTD, WriteSTD, ReadStoreData, ReadMemBase]>;
+}
+
+let Predicates = [HasVendorXTHeadMemIdx], DecoderNamespace = "THeadMemIdx" in {
+// T-Head Load/Store + Update instructions.
+def TH_LBIA : THLoadUpdate<0b00011, "th.lbia">,
+ Sched<[WriteLDB, ReadMemBase]>;
+def TH_LBIB : THLoadUpdate<0b00001, "th.lbib">,
+ Sched<[WriteLDB, ReadMemBase]>;
+def TH_LBUIA : THLoadUpdate<0b10011, "th.lbuia">,
+ Sched<[WriteLDB, ReadMemBase]>;
+def TH_LBUIB : THLoadUpdate<0b10001, "th.lbuib">,
+ Sched<[WriteLDB, ReadMemBase]>;
+
+def TH_LHIA : THLoadUpdate<0b00111, "th.lhia">,
+ Sched<[WriteLDH, ReadMemBase]>;
+def TH_LHIB : THLoadUpdate<0b00101, "th.lhib">,
+ Sched<[WriteLDH, ReadMemBase]>;
+def TH_LHUIA : THLoadUpdate<0b10111, "th.lhuia">,
+ Sched<[WriteLDH, ReadMemBase]>;
+def TH_LHUIB : THLoadUpdate<0b10101, "th.lhuib">,
+ Sched<[WriteLDH, ReadMemBase]>;
+
+def TH_LWIA : THLoadUpdate<0b01011, "th.lwia">,
+ Sched<[WriteLDW, ReadMemBase]>;
+def TH_LWIB : THLoadUpdate<0b01001, "th.lwib">,
+ Sched<[WriteLDW, ReadMemBase]>;
+
+def TH_SBIA : THStoreUpdate<0b00011, "th.sbia">,
+ Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
+def TH_SBIB : THStoreUpdate<0b00001, "th.sbib">,
+ Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
+
+def TH_SHIA : THStoreUpdate<0b00111, "th.shia">,
+ Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
+def TH_SHIB : THStoreUpdate<0b00101, "th.shib">,
+ Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
+
+def TH_SWIA : THStoreUpdate<0b01011, "th.swia">,
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+def TH_SWIB : THStoreUpdate<0b01001, "th.swib">,
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+
+// T-Head Load/Store Indexed instructions.
+def TH_LRB : THLoadIndexed<GPR, 0b00000, "th.lrb">,
+ Sched<[WriteLDB, ReadMemBase]>;
+def TH_LRBU : THLoadIndexed<GPR, 0b10000, "th.lrbu">,
+ Sched<[WriteLDB, ReadMemBase]>;
+def TH_LURB : THLoadIndexed<GPR, 0b00010, "th.lurb">,
+ Sched<[WriteLDB, ReadMemBase]>;
+def TH_LURBU : THLoadIndexed<GPR, 0b10010, "th.lurbu">,
+ Sched<[WriteLDB, ReadMemBase]>;
+
+def TH_LRH : THLoadIndexed<GPR, 0b00100, "th.lrh">,
+ Sched<[WriteLDH, ReadMemBase]>;
+def TH_LRHU : THLoadIndexed<GPR, 0b10100, "th.lrhu">,
+ Sched<[WriteLDH, ReadMemBase]>;
+def TH_LURH : THLoadIndexed<GPR, 0b00110, "th.lurh">,
+ Sched<[WriteLDB, ReadMemBase]>;
+def TH_LURHU : THLoadIndexed<GPR, 0b10110, "th.lurhu">,
+ Sched<[WriteLDB, ReadMemBase]>;
+
+def TH_LRW : THLoadIndexed<GPR, 0b01000, "th.lrw">,
+ Sched<[WriteLDW, ReadMemBase]>;
+def TH_LURW : THLoadIndexed<GPR, 0b01010, "th.lurw">,
+ Sched<[WriteLDB, ReadMemBase]>;
+
+def TH_SRB : THStoreIndexed<GPR, 0b00000, "th.srb">,
+ Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
+def TH_SURB : THStoreIndexed<GPR, 0b00010, "th.surb">,
+ Sched<[WriteLDB, ReadMemBase]>;
+
+def TH_SRH : THStoreIndexed<GPR, 0b00100, "th.srh">,
+ Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
+def TH_SURH : THStoreIndexed<GPR, 0b00110, "th.surh">,
+ Sched<[WriteLDB, ReadMemBase]>;
+
+def TH_SRW : THStoreIndexed<GPR, 0b01000, "th.srw">,
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+def TH_SURW : THStoreIndexed<GPR, 0b01010, "th.surw">,
+ Sched<[WriteLDB, ReadMemBase]>;
+}
+
+let Predicates = [HasVendorXTHeadMemIdx, IsRV64], DecoderNamespace = "THeadMemIdx" in {
+// T-Head Load/Store + Update instructions.
+def TH_LWUIA : THLoadUpdate<0b11011, "th.lwuia">,
+ Sched<[WriteLDH, ReadMemBase]>;
+def TH_LWUIB : THLoadUpdate<0b11001, "th.lwuib">,
+ Sched<[WriteLDH, ReadMemBase]>;
+
+def TH_LDIA : THLoadUpdate<0b01111, "th.ldia">,
+ Sched<[WriteLDW, ReadMemBase]>;
+def TH_LDIB : THLoadUpdate<0b01101, "th.ldib">,
+ Sched<[WriteLDW, ReadMemBase]>;
+
+def TH_SDIA : THStoreUpdate<0b01111, "th.sdia">,
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+def TH_SDIB : THStoreUpdate<0b01101, "th.sdib">,
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+
+// T-Head Load/Store Indexed instructions.
+def TH_LRWU : THLoadIndexed<GPR, 0b11000, "th.lrwu">,
+ Sched<[WriteLDW, ReadMemBase]>;
+def TH_LURWU : THLoadIndexed<GPR, 0b11010, "th.lurwu">,
+ Sched<[WriteLDB, ReadMemBase]>;
+
+def TH_LRD : THLoadIndexed<GPR, 0b01100, "th.lrd">,
+ Sched<[WriteLDW, ReadMemBase]>;
+def TH_LURD : THLoadIndexed<GPR, 0b01110, "th.lurd">,
+ Sched<[WriteLDB, ReadMemBase]>;
+
+def TH_SRD : THStoreIndexed<GPR, 0b01100, "th.srd">,
+ Sched<[WriteSTW, ReadStoreData, ReadMemBase]>;
+def TH_SURD : THStoreIndexed<GPR, 0b01110, "th.surd">,
+ Sched<[WriteLDB, ReadMemBase]>;
+}
+
+// T-Head Load/Store Indexed instructions for floating point registers.
+
+let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtF],
+ DecoderNamespace = "THeadFMemIdx" in {
+def TH_FLRW : THLoadIndexed<FPR32, 0b01000, "th.flrw">,
+ Sched<[WriteFLD32, ReadFMemBase]>;
+def TH_FSRW : THStoreIndexed<FPR32, 0b01000, "th.fsrw">,
+ Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]>;
+}
+
+let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtD],
+ DecoderNamespace = "THeadFMemIdx" in {
+def TH_FLRD : THLoadIndexed<FPR64, 0b01100, "th.flrd">,
+ Sched<[WriteFLD64, ReadFMemBase]>;
+def TH_FSRD : THStoreIndexed<FPR64, 0b01100, "th.fsrd">,
+ Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]>;
+}
+
+let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtF, IsRV64],
+ DecoderNamespace = "THeadFMemIdx" in {
+def TH_FLURW : THLoadIndexed<FPR32, 0b01010, "th.flurw">,
+ Sched<[WriteFLD32, ReadFMemBase]>;
+def TH_FSURW : THStoreIndexed<FPR32, 0b01010, "th.fsurw">,
+ Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]>;
+}
+
+let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtD, IsRV64],
+ DecoderNamespace = "THeadFMemIdx" in {
+def TH_FLURD : THLoadIndexed<FPR64, 0b01110, "th.flurd">,
+ Sched<[WriteFLD64, ReadFMemBase]>;
+def TH_FSURD : THStoreIndexed<FPR64, 0b01110, "th.fsurd">,
+ Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]>;
+}
+
let Predicates = [HasVendorXTHeadVdot],
Constraints = "@earlyclobber $vd",
RVVConstraint = WidenV in {
@@ -134,6 +518,169 @@ multiclass VPatTernaryVMAQA_VV_VX<string intrinsic, string instruction,
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
//===----------------------------------------------------------------------===//
+let Predicates = [HasVendorXTHeadBa] in {
+def : Pat<(add (XLenVT GPR:$rs1), (shl GPR:$rs2, uimm2:$uimm2)),
+ (TH_ADDSL GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>;
+
+// Reuse complex patterns from StdExtZba
+def : Pat<(add sh1add_op:$rs1, non_imm12:$rs2),
+ (TH_ADDSL GPR:$rs2, sh1add_op:$rs1, 1)>;
+def : Pat<(add sh2add_op:$rs1, non_imm12:$rs2),
+ (TH_ADDSL GPR:$rs2, sh2add_op:$rs1, 2)>;
+def : Pat<(add sh3add_op:$rs1, non_imm12:$rs2),
+ (TH_ADDSL GPR:$rs2, sh3add_op:$rs1, 3)>;
+
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 6)), GPR:$rs2),
+ (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 1), 1)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 10)), GPR:$rs2),
+ (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 2), 1)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 18)), GPR:$rs2),
+ (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 3), 1)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 12)), GPR:$rs2),
+ (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 1), 2)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 20)), GPR:$rs2),
+ (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 2), 2)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 36)), GPR:$rs2),
+ (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 3), 2)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 24)), GPR:$rs2),
+ (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 1), 3)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 40)), GPR:$rs2),
+ (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 2), 3)>;
+def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 72)), GPR:$rs2),
+ (TH_ADDSL GPR:$rs2, (TH_ADDSL GPR:$rs1, GPR:$rs1, 3), 3)>;
+
+def : Pat<(add (XLenVT GPR:$r), CSImm12MulBy4:$i),
+ (TH_ADDSL GPR:$r, (ADDI (XLenVT X0), (SimmShiftRightBy2XForm CSImm12MulBy4:$i)), 2)>;
+def : Pat<(add (XLenVT GPR:$r), CSImm12MulBy8:$i),
+ (TH_ADDSL GPR:$r, (ADDI (XLenVT X0), (SimmShiftRightBy3XForm CSImm12MulBy8:$i)), 3)>;
+
+def : Pat<(mul (XLenVT GPR:$r), C3LeftShift:$i),
+ (SLLI (TH_ADDSL GPR:$r, GPR:$r, 1),
+ (TrailingZeros C3LeftShift:$i))>;
+def : Pat<(mul (XLenVT GPR:$r), C5LeftShift:$i),
+ (SLLI (TH_ADDSL GPR:$r, GPR:$r, 2),
+ (TrailingZeros C5LeftShift:$i))>;
+def : Pat<(mul (XLenVT GPR:$r), C9LeftShift:$i),
+ (SLLI (TH_ADDSL GPR:$r, GPR:$r, 3),
+ (TrailingZeros C9LeftShift:$i))>;
+
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 11)),
+ (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 2), 1)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 19)),
+ (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 3), 1)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 13)),
+ (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 1), 2)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 21)),
+ (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 2), 2)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 37)),
+ (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 3), 2)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 25)),
+ (TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 2), (TH_ADDSL GPR:$r, GPR:$r, 2), 2)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 41)),
+ (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 2), 3)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 73)),
+ (TH_ADDSL GPR:$r, (TH_ADDSL GPR:$r, GPR:$r, 3), 3)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 27)),
+ (TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 3), (TH_ADDSL GPR:$r, GPR:$r, 3), 1)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 45)),
+ (TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 3), (TH_ADDSL GPR:$r, GPR:$r, 3), 2)>;
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 81)),
+ (TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 3), (TH_ADDSL GPR:$r, GPR:$r, 3), 3)>;
+
+def : Pat<(mul_const_oneuse GPR:$r, (XLenVT 200)),
+ (SLLI (TH_ADDSL (TH_ADDSL GPR:$r, GPR:$r, 2),
+ (TH_ADDSL GPR:$r, GPR:$r, 2), 2), 3)>;
+} // Predicates = [HasVendorXTHeadBa]
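The shift-add rewrites above all lean on th.addsl rd, rs1, rs2, n computing rs1 + (rs2 << n); that semantic is inferred from the patterns themselves rather than quoted from the T-Head manual. A small C++ self-check of two of the decompositions (illustrative only):

  #include <cassert>
  #include <cstdint>

  // Assumed semantics of th.addsl: rs1 + (rs2 << n).
  static uint64_t addsl(uint64_t rs1, uint64_t rs2, unsigned n) {
    return rs1 + (rs2 << n);
  }

  int main() {
    uint64_t x = 12345, y = 678;
    // (add (mul x, 20), y) -> (TH_ADDSL y, (TH_ADDSL x, x, 2), 2)
    assert(addsl(y, addsl(x, x, 2), 2) == 20 * x + y);
    // (mul x, 200) -> (SLLI (TH_ADDSL (TH_ADDSL x,x,2), (TH_ADDSL x,x,2), 2), 3)
    assert((addsl(addsl(x, x, 2), addsl(x, x, 2), 2) << 3) == 200 * x);
    return 0;
  }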
+
+let Predicates = [HasVendorXTHeadBb] in {
+def : PatGprImm<rotr, TH_SRRI, uimmlog2xlen>;
+// There's no encoding for a rotate-left-immediate in X-THead-Bb, as
+// it can be implemented with th.srri by negating the immediate.
+def : Pat<(rotl (XLenVT GPR:$rs1), uimmlog2xlen:$shamt),
+ (TH_SRRI GPR:$rs1, (ImmSubFromXLen uimmlog2xlen:$shamt))>;
+def : Pat<(sext_inreg (XLenVT GPR:$rs1), i32), (TH_EXT GPR:$rs1, 31, 0)>;
+def : Pat<(sext_inreg (XLenVT GPR:$rs1), i16), (TH_EXT GPR:$rs1, 15, 0)>;
+def : Pat<(sext_inreg (XLenVT GPR:$rs1), i8), (TH_EXT GPR:$rs1, 7, 0)>;
+def : Pat<(sext_inreg (XLenVT GPR:$rs1), i1), (TH_EXT GPR:$rs1, 0, 0)>;
+def : PatGpr<ctlz, TH_FF1>;
+def : Pat<(XLenVT (ctlz (xor (XLenVT GPR:$rs1), -1))), (TH_FF0 GPR:$rs1)>;
+def : PatGpr<bswap, TH_REV>;
+} // Predicates = [HasVendorXTHeadBb]
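The rotate-left pattern above folds rotl into th.srri with an XLEN-minus-shamt amount, per the comment that X-THead-Bb has no rotate-left-immediate encoding. A tiny C++ check of the underlying identity rotl(x, s) == rotr(x, XLEN - s) for XLEN = 64 (illustrative only):

  #include <cassert>
  #include <cstdint>

  static uint64_t rotl64(uint64_t x, unsigned s) { return (x << s) | (x >> (64 - s)); }
  static uint64_t rotr64(uint64_t x, unsigned s) { return (x >> s) | (x << (64 - s)); }

  int main() {
    uint64_t x = 0x0123456789ABCDEFULL;
    for (unsigned s = 1; s < 64; ++s)  // check nonzero shift amounts
      assert(rotl64(x, s) == rotr64(x, 64 - s));
    return 0;
  }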
+
+let Predicates = [HasVendorXTHeadBb, IsRV64] in {
+def : PatGprImm<riscv_rorw, TH_SRRIW, uimm5>;
+def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
+ (TH_SRRIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
+def : Pat<(sra (bswap i64:$rs1), (i64 32)),
+ (TH_REVW i64:$rs1)>;
+def : Pat<(binop_allwusers<srl> (bswap i64:$rs1), (i64 32)),
+ (TH_REVW i64:$rs1)>;
+def : Pat<(riscv_clzw i64:$rs1),
+ (TH_FF0 (SLLI (XORI i64:$rs1, -1), 32))>;
+} // Predicates = [HasVendorXTHeadBb, IsRV64]
+
+let Predicates = [HasVendorXTHeadBs] in {
+def : Pat<(and (srl (XLenVT GPR:$rs1), uimmlog2xlen:$shamt), 1),
+ (TH_TST GPR:$rs1, uimmlog2xlen:$shamt)>;
+def : Pat<(XLenVT (seteq (and (XLenVT GPR:$rs1), SingleBitSetMask:$mask), 0)),
+ (TH_TST (XORI GPR:$rs1, -1), SingleBitSetMask:$mask)>;
+} // Predicates = [HasVendorXTHeadBs]
+
+let Predicates = [HasVendorXTHeadCondMov] in {
+def : Pat<(select (XLenVT GPR:$cond), (XLenVT GPR:$a), (XLenVT GPR:$b)),
+ (TH_MVEQZ GPR:$a, GPR:$b, GPR:$cond)>;
+def : Pat<(select (XLenVT GPR:$cond), (XLenVT GPR:$a), (XLenVT 0)),
+ (TH_MVEQZ GPR:$a, (XLenVT X0), GPR:$cond)>;
+def : Pat<(select (XLenVT GPR:$cond), (XLenVT 0), (XLenVT GPR:$b)),
+ (TH_MVNEZ GPR:$b, (XLenVT X0), GPR:$cond)>;
+
+def : Pat<(select (riscv_seteq (XLenVT GPR:$cond)), (XLenVT GPR:$a), (XLenVT GPR:$b)),
+ (TH_MVNEZ GPR:$a, GPR:$b, GPR:$cond)>;
+def : Pat<(select (riscv_setne (XLenVT GPR:$cond)), (XLenVT GPR:$a), (XLenVT GPR:$b)),
+ (TH_MVEQZ GPR:$a, GPR:$b, GPR:$cond)>;
+def : Pat<(select (riscv_seteq (XLenVT GPR:$cond)), (XLenVT GPR:$a), (XLenVT 0)),
+ (TH_MVNEZ GPR:$a, (XLenVT X0), GPR:$cond)>;
+def : Pat<(select (riscv_setne (XLenVT GPR:$cond)), (XLenVT GPR:$a), (XLenVT 0)),
+ (TH_MVEQZ GPR:$a, (XLenVT X0), GPR:$cond)>;
+def : Pat<(select (riscv_seteq (XLenVT GPR:$cond)), (XLenVT 0), (XLenVT GPR:$b)),
+ (TH_MVEQZ GPR:$b, (XLenVT X0), GPR:$cond)>;
+def : Pat<(select (riscv_setne (XLenVT GPR:$cond)), (XLenVT 0), (XLenVT GPR:$b)),
+ (TH_MVNEZ GPR:$b, (XLenVT X0), GPR:$cond)>;
+} // Predicates = [HasVendorXTHeadCondMov]
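These select patterns imply the conditional-move semantics: th.mveqz rd, rs1, rs2 writes rs1 into the tied rd when rs2 == 0 (and th.mvnez when rs2 != 0), so seeding rd with one arm and passing the other as rs1 reproduces a full select. A hedged C++ sketch of that reading (assumed, not quoted from the T-Head spec):

  #include <cassert>
  #include <cstdint>

  // Assumed semantics; rd is the tied input/output operand.
  static uint64_t mveqz(uint64_t rd, uint64_t rs1, uint64_t rs2) {
    return rs2 == 0 ? rs1 : rd;
  }

  int main() {
    uint64_t a = 1, b = 2;
    // (select cond, a, b) -> (TH_MVEQZ $a, $b, $cond): the tied rd starts as a.
    for (uint64_t cond : {0ull, 7ull})
      assert(mveqz(a, b, cond) == (cond ? a : b));
    return 0;
  }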
+
+let Predicates = [HasVendorXTHeadMac] in {
+def : Pat<(add GPR:$rd, (mul (XLenVT GPR:$rs1), (XLenVT GPR:$rs2))),
+ (TH_MULA GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+def : Pat<(sub GPR:$rd, (mul (XLenVT GPR:$rs1), (XLenVT GPR:$rs2))),
+ (TH_MULS GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasVendorXTHeadMac]
+
+let Predicates = [HasVendorXTHeadMac, IsRV64] in {
+// mulaw, mulsw are available only in RV64.
+def : Pat<(binop_allwusers<add> GPR:$rd, (mul GPR:$rs1, GPR:$rs2)),
+ (TH_MULAW GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+def : Pat<(binop_allwusers<sub> GPR:$rd, (mul GPR:$rs1, GPR:$rs2)),
+ (TH_MULSW GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+// mulah, mulsh produce a sign-extended result.
+def : Pat<(binop_allwusers<add> GPR:$rd, (mul
+ (sexti16 (i64 GPR:$rs1)),
+ (sexti16 (i64 GPR:$rs2)))),
+ (TH_MULAH GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+def : Pat<(binop_allwusers<sub> GPR:$rd, (mul
+ (sexti16 (i64 GPR:$rs1)),
+ (sexti16 (i64 GPR:$rs2)))),
+ (TH_MULSH GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasVendorXTHeadMac, IsRV64]
+
+let Predicates = [HasVendorXTHeadMac, IsRV32] in {
+def : Pat<(i32 (add GPR:$rd, (mul (sexti16 (i32 GPR:$rs1)),
+ (sexti16 (i32 GPR:$rs2))))),
+ (TH_MULAH GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+def : Pat<(i32 (sub GPR:$rd, (mul (sexti16 (i32 GPR:$rs1)),
+ (sexti16 (i32 GPR:$rs2))))),
+ (TH_MULSH GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasVendorXTHeadMac, IsRV32]
+
defm PseudoTHVdotVMAQA : VPseudoVMAQA_VV_VX;
defm PseudoTHVdotVMAQAU : VPseudoVMAQA_VV_VX;
defm PseudoTHVdotVMAQASU : VPseudoVMAQA_VV_VX;
@@ -145,3 +692,189 @@ defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqau", "PseudoTHVdotVMAQAU", AllQu
defm : VPatTernaryVMAQA_VV_VX<"int_riscv_th_vmaqasu","PseudoTHVdotVMAQASU",AllQuadWidenableInt8NoVLMulVectors>;
defm : VPatTernaryVMAQA_VX<"int_riscv_th_vmaqaus", "PseudoTHVdotVMAQAUS",AllQuadWidenableInt8NoVLMulVectors>;
}
+
+def uimm2_3_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 0x3,
+ SDLoc(N), Subtarget->getXLenVT());
+}]>;
+
+def uimm2_3 : Operand<XLenVT>, ImmLeaf<XLenVT, [{
+ return isShiftedUInt<2, 3>(Imm);
+}], uimm2_3_XFORM>;
+
+def uimm2_4_XFORM : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant((N->getZExtValue() >> 4) & 0x3,
+ SDLoc(N), Subtarget->getXLenVT());
+}]>;
+
+def uimm2_4 : Operand<XLenVT>, ImmLeaf<XLenVT, [{
+ return isShiftedUInt<2, 4>(Imm);
+}], uimm2_4_XFORM>;
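uimm2_3 and uimm2_4 accept the byte offset carried by the memory-pair SDNodes (a 2-bit field pre-scaled by 8 or 16), and the XForms recover the 2-bit field that actually gets encoded; that is my reading of the ImmLeaf/SDNodeXForm pairs above. A short C++ check (illustrative only):

  #include <cassert>
  #include <cstdint>

  // Mirrors isShiftedUInt<2, Shift>(Imm).
  static bool isShiftedUInt2(unsigned shift, uint64_t imm) {
    return (imm & ((1ull << shift) - 1)) == 0 && (imm >> shift) < 4;
  }

  int main() {
    for (uint64_t field = 0; field < 4; ++field) {
      uint64_t off = field << 4;            // th.ldd/th.sdd-style offsets 0,16,32,48
      assert(isShiftedUInt2(4, off));       // accepted by the uimm2_4 ImmLeaf
      assert(((off >> 4) & 0x3) == field);  // what uimm2_4_XFORM computes
    }
    return 0;
  }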
+
+let Predicates = [HasVendorXTHeadMemPair, IsRV64] in {
+def : Pat<(th_lwud i64:$rs1, uimm2_3:$uimm2_3), (TH_LWUD i64:$rs1, uimm2_3:$uimm2_3, 3)>;
+def : Pat<(th_ldd i64:$rs1, uimm2_4:$uimm2_4), (TH_LDD i64:$rs1, uimm2_4:$uimm2_4, 4)>;
+
+def : Pat<(th_sdd i64:$rd1, i64:$rd2, i64:$rs1, uimm2_4:$uimm2_4),
+ (TH_SDD i64:$rd1, i64:$rd2, i64:$rs1, uimm2_4:$uimm2_4, 4)>;
+}
+
+let Predicates = [HasVendorXTHeadMemPair] in {
+ def : Pat<(th_lwd GPR:$rs1, uimm2_3:$uimm2_3), (TH_LWD GPR:$rs1, uimm2_3:$uimm2_3, 3)>;
+ def : Pat<(th_swd GPR:$rd1, GPR:$rd2, GPR:$rs1, uimm2_3:$uimm2_3),
+ (TH_SWD GPR:$rd1, GPR:$rd2, GPR:$rs1, uimm2_3:$uimm2_3, 3)>;
+}
+
+let Predicates = [HasVendorXTHeadCmo], DecoderNamespace = "THeadCmo" in {
+def TH_DCACHE_CSW : THCacheInst_r<0b00001, "th.dcache.csw">;
+def TH_DCACHE_ISW : THCacheInst_r<0b00010, "th.dcache.isw">;
+def TH_DCACHE_CISW : THCacheInst_r<0b00011, "th.dcache.cisw">;
+def TH_DCACHE_CVAL1 : THCacheInst_r<0b00100, "th.dcache.cval1">;
+def TH_DCACHE_CVA : THCacheInst_r<0b00101, "th.dcache.cva">;
+def TH_DCACHE_IVA : THCacheInst_r<0b00110, "th.dcache.iva">;
+def TH_DCACHE_CIVA : THCacheInst_r<0b00111, "th.dcache.civa">;
+def TH_DCACHE_CPAL1 : THCacheInst_r<0b01000, "th.dcache.cpal1">;
+def TH_DCACHE_CPA : THCacheInst_r<0b01001, "th.dcache.cpa">;
+def TH_DCACHE_IPA : THCacheInst_r<0b01010, "th.dcache.ipa">;
+def TH_DCACHE_CIPA : THCacheInst_r<0b01011, "th.dcache.cipa">;
+def TH_ICACHE_IVA : THCacheInst_r<0b10000, "th.icache.iva">;
+def TH_ICACHE_IPA : THCacheInst_r<0b11000, "th.icache.ipa">;
+
+def TH_DCACHE_CALL : THCacheInst_void<0b00001, "th.dcache.call">;
+def TH_DCACHE_IALL : THCacheInst_void<0b00010, "th.dcache.iall">;
+def TH_DCACHE_CIALL : THCacheInst_void<0b00011, "th.dcache.ciall">;
+def TH_ICACHE_IALL : THCacheInst_void<0b10000, "th.icache.iall">;
+def TH_ICACHE_IALLS : THCacheInst_void<0b10001, "th.icache.ialls">;
+def TH_L2CACHE_CALL : THCacheInst_void<0b10101, "th.l2cache.call">;
+def TH_L2CACHE_IALL : THCacheInst_void<0b10110, "th.l2cache.iall">;
+def TH_L2CACHE_CIALL : THCacheInst_void<0b10111, "th.l2cache.ciall">;
+}
+
+let Predicates = [HasVendorXTHeadSync], DecoderNamespace = "THeadSync" in {
+def TH_SFENCE_VMAS : THCacheInst_rr<0b0000010, "th.sfence.vmas">;
+def TH_SYNC : THCacheInst_void<0b11000, "th.sync">;
+def TH_SYNC_S : THCacheInst_void<0b11001, "th.sync.s">;
+def TH_SYNC_I : THCacheInst_void<0b11010, "th.sync.i">;
+def TH_SYNC_IS : THCacheInst_void<0b11011, "th.sync.is">;
+}
+
+def AddrRegRegScale : ComplexPattern<iPTR, 3, "SelectAddrRegRegScale<3>">;
+def AddrRegZextRegScale
+ : ComplexPattern<i64, 3, "SelectAddrRegZextRegScale<3, 32>",
+ [], [], 10>;
+
+multiclass LdIdxPat<PatFrag LoadOp, RVInst Inst, ValueType vt = XLenVT> {
+def : Pat<(vt (LoadOp (AddrRegRegScale (XLenVT GPR:$rs1), (XLenVT GPR:$rs2), uimm2:$uimm2))),
+ (Inst GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>;
+}
+
+multiclass LdZextIdxPat<PatFrag LoadOp, RVInst Inst, ValueType vt = i64> {
+def : Pat<(vt (LoadOp (AddrRegZextRegScale (i64 GPR:$rs1), (i64 GPR:$rs2), uimm2:$uimm2))),
+ (Inst GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>;
+}
+
+multiclass StIdxPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
+ ValueType vt = XLenVT> {
+def : Pat<(StoreOp (vt StTy:$rd),
+ (AddrRegRegScale (XLenVT GPR:$rs1), (XLenVT GPR:$rs2), uimm2:$uimm2)),
+ (Inst StTy:$rd, GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>;
+}
+
+multiclass StZextIdxPat<PatFrag StoreOp, RVInst Inst, RegisterClass StTy,
+ ValueType vt = i64> {
+def : Pat<(StoreOp (vt StTy:$rd),
+ (AddrRegZextRegScale (i64 GPR:$rs1), (i64 GPR:$rs2), uimm2:$uimm2)),
+ (Inst StTy:$rd, GPR:$rs1, GPR:$rs2, uimm2:$uimm2)>;
+}
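The LdIdxPat/StIdxPat multiclasses match addresses of the form base + (index << uimm2) with uimm2 in [0, 3]; the ComplexPattern name SelectAddrRegRegScale<3> suggests that scale bound, though the actual selection code lives in C++ outside this diff. A minimal sketch of the address shape being matched (illustrative only, semantics assumed):

  #include <cassert>
  #include <cstdint>

  static uint64_t regRegScaleAddr(uint64_t base, uint64_t index, unsigned uimm2) {
    return base + (index << uimm2);  // the addressing form the indexed loads/stores use
  }

  int main() {
    // e.g. element 5 of a 4-byte array at `base` uses scale 2:
    assert(regRegScaleAddr(0x1000, 5, 2) == 0x1000 + 5 * 4);
    return 0;
  }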
+
+let Predicates = [HasVendorXTHeadMemIdx] in {
+defm : LdIdxPat<extloadi8, TH_LRB>;
+defm : LdIdxPat<sextloadi8, TH_LRB>;
+defm : LdIdxPat<zextloadi8, TH_LRBU>;
+
+defm : LdIdxPat<extloadi16, TH_LRH>;
+defm : LdIdxPat<sextloadi16, TH_LRH>;
+defm : LdIdxPat<zextloadi16, TH_LRHU>;
+
+defm : StIdxPat<truncstorei8, TH_SRB, GPR>;
+defm : StIdxPat<truncstorei16, TH_SRH, GPR>;
+}
+
+let Predicates = [HasVendorXTHeadMemIdx, IsRV32] in {
+defm : LdIdxPat<load, TH_LRW, i32>;
+defm : StIdxPat<store, TH_SRW, GPR, i32>;
+}
+
+let Predicates = [HasVendorXTHeadMemIdx, IsRV64] in {
+defm : LdZextIdxPat<extloadi8, TH_LURB>;
+defm : LdZextIdxPat<sextloadi8, TH_LURB>;
+defm : LdZextIdxPat<zextloadi8, TH_LURBU>;
+
+defm : LdZextIdxPat<extloadi16, TH_LURH>;
+defm : LdZextIdxPat<sextloadi16, TH_LURH>;
+defm : LdZextIdxPat<zextloadi16, TH_LURHU>;
+
+defm : LdIdxPat<extloadi32, TH_LRW, i64>;
+defm : LdIdxPat<sextloadi32, TH_LRW, i64>;
+defm : LdIdxPat<zextloadi32, TH_LRWU, i64>;
+
+defm : LdZextIdxPat<extloadi32, TH_LURW>;
+defm : LdZextIdxPat<sextloadi32, TH_LURW>;
+defm : LdZextIdxPat<zextloadi32, TH_LURWU>;
+
+defm : LdIdxPat<load, TH_LRD, i64>;
+defm : LdZextIdxPat<load, TH_LURD>;
+
+defm : StZextIdxPat<truncstorei8, TH_SURB, GPR>;
+defm : StZextIdxPat<truncstorei16, TH_SURH, GPR>;
+defm : StIdxPat<truncstorei32, TH_SRW, GPR, i64>;
+defm : StZextIdxPat<truncstorei32, TH_SURW, GPR, i64>;
+defm : StIdxPat<store, TH_SRD, GPR, i64>;
+defm : StZextIdxPat<store, TH_SURD, GPR>;
+}
+
+let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtF] in {
+defm : LdIdxPat<load, TH_FLRW, f32>;
+defm : StIdxPat<store, TH_FSRW, FPR32, f32>;
+}
+
+let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtD] in {
+defm : LdIdxPat<load, TH_FLRD, f64>;
+defm : StIdxPat<store, TH_FSRD, FPR64, f64>;
+}
+
+let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtF, IsRV64] in {
+defm : LdZextIdxPat<load, TH_FLURW, f32>;
+defm : StZextIdxPat<store, TH_FSURW, FPR32, f32>;
+}
+
+let Predicates = [HasVendorXTHeadFMemIdx, HasStdExtD, IsRV64] in {
+defm : LdZextIdxPat<load, TH_FLURD, f64>;
+defm : StZextIdxPat<store, TH_FSURD, FPR64, f64>;
+}
+
+def simm5shl2 : ComplexPattern<XLenVT, 2, "selectSimm5Shl2">;
+
+multiclass StoreUpdatePat<PatFrag st, Instruction Inst, ValueType vt = XLenVT> {
+def : Pat<(st (vt GPR:$rd), GPR:$rs1, (simm5shl2 simm5:$simm5, uimm2:$uimm2)),
+ (Inst GPR:$rd, GPR:$rs1, simm5:$simm5, uimm2:$uimm2)>;
+}
+
+let Predicates = [HasVendorXTHeadMemIdx] in {
+defm : StoreUpdatePat<post_truncsti8, TH_SBIA>;
+defm : StoreUpdatePat<pre_truncsti8, TH_SBIB>;
+defm : StoreUpdatePat<post_truncsti16, TH_SHIA>;
+defm : StoreUpdatePat<pre_truncsti16, TH_SHIB>;
+}
+
+let Predicates = [HasVendorXTHeadMemIdx, IsRV32] in {
+defm : StoreUpdatePat<post_store, TH_SWIA, i32>;
+defm : StoreUpdatePat<pre_store, TH_SWIB, i32>;
+}
+
+let Predicates = [HasVendorXTHeadMemIdx, IsRV64] in {
+defm : StoreUpdatePat<post_truncsti32, TH_SWIA, i64>;
+defm : StoreUpdatePat<pre_truncsti32, TH_SWIB, i64>;
+defm : StoreUpdatePat<post_store, TH_SDIA, i64>;
+defm : StoreUpdatePat<pre_store, TH_SDIB, i64>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td
index 68c3a2105373..f6b0feaf7628 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXVentana.td
@@ -29,82 +29,8 @@ def VT_MASKCN : VTMaskedMove<0b111, "vt.maskcn">,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
let Predicates = [IsRV64, HasVendorXVentanaCondOps] in {
-// Directly use MASKC/MASKCN in case of any of the operands being 0.
-def : Pat<(select GPR:$rc, GPR:$rs1, (i64 0)),
- (VT_MASKC $rs1, $rc)>;
-def : Pat<(select GPR:$rc, (i64 0), GPR:$rs1),
- (VT_MASKCN $rs1, $rc)>;
-
-def : Pat<(select (i64 (setne GPR:$rc, (i64 0))), GPR:$rs1, (i64 0)),
+def : Pat<(XLenVT (riscv_czero_eqz GPR:$rs1, GPR:$rc)),
(VT_MASKC GPR:$rs1, GPR:$rc)>;
-def : Pat<(select (i64 (seteq GPR:$rc, (i64 0))), GPR:$rs1, (i64 0)),
- (VT_MASKCN GPR:$rs1, GPR:$rc)>;
-def : Pat<(select (i64 (setne GPR:$rc, (i64 0))), (i64 0), GPR:$rs1),
+def : Pat<(XLenVT (riscv_czero_nez GPR:$rs1, GPR:$rc)),
(VT_MASKCN GPR:$rs1, GPR:$rc)>;
-def : Pat<(select (i64 (seteq GPR:$rc, (i64 0))), (i64 0), GPR:$rs1),
- (VT_MASKC GPR:$rs1, GPR:$rc)>;
-
-def : Pat<(select (i64 (setne GPR:$x, simm12_plus1:$y)), GPR:$rs1, (i64 0)),
- (VT_MASKC GPR:$rs1, (ADDI GPR:$x, (NegImm simm12_plus1:$y)))>;
-def : Pat<(select (i64 (seteq GPR:$x, simm12_plus1:$y)), GPR:$rs1, (i64 0)),
- (VT_MASKCN GPR:$rs1, (ADDI GPR:$x, (NegImm simm12_plus1:$y)))>;
-def : Pat<(select (i64 (setne GPR:$x, simm12_plus1:$y)), (i64 0), GPR:$rs1),
- (VT_MASKCN GPR:$rs1, (ADDI GPR:$x, (NegImm simm12_plus1:$y)))>;
-def : Pat<(select (i64 (seteq GPR:$x, simm12_plus1:$y)), (i64 0), GPR:$rs1),
- (VT_MASKC GPR:$rs1, (ADDI GPR:$x, (NegImm simm12_plus1:$y)))>;
-
-def : Pat<(select (i64 (setne GPR:$x, (i64 -2048))), GPR:$rs1, (i64 0)),
- (VT_MASKC GPR:$rs1, (XORI GPR:$x, -2048))>;
-def : Pat<(select (i64 (seteq GPR:$x, (i64 -2048))), GPR:$rs1, (i64 0)),
- (VT_MASKCN GPR:$rs1, (XORI GPR:$x, -2048))>;
-def : Pat<(select (i64 (setne GPR:$x, (i64 -2048))), (i64 0), GPR:$rs1),
- (VT_MASKCN GPR:$rs1, (XORI GPR:$x, -2048))>;
-def : Pat<(select (i64 (seteq GPR:$x, (i64 -2048))), (i64 0), GPR:$rs1),
- (VT_MASKC GPR:$rs1, (XORI GPR:$x, -2048))>;
-
-def : Pat<(select (i64 (setne GPR:$x, GPR:$y)), GPR:$rs1, (i64 0)),
- (VT_MASKC GPR:$rs1, (XOR GPR:$x, GPR:$y))>;
-def : Pat<(select (i64 (seteq GPR:$x, GPR:$y)), GPR:$rs1, (i64 0)),
- (VT_MASKCN GPR:$rs1, (XOR GPR:$x, GPR:$y))>;
-def : Pat<(select (i64 (setne GPR:$x, GPR:$y)), (i64 0), GPR:$rs1),
- (VT_MASKCN GPR:$rs1, (XOR GPR:$x, GPR:$y))>;
-def : Pat<(select (i64 (seteq GPR:$x, GPR:$y)), (i64 0), GPR:$rs1),
- (VT_MASKC GPR:$rs1, (XOR GPR:$x, GPR:$y))>;
-
-// Conditional AND operation patterns.
-def : Pat<(i64 (select GPR:$rc, (and GPR:$rs1, GPR:$rs2), GPR:$rs1)),
- (OR (AND $rs1, $rs2), (VT_MASKCN $rs1, $rc))>;
-def : Pat<(i64 (select GPR:$rc, GPR:$rs1, (and GPR:$rs1, GPR:$rs2))),
- (OR (AND $rs1, $rs2), (VT_MASKC $rs1, $rc))>;
-
-// Basic select pattern that selects between 2 registers.
-def : Pat<(i64 (select GPR:$rc, GPR:$rs1, GPR:$rs2)),
- (OR (VT_MASKC $rs1, $rc), (VT_MASKCN $rs2, $rc))>;
-
-def : Pat<(i64 (select (i64 (setne GPR:$rc, (i64 0))), GPR:$rs1, GPR:$rs2)),
- (OR (VT_MASKC GPR:$rs1, GPR:$rc), (VT_MASKCN GPR:$rs2, GPR:$rc))>;
-def : Pat<(i64 (select (i64 (seteq GPR:$rc, (i64 0))), GPR:$rs2, GPR:$rs1)),
- (OR (VT_MASKC GPR:$rs1, GPR:$rc), (VT_MASKCN GPR:$rs2, GPR:$rc))>;
-
-def : Pat<(i64 (select (i64 (setne GPR:$x, simm12_plus1:$y)), GPR:$rs1, GPR:$rs2)),
- (OR (VT_MASKC GPR:$rs1, (ADDI GPR:$x, (NegImm simm12_plus1:$y))),
- (VT_MASKCN GPR:$rs2, (ADDI GPR:$x, (NegImm simm12_plus1:$y))))>;
-def : Pat<(i64 (select (i64 (seteq GPR:$x, simm12_plus1:$y)), GPR:$rs2, GPR:$rs1)),
- (OR (VT_MASKC GPR:$rs1, (ADDI GPR:$x, (NegImm simm12_plus1:$y))),
- (VT_MASKCN GPR:$rs2, (ADDI GPR:$x, (NegImm simm12_plus1:$y))))>;
-
-def : Pat<(i64 (select (i64 (setne GPR:$x, (i64 -2048))), GPR:$rs1, GPR:$rs2)),
- (OR (VT_MASKC GPR:$rs1, (XORI GPR:$x, -2048)),
- (VT_MASKCN GPR:$rs2, (XORI GPR:$x, -2048)))>;
-def : Pat<(i64 (select (i64 (seteq GPR:$x, (i64 -2048))), GPR:$rs2, GPR:$rs1)),
- (OR (VT_MASKC GPR:$rs1, (XORI GPR:$x, -2048)),
- (VT_MASKCN GPR:$rs2, (XORI GPR:$x, -2048)))>;
-
-def : Pat<(i64 (select (i64 (setne GPR:$x, GPR:$y)), GPR:$rs1, GPR:$rs2)),
- (OR (VT_MASKC GPR:$rs1, (XOR GPR:$x, GPR:$y)),
- (VT_MASKCN GPR:$rs2, (XOR GPR:$x, GPR:$y)))>;
-def : Pat<(i64 (select (i64 (seteq GPR:$x, GPR:$y)), GPR:$rs2, GPR:$rs1)),
- (OR (VT_MASKC GPR:$rs1, (XOR GPR:$x, GPR:$y)),
- (VT_MASKCN GPR:$rs2, (XOR GPR:$x, GPR:$y)))>;
-
} // Predicates = [IsRV64, HasVendorXVentanaCondOps]
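The removed patterns spelled out select lowering directly in terms of vt.maskc/vt.maskcn; the two patterns that remain instead hook the generic RISCVISD::CZERO_EQZ/CZERO_NEZ nodes, whose semantics line up with the Ventana instructions (rd = cond ? rs1 : 0 and rd = cond ? 0 : rs1 respectively, as I read them). A small C++ sketch of how a full select still falls out of the two masked moves (illustrative only, semantics assumed):

  #include <cassert>
  #include <cstdint>

  static uint64_t maskc (uint64_t rs1, uint64_t rc) { return rc != 0 ? rs1 : 0; }
  static uint64_t maskcn(uint64_t rs1, uint64_t rc) { return rc == 0 ? rs1 : 0; }

  int main() {
    // "rc ? a : b" as maskc(a, rc) | maskcn(b, rc) -- the shape the removed
    // patterns emitted and that the generic czero lowering can still produce.
    uint64_t a = 11, b = 22;
    for (uint64_t rc : {0ull, 5ull})
      assert((maskc(a, rc) | maskcn(b, rc)) == (rc ? a : b));
    return 0;
  }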
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 1a0ad8098b43..caeedfa652e4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -38,6 +38,9 @@ def riscv_orc_b : SDNode<"RISCVISD::ORC_B", SDTIntUnaryOp>;
def riscv_zip : SDNode<"RISCVISD::ZIP", SDTIntUnaryOp>;
def riscv_unzip : SDNode<"RISCVISD::UNZIP", SDTIntUnaryOp>;
def riscv_absw : SDNode<"RISCVISD::ABSW", SDTIntUnaryOp>;
+def riscv_clmul : SDNode<"RISCVISD::CLMUL", SDTIntBinOp>;
+def riscv_clmulh : SDNode<"RISCVISD::CLMULH", SDTIntBinOp>;
+def riscv_clmulr : SDNode<"RISCVISD::CLMULR", SDTIntBinOp>;
def UImmLog2XLenHalfAsmOperand : AsmOperandClass {
let Name = "UImmLog2XLenHalf";
@@ -66,13 +69,13 @@ def shfl_uimm : Operand<XLenVT>, ImmLeaf<XLenVT, [{
def BCLRXForm : SDNodeXForm<imm, [{
// Find the lowest 0.
- return CurDAG->getTargetConstant(countTrailingOnes(N->getZExtValue()),
+ return CurDAG->getTargetConstant(llvm::countr_one(N->getZExtValue()),
SDLoc(N), N->getValueType(0));
}]>;
def SingleBitSetMaskToIndex : SDNodeXForm<imm, [{
// Find the lowest 1.
- return CurDAG->getTargetConstant(countTrailingZeros(N->getZExtValue()),
+ return CurDAG->getTargetConstant(llvm::countr_zero(N->getZExtValue()),
SDLoc(N), N->getValueType(0));
}]>;
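Both XForms above switch from the old countTrailingOnes/countTrailingZeros helpers to llvm::countr_one/llvm::countr_zero; the behavior is unchanged, and for the masks these transforms see, both calls recover the bit index. A C++20 check using the std::bit equivalents (illustrative only; std::countr_one/std::countr_zero have the same semantics as the LLVM helpers here):

  #include <bit>
  #include <cassert>
  #include <cstdint>

  int main() {
    for (unsigned k = 0; k < 64; ++k) {
      uint64_t clearMask = ~(uint64_t(1) << k);  // only clear bit is k ("lowest 0")
      uint64_t setMask   =  uint64_t(1) << k;    // only set bit is k ("lowest 1")
      assert(std::countr_one(clearMask) == int(k));
      assert(std::countr_zero(setMask)  == int(k));
    }
    return 0;
  }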
@@ -104,7 +107,7 @@ def BSETINVTwoBitsMask : PatLeaf<(imm), [{
def BSETINVTwoBitsMaskHigh : SDNodeXForm<imm, [{
uint64_t I = N->getZExtValue();
- return CurDAG->getTargetConstant(63 - countLeadingZeros(I), SDLoc(N),
+ return CurDAG->getTargetConstant(llvm::Log2_64(I), SDLoc(N),
N->getValueType(0));
}]>;
@@ -138,7 +141,7 @@ def BCLRITwoBitsMask : PatLeaf<(imm), [{
}]>;
def BCLRITwoBitsMaskLow : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(countTrailingZeros(~N->getZExtValue()),
+ return CurDAG->getTargetConstant(llvm::countr_zero(~N->getZExtValue()),
SDLoc(N), N->getValueType(0));
}]>;
@@ -146,7 +149,7 @@ def BCLRITwoBitsMaskHigh : SDNodeXForm<imm, [{
uint64_t I = N->getSExtValue();
if (!Subtarget->is64Bit())
I |= 0xffffffffull << 32;
- return CurDAG->getTargetConstant(63 - countLeadingZeros(~I), SDLoc(N),
+ return CurDAG->getTargetConstant(llvm::Log2_64(~I), SDLoc(N),
N->getValueType(0));
}]>;
@@ -170,44 +173,38 @@ def BCLRIANDIMaskLow : SDNodeXForm<imm, [{
def C3LeftShift : PatLeaf<(imm), [{
uint64_t C = N->getZExtValue();
- return C > 3 && ((C % 3) == 0) && isPowerOf2_64(C / 3);
+ return C > 3 && (C >> llvm::countr_zero(C)) == 3;
}]>;
def C5LeftShift : PatLeaf<(imm), [{
uint64_t C = N->getZExtValue();
- return C > 5 && ((C % 5) == 0) && isPowerOf2_64(C / 5);
+ return C > 5 && (C >> llvm::countr_zero(C)) == 5;
}]>;
def C9LeftShift : PatLeaf<(imm), [{
uint64_t C = N->getZExtValue();
- return C > 9 && ((C % 9) == 0) && isPowerOf2_64(C / 9);
+ return C > 5 && (C >> llvm::countr_zero(C)) == 9;
}]>;
// Constant of the form (3 << C) where C is less than 32.
def C3LeftShiftUW : PatLeaf<(imm), [{
uint64_t C = N->getZExtValue();
- if (C <= 3 || (C % 3) != 0)
- return false;
- C /= 3;
- return isPowerOf2_64(C) && C < (1ULL << 32);
+ unsigned Shift = llvm::countr_zero(C);
+ return 1 <= Shift && Shift < 32 && (C >> Shift) == 3;
}]>;
// Constant of the form (5 << C) where C is less than 32.
def C5LeftShiftUW : PatLeaf<(imm), [{
uint64_t C = N->getZExtValue();
- if (C <= 5 || (C % 5) != 0)
- return false;
- C /= 5;
- return isPowerOf2_64(C) && C < (1ULL << 32);
+ unsigned Shift = llvm::countr_zero(C);
+ return 1 <= Shift && Shift < 32 && (C >> Shift) == 5;
}]>;
// Constant of the form (9 << C) where C is less than 32.
def C9LeftShiftUW : PatLeaf<(imm), [{
uint64_t C = N->getZExtValue();
- if (C <= 9 || (C % 9) != 0)
- return false;
- C /= 9;
- return isPowerOf2_64(C) && C < (1ULL << 32);
+ unsigned Shift = llvm::countr_zero(C);
+ return 1 <= Shift && Shift < 32 && (C >> Shift) == 9;
}]>;
def CSImm12MulBy4 : PatLeaf<(imm), [{
@@ -248,7 +245,7 @@ def Shifted32OnesMask : PatLeaf<(imm), [{
if (!isShiftedMask_64(Imm))
return false;
- unsigned TrailingZeros = countTrailingZeros(Imm);
+ unsigned TrailingZeros = llvm::countr_zero(Imm);
return TrailingZeros > 0 && TrailingZeros < 32 &&
Imm == UINT64_C(0xFFFFFFFF) << TrailingZeros;
}], TrailingZeros>;
@@ -290,20 +287,6 @@ class RVBShiftW_ri<bits<7> imm11_5, bits<3> funct3, RISCVOpcode opcode,
(ins GPR:$rs1, uimm5:$shamt), opcodestr,
"$rd, $rs1, $shamt">;
-// Using RVInstIShiftW since it allocates 5 bits instead of 6 to shamt.
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class RVBShfl_ri<bits<7> imm11_5, bits<3> funct3, RISCVOpcode opcode,
- string opcodestr>
- : RVInstIShiftW<imm11_5, funct3, opcode, (outs GPR:$rd),
- (ins GPR:$rs1, shfl_uimm:$shamt), opcodestr,
- "$rd, $rs1, $shamt">;
-
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class RVBTernaryR<bits<2> funct2, bits<3> funct3, RISCVOpcode opcode,
- string opcodestr, string argstr>
- : RVInstR4<funct2, funct3, opcode, (outs GPR:$rd),
- (ins GPR:$rs1, GPR:$rs2, GPR:$rs3), opcodestr, argstr>;
-
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -368,7 +351,7 @@ def BINV : ALU_rr<0b0110100, 0b001, "binv">,
Sched<[WriteSingleBit, ReadSingleBit, ReadSingleBit]>;
let IsSignExtendingOpW = 1 in
def BEXT : ALU_rr<0b0100100, 0b101, "bext">,
- Sched<[WriteSingleBit, ReadSingleBit, ReadSingleBit]>;
+ Sched<[WriteBEXT, ReadSingleBit, ReadSingleBit]>;
def BCLRI : RVBShift_ri<0b01001, 0b001, OPC_OP_IMM, "bclri">,
Sched<[WriteSingleBitImm, ReadSingleBitImm]>;
@@ -378,7 +361,7 @@ def BINVI : RVBShift_ri<0b01101, 0b001, OPC_OP_IMM, "binvi">,
Sched<[WriteSingleBitImm, ReadSingleBitImm]>;
let IsSignExtendingOpW = 1 in
def BEXTI : RVBShift_ri<0b01001, 0b101, OPC_OP_IMM, "bexti">,
- Sched<[WriteSingleBitImm, ReadSingleBitImm]>;
+ Sched<[WriteBEXTI, ReadSingleBitImm]>;
} // Predicates = [HasStdExtZbs]
// These instructions were named xperm.n and xperm.b in the last version of
@@ -417,25 +400,25 @@ def SEXT_H : RVBUnary<0b0110000, 0b00101, 0b001, OPC_OP_IMM, "sext.h">,
} // Predicates = [HasStdExtZbb]
let Predicates = [HasStdExtZbc] in {
-def CLMULR : ALU_rr<0b0000101, 0b010, "clmulr", /*Commutable*/1>,
+def CLMULR : ALU_rr<0b0000101, 0b010, "clmulr", Commutable=1>,
Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>;
} // Predicates = [HasStdExtZbc]
let Predicates = [HasStdExtZbcOrZbkc] in {
-def CLMUL : ALU_rr<0b0000101, 0b001, "clmul", /*Commutable*/1>,
+def CLMUL : ALU_rr<0b0000101, 0b001, "clmul", Commutable=1>,
Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>;
-def CLMULH : ALU_rr<0b0000101, 0b011, "clmulh", /*Commutable*/1>,
+def CLMULH : ALU_rr<0b0000101, 0b011, "clmulh", Commutable=1>,
Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>;
} // Predicates = [HasStdExtZbcOrZbkc]
let Predicates = [HasStdExtZbb] in {
-def MIN : ALU_rr<0b0000101, 0b100, "min", /*Commutable*/1>,
+def MIN : ALU_rr<0b0000101, 0b100, "min", Commutable=1>,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def MINU : ALU_rr<0b0000101, 0b101, "minu", /*Commutable*/1>,
+def MINU : ALU_rr<0b0000101, 0b101, "minu", Commutable=1>,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def MAX : ALU_rr<0b0000101, 0b110, "max", /*Commutable*/1>,
+def MAX : ALU_rr<0b0000101, 0b110, "max", Commutable=1>,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-def MAXU : ALU_rr<0b0000101, 0b111, "maxu", /*Commutable*/1>,
+def MAXU : ALU_rr<0b0000101, 0b111, "maxu", Commutable=1>,
Sched<[WriteIALU, ReadIALU, ReadIALU]>;
} // Predicates = [HasStdExtZbb]
@@ -522,9 +505,9 @@ def : InstAlias<"bext $rd, $rs1, $shamt",
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtZbbOrZbkb] in {
-def : Pat<(and GPR:$rs1, (not GPR:$rs2)), (ANDN GPR:$rs1, GPR:$rs2)>;
-def : Pat<(or GPR:$rs1, (not GPR:$rs2)), (ORN GPR:$rs1, GPR:$rs2)>;
-def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>;
+def : Pat<(XLenVT (and GPR:$rs1, (not GPR:$rs2))), (ANDN GPR:$rs1, GPR:$rs2)>;
+def : Pat<(XLenVT (or GPR:$rs1, (not GPR:$rs2))), (ORN GPR:$rs1, GPR:$rs2)>;
+def : Pat<(XLenVT (xor GPR:$rs1, (not GPR:$rs2))), (XNOR GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbbOrZbkb]
let Predicates = [HasStdExtZbbOrZbkb] in {
@@ -534,7 +517,7 @@ def : PatGprGpr<shiftop<rotr>, ROR>;
def : PatGprImm<rotr, RORI, uimmlog2xlen>;
// There's no encoding for roli in the 'B' extension as it can be
// implemented with rori by negating the immediate.
-def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt),
+def : Pat<(XLenVT (rotl GPR:$rs1, uimmlog2xlen:$shamt)),
(RORI GPR:$rs1, (ImmSubFromXLen uimmlog2xlen:$shamt))>;
} // Predicates = [HasStdExtZbbOrZbkb]
@@ -547,64 +530,63 @@ def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
} // Predicates = [HasStdExtZbbOrZbkb, IsRV64]
let Predicates = [HasStdExtZbs] in {
-def : Pat<(and (not (shiftop<shl> 1, GPR:$rs2)), GPR:$rs1),
+def : Pat<(XLenVT (and (not (shiftop<shl> 1, (XLenVT GPR:$rs2))), GPR:$rs1)),
(BCLR GPR:$rs1, GPR:$rs2)>;
-def : Pat<(and (rotl -2, GPR:$rs2), GPR:$rs1), (BCLR GPR:$rs1, GPR:$rs2)>;
-def : Pat<(or (shiftop<shl> 1, GPR:$rs2), GPR:$rs1),
+def : Pat<(XLenVT (and (rotl -2, (XLenVT GPR:$rs2)), GPR:$rs1)),
+ (BCLR GPR:$rs1, GPR:$rs2)>;
+def : Pat<(XLenVT (or (shiftop<shl> 1, (XLenVT GPR:$rs2)), GPR:$rs1)),
(BSET GPR:$rs1, GPR:$rs2)>;
-def : Pat<(xor (shiftop<shl> 1, GPR:$rs2), GPR:$rs1),
+def : Pat<(XLenVT (xor (shiftop<shl> 1, (XLenVT GPR:$rs2)), GPR:$rs1)),
(BINV GPR:$rs1, GPR:$rs2)>;
-def : Pat<(and (shiftop<srl> GPR:$rs1, GPR:$rs2), 1),
+def : Pat<(XLenVT (and (shiftop<srl> GPR:$rs1, (XLenVT GPR:$rs2)), 1)),
(BEXT GPR:$rs1, GPR:$rs2)>;
-def : Pat<(shiftop<shl> 1, GPR:$rs2),
- (BSET X0, GPR:$rs2)>;
+def : Pat<(XLenVT (shiftop<shl> 1, (XLenVT GPR:$rs2))),
+ (BSET (XLenVT X0), GPR:$rs2)>;
-def : Pat<(and GPR:$rs1, BCLRMask:$mask),
+def : Pat<(XLenVT (and GPR:$rs1, BCLRMask:$mask)),
(BCLRI GPR:$rs1, BCLRMask:$mask)>;
-def : Pat<(or GPR:$rs1, SingleBitSetMask:$mask),
+def : Pat<(XLenVT (or GPR:$rs1, SingleBitSetMask:$mask)),
(BSETI GPR:$rs1, SingleBitSetMask:$mask)>;
-def : Pat<(xor GPR:$rs1, SingleBitSetMask:$mask),
+def : Pat<(XLenVT (xor GPR:$rs1, SingleBitSetMask:$mask)),
(BINVI GPR:$rs1, SingleBitSetMask:$mask)>;
-def : Pat<(and (srl GPR:$rs1, uimmlog2xlen:$shamt), (XLenVT 1)),
+def : Pat<(XLenVT (and (srl GPR:$rs1, uimmlog2xlen:$shamt), (XLenVT 1))),
(BEXTI GPR:$rs1, uimmlog2xlen:$shamt)>;
-def : Pat<(seteq (and GPR:$rs1, SingleBitSetMask:$mask), 0),
+def : Pat<(XLenVT (seteq (XLenVT (and GPR:$rs1, SingleBitSetMask:$mask)), 0)),
(BEXTI (XORI GPR:$rs1, -1), SingleBitSetMask:$mask)>;
-def : Pat<(or GPR:$r, BSETINVTwoBitsMask:$i),
+def : Pat<(XLenVT (or GPR:$r, BSETINVTwoBitsMask:$i)),
(BSETI (BSETI GPR:$r, (TrailingZeros BSETINVTwoBitsMask:$i)),
(BSETINVTwoBitsMaskHigh BSETINVTwoBitsMask:$i))>;
-def : Pat<(xor GPR:$r, BSETINVTwoBitsMask:$i),
+def : Pat<(XLenVT (xor GPR:$r, BSETINVTwoBitsMask:$i)),
(BINVI (BINVI GPR:$r, (TrailingZeros BSETINVTwoBitsMask:$i)),
(BSETINVTwoBitsMaskHigh BSETINVTwoBitsMask:$i))>;
-def : Pat<(or GPR:$r, BSETINVORIMask:$i),
+def : Pat<(XLenVT (or GPR:$r, BSETINVORIMask:$i)),
(BSETI (ORI GPR:$r, (BSETINVORIMaskLow BSETINVORIMask:$i)),
(BSETINVTwoBitsMaskHigh BSETINVORIMask:$i))>;
-def : Pat<(xor GPR:$r, BSETINVORIMask:$i),
+def : Pat<(XLenVT (xor GPR:$r, BSETINVORIMask:$i)),
(BINVI (XORI GPR:$r, (BSETINVORIMaskLow BSETINVORIMask:$i)),
(BSETINVTwoBitsMaskHigh BSETINVORIMask:$i))>;
-def : Pat<(and GPR:$r, BCLRITwoBitsMask:$i),
+def : Pat<(XLenVT (and GPR:$r, BCLRITwoBitsMask:$i)),
(BCLRI (BCLRI GPR:$r, (BCLRITwoBitsMaskLow BCLRITwoBitsMask:$i)),
(BCLRITwoBitsMaskHigh BCLRITwoBitsMask:$i))>;
-def : Pat<(and GPR:$r, BCLRIANDIMask:$i),
+def : Pat<(XLenVT (and GPR:$r, BCLRIANDIMask:$i)),
(BCLRI (ANDI GPR:$r, (BCLRIANDIMaskLow BCLRIANDIMask:$i)),
(BCLRITwoBitsMaskHigh BCLRIANDIMask:$i))>;
} // Predicates = [HasStdExtZbs]
-let Predicates = [HasStdExtZbb] in {
-def : Pat<(riscv_orc_b GPR:$rs1), (ORC_B GPR:$rs1)>;
-} // Predicates = [HasStdExtZbb]
+let Predicates = [HasStdExtZbb] in
+def : PatGpr<riscv_orc_b, ORC_B>;
-let Predicates = [HasStdExtZbkb] in {
-def : Pat<(riscv_brev8 GPR:$rs1), (BREV8 GPR:$rs1)>;
-} // Predicates = [HasStdExtZbkb]
+let Predicates = [HasStdExtZbkb] in
+def : PatGpr<riscv_brev8, BREV8>;
let Predicates = [HasStdExtZbkb, IsRV32] in {
// We treat zip and unzip as separate instructions, so match it directly.
-def : Pat<(i32 (riscv_zip GPR:$rs1)), (ZIP_RV32 GPR:$rs1)>;
-def : Pat<(i32 (riscv_unzip GPR:$rs1)), (UNZIP_RV32 GPR:$rs1)>;
+def : PatGpr<riscv_zip, ZIP_RV32, i32>;
+def : PatGpr<riscv_unzip, UNZIP_RV32, i32>;
} // Predicates = [HasStdExtZbkb, IsRV32]
let Predicates = [HasStdExtZbb] in {
@@ -619,12 +601,12 @@ def : PatGpr<riscv_ctzw, CTZW>;
def : Pat<(i64 (ctpop (i64 (zexti32 (i64 GPR:$rs1))))), (CPOPW GPR:$rs1)>;
def : Pat<(i64 (riscv_absw GPR:$rs1)),
- (MAX GPR:$rs1, (SUBW X0, GPR:$rs1))>;
+ (MAX GPR:$rs1, (SUBW (XLenVT X0), GPR:$rs1))>;
} // Predicates = [HasStdExtZbb, IsRV64]
let Predicates = [HasStdExtZbb] in {
-def : Pat<(sext_inreg GPR:$rs1, i8), (SEXT_B GPR:$rs1)>;
-def : Pat<(sext_inreg GPR:$rs1, i16), (SEXT_H GPR:$rs1)>;
+def : Pat<(XLenVT (sext_inreg GPR:$rs1, i8)), (SEXT_B GPR:$rs1)>;
+def : Pat<(XLenVT (sext_inreg GPR:$rs1, i16)), (SEXT_H GPR:$rs1)>;
} // Predicates = [HasStdExtZbb]
let Predicates = [HasStdExtZbb] in {
@@ -634,39 +616,37 @@ def : PatGprGpr<umin, MINU>;
def : PatGprGpr<umax, MAXU>;
} // Predicates = [HasStdExtZbb]
-let Predicates = [HasStdExtZbbOrZbkb, IsRV32] in {
-def : Pat<(i32 (bswap GPR:$rs1)), (REV8_RV32 GPR:$rs1)>;
-} // Predicates = [HasStdExtZbbOrZbkb, IsRV32]
+let Predicates = [HasStdExtZbbOrZbkb, IsRV32] in
+def : PatGpr<bswap, REV8_RV32, i32>;
-let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in {
-def : Pat<(i64 (bswap GPR:$rs1)), (REV8_RV64 GPR:$rs1)>;
-} // Predicates = [HasStdExtZbbOrZbkb, IsRV64]
+let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in
+def : PatGpr<bswap, REV8_RV64, i64>;
let Predicates = [HasStdExtZbkb] in {
def : Pat<(or (and (shl GPR:$rs2, (XLenVT 8)), 0xFFFF),
- (zexti8 GPR:$rs1)),
+ (zexti8 (XLenVT GPR:$rs1))),
(PACKH GPR:$rs1, GPR:$rs2)>;
-def : Pat<(or (shl (zexti8 GPR:$rs2), (XLenVT 8)),
- (zexti8 GPR:$rs1)),
+def : Pat<(or (shl (zexti8 (XLenVT GPR:$rs2)), (XLenVT 8)),
+ (zexti8 (XLenVT GPR:$rs1))),
(PACKH GPR:$rs1, GPR:$rs2)>;
def : Pat<(and (or (shl GPR:$rs2, (XLenVT 8)),
- (zexti8 GPR:$rs1)), 0xFFFF),
+ (zexti8 (XLenVT GPR:$rs1))), 0xFFFF),
(PACKH GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbkb]
let Predicates = [HasStdExtZbkb, IsRV32] in
-def : Pat<(i32 (or (zexti16 GPR:$rs1), (shl GPR:$rs2, (i32 16)))),
+def : Pat<(i32 (or (zexti16 (i32 GPR:$rs1)), (shl GPR:$rs2, (i32 16)))),
(PACK GPR:$rs1, GPR:$rs2)>;
let Predicates = [HasStdExtZbkb, IsRV64] in {
-def : Pat<(i64 (or (zexti32 GPR:$rs1), (shl GPR:$rs2, (i64 32)))),
+def : Pat<(i64 (or (zexti32 (i64 GPR:$rs1)), (shl GPR:$rs2, (i64 32)))),
(PACK GPR:$rs1, GPR:$rs2)>;
def : Pat<(binop_allwusers<or> (shl GPR:$rs2, (i64 16)),
- (zexti16 GPR:$rs1)),
+ (zexti16 (i64 GPR:$rs1))),
(PACKW GPR:$rs1, GPR:$rs2)>;
def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32),
- (zexti16 GPR:$rs1))),
+ (zexti16 (i64 GPR:$rs1)))),
(PACKW GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtZbkb, IsRV64]
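
For reference, the PACKH/PACK/PACKW selection patterns above match DAGs such as (or (zexti8 rs1), (shl rs2, 8)). A minimal C++ sketch of the value semantics those patterns rely on; the helper names are illustrative only and the instruction definitions remain authoritative:

// Sketch of the Zbkb pack semantics matched by the patterns above.
#include <cstdint>
#include <cassert>

// packh: concatenate the low bytes of rs1 (low) and rs2 (high),
// zero-extended to XLEN.
uint64_t packh(uint64_t rs1, uint64_t rs2) {
  return (rs1 & 0xff) | ((rs2 & 0xff) << 8);
}

// pack on RV32: concatenate the low halves of rs1 (low) and rs2 (high).
uint32_t pack_rv32(uint32_t rs1, uint32_t rs2) {
  return (rs1 & 0xffff) | (rs2 << 16);
}

// packw on RV64: pack the low 16-bit halves, then sign-extend the
// 32-bit result.
int64_t packw(uint64_t rs1, uint64_t rs2) {
  uint32_t lo32 = (rs1 & 0xffff) | (uint32_t(rs2) << 16);
  return int64_t(int32_t(lo32));
}

int main() {
  assert(packh(0x1122, 0x3344) == 0x4422);
  assert(pack_rv32(0xAAAA5555, 0xBBBB6666) == 0x66665555);
  assert(packw(0x5555, 0x8001) == int64_t(int32_t(0x80015555)));
}
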
@@ -710,20 +690,20 @@ def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 40)), GPR:$rs2),
def : Pat<(add (mul_oneuse GPR:$rs1, (XLenVT 72)), GPR:$rs2),
(SH3ADD (SH3ADD GPR:$rs1, GPR:$rs1), GPR:$rs2)>;
-def : Pat<(add GPR:$r, CSImm12MulBy4:$i),
- (SH2ADD (ADDI X0, (SimmShiftRightBy2XForm CSImm12MulBy4:$i)),
+def : Pat<(add (XLenVT GPR:$r), CSImm12MulBy4:$i),
+ (SH2ADD (ADDI (XLenVT X0), (SimmShiftRightBy2XForm CSImm12MulBy4:$i)),
GPR:$r)>;
-def : Pat<(add GPR:$r, CSImm12MulBy8:$i),
- (SH3ADD (ADDI X0, (SimmShiftRightBy3XForm CSImm12MulBy8:$i)),
+def : Pat<(add (XLenVT GPR:$r), CSImm12MulBy8:$i),
+ (SH3ADD (ADDI (XLenVT X0), (SimmShiftRightBy3XForm CSImm12MulBy8:$i)),
GPR:$r)>;
-def : Pat<(mul GPR:$r, C3LeftShift:$i),
+def : Pat<(mul (XLenVT GPR:$r), C3LeftShift:$i),
(SLLI (SH1ADD GPR:$r, GPR:$r),
(TrailingZeros C3LeftShift:$i))>;
-def : Pat<(mul GPR:$r, C5LeftShift:$i),
+def : Pat<(mul (XLenVT GPR:$r), C5LeftShift:$i),
(SLLI (SH2ADD GPR:$r, GPR:$r),
(TrailingZeros C5LeftShift:$i))>;
-def : Pat<(mul GPR:$r, C9LeftShift:$i),
+def : Pat<(mul (XLenVT GPR:$r), C9LeftShift:$i),
(SLLI (SH3ADD GPR:$r, GPR:$r),
(TrailingZeros C9LeftShift:$i))>;
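
The C3LeftShift/C5LeftShift/C9LeftShift patterns above strength-reduce a multiply by (3, 5 or 9) * 2^k into one shNadd plus one slli. A small C++ sketch of the arithmetic being exploited; the helper is illustrative, not code from the backend:

// x * (3 << k) == (x + (x << 1)) << k   -> SH1ADD + SLLI
// x * (5 << k) == (x + (x << 2)) << k   -> SH2ADD + SLLI
// x * (9 << k) == (x + (x << 3)) << k   -> SH3ADD + SLLI
#include <cstdint>
#include <cassert>

uint64_t shNadd(unsigned n, uint64_t rs1, uint64_t rs2) {
  return (rs1 << n) + rs2;   // sh{1,2,3}add rd, rs1, rs2
}

int main() {
  uint64_t x = 0x1234567;
  assert(x * 24 == shNadd(1, x, x) << 3);  // 24 == 3 << 3
  assert(x * 20 == shNadd(2, x, x) << 2);  // 20 == 5 << 2
  assert(x * 72 == shNadd(3, x, x) << 3);  // 72 == 9 << 3
}
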
@@ -762,7 +742,7 @@ def : Pat<(i64 (and GPR:$rs1, Shifted32OnesMask:$mask)),
def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFF), non_imm12:$rs2)),
(ADD_UW GPR:$rs1, GPR:$rs2)>;
-def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADD_UW GPR:$rs, X0)>;
+def : Pat<(i64 (and GPR:$rs, 0xFFFFFFFF)), (ADD_UW GPR:$rs, (XLenVT X0))>;
def : Pat<(i64 (add (shl (and GPR:$rs1, 0xFFFFFFFF), (i64 1)), non_imm12:$rs2)),
(SH1ADD_UW GPR:$rs1, GPR:$rs2)>;
@@ -779,11 +759,11 @@ def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), non_imm12:$rs2))
(SH3ADD_UW GPR:$rs1, GPR:$rs2)>;
// More complex cases use a ComplexPattern.
-def : Pat<(add sh1add_uw_op:$rs1, non_imm12:$rs2),
+def : Pat<(i64 (add sh1add_uw_op:$rs1, non_imm12:$rs2)),
(SH1ADD_UW sh1add_uw_op:$rs1, GPR:$rs2)>;
-def : Pat<(add sh2add_uw_op:$rs1, non_imm12:$rs2),
+def : Pat<(i64 (add sh2add_uw_op:$rs1, non_imm12:$rs2)),
(SH2ADD_UW sh2add_uw_op:$rs1, GPR:$rs2)>;
-def : Pat<(add sh3add_uw_op:$rs1, non_imm12:$rs2),
+def : Pat<(i64 (add sh3add_uw_op:$rs1, non_imm12:$rs2)),
(SH3ADD_UW sh3add_uw_op:$rs1, GPR:$rs2)>;
def : Pat<(i64 (add (and GPR:$rs1, 0xFFFFFFFE), non_imm12:$rs2)),
@@ -801,24 +781,24 @@ def : Pat<(i64 (add (and GPR:$rs1, 0x3FFFFFFFC), non_imm12:$rs2)),
def : Pat<(i64 (add (and GPR:$rs1, 0x7FFFFFFF8), non_imm12:$rs2)),
(SH3ADD_UW (SRLI GPR:$rs1, 3), GPR:$rs2)>;
-def : Pat<(mul (binop_oneuse<and> GPR:$r, 0xFFFFFFFF), C3LeftShiftUW:$i),
+def : Pat<(i64 (mul (and_oneuse GPR:$r, 0xFFFFFFFF), C3LeftShiftUW:$i)),
(SH1ADD (SLLI_UW GPR:$r, (TrailingZeros C3LeftShiftUW:$i)),
(SLLI_UW GPR:$r, (TrailingZeros C3LeftShiftUW:$i)))>;
-def : Pat<(mul (binop_oneuse<and> GPR:$r, 0xFFFFFFFF), C5LeftShiftUW:$i),
+def : Pat<(i64 (mul (and_oneuse GPR:$r, 0xFFFFFFFF), C5LeftShiftUW:$i)),
(SH2ADD (SLLI_UW GPR:$r, (TrailingZeros C5LeftShiftUW:$i)),
(SLLI_UW GPR:$r, (TrailingZeros C5LeftShiftUW:$i)))>;
-def : Pat<(mul (binop_oneuse<and> GPR:$r, 0xFFFFFFFF), C9LeftShiftUW:$i),
+def : Pat<(i64 (mul (and_oneuse GPR:$r, 0xFFFFFFFF), C9LeftShiftUW:$i)),
(SH3ADD (SLLI_UW GPR:$r, (TrailingZeros C9LeftShiftUW:$i)),
(SLLI_UW GPR:$r, (TrailingZeros C9LeftShiftUW:$i)))>;
} // Predicates = [HasStdExtZba, IsRV64]
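
The Zba *_UW patterns above all hinge on zero-extending the low 32 bits of rs1 before the shift-and-add, which is why `and GPR:$rs, 0xFFFFFFFF` alone maps to ADD_UW with x0. A minimal C++ sketch of those identities; the helper names are made up for illustration:

// add.uw rd, rs, x0       == zero-extend the low 32 bits of rs
// shNadd.uw rd, rs1, rs2  == (zext32(rs1) << N) + rs2
#include <cstdint>
#include <cassert>

uint64_t zext32(uint64_t x) { return x & 0xFFFFFFFFull; }

uint64_t shNadd_uw(unsigned n, uint64_t rs1, uint64_t rs2) {
  return (zext32(rs1) << n) + rs2;
}

int main() {
  uint64_t x = 0xFFFFFFFF00000002ull, y = 0x100;
  assert(zext32(x) == 2);                 // add.uw x, x0
  assert(shNadd_uw(3, x, y) == 0x110);    // sh3add.uw: (2 << 3) + 0x100
}
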
let Predicates = [HasStdExtZbcOrZbkc] in {
-def : PatGprGpr<int_riscv_clmul, CLMUL>;
-def : PatGprGpr<int_riscv_clmulh, CLMULH>;
+def : PatGprGpr<riscv_clmul, CLMUL>;
+def : PatGprGpr<riscv_clmulh, CLMULH>;
} // Predicates = [HasStdExtZbcOrZbkc]
let Predicates = [HasStdExtZbc] in
-def : PatGprGpr<int_riscv_clmulr, CLMULR>;
+def : PatGprGpr<riscv_clmulr, CLMULR>;
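
The CLMUL/CLMULH/CLMULR patterns above now select via RISCVISD nodes rather than the raw intrinsics. As a reminder of what carry-less multiplication computes, here is a short illustrative C++ model of the low half (what clmul returns); it is not the backend's implementation:

// Carry-less multiply: like integer multiply, but partial products are
// combined with XOR instead of ADD.  clmul yields the low XLEN bits of
// the 2*XLEN-bit carry-less product; clmulh yields the high bits.
#include <cstdint>
#include <cassert>

uint64_t clmul(uint64_t a, uint64_t b) {
  uint64_t r = 0;
  for (unsigned i = 0; i < 64; ++i)
    if ((b >> i) & 1)
      r ^= a << i;
  return r;
}

int main() {
  // 0b101 (x^2+1) "times" 0b11 (x+1) = x^3+x^2+x+1 = 0b1111.
  assert(clmul(0b101, 0b11) == 0b1111);
}
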
let Predicates = [HasStdExtZbkx] in {
def : PatGprGpr<int_riscv_xperm4, XPERM4>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
new file mode 100644
index 000000000000..6687343086da
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZc.td
@@ -0,0 +1,293 @@
+//===-- RISCVInstrInfoZc.td - RISC-V 'Zc*' instructions ----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file describes the RISC-V instructions from the 'Zc*' compressed
+/// instruction extensions, version 1.0.3.
+/// This version is still experimental as the 'Zc*' extensions haven't been
+/// ratified yet.
+///
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Operand and SDNode transformation definitions.
+//===----------------------------------------------------------------------===//
+
+def uimm2_lsb0 : Operand<XLenVT>,
+ ImmLeaf<XLenVT, [{return isShiftedUInt<1, 1>(Imm);}]> {
+ let ParserMatchClass = UImmAsmOperand<2, "Lsb0">;
+ let EncoderMethod = "getImmOpValue";
+ let DecoderMethod = "decodeUImmOperand<2>";
+ let OperandType = "OPERAND_UIMM2_LSB0";
+ let OperandNamespace = "RISCVOp";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isShiftedUInt<1, 1>(Imm);
+ }];
+}
+
+def uimm8ge32 : Operand<XLenVT> {
+ let ParserMatchClass = UImmAsmOperand<8, "GE32">;
+ let DecoderMethod = "decodeUImmOperand<8>";
+ let OperandType = "OPERAND_UIMM8_GE32";
+ let OperandNamespace = "RISCVOp";
+}
+
+def RlistAsmOperand : AsmOperandClass {
+ let Name = "Rlist";
+ let ParserMethod = "parseReglist";
+ let DiagnosticType = "InvalidRlist";
+}
+
+def SpimmAsmOperand : AsmOperandClass {
+ let Name = "Spimm";
+ let ParserMethod = "parseZcmpSpimm";
+ let DiagnosticType = "InvalidSpimm";
+}
+
+def rlist : Operand<OtherVT> {
+ let ParserMatchClass = RlistAsmOperand;
+ let PrintMethod = "printRlist";
+ let DecoderMethod = "decodeZcmpRlist";
+ let EncoderMethod = "getRlistOpValue";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ if (!isUInt<4>(Imm)) return false;
+ // 0~3 Reserved for EABI
+ return (Imm >= 4) && (Imm <= 15);
+ }];
+ }
+
+def spimm : Operand<OtherVT> {
+ let ParserMatchClass = SpimmAsmOperand;
+ let PrintMethod = "printSpimm";
+ let DecoderMethod = "decodeZcmpSpimm";
+ let MCOperandPredicate = [{
+ int64_t Imm;
+ if (!MCOp.evaluateAsConstantImm(Imm))
+ return false;
+ return isShiftedUInt<5, 4>(Imm);
+ }];
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction Class Templates
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+class CLoadB_ri<bits<6> funct6, string OpcodeStr>
+ : RVInst16CLB<funct6, 0b00, (outs GPRC:$rd),
+ (ins GPRCMem:$rs1, uimm2:$imm),
+ OpcodeStr, "$rd, ${imm}(${rs1})"> {
+ bits<2> imm;
+
+ let Inst{6-5} = imm{0,1};
+}
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+class CLoadH_ri<bits<6> funct6, bit funct1, string OpcodeStr>
+ : RVInst16CLH<funct6, funct1, 0b00, (outs GPRC:$rd),
+ (ins GPRCMem:$rs1, uimm2_lsb0:$imm),
+ OpcodeStr, "$rd, ${imm}(${rs1})"> {
+ bits<2> imm;
+
+ let Inst{5} = imm{1};
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+class CStoreB_rri<bits<6> funct6, string OpcodeStr>
+ : RVInst16CSB<funct6, 0b00, (outs),
+ (ins GPRC:$rs2, GPRCMem:$rs1, uimm2:$imm),
+ OpcodeStr, "$rs2, ${imm}(${rs1})"> {
+ bits<2> imm;
+
+ let Inst{6-5} = imm{0,1};
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+class CStoreH_rri<bits<6> funct6, bit funct1, string OpcodeStr>
+ : RVInst16CSH<funct6, funct1, 0b00, (outs),
+ (ins GPRC:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm),
+ OpcodeStr, "$rs2, ${imm}(${rs1})"> {
+ bits<2> imm;
+
+ let Inst{5} = imm{1};
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVZcArith_r<bits<5> funct5, string OpcodeStr> :
+ RVInst16CU<0b100111, funct5, 0b01, (outs GPRC:$rd_wb), (ins GPRC:$rd),
+ OpcodeStr, "$rd"> {
+ let Constraints = "$rd = $rd_wb";
+}
+
+class RVInstZcCPPP<bits<5> funct5, string opcodestr>
+ : RVInst16<(outs), (ins rlist:$rlist, spimm:$spimm),
+ opcodestr, "{$rlist}, $spimm", [], InstFormatOther> {
+ bits<4> rlist;
+ bits<16> spimm;
+
+ let Inst{1-0} = 0b10;
+ let Inst{3-2} = spimm{5-4};
+ let Inst{7-4} = rlist;
+ let Inst{12-8} = funct5;
+ let Inst{15-13} = 0b101;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZcb, HasStdExtZba, IsRV64] in
+def C_ZEXT_W : RVZcArith_r<0b11100 , "c.zext.w">,
+ Sched<[WriteIALU32, ReadIALU32, ReadIALU32]>;
+
+let Predicates = [HasStdExtZcb, HasStdExtZbb] in {
+def C_ZEXT_H : RVZcArith_r<0b11010 , "c.zext.h">,
+ Sched<[WriteIALU, ReadIALU]>;
+def C_SEXT_B : RVZcArith_r<0b11001 , "c.sext.b">,
+ Sched<[WriteIALU, ReadIALU]>;
+def C_SEXT_H : RVZcArith_r<0b11011 , "c.sext.h">,
+ Sched<[WriteIALU, ReadIALU]>;
+}
+
+let Predicates = [HasStdExtZcb] in
+def C_ZEXT_B : RVZcArith_r<0b11000 , "c.zext.b">,
+ Sched<[WriteIALU, ReadIALU]>;
+
+let Predicates = [HasStdExtZcb, HasStdExtMOrZmmul] in
+def C_MUL : CA_ALU<0b100111, 0b10, "c.mul", GPRC>,
+ Sched<[WriteIMul, ReadIMul, ReadIMul]>;
+
+let Predicates = [HasStdExtZcb] in {
+def C_NOT : RVZcArith_r<0b11101 , "c.not">,
+ Sched<[WriteIALU, ReadIALU]>;
+
+def C_LBU : CLoadB_ri<0b100000, "c.lbu">,
+ Sched<[WriteLDB, ReadMemBase]>;
+def C_LHU : CLoadH_ri<0b100001, 0b0, "c.lhu">,
+ Sched<[WriteLDH, ReadMemBase]>;
+def C_LH : CLoadH_ri<0b100001, 0b1, "c.lh">,
+ Sched<[WriteLDH, ReadMemBase]>;
+
+def C_SB : CStoreB_rri<0b100010, "c.sb">,
+ Sched<[WriteSTB, ReadStoreData, ReadMemBase]>;
+def C_SH : CStoreH_rri<0b100011, 0b0, "c.sh">,
+ Sched<[WriteSTH, ReadStoreData, ReadMemBase]>;
+}
+
+// Zcmp
+let DecoderNamespace = "RVZcmp", Predicates = [HasStdExtZcmp],
+ Defs = [X10, X11], hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+def CM_MVA01S : RVInst16CA<0b101011, 0b11, 0b10, (outs),
+ (ins SR07:$rs1, SR07:$rs2), "cm.mva01s", "$rs1, $rs2">;
+
+def CM_MVSA01 : RVInst16CA<0b101011, 0b01, 0b10, (outs SR07:$rs1, SR07:$rs2),
+ (ins), "cm.mvsa01", "$rs1, $rs2">;
+} // DecoderNamespace = "RVZcmp", Predicates = [HasStdExtZcmp]...
+
+let DecoderNamespace = "RVZcmp", Predicates = [HasStdExtZcmp] in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
+def CM_PUSH : RVInstZcCPPP<0b11000, "cm.push">;
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+def CM_POPRET : RVInstZcCPPP<0b11110, "cm.popret">;
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+def CM_POPRETZ : RVInstZcCPPP<0b11100, "cm.popretz">;
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+def CM_POP : RVInstZcCPPP<0b11010, "cm.pop">;
+} // DecoderNamespace = "RVZcmp", Predicates = [HasStdExtZcmp]...
+
+let DecoderNamespace = "RVZcmt", Predicates = [HasStdExtZcmt],
+ hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+def CM_JT : RVInst16CJ<0b101, 0b10, (outs), (ins uimm5:$index),
+ "cm.jt", "$index">{
+ bits<5> index;
+
+ let Inst{12-7} = 0b000000;
+ let Inst{6-2} = index;
+}
+
+let Defs = [X1] in
+def CM_JALT : RVInst16CJ<0b101, 0b10, (outs), (ins uimm8ge32:$index),
+ "cm.jalt", "$index">{
+ bits<8> index;
+
+ let Inst{12-10} = 0b000;
+ let Inst{9-2} = index;
+}
+} // DecoderNamespace = "RVZcmt", Predicates = [HasStdExtZcmt]...
+
+
+let Predicates = [HasStdExtZcb, HasStdExtMOrZmmul] in{
+def : CompressPat<(MUL GPRC:$rs1, GPRC:$rs1, GPRC:$rs2),
+ (C_MUL GPRC:$rs1, GPRC:$rs2)>;
+let isCompressOnly = true in
+def : CompressPat<(MUL GPRC:$rs1, GPRC:$rs2, GPRC:$rs1),
+ (C_MUL GPRC:$rs1, GPRC:$rs2)>;
+} // Predicates = [HasStdExtZcb, HasStdExtMOrZmmul]
+
+let Predicates = [HasStdExtZcb, HasStdExtZbb] in{
+def : CompressPat<(SEXT_B GPRC:$rs1, GPRC:$rs1),
+ (C_SEXT_B GPRC:$rs1, GPRC:$rs1)>;
+def : CompressPat<(SEXT_H GPRC:$rs1, GPRC:$rs1),
+ (C_SEXT_H GPRC:$rs1, GPRC:$rs1)>;
+} // Predicates = [HasStdExtZcb, HasStdExtZbb]
+
+let Predicates = [HasStdExtZcb, HasStdExtZbb] in{
+def : CompressPat<(ZEXT_H_RV32 GPRC:$rs1, GPRC:$rs1),
+ (C_ZEXT_H GPRC:$rs1, GPRC:$rs1)>;
+def : CompressPat<(ZEXT_H_RV64 GPRC:$rs1, GPRC:$rs1),
+ (C_ZEXT_H GPRC:$rs1, GPRC:$rs1)>;
+} // Predicates = [HasStdExtZcb, HasStdExtZbb]
+
+let Predicates = [HasStdExtZcb] in{
+def : CompressPat<(ANDI GPRC:$rs1, GPRC:$rs1, 255),
+ (C_ZEXT_B GPRC:$rs1, GPRC:$rs1)>;
+} // Predicates = [HasStdExtZcb]
+
+let Predicates = [HasStdExtZcb, HasStdExtZba, IsRV64] in{
+def : CompressPat<(ADD_UW GPRC:$rs1, GPRC:$rs1, X0),
+ (C_ZEXT_W GPRC:$rs1, GPRC:$rs1)>;
+} // Predicates = [HasStdExtZcb, HasStdExtZba, IsRV64]
+
+let Predicates = [HasStdExtZcb] in{
+def : CompressPat<(XORI GPRC:$rs1, GPRC:$rs1, -1),
+ (C_NOT GPRC:$rs1, GPRC:$rs1)>;
+}
+
+let Predicates = [HasStdExtZcb] in{
+def : CompressPat<(LBU GPRC:$rd, GPRCMem:$rs1, uimm2:$imm),
+ (C_LBU GPRC:$rd, GPRCMem:$rs1, uimm2:$imm)>;
+def : CompressPat<(LHU GPRC:$rd, GPRCMem:$rs1, uimm2_lsb0:$imm),
+ (C_LHU GPRC:$rd, GPRCMem:$rs1, uimm2_lsb0:$imm)>;
+def : CompressPat<(LH GPRC:$rd, GPRCMem:$rs1, uimm2_lsb0:$imm),
+ (C_LH GPRC:$rd, GPRCMem:$rs1, uimm2_lsb0:$imm)>;
+def : CompressPat<(SB GPRC:$rs2, GPRCMem:$rs1, uimm2:$imm),
+ (C_SB GPRC:$rs2, GPRCMem:$rs1, uimm2:$imm)>;
+def : CompressPat<(SH GPRC:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm),
+ (C_SH GPRC:$rs2, GPRCMem:$rs1, uimm2_lsb0:$imm)>;
+}// Predicates = [HasStdExtZcb]
+
+
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZcb] in {
+def : InstAlias<"c.lbu $rd, (${rs1})",(C_LBU GPRC:$rd, GPRC:$rs1, 0)>;
+def : InstAlias<"c.lhu $rd, (${rs1})",(C_LHU GPRC:$rd, GPRC:$rs1, 0)>;
+def : InstAlias<"c.lh $rd, (${rs1})", (C_LH GPRC:$rd, GPRC:$rs1, 0)>;
+def : InstAlias<"c.sb $rd, (${rs1})", (C_SB GPRC:$rd, GPRC:$rs1, 0)>;
+def : InstAlias<"c.sh $rd, (${rs1})", (C_SH GPRC:$rd, GPRC:$rs1, 0)>;
+}
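
The Zcb CompressPat entries above rely on simple identities between the new compressed forms and the full-width instructions they replace (andi rd, rd, 255; xori rd, rd, -1; add.uw rd, rd, x0). A small C++ sketch of those identities, purely for illustration:

// c.zext.b rd  <->  andi rd, rd, 255          (x & 0xff)
// c.not    rd  <->  xori rd, rd, -1           (~x)
// c.zext.w rd  <->  add.uw rd, rd, x0 (RV64)  ((uint64_t)(uint32_t)x)
#include <cstdint>
#include <cassert>

int main() {
  uint64_t x = 0xFFFFFFFF87654321ull;
  assert((x & 0xff) == 0x21);                      // c.zext.b
  assert(~x == 0x789ABCDEull);                     // c.not
  assert(uint64_t(uint32_t(x)) == 0x87654321ull);  // c.zext.w
}
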
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
new file mode 100644
index 000000000000..f36882f9a968
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td
@@ -0,0 +1,270 @@
+//===-- RISCVInstrInfoZfa.td - RISC-V 'Zfa' instructions ---*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the RISC-V instructions from the standard 'Zfa'
+// additional floating-point extension, version 0.1.
+// This version is still experimental as the 'Zfa' extension hasn't been
+// ratified yet.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Operand and SDNode transformation definitions.
+//===----------------------------------------------------------------------===//
+
+// 5-bit floating-point immediate encodings.
+def LoadFPImmOperand : AsmOperandClass {
+ let Name = "LoadFPImm";
+ let ParserMethod = "parseFPImm";
+ let RenderMethod = "addFPImmOperands";
+ let DiagnosticType = "InvalidLoadFPImm";
+}
+
+def loadfpimm : Operand<XLenVT> {
+ let ParserMatchClass = LoadFPImmOperand;
+ let PrintMethod = "printFPImmOperand";
+}
+
+def RTZArg : AsmOperandClass {
+ let Name = "RTZArg";
+ let RenderMethod = "addFRMArgOperands";
+ let DiagnosticType = "InvalidRTZArg";
+ let ParserMethod = "parseFRMArg";
+}
+
+def rtzarg : Operand<XLenVT> {
+ let ParserMatchClass = RTZArg;
+ let PrintMethod = "printFRMArg";
+ let DecoderMethod = "decodeFRMArg";
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction class templates
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in
+class FPBinaryOp_rr<bits<7> funct7, bits<3> funct3, DAGOperand rdty,
+ DAGOperand rsty, string opcodestr>
+ : RVInstR<funct7, funct3, OPC_OP_FP, (outs rdty:$rd),
+ (ins rsty:$rs1, rsty:$rs2), opcodestr, "$rd, $rs1, $rs2">;
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class FPUnaryOp_imm<bits<7> funct7, bits<5> rs2val, bits<3> funct3,
+ dag outs, dag ins, string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatI> {
+ bits<5> imm;
+ bits<5> rd;
+
+ let Inst{31-25} = funct7;
+ let Inst{24-20} = rs2val;
+ let Inst{19-15} = imm;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rd;
+ let Inst{6-0} = OPC_OP_FP.Value;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1,
+ UseNamedOperandTable = 1, hasPostISelHook = 1 in
+class FPUnaryOp_r_rtz<bits<7> funct7, bits<5> rs2val, DAGOperand rdty,
+ DAGOperand rs1ty, string opcodestr>
+ : RVInstRFrm<funct7, OPC_OP_FP, (outs rdty:$rd),
+ (ins rs1ty:$rs1, rtzarg:$frm), opcodestr,
+ "$rd, $rs1$frm"> {
+ let rs2 = rs2val;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZfa] in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def FLI_S : FPUnaryOp_imm<0b1111000, 0b00001, 0b000, (outs FPR32:$rd),
+ (ins loadfpimm:$imm), "fli.s", "$rd, $imm">,
+ Sched<[WriteFLI32]>;
+
+let SchedRW = [WriteFMinMax32, ReadFMinMax32, ReadFMinMax32] in {
+def FMINM_S: FPALU_rr<0b0010100, 0b010, "fminm.s", FPR32, Commutable=1>;
+def FMAXM_S: FPALU_rr<0b0010100, 0b011, "fmaxm.s", FPR32, Commutable=1>;
+}
+
+def FROUND_S : FPUnaryOp_r_frm<0b0100000, 0b00100, FPR32, FPR32, "fround.s">,
+ Sched<[WriteFRoundF32, ReadFRoundF32]>;
+def FROUNDNX_S : FPUnaryOp_r_frm<0b0100000, 0b00101, FPR32, FPR32, "froundnx.s">,
+ Sched<[WriteFRoundF32, ReadFRoundF32]>;
+
+let SchedRW = [WriteFCmp32, ReadFCmp32, ReadFCmp32] in {
+def FLTQ_S : FPCmp_rr<0b1010000, 0b101, "fltq.s", FPR32>;
+def FLEQ_S : FPCmp_rr<0b1010000, 0b100, "fleq.s", FPR32>;
+}
+} // Predicates = [HasStdExtZfa]
+
+let Predicates = [HasStdExtZfa, HasStdExtD] in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def FLI_D : FPUnaryOp_imm<0b1111001, 0b00001, 0b000, (outs FPR64:$rd),
+ (ins loadfpimm:$imm), "fli.d", "$rd, $imm">,
+ Sched<[WriteFLI64]>;
+
+let SchedRW = [WriteFMinMax64, ReadFMinMax64, ReadFMinMax64] in {
+def FMINM_D: FPALU_rr<0b0010101, 0b010, "fminm.d", FPR64, Commutable=1>;
+def FMAXM_D: FPALU_rr<0b0010101, 0b011, "fmaxm.d", FPR64, Commutable=1>;
+}
+
+def FROUND_D : FPUnaryOp_r_frm<0b0100001, 0b00100, FPR64, FPR64, "fround.d">,
+ Sched<[WriteFRoundF64, ReadFRoundF64]>;
+def FROUNDNX_D : FPUnaryOp_r_frm<0b0100001, 0b00101, FPR64, FPR64, "froundnx.d">,
+ Sched<[WriteFRoundF64, ReadFRoundF64]>;
+
+def FCVTMOD_W_D
+ : FPUnaryOp_r_rtz<0b1100001, 0b01000, GPR, FPR64, "fcvtmod.w.d">,
+ Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>;
+
+let SchedRW = [WriteFCmp64, ReadFCmp64, ReadFCmp64] in {
+def FLTQ_D : FPCmp_rr<0b1010001, 0b101, "fltq.d", FPR64>;
+def FLEQ_D : FPCmp_rr<0b1010001, 0b100, "fleq.d", FPR64>;
+}
+} // Predicates = [HasStdExtZfa, HasStdExtD]
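
FCVTMOD_W_D above is the Zfa "convert modulo" form intended for JavaScript-style double-to-int32 conversion. A hedged C++ sketch of its value semantics for finite inputs only; NaN/infinity handling and exception flags are omitted here, and the Zfa specification remains the authoritative definition:

// fcvtmod.w.d, finite inputs: truncate toward zero, keep the integer
// modulo 2^32, then sign-extend bit 31 into the 64-bit destination.
#include <cstdint>
#include <cmath>
#include <cassert>

int64_t fcvtmod_w_d_finite(double x) {
  double t = std::trunc(x);            // round toward zero
  double m = std::fmod(t, 4294967296.0);
  // fmod keeps the sign of t; the uint32_t conversion below folds a
  // negative remainder into [0, 2^32).
  uint32_t lo = uint32_t(int64_t(m));  // low 32 bits of the integer
  return int64_t(int32_t(lo));         // sign-extend bit 31
}

int main() {
  assert(fcvtmod_w_d_finite(3.9) == 3);
  assert(fcvtmod_w_d_finite(-3.9) == -3);
  assert(fcvtmod_w_d_finite(4294967301.0) == 5);  // 2^32 + 5 wraps to 5
}
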
+
+let Predicates = [HasStdExtZfa, HasStdExtD, IsRV32] in {
+let mayRaiseFPException = 0 in {
+def FMVH_X_D : FPUnaryOp_r<0b1110001, 0b00001, 0b000, GPR, FPR64, "fmvh.x.d">,
+ Sched<[WriteFMovF64ToI64, ReadFMovF64ToI64]>;
+def FMVP_D_X : FPBinaryOp_rr<0b1011001, 0b000, FPR64, GPR, "fmvp.d.x">,
+ Sched<[WriteFMovI64ToF64, ReadFMovI64ToF64]>;
+}
+
+let isCodeGenOnly = 1, mayRaiseFPException = 0 in
+def FMV_X_W_FPR64 : FPUnaryOp_r<0b1110000, 0b00000, 0b000, GPR, FPR64,
+ "fmv.x.w">,
+ Sched<[WriteFMovF64ToI64, ReadFMovF64ToI64]>;
+} // Predicates = [HasStdExtZfa, HasStdExtD, IsRV32]
+
+let Predicates = [HasStdExtZfa, HasStdExtZfhOrZvfh] in
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def FLI_H : FPUnaryOp_imm<0b1111010, 0b00001, 0b000, (outs FPR16:$rd),
+ (ins loadfpimm:$imm), "fli.h", "$rd, $imm">,
+ Sched<[WriteFLI16]>;
+
+let Predicates = [HasStdExtZfa, HasStdExtZfh] in {
+let SchedRW = [WriteFMinMax16, ReadFMinMax16, ReadFMinMax16] in {
+def FMINM_H: FPALU_rr<0b0010110, 0b010, "fminm.h", FPR16, Commutable=1>;
+def FMAXM_H: FPALU_rr<0b0010110, 0b011, "fmaxm.h", FPR16, Commutable=1>;
+}
+
+def FROUND_H : FPUnaryOp_r_frm<0b0100010, 0b00100, FPR16, FPR16, "fround.h">,
+ Sched<[WriteFRoundF16, ReadFRoundF16]>;
+def FROUNDNX_H : FPUnaryOp_r_frm<0b0100010, 0b00101, FPR16, FPR16, "froundnx.h">,
+ Sched<[WriteFRoundF16, ReadFRoundF16]>;
+
+let SchedRW = [WriteFCmp16, ReadFCmp16, ReadFCmp16] in {
+def FLTQ_H : FPCmp_rr<0b1010010, 0b101, "fltq.h", FPR16>;
+def FLEQ_H : FPCmp_rr<0b1010010, 0b100, "fleq.h", FPR16>;
+}
+} // Predicates = [HasStdExtZfa, HasStdExtZfh]
+
+//===----------------------------------------------------------------------===//
+// Pseudo-instructions and codegen patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZfa] in {
+def : InstAlias<"fgtq.s $rd, $rs, $rt",
+ (FLTQ_S GPR:$rd, FPR32:$rt, FPR32:$rs), 0>;
+def : InstAlias<"fgeq.s $rd, $rs, $rt",
+ (FLEQ_S GPR:$rd, FPR32:$rt, FPR32:$rs), 0>;
+}
+
+let Predicates = [HasStdExtZfa, HasStdExtD] in {
+def : InstAlias<"fgtq.d $rd, $rs, $rt",
+ (FLTQ_D GPR:$rd, FPR64:$rt, FPR64:$rs), 0>;
+def : InstAlias<"fgeq.d $rd, $rs, $rt",
+ (FLEQ_D GPR:$rd, FPR64:$rt, FPR64:$rs), 0>;
+}
+
+let Predicates = [HasStdExtZfa, HasStdExtZfh] in {
+def : InstAlias<"fgtq.h $rd, $rs, $rt",
+ (FLTQ_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
+def : InstAlias<"fgeq.h $rd, $rs, $rt",
+ (FLEQ_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
+}
+
+//===----------------------------------------------------------------------===//
+// Codegen patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZfa] in {
+def: PatFprFpr<fminimum, FMINM_S, FPR32, f32>;
+def: PatFprFpr<fmaximum, FMAXM_S, FPR32, f32>;
+
+// frint rounds according to the current rounding mode and detects
+// inexact conditions.
+def: Pat<(any_frint FPR32:$rs1), (FROUNDNX_S FPR32:$rs1, FRM_DYN)>;
+
+// fnearbyint is like frint but does not detect inexact conditions.
+def: Pat<(any_fnearbyint FPR32:$rs1), (FROUND_S FPR32:$rs1, FRM_DYN)>;
+
+def: Pat<(any_fround FPR32:$rs1), (FROUND_S FPR32:$rs1, FRM_RMM)>;
+def: Pat<(any_ffloor FPR32:$rs1), (FROUND_S FPR32:$rs1, FRM_RDN)>;
+def: Pat<(any_fceil FPR32:$rs1), (FROUND_S FPR32:$rs1, FRM_RUP)>;
+def: Pat<(any_ftrunc FPR32:$rs1), (FROUND_S FPR32:$rs1, FRM_RTZ)>;
+
+def: PatSetCC<FPR32, strict_fsetcc, SETLT, FLTQ_S, f32>;
+def: PatSetCC<FPR32, strict_fsetcc, SETOLT, FLTQ_S, f32>;
+def: PatSetCC<FPR32, strict_fsetcc, SETLE, FLEQ_S, f32>;
+def: PatSetCC<FPR32, strict_fsetcc, SETOLE, FLEQ_S, f32>;
+} // Predicates = [HasStdExtZfa]
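
The comments above distinguish frint (may raise the inexact flag) from fnearbyint (must not), and the remaining FROUND patterns pin a static rounding mode per ISD node (fround -> RMM, ffloor -> RDN, fceil -> RUP, ftrunc -> RTZ). A short C++ sketch of the same distinction in terms of the C math library, for illustration only:

// rint() honours the dynamic rounding mode and may raise FE_INEXACT when
// the result differs from the input (cf. any_frint -> FROUNDNX);
// nearbyint() rounds the same way but never raises the flag
// (cf. any_fnearbyint -> FROUND).
// Strictly observing the FP environment may need FENV_ACCESS or
// -frounding-math, depending on the compiler.
#include <cfenv>
#include <cmath>
#include <cstdio>

int main() {
  volatile double v = 1.5;   // volatile defeats constant folding
  std::feclearexcept(FE_INEXACT);
  (void)std::nearbyint(v);
  std::printf("nearbyint inexact: %d\n", !!std::fetestexcept(FE_INEXACT));

  std::feclearexcept(FE_INEXACT);
  (void)std::rint(v);
  std::printf("rint inexact:      %d\n", !!std::fetestexcept(FE_INEXACT));
}
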
+
+let Predicates = [HasStdExtZfa, HasStdExtD] in {
+def: PatFprFpr<fminimum, FMINM_D, FPR64, f64>;
+def: PatFprFpr<fmaximum, FMAXM_D, FPR64, f64>;
+
+// frint rounds according to the current rounding mode and detects
+// inexact conditions.
+def: Pat<(any_frint FPR64:$rs1), (FROUNDNX_D FPR64:$rs1, FRM_DYN)>;
+
+// fnearbyint is like frint but does not detect inexact conditions.
+def: Pat<(any_fnearbyint FPR64:$rs1), (FROUND_D FPR64:$rs1, FRM_DYN)>;
+
+def: Pat<(any_fround FPR64:$rs1), (FROUND_D FPR64:$rs1, FRM_RMM)>;
+def: Pat<(any_froundeven FPR64:$rs1), (FROUND_D FPR64:$rs1, FRM_RNE)>;
+def: Pat<(any_ffloor FPR64:$rs1), (FROUND_D FPR64:$rs1, FRM_RDN)>;
+def: Pat<(any_fceil FPR64:$rs1), (FROUND_D FPR64:$rs1, FRM_RUP)>;
+def: Pat<(any_ftrunc FPR64:$rs1), (FROUND_D FPR64:$rs1, FRM_RTZ)>;
+
+def: PatSetCC<FPR64, strict_fsetcc, SETLT, FLTQ_D, f64>;
+def: PatSetCC<FPR64, strict_fsetcc, SETOLT, FLTQ_D, f64>;
+def: PatSetCC<FPR64, strict_fsetcc, SETLE, FLEQ_D, f64>;
+def: PatSetCC<FPR64, strict_fsetcc, SETOLE, FLEQ_D, f64>;
+} // Predicates = [HasStdExtZfa, HasStdExtD]
+
+let Predicates = [HasStdExtZfa, HasStdExtD, IsRV32] in {
+def : Pat<(RISCVBuildPairF64 GPR:$rs1, GPR:$rs2),
+ (FMVP_D_X GPR:$rs1, GPR:$rs2)>;
+}
+
+let Predicates = [HasStdExtZfa, HasStdExtZfh] in {
+def: PatFprFpr<fminimum, FMINM_H, FPR16, f16>;
+def: PatFprFpr<fmaximum, FMAXM_H, FPR16, f16>;
+
+// frint rounds according to the current rounding mode and detects
+// inexact conditions.
+def: Pat<(f16 (any_frint FPR16:$rs1)), (FROUNDNX_H FPR16:$rs1, FRM_DYN)>;
+
+// fnearbyint is like frint but does not detect inexact conditions.
+def: Pat<(f16 (any_fnearbyint FPR16:$rs1)), (FROUND_H FPR16:$rs1, FRM_DYN)>;
+
+def: Pat<(f16 (any_fround FPR16:$rs1)), (FROUND_H FPR16:$rs1, FRM_RMM)>;
+def: Pat<(f16 (any_froundeven FPR16:$rs1)), (FROUND_H FPR16:$rs1, FRM_RNE)>;
+def: Pat<(f16 (any_ffloor FPR16:$rs1)), (FROUND_H FPR16:$rs1, FRM_RDN)>;
+def: Pat<(f16 (any_fceil FPR16:$rs1)), (FROUND_H FPR16:$rs1, FRM_RUP)>;
+def: Pat<(f16 (any_ftrunc FPR16:$rs1)), (FROUND_H FPR16:$rs1, FRM_RTZ)>;
+
+def: PatSetCC<FPR16, strict_fsetcc, SETLT, FLTQ_H, f16>;
+def: PatSetCC<FPR16, strict_fsetcc, SETOLT, FLTQ_H, f16>;
+def: PatSetCC<FPR16, strict_fsetcc, SETLE, FLEQ_H, f16>;
+def: PatSetCC<FPR16, strict_fsetcc, SETOLE, FLEQ_H, f16>;
+} // Predicates = [HasStdExtZfa, HasStdExtZfh]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td
new file mode 100644
index 000000000000..35f9f03f61a1
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfbfmin.td
@@ -0,0 +1,63 @@
+//===-- RISCVInstrInfoZfbfmin.td - 'Zfbfmin' instructions --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the RISC-V instructions from the standard 'Zfbfmin'
+// extension, providing scalar conversion instructions for BFloat16.
+// This version is still experimental as the 'Zfbfmin' extension hasn't been
+// ratified yet.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// RISC-V specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDT_RISCVFP_ROUND_BF16
+ : SDTypeProfile<1, 1, [SDTCisVT<0, bf16>, SDTCisVT<1, f32>]>;
+def SDT_RISCVFP_EXTEND_BF16
+ : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, bf16>]>;
+
+def riscv_fpround_bf16
+ : SDNode<"RISCVISD::FP_ROUND_BF16", SDT_RISCVFP_ROUND_BF16>;
+def riscv_fpextend_bf16
+ : SDNode<"RISCVISD::FP_EXTEND_BF16", SDT_RISCVFP_EXTEND_BF16>;
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZfbfmin] in {
+def FCVT_BF16_S : FPUnaryOp_r_frm<0b0100010, 0b01000, FPR16, FPR32, "fcvt.bf16.s">,
+ Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]>;
+def FCVT_S_BF16 : FPUnaryOp_r_frm<0b0100000, 0b00110, FPR32, FPR16, "fcvt.s.bf16">,
+ Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]>;
+} // Predicates = [HasStdExtZfbfmin]
+
+//===----------------------------------------------------------------------===//
+// Pseudo-instructions and codegen patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZfbfmin] in {
+/// Loads
+def : LdPat<load, FLH, bf16>;
+
+/// Stores
+def : StPat<store, FSH, FPR16, bf16>;
+
+/// Float conversion operations
+// f32 -> bf16, bf16 -> f32
+def : Pat<(bf16 (riscv_fpround_bf16 FPR32:$rs1)),
+ (FCVT_BF16_S FPR32:$rs1, FRM_DYN)>;
+def : Pat<(riscv_fpextend_bf16 (bf16 FPR16:$rs1)),
+ (FCVT_S_BF16 FPR16:$rs1, FRM_DYN)>;
+
+// Moves (no conversion)
+def : Pat<(bf16 (riscv_fmv_h_x GPR:$src)), (FMV_H_X GPR:$src)>;
+def : Pat<(riscv_fmv_x_anyexth (bf16 FPR16:$src)), (FMV_X_H FPR16:$src)>;
+def : Pat<(riscv_fmv_x_signexth (bf16 FPR16:$src)), (FMV_X_H FPR16:$src)>;
+} // Predicates = [HasStdExtZfbfmin]
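
The FCVT_BF16_S / FCVT_S_BF16 patterns above lower the new FP_ROUND_BF16 / FP_EXTEND_BF16 nodes. As a rough model, bfloat16 is the upper 16 bits of an IEEE binary32 value, so widening is exact while narrowing must round; the C++ sketch below simply truncates, which is a simplification of what the instruction does under frm:

// Illustrative bf16 <-> f32 model: bf16 keeps the sign, the full 8-bit
// exponent and the top 7 mantissa bits of binary32.
#include <cstdint>
#include <cstring>
#include <cassert>

float bf16_to_f32(uint16_t h) {            // exact, like fcvt.s.bf16
  uint32_t bits = uint32_t(h) << 16;
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}

uint16_t f32_to_bf16_truncate(float f) {   // fcvt.bf16.s rounds per frm;
  uint32_t bits;                           // plain truncation shown here
  std::memcpy(&bits, &f, sizeof(bits));
  return uint16_t(bits >> 16);
}

int main() {
  // 1.0f is 0x3F800000, so its bf16 encoding is 0x3F80.
  assert(f32_to_bf16_truncate(1.0f) == 0x3F80);
  assert(bf16_to_f32(0x3F80) == 1.0f);
}
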
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index 914d39c583d7..810775a78241 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -16,9 +16,9 @@
//===----------------------------------------------------------------------===//
def SDT_RISCVFMV_H_X
- : SDTypeProfile<1, 1, [SDTCisVT<0, f16>, SDTCisVT<1, XLenVT>]>;
+ : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, XLenVT>]>;
def SDT_RISCVFMV_X_EXTH
- : SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisVT<1, f16>]>;
+ : SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisFP<1>]>;
def riscv_fmv_h_x
: SDNode<"RISCVISD::FMV_H_X", SDT_RISCVFMV_H_X>;
@@ -38,133 +38,116 @@ def FPR16INX : RegisterOperand<GPRF16> {
let DecoderMethod = "DecodeGPRRegisterClass";
}
-def ZfhExt : ExtInfo<0, [HasStdExtZfh]>;
-def Zfh64Ext : ExtInfo<0, [HasStdExtZfh, IsRV64]>;
-def ZfhminExt : ExtInfo<0, [HasStdExtZfhOrZfhmin]>;
-def ZhinxExt : ExtInfo<1, [HasStdExtZhinx]>;
-def ZhinxminExt : ExtInfo<1, [HasStdExtZhinxOrZhinxmin]>;
-def Zhinx64Ext : ExtInfo<1, [HasStdExtZhinx, IsRV64]>;
-
-def ZfhminDExt : ExtInfo<0, [HasStdExtZfhOrZfhmin, HasStdExtD]>;
-def ZhinxminZdinxExt : ExtInfo<1, [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx]>;
-
-def H : ExtInfo_r<ZfhExt, FPR16>;
-def H_INX : ExtInfo_r<ZhinxExt, FPR16INX>;
-
-def HH : ExtInfo_rr<ZfhExt, FPR16, FPR16>;
-def HH_INX : ExtInfo_rr<ZhinxExt, FPR16INX, FPR16INX>;
-def XH : ExtInfo_rr<ZfhExt, GPR, FPR16>;
-def XH_INX : ExtInfo_rr<ZhinxExt, GPR, FPR16INX>;
-def HX : ExtInfo_rr<ZfhExt, FPR16, GPR>;
-def HX_INX : ExtInfo_rr<ZhinxExt, FPR16INX, GPR>;
-def XH_64 : ExtInfo_rr<Zfh64Ext, GPR, FPR16>;
-def HX_64 : ExtInfo_rr<Zfh64Ext, FPR16, GPR>;
-def XH_INX_64 : ExtInfo_rr<Zhinx64Ext, GPR, FPR16INX>;
-def HX_INX_64 : ExtInfo_rr<Zhinx64Ext, FPR16INX, GPR>;
-def HFmin : ExtInfo_rr<ZfhminExt, FPR16, FPR32>;
-def HF_INXmin : ExtInfo_rr<ZhinxminExt, FPR16INX, FPR32INX>;
-def HF_INX : ExtInfo_rr<ZhinxExt, FPR16INX, FPR32INX>;
-def FHmin : ExtInfo_rr<ZfhminExt, FPR32, FPR16>;
-def FH_INXmin : ExtInfo_rr<ZhinxminExt, FPR32INX, FPR16INX>;
-def FH_INX : ExtInfo_rr<ZhinxExt, FPR32INX, FPR16INX>;
-def DHmin : ExtInfo_rr<ZfhminDExt, FPR64, FPR16>;
-def DH_INXmin : ExtInfo_rr<ZhinxminZdinxExt, FPR64INX, FPR16INX>;
-def HDmin : ExtInfo_rr<ZfhminDExt, FPR16, FPR64>;
-def HD_INXmin : ExtInfo_rr<ZhinxminZdinxExt, FPR16INX, FPR64INX>;
-
-defvar HINX = [H, H_INX];
-defvar HHINX = [HH, HH_INX];
-defvar XHINX = [XH, XH_INX];
-defvar HXINX = [HX, HX_INX];
-defvar XHIN64X = [XH_64, XH_INX_64];
-defvar HXIN64X = [HX_64, HX_INX_64];
-defvar HFINXmin = [HFmin, HF_INXmin];
-defvar FHINXmin = [FHmin, FH_INXmin];
-defvar DHINXmin = [DHmin, DH_INXmin];
-defvar HDINXmin = [HDmin, HD_INXmin];
+def ZfhExt : ExtInfo<"", "", [HasStdExtZfh],
+ f16, FPR16, FPR32, ?, FPR16>;
+def ZfhminExt : ExtInfo<"", "", [HasStdExtZfhOrZfhmin],
+ f16, FPR16, FPR32, ?, FPR16>;
+def ZfhDExt : ExtInfo<"", "", [HasStdExtZfh, HasStdExtD],
+ ?, ?, FPR32, FPR64, FPR16>;
+def ZfhminDExt : ExtInfo<"", "", [HasStdExtZfhOrZfhmin, HasStdExtD],
+ ?, ?, FPR32, FPR64, FPR16>;
+
+def ZhinxExt : ExtInfo<"_INX", "RVZfinx",
+ [HasStdExtZhinx],
+ f16, FPR16INX, FPR32INX, ?, FPR16INX>;
+def ZhinxminExt : ExtInfo<"_INX", "RVZfinx",
+ [HasStdExtZhinxOrZhinxmin],
+ f16, FPR16INX, FPR32INX, ?, FPR16INX>;
+def ZhinxZdinxExt : ExtInfo<"_INX", "RVZfinx",
+ [HasStdExtZhinx, HasStdExtZdinx, IsRV64],
+ ?, ?, FPR32INX, FPR64INX, FPR16INX>;
+def ZhinxminZdinxExt : ExtInfo<"_INX", "RVZfinx",
+ [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV64],
+ ?, ?, FPR32INX, FPR64INX, FPR16INX>;
+def ZhinxZdinx32Ext : ExtInfo<"_IN32X", "RV32Zdinx",
+ [HasStdExtZhinx, HasStdExtZdinx, IsRV32],
+ ?, ?, FPR32INX, FPR64IN32X, FPR16INX >;
+def ZhinxminZdinx32Ext : ExtInfo<"_IN32X", "RV32Zdinx",
+ [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV32],
+ ?, ?, FPR32INX, FPR64IN32X, FPR16INX>;
+
+defvar ZfhExts = [ZfhExt, ZhinxExt];
+defvar ZfhminExts = [ZfhminExt, ZhinxminExt];
+defvar ZfhDExts = [ZfhDExt, ZhinxZdinxExt, ZhinxZdinx32Ext];
+defvar ZfhminDExts = [ZfhminDExt, ZhinxminZdinxExt, ZhinxminZdinx32Ext];
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZfhOrZfhmin] in {
+let Predicates = [HasHalfFPLoadStoreMove] in {
def FLH : FPLoad_r<0b001, "flh", FPR16, WriteFLD16>;
// Operands for stores are in the order srcreg, base, offset rather than
// reflecting the order these fields are specified in the instruction
// encoding.
def FSH : FPStore_r<0b001, "fsh", FPR16, WriteFST16>;
-} // Predicates = [HasStdExtZfhOrZfhmin]
-
-let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16] in {
-defm FMADD_H : FPFMA_rrr_frm_m<OPC_MADD, 0b10, "fmadd.h", HINX>;
-defm FMSUB_H : FPFMA_rrr_frm_m<OPC_MSUB, 0b10, "fmsub.h", HINX>;
-defm FNMSUB_H : FPFMA_rrr_frm_m<OPC_NMSUB, 0b10, "fnmsub.h", HINX>;
-defm FNMADD_H : FPFMA_rrr_frm_m<OPC_NMADD, 0b10, "fnmadd.h", HINX>;
-}
-
-defm : FPFMADynFrmAlias_m<FMADD_H, "fmadd.h", HINX>;
-defm : FPFMADynFrmAlias_m<FMSUB_H, "fmsub.h", HINX>;
-defm : FPFMADynFrmAlias_m<FNMSUB_H, "fnmsub.h", HINX>;
-defm : FPFMADynFrmAlias_m<FNMADD_H, "fnmadd.h", HINX>;
-
-let SchedRW = [WriteFAdd16, ReadFAdd16, ReadFAdd16] in {
-defm FADD_H : FPALU_rr_frm_m<0b0000010, "fadd.h", HINX, /*Commutable*/1>;
-defm FSUB_H : FPALU_rr_frm_m<0b0000110, "fsub.h", HINX>;
-}
-let SchedRW = [WriteFMul16, ReadFMul16, ReadFMul16] in
-defm FMUL_H : FPALU_rr_frm_m<0b0001010, "fmul.h", HINX, /*Commutable*/1>;
-
-let SchedRW = [WriteFDiv16, ReadFDiv16, ReadFDiv16] in
-defm FDIV_H : FPALU_rr_frm_m<0b0001110, "fdiv.h", HINX>;
-
-defm : FPALUDynFrmAlias_m<FADD_H, "fadd.h", HINX>;
-defm : FPALUDynFrmAlias_m<FSUB_H, "fsub.h", HINX>;
-defm : FPALUDynFrmAlias_m<FMUL_H, "fmul.h", HINX>;
-defm : FPALUDynFrmAlias_m<FDIV_H, "fdiv.h", HINX>;
-
-defm FSQRT_H : FPUnaryOp_r_frm_m<0b0101110, 0b00000, HHINX, "fsqrt.h">,
- Sched<[WriteFSqrt16, ReadFSqrt16]>;
-defm : FPUnaryOpDynFrmAlias_m<FSQRT_H, "fsqrt.h", HHINX>;
-
-let SchedRW = [WriteFSGNJ16, ReadFSGNJ16, ReadFSGNJ16],
- mayRaiseFPException = 0 in {
-defm FSGNJ_H : FPALU_rr_m<0b0010010, 0b000, "fsgnj.h", HINX>;
-defm FSGNJN_H : FPALU_rr_m<0b0010010, 0b001, "fsgnjn.h", HINX>;
-defm FSGNJX_H : FPALU_rr_m<0b0010010, 0b010, "fsgnjx.h", HINX>;
-}
-
-let SchedRW = [WriteFMinMax16, ReadFMinMax16, ReadFMinMax16] in {
-defm FMIN_H : FPALU_rr_m<0b0010110, 0b000, "fmin.h", HINX, /*Commutable*/1>;
-defm FMAX_H : FPALU_rr_m<0b0010110, 0b001, "fmax.h", HINX, /*Commutable*/1>;
-}
-
-let IsSignExtendingOpW = 1 in
-defm FCVT_W_H : FPUnaryOp_r_frm_m<0b1100010, 0b00000, XHINX, "fcvt.w.h">,
- Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_W_H, "fcvt.w.h", XHINX>;
-
-let IsSignExtendingOpW = 1 in
-defm FCVT_WU_H : FPUnaryOp_r_frm_m<0b1100010, 0b00001, XHINX, "fcvt.wu.h">,
- Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_WU_H, "fcvt.wu.h", XHINX>;
-
-defm FCVT_H_W : FPUnaryOp_r_frm_m<0b1101010, 0b00000, HXINX, "fcvt.h.w">,
- Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_H_W, "fcvt.h.w", HXINX>;
-
-defm FCVT_H_WU : FPUnaryOp_r_frm_m<0b1101010, 0b00001, HXINX, "fcvt.h.wu">,
- Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_H_WU, "fcvt.h.wu", HXINX>;
-
-defm FCVT_H_S : FPUnaryOp_r_frm_m<0b0100010, 0b00000, HFINXmin, "fcvt.h.s">,
- Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_H_S, "fcvt.h.s", HFINXmin>;
-
-defm FCVT_S_H : FPUnaryOp_r_m<0b0100000, 0b00010, 0b000, FHINXmin, "fcvt.s.h">,
- Sched<[WriteFCvtF16ToF32, ReadFCvtF16ToF32]>;
-
-let Predicates = [HasStdExtZfhOrZfhmin] in {
+} // Predicates = [HasHalfFPLoadStoreMove]
+
+foreach Ext = ZfhExts in {
+ let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16] in {
+ defm FMADD_H : FPFMA_rrr_frm_m<OPC_MADD, 0b10, "fmadd.h", Ext>;
+ defm FMSUB_H : FPFMA_rrr_frm_m<OPC_MSUB, 0b10, "fmsub.h", Ext>;
+ defm FNMSUB_H : FPFMA_rrr_frm_m<OPC_NMSUB, 0b10, "fnmsub.h", Ext>;
+ defm FNMADD_H : FPFMA_rrr_frm_m<OPC_NMADD, 0b10, "fnmadd.h", Ext>;
+ }
+
+ let SchedRW = [WriteFAdd16, ReadFAdd16, ReadFAdd16] in {
+ defm FADD_H : FPALU_rr_frm_m<0b0000010, "fadd.h", Ext, Commutable=1>;
+ defm FSUB_H : FPALU_rr_frm_m<0b0000110, "fsub.h", Ext>;
+ }
+ let SchedRW = [WriteFMul16, ReadFMul16, ReadFMul16] in
+ defm FMUL_H : FPALU_rr_frm_m<0b0001010, "fmul.h", Ext, Commutable=1>;
+
+ let SchedRW = [WriteFDiv16, ReadFDiv16, ReadFDiv16] in
+ defm FDIV_H : FPALU_rr_frm_m<0b0001110, "fdiv.h", Ext>;
+
+ defm FSQRT_H : FPUnaryOp_r_frm_m<0b0101110, 0b00000, Ext, Ext.PrimaryTy,
+ Ext.PrimaryTy, "fsqrt.h">,
+ Sched<[WriteFSqrt16, ReadFSqrt16]>;
+
+ let SchedRW = [WriteFSGNJ16, ReadFSGNJ16, ReadFSGNJ16],
+ mayRaiseFPException = 0 in {
+ defm FSGNJ_H : FPALU_rr_m<0b0010010, 0b000, "fsgnj.h", Ext>;
+ defm FSGNJN_H : FPALU_rr_m<0b0010010, 0b001, "fsgnjn.h", Ext>;
+ defm FSGNJX_H : FPALU_rr_m<0b0010010, 0b010, "fsgnjx.h", Ext>;
+ }
+
+ let SchedRW = [WriteFMinMax16, ReadFMinMax16, ReadFMinMax16] in {
+ defm FMIN_H : FPALU_rr_m<0b0010110, 0b000, "fmin.h", Ext, Commutable=1>;
+ defm FMAX_H : FPALU_rr_m<0b0010110, 0b001, "fmax.h", Ext, Commutable=1>;
+ }
+
+ let IsSignExtendingOpW = 1 in
+ defm FCVT_W_H : FPUnaryOp_r_frm_m<0b1100010, 0b00000, Ext, GPR, Ext.PrimaryTy,
+ "fcvt.w.h">,
+ Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]>;
+
+ let IsSignExtendingOpW = 1 in
+ defm FCVT_WU_H : FPUnaryOp_r_frm_m<0b1100010, 0b00001, Ext, GPR, Ext.PrimaryTy,
+ "fcvt.wu.h">,
+ Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]>;
+
+ defm FCVT_H_W : FPUnaryOp_r_frm_m<0b1101010, 0b00000, Ext, Ext.PrimaryTy, GPR,
+ "fcvt.h.w">,
+ Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]>;
+
+ defm FCVT_H_WU : FPUnaryOp_r_frm_m<0b1101010, 0b00001, Ext, Ext.PrimaryTy, GPR,
+ "fcvt.h.wu">,
+ Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]>;
+} // foreach Ext = ZfhExts
+
+foreach Ext = ZfhminExts in {
+ defm FCVT_H_S : FPUnaryOp_r_frm_m<0b0100010, 0b00000, Ext, Ext.PrimaryTy,
+ Ext.F32Ty, "fcvt.h.s">,
+ Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]>;
+
+ defm FCVT_S_H : FPUnaryOp_r_m<0b0100000, 0b00010, 0b000, Ext, Ext.F32Ty,
+ Ext.PrimaryTy, "fcvt.s.h">,
+ Sched<[WriteFCvtF16ToF32, ReadFCvtF16ToF32]>;
+} // foreach Ext = ZfhminExts
+
+let Predicates = [HasHalfFPLoadStoreMove] in {
let mayRaiseFPException = 0, IsSignExtendingOpW = 1 in
def FMV_X_H : FPUnaryOp_r<0b1110010, 0b00000, 0b000, GPR, FPR16, "fmv.x.h">,
Sched<[WriteFMovF16ToI16, ReadFMovF16ToI16]>;
@@ -172,40 +155,46 @@ def FMV_X_H : FPUnaryOp_r<0b1110010, 0b00000, 0b000, GPR, FPR16, "fmv.x.h">,
let mayRaiseFPException = 0 in
def FMV_H_X : FPUnaryOp_r<0b1111010, 0b00000, 0b000, FPR16, GPR, "fmv.h.x">,
Sched<[WriteFMovI16ToF16, ReadFMovI16ToF16]>;
-} // Predicates = [HasStdExtZfhOrZfhmin]
-
-let SchedRW = [WriteFCmp16, ReadFCmp16, ReadFCmp16] in {
-defm FEQ_H : FPCmp_rr_m<0b1010010, 0b010, "feq.h", HINX, /*Commutable*/1>;
-defm FLT_H : FPCmp_rr_m<0b1010010, 0b001, "flt.h", HINX>;
-defm FLE_H : FPCmp_rr_m<0b1010010, 0b000, "fle.h", HINX>;
-}
-
-let mayRaiseFPException = 0 in
-defm FCLASS_H : FPUnaryOp_r_m<0b1110010, 0b00000, 0b001, XHINX, "fclass.h">,
- Sched<[WriteFClass16, ReadFClass16]>;
-
-defm FCVT_L_H : FPUnaryOp_r_frm_m<0b1100010, 0b00010, XHIN64X, "fcvt.l.h">,
- Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_L_H, "fcvt.l.h", XHIN64X>;
-
-defm FCVT_LU_H : FPUnaryOp_r_frm_m<0b1100010, 0b00011, XHIN64X, "fcvt.lu.h">,
- Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_LU_H, "fcvt.lu.h", XHIN64X>;
-
-defm FCVT_H_L : FPUnaryOp_r_frm_m<0b1101010, 0b00010, HXIN64X, "fcvt.h.l">,
- Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_H_L, "fcvt.h.l", HXIN64X>;
-
-defm FCVT_H_LU : FPUnaryOp_r_frm_m<0b1101010, 0b00011, HXIN64X, "fcvt.h.lu">,
- Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_H_LU, "fcvt.h.lu", HXIN64X>;
-
-defm FCVT_H_D : FPUnaryOp_r_frm_m<0b0100010, 0b00001, HDINXmin, "fcvt.h.d">,
- Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]>;
-defm : FPUnaryOpDynFrmAlias_m<FCVT_H_D, "fcvt.h.d", HDINXmin>;
-
-defm FCVT_D_H : FPUnaryOp_r_m<0b0100001, 0b00010, 0b000, DHINXmin, "fcvt.d.h">,
- Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]>;
+} // Predicates = [HasHalfFPLoadStoreMove]
+
+foreach Ext = ZfhExts in {
+ let SchedRW = [WriteFCmp16, ReadFCmp16, ReadFCmp16] in {
+ defm FEQ_H : FPCmp_rr_m<0b1010010, 0b010, "feq.h", Ext, Commutable=1>;
+ defm FLT_H : FPCmp_rr_m<0b1010010, 0b001, "flt.h", Ext>;
+ defm FLE_H : FPCmp_rr_m<0b1010010, 0b000, "fle.h", Ext>;
+ }
+
+ let mayRaiseFPException = 0 in
+ defm FCLASS_H : FPUnaryOp_r_m<0b1110010, 0b00000, 0b001, Ext, GPR, Ext.PrimaryTy,
+ "fclass.h">,
+ Sched<[WriteFClass16, ReadFClass16]>;
+
+ defm FCVT_L_H : FPUnaryOp_r_frm_m<0b1100010, 0b00010, Ext, GPR, Ext.PrimaryTy,
+ "fcvt.l.h", [IsRV64]>,
+ Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]>;
+
+ defm FCVT_LU_H : FPUnaryOp_r_frm_m<0b1100010, 0b00011, Ext, GPR, Ext.PrimaryTy,
+ "fcvt.lu.h", [IsRV64]>,
+ Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]>;
+
+ defm FCVT_H_L : FPUnaryOp_r_frm_m<0b1101010, 0b00010, Ext, Ext.PrimaryTy, GPR,
+ "fcvt.h.l", [IsRV64]>,
+ Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]>;
+
+ defm FCVT_H_LU : FPUnaryOp_r_frm_m<0b1101010, 0b00011, Ext, Ext.PrimaryTy, GPR,
+ "fcvt.h.lu", [IsRV64]>,
+ Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]>;
+} // foreach Ext = ZfhExts
+
+foreach Ext = ZfhminDExts in {
+ defm FCVT_H_D : FPUnaryOp_r_frm_m<0b0100010, 0b00001, Ext, Ext.F16Ty,
+ Ext.F64Ty, "fcvt.h.d">,
+ Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]>;
+
+ defm FCVT_D_H : FPUnaryOp_r_m<0b0100001, 0b00010, 0b000, Ext, Ext.F64Ty,
+ Ext.F16Ty, "fcvt.d.h">,
+ Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]>;
+} // foreach Ext = ZfhminDExts
//===----------------------------------------------------------------------===//
// Assembler Pseudo Instructions (User-Level ISA, Version 2.2, Chapter 20)
@@ -227,15 +216,16 @@ def : InstAlias<"fgt.h $rd, $rs, $rt",
(FLT_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
def : InstAlias<"fge.h $rd, $rs, $rt",
(FLE_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>;
-} // Predicates = [HasStdExtZfh]
-let Predicates = [HasStdExtZfhOrZfhmin] in {
-def PseudoFLH : PseudoFloatLoad<"flh", FPR16>;
-def PseudoFSH : PseudoStore<"fsh", FPR16>;
let usesCustomInserter = 1 in {
def PseudoQuietFLE_H : PseudoQuietFCMP<FPR16>;
def PseudoQuietFLT_H : PseudoQuietFCMP<FPR16>;
}
+} // Predicates = [HasStdExtZfh]
+
+let Predicates = [HasStdExtZfhOrZfhmin] in {
+def PseudoFLH : PseudoFloatLoad<"flh", FPR16>;
+def PseudoFSH : PseudoStore<"fsh", FPR16>;
} // Predicates = [HasStdExtZfhOrZfhmin]
let Predicates = [HasStdExtZhinx] in {
@@ -247,7 +237,12 @@ def : InstAlias<"fgt.h $rd, $rs, $rt",
(FLT_H_INX GPR:$rd, FPR16INX:$rt, FPR16INX:$rs), 0>;
def : InstAlias<"fge.h $rd, $rs, $rt",
(FLE_H_INX GPR:$rd, FPR16INX:$rt, FPR16INX:$rs), 0>;
-} // Predicates = [HasStdExtZhinx]
+
+let usesCustomInserter = 1 in {
+def PseudoQuietFLE_H_INX : PseudoQuietFCMP<FPR16INX>;
+def PseudoQuietFLT_H_INX : PseudoQuietFCMP<FPR16INX>;
+}
+} // Predicates = [HasStdExtZhinxOrZhinxmin]
//===----------------------------------------------------------------------===//
// Pseudo-instructions and codegen patterns
@@ -255,8 +250,52 @@ def : InstAlias<"fge.h $rd, $rs, $rt",
let Predicates = [HasStdExtZfh] in {
-// Floating point constant -0.0
-def : Pat<(f16 (fpimmneg0)), (FSGNJN_H (FMV_H_X X0), (FMV_H_X X0))>;
+/// Float conversion operations
+
+// [u]int32<->float conversion patterns must be gated on IsRV32 or IsRV64, so
+// are defined later.
+
+/// Float arithmetic operations
+
+def : PatFprFprDynFrm<any_fadd, FADD_H, FPR16, f16>;
+def : PatFprFprDynFrm<any_fsub, FSUB_H, FPR16, f16>;
+def : PatFprFprDynFrm<any_fmul, FMUL_H, FPR16, f16>;
+def : PatFprFprDynFrm<any_fdiv, FDIV_H, FPR16, f16>;
+
+def : Pat<(f16 (any_fsqrt FPR16:$rs1)), (FSQRT_H FPR16:$rs1, FRM_DYN)>;
+
+def : Pat<(f16 (fneg FPR16:$rs1)), (FSGNJN_H $rs1, $rs1)>;
+def : Pat<(f16 (fabs FPR16:$rs1)), (FSGNJX_H $rs1, $rs1)>;
+
+def : Pat<(riscv_fpclass (f16 FPR16:$rs1)), (FCLASS_H $rs1)>;
+
+def : PatFprFpr<fcopysign, FSGNJ_H, FPR16, f16>;
+def : Pat<(f16 (fcopysign FPR16:$rs1, (f16 (fneg FPR16:$rs2)))), (FSGNJN_H $rs1, $rs2)>;
+def : Pat<(f16 (fcopysign FPR16:$rs1, FPR32:$rs2)),
+ (FSGNJ_H $rs1, (FCVT_H_S $rs2, FRM_DYN))>;
+
+// fmadd: rs1 * rs2 + rs3
+def : Pat<(f16 (any_fma FPR16:$rs1, FPR16:$rs2, FPR16:$rs3)),
+ (FMADD_H $rs1, $rs2, $rs3, FRM_DYN)>;
+
+// fmsub: rs1 * rs2 - rs3
+def : Pat<(f16 (any_fma FPR16:$rs1, FPR16:$rs2, (fneg FPR16:$rs3))),
+ (FMSUB_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, FRM_DYN)>;
+
+// fnmsub: -rs1 * rs2 + rs3
+def : Pat<(f16 (any_fma (fneg FPR16:$rs1), FPR16:$rs2, FPR16:$rs3)),
+ (FNMSUB_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, FRM_DYN)>;
+
+// fnmadd: -rs1 * rs2 - rs3
+def : Pat<(f16 (any_fma (fneg FPR16:$rs1), FPR16:$rs2, (fneg FPR16:$rs3))),
+ (FNMADD_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, FRM_DYN)>;
+
+// fnmadd: -(rs1 * rs2 + rs3) (the nsz flag on the FMA)
+def : Pat<(f16 (fneg (any_fma_nsz FPR16:$rs1, FPR16:$rs2, FPR16:$rs3))),
+ (FNMADD_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, FRM_DYN)>;
+} // Predicates = [HasStdExtZfh]
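
The four FMA selection patterns above (here gated on an f16 result) correspond to the usual sign permutations of fused multiply-add, as the inline comments state. A short C++ sketch of that mapping, using double and std::fma purely for readability:

// The four RISC-V FMA flavours in terms of std::fma (single rounding):
//   fmadd  = fma( a, b,  c)      fmsub  = fma( a, b, -c)
//   fnmsub = fma(-a, b,  c)      fnmadd = fma(-a, b, -c)
#include <cmath>
#include <cassert>

int main() {
  double a = 2.0, b = 3.0, c = 4.0;
  assert(std::fma(a, b, c)   ==  10.0);   // fmadd:  a*b + c
  assert(std::fma(a, b, -c)  ==   2.0);   // fmsub:  a*b - c
  assert(std::fma(-a, b, c)  ==  -2.0);   // fnmsub: -(a*b) + c
  assert(std::fma(-a, b, -c) == -10.0);   // fnmadd: -(a*b) - c
}
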
+
+let Predicates = [HasStdExtZhinx] in {
/// Float conversion operations
@@ -265,205 +304,364 @@ def : Pat<(f16 (fpimmneg0)), (FSGNJN_H (FMV_H_X X0), (FMV_H_X X0))>;
/// Float arithmetic operations
-def : PatFprFprDynFrm<any_fadd, FADD_H, FPR16>;
-def : PatFprFprDynFrm<any_fsub, FSUB_H, FPR16>;
-def : PatFprFprDynFrm<any_fmul, FMUL_H, FPR16>;
-def : PatFprFprDynFrm<any_fdiv, FDIV_H, FPR16>;
+def : PatFprFprDynFrm<any_fadd, FADD_H_INX, FPR16INX, f16>;
+def : PatFprFprDynFrm<any_fsub, FSUB_H_INX, FPR16INX, f16>;
+def : PatFprFprDynFrm<any_fmul, FMUL_H_INX, FPR16INX, f16>;
+def : PatFprFprDynFrm<any_fdiv, FDIV_H_INX, FPR16INX, f16>;
+
+def : Pat<(any_fsqrt FPR16INX:$rs1), (FSQRT_H_INX FPR16INX:$rs1, FRM_DYN)>;
-def : Pat<(any_fsqrt FPR16:$rs1), (FSQRT_H FPR16:$rs1, 0b111)>;
+def : Pat<(fneg FPR16INX:$rs1), (FSGNJN_H_INX $rs1, $rs1)>;
+def : Pat<(fabs FPR16INX:$rs1), (FSGNJX_H_INX $rs1, $rs1)>;
-def : Pat<(fneg FPR16:$rs1), (FSGNJN_H $rs1, $rs1)>;
-def : Pat<(fabs FPR16:$rs1), (FSGNJX_H $rs1, $rs1)>;
+def : Pat<(riscv_fpclass FPR16INX:$rs1), (FCLASS_H_INX $rs1)>;
-def : PatFprFpr<fcopysign, FSGNJ_H, FPR16>;
-def : Pat<(fcopysign FPR16:$rs1, (fneg FPR16:$rs2)), (FSGNJN_H $rs1, $rs2)>;
-def : Pat<(fcopysign FPR16:$rs1, FPR32:$rs2),
- (FSGNJ_H $rs1, (FCVT_H_S $rs2, 0b111))>;
+def : PatFprFpr<fcopysign, FSGNJ_H_INX, FPR16INX, f16>;
+def : Pat<(fcopysign FPR16INX:$rs1, (fneg FPR16INX:$rs2)), (FSGNJN_H_INX $rs1, $rs2)>;
+def : Pat<(fcopysign FPR16INX:$rs1, FPR32INX:$rs2),
+ (FSGNJ_H_INX $rs1, (FCVT_H_S_INX $rs2, FRM_DYN))>;
// fmadd: rs1 * rs2 + rs3
-def : Pat<(any_fma FPR16:$rs1, FPR16:$rs2, FPR16:$rs3),
- (FMADD_H $rs1, $rs2, $rs3, 0b111)>;
+def : Pat<(any_fma FPR16INX:$rs1, FPR16INX:$rs2, FPR16INX:$rs3),
+ (FMADD_H_INX $rs1, $rs2, $rs3, FRM_DYN)>;
// fmsub: rs1 * rs2 - rs3
-def : Pat<(any_fma FPR16:$rs1, FPR16:$rs2, (fneg FPR16:$rs3)),
- (FMSUB_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
+def : Pat<(any_fma FPR16INX:$rs1, FPR16INX:$rs2, (fneg FPR16INX:$rs3)),
+ (FMSUB_H_INX FPR16INX:$rs1, FPR16INX:$rs2, FPR16INX:$rs3, FRM_DYN)>;
// fnmsub: -rs1 * rs2 + rs3
-def : Pat<(any_fma (fneg FPR16:$rs1), FPR16:$rs2, FPR16:$rs3),
- (FNMSUB_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
+def : Pat<(any_fma (fneg FPR16INX:$rs1), FPR16INX:$rs2, FPR16INX:$rs3),
+ (FNMSUB_H_INX FPR16INX:$rs1, FPR16INX:$rs2, FPR16INX:$rs3, FRM_DYN)>;
// fnmadd: -rs1 * rs2 - rs3
-def : Pat<(any_fma (fneg FPR16:$rs1), FPR16:$rs2, (fneg FPR16:$rs3)),
- (FNMADD_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
+def : Pat<(any_fma (fneg FPR16INX:$rs1), FPR16INX:$rs2, (fneg FPR16INX:$rs3)),
+ (FNMADD_H_INX FPR16INX:$rs1, FPR16INX:$rs2, FPR16INX:$rs3, FRM_DYN)>;
// fnmadd: -(rs1 * rs2 + rs3) (the nsz flag on the FMA)
-def : Pat<(fneg (any_fma_nsz FPR16:$rs1, FPR16:$rs2, FPR16:$rs3)),
- (FNMADD_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
+def : Pat<(fneg (any_fma_nsz FPR16INX:$rs1, FPR16INX:$rs2, FPR16INX:$rs3)),
+ (FNMADD_H_INX FPR16INX:$rs1, FPR16INX:$rs2, FPR16INX:$rs3, FRM_DYN)>;
+} // Predicates = [HasStdExtZhinx]
// The ratified 20191213 ISA spec defines fmin and fmax in a way that matches
// LLVM's fminnum and fmaxnum
// <https://github.com/riscv/riscv-isa-manual/commit/cd20cee7efd9bac7c5aa127ec3b451749d2b3cce>.
-def : PatFprFpr<fminnum, FMIN_H, FPR16>;
-def : PatFprFpr<fmaxnum, FMAX_H, FPR16>;
+foreach Ext = ZfhExts in {
+ defm : PatFprFpr_m<fminnum, FMIN_H, Ext>;
+ defm : PatFprFpr_m<fmaxnum, FMAX_H, Ext>;
+ defm : PatFprFpr_m<riscv_fmin, FMIN_H, Ext>;
+ defm : PatFprFpr_m<riscv_fmax, FMAX_H, Ext>;
+}
/// Setcc
// FIXME: SETEQ/SETLT/SETLE imply nonans, can we pick better instructions for
// strict versions of those.
// Match non-signaling FEQ_D
-def : PatSetCC<FPR16, any_fsetcc, SETEQ, FEQ_H>;
-def : PatSetCC<FPR16, any_fsetcc, SETOEQ, FEQ_H>;
-def : PatSetCC<FPR16, strict_fsetcc, SETLT, PseudoQuietFLT_H>;
-def : PatSetCC<FPR16, strict_fsetcc, SETOLT, PseudoQuietFLT_H>;
-def : PatSetCC<FPR16, strict_fsetcc, SETLE, PseudoQuietFLE_H>;
-def : PatSetCC<FPR16, strict_fsetcc, SETOLE, PseudoQuietFLE_H>;
+foreach Ext = ZfhExts in {
+ defm : PatSetCC_m<any_fsetcc, SETEQ, FEQ_H, Ext, f16>;
+ defm : PatSetCC_m<any_fsetcc, SETOEQ, FEQ_H, Ext, f16>;
+ defm : PatSetCC_m<strict_fsetcc, SETLT, PseudoQuietFLT_H, Ext, f16>;
+ defm : PatSetCC_m<strict_fsetcc, SETOLT, PseudoQuietFLT_H, Ext, f16>;
+ defm : PatSetCC_m<strict_fsetcc, SETLE, PseudoQuietFLE_H, Ext, f16>;
+ defm : PatSetCC_m<strict_fsetcc, SETOLE, PseudoQuietFLE_H, Ext, f16>;
+}
+let Predicates = [HasStdExtZfh] in {
// Match signaling FEQ_H
-def : Pat<(strict_fsetccs FPR16:$rs1, FPR16:$rs2, SETEQ),
+def : Pat<(XLenVT (strict_fsetccs (f16 FPR16:$rs1), FPR16:$rs2, SETEQ)),
(AND (FLE_H $rs1, $rs2),
(FLE_H $rs2, $rs1))>;
-def : Pat<(strict_fsetccs FPR16:$rs1, FPR16:$rs2, SETOEQ),
+def : Pat<(XLenVT (strict_fsetccs (f16 FPR16:$rs1), FPR16:$rs2, SETOEQ)),
(AND (FLE_H $rs1, $rs2),
(FLE_H $rs2, $rs1))>;
// If both operands are the same, use a single FLE.
-def : Pat<(strict_fsetccs FPR16:$rs1, FPR16:$rs1, SETEQ),
+def : Pat<(XLenVT (strict_fsetccs (f16 FPR16:$rs1), (f16 FPR16:$rs1), SETEQ)),
(FLE_H $rs1, $rs1)>;
-def : Pat<(strict_fsetccs FPR16:$rs1, FPR16:$rs1, SETOEQ),
+def : Pat<(XLenVT (strict_fsetccs (f16 FPR16:$rs1), (f16 FPR16:$rs1), SETOEQ)),
(FLE_H $rs1, $rs1)>;
+} // Predicates = [HasStdExtZfh]
-def : PatSetCC<FPR16, any_fsetccs, SETLT, FLT_H>;
-def : PatSetCC<FPR16, any_fsetccs, SETOLT, FLT_H>;
-def : PatSetCC<FPR16, any_fsetccs, SETLE, FLE_H>;
-def : PatSetCC<FPR16, any_fsetccs, SETOLE, FLE_H>;
+let Predicates = [HasStdExtZhinx] in {
+// Match signaling FEQ_H
+def : Pat<(XLenVT (strict_fsetccs FPR16INX:$rs1, FPR16INX:$rs2, SETEQ)),
+ (AND (FLE_H_INX $rs1, $rs2),
+ (FLE_H_INX $rs2, $rs1))>;
+def : Pat<(XLenVT (strict_fsetccs FPR16INX:$rs1, FPR16INX:$rs2, SETOEQ)),
+ (AND (FLE_H_INX $rs1, $rs2),
+ (FLE_H_INX $rs2, $rs1))>;
+// If both operands are the same, use a single FLE.
+def : Pat<(XLenVT (strict_fsetccs FPR16INX:$rs1, FPR16INX:$rs1, SETEQ)),
+ (FLE_H_INX $rs1, $rs1)>;
+def : Pat<(XLenVT (strict_fsetccs FPR16INX:$rs1, FPR16INX:$rs1, SETOEQ)),
+ (FLE_H_INX $rs1, $rs1)>;
+} // Predicates = [HasStdExtZhinx]
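
The signaling-compare patterns above synthesize an ordered equality test from two FLE instructions, because FLE signals on a quiet NaN operand while FEQ does not. The identity and its NaN behaviour, sketched in C++ where the <= operator plays the role of a signaling comparison:

// For non-NaN a, b:  (a <= b) && (b <= a)  <=>  a == b.  Unlike ==, each
// <= raises the invalid flag when either operand is NaN, which is what
// strict_fsetccs (a signaling compare) requires.
#include <cassert>
#include <cmath>

bool signaling_eq(double a, double b) {
  return (a <= b) && (b <= a);    // mirrors (AND (FLE a, b), (FLE b, a))
}

int main() {
  assert(signaling_eq(1.5, 1.5));
  assert(!signaling_eq(1.5, 2.5));
  assert(!signaling_eq(NAN, 1.5));  // compares false; would also signal
}
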
-defm Select_FPR16 : SelectCC_GPR_rrirr<FPR16>;
+foreach Ext = ZfhExts in {
+ defm : PatSetCC_m<any_fsetccs, SETLT, FLT_H, Ext, f16>;
+ defm : PatSetCC_m<any_fsetccs, SETOLT, FLT_H, Ext, f16>;
+ defm : PatSetCC_m<any_fsetccs, SETLE, FLE_H, Ext, f16>;
+ defm : PatSetCC_m<any_fsetccs, SETOLE, FLE_H, Ext, f16>;
+}
+
+let Predicates = [HasStdExtZfh] in {
+defm Select_FPR16 : SelectCC_GPR_rrirr<FPR16, f16>;
-def PseudoFROUND_H : PseudoFROUND<FPR16>;
+def PseudoFROUND_H : PseudoFROUND<FPR16, f16>;
} // Predicates = [HasStdExtZfh]
+let Predicates = [HasStdExtZhinx] in {
+defm Select_FPR16INX : SelectCC_GPR_rrirr<FPR16INX, f16>;
+
+def PseudoFROUND_H_INX : PseudoFROUND<FPR16INX, f16>;
+} // Predicates = [HasStdExtZhinx]
+
let Predicates = [HasStdExtZfhOrZfhmin] in {
/// Loads
-
-defm : LdPat<load, FLH, f16>;
+def : LdPat<load, FLH, f16>;
/// Stores
+def : StPat<store, FSH, FPR16, f16>;
+} // Predicates = [HasStdExtZfhOrZfhmin]
-defm : StPat<store, FSH, FPR16, f16>;
+let Predicates = [HasStdExtZhinxOrZhinxmin] in {
+/// Loads
+def : Pat<(f16 (load GPR:$rs1)), (COPY_TO_REGCLASS (LH GPR:$rs1, 0), GPRF16)>;
-/// Floating point constant +0.0
-def : Pat<(f16 (fpimm0)), (FMV_H_X X0)>;
+/// Stores
+def : Pat<(store (f16 FPR16INX:$rs2), GPR:$rs1),
+ (SH (COPY_TO_REGCLASS FPR16INX:$rs2, GPR), GPR:$rs1, 0)>;
+} // Predicates = [HasStdExtZhinxOrZhinxmin]
+let Predicates = [HasStdExtZfhOrZfhmin] in {
/// Float conversion operations
// f32 -> f16, f16 -> f32
-def : Pat<(any_fpround FPR32:$rs1), (FCVT_H_S FPR32:$rs1, 0b111)>;
-def : Pat<(any_fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>;
+def : Pat<(f16 (any_fpround FPR32:$rs1)), (FCVT_H_S FPR32:$rs1, FRM_DYN)>;
+def : Pat<(any_fpextend (f16 FPR16:$rs1)), (FCVT_S_H FPR16:$rs1)>;
// Moves (no conversion)
-def : Pat<(riscv_fmv_h_x GPR:$src), (FMV_H_X GPR:$src)>;
-def : Pat<(riscv_fmv_x_anyexth FPR16:$src), (FMV_X_H FPR16:$src)>;
-def : Pat<(riscv_fmv_x_signexth FPR16:$src), (FMV_X_H FPR16:$src)>;
+def : Pat<(f16 (riscv_fmv_h_x GPR:$src)), (FMV_H_X GPR:$src)>;
+def : Pat<(riscv_fmv_x_anyexth (f16 FPR16:$src)), (FMV_X_H FPR16:$src)>;
+def : Pat<(riscv_fmv_x_signexth (f16 FPR16:$src)), (FMV_X_H FPR16:$src)>;
-def : Pat<(fcopysign FPR32:$rs1, FPR16:$rs2), (FSGNJ_S $rs1, (FCVT_S_H $rs2))>;
+def : Pat<(fcopysign FPR32:$rs1, (f16 FPR16:$rs2)), (FSGNJ_S $rs1, (FCVT_S_H $rs2))>;
} // Predicates = [HasStdExtZfhOrZfhmin]
+let Predicates = [HasStdExtZhinxOrZhinxmin] in {
+/// Float conversion operations
+
+// f32 -> f16, f16 -> f32
+def : Pat<(any_fpround FPR32INX:$rs1), (FCVT_H_S_INX FPR32INX:$rs1, FRM_DYN)>;
+def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_S_H_INX FPR16INX:$rs1)>;
+
+// Moves (no conversion)
+def : Pat<(f16 (riscv_fmv_h_x GPR:$src)), (COPY_TO_REGCLASS GPR:$src, GPR)>;
+def : Pat<(riscv_fmv_x_anyexth FPR16INX:$src), (COPY_TO_REGCLASS FPR16INX:$src, GPR)>;
+def : Pat<(riscv_fmv_x_signexth FPR16INX:$src), (COPY_TO_REGCLASS FPR16INX:$src, GPR)>;
+
+def : Pat<(fcopysign FPR32INX:$rs1, FPR16INX:$rs2), (FSGNJ_S_INX $rs1, (FCVT_S_H_INX $rs2))>;
+} // Predicates = [HasStdExtZhinxOrZhinxmin]
+
let Predicates = [HasStdExtZfh, IsRV32] in {
// half->[u]int. Round-to-zero must be used.
-def : Pat<(i32 (any_fp_to_sint FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>;
-def : Pat<(i32 (any_fp_to_uint FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>;
+def : Pat<(i32 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_W_H $rs1, 0b001)>;
+def : Pat<(i32 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_WU_H $rs1, 0b001)>;
// Saturating half->[u]int32.
-def : Pat<(i32 (riscv_fcvt_x FPR16:$rs1, timm:$frm)), (FCVT_W_H $rs1, timm:$frm)>;
-def : Pat<(i32 (riscv_fcvt_xu FPR16:$rs1, timm:$frm)), (FCVT_WU_H $rs1, timm:$frm)>;
+def : Pat<(i32 (riscv_fcvt_x (f16 FPR16:$rs1), timm:$frm)), (FCVT_W_H $rs1, timm:$frm)>;
+def : Pat<(i32 (riscv_fcvt_xu (f16 FPR16:$rs1), timm:$frm)), (FCVT_WU_H $rs1, timm:$frm)>;
// half->int32 with current rounding mode.
-def : Pat<(i32 (any_lrint FPR16:$rs1)), (FCVT_W_H $rs1, 0b111)>;
+def : Pat<(i32 (any_lrint (f16 FPR16:$rs1))), (FCVT_W_H $rs1, FRM_DYN)>;
// half->int32 rounded to nearest with ties rounded away from zero.
-def : Pat<(i32 (any_lround FPR16:$rs1)), (FCVT_W_H $rs1, 0b100)>;
+def : Pat<(i32 (any_lround (f16 FPR16:$rs1))), (FCVT_W_H $rs1, FRM_RMM)>;
// [u]int->half. Match GCC and default to using dynamic rounding mode.
-def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W $rs1, 0b111)>;
-def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_WU $rs1, 0b111)>;
+def : Pat<(f16 (any_sint_to_fp (i32 GPR:$rs1))), (FCVT_H_W $rs1, FRM_DYN)>;
+def : Pat<(f16 (any_uint_to_fp (i32 GPR:$rs1))), (FCVT_H_WU $rs1, FRM_DYN)>;
} // Predicates = [HasStdExtZfh, IsRV32]
+let Predicates = [HasStdExtZhinx, IsRV32] in {
+// half->[u]int. Round-to-zero must be used.
+def : Pat<(i32 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_W_H_INX $rs1, 0b001)>;
+def : Pat<(i32 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_WU_H_INX $rs1, 0b001)>;
+
+// Saturating half->[u]int32.
+def : Pat<(i32 (riscv_fcvt_x FPR16INX:$rs1, timm:$frm)), (FCVT_W_H_INX $rs1, timm:$frm)>;
+def : Pat<(i32 (riscv_fcvt_xu FPR16INX:$rs1, timm:$frm)), (FCVT_WU_H_INX $rs1, timm:$frm)>;
+
+// half->int32 with current rounding mode.
+def : Pat<(i32 (any_lrint FPR16INX:$rs1)), (FCVT_W_H_INX $rs1, FRM_DYN)>;
+
+// half->int32 rounded to nearest with ties rounded away from zero.
+def : Pat<(i32 (any_lround FPR16INX:$rs1)), (FCVT_W_H_INX $rs1, FRM_RMM)>;
+
+// [u]int->half. Match GCC and default to using dynamic rounding mode.
+def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W_INX $rs1, FRM_DYN)>;
+def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_WU_INX $rs1, FRM_DYN)>;
+} // Predicates = [HasStdExtZhinx, IsRV32]
+
let Predicates = [HasStdExtZfh, IsRV64] in {
// Use target specific isd nodes to help us remember the result is sign
// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
// duplicated if it has another user that didn't need the sign_extend.
-def : Pat<(riscv_any_fcvt_w_rv64 FPR16:$rs1, timm:$frm), (FCVT_W_H $rs1, timm:$frm)>;
-def : Pat<(riscv_any_fcvt_wu_rv64 FPR16:$rs1, timm:$frm), (FCVT_WU_H $rs1, timm:$frm)>;
+def : Pat<(riscv_any_fcvt_w_rv64 (f16 FPR16:$rs1), timm:$frm), (FCVT_W_H $rs1, timm:$frm)>;
+def : Pat<(riscv_any_fcvt_wu_rv64 (f16 FPR16:$rs1), timm:$frm), (FCVT_WU_H $rs1, timm:$frm)>;
// half->[u]int64. Round-to-zero must be used.
-def : Pat<(i64 (any_fp_to_sint FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>;
-def : Pat<(i64 (any_fp_to_uint FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>;
+def : Pat<(i64 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_L_H $rs1, 0b001)>;
+def : Pat<(i64 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_LU_H $rs1, 0b001)>;
// Saturating half->[u]int64.
-def : Pat<(i64 (riscv_fcvt_x FPR16:$rs1, timm:$frm)), (FCVT_L_H $rs1, timm:$frm)>;
-def : Pat<(i64 (riscv_fcvt_xu FPR16:$rs1, timm:$frm)), (FCVT_LU_H $rs1, timm:$frm)>;
+def : Pat<(i64 (riscv_fcvt_x (f16 FPR16:$rs1), timm:$frm)), (FCVT_L_H $rs1, timm:$frm)>;
+def : Pat<(i64 (riscv_fcvt_xu (f16 FPR16:$rs1), timm:$frm)), (FCVT_LU_H $rs1, timm:$frm)>;
// half->int64 with current rounding mode.
-def : Pat<(i64 (any_lrint FPR16:$rs1)), (FCVT_L_H $rs1, 0b111)>;
-def : Pat<(i64 (any_llrint FPR16:$rs1)), (FCVT_L_H $rs1, 0b111)>;
+def : Pat<(i64 (any_lrint (f16 FPR16:$rs1))), (FCVT_L_H $rs1, FRM_DYN)>;
+def : Pat<(i64 (any_llrint (f16 FPR16:$rs1))), (FCVT_L_H $rs1, FRM_DYN)>;
// half->int64 rounded to nearest with ties rounded away from zero.
-def : Pat<(i64 (any_lround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>;
-def : Pat<(i64 (any_llround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>;
+def : Pat<(i64 (any_lround (f16 FPR16:$rs1))), (FCVT_L_H $rs1, FRM_RMM)>;
+def : Pat<(i64 (any_llround (f16 FPR16:$rs1))), (FCVT_L_H $rs1, FRM_RMM)>;
// [u]int->fp. Match GCC and default to using dynamic rounding mode.
-def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_H_W $rs1, 0b111)>;
-def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_H_WU $rs1, 0b111)>;
-def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_H_L $rs1, 0b111)>;
-def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_H_LU $rs1, 0b111)>;
+def : Pat<(f16 (any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1))))), (FCVT_H_W $rs1, FRM_DYN)>;
+def : Pat<(f16 (any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1))))), (FCVT_H_WU $rs1, FRM_DYN)>;
+def : Pat<(f16 (any_sint_to_fp (i64 GPR:$rs1))), (FCVT_H_L $rs1, FRM_DYN)>;
+def : Pat<(f16 (any_uint_to_fp (i64 GPR:$rs1))), (FCVT_H_LU $rs1, FRM_DYN)>;
} // Predicates = [HasStdExtZfh, IsRV64]
+let Predicates = [HasStdExtZhinx, IsRV64] in {
+// Use target specific isd nodes to help us remember the result is sign
+// extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
+// duplicated if it has another user that didn't need the sign_extend.
+def : Pat<(riscv_any_fcvt_w_rv64 FPR16INX:$rs1, timm:$frm), (FCVT_W_H_INX $rs1, timm:$frm)>;
+def : Pat<(riscv_any_fcvt_wu_rv64 FPR16INX:$rs1, timm:$frm), (FCVT_WU_H_INX $rs1, timm:$frm)>;
+
+// half->[u]int64. Round-to-zero must be used.
+def : Pat<(i64 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_L_H_INX $rs1, 0b001)>;
+def : Pat<(i64 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_LU_H_INX $rs1, 0b001)>;
+
+// Saturating half->[u]int64.
+def : Pat<(i64 (riscv_fcvt_x FPR16INX:$rs1, timm:$frm)), (FCVT_L_H_INX $rs1, timm:$frm)>;
+def : Pat<(i64 (riscv_fcvt_xu FPR16INX:$rs1, timm:$frm)), (FCVT_LU_H_INX $rs1, timm:$frm)>;
+
+// half->int64 with current rounding mode.
+def : Pat<(i64 (any_lrint FPR16INX:$rs1)), (FCVT_L_H_INX $rs1, FRM_DYN)>;
+def : Pat<(i64 (any_llrint FPR16INX:$rs1)), (FCVT_L_H_INX $rs1, FRM_DYN)>;
+
+// half->int64 rounded to nearest with ties rounded away from zero.
+def : Pat<(i64 (any_lround FPR16INX:$rs1)), (FCVT_L_H_INX $rs1, FRM_RMM)>;
+def : Pat<(i64 (any_llround FPR16INX:$rs1)), (FCVT_L_H_INX $rs1, FRM_RMM)>;
+
+// [u]int->fp. Match GCC and default to using dynamic rounding mode.
+def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_H_W_INX $rs1, FRM_DYN)>;
+def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_H_WU_INX $rs1, FRM_DYN)>;
+def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_H_L_INX $rs1, FRM_DYN)>;
+def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_H_LU_INX $rs1, FRM_DYN)>;
+} // Predicates = [HasStdExtZhinx, IsRV64]
+
let Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD] in {
/// Float conversion operations
// f64 -> f16, f16 -> f64
-def : Pat<(any_fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>;
-def : Pat<(any_fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>;
+def : Pat<(f16 (any_fpround FPR64:$rs1)), (FCVT_H_D FPR64:$rs1, FRM_DYN)>;
+def : Pat<(any_fpextend (f16 FPR16:$rs1)), (FCVT_D_H FPR16:$rs1)>;
/// Float arithmetic operations
-def : Pat<(fcopysign FPR16:$rs1, FPR64:$rs2),
- (FSGNJ_H $rs1, (FCVT_H_D $rs2, 0b111))>;
-def : Pat<(fcopysign FPR64:$rs1, FPR16:$rs2), (FSGNJ_D $rs1, (FCVT_D_H $rs2))>;
+def : Pat<(f16 (fcopysign FPR16:$rs1, FPR64:$rs2)),
+ (FSGNJ_H $rs1, (FCVT_H_D $rs2, FRM_DYN))>;
+def : Pat<(fcopysign FPR64:$rs1, (f16 FPR16:$rs2)), (FSGNJ_D $rs1, (FCVT_D_H $rs2))>;
} // Predicates = [HasStdExtZfhOrZfhmin, HasStdExtD]
-let Predicates = [HasStdExtZfhmin, NoStdExtZfh] in {
-// Floating point constant -0.0
-def : Pat<(f16 (fpimmneg0)), (FCVT_H_S (FSGNJN_S (FMV_W_X X0), (FMV_W_X X0)), 0b111)>;
-} // Predicates = [HasStdExtZfhmin, NoStdExtZfh]
+let Predicates = [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV32] in {
+/// Float conversion operations
+// f64 -> f16, f16 -> f64
+def : Pat<(any_fpround FPR64IN32X:$rs1), (FCVT_H_D_IN32X FPR64IN32X:$rs1, FRM_DYN)>;
+def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_D_H_IN32X FPR16INX:$rs1)>;
+
+/// Float arithmetic operations
+def : Pat<(fcopysign FPR16INX:$rs1, FPR64IN32X:$rs2),
+ (FSGNJ_H_INX $rs1, (FCVT_H_D_IN32X $rs2, 0b111))>;
+def : Pat<(fcopysign FPR64IN32X:$rs1, FPR16INX:$rs2), (FSGNJ_D_IN32X $rs1, (FCVT_D_H_IN32X $rs2))>;
+} // Predicates = [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV32]
+
+let Predicates = [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV64] in {
+/// Float conversion operations
+// f64 -> f16, f16 -> f64
+def : Pat<(any_fpround FPR64INX:$rs1), (FCVT_H_D_INX FPR64INX:$rs1, FRM_DYN)>;
+def : Pat<(any_fpextend FPR16INX:$rs1), (FCVT_D_H_INX FPR16INX:$rs1)>;
+
+/// Float arithmetic operations
+def : Pat<(fcopysign FPR16INX:$rs1, FPR64INX:$rs2),
+ (FSGNJ_H_INX $rs1, (FCVT_H_D_INX $rs2, 0b111))>;
+def : Pat<(fcopysign FPR64INX:$rs1, FPR16INX:$rs2), (FSGNJ_D_INX $rs1, (FCVT_D_H_INX $rs2))>;
+} // Predicates = [HasStdExtZhinxOrZhinxmin, HasStdExtZdinx, IsRV64]
let Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV32] in {
// half->[u]int. Round-to-zero must be used.
-def : Pat<(i32 (any_fp_to_sint FPR16:$rs1)), (FCVT_W_S (FCVT_S_H $rs1), 0b001)>;
-def : Pat<(i32 (any_fp_to_uint FPR16:$rs1)), (FCVT_WU_S (FCVT_S_H $rs1), 0b001)>;
+def : Pat<(i32 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1), FRM_RTZ)>;
+def : Pat<(i32 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_WU_S (FCVT_S_H $rs1), FRM_RTZ)>;
// half->int32 with current rounding mode.
-def : Pat<(i32 (any_lrint FPR16:$rs1)), (FCVT_W_S (FCVT_S_H $rs1), 0b111)>;
+def : Pat<(i32 (any_lrint (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1), FRM_DYN)>;
// half->int32 rounded to nearest with ties rounded away from zero.
-def : Pat<(i32 (any_lround FPR16:$rs1)), (FCVT_W_S (FCVT_S_H $rs1), 0b100)>;
+def : Pat<(i32 (any_lround (f16 FPR16:$rs1))), (FCVT_W_S (FCVT_S_H $rs1), FRM_RMM)>;
// [u]int->half. Match GCC and default to using dynamic rounding mode.
-def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_S (FCVT_S_W $rs1, 0b111), 0b111)>;
-def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_S (FCVT_S_WU $rs1, 0b111), 0b111)>;
+def : Pat<(f16 (any_sint_to_fp (i32 GPR:$rs1))), (FCVT_H_S (FCVT_S_W $rs1, FRM_DYN), FRM_DYN)>;
+def : Pat<(f16 (any_uint_to_fp (i32 GPR:$rs1))), (FCVT_H_S (FCVT_S_WU $rs1, FRM_DYN), FRM_DYN)>;
} // Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV32]
+let Predicates = [HasStdExtZhinxmin, NoStdExtZhinx, IsRV32] in {
+// half->[u]int. Round-to-zero must be used.
+def : Pat<(i32 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1), FRM_RTZ)>;
+def : Pat<(i32 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_WU_S_INX (FCVT_S_H_INX $rs1), FRM_RTZ)>;
+
+// half->int32 with current rounding mode.
+def : Pat<(i32 (any_lrint FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1), FRM_DYN)>;
+
+// half->int32 rounded to nearest with ties rounded away from zero.
+def : Pat<(i32 (any_lround FPR16INX:$rs1)), (FCVT_W_S_INX (FCVT_S_H_INX $rs1), FRM_RMM)>;
+
+// [u]int->half. Match GCC and default to using dynamic rounding mode.
+def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_W_INX $rs1, FRM_DYN), FRM_DYN)>;
+def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_WU_INX $rs1, FRM_DYN), FRM_DYN)>;
+} // Predicates = [HasStdExtZhinxmin, NoStdExtZhinx, IsRV32]
+
let Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV64] in {
// half->[u]int64. Round-to-zero must be used.
-def : Pat<(i64 (any_fp_to_sint FPR16:$rs1)), (FCVT_L_S (FCVT_S_H $rs1), 0b001)>;
-def : Pat<(i64 (any_fp_to_uint FPR16:$rs1)), (FCVT_LU_S (FCVT_S_H $rs1), 0b001)>;
+def : Pat<(i64 (any_fp_to_sint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_RTZ)>;
+def : Pat<(i64 (any_fp_to_uint (f16 FPR16:$rs1))), (FCVT_LU_S (FCVT_S_H $rs1), FRM_RTZ)>;
// half->int64 with current rounding mode.
-def : Pat<(i64 (any_lrint FPR16:$rs1)), (FCVT_L_S (FCVT_S_H $rs1), 0b111)>;
-def : Pat<(i64 (any_llrint FPR16:$rs1)), (FCVT_L_S (FCVT_S_H $rs1), 0b111)>;
+def : Pat<(i64 (any_lrint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_DYN)>;
+def : Pat<(i64 (any_llrint (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_DYN)>;
// half->int64 rounded to nearest with ties rounded away from zero.
-def : Pat<(i64 (any_lround FPR16:$rs1)), (FCVT_L_S (FCVT_S_H $rs1), 0b100)>;
-def : Pat<(i64 (any_llround FPR16:$rs1)), (FCVT_L_S (FCVT_S_H $rs1), 0b100)>;
+def : Pat<(i64 (any_lround (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_RMM)>;
+def : Pat<(i64 (any_llround (f16 FPR16:$rs1))), (FCVT_L_S (FCVT_S_H $rs1), FRM_RMM)>;
// [u]int->fp. Match GCC and default to using dynamic rounding mode.
-def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_H_S (FCVT_S_L $rs1, 0b111), 0b111)>;
-def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_H_S (FCVT_S_LU $rs1, 0b111), 0b111)>;
+def : Pat<(f16 (any_sint_to_fp (i64 GPR:$rs1))), (FCVT_H_S (FCVT_S_L $rs1, FRM_DYN), FRM_DYN)>;
+def : Pat<(f16 (any_uint_to_fp (i64 GPR:$rs1))), (FCVT_H_S (FCVT_S_LU $rs1, FRM_DYN), FRM_DYN)>;
} // Predicates = [HasStdExtZfhmin, NoStdExtZfh, IsRV64]
+
+let Predicates = [HasStdExtZhinxmin, NoStdExtZhinx, IsRV64] in {
+// half->[u]int64. Round-to-zero must be used.
+def : Pat<(i64 (any_fp_to_sint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_RTZ)>;
+def : Pat<(i64 (any_fp_to_uint FPR16INX:$rs1)), (FCVT_LU_S_INX (FCVT_S_H_INX $rs1), FRM_RTZ)>;
+
+// half->int64 with current rounding mode.
+def : Pat<(i64 (any_lrint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_DYN)>;
+def : Pat<(i64 (any_llrint FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_DYN)>;
+
+// half->int64 rounded to nearest with ties rounded away from zero.
+def : Pat<(i64 (any_lround FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_RMM)>;
+def : Pat<(i64 (any_llround FPR16INX:$rs1)), (FCVT_L_S_INX (FCVT_S_H_INX $rs1), FRM_RMM)>;
+
+// [u]int->fp. Match GCC and default to using dynamic rounding mode.
+def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_L_INX $rs1, FRM_DYN), FRM_DYN)>;
+def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_H_S_INX (FCVT_S_LU_INX $rs1, FRM_DYN), FRM_DYN)>;
+} // Predicates = [HasStdExtZhinxmin, NoStdExtZhinx, IsRV64]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
index 8cd1fc4cf1ed..509d1cfcd874 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZicbo.td
@@ -42,7 +42,7 @@ class CBO_r<bits<12> optype, string opcodestr>
let rd = 0b00000;
}
-let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
+let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in
class Prefetch_ri<bits<5> optype, string opcodestr>
: RVInstS<0b110, OPC_OP_IMM, (outs), (ins GPR:$rs1, simm12_lsb00000:$imm12),
opcodestr, "${imm12}(${rs1})"> {
@@ -69,3 +69,17 @@ def PREFETCH_I : Prefetch_ri<0b00000, "prefetch.i">, Sched<[]>;
def PREFETCH_R : Prefetch_ri<0b00001, "prefetch.r">, Sched<[]>;
def PREFETCH_W : Prefetch_ri<0b00011, "prefetch.w">, Sched<[]>;
} // Predicates = [HasStdExtZicbop]
+
+//===----------------------------------------------------------------------===//
+// Patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZicbop] in {
+ // FIXME: Match address with offset
+ def : Pat<(prefetch GPR:$rs1, imm, imm, (XLenVT 0)),
+ (PREFETCH_I GPR:$rs1, 0)>;
+ def : Pat<(prefetch GPR:$rs1, (XLenVT 0), imm, (XLenVT 1)),
+ (PREFETCH_R GPR:$rs1, 0)>;
+ def : Pat<(prefetch GPR:$rs1, (XLenVT 1), imm, (XLenVT 1)),
+ (PREFETCH_W GPR:$rs1, 0)>;
+}
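
For context on the new Zicbop patterns above: LLVM's generic prefetch node carries (address, rw, locality, cache type), and the patterns select prefetch.i for instruction-cache prefetches and prefetch.r / prefetch.w for data reads and writes. A hedged C++ sketch of how a source-level prefetch would reach these patterns; __builtin_prefetch only expresses data prefetches, so it should map to the prefetch.r/prefetch.w patterns, with offset 0 per the FIXME:

// Minimal sketch, not part of the patch: __builtin_prefetch(addr, rw, locality)
// produces the generic prefetch node with a data cache type, so with Zicbop it
// should lower via the patterns above.
void warm_cache(const char *p) {
  __builtin_prefetch(p, /*rw=*/0, /*locality=*/3); // data read  -> prefetch.r p, 0
  __builtin_prefetch(p, /*rw=*/1, /*locality=*/3); // data write -> prefetch.w p, 0
}
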
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td
new file mode 100644
index 000000000000..ab0b93d62af5
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZicond.td
@@ -0,0 +1,43 @@
+//===-- RISCVInstrInfoZicond.td ----------------------------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the RISC-V instructions from the standard Integer
+// Conditional operations extension (Zicond).
+// This version is still experimental as the 'Zicond' extension hasn't been
+// ratified yet. It is based on v1.0-rc1 of the specification.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// RISC-V specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def riscv_czero_eqz : SDNode<"RISCVISD::CZERO_EQZ", SDTIntBinOp>;
+def riscv_czero_nez : SDNode<"RISCVISD::CZERO_NEZ", SDTIntBinOp>;
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZicond] in {
+def CZERO_EQZ : ALU_rr<0b0000111, 0b101, "czero.eqz">,
+ Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+def CZERO_NEZ : ALU_rr<0b0000111, 0b111, "czero.nez">,
+ Sched<[WriteIALU, ReadIALU, ReadIALU]>;
+} // Predicates = [HasStdExtZicond]
+
+//===----------------------------------------------------------------------===//
+// Pseudo-instructions and codegen patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZicond] in {
+def : Pat<(XLenVT (riscv_czero_eqz GPR:$rs1, GPR:$rc)),
+ (CZERO_EQZ GPR:$rs1, GPR:$rc)>;
+def : Pat<(XLenVT (riscv_czero_nez GPR:$rs1, GPR:$rc)),
+ (CZERO_NEZ GPR:$rs1, GPR:$rc)>;
+} // Predicates = [HasStdExtZicond]
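
A scalar C++ model of the conditional-zero semantics the two patterns above select for, as described in the (still draft) Zicond specification; this is an illustration, not code from the patch:

#include <cstdint>
// czero.eqz rd, rs1, rc : rd = (rc == 0) ? 0 : rs1
// czero.nez rd, rs1, rc : rd = (rc != 0) ? 0 : rs1
uint64_t czero_eqz(uint64_t rs1, uint64_t rc) { return rc == 0 ? 0 : rs1; }
uint64_t czero_nez(uint64_t rs1, uint64_t rc) { return rc != 0 ? 0 : rs1; }
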
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
index 0ebe3b173ea4..b8c0606034c5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
@@ -15,6 +15,21 @@
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
+def riscv_sha256sig0 : SDNode<"RISCVISD::SHA256SIG0", SDTIntUnaryOp>;
+def riscv_sha256sig1 : SDNode<"RISCVISD::SHA256SIG1", SDTIntUnaryOp>;
+def riscv_sha256sum0 : SDNode<"RISCVISD::SHA256SUM0", SDTIntUnaryOp>;
+def riscv_sha256sum1 : SDNode<"RISCVISD::SHA256SUM1", SDTIntUnaryOp>;
+
+def SDT_RISCVZkByteSelect : SDTypeProfile<1, 3, [SDTCisVT<0, XLenVT>,
+ SDTCisVT<1, XLenVT>,
+ SDTCisVT<2, XLenVT>,
+ SDTCisVT<3, i32>]>;
+def riscv_sm4ks : SDNode<"RISCVISD::SM4KS", SDT_RISCVZkByteSelect>;
+def riscv_sm4ed : SDNode<"RISCVISD::SM4ED", SDT_RISCVZkByteSelect>;
+
+def riscv_sm3p0 : SDNode<"RISCVISD::SM3P0", SDTIntUnaryOp>;
+def riscv_sm3p1 : SDNode<"RISCVISD::SM3P1", SDTIntUnaryOp>;
+
def RnumArg : AsmOperandClass {
let Name = "RnumArg";
let RenderMethod = "addImmOperands";
@@ -29,7 +44,7 @@ def rnum : Operand<i32>, TImmLeaf<i32, [{return (Imm >= 0 && Imm <= 10);}]> {
let OperandNamespace = "RISCVOp";
}
-def byteselect : Operand<i8>, TImmLeaf<i8, [{return isUInt<2>(Imm);}]> {
+def byteselect : Operand<i32>, TImmLeaf<i32, [{return isUInt<2>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<2>;
let DecoderMethod = "decodeUImmOperand<2>";
let OperandType = "OPERAND_UIMM2";
@@ -119,12 +134,12 @@ def SHA512SUM0 : RVKUnary<0b000100000100, 0b001, "sha512sum0">;
def SHA512SUM1 : RVKUnary<0b000100000101, 0b001, "sha512sum1">;
} // Predicates = [HasStdExtZknh, IsRV64]
-let Predicates = [HasStdExtZksed] in {
+let Predicates = [HasStdExtZksed], IsSignExtendingOpW = 1 in {
def SM4ED : RVKByteSelect<0b11000, "sm4ed">;
def SM4KS : RVKByteSelect<0b11010, "sm4ks">;
} // Predicates = [HasStdExtZksed]
-let Predicates = [HasStdExtZksh] in {
+let Predicates = [HasStdExtZksh], IsSignExtendingOpW = 1 in {
def SM3P0 : RVKUnary<0b000100001000, 0b001, "sm3p0">;
def SM3P1 : RVKUnary<0b000100001001, 0b001, "sm3p1">;
} // Predicates = [HasStdExtZksh]
@@ -134,7 +149,7 @@ def SM3P1 : RVKUnary<0b000100001001, 0b001, "sm3p1">;
//===----------------------------------------------------------------------===//
class PatGprGprByteSelect<SDPatternOperator OpNode, RVInst Inst>
- : Pat<(OpNode GPR:$rs1, GPR:$rs2, i8:$imm),
+ : Pat<(XLenVT (OpNode (XLenVT GPR:$rs1), (XLenVT GPR:$rs2), byteselect:$imm)),
(Inst GPR:$rs1, GPR:$rs2, byteselect:$imm)>;
// Zknd
@@ -151,7 +166,7 @@ def : PatGpr<int_riscv_aes64im, AES64IM>;
let Predicates = [HasStdExtZkndOrZkne, IsRV64] in {
def : PatGprGpr<int_riscv_aes64ks2, AES64KS2>;
-def : Pat<(int_riscv_aes64ks1i GPR:$rs1, i32:$rnum),
+def : Pat<(int_riscv_aes64ks1i GPR:$rs1, rnum:$rnum),
(AES64KS1I GPR:$rs1, rnum:$rnum)>;
} // Predicates = [HasStdExtZkndOrZkne, IsRV64]
@@ -168,10 +183,10 @@ def : PatGprGpr<int_riscv_aes64esm, AES64ESM>;
// Zknh
let Predicates = [HasStdExtZknh] in {
-def : PatGpr<int_riscv_sha256sig0, SHA256SIG0>;
-def : PatGpr<int_riscv_sha256sig1, SHA256SIG1>;
-def : PatGpr<int_riscv_sha256sum0, SHA256SUM0>;
-def : PatGpr<int_riscv_sha256sum1, SHA256SUM1>;
+def : PatGpr<riscv_sha256sig0, SHA256SIG0>;
+def : PatGpr<riscv_sha256sig1, SHA256SIG1>;
+def : PatGpr<riscv_sha256sum0, SHA256SUM0>;
+def : PatGpr<riscv_sha256sum1, SHA256SUM1>;
} // Predicates = [HasStdExtZknh]
let Predicates = [HasStdExtZknh, IsRV32] in {
@@ -192,12 +207,12 @@ def : PatGpr<int_riscv_sha512sum1, SHA512SUM1>;
// Zksed
let Predicates = [HasStdExtZksed] in {
-def : PatGprGprByteSelect<int_riscv_sm4ks, SM4KS>;
-def : PatGprGprByteSelect<int_riscv_sm4ed, SM4ED>;
+def : PatGprGprByteSelect<riscv_sm4ks, SM4KS>;
+def : PatGprGprByteSelect<riscv_sm4ed, SM4ED>;
} // Predicates = [HasStdExtZksed]
// Zksh
let Predicates = [HasStdExtZksh] in {
-def : PatGpr<int_riscv_sm3p0, SM3P0>;
-def : PatGpr<int_riscv_sm3p1, SM3P1>;
+def : PatGpr<riscv_sm3p0, SM3P0>;
+def : PatGpr<riscv_sm3p1, SM3P1>;
} // Predicates = [HasStdExtZksh]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
new file mode 100644
index 000000000000..046074d848f5
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -0,0 +1,31 @@
+//===-- RISCVInstrInfoZvfbf.td - 'Zvfbf*' instructions -----*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the RISC-V instructions from the standard 'Zvfbfmin'
+// extension, providing vector conversion instructions for BFloat16.
+// This version is still experimental as the 'Zvfbfmin' extension hasn't been
+// ratified yet.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZvfbfmin], Constraints = "@earlyclobber $vd",
+ mayRaiseFPException = true in {
+let RVVConstraint = WidenCvt in
+defm VFWCVTBF16_F_F_V : VWCVTF_FV_VS2<"vfwcvtbf16.f.f.v", 0b010010, 0b01101>;
+let Uses = [FRM] in
+defm VFNCVTBF16_F_F_W : VNCVTF_FV_VS2<"vfncvtbf16.f.f.w", 0b010010, 0b11101>;
+}
+
+let Predicates = [HasStdExtZvfbfwma], Constraints = "@earlyclobber $vd",
+ RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true in {
+defm VFWMACCBF16_V : VWMAC_FV_V_F<"vfwmaccbf16", 0b111011>;
+}
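
A scalar C++ analogue of the two Zvfbfmin conversions defined above, for orientation: BFloat16 is the upper 16 bits of an IEEE binary32, so the widening convert is exact while the narrowing convert must round (which is why vfncvtbf16.f.f.w lists FRM in Uses). The narrowing below truncates for simplicity; the instruction rounds according to frm.

#include <cstdint>
#include <cstring>
float bf16_to_f32(uint16_t b) {        // exact, like vfwcvtbf16.f.f.v
  uint32_t bits = uint32_t(b) << 16;
  float f;
  std::memcpy(&f, &bits, sizeof f);
  return f;
}
uint16_t f32_to_bf16_trunc(float f) {  // simplified: truncation, no rounding
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof bits);
  return uint16_t(bits >> 16);
}
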
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
new file mode 100644
index 000000000000..13c98ce92d14
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td
@@ -0,0 +1,357 @@
+//===-- RISCVInstrInfoZvk.td - RISC-V 'Zvk' instructions -------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the RISC-V instructions from the standard 'Zvk',
+// Vector Cryptography Instructions extension, version 1.0.0-rc1.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Operand and SDNode transformation definitions.
+//===----------------------------------------------------------------------===//
+
+def RnumArg_0_7 : AsmOperandClass {
+ let Name = "RnumArg_0_7";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "InvalidRnumArg_0_7";
+}
+
+def RnumArg_1_10 : AsmOperandClass {
+ let Name = "RnumArg_1_10";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "InvalidRnumArg_1_10";
+}
+
+def RnumArg_2_14 : AsmOperandClass {
+ let Name = "RnumArg_2_14";
+ let RenderMethod = "addImmOperands";
+ let DiagnosticType = "InvalidRnumArg_2_14";
+}
+
+def rnum_0_7 : Operand<XLenVT>, ImmLeaf<XLenVT,
+ [{return (0 <= Imm && Imm <= 7);}]> {
+ let ParserMatchClass = RnumArg_0_7;
+ let DecoderMethod = "decodeUImmOperand<5>";
+ let OperandType = "OPERAND_RVKRNUM_0_7";
+ let OperandNamespace = "RISCVOp";
+}
+
+def rnum_1_10 : Operand<XLenVT>, ImmLeaf<XLenVT,
+ [{return (1 <= Imm && Imm <= 10);}]> {
+ let ParserMatchClass = RnumArg_1_10;
+ let DecoderMethod = "decodeUImmOperand<5>";
+ let OperandType = "OPERAND_RVKRNUM_1_10";
+ let OperandNamespace = "RISCVOp";
+}
+
+def rnum_2_14 : Operand<XLenVT>, ImmLeaf<XLenVT,
+ [{return (2 <= Imm && Imm <= 14);}]> {
+ let ParserMatchClass = RnumArg_2_14;
+ let DecoderMethod = "decodeUImmOperand<5>";
+ let OperandType = "OPERAND_RVKRNUM_2_14";
+ let OperandNamespace = "RISCVOp";
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction class templates
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+multiclass VCLMUL_MV_V_X<string opcodestr, bits<6> funct6> {
+ def V : VALUVV<funct6, OPMVV, opcodestr # "." # "vv">,
+ Sched<[WriteVIALUV_WorstCase, ReadVIALUV_WorstCase,
+ ReadVIALUV_WorstCase, ReadVMask]>;
+ def X : VALUVX<funct6, OPMVX, opcodestr # "." # "vx">,
+ Sched<[WriteVIALUX_WorstCase, ReadVIALUV_WorstCase,
+ ReadVIALUX_WorstCase, ReadVMask]>;
+}
+
+class RVInstIVI_VROR<bits<6> funct6, dag outs, dag ins, string opcodestr,
+ string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> {
+ bits<5> vs2;
+ bits<6> imm;
+ bits<5> vd;
+ bit vm;
+
+ let Inst{31-27} = funct6{5-1};
+ let Inst{26} = imm{5};
+ let Inst{25} = vm;
+ let Inst{24-20} = vs2;
+ let Inst{19-15} = imm{4-0};
+ let Inst{14-12} = OPIVI.Value;
+ let Inst{11-7} = vd;
+ let Inst{6-0} = OPC_OP_V.Value;
+
+ let Uses = [VTYPE, VL];
+ let RVVConstraint = VMConstraint;
+}
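
The class above exists because the vror.vi rotate amount needs six bits, one more than the usual 5-bit immediate slot; bit 5 of the immediate is stored in Inst{26}, the low bit of funct6. A standalone C++ encoder sketch of the same field layout (the funct3 and major-opcode values are the usual OPIVI and OP-V encodings; treat this as an illustration rather than a reference encoder):

#include <cstdint>
uint32_t encodeVRorVI(uint32_t funct6, bool vm, uint32_t vs2, uint32_t imm6,
                      uint32_t vd) {
  uint32_t inst = 0;
  inst |= (funct6 >> 1) << 27;       // Inst{31-27} = funct6{5-1}
  inst |= ((imm6 >> 5) & 1u) << 26;  // Inst{26}    = imm{5}
  inst |= (vm ? 1u : 0u) << 25;      // Inst{25}    = vm
  inst |= (vs2 & 0x1f) << 20;        // Inst{24-20} = vs2
  inst |= (imm6 & 0x1f) << 15;       // Inst{19-15} = imm{4-0}
  inst |= 0b011u << 12;              // Inst{14-12} = OPIVI funct3
  inst |= (vd & 0x1f) << 7;          // Inst{11-7}  = vd
  inst |= 0b1010111u;                // Inst{6-0}   = OP-V major opcode
  return inst;
}
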
+
+multiclass VROR_IV_V_X_I<string opcodestr, bits<6> funct6>
+ : VALU_IV_V_X<opcodestr, funct6> {
+ def I : RVInstIVI_VROR<funct6, (outs VR:$vd),
+ (ins VR:$vs2, uimm6:$imm, VMaskOp:$vm),
+ opcodestr # ".vi", "$vd, $vs2, $imm$vm">,
+ Sched<[WriteVIALUI_WorstCase, ReadVIALUV_WorstCase,
+ ReadVMask]>;
+}
+
+// op vd, vs2, vs1
+class PALUVVNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr>
+ : VALUVVNoVm<funct6, opv, opcodestr> {
+ let Inst{6-0} = OPC_OP_P.Value;
+}
+
+// op vd, vs2, imm, vm
+class PALUVINoVm<bits<6> funct6, string opcodestr, Operand optype = simm5>
+ : VALUVINoVm<funct6, opcodestr, optype> {
+ let Inst{6-0} = OPC_OP_P.Value;
+ let Inst{14-12} = OPMVV.Value;
+}
+
+// op vd, vs2 (use vs1 as instruction encoding)
+class PALUVs2NoVm<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, string opcodestr>
+ : VALUVs2NoVm<funct6, vs1, opv, opcodestr> {
+ let Inst{6-0} = OPC_OP_P.Value;
+}
+
+multiclass VAES_MV_V_S<bits<6> funct6_vv, bits<6> funct6_vs, bits<5> vs1,
+ RISCVVFormat opv, string opcodestr> {
+ def NAME # _VV : PALUVs2NoVm<funct6_vv, vs1, opv, opcodestr # ".vv">;
+ def NAME # _VS : PALUVs2NoVm<funct6_vs, vs1, opv, opcodestr # ".vs">;
+}
+
+// vaeskf1.vi and vaeskf2.vi use a different opcode and format, so we need
+// to customize a class for them.
+class VAESKF_MV_I<bits<6> funct6, string opcodestr, Operand optype>
+ : VALUVINoVm<funct6, opcodestr, optype> {
+ let Inst{6-0} = OPC_OP_P.Value;
+ let Inst{14-12} = OPMVV.Value;
+}
+} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtZvbb] in {
+ defm VANDN_V : VALU_IV_V_X<"vandn", 0b000001>;
+ def VBREV8_V : VALUVs2<0b010010, 0b01000, OPMVV, "vbrev8.v">;
+ def VBREV_V : VALUVs2<0b010010, 0b01010, OPMVV, "vbrev.v">;
+ def VCLZ_V : VALUVs2<0b010010, 0b01100, OPMVV, "vclz.v">;
+ def VCPOP_V : VALUVs2<0b010010, 0b01110, OPMVV, "vcpop.v">;
+ def VCTZ_V : VALUVs2<0b010010, 0b01101, OPMVV, "vctz.v">;
+ def VREV8_V : VALUVs2<0b010010, 0b01001, OPMVV, "vrev8.v">;
+ defm VROL_V : VALU_IV_V_X<"vrol", 0b010101>;
+ defm VROR_V : VROR_IV_V_X_I<"vror", 0b010100>;
+ let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in
+ defm VWSLL_V : VSHT_IV_V_X_I<"vwsll", 0b110101>;
+} // Predicates = [HasStdExtZvbb]
+
+let Predicates = [HasStdExtZvbc] in {
+ defm VCLMUL_V : VCLMUL_MV_V_X<"vclmul", 0b001100>;
+ defm VCLMULH_V : VCLMUL_MV_V_X<"vclmulh", 0b001101>;
+} // Predicates = [HasStdExtZvbc]
+
+let Predicates = [HasStdExtZvkg], RVVConstraint = NoConstraint in {
+ def VGHSH_VV : PALUVVNoVm<0b101100, OPMVV, "vghsh.vv">;
+ def VGMUL_VV : PALUVs2NoVm<0b101000, 0b10001, OPMVV, "vgmul.vv">;
+} // Predicates = [HasStdExtZvkg]
+
+let Predicates = [HasStdExtZvknha], RVVConstraint = NoConstraint in {
+ def VSHA2CH_VV : PALUVVNoVm<0b101110, OPMVV, "vsha2ch.vv">;
+ def VSHA2CL_VV : PALUVVNoVm<0b101111, OPMVV, "vsha2cl.vv">;
+ def VSHA2MS_VV : PALUVVNoVm<0b101101, OPMVV, "vsha2ms.vv">;
+} // Predicates = [HasStdExtZvknha]
+
+let Predicates = [HasStdExtZvkned], RVVConstraint = NoConstraint in {
+ defm VAESDF : VAES_MV_V_S<0b101000, 0b101001, 0b00001, OPMVV, "vaesdf">;
+ defm VAESDM : VAES_MV_V_S<0b101000, 0b101001, 0b00000, OPMVV, "vaesdm">;
+ defm VAESEF : VAES_MV_V_S<0b101000, 0b101001, 0b00011, OPMVV, "vaesef">;
+ defm VAESEM : VAES_MV_V_S<0b101000, 0b101001, 0b00010, OPMVV, "vaesem">;
+ def VAESKF1_VI : VAESKF_MV_I<0b100010, "vaeskf1.vi", uimm5>;
+ def VAESKF2_VI : VAESKF_MV_I<0b101010, "vaeskf2.vi", uimm5>;
+ def VAESZ_VS : PALUVs2NoVm<0b101001, 0b00111, OPMVV, "vaesz.vs">;
+} // Predicates = [HasStdExtZvkned]
+
+let Predicates = [HasStdExtZvksed], RVVConstraint = NoConstraint in {
+ def VSM4K_VI : PALUVINoVm<0b100001, "vsm4k.vi", uimm5>;
+ defm VSM4R : VAES_MV_V_S<0b101000, 0b101001, 0b10000, OPMVV, "vsm4r">;
+} // Predicates = [HasStdExtZvksed]
+
+let Predicates = [HasStdExtZvksh], RVVConstraint = NoConstraint in {
+ def VSM3C_VI : PALUVINoVm<0b101011, "vsm3c.vi", uimm5>;
+ def VSM3ME_VV : PALUVVNoVm<0b100000, OPMVV, "vsm3me.vv">;
+} // Predicates = [HasStdExtZvksh]
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+defm PseudoVANDN : VPseudoVALU_VV_VX;
+
+multiclass VPseudoUnaryV_V {
+ foreach m = MxList in {
+ let VLMul = m.value in {
+ def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>;
+ def "_V_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>,
+ RISCVMaskedPseudo<MaskIdx=2>;
+ }
+ }
+}
+
+defm PseudoVBREV : VPseudoUnaryV_V;
+defm PseudoVREV8 : VPseudoUnaryV_V;
+defm PseudoVCLZ : VPseudoUnaryV_V;
+defm PseudoVCTZ : VPseudoUnaryV_V;
+defm PseudoVCPOP : VPseudoUnaryV_V;
+
+defm PseudoVROL : VPseudoVALU_VV_VX;
+defm PseudoVROR : VPseudoVALU_VV_VX_VI<uimm6>;
+
+//===----------------------------------------------------------------------===//
+// SDNode patterns
+//===----------------------------------------------------------------------===//
+
+multiclass VPatUnarySDNode_V<SDPatternOperator op, string instruction_name> {
+ foreach vti = AllIntegerVectors in {
+ let Predicates = !listconcat([HasStdExtZvbb],
+ GetVTypePredicates<vti>.Predicates) in {
+ def : Pat<(vti.Vector (op (vti.Vector vti.RegClass:$rs1))),
+ (!cast<Instruction>(instruction_name#"_V_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1,
+ vti.AVL, vti.Log2SEW, TA_MA)>;
+ }
+ }
+}
+
+// Helpers for detecting splats since we preprocess splat_vector to vmv.v.x
+// This should match the logic in RISCVDAGToDAGISel::selectVSplat
+def riscv_splat_vector : PatFrag<(ops node:$rs1),
+ (riscv_vmv_v_x_vl undef, node:$rs1, srcvalue)>;
+def riscv_vnot : PatFrag<(ops node:$rs1), (xor node:$rs1,
+ (riscv_splat_vector -1))>;
+
+foreach vti = AllIntegerVectors in {
+ let Predicates = !listconcat([HasStdExtZvbb],
+ GetVTypePredicates<vti>.Predicates) in {
+ def : Pat<(vti.Vector (and (riscv_vnot vti.RegClass:$rs1),
+ vti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVANDN_VV_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs2,
+ vti.RegClass:$rs1,
+ vti.AVL, vti.Log2SEW, TA_MA)>;
+ def : Pat<(vti.Vector (and (riscv_splat_vector
+ (not vti.ScalarRegClass:$rs1)),
+ vti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVANDN_VX_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs2,
+ vti.ScalarRegClass:$rs1,
+ vti.AVL, vti.Log2SEW, TA_MA)>;
+ }
+}
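
For reference, a scalar C++ model of what the two patterns above match: the VV form computes vs2 & ~vs1 element-wise, and the VX form matches a splat of an already-inverted scalar so the inversion folds into the instruction.

#include <cstdint>
// vandn.vv : vd[i] = vs2[i] & ~vs1[i]
// vandn.vx : vd[i] = vs2[i] & ~x[rs1]
uint64_t andn(uint64_t rs2, uint64_t rs1) { return rs2 & ~rs1; }
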
+
+defm : VPatUnarySDNode_V<bitreverse, "PseudoVBREV">;
+defm : VPatUnarySDNode_V<bswap, "PseudoVREV8">;
+defm : VPatUnarySDNode_V<ctlz, "PseudoVCLZ">;
+defm : VPatUnarySDNode_V<cttz, "PseudoVCTZ">;
+defm : VPatUnarySDNode_V<ctpop, "PseudoVCPOP">;
+
+defm : VPatBinarySDNode_VV_VX<rotl, "PseudoVROL">;
+
+def NegImm64 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(0x3f & (64 - N->getZExtValue()), SDLoc(N),
+ N->getValueType(0));
+}]>;
+
+// Although there is no vrol.vi, an immediate rotate left can be achieved by
+// negating the immediate in vror.vi
+foreach vti = AllIntegerVectors in {
+ let Predicates = !listconcat([HasStdExtZvbb],
+ GetVTypePredicates<vti>.Predicates) in {
+ def : Pat<(vti.Vector (rotl vti.RegClass:$rs2,
+ (vti.Vector (SplatPat_uimm6 uimm6:$rs1)))),
+ (!cast<Instruction>("PseudoVROR_VI_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs2,
+ (NegImm64 uimm6:$rs1),
+ vti.AVL, vti.Log2SEW, TA_MA)>;
+ }
+}
+defm : VPatBinarySDNode_VV_VX_VI<rotr, "PseudoVROR", uimm6>;
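
Why negating the immediate is sound, sketched in C++: for any element width w that divides 64 (SEW of 8, 16, 32 or 64), (64 - k) mod w equals (w - k) mod w, so a right rotate by NegImm64(k) is the same as a left rotate by k. A 32-bit scalar model, as an illustration only:

#include <cstdint>
uint32_t rotr32(uint32_t x, unsigned k) {
  k &= 31;
  return (x >> k) | (x << ((32 - k) & 31));
}
uint32_t rotl32_via_rotr(uint32_t x, unsigned k) {
  return rotr32(x, (64 - k) & 0x3f); // mirrors NegImm64
}
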
+
+//===----------------------------------------------------------------------===//
+// VL patterns
+//===----------------------------------------------------------------------===//
+
+multiclass VPatUnaryVL_V<SDPatternOperator op, string instruction_name> {
+ foreach vti = AllIntegerVectors in {
+ let Predicates = !listconcat([HasStdExtZvbb],
+ GetVTypePredicates<vti>.Predicates) in {
+ def : Pat<(vti.Vector (op (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$merge),
+ (vti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>(instruction_name#"_V_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge,
+ vti.RegClass:$rs1,
+ (vti.Mask V0),
+ GPR:$vl,
+ vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+ }
+ }
+}
+
+foreach vti = AllIntegerVectors in {
+ let Predicates = !listconcat([HasStdExtZvbb],
+ GetVTypePredicates<vti>.Predicates) in {
+ def : Pat<(vti.Vector (riscv_and_vl (riscv_xor_vl
+ (vti.Vector vti.RegClass:$rs1),
+ (riscv_splat_vector -1),
+ (vti.Vector vti.RegClass:$merge),
+ (vti.Mask V0),
+ VLOpFrag),
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Vector vti.RegClass:$merge),
+ (vti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVANDN_VV_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge,
+ vti.RegClass:$rs2,
+ vti.RegClass:$rs1,
+ (vti.Mask V0),
+ GPR:$vl,
+ vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+
+ def : Pat<(vti.Vector (riscv_and_vl (riscv_splat_vector
+ (not vti.ScalarRegClass:$rs1)),
+ (vti.Vector vti.RegClass:$rs2),
+ (vti.Vector vti.RegClass:$merge),
+ (vti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVANDN_VX_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge,
+ vti.RegClass:$rs2,
+ vti.ScalarRegClass:$rs1,
+ (vti.Mask V0),
+ GPR:$vl,
+ vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+ }
+}
+
+defm : VPatUnaryVL_V<riscv_bitreverse_vl, "PseudoVBREV">;
+defm : VPatUnaryVL_V<riscv_bswap_vl, "PseudoVREV8">;
+defm : VPatUnaryVL_V<riscv_ctlz_vl, "PseudoVCLZ">;
+defm : VPatUnaryVL_V<riscv_cttz_vl, "PseudoVCTZ">;
+defm : VPatUnaryVL_V<riscv_ctpop_vl, "PseudoVCPOP">;
diff --git a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
deleted file mode 100644
index 281918259cdb..000000000000
--- a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
+++ /dev/null
@@ -1,257 +0,0 @@
-//===-- RISCVMCInstLower.cpp - Convert RISCV MachineInstr to an MCInst ------=//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains code to lower RISCV MachineInstrs to their corresponding
-// MCInst records.
-//
-//===----------------------------------------------------------------------===//
-
-#include "RISCV.h"
-#include "RISCVSubtarget.h"
-#include "MCTargetDesc/RISCVMCExpr.h"
-#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCExpr.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym,
- const AsmPrinter &AP) {
- MCContext &Ctx = AP.OutContext;
- RISCVMCExpr::VariantKind Kind;
-
- switch (MO.getTargetFlags()) {
- default:
- llvm_unreachable("Unknown target flag on GV operand");
- case RISCVII::MO_None:
- Kind = RISCVMCExpr::VK_RISCV_None;
- break;
- case RISCVII::MO_CALL:
- Kind = RISCVMCExpr::VK_RISCV_CALL;
- break;
- case RISCVII::MO_PLT:
- Kind = RISCVMCExpr::VK_RISCV_CALL_PLT;
- break;
- case RISCVII::MO_LO:
- Kind = RISCVMCExpr::VK_RISCV_LO;
- break;
- case RISCVII::MO_HI:
- Kind = RISCVMCExpr::VK_RISCV_HI;
- break;
- case RISCVII::MO_PCREL_LO:
- Kind = RISCVMCExpr::VK_RISCV_PCREL_LO;
- break;
- case RISCVII::MO_PCREL_HI:
- Kind = RISCVMCExpr::VK_RISCV_PCREL_HI;
- break;
- case RISCVII::MO_GOT_HI:
- Kind = RISCVMCExpr::VK_RISCV_GOT_HI;
- break;
- case RISCVII::MO_TPREL_LO:
- Kind = RISCVMCExpr::VK_RISCV_TPREL_LO;
- break;
- case RISCVII::MO_TPREL_HI:
- Kind = RISCVMCExpr::VK_RISCV_TPREL_HI;
- break;
- case RISCVII::MO_TPREL_ADD:
- Kind = RISCVMCExpr::VK_RISCV_TPREL_ADD;
- break;
- case RISCVII::MO_TLS_GOT_HI:
- Kind = RISCVMCExpr::VK_RISCV_TLS_GOT_HI;
- break;
- case RISCVII::MO_TLS_GD_HI:
- Kind = RISCVMCExpr::VK_RISCV_TLS_GD_HI;
- break;
- }
-
- const MCExpr *ME =
- MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_None, Ctx);
-
- if (!MO.isJTI() && !MO.isMBB() && MO.getOffset())
- ME = MCBinaryExpr::createAdd(
- ME, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
-
- if (Kind != RISCVMCExpr::VK_RISCV_None)
- ME = RISCVMCExpr::create(ME, Kind, Ctx);
- return MCOperand::createExpr(ME);
-}
-
-bool llvm::lowerRISCVMachineOperandToMCOperand(const MachineOperand &MO,
- MCOperand &MCOp,
- const AsmPrinter &AP) {
- switch (MO.getType()) {
- default:
- report_fatal_error("LowerRISCVMachineInstrToMCInst: unknown operand type");
- case MachineOperand::MO_Register:
- // Ignore all implicit register operands.
- if (MO.isImplicit())
- return false;
- MCOp = MCOperand::createReg(MO.getReg());
- break;
- case MachineOperand::MO_RegisterMask:
- // Regmasks are like implicit defs.
- return false;
- case MachineOperand::MO_Immediate:
- MCOp = MCOperand::createImm(MO.getImm());
- break;
- case MachineOperand::MO_MachineBasicBlock:
- MCOp = lowerSymbolOperand(MO, MO.getMBB()->getSymbol(), AP);
- break;
- case MachineOperand::MO_GlobalAddress:
- MCOp = lowerSymbolOperand(MO, AP.getSymbolPreferLocal(*MO.getGlobal()), AP);
- break;
- case MachineOperand::MO_BlockAddress:
- MCOp = lowerSymbolOperand(
- MO, AP.GetBlockAddressSymbol(MO.getBlockAddress()), AP);
- break;
- case MachineOperand::MO_ExternalSymbol:
- MCOp = lowerSymbolOperand(
- MO, AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP);
- break;
- case MachineOperand::MO_ConstantPoolIndex:
- MCOp = lowerSymbolOperand(MO, AP.GetCPISymbol(MO.getIndex()), AP);
- break;
- case MachineOperand::MO_JumpTableIndex:
- MCOp = lowerSymbolOperand(MO, AP.GetJTISymbol(MO.getIndex()), AP);
- break;
- case MachineOperand::MO_MCSymbol:
- MCOp = lowerSymbolOperand(MO, MO.getMCSymbol(), AP);
- break;
- }
- return true;
-}
-
-static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI,
- MCInst &OutMI) {
- const RISCVVPseudosTable::PseudoInfo *RVV =
- RISCVVPseudosTable::getPseudoInfo(MI->getOpcode());
- if (!RVV)
- return false;
-
- OutMI.setOpcode(RVV->BaseInstr);
-
- const MachineBasicBlock *MBB = MI->getParent();
- assert(MBB && "MI expected to be in a basic block");
- const MachineFunction *MF = MBB->getParent();
- assert(MF && "MBB expected to be in a machine function");
-
- const TargetRegisterInfo *TRI =
- MF->getSubtarget<RISCVSubtarget>().getRegisterInfo();
-
- assert(TRI && "TargetRegisterInfo expected");
-
- uint64_t TSFlags = MI->getDesc().TSFlags;
- unsigned NumOps = MI->getNumExplicitOperands();
-
- // Skip policy, VL and SEW operands which are the last operands if present.
- if (RISCVII::hasVecPolicyOp(TSFlags))
- --NumOps;
- if (RISCVII::hasVLOp(TSFlags))
- --NumOps;
- if (RISCVII::hasSEWOp(TSFlags))
- --NumOps;
-
- bool hasVLOutput = RISCV::isFaultFirstLoad(*MI);
- for (unsigned OpNo = 0; OpNo != NumOps; ++OpNo) {
- const MachineOperand &MO = MI->getOperand(OpNo);
- // Skip vl ouput. It should be the second output.
- if (hasVLOutput && OpNo == 1)
- continue;
-
- // Skip merge op. It should be the first operand after the result.
- if (RISCVII::hasMergeOp(TSFlags) && OpNo == 1U + hasVLOutput) {
- assert(MI->getNumExplicitDefs() == 1U + hasVLOutput);
- continue;
- }
-
- MCOperand MCOp;
- switch (MO.getType()) {
- default:
- llvm_unreachable("Unknown operand type");
- case MachineOperand::MO_Register: {
- Register Reg = MO.getReg();
-
- if (RISCV::VRM2RegClass.contains(Reg) ||
- RISCV::VRM4RegClass.contains(Reg) ||
- RISCV::VRM8RegClass.contains(Reg)) {
- Reg = TRI->getSubReg(Reg, RISCV::sub_vrm1_0);
- assert(Reg && "Subregister does not exist");
- } else if (RISCV::FPR16RegClass.contains(Reg)) {
- Reg = TRI->getMatchingSuperReg(Reg, RISCV::sub_16, &RISCV::FPR32RegClass);
- assert(Reg && "Subregister does not exist");
- } else if (RISCV::FPR64RegClass.contains(Reg)) {
- Reg = TRI->getSubReg(Reg, RISCV::sub_32);
- assert(Reg && "Superregister does not exist");
- }
-
- MCOp = MCOperand::createReg(Reg);
- break;
- }
- case MachineOperand::MO_Immediate:
- MCOp = MCOperand::createImm(MO.getImm());
- break;
- }
- OutMI.addOperand(MCOp);
- }
-
- // Unmasked pseudo instructions need to append dummy mask operand to
- // V instructions. All V instructions are modeled as the masked version.
- if (RISCVII::hasDummyMaskOp(TSFlags))
- OutMI.addOperand(MCOperand::createReg(RISCV::NoRegister));
-
- return true;
-}
-
-bool llvm::lowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
- AsmPrinter &AP) {
- if (lowerRISCVVMachineInstrToMCInst(MI, OutMI))
- return false;
-
- OutMI.setOpcode(MI->getOpcode());
-
- for (const MachineOperand &MO : MI->operands()) {
- MCOperand MCOp;
- if (lowerRISCVMachineOperandToMCOperand(MO, MCOp, AP))
- OutMI.addOperand(MCOp);
- }
-
- switch (OutMI.getOpcode()) {
- case TargetOpcode::PATCHABLE_FUNCTION_ENTER: {
- const Function &F = MI->getParent()->getParent()->getFunction();
- if (F.hasFnAttribute("patchable-function-entry")) {
- unsigned Num;
- if (F.getFnAttribute("patchable-function-entry")
- .getValueAsString()
- .getAsInteger(10, Num))
- return false;
- AP.emitNops(Num);
- return true;
- }
- break;
- }
- case RISCV::PseudoReadVLENB:
- OutMI.setOpcode(RISCV::CSRRS);
- OutMI.addOperand(MCOperand::createImm(
- RISCVSysReg::lookupSysRegByName("VLENB")->Encoding));
- OutMI.addOperand(MCOperand::createReg(RISCV::X0));
- break;
- case RISCV::PseudoReadVL:
- OutMI.setOpcode(RISCV::CSRRS);
- OutMI.addOperand(
- MCOperand::createImm(RISCVSysReg::lookupSysRegByName("VL")->Encoding));
- OutMI.addOperand(MCOperand::createReg(RISCV::X0));
- break;
- }
- return false;
-}
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
index d79c4d4a0290..d0c363042f51 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp
@@ -1,4 +1,4 @@
-//=- RISCVMachineFunctionInfo.cpp - RISCV machine function info ---*- C++ -*-=//
+//=- RISCVMachineFunctionInfo.cpp - RISC-V machine function info --*- C++ -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
index 2744072568ee..5dfd47a687e9 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -1,4 +1,4 @@
-//=- RISCVMachineFunctionInfo.h - RISCV machine function info -----*- C++ -*-=//
+//=- RISCVMachineFunctionInfo.h - RISC-V machine function info ----*- C++ -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -71,6 +71,11 @@ private:
/// Registers that have been sign extended from i32.
SmallVector<Register, 8> SExt32Registers;
+ /// Size of stack frame for Zcmp PUSH/POP
+ unsigned RVPushStackSize = 0;
+ unsigned RVPushRegs = 0;
+ int RVPushRlist = llvm::RISCVZC::RLISTENCODE::INVALID_RLIST;
+
public:
RISCVMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI) {}
@@ -122,6 +127,21 @@ public:
unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; }
void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; }
+ uint64_t isPushable(const MachineFunction &MF) const {
+ return (!useSaveRestoreLibCalls(MF) &&
+ MF.getSubtarget<RISCVSubtarget>().hasStdExtZcmp() &&
+ !MF.getTarget().Options.DisableFramePointerElim(MF));
+ }
+
+ int getRVPushRlist() const { return RVPushRlist; }
+ void setRVPushRlist(int Rlist) { RVPushRlist = Rlist; }
+
+ unsigned getRVPushRegs() const { return RVPushRegs; }
+ void setRVPushRegs(unsigned Regs) { RVPushRegs = Regs; }
+
+ unsigned getRVPushStackSize() const { return RVPushStackSize; }
+ void setRVPushStackSize(unsigned Size) { RVPushStackSize = Size; }
+
void initializeBaseYamlFields(const yaml::RISCVMachineFunctionInfo &YamlMFI);
void addSExt32Register(Register Reg);
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
index 232f202f1815..da104657680a 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.cpp
@@ -1,4 +1,4 @@
-//===- RISCVMacroFusion.cpp - RISCV Macro Fusion --------------------------===//
+//===- RISCVMacroFusion.cpp - RISC-V Macro Fusion -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-/// \file This file contains the RISCV implementation of the DAG scheduling
+/// \file This file contains the RISC-V implementation of the DAG scheduling
/// mutation to pair instructions back to back.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVMacroFusion.h b/llvm/lib/Target/RISCV/RISCVMacroFusion.h
index c238dacc37f6..7598db3f8fe1 100644
--- a/llvm/lib/Target/RISCV/RISCVMacroFusion.h
+++ b/llvm/lib/Target/RISCV/RISCVMacroFusion.h
@@ -1,4 +1,4 @@
-//===- RISCVMacroFusion.h - RISCV Macro Fusion ----------------------------===//
+//===- RISCVMacroFusion.h - RISC-V Macro Fusion -----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-/// \file This file contains the RISCV definition of the DAG scheduling mutation
-/// to pair instructions back to back.
+/// \file This file contains the RISC-V definition of the DAG scheduling
+/// mutation to pair instructions back to back.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
index 39d0a201c666..841439bb732e 100644
--- a/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMakeCompressible.cpp
@@ -75,7 +75,7 @@
using namespace llvm;
#define DEBUG_TYPE "riscv-make-compressible"
-#define RISCV_COMPRESS_INSTRS_NAME "RISCV Make Compressible"
+#define RISCV_COMPRESS_INSTRS_NAME "RISC-V Make Compressible"
namespace {
@@ -227,9 +227,6 @@ static Register analyzeCompressibleUses(MachineInstr &FirstMI,
const TargetRegisterInfo *TRI =
MBB.getParent()->getSubtarget().getRegisterInfo();
- RegScavenger RS;
- RS.enterBasicBlock(MBB);
-
for (MachineBasicBlock::instr_iterator I = FirstMI.getIterator(),
E = MBB.instr_end();
I != E; ++I) {
@@ -238,14 +235,8 @@ static Register analyzeCompressibleUses(MachineInstr &FirstMI,
// Determine if this is an instruction which would benefit from using the
// new register.
RegImmPair CandidateRegImm = getRegImmPairPreventingCompression(MI);
- if (CandidateRegImm.Reg == RegImm.Reg &&
- CandidateRegImm.Imm == RegImm.Imm) {
- // Advance tracking since the value in the new register must be live for
- // this instruction too.
- RS.forward(I);
-
+ if (CandidateRegImm.Reg == RegImm.Reg && CandidateRegImm.Imm == RegImm.Imm)
MIs.push_back(&MI);
- }
// If RegImm.Reg is modified by this instruction, then we cannot optimize
// past this instruction. If the register is already compressed, then it may
@@ -278,6 +269,9 @@ static Register analyzeCompressibleUses(MachineInstr &FirstMI,
else
return RISCV::NoRegister;
+ RegScavenger RS;
+ RS.enterBasicBlockEnd(MBB);
+ RS.backward(MIs.back()->getIterator());
return RS.scavengeRegisterBackwards(*RCToScavenge, FirstMI.getIterator(),
/*RestoreAfter=*/false, /*SPAdj=*/0,
/*AllowSpill=*/false);
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index bd1074da70b5..855322b981fb 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -23,12 +23,12 @@
using namespace llvm;
#define DEBUG_TYPE "riscv-merge-base-offset"
-#define RISCV_MERGE_BASE_OFFSET_NAME "RISCV Merge Base Offset"
+#define RISCV_MERGE_BASE_OFFSET_NAME "RISC-V Merge Base Offset"
namespace {
-struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
-private:
+class RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
const RISCVSubtarget *ST = nullptr;
+ MachineRegisterInfo *MRI;
public:
static char ID;
@@ -60,9 +60,6 @@ public:
StringRef getPassName() const override {
return RISCV_MERGE_BASE_OFFSET_NAME;
}
-
-private:
- MachineRegisterInfo *MRI;
};
} // end anonymous namespace
@@ -143,6 +140,8 @@ void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
if (Hi.getOpcode() != RISCV::AUIPC)
Lo.getOperand(2).setOffset(Offset);
// Delete the tail instruction.
+ MRI->constrainRegClass(Lo.getOperand(0).getReg(),
+ MRI->getRegClass(Tail.getOperand(0).getReg()));
MRI->replaceRegWith(Tail.getOperand(0).getReg(), Lo.getOperand(0).getReg());
Tail.eraseFromParent();
LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n"
diff --git a/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp b/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp
new file mode 100644
index 000000000000..6c1b0cf5ca7f
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp
@@ -0,0 +1,238 @@
+//===-- RISCVMoveMerger.cpp - RISCV move merge pass -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs the move-related peephole
+// optimizations specified by Zcmp. It should be run after register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVInstrInfo.h"
+#include "RISCVMachineFunctionInfo.h"
+
+using namespace llvm;
+
+#define RISCV_MOVE_MERGE_NAME "RISC-V Zcmp move merging pass"
+
+namespace {
+struct RISCVMoveMerge : public MachineFunctionPass {
+ static char ID;
+
+ RISCVMoveMerge() : MachineFunctionPass(ID) {
+ initializeRISCVMoveMergePass(*PassRegistry::getPassRegistry());
+ }
+
+ const RISCVInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+
+ // Track which register units have been modified and used.
+ LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+
+ bool isCandidateToMergeMVA01S(const DestSourcePair &RegPair);
+ bool isCandidateToMergeMVSA01(const DestSourcePair &RegPair);
+ // Merge the two instructions indicated into a single pair instruction.
+ MachineBasicBlock::iterator
+ mergePairedInsns(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired, unsigned Opcode);
+
+ // Look for a C.MV instruction that can be combined with the given
+ // instruction into CM.MVA01S or CM.MVSA01. Return the matching
+ // instruction if one exists.
+ MachineBasicBlock::iterator
+ findMatchingInst(MachineBasicBlock::iterator &MBBI, unsigned InstOpcode,
+ const DestSourcePair &RegPair);
+ bool mergeMoveSARegPair(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override { return RISCV_MOVE_MERGE_NAME; }
+};
+
+char RISCVMoveMerge::ID = 0;
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVMoveMerge, "riscv-move-merge", RISCV_MOVE_MERGE_NAME,
+ false, false)
+
+// Check if registers meet CM.MVA01S constraints.
+bool RISCVMoveMerge::isCandidateToMergeMVA01S(const DestSourcePair &RegPair) {
+ Register Destination = RegPair.Destination->getReg();
+ Register Source = RegPair.Source->getReg();
+ // Destination must be a0 or a1, and source must be one of s0-s7.
+ if ((Destination == RISCV::X10 || Destination == RISCV::X11) &&
+ RISCV::SR07RegClass.contains(Source))
+ return true;
+ return false;
+}
+
+// Check if registers meet CM.MVSA01 constraints.
+bool RISCVMoveMerge::isCandidateToMergeMVSA01(const DestSourcePair &RegPair) {
+ Register Destination = RegPair.Destination->getReg();
+ Register Source = RegPair.Source->getReg();
+ // Source must be a0 or a1, and destination must be one of s0-s7.
+ if ((Source == RISCV::X10 || Source == RISCV::X11) &&
+ RISCV::SR07RegClass.contains(Destination))
+ return true;
+ return false;
+}
+
+MachineBasicBlock::iterator
+RISCVMoveMerge::mergePairedInsns(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator Paired,
+ unsigned Opcode) {
+ const MachineOperand *Sreg1, *Sreg2;
+ MachineBasicBlock::iterator E = I->getParent()->end();
+ MachineBasicBlock::iterator NextI = next_nodbg(I, E);
+ DestSourcePair FirstPair = TII->isCopyInstrImpl(*I).value();
+ DestSourcePair PairedRegs = TII->isCopyInstrImpl(*Paired).value();
+ Register ARegInFirstPair = Opcode == RISCV::CM_MVA01S
+ ? FirstPair.Destination->getReg()
+ : FirstPair.Source->getReg();
+
+ if (NextI == Paired)
+ NextI = next_nodbg(NextI, E);
+ DebugLoc DL = I->getDebugLoc();
+
+ // The order of S-reg depends on which instruction holds A0, instead of
+ // the order of register pair.
+ // e.g.
+ // mv a1, s1
+ // mv a0, s2 => cm.mva01s s2,s1
+ //
+ // mv a0, s2
+ // mv a1, s1 => cm.mva01s s2,s1
+ bool StartWithX10 = ARegInFirstPair == RISCV::X10;
+ if (Opcode == RISCV::CM_MVA01S) {
+ Sreg1 = StartWithX10 ? FirstPair.Source : PairedRegs.Source;
+ Sreg2 = StartWithX10 ? PairedRegs.Source : FirstPair.Source;
+ } else {
+ Sreg1 = StartWithX10 ? FirstPair.Destination : PairedRegs.Destination;
+ Sreg2 = StartWithX10 ? PairedRegs.Destination : FirstPair.Destination;
+ }
+
+ BuildMI(*I->getParent(), I, DL, TII->get(Opcode)).add(*Sreg1).add(*Sreg2);
+
+ I->eraseFromParent();
+ Paired->eraseFromParent();
+ return NextI;
+}
+
+MachineBasicBlock::iterator
+RISCVMoveMerge::findMatchingInst(MachineBasicBlock::iterator &MBBI,
+ unsigned InstOpcode,
+ const DestSourcePair &RegPair) {
+ MachineBasicBlock::iterator E = MBBI->getParent()->end();
+
+ // Track which register units have been modified and used between the first
+ // insn and the second insn.
+ ModifiedRegUnits.clear();
+ UsedRegUnits.clear();
+
+ for (MachineBasicBlock::iterator I = next_nodbg(MBBI, E); I != E;
+ I = next_nodbg(I, E)) {
+
+ MachineInstr &MI = *I;
+
+ if (auto SecondPair = TII->isCopyInstrImpl(MI)) {
+ Register SourceReg = SecondPair->Source->getReg();
+ Register DestReg = SecondPair->Destination->getReg();
+
+ if (InstOpcode == RISCV::CM_MVA01S &&
+ isCandidateToMergeMVA01S(*SecondPair)) {
+ // The two moves must write different A registers; bail out if they match.
+ if ((RegPair.Destination->getReg() == DestReg))
+ return E;
+
+ // If the paired destination register was modified or used, or the source
+ // register was modified, there is no possibility of finding a matching
+ // instruction, so exit early.
+ if (!ModifiedRegUnits.available(DestReg) ||
+ !UsedRegUnits.available(DestReg) ||
+ !ModifiedRegUnits.available(SourceReg))
+ return E;
+
+ return I;
+ } else if (InstOpcode == RISCV::CM_MVSA01 &&
+ isCandidateToMergeMVSA01(*SecondPair)) {
+ if ((RegPair.Source->getReg() == SourceReg) ||
+ (RegPair.Destination->getReg() == DestReg))
+ return E;
+
+ if (!ModifiedRegUnits.available(DestReg) ||
+ !UsedRegUnits.available(DestReg) ||
+ !ModifiedRegUnits.available(SourceReg))
+ return E;
+
+ return I;
+ }
+ }
+ // Update modified / used register units.
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+ }
+ return E;
+}
+
+// Finds pairs of instructions that could be represented as C.MV instructions
+// and merges them into CM.MVA01S or CM.MVSA01.
+bool RISCVMoveMerge::mergeMoveSARegPair(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ MBBI != E;) {
+ // Check if the instruction can be compressed to a C.MV instruction. If it
+ // can, return the Dest/Src register pair.
+ auto RegPair = TII->isCopyInstrImpl(*MBBI);
+ if (RegPair.has_value()) {
+ unsigned Opcode = 0;
+
+ if (isCandidateToMergeMVA01S(*RegPair))
+ Opcode = RISCV::CM_MVA01S;
+ else if (isCandidateToMergeMVSA01(*RegPair))
+ Opcode = RISCV::CM_MVSA01;
+ else {
+ ++MBBI;
+ continue;
+ }
+
+ MachineBasicBlock::iterator Paired =
+ findMatchingInst(MBBI, Opcode, RegPair.value());
+ // If a matching instruction is found, merge the pair.
+ if (Paired != E) {
+ MBBI = mergePairedInsns(MBBI, Paired, Opcode);
+ Modified = true;
+ continue;
+ }
+ }
+ ++MBBI;
+ }
+ return Modified;
+}
+
+bool RISCVMoveMerge::runOnMachineFunction(MachineFunction &Fn) {
+ if (skipFunction(Fn.getFunction()))
+ return false;
+
+ const RISCVSubtarget *Subtarget = &Fn.getSubtarget<RISCVSubtarget>();
+ if (!Subtarget->hasStdExtZcmp())
+ return false;
+
+ TII = Subtarget->getInstrInfo();
+ TRI = Subtarget->getRegisterInfo();
+ // Resize the modified and used register unit trackers. We do this once
+ // per function and then clear the register units each time we optimize a
+ // move.
+ ModifiedRegUnits.init(*TRI);
+ UsedRegUnits.init(*TRI);
+ bool Modified = false;
+ for (auto &MBB : Fn)
+ Modified |= mergeMoveSARegPair(MBB);
+ return Modified;
+}
+
+/// createRISCVMoveMergePass - returns an instance of the
+/// move merge pass.
+FunctionPass *llvm::createRISCVMoveMergePass() { return new RISCVMoveMerge(); }
diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index 2ee228d72825..7014755b6706 100644
--- a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
+++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -1,4 +1,4 @@
-//===-------------- RISCVSExtWRemoval.cpp - MI sext.w Removal -------------===//
+//===- RISCVOptWInstrs.cpp - MI W instruction optimizations ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,9 +6,16 @@
//
//===---------------------------------------------------------------------===//
//
-// This pass removes unneeded sext.w instructions at the MI level. Either
-// because the sign extended bits aren't consumed or because the input was
-// already sign extended by an earlier instruction.
+// This pass does some optimizations for *W instructions at the MI level.
+//
+// First it removes unneeded sext.w instructions. Either because the sign
+// extended bits aren't consumed or because the input was already sign extended
+// by an earlier instruction.
+//
+// Then it removes the -w suffix from addw, slliw, and mulw instructions
+// whenever all users depend only on the lower word of the result of the
+// instruction. We do this only for those opcodes because their -w forms are
+// less compressible.
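+// For example (illustrative registers), in the sequence
+//   slliw a0, a0, 2
+//   addw  a0, a0, a1
+// the addw reads only the lower 32 bits of a0, so the slliw can be rewritten
+// as slli, which (unlike slliw) has a compressed form.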
//
//===---------------------------------------------------------------------===//
@@ -21,7 +28,8 @@
using namespace llvm;
-#define DEBUG_TYPE "riscv-sextw-removal"
+#define DEBUG_TYPE "riscv-opt-w-instrs"
+#define RISCV_OPT_W_INSTRS_NAME "RISC-V Optimize W Instructions"
STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions");
STATISTIC(NumTransformedToWInstrs,
@@ -30,34 +38,267 @@ STATISTIC(NumTransformedToWInstrs,
static cl::opt<bool> DisableSExtWRemoval("riscv-disable-sextw-removal",
cl::desc("Disable removal of sext.w"),
cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableStripWSuffix("riscv-disable-strip-w-suffix",
+ cl::desc("Disable strip W suffix"),
+ cl::init(false), cl::Hidden);
+
namespace {
-class RISCVSExtWRemoval : public MachineFunctionPass {
+class RISCVOptWInstrs : public MachineFunctionPass {
public:
static char ID;
- RISCVSExtWRemoval() : MachineFunctionPass(ID) {
- initializeRISCVSExtWRemovalPass(*PassRegistry::getPassRegistry());
+ RISCVOptWInstrs() : MachineFunctionPass(ID) {
+ initializeRISCVOptWInstrsPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
+ bool removeSExtWInstrs(MachineFunction &MF, const RISCVInstrInfo &TII,
+ const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
+ bool stripWSuffixes(MachineFunction &MF, const RISCVInstrInfo &TII,
+ const RISCVSubtarget &ST, MachineRegisterInfo &MRI);
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
- StringRef getPassName() const override { return "RISCV sext.w Removal"; }
+ StringRef getPassName() const override { return RISCV_OPT_W_INSTRS_NAME; }
};
} // end anonymous namespace
-char RISCVSExtWRemoval::ID = 0;
-INITIALIZE_PASS(RISCVSExtWRemoval, DEBUG_TYPE, "RISCV sext.w Removal", false,
+char RISCVOptWInstrs::ID = 0;
+INITIALIZE_PASS(RISCVOptWInstrs, DEBUG_TYPE, RISCV_OPT_W_INSTRS_NAME, false,
false)
-FunctionPass *llvm::createRISCVSExtWRemovalPass() {
- return new RISCVSExtWRemoval();
+FunctionPass *llvm::createRISCVOptWInstrsPass() {
+ return new RISCVOptWInstrs();
+}
+
+// Checks if all users only demand the lower \p OrigBits of the original
+// instruction's result.
+// TODO: handle multiple interdependent transformations
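+// For example, a def whose only users are an addw and the value operand of a
+// sw demands only its lower 32 bits, so this returns true for OrigBits = 32.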
+static bool hasAllNBitUsers(const MachineInstr &OrigMI,
+ const RISCVSubtarget &ST,
+ const MachineRegisterInfo &MRI, unsigned OrigBits) {
+
+ SmallSet<std::pair<const MachineInstr *, unsigned>, 4> Visited;
+ SmallVector<std::pair<const MachineInstr *, unsigned>, 4> Worklist;
+
+ Worklist.push_back(std::make_pair(&OrigMI, OrigBits));
+
+ while (!Worklist.empty()) {
+ auto P = Worklist.pop_back_val();
+ const MachineInstr *MI = P.first;
+ unsigned Bits = P.second;
+
+ if (!Visited.insert(P).second)
+ continue;
+
+ // Only handle instructions with one def.
+ if (MI->getNumExplicitDefs() != 1)
+ return false;
+
+ for (auto &UserOp : MRI.use_operands(MI->getOperand(0).getReg())) {
+ const MachineInstr *UserMI = UserOp.getParent();
+ unsigned OpIdx = UserOp.getOperandNo();
+
+ switch (UserMI->getOpcode()) {
+ default:
+ return false;
+
+ case RISCV::ADDIW:
+ case RISCV::ADDW:
+ case RISCV::DIVUW:
+ case RISCV::DIVW:
+ case RISCV::MULW:
+ case RISCV::REMUW:
+ case RISCV::REMW:
+ case RISCV::SLLIW:
+ case RISCV::SLLW:
+ case RISCV::SRAIW:
+ case RISCV::SRAW:
+ case RISCV::SRLIW:
+ case RISCV::SRLW:
+ case RISCV::SUBW:
+ case RISCV::ROLW:
+ case RISCV::RORW:
+ case RISCV::RORIW:
+ case RISCV::CLZW:
+ case RISCV::CTZW:
+ case RISCV::CPOPW:
+ case RISCV::SLLI_UW:
+ case RISCV::FMV_W_X:
+ case RISCV::FCVT_H_W:
+ case RISCV::FCVT_H_WU:
+ case RISCV::FCVT_S_W:
+ case RISCV::FCVT_S_WU:
+ case RISCV::FCVT_D_W:
+ case RISCV::FCVT_D_WU:
+ if (Bits >= 32)
+ break;
+ return false;
+ case RISCV::SEXT_B:
+ case RISCV::PACKH:
+ if (Bits >= 8)
+ break;
+ return false;
+ case RISCV::SEXT_H:
+ case RISCV::FMV_H_X:
+ case RISCV::ZEXT_H_RV32:
+ case RISCV::ZEXT_H_RV64:
+ case RISCV::PACKW:
+ if (Bits >= 16)
+ break;
+ return false;
+
+ case RISCV::PACK:
+ if (Bits >= (ST.getXLen() / 2))
+ break;
+ return false;
+
+ case RISCV::SRLI: {
+ // If we are shifting right by less than Bits, and users don't demand
+ // any bits that were shifted into [Bits-1:0], then we can consider this
+ // as an N-Bit user.
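+ // For example, with Bits = 32 and ShAmt = 4 we recurse with 28: if all
+ // users of the SRLI read only its low 28 bits, those bits come from bits
+ // [31:4] of the input, which lie within the low 32 bits.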
+ unsigned ShAmt = UserMI->getOperand(2).getImm();
+ if (Bits > ShAmt) {
+ Worklist.push_back(std::make_pair(UserMI, Bits - ShAmt));
+ break;
+ }
+ return false;
+ }
+
+ // These either overwrite or ignore the upper input bits when the immediate
+ // (or shift amount) is large enough; otherwise the lower bits of the output
+ // depend only on the lower bits of the input, so check that their users
+ // demand only the lower bits.
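+ // For example, on RV64 an SLLI by 40 shifts out all but the low 24 bits of
+ // its input, so any Bits >= 24 is sufficient, and an ANDI with Imm = 0xff
+ // reads only the low 8 bits of its input.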
+ case RISCV::SLLI:
+ if (Bits >= (ST.getXLen() - UserMI->getOperand(2).getImm()))
+ break;
+ Worklist.push_back(std::make_pair(UserMI, Bits));
+ break;
+ case RISCV::ANDI: {
+ uint64_t Imm = UserMI->getOperand(2).getImm();
+ if (Bits >= (unsigned)llvm::bit_width(Imm))
+ break;
+ Worklist.push_back(std::make_pair(UserMI, Bits));
+ break;
+ }
+ case RISCV::ORI: {
+ uint64_t Imm = UserMI->getOperand(2).getImm();
+ if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
+ break;
+ Worklist.push_back(std::make_pair(UserMI, Bits));
+ break;
+ }
+
+ case RISCV::SLL:
+ case RISCV::BSET:
+ case RISCV::BCLR:
+ case RISCV::BINV:
+ // Operand 2 is the shift amount which uses log2(xlen) bits.
+ if (OpIdx == 2) {
+ if (Bits >= Log2_32(ST.getXLen()))
+ break;
+ return false;
+ }
+ Worklist.push_back(std::make_pair(UserMI, Bits));
+ break;
+
+ case RISCV::SRA:
+ case RISCV::SRL:
+ case RISCV::ROL:
+ case RISCV::ROR:
+ // Operand 2 is the shift amount, which uses log2(xlen) bits.
+ if (OpIdx == 2 && Bits >= Log2_32(ST.getXLen()))
+ break;
+ return false;
+
+ case RISCV::ADD_UW:
+ case RISCV::SH1ADD_UW:
+ case RISCV::SH2ADD_UW:
+ case RISCV::SH3ADD_UW:
+ // Operand 1 is implicitly zero extended.
+ if (OpIdx == 1 && Bits >= 32)
+ break;
+ Worklist.push_back(std::make_pair(UserMI, Bits));
+ break;
+
+ case RISCV::BEXTI:
+ if (UserMI->getOperand(2).getImm() >= Bits)
+ return false;
+ break;
+
+ case RISCV::SB:
+ // The first argument is the value to store.
+ if (OpIdx == 0 && Bits >= 8)
+ break;
+ return false;
+ case RISCV::SH:
+ // The first argument is the value to store.
+ if (OpIdx == 0 && Bits >= 16)
+ break;
+ return false;
+ case RISCV::SW:
+ // The first argument is the value to store.
+ if (OpIdx == 0 && Bits >= 32)
+ break;
+ return false;
+
+ // For these operations, the lower word of the output depends only on the
+ // lower word of the input, so check that all uses read only the lower word.
+ case RISCV::COPY:
+ case RISCV::PHI:
+
+ case RISCV::ADD:
+ case RISCV::ADDI:
+ case RISCV::AND:
+ case RISCV::MUL:
+ case RISCV::OR:
+ case RISCV::SUB:
+ case RISCV::XOR:
+ case RISCV::XORI:
+
+ case RISCV::ANDN:
+ case RISCV::BREV8:
+ case RISCV::CLMUL:
+ case RISCV::ORC_B:
+ case RISCV::ORN:
+ case RISCV::SH1ADD:
+ case RISCV::SH2ADD:
+ case RISCV::SH3ADD:
+ case RISCV::XNOR:
+ case RISCV::BSETI:
+ case RISCV::BCLRI:
+ case RISCV::BINVI:
+ Worklist.push_back(std::make_pair(UserMI, Bits));
+ break;
+
+ case RISCV::PseudoCCMOVGPR:
+ // Either operand 4 or operand 5 is returned by this instruction. If
+ // only the lower word of the result is used, then only the lower word
+ // of operand 4 and 5 is used.
+ if (OpIdx != 4 && OpIdx != 5)
+ return false;
+ Worklist.push_back(std::make_pair(UserMI, Bits));
+ break;
+
+ case RISCV::VT_MASKC:
+ case RISCV::VT_MASKCN:
+ if (OpIdx != 1)
+ return false;
+ Worklist.push_back(std::make_pair(UserMI, Bits));
+ break;
+ }
+ }
+ }
+
+ return true;
+}
+
+static bool hasAllWUsers(const MachineInstr &OrigMI, const RISCVSubtarget &ST,
+ const MachineRegisterInfo &MRI) {
+ return hasAllNBitUsers(OrigMI, ST, MRI, 32);
}
// This function returns true if the machine instruction always outputs a value
@@ -94,8 +335,8 @@ static bool isSignExtendingOpW(const MachineInstr &MI,
return false;
}
-static bool isSignExtendedW(Register SrcReg, const MachineRegisterInfo &MRI,
- const RISCVInstrInfo &TII,
+static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST,
+ const MachineRegisterInfo &MRI,
SmallPtrSetImpl<MachineInstr *> &FixableDef) {
SmallPtrSet<const MachineInstr *, 4> Visited;
@@ -175,8 +416,9 @@ static bool isSignExtendedW(Register SrcReg, const MachineRegisterInfo &MRI,
const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
unsigned BitWidth = IntTy->getBitWidth();
- return (BitWidth <= 32 && Attrs.hasAttribute(Attribute::SExt)) ||
- (BitWidth < 32 && Attrs.hasAttribute(Attribute::ZExt));
+ if ((BitWidth <= 32 && Attrs.hasAttribute(Attribute::SExt)) ||
+ (BitWidth < 32 && Attrs.hasAttribute(Attribute::ZExt)))
+ continue;
}
if (!AddRegDefToWorkList(CopySrcReg))
@@ -283,7 +525,7 @@ static bool isSignExtendedW(Register SrcReg, const MachineRegisterInfo &MRI,
case RISCV::LWU:
case RISCV::MUL:
case RISCV::SUB:
- if (TII.hasAllWUsers(*MI, MRI)) {
+ if (hasAllWUsers(*MI, ST, MRI)) {
FixableDef.insert(MI);
break;
}
@@ -316,19 +558,14 @@ static unsigned getWOp(unsigned Opcode) {
}
}
-bool RISCVSExtWRemoval::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()) || DisableSExtWRemoval)
- return false;
-
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
- const RISCVInstrInfo &TII = *ST.getInstrInfo();
-
- if (!ST.is64Bit())
+bool RISCVOptWInstrs::removeSExtWInstrs(MachineFunction &MF,
+ const RISCVInstrInfo &TII,
+ const RISCVSubtarget &ST,
+ MachineRegisterInfo &MRI) {
+ if (DisableSExtWRemoval)
return false;
bool MadeChange = false;
-
for (MachineBasicBlock &MBB : MF) {
for (auto I = MBB.begin(), IE = MBB.end(); I != IE;) {
MachineInstr *MI = &*I++;
@@ -344,8 +581,8 @@ bool RISCVSExtWRemoval::runOnMachineFunction(MachineFunction &MF) {
// If all users only use the lower bits, this sext.w is redundant.
// Or if all definitions reaching MI sign-extend their output,
// then sext.w is redundant.
- if (!TII.hasAllWUsers(*MI, MRI) &&
- !isSignExtendedW(SrcReg, MRI, TII, FixableDefs))
+ if (!hasAllWUsers(*MI, ST, MRI) &&
+ !isSignExtendedW(SrcReg, ST, MRI, FixableDefs))
continue;
Register DstReg = MI->getOperand(0).getReg();
@@ -374,3 +611,52 @@ bool RISCVSExtWRemoval::runOnMachineFunction(MachineFunction &MF) {
return MadeChange;
}
+
+bool RISCVOptWInstrs::stripWSuffixes(MachineFunction &MF,
+ const RISCVInstrInfo &TII,
+ const RISCVSubtarget &ST,
+ MachineRegisterInfo &MRI) {
+ if (DisableStripWSuffix)
+ return false;
+
+ bool MadeChange = false;
+ for (MachineBasicBlock &MBB : MF) {
+ for (auto I = MBB.begin(), IE = MBB.end(); I != IE; ++I) {
+ MachineInstr &MI = *I;
+
+ unsigned Opc;
+ switch (MI.getOpcode()) {
+ default:
+ continue;
+ case RISCV::ADDW: Opc = RISCV::ADD; break;
+ case RISCV::MULW: Opc = RISCV::MUL; break;
+ case RISCV::SLLIW: Opc = RISCV::SLLI; break;
+ }
+
+ if (hasAllWUsers(MI, ST, MRI)) {
+ MI.setDesc(TII.get(Opc));
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+bool RISCVOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
+ const RISCVInstrInfo &TII = *ST.getInstrInfo();
+
+ if (!ST.is64Bit())
+ return false;
+
+ bool MadeChange = false;
+ MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI);
+ MadeChange |= stripWSuffixes(MF, TII, ST, MRI);
+
+ return MadeChange;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index da48f10ef477..01291001cd7c 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -1,4 +1,4 @@
-//===-- RISCVProcessors.td - RISCV Processors --------------*- tablegen -*-===//
+//===-- RISCVProcessors.td - RISC-V Processors -------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -37,10 +37,14 @@ def : ProcessorModel<"generic", NoSchedModel, []>;
def ROCKET_RV32 : RISCVProcessorModel<"rocket-rv32",
RocketModel,
- [Feature32Bit]>;
+ [Feature32Bit,
+ FeatureStdExtZifencei,
+ FeatureStdExtZicsr]>;
def ROCKET_RV64 : RISCVProcessorModel<"rocket-rv64",
RocketModel,
- [Feature64Bit]>;
+ [Feature64Bit,
+ FeatureStdExtZifencei,
+ FeatureStdExtZicsr]>;
def ROCKET : RISCVTuneProcessorModel<"rocket",
RocketModel>;
@@ -51,12 +55,16 @@ def SIFIVE_7 : RISCVTuneProcessorModel<"sifive-7-series",
def SIFIVE_E20 : RISCVProcessorModel<"sifive-e20",
RocketModel,
[Feature32Bit,
+ FeatureStdExtZicsr,
+ FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtC]>;
def SIFIVE_E21 : RISCVProcessorModel<"sifive-e21",
RocketModel,
[Feature32Bit,
+ FeatureStdExtZicsr,
+ FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtC]>;
@@ -64,6 +72,7 @@ def SIFIVE_E21 : RISCVProcessorModel<"sifive-e21",
def SIFIVE_E24 : RISCVProcessorModel<"sifive-e24",
RocketModel,
[Feature32Bit,
+ FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtF,
@@ -72,6 +81,8 @@ def SIFIVE_E24 : RISCVProcessorModel<"sifive-e24",
def SIFIVE_E31 : RISCVProcessorModel<"sifive-e31",
RocketModel,
[Feature32Bit,
+ FeatureStdExtZifencei,
+ FeatureStdExtZicsr,
FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtC]>;
@@ -79,6 +90,7 @@ def SIFIVE_E31 : RISCVProcessorModel<"sifive-e31",
def SIFIVE_E34 : RISCVProcessorModel<"sifive-e34",
RocketModel,
[Feature32Bit,
+ FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtF,
@@ -87,6 +99,7 @@ def SIFIVE_E34 : RISCVProcessorModel<"sifive-e34",
def SIFIVE_E76 : RISCVProcessorModel<"sifive-e76",
SiFive7Model,
[Feature32Bit,
+ FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtF,
@@ -96,6 +109,8 @@ def SIFIVE_E76 : RISCVProcessorModel<"sifive-e76",
def SIFIVE_S21 : RISCVProcessorModel<"sifive-s21",
RocketModel,
[Feature64Bit,
+ FeatureStdExtZicsr,
+ FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtC]>;
@@ -103,6 +118,8 @@ def SIFIVE_S21 : RISCVProcessorModel<"sifive-s21",
def SIFIVE_S51 : RISCVProcessorModel<"sifive-s51",
RocketModel,
[Feature64Bit,
+ FeatureStdExtZicsr,
+ FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtC]>;
@@ -110,6 +127,7 @@ def SIFIVE_S51 : RISCVProcessorModel<"sifive-s51",
def SIFIVE_S54 : RISCVProcessorModel<"sifive-s54",
RocketModel,
[Feature64Bit,
+ FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtF,
@@ -119,16 +137,20 @@ def SIFIVE_S54 : RISCVProcessorModel<"sifive-s54",
def SIFIVE_S76 : RISCVProcessorModel<"sifive-s76",
SiFive7Model,
[Feature64Bit,
+ FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtF,
FeatureStdExtD,
- FeatureStdExtC],
+ FeatureStdExtC,
+ FeatureStdExtZihintpause,
+ FeatureVendorXSfcie],
[TuneSiFive7]>;
def SIFIVE_U54 : RISCVProcessorModel<"sifive-u54",
RocketModel,
[Feature64Bit,
+ FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtF,
@@ -138,6 +160,7 @@ def SIFIVE_U54 : RISCVProcessorModel<"sifive-u54",
def SIFIVE_U74 : RISCVProcessorModel<"sifive-u74",
SiFive7Model,
[Feature64Bit,
+ FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtA,
FeatureStdExtF,
@@ -145,15 +168,36 @@ def SIFIVE_U74 : RISCVProcessorModel<"sifive-u74",
FeatureStdExtC],
[TuneSiFive7]>;
+def SIFIVE_X280 : RISCVProcessorModel<"sifive-x280", SiFive7Model,
+ [Feature64Bit,
+ FeatureStdExtZifencei,
+ FeatureStdExtM,
+ FeatureStdExtA,
+ FeatureStdExtF,
+ FeatureStdExtD,
+ FeatureStdExtC,
+ FeatureStdExtV,
+ FeatureStdExtZvl512b,
+ FeatureStdExtZfh,
+ FeatureStdExtZvfh,
+ FeatureStdExtZba,
+ FeatureStdExtZbb],
+ [TuneSiFive7,
+ TuneDLenFactor2]>;
+
def SYNTACORE_SCR1_BASE : RISCVProcessorModel<"syntacore-scr1-base",
SyntacoreSCR1Model,
[Feature32Bit,
+ FeatureStdExtZicsr,
+ FeatureStdExtZifencei,
FeatureStdExtC],
[TuneNoDefaultUnroll]>;
def SYNTACORE_SCR1_MAX : RISCVProcessorModel<"syntacore-scr1-max",
SyntacoreSCR1Model,
[Feature32Bit,
+ FeatureStdExtZicsr,
+ FeatureStdExtZifencei,
FeatureStdExtM,
FeatureStdExtC],
[TuneNoDefaultUnroll]>;
diff --git a/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp
new file mode 100644
index 000000000000..a93e750eadc6
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVPushPopOptimizer.cpp
@@ -0,0 +1,145 @@
+//===------- RISCVPushPopOptimizer.cpp - RISCV Push/Pop opt. pass ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that modifies PUSH/POP instructions from the Zcmp
+// extension to make use of their return-related functionality, generating
+// POPRET and POPRETZ instructions where possible.
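+// For example (illustrative operands), an epilogue of the form
+//   cm.pop  {ra, s0}, 16
+//   ret
+// becomes cm.popret {ra, s0}, 16, and if a0 is set to zero immediately before
+// the pop, cm.popretz can be used to materialize the zero return value as
+// well.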
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVInstrInfo.h"
+#include "RISCVMachineFunctionInfo.h"
+
+using namespace llvm;
+
+#define RISCV_PUSH_POP_OPT_NAME "RISC-V Zcmp Push/Pop optimization pass"
+
+namespace {
+struct RISCVPushPopOpt : public MachineFunctionPass {
+ static char ID;
+
+ RISCVPushPopOpt() : MachineFunctionPass(ID) {
+ initializeRISCVPushPopOptPass(*PassRegistry::getPassRegistry());
+ }
+
+ const RISCVInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+
+ // Track which register units have been modified and used.
+ LiveRegUnits ModifiedRegUnits, UsedRegUnits;
+
+ bool usePopRet(MachineBasicBlock::iterator &MBBI,
+ MachineBasicBlock::iterator &NextI, bool IsReturnZero);
+ bool adjustRetVal(MachineBasicBlock::iterator &MBBI);
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override { return RISCV_PUSH_POP_OPT_NAME; }
+};
+
+char RISCVPushPopOpt::ID = 0;
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(RISCVPushPopOpt, "riscv-push-pop-opt", RISCV_PUSH_POP_OPT_NAME,
+ false, false)
+
+// Check if a POP instruction exists in the MBB and return an iterator to it.
+static MachineBasicBlock::iterator containsPop(MachineBasicBlock &MBB) {
+ for (MachineBasicBlock::iterator MBBI = MBB.begin(); MBBI != MBB.end();
+ MBBI = next_nodbg(MBBI, MBB.end()))
+ if (MBBI->getOpcode() == RISCV::CM_POP)
+ return MBBI;
+
+ return MBB.end();
+}
+
+bool RISCVPushPopOpt::usePopRet(MachineBasicBlock::iterator &MBBI,
+ MachineBasicBlock::iterator &NextI,
+ bool IsReturnZero) {
+ // Since pseudo-instruction lowering happens later in the pipeline,
+ // this will detect all ret instructions.
+ DebugLoc DL = NextI->getDebugLoc();
+ unsigned Opc = IsReturnZero ? RISCV::CM_POPRETZ : RISCV::CM_POPRET;
+ BuildMI(*NextI->getParent(), NextI, DL, TII->get(Opc))
+ .add(MBBI->getOperand(0))
+ .add(MBBI->getOperand(1));
+
+ MBBI->eraseFromParent();
+ NextI->eraseFromParent();
+ return true;
+}
+
+// Search for the last assignment to a0 and, if possible, use the ret_val slot
+// of POP to store the return value.
+bool RISCVPushPopOpt::adjustRetVal(MachineBasicBlock::iterator &MBBI) {
+ MachineBasicBlock::reverse_iterator RE = MBBI->getParent()->rend();
+ // Track which register units have been modified and used between the POP
+ // insn and the last assignment to register a0.
+ ModifiedRegUnits.clear();
+ UsedRegUnits.clear();
+ // Since the POP instruction is in the epilogue, no normal instructions will
+ // follow it. Therefore search only the preceding ones for the return value.
+ for (MachineBasicBlock::reverse_iterator I =
+ next_nodbg(MBBI.getReverse(), RE);
+ I != RE; I = next_nodbg(I, RE)) {
+ MachineInstr &MI = *I;
+ if (auto OperandPair = TII->isCopyInstrImpl(MI)) {
+ Register DestReg = OperandPair->Destination->getReg();
+ Register Source = OperandPair->Source->getReg();
+ if (DestReg == RISCV::X10 && Source == RISCV::X0) {
+ MI.removeFromParent();
+ return true;
+ }
+ }
+ // Update modified / used register units.
+ LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
+ // If a0 was modified or used, there is no possibility
+ // of using ret_val slot of POP instruction.
+ if (!ModifiedRegUnits.available(RISCV::X10) ||
+ !UsedRegUnits.available(RISCV::X10))
+ return false;
+ }
+ return false;
+}
+
+bool RISCVPushPopOpt::runOnMachineFunction(MachineFunction &Fn) {
+ if (skipFunction(Fn.getFunction()))
+ return false;
+
+ // If Zcmp extension is not supported, abort.
+ const RISCVSubtarget *Subtarget = &Fn.getSubtarget<RISCVSubtarget>();
+ if (!Subtarget->hasStdExtZcmp())
+ return false;
+
+ // If frame pointer elimination has been disabled, abort to avoid breaking the
+ // ABI.
+ if (Fn.getTarget().Options.DisableFramePointerElim(Fn))
+ return false;
+
+ TII = Subtarget->getInstrInfo();
+ TRI = Subtarget->getRegisterInfo();
+ // Resize the modified and used register unit trackers. We do this once
+ // per function and then clear the register units each time we determine
+ // the correct return value for the POP.
+ ModifiedRegUnits.init(*TRI);
+ UsedRegUnits.init(*TRI);
+ bool Modified = false;
+ for (auto &MBB : Fn) {
+ MachineBasicBlock::iterator MBBI = containsPop(MBB);
+ MachineBasicBlock::iterator NextI = next_nodbg(MBBI, MBB.end());
+ if (MBBI != MBB.end() && NextI->getOpcode() == RISCV::PseudoRET)
+ Modified |= usePopRet(MBBI, NextI, adjustRetVal(MBBI));
+ }
+ return Modified;
+}
+
+/// createRISCVPushPopOptimizationPass - returns an instance of the
+/// Push/Pop optimization pass.
+FunctionPass *llvm::createRISCVPushPopOptimizationPass() {
+ return new RISCVPushPopOpt();
+}
diff --git a/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp b/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp
new file mode 100644
index 000000000000..fed3fa2987e5
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVRVVInitUndef.cpp
@@ -0,0 +1,274 @@
+//===- RISCVRVVInitUndef.cpp - Initialize undef vector value to pseudo ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function pass that initializes undef vector values
+// with a temporary pseudo instruction, which is removed again in the
+// expand-pseudo pass. This prevents register allocation from producing results
+// that violate the register constraints of vector instructions.
+//
+// Certain RISC-V vector instructions have register-overlap constraints whose
+// violation causes an illegal-instruction trap. We model these constraints
+// with early-clobber operands, but early clobber cannot stop the register
+// allocator from assigning the same or overlapping registers when an input
+// register is undef. Converting the IMPLICIT_DEF into a temporary pseudo
+// instruction (removed later) prevents that. This is not the ideal solution:
+// it may change the instruction order or increase register pressure, and
+// ideally the constraint itself should be modeled precisely. Until that is
+// done, this is the only way to prevent the problem.
+//
+// When the subregister liveness option is enabled, the same issue is triggered
+// when only part of a register is undef. Pseudo-initializing the whole
+// register would generate redundant COPY instructions, so instead we generate
+// INSERT_SUBREG to make sure the whole register is defined when the program
+// encounters an operation with an early-clobber constraint.
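+// For example (illustrative), if the vs1 operand of an early-clobber widening
+// instruction such as vwadd.vv is an IMPLICIT_DEF, the register allocator may
+// assign vs1 a register that overlaps the destination; giving vs1 a concrete
+// definition via PseudoRVVInitUndef lets the early-clobber constraint keep
+// them apart.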
+//
+// See also: https://github.com/llvm/llvm-project/issues/50157
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/DetectDeadLanes.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-init-undef"
+#define RISCV_INIT_UNDEF_NAME "RISC-V init undef pass"
+
+namespace {
+
+class RISCVInitUndef : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ const RISCVSubtarget *ST;
+ const TargetRegisterInfo *TRI;
+
+public:
+ static char ID;
+
+ RISCVInitUndef() : MachineFunctionPass(ID) {
+ initializeRISCVInitUndefPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ StringRef getPassName() const override { return RISCV_INIT_UNDEF_NAME; }
+
+private:
+ bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
+ const DeadLaneDetector &DLD);
+ bool handleImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &Inst);
+ bool isVectorRegClass(const Register R);
+ const TargetRegisterClass *
+ getVRLargestSuperClass(const TargetRegisterClass *RC) const;
+ bool handleSubReg(MachineFunction &MF, MachineInstr &MI,
+ const DeadLaneDetector &DLD);
+};
+
+} // end anonymous namespace
+
+char RISCVInitUndef::ID = 0;
+INITIALIZE_PASS(RISCVInitUndef, DEBUG_TYPE, RISCV_INIT_UNDEF_NAME, false, false)
+char &llvm::RISCVInitUndefID = RISCVInitUndef::ID;
+
+const TargetRegisterClass *
+RISCVInitUndef::getVRLargestSuperClass(const TargetRegisterClass *RC) const {
+ if (RISCV::VRM8RegClass.hasSubClassEq(RC))
+ return &RISCV::VRM8RegClass;
+ if (RISCV::VRM4RegClass.hasSubClassEq(RC))
+ return &RISCV::VRM4RegClass;
+ if (RISCV::VRM2RegClass.hasSubClassEq(RC))
+ return &RISCV::VRM2RegClass;
+ if (RISCV::VRRegClass.hasSubClassEq(RC))
+ return &RISCV::VRRegClass;
+ return RC;
+}
+
+bool RISCVInitUndef::isVectorRegClass(const Register R) {
+ const TargetRegisterClass *RC = MRI->getRegClass(R);
+ return RISCV::VRRegClass.hasSubClassEq(RC) ||
+ RISCV::VRM2RegClass.hasSubClassEq(RC) ||
+ RISCV::VRM4RegClass.hasSubClassEq(RC) ||
+ RISCV::VRM8RegClass.hasSubClassEq(RC);
+}
+
+static unsigned getUndefInitOpcode(unsigned RegClassID) {
+ switch (RegClassID) {
+ case RISCV::VRRegClassID:
+ return RISCV::PseudoRVVInitUndefM1;
+ case RISCV::VRM2RegClassID:
+ return RISCV::PseudoRVVInitUndefM2;
+ case RISCV::VRM4RegClassID:
+ return RISCV::PseudoRVVInitUndefM4;
+ case RISCV::VRM8RegClassID:
+ return RISCV::PseudoRVVInitUndefM8;
+ default:
+ llvm_unreachable("Unexpected register class.");
+ }
+}
+
+bool RISCVInitUndef::handleImplicitDef(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &Inst) {
+ const TargetRegisterInfo &TRI =
+ *MBB.getParent()->getSubtarget().getRegisterInfo();
+
+ assert(Inst->getOpcode() == TargetOpcode::IMPLICIT_DEF);
+
+ Register Reg = Inst->getOperand(0).getReg();
+ if (!Reg.isVirtual())
+ return false;
+
+ bool NeedPseudoInit = false;
+ SmallVector<MachineOperand *, 1> UseMOs;
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ MachineInstr *UserMI = MO.getParent();
+
+ bool HasEarlyClobber = false;
+ bool TiedToDef = false;
+ for (MachineOperand &UserMO : UserMI->operands()) {
+ if (!UserMO.isReg())
+ continue;
+ if (UserMO.isEarlyClobber())
+ HasEarlyClobber = true;
+ if (UserMO.isUse() && UserMO.isTied() &&
+ TRI.regsOverlap(UserMO.getReg(), Reg))
+ TiedToDef = true;
+ }
+ if (HasEarlyClobber && !TiedToDef) {
+ NeedPseudoInit = true;
+ UseMOs.push_back(&MO);
+ }
+ }
+
+ if (!NeedPseudoInit)
+ return false;
+
+ LLVM_DEBUG(
+ dbgs() << "Emitting PseudoRVVInitUndef for implicit vector register "
+ << Reg << '\n');
+
+ unsigned RegClassID = getVRLargestSuperClass(MRI->getRegClass(Reg))->getID();
+ unsigned Opcode = getUndefInitOpcode(RegClassID);
+
+ BuildMI(MBB, Inst, Inst->getDebugLoc(), TII->get(Opcode), Reg);
+
+ Inst = MBB.erase(Inst);
+
+ for (auto MO : UseMOs)
+ MO->setIsUndef(false);
+
+ return true;
+}
+
+static bool isEarlyClobberMI(MachineInstr &MI) {
+ return llvm::any_of(MI.defs(), [](const MachineOperand &DefMO) {
+ return DefMO.isReg() && DefMO.isEarlyClobber();
+ });
+}
+
+bool RISCVInitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI,
+ const DeadLaneDetector &DLD) {
+ bool Changed = false;
+
+ for (MachineOperand &UseMO : MI.uses()) {
+ if (!UseMO.isReg())
+ continue;
+ if (!UseMO.getReg().isVirtual())
+ continue;
+
+ Register Reg = UseMO.getReg();
+ DeadLaneDetector::VRegInfo Info =
+ DLD.getVRegInfo(Register::virtReg2Index(Reg));
+
+ if (Info.UsedLanes == Info.DefinedLanes)
+ continue;
+
+ const TargetRegisterClass *TargetRegClass =
+ getVRLargestSuperClass(MRI->getRegClass(Reg));
+
+ LaneBitmask NeedDef = Info.UsedLanes & ~Info.DefinedLanes;
+
+ LLVM_DEBUG({
+ dbgs() << "Instruction has undef subregister.\n";
+ dbgs() << printReg(Reg, nullptr)
+ << " Used: " << PrintLaneMask(Info.UsedLanes)
+ << " Def: " << PrintLaneMask(Info.DefinedLanes)
+ << " Need Def: " << PrintLaneMask(NeedDef) << "\n";
+ });
+
+ SmallVector<unsigned> SubRegIndexNeedInsert;
+ TRI->getCoveringSubRegIndexes(*MRI, TargetRegClass, NeedDef,
+ SubRegIndexNeedInsert);
+
+ Register LatestReg = Reg;
+ for (auto ind : SubRegIndexNeedInsert) {
+ Changed = true;
+ const TargetRegisterClass *SubRegClass =
+ getVRLargestSuperClass(TRI->getSubRegisterClass(TargetRegClass, ind));
+ Register TmpInitSubReg = MRI->createVirtualRegister(SubRegClass);
+ BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
+ TII->get(getUndefInitOpcode(SubRegClass->getID())),
+ TmpInitSubReg);
+ Register NewReg = MRI->createVirtualRegister(TargetRegClass);
+ BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
+ TII->get(TargetOpcode::INSERT_SUBREG), NewReg)
+ .addReg(LatestReg)
+ .addReg(TmpInitSubReg)
+ .addImm(ind);
+ LatestReg = NewReg;
+ }
+
+ UseMO.setReg(LatestReg);
+ }
+
+ return Changed;
+}
+
+bool RISCVInitUndef::processBasicBlock(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ const DeadLaneDetector &DLD) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+ MachineInstr &MI = *I;
+ if (ST->enableSubRegLiveness() && isEarlyClobberMI(MI))
+ Changed |= handleSubReg(MF, MI, DLD);
+ if (MI.isImplicitDef()) {
+ auto DstReg = MI.getOperand(0).getReg();
+ if (isVectorRegClass(DstReg))
+ Changed |= handleImplicitDef(MBB, I);
+ }
+ }
+ return Changed;
+}
+
+bool RISCVInitUndef::runOnMachineFunction(MachineFunction &MF) {
+ ST = &MF.getSubtarget<RISCVSubtarget>();
+ if (!ST->hasVInstructions())
+ return false;
+
+ MRI = &MF.getRegInfo();
+ TII = ST->getInstrInfo();
+ TRI = MRI->getTargetRegisterInfo();
+
+ bool Changed = false;
+ DeadLaneDetector DLD(MRI, TRI);
+ DLD.computeSubRegisterLaneBitInfo();
+
+ for (MachineBasicBlock &BB : MF)
+ Changed |= processBasicBlock(MF, BB, DLD);
+
+ return Changed;
+}
+
+FunctionPass *llvm::createRISCVInitUndefPass() { return new RISCVInitUndef(); }
diff --git a/llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp b/llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp
index c7cc21aa5188..61d605fda3f5 100644
--- a/llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp
@@ -1,4 +1,4 @@
-//=- RISCVRedundantCopyElimination.cpp - Remove useless copy for RISCV ------=//
+//=- RISCVRedundantCopyElimination.cpp - Remove useless copy for RISC-V -----=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -56,7 +56,7 @@ public:
}
StringRef getPassName() const override {
- return "RISCV Redundant Copy Elimination";
+ return "RISC-V Redundant Copy Elimination";
}
private:
@@ -68,7 +68,7 @@ private:
char RISCVRedundantCopyElimination::ID = 0;
INITIALIZE_PASS(RISCVRedundantCopyElimination, "riscv-copyelim",
- "RISCV redundant copy elimination pass", false, false)
+ "RISC-V Redundant Copy Elimination", false, false)
static bool
guaranteesZeroRegInBlock(MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 927845aa23d1..c3ba4c1e7fdb 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVRegisterInfo.cpp - RISCV Register Information ------*- C++ -*-===//
+//===-- RISCVRegisterInfo.cpp - RISC-V Register Information -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the RISCV implementation of the TargetRegisterInfo class.
+// This file contains the RISC-V implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//
@@ -103,6 +103,10 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (TFI->hasBP(MF))
markSuperRegs(Reserved, RISCVABI::getBPReg()); // bp
+ // Additionally reserve dummy register used to form the register pair
+ // beginning with 'x0' for instructions that take register pairs.
+ markSuperRegs(Reserved, RISCV::DUMMY_REG_PAIR_WITH_X0);
+
// V registers for code generation. We handle them manually.
markSuperRegs(Reserved, RISCV::VL);
markSuperRegs(Reserved, RISCV::VTYPE);
@@ -149,7 +153,7 @@ bool RISCVRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
Register Reg,
int &FrameIdx) const {
const auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
- if (!RVFI->useSaveRestoreLibCalls(MF))
+ if (!RVFI->useSaveRestoreLibCalls(MF) && !RVFI->isPushable(MF))
return false;
const auto *FII =
@@ -759,7 +763,7 @@ bool RISCVRegisterInfo::getRegAllocationHints(
for (auto &MO : MRI->reg_nodbg_operands(VirtReg)) {
const MachineInstr &MI = *MO.getParent();
- unsigned OpIdx = MI.getOperandNo(&MO);
+ unsigned OpIdx = MO.getOperandNo();
bool NeedGPRC;
if (isCompressible(MI, NeedGPRC)) {
if (OpIdx == 0 && MI.getOperand(1).isReg()) {
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index 57a725673523..8b729caa5f71 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -1,4 +1,4 @@
-//===-- RISCVRegisterInfo.h - RISCV Register Information Impl ---*- C++ -*-===//
+//===-- RISCVRegisterInfo.h - RISC-V Register Information Impl --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the RISCV implementation of the TargetRegisterInfo class.
+// This file contains the RISC-V implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index d9a0243d213d..0b17f54431ef 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -16,39 +16,36 @@ class RISCVReg<bits<5> Enc, string n, list<string> alt = []> : Register<n> {
let AltNames = alt;
}
+class RISCVRegWithSubRegs<bits<5> Enc, string n, list<Register> subregs,
+ list<string> alt = []>
+ : RegisterWithSubRegs<n, subregs> {
+ let HWEncoding{4-0} = Enc;
+ let AltNames = alt;
+}
+
class RISCVReg16<bits<5> Enc, string n, list<string> alt = []> : Register<n> {
let HWEncoding{4-0} = Enc;
let AltNames = alt;
}
def sub_16 : SubRegIndex<16>;
-class RISCVReg32<RISCVReg16 subreg> : Register<""> {
- let HWEncoding{4-0} = subreg.HWEncoding{4-0};
- let SubRegs = [subreg];
+class RISCVReg32<RISCVReg16 subreg>
+ : RISCVRegWithSubRegs<subreg.HWEncoding{4-0}, subreg.AsmName, [subreg],
+ subreg.AltNames> {
let SubRegIndices = [sub_16];
- let AsmName = subreg.AsmName;
- let AltNames = subreg.AltNames;
}
// Because RISCVReg64 register have AsmName and AltNames that alias with their
// 16/32-bit sub-register, RISCVAsmParser will need to coerce a register number
// from a RISCVReg16/RISCVReg32 to the equivalent RISCVReg64 when appropriate.
def sub_32 : SubRegIndex<32>;
-class RISCVReg64<RISCVReg32 subreg> : Register<""> {
- let HWEncoding{4-0} = subreg.HWEncoding{4-0};
- let SubRegs = [subreg];
+class RISCVReg64<RISCVReg32 subreg>
+ : RISCVRegWithSubRegs<subreg.HWEncoding{4-0}, subreg.AsmName, [subreg],
+ subreg.AltNames> {
let SubRegIndices = [sub_32];
- let AsmName = subreg.AsmName;
- let AltNames = subreg.AltNames;
-}
-
-class RISCVRegWithSubRegs<bits<5> Enc, string n, list<Register> subregs,
- list<string> alt = []>
- : RegisterWithSubRegs<n, subregs> {
- let HWEncoding{4-0} = Enc;
- let AltNames = alt;
}
+let FallbackRegAltNameIndex = NoRegAltName in
def ABIRegAltName : RegAltNameIndex;
def sub_vrm4_0 : SubRegIndex<256>;
@@ -118,63 +115,62 @@ let RegAltNameIndices = [ABIRegAltName] in {
def XLenVT : ValueTypeByHwMode<[RV32, RV64],
[i32, i64]>;
+// Allow f64 in GPR for ZDINX on RV64.
+def XLenFVT : ValueTypeByHwMode<[RV64],
+ [f64]>;
def XLenRI : RegInfoByHwMode<
[RV32, RV64],
[RegInfo<32,32,32>, RegInfo<64,64,64>]>;
-// The order of registers represents the preferred allocation sequence.
-// Registers are listed in the order caller-save, callee-save, specials.
-def GPR : RegisterClass<"RISCV", [XLenVT], 32, (add
- (sequence "X%u", 10, 17),
- (sequence "X%u", 5, 7),
- (sequence "X%u", 28, 31),
- (sequence "X%u", 8, 9),
- (sequence "X%u", 18, 27),
- (sequence "X%u", 0, 4)
- )> {
+class GPRRegisterClass<dag regList>
+ : RegisterClass<"RISCV", [XLenVT, XLenFVT, i32], 32, regList> {
let RegInfos = XLenRI;
}
-def GPRX0 : RegisterClass<"RISCV", [XLenVT], 32, (add X0)> {
- let RegInfos = XLenRI;
-}
+// The order of registers represents the preferred allocation sequence.
+// Registers are listed in the order caller-save, callee-save, specials.
+def GPR : GPRRegisterClass<(add (sequence "X%u", 10, 17),
+ (sequence "X%u", 5, 7),
+ (sequence "X%u", 28, 31),
+ (sequence "X%u", 8, 9),
+ (sequence "X%u", 18, 27),
+ (sequence "X%u", 0, 4))>;
-def GPRNoX0 : RegisterClass<"RISCV", [XLenVT], 32, (sub GPR, X0)> {
- let RegInfos = XLenRI;
-}
+def GPRX0 : GPRRegisterClass<(add X0)>;
-def GPRNoX0X2 : RegisterClass<"RISCV", [XLenVT], 32, (sub GPR, X0, X2)> {
- let RegInfos = XLenRI;
-}
+def GPRNoX0 : GPRRegisterClass<(sub GPR, X0)>;
+
+def GPRNoX0X2 : GPRRegisterClass<(sub GPR, X0, X2)>;
// Don't use X1 or X5 for JALR since that is a hint to pop the return address
// stack on some microarchitectures. Also remove the reserved registers X0, X2,
// X3, and X4 as it reduces the number of register classes that get synthesized
// by tablegen.
-def GPRJALR : RegisterClass<"RISCV", [XLenVT], 32, (sub GPR, (sequence "X%u", 0, 5))> {
- let RegInfos = XLenRI;
-}
+def GPRJALR : GPRRegisterClass<(sub GPR, (sequence "X%u", 0, 5))>;
-def GPRC : RegisterClass<"RISCV", [XLenVT], 32, (add
- (sequence "X%u", 10, 15),
- (sequence "X%u", 8, 9)
- )> {
- let RegInfos = XLenRI;
-}
+def GPRC : GPRRegisterClass<(add (sequence "X%u", 10, 15),
+ (sequence "X%u", 8, 9))>;
// For indirect tail calls, we can't use callee-saved registers, as they are
// restored to the saved value before the tail call, which would clobber a call
// address. We shouldn't use x5 since that is a hint for to pop the return
// address stack on some microarchitectures.
-def GPRTC : RegisterClass<"RISCV", [XLenVT], 32, (add
- (sequence "X%u", 6, 7),
- (sequence "X%u", 10, 17),
- (sequence "X%u", 28, 31)
- )> {
- let RegInfos = XLenRI;
-}
+def GPRTC : GPRRegisterClass<(add (sequence "X%u", 6, 7),
+ (sequence "X%u", 10, 17),
+ (sequence "X%u", 28, 31))>;
+
+def SP : GPRRegisterClass<(add X2)>;
+
+// Saved registers s0-s7, used by the CM.MVA01S and CM.MVSA01 instructions in
+// the Zcmp extension.
+def SR07 : GPRRegisterClass<(add (sequence "X%u", 8, 9),
+ (sequence "X%u", 18, 23))>;
-def SP : RegisterClass<"RISCV", [XLenVT], 32, (add X2)> {
+// Registers saveable by the PUSH/POP instructions in the Zcmp extension.
+def PGPR : RegisterClass<"RISCV", [XLenVT], 32, (add
+ (sequence "X%u", 8, 9),
+ (sequence "X%u", 18, 27),
+ X1
+ )> {
let RegInfos = XLenRI;
}
@@ -226,39 +222,44 @@ let RegAltNameIndices = [ABIRegAltName] in {
// The order of registers represents the preferred allocation sequence,
// meaning caller-save regs are listed before callee-save.
-def FPR16 : RegisterClass<"RISCV", [f16], 16, (add
- (sequence "F%u_H", 0, 7),
- (sequence "F%u_H", 10, 17),
- (sequence "F%u_H", 28, 31),
- (sequence "F%u_H", 8, 9),
- (sequence "F%u_H", 18, 27)
+// We start by allocating argument registers in reverse order since they are
+// compressible.
+def FPR16 : RegisterClass<"RISCV", [f16, bf16], 16, (add
+ (sequence "F%u_H", 15, 10), // fa5-fa0
+ (sequence "F%u_H", 0, 7), // ft0-f7
+ (sequence "F%u_H", 16, 17), // fa6-fa7
+ (sequence "F%u_H", 28, 31), // ft8-ft11
+ (sequence "F%u_H", 8, 9), // fs0-fs1
+ (sequence "F%u_H", 18, 27) // fs2-fs11
)>;
def FPR32 : RegisterClass<"RISCV", [f32], 32, (add
+ (sequence "F%u_F", 15, 10),
(sequence "F%u_F", 0, 7),
- (sequence "F%u_F", 10, 17),
+ (sequence "F%u_F", 16, 17),
(sequence "F%u_F", 28, 31),
(sequence "F%u_F", 8, 9),
(sequence "F%u_F", 18, 27)
)>;
def FPR32C : RegisterClass<"RISCV", [f32], 32, (add
- (sequence "F%u_F", 10, 15),
+ (sequence "F%u_F", 15, 10),
(sequence "F%u_F", 8, 9)
)>;
// The order of registers represents the preferred allocation sequence,
// meaning caller-save regs are listed before callee-save.
def FPR64 : RegisterClass<"RISCV", [f64], 64, (add
+ (sequence "F%u_D", 15, 10),
(sequence "F%u_D", 0, 7),
- (sequence "F%u_D", 10, 17),
+ (sequence "F%u_D", 16, 17),
(sequence "F%u_D", 28, 31),
(sequence "F%u_D", 8, 9),
(sequence "F%u_D", 18, 27)
)>;
def FPR64C : RegisterClass<"RISCV", [f64], 64, (add
- (sequence "F%u_D", 10, 15),
+ (sequence "F%u_D", 15, 10),
(sequence "F%u_D", 8, 9)
)>;
@@ -332,9 +333,7 @@ defvar vbool32_t = nxv2i1;
defvar vbool64_t = nxv1i1;
// There is no need to define register classes for fractional LMUL.
-def LMULList {
- list<int> m = [1, 2, 4, 8];
-}
+defvar LMULList = [1, 2, 4, 8];
//===----------------------------------------------------------------------===//
// Utility classes for segment load/store.
@@ -419,51 +418,46 @@ class VRegList<list<dag> LIn, int start, int nf, int lmul, bit isV0> {
}
// Vector registers
-let RegAltNameIndices = [ABIRegAltName] in {
- foreach Index = 0-31 in {
- def V#Index : RISCVReg<Index, "v"#Index, ["v"#Index]>, DwarfRegNum<[!add(Index, 96)]>;
- }
+foreach Index = 0-31 in {
+ def V#Index : RISCVReg<Index, "v"#Index>, DwarfRegNum<[!add(Index, 96)]>;
+}
- foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22,
- 24, 26, 28, 30] in {
- def V#Index#M2 : RISCVRegWithSubRegs<Index, "v"#Index,
- [!cast<Register>("V"#Index),
- !cast<Register>("V"#!add(Index, 1))],
- ["v"#Index]>,
- DwarfRegAlias<!cast<Register>("V"#Index)> {
- let SubRegIndices = [sub_vrm1_0, sub_vrm1_1];
- }
+foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22,
+ 24, 26, 28, 30] in {
+ def V#Index#M2 : RISCVRegWithSubRegs<Index, "v"#Index,
+ [!cast<Register>("V"#Index),
+ !cast<Register>("V"#!add(Index, 1))]>,
+ DwarfRegAlias<!cast<Register>("V"#Index)> {
+ let SubRegIndices = [sub_vrm1_0, sub_vrm1_1];
}
+}
- foreach Index = [0, 4, 8, 12, 16, 20, 24, 28] in {
- def V#Index#M4 : RISCVRegWithSubRegs<Index, "v"#Index,
- [!cast<Register>("V"#Index#"M2"),
- !cast<Register>("V"#!add(Index, 2)#"M2")],
- ["v"#Index]>,
- DwarfRegAlias<!cast<Register>("V"#Index)> {
- let SubRegIndices = [sub_vrm2_0, sub_vrm2_1];
- }
+foreach Index = [0, 4, 8, 12, 16, 20, 24, 28] in {
+ def V#Index#M4 : RISCVRegWithSubRegs<Index, "v"#Index,
+ [!cast<Register>("V"#Index#"M2"),
+ !cast<Register>("V"#!add(Index, 2)#"M2")]>,
+ DwarfRegAlias<!cast<Register>("V"#Index)> {
+ let SubRegIndices = [sub_vrm2_0, sub_vrm2_1];
}
+}
- foreach Index = [0, 8, 16, 24] in {
- def V#Index#M8 : RISCVRegWithSubRegs<Index, "v"#Index,
- [!cast<Register>("V"#Index#"M4"),
- !cast<Register>("V"#!add(Index, 4)#"M4")],
- ["v"#Index]>,
- DwarfRegAlias<!cast<Register>("V"#Index)> {
- let SubRegIndices = [sub_vrm4_0, sub_vrm4_1];
- }
+foreach Index = [0, 8, 16, 24] in {
+ def V#Index#M8 : RISCVRegWithSubRegs<Index, "v"#Index,
+ [!cast<Register>("V"#Index#"M4"),
+ !cast<Register>("V"#!add(Index, 4)#"M4")]>,
+ DwarfRegAlias<!cast<Register>("V"#Index)> {
+ let SubRegIndices = [sub_vrm4_0, sub_vrm4_1];
}
-
- def VTYPE : RISCVReg<0, "vtype", ["vtype"]>;
- def VL : RISCVReg<0, "vl", ["vl"]>;
- def VXSAT : RISCVReg<0, "vxsat", ["vxsat"]>;
- def VXRM : RISCVReg<0, "vxrm", ["vxrm"]>;
- let isConstant = true in
- def VLENB : RISCVReg<0, "vlenb", ["vlenb"]>,
- DwarfRegNum<[!add(4096, SysRegVLENB.Encoding)]>;
}
+def VTYPE : RISCVReg<0, "vtype">;
+def VL : RISCVReg<0, "vl">;
+def VXSAT : RISCVReg<0, "vxsat">;
+def VXRM : RISCVReg<0, "vxrm">;
+let isConstant = true in
+def VLENB : RISCVReg<0, "vlenb">,
+ DwarfRegNum<[!add(4096, SysRegVLENB.Encoding)]>;
+
def VCSR : RegisterClass<"RISCV", [XLenVT], 32,
(add VTYPE, VL, VLENB)> {
let RegInfos = XLenRI;
@@ -539,18 +533,33 @@ def VMV0 : RegisterClass<"RISCV", VMaskVTs, 64, (add V0)> {
let RegInfos = XLenRI in {
def GPRF16 : RegisterClass<"RISCV", [f16], 16, (add GPR)>;
def GPRF32 : RegisterClass<"RISCV", [f32], 32, (add GPR)>;
-def GPRF64 : RegisterClass<"RISCV", [f64], 64, (add GPR)>;
} // RegInfos = XLenRI
+// Dummy zero register for use in the register pair containing X0 (as X1 is
+// neither read nor written when the X0 register pair is used).
+def DUMMY_REG_PAIR_WITH_X0 : RISCVReg<0, "0">;
+
+// Must add DUMMY_REG_PAIR_WITH_X0 to a separate register class to prevent the
+// register's existence from changing codegen (due to the regPressureSetLimit
+// for the GPR register class being altered).
+def GPRAll : GPRRegisterClass<(add GPR, DUMMY_REG_PAIR_WITH_X0)>;
+
let RegAltNameIndices = [ABIRegAltName] in {
- foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22,
- 24, 26, 28, 30] in {
+ def X0_PD : RISCVRegWithSubRegs<0, X0.AsmName,
+ [X0, DUMMY_REG_PAIR_WITH_X0],
+ X0.AltNames> {
+ let SubRegIndices = [sub_32, sub_32_hi];
+ let CoveredBySubRegs = 1;
+ }
+ foreach I = 1-15 in {
+ defvar Index = !shl(I, 1);
defvar Reg = !cast<Register>("X"#Index);
+ defvar RegP1 = !cast<Register>("X"#!add(Index,1));
def X#Index#_PD : RISCVRegWithSubRegs<Index, Reg.AsmName,
- [!cast<Register>("X"#Index),
- !cast<Register>("X"#!add(Index, 1))],
- Reg.AltNames> {
+ [Reg, RegP1],
+ Reg.AltNames> {
let SubRegIndices = [sub_32, sub_32_hi];
+ let CoveredBySubRegs = 1;
}
}
}
@@ -570,7 +579,7 @@ def VM : VReg<VMaskVTs,
(add (sequence "V%u", 8, 31),
(sequence "V%u", 0, 7)), 1>;
-foreach m = LMULList.m in {
+foreach m = LMULList in {
foreach nf = NFList<m>.L in {
def "VRN" # nf # "M" # m # "NoV0": VReg<[untyped],
(add !cast<RegisterTuples>("VN" # nf # "M" # m # "NoV0")),
@@ -585,15 +594,3 @@ foreach m = LMULList.m in {
// Special registers
def FFLAGS : RISCVReg<0, "fflags">;
def FRM : RISCVReg<0, "frm">;
-
-// Any type register. Used for .insn directives when we don't know what the
-// register types could be.
-// NOTE: The alignment and size are bogus values. The Size needs to be non-zero
-// or tablegen will use "untyped" to determine the size which will assert.
-let isAllocatable = 0 in
-def AnyReg : RegisterClass<"RISCV", [untyped], 32,
- (add (sequence "X%u", 0, 31),
- (sequence "F%u_D", 0, 31),
- (sequence "V%u", 0, 31))> {
- let Size = 32;
-}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index ed0e9f2eeca9..b14cdd40f154 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -18,9 +18,9 @@ def RocketModel : SchedMachineModel {
let MispredictPenalty = 3;
let CompleteModel = false;
let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
- HasStdExtZknd, HasStdExtZkne, HasStdExtZknh,
- HasStdExtZksed, HasStdExtZksh, HasStdExtZkr,
- HasVInstructions, HasVInstructionsI64];
+ HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
+ HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
+ HasStdExtZkr, HasVInstructions, HasVInstructionsI64];
}
//===----------------------------------------------------------------------===//
@@ -51,7 +51,6 @@ let SchedModel = RocketModel in {
def : WriteRes<WriteJmp, [RocketUnitB]>;
def : WriteRes<WriteJal, [RocketUnitB]>;
def : WriteRes<WriteJalr, [RocketUnitB]>;
-def : WriteRes<WriteJmpReg, [RocketUnitB]>;
// Integer arithmetic and logic
def : WriteRes<WriteIALU32, [RocketUnitALU]>;
@@ -244,6 +243,7 @@ defm : UnsupportedSchedZbc;
defm : UnsupportedSchedZbs;
defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
+defm : UnsupportedSchedZfa;
defm : UnsupportedSchedZfh;
defm : UnsupportedSchedSFB;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 329209f8aa81..e22c05b30b7f 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -8,6 +8,187 @@
//===----------------------------------------------------------------------===//
+/// c is true if mx has the worst case behavior compared to LMULs in MxList.
+/// On the SiFive7, the worst case LMUL is the Largest LMUL
+/// and the worst case sew is the smallest SEW for that LMUL.
+class SiFive7IsWorstCaseMX<string mx, list<string> MxList> {
+ defvar LLMUL = LargestLMUL<MxList>.r;
+ bit c = !eq(mx, LLMUL);
+}
+
+/// c is true if mx and sew have the worst case behavior compared to LMULs in
+/// MxList. On the SiFive7, the worst case LMUL is the Largest LMUL
+/// and the worst case sew is the smallest SEW for that LMUL.
+class SiFive7IsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
+ bit isF = 0> {
+ defvar LLMUL = LargestLMUL<MxList>.r;
+ defvar SSEW = SmallestSEW<mx, isF>.r;
+ bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
+}
+
+/// Number of DLEN parts = (LMUL * VLEN) / DLEN.
+/// Since DLEN = VLEN / 2, Num DLEN parts = 2 * LMUL.
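+/// For example, LMUL=M4 takes 8 DLEN-sized passes, while the fractional LMULs
+/// (MF2-MF8) fit in a single pass, hence the value 1.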
+class SiFive7GetCyclesDefault<string mx> {
+ int c = !cond(
+ !eq(mx, "M1") : 2,
+ !eq(mx, "M2") : 4,
+ !eq(mx, "M4") : 8,
+ !eq(mx, "M8") : 16,
+ !eq(mx, "MF2") : 1,
+ !eq(mx, "MF4") : 1,
+ !eq(mx, "MF8") : 1
+ );
+}
+
+class SiFive7GetCyclesNarrowing<string mx> {
+ int c = !cond(
+ !eq(mx, "M1") : 4,
+ !eq(mx, "M2") : 8,
+ !eq(mx, "M4") : 16,
+ !eq(mx, "MF2") : 2,
+ !eq(mx, "MF4") : 1,
+ !eq(mx, "MF8") : 1
+ );
+}
+
+class SiFive7GetCyclesVMask<string mx> {
+ int c = !cond(
+ !eq(mx, "M1") : 1,
+ !eq(mx, "M2") : 1,
+ !eq(mx, "M4") : 1,
+ !eq(mx, "M8") : 2,
+ !eq(mx, "MF2") : 1,
+ !eq(mx, "MF4") : 1,
+ !eq(mx, "MF8") : 1
+ );
+}
+
+/// VLDM and VSTM can't read/write more than 2 DLENs of data.
+/// 2 DLENs when LMUL=8. 1 DLEN for all other LMULs.
+class SiFive7GetMaskLoadStoreCycles<string mx> {
+ int c = !cond(
+ !eq(mx, "M8") : 2,
+ true : 1
+ );
+}
+
+// Cycles for nf=2 segmented loads and stores are calculated using the
+// formula (2 * VLEN * LMUL) / DLEN = 4 * LMUL
+class SiFive7GetCyclesSegmentedSeg2<string mx> {
+ int c = !cond(
+ !eq(mx, "M1") : 4,
+ !eq(mx, "M2") : 8,
+ !eq(mx, "M4") : 16,
+ !eq(mx, "M8") : 32,
+ !eq(mx, "MF2") : 2,
+ !eq(mx, "MF4") : 1,
+ !eq(mx, "MF8") : 1
+ );
+}
+
+// Cycles for segmented loads and stores are calculated using the
+// formula vl * ceil((SEW * nf) / DLEN), where SEW * nf is the segment size.
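+// For example (with the VLEN=512, DLEN=256 assumed below), mx=M1, sew=32 and
+// nf=4 gives vl = 512/32 = 16 and ceil((32*4)/256) = 1, i.e. 16 cycles.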
+class SiFive7GetCyclesSegmented<string mx, int sew, int nf> {
+ defvar VLEN = 512;
+ defvar DLEN = 256;
+ // (VLEN * LMUL) / SEW
+ defvar VLUpperBound = !cond(
+ !eq(mx, "M1") : !div(VLEN, sew),
+ !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
+ !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
+ !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
+ !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
+ !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
+ !eq(mx, "MF8") : !div(!div(VLEN, 8), sew),
+ );
+ // We can calculate ceil(a/b) using (a + b - 1) / b.
+ defvar a = !mul(sew, nf);
+ defvar b = DLEN;
+ int c = !mul(VLUpperBound, !div(!sub(!add(a, b), 1), b));
+}
+
+class SiFive7GetCyclesOnePerElement<string mx, int sew> {
+ // FIXME: On SiFive7, VLEN is 512. Although a user can request the compiler
+ // to use a different VLEN, this model will not make scheduling decisions
+ // based on the user specified VLEN.
+ // c = ceil(VLEN / SEW) * LMUL
+ // Note: c >= 1 since the smallest value of VLEN here is 512 / 64 = 8, and
+ // the largest division performed on VLEN is in the MF8 case, with division
+ // by 8. Therefore, there is no need to ceil the result.
+ int VLEN = !div(512, sew);
+ int c = !cond(
+ !eq(mx, "M1") : VLEN,
+ !eq(mx, "M2") : !mul(VLEN, 2),
+ !eq(mx, "M4") : !mul(VLEN, 4),
+ !eq(mx, "M8") : !mul(VLEN, 8),
+ !eq(mx, "MF2") : !div(VLEN, 2),
+ !eq(mx, "MF4") : !div(VLEN, 4),
+ !eq(mx, "MF8") : !div(VLEN, 8)
+ );
+}
+
+class SiFive7GetDivOrSqrtFactor<int sew> {
+ int c = !cond(
+ // TODO: Add SchedSEWSetFP upstream and remove the SEW=8 case.
+ !eq(sew, 8) : 15,
+ !eq(sew, 16) : 15,
+ !eq(sew, 32) : 28,
+ !eq(sew, 64) : 57
+ );
+}
+
+/// Cycles for reductions take approximately VL*SEW/DLEN + 5(4 + log(DLEN/SEW))
+/// cycles.
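+/// For example, with LMUL=M2 and SEW=32 this is 2*2 + 5*(4 + log2(256/32))
+/// = 4 + 35 = 39 cycles (using the VLEN=512, DLEN=256 assumed below).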
+class SiFive7GetReductionCycles<string mx, int sew> {
+ // VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
+ // VLUpperBound=(VLEN*LMUL)/SEW.
+ defvar VLEN = 512;
+ defvar DLEN = !div(VLEN, 2);
+ defvar TwoTimesLMUL = !cond(
+ !eq(mx, "M1") : 2,
+ !eq(mx, "M2") : 4,
+ !eq(mx, "M4") : 8,
+ !eq(mx, "M8") : 16,
+ !eq(mx, "MF2") : 1,
+ !eq(mx, "MF4") : 1,
+ !eq(mx, "MF8") : 1
+ );
+ int c = !add(
+ TwoTimesLMUL,
+ !mul(5, !add(4, !logtwo(!div(DLEN, sew))))
+ );
+}
+
+/// Cycles for ordered reductions take approximately 5*VL cycles.
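+/// For example, with LMUL=M1 and SEW=64 the VL upper bound is 512/64 = 8,
+/// giving roughly 40 cycles.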
+class SiFive7GetOrderedReductionCycles<string mx, int sew> {
+ defvar VLEN = 512;
+ // (VLEN * LMUL) / SEW
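+ // For example, <mx=M1, sew=64> gives VLUpperBound = 512 / 64 = 8 and
+ // c = 5 * 8 = 40 cycles.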
+ defvar VLUpperBound = !cond(
+ !eq(mx, "M1") : !div(VLEN, sew),
+ !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
+ !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
+ !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
+ !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
+ !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
+ !eq(mx, "MF8") : !div(!div(VLEN, 8), sew),
+ );
+ int c = !mul(5, VLUpperBound);
+}
+
+class SiFive7AnyToGPRBypass<SchedRead read, int cycles = 2>
+ : ReadAdvance<read, cycles, [WriteIALU, WriteIALU32,
+ WriteShiftImm, WriteShiftImm32,
+ WriteShiftReg, WriteShiftReg32,
+ WriteSHXADD, WriteSHXADD32,
+ WriteRotateImm, WriteRotateImm32,
+ WriteRotateReg, WriteRotateReg32,
+ WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
+ WriteCPOP, WriteCPOP32,
+ WriteREV8, WriteORCB, WriteSFB,
+ WriteIMul, WriteIMul32,
+ WriteIDiv, WriteIDiv32,
+ WriteLDB, WriteLDH, WriteLDW, WriteLDD]>;
+
// SiFive7 machine model for scheduling and other instruction cost heuristics.
def SiFive7Model : SchedMachineModel {
let MicroOpBufferSize = 0; // Explicitly set to zero since SiFive7 is in-order.
@@ -16,33 +197,39 @@ def SiFive7Model : SchedMachineModel {
let MispredictPenalty = 3;
let CompleteModel = 0;
let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
- HasStdExtZknd, HasStdExtZkne, HasStdExtZknh,
- HasStdExtZksed, HasStdExtZksh, HasStdExtZkr,
- HasVInstructions];
+ HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
+ HasStdExtZknh, HasStdExtZksed, HasStdExtZksh,
+ HasStdExtZkr];
}
-// The SiFive7 microarchitecture has two pipelines: A and B.
+// The SiFive7 microarchitecture has three pipelines: A, B, and V.
// Pipe A can handle memory, integer alu and vector operations.
// Pipe B can handle integer alu, control flow, integer multiply and divide,
// and floating point computation.
+// Pipe V can handle the V extension.
let SchedModel = SiFive7Model in {
let BufferSize = 0 in {
def SiFive7PipeA : ProcResource<1>;
def SiFive7PipeB : ProcResource<1>;
+def SiFive7PipeV : ProcResource<1>;
}
let BufferSize = 1 in {
def SiFive7IDiv : ProcResource<1> { let Super = SiFive7PipeB; } // Int Division
def SiFive7FDiv : ProcResource<1> { let Super = SiFive7PipeB; } // FP Division/Sqrt
+def SiFive7VA : ProcResource<1> { let Super = SiFive7PipeV; } // Arithmetic sequencer
+def SiFive7VL : ProcResource<1> { let Super = SiFive7PipeV; } // Load sequencer
+def SiFive7VS : ProcResource<1> { let Super = SiFive7PipeV; } // Store sequencer
}
def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>;
// Branching
+let Latency = 3 in {
def : WriteRes<WriteJmp, [SiFive7PipeB]>;
def : WriteRes<WriteJal, [SiFive7PipeB]>;
def : WriteRes<WriteJalr, [SiFive7PipeB]>;
-def : WriteRes<WriteJmpReg, [SiFive7PipeB]>;
+}
//Short forward branch
def : WriteRes<WriteSFB, [SiFive7PipeA, SiFive7PipeB]> {
@@ -68,12 +255,41 @@ def : WriteRes<WriteIMul32, [SiFive7PipeB]>;
// Integer division
def : WriteRes<WriteIDiv, [SiFive7PipeB, SiFive7IDiv]> {
- let Latency = 16;
- let ResourceCycles = [1, 15];
+ let Latency = 66;
+ let ResourceCycles = [1, 65];
}
def : WriteRes<WriteIDiv32, [SiFive7PipeB, SiFive7IDiv]> {
- let Latency = 16;
- let ResourceCycles = [1, 15];
+ let Latency = 34;
+ let ResourceCycles = [1, 33];
+}
+
+// Bitmanip
+let Latency = 3 in {
+// Rotates are in the late-B ALU.
+def : WriteRes<WriteRotateImm, [SiFive7PipeB]>;
+def : WriteRes<WriteRotateImm32, [SiFive7PipeB]>;
+def : WriteRes<WriteRotateReg, [SiFive7PipeB]>;
+def : WriteRes<WriteRotateReg32, [SiFive7PipeB]>;
+
+// clz[w]/ctz[w] are in the late-B ALU.
+def : WriteRes<WriteCLZ, [SiFive7PipeB]>;
+def : WriteRes<WriteCLZ32, [SiFive7PipeB]>;
+def : WriteRes<WriteCTZ, [SiFive7PipeB]>;
+def : WriteRes<WriteCTZ32, [SiFive7PipeB]>;
+
+// cpop[w] look exactly like multiply.
+def : WriteRes<WriteCPOP, [SiFive7PipeB]>;
+def : WriteRes<WriteCPOP32, [SiFive7PipeB]>;
+
+// orc.b is in the late-B ALU.
+def : WriteRes<WriteORCB, [SiFive7PipeB]>;
+
+// rev8 is in the late-A and late-B ALUs.
+def : WriteRes<WriteREV8, [SiFive7PipeAB]>;
+
+// shNadd[.uw] is on the early-B and late-B ALUs.
+def : WriteRes<WriteSHXADD, [SiFive7PipeB]>;
+def : WriteRes<WriteSHXADD32, [SiFive7PipeB]>;
}
// Memory
@@ -81,6 +297,7 @@ def : WriteRes<WriteSTB, [SiFive7PipeA]>;
def : WriteRes<WriteSTH, [SiFive7PipeA]>;
def : WriteRes<WriteSTW, [SiFive7PipeA]>;
def : WriteRes<WriteSTD, [SiFive7PipeA]>;
+def : WriteRes<WriteFST16, [SiFive7PipeA]>;
def : WriteRes<WriteFST32, [SiFive7PipeA]>;
def : WriteRes<WriteFST64, [SiFive7PipeA]>;
@@ -92,6 +309,7 @@ def : WriteRes<WriteLDD, [SiFive7PipeA]>;
}
let Latency = 2 in {
+def : WriteRes<WriteFLD16, [SiFive7PipeA]>;
def : WriteRes<WriteFLD32, [SiFive7PipeA]>;
def : WriteRes<WriteFLD64, [SiFive7PipeA]>;
}
@@ -107,6 +325,22 @@ def : WriteRes<WriteAtomicLDW, [SiFive7PipeA]>;
def : WriteRes<WriteAtomicLDD, [SiFive7PipeA]>;
}
+// Half precision.
+let Latency = 5 in {
+def : WriteRes<WriteFAdd16, [SiFive7PipeB]>;
+def : WriteRes<WriteFMul16, [SiFive7PipeB]>;
+def : WriteRes<WriteFMA16, [SiFive7PipeB]>;
+}
+let Latency = 3 in {
+def : WriteRes<WriteFSGNJ16, [SiFive7PipeB]>;
+def : WriteRes<WriteFMinMax16, [SiFive7PipeB]>;
+}
+
+let Latency = 14, ResourceCycles = [1, 13] in {
+def : WriteRes<WriteFDiv16, [SiFive7PipeB, SiFive7FDiv]>;
+def : WriteRes<WriteFSqrt16, [SiFive7PipeB, SiFive7FDiv]>;
+}
+
// Single precision.
let Latency = 5 in {
def : WriteRes<WriteFAdd32, [SiFive7PipeB]>;
@@ -141,46 +375,534 @@ def : WriteRes<WriteFSqrt64, [SiFive7PipeB, SiFive7FDiv]> { let Latency = 56;
// Conversions
let Latency = 3 in {
+def : WriteRes<WriteFCvtI32ToF16, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtI32ToF32, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtI32ToF64, [SiFive7PipeB]>;
+def : WriteRes<WriteFCvtI64ToF16, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtI64ToF32, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtI64ToF64, [SiFive7PipeB]>;
+def : WriteRes<WriteFCvtF16ToI32, [SiFive7PipeB]>;
+def : WriteRes<WriteFCvtF16ToI64, [SiFive7PipeB]>;
+def : WriteRes<WriteFCvtF16ToF32, [SiFive7PipeB]>;
+def : WriteRes<WriteFCvtF16ToF64, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF32ToI32, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF32ToI64, [SiFive7PipeB]>;
+def : WriteRes<WriteFCvtF32ToF16, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF32ToF64, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF64ToI32, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF64ToI64, [SiFive7PipeB]>;
+def : WriteRes<WriteFCvtF64ToF16, [SiFive7PipeB]>;
def : WriteRes<WriteFCvtF64ToF32, [SiFive7PipeB]>;
+def : WriteRes<WriteFClass16, [SiFive7PipeB]>;
def : WriteRes<WriteFClass32, [SiFive7PipeB]>;
def : WriteRes<WriteFClass64, [SiFive7PipeB]>;
+def : WriteRes<WriteFCmp16, [SiFive7PipeB]>;
def : WriteRes<WriteFCmp32, [SiFive7PipeB]>;
def : WriteRes<WriteFCmp64, [SiFive7PipeB]>;
+def : WriteRes<WriteFMovI16ToF16, [SiFive7PipeB]>;
+def : WriteRes<WriteFMovF16ToI16, [SiFive7PipeB]>;
def : WriteRes<WriteFMovI32ToF32, [SiFive7PipeB]>;
def : WriteRes<WriteFMovF32ToI32, [SiFive7PipeB]>;
def : WriteRes<WriteFMovI64ToF64, [SiFive7PipeB]>;
def : WriteRes<WriteFMovF64ToI64, [SiFive7PipeB]>;
}
+// 6. Configuration-Setting Instructions
+let Latency = 3 in {
+def : WriteRes<WriteVSETVLI, [SiFive7PipeA]>;
+def : WriteRes<WriteVSETIVLI, [SiFive7PipeA]>;
+def : WriteRes<WriteVSETVL, [SiFive7PipeA]>;
+}
+
+// 7. Vector Loads and Stores
+// Unit-stride loads and stores can operate at the full bandwidth of the memory
+// pipe. The memory pipe is DLEN bits wide on x280.
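+// For example, an LMUL=M4 unit-stride access moves 4 * VLEN = 2048 bits, which
+// at DLEN = 256 bits per cycle corresponds to the 8 ResourceCycles given by
+// SiFive7GetCyclesDefault<"M4">.c below.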
+foreach mx = SchedMxList in {
+ defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = 4, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVLDE", [SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDFF", [SiFive7VL], mx, IsWorstCase>;
+ }
+ let Latency = 1, ResourceCycles = [Cycles] in
+ defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VS], mx, IsWorstCase>;
+}
+
+foreach mx = SchedMxList in {
+ defvar Cycles = SiFive7GetMaskLoadStoreCycles<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = 4, ResourceCycles = [Cycles] in
+ defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VL], mx, IsWorstCase>;
+ let Latency = 1, ResourceCycles = [Cycles] in
+ defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VS], mx, IsWorstCase>;
+}
+
+// Strided loads and stores operate at one element per cycle and should be
+// scheduled accordingly. Indexed loads and stores operate at one element per
+// cycle, and they stall the machine until all addresses have been generated,
+// so they cannot be scheduled. Indexed and strided loads and stores have LMUL
+// specific suffixes, but since SEW is already encoded in the name of the
+// resource, we do not need to use LMULSEWXXX constructors. However, we do
+// use the SEW from the name to determine the number of Cycles.
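+// For example, WriteVLDS32 at LMUL=M2 uses SiFive7GetCyclesOnePerElement<"M2", 32>.c
+// = (512 / 32) * 2 = 32, giving ResourceCycles = [32] and Latency = 3 + 32 = 35.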
+foreach mx = SchedMxList in {
+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 8>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVLDS8", [SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDUX8", [SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX8", [SiFive7VL], mx, IsWorstCase>;
+ }
+ let Latency = 1, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVSTS8", [SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX8", [SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX8", [SiFive7VS], mx, IsWorstCase>;
+ }
+}
+foreach mx = SchedMxList in {
+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 16>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVLDS16", [SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFive7VL], mx, IsWorstCase>;
+ }
+ let Latency = 1, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVSTS16", [SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX16", [SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFive7VS], mx, IsWorstCase>;
+ }
+}
+foreach mx = SchedMxList in {
+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 32>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVLDS32", [SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFive7VL], mx, IsWorstCase>;
+ }
+ let Latency = 1, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVSTS32", [SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX32", [SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFive7VS], mx, IsWorstCase>;
+ }
+}
+foreach mx = SchedMxList in {
+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, 64>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVLDS64", [SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFive7VL], mx, IsWorstCase>;
+ }
+ let Latency = 1, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVSTS64", [SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTUX64", [SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX64", [SiFive7VS], mx, IsWorstCase>;
+ }
+}
+
+// VLD*R is LMUL aware
+let Latency = 4, ResourceCycles = [2] in
+ def : WriteRes<WriteVLD1R, [SiFive7VL]>;
+let Latency = 4, ResourceCycles = [4] in
+ def : WriteRes<WriteVLD2R, [SiFive7VL]>;
+let Latency = 4, ResourceCycles = [8] in
+ def : WriteRes<WriteVLD4R, [SiFive7VL]>;
+let Latency = 4, ResourceCycles = [16] in
+ def : WriteRes<WriteVLD8R, [SiFive7VL]>;
+// VST*R is LMUL aware
+let Latency = 1, ResourceCycles = [2] in
+ def : WriteRes<WriteVST1R, [SiFive7VS]>;
+let Latency = 1, ResourceCycles = [4] in
+ def : WriteRes<WriteVST2R, [SiFive7VS]>;
+let Latency = 1, ResourceCycles = [8] in
+ def : WriteRes<WriteVST4R, [SiFive7VS]>;
+let Latency = 1, ResourceCycles = [16] in
+ def : WriteRes<WriteVST8R, [SiFive7VS]>;
+
+// Segmented Loads and Stores
+// Unit-stride segmented loads and stores are effectively converted into strided
+// segment loads and stores. Strided segment loads and stores operate at up to
+// one segment per cycle if the segment fits within one aligned memory beat.
+// Indexed segment loads and stores operate at the same rate as strided ones,
+// but they stall the machine until all addresses have been generated.
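+// For example, a three-field segment load of 16-bit elements at LMUL=M1
+// (WriteVLSEG3e16) has a 48-bit segment, which fits in one 256-bit DLEN beat,
+// so SiFive7GetCyclesSegmented<"M1", 16, 3>.c = (512 / 16) * 1 = 32 cycles.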
+foreach mx = SchedMxList in {
+ foreach eew = [8, 16, 32, 64] in {
+ defvar Cycles = SiFive7GetCyclesSegmentedSeg2<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ // Does not chain so set latency high
+ let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVLSEG2e" # eew, [SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLSEGFF2e" # eew, [SiFive7VL], mx, IsWorstCase>;
+ }
+ let Latency = 1, ResourceCycles = [Cycles] in
+ defm "" : LMULWriteResMX<"WriteVSSEG2e" # eew, [SiFive7VS], mx, IsWorstCase>;
+ foreach nf=3-8 in {
+ defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ // Does not chain so set latency high
+ let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>;
+ }
+ let Latency = 1, ResourceCycles = [Cycles] in
+ defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>;
+ }
+ }
+}
+foreach mx = SchedMxList in {
+ foreach nf=2-8 in {
+ foreach eew = [8, 16, 32, 64] in {
+ defvar Cycles = SiFive7GetCyclesSegmented<mx, eew, nf>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ // Does not chain so set latency high
+ let Latency = !add(3, Cycles), ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>;
+ }
+ let Latency = 1, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>;
+ }
+ }
+ }
+}
+
+// 11. Vector Integer Arithmetic Instructions
+foreach mx = SchedMxList in {
+ defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = 4, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVIALUV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIALUX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIALUI", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICALUI", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVShiftI", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIMovI", [SiFive7VA], mx, IsWorstCase>;
+ }
+ // Mask results can't chain.
+ let Latency = !add(Cycles, 3), ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVICmpV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVICmpI", [SiFive7VA], mx, IsWorstCase>;
+ }
+}
+foreach mx = SchedMxList in {
+ defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = 4, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VA], mx, IsWorstCase>;
+ }
+}
+foreach mx = SchedMxList in {
+ foreach sew = SchedSEWSet<mx>.val in {
+ defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
+ !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
+ defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
+ let Latency = Cycles, ResourceCycles = [Cycles] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SiFive7VA], mx, sew, IsWorstCase>;
+ }
+ }
+}
+
+// Widening
+foreach mx = SchedMxListW in {
+ defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
+ let Latency = 8, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVIWALUV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWALUX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWALUI", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SiFive7VA], mx, IsWorstCase>;
+ }
+}
+// Narrowing
+foreach mx = SchedMxListW in {
+ defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
+ let Latency = 8, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVNShiftV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNShiftX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNShiftI", [SiFive7VA], mx, IsWorstCase>;
+ }
+}
+
+// 12. Vector Fixed-Point Arithmetic Instructions
+foreach mx = SchedMxList in {
+ defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = 8, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVSALUV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSALUX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSALUI", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVAALUV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVAALUX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSMulV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSMulX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSShiftV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSShiftX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSShiftI", [SiFive7VA], mx, IsWorstCase>;
+ }
+}
+// Narrowing
+foreach mx = SchedMxListW in {
+ defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
+ let Latency = 8, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVNClipV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNClipX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVNClipI", [SiFive7VA], mx, IsWorstCase>;
+ }
+}
+
+// 13. Vector Floating-Point Instructions
+foreach mx = SchedMxList in {
+ defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = 8, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVFALUV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFALUF", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMulV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMulF", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMulAddV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMulAddF", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFRecpV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VA], mx, IsWorstCase>;
+ }
+ let Latency = 4, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVFSgnjV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFSgnjF", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMinMaxV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMinMaxF", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFClassV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFMovV", [SiFive7VA], mx, IsWorstCase>;
+ }
+ // Mask results can't chain.
+ let Latency = !add(Cycles, 3), ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFive7VA], mx, IsWorstCase>;
+ }
+}
+foreach mx = SchedMxListF in {
+ foreach sew = SchedSEWSet<mx, isF=1>.val in {
+ defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor<sew>.c,
+ !div(SiFive7GetCyclesOnePerElement<mx, sew>.c, 4));
+ defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
+ let Latency = Cycles, ResourceCycles = [Cycles] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SiFive7VA], mx, sew, IsWorstCase>;
+ }
+ }
+}
+
+// Widening
+foreach mx = SchedMxListW in {
+ defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
+ let Latency = 8, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VA], mx, IsWorstCase>;
+ }
+}
+foreach mx = SchedMxListFW in {
+ defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
+ let Latency = 8, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVFWALUV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWMulV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWMulAddV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWMulAddF", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWMulF", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFWALUF", [SiFive7VA], mx, IsWorstCase>;
+ }
+}
+// Narrowing
+foreach mx = SchedMxListW in {
+ defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListW>.c;
+ let Latency = 8, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VA], mx, IsWorstCase>;
+ }
+}
+foreach mx = SchedMxListFW in {
+ defvar Cycles = SiFive7GetCyclesNarrowing<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxListFW>.c;
+ let Latency = 8, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVFNCvtIToFV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFNCvtFToFV", [SiFive7VA], mx, IsWorstCase>;
+ }
+}
+
+// 14. Vector Reduction Operations
+foreach mx = SchedMxList in {
+ foreach sew = SchedSEWSet<mx>.val in {
+ defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
+ let Latency = Cycles, ResourceCycles = [Cycles] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFive7VA],
+ mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIRedMinMaxV_From", [SiFive7VA],
+ mx, sew, IsWorstCase>;
+ }
+}
+
+foreach mx = SchedMxListWRed in {
+ foreach sew = SchedSEWSet<mx, 0, 1>.val in {
+ defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListWRed>.c;
+ let Latency = Cycles, ResourceCycles = [Cycles] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VA],
+ mx, sew, IsWorstCase>;
+ }
+}
+
+foreach mx = SchedMxListF in {
+ foreach sew = SchedSEWSet<mx, 1>.val in {
+ defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListF, 1>.c;
+ let Latency = RedCycles, ResourceCycles = [RedCycles] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFive7VA],
+ mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SiFive7VA],
+ mx, sew, IsWorstCase>;
+ }
+ defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
+ let Latency = OrdRedCycles, ResourceCycles = [OrdRedCycles] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VA],
+ mx, sew, IsWorstCase>;
+ }
+}
+
+foreach mx = SchedMxListFWRed in {
+ foreach sew = SchedSEWSet<mx, 1, 1>.val in {
+ defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxListFWRed, 1>.c;
+ let Latency = RedCycles, ResourceCycles = [RedCycles] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VA],
+ mx, sew, IsWorstCase>;
+ defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
+ let Latency = OrdRedCycles, ResourceCycles = [OrdRedCycles] in
+ defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VA],
+ mx, sew, IsWorstCase>;
+ }
+}
+
+// 15. Vector Mask Instructions
+foreach mx = SchedMxList in {
+ defvar Cycles = SiFive7GetCyclesVMask<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = 4, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVMALUV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVMPopV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVMFFSV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVMSFSV", [SiFive7VA], mx, IsWorstCase>;
+ }
+}
+foreach mx = SchedMxList in {
+ defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = 4, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVMIotV", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVMIdxV", [SiFive7VA], mx, IsWorstCase>;
+ }
+}
+
+// 16. Vector Permutation Instructions
+let Latency = 4, ResourceCycles = [1] in {
+ def : WriteRes<WriteVIMovVX, [SiFive7VA]>;
+ def : WriteRes<WriteVIMovXV, [SiFive7VA]>;
+ def : WriteRes<WriteVFMovVF, [SiFive7VA]>;
+ def : WriteRes<WriteVFMovFV, [SiFive7VA]>;
+}
+foreach mx = SchedMxList in {
+ defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = 8, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVRGatherVX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVRGatherVI", [SiFive7VA], mx, IsWorstCase>;
+ }
+}
+
+foreach mx = SchedMxList in {
+ foreach sew = SchedSEWSet<mx>.val in {
+ defvar Cycles = SiFive7GetCyclesOnePerElement<mx, sew>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
+ let Latency = !add(Cycles, 3), ResourceCycles = [Cycles] in {
+ defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFive7VA], mx, sew, IsWorstCase>;
+ defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SiFive7VA], mx, sew, IsWorstCase>;
+ }
+ }
+}
+
+foreach mx = SchedMxList in {
+ defvar Cycles = SiFive7GetCyclesDefault<mx>.c;
+ defvar IsWorstCase = SiFive7IsWorstCaseMX<mx, SchedMxList>.c;
+ let Latency = 4, ResourceCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVISlideX", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVISlideI", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVISlide1X", [SiFive7VA], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVFSlide1F", [SiFive7VA], mx, IsWorstCase>;
+ }
+}
+
+// VMov*V is LMUL Aware
+let Latency = 4, ResourceCycles = [2] in
+ def : WriteRes<WriteVMov1V, [SiFive7VA]>;
+let Latency = 4, ResourceCycles = [4] in
+ def : WriteRes<WriteVMov2V, [SiFive7VA]>;
+let Latency = 4, ResourceCycles = [8] in
+ def : WriteRes<WriteVMov4V, [SiFive7VA]>;
+let Latency = 4, ResourceCycles = [16] in
+ def : WriteRes<WriteVMov8V, [SiFive7VA]>;
+
// Others
def : WriteRes<WriteCSR, [SiFive7PipeB]>;
def : WriteRes<WriteNop, []>;
+let Latency = 3 in
+ def : WriteRes<WriteRdVLENB, [SiFive7PipeB]>;
def : InstRW<[WriteIALU], (instrs COPY)>;
//===----------------------------------------------------------------------===//
+
// Bypass and advance
-def : ReadAdvance<ReadJmp, 0>;
-def : ReadAdvance<ReadJalr, 0>;
+def : SiFive7AnyToGPRBypass<ReadJmp>;
+def : SiFive7AnyToGPRBypass<ReadJalr>;
def : ReadAdvance<ReadCSR, 0>;
def : ReadAdvance<ReadStoreData, 0>;
def : ReadAdvance<ReadMemBase, 0>;
-def : ReadAdvance<ReadIALU, 0>;
-def : ReadAdvance<ReadIALU32, 0>;
-def : ReadAdvance<ReadShiftImm, 0>;
-def : ReadAdvance<ReadShiftImm32, 0>;
-def : ReadAdvance<ReadShiftReg, 0>;
-def : ReadAdvance<ReadShiftReg32, 0>;
+def : SiFive7AnyToGPRBypass<ReadIALU>;
+def : SiFive7AnyToGPRBypass<ReadIALU32>;
+def : SiFive7AnyToGPRBypass<ReadShiftImm>;
+def : SiFive7AnyToGPRBypass<ReadShiftImm32>;
+def : SiFive7AnyToGPRBypass<ReadShiftReg>;
+def : SiFive7AnyToGPRBypass<ReadShiftReg32>;
def : ReadAdvance<ReadIDiv, 0>;
def : ReadAdvance<ReadIDiv32, 0>;
def : ReadAdvance<ReadIMul, 0>;
@@ -195,49 +917,249 @@ def : ReadAdvance<ReadAtomicSTW, 0>;
def : ReadAdvance<ReadAtomicSTD, 0>;
def : ReadAdvance<ReadFStoreData, 0>;
def : ReadAdvance<ReadFMemBase, 0>;
+def : ReadAdvance<ReadFAdd16, 0>;
def : ReadAdvance<ReadFAdd32, 0>;
def : ReadAdvance<ReadFAdd64, 0>;
+def : ReadAdvance<ReadFMul16, 0>;
+def : ReadAdvance<ReadFMA16, 0>;
def : ReadAdvance<ReadFMul32, 0>;
def : ReadAdvance<ReadFMul64, 0>;
def : ReadAdvance<ReadFMA32, 0>;
def : ReadAdvance<ReadFMA64, 0>;
+def : ReadAdvance<ReadFDiv16, 0>;
def : ReadAdvance<ReadFDiv32, 0>;
def : ReadAdvance<ReadFDiv64, 0>;
+def : ReadAdvance<ReadFSqrt16, 0>;
def : ReadAdvance<ReadFSqrt32, 0>;
def : ReadAdvance<ReadFSqrt64, 0>;
+def : ReadAdvance<ReadFCmp16, 0>;
def : ReadAdvance<ReadFCmp32, 0>;
def : ReadAdvance<ReadFCmp64, 0>;
+def : ReadAdvance<ReadFSGNJ16, 0>;
def : ReadAdvance<ReadFSGNJ32, 0>;
def : ReadAdvance<ReadFSGNJ64, 0>;
+def : ReadAdvance<ReadFMinMax16, 0>;
def : ReadAdvance<ReadFMinMax32, 0>;
def : ReadAdvance<ReadFMinMax64, 0>;
+def : ReadAdvance<ReadFCvtF16ToI32, 0>;
+def : ReadAdvance<ReadFCvtF16ToI64, 0>;
def : ReadAdvance<ReadFCvtF32ToI32, 0>;
def : ReadAdvance<ReadFCvtF32ToI64, 0>;
def : ReadAdvance<ReadFCvtF64ToI32, 0>;
def : ReadAdvance<ReadFCvtF64ToI64, 0>;
+def : ReadAdvance<ReadFCvtI32ToF16, 0>;
def : ReadAdvance<ReadFCvtI32ToF32, 0>;
def : ReadAdvance<ReadFCvtI32ToF64, 0>;
+def : ReadAdvance<ReadFCvtI64ToF16, 0>;
def : ReadAdvance<ReadFCvtI64ToF32, 0>;
def : ReadAdvance<ReadFCvtI64ToF64, 0>;
def : ReadAdvance<ReadFCvtF32ToF64, 0>;
def : ReadAdvance<ReadFCvtF64ToF32, 0>;
+def : ReadAdvance<ReadFCvtF16ToF32, 0>;
+def : ReadAdvance<ReadFCvtF32ToF16, 0>;
+def : ReadAdvance<ReadFCvtF16ToF64, 0>;
+def : ReadAdvance<ReadFCvtF64ToF16, 0>;
+def : ReadAdvance<ReadFMovF16ToI16, 0>;
+def : ReadAdvance<ReadFMovI16ToF16, 0>;
def : ReadAdvance<ReadFMovF32ToI32, 0>;
def : ReadAdvance<ReadFMovI32ToF32, 0>;
def : ReadAdvance<ReadFMovF64ToI64, 0>;
def : ReadAdvance<ReadFMovI64ToF64, 0>;
+def : ReadAdvance<ReadFClass16, 0>;
def : ReadAdvance<ReadFClass32, 0>;
def : ReadAdvance<ReadFClass64, 0>;
-def : ReadAdvance<ReadSFB, 0>;
+def : SiFive7AnyToGPRBypass<ReadSFBJmp, 0>;
+def : SiFive7AnyToGPRBypass<ReadSFBALU, 0>;
+
+// Bitmanip
+def : SiFive7AnyToGPRBypass<ReadRotateImm>;
+def : SiFive7AnyToGPRBypass<ReadRotateImm32>;
+def : SiFive7AnyToGPRBypass<ReadRotateReg>;
+def : SiFive7AnyToGPRBypass<ReadRotateReg32>;
+def : SiFive7AnyToGPRBypass<ReadCLZ>;
+def : SiFive7AnyToGPRBypass<ReadCLZ32>;
+def : SiFive7AnyToGPRBypass<ReadCTZ>;
+def : SiFive7AnyToGPRBypass<ReadCTZ32>;
+def : ReadAdvance<ReadCPOP, 0>;
+def : ReadAdvance<ReadCPOP32, 0>;
+def : SiFive7AnyToGPRBypass<ReadORCB>;
+def : SiFive7AnyToGPRBypass<ReadREV8>;
+def : SiFive7AnyToGPRBypass<ReadSHXADD>;
+def : SiFive7AnyToGPRBypass<ReadSHXADD32>;
+
+// 6. Configuration-Setting Instructions
+def : ReadAdvance<ReadVSETVLI, 2>;
+def : ReadAdvance<ReadVSETVL, 2>;
+
+// 7. Vector Loads and Stores
+def : ReadAdvance<ReadVLDX, 0>;
+def : ReadAdvance<ReadVSTX, 0>;
+defm "" : LMULReadAdvance<"ReadVSTEV", 0>;
+defm "" : LMULReadAdvance<"ReadVSTM", 0>;
+def : ReadAdvance<ReadVLDSX, 0>;
+def : ReadAdvance<ReadVSTSX, 0>;
+defm "" : LMULReadAdvance<"ReadVSTS8V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTS16V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTS32V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTS64V", 0>;
+defm "" : LMULReadAdvance<"ReadVLDUXV", 0>;
+defm "" : LMULReadAdvance<"ReadVLDOXV", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX8", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX16", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX32", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX64", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUXV", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX8V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX16V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX32V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTUX64V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX8", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX16", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX32", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX64", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOXV", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX8V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX16V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX32V", 0>;
+defm "" : LMULReadAdvance<"ReadVSTOX64V", 0>;
+// LMUL Aware
+def : ReadAdvance<ReadVST1R, 0>;
+def : ReadAdvance<ReadVST2R, 0>;
+def : ReadAdvance<ReadVST4R, 0>;
+def : ReadAdvance<ReadVST8R, 0>;
+
+// 12. Vector Integer Arithmetic Instructions
+defm : LMULReadAdvance<"ReadVIALUV", 0>;
+defm : LMULReadAdvance<"ReadVIALUX", 0>;
+defm : LMULReadAdvanceW<"ReadVIWALUV", 0>;
+defm : LMULReadAdvanceW<"ReadVIWALUX", 0>;
+defm : LMULReadAdvance<"ReadVExtV", 0>;
+defm : LMULReadAdvance<"ReadVICALUV", 0>;
+defm : LMULReadAdvance<"ReadVICALUX", 0>;
+defm : LMULReadAdvance<"ReadVShiftV", 0>;
+defm : LMULReadAdvance<"ReadVShiftX", 0>;
+defm : LMULReadAdvanceW<"ReadVNShiftV", 0>;
+defm : LMULReadAdvanceW<"ReadVNShiftX", 0>;
+defm : LMULReadAdvance<"ReadVICmpV", 0>;
+defm : LMULReadAdvance<"ReadVICmpX", 0>;
+defm : LMULReadAdvance<"ReadVIMinMaxV", 0>;
+defm : LMULReadAdvance<"ReadVIMinMaxX", 0>;
+defm : LMULReadAdvance<"ReadVIMulV", 0>;
+defm : LMULReadAdvance<"ReadVIMulX", 0>;
+defm : LMULSEWReadAdvance<"ReadVIDivV", 0>;
+defm : LMULSEWReadAdvance<"ReadVIDivX", 0>;
+defm : LMULReadAdvanceW<"ReadVIWMulV", 0>;
+defm : LMULReadAdvanceW<"ReadVIWMulX", 0>;
+defm : LMULReadAdvance<"ReadVIMulAddV", 0>;
+defm : LMULReadAdvance<"ReadVIMulAddX", 0>;
+defm : LMULReadAdvanceW<"ReadVIWMulAddV", 0>;
+defm : LMULReadAdvanceW<"ReadVIWMulAddX", 0>;
+defm : LMULReadAdvance<"ReadVIMergeV", 0>;
+defm : LMULReadAdvance<"ReadVIMergeX", 0>;
+defm : LMULReadAdvance<"ReadVIMovV", 0>;
+defm : LMULReadAdvance<"ReadVIMovX", 0>;
+
+// 13. Vector Fixed-Point Arithmetic Instructions
+defm "" : LMULReadAdvance<"ReadVSALUV", 0>;
+defm "" : LMULReadAdvance<"ReadVSALUX", 0>;
+defm "" : LMULReadAdvance<"ReadVAALUV", 0>;
+defm "" : LMULReadAdvance<"ReadVAALUX", 0>;
+defm "" : LMULReadAdvance<"ReadVSMulV", 0>;
+defm "" : LMULReadAdvance<"ReadVSMulX", 0>;
+defm "" : LMULReadAdvance<"ReadVSShiftV", 0>;
+defm "" : LMULReadAdvance<"ReadVSShiftX", 0>;
+defm "" : LMULReadAdvanceW<"ReadVNClipV", 0>;
+defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>;
+
+// 14. Vector Floating-Point Instructions
+defm "" : LMULReadAdvance<"ReadVFALUV", 0>;
+defm "" : LMULReadAdvance<"ReadVFALUF", 0>;
+defm "" : LMULReadAdvanceFW<"ReadVFWALUV", 0>;
+defm "" : LMULReadAdvanceFW<"ReadVFWALUF", 0>;
+defm "" : LMULReadAdvance<"ReadVFMulV", 0>;
+defm "" : LMULReadAdvance<"ReadVFMulF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
+defm "" : LMULReadAdvanceFW<"ReadVFWMulV", 0>;
+defm "" : LMULReadAdvanceFW<"ReadVFWMulF", 0>;
+defm "" : LMULReadAdvance<"ReadVFMulAddV", 0>;
+defm "" : LMULReadAdvance<"ReadVFMulAddF", 0>;
+defm "" : LMULReadAdvanceFW<"ReadVFWMulAddV", 0>;
+defm "" : LMULReadAdvanceFW<"ReadVFWMulAddF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
+defm "" : LMULReadAdvance<"ReadVFRecpV", 0>;
+defm "" : LMULReadAdvance<"ReadVFMinMaxV", 0>;
+defm "" : LMULReadAdvance<"ReadVFMinMaxF", 0>;
+defm "" : LMULReadAdvance<"ReadVFSgnjV", 0>;
+defm "" : LMULReadAdvance<"ReadVFSgnjF", 0>;
+defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
+defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
+defm "" : LMULReadAdvance<"ReadVFClassV", 0>;
+defm "" : LMULReadAdvance<"ReadVFMergeV", 0>;
+defm "" : LMULReadAdvance<"ReadVFMergeF", 0>;
+defm "" : LMULReadAdvance<"ReadVFMovF", 0>;
+defm "" : LMULReadAdvance<"ReadVFCvtIToFV", 0>;
+defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>;
+defm "" : LMULReadAdvanceW<"ReadVFWCvtIToFV", 0>;
+defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>;
+defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToFV", 0>;
+defm "" : LMULReadAdvanceFW<"ReadVFNCvtIToFV", 0>;
+defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>;
+defm "" : LMULReadAdvanceFW<"ReadVFNCvtFToFV", 0>;
+
+// 15. Vector Reduction Operations
+def : ReadAdvance<ReadVIRedV, 0>;
+def : ReadAdvance<ReadVIRedV0, 0>;
+def : ReadAdvance<ReadVIWRedV, 0>;
+def : ReadAdvance<ReadVIWRedV0, 0>;
+def : ReadAdvance<ReadVFRedV, 0>;
+def : ReadAdvance<ReadVFRedV0, 0>;
+def : ReadAdvance<ReadVFRedOV, 0>;
+def : ReadAdvance<ReadVFRedOV0, 0>;
+def : ReadAdvance<ReadVFWRedV, 0>;
+def : ReadAdvance<ReadVFWRedV0, 0>;
+def : ReadAdvance<ReadVFWRedOV, 0>;
+def : ReadAdvance<ReadVFWRedOV0, 0>;
+
+// 16. Vector Mask Instructions
+defm "" : LMULReadAdvance<"ReadVMALUV", 0>;
+defm "" : LMULReadAdvance<"ReadVMPopV", 0>;
+defm "" : LMULReadAdvance<"ReadVMFFSV", 0>;
+defm "" : LMULReadAdvance<"ReadVMSFSV", 0>;
+defm "" : LMULReadAdvance<"ReadVMIotV", 0>;
+
+// 17. Vector Permutation Instructions
+def : ReadAdvance<ReadVIMovVX, 0>;
+def : ReadAdvance<ReadVIMovXV, 0>;
+def : ReadAdvance<ReadVIMovXX, 0>;
+def : ReadAdvance<ReadVFMovVF, 0>;
+def : ReadAdvance<ReadVFMovFV, 0>;
+def : ReadAdvance<ReadVFMovFX, 0>;
+defm "" : LMULReadAdvance<"ReadVISlideV", 0>;
+defm "" : LMULReadAdvance<"ReadVISlideX", 0>;
+defm "" : LMULReadAdvance<"ReadVFSlideV", 0>;
+defm "" : LMULReadAdvance<"ReadVFSlideF", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_data", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_index", 0>;
+defm "" : LMULReadAdvance<"ReadVRGatherVX_data", 0>;
+defm "" : LMULReadAdvance<"ReadVRGatherVX_index", 0>;
+defm "" : LMULReadAdvance<"ReadVRGatherVI_data", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVCompressV", 0>;
+// LMUL Aware
+def : ReadAdvance<ReadVMov1V, 0>;
+def : ReadAdvance<ReadVMov2V, 0>;
+def : ReadAdvance<ReadVMov4V, 0>;
+def : ReadAdvance<ReadVMov8V, 0>;
+
+// Others
+def : ReadAdvance<ReadVMask, 0>;
//===----------------------------------------------------------------------===//
// Unsupported extensions
-defm : UnsupportedSchedV;
-defm : UnsupportedSchedZba;
-defm : UnsupportedSchedZbb;
defm : UnsupportedSchedZbc;
defm : UnsupportedSchedZbs;
defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
-defm : UnsupportedSchedZfh;
+defm : UnsupportedSchedZfa;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
index 1c6b44beea7a..41eefa0c67d9 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSyntacoreSCR1.td
@@ -41,7 +41,6 @@ def SCR1_CFU : ProcResource<1>;
def : WriteRes<WriteJmp, [SCR1_CFU]>;
def : WriteRes<WriteJal, [SCR1_CFU]>;
def : WriteRes<WriteJalr, [SCR1_CFU]>;
-def : WriteRes<WriteJmpReg, [SCR1_CFU]>;
// Integer arithmetic and logic
def : WriteRes<WriteIALU32, [SCR1_ALU]>;
@@ -192,7 +191,8 @@ def : ReadAdvance<ReadFMovF64ToI64, 0>;
def : ReadAdvance<ReadFMovI64ToF64, 0>;
def : ReadAdvance<ReadFClass32, 0>;
def : ReadAdvance<ReadFClass64, 0>;
-def : ReadAdvance<ReadSFB, 0>;
+def : ReadAdvance<ReadSFBJmp, 0>;
+def : ReadAdvance<ReadSFBALU, 0>;
//===----------------------------------------------------------------------===//
// Unsupported extensions
@@ -203,5 +203,6 @@ defm : UnsupportedSchedZbc;
defm : UnsupportedSchedZbs;
defm : UnsupportedSchedZbkb;
defm : UnsupportedSchedZbkx;
+defm : UnsupportedSchedZfa;
defm : UnsupportedSchedZfh;
}
diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td
index 41c74b261c5a..af318ea5bf68 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedule.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedule.td
@@ -1,4 +1,4 @@
-//===-- RISCVSchedule.td - RISCV Scheduling Definitions ----*- tablegen -*-===//
+//===-- RISCVSchedule.td - RISC-V Scheduling Definitions ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -20,7 +20,6 @@ def WriteIMul32 : SchedWrite; // 32-bit multiply on RV64I
def WriteJmp : SchedWrite; // Jump
def WriteJal : SchedWrite; // Jump and link
def WriteJalr : SchedWrite; // Jump and link register
-def WriteJmpReg : SchedWrite; // Jump register
def WriteNop : SchedWrite;
def WriteLDB : SchedWrite; // Load byte
def WriteLDH : SchedWrite; // Load half-word
@@ -77,6 +76,11 @@ def WriteFCvtF32ToF16 : SchedWrite;
def WriteFCvtF16ToF64 : SchedWrite;
def WriteFCvtF64ToF16 : SchedWrite;
+// Zfa found instructions.
+def WriteFRoundF32 : SchedWrite;
+def WriteFRoundF64 : SchedWrite;
+def WriteFRoundF16 : SchedWrite;
+
def WriteFClass16 : SchedWrite; // 16-bit floating point classify
def WriteFClass32 : SchedWrite; // 32-bit floating point classify
def WriteFClass64 : SchedWrite; // 64-bit floating point classify
@@ -97,6 +101,10 @@ def WriteFMovI32ToF32 : SchedWrite;
def WriteFMovF64ToI64 : SchedWrite; // RV64I only
def WriteFMovI64ToF64 : SchedWrite; // RV64I only
+def WriteFLI16 : SchedWrite; // Floating point constant load
+def WriteFLI32 : SchedWrite; // Floating point constant load
+def WriteFLI64 : SchedWrite; // Floating point constant load
+
def WriteFLD16 : SchedWrite; // Floating point sp load
def WriteFLD32 : SchedWrite; // Floating point sp load
def WriteFLD64 : SchedWrite; // Floating point dp load
@@ -106,7 +114,8 @@ def WriteFST64 : SchedWrite; // Floating point dp store
// short forward branch for Bullet
def WriteSFB : SchedWrite;
-def ReadSFB : SchedRead;
+def ReadSFBJmp : SchedRead;
+def ReadSFBALU : SchedRead;
/// Define scheduler resources associated with use operands.
def ReadJmp : SchedRead;
@@ -182,6 +191,9 @@ def ReadFCvtF16ToF32 : SchedRead;
def ReadFCvtF32ToF16 : SchedRead;
def ReadFCvtF16ToF64 : SchedRead;
def ReadFCvtF64ToF16 : SchedRead;
+def ReadFRoundF16 : SchedRead;
+def ReadFRoundF32 : SchedRead;
+def ReadFRoundF64 : SchedRead;
def ReadFClass16 : SchedRead;
def ReadFClass32 : SchedRead;
def ReadFClass64 : SchedRead;
@@ -236,7 +248,23 @@ multiclass UnsupportedSchedSFB {
let Unsupported = true in {
def : WriteRes<WriteSFB, []>;
-def : ReadAdvance<ReadSFB, 0>;
+def : ReadAdvance<ReadSFBJmp, 0>;
+def : ReadAdvance<ReadSFBALU, 0>;
+} // Unsupported = true
+}
+
+multiclass UnsupportedSchedZfa {
+let Unsupported = true in {
+def : WriteRes<WriteFRoundF16, []>;
+def : WriteRes<WriteFRoundF32, []>;
+def : WriteRes<WriteFRoundF64, []>;
+def : WriteRes<WriteFLI16, []>;
+def : WriteRes<WriteFLI32, []>;
+def : WriteRes<WriteFLI64, []>;
+
+def : ReadAdvance<ReadFRoundF32, 0>;
+def : ReadAdvance<ReadFRoundF64, 0>;
+def : ReadAdvance<ReadFRoundF16, 0>;
} // Unsupported = true
}
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td
index 62054b0a8e6e..676383c5a636 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleV.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td
@@ -1,4 +1,4 @@
-//===-- RISCVScheduleV.td - RISCV Scheduling Definitions V -*- tablegen -*-===//
+//===- RISCVScheduleV.td - RISC-V Scheduling Definitions V -*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -9,101 +9,198 @@
//===----------------------------------------------------------------------===//
/// Define scheduler resources associated with def operands.
-defvar UpperBoundLMUL = "UpperBound";
-defvar SchedMxList = ["UpperBound", "M1", "M2", "M4", "M8", "MF2", "MF4", "MF8"];
+defvar SchedMxList = ["MF8", "MF4", "MF2", "M1", "M2", "M4", "M8"];
// Used for widening and narrowing instructions as it doesn't contain M8.
-defvar SchedMxListW = ["UpperBound", "MF8", "MF4", "MF2", "M1", "M2", "M4"];
-defvar SchedMxListFW = ["UpperBound", "MF4", "MF2", "M1", "M2", "M4"];
+defvar SchedMxListW = !listremove(SchedMxList, ["M8"]);
+// Used for widening reductions, which does contain M8.
+defvar SchedMxListWRed = SchedMxList;
+defvar SchedMxListFW = !listremove(SchedMxList, ["M8", "MF8"]);
+// Used for floating-point as it doesn't contain MF8.
+defvar SchedMxListF = !listremove(SchedMxList, ["MF8"]);
+// Used for widening floating-point reductions as it doesn't contain MF8.
+defvar SchedMxListFWRed = SchedMxListF;
-// Creates SchedWrite for each (name, LMUL) pair for LMUL in SchedMxList
-multiclass LMULSchedWrites<string name> {
- foreach mx = SchedMxList in {
- def name # "_" # mx : SchedWrite;
- }
+class SchedSEWSet<string mx, bit isF = 0, bit isWidening = 0> {
+ assert !or(!not(isF), !ne(mx, "MF8")), "LMUL shouldn't be MF8 for floating-point";
+ defvar t = !cond(!eq(mx, "M1"): [8, 16, 32, 64],
+ !eq(mx, "M2"): [8, 16, 32, 64],
+ !eq(mx, "M4"): [8, 16, 32, 64],
+ !eq(mx, "M8"): [8, 16, 32, 64],
+ !eq(mx, "MF2"): [8, 16, 32],
+ !eq(mx, "MF4"): [8, 16],
+ !eq(mx, "MF8"): [8]);
+ // For floating-point instructions, SEW won't be 8.
+ defvar remove8 = !if(isF, !listremove(t, [8]), t);
+ // For widening instructions, SEW will not be 64.
+ defvar remove64 = !if(isWidening, !listremove(remove8, [64]), remove8);
+ list<int> val = remove64;
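+ // For example, SchedSEWSet<"MF2">.val is [8, 16, 32], while
+ // SchedSEWSet<"MF2", isF=1>.val is [16, 32].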
}
-// Creates SchedWrite for each (name, LMUL) pair for LMUL in SchedMxListW
-multiclass LMULSchedWritesW<string name> {
- foreach mx = SchedMxListW in {
- def name # "_" # mx : SchedWrite;
- }
+// Helper function to get the largest LMUL from MxList
+// Precondition: MxList is sorted in ascending LMUL order.
+class LargestLMUL<list<string> MxList> {
+ // MX list is sorted from smallest to largest
+ string r = !foldl(!head(MxList), MxList, last, curr, curr);
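+ // For example, LargestLMUL<SchedMxList>.r is "M8".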
}
-
-// Creates SchedWrite for each (name, LMUL) pair for LMUL in SchedMxListFW
-multiclass LMULSchedWritesFW<string name> {
- foreach mx = SchedMxListFW in {
- def name # "_" # mx : SchedWrite;
- }
+// Helper function to get the smallest SEW that can be used with LMUL mx
+// Precondition: MxList is sorted in ascending LMUL order and SchedSEWSet<mx>
+// is sorted in ascending SEW order.
+class SmallestSEW<string mx, bit isF = 0> {
+ int r = !head(SchedSEWSet<mx, isF>.val);
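+ // For example, SmallestSEW<"MF2", isF=1>.r is 16.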
}
-// Creates SchedRead for each (name, LMUL) pair for LMUL in SchedMxList
-multiclass LMULSchedReads<string name> {
- foreach mx = SchedMxList in {
- def name # "_" # mx : SchedRead;
- }
+// Creates WriteRes for (name, mx, resources) tuple
+multiclass LMULWriteResMX<string name, list<ProcResourceKind> resources,
+ string mx, bit IsWorstCase> {
+ def : WriteRes<!cast<SchedWrite>(name # "_" # mx), resources>;
+ if IsWorstCase then
+ def : WriteRes<!cast<SchedWrite>(name # "_WorstCase"), resources>;
+}
+multiclass LMULSEWWriteResMXSEW<string name, list<ProcResourceKind> resources,
+ string mx, int sew, bit IsWorstCase> {
+ def : WriteRes<!cast<SchedWrite>(name # "_" # mx # "_E" # sew), resources>;
+ if IsWorstCase then
+ def : WriteRes<!cast<SchedWrite>(name # "_WorstCase"), resources>;
}
-// Creates SchedRead for each (name, LMUL) pair for LMUL in SchedMxListW
-multiclass LMULSchedReadsW<string name> {
- foreach mx = SchedMxListW in {
- def name # "_" # mx : SchedRead;
+// Define multiclasses to define SchedWrite, SchedRead, WriteRes, and
+// ReadAdvance for each (name, LMUL) pair and for each LMUL in each of the
+// SchedMxList variants above. Each multiclass is responsible for defining
+// a record that represents the WorstCase behavior for name.
+multiclass LMULSchedWritesImpl<string name, list<string> MxList> {
+ def name # "_WorstCase" : SchedWrite;
+ foreach mx = MxList in {
+ def name # "_" # mx : SchedWrite;
}
}
-
-// Creates SchedRead for each (name, LMUL) pair for LMUL in SchedMxListFW
-multiclass LMULSchedReadsFW<string name> {
- foreach mx = SchedMxListFW in {
+multiclass LMULSchedReadsImpl<string name, list<string> MxList> {
+ def name # "_WorstCase" : SchedRead;
+ foreach mx = MxList in {
def name # "_" # mx : SchedRead;
}
}
-
-// Creates WriteRes for each (name, LMUL, resources) tuple for LMUL
-// in SchedMxList
-multiclass LMULWriteRes<string name, list<ProcResourceKind> resources> {
+multiclass LMULWriteResImpl<string name, list<ProcResourceKind> resources> {
+ if !exists<SchedWrite>(name # "_WorstCase") then
+ def : WriteRes<!cast<SchedWrite>(name # "_WorstCase"), resources>;
foreach mx = SchedMxList in {
- def : WriteRes<!cast<SchedWrite>(name # "_" # mx), resources>;
+ if !exists<SchedWrite>(name # "_" # mx) then
+ def : WriteRes<!cast<SchedWrite>(name # "_" # mx), resources>;
}
}
-
-// Creates WriteRes for each (name, LMUL, resources) tuple for LMUL
-// in SchedMxListW
-multiclass LMULWriteResW<string name, list<ProcResourceKind> resources> {
- foreach mx = SchedMxListW in {
- def : WriteRes<!cast<SchedWrite>(name # "_" # mx), resources>;
+multiclass LMULReadAdvanceImpl<string name, int val,
+ list<SchedWrite> writes = []> {
+ if !exists<SchedRead>(name # "_WorstCase") then
+ def : ReadAdvance<!cast<SchedRead>(name # "_WorstCase"), val, writes>;
+ foreach mx = SchedMxList in {
+ if !exists<SchedRead>(name # "_" # mx) then
+ def : ReadAdvance<!cast<SchedRead>(name # "_" # mx), val, writes>;
}
}
-// Creates WriteRes for each (name, LMUL, resources) tuple for LMUL
-// in SchedMxListFW
-multiclass LMULWriteResFW<string name, list<ProcResourceKind> resources> {
- foreach mx = SchedMxListFW in {
- def : WriteRes<!cast<SchedWrite>(name # "_" # mx), resources>;
+// Define multiclasses to define SchedWrite, SchedRead, WriteRes, and
+// ReadAdvance for each (name, LMUL, SEW) tuple for each LMUL in each of the
+// SchedMxList variants above. Each multiclass is responsible for defining
+// a record that represents the WorstCase behavior for name.
+multiclass LMULSEWSchedWritesImpl<string name, list<string> MxList, bit isF = 0,
+ bit isWidening = 0> {
+ def name # "_WorstCase" : SchedWrite;
+ foreach mx = MxList in {
+ foreach sew = SchedSEWSet<mx, isF, isWidening>.val in
+ def name # "_" # mx # "_E" # sew : SchedWrite;
}
}
-
-// Creates ReadAdvance for each (name, LMUL, val) tuple for LMUL
-// in SchedMxList
-multiclass LMULReadAdvance<string name, int val, list<SchedWrite> writes = []> {
- foreach mx = SchedMxList in {
- def : ReadAdvance<!cast<SchedRead>(name # "_" # mx), val, writes>;
+multiclass LMULSEWSchedReadsImpl<string name, list<string> MxList, bit isF = 0,
+ bit isWidening = 0> {
+ def name # "_WorstCase" : SchedRead;
+ foreach mx = MxList in {
+ foreach sew = SchedSEWSet<mx, isF, isWidening>.val in
+ def name # "_" # mx # "_E" # sew : SchedRead;
}
}
-
-// Creates ReadAdvance for each (name, LMUL, val) tuple for LMUL
-// in SchedMxListW
-multiclass LMULReadAdvanceW<string name, int val, list<SchedWrite> writes = []> {
- foreach mx = SchedMxListW in {
- def : ReadAdvance<!cast<SchedRead>(name # "_" # mx), val, writes>;
+multiclass LMULSEWWriteResImpl<string name, list<ProcResourceKind> resources,
+ list<string> MxList, bit isF = 0,
+ bit isWidening = 0> {
+ if !exists<SchedWrite>(name # "_WorstCase") then
+ def : WriteRes<!cast<SchedWrite>(name # "_WorstCase"), resources>;
+ foreach mx = MxList in {
+ foreach sew = SchedSEWSet<mx, isF, isWidening>.val in
+ if !exists<SchedWrite>(name # "_" # mx # "_E" # sew) then
+ def : WriteRes<!cast<SchedWrite>(name # "_" # mx # "_E" # sew), resources>;
}
}
-
-// Creates ReadAdvance for each (name, LMUL, val) tuple for LMUL
-// in SchedMxListFW
-multiclass LMULReadAdvanceFW<string name, int val, list<SchedWrite> writes = []> {
- foreach mx = SchedMxListFW in {
- def : ReadAdvance<!cast<SchedRead>(name # "_" # mx), val, writes>;
+multiclass LMULSEWReadAdvanceImpl<string name, int val, list<SchedWrite> writes = [],
+ list<string> MxList, bit isF = 0,
+ bit isWidening = 0> {
+ if !exists<SchedRead>(name # "_WorstCase") then
+ def : ReadAdvance<!cast<SchedRead>(name # "_WorstCase"), val, writes>;
+ foreach mx = MxList in {
+ foreach sew = SchedSEWSet<mx, isF, isWidening>.val in
+ if !exists<SchedRead>(name # "_" # mx # "_E" # sew) then
+ def : ReadAdvance<!cast<SchedRead>(name # "_" # mx # "_E" # sew), val, writes>;
}
}
+// Define classes that build a list containing all SchedWrites for each
+// (name, LMUL) pair, for each LMUL in each of the SchedMxList variants above
+// and each name in the argument `names`. These classes can be used to
+// construct the list of existing write definitions corresponding to each
+// (name, LMUL) pair that a ReadAdvance needs. For example:
+// ```
+// defm "" : LMULReadAdvance<"ReadVIALUX", 1,
+// LMULSchedWriteList<["WriteVIMovVX"]>.value>;
+// ```
+class LMULSchedWriteListImpl<list<string> names, list<string> MxList> {
+ list<SchedWrite> value = !foldl([]<SchedWrite>,
+ !foreach(name, names,
+ !foreach(mx, MxList, !cast<SchedWrite>(name # "_" # mx))),
+ all, writes, !listconcat(all, writes));
+}
+
+multiclass LMULSchedWrites<string name> : LMULSchedWritesImpl<name, SchedMxList>;
+multiclass LMULSchedReads<string name> : LMULSchedReadsImpl<name, SchedMxList>;
+multiclass LMULWriteRes<string name, list<ProcResourceKind> resources>
+ : LMULWriteResImpl<name, resources>;
+multiclass LMULReadAdvance<string name, int val, list<SchedWrite> writes = []>
+ : LMULReadAdvanceImpl<name, val, writes>;
+class LMULSchedWriteList<list<string> names> : LMULSchedWriteListImpl<names, SchedMxList>;
+
+multiclass LMULSEWSchedWrites<string name> : LMULSEWSchedWritesImpl<name, SchedMxList>;
+multiclass LMULSEWSchedReads<string name> : LMULSEWSchedReadsImpl<name, SchedMxList>;
+multiclass LMULSEWWriteRes<string name, list<ProcResourceKind> resources>
+ : LMULSEWWriteResImpl<name, resources, SchedMxList>;
+multiclass LMULSEWReadAdvance<string name, int val, list<SchedWrite> writes = []>
+ : LMULSEWReadAdvanceImpl<name, val, writes, SchedMxList>;
+
+multiclass LMULSEWSchedWritesWRed<string name>
+ : LMULSEWSchedWritesImpl<name, SchedMxListWRed, isWidening=1>;
+multiclass LMULSEWWriteResWRed<string name, list<ProcResourceKind> resources>
+ : LMULSEWWriteResImpl<name, resources, SchedMxListWRed, isWidening=1>;
+
+multiclass LMULSEWSchedWritesFWRed<string name>
+ : LMULSEWSchedWritesImpl<name, SchedMxListFWRed, isF=1, isWidening=1>;
+multiclass LMULSEWWriteResFWRed<string name, list<ProcResourceKind> resources>
+ : LMULSEWWriteResImpl<name, resources, SchedMxListFWRed, isF=1, isWidening=1>;
+
+multiclass LMULSEWSchedWritesF<string name> : LMULSEWSchedWritesImpl<name, SchedMxListF, isF=1>;
+multiclass LMULSEWSchedReadsF<string name> : LMULSEWSchedReadsImpl<name, SchedMxListF, isF=1>;
+multiclass LMULSEWWriteResF<string name, list<ProcResourceKind> resources>
+ : LMULSEWWriteResImpl<name, resources, SchedMxListF, isF=1>;
+multiclass LMULSEWReadAdvanceF<string name, int val, list<SchedWrite> writes = []>
+ : LMULSEWReadAdvanceImpl<name, val, writes, SchedMxListF, isF=1>;
+
+multiclass LMULSchedWritesW<string name> : LMULSchedWritesImpl<name, SchedMxListW>;
+multiclass LMULSchedReadsW<string name> : LMULSchedReadsImpl<name, SchedMxListW>;
+multiclass LMULWriteResW<string name, list<ProcResourceKind> resources>
+ : LMULWriteResImpl<name, resources>;
+multiclass LMULReadAdvanceW<string name, int val, list<SchedWrite> writes = []>
+ : LMULReadAdvanceImpl<name, val, writes>;
+class LMULSchedWriteListW<list<string> names> : LMULSchedWriteListImpl<names, SchedMxListW>;
+
+multiclass LMULSchedWritesFW<string name> : LMULSchedWritesImpl<name, SchedMxListFW>;
+multiclass LMULSchedReadsFW<string name> : LMULSchedReadsImpl<name, SchedMxListFW>;
+multiclass LMULWriteResFW<string name, list<ProcResourceKind> resources>
+ : LMULWriteResImpl<name, resources>;
+multiclass LMULReadAdvanceFW<string name, int val, list<SchedWrite> writes = []>
+ : LMULReadAdvanceImpl<name, val, writes>;
+class LMULSchedWriteListFW<list<string> names> : LMULSchedWriteListImpl<names, SchedMxListFW>;
// 3.6 Vector Byte Length vlenb
def WriteRdVLENB : SchedWrite;
@@ -197,16 +294,18 @@ defm "" : LMULSchedWritesW<"WriteVNShiftV">;
defm "" : LMULSchedWritesW<"WriteVNShiftX">;
defm "" : LMULSchedWritesW<"WriteVNShiftI">;
// 11.8. Vector Integer Comparison Instructions
-// 11.9. Vector Integer Min/Max Instructions
defm "" : LMULSchedWrites<"WriteVICmpV">;
defm "" : LMULSchedWrites<"WriteVICmpX">;
defm "" : LMULSchedWrites<"WriteVICmpI">;
+// 11.9. Vector Integer Min/Max Instructions
+defm "" : LMULSchedWrites<"WriteVIMinMaxV">;
+defm "" : LMULSchedWrites<"WriteVIMinMaxX">;
// 11.10. Vector Single-Width Integer Multiply Instructions
defm "" : LMULSchedWrites<"WriteVIMulV">;
defm "" : LMULSchedWrites<"WriteVIMulX">;
// 11.11. Vector Integer Divide Instructions
-defm "" : LMULSchedWrites<"WriteVIDivV">;
-defm "" : LMULSchedWrites<"WriteVIDivX">;
+defm "" : LMULSEWSchedWrites<"WriteVIDivV">;
+defm "" : LMULSEWSchedWrites<"WriteVIDivX">;
// 11.12. Vector Widening Integer Multiply Instructions
defm "" : LMULSchedWritesW<"WriteVIWMulV">;
defm "" : LMULSchedWritesW<"WriteVIWMulX">;
@@ -255,8 +354,8 @@ defm "" : LMULSchedWritesFW<"WriteVFWALUF">;
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
defm "" : LMULSchedWrites<"WriteVFMulV">;
defm "" : LMULSchedWrites<"WriteVFMulF">;
-defm "" : LMULSchedWrites<"WriteVFDivV">;
-defm "" : LMULSchedWrites<"WriteVFDivF">;
+defm "" : LMULSEWSchedWritesF<"WriteVFDivV">;
+defm "" : LMULSEWSchedWritesF<"WriteVFDivF">;
// 13.5. Vector Widening Floating-Point Multiply
defm "" : LMULSchedWritesFW<"WriteVFWMulV">;
defm "" : LMULSchedWritesFW<"WriteVFWMulF">;
@@ -267,17 +366,19 @@ defm "" : LMULSchedWrites<"WriteVFMulAddF">;
defm "" : LMULSchedWritesFW<"WriteVFWMulAddV">;
defm "" : LMULSchedWritesFW<"WriteVFWMulAddF">;
// 13.8. Vector Floating-Point Square-Root Instruction
-defm "" : LMULSchedWrites<"WriteVFSqrtV">;
+defm "" : LMULSEWSchedWritesF<"WriteVFSqrtV">;
// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
defm "" : LMULSchedWrites<"WriteVFRecpV">;
// 13.11. Vector Floating-Point MIN/MAX Instructions
-// 13.13. Vector Floating-Point Compare Instructions
-defm "" : LMULSchedWrites<"WriteVFCmpV">;
-defm "" : LMULSchedWrites<"WriteVFCmpF">;
+defm "" : LMULSchedWrites<"WriteVFMinMaxV">;
+defm "" : LMULSchedWrites<"WriteVFMinMaxF">;
// 13.12. Vector Floating-Point Sign-Injection Instructions
defm "" : LMULSchedWrites<"WriteVFSgnjV">;
defm "" : LMULSchedWrites<"WriteVFSgnjF">;
+// 13.13. Vector Floating-Point Compare Instructions
+defm "" : LMULSchedWrites<"WriteVFCmpV">;
+defm "" : LMULSchedWrites<"WriteVFCmpF">;
// 13.14. Vector Floating-Point Classify Instruction
defm "" : LMULSchedWrites<"WriteVFClassV">;
// 13.15. Vector Floating-Point Merge Instruction
@@ -297,16 +398,22 @@ defm "" : LMULSchedWritesW<"WriteVFNCvtFToIV">;
defm "" : LMULSchedWritesFW<"WriteVFNCvtFToFV">;
// 14. Vector Reduction Operations
+// The latency of a reduction is determined by the size of the read resource.
+// The LMUL range of the read resource (VS2) for a reduction operation is
+// between MF8 and M8. The _From suffix indicates the LMUL of the VS2 source
+// operand.
// 14.1. Vector Single-Width Integer Reduction Instructions
-def WriteVIRedV : SchedWrite;
+defm "" : LMULSEWSchedWrites<"WriteVIRedV_From">;
+defm "" : LMULSEWSchedWrites<"WriteVIRedMinMaxV_From">;
// 14.2. Vector Widening Integer Reduction Instructions
-def WriteVIWRedV : SchedWrite;
+defm "" : LMULSEWSchedWritesWRed<"WriteVIWRedV_From">;
// 14.3. Vector Single-Width Floating-Point Reduction Instructions
-def WriteVFRedV : SchedWrite;
-def WriteVFRedOV : SchedWrite;
+defm "" : LMULSEWSchedWritesF<"WriteVFRedV_From">;
+defm "" : LMULSEWSchedWritesF<"WriteVFRedOV_From">;
+defm "" : LMULSEWSchedWritesF<"WriteVFRedMinMaxV_From">;
// 14.4. Vector Widening Floating-Point Reduction Instructions
-def WriteVFWRedV : SchedWrite;
-def WriteVFWRedOV : SchedWrite;
+defm "" : LMULSEWSchedWritesFWRed<"WriteVFWRedV_From">;
+defm "" : LMULSEWSchedWritesFWRed<"WriteVFWRedOV_From">;
// 15. Vector Mask Instructions
// 15.1. Vector Mask-Register Logical Instructions
@@ -326,22 +433,22 @@ defm "" : LMULSchedWrites<"WriteVMIdxV">;
// 16. Vector Permutation Instructions
// 16.1. Integer Scalar Move Instructions
-defm "" : LMULSchedWrites<"WriteVIMovVX">;
-defm "" : LMULSchedWrites<"WriteVIMovXV">;
+def WriteVIMovVX : SchedWrite;
+def WriteVIMovXV : SchedWrite;
// 16.2. Floating-Point Scalar Move Instructions
-defm "" : LMULSchedWrites<"WriteVFMovVF">;
-defm "" : LMULSchedWrites<"WriteVFMovFV">;
+def WriteVFMovVF : SchedWrite;
+def WriteVFMovFV : SchedWrite;
// 16.3. Vector Slide Instructions
defm "" : LMULSchedWrites<"WriteVISlideX">;
defm "" : LMULSchedWrites<"WriteVISlideI">;
defm "" : LMULSchedWrites<"WriteVISlide1X">;
defm "" : LMULSchedWrites<"WriteVFSlide1F">;
// 16.4. Vector Register Gather Instructions
-defm "" : LMULSchedWrites<"WriteVGatherV">;
-defm "" : LMULSchedWrites<"WriteVGatherX">;
-defm "" : LMULSchedWrites<"WriteVGatherI">;
+defm "" : LMULSEWSchedWrites<"WriteVRGatherVV">;
+defm "" : LMULSchedWrites<"WriteVRGatherVX">;
+defm "" : LMULSchedWrites<"WriteVRGatherVI">;
// 16.5. Vector Compress Instruction
-defm "" : LMULSchedWrites<"WriteVCompressV">;
+defm "" : LMULSEWSchedWrites<"WriteVCompressV">;
// 16.6. Whole Vector Register Move
// These are already LMUL aware
def WriteVMov1V : SchedWrite;
@@ -357,15 +464,15 @@ def ReadVSETVLI : SchedRead;
def ReadVSETVL : SchedRead;
// 7. Vector Loads and Stores
-defm "" : LMULSchedReads<"ReadVLDX">;
-defm "" : LMULSchedReads<"ReadVSTX">;
+def ReadVLDX : SchedRead;
+def ReadVSTX : SchedRead;
// 7.4. Vector Unit-Stride Instructions
defm "" : LMULSchedReads<"ReadVSTEV">;
// 7.4.1. Vector Unit-Strided Mask
defm "" : LMULSchedReads<"ReadVSTM">;
// 7.5. Vector Strided Instructions
-defm "" : LMULSchedReads<"ReadVLDSX">;
-defm "" : LMULSchedReads<"ReadVSTSX">;
+def ReadVLDSX : SchedRead;
+def ReadVSTSX : SchedRead;
defm "" : LMULSchedReads<"ReadVSTS8V">;
defm "" : LMULSchedReads<"ReadVSTS16V">;
defm "" : LMULSchedReads<"ReadVSTS32V">;
@@ -418,15 +525,17 @@ defm "" : LMULSchedReads<"ReadVShiftX">;
defm "" : LMULSchedReadsW<"ReadVNShiftV">;
defm "" : LMULSchedReadsW<"ReadVNShiftX">;
// 11.8. Vector Integer Comparison Instructions
-// 11.9. Vector Integer Min/Max Instructions
defm "" : LMULSchedReads<"ReadVICmpV">;
defm "" : LMULSchedReads<"ReadVICmpX">;
+// 11.9. Vector Integer Min/Max Instructions
+defm "" : LMULSchedReads<"ReadVIMinMaxV">;
+defm "" : LMULSchedReads<"ReadVIMinMaxX">;
// 11.10. Vector Single-Width Integer Multiply Instructions
defm "" : LMULSchedReads<"ReadVIMulV">;
defm "" : LMULSchedReads<"ReadVIMulX">;
// 11.11. Vector Integer Divide Instructions
-defm "" : LMULSchedReads<"ReadVIDivV">;
-defm "" : LMULSchedReads<"ReadVIDivX">;
+defm "" : LMULSEWSchedReads<"ReadVIDivV">;
+defm "" : LMULSEWSchedReads<"ReadVIDivX">;
// 11.12. Vector Widening Integer Multiply Instructions
defm "" : LMULSchedReadsW<"ReadVIWMulV">;
defm "" : LMULSchedReadsW<"ReadVIWMulX">;
@@ -470,8 +579,8 @@ defm "" : LMULSchedReadsFW<"ReadVFWALUF">;
// 13.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
defm "" : LMULSchedReads<"ReadVFMulV">;
defm "" : LMULSchedReads<"ReadVFMulF">;
-defm "" : LMULSchedReads<"ReadVFDivV">;
-defm "" : LMULSchedReads<"ReadVFDivF">;
+defm "" : LMULSEWSchedReadsF<"ReadVFDivV">;
+defm "" : LMULSEWSchedReadsF<"ReadVFDivF">;
// 13.5. Vector Widening Floating-Point Multiply
defm "" : LMULSchedReadsFW<"ReadVFWMulV">;
defm "" : LMULSchedReadsFW<"ReadVFWMulF">;
@@ -482,17 +591,19 @@ defm "" : LMULSchedReads<"ReadVFMulAddF">;
defm "" : LMULSchedReadsFW<"ReadVFWMulAddV">;
defm "" : LMULSchedReadsFW<"ReadVFWMulAddF">;
// 13.8. Vector Floating-Point Square-Root Instruction
-defm "" : LMULSchedReads<"ReadVFSqrtV">;
+defm "" : LMULSEWSchedReadsF<"ReadVFSqrtV">;
// 13.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
// 13.10. Vector Floating-Point Reciprocal Estimate Instruction
defm "" : LMULSchedReads<"ReadVFRecpV">;
// 13.11. Vector Floating-Point MIN/MAX Instructions
-// 13.13. Vector Floating-Point Compare Instructions
-defm "" : LMULSchedReads<"ReadVFCmpV">;
-defm "" : LMULSchedReads<"ReadVFCmpF">;
+defm "" : LMULSchedReads<"ReadVFMinMaxV">;
+defm "" : LMULSchedReads<"ReadVFMinMaxF">;
// 13.12. Vector Floating-Point Sign-Injection Instructions
defm "" : LMULSchedReads<"ReadVFSgnjV">;
defm "" : LMULSchedReads<"ReadVFSgnjF">;
+// 13.13. Vector Floating-Point Compare Instructions
+defm "" : LMULSchedReads<"ReadVFCmpV">;
+defm "" : LMULSchedReads<"ReadVFCmpF">;
// 13.14. Vector Floating-Point Classify Instruction
defm "" : LMULSchedReads<"ReadVFClassV">;
// 13.15. Vector Floating-Point Merge Instruction
@@ -524,6 +635,7 @@ def ReadVFRedV : SchedRead;
def ReadVFRedV0 : SchedRead;
def ReadVFRedOV : SchedRead;
def ReadVFRedOV0 : SchedRead;
+def ReadVFRedMinMaxV : SchedRead;
// 14.4. Vector Widening Floating-Point Reduction Instructions
def ReadVFWRedV : SchedRead;
def ReadVFWRedV0 : SchedRead;
@@ -546,23 +658,26 @@ defm "" : LMULSchedReads<"ReadVMIotV">;
// 16. Vector Permutation Instructions
// 16.1. Integer Scalar Move Instructions
-defm "" : LMULSchedReads<"ReadVIMovVX">;
-defm "" : LMULSchedReads<"ReadVIMovXV">;
-defm "" : LMULSchedReads<"ReadVIMovXX">;
+def ReadVIMovVX : SchedRead;
+def ReadVIMovXV : SchedRead;
+def ReadVIMovXX : SchedRead;
// 16.2. Floating-Point Scalar Move Instructions
-defm "" : LMULSchedReads<"ReadVFMovVF">;
-defm "" : LMULSchedReads<"ReadVFMovFV">;
-defm "" : LMULSchedReads<"ReadVFMovFX">;
+def ReadVFMovVF : SchedRead;
+def ReadVFMovFV : SchedRead;
+def ReadVFMovFX : SchedRead;
// 16.3. Vector Slide Instructions
defm "" : LMULSchedReads<"ReadVISlideV">;
defm "" : LMULSchedReads<"ReadVISlideX">;
defm "" : LMULSchedReads<"ReadVFSlideV">;
defm "" : LMULSchedReads<"ReadVFSlideF">;
// 16.4. Vector Register Gather Instructions
-defm "" : LMULSchedReads<"ReadVGatherV">;
-defm "" : LMULSchedReads<"ReadVGatherX">;
+defm "" : LMULSEWSchedReads<"ReadVRGatherVV_data">;
+defm "" : LMULSEWSchedReads<"ReadVRGatherVV_index">;
+defm "" : LMULSchedReads<"ReadVRGatherVX_data">;
+defm "" : LMULSchedReads<"ReadVRGatherVX_index">;
+defm "" : LMULSchedReads<"ReadVRGatherVI_data">;
// 16.5. Vector Compress Instruction
-defm "" : LMULSchedReads<"ReadVCompressV">;
+defm "" : LMULSEWSchedReads<"ReadVCompressV">;
// 16.6. Whole Vector Register Move
// These are already LMUL aware
def ReadVMov1V : SchedRead;
@@ -661,10 +776,12 @@ defm "" : LMULWriteResW<"WriteVNShiftI", []>;
defm "" : LMULWriteRes<"WriteVICmpV", []>;
defm "" : LMULWriteRes<"WriteVICmpX", []>;
defm "" : LMULWriteRes<"WriteVICmpI", []>;
+defm "" : LMULWriteRes<"WriteVIMinMaxV", []>;
+defm "" : LMULWriteRes<"WriteVIMinMaxX", []>;
defm "" : LMULWriteRes<"WriteVIMulV", []>;
defm "" : LMULWriteRes<"WriteVIMulX", []>;
-defm "" : LMULWriteRes<"WriteVIDivV", []>;
-defm "" : LMULWriteRes<"WriteVIDivX", []>;
+defm "" : LMULSEWWriteRes<"WriteVIDivV", []>;
+defm "" : LMULSEWWriteRes<"WriteVIDivX", []>;
defm "" : LMULWriteResW<"WriteVIWMulV", []>;
defm "" : LMULWriteResW<"WriteVIWMulX", []>;
defm "" : LMULWriteRes<"WriteVIMulAddV", []>;
@@ -700,20 +817,22 @@ defm "" : LMULWriteResFW<"WriteVFWALUV", []>;
defm "" : LMULWriteResFW<"WriteVFWALUF", []>;
defm "" : LMULWriteRes<"WriteVFMulV", []>;
defm "" : LMULWriteRes<"WriteVFMulF", []>;
-defm "" : LMULWriteRes<"WriteVFDivV", []>;
-defm "" : LMULWriteRes<"WriteVFDivF", []>;
+defm "" : LMULSEWWriteResF<"WriteVFDivV", []>;
+defm "" : LMULSEWWriteResF<"WriteVFDivF", []>;
defm "" : LMULWriteResFW<"WriteVFWMulV", []>;
defm "" : LMULWriteResFW<"WriteVFWMulF", []>;
defm "" : LMULWriteRes<"WriteVFMulAddV", []>;
defm "" : LMULWriteRes<"WriteVFMulAddF", []>;
defm "" : LMULWriteResFW<"WriteVFWMulAddV", []>;
defm "" : LMULWriteResFW<"WriteVFWMulAddF", []>;
-defm "" : LMULWriteRes<"WriteVFSqrtV", []>;
+defm "" : LMULSEWWriteResF<"WriteVFSqrtV", []>;
defm "" : LMULWriteRes<"WriteVFRecpV", []>;
-defm "" : LMULWriteRes<"WriteVFCmpV", []>;
-defm "" : LMULWriteRes<"WriteVFCmpF", []>;
+defm "" : LMULWriteRes<"WriteVFMinMaxV", []>;
+defm "" : LMULWriteRes<"WriteVFMinMaxF", []>;
defm "" : LMULWriteRes<"WriteVFSgnjV", []>;
defm "" : LMULWriteRes<"WriteVFSgnjF", []>;
+defm "" : LMULWriteRes<"WriteVFCmpV", []>;
+defm "" : LMULWriteRes<"WriteVFCmpF", []>;
defm "" : LMULWriteRes<"WriteVFClassV", []>;
defm "" : LMULWriteRes<"WriteVFMergeV", []>;
defm "" : LMULWriteRes<"WriteVFMovV", []>;
@@ -727,12 +846,14 @@ defm "" : LMULWriteResW<"WriteVFNCvtFToIV", []>;
defm "" : LMULWriteResFW<"WriteVFNCvtFToFV", []>;
// 14. Vector Reduction Operations
-def : WriteRes<WriteVIRedV, []>;
-def : WriteRes<WriteVIWRedV, []>;
-def : WriteRes<WriteVFRedV, []>;
-def : WriteRes<WriteVFRedOV, []>;
-def : WriteRes<WriteVFWRedV, []>;
-def : WriteRes<WriteVFWRedOV, []>;
+defm "" : LMULSEWWriteRes<"WriteVIRedV_From", []>;
+defm "" : LMULSEWWriteRes<"WriteVIRedMinMaxV_From", []>;
+defm "" : LMULSEWWriteResWRed<"WriteVIWRedV_From", []>;
+defm "" : LMULSEWWriteResF<"WriteVFRedV_From", []>;
+defm "" : LMULSEWWriteResF<"WriteVFRedOV_From", []>;
+defm "" : LMULSEWWriteResF<"WriteVFRedMinMaxV_From", []>;
+defm "" : LMULSEWWriteResFWRed<"WriteVFWRedV_From", []>;
+defm "" : LMULSEWWriteResFWRed<"WriteVFWRedOV_From", []>;
// 15. Vector Mask Instructions
defm "" : LMULWriteRes<"WriteVMALUV", []>;
@@ -743,18 +864,18 @@ defm "" : LMULWriteRes<"WriteVMIotV", []>;
defm "" : LMULWriteRes<"WriteVMIdxV", []>;
// 16. Vector Permutation Instructions
-defm "" : LMULWriteRes<"WriteVIMovVX", []>;
-defm "" : LMULWriteRes<"WriteVIMovXV", []>;
-defm "" : LMULWriteRes<"WriteVFMovVF", []>;
-defm "" : LMULWriteRes<"WriteVFMovFV", []>;
+def : WriteRes<WriteVIMovVX, []>;
+def : WriteRes<WriteVIMovXV, []>;
+def : WriteRes<WriteVFMovVF, []>;
+def : WriteRes<WriteVFMovFV, []>;
defm "" : LMULWriteRes<"WriteVISlideX", []>;
defm "" : LMULWriteRes<"WriteVISlideI", []>;
defm "" : LMULWriteRes<"WriteVISlide1X", []>;
defm "" : LMULWriteRes<"WriteVFSlide1F", []>;
-defm "" : LMULWriteRes<"WriteVGatherV", []>;
-defm "" : LMULWriteRes<"WriteVGatherX", []>;
-defm "" : LMULWriteRes<"WriteVGatherI", []>;
-defm "" : LMULWriteRes<"WriteVCompressV", []>;
+defm "" : LMULSEWWriteRes<"WriteVRGatherVV", []>;
+defm "" : LMULWriteRes<"WriteVRGatherVX", []>;
+defm "" : LMULWriteRes<"WriteVRGatherVI", []>;
+defm "" : LMULSEWWriteRes<"WriteVCompressV", []>;
// These are already LMUL aware
def : WriteRes<WriteVMov1V, []>;
def : WriteRes<WriteVMov2V, []>;
@@ -766,12 +887,12 @@ def : ReadAdvance<ReadVSETVLI, 0>;
def : ReadAdvance<ReadVSETVL, 0>;
// 7. Vector Loads and Stores
-defm "" : LMULReadAdvance<"ReadVLDX", 0>;
-defm "" : LMULReadAdvance<"ReadVSTX", 0>;
+def : ReadAdvance<ReadVLDX, 0>;
+def : ReadAdvance<ReadVSTX, 0>;
defm "" : LMULReadAdvance<"ReadVSTEV", 0>;
defm "" : LMULReadAdvance<"ReadVSTM", 0>;
-defm "" : LMULReadAdvance<"ReadVLDSX", 0>;
-defm "" : LMULReadAdvance<"ReadVSTSX", 0>;
+def : ReadAdvance<ReadVLDSX, 0>;
+def : ReadAdvance<ReadVSTSX, 0>;
defm "" : LMULReadAdvance<"ReadVSTS8V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS16V", 0>;
defm "" : LMULReadAdvance<"ReadVSTS32V", 0>;
@@ -816,10 +937,12 @@ defm "" : LMULReadAdvanceW<"ReadVNShiftV", 0>;
defm "" : LMULReadAdvanceW<"ReadVNShiftX", 0>;
defm "" : LMULReadAdvance<"ReadVICmpV", 0>;
defm "" : LMULReadAdvance<"ReadVICmpX", 0>;
+defm "" : LMULReadAdvance<"ReadVIMinMaxV", 0>;
+defm "" : LMULReadAdvance<"ReadVIMinMaxX", 0>;
defm "" : LMULReadAdvance<"ReadVIMulV", 0>;
defm "" : LMULReadAdvance<"ReadVIMulX", 0>;
-defm "" : LMULReadAdvance<"ReadVIDivV", 0>;
-defm "" : LMULReadAdvance<"ReadVIDivX", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVIDivV", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVIDivX", 0>;
defm "" : LMULReadAdvanceW<"ReadVIWMulV", 0>;
defm "" : LMULReadAdvanceW<"ReadVIWMulX", 0>;
defm "" : LMULReadAdvance<"ReadVIMulAddV", 0>;
@@ -850,20 +973,22 @@ defm "" : LMULReadAdvanceFW<"ReadVFWALUV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWALUF", 0>;
defm "" : LMULReadAdvance<"ReadVFMulV", 0>;
defm "" : LMULReadAdvance<"ReadVFMulF", 0>;
-defm "" : LMULReadAdvance<"ReadVFDivV", 0>;
-defm "" : LMULReadAdvance<"ReadVFDivF", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulF", 0>;
defm "" : LMULReadAdvance<"ReadVFMulAddV", 0>;
defm "" : LMULReadAdvance<"ReadVFMulAddF", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddV", 0>;
defm "" : LMULReadAdvanceFW<"ReadVFWMulAddF", 0>;
-defm "" : LMULReadAdvance<"ReadVFSqrtV", 0>;
+defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>;
defm "" : LMULReadAdvance<"ReadVFRecpV", 0>;
-defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
-defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
+defm "" : LMULReadAdvance<"ReadVFMinMaxV", 0>;
+defm "" : LMULReadAdvance<"ReadVFMinMaxF", 0>;
defm "" : LMULReadAdvance<"ReadVFSgnjV", 0>;
defm "" : LMULReadAdvance<"ReadVFSgnjF", 0>;
+defm "" : LMULReadAdvance<"ReadVFCmpV", 0>;
+defm "" : LMULReadAdvance<"ReadVFCmpF", 0>;
defm "" : LMULReadAdvance<"ReadVFClassV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeV", 0>;
defm "" : LMULReadAdvance<"ReadVFMergeF", 0>;
@@ -886,6 +1011,7 @@ def : ReadAdvance<ReadVFRedV, 0>;
def : ReadAdvance<ReadVFRedV0, 0>;
def : ReadAdvance<ReadVFRedOV, 0>;
def : ReadAdvance<ReadVFRedOV0, 0>;
+def : ReadAdvance<ReadVFRedMinMaxV, 0>;
def : ReadAdvance<ReadVFWRedV, 0>;
def : ReadAdvance<ReadVFWRedV0, 0>;
def : ReadAdvance<ReadVFWRedOV, 0>;
@@ -899,19 +1025,23 @@ defm "" : LMULReadAdvance<"ReadVMSFSV", 0>;
defm "" : LMULReadAdvance<"ReadVMIotV", 0>;
// 16. Vector Permutation Instructions
-defm "" : LMULReadAdvance<"ReadVIMovVX", 0>;
-defm "" : LMULReadAdvance<"ReadVIMovXV", 0>;
-defm "" : LMULReadAdvance<"ReadVIMovXX", 0>;
-defm "" : LMULReadAdvance<"ReadVFMovVF", 0>;
-defm "" : LMULReadAdvance<"ReadVFMovFV", 0>;
-defm "" : LMULReadAdvance<"ReadVFMovFX", 0>;
+def : ReadAdvance<ReadVIMovVX, 0>;
+def : ReadAdvance<ReadVIMovXV, 0>;
+def : ReadAdvance<ReadVIMovXX, 0>;
+def : ReadAdvance<ReadVFMovVF, 0>;
+def : ReadAdvance<ReadVFMovFV, 0>;
+def : ReadAdvance<ReadVFMovFX, 0>;
defm "" : LMULReadAdvance<"ReadVISlideV", 0>;
defm "" : LMULReadAdvance<"ReadVISlideX", 0>;
defm "" : LMULReadAdvance<"ReadVFSlideV", 0>;
defm "" : LMULReadAdvance<"ReadVFSlideF", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_data", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_index", 0>;
+defm "" : LMULReadAdvance<"ReadVRGatherVX_data", 0>;
+defm "" : LMULReadAdvance<"ReadVRGatherVX_index", 0>;
+defm "" : LMULReadAdvance<"ReadVRGatherVI_data", 0>;
defm "" : LMULReadAdvance<"ReadVGatherV", 0>;
-defm "" : LMULReadAdvance<"ReadVGatherX", 0>;
-defm "" : LMULReadAdvance<"ReadVCompressV", 0>;
+defm "" : LMULSEWReadAdvance<"ReadVCompressV", 0>;
// These are already LMUL aware
def : ReadAdvance<ReadVMov1V, 0>;
def : ReadAdvance<ReadVMov2V, 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVScheduleZb.td b/llvm/lib/Target/RISCV/RISCVScheduleZb.td
index 324216df0380..0a16390e5053 100644
--- a/llvm/lib/Target/RISCV/RISCVScheduleZb.td
+++ b/llvm/lib/Target/RISCV/RISCVScheduleZb.td
@@ -1,4 +1,4 @@
-//===-- RISCVScheduleB.td - RISCV Scheduling Definitions B -*- tablegen -*-===//
+//===- RISCVScheduleB.td - RISC-V Scheduling Definitions B -*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -30,8 +30,10 @@ def WriteORCB : SchedWrite;
def WriteCLMUL : SchedWrite; // CLMUL/CLMULR/CLMULH
// Zbs extension
-def WriteSingleBit : SchedWrite; // BCLR/BSET/BINV/BEXT
-def WriteSingleBitImm: SchedWrite; // BCLRI/BSETI/BINVI/BEXTI
+def WriteSingleBit : SchedWrite; // BCLR/BSET/BINV
+def WriteSingleBitImm: SchedWrite; // BCLRI/BSETI/BINVI
+def WriteBEXT : SchedWrite; // BEXT
+def WriteBEXTI : SchedWrite; // BEXTI
// Zbkb extension
def WriteBREV8 : SchedWrite; // brev8
@@ -132,6 +134,8 @@ multiclass UnsupportedSchedZbs {
let Unsupported = true in {
def : WriteRes<WriteSingleBit, []>;
def : WriteRes<WriteSingleBitImm, []>;
+def : WriteRes<WriteBEXT, []>;
+def : WriteRes<WriteBEXTI, []>;
def : ReadAdvance<ReadSingleBit, 0>;
def : ReadAdvance<ReadSingleBitImm, 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVStripWSuffix.cpp b/llvm/lib/Target/RISCV/RISCVStripWSuffix.cpp
deleted file mode 100644
index 14ab9c2dd655..000000000000
--- a/llvm/lib/Target/RISCV/RISCVStripWSuffix.cpp
+++ /dev/null
@@ -1,87 +0,0 @@
-//===-------------- RISCVStripWSuffix.cpp - -w Suffix Removal -------------===//
-//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===---------------------------------------------------------------------===//
-//
-// This pass removes the -w suffix from each addiw and slliw instructions
-// whenever all users are dependent only on the lower word of the result of the
-// instruction. We do this only for addiw and slliw because the -w forms are
-// less compressible.
-//
-//===---------------------------------------------------------------------===//
-
-#include "RISCV.h"
-#include "RISCVMachineFunctionInfo.h"
-
-using namespace llvm;
-
-static cl::opt<bool> DisableStripWSuffix("riscv-disable-strip-w-suffix",
- cl::desc("Disable strip W suffix"),
- cl::init(false), cl::Hidden);
-
-namespace {
-
-class RISCVStripWSuffix : public MachineFunctionPass {
-public:
- static char ID;
-
- RISCVStripWSuffix() : MachineFunctionPass(ID) {
- initializeRISCVStripWSuffixPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- StringRef getPassName() const override { return "RISCV Strip W Suffix"; }
-};
-
-} // end anonymous namespace
-
-char RISCVStripWSuffix::ID = 0;
-INITIALIZE_PASS(RISCVStripWSuffix, "riscv-strip-w-suffix",
- "RISCV Strip W Suffix", false, false)
-
-FunctionPass *llvm::createRISCVStripWSuffixPass() {
- return new RISCVStripWSuffix();
-}
-
-bool RISCVStripWSuffix::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()) || DisableStripWSuffix)
- return false;
-
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
- const RISCVInstrInfo &TII = *ST.getInstrInfo();
-
- if (!ST.is64Bit())
- return false;
-
- bool MadeChange = false;
- for (MachineBasicBlock &MBB : MF) {
- for (auto I = MBB.begin(), IE = MBB.end(); I != IE; ++I) {
- MachineInstr &MI = *I;
-
- switch (MI.getOpcode()) {
- case RISCV::ADDW:
- case RISCV::SLLIW:
- if (TII.hasAllWUsers(MI, MRI)) {
- unsigned Opc =
- MI.getOpcode() == RISCV::ADDW ? RISCV::ADD : RISCV::SLLI;
- MI.setDesc(TII.get(Opc));
- MadeChange = true;
- }
- break;
- }
- }
- }
-
- return MadeChange;
-}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index c935dad1687f..eec2e7359eda 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVSubtarget.cpp - RISCV Subtarget Information ------------------===//
+//===-- RISCVSubtarget.cpp - RISC-V Subtarget Information -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,18 +6,18 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the RISCV specific subclass of TargetSubtargetInfo.
+// This file implements the RISC-V specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#include "RISCVSubtarget.h"
+#include "GISel/RISCVCallLowering.h"
+#include "GISel/RISCVLegalizerInfo.h"
+#include "GISel/RISCVRegisterBankInfo.h"
#include "RISCV.h"
#include "RISCVFrameLowering.h"
#include "RISCVMacroFusion.h"
#include "RISCVTargetMachine.h"
-#include "GISel/RISCVCallLowering.h"
-#include "GISel/RISCVLegalizerInfo.h"
-#include "GISel/RISCVRegisterBankInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
@@ -30,7 +30,7 @@ using namespace llvm;
#include "RISCVGenSubtargetInfo.inc"
static cl::opt<bool> EnableSubRegLiveness("riscv-enable-subreg-liveness",
- cl::init(false), cl::Hidden);
+ cl::init(true), cl::Hidden);
static cl::opt<unsigned> RVVVectorLMULMax(
"riscv-v-fixed-length-vector-lmul-max",
@@ -86,7 +86,7 @@ RISCVSubtarget::RISCVSubtarget(const Triple &TT, StringRef CPU,
CallLoweringInfo.reset(new RISCVCallLowering(*getTargetLowering()));
Legalizer.reset(new RISCVLegalizerInfo(*this));
- auto *RBI = new RISCVRegisterBankInfo(*getRegisterInfo());
+ auto *RBI = new RISCVRegisterBankInfo(getHwMode());
RegBankInfo.reset(RBI);
InstSelector.reset(createRISCVInstructionSelector(
*static_cast<const RISCVTargetMachine *>(&TM), *this, *RBI));
@@ -155,10 +155,10 @@ unsigned RISCVSubtarget::getMinRVVVectorSizeInBits() const {
unsigned RISCVSubtarget::getMaxLMULForFixedLengthVectors() const {
assert(hasVInstructions() &&
"Tried to get vector length without Zve or V extension support!");
- assert(RVVVectorLMULMax <= 8 && isPowerOf2_32(RVVVectorLMULMax) &&
+ assert(RVVVectorLMULMax <= 8 &&
+ llvm::has_single_bit<uint32_t>(RVVVectorLMULMax) &&
"V extension requires a LMUL to be at most 8 and a power of 2!");
- return PowerOf2Floor(
- std::max<unsigned>(std::min<unsigned>(RVVVectorLMULMax, 8), 1));
+ return llvm::bit_floor(std::clamp<unsigned>(RVVVectorLMULMax, 1, 8));
}
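The PowerOf2Floor(std::max(std::min(X, 8), 1)) to llvm::bit_floor(std::clamp(X, 1, 8)) rewrite seen here recurs later in this diff (RISCVTargetMachine.cpp and RISCVTargetTransformInfo.cpp). A minimal standalone sketch of the idiom, using the C++20 standard functions that the llvm:: helpers mirror:

    #include <algorithm>
    #include <bit>
    #include <cstdio>

    int main() {
      // Arbitrary inputs mapped by the clamp-and-floor idiom onto a power of
      // two in the range [1, 8], which is the legal LMUL bound.
      for (unsigned X : {0u, 1u, 3u, 6u, 8u, 100u})
        std::printf("%u -> %u\n", X, std::bit_floor(std::clamp(X, 1u, 8u)));
      // Prints: 0 -> 1, 1 -> 1, 3 -> 2, 6 -> 4, 8 -> 8, 100 -> 8
      return 0;
    }

In the subtarget function above the assert already guarantees a power of two no larger than 8, so the clamp-and-floor mainly acts as a normalization in builds where the assert is compiled out.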
bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 678d218a6719..a831beb7edd9 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -1,4 +1,4 @@
-//===-- RISCVSubtarget.h - Define Subtarget for the RISCV -------*- C++ -*-===//
+//===-- RISCVSubtarget.h - Define Subtarget for the RISC-V ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the RISCV specific subclass of TargetSubtargetInfo.
+// This file declares the RISC-V specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
@@ -56,6 +56,9 @@ private:
uint8_t MaxInterleaveFactor = 2;
RISCVABI::ABI TargetABI = RISCVABI::ABI_Unknown;
std::bitset<RISCV::NUM_TARGET_REGS> UserReservedRegister;
+ Align PrefFunctionAlignment;
+ Align PrefLoopAlignment;
+
RISCVFrameLowering FrameLowering;
RISCVInstrInfo InstrInfo;
RISCVRegisterInfo RegInfo;
@@ -95,7 +98,10 @@ public:
}
bool enableMachineScheduler() const override { return true; }
- /// Returns RISCV processor family.
+ Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
+ Align getPrefLoopAlignment() const { return PrefLoopAlignment; }
+
+ /// Returns RISC-V processor family.
/// Avoid this function! CPU specifics should be kept local to this class
/// and preferably modeled with SubtargetFeatures or properties in
/// initializeProperties().
@@ -107,8 +113,21 @@ public:
bool hasStdExtCOrZca() const { return HasStdExtC || HasStdExtZca; }
bool hasStdExtZvl() const { return ZvlLen != 0; }
+ bool hasStdExtFOrZfinx() const { return HasStdExtF || HasStdExtZfinx; }
+ bool hasStdExtDOrZdinx() const { return HasStdExtD || HasStdExtZdinx; }
bool hasStdExtZfhOrZfhmin() const { return HasStdExtZfh || HasStdExtZfhmin; }
- bool is64Bit() const { return HasRV64; }
+ bool hasStdExtZfhOrZhinx() const { return HasStdExtZfh || HasStdExtZhinx; }
+ bool hasStdExtZhinxOrZhinxmin() const {
+ return HasStdExtZhinx || HasStdExtZhinxmin;
+ }
+ bool hasStdExtZfhOrZfhminOrZhinxOrZhinxmin() const {
+ return hasStdExtZfhOrZfhmin() || hasStdExtZhinxOrZhinxmin();
+ }
+ bool hasHalfFPLoadStoreMove() const {
+ return HasStdExtZfh || HasStdExtZfhmin || HasStdExtZfbfmin ||
+ HasStdExtZvfbfwma;
+ }
+ bool is64Bit() const { return IsRV64; }
MVT getXLenVT() const { return XLenVT; }
unsigned getXLen() const { return XLen; }
unsigned getFLen() const {
@@ -133,6 +152,11 @@ public:
return VLen == 0 ? 65536 : VLen;
}
RISCVABI::ABI getTargetABI() const { return TargetABI; }
+ bool isSoftFPABI() const {
+ return TargetABI == RISCVABI::ABI_LP64 ||
+ TargetABI == RISCVABI::ABI_ILP32 ||
+ TargetABI == RISCVABI::ABI_ILP32E;
+ }
bool isRegisterReservedByUser(Register i) const {
assert(i < RISCV::NUM_TARGET_REGS && "Register out of range");
return UserReservedRegister[i];
@@ -143,19 +167,26 @@ public:
// Vector codegen related methods.
bool hasVInstructions() const { return HasStdExtZve32x; }
bool hasVInstructionsI64() const { return HasStdExtZve64x; }
- bool hasVInstructionsF16() const {
- return HasStdExtZvfh && hasStdExtZfhOrZfhmin();
- }
+ bool hasVInstructionsF16() const { return HasStdExtZvfh; }
// FIXME: Consider Zfinx in the future
bool hasVInstructionsF32() const { return HasStdExtZve32f && HasStdExtF; }
// FIXME: Consider Zdinx in the future
bool hasVInstructionsF64() const { return HasStdExtZve64d && HasStdExtD; }
// F16 and F64 both require F32.
bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); }
+ bool hasVInstructionsFullMultiply() const { return HasStdExtV; }
unsigned getMaxInterleaveFactor() const {
return hasVInstructions() ? MaxInterleaveFactor : 1;
}
+  // Returns VLEN divided by DLEN, where DLEN is the datapath width of the
+  // vector hardware implementation, which may be less than VLEN.
+ unsigned getDLenFactor() const {
+ if (DLenFactor2)
+ return 2;
+ return 1;
+ }
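For illustration only (the widths are assumptions, not a claim about any specific core): an implementation with VLEN = 256 but a 128-bit vector datapath has DLEN = VLEN/2, so getDLenFactor() returns 2 and the TTI cost model later in this diff roughly doubles its per-LMUL throughput estimates; when DLEN equals VLEN the factor is 1 and the estimates are unchanged.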
+
protected:
// GlobalISel related APIs.
std::unique_ptr<CallLowering> CallLoweringInfo;
@@ -176,6 +207,8 @@ public:
const LegalizerInfo *getLegalizerInfo() const override;
const RegisterBankInfo *getRegBankInfo() const override;
+ bool isTargetFuchsia() const { return getTargetTriple().isOSFuchsia(); }
+
bool useConstantPoolForLargeInts() const;
// Maximum cost used for building integers, integers will be put into constant
diff --git a/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/llvm/lib/Target/RISCV/RISCVSystemOperands.td
index b9aa25b321b0..953df7b15e2f 100644
--- a/llvm/lib/Target/RISCV/RISCVSystemOperands.td
+++ b/llvm/lib/Target/RISCV/RISCVSystemOperands.td
@@ -19,11 +19,9 @@ include "llvm/TableGen/SearchableTable.td"
class SysReg<string name, bits<12> op> {
string Name = name;
- // A maximum of one alias is supported right now.
- string AltName = name;
- // A maximum of one deprecated name is supported right now. Unlike the
- // `AltName` alias, a `DeprecatedName` generates a diagnostic when the name is
- // used to encourage software to migrate away from the name.
+ // A maximum of one deprecated name is supported right now. It generates a
+  // diagnostic when the name is used, to encourage software to migrate away
+  // from the name.
string DeprecatedName = "";
bits<12> Encoding = op;
// FIXME: add these additional fields when needed.
@@ -43,7 +41,7 @@ def SysRegsList : GenericTable {
let FilterClass = "SysReg";
// FIXME: add "ReadWrite", "Mode", "Extra", "Number" fields when needed.
let Fields = [
- "Name", "AltName", "DeprecatedName", "Encoding", "FeaturesRequired",
+ "Name", "DeprecatedName", "Encoding", "FeaturesRequired",
"isRV32Only",
];
@@ -56,38 +54,40 @@ def lookupSysRegByName : SearchIndex {
let Key = [ "Name" ];
}
-def lookupSysRegByAltName : SearchIndex {
+def lookupSysRegByDeprecatedName : SearchIndex {
let Table = SysRegsList;
- let Key = [ "AltName" ];
+ let Key = [ "DeprecatedName" ];
}
-def lookupSysRegByDeprecatedName : SearchIndex {
- let Table = SysRegsList;
+class SiFiveReg<string name, bits<12> op> : SysReg<name, op>;
+
+def SiFiveRegsList : GenericTable {
+ let FilterClass = "SiFiveReg";
+ // FIXME: add "ReadWrite", "Mode", "Extra", "Number" fields when needed.
+ let Fields = [
+ "Name", "DeprecatedName", "Encoding", "FeaturesRequired",
+ "isRV32Only",
+ ];
+
+ let PrimaryKey = [ "Encoding" ];
+ let PrimaryKeyName = "lookupSiFiveRegByEncoding";
+}
+
+def lookupSiFiveRegByName : SearchIndex {
+ let Table = SiFiveRegsList;
+ let Key = [ "Name" ];
+}
+
+def lookupSiFiveRegByDeprecatedName : SearchIndex {
+ let Table = SiFiveRegsList;
let Key = [ "DeprecatedName" ];
}
// The following CSR encodings match those given in Tables 2.2,
-// 2.3, 2.4 and 2.5 in the RISC-V Instruction Set Manual
+// 2.3, 2.4, 2.5 and 2.6 in the RISC-V Instruction Set Manual
// Volume II: Privileged Architecture.
//===----------------------------------------------------------------------===//
-// User Trap Setup
-//===----------------------------------------------------------------------===//
-def : SysReg<"ustatus", 0x000>;
-def : SysReg<"uie", 0x004>;
-def : SysReg<"utvec", 0x005>;
-
-//===----------------------------------------------------------------------===//
-// User Trap Handling
-//===----------------------------------------------------------------------===//
-def : SysReg<"uscratch", 0x040>;
-def : SysReg<"uepc", 0x041>;
-def : SysReg<"ucause", 0x042>;
-let DeprecatedName = "ubadaddr" in
-def : SysReg<"utval", 0x043>;
-def : SysReg<"uip", 0x044>;
-
-//===----------------------------------------------------------------------===//
// User Floating-Point CSRs
//===----------------------------------------------------------------------===//
@@ -120,8 +120,6 @@ foreach i = 3...31 in
// Supervisor Trap Setup
//===----------------------------------------------------------------------===//
def : SysReg<"sstatus", 0x100>;
-def : SysReg<"sedeleg", 0x102>;
-def : SysReg<"sideleg", 0x103>;
def : SysReg<"sie", 0x104>;
def : SysReg<"stvec", 0x105>;
def : SysReg<"scounteren", 0x106>;
@@ -311,7 +309,7 @@ foreach i = 3...31 in
//===----------------------------------------------------------------------===//
// Machine Counter Setup
//===----------------------------------------------------------------------===//
-let AltName = "mucounteren" in // Privileged spec v1.9.1 Name
+let DeprecatedName = "mucounteren" in // Privileged spec v1.9.1 Name
def : SysReg<"mcountinhibit", 0x320>;
// mhpmevent3-mhpmevent31 at 0x323-0x33F.
@@ -325,6 +323,20 @@ foreach i = 3...31 in {
}
//===----------------------------------------------------------------------===//
+// SiFive Custom Machine Mode Registers
+//===----------------------------------------------------------------------===//
+
+let FeaturesRequired = [{ {RISCV::FeatureVendorXSfcie} }] in {
+def : SiFiveReg<"mnscratch", 0x350>;
+def : SiFiveReg<"mnepc", 0x351>;
+def : SiFiveReg<"mncause", 0x352>;
+def : SiFiveReg<"mnstatus", 0x353>;
+def : SiFiveReg<"mbpm", 0x7C0>;
+def : SiFiveReg<"mfd", 0x7C1>;
+def : SiFiveReg<"mpd", 0x7C8>;
+}
+
+//===----------------------------------------------------------------------===//
// Debug/ Trace Registers (shared with Debug Mode)
//===----------------------------------------------------------------------===//
def : SysReg<"tselect", 0x7A0>;
@@ -341,7 +353,7 @@ def : SysReg<"dpc", 0x7B1>;
// "dscratch" is an alternative name for "dscratch0" which appeared in earlier
// drafts of the RISC-V debug spec
-let AltName = "dscratch" in
+let DeprecatedName = "dscratch" in
def : SysReg<"dscratch0", 0x7B2>;
def : SysReg<"dscratch1", 0x7B3>;
@@ -350,9 +362,9 @@ def : SysReg<"dscratch1", 0x7B3>;
//===----------------------------------------------------------------------===//
def : SysReg<"vstart", 0x008>;
def : SysReg<"vxsat", 0x009>;
-def : SysReg<"vxrm", 0x00A>;
+def SysRegVXRM : SysReg<"vxrm", 0x00A>;
def : SysReg<"vcsr", 0x00F>;
-def : SysReg<"vl", 0xC20>;
+def SysRegVL : SysReg<"vl", 0xC20>;
def : SysReg<"vtype", 0xC21>;
def SysRegVLENB: SysReg<"vlenb", 0xC22>;
@@ -378,3 +390,56 @@ foreach i = 0...3 in {
//===-----------------------------------------------
def SEED : SysReg<"seed", 0x015>;
+
+//===-----------------------------------------------
+// Advanced Interrupt Architecture
+//===-----------------------------------------------
+
+// Machine-level CSRs
+def : SysReg<"miselect", 0x350>;
+def : SysReg<"mireg", 0x351>;
+def : SysReg<"mtopei", 0x35C>;
+def : SysReg<"mtopi", 0xFB0>;
+def : SysReg<"mvien", 0x308>;
+def : SysReg<"mvip", 0x309>;
+let isRV32Only = 1 in {
+def : SysReg<"midelegh", 0x313>;
+def : SysReg<"mieh", 0x314>;
+def : SysReg<"mvienh", 0x318>;
+def : SysReg<"mviph", 0x319>;
+def : SysReg<"miph", 0x354>;
+} // isRV32Only
+
+// Supervisor-level CSRs
+def : SysReg<"siselect", 0x150>;
+def : SysReg<"sireg", 0x151>;
+def : SysReg<"stopei", 0x15C>;
+def : SysReg<"stopi", 0xDB0>;
+let isRV32Only = 1 in {
+def : SysReg<"sieh", 0x114>;
+def : SysReg<"siph", 0x154>;
+} // isRV32Only
+
+// Hypervisor and VS CSRs
+def : SysReg<"hvien", 0x608>;
+def : SysReg<"hvictl", 0x609>;
+def : SysReg<"hviprio1", 0x646>;
+def : SysReg<"hviprio2", 0x647>;
+def : SysReg<"vsiselect", 0x250>;
+def : SysReg<"vsireg", 0x251>;
+def : SysReg<"vstopei", 0x25C>;
+def : SysReg<"vstopi", 0xEB0>;
+let isRV32Only = 1 in {
+def : SysReg<"hidelegh", 0x613>;
+def : SysReg<"hvienh", 0x618>;
+def : SysReg<"hviph", 0x655>;
+def : SysReg<"hviprio1h", 0x656>;
+def : SysReg<"hviprio2h", 0x657>;
+def : SysReg<"vsieh", 0x214>;
+def : SysReg<"vsiph", 0x254>;
+} // isRV32Only
+
+// Jump Vector Table CSR
+//===-----------------------------------------------
+
+def : SysReg<"jvt", 0x017>;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index cc881406666c..59dac5c7b57d 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVTargetMachine.cpp - Define TargetMachine for RISCV -----------===//
+//===-- RISCVTargetMachine.cpp - Define TargetMachine for RISC-V ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// Implements the info about RISCV target spec.
+// Implements the info about RISC-V target spec.
//
//===----------------------------------------------------------------------===//
@@ -29,7 +29,6 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/FormattedStream.h"
@@ -67,21 +66,30 @@ static cl::opt<int> RVVVectorBitsMinOpt(
"autovectorization with fixed width vectors."),
cl::init(-1), cl::Hidden);
+static cl::opt<bool> EnableRISCVCopyPropagation(
+ "riscv-enable-copy-propagation",
+ cl::desc("Enable the copy propagation with RISC-V copy instr"),
+ cl::init(true), cl::Hidden);
+
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
auto *PR = PassRegistry::getPassRegistry();
initializeGlobalISel(*PR);
+ initializeKCFIPass(*PR);
initializeRISCVMakeCompressibleOptPass(*PR);
initializeRISCVGatherScatterLoweringPass(*PR);
initializeRISCVCodeGenPreparePass(*PR);
initializeRISCVMergeBaseOffsetOptPass(*PR);
- initializeRISCVSExtWRemovalPass(*PR);
- initializeRISCVStripWSuffixPass(*PR);
+ initializeRISCVOptWInstrsPass(*PR);
initializeRISCVPreRAExpandPseudoPass(*PR);
initializeRISCVExpandPseudoPass(*PR);
initializeRISCVInsertVSETVLIPass(*PR);
+ initializeRISCVInsertReadWriteCSRPass(*PR);
initializeRISCVDAGToDAGISelPass(*PR);
+ initializeRISCVInitUndefPass(*PR);
+ initializeRISCVMoveMergePass(*PR);
+ initializeRISCVPushPopOptPass(*PR);
}
static StringRef computeDataLayout(const Triple &TT) {
@@ -111,6 +119,9 @@ RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
// RISC-V supports the MachineOutliner.
setMachineOutliner(true);
setSupportsDefaultOutlining(true);
+
+ if (TT.isOSFuchsia() && !TT.isArch64Bit())
+ report_fatal_error("Fuchsia is only supported for 64-bit");
}
const RISCVSubtarget *
@@ -159,11 +170,11 @@ RISCVTargetMachine::getSubtargetImpl(const Function &F) const {
RVVBitsMax = std::max(RVVBitsMin, RVVBitsMax);
}
- RVVBitsMin =
- PowerOf2Floor((RVVBitsMin < 64 || RVVBitsMin > 65536) ? 0 : RVVBitsMin);
+ RVVBitsMin = llvm::bit_floor(
+ (RVVBitsMin < 64 || RVVBitsMin > 65536) ? 0 : RVVBitsMin);
}
RVVBitsMax =
- PowerOf2Floor((RVVBitsMax < 64 || RVVBitsMax > 65536) ? 0 : RVVBitsMax);
+ llvm::bit_floor((RVVBitsMax < 64 || RVVBitsMax > 65536) ? 0 : RVVBitsMax);
SmallString<512> Key;
Key += "RVVMin";
@@ -261,6 +272,7 @@ public:
void addMachineSSAOptimization() override;
void addPreRegAlloc() override;
void addPostRegAlloc() override;
+ void addOptimizedRegAlloc() override;
};
} // namespace
@@ -271,11 +283,11 @@ TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) {
void RISCVPassConfig::addIRPasses() {
addPass(createAtomicExpandPass());
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOpt::None) {
addPass(createRISCVGatherScatterLoweringPass());
-
- if (getOptLevel() != CodeGenOpt::None)
+ addPass(createInterleavedAccessPass());
addPass(createRISCVCodeGenPreparePass());
+ }
TargetPassConfig::addIRPasses();
}
@@ -323,19 +335,42 @@ bool RISCVPassConfig::addGlobalInstructionSelect() {
return false;
}
-void RISCVPassConfig::addPreSched2() {}
+void RISCVPassConfig::addPreSched2() {
+ // Emit KCFI checks for indirect calls.
+ addPass(createKCFIPass());
+}
void RISCVPassConfig::addPreEmitPass() {
addPass(&BranchRelaxationPassID);
addPass(createRISCVMakeCompressibleOptPass());
+
+ // TODO: It would potentially be better to schedule copy propagation after
+ // expanding pseudos (in addPreEmitPass2). However, performing copy
+ // propagation after the machine outliner (which runs after addPreEmitPass)
+ // currently leads to incorrect code-gen, where copies to registers within
+ // outlined functions are removed erroneously.
+ if (TM->getOptLevel() >= CodeGenOpt::Default && EnableRISCVCopyPropagation)
+ addPass(createMachineCopyPropagationPass(true));
}
void RISCVPassConfig::addPreEmitPass2() {
+ if (TM->getOptLevel() != CodeGenOpt::None) {
+ addPass(createRISCVMoveMergePass());
+    // Schedule the PushPop optimization before the expansion of pseudo
+    // instructions, so that the return instruction is detected correctly.
+ addPass(createRISCVPushPopOptimizationPass());
+ }
addPass(createRISCVExpandPseudoPass());
+
// Schedule the expansion of AMOs at the last possible moment, avoiding the
// possibility for other passes to break the requirements for forward
// progress in the LR/SC block.
addPass(createRISCVExpandAtomicPseudoPass());
+
+ // KCFI indirect call checks are lowered to a bundle.
+ addPass(createUnpackMachineBundles([&](const MachineFunction &MF) {
+ return MF.getFunction().getParent()->getModuleFlag("kcfi");
+ }));
}
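The unbundling callback above only fires for modules carrying the "kcfi" module flag. A hedged sketch of how such a module is tagged on the IR side (this mirrors, to the best of my knowledge, what clang does for -fsanitize=kcfi; treat the flag behavior constant as an assumption):

    #include "llvm/IR/Module.h"

    // Set the module flag that the KCFI-related passes key off of.
    void enableKCFI(llvm::Module &M) {
      M.addModuleFlag(llvm::Module::Override, "kcfi", 1);
    }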
void RISCVPassConfig::addMachineSSAOptimization() {
@@ -344,8 +379,7 @@ void RISCVPassConfig::addMachineSSAOptimization() {
addPass(&MachineCombinerID);
if (TM->getTargetTriple().getArch() == Triple::riscv64) {
- addPass(createRISCVSExtWRemovalPass());
- addPass(createRISCVStripWSuffixPass());
+ addPass(createRISCVOptWInstrsPass());
}
}
@@ -354,6 +388,14 @@ void RISCVPassConfig::addPreRegAlloc() {
if (TM->getOptLevel() != CodeGenOpt::None)
addPass(createRISCVMergeBaseOffsetOptPass());
addPass(createRISCVInsertVSETVLIPass());
+ addPass(createRISCVInsertReadWriteCSRPass());
+}
+
+void RISCVPassConfig::addOptimizedRegAlloc() {
+ if (getOptimizeRegAlloc())
+ insertPass(&DetectDeadLanesID, &RISCVInitUndefID);
+
+ TargetPassConfig::addOptimizedRegAlloc();
}
void RISCVPassConfig::addPostRegAlloc() {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.h b/llvm/lib/Target/RISCV/RISCVTargetMachine.h
index 9d3e6e98954d..775422075314 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.h
@@ -1,4 +1,4 @@
-//===-- RISCVTargetMachine.h - Define TargetMachine for RISCV ---*- C++ -*-===//
+//===-- RISCVTargetMachine.h - Define TargetMachine for RISC-V --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the RISCV specific subclass of TargetMachine.
+// This file declares the RISC-V specific subclass of TargetMachine.
//
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVTargetObjectFile.cpp b/llvm/lib/Target/RISCV/RISCVTargetObjectFile.cpp
index 52083714931a..7c9e57e6eef3 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetObjectFile.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetObjectFile.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVTargetObjectFile.cpp - RISCV Object Info -----------------===//
+//===-- RISCVTargetObjectFile.cpp - RISC-V Object Info --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "RISCVTargetObjectFile.h"
+#include "MCTargetDesc/RISCVMCObjectFileInfo.h"
#include "RISCVTargetMachine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
@@ -14,10 +15,17 @@
using namespace llvm;
+unsigned RISCVELFTargetObjectFile::getTextSectionAlignment() const {
+ return RISCVMCObjectFileInfo::getTextSectionAlignment(
+ *getContext().getSubtargetInfo());
+}
+
void RISCVELFTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ PLTRelativeVariantKind = MCSymbolRefExpr::VK_PLT;
+
SmallDataSection = getContext().getELFSection(
".sdata", ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC);
SmallBSSSection = getContext().getELFSection(".sbss", ELF::SHT_NOBITS,
diff --git a/llvm/lib/Target/RISCV/RISCVTargetObjectFile.h b/llvm/lib/Target/RISCV/RISCVTargetObjectFile.h
index 830a7d813c15..890effd07320 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetObjectFile.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetObjectFile.h
@@ -1,4 +1,4 @@
-//===-- RISCVTargetObjectFile.h - RISCV Object Info -*- C++ ---------*-===//
+//===-- RISCVTargetObjectFile.h - RISC-V Object Info ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -13,13 +13,15 @@
namespace llvm {
-/// This implementation is used for RISCV ELF targets.
+/// This implementation is used for RISC-V ELF targets.
class RISCVELFTargetObjectFile : public TargetLoweringObjectFileELF {
MCSection *SmallDataSection;
MCSection *SmallBSSSection;
unsigned SSThreshold = 8;
public:
+ unsigned getTextSectionAlignment() const override;
+
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
/// Return true if this global address should be placed into small data/bss
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 585eb9a19c0a..62883e962b4c 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -8,10 +8,12 @@
#include "RISCVTargetTransformInfo.h"
#include "MCTargetDesc/RISCVMatInt.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/Instructions.h"
#include <cmath>
#include <optional>
using namespace llvm;
@@ -23,20 +25,21 @@ static cl::opt<unsigned> RVVRegisterWidthLMUL(
cl::desc(
"The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
"by autovectorized code. Fractional LMULs are not supported."),
- cl::init(1), cl::Hidden);
+ cl::init(2), cl::Hidden);
static cl::opt<unsigned> SLPMaxVF(
"riscv-v-slp-max-vf",
cl::desc(
- "Result used for getMaximumVF query which is used exclusively by "
- "SLP vectorizer. Defaults to 1 which disables SLP."),
- cl::init(1), cl::Hidden);
+ "Overrides result used for getMaximumVF query which is used "
+ "exclusively by SLP vectorizer."),
+ cl::Hidden);
InstructionCost RISCVTTIImpl::getLMULCost(MVT VT) {
  // TODO: Here we assume the reciprocal throughput is 1 for LMUL_1; it is
  // implementation-defined.
if (!VT.isVector())
return InstructionCost::getInvalid();
+ unsigned DLenFactor = ST->getDLenFactor();
unsigned Cost;
if (VT.isScalableVector()) {
unsigned LMul;
@@ -44,13 +47,13 @@ InstructionCost RISCVTTIImpl::getLMULCost(MVT VT) {
std::tie(LMul, Fractional) =
RISCVVType::decodeVLMUL(RISCVTargetLowering::getLMUL(VT));
if (Fractional)
- Cost = 1;
+ Cost = LMul <= DLenFactor ? (DLenFactor / LMul) : 1;
else
- Cost = LMul;
+ Cost = (LMul * DLenFactor);
} else {
- Cost = VT.getSizeInBits() / ST->getRealMinVLen();
+ Cost = divideCeil(VT.getSizeInBits(), ST->getRealMinVLen() / DLenFactor);
}
- return std::max<unsigned>(Cost, 1);
+ return Cost;
}
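Working the new formula with an assumed DLenFactor of 2: an LMUL = 4 scalable type costs 4 * 2 = 8, a fractional MF2 type (decoded LMul = 2) costs 2 / 2 = 1, and a 128-bit fixed vector on a machine with getRealMinVLen() = 128 costs ceil(128 / 64) = 2. With DLenFactor = 1 the scalable cases reduce to the old values; the fixed-vector case now rounds up instead of down, and the max(Cost, 1) clamp is gone because every branch already yields at least 1.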
InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
@@ -87,7 +90,7 @@ static bool canUseShiftPair(Instruction *Inst, const APInt &Imm) {
// (and (shl x, c2), c1) will be matched to (srli (slli x, c2+c3), c3) if c1
// is a mask shifted by c2 bits with c3 leading zeros.
if (isShiftedMask_64(Mask)) {
- unsigned Trailing = countTrailingZeros(Mask);
+ unsigned Trailing = llvm::countr_zero(Mask);
if (ShAmt == Trailing)
return true;
}
@@ -143,8 +146,8 @@ InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
Takes12BitImm = true;
break;
case Instruction::Mul:
- // Negated power of 2 is a shift and a negate.
- if (Imm.isNegatedPowerOf2())
+ // Power of 2 is a shift. Negated power of 2 is a shift and a negate.
+ if (Imm.isPowerOf2() || Imm.isNegatedPowerOf2())
return TTI::TCC_Free;
// FIXME: There is no MULI instruction.
Takes12BitImm = true;
@@ -164,7 +167,7 @@ InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
// Check immediate is the correct argument...
if (Instruction::isCommutative(Opcode) || Idx == ImmArgIdx) {
// ... and fits into the 12-bit immediate.
- if (Imm.getMinSignedBits() <= 64 &&
+ if (Imm.getSignificantBits() <= 64 &&
getTLI()->isLegalAddImmediate(Imm.getSExtValue())) {
return TTI::TCC_Free;
}
@@ -222,8 +225,8 @@ std::optional<unsigned> RISCVTTIImpl::getVScaleForTuning() const {
TypeSize
RISCVTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
- unsigned LMUL = PowerOf2Floor(
- std::max<unsigned>(std::min<unsigned>(RVVRegisterWidthLMUL, 8), 1));
+ unsigned LMUL =
+ llvm::bit_floor(std::clamp<unsigned>(RVVRegisterWidthLMUL, 1, 8));
switch (K) {
case TargetTransformInfo::RGK_Scalar:
return TypeSize::getFixed(ST->getXLen());
@@ -241,13 +244,30 @@ RISCVTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
llvm_unreachable("Unsupported register kind");
}
-InstructionCost RISCVTTIImpl::getSpliceCost(VectorType *Tp, int Index) {
- std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
+InstructionCost
+RISCVTTIImpl::getConstantPoolLoadCost(Type *Ty, TTI::TargetCostKind CostKind) {
+ // Add a cost of address generation + the cost of the load. The address
+ // is expected to be a PC relative offset to a constant pool entry
+ // using auipc/addi.
+ return 2 + getMemoryOpCost(Instruction::Load, Ty, DL.getABITypeAlign(Ty),
+ /*AddressSpace=*/0, CostKind);
+}
+
+static VectorType *getVRGatherIndexType(MVT DataVT, const RISCVSubtarget &ST,
+ LLVMContext &C) {
+ assert((DataVT.getScalarSizeInBits() != 8 ||
+ DataVT.getVectorNumElements() <= 256) && "unhandled case in lowering");
+ MVT IndexVT = DataVT.changeTypeToInteger();
+ if (IndexVT.getScalarType().bitsGT(ST.getXLenVT()))
+ IndexVT = IndexVT.changeVectorElementType(MVT::i16);
+ return cast<VectorType>(EVT(IndexVT).getTypeForEVT(C));
+}
- unsigned Cost = 2; // vslidedown+vslideup.
- // TODO: Multiplying by LT.first implies this legalizes into multiple copies
- // of similar code, but I think we expand through memory.
- return Cost * LT.first * getLMULCost(LT.second);
+/// Return the cost of a vrgather.vv instruction for the type VT. vrgather.vv
+/// is generally quadratic in the number of vregs implied by LMUL. Note that
+/// the operands (index and possibly mask) are handled separately.
+InstructionCost RISCVTTIImpl::getVRGatherVVCost(MVT VT) {
+ return getLMULCost(VT) * getLMULCost(VT);
}
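Worked example (assuming DLenFactor = 1): at LMUL = 4 a vrgather.vv is costed 4 * 4 = 16, reflecting that each of the four destination registers may need to gather from any of the four source registers, while at LMUL = 1 the cost remains 1.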
InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
@@ -255,38 +275,102 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
TTI::TargetCostKind CostKind,
int Index, VectorType *SubTp,
ArrayRef<const Value *> Args) {
- if (isa<ScalableVectorType>(Tp)) {
- std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
+ Kind = improveShuffleKindFromMask(Kind, Mask);
+
+ std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
+
+ // First, handle cases where having a fixed length vector enables us to
+ // give a more accurate cost than falling back to generic scalable codegen.
+ // TODO: Each of these cases hints at a modeling gap around scalable vectors.
+ if (isa<FixedVectorType>(Tp)) {
switch (Kind) {
default:
- // Fallthrough to generic handling.
- // TODO: Most of these cases will return getInvalid in generic code, and
- // must be implemented here.
break;
- case TTI::SK_Broadcast: {
- return LT.first * 1;
+ case TTI::SK_PermuteSingleSrc: {
+ if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
+ MVT EltTp = LT.second.getVectorElementType();
+        // If the element size is < ELEN, then interleaving and deinterleaving
+        // shuffles of 2 vectors can be lowered into the following
+        // sequences.
+ if (EltTp.getScalarSizeInBits() < ST->getELEN()) {
+ // Example sequence:
+ // vsetivli zero, 4, e8, mf4, ta, ma (ignored)
+ // vwaddu.vv v10, v8, v9
+ // li a0, -1 (ignored)
+ // vwmaccu.vx v10, a0, v9
+ if (ShuffleVectorInst::isInterleaveMask(Mask, 2, Mask.size()))
+ return 2 * LT.first * getLMULCost(LT.second);
+
+ if (Mask[0] == 0 || Mask[0] == 1) {
+ auto DeinterleaveMask = createStrideMask(Mask[0], 2, Mask.size());
+ // Example sequence:
+ // vnsrl.wi v10, v8, 0
+ if (equal(DeinterleaveMask, Mask))
+ return LT.first * getLMULCost(LT.second);
+ }
+ }
+
+ // vrgather + cost of generating the mask constant.
+ // We model this for an unknown mask with a single vrgather.
+ if (LT.first == 1 &&
+ (LT.second.getScalarSizeInBits() != 8 ||
+ LT.second.getVectorNumElements() <= 256)) {
+ VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, Tp->getContext());
+ InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
+ return IndexCost + getVRGatherVVCost(LT.second);
+ }
+ }
+ break;
}
- case TTI::SK_Splice:
- return getSpliceCost(Tp, Index);
- case TTI::SK_Reverse:
- // Most of the cost here is producing the vrgather index register
- // Example sequence:
- // csrr a0, vlenb
- // srli a0, a0, 3
- // addi a0, a0, -1
- // vsetvli a1, zero, e8, mf8, ta, mu (ignored)
- // vid.v v9
- // vrsub.vx v10, v9, a0
- // vrgather.vv v9, v8, v10
- if (Tp->getElementType()->isIntegerTy(1))
- // Mask operation additionally required extend and truncate
- return LT.first * 9;
- return LT.first * 6;
+ case TTI::SK_Transpose:
+ case TTI::SK_PermuteTwoSrc: {
+ if (Mask.size() >= 2 && LT.second.isFixedLengthVector()) {
+ // 2 x (vrgather + cost of generating the mask constant) + cost of mask
+ // register for the second vrgather. We model this for an unknown
+ // (shuffle) mask.
+ if (LT.first == 1 &&
+ (LT.second.getScalarSizeInBits() != 8 ||
+ LT.second.getVectorNumElements() <= 256)) {
+ auto &C = Tp->getContext();
+ auto EC = Tp->getElementCount();
+ VectorType *IdxTy = getVRGatherIndexType(LT.second, *ST, C);
+ VectorType *MaskTy = VectorType::get(IntegerType::getInt1Ty(C), EC);
+ InstructionCost IndexCost = getConstantPoolLoadCost(IdxTy, CostKind);
+ InstructionCost MaskCost = getConstantPoolLoadCost(MaskTy, CostKind);
+ return 2 * IndexCost + 2 * getVRGatherVVCost(LT.second) + MaskCost;
+ }
+ }
+ break;
}
- }
+ }
+ };
- if (isa<FixedVectorType>(Tp) && Kind == TargetTransformInfo::SK_Broadcast) {
- std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
+ // Handle scalable vectors (and fixed vectors legalized to scalable vectors).
+ switch (Kind) {
+ default:
+ // Fallthrough to generic handling.
+ // TODO: Most of these cases will return getInvalid in generic code, and
+ // must be implemented here.
+ break;
+ case TTI::SK_ExtractSubvector:
+ // Example sequence:
+ // vsetivli zero, 4, e8, mf2, tu, ma (ignored)
+ // vslidedown.vi v8, v9, 2
+ return LT.first * getLMULCost(LT.second);
+ case TTI::SK_InsertSubvector:
+ // Example sequence:
+ // vsetivli zero, 4, e8, mf2, tu, ma (ignored)
+ // vslideup.vi v8, v9, 2
+ return LT.first * getLMULCost(LT.second);
+ case TTI::SK_Select: {
+ // Example sequence:
+ // li a0, 90
+ // vsetivli zero, 8, e8, mf2, ta, ma (ignored)
+ // vmv.s.x v0, a0
+ // vmerge.vvm v8, v9, v8, v0
+ return LT.first * 3 * getLMULCost(LT.second);
+ }
+ case TTI::SK_Broadcast: {
bool HasScalar = (Args.size() > 0) && (Operator::getOpcode(Args[0]) ==
Instruction::InsertElement);
if (LT.second.getScalarSizeInBits() == 1) {
@@ -322,7 +406,36 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// implementation-dependent.
return LT.first * getLMULCost(LT.second);
}
-
+ case TTI::SK_Splice:
+ // vslidedown+vslideup.
+ // TODO: Multiplying by LT.first implies this legalizes into multiple copies
+ // of similar code, but I think we expand through memory.
+ return 2 * LT.first * getLMULCost(LT.second);
+ case TTI::SK_Reverse: {
+ // TODO: Cases to improve here:
+ // * Illegal vector types
+ // * i64 on RV32
+ // * i1 vector
+ // At low LMUL, most of the cost is producing the vrgather index register.
+ // At high LMUL, the cost of the vrgather itself will dominate.
+ // Example sequence:
+ // csrr a0, vlenb
+ // srli a0, a0, 3
+ // addi a0, a0, -1
+ // vsetvli a1, zero, e8, mf8, ta, mu (ignored)
+ // vid.v v9
+ // vrsub.vx v10, v9, a0
+ // vrgather.vv v9, v8, v10
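+ // In the sequence above, csrr/srli/addi correspond to LenCost, vid.v and
+ // vrsub.vx to the constant part of GatherCost, and vrgather.vv to its
+ // LMUL-dependent part (an illustrative mapping of the constants below).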
+ InstructionCost LenCost = 3;
+ if (LT.second.isFixedLengthVector())
+ // vrsub.vi has a 5-bit immediate field; otherwise an li is needed
+ LenCost = isInt<5>(LT.second.getVectorNumElements() - 1) ? 0 : 1;
+ InstructionCost GatherCost = 2 + getVRGatherVVCost(LT.second);
+ // An i1 (mask) vector additionally requires an extend and a truncate
+ InstructionCost ExtendCost = Tp->getElementType()->isIntegerTy(1) ? 3 : 0;
+ return LT.first * (LenCost + GatherCost + ExtendCost);
+ }
+ }
return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
}
@@ -338,6 +451,85 @@ RISCVTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
return getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
}
+InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond, bool UseMaskForGaps) {
+ if (isa<ScalableVectorType>(VecTy))
+ return InstructionCost::getInvalid();
+ auto *FVTy = cast<FixedVectorType>(VecTy);
+ InstructionCost MemCost =
+ getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, CostKind);
+ unsigned VF = FVTy->getNumElements() / Factor;
+
+ // The interleaved memory access pass will lower interleaved memory ops (i.e.
+ // a load or store combined with a specific shuffle) to vlseg/vsseg
+ // intrinsics. In those cases we can treat it as if it's just one (legal)
+ // memory op.
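+ // For example, a Factor=3 interleaved load of i32 elements is expected to
+ // become a single vlseg3e32.v (assuming the usual RVV segment-load
+ // lowering), so only the one legal memory op is costed here.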
+ if (!UseMaskForCond && !UseMaskForGaps &&
+ Factor <= TLI->getMaxSupportedInterleaveFactor()) {
+ std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(FVTy);
+ // Need to make sure the type hasn't been scalarized
+ if (LT.second.isFixedLengthVector()) {
+ auto *LegalFVTy = FixedVectorType::get(FVTy->getElementType(),
+ LT.second.getVectorNumElements());
+ // FIXME: We use the memory op cost of the *legalized* type here, because
+ // getMemoryOpCost returns a really expensive cost for types like
+ // <6 x i8>, which show up when doing interleaves of Factor=3 etc.
+ // Should the memory op cost of these be cheaper?
+ if (TLI->isLegalInterleavedAccessType(LegalFVTy, Factor, Alignment,
+ AddressSpace, DL)) {
+ InstructionCost LegalMemCost = getMemoryOpCost(
+ Opcode, LegalFVTy, Alignment, AddressSpace, CostKind);
+ return LT.first + LegalMemCost;
+ }
+ }
+ }
+
+ // An interleaved load will look like this for Factor=3:
+ // %wide.vec = load <12 x i32>, ptr %3, align 4
+ // %strided.vec = shufflevector %wide.vec, poison, <4 x i32> <stride mask>
+ // %strided.vec1 = shufflevector %wide.vec, poison, <4 x i32> <stride mask>
+ // %strided.vec2 = shufflevector %wide.vec, poison, <4 x i32> <stride mask>
+ if (Opcode == Instruction::Load) {
+ InstructionCost Cost = MemCost;
+ for (unsigned Index : Indices) {
+ FixedVectorType *SubVecTy =
+ FixedVectorType::get(FVTy->getElementType(), VF);
+ auto Mask = createStrideMask(Index, Factor, VF);
+ InstructionCost ShuffleCost =
+ getShuffleCost(TTI::ShuffleKind::SK_PermuteSingleSrc, SubVecTy, Mask,
+ CostKind, 0, nullptr, {});
+ Cost += ShuffleCost;
+ }
+ return Cost;
+ }
+
+ // TODO: Model for NF > 2
+ // We'll need to enhance getShuffleCost to model shuffles that are just
+ // inserts and extracts into subvectors, since they won't have the full cost
+ // of a vrgather.
+ // An interleaved store for 3 vectors of 4 lanes will look like
+ // %11 = shufflevector <4 x i32> %4, <4 x i32> %6, <8 x i32> <0...7>
+ // %12 = shufflevector <4 x i32> %9, <4 x i32> poison, <8 x i32> <0...3>
+ // %13 = shufflevector <8 x i32> %11, <8 x i32> %12, <12 x i32> <0...11>
+ // %interleaved.vec = shufflevector %13, poison, <12 x i32> <interleave mask>
+ // store <12 x i32> %interleaved.vec, ptr %10, align 4
+ if (Factor != 2)
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace, CostKind,
+ UseMaskForCond, UseMaskForGaps);
+
+ assert(Opcode == Instruction::Store && "Opcode must be a store");
+ // For an interleaving store of 2 vectors, we perform one large interleaving
+ // shuffle that goes into the wide store.
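+ // E.g., two <4 x i32> inputs are combined by one <8 x i32> interleave
+ // shuffle and written with a single wide store, so the result below is
+ // MemCost + ShuffleCost (illustrative of the Factor=2 path).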
+ auto Mask = createInterleaveMask(VF, Factor);
+ InstructionCost ShuffleCost =
+ getShuffleCost(TTI::ShuffleKind::SK_PermuteSingleSrc, FVTy, Mask,
+ CostKind, 0, nullptr, {});
+ return MemCost + ShuffleCost;
+}
+
InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
@@ -453,6 +645,40 @@ static const CostTblEntry VectorIntrinsicCostTable[]{
{Intrinsic::roundeven, MVT::nxv2f64, 9},
{Intrinsic::roundeven, MVT::nxv4f64, 9},
{Intrinsic::roundeven, MVT::nxv8f64, 9},
+ {Intrinsic::rint, MVT::v2f32, 7},
+ {Intrinsic::rint, MVT::v4f32, 7},
+ {Intrinsic::rint, MVT::v8f32, 7},
+ {Intrinsic::rint, MVT::v16f32, 7},
+ {Intrinsic::rint, MVT::nxv1f32, 7},
+ {Intrinsic::rint, MVT::nxv2f32, 7},
+ {Intrinsic::rint, MVT::nxv4f32, 7},
+ {Intrinsic::rint, MVT::nxv8f32, 7},
+ {Intrinsic::rint, MVT::nxv16f32, 7},
+ {Intrinsic::rint, MVT::v2f64, 7},
+ {Intrinsic::rint, MVT::v4f64, 7},
+ {Intrinsic::rint, MVT::v8f64, 7},
+ {Intrinsic::rint, MVT::v16f64, 7},
+ {Intrinsic::rint, MVT::nxv1f64, 7},
+ {Intrinsic::rint, MVT::nxv2f64, 7},
+ {Intrinsic::rint, MVT::nxv4f64, 7},
+ {Intrinsic::rint, MVT::nxv8f64, 7},
+ {Intrinsic::nearbyint, MVT::v2f32, 9},
+ {Intrinsic::nearbyint, MVT::v4f32, 9},
+ {Intrinsic::nearbyint, MVT::v8f32, 9},
+ {Intrinsic::nearbyint, MVT::v16f32, 9},
+ {Intrinsic::nearbyint, MVT::nxv1f32, 9},
+ {Intrinsic::nearbyint, MVT::nxv2f32, 9},
+ {Intrinsic::nearbyint, MVT::nxv4f32, 9},
+ {Intrinsic::nearbyint, MVT::nxv8f32, 9},
+ {Intrinsic::nearbyint, MVT::nxv16f32, 9},
+ {Intrinsic::nearbyint, MVT::v2f64, 9},
+ {Intrinsic::nearbyint, MVT::v4f64, 9},
+ {Intrinsic::nearbyint, MVT::v8f64, 9},
+ {Intrinsic::nearbyint, MVT::v16f64, 9},
+ {Intrinsic::nearbyint, MVT::nxv1f64, 9},
+ {Intrinsic::nearbyint, MVT::nxv2f64, 9},
+ {Intrinsic::nearbyint, MVT::nxv4f64, 9},
+ {Intrinsic::nearbyint, MVT::nxv8f64, 9},
{Intrinsic::bswap, MVT::v2i16, 3},
{Intrinsic::bswap, MVT::v4i16, 3},
{Intrinsic::bswap, MVT::v8i16, 3},
@@ -840,7 +1066,9 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
case Intrinsic::sadd_sat:
case Intrinsic::ssub_sat:
case Intrinsic::uadd_sat:
- case Intrinsic::usub_sat: {
+ case Intrinsic::usub_sat:
+ case Intrinsic::fabs:
+ case Intrinsic::sqrt: {
auto LT = getTypeLegalizationCost(RetTy);
if (ST->hasVInstructions() && LT.second.isVector())
return LT.first;
@@ -855,13 +1083,6 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
}
break;
}
- case Intrinsic::fabs:
- case Intrinsic::sqrt: {
- auto LT = getTypeLegalizationCost(RetTy);
- if (ST->hasVInstructions() && LT.second.isVector())
- return LT.first;
- break;
- }
// TODO: add more intrinsic
case Intrinsic::experimental_stepvector: {
unsigned Cost = 1; // vid
@@ -998,15 +1219,15 @@ unsigned RISCVTTIImpl::getEstimatedVLFor(VectorType *Ty) {
}
InstructionCost
-RISCVTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsUnsigned,
+RISCVTTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind) {
if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
- return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
+ return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
// Skip if scalar size of Ty is bigger than ELEN.
if (Ty->getScalarSizeInBits() > ST->getELEN())
- return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);
+ return BaseT::getMinMaxReductionCost(IID, Ty, FMF, CostKind);
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
if (Ty->getElementType()->isIntegerTy(1))
@@ -1016,6 +1237,10 @@ RISCVTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
// IR Reduction is composed by two vmv and one rvv reduction instruction.
InstructionCost BaseCost = 2;
+
+ if (CostKind == TTI::TCK_CodeSize)
+ return (LT.first - 1) + BaseCost;
+
unsigned VL = getEstimatedVLFor(Ty);
return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
}
@@ -1045,6 +1270,10 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
// IR Reduction is composed by two vmv and one rvv reduction instruction.
InstructionCost BaseCost = 2;
+
+ if (CostKind == TTI::TCK_CodeSize)
+ return (LT.first - 1) + BaseCost;
+
unsigned VL = getEstimatedVLFor(Ty);
if (TTI::requiresOrderedReduction(FMF))
return (LT.first - 1) + BaseCost + VL;
@@ -1053,7 +1282,7 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
InstructionCost RISCVTTIImpl::getExtendedReductionCost(
unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *ValTy,
- std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) {
+ FastMathFlags FMF, TTI::TargetCostKind CostKind) {
if (isa<FixedVectorType>(ValTy) && !ST->useRVVForFixedLengthVectors())
return BaseT::getExtendedReductionCost(Opcode, IsUnsigned, ResTy, ValTy,
FMF, CostKind);
@@ -1093,11 +1322,7 @@ InstructionCost RISCVTTIImpl::getStoreImmCost(Type *Ty,
// with how we treat scalar constants themselves just above.
return 1;
- // Add a cost of address generation + the cost of the vector load. The
- // address is expected to be a PC relative offset to a constant pool entry
- // using auipc/addi.
- return 2 + getMemoryOpCost(Instruction::Load, Ty, DL.getABITypeAlign(Ty),
- /*AddressSpace=*/0, CostKind);
+ return getConstantPoolLoadCost(Ty, CostKind);
}
@@ -1107,11 +1332,26 @@ InstructionCost RISCVTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
TTI::TargetCostKind CostKind,
TTI::OperandValueInfo OpInfo,
const Instruction *I) {
+ EVT VT = TLI->getValueType(DL, Src, true);
+ // Type legalization can't handle structs
+ if (VT == MVT::Other)
+ return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind, OpInfo, I);
+
InstructionCost Cost = 0;
if (Opcode == Instruction::Store && OpInfo.isConstant())
Cost += getStoreImmCost(Src, OpInfo, CostKind);
- return Cost + BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
- CostKind, OpInfo, I);
+ InstructionCost BaseCost =
+ BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind, OpInfo, I);
+ // Assume memory op costs scale with the number of vector registers
+ // possibly accessed by the instruction. Note that BasicTTI already
+ // handles the LT.first term for us.
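+ // E.g., if getLMULCost returns 4 for an LMUL=4 vector type, the memory op
+ // is costed at roughly 4x its LMUL=1 equivalent (illustrative; the exact
+ // factor is whatever getLMULCost reports for the legalized MVT).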
+ if (std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);
+ LT.second.isVector())
+ BaseCost *= getLMULCost(LT.second);
+ return Cost + BaseCost;
+
}
InstructionCost RISCVTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
@@ -1330,11 +1570,7 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
// scalar constants in GPRs.
return 0;
- // Add a cost of address generation + the cost of the vector load. The
- // address is expected to be a PC relative offset to a constant pool entry
- // using auipc/addi.
- return 2 + getMemoryOpCost(Instruction::Load, Ty, DL.getABITypeAlign(Ty),
- /*AddressSpace=*/0, CostKind);
+ return getConstantPoolLoadCost(Ty, CostKind);
};
// Add the cost of materializing any constant vectors required.
@@ -1369,6 +1605,55 @@ InstructionCost RISCVTTIImpl::getArithmeticInstrCost(
}
}
+// TODO: Deduplicate from TargetTransformInfoImplCRTPBase.
+InstructionCost RISCVTTIImpl::getPointersChainCost(
+ ArrayRef<const Value *> Ptrs, const Value *Base,
+ const TTI::PointersChainInfo &Info, Type *AccessTy,
+ TTI::TargetCostKind CostKind) {
+ InstructionCost Cost = TTI::TCC_Free;
+ // In the basic model we take into account GEP instructions only
+ // (although other things such as alloca instructions, values, constants
+ // and/or constant expressions, PHIs, and bitcasts may be used as pointers).
+ // Typically, if Base is not a GEP instruction and all the pointers are
+ // relative to the same base address, all the rest are either GEP
+ // instructions, PHIs, bitcasts or constants. When we have the same base, we
+ // just calculate the cost of each non-Base GEP as an ADD operation if any
+ // of its indices is non-constant.
+ // If there are no known dependencies between the pointers, the cost is
+ // calculated as the sum of the costs of the GEP instructions.
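+ // For instance, with a unit-stride chain of i32 accesses sharing one base,
+ // the I-th GEP folds into a (BaseReg + 4*I) addressing mode when that mode
+ // is legal and is treated as free; otherwise it is costed as a single ADD
+ // (a sketch of the loop below, assuming a 4-byte AccessTy).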
+ for (auto [I, V] : enumerate(Ptrs)) {
+ const auto *GEP = dyn_cast<GetElementPtrInst>(V);
+ if (!GEP)
+ continue;
+ if (Info.isSameBase() && V != Base) {
+ if (GEP->hasAllConstantIndices())
+ continue;
+ // If the chain is unit-stride and BaseReg + stride*i is a legal
+ // addressing mode, then presume the base GEP is sitting around in a
+ // register somewhere and check if we can fold the offset relative to
+ // it.
+ unsigned Stride = DL.getTypeStoreSize(AccessTy);
+ if (Info.isUnitStride() &&
+ isLegalAddressingMode(AccessTy,
+ /* BaseGV */ nullptr,
+ /* BaseOffset */ Stride * I,
+ /* HasBaseReg */ true,
+ /* Scale */ 0,
+ GEP->getType()->getPointerAddressSpace()))
+ continue;
+ Cost += getArithmeticInstrCost(Instruction::Add, GEP->getType(), CostKind,
+ {TTI::OK_AnyValue, TTI::OP_None},
+ {TTI::OK_AnyValue, TTI::OP_None},
+ std::nullopt);
+ } else {
+ SmallVector<const Value *> Indices(GEP->indices());
+ Cost += getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
+ Indices, AccessTy, CostKind);
+ }
+ }
+ return Cost;
+}
+
void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE) {
@@ -1466,17 +1751,24 @@ unsigned RISCVTTIImpl::getRegUsageForType(Type *Ty) {
}
unsigned RISCVTTIImpl::getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
- // This interface is currently only used by SLP. Returning 1 (which is the
- // default value for SLPMaxVF) disables SLP. We currently have a cost modeling
- // problem w/ constant materialization which causes SLP to perform majorly
- // unprofitable transformations.
- // TODO: Figure out constant materialization cost modeling and remove.
- return SLPMaxVF;
+ if (SLPMaxVF.getNumOccurrences())
+ return SLPMaxVF;
+
+ // Return how many elements can fit in getRegisterBitWidth. This is the
+ // same routine as used in the LoopVectorizer. We should probably be
+ // accounting for whether we actually have instructions with the right
+ // lane type, but we don't have enough information to do that without
+ // some additional plumbing which hasn't been justified yet.
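+ // E.g., a 128-bit fixed-width vector register and 16-bit elements would
+ // give an SLP VF of 128 / 16 = 8 (purely illustrative numbers).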
+ TypeSize RegWidth =
+ getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
+ // If no vector registers, or absurd element widths, disable
+ // vectorization by returning 1.
+ return std::max<unsigned>(1U, RegWidth.getFixedValue() / ElemWidth);
}
bool RISCVTTIImpl::isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
const TargetTransformInfo::LSRCost &C2) {
- // RISCV specific here are "instruction number 1st priority".
+ // The RISC-V-specific choice here is to give instruction count first priority.
return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
C1.NumIVMuls, C1.NumBaseAdds,
C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 2bde679c184b..7ffcb4828d0c 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -51,6 +51,10 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
/// Return the cost of LMUL. The larger the LMUL, the higher the cost.
InstructionCost getLMULCost(MVT VT);
+ /// Return the cost of accessing a constant pool entry of the specified
+ /// type.
+ InstructionCost getConstantPoolLoadCost(Type *Ty,
+ TTI::TargetCostKind CostKind);
public:
explicit RISCVTTIImpl(const RISCVTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
@@ -75,10 +79,12 @@ public:
bool shouldExpandReduction(const IntrinsicInst *II) const;
bool supportsScalableVectors() const { return ST->hasVInstructions(); }
+ bool enableOrderedReductions() const { return true; }
bool enableScalableVectorization() const { return ST->hasVInstructions(); }
- PredicationStyle emitGetActiveLaneMask() const {
- return ST->hasVInstructions() ? PredicationStyle::Data
- : PredicationStyle::None;
+ TailFoldingStyle
+ getPreferredTailFoldingStyle(bool IVUpdateMayOverflow) const {
+ return ST->hasVInstructions() ? TailFoldingStyle::Data
+ : TailFoldingStyle::DataWithoutLaneMask;
}
std::optional<unsigned> getMaxVScale() const;
std::optional<unsigned> getVScaleForTuning() const;
@@ -100,6 +106,12 @@ public:
Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind);
+ InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
+ const Value *Base,
+ const TTI::PointersChainInfo &Info,
+ Type *AccessTy,
+ TTI::TargetCostKind CostKind);
+
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP,
OptimizationRemarkEmitter *ORE);
@@ -111,7 +123,8 @@ public:
return ST->useRVVForFixedLengthVectors() ? 16 : 0;
}
- InstructionCost getSpliceCost(VectorType *Tp, int Index);
+ InstructionCost getVRGatherVVCost(MVT VT);
+
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
ArrayRef<int> Mask,
TTI::TargetCostKind CostKind, int Index,
@@ -121,6 +134,11 @@ public:
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind);
+ InstructionCost getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false);
+
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
const Value *Ptr, bool VariableMask,
Align Alignment,
@@ -132,8 +150,8 @@ public:
TTI::TargetCostKind CostKind,
const Instruction *I = nullptr);
- InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsUnsigned,
+ InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind);
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
@@ -142,7 +160,7 @@ public:
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
Type *ResTy, VectorType *ValTy,
- std::optional<FastMathFlags> FMF,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind);
InstructionCost
@@ -169,28 +187,25 @@ public:
const Instruction *CxtI = nullptr);
bool isElementTypeLegalForScalableVector(Type *Ty) const {
- return TLI->isLegalElementTypeForRVV(Ty);
+ return TLI->isLegalElementTypeForRVV(TLI->getValueType(DL, Ty));
}
bool isLegalMaskedLoadStore(Type *DataType, Align Alignment) {
if (!ST->hasVInstructions())
return false;
+ EVT DataTypeVT = TLI->getValueType(DL, DataType);
+
// Only support fixed vectors if we know the minimum vector size.
- if (isa<FixedVectorType>(DataType) && !ST->useRVVForFixedLengthVectors())
+ if (DataTypeVT.isFixedLengthVector() && !ST->useRVVForFixedLengthVectors())
return false;
- // Don't allow elements larger than the ELEN.
- // FIXME: How to limit for scalable vectors?
- if (isa<FixedVectorType>(DataType) &&
- DataType->getScalarSizeInBits() > ST->getELEN())
+ EVT ElemType = DataTypeVT.getScalarType();
+ if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
return false;
- if (Alignment <
- DL.getTypeStoreSize(DataType->getScalarType()).getFixedValue())
- return false;
+ return TLI->isLegalElementTypeForRVV(ElemType);
- return TLI->isLegalElementTypeForRVV(DataType->getScalarType());
}
bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
@@ -204,21 +219,17 @@ public:
if (!ST->hasVInstructions())
return false;
- // Only support fixed vectors if we know the minimum vector size.
- if (isa<FixedVectorType>(DataType) && !ST->useRVVForFixedLengthVectors())
- return false;
+ EVT DataTypeVT = TLI->getValueType(DL, DataType);
- // Don't allow elements larger than the ELEN.
- // FIXME: How to limit for scalable vectors?
- if (isa<FixedVectorType>(DataType) &&
- DataType->getScalarSizeInBits() > ST->getELEN())
+ // Only support fixed vectors if we know the minimum vector size.
+ if (DataTypeVT.isFixedLengthVector() && !ST->useRVVForFixedLengthVectors())
return false;
- if (Alignment <
- DL.getTypeStoreSize(DataType->getScalarType()).getFixedValue())
+ EVT ElemType = DataTypeVT.getScalarType();
+ if (!ST->enableUnalignedVectorMem() && Alignment < ElemType.getStoreSize())
return false;
- return TLI->isLegalElementTypeForRVV(DataType->getScalarType());
+ return TLI->isLegalElementTypeForRVV(ElemType);
}
bool isLegalMaskedGather(Type *DataType, Align Alignment) {
@@ -238,6 +249,10 @@ public:
return ST->is64Bit() && !ST->hasVInstructionsI64();
}
+ bool isVScaleKnownToBeAPowerOfTwo() const {
+ return TLI->isVScaleKnownToBeAPowerOfTwo();
+ }
+
/// \returns How the target needs this vector-predicated operation to be
/// transformed.
TargetTransformInfo::VPLegalization
@@ -258,7 +273,7 @@ public:
return true;
Type *Ty = RdxDesc.getRecurrenceType();
- if (!TLI->isLegalElementTypeForRVV(Ty))
+ if (!TLI->isLegalElementTypeForRVV(TLI->getValueType(DL, Ty)))
return false;
switch (RdxDesc.getRecurrenceKind()) {
@@ -282,12 +297,17 @@ public:
}
}
- unsigned getMaxInterleaveFactor(unsigned VF) {
+ unsigned getMaxInterleaveFactor(ElementCount VF) {
+ // Don't interleave if the loop has been vectorized with scalable vectors.
+ if (VF.isScalable())
+ return 1;
// If the loop will not be vectorized, don't interleave the loop.
 // Let the regular unroller unroll the loop.
- return VF == 1 ? 1 : ST->getMaxInterleaveFactor();
+ return VF.isScalar() ? 1 : ST->getMaxInterleaveFactor();
}
+ bool enableInterleavedAccessVectorization() { return true; }
+
enum RISCVRegisterClass { GPRRC, FPRRC, VRRC };
unsigned getNumberOfRegisters(unsigned ClassID) const {
switch (ClassID) {
diff --git a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
index 7b63b060dd9c..0a675d684912 100644
--- a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
+++ b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
@@ -1,4 +1,4 @@
-//===-- RISCVTargetInfo.cpp - RISCV Target Implementation -----------------===//
+//===-- RISCVTargetInfo.cpp - RISC-V Target Implementation ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h
index ef3d9d116efa..ed00a01fa1a2 100644
--- a/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h
+++ b/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.h
@@ -1,4 +1,4 @@
-//===-- RISCVTargetInfo.h - RISCV Target Implementation ---------*- C++ -*-===//
+//===-- RISCVTargetInfo.h - RISC-V Target Implementation --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.cpp b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.cpp
index 2f3462f419e5..800bf2297fa7 100644
--- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.cpp
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVMCAsmInfo.cpp
@@ -11,7 +11,7 @@
//===----------------------------------------------------------------------===//
#include "SPIRVMCAsmInfo.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.h b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.h
index 2cc8f50aba67..842958695e10 100644
--- a/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.h
+++ b/llvm/lib/Target/SPIRV/MCTargetDesc/SPIRVTargetStreamer.h
@@ -21,7 +21,7 @@ public:
~SPIRVTargetStreamer() override;
void changeSection(const MCSection *CurSection, MCSection *Section,
- const MCExpr *SubSection, raw_ostream &OS) override{};
+ const MCExpr *SubSection, raw_ostream &OS) override {}
};
} // namespace llvm
diff --git a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
index 2e822a318ea6..d07c0bcdf9af 100644
--- a/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp
@@ -134,8 +134,6 @@ void SPIRVAsmPrinter::emitFunctionBodyEnd() {
}
void SPIRVAsmPrinter::emitOpLabel(const MachineBasicBlock &MBB) {
- if (MAI->MBBsToSkip.contains(&MBB))
- return;
MCInst LabelInst;
LabelInst.setOpcode(SPIRV::OpLabel);
LabelInst.addOperand(MCOperand::createReg(MAI->getOrCreateMBBRegister(MBB)));
@@ -143,6 +141,8 @@ void SPIRVAsmPrinter::emitOpLabel(const MachineBasicBlock &MBB) {
}
void SPIRVAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) {
+ assert(!MBB.empty() && "MBB is empty!");
+
// If it's the first MBB in MF, it has OpFunction and OpFunctionParameter, so
// OpLabel should be output after them.
if (MBB.getNumber() == MF->front().getNumber()) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 2f44c999e5a2..c53f1643adc0 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -14,6 +14,7 @@
#include "SPIRVBuiltins.h"
#include "SPIRV.h"
#include "SPIRVUtils.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicsSPIRV.h"
#include <string>
@@ -291,6 +292,7 @@ buildBoolRegister(MachineIRBuilder &MIRBuilder, const SPIRVType *ResultType,
Register ResultRegister =
MIRBuilder.getMRI()->createGenericVirtualRegister(Type);
+ MIRBuilder.getMRI()->setRegClass(ResultRegister, &SPIRV::IDRegClass);
GR->assignSPIRVTypeToVReg(BoolType, ResultRegister, MIRBuilder.getMF());
return std::make_tuple(ResultRegister, BoolType);
}
@@ -305,7 +307,7 @@ static bool buildSelectInst(MachineIRBuilder &MIRBuilder,
if (ReturnType->getOpcode() == SPIRV::OpTypeVector) {
unsigned Bits = GR->getScalarOrVectorBitWidth(ReturnType);
- uint64_t AllOnes = APInt::getAllOnesValue(Bits).getZExtValue();
+ uint64_t AllOnes = APInt::getAllOnes(Bits).getZExtValue();
TrueConst = GR->getOrCreateConsIntVector(AllOnes, MIRBuilder, ReturnType);
FalseConst = GR->getOrCreateConsIntVector(0, MIRBuilder, ReturnType);
} else {
@@ -417,33 +419,41 @@ static Register buildConstantIntReg(uint64_t Val, MachineIRBuilder &MIRBuilder,
}
static Register buildScopeReg(Register CLScopeRegister,
+ SPIRV::Scope::Scope Scope,
MachineIRBuilder &MIRBuilder,
SPIRVGlobalRegistry *GR,
- const MachineRegisterInfo *MRI) {
- auto CLScope =
- static_cast<SPIRV::CLMemoryScope>(getIConstVal(CLScopeRegister, MRI));
- SPIRV::Scope::Scope Scope = getSPIRVScope(CLScope);
-
- if (CLScope == static_cast<unsigned>(Scope))
- return CLScopeRegister;
-
+ MachineRegisterInfo *MRI) {
+ if (CLScopeRegister.isValid()) {
+ auto CLScope =
+ static_cast<SPIRV::CLMemoryScope>(getIConstVal(CLScopeRegister, MRI));
+ Scope = getSPIRVScope(CLScope);
+
+ if (CLScope == static_cast<unsigned>(Scope)) {
+ MRI->setRegClass(CLScopeRegister, &SPIRV::IDRegClass);
+ return CLScopeRegister;
+ }
+ }
return buildConstantIntReg(Scope, MIRBuilder, GR);
}
static Register buildMemSemanticsReg(Register SemanticsRegister,
- Register PtrRegister,
- const MachineRegisterInfo *MRI,
+ Register PtrRegister, unsigned &Semantics,
+ MachineIRBuilder &MIRBuilder,
SPIRVGlobalRegistry *GR) {
- std::memory_order Order =
- static_cast<std::memory_order>(getIConstVal(SemanticsRegister, MRI));
- unsigned Semantics =
- getSPIRVMemSemantics(Order) |
- getMemSemanticsForStorageClass(GR->getPointerStorageClass(PtrRegister));
-
- if (Order == Semantics)
- return SemanticsRegister;
+ if (SemanticsRegister.isValid()) {
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+ std::memory_order Order =
+ static_cast<std::memory_order>(getIConstVal(SemanticsRegister, MRI));
+ Semantics =
+ getSPIRVMemSemantics(Order) |
+ getMemSemanticsForStorageClass(GR->getPointerStorageClass(PtrRegister));
- return Register();
+ if (Order == Semantics) {
+ MRI->setRegClass(SemanticsRegister, &SPIRV::IDRegClass);
+ return SemanticsRegister;
+ }
+ }
+ return buildConstantIntReg(Semantics, MIRBuilder, GR);
}
/// Helper function for translating atomic init to OpStore.
@@ -451,7 +461,8 @@ static bool buildAtomicInitInst(const SPIRV::IncomingCall *Call,
MachineIRBuilder &MIRBuilder) {
assert(Call->Arguments.size() == 2 &&
"Need 2 arguments for atomic init translation");
-
+ MIRBuilder.getMRI()->setRegClass(Call->Arguments[0], &SPIRV::IDRegClass);
+ MIRBuilder.getMRI()->setRegClass(Call->Arguments[1], &SPIRV::IDRegClass);
MIRBuilder.buildInstr(SPIRV::OpStore)
.addUse(Call->Arguments[0])
.addUse(Call->Arguments[1]);
@@ -463,19 +474,22 @@ static bool buildAtomicLoadInst(const SPIRV::IncomingCall *Call,
MachineIRBuilder &MIRBuilder,
SPIRVGlobalRegistry *GR) {
Register PtrRegister = Call->Arguments[0];
+ MIRBuilder.getMRI()->setRegClass(PtrRegister, &SPIRV::IDRegClass);
// TODO: if true insert call to __translate_ocl_memory_sccope before
// OpAtomicLoad and the function implementation. We can use Translator's
// output for transcoding/atomic_explicit_arguments.cl as an example.
Register ScopeRegister;
- if (Call->Arguments.size() > 1)
+ if (Call->Arguments.size() > 1) {
ScopeRegister = Call->Arguments[1];
- else
+ MIRBuilder.getMRI()->setRegClass(ScopeRegister, &SPIRV::IDRegClass);
+ } else
ScopeRegister = buildConstantIntReg(SPIRV::Scope::Device, MIRBuilder, GR);
Register MemSemanticsReg;
if (Call->Arguments.size() > 2) {
// TODO: Insert call to __translate_ocl_memory_order before OpAtomicLoad.
MemSemanticsReg = Call->Arguments[2];
+ MIRBuilder.getMRI()->setRegClass(MemSemanticsReg, &SPIRV::IDRegClass);
} else {
int Semantics =
SPIRV::MemorySemantics::SequentiallyConsistent |
@@ -499,11 +513,12 @@ static bool buildAtomicStoreInst(const SPIRV::IncomingCall *Call,
Register ScopeRegister =
buildConstantIntReg(SPIRV::Scope::Device, MIRBuilder, GR);
Register PtrRegister = Call->Arguments[0];
+ MIRBuilder.getMRI()->setRegClass(PtrRegister, &SPIRV::IDRegClass);
int Semantics =
SPIRV::MemorySemantics::SequentiallyConsistent |
getMemSemanticsForStorageClass(GR->getPointerStorageClass(PtrRegister));
Register MemSemanticsReg = buildConstantIntReg(Semantics, MIRBuilder, GR);
-
+ MIRBuilder.getMRI()->setRegClass(Call->Arguments[1], &SPIRV::IDRegClass);
MIRBuilder.buildInstr(SPIRV::OpAtomicStore)
.addUse(PtrRegister)
.addUse(ScopeRegister)
@@ -525,6 +540,9 @@ static bool buildAtomicCompareExchangeInst(const SPIRV::IncomingCall *Call,
Register ObjectPtr = Call->Arguments[0]; // Pointer (volatile A *object.)
Register ExpectedArg = Call->Arguments[1]; // Comparator (C* expected).
Register Desired = Call->Arguments[2]; // Value (C Desired).
+ MRI->setRegClass(ObjectPtr, &SPIRV::IDRegClass);
+ MRI->setRegClass(ExpectedArg, &SPIRV::IDRegClass);
+ MRI->setRegClass(Desired, &SPIRV::IDRegClass);
SPIRVType *SpvDesiredTy = GR->getSPIRVTypeForVReg(Desired);
LLT DesiredLLT = MRI->getType(Desired);
@@ -564,6 +582,8 @@ static bool buildAtomicCompareExchangeInst(const SPIRV::IncomingCall *Call,
MemSemEqualReg = Call->Arguments[3];
if (MemOrdNeq == MemSemEqual)
MemSemUnequalReg = Call->Arguments[4];
+ MRI->setRegClass(Call->Arguments[3], &SPIRV::IDRegClass);
+ MRI->setRegClass(Call->Arguments[4], &SPIRV::IDRegClass);
}
if (!MemSemEqualReg.isValid())
MemSemEqualReg = buildConstantIntReg(MemSemEqual, MIRBuilder, GR);
@@ -580,6 +600,7 @@ static bool buildAtomicCompareExchangeInst(const SPIRV::IncomingCall *Call,
Scope = getSPIRVScope(ClScope);
if (ClScope == static_cast<unsigned>(Scope))
ScopeReg = Call->Arguments[5];
+ MRI->setRegClass(Call->Arguments[5], &SPIRV::IDRegClass);
}
if (!ScopeReg.isValid())
ScopeReg = buildConstantIntReg(Scope, MIRBuilder, GR);
@@ -591,6 +612,8 @@ static bool buildAtomicCompareExchangeInst(const SPIRV::IncomingCall *Call,
MRI->setType(Expected, DesiredLLT);
Register Tmp = !IsCmpxchg ? MRI->createGenericVirtualRegister(DesiredLLT)
: Call->ReturnRegister;
+ if (!MRI->getRegClassOrNull(Tmp))
+ MRI->setRegClass(Tmp, &SPIRV::IDRegClass);
GR->assignSPIRVTypeToVReg(SpvDesiredTy, Tmp, MIRBuilder.getMF());
SPIRVType *IntTy = GR->getOrCreateSPIRVIntegerType(32, MIRBuilder);
@@ -614,30 +637,23 @@ static bool buildAtomicCompareExchangeInst(const SPIRV::IncomingCall *Call,
static bool buildAtomicRMWInst(const SPIRV::IncomingCall *Call, unsigned Opcode,
MachineIRBuilder &MIRBuilder,
SPIRVGlobalRegistry *GR) {
- const MachineRegisterInfo *MRI = MIRBuilder.getMRI();
- SPIRV::Scope::Scope Scope = SPIRV::Scope::Workgroup;
- Register ScopeRegister;
-
- if (Call->Arguments.size() >= 4) {
- assert(Call->Arguments.size() == 4 &&
- "Too many args for explicit atomic RMW");
- ScopeRegister = buildScopeReg(Call->Arguments[3], MIRBuilder, GR, MRI);
- }
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+ Register ScopeRegister =
+ Call->Arguments.size() >= 4 ? Call->Arguments[3] : Register();
- if (!ScopeRegister.isValid())
- ScopeRegister = buildConstantIntReg(Scope, MIRBuilder, GR);
+ assert(Call->Arguments.size() <= 4 &&
+ "Too many args for explicit atomic RMW");
+ ScopeRegister = buildScopeReg(ScopeRegister, SPIRV::Scope::Workgroup,
+ MIRBuilder, GR, MRI);
Register PtrRegister = Call->Arguments[0];
unsigned Semantics = SPIRV::MemorySemantics::None;
- Register MemSemanticsReg;
-
- if (Call->Arguments.size() >= 3)
- MemSemanticsReg =
- buildMemSemanticsReg(Call->Arguments[2], PtrRegister, MRI, GR);
-
- if (!MemSemanticsReg.isValid())
- MemSemanticsReg = buildConstantIntReg(Semantics, MIRBuilder, GR);
-
+ MRI->setRegClass(PtrRegister, &SPIRV::IDRegClass);
+ Register MemSemanticsReg =
+ Call->Arguments.size() >= 3 ? Call->Arguments[2] : Register();
+ MemSemanticsReg = buildMemSemanticsReg(MemSemanticsReg, PtrRegister,
+ Semantics, MIRBuilder, GR);
+ MRI->setRegClass(Call->Arguments[1], &SPIRV::IDRegClass);
MIRBuilder.buildInstr(Opcode)
.addDef(Call->ReturnRegister)
.addUse(GR->getSPIRVTypeID(Call->ReturnType))
@@ -653,32 +669,23 @@ static bool buildAtomicRMWInst(const SPIRV::IncomingCall *Call, unsigned Opcode,
static bool buildAtomicFlagInst(const SPIRV::IncomingCall *Call,
unsigned Opcode, MachineIRBuilder &MIRBuilder,
SPIRVGlobalRegistry *GR) {
- const MachineRegisterInfo *MRI = MIRBuilder.getMRI();
-
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
Register PtrRegister = Call->Arguments[0];
unsigned Semantics = SPIRV::MemorySemantics::SequentiallyConsistent;
- Register MemSemanticsReg;
-
- if (Call->Arguments.size() >= 2)
- MemSemanticsReg =
- buildMemSemanticsReg(Call->Arguments[1], PtrRegister, MRI, GR);
-
- if (!MemSemanticsReg.isValid())
- MemSemanticsReg = buildConstantIntReg(Semantics, MIRBuilder, GR);
+ Register MemSemanticsReg =
+ Call->Arguments.size() >= 2 ? Call->Arguments[1] : Register();
+ MemSemanticsReg = buildMemSemanticsReg(MemSemanticsReg, PtrRegister,
+ Semantics, MIRBuilder, GR);
assert((Opcode != SPIRV::OpAtomicFlagClear ||
(Semantics != SPIRV::MemorySemantics::Acquire &&
Semantics != SPIRV::MemorySemantics::AcquireRelease)) &&
"Invalid memory order argument!");
- SPIRV::Scope::Scope Scope = SPIRV::Scope::Device;
- Register ScopeRegister;
-
- if (Call->Arguments.size() >= 3)
- ScopeRegister = buildScopeReg(Call->Arguments[2], MIRBuilder, GR, MRI);
-
- if (!ScopeRegister.isValid())
- ScopeRegister = buildConstantIntReg(Scope, MIRBuilder, GR);
+ Register ScopeRegister =
+ Call->Arguments.size() >= 3 ? Call->Arguments[2] : Register();
+ ScopeRegister =
+ buildScopeReg(ScopeRegister, SPIRV::Scope::Device, MIRBuilder, GR, MRI);
auto MIB = MIRBuilder.buildInstr(Opcode);
if (Opcode == SPIRV::OpAtomicFlagTestAndSet)
@@ -694,7 +701,7 @@ static bool buildAtomicFlagInst(const SPIRV::IncomingCall *Call,
static bool buildBarrierInst(const SPIRV::IncomingCall *Call, unsigned Opcode,
MachineIRBuilder &MIRBuilder,
SPIRVGlobalRegistry *GR) {
- const MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
unsigned MemFlags = getIConstVal(Call->Arguments[0], MRI);
unsigned MemSemantics = SPIRV::MemorySemantics::None;
@@ -716,9 +723,10 @@ static bool buildBarrierInst(const SPIRV::IncomingCall *Call, unsigned Opcode,
}
Register MemSemanticsReg;
- if (MemFlags == MemSemantics)
+ if (MemFlags == MemSemantics) {
MemSemanticsReg = Call->Arguments[0];
- else
+ MRI->setRegClass(MemSemanticsReg, &SPIRV::IDRegClass);
+ } else
MemSemanticsReg = buildConstantIntReg(MemSemantics, MIRBuilder, GR);
Register ScopeReg;
@@ -738,8 +746,10 @@ static bool buildBarrierInst(const SPIRV::IncomingCall *Call, unsigned Opcode,
(Opcode == SPIRV::OpMemoryBarrier))
Scope = MemScope;
- if (CLScope == static_cast<unsigned>(Scope))
+ if (CLScope == static_cast<unsigned>(Scope)) {
ScopeReg = Call->Arguments[1];
+ MRI->setRegClass(ScopeReg, &SPIRV::IDRegClass);
+ }
}
if (!ScopeReg.isValid())
@@ -834,7 +844,7 @@ static bool generateGroupInst(const SPIRV::IncomingCall *Call,
const SPIRV::DemangledBuiltin *Builtin = Call->Builtin;
const SPIRV::GroupBuiltin *GroupBuiltin =
SPIRV::lookupGroupBuiltin(Builtin->Name);
- const MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
Register Arg0;
if (GroupBuiltin->HasBoolArg) {
Register ConstRegister = Call->Arguments[0];
@@ -876,8 +886,11 @@ static bool generateGroupInst(const SPIRV::IncomingCall *Call,
MIB.addImm(GroupBuiltin->GroupOperation);
if (Call->Arguments.size() > 0) {
MIB.addUse(Arg0.isValid() ? Arg0 : Call->Arguments[0]);
- for (unsigned i = 1; i < Call->Arguments.size(); i++)
+ MRI->setRegClass(Call->Arguments[0], &SPIRV::IDRegClass);
+ for (unsigned i = 1; i < Call->Arguments.size(); i++) {
MIB.addUse(Call->Arguments[i]);
+ MRI->setRegClass(Call->Arguments[i], &SPIRV::IDRegClass);
+ }
}
// Build select instruction.
@@ -936,16 +949,17 @@ static bool genWorkgroupQuery(const SPIRV::IncomingCall *Call,
// If it's out of range (max dimension is 3), we can just return the constant
// default value (0 or 1 depending on which query function).
if (IsConstantIndex && getIConstVal(IndexRegister, MRI) >= 3) {
- Register defaultReg = Call->ReturnRegister;
+ Register DefaultReg = Call->ReturnRegister;
if (PointerSize != ResultWidth) {
- defaultReg = MRI->createGenericVirtualRegister(LLT::scalar(PointerSize));
- GR->assignSPIRVTypeToVReg(PointerSizeType, defaultReg,
+ DefaultReg = MRI->createGenericVirtualRegister(LLT::scalar(PointerSize));
+ MRI->setRegClass(DefaultReg, &SPIRV::IDRegClass);
+ GR->assignSPIRVTypeToVReg(PointerSizeType, DefaultReg,
MIRBuilder.getMF());
- ToTruncate = defaultReg;
+ ToTruncate = DefaultReg;
}
auto NewRegister =
GR->buildConstantInt(DefaultValue, MIRBuilder, PointerSizeType);
- MIRBuilder.buildCopy(defaultReg, NewRegister);
+ MIRBuilder.buildCopy(DefaultReg, NewRegister);
} else { // If it could be in range, we need to load from the given builtin.
auto Vec3Ty =
GR->getOrCreateSPIRVVectorType(PointerSizeType, 3, MIRBuilder);
@@ -956,6 +970,7 @@ static bool genWorkgroupQuery(const SPIRV::IncomingCall *Call,
Register Extracted = Call->ReturnRegister;
if (!IsConstantIndex || PointerSize != ResultWidth) {
Extracted = MRI->createGenericVirtualRegister(LLT::scalar(PointerSize));
+ MRI->setRegClass(Extracted, &SPIRV::IDRegClass);
GR->assignSPIRVTypeToVReg(PointerSizeType, Extracted, MIRBuilder.getMF());
}
// Use Intrinsic::spv_extractelt so dynamic vs static extraction is
@@ -974,6 +989,7 @@ static bool genWorkgroupQuery(const SPIRV::IncomingCall *Call,
Register CompareRegister =
MRI->createGenericVirtualRegister(LLT::scalar(1));
+ MRI->setRegClass(CompareRegister, &SPIRV::IDRegClass);
GR->assignSPIRVTypeToVReg(BoolType, CompareRegister, MIRBuilder.getMF());
// Use G_ICMP to check if idxVReg < 3.
@@ -990,6 +1006,7 @@ static bool genWorkgroupQuery(const SPIRV::IncomingCall *Call,
if (PointerSize != ResultWidth) {
SelectionResult =
MRI->createGenericVirtualRegister(LLT::scalar(PointerSize));
+ MRI->setRegClass(SelectionResult, &SPIRV::IDRegClass);
GR->assignSPIRVTypeToVReg(PointerSizeType, SelectionResult,
MIRBuilder.getMF());
}
@@ -1125,6 +1142,7 @@ static bool generateImageSizeQueryInst(const SPIRV::IncomingCall *Call,
if (NumExpectedRetComponents != NumActualRetComponents) {
QueryResult = MIRBuilder.getMRI()->createGenericVirtualRegister(
LLT::fixed_vector(NumActualRetComponents, 32));
+ MIRBuilder.getMRI()->setRegClass(QueryResult, &SPIRV::IDRegClass);
SPIRVType *IntTy = GR->getOrCreateSPIRVIntegerType(32, MIRBuilder);
QueryResultType = GR->getOrCreateSPIRVVectorType(
IntTy, NumActualRetComponents, MIRBuilder);
@@ -1133,6 +1151,7 @@ static bool generateImageSizeQueryInst(const SPIRV::IncomingCall *Call,
bool IsDimBuf = ImgType->getOperand(2).getImm() == SPIRV::Dim::DIM_Buffer;
unsigned Opcode =
IsDimBuf ? SPIRV::OpImageQuerySize : SPIRV::OpImageQuerySizeLod;
+ MIRBuilder.getMRI()->setRegClass(Call->Arguments[0], &SPIRV::IDRegClass);
auto MIB = MIRBuilder.buildInstr(Opcode)
.addDef(QueryResult)
.addUse(GR->getSPIRVTypeID(QueryResultType))
@@ -1177,6 +1196,7 @@ static bool generateImageMiscQueryInst(const SPIRV::IncomingCall *Call,
SPIRV::lookupNativeBuiltin(Builtin->Name, Builtin->Set)->Opcode;
Register Image = Call->Arguments[0];
+ MIRBuilder.getMRI()->setRegClass(Image, &SPIRV::IDRegClass);
SPIRV::Dim::Dim ImageDimensionality = static_cast<SPIRV::Dim::Dim>(
GR->getSPIRVTypeForVReg(Image)->getOperand(2).getImm());
@@ -1239,8 +1259,13 @@ static bool generateReadImageInst(const StringRef DemangledCall,
SPIRVGlobalRegistry *GR) {
Register Image = Call->Arguments[0];
MachineRegisterInfo *MRI = MIRBuilder.getMRI();
-
- if (DemangledCall.contains_insensitive("ocl_sampler")) {
+ MRI->setRegClass(Image, &SPIRV::IDRegClass);
+ MRI->setRegClass(Call->Arguments[1], &SPIRV::IDRegClass);
+ bool HasOclSampler = DemangledCall.contains_insensitive("ocl_sampler");
+ bool HasMsaa = DemangledCall.contains_insensitive("msaa");
+ if (HasOclSampler || HasMsaa)
+ MRI->setRegClass(Call->Arguments[2], &SPIRV::IDRegClass);
+ if (HasOclSampler) {
Register Sampler = Call->Arguments[1];
if (!GR->isScalarOfType(Sampler, SPIRV::OpTypeSampler) &&
@@ -1274,6 +1299,7 @@ static bool generateReadImageInst(const StringRef DemangledCall,
}
LLT LLType = LLT::scalar(GR->getScalarOrVectorBitWidth(TempType));
Register TempRegister = MRI->createGenericVirtualRegister(LLType);
+ MRI->setRegClass(TempRegister, &SPIRV::IDRegClass);
GR->assignSPIRVTypeToVReg(TempType, TempRegister, MIRBuilder.getMF());
MIRBuilder.buildInstr(SPIRV::OpImageSampleExplicitLod)
@@ -1290,7 +1316,7 @@ static bool generateReadImageInst(const StringRef DemangledCall,
.addUse(GR->getSPIRVTypeID(Call->ReturnType))
.addUse(TempRegister)
.addImm(0);
- } else if (DemangledCall.contains_insensitive("msaa")) {
+ } else if (HasMsaa) {
MIRBuilder.buildInstr(SPIRV::OpImageRead)
.addDef(Call->ReturnRegister)
.addUse(GR->getSPIRVTypeID(Call->ReturnType))
@@ -1311,6 +1337,9 @@ static bool generateReadImageInst(const StringRef DemangledCall,
static bool generateWriteImageInst(const SPIRV::IncomingCall *Call,
MachineIRBuilder &MIRBuilder,
SPIRVGlobalRegistry *GR) {
+ MIRBuilder.getMRI()->setRegClass(Call->Arguments[0], &SPIRV::IDRegClass);
+ MIRBuilder.getMRI()->setRegClass(Call->Arguments[1], &SPIRV::IDRegClass);
+ MIRBuilder.getMRI()->setRegClass(Call->Arguments[2], &SPIRV::IDRegClass);
MIRBuilder.buildInstr(SPIRV::OpImageWrite)
.addUse(Call->Arguments[0]) // Image.
.addUse(Call->Arguments[1]) // Coordinate.
@@ -1322,10 +1351,11 @@ static bool generateSampleImageInst(const StringRef DemangledCall,
const SPIRV::IncomingCall *Call,
MachineIRBuilder &MIRBuilder,
SPIRVGlobalRegistry *GR) {
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
if (Call->Builtin->Name.contains_insensitive(
"__translate_sampler_initializer")) {
// Build sampler literal.
- uint64_t Bitmask = getIConstVal(Call->Arguments[0], MIRBuilder.getMRI());
+ uint64_t Bitmask = getIConstVal(Call->Arguments[0], MRI);
Register Sampler = GR->buildConstantSampler(
Call->ReturnRegister, getSamplerAddressingModeFromBitmask(Bitmask),
getSamplerParamFromBitmask(Bitmask),
@@ -1340,7 +1370,7 @@ static bool generateSampleImageInst(const StringRef DemangledCall,
Register SampledImage =
Call->ReturnRegister.isValid()
? Call->ReturnRegister
- : MIRBuilder.getMRI()->createVirtualRegister(&SPIRV::IDRegClass);
+ : MRI->createVirtualRegister(&SPIRV::IDRegClass);
MIRBuilder.buildInstr(SPIRV::OpSampledImage)
.addDef(SampledImage)
.addUse(GR->getSPIRVTypeID(SampledImageType))
@@ -1356,6 +1386,10 @@ static bool generateSampleImageInst(const StringRef DemangledCall,
ReturnType = ReturnType.substr(0, ReturnType.find('('));
}
SPIRVType *Type = GR->getOrCreateSPIRVTypeByName(ReturnType, MIRBuilder);
+ MRI->setRegClass(Call->Arguments[0], &SPIRV::IDRegClass);
+ MRI->setRegClass(Call->Arguments[1], &SPIRV::IDRegClass);
+ MRI->setRegClass(Call->Arguments[3], &SPIRV::IDRegClass);
+
MIRBuilder.buildInstr(SPIRV::OpImageSampleExplicitLod)
.addDef(Call->ReturnRegister)
.addUse(GR->getSPIRVTypeID(Type))
@@ -1431,6 +1465,75 @@ static bool generateSpecConstantInst(const SPIRV::IncomingCall *Call,
}
}
+static bool buildNDRange(const SPIRV::IncomingCall *Call,
+ MachineIRBuilder &MIRBuilder,
+ SPIRVGlobalRegistry *GR) {
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+ MRI->setRegClass(Call->Arguments[0], &SPIRV::IDRegClass);
+ SPIRVType *PtrType = GR->getSPIRVTypeForVReg(Call->Arguments[0]);
+ assert(PtrType->getOpcode() == SPIRV::OpTypePointer &&
+ PtrType->getOperand(2).isReg());
+ Register TypeReg = PtrType->getOperand(2).getReg();
+ SPIRVType *StructType = GR->getSPIRVTypeForVReg(TypeReg);
+ MachineFunction &MF = MIRBuilder.getMF();
+ Register TmpReg = MRI->createVirtualRegister(&SPIRV::IDRegClass);
+ GR->assignSPIRVTypeToVReg(StructType, TmpReg, MF);
+ // Skip the first arg, it's the destination pointer. OpBuildNDRange takes
+ // three other arguments, so pass a zero constant for any that are absent.
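+ // E.g., for the two-argument ndrange_1D form only the global work size is a
+ // real operand; the missing local size and offset are filled with zero
+ // constants below (an illustrative reading of the NumArgs handling).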
+ unsigned NumArgs = Call->Arguments.size();
+ assert(NumArgs >= 2);
+ Register GlobalWorkSize = Call->Arguments[NumArgs < 4 ? 1 : 2];
+ MRI->setRegClass(GlobalWorkSize, &SPIRV::IDRegClass);
+ Register LocalWorkSize =
+ NumArgs == 2 ? Register(0) : Call->Arguments[NumArgs < 4 ? 2 : 3];
+ if (LocalWorkSize.isValid())
+ MRI->setRegClass(LocalWorkSize, &SPIRV::IDRegClass);
+ Register GlobalWorkOffset = NumArgs <= 3 ? Register(0) : Call->Arguments[1];
+ if (GlobalWorkOffset.isValid())
+ MRI->setRegClass(GlobalWorkOffset, &SPIRV::IDRegClass);
+ if (NumArgs < 4) {
+ Register Const;
+ SPIRVType *SpvTy = GR->getSPIRVTypeForVReg(GlobalWorkSize);
+ if (SpvTy->getOpcode() == SPIRV::OpTypePointer) {
+ MachineInstr *DefInstr = MRI->getUniqueVRegDef(GlobalWorkSize);
+ assert(DefInstr && isSpvIntrinsic(*DefInstr, Intrinsic::spv_gep) &&
+ DefInstr->getOperand(3).isReg());
+ Register GWSPtr = DefInstr->getOperand(3).getReg();
+ if (!MRI->getRegClassOrNull(GWSPtr))
+ MRI->setRegClass(GWSPtr, &SPIRV::IDRegClass);
+ // TODO: Maybe simplify generation of the type of the fields.
+ unsigned Size = Call->Builtin->Name.equals("ndrange_3D") ? 3 : 2;
+ unsigned BitWidth = GR->getPointerSize() == 64 ? 64 : 32;
+ Type *BaseTy = IntegerType::get(MF.getFunction().getContext(), BitWidth);
+ Type *FieldTy = ArrayType::get(BaseTy, Size);
+ SPIRVType *SpvFieldTy = GR->getOrCreateSPIRVType(FieldTy, MIRBuilder);
+ GlobalWorkSize = MRI->createVirtualRegister(&SPIRV::IDRegClass);
+ GR->assignSPIRVTypeToVReg(SpvFieldTy, GlobalWorkSize, MF);
+ MIRBuilder.buildInstr(SPIRV::OpLoad)
+ .addDef(GlobalWorkSize)
+ .addUse(GR->getSPIRVTypeID(SpvFieldTy))
+ .addUse(GWSPtr);
+ Const = GR->getOrCreateConsIntArray(0, MIRBuilder, SpvFieldTy);
+ } else {
+ Const = GR->buildConstantInt(0, MIRBuilder, SpvTy);
+ }
+ if (!LocalWorkSize.isValid())
+ LocalWorkSize = Const;
+ if (!GlobalWorkOffset.isValid())
+ GlobalWorkOffset = Const;
+ }
+ assert(LocalWorkSize.isValid() && GlobalWorkOffset.isValid());
+ MIRBuilder.buildInstr(SPIRV::OpBuildNDRange)
+ .addDef(TmpReg)
+ .addUse(TypeReg)
+ .addUse(GlobalWorkSize)
+ .addUse(LocalWorkSize)
+ .addUse(GlobalWorkOffset);
+ return MIRBuilder.buildInstr(SPIRV::OpStore)
+ .addUse(Call->Arguments[0])
+ .addUse(TmpReg);
+}
+
static MachineInstr *getBlockStructInstr(Register ParamReg,
MachineRegisterInfo *MRI) {
// We expect the following sequence of instructions:
@@ -1538,9 +1641,8 @@ static bool buildEnqueueKernel(const SPIRV::IncomingCall *Call,
const SPIRVType *PointerSizeTy = GR->getOrCreateSPIRVPointerType(
Int32Ty, MIRBuilder, SPIRV::StorageClass::Function);
for (unsigned I = 0; I < LocalSizeNum; ++I) {
- Register Reg =
- MIRBuilder.getMRI()->createVirtualRegister(&SPIRV::IDRegClass);
- MIRBuilder.getMRI()->setType(Reg, LLType);
+ Register Reg = MRI->createVirtualRegister(&SPIRV::IDRegClass);
+ MRI->setType(Reg, LLType);
GR->assignSPIRVTypeToVReg(PointerSizeTy, Reg, MIRBuilder.getMF());
auto GEPInst = MIRBuilder.buildIntrinsic(Intrinsic::spv_gep,
ArrayRef<Register>{Reg}, true);
@@ -1587,7 +1689,7 @@ static bool buildEnqueueKernel(const SPIRV::IncomingCall *Call,
MIB.addUse(buildConstantIntReg(DL.getTypeStoreSize(PType), MIRBuilder, GR));
 // Param Alignment: Alignment of the block literal structure.
MIB.addUse(
- buildConstantIntReg(DL.getPrefTypeAlignment(PType), MIRBuilder, GR));
+ buildConstantIntReg(DL.getPrefTypeAlign(PType).value(), MIRBuilder, GR));
for (unsigned i = 0; i < LocalSizes.size(); i++)
MIB.addUse(LocalSizes[i]);
@@ -1605,6 +1707,7 @@ static bool generateEnqueueInst(const SPIRV::IncomingCall *Call,
switch (Opcode) {
case SPIRV::OpRetainEvent:
case SPIRV::OpReleaseEvent:
+ MIRBuilder.getMRI()->setRegClass(Call->Arguments[0], &SPIRV::IDRegClass);
return MIRBuilder.buildInstr(Opcode).addUse(Call->Arguments[0]);
case SPIRV::OpCreateUserEvent:
case SPIRV::OpGetDefaultQueue:
@@ -1612,77 +1715,27 @@ static bool generateEnqueueInst(const SPIRV::IncomingCall *Call,
.addDef(Call->ReturnRegister)
.addUse(GR->getSPIRVTypeID(Call->ReturnType));
case SPIRV::OpIsValidEvent:
+ MIRBuilder.getMRI()->setRegClass(Call->Arguments[0], &SPIRV::IDRegClass);
return MIRBuilder.buildInstr(Opcode)
.addDef(Call->ReturnRegister)
.addUse(GR->getSPIRVTypeID(Call->ReturnType))
.addUse(Call->Arguments[0]);
case SPIRV::OpSetUserEventStatus:
+ MIRBuilder.getMRI()->setRegClass(Call->Arguments[0], &SPIRV::IDRegClass);
+ MIRBuilder.getMRI()->setRegClass(Call->Arguments[1], &SPIRV::IDRegClass);
return MIRBuilder.buildInstr(Opcode)
.addUse(Call->Arguments[0])
.addUse(Call->Arguments[1]);
case SPIRV::OpCaptureEventProfilingInfo:
+ MIRBuilder.getMRI()->setRegClass(Call->Arguments[0], &SPIRV::IDRegClass);
+ MIRBuilder.getMRI()->setRegClass(Call->Arguments[1], &SPIRV::IDRegClass);
+ MIRBuilder.getMRI()->setRegClass(Call->Arguments[2], &SPIRV::IDRegClass);
return MIRBuilder.buildInstr(Opcode)
.addUse(Call->Arguments[0])
.addUse(Call->Arguments[1])
.addUse(Call->Arguments[2]);
- case SPIRV::OpBuildNDRange: {
- MachineRegisterInfo *MRI = MIRBuilder.getMRI();
- SPIRVType *PtrType = GR->getSPIRVTypeForVReg(Call->Arguments[0]);
- assert(PtrType->getOpcode() == SPIRV::OpTypePointer &&
- PtrType->getOperand(2).isReg());
- Register TypeReg = PtrType->getOperand(2).getReg();
- SPIRVType *StructType = GR->getSPIRVTypeForVReg(TypeReg);
- Register TmpReg = MRI->createVirtualRegister(&SPIRV::IDRegClass);
- GR->assignSPIRVTypeToVReg(StructType, TmpReg, MIRBuilder.getMF());
- // Skip the first arg, it's the destination pointer. OpBuildNDRange takes
- // three other arguments, so pass zero constant on absence.
- unsigned NumArgs = Call->Arguments.size();
- assert(NumArgs >= 2);
- Register GlobalWorkSize = Call->Arguments[NumArgs < 4 ? 1 : 2];
- Register LocalWorkSize =
- NumArgs == 2 ? Register(0) : Call->Arguments[NumArgs < 4 ? 2 : 3];
- Register GlobalWorkOffset = NumArgs <= 3 ? Register(0) : Call->Arguments[1];
- if (NumArgs < 4) {
- Register Const;
- SPIRVType *SpvTy = GR->getSPIRVTypeForVReg(GlobalWorkSize);
- if (SpvTy->getOpcode() == SPIRV::OpTypePointer) {
- MachineInstr *DefInstr = MRI->getUniqueVRegDef(GlobalWorkSize);
- assert(DefInstr && isSpvIntrinsic(*DefInstr, Intrinsic::spv_gep) &&
- DefInstr->getOperand(3).isReg());
- Register GWSPtr = DefInstr->getOperand(3).getReg();
- // TODO: Maybe simplify generation of the type of the fields.
- unsigned Size = Call->Builtin->Name.equals("ndrange_3D") ? 3 : 2;
- unsigned BitWidth = GR->getPointerSize() == 64 ? 64 : 32;
- Type *BaseTy = IntegerType::get(
- MIRBuilder.getMF().getFunction().getContext(), BitWidth);
- Type *FieldTy = ArrayType::get(BaseTy, Size);
- SPIRVType *SpvFieldTy = GR->getOrCreateSPIRVType(FieldTy, MIRBuilder);
- GlobalWorkSize = MRI->createVirtualRegister(&SPIRV::IDRegClass);
- GR->assignSPIRVTypeToVReg(SpvFieldTy, GlobalWorkSize,
- MIRBuilder.getMF());
- MIRBuilder.buildInstr(SPIRV::OpLoad)
- .addDef(GlobalWorkSize)
- .addUse(GR->getSPIRVTypeID(SpvFieldTy))
- .addUse(GWSPtr);
- Const = GR->getOrCreateConsIntArray(0, MIRBuilder, SpvFieldTy);
- } else {
- Const = GR->buildConstantInt(0, MIRBuilder, SpvTy);
- }
- if (!LocalWorkSize.isValid())
- LocalWorkSize = Const;
- if (!GlobalWorkOffset.isValid())
- GlobalWorkOffset = Const;
- }
- MIRBuilder.buildInstr(Opcode)
- .addDef(TmpReg)
- .addUse(TypeReg)
- .addUse(GlobalWorkSize)
- .addUse(LocalWorkSize)
- .addUse(GlobalWorkOffset);
- return MIRBuilder.buildInstr(SPIRV::OpStore)
- .addUse(Call->Arguments[0])
- .addUse(TmpReg);
- }
+ case SPIRV::OpBuildNDRange:
+ return buildNDRange(Call, MIRBuilder, GR);
case SPIRV::OpEnqueueKernel:
return buildEnqueueKernel(Call, MIRBuilder, GR);
default:
@@ -1733,7 +1786,8 @@ static bool generateConvertInst(const StringRef DemangledCall,
SPIRV::Decoration::SaturatedConversion, {});
if (Builtin->IsRounded)
buildOpDecorate(Call->ReturnRegister, MIRBuilder,
- SPIRV::Decoration::FPRoundingMode, {Builtin->RoundingMode});
+ SPIRV::Decoration::FPRoundingMode,
+ {(unsigned)Builtin->RoundingMode});
unsigned Opcode = SPIRV::OpNop;
if (GR->isScalarOrVectorOfType(Call->Arguments[0], SPIRV::OpTypeInt)) {
@@ -1816,16 +1870,23 @@ static bool generateLoadStoreInst(const SPIRV::IncomingCall *Call,
}
// Add a pointer to the value to load/store.
MIB.addUse(Call->Arguments[0]);
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+ MRI->setRegClass(Call->Arguments[0], &SPIRV::IDRegClass);
// Add a value to store.
- if (!IsLoad)
+ if (!IsLoad) {
MIB.addUse(Call->Arguments[1]);
+ MRI->setRegClass(Call->Arguments[1], &SPIRV::IDRegClass);
+ }
// Add optional memory attributes and an alignment.
- MachineRegisterInfo *MRI = MIRBuilder.getMRI();
unsigned NumArgs = Call->Arguments.size();
- if ((IsLoad && NumArgs >= 2) || NumArgs >= 3)
+ if ((IsLoad && NumArgs >= 2) || NumArgs >= 3) {
MIB.addImm(getConstFromIntrinsic(Call->Arguments[IsLoad ? 1 : 2], MRI));
- if ((IsLoad && NumArgs >= 3) || NumArgs >= 4)
+ MRI->setRegClass(Call->Arguments[IsLoad ? 1 : 2], &SPIRV::IDRegClass);
+ }
+ if ((IsLoad && NumArgs >= 3) || NumArgs >= 4) {
MIB.addImm(getConstFromIntrinsic(Call->Arguments[IsLoad ? 2 : 3], MRI));
+ MRI->setRegClass(Call->Arguments[IsLoad ? 2 : 3], &SPIRV::IDRegClass);
+ }
return true;
}
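
For readers following the argument indices above: the hunk keeps the existing operand layout of the spv_load/spv_store builtins and only forces each used register into the ID register class. Below is a minimal sketch of that layout, assuming the same argument ordering as the checks in generateLoadStoreInst; the struct and helper names are invented for illustration.

struct LoadStoreArgIdx {
  unsigned Ptr = 0;    // always the first argument
  int Value = -1;      // only present for stores
  int MemOperand = -1; // optional memory-operand immediate
  int Alignment = -1;  // optional alignment immediate
};

// Mirrors the index arithmetic in generateLoadStoreInst above.
static LoadStoreArgIdx layoutFor(bool IsLoad, unsigned NumArgs) {
  LoadStoreArgIdx Layout;
  if (!IsLoad)
    Layout.Value = 1;
  if ((IsLoad && NumArgs >= 2) || NumArgs >= 3)
    Layout.MemOperand = IsLoad ? 1 : 2;
  if ((IsLoad && NumArgs >= 3) || NumArgs >= 4)
    Layout.Alignment = IsLoad ? 2 : 3;
  return Layout;
}
// e.g. layoutFor(/*IsLoad=*/false, /*NumArgs=*/4)
//      -> {Ptr=0, Value=1, MemOperand=2, Alignment=3}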
@@ -1845,6 +1906,8 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall,
SPIRVType *ReturnType = nullptr;
if (OrigRetTy && !OrigRetTy->isVoidTy()) {
ReturnType = GR->assignTypeToVReg(OrigRetTy, OrigRet, MIRBuilder);
+ if (!MIRBuilder.getMRI()->getRegClassOrNull(ReturnRegister))
+ MIRBuilder.getMRI()->setRegClass(ReturnRegister, &SPIRV::IDRegClass);
} else if (OrigRetTy && OrigRetTy->isVoidTy()) {
ReturnRegister = MIRBuilder.getMRI()->createVirtualRegister(&IDRegClass);
MIRBuilder.getMRI()->setType(ReturnRegister, LLT::scalar(32));
@@ -1912,135 +1975,106 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall,
return false;
}
-struct DemangledType {
+struct BuiltinType {
StringRef Name;
uint32_t Opcode;
};
-#define GET_DemangledTypes_DECL
-#define GET_DemangledTypes_IMPL
+#define GET_BuiltinTypes_DECL
+#define GET_BuiltinTypes_IMPL
-struct ImageType {
+struct OpenCLType {
StringRef Name;
- StringRef SampledType;
- AccessQualifier::AccessQualifier Qualifier;
- Dim::Dim Dimensionality;
- bool Arrayed;
- bool Depth;
- bool Multisampled;
- bool Sampled;
- ImageFormat::ImageFormat Format;
+ StringRef SpirvTypeLiteral;
};
-struct PipeType {
- StringRef Name;
- AccessQualifier::AccessQualifier Qualifier;
-};
+#define GET_OpenCLTypes_DECL
+#define GET_OpenCLTypes_IMPL
-using namespace AccessQualifier;
-using namespace Dim;
-using namespace ImageFormat;
-#define GET_ImageTypes_DECL
-#define GET_ImageTypes_IMPL
-#define GET_PipeTypes_DECL
-#define GET_PipeTypes_IMPL
#include "SPIRVGenTables.inc"
} // namespace SPIRV
//===----------------------------------------------------------------------===//
-// Misc functions for parsing builtin types and looking up implementation
-// details in TableGenerated tables.
+// Misc functions for parsing builtin types.
//===----------------------------------------------------------------------===//
-static const SPIRV::DemangledType *findBuiltinType(StringRef Name) {
- if (Name.startswith("opencl."))
- return SPIRV::lookupBuiltinType(Name);
- if (!Name.startswith("spirv."))
- return nullptr;
- // Some SPIR-V builtin types have a complex list of parameters as part of
- // their name (e.g. spirv.Image._void_1_0_0_0_0_0_0). Those parameters often
- // are numeric literals which cannot be easily represented by TableGen
- // records and should be parsed instead.
- unsigned BaseTypeNameLength =
- Name.contains('_') ? Name.find('_') - 1 : Name.size();
- return SPIRV::lookupBuiltinType(Name.substr(0, BaseTypeNameLength).str());
+static Type *parseTypeString(const StringRef Name, LLVMContext &Context) {
+ if (Name.startswith("void"))
+ return Type::getVoidTy(Context);
+ else if (Name.startswith("int") || Name.startswith("uint"))
+ return Type::getInt32Ty(Context);
+ else if (Name.startswith("float"))
+ return Type::getFloatTy(Context);
+ else if (Name.startswith("half"))
+ return Type::getHalfTy(Context);
+ llvm_unreachable("Unable to recognize type!");
}
-static std::unique_ptr<const SPIRV::ImageType>
-lookupOrParseBuiltinImageType(StringRef Name) {
- if (Name.startswith("opencl.")) {
- // Lookup OpenCL builtin image type lowering details in TableGen records.
- const SPIRV::ImageType *Record = SPIRV::lookupImageType(Name);
- return std::unique_ptr<SPIRV::ImageType>(new SPIRV::ImageType(*Record));
+static const TargetExtType *parseToTargetExtType(const Type *OpaqueType,
+ MachineIRBuilder &MIRBuilder) {
+ assert(isSpecialOpaqueType(OpaqueType) &&
+ "Not a SPIR-V/OpenCL special opaque type!");
+ assert(!OpaqueType->isTargetExtTy() &&
+ "This already is SPIR-V/OpenCL TargetExtType!");
+
+ StringRef NameWithParameters = OpaqueType->getStructName();
+
+ // Pointers-to-opaque-structs representing OpenCL types are first translated
+ // to equivalent SPIR-V types. OpenCL builtin type names should have the
+ // following format: e.g. %opencl.event_t
+ if (NameWithParameters.startswith("opencl.")) {
+ const SPIRV::OpenCLType *OCLTypeRecord =
+ SPIRV::lookupOpenCLType(NameWithParameters);
+ if (!OCLTypeRecord)
+ report_fatal_error("Missing TableGen record for OpenCL type: " +
+ NameWithParameters);
+ NameWithParameters = OCLTypeRecord->SpirvTypeLiteral;
+ // Continue with the SPIR-V builtin type...
}
- if (!Name.startswith("spirv."))
- llvm_unreachable("Unknown builtin image type name/literal");
- // Parse the literals of SPIR-V image builtin parameters. The name should
- // have the following format:
- // spirv.Image._Type_Dim_Depth_Arrayed_MS_Sampled_ImageFormat_AccessQualifier
- // e.g. %spirv.Image._void_1_0_0_0_0_0_0
- StringRef TypeParametersString = Name.substr(strlen("spirv.Image."));
- SmallVector<StringRef> TypeParameters;
- SplitString(TypeParametersString, TypeParameters, "_");
- assert(TypeParameters.size() == 8 &&
- "Wrong number of literals in SPIR-V builtin image type");
-
- StringRef SampledType = TypeParameters[0];
- unsigned Dim, Depth, Arrayed, Multisampled, Sampled, Format, AccessQual;
- bool AreParameterLiteralsValid =
- !(TypeParameters[1].getAsInteger(10, Dim) ||
- TypeParameters[2].getAsInteger(10, Depth) ||
- TypeParameters[3].getAsInteger(10, Arrayed) ||
- TypeParameters[4].getAsInteger(10, Multisampled) ||
- TypeParameters[5].getAsInteger(10, Sampled) ||
- TypeParameters[6].getAsInteger(10, Format) ||
- TypeParameters[7].getAsInteger(10, AccessQual));
- assert(AreParameterLiteralsValid &&
- "Invalid format of SPIR-V image type parameter literals.");
-
- return std::unique_ptr<SPIRV::ImageType>(new SPIRV::ImageType{
- Name, SampledType, SPIRV::AccessQualifier::AccessQualifier(AccessQual),
- SPIRV::Dim::Dim(Dim), static_cast<bool>(Arrayed),
- static_cast<bool>(Depth), static_cast<bool>(Multisampled),
- static_cast<bool>(Sampled), SPIRV::ImageFormat::ImageFormat(Format)});
-}
-static std::unique_ptr<const SPIRV::PipeType>
-lookupOrParseBuiltinPipeType(StringRef Name) {
- if (Name.startswith("opencl.")) {
- // Lookup OpenCL builtin pipe type lowering details in TableGen records.
- const SPIRV::PipeType *Record = SPIRV::lookupPipeType(Name);
- return std::unique_ptr<SPIRV::PipeType>(new SPIRV::PipeType(*Record));
+  // Names of the opaque structs representing SPIR-V builtins without
+ // parameters should have the following format: e.g. %spirv.Event
+ assert(NameWithParameters.startswith("spirv.") &&
+ "Unknown builtin opaque type!");
+
+ // Parameterized SPIR-V builtins names follow this format:
+ // e.g. %spirv.Image._void_1_0_0_0_0_0_0, %spirv.Pipe._0
+ if (NameWithParameters.find('_') == std::string::npos)
+ return TargetExtType::get(OpaqueType->getContext(), NameWithParameters);
+
+ SmallVector<StringRef> Parameters;
+ unsigned BaseNameLength = NameWithParameters.find('_') - 1;
+ SplitString(NameWithParameters.substr(BaseNameLength + 1), Parameters, "_");
+
+ SmallVector<Type *, 1> TypeParameters;
+ bool HasTypeParameter = !isDigit(Parameters[0][0]);
+ if (HasTypeParameter)
+ TypeParameters.push_back(parseTypeString(
+ Parameters[0], MIRBuilder.getMF().getFunction().getContext()));
+ SmallVector<unsigned> IntParameters;
+ for (unsigned i = HasTypeParameter ? 1 : 0; i < Parameters.size(); i++) {
+ unsigned IntParameter = 0;
+ bool ValidLiteral = !Parameters[i].getAsInteger(10, IntParameter);
+ assert(ValidLiteral &&
+ "Invalid format of SPIR-V builtin parameter literal!");
+ IntParameters.push_back(IntParameter);
}
- if (!Name.startswith("spirv."))
- llvm_unreachable("Unknown builtin pipe type name/literal");
- // Parse the access qualifier literal in the name of the SPIR-V pipe type.
- // The name should have the following format:
- // spirv.Pipe._AccessQualifier
- // e.g. %spirv.Pipe._1
- if (Name.endswith("_0"))
- return std::unique_ptr<SPIRV::PipeType>(
- new SPIRV::PipeType{Name, SPIRV::AccessQualifier::ReadOnly});
- if (Name.endswith("_1"))
- return std::unique_ptr<SPIRV::PipeType>(
- new SPIRV::PipeType{Name, SPIRV::AccessQualifier::WriteOnly});
- if (Name.endswith("_2"))
- return std::unique_ptr<SPIRV::PipeType>(
- new SPIRV::PipeType{Name, SPIRV::AccessQualifier::ReadWrite});
- llvm_unreachable("Unknown pipe type access qualifier literal");
+ return TargetExtType::get(OpaqueType->getContext(),
+ NameWithParameters.substr(0, BaseNameLength),
+ TypeParameters, IntParameters);
}
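
A minimal standalone sketch of the same name-splitting convention, assuming the "Base._[type]_literal_literal_..." layout handled by parseToTargetExtType above; plain standard-library strings stand in for StringRef/SplitString, and the helper names are invented for illustration.

#include <cctype>
#include <cstddef>
#include <sstream>
#include <string>
#include <vector>

struct ParsedBuiltinName {
  std::string BaseName;                // e.g. "spirv.Image"
  std::string TypeParameter;           // e.g. "void"; empty if absent
  std::vector<unsigned> IntParameters; // e.g. {1,0,0,0,0,0,0}
};

static ParsedBuiltinName splitBuiltinName(const std::string &Name) {
  ParsedBuiltinName P;
  std::size_t Us = Name.find('_');
  if (Us == std::string::npos) { // e.g. "spirv.Event": no parameter list
    P.BaseName = Name;
    return P;
  }
  P.BaseName = Name.substr(0, Us - 1); // drop the '.' before the first '_'
  std::stringstream Rest(Name.substr(Us));
  std::string Tok;
  std::vector<std::string> Parts;
  while (std::getline(Rest, Tok, '_'))
    if (!Tok.empty())
      Parts.push_back(Tok);
  std::size_t First = 0;
  if (!Parts.empty() && !std::isdigit(static_cast<unsigned char>(Parts[0][0])))
    P.TypeParameter = Parts[First++]; // a leading non-digit token is the type
  for (; First < Parts.size(); ++First)
    P.IntParameters.push_back(static_cast<unsigned>(std::stoul(Parts[First])));
  return P;
}
// splitBuiltinName("spirv.Image._void_1_0_0_0_0_0_0")
//   -> {"spirv.Image", "void", {1,0,0,0,0,0,0}}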
//===----------------------------------------------------------------------===//
// Implementation functions for builtin types.
//===----------------------------------------------------------------------===//
-static SPIRVType *getNonParametrizedType(const StructType *OpaqueType,
- const SPIRV::DemangledType *TypeRecord,
- MachineIRBuilder &MIRBuilder,
- SPIRVGlobalRegistry *GR) {
+static SPIRVType *getNonParameterizedType(const TargetExtType *ExtensionType,
+ const SPIRV::BuiltinType *TypeRecord,
+ MachineIRBuilder &MIRBuilder,
+ SPIRVGlobalRegistry *GR) {
unsigned Opcode = TypeRecord->Opcode;
// Create or get an existing type from GlobalRegistry.
- return GR->getOrCreateOpTypeByOpcode(OpaqueType, MIRBuilder, Opcode);
+ return GR->getOrCreateOpTypeByOpcode(ExtensionType, MIRBuilder, Opcode);
}
static SPIRVType *getSamplerType(MachineIRBuilder &MIRBuilder,
@@ -2049,78 +2083,87 @@ static SPIRVType *getSamplerType(MachineIRBuilder &MIRBuilder,
return GR->getOrCreateOpTypeSampler(MIRBuilder);
}
-static SPIRVType *getPipeType(const StructType *OpaqueType,
+static SPIRVType *getPipeType(const TargetExtType *ExtensionType,
MachineIRBuilder &MIRBuilder,
SPIRVGlobalRegistry *GR) {
- // Lookup pipe type lowering details in TableGen records or parse the
- // name/literal for details.
- std::unique_ptr<const SPIRV::PipeType> Record =
- lookupOrParseBuiltinPipeType(OpaqueType->getName());
+ assert(ExtensionType->getNumIntParameters() == 1 &&
+ "Invalid number of parameters for SPIR-V pipe builtin!");
// Create or get an existing type from GlobalRegistry.
- return GR->getOrCreateOpTypePipe(MIRBuilder, Record.get()->Qualifier);
+ return GR->getOrCreateOpTypePipe(MIRBuilder,
+ SPIRV::AccessQualifier::AccessQualifier(
+ ExtensionType->getIntParameter(0)));
}
static SPIRVType *
-getImageType(const StructType *OpaqueType,
- SPIRV::AccessQualifier::AccessQualifier AccessQual,
+getImageType(const TargetExtType *ExtensionType,
+ const SPIRV::AccessQualifier::AccessQualifier Qualifier,
MachineIRBuilder &MIRBuilder, SPIRVGlobalRegistry *GR) {
- // Lookup image type lowering details in TableGen records or parse the
- // name/literal for details.
- std::unique_ptr<const SPIRV::ImageType> Record =
- lookupOrParseBuiltinImageType(OpaqueType->getName());
-
- SPIRVType *SampledType =
- GR->getOrCreateSPIRVTypeByName(Record.get()->SampledType, MIRBuilder);
+ assert(ExtensionType->getNumTypeParameters() == 1 &&
+ "SPIR-V image builtin type must have sampled type parameter!");
+ const SPIRVType *SampledType =
+ GR->getOrCreateSPIRVType(ExtensionType->getTypeParameter(0), MIRBuilder);
+ assert(ExtensionType->getNumIntParameters() == 7 &&
+ "Invalid number of parameters for SPIR-V image builtin!");
+ // Create or get an existing type from GlobalRegistry.
return GR->getOrCreateOpTypeImage(
- MIRBuilder, SampledType, Record.get()->Dimensionality,
- Record.get()->Depth, Record.get()->Arrayed, Record.get()->Multisampled,
- Record.get()->Sampled, Record.get()->Format,
- AccessQual == SPIRV::AccessQualifier::WriteOnly
+ MIRBuilder, SampledType,
+ SPIRV::Dim::Dim(ExtensionType->getIntParameter(0)),
+ ExtensionType->getIntParameter(1), ExtensionType->getIntParameter(2),
+ ExtensionType->getIntParameter(3), ExtensionType->getIntParameter(4),
+ SPIRV::ImageFormat::ImageFormat(ExtensionType->getIntParameter(5)),
+ Qualifier == SPIRV::AccessQualifier::WriteOnly
? SPIRV::AccessQualifier::WriteOnly
- : Record.get()->Qualifier);
+ : SPIRV::AccessQualifier::AccessQualifier(
+ ExtensionType->getIntParameter(6)));
}
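
For reference, getImageType consumes the seven integer parameters of a spirv.Image target extension type positionally. The sketch below records that ordering; the positions are taken from the calls above, the field names are chosen for illustration, and the literal values themselves are defined by the SPIR-V specification.

struct ImageTypeLiterals {
  unsigned Dim;          // getIntParameter(0), e.g. 1 == 2D
  unsigned Depth;        // getIntParameter(1)
  unsigned Arrayed;      // getIntParameter(2)
  unsigned Multisampled; // getIntParameter(3)
  unsigned Sampled;      // getIntParameter(4)
  unsigned Format;       // getIntParameter(5), 0 == Unknown
  unsigned AccessQual;   // getIntParameter(6), 0/1/2 == RO/WO/RW
};

// Assuming Params holds getIntParameter(0..6) in order:
static ImageTypeLiterals decodeImageLiterals(const unsigned (&Params)[7]) {
  return {Params[0], Params[1], Params[2], Params[3],
          Params[4], Params[5], Params[6]};
}
// e.g. "spirv.Image._void_1_0_0_0_0_0_0" decodes to a non-depth, non-arrayed,
// non-multisampled 2D image with Unknown format and read-only access.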
-static SPIRVType *getSampledImageType(const StructType *OpaqueType,
+static SPIRVType *getSampledImageType(const TargetExtType *OpaqueType,
MachineIRBuilder &MIRBuilder,
SPIRVGlobalRegistry *GR) {
- StringRef TypeParametersString =
- OpaqueType->getName().substr(strlen("spirv.SampledImage."));
- LLVMContext &Context = MIRBuilder.getMF().getFunction().getContext();
- Type *ImageOpaqueType = StructType::getTypeByName(
- Context, "spirv.Image." + TypeParametersString.str());
- SPIRVType *TargetImageType =
- GR->getOrCreateSPIRVType(ImageOpaqueType, MIRBuilder);
- return GR->getOrCreateOpTypeSampledImage(TargetImageType, MIRBuilder);
+ SPIRVType *OpaqueImageType = getImageType(
+ OpaqueType, SPIRV::AccessQualifier::ReadOnly, MIRBuilder, GR);
+ // Create or get an existing type from GlobalRegistry.
+ return GR->getOrCreateOpTypeSampledImage(OpaqueImageType, MIRBuilder);
}
namespace SPIRV {
-SPIRVType *lowerBuiltinType(const StructType *OpaqueType,
+SPIRVType *lowerBuiltinType(const Type *OpaqueType,
SPIRV::AccessQualifier::AccessQualifier AccessQual,
MachineIRBuilder &MIRBuilder,
SPIRVGlobalRegistry *GR) {
- assert(OpaqueType->hasName() &&
- "Structs representing builtin types must have a parsable name");
+ // In LLVM IR, SPIR-V and OpenCL builtin types are represented as either
+ // target(...) target extension types or pointers-to-opaque-structs. The
+ // approach relying on structs is deprecated and works only in the non-opaque
+ // pointer mode (-opaque-pointers=0).
+ // In order to maintain compatibility with LLVM IR generated by older versions
+ // of Clang and LLVM/SPIR-V Translator, the pointers-to-opaque-structs are
+ // "translated" to target extension types. This translation is temporary and
+  // will be removed in a future release of LLVM.
+ const TargetExtType *BuiltinType = dyn_cast<TargetExtType>(OpaqueType);
+ if (!BuiltinType)
+ BuiltinType = parseToTargetExtType(OpaqueType, MIRBuilder);
+
unsigned NumStartingVRegs = MIRBuilder.getMRI()->getNumVirtRegs();
- const StringRef Name = OpaqueType->getName();
+ const StringRef Name = BuiltinType->getName();
LLVM_DEBUG(dbgs() << "Lowering builtin type: " << Name << "\n");
// Lookup the demangled builtin type in the TableGen records.
- const SPIRV::DemangledType *TypeRecord = findBuiltinType(Name);
+ const SPIRV::BuiltinType *TypeRecord = SPIRV::lookupBuiltinType(Name);
if (!TypeRecord)
report_fatal_error("Missing TableGen record for builtin type: " + Name);
// "Lower" the BuiltinType into TargetType. The following get<...>Type methods
- // use the implementation details from TableGen records to either create a new
- // OpType<...> machine instruction or get an existing equivalent SPIRVType
- // from GlobalRegistry.
+ // use the implementation details from TableGen records or TargetExtType
+ // parameters to either create a new OpType<...> machine instruction or get an
+ // existing equivalent SPIRVType from GlobalRegistry.
SPIRVType *TargetType;
switch (TypeRecord->Opcode) {
case SPIRV::OpTypeImage:
- TargetType = getImageType(OpaqueType, AccessQual, MIRBuilder, GR);
+ TargetType = getImageType(BuiltinType, AccessQual, MIRBuilder, GR);
break;
case SPIRV::OpTypePipe:
- TargetType = getPipeType(OpaqueType, MIRBuilder, GR);
+ TargetType = getPipeType(BuiltinType, MIRBuilder, GR);
break;
case SPIRV::OpTypeDeviceEvent:
TargetType = GR->getOrCreateOpTypeDeviceEvent(MIRBuilder);
@@ -2129,18 +2172,18 @@ SPIRVType *lowerBuiltinType(const StructType *OpaqueType,
TargetType = getSamplerType(MIRBuilder, GR);
break;
case SPIRV::OpTypeSampledImage:
- TargetType = getSampledImageType(OpaqueType, MIRBuilder, GR);
+ TargetType = getSampledImageType(BuiltinType, MIRBuilder, GR);
break;
default:
- TargetType = getNonParametrizedType(OpaqueType, TypeRecord, MIRBuilder, GR);
+ TargetType =
+ getNonParameterizedType(BuiltinType, TypeRecord, MIRBuilder, GR);
break;
}
// Emit OpName instruction if a new OpType<...> instruction was added
// (equivalent type was not found in GlobalRegistry).
if (NumStartingVRegs < MIRBuilder.getMRI()->getNumVirtRegs())
- buildOpName(GR->getSPIRVTypeID(TargetType), OpaqueType->getName(),
- MIRBuilder);
+ buildOpName(GR->getSPIRVTypeID(TargetType), Name, MIRBuilder);
return TargetType;
}
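
As a worked example of the lookup chain above, annotated here with entries assumed from the OpenCLTypes and BuiltinTypes records added to SPIRVBuiltins.td later in this patch, an OpenCL event type would plausibly flow as follows:

// "opencl.clk_event_t"            pointer-to-opaque-struct name in older IR
//   -> lookupOpenCLType(...)      => SpirvTypeLiteral "spirv.DeviceEvent"
//   -> parseToTargetExtType(...)  => target("spirv.DeviceEvent")
//   -> lookupBuiltinType(...)     => Opcode == OpTypeDeviceEvent
//   -> getOrCreateOpTypeDeviceEvent(MIRBuilder) emits or reuses the OpType.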
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.h b/llvm/lib/Target/SPIRV/SPIRVBuiltins.h
index 26d2e8ab0fd6..7ee5c49dc5b3 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.h
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.h
@@ -45,7 +45,7 @@ std::optional<bool> lowerBuiltin(const StringRef DemangledCall,
/// \return A machine instruction representing the OpType<...> SPIR-V type.
///
/// \p Type is the special opaque/builtin type to be lowered.
-SPIRVType *lowerBuiltinType(const StructType *Type,
+SPIRVType *lowerBuiltinType(const Type *Type,
AccessQualifier::AccessQualifier AccessQual,
MachineIRBuilder &MIRBuilder,
SPIRVGlobalRegistry *GR);
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index 635c6451ea04..8acd4691787e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -1101,141 +1101,105 @@ foreach i = ["", "2", "3", "4", "8", "16"] in {
}
//===----------------------------------------------------------------------===//
-// Class defining implementation details of demangled builtin types. The info
+// Class defining implementation details of SPIR-V builtin types. The info
// in the record is used for lowering into OpType.
//
-// name is the demangled name of the given builtin.
+// name is the name of the given SPIR-V builtin type.
// operation specifies the SPIR-V opcode the StructType should be lowered to.
//===----------------------------------------------------------------------===//
-class DemangledType<string name, Op operation> {
+class BuiltinType<string name, Op operation> {
string Name = name;
Op Opcode = operation;
}
-// Table gathering all the demangled type records.
-def DemangledTypes : GenericTable {
- let FilterClass = "DemangledType";
+// Table gathering all the builtin type records.
+def BuiltinTypes : GenericTable {
+ let FilterClass = "BuiltinType";
let Fields = ["Name", "Opcode"];
}
// Function to lookup builtin types by their demangled name.
def lookupBuiltinType : SearchIndex {
- let Table = DemangledTypes;
+ let Table = BuiltinTypes;
let Key = ["Name"];
}
-def : DemangledType<"opencl.reserve_id_t", OpTypeReserveId>;
-def : DemangledType<"opencl.event_t", OpTypeEvent>;
-def : DemangledType<"opencl.queue_t", OpTypeQueue>;
-def : DemangledType<"opencl.sampler_t", OpTypeSampler>;
-def : DemangledType<"opencl.clk_event_t", OpTypeDeviceEvent>;
-
-def : DemangledType<"spirv.ReserveId", OpTypeReserveId>;
-def : DemangledType<"spirv.PipeStorage", OpTypePipeStorage>;
-def : DemangledType<"spirv.Queue", OpTypeQueue>;
-def : DemangledType<"spirv.Event", OpTypeEvent>;
-def : DemangledType<"spirv.Sampler", OpTypeSampler>;
-def : DemangledType<"spirv.DeviceEvent", OpTypeDeviceEvent>;
-
-// Some SPIR-V builtin types (e.g. spirv.Image) have a complex list of
-// parameters as part of their name. Some of those parameters should be treated
-// as numeric literals and therefore they cannot be represented in TableGen and
-// should be parsed instead.
-def : DemangledType<"spirv.Image", OpTypeImage>;
-def : DemangledType<"spirv.SampledImage", OpTypeSampledImage>;
-def : DemangledType<"spirv.Pipe", OpTypePipe>;
-
-// Class defining lowering details for various variants of image type identifiers.
-class ImageType<string name> {
+def : BuiltinType<"spirv.ReserveId", OpTypeReserveId>;
+def : BuiltinType<"spirv.PipeStorage", OpTypePipeStorage>;
+def : BuiltinType<"spirv.Queue", OpTypeQueue>;
+def : BuiltinType<"spirv.Event", OpTypeEvent>;
+def : BuiltinType<"spirv.Sampler", OpTypeSampler>;
+def : BuiltinType<"spirv.DeviceEvent", OpTypeDeviceEvent>;
+def : BuiltinType<"spirv.Image", OpTypeImage>;
+def : BuiltinType<"spirv.SampledImage", OpTypeSampledImage>;
+def : BuiltinType<"spirv.Pipe", OpTypePipe>;
+
+
+//===----------------------------------------------------------------------===//
+// Class matching an OpenCL builtin type name to an equivalent SPIR-V
+// builtin type literal.
+//
+// name is the name of the given OpenCL builtin type.
+// spirvTypeLiteral is the literal of an equivalent SPIR-V builtin type.
+//===----------------------------------------------------------------------===//
+class OpenCLType<string name, string spirvTypeLiteral> {
string Name = name;
- string Type = "void";
- AccessQualifier Qualifier = !cond(!not(!eq(!find(name, "_ro_t"), -1)) : ReadOnly,
- !not(!eq(!find(name, "_wo_t"), -1)) : WriteOnly,
- !not(!eq(!find(name, "_rw_t"), -1)) : ReadWrite,
- true : ReadOnly);
- Dim Dimensionality = !cond(!not(!eq(!find(name, "buffer"), -1)) : DIM_Buffer,
- !not(!eq(!find(name, "image1"), -1)) : DIM_1D,
- !not(!eq(!find(name, "image2"), -1)) : DIM_2D,
- !not(!eq(!find(name, "image3"), -1)) : DIM_3D);
- bit Arrayed = !not(!eq(!find(name, "array"), -1));
- bit Depth = !not(!eq(!find(name, "depth"), -1));
- bit Multisampled = false;
- bit Sampled = false;
- ImageFormat Format = Unknown;
+ string SpirvTypeLiteral = spirvTypeLiteral;
}
-// Table gathering all the image type records.
-def ImageTypes : GenericTable {
- let FilterClass = "ImageType";
- let Fields = ["Name", "Type", "Qualifier", "Dimensionality", "Arrayed",
- "Depth", "Multisampled", "Sampled", "Format"];
- string TypeOf_Qualifier = "AccessQualifier";
- string TypeOf_Dimensionality = "Dim";
- string TypeOf_Format = "ImageFormat";
+// Table gathering all the OpenCL type records.
+def OpenCLTypes : GenericTable {
+ let FilterClass = "OpenCLType";
+ let Fields = ["Name", "SpirvTypeLiteral"];
}
-// Function to lookup builtin image types by their demangled name.
-def lookupImageType : SearchIndex {
- let Table = ImageTypes;
+// Function to lookup OpenCL types by their name.
+def lookupOpenCLType : SearchIndex {
+ let Table = OpenCLTypes;
let Key = ["Name"];
}
-// Multiclass used to define at the same time a DemangledType record used
-// for matching an incoming demangled string to the OpTypeImage opcode and
-// ImageType containing the lowering details.
-multiclass DemangledImageType<string name> {
- def : DemangledType<name, OpTypeImage>;
- def : ImageType<name>;
+def : OpenCLType<"opencl.reserve_id_t", "spirv.ReserveId">;
+def : OpenCLType<"opencl.event_t", "spirv.Event">;
+def : OpenCLType<"opencl.queue_t", "spirv.Queue">;
+def : OpenCLType<"opencl.sampler_t", "spirv.Sampler">;
+def : OpenCLType<"opencl.clk_event_t", "spirv.DeviceEvent">;
+
+foreach aq = ["_t", "_ro_t", "_wo_t", "_rw_t"] in {
+ defvar p = !cond(!not(!eq(!find(aq, "_rw_t"), -1)) : "2",
+ !not(!eq(!find(aq, "_wo_t"), -1)) : "1",
+ true : "0");
+ def : OpenCLType<!strconcat("opencl.pipe", aq),
+ !strconcat("spirv.Pipe._", p)>;
}
foreach aq = ["_t", "_ro_t", "_wo_t", "_rw_t"] in {
- defm : DemangledImageType<!strconcat("opencl.image1d", aq)>;
- defm : DemangledImageType<!strconcat("opencl.image1d_array", aq)>;
- defm : DemangledImageType<!strconcat("opencl.image1d_buffer", aq)>;
+ defvar p7 = !cond(!not(!eq(!find(aq, "_rw_t"), -1)) : "2",
+ !not(!eq(!find(aq, "_wo_t"), -1)) : "1",
+ true : "0");
+
+ def : OpenCLType<!strconcat("opencl.image1d", aq),
+ !strconcat("spirv.Image._void_0_0_0_0_0_0_", p7)>;
+ def : OpenCLType<!strconcat("opencl.image1d_array", aq),
+ !strconcat("spirv.Image._void_0_0_1_0_0_0_", p7)>;
+ def : OpenCLType<!strconcat("opencl.image1d_buffer", aq),
+ !strconcat("spirv.Image._void_5_0_0_0_0_0_", p7)>;
foreach a1 = ["", "_array"] in {
foreach a2 = ["", "_msaa"] in {
foreach a3 = ["", "_depth"] in {
- defm : DemangledImageType<!strconcat("opencl.image2d", a1, a2, a3, aq)>;
+ defvar p2 = !cond(!not(!eq(!find(a3, "_depth"), -1)) : "1", true : "0");
+ defvar p3 = !cond(!not(!eq(!find(a1, "_array"), -1)) : "1", true : "0");
+ defvar p4 = !cond(!not(!eq(!find(a2, "msaa"), -1)) : "1", true : "0");
+
+ def : OpenCLType<!strconcat("opencl.image2d", a1, a2, a3, aq),
+ !strconcat("spirv.Image._void_1_", p2 , "_", p3, "_", p4, "_0_0_", p7)>;
}
}
}
-
- defm : DemangledImageType<!strconcat("opencl.image3d", aq)>;
-}
-
-// Class defining lowering details for various variants of pipe type identifiers.
-class PipeType<string name> {
- string Name = name;
- AccessQualifier Qualifier = !cond(!not(!eq(!find(name, "_ro_t"), -1)) : ReadOnly,
- !not(!eq(!find(name, "_wo_t"), -1)) : WriteOnly,
- !not(!eq(!find(name, "_rw_t"), -1)) : ReadWrite,
- true : ReadOnly);
-}
-
-// Table gathering all the pipe type records.
-def PipeTypes : GenericTable {
- let FilterClass = "PipeType";
- let Fields = ["Name", "Qualifier"];
- string TypeOf_Qualifier = "AccessQualifier";
-}
-
-// Function to lookup builtin pipe types by their demangled name.
-def lookupPipeType : SearchIndex {
- let Table = PipeTypes;
- let Key = ["Name"];
-}
-
-// Multiclass used to define at the same time a DemangledType record used
-// for matching an incoming demangled string to the OpTypePipe opcode and
-// PipeType containing the lowering details.
-multiclass DemangledPipeType<string name> {
- def : DemangledType<name, OpTypePipe>;
- def : PipeType<name>;
-}
-
-foreach aq = ["_t", "_ro_t", "_wo_t", "_rw_t"] in {
- defm : DemangledPipeType<!strconcat("opencl.pipe", aq)>;
+
+ def : OpenCLType<!strconcat("opencl.image3d", aq),
+ !strconcat("spirv.Image._void_2_0_0_0_0_0_", p7)>;
}
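
To make the foreach expansions concrete, here is a hand-expanded sample of the mappings they generate, written as a plain C++ table standing in for the TableGen-generated lookupOpenCLType; only a few representative entries are shown.

#include <map>
#include <string>

// Sample of the OpenCL-to-SPIR-V name records produced by the loops above.
static const std::map<std::string, std::string> SampleOpenCLToSpirv = {
    {"opencl.pipe_ro_t", "spirv.Pipe._0"},
    {"opencl.pipe_wo_t", "spirv.Pipe._1"},
    {"opencl.image1d_buffer_t", "spirv.Image._void_5_0_0_0_0_0_0"},
    {"opencl.image2d_ro_t", "spirv.Image._void_1_0_0_0_0_0_0"},
    {"opencl.image2d_array_msaa_depth_wo_t",
     "spirv.Image._void_1_1_1_1_0_0_1"},
    {"opencl.image3d_rw_t", "spirv.Image._void_2_0_0_0_0_0_2"},
};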
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
index 8b618686ee7d..47b25a1f8351 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp
@@ -374,6 +374,7 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
FTy = getOriginalFunctionType(*CF);
}
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
Register ResVReg =
Info.OrigRet.Regs.empty() ? Register(0) : Info.OrigRet.Regs[0];
std::string FuncName = Info.Callee.getGlobal()->getName().str();
@@ -410,8 +411,9 @@ bool SPIRVCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
for (const Argument &Arg : CF->args()) {
if (MIRBuilder.getDataLayout().getTypeStoreSize(Arg.getType()).isZero())
continue; // Don't handle zero sized types.
- ToInsert.push_back(
- {MIRBuilder.getMRI()->createGenericVirtualRegister(LLT::scalar(32))});
+ Register Reg = MRI->createGenericVirtualRegister(LLT::scalar(32));
+ MRI->setRegClass(Reg, &SPIRV::IDRegClass);
+ ToInsert.push_back({Reg});
VRegArgs.push_back(ToInsert.back());
}
// TODO: Reuse FunctionLoweringInfo
diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
index f91b6ea5cb14..4e8afbe2e77e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp
@@ -56,6 +56,7 @@ class SPIRVEmitIntrinsics
DenseMap<Instruction *, Constant *> AggrConsts;
DenseSet<Instruction *> AggrStores;
void preprocessCompositeConstants();
+ void preprocessUndefs();
CallInst *buildIntrWithMD(Intrinsic::ID IntrID, ArrayRef<Type *> Types,
Value *Arg, Value *Arg2) {
ConstantAsMetadata *CM = ValueAsMetadata::getConstant(Arg);
@@ -151,6 +152,29 @@ void SPIRVEmitIntrinsics::replaceMemInstrUses(Instruction *Old,
Old->eraseFromParent();
}
+void SPIRVEmitIntrinsics::preprocessUndefs() {
+ std::queue<Instruction *> Worklist;
+ for (auto &I : instructions(F))
+ Worklist.push(&I);
+
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.front();
+ Worklist.pop();
+
+ for (auto &Op : I->operands()) {
+ auto *AggrUndef = dyn_cast<UndefValue>(Op);
+ if (!AggrUndef || !Op->getType()->isAggregateType())
+ continue;
+
+ IRB->SetInsertPoint(I);
+ auto *IntrUndef = IRB->CreateIntrinsic(Intrinsic::spv_undef, {}, {});
+ Worklist.push(IntrUndef);
+ I->replaceUsesOfWith(Op, IntrUndef);
+ AggrConsts[IntrUndef] = AggrUndef;
+ }
+ }
+}
+
void SPIRVEmitIntrinsics::preprocessCompositeConstants() {
std::queue<Instruction *> Worklist;
for (auto &I : instructions(F))
@@ -369,7 +393,8 @@ void SPIRVEmitIntrinsics::insertAssignTypeIntrs(Instruction *I) {
setInsertPointSkippingPhis(*IRB, I->getNextNode());
Type *TypeToAssign = Ty;
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
- if (II->getIntrinsicID() == Intrinsic::spv_const_composite) {
+ if (II->getIntrinsicID() == Intrinsic::spv_const_composite ||
+ II->getIntrinsicID() == Intrinsic::spv_undef) {
auto t = AggrConsts.find(II);
assert(t != AggrConsts.end());
TypeToAssign = t->second->getType();
@@ -453,6 +478,7 @@ bool SPIRVEmitIntrinsics::runOnFunction(Function &Func) {
for (auto &GV : Func.getParent()->globals())
processGlobalValue(GV);
+ preprocessUndefs();
preprocessCompositeConstants();
SmallVector<Instruction *> Worklist;
for (auto &I : instructions(Func))
diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
index 0f85c4839e10..c77a7f860eda 100644
--- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.cpp
@@ -80,6 +80,16 @@ SPIRVType *SPIRVGlobalRegistry::getOpTypeBool(MachineIRBuilder &MIRBuilder) {
SPIRVType *SPIRVGlobalRegistry::getOpTypeInt(uint32_t Width,
MachineIRBuilder &MIRBuilder,
bool IsSigned) {
+ assert(Width <= 64 && "Unsupported integer width!");
+ if (Width <= 8)
+ Width = 8;
+ else if (Width <= 16)
+ Width = 16;
+ else if (Width <= 32)
+ Width = 32;
+ else if (Width <= 64)
+ Width = 64;
+
auto MIB = MIRBuilder.buildInstr(SPIRV::OpTypeInt)
.addDef(createTypeVReg(MIRBuilder))
.addImm(Width)
@@ -133,6 +143,7 @@ SPIRVGlobalRegistry::getOrCreateConstIntReg(uint64_t Val, SPIRVType *SpvType,
unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32;
LLT LLTy = LLT::scalar(32);
Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy);
+ CurMF->getRegInfo().setRegClass(Res, &SPIRV::IDRegClass);
if (MIRBuilder)
assignTypeToVReg(LLVMIntTy, Res, *MIRBuilder);
else
@@ -192,6 +203,7 @@ Register SPIRVGlobalRegistry::buildConstantInt(uint64_t Val,
unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32;
LLT LLTy = LLT::scalar(EmitIR ? BitWidth : 32);
Res = MF.getRegInfo().createGenericVirtualRegister(LLTy);
+ MF.getRegInfo().setRegClass(Res, &SPIRV::IDRegClass);
assignTypeToVReg(LLVMIntTy, Res, MIRBuilder,
SPIRV::AccessQualifier::ReadWrite, EmitIR);
DT.add(ConstInt, &MIRBuilder.getMF(), Res);
@@ -237,6 +249,7 @@ Register SPIRVGlobalRegistry::buildConstantFP(APFloat Val,
if (!Res.isValid()) {
unsigned BitWidth = SpvType ? getScalarOrVectorBitWidth(SpvType) : 32;
Res = MF.getRegInfo().createGenericVirtualRegister(LLT::scalar(BitWidth));
+ MF.getRegInfo().setRegClass(Res, &SPIRV::IDRegClass);
assignTypeToVReg(LLVMFPTy, Res, MIRBuilder);
DT.add(ConstFP, &MF, Res);
MIRBuilder.buildFConstant(Res, *ConstFP);
@@ -262,6 +275,7 @@ Register SPIRVGlobalRegistry::getOrCreateIntCompositeOrNull(
LLT LLTy = LLT::scalar(32);
Register SpvVecConst =
CurMF->getRegInfo().createGenericVirtualRegister(LLTy);
+ CurMF->getRegInfo().setRegClass(SpvVecConst, &SPIRV::IDRegClass);
assignSPIRVTypeToVReg(SpvType, SpvVecConst, *CurMF);
DT.add(CA, CurMF, SpvVecConst);
MachineInstrBuilder MIB;
@@ -333,6 +347,7 @@ Register SPIRVGlobalRegistry::getOrCreateIntCompositeOrNull(
LLT LLTy = EmitIR ? LLT::fixed_vector(ElemCnt, BitWidth) : LLT::scalar(32);
Register SpvVecConst =
CurMF->getRegInfo().createGenericVirtualRegister(LLTy);
+ CurMF->getRegInfo().setRegClass(SpvVecConst, &SPIRV::IDRegClass);
assignSPIRVTypeToVReg(SpvType, SpvVecConst, *CurMF);
DT.add(CA, CurMF, SpvVecConst);
if (EmitIR) {
@@ -401,6 +416,7 @@ SPIRVGlobalRegistry::getOrCreateConstNullPtr(MachineIRBuilder &MIRBuilder,
if (!Res.isValid()) {
LLT LLTy = LLT::pointer(LLVMPtrTy->getAddressSpace(), PointerSize);
Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy);
+ CurMF->getRegInfo().setRegClass(Res, &SPIRV::IDRegClass);
assignSPIRVTypeToVReg(SpvType, Res, *CurMF);
MIRBuilder.buildInstr(SPIRV::OpConstantNull)
.addDef(Res)
@@ -573,9 +589,8 @@ SPIRVType *SPIRVGlobalRegistry::getOrCreateSpecialType(
assert(!PType->isOpaque());
Ty = PType->getNonOpaquePointerElementType();
}
- auto SType = cast<StructType>(Ty);
- assert(isSpecialOpaqueType(SType) && "Not a special opaque builtin type");
- return SPIRV::lowerBuiltinType(SType, AccQual, MIRBuilder, this);
+ assert(isSpecialOpaqueType(Ty) && "Not a special opaque builtin type");
+ return SPIRV::lowerBuiltinType(Ty, AccQual, MIRBuilder, this);
}
SPIRVType *SPIRVGlobalRegistry::getOpTypePointer(
@@ -1081,6 +1096,7 @@ Register SPIRVGlobalRegistry::getOrCreateUndef(MachineInstr &I,
return Res;
LLT LLTy = LLT::scalar(32);
Res = CurMF->getRegInfo().createGenericVirtualRegister(LLTy);
+ CurMF->getRegInfo().setRegClass(Res, &SPIRV::IDRegClass);
assignSPIRVTypeToVReg(SpvType, Res, *CurMF);
DT.add(UV, CurMF, Res);
diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
index c226b1ac7528..33c6aa242969 100644
--- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp
@@ -27,6 +27,8 @@ unsigned SPIRVTargetLowering::getNumRegistersForCallingConv(
(VT.getVectorElementType() == MVT::i1 ||
VT.getVectorElementType() == MVT::i8))
return 1;
+ if (!VT.isVector() && VT.isInteger() && VT.getSizeInBits() <= 64)
+ return 1;
return getNumRegisters(Context, VT);
}
@@ -53,7 +55,7 @@ bool SPIRVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
switch (Intrinsic) {
case Intrinsic::spv_load:
AlignIdx = 2;
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
case Intrinsic::spv_store: {
if (I.getNumOperands() >= AlignIdx + 1) {
auto *AlignOp = cast<ConstantInt>(I.getOperand(AlignIdx));
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
index e1521d44e4e5..44b5536becf7 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -213,7 +213,7 @@ def PseudoConstI: IntImmLeaf<i32, [{ return Imm.getBitWidth() <= 32; }], imm_to_
def PseudoConstF: FPImmLeaf<f32, [{ return true; }], fimm_to_i32>;
def ConstPseudoTrue: IntImmLeaf<i32, [{ return Imm.getBitWidth() == 1 && Imm.getZExtValue() == 1; }]>;
def ConstPseudoFalse: IntImmLeaf<i32, [{ return Imm.getBitWidth() == 1 && Imm.getZExtValue() == 0; }]>;
-def ConstPseudoNull: IntImmLeaf<i64, [{ return Imm.isNullValue(); }]>;
+def ConstPseudoNull: IntImmLeaf<i64, [{ return Imm.isZero(); }]>;
multiclass IntFPImm<bits<16> opCode, string name> {
def I: Op<opCode, (outs ID:$dst), (ins TYPE:$type, ID:$src, variable_ops),
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 5ebec6b8fa13..2fc7342458de 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -20,8 +20,8 @@
#include "SPIRVTargetMachine.h"
#include "SPIRVUtils.h"
#include "llvm/ADT/APFloat.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/IntrinsicsSPIRV.h"
@@ -55,7 +55,7 @@ public:
const SPIRVSubtarget &ST,
const RegisterBankInfo &RBI);
void setupMF(MachineFunction &MF, GISelKnownBits *KB,
- CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
+ CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) override;
// Common selection code. Instruction-specific selection occurs in spvSelect.
bool select(MachineInstr &I) override;
@@ -201,7 +201,7 @@ SPIRVInstructionSelector::SPIRVInstructionSelector(const SPIRVTargetMachine &TM,
}
void SPIRVInstructionSelector::setupMF(MachineFunction &MF, GISelKnownBits *KB,
- CodeGenCoverage &CoverageInfo,
+ CodeGenCoverage *CoverageInfo,
ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI) {
MRI = &MF.getRegInfo();
@@ -1080,8 +1080,8 @@ Register SPIRVInstructionSelector::buildOnesVal(bool AllOnes,
const SPIRVType *ResType,
MachineInstr &I) const {
unsigned BitWidth = GR.getScalarOrVectorBitWidth(ResType);
- APInt One = AllOnes ? APInt::getAllOnesValue(BitWidth)
- : APInt::getOneBitSet(BitWidth, 0);
+ APInt One =
+ AllOnes ? APInt::getAllOnes(BitWidth) : APInt::getOneBitSet(BitWidth, 0);
if (ResType->getOpcode() == SPIRV::OpTypeVector)
return GR.getOrCreateConsIntVector(One.getZExtValue(), I, ResType, TII);
return GR.getOrCreateConstInt(One.getZExtValue(), I, ResType, TII);
@@ -1180,15 +1180,16 @@ bool SPIRVInstructionSelector::selectConst(Register ResVReg,
const APInt &Imm,
MachineInstr &I) const {
unsigned TyOpcode = ResType->getOpcode();
- assert(TyOpcode != SPIRV::OpTypePointer || Imm.isNullValue());
+ assert(TyOpcode != SPIRV::OpTypePointer || Imm.isZero());
MachineBasicBlock &BB = *I.getParent();
if ((TyOpcode == SPIRV::OpTypePointer || TyOpcode == SPIRV::OpTypeEvent) &&
- Imm.isNullValue())
+ Imm.isZero())
return BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpConstantNull))
.addDef(ResVReg)
.addUse(GR.getSPIRVTypeID(ResType))
.constrainAllUses(TII, TRI, RBI);
if (TyOpcode == SPIRV::OpTypeInt) {
+ assert(Imm.getBitWidth() <= 64 && "Unsupported integer width!");
Register Reg = GR.getOrCreateConstInt(Imm.getZExtValue(), I, ResType, TII);
if (Reg == ResVReg)
return true;
@@ -1316,25 +1317,18 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
switch (I.getIntrinsicID()) {
case Intrinsic::spv_load:
return selectLoad(ResVReg, ResType, I);
- break;
case Intrinsic::spv_store:
return selectStore(I);
- break;
case Intrinsic::spv_extractv:
return selectExtractVal(ResVReg, ResType, I);
- break;
case Intrinsic::spv_insertv:
return selectInsertVal(ResVReg, ResType, I);
- break;
case Intrinsic::spv_extractelt:
return selectExtractElt(ResVReg, ResType, I);
- break;
case Intrinsic::spv_insertelt:
return selectInsertElt(ResVReg, ResType, I);
- break;
case Intrinsic::spv_gep:
return selectGEP(ResVReg, ResType, I);
- break;
case Intrinsic::spv_unref_global:
case Intrinsic::spv_init_global: {
MachineInstr *MI = MRI->getVRegDef(I.getOperand(1).getReg());
@@ -1343,7 +1337,13 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
: nullptr;
assert(MI);
return selectGlobalValue(MI->getOperand(0).getReg(), *MI, Init);
- } break;
+ }
+ case Intrinsic::spv_undef: {
+ auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpUndef))
+ .addDef(ResVReg)
+ .addUse(GR.getSPIRVTypeID(ResType));
+ return MIB.constrainAllUses(TII, TRI, RBI);
+ }
case Intrinsic::spv_const_composite: {
// If no values are attached, the composite is null constant.
bool IsNull = I.getNumExplicitDefs() + 1 == I.getNumExplicitOperands();
@@ -1360,7 +1360,7 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
}
}
return MIB.constrainAllUses(TII, TRI, RBI);
- } break;
+ }
case Intrinsic::spv_assign_name: {
auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpName));
MIB.addUse(I.getOperand(I.getNumExplicitDefs() + 1).getReg());
@@ -1369,7 +1369,7 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
MIB.addImm(I.getOperand(i).getImm());
}
return MIB.constrainAllUses(TII, TRI, RBI);
- } break;
+ }
case Intrinsic::spv_switch: {
auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpSwitch));
for (unsigned i = 1; i < I.getNumExplicitOperands(); ++i) {
@@ -1383,16 +1383,14 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
llvm_unreachable("Unexpected OpSwitch operand");
}
return MIB.constrainAllUses(TII, TRI, RBI);
- } break;
+ }
case Intrinsic::spv_cmpxchg:
return selectAtomicCmpXchg(ResVReg, ResType, I);
- break;
case Intrinsic::spv_unreachable:
BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpUnreachable));
break;
case Intrinsic::spv_alloca:
return selectFrameIndex(ResVReg, ResType, I);
- break;
default:
llvm_unreachable("Intrinsic selection not implemented");
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index af48d51a056f..22746788607b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -386,47 +386,6 @@ void SPIRVModuleAnalysis::numberRegistersGlobally(const Module &M) {
}
}
-// Find OpIEqual and OpBranchConditional instructions originating from
-// OpSwitches, mark them skipped for emission. Also mark MBB skipped if it
-// contains only these instructions.
-static void processSwitches(const Module &M, SPIRV::ModuleAnalysisInfo &MAI,
- MachineModuleInfo *MMI) {
- DenseSet<Register> SwitchRegs;
- for (auto F = M.begin(), E = M.end(); F != E; ++F) {
- MachineFunction *MF = MMI->getMachineFunction(*F);
- if (!MF)
- continue;
- for (MachineBasicBlock &MBB : *MF)
- for (MachineInstr &MI : MBB) {
- if (MAI.getSkipEmission(&MI))
- continue;
- if (MI.getOpcode() == SPIRV::OpSwitch) {
- assert(MI.getOperand(0).isReg());
- SwitchRegs.insert(MI.getOperand(0).getReg());
- }
- if (MI.getOpcode() == SPIRV::OpISubS &&
- SwitchRegs.contains(MI.getOperand(2).getReg())) {
- SwitchRegs.insert(MI.getOperand(0).getReg());
- MAI.setSkipEmission(&MI);
- }
- if ((MI.getOpcode() != SPIRV::OpIEqual &&
- MI.getOpcode() != SPIRV::OpULessThanEqual) ||
- !MI.getOperand(2).isReg() ||
- !SwitchRegs.contains(MI.getOperand(2).getReg()))
- continue;
- Register CmpReg = MI.getOperand(0).getReg();
- MachineInstr *CBr = MI.getNextNode();
- assert(CBr && CBr->getOpcode() == SPIRV::OpBranchConditional &&
- CBr->getOperand(0).isReg() &&
- CBr->getOperand(0).getReg() == CmpReg);
- MAI.setSkipEmission(&MI);
- MAI.setSkipEmission(CBr);
- if (&MBB.front() == &MI && &MBB.back() == CBr)
- MAI.MBBsToSkip.insert(&MBB);
- }
- }
-}
-
// RequirementHandler implementations.
void SPIRV::RequirementHandler::getAndAddRequirements(
SPIRV::OperandCategory::OperandCategory Category, uint32_t i,
@@ -1020,8 +979,6 @@ bool SPIRVModuleAnalysis::runOnModule(Module &M) {
collectReqs(M, MAI, MMI, *ST);
- processSwitches(M, MAI, MMI);
-
// Process type/const/global var/func decl instructions, number their
// destination registers from 0 to N, collect Extensions and Capabilities.
processDefInstrs(M);
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
index a8b659ce3957..abb6797c5218 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.h
@@ -136,9 +136,6 @@ struct ModuleAnalysisInfo {
// The set contains machine instructions which are necessary
// for correct MIR but will not be emitted in function bodies.
DenseSet<MachineInstr *> InstrsToDelete;
- // The set contains machine basic blocks which are necessary
- // for correct MIR but will not be emitted.
- DenseSet<MachineBasicBlock *> MBBsToSkip;
// The table contains global aliases of local registers for each machine
// function. The aliases are used to substitute local registers during
// code emission.
diff --git a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
index 27d0e8a976f0..c0c53170f462 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPreLegalizer.cpp
@@ -85,6 +85,9 @@ static void addConstantsToTrack(MachineFunction &MF, SPIRVGlobalRegistry *GR) {
Register Reg = MI->getOperand(2).getReg();
if (RegsAlreadyAddedToDT.find(MI) != RegsAlreadyAddedToDT.end())
Reg = RegsAlreadyAddedToDT[MI];
+ auto *RC = MRI.getRegClassOrNull(MI->getOperand(0).getReg());
+ if (!MRI.getRegClassOrNull(Reg) && RC)
+ MRI.setRegClass(Reg, RC);
MRI.replaceRegWith(MI->getOperand(0).getReg(), Reg);
MI->eraseFromParent();
}
@@ -201,8 +204,12 @@ Register insertAssignInstr(Register Reg, Type *Ty, SPIRVType *SpirvTy,
(Def->getNextNode() ? Def->getNextNode()->getIterator()
: Def->getParent()->end()));
Register NewReg = MRI.createGenericVirtualRegister(MRI.getType(Reg));
- if (auto *RC = MRI.getRegClassOrNull(Reg))
+ if (auto *RC = MRI.getRegClassOrNull(Reg)) {
MRI.setRegClass(NewReg, RC);
+ } else {
+ MRI.setRegClass(NewReg, &SPIRV::IDRegClass);
+ MRI.setRegClass(Reg, &SPIRV::IDRegClass);
+ }
SpirvTy = SpirvTy ? SpirvTy : GR->getOrCreateSPIRVType(Ty, MIB);
GR->assignSPIRVTypeToVReg(SpirvTy, Reg, MIB.getMF());
// This is to make it convenient for Legalizer to get the SPIRVType
@@ -210,14 +217,13 @@ Register insertAssignInstr(Register Reg, Type *Ty, SPIRVType *SpirvTy,
GR->assignSPIRVTypeToVReg(SpirvTy, NewReg, MIB.getMF());
// Copy MIFlags from Def to ASSIGN_TYPE instruction. It's required to keep
// the flags after instruction selection.
- const uint16_t Flags = Def->getFlags();
+ const uint32_t Flags = Def->getFlags();
MIB.buildInstr(SPIRV::ASSIGN_TYPE)
.addDef(Reg)
.addUse(NewReg)
.addUse(GR->getSPIRVTypeID(SpirvTy))
.setMIFlags(Flags);
Def->getOperand(0).setReg(NewReg);
- MRI.setRegClass(Reg, &SPIRV::ANYIDRegClass);
return NewReg;
}
} // namespace llvm
@@ -411,19 +417,23 @@ static void processSwitches(MachineFunction &MF, SPIRVGlobalRegistry *GR,
//
// Sometimes (in case of range-compare switches), additional G_SUBs
// instructions are inserted before G_ICMPs. Those need to be additionally
- // processed and require type assignment.
+ // processed.
//
// This function modifies spv_switch call's operands to include destination
// MBBs (default and for each constant value).
- // Note that this function does not remove G_ICMP + G_BRCOND + G_BR sequences,
- // but they are marked by ModuleAnalysis as skipped and as a result AsmPrinter
- // does not output them.
+ //
+ // At the end, the function removes redundant [G_SUB] + G_ICMP + G_BRCOND +
+ // G_BR sequences.
MachineRegisterInfo &MRI = MF.getRegInfo();
- // Collect all MIs relevant to switches across all MBBs in MF.
+ // Collect spv_switches and G_ICMPs across all MBBs in MF.
std::vector<MachineInstr *> RelevantInsts;
+ // Collect redundant MIs from [G_SUB] + G_ICMP + G_BRCOND + G_BR sequences.
+ // After updating spv_switches, the instructions can be removed.
+ std::vector<MachineInstr *> PostUpdateArtifacts;
+
// Temporary set of compare registers. G_SUBs and G_ICMPs relating to
// spv_switch use these registers.
DenseSet<Register> CompareRegs;
@@ -443,23 +453,21 @@ static void processSwitches(MachineFunction &MF, SPIRVGlobalRegistry *GR,
assert(MI.getOperand(0).isReg() && MI.getOperand(1).isReg());
Register Dst = MI.getOperand(0).getReg();
CompareRegs.insert(Dst);
- SPIRVType *Ty = GR->getSPIRVTypeForVReg(MI.getOperand(1).getReg());
- insertAssignInstr(Dst, nullptr, Ty, GR, MIB, MRI);
+ PostUpdateArtifacts.push_back(&MI);
}
// G_ICMPs relating to switches.
if (MI.getOpcode() == TargetOpcode::G_ICMP && MI.getOperand(2).isReg() &&
CompareRegs.contains(MI.getOperand(2).getReg())) {
Register Dst = MI.getOperand(0).getReg();
- // Set type info for destination register of switch's ICMP instruction.
- if (GR->getSPIRVTypeForVReg(Dst) == nullptr) {
- MIB.setInsertPt(*MI.getParent(), MI);
- Type *LLVMTy = IntegerType::get(MF.getFunction().getContext(), 1);
- SPIRVType *SpirvTy = GR->getOrCreateSPIRVType(LLVMTy, MIB);
- MRI.setRegClass(Dst, &SPIRV::IDRegClass);
- GR->assignSPIRVTypeToVReg(SpirvTy, Dst, MIB.getMF());
- }
RelevantInsts.push_back(&MI);
+ PostUpdateArtifacts.push_back(&MI);
+ MachineInstr *CBr = MRI.use_begin(Dst)->getParent();
+ assert(CBr->getOpcode() == SPIRV::G_BRCOND);
+ PostUpdateArtifacts.push_back(CBr);
+ MachineInstr *Br = CBr->getNextNode();
+ assert(Br->getOpcode() == SPIRV::G_BR);
+ PostUpdateArtifacts.push_back(Br);
}
}
}
@@ -503,6 +511,9 @@ static void processSwitches(MachineFunction &MF, SPIRVGlobalRegistry *GR,
// Map switch case Value to target MBB.
ValuesToMBBs[Value] = MBB;
+ // Add target MBB as successor to the switch's MBB.
+ Switch->getParent()->addSuccessor(MBB);
+
// The next MI is always G_BR to either the next case or the default.
MachineInstr *NextMI = CBr->getNextNode();
assert(NextMI->getOpcode() == SPIRV::G_BR &&
@@ -512,8 +523,11 @@ static void processSwitches(MachineFunction &MF, SPIRVGlobalRegistry *GR,
// register.
if (NextMBB->front().getOpcode() != SPIRV::G_ICMP ||
(NextMBB->front().getOperand(2).isReg() &&
- NextMBB->front().getOperand(2).getReg() != CompareReg))
+ NextMBB->front().getOperand(2).getReg() != CompareReg)) {
+ // Set default MBB and add it as successor to the switch's MBB.
DefaultMBB = NextMBB;
+ Switch->getParent()->addSuccessor(DefaultMBB);
+ }
}
// Modify considered spv_switch operands using collected Values and
@@ -540,6 +554,24 @@ static void processSwitches(MachineFunction &MF, SPIRVGlobalRegistry *GR,
Switch->addOperand(MachineOperand::CreateMBB(MBBs[k]));
}
}
+
+ for (MachineInstr *MI : PostUpdateArtifacts) {
+ MachineBasicBlock *ParentMBB = MI->getParent();
+ MI->eraseFromParent();
+  // If G_ICMP + G_BRCOND + G_BR were the only MIs in the MBB, erase this MBB.
+  // It can be safely assumed that there are no branches or phis directing
+  // into this MBB. However, we still need to remove this MBB from the CFG.
+  // MBBs must be erased top-down.
+ if (ParentMBB->empty()) {
+ while (!ParentMBB->pred_empty())
+ (*ParentMBB->pred_begin())->removeSuccessor(ParentMBB);
+
+ while (!ParentMBB->succ_empty())
+ ParentMBB->removeSuccessor(ParentMBB->succ_begin());
+
+ ParentMBB->eraseFromParent();
+ }
+ }
}
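
The clean-up added above follows the usual pattern for deleting a now-empty machine basic block: detach it from the CFG on both sides before erasing it. Below is a small helper capturing the same steps; the helper name is invented for illustration, and callers must ensure nothing still branches into the block.

#include "llvm/CodeGen/MachineBasicBlock.h"

// Detach MBB from all predecessors and successors, then erase it. Mirrors the
// loop added in processSwitches above.
static void eraseDeadMBB(llvm::MachineBasicBlock *MBB) {
  while (!MBB->pred_empty())
    (*MBB->pred_begin())->removeSuccessor(MBB);
  while (!MBB->succ_empty())
    MBB->removeSuccessor(MBB->succ_begin());
  MBB->eraseFromParent();
}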
bool SPIRVPreLegalizer::runOnMachineFunction(MachineFunction &MF) {
diff --git a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
index 20f32ffeba3b..554e66988f09 100644
--- a/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVPrepareFunctions.cpp
@@ -7,8 +7,11 @@
//===----------------------------------------------------------------------===//
//
// This pass modifies function signatures containing aggregate arguments
-// and/or return value. Also it substitutes some llvm intrinsic calls by
-// function calls, generating these functions as the translator does.
+// and/or return value before IRTranslator. Information about the original
+// signatures is stored in metadata. It is used during call lowering to
+// restore correct SPIR-V types of function arguments and return values.
+// This pass also substitutes some llvm intrinsic calls with calls to newly
+// generated functions (as the Khronos LLVM/SPIR-V Translator does).
//
// NOTE: this pass is a module-level one due to the necessity to modify
// GVs/functions.
@@ -33,7 +36,8 @@ void initializeSPIRVPrepareFunctionsPass(PassRegistry &);
namespace {
class SPIRVPrepareFunctions : public ModulePass {
- Function *processFunctionSignature(Function *F);
+ bool substituteIntrinsicCalls(Function *F);
+ Function *removeAggregateTypesFromSignature(Function *F);
public:
static char ID;
@@ -57,68 +61,6 @@ char SPIRVPrepareFunctions::ID = 0;
INITIALIZE_PASS(SPIRVPrepareFunctions, "prepare-functions",
"SPIRV prepare functions", false, false)
-Function *SPIRVPrepareFunctions::processFunctionSignature(Function *F) {
- IRBuilder<> B(F->getContext());
-
- bool IsRetAggr = F->getReturnType()->isAggregateType();
- bool HasAggrArg =
- std::any_of(F->arg_begin(), F->arg_end(), [](Argument &Arg) {
- return Arg.getType()->isAggregateType();
- });
- bool DoClone = IsRetAggr || HasAggrArg;
- if (!DoClone)
- return F;
- SmallVector<std::pair<int, Type *>, 4> ChangedTypes;
- Type *RetType = IsRetAggr ? B.getInt32Ty() : F->getReturnType();
- if (IsRetAggr)
- ChangedTypes.push_back(std::pair<int, Type *>(-1, F->getReturnType()));
- SmallVector<Type *, 4> ArgTypes;
- for (const auto &Arg : F->args()) {
- if (Arg.getType()->isAggregateType()) {
- ArgTypes.push_back(B.getInt32Ty());
- ChangedTypes.push_back(
- std::pair<int, Type *>(Arg.getArgNo(), Arg.getType()));
- } else
- ArgTypes.push_back(Arg.getType());
- }
- FunctionType *NewFTy =
- FunctionType::get(RetType, ArgTypes, F->getFunctionType()->isVarArg());
- Function *NewF =
- Function::Create(NewFTy, F->getLinkage(), F->getName(), *F->getParent());
-
- ValueToValueMapTy VMap;
- auto NewFArgIt = NewF->arg_begin();
- for (auto &Arg : F->args()) {
- StringRef ArgName = Arg.getName();
- NewFArgIt->setName(ArgName);
- VMap[&Arg] = &(*NewFArgIt++);
- }
- SmallVector<ReturnInst *, 8> Returns;
-
- CloneFunctionInto(NewF, F, VMap, CloneFunctionChangeType::LocalChangesOnly,
- Returns);
- NewF->takeName(F);
-
- NamedMDNode *FuncMD =
- F->getParent()->getOrInsertNamedMetadata("spv.cloned_funcs");
- SmallVector<Metadata *, 2> MDArgs;
- MDArgs.push_back(MDString::get(B.getContext(), NewF->getName()));
- for (auto &ChangedTyP : ChangedTypes)
- MDArgs.push_back(MDNode::get(
- B.getContext(),
- {ConstantAsMetadata::get(B.getInt32(ChangedTyP.first)),
- ValueAsMetadata::get(Constant::getNullValue(ChangedTyP.second))}));
- MDNode *ThisFuncMD = MDNode::get(B.getContext(), MDArgs);
- FuncMD->addOperand(ThisFuncMD);
-
- for (auto *U : make_early_inc_range(F->users())) {
- if (auto *CI = dyn_cast<CallInst>(U))
- CI->mutateFunctionType(NewF->getFunctionType());
- U->replaceUsesOfWith(F, NewF);
- }
- return NewF;
-}
-
std::string lowerLLVMIntrinsicName(IntrinsicInst *II) {
Function *IntrinsicFunc = II->getCalledFunction();
assert(IntrinsicFunc && "Missing function");
@@ -142,15 +84,16 @@ static Function *getOrCreateFunction(Module *M, Type *RetTy,
return NewF;
}
-static void lowerIntrinsicToFunction(Module *M, IntrinsicInst *Intrinsic) {
+static bool lowerIntrinsicToFunction(IntrinsicInst *Intrinsic) {
  // @llvm.memset.* intrinsic calls with constant value and length arguments
  // are emulated via "storing" a constant array to the destination. For other
  // cases we wrap the intrinsic in a @spirv.llvm_memset_* function and expand
  // the intrinsic to a loop via expandMemSetAsLoop().
if (auto *MSI = dyn_cast<MemSetInst>(Intrinsic))
if (isa<Constant>(MSI->getValue()) && isa<ConstantInt>(MSI->getLength()))
- return; // It is handled later using OpCopyMemorySized.
+ return false; // It is handled later using OpCopyMemorySized.
+ Module *M = Intrinsic->getModule();
std::string FuncName = lowerLLVMIntrinsicName(Intrinsic);
if (Intrinsic->isVolatile())
FuncName += ".volatile";
@@ -158,7 +101,7 @@ static void lowerIntrinsicToFunction(Module *M, IntrinsicInst *Intrinsic) {
Function *F = M->getFunction(FuncName);
if (F) {
Intrinsic->setCalledFunction(F);
- return;
+ return true;
}
// TODO copy arguments attributes: nocapture writeonly.
FunctionCallee FC =
@@ -202,14 +145,15 @@ static void lowerIntrinsicToFunction(Module *M, IntrinsicInst *Intrinsic) {
default:
break;
}
- return;
+ return true;
}
-static void lowerFunnelShifts(Module *M, IntrinsicInst *FSHIntrinsic) {
+static void lowerFunnelShifts(IntrinsicInst *FSHIntrinsic) {
// Get a separate function - otherwise, we'd have to rework the CFG of the
// current one. Then simply replace the intrinsic uses with a call to the new
// function.
// Generate LLVM IR for i* @spirv.llvm_fsh?_i* (i* %a, i* %b, i* %c)
+ Module *M = FSHIntrinsic->getModule();
FunctionType *FSHFuncTy = FSHIntrinsic->getFunctionType();
Type *FSHRetTy = FSHFuncTy->getReturnType();
const std::string FuncName = lowerLLVMIntrinsicName(FSHIntrinsic);
@@ -265,12 +209,13 @@ static void lowerFunnelShifts(Module *M, IntrinsicInst *FSHIntrinsic) {
FSHIntrinsic->setCalledFunction(FSHFunc);
}
-static void buildUMulWithOverflowFunc(Module *M, Function *UMulFunc) {
+static void buildUMulWithOverflowFunc(Function *UMulFunc) {
// The function body is already created.
if (!UMulFunc->empty())
return;
- BasicBlock *EntryBB = BasicBlock::Create(M->getContext(), "entry", UMulFunc);
+ BasicBlock *EntryBB = BasicBlock::Create(UMulFunc->getParent()->getContext(),
+ "entry", UMulFunc);
IRBuilder<> IRB(EntryBB);
// Build the actual unsigned multiplication logic with the overflow
// indication. Do unsigned multiplication Mul = A * B. Then check
@@ -288,65 +233,132 @@ static void buildUMulWithOverflowFunc(Module *M, Function *UMulFunc) {
IRB.CreateRet(Res);
}
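
For context, one standard way to detect unsigned multiplication overflow without widening the operands is the division check sketched below; this is only a scalar illustration of the idea, not necessarily the exact IR sequence buildUMulWithOverflowFunc emits.

#include <cstdint>
#include <utility>

// Mul wraps modulo 2^32 on overflow; dividing back detects the wrap.
static std::pair<uint32_t, bool> umulWithOverflow(uint32_t A, uint32_t B) {
  uint32_t Mul = A * B;
  bool Overflow = A != 0 && Mul / A != B;
  return {Mul, Overflow};
}
// umulWithOverflow(0x10000u, 0x10000u) -> {0, true}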
-static void lowerUMulWithOverflow(Module *M, IntrinsicInst *UMulIntrinsic) {
+static void lowerUMulWithOverflow(IntrinsicInst *UMulIntrinsic) {
// Get a separate function - otherwise, we'd have to rework the CFG of the
// current one. Then simply replace the intrinsic uses with a call to the new
// function.
+ Module *M = UMulIntrinsic->getModule();
FunctionType *UMulFuncTy = UMulIntrinsic->getFunctionType();
Type *FSHLRetTy = UMulFuncTy->getReturnType();
const std::string FuncName = lowerLLVMIntrinsicName(UMulIntrinsic);
Function *UMulFunc =
getOrCreateFunction(M, FSHLRetTy, UMulFuncTy->params(), FuncName);
- buildUMulWithOverflowFunc(M, UMulFunc);
+ buildUMulWithOverflowFunc(UMulFunc);
UMulIntrinsic->setCalledFunction(UMulFunc);
}
-static void substituteIntrinsicCalls(Module *M, Function *F) {
+// Substitutes calls to LLVM intrinsics with either calls to SPIR-V intrinsics
+// or calls to functions generated for them. Returns true if F was modified.
+bool SPIRVPrepareFunctions::substituteIntrinsicCalls(Function *F) {
+ bool Changed = false;
for (BasicBlock &BB : *F) {
for (Instruction &I : BB) {
auto Call = dyn_cast<CallInst>(&I);
if (!Call)
continue;
- Call->setTailCall(false);
Function *CF = Call->getCalledFunction();
if (!CF || !CF->isIntrinsic())
continue;
auto *II = cast<IntrinsicInst>(Call);
if (II->getIntrinsicID() == Intrinsic::memset ||
II->getIntrinsicID() == Intrinsic::bswap)
- lowerIntrinsicToFunction(M, II);
+ Changed |= lowerIntrinsicToFunction(II);
else if (II->getIntrinsicID() == Intrinsic::fshl ||
- II->getIntrinsicID() == Intrinsic::fshr)
- lowerFunnelShifts(M, II);
- else if (II->getIntrinsicID() == Intrinsic::umul_with_overflow)
- lowerUMulWithOverflow(M, II);
+ II->getIntrinsicID() == Intrinsic::fshr) {
+ lowerFunnelShifts(II);
+ Changed = true;
+ } else if (II->getIntrinsicID() == Intrinsic::umul_with_overflow) {
+ lowerUMulWithOverflow(II);
+ Changed = true;
+ }
}
}
+ return Changed;
+}
+
+// Returns F if no aggregate argument/return types are present, or a clone of
+// F with those types replaced by i32. The change in types is noted in the
+// 'spv.cloned_funcs' metadata for later restoration.
+Function *
+SPIRVPrepareFunctions::removeAggregateTypesFromSignature(Function *F) {
+ IRBuilder<> B(F->getContext());
+
+ bool IsRetAggr = F->getReturnType()->isAggregateType();
+ bool HasAggrArg =
+ std::any_of(F->arg_begin(), F->arg_end(), [](Argument &Arg) {
+ return Arg.getType()->isAggregateType();
+ });
+ bool DoClone = IsRetAggr || HasAggrArg;
+ if (!DoClone)
+ return F;
+ SmallVector<std::pair<int, Type *>, 4> ChangedTypes;
+ Type *RetType = IsRetAggr ? B.getInt32Ty() : F->getReturnType();
+ if (IsRetAggr)
+ ChangedTypes.push_back(std::pair<int, Type *>(-1, F->getReturnType()));
+ SmallVector<Type *, 4> ArgTypes;
+ for (const auto &Arg : F->args()) {
+ if (Arg.getType()->isAggregateType()) {
+ ArgTypes.push_back(B.getInt32Ty());
+ ChangedTypes.push_back(
+ std::pair<int, Type *>(Arg.getArgNo(), Arg.getType()));
+ } else
+ ArgTypes.push_back(Arg.getType());
+ }
+ FunctionType *NewFTy =
+ FunctionType::get(RetType, ArgTypes, F->getFunctionType()->isVarArg());
+ Function *NewF =
+ Function::Create(NewFTy, F->getLinkage(), F->getName(), *F->getParent());
+
+ ValueToValueMapTy VMap;
+ auto NewFArgIt = NewF->arg_begin();
+ for (auto &Arg : F->args()) {
+ StringRef ArgName = Arg.getName();
+ NewFArgIt->setName(ArgName);
+ VMap[&Arg] = &(*NewFArgIt++);
+ }
+ SmallVector<ReturnInst *, 8> Returns;
+
+ CloneFunctionInto(NewF, F, VMap, CloneFunctionChangeType::LocalChangesOnly,
+ Returns);
+ NewF->takeName(F);
+
+ NamedMDNode *FuncMD =
+ F->getParent()->getOrInsertNamedMetadata("spv.cloned_funcs");
+ SmallVector<Metadata *, 2> MDArgs;
+ MDArgs.push_back(MDString::get(B.getContext(), NewF->getName()));
+ for (auto &ChangedTyP : ChangedTypes)
+ MDArgs.push_back(MDNode::get(
+ B.getContext(),
+ {ConstantAsMetadata::get(B.getInt32(ChangedTyP.first)),
+ ValueAsMetadata::get(Constant::getNullValue(ChangedTyP.second))}));
+ MDNode *ThisFuncMD = MDNode::get(B.getContext(), MDArgs);
+ FuncMD->addOperand(ThisFuncMD);
+
+ for (auto *U : make_early_inc_range(F->users())) {
+ if (auto *CI = dyn_cast<CallInst>(U))
+ CI->mutateFunctionType(NewF->getFunctionType());
+ U->replaceUsesOfWith(F, NewF);
+ }
+ return NewF;
}
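As recorded by the loop above, each 'spv.cloned_funcs' operand pairs the cloned function's name with (index, original type) entries, where index -1 denotes the return type and the type itself is carried as a null value of that type. A small reader sketch under that layout (dumpClonedFuncTypes is a hypothetical helper, not part of the pass):

#include "llvm/IR/Constants.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static void dumpClonedFuncTypes(Module &M) {
  NamedMDNode *MD = M.getNamedMetadata("spv.cloned_funcs");
  if (!MD)
    return;
  for (MDNode *Entry : MD->operands()) {
    // Operand 0: the (renamed) function name; the rest: {index, original type}.
    StringRef FnName = cast<MDString>(Entry->getOperand(0))->getString();
    for (unsigned I = 1, E = Entry->getNumOperands(); I != E; ++I) {
      auto *Pair = cast<MDNode>(Entry->getOperand(I));
      int64_t Idx =
          mdconst::extract<ConstantInt>(Pair->getOperand(0))->getSExtValue();
      Type *OrigTy =
          cast<ValueAsMetadata>(Pair->getOperand(1))->getValue()->getType();
      // Idx == -1 refers to the return type, otherwise to the argument number.
      (void)FnName; (void)Idx; (void)OrigTy;
    }
  }
}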
bool SPIRVPrepareFunctions::runOnModule(Module &M) {
+ bool Changed = false;
for (Function &F : M)
- substituteIntrinsicCalls(&M, &F);
+ Changed |= substituteIntrinsicCalls(&F);
std::vector<Function *> FuncsWorklist;
- bool Changed = false;
for (auto &F : M)
FuncsWorklist.push_back(&F);
- for (auto *Func : FuncsWorklist) {
- Function *F = processFunctionSignature(Func);
-
- bool CreatedNewF = F != Func;
+ for (auto *F : FuncsWorklist) {
+ Function *NewF = removeAggregateTypesFromSignature(F);
- if (Func->isDeclaration()) {
- Changed |= CreatedNewF;
- continue;
+ if (NewF != F) {
+ F->eraseFromParent();
+ Changed = true;
}
-
- if (CreatedNewF)
- Func->eraseFromParent();
}
-
return Changed;
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVRegularizer.cpp b/llvm/lib/Target/SPIRV/SPIRVRegularizer.cpp
index 69681fd2a54e..3a51e29dcf16 100644
--- a/llvm/lib/Target/SPIRV/SPIRVRegularizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVRegularizer.cpp
@@ -171,9 +171,9 @@ void SPIRVRegularizer::visitCallInst(CallInst &CI) {
return;
auto MangledName = F->getName();
- size_t n;
- int status;
- char *NameStr = itaniumDemangle(F->getName().data(), nullptr, &n, &status);
+ char *NameStr = itaniumDemangle(F->getName().data());
+ if (!NameStr)
+ return;
StringRef DemangledName(NameStr);
// TODO: add support for other builtins.
diff --git a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
index 0a89f0151198..0f047b09c521 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVSubtarget.cpp
@@ -17,7 +17,7 @@
#include "SPIRVRegisterBankInfo.h"
#include "SPIRVTargetMachine.h"
#include "llvm/MC/TargetRegistry.h"
-#include "llvm/Support/Host.h"
+#include "llvm/TargetParser/Host.h"
using namespace llvm;
diff --git a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
index 856e54ab47c5..6721c60834bd 100644
--- a/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVTargetMachine.cpp
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
diff --git a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
index 9dcddf5f5e34..f4f3cdce1ac3 100644
--- a/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVUtils.cpp
@@ -77,24 +77,19 @@ std::string getStringImm(const MachineInstr &MI, unsigned StartIndex) {
void addNumImm(const APInt &Imm, MachineInstrBuilder &MIB) {
const auto Bitwidth = Imm.getBitWidth();
- switch (Bitwidth) {
- case 1:
- break; // Already handled.
- case 8:
- case 16:
- case 32:
+ if (Bitwidth == 1)
+ return; // Already handled
+ else if (Bitwidth <= 32) {
MIB.addImm(Imm.getZExtValue());
- break;
- case 64: {
+ return;
+ } else if (Bitwidth <= 64) {
uint64_t FullImm = Imm.getZExtValue();
uint32_t LowBits = FullImm & 0xffffffff;
uint32_t HighBits = (FullImm >> 32) & 0xffffffff;
MIB.addImm(LowBits).addImm(HighBits);
- break;
- }
- default:
- report_fatal_error("Unsupported constant bitwidth");
+ return;
}
+ report_fatal_error("Unsupported constant bitwidth");
}
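As a worked example of the 64-bit path above, stated with plain integers instead of APInt (splitImm64 is a hypothetical helper): the immediate 0x1122334455667788 is added as the two 32-bit operands 0x55667788 (low word first) and 0x11223344.

#include <cstdint>

// Low word is emitted first, then the high word, matching
// MIB.addImm(LowBits).addImm(HighBits) above.
static void splitImm64(uint64_t FullImm, uint32_t &LowBits, uint32_t &HighBits) {
  LowBits = FullImm & 0xffffffff;
  HighBits = (FullImm >> 32) & 0xffffffff;
}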
void buildOpName(Register Target, const StringRef &Name,
@@ -206,9 +201,9 @@ SPIRV::MemorySemantics::MemorySemantics getMemSemantics(AtomicOrdering Ord) {
case AtomicOrdering::Unordered:
case AtomicOrdering::Monotonic:
case AtomicOrdering::NotAtomic:
- default:
return SPIRV::MemorySemantics::None;
}
+ llvm_unreachable(nullptr);
}
MachineInstr *getDefInstrMaybeConstant(Register &ConstReg,
@@ -298,16 +293,11 @@ std::string getOclOrSpirvBuiltinDemangledName(StringRef Name) {
return std::string();
// Try to use the itanium demangler.
- size_t n;
- int Status;
- char *DemangledName = itaniumDemangle(Name.data(), nullptr, &n, &Status);
-
- if (Status == demangle_success) {
+ if (char *DemangledName = itaniumDemangle(Name.data())) {
std::string Result = DemangledName;
free(DemangledName);
return Result;
}
- free(DemangledName);
// Otherwise use simple demangling to return the function name.
if (IsNonMangledOCL || IsNonMangledSPIRV)
return Name.str();
@@ -332,16 +322,6 @@ std::string getOclOrSpirvBuiltinDemangledName(StringRef Name) {
return Name.substr(Start, Len).str();
}
-static bool isOpenCLBuiltinType(const StructType *SType) {
- return SType->isOpaque() && SType->hasName() &&
- SType->getName().startswith("opencl.");
-}
-
-static bool isSPIRVBuiltinType(const StructType *SType) {
- return SType->isOpaque() && SType->hasName() &&
- SType->getName().startswith("spirv.");
-}
-
const Type *getTypedPtrEltType(const Type *Ty) {
auto PType = dyn_cast<PointerType>(Ty);
if (!PType || PType->isOpaque())
@@ -349,9 +329,21 @@ const Type *getTypedPtrEltType(const Type *Ty) {
return PType->getNonOpaquePointerElementType();
}
+static bool hasBuiltinTypePrefix(StringRef Name) {
+ if (Name.starts_with("opencl.") || Name.starts_with("spirv."))
+ return true;
+ return false;
+}
+
bool isSpecialOpaqueType(const Type *Ty) {
- if (auto SType = dyn_cast<StructType>(getTypedPtrEltType(Ty)))
- return isOpenCLBuiltinType(SType) || isSPIRVBuiltinType(SType);
+ const StructType *SType = dyn_cast<StructType>(getTypedPtrEltType(Ty));
+ if (SType && SType->hasName())
+ return hasBuiltinTypePrefix(SType->getName());
+
+ if (const TargetExtType *EType =
+ dyn_cast<TargetExtType>(getTypedPtrEltType(Ty)))
+ return hasBuiltinTypePrefix(EType->getName());
+
return false;
}
} // namespace llvm
diff --git a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index 4f94392d4dae..9bfee26db806 100644
--- a/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -12,7 +12,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -31,6 +30,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -76,7 +76,7 @@ class SparcAsmParser : public MCTargetAsmParser {
SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
- bool ParseDirective(AsmToken DirectiveID) override;
+ ParseStatus parseDirective(AsmToken DirectiveID) override;
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
@@ -769,25 +769,23 @@ bool SparcAsmParser::ParseInstruction(ParseInstructionInfo &Info,
return false;
}
-bool SparcAsmParser::
-ParseDirective(AsmToken DirectiveID)
-{
+ParseStatus SparcAsmParser::parseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getString();
if (IDVal == ".register") {
// For now, ignore .register directive.
Parser.eatToEndOfStatement();
- return false;
+ return ParseStatus::Success;
}
if (IDVal == ".proc") {
// For compatibility, ignore this directive.
// (It's supposed to be an "optimization" in the Sun assembler)
Parser.eatToEndOfStatement();
- return false;
+ return ParseStatus::Success;
}
// Let the MC layer to handle other directives.
- return true;
+ return ParseStatus::NoMatch;
}
OperandMatchResultTy
diff --git a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index cc132d46de85..7e129101fefc 100644
--- a/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -250,8 +250,7 @@ bool Filler::delayHasHazard(MachineBasicBlock::iterator candidate,
return true;
}
- for (unsigned i = 0, e = candidate->getNumOperands(); i!= e; ++i) {
- const MachineOperand &MO = candidate->getOperand(i);
+ for (const MachineOperand &MO : candidate->operands()) {
if (!MO.isReg())
continue; // skip
diff --git a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 496c08f76a16..b7581c1979d8 100644
--- a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -302,7 +302,7 @@ DecodeStatus SparcDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
// Calling the auto-generated decoder function.
- if (STI.getFeatureBits()[Sparc::FeatureV9])
+ if (STI.hasFeature(Sparc::FeatureV9))
{
Result = decodeInstruction(DecoderTableSparcV932, Instr, Insn, Address, this, STI);
}
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index aa89488bbb62..2c0696e8048b 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -41,11 +41,14 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case Sparc::fixup_sparc_br19:
return (Value >> 2) & 0x7ffff;
- case Sparc::fixup_sparc_br16_2:
- return (Value >> 2) & 0xc000;
-
- case Sparc::fixup_sparc_br16_14:
- return (Value >> 2) & 0x3fff;
+ case Sparc::fixup_sparc_br16: {
+ // A.3 Branch on Integer Register with Prediction (BPr)
+ // Inst{21-20} = d16hi;
+ // Inst{13-0} = d16lo;
+ unsigned d16hi = (Value >> 16) & 0x3;
+ unsigned d16lo = (Value >> 2) & 0x3fff;
+ return (d16hi << 20) | d16lo;
+ }
case Sparc::fixup_sparc_hix22:
return (~Value >> 10) & 0x3fffff;
@@ -164,8 +167,7 @@ namespace {
{ "fixup_sparc_call30", 2, 30, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_sparc_br22", 10, 22, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_sparc_br19", 13, 19, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_sparc_br16_2", 10, 2, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_sparc_br16_14", 18, 14, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_sparc_br16", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_sparc_13", 19, 13, 0 },
{ "fixup_sparc_hi22", 10, 22, 0 },
{ "fixup_sparc_lo10", 22, 10, 0 },
@@ -211,8 +213,7 @@ namespace {
{ "fixup_sparc_call30", 0, 30, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_sparc_br22", 0, 22, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_sparc_br19", 0, 19, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_sparc_br16_2", 20, 2, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_sparc_br16_14", 0, 14, MCFixupKindInfo::FKF_IsPCRel },
+ { "fixup_sparc_br16", 32, 0, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_sparc_13", 0, 13, 0 },
{ "fixup_sparc_hi22", 0, 22, 0 },
{ "fixup_sparc_lo10", 0, 10, 0 },
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
index 9c50c41f6bf2..c48beab01229 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcELFObjectWriter.cpp
@@ -62,6 +62,8 @@ unsigned SparcELFObjectWriter::getRelocType(MCContext &Ctx,
case Sparc::fixup_sparc_call30: return ELF::R_SPARC_WDISP30;
case Sparc::fixup_sparc_br22: return ELF::R_SPARC_WDISP22;
case Sparc::fixup_sparc_br19: return ELF::R_SPARC_WDISP19;
+ case Sparc::fixup_sparc_br16:
+ return ELF::R_SPARC_WDISP16;
case Sparc::fixup_sparc_pc22: return ELF::R_SPARC_PC22;
case Sparc::fixup_sparc_pc10: return ELF::R_SPARC_PC10;
case Sparc::fixup_sparc_wplt30: return ELF::R_SPARC_WPLT30;
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h b/llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
index 701d8513e657..3b9132658989 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcFixupKinds.h
@@ -26,8 +26,7 @@ namespace llvm {
fixup_sparc_br19,
/// fixup_sparc_bpr - 16-bit fixup for bpr
- fixup_sparc_br16_2,
- fixup_sparc_br16_14,
+ fixup_sparc_br16,
/// fixup_sparc_13 - 13-bit fixup
fixup_sparc_13,
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
index fb22ddd91ba0..51a6732d05c6 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcInstPrinter.cpp
@@ -35,7 +35,7 @@ namespace Sparc {
#include "SparcGenAsmWriter.inc"
bool SparcInstPrinter::isV9(const MCSubtargetInfo &STI) const {
- return (STI.getFeatureBits()[Sparc::FeatureV9]) != 0;
+ return (STI.hasFeature(Sparc::FeatureV9)) != 0;
}
void SparcInstPrinter::printRegName(raw_ostream &OS, MCRegister Reg) const {
@@ -178,6 +178,8 @@ void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum,
default: break;
case SP::FBCOND:
case SP::FBCONDA:
+ case SP::FBCOND_V9:
+ case SP::FBCONDA_V9:
case SP::BPFCC:
case SP::BPFCCA:
case SP::BPFCCNT:
@@ -195,6 +197,10 @@ void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum,
// Make sure CC is a cp conditional flag.
CC = (CC < SPCC::CPCC_BEGIN) ? (CC + SPCC::CPCC_BEGIN) : CC;
break;
+ case SP::BPR:
+ case SP::BPRA:
+ case SP::BPRNT:
+ case SP::BPRANT:
case SP::MOVRri:
case SP::MOVRrr:
case SP::FMOVRS:
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index c4545ff56f74..f98a9dd138d4 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -12,11 +12,11 @@
#include "SparcMCAsmInfo.h"
#include "SparcMCExpr.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index ee460002fc58..93c6365a8ddd 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -26,12 +26,12 @@
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include <cassert>
#include <cstdint>
@@ -53,7 +53,7 @@ public:
SparcMCCodeEmitter &operator=(const SparcMCCodeEmitter &) = delete;
~SparcMCCodeEmitter() override = default;
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -87,11 +87,12 @@ public:
} // end anonymous namespace
-void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
unsigned Bits = getBinaryCodeForInstr(MI, Fixups, STI);
- support::endian::write(OS, Bits,
+ support::endian::write(CB, Bits,
Ctx.getAsmInfo()->isLittleEndian() ? support::little
: support::big);
@@ -235,10 +236,8 @@ getBranchOnRegTargetOpValue(const MCInst &MI, unsigned OpNo,
if (MO.isReg() || MO.isImm())
return getMachineOpValue(MI, MO, Fixups, STI);
- Fixups.push_back(MCFixup::create(0, MO.getExpr(),
- (MCFixupKind)Sparc::fixup_sparc_br16_2));
- Fixups.push_back(MCFixup::create(0, MO.getExpr(),
- (MCFixupKind)Sparc::fixup_sparc_br16_14));
+ Fixups.push_back(
+ MCFixup::create(0, MO.getExpr(), (MCFixupKind)Sparc::fixup_sparc_br16));
return 0;
}
diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
index d98ad26c96a9..d26a748b6e53 100644
--- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
+++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h
@@ -108,8 +108,6 @@ public:
return E->getKind() == MCExpr::Target;
}
- static bool classof(const SparcMCExpr *) { return true; }
-
static VariantKind parseVariantKind(StringRef name);
static bool printVariantKind(raw_ostream &OS, VariantKind Kind);
static Sparc::Fixups getFixupKind(VariantKind Kind);
diff --git a/llvm/lib/Target/Sparc/Sparc.td b/llvm/lib/Target/Sparc/Sparc.td
index da95602309a1..4cc713abe046 100644
--- a/llvm/lib/Target/Sparc/Sparc.td
+++ b/llvm/lib/Target/Sparc/Sparc.td
@@ -34,7 +34,7 @@ def FeatureV9
: SubtargetFeature<"v9", "IsV9", "true",
"Enable SPARC-V9 instructions">;
def FeatureV8Deprecated
- : SubtargetFeature<"deprecated-v8", "V8DeprecatedInsts", "true",
+ : SubtargetFeature<"deprecated-v8", "UseV8DeprecatedInsts", "true",
"Enable deprecated V8 instructions in V9 mode">;
def FeatureVIS
: SubtargetFeature<"vis", "IsVIS", "true",
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 913f133465b9..0aa3c875a14f 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -268,7 +268,7 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain, CallingConv::ID CallConv,
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Sparc32);
- SDValue Flag;
+ SDValue Glue;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Make room for the return address offset.
RetOps.push_back(SDValue());
@@ -294,17 +294,17 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain, CallingConv::ID CallConv,
Arg,
DAG.getConstant(1, DL, getVectorIdxTy(DAG.getDataLayout())));
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part0, Flag);
- Flag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part0, Glue);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
VA = RVLocs[++i]; // skip ahead to next loc
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Part1,
- Flag);
+ Glue);
} else
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Glue);
// Guarantee that all emitted copies are stuck together with flags.
- Flag = Chain.getValue(1);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
@@ -317,8 +317,8 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain, CallingConv::ID CallConv,
llvm_unreachable("sret virtual register not created in the entry block");
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, PtrVT);
- Chain = DAG.getCopyToReg(Chain, DL, SP::I0, Val, Flag);
- Flag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, DL, SP::I0, Val, Glue);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(SP::I0, PtrVT));
RetAddrOffset = 12; // CallInst + Delay Slot + Unimp
}
@@ -326,11 +326,11 @@ SparcTargetLowering::LowerReturn_32(SDValue Chain, CallingConv::ID CallConv,
RetOps[0] = Chain; // Update chain.
RetOps[1] = DAG.getConstant(RetAddrOffset, DL, MVT::i32);
- // Add the flag if we have it.
- if (Flag.getNode())
- RetOps.push_back(Flag);
+ // Add the glue if we have it.
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
- return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other, RetOps);
+ return DAG.getNode(SPISD::RET_GLUE, DL, MVT::Other, RetOps);
}
// Lower return values for the 64-bit ABI.
@@ -351,7 +351,7 @@ SparcTargetLowering::LowerReturn_64(SDValue Chain, CallingConv::ID CallConv,
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, RetCC_Sparc64);
- SDValue Flag;
+ SDValue Glue;
SmallVector<SDValue, 4> RetOps(1, Chain);
// The second operand on the return instruction is the return address offset.
@@ -396,20 +396,20 @@ SparcTargetLowering::LowerReturn_64(SDValue Chain, CallingConv::ID CallConv,
}
}
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Glue);
// Guarantee that all emitted copies are stuck together with flags.
- Flag = Chain.getValue(1);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
- if (Flag.getNode())
- RetOps.push_back(Flag);
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
- return DAG.getNode(SPISD::RET_FLAG, DL, MVT::Other, RetOps);
+ return DAG.getNode(SPISD::RET_GLUE, DL, MVT::Other, RetOps);
}
SDValue SparcTargetLowering::LowerFormalArguments(
@@ -584,7 +584,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_32(
};
unsigned NumAllocated = CCInfo.getFirstUnallocated(ArgRegs);
const MCPhysReg *CurArgReg = ArgRegs+NumAllocated, *ArgRegEnd = ArgRegs+6;
- unsigned ArgOffset = CCInfo.getNextStackOffset();
+ unsigned ArgOffset = CCInfo.getStackSize();
if (NumAllocated == 6)
ArgOffset += StackOffset;
else {
@@ -703,7 +703,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_64(
//
// The va_start intrinsic needs to know the offset to the first variable
// argument.
- unsigned ArgOffset = CCInfo.getNextStackOffset();
+ unsigned ArgOffset = CCInfo.getStackSize();
SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
// Skip the 128 bytes of register save area.
FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgArea +
@@ -773,8 +773,8 @@ bool SparcTargetLowering::IsEligibleForTailCallOptimization(
// Do not tail call opt if the stack is used to pass parameters.
// 64-bit targets have a slightly higher limit since the ABI requires
// to allocate some space even when all the parameters fit inside registers.
- unsigned StackOffsetLimit = Subtarget->is64Bit() ? 48 : 0;
- if (CCInfo.getNextStackOffset() > StackOffsetLimit)
+ unsigned StackSizeLimit = Subtarget->is64Bit() ? 48 : 0;
+ if (CCInfo.getStackSize() > StackSizeLimit)
return false;
// Do not tail call opt if either the callee or caller returns
@@ -816,7 +816,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
CCInfo, CLI, DAG.getMachineFunction());
// Get the size of the outgoing arguments stack space requirement.
- unsigned ArgsSize = CCInfo.getNextStackOffset();
+ unsigned ArgsSize = CCInfo.getStackSize();
// Keep stack frames 8-byte aligned.
ArgsSize = (ArgsSize+7) & ~7;
@@ -1012,15 +1012,15 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
- // The InFlag in necessary since all emitted instructions must be
+ // The InGlue is necessary since all emitted instructions must be
// stuck together.
- SDValue InFlag;
+ SDValue InGlue;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Register Reg = RegsToPass[i].first;
if (!isTailCall)
Reg = toCallerWindow(Reg);
- Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, RegsToPass[i].second, InGlue);
+ InGlue = Chain.getValue(1);
}
bool hasReturnsTwice = hasReturnsTwiceAttr(DAG, Callee, CLI.CB);
@@ -1058,8 +1058,8 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ if (InGlue.getNode())
+ Ops.push_back(InGlue);
if (isTailCall) {
DAG.getMachineFunction().getFrameInfo().setHasTailCall();
@@ -1067,10 +1067,10 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
}
Chain = DAG.getNode(SPISD::CALL, dl, NodeTys, Ops);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
- Chain = DAG.getCALLSEQ_END(Chain, ArgsSize, 0, InFlag, dl);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, ArgsSize, 0, InGlue, dl);
+ InGlue = Chain.getValue(1);
// Assign locations to each value returned by this call.
SmallVector<CCValAssign, 16> RVLocs;
@@ -1085,24 +1085,24 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
if (RVLocs[i].getLocVT() == MVT::v2i32) {
SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2i32);
SDValue Lo = DAG.getCopyFromReg(
- Chain, dl, toCallerWindow(RVLocs[i++].getLocReg()), MVT::i32, InFlag);
+ Chain, dl, toCallerWindow(RVLocs[i++].getLocReg()), MVT::i32, InGlue);
Chain = Lo.getValue(1);
- InFlag = Lo.getValue(2);
+ InGlue = Lo.getValue(2);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2i32, Vec, Lo,
DAG.getConstant(0, dl, MVT::i32));
SDValue Hi = DAG.getCopyFromReg(
- Chain, dl, toCallerWindow(RVLocs[i].getLocReg()), MVT::i32, InFlag);
+ Chain, dl, toCallerWindow(RVLocs[i].getLocReg()), MVT::i32, InGlue);
Chain = Hi.getValue(1);
- InFlag = Hi.getValue(2);
+ InGlue = Hi.getValue(2);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2i32, Vec, Hi,
DAG.getConstant(1, dl, MVT::i32));
InVals.push_back(Vec);
} else {
Chain =
DAG.getCopyFromReg(Chain, dl, toCallerWindow(RVLocs[i].getLocReg()),
- RVLocs[i].getValVT(), InFlag)
+ RVLocs[i].getValVT(), InGlue)
.getValue(1);
- InFlag = Chain.getValue(2);
+ InGlue = Chain.getValue(2);
InVals.push_back(Chain.getValue(0));
}
}
@@ -1204,7 +1204,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
// Called functions expect 6 argument words to exist in the stack frame, used
// or not.
unsigned StackReserved = 6 * 8u;
- unsigned ArgsSize = std::max(StackReserved, CCInfo.getNextStackOffset());
+ unsigned ArgsSize = std::max<unsigned>(StackReserved, CCInfo.getStackSize());
// Keep stack frames 16-byte aligned.
ArgsSize = alignTo(ArgsSize, 16);
@@ -1977,6 +1977,8 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
case SPISD::BRFCC: return "SPISD::BRFCC";
case SPISD::BRFCC_V9:
return "SPISD::BRFCC_V9";
+ case SPISD::BR_REG:
+ return "SPISD::BR_REG";
case SPISD::SELECT_ICC: return "SPISD::SELECT_ICC";
case SPISD::SELECT_XCC: return "SPISD::SELECT_XCC";
case SPISD::SELECT_FCC: return "SPISD::SELECT_FCC";
@@ -1989,7 +1991,7 @@ const char *SparcTargetLowering::getTargetNodeName(unsigned Opcode) const {
case SPISD::FTOX: return "SPISD::FTOX";
case SPISD::XTOF: return "SPISD::XTOF";
case SPISD::CALL: return "SPISD::CALL";
- case SPISD::RET_FLAG: return "SPISD::RET_FLAG";
+ case SPISD::RET_GLUE: return "SPISD::RET_GLUE";
case SPISD::GLOBAL_BASE_REG: return "SPISD::GLOBAL_BASE_REG";
case SPISD::FLUSHW: return "SPISD::FLUSHW";
case SPISD::TLS_ADD: return "SPISD::TLS_ADD";
@@ -2029,7 +2031,7 @@ void SparcTargetLowering::computeKnownBitsForTargetNode
Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
// Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
break;
}
}
@@ -2200,11 +2202,11 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
withTargetFlags(Op, addTF, DAG));
SDValue Chain = DAG.getEntryNode();
- SDValue InFlag;
+ SDValue InGlue;
Chain = DAG.getCALLSEQ_START(Chain, 1, 0, DL);
- Chain = DAG.getCopyToReg(Chain, DL, SP::O0, Argument, InFlag);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, DL, SP::O0, Argument, InGlue);
+ InGlue = Chain.getValue(1);
SDValue Callee = DAG.getTargetExternalSymbol("__tls_get_addr", PtrVT);
SDValue Symbol = withTargetFlags(Op, callTF, DAG);
@@ -2217,12 +2219,12 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
Symbol,
DAG.getRegister(SP::O0, PtrVT),
DAG.getRegisterMask(Mask),
- InFlag};
+ InGlue};
Chain = DAG.getNode(SPISD::TLS_CALL, DL, NodeTys, Ops);
- InFlag = Chain.getValue(1);
- Chain = DAG.getCALLSEQ_END(Chain, 1, 0, InFlag, DL);
- InFlag = Chain.getValue(1);
- SDValue Ret = DAG.getCopyFromReg(Chain, DL, SP::O0, PtrVT, InFlag);
+ InGlue = Chain.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, 1, 0, InGlue, DL);
+ InGlue = Chain.getValue(1);
+ SDValue Ret = DAG.getCopyFromReg(Chain, DL, SP::O0, PtrVT, InGlue);
if (model != TLSModel::LocalDynamic)
return Ret;
@@ -2582,7 +2584,7 @@ static SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG,
static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI, bool hasHardQuad,
- bool isV9) {
+ bool isV9, bool is64Bit) {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
@@ -2599,6 +2601,15 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
// Get the condition flag.
SDValue CompareFlag;
if (LHS.getValueType().isInteger()) {
+ // On V9 processors running in 64-bit mode, if CC compares two `i64`s
+ // and the RHS is zero, we might be able to use a specialized branch.
+ const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
+ if (is64Bit && isV9 && LHS.getValueType() == MVT::i64 && RHSC &&
+ RHSC->isZero() && !ISD::isUnsignedIntSetCC(CC))
+ return DAG.getNode(SPISD::BR_REG, dl, MVT::Other, Chain, Dest,
+ DAG.getConstant(intCondCCodeToRcond(CC), dl, MVT::i32),
+ LHS);
+
CompareFlag = DAG.getNode(SPISD::CMPICC, dl, MVT::Glue, LHS, RHS);
if (SPCC == ~0U) SPCC = IntCondCCodeToICC(CC);
if (isV9)
@@ -3144,10 +3155,8 @@ static SDValue LowerUMULO_SMULO(SDValue Op, SelectionDAG &DAG,
SDValue MulResult = TLI.makeLibCall(DAG,
RTLIB::MUL_I128, WideVT,
Args, CallOptions, dl).first;
- SDValue BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT,
- MulResult, DAG.getIntPtrConstant(0, dl));
- SDValue TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT,
- MulResult, DAG.getIntPtrConstant(1, dl));
+ SDValue BottomHalf, TopHalf;
+ std::tie(BottomHalf, TopHalf) = DAG.SplitScalar(MulResult, dl, VT, VT);
if (isSigned) {
SDValue Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
TopHalf = DAG.getSetCC(dl, MVT::i32, TopHalf, Tmp1, ISD::SETNE);
@@ -3215,7 +3224,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG, *this,
hasHardQuad);
case ISD::BR_CC:
- return LowerBR_CC(Op, DAG, *this, hasHardQuad, isV9);
+ return LowerBR_CC(Op, DAG, *this, hasHardQuad, isV9, is64Bit);
case ISD::SELECT_CC:
return LowerSELECT_CC(Op, DAG, *this, hasHardQuad, isV9, is64Bit);
case ISD::VASTART: return LowerVASTART(Op, DAG, *this);
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h
index 563a832ee61e..5504dcd464fb 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -31,6 +31,7 @@ namespace llvm {
BPXCC, // Branch to dest on xcc condition, with prediction (64-bit only).
BRFCC, // Branch to dest on fcc condition
BRFCC_V9, // Branch to dest on fcc condition (v9 variant).
+ BR_REG, // Branch to dest using the comparison of a register with zero.
SELECT_ICC, // Select between two values using the current ICC flags.
SELECT_XCC, // Select between two values using the current XCC flags.
SELECT_FCC, // Select between two values using the current FCC flags.
@@ -46,7 +47,7 @@ namespace llvm {
XTOF, // Int64 to FP within a FP register.
CALL, // A call instruction.
- RET_FLAG, // Return with a flag operand.
+ RET_GLUE, // Return with a glue operand.
GLOBAL_BASE_REG, // Global base reg for PIC.
FLUSHW, // FLUSH register windows to stack.
diff --git a/llvm/lib/Target/Sparc/SparcInstr64Bit.td b/llvm/lib/Target/Sparc/SparcInstr64Bit.td
index 77f203fd0d68..0a6479487418 100644
--- a/llvm/lib/Target/Sparc/SparcInstr64Bit.td
+++ b/llvm/lib/Target/Sparc/SparcInstr64Bit.td
@@ -356,15 +356,15 @@ def FMOVQ_XCC : F4_3<0b110101, 0b000011, (outs QFPRegs:$rd),
// Branch On integer register with Prediction (BPr).
let isBranch = 1, isTerminator = 1, hasDelaySlot = 1 in
-multiclass BranchOnReg<bits<3> cond, string OpcStr> {
- def napt : F2_4<cond, 0, 1, (outs), (ins I64Regs:$rs1, bprtarget16:$imm16),
- !strconcat(OpcStr, " $rs1, $imm16"), []>;
- def apt : F2_4<cond, 1, 1, (outs), (ins I64Regs:$rs1, bprtarget16:$imm16),
- !strconcat(OpcStr, ",a $rs1, $imm16"), []>;
- def napn : F2_4<cond, 0, 0, (outs), (ins I64Regs:$rs1, bprtarget16:$imm16),
- !strconcat(OpcStr, ",pn $rs1, $imm16"), []>;
- def apn : F2_4<cond, 1, 0, (outs), (ins I64Regs:$rs1, bprtarget16:$imm16),
- !strconcat(OpcStr, ",a,pn $rs1, $imm16"), []>;
+multiclass BranchOnReg<list<dag> CCPattern> {
+ def R : F2_4<0, 1, (outs), (ins bprtarget16:$imm16, RegCCOp:$rcond, I64Regs:$rs1),
+ "br$rcond $rs1, $imm16", CCPattern>;
+ def RA : F2_4<1, 1, (outs), (ins bprtarget16:$imm16, RegCCOp:$rcond, I64Regs:$rs1),
+ "br$rcond,a $rs1, $imm16", []>;
+ def RNT : F2_4<0, 0, (outs), (ins bprtarget16:$imm16, RegCCOp:$rcond, I64Regs:$rs1),
+ "br$rcond,pn $rs1, $imm16", []>;
+ def RANT : F2_4<1, 0, (outs), (ins bprtarget16:$imm16, RegCCOp:$rcond, I64Regs:$rs1),
+ "br$rcond,a,pn $rs1, $imm16", []>;
}
multiclass bpr_alias<string OpcStr, Instruction NAPT, Instruction APT> {
@@ -374,19 +374,8 @@ multiclass bpr_alias<string OpcStr, Instruction NAPT, Instruction APT> {
(APT I64Regs:$rs1, bprtarget16:$imm16), 0>;
}
-defm BPZ : BranchOnReg<0b001, "brz">;
-defm BPLEZ : BranchOnReg<0b010, "brlez">;
-defm BPLZ : BranchOnReg<0b011, "brlz">;
-defm BPNZ : BranchOnReg<0b101, "brnz">;
-defm BPGZ : BranchOnReg<0b110, "brgz">;
-defm BPGEZ : BranchOnReg<0b111, "brgez">;
-
-defm : bpr_alias<"brz", BPZnapt, BPZapt >;
-defm : bpr_alias<"brlez", BPLEZnapt, BPLEZapt>;
-defm : bpr_alias<"brlz", BPLZnapt, BPLZapt >;
-defm : bpr_alias<"brnz", BPNZnapt, BPNZapt >;
-defm : bpr_alias<"brgz", BPGZnapt, BPGZapt >;
-defm : bpr_alias<"brgez", BPGEZnapt, BPGEZapt>;
+let Predicates = [Is64Bit] in
+ defm BP : BranchOnReg<[(SPbrreg bb:$imm16, imm:$rcond, i64:$rs1)]>;
// Move integer register on register condition (MOVr).
let Predicates = [Is64Bit], Constraints = "$f = $rd" in {
diff --git a/llvm/lib/Target/Sparc/SparcInstrAliases.td b/llvm/lib/Target/Sparc/SparcInstrAliases.td
index f10021321406..01c3696cc7bc 100644
--- a/llvm/lib/Target/Sparc/SparcInstrAliases.td
+++ b/llvm/lib/Target/Sparc/SparcInstrAliases.td
@@ -295,6 +295,36 @@ multiclass cp_cond_alias<string cond, int condVal> {
// Instruction aliases for register conditional branches and moves.
multiclass reg_cond_alias<string rcond, int condVal> {
+ // br<rcond> $rs1, $imm
+ def : InstAlias<!strconcat(!strconcat("br", rcond), " $rs1, $imm"),
+ (BPR bprtarget16:$imm, condVal, I64Regs:$rs1)>,
+ Requires<[Is64Bit]>;
+
+ // br<rcond>,pt $rs1, $imm
+ def : InstAlias<!strconcat(!strconcat("br", rcond), ",pt $rs1, $imm"),
+ (BPR bprtarget16:$imm, condVal, I64Regs:$rs1)>,
+ Requires<[Is64Bit]>;
+
+ // br<rcond>,pn $rs1, $imm
+ def : InstAlias<!strconcat(!strconcat("br", rcond), ",pn $rs1, $imm"),
+ (BPRNT bprtarget16:$imm, condVal, I64Regs:$rs1)>,
+ Requires<[Is64Bit]>;
+
+ // br<rcond>,a $rs1, $imm
+ def : InstAlias<!strconcat(!strconcat("br", rcond), ",a $rs1, $imm"),
+ (BPRA bprtarget16:$imm, condVal, I64Regs:$rs1)>,
+ Requires<[Is64Bit]>;
+
+ // br<rcond>,a,pt $rs1, $imm
+ def : InstAlias<!strconcat(!strconcat("br", rcond), ",a,pt $rs1, $imm"),
+ (BPRA bprtarget16:$imm, condVal, I64Regs:$rs1)>,
+ Requires<[Is64Bit]>;
+
+ // br<rcond>,a,pn $rs1, $imm
+ def : InstAlias<!strconcat(!strconcat("br", rcond), ",a,pn $rs1, $imm"),
+ (BPRANT bprtarget16:$imm, condVal, I64Regs:$rs1)>,
+ Requires<[Is64Bit]>;
+
defm : regcond_mov_alias<rcond, condVal,
MOVRrr, MOVRri,
FMOVRS, FMOVRD, FMOVRQ>,
diff --git a/llvm/lib/Target/Sparc/SparcInstrFormats.td b/llvm/lib/Target/Sparc/SparcInstrFormats.td
index 522dcd96a112..c67b591ab98a 100644
--- a/llvm/lib/Target/Sparc/SparcInstrFormats.td
+++ b/llvm/lib/Target/Sparc/SparcInstrFormats.td
@@ -83,17 +83,18 @@ class F2_3<bits<3> op2Val, bit annul, bit pred,
let Inst{18-0} = imm19;
}
-class F2_4<bits<3> cond, bit annul, bit pred, dag outs, dag ins,
+class F2_4<bit annul, bit pred, dag outs, dag ins,
string asmstr, list<dag> pattern, InstrItinClass itin = NoItinerary>
: InstSP<outs, ins, asmstr, pattern, itin> {
bits<16> imm16;
bits<5> rs1;
+ bits<3> rcond;
let op = 0; // op = 0
let Inst{29} = annul;
let Inst{28} = 0;
- let Inst{27-25} = cond;
+ let Inst{27-25} = rcond;
let Inst{24-22} = 0b011;
let Inst{21-20} = imm16{15-14};
let Inst{19} = pred;
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index 63f662c41f93..90662cd87dcf 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -28,6 +28,14 @@ using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "SparcGenInstrInfo.inc"
+static cl::opt<unsigned> BPccDisplacementBits(
+ "sparc-bpcc-offset-bits", cl::Hidden, cl::init(19),
+ cl::desc("Restrict range of BPcc/FBPfcc instructions (DEBUG)"));
+
+static cl::opt<unsigned>
+ BPrDisplacementBits("sparc-bpr-offset-bits", cl::Hidden, cl::init(16),
+ cl::desc("Restrict range of BPr instructions (DEBUG)"));
+
// Pin the vtable to this file.
void SparcInstrInfo::anchor() {}
@@ -73,11 +81,6 @@ unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
return 0;
}
-static bool IsIntegerCC(unsigned CC)
-{
- return (CC <= SPCC::ICC_VC);
-}
-
static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
{
switch(CC) {
@@ -155,9 +158,7 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
llvm_unreachable("Invalid cond code");
}
-static bool isUncondBranchOpcode(int Opc) {
- return Opc == SP::BA || Opc == SP::BPA;
-}
+static bool isUncondBranchOpcode(int Opc) { return Opc == SP::BA; }
static bool isI32CondBranchOpcode(int Opc) {
return Opc == SP::BCOND || Opc == SP::BPICC || Opc == SP::BPICCA ||
@@ -169,11 +170,19 @@ static bool isI64CondBranchOpcode(int Opc) {
Opc == SP::BPXCCANT;
}
-static bool isFCondBranchOpcode(int Opc) { return Opc == SP::FBCOND; }
+static bool isRegCondBranchOpcode(int Opc) {
+ return Opc == SP::BPR || Opc == SP::BPRA || Opc == SP::BPRNT ||
+ Opc == SP::BPRANT;
+}
+
+static bool isFCondBranchOpcode(int Opc) {
+ return Opc == SP::FBCOND || Opc == SP::FBCONDA || Opc == SP::FBCOND_V9 ||
+ Opc == SP::FBCONDA_V9;
+}
static bool isCondBranchOpcode(int Opc) {
return isI32CondBranchOpcode(Opc) || isI64CondBranchOpcode(Opc) ||
- isFCondBranchOpcode(Opc);
+ isRegCondBranchOpcode(Opc) || isFCondBranchOpcode(Opc);
}
static bool isIndirectBranchOpcode(int Opc) {
@@ -190,9 +199,48 @@ static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
Cond.push_back(MachineOperand::CreateImm(Opc));
Cond.push_back(MachineOperand::CreateImm(CC));
+ // A branch on register contents needs another argument to indicate
+ // the register it branches on.
+ if (isRegCondBranchOpcode(Opc)) {
+ Register Reg = LastInst->getOperand(2).getReg();
+ Cond.push_back(MachineOperand::CreateReg(Reg, false));
+ }
+
Target = LastInst->getOperand(0).getMBB();
}
+MachineBasicBlock *
+SparcInstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected opcode!");
+ case SP::BA:
+ case SP::BCOND:
+ case SP::BCONDA:
+ case SP::FBCOND:
+ case SP::FBCONDA:
+ case SP::BPICC:
+ case SP::BPICCA:
+ case SP::BPICCNT:
+ case SP::BPICCANT:
+ case SP::BPXCC:
+ case SP::BPXCCA:
+ case SP::BPXCCNT:
+ case SP::BPXCCANT:
+ case SP::BPFCC:
+ case SP::BPFCCA:
+ case SP::BPFCCNT:
+ case SP::BPFCCANT:
+ case SP::FBCOND_V9:
+ case SP::FBCONDA_V9:
+ case SP::BPR:
+ case SP::BPRA:
+ case SP::BPRNT:
+ case SP::BPRANT:
+ return MI.getOperand(0).getMBB();
+ }
+}
+
bool SparcInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
@@ -283,38 +331,44 @@ unsigned SparcInstrInfo::insertBranch(MachineBasicBlock &MBB,
const DebugLoc &DL,
int *BytesAdded) const {
assert(TBB && "insertBranch must not be told to insert a fallthrough");
- assert((Cond.size() <= 2) &&
- "Sparc branch conditions should have at most two components!");
- assert(!BytesAdded && "code size not handled");
+ assert((Cond.size() <= 3) &&
+ "Sparc branch conditions should have at most three components!");
if (Cond.empty()) {
assert(!FBB && "Unconditional branch with multiple successors!");
- BuildMI(&MBB, DL, get(Subtarget.isV9() ? SP::BPA : SP::BA)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB);
+ if (BytesAdded)
+ *BytesAdded = 8;
return 1;
}
// Conditional branch
unsigned Opc = Cond[0].getImm();
unsigned CC = Cond[1].getImm();
-
- if (IsIntegerCC(CC)) {
- BuildMI(&MBB, DL, get(Opc)).addMBB(TBB).addImm(CC);
+ if (isRegCondBranchOpcode(Opc)) {
+ Register Reg = Cond[2].getReg();
+ BuildMI(&MBB, DL, get(Opc)).addMBB(TBB).addImm(CC).addReg(Reg);
} else {
- BuildMI(&MBB, DL, get(SP::FBCOND)).addMBB(TBB).addImm(CC);
+ BuildMI(&MBB, DL, get(Opc)).addMBB(TBB).addImm(CC);
}
- if (!FBB)
+
+ if (!FBB) {
+ if (BytesAdded)
+ *BytesAdded = 8;
return 1;
+ }
- BuildMI(&MBB, DL, get(Subtarget.isV9() ? SP::BPA : SP::BA)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(SP::BA)).addMBB(FBB);
+ if (BytesAdded)
+ *BytesAdded = 16;
return 2;
}
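The BytesAdded values above follow from the delay-slot convention that getInstSizeInBytes applies later in this file: every 4-byte branch is paired with a 4-byte delay slot, so a single emitted branch accounts for 8 bytes and the conditional-plus-unconditional pair for 16. Spelled out (constants are illustrative only):

static constexpr unsigned BranchPlusDelaySlotBytes = 4 + 4;                    // instruction + delay slot
static constexpr unsigned SingleBranchBytes = BranchPlusDelaySlotBytes;        // the !FBB case: 8
static constexpr unsigned CondPlusUncondBytes = 2 * BranchPlusDelaySlotBytes;  // two branches: 16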
unsigned SparcInstrInfo::removeBranch(MachineBasicBlock &MBB,
int *BytesRemoved) const {
- assert(!BytesRemoved && "code size not handled");
-
MachineBasicBlock::iterator I = MBB.end();
unsigned Count = 0;
+ int Removed = 0;
while (I != MBB.begin()) {
--I;
@@ -325,21 +379,62 @@ unsigned SparcInstrInfo::removeBranch(MachineBasicBlock &MBB,
!isUncondBranchOpcode(I->getOpcode()))
break; // Not a branch
+ Removed += getInstSizeInBytes(*I);
I->eraseFromParent();
I = MBB.end();
++Count;
}
+
+ if (BytesRemoved)
+ *BytesRemoved = Removed;
return Count;
}
bool SparcInstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
- assert(Cond.size() <= 2);
+ assert(Cond.size() <= 3);
SPCC::CondCodes CC = static_cast<SPCC::CondCodes>(Cond[1].getImm());
Cond[1].setImm(GetOppositeBranchCondition(CC));
return false;
}
+bool SparcInstrInfo::isBranchOffsetInRange(unsigned BranchOpc,
+ int64_t Offset) const {
+ assert((Offset & 0b11) == 0 && "Malformed branch offset");
+ switch (BranchOpc) {
+ case SP::BA:
+ case SP::BCOND:
+ case SP::BCONDA:
+ case SP::FBCOND:
+ case SP::FBCONDA:
+ return isIntN(22, Offset >> 2);
+
+ case SP::BPICC:
+ case SP::BPICCA:
+ case SP::BPICCNT:
+ case SP::BPICCANT:
+ case SP::BPXCC:
+ case SP::BPXCCA:
+ case SP::BPXCCNT:
+ case SP::BPXCCANT:
+ case SP::BPFCC:
+ case SP::BPFCCA:
+ case SP::BPFCCNT:
+ case SP::BPFCCANT:
+ case SP::FBCOND_V9:
+ case SP::FBCONDA_V9:
+ return isIntN(BPccDisplacementBits, Offset >> 2);
+
+ case SP::BPR:
+ case SP::BPRA:
+ case SP::BPRNT:
+ case SP::BPRANT:
+ return isIntN(BPrDisplacementBits, Offset >> 2);
+ }
+
+ llvm_unreachable("Unknown branch instruction!");
+}
+
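Because isBranchOffsetInRange above checks isIntN(N, Offset >> 2), the usable reach is an N-bit signed word displacement scaled by 4 bytes: roughly +/-8 MiB for the 22-bit Bicc/FBfcc forms, +/-1 MiB for the 19-bit BPcc/FBPfcc forms, and +/-128 KiB for the 16-bit BPr forms, assuming the default values of the cl::opt knobs above. A small sketch of that arithmetic (branchReachBytes is a hypothetical helper):

#include <cstdint>

// Approximate reach in bytes of a branch with an N-bit signed word
// displacement: 2^(N-1) words of 4 bytes (the exact positive bound is one
// word less).
static int64_t branchReachBytes(unsigned DispBits) {
  return (int64_t(1) << (DispBits - 1)) * 4;
}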
void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg,
@@ -530,6 +625,23 @@ Register SparcInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
return GlobalBaseReg;
}
+unsigned SparcInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
+
+ if (MI.isInlineAsm()) {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const char *AsmStr = MI.getOperand(0).getSymbolName();
+ return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
+ }
+
+ // If the instruction has a delay slot, be conservative and also include
+ // it for sizing purposes. This is done so that the BranchRelaxation pass
+ // will not mistakenly mark out-of-range branches as in-range.
+ if (MI.hasDelaySlot())
+ return get(Opcode).getSize() * 2;
+ return get(Opcode).getSize();
+}
+
bool SparcInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
switch (MI.getOpcode()) {
case TargetOpcode::LOAD_STACK_GUARD: {
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.h b/llvm/lib/Target/Sparc/SparcInstrInfo.h
index 39cf791c2173..7056d6babe17 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.h
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.h
@@ -64,6 +64,8 @@ public:
unsigned isStoreToStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
+ MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
+
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
@@ -80,6 +82,9 @@ public:
bool
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ /// Determine if the branch target is in range.
+ bool isBranchOffsetInRange(unsigned BranchOpc, int64_t Offset) const override;
+
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg,
bool KillSrc) const override;
@@ -99,6 +104,10 @@ public:
Register getGlobalBaseReg(MachineFunction *MF) const;
+ /// GetInstSize - Return the number of bytes of code the specified
+ /// instruction may be. This returns the maximum number of bytes.
+ unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
+
// Lower pseudo instructions after register allocation.
bool expandPostRAPseudo(MachineInstr &MI) const override;
};
diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td
index 2c45a7218d04..3d602e7e4376 100644
--- a/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -72,7 +72,7 @@ def HasFSMULD : Predicate<"!Subtarget->hasNoFSMULD()">;
// V8, or when it is V9 but the V8 deprecated instructions are efficient enough
// to use when appropriate. In either of these cases, the instruction selector
// will pick deprecated instructions.
-def UseDeprecatedInsts : Predicate<"Subtarget->useDeprecatedV8Instructions()">;
+def UseDeprecatedInsts : Predicate<"Subtarget->useV8DeprecatedInsts()">;
//===----------------------------------------------------------------------===//
// Instruction Pattern Stuff
@@ -224,6 +224,8 @@ def SDTSPcmpfcc :
SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
def SDTSPbrcc :
SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>]>;
+def SDTSPbrreg :
+SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>, SDTCisVT<2, i64>]>;
def SDTSPselectcc :
SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisVT<3, i32>]>;
def SDTSPselectreg :
@@ -253,6 +255,7 @@ def SPbpicc : SDNode<"SPISD::BPICC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
def SPbpxcc : SDNode<"SPISD::BPXCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
def SPbrfcc : SDNode<"SPISD::BRFCC", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
def SPbrfccv9 : SDNode<"SPISD::BRFCC_V9", SDTSPbrcc, [SDNPHasChain, SDNPInGlue]>;
+def SPbrreg : SDNode<"SPISD::BR_REG", SDTSPbrreg, [SDNPHasChain, SDNPInGlue]>;
def SPhi : SDNode<"SPISD::Hi", SDTIntUnaryOp>;
def SPlo : SDNode<"SPISD::Lo", SDTIntUnaryOp>;
@@ -288,7 +291,7 @@ def tailcall : SDNode<"SPISD::TAIL_CALL", SDT_SPCall,
SDNPVariadic]>;
def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
-def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet,
+def retglue : SDNode<"SPISD::RET_GLUE", SDT_SPRet,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def flushw : SDNode<"SPISD::FLUSHW", SDTNone,
@@ -850,15 +853,8 @@ class BranchPredictAlways<dag ins, string asmstr, list<dag> pattern>
: F2_3<0b001, 0, 1, (outs), ins, asmstr, pattern>;
}
-let cond = 8 in {
- // If we're compiling for v9, prefer BPA rather than BA
- // TODO: Disallow BA emission when FeatureV8Deprecated isn't enabled
- let Predicates = [HasV9], cc = 0b00 in
- def BPA : BranchPredictAlways<(ins bprtarget:$imm19),
- "ba %icc, $imm19", [(br bb:$imm19)]>;
-
+let cond = 8 in
def BA : BranchAlways<(ins brtarget:$imm22), "ba $imm22", [(br bb:$imm22)]>;
-}
let isBranch = 1, isTerminator = 1, hasDelaySlot = 1 in {
@@ -1041,7 +1037,7 @@ let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1,
def RETL: F3_2<2, 0b111000,
(outs), (ins i32imm:$simm13),
"jmp %o7+$simm13",
- [(retflag simm13:$simm13)],
+ [(retglue simm13:$simm13)],
IIC_jmp_or_call>;
let rd = 0, rs1 = 31 in
diff --git a/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/llvm/lib/Target/Sparc/SparcSubtarget.cpp
index 618a8633f0a9..81c2137ea730 100644
--- a/llvm/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -27,28 +27,6 @@ void SparcSubtarget::anchor() { }
SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
- UseSoftMulDiv = false;
- IsV9 = false;
- IsLeon = false;
- V8DeprecatedInsts = false;
- IsVIS = false;
- IsVIS2 = false;
- IsVIS3 = false;
- HasHardQuad = false;
- UsePopc = false;
- UseSoftFloat = false;
- HasNoFSMULD = false;
- HasNoFMULS = false;
-
- // Leon features
- HasLeonCasa = false;
- HasUmacSmac = false;
- HasPWRPSR = false;
- InsertNOPLoad = false;
- FixAllFDIVSQRT = false;
- DetectRoundChange = false;
- HasLeonCycleCounter = false;
-
// Determine default and user specified characteristics
std::string CPUName = std::string(CPU);
if (CPUName.empty())
diff --git a/llvm/lib/Target/Sparc/SparcSubtarget.h b/llvm/lib/Target/Sparc/SparcSubtarget.h
index 82a4aa510355..8e3d05d5d7e5 100644
--- a/llvm/lib/Target/Sparc/SparcSubtarget.h
+++ b/llvm/lib/Target/Sparc/SparcSubtarget.h
@@ -16,10 +16,10 @@
#include "SparcFrameLowering.h"
#include "SparcISelLowering.h"
#include "SparcInstrInfo.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/TargetParser/Triple.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -31,26 +31,12 @@ class StringRef;
class SparcSubtarget : public SparcGenSubtargetInfo {
Triple TargetTriple;
virtual void anchor();
- bool UseSoftMulDiv;
- bool IsV9;
- bool IsLeon;
- bool V8DeprecatedInsts;
- bool IsVIS, IsVIS2, IsVIS3;
+
bool Is64Bit;
- bool HasHardQuad;
- bool UsePopc;
- bool UseSoftFloat;
- bool HasNoFSMULD;
- bool HasNoFMULS;
-
- // LEON features
- bool HasUmacSmac;
- bool HasLeonCasa;
- bool HasPWRPSR;
- bool InsertNOPLoad;
- bool FixAllFDIVSQRT;
- bool DetectRoundChange;
- bool HasLeonCycleCounter;
+
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
+ bool ATTRIBUTE = DEFAULT;
+#include "SparcGenSubtargetInfo.inc"
SparcInstrInfo InstrInfo;
SparcTargetLowering TLInfo;
@@ -77,27 +63,9 @@ public:
bool enableMachineScheduler() const override;
- bool useSoftMulDiv() const { return UseSoftMulDiv; }
- bool isV9() const { return IsV9; }
- bool isLeon() const { return IsLeon; }
- bool isVIS() const { return IsVIS; }
- bool isVIS2() const { return IsVIS2; }
- bool isVIS3() const { return IsVIS3; }
- bool useDeprecatedV8Instructions() const { return V8DeprecatedInsts; }
- bool hasHardQuad() const { return HasHardQuad; }
- bool usePopc() const { return UsePopc; }
- bool useSoftFloat() const { return UseSoftFloat; }
- bool hasNoFSMULD() const { return HasNoFSMULD; }
- bool hasNoFMULS() const { return HasNoFMULS; }
-
- // Leon options
- bool hasUmacSmac() const { return HasUmacSmac; }
- bool hasLeonCasa() const { return HasLeonCasa; }
- bool hasPWRPSR() const { return HasPWRPSR; }
- bool insertNOPLoad() const { return InsertNOPLoad; }
- bool fixAllFDIVSQRT() const { return FixAllFDIVSQRT; }
- bool detectRoundChange() const { return DetectRoundChange; }
- bool hasLeonCycleCounter() const { return HasLeonCycleCounter; }
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
+ bool GETTER() const { return ATTRIBUTE; }
+#include "SparcGenSubtargetInfo.inc"
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
diff --git a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index 58faaafc29d6..577dc1351de9 100644
--- a/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -17,7 +17,6 @@
#include "TargetInfo/SparcTargetInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/TargetRegistry.h"
#include <optional>
using namespace llvm;
@@ -32,6 +31,10 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSparcTarget() {
initializeSparcDAGToDAGISelPass(PR);
}
+static cl::opt<bool>
+ BranchRelaxation("sparc-enable-branch-relax", cl::Hidden, cl::init(true),
+ cl::desc("Relax out of range conditional branches"));
+
static std::string computeDataLayout(const Triple &T, bool is64Bit) {
// Sparc is typically big endian, but some are little.
std::string Ret = T.getArch() == Triple::sparcel ? "e" : "E";
@@ -182,6 +185,9 @@ bool SparcPassConfig::addInstSelector() {
}
void SparcPassConfig::addPreEmitPass(){
+ if (BranchRelaxation)
+ addPass(&BranchRelaxationPassID);
+
addPass(createSparcDelaySlotFillerPass());
if (this->getSparcTargetMachine().getSubtargetImpl()->insertNOPLoad())
diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 4e7985bd4edc..dc4f2a438c9f 100644
--- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -13,6 +13,7 @@
#include "TargetInfo/SystemZTargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -236,7 +237,7 @@ public:
return Kind == KindImm;
}
bool isImm(int64_t MinValue, int64_t MaxValue) const {
- return Kind == KindImm && inRange(Imm, MinValue, MaxValue);
+ return Kind == KindImm && inRange(Imm, MinValue, MaxValue, true);
}
const MCExpr *getImm() const {
assert(Kind == KindImm && "Not an immediate");
@@ -379,7 +380,6 @@ public:
bool isU2Imm() const { return isImm(0, 3); }
bool isU3Imm() const { return isImm(0, 7); }
bool isU4Imm() const { return isImm(0, 15); }
- bool isU6Imm() const { return isImm(0, 63); }
bool isU8Imm() const { return isImm(0, 255); }
bool isS8Imm() const { return isImm(-128, 127); }
bool isU12Imm() const { return isImm(0, 4095); }
@@ -494,7 +494,7 @@ public:
}
// Override MCTargetAsmParser.
- bool ParseDirective(AsmToken DirectiveID) override;
+ ParseStatus parseDirective(AsmToken DirectiveID) override;
bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) override;
bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
@@ -1219,7 +1219,7 @@ SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
return MatchOperand_Success;
}
-bool SystemZAsmParser::ParseDirective(AsmToken DirectiveID) {
+ParseStatus SystemZAsmParser::parseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".insn")
@@ -1229,7 +1229,7 @@ bool SystemZAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal.startswith(".gnu_attribute"))
return ParseGNUAttribute(DirectiveID.getLoc());
- return true;
+ return ParseStatus::NoMatch;
}
/// ParseDirectiveInsn
@@ -1346,12 +1346,12 @@ bool SystemZAsmParser::ParseDirectiveMachine(SMLoc L) {
MCAsmParser &Parser = getParser();
if (Parser.getTok().isNot(AsmToken::Identifier) &&
Parser.getTok().isNot(AsmToken::String))
- return Error(L, "unexpected token in '.machine' directive");
+ return TokError("unexpected token in '.machine' directive");
StringRef CPU = Parser.getTok().getIdentifier();
Parser.Lex();
- if (parseToken(AsmToken::EndOfStatement))
- return addErrorSuffix(" in '.machine' directive");
+ if (parseEOL())
+ return true;
MCSubtargetInfo &STI = copySTI();
STI.setDefaultFeatures(CPU, /*TuneCPU*/ CPU, "");
@@ -1366,18 +1366,15 @@ bool SystemZAsmParser::ParseGNUAttribute(SMLoc L) {
int64_t Tag;
int64_t IntegerValue;
if (!Parser.parseGNUAttribute(L, Tag, IntegerValue))
- return false;
+ return Error(L, "malformed .gnu_attribute directive");
// Tag_GNU_S390_ABI_Vector tag is '8' and can be 0, 1, or 2.
- if (Tag != 8 || (IntegerValue < 0 || IntegerValue > 2)) {
- Error(Parser.getTok().getLoc(),
- "Unrecognized .gnu_attribute tag/value pair.");
- return false;
- }
+ if (Tag != 8 || (IntegerValue < 0 || IntegerValue > 2))
+ return Error(L, "unrecognized .gnu_attribute tag/value pair.");
Parser.getStreamer().emitGNUAttribute(Tag, IntegerValue);
- return true;
+ return parseEOL();
}
bool SystemZAsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
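The asm-parser hunks above replace the old bool-returning ParseDirective hook with parseDirective returning ParseStatus, where NoMatch hands an unrecognized directive back to the generic AsmParser instead of claiming it. A small standalone sketch of that tri-state convention; ParseStatusLike is a local stand-in for llvm::ParseStatus and only the directives visible in the hunk are modeled.

// parse_status_sketch.cpp -- tri-state directive dispatch.
#include <iostream>
#include <string>

enum class ParseStatusLike { Success, Failure, NoMatch };

// Target hook: claim only the directives this target knows about.
static ParseStatusLike parseDirective(const std::string &ID) {
  if (ID == ".insn")
    return ParseStatusLike::Success; // parsed (or diagnosed) by the target
  if (ID.rfind(".gnu_attribute", 0) == 0)
    return ParseStatusLike::Success;
  return ParseStatusLike::NoMatch;   // let the generic parser try it
}

int main() {
  for (const std::string ID : {".insn", ".gnu_attribute", ".p2align"}) {
    bool Target = parseDirective(ID) != ParseStatusLike::NoMatch;
    std::cout << ID << (Target ? " -> target parser" : " -> generic parser")
              << '\n';
  }
}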
diff --git a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 979141a1962a..d26ad63dc515 100644
--- a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -79,11 +79,16 @@ static bool tryAddingSymbolicOperand(int64_t Value, bool isBranch,
}
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
- const unsigned *Regs, unsigned Size) {
+ const unsigned *Regs, unsigned Size,
+ bool IsAddr = false) {
assert(RegNo < Size && "Invalid register");
- RegNo = Regs[RegNo];
- if (RegNo == 0)
- return MCDisassembler::Fail;
+ if (IsAddr && RegNo == 0) {
+ RegNo = SystemZ::NoRegister;
+ } else {
+ RegNo = Regs[RegNo];
+ if (RegNo == 0)
+ return MCDisassembler::Fail;
+ }
Inst.addOperand(MCOperand::createReg(RegNo));
return MCDisassembler::Success;
}
@@ -113,9 +118,15 @@ static DecodeStatus DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
}
static DecodeStatus
+DecodeADDR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address,
+ const MCDisassembler *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs, 16, true);
+}
+
+static DecodeStatus
DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, uint64_t Address,
const MCDisassembler *Decoder) {
- return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16);
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16, true);
}
static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
@@ -206,12 +217,6 @@ static DecodeStatus decodeU4ImmOperand(MCInst &Inst, uint64_t Imm,
return decodeUImmOperand<4>(Inst, Imm);
}
-static DecodeStatus decodeU6ImmOperand(MCInst &Inst, uint64_t Imm,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- return decodeUImmOperand<6>(Inst, Imm);
-}
-
static DecodeStatus decodeU8ImmOperand(MCInst &Inst, uint64_t Imm,
uint64_t Address,
const MCDisassembler *Decoder) {
@@ -248,6 +253,12 @@ static DecodeStatus decodeS16ImmOperand(MCInst &Inst, uint64_t Imm,
return decodeSImmOperand<16>(Inst, Imm);
}
+static DecodeStatus decodeS20ImmOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ return decodeSImmOperand<20>(Inst, Imm);
+}
+
static DecodeStatus decodeS32ImmOperand(MCInst &Inst, uint64_t Imm,
uint64_t Address,
const MCDisassembler *Decoder) {
@@ -255,6 +266,16 @@ static DecodeStatus decodeS32ImmOperand(MCInst &Inst, uint64_t Imm,
}
template <unsigned N>
+static DecodeStatus decodeLenOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address,
+ const MCDisassembler *Decoder) {
+ if (!isUInt<N>(Imm))
+ return MCDisassembler::Fail;
+ Inst.addOperand(MCOperand::createImm(Imm + 1));
+ return MCDisassembler::Success;
+}
+
+template <unsigned N>
static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm,
uint64_t Address, bool isBranch,
const MCDisassembler *Decoder) {
@@ -298,158 +319,6 @@ static DecodeStatus decodePC32DBLOperand(MCInst &Inst, uint64_t Imm,
return decodePCDBLOperand<32>(Inst, Imm, Address, false, Decoder);
}
-static DecodeStatus decodeBDAddr12Operand(MCInst &Inst, uint64_t Field,
- const unsigned *Regs) {
- uint64_t Base = Field >> 12;
- uint64_t Disp = Field & 0xfff;
- assert(Base < 16 && "Invalid BDAddr12");
- Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
- Inst.addOperand(MCOperand::createImm(Disp));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus decodeBDAddr20Operand(MCInst &Inst, uint64_t Field,
- const unsigned *Regs) {
- uint64_t Base = Field >> 20;
- uint64_t Disp = ((Field << 12) & 0xff000) | ((Field >> 8) & 0xfff);
- assert(Base < 16 && "Invalid BDAddr20");
- Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
- Inst.addOperand(MCOperand::createImm(SignExtend64<20>(Disp)));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus decodeBDXAddr12Operand(MCInst &Inst, uint64_t Field,
- const unsigned *Regs) {
- uint64_t Index = Field >> 16;
- uint64_t Base = (Field >> 12) & 0xf;
- uint64_t Disp = Field & 0xfff;
- assert(Index < 16 && "Invalid BDXAddr12");
- Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
- Inst.addOperand(MCOperand::createImm(Disp));
- Inst.addOperand(MCOperand::createReg(Index == 0 ? 0 : Regs[Index]));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus decodeBDXAddr20Operand(MCInst &Inst, uint64_t Field,
- const unsigned *Regs) {
- uint64_t Index = Field >> 24;
- uint64_t Base = (Field >> 20) & 0xf;
- uint64_t Disp = ((Field & 0xfff00) >> 8) | ((Field & 0xff) << 12);
- assert(Index < 16 && "Invalid BDXAddr20");
- Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
- Inst.addOperand(MCOperand::createImm(SignExtend64<20>(Disp)));
- Inst.addOperand(MCOperand::createReg(Index == 0 ? 0 : Regs[Index]));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus decodeBDLAddr12Len4Operand(MCInst &Inst, uint64_t Field,
- const unsigned *Regs) {
- uint64_t Length = Field >> 16;
- uint64_t Base = (Field >> 12) & 0xf;
- uint64_t Disp = Field & 0xfff;
- assert(Length < 16 && "Invalid BDLAddr12Len4");
- Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
- Inst.addOperand(MCOperand::createImm(Disp));
- Inst.addOperand(MCOperand::createImm(Length + 1));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus decodeBDLAddr12Len8Operand(MCInst &Inst, uint64_t Field,
- const unsigned *Regs) {
- uint64_t Length = Field >> 16;
- uint64_t Base = (Field >> 12) & 0xf;
- uint64_t Disp = Field & 0xfff;
- assert(Length < 256 && "Invalid BDLAddr12Len8");
- Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
- Inst.addOperand(MCOperand::createImm(Disp));
- Inst.addOperand(MCOperand::createImm(Length + 1));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus decodeBDRAddr12Operand(MCInst &Inst, uint64_t Field,
- const unsigned *Regs) {
- uint64_t Length = Field >> 16;
- uint64_t Base = (Field >> 12) & 0xf;
- uint64_t Disp = Field & 0xfff;
- assert(Length < 16 && "Invalid BDRAddr12");
- Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
- Inst.addOperand(MCOperand::createImm(Disp));
- Inst.addOperand(MCOperand::createReg(Regs[Length]));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus decodeBDVAddr12Operand(MCInst &Inst, uint64_t Field,
- const unsigned *Regs) {
- uint64_t Index = Field >> 16;
- uint64_t Base = (Field >> 12) & 0xf;
- uint64_t Disp = Field & 0xfff;
- assert(Index < 32 && "Invalid BDVAddr12");
- Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
- Inst.addOperand(MCOperand::createImm(Disp));
- Inst.addOperand(MCOperand::createReg(SystemZMC::VR128Regs[Index]));
- return MCDisassembler::Success;
-}
-
-static DecodeStatus decodeBDAddr32Disp12Operand(MCInst &Inst, uint64_t Field,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR32Regs);
-}
-
-static DecodeStatus decodeBDAddr32Disp20Operand(MCInst &Inst, uint64_t Field,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR32Regs);
-}
-
-static DecodeStatus decodeBDAddr64Disp12Operand(MCInst &Inst, uint64_t Field,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- return decodeBDAddr12Operand(Inst, Field, SystemZMC::GR64Regs);
-}
-
-static DecodeStatus decodeBDAddr64Disp20Operand(MCInst &Inst, uint64_t Field,
- uint64_t Address,
- const MCDisassembler *Decoder) {
- return decodeBDAddr20Operand(Inst, Field, SystemZMC::GR64Regs);
-}
-
-static DecodeStatus
-decodeBDXAddr64Disp12Operand(MCInst &Inst, uint64_t Field, uint64_t Address,
- const MCDisassembler *Decoder) {
- return decodeBDXAddr12Operand(Inst, Field, SystemZMC::GR64Regs);
-}
-
-static DecodeStatus
-decodeBDXAddr64Disp20Operand(MCInst &Inst, uint64_t Field, uint64_t Address,
- const MCDisassembler *Decoder) {
- return decodeBDXAddr20Operand(Inst, Field, SystemZMC::GR64Regs);
-}
-
-static DecodeStatus
-decodeBDLAddr64Disp12Len4Operand(MCInst &Inst, uint64_t Field, uint64_t Address,
- const MCDisassembler *Decoder) {
- return decodeBDLAddr12Len4Operand(Inst, Field, SystemZMC::GR64Regs);
-}
-
-static DecodeStatus
-decodeBDLAddr64Disp12Len8Operand(MCInst &Inst, uint64_t Field, uint64_t Address,
- const MCDisassembler *Decoder) {
- return decodeBDLAddr12Len8Operand(Inst, Field, SystemZMC::GR64Regs);
-}
-
-static DecodeStatus
-decodeBDRAddr64Disp12Operand(MCInst &Inst, uint64_t Field, uint64_t Address,
- const MCDisassembler *Decoder) {
- return decodeBDRAddr12Operand(Inst, Field, SystemZMC::GR64Regs);
-}
-
-static DecodeStatus
-decodeBDVAddr64Disp12Operand(MCInst &Inst, uint64_t Field, uint64_t Address,
- const MCDisassembler *Decoder) {
- return decodeBDVAddr12Operand(Inst, Field, SystemZMC::GR64Regs);
-}
-
#include "SystemZGenDisassemblerTables.inc"
DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
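The block of decodeBD*Operand helpers deleted above all unpacked the same fields by hand: a 4- or 8-bit index/length, a 4-bit base, and a displacement whose 20-bit form keeps its low 12 bits in the higher field positions and its high 8 bits in the lower ones; length fields are stored biased by one (see decodeLenOperand added earlier in this hunk). A standalone worked example of the BDXAddr20 layout, matching the arithmetic of the removed decodeBDXAddr20Operand:

// bdxaddr20_unpack.cpp -- worked example of the BDXAddr20 field layout that
// the deleted decodeBDXAddr20Operand() handled (now done by tblgen'd
// decoders). Field layout: [index:4][base:4][d-low:12][d-high:8].
#include <cassert>
#include <cstdint>
#include <iostream>

struct BDXAddr {
  unsigned Index;
  unsigned Base;
  int64_t Disp;
};

// Portable sign extension from bit 19.
static int64_t signExtend20(uint64_t V) {
  return (int64_t)(V ^ (1ull << 19)) - (1ll << 19);
}

static BDXAddr unpackBDXAddr20(uint64_t Field) {
  uint64_t Index = Field >> 24;
  uint64_t Base = (Field >> 20) & 0xf;
  // Bits 19-8 hold the low 12 displacement bits, bits 7-0 the high 8 bits,
  // so reassemble as high:low before sign extension.
  uint64_t Disp = ((Field & 0xfff00) >> 8) | ((Field & 0xff) << 12);
  assert(Index < 16 && "Invalid BDXAddr20");
  return {(unsigned)Index, (unsigned)Base, signExtend20(Disp)};
}

int main() {
  // Index = 5, Base = 11, displacement = -4 (0xffffc as 20 bits, so
  // d-low = 0xffc and d-high = 0xff).
  uint64_t Field = (5ull << 24) | (11ull << 20) | (0xffcull << 8) | 0xff;
  BDXAddr A = unpackBDXAddr20(Field);
  std::cout << "index r" << A.Index << ", base r" << A.Base << ", disp "
            << A.Disp << '\n'; // index r5, base r11, disp -4
}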
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
index 3e0e385b25c4..a32dc9a2e7d5 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.cpp
@@ -83,7 +83,12 @@ void SystemZInstPrinter::printInst(const MCInst *MI, uint64_t Address,
template <unsigned N>
void SystemZInstPrinter::printUImmOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
- int64_t Value = MI->getOperand(OpNum).getImm();
+ const MCOperand &MO = MI->getOperand(OpNum);
+ if (MO.isExpr()) {
+ O << *MO.getExpr();
+ return;
+ }
+ uint64_t Value = static_cast<uint64_t>(MO.getImm());
assert(isUInt<N>(Value) && "Invalid uimm argument");
O << markup("<imm:") << Value << markup(">");
}
@@ -91,6 +96,11 @@ void SystemZInstPrinter::printUImmOperand(const MCInst *MI, int OpNum,
template <unsigned N>
void SystemZInstPrinter::printSImmOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ if (MO.isExpr()) {
+ O << *MO.getExpr();
+ return;
+ }
int64_t Value = MI->getOperand(OpNum).getImm();
assert(isInt<N>(Value) && "Invalid simm argument");
O << markup("<imm:") << Value << markup(">");
@@ -116,11 +126,6 @@ void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum,
printUImmOperand<4>(MI, OpNum, O);
}
-void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum,
- raw_ostream &O) {
- printUImmOperand<6>(MI, OpNum, O);
-}
-
void SystemZInstPrinter::printS8ImmOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
printSImmOperand<8>(MI, OpNum, O);
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
index 6a188ff15039..4e7490dad299 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinter.h
@@ -69,7 +69,6 @@ private:
void printU2ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU3ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
- void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU12ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index d9f770a399f6..880766a1a23f 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -47,6 +47,13 @@ static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value,
return (int64_t)Value / 2;
};
+ auto handleImmValue = [&](bool IsSigned, unsigned W) -> uint64_t {
+ if (!(IsSigned ? checkFixupInRange(minIntN(W), maxIntN(W))
+ : checkFixupInRange(0, maxUIntN(W))))
+ return 0;
+ return Value;
+ };
+
switch (unsigned(Kind)) {
case SystemZ::FK_390_PC12DBL:
return handlePCRelFixupValue(12);
@@ -57,22 +64,41 @@ static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value,
case SystemZ::FK_390_PC32DBL:
return handlePCRelFixupValue(32);
- case SystemZ::FK_390_12:
- if (!checkFixupInRange(0, maxUIntN(12)))
- return 0;
- return Value;
+ case SystemZ::FK_390_TLS_CALL:
+ return 0;
- case SystemZ::FK_390_20: {
- if (!checkFixupInRange(minIntN(20), maxIntN(20)))
- return 0;
+ case SystemZ::FK_390_S8Imm:
+ return handleImmValue(true, 8);
+ case SystemZ::FK_390_S16Imm:
+ return handleImmValue(true, 16);
+ case SystemZ::FK_390_S20Imm: {
+ Value = handleImmValue(true, 20);
+ // S20Imm is used only for signed 20-bit displacements.
// The high byte of a 20 bit displacement value comes first.
uint64_t DLo = Value & 0xfff;
uint64_t DHi = (Value >> 12) & 0xff;
return (DLo << 8) | DHi;
}
-
- case SystemZ::FK_390_TLS_CALL:
- return 0;
+ case SystemZ::FK_390_S32Imm:
+ return handleImmValue(true, 32);
+ case SystemZ::FK_390_U1Imm:
+ return handleImmValue(false, 1);
+ case SystemZ::FK_390_U2Imm:
+ return handleImmValue(false, 2);
+ case SystemZ::FK_390_U3Imm:
+ return handleImmValue(false, 3);
+ case SystemZ::FK_390_U4Imm:
+ return handleImmValue(false, 4);
+ case SystemZ::FK_390_U8Imm:
+ return handleImmValue(false, 8);
+ case SystemZ::FK_390_U12Imm:
+ return handleImmValue(false, 12);
+ case SystemZ::FK_390_U16Imm:
+ return handleImmValue(false, 16);
+ case SystemZ::FK_390_U32Imm:
+ return handleImmValue(false, 32);
+ case SystemZ::FK_390_U48Imm:
+ return handleImmValue(false, 48);
}
llvm_unreachable("Unknown fixup kind!");
@@ -130,16 +156,6 @@ SystemZMCAsmBackend::getFixupKind(StringRef Name) const {
const MCFixupKindInfo &
SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
- const static MCFixupKindInfo Infos[SystemZ::NumTargetFixupKinds] = {
- { "FK_390_PC12DBL", 4, 12, MCFixupKindInfo::FKF_IsPCRel },
- { "FK_390_PC16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
- { "FK_390_PC24DBL", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
- { "FK_390_PC32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
- { "FK_390_TLS_CALL", 0, 0, 0 },
- { "FK_390_12", 4, 12, 0 },
- { "FK_390_20", 4, 20, 0 }
- };
-
// Fixup kinds from .reloc directive are like R_390_NONE. They
// do not require any extra processing.
if (Kind >= FirstLiteralRelocationKind)
@@ -150,7 +166,7 @@ SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
"Invalid kind!");
- return Infos[Kind - FirstTargetFixupKind];
+ return SystemZ::MCFixupKindInfos[Kind - FirstTargetFixupKind];
}
bool SystemZMCAsmBackend::shouldForceRelocation(const MCAssembler &,
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index 1a71ff28424f..e453ec60d70c 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -37,8 +37,6 @@ class SystemZMCCodeEmitter : public MCCodeEmitter {
const MCInstrInfo &MCII;
MCContext &Ctx;
- mutable unsigned MemOpsEmitted;
-
public:
SystemZMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
: MCII(mcii), Ctx(ctx) {
@@ -47,7 +45,7 @@ public:
~SystemZMCCodeEmitter() override = default;
// Override MCCodeEmitter.
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -56,6 +54,8 @@ private:
uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ uint32_t getOperandBitOffset(const MCInst &MI, unsigned OpNum,
+ const MCSubtargetInfo &STI) const;
// Called by the TableGen code to get the binary encoding of operand
// MO in MI. Fixups is the list of fixups against MI.
@@ -63,40 +63,19 @@ private:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- // Return the displacement value for the OpNum operand. If it is a symbol,
- // add a fixup for it and return 0.
- uint64_t getDispOpValue(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- SystemZ::FixupKind Kind) const;
+ // Return the encoded immediate value for the OpNum operand. If it is a
+ // symbol, add a fixup for it and return 0.
+ template <SystemZ::FixupKind Kind>
+ uint64_t getImmOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
- // Called by the TableGen code to get the binary encoding of an address.
- // The index or length, if any, is encoded first, followed by the base,
- // followed by the displacement. In a 20-bit displacement,
- // the low 12 bits are encoded before the high 8 bits.
- uint64_t getBDAddr12Encoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint64_t getBDAddr20Encoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint64_t getBDXAddr12Encoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint64_t getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint64_t getBDLAddr12Len4Encoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint64_t getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint64_t getBDRAddr12Encoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
- uint64_t getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
+ // Called by the TableGen code to get the binary encoding of a length value.
+ // Length values are encoded by subtracting 1 from the actual value.
+ template <SystemZ::FixupKind Kind>
+ uint64_t getLenEncoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
// Operand OpNum of MI needs a PC-relative fixup of kind Kind at
// Offset bytes from the start of MI. Add the fixup to Fixups
@@ -154,16 +133,16 @@ private:
} // end anonymous namespace
-void SystemZMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+void SystemZMCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- MemOpsEmitted = 0;
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
unsigned Size = MCII.get(MI.getOpcode()).getSize();
// Big-endian insertion of Size bytes.
unsigned ShiftValue = (Size * 8) - 8;
for (unsigned I = 0; I != Size; ++I) {
- OS << uint8_t(Bits >> ShiftValue);
+ CB.push_back(uint8_t(Bits >> ShiftValue));
ShiftValue -= 8;
}
}
@@ -174,117 +153,40 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
const MCSubtargetInfo &STI) const {
if (MO.isReg())
return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
+ // SystemZAsmParser::parseAnyRegister() produces KindImm when registers are
+ // specified as integers.
if (MO.isImm())
return static_cast<uint64_t>(MO.getImm());
llvm_unreachable("Unexpected operand type!");
}
-uint64_t SystemZMCCodeEmitter::
-getDispOpValue(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- SystemZ::FixupKind Kind) const {
+template <SystemZ::FixupKind Kind>
+uint64_t SystemZMCCodeEmitter::getImmOpValue(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
const MCOperand &MO = MI.getOperand(OpNum);
- if (MO.isImm()) {
- ++MemOpsEmitted;
+ if (MO.isImm())
return static_cast<uint64_t>(MO.getImm());
- }
if (MO.isExpr()) {
- // All instructions follow the pattern where the first displacement has a
- // 2 bytes offset, and the second one 4 bytes.
- unsigned ByteOffs = MemOpsEmitted++ == 0 ? 2 : 4;
- Fixups.push_back(MCFixup::create(ByteOffs, MO.getExpr(), (MCFixupKind)Kind,
- MI.getLoc()));
+ unsigned MIBitSize = MCII.get(MI.getOpcode()).getSize() * 8;
+ uint32_t RawBitOffset = getOperandBitOffset(MI, OpNum, STI);
+ unsigned OpBitSize =
+ SystemZ::MCFixupKindInfos[Kind - FirstTargetFixupKind].TargetSize;
+ uint32_t BitOffset = MIBitSize - RawBitOffset - OpBitSize;
+ Fixups.push_back(MCFixup::create(BitOffset >> 3, MO.getExpr(),
+ (MCFixupKind)Kind, MI.getLoc()));
assert(Fixups.size() <= 2 && "More than two memory operands in MI?");
return 0;
}
llvm_unreachable("Unexpected operand type!");
}
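Instead of counting memory operands, the new getImmOpValue derives the fixup position from the tblgen'd getOperandBitOffset and the fixup kind's TargetSize: the operand's distance from the most-significant end of the encoding, divided by eight, is the byte offset. A standalone check of that arithmetic, assuming (as the subtraction above implies) that getOperandBitOffset counts bits from the least-significant end of the encoding; the instruction layouts in main() are made up for illustration.

// fixup_offset.cpp -- the placement arithmetic used by getImmOpValue above.
#include <cstdint>
#include <iostream>

static uint32_t fixupByteOffset(unsigned MIBitSize, uint32_t RawBitOffset,
                                unsigned OpBitSize) {
  uint32_t BitOffset = MIBitSize - RawBitOffset - OpBitSize;
  return BitOffset >> 3; // bytes from the start of the instruction
}

int main() {
  // Made-up 48-bit instruction with a 16-bit immediate in bits [31:16]:
  // the fixup starts 2 bytes into the instruction.
  std::cout << fixupByteOffset(48, 16, 16) << '\n'; // 2
  // Made-up 32-bit instruction with a 12-bit field in its low 12 bits:
  // byte offset 2; the leftover 4 bits are described by the fixup kind's
  // TargetOffset (FK_390_U12Imm is listed with offset 4 in the
  // MCFixupKindInfos table added in SystemZMCFixups.h further down).
  std::cout << fixupByteOffset(32, 0, 12) << '\n'; // 2
}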
-uint64_t SystemZMCCodeEmitter::
-getBDAddr12Encoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_12);
- assert(isUInt<4>(Base) && isUInt<12>(Disp));
- return (Base << 12) | Disp;
-}
-
-uint64_t SystemZMCCodeEmitter::
-getBDAddr20Encoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_20);
- assert(isUInt<4>(Base) && isInt<20>(Disp));
- return (Base << 20) | ((Disp & 0xfff) << 8) | ((Disp & 0xff000) >> 12);
-}
-
-uint64_t SystemZMCCodeEmitter::
-getBDXAddr12Encoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_12);
- uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
- assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Index));
- return (Index << 16) | (Base << 12) | Disp;
-}
-
-uint64_t SystemZMCCodeEmitter::
-getBDXAddr20Encoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_20);
- uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
- assert(isUInt<4>(Base) && isInt<20>(Disp) && isUInt<4>(Index));
- return (Index << 24) | (Base << 20) | ((Disp & 0xfff) << 8)
- | ((Disp & 0xff000) >> 12);
-}
-
-uint64_t SystemZMCCodeEmitter::
-getBDLAddr12Len4Encoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_12);
- uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI) - 1;
- assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Len));
- return (Len << 16) | (Base << 12) | Disp;
-}
-
-uint64_t SystemZMCCodeEmitter::
-getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_12);
- uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI) - 1;
- assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<8>(Len));
- return (Len << 16) | (Base << 12) | Disp;
-}
-
-uint64_t SystemZMCCodeEmitter::
-getBDRAddr12Encoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_12);
- uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
- assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Len));
- return (Len << 16) | (Base << 12) | Disp;
-}
-
-uint64_t SystemZMCCodeEmitter::
-getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
- uint64_t Disp = getDispOpValue(MI, OpNum + 1, Fixups, SystemZ::FK_390_12);
- uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
- assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<5>(Index));
- return (Index << 16) | (Base << 12) | Disp;
+template <SystemZ::FixupKind Kind>
+uint64_t
+SystemZMCCodeEmitter::getLenEncoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getImmOpValue<Kind>(MI, OpNum, Fixups, STI) - 1;
}
uint64_t
@@ -319,6 +221,7 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
return 0;
}
+#define GET_OPERAND_BIT_OFFSET
#include "SystemZGenMCCodeEmitter.inc"
MCCodeEmitter *llvm::createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.cpp
new file mode 100644
index 000000000000..647cf765c6a3
--- /dev/null
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.cpp
@@ -0,0 +1,49 @@
+//===-- SystemZMCExpr.cpp - SystemZ specific MC expression classes --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCExpr.h"
+#include "llvm/MC/MCContext.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "systemzmcexpr"
+
+const SystemZMCExpr *SystemZMCExpr::create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx) {
+ return new (Ctx) SystemZMCExpr(Kind, Expr);
+}
+
+StringRef SystemZMCExpr::getVariantKindName() const {
+ switch (static_cast<uint32_t>(getKind())) {
+ case VK_SystemZ_None:
+ return "A";
+ case VK_SystemZ_RCon:
+ return "R";
+ case VK_SystemZ_VCon:
+ return "V";
+ default:
+ llvm_unreachable("Invalid kind");
+ }
+}
+
+void SystemZMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ OS << getVariantKindName() << '(';
+ Expr->print(OS, MAI);
+ OS << ')';
+}
+
+bool SystemZMCExpr::evaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
+ if (!getSubExpr()->evaluateAsRelocatable(Res, Layout, Fixup))
+ return false;
+
+ Res =
+ MCValue::get(Res.getSymA(), Res.getSymB(), Res.getConstant(), getKind());
+
+ return true;
+}
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.h
new file mode 100644
index 000000000000..f548b34baa42
--- /dev/null
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCExpr.h
@@ -0,0 +1,66 @@
+//===-- SystemZMCExpr.h - SystemZ specific MC expression classes -*- C++-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SystemZ_MCTARGETDESC_SystemZMCEXPR_H
+#define LLVM_LIB_TARGET_SystemZ_MCTARGETDESC_SystemZMCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCValue.h"
+
+namespace llvm {
+
+class SystemZMCExpr : public MCTargetExpr {
+public:
+// HLASM docs for address constants:
+// https://www.ibm.com/docs/en/hla-and-tf/1.6?topic=value-address-constants
+ enum VariantKind {
+ VK_SystemZ_None,
+ VK_SystemZ_RCon, // Address of ADA of symbol.
+ VK_SystemZ_VCon, // Address of external function symbol.
+ };
+
+private:
+ const VariantKind Kind;
+ const MCExpr *Expr;
+
+ explicit SystemZMCExpr(VariantKind Kind, const MCExpr *Expr)
+ : Kind(Kind), Expr(Expr) {}
+
+public:
+ static const SystemZMCExpr *create(VariantKind Kind, const MCExpr *Expr,
+ MCContext &Ctx);
+
+ /// getKind - Get the kind of this expression.
+ VariantKind getKind() const { return Kind; }
+
+ /// getSubExpr - Get the child of this expression.
+ const MCExpr *getSubExpr() const { return Expr; }
+
+ StringRef getVariantKindName() const;
+
+ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+ bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
+ void visitUsedExpr(MCStreamer &Streamer) const override {
+ Streamer.visitUsedExpr(*getSubExpr());
+ }
+ MCFragment *findAssociatedFragment() const override {
+ return getSubExpr()->findAssociatedFragment();
+ }
+
+ // There are no TLS SystemZMCExprs at the moment.
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+};
+} // end namespace llvm
+
+#endif
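SystemZMCExpr wraps a subexpression in an HLASM-style address-constant marker: it prints as R(<sym>) or V(<sym>) and forwards relocatable evaluation to the wrapped expression with the variant kind attached. A hedged usage sketch follows; it assumes LLVM's MC headers and an already-initialized MCContext and MCStreamer (so it is illustrative rather than buildable on its own) and mirrors what emitADASection in SystemZAsmPrinter.cpp further down does for descriptor slots.

// Sketch only: emit one pointer-sized V-con slot for a symbol, as the ADA
// emission below does for indirect function descriptors.
#include "MCTargetDesc/SystemZMCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"

static void emitVConSlot(llvm::MCContext &Ctx, llvm::MCStreamer &Out,
                         llvm::MCSymbol *Sym, unsigned PointerSize) {
  const llvm::MCExpr *Ref = llvm::MCSymbolRefExpr::create(Sym, Ctx);
  const llvm::SystemZMCExpr *VCon = llvm::SystemZMCExpr::create(
      llvm::SystemZMCExpr::VK_SystemZ_VCon, Ref, Ctx);
  Out.emitValue(VCon, PointerSize); // renders as V(<sym>) in assembly output
}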
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
index 1f62baabb9e7..512e51c0f933 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
@@ -10,6 +10,7 @@
#define LLVM_LIB_TARGET_SYSTEMZ_MCTARGETDESC_SYSTEMZMCFIXUPS_H
#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
namespace llvm {
namespace SystemZ {
@@ -20,13 +21,46 @@ enum FixupKind {
FK_390_PC24DBL,
FK_390_PC32DBL,
FK_390_TLS_CALL,
- FK_390_12,
- FK_390_20,
+
+ FK_390_S8Imm,
+ FK_390_S16Imm,
+ FK_390_S20Imm,
+ FK_390_S32Imm,
+ FK_390_U1Imm,
+ FK_390_U2Imm,
+ FK_390_U3Imm,
+ FK_390_U4Imm,
+ FK_390_U8Imm,
+ FK_390_U12Imm,
+ FK_390_U16Imm,
+ FK_390_U32Imm,
+ FK_390_U48Imm,
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
};
+
+const static MCFixupKindInfo MCFixupKindInfos[SystemZ::NumTargetFixupKinds] = {
+ {"FK_390_PC12DBL", 4, 12, MCFixupKindInfo::FKF_IsPCRel},
+ {"FK_390_PC16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel},
+ {"FK_390_PC24DBL", 0, 24, MCFixupKindInfo::FKF_IsPCRel},
+ {"FK_390_PC32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
+ {"FK_390_TLS_CALL", 0, 0, 0},
+ {"FK_390_S8Imm", 0, 8, 0},
+ {"FK_390_S16Imm", 0, 16, 0},
+ {"FK_390_S20Imm", 4, 20, 0},
+ {"FK_390_S32Imm", 0, 32, 0},
+ {"FK_390_U1Imm", 0, 1, 0},
+ {"FK_390_U2Imm", 0, 2, 0},
+ {"FK_390_U3Imm", 0, 3, 0},
+ {"FK_390_U4Imm", 0, 4, 0},
+ {"FK_390_U8Imm", 0, 8, 0},
+ {"FK_390_U12Imm", 4, 12, 0},
+ {"FK_390_U16Imm", 0, 16, 0},
+ {"FK_390_U32Imm", 0, 32, 0},
+ {"FK_390_U48Imm", 0, 48, 0},
+};
} // end namespace SystemZ
} // end namespace llvm
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index c23463ab9bde..9c6a1b6e8af0 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -9,6 +9,7 @@
#include "MCTargetDesc/SystemZMCFixups.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
@@ -40,85 +41,116 @@ SystemZObjectWriter::SystemZObjectWriter(uint8_t OSABI)
/*HasRelocationAddend_=*/ true) {}
// Return the relocation type for an absolute value of MCFixupKind Kind.
-static unsigned getAbsoluteReloc(unsigned Kind) {
+static unsigned getAbsoluteReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
switch (Kind) {
- case FK_Data_1: return ELF::R_390_8;
- case FK_Data_2: return ELF::R_390_16;
- case FK_Data_4: return ELF::R_390_32;
- case FK_Data_8: return ELF::R_390_64;
- case SystemZ::FK_390_12: return ELF::R_390_12;
- case SystemZ::FK_390_20: return ELF::R_390_20;
+ case FK_Data_1:
+ case SystemZ::FK_390_U8Imm:
+ case SystemZ::FK_390_S8Imm:
+ return ELF::R_390_8;
+ case SystemZ::FK_390_U12Imm:
+ return ELF::R_390_12;
+ case FK_Data_2:
+ case SystemZ::FK_390_U16Imm:
+ case SystemZ::FK_390_S16Imm:
+ return ELF::R_390_16;
+ case SystemZ::FK_390_S20Imm:
+ return ELF::R_390_20;
+ case FK_Data_4:
+ case SystemZ::FK_390_U32Imm:
+ case SystemZ::FK_390_S32Imm:
+ return ELF::R_390_32;
+ case FK_Data_8:
+ return ELF::R_390_64;
}
- llvm_unreachable("Unsupported absolute address");
+ Ctx.reportError(Loc, "Unsupported absolute address");
+ return 0;
}
// Return the relocation type for a PC-relative value of MCFixupKind Kind.
-static unsigned getPCRelReloc(unsigned Kind) {
+static unsigned getPCRelReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
switch (Kind) {
- case FK_Data_2: return ELF::R_390_PC16;
- case FK_Data_4: return ELF::R_390_PC32;
- case FK_Data_8: return ELF::R_390_PC64;
- case SystemZ::FK_390_PC12DBL: return ELF::R_390_PC12DBL;
- case SystemZ::FK_390_PC16DBL: return ELF::R_390_PC16DBL;
- case SystemZ::FK_390_PC24DBL: return ELF::R_390_PC24DBL;
- case SystemZ::FK_390_PC32DBL: return ELF::R_390_PC32DBL;
+ case FK_Data_2:
+ case SystemZ::FK_390_U16Imm:
+ case SystemZ::FK_390_S16Imm:
+ return ELF::R_390_PC16;
+ case FK_Data_4:
+ case SystemZ::FK_390_U32Imm:
+ case SystemZ::FK_390_S32Imm:
+ return ELF::R_390_PC32;
+ case FK_Data_8:
+ return ELF::R_390_PC64;
+ case SystemZ::FK_390_PC12DBL:
+ return ELF::R_390_PC12DBL;
+ case SystemZ::FK_390_PC16DBL:
+ return ELF::R_390_PC16DBL;
+ case SystemZ::FK_390_PC24DBL:
+ return ELF::R_390_PC24DBL;
+ case SystemZ::FK_390_PC32DBL:
+ return ELF::R_390_PC32DBL;
}
- llvm_unreachable("Unsupported PC-relative address");
+ Ctx.reportError(Loc, "Unsupported PC-relative address");
+ return 0;
}
// Return the R_390_TLS_LE* relocation type for MCFixupKind Kind.
-static unsigned getTLSLEReloc(unsigned Kind) {
+static unsigned getTLSLEReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
switch (Kind) {
case FK_Data_4: return ELF::R_390_TLS_LE32;
case FK_Data_8: return ELF::R_390_TLS_LE64;
}
- llvm_unreachable("Unsupported absolute address");
+ Ctx.reportError(Loc, "Unsupported thread-local address (local-exec)");
+ return 0;
}
// Return the R_390_TLS_LDO* relocation type for MCFixupKind Kind.
-static unsigned getTLSLDOReloc(unsigned Kind) {
+static unsigned getTLSLDOReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
switch (Kind) {
case FK_Data_4: return ELF::R_390_TLS_LDO32;
case FK_Data_8: return ELF::R_390_TLS_LDO64;
}
- llvm_unreachable("Unsupported absolute address");
+ Ctx.reportError(Loc, "Unsupported thread-local address (local-dynamic)");
+ return 0;
}
// Return the R_390_TLS_LDM* relocation type for MCFixupKind Kind.
-static unsigned getTLSLDMReloc(unsigned Kind) {
+static unsigned getTLSLDMReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
switch (Kind) {
case FK_Data_4: return ELF::R_390_TLS_LDM32;
case FK_Data_8: return ELF::R_390_TLS_LDM64;
case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_LDCALL;
}
- llvm_unreachable("Unsupported absolute address");
+ Ctx.reportError(Loc, "Unsupported thread-local address (local-dynamic)");
+ return 0;
}
// Return the R_390_TLS_GD* relocation type for MCFixupKind Kind.
-static unsigned getTLSGDReloc(unsigned Kind) {
+static unsigned getTLSGDReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
switch (Kind) {
case FK_Data_4: return ELF::R_390_TLS_GD32;
case FK_Data_8: return ELF::R_390_TLS_GD64;
case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_GDCALL;
}
- llvm_unreachable("Unsupported absolute address");
+ Ctx.reportError(Loc, "Unsupported thread-local address (general-dynamic)");
+ return 0;
}
// Return the PLT relocation counterpart of MCFixupKind Kind.
-static unsigned getPLTReloc(unsigned Kind) {
+static unsigned getPLTReloc(MCContext &Ctx, SMLoc Loc, unsigned Kind) {
switch (Kind) {
case SystemZ::FK_390_PC12DBL: return ELF::R_390_PLT12DBL;
case SystemZ::FK_390_PC16DBL: return ELF::R_390_PLT16DBL;
case SystemZ::FK_390_PC24DBL: return ELF::R_390_PLT24DBL;
case SystemZ::FK_390_PC32DBL: return ELF::R_390_PLT32DBL;
}
- llvm_unreachable("Unsupported absolute address");
+ Ctx.reportError(Loc, "Unsupported PC-relative PLT address");
+ return 0;
}
unsigned SystemZObjectWriter::getRelocType(MCContext &Ctx,
const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
+ SMLoc Loc = Fixup.getLoc();
unsigned Kind = Fixup.getKind();
if (Kind >= FirstLiteralRelocationKind)
return Kind - FirstLiteralRelocationKind;
@@ -126,38 +158,40 @@ unsigned SystemZObjectWriter::getRelocType(MCContext &Ctx,
switch (Modifier) {
case MCSymbolRefExpr::VK_None:
if (IsPCRel)
- return getPCRelReloc(Kind);
- return getAbsoluteReloc(Kind);
+ return getPCRelReloc(Ctx, Loc, Kind);
+ return getAbsoluteReloc(Ctx, Loc, Kind);
case MCSymbolRefExpr::VK_NTPOFF:
assert(!IsPCRel && "NTPOFF shouldn't be PC-relative");
- return getTLSLEReloc(Kind);
+ return getTLSLEReloc(Ctx, Loc, Kind);
case MCSymbolRefExpr::VK_INDNTPOFF:
if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL)
return ELF::R_390_TLS_IEENT;
- llvm_unreachable("Only PC-relative INDNTPOFF accesses are supported for now");
+ Ctx.reportError(Loc, "Only PC-relative INDNTPOFF accesses are supported for now");
+ return 0;
case MCSymbolRefExpr::VK_DTPOFF:
assert(!IsPCRel && "DTPOFF shouldn't be PC-relative");
- return getTLSLDOReloc(Kind);
+ return getTLSLDOReloc(Ctx, Loc, Kind);
case MCSymbolRefExpr::VK_TLSLDM:
assert(!IsPCRel && "TLSLDM shouldn't be PC-relative");
- return getTLSLDMReloc(Kind);
+ return getTLSLDMReloc(Ctx, Loc, Kind);
case MCSymbolRefExpr::VK_TLSGD:
assert(!IsPCRel && "TLSGD shouldn't be PC-relative");
- return getTLSGDReloc(Kind);
+ return getTLSGDReloc(Ctx, Loc, Kind);
case MCSymbolRefExpr::VK_GOT:
if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL)
return ELF::R_390_GOTENT;
- llvm_unreachable("Only PC-relative GOT accesses are supported for now");
+ Ctx.reportError(Loc, "Only PC-relative GOT accesses are supported for now");
+ return 0;
case MCSymbolRefExpr::VK_PLT:
- assert(IsPCRel && "@PLT shouldt be PC-relative");
- return getPLTReloc(Kind);
+ assert(IsPCRel && "@PLT should be PC-relative");
+ return getPLTReloc(Ctx, Loc, Kind);
default:
llvm_unreachable("Modifier not supported");
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 3e63f17c6518..afebdd3f6149 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -13,9 +13,11 @@
#include "SystemZAsmPrinter.h"
#include "MCTargetDesc/SystemZInstPrinter.h"
+#include "MCTargetDesc/SystemZMCExpr.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMCInstLower.h"
#include "TargetInfo/SystemZTargetInfo.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
@@ -142,6 +144,50 @@ void SystemZAsmPrinter::emitCallInformation(CallType CT) {
.addReg(SystemZMC::GR64Regs[static_cast<unsigned>(CT)]));
}
+uint32_t SystemZAsmPrinter::AssociatedDataAreaTable::insert(const MCSymbol *Sym,
+ unsigned SlotKind) {
+ auto Key = std::make_pair(Sym, SlotKind);
+ auto It = Displacements.find(Key);
+
+ if (It != Displacements.end())
+ return (*It).second;
+
+ // Determine length of descriptor.
+ uint32_t Length;
+ switch (SlotKind) {
+ case SystemZII::MO_ADA_DIRECT_FUNC_DESC:
+ Length = 2 * PointerSize;
+ break;
+ default:
+ Length = PointerSize;
+ break;
+ }
+
+ uint32_t Displacement = NextDisplacement;
+ Displacements[std::make_pair(Sym, SlotKind)] = NextDisplacement;
+ NextDisplacement += Length;
+
+ return Displacement;
+}
+
+uint32_t
+SystemZAsmPrinter::AssociatedDataAreaTable::insert(const MachineOperand MO) {
+ MCSymbol *Sym;
+ if (MO.getType() == MachineOperand::MO_GlobalAddress) {
+ const GlobalValue *GV = MO.getGlobal();
+ Sym = MO.getParent()->getMF()->getTarget().getSymbol(GV);
+ assert(Sym && "No symbol");
+ } else if (MO.getType() == MachineOperand::MO_ExternalSymbol) {
+ const char *SymName = MO.getSymbolName();
+ Sym = MO.getParent()->getMF()->getContext().getOrCreateSymbol(SymName);
+ assert(Sym && "No symbol");
+ } else
+ llvm_unreachable("Unexpected operand type");
+
+ unsigned ADAslotType = MO.getTargetFlags();
+ return insert(Sym, ADAslotType);
+}
+
void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
SystemZ_MC::verifyInstructionPredicates(MI->getOpcode(),
getSubtargetInfo().getFeatureBits());
@@ -272,6 +318,43 @@ void SystemZAsmPrinter::emitInstruction(const MachineInstr *MI) {
emitCallInformation(CallType::BASR33);
return;
+ case SystemZ::ADA_ENTRY_VALUE:
+ case SystemZ::ADA_ENTRY: {
+ const SystemZSubtarget &Subtarget = MF->getSubtarget<SystemZSubtarget>();
+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
+ uint32_t Disp = ADATable.insert(MI->getOperand(1));
+ Register TargetReg = MI->getOperand(0).getReg();
+
+ Register ADAReg = MI->getOperand(2).getReg();
+ Disp += MI->getOperand(3).getImm();
+ bool LoadAddr = MI->getOpcode() == SystemZ::ADA_ENTRY;
+
+ unsigned Op0 = LoadAddr ? SystemZ::LA : SystemZ::LG;
+ unsigned Op = TII->getOpcodeForOffset(Op0, Disp);
+
+ Register IndexReg = 0;
+ if (!Op) {
+ if (TargetReg != ADAReg) {
+ IndexReg = TargetReg;
+ // Use TargetReg to store displacement.
+ EmitToStreamer(
+ *OutStreamer,
+ MCInstBuilder(SystemZ::LLILF).addReg(TargetReg).addImm(Disp));
+ } else
+ EmitToStreamer(
+ *OutStreamer,
+ MCInstBuilder(SystemZ::ALGFI).addReg(TargetReg).addImm(Disp));
+ Disp = 0;
+ Op = Op0;
+ }
+ EmitToStreamer(*OutStreamer, MCInstBuilder(Op)
+ .addReg(TargetReg)
+ .addReg(IndexReg)
+ .addImm(Disp)
+ .addReg(ADAReg));
+
+ return;
+ }
case SystemZ::CallBRASL:
LoweredMI = MCInstBuilder(SystemZ::BRASL)
.addReg(SystemZ::R14D)
@@ -760,7 +843,7 @@ void SystemZAsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
void SystemZAsmPrinter::emitAttributes(Module &M) {
if (M.getModuleFlag("s390x-visible-vector-ABI")) {
bool HasVectorFeature =
- TM.getMCSubtargetInfo()->getFeatureBits()[SystemZ::FeatureVector];
+ TM.getMCSubtargetInfo()->hasFeature(SystemZ::FeatureVector);
OutStreamer->emitGNUAttribute(8, HasVectorFeature ? 2 : 1);
}
}
@@ -866,9 +949,82 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
void SystemZAsmPrinter::emitEndOfAsmFile(Module &M) {
+ auto TT = OutContext.getTargetTriple();
+ if (TT.isOSzOS()) {
+ emitADASection();
+ }
emitAttributes(M);
}
+void SystemZAsmPrinter::emitADASection() {
+ OutStreamer->pushSection();
+
+ const unsigned PointerSize = getDataLayout().getPointerSize();
+ OutStreamer->switchSection(getObjFileLowering().getADASection());
+
+ unsigned EmittedBytes = 0;
+ for (auto &Entry : ADATable.getTable()) {
+ const MCSymbol *Sym;
+ unsigned SlotKind;
+ std::tie(Sym, SlotKind) = Entry.first;
+ unsigned Offset = Entry.second;
+ assert(Offset == EmittedBytes && "Offset not as expected");
+ (void)EmittedBytes;
+#define EMIT_COMMENT(Str) \
+ OutStreamer->AddComment(Twine("Offset ") \
+ .concat(utostr(Offset)) \
+ .concat(" " Str " ") \
+ .concat(Sym->getName()));
+ switch (SlotKind) {
+ case SystemZII::MO_ADA_DIRECT_FUNC_DESC:
+ // Language Environment DLL logic requires that function descriptors for
+ // imported functions placed in the ADA be 8-byte aligned.
+ EMIT_COMMENT("function descriptor of");
+ OutStreamer->emitValue(
+ SystemZMCExpr::create(SystemZMCExpr::VK_SystemZ_RCon,
+ MCSymbolRefExpr::create(Sym, OutContext),
+ OutContext),
+ PointerSize);
+ OutStreamer->emitValue(
+ SystemZMCExpr::create(SystemZMCExpr::VK_SystemZ_VCon,
+ MCSymbolRefExpr::create(Sym, OutContext),
+ OutContext),
+ PointerSize);
+ EmittedBytes += PointerSize * 2;
+ break;
+ case SystemZII::MO_ADA_DATA_SYMBOL_ADDR:
+ EMIT_COMMENT("pointer to data symbol");
+ OutStreamer->emitValue(
+ SystemZMCExpr::create(SystemZMCExpr::VK_SystemZ_None,
+ MCSymbolRefExpr::create(Sym, OutContext),
+ OutContext),
+ PointerSize);
+ EmittedBytes += PointerSize;
+ break;
+ case SystemZII::MO_ADA_INDIRECT_FUNC_DESC: {
+ MCSymbol *Alias = OutContext.createTempSymbol(
+ Twine(Sym->getName()).concat("@indirect"));
+ OutStreamer->emitAssignment(Alias,
+ MCSymbolRefExpr::create(Sym, OutContext));
+ OutStreamer->emitSymbolAttribute(Alias, MCSA_IndirectSymbol);
+
+ EMIT_COMMENT("pointer to function descriptor");
+ OutStreamer->emitValue(
+ SystemZMCExpr::create(SystemZMCExpr::VK_SystemZ_VCon,
+ MCSymbolRefExpr::create(Alias, OutContext),
+ OutContext),
+ PointerSize);
+ EmittedBytes += PointerSize;
+ break;
+ }
+ default:
+ llvm_unreachable("Unexpected slot kind");
+ }
+#undef EMIT_COMMENT
+ }
+ OutStreamer->popSection();
+}
+
void SystemZAsmPrinter::emitFunctionBodyEnd() {
if (TM.getTargetTriple().isOSzOS()) {
// Emit symbol for the end of function if the z/OS target streamer
@@ -1058,7 +1214,7 @@ void SystemZAsmPrinter::emitPPA1(MCSymbol *FnEndSym) {
OutStreamer->AddComment("Length/4 of Parms");
OutStreamer->emitInt16(
- static_cast<uint16_t>(MFFrame.getMaxCallFrameSize() / 4)); // Parms/4.
+ static_cast<uint16_t>(ZFI->getSizeOfFnParams() / 4)); // Parms/4.
OutStreamer->AddComment("Length of Code");
OutStreamer->emitAbsoluteSymbolDiff(FnEndSym, CurrentFnEPMarkerSym, 4);
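The ADA_ENTRY lowering above first asks getOpcodeForOffset whether any addressing form of LA/LG can encode the accumulated displacement; only if none can does it materialize the displacement separately (via LLILF or ALGFI, as in the hunk above) and retry with displacement 0. A standalone sketch of that classification, assuming the usual z/Architecture 12-bit unsigned and 20-bit signed displacement ranges:

// ada_entry_disp.cpp -- displacement classification behind the ADA_ENTRY
// fallback above. The D12/D20 ranges are assumptions about the short and
// long displacement forms, not taken from this patch.
#include <cstdint>
#include <iostream>
#include <string>

static std::string selectForm(int64_t Disp, bool HasShortForm) {
  if (HasShortForm && Disp >= 0 && Disp < (1 << 12))
    return "encode directly (12-bit unsigned displacement)";
  if (Disp >= -(1 << 19) && Disp < (1 << 19))
    return "encode directly (20-bit signed displacement)";
  return "materialize displacement in a register, address with displacement 0";
}

int main() {
  std::cout << selectForm(2192, /*HasShortForm=*/true) << '\n';
  std::cout << selectForm(300000, /*HasShortForm=*/false) << '\n';
  std::cout << selectForm(int64_t(1) << 22, /*HasShortForm=*/false) << '\n';
}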
diff --git a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
index c99fcda6dcc5..c9dbbfd0b4c4 100644
--- a/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -46,12 +46,56 @@ private:
BASR33 = 7, // b'x111' == BASR r3,r3
};
+ // The Associated Data Area (ADA) contains descriptors which help locating
+ // external symbols. For each symbol and type, the displacement into the ADA
+ // is stored.
+ class AssociatedDataAreaTable {
+ public:
+ using DisplacementTable =
+ MapVector<std::pair<const MCSymbol *, unsigned>, uint32_t>;
+
+ private:
+ const uint64_t PointerSize;
+
+ /// The mapping of name/slot type pairs to displacements.
+ DisplacementTable Displacements;
+
+ /// The next available displacement value. Incremented when new entries into
+ /// the ADA are created.
+ uint32_t NextDisplacement = 0;
+
+ public:
+ AssociatedDataAreaTable(uint64_t PointerSize) : PointerSize(PointerSize) {}
+
+ /// @brief Add a function descriptor to the ADA.
+ /// @param MO An operand of an ADA_ENTRY instruction referencing the symbol.
+ /// @return The displacement of the descriptor into the ADA.
+ uint32_t insert(const MachineOperand MO);
+
+ /// @brief Get the displacement into associated data area (ADA) for a name.
+ /// If no displacement is already associated with the name, assign one and
+ /// return it.
+ /// @param Sym The symbol for which the displacement should be returned.
+ /// @param SlotKind The ADA type.
+ /// @return The displacement of the descriptor into the ADA.
+ uint32_t insert(const MCSymbol *Sym, unsigned SlotKind);
+
+ /// Get the table of GOFF displacements. This is 'const' since it should
+ /// never be modified by anything except the APIs on this class.
+ const DisplacementTable &getTable() const { return Displacements; }
+
+ uint32_t getNextDisplacement() const { return NextDisplacement; }
+ };
+
+ AssociatedDataAreaTable ADATable;
+
void emitPPA1(MCSymbol *FnEndSym);
+ void emitADASection();
public:
SystemZAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), CurrentFnPPA1Sym(nullptr),
- CurrentFnEPMarkerSym(nullptr) {}
+ CurrentFnEPMarkerSym(nullptr), ADATable(TM.getPointerSize(0)) {}
// Override AsmPrinter.
StringRef getPassName() const override { return "SystemZ Assembly Printer"; }
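AssociatedDataAreaTable above is a small bump allocator keyed by (symbol, slot kind): a repeated request returns the cached displacement, and a direct function descriptor consumes two pointer-sized slots while the other kinds consume one. A standalone sketch of that policy, with std::string and a plain enum standing in for MCSymbol and the SystemZII::MO_ADA_* slot kinds:

// ada_table_sketch.cpp -- allocation policy of AssociatedDataAreaTable.
// (The real table is a MapVector, so iteration preserves insertion order.)
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <utility>

enum SlotKind { DirectFuncDesc, DataSymbolAddr, IndirectFuncDesc };

class ADATableSketch {
  const uint32_t PointerSize;
  std::map<std::pair<std::string, SlotKind>, uint32_t> Displacements;
  uint32_t NextDisplacement = 0;

public:
  explicit ADATableSketch(uint32_t PtrSize) : PointerSize(PtrSize) {}

  uint32_t insert(const std::string &Sym, SlotKind Kind) {
    auto Key = std::make_pair(Sym, Kind);
    auto It = Displacements.find(Key);
    if (It != Displacements.end())
      return It->second; // already assigned
    // Direct function descriptors hold two pointers; everything else one.
    uint32_t Length = (Kind == DirectFuncDesc) ? 2 * PointerSize : PointerSize;
    uint32_t Displacement = NextDisplacement;
    Displacements[Key] = Displacement;
    NextDisplacement += Length;
    return Displacement;
  }
};

int main() {
  ADATableSketch ADA(/*PtrSize=*/8);
  std::cout << ADA.insert("callee", DirectFuncDesc) << '\n';  // 0
  std::cout << ADA.insert("globvar", DataSymbolAddr) << '\n'; // 16
  std::cout << ADA.insert("callee", DirectFuncDesc) << '\n';  // 0 (cached)
  std::cout << ADA.insert("other", IndirectFuncDesc) << '\n'; // 24
}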
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index d7a2a51d4652..11a59df899a1 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -1315,6 +1315,10 @@ void SystemZXPLINKFrameLowering::inlineStackProbe(
if (StackAllocMI == nullptr)
return;
+ bool NeedSaveSP = hasFP(MF);
+ bool NeedSaveArg = PrologMBB.isLiveIn(SystemZ::R3D);
+ const int64_t SaveSlotR3 = 2192;
+
MachineBasicBlock &MBB = PrologMBB;
const DebugLoc DL = StackAllocMI->getDebugLoc();
@@ -1334,7 +1338,25 @@ void SystemZXPLINKFrameLowering::inlineStackProbe(
// BASR r3,r3
BuildMI(StackExtMBB, DL, ZII->get(SystemZ::CallBASR_STACKEXT))
.addReg(SystemZ::R3D);
-
+ if (NeedSaveArg) {
+ if (!NeedSaveSP) {
+ // LGR r0,r3
+ BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::LGR))
+ .addReg(SystemZ::R0D, RegState::Define)
+ .addReg(SystemZ::R3D);
+ } else {
+ // In this case, the incoming value of r4 is saved in r0 so the
+ // latter register is unavailable. Store r3 in its corresponding
+ // slot in the parameter list instead. Do this at the start of
+ // the prolog before r4 is manipulated by anything else.
+ // STG r3, 2192(r4)
+ BuildMI(MBB, MBB.begin(), DL, ZII->get(SystemZ::STG))
+ .addReg(SystemZ::R3D)
+ .addReg(SystemZ::R4D)
+ .addImm(SaveSlotR3)
+ .addReg(0);
+ }
+ }
// LLGT r3,1208
BuildMI(MBB, StackAllocMI, DL, ZII->get(SystemZ::LLGT), SystemZ::R3D)
.addReg(0)
@@ -1355,6 +1377,28 @@ void SystemZXPLINKFrameLowering::inlineStackProbe(
NextMBB = SystemZ::splitBlockBefore(StackAllocMI, &MBB);
MBB.addSuccessor(NextMBB);
MBB.addSuccessor(StackExtMBB);
+ if (NeedSaveArg) {
+ if (!NeedSaveSP) {
+ // LGR r3, r0
+ BuildMI(*NextMBB, StackAllocMI, DL, ZII->get(SystemZ::LGR))
+ .addReg(SystemZ::R3D, RegState::Define)
+ .addReg(SystemZ::R0D, RegState::Kill);
+ } else {
+ // In this case, the incoming value of r4 is saved in r0 so the
+ // latter register is unavailable. We stored r3 in its corresponding
+ // slot in the parameter list instead and we now restore it from there.
+ // LGR r3, r0
+ BuildMI(*NextMBB, StackAllocMI, DL, ZII->get(SystemZ::LGR))
+ .addReg(SystemZ::R3D, RegState::Define)
+ .addReg(SystemZ::R0D);
+ // LG r3, 2192(r3)
+ BuildMI(*NextMBB, StackAllocMI, DL, ZII->get(SystemZ::LG))
+ .addReg(SystemZ::R3D, RegState::Define)
+ .addReg(SystemZ::R3D)
+ .addImm(SaveSlotR3)
+ .addReg(0);
+ }
+ }
// Add jump back from stack extension BB.
BuildMI(StackExtMBB, DL, ZII->get(SystemZ::J)).addMBB(NextMBB);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 250edf64cb6c..1e9e2917a3aa 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -1391,7 +1391,7 @@ bool SystemZDAGToDAGISel::tryFoldLoadStoreIntoMemOperand(SDNode *Node) {
auto OperandV = OperandC->getAPIntValue();
if (NegateOperand)
OperandV = -OperandV;
- if (OperandV.getMinSignedBits() > 8)
+ if (OperandV.getSignificantBits() > 8)
return false;
Operand = CurDAG->getTargetConstant(OperandV, DL, MemVT);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 5dca792dc89a..785a08a763eb 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -186,8 +186,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::USUBO, VT, Custom);
// Support carry in as value rather than glue.
- setOperationAction(ISD::ADDCARRY, VT, Custom);
- setOperationAction(ISD::SUBCARRY, VT, Custom);
+ setOperationAction(ISD::UADDO_CARRY, VT, Custom);
+ setOperationAction(ISD::USUBO_CARRY, VT, Custom);
// Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
// stores, putting a serialization instruction after the stores.
@@ -399,9 +399,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
// and inverting the result as necessary.
setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
- if (Subtarget.hasVectorEnhancements1())
- setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
}
}
@@ -537,6 +534,15 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
+
+ setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v2f64, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v4f32, Custom);
+ if (Subtarget.hasVectorEnhancements1()) {
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::v2f64, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::v4f32, Custom);
+ }
}
// The vector enhancements facility 1 has instructions for these.
@@ -781,10 +787,10 @@ bool SystemZVectorConstantInfo::isVectorConstantLegal(
// IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
uint64_t SplatBitsZ = SplatBits.getZExtValue();
uint64_t SplatUndefZ = SplatUndef.getZExtValue();
- uint64_t Lower =
- (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
- uint64_t Upper =
- (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
+ unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
+ unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
+ uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
+ uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
if (tryValue(SplatBitsZ | Upper | Lower))
return true;
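In isVectorConstantLegal above, Lower collects the undef bits below the lowest set bit of the splat and Upper collects the undef bits above the highest set bit; OR-ing them into the splat may yield a value the immediate-forming instructions accept. The rewrite expresses the same masks via countr_zero/countl_zero plus maskTrailingOnes/maskLeadingOnes. A standalone check of those identities using the C++20 <bit> counterparts and local reimplementations of the mask helpers:

// splat_masks.cpp -- the Lower/Upper undef-mask computation shown above,
// checked with std::countr_zero/std::countl_zero (C++20) and local versions
// of maskTrailingOnes/maskLeadingOnes.
#include <bit>
#include <cstdint>
#include <iostream>

static uint64_t maskTrailingOnes(unsigned N) {
  return N == 64 ? ~0ull : (1ull << N) - 1;
}
static uint64_t maskLeadingOnes(unsigned N) {
  return ~maskTrailingOnes(64 - N);
}

int main() {
  uint64_t SplatBits = 0x000000ffff000000;  // set bits 24..39
  uint64_t SplatUndef = 0xff000000000000ff; // don't-care bits
  unsigned LowerBits = std::countr_zero(SplatBits); // 24
  unsigned UpperBits = std::countl_zero(SplatBits); // 24
  uint64_t Lower = SplatUndef & maskTrailingOnes(LowerBits);
  uint64_t Upper = SplatUndef & maskLeadingOnes(UpperBits);
  std::cout << std::hex << Lower << ' ' << Upper << '\n';
  // Lower = ff (undef bits below bit 24); Upper = ff00000000000000 (undef
  // bits above bit 39). SplatBits | Upper | Lower is what tryValue() tests.
}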
@@ -1105,19 +1111,18 @@ getSingleConstraintMatchWeight(AsmOperandInfo &info,
case 'd': // Data register (equivalent to 'r')
case 'h': // High-part register
case 'r': // General-purpose register
- if (CallOperandVal->getType()->isIntegerTy())
- weight = CW_Register;
+ weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
break;
case 'f': // Floating-point register
- if (type->isFloatingPointTy())
- weight = CW_Register;
+ if (!useSoftFloat())
+ weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
break;
case 'v': // Vector register
- if ((type->isVectorTy() || type->isFloatingPointTy()) &&
- Subtarget.hasVector())
- weight = CW_Register;
+ if (Subtarget.hasVector())
+ weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
+ : CW_Default;
break;
case 'I': // Unsigned 8-bit constant
@@ -1179,9 +1184,9 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
default: break;
case 'd': // Data register (equivalent to 'r')
case 'r': // General-purpose register
- if (VT == MVT::i64)
+ if (VT.getSizeInBits() == 64)
return std::make_pair(0U, &SystemZ::GR64BitRegClass);
- else if (VT == MVT::i128)
+ else if (VT.getSizeInBits() == 128)
return std::make_pair(0U, &SystemZ::GR128BitRegClass);
return std::make_pair(0U, &SystemZ::GR32BitRegClass);
@@ -1197,18 +1202,19 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
case 'f': // Floating-point register
if (!useSoftFloat()) {
- if (VT == MVT::f64)
+ if (VT.getSizeInBits() == 64)
return std::make_pair(0U, &SystemZ::FP64BitRegClass);
- else if (VT == MVT::f128)
+ else if (VT.getSizeInBits() == 128)
return std::make_pair(0U, &SystemZ::FP128BitRegClass);
return std::make_pair(0U, &SystemZ::FP32BitRegClass);
}
break;
+
case 'v': // Vector register
if (Subtarget.hasVector()) {
- if (VT == MVT::f32)
+ if (VT.getSizeInBits() == 32)
return std::make_pair(0U, &SystemZ::VR32BitRegClass);
- if (VT == MVT::f64)
+ if (VT.getSizeInBits() == 64)
return std::make_pair(0U, &SystemZ::VR64BitRegClass);
return std::make_pair(0U, &SystemZ::VR128BitRegClass);
}
@@ -1216,15 +1222,22 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
}
}
if (Constraint.size() > 0 && Constraint[0] == '{') {
+
+ // A clobber constraint (e.g. ~{f0}) will have MVT::Other, for which the
+ // size must not be queried.
+ auto getVTSizeInBits = [&VT]() {
+ return VT == MVT::Other ? 0 : VT.getSizeInBits();
+ };
+
// We need to override the default register parsing for GPRs and FPRs
// because the interpretation depends on VT. The internal names of
// the registers are also different from the external names
// (F0D and F0S instead of F0, etc.).
if (Constraint[1] == 'r') {
- if (VT == MVT::i32)
+ if (getVTSizeInBits() == 32)
return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
SystemZMC::GR32Regs, 16);
- if (VT == MVT::i128)
+ if (getVTSizeInBits() == 128)
return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
SystemZMC::GR128Regs, 16);
return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
@@ -1234,10 +1247,10 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
if (useSoftFloat())
return std::make_pair(
0u, static_cast<const TargetRegisterClass *>(nullptr));
- if (VT == MVT::f32)
+ if (getVTSizeInBits() == 32)
return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
SystemZMC::FP32Regs, 16);
- if (VT == MVT::f128)
+ if (getVTSizeInBits() == 128)
return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
SystemZMC::FP128Regs, 16);
return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
@@ -1247,10 +1260,10 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
if (!Subtarget.hasVector())
return std::make_pair(
0u, static_cast<const TargetRegisterClass *>(nullptr));
- if (VT == MVT::f32)
+ if (getVTSizeInBits() == 32)
return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
SystemZMC::VR32Regs, 32);
- if (VT == MVT::f64)
+ if (getVTSizeInBits() == 64)
return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
SystemZMC::VR64Regs, 32);
return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
@@ -1265,12 +1278,10 @@ SystemZTargetLowering::getRegForInlineAsmConstraint(
Register
SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
const MachineFunction &MF) const {
- const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
-
Register Reg =
StringSwitch<Register>(RegName)
- .Case("r4", Subtarget->isTargetXPLINK64() ? SystemZ::R4D : 0)
- .Case("r15", Subtarget->isTargetELF() ? SystemZ::R15D : 0)
+ .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0)
+ .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0)
.Default(0);
if (Reg)
@@ -1433,10 +1444,8 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
SDLoc DL(In);
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
- DAG.getIntPtrConstant(0, DL));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
- DAG.getIntPtrConstant(1, DL));
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
MVT::Untyped, Hi, Lo);
return SDValue(Pair, 0);
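SplitScalar replaces the two explicit EXTRACT_ELEMENT nodes; conceptually it just hands back the low and high 64-bit halves of the i128 operand. A self-contained sketch of that split on plain integers (unsigned __int128 is a GCC/Clang extension used here only for illustration):

#include <cassert>
#include <cstdint>

int main() {
  unsigned __int128 In =
      ((unsigned __int128)0x1122334455667788ULL << 64) | 0x99AABBCCDDEEFF00ULL;
  uint64_t Lo = (uint64_t)In;         // element 0: low 64 bits
  uint64_t Hi = (uint64_t)(In >> 64); // element 1: high 64 bits
  assert(Lo == 0x99AABBCCDDEEFF00ULL);
  assert(Hi == 0x1122334455667788ULL);
  // The GR128 value is then built from (Hi, Lo) via the SystemZ::PAIR128 node.
  return 0;
}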
@@ -1455,28 +1464,24 @@ bool SystemZTargetLowering::splitValueIntoRegisterParts(
SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
EVT ValueVT = Val.getValueType();
- assert((ValueVT != MVT::i128 ||
- ((NumParts == 1 && PartVT == MVT::Untyped) ||
- (NumParts == 2 && PartVT == MVT::i64))) &&
- "Unknown handling of i128 value.");
- if (ValueVT == MVT::i128 && NumParts == 1) {
+ if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
// Inline assembly operand.
- Parts[0] = lowerI128ToGR128(DAG, Val);
+ Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
return true;
}
+
return false;
}
SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
- assert((ValueVT != MVT::i128 ||
- ((NumParts == 1 && PartVT == MVT::Untyped) ||
- (NumParts == 2 && PartVT == MVT::i64))) &&
- "Unknown handling of i128 value.");
- if (ValueVT == MVT::i128 && NumParts == 1)
+ if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
// Inline assembly operand.
- return lowerGR128ToI128(DAG, Parts[0]);
+ SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
+ return DAG.getBitcast(ValueVT, Res);
+ }
+
return SDValue();
}
@@ -1500,6 +1505,7 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
SmallVector<CCValAssign, 16> ArgLocs;
SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
+ FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
unsigned NumFixedGPRs = 0;
unsigned NumFixedFPRs = 0;
@@ -1603,8 +1609,9 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
// Likewise the address (in the form of a frame index) of where the
// first stack vararg would be. The 1-byte size here is arbitrary.
- int64_t StackSize = CCInfo.getNextStackOffset();
- FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
+ int64_t VarArgsOffset = CCInfo.getStackSize();
+ FuncInfo->setVarArgsFrameIndex(
+ MFI.CreateFixedObject(1, VarArgsOffset, true));
// ...and a similar frame index for the caller-allocated save area
// that will be used to store the incoming registers.
@@ -1635,8 +1642,15 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
}
}
- // FIXME: For XPLINK64, Add in support for handling incoming "ADA" special
- // register (R5)
+ if (Subtarget.isTargetXPLINK64()) {
+ // Create a virtual register for the incoming "ADA" special register (R5)
+ const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
+ Register ADAvReg = MRI.createVirtualRegister(RC);
+ auto *Regs = static_cast<SystemZXPLINK64Registers *>(
+ Subtarget.getSpecialRegisters());
+ MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
+ FuncInfo->setADAVirtualRegister(ADAvReg);
+ }
return Chain;
}
@@ -1661,6 +1675,94 @@ static bool canUseSiblingCall(const CCState &ArgCCInfo,
return true;
}
+static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL,
+ unsigned Offset, bool LoadAdr = false) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
+ unsigned ADAvReg = MFI->getADAVirtualRegister();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+
+ SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
+ SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
+
+ SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
+ if (!LoadAdr)
+ Result = DAG.getLoad(
+ PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
+ MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant);
+
+ return Result;
+}
+
+// ADA access using a GlobalValue.
+// Note: for functions, the address of the descriptor is returned.
+static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL,
+ EVT PtrVT) {
+ unsigned ADAtype;
+ bool LoadAddr = false;
+ const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
+ bool IsFunction =
+ (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
+ bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
+
+ if (IsFunction) {
+ if (IsInternal) {
+ ADAtype = SystemZII::MO_ADA_DIRECT_FUNC_DESC;
+ LoadAddr = true;
+ } else
+ ADAtype = SystemZII::MO_ADA_INDIRECT_FUNC_DESC;
+ } else {
+ ADAtype = SystemZII::MO_ADA_DATA_SYMBOL_ADDR;
+ }
+ SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
+
+ return getADAEntry(DAG, Val, DL, 0, LoadAddr);
+}
+
+static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
+ SDLoc &DL, SDValue &Chain) {
+ unsigned ADADelta = 0; // ADA offset in desc.
+ unsigned EPADelta = 8; // EPA offset in desc.
+ MachineFunction &MF = DAG.getMachineFunction();
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+
+ // XPLink calling convention.
+ if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
+ G->getGlobal()->hasPrivateLinkage());
+ if (IsInternal) {
+ SystemZMachineFunctionInfo *MFI =
+ MF.getInfo<SystemZMachineFunctionInfo>();
+ unsigned ADAvReg = MFI->getADAVirtualRegister();
+ ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
+ Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
+ return true;
+ } else {
+ SDValue GA = DAG.getTargetGlobalAddress(
+ G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
+ ADA = getADAEntry(DAG, GA, DL, ADADelta);
+ Callee = getADAEntry(DAG, GA, DL, EPADelta);
+ }
+ } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ SDValue ES = DAG.getTargetExternalSymbol(
+ E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
+ ADA = getADAEntry(DAG, ES, DL, ADADelta);
+ Callee = getADAEntry(DAG, ES, DL, EPADelta);
+ } else {
+ // Function pointer case
+ ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
+ DAG.getConstant(ADADelta, DL, PtrVT));
+ ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
+ Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
+ DAG.getConstant(EPADelta, DL, PtrVT));
+ Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()));
+ }
+ return false;
+}
+
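getzOSCalleeAndADA reads both halves of an XPLINK function descriptor: the ADA slot at ADADelta (0) and the entry point at EPADelta (8). A hypothetical layout sketch matching those offsets; the struct and field names are invented purely for illustration:

#include <cstddef>
#include <cstdint>

// Illustrative only: the first 16 bytes of an XPLINK(64) function descriptor
// as assumed by the loads above (ADA pointer first, entry point second).
struct XPLinkFunctionDescriptor {
  uint64_t ADA; // associated data area pointer, loaded at ADADelta == 0
  uint64_t EPA; // entry point address, loaded at EPADelta == 8
};

static_assert(offsetof(XPLinkFunctionDescriptor, ADA) == 0, "ADA at offset 0");
static_assert(offsetof(XPLinkFunctionDescriptor, EPA) == 8, "EPA at offset 8");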
SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
@@ -1700,7 +1802,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
IsTailCall = false;
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = ArgCCInfo.getNextStackOffset();
+ unsigned NumBytes = ArgCCInfo.getStackSize();
if (Subtarget.isTargetXPLINK64())
// Although the XPLINK specifications for AMODE64 state that minimum size
@@ -1807,17 +1909,31 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// associated Target* opcodes. Force %r1 to be used for indirect
// tail calls.
SDValue Glue;
- // FIXME: Add support for XPLINK using the ADA register.
- if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
- Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
- } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
- Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
- } else if (IsTailCall) {
- Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
- Glue = Chain.getValue(1);
- Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
+
+ if (Subtarget.isTargetXPLINK64()) {
+ SDValue ADA;
+ bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
+ if (!IsBRASL) {
+ unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
+ ->getAddressOfCalleeRegister();
+ Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
+ Glue = Chain.getValue(1);
+ Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
+ }
+ RegsToPass.push_back(std::make_pair(
+ static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
+ } else {
+ if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
+ Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
+ } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
+ Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
+ } else if (IsTailCall) {
+ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
+ Glue = Chain.getValue(1);
+ Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
+ }
}
// Build a sequence of copy-to-reg nodes, chained and glued together.
@@ -1850,8 +1966,11 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Emit the call.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- if (IsTailCall)
- return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
+ if (IsTailCall) {
+ SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
+ DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
+ return Ret;
+ }
Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
Glue = Chain.getValue(1);
@@ -1956,7 +2075,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Quick exit for void returns
if (RetLocs.empty())
- return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
+ return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
if (CallConv == CallingConv::GHC)
report_fatal_error("GHC functions return void only");
@@ -1987,7 +2106,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (Glue.getNode())
RetOps.push_back(Glue);
- return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
+ return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
}
// Return true if Op is an intrinsic node with chain that returns the CC value
@@ -2425,6 +2544,12 @@ static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
if (N->getOpcode() == ISD::SUB &&
((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
(N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
+ // Disable the nsw and nuw flags: the backend needs to handle
+ // overflow as well during comparison elimination.
+ SDNodeFlags Flags = N->getFlags();
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ N->setFlags(Flags);
C.Op0 = SDValue(N, 0);
C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
return;
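Clearing nsw/nuw matters because the SUB result is reused to evaluate the comparison through the condition code, so the wrapped value must stay visible to later folds. A small standalone example of why the sign of a wrapping difference cannot stand in for the comparison once overflow is assumed away:

#include <cassert>
#include <cstdint>
#include <limits>

int main() {
  int64_t a = std::numeric_limits<int64_t>::min(); // most negative value
  int64_t b = 1;
  // Wrapping (two's complement) subtraction, done on unsigned to stay defined.
  int64_t diff = (int64_t)((uint64_t)a - (uint64_t)b);
  assert(a < b);    // the comparison itself
  assert(diff > 0); // but the wrapped difference is positive
  // If the node kept its nsw flag, later folds could assume this overflow
  // never happens; hence the backend clears nsw/nuw before reusing the SUB.
  return 0;
}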
@@ -2535,9 +2660,8 @@ static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
return 0;
// Work out the masks for the lowest and highest bits.
- unsigned HighShift = 63 - countLeadingZeros(Mask);
- uint64_t High = uint64_t(1) << HighShift;
- uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
+ uint64_t High = llvm::bit_floor(Mask);
+ uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
// Signed ordered comparisons are effectively unsigned if the sign
// bit is dropped.
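bit_floor and countr_zero isolate the highest and lowest set bits of the mask, replacing the old shift arithmetic. A short standalone sketch using the equivalent C++20 <bit> functions (the llvm:: versions mirror them):

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  uint64_t Mask = 0x0000'0000'00F0'F000;
  uint64_t High = std::bit_floor(Mask);                  // highest set bit only
  uint64_t Low = uint64_t(1) << std::countr_zero(Mask);  // lowest set bit only
  assert(High == 0x0000'0000'0080'0000);
  assert(Low == 0x0000'0000'0000'1000);
  return 0;
}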
@@ -3236,12 +3360,15 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
}
- } else {
+ } else if (Subtarget.isTargetELF()) {
Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
- }
+ } else if (Subtarget.isTargetzOS()) {
+ Result = getADAEntry(DAG, GV, DL, PtrVT);
+ } else
+ llvm_unreachable("Unexpected Subtarget");
// If there was a non-zero offset that we didn't fold, create an explicit
// addition for it.
@@ -3991,20 +4118,20 @@ SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
}
static bool isAddCarryChain(SDValue Carry) {
- while (Carry.getOpcode() == ISD::ADDCARRY)
+ while (Carry.getOpcode() == ISD::UADDO_CARRY)
Carry = Carry.getOperand(2);
return Carry.getOpcode() == ISD::UADDO;
}
static bool isSubBorrowChain(SDValue Carry) {
- while (Carry.getOpcode() == ISD::SUBCARRY)
+ while (Carry.getOpcode() == ISD::USUBO_CARRY)
Carry = Carry.getOperand(2);
return Carry.getOpcode() == ISD::USUBO;
}
-// Lower ADDCARRY/SUBCARRY nodes.
-SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
- SelectionDAG &DAG) const {
+// Lower UADDO_CARRY/USUBO_CARRY nodes.
+SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
+ SelectionDAG &DAG) const {
SDNode *N = Op.getNode();
MVT VT = N->getSimpleValueType(0);
@@ -4023,7 +4150,7 @@ SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown instruction!");
- case ISD::ADDCARRY:
+ case ISD::UADDO_CARRY:
if (!isAddCarryChain(Carry))
return SDValue();
@@ -4031,7 +4158,7 @@ SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
CCValid = SystemZ::CCMASK_LOGICAL;
CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
break;
- case ISD::SUBCARRY:
+ case ISD::USUBO_CARRY:
if (!isSubBorrowChain(Carry))
return SDValue();
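isAddCarryChain/isSubBorrowChain walk a chain that must start with UADDO/USUBO and continue only through UADDO_CARRY/USUBO_CARRY links. A standalone sketch of what such a chain computes, shown as a two-limb 128-bit addition (the first add plays the role of UADDO, the second of UADDO_CARRY):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t ALo = ~0ULL, AHi = 0, BLo = 1, BHi = 0;
  uint64_t SumLo = ALo + BLo;           // UADDO: low limb add...
  uint64_t CarryLo = SumLo < ALo;       // ...and its carry-out
  uint64_t SumHi = AHi + BHi + CarryLo; // UADDO_CARRY: consumes the carry-in
  assert(SumLo == 0 && CarryLo == 1 && SumHi == 1);
  // On SystemZ the carry lives in the condition code (CCMASK_LOGICAL_CARRY),
  // which is why the lowering insists the whole chain is add/sub-with-carry.
  return 0;
}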
@@ -4376,8 +4503,7 @@ SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
- auto *Regs = Subtarget->getSpecialRegisters();
+ auto *Regs = Subtarget.getSpecialRegisters();
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
report_fatal_error("Variable-sized stack allocations are not supported "
"in GHC calling convention");
@@ -4388,8 +4514,7 @@ SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
- auto *Regs = Subtarget->getSpecialRegisters();
+ auto *Regs = Subtarget.getSpecialRegisters();
bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
if (MF.getFunction().getCallingConv() == CallingConv::GHC)
@@ -5739,9 +5864,9 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
case ISD::UADDO:
case ISD::USUBO:
return lowerXALUO(Op, DAG);
- case ISD::ADDCARRY:
- case ISD::SUBCARRY:
- return lowerADDSUBCARRY(Op, DAG);
+ case ISD::UADDO_CARRY:
+ case ISD::USUBO_CARRY:
+ return lowerUADDSUBO_CARRY(Op, DAG);
case ISD::OR:
return lowerOR(Op, DAG);
case ISD::CTPOP:
@@ -5910,7 +6035,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
switch ((SystemZISD::NodeType)Opcode) {
case SystemZISD::FIRST_NUMBER: break;
- OPCODE(RET_FLAG);
+ OPCODE(RET_GLUE);
OPCODE(CALL);
OPCODE(SIBCALL);
OPCODE(TLS_GDCALL);
@@ -6029,6 +6154,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(VLER);
OPCODE(VSTER);
OPCODE(PREFETCH);
+ OPCODE(ADA_ENTRY);
}
return nullptr;
#undef OPCODE
@@ -7268,7 +7394,7 @@ static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
KnownBits RHSKnown =
DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
- Known = KnownBits::commonBits(LHSKnown, RHSKnown);
+ Known = LHSKnown.intersectWith(RHSKnown);
}
void
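intersectWith keeps only the bit-level facts both operands agree on, which is what the removed KnownBits::commonBits helper did. A simplified stand-in (not the real llvm::KnownBits, which tracks APInts) to show the operation:

#include <cassert>
#include <cstdint>

struct KnownBits64 {
  uint64_t Zero; // bits known to be 0
  uint64_t One;  // bits known to be 1
  KnownBits64 intersectWith(const KnownBits64 &RHS) const {
    // A bit stays "known" only if both sides know it with the same value.
    return {Zero & RHS.Zero, One & RHS.One};
  }
};

int main() {
  KnownBits64 LHS{0xFF00, 0x00F0}; // bits 8-15 known zero, bits 4-7 known one
  KnownBits64 RHS{0x0F00, 0x00FF}; // bits 8-11 known zero, bits 0-7 known one
  KnownBits64 K = LHS.intersectWith(RHS);
  assert(K.Zero == 0x0F00 && K.One == 0x00F0); // only the shared knowledge
  return 0;
}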
@@ -7444,6 +7570,18 @@ SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
return 1;
}
+bool SystemZTargetLowering::
+isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op,
+ const APInt &DemandedElts, const SelectionDAG &DAG,
+ bool PoisonOnly, unsigned Depth) const {
+ switch (Op->getOpcode()) {
+ case SystemZISD::PCREL_WRAPPER:
+ case SystemZISD::PCREL_OFFSET:
+ return true;
+ }
+ return false;
+}
+
unsigned
SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
@@ -7567,10 +7705,10 @@ static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
std::swap(TrueReg, FalseReg);
- if (RegRewriteTable.find(TrueReg) != RegRewriteTable.end())
+ if (RegRewriteTable.contains(TrueReg))
TrueReg = RegRewriteTable[TrueReg].first;
- if (RegRewriteTable.find(FalseReg) != RegRewriteTable.end())
+ if (RegRewriteTable.contains(FalseReg))
FalseReg = RegRewriteTable[FalseReg].second;
DebugLoc DL = MI->getDebugLoc();
@@ -8488,11 +8626,13 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
.addReg(RemSrcReg).addImm(SrcDisp);
MBB->addSuccessor(AllDoneMBB);
MBB = AllDoneMBB;
- if (EndMBB) {
+ if (Opcode != SystemZ::MVC) {
EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
- MBB->addLiveIn(SystemZ::CC);
+ if (EndMBB)
+ MBB->addLiveIn(SystemZ::CC);
}
}
+ MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
}
// Handle any remaining bytes with straight-line code.
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index c5cc2cc3ae3a..47fa1831c3ee 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -26,8 +26,8 @@ namespace SystemZISD {
enum NodeType : unsigned {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- // Return with a flag operand. Operand 0 is the chain operand.
- RET_FLAG,
+ // Return with a glue operand. Operand 0 is the chain operand.
+ RET_GLUE,
// Calls a function. Operand 0 is the chain operand and operand 1
// is the target address. The arguments start at operand 2.
@@ -283,6 +283,16 @@ enum NodeType : unsigned {
// Operand 1: the bit mask
TDC,
+ // z/OS XPLINK ADA Entry
+ // Wraps a TargetGlobalAddress that should be loaded from a function's
+ // Associated Data Area (ADA). The ADA is passed to the function by the
+ // caller in the XPLink ABI-defined register R5.
+ // Operand 0: the GlobalValue/External Symbol
+ // Operand 1: the ADA register
+ // Operand 2: the offset (0 for the first and 8 for the second element in the
+ // function descriptor)
+ ADA_ENTRY,
+
// Strict variants of scalar floating-point comparisons.
// Quiet and signaling versions.
STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
@@ -423,10 +433,6 @@ public:
}
bool isCheapToSpeculateCtlz(Type *) const override { return true; }
bool preferZeroCompareBranch() const override { return true; }
- bool hasBitPreservingFPLogic(EVT VT) const override {
- EVT ScVT = VT.getScalarType();
- return ScVT == MVT::f32 || ScVT == MVT::f64 || ScVT == MVT::f128;
- }
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override {
ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
return Mask && Mask->getValue().isIntN(16);
@@ -600,6 +606,10 @@ public:
const SelectionDAG &DAG,
unsigned Depth) const override;
+ bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
+ SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
+ bool PoisonOnly, unsigned Depth) const override;
+
ISD::NodeType getExtendForAtomicOps() const override {
return ISD::ANY_EXTEND;
}
@@ -657,7 +667,7 @@ private:
SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerXALUO(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 7cbe125533d3..ea62e99a5839 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -191,7 +191,7 @@ let Uses = [FPC], mayRaiseFPException = 1 in {
let Predicates = [FeatureNoVectorEnhancements1] in {
def : Pat<(f32 (any_fpround FP128:$src)),
- (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>;
+ (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_h32)>;
def : Pat<(f64 (any_fpround FP128:$src)),
(EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index e513befd0d6f..a25719f80ad0 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -172,11 +172,9 @@ def getTwoOperandOpcode : InstrMapping {
// bits<4> Rn : register input or output for operand n
// bits<5> Vn : vector register input or output for operand n
// bits<m> In : immediate value of width m for operand n
-// bits<4> BDn : address operand n, which has a base and a displacement
-// bits<m> XBDn : address operand n, which has an index, a base and a
-// displacement
-// bits<m> VBDn : address operand n, which has a vector index, a base and a
-// displacement
+// bits<4> Bn : base register for address operand n
+// bits<m> Dn : displacement for address operand n
+// bits<5> Vn : vector index for address operand n
// bits<4> Xn : index register for address operand n
// bits<4> Mn : mode value for operand n
//
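With the address operands now split into explicit fields, each format class places Xn/Bn/Dn directly into the encoding. A standalone sketch of the 32-bit RX-a layout documented by InstRXa further below (op in 31-24, R1 in 23-20, X2 in 19-16, B2 in 15-12, D2 in 11-0), using the classic L (load, opcode 0x58) instruction as the example:

#include <cassert>
#include <cstdint>

static uint32_t encodeRXa(uint8_t Op, uint8_t R1, uint8_t X2, uint8_t B2,
                          uint16_t D2) {
  return (uint32_t(Op) << 24) | (uint32_t(R1 & 0xF) << 20) |
         (uint32_t(X2 & 0xF) << 16) | (uint32_t(B2 & 0xF) << 12) |
         (uint32_t(D2) & 0xFFF);
}

int main() {
  // L %r1, 0x123(%r2,%r15): opcode 0x58, R1=1, X2=2, B2=15, D2=0x123.
  assert(encodeRXa(0x58, 1, 2, 15, 0x123) == 0x5812F123);
  return 0;
}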
@@ -452,12 +450,14 @@ class InstRIS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
bits<4> R1;
bits<8> I2;
bits<4> M3;
- bits<16> BD4;
+ bits<4> B4;
+ bits<12> D4;
let Inst{47-40} = op{15-8};
let Inst{39-36} = R1;
let Inst{35-32} = M3;
- let Inst{31-16} = BD4;
+ let Inst{31-28} = B4;
+ let Inst{27-16} = D4;
let Inst{15-8} = I2;
let Inst{7-0} = op{7-0};
}
@@ -596,12 +596,14 @@ class InstRRS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
bits<4> R1;
bits<4> R2;
bits<4> M3;
- bits<16> BD4;
+ bits<4> B4;
+ bits<12> D4;
let Inst{47-40} = op{15-8};
let Inst{39-36} = R1;
let Inst{35-32} = R2;
- let Inst{31-16} = BD4;
+ let Inst{31-28} = B4;
+ let Inst{27-16} = D4;
let Inst{15-12} = M3;
let Inst{11-8} = 0;
let Inst{7-0} = op{7-0};
@@ -613,11 +615,15 @@ class InstRXa<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<32> SoftFail = 0;
bits<4> R1;
- bits<20> XBD2;
+ bits<4> X2;
+ bits<4> B2;
+ bits<12> D2;
let Inst{31-24} = op;
let Inst{23-20} = R1;
- let Inst{19-0} = XBD2;
+ let Inst{19-16} = X2;
+ let Inst{15-12} = B2;
+ let Inst{11-0} = D2;
let HasIndex = 1;
}
@@ -628,11 +634,15 @@ class InstRXb<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<32> SoftFail = 0;
bits<4> M1;
- bits<20> XBD2;
+ bits<4> X2;
+ bits<4> B2;
+ bits<12> D2;
let Inst{31-24} = op;
let Inst{23-20} = M1;
- let Inst{19-0} = XBD2;
+ let Inst{19-16} = X2;
+ let Inst{15-12} = B2;
+ let Inst{11-0} = D2;
let HasIndex = 1;
}
@@ -643,12 +653,16 @@ class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> SoftFail = 0;
bits<4> R1;
- bits<20> XBD2;
+ bits<4> X2;
+ bits<4> B2;
+ bits<12> D2;
bits<4> M3;
let Inst{47-40} = op{15-8};
let Inst{39-36} = R1;
- let Inst{35-16} = XBD2;
+ let Inst{35-32} = X2;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2;
let Inst{15-12} = M3;
let Inst{11-8} = 0;
let Inst{7-0} = op{7-0};
@@ -663,11 +677,15 @@ class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
bits<4> R1;
bits<4> R3;
- bits<20> XBD2;
+ bits<4> X2;
+ bits<4> B2;
+ bits<12> D2;
let Inst{47-40} = op{15-8};
let Inst{39-36} = R3;
- let Inst{35-16} = XBD2;
+ let Inst{35-32} = X2;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2;
let Inst{15-12} = R1;
let Inst{11-8} = 0;
let Inst{7-0} = op{7-0};
@@ -681,11 +699,16 @@ class InstRXYa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> SoftFail = 0;
bits<4> R1;
- bits<28> XBD2;
+ bits<4> X2;
+ bits<4> B2;
+ bits<20> D2;
let Inst{47-40} = op{15-8};
let Inst{39-36} = R1;
- let Inst{35-8} = XBD2;
+ let Inst{35-32} = X2;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2{11-0};
+ let Inst{15-8} = D2{19-12};
let Inst{7-0} = op{7-0};
let Has20BitOffset = 1;
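The RXY formats carry a 20-bit displacement that the encoding stores split, as laid out in InstRXYa above: the low 12 bits in Inst{27-16} and the high 8 bits in Inst{15-8}. A standalone sketch of that split, using LG (opcode 0xE3..04) as the example:

#include <cassert>
#include <cstdint>

static uint64_t encodeRXYa(uint16_t Op, uint8_t R1, uint8_t X2, uint8_t B2,
                           uint32_t D2 /* 20-bit two's-complement disp. */) {
  uint64_t Inst = 0;
  Inst |= uint64_t(Op >> 8) << 40;          // op{15-8}
  Inst |= uint64_t(R1 & 0xF) << 36;
  Inst |= uint64_t(X2 & 0xF) << 32;
  Inst |= uint64_t(B2 & 0xF) << 28;
  Inst |= uint64_t(D2 & 0xFFF) << 16;       // D2{11-0} (DL field)
  Inst |= uint64_t((D2 >> 12) & 0xFF) << 8; // D2{19-12} (DH field)
  Inst |= uint64_t(Op & 0xFF);              // op{7-0}
  return Inst; // 48-bit encoding in the low bits of a uint64_t
}

int main() {
  // LG %r1, 0x12345(%r2,%r15): D2 is split into DL=0x345 and DH=0x12.
  assert(encodeRXYa(0xE304, 1, 2, 15, 0x12345) == 0xE312F3451204ULL);
  return 0;
}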
@@ -698,11 +721,16 @@ class InstRXYb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> SoftFail = 0;
bits<4> M1;
- bits<28> XBD2;
+ bits<4> X2;
+ bits<4> B2;
+ bits<20> D2;
let Inst{47-40} = op{15-8};
let Inst{39-36} = M1;
- let Inst{35-8} = XBD2;
+ let Inst{35-32} = X2;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2{11-0};
+ let Inst{15-8} = D2{19-12};
let Inst{7-0} = op{7-0};
let Has20BitOffset = 1;
@@ -716,12 +744,14 @@ class InstRSa<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
bits<4> R1;
bits<4> R3;
- bits<16> BD2;
+ bits<4> B2;
+ bits<12> D2;
let Inst{31-24} = op;
let Inst{23-20} = R1;
let Inst{19-16} = R3;
- let Inst{15-0} = BD2;
+ let Inst{15-12} = B2;
+ let Inst{11-0} = D2;
}
class InstRSb<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
@@ -731,12 +761,33 @@ class InstRSb<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
bits<4> R1;
bits<4> M3;
- bits<16> BD2;
+ bits<4> B2;
+ bits<12> D2;
let Inst{31-24} = op;
let Inst{23-20} = R1;
let Inst{19-16} = M3;
- let Inst{15-0} = BD2;
+ let Inst{15-12} = B2;
+ let Inst{11-0} = D2;
+}
+
+class InstRSEa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R3;
+ bits<4> B2;
+ bits<12> D2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = R3;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2;
+ let Inst{15-8} = 0;
+ let Inst{7-0} = op{7-0};
}
class InstRSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
@@ -759,12 +810,15 @@ class InstRSLa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> Inst;
field bits<48> SoftFail = 0;
- bits<20> BDL1;
+ bits<4> B1;
+ bits<12> D1;
+ bits<4> L1;
let Inst{47-40} = op{15-8};
- let Inst{39-36} = BDL1{19-16};
+ let Inst{39-36} = L1;
let Inst{35-32} = 0;
- let Inst{31-16} = BDL1{15-0};
+ let Inst{31-28} = B1;
+ let Inst{27-16} = D1;
let Inst{15-8} = 0;
let Inst{7-0} = op{7-0};
}
@@ -775,11 +829,15 @@ class InstRSLb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> SoftFail = 0;
bits<4> R1;
- bits<24> BDL2;
+ bits<4> B2;
+ bits<12> D2;
+ bits<8> L2;
bits<4> M3;
let Inst{47-40} = op{15-8};
- let Inst{39-16} = BDL2;
+ let Inst{39-32} = L2;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2;
let Inst{15-12} = R1;
let Inst{11-8} = M3;
let Inst{7-0} = op{7-0};
@@ -792,12 +850,15 @@ class InstRSYa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
bits<4> R1;
bits<4> R3;
- bits<24> BD2;
+ bits<4> B2;
+ bits<20> D2;
let Inst{47-40} = op{15-8};
let Inst{39-36} = R1;
let Inst{35-32} = R3;
- let Inst{31-8} = BD2;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2{11-0};
+ let Inst{15-8} = D2{19-12};
let Inst{7-0} = op{7-0};
let Has20BitOffset = 1;
@@ -810,12 +871,15 @@ class InstRSYb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
bits<4> R1;
bits<4> M3;
- bits<24> BD2;
+ bits<4> B2;
+ bits<20> D2;
let Inst{47-40} = op{15-8};
let Inst{39-36} = R1;
let Inst{35-32} = M3;
- let Inst{31-8} = BD2;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2{11-0};
+ let Inst{15-8} = D2{19-12};
let Inst{7-0} = op{7-0};
let Has20BitOffset = 1;
@@ -826,12 +890,14 @@ class InstSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<32> Inst;
field bits<32> SoftFail = 0;
- bits<16> BD1;
+ bits<4> B1;
+ bits<12> D1;
bits<8> I2;
let Inst{31-24} = op;
let Inst{23-16} = I2;
- let Inst{15-0} = BD1;
+ let Inst{15-12} = B1;
+ let Inst{11-0} = D1;
}
class InstSIL<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
@@ -839,11 +905,13 @@ class InstSIL<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> Inst;
field bits<48> SoftFail = 0;
- bits<16> BD1;
+ bits<4> B1;
+ bits<12> D1;
bits<16> I2;
let Inst{47-32} = op;
- let Inst{31-16} = BD1;
+ let Inst{31-28} = B1;
+ let Inst{27-16} = D1;
let Inst{15-0} = I2;
}
@@ -852,12 +920,15 @@ class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> Inst;
field bits<48> SoftFail = 0;
- bits<24> BD1;
+ bits<4> B1;
+ bits<20> D1;
bits<8> I2;
let Inst{47-40} = op{15-8};
let Inst{39-32} = I2;
- let Inst{31-8} = BD1;
+ let Inst{31-28} = B1;
+ let Inst{27-16} = D1{11-0};
+ let Inst{15-8} = D1{19-12};
let Inst{7-0} = op{7-0};
let Has20BitOffset = 1;
@@ -870,12 +941,14 @@ class InstSMI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
bits<4> M1;
bits<16> RI2;
- bits<16> BD3;
+ bits<4> B3;
+ bits<12> D3;
let Inst{47-40} = op;
let Inst{39-36} = M1;
let Inst{35-32} = 0;
- let Inst{31-16} = BD3;
+ let Inst{31-28} = B3;
+ let Inst{27-16} = D3;
let Inst{15-0} = RI2;
}
@@ -884,12 +957,18 @@ class InstSSa<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> Inst;
field bits<48> SoftFail = 0;
- bits<24> BDL1;
- bits<16> BD2;
+ bits<4> B1;
+ bits<12> D1;
+ bits<8> L1;
+ bits<4> B2;
+ bits<12> D2;
let Inst{47-40} = op;
- let Inst{39-16} = BDL1;
- let Inst{15-0} = BD2;
+ let Inst{39-32} = L1;
+ let Inst{31-28} = B1;
+ let Inst{27-16} = D1;
+ let Inst{15-12} = B2;
+ let Inst{11-0} = D2;
}
class InstSSb<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
@@ -897,14 +976,20 @@ class InstSSb<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> Inst;
field bits<48> SoftFail = 0;
- bits<20> BDL1;
- bits<20> BDL2;
+ bits<4> B1;
+ bits<12> D1;
+ bits<4> L1;
+ bits<4> B2;
+ bits<12> D2;
+ bits<4> L2;
let Inst{47-40} = op;
- let Inst{39-36} = BDL1{19-16};
- let Inst{35-32} = BDL2{19-16};
- let Inst{31-16} = BDL1{15-0};
- let Inst{15-0} = BDL2{15-0};
+ let Inst{39-36} = L1;
+ let Inst{35-32} = L2;
+ let Inst{31-28} = B1;
+ let Inst{27-16} = D1;
+ let Inst{15-12} = B2;
+ let Inst{11-0} = D2;
}
class InstSSc<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
@@ -912,15 +997,20 @@ class InstSSc<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> Inst;
field bits<48> SoftFail = 0;
- bits<20> BDL1;
- bits<16> BD2;
+ bits<4> B1;
+ bits<12> D1;
+ bits<4> L1;
+ bits<4> B2;
+ bits<12> D2;
bits<4> I3;
let Inst{47-40} = op;
- let Inst{39-36} = BDL1{19-16};
+ let Inst{39-36} = L1;
let Inst{35-32} = I3;
- let Inst{31-16} = BDL1{15-0};
- let Inst{15-0} = BD2;
+ let Inst{31-28} = B1;
+ let Inst{27-16} = D1;
+ let Inst{15-12} = B2;
+ let Inst{11-0} = D2;
}
class InstSSd<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
@@ -928,15 +1018,20 @@ class InstSSd<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> Inst;
field bits<48> SoftFail = 0;
- bits<20> RBD1;
- bits<16> BD2;
+ bits<4> R1;
+ bits<4> B1;
+ bits<12> D1;
+ bits<4> B2;
+ bits<12> D2;
bits<4> R3;
let Inst{47-40} = op;
- let Inst{39-36} = RBD1{19-16};
+ let Inst{39-36} = R1;
let Inst{35-32} = R3;
- let Inst{31-16} = RBD1{15-0};
- let Inst{15-0} = BD2;
+ let Inst{31-28} = B1;
+ let Inst{27-16} = D1;
+ let Inst{15-12} = B2;
+ let Inst{11-0} = D2;
}
class InstSSe<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
@@ -945,15 +1040,19 @@ class InstSSe<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> SoftFail = 0;
bits<4> R1;
- bits<16> BD2;
+ bits<4> B2;
+ bits<12> D2;
bits<4> R3;
- bits<16> BD4;
+ bits<4> B4;
+ bits<12> D4;
let Inst{47-40} = op;
let Inst{39-36} = R1;
let Inst{35-32} = R3;
- let Inst{31-16} = BD2;
- let Inst{15-0} = BD4;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2;
+ let Inst{15-12} = B4;
+ let Inst{11-0} = D4;
}
class InstSSf<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
@@ -961,13 +1060,18 @@ class InstSSf<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> Inst;
field bits<48> SoftFail = 0;
- bits<16> BD1;
- bits<24> BDL2;
+ bits<4> B1;
+ bits<12> D1;
+ bits<4> B2;
+ bits<12> D2;
+ bits<8> L2;
let Inst{47-40} = op;
- let Inst{39-32} = BDL2{23-16};
- let Inst{31-16} = BD1;
- let Inst{15-0} = BDL2{15-0};
+ let Inst{39-32} = L2;
+ let Inst{31-28} = B1;
+ let Inst{27-16} = D1;
+ let Inst{15-12} = B2;
+ let Inst{11-0} = D2;
}
class InstSSE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
@@ -975,12 +1079,16 @@ class InstSSE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> Inst;
field bits<48> SoftFail = 0;
- bits<16> BD1;
- bits<16> BD2;
+ bits<4> B1;
+ bits<12> D1;
+ bits<4> B2;
+ bits<12> D2;
let Inst{47-32} = op;
- let Inst{31-16} = BD1;
- let Inst{15-0} = BD2;
+ let Inst{31-28} = B1;
+ let Inst{27-16} = D1;
+ let Inst{15-12} = B2;
+ let Inst{11-0} = D2;
}
class InstSSF<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
@@ -988,15 +1096,19 @@ class InstSSF<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> Inst;
field bits<48> SoftFail = 0;
- bits<16> BD1;
- bits<16> BD2;
+ bits<4> B1;
+ bits<12> D1;
+ bits<4> B2;
+ bits<12> D2;
bits<4> R3;
let Inst{47-40} = op{11-4};
let Inst{39-36} = R3;
let Inst{35-32} = op{3-0};
- let Inst{31-16} = BD1;
- let Inst{15-0} = BD2;
+ let Inst{31-28} = B1;
+ let Inst{27-16} = D1;
+ let Inst{15-12} = B2;
+ let Inst{11-0} = D2;
}
class InstS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
@@ -1004,10 +1116,12 @@ class InstS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<32> Inst;
field bits<32> SoftFail = 0;
- bits<16> BD2;
+ bits<4> B2;
+ bits<12> D2;
let Inst{31-16} = op;
- let Inst{15-0} = BD2;
+ let Inst{15-12} = B2;
+ let Inst{11-0} = D2;
}
class InstVRIa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
@@ -1493,14 +1607,16 @@ class InstVRSa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> SoftFail = 0;
bits<5> V1;
- bits<16> BD2;
+ bits<4> B2;
+ bits<12> D2;
bits<5> V3;
bits<4> M4;
let Inst{47-40} = op{15-8};
let Inst{39-36} = V1{3-0};
let Inst{35-32} = V3{3-0};
- let Inst{31-16} = BD2;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2;
let Inst{15-12} = M4;
let Inst{11} = V1{4};
let Inst{10} = V3{4};
@@ -1514,14 +1630,16 @@ class InstVRSb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> SoftFail = 0;
bits<5> V1;
- bits<16> BD2;
+ bits<4> B2;
+ bits<12> D2;
bits<4> R3;
bits<4> M4;
let Inst{47-40} = op{15-8};
let Inst{39-36} = V1{3-0};
let Inst{35-32} = R3;
- let Inst{31-16} = BD2;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2;
let Inst{15-12} = M4;
let Inst{11} = V1{4};
let Inst{10-8} = 0;
@@ -1534,14 +1652,16 @@ class InstVRSc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> SoftFail = 0;
bits<4> R1;
- bits<16> BD2;
+ bits<4> B2;
+ bits<12> D2;
bits<5> V3;
bits<4> M4;
let Inst{47-40} = op{15-8};
let Inst{39-36} = R1;
let Inst{35-32} = V3{3-0};
- let Inst{31-16} = BD2;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2;
let Inst{15-12} = M4;
let Inst{11} = 0;
let Inst{10} = V3{4};
@@ -1555,13 +1675,15 @@ class InstVRSd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> SoftFail = 0;
bits<5> V1;
- bits<16> BD2;
+ bits<4> B2;
+ bits<12> D2;
bits<4> R3;
let Inst{47-40} = op{15-8};
let Inst{39-36} = 0;
let Inst{35-32} = R3;
- let Inst{31-16} = BD2;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2;
let Inst{15-12} = V1{3-0};
let Inst{11-9} = 0;
let Inst{8} = V1{4};
@@ -1574,15 +1696,19 @@ class InstVRV<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> SoftFail = 0;
bits<5> V1;
- bits<21> VBD2;
+ bits<5> V2;
+ bits<4> B2;
+ bits<12> D2;
bits<4> M3;
let Inst{47-40} = op{15-8};
let Inst{39-36} = V1{3-0};
- let Inst{35-16} = VBD2{19-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2;
let Inst{15-12} = M3;
let Inst{11} = V1{4};
- let Inst{10} = VBD2{20};
+ let Inst{10} = V2{4};
let Inst{9-8} = 0;
let Inst{7-0} = op{7-0};
}
@@ -1593,12 +1719,16 @@ class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> SoftFail = 0;
bits<5> V1;
- bits<20> XBD2;
+ bits<4> X2;
+ bits<4> B2;
+ bits<12> D2;
bits<4> M3;
let Inst{47-40} = op{15-8};
let Inst{39-36} = V1{3-0};
- let Inst{35-16} = XBD2;
+ let Inst{35-32} = X2;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2;
let Inst{15-12} = M3;
let Inst{11} = V1{4};
let Inst{10-8} = 0;
@@ -1611,12 +1741,14 @@ class InstVSI<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> SoftFail = 0;
bits<5> V1;
- bits<16> BD2;
+ bits<4> B2;
+ bits<12> D2;
bits<8> I3;
let Inst{47-40} = op{15-8};
let Inst{39-32} = I3;
- let Inst{31-16} = BD2;
+ let Inst{31-28} = B2;
+ let Inst{27-16} = D2;
let Inst{15-12} = V1{3-0};
let Inst{11-9} = 0;
let Inst{8} = V1{4};
@@ -1703,14 +1835,11 @@ class DirectiveInsnRS<dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{31-24} = enc{31-24};
}
-// RSE is like RSY except with a 12 bit displacement (instead of 20).
class DirectiveInsnRSE<dag outs, dag ins, string asmstr, list<dag> pattern>
- : InstRSYa<6, outs, ins, asmstr, pattern> {
+ : InstRSEa<6, outs, ins, asmstr, pattern> {
bits <48> enc;
let Inst{47-40} = enc{47-40};
- let Inst{31-16} = BD2{15-0};
- let Inst{15-8} = 0;
let Inst{7-0} = enc{7-0};
}
@@ -2131,7 +2260,7 @@ class InherentVRIa<string mnemonic, bits<16> opcode, bits<16> value>
class StoreInherentS<string mnemonic, bits<16> opcode,
SDPatternOperator operator, bits<5> bytes>
- : InstS<opcode, (outs), (ins bdaddr12only:$BD2),
+ : InstS<opcode, (outs), (ins (bdaddr12only $B2, $D2):$BD2),
mnemonic#"\t$BD2", [(operator bdaddr12only:$BD2)]> {
let mayStore = 1;
let AccessBytes = bytes;
@@ -2143,7 +2272,8 @@ class SideEffectInherentE<string mnemonic, bits<16>opcode>
class SideEffectInherentS<string mnemonic, bits<16> opcode,
SDPatternOperator operator>
: InstS<opcode, (outs), (ins), mnemonic, [(operator)]> {
- let BD2 = 0;
+ let B2 = 0;
+ let D2 = 0;
}
class SideEffectInherentRRE<string mnemonic, bits<16> opcode>
@@ -2167,7 +2297,7 @@ class CallRR<string mnemonic, bits<8> opcode>
mnemonic#"\t$R1, $R2", []>;
class CallRX<string mnemonic, bits<8> opcode>
- : InstRXa<opcode, (outs), (ins GR64:$R1, bdxaddr12only:$XBD2),
+ : InstRXa<opcode, (outs), (ins GR64:$R1, (bdxaddr12only $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2", []>;
class CondBranchRI<string mnemonic, bits<12> opcode,
@@ -2229,17 +2359,19 @@ class FixedCondBranchRR<CondVariant V, string mnemonic, bits<8> opcode,
}
class CondBranchRX<string mnemonic, bits<8> opcode>
- : InstRXb<opcode, (outs), (ins cond4:$valid, cond4:$M1, bdxaddr12only:$XBD2),
+ : InstRXb<opcode, (outs),
+ (ins cond4:$valid, cond4:$M1, (bdxaddr12only $B2, $D2, $X2):$XBD2),
!subst("#", "${M1}", mnemonic)#"\t$XBD2", []> {
let CCMaskFirst = 1;
}
class AsmCondBranchRX<string mnemonic, bits<8> opcode>
- : InstRXb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr12only:$XBD2),
+ : InstRXb<opcode, (outs),
+ (ins imm32zx4:$M1, (bdxaddr12only $B2, $D2, $X2):$XBD2),
mnemonic#"\t$M1, $XBD2", []>;
class FixedCondBranchRX<CondVariant V, string mnemonic, bits<8> opcode>
- : InstRXb<opcode, (outs), (ins bdxaddr12only:$XBD2),
+ : InstRXb<opcode, (outs), (ins (bdxaddr12only $B2, $D2, $X2):$XBD2),
!subst("#", V.suffix, mnemonic)#"\t$XBD2", []> {
let isAsmParserOnly = V.alternate;
let AsmVariantName = V.asmvariant;
@@ -2247,21 +2379,23 @@ class FixedCondBranchRX<CondVariant V, string mnemonic, bits<8> opcode>
}
class CondBranchRXY<string mnemonic, bits<16> opcode>
- : InstRXYb<opcode, (outs), (ins cond4:$valid, cond4:$M1, bdxaddr20only:$XBD2),
+ : InstRXYb<opcode, (outs), (ins cond4:$valid, cond4:$M1,
+ (bdxaddr20only $B2, $D2, $X2):$XBD2),
!subst("#", "${M1}", mnemonic)#"\t$XBD2", []> {
let CCMaskFirst = 1;
let mayLoad = 1;
}
class AsmCondBranchRXY<string mnemonic, bits<16> opcode>
- : InstRXYb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr20only:$XBD2),
+ : InstRXYb<opcode, (outs),
+ (ins imm32zx4:$M1, (bdxaddr20only $B2, $D2, $X2):$XBD2),
mnemonic#"\t$M1, $XBD2", []> {
let mayLoad = 1;
}
class FixedCondBranchRXY<CondVariant V, string mnemonic, bits<16> opcode,
SDPatternOperator operator = null_frag>
- : InstRXYb<opcode, (outs), (ins bdxaddr20only:$XBD2),
+ : InstRXYb<opcode, (outs), (ins (bdxaddr20only $B2, $D2, $X2):$XBD2),
!subst("#", V.suffix, mnemonic)#"\t$XBD2",
[(operator (load bdxaddr20only:$XBD2))]> {
let isAsmParserOnly = V.alternate;
@@ -2381,18 +2515,19 @@ class FixedCmpBranchRRFc<CondVariant V, string mnemonic, bits<16> opcode,
class CmpBranchRRS<string mnemonic, bits<16> opcode,
RegisterOperand cls>
: InstRRS<opcode, (outs),
- (ins cls:$R1, cls:$R2, cond4:$M3, bdaddr12only:$BD4),
+ (ins cls:$R1, cls:$R2, cond4:$M3, (bdaddr12only $B4, $D4):$BD4),
mnemonic#"$M3\t$R1, $R2, $BD4", []>;
class AsmCmpBranchRRS<string mnemonic, bits<16> opcode,
RegisterOperand cls>
: InstRRS<opcode, (outs),
- (ins cls:$R1, cls:$R2, imm32zx4:$M3, bdaddr12only:$BD4),
+ (ins cls:$R1, cls:$R2, imm32zx4:$M3, (bdaddr12only $B4, $D4):$BD4),
mnemonic#"\t$R1, $R2, $M3, $BD4", []>;
class FixedCmpBranchRRS<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls>
- : InstRRS<opcode, (outs), (ins cls:$R1, cls:$R2, bdaddr12only:$BD4),
+ : InstRRS<opcode, (outs),
+ (ins cls:$R1, cls:$R2, (bdaddr12only $B4, $D4):$BD4),
mnemonic#V.suffix#"\t$R1, $R2, $BD4", []> {
let isAsmParserOnly = V.alternate;
let AsmVariantName = V.asmvariant;
@@ -2409,18 +2544,19 @@ multiclass CmpBranchRRSPair<string mnemonic, bits<16> opcode,
class CmpBranchRIS<string mnemonic, bits<16> opcode,
RegisterOperand cls, ImmOpWithPattern imm>
: InstRIS<opcode, (outs),
- (ins cls:$R1, imm:$I2, cond4:$M3, bdaddr12only:$BD4),
+ (ins cls:$R1, imm:$I2, cond4:$M3, (bdaddr12only $B4, $D4):$BD4),
mnemonic#"$M3\t$R1, $I2, $BD4", []>;
class AsmCmpBranchRIS<string mnemonic, bits<16> opcode,
RegisterOperand cls, ImmOpWithPattern imm>
: InstRIS<opcode, (outs),
- (ins cls:$R1, imm:$I2, imm32zx4:$M3, bdaddr12only:$BD4),
+ (ins cls:$R1, imm:$I2, imm32zx4:$M3, (bdaddr12only $B4, $D4):$BD4),
mnemonic#"\t$R1, $I2, $M3, $BD4", []>;
class FixedCmpBranchRIS<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls, ImmOpWithPattern imm>
- : InstRIS<opcode, (outs), (ins cls:$R1, imm:$I2, bdaddr12only:$BD4),
+ : InstRIS<opcode, (outs),
+ (ins cls:$R1, imm:$I2, (bdaddr12only $B4, $D4):$BD4),
mnemonic#V.suffix#"\t$R1, $I2, $BD4", []> {
let isAsmParserOnly = V.alternate;
let AsmVariantName = V.asmvariant;
@@ -2436,12 +2572,14 @@ multiclass CmpBranchRISPair<string mnemonic, bits<16> opcode,
class CmpBranchRSYb<string mnemonic, bits<16> opcode,
RegisterOperand cls>
- : InstRSYb<opcode, (outs), (ins cls:$R1, bdaddr20only:$BD2, cond4:$M3),
+ : InstRSYb<opcode, (outs),
+ (ins cls:$R1, (bdaddr20only $B2, $D2):$BD2, cond4:$M3),
mnemonic#"$M3\t$R1, $BD2", []>;
class AsmCmpBranchRSYb<string mnemonic, bits<16> opcode,
RegisterOperand cls>
- : InstRSYb<opcode, (outs), (ins cls:$R1, bdaddr20only:$BD2, imm32zx4:$M3),
+ : InstRSYb<opcode, (outs),
+ (ins cls:$R1, (bdaddr20only $B2, $D2):$BD2, imm32zx4:$M3),
mnemonic#"\t$R1, $M3, $BD2", []>;
multiclass CmpBranchRSYbPair<string mnemonic, bits<16> opcode,
@@ -2453,7 +2591,7 @@ multiclass CmpBranchRSYbPair<string mnemonic, bits<16> opcode,
class FixedCmpBranchRSYb<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls>
- : InstRSYb<opcode, (outs), (ins cls:$R1, bdaddr20only:$BD2),
+ : InstRSYb<opcode, (outs), (ins cls:$R1, (bdaddr20only $B2, $D2):$BD2),
mnemonic#V.suffix#"\t$R1, $BD2", []> {
let isAsmParserOnly = V.alternate;
let AsmVariantName = V.asmvariant;
@@ -2489,14 +2627,16 @@ class BranchUnaryRRE<string mnemonic, bits<16> opcode, RegisterOperand cls>
}
class BranchUnaryRX<string mnemonic, bits<8> opcode, RegisterOperand cls>
- : InstRXa<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr12only:$XBD2),
+ : InstRXa<opcode, (outs cls:$R1),
+ (ins cls:$R1src, (bdxaddr12only $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class BranchUnaryRXY<string mnemonic, bits<16> opcode, RegisterOperand cls>
- : InstRXYa<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr20only:$XBD2),
+ : InstRXYa<opcode, (outs cls:$R1),
+ (ins cls:$R1src, (bdxaddr20only $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
@@ -2519,7 +2659,7 @@ class BranchBinaryRIEe<string mnemonic, bits<16> opcode, RegisterOperand cls>
class BranchBinaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls>
: InstRSa<opcode, (outs cls:$R1),
- (ins cls:$R1src, cls:$R3, bdaddr12only:$BD2),
+ (ins cls:$R1src, cls:$R3, (bdaddr12only $B2, $D2):$BD2),
mnemonic#"\t$R1, $R3, $BD2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
@@ -2527,7 +2667,8 @@ class BranchBinaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls>
class BranchBinaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstRSYa<opcode,
- (outs cls:$R1), (ins cls:$R1src, cls:$R3, bdaddr20only:$BD2),
+ (outs cls:$R1),
+ (ins cls:$R1src, cls:$R3, (bdaddr20only $B2, $D2):$BD2),
mnemonic#"\t$R1, $R3, $BD2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
@@ -2535,14 +2676,14 @@ class BranchBinaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
class LoadMultipleRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
AddressingMode mode = bdaddr12only>
- : InstRSa<opcode, (outs cls:$R1, cls:$R3), (ins mode:$BD2),
+ : InstRSa<opcode, (outs cls:$R1, cls:$R3), (ins (mode $B2, $D2):$BD2),
mnemonic#"\t$R1, $R3, $BD2", []> {
let mayLoad = 1;
}
class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
AddressingMode mode = bdaddr20only>
- : InstRSYa<opcode, (outs cls:$R1, cls:$R3), (ins mode:$BD2),
+ : InstRSYa<opcode, (outs cls:$R1, cls:$R3), (ins (mode $B2, $D2):$BD2),
mnemonic#"\t$R1, $R3, $BD2", []> {
let mayLoad = 1;
}
@@ -2559,7 +2700,7 @@ multiclass LoadMultipleRSPair<string mnemonic, bits<8> rsOpcode,
class LoadMultipleSSe<string mnemonic, bits<8> opcode, RegisterOperand cls>
: InstSSe<opcode, (outs cls:$R1, cls:$R3),
- (ins bdaddr12only:$BD2, bdaddr12only:$BD4),
+ (ins (bdaddr12only $B2, $D2):$BD2, (bdaddr12only $B4, $D4):$BD4),
mnemonic#"\t$R1, $R3, $BD2, $BD4", []> {
let mayLoad = 1;
}
@@ -2567,11 +2708,11 @@ class LoadMultipleSSe<string mnemonic, bits<8> opcode, RegisterOperand cls>
multiclass LoadMultipleVRSaAlign<string mnemonic, bits<16> opcode> {
let mayLoad = 1 in {
def Align : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3),
- (ins bdaddr12only:$BD2, imm32zx4:$M4),
+ (ins (bdaddr12only $B2, $D2):$BD2, imm32zx4:$M4),
mnemonic#"\t$V1, $V3, $BD2, $M4", []>;
let M4 = 0 in
def "" : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3),
- (ins bdaddr12only:$BD2),
+ (ins (bdaddr12only $B2, $D2):$BD2),
mnemonic#"\t$V1, $V3, $BD2", []>;
}
}
@@ -2591,7 +2732,7 @@ class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
class StoreRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdxaddr12only>
- : InstRXa<opcode, (outs), (ins cls:$R1, mode:$XBD2),
+ : InstRXa<opcode, (outs), (ins cls:$R1, (mode $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2",
[(operator cls:$R1, mode:$XBD2)]> {
let OpKey = mnemonic#"r"#cls;
@@ -2603,7 +2744,7 @@ class StoreRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdxaddr20only>
- : InstRXYa<opcode, (outs), (ins cls:$R1, mode:$XBD2),
+ : InstRXYa<opcode, (outs), (ins cls:$R1, (mode $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2",
[(operator cls:$R1, mode:$XBD2)]> {
let OpKey = mnemonic#"r"#cls;
@@ -2626,7 +2767,8 @@ multiclass StoreRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
class StoreVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr, bits<5> bytes, bits<4> type = 0>
- : InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2),
+ : InstVRX<opcode, (outs),
+ (ins tr.op:$V1, (bdxaddr12only $B2, $D2, $X2):$XBD2),
mnemonic#"\t$V1, $XBD2",
[(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2)]> {
let M3 = type;
@@ -2635,7 +2777,8 @@ class StoreVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
}
class StoreVRXGeneric<string mnemonic, bits<16> opcode>
- : InstVRX<opcode, (outs), (ins VR128:$V1, bdxaddr12only:$XBD2, imm32zx4:$M3),
+ : InstVRX<opcode, (outs),
+ (ins VR128:$V1, (bdxaddr12only $B2, $D2, $X2):$XBD2, imm32zx4:$M3),
mnemonic#"\t$V1, $XBD2, $M3", []> {
let mayStore = 1;
}
@@ -2643,17 +2786,20 @@ class StoreVRXGeneric<string mnemonic, bits<16> opcode>
multiclass StoreVRXAlign<string mnemonic, bits<16> opcode> {
let mayStore = 1, AccessBytes = 16 in {
def Align : InstVRX<opcode, (outs),
- (ins VR128:$V1, bdxaddr12only:$XBD2, imm32zx4:$M3),
+ (ins VR128:$V1, (bdxaddr12only $B2, $D2, $X2):$XBD2,
+ imm32zx4:$M3),
mnemonic#"\t$V1, $XBD2, $M3", []>;
let M3 = 0 in
- def "" : InstVRX<opcode, (outs), (ins VR128:$V1, bdxaddr12only:$XBD2),
+ def "" : InstVRX<opcode, (outs),
+ (ins VR128:$V1, (bdxaddr12only $B2, $D2, $X2):$XBD2),
mnemonic#"\t$V1, $XBD2", []>;
}
}
class StoreLengthVRSb<string mnemonic, bits<16> opcode,
SDPatternOperator operator, bits<5> bytes>
- : InstVRSb<opcode, (outs), (ins VR128:$V1, GR32:$R3, bdaddr12only:$BD2),
+ : InstVRSb<opcode, (outs),
+ (ins VR128:$V1, GR32:$R3, (bdaddr12only $B2, $D2):$BD2),
mnemonic#"\t$V1, $R3, $BD2",
[(operator VR128:$V1, GR32:$R3, bdaddr12only:$BD2)]> {
let M4 = 0;
@@ -2663,7 +2809,8 @@ class StoreLengthVRSb<string mnemonic, bits<16> opcode,
class StoreLengthVRSd<string mnemonic, bits<16> opcode,
SDPatternOperator operator, bits<5> bytes>
- : InstVRSd<opcode, (outs), (ins VR128:$V1, GR32:$R3, bdaddr12only:$BD2),
+ : InstVRSd<opcode, (outs),
+ (ins VR128:$V1, GR32:$R3, (bdaddr12only $B2, $D2):$BD2),
mnemonic#"\t$V1, $R3, $BD2",
[(operator VR128:$V1, GR32:$R3, bdaddr12only:$BD2)]> {
let mayStore = 1;
@@ -2672,7 +2819,8 @@ class StoreLengthVRSd<string mnemonic, bits<16> opcode,
class StoreLengthVSI<string mnemonic, bits<16> opcode,
SDPatternOperator operator, bits<5> bytes>
- : InstVSI<opcode, (outs), (ins VR128:$V1, bdaddr12only:$BD2, imm32zx8:$I3),
+ : InstVSI<opcode, (outs),
+ (ins VR128:$V1, (bdaddr12only $B2, $D2):$BD2, imm32zx8:$I3),
mnemonic#"\t$V1, $BD2, $I3",
[(operator VR128:$V1, imm32zx8:$I3, bdaddr12only:$BD2)]> {
let mayStore = 1;
@@ -2681,14 +2829,14 @@ class StoreLengthVSI<string mnemonic, bits<16> opcode,
class StoreMultipleRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
AddressingMode mode = bdaddr12only>
- : InstRSa<opcode, (outs), (ins cls:$R1, cls:$R3, mode:$BD2),
+ : InstRSa<opcode, (outs), (ins cls:$R1, cls:$R3, (mode $B2, $D2):$BD2),
mnemonic#"\t$R1, $R3, $BD2", []> {
let mayStore = 1;
}
class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
AddressingMode mode = bdaddr20only>
- : InstRSYa<opcode, (outs), (ins cls:$R1, cls:$R3, mode:$BD2),
+ : InstRSYa<opcode, (outs), (ins cls:$R1, cls:$R3, (mode $B2, $D2):$BD2),
mnemonic#"\t$R1, $R3, $BD2", []> {
let mayStore = 1;
}
@@ -2706,11 +2854,12 @@ multiclass StoreMultipleRSPair<string mnemonic, bits<8> rsOpcode,
multiclass StoreMultipleVRSaAlign<string mnemonic, bits<16> opcode> {
let mayStore = 1 in {
def Align : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3,
- bdaddr12only:$BD2, imm32zx4:$M4),
+ (bdaddr12only $B2, $D2):$BD2,
+ imm32zx4:$M4),
mnemonic#"\t$V1, $V3, $BD2, $M4", []>;
let M4 = 0 in
def "" : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3,
- bdaddr12only:$BD2),
+ (bdaddr12only $B2, $D2):$BD2),
mnemonic#"\t$V1, $V3, $BD2", []>;
}
}
@@ -2723,7 +2872,7 @@ multiclass StoreMultipleVRSaAlign<string mnemonic, bits<16> opcode> {
// only use the StoreSI* instruction if the matched address is suitable.
class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
ImmOpWithPattern imm>
- : InstSI<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
+ : InstSI<opcode, (outs), (ins (mviaddr12pair $B1, $D1):$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(operator imm:$I2, mviaddr12pair:$BD1)]> {
let mayStore = 1;
@@ -2731,7 +2880,7 @@ class StoreSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
ImmOpWithPattern imm>
- : InstSIY<opcode, (outs), (ins mviaddr20pair:$BD1, imm:$I2),
+ : InstSIY<opcode, (outs), (ins (mviaddr20pair $B1, $D1):$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(operator imm:$I2, mviaddr20pair:$BD1)]> {
let mayStore = 1;
@@ -2739,7 +2888,7 @@ class StoreSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class StoreSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
ImmOpWithPattern imm>
- : InstSIL<opcode, (outs), (ins mviaddr12pair:$BD1, imm:$I2),
+ : InstSIL<opcode, (outs), (ins (mviaddr12pair $B1, $D1):$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(operator imm:$I2, mviaddr12pair:$BD1)]> {
let mayStore = 1;
@@ -2756,7 +2905,8 @@ multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
}
class StoreSSE<string mnemonic, bits<16> opcode>
- : InstSSE<opcode, (outs), (ins bdaddr12only:$BD1, bdaddr12only:$BD2),
+ : InstSSE<opcode, (outs),
+ (ins (bdaddr12only $B1, $D1):$BD1, (bdaddr12only $B2, $D2):$BD2),
mnemonic#"\t$BD1, $BD2", []> {
let mayStore = 1;
}
@@ -2764,8 +2914,9 @@ class StoreSSE<string mnemonic, bits<16> opcode>
class CondStoreRSY<string mnemonic, bits<16> opcode,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
- : InstRSYb<opcode, (outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$M3),
- mnemonic#"$M3\t$R1, $BD2", []> {
+ : InstRSYb<opcode, (outs),
+ (ins cls:$R1, (mode $B2, $D2):$BD2, cond4:$valid, cond4:$M3),
+ mnemonic#"$M3\t$R1, $BD2", []> {
let mayStore = 1;
let AccessBytes = bytes;
let CCMaskLast = 1;
@@ -2776,7 +2927,7 @@ class CondStoreRSY<string mnemonic, bits<16> opcode,
class AsmCondStoreRSY<string mnemonic, bits<16> opcode,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
- : InstRSYb<opcode, (outs), (ins cls:$R1, mode:$BD2, imm32zx4:$M3),
+ : InstRSYb<opcode, (outs), (ins cls:$R1, (mode $B2, $D2):$BD2, imm32zx4:$M3),
mnemonic#"\t$R1, $BD2, $M3", []> {
let mayStore = 1;
let AccessBytes = bytes;
@@ -2786,7 +2937,7 @@ class AsmCondStoreRSY<string mnemonic, bits<16> opcode,
class FixedCondStoreRSY<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
- : InstRSYb<opcode, (outs), (ins cls:$R1, mode:$BD2),
+ : InstRSYb<opcode, (outs), (ins cls:$R1, (mode $B2, $D2):$BD2),
mnemonic#V.suffix#"\t$R1, $BD2", []> {
let mayStore = 1;
let AccessBytes = bytes;
@@ -2823,7 +2974,7 @@ class SideEffectUnaryRRE<string mnemonic, bits<16> opcode, RegisterOperand cls,
class SideEffectUnaryS<string mnemonic, bits<16> opcode,
SDPatternOperator operator, bits<5> bytes,
AddressingMode mode = bdaddr12only>
- : InstS<opcode, (outs), (ins mode:$BD2),
+ : InstS<opcode, (outs), (ins (mode $B2, $D2):$BD2),
mnemonic#"\t$BD2", [(operator mode:$BD2)]> {
let mayLoad = 1;
let AccessBytes = bytes;
@@ -2832,7 +2983,7 @@ class SideEffectUnaryS<string mnemonic, bits<16> opcode,
class SideEffectUnarySIY<string mnemonic, bits<16> opcode,
bits<5> bytes,
AddressingMode mode = bdaddr20only>
- : InstSIY<opcode, (outs), (ins mode:$BD1),
+ : InstSIY<opcode, (outs), (ins (mode $B1, $D1):$BD1),
mnemonic#"\t$BD1", []> {
let mayLoad = 1;
let AccessBytes = bytes;
@@ -2842,18 +2993,18 @@ class SideEffectUnarySIY<string mnemonic, bits<16> opcode,
class SideEffectAddressS<string mnemonic, bits<16> opcode,
SDPatternOperator operator,
AddressingMode mode = bdaddr12only>
- : InstS<opcode, (outs), (ins mode:$BD2),
+ : InstS<opcode, (outs), (ins (mode $B2, $D2):$BD2),
mnemonic#"\t$BD2", [(operator mode:$BD2)]>;
class LoadAddressRX<string mnemonic, bits<8> opcode,
SDPatternOperator operator, AddressingMode mode>
- : InstRXa<opcode, (outs GR64:$R1), (ins mode:$XBD2),
+ : InstRXa<opcode, (outs GR64:$R1), (ins (mode $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2",
[(set GR64:$R1, (operator mode:$XBD2))]>;
class LoadAddressRXY<string mnemonic, bits<16> opcode,
SDPatternOperator operator, AddressingMode mode>
- : InstRXYa<opcode, (outs GR64:$R1), (ins mode:$XBD2),
+ : InstRXYa<opcode, (outs GR64:$R1), (ins (mode $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2",
[(set GR64:$R1, (operator mode:$XBD2))]>;
@@ -2936,7 +3087,7 @@ class CondUnaryRSY<string mnemonic, bits<16> opcode,
SDPatternOperator operator, RegisterOperand cls,
bits<5> bytes, AddressingMode mode = bdaddr20only>
: InstRSYb<opcode, (outs cls:$R1),
- (ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$M3),
+ (ins cls:$R1src, (mode $B2, $D2):$BD2, cond4:$valid, cond4:$M3),
mnemonic#"$M3\t$R1, $BD2",
[(set cls:$R1,
(z_select_ccmask (operator bdaddr20only:$BD2), cls:$R1src,
@@ -2957,7 +3108,8 @@ class CondUnaryRSY<string mnemonic, bits<16> opcode,
class AsmCondUnaryRSY<string mnemonic, bits<16> opcode,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
- : InstRSYb<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2, imm32zx4:$M3),
+ : InstRSYb<opcode, (outs cls:$R1),
+ (ins cls:$R1src, (mode $B2, $D2):$BD2, imm32zx4:$M3),
mnemonic#"\t$R1, $BD2, $M3", []> {
let mayLoad = 1;
let AccessBytes = bytes;
@@ -2969,7 +3121,7 @@ class AsmCondUnaryRSY<string mnemonic, bits<16> opcode,
class FixedCondUnaryRSY<CondVariant V, string mnemonic, bits<16> opcode,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
- : InstRSYb<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2),
+ : InstRSYb<opcode, (outs cls:$R1), (ins cls:$R1src, (mode $B2, $D2):$BD2),
mnemonic#V.suffix#"\t$R1, $BD2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
@@ -2992,7 +3144,7 @@ multiclass CondUnaryRSYPair<string mnemonic, bits<16> opcode,
class UnaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdxaddr12only>
- : InstRXa<opcode, (outs cls:$R1), (ins mode:$XBD2),
+ : InstRXa<opcode, (outs cls:$R1), (ins (mode $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2",
[(set cls:$R1, (operator mode:$XBD2))]> {
let OpKey = mnemonic#"r"#cls;
@@ -3003,7 +3155,7 @@ class UnaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, bits<5> bytes>
- : InstRXE<opcode, (outs cls:$R1), (ins bdxaddr12only:$XBD2),
+ : InstRXE<opcode, (outs cls:$R1), (ins (bdxaddr12only $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2",
[(set cls:$R1, (operator bdxaddr12only:$XBD2))]> {
let OpKey = mnemonic#"r"#cls;
@@ -3016,7 +3168,7 @@ class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdxaddr20only>
- : InstRXYa<opcode, (outs cls:$R1), (ins mode:$XBD2),
+ : InstRXYa<opcode, (outs cls:$R1), (ins (mode $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2",
[(set cls:$R1, (operator mode:$XBD2))]> {
let OpKey = mnemonic#"r"#cls;
@@ -3110,7 +3262,7 @@ multiclass UnaryExtraVRRaSPairGeneric<string mnemonic, bits<16> opcode> {
class UnaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr, bits<5> bytes, bits<4> type = 0>
- : InstVRX<opcode, (outs tr.op:$V1), (ins bdxaddr12only:$XBD2),
+ : InstVRX<opcode, (outs tr.op:$V1), (ins (bdxaddr12only $B2, $D2, $X2):$XBD2),
mnemonic#"\t$V1, $XBD2",
[(set (tr.vt tr.op:$V1), (operator bdxaddr12only:$XBD2))]> {
let M3 = type;
@@ -3119,7 +3271,8 @@ class UnaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
}
class UnaryVRXGeneric<string mnemonic, bits<16> opcode>
- : InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
+ : InstVRX<opcode, (outs VR128:$V1),
+ (ins (bdxaddr12only $B2, $D2, $X2):$XBD2, imm32zx4:$M3),
mnemonic#"\t$V1, $XBD2, $M3", []> {
let mayLoad = 1;
}
@@ -3127,22 +3280,23 @@ class UnaryVRXGeneric<string mnemonic, bits<16> opcode>
multiclass UnaryVRXAlign<string mnemonic, bits<16> opcode> {
let mayLoad = 1, AccessBytes = 16 in {
def Align : InstVRX<opcode, (outs VR128:$V1),
- (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
+ (ins (bdxaddr12only $B2, $D2, $X2):$XBD2, imm32zx4:$M3),
mnemonic#"\t$V1, $XBD2, $M3", []>;
let M3 = 0 in
- def "" : InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2),
+ def "" : InstVRX<opcode, (outs VR128:$V1),
+ (ins (bdxaddr12only $B2, $D2, $X2):$XBD2),
mnemonic#"\t$V1, $XBD2", []>;
}
}
class SideEffectBinaryRX<string mnemonic, bits<8> opcode,
RegisterOperand cls>
- : InstRXa<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2),
+ : InstRXa<opcode, (outs), (ins cls:$R1, (bdxaddr12only $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2", []>;
class SideEffectBinaryRXY<string mnemonic, bits<16> opcode,
RegisterOperand cls>
- : InstRXYa<opcode, (outs), (ins cls:$R1, bdxaddr20only:$XBD2),
+ : InstRXYa<opcode, (outs), (ins cls:$R1, (bdxaddr20only $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2", []>;
class SideEffectBinaryRILPC<string mnemonic, bits<12> opcode,
@@ -3181,29 +3335,33 @@ class SideEffectBinaryIE<string mnemonic, bits<16> opcode,
mnemonic#"\t$I1, $I2", []>;
class SideEffectBinarySI<string mnemonic, bits<8> opcode, Operand imm>
- : InstSI<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
+ : InstSI<opcode, (outs), (ins (bdaddr12only $B1, $D1):$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2", []>;
class SideEffectBinarySIL<string mnemonic, bits<16> opcode,
SDPatternOperator operator, ImmOpWithPattern imm>
- : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
+ : InstSIL<opcode, (outs), (ins (bdaddr12only $B1, $D1):$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2", [(operator bdaddr12only:$BD1, imm:$I2)]>;
class SideEffectBinarySSa<string mnemonic, bits<8> opcode>
- : InstSSa<opcode, (outs), (ins bdladdr12onlylen8:$BDL1, bdaddr12only:$BD2),
+ : InstSSa<opcode, (outs), (ins (bdladdr12onlylen8 $B1, $D1, $L1):$BDL1,
+ (bdaddr12only $B2, $D2):$BD2),
mnemonic#"\t$BDL1, $BD2", []>;
class SideEffectBinarySSb<string mnemonic, bits<8> opcode>
: InstSSb<opcode,
- (outs), (ins bdladdr12onlylen4:$BDL1, bdladdr12onlylen4:$BDL2),
+ (outs), (ins (bdladdr12onlylen4 $B1, $D1, $L1):$BDL1,
+ (bdladdr12onlylen4 $B2, $D2, $L2):$BDL2),
mnemonic#"\t$BDL1, $BDL2", []>;
class SideEffectBinarySSf<string mnemonic, bits<8> opcode>
- : InstSSf<opcode, (outs), (ins bdaddr12only:$BD1, bdladdr12onlylen8:$BDL2),
+ : InstSSf<opcode, (outs), (ins (bdaddr12only $B1, $D1):$BD1,
+ (bdladdr12onlylen8 $B2, $D2, $L2):$BDL2),
mnemonic#"\t$BD1, $BDL2", []>;
class SideEffectBinarySSE<string mnemonic, bits<16> opcode>
- : InstSSE<opcode, (outs), (ins bdaddr12only:$BD1, bdaddr12only:$BD2),
+ : InstSSE<opcode, (outs),
+ (ins (bdaddr12only $B1, $D1):$BD1, (bdaddr12only $B2, $D2):$BD2),
mnemonic#"\t$BD1, $BD2", []>;
class SideEffectBinaryMemMemRR<string mnemonic, bits<8> opcode,
@@ -3511,7 +3669,8 @@ class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
class BinaryRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls>
- : InstRSa<opcode, (outs cls:$R1), (ins cls:$R1src, shift12only:$BD2),
+ : InstRSa<opcode, (outs cls:$R1),
+ (ins cls:$R1src, (shift12only $B2, $D2):$BD2),
mnemonic#"\t$R1, $BD2",
[(set cls:$R1, (operator cls:$R1src, shift12only:$BD2))]> {
let R3 = 0;
@@ -3521,7 +3680,7 @@ class BinaryRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
class BinaryRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls>
- : InstRSYa<opcode, (outs cls:$R1), (ins cls:$R3, shift20only:$BD2),
+ : InstRSYa<opcode, (outs cls:$R1), (ins cls:$R3, (shift20only $B2, $D2):$BD2),
mnemonic#"\t$R1, $R3, $BD2",
[(set cls:$R1, (operator cls:$R3, shift20only:$BD2))]>;
@@ -3538,7 +3697,7 @@ multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
class BinaryRSL<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstRSLb<opcode, (outs cls:$R1),
- (ins bdladdr12onlylen8:$BDL2, imm32zx4:$M3),
+ (ins (bdladdr12onlylen8 $B2, $D2, $L2):$BDL2, imm32zx4:$M3),
mnemonic#"\t$R1, $BDL2, $M3", []> {
let mayLoad = 1;
}
@@ -3546,7 +3705,7 @@ class BinaryRSL<string mnemonic, bits<16> opcode, RegisterOperand cls>
class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr12only>
- : InstRXa<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2),
+ : InstRXa<opcode, (outs cls:$R1), (ins cls:$R1src, (mode $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2",
[(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> {
let OpKey = mnemonic#"r"#cls;
@@ -3559,7 +3718,8 @@ class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, SDPatternOperator load, bits<5> bytes>
- : InstRXE<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr12only:$XBD2),
+ : InstRXE<opcode, (outs cls:$R1),
+ (ins cls:$R1src, (bdxaddr12only $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2",
[(set cls:$R1, (operator cls:$R1src,
(load bdxaddr12only:$XBD2)))]> {
@@ -3575,7 +3735,8 @@ class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class BinaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2,
SDPatternOperator load, bits<5> bytes>
- : InstRXF<opcode, (outs cls1:$R1), (ins cls2:$R3, bdxaddr12only:$XBD2),
+ : InstRXF<opcode, (outs cls1:$R1),
+ (ins cls2:$R3, (bdxaddr12only $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $R3, $XBD2",
[(set cls1:$R1, (operator cls2:$R3, (load bdxaddr12only:$XBD2)))]> {
let OpKey = mnemonic#"r"#cls;
@@ -3587,7 +3748,8 @@ class BinaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr20only>
- : InstRXYa<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2),
+ : InstRXYa<opcode, (outs cls:$R1),
+ (ins cls:$R1src, (mode $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2",
[(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> {
let OpKey = mnemonic#"r"#cls;
@@ -3613,7 +3775,7 @@ multiclass BinaryRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
class BinarySI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
Operand imm, AddressingMode mode = bdaddr12only>
- : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2),
+ : InstSI<opcode, (outs), (ins (mode $B1, $D1):$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(store (operator (load mode:$BD1), imm:$I2), mode:$BD1)]> {
let mayLoad = 1;
@@ -3622,7 +3784,7 @@ class BinarySI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
class BinarySIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
Operand imm, AddressingMode mode = bdaddr20only>
- : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2),
+ : InstSIY<opcode, (outs), (ins (mode $B1, $D1):$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(store (operator (load mode:$BD1), imm:$I2), mode:$BD1)]> {
let mayLoad = 1;
@@ -3641,7 +3803,8 @@ multiclass BinarySIPair<string mnemonic, bits<8> siOpcode,
}
class BinarySSF<string mnemonic, bits<12> opcode, RegisterOperand cls>
- : InstSSF<opcode, (outs cls:$R3), (ins bdaddr12pair:$BD1, bdaddr12pair:$BD2),
+ : InstSSF<opcode, (outs cls:$R3),
+ (ins (bdaddr12pair $B1, $D1):$BD1, (bdaddr12pair $B2, $D2):$BD2),
mnemonic#"\t$R3, $BD1, $BD2", []> {
let mayLoad = 1;
}
@@ -3849,7 +4012,8 @@ class BinaryVRRk<string mnemonic, bits<16> opcode>
class BinaryVRSa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type>
- : InstVRSa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, shift12only:$BD2),
+ : InstVRSa<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V3, (shift12only $B2, $D2):$BD2),
mnemonic#"\t$V1, $V3, $BD2",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V3),
shift12only:$BD2))]> {
@@ -3858,12 +4022,13 @@ class BinaryVRSa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class BinaryVRSaGeneric<string mnemonic, bits<16> opcode>
: InstVRSa<opcode, (outs VR128:$V1),
- (ins VR128:$V3, shift12only:$BD2, imm32zx4:$M4),
+ (ins VR128:$V3, (shift12only $B2, $D2):$BD2, imm32zx4:$M4),
mnemonic#"\t$V1, $V3, $BD2, $M4", []>;
class BinaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
bits<5> bytes>
- : InstVRSb<opcode, (outs VR128:$V1), (ins GR32:$R3, bdaddr12only:$BD2),
+ : InstVRSb<opcode, (outs VR128:$V1),
+ (ins GR32:$R3, (bdaddr12only $B2, $D2):$BD2),
mnemonic#"\t$V1, $R3, $BD2",
[(set VR128:$V1, (operator GR32:$R3, bdaddr12only:$BD2))]> {
let M4 = 0;
@@ -3873,20 +4038,22 @@ class BinaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class BinaryVRSc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr, bits<4> type>
- : InstVRSc<opcode, (outs GR64:$R1), (ins tr.op:$V3, shift12only:$BD2),
- mnemonic#"\t$R1, $V3, $BD2",
- [(set GR64:$R1, (operator (tr.vt tr.op:$V3), shift12only:$BD2))]> {
+ : InstVRSc<opcode, (outs GR64:$R1),
+ (ins tr.op:$V3, (shift12only $B2, $D2):$BD2),
+ mnemonic#"\t$R1, $V3, $BD2",
+ [(set GR64:$R1, (operator (tr.vt tr.op:$V3), shift12only:$BD2))]> {
let M4 = type;
}
class BinaryVRScGeneric<string mnemonic, bits<16> opcode>
: InstVRSc<opcode, (outs GR64:$R1),
- (ins VR128:$V3, shift12only:$BD2, imm32zx4: $M4),
+ (ins VR128:$V3, (shift12only $B2, $D2):$BD2, imm32zx4: $M4),
mnemonic#"\t$R1, $V3, $BD2, $M4", []>;
class BinaryVRSd<string mnemonic, bits<16> opcode, SDPatternOperator operator,
bits<5> bytes>
- : InstVRSd<opcode, (outs VR128:$V1), (ins GR32:$R3, bdaddr12only:$BD2),
+ : InstVRSd<opcode, (outs VR128:$V1),
+ (ins GR32:$R3, (bdaddr12only $B2, $D2):$BD2),
mnemonic#"\t$V1, $R3, $BD2",
[(set VR128:$V1, (operator GR32:$R3, bdaddr12only:$BD2))]> {
let mayLoad = 1;
@@ -3895,7 +4062,8 @@ class BinaryVRSd<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class BinaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr, bits<5> bytes>
- : InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
+ : InstVRX<opcode, (outs VR128:$V1),
+ (ins (bdxaddr12only $B2, $D2, $X2):$XBD2, imm32zx4:$M3),
mnemonic#"\t$V1, $XBD2, $M3",
[(set (tr.vt tr.op:$V1), (operator bdxaddr12only:$XBD2,
imm32zx4_timm:$M3))]> {
@@ -3905,7 +4073,7 @@ class BinaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class StoreBinaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
bits<5> bytes, AddressingMode mode = bdaddr12only>
- : InstRSb<opcode, (outs), (ins cls:$R1, imm32zx4:$M3, mode:$BD2),
+ : InstRSb<opcode, (outs), (ins cls:$R1, imm32zx4:$M3, (mode $B2, $D2):$BD2),
mnemonic#"\t$R1, $M3, $BD2", []> {
let mayStore = 1;
let AccessBytes = bytes;
@@ -3913,7 +4081,7 @@ class StoreBinaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
class StoreBinaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
bits<5> bytes, AddressingMode mode = bdaddr20only>
- : InstRSYb<opcode, (outs), (ins cls:$R1, imm32zx4:$M3, mode:$BD2),
+ : InstRSYb<opcode, (outs), (ins cls:$R1, imm32zx4:$M3, (mode $B2, $D2):$BD2),
mnemonic#"\t$R1, $M3, $BD2", []> {
let mayStore = 1;
let AccessBytes = bytes;
@@ -3933,14 +4101,16 @@ multiclass StoreBinaryRSPair<string mnemonic, bits<8> rsOpcode,
class StoreBinaryRSL<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstRSLb<opcode, (outs),
- (ins cls:$R1, bdladdr12onlylen8:$BDL2, imm32zx4:$M3),
+ (ins cls:$R1, (bdladdr12onlylen8 $B2, $D2, $L2):$BDL2,
+ imm32zx4:$M3),
mnemonic#"\t$R1, $BDL2, $M3", []> {
let mayStore = 1;
}
class BinaryVSI<string mnemonic, bits<16> opcode, SDPatternOperator operator,
bits<5> bytes>
- : InstVSI<opcode, (outs VR128:$V1), (ins bdaddr12only:$BD2, imm32zx8:$I3),
+ : InstVSI<opcode, (outs VR128:$V1),
+ (ins (bdaddr12only $B2, $D2):$BD2, imm32zx8:$I3),
mnemonic#"\t$V1, $BD2, $I3",
[(set VR128:$V1, (operator imm32zx8:$I3, bdaddr12only:$BD2))]> {
let mayLoad = 1;
@@ -3949,7 +4119,8 @@ class BinaryVSI<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
ImmOpWithPattern index>
- : InstVRV<opcode, (outs), (ins VR128:$V1, bdvaddr12only:$VBD2, index:$M3),
+ : InstVRV<opcode, (outs),
+ (ins VR128:$V1, (bdvaddr12only $B2, $D2, $V2):$VBD2, index:$M3),
mnemonic#"\t$V1, $VBD2, $M3", []> {
let mayStore = 1;
let AccessBytes = bytes;
@@ -3958,7 +4129,8 @@ class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
class StoreBinaryVRX<string mnemonic, bits<16> opcode,
SDPatternOperator operator, TypedReg tr, bits<5> bytes,
ImmOpWithPattern index>
- : InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2, index:$M3),
+ : InstVRX<opcode, (outs),
+ (ins tr.op:$V1, (bdxaddr12only $B2, $D2, $X2):$XBD2, index:$M3),
mnemonic#"\t$V1, $XBD2, $M3",
[(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2, index:$M3)]> {
let mayStore = 1;
@@ -3968,7 +4140,8 @@ class StoreBinaryVRX<string mnemonic, bits<16> opcode,
class MemoryBinarySSd<string mnemonic, bits<8> opcode,
RegisterOperand cls>
: InstSSd<opcode, (outs),
- (ins bdraddr12only:$RBD1, bdaddr12only:$BD2, cls:$R3),
+ (ins (bdraddr12only $B1, $D1, $R1):$RBD1,
+ (bdaddr12only $B2, $D2):$BD2, cls:$R3),
mnemonic#"\t$RBD1, $BD2, $R3", []>;
class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
@@ -4023,7 +4196,7 @@ class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
class CompareRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr12only>
- : InstRXa<opcode, (outs), (ins cls:$R1, mode:$XBD2),
+ : InstRXa<opcode, (outs), (ins cls:$R1, (mode $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2",
[(set CC, (operator cls:$R1, (load mode:$XBD2)))]> {
let OpKey = mnemonic#"r"#cls;
@@ -4035,7 +4208,7 @@ class CompareRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, SDPatternOperator load, bits<5> bytes>
- : InstRXE<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2),
+ : InstRXE<opcode, (outs), (ins cls:$R1, (bdxaddr12only $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2",
[(set CC, (operator cls:$R1, (load bdxaddr12only:$XBD2)))]> {
let OpKey = mnemonic#"r"#cls;
@@ -4049,7 +4222,7 @@ class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr20only>
- : InstRXYa<opcode, (outs), (ins cls:$R1, mode:$XBD2),
+ : InstRXYa<opcode, (outs), (ins cls:$R1, (mode $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2",
[(set CC, (operator cls:$R1, (load mode:$XBD2)))]> {
let OpKey = mnemonic#"r"#cls;
@@ -4074,7 +4247,7 @@ multiclass CompareRXPair<string mnemonic, bits<8> rxOpcode, bits<16> rxyOpcode,
class CompareRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
bits<5> bytes, AddressingMode mode = bdaddr12only>
- : InstRSb<opcode, (outs), (ins cls:$R1, imm32zx4:$M3, mode:$BD2),
+ : InstRSb<opcode, (outs), (ins cls:$R1, imm32zx4:$M3, (mode $B2, $D2):$BD2),
mnemonic#"\t$R1, $M3, $BD2", []> {
let mayLoad = 1;
let AccessBytes = bytes;
@@ -4082,7 +4255,7 @@ class CompareRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
class CompareRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
bits<5> bytes, AddressingMode mode = bdaddr20only>
- : InstRSYb<opcode, (outs), (ins cls:$R1, imm32zx4:$M3, mode:$BD2),
+ : InstRSYb<opcode, (outs), (ins cls:$R1, imm32zx4:$M3, (mode $B2, $D2):$BD2),
mnemonic#"\t$R1, $M3, $BD2", []> {
let mayLoad = 1;
let AccessBytes = bytes;
@@ -4100,7 +4273,8 @@ multiclass CompareRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
class CompareSSb<string mnemonic, bits<8> opcode>
: InstSSb<opcode,
- (outs), (ins bdladdr12onlylen4:$BDL1, bdladdr12onlylen4:$BDL2),
+ (outs), (ins (bdladdr12onlylen4 $B1, $D1, $L1):$BDL1,
+ (bdladdr12onlylen4 $B2, $D2, $L2):$BDL2),
mnemonic#"\t$BDL1, $BDL2", []> {
let isCompare = 1;
let mayLoad = 1;
@@ -4109,7 +4283,7 @@ class CompareSSb<string mnemonic, bits<8> opcode>
class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
SDPatternOperator load, ImmOpWithPattern imm,
AddressingMode mode = bdaddr12only>
- : InstSI<opcode, (outs), (ins mode:$BD1, imm:$I2),
+ : InstSI<opcode, (outs), (ins (mode $B1, $D1):$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(set CC, (operator (load mode:$BD1), imm:$I2))]> {
let isCompare = 1;
@@ -4118,7 +4292,7 @@ class CompareSI<string mnemonic, bits<8> opcode, SDPatternOperator operator,
class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
SDPatternOperator load, ImmOpWithPattern imm>
- : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
+ : InstSIL<opcode, (outs), (ins (bdaddr12only $B1, $D1):$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(set CC, (operator (load bdaddr12only:$BD1), imm:$I2))]> {
let isCompare = 1;
@@ -4128,7 +4302,7 @@ class CompareSIL<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class CompareSIY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
SDPatternOperator load, ImmOpWithPattern imm,
AddressingMode mode = bdaddr20only>
- : InstSIY<opcode, (outs), (ins mode:$BD1, imm:$I2),
+ : InstSIY<opcode, (outs), (ins (mode $B1, $D1):$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(set CC, (operator (load mode:$BD1), imm:$I2))]> {
let isCompare = 1;
@@ -4185,12 +4359,13 @@ class CompareVRRh<string mnemonic, bits<16> opcode>
class TestInherentS<string mnemonic, bits<16> opcode,
SDPatternOperator operator>
: InstS<opcode, (outs), (ins), mnemonic, [(set CC, (operator))]> {
- let BD2 = 0;
+ let B2 = 0;
+ let D2 = 0;
}
class TestRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls>
- : InstRXE<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2),
+ : InstRXE<opcode, (outs), (ins cls:$R1, (bdxaddr12only $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $XBD2",
[(set CC, (operator cls:$R1, bdxaddr12only:$XBD2))]> {
let M3 = 0;
@@ -4198,12 +4373,12 @@ class TestRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class TestBinarySIL<string mnemonic, bits<16> opcode,
SDPatternOperator operator, ImmOpWithPattern imm>
- : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
+ : InstSIL<opcode, (outs), (ins (bdaddr12only $B1, $D1):$BD1, imm:$I2),
mnemonic#"\t$BD1, $I2",
[(set CC, (operator bdaddr12only:$BD1, imm:$I2))]>;
class TestRSL<string mnemonic, bits<16> opcode>
- : InstRSLa<opcode, (outs), (ins bdladdr12onlylen4:$BDL1),
+ : InstRSLa<opcode, (outs), (ins (bdladdr12onlylen4 $B1, $D1, $L1):$BDL1),
mnemonic#"\t$BDL1", []> {
let mayLoad = 1;
}
@@ -4213,8 +4388,8 @@ class TestVRRg<string mnemonic, bits<16> opcode>
mnemonic#"\t$V1", []>;
class SideEffectTernarySSc<string mnemonic, bits<8> opcode>
- : InstSSc<opcode, (outs), (ins bdladdr12onlylen4:$BDL1,
- shift12only:$BD2, imm32zx4:$I3),
+ : InstSSc<opcode, (outs), (ins (bdladdr12onlylen4 $B1, $D1, $L1):$BDL1,
+ (shift12only $B2, $D2):$BD2, imm32zx4:$I3),
mnemonic#"\t$BDL1, $BD2, $I3", []>;
class SideEffectTernaryRRFa<string mnemonic, bits<16> opcode,
@@ -4289,7 +4464,8 @@ multiclass SideEffectTernaryMemMemRRFcOpt<string mnemonic, bits<16> opcode,
class SideEffectTernarySSF<string mnemonic, bits<12> opcode,
RegisterOperand cls>
: InstSSF<opcode, (outs),
- (ins bdaddr12only:$BD1, bdaddr12only:$BD2, cls:$R3),
+ (ins (bdaddr12only $B1, $D1):$BD1,
+ (bdaddr12only $B2, $D2):$BD2, cls:$R3),
mnemonic#"\t$BD1, $BD2, $R3", []>;
class TernaryRRFa<string mnemonic, bits<16> opcode,
@@ -4328,7 +4504,7 @@ class TernaryRRD<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class TernaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
bits<5> bytes, AddressingMode mode = bdaddr12only>
: InstRSb<opcode, (outs cls:$R1),
- (ins cls:$R1src, imm32zx4:$M3, mode:$BD2),
+ (ins cls:$R1src, imm32zx4:$M3, (mode $B2, $D2):$BD2),
mnemonic#"\t$R1, $M3, $BD2", []> {
let Constraints = "$R1 = $R1src";
@@ -4340,7 +4516,7 @@ class TernaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
class TernaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
bits<5> bytes, AddressingMode mode = bdaddr20only>
: InstRSYb<opcode, (outs cls:$R1),
- (ins cls:$R1src, imm32zx4:$M3, mode:$BD2),
+ (ins cls:$R1src, imm32zx4:$M3, (mode $B2, $D2):$BD2),
mnemonic#"\t$R1, $M3, $BD2", []> {
let Constraints = "$R1 = $R1src";
@@ -4362,19 +4538,19 @@ multiclass TernaryRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode,
class SideEffectTernaryRS<string mnemonic, bits<8> opcode,
RegisterOperand cls1, RegisterOperand cls2>
: InstRSa<opcode, (outs),
- (ins cls1:$R1, cls2:$R3, bdaddr12only:$BD2),
+ (ins cls1:$R1, cls2:$R3, (bdaddr12only $B2, $D2):$BD2),
mnemonic#"\t$R1, $R3, $BD2", []>;
class SideEffectTernaryRSY<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2>
: InstRSYa<opcode, (outs),
- (ins cls1:$R1, cls2:$R3, bdaddr20only:$BD2),
+ (ins cls1:$R1, cls2:$R3, (bdaddr20only $B2, $D2):$BD2),
mnemonic#"\t$R1, $R3, $BD2", []>;
class SideEffectTernaryMemMemRS<string mnemonic, bits<8> opcode,
RegisterOperand cls1, RegisterOperand cls2>
: InstRSa<opcode, (outs cls1:$R1, cls2:$R3),
- (ins cls1:$R1src, cls2:$R3src, shift12only:$BD2),
+ (ins cls1:$R1src, cls2:$R3src, (shift12only $B2, $D2):$BD2),
mnemonic#"\t$R1, $R3, $BD2", []> {
let Constraints = "$R1 = $R1src, $R3 = $R3src";
let DisableEncoding = "$R1src, $R3src";
@@ -4383,7 +4559,7 @@ class SideEffectTernaryMemMemRS<string mnemonic, bits<8> opcode,
class SideEffectTernaryMemMemRSY<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2>
: InstRSYa<opcode, (outs cls1:$R1, cls2:$R3),
- (ins cls1:$R1src, cls2:$R3src, shift20only:$BD2),
+ (ins cls1:$R1src, cls2:$R3src, (shift20only $B2, $D2):$BD2),
mnemonic#"\t$R1, $R3, $BD2", []> {
let Constraints = "$R1 = $R1src, $R3 = $R3src";
let DisableEncoding = "$R1src, $R3src";
@@ -4393,7 +4569,7 @@ class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2,
SDPatternOperator load, bits<5> bytes>
: InstRXF<opcode, (outs cls1:$R1),
- (ins cls2:$R1src, cls2:$R3, bdxaddr12only:$XBD2),
+ (ins cls2:$R1src, cls2:$R3, (bdxaddr12only $B2, $D2, $X2):$XBD2),
mnemonic#"\t$R1, $R3, $XBD2",
[(set cls1:$R1, (operator cls2:$R1src, cls2:$R3,
(load bdxaddr12only:$XBD2)))]> {
@@ -4593,7 +4769,7 @@ class TernaryVRReFloatGeneric<string mnemonic, bits<16> opcode>
class TernaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, RegisterOperand cls, bits<4> type>
: InstVRSb<opcode, (outs tr1.op:$V1),
- (ins tr2.op:$V1src, cls:$R3, shift12only:$BD2),
+ (ins tr2.op:$V1src, cls:$R3, (shift12only $B2, $D2):$BD2),
mnemonic#"\t$V1, $R3, $BD2",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src),
cls:$R3,
@@ -4615,7 +4791,8 @@ class TernaryVRRj<string mnemonic, bits<16> opcode>
class TernaryVRSbGeneric<string mnemonic, bits<16> opcode>
: InstVRSb<opcode, (outs VR128:$V1),
- (ins VR128:$V1src, GR64:$R3, shift12only:$BD2, imm32zx4:$M4),
+ (ins VR128:$V1src, GR64:$R3, (shift12only $B2, $D2):$BD2,
+ imm32zx4:$M4),
mnemonic#"\t$V1, $R3, $BD2, $M4", []> {
let Constraints = "$V1 = $V1src";
let DisableEncoding = "$V1src";
@@ -4624,7 +4801,7 @@ class TernaryVRSbGeneric<string mnemonic, bits<16> opcode>
class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
ImmOpWithPattern index>
: InstVRV<opcode, (outs VR128:$V1),
- (ins VR128:$V1src, bdvaddr12only:$VBD2, index:$M3),
+ (ins VR128:$V1src, (bdvaddr12only $B2, $D2, $V2):$VBD2, index:$M3),
mnemonic#"\t$V1, $VBD2, $M3", []> {
let Constraints = "$V1 = $V1src";
let DisableEncoding = "$V1src";
@@ -4635,7 +4812,7 @@ class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
class TernaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<5> bytes, ImmOpWithPattern index>
: InstVRX<opcode, (outs tr1.op:$V1),
- (ins tr2.op:$V1src, bdxaddr12only:$XBD2, index:$M3),
+ (ins tr2.op:$V1src, (bdxaddr12only $B2, $D2, $X2):$XBD2, index:$M3),
mnemonic#"\t$V1, $XBD2, $M3",
[(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V1src),
bdxaddr12only:$XBD2,
@@ -4764,12 +4941,13 @@ multiclass SideEffectQuaternaryRRFbOpt<string mnemonic, bits<16> opcode,
class SideEffectQuaternarySSe<string mnemonic, bits<8> opcode,
RegisterOperand cls>
: InstSSe<opcode, (outs),
- (ins cls:$R1, bdaddr12only:$BD2, cls:$R3, bdaddr12only:$BD4),
+ (ins cls:$R1, (bdaddr12only $B2, $D2):$BD2, cls:$R3,
+ (bdaddr12only $B4, $D4):$BD4),
mnemonic#"\t$R1, $BD2, $R3, $BD4", []>;
class LoadAndOpRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, AddressingMode mode = bdaddr20only>
- : InstRSYa<opcode, (outs cls:$R1), (ins cls:$R3, mode:$BD2),
+ : InstRSYa<opcode, (outs cls:$R1), (ins cls:$R3, (mode $B2, $D2):$BD2),
mnemonic#"\t$R1, $R3, $BD2",
[(set cls:$R1, (operator mode:$BD2, cls:$R3))]> {
let mayLoad = 1;
@@ -4788,7 +4966,8 @@ class CmpSwapRRE<string mnemonic, bits<16> opcode,
class CmpSwapRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls, AddressingMode mode = bdaddr12only>
- : InstRSa<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2),
+ : InstRSa<opcode, (outs cls:$R1),
+ (ins cls:$R1src, cls:$R3, (mode $B2, $D2):$BD2),
mnemonic#"\t$R1, $R3, $BD2",
[(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> {
let Constraints = "$R1 = $R1src";
@@ -4799,7 +4978,8 @@ class CmpSwapRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, AddressingMode mode = bdaddr20only>
- : InstRSYa<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2),
+ : InstRSYa<opcode, (outs cls:$R1),
+ (ins cls:$R1src, cls:$R3, (mode $B2, $D2):$BD2),
mnemonic#"\t$R1, $R3, $BD2",
[(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> {
let Constraints = "$R1 = $R1src";
@@ -4822,14 +5002,15 @@ class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
RegisterOperand cls2>
: InstRIEf<opcode, (outs cls1:$R1),
(ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4,
- imm32zx6:$I5),
+ imm32zx8:$I5),
mnemonic#"\t$R1, $R2, $I3, $I4, $I5", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator>
- : InstRXYb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr20only:$XBD2),
+ : InstRXYb<opcode, (outs),
+ (ins imm32zx4:$M1, (bdxaddr20only $B2, $D2, $X2):$XBD2),
mnemonic#"\t$M1, $XBD2",
[(operator imm32zx4_timm:$M1, bdxaddr20only:$XBD2)]>;
@@ -4846,7 +5027,8 @@ class PrefetchRILPC<string mnemonic, bits<12> opcode,
class BranchPreloadSMI<string mnemonic, bits<8> opcode>
: InstSMI<opcode, (outs),
- (ins imm32zx4:$M1, brtarget16bpp:$RI2, bdxaddr12only:$BD3),
+ (ins imm32zx4:$M1, brtarget16bpp:$RI2,
+ (bdaddr12only $B3, $D3):$BD3),
mnemonic#"\t$M1, $RI2, $BD3", []>;
class BranchPreloadMII<string mnemonic, bits<8> opcode>
@@ -4892,7 +5074,7 @@ class UnaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
class UnaryRXYPseudo<string key, SDPatternOperator operator,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdxaddr20only>
- : Pseudo<(outs cls:$R1), (ins mode:$XBD2),
+ : Pseudo<(outs cls:$R1), (ins (mode $B2, $D2, $X2):$XBD2),
[(set cls:$R1, (operator mode:$XBD2))]> {
let OpKey = key#"r"#cls;
let OpType = "mem";
@@ -4944,7 +5126,7 @@ multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator,
// Mapping: <INSN>R -> MemFoldPseudo -> <INSN>
class MemFoldPseudo<string mnemonic, RegisterOperand cls, bits<5> bytes,
AddressingMode mode>
- : Pseudo<(outs cls:$R1), (ins cls:$R2, mode:$XBD2), []> {
+ : Pseudo<(outs cls:$R1), (ins cls:$R2, (mode $B2, $D2, $X2):$XBD2), []> {
let OpKey = !subst("mscrk", "msrkc",
!subst("msgcrk", "msgrkc",
mnemonic#"rk"#cls));
@@ -4966,7 +5148,8 @@ class MemFoldPseudo_FP<string mnemonic, RegisterOperand cls, bits<5> bytes,
class MemFoldPseudo_FPTern<string mnemonic, RegisterOperand cls, bits<5> bytes,
AddressingMode mode>
- : Pseudo<(outs cls:$R1), (ins cls:$R2, cls:$R3, mode:$XBD2), []> {
+ : Pseudo<(outs cls:$R1),
+ (ins cls:$R2, cls:$R3, (mode $B2, $D2, $X2):$XBD2), []> {
let OpKey = mnemonic#"r"#"MemFold"#cls;
let OpType = "mem";
let MemKey = mnemonic#cls;
@@ -4981,7 +5164,7 @@ class MemFoldPseudo_FPTern<string mnemonic, RegisterOperand cls, bits<5> bytes,
class MemFoldPseudo_CondMove<string mnemonic, RegisterOperand cls, bits<5> bytes,
AddressingMode mode>
: Pseudo<(outs cls:$R1),
- (ins cls:$R2, mode:$XBD2, cond4:$valid, cond4:$M3), []> {
+ (ins cls:$R2, (mode $B2, $D2):$BD2, cond4:$valid, cond4:$M3), []> {
let OpKey = !subst("loc", "sel", mnemonic)#"r"#cls;
let OpType = "mem";
let MemKey = mnemonic#cls;
@@ -5003,7 +5186,7 @@ class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
class CompareRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr20only>
- : Pseudo<(outs), (ins cls:$R1, mode:$XBD2),
+ : Pseudo<(outs), (ins cls:$R1, (mode $B2, $D2, $X2):$XBD2),
[(set CC, (operator cls:$R1, (load mode:$XBD2)))]> {
let mayLoad = 1;
let Has20BitOffset = 1;
@@ -5013,7 +5196,7 @@ class CompareRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
// Like TestBinarySIL, but expanded later.
class TestBinarySILPseudo<SDPatternOperator operator, ImmOpWithPattern imm>
- : Pseudo<(outs), (ins bdaddr12only:$BD1, imm:$I2),
+ : Pseudo<(outs), (ins (bdaddr12only $B1, $D1):$BD1, imm:$I2),
[(set CC, (operator bdaddr12only:$BD1, imm:$I2))]>;
// Like CondBinaryRRF, but expanded after RA depending on the choice of
@@ -5066,7 +5249,7 @@ class CondUnaryRSYPseudo<string mnemonic, SDPatternOperator operator,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
: Pseudo<(outs cls:$R1),
- (ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3),
+ (ins cls:$R1src, (mode $B2, $D2):$BD2, cond4:$valid, cond4:$R3),
[(set cls:$R1,
(z_select_ccmask (operator mode:$BD2), cls:$R1src,
cond4:$valid, cond4:$R3))]> {
@@ -5085,7 +5268,8 @@ class CondUnaryRSYPseudo<string mnemonic, SDPatternOperator operator,
// register.
class CondStoreRSYPseudo<RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
- : Pseudo<(outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$R3), []> {
+ : Pseudo<(outs),
+ (ins cls:$R1, (mode $B2, $D2):$BD2, cond4:$valid, cond4:$R3), []> {
let mayStore = 1;
let AccessBytes = bytes;
let CCMaskLast = 1;
@@ -5094,7 +5278,7 @@ class CondStoreRSYPseudo<RegisterOperand cls, bits<5> bytes,
// Like StoreRXY, but expanded after RA depending on the choice of register.
class StoreRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
bits<5> bytes, AddressingMode mode = bdxaddr20only>
- : Pseudo<(outs), (ins cls:$R1, mode:$XBD2),
+ : Pseudo<(outs), (ins cls:$R1, (mode $B2, $D2, $X2):$XBD2),
[(operator cls:$R1, mode:$XBD2)]> {
let mayStore = 1;
let Has20BitOffset = 1;
@@ -5107,7 +5291,7 @@ class StoreRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
class RotateSelectRIEfPseudo<RegisterOperand cls1, RegisterOperand cls2>
: Pseudo<(outs cls1:$R1),
(ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4,
- imm32zx6:$I5),
+ imm32zx8:$I5),
[]> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
@@ -5211,13 +5395,13 @@ class UnaryAliasVRR<SDPatternOperator operator, TypedReg tr1, TypedReg tr2>
// An alias of a UnaryVRX, but with different register sizes.
class UnaryAliasVRX<SDPatternOperator operator, TypedReg tr,
AddressingMode mode = bdxaddr12only>
- : Alias<6, (outs tr.op:$V1), (ins mode:$XBD2),
+ : Alias<6, (outs tr.op:$V1), (ins (mode $B2, $D2, $X2):$XBD2),
[(set (tr.vt tr.op:$V1), (operator mode:$XBD2))]>;
// An alias of a StoreVRX, but with different register sizes.
class StoreAliasVRX<SDPatternOperator operator, TypedReg tr,
AddressingMode mode = bdxaddr12only>
- : Alias<6, (outs), (ins tr.op:$V1, mode:$XBD2),
+ : Alias<6, (outs), (ins tr.op:$V1, (mode $B2, $D2, $X2):$XBD2),
[(operator (tr.vt tr.op:$V1), mode:$XBD2)]>;
// An alias of a BinaryRI, but with different register sizes.
@@ -5252,7 +5436,7 @@ class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls,
class RotateSelectAliasRIEf<RegisterOperand cls1, RegisterOperand cls2>
: Alias<6, (outs cls1:$R1),
(ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4,
- imm32zx6:$I5), []> {
+ imm32zx8:$I5), []> {
let Constraints = "$R1 = $R1src";
}
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 2b9210f102de..ac8c395f9064 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -1014,17 +1014,16 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
unsigned Opcode = MI.getOpcode();
// Check CC liveness if new instruction introduces a dead def of CC.
- MCRegUnitIterator CCUnit(MCRegister::from(SystemZ::CC), TRI);
SlotIndex MISlot = SlotIndex();
LiveRange *CCLiveRange = nullptr;
bool CCLiveAtMI = true;
if (LIS) {
MISlot = LIS->getSlotIndexes()->getInstructionIndex(MI).getRegSlot();
- CCLiveRange = &LIS->getRegUnit(*CCUnit);
+ auto CCUnits = TRI->regunits(MCRegister::from(SystemZ::CC));
+ assert(range_size(CCUnits) == 1 && "CC only has one reg unit.");
+ CCLiveRange = &LIS->getRegUnit(*CCUnits.begin());
CCLiveAtMI = CCLiveRange->liveAt(MISlot);
}
- ++CCUnit;
- assert(!CCUnit.isValid() && "CC only has one reg unit.");
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
if (!CCLiveAtMI && (Opcode == SystemZ::LA || Opcode == SystemZ::LAY) &&
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 9ce75db6c177..bb883ea464d3 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -74,6 +74,17 @@ enum {
MO_INDNTPOFF = (2 << 0)
};
+// z/OS XPLink specific: classifies the types of
+// accesses to the ADA (Associated Data Area).
+// These enums contain values that overlap with the above MO_ enums,
+// but that's fine since the above enums are used with ELF,
+// while these values are used with z/OS.
+enum {
+ MO_ADA_DATA_SYMBOL_ADDR = 1,
+ MO_ADA_INDIRECT_FUNC_DESC,
+ MO_ADA_DIRECT_FUNC_DESC,
+};
+
// Classifies a branch.
enum BranchType {
// An instruction that branches on the current value of CC.
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index c53cb7cadadb..87eb3838aec4 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -112,7 +112,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
// condition mask set to "never". NOP_bare can't be an InstAlias since it
// would need R0D hard coded which is not part of ADDR64BitRegClass.
def NOP : InstAlias<"nop\t$XBD", (BCAsm 0, bdxaddr12only:$XBD), 0>;
-let isAsmParserOnly = 1, hasNoSchedulingInfo = 1, M1 = 0, XBD2 = 0 in
+let isAsmParserOnly = 1, hasNoSchedulingInfo = 1, M1 = 0, X2 = 0, B2 = 0, D2 = 0 in
def NOP_bare : InstRXb<0x47,(outs), (ins), "nop", []>;
def NOPR : InstAlias<"nopr\t$R", (BCRAsm 0, GR64:$R), 0>;
def NOPR_bare : InstAlias<"nopr", (BCRAsm 0, R0D), 0>;
@@ -281,6 +281,9 @@ let isCall = 1, Defs = [CC] in {
def BASR : CallRR <"basr", 0x0D>;
}
+// A symbol in the ADA (z/OS only).
+def adasym : Operand<i64>;
+
// z/OS XPLINK
let Predicates = [IsTargetXPLINK64] in {
let isCall = 1, Defs = [R7D, CC], Uses = [FPC] in {
@@ -293,6 +296,19 @@ let Predicates = [IsTargetXPLINK64] in {
let isCall = 1, Defs = [R3D, CC], Uses = [FPC] in {
def CallBASR_STACKEXT : Alias<4, (outs), (ins ADDR64:$R2), []>;
}
+
+ let hasNoSchedulingInfo = 1, Defs = [CC] in {
+ def ADA_ENTRY : Alias<12, (outs GR64:$Reg), (ins adasym:$addr,
+ ADDR64:$ADA, imm64:$Offset),
+ [(set i64:$Reg, (z_ada_entry i64:$addr,
+ i64:$ADA, i64:$Offset))]>;
+ }
+ let mayLoad = 1, AddedComplexity = 20, hasNoSchedulingInfo = 1, Defs = [CC] in {
+ def ADA_ENTRY_VALUE : Alias<12, (outs GR64:$Reg), (ins adasym:$addr,
+ ADDR64:$ADA, imm64:$Offset),
+ [(set i64:$Reg, (load (z_ada_entry
+ iPTR:$addr, iPTR:$ADA, i64:$Offset)))]>;
+ }
}
// Regular calls.
@@ -347,7 +363,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1 in {
let Predicates = [IsTargetXPLINK64] in {
// A return instruction (b 2(%r7)).
let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
- def Return_XPLINK : Alias<4, (outs), (ins), [(z_retflag)]>;
+ def Return_XPLINK : Alias<4, (outs), (ins), [(z_retglue)]>;
// A conditional return instruction (bc <cond>, 2(%r7)).
let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, CCMaskFirst = 1, Uses = [CC] in
@@ -357,7 +373,7 @@ let Predicates = [IsTargetXPLINK64] in {
let Predicates = [IsTargetELF] in {
// A return instruction (br %r14).
let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
- def Return : Alias<2, (outs), (ins), [(z_retflag)]>;
+ def Return : Alias<2, (outs), (ins), [(z_retglue)]>;
// A conditional return instruction (bcr <cond>, %r14).
let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, CCMaskFirst = 1, Uses = [CC] in
@@ -2221,7 +2237,7 @@ let isCodeGenOnly = 1, hasSideEffects = 1 in {
def InsnRIS : DirectiveInsnRIS<(outs),
(ins imm64zx48:$enc, AnyReg:$R1,
imm32sx8:$I2, imm32zx4:$M3,
- bdaddr12only:$BD4),
+ (bdaddr12only $B4, $D4):$BD4),
".insn ris,$enc,$R1,$I2,$M3,$BD4", []>;
def InsnRR : DirectiveInsnRR<(outs),
(ins imm64zx16:$enc, AnyReg:$R1, AnyReg:$R2),
@@ -2236,15 +2252,15 @@ let isCodeGenOnly = 1, hasSideEffects = 1 in {
def InsnRRS : DirectiveInsnRRS<(outs),
(ins imm64zx48:$enc, AnyReg:$R1,
AnyReg:$R2, imm32zx4:$M3,
- bdaddr12only:$BD4),
+ (bdaddr12only $B4, $D4):$BD4),
".insn rrs,$enc,$R1,$R2,$M3,$BD4", []>;
def InsnRS : DirectiveInsnRS<(outs),
(ins imm64zx32:$enc, AnyReg:$R1,
- AnyReg:$R3, bdaddr12only:$BD2),
+ AnyReg:$R3, (bdaddr12only $B2, $D2):$BD2),
".insn rs,$enc,$R1,$R3,$BD2", []>;
def InsnRSE : DirectiveInsnRSE<(outs),
(ins imm64zx48:$enc, AnyReg:$R1,
- AnyReg:$R3, bdaddr12only:$BD2),
+ AnyReg:$R3, (bdaddr12only $B2, $D2):$BD2),
".insn rse,$enc,$R1,$R3,$BD2", []>;
def InsnRSI : DirectiveInsnRSI<(outs),
(ins imm64zx48:$enc, AnyReg:$R1,
@@ -2252,47 +2268,47 @@ let isCodeGenOnly = 1, hasSideEffects = 1 in {
".insn rsi,$enc,$R1,$R3,$RI2", []>;
def InsnRSY : DirectiveInsnRSY<(outs),
(ins imm64zx48:$enc, AnyReg:$R1,
- AnyReg:$R3, bdaddr20only:$BD2),
+ AnyReg:$R3, (bdaddr20only $B2, $D2):$BD2),
".insn rsy,$enc,$R1,$R3,$BD2", []>;
def InsnRX : DirectiveInsnRX<(outs), (ins imm64zx32:$enc, AnyReg:$R1,
- bdxaddr12only:$XBD2),
+ (bdxaddr12only $B2, $D2, $X2):$XBD2),
".insn rx,$enc,$R1,$XBD2", []>;
def InsnRXE : DirectiveInsnRXE<(outs), (ins imm64zx48:$enc, AnyReg:$R1,
- bdxaddr12only:$XBD2),
+ (bdxaddr12only $B2, $D2, $X2):$XBD2),
".insn rxe,$enc,$R1,$XBD2", []>;
def InsnRXF : DirectiveInsnRXF<(outs),
(ins imm64zx48:$enc, AnyReg:$R1,
- AnyReg:$R3, bdxaddr12only:$XBD2),
+ AnyReg:$R3, (bdxaddr12only $B2, $D2, $X2):$XBD2),
".insn rxf,$enc,$R1,$R3,$XBD2", []>;
def InsnRXY : DirectiveInsnRXY<(outs), (ins imm64zx48:$enc, AnyReg:$R1,
- bdxaddr20only:$XBD2),
+ (bdxaddr20only $B2, $D2, $X2):$XBD2),
".insn rxy,$enc,$R1,$XBD2", []>;
def InsnS : DirectiveInsnS<(outs),
- (ins imm64zx32:$enc, bdaddr12only:$BD2),
+ (ins imm64zx32:$enc, (bdaddr12only $B2, $D2):$BD2),
".insn s,$enc,$BD2", []>;
def InsnSI : DirectiveInsnSI<(outs),
- (ins imm64zx32:$enc, bdaddr12only:$BD1,
+ (ins imm64zx32:$enc, (bdaddr12only $B1, $D1):$BD1,
imm32sx8:$I2),
".insn si,$enc,$BD1,$I2", []>;
def InsnSIY : DirectiveInsnSIY<(outs),
(ins imm64zx48:$enc,
- bdaddr20only:$BD1, imm32zx8:$I2),
+ (bdaddr20only $B1, $D1):$BD1, imm32zx8:$I2),
".insn siy,$enc,$BD1,$I2", []>;
def InsnSIL : DirectiveInsnSIL<(outs),
- (ins imm64zx48:$enc, bdaddr12only:$BD1,
+ (ins imm64zx48:$enc, (bdaddr12only $B1, $D1):$BD1,
imm32zx16:$I2),
".insn sil,$enc,$BD1,$I2", []>;
def InsnSS : DirectiveInsnSS<(outs),
- (ins imm64zx48:$enc, bdraddr12only:$RBD1,
- bdaddr12only:$BD2, AnyReg:$R3),
+ (ins imm64zx48:$enc, (bdraddr12only $B1, $D1, $R1):$RBD1,
+ (bdaddr12only $B2, $D2):$BD2, AnyReg:$R3),
".insn ss,$enc,$RBD1,$BD2,$R3", []>;
def InsnSSE : DirectiveInsnSSE<(outs),
(ins imm64zx48:$enc,
- bdaddr12only:$BD1,bdaddr12only:$BD2),
+ (bdaddr12only $B1, $D1):$BD1,(bdaddr12only $B2, $D2):$BD2),
".insn sse,$enc,$BD1,$BD2", []>;
def InsnSSF : DirectiveInsnSSF<(outs),
- (ins imm64zx48:$enc, bdaddr12only:$BD1,
- bdaddr12only:$BD2, AnyReg:$R3),
+ (ins imm64zx48:$enc, (bdaddr12only $B1, $D1):$BD1,
+ (bdaddr12only $B2, $D2):$BD2, AnyReg:$R3),
".insn ssf,$enc,$BD1,$BD2,$R3", []>;
def InsnVRI : DirectiveInsnVRI<(outs),
(ins imm64zx48:$enc, VR128:$V1, VR128:$V2,
@@ -2305,19 +2321,19 @@ let isCodeGenOnly = 1, hasSideEffects = 1 in {
".insn vrr,$enc,$V1,$V2,$V3,$M4,$M5,$M6", []>;
def InsnVRS : DirectiveInsnVRS<(outs),
(ins imm64zx48:$enc, AnyReg:$R1, VR128:$V3,
- bdaddr12only:$BD2, imm32zx4:$M4),
+ (bdaddr12only $B2, $D2):$BD2, imm32zx4:$M4),
".insn vrs,$enc,$BD2,$M4", []>;
def InsnVRV : DirectiveInsnVRV<(outs),
(ins imm64zx48:$enc, VR128:$V1,
- bdvaddr12only:$VBD2, imm32zx4:$M3),
+ (bdvaddr12only $B2, $D2, $V2):$VBD2, imm32zx4:$M3),
".insn vrv,$enc,$V1,$VBD2,$M3", []>;
def InsnVRX : DirectiveInsnVRX<(outs),
(ins imm64zx48:$enc, VR128:$V1,
- bdxaddr12only:$XBD2, imm32zx4:$M3),
+ (bdxaddr12only $B2, $D2, $X2):$XBD2, imm32zx4:$M3),
".insn vrx,$enc,$V1,$XBD2,$M3", []>;
def InsnVSI : DirectiveInsnVSI<(outs),
(ins imm64zx48:$enc, VR128:$V1,
- bdaddr12only:$BD2, imm32zx8:$I3),
+ (bdaddr12only $B2, $D2):$BD2, imm32zx8:$I3),
".insn vsi,$enc,$V1,$BD2,$I3", []>;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index 2e9524a44f9c..82863d7838a9 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -113,7 +113,7 @@ let Predicates = [FeatureVector] in {
// Load count to block boundary.
let Defs = [CC] in
def LCBB : InstRXE<0xE727, (outs GR32:$R1),
- (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
+ (ins (bdxaddr12only $B2, $D2, $X2):$XBD2, imm32zx4:$M3),
"lcbb\t$R1, $XBD2, $M3",
[(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2,
imm32zx4_timm:$M3))]>;
diff --git a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index 333195989a11..be7012a37a3d 100644
--- a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -27,6 +27,9 @@ struct GPRRegs {
class SystemZMachineFunctionInfo : public MachineFunctionInfo {
virtual void anchor();
+ /// Size of expected parameter area for current function. (Fixed args only).
+ unsigned SizeOfFnParams;
+
SystemZ::GPRRegs SpillGPRRegs;
SystemZ::GPRRegs RestoreGPRRegs;
Register VarArgsFirstGPR;
@@ -35,17 +38,25 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {
unsigned RegSaveFrameIndex;
int FramePointerSaveIndex;
unsigned NumLocalDynamics;
+ /// z/OS XPLINK ABI: incoming ADA virtual register.
+ Register VRegADA;
public:
SystemZMachineFunctionInfo(const Function &F, const TargetSubtargetInfo *STI)
- : VarArgsFirstGPR(0), VarArgsFirstFPR(0), VarArgsFrameIndex(0),
- RegSaveFrameIndex(0), FramePointerSaveIndex(0), NumLocalDynamics(0) {}
+ : SizeOfFnParams(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0),
+ VarArgsFrameIndex(0), RegSaveFrameIndex(0), FramePointerSaveIndex(0),
+ NumLocalDynamics(0) {}
MachineFunctionInfo *
clone(BumpPtrAllocator &Allocator, MachineFunction &DestMF,
const DenseMap<MachineBasicBlock *, MachineBasicBlock *> &Src2DstMBB)
const override;
+ // z/OS: Get and set the size of the expected parameter area for the
+  // current function. (i.e. size of param area in caller).
+ unsigned getSizeOfFnParams() const { return SizeOfFnParams; }
+ void setSizeOfFnParams(unsigned Size) { SizeOfFnParams = Size; }
+
// Get and set the first and last call-saved GPR that should be saved by
// this function and the SP offset for the STMG. These are 0 if no GPRs
// need to be saved or restored.
@@ -91,6 +102,11 @@ public:
// Count number of local-dynamic TLS symbols used.
unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
+
+ // Get and set the function's incoming special XPLINK ABI defined ADA
+ // register.
+ Register getADAVirtualRegister() const { return VRegADA; }
+ void setADAVirtualRegister(Register Reg) { VRegADA = Reg; }
};
} // end namespace llvm
diff --git a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h
index 0d5cc2e03e8d..e97092409ce9 100644
--- a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h
+++ b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -14,14 +14,14 @@
// region of each MBB, so that a successor block can learn from it.
//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
+
#include "SystemZHazardRecognizer.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include <set>
-#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
-#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
-
using namespace llvm;
namespace llvm {
diff --git a/llvm/lib/Target/SystemZ/SystemZOperands.td b/llvm/lib/Target/SystemZ/SystemZOperands.td
index a883daad73e7..c92e0abe38ac 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperands.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperands.td
@@ -23,6 +23,7 @@ class ImmediateTLSAsmOperand<string name>
class ImmediateOp<ValueType vt, string asmop> : Operand<vt> {
let PrintMethod = "print"#asmop#"Operand";
+ let EncoderMethod = "getImmOpValue<SystemZ::FK_390_"#asmop#">";
let DecoderMethod = "decode"#asmop#"Operand";
let ParserMatchClass = !cast<AsmOperandClass>(asmop);
let OperandType = "OPERAND_IMMEDIATE";
@@ -105,9 +106,6 @@ class AddressOperand<string bitsize, string dispsize, string length,
string format, dag operands>
: Operand<!cast<ValueType>("i"#bitsize)> {
let PrintMethod = "print"#format#"Operand";
- let EncoderMethod = "get"#format#dispsize#length#"Encoding";
- let DecoderMethod =
- "decode"#format#bitsize#"Disp"#dispsize#length#"Operand";
let OperandType = "OPERAND_MEMORY";
let MIOperandInfo = operands;
let ParserMatchClass =
@@ -151,7 +149,7 @@ class BDLMode<string type, string bitsize, string dispsize, string suffix,
"BDLAddr",
(ops !cast<RegisterOperand>("ADDR"#bitsize),
!cast<Operand>("disp"#dispsize#"imm"#bitsize),
- !cast<Operand>("imm"#bitsize))>;
+ !cast<Operand>("len"#lensize#"imm"#bitsize))>;
// A BDMode paired with a register length operand.
class BDRMode<string type, string bitsize, string dispsize, string suffix>
@@ -300,7 +298,6 @@ def U1Imm : ImmediateAsmOperand<"U1Imm">;
def U2Imm : ImmediateAsmOperand<"U2Imm">;
def U3Imm : ImmediateAsmOperand<"U3Imm">;
def U4Imm : ImmediateAsmOperand<"U4Imm">;
-def U6Imm : ImmediateAsmOperand<"U6Imm">;
def S8Imm : ImmediateAsmOperand<"S8Imm">;
def U8Imm : ImmediateAsmOperand<"U8Imm">;
def U12Imm : ImmediateAsmOperand<"U12Imm">;
@@ -357,10 +354,6 @@ defm imm32zx4even : Immediate<i32, [{
return isUInt<4>(N->getZExtValue());
}], UIMM8EVEN, "U4Imm">;
-defm imm32zx6 : Immediate<i32, [{
- return isUInt<6>(N->getZExtValue());
-}], NOOP_SDNodeXForm, "U6Imm">;
-
defm imm32sx8 : Immediate<i32, [{
return isInt<8>(N->getSExtValue());
}], SIMM8, "S8Imm">;
@@ -512,8 +505,18 @@ defm imm64zx48 : Immediate<i64, [{
return isUInt<64>(N->getZExtValue());
}], UIMM48, "U48Imm">;
-let OperandType = "OPERAND_IMMEDIATE" in
- def imm64 : ImmLeaf<i64, [{}]>, Operand<i64>;
+class Imm64 : ImmLeaf<i64, [{}]>, Operand<i64> {
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+def imm64 : Imm64;
+def len4imm64 : Imm64 {
+ let EncoderMethod = "getLenEncoding<SystemZ::FK_390_U4Imm>";
+ let DecoderMethod = "decodeLenOperand<4>";
+}
+def len8imm64 : Imm64 {
+ let EncoderMethod = "getLenEncoding<SystemZ::FK_390_U8Imm>";
+ let DecoderMethod = "decodeLenOperand<8>";
+}
//===----------------------------------------------------------------------===//
// Floating-point immediates
@@ -588,12 +591,18 @@ def pcrel32 : PCRelAddress<i64, "pcrel32", PCRel32> {
//===----------------------------------------------------------------------===//
// 12-bit displacement operands.
-def disp12imm32 : Operand<i32>;
-def disp12imm64 : Operand<i64>;
+let EncoderMethod = "getImmOpValue<SystemZ::FK_390_U12Imm>",
+ DecoderMethod = "decodeU12ImmOperand" in {
+ def disp12imm32 : Operand<i32>;
+ def disp12imm64 : Operand<i64>;
+}
// 20-bit displacement operands.
-def disp20imm32 : Operand<i32>;
-def disp20imm64 : Operand<i64>;
+let EncoderMethod = "getImmOpValue<SystemZ::FK_390_S20Imm>",
+ DecoderMethod = "decodeS20ImmOperand" in {
+ def disp20imm32 : Operand<i32>;
+ def disp20imm64 : Operand<i64>;
+}
def BDAddr32Disp12 : AddressAsmOperand<"BDAddr", "32", "12">;
def BDAddr32Disp20 : AddressAsmOperand<"BDAddr", "32", "20">;
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 4091c49cec28..6713cac2a780 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -127,6 +127,11 @@ def SDT_ZTBegin : SDTypeProfile<1, 2,
[SDTCisVT<0, i32>,
SDTCisPtrTy<1>,
SDTCisVT<2, i32>]>;
+def SDT_ZADAENTRY : SDTypeProfile<1, 3,
+ [SDTCisPtrTy<0>,
+ SDTCisPtrTy<1>,
+ SDTCisPtrTy<2>,
+ SDTCisVT<3, i64>]>;
def SDT_ZTEnd : SDTypeProfile<1, 0,
[SDTCisVT<0, i32>]>;
def SDT_ZInsertVectorElt : SDTypeProfile<1, 3,
@@ -235,7 +240,7 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd,
def global_offset_table : SDNode<"ISD::GLOBAL_OFFSET_TABLE", SDTPtrLeaf>;
// Nodes for SystemZISD::*. See SystemZISelLowering.h for more details.
-def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone,
+def z_retglue : SDNode<"SystemZISD::RET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def z_call : SDNode<"SystemZISD::CALL", SDT_ZCall,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
@@ -433,6 +438,9 @@ def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin,
def z_tend : SDNode<"SystemZISD::TEND", SDT_ZTEnd,
[SDNPHasChain, SDNPSideEffect]>;
+def z_ada_entry : SDNode<"SystemZISD::ADA_ENTRY",
+ SDT_ZADAENTRY>;
+
def z_vshl : SDNode<"ISD::SHL", SDT_ZVecBinary>;
def z_vsra : SDNode<"ISD::SRA", SDT_ZVecBinary>;
def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>;
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 7f3d8e8d311e..4d6b94da3a27 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -30,12 +30,12 @@ static const TargetRegisterClass *getRC32(MachineOperand &MO,
const TargetRegisterClass *RC = MRI->getRegClass(MO.getReg());
if (SystemZ::GR32BitRegClass.hasSubClassEq(RC) ||
- MO.getSubReg() == SystemZ::subreg_l32 ||
- MO.getSubReg() == SystemZ::subreg_hl32)
+ MO.getSubReg() == SystemZ::subreg_ll32 ||
+ MO.getSubReg() == SystemZ::subreg_l32)
return &SystemZ::GR32BitRegClass;
if (SystemZ::GRH32BitRegClass.hasSubClassEq(RC) ||
- MO.getSubReg() == SystemZ::subreg_h32 ||
- MO.getSubReg() == SystemZ::subreg_hh32)
+ MO.getSubReg() == SystemZ::subreg_lh32 ||
+ MO.getSubReg() == SystemZ::subreg_h32)
return &SystemZ::GRH32BitRegClass;
if (VRM && VRM->hasPhys(MO.getReg())) {
@@ -430,10 +430,9 @@ bool SystemZRegisterInfo::shouldCoalesce(MachineInstr *MI,
for (; MII != MEE; ++MII) {
for (const MachineOperand &MO : MII->operands())
if (MO.isReg() && MO.getReg().isPhysical()) {
- for (MCSuperRegIterator SI(MO.getReg(), this, true/*IncludeSelf*/);
- SI.isValid(); ++SI)
- if (NewRC->contains(*SI)) {
- PhysClobbered.set(*SI);
+ for (MCPhysReg SI : superregs_inclusive(MO.getReg()))
+ if (NewRC->contains(SI)) {
+ PhysClobbered.set(SI);
break;
}
}
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index 19305d4e8957..cbc02c73f1ac 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -24,10 +24,10 @@ namespace SystemZ {
// Return the subreg to use for referring to the even and odd registers
// in a GR128 pair. Is32Bit says whether we want a GR32 or GR64.
inline unsigned even128(bool Is32bit) {
- return Is32bit ? subreg_hl32 : subreg_h64;
+ return Is32bit ? subreg_l32 : subreg_h64;
}
inline unsigned odd128(bool Is32bit) {
- return Is32bit ? subreg_l32 : subreg_l64;
+ return Is32bit ? subreg_ll32 : subreg_l64;
}
// Reg should be a 32-bit GPR. Return true if it is a high register rather
@@ -89,6 +89,8 @@ public:
int getAddressOfCalleeRegister() { return SystemZ::R6D; };
+ int getADARegister() { return SystemZ::R5D; }
+
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const final;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
index 00005a6d6179..5d66501172b2 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -20,12 +20,12 @@ class SystemZRegWithSubregs<string n, list<Register> subregs>
}
let Namespace = "SystemZ" in {
-def subreg_l32 : SubRegIndex<32, 0>; // Also acts as subreg_ll32.
-def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_lh32.
+def subreg_l32 : SubRegIndex<32, 0>; // Also acts as subreg_hl32.
+def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_hh32.
def subreg_l64 : SubRegIndex<64, 0>;
def subreg_h64 : SubRegIndex<64, 64>;
-def subreg_hh32 : ComposedSubRegIndex<subreg_h64, subreg_h32>;
-def subreg_hl32 : ComposedSubRegIndex<subreg_h64, subreg_l32>;
+def subreg_lh32 : ComposedSubRegIndex<subreg_l64, subreg_h32>;
+def subreg_ll32 : ComposedSubRegIndex<subreg_l64, subreg_l32>;
}
// Define a register class that contains values of types TYPES and an
@@ -73,9 +73,9 @@ class GPR64<bits<16> num, string n, GPR32 low, GPR32 high>
// 8 even-odd pairs of GPR64s.
class GPR128<bits<16> num, string n, GPR64 low, GPR64 high>
- : SystemZRegWithSubregs<n, [low, high]> {
+ : SystemZRegWithSubregs<n, [high, low]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_l64, subreg_h64];
+ let SubRegIndices = [subreg_h64, subreg_l64];
let CoveredBySubRegs = 1;
}
@@ -215,9 +215,9 @@ class FPR64<bits<16> num, string n, FPR32 high>
// 8 pairs of FPR64s, with a one-register gap in between.
class FPR128<bits<16> num, string n, FPR64 low, FPR64 high>
- : SystemZRegWithSubregs<n, [low, high]> {
+ : SystemZRegWithSubregs<n, [high, low]> {
let HWEncoding = num;
- let SubRegIndices = [subreg_l64, subreg_h64];
+ let SubRegIndices = [subreg_h64, subreg_l64];
let CoveredBySubRegs = 1;
}
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index 25b013ba1876..e008ce859a9a 100644
--- a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -8,6 +8,7 @@
#include "SystemZSubtarget.h"
#include "MCTargetDesc/SystemZMCTargetDesc.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
@@ -77,8 +78,42 @@ bool SystemZSubtarget::enableSubRegLiveness() const {
return UseSubRegLiveness;
}
+bool SystemZSubtarget::isAddressedViaADA(const GlobalValue *GV) const {
+ if (const auto *GO = dyn_cast<GlobalObject>(GV)) {
+ // An R/O variable is placed in the code section. If the R/O variable has at
+ // least two-byte alignment, then generated code can use relative
+ // instructions to address the variable. Otherwise, use the ADA to address
+ // the variable.
+ if (GO->getAlignment() & 0x1) {
+ return true;
+ }
+
+ // getKindForGlobal only works with definitions
+ if (GO->isDeclaration()) {
+ return true;
+ }
+
+ // check AvailableExternallyLinkage here as getKindForGlobal() asserts
+ if (GO->hasAvailableExternallyLinkage()) {
+ return true;
+ }
+
+ SectionKind GOKind = TargetLoweringObjectFile::getKindForGlobal(
+ GO, TLInfo.getTargetMachine());
+ if (!GOKind.isReadOnly()) {
+ return true;
+ }
+
+ return false; // R/O variable whose alignment is a multiple of 2 bytes
+ }
+ return true;
+}
+
bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV,
CodeModel::Model CM) const {
+ if (isTargetzOS())
+ return !isAddressedViaADA(GV);
+
// PC32DBL accesses require the low bit to be clear.
//
// FIXME: Explicitly check for functions: the datalayout is currently
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/llvm/lib/Target/SystemZ/SystemZSubtarget.h
index 9d4c1f0fe710..5fa7c8f194eb 100644
--- a/llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -18,9 +18,9 @@
#include "SystemZInstrInfo.h"
#include "SystemZRegisterInfo.h"
#include "SystemZSelectionDAGInfo.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/TargetParser/Triple.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
@@ -106,6 +106,8 @@ public:
bool GETTER() const { return ATTRIBUTE; }
#include "SystemZGenSubtargetInfo.inc"
+ bool isAddressedViaADA(const GlobalValue *GV) const;
+
// Return true if GV can be accessed using LARL for reloc model RM
// and code model CM.
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 1c82e6940033..62c59ddc3f06 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -36,7 +36,7 @@ public:
/// \name Scalar TTI Implementations
/// @{
- unsigned getInliningThresholdMultiplier() { return 3; }
+ unsigned getInliningThresholdMultiplier() const { return 3; }
unsigned adjustInliningThreshold(const CallBase *CB) const;
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
diff --git a/llvm/lib/Target/Target.cpp b/llvm/lib/Target/Target.cpp
index 1094b83d72eb..f916a77204fc 100644
--- a/llvm/lib/Target/Target.cpp
+++ b/llvm/lib/Target/Target.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm-c/Target.h"
-#include "llvm-c/Initialization.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
@@ -40,10 +39,6 @@ void llvm::initializeTarget(PassRegistry &Registry) {
initializeTargetTransformInfoWrapperPassPass(Registry);
}
-void LLVMInitializeTarget(LLVMPassRegistryRef R) {
- initializeTarget(*unwrap(R));
-}
-
LLVMTargetDataRef LLVMGetModuleDataLayout(LLVMModuleRef M) {
return wrap(&unwrap(M)->getDataLayout());
}
diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
index 8d1ad617889c..bc465168f1db 100644
--- a/llvm/lib/Target/TargetMachine.cpp
+++ b/llvm/lib/Target/TargetMachine.cpp
@@ -21,6 +21,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
using namespace llvm;
@@ -38,6 +39,16 @@ TargetMachine::TargetMachine(const Target &T, StringRef DataLayoutString,
TargetMachine::~TargetMachine() = default;
+bool TargetMachine::isLargeData() const {
+ if (getTargetTriple().getArch() != Triple::x86_64)
+ return false;
+ // Large data under the large code model still needs to be thought about, so
+ // restrict this to medium.
+ if (getCodeModel() != CodeModel::Medium)
+ return false;
+ return true;
+}
+
bool TargetMachine::isPositionIndependent() const {
return getRelocationModel() == Reloc::PIC_;
}
@@ -143,13 +154,7 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
return false;
}
-bool TargetMachine::useEmulatedTLS() const {
- // Returns Options.EmulatedTLS if the -emulated-tls or -no-emulated-tls
- // was specified explicitly; otherwise uses target triple to decide default.
- if (Options.ExplicitEmulatedTLS)
- return Options.EmulatedTLS;
- return getTargetTriple().hasDefaultEmulatedTLS();
-}
+bool TargetMachine::useEmulatedTLS() const { return Options.EmulatedTLS; }
TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const {
bool IsPIE = GV->getParent()->getPIELevel() != PIELevel::Default;
diff --git a/llvm/lib/Target/TargetMachineC.cpp b/llvm/lib/Target/TargetMachineC.cpp
index aa9c9d176db5..7cd29b40da12 100644
--- a/llvm/lib/Target/TargetMachineC.cpp
+++ b/llvm/lib/Target/TargetMachineC.cpp
@@ -16,13 +16,13 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/CodeGenCWrappers.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include <cstring>
#include <optional>
diff --git a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
index 0a72f29659b0..a9cedf1dd97c 100644
--- a/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
+++ b/llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp
@@ -62,7 +62,7 @@ class VEAsmParser : public MCTargetAsmParser {
SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
- bool ParseDirective(AsmToken DirectiveID) override;
+ ParseStatus parseDirective(AsmToken DirectiveID) override;
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
unsigned Kind) override;
@@ -998,7 +998,7 @@ bool VEAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
return false;
}
-bool VEAsmParser::ParseDirective(AsmToken DirectiveID) {
+ParseStatus VEAsmParser::parseDirective(AsmToken DirectiveID) {
std::string IDVal = DirectiveID.getIdentifier().lower();
// Defines VE specific directives. Reference is "Vector Engine Assembly
@@ -1018,7 +1018,7 @@ bool VEAsmParser::ParseDirective(AsmToken DirectiveID) {
return parseLiteralValues(8, DirectiveID.getLoc());
// Let the MC layer to handle other directives.
- return true;
+ return ParseStatus::NoMatch;
}
/// parseLiteralValues
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp
index 9f29fc092c69..0c045a5badb8 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCAsmInfo.cpp
@@ -11,11 +11,11 @@
//===----------------------------------------------------------------------===//
#include "VEMCAsmInfo.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp
index 45facd34f84e..bb643d23e618 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCCodeEmitter.cpp
@@ -48,7 +48,7 @@ public:
VEMCCodeEmitter &operator=(const VEMCCodeEmitter &) = delete;
~VEMCCodeEmitter() override = default;
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -77,11 +77,12 @@ public:
} // end anonymous namespace
-void VEMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+void VEMCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
- support::endian::write<uint64_t>(OS, Bits, support::little);
+ support::endian::write<uint64_t>(CB, Bits, support::little);
++MCNumEmitted; // Keep track of the # of mi's emitted.
}
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h
index 2b0c44576099..2da956d739a6 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCExpr.h
@@ -82,8 +82,6 @@ public:
return E->getKind() == MCExpr::Target;
}
- static bool classof(const VEMCExpr *) { return true; }
-
static VariantKind parseVariantKind(StringRef name);
static bool printVariantKind(raw_ostream &OS, VariantKind Kind);
static void printVariantKindSuffix(raw_ostream &OS, VariantKind Kind);
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
index 5a562d77f941..6611e4c42eb2 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
+++ b/llvm/lib/Target/VE/MCTargetDesc/VEMCTargetDesc.cpp
@@ -73,6 +73,10 @@ static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
return new VETargetAsmStreamer(S, OS);
}
+static MCTargetStreamer *createNullTargetStreamer(MCStreamer &S) {
+ return new VETargetStreamer(S);
+}
+
static MCInstPrinter *createVEMCInstPrinter(const Triple &T,
unsigned SyntaxVariant,
const MCAsmInfo &MAI,
@@ -108,6 +112,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeVETargetMC() {
// Register the asm streamer.
TargetRegistry::RegisterAsmTargetStreamer(*T, createTargetAsmStreamer);
+ // Register the null streamer.
+ TargetRegistry::RegisterNullTargetStreamer(*T, createNullTargetStreamer);
+
// Register the MCInstPrinter
TargetRegistry::RegisterMCInstPrinter(*T, createVEMCInstPrinter);
}
diff --git a/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.h b/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.h
index 1704e0b58559..ab85c29aaaf5 100644
--- a/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.h
+++ b/llvm/lib/Target/VE/MCTargetDesc/VETargetStreamer.h
@@ -20,9 +20,9 @@ class VETargetStreamer : public MCTargetStreamer {
public:
VETargetStreamer(MCStreamer &S);
/// Emit ".register <reg>, #ignore".
- virtual void emitVERegisterIgnore(unsigned reg) = 0;
+ virtual void emitVERegisterIgnore(unsigned reg){};
/// Emit ".register <reg>, #scratch".
- virtual void emitVERegisterScratch(unsigned reg) = 0;
+ virtual void emitVERegisterScratch(unsigned reg){};
};
// This part is for ascii assembly output
diff --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h
index ded0460f97d6..e82cb1901633 100644
--- a/llvm/lib/Target/VE/VE.h
+++ b/llvm/lib/Target/VE/VE.h
@@ -451,8 +451,8 @@ inline static uint64_t val2MImm(uint64_t Val) {
if (Val == 0)
return 0; // (0)1
if (Val & (UINT64_C(1) << 63))
- return countLeadingOnes(Val); // (m)1
- return countLeadingZeros(Val) | 0x40; // (m)0
+ return llvm::countl_one(Val); // (m)1
+ return llvm::countl_zero(Val) | 0x40; // (m)0
}
/// mimm2Val - Convert a target MImm immediate to an integer immediate value.
diff --git a/llvm/lib/Target/VE/VE.td b/llvm/lib/Target/VE/VE.td
index 16d6c36ee4ab..1cb4a642632c 100644
--- a/llvm/lib/Target/VE/VE.td
+++ b/llvm/lib/Target/VE/VE.td
@@ -30,9 +30,7 @@ include "VERegisterInfo.td"
include "VECallingConv.td"
include "VEInstrInfo.td"
-def VEInstrInfo : InstrInfo {
- let useDeprecatedPositionallyEncodedOperands = 1;
-}
+def VEInstrInfo : InstrInfo {}
def VEAsmParser : AsmParser {
// Use both VE register name matcher to accept "S0~S63" register names
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index f373b3643679..1ebfa5330d42 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -379,7 +379,7 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// Analyze return values.
CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));
- SDValue Flag;
+ SDValue Glue;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Copy the result values into the output registers.
@@ -422,20 +422,20 @@ VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
llvm_unreachable("Unknown loc info!");
}
- Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Glue);
// Guarantee that all emitted copies are stuck together with flags.
- Flag = Chain.getValue(1);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
RetOps[0] = Chain; // Update chain.
- // Add the flag if we have it.
- if (Flag.getNode())
- RetOps.push_back(Flag);
+ // Add the glue if we have it.
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
- return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
+ return DAG.getNode(VEISD::RET_GLUE, DL, MVT::Other, RetOps);
}
SDValue VETargetLowering::LowerFormalArguments(
@@ -615,7 +615,7 @@ SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));
// Get the size of the outgoing arguments stack space requirement.
- unsigned ArgsSize = CCInfo.getNextStackOffset();
+ unsigned ArgsSize = CCInfo.getStackSize();
// Keep stack frames 16-byte aligned.
ArgsSize = alignTo(ArgsSize, 16);
@@ -948,7 +948,7 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
TARGET_NODE_CASE(GLOBAL_BASE_REG)
TARGET_NODE_CASE(Hi)
TARGET_NODE_CASE(Lo)
- TARGET_NODE_CASE(RET_FLAG)
+ TARGET_NODE_CASE(RET_GLUE)
TARGET_NODE_CASE(TS1AM)
TARGET_NODE_CASE(VEC_UNPACK_LO)
TARGET_NODE_CASE(VEC_UNPACK_HI)
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index ee913659ffdc..fa4ced5e2f9e 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -41,7 +41,7 @@ enum NodeType : unsigned {
GLOBAL_BASE_REG, // Global base reg for PIC.
Hi, // Hi/Lo operations, typically on a global address.
Lo, // Hi/Lo operations, typically on a global address.
- RET_FLAG, // Return with a flag operand.
+ RET_GLUE, // Return with a flag operand.
TS1AM, // A TS1AM instruction used for 1/2 bytes swap.
VEC_UNPACK_LO, // unpack the lo v256 slice of a packed v512 vector.
VEC_UNPACK_HI, // unpack the hi v256 slice of a packed v512 vector.
diff --git a/llvm/lib/Target/VE/VEInstrFormats.td b/llvm/lib/Target/VE/VEInstrFormats.td
index f43c9755f1b9..a2d2ae929dbd 100644
--- a/llvm/lib/Target/VE/VEInstrFormats.td
+++ b/llvm/lib/Target/VE/VEInstrFormats.td
@@ -117,7 +117,7 @@ class CF<bits<8>opVal, dag outs, dag ins, string asmstr, list<dag> pattern = []>
bits<1> cx = 0;
bits<1> cx2 = 0;
bits<2> bpf = 0;
- bits<4> cf;
+ bits<4> cond;
bits<1> cy = 1;
bits<7> sy;
bits<1> cz = 1;
@@ -127,7 +127,7 @@ class CF<bits<8>opVal, dag outs, dag ins, string asmstr, list<dag> pattern = []>
let Inst{55} = cx;
let Inst{54} = cx2;
let Inst{53-52} = bpf;
- let Inst{51-48} = cf;
+ let Inst{51-48} = cond;
let Inst{47} = cy;
let Inst{46-40} = sy;
let Inst{39} = cz;
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 40cfa4b9211b..166598cab41d 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -441,7 +441,7 @@ def call : SDNode<"VEISD::CALL", SDT_SPCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
-def retflag : SDNode<"VEISD::RET_FLAG", SDTNone,
+def retglue : SDNode<"VEISD::RET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def getGOT : Operand<iPTR>;
@@ -760,14 +760,14 @@ multiclass CVTRDm<string opcStr, bits<8> opc, RegisterClass RCo,
def r : RR<opc, (outs RCo:$sx), (ins RDOp:$rd, RCi:$sy),
!strconcat(opcStr, "${rd} $sx, $sy")> {
bits<4> rd;
- let sz{5-4} = 0;
+ let sz{6-4} = 0;
let sz{3-0} = rd;
}
let cy = 0 in
def i : RR<opc, (outs RCo:$sx), (ins RDOp:$rd, simm7:$sy),
!strconcat(opcStr, "${rd} $sx, $sy")> {
bits<4> rd;
- let sz{5-4} = 0;
+ let sz{6-4} = 0;
let sz{3-0} = rd;
}
}
@@ -790,33 +790,33 @@ multiclass CVTm<string opcStr, bits<8> opc, RegisterClass RCo, ValueType Tyo,
// e.g. PFCH
let sx = 0, hasSideEffects = 0 in
multiclass PFCHm<string opcStr, bits<8>opc> {
- def rri : RM<opc, (outs), (ins MEMrri:$addr), !strconcat(opcStr, " $addr"),
+ def rri : RM<opc, (outs), (ins (MEMrri $sz, $sy, $imm32):$addr), !strconcat(opcStr, " $addr"),
[(prefetch ADDRrri:$addr, imm, imm, (i32 1))]>;
let cy = 0 in
- def rii : RM<opc, (outs), (ins MEMrii:$addr), !strconcat(opcStr, " $addr"),
+ def rii : RM<opc, (outs), (ins (MEMrii $sz, $sy, $imm32):$addr), !strconcat(opcStr, " $addr"),
[(prefetch ADDRrii:$addr, imm, imm, (i32 1))]>;
let cz = 0 in
- def zri : RM<opc, (outs), (ins MEMzri:$addr), !strconcat(opcStr, " $addr"),
+ def zri : RM<opc, (outs), (ins (MEMzri $sz, $sy, $imm32):$addr), !strconcat(opcStr, " $addr"),
[(prefetch ADDRzri:$addr, imm, imm, (i32 1))]>;
let cy = 0, cz = 0 in
- def zii : RM<opc, (outs), (ins MEMzii:$addr), !strconcat(opcStr, " $addr"),
+ def zii : RM<opc, (outs), (ins (MEMzii $sz, $sy, $imm32):$addr), !strconcat(opcStr, " $addr"),
[(prefetch ADDRzii:$addr, imm, imm, (i32 1))]>;
}
// Multiclass for CAS instructions.
// e.g. TS1AML, TS1AMW, TS2AM, etc.
-let Constraints = "$dest = $sd", DisableEncoding = "$sd",
+let Constraints = "$sx = $sd", DisableEncoding = "$sd",
mayStore=1, mayLoad = 1, hasSideEffects = 0 in
multiclass RRCAStgm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty,
Operand immOp, Operand MEM, ComplexPattern ADDR,
SDPatternOperator OpNode = null_frag> {
- def r : RRM<opc, (outs RC:$dest), (ins MEM:$addr, RC:$sy, RC:$sd),
- !strconcat(opcStr, " $dest, $addr, $sy"),
- [(set Ty:$dest, (OpNode ADDR:$addr, Ty:$sy, Ty:$sd))]>;
+ def r : RRM<opc, (outs RC:$sx), (ins (MEM $sz, $imm32):$addr, RC:$sy, RC:$sd),
+ !strconcat(opcStr, " $sx, $addr, $sy"),
+ [(set Ty:$sx, (OpNode ADDR:$addr, Ty:$sy, Ty:$sd))]>;
let cy = 0 in
- def i : RRM<opc, (outs RC:$dest), (ins MEM:$addr, immOp:$sy, RC:$sd),
- !strconcat(opcStr, " $dest, $addr, $sy"),
- [(set Ty:$dest, (OpNode ADDR:$addr, (Ty immOp:$sy), Ty:$sd))]>;
+ def i : RRM<opc, (outs RC:$sx), (ins (MEM $sz, $imm32):$addr, immOp:$sy, RC:$sd),
+ !strconcat(opcStr, " $sx, $addr, $sy"),
+ [(set Ty:$sx, (OpNode ADDR:$addr, (Ty immOp:$sy), Ty:$sd))]>;
}
multiclass RRCASm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty,
Operand immOp, SDPatternOperator OpNode = null_frag> {
@@ -831,13 +831,13 @@ let isBranch = 1, isTerminator = 1, isIndirectBranch = 1, hasSideEffects = 0 in
multiclass BCbpfm<string opcStr, string cmpStr, bits<8> opc, dag cond,
Operand ADDR> {
let bpf = 0 /* NONE */ in
- def "" : CF<opc, (outs), !con(cond, (ins ADDR:$addr)),
+ def "" : CF<opc, (outs), !con(cond, (ins (ADDR $sz, $imm32):$addr)),
!strconcat(opcStr, " ", cmpStr, "$addr")>;
let bpf = 2 /* NOT TAKEN */ in
- def _nt : CF<opc, (outs), !con(cond, (ins ADDR:$addr)),
+ def _nt : CF<opc, (outs), !con(cond, (ins (ADDR $sz, $imm32):$addr)),
!strconcat(opcStr, ".nt ", cmpStr, "$addr")>;
let bpf = 3 /* TAKEN */ in
- def _t : CF<opc, (outs), !con(cond, (ins ADDR:$addr)),
+ def _t : CF<opc, (outs), !con(cond, (ins (ADDR $sz, $imm32):$addr)),
!strconcat(opcStr, ".t ", cmpStr, "$addr")>;
}
multiclass BCtgm<string opcStr, string cmpStr, bits<8> opc, dag cond> {
@@ -847,14 +847,14 @@ multiclass BCtgm<string opcStr, string cmpStr, bits<8> opc, dag cond> {
multiclass BCm<string opcStr, string opcStrAt, string opcStrAf, bits<8> opc,
RegisterClass RC, Operand immOp> {
let DecoderMethod = "DecodeBranchCondition" in
- defm r : BCtgm<opcStr, "$comp, ", opc, (ins CCOp:$cond, RC:$comp)>;
+ defm r : BCtgm<opcStr, "$sy, ", opc, (ins CCOp:$cond, RC:$sy)>;
let DecoderMethod = "DecodeBranchCondition", cy = 0 in
- defm i : BCtgm<opcStr, "$comp, ", opc, (ins CCOp:$cond, immOp:$comp)>;
+ defm i : BCtgm<opcStr, "$sy, ", opc, (ins CCOp:$cond, immOp:$sy)>;
let DecoderMethod = "DecodeBranchConditionAlways", cy = 0, sy = 0,
- cf = 15 /* AT */, isBarrier = 1 in
+ cond = 15 /* AT */, isBarrier = 1 in
defm a : BCtgm<opcStrAt, "", opc, (ins)>;
let DecoderMethod = "DecodeBranchConditionAlways", cy = 0, sy = 0,
- cf = 0 /* AF */ in
+ cond = 0 /* AF */ in
defm na : BCtgm<opcStrAf, "", opc, (ins)>;
}
@@ -874,19 +874,19 @@ multiclass BCRbpfm<string opcStr, string cmpStr, bits<8> opc, dag cond> {
}
multiclass BCRm<string opcStr, string opcStrAt, string opcStrAf, bits<8> opc,
RegisterClass RC, Operand immOp, Operand zeroOp> {
- defm rr : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, RC:$sy, RC:$sz)>;
+ defm rr : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cond, RC:$sy, RC:$sz)>;
let cy = 0 in
- defm ir : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, immOp:$sy,
+ defm ir : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cond, immOp:$sy,
RC:$sz)>;
let cz = 0 in
- defm rz : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, RC:$sy,
+ defm rz : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cond, RC:$sy,
zeroOp:$sz)>;
let cy = 0, cz = 0 in
- defm iz : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cf, immOp:$sy,
+ defm iz : BCRbpfm<opcStr, "$sy, $sz, ", opc, (ins CCOp:$cond, immOp:$sy,
zeroOp:$sz)>;
- let cy = 0, sy = 0, cz = 0, sz = 0, cf = 15 /* AT */, isBarrier = 1 in
+ let cy = 0, sy = 0, cz = 0, sz = 0, cond = 15 /* AT */, isBarrier = 1 in
defm a : BCRbpfm<opcStrAt, "", opc, (ins)>;
- let cy = 0, sy = 0, cz = 0, sz = 0, cf = 0 /* AF */ in
+ let cy = 0, sy = 0, cz = 0, sz = 0, cond = 0 /* AF */ in
defm na : BCRbpfm<opcStrAf, "", opc, (ins)>;
}
@@ -947,20 +947,20 @@ multiclass FIDCRm<string opcStr, bits<8>opc, RegisterClass RC> {
// Multiclass for LHM instruction.
let mayLoad = 1, hasSideEffects = 0 in
multiclass LHMm<string opcStr, bits<8> opc, RegisterClass RC> {
- def ri : RRMHM<opc, (outs RC:$dest), (ins MEMriHM:$addr),
- !strconcat(opcStr, " $dest, $addr")>;
+ def ri : RRMHM<opc, (outs RC:$sx), (ins (MEMriHM $sz, $imm32):$addr),
+ !strconcat(opcStr, " $sx, $addr")>;
let cz = 0 in
- def zi : RRMHM<opc, (outs RC:$dest), (ins MEMziHM:$addr),
- !strconcat(opcStr, " $dest, $addr")>;
+ def zi : RRMHM<opc, (outs RC:$sx), (ins (MEMziHM $sz, $imm32):$addr),
+ !strconcat(opcStr, " $sx, $addr")>;
}
// Multiclass for SHM instruction.
let mayStore = 1, hasSideEffects = 0 in
multiclass SHMm<string opcStr, bits<8> opc, RegisterClass RC> {
- def ri : RRMHM<opc, (outs), (ins MEMriHM:$addr, RC:$sx),
+ def ri : RRMHM<opc, (outs), (ins (MEMriHM $sz, $imm32):$addr, RC:$sx),
!strconcat(opcStr, " $sx, $addr")>;
let cz = 0 in
- def zi : RRMHM<opc, (outs), (ins MEMziHM:$addr, RC:$sx),
+ def zi : RRMHM<opc, (outs), (ins (MEMziHM $sz, $imm32):$addr, RC:$sx),
!strconcat(opcStr, " $sx, $addr")>;
}
@@ -978,17 +978,17 @@ multiclass SHMm<string opcStr, bits<8> opc, RegisterClass RC> {
// Multiclass for generic RM instructions
multiclass RMm<string opcStr, bits<8>opc, RegisterClass RC, bit MoveImm = 0> {
- def rri : RM<opc, (outs RC:$dest), (ins MEMrri:$addr),
- !strconcat(opcStr, " $dest, $addr"), []>;
+ def rri : RM<opc, (outs RC:$sx), (ins (MEMrri $sz, $sy, $imm32):$addr),
+ !strconcat(opcStr, " $sx, $addr"), []>;
let cy = 0 in
- def rii : RM<opc, (outs RC:$dest), (ins MEMrii:$addr),
- !strconcat(opcStr, " $dest, $addr"), []>;
+ def rii : RM<opc, (outs RC:$sx), (ins (MEMrii $sz, $sy, $imm32):$addr),
+ !strconcat(opcStr, " $sx, $addr"), []>;
let cz = 0 in
- def zri : RM<opc, (outs RC:$dest), (ins MEMzri:$addr),
- !strconcat(opcStr, " $dest, $addr"), []>;
+ def zri : RM<opc, (outs RC:$sx), (ins (MEMzri $sz, $sy, $imm32):$addr),
+ !strconcat(opcStr, " $sx, $addr"), []>;
let cy = 0, cz = 0 in
- def zii : RM<opc, (outs RC:$dest), (ins MEMzii:$addr),
- !strconcat(opcStr, " $dest, $addr"), []> {
+ def zii : RM<opc, (outs RC:$sx), (ins (MEMzii $sz, $sy, $imm32):$addr),
+ !strconcat(opcStr, " $sx, $addr"), []> {
// VE uses LEAzii and LEASLzii as a move immediate instruction, so declare
// it here. An instruction declared as MoveImm will be optimized in
// FoldImmediate later.
@@ -1014,21 +1014,21 @@ def : Pat<(add I64:$base, lozero:$disp), (LEASLrii $base, 0, (HI32 $disp))>;
let mayLoad = 1, hasSideEffects = 0 in
multiclass LOADm<string opcStr, bits<8> opc, RegisterClass RC, ValueType Ty,
SDPatternOperator OpNode = null_frag> {
- def rri : RM<opc, (outs RC:$dest), (ins MEMrri:$addr),
- !strconcat(opcStr, " $dest, $addr"),
- [(set Ty:$dest, (OpNode ADDRrri:$addr))]>;
+ def rri : RM<opc, (outs RC:$sx), (ins (MEMrri $sz, $sy, $imm32):$addr),
+ !strconcat(opcStr, " $sx, $addr"),
+ [(set Ty:$sx, (OpNode ADDRrri:$addr))]>;
let cy = 0 in
- def rii : RM<opc, (outs RC:$dest), (ins MEMrii:$addr),
- !strconcat(opcStr, " $dest, $addr"),
- [(set Ty:$dest, (OpNode ADDRrii:$addr))]>;
+ def rii : RM<opc, (outs RC:$sx), (ins (MEMrii $sz, $sy, $imm32):$addr),
+ !strconcat(opcStr, " $sx, $addr"),
+ [(set Ty:$sx, (OpNode ADDRrii:$addr))]>;
let cz = 0 in
- def zri : RM<opc, (outs RC:$dest), (ins MEMzri:$addr),
- !strconcat(opcStr, " $dest, $addr"),
- [(set Ty:$dest, (OpNode ADDRzri:$addr))]>;
+ def zri : RM<opc, (outs RC:$sx), (ins (MEMzri $sz, $sy, $imm32):$addr),
+ !strconcat(opcStr, " $sx, $addr"),
+ [(set Ty:$sx, (OpNode ADDRzri:$addr))]>;
let cy = 0, cz = 0 in
- def zii : RM<opc, (outs RC:$dest), (ins MEMzii:$addr),
- !strconcat(opcStr, " $dest, $addr"),
- [(set Ty:$dest, (OpNode ADDRzii:$addr))]>;
+ def zii : RM<opc, (outs RC:$sx), (ins (MEMzii $sz, $sy, $imm32):$addr),
+ !strconcat(opcStr, " $sx, $addr"),
+ [(set Ty:$sx, (OpNode ADDRzii:$addr))]>;
}
// Section 8.2.2 - LDS
@@ -1072,19 +1072,19 @@ let mayLoad = 1, hasSideEffects = 0 in {
let mayStore = 1 in
multiclass STOREm<string opcStr, bits<8> opc, RegisterClass RC, ValueType Ty,
SDPatternOperator OpNode = null_frag> {
- def rri : RM<opc, (outs), (ins MEMrri:$addr, RC:$sx),
+ def rri : RM<opc, (outs), (ins (MEMrri $sz, $sy, $imm32):$addr, RC:$sx),
!strconcat(opcStr, " $sx, $addr"),
[(OpNode Ty:$sx, ADDRrri:$addr)]>;
let cy = 0 in
- def rii : RM<opc, (outs), (ins MEMrii:$addr, RC:$sx),
+ def rii : RM<opc, (outs), (ins (MEMrii $sz, $sy, $imm32):$addr, RC:$sx),
!strconcat(opcStr, " $sx, $addr"),
[(OpNode Ty:$sx, ADDRrii:$addr)]>;
let cz = 0 in
- def zri : RM<opc, (outs), (ins MEMzri:$addr, RC:$sx),
+ def zri : RM<opc, (outs), (ins (MEMzri $sz, $sy, $imm32):$addr, RC:$sx),
!strconcat(opcStr, " $sx, $addr"),
[(OpNode Ty:$sx, ADDRzri:$addr)]>;
let cy = 0, cz = 0 in
- def zii : RM<opc, (outs), (ins MEMzii:$addr, RC:$sx),
+ def zii : RM<opc, (outs), (ins (MEMzii $sz, $sy, $imm32):$addr, RC:$sx),
!strconcat(opcStr, " $sx, $addr"),
[(OpNode Ty:$sx, ADDRzii:$addr)]>;
}
@@ -1463,10 +1463,10 @@ def : Pat<(brind I64:$reg), (BCFLari_t $reg, 0)>;
def : Pat<(brind tblockaddress:$imm), (BCFLazi_t 0, $imm)>;
// Return instruction is a special case of jump.
-let Uses = [SX10], bpf = 3 /* TAKEN */, cf = 15 /* AT */, cy = 0, sy = 0,
+let Uses = [SX10], bpf = 3 /* TAKEN */, cond = 15 /* AT */, cy = 0, sy = 0,
sz = 10 /* SX10 */, imm32 = 0, isReturn = 1, isTerminator = 1,
isBarrier = 1, isCodeGenOnly = 1, hasSideEffects = 0 in
-def RET : CF<0x19, (outs), (ins), "b.l.t (, %s10)", [(retflag)]>;
+def RET : CF<0x19, (outs), (ins), "b.l.t (, %s10)", [(retglue)]>;
// Section 8.8.2 - BCS (Branch on Condition Single)
defm BCFW : BCm<"b${cond}.w", "b.w", "baf.w", 0x1B, I32, simm7>;
@@ -1478,13 +1478,13 @@ defm BCFS : BCm<"b${cond}.s", "b.s", "baf.s", 0x1C, F32, simm7fp>;
// Section 8.8.4 - BCR (Branch on Condition Relative)
let cx = 0, cx2 = 0 in
-defm BRCFL : BCRm<"br${cf}.l", "br.l", "braf.l", 0x18, I64, simm7, zero>;
+defm BRCFL : BCRm<"br${cond}.l", "br.l", "braf.l", 0x18, I64, simm7, zero>;
let cx = 1, cx2 = 0 in
-defm BRCFW : BCRm<"br${cf}.w", "br.w", "braf.w", 0x18, I32, simm7, zero>;
+defm BRCFW : BCRm<"br${cond}.w", "br.w", "braf.w", 0x18, I32, simm7, zero>;
let cx = 0, cx2 = 1 in
-defm BRCFD : BCRm<"br${cf}.d", "br.d", "braf.d", 0x18, I64, simm7fp, zerofp>;
+defm BRCFD : BCRm<"br${cond}.d", "br.d", "braf.d", 0x18, I64, simm7fp, zerofp>;
let cx = 1, cx2 = 1 in
-defm BRCFS : BCRm<"br${cf}.s", "br.s", "braf.s", 0x18, F32, simm7fp, zerofp>;
+defm BRCFS : BCRm<"br${cond}.s", "br.s", "braf.s", 0x18, F32, simm7fp, zerofp>;
// Section 8.8.5 - BSIC (Branch and Save IC)
let isCall = 1, hasSideEffects = 0, DecoderMethod = "DecodeCall" in
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
index 1cba0843f891..1e2d3888fe1c 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp
@@ -15,10 +15,9 @@
#include "AsmParser/WebAssemblyAsmTypeCheck.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
#include "TargetInfo/WebAssemblyTargetInfo.h"
-#include "Utils/WebAssemblyTypeUtilities.h"
-#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -248,15 +247,14 @@ public:
WebAssemblyAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, STI, MII), Parser(Parser),
- Lexer(Parser.getLexer()),
- is64(STI.getTargetTriple().isArch64Bit()),
+ Lexer(Parser.getLexer()), is64(STI.getTargetTriple().isArch64Bit()),
TC(Parser, MII, is64), SkipTypeCheck(Options.MCNoTypeCheck) {
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
// Don't type check if this is inline asm, since that is a naked sequence of
// instructions without a function/locals decl.
auto &SM = Parser.getSourceManager();
auto BufferName =
- SM.getBufferInfo(SM.getMainFileID()).Buffer->getBufferIdentifier();
+ SM.getBufferInfo(SM.getMainFileID()).Buffer->getBufferIdentifier();
if (BufferName == "<inline asm>")
SkipTypeCheck = true;
}
@@ -323,7 +321,9 @@ public:
}
}
- void push(NestingType NT) { NestingStack.push_back({NT, wasm::WasmSignature()}); }
+ void push(NestingType NT, wasm::WasmSignature Sig = wasm::WasmSignature()) {
+ NestingStack.push_back({NT, Sig});
+ }
bool pop(StringRef Ins, NestingType NT1, NestingType NT2 = Undefined) {
if (NestingStack.empty())
@@ -337,6 +337,19 @@ public:
return false;
}
+ // Pop a NestingType and push a new NestingType with the same signature. Used
+ // for if-else and try-catch(_all).
+ bool popAndPushWithSameSignature(StringRef Ins, NestingType PopNT,
+ NestingType PushNT) {
+ if (NestingStack.empty())
+ return error(Twine("End of block construct with no start: ") + Ins);
+ auto Sig = NestingStack.back().Sig;
+ if (pop(Ins, PopNT))
+ return true;
+ push(PushNT, Sig);
+ return false;
+ }
+
bool ensureEmptyNestingStack(SMLoc Loc = SMLoc()) {
auto Err = !NestingStack.empty();
while (!NestingStack.empty()) {
@@ -588,17 +601,14 @@ public:
push(If);
ExpectBlockType = true;
} else if (Name == "else") {
- if (pop(Name, If))
+ if (popAndPushWithSameSignature(Name, If, Else))
return true;
- push(Else);
} else if (Name == "catch") {
- if (pop(Name, Try))
+ if (popAndPushWithSameSignature(Name, Try, Try))
return true;
- push(Try);
} else if (Name == "catch_all") {
- if (pop(Name, Try))
+ if (popAndPushWithSameSignature(Name, Try, CatchAll))
return true;
- push(CatchAll);
} else if (Name == "end_if") {
if (pop(Name, If, Else))
return true;
@@ -638,10 +648,10 @@ public:
if (parseSignature(Signature.get()))
return true;
// Got signature as block type, don't need more
- ExpectBlockType = false;
TC.setLastSig(*Signature.get());
if (ExpectBlockType)
NestingStack.back().Sig = *Signature.get();
+ ExpectBlockType = false;
auto &Ctx = getContext();
// The "true" here will cause this to be a nameless symbol.
MCSymbol *Sym = Ctx.createTempSymbol("typeindex", true);
@@ -691,7 +701,7 @@ public:
parseSingleInteger(true, Operands);
if (checkForP2AlignIfLoadStore(Operands, Name))
return true;
- } else if(Lexer.is(AsmToken::Real)) {
+ } else if (Lexer.is(AsmToken::Real)) {
if (parseSingleFloat(true, Operands))
return true;
} else if (!parseSpecialFloatMaybe(true, Operands)) {
@@ -775,31 +785,23 @@ public:
// This function processes wasm-specific directives streamed to
// WebAssemblyTargetStreamer, all others go to the generic parser
// (see WasmAsmParser).
- bool ParseDirective(AsmToken DirectiveID) override {
- // This function has a really weird return value behavior that is different
- // from all the other parsing functions:
- // - return true && no tokens consumed -> don't know this directive / let
- // the generic parser handle it.
- // - return true && tokens consumed -> a parsing error occurred.
- // - return false -> processed this directive successfully.
+ ParseStatus parseDirective(AsmToken DirectiveID) override {
assert(DirectiveID.getKind() == AsmToken::Identifier);
auto &Out = getStreamer();
auto &TOut =
reinterpret_cast<WebAssemblyTargetStreamer &>(*Out.getTargetStreamer());
auto &Ctx = Out.getContext();
- // TODO: any time we return an error, at least one token must have been
- // consumed, otherwise this will not signal an error to the caller.
if (DirectiveID.getString() == ".globaltype") {
auto SymName = expectIdent();
if (SymName.empty())
- return true;
+ return ParseStatus::Failure;
if (expect(AsmToken::Comma, ","))
- return true;
+ return ParseStatus::Failure;
auto TypeTok = Lexer.getTok();
auto TypeName = expectIdent();
if (TypeName.empty())
- return true;
+ return ParseStatus::Failure;
auto Type = WebAssembly::parseType(TypeName);
if (!Type)
return error("Unknown type in .globaltype directive: ", TypeTok);
@@ -810,6 +812,8 @@ public:
if (isNext(AsmToken::Comma)) {
TypeTok = Lexer.getTok();
auto Id = expectIdent();
+ if (Id.empty())
+ return ParseStatus::Failure;
if (Id == "immutable")
Mutable = false;
else
@@ -829,14 +833,14 @@ public:
// .tabletype SYM, ELEMTYPE[, MINSIZE[, MAXSIZE]]
auto SymName = expectIdent();
if (SymName.empty())
- return true;
+ return ParseStatus::Failure;
if (expect(AsmToken::Comma, ","))
- return true;
+ return ParseStatus::Failure;
auto ElemTypeTok = Lexer.getTok();
auto ElemTypeName = expectIdent();
if (ElemTypeName.empty())
- return true;
+ return ParseStatus::Failure;
std::optional<wasm::ValType> ElemType =
WebAssembly::parseType(ElemTypeName);
if (!ElemType)
@@ -844,7 +848,7 @@ public:
wasm::WasmLimits Limits = DefaultLimits();
if (isNext(AsmToken::Comma) && parseLimits(&Limits))
- return true;
+ return ParseStatus::Failure;
// Now that we have the name and table type, we can actually create the
// symbol
@@ -864,7 +868,7 @@ public:
// parses the locals separately.
auto SymName = expectIdent();
if (SymName.empty())
- return true;
+ return ParseStatus::Failure;
auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
if (WasmSym->isDefined()) {
// We push 'Function' either when a label is parsed or a .functype
@@ -880,7 +884,7 @@ public:
if (CurrentState != FunctionLabel) {
// This .functype indicates a start of a function.
if (ensureEmptyNestingStack())
- return true;
+ return ParseStatus::Failure;
push(Function);
}
CurrentState = FunctionStart;
@@ -888,7 +892,7 @@ public:
}
auto Signature = std::make_unique<wasm::WasmSignature>();
if (parseSignature(Signature.get()))
- return true;
+ return ParseStatus::Failure;
TC.funcDecl(*Signature);
WasmSym->setSignature(Signature.get());
addSignature(std::move(Signature));
@@ -901,47 +905,56 @@ public:
if (DirectiveID.getString() == ".export_name") {
auto SymName = expectIdent();
if (SymName.empty())
- return true;
+ return ParseStatus::Failure;
if (expect(AsmToken::Comma, ","))
- return true;
+ return ParseStatus::Failure;
auto ExportName = expectIdent();
+ if (ExportName.empty())
+ return ParseStatus::Failure;
auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
WasmSym->setExportName(storeName(ExportName));
TOut.emitExportName(WasmSym, ExportName);
+ return expect(AsmToken::EndOfStatement, "EOL");
}
if (DirectiveID.getString() == ".import_module") {
auto SymName = expectIdent();
if (SymName.empty())
- return true;
+ return ParseStatus::Failure;
if (expect(AsmToken::Comma, ","))
- return true;
+ return ParseStatus::Failure;
auto ImportModule = expectIdent();
+ if (ImportModule.empty())
+ return ParseStatus::Failure;
auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
WasmSym->setImportModule(storeName(ImportModule));
TOut.emitImportModule(WasmSym, ImportModule);
+ return expect(AsmToken::EndOfStatement, "EOL");
}
if (DirectiveID.getString() == ".import_name") {
auto SymName = expectIdent();
if (SymName.empty())
- return true;
+ return ParseStatus::Failure;
if (expect(AsmToken::Comma, ","))
- return true;
+ return ParseStatus::Failure;
auto ImportName = expectIdent();
+ if (ImportName.empty())
+ return ParseStatus::Failure;
auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
WasmSym->setImportName(storeName(ImportName));
TOut.emitImportName(WasmSym, ImportName);
+ return expect(AsmToken::EndOfStatement, "EOL");
}
if (DirectiveID.getString() == ".tagtype") {
auto SymName = expectIdent();
if (SymName.empty())
- return true;
+ return ParseStatus::Failure;
auto WasmSym = cast<MCSymbolWasm>(Ctx.getOrCreateSymbol(SymName));
auto Signature = std::make_unique<wasm::WasmSignature>();
if (parseRegTypeList(Signature->Params))
- return true;
+ return ParseStatus::Failure;
WasmSym->setSignature(Signature.get());
addSignature(std::move(Signature));
WasmSym->setType(wasm::WASM_SYMBOL_TYPE_TAG);
@@ -956,7 +969,7 @@ public:
Lexer.getTok());
SmallVector<wasm::ValType, 4> Locals;
if (parseRegTypeList(Locals))
- return true;
+ return ParseStatus::Failure;
TC.localDecl(Locals);
TOut.emitLocal(Locals);
CurrentState = FunctionLocals;
@@ -967,7 +980,8 @@ public:
DirectiveID.getString() == ".int16" ||
DirectiveID.getString() == ".int32" ||
DirectiveID.getString() == ".int64") {
- if (CheckDataSection()) return true;
+ if (CheckDataSection())
+ return ParseStatus::Failure;
const MCExpr *Val;
SMLoc End;
if (Parser.parseExpression(Val, End))
@@ -979,7 +993,8 @@ public:
}
if (DirectiveID.getString() == ".asciz") {
- if (CheckDataSection()) return true;
+ if (CheckDataSection())
+ return ParseStatus::Failure;
std::string S;
if (Parser.parseEscapedString(S))
return error("Cannot parse string constant: ", Lexer.getTok());
@@ -987,7 +1002,7 @@ public:
return expect(AsmToken::EndOfStatement, "EOL");
}
- return true; // We didn't process this directive.
+ return ParseStatus::NoMatch; // We didn't process this directive.
}
// Called either when the first instruction is parsed or when the function ends.
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
index b323b265b562..bc0cb2d10cdb 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.cpp
@@ -15,10 +15,9 @@
#include "AsmParser/WebAssemblyAsmTypeCheck.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
#include "TargetInfo/WebAssemblyTargetInfo.h"
-#include "Utils/WebAssemblyTypeUtilities.h"
-#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -45,16 +44,18 @@ extern StringRef GetMnemonic(unsigned Opc);
namespace llvm {
WebAssemblyAsmTypeCheck::WebAssemblyAsmTypeCheck(MCAsmParser &Parser,
- const MCInstrInfo &MII, bool is64)
- : Parser(Parser), MII(MII), is64(is64) {
-}
+ const MCInstrInfo &MII,
+ bool is64)
+ : Parser(Parser), MII(MII), is64(is64) {}
void WebAssemblyAsmTypeCheck::funcDecl(const wasm::WasmSignature &Sig) {
LocalTypes.assign(Sig.Params.begin(), Sig.Params.end());
ReturnTypes.assign(Sig.Returns.begin(), Sig.Returns.end());
+ BrStack.emplace_back(Sig.Returns.begin(), Sig.Returns.end());
}
-void WebAssemblyAsmTypeCheck::localDecl(const SmallVector<wasm::ValType, 4> &Locals) {
+void WebAssemblyAsmTypeCheck::localDecl(
+ const SmallVectorImpl<wasm::ValType> &Locals) {
LocalTypes.insert(LocalTypes.end(), Locals.begin(), Locals.end());
}
@@ -117,38 +118,63 @@ bool WebAssemblyAsmTypeCheck::getLocal(SMLoc ErrorLoc, const MCInst &Inst,
auto Local = static_cast<size_t>(Inst.getOperand(0).getImm());
if (Local >= LocalTypes.size())
return typeError(ErrorLoc, StringRef("no local type specified for index ") +
- std::to_string(Local));
+ std::to_string(Local));
Type = LocalTypes[Local];
return false;
}
+static std::optional<std::string>
+checkStackTop(const SmallVectorImpl<wasm::ValType> &ExpectedStackTop,
+ const SmallVectorImpl<wasm::ValType> &Got) {
+ for (size_t I = 0; I < ExpectedStackTop.size(); I++) {
+ auto EVT = ExpectedStackTop[I];
+ auto PVT = Got[Got.size() - ExpectedStackTop.size() + I];
+ if (PVT != EVT)
+ return std::string{"got "} + WebAssembly::typeToString(PVT) +
+ ", expected " + WebAssembly::typeToString(EVT);
+ }
+ return std::nullopt;
+}
+
+bool WebAssemblyAsmTypeCheck::checkBr(SMLoc ErrorLoc, size_t Level) {
+ if (Level >= BrStack.size())
+ return typeError(ErrorLoc,
+ StringRef("br: invalid depth ") + std::to_string(Level));
+ const SmallVector<wasm::ValType, 4> &Expected =
+ BrStack[BrStack.size() - Level - 1];
+ if (Expected.size() > Stack.size())
+ return typeError(ErrorLoc, "br: insufficient values on the type stack");
+ auto IsStackTopInvalid = checkStackTop(Expected, Stack);
+ if (IsStackTopInvalid)
+ return typeError(ErrorLoc, "br " + IsStackTopInvalid.value());
+ return false;
+}
+
bool WebAssemblyAsmTypeCheck::checkEnd(SMLoc ErrorLoc, bool PopVals) {
+ if (!PopVals)
+ BrStack.pop_back();
if (LastSig.Returns.size() > Stack.size())
return typeError(ErrorLoc, "end: insufficient values on the type stack");
-
+
if (PopVals) {
for (auto VT : llvm::reverse(LastSig.Returns)) {
- if (popType(ErrorLoc, VT))
+ if (popType(ErrorLoc, VT))
return true;
}
return false;
}
-
- for (size_t i = 0; i < LastSig.Returns.size(); i++) {
- auto EVT = LastSig.Returns[i];
- auto PVT = Stack[Stack.size() - LastSig.Returns.size() + i];
- if (PVT != EVT)
- return typeError(
- ErrorLoc, StringRef("end got ") + WebAssembly::typeToString(PVT) +
- ", expected " + WebAssembly::typeToString(EVT));
- }
+
+ auto IsStackTopInvalid = checkStackTop(LastSig.Returns, Stack);
+ if (IsStackTopInvalid)
+ return typeError(ErrorLoc, "end " + IsStackTopInvalid.value());
return false;
}
bool WebAssemblyAsmTypeCheck::checkSig(SMLoc ErrorLoc,
- const wasm::WasmSignature& Sig) {
+ const wasm::WasmSignature &Sig) {
for (auto VT : llvm::reverse(Sig.Params))
- if (popType(ErrorLoc, VT)) return true;
+ if (popType(ErrorLoc, VT))
+ return true;
Stack.insert(Stack.end(), Sig.Returns.begin(), Sig.Returns.end());
return false;
}
@@ -187,7 +213,7 @@ bool WebAssemblyAsmTypeCheck::getGlobal(SMLoc ErrorLoc, const MCInst &Inst,
[[fallthrough]];
default:
return typeError(ErrorLoc, StringRef("symbol ") + WasmSym->getName() +
- " missing .globaltype");
+ " missing .globaltype");
}
return false;
}
@@ -272,22 +298,77 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst,
return true;
if (popType(ErrorLoc, wasm::ValType::I32))
return true;
+ } else if (Name == "memory.fill") {
+ Type = is64 ? wasm::ValType::I64 : wasm::ValType::I32;
+ if (popType(ErrorLoc, Type))
+ return true;
+ if (popType(ErrorLoc, wasm::ValType::I32))
+ return true;
+ if (popType(ErrorLoc, Type))
+ return true;
+ } else if (Name == "memory.copy") {
+ Type = is64 ? wasm::ValType::I64 : wasm::ValType::I32;
+ if (popType(ErrorLoc, Type))
+ return true;
+ if (popType(ErrorLoc, Type))
+ return true;
+ if (popType(ErrorLoc, Type))
+ return true;
+ } else if (Name == "memory.init") {
+ Type = is64 ? wasm::ValType::I64 : wasm::ValType::I32;
+ if (popType(ErrorLoc, wasm::ValType::I32))
+ return true;
+ if (popType(ErrorLoc, wasm::ValType::I32))
+ return true;
+ if (popType(ErrorLoc, Type))
+ return true;
} else if (Name == "drop") {
if (popType(ErrorLoc, {}))
return true;
+ } else if (Name == "try" || Name == "block" || Name == "loop" ||
+ Name == "if") {
+ if (Name == "if" && popType(ErrorLoc, wasm::ValType::I32))
+ return true;
+ if (Name == "loop")
+ BrStack.emplace_back(LastSig.Params.begin(), LastSig.Params.end());
+ else
+ BrStack.emplace_back(LastSig.Returns.begin(), LastSig.Returns.end());
} else if (Name == "end_block" || Name == "end_loop" || Name == "end_if" ||
- Name == "else" || Name == "end_try") {
- if (checkEnd(ErrorLoc, Name == "else"))
+ Name == "else" || Name == "end_try" || Name == "catch" ||
+ Name == "catch_all" || Name == "delegate") {
+ if (checkEnd(ErrorLoc,
+ Name == "else" || Name == "catch" || Name == "catch_all"))
+ return true;
+ Unreachable = false;
+ if (Name == "catch") {
+ const MCSymbolRefExpr *SymRef;
+ if (getSymRef(Operands[1]->getStartLoc(), Inst, SymRef))
+ return true;
+ const auto *WasmSym = cast<MCSymbolWasm>(&SymRef->getSymbol());
+ const auto *Sig = WasmSym->getSignature();
+ if (!Sig || WasmSym->getType() != wasm::WASM_SYMBOL_TYPE_TAG)
+ return typeError(Operands[1]->getStartLoc(), StringRef("symbol ") +
+ WasmSym->getName() +
+ " missing .tagtype");
+ // catch instruction pushes values whose types are specified in the tag's
+ // "params" part
+ Stack.insert(Stack.end(), Sig->Params.begin(), Sig->Params.end());
+ }
+ } else if (Name == "br") {
+ const MCOperand &Operand = Inst.getOperand(0);
+ if (!Operand.isImm())
+ return false;
+ if (checkBr(ErrorLoc, static_cast<size_t>(Operand.getImm())))
return true;
- if (Name == "end_block")
- Unreachable = false;
} else if (Name == "return") {
if (endOfFunction(ErrorLoc))
return true;
} else if (Name == "call_indirect" || Name == "return_call_indirect") {
// Function value.
- if (popType(ErrorLoc, wasm::ValType::I32)) return true;
- if (checkSig(ErrorLoc, LastSig)) return true;
+ if (popType(ErrorLoc, wasm::ValType::I32))
+ return true;
+ if (checkSig(ErrorLoc, LastSig))
+ return true;
if (Name == "return_call_indirect" && endOfFunction(ErrorLoc))
return true;
} else if (Name == "call" || Name == "return_call") {
@@ -300,22 +381,10 @@ bool WebAssemblyAsmTypeCheck::typeCheck(SMLoc ErrorLoc, const MCInst &Inst,
return typeError(Operands[1]->getStartLoc(), StringRef("symbol ") +
WasmSym->getName() +
" missing .functype");
- if (checkSig(ErrorLoc, *Sig)) return true;
- if (Name == "return_call" && endOfFunction(ErrorLoc))
+ if (checkSig(ErrorLoc, *Sig))
return true;
- } else if (Name == "catch") {
- const MCSymbolRefExpr *SymRef;
- if (getSymRef(Operands[1]->getStartLoc(), Inst, SymRef))
+ if (Name == "return_call" && endOfFunction(ErrorLoc))
return true;
- const auto *WasmSym = cast<MCSymbolWasm>(&SymRef->getSymbol());
- const auto *Sig = WasmSym->getSignature();
- if (!Sig || WasmSym->getType() != wasm::WASM_SYMBOL_TYPE_TAG)
- return typeError(Operands[1]->getStartLoc(), StringRef("symbol ") +
- WasmSym->getName() +
- " missing .tagtype");
- // catch instruction pushes values whose types are specified in the tag's
- // "params" part
- Stack.insert(Stack.end(), Sig->Params.begin(), Sig->Params.end());
} else if (Name == "unreachable") {
Unreachable = true;
} else if (Name == "ref.is_null") {
diff --git a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h
index 9c190e6beae7..6fa95c392975 100644
--- a/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h
+++ b/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmTypeCheck.h
@@ -29,6 +29,7 @@ class WebAssemblyAsmTypeCheck final {
const MCInstrInfo &MII;
SmallVector<wasm::ValType, 8> Stack;
+ SmallVector<SmallVector<wasm::ValType, 4>, 8> BrStack;
SmallVector<wasm::ValType, 16> LocalTypes;
SmallVector<wasm::ValType, 4> ReturnTypes;
wasm::WasmSignature LastSig;
@@ -42,6 +43,7 @@ class WebAssemblyAsmTypeCheck final {
bool popRefType(SMLoc ErrorLoc);
bool getLocal(SMLoc ErrorLoc, const MCInst &Inst, wasm::ValType &Type);
bool checkEnd(SMLoc ErrorLoc, bool PopVals = false);
+ bool checkBr(SMLoc ErrorLoc, size_t Level);
bool checkSig(SMLoc ErrorLoc, const wasm::WasmSignature &Sig);
bool getSymRef(SMLoc ErrorLoc, const MCInst &Inst,
const MCSymbolRefExpr *&SymRef);
@@ -49,16 +51,18 @@ class WebAssemblyAsmTypeCheck final {
bool getTable(SMLoc ErrorLoc, const MCInst &Inst, wasm::ValType &Type);
public:
- WebAssemblyAsmTypeCheck(MCAsmParser &Parser, const MCInstrInfo &MII, bool is64);
+ WebAssemblyAsmTypeCheck(MCAsmParser &Parser, const MCInstrInfo &MII,
+ bool is64);
void funcDecl(const wasm::WasmSignature &Sig);
- void localDecl(const SmallVector<wasm::ValType, 4> &Locals);
+ void localDecl(const SmallVectorImpl<wasm::ValType> &Locals);
void setLastSig(const wasm::WasmSignature &Sig) { LastSig = Sig; }
bool endOfFunction(SMLoc ErrorLoc);
bool typeCheck(SMLoc ErrorLoc, const MCInst &Inst, OperandVector &Operands);
void Clear() {
Stack.clear();
+ BrStack.clear();
LocalTypes.clear();
ReturnTypes.clear();
TypeErrorThisFunction = false;
@@ -68,4 +72,4 @@ public:
} // end namespace llvm
-#endif // LLVM_LIB_TARGET_WEBASSEMBLY_ASMPARSER_TYPECHECK_H
+#endif // LLVM_LIB_TARGET_WEBASSEMBLY_ASMPARSER_TYPECHECK_H
diff --git a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index 1f07b1619b49..2c3604cc72d2 100644
--- a/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -14,8 +14,8 @@
///
//===----------------------------------------------------------------------===//
+#include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
#include "TargetInfo/WebAssemblyTargetInfo.h"
-#include "Utils/WebAssemblyTypeUtilities.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDecoderOps.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
index b925519e6162..a9673ab344d3 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.cpp
@@ -13,8 +13,7 @@
#include "MCTargetDesc/WebAssemblyInstPrinter.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "Utils/WebAssemblyTypeUtilities.h"
-#include "Utils/WebAssemblyUtilities.h"
+#include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "llvm/ADT/SmallSet.h"
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
index c81c3a3d9ffa..51e4d3656ba4 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyInstPrinter.h
@@ -16,8 +16,8 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCInstPrinter.h"
-#include "llvm/Support/MachineValueType.h"
namespace llvm {
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
index 5727708a84ad..9d43c0052d52 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "WebAssemblyMCAsmInfo.h"
-#include "Utils/WebAssemblyUtilities.h"
-#include "llvm/ADT/Triple.h"
+#include "WebAssemblyMCTargetDesc.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index cd692f4dda33..634ed10d4df5 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -25,6 +26,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/LEB128.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -37,28 +39,31 @@ STATISTIC(MCNumFixups, "Number of MC fixups created.");
namespace {
class WebAssemblyMCCodeEmitter final : public MCCodeEmitter {
const MCInstrInfo &MCII;
-
+ MCContext &Ctx;
// Implementation generated by tablegen.
uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
public:
- WebAssemblyMCCodeEmitter(const MCInstrInfo &MCII) : MCII(MCII) {}
+ WebAssemblyMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
+ : MCII(MCII), Ctx{Ctx} {}
};
} // end anonymous namespace
-MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII) {
- return new WebAssemblyMCCodeEmitter(MCII);
+MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII,
+ MCContext &Ctx) {
+ return new WebAssemblyMCCodeEmitter(MCII, Ctx);
}
void WebAssemblyMCCodeEmitter::encodeInstruction(
- const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
+ const MCInst &MI, SmallVectorImpl<char> &CB,
+ SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI) const {
+ raw_svector_ostream OS(CB);
uint64_t Start = OS.tell();
uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
@@ -119,7 +124,10 @@ void WebAssemblyMCCodeEmitter::encodeInstruction(
support::endian::write<uint64_t>(OS, MO.getImm(), support::little);
break;
case WebAssembly::OPERAND_GLOBAL:
- llvm_unreachable("wasm globals should only be accessed symbolicly");
+ Ctx.reportError(
+ SMLoc(),
+ Twine("Wasm globals should only be accessed symbolically!"));
+ break;
default:
encodeULEB128(uint64_t(MO.getImm()), OS);
}
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index 97dbc35c991b..e8f58a19d25e 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -35,6 +35,26 @@ using namespace llvm;
#define GET_REGINFO_MC_DESC
#include "WebAssemblyGenRegisterInfo.inc"
+// Exception handling & setjmp-longjmp handling related options.
+
+// Emscripten's asm.js-style exception handling
+cl::opt<bool> WebAssembly::WasmEnableEmEH(
+ "enable-emscripten-cxx-exceptions",
+ cl::desc("WebAssembly Emscripten-style exception handling"),
+ cl::init(false));
+// Emscripten's asm.js-style setjmp/longjmp handling
+cl::opt<bool> WebAssembly::WasmEnableEmSjLj(
+ "enable-emscripten-sjlj",
+ cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"),
+ cl::init(false));
+// Exception handling using wasm EH instructions
+cl::opt<bool>
+ WebAssembly::WasmEnableEH("wasm-enable-eh",
+ cl::desc("WebAssembly exception handling"));
+// setjmp/longjmp handling using wasm EH instructions
+cl::opt<bool> WebAssembly::WasmEnableSjLj(
+ "wasm-enable-sjlj", cl::desc("WebAssembly setjmp/longjmp handling"));
+
static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo & /*MRI*/,
const Triple &TT,
const MCTargetOptions &Options) {
@@ -64,7 +84,7 @@ static MCInstPrinter *createMCInstPrinter(const Triple & /*T*/,
static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII,
MCContext &Ctx) {
- return createWebAssemblyMCCodeEmitter(MCII);
+ return createWebAssemblyMCCodeEmitter(MCII, Ctx);
}
static MCAsmBackend *createAsmBackend(const Target & /*T*/,
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 476955e434f2..fc33cebaa48a 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -16,6 +16,7 @@
#include "../WebAssemblySubtarget.h"
#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/DataTypes.h"
#include <memory>
@@ -28,7 +29,8 @@ class MCInstrInfo;
class MCObjectTargetWriter;
class Triple;
-MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII);
+MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII,
+ MCContext &Ctx);
MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT);
@@ -36,6 +38,13 @@ std::unique_ptr<MCObjectTargetWriter>
createWebAssemblyWasmObjectWriter(bool Is64Bit, bool IsEmscripten);
namespace WebAssembly {
+
+// Exception handling / setjmp-longjmp handling command-line options
+extern cl::opt<bool> WasmEnableEmEH; // asm.js-style EH
+extern cl::opt<bool> WasmEnableEmSjLj; // asm.js-style SjLJ
+extern cl::opt<bool> WasmEnableEH; // EH using Wasm EH instructions
+extern cl::opt<bool> WasmEnableSjLj; // SjLj using Wasm EH instructions
+
enum OperandType {
/// Basic block label in a branch construct.
OPERAND_BASIC_BLOCK = MCOI::OPERAND_FIRST_TARGET,
@@ -272,6 +281,50 @@ inline unsigned GetDefaultP2Align(unsigned Opc) {
return Align;
}
+inline bool isConst(unsigned Opc) {
+ switch (Opc) {
+ case WebAssembly::CONST_I32:
+ case WebAssembly::CONST_I32_S:
+ case WebAssembly::CONST_I64:
+ case WebAssembly::CONST_I64_S:
+ case WebAssembly::CONST_F32:
+ case WebAssembly::CONST_F32_S:
+ case WebAssembly::CONST_F64:
+ case WebAssembly::CONST_F64_S:
+ case WebAssembly::CONST_V128_I8x16:
+ case WebAssembly::CONST_V128_I8x16_S:
+ case WebAssembly::CONST_V128_I16x8:
+ case WebAssembly::CONST_V128_I16x8_S:
+ case WebAssembly::CONST_V128_I32x4:
+ case WebAssembly::CONST_V128_I32x4_S:
+ case WebAssembly::CONST_V128_I64x2:
+ case WebAssembly::CONST_V128_I64x2_S:
+ case WebAssembly::CONST_V128_F32x4:
+ case WebAssembly::CONST_V128_F32x4_S:
+ case WebAssembly::CONST_V128_F64x2:
+ case WebAssembly::CONST_V128_F64x2_S:
+ return true;
+ default:
+ return false;
+ }
+}
+
+inline bool isScalarConst(unsigned Opc) {
+ switch (Opc) {
+ case WebAssembly::CONST_I32:
+ case WebAssembly::CONST_I32_S:
+ case WebAssembly::CONST_I64:
+ case WebAssembly::CONST_I64_S:
+ case WebAssembly::CONST_F32:
+ case WebAssembly::CONST_F32_S:
+ case WebAssembly::CONST_F64:
+ case WebAssembly::CONST_F64_S:
+ return true;
+ default:
+ return false;
+ }
+}
+
inline bool isArgument(unsigned Opc) {
switch (Opc) {
case WebAssembly::ARGUMENT_i32:
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp
new file mode 100644
index 000000000000..b7b5b2a97c59
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.cpp
@@ -0,0 +1,124 @@
+//===- WebAssemblyMCTypeUtilities.cpp - WebAssembly Type Utility Functions-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements several utility functions for WebAssembly type parsing.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyMCTypeUtilities.h"
+#include "WebAssemblyMCTargetDesc.h"
+#include "llvm/ADT/StringSwitch.h"
+
+using namespace llvm;
+
+std::optional<wasm::ValType> WebAssembly::parseType(StringRef Type) {
+ // FIXME: can't use StringSwitch because wasm::ValType doesn't have an
+ // "invalid" value.
+ if (Type == "i32")
+ return wasm::ValType::I32;
+ if (Type == "i64")
+ return wasm::ValType::I64;
+ if (Type == "f32")
+ return wasm::ValType::F32;
+ if (Type == "f64")
+ return wasm::ValType::F64;
+ if (Type == "v128" || Type == "i8x16" || Type == "i16x8" || Type == "i32x4" ||
+ Type == "i64x2" || Type == "f32x4" || Type == "f64x2")
+ return wasm::ValType::V128;
+ if (Type == "funcref")
+ return wasm::ValType::FUNCREF;
+ if (Type == "externref")
+ return wasm::ValType::EXTERNREF;
+ return std::nullopt;
+}
+
+WebAssembly::BlockType WebAssembly::parseBlockType(StringRef Type) {
+ // Multivalue block types are handled separately in parseSignature
+ return StringSwitch<WebAssembly::BlockType>(Type)
+ .Case("i32", WebAssembly::BlockType::I32)
+ .Case("i64", WebAssembly::BlockType::I64)
+ .Case("f32", WebAssembly::BlockType::F32)
+ .Case("f64", WebAssembly::BlockType::F64)
+ .Case("v128", WebAssembly::BlockType::V128)
+ .Case("funcref", WebAssembly::BlockType::Funcref)
+ .Case("externref", WebAssembly::BlockType::Externref)
+ .Case("void", WebAssembly::BlockType::Void)
+ .Default(WebAssembly::BlockType::Invalid);
+}
+
+// We have various enums representing a subset of these types; use this
+// function to convert any of them to text.
+const char *WebAssembly::anyTypeToString(unsigned Type) {
+ switch (Type) {
+ case wasm::WASM_TYPE_I32:
+ return "i32";
+ case wasm::WASM_TYPE_I64:
+ return "i64";
+ case wasm::WASM_TYPE_F32:
+ return "f32";
+ case wasm::WASM_TYPE_F64:
+ return "f64";
+ case wasm::WASM_TYPE_V128:
+ return "v128";
+ case wasm::WASM_TYPE_FUNCREF:
+ return "funcref";
+ case wasm::WASM_TYPE_EXTERNREF:
+ return "externref";
+ case wasm::WASM_TYPE_FUNC:
+ return "func";
+ case wasm::WASM_TYPE_NORESULT:
+ return "void";
+ default:
+ return "invalid_type";
+ }
+}
+
+const char *WebAssembly::typeToString(wasm::ValType Type) {
+ return anyTypeToString(static_cast<unsigned>(Type));
+}
+
+std::string WebAssembly::typeListToString(ArrayRef<wasm::ValType> List) {
+ std::string S;
+ for (const auto &Type : List) {
+ if (&Type != &List[0])
+ S += ", ";
+ S += WebAssembly::typeToString(Type);
+ }
+ return S;
+}
+
+std::string WebAssembly::signatureToString(const wasm::WasmSignature *Sig) {
+ std::string S("(");
+ S += typeListToString(Sig->Params);
+ S += ") -> (";
+ S += typeListToString(Sig->Returns);
+ S += ")";
+ return S;
+}
+
+wasm::ValType WebAssembly::regClassToValType(unsigned RC) {
+ switch (RC) {
+ case WebAssembly::I32RegClassID:
+ return wasm::ValType::I32;
+ case WebAssembly::I64RegClassID:
+ return wasm::ValType::I64;
+ case WebAssembly::F32RegClassID:
+ return wasm::ValType::F32;
+ case WebAssembly::F64RegClassID:
+ return wasm::ValType::F64;
+ case WebAssembly::V128RegClassID:
+ return wasm::ValType::V128;
+ case WebAssembly::FUNCREFRegClassID:
+ return wasm::ValType::FUNCREF;
+ case WebAssembly::EXTERNREFRegClassID:
+ return wasm::ValType::EXTERNREF;
+ default:
+ llvm_unreachable("unexpected type");
+ }
+}
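// Editor's sketch, not part of the patch: a minimal example of how the string
// helpers implemented above compose. It assumes a translation unit inside the
// WebAssembly MCTargetDesc directory so the local header is on the include
// path, and uses only the functions declared in WebAssemblyMCTypeUtilities.h
// plus wasm::WasmSignature from llvm/BinaryFormat/Wasm.h.
#include "WebAssemblyMCTypeUtilities.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

static void printSignatureExample() {
  using namespace llvm;
  wasm::WasmSignature Sig;
  Sig.Params.push_back(wasm::ValType::I32);
  Sig.Params.push_back(wasm::ValType::I64);
  Sig.Returns.push_back(wasm::ValType::F64);
  // signatureToString joins the two type lists: prints "(i32, i64) -> (f64)".
  outs() << WebAssembly::signatureToString(&Sig) << "\n";
  // parseType maps textual names back to ValType; SIMD shapes such as "i8x16"
  // all collapse to v128, so this prints "v128".
  if (std::optional<wasm::ValType> Ty = WebAssembly::parseType("i8x16"))
    outs() << WebAssembly::typeToString(*Ty) << "\n";
}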
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.h
new file mode 100644
index 000000000000..18018dfc6d6f
--- /dev/null
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTypeUtilities.h
@@ -0,0 +1,73 @@
+//===-- WebAssemblyMCTypeUtilities - WebAssembly Type Utilities-*- C++ -*-====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the declaration of the WebAssembly-specific type parsing
+/// utility functions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTYPEUTILITIES_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTYPEUTILITIES_H
+
+#include "llvm/BinaryFormat/Wasm.h"
+
+namespace llvm {
+
+namespace WebAssembly {
+
+/// Used as immediate MachineOperands for block signatures
+enum class BlockType : unsigned {
+ Invalid = 0x00,
+ Void = 0x40,
+ I32 = unsigned(wasm::ValType::I32),
+ I64 = unsigned(wasm::ValType::I64),
+ F32 = unsigned(wasm::ValType::F32),
+ F64 = unsigned(wasm::ValType::F64),
+ V128 = unsigned(wasm::ValType::V128),
+ Externref = unsigned(wasm::ValType::EXTERNREF),
+ Funcref = unsigned(wasm::ValType::FUNCREF),
+ // Multivalue blocks (and other non-void blocks) are only emitted when the
+ // blocks will never be exited and are at the ends of functions (see
+ // WebAssemblyCFGStackify::fixEndsAtEndOfFunction). They also are never made
+ // to pop values off the stack, so the exact multivalue signature can always
+ // be inferred from the return type of the parent function in MCInstLower.
+ Multivalue = 0xffff,
+};
+
+inline bool isRefType(wasm::ValType Type) {
+ return Type == wasm::ValType::EXTERNREF || Type == wasm::ValType::FUNCREF;
+}
+
+// Convert ValType or a list/signature of ValTypes to a string.
+
+// Convert an unsigned integer, which can be among wasm::ValType enum, to its
+// type name string. If the input is not within wasm::ValType, returns
+// "invalid_type".
+const char *anyTypeToString(unsigned Type);
+const char *typeToString(wasm::ValType Type);
+// Convert a list of ValTypes into a string in the format of
+// "type0, type1, ... typeN"
+std::string typeListToString(ArrayRef<wasm::ValType> List);
+// Convert a wasm signature into a string in the format of
+// "(params) -> (results)", where params and results are a string of ValType
+// lists.
+std::string signatureToString(const wasm::WasmSignature *Sig);
+
+// Convert a register class ID to a wasm ValType.
+wasm::ValType regClassToValType(unsigned RC);
+
+// Convert StringRef to ValType / HeapType / BlockType
+
+std::optional<wasm::ValType> parseType(StringRef Type);
+BlockType parseBlockType(StringRef Type);
+
+} // end namespace WebAssembly
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
index 2da219d54c73..f389ee2f50d8 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -14,7 +14,7 @@
#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
-#include "Utils/WebAssemblyTypeUtilities.h"
+#include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionWasm.h"
#include "llvm/MC/MCSubtargetInfo.h"
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index 522f6356c28b..72d36a251a91 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -16,8 +16,8 @@
#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H
#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/MC/MCStreamer.h"
-#include "llvm/Support/MachineValueType.h"
namespace llvm {
diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
index 405712906c40..43c67b4b4749 100644
--- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
+++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp
@@ -91,6 +91,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(
return wasm::R_WASM_TYPE_INDEX_LEB;
case MCSymbolRefExpr::VK_None:
break;
+ case MCSymbolRefExpr::VK_WASM_FUNCINDEX:
+ return wasm::R_WASM_FUNCTION_INDEX_I32;
default:
report_fatal_error("unknown VariantKind");
break;
diff --git a/llvm/lib/Target/WebAssembly/README.txt b/llvm/lib/Target/WebAssembly/README.txt
index ab1cd8f0f84a..8dc2d7fcc733 100644
--- a/llvm/lib/Target/WebAssembly/README.txt
+++ b/llvm/lib/Target/WebAssembly/README.txt
@@ -17,7 +17,7 @@ includes standard libraries, tools, and packaging for producing WebAssembly
applications that can run in browsers and other environments.
wasi-sdk provides a more minimal C/C++ SDK based on clang, llvm and a libc based
-on musl, for producing WebAssemmbly applictions that use the WASI ABI.
+on musl, for producing WebAssembly applications that use the WASI ABI.
Rust provides WebAssembly support integrated into Cargo. There are two
main options:
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp
index 998905402b39..bf5db09e05de 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.cpp
@@ -21,41 +21,6 @@
using namespace llvm;
-std::optional<wasm::ValType> WebAssembly::parseType(StringRef Type) {
- // FIXME: can't use StringSwitch because wasm::ValType doesn't have a
- // "invalid" value.
- if (Type == "i32")
- return wasm::ValType::I32;
- if (Type == "i64")
- return wasm::ValType::I64;
- if (Type == "f32")
- return wasm::ValType::F32;
- if (Type == "f64")
- return wasm::ValType::F64;
- if (Type == "v128" || Type == "i8x16" || Type == "i16x8" || Type == "i32x4" ||
- Type == "i64x2" || Type == "f32x4" || Type == "f64x2")
- return wasm::ValType::V128;
- if (Type == "funcref")
- return wasm::ValType::FUNCREF;
- if (Type == "externref")
- return wasm::ValType::EXTERNREF;
- return std::nullopt;
-}
-
-WebAssembly::BlockType WebAssembly::parseBlockType(StringRef Type) {
- // Multivalue block types are handled separately in parseSignature
- return StringSwitch<WebAssembly::BlockType>(Type)
- .Case("i32", WebAssembly::BlockType::I32)
- .Case("i64", WebAssembly::BlockType::I64)
- .Case("f32", WebAssembly::BlockType::F32)
- .Case("f64", WebAssembly::BlockType::F64)
- .Case("v128", WebAssembly::BlockType::V128)
- .Case("funcref", WebAssembly::BlockType::Funcref)
- .Case("externref", WebAssembly::BlockType::Externref)
- .Case("void", WebAssembly::BlockType::Void)
- .Default(WebAssembly::BlockType::Invalid);
-}
-
MVT WebAssembly::parseMVT(StringRef Type) {
return StringSwitch<MVT>(Type)
.Case("i32", MVT::i32)
@@ -72,56 +37,6 @@ MVT WebAssembly::parseMVT(StringRef Type) {
.Default(MVT::INVALID_SIMPLE_VALUE_TYPE);
}
-// We have various enums representing a subset of these types, use this
-// function to convert any of them to text.
-const char *WebAssembly::anyTypeToString(unsigned Type) {
- switch (Type) {
- case wasm::WASM_TYPE_I32:
- return "i32";
- case wasm::WASM_TYPE_I64:
- return "i64";
- case wasm::WASM_TYPE_F32:
- return "f32";
- case wasm::WASM_TYPE_F64:
- return "f64";
- case wasm::WASM_TYPE_V128:
- return "v128";
- case wasm::WASM_TYPE_FUNCREF:
- return "funcref";
- case wasm::WASM_TYPE_EXTERNREF:
- return "externref";
- case wasm::WASM_TYPE_FUNC:
- return "func";
- case wasm::WASM_TYPE_NORESULT:
- return "void";
- default:
- return "invalid_type";
- }
-}
-
-const char *WebAssembly::typeToString(wasm::ValType Type) {
- return anyTypeToString(static_cast<unsigned>(Type));
-}
-
-std::string WebAssembly::typeListToString(ArrayRef<wasm::ValType> List) {
- std::string S;
- for (const auto &Type : List) {
- if (&Type != &List[0])
- S += ", ";
- S += WebAssembly::typeToString(Type);
- }
- return S;
-}
-
-std::string WebAssembly::signatureToString(const wasm::WasmSignature *Sig) {
- std::string S("(");
- S += typeListToString(Sig->Params);
- S += ") -> (";
- S += typeListToString(Sig->Returns);
- S += ")";
- return S;
-}
-
wasm::ValType WebAssembly::toValType(MVT Type) {
switch (Type.SimpleTy) {
case MVT::i32:
@@ -148,34 +63,13 @@ wasm::ValType WebAssembly::toValType(MVT Type) {
}
}
-wasm::ValType WebAssembly::regClassToValType(unsigned RC) {
- switch (RC) {
- case WebAssembly::I32RegClassID:
- return wasm::ValType::I32;
- case WebAssembly::I64RegClassID:
- return wasm::ValType::I64;
- case WebAssembly::F32RegClassID:
- return wasm::ValType::F32;
- case WebAssembly::F64RegClassID:
- return wasm::ValType::F64;
- case WebAssembly::V128RegClassID:
- return wasm::ValType::V128;
- case WebAssembly::FUNCREFRegClassID:
- return wasm::ValType::FUNCREF;
- case WebAssembly::EXTERNREFRegClassID:
- return wasm::ValType::EXTERNREF;
- default:
- llvm_unreachable("unexpected type");
- }
-}
-
wasm::ValType WebAssembly::regClassToValType(const TargetRegisterClass *RC) {
assert(RC != nullptr);
return regClassToValType(RC->getID());
}
void WebAssembly::wasmSymbolSetType(MCSymbolWasm *Sym, const Type *GlobalVT,
- const SmallVector<MVT, 1> &VTs) {
+ const ArrayRef<MVT> &VTs) {
assert(!Sym->getType());
// Tables are represented as Arrays in LLVM IR therefore
@@ -183,13 +77,13 @@ void WebAssembly::wasmSymbolSetType(MCSymbolWasm *Sym, const Type *GlobalVT,
// that is a reference type.
wasm::ValType ValTy;
bool IsTable = false;
- if (GlobalVT->isArrayTy() &&
- WebAssembly::isRefType(GlobalVT->getArrayElementType())) {
+ if (GlobalVT->isArrayTy() && WebAssembly::isWebAssemblyReferenceType(
+ GlobalVT->getArrayElementType())) {
IsTable = true;
const Type *ElTy = GlobalVT->getArrayElementType();
- if (WebAssembly::isExternrefType(ElTy))
+ if (WebAssembly::isWebAssemblyExternrefType(ElTy))
ValTy = wasm::ValType::EXTERNREF;
- else if (WebAssembly::isFuncrefType(ElTy))
+ else if (WebAssembly::isWebAssemblyFuncrefType(ElTy))
ValTy = wasm::ValType::FUNCREF;
else
report_fatal_error("unhandled reference type");
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
index 33f3bf31595d..9f58d7582fab 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h
@@ -15,10 +15,12 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYTYPEUTILITIES_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_UTILS_WEBASSEMBLYTYPEUTILITIES_H
+#include "MCTargetDesc/WebAssemblyMCTypeUtilities.h"
#include "llvm/BinaryFormat/Wasm.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/WasmAddressSpaces.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/MC/MCSymbolWasm.h"
-#include "llvm/Support/MachineValueType.h"
namespace llvm {
@@ -26,99 +28,36 @@ class TargetRegisterClass;
namespace WebAssembly {
-/// Used as immediate MachineOperands for block signatures
-enum class BlockType : unsigned {
- Invalid = 0x00,
- Void = 0x40,
- I32 = unsigned(wasm::ValType::I32),
- I64 = unsigned(wasm::ValType::I64),
- F32 = unsigned(wasm::ValType::F32),
- F64 = unsigned(wasm::ValType::F64),
- V128 = unsigned(wasm::ValType::V128),
- Externref = unsigned(wasm::ValType::EXTERNREF),
- Funcref = unsigned(wasm::ValType::FUNCREF),
- // Multivalue blocks (and other non-void blocks) are only emitted when the
- // blocks will never be exited and are at the ends of functions (see
- // WebAssemblyCFGStackify::fixEndsAtEndOfFunction). They also are never made
- // to pop values off the stack, so the exact multivalue signature can always
- // be inferred from the return type of the parent function in MCInstLower.
- Multivalue = 0xffff,
-};
-
-enum WasmAddressSpace : unsigned {
- // Default address space, for pointers to linear memory (stack, heap, data).
- WASM_ADDRESS_SPACE_DEFAULT = 0,
- // A non-integral address space for pointers to named objects outside of
- // linear memory: WebAssembly globals or WebAssembly locals. Loads and stores
- // to these pointers are lowered to global.get / global.set or local.get /
- // local.set, as appropriate.
- WASM_ADDRESS_SPACE_VAR = 1,
- // A non-integral address space for externref values
- WASM_ADDRESS_SPACE_EXTERNREF = 10,
- // A non-integral address space for funcref values
- WASM_ADDRESS_SPACE_FUNCREF = 20,
-};
-
-inline bool isDefaultAddressSpace(unsigned AS) {
- return AS == WASM_ADDRESS_SPACE_DEFAULT;
-}
-inline bool isWasmVarAddressSpace(unsigned AS) {
- return AS == WASM_ADDRESS_SPACE_VAR;
-}
-inline bool isValidAddressSpace(unsigned AS) {
- return isDefaultAddressSpace(AS) || isWasmVarAddressSpace(AS);
-}
-inline bool isFuncrefType(const Type *Ty) {
- return isa<PointerType>(Ty) &&
- Ty->getPointerAddressSpace() ==
- WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF;
+/// Return true if this is a WebAssembly Externref Type.
+inline bool isWebAssemblyExternrefType(const Type *Ty) {
+ return Ty->getPointerAddressSpace() ==
+ WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF;
}
-inline bool isExternrefType(const Type *Ty) {
- return isa<PointerType>(Ty) &&
- Ty->getPointerAddressSpace() ==
- WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF;
-}
-inline bool isRefType(const Type *Ty) {
- return isFuncrefType(Ty) || isExternrefType(Ty);
+
+/// Return true if this is a WebAssembly Funcref Type.
+inline bool isWebAssemblyFuncrefType(const Type *Ty) {
+ return Ty->getPointerAddressSpace() ==
+ WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF;
}
-inline bool isRefType(wasm::ValType Type) {
- return Type == wasm::ValType::EXTERNREF || Type == wasm::ValType::FUNCREF;
+/// Return true if this is a WebAssembly Reference Type.
+inline bool isWebAssemblyReferenceType(const Type *Ty) {
+ return isWebAssemblyExternrefType(Ty) || isWebAssemblyFuncrefType(Ty);
}
// Convert StringRef to ValType / HeapType / BlockType
-std::optional<wasm::ValType> parseType(StringRef Type);
-BlockType parseBlockType(StringRef Type);
MVT parseMVT(StringRef Type);
-// Convert ValType or a list/signature of ValTypes to a string.
-
-// Convert an unsinged integer, which can be among wasm::ValType enum, to its
-// type name string. If the input is not within wasm::ValType, returns
-// "invalid_type".
-const char *anyTypeToString(unsigned Type);
-const char *typeToString(wasm::ValType Type);
-// Convert a list of ValTypes into a string in the format of
-// "type0, type1, ... typeN"
-std::string typeListToString(ArrayRef<wasm::ValType> List);
-// Convert a wasm signature into a string in the format of
-// "(params) -> (results)", where params and results are a string of ValType
-// lists.
-std::string signatureToString(const wasm::WasmSignature *Sig);
-
// Convert a MVT into its corresponding wasm ValType.
wasm::ValType toValType(MVT Type);
-// Convert a register class ID to a wasm ValType.
-wasm::ValType regClassToValType(unsigned RC);
-
// Convert a register class to a wasm ValType.
wasm::ValType regClassToValType(const TargetRegisterClass *RC);
/// Sets a Wasm Symbol Type.
void wasmSymbolSetType(MCSymbolWasm *Sym, const Type *GlobalVT,
- const SmallVector<MVT, 1> &VTs);
+ const ArrayRef<MVT> &VTs);
} // end namespace WebAssembly
} // end namespace llvm
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp
index a1e0db692390..8d7fa4dc3dee 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.cpp
@@ -18,30 +18,6 @@
#include "llvm/MC/MCContext.h"
using namespace llvm;
-// Exception handling & setjmp-longjmp handling related options. These are
-// defined here to be shared between WebAssembly and its subdirectories.
-
-// Emscripten's asm.js-style exception handling
-cl::opt<bool> WebAssembly::WasmEnableEmEH(
- "enable-emscripten-cxx-exceptions",
- cl::desc("WebAssembly Emscripten-style exception handling"),
- cl::init(false));
-// Emscripten's asm.js-style setjmp/longjmp handling
-cl::opt<bool> WebAssembly::WasmEnableEmSjLj(
- "enable-emscripten-sjlj",
- cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"),
- cl::init(false));
-// Exception handling using wasm EH instructions
-cl::opt<bool>
- WebAssembly::WasmEnableEH("wasm-enable-eh",
- cl::desc("WebAssembly exception handling"),
- cl::init(false));
-// setjmp/longjmp handling using wasm EH instrutions
-cl::opt<bool>
- WebAssembly::WasmEnableSjLj("wasm-enable-sjlj",
- cl::desc("WebAssembly setjmp/longjmp handling"),
- cl::init(false));
-
// Function names in libc++abi and libunwind
const char *const WebAssembly::CxaBeginCatchFn = "__cxa_begin_catch";
const char *const WebAssembly::CxaRethrowFn = "__cxa_rethrow";
diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
index d0639208fda9..7f28fb1858a6 100644
--- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
+++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyUtilities.h
@@ -33,12 +33,6 @@ namespace WebAssembly {
bool isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI);
bool mayThrow(const MachineInstr &MI);
-// Exception handling / setjmp-longjmp handling command-line options
-extern cl::opt<bool> WasmEnableEmEH; // asm.js-style EH
-extern cl::opt<bool> WasmEnableEmSjLj; // asm.js-style SjLJ
-extern cl::opt<bool> WasmEnableEH; // EH using Wasm EH instructions
-extern cl::opt<bool> WasmEnableSjLj; // SjLj using Wasm EH instructions
-
// Exception-related function names
extern const char *const ClangCallTerminateFn;
extern const char *const CxaBeginCatchFn;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 60b1b3f5fc27..d492bec97d46 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -25,8 +25,10 @@
#include "WebAssemblyRegisterInfo.h"
#include "WebAssemblyRuntimeLibcallSignatures.h"
#include "WebAssemblyTargetMachine.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Wasm.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -301,8 +303,8 @@ void WebAssemblyAsmPrinter::emitDecls(const Module &M) {
// only find symbols that have been used. Unused symbols from globals will
// not be found here.
MachineModuleInfoWasm &MMIW = MMI->getObjFileInfo<MachineModuleInfoWasm>();
- for (const auto &Name : MMIW.MachineSymbolsUsed) {
- auto *WasmSym = cast<MCSymbolWasm>(getOrCreateWasmSymbol(Name.getKey()));
+ for (StringRef Name : MMIW.MachineSymbolsUsed) {
+ auto *WasmSym = cast<MCSymbolWasm>(getOrCreateWasmSymbol(Name));
if (WasmSym->isFunction()) {
// TODO(wvo): is there any case where this overlaps with the call to
// emitFunctionType in the loop below?
@@ -438,6 +440,7 @@ void WebAssemblyAsmPrinter::emitEndOfAsmFile(Module &M) {
EmitProducerInfo(M);
EmitTargetFeatures(M);
+ EmitFunctionAttributes(M);
}
void WebAssemblyAsmPrinter::EmitProducerInfo(Module &M) {
@@ -556,6 +559,48 @@ void WebAssemblyAsmPrinter::EmitTargetFeatures(Module &M) {
OutStreamer->popSection();
}
+void WebAssemblyAsmPrinter::EmitFunctionAttributes(Module &M) {
+ auto V = M.getNamedGlobal("llvm.global.annotations");
+ if (!V)
+ return;
+
+ // Group all the custom attributes by name.
+ MapVector<StringRef, SmallVector<MCSymbol *, 4>> CustomSections;
+ const ConstantArray *CA = cast<ConstantArray>(V->getOperand(0));
+ for (Value *Op : CA->operands()) {
+ auto *CS = cast<ConstantStruct>(Op);
+ // The first field is a pointer to the annotated variable.
+ Value *AnnotatedVar = CS->getOperand(0)->stripPointerCasts();
+ // Only annotated functions are supported for now.
+ if (!isa<Function>(AnnotatedVar))
+ continue;
+ auto *F = cast<Function>(AnnotatedVar);
+
+ // The second field is a pointer to a global annotation string.
+ auto *GV = cast<GlobalVariable>(CS->getOperand(1)->stripPointerCasts());
+ StringRef AnnotationString;
+ getConstantStringInfo(GV, AnnotationString);
+ auto *Sym = cast<MCSymbolWasm>(getSymbol(F));
+ CustomSections[AnnotationString].push_back(Sym);
+ }
+
+ // Emit a custom section for each unique attribute.
+ for (const auto &[Name, Symbols] : CustomSections) {
+ MCSectionWasm *CustomSection = OutContext.getWasmSection(
+ ".custom_section.llvm.func_attr.annotate." + Name, SectionKind::getMetadata());
+ OutStreamer->pushSection();
+ OutStreamer->switchSection(CustomSection);
+
+ for (auto &Sym : Symbols) {
+ OutStreamer->emitValue(
+ MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_WASM_FUNCINDEX,
+ OutContext),
+ 4);
+ }
+ OutStreamer->popSection();
+ }
+}
+
void WebAssemblyAsmPrinter::emitConstantPool() {
emitDecls(*MMI->getModule());
assert(MF->getConstantPool()->getConstants().empty() &&
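// Editor's sketch, not part of the patch: the source-level pattern that feeds
// EmitFunctionAttributes above. Clang lowers __attribute__((annotate)) on a
// function into an entry of the llvm.global.annotations array, which the
// printer groups by annotation string. Function and attribute names below are
// hypothetical.
__attribute__((annotate("my_attr"))) void annotated_a() {}
__attribute__((annotate("my_attr"))) void annotated_b() {}
// Assumed result when targeting wasm: one custom section named
// ".custom_section.llvm.func_attr.annotate.my_attr" holding a 4-byte
// VK_WASM_FUNCINDEX (R_WASM_FUNCTION_INDEX_I32) entry per annotated function.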
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
index 65d6ee415180..c30e0155c81e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.h
@@ -66,6 +66,7 @@ public:
void emitEndOfAsmFile(Module &M) override;
void EmitProducerInfo(Module &M);
void EmitTargetFeatures(Module &M);
+ void EmitFunctionAttributes(Module &M);
void emitSymbolType(const MCSymbolWasm *Sym);
void emitGlobalVariable(const GlobalVariable *GV) override;
void emitJumpTableInfo() override;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index 70c187af73a5..cc8052352b38 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -1291,6 +1291,7 @@ bool WebAssemblyCFGStackify::fixCatchUnwindMismatches(MachineFunction &MF) {
// end_try
const auto *EHInfo = MF.getWasmEHFuncInfo();
+ assert(EHInfo);
SmallVector<const MachineBasicBlock *, 8> EHPadStack;
// For EH pads that have catch unwind mismatches, a map of <EH pad, its
// correct unwind destination>.
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp
index 9a6acd157a74..f3f54a5fb501 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyDebugFixup.cpp
@@ -65,14 +65,14 @@ FunctionPass *llvm::createWebAssemblyDebugFixup() {
// Because Wasm cannot access values in LLVM virtual registers in the debugger,
// these dangling DBG_VALUEs in effect kill the effect of any previous DBG_VALUE
// associated with the variable, which will appear as "optimized out".
-static void nullifyDanglingDebugValues(MachineBasicBlock &MBB,
- const TargetInstrInfo *TII) {
+static void setDanglingDebugValuesUndef(MachineBasicBlock &MBB,
+ const TargetInstrInfo *TII) {
for (auto &MI : llvm::make_early_inc_range(MBB)) {
if (MI.isDebugValue() && MI.getDebugOperand(0).isReg() &&
!MI.isUndefDebugValue()) {
- LLVM_DEBUG(dbgs() << "Warning: dangling DBG_VALUE nullified: " << MI
+ LLVM_DEBUG(dbgs() << "Warning: dangling DBG_VALUE set to undef: " << MI
<< "\n");
- MI.getDebugOperand(0).setReg(Register());
+ MI.setDebugValueUndef();
}
}
}
@@ -154,7 +154,7 @@ bool WebAssemblyDebugFixup::runOnMachineFunction(MachineFunction &MF) {
assert(Stack.empty() &&
"WebAssemblyDebugFixup: Stack not empty at end of basic block!");
- nullifyDanglingDebugValues(MBB, TII);
+ setDanglingDebugValuesUndef(MBB, TII);
}
return true;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
index 55be64ad7da0..fd510f85a8a3 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp
@@ -12,52 +12,388 @@
//===----------------------------------------------------------------------===//
#include "WebAssemblyDebugValueManager.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;
-WebAssemblyDebugValueManager::WebAssemblyDebugValueManager(
- MachineInstr *Instr) {
+WebAssemblyDebugValueManager::WebAssemblyDebugValueManager(MachineInstr *Def)
+ : Def(Def) {
// This code differs from MachineInstr::collectDebugValues in that it scans
- // the whole BB, not just contiguous DBG_VALUEs.
- if (!Instr->getOperand(0).isReg())
+ // the whole BB, not just contiguous DBG_VALUEs, until another definition to
+ // the same register is encountered.
+ if (!Def->getOperand(0).isReg())
return;
- CurrentReg = Instr->getOperand(0).getReg();
-
- MachineBasicBlock::iterator DI = *Instr;
- ++DI;
- for (MachineBasicBlock::iterator DE = Instr->getParent()->end(); DI != DE;
- ++DI) {
- if (DI->isDebugValue() &&
- DI->hasDebugOperandForReg(Instr->getOperand(0).getReg()))
- DbgValues.push_back(&*DI);
+ CurrentReg = Def->getOperand(0).getReg();
+
+ for (MachineBasicBlock::iterator MI = std::next(Def->getIterator()),
+ ME = Def->getParent()->end();
+ MI != ME; ++MI) {
+ // If another definition appears, stop
+ if (MI->definesRegister(CurrentReg))
+ break;
+ if (MI->isDebugValue() && MI->hasDebugOperandForReg(CurrentReg))
+ DbgValues.push_back(&*MI);
}
}
-void WebAssemblyDebugValueManager::move(MachineInstr *Insert) {
- MachineBasicBlock *MBB = Insert->getParent();
- for (MachineInstr *DBI : reverse(DbgValues))
- MBB->splice(Insert, DBI->getParent(), DBI);
+// Returns true if both A and B are the same CONST_I32/I64/F32/F64 instructions.
+// Doesn't include CONST_V128.
+static bool isSameScalarConst(const MachineInstr *A, const MachineInstr *B) {
+ if (A->getOpcode() != B->getOpcode() ||
+ !WebAssembly::isScalarConst(A->getOpcode()) ||
+ !WebAssembly::isScalarConst(B->getOpcode()))
+ return false;
+ const MachineOperand &OpA = A->getOperand(1), &OpB = B->getOperand(1);
+ if ((OpA.isImm() && OpB.isImm() && OpA.getImm() == OpB.getImm()) ||
+ (OpA.isFPImm() && OpB.isFPImm() && OpA.getFPImm() == OpB.getFPImm()) ||
+ (OpA.isGlobal() && OpB.isGlobal() && OpA.getGlobal() == OpB.getGlobal()))
+ return true;
+ return false;
}
-void WebAssemblyDebugValueManager::updateReg(unsigned Reg) {
- for (auto *DBI : DbgValues)
- for (auto &MO : DBI->getDebugOperandsForReg(CurrentReg))
- MO.setReg(Reg);
- CurrentReg = Reg;
+SmallVector<MachineInstr *, 1>
+WebAssemblyDebugValueManager::getSinkableDebugValues(
+ MachineInstr *Insert) const {
+ if (DbgValues.empty())
+ return {};
+ // DBG_VALUEs between Def and Insert
+ SmallVector<MachineInstr *, 8> DbgValuesInBetween;
+
+ if (Def->getParent() == Insert->getParent()) {
+ // When Def and Insert are within the same BB, check if Insert comes after
+ // Def, because we only support sinking.
+ bool DefFirst = false;
+ for (MachineBasicBlock::iterator MI = std::next(Def->getIterator()),
+ ME = Def->getParent()->end();
+ MI != ME; ++MI) {
+ if (&*MI == Insert) {
+ DefFirst = true;
+ break;
+ }
+ if (MI->isDebugValue())
+ DbgValuesInBetween.push_back(&*MI);
+ }
+ if (!DefFirst) // Not a sink
+ return {};
+
+ } else { // Def and Insert are in different BBs
+ // If Def and Insert are in different BBs, we only handle a simple case in
+ // which Insert's BB is a successor of Def's BB.
+ if (!Def->getParent()->isSuccessor(Insert->getParent()))
+ return {};
+
+ // Gather DBG_VALUEs between 'Def~Def BB's end' and
+ // 'Insert BB's begin~Insert'
+ for (MachineBasicBlock::iterator MI = std::next(Def->getIterator()),
+ ME = Def->getParent()->end();
+ MI != ME; ++MI) {
+ if (MI->isDebugValue())
+ DbgValuesInBetween.push_back(&*MI);
+ }
+ for (MachineBasicBlock::iterator MI = Insert->getParent()->begin(),
+ ME = Insert->getIterator();
+ MI != ME; ++MI) {
+ if (MI->isDebugValue())
+ DbgValuesInBetween.push_back(&*MI);
+ }
+ }
+
+ // Gather DebugVariables that are seen between Def and Insert, excluding our
+ // own DBG_VALUEs in DbgValues.
+ SmallDenseMap<DebugVariable, SmallVector<MachineInstr *, 2>>
+ SeenDbgVarToDbgValues;
+ for (auto *DV : DbgValuesInBetween) {
+ if (!llvm::is_contained(DbgValues, DV)) {
+ DebugVariable Var(DV->getDebugVariable(), DV->getDebugExpression(),
+ DV->getDebugLoc()->getInlinedAt());
+ SeenDbgVarToDbgValues[Var].push_back(DV);
+ }
+ }
+
+ // Gather sinkable DBG_VALUEs. We should not sink a DBG_VALUE if there is
+ // another DBG_VALUE between Def and Insert referring to the same
+ // DebugVariable. For example,
+ // %0 = someinst
+ // DBG_VALUE %0, !"a", !DIExpression() // Should not sink with %0
+ // %1 = anotherinst
+ // DBG_VALUE %1, !"a", !DIExpression()
+ // Where if %0 were to sink, the DBG_VALUE should not sink with it, as that
+ // would re-order assignments.
+ SmallVector<MachineInstr *, 1> SinkableDbgValues;
+ MachineRegisterInfo &MRI = Def->getParent()->getParent()->getRegInfo();
+ for (auto *DV : DbgValues) {
+ DebugVariable Var(DV->getDebugVariable(), DV->getDebugExpression(),
+ DV->getDebugLoc()->getInlinedAt());
+ auto It = SeenDbgVarToDbgValues.find(Var);
+ if (It == SeenDbgVarToDbgValues.end()) {
+ SinkableDbgValues.push_back(DV);
+ continue;
+ }
+ if (!WebAssembly::isScalarConst(Def->getOpcode()))
+ continue;
+ auto &OverlappingDbgValues = It->second;
+ bool Sinkable = true;
+ for (auto *OverlappingDV : OverlappingDbgValues) {
+ MachineOperand &DbgOp = OverlappingDV->getDebugOperand(0);
+ if (!DbgOp.isReg()) {
+ Sinkable = false;
+ break;
+ }
+ Register OtherReg = DbgOp.getReg();
+ MachineInstr *OtherDef = MRI.getUniqueVRegDef(OtherReg);
+ // We have an exception to allow encountering other DBG_VALUEs with the
+ // same DebugVariables, only when they are referring to the same scalar
+ // CONST instruction. For example,
+ // %0 = CONST_I32 1
+ // DBG_VALUE %0, !"a", !DIExpression() // Can sink with %0
+ // %1 = CONST_I32 1
+ // DBG_VALUE %1, !"a", !DIExpression()
+ // When %0 is to be sunk/cloned, the DBG_VALUE can be sunk/cloned with
+ // it because even though the second DBG_VALUE refers to the same
+ // DebugVariable, its value in effect is the same CONST instruction.
+ //
+ // This is to allow a case that can happen with RegStackify's
+ // "rematerializeCheapDef". For example, we have this program with two
+ // BBs:
+ // bb0:
+ // %0 = CONST_I32 1
+ // DBG_VALUE %0, !"a", ...
+ // ...
+ // INST0 ..., $0 ...
+ // bb1:
+ // INST1 ..., $0 ...
+ // INST2 ..., $0 ...
+ //
+ // We process bb0 first. Because %0 is used multiple times, %0 is cloned
+ // before INST0:
+ // bb0:
+ // %0 = CONST_I32 1
+ // DBG_VALUE %0, !"a", ...
+ // ...
+ // %1 = CONST_I32 1
+ // DBG_VALUE %1, !"a", ...
+ // INST0 ..., $1 ...
+ //
+ // And when we process bb1, we clone %0 and its DBG_VALUE again:
+ // bb0:
+ // %0 = CONST_I32 1
+ // DBG_VALUE %0, !"a", ...
+ // ...
+ // %1 = CONST_I32 1
+ // DBG_VALUE %1, !"a", ...
+ // INST0 ..., $1 ...
+ // bb1:
+ // %2 = CONST_I32 1
+ // DBG_VALUE %2, !"a", ... // !!!
+ // INST1 ..., $2 ...
+ // %3 = CONST_I32 1
+ // DBG_VALUE %3, !"a", ... // !!!
+ // INST2 ..., $3 ...
+ //
+ // But (without this exception) the cloned DBG_VALUEs marked with !!! could
+ // not be cloned, because there is a previously cloned
+ // 'DBG_VALUE %1, !"a"' at the end of bb0 referring to the same
+ // DebugVariable "a". In this case they are still OK to clone, because the
+ // interfering DBG_VALUE points to the same 'CONST_I32 1', having been
+ // cloned from the same original instruction.
+ if (!OtherDef || !isSameScalarConst(Def, OtherDef)) {
+ Sinkable = false;
+ break;
+ }
+ }
+ if (Sinkable)
+ SinkableDbgValues.push_back(DV);
+ }
+ return SinkableDbgValues;
+}
+
+// Returns true if the insertion point is the same as the current place.
+// Following DBG_VALUEs for 'Def' are ignored.
+bool WebAssemblyDebugValueManager::isInsertSamePlace(
+ MachineInstr *Insert) const {
+ if (Def->getParent() != Insert->getParent())
+ return false;
+ for (MachineBasicBlock::iterator MI = std::next(Def->getIterator()),
+ ME = Insert;
+ MI != ME; ++MI) {
+ if (!llvm::is_contained(DbgValues, MI)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+// Returns true if any instruction in MBB has the same debug location as DL.
+// Also returns true if DL is an empty location.
+static bool hasSameDebugLoc(const MachineBasicBlock *MBB, DebugLoc DL) {
+ for (const auto &MI : *MBB)
+ if (MI.getDebugLoc() == DL)
+ return true;
+ return false;
+}
+
+// Sink 'Def', and also sink its eligible DBG_VALUEs to the place before
+// 'Insert'. Convert the original DBG_VALUEs into undefs.
+//
+// For DBG_VALUEs to sink properly, if 'Def' and 'Insert' are within the same
+// BB, 'Insert' should be below 'Def'; if they are in different BBs, 'Insert'
+// should be in one of 'Def's BBs successors. Def will be sunk regardless of the
+// location.
+//
+// This DebugValueManager's new Def and DbgValues will be updated to the newly
+// sinked Def + DBG_VALUEs.
+void WebAssemblyDebugValueManager::sink(MachineInstr *Insert) {
+ // In case Def is requested to be sunk to
+ // the same place, we don't need to do anything. If we actually do the sink,
+ // it will create unnecessary undef DBG_VALUEs. For example, if the original
+ // code is:
+ // %0 = someinst // Def
+ // DBG_VALUE %0, ...
+ // %1 = anotherinst // Insert
+ //
+ // If we actually sink %0 and the following DBG_VALUE and set the original
+ // DBG_VALUE to undef, the result will be:
+ // DBG_VALUE %noreg, ... // Unnecessary!
+ // %0 = someinst // Def
+ // DBG_VALUE %0, ...
+ // %1 = anotherinst // Insert
+ if (isInsertSamePlace(Insert))
+ return;
+
+ MachineBasicBlock *MBB = Insert->getParent();
+ MachineFunction *MF = MBB->getParent();
+
+ // Get the list of sinkable DBG_VALUEs. This should be done before sinking
+ // Def, because we need to examine instructions between Def and Insert.
+ SmallVector<MachineInstr *, 1> SinkableDbgValues =
+ getSinkableDebugValues(Insert);
+
+ // Sink Def first.
+ //
+ // When moving to a different BB, we preserve the debug loc only if the
+ // destination BB contains the same location. See
+ // https://llvm.org/docs/HowToUpdateDebugInfo.html#when-to-preserve-an-instruction-location.
+ if (Def->getParent() != MBB && !hasSameDebugLoc(MBB, Def->getDebugLoc()))
+ Def->setDebugLoc(DebugLoc());
+ MBB->splice(Insert, Def->getParent(), Def);
+
+ if (DbgValues.empty())
+ return;
+
+ // Clone sinkable DBG_VALUEs and insert them.
+ SmallVector<MachineInstr *, 1> NewDbgValues;
+ for (MachineInstr *DV : SinkableDbgValues) {
+ MachineInstr *Clone = MF->CloneMachineInstr(DV);
+ MBB->insert(Insert, Clone);
+ NewDbgValues.push_back(Clone);
+ }
+
+ // When sinking a Def and its DBG_VALUEs, we shouldn't just remove the
+ // original DBG_VALUE instructions; we should set them to undef so as not to
+ // create an impossible combination of variable assignments in the original
+ // program.
+ // For example, this is the original program in order:
+ // %0 = CONST_I32 0
+ // DBG_VALUE %0, !"a", !DIExpression() // a = 0, b = ?
+ // %1 = CONST_I32 1
+ // DBG_VALUE %1, !"b", !DIExpression() // a = 0, b = 1
+ // %2 = CONST_I32 2
+ // DBG_VALUE %2, !"a", !DIExpression() // a = 2, b = 1
+ // %3 = CONST_I32 3
+ // DBG_VALUE %3, !"b", !DIExpression() // a = 2, b = 3
+ //
+ // If %2 were to sink below %3 and we just sank 'DBG_VALUE %2, !"a"' with
+ // it, the debug info would show the variable assignment combination
+ // (a = 0, b = 3) after %3, which is not possible in the original program:
+ // %0 = CONST_I32 0
+ // DBG_VALUE %0, !"a", !DIExpression() // a = 0, b = ?
+ // %1 = CONST_I32 1
+ // DBG_VALUE %1, !"b", !DIExpression() // a = 0, b = 1
+ // %3 = CONST_I32 3
+ // DBG_VALUE %3, !"b", !DIExpression() // a = 0, b = 3 (Incorrect!)
+ // %2 = CONST_I32 2
+ // DBG_VALUE %2, !"a", !DIExpression() // a = 2, b = 3
+ //
+ // To fix this, we leave an undef DBG_VALUE in its original place, so that the
+ // result will be
+ // %0 = CONST_I32 0
+ // DBG_VALUE %0, !"a", !DIExpression() // a = 0, b = ?
+ // %1 = CONST_I32 1
+ // DBG_VALUE %1, !"b", !DIExpression() // a = 0, b = 1
+ // DBG_VALUE $noreg, !"a", !DIExpression() // a = ?, b = 1
+ // %3 = CONST_I32 3
+ // DBG_VALUE %3, !"b", !DIExpression() // a = ?, b = 3
+ // %2 = CONST_I32 2
+ // DBG_VALUE %2, !"a", !DIExpression() // a = 2, b = 3
+ // Now in the middle "a" will be shown as "optimized out", but it wouldn't
+ // show the impossible combination of (a = 0, b = 3).
+ for (MachineInstr *DV : DbgValues)
+ DV->setDebugValueUndef();
+
+ DbgValues.swap(NewDbgValues);
}
-void WebAssemblyDebugValueManager::clone(MachineInstr *Insert,
- unsigned NewReg) {
+// Clone 'Def', and also clone its eligible DBG_VALUEs to the place before
+// 'Insert'.
+//
+// For DBG_VALUEs to be cloned properly, if 'Def' and 'Insert' are within the
+// same BB, 'Insert' should be below 'Def'; if they are in different BBs,
+// 'Insert' should be in one of 'Def's BBs successors. Def will be cloned
+// regardless of the location.
+//
+// If NewReg is not $noreg, the newly cloned DBG_VALUEs will have the new
+// register as their operand.
+void WebAssemblyDebugValueManager::cloneSink(MachineInstr *Insert,
+ Register NewReg,
+ bool CloneDef) const {
MachineBasicBlock *MBB = Insert->getParent();
MachineFunction *MF = MBB->getParent();
- for (MachineInstr *DBI : reverse(DbgValues)) {
- MachineInstr *Clone = MF->CloneMachineInstr(DBI);
- for (auto &MO : Clone->getDebugOperandsForReg(CurrentReg))
- MO.setReg(NewReg);
+
+ SmallVector<MachineInstr *> SinkableDbgValues =
+ getSinkableDebugValues(Insert);
+
+ // Clone Def first.
+ if (CloneDef) {
+ MachineInstr *Clone = MF->CloneMachineInstr(Def);
+ // When cloning to a different BB, we preserve the debug loc only if the
+ // destination BB contains the same location. See
+ // https://llvm.org/docs/HowToUpdateDebugInfo.html#when-to-preserve-an-instruction-location.
+ if (Def->getParent() != MBB && !hasSameDebugLoc(MBB, Def->getDebugLoc()))
+ Clone->setDebugLoc(DebugLoc());
+ if (NewReg != CurrentReg && NewReg.isValid())
+ Clone->getOperand(0).setReg(NewReg);
+ MBB->insert(Insert, Clone);
+ }
+
+ if (DbgValues.empty())
+ return;
+
+ // Clone sinkable DBG_VALUEs and insert them.
+ SmallVector<MachineInstr *, 1> NewDbgValues;
+ for (MachineInstr *DV : SinkableDbgValues) {
+ MachineInstr *Clone = MF->CloneMachineInstr(DV);
MBB->insert(Insert, Clone);
+ NewDbgValues.push_back(Clone);
+ }
+
+ if (NewReg != CurrentReg && NewReg.isValid())
+ for (auto *DBI : NewDbgValues)
+ for (auto &MO : DBI->getDebugOperandsForReg(CurrentReg))
+ MO.setReg(NewReg);
+}
+
+// Update the register for Def and DBG_VALUEs.
+void WebAssemblyDebugValueManager::updateReg(Register Reg) {
+ if (Reg != CurrentReg && Reg.isValid()) {
+ for (auto *DBI : DbgValues)
+ for (auto &MO : DBI->getDebugOperandsForReg(CurrentReg))
+ MO.setReg(Reg);
+ CurrentReg = Reg;
+ Def->getOperand(0).setReg(Reg);
}
}
@@ -70,3 +406,10 @@ void WebAssemblyDebugValueManager::replaceWithLocal(unsigned LocalId) {
MO.ChangeToTargetIndex(IndexType, LocalId);
}
}
+
+// Remove Def, and set its DBG_VALUEs to undef.
+void WebAssemblyDebugValueManager::removeDef() {
+ Def->removeFromParent();
+ for (MachineInstr *DV : DbgValues)
+ DV->setDebugValueUndef();
+}
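// Editor's sketch, not part of the patch: how a pass such as RegStackify might
// drive the reworked manager API above. 'Def', 'Insert', and 'NewReg' are
// assumed to be provided by the caller.
#include "WebAssemblyDebugValueManager.h"
#include "llvm/CodeGen/MachineInstr.h"

namespace {
void sinkWithDebugValues(llvm::MachineInstr *Def, llvm::MachineInstr *Insert) {
  llvm::WebAssemblyDebugValueManager DVM(Def);
  // Splices Def before Insert, clones its sinkable DBG_VALUEs next to it, and
  // turns the originals into DBG_VALUE $noreg so stale locations disappear.
  DVM.sink(Insert);
}

void cloneForAnotherUse(llvm::MachineInstr *Def, llvm::MachineInstr *Insert,
                        llvm::Register NewReg) {
  llvm::WebAssemblyDebugValueManager DVM(Def);
  // Leaves Def in place and emits a copy of it (rewritten to NewReg), plus
  // copies of its sinkable DBG_VALUEs, immediately before Insert.
  DVM.cloneSink(Insert, NewReg);
}
} // end anonymous namespace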
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
index c2dd56909304..9ef3da758947 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h
@@ -9,6 +9,9 @@
/// \file
/// This file contains the declaration of the WebAssembly-specific
/// manager for DebugValues associated with the specific MachineInstr.
+/// This class currently does not handle DBG_VALUE_LISTs; they are assumed to
+/// have been set to undef in the NullifyDebugValueLists pass.
+/// TODO Handle DBG_VALUE_LIST
///
//===----------------------------------------------------------------------===//
@@ -16,22 +19,37 @@
#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYDEBUGVALUEMANAGER_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/Register.h"
namespace llvm {
class MachineInstr;
class WebAssemblyDebugValueManager {
- SmallVector<MachineInstr *, 2> DbgValues;
- unsigned CurrentReg;
+ MachineInstr *Def;
+ SmallVector<MachineInstr *, 1> DbgValues;
+ Register CurrentReg;
+ SmallVector<MachineInstr *, 1>
+ getSinkableDebugValues(MachineInstr *Insert) const;
+ bool isInsertSamePlace(MachineInstr *Insert) const;
public:
- WebAssemblyDebugValueManager(MachineInstr *Instr);
-
- void move(MachineInstr *Insert);
- void updateReg(unsigned Reg);
- void clone(MachineInstr *Insert, unsigned NewReg);
+ WebAssemblyDebugValueManager(MachineInstr *Def);
+
+ // Sink 'Def', and also sink its eligible DBG_VALUEs to the place before
+ // 'Insert'. Convert the original DBG_VALUEs into undefs.
+ void sink(MachineInstr *Insert);
+ // Clone 'Def' (optionally), and also clone its eligible DBG_VALUEs to the
+ // place before 'Insert'.
+ void cloneSink(MachineInstr *Insert, Register NewReg = Register(),
+ bool CloneDef = true) const;
+ // Update the register for Def and DBG_VALUEs.
+ void updateReg(Register Reg);
+ // Replace the current register in DBG_VALUEs with the given LocalId target
+ // index.
void replaceWithLocal(unsigned LocalId);
+ // Remove Def, and set its DBG_VALUEs to undef.
+ void removeDef();
};
} // end namespace llvm
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
index 7e63b6b97632..ab3512cfd640 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyExceptionInfo.cpp
@@ -121,6 +121,7 @@ void WebAssemblyExceptionInfo::recalculate(
// and A's unwind destination is B and B's is C. When we visit B before A, we
// end up extracting C only out of B but not out of A.
const auto *EHInfo = MF.getWasmEHFuncInfo();
+ assert(EHInfo);
SmallVector<std::pair<WebAssemblyException *, WebAssemblyException *>>
UnwindWEVec;
for (auto *DomNode : depth_first(&MDT)) {
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
index eeec0fc671cc..84fd34d73b63 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -267,15 +267,42 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
// Replace tee instructions with local.tee. The difference is that tee
// instructions have two defs, while local.tee instructions have one def
// and an index of a local to write to.
+ //
+ // - Before:
+ // TeeReg, Reg = TEE DefReg
+ // INST ..., TeeReg, ...
+ // INST ..., Reg, ...
+ // INST ..., Reg, ...
+ // * DefReg: may or may not be stackified
+ // * Reg: not stackified
+ // * TeeReg: stackified
+ //
+ // - After (when DefReg was already stackified):
+ // TeeReg = LOCAL_TEE LocalId1, DefReg
+ // INST ..., TeeReg, ...
+ // INST ..., Reg, ...
+ // INST ..., Reg, ...
+ // * Reg: mapped to LocalId1
+ // * TeeReg: stackified
+ //
+ // - After (when DefReg was not already stackified):
+ // NewReg = LOCAL_GET LocalId1
+ // TeeReg = LOCAL_TEE LocalId2, NewReg
+ // INST ..., TeeReg, ...
+ // INST ..., Reg, ...
+ // INST ..., Reg, ...
+ // * DefReg: mapped to LocalId1
+ // * Reg: mapped to LocalId2
+ // * TeeReg: stackified
if (WebAssembly::isTee(MI.getOpcode())) {
assert(MFI.isVRegStackified(MI.getOperand(0).getReg()));
assert(!MFI.isVRegStackified(MI.getOperand(1).getReg()));
- Register OldReg = MI.getOperand(2).getReg();
- const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
+ Register DefReg = MI.getOperand(2).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(DefReg);
// Stackify the input if it isn't stackified yet.
- if (!MFI.isVRegStackified(OldReg)) {
- unsigned LocalId = getLocalId(Reg2Local, MFI, CurLocal, OldReg);
+ if (!MFI.isVRegStackified(DefReg)) {
+ unsigned LocalId = getLocalId(Reg2Local, MFI, CurLocal, DefReg);
Register NewReg = MRI.createVirtualRegister(RC);
unsigned Opc = getLocalGetOpcode(RC);
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Opc), NewReg)
@@ -352,7 +379,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
unsigned LocalId = getLocalId(Reg2Local, MFI, CurLocal, OldReg);
// If this register operand is tied to another operand, we can't
// change it to an immediate. Untie it first.
- MI.untieRegOperand(MI.getOperandNo(&MO));
+ MI.untieRegOperand(MO.getOperandNo());
MO.ChangeToImmediate(LocalId);
continue;
}
@@ -369,7 +396,7 @@ bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
if (MI.isInlineAsm()) {
unsigned LocalId = getLocalId(Reg2Local, MFI, CurLocal, OldReg);
// Untie it first if this reg operand is tied to another operand.
- MI.untieRegOperand(MI.getOperandNo(&MO));
+ MI.untieRegOperand(MO.getOperandNo());
MO.ChangeToImmediate(LocalId);
continue;
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index df79e55ce4b6..9aacddb0187e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -249,8 +249,22 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
SmallVector<SDValue, 16> Ops;
for (size_t i = 1; i < Node->getNumOperands(); ++i) {
SDValue Op = Node->getOperand(i);
- if (i == 1 && Op->getOpcode() == WebAssemblyISD::Wrapper)
- Op = Op->getOperand(0);
+ // Remove the wrapper when the call target is a function, an external
+ // symbol (which will be lowered to a library function), or an alias of
+ // a function. If the target is not a function/external symbol, we
+ // shouldn't remove the wrapper, because we cannot call it directly and
+ // instead we want it to be loaded with a CONST instruction and called
+ // with a call_indirect later.
+ if (i == 1 && Op->getOpcode() == WebAssemblyISD::Wrapper) {
+ SDValue NewOp = Op->getOperand(0);
+ if (auto *GlobalOp = dyn_cast<GlobalAddressSDNode>(NewOp.getNode())) {
+ if (isa<Function>(
+ GlobalOp->getGlobal()->stripPointerCastsAndAliases()))
+ Op = NewOp;
+ } else if (isa<ExternalSymbolSDNode>(NewOp.getNode())) {
+ Op = NewOp;
+ }
+ }
Ops.push_back(Op);
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 94544800a6fb..f00d02ad4190 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -125,8 +125,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
setOperationAction(Op, T, Expand);
// Note supported floating-point library function operators that otherwise
// default to expand.
- for (auto Op :
- {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT, ISD::FRINT})
+ for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
+ ISD::FRINT, ISD::FROUNDEVEN})
setOperationAction(Op, T, Legal);
// Support minimum and maximum, which otherwise default to expand.
setOperationAction(ISD::FMINIMUM, T, Legal);
@@ -157,6 +157,12 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// SIMD-specific configuration
if (Subtarget->hasSIMD128()) {
+ // Combine vector mask reductions into alltrue/anytrue
+ setTargetDAGCombine(ISD::SETCC);
+
+ // Convert vector to integer bitcasts to bitmask
+ setTargetDAGCombine(ISD::BITCAST);
+
// Hoist bitcasts out of shuffles
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
@@ -196,7 +202,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// Support splatting
for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
- MVT::v2f64})
+ MVT::v2f64})
setOperationAction(ISD::SPLAT_VECTOR, T, Legal);
// Custom lowering since wasm shifts must have a scalar shift amount
@@ -241,7 +247,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// Expand float operations supported for scalars but not SIMD
for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
- ISD::FEXP, ISD::FEXP2, ISD::FRINT})
+ ISD::FEXP, ISD::FEXP2})
for (auto T : {MVT::v4f32, MVT::v2f64})
setOperationAction(Op, T, Expand);
@@ -258,6 +264,12 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
// But saturating fp_to_int conversions are
for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
setOperationAction(Op, MVT::v4i32, Custom);
+
+ // Support vector extending
+ for (auto T : MVT::integer_fixedlen_vector_valuetypes()) {
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, T, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, T, Custom);
+ }
}
// As a special case, these operators use the type to mean the type to
@@ -534,11 +546,12 @@ LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
- bool IsIndirect = CallParams.getOperand(0).isReg();
+ bool IsIndirect =
+ CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
bool IsFuncrefCall = false;
- if (IsIndirect) {
+ if (IsIndirect && CallParams.getOperand(0).isReg()) {
Register Reg = CallParams.getOperand(0).getReg();
const MachineFunction *MF = BB->getParent();
const MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -1201,8 +1214,8 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Lastly, if this is a call to a funcref we need to add an instruction
// table.set to the chain and transform the call.
- if (CLI.CB &&
- WebAssembly::isFuncrefType(CLI.CB->getCalledOperand()->getType())) {
+ if (CLI.CB && WebAssembly::isWebAssemblyFuncrefType(
+ CLI.CB->getCalledOperand()->getType())) {
// In the absence of function references proposal where a funcref call is
// lowered to call_ref, using reference types we generate a table.set to set
// the funcref to a special table used solely for this purpose, followed by
@@ -1373,6 +1386,11 @@ void WebAssemblyTargetLowering::ReplaceNodeResults(
// SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
// illegal type.
break;
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ // Do not add any results, signifying that N should not be custom lowered.
+ // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
+ break;
default:
llvm_unreachable(
"ReplaceNodeResults not implemented for this op for WebAssembly!");
@@ -1423,6 +1441,9 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
return LowerIntrinsic(Op, DAG);
case ISD::SIGN_EXTEND_INREG:
return LowerSIGN_EXTEND_INREG(Op, DAG);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ return LowerEXTEND_VECTOR_INREG(Op, DAG);
case ISD::BUILD_VECTOR:
return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE:
@@ -1822,7 +1843,8 @@ SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
if (MaskIdx.isUndef() ||
cast<ConstantSDNode>(MaskIdx.getNode())->getZExtValue() >= 32) {
- Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32);
+ bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
+ Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
} else {
Ops[OpIdx++] = MaskIdx;
}
@@ -1875,6 +1897,48 @@ WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
Op.getOperand(1));
}
+SDValue
+WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+
+ if (SrcVT.getVectorElementType() == MVT::i1 ||
+ SrcVT.getVectorElementType() == MVT::i64)
+ return SDValue();
+
+ assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
+ "Unexpected extension factor.");
+ unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
+
+ if (Scale != 2 && Scale != 4 && Scale != 8)
+ return SDValue();
+
+ unsigned Ext;
+ switch (Op.getOpcode()) {
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Ext = WebAssemblyISD::EXTEND_LOW_U;
+ break;
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ Ext = WebAssemblyISD::EXTEND_LOW_S;
+ break;
+ }
+
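+  // Worked example (illustrative): sign_extend_vector_inreg from v16i8 to
+  // v4i32 has Scale == 4, so the loop below emits two extend_low_s nodes:
+  // v16i8 -> v8i16, then v8i16 -> v4i32.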
+ SDValue Ret = Src;
+ while (Scale != 1) {
+ Ret = DAG.getNode(Ext, DL,
+ Ret.getValueType()
+ .widenIntegerVectorElementType(*DAG.getContext())
+ .getHalfNumVectorElementsVT(*DAG.getContext()),
+ Ret);
+ Scale /= 2;
+ }
+ assert(Ret.getValueType() == VT);
+ return Ret;
+}
+
static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
if (Op.getValueType() != MVT::v2f64)
@@ -2150,7 +2214,8 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
assert((LaneBits == 64 || Val >= -(1ll << (LaneBits - 1))) &&
"Unexpected out of bounds negative value");
if (Const && LaneBits != 64 && Val > (1ll << (LaneBits - 1)) - 1) {
- auto NewVal = ((uint64_t)Val % (1ll << LaneBits)) - (1ll << LaneBits);
+ uint64_t Mask = (1ll << LaneBits) - 1;
+ auto NewVal = (((uint64_t)Val & Mask) - (1ll << LaneBits)) & Mask;
ConstLanes.push_back(DAG.getConstant(NewVal, SDLoc(Lane), LaneT));
} else {
ConstLanes.push_back(Lane);
@@ -2240,7 +2305,7 @@ WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
SelectionDAG &DAG) const {
// Allow constant lane indices, expand variable lane indices
SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
- if (isa<ConstantSDNode>(IdxNode) || IdxNode->isUndef()) {
+ if (isa<ConstantSDNode>(IdxNode)) {
// Ensure the index type is i32 to match the tablegen patterns
uint64_t Idx = cast<ConstantSDNode>(IdxNode)->getZExtValue();
SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
@@ -2287,10 +2352,43 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
// Only manually lower vector shifts
assert(Op.getSimpleValueType().isVector());
- auto ShiftVal = DAG.getSplatValue(Op.getOperand(1));
+ uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
+ auto ShiftVal = Op.getOperand(1);
+
+  // Try to skip the bitmask operation since it is implied inside the shift
+  // instruction.
+ auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
+ if (MaskOp.getOpcode() != ISD::AND)
+ return MaskOp;
+ SDValue LHS = MaskOp.getOperand(0);
+ SDValue RHS = MaskOp.getOperand(1);
+ if (MaskOp.getValueType().isVector()) {
+ APInt MaskVal;
+ if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
+ std::swap(LHS, RHS);
+
+ if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
+ MaskVal == MaskBits)
+ MaskOp = LHS;
+ } else {
+ if (!isa<ConstantSDNode>(RHS.getNode()))
+ std::swap(LHS, RHS);
+
+ auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
+ if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
+ MaskOp = LHS;
+ }
+
+ return MaskOp;
+ };
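+  // Illustrative example: for a v4i32 shift LaneBits is 32, so both
+  // shl X, (and (splat Y), (splat 31)) and, after splat extraction,
+  // and Y, 31 collapse to plain Y here, because the wasm shift instruction
+  // already masks the shift amount by the lane width.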
+
+  // Skip a vector AND (mask) operation
+ ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
+ ShiftVal = DAG.getSplatValue(ShiftVal);
if (!ShiftVal)
return unrollVectorShift(Op, DAG);
+  // Skip a scalar AND (mask) operation
+ ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
// Use anyext because none of the high bits can affect the shift
ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
@@ -2656,12 +2754,92 @@ static SDValue performTruncateCombine(SDNode *N,
return truncateVectorWithNARROW(OutVT, In, DL, DAG);
}
+static SDValue performBitcastCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ auto &DAG = DCI.DAG;
+ SDLoc DL(N);
+ SDValue Src = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT SrcVT = Src.getValueType();
+
+ // bitcast <N x i1> to iN
+ // ==> bitmask
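+  // e.g. (illustrative) bitcast (v16i1 X) to i16 becomes
+  // trunc (i32 (wasm.bitmask (sext X to v16i8))) to i16, selecting the
+  // i8x16.bitmask instruction instead of a per-lane extraction sequence.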
+ if (DCI.isBeforeLegalize() && VT.isScalarInteger() &&
+ SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1) {
+ unsigned NumElts = SrcVT.getVectorNumElements();
+ if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ return SDValue();
+ EVT Width = MVT::getIntegerVT(128 / NumElts);
+ return DAG.getZExtOrTrunc(
+ DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
+ {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
+ DAG.getSExtOrTrunc(N->getOperand(0), DL,
+ SrcVT.changeVectorElementType(Width))}),
+ DL, VT);
+ }
+
+ return SDValue();
+}
+
+static SDValue performSETCCCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ auto &DAG = DCI.DAG;
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ // setcc (iN (bitcast (vNi1 X))), 0, ne
+ // ==> any_true (vNi1 X)
+ // setcc (iN (bitcast (vNi1 X))), 0, eq
+ // ==> xor (any_true (vNi1 X)), -1
+ // setcc (iN (bitcast (vNi1 X))), -1, eq
+ // ==> all_true (vNi1 X)
+ // setcc (iN (bitcast (vNi1 X))), -1, ne
+ // ==> xor (all_true (vNi1 X)), -1
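+  // e.g. (illustrative) setcc (i4 (bitcast (v4i1 X))), 0, ne becomes
+  // i32x4.any_true on the v4i32 sign-extension of X; the eq-with-zero and
+  // ne-with-all-ones forms additionally get the NOT shown above.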
+ if (DCI.isBeforeLegalize() && VT.isScalarInteger() &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ (isNullConstant(RHS) || isAllOnesConstant(RHS)) &&
+ LHS->getOpcode() == ISD::BITCAST) {
+ EVT FromVT = LHS->getOperand(0).getValueType();
+ if (FromVT.isFixedLengthVector() &&
+ FromVT.getVectorElementType() == MVT::i1) {
+ int Intrin = isNullConstant(RHS) ? Intrinsic::wasm_anytrue
+ : Intrinsic::wasm_alltrue;
+ unsigned NumElts = FromVT.getVectorNumElements();
+ if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
+ return SDValue();
+ EVT Width = MVT::getIntegerVT(128 / NumElts);
+ SDValue Ret = DAG.getZExtOrTrunc(
+ DAG.getNode(
+ ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
+ {DAG.getConstant(Intrin, DL, MVT::i32),
+ DAG.getSExtOrTrunc(LHS->getOperand(0), DL,
+ FromVT.changeVectorElementType(Width))}),
+ DL, MVT::i1);
+ if ((isNullConstant(RHS) && (Cond == ISD::SETEQ)) ||
+ (isAllOnesConstant(RHS) && (Cond == ISD::SETNE))) {
+ Ret = DAG.getNOT(DL, Ret, MVT::i1);
+ }
+ return DAG.getZExtOrTrunc(Ret, DL, VT);
+ }
+ }
+
+ return SDValue();
+}
+
SDValue
WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default:
return SDValue();
+ case ISD::BITCAST:
+ return performBitcastCombine(N, DCI);
+ case ISD::SETCC:
+ return performSETCCCombine(N, DCI);
case ISD::VECTOR_SHUFFLE:
return performVECTOR_SHUFFLECombine(N, DCI);
case ISD::SIGN_EXTEND:
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index f70f85fe6ddd..ecf5d5b1ea5d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -131,6 +131,7 @@ private:
SDValue LowerCopyToReg(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerIntrinsic(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
@@ -140,11 +141,6 @@ private:
SDValue LowerLoad(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerStore(SDValue Op, SelectionDAG &DAG) const;
- // Helper for LoadLoad and LowerStore
- bool MatchTableForLowering(SelectionDAG &DAG, const SDLoc &DL,
- const SDValue &Base, GlobalAddressSDNode *&GA,
- SDValue &Idx) const;
-
// Custom DAG combine hooks
SDValue
PerformDAGCombine(SDNode *N,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
index 6a123f8f4030..ca9a5ef9dda1 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -73,7 +73,7 @@ defm RET_CALL :
"return_call \t$callee", "return_call\t$callee", 0x12>,
Requires<[HasTailCall]>;
-let isReturn = 1 in
+let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in
defm RET_CALL_INDIRECT :
I<(outs), (ins TypeIndex:$type, table32_op:$table, variable_ops),
(outs), (ins TypeIndex:$type, table32_op:$table), [],
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
index 104f5f7d2e68..cc9a9f86f683 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
@@ -76,6 +76,10 @@ def : Pat<(fcopysign F32:$lhs, F64:$rhs),
def : Pat<(frint f32:$src), (NEAREST_F32 f32:$src)>;
def : Pat<(frint f64:$src), (NEAREST_F64 f64:$src)>;
+// WebAssembly always rounds ties-to-even, so map froundeven to fnearbyint.
+def : Pat<(froundeven f32:$src), (NEAREST_F32 f32:$src)>;
+def : Pat<(froundeven f64:$src), (NEAREST_F64 f64:$src)>;
+
let isCommutable = 1 in {
defm EQ : ComparisonFP<SETOEQ, "eq ", 0x5b, 0x61>;
defm NE : ComparisonFP<SETUNE, "ne ", 0x5c, 0x62>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index ad2ec40b8b31..8cd41d7017a0 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -376,7 +376,7 @@ multiclass SIMDStoreLane<Vec vec, bits<32> simdop> {
[], name#"\t${off}(${addr})$p2align, $vec, $idx",
name#"\t$off$p2align, $idx", simdop>;
defm STORE_LANE_#vec#_A64 :
- SIMD_I<(outs V128:$dst),
+ SIMD_I<(outs),
(ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx,
I64:$addr, V128:$vec),
(outs), (ins P2Align:$p2align, offset64_op:$off, vec_i8imm_op:$idx),
@@ -529,6 +529,22 @@ defm SHUFFLE :
def wasm_shuffle_t : SDTypeProfile<1, 18, []>;
def wasm_shuffle : SDNode<"WebAssemblyISD::SHUFFLE", wasm_shuffle_t>;
foreach vec = AllVecs in {
+// The @llvm.wasm.shuffle intrinsic has immediate arguments that become TargetConstants.
+def : Pat<(vec.vt (wasm_shuffle (vec.vt V128:$x), (vec.vt V128:$y),
+ (i32 timm:$m0), (i32 timm:$m1),
+ (i32 timm:$m2), (i32 timm:$m3),
+ (i32 timm:$m4), (i32 timm:$m5),
+ (i32 timm:$m6), (i32 timm:$m7),
+ (i32 timm:$m8), (i32 timm:$m9),
+ (i32 timm:$mA), (i32 timm:$mB),
+ (i32 timm:$mC), (i32 timm:$mD),
+ (i32 timm:$mE), (i32 timm:$mF))),
+ (SHUFFLE $x, $y,
+ imm:$m0, imm:$m1, imm:$m2, imm:$m3,
+ imm:$m4, imm:$m5, imm:$m6, imm:$m7,
+ imm:$m8, imm:$m9, imm:$mA, imm:$mB,
+ imm:$mC, imm:$mD, imm:$mE, imm:$mF)>;
+// Normal shufflevector instructions may have normal constant arguments.
def : Pat<(vec.vt (wasm_shuffle (vec.vt V128:$x), (vec.vt V128:$y),
(i32 LaneIdx32:$m0), (i32 LaneIdx32:$m1),
(i32 LaneIdx32:$m2), (i32 LaneIdx32:$m3),
@@ -971,6 +987,12 @@ def : Pat<(wasm_shr_s (v4i32 V128:$lhs), (and I32:$rhs, 31)),
def : Pat<(wasm_shr_u (v4i32 V128:$lhs), (and I32:$rhs, 31)),
(SHR_U_I32x4 V128:$lhs, I32:$rhs)>;
+def : Pat<(wasm_shl (v2i64 V128:$lhs), (and I32:$rhs, 63)),
+ (SHL_I64x2 V128:$lhs, I32:$rhs)>;
+def : Pat<(wasm_shr_s (v2i64 V128:$lhs), (and I32:$rhs, 63)),
+ (SHR_S_I64x2 V128:$lhs, I32:$rhs)>;
+def : Pat<(wasm_shr_u (v2i64 V128:$lhs), (and I32:$rhs, 63)),
+ (SHR_U_I64x2 V128:$lhs, I32:$rhs)>;
def : Pat<(wasm_shl (v2i64 V128:$lhs), (trunc (and I64:$rhs, 63))),
(SHL_I64x2 V128:$lhs, (I32_WRAP_I64 I64:$rhs))>;
def : Pat<(wasm_shr_s (v2i64 V128:$lhs), (trunc (and I64:$rhs, 63))),
@@ -1136,6 +1158,14 @@ defm FLOOR : SIMDUnary<F64x2, ffloor, "floor", 0x75>;
defm TRUNC: SIMDUnary<F64x2, ftrunc, "trunc", 0x7a>;
defm NEAREST: SIMDUnary<F64x2, fnearbyint, "nearest", 0x94>;
+// WebAssembly doesn't expose inexact exceptions, so map frint to fnearbyint.
+def : Pat<(v4f32 (frint (v4f32 V128:$src))), (NEAREST_F32x4 V128:$src)>;
+def : Pat<(v2f64 (frint (v2f64 V128:$src))), (NEAREST_F64x2 V128:$src)>;
+
+// WebAssembly always rounds ties-to-even, so map froundeven to fnearbyint.
+def : Pat<(v4f32 (froundeven (v4f32 V128:$src))), (NEAREST_F32x4 V128:$src)>;
+def : Pat<(v2f64 (froundeven (v2f64 V128:$src))), (NEAREST_F64x2 V128:$src)>;
+
//===----------------------------------------------------------------------===//
// Floating-point binary arithmetic
//===----------------------------------------------------------------------===//
@@ -1166,13 +1196,21 @@ defm MIN : SIMDBinaryFP<fminimum, "min", 232>;
defm MAX : SIMDBinaryFP<fmaximum, "max", 233>;
// Pseudo-minimum: pmin
-def pmin : PatFrag<(ops node:$lhs, node:$rhs),
- (vselect (setolt $rhs, $lhs), $rhs, $lhs)>;
+def pmin : PatFrags<(ops node:$lhs, node:$rhs), [
+ (vselect (setolt $rhs, $lhs), $rhs, $lhs),
+ (vselect (setole $rhs, $lhs), $rhs, $lhs),
+ (vselect (setogt $lhs, $rhs), $rhs, $lhs),
+ (vselect (setoge $lhs, $rhs), $rhs, $lhs)
+]>;
defm PMIN : SIMDBinaryFP<pmin, "pmin", 234>;
// Pseudo-maximum: pmax
-def pmax : PatFrag<(ops node:$lhs, node:$rhs),
- (vselect (setolt $lhs, $rhs), $rhs, $lhs)>;
+def pmax : PatFrags<(ops node:$lhs, node:$rhs), [
+ (vselect (setogt $rhs, $lhs), $rhs, $lhs),
+ (vselect (setoge $rhs, $lhs), $rhs, $lhs),
+ (vselect (setolt $lhs, $rhs), $rhs, $lhs),
+ (vselect (setole $lhs, $rhs), $rhs, $lhs)
+]>;
defm PMAX : SIMDBinaryFP<pmax, "pmax", 235>;
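+
+// The extra PatFrags alternatives above cover the commuted and non-strict
+// comparison forms (e.g. vselect (setole $rhs, $lhs), $rhs, $lhs) that DAG
+// canonicalization can produce for the same pseudo-min/max idiom; all of them
+// still select the single pmin/pmax instruction.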
// Also match the pmin/pmax cases where the operands are int vectors (but the
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
index 5faa098b94ad..4b8fdcf3a5b3 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -267,7 +267,7 @@
///
///===----------------------------------------------------------------------===//
-#include "Utils/WebAssemblyUtilities.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
#include "WebAssemblyTargetMachine.h"
#include "llvm/ADT/StringExtras.h"
@@ -580,7 +580,7 @@ Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallBase *CI) {
FunctionType *CalleeFTy = CI->getFunctionType();
std::string Sig = getSignature(CalleeFTy);
- if (InvokeWrappers.find(Sig) != InvokeWrappers.end())
+ if (InvokeWrappers.contains(Sig))
return InvokeWrappers[Sig];
// Put the pointer to the callee as first argument
@@ -1217,7 +1217,7 @@ bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
for (unsigned I = 0, E = LPI->getNumClauses(); I < E; ++I) {
Constant *Clause = LPI->getClause(I);
// TODO Handle filters (= exception specifications).
- // https://bugs.llvm.org/show_bug.cgi?id=50396
+ // https://github.com/llvm/llvm-project/issues/49740
if (LPI->isCatch(I))
FMCArgs.push_back(Clause);
}
@@ -1726,10 +1726,8 @@ void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj(
// that requires multivalue support in the toolchain, which is currently not
// very reliable. We instead throw and catch a pointer to a struct value of
// type 'struct __WasmLongjmpArgs', which is defined in Emscripten.
- Instruction *CatchCI =
+ Instruction *LongjmpArgs =
IRB.CreateCall(CatchF, {IRB.getInt32(WebAssembly::C_LONGJMP)}, "thrown");
- Value *LongjmpArgs =
- IRB.CreateBitCast(CatchCI, LongjmpArgsTy->getPointerTo(), "longjmp.args");
Value *EnvField =
IRB.CreateConstGEP2_32(LongjmpArgsTy, LongjmpArgs, 0, 0, "env_gep");
Value *ValField =
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp
index 6fd87f10150d..94b6e41e87d0 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp
@@ -62,8 +62,9 @@ bool WebAssemblyLowerRefTypesIntPtrConv::runOnFunction(Function &F) {
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
PtrToIntInst *PTI = dyn_cast<PtrToIntInst>(&*I);
IntToPtrInst *ITP = dyn_cast<IntToPtrInst>(&*I);
- if (!(PTI && WebAssembly::isRefType(PTI->getPointerOperand()->getType())) &&
- !(ITP && WebAssembly::isRefType(ITP->getDestTy())))
+ if (!(PTI && WebAssembly::isWebAssemblyReferenceType(
+ PTI->getPointerOperand()->getType())) &&
+ !(ITP && WebAssembly::isWebAssemblyReferenceType(ITP->getDestTy())))
continue;
UndefValue *U = UndefValue::get(I->getType());
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index 85ece58f98b3..5ceeebdeab5e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -140,8 +140,8 @@ MCOperand WebAssemblyMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
}
MCOperand WebAssemblyMCInstLower::lowerTypeIndexOperand(
- SmallVector<wasm::ValType, 1> &&Returns,
- SmallVector<wasm::ValType, 4> &&Params) const {
+ SmallVectorImpl<wasm::ValType> &&Returns,
+ SmallVectorImpl<wasm::ValType> &&Params) const {
auto Signature = std::make_unique<wasm::WasmSignature>(std::move(Returns),
std::move(Params));
MCSymbol *Sym = Printer.createTempSymbol("typeindex");
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
index d79c54097eb7..9f08499e5cde 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
@@ -34,8 +34,8 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower {
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
- MCOperand lowerTypeIndexOperand(SmallVector<wasm::ValType, 1> &&,
- SmallVector<wasm::ValType, 4> &&) const;
+ MCOperand lowerTypeIndexOperand(SmallVectorImpl<wasm::ValType> &&,
+ SmallVectorImpl<wasm::ValType> &&) const;
public:
WebAssemblyMCInstLower(MCContext &ctx, WebAssemblyAsmPrinter &printer)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyNullifyDebugValueLists.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyNullifyDebugValueLists.cpp
index 5d8c58dcc334..b58f7a0152ae 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyNullifyDebugValueLists.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyNullifyDebugValueLists.cpp
@@ -9,7 +9,7 @@
/// \file
/// Nullify DBG_VALUE_LISTs instructions as a temporary measure before we
/// implement DBG_VALUE_LIST handling in WebAssemblyDebugValueManager.
-/// See https://bugs.llvm.org/show_bug.cgi?id=50361.
+/// See https://github.com/llvm/llvm-project/issues/49705.
/// TODO Correctly handle DBG_VALUE_LISTs
///
//===----------------------------------------------------------------------===//
@@ -48,22 +48,17 @@ bool WebAssemblyNullifyDebugValueLists::runOnMachineFunction(
LLVM_DEBUG(dbgs() << "********** Nullify DBG_VALUE_LISTs **********\n"
"********** Function: "
<< MF.getName() << '\n');
- const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
- SmallVector<MachineInstr *, 2> DbgValueLists;
- for (auto &MBB : MF)
- for (auto &MI : MBB)
- if (MI.getOpcode() == TargetOpcode::DBG_VALUE_LIST)
- DbgValueLists.push_back(&MI);
-
+ bool Changed = false;
// Our backend, including WebAssemblyDebugValueManager, currently cannot
- // handle DBG_VALUE_LISTs correctly. So this converts DBG_VALUE_LISTs to
- // "DBG_VALUE $noreg", which will appear as "optimized out".
- for (auto *DVL : DbgValueLists) {
- BuildMI(*DVL->getParent(), DVL, DVL->getDebugLoc(),
- TII.get(TargetOpcode::DBG_VALUE), false, Register(),
- DVL->getOperand(0).getMetadata(), DVL->getOperand(1).getMetadata());
- DVL->eraseFromParent();
+ // handle DBG_VALUE_LISTs correctly. So this makes them undefined, which will
+ // appear as "optimized out".
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ if (MI.getOpcode() == TargetOpcode::DBG_VALUE_LIST) {
+ MI.setDebugValueUndef();
+ Changed = true;
+ }
+ }
}
-
- return !DbgValueLists.empty();
+ return Changed;
}
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
index 5252db4858b9..4a6d37d7052e 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
@@ -72,6 +72,152 @@ static float computeWeight(const MachineRegisterInfo *MRI,
return Weight;
}
+// Create a map of "Register -> vector of <SlotIndex, DBG_VALUE>".
+// The SlotIndex is the slot index of the next non-debug instruction or the end
+// of a BB, because DBG_VALUEs don't have slot indices themselves.
+// Adapted from RegisterCoalescer::buildVRegToDbgValueMap.
+static DenseMap<Register, std::vector<std::pair<SlotIndex, MachineInstr *>>>
+buildVRegToDbgValueMap(MachineFunction &MF, const LiveIntervals *Liveness) {
+ DenseMap<Register, std::vector<std::pair<SlotIndex, MachineInstr *>>>
+ DbgVRegToValues;
+ const SlotIndexes *Slots = Liveness->getSlotIndexes();
+ SmallVector<MachineInstr *, 8> ToInsert;
+
+ // After collecting a block of DBG_VALUEs into ToInsert, enter them into the
+ // map.
+ auto CloseNewDVRange = [&DbgVRegToValues, &ToInsert](SlotIndex Slot) {
+ for (auto *X : ToInsert) {
+ for (const auto &Op : X->debug_operands()) {
+ if (Op.isReg() && Op.getReg().isVirtual())
+ DbgVRegToValues[Op.getReg()].push_back({Slot, X});
+ }
+ }
+
+ ToInsert.clear();
+ };
+
+ // Iterate over all instructions, collecting them into the ToInsert vector.
+ // Once a non-debug instruction is found, record the slot index of the
+ // collected DBG_VALUEs.
+ for (auto &MBB : MF) {
+ SlotIndex CurrentSlot = Slots->getMBBStartIdx(&MBB);
+
+ for (auto &MI : MBB) {
+ if (MI.isDebugValue()) {
+ if (any_of(MI.debug_operands(), [](const MachineOperand &MO) {
+ return MO.isReg() && MO.getReg().isVirtual();
+ }))
+ ToInsert.push_back(&MI);
+ } else if (!MI.isDebugOrPseudoInstr()) {
+ CurrentSlot = Slots->getInstructionIndex(MI);
+ CloseNewDVRange(CurrentSlot);
+ }
+ }
+
+ // Close range of DBG_VALUEs at the end of blocks.
+ CloseNewDVRange(Slots->getMBBEndIdx(&MBB));
+ }
+
+ // Sort all DBG_VALUEs we've seen by slot number.
+ for (auto &Pair : DbgVRegToValues)
+ llvm::sort(Pair.second);
+ return DbgVRegToValues;
+}
+
+// After register coalescing, some DBG_VALUEs will be invalid. Set them undef.
+// This function has to run before the actual coalescing, i.e., before the
+// registers are rewritten.
+static void undefInvalidDbgValues(
+ const LiveIntervals *Liveness,
+ const ArrayRef<SmallVector<LiveInterval *, 4>> &Assignments,
+ DenseMap<Register, std::vector<std::pair<SlotIndex, MachineInstr *>>>
+ &DbgVRegToValues) {
+#ifndef NDEBUG
+ DenseSet<Register> SeenRegs;
+#endif
+ for (size_t I = 0, E = Assignments.size(); I < E; ++I) {
+ const auto &CoalescedIntervals = Assignments[I];
+ if (CoalescedIntervals.empty())
+ continue;
+ for (LiveInterval *LI : CoalescedIntervals) {
+ Register Reg = LI->reg();
+#ifndef NDEBUG
+ // Ensure we don't process the same register twice
+ assert(SeenRegs.insert(Reg).second);
+#endif
+ auto RegMapIt = DbgVRegToValues.find(Reg);
+ if (RegMapIt == DbgVRegToValues.end())
+ continue;
+ SlotIndex LastSlot;
+ bool LastUndefResult = false;
+ for (auto [Slot, DbgValue] : RegMapIt->second) {
+        // Consecutive DBG_VALUEs share the same slot index, because the index
+        // recorded for each of them is that of the first non-debug instruction
+        // after them; DBG_VALUEs don't have slot indices of their own. Before
+        // doing live range queries, quickly check whether the current
+        // DBG_VALUE has the same slot index as the previous one, in which case
+        // the result is the same. Note that RegMapIt->second, the vector of
+        // {SlotIndex, DBG_VALUE}, is sorted by SlotIndex, which is necessary
+        // for this check.
+ if (Slot == LastSlot) {
+ if (LastUndefResult) {
+ LLVM_DEBUG(dbgs() << "Undefed: " << *DbgValue << "\n");
+ DbgValue->setDebugValueUndef();
+ }
+ continue;
+ }
+ LastSlot = Slot;
+ LastUndefResult = false;
+ for (LiveInterval *OtherLI : CoalescedIntervals) {
+ if (LI == OtherLI)
+ continue;
+
+ // This DBG_VALUE has 'Reg' (the current LiveInterval's register) as
+ // its operand. If this DBG_VALUE's slot index is within other
+ // registers' live ranges, this DBG_VALUE should be undefed. For
+ // example, suppose %0 and %1 are to be coalesced into %0.
+ // ; %0's live range starts
+ // %0 = value_0
+ // DBG_VALUE %0, !"a", ... (a)
+ // DBG_VALUE %1, !"b", ... (b)
+ // use %0
+ // ; %0's live range ends
+ // ...
+ // ; %1's live range starts
+ // %1 = value_1
+ // DBG_VALUE %0, !"c", ... (c)
+ // DBG_VALUE %1, !"d", ... (d)
+ // use %1
+ // ; %1's live range ends
+ //
+ // In this code, (b) and (c) should be set to undef. After the two
+ // registers are coalesced, (b) will incorrectly say the variable
+ // "b"'s value is 'value_0', and (c) will also incorrectly say the
+ // variable "c"'s value is value_1. Note it doesn't actually matter
+ // which register they are coalesced into (%0 or %1); (b) and (c)
+ // should be set to undef as well if they are coalesced into %1.
+ //
+          // This happens because DBG_VALUEs are not included when computing
+          // live ranges.
+ //
+          // Note that it is not possible for this DBG_VALUE to be
+          // simultaneously within 'Reg''s live range and one of the other
+          // coalesced registers' live ranges, because if their live ranges
+          // overlapped they would not have been selected as coalescing
+          // candidates in the first place.
+ auto *SegmentIt = OtherLI->find(Slot);
+ if (SegmentIt != OtherLI->end() && SegmentIt->contains(Slot)) {
+ LLVM_DEBUG(dbgs() << "Undefed: " << *DbgValue << "\n");
+ DbgValue->setDebugValueUndef();
+ LastUndefResult = true;
+ break;
+ }
+ }
+ }
+ }
+ }
+}
+
bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG({
dbgs() << "********** Register Coloring **********\n"
@@ -91,11 +237,17 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
&getAnalysis<MachineBlockFrequencyInfo>();
WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+ // We don't preserve SSA form.
+ MRI->leaveSSA();
+
// Gather all register intervals into a list and sort them.
unsigned NumVRegs = MRI->getNumVirtRegs();
SmallVector<LiveInterval *, 0> SortedIntervals;
SortedIntervals.reserve(NumVRegs);
+ // Record DBG_VALUEs and their SlotIndexes.
+ auto DbgVRegToValues = buildVRegToDbgValueMap(MF, Liveness);
+
LLVM_DEBUG(dbgs() << "Interesting register intervals:\n");
for (unsigned I = 0; I < NumVRegs; ++I) {
Register VReg = Register::index2VirtReg(I);
@@ -166,6 +318,9 @@ bool WebAssemblyRegColoring::runOnMachineFunction(MachineFunction &MF) {
if (!Changed)
return false;
+ // Set DBG_VALUEs that will be invalid after coalescing to undef.
+ undefInvalidDbgValues(Liveness, Assignments, DbgVRegToValues);
+
// Rewrite register operands.
for (size_t I = 0, E = SortedIntervals.size(); I < E; ++I) {
Register Old = SortedIntervals[I]->reg();
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 4b24f7fdb118..2e0df3c47841 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -278,12 +278,13 @@ static MachineInstr *getVRegDef(unsigned Reg, const MachineInstr *Insert,
}
// Test whether Reg, as defined at Def, has exactly one use. This is a
-// generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals
-// to handle complex cases.
-static bool hasOneUse(unsigned Reg, MachineInstr *Def, MachineRegisterInfo &MRI,
- MachineDominatorTree &MDT, LiveIntervals &LIS) {
+// generalization of MachineRegisterInfo::hasOneNonDBGUse that uses
+// LiveIntervals to handle complex cases.
+static bool hasOneNonDBGUse(unsigned Reg, MachineInstr *Def,
+ MachineRegisterInfo &MRI, MachineDominatorTree &MDT,
+ LiveIntervals &LIS) {
// Most registers are in SSA form here so we try a quick MRI query first.
- if (MRI.hasOneUse(Reg))
+ if (MRI.hasOneNonDBGUse(Reg))
return true;
bool HasOne = false;
@@ -525,11 +526,10 @@ static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op,
LLVM_DEBUG(dbgs() << "Move for single use: "; Def->dump());
WebAssemblyDebugValueManager DefDIs(Def);
- MBB.splice(Insert, &MBB, Def);
- DefDIs.move(Insert);
+ DefDIs.sink(Insert);
LIS.handleMove(*Def);
- if (MRI.hasOneDef(Reg) && MRI.hasOneUse(Reg)) {
+ if (MRI.hasOneDef(Reg) && MRI.hasOneNonDBGUse(Reg)) {
// No one else is using this register for anything so we can just stackify
// it in place.
MFI.stackifyVReg(MRI, Reg);
@@ -537,8 +537,8 @@ static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op,
// The register may have unrelated uses or defs; create a new register for
// just our one def and use so that we can stackify it.
Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
- Def->getOperand(0).setReg(NewReg);
Op.setReg(NewReg);
+ DefDIs.updateReg(NewReg);
// Tell LiveIntervals about the new register.
LIS.createAndComputeVirtRegInterval(NewReg);
@@ -551,8 +551,6 @@ static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op,
MFI.stackifyVReg(MRI, NewReg);
- DefDIs.updateReg(NewReg);
-
LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
}
@@ -560,6 +558,13 @@ static MachineInstr *moveForSingleUse(unsigned Reg, MachineOperand &Op,
return Def;
}
+static MachineInstr *getPrevNonDebugInst(MachineInstr *MI) {
+ for (auto *I = MI->getPrevNode(); I; I = I->getPrevNode())
+ if (!I->isDebugInstr())
+ return I;
+ return nullptr;
+}
+
/// A trivially cloneable instruction; clone it and nest the new copy with the
/// current instruction.
static MachineInstr *rematerializeCheapDef(
@@ -573,9 +578,10 @@ static MachineInstr *rematerializeCheapDef(
WebAssemblyDebugValueManager DefDIs(&Def);
Register NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
- TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
+ DefDIs.cloneSink(&*Insert, NewReg);
Op.setReg(NewReg);
- MachineInstr *Clone = &*std::prev(Insert);
+ MachineInstr *Clone = getPrevNonDebugInst(&*Insert);
+ assert(Clone);
LIS.InsertMachineInstrInMaps(*Clone);
LIS.createAndComputeVirtRegInterval(NewReg);
MFI.stackifyVReg(MRI, NewReg);
@@ -592,19 +598,13 @@ static MachineInstr *rematerializeCheapDef(
}
// If that was the last use of the original, delete the original.
- // Move or clone corresponding DBG_VALUEs to the 'Insert' location.
if (IsDead) {
LLVM_DEBUG(dbgs() << " - Deleting original\n");
SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot();
LIS.removePhysRegDefAt(MCRegister::from(WebAssembly::ARGUMENTS), Idx);
LIS.removeInterval(Reg);
LIS.RemoveMachineInstrFromMaps(Def);
- Def.eraseFromParent();
-
- DefDIs.move(&*Insert);
- DefDIs.updateReg(NewReg);
- } else {
- DefDIs.clone(&*Insert, NewReg);
+ DefDIs.removeDef();
}
return Clone;
@@ -636,28 +636,26 @@ static MachineInstr *moveAndTeeForMultiUse(
MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
LLVM_DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());
- WebAssemblyDebugValueManager DefDIs(Def);
+ const auto *RegClass = MRI.getRegClass(Reg);
+ Register TeeReg = MRI.createVirtualRegister(RegClass);
+ Register DefReg = MRI.createVirtualRegister(RegClass);
// Move Def into place.
- MBB.splice(Insert, &MBB, Def);
+ WebAssemblyDebugValueManager DefDIs(Def);
+ DefDIs.sink(Insert);
LIS.handleMove(*Def);
// Create the Tee and attach the registers.
- const auto *RegClass = MRI.getRegClass(Reg);
- Register TeeReg = MRI.createVirtualRegister(RegClass);
- Register DefReg = MRI.createVirtualRegister(RegClass);
MachineOperand &DefMO = Def->getOperand(0);
MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(),
TII->get(getTeeOpcode(RegClass)), TeeReg)
.addReg(Reg, RegState::Define)
.addReg(DefReg, getUndefRegState(DefMO.isDead()));
Op.setReg(TeeReg);
- DefMO.setReg(DefReg);
+ DefDIs.updateReg(DefReg);
SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot();
SlotIndex DefIdx = LIS.getInstructionIndex(*Def).getRegSlot();
- DefDIs.move(Insert);
-
// Tell LiveIntervals we moved the original vreg def from Def to Tee.
LiveInterval &LI = LIS.getInterval(Reg);
LiveInterval::iterator I = LI.FindSegmentContaining(DefIdx);
@@ -674,8 +672,11 @@ static MachineInstr *moveAndTeeForMultiUse(
imposeStackOrdering(Def);
imposeStackOrdering(Tee);
- DefDIs.clone(Tee, DefReg);
- DefDIs.clone(Insert, TeeReg);
+ // Even though 'TeeReg, Reg = TEE ...', has two defs, we don't need to clone
+ // DBG_VALUEs for both of them, given that the latter will cancel the former
+ // anyway. Here we only clone DBG_VALUEs for TeeReg, which will be converted
+ // to a local index in ExplicitLocals pass.
+ DefDIs.cloneSink(Insert, TeeReg, /* CloneDef */ false);
LLVM_DEBUG(dbgs() << " - Replaced register: "; Def->dump());
LLVM_DEBUG(dbgs() << " - Tee instruction: "; Tee->dump());
@@ -876,7 +877,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
bool SameBlock = DefI->getParent() == &MBB;
bool CanMove = SameBlock && isSafeToMove(Def, &Use, Insert, MFI, MRI) &&
!TreeWalker.isOnStack(Reg);
- if (CanMove && hasOneUse(Reg, DefI, MRI, MDT, LIS)) {
+ if (CanMove && hasOneNonDBGUse(Reg, DefI, MRI, MDT, LIS)) {
Insert = moveForSingleUse(Reg, Use, DefI, MBB, Insert, LIS, MFI, MRI);
// If we are removing the frame base reg completely, remove the debug
@@ -913,7 +914,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
Register DefReg = SubsequentDef->getReg();
Register UseReg = SubsequentUse->getReg();
// TODO: This single-use restriction could be relaxed by using tees
- if (DefReg != UseReg || !MRI.hasOneUse(DefReg))
+ if (DefReg != UseReg || !MRI.hasOneNonDBGUse(DefReg))
break;
MFI.stackifyVReg(MRI, DefReg);
++SubsequentDef;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
index 4fe339ce5293..2995b8816d1f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp
@@ -62,6 +62,8 @@ enum RuntimeLibcallSignature {
i32_func_i32_i32_iPTR,
i64_func_i64_i64,
i64_func_i64_i64_iPTR,
+ i64_i64_func_i32,
+ i64_i64_func_i64,
i64_i64_func_f32,
i64_i64_func_f64,
i16_i16_func_i16_i16,
@@ -71,20 +73,13 @@ enum RuntimeLibcallSignature {
i64_i64_func_i64_i64_i64_i64_iPTR,
i64_i64_i64_i64_func_i64_i64_i64_i64,
i64_i64_func_i64_i64_i32,
+ i64_i64_func_i64_i64_i64_i64_i64_i64,
iPTR_func_i32,
iPTR_func_iPTR_i32_iPTR,
iPTR_func_iPTR_iPTR_iPTR,
f32_func_f32_f32_f32,
f64_func_f64_f64_f64,
func_i64_i64_iPTR_iPTR,
- func_iPTR_f32,
- func_iPTR_f64,
- func_iPTR_i32,
- func_iPTR_i64,
- func_iPTR_i64_i64,
- func_iPTR_i64_i64_i32,
- func_iPTR_i64_i64_i64_i64,
- func_iPTR_i64_i64_i64_i64_i64_i64,
i32_func_i64_i64,
i32_func_i64_i64_i64_i64,
iPTR_func_f32,
@@ -156,73 +151,76 @@ struct RuntimeLibcallSignatureTable {
// All F80 and PPCF128 routines are unsupported.
Table[RTLIB::ADD_F32] = f32_func_f32_f32;
Table[RTLIB::ADD_F64] = f64_func_f64_f64;
- Table[RTLIB::ADD_F128] = func_iPTR_i64_i64_i64_i64;
+ Table[RTLIB::ADD_F128] = i64_i64_func_i64_i64_i64_i64;
Table[RTLIB::SUB_F32] = f32_func_f32_f32;
Table[RTLIB::SUB_F64] = f64_func_f64_f64;
- Table[RTLIB::SUB_F128] = func_iPTR_i64_i64_i64_i64;
+ Table[RTLIB::SUB_F128] = i64_i64_func_i64_i64_i64_i64;
Table[RTLIB::MUL_F32] = f32_func_f32_f32;
Table[RTLIB::MUL_F64] = f64_func_f64_f64;
- Table[RTLIB::MUL_F128] = func_iPTR_i64_i64_i64_i64;
+ Table[RTLIB::MUL_F128] = i64_i64_func_i64_i64_i64_i64;
Table[RTLIB::DIV_F32] = f32_func_f32_f32;
Table[RTLIB::DIV_F64] = f64_func_f64_f64;
- Table[RTLIB::DIV_F128] = func_iPTR_i64_i64_i64_i64;
+ Table[RTLIB::DIV_F128] = i64_i64_func_i64_i64_i64_i64;
Table[RTLIB::REM_F32] = f32_func_f32_f32;
Table[RTLIB::REM_F64] = f64_func_f64_f64;
- Table[RTLIB::REM_F128] = func_iPTR_i64_i64_i64_i64;
+ Table[RTLIB::REM_F128] = i64_i64_func_i64_i64_i64_i64;
Table[RTLIB::FMA_F32] = f32_func_f32_f32_f32;
Table[RTLIB::FMA_F64] = f64_func_f64_f64_f64;
- Table[RTLIB::FMA_F128] = func_iPTR_i64_i64_i64_i64_i64_i64;
+ Table[RTLIB::FMA_F128] = i64_i64_func_i64_i64_i64_i64_i64_i64;
Table[RTLIB::POWI_F32] = f32_func_f32_i32;
Table[RTLIB::POWI_F64] = f64_func_f64_i32;
- Table[RTLIB::POWI_F128] = func_iPTR_i64_i64_i32;
+ Table[RTLIB::POWI_F128] = i64_i64_func_i64_i64_i32;
Table[RTLIB::SQRT_F32] = f32_func_f32;
Table[RTLIB::SQRT_F64] = f64_func_f64;
- Table[RTLIB::SQRT_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::SQRT_F128] = i64_i64_func_i64_i64;
Table[RTLIB::CBRT_F32] = f32_func_f32;
Table[RTLIB::CBRT_F64] = f64_func_f64;
- Table[RTLIB::CBRT_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::CBRT_F128] = i64_i64_func_i64_i64;
Table[RTLIB::LOG_F32] = f32_func_f32;
Table[RTLIB::LOG_F64] = f64_func_f64;
- Table[RTLIB::LOG_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::LOG_F128] = i64_i64_func_i64_i64;
Table[RTLIB::LOG2_F32] = f32_func_f32;
Table[RTLIB::LOG2_F64] = f64_func_f64;
- Table[RTLIB::LOG2_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::LOG2_F128] = i64_i64_func_i64_i64;
Table[RTLIB::LOG10_F32] = f32_func_f32;
Table[RTLIB::LOG10_F64] = f64_func_f64;
- Table[RTLIB::LOG10_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::LOG10_F128] = i64_i64_func_i64_i64;
Table[RTLIB::EXP_F32] = f32_func_f32;
Table[RTLIB::EXP_F64] = f64_func_f64;
- Table[RTLIB::EXP_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::EXP_F128] = i64_i64_func_i64_i64;
Table[RTLIB::EXP2_F32] = f32_func_f32;
Table[RTLIB::EXP2_F64] = f64_func_f64;
- Table[RTLIB::EXP2_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::EXP2_F128] = i64_i64_func_i64_i64;
Table[RTLIB::SIN_F32] = f32_func_f32;
Table[RTLIB::SIN_F64] = f64_func_f64;
- Table[RTLIB::SIN_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::SIN_F128] = i64_i64_func_i64_i64;
Table[RTLIB::COS_F32] = f32_func_f32;
Table[RTLIB::COS_F64] = f64_func_f64;
- Table[RTLIB::COS_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::COS_F128] = i64_i64_func_i64_i64;
Table[RTLIB::SINCOS_F32] = func_f32_iPTR_iPTR;
Table[RTLIB::SINCOS_F64] = func_f64_iPTR_iPTR;
Table[RTLIB::SINCOS_F128] = func_i64_i64_iPTR_iPTR;
Table[RTLIB::POW_F32] = f32_func_f32_f32;
Table[RTLIB::POW_F64] = f64_func_f64_f64;
- Table[RTLIB::POW_F128] = func_iPTR_i64_i64_i64_i64;
+ Table[RTLIB::POW_F128] = i64_i64_func_i64_i64_i64_i64;
Table[RTLIB::CEIL_F32] = f32_func_f32;
Table[RTLIB::CEIL_F64] = f64_func_f64;
- Table[RTLIB::CEIL_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::CEIL_F128] = i64_i64_func_i64_i64;
Table[RTLIB::TRUNC_F32] = f32_func_f32;
Table[RTLIB::TRUNC_F64] = f64_func_f64;
- Table[RTLIB::TRUNC_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::TRUNC_F128] = i64_i64_func_i64_i64;
Table[RTLIB::RINT_F32] = f32_func_f32;
Table[RTLIB::RINT_F64] = f64_func_f64;
- Table[RTLIB::RINT_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::RINT_F128] = i64_i64_func_i64_i64;
Table[RTLIB::NEARBYINT_F32] = f32_func_f32;
Table[RTLIB::NEARBYINT_F64] = f64_func_f64;
- Table[RTLIB::NEARBYINT_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::NEARBYINT_F128] = i64_i64_func_i64_i64;
Table[RTLIB::ROUND_F32] = f32_func_f32;
Table[RTLIB::ROUND_F64] = f64_func_f64;
- Table[RTLIB::ROUND_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::ROUND_F128] = i64_i64_func_i64_i64;
+ Table[RTLIB::ROUNDEVEN_F32] = f32_func_f32;
+ Table[RTLIB::ROUNDEVEN_F64] = f64_func_f64;
+ Table[RTLIB::ROUNDEVEN_F128] = i64_i64_func_i64_i64;
Table[RTLIB::LROUND_F32] = iPTR_func_f32;
Table[RTLIB::LROUND_F64] = iPTR_func_f64;
Table[RTLIB::LROUND_F128] = iPTR_func_i64_i64;
@@ -237,21 +235,27 @@ struct RuntimeLibcallSignatureTable {
Table[RTLIB::LLRINT_F128] = i64_func_i64_i64;
Table[RTLIB::FLOOR_F32] = f32_func_f32;
Table[RTLIB::FLOOR_F64] = f64_func_f64;
- Table[RTLIB::FLOOR_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::FLOOR_F128] = i64_i64_func_i64_i64;
Table[RTLIB::COPYSIGN_F32] = f32_func_f32_f32;
Table[RTLIB::COPYSIGN_F64] = f64_func_f64_f64;
- Table[RTLIB::COPYSIGN_F128] = func_iPTR_i64_i64_i64_i64;
+ Table[RTLIB::COPYSIGN_F128] = i64_i64_func_i64_i64_i64_i64;
Table[RTLIB::FMIN_F32] = f32_func_f32_f32;
Table[RTLIB::FMIN_F64] = f64_func_f64_f64;
- Table[RTLIB::FMIN_F128] = func_iPTR_i64_i64_i64_i64;
+ Table[RTLIB::FMIN_F128] = i64_i64_func_i64_i64_i64_i64;
Table[RTLIB::FMAX_F32] = f32_func_f32_f32;
Table[RTLIB::FMAX_F64] = f64_func_f64_f64;
- Table[RTLIB::FMAX_F128] = func_iPTR_i64_i64_i64_i64;
+ Table[RTLIB::FMAX_F128] = i64_i64_func_i64_i64_i64_i64;
+ Table[RTLIB::LDEXP_F32] = f32_func_f32_i32;
+ Table[RTLIB::LDEXP_F64] = f64_func_f64_i32;
+ Table[RTLIB::LDEXP_F128] = i64_i64_func_i64_i64_i32;
+ Table[RTLIB::FREXP_F32] = f32_func_f32_i32;
+ Table[RTLIB::FREXP_F64] = f64_func_f64_i32;
+ Table[RTLIB::FREXP_F128] = i64_i64_func_i64_i64_i32;
// Conversion
// All F80 and PPCF128 routines are unsupported.
- Table[RTLIB::FPEXT_F64_F128] = func_iPTR_f64;
- Table[RTLIB::FPEXT_F32_F128] = func_iPTR_f32;
+ Table[RTLIB::FPEXT_F64_F128] = i64_i64_func_f64;
+ Table[RTLIB::FPEXT_F32_F128] = i64_i64_func_f32;
Table[RTLIB::FPEXT_F32_F64] = f64_func_f32;
Table[RTLIB::FPEXT_F16_F32] = f32_func_i16;
Table[RTLIB::FPROUND_F32_F16] = i16_func_f32;
@@ -280,22 +284,22 @@ struct RuntimeLibcallSignatureTable {
Table[RTLIB::FPTOUINT_F128_I128] = i64_i64_func_i64_i64;
Table[RTLIB::SINTTOFP_I32_F32] = f32_func_i32;
Table[RTLIB::SINTTOFP_I32_F64] = f64_func_i32;
- Table[RTLIB::SINTTOFP_I32_F128] = func_iPTR_i32;
+ Table[RTLIB::SINTTOFP_I32_F128] = i64_i64_func_i32;
Table[RTLIB::SINTTOFP_I64_F32] = f32_func_i64;
Table[RTLIB::SINTTOFP_I64_F64] = f64_func_i64;
- Table[RTLIB::SINTTOFP_I64_F128] = func_iPTR_i64;
+ Table[RTLIB::SINTTOFP_I64_F128] = i64_i64_func_i64;
Table[RTLIB::SINTTOFP_I128_F32] = f32_func_i64_i64;
Table[RTLIB::SINTTOFP_I128_F64] = f64_func_i64_i64;
- Table[RTLIB::SINTTOFP_I128_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::SINTTOFP_I128_F128] = i64_i64_func_i64_i64;
Table[RTLIB::UINTTOFP_I32_F32] = f32_func_i32;
Table[RTLIB::UINTTOFP_I32_F64] = f64_func_i64;
- Table[RTLIB::UINTTOFP_I32_F128] = func_iPTR_i32;
+ Table[RTLIB::UINTTOFP_I32_F128] = i64_i64_func_i32;
Table[RTLIB::UINTTOFP_I64_F32] = f32_func_i64;
Table[RTLIB::UINTTOFP_I64_F64] = f64_func_i64;
- Table[RTLIB::UINTTOFP_I64_F128] = func_iPTR_i64;
+ Table[RTLIB::UINTTOFP_I64_F128] = i64_i64_func_i64;
Table[RTLIB::UINTTOFP_I128_F32] = f32_func_i64_i64;
Table[RTLIB::UINTTOFP_I128_F64] = f64_func_i64_i64;
- Table[RTLIB::UINTTOFP_I128_F128] = func_iPTR_i64_i64;
+ Table[RTLIB::UINTTOFP_I128_F128] = i64_i64_func_i64_i64;
// Comparison
// All F80 and PPCF128 routines are unsupported.
@@ -501,7 +505,7 @@ struct StaticLibcallNameMap {
if (NameLibcall.first != nullptr &&
getRuntimeLibcallSignatures().Table[NameLibcall.second] !=
unsupported) {
- assert(Map.find(NameLibcall.first) == Map.end() &&
+ assert(!Map.contains(NameLibcall.first) &&
"duplicate libcall names in name map");
Map[NameLibcall.first] = NameLibcall.second;
}
@@ -687,72 +691,72 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
Params.push_back(PtrTy);
break;
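+  // Illustrative summary of the multi-result cases below: with the multivalue
+  // feature the i64 halves of the wide result are returned directly; without
+  // it the result is returned through a pointer passed as the first
+  // parameter, which is why PtrTy is pushed onto Params in the else branch.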
case i64_i64_func_f32:
-#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
- Rets.push_back(wasm::ValType::I64);
- Rets.push_back(wasm::ValType::I64);
-#else
- Params.push_back(PtrTy);
-#endif
+ if (Subtarget.hasMultivalue()) {
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+ } else {
+ Params.push_back(PtrTy);
+ }
Params.push_back(wasm::ValType::F32);
break;
case i64_i64_func_f64:
-#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
- Rets.push_back(wasm::ValType::I64);
- Rets.push_back(wasm::ValType::I64);
-#else
- Params.push_back(PtrTy);
-#endif
+ if (Subtarget.hasMultivalue()) {
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+ } else {
+ Params.push_back(PtrTy);
+ }
Params.push_back(wasm::ValType::F64);
break;
case i16_i16_func_i16_i16:
-#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
- Rets.push_back(wasm::ValType::I32);
- Rets.push_back(wasm::ValType::I32);
-#else
- Params.push_back(PtrTy);
-#endif
+ if (Subtarget.hasMultivalue()) {
+ Rets.push_back(wasm::ValType::I32);
+ Rets.push_back(wasm::ValType::I32);
+ } else {
+ Params.push_back(PtrTy);
+ }
Params.push_back(wasm::ValType::I32);
Params.push_back(wasm::ValType::I32);
break;
case i32_i32_func_i32_i32:
-#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
- Rets.push_back(wasm::ValType::I32);
- Rets.push_back(wasm::ValType::I32);
-#else
- Params.push_back(PtrTy);
-#endif
+ if (Subtarget.hasMultivalue()) {
+ Rets.push_back(wasm::ValType::I32);
+ Rets.push_back(wasm::ValType::I32);
+ } else {
+ Params.push_back(PtrTy);
+ }
Params.push_back(wasm::ValType::I32);
Params.push_back(wasm::ValType::I32);
break;
case i64_i64_func_i64_i64:
-#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
- Rets.push_back(wasm::ValType::I64);
- Rets.push_back(wasm::ValType::I64);
-#else
- Params.push_back(PtrTy);
-#endif
+ if (Subtarget.hasMultivalue()) {
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+ } else {
+ Params.push_back(PtrTy);
+ }
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
break;
case i64_i64_func_i64_i64_i64_i64:
-#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
- Rets.push_back(wasm::ValType::I64);
- Rets.push_back(wasm::ValType::I64);
-#else
- Params.push_back(PtrTy);
-#endif
+ if (Subtarget.hasMultivalue()) {
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+ } else {
+ Params.push_back(PtrTy);
+ }
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
break;
case i64_i64_func_i64_i64_i64_i64_iPTR:
-#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
- Rets.push_back(wasm::ValType::I64);
- Rets.push_back(wasm::ValType::I64);
-#else
- Params.push_back(PtrTy);
-#endif
+ if (Subtarget.hasMultivalue()) {
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+ } else {
+ Params.push_back(PtrTy);
+ }
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
@@ -760,28 +764,26 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
Params.push_back(PtrTy);
break;
case i64_i64_i64_i64_func_i64_i64_i64_i64:
-#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
- Rets.push_back(wasm::ValType::I64);
- Rets.push_back(wasm::ValType::I64);
- Rets.push_back(wasm::ValType::I64);
- Rets.push_back(wasm::ValType::I64);
-#else
- Params.push_back(PtrTy);
-#endif
+ if (Subtarget.hasMultivalue()) {
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+ } else {
+ Params.push_back(PtrTy);
+ }
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
break;
case i64_i64_func_i64_i64_i32:
-#if 0 // TODO: Enable this when wasm gets multiple-return-value support.
- Rets.push_back(wasm::ValType::I64);
- Rets.push_back(wasm::ValType::I64);
- Rets.push_back(wasm::ValType::I64);
- Rets.push_back(wasm::ValType::I64);
-#else
- Params.push_back(PtrTy);
-#endif
+ if (Subtarget.hasMultivalue()) {
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+ } else {
+ Params.push_back(PtrTy);
+ }
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I32);
@@ -820,49 +822,6 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
Params.push_back(PtrTy);
Params.push_back(PtrTy);
break;
- case func_iPTR_f32:
- Params.push_back(PtrTy);
- Params.push_back(wasm::ValType::F32);
- break;
- case func_iPTR_f64:
- Params.push_back(PtrTy);
- Params.push_back(wasm::ValType::F64);
- break;
- case func_iPTR_i32:
- Params.push_back(PtrTy);
- Params.push_back(wasm::ValType::I32);
- break;
- case func_iPTR_i64:
- Params.push_back(PtrTy);
- Params.push_back(wasm::ValType::I64);
- break;
- case func_iPTR_i64_i64:
- Params.push_back(PtrTy);
- Params.push_back(wasm::ValType::I64);
- Params.push_back(wasm::ValType::I64);
- break;
- case func_iPTR_i64_i64_i32:
- Params.push_back(PtrTy);
- Params.push_back(wasm::ValType::I64);
- Params.push_back(wasm::ValType::I64);
- Params.push_back(wasm::ValType::I32);
- break;
- case func_iPTR_i64_i64_i64_i64:
- Params.push_back(PtrTy);
- Params.push_back(wasm::ValType::I64);
- Params.push_back(wasm::ValType::I64);
- Params.push_back(wasm::ValType::I64);
- Params.push_back(wasm::ValType::I64);
- break;
- case func_iPTR_i64_i64_i64_i64_i64_i64:
- Params.push_back(PtrTy);
- Params.push_back(wasm::ValType::I64);
- Params.push_back(wasm::ValType::I64);
- Params.push_back(wasm::ValType::I64);
- Params.push_back(wasm::ValType::I64);
- Params.push_back(wasm::ValType::I64);
- Params.push_back(wasm::ValType::I64);
- break;
case i32_func_i64_i64:
Rets.push_back(wasm::ValType::I32);
Params.push_back(wasm::ValType::I64);
@@ -888,12 +847,44 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
Params.push_back(wasm::ValType::I64);
Params.push_back(wasm::ValType::I64);
break;
+ case i64_i64_func_i64_i64_i64_i64_i64_i64:
+ if (Subtarget.hasMultivalue()) {
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+ } else {
+ Params.push_back(PtrTy);
+ }
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ Params.push_back(wasm::ValType::I64);
+ break;
+ case i64_i64_func_i32:
+ if (Subtarget.hasMultivalue()) {
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+ } else {
+ Params.push_back(PtrTy);
+ }
+ Params.push_back(wasm::ValType::I32);
+ break;
+ case i64_i64_func_i64:
+ if (Subtarget.hasMultivalue()) {
+ Rets.push_back(wasm::ValType::I64);
+ Rets.push_back(wasm::ValType::I64);
+ } else {
+ Params.push_back(PtrTy);
+ }
+ Params.push_back(wasm::ValType::I64);
+ break;
case unsupported:
llvm_unreachable("unsupported runtime library signature");
}
}
-// TODO: If the RTLIB::Libcall-taking flavor of GetSignature remains unsed
+// TODO: If the RTLIB::Libcall-taking flavor of GetSignature remains unused
// other than here, just roll its logic into this version.
void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
StringRef Name,
@@ -904,9 +895,9 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget,
auto Val = Map.find(Name);
#ifndef NDEBUG
if (Val == Map.end()) {
- auto message = std::string("unexpected runtime library name: ") +
- std::string(Name);
- llvm_unreachable(message.c_str());
+ auto Message =
+ std::string("unexpected runtime library name: ") + std::string(Name);
+ llvm_unreachable(Message.c_str());
}
#endif
return getLibcallSignature(Subtarget, Val->second, Rets, Params);
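A minimal standalone C++ sketch of the pattern the libcall-signature hunks above switch to, assuming invented names (ValType, SigBuilder): when the subtarget reports multivalue support, an i64 pair result is modelled as two real return values; otherwise the result is lowered to an extra leading pointer parameter. This is illustrative only, not the WebAssembly libcall code itself.

#include <cassert>
#include <vector>

enum class ValType { I32, I64, Ptr };

struct SigBuilder {
  std::vector<ValType> Rets, Params;
  // Mirror of the branch used per libcall case above.
  void addI64PairResult(bool HasMultivalue) {
    if (HasMultivalue) {          // return both i64 halves directly
      Rets.push_back(ValType::I64);
      Rets.push_back(ValType::I64);
    } else {                      // lower the result to an out-pointer param
      Params.push_back(ValType::Ptr);
    }
  }
};

int main() {
  SigBuilder WithMV, WithoutMV;
  WithMV.addI64PairResult(true);
  WithoutMV.addI64PairResult(false);
  assert(WithMV.Rets.size() == 2 && WithMV.Params.empty());
  assert(WithoutMV.Rets.empty() && WithoutMV.Params.size() == 1);
  return 0;
}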
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 630c786a3dc7..6ef219f216a3 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -16,6 +16,7 @@
#include "TargetInfo/WebAssemblyTargetInfo.h"
#include "Utils/WebAssemblyUtilities.h"
#include "WebAssembly.h"
+#include "WebAssemblyISelLowering.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblyTargetObjectFile.h"
#include "WebAssemblyTargetTransformInfo.h"
@@ -98,13 +99,6 @@ static Reloc::Model getEffectiveRelocModel(std::optional<Reloc::Model> RM,
return Reloc::Static;
}
- if (!TT.isOSEmscripten()) {
- // Relocation modes other than static are currently implemented in a way
- // that only works for Emscripten, so disable them if we aren't targeting
- // Emscripten.
- return Reloc::Static;
- }
-
return *RM;
}
@@ -464,6 +458,15 @@ void WebAssemblyPassConfig::addIRPasses() {
}
void WebAssemblyPassConfig::addISelPrepare() {
+ WebAssemblyTargetMachine *WasmTM =
+ static_cast<WebAssemblyTargetMachine *>(TM);
+ const WebAssemblySubtarget *Subtarget =
+ WasmTM->getSubtargetImpl(std::string(WasmTM->getTargetCPU()),
+ std::string(WasmTM->getTargetFeatureString()));
+ if (Subtarget->hasReferenceTypes()) {
+ // We need to remove allocas for reference types
+ addPass(createPromoteMemoryToRegisterPass(true));
+ }
// Lower atomics and TLS if necessary
addPass(new CoalesceFeaturesAndStripAtomics(&getWebAssemblyTargetMachine()));
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index f0e18cc5ef03..11cfe3cba751 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86IntelInstPrinter.h"
#include "MCTargetDesc/X86MCExpr.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
@@ -432,6 +433,7 @@ private:
InlineAsmIdentifierInfo Info;
short BracCount = 0;
bool MemExpr = false;
+ bool BracketUsed = false;
bool OffsetOperator = false;
bool AttachToOperandIdx = false;
bool IsPIC = false;
@@ -454,6 +456,7 @@ private:
void addImm(int64_t imm) { Imm += imm; }
short getBracCount() const { return BracCount; }
bool isMemExpr() const { return MemExpr; }
+ bool isBracketUsed() const { return BracketUsed; }
bool isOffsetOperator() const { return OffsetOperator; }
SMLoc getOffsetLoc() const { return OffsetOperatorLoc; }
unsigned getBaseReg() const { return BaseReg; }
@@ -954,6 +957,7 @@ private:
break;
}
MemExpr = true;
+ BracketUsed = true;
BracCount++;
return false;
}
@@ -1210,15 +1214,15 @@ private:
bool is64BitMode() const {
// FIXME: Can tablegen auto-generate this?
- return getSTI().getFeatureBits()[X86::Is64Bit];
+ return getSTI().hasFeature(X86::Is64Bit);
}
bool is32BitMode() const {
// FIXME: Can tablegen auto-generate this?
- return getSTI().getFeatureBits()[X86::Is32Bit];
+ return getSTI().hasFeature(X86::Is32Bit);
}
bool is16BitMode() const {
// FIXME: Can tablegen auto-generate this?
- return getSTI().getFeatureBits()[X86::Is16Bit];
+ return getSTI().hasFeature(X86::Is16Bit);
}
void SwitchMode(unsigned mode) {
MCSubtargetInfo &STI = copySTI();
@@ -1282,12 +1286,9 @@ public:
};
} // end anonymous namespace
-/// @name Auto-generated Match Functions
-/// {
-
-static unsigned MatchRegisterName(StringRef Name);
-
-/// }
+#define GET_REGISTER_MATCHER
+#define GET_SUBTARGET_FEATURE_NAME
+#include "X86GenAsmMatcher.inc"
static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
unsigned Scale, bool Is64BitMode,
@@ -1748,12 +1749,6 @@ bool X86AsmParser::CreateMemForMSInlineAsm(
// If we found a decl other than a VarDecl, then assume it is a FuncDecl or
// some other label reference.
if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
- // Insert an explicit size if the user didn't have one.
- if (!Size) {
- Size = getPointerWidth();
- InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
- /*Len=*/0, Size);
- }
// Create an absolute memory reference in order to match against
// instructions taking a PC relative operand.
Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
@@ -1781,10 +1776,6 @@ bool X86AsmParser::CreateMemForMSInlineAsm(
BaseReg && IndexReg));
return false;
}
- // Otherwise, we set the base register to a non-zero value
- // if we don't know the actual value at this time. This is necessary to
- // get the matching correct in some cases.
- BaseReg = BaseReg ? BaseReg : 1;
Operands.push_back(X86Operand::CreateMem(
getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End,
Size,
@@ -2320,7 +2311,8 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
// .Imm gets lexed as a real.
if (Tok.is(AsmToken::Real)) {
APInt DotDisp;
- DotDispStr.getAsInteger(10, DotDisp);
+ if (DotDispStr.getAsInteger(10, DotDisp))
+ return Error(Tok.getLoc(), "Unexpected offset");
Info.Offset = DotDisp.getZExtValue();
} else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
Tok.is(AsmToken::Identifier)) {
@@ -2636,9 +2628,9 @@ bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
unsigned DefaultBaseReg = X86::NoRegister;
bool MaybeDirectBranchDest = true;
+ bool IsUnconditionalBranch =
+ Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
if (Parser.isParsingMasm()) {
- bool IsUnconditionalBranch =
- Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
if (is64BitMode() && SM.getElementSize() > 0) {
DefaultBaseReg = X86::RIP;
}
@@ -2660,6 +2652,13 @@ bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
}
}
}
+ } else if (IsUnconditionalBranch) {
+ // Treat `call [offset fn_ref]` (or `jmp`) syntax as an error.
+ if (!PtrInOperand && SM.isOffsetOperator())
+ return Error(
+ Start, "`OFFSET` operator cannot be used in an unconditional branch");
+ if (PtrInOperand || SM.isBracketUsed())
+ MaybeDirectBranchDest = false;
}
if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister))
@@ -3500,8 +3499,8 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
Operands[0] = X86Operand::CreateToken(Name, NameLoc);
}
// Select the correct equivalent 16-/32-bit source register.
- unsigned Reg =
- getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
+ MCRegister Reg =
+ getX86SubSuperRegister(Op1.getReg(), is16BitMode() ? 16 : 32);
Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
}
}
@@ -3632,7 +3631,12 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
- const MCRegisterInfo *MRI = getContext().getRegisterInfo();
+ if (ForcedVEXEncoding != VEXEncoding_VEX3 &&
+ X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode())))
+ return true;
+
+ if (X86::optimizeShiftRotateWithImmediateOne(Inst))
+ return true;
switch (Inst.getOpcode()) {
default: return false;
@@ -3656,178 +3660,13 @@ bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
}
return false;
- case X86::VMOVZPQILo2PQIrr:
- case X86::VMOVAPDrr:
- case X86::VMOVAPDYrr:
- case X86::VMOVAPSrr:
- case X86::VMOVAPSYrr:
- case X86::VMOVDQArr:
- case X86::VMOVDQAYrr:
- case X86::VMOVDQUrr:
- case X86::VMOVDQUYrr:
- case X86::VMOVUPDrr:
- case X86::VMOVUPDYrr:
- case X86::VMOVUPSrr:
- case X86::VMOVUPSYrr: {
- // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
- // the registers is extended, but other isn't.
- if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
- MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
- MRI->getEncodingValue(Inst.getOperand(1).getReg()) < 8)
- return false;
-
- unsigned NewOpc;
- switch (Inst.getOpcode()) {
- default: llvm_unreachable("Invalid opcode");
- case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
- case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
- case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
- case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
- case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
- case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
- case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
- case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
- case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
- case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
- case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
- case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
- case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
- }
- Inst.setOpcode(NewOpc);
- return true;
- }
- case X86::VMOVSDrr:
- case X86::VMOVSSrr: {
- // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
- // the registers is extended, but other isn't.
- if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
- MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
- MRI->getEncodingValue(Inst.getOperand(2).getReg()) < 8)
- return false;
-
- unsigned NewOpc;
- switch (Inst.getOpcode()) {
- default: llvm_unreachable("Invalid opcode");
- case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
- case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
- }
- Inst.setOpcode(NewOpc);
- return true;
- }
- case X86::RCR8ri: case X86::RCR16ri: case X86::RCR32ri: case X86::RCR64ri:
- case X86::RCL8ri: case X86::RCL16ri: case X86::RCL32ri: case X86::RCL64ri:
- case X86::ROR8ri: case X86::ROR16ri: case X86::ROR32ri: case X86::ROR64ri:
- case X86::ROL8ri: case X86::ROL16ri: case X86::ROL32ri: case X86::ROL64ri:
- case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri: case X86::SAR64ri:
- case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri: case X86::SHR64ri:
- case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri: case X86::SHL64ri: {
- // Optimize s{hr,ar,hl} $1, <op> to "shift <op>". Similar for rotate.
- // FIXME: It would be great if we could just do this with an InstAlias.
- if (!Inst.getOperand(2).isImm() || Inst.getOperand(2).getImm() != 1)
- return false;
-
- unsigned NewOpc;
- switch (Inst.getOpcode()) {
- default: llvm_unreachable("Invalid opcode");
- case X86::RCR8ri: NewOpc = X86::RCR8r1; break;
- case X86::RCR16ri: NewOpc = X86::RCR16r1; break;
- case X86::RCR32ri: NewOpc = X86::RCR32r1; break;
- case X86::RCR64ri: NewOpc = X86::RCR64r1; break;
- case X86::RCL8ri: NewOpc = X86::RCL8r1; break;
- case X86::RCL16ri: NewOpc = X86::RCL16r1; break;
- case X86::RCL32ri: NewOpc = X86::RCL32r1; break;
- case X86::RCL64ri: NewOpc = X86::RCL64r1; break;
- case X86::ROR8ri: NewOpc = X86::ROR8r1; break;
- case X86::ROR16ri: NewOpc = X86::ROR16r1; break;
- case X86::ROR32ri: NewOpc = X86::ROR32r1; break;
- case X86::ROR64ri: NewOpc = X86::ROR64r1; break;
- case X86::ROL8ri: NewOpc = X86::ROL8r1; break;
- case X86::ROL16ri: NewOpc = X86::ROL16r1; break;
- case X86::ROL32ri: NewOpc = X86::ROL32r1; break;
- case X86::ROL64ri: NewOpc = X86::ROL64r1; break;
- case X86::SAR8ri: NewOpc = X86::SAR8r1; break;
- case X86::SAR16ri: NewOpc = X86::SAR16r1; break;
- case X86::SAR32ri: NewOpc = X86::SAR32r1; break;
- case X86::SAR64ri: NewOpc = X86::SAR64r1; break;
- case X86::SHR8ri: NewOpc = X86::SHR8r1; break;
- case X86::SHR16ri: NewOpc = X86::SHR16r1; break;
- case X86::SHR32ri: NewOpc = X86::SHR32r1; break;
- case X86::SHR64ri: NewOpc = X86::SHR64r1; break;
- case X86::SHL8ri: NewOpc = X86::SHL8r1; break;
- case X86::SHL16ri: NewOpc = X86::SHL16r1; break;
- case X86::SHL32ri: NewOpc = X86::SHL32r1; break;
- case X86::SHL64ri: NewOpc = X86::SHL64r1; break;
- }
-
- MCInst TmpInst;
- TmpInst.setOpcode(NewOpc);
- TmpInst.addOperand(Inst.getOperand(0));
- TmpInst.addOperand(Inst.getOperand(1));
- Inst = TmpInst;
- return true;
- }
- case X86::RCR8mi: case X86::RCR16mi: case X86::RCR32mi: case X86::RCR64mi:
- case X86::RCL8mi: case X86::RCL16mi: case X86::RCL32mi: case X86::RCL64mi:
- case X86::ROR8mi: case X86::ROR16mi: case X86::ROR32mi: case X86::ROR64mi:
- case X86::ROL8mi: case X86::ROL16mi: case X86::ROL32mi: case X86::ROL64mi:
- case X86::SAR8mi: case X86::SAR16mi: case X86::SAR32mi: case X86::SAR64mi:
- case X86::SHR8mi: case X86::SHR16mi: case X86::SHR32mi: case X86::SHR64mi:
- case X86::SHL8mi: case X86::SHL16mi: case X86::SHL32mi: case X86::SHL64mi: {
- // Optimize s{hr,ar,hl} $1, <op> to "shift <op>". Similar for rotate.
- // FIXME: It would be great if we could just do this with an InstAlias.
- if (!Inst.getOperand(X86::AddrNumOperands).isImm() ||
- Inst.getOperand(X86::AddrNumOperands).getImm() != 1)
- return false;
-
- unsigned NewOpc;
- switch (Inst.getOpcode()) {
- default: llvm_unreachable("Invalid opcode");
- case X86::RCR8mi: NewOpc = X86::RCR8m1; break;
- case X86::RCR16mi: NewOpc = X86::RCR16m1; break;
- case X86::RCR32mi: NewOpc = X86::RCR32m1; break;
- case X86::RCR64mi: NewOpc = X86::RCR64m1; break;
- case X86::RCL8mi: NewOpc = X86::RCL8m1; break;
- case X86::RCL16mi: NewOpc = X86::RCL16m1; break;
- case X86::RCL32mi: NewOpc = X86::RCL32m1; break;
- case X86::RCL64mi: NewOpc = X86::RCL64m1; break;
- case X86::ROR8mi: NewOpc = X86::ROR8m1; break;
- case X86::ROR16mi: NewOpc = X86::ROR16m1; break;
- case X86::ROR32mi: NewOpc = X86::ROR32m1; break;
- case X86::ROR64mi: NewOpc = X86::ROR64m1; break;
- case X86::ROL8mi: NewOpc = X86::ROL8m1; break;
- case X86::ROL16mi: NewOpc = X86::ROL16m1; break;
- case X86::ROL32mi: NewOpc = X86::ROL32m1; break;
- case X86::ROL64mi: NewOpc = X86::ROL64m1; break;
- case X86::SAR8mi: NewOpc = X86::SAR8m1; break;
- case X86::SAR16mi: NewOpc = X86::SAR16m1; break;
- case X86::SAR32mi: NewOpc = X86::SAR32m1; break;
- case X86::SAR64mi: NewOpc = X86::SAR64m1; break;
- case X86::SHR8mi: NewOpc = X86::SHR8m1; break;
- case X86::SHR16mi: NewOpc = X86::SHR16m1; break;
- case X86::SHR32mi: NewOpc = X86::SHR32m1; break;
- case X86::SHR64mi: NewOpc = X86::SHR64m1; break;
- case X86::SHL8mi: NewOpc = X86::SHL8m1; break;
- case X86::SHL16mi: NewOpc = X86::SHL16m1; break;
- case X86::SHL32mi: NewOpc = X86::SHL32m1; break;
- case X86::SHL64mi: NewOpc = X86::SHL64m1; break;
- }
-
- MCInst TmpInst;
- TmpInst.setOpcode(NewOpc);
- for (int i = 0; i != X86::AddrNumOperands; ++i)
- TmpInst.addOperand(Inst.getOperand(i));
- Inst = TmpInst;
- return true;
- }
case X86::INT: {
- // Transforms "int $3" into "int3" as a size optimization. We can't write an
- // instalias with an immediate operand yet.
+ // Transforms "int $3" into "int3" as a size optimization.
+ // We can't write this as an InstAlias.
if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3)
return false;
-
- MCInst TmpInst;
- TmpInst.setOpcode(X86::INT3);
- Inst = TmpInst;
+ Inst.clear();
+ Inst.setOpcode(X86::INT3);
return true;
}
}
@@ -3923,11 +3762,18 @@ bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
}
}
+ if ((Opcode == X86::PREFETCHIT0 || Opcode == X86::PREFETCHIT1)) {
+ const MCOperand &MO = Inst.getOperand(X86::AddrBaseReg);
+ if (!MO.isReg() || MO.getReg() != X86::RIP)
+ return Warning(
+ Ops[0]->getStartLoc(),
+ Twine((Inst.getOpcode() == X86::PREFETCHIT0 ? "'prefetchit0'"
+ : "'prefetchit1'")) +
+ " only supports RIP-relative address");
+ }
return false;
}
-static const char *getSubtargetFeatureName(uint64_t Val);
-
void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
Warning(Loc, "Instruction may be vulnerable to LVI and "
"requires manual mitigation");
@@ -4036,13 +3882,13 @@ void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst,
void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
MCStreamer &Out) {
if (LVIInlineAsmHardening &&
- getSTI().getFeatureBits()[X86::FeatureLVIControlFlowIntegrity])
+ getSTI().hasFeature(X86::FeatureLVIControlFlowIntegrity))
applyLVICFIMitigation(Inst, Out);
Out.emitInstruction(Inst, getSTI());
if (LVIInlineAsmHardening &&
- getSTI().getFeatureBits()[X86::FeatureLVILoadHardening])
+ getSTI().hasFeature(X86::FeatureLVILoadHardening))
applyLVILoadHardeningMitigation(Inst, Out);
}
@@ -4982,7 +4828,5 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86AsmParser() {
RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
}
-#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
-#define GET_SUBTARGET_FEATURE_NAME
#include "X86GenAsmMatcher.inc"
diff --git a/llvm/lib/Target/X86/AsmParser/X86Operand.h b/llvm/lib/Target/X86/AsmParser/X86Operand.h
index 075b800f9e20..4661e73c3ef8 100644
--- a/llvm/lib/Target/X86/AsmParser/X86Operand.h
+++ b/llvm/lib/Target/X86/AsmParser/X86Operand.h
@@ -380,6 +380,40 @@ struct X86Operand final : public MCParsedAsmOperand {
bool isMem512_RC512() const {
return isMem512() && isMemIndexReg(X86::ZMM0, X86::ZMM31);
}
+ bool isMem512_GR16() const {
+ if (!isMem512())
+ return false;
+ if (getMemBaseReg() &&
+ !X86MCRegisterClasses[X86::GR16RegClassID].contains(getMemBaseReg()))
+ return false;
+ return true;
+ }
+ bool isMem512_GR32() const {
+ if (!isMem512())
+ return false;
+ if (getMemBaseReg() &&
+ !X86MCRegisterClasses[X86::GR32RegClassID].contains(getMemBaseReg()) &&
+ getMemBaseReg() != X86::EIP)
+ return false;
+ if (getMemIndexReg() &&
+ !X86MCRegisterClasses[X86::GR32RegClassID].contains(getMemIndexReg()) &&
+ getMemIndexReg() != X86::EIZ)
+ return false;
+ return true;
+ }
+ bool isMem512_GR64() const {
+ if (!isMem512())
+ return false;
+ if (getMemBaseReg() &&
+ !X86MCRegisterClasses[X86::GR64RegClassID].contains(getMemBaseReg()) &&
+ getMemBaseReg() != X86::RIP)
+ return false;
+ if (getMemIndexReg() &&
+ !X86MCRegisterClasses[X86::GR64RegClassID].contains(getMemIndexReg()) &&
+ getMemIndexReg() != X86::RIZ)
+ return false;
+ return true;
+ }
bool isAbsMem() const {
return Kind == Memory && !getMemSegReg() && !getMemBaseReg() &&
diff --git a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h
index ed2d540d965f..3cb5edeee5b5 100644
--- a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h
+++ b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h
@@ -19,7 +19,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/MCA/CustomBehaviour.h"
-#include "llvm/Support/TargetParser.h"
+#include "llvm/TargetParser/TargetParser.h"
namespace llvm {
namespace mca {
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
index 5a1c4ec81e1b..4b36135da352 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.cpp
@@ -55,7 +55,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, uint64_t Address,
// InstrInfo.td as soon as Requires clause is supported properly
// for InstAlias.
if (MI->getOpcode() == X86::CALLpcrel32 &&
- (STI.getFeatureBits()[X86::Is64Bit])) {
+ (STI.hasFeature(X86::Is64Bit))) {
OS << "\tcallq\t";
printPCRelImm(MI, Address, 0, OS);
}
@@ -65,7 +65,7 @@ void X86ATTInstPrinter::printInst(const MCInst *MI, uint64_t Address,
// 0x66 to be interpreted as "data16" by the asm printer.
// Thus we add an adjustment here in order to print the "right" instruction.
else if (MI->getOpcode() == X86::DATA16_PREFIX &&
- STI.getFeatureBits()[X86::Is16Bit]) {
+ STI.hasFeature(X86::Is16Bit)) {
OS << "\tdata32";
}
// Try to print any aliases first.
@@ -178,9 +178,9 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
// Broadcast form.
// Load size is word for TA map. Otherwise it is based on W-bit.
if ((Desc.TSFlags & X86II::OpMapMask) == X86II::TA) {
- assert(!(Desc.TSFlags & X86II::VEX_W) && "Unknown W-bit value!");
+ assert(!(Desc.TSFlags & X86II::REX_W) && "Unknown W-bit value!");
printwordmem(MI, CurOp--, OS);
- } else if (Desc.TSFlags & X86II::VEX_W) {
+ } else if (Desc.TSFlags & X86II::REX_W) {
printqwordmem(MI, CurOp--, OS);
} else {
printdwordmem(MI, CurOp--, OS);
@@ -189,13 +189,13 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
// Print the number of elements broadcasted.
unsigned NumElts;
if (Desc.TSFlags & X86II::EVEX_L2)
- NumElts = (Desc.TSFlags & X86II::VEX_W) ? 8 : 16;
+ NumElts = (Desc.TSFlags & X86II::REX_W) ? 8 : 16;
else if (Desc.TSFlags & X86II::VEX_L)
- NumElts = (Desc.TSFlags & X86II::VEX_W) ? 4 : 8;
+ NumElts = (Desc.TSFlags & X86II::REX_W) ? 4 : 8;
else
- NumElts = (Desc.TSFlags & X86II::VEX_W) ? 2 : 4;
+ NumElts = (Desc.TSFlags & X86II::REX_W) ? 2 : 4;
if ((Desc.TSFlags & X86II::OpMapMask) == X86II::TA) {
- assert(!(Desc.TSFlags & X86II::VEX_W) && "Unknown W-bit value!");
+ assert(!(Desc.TSFlags & X86II::REX_W) && "Unknown W-bit value!");
NumElts *= 2;
}
OS << "{1to" << NumElts << "}";
@@ -333,7 +333,7 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
if (Desc.TSFlags & X86II::EVEX_B) {
// Broadcast form.
// Load size is based on W-bit as only D and Q are supported.
- if (Desc.TSFlags & X86II::VEX_W)
+ if (Desc.TSFlags & X86II::REX_W)
printqwordmem(MI, CurOp--, OS);
else
printdwordmem(MI, CurOp--, OS);
@@ -341,11 +341,11 @@ bool X86ATTInstPrinter::printVecCompareInstr(const MCInst *MI,
// Print the number of elements broadcasted.
unsigned NumElts;
if (Desc.TSFlags & X86II::EVEX_L2)
- NumElts = (Desc.TSFlags & X86II::VEX_W) ? 8 : 16;
+ NumElts = (Desc.TSFlags & X86II::REX_W) ? 8 : 16;
else if (Desc.TSFlags & X86II::VEX_L)
- NumElts = (Desc.TSFlags & X86II::VEX_W) ? 4 : 8;
+ NumElts = (Desc.TSFlags & X86II::REX_W) ? 4 : 8;
else
- NumElts = (Desc.TSFlags & X86II::VEX_W) ? 2 : 4;
+ NumElts = (Desc.TSFlags & X86II::REX_W) ? 2 : 4;
OS << "{1to" << NumElts << "}";
} else {
if (Desc.TSFlags & X86II::EVEX_L2)
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index 67f93ae7bfbe..289642ac37bb 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -8,7 +8,7 @@
#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86FixupKinds.h"
-#include "MCTargetDesc/X86InstrRelaxTables.h"
+#include "MCTargetDesc/X86EncodingOptimization.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
@@ -127,7 +127,7 @@ class X86AsmBackend : public MCAsmBackend {
MCInst PrevInst;
MCBoundaryAlignFragment *PendingBA = nullptr;
std::pair<MCFragment *, size_t> PrevInstPosition;
- bool CanPadInst;
+ bool CanPadInst = false;
uint8_t determinePaddingPrefix(const MCInst &Inst) const;
bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
@@ -144,7 +144,7 @@ public:
// jumps, and (unfused) conditional jumps with nops. Both the
// instructions aligned and the alignment method (nop vs prefix) may
// change in the future.
- AlignBoundary = assumeAligned(32);;
+ AlignBoundary = assumeAligned(32);
AlignBranchType.addKind(X86::AlignBranchFused);
AlignBranchType.addKind(X86::AlignBranchJcc);
AlignBranchType.addKind(X86::AlignBranchJmp);
@@ -209,11 +209,15 @@ public:
};
} // end anonymous namespace
-static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
- unsigned Op = Inst.getOpcode();
- switch (Op) {
+static bool isRelaxableBranch(unsigned Opcode) {
+ return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
+}
+
+static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
+ bool Is16BitMode = false) {
+ switch (Opcode) {
default:
- return Op;
+ llvm_unreachable("invalid opcode for branch");
case X86::JCC_1:
return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
case X86::JMP_1:
@@ -221,16 +225,10 @@ static unsigned getRelaxedOpcodeBranch(const MCInst &Inst, bool Is16BitMode) {
}
}
-static unsigned getRelaxedOpcodeArith(const MCInst &Inst) {
- unsigned Op = Inst.getOpcode();
- return X86::getRelaxedOpcodeArith(Op);
-}
-
-static unsigned getRelaxedOpcode(const MCInst &Inst, bool Is16BitMode) {
- unsigned R = getRelaxedOpcodeArith(Inst);
- if (R != Inst.getOpcode())
- return R;
- return getRelaxedOpcodeBranch(Inst, Is16BitMode);
+static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
+ unsigned Opcode = MI.getOpcode();
+ return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
+ : X86::getOpcodeForLongImmediateForm(Opcode);
}
static X86::CondCode getCondFromBranch(const MCInst &MI,
@@ -721,24 +719,12 @@ void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}
-bool X86AsmBackend::mayNeedRelaxation(const MCInst &Inst,
+bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
const MCSubtargetInfo &STI) const {
- // Branches can always be relaxed in either mode.
- if (getRelaxedOpcodeBranch(Inst, false) != Inst.getOpcode())
- return true;
-
- // Check if this instruction is ever relaxable.
- if (getRelaxedOpcodeArith(Inst) == Inst.getOpcode())
- return false;
-
-
- // Check if the relaxable operand has an expression. For the current set of
- // relaxable instructions, the relaxable operand is always the last operand.
- unsigned RelaxableOp = Inst.getNumOperands() - 1;
- if (Inst.getOperand(RelaxableOp).isExpr())
- return true;
-
- return false;
+ unsigned Opcode = MI.getOpcode();
+ return isRelaxableBranch(Opcode) ||
+ (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
+ MI.getOperand(MI.getNumOperands() - 1).isExpr());
}
bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
@@ -754,7 +740,7 @@ bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
void X86AsmBackend::relaxInstruction(MCInst &Inst,
const MCSubtargetInfo &STI) const {
// The only relaxations X86 does is from a 1byte pcrel to a 4byte pcrel.
- bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit];
+ bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);
if (RelaxedOp == Inst.getOpcode()) {
@@ -768,15 +754,6 @@ void X86AsmBackend::relaxInstruction(MCInst &Inst,
Inst.setOpcode(RelaxedOp);
}
-/// Return true if this instruction has been fully relaxed into it's most
-/// general available form.
-static bool isFullyRelaxed(const MCRelaxableFragment &RF) {
- auto &Inst = RF.getInst();
- auto &STI = *RF.getSubtargetInfo();
- bool Is16BitMode = STI.getFeatureBits()[X86::Is16Bit];
- return getRelaxedOpcode(Inst, Is16BitMode) == Inst.getOpcode();
-}
-
bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
MCCodeEmitter &Emitter,
unsigned &RemainingSize) const {
@@ -786,7 +763,7 @@ bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
// larger value for one of the fixups then can be encoded. The outer loop
// will also catch this before moving to the next instruction, but we need to
// prevent padding this single instruction as well.
- if (!isFullyRelaxed(RF))
+ if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
return false;
const unsigned OldSize = RF.getContents().size();
@@ -796,8 +773,7 @@ bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
const unsigned RemainingPrefixSize = [&]() -> unsigned {
SmallString<15> Code;
- raw_svector_ostream VecOS(Code);
- Emitter.emitPrefix(RF.getInst(), VecOS, STI);
+ Emitter.emitPrefix(RF.getInst(), Code, STI);
assert(Code.size() < 15 && "The number of prefixes must be less than 15.");
// TODO: It turns out we need a decent amount of plumbing for the target
@@ -834,7 +810,7 @@ bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
MCCodeEmitter &Emitter,
unsigned &RemainingSize) const {
- if (isFullyRelaxed(RF))
+ if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
// TODO: There are lots of other tricks we could apply for increasing
// encoding size without impacting performance.
return false;
@@ -844,8 +820,7 @@ bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
SmallVector<MCFixup, 4> Fixups;
SmallString<15> Code;
- raw_svector_ostream VecOS(Code);
- Emitter.encodeInstruction(Relaxed, VecOS, Fixups, *RF.getSubtargetInfo());
+ Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
const unsigned OldSize = RF.getContents().size();
const unsigned NewSize = Code.size();
assert(NewSize >= OldSize && "size decrease during relaxation?");
@@ -951,7 +926,7 @@ void X86AsmBackend::finishLayout(MCAssembler const &Asm,
// We don't need to worry about larger positive offsets as none of the
// possible offsets between this and our align are visible, and the
// ones afterwards aren't changing.
- if (!isFullyRelaxed(RF))
+ if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
break;
}
Relaxable.clear();
@@ -1001,11 +976,11 @@ unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
return 4;
if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
return 1;
- if (STI.getFeatureBits()[X86::TuningFast7ByteNOP])
+ if (STI.hasFeature(X86::TuningFast7ByteNOP))
return 7;
- if (STI.getFeatureBits()[X86::TuningFast15ByteNOP])
+ if (STI.hasFeature(X86::TuningFast15ByteNOP))
return 15;
- if (STI.getFeatureBits()[X86::TuningFast11ByteNOP])
+ if (STI.hasFeature(X86::TuningFast11ByteNOP))
return 11;
// FIXME: handle 32-bit mode
// 15-bytes is the longest single NOP instruction, but 10-bytes is
@@ -1054,7 +1029,7 @@ bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
};
const char(*Nops)[11] =
- STI->getFeatureBits()[X86::Is16Bit] ? Nops16Bit : Nops32Bit;
+ STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit;
uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);
@@ -1352,9 +1327,13 @@ public:
/// Implementation of algorithm to generate the compact unwind encoding
/// for the CFI instructions.
- uint32_t
- generateCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs) const override {
+ uint32_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
+ const MCContext *Ctxt) const override {
+ ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
if (Instrs.empty()) return 0;
+ if (!isDarwinCanonicalPersonality(FI->Personality) &&
+ !Ctxt->emitCompactUnwindNonCanonical())
+ return CU::UNWIND_MODE_DWARF;
// Reset the saved registers.
unsigned SavedRegIdx = 0;
@@ -1541,6 +1520,12 @@ MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
return new WindowsX86AsmBackend(T, true, STI);
+ if (TheTriple.isUEFI()) {
+ assert(TheTriple.isOSBinFormatCOFF() &&
+ "Only COFF format is supported in UEFI environment.");
+ return new WindowsX86AsmBackend(T, true, STI);
+ }
+
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
if (TheTriple.isX32())
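A toy sketch of the relaxation predicate rewritten in the X86AsmBackend hunks above, with invented opcode names: an instruction may need relaxation when it is a relaxable branch, or when it has a distinct long-immediate form and its last operand is still a symbolic expression. The real code consults tablegen'd opcodes; everything below is a stand-in.

#include <cassert>

enum Opcode { JMP_1, ADD32ri8, ADD32ri, MOV32ri };

// Only short jumps are branch-relaxable in this toy model.
static bool isRelaxableBranch(Opcode Op) { return Op == JMP_1; }

// Map a short-immediate form to its long-immediate form; identity otherwise.
static Opcode longImmediateForm(Opcode Op) {
  return Op == ADD32ri8 ? ADD32ri : Op;
}

struct Inst {
  Opcode Op;
  bool LastOperandIsExpr; // an unresolved symbolic immediate
};

static bool mayNeedRelaxation(const Inst &MI) {
  return isRelaxableBranch(MI.Op) ||
         (longImmediateForm(MI.Op) != MI.Op && MI.LastOperandIsExpr);
}

int main() {
  assert(mayNeedRelaxation({JMP_1, false}));   // branches can always relax
  assert(mayNeedRelaxation({ADD32ri8, true})); // imm8 form, value unknown
  assert(!mayNeedRelaxation({MOV32ri, true})); // no short/long pair exists
  return 0;
}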
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 4dd1df5fb55f..e2293fe30561 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -924,15 +924,10 @@ namespace X86II {
// Opcode
OpcodeShift = EncodingShift + 2,
- /// VEX_W - Has a opcode specific functionality, but is used in the same
- /// way as REX_W is for regular SSE instructions.
- VEX_WShift = OpcodeShift + 8,
- VEX_W = 1ULL << VEX_WShift,
-
/// VEX_4V - Used to specify an additional AVX/SSE register. Several 2
/// address instructions in SSE are represented as 3 address ones in AVX
/// and the additional register is encoded in VEX_VVVV prefix.
- VEX_4VShift = VEX_WShift + 1,
+ VEX_4VShift = OpcodeShift + 8,
VEX_4V = 1ULL << VEX_4VShift,
/// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
@@ -960,10 +955,10 @@ namespace X86II {
// The scaling factor for the AVX512's 8-bit compressed displacement.
CD8_Scale_Shift = EVEX_BShift + 1,
- CD8_Scale_Mask = 127ULL << CD8_Scale_Shift,
+ CD8_Scale_Mask = 7ULL << CD8_Scale_Shift,
/// Explicitly specified rounding control
- EVEX_RCShift = CD8_Scale_Shift + 7,
+ EVEX_RCShift = CD8_Scale_Shift + 3,
EVEX_RC = 1ULL << EVEX_RCShift,
// NOTRACK prefix
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
index 8ab86f46ffe6..d083bf245af2 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp
@@ -229,23 +229,6 @@ static unsigned getRelocType64(MCContext &Ctx, SMLoc Loc,
enum X86_32RelType { RT32_NONE, RT32_32, RT32_16, RT32_8 };
-static X86_32RelType getType32(X86_64RelType T) {
- switch (T) {
- case RT64_NONE:
- return RT32_NONE;
- case RT64_64:
- llvm_unreachable("Unimplemented");
- case RT64_32:
- case RT64_32S:
- return RT32_32;
- case RT64_16:
- return RT32_16;
- case RT64_8:
- return RT32_8;
- }
- llvm_unreachable("unexpected relocation type!");
-}
-
static unsigned getRelocType32(MCContext &Ctx,
MCSymbolRefExpr::VariantKind Modifier,
X86_32RelType Type, bool IsPCRel,
@@ -339,7 +322,26 @@ unsigned X86ELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
assert((getEMachine() == ELF::EM_386 || getEMachine() == ELF::EM_IAMCU) &&
"Unsupported ELF machine type.");
- return getRelocType32(Ctx, Modifier, getType32(Type), IsPCRel, Kind);
+
+ X86_32RelType RelType = RT32_NONE;
+ switch (Type) {
+ case RT64_NONE:
+ break;
+ case RT64_64:
+ Ctx.reportError(Fixup.getLoc(), "unsupported relocation type");
+ break;
+ case RT64_32:
+ case RT64_32S:
+ RelType = RT32_32;
+ break;
+ case RT64_16:
+ RelType = RT32_16;
+ break;
+ case RT64_8:
+ RelType = RT32_8;
+ break;
+ }
+ return getRelocType32(Ctx, Modifier, RelType, IsPCRel, Kind);
}
std::unique_ptr<MCObjectTargetWriter>
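A hedged sketch of the diagnostic pattern adopted in the X86ELFObjectWriter hunk above: a case that user input can reach is turned into a reported error plus a benign fallback value instead of an llvm_unreachable. ErrorReporter and the enums below are invented stand-ins, not LLVM types.

#include <iostream>
#include <string>

struct ErrorReporter {
  void reportError(const std::string &Msg) {
    std::cerr << "error: " << Msg << '\n';
  }
};

enum RelType64 { RT64_64, RT64_32 };
enum RelType32 { RT32_NONE, RT32_32 };

static RelType32 convert(ErrorReporter &Ctx, RelType64 T) {
  RelType32 Rel = RT32_NONE; // benign default instead of aborting
  switch (T) {
  case RT64_64:
    Ctx.reportError("unsupported relocation type");
    break;
  case RT64_32:
    Rel = RT32_32;
    break;
  }
  return Rel;
}

int main() {
  ErrorReporter Ctx;
  return convert(Ctx, RT64_32) == RT32_32 ? 0 : 1;
}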
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.cpp
new file mode 100644
index 000000000000..03eeef96b502
--- /dev/null
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.cpp
@@ -0,0 +1,479 @@
+//===-- X86EncodingOptimization.cpp - X86 Encoding optimization -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the X86 encoding optimization
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86EncodingOptimization.h"
+#include "X86BaseInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Casting.h"
+
+using namespace llvm;
+
+bool X86::optimizeInstFromVEX3ToVEX2(MCInst &MI, const MCInstrDesc &Desc) {
+ unsigned OpIdx1, OpIdx2;
+ unsigned Opcode = MI.getOpcode();
+ unsigned NewOpc = 0;
+#define FROM_TO(FROM, TO, IDX1, IDX2) \
+ case X86::FROM: \
+ NewOpc = X86::TO; \
+ OpIdx1 = IDX1; \
+ OpIdx2 = IDX2; \
+ break;
+#define TO_REV(FROM) FROM_TO(FROM, FROM##_REV, 0, 1)
+ switch (Opcode) {
+ default: {
+ // If the instruction is a commutable arithmetic instruction we might be
+ // able to commute the operands to get a 2 byte VEX prefix.
+ uint64_t TSFlags = Desc.TSFlags;
+ if (!Desc.isCommutable() || (TSFlags & X86II::EncodingMask) != X86II::VEX ||
+ (TSFlags & X86II::OpMapMask) != X86II::TB ||
+ (TSFlags & X86II::FormMask) != X86II::MRMSrcReg ||
+ (TSFlags & X86II::REX_W) || !(TSFlags & X86II::VEX_4V) ||
+ MI.getNumOperands() != 3)
+ return false;
+ // These two are not truly commutable.
+ if (Opcode == X86::VMOVHLPSrr || Opcode == X86::VUNPCKHPDrr)
+ return false;
+ OpIdx1 = 1;
+ OpIdx2 = 2;
+ break;
+ }
+ case X86::VCMPPDrri:
+ case X86::VCMPPDYrri:
+ case X86::VCMPPSrri:
+ case X86::VCMPPSYrri:
+ case X86::VCMPSDrr:
+ case X86::VCMPSSrr: {
+ switch (MI.getOperand(3).getImm() & 0x7) {
+ default:
+ return false;
+ case 0x00: // EQUAL
+ case 0x03: // UNORDERED
+ case 0x04: // NOT EQUAL
+ case 0x07: // ORDERED
+ OpIdx1 = 1;
+ OpIdx2 = 2;
+ break;
+ }
+ break;
+ }
+ // Commute operands to get a smaller encoding by using VEX.R instead of
+  // VEX.B if one of the registers is extended, but the other isn't.
+ FROM_TO(VMOVZPQILo2PQIrr, VMOVPQI2QIrr, 0, 1)
+ TO_REV(VMOVAPDrr)
+ TO_REV(VMOVAPDYrr)
+ TO_REV(VMOVAPSrr)
+ TO_REV(VMOVAPSYrr)
+ TO_REV(VMOVDQArr)
+ TO_REV(VMOVDQAYrr)
+ TO_REV(VMOVDQUrr)
+ TO_REV(VMOVDQUYrr)
+ TO_REV(VMOVUPDrr)
+ TO_REV(VMOVUPDYrr)
+ TO_REV(VMOVUPSrr)
+ TO_REV(VMOVUPSYrr)
+#undef TO_REV
+#define TO_REV(FROM) FROM_TO(FROM, FROM##_REV, 0, 2)
+ TO_REV(VMOVSDrr)
+ TO_REV(VMOVSSrr)
+#undef TO_REV
+#undef FROM_TO
+ }
+ if (X86II::isX86_64ExtendedReg(MI.getOperand(OpIdx1).getReg()) ||
+ !X86II::isX86_64ExtendedReg(MI.getOperand(OpIdx2).getReg()))
+ return false;
+ if (NewOpc)
+ MI.setOpcode(NewOpc);
+ else
+ std::swap(MI.getOperand(OpIdx1), MI.getOperand(OpIdx2));
+ return true;
+}
+
+// NOTE: We may write this as an InstAlias if it's only used by AsmParser. See
+// validateTargetOperandClass.
+bool X86::optimizeShiftRotateWithImmediateOne(MCInst &MI) {
+ unsigned NewOpc;
+#define TO_IMM1(FROM) \
+ case X86::FROM##i: \
+ NewOpc = X86::FROM##1; \
+ break;
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ TO_IMM1(RCR8r)
+ TO_IMM1(RCR16r)
+ TO_IMM1(RCR32r)
+ TO_IMM1(RCR64r)
+ TO_IMM1(RCL8r)
+ TO_IMM1(RCL16r)
+ TO_IMM1(RCL32r)
+ TO_IMM1(RCL64r)
+ TO_IMM1(ROR8r)
+ TO_IMM1(ROR16r)
+ TO_IMM1(ROR32r)
+ TO_IMM1(ROR64r)
+ TO_IMM1(ROL8r)
+ TO_IMM1(ROL16r)
+ TO_IMM1(ROL32r)
+ TO_IMM1(ROL64r)
+ TO_IMM1(SAR8r)
+ TO_IMM1(SAR16r)
+ TO_IMM1(SAR32r)
+ TO_IMM1(SAR64r)
+ TO_IMM1(SHR8r)
+ TO_IMM1(SHR16r)
+ TO_IMM1(SHR32r)
+ TO_IMM1(SHR64r)
+ TO_IMM1(SHL8r)
+ TO_IMM1(SHL16r)
+ TO_IMM1(SHL32r)
+ TO_IMM1(SHL64r)
+ TO_IMM1(RCR8m)
+ TO_IMM1(RCR16m)
+ TO_IMM1(RCR32m)
+ TO_IMM1(RCR64m)
+ TO_IMM1(RCL8m)
+ TO_IMM1(RCL16m)
+ TO_IMM1(RCL32m)
+ TO_IMM1(RCL64m)
+ TO_IMM1(ROR8m)
+ TO_IMM1(ROR16m)
+ TO_IMM1(ROR32m)
+ TO_IMM1(ROR64m)
+ TO_IMM1(ROL8m)
+ TO_IMM1(ROL16m)
+ TO_IMM1(ROL32m)
+ TO_IMM1(ROL64m)
+ TO_IMM1(SAR8m)
+ TO_IMM1(SAR16m)
+ TO_IMM1(SAR32m)
+ TO_IMM1(SAR64m)
+ TO_IMM1(SHR8m)
+ TO_IMM1(SHR16m)
+ TO_IMM1(SHR32m)
+ TO_IMM1(SHR64m)
+ TO_IMM1(SHL8m)
+ TO_IMM1(SHL16m)
+ TO_IMM1(SHL32m)
+ TO_IMM1(SHL64m)
+#undef TO_IMM1
+ }
+ MCOperand &LastOp = MI.getOperand(MI.getNumOperands() - 1);
+ if (!LastOp.isImm() || LastOp.getImm() != 1)
+ return false;
+ MI.setOpcode(NewOpc);
+ MI.erase(&LastOp);
+ return true;
+}
+
+bool X86::optimizeVPCMPWithImmediateOneOrSix(MCInst &MI) {
+ unsigned Opc1;
+ unsigned Opc2;
+#define FROM_TO(FROM, TO1, TO2) \
+ case X86::FROM: \
+ Opc1 = X86::TO1; \
+ Opc2 = X86::TO2; \
+ break;
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ FROM_TO(VPCMPBZ128rmi, VPCMPEQBZ128rm, VPCMPGTBZ128rm)
+ FROM_TO(VPCMPBZ128rmik, VPCMPEQBZ128rmk, VPCMPGTBZ128rmk)
+ FROM_TO(VPCMPBZ128rri, VPCMPEQBZ128rr, VPCMPGTBZ128rr)
+ FROM_TO(VPCMPBZ128rrik, VPCMPEQBZ128rrk, VPCMPGTBZ128rrk)
+ FROM_TO(VPCMPBZ256rmi, VPCMPEQBZ256rm, VPCMPGTBZ256rm)
+ FROM_TO(VPCMPBZ256rmik, VPCMPEQBZ256rmk, VPCMPGTBZ256rmk)
+ FROM_TO(VPCMPBZ256rri, VPCMPEQBZ256rr, VPCMPGTBZ256rr)
+ FROM_TO(VPCMPBZ256rrik, VPCMPEQBZ256rrk, VPCMPGTBZ256rrk)
+ FROM_TO(VPCMPBZrmi, VPCMPEQBZrm, VPCMPGTBZrm)
+ FROM_TO(VPCMPBZrmik, VPCMPEQBZrmk, VPCMPGTBZrmk)
+ FROM_TO(VPCMPBZrri, VPCMPEQBZrr, VPCMPGTBZrr)
+ FROM_TO(VPCMPBZrrik, VPCMPEQBZrrk, VPCMPGTBZrrk)
+ FROM_TO(VPCMPDZ128rmi, VPCMPEQDZ128rm, VPCMPGTDZ128rm)
+ FROM_TO(VPCMPDZ128rmib, VPCMPEQDZ128rmb, VPCMPGTDZ128rmb)
+ FROM_TO(VPCMPDZ128rmibk, VPCMPEQDZ128rmbk, VPCMPGTDZ128rmbk)
+ FROM_TO(VPCMPDZ128rmik, VPCMPEQDZ128rmk, VPCMPGTDZ128rmk)
+ FROM_TO(VPCMPDZ128rri, VPCMPEQDZ128rr, VPCMPGTDZ128rr)
+ FROM_TO(VPCMPDZ128rrik, VPCMPEQDZ128rrk, VPCMPGTDZ128rrk)
+ FROM_TO(VPCMPDZ256rmi, VPCMPEQDZ256rm, VPCMPGTDZ256rm)
+ FROM_TO(VPCMPDZ256rmib, VPCMPEQDZ256rmb, VPCMPGTDZ256rmb)
+ FROM_TO(VPCMPDZ256rmibk, VPCMPEQDZ256rmbk, VPCMPGTDZ256rmbk)
+ FROM_TO(VPCMPDZ256rmik, VPCMPEQDZ256rmk, VPCMPGTDZ256rmk)
+ FROM_TO(VPCMPDZ256rri, VPCMPEQDZ256rr, VPCMPGTDZ256rr)
+ FROM_TO(VPCMPDZ256rrik, VPCMPEQDZ256rrk, VPCMPGTDZ256rrk)
+ FROM_TO(VPCMPDZrmi, VPCMPEQDZrm, VPCMPGTDZrm)
+ FROM_TO(VPCMPDZrmib, VPCMPEQDZrmb, VPCMPGTDZrmb)
+ FROM_TO(VPCMPDZrmibk, VPCMPEQDZrmbk, VPCMPGTDZrmbk)
+ FROM_TO(VPCMPDZrmik, VPCMPEQDZrmk, VPCMPGTDZrmk)
+ FROM_TO(VPCMPDZrri, VPCMPEQDZrr, VPCMPGTDZrr)
+ FROM_TO(VPCMPDZrrik, VPCMPEQDZrrk, VPCMPGTDZrrk)
+ FROM_TO(VPCMPQZ128rmi, VPCMPEQQZ128rm, VPCMPGTQZ128rm)
+ FROM_TO(VPCMPQZ128rmib, VPCMPEQQZ128rmb, VPCMPGTQZ128rmb)
+ FROM_TO(VPCMPQZ128rmibk, VPCMPEQQZ128rmbk, VPCMPGTQZ128rmbk)
+ FROM_TO(VPCMPQZ128rmik, VPCMPEQQZ128rmk, VPCMPGTQZ128rmk)
+ FROM_TO(VPCMPQZ128rri, VPCMPEQQZ128rr, VPCMPGTQZ128rr)
+ FROM_TO(VPCMPQZ128rrik, VPCMPEQQZ128rrk, VPCMPGTQZ128rrk)
+ FROM_TO(VPCMPQZ256rmi, VPCMPEQQZ256rm, VPCMPGTQZ256rm)
+ FROM_TO(VPCMPQZ256rmib, VPCMPEQQZ256rmb, VPCMPGTQZ256rmb)
+ FROM_TO(VPCMPQZ256rmibk, VPCMPEQQZ256rmbk, VPCMPGTQZ256rmbk)
+ FROM_TO(VPCMPQZ256rmik, VPCMPEQQZ256rmk, VPCMPGTQZ256rmk)
+ FROM_TO(VPCMPQZ256rri, VPCMPEQQZ256rr, VPCMPGTQZ256rr)
+ FROM_TO(VPCMPQZ256rrik, VPCMPEQQZ256rrk, VPCMPGTQZ256rrk)
+ FROM_TO(VPCMPQZrmi, VPCMPEQQZrm, VPCMPGTQZrm)
+ FROM_TO(VPCMPQZrmib, VPCMPEQQZrmb, VPCMPGTQZrmb)
+ FROM_TO(VPCMPQZrmibk, VPCMPEQQZrmbk, VPCMPGTQZrmbk)
+ FROM_TO(VPCMPQZrmik, VPCMPEQQZrmk, VPCMPGTQZrmk)
+ FROM_TO(VPCMPQZrri, VPCMPEQQZrr, VPCMPGTQZrr)
+ FROM_TO(VPCMPQZrrik, VPCMPEQQZrrk, VPCMPGTQZrrk)
+ FROM_TO(VPCMPWZ128rmi, VPCMPEQWZ128rm, VPCMPGTWZ128rm)
+ FROM_TO(VPCMPWZ128rmik, VPCMPEQWZ128rmk, VPCMPGTWZ128rmk)
+ FROM_TO(VPCMPWZ128rri, VPCMPEQWZ128rr, VPCMPGTWZ128rr)
+ FROM_TO(VPCMPWZ128rrik, VPCMPEQWZ128rrk, VPCMPGTWZ128rrk)
+ FROM_TO(VPCMPWZ256rmi, VPCMPEQWZ256rm, VPCMPGTWZ256rm)
+ FROM_TO(VPCMPWZ256rmik, VPCMPEQWZ256rmk, VPCMPGTWZ256rmk)
+ FROM_TO(VPCMPWZ256rri, VPCMPEQWZ256rr, VPCMPGTWZ256rr)
+ FROM_TO(VPCMPWZ256rrik, VPCMPEQWZ256rrk, VPCMPGTWZ256rrk)
+ FROM_TO(VPCMPWZrmi, VPCMPEQWZrm, VPCMPGTWZrm)
+ FROM_TO(VPCMPWZrmik, VPCMPEQWZrmk, VPCMPGTWZrmk)
+ FROM_TO(VPCMPWZrri, VPCMPEQWZrr, VPCMPGTWZrr)
+ FROM_TO(VPCMPWZrrik, VPCMPEQWZrrk, VPCMPGTWZrrk)
+#undef FROM_TO
+ }
+ MCOperand &LastOp = MI.getOperand(MI.getNumOperands() - 1);
+ int64_t Imm = LastOp.getImm();
+ unsigned NewOpc;
+ if (Imm == 0)
+ NewOpc = Opc1;
+  else if (Imm == 6)
+ NewOpc = Opc2;
+ else
+ return false;
+ MI.setOpcode(NewOpc);
+ MI.erase(&LastOp);
+ return true;
+}
+
+bool X86::optimizeMOVSX(MCInst &MI) {
+ unsigned NewOpc;
+#define FROM_TO(FROM, TO, R0, R1) \
+ case X86::FROM: \
+ if (MI.getOperand(0).getReg() != X86::R0 || \
+ MI.getOperand(1).getReg() != X86::R1) \
+ return false; \
+ NewOpc = X86::TO; \
+ break;
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ FROM_TO(MOVSX16rr8, CBW, AX, AL) // movsbw %al, %ax --> cbtw
+ FROM_TO(MOVSX32rr16, CWDE, EAX, AX) // movswl %ax, %eax --> cwtl
+ FROM_TO(MOVSX64rr32, CDQE, RAX, EAX) // movslq %eax, %rax --> cltq
+#undef FROM_TO
+ }
+ MI.clear();
+ MI.setOpcode(NewOpc);
+ return true;
+}
+
+bool X86::optimizeINCDEC(MCInst &MI, bool In64BitMode) {
+ if (In64BitMode)
+ return false;
+ unsigned NewOpc;
+ // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
+#define FROM_TO(FROM, TO) \
+ case X86::FROM: \
+ NewOpc = X86::TO; \
+ break;
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ FROM_TO(DEC16r, DEC16r_alt)
+ FROM_TO(DEC32r, DEC32r_alt)
+ FROM_TO(INC16r, INC16r_alt)
+ FROM_TO(INC32r, INC32r_alt)
+ }
+ MI.setOpcode(NewOpc);
+ return true;
+}
+
+static bool isARegister(unsigned Reg) {
+ return Reg == X86::AL || Reg == X86::AX || Reg == X86::EAX || Reg == X86::RAX;
+}
+
+/// Simplify things like MOV32rm to MOV32o32a.
+bool X86::optimizeMOV(MCInst &MI, bool In64BitMode) {
+ // Don't make these simplifications in 64-bit mode; other assemblers don't
+ // perform them because they make the code larger.
+ if (In64BitMode)
+ return false;
+ unsigned NewOpc;
+ // We don't currently select the correct instruction form for instructions
+ // which have a short %eax, etc. form. Handle this by custom lowering, for
+ // now.
+ //
+ // Note, we are currently not handling the following instructions:
+ // MOV64ao8, MOV64o8a
+ // XCHG16ar, XCHG32ar, XCHG64ar
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ FROM_TO(MOV8mr_NOREX, MOV8o32a)
+ FROM_TO(MOV8mr, MOV8o32a)
+ FROM_TO(MOV8rm_NOREX, MOV8ao32)
+ FROM_TO(MOV8rm, MOV8ao32)
+ FROM_TO(MOV16mr, MOV16o32a)
+ FROM_TO(MOV16rm, MOV16ao32)
+ FROM_TO(MOV32mr, MOV32o32a)
+ FROM_TO(MOV32rm, MOV32ao32)
+ }
+ bool IsStore = MI.getOperand(0).isReg() && MI.getOperand(1).isReg();
+ unsigned AddrBase = IsStore;
+ unsigned RegOp = IsStore ? 0 : 5;
+ unsigned AddrOp = AddrBase + 3;
+ // Check whether the destination register can be fixed.
+ unsigned Reg = MI.getOperand(RegOp).getReg();
+ if (!isARegister(Reg))
+ return false;
+ // Check whether this is an absolute address.
+ // FIXME: We know TLVP symbol refs aren't, but there should be a better way
+ // to do this here.
+ bool Absolute = true;
+ if (MI.getOperand(AddrOp).isExpr()) {
+ const MCExpr *MCE = MI.getOperand(AddrOp).getExpr();
+ if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
+ if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
+ Absolute = false;
+ }
+ if (Absolute && (MI.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 0 ||
+ MI.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1 ||
+ MI.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0))
+ return false;
+ // If so, rewrite the instruction.
+ MCOperand Saved = MI.getOperand(AddrOp);
+ MCOperand Seg = MI.getOperand(AddrBase + X86::AddrSegmentReg);
+ MI.clear();
+ MI.setOpcode(NewOpc);
+ MI.addOperand(Saved);
+ MI.addOperand(Seg);
+ return true;
+}
+
+/// Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instructions with
+/// a short fixed-register form.
+static bool optimizeToFixedRegisterForm(MCInst &MI) {
+ unsigned NewOpc;
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ FROM_TO(ADC8ri, ADC8i8)
+ FROM_TO(ADC16ri, ADC16i16)
+ FROM_TO(ADC32ri, ADC32i32)
+ FROM_TO(ADC64ri32, ADC64i32)
+ FROM_TO(ADD8ri, ADD8i8)
+ FROM_TO(ADD16ri, ADD16i16)
+ FROM_TO(ADD32ri, ADD32i32)
+ FROM_TO(ADD64ri32, ADD64i32)
+ FROM_TO(AND8ri, AND8i8)
+ FROM_TO(AND16ri, AND16i16)
+ FROM_TO(AND32ri, AND32i32)
+ FROM_TO(AND64ri32, AND64i32)
+ FROM_TO(CMP8ri, CMP8i8)
+ FROM_TO(CMP16ri, CMP16i16)
+ FROM_TO(CMP32ri, CMP32i32)
+ FROM_TO(CMP64ri32, CMP64i32)
+ FROM_TO(OR8ri, OR8i8)
+ FROM_TO(OR16ri, OR16i16)
+ FROM_TO(OR32ri, OR32i32)
+ FROM_TO(OR64ri32, OR64i32)
+ FROM_TO(SBB8ri, SBB8i8)
+ FROM_TO(SBB16ri, SBB16i16)
+ FROM_TO(SBB32ri, SBB32i32)
+ FROM_TO(SBB64ri32, SBB64i32)
+ FROM_TO(SUB8ri, SUB8i8)
+ FROM_TO(SUB16ri, SUB16i16)
+ FROM_TO(SUB32ri, SUB32i32)
+ FROM_TO(SUB64ri32, SUB64i32)
+ FROM_TO(TEST8ri, TEST8i8)
+ FROM_TO(TEST16ri, TEST16i16)
+ FROM_TO(TEST32ri, TEST32i32)
+ FROM_TO(TEST64ri32, TEST64i32)
+ FROM_TO(XOR8ri, XOR8i8)
+ FROM_TO(XOR16ri, XOR16i16)
+ FROM_TO(XOR32ri, XOR32i32)
+ FROM_TO(XOR64ri32, XOR64i32)
+ }
+ // Check whether the destination register can be fixed.
+ unsigned Reg = MI.getOperand(0).getReg();
+ if (!isARegister(Reg))
+ return false;
+
+ // If so, rewrite the instruction.
+ MCOperand Saved = MI.getOperand(MI.getNumOperands() - 1);
+ MI.clear();
+ MI.setOpcode(NewOpc);
+ MI.addOperand(Saved);
+ return true;
+}
+
+unsigned X86::getOpcodeForShortImmediateForm(unsigned Opcode) {
+#define ENTRY(LONG, SHORT) \
+ case X86::LONG: \
+ return X86::SHORT;
+ switch (Opcode) {
+ default:
+ return Opcode;
+#include "X86EncodingOptimizationForImmediate.def"
+ }
+}
+
+unsigned X86::getOpcodeForLongImmediateForm(unsigned Opcode) {
+#define ENTRY(LONG, SHORT) \
+ case X86::SHORT: \
+ return X86::LONG;
+ switch (Opcode) {
+ default:
+ return Opcode;
+#include "X86EncodingOptimizationForImmediate.def"
+ }
+}
+
+static bool optimizeToShortImmediateForm(MCInst &MI) {
+ unsigned NewOpc;
+#define ENTRY(LONG, SHORT) \
+ case X86::LONG: \
+ NewOpc = X86::SHORT; \
+ break;
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+#include "X86EncodingOptimizationForImmediate.def"
+ }
+ MCOperand &LastOp = MI.getOperand(MI.getNumOperands() - 1);
+ if (LastOp.isExpr()) {
+ const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(LastOp.getExpr());
+ if (!SRE || SRE->getKind() != MCSymbolRefExpr::VK_X86_ABS8)
+ return false;
+ } else if (LastOp.isImm()) {
+ if (!isInt<8>(LastOp.getImm()))
+ return false;
+ }
+ MI.setOpcode(NewOpc);
+ return true;
+}
+
+bool X86::optimizeToFixedRegisterOrShortImmediateForm(MCInst &MI) {
+ // We may optimize twice here.
+ bool ShortImm = optimizeToShortImmediateForm(MI);
+ bool FixedReg = optimizeToFixedRegisterForm(MI);
+ return ShortImm || FixedReg;
+}
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.h b/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.h
new file mode 100644
index 000000000000..1a26336d1e72
--- /dev/null
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.h
@@ -0,0 +1,30 @@
+//===-- X86EncodingOptimization.h - X86 Encoding optimization ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the X86 encoding optimization
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_X86ENCODINGOPTIMIZATION_H
+#define LLVM_LIB_TARGET_X86_X86ENCODINGOPTIMIZATION_H
+namespace llvm {
+class MCInst;
+class MCInstrDesc;
+namespace X86 {
+bool optimizeInstFromVEX3ToVEX2(MCInst &MI, const MCInstrDesc &Desc);
+bool optimizeShiftRotateWithImmediateOne(MCInst &MI);
+bool optimizeVPCMPWithImmediateOneOrSix(MCInst &MI);
+bool optimizeMOVSX(MCInst &MI);
+bool optimizeINCDEC(MCInst &MI, bool In64BitMode);
+bool optimizeMOV(MCInst &MI, bool In64BitMode);
+bool optimizeToFixedRegisterOrShortImmediateForm(MCInst &MI);
+unsigned getOpcodeForShortImmediateForm(unsigned Opcode);
+unsigned getOpcodeForLongImmediateForm(unsigned Opcode);
+} // namespace X86
+} // namespace llvm
+#endif
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimizationForImmediate.def b/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimizationForImmediate.def
new file mode 100644
index 000000000000..e475e55260ed
--- /dev/null
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimizationForImmediate.def
@@ -0,0 +1,72 @@
+//===- X86EncodingOptimizationForImmediate.def ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// \file
+// This file defines all the entries of X86 instruction relaxation for immediates.
+//===----------------------------------------------------------------------===//
+
+#ifndef ENTRY
+#define ENTRY(LONG, SHORT)
+#endif
+ENTRY(ADC16mi, ADC16mi8)
+ENTRY(ADC16ri, ADC16ri8)
+ENTRY(ADC32mi, ADC32mi8)
+ENTRY(ADC32ri, ADC32ri8)
+ENTRY(ADC64mi32, ADC64mi8)
+ENTRY(ADC64ri32, ADC64ri8)
+ENTRY(SBB16mi, SBB16mi8)
+ENTRY(SBB16ri, SBB16ri8)
+ENTRY(SBB32mi, SBB32mi8)
+ENTRY(SBB32ri, SBB32ri8)
+ENTRY(SBB64mi32, SBB64mi8)
+ENTRY(SBB64ri32, SBB64ri8)
+ENTRY(ADD16mi, ADD16mi8)
+ENTRY(ADD16ri, ADD16ri8)
+ENTRY(ADD32mi, ADD32mi8)
+ENTRY(ADD32ri, ADD32ri8)
+ENTRY(ADD64mi32, ADD64mi8)
+ENTRY(ADD64ri32, ADD64ri8)
+ENTRY(AND16mi, AND16mi8)
+ENTRY(AND16ri, AND16ri8)
+ENTRY(AND32mi, AND32mi8)
+ENTRY(AND32ri, AND32ri8)
+ENTRY(AND64mi32, AND64mi8)
+ENTRY(AND64ri32, AND64ri8)
+ENTRY(OR16mi, OR16mi8)
+ENTRY(OR16ri, OR16ri8)
+ENTRY(OR32mi, OR32mi8)
+ENTRY(OR32ri, OR32ri8)
+ENTRY(OR64mi32, OR64mi8)
+ENTRY(OR64ri32, OR64ri8)
+ENTRY(SUB16mi, SUB16mi8)
+ENTRY(SUB16ri, SUB16ri8)
+ENTRY(SUB32mi, SUB32mi8)
+ENTRY(SUB32ri, SUB32ri8)
+ENTRY(SUB64mi32, SUB64mi8)
+ENTRY(SUB64ri32, SUB64ri8)
+ENTRY(XOR16mi, XOR16mi8)
+ENTRY(XOR16ri, XOR16ri8)
+ENTRY(XOR32mi, XOR32mi8)
+ENTRY(XOR32ri, XOR32ri8)
+ENTRY(XOR64mi32, XOR64mi8)
+ENTRY(XOR64ri32, XOR64ri8)
+ENTRY(CMP16mi, CMP16mi8)
+ENTRY(CMP16ri, CMP16ri8)
+ENTRY(CMP32mi, CMP32mi8)
+ENTRY(CMP32ri, CMP32ri8)
+ENTRY(CMP64mi32, CMP64mi8)
+ENTRY(CMP64ri32, CMP64ri8)
+ENTRY(IMUL16rmi, IMUL16rmi8)
+ENTRY(IMUL16rri, IMUL16rri8)
+ENTRY(IMUL32rmi, IMUL32rmi8)
+ENTRY(IMUL32rri, IMUL32rri8)
+ENTRY(IMUL64rmi32, IMUL64rmi8)
+ENTRY(IMUL64rri32, IMUL64rri8)
+ENTRY(PUSH16i, PUSH16i8)
+ENTRY(PUSH32i, PUSH32i8)
+ENTRY(PUSH64i32, PUSH64i8)
+#undef ENTRY
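A self-contained sketch of the X-macro technique the new .def file above relies on, using a two-entry toy table rather than the real opcode list: the same ENTRY list is included twice under different ENTRY definitions to derive both the long-to-short and short-to-long opcode mappings, exactly the way getOpcodeForShortImmediateForm and getOpcodeForLongImmediateForm reuse one table.

#include <cassert>

enum Opc { ADD32ri, ADD32ri8, CMP32ri, CMP32ri8, OTHER };

// Stand-in for the contents of the .def file.
#define OPT_IMM_TABLE \
  ENTRY(ADD32ri, ADD32ri8) \
  ENTRY(CMP32ri, CMP32ri8)

static Opc shortForm(Opc O) {
#define ENTRY(LONG, SHORT) \
  case LONG: \
    return SHORT;
  switch (O) {
    OPT_IMM_TABLE
  default:
    break;
  }
#undef ENTRY
  return O; // no short form: return the opcode unchanged
}

static Opc longForm(Opc O) {
#define ENTRY(LONG, SHORT) \
  case SHORT: \
    return LONG;
  switch (O) {
    OPT_IMM_TABLE
  default:
    break;
  }
#undef ENTRY
  return O; // no long form: return the opcode unchanged
}

int main() {
  assert(shortForm(ADD32ri) == ADD32ri8 && longForm(ADD32ri8) == ADD32ri);
  assert(shortForm(OTHER) == OTHER && longForm(OTHER) == OTHER);
  return 0;
}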
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp
deleted file mode 100644
index 640efd468135..000000000000
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.cpp
+++ /dev/null
@@ -1,165 +0,0 @@
-//===- X86InstrRelaxTables.cpp - X86 Instruction Relaxation Tables -*- C++ -*-//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the X86 instruction relaxation tables.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86InstrRelaxTables.h"
-#include "X86InstrInfo.h"
-#include "llvm/ADT/STLExtras.h"
-#include <atomic>
-
-using namespace llvm;
-
-// These tables are sorted by their ShortOp value allowing them to be binary
-// searched at runtime without the need for additional storage. The enum values
-// are currently emitted in X86GenInstrInfo.inc in alphabetical order. Which
-// makes sorting these tables a simple matter of alphabetizing the table.
-static const X86InstrRelaxTableEntry InstrRelaxTable[] = {
- // ADC
- { X86::ADC16mi8, X86::ADC16mi },
- { X86::ADC16ri8, X86::ADC16ri },
- { X86::ADC32mi8, X86::ADC32mi },
- { X86::ADC32ri8, X86::ADC32ri },
- { X86::ADC64mi8, X86::ADC64mi32 },
- { X86::ADC64ri8, X86::ADC64ri32 },
- // ADD
- { X86::ADD16mi8, X86::ADD16mi },
- { X86::ADD16ri8, X86::ADD16ri },
- { X86::ADD32mi8, X86::ADD32mi },
- { X86::ADD32ri8, X86::ADD32ri },
- { X86::ADD64mi8, X86::ADD64mi32 },
- { X86::ADD64ri8, X86::ADD64ri32 },
- // AND
- { X86::AND16mi8, X86::AND16mi },
- { X86::AND16ri8, X86::AND16ri },
- { X86::AND32mi8, X86::AND32mi },
- { X86::AND32ri8, X86::AND32ri },
- { X86::AND64mi8, X86::AND64mi32 },
- { X86::AND64ri8, X86::AND64ri32 },
- // CMP
- { X86::CMP16mi8, X86::CMP16mi },
- { X86::CMP16ri8, X86::CMP16ri },
- { X86::CMP32mi8, X86::CMP32mi },
- { X86::CMP32ri8, X86::CMP32ri },
- { X86::CMP64mi8, X86::CMP64mi32 },
- { X86::CMP64ri8, X86::CMP64ri32 },
- // IMUL
- { X86::IMUL16rmi8, X86::IMUL16rmi },
- { X86::IMUL16rri8, X86::IMUL16rri },
- { X86::IMUL32rmi8, X86::IMUL32rmi },
- { X86::IMUL32rri8, X86::IMUL32rri },
- { X86::IMUL64rmi8, X86::IMUL64rmi32 },
- { X86::IMUL64rri8, X86::IMUL64rri32 },
- // OR
- { X86::OR16mi8, X86::OR16mi },
- { X86::OR16ri8, X86::OR16ri },
- { X86::OR32mi8, X86::OR32mi },
- { X86::OR32ri8, X86::OR32ri },
- { X86::OR64mi8, X86::OR64mi32 },
- { X86::OR64ri8, X86::OR64ri32 },
- // PUSH
- { X86::PUSH16i8, X86::PUSHi16 },
- { X86::PUSH32i8, X86::PUSHi32 },
- { X86::PUSH64i8, X86::PUSH64i32 },
- // SBB
- { X86::SBB16mi8, X86::SBB16mi },
- { X86::SBB16ri8, X86::SBB16ri },
- { X86::SBB32mi8, X86::SBB32mi },
- { X86::SBB32ri8, X86::SBB32ri },
- { X86::SBB64mi8, X86::SBB64mi32 },
- { X86::SBB64ri8, X86::SBB64ri32 },
- // SUB
- { X86::SUB16mi8, X86::SUB16mi },
- { X86::SUB16ri8, X86::SUB16ri },
- { X86::SUB32mi8, X86::SUB32mi },
- { X86::SUB32ri8, X86::SUB32ri },
- { X86::SUB64mi8, X86::SUB64mi32 },
- { X86::SUB64ri8, X86::SUB64ri32 },
- // XOR
- { X86::XOR16mi8, X86::XOR16mi },
- { X86::XOR16ri8, X86::XOR16ri },
- { X86::XOR32mi8, X86::XOR32mi },
- { X86::XOR32ri8, X86::XOR32ri },
- { X86::XOR64mi8, X86::XOR64mi32 },
- { X86::XOR64ri8, X86::XOR64ri32 },
-};
-
-static const X86InstrRelaxTableEntry *
-lookupRelaxTableImpl(ArrayRef<X86InstrRelaxTableEntry> Table,
- unsigned ShortOp) {
-#ifndef NDEBUG
- // Make sure the tables are sorted.
- static std::atomic<bool> RelaxTableChecked(false);
- if (!RelaxTableChecked.load(std::memory_order_relaxed)) {
- assert(llvm::is_sorted(InstrRelaxTable) &&
- std::adjacent_find(std::begin(InstrRelaxTable),
- std::end(InstrRelaxTable)) ==
- std::end(InstrRelaxTable) &&
- "InstrRelaxTable is not sorted and unique!");
- RelaxTableChecked.store(true, std::memory_order_relaxed);
- }
-#endif
-
- const X86InstrRelaxTableEntry *Data = llvm::lower_bound(Table, ShortOp);
- if (Data != Table.end() && Data->KeyOp == ShortOp)
- return Data;
- return nullptr;
-}
-
-const X86InstrRelaxTableEntry *llvm::lookupRelaxTable(unsigned ShortOp) {
- return lookupRelaxTableImpl(InstrRelaxTable, ShortOp);
-}
-
-namespace {
-
-// This class stores the short form tables. It is instantiated as a
-// function scope static variable to lazily init the short form table.
-struct X86ShortFormTable {
- // Stores relaxation table entries sorted by relaxed form opcode.
- SmallVector<X86InstrRelaxTableEntry, 0> Table;
-
- X86ShortFormTable() {
- for (const X86InstrRelaxTableEntry &Entry : InstrRelaxTable)
- Table.push_back({Entry.DstOp, Entry.KeyOp});
-
- llvm::sort(Table);
-
- // Now that it's sorted, ensure its unique.
- assert(std::adjacent_find(Table.begin(), Table.end()) == Table.end() &&
- "Short form table is not unique!");
- }
-};
-} // namespace
-
-const X86InstrRelaxTableEntry *llvm::lookupShortTable(unsigned RelaxOp) {
- static X86ShortFormTable ShortTable;
- auto &Table = ShortTable.Table;
- auto I = llvm::lower_bound(Table, RelaxOp);
- if (I != Table.end() && I->KeyOp == RelaxOp)
- return &*I;
- return nullptr;
-}
-
-namespace llvm {
-
-/// Get the short instruction opcode for a given relaxed opcode.
-unsigned X86::getShortOpcodeArith(unsigned RelaxOp) {
- if (const X86InstrRelaxTableEntry *I = lookupShortTable(RelaxOp))
- return I->DstOp;
- return RelaxOp;
-}
-
-/// Get the relaxed instruction opcode for a given short opcode.
-unsigned X86::getRelaxedOpcodeArith(unsigned ShortOp) {
- if (const X86InstrRelaxTableEntry *I = lookupRelaxTable(ShortOp))
- return I->DstOp;
- return ShortOp;
-}
-} // namespace llvm
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.h b/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.h
deleted file mode 100644
index 0551c1861a58..000000000000
--- a/llvm/lib/Target/X86/MCTargetDesc/X86InstrRelaxTables.h
+++ /dev/null
@@ -1,54 +0,0 @@
-//===-- X86InstrRelaxTables.h - X86 Instruction Relaxation Tables -*- C++ -*-=//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the interface to query the X86 instruction relaxation
-// tables.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_TARGET_X86_X86INSTRRELAXTABLES_H
-#define LLVM_LIB_TARGET_X86_X86INSTRRELAXTABLES_H
-
-#include <cstdint>
-
-namespace llvm {
-
-// This struct is used for both the relaxed and short tables. The KeyOp is used
-// to determine the sorting order.
-struct X86InstrRelaxTableEntry {
- uint16_t KeyOp;
- uint16_t DstOp;
-
- bool operator<(const X86InstrRelaxTableEntry &RHS) const {
- return KeyOp < RHS.KeyOp;
- }
- bool operator==(const X86InstrRelaxTableEntry &RHS) const {
- return KeyOp == RHS.KeyOp;
- }
- friend bool operator<(const X86InstrRelaxTableEntry &TE, unsigned Opcode) {
- return TE.KeyOp < Opcode;
- }
-};
-
-/// Look up the relaxed form table entry for a given \p ShortOp.
-const X86InstrRelaxTableEntry *lookupRelaxTable(unsigned ShortOp);
-
-/// Look up the short form table entry for a given \p RelaxOp.
-const X86InstrRelaxTableEntry *lookupShortTable(unsigned RelaxOp);
-
-namespace X86 {
-
-/// Get the short instruction opcode for a given relaxed opcode.
-unsigned getShortOpcodeArith(unsigned RelaxOp);
-
-/// Get the relaxed instruction opcode for a given short opcode.
-unsigned getRelaxedOpcodeArith(unsigned ShortOp);
-} // namespace X86
-} // namespace llvm
-
-#endif
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
index 84da39c4a295..ecdc9090ac64 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86IntelInstPrinter.cpp
@@ -44,7 +44,7 @@ void X86IntelInstPrinter::printInst(const MCInst *MI, uint64_t Address,
// In 16-bit mode, print data16 as data32.
if (MI->getOpcode() == X86::DATA16_PREFIX &&
- STI.getFeatureBits()[X86::Is16Bit]) {
+ STI.hasFeature(X86::Is16Bit)) {
OS << "\tdata32";
} else if (!printAliasInstr(MI, Address, OS) && !printVecCompareInstr(MI, OS))
printInstruction(MI, Address, OS);
@@ -168,9 +168,9 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS
// Broadcast form.
// Load size is word for TA map. Otherwise it is based on W-bit.
if ((Desc.TSFlags & X86II::OpMapMask) == X86II::TA) {
- assert(!(Desc.TSFlags & X86II::VEX_W) && "Unknown W-bit value!");
+ assert(!(Desc.TSFlags & X86II::REX_W) && "Unknown W-bit value!");
printwordmem(MI, CurOp++, OS);
- } else if (Desc.TSFlags & X86II::VEX_W) {
+ } else if (Desc.TSFlags & X86II::REX_W) {
printqwordmem(MI, CurOp++, OS);
} else {
printdwordmem(MI, CurOp++, OS);
@@ -179,13 +179,13 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS
// Print the number of elements broadcasted.
unsigned NumElts;
if (Desc.TSFlags & X86II::EVEX_L2)
- NumElts = (Desc.TSFlags & X86II::VEX_W) ? 8 : 16;
+ NumElts = (Desc.TSFlags & X86II::REX_W) ? 8 : 16;
else if (Desc.TSFlags & X86II::VEX_L)
- NumElts = (Desc.TSFlags & X86II::VEX_W) ? 4 : 8;
+ NumElts = (Desc.TSFlags & X86II::REX_W) ? 4 : 8;
else
- NumElts = (Desc.TSFlags & X86II::VEX_W) ? 2 : 4;
+ NumElts = (Desc.TSFlags & X86II::REX_W) ? 2 : 4;
if ((Desc.TSFlags & X86II::OpMapMask) == X86II::TA) {
- assert(!(Desc.TSFlags & X86II::VEX_W) && "Unknown W-bit value!");
+ assert(!(Desc.TSFlags & X86II::REX_W) && "Unknown W-bit value!");
NumElts *= 2;
}
OS << "{1to" << NumElts << "}";
@@ -321,7 +321,7 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS
if (Desc.TSFlags & X86II::EVEX_B) {
// Broadcast form.
// Load size is based on W-bit as only D and Q are supported.
- if (Desc.TSFlags & X86II::VEX_W)
+ if (Desc.TSFlags & X86II::REX_W)
printqwordmem(MI, CurOp++, OS);
else
printdwordmem(MI, CurOp++, OS);
@@ -329,11 +329,11 @@ bool X86IntelInstPrinter::printVecCompareInstr(const MCInst *MI, raw_ostream &OS
// Print the number of elements broadcasted.
unsigned NumElts;
if (Desc.TSFlags & X86II::EVEX_L2)
- NumElts = (Desc.TSFlags & X86II::VEX_W) ? 8 : 16;
+ NumElts = (Desc.TSFlags & X86II::REX_W) ? 8 : 16;
else if (Desc.TSFlags & X86II::VEX_L)
- NumElts = (Desc.TSFlags & X86II::VEX_W) ? 4 : 8;
+ NumElts = (Desc.TSFlags & X86II::REX_W) ? 4 : 8;
else
- NumElts = (Desc.TSFlags & X86II::VEX_W) ? 2 : 4;
+ NumElts = (Desc.TSFlags & X86II::REX_W) ? 2 : 4;
OS << "{1to" << NumElts << "}";
} else {
if (Desc.TSFlags & X86II::EVEX_L2)
@@ -398,7 +398,7 @@ void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
if (IndexReg.getReg()) {
if (NeedPlus) O << " + ";
- if (ScaleVal != 1)
+ if (ScaleVal != 1 || !BaseReg.getReg())
O << ScaleVal << '*';
printOperand(MI, Op+X86::AddrIndexReg, O);
NeedPlus = true;
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index b5351358e4e5..3ce044387ada 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -11,10 +11,10 @@
//===----------------------------------------------------------------------===//
#include "X86MCAsmInfo.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
enum AsmWriterFlavorTy {
@@ -153,7 +153,8 @@ X86MCAsmInfoMicrosoftMASM::X86MCAsmInfoMicrosoftMASM(const Triple &Triple)
void X86MCAsmInfoGNUCOFF::anchor() { }
X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) {
- assert(Triple.isOSWindows() && "Windows is the only supported COFF target");
+ assert((Triple.isOSWindows() || Triple.isUEFI()) &&
+ "Windows and UEFI are the only supported COFF targets");
if (Triple.getArch() == Triple::x86_64) {
PrivateGlobalPrefix = ".L";
PrivateLabelPrefix = ".L";
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 2b819641ee5f..6af3ebb2feae 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -26,7 +26,6 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <cstdlib>
@@ -37,6 +36,209 @@ using namespace llvm;
namespace {
+enum PrefixKind { None, REX, XOP, VEX2, VEX3, EVEX };
+
+static void emitByte(uint8_t C, SmallVectorImpl<char> &CB) { CB.push_back(C); }
+
+class X86OpcodePrefixHelper {
+ // REX (1 byte)
+ // +-----+ +------+
+ // | 40H | | WRXB |
+ // +-----+ +------+
+
+ // XOP (3-byte)
+ // +-----+ +--------------+ +-------------------+
+ // | 8Fh | | RXB | m-mmmm | | W | vvvv | L | pp |
+ // +-----+ +--------------+ +-------------------+
+
+ // VEX2 (2 bytes)
+ // +-----+ +-------------------+
+ // | C5h | | R | vvvv | L | pp |
+ // +-----+ +-------------------+
+
+ // VEX3 (3 bytes)
+ // +-----+ +--------------+ +-------------------+
+ // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp |
+ // +-----+ +--------------+ +-------------------+
+
+ // VEX_R: opcode extension equivalent to REX.R in
+ // 1's complement (inverted) form
+ //
+ // 1: Same as REX_R=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX_R=1 (64-bit mode only)
+
+ // VEX_X: equivalent to REX.X, only used when a
+ // register is used for index in SIB Byte.
+ //
+ // 1: Same as REX.X=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX.X=1 (64-bit mode only)
+
+ // VEX_B:
+ // 1: Same as REX_B=0 (ignored in 32-bit mode)
+ // 0: Same as REX_B=1 (64-bit mode only)
+
+ // VEX_W: opcode specific (used like REX.W, or used for
+ // opcode extension, or ignored, depending on the opcode byte)
+
+ // VEX_5M (VEX m-mmmmm field):
+ //
+ // 0b00000: Reserved for future use
+ // 0b00001: implied 0F leading opcode
+ // 0b00010: implied 0F 38 leading opcode bytes
+ // 0b00011: implied 0F 3A leading opcode bytes
+ // 0b00100: Reserved for future use
+ // 0b00101: VEX MAP5
+ // 0b00110: VEX MAP6
+ // 0b00111-0b11111: Reserved for future use
+ // 0b01000: XOP map select - 08h instructions with imm byte
+ // 0b01001: XOP map select - 09h instructions with no imm byte
+ // 0b01010: XOP map select - 0Ah instructions with imm dword
+
+ // VEX_4V (VEX vvvv field): a register specifier
+ // (in 1's complement form) or 1111 if unused.
+
+ // VEX_PP: opcode extension providing equivalent
+ // functionality of a SIMD prefix
+ // 0b00: None
+ // 0b01: 66
+ // 0b10: F3
+ // 0b11: F2
+
+ // EVEX (4 bytes)
+ // +-----+ +--------------+ +-------------------+ +------------------------+
+ // | 62h | | RXBR' | 0mmm | | W | vvvv | 1 | pp | | z | L'L | b | v' | aaa |
+ // +-----+ +--------------+ +-------------------+ +------------------------+
+
+ // EVEX_L2/VEX_L (Vector Length):
+ // L2 L
+ // 0 0: scalar or 128-bit vector
+ // 0 1: 256-bit vector
+ // 1 0: 512-bit vector
+
+private:
+ unsigned W : 1;
+ unsigned R : 1;
+ unsigned X : 1;
+ unsigned B : 1;
+ unsigned VEX_4V : 4;
+ unsigned VEX_L : 1;
+ unsigned VEX_PP : 2;
+ unsigned VEX_5M : 5;
+ unsigned EVEX_R2 : 1;
+ unsigned EVEX_z : 1;
+ unsigned EVEX_L2 : 1;
+ unsigned EVEX_b : 1;
+ unsigned EVEX_V2 : 1;
+ unsigned EVEX_aaa : 3;
+ PrefixKind Kind = None;
+ const MCRegisterInfo &MRI;
+
+ unsigned getRegEncoding(const MCInst &MI, unsigned OpNum) const {
+ return MRI.getEncodingValue(MI.getOperand(OpNum).getReg());
+ }
+
+ void setR(unsigned Encoding) { R = Encoding >> 3 & 1; }
+ void setR2(unsigned Encoding) { EVEX_R2 = Encoding >> 4 & 1; }
+ void set4V(unsigned Encoding) { VEX_4V = Encoding & 0xf; }
+ void setV2(unsigned Encoding) { EVEX_V2 = Encoding >> 4 & 1; }
+
+public:
+ void setW(bool V) { W = V; }
+ void setR(const MCInst &MI, unsigned OpNum) {
+ setR(getRegEncoding(MI, OpNum));
+ }
+ void setX(const MCInst &MI, unsigned OpNum, unsigned Shift = 3) {
+ X = getRegEncoding(MI, OpNum) >> Shift & 1;
+ }
+ void setB(const MCInst &MI, unsigned OpNum) {
+ B = getRegEncoding(MI, OpNum) >> 3 & 1;
+ }
+ void set4V(const MCInst &MI, unsigned OpNum) {
+ set4V(getRegEncoding(MI, OpNum));
+ }
+ void setL(bool V) { VEX_L = V; }
+ void setPP(unsigned V) { VEX_PP = V; }
+ void set5M(unsigned V) { VEX_5M = V; }
+ void setR2(const MCInst &MI, unsigned OpNum) {
+ setR2(getRegEncoding(MI, OpNum));
+ }
+ void setRR2(const MCInst &MI, unsigned OpNum) {
+ unsigned Encoding = getRegEncoding(MI, OpNum);
+ setR(Encoding);
+ setR2(Encoding);
+ }
+ void setZ(bool V) { EVEX_z = V; }
+ void setL2(bool V) { EVEX_L2 = V; }
+ void setEVEX_b(bool V) { EVEX_b = V; }
+ void setV2(const MCInst &MI, unsigned OpNum) {
+ setV2(getRegEncoding(MI, OpNum));
+ }
+ void set4VV2(const MCInst &MI, unsigned OpNum) {
+ unsigned Encoding = getRegEncoding(MI, OpNum);
+ set4V(Encoding);
+ setV2(Encoding);
+ }
+ void setAAA(const MCInst &MI, unsigned OpNum) {
+ EVEX_aaa = getRegEncoding(MI, OpNum);
+ }
+
+ X86OpcodePrefixHelper(const MCRegisterInfo &MRI)
+ : W(0), R(0), X(0), B(0), VEX_4V(0), VEX_L(0), VEX_PP(0), VEX_5M(0),
+ EVEX_R2(0), EVEX_z(0), EVEX_L2(0), EVEX_b(0), EVEX_V2(0), EVEX_aaa(0),
+ MRI(MRI) {}
+
+ void setLowerBound(PrefixKind K) { Kind = K; }
+
+ PrefixKind determineOptimalKind() {
+ switch (Kind) {
+ case None:
+ Kind = (W | R | X | B) ? REX : None;
+ break;
+ case REX:
+ case XOP:
+ case VEX3:
+ case EVEX:
+ break;
+ case VEX2:
+ Kind = (W | X | B | (VEX_5M != 1)) ? VEX3 : VEX2;
+ break;
+ }
+ return Kind;
+ }
+
+ void emit(SmallVectorImpl<char> &CB) const {
+ uint8_t FirstPayload =
+ ((~R) & 0x1) << 7 | ((~X) & 0x1) << 6 | ((~B) & 0x1) << 5;
+ uint8_t LastPayload = ((~VEX_4V) & 0xf) << 3 | VEX_L << 2 | VEX_PP;
+ switch (Kind) {
+ case None:
+ return;
+ case REX:
+ emitByte(0x40 | W << 3 | R << 2 | X << 1 | B, CB);
+ return;
+ case VEX2:
+ emitByte(0xC5, CB);
+ emitByte(((~R) & 1) << 7 | LastPayload, CB);
+ return;
+ case VEX3:
+ case XOP:
+ emitByte(Kind == VEX3 ? 0xC4 : 0x8F, CB);
+ emitByte(FirstPayload | VEX_5M, CB);
+ emitByte(W << 7 | LastPayload, CB);
+ return;
+ case EVEX:
+ assert(VEX_5M && !(VEX_5M & 0x8) && "invalid mmm fields for EVEX!");
+ emitByte(0x62, CB);
+ emitByte(FirstPayload | ((~EVEX_R2) & 0x1) << 4 | VEX_5M, CB);
+ emitByte(W << 7 | ((~VEX_4V) & 0xf) << 3 | 1 << 2 | VEX_PP, CB);
+ emitByte(EVEX_z << 7 | EVEX_L2 << 6 | VEX_L << 5 | EVEX_b << 4 |
+ ((~EVEX_V2) & 0x1) << 3 | EVEX_aaa,
+ CB);
+ return;
+ }
+ }
+};
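For readers new to the byte layouts documented in this helper, the standalone sketch below (not part of the patch) assembles a REX byte and the payload byte of a 2-byte VEX prefix from individual fields, mirroring what emit() does; the field values in main() are made up for illustration.

// Standalone sketch, not part of the patch.
#include <cstdint>
#include <cstdio>

// REX = 0b0100WRXB.
static uint8_t rexByte(bool W, bool R, bool X, bool B) {
  return 0x40 | W << 3 | R << 2 | X << 1 | B;
}

// Second byte of a 2-byte VEX (C5h): R, vvvv stored in inverted form, then L
// and pp. VVVV is the raw register encoding (0 when unused, emitted as 1111).
static uint8_t vex2Payload(bool R, uint8_t VVVV, bool L, uint8_t PP) {
  return ((~R) & 0x1) << 7 | ((~VVVV) & 0xf) << 3 | L << 2 | (PP & 0x3);
}

int main() {
  std::printf("REX.WB  = %#04x\n", rexByte(true, false, false, true));   // 0x49
  std::printf("VEX2[1] = %#04x\n", vex2Payload(false, 0, true, 0x1));    // 0xfd
  return 0;
}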
+
class X86MCCodeEmitter : public MCCodeEmitter {
const MCInstrInfo &MCII;
MCContext &Ctx;
@@ -48,10 +250,10 @@ public:
X86MCCodeEmitter &operator=(const X86MCCodeEmitter &) = delete;
~X86MCCodeEmitter() override = default;
- void emitPrefix(const MCInst &MI, raw_ostream &OS,
+ void emitPrefix(const MCInst &MI, SmallVectorImpl<char> &CB,
const MCSubtargetInfo &STI) const override;
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -60,42 +262,41 @@ private:
unsigned getX86RegEncoding(const MCInst &MI, unsigned OpNum) const;
- /// \param MI a single low-level machine instruction.
- /// \param OpNum the operand #.
- /// \returns true if the OpNumth operand of MI require a bit to be set in
- /// REX prefix.
- bool isREXExtendedReg(const MCInst &MI, unsigned OpNum) const;
-
void emitImmediate(const MCOperand &Disp, SMLoc Loc, unsigned ImmSize,
- MCFixupKind FixupKind, uint64_t StartByte, raw_ostream &OS,
+ MCFixupKind FixupKind, uint64_t StartByte,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups, int ImmOffset = 0) const;
void emitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld,
- raw_ostream &OS) const;
+ SmallVectorImpl<char> &CB) const;
void emitSIBByte(unsigned SS, unsigned Index, unsigned Base,
- raw_ostream &OS) const;
+ SmallVectorImpl<char> &CB) const;
void emitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField,
- uint64_t TSFlags, bool HasREX, uint64_t StartByte,
- raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
+ uint64_t TSFlags, PrefixKind Kind, uint64_t StartByte,
+ SmallVectorImpl<char> &CB,
+ SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI,
bool ForceSIB = false) const;
- bool emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
- const MCSubtargetInfo &STI, raw_ostream &OS) const;
+ PrefixKind emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
+ const MCSubtargetInfo &STI,
+ SmallVectorImpl<char> &CB) const;
- void emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
- raw_ostream &OS) const;
+ PrefixKind emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
+ SmallVectorImpl<char> &CB) const;
void emitSegmentOverridePrefix(unsigned SegOperand, const MCInst &MI,
- raw_ostream &OS) const;
+ SmallVectorImpl<char> &CB) const;
- bool emitOpcodePrefix(int MemOperand, const MCInst &MI,
- const MCSubtargetInfo &STI, raw_ostream &OS) const;
+ PrefixKind emitOpcodePrefix(int MemOperand, const MCInst &MI,
+ const MCSubtargetInfo &STI,
+ SmallVectorImpl<char> &CB) const;
- bool emitREXPrefix(int MemOperand, const MCInst &MI,
- const MCSubtargetInfo &STI, raw_ostream &OS) const;
+ PrefixKind emitREXPrefix(int MemOperand, const MCInst &MI,
+ const MCSubtargetInfo &STI,
+ SmallVectorImpl<char> &CB) const;
};
} // end anonymous namespace
@@ -105,12 +306,11 @@ static uint8_t modRMByte(unsigned Mod, unsigned RegOpcode, unsigned RM) {
return RM | (RegOpcode << 3) | (Mod << 6);
}
-static void emitByte(uint8_t C, raw_ostream &OS) { OS << static_cast<char>(C); }
-
-static void emitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) {
+static void emitConstant(uint64_t Val, unsigned Size,
+ SmallVectorImpl<char> &CB) {
// Output the constant in little endian byte order.
for (unsigned i = 0; i != Size; ++i) {
- emitByte(Val & 255, OS);
+ emitByte(Val & 255, CB);
Val >>= 8;
}
}
@@ -121,16 +321,17 @@ static void emitConstant(uint64_t Val, unsigned Size, raw_ostream &OS) {
static bool isDispOrCDisp8(uint64_t TSFlags, int Value, int &ImmOffset) {
bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
- int CD8_Scale =
+ unsigned CD8_Scale =
(TSFlags & X86II::CD8_Scale_Mask) >> X86II::CD8_Scale_Shift;
- if (!HasEVEX || CD8_Scale == 0)
+ CD8_Scale = CD8_Scale ? 1U << (CD8_Scale - 1) : 0U;
+ if (!HasEVEX || !CD8_Scale)
return isInt<8>(Value);
assert(isPowerOf2_32(CD8_Scale) && "Unexpected CD8 scale!");
if (Value & (CD8_Scale - 1)) // Unaligned offset
return false;
- int CDisp8 = Value / CD8_Scale;
+ int CDisp8 = Value / static_cast<int>(CD8_Scale);
if (!isInt<8>(CDisp8))
return false;
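The hunk above also changes how the CD8 scale is read from TSFlags: the field now holds log2(scale) + 1, with 0 meaning no compression. The standalone sketch below (not part of the patch) walks the check with a concrete displacement; the ImmOffset handling at the end is my reading of the remainder of the function, which this hunk does not show.

// Standalone sketch, not part of the patch.
#include <cassert>

static bool fitsCDisp8(unsigned ScaleLog2PlusOne, int Value, int &ImmOffset) {
  unsigned Scale = ScaleLog2PlusOne ? 1u << (ScaleLog2PlusOne - 1) : 0u;
  if (!Scale) // No compression: plain disp8.
    return Value >= -128 && Value <= 127;
  if (Value & int(Scale - 1)) // Unaligned offset: needs disp32.
    return false;
  int CDisp8 = Value / int(Scale);
  if (CDisp8 < -128 || CDisp8 > 127)
    return false;
  ImmOffset = CDisp8 - Value; // What emitImmediate adds back to the operand.
  return true;
}

int main() {
  int Off = 0;
  // 64-byte granule (e.g. a full ZMM memory operand): disp 640 -> cdisp8 10.
  bool OK = fitsCDisp8(/*log2(64)+1=*/7, 640, Off);
  assert(OK && Off == 10 - 640);
  (void)OK;
  return 0;
}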
@@ -218,18 +419,10 @@ unsigned X86MCCodeEmitter::getX86RegEncoding(const MCInst &MI,
return Ctx.getRegisterInfo()->getEncodingValue(MI.getOperand(OpNum).getReg());
}
-/// \param MI a single low-level machine instruction.
-/// \param OpNum the operand #.
-/// \returns true if the OpNumth operand of MI require a bit to be set in
-/// REX prefix.
-bool X86MCCodeEmitter::isREXExtendedReg(const MCInst &MI,
- unsigned OpNum) const {
- return (getX86RegEncoding(MI, OpNum) >> 3) & 1;
-}
-
void X86MCCodeEmitter::emitImmediate(const MCOperand &DispOp, SMLoc Loc,
unsigned Size, MCFixupKind FixupKind,
- uint64_t StartByte, raw_ostream &OS,
+ uint64_t StartByte,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
int ImmOffset) const {
const MCExpr *Expr = nullptr;
@@ -238,7 +431,7 @@ void X86MCCodeEmitter::emitImmediate(const MCOperand &DispOp, SMLoc Loc,
// relocation, emit it now.
if (FixupKind != FK_PCRel_1 && FixupKind != FK_PCRel_2 &&
FixupKind != FK_PCRel_4) {
- emitConstant(DispOp.getImm() + ImmOffset, Size, OS);
+ emitConstant(DispOp.getImm() + ImmOffset, Size, CB);
return;
}
Expr = MCConstantExpr::create(DispOp.getImm(), Ctx);
@@ -261,7 +454,7 @@ void X86MCCodeEmitter::emitImmediate(const MCOperand &DispOp, SMLoc Loc,
}
if (Kind == GOT_Normal)
- ImmOffset = static_cast<int>(OS.tell() - StartByte);
+ ImmOffset = static_cast<int>(CB.size() - StartByte);
} else if (Expr->getKind() == MCExpr::SymbolRef) {
if (hasSecRelSymbolRef(Expr)) {
FixupKind = MCFixupKind(FK_SecRel_4);
@@ -300,30 +493,28 @@ void X86MCCodeEmitter::emitImmediate(const MCOperand &DispOp, SMLoc Loc,
Ctx);
// Emit a symbolic constant as a fixup and 4 zeros.
- Fixups.push_back(MCFixup::create(static_cast<uint32_t>(OS.tell() - StartByte),
+ Fixups.push_back(MCFixup::create(static_cast<uint32_t>(CB.size() - StartByte),
Expr, FixupKind, Loc));
- emitConstant(0, Size, OS);
+ emitConstant(0, Size, CB);
}
void X86MCCodeEmitter::emitRegModRMByte(const MCOperand &ModRMReg,
unsigned RegOpcodeFld,
- raw_ostream &OS) const {
- emitByte(modRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)), OS);
+ SmallVectorImpl<char> &CB) const {
+ emitByte(modRMByte(3, RegOpcodeFld, getX86RegNum(ModRMReg)), CB);
}
void X86MCCodeEmitter::emitSIBByte(unsigned SS, unsigned Index, unsigned Base,
- raw_ostream &OS) const {
+ SmallVectorImpl<char> &CB) const {
// SIB byte is in the same format as the modRMByte.
- emitByte(modRMByte(SS, Index, Base), OS);
+ emitByte(modRMByte(SS, Index, Base), CB);
}
-void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
- unsigned RegOpcodeField,
- uint64_t TSFlags, bool HasREX,
- uint64_t StartByte, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI,
- bool ForceSIB) const {
+void X86MCCodeEmitter::emitMemModRMByte(
+ const MCInst &MI, unsigned Op, unsigned RegOpcodeField, uint64_t TSFlags,
+ PrefixKind Kind, uint64_t StartByte, SmallVectorImpl<char> &CB,
+ SmallVectorImpl<MCFixup> &Fixups, const MCSubtargetInfo &STI,
+ bool ForceSIB) const {
const MCOperand &Disp = MI.getOperand(Op + X86::AddrDisp);
const MCOperand &Base = MI.getOperand(Op + X86::AddrBaseReg);
const MCOperand &Scale = MI.getOperand(Op + X86::AddrScaleAmt);
@@ -337,7 +528,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
"Rip-relative addressing requires 64-bit mode");
assert(IndexReg.getReg() == 0 && !ForceSIB &&
"Invalid rip-relative address");
- emitByte(modRMByte(0, RegOpcodeField, 5), OS);
+ emitByte(modRMByte(0, RegOpcodeField, 5), CB);
unsigned Opcode = MI.getOpcode();
unsigned FixupKind = [&]() {
@@ -355,7 +546,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// movq loads is a subset of reloc_riprel_4byte_relax_rex. It is a
// special case because COFF and Mach-O don't support ELF's more
// flexible R_X86_64_REX_GOTPCRELX relaxation.
- assert(HasREX);
+ assert(Kind == REX);
return X86::reloc_riprel_4byte_movq_load;
case X86::ADC32rm:
case X86::ADD32rm:
@@ -379,8 +570,8 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
case X86::SBB64rm:
case X86::SUB64rm:
case X86::XOR64rm:
- return HasREX ? X86::reloc_riprel_4byte_relax_rex
- : X86::reloc_riprel_4byte_relax;
+ return Kind == REX ? X86::reloc_riprel_4byte_relax_rex
+ : X86::reloc_riprel_4byte_relax;
}
}();
@@ -395,7 +586,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
? X86II::getSizeOfImm(TSFlags)
: 0;
- emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), StartByte, OS,
+ emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), StartByte, CB,
Fixups, -ImmSize);
return;
}
@@ -442,24 +633,24 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
if (Disp.isImm() && isInt<8>(Disp.getImm())) {
if (Disp.getImm() == 0 && RMfield != 6) {
// There is no displacement; just the register.
- emitByte(modRMByte(0, RegOpcodeField, RMfield), OS);
+ emitByte(modRMByte(0, RegOpcodeField, RMfield), CB);
return;
}
// Use the [REG]+disp8 form, including for [BP] which cannot be encoded.
- emitByte(modRMByte(1, RegOpcodeField, RMfield), OS);
- emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups);
+ emitByte(modRMByte(1, RegOpcodeField, RMfield), CB);
+ emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, CB, Fixups);
return;
}
// This is the [REG]+disp16 case.
- emitByte(modRMByte(2, RegOpcodeField, RMfield), OS);
+ emitByte(modRMByte(2, RegOpcodeField, RMfield), CB);
} else {
assert(IndexReg.getReg() == 0 && "Unexpected index register!");
// There is no BaseReg; this is the plain [disp16] case.
- emitByte(modRMByte(0, RegOpcodeField, 6), OS);
+ emitByte(modRMByte(0, RegOpcodeField, 6), CB);
}
// Emit 16-bit displacement for plain disp16 or [REG]+disp16 cases.
- emitImmediate(Disp, MI.getLoc(), 2, FK_Data_2, StartByte, OS, Fixups);
+ emitImmediate(Disp, MI.getLoc(), 2, FK_Data_2, StartByte, CB, Fixups);
return;
}
@@ -485,8 +676,8 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
(!STI.hasFeature(X86::Is64Bit) || BaseReg != 0)) {
if (BaseReg == 0) { // [disp32] in X86-32 mode
- emitByte(modRMByte(0, RegOpcodeField, 5), OS);
- emitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, StartByte, OS, Fixups);
+ emitByte(modRMByte(0, RegOpcodeField, 5), CB);
+ emitImmediate(Disp, MI.getLoc(), 4, FK_Data_4, StartByte, CB, Fixups);
return;
}
@@ -496,7 +687,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// handle it by emitting a displacement of 0 later.
if (BaseRegNo != N86::EBP) {
if (Disp.isImm() && Disp.getImm() == 0 && AllowNoDisp) {
- emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), OS);
+ emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), CB);
return;
}
@@ -507,7 +698,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// This is exclusively used by call *a@tlscall(base). The relocation
// (R_386_TLSCALL or R_X86_64_TLSCALL) applies to the beginning.
Fixups.push_back(MCFixup::create(0, Sym, FK_NONE, MI.getLoc()));
- emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), OS);
+ emitByte(modRMByte(0, RegOpcodeField, BaseRegNo), CB);
return;
}
}
@@ -520,8 +711,8 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
if (Disp.isImm() && AllowDisp8) {
int ImmOffset = 0;
if (isDispOrCDisp8(TSFlags, Disp.getImm(), ImmOffset)) {
- emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), OS);
- emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups,
+ emitByte(modRMByte(1, RegOpcodeField, BaseRegNo), CB);
+ emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, CB, Fixups,
ImmOffset);
return;
}
@@ -530,11 +721,11 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// Otherwise, emit the most general non-SIB encoding: [REG+disp32].
// Displacement may be 0 for [EBP] or [R13] case if {disp32} pseudo prefix
// prevented using disp8 above.
- emitByte(modRMByte(2, RegOpcodeField, BaseRegNo), OS);
+ emitByte(modRMByte(2, RegOpcodeField, BaseRegNo), CB);
unsigned Opcode = MI.getOpcode();
unsigned FixupKind = Opcode == X86::MOV32rm ? X86::reloc_signed_4byte_relax
: X86::reloc_signed_4byte;
- emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), StartByte, OS,
+ emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(FixupKind), StartByte, CB,
Fixups);
return;
}
@@ -550,7 +741,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// If there is no base register, we emit the special case SIB byte with
// MOD=0, BASE=5, to JUST get the index, scale, and displacement.
BaseRegNo = 5;
- emitByte(modRMByte(0, RegOpcodeField, 4), OS);
+ emitByte(modRMByte(0, RegOpcodeField, 4), CB);
ForceDisp32 = true;
} else if (Disp.isImm() && Disp.getImm() == 0 && AllowNoDisp &&
// Base reg can't be EBP/RBP/R13 as that would end up with '5' as
@@ -559,17 +750,17 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
// displacement instead.
BaseRegNo != N86::EBP) {
// Emit no displacement ModR/M byte
- emitByte(modRMByte(0, RegOpcodeField, 4), OS);
+ emitByte(modRMByte(0, RegOpcodeField, 4), CB);
} else if (Disp.isImm() && AllowDisp8 &&
isDispOrCDisp8(TSFlags, Disp.getImm(), ImmOffset)) {
// Displacement fits in a byte or matches an EVEX compressed disp8, use
// disp8 encoding. This also handles EBP/R13 base with 0 displacement unless
// {disp32} pseudo prefix was used.
- emitByte(modRMByte(1, RegOpcodeField, 4), OS);
+ emitByte(modRMByte(1, RegOpcodeField, 4), CB);
ForceDisp8 = true;
} else {
// Otherwise, emit the normal disp32 encoding.
- emitByte(modRMByte(2, RegOpcodeField, 4), OS);
+ emitByte(modRMByte(2, RegOpcodeField, 4), CB);
ForceDisp32 = true;
}
@@ -579,43 +770,44 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op,
unsigned IndexRegNo = IndexReg.getReg() ? getX86RegNum(IndexReg) : 4;
- emitSIBByte(SS, IndexRegNo, BaseRegNo, OS);
+ emitSIBByte(SS, IndexRegNo, BaseRegNo, CB);
// Do we need to output a displacement?
if (ForceDisp8)
- emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, OS, Fixups,
+ emitImmediate(Disp, MI.getLoc(), 1, FK_Data_1, StartByte, CB, Fixups,
ImmOffset);
else if (ForceDisp32)
emitImmediate(Disp, MI.getLoc(), 4, MCFixupKind(X86::reloc_signed_4byte),
- StartByte, OS, Fixups);
+ StartByte, CB, Fixups);
}
/// Emit all instruction prefixes.
///
-/// \returns true if REX prefix is used, otherwise returns false.
-bool X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
- const MCSubtargetInfo &STI,
- raw_ostream &OS) const {
+/// \returns one of REX, XOP, VEX2, VEX3 or EVEX if any of them is used;
+/// otherwise returns None.
+PrefixKind X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
+ const MCSubtargetInfo &STI,
+ SmallVectorImpl<char> &CB) const {
uint64_t TSFlags = MCII.get(MI.getOpcode()).TSFlags;
// Determine where the memory operand starts, if present.
int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
// Emit segment override opcode prefix as needed.
if (MemoryOperand != -1) {
MemoryOperand += CurOp;
- emitSegmentOverridePrefix(MemoryOperand + X86::AddrSegmentReg, MI, OS);
+ emitSegmentOverridePrefix(MemoryOperand + X86::AddrSegmentReg, MI, CB);
}
// Emit the repeat opcode prefix as needed.
unsigned Flags = MI.getFlags();
if (TSFlags & X86II::REP || Flags & X86::IP_HAS_REPEAT)
- emitByte(0xF3, OS);
+ emitByte(0xF3, CB);
if (Flags & X86::IP_HAS_REPEAT_NE)
- emitByte(0xF2, OS);
+ emitByte(0xF2, CB);
// Emit the address size opcode prefix as needed.
if (X86_MC::needsAddressSizeOverride(MI, STI, MemoryOperand, TSFlags) ||
Flags & X86::IP_HAS_AD_SIZE)
- emitByte(0x67, OS);
+ emitByte(0x67, CB);
uint64_t Form = TSFlags & X86II::FormMask;
switch (Form) {
@@ -624,14 +816,14 @@ bool X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
case X86II::RawFrmDstSrc: {
// Emit segment override opcode prefix as needed (not for %ds).
if (MI.getOperand(2).getReg() != X86::DS)
- emitSegmentOverridePrefix(2, MI, OS);
+ emitSegmentOverridePrefix(2, MI, CB);
CurOp += 3; // Consume operands.
break;
}
case X86II::RawFrmSrc: {
// Emit segment override opcode prefix as needed (not for %ds).
if (MI.getOperand(1).getReg() != X86::DS)
- emitSegmentOverridePrefix(1, MI, OS);
+ emitSegmentOverridePrefix(1, MI, CB);
CurOp += 2; // Consume operands.
break;
}
@@ -641,161 +833,109 @@ bool X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, const MCInst &MI,
}
case X86II::RawFrmMemOffs: {
// Emit segment override opcode prefix as needed.
- emitSegmentOverridePrefix(1, MI, OS);
+ emitSegmentOverridePrefix(1, MI, CB);
break;
}
}
// REX prefix is optional, but if used must be immediately before the opcode
// Encoding type for this instruction.
- uint64_t Encoding = TSFlags & X86II::EncodingMask;
- bool HasREX = false;
- if (Encoding)
- emitVEXOpcodePrefix(MemoryOperand, MI, OS);
- else
- HasREX = emitOpcodePrefix(MemoryOperand, MI, STI, OS);
-
- return HasREX;
+ return (TSFlags & X86II::EncodingMask)
+ ? emitVEXOpcodePrefix(MemoryOperand, MI, CB)
+ : emitOpcodePrefix(MemoryOperand, MI, STI, CB);
}
-/// AVX instructions are encoded using a opcode prefix called VEX.
-void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
- raw_ostream &OS) const {
+// AVX instructions are encoded using a scheme that combines prefix bytes, an
+// opcode extension field, operand encoding fields, and vector length encoding
+// capability into a single new prefix, referred to as VEX.
+
+// The majority of the AVX-512 family of instructions (operating on
+// 512/256/128-bit vector register operands) are encoded using a new prefix
+// (called EVEX).
+
+// XOP is a revised subset of what was originally intended as SSE5. It was
+// changed to be similar to, but not overlapping with, AVX.
+
+/// Emit XOP, VEX2, VEX3 or EVEX prefix.
+/// \returns the used prefix.
+PrefixKind
+X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
+ SmallVectorImpl<char> &CB) const {
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
uint64_t TSFlags = Desc.TSFlags;
assert(!(TSFlags & X86II::LOCK) && "Can't have LOCK VEX.");
- uint64_t Encoding = TSFlags & X86II::EncodingMask;
+ X86OpcodePrefixHelper Prefix(*Ctx.getRegisterInfo());
+ switch (TSFlags & X86II::EncodingMask) {
+ default:
+ break;
+ case X86II::XOP:
+ Prefix.setLowerBound(XOP);
+ break;
+ case X86II::VEX:
+ // VEX can be 2-byte or 3-byte; not determined yet unless VEX3 is explicitly requested.
+ Prefix.setLowerBound(MI.getFlags() & X86::IP_USE_VEX3 ? VEX3 : VEX2);
+ break;
+ case X86II::EVEX:
+ Prefix.setLowerBound(EVEX);
+ break;
+ }
+
+ Prefix.setW(TSFlags & X86II::REX_W);
+
bool HasEVEX_K = TSFlags & X86II::EVEX_K;
bool HasVEX_4V = TSFlags & X86II::VEX_4V;
bool HasEVEX_RC = TSFlags & X86II::EVEX_RC;
- // VEX_R: opcode externsion equivalent to REX.R in
- // 1's complement (inverted) form
- //
- // 1: Same as REX_R=0 (must be 1 in 32-bit mode)
- // 0: Same as REX_R=1 (64 bit mode only)
- //
- uint8_t VEX_R = 0x1;
- uint8_t EVEX_R2 = 0x1;
-
- // VEX_X: equivalent to REX.X, only used when a
- // register is used for index in SIB Byte.
- //
- // 1: Same as REX.X=0 (must be 1 in 32-bit mode)
- // 0: Same as REX.X=1 (64-bit mode only)
- uint8_t VEX_X = 0x1;
-
- // VEX_B:
- //
- // 1: Same as REX_B=0 (ignored in 32-bit mode)
- // 0: Same as REX_B=1 (64 bit mode only)
- //
- uint8_t VEX_B = 0x1;
-
- // VEX_W: opcode specific (use like REX.W, or used for
- // opcode extension, or ignored, depending on the opcode byte)
- uint8_t VEX_W = (TSFlags & X86II::VEX_W) ? 1 : 0;
-
- // VEX_5M (VEX m-mmmmm field):
- //
- // 0b00000: Reserved for future use
- // 0b00001: implied 0F leading opcode
- // 0b00010: implied 0F 38 leading opcode bytes
- // 0b00011: implied 0F 3A leading opcode bytes
- // 0b00100: Reserved for future use
- // 0b00101: VEX MAP5
- // 0b00110: VEX MAP6
- // 0b00111-0b11111: Reserved for future use
- // 0b01000: XOP map select - 08h instructions with imm byte
- // 0b01001: XOP map select - 09h instructions with no imm byte
- // 0b01010: XOP map select - 0Ah instructions with imm dword
- uint8_t VEX_5M;
switch (TSFlags & X86II::OpMapMask) {
default:
llvm_unreachable("Invalid prefix!");
case X86II::TB:
- VEX_5M = 0x1;
- break; // 0F
+ Prefix.set5M(0x1); // 0F
+ break;
case X86II::T8:
- VEX_5M = 0x2;
- break; // 0F 38
+ Prefix.set5M(0x2); // 0F 38
+ break;
case X86II::TA:
- VEX_5M = 0x3;
- break; // 0F 3A
+ Prefix.set5M(0x3); // 0F 3A
+ break;
case X86II::XOP8:
- VEX_5M = 0x8;
+ Prefix.set5M(0x8);
break;
case X86II::XOP9:
- VEX_5M = 0x9;
+ Prefix.set5M(0x9);
break;
case X86II::XOPA:
- VEX_5M = 0xA;
+ Prefix.set5M(0xA);
break;
case X86II::T_MAP5:
- VEX_5M = 0x5;
+ Prefix.set5M(0x5);
break;
case X86II::T_MAP6:
- VEX_5M = 0x6;
+ Prefix.set5M(0x6);
break;
}
- // VEX_4V (VEX vvvv field): a register specifier
- // (in 1's complement form) or 1111 if unused.
- uint8_t VEX_4V = 0xf;
- uint8_t EVEX_V2 = 0x1;
-
- // EVEX_L2/VEX_L (Vector Length):
- //
- // L2 L
- // 0 0: scalar or 128-bit vector
- // 0 1: 256-bit vector
- // 1 0: 512-bit vector
- //
- uint8_t VEX_L = (TSFlags & X86II::VEX_L) ? 1 : 0;
- uint8_t EVEX_L2 = (TSFlags & X86II::EVEX_L2) ? 1 : 0;
-
- // VEX_PP: opcode extension providing equivalent
- // functionality of a SIMD prefix
- //
- // 0b00: None
- // 0b01: 66
- // 0b10: F3
- // 0b11: F2
- //
- uint8_t VEX_PP = 0;
+ Prefix.setL(TSFlags & X86II::VEX_L);
+ Prefix.setL2(TSFlags & X86II::EVEX_L2);
switch (TSFlags & X86II::OpPrefixMask) {
case X86II::PD:
- VEX_PP = 0x1;
- break; // 66
+ Prefix.setPP(0x1); // 66
+ break;
case X86II::XS:
- VEX_PP = 0x2;
- break; // F3
+ Prefix.setPP(0x2); // F3
+ break;
case X86II::XD:
- VEX_PP = 0x3;
- break; // F2
+ Prefix.setPP(0x3); // F2
+ break;
}
- // EVEX_U
- uint8_t EVEX_U = 1; // Always '1' so far
-
- // EVEX_z
- uint8_t EVEX_z = (HasEVEX_K && (TSFlags & X86II::EVEX_Z)) ? 1 : 0;
-
- // EVEX_b
- uint8_t EVEX_b = (TSFlags & X86II::EVEX_B) ? 1 : 0;
-
- // EVEX_rc
- uint8_t EVEX_rc = 0;
-
- // EVEX_aaa
- uint8_t EVEX_aaa = 0;
+ Prefix.setZ(HasEVEX_K && (TSFlags & X86II::EVEX_Z));
+ Prefix.setEVEX_b(TSFlags & X86II::EVEX_B);
bool EncodeRC = false;
-
- // Classify VEX_B, VEX_4V, VEX_R, VEX_X
- unsigned NumOps = Desc.getNumOperands();
+ uint8_t EVEX_rc = 0;
unsigned CurOp = X86II::getOperandBias(Desc);
switch (TSFlags & X86II::FormMask) {
@@ -803,24 +943,15 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
llvm_unreachable("Unexpected form in emitVEXOpcodePrefix!");
case X86II::MRMDestMem4VOp3CC: {
// MemAddr, src1(ModR/M), src2(VEX_4V)
- unsigned BaseRegEnc = getX86RegEncoding(MI, MemOperand + X86::AddrBaseReg);
- VEX_B = ~(BaseRegEnc >> 3) & 1;
- unsigned IndexRegEnc =
- getX86RegEncoding(MI, MemOperand + X86::AddrIndexReg);
- VEX_X = ~(IndexRegEnc >> 3) & 1;
-
+ Prefix.setB(MI, MemOperand + X86::AddrBaseReg);
+ Prefix.setX(MI, MemOperand + X86::AddrIndexReg);
CurOp += X86::AddrNumOperands;
-
- unsigned RegEnc = getX86RegEncoding(MI, ++CurOp);
- VEX_R = ~(RegEnc >> 3) & 1;
-
- unsigned VRegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_4V = ~VRegEnc & 0xf;
+ Prefix.setR(MI, ++CurOp);
+ Prefix.set4V(MI, CurOp++);
break;
}
case X86II::MRM_C0:
case X86II::RawFrm:
- case X86II::PrefixByte:
break;
case X86II::MRMDestMemFSIB:
case X86II::MRMDestMem: {
@@ -829,28 +960,20 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
// MemAddr, src1(VEX_4V), src2(ModR/M)
// MemAddr, src1(ModR/M), imm8
//
- unsigned BaseRegEnc = getX86RegEncoding(MI, MemOperand + X86::AddrBaseReg);
- VEX_B = ~(BaseRegEnc >> 3) & 1;
- unsigned IndexRegEnc =
- getX86RegEncoding(MI, MemOperand + X86::AddrIndexReg);
- VEX_X = ~(IndexRegEnc >> 3) & 1;
+ Prefix.setB(MI, MemOperand + X86::AddrBaseReg);
+ Prefix.setX(MI, MemOperand + X86::AddrIndexReg);
if (!HasVEX_4V) // Only needed with VSIB which don't use VVVV.
- EVEX_V2 = ~(IndexRegEnc >> 4) & 1;
+ Prefix.setV2(MI, MemOperand + X86::AddrIndexReg);
CurOp += X86::AddrNumOperands;
if (HasEVEX_K)
- EVEX_aaa = getX86RegEncoding(MI, CurOp++);
+ Prefix.setAAA(MI, CurOp++);
- if (HasVEX_4V) {
- unsigned VRegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_4V = ~VRegEnc & 0xf;
- EVEX_V2 = ~(VRegEnc >> 4) & 1;
- }
+ if (HasVEX_4V)
+ Prefix.set4VV2(MI, CurOp++);
- unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_R = ~(RegEnc >> 3) & 1;
- EVEX_R2 = ~(RegEnc >> 4) & 1;
+ Prefix.setRR2(MI, CurOp++);
break;
}
case X86II::MRMSrcMemFSIB:
@@ -863,57 +986,36 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
//
// FMA4:
// dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(Imm[7:4])
- unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_R = ~(RegEnc >> 3) & 1;
- EVEX_R2 = ~(RegEnc >> 4) & 1;
+ Prefix.setRR2(MI, CurOp++);
if (HasEVEX_K)
- EVEX_aaa = getX86RegEncoding(MI, CurOp++);
+ Prefix.setAAA(MI, CurOp++);
- if (HasVEX_4V) {
- unsigned VRegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_4V = ~VRegEnc & 0xf;
- EVEX_V2 = ~(VRegEnc >> 4) & 1;
- }
+ if (HasVEX_4V)
+ Prefix.set4VV2(MI, CurOp++);
- unsigned BaseRegEnc = getX86RegEncoding(MI, MemOperand + X86::AddrBaseReg);
- VEX_B = ~(BaseRegEnc >> 3) & 1;
- unsigned IndexRegEnc =
- getX86RegEncoding(MI, MemOperand + X86::AddrIndexReg);
- VEX_X = ~(IndexRegEnc >> 3) & 1;
+ Prefix.setB(MI, MemOperand + X86::AddrBaseReg);
+ Prefix.setX(MI, MemOperand + X86::AddrIndexReg);
if (!HasVEX_4V) // Only needed with VSIB which don't use VVVV.
- EVEX_V2 = ~(IndexRegEnc >> 4) & 1;
+ Prefix.setV2(MI, MemOperand + X86::AddrIndexReg);
break;
}
case X86II::MRMSrcMem4VOp3: {
// Instruction format for 4VOp3:
// src1(ModR/M), MemAddr, src3(VEX_4V)
- unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_R = ~(RegEnc >> 3) & 1;
-
- unsigned BaseRegEnc = getX86RegEncoding(MI, MemOperand + X86::AddrBaseReg);
- VEX_B = ~(BaseRegEnc >> 3) & 1;
- unsigned IndexRegEnc =
- getX86RegEncoding(MI, MemOperand + X86::AddrIndexReg);
- VEX_X = ~(IndexRegEnc >> 3) & 1;
-
- VEX_4V = ~getX86RegEncoding(MI, CurOp + X86::AddrNumOperands) & 0xf;
+ Prefix.setR(MI, CurOp++);
+ Prefix.setB(MI, MemOperand + X86::AddrBaseReg);
+ Prefix.setX(MI, MemOperand + X86::AddrIndexReg);
+ Prefix.set4V(MI, CurOp + X86::AddrNumOperands);
break;
}
case X86II::MRMSrcMemOp4: {
// dst(ModR/M.reg), src1(VEX_4V), src2(Imm[7:4]), src3(ModR/M),
- unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_R = ~(RegEnc >> 3) & 1;
-
- unsigned VRegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_4V = ~VRegEnc & 0xf;
-
- unsigned BaseRegEnc = getX86RegEncoding(MI, MemOperand + X86::AddrBaseReg);
- VEX_B = ~(BaseRegEnc >> 3) & 1;
- unsigned IndexRegEnc =
- getX86RegEncoding(MI, MemOperand + X86::AddrIndexReg);
- VEX_X = ~(IndexRegEnc >> 3) & 1;
+ Prefix.setR(MI, CurOp++);
+ Prefix.set4V(MI, CurOp++);
+ Prefix.setB(MI, MemOperand + X86::AddrBaseReg);
+ Prefix.setX(MI, MemOperand + X86::AddrIndexReg);
break;
}
case X86II::MRM0m:
@@ -927,22 +1029,16 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
// MRM[0-9]m instructions forms:
// MemAddr
// src1(VEX_4V), MemAddr
- if (HasVEX_4V) {
- unsigned VRegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_4V = ~VRegEnc & 0xf;
- EVEX_V2 = ~(VRegEnc >> 4) & 1;
- }
+ if (HasVEX_4V)
+ Prefix.set4VV2(MI, CurOp++);
if (HasEVEX_K)
- EVEX_aaa = getX86RegEncoding(MI, CurOp++);
+ Prefix.setAAA(MI, CurOp++);
- unsigned BaseRegEnc = getX86RegEncoding(MI, MemOperand + X86::AddrBaseReg);
- VEX_B = ~(BaseRegEnc >> 3) & 1;
- unsigned IndexRegEnc =
- getX86RegEncoding(MI, MemOperand + X86::AddrIndexReg);
- VEX_X = ~(IndexRegEnc >> 3) & 1;
+ Prefix.setB(MI, MemOperand + X86::AddrBaseReg);
+ Prefix.setX(MI, MemOperand + X86::AddrIndexReg);
if (!HasVEX_4V) // Only needed with VSIB which don't use VVVV.
- EVEX_V2 = ~(IndexRegEnc >> 4) & 1;
+ Prefix.setV2(MI, MemOperand + X86::AddrIndexReg);
break;
}
@@ -954,25 +1050,21 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
//
// FMA4:
// dst(ModR/M.reg), src1(VEX_4V), src2(Imm[7:4]), src3(ModR/M),
- unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_R = ~(RegEnc >> 3) & 1;
- EVEX_R2 = ~(RegEnc >> 4) & 1;
+ Prefix.setRR2(MI, CurOp++);
if (HasEVEX_K)
- EVEX_aaa = getX86RegEncoding(MI, CurOp++);
+ Prefix.setAAA(MI, CurOp++);
- if (HasVEX_4V) {
- unsigned VRegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_4V = ~VRegEnc & 0xf;
- EVEX_V2 = ~(VRegEnc >> 4) & 1;
- }
+ if (HasVEX_4V)
+ Prefix.set4VV2(MI, CurOp++);
- RegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_B = ~(RegEnc >> 3) & 1;
- VEX_X = ~(RegEnc >> 4) & 1;
+ Prefix.setB(MI, CurOp);
+ Prefix.setX(MI, CurOp, 4);
+ ++CurOp;
- if (EVEX_b) {
+ if (TSFlags & X86II::EVEX_B) {
if (HasEVEX_RC) {
+ unsigned NumOps = Desc.getNumOperands();
unsigned RcOperand = NumOps - 1;
assert(RcOperand >= CurOp);
EVEX_rc = MI.getOperand(RcOperand).getImm();
@@ -985,29 +1077,21 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
case X86II::MRMSrcReg4VOp3: {
// Instruction format for 4VOp3:
// src1(ModR/M), src2(ModR/M), src3(VEX_4V)
- unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_R = ~(RegEnc >> 3) & 1;
-
- RegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_B = ~(RegEnc >> 3) & 1;
-
- VEX_4V = ~getX86RegEncoding(MI, CurOp++) & 0xf;
+ Prefix.setR(MI, CurOp++);
+ Prefix.setB(MI, CurOp++);
+ Prefix.set4V(MI, CurOp++);
break;
}
case X86II::MRMSrcRegOp4: {
// dst(ModR/M.reg), src1(VEX_4V), src2(Imm[7:4]), src3(ModR/M),
- unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_R = ~(RegEnc >> 3) & 1;
-
- unsigned VRegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_4V = ~VRegEnc & 0xf;
-
+ Prefix.setR(MI, CurOp++);
+ Prefix.set4V(MI, CurOp++);
// Skip second register source (encoded in Imm[7:4])
++CurOp;
- RegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_B = ~(RegEnc >> 3) & 1;
- VEX_X = ~(RegEnc >> 4) & 1;
+ Prefix.setB(MI, CurOp);
+ Prefix.setX(MI, CurOp, 4);
+ ++CurOp;
break;
}
case X86II::MRMDestReg: {
@@ -1015,23 +1099,18 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
// dst(ModR/M), src(ModR/M)
// dst(ModR/M), src(ModR/M), imm8
// dst(ModR/M), src1(VEX_4V), src2(ModR/M)
- unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_B = ~(RegEnc >> 3) & 1;
- VEX_X = ~(RegEnc >> 4) & 1;
+ Prefix.setB(MI, CurOp);
+ Prefix.setX(MI, CurOp, 4);
+ ++CurOp;
if (HasEVEX_K)
- EVEX_aaa = getX86RegEncoding(MI, CurOp++);
+ Prefix.setAAA(MI, CurOp++);
- if (HasVEX_4V) {
- unsigned VRegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_4V = ~VRegEnc & 0xf;
- EVEX_V2 = ~(VRegEnc >> 4) & 1;
- }
+ if (HasVEX_4V)
+ Prefix.set4VV2(MI, CurOp++);
- RegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_R = ~(RegEnc >> 3) & 1;
- EVEX_R2 = ~(RegEnc >> 4) & 1;
- if (EVEX_b)
+ Prefix.setRR2(MI, CurOp++);
+ if (TSFlags & X86II::EVEX_B)
EncodeRC = true;
break;
}
@@ -1039,9 +1118,7 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
// MRMr0 instructions forms:
// 11:rrr:000
// dst(ModR/M)
- unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_R = ~(RegEnc >> 3) & 1;
- EVEX_R2 = ~(RegEnc >> 4) & 1;
+ Prefix.setRR2(MI, CurOp++);
break;
}
case X86II::MRM0r:
@@ -1054,75 +1131,25 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
case X86II::MRM7r: {
// MRM0r-MRM7r instructions forms:
// dst(VEX_4V), src(ModR/M), imm8
- if (HasVEX_4V) {
- unsigned VRegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_4V = ~VRegEnc & 0xf;
- EVEX_V2 = ~(VRegEnc >> 4) & 1;
- }
+ if (HasVEX_4V)
+ Prefix.set4VV2(MI, CurOp++);
+
if (HasEVEX_K)
- EVEX_aaa = getX86RegEncoding(MI, CurOp++);
+ Prefix.setAAA(MI, CurOp++);
- unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
- VEX_B = ~(RegEnc >> 3) & 1;
- VEX_X = ~(RegEnc >> 4) & 1;
+ Prefix.setB(MI, CurOp);
+ Prefix.setX(MI, CurOp, 4);
+ ++CurOp;
break;
}
}
-
- if (Encoding == X86II::VEX || Encoding == X86II::XOP) {
- // VEX opcode prefix can have 2 or 3 bytes
- //
- // 3 bytes:
- // +-----+ +--------------+ +-------------------+
- // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp |
- // +-----+ +--------------+ +-------------------+
- // 2 bytes:
- // +-----+ +-------------------+
- // | C5h | | R | vvvv | L | pp |
- // +-----+ +-------------------+
- //
- // XOP uses a similar prefix:
- // +-----+ +--------------+ +-------------------+
- // | 8Fh | | RXB | m-mmmm | | W | vvvv | L | pp |
- // +-----+ +--------------+ +-------------------+
- uint8_t LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
-
- // Can we use the 2 byte VEX prefix?
- if (!(MI.getFlags() & X86::IP_USE_VEX3) && Encoding == X86II::VEX &&
- VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) {
- emitByte(0xC5, OS);
- emitByte(LastByte | (VEX_R << 7), OS);
- return;
- }
-
- // 3 byte VEX prefix
- emitByte(Encoding == X86II::XOP ? 0x8F : 0xC4, OS);
- emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, OS);
- emitByte(LastByte | (VEX_W << 7), OS);
- } else {
- assert(Encoding == X86II::EVEX && "unknown encoding!");
- // EVEX opcode prefix can have 4 bytes
- //
- // +-----+ +--------------+ +-------------------+ +------------------------+
- // | 62h | | RXBR' | 0mmm | | W | vvvv | U | pp | | z | L'L | b | v' | aaa |
- // +-----+ +--------------+ +-------------------+ +------------------------+
- assert((VEX_5M & 0x7) == VEX_5M &&
- "More than 3 significant bits in VEX.m-mmmm fields for EVEX!");
-
- emitByte(0x62, OS);
- emitByte((VEX_R << 7) | (VEX_X << 6) | (VEX_B << 5) | (EVEX_R2 << 4) |
- VEX_5M,
- OS);
- emitByte((VEX_W << 7) | (VEX_4V << 3) | (EVEX_U << 2) | VEX_PP, OS);
- if (EncodeRC)
- emitByte((EVEX_z << 7) | (EVEX_rc << 5) | (EVEX_b << 4) | (EVEX_V2 << 3) |
- EVEX_aaa,
- OS);
- else
- emitByte((EVEX_z << 7) | (EVEX_L2 << 6) | (VEX_L << 5) | (EVEX_b << 4) |
- (EVEX_V2 << 3) | EVEX_aaa,
- OS);
+ if (EncodeRC) {
+ Prefix.setL(EVEX_rc & 0x1);
+ Prefix.setL2(EVEX_rc & 0x2);
}
+ PrefixKind Kind = Prefix.determineOptimalKind();
+ Prefix.emit(CB);
+ return Kind;
}
/// Emit REX prefix which specifies
@@ -1130,129 +1157,122 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(int MemOperand, const MCInst &MI,
/// 2) non-default operand size, and
/// 3) use of X86-64 extended registers.
///
-/// \returns true if REX prefix is used, otherwise returns false.
-bool X86MCCodeEmitter::emitREXPrefix(int MemOperand, const MCInst &MI,
- const MCSubtargetInfo &STI,
- raw_ostream &OS) const {
- uint8_t REX = [&, MemOperand]() {
- uint8_t REX = 0;
- bool UsesHighByteReg = false;
-
- const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
- uint64_t TSFlags = Desc.TSFlags;
-
- if (TSFlags & X86II::REX_W)
- REX |= 1 << 3; // set REX.W
-
- if (MI.getNumOperands() == 0)
- return REX;
-
- unsigned NumOps = MI.getNumOperands();
- unsigned CurOp = X86II::getOperandBias(Desc);
-
- // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
- for (unsigned i = CurOp; i != NumOps; ++i) {
- const MCOperand &MO = MI.getOperand(i);
- if (MO.isReg()) {
- unsigned Reg = MO.getReg();
- if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH ||
- Reg == X86::DH)
- UsesHighByteReg = true;
- if (X86II::isX86_64NonExtLowByteReg(Reg))
- // FIXME: The caller of determineREXPrefix slaps this prefix onto
- // anything that returns non-zero.
- REX |= 0x40; // REX fixed encoding prefix
- } else if (MO.isExpr() && STI.getTargetTriple().isX32()) {
- // GOTTPOFF and TLSDESC relocations require a REX prefix to allow
- // linker optimizations: even if the instructions we see may not require
- // any prefix, they may be replaced by instructions that do. This is
- // handled as a special case here so that it also works for hand-written
- // assembly without the user needing to write REX, as with GNU as.
- const auto *Ref = dyn_cast<MCSymbolRefExpr>(MO.getExpr());
- if (Ref && (Ref->getKind() == MCSymbolRefExpr::VK_GOTTPOFF ||
- Ref->getKind() == MCSymbolRefExpr::VK_TLSDESC)) {
- REX |= 0x40; // REX fixed encoding prefix
- }
+/// \returns the used prefix (REX or None).
+PrefixKind X86MCCodeEmitter::emitREXPrefix(int MemOperand, const MCInst &MI,
+ const MCSubtargetInfo &STI,
+ SmallVectorImpl<char> &CB) const {
+ if (!STI.hasFeature(X86::Is64Bit))
+ return None;
+ X86OpcodePrefixHelper Prefix(*Ctx.getRegisterInfo());
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ uint64_t TSFlags = Desc.TSFlags;
+ Prefix.setW(TSFlags & X86II::REX_W);
+ unsigned NumOps = MI.getNumOperands();
+ bool UsesHighByteReg = false;
+#ifndef NDEBUG
+ bool HasRegOp = false;
+#endif
+ unsigned CurOp = NumOps ? X86II::getOperandBias(Desc) : 0;
+ for (unsigned i = CurOp; i != NumOps; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg()) {
+#ifndef NDEBUG
+ HasRegOp = true;
+#endif
+ unsigned Reg = MO.getReg();
+ if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
+ UsesHighByteReg = true;
+ // If it accesses SPL, BPL, SIL, or DIL, then it requires a REX prefix.
+ if (X86II::isX86_64NonExtLowByteReg(Reg))
+ Prefix.setLowerBound(REX);
+ } else if (MO.isExpr() && STI.getTargetTriple().isX32()) {
+ // GOTTPOFF and TLSDESC relocations require a REX prefix to allow
+ // linker optimizations: even if the instructions we see may not require
+ // any prefix, they may be replaced by instructions that do. This is
+ // handled as a special case here so that it also works for hand-written
+ // assembly without the user needing to write REX, as with GNU as.
+ const auto *Ref = dyn_cast<MCSymbolRefExpr>(MO.getExpr());
+ if (Ref && (Ref->getKind() == MCSymbolRefExpr::VK_GOTTPOFF ||
+ Ref->getKind() == MCSymbolRefExpr::VK_TLSDESC)) {
+ Prefix.setLowerBound(REX);
}
}
-
- switch (TSFlags & X86II::FormMask) {
- case X86II::AddRegFrm:
- REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
- break;
- case X86II::MRMSrcReg:
- case X86II::MRMSrcRegCC:
- REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
- REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
- break;
- case X86II::MRMSrcMem:
- case X86II::MRMSrcMemCC:
- REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
- REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B
- REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X
- CurOp += X86::AddrNumOperands;
- break;
- case X86II::MRMDestReg:
- REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
- REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
- break;
- case X86II::MRMDestMem:
- REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B
- REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X
- CurOp += X86::AddrNumOperands;
- REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
- break;
- case X86II::MRMXmCC:
- case X86II::MRMXm:
- case X86II::MRM0m:
- case X86II::MRM1m:
- case X86II::MRM2m:
- case X86II::MRM3m:
- case X86II::MRM4m:
- case X86II::MRM5m:
- case X86II::MRM6m:
- case X86II::MRM7m:
- REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B
- REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X
- break;
- case X86II::MRMXrCC:
- case X86II::MRMXr:
- case X86II::MRM0r:
- case X86II::MRM1r:
- case X86II::MRM2r:
- case X86II::MRM3r:
- case X86II::MRM4r:
- case X86II::MRM5r:
- case X86II::MRM6r:
- case X86II::MRM7r:
- REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
- break;
- case X86II::MRMr0:
- REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
- break;
- case X86II::MRMDestMemFSIB:
- llvm_unreachable("FSIB format never need REX prefix!");
- }
- if (REX && UsesHighByteReg)
- report_fatal_error(
- "Cannot encode high byte register in REX-prefixed instruction");
- return REX;
- }();
-
- if (!REX)
- return false;
-
- emitByte(0x40 | REX, OS);
- return true;
+ }
+ switch (TSFlags & X86II::FormMask) {
+ default:
+ assert(!HasRegOp && "Unexpected form in emitREXPrefix!");
+ break;
+ case X86II::RawFrm:
+ case X86II::RawFrmMemOffs:
+ case X86II::RawFrmSrc:
+ case X86II::RawFrmDst:
+ case X86II::RawFrmDstSrc:
+ break;
+ case X86II::AddRegFrm:
+ Prefix.setB(MI, CurOp++);
+ break;
+ case X86II::MRMSrcReg:
+ case X86II::MRMSrcRegCC:
+ Prefix.setR(MI, CurOp++);
+ Prefix.setB(MI, CurOp++);
+ break;
+ case X86II::MRMSrcMem:
+ case X86II::MRMSrcMemCC:
+ Prefix.setR(MI, CurOp++);
+ Prefix.setB(MI, MemOperand + X86::AddrBaseReg);
+ Prefix.setX(MI, MemOperand + X86::AddrIndexReg);
+ CurOp += X86::AddrNumOperands;
+ break;
+ case X86II::MRMDestReg:
+ Prefix.setB(MI, CurOp++);
+ Prefix.setR(MI, CurOp++);
+ break;
+ case X86II::MRMDestMem:
+ Prefix.setB(MI, MemOperand + X86::AddrBaseReg);
+ Prefix.setX(MI, MemOperand + X86::AddrIndexReg);
+ CurOp += X86::AddrNumOperands;
+ Prefix.setR(MI, CurOp++);
+ break;
+ case X86II::MRMXmCC:
+ case X86II::MRMXm:
+ case X86II::MRM0m:
+ case X86II::MRM1m:
+ case X86II::MRM2m:
+ case X86II::MRM3m:
+ case X86II::MRM4m:
+ case X86II::MRM5m:
+ case X86II::MRM6m:
+ case X86II::MRM7m:
+ Prefix.setB(MI, MemOperand + X86::AddrBaseReg);
+ Prefix.setX(MI, MemOperand + X86::AddrIndexReg);
+ break;
+ case X86II::MRMXrCC:
+ case X86II::MRMXr:
+ case X86II::MRM0r:
+ case X86II::MRM1r:
+ case X86II::MRM2r:
+ case X86II::MRM3r:
+ case X86II::MRM4r:
+ case X86II::MRM5r:
+ case X86II::MRM6r:
+ case X86II::MRM7r:
+ Prefix.setB(MI, CurOp++);
+ break;
+ }
+ PrefixKind Kind = Prefix.determineOptimalKind();
+ if (Kind && UsesHighByteReg)
+ report_fatal_error(
+ "Cannot encode high byte register in REX-prefixed instruction");
+ Prefix.emit(CB);
+ return Kind;
}
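For reference while reading the setR/setX/setB refactor above, here is a minimal standalone sketch of the classic REX byte layout that the removed code computed by hand; it is an illustration only, not part of X86OpcodePrefixHelper or of this patch.

    #include <cstdint>

    // REX is 0100WRXB: W selects 64-bit operand size, while R, X and B extend
    // the ModRM.reg, SIB.index and ModRM.rm/SIB.base register fields.
    static uint8_t buildREX(bool W, bool R, bool X, bool B) {
      return uint8_t(0x40 | (W << 3) | (R << 2) | (X << 1) | (B << 0));
    }
    // Example: buildREX(/*W=*/true, false, false, /*B=*/true) == 0x49,
    // matching the old "emitByte(0x40 | REX, OS)" path.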
/// Emit segment override opcode prefix as needed.
-void X86MCCodeEmitter::emitSegmentOverridePrefix(unsigned SegOperand,
- const MCInst &MI,
- raw_ostream &OS) const {
+void X86MCCodeEmitter::emitSegmentOverridePrefix(
+ unsigned SegOperand, const MCInst &MI, SmallVectorImpl<char> &CB) const {
// Check for explicit segment override on memory operand.
if (unsigned Reg = MI.getOperand(SegOperand).getReg())
- emitByte(X86::getSegmentOverridePrefixForReg(Reg), OS);
+ emitByte(X86::getSegmentOverridePrefixForReg(Reg), CB);
}
/// Emit all instruction prefixes prior to the opcode.
@@ -1260,44 +1280,42 @@ void X86MCCodeEmitter::emitSegmentOverridePrefix(unsigned SegOperand,
/// \param MemOperand the operand # of the start of a memory operand if present.
/// If not present, it is -1.
///
-/// \returns true if REX prefix is used, otherwise returns false.
-bool X86MCCodeEmitter::emitOpcodePrefix(int MemOperand, const MCInst &MI,
- const MCSubtargetInfo &STI,
- raw_ostream &OS) const {
+/// \returns the used prefix (REX or None).
+PrefixKind X86MCCodeEmitter::emitOpcodePrefix(int MemOperand, const MCInst &MI,
+ const MCSubtargetInfo &STI,
+ SmallVectorImpl<char> &CB) const {
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
uint64_t TSFlags = Desc.TSFlags;
// Emit the operand size opcode prefix as needed.
if ((TSFlags & X86II::OpSizeMask) ==
(STI.hasFeature(X86::Is16Bit) ? X86II::OpSize32 : X86II::OpSize16))
- emitByte(0x66, OS);
+ emitByte(0x66, CB);
// Emit the LOCK opcode prefix.
if (TSFlags & X86II::LOCK || MI.getFlags() & X86::IP_HAS_LOCK)
- emitByte(0xF0, OS);
+ emitByte(0xF0, CB);
// Emit the NOTRACK opcode prefix.
if (TSFlags & X86II::NOTRACK || MI.getFlags() & X86::IP_HAS_NOTRACK)
- emitByte(0x3E, OS);
+ emitByte(0x3E, CB);
switch (TSFlags & X86II::OpPrefixMask) {
case X86II::PD: // 66
- emitByte(0x66, OS);
+ emitByte(0x66, CB);
break;
case X86II::XS: // F3
- emitByte(0xF3, OS);
+ emitByte(0xF3, CB);
break;
case X86II::XD: // F2
- emitByte(0xF2, OS);
+ emitByte(0xF2, CB);
break;
}
// Handle REX prefix.
assert((STI.hasFeature(X86::Is64Bit) || !(TSFlags & X86II::REX_W)) &&
"REX.W requires 64bit mode.");
- bool HasREX = STI.hasFeature(X86::Is64Bit)
- ? emitREXPrefix(MemOperand, MI, STI, OS)
- : false;
+ PrefixKind Kind = emitREXPrefix(MemOperand, MI, STI, CB);
// 0x0F escape code must be emitted just before the opcode.
switch (TSFlags & X86II::OpMapMask) {
@@ -1305,23 +1323,23 @@ bool X86MCCodeEmitter::emitOpcodePrefix(int MemOperand, const MCInst &MI,
case X86II::T8: // 0F 38
case X86II::TA: // 0F 3A
case X86II::ThreeDNow: // 0F 0F, second 0F emitted by caller.
- emitByte(0x0F, OS);
+ emitByte(0x0F, CB);
break;
}
switch (TSFlags & X86II::OpMapMask) {
case X86II::T8: // 0F 38
- emitByte(0x38, OS);
+ emitByte(0x38, CB);
break;
case X86II::TA: // 0F 3A
- emitByte(0x3A, OS);
+ emitByte(0x3A, CB);
break;
}
- return HasREX;
+ return Kind;
}
-void X86MCCodeEmitter::emitPrefix(const MCInst &MI, raw_ostream &OS,
+void X86MCCodeEmitter::emitPrefix(const MCInst &MI, SmallVectorImpl<char> &CB,
const MCSubtargetInfo &STI) const {
unsigned Opcode = MI.getOpcode();
const MCInstrDesc &Desc = MCII.get(Opcode);
@@ -1333,10 +1351,11 @@ void X86MCCodeEmitter::emitPrefix(const MCInst &MI, raw_ostream &OS,
unsigned CurOp = X86II::getOperandBias(Desc);
- emitPrefixImpl(CurOp, MI, STI, OS);
+ emitPrefixImpl(CurOp, MI, STI, CB);
}
-void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+void X86MCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
unsigned Opcode = MI.getOpcode();
@@ -1350,9 +1369,9 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned NumOps = Desc.getNumOperands();
unsigned CurOp = X86II::getOperandBias(Desc);
- uint64_t StartByte = OS.tell();
+ uint64_t StartByte = CB.size();
- bool HasREX = emitPrefixImpl(CurOp, MI, STI, OS);
+ PrefixKind Kind = emitPrefixImpl(CurOp, MI, STI, CB);
// It uses the VEX.VVVV field?
bool HasVEX_4V = TSFlags & X86II::VEX_4V;
@@ -1383,7 +1402,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::RawFrmSrc:
case X86II::RawFrmDst:
case X86II::PrefixByte:
- emitByte(BaseOpcode, OS);
+ emitByte(BaseOpcode, CB);
break;
case X86II::AddCCFrm: {
// This will be added to the opcode in the fallthrough.
@@ -1392,47 +1411,47 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
--NumOps; // Drop the operand from the end.
[[fallthrough]];
case X86II::RawFrm:
- emitByte(BaseOpcode + OpcodeOffset, OS);
+ emitByte(BaseOpcode + OpcodeOffset, CB);
if (!STI.hasFeature(X86::Is64Bit) || !isPCRel32Branch(MI, MCII))
break;
const MCOperand &Op = MI.getOperand(CurOp++);
emitImmediate(Op, MI.getLoc(), X86II::getSizeOfImm(TSFlags),
- MCFixupKind(X86::reloc_branch_4byte_pcrel), StartByte, OS,
+ MCFixupKind(X86::reloc_branch_4byte_pcrel), StartByte, CB,
Fixups);
break;
}
case X86II::RawFrmMemOffs:
- emitByte(BaseOpcode, OS);
+ emitByte(BaseOpcode, CB);
emitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
- StartByte, OS, Fixups);
+ StartByte, CB, Fixups);
++CurOp; // skip segment operand
break;
case X86II::RawFrmImm8:
- emitByte(BaseOpcode, OS);
+ emitByte(BaseOpcode, CB);
emitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
- StartByte, OS, Fixups);
+ StartByte, CB, Fixups);
emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 1, FK_Data_1, StartByte,
- OS, Fixups);
+ CB, Fixups);
break;
case X86II::RawFrmImm16:
- emitByte(BaseOpcode, OS);
+ emitByte(BaseOpcode, CB);
emitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
- StartByte, OS, Fixups);
+ StartByte, CB, Fixups);
emitImmediate(MI.getOperand(CurOp++), MI.getLoc(), 2, FK_Data_2, StartByte,
- OS, Fixups);
+ CB, Fixups);
break;
case X86II::AddRegFrm:
- emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++)), OS);
+ emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++)), CB);
break;
case X86II::MRMDestReg: {
- emitByte(BaseOpcode, OS);
+ emitByte(BaseOpcode, CB);
unsigned SrcRegNum = CurOp + 1;
if (HasEVEX_K) // Skip writemask
@@ -1442,22 +1461,22 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
++SrcRegNum;
emitRegModRMByte(MI.getOperand(CurOp),
- getX86RegNum(MI.getOperand(SrcRegNum)), OS);
+ getX86RegNum(MI.getOperand(SrcRegNum)), CB);
CurOp = SrcRegNum + 1;
break;
}
case X86II::MRMDestMem4VOp3CC: {
unsigned CC = MI.getOperand(8).getImm();
- emitByte(BaseOpcode + CC, OS);
+ emitByte(BaseOpcode + CC, CB);
unsigned SrcRegNum = CurOp + X86::AddrNumOperands;
emitMemModRMByte(MI, CurOp + 1, getX86RegNum(MI.getOperand(0)), TSFlags,
- HasREX, StartByte, OS, Fixups, STI, false);
+ Kind, StartByte, CB, Fixups, STI, false);
CurOp = SrcRegNum + 3; // skip reg, VEX_V4 and CC
break;
}
case X86II::MRMDestMemFSIB:
case X86II::MRMDestMem: {
- emitByte(BaseOpcode, OS);
+ emitByte(BaseOpcode, CB);
unsigned SrcRegNum = CurOp + X86::AddrNumOperands;
if (HasEVEX_K) // Skip writemask
@@ -1468,12 +1487,12 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
bool ForceSIB = (Form == X86II::MRMDestMemFSIB);
emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(SrcRegNum)), TSFlags,
- HasREX, StartByte, OS, Fixups, STI, ForceSIB);
+ Kind, StartByte, CB, Fixups, STI, ForceSIB);
CurOp = SrcRegNum + 1;
break;
}
case X86II::MRMSrcReg: {
- emitByte(BaseOpcode, OS);
+ emitByte(BaseOpcode, CB);
unsigned SrcRegNum = CurOp + 1;
if (HasEVEX_K) // Skip writemask
@@ -1483,7 +1502,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
++SrcRegNum;
emitRegModRMByte(MI.getOperand(SrcRegNum),
- getX86RegNum(MI.getOperand(CurOp)), OS);
+ getX86RegNum(MI.getOperand(CurOp)), CB);
CurOp = SrcRegNum + 1;
if (HasVEX_I8Reg)
I8RegNum = getX86RegEncoding(MI, CurOp++);
@@ -1493,17 +1512,17 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
break;
}
case X86II::MRMSrcReg4VOp3: {
- emitByte(BaseOpcode, OS);
+ emitByte(BaseOpcode, CB);
unsigned SrcRegNum = CurOp + 1;
emitRegModRMByte(MI.getOperand(SrcRegNum),
- getX86RegNum(MI.getOperand(CurOp)), OS);
+ getX86RegNum(MI.getOperand(CurOp)), CB);
CurOp = SrcRegNum + 1;
++CurOp; // Encoded in VEX.VVVV
break;
}
case X86II::MRMSrcRegOp4: {
- emitByte(BaseOpcode, OS);
+ emitByte(BaseOpcode, CB);
unsigned SrcRegNum = CurOp + 1;
// Skip 1st src (which is encoded in VEX_VVVV)
@@ -1514,7 +1533,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
I8RegNum = getX86RegEncoding(MI, SrcRegNum++);
emitRegModRMByte(MI.getOperand(SrcRegNum),
- getX86RegNum(MI.getOperand(CurOp)), OS);
+ getX86RegNum(MI.getOperand(CurOp)), CB);
CurOp = SrcRegNum + 1;
break;
}
@@ -1523,10 +1542,10 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned SecondOp = CurOp++;
unsigned CC = MI.getOperand(CurOp++).getImm();
- emitByte(BaseOpcode + CC, OS);
+ emitByte(BaseOpcode + CC, CB);
emitRegModRMByte(MI.getOperand(SecondOp),
- getX86RegNum(MI.getOperand(FirstOp)), OS);
+ getX86RegNum(MI.getOperand(FirstOp)), CB);
break;
}
case X86II::MRMSrcMemFSIB:
@@ -1539,11 +1558,11 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
if (HasVEX_4V)
++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
- emitByte(BaseOpcode, OS);
+ emitByte(BaseOpcode, CB);
bool ForceSIB = (Form == X86II::MRMSrcMemFSIB);
emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)),
- TSFlags, HasREX, StartByte, OS, Fixups, STI, ForceSIB);
+ TSFlags, Kind, StartByte, CB, Fixups, STI, ForceSIB);
CurOp = FirstMemOp + X86::AddrNumOperands;
if (HasVEX_I8Reg)
I8RegNum = getX86RegEncoding(MI, CurOp++);
@@ -1552,10 +1571,10 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRMSrcMem4VOp3: {
unsigned FirstMemOp = CurOp + 1;
- emitByte(BaseOpcode, OS);
+ emitByte(BaseOpcode, CB);
emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)),
- TSFlags, HasREX, StartByte, OS, Fixups, STI);
+ TSFlags, Kind, StartByte, CB, Fixups, STI);
CurOp = FirstMemOp + X86::AddrNumOperands;
++CurOp; // Encoded in VEX.VVVV.
break;
@@ -1569,10 +1588,10 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
assert(HasVEX_I8Reg && "MRMSrcRegOp4 should imply VEX_I8Reg");
I8RegNum = getX86RegEncoding(MI, FirstMemOp++);
- emitByte(BaseOpcode, OS);
+ emitByte(BaseOpcode, CB);
emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)),
- TSFlags, HasREX, StartByte, OS, Fixups, STI);
+ TSFlags, Kind, StartByte, CB, Fixups, STI);
CurOp = FirstMemOp + X86::AddrNumOperands;
break;
}
@@ -1582,10 +1601,10 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
CurOp = FirstMemOp + X86::AddrNumOperands;
unsigned CC = MI.getOperand(CurOp++).getImm();
- emitByte(BaseOpcode + CC, OS);
+ emitByte(BaseOpcode + CC, CB);
emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(RegOp)),
- TSFlags, HasREX, StartByte, OS, Fixups, STI);
+ TSFlags, Kind, StartByte, CB, Fixups, STI);
break;
}
@@ -1593,8 +1612,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned RegOp = CurOp++;
unsigned CC = MI.getOperand(CurOp++).getImm();
- emitByte(BaseOpcode + CC, OS);
- emitRegModRMByte(MI.getOperand(RegOp), 0, OS);
+ emitByte(BaseOpcode + CC, CB);
+ emitRegModRMByte(MI.getOperand(RegOp), 0, CB);
break;
}
@@ -1611,13 +1630,13 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
++CurOp;
if (HasEVEX_K) // Skip writemask
++CurOp;
- emitByte(BaseOpcode, OS);
+ emitByte(BaseOpcode, CB);
emitRegModRMByte(MI.getOperand(CurOp++),
- (Form == X86II::MRMXr) ? 0 : Form - X86II::MRM0r, OS);
+ (Form == X86II::MRMXr) ? 0 : Form - X86II::MRM0r, CB);
break;
case X86II::MRMr0:
- emitByte(BaseOpcode, OS);
- emitByte(modRMByte(3, getX86RegNum(MI.getOperand(CurOp++)),0), OS);
+ emitByte(BaseOpcode, CB);
+ emitByte(modRMByte(3, getX86RegNum(MI.getOperand(CurOp++)), 0), CB);
break;
case X86II::MRMXmCC: {
@@ -1625,9 +1644,9 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
CurOp = FirstMemOp + X86::AddrNumOperands;
unsigned CC = MI.getOperand(CurOp++).getImm();
- emitByte(BaseOpcode + CC, OS);
+ emitByte(BaseOpcode + CC, CB);
- emitMemModRMByte(MI, FirstMemOp, 0, TSFlags, HasREX, StartByte, OS, Fixups,
+ emitMemModRMByte(MI, FirstMemOp, 0, TSFlags, Kind, StartByte, CB, Fixups,
STI);
break;
}
@@ -1645,10 +1664,10 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
++CurOp;
if (HasEVEX_K) // Skip writemask
++CurOp;
- emitByte(BaseOpcode, OS);
+ emitByte(BaseOpcode, CB);
emitMemModRMByte(MI, CurOp,
(Form == X86II::MRMXm) ? 0 : Form - X86II::MRM0m, TSFlags,
- HasREX, StartByte, OS, Fixups, STI);
+ Kind, StartByte, CB, Fixups, STI);
CurOp += X86::AddrNumOperands;
break;
@@ -1660,8 +1679,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM5X:
case X86II::MRM6X:
case X86II::MRM7X:
- emitByte(BaseOpcode, OS);
- emitByte(0xC0 + ((Form - X86II::MRM0X) << 3), OS);
+ emitByte(BaseOpcode, CB);
+ emitByte(0xC0 + ((Form - X86II::MRM0X) << 3), CB);
break;
case X86II::MRM_C0:
@@ -1728,8 +1747,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM_FD:
case X86II::MRM_FE:
case X86II::MRM_FF:
- emitByte(BaseOpcode, OS);
- emitByte(0xC0 + Form - X86II::MRM_C0, OS);
+ emitByte(BaseOpcode, CB);
+ emitByte(0xC0 + Form - X86II::MRM_C0, CB);
break;
}
@@ -1744,7 +1763,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
I8RegNum |= Val;
}
emitImmediate(MCOperand::createImm(I8RegNum), MI.getLoc(), 1, FK_Data_1,
- StartByte, OS, Fixups);
+ StartByte, CB, Fixups);
} else {
// If there is a remaining operand, it must be a trailing immediate. Emit it
// according to the right size for the instruction. Some instructions
@@ -1752,14 +1771,14 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
while (CurOp != NumOps && NumOps - CurOp <= 2) {
emitImmediate(MI.getOperand(CurOp++), MI.getLoc(),
X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
- StartByte, OS, Fixups);
+ StartByte, CB, Fixups);
}
}
if ((TSFlags & X86II::OpMapMask) == X86II::ThreeDNow)
- emitByte(X86II::getBaseOpcodeFor(TSFlags), OS);
+ emitByte(X86II::getBaseOpcodeFor(TSFlags), CB);
- assert(OS.tell() - StartByte <= 15 &&
+ assert(CB.size() - StartByte <= 15 &&
"The size of instruction must be no longer than 15.");
#ifndef NDEBUG
// FIXME: Verify.
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCExpr.h b/llvm/lib/Target/X86/MCTargetDesc/X86MCExpr.h
index cd2baeb1c98e..db91d38f299e 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCExpr.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCExpr.h
@@ -64,7 +64,7 @@ public:
return getRegNo() == E->getRegNo();
return false;
}
- void visitUsedExpr(MCStreamer &Streamer) const override{};
+ void visitUsedExpr(MCStreamer &Streamer) const override {}
MCFragment *findAssociatedFragment() const override { return nullptr; }
// There are no TLS X86MCExprs at the moment.
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 100e45e72189..9519608ac022 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -18,7 +18,6 @@
#include "X86MCAsmInfo.h"
#include "X86TargetStreamer.h"
#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCInstrAnalysis.h"
@@ -29,7 +28,8 @@
#include "llvm/MC/MachineLocation.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Host.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
@@ -441,6 +441,8 @@ static MCAsmInfo *createX86MCAsmInfo(const MCRegisterInfo &MRI,
} else if (TheTriple.isOSCygMing() ||
TheTriple.isWindowsItaniumEnvironment()) {
MAI = new X86MCAsmInfoGNUCOFF(TheTriple);
+ } else if (TheTriple.isUEFI()) {
+ MAI = new X86MCAsmInfoGNUCOFF(TheTriple);
} else {
// The default is ELF.
MAI = new X86ELFMCAsmInfo(TheTriple);
@@ -501,7 +503,6 @@ public:
APInt &Mask) const override;
std::vector<std::pair<uint64_t, uint64_t>>
findPltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
- uint64_t GotSectionVA,
const Triple &TargetTriple) const override;
bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size,
@@ -570,8 +571,7 @@ bool X86MCInstrAnalysis::clearsSuperRegisters(const MCRegisterInfo &MRI,
}
static std::vector<std::pair<uint64_t, uint64_t>>
-findX86PltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
- uint64_t GotPltSectionVA) {
+findX86PltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents) {
// Do a lightweight parsing of PLT entries.
std::vector<std::pair<uint64_t, uint64_t>> Result;
for (uint64_t Byte = 0, End = PltContents.size(); Byte + 6 < End; ) {
@@ -579,9 +579,11 @@ findX86PltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
if (PltContents[Byte] == 0xff && PltContents[Byte + 1] == 0xa3) {
// The jmp instruction at the beginning of each PLT entry jumps to the
// address of the base of the .got.plt section plus the immediate.
+ // Set the 1 << 32 bit to let ELFObjectFileBase::getPltEntries convert the
+ // offset to an address. Imm may be a negative int32_t if the GOT entry is
+ // in .got.
uint32_t Imm = support::endian::read32le(PltContents.data() + Byte + 2);
- Result.push_back(
- std::make_pair(PltSectionVA + Byte, GotPltSectionVA + Imm));
+ Result.emplace_back(PltSectionVA + Byte, Imm | (uint64_t(1) << 32));
Byte += 6;
} else if (PltContents[Byte] == 0xff && PltContents[Byte + 1] == 0x25) {
// The jmp instruction at the beginning of each PLT entry jumps to the
@@ -614,17 +616,18 @@ findX86_64PltEntries(uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents) {
return Result;
}
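A hedged sketch of the offset-tagging scheme described in the comment above; the helper name below is illustrative, and the offset-to-address conversion is assumed to live in ELFObjectFileBase::getPltEntries as the comment states.

    #include <cstdint>

    // The 32-bit x86 path can only record a GOT offset, so bit 32 is set as a
    // marker; a possibly negative int32_t immediate stays in the low 32 bits.
    static uint64_t tagGotOffset(uint32_t Imm) {
      return Imm | (uint64_t(1) << 32);
    }
    // The reader is assumed to strip the marker, sign-extend the low 32 bits,
    // and add the appropriate GOT base to form the final address.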
-std::vector<std::pair<uint64_t, uint64_t>> X86MCInstrAnalysis::findPltEntries(
- uint64_t PltSectionVA, ArrayRef<uint8_t> PltContents,
- uint64_t GotPltSectionVA, const Triple &TargetTriple) const {
+std::vector<std::pair<uint64_t, uint64_t>>
+X86MCInstrAnalysis::findPltEntries(uint64_t PltSectionVA,
+ ArrayRef<uint8_t> PltContents,
+ const Triple &TargetTriple) const {
switch (TargetTriple.getArch()) {
- case Triple::x86:
- return findX86PltEntries(PltSectionVA, PltContents, GotPltSectionVA);
- case Triple::x86_64:
- return findX86_64PltEntries(PltSectionVA, PltContents);
- default:
- return {};
- }
+ case Triple::x86:
+ return findX86PltEntries(PltSectionVA, PltContents);
+ case Triple::x86_64:
+ return findX86_64PltEntries(PltSectionVA, PltContents);
+ default:
+ return {};
+ }
}
bool X86MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
@@ -742,22 +745,14 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86TargetMC() {
createX86_64AsmBackend);
}
-MCRegister llvm::getX86SubSuperRegisterOrZero(MCRegister Reg, unsigned Size,
- bool High) {
+MCRegister llvm::getX86SubSuperRegister(MCRegister Reg, unsigned Size,
+ bool High) {
switch (Size) {
- default: return X86::NoRegister;
+ default: llvm_unreachable("illegal register size");
case 8:
if (High) {
switch (Reg.id()) {
- default: return getX86SubSuperRegisterOrZero(Reg, 64);
- case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
- return X86::SI;
- case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
- return X86::DI;
- case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
- return X86::BP;
- case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
- return X86::SP;
+ default: return X86::NoRegister;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::AH;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -878,7 +873,7 @@ MCRegister llvm::getX86SubSuperRegisterOrZero(MCRegister Reg, unsigned Size,
}
case 64:
switch (Reg.id()) {
- default: return 0;
+ default: return X86::NoRegister;
case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
return X86::RAX;
case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
@@ -914,11 +909,3 @@ MCRegister llvm::getX86SubSuperRegisterOrZero(MCRegister Reg, unsigned Size,
}
}
}
-
-MCRegister llvm::getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High) {
- MCRegister Res = getX86SubSuperRegisterOrZero(Reg, Size, High);
- assert(Res != X86::NoRegister && "Unexpected register or VT");
- return Res;
-}
-
-
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index d0530bd4d650..437a7bd6ff6c 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -135,16 +135,13 @@ createX86ELFObjectWriter(bool IsELF64, uint8_t OSABI, uint16_t EMachine);
std::unique_ptr<MCObjectTargetWriter>
createX86WinCOFFObjectWriter(bool Is64Bit);
-/// Returns the sub or super register of a specific X86 register.
-/// e.g. getX86SubSuperRegister(X86::EAX, 16) returns X86::AX.
-/// Aborts on error.
-MCRegister getX86SubSuperRegister(MCRegister, unsigned, bool High=false);
-
-/// Returns the sub or super register of a specific X86 register.
-/// Like getX86SubSuperRegister() but returns 0 on error.
-MCRegister getX86SubSuperRegisterOrZero(MCRegister, unsigned,
- bool High = false);
-
+/// \param Reg the specified register.
+/// \param Size the bit size of the returned register.
+/// \param High whether the high (8-bit) register is requested.
+///
+/// \returns the sub or super register of a specific X86 register.
+MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size,
+ bool High = false);
} // End llvm namespace
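A hedged usage sketch of the merged helper under its new contract (the EAX/AX example comes from the doc comment removed above; include paths assume an in-tree X86 target translation unit):

    #include "MCTargetDesc/X86MCTargetDesc.h" // declares getX86SubSuperRegister
    #include "llvm/MC/MCRegister.h"

    using namespace llvm;

    // getX86SubSuperRegister(X86::EAX, 16) yields X86::AX. With the wrapper
    // that asserted on failure now gone, unknown mappings come back as
    // X86::NoRegister, so callers check validity (as the X86AsmPrinter change
    // later in this patch does for the 'h' modifier).
    static MCRegister sixteenBitAliasOfEAX() {
      MCRegister R = getX86SubSuperRegister(X86::EAX, 16);
      return R.isValid() ? R : MCRegister();
    }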
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 0bfe3272100d..76ecc77bc39c 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -51,9 +51,6 @@ FunctionPass *createX86IssueVZeroUpperPass();
/// destinations as part of CET IBT mechanism.
FunctionPass *createX86IndirectBranchTrackingPass();
-/// This pass inserts KCFI checks before indirect calls.
-FunctionPass *createX86KCFIPass();
-
/// Return a pass that pads short functions with NOOPs.
/// This will prevent a stall when returning on the Atom.
FunctionPass *createX86PadShortFunctions();
@@ -63,6 +60,13 @@ FunctionPass *createX86PadShortFunctions();
/// instructions, in order to eliminate execution delays in some processors.
FunctionPass *createX86FixupLEAs();
+/// Return a pass that replaces equivalent slower instructions with faster
+/// ones.
+FunctionPass *createX86FixupInstTuning();
+
+/// Return a pass that reduces the size of vector constant pool loads.
+FunctionPass *createX86FixupVectorConstants();
+
/// Return a pass that removes redundant LEA instructions and redundant address
/// recalculations.
FunctionPass *createX86OptimizeLEAs();
@@ -162,11 +166,15 @@ FunctionPass *createX86LoadValueInjectionLoadHardeningPass();
FunctionPass *createX86LoadValueInjectionRetHardeningPass();
FunctionPass *createX86SpeculativeLoadHardeningPass();
FunctionPass *createX86SpeculativeExecutionSideEffectSuppression();
+FunctionPass *createX86ArgumentStackSlotPass();
void initializeEvexToVexInstPassPass(PassRegistry &);
void initializeFPSPass(PassRegistry &);
void initializeFixupBWInstPassPass(PassRegistry &);
void initializeFixupLEAPassPass(PassRegistry &);
+void initializeX86ArgumentStackSlotPassPass(PassRegistry &);
+void initializeX86FixupInstTuningPassPass(PassRegistry &);
+void initializeX86FixupVectorConstantsPassPass(PassRegistry &);
void initializeWinEHStatePassPass(PassRegistry &);
void initializeX86AvoidSFBPassPass(PassRegistry &);
void initializeX86AvoidTrailingCallPassPass(PassRegistry &);
@@ -180,7 +188,6 @@ void initializeX86FastPreTileConfigPass(PassRegistry &);
void initializeX86FastTileConfigPass(PassRegistry &);
void initializeX86FixupSetCCPassPass(PassRegistry &);
void initializeX86FlagsCopyLoweringPassPass(PassRegistry &);
-void initializeX86KCFIPass(PassRegistry &);
void initializeX86LoadValueInjectionLoadHardeningPassPass(PassRegistry &);
void initializeX86LoadValueInjectionRetHardeningPassPass(PassRegistry &);
void initializeX86LowerAMXIntrinsicsLegacyPassPass(PassRegistry &);
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 83bd2ff6acc3..0f677b8a4afc 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -191,6 +191,10 @@ def FeatureAVXVNNIINT8 : SubtargetFeature<"avxvnniint8",
"HasAVXVNNIINT8", "true",
"Enable AVX-VNNI-INT8",
[FeatureAVX2]>;
+def FeatureAVXVNNIINT16 : SubtargetFeature<"avxvnniint16",
+ "HasAVXVNNIINT16", "true",
+ "Enable AVX-VNNI-INT16",
+ [FeatureAVX2]>;
def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
"Enable packed carry-less multiplication instructions",
[FeatureSSE2]>;
@@ -238,10 +242,19 @@ def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
"Enable SHA instructions",
[FeatureSSE2]>;
+def FeatureSHA512 : SubtargetFeature<"sha512", "HasSHA512", "true",
+ "Support SHA512 instructions",
+ [FeatureAVX]>;
// Processor supports CET SHSTK - Control-Flow Enforcement Technology
// using Shadow Stack
def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true",
"Support CET Shadow-Stack instructions">;
+def FeatureSM3 : SubtargetFeature<"sm3", "HasSM3", "true",
+ "Support SM3 instructions",
+ [FeatureAVX]>;
+def FeatureSM4 : SubtargetFeature<"sm4", "HasSM4", "true",
+ "Support SM4 instructions",
+ [FeatureAVX]>;
def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true",
"Support PRFCHW instructions">;
def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true",
@@ -267,6 +280,9 @@ def FeatureAMXBF16 : SubtargetFeature<"amx-bf16", "HasAMXBF16", "true",
def FeatureAMXFP16 : SubtargetFeature<"amx-fp16", "HasAMXFP16", "true",
"Support AMX amx-fp16 instructions",
[FeatureAMXTILE]>;
+def FeatureAMXCOMPLEX : SubtargetFeature<"amx-complex", "HasAMXCOMPLEX", "true",
+ "Support AMX-COMPLEX instructions",
+ [FeatureAMXTILE]>;
def FeatureCMPCCXADD : SubtargetFeature<"cmpccxadd", "HasCMPCCXADD", "true",
"Support CMPCCXADD instructions">;
def FeatureRAOINT : SubtargetFeature<"raoint", "HasRAOINT", "true",
@@ -419,6 +435,9 @@ def FeatureHardenSlsIJmp
//===----------------------------------------------------------------------===//
// X86 Subtarget Tuning features
//===----------------------------------------------------------------------===//
+def TuningPreferMovmskOverVTest : SubtargetFeature<"prefer-movmsk-over-vtest",
+ "PreferMovmskOverVTest", "true",
+ "Prefer movmsk over vtest instruction">;
def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
@@ -527,6 +546,39 @@ def TuningFastVariablePerLaneShuffle
"HasFastVariablePerLaneShuffle",
"true", "Per-lane shuffles with variable masks are fast">;
+// Goldmont / Tremont (atom in general) has no bypass delay
+def TuningNoDomainDelay : SubtargetFeature<"no-bypass-delay",
+ "NoDomainDelay","true",
+ "Has no bypass delay when using the 'wrong' domain">;
+
+// Many processors (Nehalem+ on Intel) have no bypass delay when
+// using the wrong mov type.
+def TuningNoDomainDelayMov : SubtargetFeature<"no-bypass-delay-mov",
+ "NoDomainDelayMov","true",
+ "Has no bypass delay when using the 'wrong' mov type">;
+
+// Newer processors (Skylake+ on Intel) have no bypass delay when
+// using the wrong blend type.
+def TuningNoDomainDelayBlend : SubtargetFeature<"no-bypass-delay-blend",
+ "NoDomainDelayBlend","true",
+ "Has no bypass delay when using the 'wrong' blend type">;
+
+// Newer processors (Haswell+ on Intel) have no bypass delay when
+// using the wrong shuffle type.
+def TuningNoDomainDelayShuffle : SubtargetFeature<"no-bypass-delay-shuffle",
+ "NoDomainDelayShuffle","true",
+ "Has no bypass delay when using the 'wrong' shuffle type">;
+
+// Prefer lowering shuffles on AVX512 targets (e.g. Skylake Server) to
+// imm shifts/rotate if they can use more ports than regular shuffles.
+def TuningPreferShiftShuffle : SubtargetFeature<"faster-shift-than-shuffle",
+ "PreferLowerShuffleAsShift", "true",
+ "Shifts are faster (or as fast) as shuffle">;
+
+def TuningFastImmVectorShift : SubtargetFeature<"tuning-fast-imm-vector-shift",
+ "FastImmVectorShift", "true",
+ "Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">;
+
// On some X86 processors, a vzeroupper instruction should be inserted after
// using ymm/zmm registers before executing code that may use SSE instructions.
def TuningInsertVZEROUPPER
@@ -695,42 +747,96 @@ include "X86ScheduleSLM.td"
include "X86ScheduleZnver1.td"
include "X86ScheduleZnver2.td"
include "X86ScheduleZnver3.td"
+include "X86ScheduleZnver4.td"
include "X86ScheduleBdVer2.td"
include "X86ScheduleBtVer2.td"
include "X86SchedSkylakeClient.td"
include "X86SchedSkylakeServer.td"
include "X86SchedIceLake.td"
include "X86SchedAlderlakeP.td"
+include "X86SchedSapphireRapids.td"
//===----------------------------------------------------------------------===//
// X86 Processor Feature Lists
//===----------------------------------------------------------------------===//
def ProcessorFeatures {
- // x86-64 and x86-64-v[234]
+ // x86-64 micro-architecture levels: x86-64 and x86-64-v[234]
list<SubtargetFeature> X86_64V1Features = [
FeatureX87, FeatureCX8, FeatureCMOV, FeatureMMX, FeatureSSE2,
FeatureFXSR, FeatureNOPL, FeatureX86_64,
];
+ list<SubtargetFeature> X86_64V1Tuning = [
+ TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningSlowIncDec,
+ TuningInsertVZEROUPPER
+ ];
+
list<SubtargetFeature> X86_64V2Features = !listconcat(X86_64V1Features, [
FeatureCX16, FeatureLAHFSAHF64, FeatureCRC32, FeaturePOPCNT,
FeatureSSE42
]);
+ list<SubtargetFeature> X86_64V2Tuning = [
+ TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningSlowUAMem32,
+ TuningFastScalarFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningPOPCNTFalseDeps,
+ TuningInsertVZEROUPPER
+ ];
+
list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
FeatureMOVBE, FeatureXSAVE
]);
+ list<SubtargetFeature> X86_64V3Tuning = [
+ TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningFastScalarFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningFastVariableCrossLaneShuffle,
+ TuningFastVariablePerLaneShuffle,
+ TuningPOPCNTFalseDeps,
+ TuningLZCNTFalseDeps,
+ TuningInsertVZEROUPPER,
+ TuningAllowLight256Bit
+ ];
+
list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
FeatureBWI,
FeatureCDI,
FeatureDQI,
FeatureVLX,
]);
+ list<SubtargetFeature> X86_64V4Tuning = [
+ TuningMacroFusion,
+ TuningSlow3OpsLEA,
+ TuningSlowDivide64,
+ TuningFastScalarFSQRT,
+ TuningFastVectorFSQRT,
+ TuningFastSHLDRotate,
+ TuningFast15ByteNOP,
+ TuningFastVariableCrossLaneShuffle,
+ TuningFastVariablePerLaneShuffle,
+ TuningPrefer256Bit,
+ TuningFastGather,
+ TuningPOPCNTFalseDeps,
+ TuningInsertVZEROUPPER,
+ TuningAllowLight256Bit
+ ];
// Nehalem
list<SubtargetFeature> NHMFeatures = X86_64V2Features;
list<SubtargetFeature> NHMTuning = [TuningMacroFusion,
- TuningInsertVZEROUPPER];
+ TuningInsertVZEROUPPER,
+ TuningNoDomainDelayMov];
// Westmere
list<SubtargetFeature> WSMAdditionalFeatures = [FeaturePCLMUL];
@@ -750,7 +856,8 @@ def ProcessorFeatures {
TuningFastSHLDRotate,
TuningFast15ByteNOP,
TuningPOPCNTFalseDeps,
- TuningInsertVZEROUPPER];
+ TuningInsertVZEROUPPER,
+ TuningNoDomainDelayMov];
list<SubtargetFeature> SNBFeatures =
!listconcat(WSMFeatures, SNBAdditionalFeatures);
@@ -782,7 +889,9 @@ def ProcessorFeatures {
TuningPOPCNTFalseDeps,
TuningLZCNTFalseDeps,
TuningInsertVZEROUPPER,
- TuningAllowLight256Bit];
+ TuningAllowLight256Bit,
+ TuningNoDomainDelayMov,
+ TuningNoDomainDelayShuffle];
list<SubtargetFeature> HSWFeatures =
!listconcat(IVBFeatures, HSWAdditionalFeatures);
@@ -811,7 +920,10 @@ def ProcessorFeatures {
TuningFastVariablePerLaneShuffle,
TuningPOPCNTFalseDeps,
TuningInsertVZEROUPPER,
- TuningAllowLight256Bit];
+ TuningAllowLight256Bit,
+ TuningNoDomainDelayMov,
+ TuningNoDomainDelayShuffle,
+ TuningNoDomainDelayBlend];
list<SubtargetFeature> SKLFeatures =
!listconcat(BDWFeatures, SKLAdditionalFeatures);
@@ -840,7 +952,12 @@ def ProcessorFeatures {
TuningPrefer256Bit,
TuningPOPCNTFalseDeps,
TuningInsertVZEROUPPER,
- TuningAllowLight256Bit];
+ TuningAllowLight256Bit,
+ TuningPreferShiftShuffle,
+ TuningNoDomainDelayMov,
+ TuningNoDomainDelayShuffle,
+ TuningNoDomainDelayBlend,
+ TuningFastImmVectorShift];
list<SubtargetFeature> SKXFeatures =
!listconcat(BDWFeatures, SKXAdditionalFeatures);
@@ -878,7 +995,11 @@ def ProcessorFeatures {
TuningFastVariablePerLaneShuffle,
TuningPrefer256Bit,
TuningInsertVZEROUPPER,
- TuningAllowLight256Bit];
+ TuningAllowLight256Bit,
+ TuningNoDomainDelayMov,
+ TuningNoDomainDelayShuffle,
+ TuningNoDomainDelayBlend,
+ TuningFastImmVectorShift];
list<SubtargetFeature> CNLFeatures =
!listconcat(SKLFeatures, CNLAdditionalFeatures);
@@ -903,7 +1024,11 @@ def ProcessorFeatures {
TuningFastVariablePerLaneShuffle,
TuningPrefer256Bit,
TuningInsertVZEROUPPER,
- TuningAllowLight256Bit];
+ TuningAllowLight256Bit,
+ TuningNoDomainDelayMov,
+ TuningNoDomainDelayShuffle,
+ TuningNoDomainDelayBlend,
+ TuningFastImmVectorShift];
list<SubtargetFeature> ICLFeatures =
!listconcat(CNLFeatures, ICLAdditionalFeatures);
@@ -957,6 +1082,11 @@ def ProcessorFeatures {
list<SubtargetFeature> GNRFeatures =
!listconcat(SPRFeatures, GNRAdditionalFeatures);
+ // Graniterapids D
+ list<SubtargetFeature> GNRDAdditionalFeatures = [FeatureAMXCOMPLEX];
+ list<SubtargetFeature> GNRDFeatures =
+ !listconcat(GNRFeatures, GNRDAdditionalFeatures);
+
// Atom
list<SubtargetFeature> AtomFeatures = [FeatureX87,
FeatureCX8,
@@ -977,7 +1107,8 @@ def ProcessorFeatures {
TuningSlowTwoMemOps,
TuningLEAUsesAG,
TuningPadShortFunctions,
- TuningInsertVZEROUPPER];
+ TuningInsertVZEROUPPER,
+ TuningNoDomainDelay];
// Silvermont
list<SubtargetFeature> SLMAdditionalFeatures = [FeatureSSE42,
@@ -995,7 +1126,8 @@ def ProcessorFeatures {
TuningFast7ByteNOP,
TuningFastMOVBE,
TuningPOPCNTFalseDeps,
- TuningInsertVZEROUPPER];
+ TuningInsertVZEROUPPER,
+ TuningNoDomainDelay];
list<SubtargetFeature> SLMFeatures =
!listconcat(AtomFeatures, SLMAdditionalFeatures);
@@ -1015,7 +1147,8 @@ def ProcessorFeatures {
TuningSlowIncDec,
TuningFastMOVBE,
TuningPOPCNTFalseDeps,
- TuningInsertVZEROUPPER];
+ TuningInsertVZEROUPPER,
+ TuningNoDomainDelay];
list<SubtargetFeature> GLMFeatures =
!listconcat(SLMFeatures, GLMAdditionalFeatures);
@@ -1027,7 +1160,8 @@ def ProcessorFeatures {
TuningSlowLEA,
TuningSlowIncDec,
TuningFastMOVBE,
- TuningInsertVZEROUPPER];
+ TuningInsertVZEROUPPER,
+ TuningNoDomainDelay];
list<SubtargetFeature> GLPFeatures =
!listconcat(GLMFeatures, GLPAdditionalFeatures);
@@ -1059,7 +1193,9 @@ def ProcessorFeatures {
FeatureMOVDIRI,
FeatureMOVDIR64B,
FeatureWAITPKG];
- list<SubtargetFeature> ADLAdditionalTuning = [TuningPERMFalseDeps];
+ list<SubtargetFeature> ADLAdditionalTuning = [TuningPERMFalseDeps,
+ TuningPreferMovmskOverVTest,
+ TuningFastImmVectorShift];
list<SubtargetFeature> ADLTuning = !listconcat(SKLTuning, ADLAdditionalTuning);
list<SubtargetFeature> ADLFeatures =
!listconcat(TRMFeatures, ADLAdditionalFeatures);
@@ -1068,6 +1204,8 @@ def ProcessorFeatures {
list<SubtargetFeature> SRFAdditionalFeatures = [FeatureCMPCCXADD,
FeatureAVXIFMA,
FeatureAVXNECONVERT,
+ FeatureENQCMD,
+ FeatureUINTR,
FeatureAVXVNNIINT8];
list<SubtargetFeature> SRFFeatures =
!listconcat(ADLFeatures, SRFAdditionalFeatures);
@@ -1361,20 +1499,22 @@ def : Proc<"i586", [FeatureX87, FeatureCX8],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
def : Proc<"pentium", [FeatureX87, FeatureCX8],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
-def : Proc<"pentium-mmx", [FeatureX87, FeatureCX8, FeatureMMX],
- [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
-
+foreach P = ["pentium-mmx", "pentium_mmx"] in {
+ def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX],
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
+}
def : Proc<"i686", [FeatureX87, FeatureCX8, FeatureCMOV],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
-def : Proc<"pentiumpro", [FeatureX87, FeatureCX8, FeatureCMOV,
- FeatureNOPL],
- [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
-
-def : Proc<"pentium2", [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV,
- FeatureFXSR, FeatureNOPL],
- [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
-
-foreach P = ["pentium3", "pentium3m"] in {
+foreach P = ["pentiumpro", "pentium_pro"] in {
+ def : Proc<P, [FeatureX87, FeatureCX8, FeatureCMOV, FeatureNOPL],
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
+}
+foreach P = ["pentium2", "pentium_ii"] in {
+ def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX, FeatureCMOV,
+ FeatureFXSR, FeatureNOPL],
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
+}
+foreach P = ["pentium3", "pentium3m", "pentium_iii_no_xmm_regs", "pentium_iii"] in {
def : Proc<P, [FeatureX87, FeatureCX8, FeatureMMX,
FeatureSSE1, FeatureFXSR, FeatureNOPL, FeatureCMOV],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
@@ -1390,12 +1530,14 @@ foreach P = ["pentium3", "pentium3m"] in {
// measure to avoid performance surprises, in case clang's default cpu
// changes slightly.
-def : ProcModel<"pentium-m", GenericPostRAModel,
+foreach P = ["pentium_m", "pentium-m"] in {
+def : ProcModel<P, GenericPostRAModel,
[FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
+}
-foreach P = ["pentium4", "pentium4m"] in {
+foreach P = ["pentium4", "pentium4m", "pentium_4"] in {
def : ProcModel<P, GenericPostRAModel,
[FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE2,
FeatureFXSR, FeatureNOPL, FeatureCMOV],
@@ -1413,10 +1555,12 @@ def : ProcModel<"yonah", SandyBridgeModel,
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
// NetBurst.
-def : ProcModel<"prescott", GenericPostRAModel,
- [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
- FeatureFXSR, FeatureNOPL, FeatureCMOV],
- [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
+foreach P = ["prescott", "pentium_4_sse3"] in {
+ def : ProcModel<P, GenericPostRAModel,
+ [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE3,
+ FeatureFXSR, FeatureNOPL, FeatureCMOV],
+ [TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
+}
def : ProcModel<"nocona", GenericPostRAModel, [
FeatureX87,
FeatureCX8,
@@ -1434,7 +1578,8 @@ def : ProcModel<"nocona", GenericPostRAModel, [
]>;
// Intel Core 2 Solo/Duo.
-def : ProcModel<"core2", SandyBridgeModel, [
+foreach P = ["core2", "core_2_duo_ssse3"] in {
+def : ProcModel<P, SandyBridgeModel, [
FeatureX87,
FeatureCX8,
FeatureCMOV,
@@ -1451,7 +1596,9 @@ def : ProcModel<"core2", SandyBridgeModel, [
TuningSlowUAMem16,
TuningInsertVZEROUPPER
]>;
-def : ProcModel<"penryn", SandyBridgeModel, [
+}
+foreach P = ["penryn", "core_2_duo_sse4_1"] in {
+def : ProcModel<P, SandyBridgeModel, [
FeatureX87,
FeatureCX8,
FeatureCMOV,
@@ -1468,6 +1615,7 @@ def : ProcModel<"penryn", SandyBridgeModel, [
TuningSlowUAMem16,
TuningInsertVZEROUPPER
]>;
+}
// Atom CPUs.
foreach P = ["bonnell", "atom"] in {
@@ -1475,15 +1623,19 @@ foreach P = ["bonnell", "atom"] in {
ProcessorFeatures.AtomTuning>;
}
-foreach P = ["silvermont", "slm"] in {
+foreach P = ["silvermont", "slm", "atom_sse4_2"] in {
def : ProcModel<P, SLMModel, ProcessorFeatures.SLMFeatures,
ProcessorFeatures.SLMTuning>;
}
+def : ProcModel<"atom_sse4_2_movbe", SLMModel, ProcessorFeatures.GLMFeatures,
+ ProcessorFeatures.SLMTuning>;
def : ProcModel<"goldmont", SLMModel, ProcessorFeatures.GLMFeatures,
ProcessorFeatures.GLMTuning>;
-def : ProcModel<"goldmont-plus", SLMModel, ProcessorFeatures.GLPFeatures,
- ProcessorFeatures.GLPTuning>;
+foreach P = ["goldmont_plus", "goldmont-plus"] in {
+ def : ProcModel<P, SLMModel, ProcessorFeatures.GLPFeatures,
+ ProcessorFeatures.GLPTuning>;
+}
def : ProcModel<"tremont", SLMModel, ProcessorFeatures.TRMFeatures,
ProcessorFeatures.TRMTuning>;
def : ProcModel<"sierraforest", AlderlakePModel, ProcessorFeatures.SRFFeatures,
@@ -1492,43 +1644,49 @@ def : ProcModel<"grandridge", AlderlakePModel, ProcessorFeatures.GRRFeatures,
ProcessorFeatures.TRMTuning>;
// "Arrandale" along with corei3 and corei5
-foreach P = ["nehalem", "corei7"] in {
+foreach P = ["nehalem", "corei7", "core_i7_sse4_2"] in {
def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.NHMFeatures,
ProcessorFeatures.NHMTuning>;
}
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
-def : ProcModel<"westmere", SandyBridgeModel, ProcessorFeatures.WSMFeatures,
- ProcessorFeatures.WSMTuning>;
+foreach P = ["westmere", "core_aes_pclmulqdq"] in {
+ def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.WSMFeatures,
+ ProcessorFeatures.WSMTuning>;
+}
-foreach P = ["sandybridge", "corei7-avx"] in {
+foreach P = ["sandybridge", "corei7-avx", "core_2nd_gen_avx"] in {
def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.SNBFeatures,
ProcessorFeatures.SNBTuning>;
}
-foreach P = ["ivybridge", "core-avx-i"] in {
+foreach P = ["ivybridge", "core-avx-i", "core_3rd_gen_avx"] in {
def : ProcModel<P, SandyBridgeModel, ProcessorFeatures.IVBFeatures,
ProcessorFeatures.IVBTuning>;
}
-foreach P = ["haswell", "core-avx2"] in {
+foreach P = ["haswell", "core-avx2", "core_4th_gen_avx", "core_4th_gen_avx_tsx"] in {
def : ProcModel<P, HaswellModel, ProcessorFeatures.HSWFeatures,
ProcessorFeatures.HSWTuning>;
}
-def : ProcModel<"broadwell", BroadwellModel, ProcessorFeatures.BDWFeatures,
- ProcessorFeatures.BDWTuning>;
+foreach P = ["broadwell", "core_5th_gen_avx", "core_5th_gen_avx_tsx"] in {
+ def : ProcModel<P, BroadwellModel, ProcessorFeatures.BDWFeatures,
+ ProcessorFeatures.BDWTuning>;
+}
def : ProcModel<"skylake", SkylakeClientModel, ProcessorFeatures.SKLFeatures,
ProcessorFeatures.SKLTuning>;
// FIXME: define KNL scheduler model
-def : ProcModel<"knl", HaswellModel, ProcessorFeatures.KNLFeatures,
- ProcessorFeatures.KNLTuning>;
+foreach P = ["knl", "mic_avx512"] in {
+ def : ProcModel<P, HaswellModel, ProcessorFeatures.KNLFeatures,
+ ProcessorFeatures.KNLTuning>;
+}
def : ProcModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures,
ProcessorFeatures.KNLTuning>;
-foreach P = ["skylake-avx512", "skx"] in {
+foreach P = ["skylake-avx512", "skx", "skylake_avx512"] in {
def : ProcModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures,
ProcessorFeatures.SKXTuning>;
}
@@ -1539,15 +1697,19 @@ def : ProcModel<"cooperlake", SkylakeServerModel,
ProcessorFeatures.CPXFeatures, ProcessorFeatures.CPXTuning>;
def : ProcModel<"cannonlake", SkylakeServerModel,
ProcessorFeatures.CNLFeatures, ProcessorFeatures.CNLTuning>;
-def : ProcModel<"icelake-client", IceLakeModel,
+foreach P = ["icelake-client", "icelake_client"] in {
+def : ProcModel<P, IceLakeModel,
ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
+}
def : ProcModel<"rocketlake", IceLakeModel,
ProcessorFeatures.ICLFeatures, ProcessorFeatures.ICLTuning>;
-def : ProcModel<"icelake-server", IceLakeModel,
+foreach P = ["icelake-server", "icelake_server"] in {
+def : ProcModel<P, IceLakeModel,
ProcessorFeatures.ICXFeatures, ProcessorFeatures.ICXTuning>;
+}
def : ProcModel<"tigerlake", IceLakeModel,
ProcessorFeatures.TGLFeatures, ProcessorFeatures.TGLTuning>;
-def : ProcModel<"sapphirerapids", SkylakeServerModel,
+def : ProcModel<"sapphirerapids", SapphireRapidsModel,
ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
def : ProcModel<"alderlake", AlderlakePModel,
ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
@@ -1555,10 +1717,14 @@ def : ProcModel<"raptorlake", AlderlakePModel,
ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
def : ProcModel<"meteorlake", AlderlakePModel,
ProcessorFeatures.ADLFeatures, ProcessorFeatures.ADLTuning>;
-def : ProcModel<"graniterapids", SkylakeServerModel,
+def : ProcModel<"graniterapids", SapphireRapidsModel,
ProcessorFeatures.GNRFeatures, ProcessorFeatures.SPRTuning>;
-def : ProcModel<"emeraldrapids", SkylakeServerModel,
+def : ProcModel<"emeraldrapids", SapphireRapidsModel,
ProcessorFeatures.SPRFeatures, ProcessorFeatures.SPRTuning>;
+foreach P = ["graniterapids-d", "graniterapids_d"] in {
+def : ProcModel<P, SapphireRapidsModel,
+ ProcessorFeatures.GNRDFeatures, ProcessorFeatures.SPRTuning>;
+}
// AMD CPUs.
@@ -1627,7 +1793,7 @@ def : ProcModel<"znver2", Znver2Model, ProcessorFeatures.ZN2Features,
ProcessorFeatures.ZN2Tuning>;
def : ProcModel<"znver3", Znver3Model, ProcessorFeatures.ZN3Features,
ProcessorFeatures.ZN3Tuning>;
-def : Proc<"znver4",ProcessorFeatures.ZN4Features,
+def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
ProcessorFeatures.ZN4Tuning>;
def : Proc<"geode", [FeatureX87, FeatureCX8, Feature3DNowA],
@@ -1654,23 +1820,16 @@ def : Proc<"c3-2", [FeatureX87, FeatureCX8, FeatureMMX,
// knobs which need to be tuned differently for AMD chips, we might consider
// forming a common base for them.
def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
-[
- TuningSlow3OpsLEA,
- TuningSlowDivide64,
- TuningSlowIncDec,
- TuningMacroFusion,
- TuningInsertVZEROUPPER
-]>;
-
-// x86-64 micro-architecture levels.
+ ProcessorFeatures.X86_64V1Tuning>;
+// Close to Sandybridge.
def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Features,
- ProcessorFeatures.SNBTuning>;
+ ProcessorFeatures.X86_64V2Tuning>;
// Close to Haswell.
def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features,
- ProcessorFeatures.HSWTuning>;
+ ProcessorFeatures.X86_64V3Tuning>;
// Close to the AVX-512 level implemented by Xeon Scalable Processors.
def : ProcModel<"x86-64-v4", SkylakeServerModel, ProcessorFeatures.X86_64V4Features,
- ProcessorFeatures.SKXTuning>;
+ ProcessorFeatures.X86_64V4Tuning>;
//===----------------------------------------------------------------------===//
// Calling Conventions
diff --git a/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp b/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp
new file mode 100644
index 000000000000..7ce1960b57a4
--- /dev/null
+++ b/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp
@@ -0,0 +1,198 @@
+//===---- X86ArgumentStackSlotRebase.cpp - rebase argument stack slot -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces the frame register with a GPR virtual register and sets
+// the stack offset for each instruction which references an argument on the
+// stack.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86argumentstackrebase"
+
+namespace {
+
+class X86ArgumentStackSlotPass : public MachineFunctionPass {
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ explicit X86ArgumentStackSlotPass() : MachineFunctionPass(ID) {
+ initializeX86ArgumentStackSlotPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // end anonymous namespace
+
+char X86ArgumentStackSlotPass::ID = 0;
+
+INITIALIZE_PASS(X86ArgumentStackSlotPass, DEBUG_TYPE, "Argument Stack Rebase",
+ false, false)
+
+FunctionPass *llvm::createX86ArgumentStackSlotPass() {
+ return new X86ArgumentStackSlotPass();
+}
+
+static Register getArgBaseReg(MachineFunction &MF) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const Function &F = MF.getFunction();
+ CallingConv::ID CC = F.getCallingConv();
+ Register NoReg;
+ const TargetRegisterClass *RC = nullptr;
+ switch (CC) {
+  // We need a virtual register in case inline assembly clobbers the
+  // argument base register.
+ case CallingConv::C:
+ RC = STI.is64Bit() ? &X86::GR64_ArgRefRegClass : &X86::GR32_ArgRefRegClass;
+ break;
+ case CallingConv::X86_RegCall:
+    // FIXME: For regcall there is no scratch register on 32-bit targets.
+    // We may use a callee-saved register as the argument base register and
+    // save it before it is repurposed as the base pointer. We need DW_CFA to
+    // indicate where the callee-saved register is saved, so that it can
+    // be correctly unwound.
+ // push ebx
+ // mov ebx, esp
+ // and esp, -128
+ // ...
+ // pop ebx
+ // ret
+ RC = STI.is64Bit() ? &X86::GR64_ArgRefRegClass : nullptr;
+ break;
+ // TODO: Refine register class for each calling convention.
+ default:
+ break;
+ }
+ if (RC)
+ return MRI.createVirtualRegister(RC);
+ else
+ return NoReg;
+}
+
+bool X86ArgumentStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+ const X86RegisterInfo *TRI = STI.getRegisterInfo();
+ const X86InstrInfo *TII = STI.getInstrInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ bool Changed = false;
+
+ if (F.hasFnAttribute(Attribute::Naked))
+ return false;
+ // Only support Linux and ELF.
+ if (!STI.isTargetLinux() && !STI.isTargetELF())
+ return false;
+ if (!TRI->hasBasePointer(MF))
+ return false;
+ // Don't support X32
+ if (STI.isTarget64BitILP32())
+ return false;
+
+ Register BasePtr = TRI->getBaseRegister();
+ auto IsBaseRegisterClobbered = [&]() {
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (!MI.isInlineAsm())
+ continue;
+ for (MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Register::isPhysicalRegister(Reg))
+ continue;
+ if (TRI->isSuperOrSubRegisterEq(BasePtr, Reg))
+ return true;
+ }
+ }
+ }
+ return false;
+ };
+ if (!IsBaseRegisterClobbered())
+ return false;
+
+ Register ArgBaseReg = getArgBaseReg(MF);
+ if (!ArgBaseReg.isValid())
+ return false;
+ // leal 4(%esp), %reg
+ MachineBasicBlock &MBB = MF.front();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc DL;
+  // Emit an instruction that copies the stack pointer into a virtual
+  // register and record it in the X86 machine function info; the physical
+  // register assigned to ArgBaseReg is known after register allocation. The
+  // stack slot is used to save/restore the argument base pointer, and its
+  // index can be retrieved from the instruction.
+ unsigned SlotSize = TRI->getSlotSize();
+ int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
+ // Use pseudo LEA to prevent the instruction from being eliminated.
+ // TODO: if it is duplicated we can expand it to lea.
+ MachineInstr *LEA =
+ BuildMI(MBB, MBBI, DL,
+ TII->get(STI.is64Bit() ? X86::PLEA64r : X86::PLEA32r), ArgBaseReg)
+ .addFrameIndex(FI)
+ .addImm(1)
+ .addUse(X86::NoRegister)
+ .addImm(SlotSize)
+ .addUse(X86::NoRegister)
+ .setMIFlag(MachineInstr::FrameSetup);
+ X86FI->setStackPtrSaveMI(LEA);
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ int I = 0;
+ for (MachineOperand &MO : MI.operands()) {
+ if (MO.isFI()) {
+ int Idx = MO.getIndex();
+ if (!MFI.isFixedObjectIndex(Idx))
+ continue;
+ int64_t Offset = MFI.getObjectOffset(Idx);
+ if (Offset < 0)
+ continue;
+          // TODO: also replace the register in debug instructions.
+ if (MI.isDebugInstr())
+ continue;
+ // Replace frame register with argument base pointer and its offset.
+ TRI->eliminateFrameIndex(MI.getIterator(), I, ArgBaseReg, Offset);
+ Changed = true;
+ }
+ ++I;
+ }
+ }
+ }
+
+ return Changed;
+}
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 88bc4b072ac8..bb94444525fb 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -23,6 +23,7 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InlineAsm.h"
@@ -42,7 +43,6 @@
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -510,7 +510,9 @@ void X86AsmPrinter::PrintIntelMemReference(const MachineInstr *MI,
if (!DispSpec.isImm()) {
if (NeedPlus) O << " + ";
- PrintOperand(MI, OpNo + X86::AddrDisp, O);
+ // Do not add `offset` operator. Matches the behaviour of
+ // X86IntelInstPrinter::printMemReference.
+ PrintSymbolOperand(DispSpec, O);
} else {
int64_t DispVal = DispSpec.getImm();
if (DispVal || (!IndexReg.getReg() && !HasBaseReg)) {
@@ -546,6 +548,8 @@ static bool printAsmMRegister(const X86AsmPrinter &P, const MachineOperand &MO,
break;
case 'h': // Print QImode high register
Reg = getX86SubSuperRegister(Reg, 8, true);
+ if (!Reg.isValid())
+ return true;
break;
case 'w': // Print HImode register
Reg = getX86SubSuperRegister(Reg, 16);
diff --git a/llvm/lib/Target/X86/X86CallFrameOptimization.cpp b/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
index 1fa559dcf2bd..792bcddde707 100644
--- a/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -285,15 +285,15 @@ X86CallFrameOptimization::classifyInstruction(
// The instructions we actually care about are movs onto the stack or special
// cases of constant-stores to stack
switch (MI->getOpcode()) {
- case X86::AND16mi8:
- case X86::AND32mi8:
- case X86::AND64mi8: {
+ case X86::AND16mi:
+ case X86::AND32mi:
+ case X86::AND64mi32: {
const MachineOperand &ImmOp = MI->getOperand(X86::AddrNumOperands);
return ImmOp.getImm() == 0 ? Convert : Exit;
}
- case X86::OR16mi8:
- case X86::OR32mi8:
- case X86::OR64mi8: {
+ case X86::OR16mi:
+ case X86::OR32mi:
+ case X86::OR64mi32: {
const MachineOperand &ImmOp = MI->getOperand(X86::AddrNumOperands);
return ImmOp.getImm() == -1 ? Convert : Exit;
}
@@ -512,24 +512,15 @@ void X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF,
switch (Store->getOpcode()) {
default:
llvm_unreachable("Unexpected Opcode!");
- case X86::AND16mi8:
- case X86::AND32mi8:
- case X86::AND64mi8:
- case X86::OR16mi8:
- case X86::OR32mi8:
- case X86::OR64mi8:
+ case X86::AND16mi:
+ case X86::AND32mi:
+ case X86::AND64mi32:
+ case X86::OR16mi:
+ case X86::OR32mi:
+ case X86::OR64mi32:
case X86::MOV32mi:
case X86::MOV64mi32:
- PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSHi32;
- // If the operand is a small (8-bit) immediate, we can use a
- // PUSH instruction with a shorter encoding.
- // Note that isImm() may fail even though this is a MOVmi, because
- // the operand can also be a symbol.
- if (PushOp.isImm()) {
- int64_t Val = PushOp.getImm();
- if (isInt<8>(Val))
- PushOpcode = Is64Bit ? X86::PUSH64i8 : X86::PUSH32i8;
- }
+ PushOpcode = Is64Bit ? X86::PUSH64i32 : X86::PUSH32i;
Push = BuildMI(MBB, Context.Call, DL, TII->get(PushOpcode)).add(PushOp);
Push->cloneMemRefs(MF, *Store);
break;
diff --git a/llvm/lib/Target/X86/X86CallLowering.cpp b/llvm/lib/Target/X86/X86CallLowering.cpp
index 919f4f9e119b..a47a09414cf7 100644
--- a/llvm/lib/Target/X86/X86CallLowering.cpp
+++ b/llvm/lib/Target/X86/X86CallLowering.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -33,6 +34,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -41,8 +43,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
-#include "llvm/Support/MachineValueType.h"
#include <cassert>
#include <cstdint>
@@ -70,7 +70,7 @@ public:
const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
CCState &State) override {
bool Res = AssignFn(ValNo, ValVT, LocVT, LocInfo, Flags, State);
- StackSize = State.getNextStackOffset();
+ StackSize = State.getStackSize();
static const MCPhysReg XMMArgRegs[] = {X86::XMM0, X86::XMM1, X86::XMM2,
X86::XMM3, X86::XMM4, X86::XMM5,
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index c92a30804014..06cebdc21594 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -430,16 +430,6 @@ def RetCC_X86_64_AnyReg : CallingConv<[
CCCustom<"CC_X86_AnyReg_Error">
]>;
-// X86-64 HHVM return-value convention.
-def RetCC_X86_64_HHVM: CallingConv<[
- // Promote all types to i64
- CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
-
- // Return: could return in any GP register save RSP and R12.
- CCIfType<[i64], CCAssignToReg<[RBX, RBP, RDI, RSI, RDX, RCX, R8, R9,
- RAX, R10, R11, R13, R14, R15]>>
-]>;
-
defm X86_32_RegCall :
X86_RegCall_base<RC_X86_32_RegCall>;
@@ -483,9 +473,6 @@ def RetCC_X86_64 : CallingConv<[
// Handle Vectorcall CC
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_64_Vectorcall>>,
- // Handle HHVM calls.
- CCIfCC<"CallingConv::HHVM", CCDelegateTo<RetCC_X86_64_HHVM>>,
-
CCIfCC<"CallingConv::X86_RegCall",
CCIfSubtarget<"isTargetWin64()",
CCDelegateTo<RetCC_X86_Win64_RegCall>>>,
@@ -603,23 +590,6 @@ def CC_X86_64_C : CallingConv<[
CCAssignToStack<64, 64>>
]>;
-// Calling convention for X86-64 HHVM.
-def CC_X86_64_HHVM : CallingConv<[
- // Use all/any GP registers for args, except RSP.
- CCIfType<[i64], CCAssignToReg<[RBX, R12, RBP, R15,
- RDI, RSI, RDX, RCX, R8, R9,
- RAX, R10, R11, R13, R14]>>
-]>;
-
-// Calling convention for helper functions in HHVM.
-def CC_X86_64_HHVM_C : CallingConv<[
- // Pass the first argument in RBP.
- CCIfType<[i64], CCAssignToReg<[RBP]>>,
-
- // Otherwise it's the same as the regular C calling convention.
- CCDelegateTo<CC_X86_64_C>
-]>;
-
// Calling convention used on Win64
def CC_X86_Win64_C : CallingConv<[
// FIXME: Handle varargs.
@@ -1097,8 +1067,6 @@ def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::Win64", CCDelegateTo<CC_X86_Win64_C>>,
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<CC_X86_64_C>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win64_VectorCall>>,
- CCIfCC<"CallingConv::HHVM", CCDelegateTo<CC_X86_64_HHVM>>,
- CCIfCC<"CallingConv::HHVM_C", CCDelegateTo<CC_X86_64_HHVM_C>>,
CCIfCC<"CallingConv::X86_RegCall",
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_RegCall>>>,
CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_SysV64_RegCall>>,
@@ -1210,9 +1178,6 @@ def CSR_64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RSI, R14, R15,
(sequence "ZMM%u", 16, 31),
K4, K5, K6, K7)>;
-// Only R12 is preserved for PHP calls in HHVM.
-def CSR_64_HHVM : CalleeSavedRegs<(add R12)>;
-
// Register calling convention preserves few GPR and XMM8-15
def CSR_32_RegCall_NoSSE : CalleeSavedRegs<(add ESI, EDI, EBX, EBP)>;
def CSR_32_RegCall : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE,
diff --git a/llvm/lib/Target/X86/X86CmovConversion.cpp b/llvm/lib/Target/X86/X86CmovConversion.cpp
index 765ff5abf047..8dc3b91f08e2 100644
--- a/llvm/lib/Target/X86/X86CmovConversion.cpp
+++ b/llvm/lib/Target/X86/X86CmovConversion.cpp
@@ -305,9 +305,13 @@ bool X86CmovConverterPass::collectCmovCandidates(
// Skip debug instructions.
if (I.isDebugInstr())
continue;
+
X86::CondCode CC = X86::getCondFromCMov(I);
- // Check if we found a X86::CMOVrr instruction.
- if (CC != X86::COND_INVALID && (IncludeLoads || !I.mayLoad())) {
+    // Check if we found an X86::CMOVrr instruction. If it is marked as
+    // unpredictable, skip it and do not convert it to a branch.
+ if (CC != X86::COND_INVALID &&
+ !I.getFlag(MachineInstr::MIFlag::Unpredictable) &&
+ (IncludeLoads || !I.mayLoad())) {
if (Group.empty()) {
// We found first CMOV in the range, reset flags.
FirstCC = CC;
@@ -770,6 +774,8 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
const TargetRegisterClass *RC = MRI->getRegClass(MI.getOperand(0).getReg());
Register TmpReg = MRI->createVirtualRegister(RC);
+ // Retain debug instr number when unfolded.
+ unsigned OldDebugInstrNum = MI.peekDebugInstrNum();
SmallVector<MachineInstr *, 4> NewMIs;
bool Unfolded = TII->unfoldMemoryOperand(*MBB->getParent(), MI, TmpReg,
/*UnfoldLoad*/ true,
@@ -787,6 +793,9 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
if (&*MIItBegin == &MI)
MIItBegin = MachineBasicBlock::iterator(NewCMOV);
+ if (OldDebugInstrNum)
+ NewCMOV->setDebugInstrNum(OldDebugInstrNum);
+
// Sink whatever instructions were needed to produce the unfolded operand
// into the false block.
for (auto *NewMI : NewMIs) {
@@ -854,10 +863,20 @@ void X86CmovConverterPass::convertCmovInstsToBranches(
LLVM_DEBUG(dbgs() << "\tFrom: "; MIIt->dump());
LLVM_DEBUG(dbgs() << "\tTo: "; MIB->dump());
+ // debug-info: we can just copy the instr-ref number from one instruction
+ // to the other, seeing how it's a one-for-one substitution.
+ if (unsigned InstrNum = MIIt->peekDebugInstrNum())
+ MIB->setDebugInstrNum(InstrNum);
+
// Add this PHI to the rewrite table.
RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
}
+ // Reset the NoPHIs property if a PHI was inserted to prevent a conflict with
+ // the MachineVerifier during testing.
+ if (MIItBegin != MIItEnd)
+ F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
+
// Now remove the CMOV(s).
MBB->erase(MIItBegin, MIItEnd);
diff --git a/llvm/lib/Target/X86/X86DynAllocaExpander.cpp b/llvm/lib/Target/X86/X86DynAllocaExpander.cpp
index 8f237ee386b5..5ed94f329c4b 100644
--- a/llvm/lib/Target/X86/X86DynAllocaExpander.cpp
+++ b/llvm/lib/Target/X86/X86DynAllocaExpander.cpp
@@ -110,12 +110,10 @@ X86DynAllocaExpander::getLowering(int64_t CurrentOffset,
static bool isPushPop(const MachineInstr &MI) {
switch (MI.getOpcode()) {
- case X86::PUSH32i8:
case X86::PUSH32r:
case X86::PUSH32rmm:
case X86::PUSH32rmr:
- case X86::PUSHi32:
- case X86::PUSH64i8:
+ case X86::PUSH32i:
case X86::PUSH64r:
case X86::PUSH64rmm:
case X86::PUSH64rmr:
@@ -189,10 +187,10 @@ void X86DynAllocaExpander::computeLowerings(MachineFunction &MF,
}
}
-static unsigned getSubOpcode(bool Is64Bit, int64_t Amount) {
+static unsigned getSubOpcode(bool Is64Bit) {
if (Is64Bit)
- return isInt<8>(Amount) ? X86::SUB64ri8 : X86::SUB64ri32;
- return isInt<8>(Amount) ? X86::SUB32ri8 : X86::SUB32ri;
+ return X86::SUB64ri32;
+ return X86::SUB32ri;
}
void X86DynAllocaExpander::lower(MachineInstr *MI, Lowering L) {
@@ -242,8 +240,7 @@ void X86DynAllocaExpander::lower(MachineInstr *MI, Lowering L) {
.addReg(RegA, RegState::Undef);
} else {
// Sub.
- BuildMI(*MBB, I, DL,
- TII->get(getSubOpcode(Is64BitAlloca, Amount)), StackPtr)
+ BuildMI(*MBB, I, DL, TII->get(getSubOpcode(Is64BitAlloca)), StackPtr)
.addReg(StackPtr)
.addImm(Amount);
}
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 337b2b93b684..085fa9280b0e 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -18,11 +18,11 @@
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -49,7 +49,7 @@ public:
const X86MachineFunctionInfo *X86FI = nullptr;
const X86FrameLowering *X86FL = nullptr;
- bool runOnMachineFunction(MachineFunction &Fn) override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
@@ -77,7 +77,7 @@ private:
/// placed into separate block guarded by check for al register(for SystemV
/// abi).
void ExpandVastartSaveXmmRegs(
- MachineBasicBlock *MBB,
+ MachineBasicBlock *EntryBlk,
MachineBasicBlock::iterator VAStartPseudoInstr) const;
};
char X86ExpandPseudo::ID = 0;
@@ -562,6 +562,8 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.setDesc(TII->get(Opc));
return true;
}
+ case X86::PTCMMIMFP16PSV:
+ case X86::PTCMMRLFP16PSV:
case X86::PTDPBSSDV:
case X86::PTDPBSUDV:
case X86::PTDPBUSDV:
@@ -573,6 +575,8 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.removeOperand(i);
unsigned Opc;
switch (Opcode) {
+ case X86::PTCMMIMFP16PSV: Opc = X86::TCMMIMFP16PS; break;
+ case X86::PTCMMRLFP16PSV: Opc = X86::TCMMRLFP16PS; break;
case X86::PTDPBSSDV: Opc = X86::TDPBSSD; break;
case X86::PTDPBSUDV: Opc = X86::TDPBSUD; break;
case X86::PTDPBUSDV: Opc = X86::TDPBUSD; break;
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index ade4ff61762a..ff90b402b9b9 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -1376,7 +1376,6 @@ static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
/// If we have a comparison with RHS as the RHS of the comparison, return an
/// opcode that works for the compare (e.g. CMP32ri) otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
- int64_t Val = RHSC->getSExtValue();
switch (VT.getSimpleVT().SimpleTy) {
// Otherwise, we can't fold the immediate into this comparison.
default:
@@ -1384,21 +1383,13 @@ static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
case MVT::i8:
return X86::CMP8ri;
case MVT::i16:
- if (isInt<8>(Val))
- return X86::CMP16ri8;
return X86::CMP16ri;
case MVT::i32:
- if (isInt<8>(Val))
- return X86::CMP32ri8;
return X86::CMP32ri;
case MVT::i64:
- if (isInt<8>(Val))
- return X86::CMP64ri8;
// 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
// field.
- if (isInt<32>(Val))
- return X86::CMP64ri32;
- return 0;
+ return isInt<32>(RHSC->getSExtValue()) ? X86::CMP64ri32 : 0;
}
}
@@ -3030,6 +3021,58 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
updateValueMap(II, ResultReg);
return true;
}
+ case Intrinsic::x86_sse42_crc32_32_8:
+ case Intrinsic::x86_sse42_crc32_32_16:
+ case Intrinsic::x86_sse42_crc32_32_32:
+ case Intrinsic::x86_sse42_crc32_64_64: {
+ if (!Subtarget->hasCRC32())
+ return false;
+
+ Type *RetTy = II->getCalledFunction()->getReturnType();
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ unsigned Opc;
+ const TargetRegisterClass *RC = nullptr;
+
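+    // Pick the CRC32 register-register opcode for the operand width; the
+    // 8/16/32-bit forms all accumulate into a 32-bit destination register.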
+ switch (II->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unexpected intrinsic.");
+ case Intrinsic::x86_sse42_crc32_32_8:
+ Opc = X86::CRC32r32r8;
+ RC = &X86::GR32RegClass;
+ break;
+ case Intrinsic::x86_sse42_crc32_32_16:
+ Opc = X86::CRC32r32r16;
+ RC = &X86::GR32RegClass;
+ break;
+ case Intrinsic::x86_sse42_crc32_32_32:
+ Opc = X86::CRC32r32r32;
+ RC = &X86::GR32RegClass;
+ break;
+ case Intrinsic::x86_sse42_crc32_64_64:
+ Opc = X86::CRC32r64r64;
+ RC = &X86::GR64RegClass;
+ break;
+ }
+
+ const Value *LHS = II->getArgOperand(0);
+ const Value *RHS = II->getArgOperand(1);
+
+ Register LHSReg = getRegForValue(LHS);
+ Register RHSReg = getRegForValue(RHS);
+ if (!LHSReg || !RHSReg)
+ return false;
+
+ Register ResultReg = fastEmitInst_rr(Opc, RC, LHSReg, RHSReg);
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(II, ResultReg);
+ return true;
+ }
}
}
diff --git a/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/llvm/lib/Target/X86/X86FixupBWInsts.cpp
index db6923416177..5980e4572e7a 100644
--- a/llvm/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/llvm/lib/Target/X86/X86FixupBWInsts.cpp
@@ -148,8 +148,8 @@ private:
/// Register Liveness information after the current instruction.
LivePhysRegs LiveRegs;
- ProfileSummaryInfo *PSI;
- MachineBlockFrequencyInfo *MBFI;
+ ProfileSummaryInfo *PSI = nullptr;
+ MachineBlockFrequencyInfo *MBFI = nullptr;
};
char FixupBWInstPass::ID = 0;
}
@@ -193,6 +193,7 @@ bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI,
const X86RegisterInfo *TRI = &TII->getRegisterInfo();
Register OrigDestReg = OrigMI->getOperand(0).getReg();
SuperDestReg = getX86SubSuperRegister(OrigDestReg, 32);
+ assert(SuperDestReg.isValid() && "Invalid Operand");
const auto SubRegIdx = TRI->getSubRegIndex(SuperDestReg, OrigDestReg);
@@ -213,9 +214,9 @@ bool FixupBWInstPass::getSuperRegDestIfDead(MachineInstr *OrigMI,
// If the original destination register was the low 8-bit subregister and
// we also need to check the 16-bit subregister and the high 8-bit
// subregister.
+ MCRegister HighReg = getX86SubSuperRegister(SuperDestReg, 8, /*High=*/true);
if (!LiveRegs.contains(getX86SubSuperRegister(OrigDestReg, 16)) &&
- !LiveRegs.contains(getX86SubSuperRegister(SuperDestReg, 8,
- /*High=*/true)))
+ (!HighReg.isValid() || !LiveRegs.contains(HighReg)))
return true;
// Otherwise, we have a little more checking to do.
}
@@ -298,7 +299,7 @@ MachineInstr *FixupBWInstPass::tryReplaceLoad(unsigned New32BitOpcode,
// Safe to change the instruction.
MachineInstrBuilder MIB =
- BuildMI(*MF, MI->getDebugLoc(), TII->get(New32BitOpcode), NewDestReg);
+ BuildMI(*MF, MIMetadata(*MI), TII->get(New32BitOpcode), NewDestReg);
unsigned NumArgs = MI->getNumOperands();
for (unsigned i = 1; i < NumArgs; ++i)
@@ -327,6 +328,7 @@ MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const {
return nullptr;
Register NewSrcReg = getX86SubSuperRegister(OldSrc.getReg(), 32);
+ assert(NewSrcReg.isValid() && "Invalid Operand");
// This is only correct if we access the same subregister index: otherwise,
// we could try to replace "movb %ah, %al" with "movl %eax, %eax".
@@ -341,7 +343,7 @@ MachineInstr *FixupBWInstPass::tryReplaceCopy(MachineInstr *MI) const {
// we don't care about the higher bits by reading it as Undef, and adding
// an imp-use on the original subregister.
MachineInstrBuilder MIB =
- BuildMI(*MF, MI->getDebugLoc(), TII->get(X86::MOV32rr), NewDestReg)
+ BuildMI(*MF, MIMetadata(*MI), TII->get(X86::MOV32rr), NewDestReg)
.addReg(NewSrcReg, RegState::Undef)
.addReg(OldSrc.getReg(), RegState::Implicit);
@@ -369,7 +371,7 @@ MachineInstr *FixupBWInstPass::tryReplaceExtend(unsigned New32BitOpcode,
// Safe to change the instruction.
MachineInstrBuilder MIB =
- BuildMI(*MF, MI->getDebugLoc(), TII->get(New32BitOpcode), NewDestReg);
+ BuildMI(*MF, MIMetadata(*MI), TII->get(New32BitOpcode), NewDestReg);
unsigned NumArgs = MI->getNumOperands();
for (unsigned i = 1; i < NumArgs; ++i)
diff --git a/llvm/lib/Target/X86/X86FixupInstTuning.cpp b/llvm/lib/Target/X86/X86FixupInstTuning.cpp
new file mode 100644
index 000000000000..8ffd971515a6
--- /dev/null
+++ b/llvm/lib/Target/X86/X86FixupInstTuning.cpp
@@ -0,0 +1,517 @@
+//===-- X86FixupInstTuning.cpp - replace instructions ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file does a tuning pass replacing slower machine instructions
+// with faster ones. We do this here, as opposed to during normal ISel, as
+// attempting to get the "right" instruction can break patterns. This pass
+// is not meant to search for special cases where an instruction can be
+// transformed to another; it is only meant to do transformations where the old
+// instruction is always replaceable with the new one. For example:
+//
+// `vpermq ymm` -> `vshufd ymm`
+// -- BAD, not always valid (lane cross/non-repeated mask)
+//
+// `vpermilps ymm` -> `vshufd ymm`
+// -- GOOD, always replaceable
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-fixup-inst-tuning"
+
+STATISTIC(NumInstChanges, "Number of instruction changes");
+
+namespace {
+class X86FixupInstTuningPass : public MachineFunctionPass {
+public:
+ static char ID;
+
+ X86FixupInstTuningPass() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override { return "X86 Fixup Inst Tuning"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &I);
+
+ // This pass runs after regalloc and doesn't support VReg operands.
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+private:
+ const X86InstrInfo *TII = nullptr;
+ const X86Subtarget *ST = nullptr;
+ const MCSchedModel *SM = nullptr;
+};
+} // end anonymous namespace
+
+char X86FixupInstTuningPass::ID = 0;
+
+INITIALIZE_PASS(X86FixupInstTuningPass, DEBUG_TYPE, DEBUG_TYPE, false, false)
+
+FunctionPass *llvm::createX86FixupInstTuning() {
+ return new X86FixupInstTuningPass();
+}
+
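+// Compare two optional metric values: returns whether NewVal is strictly
+// better (smaller) than CurVal when both are known and differ, and
+// std::nullopt otherwise so the caller can fall back to the next criterion.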
+template <typename T>
+static std::optional<bool> CmpOptionals(T NewVal, T CurVal) {
+ if (NewVal.has_value() && CurVal.has_value() && *NewVal != *CurVal)
+ return *NewVal < *CurVal;
+
+ return std::nullopt;
+}
+
+bool X86FixupInstTuningPass::processInstruction(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &I) {
+ MachineInstr &MI = *I;
+ unsigned Opc = MI.getOpcode();
+ unsigned NumOperands = MI.getDesc().getNumOperands();
+
+ auto GetInstTput = [&](unsigned Opcode) -> std::optional<double> {
+ // We already checked that SchedModel exists in `NewOpcPreferable`.
+ return MCSchedModel::getReciprocalThroughput(
+ *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass())));
+ };
+
+ auto GetInstLat = [&](unsigned Opcode) -> std::optional<double> {
+ // We already checked that SchedModel exists in `NewOpcPreferable`.
+ return MCSchedModel::computeInstrLatency(
+ *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass())));
+ };
+
+ auto GetInstSize = [&](unsigned Opcode) -> std::optional<unsigned> {
+ if (unsigned Size = TII->get(Opcode).getSize())
+ return Size;
+    // Zero size means we were unable to compute it.
+ return std::nullopt;
+ };
+
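+  // Decide whether NewOpc should replace the current opcode by comparing, in
+  // order, reciprocal throughput, then latency (when an instruction-level
+  // scheduling model exists), then encoded size; ReplaceInTie breaks exact
+  // ties.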
+ auto NewOpcPreferable = [&](unsigned NewOpc,
+ bool ReplaceInTie = true) -> bool {
+ std::optional<bool> Res;
+ if (SM->hasInstrSchedModel()) {
+ // Compare tput -> lat -> code size.
+ Res = CmpOptionals(GetInstTput(NewOpc), GetInstTput(Opc));
+ if (Res.has_value())
+ return *Res;
+
+ Res = CmpOptionals(GetInstLat(NewOpc), GetInstLat(Opc));
+ if (Res.has_value())
+ return *Res;
+ }
+
+ Res = CmpOptionals(GetInstSize(Opc), GetInstSize(NewOpc));
+ if (Res.has_value())
+ return *Res;
+
+    // We either were unable to get tput/lat/codesize or all values were equal.
+    // Return the specified option for a tie.
+ return ReplaceInTie;
+ };
+
+ // `vpermilpd r, i` -> `vshufpd r, r, i`
+ // `vpermilpd r, i, k` -> `vshufpd r, r, i, k`
+ // `vshufpd` is always as fast or faster than `vpermilpd` and takes
+ // 1 less byte of code size for VEX and EVEX encoding.
+ auto ProcessVPERMILPDri = [&](unsigned NewOpc) -> bool {
+ if (!NewOpcPreferable(NewOpc))
+ return false;
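+    // VSHUFPD takes two sources: pop the immediate, duplicate the single
+    // VPERMILPD source operand, then re-append the immediate.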
+ unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();
+ MI.removeOperand(NumOperands - 1);
+ MI.addOperand(MI.getOperand(NumOperands - 2));
+ MI.setDesc(TII->get(NewOpc));
+ MI.addOperand(MachineOperand::CreateImm(MaskImm));
+ return true;
+ };
+
+ // `vpermilps r, i` -> `vshufps r, r, i`
+ // `vpermilps r, i, k` -> `vshufps r, r, i, k`
+ // `vshufps` is always as fast or faster than `vpermilps` and takes
+ // 1 less byte of code size for VEX and EVEX encoding.
+ auto ProcessVPERMILPSri = [&](unsigned NewOpc) -> bool {
+ if (!NewOpcPreferable(NewOpc))
+ return false;
+ unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();
+ MI.removeOperand(NumOperands - 1);
+ MI.addOperand(MI.getOperand(NumOperands - 2));
+ MI.setDesc(TII->get(NewOpc));
+ MI.addOperand(MachineOperand::CreateImm(MaskImm));
+ return true;
+ };
+
+ // `vpermilps m, i` -> `vpshufd m, i` iff no domain delay penalty on shuffles.
+ // `vpshufd` is always as fast or faster than `vpermilps` and takes 1 less
+ // byte of code size.
+ auto ProcessVPERMILPSmi = [&](unsigned NewOpc) -> bool {
+    // TODO: Might be worth adding bypass delay if -Os/-Oz is enabled, as
+ // `vpshufd` saves a byte of code size.
+ if (!ST->hasNoDomainDelayShuffle() ||
+ !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
+ return false;
+ MI.setDesc(TII->get(NewOpc));
+ return true;
+ };
+
+ // `vunpcklpd/vmovlhps r, r` -> `vunpcklqdq r, r`/`vshufpd r, r, 0x00`
+ // `vunpckhpd/vmovlhps r, r` -> `vunpckhqdq r, r`/`vshufpd r, r, 0xff`
+ // `vunpcklpd r, r, k` -> `vunpcklqdq r, r, k`/`vshufpd r, r, k, 0x00`
+ // `vunpckhpd r, r, k` -> `vunpckhqdq r, r, k`/`vshufpd r, r, k, 0xff`
+ // `vunpcklpd r, m` -> `vunpcklqdq r, m, k`
+ // `vunpckhpd r, m` -> `vunpckhqdq r, m, k`
+ // `vunpcklpd r, m, k` -> `vunpcklqdq r, m, k`
+ // `vunpckhpd r, m, k` -> `vunpckhqdq r, m, k`
+ // 1) If no bypass delay and `vunpck{l|h}qdq` faster than `vunpck{l|h}pd`
+ // -> `vunpck{l|h}qdq`
+ // 2) If `vshufpd` faster than `vunpck{l|h}pd`
+ // -> `vshufpd`
+ //
+ // `vunpcklps` -> `vunpckldq` (for all operand types if no bypass delay)
+ auto ProcessUNPCK = [&](unsigned NewOpc, unsigned MaskImm) -> bool {
+ if (!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
+ return false;
+
+ MI.setDesc(TII->get(NewOpc));
+ MI.addOperand(MachineOperand::CreateImm(MaskImm));
+ return true;
+ };
+
+ auto ProcessUNPCKToIntDomain = [&](unsigned NewOpc) -> bool {
+    // TODO: it may be worth setting ReplaceInTie to `true`, as there is no
+    // real downside to the integer unpck, but if someone doesn't specify an
+    // exact target we won't find it faster.
+ if (!ST->hasNoDomainDelayShuffle() ||
+ !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
+ return false;
+ MI.setDesc(TII->get(NewOpc));
+ return true;
+ };
+
+ auto ProcessUNPCKLPDrr = [&](unsigned NewOpcIntDomain,
+ unsigned NewOpc) -> bool {
+ if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
+ return true;
+ return ProcessUNPCK(NewOpc, 0x00);
+ };
+ auto ProcessUNPCKHPDrr = [&](unsigned NewOpcIntDomain,
+ unsigned NewOpc) -> bool {
+ if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
+ return true;
+ return ProcessUNPCK(NewOpc, 0xff);
+ };
+
+ auto ProcessUNPCKPDrm = [&](unsigned NewOpcIntDomain) -> bool {
+ return ProcessUNPCKToIntDomain(NewOpcIntDomain);
+ };
+
+ auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool {
+ return ProcessUNPCKToIntDomain(NewOpc);
+ };
+
+ switch (Opc) {
+ case X86::VPERMILPDri:
+ return ProcessVPERMILPDri(X86::VSHUFPDrri);
+ case X86::VPERMILPDYri:
+ return ProcessVPERMILPDri(X86::VSHUFPDYrri);
+ case X86::VPERMILPDZ128ri:
+ return ProcessVPERMILPDri(X86::VSHUFPDZ128rri);
+ case X86::VPERMILPDZ256ri:
+ return ProcessVPERMILPDri(X86::VSHUFPDZ256rri);
+ case X86::VPERMILPDZri:
+ return ProcessVPERMILPDri(X86::VSHUFPDZrri);
+ case X86::VPERMILPDZ128rikz:
+ return ProcessVPERMILPDri(X86::VSHUFPDZ128rrikz);
+ case X86::VPERMILPDZ256rikz:
+ return ProcessVPERMILPDri(X86::VSHUFPDZ256rrikz);
+ case X86::VPERMILPDZrikz:
+ return ProcessVPERMILPDri(X86::VSHUFPDZrrikz);
+ case X86::VPERMILPDZ128rik:
+ return ProcessVPERMILPDri(X86::VSHUFPDZ128rrik);
+ case X86::VPERMILPDZ256rik:
+ return ProcessVPERMILPDri(X86::VSHUFPDZ256rrik);
+ case X86::VPERMILPDZrik:
+ return ProcessVPERMILPDri(X86::VSHUFPDZrrik);
+
+ case X86::VPERMILPSri:
+ return ProcessVPERMILPSri(X86::VSHUFPSrri);
+ case X86::VPERMILPSYri:
+ return ProcessVPERMILPSri(X86::VSHUFPSYrri);
+ case X86::VPERMILPSZ128ri:
+ return ProcessVPERMILPSri(X86::VSHUFPSZ128rri);
+ case X86::VPERMILPSZ256ri:
+ return ProcessVPERMILPSri(X86::VSHUFPSZ256rri);
+ case X86::VPERMILPSZri:
+ return ProcessVPERMILPSri(X86::VSHUFPSZrri);
+ case X86::VPERMILPSZ128rikz:
+ return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz);
+ case X86::VPERMILPSZ256rikz:
+ return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz);
+ case X86::VPERMILPSZrikz:
+ return ProcessVPERMILPSri(X86::VSHUFPSZrrikz);
+ case X86::VPERMILPSZ128rik:
+ return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik);
+ case X86::VPERMILPSZ256rik:
+ return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik);
+ case X86::VPERMILPSZrik:
+ return ProcessVPERMILPSri(X86::VSHUFPSZrrik);
+ case X86::VPERMILPSmi:
+ return ProcessVPERMILPSmi(X86::VPSHUFDmi);
+ case X86::VPERMILPSYmi:
+ // TODO: See if there is a more generic way we can test if the replacement
+ // instruction is supported.
+ return ST->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi) : false;
+ case X86::VPERMILPSZ128mi:
+ return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi);
+ case X86::VPERMILPSZ256mi:
+ return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi);
+ case X86::VPERMILPSZmi:
+ return ProcessVPERMILPSmi(X86::VPSHUFDZmi);
+ case X86::VPERMILPSZ128mikz:
+ return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz);
+ case X86::VPERMILPSZ256mikz:
+ return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz);
+ case X86::VPERMILPSZmikz:
+ return ProcessVPERMILPSmi(X86::VPSHUFDZmikz);
+ case X86::VPERMILPSZ128mik:
+ return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik);
+ case X86::VPERMILPSZ256mik:
+ return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik);
+ case X86::VPERMILPSZmik:
+ return ProcessVPERMILPSmi(X86::VPSHUFDZmik);
+
+ case X86::MOVLHPSrr:
+ case X86::UNPCKLPDrr:
+ return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri);
+ case X86::VMOVLHPSrr:
+ case X86::VUNPCKLPDrr:
+ return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr, X86::VSHUFPDrri);
+ case X86::VUNPCKLPDYrr:
+ return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr, X86::VSHUFPDYrri);
+ // VMOVLHPS is always 128 bits.
+ case X86::VMOVLHPSZrr:
+ case X86::VUNPCKLPDZ128rr:
+ return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr, X86::VSHUFPDZ128rri);
+ case X86::VUNPCKLPDZ256rr:
+ return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr, X86::VSHUFPDZ256rri);
+ case X86::VUNPCKLPDZrr:
+ return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr, X86::VSHUFPDZrri);
+ case X86::VUNPCKLPDZ128rrk:
+ return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk, X86::VSHUFPDZ128rrik);
+ case X86::VUNPCKLPDZ256rrk:
+ return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk, X86::VSHUFPDZ256rrik);
+ case X86::VUNPCKLPDZrrk:
+ return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk, X86::VSHUFPDZrrik);
+ case X86::VUNPCKLPDZ128rrkz:
+ return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
+ case X86::VUNPCKLPDZ256rrkz:
+ return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
+ case X86::VUNPCKLPDZrrkz:
+ return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz, X86::VSHUFPDZrrikz);
+ case X86::UNPCKHPDrr:
+ return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr, X86::SHUFPDrri);
+ case X86::VUNPCKHPDrr:
+ return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr, X86::VSHUFPDrri);
+ case X86::VUNPCKHPDYrr:
+ return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr, X86::VSHUFPDYrri);
+ case X86::VUNPCKHPDZ128rr:
+ return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr, X86::VSHUFPDZ128rri);
+ case X86::VUNPCKHPDZ256rr:
+ return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr, X86::VSHUFPDZ256rri);
+ case X86::VUNPCKHPDZrr:
+ return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr, X86::VSHUFPDZrri);
+ case X86::VUNPCKHPDZ128rrk:
+ return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk, X86::VSHUFPDZ128rrik);
+ case X86::VUNPCKHPDZ256rrk:
+ return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk, X86::VSHUFPDZ256rrik);
+ case X86::VUNPCKHPDZrrk:
+ return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk, X86::VSHUFPDZrrik);
+ case X86::VUNPCKHPDZ128rrkz:
+ return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
+ case X86::VUNPCKHPDZ256rrkz:
+ return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
+ case X86::VUNPCKHPDZrrkz:
+ return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz, X86::VSHUFPDZrrikz);
+ case X86::UNPCKLPDrm:
+ return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm);
+ case X86::VUNPCKLPDrm:
+ return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm);
+ case X86::VUNPCKLPDYrm:
+ return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm);
+ case X86::VUNPCKLPDZ128rm:
+ return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm);
+ case X86::VUNPCKLPDZ256rm:
+ return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm);
+ case X86::VUNPCKLPDZrm:
+ return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm);
+ case X86::VUNPCKLPDZ128rmk:
+ return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk);
+ case X86::VUNPCKLPDZ256rmk:
+ return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk);
+ case X86::VUNPCKLPDZrmk:
+ return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk);
+ case X86::VUNPCKLPDZ128rmkz:
+ return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz);
+ case X86::VUNPCKLPDZ256rmkz:
+ return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz);
+ case X86::VUNPCKLPDZrmkz:
+ return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz);
+ case X86::UNPCKHPDrm:
+ return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm);
+ case X86::VUNPCKHPDrm:
+ return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm);
+ case X86::VUNPCKHPDYrm:
+ return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm);
+ case X86::VUNPCKHPDZ128rm:
+ return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm);
+ case X86::VUNPCKHPDZ256rm:
+ return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm);
+ case X86::VUNPCKHPDZrm:
+ return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm);
+ case X86::VUNPCKHPDZ128rmk:
+ return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk);
+ case X86::VUNPCKHPDZ256rmk:
+ return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk);
+ case X86::VUNPCKHPDZrmk:
+ return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk);
+ case X86::VUNPCKHPDZ128rmkz:
+ return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz);
+ case X86::VUNPCKHPDZ256rmkz:
+ return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz);
+ case X86::VUNPCKHPDZrmkz:
+ return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz);
+
+ case X86::UNPCKLPSrr:
+ return ProcessUNPCKPS(X86::PUNPCKLDQrr);
+ case X86::VUNPCKLPSrr:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQrr);
+ case X86::VUNPCKLPSYrr:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQYrr);
+ case X86::VUNPCKLPSZ128rr:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr);
+ case X86::VUNPCKLPSZ256rr:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr);
+ case X86::VUNPCKLPSZrr:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQZrr);
+ case X86::VUNPCKLPSZ128rrk:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk);
+ case X86::VUNPCKLPSZ256rrk:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk);
+ case X86::VUNPCKLPSZrrk:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk);
+ case X86::VUNPCKLPSZ128rrkz:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz);
+ case X86::VUNPCKLPSZ256rrkz:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz);
+ case X86::VUNPCKLPSZrrkz:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz);
+ case X86::UNPCKHPSrr:
+ return ProcessUNPCKPS(X86::PUNPCKHDQrr);
+ case X86::VUNPCKHPSrr:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQrr);
+ case X86::VUNPCKHPSYrr:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQYrr);
+ case X86::VUNPCKHPSZ128rr:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr);
+ case X86::VUNPCKHPSZ256rr:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr);
+ case X86::VUNPCKHPSZrr:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQZrr);
+ case X86::VUNPCKHPSZ128rrk:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk);
+ case X86::VUNPCKHPSZ256rrk:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk);
+ case X86::VUNPCKHPSZrrk:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk);
+ case X86::VUNPCKHPSZ128rrkz:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz);
+ case X86::VUNPCKHPSZ256rrkz:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz);
+ case X86::VUNPCKHPSZrrkz:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz);
+ case X86::UNPCKLPSrm:
+ return ProcessUNPCKPS(X86::PUNPCKLDQrm);
+ case X86::VUNPCKLPSrm:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQrm);
+ case X86::VUNPCKLPSYrm:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQYrm);
+ case X86::VUNPCKLPSZ128rm:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm);
+ case X86::VUNPCKLPSZ256rm:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm);
+ case X86::VUNPCKLPSZrm:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQZrm);
+ case X86::VUNPCKLPSZ128rmk:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk);
+ case X86::VUNPCKLPSZ256rmk:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk);
+ case X86::VUNPCKLPSZrmk:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk);
+ case X86::VUNPCKLPSZ128rmkz:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz);
+ case X86::VUNPCKLPSZ256rmkz:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz);
+ case X86::VUNPCKLPSZrmkz:
+ return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz);
+ case X86::UNPCKHPSrm:
+ return ProcessUNPCKPS(X86::PUNPCKHDQrm);
+ case X86::VUNPCKHPSrm:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQrm);
+ case X86::VUNPCKHPSYrm:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQYrm);
+ case X86::VUNPCKHPSZ128rm:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm);
+ case X86::VUNPCKHPSZ256rm:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm);
+ case X86::VUNPCKHPSZrm:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQZrm);
+ case X86::VUNPCKHPSZ128rmk:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk);
+ case X86::VUNPCKHPSZ256rmk:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk);
+ case X86::VUNPCKHPSZrmk:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk);
+ case X86::VUNPCKHPSZ128rmkz:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz);
+ case X86::VUNPCKHPSZ256rmkz:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz);
+ case X86::VUNPCKHPSZrmkz:
+ return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz);
+ default:
+ return false;
+ }
+}
+
+bool X86FixupInstTuningPass::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "Start X86FixupInstTuning\n";);
+ bool Changed = false;
+ ST = &MF.getSubtarget<X86Subtarget>();
+ TII = ST->getInstrInfo();
+ SM = &ST->getSchedModel();
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
+ if (processInstruction(MF, MBB, I)) {
+ ++NumInstChanges;
+ Changed = true;
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << "End X86FixupInstTuning\n";);
+ return Changed;
+}
diff --git a/llvm/lib/Target/X86/X86FixupLEAs.cpp b/llvm/lib/Target/X86/X86FixupLEAs.cpp
index b01145809ac6..c702c015d7b3 100644
--- a/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -186,13 +186,9 @@ FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
// Only convert instructions that we've verified are safe.
return nullptr;
case X86::ADD64ri32:
- case X86::ADD64ri8:
case X86::ADD64ri32_DB:
- case X86::ADD64ri8_DB:
case X86::ADD32ri:
- case X86::ADD32ri8:
case X86::ADD32ri_DB:
- case X86::ADD32ri8_DB:
if (!MI.getOperand(2).isImm()) {
// convertToThreeAddress will call getImm()
// which requires isImm() to be true
@@ -374,15 +370,14 @@ static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {
static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
const MachineOperand &Offset) {
- bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
switch (LEAOpcode) {
default:
llvm_unreachable("Unexpected LEA instruction");
case X86::LEA32r:
case X86::LEA64_32r:
- return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
+ return X86::ADD32ri;
case X86::LEA64r:
- return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
+ return X86::ADD64ri32;
}
}
@@ -463,10 +458,8 @@ void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();
Register AluDestReg = AluI->getOperand(0).getReg();
- MachineBasicBlock::iterator CurInst = std::next(LeaI);
- while (CurInst != AluI) {
- for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {
- MachineOperand &Opnd = CurInst->getOperand(I);
+ for (MachineInstr &CurInst : llvm::make_range(std::next(LeaI), AluI)) {
+ for (MachineOperand &Opnd : CurInst.operands()) {
if (!Opnd.isReg())
continue;
Register Reg = Opnd.getReg();
@@ -485,7 +478,6 @@ void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,
*KilledIndex = &Opnd;
}
}
- ++CurInst;
}
}
@@ -786,12 +778,34 @@ void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
MachineInstr *NewMI = nullptr;
+ bool BaseOrIndexIsDst = DestReg == BaseReg || DestReg == IndexReg;
+  // First try to remove the base while sticking with LEA iff base == index and
+  // scale == 1. We can handle:
+  // 1. lea D(%base,%index,1) -> lea D(,%index,2)
+  // 2. lea D(%r13/%rbp,%index) -> lea D(,%index,2)
+  // Only do this if the LEA would otherwise be split into 2 instructions
+  // (either it has an Offset or neither base nor index is the dst).
+ if (IsScale1 && BaseReg == IndexReg &&
+ (hasLEAOffset(Offset) || (IsInefficientBase && !BaseOrIndexIsDst))) {
+ NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
+ .add(Dest)
+ .addReg(0)
+ .addImm(2)
+ .add(Index)
+ .add(Offset)
+ .add(Segment);
+ LLVM_DEBUG(NewMI->dump(););
+
+ MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
+ MBB.erase(I);
+ I = NewMI;
+ return;
+ } else if (IsScale1 && BaseOrIndexIsDst) {
+ // Try to replace LEA with one or two (for the 3-op LEA case)
+ // add instructions:
+ // 1.lea (%base,%index,1), %base => add %index,%base
+ // 2.lea (%base,%index,1), %index => add %base,%index
- // First try to replace LEA with one or two (for the 3-op LEA case)
- // add instructions:
- // 1.lea (%base,%index,1), %base => add %index,%base
- // 2.lea (%base,%index,1), %index => add %base,%index
- if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
if (DestReg != BaseReg)
std::swap(BaseReg, IndexReg);
diff --git a/llvm/lib/Target/X86/X86FixupVectorConstants.cpp b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
new file mode 100644
index 000000000000..94e221fd877c
--- /dev/null
+++ b/llvm/lib/Target/X86/X86FixupVectorConstants.cpp
@@ -0,0 +1,398 @@
+//===-- X86FixupVectorConstants.cpp - optimize constant generation -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file examines all full size vector constant pool loads and attempts to
+// replace them with smaller constant pool entries, including:
+// * Converting AVX512 memory-fold instructions to their broadcast-fold form
+// * TODO: Broadcasting of full width loads.
+// * TODO: Sign/Zero extension of full width loads.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrFoldTables.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-fixup-vector-constants"
+
+STATISTIC(NumInstChanges, "Number of instruction changes");
+
+namespace {
+class X86FixupVectorConstantsPass : public MachineFunctionPass {
+public:
+ static char ID;
+
+ X86FixupVectorConstantsPass() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "X86 Fixup Vector Constants";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineInstr &MI);
+
+ // This pass runs after regalloc and doesn't support VReg operands.
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+private:
+ const X86InstrInfo *TII = nullptr;
+ const X86Subtarget *ST = nullptr;
+ const MCSchedModel *SM = nullptr;
+};
+} // end anonymous namespace
+
+char X86FixupVectorConstantsPass::ID = 0;
+
+INITIALIZE_PASS(X86FixupVectorConstantsPass, DEBUG_TYPE, DEBUG_TYPE, false, false)
+
+FunctionPass *llvm::createX86FixupVectorConstants() {
+ return new X86FixupVectorConstantsPass();
+}
+
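+// Return the IR constant referenced by a constant-pool operand, or nullptr if
+// the operand is not an offset-zero constant-pool index or the entry is a
+// machine-specific constant pool entry.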
+static const Constant *getConstantFromPool(const MachineInstr &MI,
+ const MachineOperand &Op) {
+ if (!Op.isCPI() || Op.getOffset() != 0)
+ return nullptr;
+
+ ArrayRef<MachineConstantPoolEntry> Constants =
+ MI.getParent()->getParent()->getConstantPool()->getConstants();
+ const MachineConstantPoolEntry &ConstantEntry = Constants[Op.getIndex()];
+
+  // Bail if this is a machine constant pool entry; we won't be able to dig out
+ // anything useful.
+ if (ConstantEntry.isMachineConstantPoolEntry())
+ return nullptr;
+
+ return ConstantEntry.Val.ConstVal;
+}
+
+// Attempt to extract the full width of bits data from the constant.
+static std::optional<APInt> extractConstantBits(const Constant *C) {
+ unsigned NumBits = C->getType()->getPrimitiveSizeInBits();
+
+ if (auto *CInt = dyn_cast<ConstantInt>(C))
+ return CInt->getValue();
+
+ if (auto *CFP = dyn_cast<ConstantFP>(C))
+ return CFP->getValue().bitcastToAPInt();
+
+ if (auto *CV = dyn_cast<ConstantVector>(C)) {
+ if (auto *CVSplat = CV->getSplatValue(/*AllowUndefs*/ true)) {
+ if (std::optional<APInt> Bits = extractConstantBits(CVSplat)) {
+ assert((NumBits % Bits->getBitWidth()) == 0 && "Illegal splat");
+ return APInt::getSplat(NumBits, *Bits);
+ }
+ }
+ }
+
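+  // For constant data sequences, concatenate the per-element bit patterns
+  // (integer or floating point) into one APInt covering the full width.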
+ if (auto *CDS = dyn_cast<ConstantDataSequential>(C)) {
+ bool IsInteger = CDS->getElementType()->isIntegerTy();
+ bool IsFloat = CDS->getElementType()->isHalfTy() ||
+ CDS->getElementType()->isBFloatTy() ||
+ CDS->getElementType()->isFloatTy() ||
+ CDS->getElementType()->isDoubleTy();
+ if (IsInteger || IsFloat) {
+ APInt Bits = APInt::getZero(NumBits);
+ unsigned EltBits = CDS->getElementType()->getPrimitiveSizeInBits();
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
+ if (IsInteger)
+ Bits.insertBits(CDS->getElementAsAPInt(I), I * EltBits);
+ else
+ Bits.insertBits(CDS->getElementAsAPFloat(I).bitcastToAPInt(),
+ I * EltBits);
+ }
+ return Bits;
+ }
+ }
+
+ return std::nullopt;
+}
+
+// Attempt to compute the splat width of bits data by normalizing the splat to
+// remove undefs.
+static std::optional<APInt> getSplatableConstant(const Constant *C,
+ unsigned SplatBitWidth) {
+ const Type *Ty = C->getType();
+ assert((Ty->getPrimitiveSizeInBits() % SplatBitWidth) == 0 &&
+ "Illegal splat width");
+
+ if (std::optional<APInt> Bits = extractConstantBits(C))
+ if (Bits->isSplat(SplatBitWidth))
+ return Bits->trunc(SplatBitWidth);
+
+ // Detect general splats with undefs.
+ // TODO: Do we need to handle NumEltsBits > SplatBitWidth splitting?
+ if (auto *CV = dyn_cast<ConstantVector>(C)) {
+ unsigned NumOps = CV->getNumOperands();
+ unsigned NumEltsBits = Ty->getScalarSizeInBits();
+ unsigned NumScaleOps = SplatBitWidth / NumEltsBits;
+ if ((SplatBitWidth % NumEltsBits) == 0) {
+ // Collect the elements and ensure that within the repeated splat sequence
+ // they either match or are undef.
+ SmallVector<Constant *, 16> Sequence(NumScaleOps, nullptr);
+ for (unsigned Idx = 0; Idx != NumOps; ++Idx) {
+ if (Constant *Elt = CV->getAggregateElement(Idx)) {
+ if (isa<UndefValue>(Elt))
+ continue;
+ unsigned SplatIdx = Idx % NumScaleOps;
+ if (!Sequence[SplatIdx] || Sequence[SplatIdx] == Elt) {
+ Sequence[SplatIdx] = Elt;
+ continue;
+ }
+ }
+ return std::nullopt;
+ }
+ // Extract the constant bits forming the splat and insert into the bits
+ // data, leave undef as zero.
+ APInt SplatBits = APInt::getZero(SplatBitWidth);
+ for (unsigned I = 0; I != NumScaleOps; ++I) {
+ if (!Sequence[I])
+ continue;
+ if (std::optional<APInt> Bits = extractConstantBits(Sequence[I])) {
+ SplatBits.insertBits(*Bits, I * Bits->getBitWidth());
+ continue;
+ }
+ return std::nullopt;
+ }
+ return SplatBits;
+ }
+ }
+
+ return std::nullopt;
+}
+
+// Attempt to rebuild a normalized splat vector constant of the requested splat
+// width, built up of potentially smaller scalar values.
+// NOTE: We don't always bother converting to scalars if the vector length is 1.
+static Constant *rebuildSplatableConstant(const Constant *C,
+ unsigned SplatBitWidth) {
+ std::optional<APInt> Splat = getSplatableConstant(C, SplatBitWidth);
+ if (!Splat)
+ return nullptr;
+
+ // Determine scalar size to use for the constant splat vector, clamping as we
+ // might have found a splat smaller than the original constant data.
+ const Type *OriginalType = C->getType();
+ Type *SclTy = OriginalType->getScalarType();
+ unsigned NumSclBits = SclTy->getPrimitiveSizeInBits();
+ NumSclBits = std::min<unsigned>(NumSclBits, SplatBitWidth);
+
+ if (NumSclBits == 8) {
+ SmallVector<uint8_t> RawBits;
+ for (unsigned I = 0; I != SplatBitWidth; I += 8)
+ RawBits.push_back(Splat->extractBits(8, I).getZExtValue());
+ return ConstantDataVector::get(OriginalType->getContext(), RawBits);
+ }
+
+ if (NumSclBits == 16) {
+ SmallVector<uint16_t> RawBits;
+ for (unsigned I = 0; I != SplatBitWidth; I += 16)
+ RawBits.push_back(Splat->extractBits(16, I).getZExtValue());
+ if (SclTy->is16bitFPTy())
+ return ConstantDataVector::getFP(SclTy, RawBits);
+ return ConstantDataVector::get(OriginalType->getContext(), RawBits);
+ }
+
+ if (NumSclBits == 32) {
+ SmallVector<uint32_t> RawBits;
+ for (unsigned I = 0; I != SplatBitWidth; I += 32)
+ RawBits.push_back(Splat->extractBits(32, I).getZExtValue());
+ if (SclTy->isFloatTy())
+ return ConstantDataVector::getFP(SclTy, RawBits);
+ return ConstantDataVector::get(OriginalType->getContext(), RawBits);
+ }
+
+ // Fallback to i64 / double.
+ SmallVector<uint64_t> RawBits;
+ for (unsigned I = 0; I != SplatBitWidth; I += 64)
+ RawBits.push_back(Splat->extractBits(64, I).getZExtValue());
+ if (SclTy->isDoubleTy())
+ return ConstantDataVector::getFP(SclTy, RawBits);
+ return ConstantDataVector::get(OriginalType->getContext(), RawBits);
+}
+
+bool X86FixupVectorConstantsPass::processInstruction(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ MachineConstantPool *CP = MI.getParent()->getParent()->getConstantPool();
+ bool HasDQI = ST->hasDQI();
+ bool HasBWI = ST->hasBWI();
+
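+  // Try to replace the full-width constant-pool load at OperandNo with the
+  // narrowest broadcast that reproduces the same value. Each OpBcstN is the
+  // broadcast opcode to use for an N-bit splat, or 0 if that width is not
+  // available for this form.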
+ auto ConvertToBroadcast = [&](unsigned OpBcst256, unsigned OpBcst128,
+ unsigned OpBcst64, unsigned OpBcst32,
+ unsigned OpBcst16, unsigned OpBcst8,
+ unsigned OperandNo) {
+ assert(MI.getNumOperands() >= (OperandNo + X86::AddrNumOperands) &&
+ "Unexpected number of operands!");
+
+ MachineOperand &CstOp = MI.getOperand(OperandNo + X86::AddrDisp);
+ if (auto *C = getConstantFromPool(MI, CstOp)) {
+ // Attempt to detect a suitable splat from increasing splat widths.
+ std::pair<unsigned, unsigned> Broadcasts[] = {
+ {8, OpBcst8}, {16, OpBcst16}, {32, OpBcst32},
+ {64, OpBcst64}, {128, OpBcst128}, {256, OpBcst256},
+ };
+ for (auto [BitWidth, OpBcst] : Broadcasts) {
+ if (OpBcst) {
+ // Construct a suitable splat constant and adjust the MI to
+ // use the new constant pool entry.
+ if (Constant *NewCst = rebuildSplatableConstant(C, BitWidth)) {
+ unsigned NewCPI =
+ CP->getConstantPoolIndex(NewCst, Align(BitWidth / 8));
+ MI.setDesc(TII->get(OpBcst));
+ CstOp.setIndex(NewCPI);
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ };
+
+ // Attempt to convert full width vector loads into broadcast loads.
+ switch (Opc) {
+ /* FP Loads */
+ case X86::MOVAPDrm:
+ case X86::MOVAPSrm:
+ case X86::MOVUPDrm:
+ case X86::MOVUPSrm:
+ // TODO: SSE3 MOVDDUP Handling
+ return false;
+ case X86::VMOVAPDrm:
+ case X86::VMOVAPSrm:
+ case X86::VMOVUPDrm:
+ case X86::VMOVUPSrm:
+ return ConvertToBroadcast(0, 0, X86::VMOVDDUPrm, X86::VBROADCASTSSrm, 0, 0,
+ 1);
+ case X86::VMOVAPDYrm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPDYrm:
+ case X86::VMOVUPSYrm:
+ return ConvertToBroadcast(0, X86::VBROADCASTF128, X86::VBROADCASTSDYrm,
+ X86::VBROADCASTSSYrm, 0, 0, 1);
+ case X86::VMOVAPDZ128rm:
+ case X86::VMOVAPSZ128rm:
+ case X86::VMOVUPDZ128rm:
+ case X86::VMOVUPSZ128rm:
+ return ConvertToBroadcast(0, 0, X86::VMOVDDUPZ128rm,
+ X86::VBROADCASTSSZ128rm, 0, 0, 1);
+ case X86::VMOVAPDZ256rm:
+ case X86::VMOVAPSZ256rm:
+ case X86::VMOVUPDZ256rm:
+ case X86::VMOVUPSZ256rm:
+ return ConvertToBroadcast(
+ 0, HasDQI ? X86::VBROADCASTF64X2Z128rm : X86::VBROADCASTF32X4Z256rm,
+ X86::VBROADCASTSDZ256rm, X86::VBROADCASTSSZ256rm, 0, 0, 1);
+ case X86::VMOVAPDZrm:
+ case X86::VMOVAPSZrm:
+ case X86::VMOVUPDZrm:
+ case X86::VMOVUPSZrm:
+ return ConvertToBroadcast(
+ HasDQI ? X86::VBROADCASTF32X8rm : X86::VBROADCASTF64X4rm,
+ HasDQI ? X86::VBROADCASTF64X2rm : X86::VBROADCASTF32X4rm,
+ X86::VBROADCASTSDZrm, X86::VBROADCASTSSZrm, 0, 0, 1);
+ /* Integer Loads */
+ case X86::VMOVDQArm:
+ case X86::VMOVDQUrm:
+ if (ST->hasAVX2())
+ return ConvertToBroadcast(0, 0, X86::VPBROADCASTQrm, X86::VPBROADCASTDrm,
+ X86::VPBROADCASTWrm, X86::VPBROADCASTBrm, 1);
+ return ConvertToBroadcast(0, 0, X86::VMOVDDUPrm, X86::VBROADCASTSSrm, 0, 0,
+ 1);
+ case X86::VMOVDQAYrm:
+ case X86::VMOVDQUYrm:
+ if (ST->hasAVX2())
+ return ConvertToBroadcast(0, X86::VBROADCASTI128, X86::VPBROADCASTQYrm,
+ X86::VPBROADCASTDYrm, X86::VPBROADCASTWYrm,
+ X86::VPBROADCASTBYrm, 1);
+ return ConvertToBroadcast(0, X86::VBROADCASTF128, X86::VBROADCASTSDYrm,
+ X86::VBROADCASTSSYrm, 0, 0, 1);
+ case X86::VMOVDQA32Z128rm:
+ case X86::VMOVDQA64Z128rm:
+ case X86::VMOVDQU32Z128rm:
+ case X86::VMOVDQU64Z128rm:
+ return ConvertToBroadcast(0, 0, X86::VPBROADCASTQZ128rm,
+ X86::VPBROADCASTDZ128rm,
+ HasBWI ? X86::VPBROADCASTWZ128rm : 0,
+ HasBWI ? X86::VPBROADCASTBZ128rm : 0, 1);
+ case X86::VMOVDQA32Z256rm:
+ case X86::VMOVDQA64Z256rm:
+ case X86::VMOVDQU32Z256rm:
+ case X86::VMOVDQU64Z256rm:
+ return ConvertToBroadcast(
+ 0, HasDQI ? X86::VBROADCASTI64X2Z128rm : X86::VBROADCASTI32X4Z256rm,
+ X86::VPBROADCASTQZ256rm, X86::VPBROADCASTDZ256rm,
+ HasBWI ? X86::VPBROADCASTWZ256rm : 0,
+ HasBWI ? X86::VPBROADCASTBZ256rm : 0, 1);
+ case X86::VMOVDQA32Zrm:
+ case X86::VMOVDQA64Zrm:
+ case X86::VMOVDQU32Zrm:
+ case X86::VMOVDQU64Zrm:
+ return ConvertToBroadcast(
+ HasDQI ? X86::VBROADCASTI32X8rm : X86::VBROADCASTI64X4rm,
+ HasDQI ? X86::VBROADCASTI64X2rm : X86::VBROADCASTI32X4rm,
+ X86::VPBROADCASTQZrm, X86::VPBROADCASTDZrm,
+ HasBWI ? X86::VPBROADCASTWZrm : 0, HasBWI ? X86::VPBROADCASTBZrm : 0,
+ 1);
+ }
+
+  // Attempt to find an AVX512 mapping from a full width memory-fold instruction
+ // to a broadcast-fold instruction variant.
+ if ((MI.getDesc().TSFlags & X86II::EncodingMask) == X86II::EVEX) {
+ unsigned OpBcst32 = 0, OpBcst64 = 0;
+ unsigned OpNoBcst32 = 0, OpNoBcst64 = 0;
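+    // Flags & TB_INDEX_MASK gives the operand number of the memory reference;
+    // the 32-bit and 64-bit table entries must agree on it.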
+ if (const X86MemoryFoldTableEntry *Mem2Bcst =
+ llvm::lookupBroadcastFoldTable(Opc, 32)) {
+ OpBcst32 = Mem2Bcst->DstOp;
+ OpNoBcst32 = Mem2Bcst->Flags & TB_INDEX_MASK;
+ }
+ if (const X86MemoryFoldTableEntry *Mem2Bcst =
+ llvm::lookupBroadcastFoldTable(Opc, 64)) {
+ OpBcst64 = Mem2Bcst->DstOp;
+ OpNoBcst64 = Mem2Bcst->Flags & TB_INDEX_MASK;
+ }
+ assert(((OpBcst32 == 0) || (OpBcst64 == 0) || (OpNoBcst32 == OpNoBcst64)) &&
+ "OperandNo mismatch");
+
+ if (OpBcst32 || OpBcst64) {
+ unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
+ return ConvertToBroadcast(0, 0, OpBcst64, OpBcst32, 0, 0, OpNo);
+ }
+ }
+
+ return false;
+}
+
+bool X86FixupVectorConstantsPass::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "Start X86FixupVectorConstants\n";);
+ bool Changed = false;
+ ST = &MF.getSubtarget<X86Subtarget>();
+ TII = ST->getInstrInfo();
+ SM = &ST->getSchedModel();
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (processInstruction(MF, MBB, MI)) {
+ ++NumInstChanges;
+ Changed = true;
+ }
+ }
+ }
+ LLVM_DEBUG(dbgs() << "End X86FixupVectorConstants\n";);
+ return Changed;
+}
diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp
index 699e83c6fe1e..7513b198e604 100644
--- a/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -931,8 +931,8 @@ void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
// Produce implicit-defs for free by using killed registers.
while (Kills && Defs) {
- unsigned KReg = countTrailingZeros(Kills);
- unsigned DReg = countTrailingZeros(Defs);
+ unsigned KReg = llvm::countr_zero(Kills);
+ unsigned DReg = llvm::countr_zero(Defs);
LLVM_DEBUG(dbgs() << "Renaming %fp" << KReg << " as imp %fp" << DReg
<< "\n");
std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]);
@@ -956,7 +956,7 @@ void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
// Manually kill the rest.
while (Kills) {
- unsigned KReg = countTrailingZeros(Kills);
+ unsigned KReg = llvm::countr_zero(Kills);
LLVM_DEBUG(dbgs() << "Killing %fp" << KReg << "\n");
freeStackSlotBefore(I, KReg);
Kills &= ~(1 << KReg);
@@ -964,7 +964,7 @@ void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
// Load zeros for all the imp-defs.
while(Defs) {
- unsigned DReg = countTrailingZeros(Defs);
+ unsigned DReg = llvm::countr_zero(Defs);
LLVM_DEBUG(dbgs() << "Defining %fp" << DReg << " as 0\n");
BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0));
pushReg(DReg);
@@ -1047,7 +1047,7 @@ void FPS::handleCall(MachineBasicBlock::iterator &I) {
if (!ClobbersFPStack)
return;
- unsigned N = countTrailingOnes(STReturns);
+ unsigned N = llvm::countr_one(STReturns);
// FP registers used for function return must be consecutive starting at
// FP0
@@ -1634,14 +1634,14 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
if (STUses && !isMask_32(STUses))
MI.emitError("fixed input regs must be last on the x87 stack");
- unsigned NumSTUses = countTrailingOnes(STUses);
+ unsigned NumSTUses = llvm::countr_one(STUses);
// Defs must be contiguous from the stack top. ST0-STn.
if (STDefs && !isMask_32(STDefs)) {
MI.emitError("output regs must be last on the x87 stack");
STDefs = NextPowerOf2(STDefs) - 1;
}
- unsigned NumSTDefs = countTrailingOnes(STDefs);
+ unsigned NumSTDefs = llvm::countr_one(STDefs);
// So must the clobbered stack slots. ST0-STm, m >= n.
if (STClobbers && !isMask_32(STDefs | STClobbers))
@@ -1651,7 +1651,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
unsigned STPopped = STUses & (STDefs | STClobbers);
if (STPopped && !isMask_32(STPopped))
MI.emitError("implicitly popped regs must be last on the x87 stack");
- unsigned NumSTPopped = countTrailingOnes(STPopped);
+ unsigned NumSTPopped = llvm::countr_one(STPopped);
LLVM_DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
<< NumSTPopped << ", and defines " << NumSTDefs
@@ -1727,7 +1727,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &Inst) {
// Note: this might be a non-optimal pop sequence. We might be able to do
// better by trying to pop in stack order or something.
while (FPKills) {
- unsigned FPReg = countTrailingZeros(FPKills);
+ unsigned FPReg = llvm::countr_zero(FPKills);
if (isLive(FPReg))
freeStackSlotAfter(Inst, FPReg);
FPKills &= ~(1U << FPReg);
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index 1606413c382b..a5a4f91299f3 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -19,7 +19,6 @@
#include "X86TargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -28,11 +27,13 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>
@@ -104,28 +105,12 @@ bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
(isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
}
-static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) {
- if (IsLP64) {
- if (isInt<8>(Imm))
- return X86::SUB64ri8;
- return X86::SUB64ri32;
- } else {
- if (isInt<8>(Imm))
- return X86::SUB32ri8;
- return X86::SUB32ri;
- }
+static unsigned getSUBriOpcode(bool IsLP64) {
+ return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
}
-static unsigned getADDriOpcode(bool IsLP64, int64_t Imm) {
- if (IsLP64) {
- if (isInt<8>(Imm))
- return X86::ADD64ri8;
- return X86::ADD64ri32;
- } else {
- if (isInt<8>(Imm))
- return X86::ADD32ri8;
- return X86::ADD32ri;
- }
+static unsigned getADDriOpcode(bool IsLP64) {
+ return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
}
static unsigned getSUBrrOpcode(bool IsLP64) {
@@ -137,14 +122,7 @@ static unsigned getADDrrOpcode(bool IsLP64) {
}
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
- if (IsLP64) {
- if (isInt<8>(Imm))
- return X86::AND64ri8;
- return X86::AND64ri32;
- }
- if (isInt<8>(Imm))
- return X86::AND32ri8;
- return X86::AND32ri;
+ return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
}
static unsigned getLEArOpcode(bool IsLP64) {
@@ -362,8 +340,8 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
} else {
bool IsSub = Offset < 0;
uint64_t AbsOffset = IsSub ? -Offset : Offset;
- const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
- : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
+ const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
+ : getADDriOpcode(Uses64BitFramePtr);
MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addImm(AbsOffset);
@@ -399,9 +377,8 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
unsigned Opc = PI->getOpcode();
int Offset = 0;
- if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
- Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
- PI->getOperand(0).getReg() == StackPtr){
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
+ PI->getOperand(0).getReg() == StackPtr) {
assert(PI->getOperand(1).getReg() == StackPtr);
Offset = PI->getOperand(2).getImm();
} else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
@@ -412,8 +389,7 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
PI->getOperand(5).getReg() == X86::NoRegister) {
// For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
Offset = PI->getOperand(4).getImm();
- } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
- Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
PI->getOperand(0).getReg() == StackPtr) {
assert(PI->getOperand(1).getReg() == StackPtr);
Offset = -PI->getOperand(2).getImm();
@@ -441,6 +417,10 @@ void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
MachineInstr::MIFlag Flag) const {
MachineFunction &MF = *MBB.getParent();
unsigned CFIIndex = MF.addFrameInst(CFIInst);
+
+ if (CFIInst.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
+ MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
+
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlag(Flag);
@@ -476,6 +456,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(
MachineFrameInfo &MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+ X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
// Add callee saved registers to move list.
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
@@ -487,13 +468,62 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
if (IsPrologue) {
- BuildCFI(MBB, MBBI, DL,
- MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+ if (X86FI->getStackPtrSaveMI()) {
+ // +2*SlotSize because the return address and ebp are at the bottom
+ // of the stack.
+ // | retaddr |
+ // | ebp |
+ // | |<--ebp
+ Offset += 2 * SlotSize;
+ SmallString<64> CfaExpr;
+ CfaExpr.push_back(dwarf::DW_CFA_expression);
+ uint8_t buffer[16];
+ CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
+ CfaExpr.push_back(2);
+ Register FramePtr = TRI->getFrameRegister(MF);
+ const Register MachineFramePtr =
+ STI.isTarget64BitILP32()
+ ? Register(getX86SubSuperRegister(FramePtr, 64))
+ : FramePtr;
+ unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
+ CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
+ CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
+ MachineInstr::FrameSetup);
+ } else {
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+ }
} else {
BuildCFI(MBB, MBBI, DL,
MCCFIInstruction::createRestore(nullptr, DwarfReg));
}
}
+ if (auto *MI = X86FI->getStackPtrSaveMI()) {
+ int FI = MI->getOperand(1).getIndex();
+ int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
+ SmallString<64> CfaExpr;
+ Register FramePtr = TRI->getFrameRegister(MF);
+ const Register MachineFramePtr =
+ STI.isTarget64BitILP32()
+ ? Register(getX86SubSuperRegister(FramePtr, 64))
+ : FramePtr;
+ unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
+ CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
+ uint8_t buffer[16];
+ CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
+ CfaExpr.push_back(dwarf::DW_OP_deref);
+
+ SmallString<64> DefCfaExpr;
+ DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
+ DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
+ DefCfaExpr.append(CfaExpr.str());
+ // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
+ MachineInstr::FrameSetup);
+ }
}
void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
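
The CFI escapes built in the hunk above are raw byte strings: a DW_CFA_expression or DW_CFA_def_cfa_expression opcode followed by operands appended with encodeULEB128/encodeSLEB128 from the newly included llvm/Support/LEB128.h. As a rough sketch of what those helpers produce (not the LLVM implementation itself), unsigned LEB128 packs a value seven bits per byte, least-significant group first, with the top bit of each byte flagging that more bytes follow:

    #include <cstdint>
    #include <cstdio>

    // Minimal unsigned-LEB128 encoder; returns the number of bytes written.
    // A 64-bit value needs at most 10 bytes.
    static unsigned encodeULEB128Sketch(uint64_t Value, uint8_t *Buf) {
      unsigned N = 0;
      do {
        uint8_t Byte = Value & 0x7f; // low seven bits
        Value >>= 7;
        if (Value != 0)
          Byte |= 0x80;              // continuation bit
        Buf[N++] = Byte;
      } while (Value != 0);
      return N;
    }

    int main() {
      uint8_t Buf[10];
      unsigned Len = encodeULEB128Sketch(624485, Buf); // arbitrary example value
      for (unsigned I = 0; I < Len; ++I)
        std::printf("%02x ", Buf[I]);                  // prints: e5 8e 26
      std::printf("\n");
      return 0;
    }

The signed variant used for the frame-pointer offsets works the same way but sign-extends the final group, which is why a small negative offset such as -8 still fits in a single byte.
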
@@ -528,7 +558,7 @@ void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
BitVector GPRsToZero(TRI->getNumRegs());
for (MCRegister Reg : RegsToZero.set_bits())
if (TRI->isGeneralPurposeRegister(MF, Reg)) {
- GPRsToZero.set(getX86SubSuperRegisterOrZero(Reg, 32));
+ GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
RegsToZero.reset(Reg);
}
@@ -782,7 +812,7 @@ void X86FrameLowering::emitStackProbeInlineGenericLoop(
// save loop bound
{
const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
- const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, BoundOffset);
+ const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
.addReg(FinalStackProbed)
.addImm(BoundOffset)
@@ -1205,12 +1235,20 @@ uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) con
const MachineFrameInfo &MFI = MF.getFrameInfo();
Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
Align StackAlign = getStackAlign();
- if (MF.getFunction().hasFnAttribute("stackrealign")) {
+ bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
+ if (HasRealign) {
if (MFI.hasCalls())
MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
else if (MaxAlign < SlotSize)
MaxAlign = Align(SlotSize);
}
+
+ if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
+ if (HasRealign)
+ MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
+ else
+ MaxAlign = Align(16);
+ }
return MaxAlign.value();
}
@@ -1285,7 +1323,7 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
{
const unsigned SUBOpc =
- getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
+ getSUBriOpcode(Uses64BitFramePtr);
BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
.addReg(StackPtr)
.addImm(StackProbeSize)
@@ -1316,7 +1354,7 @@ void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
.setMIFlag(MachineInstr::FrameSetup);
const unsigned SUBOpc =
- getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
+ getSUBriOpcode(Uses64BitFramePtr);
BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
.addReg(StackPtr)
.addImm(StackProbeSize)
@@ -1509,6 +1547,42 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
DebugLoc DL;
+ Register ArgBaseReg;
+
+ // Emit extra prolog for argument stack slot reference.
+ if (auto *MI = X86FI->getStackPtrSaveMI()) {
+ // MI is the LEA instruction created in X86ArgumentStackSlotPass.
+ // Create an extra prolog for stack realignment.
+ ArgBaseReg = MI->getOperand(0).getReg();
+ // leal 4(%esp), %basereg
+ // .cfi_def_cfa %basereg, 0
+ // andl $-128, %esp
+ // pushl -4(%basereg)
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
+ ArgBaseReg)
+ .addUse(StackPtr)
+ .addImm(1)
+ .addUse(X86::NoRegister)
+ .addImm(SlotSize)
+ .addUse(X86::NoRegister)
+ .setMIFlag(MachineInstr::FrameSetup);
+ if (NeedsDwarfCFI) {
+ // .cfi_def_cfa %basereg, 0
+ unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
+ MachineInstr::FrameSetup);
+ }
+ BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
+ int64_t Offset = -(int64_t)SlotSize;
+ BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm: X86::PUSH32rmm))
+ .addReg(ArgBaseReg)
+ .addImm(1)
+ .addReg(X86::NoRegister)
+ .addImm(Offset)
+ .addReg(X86::NoRegister)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
// Space reserved for stack-based arguments when making a (ABI-guaranteed)
// tail call.
@@ -1557,7 +1631,19 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
Fn.arg_size() == 2) {
StackSize += 8;
MFI.setStackSize(StackSize);
- emitSPUpdate(MBB, MBBI, DL, -8, /*InEpilogue=*/false);
+
+ // Update the stack pointer by pushing a register. This is the instruction
+ // that would end up being emitted by a call to `emitSPUpdate`.
+ // Hard-coding the update to a push avoids emitting a second
+ // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
+ // probing isn't needed anyway for an 8-byte update.
+ // Pushing a register leaves us in a similar situation to a regular
+ // function call where we know that the address at (rsp-8) is writeable.
+ // That way we avoid any off-by-ones with stack probing for additional
+ // stack pointer updates later on.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
+ .addReg(X86::RAX, RegState::Undef)
+ .setMIFlag(MachineInstr::FrameSetup);
}
// If this is x86-64 and the Red Zone is not disabled, if we are a leaf
@@ -1628,10 +1714,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
- // If required, include space for extra hidden slot for stashing base pointer.
- if (X86FI->getRestoreBasePointer())
- FrameSize += SlotSize;
-
NumBytes = FrameSize -
(X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
@@ -1644,19 +1726,21 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
.addReg(MachineFramePtr, RegState::Kill)
.setMIFlag(MachineInstr::FrameSetup);
- if (NeedsDwarfCFI) {
+ if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
// Mark the place where EBP/RBP was saved.
// Define the current CFA rule to use the provided offset.
assert(StackSize);
BuildCFI(MBB, MBBI, DL,
- MCCFIInstruction::cfiDefCfaOffset(nullptr, -2 * stackGrowth),
+ MCCFIInstruction::cfiDefCfaOffset(
+ nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
MachineInstr::FrameSetup);
// Change the rule for the FramePtr to be an "offset" rule.
unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
BuildCFI(MBB, MBBI, DL,
MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
- 2 * stackGrowth),
+ 2 * stackGrowth -
+ (int)TailCallArgReserveSize),
MachineInstr::FrameSetup);
}
@@ -1684,7 +1768,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
} else {
// No initial context, store null so that there's no pointer that
// could be misused.
- BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i8))
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
.addImm(0)
.setMIFlag(MachineInstr::FrameSetup);
}
@@ -1703,7 +1787,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
.addImm(8)
.addUse(X86::NoRegister)
.setMIFlag(MachineInstr::FrameSetup);
- BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri8), X86::RSP)
+ BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
.addUse(X86::RSP)
.addImm(8)
.setMIFlag(MachineInstr::FrameSetup);
@@ -1719,13 +1803,28 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
if (NeedsDwarfCFI) {
- // Mark effective beginning of when frame pointer becomes valid.
- // Define the current CFA to use the EBP/RBP register.
- unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
- BuildCFI(
- MBB, MBBI, DL,
- MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
- MachineInstr::FrameSetup);
+ if (ArgBaseReg.isValid()) {
+ SmallString<64> CfaExpr;
+ CfaExpr.push_back(dwarf::DW_CFA_expression);
+ uint8_t buffer[16];
+ unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
+ CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
+ CfaExpr.push_back(2);
+ CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
+ CfaExpr.push_back(0);
+ // DW_CFA_expression: reg5 DW_OP_breg5 +0
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
+ MachineInstr::FrameSetup);
+ } else {
+ // Mark effective beginning of when frame pointer becomes valid.
+ // Define the current CFA to use the EBP/RBP register.
+ unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
+ BuildCFI(
+ MBB, MBBI, DL,
+ MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
+ MachineInstr::FrameSetup);
+ }
}
if (NeedsWinFPO) {
@@ -1792,7 +1891,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// Realign stack after we pushed callee-saved registers (so that we'll be
// able to calculate their offsets from the frame pointer).
// Don't do this for Win64, it needs to realign the stack after the prologue.
- if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) {
+ if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
+ !ArgBaseReg.isValid()) {
assert(HasFP && "There should be a frame pointer if stack is realigned.");
BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
@@ -2050,6 +2150,16 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
}
}
+ if (ArgBaseReg.isValid()) {
+ // Save argument base pointer.
+ auto *MI = X86FI->getStackPtrSaveMI();
+ int FI = MI->getOperand(1).getIndex();
+ unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
+ // movl %basereg, offset(%ebp)
+ addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
+ .addReg(ArgBaseReg)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
// Mark end of stack pointer adjustment.
@@ -2198,6 +2308,34 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
!MF.getTarget().getTargetTriple().isOSWindows()) &&
MF.needsFrameMoves();
+ Register ArgBaseReg;
+ if (auto *MI = X86FI->getStackPtrSaveMI()) {
+ unsigned Opc = X86::LEA32r;
+ Register StackReg = X86::ESP;
+ ArgBaseReg = MI->getOperand(0).getReg();
+ if (STI.is64Bit()) {
+ Opc = X86::LEA64r;
+ StackReg = X86::RSP;
+ }
+ // leal -4(%basereg), %esp
+ // .cfi_def_cfa %esp, 4
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
+ .addUse(ArgBaseReg)
+ .addImm(1)
+ .addUse(X86::NoRegister)
+ .addImm(-(int64_t)SlotSize)
+ .addUse(X86::NoRegister)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ if (NeedsDwarfCFI) {
+ unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
+ MachineInstr::FrameDestroy);
+ --MBBI;
+ }
+ --MBBI;
+ }
+
if (IsFunclet) {
assert(HasFP && "EH funclets without FP not yet implemented");
NumBytes = getWinEHFuncletFrameSize(MF);
@@ -2239,11 +2377,13 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
if (NeedsDwarfCFI) {
- unsigned DwarfStackPtr =
- TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
- BuildCFI(MBB, MBBI, DL,
- MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
- MachineInstr::FrameDestroy);
+ if (!ArgBaseReg.isValid()) {
+ unsigned DwarfStackPtr =
+ TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
+ BuildCFI(MBB, MBBI, DL,
+ MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
+ MachineInstr::FrameDestroy);
+ }
if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
BuildCFI(MBB, AfterPop, DL,
@@ -2266,13 +2406,22 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
(Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
(Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) &&
- (Opc != X86::ADD64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)))
+ (Opc != X86::ADD64ri32 || !PI->getFlag(MachineInstr::FrameDestroy)))
break;
FirstCSPop = PI;
}
--MBBI;
}
+ if (ArgBaseReg.isValid()) {
+ // Restore argument base pointer.
+ auto *MI = X86FI->getStackPtrSaveMI();
+ int FI = MI->getOperand(1).getIndex();
+ unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
+ // movl offset(%ebp), %basereg
+ addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
MBBI = FirstCSPop;
if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
@@ -2646,6 +2795,16 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
I.setFrameIdx(SlotIndex);
}
+ // Adjust the offset of the spill slot now that the exact callee-saved frame
+ // size is known.
+ if (X86FI->getRestoreBasePointer()) {
+ SpillSlotOffset -= SlotSize;
+ CalleeSavedFrameSize += SlotSize;
+
+ MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
+ // TODO: would saving the slot index be better?
+ X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
+ }
X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
@@ -2728,6 +2887,15 @@ bool X86FrameLowering::spillCalleeSavedRegisters(
.setMIFlag(MachineInstr::FrameSetup);
}
+ const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ if (X86FI->getRestoreBasePointer()) {
+ unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
+ Register BaseReg = this->TRI->getBaseRegister();
+ BuildMI(MBB, MI, DL, TII.get(Opc))
+ .addReg(BaseReg, getKillRegState(true))
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
// Make XMM regs spilled. X86 does not have ability of push/pop XMM.
// It can be done by spilling XMMs to stack frame.
for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
@@ -2825,6 +2993,16 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(
Register());
}
+ // Pop the stack slot used to spill the base pointer register.
+ MachineFunction &MF = *MBB.getParent();
+ const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ if (X86FI->getRestoreBasePointer()) {
+ unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
+ Register BaseReg = this->TRI->getBaseRegister();
+ BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
+ .setMIFlag(MachineInstr::FrameDestroy);
+ }
+
// POP GPRs.
unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
for (const CalleeSavedInfo &I : CSI) {
@@ -3085,9 +3263,9 @@ void X86FrameLowering::adjustForSegmentedStacks(
Reg11)
.addImm(X86FI->getArgumentStackSize());
} else {
- BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
+ BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
.addImm(X86FI->getArgumentStackSize());
- BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
+ BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
.addImm(StackSize);
}
@@ -3602,7 +3780,7 @@ MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
if (UsedReg == FramePtr) {
// ADD $offset, %ebp
- unsigned ADDri = getADDriOpcode(false, EndOffset);
+ unsigned ADDri = getADDriOpcode(false);
BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
.addReg(FramePtr)
.addImm(EndOffset)
@@ -3637,7 +3815,23 @@ int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
Register
X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
- return TRI->getDwarfRegNum(StackPtr, true);
+ return StackPtr;
+}
+
+TargetFrameLowering::DwarfFrameBase
+X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
+ const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
+ Register FrameRegister = RI->getFrameRegister(MF);
+ if (getInitialCFARegister(MF) == FrameRegister &&
+ MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) {
+ DwarfFrameBase FrameBase;
+ FrameBase.Kind = DwarfFrameBase::CFA;
+ FrameBase.Location.Offset =
+ -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF);
+ return FrameBase;
+ }
+
+ return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
}
namespace {
@@ -3862,8 +4056,16 @@ void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
MachineFunction &MF, RegScavenger *RS) const {
+ auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+
if (STI.is32Bit() && MF.hasEHFunclets())
restoreWinEHStackPointersInParent(MF);
+ // We have emitted the prolog and epilog; the stack pointer saving
+ // instruction is no longer needed.
+ if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
+ MI->eraseFromParent();
+ X86FI->setStackPtrSaveMI(nullptr);
+ }
}
void X86FrameLowering::restoreWinEHStackPointersInParent(
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index 3b76f2950eb9..2dc9ecc6109d 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -193,6 +193,8 @@ public:
Register getInitialCFARegister(const MachineFunction &MF) const override;
+ DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const override;
+
/// Return true if the function has a redzone (accessible bytes past the
/// frame of the top of stack function) as part of it's ABI.
bool has128ByteRedZone(const MachineFunction& MF) const;
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index d69e2c3ed493..4380f8c7ae92 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -487,11 +487,11 @@ namespace {
assert(N->getOpcode() == ISD::AND && "Unexpected opcode");
const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
- if (Val.countTrailingOnes() >= Width)
+ if (Val.countr_one() >= Width)
return true;
APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero;
- return Mask.countTrailingOnes() >= Width;
+ return Mask.countr_one() >= Width;
}
/// Return an SDNode that returns the value of the global base register.
@@ -565,7 +565,7 @@ namespace {
const SDLoc &dl, MVT VT, SDNode *Node);
MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
const SDLoc &dl, MVT VT, SDNode *Node,
- SDValue &InFlag);
+ SDValue &InGlue);
bool tryOptimizeRem8Extend(SDNode *N);
@@ -646,7 +646,7 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
case X86ISD::XOR:
case X86ISD::OR:
case ISD::ADD:
- case ISD::ADDCARRY:
+ case ISD::UADDO_CARRY:
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
@@ -1034,16 +1034,27 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
break;
}
case ISD::VSELECT: {
- // Replace VSELECT with non-mask conditions with with BLENDV.
- if (N->getOperand(0).getValueType().getVectorElementType() == MVT::i1)
+ // Replace VSELECT with non-mask conditions with BLENDV/VPTERNLOG.
+ EVT EleVT = N->getOperand(0).getValueType().getVectorElementType();
+ if (EleVT == MVT::i1)
break;
assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!");
- SDValue Blendv =
- CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0),
- N->getOperand(0), N->getOperand(1), N->getOperand(2));
+ assert(N->getValueType(0).getVectorElementType() != MVT::i16 &&
+ "We can't replace VSELECT with BLENDV in vXi16!");
+ SDValue R;
+ if (Subtarget->hasVLX() && CurDAG->ComputeNumSignBits(N->getOperand(0)) ==
+ EleVT.getSizeInBits()) {
+ R = CurDAG->getNode(X86ISD::VPTERNLOG, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), N->getOperand(1), N->getOperand(2),
+ CurDAG->getTargetConstant(0xCA, SDLoc(N), MVT::i8));
+ } else {
+ R = CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0),
+ N->getOperand(0), N->getOperand(1),
+ N->getOperand(2));
+ }
--I;
- CurDAG->ReplaceAllUsesWith(N, Blendv.getNode());
+ CurDAG->ReplaceAllUsesWith(N, R.getNode());
++I;
MadeChange = true;
continue;
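
The 0xCA immediate handed to VPTERNLOG above is the eight-entry truth table of a bitwise select, result = (cond & t) | (~cond & f), which is why it can replace BLENDV once every lane of the condition is known to be all-ones or all-zeros (the ComputeNumSignBits check). A small sketch that derives the constant, assuming the usual mapping of the three sources to truth-table index bits:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Truth-table index = A*4 + B*2 + C, where A is the condition operand,
      // B the "true" operand and C the "false" operand of the select.
      uint8_t Imm = 0;
      for (unsigned Idx = 0; Idx < 8; ++Idx) {
        unsigned A = (Idx >> 2) & 1, B = (Idx >> 1) & 1, C = Idx & 1;
        unsigned Bit = A ? B : C; // per-bit select
        Imm |= Bit << Idx;
      }
      std::printf("select truth table = 0x%02X\n", Imm); // prints 0xCA
      return 0;
    }
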
@@ -1704,23 +1715,21 @@ bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM,
// zero-extended to 64 bits and then added it to the base address, which gives
// unwanted results when the register holds a negative value.
// For more information see http://people.redhat.com/drepper/tls.pdf
- if (auto *C = dyn_cast<ConstantSDNode>(Address)) {
- if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
- !IndirectTlsSegRefs &&
- (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
- Subtarget->isTargetFuchsia())) {
- if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32)
- return true;
- switch (N->getPointerInfo().getAddrSpace()) {
- case X86AS::GS:
- AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
- return false;
- case X86AS::FS:
- AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
- return false;
+ if (isNullConstant(Address) && AM.Segment.getNode() == nullptr &&
+ !IndirectTlsSegRefs &&
+ (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() ||
+ Subtarget->isTargetFuchsia())) {
+ if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32)
+ return true;
+ switch (N->getPointerInfo().getAddrSpace()) {
+ case X86AS::GS:
+ AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
+ return false;
+ case X86AS::FS:
+ AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
+ return false;
// Address space X86AS::SS is not handled here, because it is not used to
// address TLS areas.
- }
}
}
@@ -1923,14 +1932,16 @@ static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
Mask != (0xffu << ScaleLog))
return true;
+ MVT XVT = X.getSimpleValueType();
MVT VT = N.getSimpleValueType();
SDLoc DL(N);
SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
- SDValue NewMask = DAG.getConstant(0xff, DL, VT);
- SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
- SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
+ SDValue NewMask = DAG.getConstant(0xff, DL, XVT);
+ SDValue Srl = DAG.getNode(ISD::SRL, DL, XVT, X, Eight);
+ SDValue And = DAG.getNode(ISD::AND, DL, XVT, Srl, NewMask);
SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
- SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);
+ SDValue Ext = DAG.getZExtOrTrunc(And, DL, VT);
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, Ext, ShlCount);
// Insert the new nodes into the topological ordering. We must do this in
// a valid topological ordering as nothing is going to go back and re-sort
@@ -1942,10 +1953,12 @@ static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
insertDAGNode(DAG, N, NewMask);
insertDAGNode(DAG, N, And);
insertDAGNode(DAG, N, ShlCount);
+ if (Ext != And)
+ insertDAGNode(DAG, N, Ext);
insertDAGNode(DAG, N, Shl);
DAG.ReplaceAllUsesWith(N, Shl);
DAG.RemoveDeadNode(N.getNode());
- AM.IndexReg = And;
+ AM.IndexReg = Ext;
AM.Scale = (1 << ScaleLog);
return false;
}
@@ -2054,8 +2067,8 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
return true;
unsigned ShiftAmt = Shift.getConstantOperandVal(1);
- unsigned MaskLZ = countLeadingZeros(Mask);
- unsigned MaskTZ = countTrailingZeros(Mask);
+ unsigned MaskLZ = llvm::countl_zero(Mask);
+ unsigned MaskTZ = llvm::countr_zero(Mask);
// The amount of shift we're trying to fit into the addressing mode is taken
// from the trailing zeros of the mask.
@@ -2066,7 +2079,8 @@ static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
if (AMShiftAmt == 0 || AMShiftAmt > 3) return true;
// We also need to ensure that mask is a continuous run of bits.
- if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;
+ if (llvm::countr_one(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64)
+ return true;
// Scale the leading zero count down based on the actual size of the value.
// Also scale it down based on the size of the shift.
@@ -2154,7 +2168,7 @@ static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N,
// The amount of shift we're trying to fit into the addressing mode is taken
// from the trailing zeros of the mask.
- unsigned AMShiftAmt = countTrailingZeros(Mask);
+ unsigned AMShiftAmt = llvm::countr_zero(Mask);
// There is nothing we can do here unless the mask is removing some bits.
// Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
@@ -2488,37 +2502,70 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// match the shift as a scale factor.
if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
break;
- if (N.getOperand(0).getOpcode() != ISD::SHL || !N.getOperand(0).hasOneUse())
- break;
- // Give up if the shift is not a valid scale factor [1,2,3].
- SDValue Shl = N.getOperand(0);
- auto *ShAmtC = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
- if (!ShAmtC || ShAmtC->getZExtValue() > 3)
- break;
+ // Peek through mask: zext(and(shl(x,c1),c2))
+ SDValue Src = N.getOperand(0);
+ APInt Mask = APInt::getAllOnes(Src.getScalarValueSizeInBits());
+ if (Src.getOpcode() == ISD::AND && Src.hasOneUse())
+ if (auto *MaskC = dyn_cast<ConstantSDNode>(Src.getOperand(1))) {
+ Mask = MaskC->getAPIntValue();
+ Src = Src.getOperand(0);
+ }
- // The narrow shift must only shift out zero bits (it must be 'nuw').
- // That makes it safe to widen to the destination type.
- APInt HighZeros = APInt::getHighBitsSet(Shl.getValueSizeInBits(),
- ShAmtC->getZExtValue());
- if (!CurDAG->MaskedValueIsZero(Shl.getOperand(0), HighZeros))
- break;
+ if (Src.getOpcode() == ISD::SHL && Src.hasOneUse()) {
+ // Give up if the shift is not a valid scale factor [1,2,3].
+ SDValue ShlSrc = Src.getOperand(0);
+ SDValue ShlAmt = Src.getOperand(1);
+ auto *ShAmtC = dyn_cast<ConstantSDNode>(ShlAmt);
+ if (!ShAmtC)
+ break;
+ unsigned ShAmtV = ShAmtC->getZExtValue();
+ if (ShAmtV > 3)
+ break;
- // zext (shl nuw i8 %x, C) to i32 --> shl (zext i8 %x to i32), (zext C)
- MVT VT = N.getSimpleValueType();
- SDLoc DL(N);
- SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Shl.getOperand(0));
- SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, Shl.getOperand(1));
+ // The narrow shift must only shift out zero bits (it must be 'nuw').
+ // That makes it safe to widen to the destination type.
+ APInt HighZeros =
+ APInt::getHighBitsSet(ShlSrc.getValueSizeInBits(), ShAmtV);
+ if (!CurDAG->MaskedValueIsZero(ShlSrc, HighZeros & Mask))
+ break;
- // Convert the shift to scale factor.
- AM.Scale = 1 << ShAmtC->getZExtValue();
- AM.IndexReg = Zext;
+ // zext (shl nuw i8 %x, C1) to i32
+ // --> shl (zext i8 %x to i32), (zext C1)
+ // zext (and (shl nuw i8 %x, C1), C2) to i32
+ // --> shl (zext i8 (and %x, C2 >> C1) to i32), (zext C1)
+ MVT SrcVT = ShlSrc.getSimpleValueType();
+ MVT VT = N.getSimpleValueType();
+ SDLoc DL(N);
+
+ SDValue Res = ShlSrc;
+ if (!Mask.isAllOnes()) {
+ Res = CurDAG->getConstant(Mask.lshr(ShAmtV), DL, SrcVT);
+ insertDAGNode(*CurDAG, N, Res);
+ Res = CurDAG->getNode(ISD::AND, DL, SrcVT, ShlSrc, Res);
+ insertDAGNode(*CurDAG, N, Res);
+ }
+ SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Res);
+ insertDAGNode(*CurDAG, N, Zext);
+ SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, ShlAmt);
+ insertDAGNode(*CurDAG, N, NewShl);
- insertDAGNode(*CurDAG, N, Zext);
- insertDAGNode(*CurDAG, N, NewShl);
- CurDAG->ReplaceAllUsesWith(N, NewShl);
- CurDAG->RemoveDeadNode(N.getNode());
- return false;
+ // Convert the shift to scale factor.
+ AM.Scale = 1 << ShAmtV;
+ AM.IndexReg = Zext;
+
+ CurDAG->ReplaceAllUsesWith(N, NewShl);
+ CurDAG->RemoveDeadNode(N.getNode());
+ return false;
+ }
+
+ // Try to fold the mask and shift into an extract and scale.
+ if (Src.getOpcode() == ISD::SRL && !Mask.isAllOnes() &&
+ !foldMaskAndShiftToExtract(*CurDAG, N, Mask.getZExtValue(), Src,
+ Src.getOperand(0), AM))
+ return false;
+
+ break;
}
}
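
The comment in the hunk above states the identity being exploited: zext (and (shl nuw i8 %x, C1), C2) to i32 is rewritten as shl (zext (and %x, C2 >> C1)), C1, so the shift can become the addressing-mode scale and the mask moves below the zero-extend. A brute-force sanity check over every i8 input for one arbitrarily chosen C1/C2 pair, skipping inputs that would fail the MaskedValueIsZero guard the code applies, is sketched below:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const unsigned C1 = 2;   // shift amount, must be a valid scale [1,3]
      const uint8_t C2 = 0xF0; // mask applied after the narrow shift
      // High bits of %x that the narrow shl would discard.
      const uint8_t HighZeros = uint8_t(~(0xFFu >> C1));
      for (unsigned X = 0; X < 256; ++X) {
        if (uint8_t(X) & HighZeros & C2)
          continue; // guard fails; the fold would not fire for this value
        uint32_t Narrow = uint8_t((uint8_t(X) << C1) & C2);             // original form
        uint32_t Wide = uint32_t(uint8_t(X) & uint8_t(C2 >> C1)) << C1; // folded form
        if (Narrow != Wide) {
          std::printf("mismatch at x=%u\n", X);
          return 1;
        }
      }
      std::printf("fold holds for C1=%u, C2=0x%02X\n", C1, C2);
      return 0;
    }
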
@@ -3364,26 +3411,6 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
llvm_unreachable("Invalid opcode!");
}
};
- auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) {
- switch (Opc) {
- case X86ISD::ADD:
- return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0);
- case X86ISD::ADC:
- return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0);
- case X86ISD::SUB:
- return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0);
- case X86ISD::SBB:
- return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0);
- case X86ISD::AND:
- return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0);
- case X86ISD::OR:
- return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0);
- case X86ISD::XOR:
- return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0);
- default:
- llvm_unreachable("Invalid opcode!");
- }
- };
auto SelectImmOpcode = [SelectOpcode](unsigned Opc) {
switch (Opc) {
case X86ISD::ADD:
@@ -3432,12 +3459,7 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD;
}
- // First try to fit this into an Imm8 operand. If it doesn't fit, then try
- // the larger immediate operand.
- if (MemVT != MVT::i8 && isInt<8>(OperandV)) {
- Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
- NewOpc = SelectImm8Opcode(Opc);
- } else if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
+ if (MemVT != MVT::i64 || isInt<32>(OperandV)) {
Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT);
NewOpc = SelectImmOpcode(Opc);
}
@@ -3482,9 +3504,11 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
// b) x & ~(-1 << nbits)
// c) x & (-1 >> (32 - y))
// d) x << (32 - y) >> (32 - y)
+// e) (1 << nbits) - 1
bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
assert(
- (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) &&
+ (Node->getOpcode() == ISD::ADD || Node->getOpcode() == ISD::AND ||
+ Node->getOpcode() == ISD::SRL) &&
"Should be either an and-mask, or right-shift after clearing high bits.");
// BEXTR is BMI instruction, BZHI is BMI2 instruction. We need at least one.
@@ -3670,6 +3694,8 @@ bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) {
if (!matchLowBitMask(Mask))
return false;
}
+ } else if (matchLowBitMask(SDValue(Node, 0))) {
+ X = CurDAG->getAllOnesConstant(SDLoc(Node), NVT);
} else if (!matchPatternD(Node))
return false;
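
Pattern (e) added above, (1 << nbits) - 1, is matched by synthesizing an all-ones source, because extracting the low nbits of all-ones is exactly that mask. A reference-semantics sketch (assuming nbits is below the bit width, which is all this case needs):

    #include <cstdint>
    #include <cstdio>

    // Reference semantics of a BZHI-style low-bit extract for N < 32:
    // keep bits [0, N), clear the rest.
    static uint32_t lowBitsRef(uint32_t X, unsigned N) {
      return X & ((1u << N) - 1);
    }

    int main() {
      // Pattern (a): x & ((1 << n) - 1) is the extract applied to x.
      std::printf("0x%08X\n", lowBitsRef(0xDEADBEEFu, 12)); // 0x00000EEF
      // Pattern (e): (1 << n) - 1 alone is the same extract applied to an
      // all-ones source, which is what the new matchBitExtract case feeds in.
      std::printf("0x%08X\n", lowBitsRef(~0u, 12));         // 0x00000FFF
      return 0;
    }
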
@@ -3944,7 +3970,7 @@ MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc,
MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
bool MayFoldLoad, const SDLoc &dl,
MVT VT, SDNode *Node,
- SDValue &InFlag) {
+ SDValue &InGlue) {
SDValue N0 = Node->getOperand(0);
SDValue N2 = Node->getOperand(2);
SDValue Imm = Node->getOperand(4);
@@ -3955,10 +3981,10 @@ MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm,
- N2.getOperand(0), InFlag };
+ N2.getOperand(0), InGlue };
SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue);
MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
- InFlag = SDValue(CNode, 3);
+ InGlue = SDValue(CNode, 3);
// Update the chain.
ReplaceUses(N2.getValue(1), SDValue(CNode, 2));
// Record the mem-refs
@@ -3966,10 +3992,10 @@ MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc,
return CNode;
}
- SDValue Ops[] = { N0, N2, Imm, InFlag };
+ SDValue Ops[] = { N0, N2, Imm, InGlue };
SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue);
MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops);
- InFlag = SDValue(CNode, 2);
+ InGlue = SDValue(CNode, 2);
return CNode;
}
@@ -4006,7 +4032,7 @@ bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
if (Add1C && Add1C->getAPIntValue().urem(Size) == 0) {
NewShiftAmt = Add0;
- } else if (ShiftAmt->getOpcode() != ISD::ADD &&
+ } else if (ShiftAmt->getOpcode() != ISD::ADD && ShiftAmt.hasOneUse() &&
((Add0C && Add0C->getAPIntValue().urem(Size) == Size - 1) ||
(Add1C && Add1C->getAPIntValue().urem(Size) == Size - 1))) {
// If we are doing a NOT on just the lower bits with (Size*N-1) -/^ X
@@ -4020,7 +4046,10 @@ bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
EVT OpVT = ShiftAmt.getValueType();
- NewShiftAmt = CurDAG->getNOT(DL, Add0C == nullptr ? Add0 : Add1, OpVT);
+ SDValue AllOnes = CurDAG->getAllOnesConstant(DL, OpVT);
+ NewShiftAmt = CurDAG->getNode(ISD::XOR, DL, OpVT,
+ Add0C == nullptr ? Add0 : Add1, AllOnes);
+ insertDAGNode(*CurDAG, OrigShiftAmt, AllOnes);
insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
// If we are shifting by N-X where N == 0 mod Size, then just shift by
// -X to generate a NEG instead of a SUB of a constant.
@@ -4181,7 +4210,7 @@ bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) {
if (Opcode == ISD::AND) {
// Find the smallest zext this could possibly be.
unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits();
- ZExtWidth = PowerOf2Ceil(std::max(ZExtWidth, 8U));
+ ZExtWidth = llvm::bit_ceil(std::max(ZExtWidth, 8U));
// Figure out which bits need to be zero to achieve that mask.
APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(),
@@ -4451,7 +4480,7 @@ bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
// implicit zeroing of 32 bit ops. So we should check if the lower 32 bits
// are negative too.
APInt MaskVal = And1C->getAPIntValue();
- unsigned MaskLZ = MaskVal.countLeadingZeros();
+ unsigned MaskLZ = MaskVal.countl_zero();
if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32))
return false;
@@ -4467,8 +4496,8 @@ bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) {
// If a negative constant would not allow a smaller encoding, there's no need
// to continue. Only change the constant when we know it's a win.
- unsigned MinWidth = NegMaskVal.getMinSignedBits();
- if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32))
+ unsigned MinWidth = NegMaskVal.getSignificantBits();
+ if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getSignificantBits() <= 32))
return false;
// Extend masks if we truncated above.
@@ -4869,21 +4898,21 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX;
SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg,
Node->getOperand(2), SDValue());
- SDValue InFlag = Chain.getValue(1);
+ SDValue InGlue = Chain.getValue(1);
if (IntNo == Intrinsic::x86_sse3_monitor ||
IntNo == Intrinsic::x86_monitorx) {
// Copy the other two operands to ECX and EDX.
Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3),
- InFlag);
- InFlag = Chain.getValue(1);
+ InGlue);
+ InGlue = Chain.getValue(1);
Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4),
- InFlag);
- InFlag = Chain.getValue(1);
+ InGlue);
+ InGlue = Chain.getValue(1);
}
MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
- { Chain, InFlag});
+ { Chain, InGlue});
ReplaceNode(Node, CNode);
return;
}
@@ -5042,6 +5071,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
[[fallthrough]];
case ISD::ADD:
+ if (Opcode == ISD::ADD && matchBitExtract(Node))
+ return;
+ [[fallthrough]];
case ISD::SUB: {
// Try to avoid folding immediates with multiple uses for optsize.
// This code tries to select to register form directly to avoid going
@@ -5190,7 +5222,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
std::swap(N0, N1);
}
- SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
N0, SDValue()).getValue(1);
MachineSDNode *CNode;
@@ -5204,7 +5236,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other);
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
- InFlag };
+ InGlue };
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
// Update the chain.
@@ -5220,7 +5252,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
else
VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
- CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag});
+ CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InGlue});
}
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
@@ -5272,14 +5304,14 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
std::swap(N0, N1);
}
- SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
+ SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
N0, SDValue()).getValue(1);
SDValue ResHi, ResLo;
if (foldedLoad) {
SDValue Chain;
MachineSDNode *CNode = nullptr;
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
- InFlag };
+ InGlue };
if (UseMULXHi) {
SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other);
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
@@ -5295,7 +5327,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
Chain = SDValue(CNode, 0);
- InFlag = SDValue(CNode, 1);
+ InGlue = SDValue(CNode, 1);
}
// Update the chain.
@@ -5303,7 +5335,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// Record the mem-refs
CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
} else {
- SDValue Ops[] = { N1, InFlag };
+ SDValue Ops[] = { N1, InGlue };
if (UseMULXHi) {
SDVTList VTs = CurDAG->getVTList(NVT);
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
@@ -5316,7 +5348,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
} else {
SDVTList VTs = CurDAG->getVTList(MVT::Glue);
SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
- InFlag = SDValue(CNode, 0);
+ InGlue = SDValue(CNode, 0);
}
}
@@ -5325,8 +5357,8 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
if (!ResLo) {
assert(LoReg && "Register for low half is not defined!");
ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg,
- NVT, InFlag);
- InFlag = ResLo.getValue(2);
+ NVT, InGlue);
+ InGlue = ResLo.getValue(2);
}
ReplaceUses(SDValue(Node, 0), ResLo);
LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG);
@@ -5337,8 +5369,8 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
if (!ResHi) {
assert(HiReg && "Register for high half is not defined!");
ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg,
- NVT, InFlag);
- InFlag = ResHi.getValue(2);
+ NVT, InGlue);
+ InGlue = ResHi.getValue(2);
}
ReplaceUses(SDValue(Node, 1), ResHi);
LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG);
@@ -5401,7 +5433,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
bool signBitIsZero = CurDAG->SignBitIsZero(N0);
- SDValue InFlag;
+ SDValue InGlue;
if (NVT == MVT::i8) {
// Special case for div8, just use a move with zero extension to AX to
// clear the upper 8 bits (AH).
@@ -5424,15 +5456,15 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
}
Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0),
SDValue());
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
} else {
- InFlag =
+ InGlue =
CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
LoReg, N0, SDValue()).getValue(1);
if (isSigned && !signBitIsZero) {
// Sign extend the low part into the high part.
- InFlag =
- SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
+ InGlue =
+ SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InGlue),0);
} else {
// Zero out the high part, effectively zero extending the input.
SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32);
@@ -5462,24 +5494,24 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
llvm_unreachable("Unexpected division source");
}
- InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
- ClrNode, InFlag).getValue(1);
+ InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
+ ClrNode, InGlue).getValue(1);
}
}
if (foldedLoad) {
SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
- InFlag };
+ InGlue };
MachineSDNode *CNode =
CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
- InFlag = SDValue(CNode, 1);
+ InGlue = SDValue(CNode, 1);
// Update the chain.
ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
// Record the mem-refs
CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()});
} else {
- InFlag =
- SDValue(CurDAG->getMachineNode(ROpc, dl, MVT::Glue, N1, InFlag), 0);
+ InGlue =
+ SDValue(CurDAG->getMachineNode(ROpc, dl, MVT::Glue, N1, InGlue), 0);
}
// Prevent use of AH in a REX instruction by explicitly copying it to
@@ -5495,9 +5527,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX;
SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
- MVT::Glue, AHCopy, InFlag);
+ MVT::Glue, AHCopy, InGlue);
SDValue Result(RNode, 0);
- InFlag = SDValue(RNode, 1);
+ InGlue = SDValue(RNode, 1);
Result =
CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
@@ -5509,8 +5541,8 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// Copy the division (low) result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- LoReg, NVT, InFlag);
- InFlag = Result.getValue(2);
+ LoReg, NVT, InGlue);
+ InGlue = Result.getValue(2);
ReplaceUses(SDValue(Node, 0), Result);
LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
dbgs() << '\n');
@@ -5518,8 +5550,8 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
// Copy the remainder (high) result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- HiReg, NVT, InFlag);
- InFlag = Result.getValue(2);
+ HiReg, NVT, InGlue);
+ InGlue = Result.getValue(2);
ReplaceUses(SDValue(Node, 1), Result);
LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG);
dbgs() << '\n');
@@ -5650,8 +5682,8 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
unsigned SubRegIdx;
MVT SubRegVT;
unsigned TestOpcode;
- unsigned LeadingZeros = countLeadingZeros(Mask);
- unsigned TrailingZeros = countTrailingZeros(Mask);
+ unsigned LeadingZeros = llvm::countl_zero(Mask);
+ unsigned TrailingZeros = llvm::countr_zero(Mask);
// With leading/trailing zeros, the transform is profitable if we can
// eliminate a movabsq or shrink a 32-bit immediate to 8-bit without
@@ -5833,11 +5865,11 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
break;
// Copy the two implicit register inputs.
- SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
+ SDValue InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX,
Node->getOperand(1),
SDValue()).getValue(1);
- InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
- Node->getOperand(3), InFlag).getValue(1);
+ InGlue = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX,
+ Node->getOperand(3), InGlue).getValue(1);
bool NeedIndex = !SDValue(Node, 0).use_empty();
bool NeedMask = !SDValue(Node, 1).use_empty();
@@ -5849,13 +5881,13 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr;
unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm;
CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node,
- InFlag);
+ InGlue);
ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0));
}
if (NeedIndex || !NeedMask) {
unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr;
unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm;
- CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag);
+ CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InGlue);
ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
}
// Connect the flag usage to the last instruction created.
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a33ee63c877e..c14d51bb4fa5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -27,7 +27,6 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
@@ -45,6 +44,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
@@ -106,8 +106,8 @@ static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
/// Returns true if a CC can dynamically exclude a register from the list of
/// callee-saved-registers (TargetRegistryInfo::getCalleeSavedRegs()) based on
-/// params/returns.
-static bool shouldDisableCalleeSavedRegisterCC(CallingConv::ID CC) {
+/// the return registers.
+static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
switch (CC) {
default:
return false;
@@ -118,6 +118,13 @@ static bool shouldDisableCalleeSavedRegisterCC(CallingConv::ID CC) {
}
}
+/// Returns true if a CC can dynamically exclude a register from the list of
+/// callee-saved-registers (TargetRegistryInfo::getCalleeSavedRegs()) based on
+/// the parameters.
+static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
+ return CC == CallingConv::X86_RegCall;
+}
+
X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
const X86Subtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
@@ -222,6 +229,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ABS , MVT::i64 , Custom);
}
+ // Absolute difference.
+ for (auto Op : {ISD::ABDS, ISD::ABDU}) {
+ setOperationAction(Op , MVT::i8 , Custom);
+ setOperationAction(Op , MVT::i16 , Custom);
+ setOperationAction(Op , MVT::i32 , Custom);
+ if (Subtarget.is64Bit())
+ setOperationAction(Op , MVT::i64 , Custom);
+ }
+
// Signed saturation subtraction.
setOperationAction(ISD::SSUBSAT , MVT::i8 , Custom);
setOperationAction(ISD::SSUBSAT , MVT::i16 , Custom);
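
ISD::ABDS and ISD::ABDU marked Custom above are the signed and unsigned absolute-difference nodes; the hunk only registers them, and the actual per-type lowering is handled elsewhere. For reference, their scalar semantics are simply |a - b| under the respective interpretation (a sketch, not the sequence X86 emits):

    #include <cstdint>
    #include <cstdio>

    // abdu(a, b): absolute difference with both operands read as unsigned.
    static uint32_t abdu32(uint32_t A, uint32_t B) {
      return A > B ? A - B : B - A;
    }

    // abds(a, b): absolute difference with both operands read as signed; the
    // subtraction is widened so INT32_MIN cannot overflow.
    static uint32_t abds32(int32_t A, int32_t B) {
      int64_t D = int64_t(A) - int64_t(B);
      return uint32_t(D < 0 ? -D : D);
    }

    int main() {
      std::printf("abdu(1, 0xFFFFFFFF) = %u\n", abdu32(1u, 0xFFFFFFFFu)); // 4294967294
      std::printf("abds(1, -1)         = %u\n", abds32(1, -1));           // 2
      return 0;
    }
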
@@ -380,6 +396,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
setOperationAction(ISD::GET_ROUNDING , MVT::i32 , Custom);
setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom);
+ setOperationAction(ISD::GET_FPENV_MEM , MVT::Other, Custom);
+ setOperationAction(ISD::SET_FPENV_MEM , MVT::Other, Custom);
+ setOperationAction(ISD::RESET_FPENV , MVT::Other, Custom);
}
// Promote the i8 variants and force them on up to i32 which has a shorter
@@ -513,7 +532,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (Subtarget.hasSSEPrefetch() || Subtarget.hasThreeDNow())
- setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
+ setOperationAction(ISD::PREFETCH , MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
@@ -603,6 +622,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FROUND, VT, Action);
setOperationAction(ISD::FROUNDEVEN, VT, Action);
setOperationAction(ISD::FTRUNC, VT, Action);
+ setOperationAction(ISD::FLDEXP, VT, Action);
};
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
@@ -662,6 +682,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FMAXIMUM, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FPOW, MVT::f16, Promote);
+ setOperationAction(ISD::STRICT_FLDEXP, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FLOG, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FLOG2, MVT::f16, Promote);
setOperationAction(ISD::STRICT_FLOG10, MVT::f16, Promote);
@@ -995,6 +1016,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
+ setOperationAction(ISD::FMAXIMUM, MVT::f32, Custom);
+ setOperationAction(ISD::FMINIMUM, MVT::f32, Custom);
+
setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
setOperationAction(ISD::FABS, MVT::v4f32, Custom);
setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
@@ -1031,6 +1055,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
: &X86::VR128RegClass);
+ for (auto VT : { MVT::f64, MVT::v4f32, MVT::v2f64 }) {
+ setOperationAction(ISD::FMAXIMUM, VT, Custom);
+ setOperationAction(ISD::FMINIMUM, VT, Custom);
+ }
+
for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
setOperationAction(ISD::SDIV, VT, Custom);
@@ -1071,6 +1100,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
}
+ setOperationAction(ISD::ABDU, MVT::v16i8, Custom);
+ setOperationAction(ISD::ABDS, MVT::v16i8, Custom);
+ setOperationAction(ISD::ABDU, MVT::v8i16, Custom);
+ setOperationAction(ISD::ABDS, MVT::v8i16, Custom);
+ setOperationAction(ISD::ABDU, MVT::v4i32, Custom);
+ setOperationAction(ISD::ABDS, MVT::v4i32, Custom);
+
setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
@@ -1089,8 +1125,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
- setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::ABS, VT, Custom);
@@ -1100,6 +1134,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setCondCodeAction(ISD::SETLE, VT, Custom);
}
+ setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v2f64, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v4f32, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::v2f64, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::v4f32, Custom);
+
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
@@ -1199,9 +1240,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v2i64, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v4i64, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
// In the customized shift lowering, the legal v4i32/v2i64 cases
// in AVX2 will be recognized.
@@ -1267,6 +1318,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
+ for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
+ setOperationAction(ISD::ABDS, VT, Custom);
+ setOperationAction(ISD::ABDU, VT, Custom);
+ }
+
setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
setOperationAction(ISD::SADDSAT, MVT::v2i64, Custom);
setOperationAction(ISD::SSUBSAT, MVT::v2i64, Custom);
@@ -1365,6 +1421,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
+
+ setOperationAction(ISD::FMAXIMUM, VT, Custom);
+ setOperationAction(ISD::FMINIMUM, VT, Custom);
}
// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
@@ -1402,14 +1461,16 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// In the customized shift lowering, the legal v8i32/v4i64 cases
// in AVX2 will be recognized.
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
- setOperationAction(ISD::SRL, VT, Custom);
- setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::SRL, VT, Custom);
+ setOperationAction(ISD::SHL, VT, Custom);
+ setOperationAction(ISD::SRA, VT, Custom);
+ setOperationAction(ISD::ABDS, VT, Custom);
+ setOperationAction(ISD::ABDU, VT, Custom);
if (VT == MVT::v4i64) continue;
- setOperationAction(ISD::ROTL, VT, Custom);
- setOperationAction(ISD::ROTR, VT, Custom);
- setOperationAction(ISD::FSHL, VT, Custom);
- setOperationAction(ISD::FSHR, VT, Custom);
+ setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
+ setOperationAction(ISD::FSHL, VT, Custom);
+ setOperationAction(ISD::FSHR, VT, Custom);
}
// These types need custom splitting if their input is a 128-bit vector.
@@ -1432,15 +1493,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ANY_EXTEND, VT, Custom);
}
- setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v32i16, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v32i32, Custom);
+ setOperationAction(ISD::TRUNCATE, MVT::v32i64, Custom);
+
setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
- setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTLZ, VT, Custom);
@@ -1450,6 +1511,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setCondCodeAction(ISD::SETLE, VT, Custom);
}
+ setOperationAction(ISD::SETCC, MVT::v4f64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v8f32, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v4f64, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v8f32, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::v4f64, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::v8f32, Custom);
+
if (Subtarget.hasAnyFMA()) {
for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
MVT::v2f64, MVT::v4f64 }) {
@@ -1645,8 +1713,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
- setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::TRUNCATE, VT, Custom);
@@ -1662,7 +1728,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
}
- // This block controls legalization for 512-bit operations with 32/64 bit
+ // This block controls legalization for 512-bit operations with 8/16/32/64 bit
// elements. 512-bits can be disabled based on prefer-vector-width and
// required-vector-width function attributes.
if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
@@ -1687,6 +1753,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
+ setOperationAction(ISD::FMAXIMUM, VT, Custom);
+ setOperationAction(ISD::FMINIMUM, VT, Custom);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FMA, VT, Legal);
@@ -1694,16 +1762,20 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
}
- for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
+ for (MVT VT : { MVT::v16i1, MVT::v16i8 }) {
setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
}
- setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Custom);
+
+ for (MVT VT : { MVT::v16i16, MVT::v16i32 }) {
+ setOperationAction(ISD::FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
+ }
+
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Custom);
@@ -1745,7 +1817,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
@@ -1816,12 +1887,22 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::ROTL, VT, Custom);
setOperationAction(ISD::ROTR, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::ABDS, VT, Custom);
+ setOperationAction(ISD::ABDU, VT, Custom);
// The condition codes aren't legal in SSE/AVX and under AVX512 we use
// setcc all the way to isel and prefer SETGT in some isel patterns.
setCondCodeAction(ISD::SETLT, VT, Custom);
setCondCodeAction(ISD::SETLE, VT, Custom);
}
+
+ setOperationAction(ISD::SETCC, MVT::v8f64, Custom);
+ setOperationAction(ISD::SETCC, MVT::v16f32, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v8f64, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, MVT::v16f32, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::v8f64, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, MVT::v16f32, Custom);
+
for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
setOperationAction(ISD::UMAX, VT, Legal);
@@ -1829,8 +1910,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, VT, Legal);
setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
- setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
}
for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
@@ -2003,8 +2082,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
  // This block controls legalization of v32i1/v64i1 which are available with
- // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
- // useBWIRegs.
+  // AVX512BW.
if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
@@ -2069,6 +2147,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::FROUND, VT, Custom);
+
setOperationAction(ISD::LOAD, VT, Legal);
setOperationAction(ISD::STORE, VT, Legal);
@@ -2083,6 +2163,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
+ setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
};
// AVX512_FP16 scalar operations
@@ -2091,15 +2175,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::STRICT_FREM, MVT::f16, Promote);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
- setOperationAction(ISD::SETCC, MVT::f16, Custom);
- setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
- setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
- setOperationAction(ISD::FROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FROUND, MVT::f16, Promote);
setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal);
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
+ setOperationAction(ISD::FMAXIMUM, MVT::f16, Custom);
+ setOperationAction(ISD::FMINIMUM, MVT::f16, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
@@ -2144,9 +2226,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Legal);
setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Legal);
-
- setOperationAction(ISD::STRICT_FSETCC, MVT::v32i1, Custom);
- setOperationAction(ISD::STRICT_FSETCCS, MVT::v32i1, Custom);
}
if (Subtarget.hasVLX()) {
@@ -2199,9 +2278,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v8bf16, &X86::VR128XRegClass);
addRegisterClass(MVT::v16bf16, &X86::VR256XRegClass);
// We set the type action of bf16 to TypeSoftPromoteHalf, but we don't
- // provide the method to promote BUILD_VECTOR. Set the operation action
- // Custom to do the customization later.
+ // provide the method to promote BUILD_VECTOR and INSERT_VECTOR_ELT.
+    // Set the operation action to Custom to do the customization later.
setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::bf16, Custom);
for (auto VT : {MVT::v8bf16, MVT::v16bf16}) {
setF16Action(VT, Expand);
setOperationAction(ISD::FADD, VT, Expand);
@@ -2209,6 +2289,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMUL, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
addLegalFPImmediate(APFloat::getZero(APFloat::BFloat()));
}
@@ -2221,6 +2302,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FMUL, MVT::v32bf16, Expand);
setOperationAction(ISD::FDIV, MVT::v32bf16, Expand);
setOperationAction(ISD::BUILD_VECTOR, MVT::v32bf16, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32bf16, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
@@ -2271,10 +2353,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Custom);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f16, Custom);
}
-
- setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
}
if (Subtarget.hasAMXTILE()) {
@@ -2307,8 +2385,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMULO, VT, Custom);
// Support carry in as value rather than glue.
- setOperationAction(ISD::ADDCARRY, VT, Custom);
- setOperationAction(ISD::SUBCARRY, VT, Custom);
+ setOperationAction(ISD::UADDO_CARRY, VT, Custom);
+ setOperationAction(ISD::USUBO_CARRY, VT, Custom);
setOperationAction(ISD::SETCCCARRY, VT, Custom);
setOperationAction(ISD::SADDO_CARRY, VT, Custom);
setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
@@ -2544,7 +2622,7 @@ MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
return getRegisterTypeForCallingConv(Context, CC,
- VT.changeVectorElementTypeToInteger());
+ VT.changeVectorElementType(MVT::f16));
return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}
@@ -2579,7 +2657,7 @@ unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
return getNumRegistersForCallingConv(Context, CC,
- VT.changeVectorElementTypeToInteger());
+ VT.changeVectorElementType(MVT::f16));
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}
@@ -2608,6 +2686,10 @@ unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
return 2;
}
+ // Split vNbf16 vectors according to vNf16.
+ if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
+ VT = VT.changeVectorElementType(MVT::f16);
+
return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
NumIntermediates, RegisterVT);
}
@@ -2984,42 +3066,44 @@ Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
// tcbhead_t; use it instead of the usual global variable (see
// sysdeps/{i386,x86_64}/nptl/tls.h)
if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
- if (Subtarget.isTargetFuchsia()) {
- // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
- return SegmentOffset(IRB, 0x10, getAddressSpace());
- } else {
- unsigned AddressSpace = getAddressSpace();
- Module *M = IRB.GetInsertBlock()->getParent()->getParent();
- // Specially, some users may customize the base reg and offset.
- int Offset = M->getStackProtectorGuardOffset();
- // If we don't set -stack-protector-guard-offset value:
- // %fs:0x28, unless we're using a Kernel code model, in which case
- // it's %gs:0x28. gs:0x14 on i386.
- if (Offset == INT_MAX)
- Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
-
- StringRef GuardReg = M->getStackProtectorGuardReg();
- if (GuardReg == "fs")
- AddressSpace = X86AS::FS;
- else if (GuardReg == "gs")
- AddressSpace = X86AS::GS;
-
- // Use symbol guard if user specify.
- StringRef GuardSymb = M->getStackProtectorGuardSymbol();
- if (!GuardSymb.empty()) {
- GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
- if (!GV) {
- Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
- : Type::getInt32Ty(M->getContext());
- GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
- nullptr, GuardSymb, nullptr,
- GlobalValue::NotThreadLocal, AddressSpace);
- }
- return GV;
- }
-
- return SegmentOffset(IRB, Offset, AddressSpace);
- }
+ unsigned AddressSpace = getAddressSpace();
+
+ // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
+ if (Subtarget.isTargetFuchsia())
+ return SegmentOffset(IRB, 0x10, AddressSpace);
+
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+    // Specifically, some users may customize the base reg and offset.
+ int Offset = M->getStackProtectorGuardOffset();
+ // If we don't set -stack-protector-guard-offset value:
+ // %fs:0x28, unless we're using a Kernel code model, in which case
+ // it's %gs:0x28. gs:0x14 on i386.
+ if (Offset == INT_MAX)
+ Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
+
+ StringRef GuardReg = M->getStackProtectorGuardReg();
+ if (GuardReg == "fs")
+ AddressSpace = X86AS::FS;
+ else if (GuardReg == "gs")
+ AddressSpace = X86AS::GS;
+
+    // Use the symbol guard if the user specifies one.
+ StringRef GuardSymb = M->getStackProtectorGuardSymbol();
+ if (!GuardSymb.empty()) {
+ GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
+ if (!GV) {
+ Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
+ : Type::getInt32Ty(M->getContext());
+ GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
+ nullptr, GuardSymb, nullptr,
+ GlobalValue::NotThreadLocal, AddressSpace);
+ if (!Subtarget.isTargetDarwin())
+ GV->setDSOLocal(M->getDirectAccessExternalData());
+ }
+ return GV;
+ }
+
+ return SegmentOffset(IRB, Offset, AddressSpace);
}
return TargetLowering::getIRStackGuard(IRB);
}
@@ -3111,15 +3195,22 @@ const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
return ScratchRegs;
}
+ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
+ // FIXME: We should def X86::FPCW for x87 as well. But it affects a lot of lit
+ // tests at the moment, which is not what we expected.
+ static const MCPhysReg RCRegs[] = {X86::MXCSR};
+ return RCRegs;
+}
+
/// Lowers mask values (v*i1) to the local register values
/// \returns DAG node after lowering to register type
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
- const SDLoc &Dl, SelectionDAG &DAG) {
+ const SDLoc &DL, SelectionDAG &DAG) {
EVT ValVT = ValArg.getValueType();
if (ValVT == MVT::v1i1)
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
- DAG.getIntPtrConstant(0, Dl));
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
+ DAG.getIntPtrConstant(0, DL));
if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
(ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
@@ -3129,7 +3220,7 @@ static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
if (ValLoc == MVT::i32)
- ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
+ ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
return ValToCopy;
}
@@ -3140,12 +3231,12 @@ static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
return DAG.getBitcast(ValLoc, ValArg);
}
- return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
+ return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
}
/// Breaks v64i1 value into two registers and adds the new node to the DAG
static void Passv64i1ArgInRegs(
- const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
+ const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
CCValAssign &NextVA, const X86Subtarget &Subtarget) {
assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
@@ -3159,10 +3250,7 @@ static void Passv64i1ArgInRegs(
// Splitting the value into two i32 types
SDValue Lo, Hi;
- Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
- DAG.getConstant(0, Dl, MVT::i32));
- Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
- DAG.getConstant(1, Dl, MVT::i32));
+ std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
// Attach the two i32 types into corresponding registers
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
@@ -3179,9 +3267,10 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
// In some cases we need to disable registers from the default CSR list.
- // For example, when they are used for argument passing.
+ // For example, when they are used as return registers (preserve_* and X86's
+ // regcall) or for argument passing (X86's regcall).
bool ShouldDisableCalleeSavedRegister =
- shouldDisableCalleeSavedRegisterCC(CallConv) ||
+ shouldDisableRetRegFromCSR(CallConv) ||
MF.getFunction().hasFnAttribute("no_caller_saved_registers");
if (CallConv == CallingConv::X86_INTR && !Outs.empty())
@@ -3279,7 +3368,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
}
}
- SDValue Flag;
+ SDValue Glue;
SmallVector<SDValue, 6> RetOps;
RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
// Operand #1 = Bytes To Pop
@@ -3293,8 +3382,8 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
continue; // Don't emit a copytoreg.
}
- Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
- Flag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
+ Glue = Chain.getValue(1);
RetOps.push_back(
DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
}
@@ -3337,15 +3426,19 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
Register RetValReg
= (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
X86::RAX : X86::EAX;
- Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
- Flag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
+ Glue = Chain.getValue(1);
// RAX/EAX now acts like a return value.
RetOps.push_back(
DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
- // Add the returned register to the CalleeSaveDisableRegs list.
- if (ShouldDisableCalleeSavedRegister)
+ // Add the returned register to the CalleeSaveDisableRegs list. Don't do
+ // this however for preserve_most/preserve_all to minimize the number of
+ // callee-saved registers for these CCs.
+ if (ShouldDisableCalleeSavedRegister &&
+ CallConv != CallingConv::PreserveAll &&
+ CallConv != CallingConv::PreserveMost)
MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
}
@@ -3363,11 +3456,11 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
RetOps[0] = Chain; // Update chain.
- // Add the flag if we have it.
- if (Flag.getNode())
- RetOps.push_back(Flag);
+ // Add the glue if we have it.
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
- X86ISD::NodeType opcode = X86ISD::RET_FLAG;
+ X86ISD::NodeType opcode = X86ISD::RET_GLUE;
if (CallConv == CallingConv::X86_INTR)
opcode = X86ISD::IRET;
return DAG.getNode(opcode, dl, MVT::Other, RetOps);
@@ -3390,7 +3483,7 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
bool HasRet = false;
for (const SDNode *U : Copy->uses()) {
- if (U->getOpcode() != X86ISD::RET_FLAG)
+ if (U->getOpcode() != X86ISD::RET_GLUE)
return false;
// If we are returning more than one value, we can definitely
// not make a tail call see PR19530
@@ -3431,15 +3524,15 @@ EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
/// \param VA The current 32 bit value that needs to be assigned.
/// \param NextVA The next 32 bit value that needs to be assigned.
/// \param Root The parent DAG node.
-/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
+/// \param [in,out] InGlue Represents the SDValue in the parent DAG node for
///                         glue purposes. If the DAG is already using a
///                         physical register instead of a virtual one, we should glue
-/// our new SDValue to InFlag SDvalue.
+/// our new SDValue to the InGlue SDValue.
/// \return a new SDValue of size 64 bits.
static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
SDValue &Root, SelectionDAG &DAG,
- const SDLoc &Dl, const X86Subtarget &Subtarget,
- SDValue *InFlag = nullptr) {
+ const SDLoc &DL, const X86Subtarget &Subtarget,
+ SDValue *InGlue = nullptr) {
assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
assert(Subtarget.is32Bit() && "Expecting 32 bit target");
assert(VA.getValVT() == MVT::v64i1 &&
@@ -3456,22 +3549,22 @@ static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
const TargetRegisterClass *RC = &X86::GR32RegClass;
// Read a 32 bit value from the registers.
- if (nullptr == InFlag) {
+ if (nullptr == InGlue) {
// When no physical register is present,
// create an intermediate virtual register.
Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
- ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
+ ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
- ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
+ ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
} else {
// When a physical register is available read the value from it and glue
// the reads together.
ArgValueLo =
- DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
- *InFlag = ArgValueLo.getValue(2);
+ DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
+ *InGlue = ArgValueLo.getValue(2);
ArgValueHi =
- DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
- *InFlag = ArgValueHi.getValue(2);
+ DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
+ *InGlue = ArgValueHi.getValue(2);
}
// Convert the i32 type into v32i1 type.
@@ -3481,41 +3574,41 @@ static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
// Concatenate the two values together.
- return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
}
/// The function will lower a register of various sizes (8/16/32/64)
/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
/// \returns a DAG node containing the operand after lowering to mask type.
static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
- const EVT &ValLoc, const SDLoc &Dl,
+ const EVT &ValLoc, const SDLoc &DL,
SelectionDAG &DAG) {
SDValue ValReturned = ValArg;
if (ValVT == MVT::v1i1)
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
if (ValVT == MVT::v64i1) {
    // On a 32-bit machine, this case is handled by getv64i1Argument
assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
    // On a 64-bit machine, there is no need to truncate the value, only to bitcast it
} else {
- MVT maskLen;
+ MVT MaskLenVT;
switch (ValVT.getSimpleVT().SimpleTy) {
case MVT::v8i1:
- maskLen = MVT::i8;
+ MaskLenVT = MVT::i8;
break;
case MVT::v16i1:
- maskLen = MVT::i16;
+ MaskLenVT = MVT::i16;
break;
case MVT::v32i1:
- maskLen = MVT::i32;
+ MaskLenVT = MVT::i32;
break;
default:
llvm_unreachable("Expecting a vector of i1 types");
}
- ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
+ ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
}
return DAG.getBitcast(ValVT, ValReturned);
}
@@ -3524,7 +3617,7 @@ static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
/// appropriate copies out of appropriate physical registers.
///
SDValue X86TargetLowering::LowerCallResult(
- SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
+ SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
uint32_t *RegMask) const {
@@ -3545,9 +3638,8 @@ SDValue X86TargetLowering::LowerCallResult(
// In some calling conventions we need to remove the used registers
// from the register mask.
if (RegMask) {
- for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
+ RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
}
// Report an error if there was an attempt to return FP values via XMM
@@ -3584,12 +3676,12 @@ SDValue X86TargetLowering::LowerCallResult(
assert(VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs");
Val =
- getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
+ getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
} else {
- Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
.getValue(1);
Val = Chain.getValue(0);
- InFlag = Chain.getValue(2);
+ InGlue = Chain.getValue(2);
}
if (RoundAfterCopy)
@@ -3681,8 +3773,7 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
static bool canGuaranteeTCO(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
- CC == CallingConv::HHVM || CC == CallingConv::Tail ||
- CC == CallingConv::SwiftTail);
+ CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
}
/// Return true if we might ever do TCO for calls with this calling convention.
@@ -3766,13 +3857,10 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
EVT ArgVT = Ins[i].ArgVT;
- // If this is a vector that has been split into multiple parts, and the
- // scalar size of the parts don't match the vector element size, then we can't
- // elide the copy. The parts will have padding between them instead of being
- // packed like a vector.
- bool ScalarizedAndExtendedVector =
- ArgVT.isVector() && !VA.getLocVT().isVector() &&
- VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
+ // If this is a vector that has been split into multiple parts, don't elide
+ // the copy. The layout on the stack may not match the packed in-memory
+ // layout.
+ bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
// This is an argument in memory. We might be able to perform copy elision.
// If the argument is passed directly in memory without any extension, then we
@@ -3780,7 +3868,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
// indirectly by pointer.
if (Flags.isCopyElisionCandidate() &&
VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
- !ScalarizedAndExtendedVector) {
+ !ScalarizedVector) {
SDValue PartAddr;
if (Ins[i].PartOffset == 0) {
// If this is a one-part value or the first part of a multi-part value,
@@ -3793,29 +3881,28 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
return DAG.getLoad(
ValVT, dl, Chain, PartAddr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
- } else {
- // This is not the first piece of an argument in memory. See if there is
- // already a fixed stack object including this offset. If so, assume it
- // was created by the PartOffset == 0 branch above and create a load from
- // the appropriate offset into it.
- int64_t PartBegin = VA.getLocMemOffset();
- int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
- int FI = MFI.getObjectIndexBegin();
- for (; MFI.isFixedObjectIndex(FI); ++FI) {
- int64_t ObjBegin = MFI.getObjectOffset(FI);
- int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
- if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
- break;
- }
- if (MFI.isFixedObjectIndex(FI)) {
- SDValue Addr =
- DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
- DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
- return DAG.getLoad(
- ValVT, dl, Chain, Addr,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
- Ins[i].PartOffset));
- }
+ }
+
+ // This is not the first piece of an argument in memory. See if there is
+ // already a fixed stack object including this offset. If so, assume it
+ // was created by the PartOffset == 0 branch above and create a load from
+ // the appropriate offset into it.
+ int64_t PartBegin = VA.getLocMemOffset();
+ int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
+ int FI = MFI.getObjectIndexBegin();
+ for (; MFI.isFixedObjectIndex(FI); ++FI) {
+ int64_t ObjBegin = MFI.getObjectOffset(FI);
+ int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
+ if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
+ break;
+ }
+ if (MFI.isFixedObjectIndex(FI)) {
+ SDValue Addr =
+ DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
+ DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
+ return DAG.getLoad(ValVT, dl, Chain, Addr,
+ MachinePointerInfo::getFixedStack(
+ DAG.getMachineFunction(), FI, Ins[i].PartOffset));
}
}
@@ -4240,9 +4327,11 @@ SDValue X86TargetLowering::LowerFormalArguments(
}
// If value is passed via pointer - do a load.
- if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
+ if (VA.getLocInfo() == CCValAssign::Indirect &&
+ !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
ArgValue =
DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
+ }
InVals.push_back(ArgValue);
}
@@ -4284,7 +4373,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
}
}
- unsigned StackSize = CCInfo.getNextStackOffset();
+ unsigned StackSize = CCInfo.getStackSize();
// Align stack specially for tail calls.
if (shouldGuaranteeTCO(CallConv,
MF.getTarget().Options.GuaranteedTailCallOpt))
@@ -4333,7 +4422,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
}
}
- if (shouldDisableCalleeSavedRegisterCC(CallConv) ||
+ if (shouldDisableArgRegFromCSR(CallConv) ||
F.hasFnAttribute("no_caller_saved_registers")) {
MachineRegisterInfo &MRI = MF.getRegInfo();
for (std::pair<Register, Register> Pair : MRI.liveins())
@@ -4810,12 +4899,12 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
// Build a sequence of copy-to-reg nodes chained together with token chain
- // and flag operands which copy the outgoing args into registers.
- SDValue InFlag;
+ // and glue operands which copy the outgoing args into registers.
+ SDValue InGlue;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
+ RegsToPass[i].second, InGlue);
+ InGlue = Chain.getValue(1);
}
if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
@@ -4837,13 +4926,13 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
}
- // Returns a chain & a flag for retval copy to use.
+ // Returns a chain & a glue for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SmallVector<SDValue, 8> Ops;
if (!IsSibcall && isTailCall && !IsMustTail) {
- Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InFlag, dl);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
+ InGlue = Chain.getValue(1);
}
Ops.push_back(Chain);
@@ -4893,8 +4982,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
uint32_t *RegMask = nullptr;
// In some calling conventions we need to remove the used physical registers
- // from the reg mask.
- if (shouldDisableCalleeSavedRegisterCC(CallConv) || HasNCSR) {
+ // from the reg mask. Create a new RegMask for such calling conventions.
+ // RegMask for calling conventions that disable only return registers (e.g.
+ // preserve_most) will be modified later in LowerCallResult.
+ bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
+ if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
// Allocate a new Reg Mask and copy Mask.
@@ -4904,10 +4996,11 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Make sure all sub registers of the argument registers are reset
// in the RegMask.
- for (auto const &RegPair : RegsToPass)
- for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
+ if (ShouldDisableArgRegs) {
+ for (auto const &RegPair : RegsToPass)
+ for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
+ RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
+ }
// Create the RegMask Operand according to our updated mask.
Ops.push_back(DAG.getRegisterMask(RegMask));
@@ -4916,8 +5009,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Ops.push_back(DAG.getRegisterMask(Mask));
}
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ if (InGlue.getNode())
+ Ops.push_back(InGlue);
if (isTailCall) {
// We used to do:
@@ -4932,6 +5025,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (IsCFICall)
Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
+ DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
return Ret;
}
@@ -4960,7 +5054,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (IsCFICall)
Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
@@ -4979,16 +5073,16 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// pops that struct pointer.
NumBytesForCalleeToPop = 4;
- // Returns a flag for retval copy to use.
+ // Returns a glue for retval copy to use.
if (!IsSibcall) {
Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
- InFlag, dl);
- InFlag = Chain.getValue(1);
+ InGlue, dl);
+ InGlue = Chain.getValue(1);
}
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
InVals, RegMask);
}
@@ -5202,10 +5296,9 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
-
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
- if (!ArgLocs[i].isRegLoc())
+ for (const auto &VA : ArgLocs)
+ if (!VA.isRegLoc())
return false;
}
@@ -5213,8 +5306,8 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
// stack. Therefore, if it's not used by the call it is not safe to optimize
// this into a sibcall.
bool Unused = false;
- for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- if (!Ins[i].Used) {
+ for (const auto &In : Ins) {
+ if (!In.Used) {
Unused = true;
break;
}
@@ -5223,8 +5316,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
- for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
- CCValAssign &VA = RVLocs[i];
+ for (const auto &VA : RVLocs) {
if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
return false;
}
@@ -5258,23 +5350,23 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
CCInfo.AllocateStack(32, Align(8));
CCInfo.AnalyzeCallOperands(Outs, CC_X86);
- StackArgsSize = CCInfo.getNextStackOffset();
+ StackArgsSize = CCInfo.getStackSize();
- if (CCInfo.getNextStackOffset()) {
+ if (CCInfo.getStackSize()) {
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
MachineFrameInfo &MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- SDValue Arg = OutVals[i];
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ const CCValAssign &VA = ArgLocs[I];
+ SDValue Arg = OutVals[I];
+ ISD::ArgFlagsTy Flags = Outs[I].Flags;
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
if (!VA.isRegLoc()) {
- if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
- MFI, MRI, TII, VA))
+ if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
+ TII, VA))
return false;
}
}
@@ -5294,8 +5386,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
// for the callee.
unsigned MaxInRegs = PositionIndependent ? 2 : 3;
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
+ for (const auto &VA : ArgLocs) {
if (!VA.isRegLoc())
continue;
Register Reg = VA.getLocReg();
@@ -5961,10 +6052,6 @@ bool X86TargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
return Subtarget.hasLZCNT();
}
-bool X86TargetLowering::hasBitPreservingFPLogic(EVT VT) const {
- return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
-}
-
bool X86TargetLowering::ShouldShrinkFPConstant(EVT VT) const {
// Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
// expensive than a straight movsd. On the other hand, it's important to
@@ -6082,8 +6169,8 @@ bool X86TargetLowering::
return NewShiftOpcode == ISD::SHL;
}
-bool X86TargetLowering::preferScalarizeSplat(unsigned Opc) const {
- return Opc != ISD::FP_EXTEND;
+bool X86TargetLowering::preferScalarizeSplat(SDNode *N) const {
+ return N->getOpcode() != ISD::FP_EXTEND;
}
bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
@@ -6161,13 +6248,21 @@ static bool isUndefOrEqual(int Val, int CmpVal) {
}
/// Return true if every element in Mask is the undef sentinel value or equal to
-/// the specified value..
+/// the specified value.
static bool isUndefOrEqual(ArrayRef<int> Mask, int CmpVal) {
return llvm::all_of(Mask, [CmpVal](int M) {
return (M == SM_SentinelUndef) || (M == CmpVal);
});
}
+/// Return true if every element in Mask, beginning at position Pos and ending
+/// at Pos+Size, is the undef sentinel value or equal to the specified value.
+static bool isUndefOrEqualInRange(ArrayRef<int> Mask, int CmpVal, unsigned Pos,
+ unsigned Size) {
+ return llvm::all_of(Mask.slice(Pos, Size),
+ [CmpVal](int M) { return isUndefOrEqual(M, CmpVal); });
+}
+
/// Val is either the undef or zero sentinel value.
static bool isUndefOrZero(int Val) {
return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
@@ -6420,7 +6515,7 @@ static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
return ConstsNode;
}
-static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
+static SDValue getConstVector(ArrayRef<APInt> Bits, const APInt &Undefs,
MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
assert(Bits.size() == Undefs.getBitWidth() &&
"Unequal constant and undef arrays");
@@ -6461,6 +6556,12 @@ static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
return DAG.getBitcast(VT, ConstsNode);
}
+static SDValue getConstVector(ArrayRef<APInt> Bits, MVT VT,
+ SelectionDAG &DAG, const SDLoc &dl) {
+ APInt Undefs = APInt::getZero(Bits.size());
+ return getConstVector(Bits, Undefs, VT, DAG, dl);
+}
+
/// Returns a vector of specified type with all zero elements.
static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
SelectionDAG &DAG, const SDLoc &dl) {
@@ -6674,6 +6775,32 @@ static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops,
return false;
}
+// Helper to check if \p V can be split into subvectors and the upper
+// subvectors are all undef, in which case return the lower subvectors.
+static bool isUpperSubvectorUndef(SDValue V, SmallVectorImpl<SDValue> &LowerOps,
+ SelectionDAG &DAG) {
+ SmallVector<SDValue> SubOps;
+ if (!collectConcatOps(V.getNode(), SubOps, DAG))
+ return false;
+
+ unsigned NumSubOps = SubOps.size();
+ assert((NumSubOps % 2) == 0 && "Unexpected number of subvectors");
+
+ ArrayRef<SDValue> UpperOps(SubOps.begin() + (NumSubOps / 2), SubOps.end());
+ if (any_of(UpperOps, [](SDValue Op) { return !Op.isUndef(); }))
+ return false;
+
+ LowerOps.assign(SubOps.begin(), SubOps.begin() + (NumSubOps / 2));
+ return true;
+}
+
+// Helper to check if we can access all the constituent subvectors without any
+// extract ops.
+static bool isFreeToSplitVector(SDNode *N, SelectionDAG &DAG) {
+ SmallVector<SDValue> Ops;
+ return collectConcatOps(N, Ops, DAG);
+}
+
static std::pair<SDValue, SDValue> splitVector(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) {
EVT VT = Op.getValueType();
@@ -7076,6 +7203,14 @@ static SDValue getEXTEND_VECTOR_INREG(unsigned Opcode, const SDLoc &DL, EVT VT,
return DAG.getNode(Opcode, DL, VT, In);
}
+// Create OR(AND(LHS,MASK),AND(RHS,~MASK)) bit select pattern
+static SDValue getBitSelect(const SDLoc &DL, MVT VT, SDValue LHS, SDValue RHS,
+ SDValue Mask, SelectionDAG &DAG) {
+ LHS = DAG.getNode(ISD::AND, DL, VT, LHS, Mask);
+ RHS = DAG.getNode(X86ISD::ANDNP, DL, VT, Mask, RHS);
+ return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
+}
+
// Match (xor X, -1) -> X.
// Match extract_subvector(xor X, -1) -> extract_subvector(X).
// Match concat_vectors(xor X, -1, xor Y, -1) -> concat_vectors(X, Y).
@@ -7375,6 +7510,24 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
Mask = CFP->getValueAPF().bitcastToAPInt();
return true;
}
+ if (auto *CDS = dyn_cast<ConstantDataSequential>(Cst)) {
+ Type *Ty = CDS->getType();
+ Mask = APInt::getZero(Ty->getPrimitiveSizeInBits());
+ Type *EltTy = CDS->getElementType();
+ bool IsInteger = EltTy->isIntegerTy();
+ bool IsFP =
+ EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy();
+ if (!IsInteger && !IsFP)
+ return false;
+ unsigned EltBits = EltTy->getPrimitiveSizeInBits();
+ for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I)
+ if (IsInteger)
+ Mask.insertBits(CDS->getElementAsAPInt(I), I * EltBits);
+ else
+ Mask.insertBits(CDS->getElementAsAPFloat(I).bitcastToAPInt(),
+ I * EltBits);
+ return true;
+ }
return false;
};
@@ -7404,7 +7557,7 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
SmallVector<APInt> SrcEltBits;
unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
if (BV->getConstantRawBits(true, SrcEltSizeInBits, SrcEltBits, Undefs)) {
- APInt UndefSrcElts = APInt::getNullValue(SrcEltBits.size());
+ APInt UndefSrcElts = APInt::getZero(SrcEltBits.size());
for (unsigned I = 0, E = SrcEltBits.size(); I != E; ++I)
if (Undefs[I])
UndefSrcElts.setBit(I);
@@ -7436,12 +7589,12 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
if (Op.getOpcode() == X86ISD::VBROADCAST_LOAD &&
EltSizeInBits <= VT.getScalarSizeInBits()) {
auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
- if (MemIntr->getMemoryVT().getScalarSizeInBits() != VT.getScalarSizeInBits())
+ if (MemIntr->getMemoryVT().getStoreSizeInBits() != VT.getScalarSizeInBits())
return false;
SDValue Ptr = MemIntr->getBasePtr();
if (const Constant *C = getTargetConstantFromBasePtr(Ptr)) {
- unsigned SrcEltSizeInBits = C->getType()->getScalarSizeInBits();
+ unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
APInt UndefSrcElts(NumSrcElts, 0);
@@ -7449,6 +7602,8 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
if (CollectConstantBits(C, SrcEltBits[0], UndefSrcElts, 0)) {
if (UndefSrcElts[0])
UndefSrcElts.setBits(0, NumSrcElts);
+ if (SrcEltBits[0].getBitWidth() != SrcEltSizeInBits)
+ SrcEltBits[0] = SrcEltBits[0].trunc(SrcEltSizeInBits);
SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
return CastBitData(UndefSrcElts, SrcEltBits);
}
@@ -8689,6 +8844,29 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
Mask.append(NumElts, 0);
return true;
}
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ SDValue Src = N.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ unsigned NumBitsPerSrcElt = SrcVT.getScalarSizeInBits();
+
+ // Extended source must be a simple vector.
+ if (!SrcVT.isSimple() || (SrcVT.getSizeInBits() % 128) != 0 ||
+ (NumBitsPerSrcElt % 8) != 0)
+ return false;
+
+ // We can only handle all-signbits extensions.
+ APInt DemandedSrcElts =
+ DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements());
+ if (DAG.ComputeNumSignBits(Src, DemandedSrcElts) != NumBitsPerSrcElt)
+ return false;
+
+ assert((NumBitsPerElt % NumBitsPerSrcElt) == 0 && "Unexpected extension");
+ unsigned Scale = NumBitsPerElt / NumBitsPerSrcElt;
+ for (unsigned I = 0; I != NumElts; ++I)
+ Mask.append(Scale, I);
+ Ops.push_back(Src);
+ return true;
+ }
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND_VECTOR_INREG:
@@ -9365,19 +9543,19 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
LoadMask.setBit(i);
LastLoadedElt = i;
}
- assert((ZeroMask.countPopulation() + UndefMask.countPopulation() +
- LoadMask.countPopulation()) == NumElems &&
+ assert((ZeroMask.popcount() + UndefMask.popcount() + LoadMask.popcount()) ==
+ NumElems &&
"Incomplete element masks");
// Handle Special Cases - all undef or undef/zero.
- if (UndefMask.countPopulation() == NumElems)
+ if (UndefMask.popcount() == NumElems)
return DAG.getUNDEF(VT);
- if ((ZeroMask.countPopulation() + UndefMask.countPopulation()) == NumElems)
+ if ((ZeroMask.popcount() + UndefMask.popcount()) == NumElems)
return VT.isInteger() ? DAG.getConstant(0, DL, VT)
: DAG.getConstantFP(0.0, DL, VT);
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- int FirstLoadedElt = LoadMask.countTrailingZeros();
+ int FirstLoadedElt = LoadMask.countr_zero();
SDValue EltBase = peekThroughBitcasts(Elts[FirstLoadedElt]);
EVT EltBaseVT = EltBase.getValueType();
assert(EltBaseVT.getSizeInBits() == EltBaseVT.getStoreSizeInBits() &&
@@ -9620,24 +9798,27 @@ static SDValue combineToConsecutiveLoads(EVT VT, SDValue Op, const SDLoc &DL,
static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
unsigned SplatBitSize, LLVMContext &C) {
unsigned ScalarSize = VT.getScalarSizeInBits();
- unsigned NumElm = SplatBitSize / ScalarSize;
- SmallVector<Constant *, 32> ConstantVec;
- for (unsigned i = 0; i < NumElm; i++) {
- APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
- Constant *Const;
+ auto getConstantScalar = [&](const APInt &Val) -> Constant * {
if (VT.isFloatingPoint()) {
- if (ScalarSize == 16) {
- Const = ConstantFP::get(C, APFloat(APFloat::IEEEhalf(), Val));
- } else if (ScalarSize == 32) {
- Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
- } else {
- assert(ScalarSize == 64 && "Unsupported floating point scalar size");
- Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val));
- }
- } else
- Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val);
- ConstantVec.push_back(Const);
+ if (ScalarSize == 16)
+ return ConstantFP::get(C, APFloat(APFloat::IEEEhalf(), Val));
+ if (ScalarSize == 32)
+ return ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
+ assert(ScalarSize == 64 && "Unsupported floating point scalar size");
+ return ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val));
+ }
+ return Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val);
+ };
+
+ if (ScalarSize == SplatBitSize)
+ return getConstantScalar(SplatValue);
+
+ unsigned NumElm = SplatBitSize / ScalarSize;
+ SmallVector<Constant *, 32> ConstantVec;
+ for (unsigned I = 0; I != NumElm; ++I) {
+ APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * I);
+ ConstantVec.push_back(getConstantScalar(Val));
}
return ConstantVector::get(ArrayRef<Constant *>(ConstantVec));
}
@@ -9753,44 +9934,38 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
LLVMContext *Ctx = DAG.getContext();
MVT PVT = TLI.getPointerTy(DAG.getDataLayout());
- if (Subtarget.hasAVX()) {
- if (SplatBitSize == 32 || SplatBitSize == 64 ||
- (SplatBitSize < 32 && Subtarget.hasAVX2())) {
- // Splatted value can fit in one INTEGER constant in constant pool.
- // Load the constant and broadcast it.
- MVT CVT = MVT::getIntegerVT(SplatBitSize);
- Type *ScalarTy = Type::getIntNTy(*Ctx, SplatBitSize);
- Constant *C = Constant::getIntegerValue(ScalarTy, SplatValue);
- SDValue CP = DAG.getConstantPool(C, PVT);
- unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
-
- Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();
- SDVTList Tys =
- DAG.getVTList(MVT::getVectorVT(CVT, Repeat), MVT::Other);
- SDValue Ops[] = {DAG.getEntryNode(), CP};
- MachinePointerInfo MPI =
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
- SDValue Brdcst = DAG.getMemIntrinsicNode(
- X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, CVT, MPI, Alignment,
- MachineMemOperand::MOLoad);
- return DAG.getBitcast(VT, Brdcst);
- }
- if (SplatBitSize > 64) {
- // Load the vector of constants and broadcast it.
- Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize,
- *Ctx);
- SDValue VCP = DAG.getConstantPool(VecC, PVT);
- unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits();
- MVT VVT = MVT::getVectorVT(VT.getScalarType(), NumElm);
- Align Alignment = cast<ConstantPoolSDNode>(VCP)->getAlign();
- SDVTList Tys = DAG.getVTList(VT, MVT::Other);
- SDValue Ops[] = {DAG.getEntryNode(), VCP};
- MachinePointerInfo MPI =
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
- return DAG.getMemIntrinsicNode(
- X86ISD::SUBV_BROADCAST_LOAD, dl, Tys, Ops, VVT, MPI, Alignment,
- MachineMemOperand::MOLoad);
- }
+ if (SplatBitSize == 32 || SplatBitSize == 64 ||
+ (SplatBitSize < 32 && Subtarget.hasAVX2())) {
+ // Load the constant scalar/subvector and broadcast it.
+ MVT CVT = MVT::getIntegerVT(SplatBitSize);
+ Constant *C = getConstantVector(VT, SplatValue, SplatBitSize, *Ctx);
+ SDValue CP = DAG.getConstantPool(C, PVT);
+ unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
+
+ Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign();
+ SDVTList Tys = DAG.getVTList(MVT::getVectorVT(CVT, Repeat), MVT::Other);
+ SDValue Ops[] = {DAG.getEntryNode(), CP};
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
+ SDValue Brdcst =
+ DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, CVT,
+ MPI, Alignment, MachineMemOperand::MOLoad);
+ return DAG.getBitcast(VT, Brdcst);
+ }
+ if (SplatBitSize > 64) {
+ // Load the vector of constants and broadcast it.
+ Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize, *Ctx);
+ SDValue VCP = DAG.getConstantPool(VecC, PVT);
+ unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits();
+ MVT VVT = MVT::getVectorVT(VT.getScalarType(), NumElm);
+ Align Alignment = cast<ConstantPoolSDNode>(VCP)->getAlign();
+ SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+ SDValue Ops[] = {DAG.getEntryNode(), VCP};
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
+ return DAG.getMemIntrinsicNode(X86ISD::SUBV_BROADCAST_LOAD, dl, Tys,
+ Ops, VVT, MPI, Alignment,
+ MachineMemOperand::MOLoad);
}
}
@@ -11200,6 +11375,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
APInt ZeroMask = APInt::getZero(NumElems);
APInt NonZeroMask = APInt::getZero(NumElems);
bool IsAllConstants = true;
+ bool OneUseFrozenUndefs = true;
SmallSet<SDValue, 8> Values;
unsigned NumConstants = NumElems;
for (unsigned i = 0; i < NumElems; ++i) {
@@ -11208,12 +11384,13 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
UndefMask.setBit(i);
continue;
}
- if (Elt.getOpcode() == ISD::FREEZE && Elt.getOperand(0).isUndef()) {
+ if (ISD::isFreezeUndef(Elt.getNode())) {
+ OneUseFrozenUndefs = OneUseFrozenUndefs && Elt->hasOneUse();
FrozenUndefMask.setBit(i);
continue;
}
Values.insert(Elt);
- if (!isa<ConstantSDNode>(Elt) && !isa<ConstantFPSDNode>(Elt)) {
+ if (!isIntOrFPConstant(Elt)) {
IsAllConstants = false;
NumConstants--;
}
@@ -11228,12 +11405,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (UndefMask.isAllOnes())
return DAG.getUNDEF(VT);
+ // All undef/freeze(undef) vector. Return a FREEZE UNDEF.
+ if (OneUseFrozenUndefs && (UndefMask | FrozenUndefMask).isAllOnes())
+ return DAG.getFreeze(DAG.getUNDEF(VT));
+
+ // All undef/freeze(undef)/zero vector. Return a zero vector.
+ if ((UndefMask | FrozenUndefMask | ZeroMask).isAllOnes())
+ return getZeroVector(VT, Subtarget, DAG, dl);
+
// If we have multiple FREEZE-UNDEF operands, we are likely going to end up
// lowering into a suboptimal insertion sequence. Instead, thaw the UNDEF in
// our source BUILD_VECTOR, create another FREEZE-UNDEF splat BUILD_VECTOR,
// and blend the FREEZE-UNDEF operands back in.
// FIXME: is this worthwhile even for a single FREEZE-UNDEF operand?
- if (unsigned NumFrozenUndefElts = FrozenUndefMask.countPopulation();
+ if (unsigned NumFrozenUndefElts = FrozenUndefMask.popcount();
NumFrozenUndefElts >= 2 && NumFrozenUndefElts < NumElems) {
SmallVector<int, 16> BlendMask(NumElems, -1);
SmallVector<SDValue, 16> Elts(NumElems, DAG.getUNDEF(OpEltVT));
@@ -11256,18 +11441,20 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
- // If the upper elts of a ymm/zmm are undef/zero then we might be better off
- // lowering to a smaller build vector and padding with undef/zero.
+ // If the upper elts of a ymm/zmm are undef/freeze(undef)/zero then we might
+ // be better off lowering to a smaller build vector and padding with
+ // undef/zero.
if ((VT.is256BitVector() || VT.is512BitVector()) &&
!isFoldableUseOfShuffle(BV)) {
unsigned UpperElems = NumElems / 2;
- APInt UndefOrZeroMask = UndefMask | ZeroMask;
- unsigned NumUpperUndefsOrZeros = UndefOrZeroMask.countLeadingOnes();
+ APInt UndefOrZeroMask = FrozenUndefMask | UndefMask | ZeroMask;
+ unsigned NumUpperUndefsOrZeros = UndefOrZeroMask.countl_one();
if (NumUpperUndefsOrZeros >= UpperElems) {
if (VT.is512BitVector() &&
NumUpperUndefsOrZeros >= (NumElems - (NumElems / 4)))
UpperElems = NumElems - (NumElems / 4);
- bool UndefUpper = UndefMask.countLeadingOnes() >= UpperElems;
+ // If freeze(undef) is in any upper elements, force to zero.
+ bool UndefUpper = UndefMask.countl_one() >= UpperElems;
MVT LowerVT = MVT::getVectorVT(EltVT, NumElems - UpperElems);
SDValue NewBV =
DAG.getBuildVector(LowerVT, dl, Op->ops().drop_back(UpperElems));
@@ -11284,8 +11471,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (SDValue BitOp = lowerBuildVectorToBitOp(BV, Subtarget, DAG))
return BitOp;
- unsigned NumZero = ZeroMask.countPopulation();
- unsigned NumNonZero = NonZeroMask.countPopulation();
+ unsigned NumZero = ZeroMask.popcount();
+ unsigned NumNonZero = NonZeroMask.popcount();
// If we are inserting one variable into a vector of non-zero constants, try
// to avoid loading each constant element as a scalar. Load the constants as a
@@ -11349,7 +11536,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// Special case for single non-zero, non-undef, element.
if (NumNonZero == 1) {
- unsigned Idx = NonZeroMask.countTrailingZeros();
+ unsigned Idx = NonZeroMask.countr_zero();
SDValue Item = Op.getOperand(Idx);
// If we have a constant or non-constant insertion into the low element of
@@ -11415,7 +11602,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
// shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
// Check if it's possible to issue this instead.
// shuffle (vload ptr)), undef, <1, 1, 1, 1>
- unsigned Idx = NonZeroMask.countTrailingZeros();
+ unsigned Idx = NonZeroMask.countr_zero();
SDValue Item = Op.getOperand(Idx);
if (Op.getNode()->isOnlyUserOf(Item.getNode()))
return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
@@ -11484,7 +11671,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (EVTBits == 64) {
if (NumNonZero == 1) {
// One half is zero or undef.
- unsigned Idx = NonZeroMask.countTrailingZeros();
+ unsigned Idx = NonZeroMask.countr_zero();
SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
Op.getOperand(Idx));
return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
@@ -11620,8 +11807,13 @@ static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
SDValue SubVec = Op.getOperand(i);
if (SubVec.isUndef())
continue;
- if (ISD::isFreezeUndef(SubVec.getNode()) && SubVec.hasOneUse())
- ++NumFreezeUndef;
+ if (ISD::isFreezeUndef(SubVec.getNode())) {
+ // If the freeze(undef) has multiple uses then we must fold to zero.
+ if (SubVec.hasOneUse())
+ ++NumFreezeUndef;
+ else
+ ++NumZero;
+ }
else if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
++NumZero;
else {
@@ -12059,13 +12251,15 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
return false;
// Don't use V1/V2 if they're not the same size as the shuffle mask type.
- if (V1 && V1.getValueSizeInBits() != VT.getSizeInBits())
+ if (V1 && (V1.getValueSizeInBits() != VT.getSizeInBits() ||
+ !V1.getValueType().isVector()))
V1 = SDValue();
- if (V2 && V2.getValueSizeInBits() != VT.getSizeInBits())
+ if (V2 && (V2.getValueSizeInBits() != VT.getSizeInBits() ||
+ !V2.getValueType().isVector()))
V2 = SDValue();
- APInt ZeroV1 = APInt::getNullValue(Size);
- APInt ZeroV2 = APInt::getNullValue(Size);
+ APInt ZeroV1 = APInt::getZero(Size);
+ APInt ZeroV2 = APInt::getZero(Size);
for (int i = 0; i < Size; ++i) {
int MaskIdx = Mask[i];
@@ -12095,8 +12289,8 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
}
return false;
}
- return (ZeroV1.isNullValue() || DAG.MaskedVectorIsZero(V1, ZeroV1)) &&
- (ZeroV2.isNullValue() || DAG.MaskedVectorIsZero(V2, ZeroV2));
+ return (ZeroV1.isZero() || DAG.MaskedVectorIsZero(V1, ZeroV1)) &&
+ (ZeroV2.isZero() || DAG.MaskedVectorIsZero(V2, ZeroV2));
}
// Check if the shuffle mask is suitable for the AVX vpunpcklwd or vpunpckhwd
@@ -12939,9 +13133,7 @@ static SDValue lowerShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
}
SDValue V1Mask = DAG.getBuildVector(VT, DL, MaskOps);
- V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask);
- V2 = DAG.getNode(X86ISD::ANDNP, DL, VT, V1Mask, V2);
- return DAG.getNode(ISD::OR, DL, VT, V1, V2);
+ return getBitSelect(DL, VT, V1, V2, V1Mask, DAG);
}
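// Editor's note: a standalone sketch, not part of this patch. It shows the
// bit-select identity that the getBitSelect() helper above replaces the three
// separate nodes with; X86ISD::ANDNP(A, B) computes ~A & B, so the DAG form
// and this scalar form are the same bit math.
#include <cstdint>
static inline uint64_t bitSelect(uint64_t V1, uint64_t V2, uint64_t Mask) {
  // Each result bit comes from V1 where Mask is set and from V2 where clear.
  return (V1 & Mask) | (~Mask & V2);
}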
static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
@@ -12949,7 +13141,7 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG);
-static bool matchShuffleAsBlend(SDValue V1, SDValue V2,
+static bool matchShuffleAsBlend(MVT VT, SDValue V1, SDValue V2,
MutableArrayRef<int> Mask,
const APInt &Zeroable, bool &ForceV1Zero,
bool &ForceV2Zero, uint64_t &BlendMask) {
@@ -12962,37 +13154,67 @@ static bool matchShuffleAsBlend(SDValue V1, SDValue V2,
ForceV1Zero = false, ForceV2Zero = false;
assert(Mask.size() <= 64 && "Shuffle mask too big for blend mask");
+ int NumElts = Mask.size();
+ int NumLanes = VT.getSizeInBits() / 128;
+ int NumEltsPerLane = NumElts / NumLanes;
+ assert((NumLanes * NumEltsPerLane) == NumElts && "Value type mismatch");
+
+ // For 32/64-bit elements, if we only reference one input (plus any undefs),
+ // then ensure the blend mask part for that lane just references that input.
+ bool ForceWholeLaneMasks =
+ VT.is256BitVector() && VT.getScalarSizeInBits() >= 32;
+
// Attempt to generate the binary blend mask. If an input is zero then
// we can use any lane.
- for (int i = 0, Size = Mask.size(); i < Size; ++i) {
- int M = Mask[i];
- if (M == SM_SentinelUndef)
- continue;
- if (M == i ||
- (0 <= M && M < Size && IsElementEquivalent(Size, V1, V1, M, i))) {
- Mask[i] = i;
- continue;
- }
- if (M == (i + Size) ||
- (Size <= M && IsElementEquivalent(Size, V2, V2, M - Size, i))) {
- BlendMask |= 1ull << i;
- Mask[i] = i + Size;
- continue;
- }
- if (Zeroable[i]) {
- if (V1IsZeroOrUndef) {
- ForceV1Zero = true;
- Mask[i] = i;
+ for (int Lane = 0; Lane != NumLanes; ++Lane) {
+ // Keep track of the inputs used per lane.
+ bool LaneV1InUse = false;
+ bool LaneV2InUse = false;
+ uint64_t LaneBlendMask = 0;
+ for (int LaneElt = 0; LaneElt != NumEltsPerLane; ++LaneElt) {
+ int Elt = (Lane * NumEltsPerLane) + LaneElt;
+ int M = Mask[Elt];
+ if (M == SM_SentinelUndef)
+ continue;
+ if (M == Elt || (0 <= M && M < NumElts &&
+ IsElementEquivalent(NumElts, V1, V1, M, Elt))) {
+ Mask[Elt] = Elt;
+ LaneV1InUse = true;
continue;
}
- if (V2IsZeroOrUndef) {
- ForceV2Zero = true;
- BlendMask |= 1ull << i;
- Mask[i] = i + Size;
+ if (M == (Elt + NumElts) ||
+ (NumElts <= M &&
+ IsElementEquivalent(NumElts, V2, V2, M - NumElts, Elt))) {
+ LaneBlendMask |= 1ull << LaneElt;
+ Mask[Elt] = Elt + NumElts;
+ LaneV2InUse = true;
continue;
}
+ if (Zeroable[Elt]) {
+ if (V1IsZeroOrUndef) {
+ ForceV1Zero = true;
+ Mask[Elt] = Elt;
+ LaneV1InUse = true;
+ continue;
+ }
+ if (V2IsZeroOrUndef) {
+ ForceV2Zero = true;
+ LaneBlendMask |= 1ull << LaneElt;
+ Mask[Elt] = Elt + NumElts;
+ LaneV2InUse = true;
+ continue;
+ }
+ }
+ return false;
}
- return false;
+
+ // If we only used V2 then splat the lane blend mask to avoid any demanded
+ // elts from V1 in this lane (the V1 equivalent is implicit with a zero
+ // blend mask bit).
+ if (ForceWholeLaneMasks && LaneV2InUse && !LaneV1InUse)
+ LaneBlendMask = (1ull << NumEltsPerLane) - 1;
+
+ BlendMask |= LaneBlendMask << (Lane * NumEltsPerLane);
}
return true;
}
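// Editor's note: a standalone sketch, not part of this patch. It models the
// per-lane blend-mask construction above on a plain index vector (-1 = undef),
// dropping the Zeroable and IsElementEquivalent handling and applying the
// whole-lane forcing unconditionally (the real code only does so for 256-bit
// vectors with 32/64-bit elements).
#include <cstdint>
#include <vector>
static bool modelBlendMask(const std::vector<int> &Mask, int NumLanes,
                           uint64_t &BlendMask) {
  int NumElts = (int)Mask.size();
  int NumEltsPerLane = NumElts / NumLanes;
  BlendMask = 0;
  for (int Lane = 0; Lane != NumLanes; ++Lane) {
    bool LaneV1InUse = false, LaneV2InUse = false;
    uint64_t LaneBlendMask = 0;
    for (int LaneElt = 0; LaneElt != NumEltsPerLane; ++LaneElt) {
      int Elt = Lane * NumEltsPerLane + LaneElt;
      int M = Mask[Elt];
      if (M < 0)
        continue;                      // Undef: either input works.
      if (M == Elt) {                  // In-place element taken from V1.
        LaneV1InUse = true;
        continue;
      }
      if (M == Elt + NumElts) {        // In-place element taken from V2.
        LaneBlendMask |= 1ull << LaneElt;
        LaneV2InUse = true;
        continue;
      }
      return false;                    // Not expressible as a blend.
    }
    // A lane that only references V2 selects the whole lane from V2, so no
    // element of V1 is demanded there (clear mask bits implicitly demand V1).
    if (LaneV2InUse && !LaneV1InUse)
      LaneBlendMask = (1ull << NumEltsPerLane) - 1;
    BlendMask |= LaneBlendMask << (Lane * NumEltsPerLane);
  }
  return true;
}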
@@ -13020,7 +13242,7 @@ static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
uint64_t BlendMask = 0;
bool ForceV1Zero = false, ForceV2Zero = false;
SmallVector<int, 64> Mask(Original);
- if (!matchShuffleAsBlend(V1, V2, Mask, Zeroable, ForceV1Zero, ForceV2Zero,
+ if (!matchShuffleAsBlend(VT, V1, V2, Mask, Zeroable, ForceV1Zero, ForceV2Zero,
BlendMask))
return SDValue();
@@ -13225,47 +13447,60 @@ static SDValue lowerShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT,
SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
// Determine UNPCKL/UNPCKH type and operand order.
- for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) {
- for (int Elt = 0; Elt != NumLaneElts; ++Elt) {
- int M = Mask[Lane + Elt];
- if (M < 0)
- continue;
+ for (int Elt = 0; Elt != NumElts; ++Elt) {
+ int M = Mask[Elt];
+ if (M < 0)
+ continue;
- SDValue &Op = Ops[Elt & 1];
- if (M < NumElts && (Op.isUndef() || Op == V1))
- Op = V1;
- else if (NumElts <= M && (Op.isUndef() || Op == V2))
- Op = V2;
- else
- return SDValue();
+ // Normalize the mask value depending on whether it's V1 or V2.
+ int NormM = M;
+ SDValue &Op = Ops[Elt & 1];
+ if (M < NumElts && (Op.isUndef() || Op == V1))
+ Op = V1;
+ else if (NumElts <= M && (Op.isUndef() || Op == V2)) {
+ Op = V2;
+ NormM -= NumElts;
+ } else
+ return SDValue();
+ bool MatchLoAnyLane = false, MatchHiAnyLane = false;
+ for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) {
int Lo = Lane, Mid = Lane + NumHalfLaneElts, Hi = Lane + NumLaneElts;
- MatchLo &= isUndefOrInRange(M, Lo, Mid) ||
- isUndefOrInRange(M, NumElts + Lo, NumElts + Mid);
- MatchHi &= isUndefOrInRange(M, Mid, Hi) ||
- isUndefOrInRange(M, NumElts + Mid, NumElts + Hi);
- if (!MatchLo && !MatchHi)
- return SDValue();
+ MatchLoAnyLane |= isUndefOrInRange(NormM, Lo, Mid);
+ MatchHiAnyLane |= isUndefOrInRange(NormM, Mid, Hi);
+ if (MatchLoAnyLane || MatchHiAnyLane) {
+ assert((MatchLoAnyLane ^ MatchHiAnyLane) &&
+ "Failed to match UNPCKLO/UNPCKHI");
+ break;
+ }
}
+ MatchLo &= MatchLoAnyLane;
+ MatchHi &= MatchHiAnyLane;
+ if (!MatchLo && !MatchHi)
+ return SDValue();
}
assert((MatchLo ^ MatchHi) && "Failed to match UNPCKLO/UNPCKHI");
- // Now check that each pair of elts come from the same unpack pair
- // and set the permute mask based on each pair.
- // TODO - Investigate cases where we permute individual elements.
+ // Element indices have changed after unpacking. Calculate permute mask
+ // so that they will be put back to the position as dictated by the
+ // original shuffle mask indices.
SmallVector<int, 32> PermuteMask(NumElts, -1);
- for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) {
- for (int Elt = 0; Elt != NumLaneElts; Elt += 2) {
- int M0 = Mask[Lane + Elt + 0];
- int M1 = Mask[Lane + Elt + 1];
- if (0 <= M0 && 0 <= M1 &&
- (M0 % NumHalfLaneElts) != (M1 % NumHalfLaneElts))
- return SDValue();
- if (0 <= M0)
- PermuteMask[Lane + Elt + 0] = Lane + (2 * (M0 % NumHalfLaneElts));
- if (0 <= M1)
- PermuteMask[Lane + Elt + 1] = Lane + (2 * (M1 % NumHalfLaneElts)) + 1;
- }
+ for (int Elt = 0; Elt != NumElts; ++Elt) {
+ int M = Mask[Elt];
+ if (M < 0)
+ continue;
+ int NormM = M;
+ if (NumElts <= M)
+ NormM -= NumElts;
+ bool IsFirstOp = M < NumElts;
+ int BaseMaskElt =
+ NumLaneElts * (NormM / NumLaneElts) + (2 * (NormM % NumHalfLaneElts));
+ if ((IsFirstOp && V1 == Ops[0]) || (!IsFirstOp && V2 == Ops[0]))
+ PermuteMask[Elt] = BaseMaskElt;
+ else if ((IsFirstOp && V1 == Ops[1]) || (!IsFirstOp && V2 == Ops[1]))
+ PermuteMask[Elt] = BaseMaskElt + 1;
+ assert(PermuteMask[Elt] != -1 &&
+ "Input mask element is defined but failed to assign permute mask");
}
unsigned UnpckOp = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
@@ -13485,6 +13720,24 @@ static bool isNoopOrBroadcastShuffleMask(ArrayRef<int> Mask) {
return isNoopShuffleMask(Mask) || isBroadcastShuffleMask(Mask);
}
+/// Check if the Mask consists of the same element repeated multiple times.
+static bool isSingleElementRepeatedMask(ArrayRef<int> Mask) {
+ size_t NumUndefs = 0;
+ std::optional<int> UniqueElt;
+ for (int Elt : Mask) {
+ if (Elt == SM_SentinelUndef) {
+ NumUndefs++;
+ continue;
+ }
+ if (UniqueElt.has_value() && UniqueElt.value() != Elt)
+ return false;
+ UniqueElt = Elt;
+ }
+ // Make sure the element is repeated enough times by checking the number of
+ // undefs is small.
+ return NumUndefs <= Mask.size() / 2 && UniqueElt.has_value();
+}
+
/// Generic routine to decompose a shuffle and blend into independent
/// blends and permutes.
///
@@ -13560,9 +13813,17 @@ static SDValue lowerShuffleAsDecomposedShuffleMerge(
if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask,
DAG, true))
return BlendPerm;
- if (SDValue UnpackPerm = lowerShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask,
- DAG))
- return UnpackPerm;
+ // If either input vector provides only a single element which is repeated
+ // multiple times, unpacking from both input vectors would generate worse
+ // code. e.g. for
+ // t5: v16i8 = vector_shuffle<16,0,16,1,16,2,16,3,16,4,16,5,16,6,16,7> t2, t4
+ // it is better to process t4 first to create a vector of t4[0], then unpack
+ // that vector with t2.
+ if (!isSingleElementRepeatedMask(V1Mask) &&
+ !isSingleElementRepeatedMask(V2Mask))
+ if (SDValue UnpackPerm =
+ lowerShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask, DAG))
+ return UnpackPerm;
if (SDValue RotatePerm = lowerShuffleAsByteRotateAndPermute(
DL, VT, V1, V2, Mask, Subtarget, DAG))
return RotatePerm;
@@ -13890,8 +14151,8 @@ static SDValue lowerShuffleAsByteShiftMask(const SDLoc &DL, MVT VT, SDValue V1,
// We need a shuffle that has zeros at one/both ends and a sequential
// shuffle from one source within.
- unsigned ZeroLo = Zeroable.countTrailingOnes();
- unsigned ZeroHi = Zeroable.countLeadingOnes();
+ unsigned ZeroLo = Zeroable.countr_one();
+ unsigned ZeroHi = Zeroable.countl_one();
if (!ZeroLo && !ZeroHi)
return SDValue();
@@ -14033,7 +14294,7 @@ static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const APInt &Zeroable,
const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG, bool BitwiseOnly) {
int Size = Mask.size();
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
@@ -14055,6 +14316,9 @@ static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
if (ShiftAmt < 0)
return SDValue();
+ if (BitwiseOnly && (Opcode == X86ISD::VSHLDQ || Opcode == X86ISD::VSRLDQ))
+ return SDValue();
+
assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
"Illegal integer vector type");
V = DAG.getBitcast(ShiftVT, V);
@@ -14256,6 +14520,7 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
return SDValue();
MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
NumElements / Scale);
+ InputV = DAG.getBitcast(VT, InputV);
InputV = ShuffleOffset(InputV);
InputV = getEXTEND_VECTOR_INREG(AnyExt ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND,
DL, ExtVT, InputV, DAG);
@@ -14263,6 +14528,7 @@ static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
}
assert(VT.is128BitVector() && "Only 128-bit vectors can be extended.");
+ InputV = DAG.getBitcast(VT, InputV);
// For any extends we can cheat for larger element sizes and use shuffle
// instructions that can fold with a load and/or copy.
@@ -14548,6 +14814,8 @@ static SDValue lowerShuffleAsElementInsertion(
SelectionDAG &DAG) {
MVT ExtVT = VT;
MVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltBits = VT.getScalarSizeInBits();
if (isSoftFP16(EltVT, Subtarget))
return SDValue();
@@ -14555,6 +14823,7 @@ static SDValue lowerShuffleAsElementInsertion(
int V2Index =
find_if(Mask, [&Mask](int M) { return M >= (int)Mask.size(); }) -
Mask.begin();
+ bool IsV1Constant = getTargetConstantFromNode(V1) != nullptr;
bool IsV1Zeroable = true;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
if (i != V2Index && !Zeroable[i]) {
@@ -14562,6 +14831,14 @@ static SDValue lowerShuffleAsElementInsertion(
break;
}
+ // Bail if a non-zero V1 isn't used in place.
+ if (!IsV1Zeroable) {
+ SmallVector<int, 8> V1Mask(Mask);
+ V1Mask[V2Index] = -1;
+ if (!isNoopShuffleMask(V1Mask))
+ return SDValue();
+ }
+
// Check for a single input from a SCALAR_TO_VECTOR node.
// FIXME: All of this should be canonicalized into INSERT_VECTOR_ELT and
// all the smarts here sunk into that routine. However, the current
@@ -14574,13 +14851,26 @@ static SDValue lowerShuffleAsElementInsertion(
V2S = DAG.getBitcast(EltVT, V2S);
if (EltVT == MVT::i8 || (EltVT == MVT::i16 && !Subtarget.hasFP16())) {
// Using zext to expand a narrow element won't work for non-zero
- // insertions.
- if (!IsV1Zeroable)
+ // insertions. But we can use a masked constant vector if we're
+ // inserting V2 into the bottom of V1.
+ if (!IsV1Zeroable && !(IsV1Constant && V2Index == 0))
return SDValue();
// Zero-extend directly to i32.
ExtVT = MVT::getVectorVT(MVT::i32, ExtVT.getSizeInBits() / 32);
V2S = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V2S);
+
+ // If we're inserting into a constant, mask off the inserted index
+ // and OR with the zero-extended scalar.
+ if (!IsV1Zeroable) {
+ SmallVector<APInt> Bits(NumElts, APInt::getAllOnes(EltBits));
+ Bits[V2Index] = APInt::getZero(EltBits);
+ SDValue BitMask = getConstVector(Bits, VT, DAG, DL);
+ V1 = DAG.getNode(ISD::AND, DL, VT, V1, BitMask);
+ V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
+ V2 = DAG.getBitcast(VT, DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2));
+ return DAG.getNode(ISD::OR, DL, VT, V1, V2);
+ }
}
V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
} else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
@@ -14592,15 +14882,10 @@ static SDValue lowerShuffleAsElementInsertion(
if (!IsV1Zeroable) {
// If V1 can't be treated as a zero vector we have fewer options to lower
- // this. We can't support integer vectors or non-zero targets cheaply, and
- // the V1 elements can't be permuted in any way.
+ // this. We can't support integer vectors or non-zero targets cheaply.
assert(VT == ExtVT && "Cannot change extended type when non-zeroable!");
if (!VT.isFloatingPoint() || V2Index != 0)
return SDValue();
- SmallVector<int, 8> V1Mask(Mask);
- V1Mask[V2Index] = -1;
- if (!isNoopShuffleMask(V1Mask))
- return SDValue();
if (!VT.is128BitVector())
return SDValue();
@@ -14630,15 +14915,15 @@ static SDValue lowerShuffleAsElementInsertion(
// the desired position. Otherwise it is more efficient to do a vector
// shift left. We know that we can do a vector shift left because all
// the inputs are zero.
- if (VT.isFloatingPoint() || VT.getVectorNumElements() <= 4) {
+ if (VT.isFloatingPoint() || NumElts <= 4) {
SmallVector<int, 4> V2Shuffle(Mask.size(), 1);
V2Shuffle[V2Index] = 0;
V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);
} else {
V2 = DAG.getBitcast(MVT::v16i8, V2);
- V2 = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, V2,
- DAG.getTargetConstant(
- V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8));
+ V2 = DAG.getNode(
+ X86ISD::VSHLDQ, DL, MVT::v16i8, V2,
+ DAG.getTargetConstant(V2Index * EltBits / 8, DL, MVT::i8));
V2 = DAG.getBitcast(VT, V2);
}
}
@@ -14747,10 +15032,10 @@ static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0,
"VPERM* family of shuffles requires 32-bit or 64-bit elements");
// Check that both sources are extracts of the same source vector.
- if (!N0.hasOneUse() || !N1.hasOneUse() ||
- N0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ if (N0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
N1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
- N0.getOperand(0) != N1.getOperand(0))
+ N0.getOperand(0) != N1.getOperand(0) ||
+ !N0.hasOneUse() || !N1.hasOneUse())
return SDValue();
SDValue WideVec = N0.getOperand(0);
@@ -14796,9 +15081,10 @@ static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
+ MVT EltVT = VT.getVectorElementType();
if (!((Subtarget.hasSSE3() && VT == MVT::v2f64) ||
- (Subtarget.hasAVX() && VT.isFloatingPoint()) ||
- (Subtarget.hasAVX2() && VT.isInteger())))
+ (Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
+ (Subtarget.hasAVX2() && (VT.isInteger() || EltVT == MVT::f16))))
return SDValue();
// With MOVDDUP (v2f64) we can broadcast from a register or a load, otherwise
@@ -15217,8 +15503,9 @@ static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Extract;
// Try to use shift instructions.
- if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift =
+ lowerShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask, Zeroable, Subtarget,
+ DAG, /*BitwiseOnly*/ false))
return Shift;
// When loading a scalar and then shuffling it into a vector we can often do
@@ -15379,6 +15666,11 @@ static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
+ if (Subtarget.hasSSE41())
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return Blend;
+
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
if (NumV2Elements == 0) {
@@ -15417,6 +15709,13 @@ static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
+ if (Subtarget.hasSSE2())
+ if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
+ DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) {
+ ZExt = DAG.getBitcast(MVT::v4f32, ZExt);
+ return ZExt;
+ }
+
if (Subtarget.hasAVX2())
if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
return Extract;
@@ -15432,10 +15731,6 @@ static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return V;
if (Subtarget.hasSSE41()) {
- if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
- return Blend;
-
// Use INSERTPS if we can complete the shuffle efficiently.
if (SDValue V = lowerShuffleAsInsertPS(DL, V1, V2, Mask, Zeroable, DAG))
return V;
@@ -15484,6 +15779,18 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
+ // Try to use shift instructions if fast.
+ if (Subtarget.preferLowerShuffleAsShift()) {
+ if (SDValue Shift =
+ lowerShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask, Zeroable,
+ Subtarget, DAG, /*BitwiseOnly*/ true))
+ return Shift;
+ if (NumV2Elements == 0)
+ if (SDValue Rotate =
+ lowerShuffleAsBitRotate(DL, MVT::v4i32, V1, Mask, Subtarget, DAG))
+ return Rotate;
+ }
+
if (NumV2Elements == 0) {
// Try to use broadcast unless the mask only has one non-undef element.
if (count_if(Mask, [](int M) { return M >= 0 && M < 4; }) > 1) {
@@ -15513,8 +15820,9 @@ static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Extract;
// Try to use shift instructions.
- if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift =
+ lowerShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget,
+ DAG, /*BitwiseOnly*/ false))
return Shift;
// There are special ways we can lower some single-element blends.
@@ -16175,8 +16483,9 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (NumV2Inputs == 0) {
// Try to use shift instructions.
- if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift =
+ lowerShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask, Zeroable,
+ Subtarget, DAG, /*BitwiseOnly*/ false))
return Shift;
// Check for being able to broadcast a single element.
@@ -16214,8 +16523,9 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
"shuffles.");
// Try to use shift instructions.
- if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift =
+ lowerShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget,
+ DAG, /*BitwiseOnly*/ false))
return Shift;
// See if we can use SSE4A Extraction / Insertion.
@@ -16271,12 +16581,11 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return V;
// Attempt to lower using compaction, SSE41 is necessary for PACKUSDW.
- // We could use SIGN_EXTEND_INREG+PACKSSDW for older targets but this seems to
- // be slower than a PSHUFLW+PSHUFHW+PSHUFD chain.
int NumEvenDrops = canLowerByDroppingElements(Mask, true, false);
- if ((NumEvenDrops == 1 || NumEvenDrops == 2) && Subtarget.hasSSE41() &&
+ if ((NumEvenDrops == 1 || (NumEvenDrops == 2 && Subtarget.hasSSE41())) &&
!Subtarget.hasVLX()) {
// Check if this is part of a 256-bit vector truncation.
+ unsigned PackOpc = 0;
if (NumEvenDrops == 2 && Subtarget.hasAVX2() &&
peekThroughBitcasts(V1).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
peekThroughBitcasts(V2).getOpcode() == ISD::EXTRACT_SUBVECTOR) {
@@ -16287,7 +16596,8 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
V1V2 = DAG.getBitcast(MVT::v8i32, V1V2);
V1 = extract128BitVector(V1V2, 0, DAG, DL);
V2 = extract128BitVector(V1V2, 4, DAG, DL);
- } else {
+ PackOpc = X86ISD::PACKUS;
+ } else if (Subtarget.hasSSE41()) {
SmallVector<SDValue, 4> DWordClearOps(4,
DAG.getConstant(0, DL, MVT::i32));
for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1))
@@ -16298,14 +16608,26 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
DWordClearMask);
V2 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V2),
DWordClearMask);
+ PackOpc = X86ISD::PACKUS;
+ } else if (!Subtarget.hasSSSE3()) {
+ SDValue ShAmt = DAG.getTargetConstant(16, DL, MVT::i8);
+ V1 = DAG.getBitcast(MVT::v4i32, V1);
+ V2 = DAG.getBitcast(MVT::v4i32, V2);
+ V1 = DAG.getNode(X86ISD::VSHLI, DL, MVT::v4i32, V1, ShAmt);
+ V2 = DAG.getNode(X86ISD::VSHLI, DL, MVT::v4i32, V2, ShAmt);
+ V1 = DAG.getNode(X86ISD::VSRAI, DL, MVT::v4i32, V1, ShAmt);
+ V2 = DAG.getNode(X86ISD::VSRAI, DL, MVT::v4i32, V2, ShAmt);
+ PackOpc = X86ISD::PACKSS;
+ }
+ if (PackOpc) {
+ // Now pack things back together.
+ SDValue Result = DAG.getNode(PackOpc, DL, MVT::v8i16, V1, V2);
+ if (NumEvenDrops == 2) {
+ Result = DAG.getBitcast(MVT::v4i32, Result);
+ Result = DAG.getNode(PackOpc, DL, MVT::v8i16, Result, Result);
+ }
+ return Result;
}
- // Now pack things back together.
- SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v8i16, V1, V2);
- if (NumEvenDrops == 2) {
- Result = DAG.getBitcast(MVT::v4i32, Result);
- Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v8i16, Result, Result);
- }
- return Result;
}
// When compacting odd (upper) elements, use PACKSS pre-SSE41.
@@ -16426,8 +16748,9 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
// Try to use shift instructions.
- if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift =
+ lowerShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget,
+ DAG, /*BitwiseOnly*/ false))
return Shift;
// Try to use byte rotation instructions.
@@ -16805,7 +17128,7 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// AVX vector shuffle types.
static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
- SelectionDAG &DAG) {
+ SelectionDAG &DAG, bool SimpleOnly) {
assert(VT.getSizeInBits() >= 256 &&
"Only for 256-bit or wider vector shuffles!");
assert(V1.getSimpleValueType() == VT && "Bad operand type!");
@@ -16833,11 +17156,10 @@ static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1,
std::tie(LoV2, HiV2) = SplitVector(V2);
// Now create two 4-way blends of these half-width vectors.
- auto HalfBlend = [&](ArrayRef<int> HalfMask) {
- bool UseLoV1 = false, UseHiV1 = false, UseLoV2 = false, UseHiV2 = false;
- SmallVector<int, 32> V1BlendMask((unsigned)SplitNumElements, -1);
- SmallVector<int, 32> V2BlendMask((unsigned)SplitNumElements, -1);
- SmallVector<int, 32> BlendMask((unsigned)SplitNumElements, -1);
+ auto GetHalfBlendPiecesReq = [&](const ArrayRef<int> &HalfMask, bool &UseLoV1,
+ bool &UseHiV1, bool &UseLoV2,
+ bool &UseHiV2) {
+ UseLoV1 = UseHiV1 = UseLoV2 = UseHiV2 = false;
for (int i = 0; i < SplitNumElements; ++i) {
int M = HalfMask[i];
if (M >= NumElements) {
@@ -16845,21 +17167,47 @@ static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1,
UseHiV2 = true;
else
UseLoV2 = true;
- V2BlendMask[i] = M - NumElements;
- BlendMask[i] = SplitNumElements + i;
} else if (M >= 0) {
if (M >= SplitNumElements)
UseHiV1 = true;
else
UseLoV1 = true;
+ }
+ }
+ };
+
+ auto CheckHalfBlendUsable = [&](const ArrayRef<int> &HalfMask) -> bool {
+ if (!SimpleOnly)
+ return true;
+
+ bool UseLoV1, UseHiV1, UseLoV2, UseHiV2;
+ GetHalfBlendPiecesReq(HalfMask, UseLoV1, UseHiV1, UseLoV2, UseHiV2);
+
+ return !(UseHiV1 || UseHiV2);
+ };
+
+ auto HalfBlend = [&](ArrayRef<int> HalfMask) {
+ SmallVector<int, 32> V1BlendMask((unsigned)SplitNumElements, -1);
+ SmallVector<int, 32> V2BlendMask((unsigned)SplitNumElements, -1);
+ SmallVector<int, 32> BlendMask((unsigned)SplitNumElements, -1);
+ for (int i = 0; i < SplitNumElements; ++i) {
+ int M = HalfMask[i];
+ if (M >= NumElements) {
+ V2BlendMask[i] = M - NumElements;
+ BlendMask[i] = SplitNumElements + i;
+ } else if (M >= 0) {
V1BlendMask[i] = M;
BlendMask[i] = i;
}
}
+ bool UseLoV1, UseHiV1, UseLoV2, UseHiV2;
+ GetHalfBlendPiecesReq(HalfMask, UseLoV1, UseHiV1, UseLoV2, UseHiV2);
+
// Because the lowering happens after all combining takes place, we need to
// manually combine these blend masks as much as possible so that we create
// a minimal number of high-level vector shuffle nodes.
+ assert((!SimpleOnly || (!UseHiV1 && !UseHiV2)) && "Shuffle isn't simple");
// First try just blending the halves of V1 or V2.
if (!UseLoV1 && !UseHiV1 && !UseLoV2 && !UseHiV2)
@@ -16871,8 +17219,7 @@ static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V1Blend, V2Blend;
if (UseLoV1 && UseHiV1) {
- V1Blend =
- DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask);
+ V1Blend = DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask);
} else {
// We only use half of V1 so map the usage down into the final blend mask.
V1Blend = UseLoV1 ? LoV1 : HiV1;
@@ -16881,8 +17228,7 @@ static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1,
BlendMask[i] = V1BlendMask[i] - (UseLoV1 ? 0 : SplitNumElements);
}
if (UseLoV2 && UseHiV2) {
- V2Blend =
- DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask);
+ V2Blend = DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask);
} else {
// We only use half of V2 so map the usage down into the final blend mask.
V2Blend = UseLoV2 ? LoV2 : HiV2;
@@ -16892,6 +17238,10 @@ static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1,
}
return DAG.getVectorShuffle(SplitVT, DL, V1Blend, V2Blend, BlendMask);
};
+
+ if (!CheckHalfBlendUsable(LoMask) || !CheckHalfBlendUsable(HiMask))
+ return SDValue();
+
SDValue Lo = HalfBlend(LoMask);
SDValue Hi = HalfBlend(HiMask);
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
@@ -16948,7 +17298,8 @@ static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1,
if (Mask[i] >= 0)
LaneInputs[Mask[i] / Size][(Mask[i] % Size) / LaneSize] = true;
if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1)
- return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
+ return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG,
+ /*SimpleOnly*/ false);
// Otherwise, just fall back to decomposed shuffles and a blend/unpack. This
// requires that the decomposed single-input shuffles don't end up here.
@@ -17096,6 +17447,20 @@ static SDValue lowerShuffleAsLanePermuteAndPermute(
return getSublanePermute(/*NumSublanes=*/NumLanes * 4);
}
+/// Helper to compute the in-lane shuffle mask for a complete shuffle mask.
+static void computeInLaneShuffleMask(const ArrayRef<int> &Mask, int LaneSize,
+ SmallVector<int> &InLaneMask) {
+ int Size = Mask.size();
+ InLaneMask.assign(Mask.begin(), Mask.end());
+ for (int i = 0; i < Size; ++i) {
+ int &M = InLaneMask[i];
+ if (M < 0)
+ continue;
+ if (((M % Size) / LaneSize) != (i / LaneSize))
+ M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
+ }
+}
+
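// Editor's note: a standalone sketch, not part of this patch. It applies the
// same remapping as computeInLaneShuffleMask() above to a plain index vector:
// cross-lane references are redirected to the matching in-lane position of a
// second, lane-flipped copy of the source (indices >= Size refer to that copy).
#include <vector>
static std::vector<int> modelInLaneMask(const std::vector<int> &Mask,
                                        int LaneSize) {
  int Size = (int)Mask.size();
  std::vector<int> InLane(Mask);
  for (int i = 0; i < Size; ++i) {
    int &M = InLane[i];
    if (M < 0)
      continue;
    if (((M % Size) / LaneSize) != (i / LaneSize))
      M = (M % LaneSize) + (i / LaneSize) * LaneSize + Size;
  }
  return InLane;
}
// e.g. Mask = {2,3,0,1} with LaneSize = 2 (a lane swap) becomes {4,5,6,7}:
// every element is then taken in-lane from the flipped copy.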
/// Lower a vector shuffle crossing multiple 128-bit lanes by shuffling one
/// source with a lane permutation.
///
@@ -17140,21 +17505,17 @@ static SDValue lowerShuffleAsLanePermuteAndShuffle(
assert(V2.isUndef() &&
"This last part of this routine only works on single input shuffles");
- SmallVector<int, 32> InLaneMask(Mask);
- for (int i = 0; i < Size; ++i) {
- int &M = InLaneMask[i];
- if (M < 0)
- continue;
- if (((M % Size) / LaneSize) != (i / LaneSize))
- M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
- }
+ SmallVector<int> InLaneMask;
+ computeInLaneShuffleMask(Mask, Mask.size() / 2, InLaneMask);
+
assert(!is128BitLaneCrossingShuffleMask(VT, InLaneMask) &&
"In-lane shuffle mask expected");
// If we're not using both lanes in each lane and the inlane mask is not
// repeating, then we're better off splitting.
if (!AllLanes && !is128BitLaneRepeatedShuffleMask(VT, InLaneMask))
- return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
+ return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG,
+ /*SimpleOnly*/ false);
// Flip the lanes, and shuffle the results which should now be in-lane.
MVT PVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64;
@@ -17439,6 +17800,10 @@ static SDValue lowerShuffleAsLanePermuteAndRepeatedMask(
return SDValue();
for (int i = 0; i != NumElts; ++i) {
+ if (Mask[i] < 0) {
+ NewMask[i] = -1;
+ continue;
+ }
NewMask[i] = RepeatMask[i % NumLaneElts];
if (NewMask[i] < 0)
continue;
@@ -17934,7 +18299,7 @@ static SDValue lowerShuffleAsVTRUNCAndUnpack(const SDLoc &DL, MVT VT,
return SDValue();
// Remaining elements need to be zeroable.
- if (Zeroable.countLeadingOnes() < (Mask.size() - 8))
+ if (Zeroable.countl_one() < (Mask.size() - 8))
return SDValue();
V1 = DAG.getBitcast(MVT::v4i64, V1);
@@ -18185,6 +18550,13 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Subtarget, DAG))
return Broadcast;
+ // Try to use shift instructions if fast.
+ if (Subtarget.preferLowerShuffleAsShift())
+ if (SDValue Shift =
+ lowerShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask, Zeroable,
+ Subtarget, DAG, /*BitwiseOnly*/ true))
+ return Shift;
+
if (V2.isUndef()) {
// When the shuffle is mirrored between the 128-bit lanes of the unit, we
// can use lower latency instructions that will operate on both lanes.
@@ -18206,8 +18578,9 @@ static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
}
// Try to use shift instructions.
- if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift =
+ lowerShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask, Zeroable, Subtarget,
+ DAG, /*BitwiseOnly*/ false))
return Shift;
// If we have VLX support, we can use VALIGN or VEXPAND.
@@ -18285,6 +18658,19 @@ static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Subtarget, DAG))
return Broadcast;
+ if (!Subtarget.hasAVX2()) {
+ SmallVector<int> InLaneMask;
+ computeInLaneShuffleMask(Mask, Mask.size() / 2, InLaneMask);
+
+ if (!is128BitLaneRepeatedShuffleMask(MVT::v8f32, InLaneMask))
+ if (SDValue R = splitAndLowerShuffle(DL, MVT::v8f32, V1, V2, Mask, DAG,
+ /*SimpleOnly*/ true))
+ return R;
+ }
+ if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v8i32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return DAG.getBitcast(MVT::v8f32, ZExt);
+
// If the shuffle mask is repeated in each 128-bit lane, we have many more
// options to efficiently lower the shuffle.
SmallVector<int, 4> RepeatedMask;
@@ -18386,6 +18772,8 @@ static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
assert(Subtarget.hasAVX2() && "We can only lower v8i32 with AVX2!");
+ int NumV2Elements = count_if(Mask, [](int M) { return M >= 8; });
+
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
@@ -18417,6 +18805,18 @@ static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Subtarget, DAG))
return Broadcast;
+ // Try to use shift instructions if fast.
+ if (Subtarget.preferLowerShuffleAsShift()) {
+ if (SDValue Shift =
+ lowerShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask, Zeroable,
+ Subtarget, DAG, /*BitwiseOnly*/ true))
+ return Shift;
+ if (NumV2Elements == 0)
+ if (SDValue Rotate =
+ lowerShuffleAsBitRotate(DL, MVT::v8i32, V1, Mask, Subtarget, DAG))
+ return Rotate;
+ }
+
// If the shuffle mask is repeated in each 128-bit lane we can use more
// efficient instructions that mirror the shuffles across the two 128-bit
// lanes.
@@ -18435,10 +18835,16 @@ static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
}
// Try to use shift instructions.
- if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift =
+ lowerShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask, Zeroable, Subtarget,
+ DAG, /*BitwiseOnly*/ false))
return Shift;
+ if (!Subtarget.preferLowerShuffleAsShift() && NumV2Elements == 0)
+ if (SDValue Rotate =
+ lowerShuffleAsBitRotate(DL, MVT::v8i32, V1, Mask, Subtarget, DAG))
+ return Rotate;
+
// If we have VLX support, we can use VALIGN or EXPAND.
if (Subtarget.hasVLX()) {
if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v8i32, V1, V2, Mask,
@@ -18539,8 +18945,9 @@ static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return V;
// Try to use shift instructions.
- if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift =
+ lowerShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask, Zeroable,
+ Subtarget, DAG, /*BitwiseOnly*/ false))
return Shift;
// Try to use byte rotation instructions.
@@ -18661,8 +19068,9 @@ static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return V;
// Try to use shift instructions.
- if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift =
+ lowerShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask, Zeroable, Subtarget,
+ DAG, /*BitwiseOnly*/ false))
return Shift;
// Try to use byte rotation instructions.
@@ -18777,7 +19185,7 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return V;
if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
return V;
- return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
+ return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG, /*SimpleOnly*/ false);
}
MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits),
@@ -18787,10 +19195,10 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
return DAG.getBitcast(VT, DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask));
}
- if (VT == MVT::v16f16) {
+ if (VT == MVT::v16f16 || VT == MVT::v16bf16) {
V1 = DAG.getBitcast(MVT::v16i16, V1);
V2 = DAG.getBitcast(MVT::v16i16, V2);
- return DAG.getBitcast(MVT::v16f16,
+ return DAG.getBitcast(VT,
DAG.getVectorShuffle(MVT::v16i16, DL, V1, V2, Mask));
}
@@ -19012,6 +19420,14 @@ static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return lowerShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);
}
+ if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask,
+ Zeroable, Subtarget, DAG))
+ return Blend;
+
+ if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
+ DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ return DAG.getBitcast(MVT::v16f32, ZExt);
+
// Try to create an in-lane repeating shuffle mask and then shuffle the
// results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
@@ -19043,6 +19459,13 @@ static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!");
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+ // Try to use shift instructions if fast.
+ if (Subtarget.preferLowerShuffleAsShift())
+ if (SDValue Shift =
+ lowerShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask, Zeroable,
+ Subtarget, DAG, /*BitwiseOnly*/ true))
+ return Shift;
+
if (V2.isUndef()) {
// When the shuffle is mirrored between the 128-bit lanes of the unit, we
// can use lower latency instructions that will operate on all four
@@ -19069,8 +19492,9 @@ static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Shuf128;
// Try to use shift instructions.
- if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift =
+ lowerShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask, Zeroable, Subtarget,
+ DAG, /*BitwiseOnly*/ false))
return Shift;
// Try to use VALIGN.
@@ -19108,6 +19532,8 @@ static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
+ int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; });
+
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
@@ -19115,6 +19541,18 @@ static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
+ // Try to use shift instructions if fast.
+ if (Subtarget.preferLowerShuffleAsShift()) {
+ if (SDValue Shift =
+ lowerShuffleAsShift(DL, MVT::v16i32, V1, V2, Mask, Zeroable,
+ Subtarget, DAG, /*BitwiseOnly*/ true))
+ return Shift;
+ if (NumV2Elements == 0)
+ if (SDValue Rotate = lowerShuffleAsBitRotate(DL, MVT::v16i32, V1, Mask,
+ Subtarget, DAG))
+ return Rotate;
+ }
+
// If the shuffle mask is repeated in each 128-bit lane we can use more
// efficient instructions that mirror the shuffles across the four 128-bit
// lanes.
@@ -19133,10 +19571,16 @@ static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
}
// Try to use shift instructions.
- if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i32, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift =
+ lowerShuffleAsShift(DL, MVT::v16i32, V1, V2, Mask, Zeroable,
+ Subtarget, DAG, /*BitwiseOnly*/ false))
return Shift;
+ if (!Subtarget.preferLowerShuffleAsShift() && NumV2Elements != 0)
+ if (SDValue Rotate =
+ lowerShuffleAsBitRotate(DL, MVT::v16i32, V1, Mask, Subtarget, DAG))
+ return Rotate;
+
// Try to use VALIGN.
if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v16i32, V1, V2, Mask,
Subtarget, DAG))
@@ -19203,8 +19647,9 @@ static SDValue lowerV32I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return V;
// Try to use shift instructions.
- if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift =
+ lowerShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask, Zeroable,
+ Subtarget, DAG, /*BitwiseOnly*/ false))
return Shift;
// Try to use byte rotation instructions.
@@ -19266,8 +19711,9 @@ static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
return V;
// Try to use shift instructions.
- if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask,
- Zeroable, Subtarget, DAG))
+ if (SDValue Shift =
+ lowerShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget,
+ DAG, /*BitwiseOnly*/ false))
return Shift;
// Try to use byte rotation instructions.
@@ -19329,7 +19775,7 @@ static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (Subtarget.hasVBMI())
return lowerShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, Subtarget, DAG);
- return splitAndLowerShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
+ return splitAndLowerShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG, /*SimpleOnly*/ false);
}
/// High-level routine to lower various 512-bit x86 vector shuffles.
@@ -19374,10 +19820,14 @@ static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
return V;
- return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
+ return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG, /*SimpleOnly*/ false);
}
if (VT == MVT::v32f16) {
+ if (!Subtarget.hasBWI())
+ return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG,
+ /*SimpleOnly*/ false);
+
V1 = DAG.getBitcast(MVT::v32i16, V1);
V2 = DAG.getBitcast(MVT::v32i16, V2);
return DAG.getBitcast(MVT::v32f16,
@@ -19515,11 +19965,11 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(SubvecElts != NumElts && "Identity shuffle?");
// Clip to a power 2.
- SubvecElts = PowerOf2Floor(SubvecElts);
+ SubvecElts = llvm::bit_floor<uint32_t>(SubvecElts);
// Make sure the number of zeroable bits in the top at least covers the bits
// not covered by the subvector.
- if ((int)Zeroable.countLeadingOnes() >= (NumElts - SubvecElts)) {
+ if ((int)Zeroable.countl_one() >= (NumElts - SubvecElts)) {
assert(Src >= 0 && "Expected a source!");
MVT ExtractVT = MVT::getVectorVT(MVT::i1, SubvecElts);
SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT,
@@ -19691,18 +20141,22 @@ static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) {
return false;
}
-static bool canCombineAsMaskOperation(SDValue V1, SDValue V2,
+static bool canCombineAsMaskOperation(SDValue V,
const X86Subtarget &Subtarget) {
if (!Subtarget.hasAVX512())
return false;
- MVT VT = V1.getSimpleValueType().getScalarType();
+ if (!V.getValueType().isSimple())
+ return false;
+
+ MVT VT = V.getSimpleValueType().getScalarType();
if ((VT == MVT::i16 || VT == MVT::i8) && !Subtarget.hasBWI())
return false;
- // i8 is better to be widen to i16, because there is PBLENDW for vXi16
- // when the vector bit size is 128 or 256.
- if (VT == MVT::i8 && V1.getSimpleValueType().getSizeInBits() < 512)
+ // If vec width < 512, widen i8/i16 even with BWI as blendd/blendps/blendpd
+ // are preferable to blendw/blendvb/masked-mov.
+ if ((VT == MVT::i16 || VT == MVT::i8) &&
+ V.getSimpleValueType().getSizeInBits() < 512)
return false;
auto HasMaskOperation = [&](SDValue V) {
@@ -19715,6 +20169,16 @@ static bool canCombineAsMaskOperation(SDValue V1, SDValue V2,
case ISD::SUB:
case ISD::AND:
case ISD::XOR:
+ case ISD::OR:
+ case ISD::SMAX:
+ case ISD::SMIN:
+ case ISD::UMAX:
+ case ISD::UMIN:
+ case ISD::ABS:
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::MUL:
break;
}
if (!V->hasOneUse())
@@ -19723,7 +20187,7 @@ static bool canCombineAsMaskOperation(SDValue V1, SDValue V2,
return true;
};
- if (HasMaskOperation(V1) || HasMaskOperation(V2))
+ if (HasMaskOperation(V))
return true;
return false;
@@ -19804,7 +20268,8 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget,
// integers to handle flipping the low and high halves of AVX 256-bit vectors.
SmallVector<int, 16> WidenedMask;
if (VT.getScalarSizeInBits() < 64 && !Is1BitVector &&
- !canCombineAsMaskOperation(V1, V2, Subtarget) &&
+ !canCombineAsMaskOperation(V1, Subtarget) &&
+ !canCombineAsMaskOperation(V2, Subtarget) &&
canWidenShuffleElements(OrigMask, Zeroable, V2IsZero, WidenedMask)) {
// Shuffle mask widening should not interfere with a broadcast opportunity
// by obfuscating the operands with bitcasts.
@@ -20290,6 +20755,14 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SDValue N2 = Op.getOperand(2);
auto *N2C = dyn_cast<ConstantSDNode>(N2);
+ if (EltVT == MVT::bf16) {
+ MVT IVT = VT.changeVectorElementTypeToInteger();
+ SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVT,
+ DAG.getBitcast(IVT, N0),
+ DAG.getBitcast(MVT::i16, N1), N2);
+ return DAG.getBitcast(VT, Res);
+ }
+
if (!N2C) {
// Variable insertion indices, usually we're better off spilling to stack,
// but AVX512 can use a variable compare+select by comparing against all
@@ -20739,7 +21212,7 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
- SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
+ SDValue *InGlue, const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags, bool LocalDynamic = false) {
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -20752,8 +21225,8 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
: X86ISD::TLSADDR;
- if (InFlag) {
- SDValue Ops[] = { Chain, TGA, *InFlag };
+ if (InGlue) {
+ SDValue Ops[] = { Chain, TGA, *InGlue };
Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
} else {
SDValue Ops[] = { Chain, TGA };
@@ -20764,22 +21237,22 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
MFI.setAdjustsStack(true);
MFI.setHasCalls(true);
- SDValue Flag = Chain.getValue(1);
- return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
+ SDValue Glue = Chain.getValue(1);
+ return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
static SDValue
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT) {
- SDValue InFlag;
+ SDValue InGlue;
SDLoc dl(GA); // ? function entry point might be better
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg,
- SDLoc(), PtrVT), InFlag);
- InFlag = Chain.getValue(1);
+ SDLoc(), PtrVT), InGlue);
+ InGlue = Chain.getValue(1);
- return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD);
+ return GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX, X86II::MO_TLSGD);
}
// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit LP64
@@ -20814,11 +21287,11 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
X86II::MO_TLSLD, /*LocalDynamic=*/true);
} else {
- SDValue InFlag;
+ SDValue InGlue;
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
- DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InFlag);
- InFlag = Chain.getValue(1);
- Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX,
+ DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InGlue);
+ InGlue = Chain.getValue(1);
+ Base = GetTLSADDR(DAG, Chain, GA, &InGlue, PtrVT, X86::EAX,
X86II::MO_TLSLDM, /*LocalDynamic=*/true);
}
@@ -21592,8 +22065,8 @@ static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG,
unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
SDLoc dl(Op);
// FP constant to bias correct the final result.
- SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl,
- MVT::f64);
+ SDValue Bias = DAG.getConstantFP(
+ llvm::bit_cast<double>(0x4330000000000000ULL), dl, MVT::f64);
// Load the 32-bit value into an XMM register.
SDValue Load =
@@ -21678,8 +22151,8 @@ static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
// since double has 52-bits of mantissa. Then subtract 2^52 in floating
// point leaving just our i32 integers in double format.
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i64, N0);
- SDValue VBias =
- DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), DL, MVT::v2f64);
+ SDValue VBias = DAG.getConstantFP(
+ llvm::bit_cast<double>(0x4330000000000000ULL), DL, MVT::v2f64);
SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v2i64, ZExtIn,
DAG.getBitcast(MVT::v2i64, VBias));
Or = DAG.getBitcast(MVT::v2f64, Or);
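// Editor's note: a standalone sketch, not part of this patch, of the
// 0x4330000000000000 bias trick used above (C++20 std::bit_cast assumed).
// That constant is the bit pattern of 2^52; OR-ing a 32-bit value into the
// low mantissa bits produces exactly 2^52 + X, so a floating-point subtract
// of 2^52 yields X as a double without any int->fp instruction.
#include <bit>
#include <cstdint>
static double u32ToDouble(uint32_t X) {
  const uint64_t BiasBits = 0x4330000000000000ULL;      // bit pattern of 2^52
  double Biased = std::bit_cast<double>(BiasBits | X);  // == 2^52 + X, exact
  return Biased - std::bit_cast<double>(BiasBits);      // == (double)X
}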
@@ -22000,15 +22473,25 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Extend everything to 80 bits to force it to be done on x87.
// TODO: Are there any fast-math-flags to propagate here?
if (IsStrict) {
- SDValue Add = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::f80, MVT::Other},
- {Chain, Fild, Fudge});
+ unsigned Opc = ISD::STRICT_FADD;
+ // Windows needs the precision control changed to 80bits around this add.
+ if (Subtarget.isOSWindows() && DstVT == MVT::f32)
+ Opc = X86ISD::STRICT_FP80_ADD;
+
+ SDValue Add =
+ DAG.getNode(Opc, dl, {MVT::f80, MVT::Other}, {Chain, Fild, Fudge});
// STRICT_FP_ROUND can't handle equal types.
if (DstVT == MVT::f80)
return Add;
return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other},
{Add.getValue(1), Add, DAG.getIntPtrConstant(0, dl)});
}
- SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
+ unsigned Opc = ISD::FADD;
+ // Windows needs the precision control changed to 80bits around this add.
+ if (Subtarget.isOSWindows() && DstVT == MVT::f32)
+ Opc = X86ISD::FP80_ADD;
+
+ SDValue Add = DAG.getNode(Opc, dl, MVT::f80, Fild, Fudge);
return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
}
@@ -22419,15 +22902,166 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
// Recursively pack lower/upper subvectors, concat result and pack again.
assert(SrcSizeInBits >= 256 && "Expected 256-bit vector or greater");
- EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
- Lo = truncateVectorWithPACK(Opcode, PackedVT, Lo, DL, DAG, Subtarget);
- Hi = truncateVectorWithPACK(Opcode, PackedVT, Hi, DL, DAG, Subtarget);
- PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
+ EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
+ if (PackedVT.is128BitVector()) {
+ // Avoid CONCAT_VECTORS on sub-128bit nodes as these can fail after
+ // type legalization.
+ SDValue Res =
+ truncateVectorWithPACK(Opcode, PackedVT, In, DL, DAG, Subtarget);
+ return truncateVectorWithPACK(Opcode, DstVT, Res, DL, DAG, Subtarget);
+ }
+
+ EVT HalfPackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
+ Lo = truncateVectorWithPACK(Opcode, HalfPackedVT, Lo, DL, DAG, Subtarget);
+ Hi = truncateVectorWithPACK(Opcode, HalfPackedVT, Hi, DL, DAG, Subtarget);
SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
return truncateVectorWithPACK(Opcode, DstVT, Res, DL, DAG, Subtarget);
}
+/// Truncate using ISD::AND mask and X86ISD::PACKUS.
+/// e.g. trunc <8 x i32> X to <8 x i16> -->
+/// MaskX = X & 0xffff (clear high bits to prevent saturation)
+/// packus (extract_subv MaskX, 0), (extract_subv MaskX, 1)
+static SDValue truncateVectorWithPACKUS(EVT DstVT, SDValue In, const SDLoc &DL,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ EVT SrcVT = In.getValueType();
+ APInt Mask = APInt::getLowBitsSet(SrcVT.getScalarSizeInBits(),
+ DstVT.getScalarSizeInBits());
+ In = DAG.getNode(ISD::AND, DL, SrcVT, In, DAG.getConstant(Mask, DL, SrcVT));
+ return truncateVectorWithPACK(X86ISD::PACKUS, DstVT, In, DL, DAG, Subtarget);
+}
+
+/// Truncate using inreg sign extension and X86ISD::PACKSS.
+static SDValue truncateVectorWithPACKSS(EVT DstVT, SDValue In, const SDLoc &DL,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ EVT SrcVT = In.getValueType();
+ In = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, SrcVT, In,
+ DAG.getValueType(DstVT));
+ return truncateVectorWithPACK(X86ISD::PACKSS, DstVT, In, DL, DAG, Subtarget);
+}
+
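// Editor's note: a standalone sketch, not part of this patch. It models the
// two helpers above with scalar i16 -> i8 packs (PACKUSWB/PACKSSWB semantics)
// to show why the AND mask / in-register sign extension are applied before
// packing: they keep the saturating pack from clamping the value.
#include <cstdint>
static uint8_t packus16to8(int16_t V) {            // unsigned saturation
  return V < 0 ? 0 : V > 255 ? 255 : (uint8_t)V;
}
static int8_t packss16to8(int16_t V) {             // signed saturation
  return V < -128 ? -128 : V > 127 ? 127 : (int8_t)V;
}
static uint8_t truncViaPackus(uint16_t V) {
  return packus16to8((int16_t)(V & 0x00FFu));      // clear high bits first
}
static int8_t truncViaPackss(uint16_t V) {
  int8_t Low = (int8_t)(V & 0xFF);                 // sign-extend in-register...
  return packss16to8(Low);                         // ...then pack, no clamping
}
// e.g. V = 0x0301: plain truncation gives 0x01, and so do both helpers; the
// raw packs would instead saturate 0x0301 to 0xFF (PACKUS) or 0x7F (PACKSS).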
+/// This function lowers a vector truncation of 'extended sign-bits' or
+/// 'extended zero-bits' values from vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32
+/// using X86ISD::PACKSS/PACKUS operations.
+static SDValue LowerTruncateVecPackWithSignBits(MVT DstVT, SDValue In,
+ const SDLoc &DL,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ MVT SrcVT = In.getSimpleValueType();
+ MVT DstSVT = DstVT.getVectorElementType();
+ MVT SrcSVT = SrcVT.getVectorElementType();
+ if (!((SrcSVT == MVT::i16 || SrcSVT == MVT::i32 || SrcSVT == MVT::i64) &&
+ (DstSVT == MVT::i8 || DstSVT == MVT::i16 || DstSVT == MVT::i32)))
+ return SDValue();
+
+ // Don't lower with PACK nodes on AVX512 targets if we'd need more than one.
+ if (Subtarget.hasAVX512() &&
+ SrcSVT.getSizeInBits() > (DstSVT.getSizeInBits() * 2))
+ return SDValue();
+
+ // Prefer to lower v4i64 -> v4i32 as a shuffle unless we can cheaply
+ // split this for packing.
+ if (SrcVT == MVT::v4i64 && DstVT == MVT::v4i32 &&
+ !isFreeToSplitVector(In.getNode(), DAG) &&
+ (!Subtarget.hasInt256() || DAG.ComputeNumSignBits(In) != 64))
+ return SDValue();
+
+ // If the upper half of the source is undef, then attempt to split and
+ // only truncate the lower half.
+ if (DstVT.getSizeInBits() >= 128) {
+ SmallVector<SDValue> LowerOps;
+ if (isUpperSubvectorUndef(In, LowerOps, DAG)) {
+ MVT DstHalfVT = DstVT.getHalfNumVectorElementsVT();
+ MVT SrcHalfVT = SrcVT.getHalfNumVectorElementsVT();
+ SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcHalfVT, LowerOps);
+ if (SDValue Res = LowerTruncateVecPackWithSignBits(DstHalfVT, Lo, DL,
+ Subtarget, DAG))
+ return widenSubVector(Res, false, Subtarget, DAG, DL,
+ DstVT.getSizeInBits());
+ }
+ }
+
+ unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
+ unsigned NumPackedSignBits = std::min<unsigned>(DstSVT.getSizeInBits(), 16);
+ unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;
+
+ // Truncate with PACKUS if we are truncating a vector with leading zero
+ // bits that extend all the way to the packed/truncated value. Pre-SSE41
+ // we can only use PACKUSWB.
+ KnownBits Known = DAG.computeKnownBits(In);
+ if ((NumSrcEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros())
+ if (SDValue V = truncateVectorWithPACK(X86ISD::PACKUS, DstVT, In, DL, DAG,
+ Subtarget))
+ return V;
+
+ // Truncate with PACKSS if we are truncating a vector with sign-bits
+ // that extend all the way to the packed/truncated value.
+ if ((NumSrcEltBits - NumPackedSignBits) < DAG.ComputeNumSignBits(In))
+ if (SDValue V = truncateVectorWithPACK(X86ISD::PACKSS, DstVT, In, DL, DAG,
+ Subtarget))
+ return V;
+
+ return SDValue();
+}
+
+/// This function lowers a vector truncation from vXi32/vXi64 to vXi8/vXi16 into
+/// X86ISD::PACKUS/X86ISD::PACKSS operations.
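+/// e.g. trunc <8 x i32> X to <8 x i16> becomes PACKUS of the masked halves on
+/// SSE4.1, or PACKSS of the sign-extended-inreg halves otherwise (unless a
+/// PSHUFB based lowering is expected to be cheaper, in which case this bails).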
+static SDValue LowerTruncateVecPack(MVT DstVT, SDValue In, const SDLoc &DL,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ MVT SrcVT = In.getSimpleValueType();
+ MVT DstSVT = DstVT.getVectorElementType();
+ MVT SrcSVT = SrcVT.getVectorElementType();
+ unsigned NumElems = DstVT.getVectorNumElements();
+ if (!((SrcSVT == MVT::i16 || SrcSVT == MVT::i32 || SrcSVT == MVT::i64) &&
+ (DstSVT == MVT::i8 || DstSVT == MVT::i16) && isPowerOf2_32(NumElems) &&
+ NumElems >= 8))
+ return SDValue();
+
+  // SSSE3's pshufb results in fewer instructions in the cases below.
+ if (Subtarget.hasSSSE3() && NumElems == 8) {
+ if (SrcSVT == MVT::i16)
+ return SDValue();
+ if (SrcSVT == MVT::i32 && (DstSVT == MVT::i8 || !Subtarget.hasSSE41()))
+ return SDValue();
+ }
+
+ // If the upper half of the source is undef, then attempt to split and
+ // only truncate the lower half.
+ if (DstVT.getSizeInBits() >= 128) {
+ SmallVector<SDValue> LowerOps;
+ if (isUpperSubvectorUndef(In, LowerOps, DAG)) {
+ MVT DstHalfVT = DstVT.getHalfNumVectorElementsVT();
+ MVT SrcHalfVT = SrcVT.getHalfNumVectorElementsVT();
+ SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, DL, SrcHalfVT, LowerOps);
+ if (SDValue Res = LowerTruncateVecPack(DstHalfVT, Lo, DL, Subtarget, DAG))
+ return widenSubVector(Res, false, Subtarget, DAG, DL,
+ DstVT.getSizeInBits());
+ }
+ }
+
+ // SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS
+ // for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to
+ // truncate 2 x v4i32 to v8i16.
+ if (Subtarget.hasSSE41() || DstSVT == MVT::i8)
+ return truncateVectorWithPACKUS(DstVT, In, DL, Subtarget, DAG);
+
+ if (SrcSVT == MVT::i16 || SrcSVT == MVT::i32)
+ return truncateVectorWithPACKSS(DstVT, In, DL, Subtarget, DAG);
+
+ // Special case vXi64 -> vXi16, shuffle to vXi32 and then use PACKSS.
+ if (DstSVT == MVT::i16 && SrcSVT == MVT::i64) {
+ MVT TruncVT = MVT::getVectorVT(MVT::i32, NumElems);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, In);
+ return truncateVectorWithPACKSS(DstVT, Trunc, DL, Subtarget, DAG);
+ }
+
+ return SDValue();
+}
+
static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -22514,8 +23148,6 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
SDValue In = Op.getOperand(0);
MVT InVT = In.getSimpleValueType();
- unsigned InNumEltBits = InVT.getScalarSizeInBits();
-
assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
"Invalid TRUNCATE operation");
@@ -22523,7 +23155,7 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isTypeLegal(InVT)) {
if ((InVT == MVT::v8i64 || InVT == MVT::v16i32 || InVT == MVT::v16i64) &&
- VT.is128BitVector()) {
+ VT.is128BitVector() && Subtarget.hasAVX512()) {
assert((InVT == MVT::v16i64 || Subtarget.hasVLX()) &&
"Unexpected subtarget!");
// The default behavior is to truncate one step, concatenate, and then
@@ -22540,6 +23172,17 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}
+ // Pre-AVX512 (or prefer-256bit) see if we can make use of PACKSS/PACKUS.
+ if (!Subtarget.hasAVX512() ||
+ (InVT.is512BitVector() && VT.is256BitVector()))
+ if (SDValue SignPack =
+ LowerTruncateVecPackWithSignBits(VT, In, DL, Subtarget, DAG))
+ return SignPack;
+
+ // Pre-AVX512 see if we can make use of PACKSS/PACKUS.
+ if (!Subtarget.hasAVX512())
+ return LowerTruncateVecPack(VT, In, DL, Subtarget, DAG);
+
// Otherwise let default legalization handle it.
return SDValue();
}
@@ -22547,6 +23190,13 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
if (VT.getVectorElementType() == MVT::i1)
return LowerTruncateVecI1(Op, DAG, Subtarget);
+ // Attempt to truncate with PACKUS/PACKSS even on AVX512 if we'd have to
+ // concat from subvectors to use VPTRUNC etc.
+ if (!Subtarget.hasAVX512() || isFreeToSplitVector(In.getNode(), DAG))
+ if (SDValue SignPack =
+ LowerTruncateVecPackWithSignBits(VT, In, DL, Subtarget, DAG))
+ return SignPack;
+
// vpmovqb/w/d, vpmovdb/w, vpmovwb
if (Subtarget.hasAVX512()) {
if (InVT == MVT::v32i16 && !Subtarget.hasBWI()) {
@@ -22563,25 +23213,6 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
return Op;
}
- unsigned NumPackedSignBits = std::min<unsigned>(VT.getScalarSizeInBits(), 16);
- unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;
-
- // Truncate with PACKUS if we are truncating a vector with leading zero bits
- // that extend all the way to the packed/truncated value.
- // Pre-SSE41 we can only use PACKUSWB.
- KnownBits Known = DAG.computeKnownBits(In);
- if ((InNumEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros())
- if (SDValue V =
- truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget))
- return V;
-
- // Truncate with PACKSS if we are truncating a vector with sign-bits that
- // extend all the way to the packed/truncated value.
- if ((InNumEltBits - NumPackedSignBits) < DAG.ComputeNumSignBits(In))
- if (SDValue V =
- truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget))
- return V;
-
// Handle truncation of V256 to V128 using shuffles.
assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!");
@@ -22623,39 +23254,13 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
return DAG.getBitcast(MVT::v8i16, In);
}
- SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
- DAG.getIntPtrConstant(0, DL));
- SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
- DAG.getIntPtrConstant(4, DL));
-
- // The PSHUFB mask:
- static const int ShufMask1[] = {0, 2, 4, 6, -1, -1, -1, -1};
-
- OpLo = DAG.getBitcast(MVT::v8i16, OpLo);
- OpHi = DAG.getBitcast(MVT::v8i16, OpHi);
-
- OpLo = DAG.getVectorShuffle(MVT::v8i16, DL, OpLo, OpLo, ShufMask1);
- OpHi = DAG.getVectorShuffle(MVT::v8i16, DL, OpHi, OpHi, ShufMask1);
-
- OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
- OpHi = DAG.getBitcast(MVT::v4i32, OpHi);
-
- // The MOVLHPS Mask:
- static const int ShufMask2[] = {0, 1, 4, 5};
- SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2);
- return DAG.getBitcast(MVT::v8i16, res);
+ return Subtarget.hasSSE41()
+ ? truncateVectorWithPACKUS(VT, In, DL, Subtarget, DAG)
+ : truncateVectorWithPACKSS(VT, In, DL, Subtarget, DAG);
}
- if (VT == MVT::v16i8 && InVT == MVT::v16i16) {
- // Use an AND to zero uppper bits for PACKUS.
- In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(255, DL, InVT));
-
- SDValue InLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In,
- DAG.getIntPtrConstant(0, DL));
- SDValue InHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In,
- DAG.getIntPtrConstant(8, DL));
- return DAG.getNode(X86ISD::PACKUS, DL, VT, InLo, InHi);
- }
+ if (VT == MVT::v16i8 && InVT == MVT::v16i16)
+ return truncateVectorWithPACKUS(VT, In, DL, Subtarget, DAG);
llvm_unreachable("All 256->128 cases should have been handled above!");
}
@@ -22802,19 +23407,24 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
return Res;
}
- if (VT == MVT::v8i16 && (SrcVT == MVT::v8f32 || SrcVT == MVT::v8f64)) {
+ // v8f32/v16f32/v8f64->v8i16/v16i16 need to widen first.
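+  // e.g. fptosi <8 x float> to <8 x i16> is performed as fptosi to <8 x i32>
+  // followed by a truncate back down to <8 x i16>.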
+ if (VT.getVectorElementType() == MVT::i16) {
+ assert((SrcVT.getVectorElementType() == MVT::f32 ||
+ SrcVT.getVectorElementType() == MVT::f64) &&
+ "Expected f32/f64 vector!");
+ MVT NVT = VT.changeVectorElementType(MVT::i32);
if (IsStrict) {
Res = DAG.getNode(IsSigned ? ISD::STRICT_FP_TO_SINT
: ISD::STRICT_FP_TO_UINT,
- dl, {MVT::v8i32, MVT::Other}, {Chain, Src});
+ dl, {NVT, MVT::Other}, {Chain, Src});
Chain = Res.getValue(1);
} else {
Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl,
- MVT::v8i32, Src);
+ NVT, Src);
}
// TODO: Need to add exception check code for strict FP.
- Res = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i16, Res);
+ Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
if (IsStrict)
return DAG.getMergeValues({Res, Chain}, dl);
@@ -23834,7 +24444,195 @@ static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl,
DAG.getTargetConstant(Cond, dl, MVT::i8), EFLAGS);
}
-/// Helper for matching OR(EXTRACTELT(X,0),OR(EXTRACTELT(X,1),...))
+/// Recursive helper for combineVectorSizedSetCCEquality() to see if we have a
+/// recognizable memcmp expansion.
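+/// e.g. the memcmp expansion of a 32-byte equality test produces
+///   or (xor (load A0), (load B0)), (xor (load A1), (load B1))
+/// compared against zero, which this recognizes as an OR/XOR tree.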
+static bool isOrXorXorTree(SDValue X, bool Root = true) {
+ if (X.getOpcode() == ISD::OR)
+ return isOrXorXorTree(X.getOperand(0), false) &&
+ isOrXorXorTree(X.getOperand(1), false);
+ if (Root)
+ return false;
+ return X.getOpcode() == ISD::XOR;
+}
+
+/// Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp
+/// expansion.
+template <typename F>
+static SDValue emitOrXorXorTree(SDValue X, const SDLoc &DL, SelectionDAG &DAG,
+ EVT VecVT, EVT CmpVT, bool HasPT, F SToV) {
+ SDValue Op0 = X.getOperand(0);
+ SDValue Op1 = X.getOperand(1);
+ if (X.getOpcode() == ISD::OR) {
+ SDValue A = emitOrXorXorTree(Op0, DL, DAG, VecVT, CmpVT, HasPT, SToV);
+ SDValue B = emitOrXorXorTree(Op1, DL, DAG, VecVT, CmpVT, HasPT, SToV);
+ if (VecVT != CmpVT)
+ return DAG.getNode(ISD::OR, DL, CmpVT, A, B);
+ if (HasPT)
+ return DAG.getNode(ISD::OR, DL, VecVT, A, B);
+ return DAG.getNode(ISD::AND, DL, CmpVT, A, B);
+ }
+ if (X.getOpcode() == ISD::XOR) {
+ SDValue A = SToV(Op0);
+ SDValue B = SToV(Op1);
+ if (VecVT != CmpVT)
+ return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETNE);
+ if (HasPT)
+ return DAG.getNode(ISD::XOR, DL, VecVT, A, B);
+ return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ);
+ }
+ llvm_unreachable("Impossible");
+}
+
+/// Try to map a 128-bit or larger integer comparison to vector instructions
+/// before type legalization splits it up into chunks.
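+/// e.g. setcc i128 X, Y, eq becomes, with SSE4.1,
+///   ptest (xor (v2i64 X), (v2i64 Y)) ; sete
+/// and with plain SSE2,
+///   pcmpeqb (v16i8 X), (v16i8 Y) ; pmovmskb ; cmp $0xFFFF ; sete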
+static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y,
+ ISD::CondCode CC,
+ const SDLoc &DL,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");
+
+ // We're looking for an oversized integer equality comparison.
+ EVT OpVT = X.getValueType();
+ unsigned OpSize = OpVT.getSizeInBits();
+ if (!OpVT.isScalarInteger() || OpSize < 128)
+ return SDValue();
+
+ // Ignore a comparison with zero because that gets special treatment in
+ // EmitTest(). But make an exception for the special case of a pair of
+ // logically-combined vector-sized operands compared to zero. This pattern may
+ // be generated by the memcmp expansion pass with oversized integer compares
+ // (see PR33325).
+ bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X);
+ if (isNullConstant(Y) && !IsOrXorXorTreeCCZero)
+ return SDValue();
+
+ // Don't perform this combine if constructing the vector will be expensive.
+ auto IsVectorBitCastCheap = [](SDValue X) {
+ X = peekThroughBitcasts(X);
+ return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
+ X.getOpcode() == ISD::LOAD;
+ };
+ if ((!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) &&
+ !IsOrXorXorTreeCCZero)
+ return SDValue();
+
+ // Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands.
+ // Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands.
+ // Otherwise use PCMPEQ (plus AND) and mask testing.
+ bool NoImplicitFloatOps =
+ DAG.getMachineFunction().getFunction().hasFnAttribute(
+ Attribute::NoImplicitFloat);
+ if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
+ ((OpSize == 128 && Subtarget.hasSSE2()) ||
+ (OpSize == 256 && Subtarget.hasAVX()) ||
+ (OpSize == 512 && Subtarget.useAVX512Regs()))) {
+ bool HasPT = Subtarget.hasSSE41();
+
+ // PTEST and MOVMSK are slow on Knights Landing and Knights Mill and widened
+ // vector registers are essentially free. (Technically, widening registers
+ // prevents load folding, but the tradeoff is worth it.)
+ bool PreferKOT = Subtarget.preferMaskRegisters();
+ bool NeedZExt = PreferKOT && !Subtarget.hasVLX() && OpSize != 512;
+
+ EVT VecVT = MVT::v16i8;
+ EVT CmpVT = PreferKOT ? MVT::v16i1 : VecVT;
+ if (OpSize == 256) {
+ VecVT = MVT::v32i8;
+ CmpVT = PreferKOT ? MVT::v32i1 : VecVT;
+ }
+ EVT CastVT = VecVT;
+ bool NeedsAVX512FCast = false;
+ if (OpSize == 512 || NeedZExt) {
+ if (Subtarget.hasBWI()) {
+ VecVT = MVT::v64i8;
+ CmpVT = MVT::v64i1;
+ if (OpSize == 512)
+ CastVT = VecVT;
+ } else {
+ VecVT = MVT::v16i32;
+ CmpVT = MVT::v16i1;
+ CastVT = OpSize == 512 ? VecVT
+ : OpSize == 256 ? MVT::v8i32
+ : MVT::v4i32;
+ NeedsAVX512FCast = true;
+ }
+ }
+
+ auto ScalarToVector = [&](SDValue X) -> SDValue {
+ bool TmpZext = false;
+ EVT TmpCastVT = CastVT;
+ if (X.getOpcode() == ISD::ZERO_EXTEND) {
+ SDValue OrigX = X.getOperand(0);
+ unsigned OrigSize = OrigX.getScalarValueSizeInBits();
+ if (OrigSize < OpSize) {
+ if (OrigSize == 128) {
+ TmpCastVT = NeedsAVX512FCast ? MVT::v4i32 : MVT::v16i8;
+ X = OrigX;
+ TmpZext = true;
+ } else if (OrigSize == 256) {
+ TmpCastVT = NeedsAVX512FCast ? MVT::v8i32 : MVT::v32i8;
+ X = OrigX;
+ TmpZext = true;
+ }
+ }
+ }
+ X = DAG.getBitcast(TmpCastVT, X);
+ if (!NeedZExt && !TmpZext)
+ return X;
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT,
+ DAG.getConstant(0, DL, VecVT), X,
+ DAG.getVectorIdxConstant(0, DL));
+ };
+
+ SDValue Cmp;
+ if (IsOrXorXorTreeCCZero) {
+ // This is a bitwise-combined equality comparison of 2 pairs of vectors:
+ // setcc i128 (or (xor A, B), (xor C, D)), 0, eq|ne
+ // Use 2 vector equality compares and 'and' the results before doing a
+ // MOVMSK.
+ Cmp = emitOrXorXorTree(X, DL, DAG, VecVT, CmpVT, HasPT, ScalarToVector);
+ } else {
+ SDValue VecX = ScalarToVector(X);
+ SDValue VecY = ScalarToVector(Y);
+ if (VecVT != CmpVT) {
+ Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETNE);
+ } else if (HasPT) {
+ Cmp = DAG.getNode(ISD::XOR, DL, VecVT, VecX, VecY);
+ } else {
+ Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ);
+ }
+ }
+ // AVX512 should emit a setcc that will lower to kortest.
+ if (VecVT != CmpVT) {
+ EVT KRegVT = CmpVT == MVT::v64i1 ? MVT::i64
+ : CmpVT == MVT::v32i1 ? MVT::i32
+ : MVT::i16;
+ return DAG.getSetCC(DL, VT, DAG.getBitcast(KRegVT, Cmp),
+ DAG.getConstant(0, DL, KRegVT), CC);
+ }
+ if (HasPT) {
+ SDValue BCCmp =
+ DAG.getBitcast(OpSize == 256 ? MVT::v4i64 : MVT::v2i64, Cmp);
+ SDValue PT = DAG.getNode(X86ISD::PTEST, DL, MVT::i32, BCCmp, BCCmp);
+ X86::CondCode X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
+ SDValue X86SetCC = getSETCC(X86CC, PT, DL, DAG);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, X86SetCC.getValue(0));
+ }
+ // If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality.
+ // setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq
+ // setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne
+ assert(Cmp.getValueType() == MVT::v16i8 &&
+ "Non 128-bit vector on pre-SSE41 target");
+ SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp);
+ SDValue FFFFs = DAG.getConstant(0xFFFF, DL, MVT::i32);
+ return DAG.getSetCC(DL, VT, MovMsk, FFFFs, CC);
+ }
+
+ return SDValue();
+}
+
+/// Helper for matching BINOP(EXTRACTELT(X,0),BINOP(EXTRACTELT(X,1),...))
/// style scalarized (associative) reduction patterns. Partial reductions
/// are supported when the pointer SrcMask is non-null.
/// TODO - move this to SelectionDAG?
@@ -23906,21 +24704,31 @@ static bool matchScalarReduction(SDValue Op, ISD::NodeType BinOp,
return true;
}
-// Helper function for comparing all bits of a vector against zero.
-static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC,
- const APInt &Mask,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG, X86::CondCode &X86CC) {
- EVT VT = V.getValueType();
+// Helper function for comparing all bits of two vectors.
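+// e.g. an all-equal test of two 128-bit vectors uses PTEST(XOR(LHS,RHS)) on
+// SSE4.1, and MOVMSK(NOT(PCMPEQ(LHS,RHS))) compared against zero otherwise.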
+static SDValue LowerVectorAllEqual(const SDLoc &DL, SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, const APInt &OriginalMask,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG, X86::CondCode &X86CC) {
+ EVT VT = LHS.getValueType();
unsigned ScalarSize = VT.getScalarSizeInBits();
- if (Mask.getBitWidth() != ScalarSize) {
+ if (OriginalMask.getBitWidth() != ScalarSize) {
assert(ScalarSize == 1 && "Element Mask vs Vector bitwidth mismatch");
return SDValue();
}
+  // Quit if not convertible to legal scalar or 128/256-bit vector.
+ if (!llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
+ return SDValue();
+
+ // FCMP may use ISD::SETNE when nnan - early out if we manage to get here.
+ if (VT.isFloatingPoint())
+ return SDValue();
+
assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode");
X86CC = (CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE);
+ APInt Mask = OriginalMask;
+
auto MaskBits = [&](SDValue Src) {
if (Mask.isAllOnes())
return Src;
@@ -23932,114 +24740,238 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC,
// For sub-128-bit vector, cast to (legal) integer and compare with zero.
if (VT.getSizeInBits() < 128) {
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
- if (!DAG.getTargetLoweringInfo().isTypeLegal(IntVT))
- return SDValue();
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(IntVT)) {
+ if (IntVT != MVT::i64)
+ return SDValue();
+ auto SplitLHS = DAG.SplitScalar(DAG.getBitcast(IntVT, MaskBits(LHS)), DL,
+ MVT::i32, MVT::i32);
+ auto SplitRHS = DAG.SplitScalar(DAG.getBitcast(IntVT, MaskBits(RHS)), DL,
+ MVT::i32, MVT::i32);
+ SDValue Lo =
+ DAG.getNode(ISD::XOR, DL, MVT::i32, SplitLHS.first, SplitRHS.first);
+ SDValue Hi =
+ DAG.getNode(ISD::XOR, DL, MVT::i32, SplitLHS.second, SplitRHS.second);
+ return DAG.getNode(X86ISD::CMP, DL, MVT::i32,
+ DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi),
+ DAG.getConstant(0, DL, MVT::i32));
+ }
return DAG.getNode(X86ISD::CMP, DL, MVT::i32,
- DAG.getBitcast(IntVT, MaskBits(V)),
- DAG.getConstant(0, DL, IntVT));
+ DAG.getBitcast(IntVT, MaskBits(LHS)),
+ DAG.getBitcast(IntVT, MaskBits(RHS)));
}
- // Quit if not splittable to 128/256-bit vector.
- if (!isPowerOf2_32(VT.getSizeInBits()))
+ // Without PTEST, a masked v2i64 or-reduction is not faster than
+ // scalarization.
+ bool UseKORTEST = Subtarget.useAVX512Regs();
+ bool UsePTEST = Subtarget.hasSSE41();
+ if (!UsePTEST && !Mask.isAllOnes() && ScalarSize > 32)
return SDValue();
- // Split down to 128/256-bit vector.
- unsigned TestSize = Subtarget.hasAVX() ? 256 : 128;
- while (VT.getSizeInBits() > TestSize) {
- auto Split = DAG.SplitVector(V, DL);
- VT = Split.first.getValueType();
- V = DAG.getNode(ISD::OR, DL, VT, Split.first, Split.second);
+ // Split down to 128/256/512-bit vector.
+ unsigned TestSize = UseKORTEST ? 512 : (Subtarget.hasAVX() ? 256 : 128);
+
+ // If the input vector has vector elements wider than the target test size,
+ // then cast to <X x i64> so it will safely split.
+ if (ScalarSize > TestSize) {
+ if (!Mask.isAllOnes())
+ return SDValue();
+ VT = EVT::getVectorVT(*DAG.getContext(), MVT::i64, VT.getSizeInBits() / 64);
+ LHS = DAG.getBitcast(VT, LHS);
+ RHS = DAG.getBitcast(VT, RHS);
+ Mask = APInt::getAllOnes(64);
+ }
+
+ if (VT.getSizeInBits() > TestSize) {
+ KnownBits KnownRHS = DAG.computeKnownBits(RHS);
+ if (KnownRHS.isConstant() && KnownRHS.getConstant() == Mask) {
+ // If ICMP(AND(LHS,MASK),MASK) - reduce using AND splits.
+ while (VT.getSizeInBits() > TestSize) {
+ auto Split = DAG.SplitVector(LHS, DL);
+ VT = Split.first.getValueType();
+ LHS = DAG.getNode(ISD::AND, DL, VT, Split.first, Split.second);
+ }
+ RHS = DAG.getAllOnesConstant(DL, VT);
+ } else if (!UsePTEST && !KnownRHS.isZero()) {
+ // MOVMSK Special Case:
+ // ALLOF(CMPEQ(X,Y)) -> AND(CMPEQ(X[0],Y[0]),CMPEQ(X[1],Y[1]),....)
+ MVT SVT = ScalarSize >= 32 ? MVT::i32 : MVT::i8;
+ VT = MVT::getVectorVT(SVT, VT.getSizeInBits() / SVT.getSizeInBits());
+ LHS = DAG.getBitcast(VT, MaskBits(LHS));
+ RHS = DAG.getBitcast(VT, MaskBits(RHS));
+ EVT BoolVT = VT.changeVectorElementType(MVT::i1);
+ SDValue V = DAG.getSetCC(DL, BoolVT, LHS, RHS, ISD::SETEQ);
+ V = DAG.getSExtOrTrunc(V, DL, VT);
+ while (VT.getSizeInBits() > TestSize) {
+ auto Split = DAG.SplitVector(V, DL);
+ VT = Split.first.getValueType();
+ V = DAG.getNode(ISD::AND, DL, VT, Split.first, Split.second);
+ }
+ V = DAG.getNOT(DL, V, VT);
+ V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
+ return DAG.getNode(X86ISD::CMP, DL, MVT::i32, V,
+ DAG.getConstant(0, DL, MVT::i32));
+ } else {
+ // Convert to a ICMP_EQ(XOR(LHS,RHS),0) pattern.
+ SDValue V = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS);
+ while (VT.getSizeInBits() > TestSize) {
+ auto Split = DAG.SplitVector(V, DL);
+ VT = Split.first.getValueType();
+ V = DAG.getNode(ISD::OR, DL, VT, Split.first, Split.second);
+ }
+ LHS = V;
+ RHS = DAG.getConstant(0, DL, VT);
+ }
+ }
+
+ if (UseKORTEST && VT.is512BitVector()) {
+ MVT TestVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);
+ MVT BoolVT = TestVT.changeVectorElementType(MVT::i1);
+ LHS = DAG.getBitcast(TestVT, MaskBits(LHS));
+ RHS = DAG.getBitcast(TestVT, MaskBits(RHS));
+ SDValue V = DAG.getSetCC(DL, BoolVT, LHS, RHS, ISD::SETNE);
+ return DAG.getNode(X86ISD::KORTEST, DL, MVT::i32, V, V);
}
- bool UsePTEST = Subtarget.hasSSE41();
if (UsePTEST) {
- MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
- V = DAG.getBitcast(TestVT, MaskBits(V));
+ MVT TestVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
+ LHS = DAG.getBitcast(TestVT, MaskBits(LHS));
+ RHS = DAG.getBitcast(TestVT, MaskBits(RHS));
+ SDValue V = DAG.getNode(ISD::XOR, DL, TestVT, LHS, RHS);
return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, V, V);
}
- // Without PTEST, a masked v2i64 or-reduction is not faster than
- // scalarization.
- if (!Mask.isAllOnes() && VT.getScalarSizeInBits() > 32)
- return SDValue();
-
- V = DAG.getBitcast(MVT::v16i8, MaskBits(V));
- V = DAG.getNode(X86ISD::PCMPEQ, DL, MVT::v16i8, V,
- getZeroVector(MVT::v16i8, Subtarget, DAG, DL));
+ assert(VT.getSizeInBits() == 128 && "Failure to split to 128-bits");
+ MVT MaskVT = ScalarSize >= 32 ? MVT::v4i32 : MVT::v16i8;
+ LHS = DAG.getBitcast(MaskVT, MaskBits(LHS));
+ RHS = DAG.getBitcast(MaskVT, MaskBits(RHS));
+ SDValue V = DAG.getNode(X86ISD::PCMPEQ, DL, MaskVT, LHS, RHS);
+ V = DAG.getNOT(DL, V, MaskVT);
V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
return DAG.getNode(X86ISD::CMP, DL, MVT::i32, V,
- DAG.getConstant(0xFFFF, DL, MVT::i32));
+ DAG.getConstant(0, DL, MVT::i32));
}
-// Check whether an OR'd reduction tree is PTEST-able, or if we can fallback to
-// CMP(MOVMSK(PCMPEQB(X,0))).
-static SDValue MatchVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
- const SDLoc &DL,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG, SDValue &X86CC) {
+// Check whether an AND/OR'd reduction tree is PTEST-able, or if we can fallback
+// to CMP(MOVMSK(PCMPEQB(X,Y))).
+static SDValue MatchVectorAllEqualTest(SDValue LHS, SDValue RHS,
+ ISD::CondCode CC, const SDLoc &DL,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG,
+ X86::CondCode &X86CC) {
assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode");
+ bool CmpNull = isNullConstant(RHS);
+ bool CmpAllOnes = isAllOnesConstant(RHS);
+ if (!CmpNull && !CmpAllOnes)
+ return SDValue();
+
+ SDValue Op = LHS;
if (!Subtarget.hasSSE2() || !Op->hasOneUse())
return SDValue();
// Check whether we're masking/truncating an OR-reduction result, in which
// case track the masked bits.
+ // TODO: Add CmpAllOnes support.
APInt Mask = APInt::getAllOnes(Op.getScalarValueSizeInBits());
- switch (Op.getOpcode()) {
- case ISD::TRUNCATE: {
- SDValue Src = Op.getOperand(0);
- Mask = APInt::getLowBitsSet(Src.getScalarValueSizeInBits(),
- Op.getScalarValueSizeInBits());
- Op = Src;
- break;
- }
- case ISD::AND: {
- if (auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- Mask = Cst->getAPIntValue();
- Op = Op.getOperand(0);
+ if (CmpNull) {
+ switch (Op.getOpcode()) {
+ case ISD::TRUNCATE: {
+ SDValue Src = Op.getOperand(0);
+ Mask = APInt::getLowBitsSet(Src.getScalarValueSizeInBits(),
+ Op.getScalarValueSizeInBits());
+ Op = Src;
+ break;
+ }
+ case ISD::AND: {
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ Mask = Cst->getAPIntValue();
+ Op = Op.getOperand(0);
+ }
+ break;
+ }
}
- break;
- }
}
+ ISD::NodeType LogicOp = CmpNull ? ISD::OR : ISD::AND;
+
+ // Match icmp(or(extract(X,0),extract(X,1)),0) anyof reduction patterns.
+ // Match icmp(and(extract(X,0),extract(X,1)),-1) allof reduction patterns.
SmallVector<SDValue, 8> VecIns;
- if (Op.getOpcode() == ISD::OR && matchScalarReduction(Op, ISD::OR, VecIns)) {
+ if (Op.getOpcode() == LogicOp && matchScalarReduction(Op, LogicOp, VecIns)) {
EVT VT = VecIns[0].getValueType();
assert(llvm::all_of(VecIns,
[VT](SDValue V) { return VT == V.getValueType(); }) &&
"Reduction source vector mismatch");
- // Quit if less than 128-bits or not splittable to 128/256-bit vector.
- if (VT.getSizeInBits() < 128 || !isPowerOf2_32(VT.getSizeInBits()))
+ // Quit if not splittable to scalar/128/256/512-bit vector.
+ if (!llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
return SDValue();
- // If more than one full vector is evaluated, OR them first before PTEST.
+ // If more than one full vector is evaluated, AND/OR them first before
+ // PTEST.
for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1;
Slot += 2, e += 1) {
- // Each iteration will OR 2 nodes and append the result until there is
- // only 1 node left, i.e. the final OR'd value of all vectors.
+ // Each iteration will AND/OR 2 nodes and append the result until there is
+ // only 1 node left, i.e. the final value of all vectors.
SDValue LHS = VecIns[Slot];
SDValue RHS = VecIns[Slot + 1];
- VecIns.push_back(DAG.getNode(ISD::OR, DL, VT, LHS, RHS));
+ VecIns.push_back(DAG.getNode(LogicOp, DL, VT, LHS, RHS));
}
- X86::CondCode CCode;
- if (SDValue V = LowerVectorAllZero(DL, VecIns.back(), CC, Mask, Subtarget,
- DAG, CCode)) {
- X86CC = DAG.getTargetConstant(CCode, DL, MVT::i8);
- return V;
- }
+ return LowerVectorAllEqual(DL, VecIns.back(),
+ CmpNull ? DAG.getConstant(0, DL, VT)
+ : DAG.getAllOnesConstant(DL, VT),
+ CC, Mask, Subtarget, DAG, X86CC);
}
+ // Match icmp(reduce_or(X),0) anyof reduction patterns.
+ // Match icmp(reduce_and(X),-1) allof reduction patterns.
if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
ISD::NodeType BinOp;
if (SDValue Match =
- DAG.matchBinOpReduction(Op.getNode(), BinOp, {ISD::OR})) {
- X86::CondCode CCode;
- if (SDValue V =
- LowerVectorAllZero(DL, Match, CC, Mask, Subtarget, DAG, CCode)) {
- X86CC = DAG.getTargetConstant(CCode, DL, MVT::i8);
- return V;
+ DAG.matchBinOpReduction(Op.getNode(), BinOp, {LogicOp})) {
+ EVT MatchVT = Match.getValueType();
+ return LowerVectorAllEqual(DL, Match,
+ CmpNull ? DAG.getConstant(0, DL, MatchVT)
+ : DAG.getAllOnesConstant(DL, MatchVT),
+ CC, Mask, Subtarget, DAG, X86CC);
+ }
+ }
+
+ if (Mask.isAllOnes()) {
+ assert(!Op.getValueType().isVector() &&
+ "Illegal vector type for reduction pattern");
+ SDValue Src = peekThroughBitcasts(Op);
+ if (Src.getValueType().isFixedLengthVector() &&
+ Src.getValueType().getScalarType() == MVT::i1) {
+ // Match icmp(bitcast(icmp_ne(X,Y)),0) reduction patterns.
+ // Match icmp(bitcast(icmp_eq(X,Y)),-1) reduction patterns.
+ if (Src.getOpcode() == ISD::SETCC) {
+ SDValue LHS = Src.getOperand(0);
+ SDValue RHS = Src.getOperand(1);
+ EVT LHSVT = LHS.getValueType();
+ ISD::CondCode SrcCC = cast<CondCodeSDNode>(Src.getOperand(2))->get();
+ if (SrcCC == (CmpNull ? ISD::SETNE : ISD::SETEQ) &&
+ llvm::has_single_bit<uint32_t>(LHSVT.getSizeInBits())) {
+ APInt SrcMask = APInt::getAllOnes(LHSVT.getScalarSizeInBits());
+ return LowerVectorAllEqual(DL, LHS, RHS, CC, SrcMask, Subtarget, DAG,
+ X86CC);
+ }
+ }
+ // Match icmp(bitcast(vXi1 trunc(Y)),0) reduction patterns.
+ // Match icmp(bitcast(vXi1 trunc(Y)),-1) reduction patterns.
+ // Peek through truncation, mask the LSB and compare against zero/LSB.
+ if (Src.getOpcode() == ISD::TRUNCATE) {
+ SDValue Inner = Src.getOperand(0);
+ EVT InnerVT = Inner.getValueType();
+ if (llvm::has_single_bit<uint32_t>(InnerVT.getSizeInBits())) {
+ unsigned BW = InnerVT.getScalarSizeInBits();
+ APInt SrcMask = APInt(BW, 1);
+ APInt Cmp = CmpNull ? APInt::getZero(BW) : SrcMask;
+ return LowerVectorAllEqual(DL, Inner,
+ DAG.getConstant(Cmp, DL, InnerVT), CC,
+ SrcMask, Subtarget, DAG, X86CC);
+ }
}
}
}
@@ -24419,7 +25351,7 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
!(Subtarget.is64Bit() && VT == MVT::i64))
return SDValue();
- unsigned Lg2 = Divisor.countTrailingZeros();
+ unsigned Lg2 = Divisor.countr_zero();
// If the divisor is 2 or -2, the default expansion is better.
if (Lg2 == 1)
@@ -24640,9 +25572,10 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
/// incremented or decremented. If incrementing or decrementing would result in
/// unsigned overflow or underflow or this is not a simple vector constant,
/// return an empty value.
-static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc) {
+static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc,
+ bool NSW) {
auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode());
- if (!BV)
+ if (!BV || !V.getValueType().isSimple())
return SDValue();
MVT VT = V.getSimpleValueType();
@@ -24659,6 +25592,9 @@ static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc) {
const APInt &EltC = Elt->getAPIntValue();
if ((IsInc && EltC.isMaxValue()) || (!IsInc && EltC.isZero()))
return SDValue();
+ if (NSW && ((IsInc && EltC.isMaxSignedValue()) ||
+ (!IsInc && EltC.isMinSignedValue())))
+ return SDValue();
NewVecC.push_back(DAG.getConstant(EltC + (IsInc ? 1 : -1), DL, EltVT));
}
@@ -24692,7 +25628,8 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
// Only do this pre-AVX since vpcmp* is no longer destructive.
if (Subtarget.hasAVX())
return SDValue();
- SDValue ULEOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/false);
+ SDValue ULEOp1 =
+ incDecVectorConstant(Op1, DAG, /*IsInc*/ false, /*NSW*/ false);
if (!ULEOp1)
return SDValue();
Op1 = ULEOp1;
@@ -24703,7 +25640,8 @@ static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
// This is beneficial because materializing a constant 0 for the PCMPEQ is
// probably cheaper than XOR+PCMPGT using 2 different vector constants:
// cmpgt (xor X, SignMaskC) CmpC --> cmpeq (usubsat (CmpC+1), X), 0
- SDValue UGEOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/true);
+ SDValue UGEOp1 =
+ incDecVectorConstant(Op1, DAG, /*IsInc*/ true, /*NSW*/ false);
if (!UGEOp1)
return SDValue();
Op1 = Op0;
@@ -24996,14 +25934,16 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// condition to avoid an invert.
if (Cond == ISD::SETUGT) {
// X > C --> X >= (C+1) --> X == umax(X, C+1)
- if (SDValue UGTOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/true)) {
+ if (SDValue UGTOp1 =
+ incDecVectorConstant(Op1, DAG, /*IsInc*/ true, /*NSW*/ false)) {
Op1 = UGTOp1;
Cond = ISD::SETUGE;
}
}
if (Cond == ISD::SETULT) {
// X < C --> X <= (C-1) --> X == umin(X, C-1)
- if (SDValue ULTOp1 = incDecVectorConstant(Op1, DAG, /*IsInc*/false)) {
+ if (SDValue ULTOp1 =
+ incDecVectorConstant(Op1, DAG, /*IsInc*/ false, /*NSW*/ false)) {
Op1 = ULTOp1;
Cond = ISD::SETULE;
}
@@ -25159,9 +26099,7 @@ static SDValue EmitAVX512Test(SDValue Op0, SDValue Op1, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
SDValue &X86CC) {
- // Only support equality comparisons.
- if (CC != ISD::SETEQ && CC != ISD::SETNE)
- return SDValue();
+ assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode");
// Must be a bitcast from vXi1.
if (Op0.getOpcode() != ISD::BITCAST)
@@ -25216,63 +26154,65 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
ISD::CondCode CC, const SDLoc &dl,
SelectionDAG &DAG,
SDValue &X86CC) const {
- // Optimize to BT if possible.
- // Lower (X & (1 << N)) == 0 to BT(X, N).
- // Lower ((X >>u N) & 1) != 0 to BT(X, N).
- // Lower ((X >>s N) & 1) != 0 to BT(X, N).
- if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && isNullConstant(Op1) &&
- (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ // Equality Combines.
+ if (CC == ISD::SETEQ || CC == ISD::SETNE) {
X86::CondCode X86CondCode;
- if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CondCode)) {
- X86CC = DAG.getTargetConstant(X86CondCode, dl, MVT::i8);
- return BT;
+
+ // Optimize to BT if possible.
+ // Lower (X & (1 << N)) == 0 to BT(X, N).
+ // Lower ((X >>u N) & 1) != 0 to BT(X, N).
+ // Lower ((X >>s N) & 1) != 0 to BT(X, N).
+ if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && isNullConstant(Op1)) {
+ if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CondCode)) {
+ X86CC = DAG.getTargetConstant(X86CondCode, dl, MVT::i8);
+ return BT;
+ }
}
- }
- // Try to use PTEST/PMOVMSKB for a tree ORs equality compared with 0.
- // TODO: We could do AND tree with all 1s as well by using the C flag.
- if (isNullConstant(Op1) && (CC == ISD::SETEQ || CC == ISD::SETNE))
- if (SDValue CmpZ =
- MatchVectorAllZeroTest(Op0, CC, dl, Subtarget, DAG, X86CC))
+ // Try to use PTEST/PMOVMSKB for a tree AND/ORs equality compared with -1/0.
+ if (SDValue CmpZ = MatchVectorAllEqualTest(Op0, Op1, CC, dl, Subtarget, DAG,
+ X86CondCode)) {
+ X86CC = DAG.getTargetConstant(X86CondCode, dl, MVT::i8);
return CmpZ;
+ }
- // Try to lower using KORTEST or KTEST.
- if (SDValue Test = EmitAVX512Test(Op0, Op1, CC, dl, DAG, Subtarget, X86CC))
- return Test;
+ // Try to lower using KORTEST or KTEST.
+ if (SDValue Test = EmitAVX512Test(Op0, Op1, CC, dl, DAG, Subtarget, X86CC))
+ return Test;
- // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms of
- // these.
- if ((isOneConstant(Op1) || isNullConstant(Op1)) &&
- (CC == ISD::SETEQ || CC == ISD::SETNE)) {
- // If the input is a setcc, then reuse the input setcc or use a new one with
- // the inverted condition.
- if (Op0.getOpcode() == X86ISD::SETCC) {
- bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1);
+ // Look for X == 0, X == 1, X != 0, or X != 1. We can simplify some forms
+ // of these.
+ if (isOneConstant(Op1) || isNullConstant(Op1)) {
+ // If the input is a setcc, then reuse the input setcc or use a new one
+ // with the inverted condition.
+ if (Op0.getOpcode() == X86ISD::SETCC) {
+ bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1);
- X86CC = Op0.getOperand(0);
- if (Invert) {
- X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
- CCode = X86::GetOppositeBranchCondition(CCode);
- X86CC = DAG.getTargetConstant(CCode, dl, MVT::i8);
- }
+ X86CC = Op0.getOperand(0);
+ if (Invert) {
+ X86CondCode = (X86::CondCode)Op0.getConstantOperandVal(0);
+ X86CondCode = X86::GetOppositeBranchCondition(X86CondCode);
+ X86CC = DAG.getTargetConstant(X86CondCode, dl, MVT::i8);
+ }
- return Op0.getOperand(1);
+ return Op0.getOperand(1);
+ }
}
- }
- // Try to use the carry flag from the add in place of an separate CMP for:
- // (seteq (add X, -1), -1). Similar for setne.
- if (isAllOnesConstant(Op1) && Op0.getOpcode() == ISD::ADD &&
- Op0.getOperand(1) == Op1 && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
- if (isProfitableToUseFlagOp(Op0)) {
- SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
+    // Try to use the carry flag from the add in place of a separate CMP for:
+ // (seteq (add X, -1), -1). Similar for setne.
+ if (isAllOnesConstant(Op1) && Op0.getOpcode() == ISD::ADD &&
+ Op0.getOperand(1) == Op1) {
+ if (isProfitableToUseFlagOp(Op0)) {
+ SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);
- SDValue New = DAG.getNode(X86ISD::ADD, dl, VTs, Op0.getOperand(0),
- Op0.getOperand(1));
- DAG.ReplaceAllUsesOfValueWith(SDValue(Op0.getNode(), 0), New);
- X86::CondCode CCode = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
- X86CC = DAG.getTargetConstant(CCode, dl, MVT::i8);
- return SDValue(New.getNode(), 1);
+ SDValue New = DAG.getNode(X86ISD::ADD, dl, VTs, Op0.getOperand(0),
+ Op0.getOperand(1));
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op0.getNode(), 0), New);
+ X86CondCode = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+ X86CC = DAG.getTargetConstant(X86CondCode, dl, MVT::i8);
+ return SDValue(New.getNode(), 1);
+ }
}
}
@@ -25794,7 +26734,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// X86ISD::CMOV means set the result (which is operand 1) to the RHS if
// condition is true.
SDValue Ops[] = { Op2, Op1, CC, Cond };
- return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops);
+ return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops, Op->getFlags());
}
static SDValue LowerSIGN_EXTEND_Mask(SDValue Op,
@@ -25941,6 +26881,19 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
// We should only get here for sign extend.
assert(Opc == ISD::SIGN_EXTEND_VECTOR_INREG && "Unexpected opcode!");
assert(VT.is128BitVector() && InVT.is128BitVector() && "Unexpected VTs");
+ unsigned InNumElts = InVT.getVectorNumElements();
+
+ // If the source elements are already all-signbits, we don't need to extend,
+ // just splat the elements.
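+  // e.g. for a v16i8 -> v4i32 sign extension of an all-signbits source, bytes
+  // 0..3 are each splatted four times: shuffle mask <0,0,0,0,1,1,1,1,...>.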
+ APInt DemandedElts = APInt::getLowBitsSet(InNumElts, NumElts);
+ if (DAG.ComputeNumSignBits(In, DemandedElts) == InVT.getScalarSizeInBits()) {
+ unsigned Scale = InNumElts / NumElts;
+ SmallVector<int, 16> ShuffleMask;
+ for (unsigned I = 0; I != NumElts; ++I)
+ ShuffleMask.append(Scale, I);
+ return DAG.getBitcast(VT,
+ DAG.getVectorShuffle(InVT, dl, In, In, ShuffleMask));
+ }
// pre-SSE41 targets unpack lower lanes and then sign-extend using SRAI.
SDValue Curr = In;
@@ -25953,8 +26906,6 @@ static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
unsigned DestWidth = DestVT.getScalarSizeInBits();
unsigned Scale = DestWidth / InSVT.getSizeInBits();
-
- unsigned InNumElts = InVT.getVectorNumElements();
unsigned DestElts = DestVT.getVectorNumElements();
// Build a shuffle mask that takes each input element and places it in the
@@ -26145,9 +27096,7 @@ static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
if (StoreVT.is256BitVector() ||
((StoreVT == MVT::v32i16 || StoreVT == MVT::v64i8) &&
!Subtarget.hasBWI())) {
- SmallVector<SDValue, 4> CatOps;
- if (StoredVal.hasOneUse() &&
- collectConcatOps(StoredVal.getNode(), CatOps, DAG))
+ if (StoredVal.hasOneUse() && isFreeToSplitVector(StoredVal.getNode(), DAG))
return splitVectorStore(St, DAG);
return SDValue();
}
@@ -26782,14 +27731,9 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
// In case 32bit mode, bitcast i64 is illegal, extend/split it.
SDValue Lo, Hi;
- Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
- DAG.getConstant(0, dl, MVT::i32));
- Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
- DAG.getConstant(1, dl, MVT::i32));
-
+ std::tie(Lo, Hi) = DAG.SplitScalar(Mask, dl, MVT::i32, MVT::i32);
Lo = DAG.getBitcast(MVT::v32i1, Lo);
Hi = DAG.getBitcast(MVT::v32i1, Hi);
-
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
} else {
MVT BitcastVT = MVT::getVectorVT(MVT::i1,
@@ -28195,25 +29139,25 @@ static SDValue MarkEHGuard(SDValue Op, SelectionDAG &DAG) {
/// Emit Truncating Store with signed or unsigned saturation.
static SDValue
-EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, SDValue Val,
+EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &DL, SDValue Val,
SDValue Ptr, EVT MemVT, MachineMemOperand *MMO,
SelectionDAG &DAG) {
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Undef = DAG.getUNDEF(Ptr.getValueType());
SDValue Ops[] = { Chain, Val, Ptr, Undef };
unsigned Opc = SignedSat ? X86ISD::VTRUNCSTORES : X86ISD::VTRUNCSTOREUS;
- return DAG.getMemIntrinsicNode(Opc, Dl, VTs, Ops, MemVT, MMO);
+ return DAG.getMemIntrinsicNode(Opc, DL, VTs, Ops, MemVT, MMO);
}
/// Emit Masked Truncating Store with signed or unsigned saturation.
-static SDValue
-EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl,
+static SDValue EmitMaskedTruncSStore(bool SignedSat, SDValue Chain,
+ const SDLoc &DL,
SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT,
MachineMemOperand *MMO, SelectionDAG &DAG) {
SDVTList VTs = DAG.getVTList(MVT::Other);
SDValue Ops[] = { Chain, Val, Ptr, Mask };
unsigned Opc = SignedSat ? X86ISD::VMTRUNCSTORES : X86ISD::VMTRUNCSTOREUS;
- return DAG.getMemIntrinsicNode(Opc, Dl, VTs, Ops, MemVT, MMO);
+ return DAG.getMemIntrinsicNode(Opc, DL, VTs, Ops, MemVT, MMO);
}
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
@@ -28233,7 +29177,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SDValue Chain = Op->getOperand(0);
SDValue CopyRBP = DAG.getCopyFromReg(Chain, dl, X86::RBP, MVT::i64);
SDValue Result =
- SDValue(DAG.getMachineNode(X86::SUB64ri8, dl, MVT::i64, CopyRBP,
+ SDValue(DAG.getMachineNode(X86::SUB64ri32, dl, MVT::i64, CopyRBP,
DAG.getTargetConstant(8, dl, MVT::i32)),
0);
// Return { result, chain }.
@@ -29229,6 +30173,122 @@ SDValue X86TargetLowering::LowerSET_ROUNDING(SDValue Op,
return Chain;
}
+const unsigned X87StateSize = 28;
+const unsigned FPStateSize = 32;
+[[maybe_unused]] const unsigned FPStateSizeInBits = FPStateSize * 8;
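+// The in-memory FP environment is laid out as the 28-byte x87 FNSTENV image
+// followed by the 4-byte MXCSR at offset X87StateSize, 32 bytes in total.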
+
+SDValue X86TargetLowering::LowerGET_FPENV_MEM(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDLoc DL(Op);
+ SDValue Chain = Op->getOperand(0);
+ SDValue Ptr = Op->getOperand(1);
+ auto *Node = cast<FPStateAccessSDNode>(Op);
+ EVT MemVT = Node->getMemoryVT();
+ assert(MemVT.getSizeInBits() == FPStateSizeInBits);
+ MachineMemOperand *MMO = cast<FPStateAccessSDNode>(Op)->getMemOperand();
+
+  // Get x87 state, if it is present.
+ if (Subtarget.hasX87()) {
+ Chain =
+ DAG.getMemIntrinsicNode(X86ISD::FNSTENVm, DL, DAG.getVTList(MVT::Other),
+ {Chain, Ptr}, MemVT, MMO);
+
+ // FNSTENV changes the exception mask, so load back the stored environment.
+ MachineMemOperand::Flags NewFlags =
+ MachineMemOperand::MOLoad |
+ (MMO->getFlags() & ~MachineMemOperand::MOStore);
+ MMO = MF.getMachineMemOperand(MMO, NewFlags);
+ Chain =
+ DAG.getMemIntrinsicNode(X86ISD::FLDENVm, DL, DAG.getVTList(MVT::Other),
+ {Chain, Ptr}, MemVT, MMO);
+ }
+
+ // If target supports SSE, get MXCSR as well.
+ if (Subtarget.hasSSE1()) {
+ // Get pointer to the MXCSR location in memory.
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ SDValue MXCSRAddr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr,
+ DAG.getConstant(X87StateSize, DL, PtrVT));
+ // Store MXCSR into memory.
+ Chain = DAG.getNode(
+ ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
+ DAG.getTargetConstant(Intrinsic::x86_sse_stmxcsr, DL, MVT::i32),
+ MXCSRAddr);
+ }
+
+ return Chain;
+}
+
+static SDValue createSetFPEnvNodes(SDValue Ptr, SDValue Chain, SDLoc DL,
+ EVT MemVT, MachineMemOperand *MMO,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+  // Set x87 state, if it is present.
+ if (Subtarget.hasX87())
+ Chain =
+ DAG.getMemIntrinsicNode(X86ISD::FLDENVm, DL, DAG.getVTList(MVT::Other),
+ {Chain, Ptr}, MemVT, MMO);
+ // If target supports SSE, set MXCSR as well.
+ if (Subtarget.hasSSE1()) {
+ // Get pointer to the MXCSR location in memory.
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ SDValue MXCSRAddr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr,
+ DAG.getConstant(X87StateSize, DL, PtrVT));
+ // Load MXCSR from memory.
+ Chain = DAG.getNode(
+ ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
+ DAG.getTargetConstant(Intrinsic::x86_sse_ldmxcsr, DL, MVT::i32),
+ MXCSRAddr);
+ }
+ return Chain;
+}
+
+SDValue X86TargetLowering::LowerSET_FPENV_MEM(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue Chain = Op->getOperand(0);
+ SDValue Ptr = Op->getOperand(1);
+ auto *Node = cast<FPStateAccessSDNode>(Op);
+ EVT MemVT = Node->getMemoryVT();
+ assert(MemVT.getSizeInBits() == FPStateSizeInBits);
+ MachineMemOperand *MMO = cast<FPStateAccessSDNode>(Op)->getMemOperand();
+ return createSetFPEnvNodes(Ptr, Chain, DL, MemVT, MMO, DAG, Subtarget);
+}
+
+SDValue X86TargetLowering::LowerRESET_FPENV(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SDLoc DL(Op);
+ SDValue Chain = Op.getNode()->getOperand(0);
+
+ IntegerType *ItemTy = Type::getInt32Ty(*DAG.getContext());
+ ArrayType *FPEnvTy = ArrayType::get(ItemTy, 8);
+ SmallVector<Constant *, 8> FPEnvVals;
+
+  // x87 FPU Control Word: masks all floating-point exceptions, sets rounding
+  // to nearest. FPU precision is set to 53 bits on Windows and 64 bits
+  // otherwise for compatibility with glibc.
+ unsigned X87CW = Subtarget.isTargetWindowsMSVC() ? 0x27F : 0x37F;
+ FPEnvVals.push_back(ConstantInt::get(ItemTy, X87CW));
+ Constant *Zero = ConstantInt::get(ItemTy, 0);
+ for (unsigned I = 0; I < 6; ++I)
+ FPEnvVals.push_back(Zero);
+
+  // MXCSR: masks all floating-point exceptions, sets rounding to nearest,
+  // clears all exception flags, and sets DAZ and FTZ to 0.
+ FPEnvVals.push_back(ConstantInt::get(ItemTy, 0x1F80));
+ Constant *FPEnvBits = ConstantArray::get(FPEnvTy, FPEnvVals);
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
+ SDValue Env = DAG.getConstantPool(FPEnvBits, PtrVT);
+ MachinePointerInfo MPI =
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction());
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MPI, MachineMemOperand::MOStore, X87StateSize, Align(4));
+
+ return createSetFPEnvNodes(Env, Chain, DL, MVT::i32, MMO, DAG, Subtarget);
+}
+
/// Lower a vector CTLZ using native supported vector CTLZ instruction.
//
// i8/i16 vector implemented using dword LZCNT vector instruction
@@ -29585,6 +30645,203 @@ static SDValue LowerMINMAX(SDValue Op, const X86Subtarget &Subtarget,
return SDValue();
}
+static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ assert((Op.getOpcode() == ISD::FMAXIMUM || Op.getOpcode() == ISD::FMINIMUM) &&
+ "Expected FMAXIMUM or FMINIMUM opcode");
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT VT = Op.getValueType();
+ SDValue X = Op.getOperand(0);
+ SDValue Y = Op.getOperand(1);
+ SDLoc DL(Op);
+ uint64_t SizeInBits = VT.getScalarSizeInBits();
+ APInt PreferredZero = APInt::getZero(SizeInBits);
+ APInt OppositeZero = PreferredZero;
+ EVT IVT = VT.changeTypeToInteger();
+ X86ISD::NodeType MinMaxOp;
+ if (Op.getOpcode() == ISD::FMAXIMUM) {
+ MinMaxOp = X86ISD::FMAX;
+ OppositeZero.setSignBit();
+ } else {
+ PreferredZero.setSignBit();
+ MinMaxOp = X86ISD::FMIN;
+ }
+ EVT SetCCType =
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+
+ // The tables below show the expected result of Max in cases of NaN and
+ // signed zeros.
+ //
+ // Y Y
+ // Num xNaN +0 -0
+ // --------------- ---------------
+ // Num | Max | Y | +0 | +0 | +0 |
+ // X --------------- X ---------------
+ // xNaN | X | X/Y | -0 | +0 | -0 |
+ // --------------- ---------------
+ //
+ // It is achieved by means of FMAX/FMIN with preliminary checks and operand
+ // reordering.
+ //
+ // We check if any of operands is NaN and return NaN. Then we check if any of
+ // operands is zero or negative zero (for fmaximum and fminimum respectively)
+ // to ensure the correct zero is returned.
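+  // Note that the x86 MAX/MIN instructions simply return their second source
+  // operand whenever either input is NaN or both inputs are zeros, hence the
+  // operand reordering and the trailing NaN select below.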
+ auto MatchesZero = [](SDValue Op, APInt Zero) {
+ Op = peekThroughBitcasts(Op);
+ if (auto *CstOp = dyn_cast<ConstantFPSDNode>(Op))
+ return CstOp->getValueAPF().bitcastToAPInt() == Zero;
+ if (auto *CstOp = dyn_cast<ConstantSDNode>(Op))
+ return CstOp->getAPIntValue() == Zero;
+ if (Op->getOpcode() == ISD::BUILD_VECTOR ||
+ Op->getOpcode() == ISD::SPLAT_VECTOR) {
+ for (const SDValue &OpVal : Op->op_values()) {
+ if (OpVal.isUndef())
+ continue;
+ auto *CstOp = dyn_cast<ConstantFPSDNode>(OpVal);
+ if (!CstOp)
+ return false;
+ if (!CstOp->getValueAPF().isZero())
+ continue;
+ if (CstOp->getValueAPF().bitcastToAPInt() != Zero)
+ return false;
+ }
+ return true;
+ }
+ return false;
+ };
+
+ bool IsXNeverNaN = DAG.isKnownNeverNaN(X);
+ bool IsYNeverNaN = DAG.isKnownNeverNaN(Y);
+ bool IgnoreSignedZero = DAG.getTarget().Options.NoSignedZerosFPMath ||
+ Op->getFlags().hasNoSignedZeros() ||
+ DAG.isKnownNeverZeroFloat(X) ||
+ DAG.isKnownNeverZeroFloat(Y);
+ SDValue NewX, NewY;
+ if (IgnoreSignedZero || MatchesZero(Y, PreferredZero) ||
+ MatchesZero(X, OppositeZero)) {
+ // Operands are already in right order or order does not matter.
+ NewX = X;
+ NewY = Y;
+ } else if (MatchesZero(X, PreferredZero) || MatchesZero(Y, OppositeZero)) {
+ NewX = Y;
+ NewY = X;
+ } else if (!VT.isVector() && (VT == MVT::f16 || Subtarget.hasDQI()) &&
+ (Op->getFlags().hasNoNaNs() || IsXNeverNaN || IsYNeverNaN)) {
+ if (IsXNeverNaN)
+ std::swap(X, Y);
+    // VFPCLASSS consumes a vector type, so provide a minimal one that
+    // corresponds to an xmm register.
+ MVT VectorType = MVT::getVectorVT(VT.getSimpleVT(), 128 / SizeInBits);
+ SDValue VX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VectorType, X);
+ // Bits of classes:
+ // Bits Imm8[0] Imm8[1] Imm8[2] Imm8[3] Imm8[4] Imm8[5] Imm8[6] Imm8[7]
+ // Class QNAN PosZero NegZero PosINF NegINF Denormal Negative SNAN
+ SDValue Imm = DAG.getTargetConstant(MinMaxOp == X86ISD::FMAX ? 0b11 : 0b101,
+ DL, MVT::i32);
+ SDValue IsNanZero = DAG.getNode(X86ISD::VFPCLASSS, DL, MVT::v1i1, VX, Imm);
+ SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
+ DAG.getConstant(0, DL, MVT::v8i1), IsNanZero,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue NeedSwap = DAG.getBitcast(MVT::i8, Ins);
+ NewX = DAG.getSelect(DL, VT, NeedSwap, Y, X);
+ NewY = DAG.getSelect(DL, VT, NeedSwap, X, Y);
+ return DAG.getNode(MinMaxOp, DL, VT, NewX, NewY, Op->getFlags());
+ } else {
+ SDValue IsXSigned;
+ if (Subtarget.is64Bit() || VT != MVT::f64) {
+ SDValue XInt = DAG.getNode(ISD::BITCAST, DL, IVT, X);
+ SDValue ZeroCst = DAG.getConstant(0, DL, IVT);
+ IsXSigned = DAG.getSetCC(DL, SetCCType, XInt, ZeroCst, ISD::SETLT);
+ } else {
+ assert(VT == MVT::f64);
+ SDValue Ins = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2f64,
+ DAG.getConstantFP(0, DL, MVT::v2f64), X,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue VX = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, Ins);
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VX,
+ DAG.getIntPtrConstant(1, DL));
+ Hi = DAG.getBitcast(MVT::i32, Hi);
+ SDValue ZeroCst = DAG.getConstant(0, DL, MVT::i32);
+ EVT SetCCType = TLI.getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), MVT::i32);
+ IsXSigned = DAG.getSetCC(DL, SetCCType, Hi, ZeroCst, ISD::SETLT);
+ }
+ if (MinMaxOp == X86ISD::FMAX) {
+ NewX = DAG.getSelect(DL, VT, IsXSigned, X, Y);
+ NewY = DAG.getSelect(DL, VT, IsXSigned, Y, X);
+ } else {
+ NewX = DAG.getSelect(DL, VT, IsXSigned, Y, X);
+ NewY = DAG.getSelect(DL, VT, IsXSigned, X, Y);
+ }
+ }
+
+ bool IgnoreNaN = DAG.getTarget().Options.NoNaNsFPMath ||
+ Op->getFlags().hasNoNaNs() || (IsXNeverNaN && IsYNeverNaN);
+
+  // If the operands were not reordered for signed-zero handling, NaN handling
+  // is still required, and the second operand is known not to be NaN, then put
+  // it in the first operand so that no NaN fix-up is needed after max/min.
+ if (IgnoreSignedZero && !IgnoreNaN && DAG.isKnownNeverNaN(NewY))
+ std::swap(NewX, NewY);
+
+ SDValue MinMax = DAG.getNode(MinMaxOp, DL, VT, NewX, NewY, Op->getFlags());
+
+ if (IgnoreNaN || DAG.isKnownNeverNaN(NewX))
+ return MinMax;
+
+ SDValue IsNaN = DAG.getSetCC(DL, SetCCType, NewX, NewX, ISD::SETUO);
+ return DAG.getSelect(DL, VT, IsNaN, NewX, MinMax);
+}
+
+static SDValue LowerABD(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+
+ // For AVX1 cases, split to use legal ops.
+ if (VT.is256BitVector() && !Subtarget.hasInt256())
+ return splitVectorIntBinary(Op, DAG);
+
+ if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.useBWIRegs())
+ return splitVectorIntBinary(Op, DAG);
+
+ SDLoc dl(Op);
+ bool IsSigned = Op.getOpcode() == ISD::ABDS;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // TODO: Move to TargetLowering expandABD() once we have ABD promotion.
+ if (VT.isScalarInteger()) {
+ unsigned WideBits = std::max<unsigned>(2 * VT.getScalarSizeInBits(), 32u);
+ MVT WideVT = MVT::getIntegerVT(WideBits);
+ if (TLI.isTypeLegal(WideVT)) {
+ // abds(lhs, rhs) -> trunc(abs(sub(sext(lhs), sext(rhs))))
+ // abdu(lhs, rhs) -> trunc(abs(sub(zext(lhs), zext(rhs))))
+ unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+ SDValue LHS = DAG.getFreeze(Op.getOperand(0));
+ SDValue RHS = DAG.getFreeze(Op.getOperand(1));
+ LHS = DAG.getNode(ExtOpc, dl, WideVT, LHS);
+ RHS = DAG.getNode(ExtOpc, dl, WideVT, RHS);
+ SDValue Diff = DAG.getNode(ISD::SUB, dl, WideVT, LHS, RHS);
+ SDValue AbsDiff = DAG.getNode(ISD::ABS, dl, WideVT, Diff);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, AbsDiff);
+ }
+ }
+
+ // TODO: Move to TargetLowering expandABD().
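+  // abd(lhs, rhs) -> select(icmp((u)gt, lhs, rhs), sub(lhs, rhs), sub(rhs, lhs))
+  // implemented as a bit-select on the all-ones/all-zeros compare mask.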
+ if (!Subtarget.hasSSE41() &&
+ ((IsSigned && VT == MVT::v16i8) || VT == MVT::v4i32)) {
+ SDValue LHS = DAG.getFreeze(Op.getOperand(0));
+ SDValue RHS = DAG.getFreeze(Op.getOperand(1));
+ ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
+ SDValue Cmp = DAG.getSetCC(dl, VT, LHS, RHS, CC);
+ SDValue Diff0 = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
+ SDValue Diff1 = DAG.getNode(ISD::SUB, dl, VT, RHS, LHS);
+ return getBitSelect(dl, VT, Diff0, Diff1, Cmp, DAG);
+ }
+
+ // Default to expand.
+ return SDValue();
+}
+
static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
@@ -30181,8 +31438,11 @@ SDValue X86TargetLowering::LowerWin64_INT128_TO_FP(SDValue Op,
// Return true if the required (according to Opcode) shift-imm form is natively
// supported by the Subtarget
-static bool supportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
+static bool supportedVectorShiftWithImm(EVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
+ if (!VT.isSimple())
+ return false;
+
if (!(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))
return false;
@@ -30204,15 +31464,18 @@ static bool supportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget,
// The shift amount is a variable, but it is the same for all vector lanes.
// These instructions are defined together with shift-immediate.
static
-bool supportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget &Subtarget,
+bool supportedVectorShiftWithBaseAmnt(EVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
return supportedVectorShiftWithImm(VT, Subtarget, Opcode);
}
// Return true if the required (according to Opcode) variable-shift form is
// natively supported by the Subtarget
-static bool supportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget,
+static bool supportedVectorVarShift(EVT VT, const X86Subtarget &Subtarget,
unsigned Opcode) {
+ if (!VT.isSimple())
+ return false;
+
if (!(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))
return false;
@@ -30752,8 +32015,8 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
"Constant build vector expected");
if (VT == MVT::v16i8 && Subtarget.hasInt256()) {
- R = Opc == ISD::SRA ? DAG.getSExtOrTrunc(R, dl, ExVT)
- : DAG.getZExtOrTrunc(R, dl, ExVT);
+ bool IsSigned = Opc == ISD::SRA;
+ R = DAG.getExtOrTrunc(IsSigned, R, dl, ExVT);
R = DAG.getNode(ISD::MUL, dl, ExVT, R, Amt);
R = DAG.getNode(X86ISD::VSRLI, dl, ExVT, R, Cst8);
return DAG.getZExtOrTrunc(R, dl, VT);
@@ -31020,8 +32283,19 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
// fshl(x,y,z) -> unpack(y,x) << (z & (bw-1))) >> bw.
// fshr(x,y,z) -> unpack(y,x) >> (z & (bw-1))).
- if (IsCstSplat)
- return SDValue();
+ if (IsCstSplat) {
+ // TODO: Can't use generic expansion as UNDEF amt elements can be
+ // converted to other values when folded to shift amounts, losing the
+ // splat.
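+    // e.g. fshl v4i32 X, Y, 5 --> or (shl X, 5), (srl Y, 27).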
+ uint64_t ShiftAmt = APIntShiftAmt.urem(EltSizeInBits);
+ uint64_t ShXAmt = IsFSHR ? (EltSizeInBits - ShiftAmt) : ShiftAmt;
+ uint64_t ShYAmt = IsFSHR ? ShiftAmt : (EltSizeInBits - ShiftAmt);
+ SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, Op0,
+ DAG.getShiftAmountConstant(ShXAmt, VT, DL));
+ SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Op1,
+ DAG.getShiftAmountConstant(ShYAmt, VT, DL));
+ return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
+ }
SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT);
SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
@@ -31227,8 +32501,18 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
}
  // Rotate by a uniform constant - expand back to shifts.
- if (IsCstSplat)
- return SDValue();
+ // TODO: Can't use generic expansion as UNDEF amt elements can be converted
+ // to other values when folded to shift amounts, losing the splat.
+ if (IsCstSplat) {
+ uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits);
+ uint64_t ShlAmt = IsROTL ? RotAmt : (EltSizeInBits - RotAmt);
+ uint64_t SrlAmt = IsROTL ? (EltSizeInBits - RotAmt) : RotAmt;
+ SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, R,
+ DAG.getShiftAmountConstant(ShlAmt, VT, DL));
+ SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, R,
+ DAG.getShiftAmountConstant(SrlAmt, VT, DL));
+ return DAG.getNode(ISD::OR, DL, VT, Shl, Srl);
+ }
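
The constant-splat rotate above is the x == y special case of the same expansion: rotl(x,c) == (x << c) | (x >> (bw - c)) with c reduced modulo the width. A short sketch (not from the patch) checking it against std::rotl from C++20:

#include <bit>
#include <cassert>
#include <cstdint>

static uint32_t rotl_const(uint32_t x, unsigned c) {
  c %= 32;
  return c ? (x << c) | (x >> (32 - c)) : x;   // SHL/SRL/OR, as emitted above
}

int main() {
  for (unsigned c = 0; c < 32; ++c)
    assert(rotl_const(0x80000001u, c) == std::rotl(0x80000001u, int(c)));
}
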
// Split 512-bit integers on non 512-bit BWI targets.
if (VT.is512BitVector() && !Subtarget.useBWIRegs())
@@ -31268,14 +32552,31 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
}
}
+ bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
+ unsigned ShiftOpc = IsROTL ? ISD::SHL : ISD::SRL;
+
+ // Attempt to fold as unpack(x,x) << zext(y):
+ // rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw.
+ // rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))).
+ // Const vXi16/vXi32 are excluded in favor of MUL-based lowering.
+ if (!(ConstantAmt && EltSizeInBits != 8) &&
+ !supportedVectorVarShift(VT, Subtarget, ShiftOpc) &&
+ (ConstantAmt || supportedVectorVarShift(ExtVT, Subtarget, ShiftOpc))) {
+ SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));
+ SDValue RHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R));
+ SDValue ALo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, AmtMod, Z));
+ SDValue AHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, AmtMod, Z));
+ SDValue Lo = DAG.getNode(ShiftOpc, DL, ExtVT, RLo, ALo);
+ SDValue Hi = DAG.getNode(ShiftOpc, DL, ExtVT, RHi, AHi);
+ return getPack(DAG, Subtarget, DL, VT, Lo, Hi, IsROTL);
+ }
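
The unpack trick above pairs each element with a copy of itself in a lane of twice the width, so a single wide shift performs the rotate: the high half of (x:x) << (y & (bw-1)) is rotl(x,y), and the low half of (x:x) >> (y & (bw-1)) is rotr(x,y). A scalar C++ sketch of the byte case (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

static uint8_t rotl8_via_widen(uint8_t x, unsigned r) {
  uint16_t w = uint16_t((uint16_t(x) << 8) | x);   // the "unpacked" lane x:x
  return uint8_t(uint16_t(w << (r & 7)) >> 8);     // high byte of the wide shift
}

static uint8_t rotl8_ref(uint8_t x, unsigned r) {
  r &= 7;
  return r ? uint8_t((x << r) | (x >> (8 - r))) : x;
}

int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned r = 0; r < 8; ++r)
      assert(rotl8_via_widen(uint8_t(x), r) == rotl8_ref(uint8_t(x), r));
}
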
+
// v16i8/v32i8/v64i8: Split rotation into rot4/rot2/rot1 stages and select by
// the amount bit.
// TODO: We're doing nothing here that we couldn't do for funnel shifts.
if (EltSizeInBits == 8) {
- bool IsConstAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
MVT WideVT =
MVT::getVectorVT(Subtarget.hasBWI() ? MVT::i16 : MVT::i32, NumElts);
- unsigned ShiftOpc = IsROTL ? ISD::SHL : ISD::SRL;
// Attempt to fold as:
// rotl(x,y) -> (((aext(x) << bw) | zext(x)) << (y & (bw-1))) >> bw.
@@ -31283,7 +32584,7 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
if (supportedVectorVarShift(WideVT, Subtarget, ShiftOpc) &&
supportedVectorShiftWithImm(WideVT, Subtarget, ShiftOpc)) {
// If we're rotating by constant, just use default promotion.
- if (IsConstAmt)
+ if (ConstantAmt)
return SDValue();
// See if we can perform this by widening to vXi16 or vXi32.
R = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, R);
@@ -31297,21 +32598,6 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getNode(ISD::TRUNCATE, DL, VT, R);
}
- // Attempt to fold as unpack(x,x) << zext(y):
- // rotl(x,y) -> (unpack(x,x) << (y & (bw-1))) >> bw.
- // rotr(x,y) -> (unpack(x,x) >> (y & (bw-1))).
- if (IsConstAmt || supportedVectorVarShift(ExtVT, Subtarget, ShiftOpc)) {
- // See if we can perform this by unpacking to lo/hi vXi16.
- SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, R, R));
- SDValue RHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, R, R));
- SDValue ALo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, AmtMod, Z));
- SDValue AHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, AmtMod, Z));
- SDValue Lo = DAG.getNode(ShiftOpc, DL, ExtVT, RLo, ALo);
- SDValue Hi = DAG.getNode(ShiftOpc, DL, ExtVT, RHi, AHi);
- return getPack(DAG, Subtarget, DL, VT, Lo, Hi, IsROTL);
- }
- assert((VT == MVT::v16i8 || VT == MVT::v32i8) && "Unsupported vXi8 type");
-
// We don't need ModuloAmt here as we just peek at individual bits.
auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
if (Subtarget.hasSSE41()) {
@@ -31377,7 +32663,6 @@ static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
}
bool IsSplatAmt = DAG.isSplatValue(Amt);
- bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
bool LegalVarShifts = supportedVectorVarShift(VT, Subtarget, ISD::SHL) &&
supportedVectorVarShift(VT, Subtarget, ISD::SRL);
@@ -31563,11 +32848,19 @@ static std::pair<Value *, BitTestKind> FindSingleBitChange(Value *V) {
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const {
+ using namespace llvm::PatternMatch;
// If the atomicrmw's result isn't actually used, we can just add a "lock"
// prefix to a normal instruction for these operations.
if (AI->use_empty())
return AtomicExpansionKind::None;
+ if (AI->getOperation() == AtomicRMWInst::Xor) {
+ // A ^ SignBit -> A + SignBit. This allows us to use `xadd` which is
+ // preferable to both `cmpxchg` and `btc`.
+ if (match(AI->getOperand(1), m_SignMask()))
+ return AtomicExpansionKind::None;
+ }
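
The sign-mask case above works because XOR and ADD coincide on the sign bit: for the mask S = 1 << (n-1), x ^ S == x + S modulo 2^n (any carry out of the top bit is discarded), and 0 - S == S, which is what lets the atomic lowering later in this patch reuse the negate-and-add path for xor. A tiny exhaustive C++ check for 8-bit values (a sketch, not from the patch):

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t S = 0x80;                      // the 8-bit sign mask
  for (unsigned v = 0; v < 256; ++v) {
    uint8_t x = uint8_t(v);
    assert(uint8_t(x ^ S) == uint8_t(x + S));  // xor and add agree modulo 2^8
  }
  assert(uint8_t(0 - S) == S);                 // negating the sign mask yields itself
}
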
+
// If the atomicrmw's result is used by a single bit AND, we may use
// bts/btr/btc instruction for these operations.
// Note: InstCombinePass can cause a de-optimization here. It replaces the
@@ -31631,6 +32924,7 @@ X86TargetLowering::shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const {
void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
IRBuilder<> Builder(AI);
+ Builder.CollectMetadataToCopy(AI, {LLVMContext::MD_pcsections});
Intrinsic::ID IID_C = Intrinsic::not_intrinsic;
Intrinsic::ID IID_I = Intrinsic::not_intrinsic;
switch (AI->getOperation()) {
@@ -31663,7 +32957,7 @@ void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
BitTest = Intrinsic::getDeclaration(AI->getModule(), IID_C, AI->getType());
- unsigned Imm = countTrailingZeros(C->getZExtValue());
+ unsigned Imm = llvm::countr_zero(C->getZExtValue());
Result = Builder.CreateCall(BitTest, {Addr, Builder.getInt8(Imm)});
} else {
BitTest = Intrinsic::getDeclaration(AI->getModule(), IID_I, AI->getType());
@@ -31771,6 +33065,7 @@ static bool shouldExpandCmpArithRMWInIR(AtomicRMWInst *AI) {
void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
AtomicRMWInst *AI) const {
IRBuilder<> Builder(AI);
+ Builder.CollectMetadataToCopy(AI, {LLVMContext::MD_pcsections});
Instruction *TempI = nullptr;
LLVMContext &Ctx = AI->getContext();
ICmpInst *ICI = dyn_cast<ICmpInst>(AI->user_back());
@@ -31896,6 +33191,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
return nullptr;
IRBuilder<> Builder(AI);
+ Builder.CollectMetadataToCopy(AI, {LLVMContext::MD_pcsections});
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
auto SSID = AI->getSyncScopeID();
// We must restrict the ordering to avoid generating loads with Release or
@@ -32122,11 +33418,9 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
assert(!Subtarget.is64Bit() && "Expected 32-bit mode");
assert(Subtarget.hasBWI() && "Expected BWI target");
SDLoc dl(Op);
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Src,
- DAG.getIntPtrConstant(0, dl));
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::i32, MVT::i32);
Lo = DAG.getBitcast(MVT::v32i1, Lo);
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Src,
- DAG.getIntPtrConstant(1, dl));
Hi = DAG.getBitcast(MVT::v32i1, Hi);
return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
}
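
SplitScalar above hands back the low 32 bits of the i64 source as Lo and the high 32 bits as Hi, so concatenating the two v32i1 halves (Lo first) keeps mask bit i in lane i of the v64i1 result. A scalar model of that split (sketch only, not patch code):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t mask = 0x0123456789ABCDEFULL;
  uint32_t lo = uint32_t(mask);         // lanes 0..31
  uint32_t hi = uint32_t(mask >> 32);   // lanes 32..63
  for (unsigned i = 0; i < 64; ++i) {
    bool lane = i < 32 ? ((lo >> i) & 1) : ((hi >> (i - 32)) & 1);
    assert(lane == ((mask >> i) & 1));
  }
}
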
@@ -32556,10 +33850,13 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
if (N->hasAnyUseOfValue(0)) {
// Handle (atomic_load_sub p, v) as (atomic_load_add p, -v), to be able to
// select LXADD if LOCK_SUB can't be selected.
- if (Opc == ISD::ATOMIC_LOAD_SUB) {
+ // Handle (atomic_load_xor p, SignBit) as (atomic_load_add p, SignBit) so we
+ // can use LXADD as opposed to cmpxchg.
+ if (Opc == ISD::ATOMIC_LOAD_SUB ||
+ (Opc == ISD::ATOMIC_LOAD_XOR && isMinSignedConstant(RHS))) {
RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
- return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS,
- RHS, AN->getMemOperand());
+ return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS, RHS,
+ AN->getMemOperand());
}
assert(Opc == ISD::ATOMIC_LOAD_ADD &&
"Used AtomicRMW ops other than Add should have been expanded!");
@@ -32571,7 +33868,9 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
// changing, all we need is a lowering for the *ordering* impacts of the
// atomicrmw. As such, we can chose a different operation and memory
// location to minimize impact on other code.
- if (Opc == ISD::ATOMIC_LOAD_OR && isNullConstant(RHS)) {
+ // The above holds unless the node is marked volatile in which
+  // case it needs to be preserved according to the LangRef.
+ if (Opc == ISD::ATOMIC_LOAD_OR && isNullConstant(RHS) && !AN->isVolatile()) {
// On X86, the only ordering which actually requires an instruction is
// seq_cst which isn't SingleThread, everything just needs to be preserved
// during codegen and then dropped. Note that we expect (but don't assume),
@@ -32682,7 +33981,7 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
return Swap.getValue(1);
}
-static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
SDNode *N = Op.getNode();
MVT VT = N->getSimpleValueType(0);
unsigned Opc = Op.getOpcode();
@@ -32700,7 +33999,7 @@ static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
Carry, DAG.getAllOnesConstant(DL, CarryVT));
- bool IsAdd = Opc == ISD::ADDCARRY || Opc == ISD::SADDO_CARRY;
+ bool IsAdd = Opc == ISD::UADDO_CARRY || Opc == ISD::SADDO_CARRY;
SDValue Sum = DAG.getNode(IsAdd ? X86ISD::ADC : X86ISD::SBB, DL, VTs,
Op.getOperand(0), Op.getOperand(1),
Carry.getValue(1));
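
The "Carry + all-ones" step above converts a boolean 0/1 carry operand into a real carry flag: adding ~0 to 1 wraps around (carry set), while adding it to 0 does not (carry clear), and that flag then feeds ADC/SBB. A scalar C++ sketch of the conversion (not from the patch):

#include <cassert>
#include <cstdint>

static bool carry_of_add(uint32_t a, uint32_t b) {
  return a + b < a;   // unsigned wraparound <=> carry out
}

int main() {
  assert(carry_of_add(1u, 0xFFFFFFFFu) == true);    // boolean carry 1 -> CF = 1
  assert(carry_of_add(0u, 0xFFFFFFFFu) == false);   // boolean carry 0 -> CF = 0
}
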
@@ -33097,6 +34396,18 @@ static SDValue LowerCVTPS2PH(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
}
+static SDValue LowerPREFETCH(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+
+ // We don't support non-data prefetch without PREFETCHI.
+ // Just preserve the chain.
+ if (!IsData && !Subtarget.hasPREFETCHI())
+ return Op.getOperand(0);
+
+ return Op;
+}
+
static StringRef getInstrStrFromOpNo(const SmallVectorImpl<StringRef> &AsmStrs,
unsigned OpNo) {
const APInt Operand(32, OpNo);
@@ -33132,12 +34443,9 @@ static StringRef getInstrStrFromOpNo(const SmallVectorImpl<StringRef> &AsmStrs,
// "call dword ptr "
auto TmpStr = AsmStr.substr(0, I);
I = TmpStr.rfind(':');
- if (I == StringRef::npos)
- return TmpStr;
-
- assert(I < TmpStr.size() && "Unexpected inline asm string!");
- auto Asm = TmpStr.drop_front(I + 1);
- return Asm;
+ if (I != StringRef::npos)
+ TmpStr = TmpStr.substr(I + 1);
+ return TmpStr.take_while(llvm::isAlpha);
}
return StringRef();
@@ -33145,12 +34453,13 @@ static StringRef getInstrStrFromOpNo(const SmallVectorImpl<StringRef> &AsmStrs,
bool X86TargetLowering::isInlineAsmTargetBranch(
const SmallVectorImpl<StringRef> &AsmStrs, unsigned OpNo) const {
- StringRef InstrStr = getInstrStrFromOpNo(AsmStrs, OpNo);
-
- if (InstrStr.contains("call"))
- return true;
-
- return false;
+  // In an __asm block, an "__asm inst foo" statement where inst is CALL or JMP
+  // should be changed from indirect TargetLowering::C_Memory to direct
+  // TargetLowering::C_Address.
+ // We don't need to special case LOOP* and Jcc, which cannot target a memory
+ // location.
+ StringRef Inst = getInstrStrFromOpNo(AsmStrs, OpNo);
+ return Inst.equals_insensitive("call") || Inst.equals_insensitive("jmp");
}
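
The string handling above drops an optional "label:" prefix, keeps the leading alphabetic run as the mnemonic, and compares it case-insensitively against call/jmp. A standalone C++ sketch of the same logic using only the standard library (the helper name is made up for illustration):

#include <cassert>
#include <cctype>
#include <string_view>

static bool isCallOrJmp(std::string_view s) {
  if (auto colon = s.rfind(':'); colon != std::string_view::npos)
    s.remove_prefix(colon + 1);                       // drop "label:" if present
  size_t n = 0;
  while (n < s.size() && std::isalpha(static_cast<unsigned char>(s[n])))
    ++n;                                              // leading alphabetic run = mnemonic
  std::string_view inst = s.substr(0, n);
  auto ieq = [](std::string_view a, std::string_view b) {
    if (a.size() != b.size()) return false;
    for (size_t i = 0; i < a.size(); ++i)
      if (std::tolower((unsigned char)a[i]) != std::tolower((unsigned char)b[i]))
        return false;
    return true;
  };
  return ieq(inst, "call") || ieq(inst, "jmp");
}

int main() {
  assert(isCallOrJmp("call dword ptr "));
  assert(isCallOrJmp("lbl:JMP"));
  assert(!isCallOrJmp("loop"));
}
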
/// Provide custom lowering hooks for some operations.
@@ -33253,6 +34562,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
+ case ISD::GET_FPENV_MEM: return LowerGET_FPENV_MEM(Op, DAG);
+ case ISD::SET_FPENV_MEM: return LowerSET_FPENV_MEM(Op, DAG);
+ case ISD::RESET_FPENV: return LowerRESET_FPENV(Op, DAG);
case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ(Op, Subtarget, DAG);
case ISD::CTTZ:
@@ -33275,8 +34587,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG);
case ISD::SADDO_CARRY:
case ISD::SSUBO_CARRY:
- case ISD::ADDCARRY:
- case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
+ case ISD::UADDO_CARRY:
+ case ISD::USUBO_CARRY: return LowerADDSUBO_CARRY(Op, DAG);
case ISD::ADD:
case ISD::SUB: return lowerAddSub(Op, DAG, Subtarget);
case ISD::UADDSAT:
@@ -33287,7 +34599,12 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SMIN:
case ISD::UMAX:
case ISD::UMIN: return LowerMINMAX(Op, Subtarget, DAG);
+ case ISD::FMINIMUM:
+ case ISD::FMAXIMUM:
+ return LowerFMINIMUM_FMAXIMUM(Op, Subtarget, DAG);
case ISD::ABS: return LowerABS(Op, Subtarget, DAG);
+ case ISD::ABDS:
+ case ISD::ABDU: return LowerABD(Op, Subtarget, DAG);
case ISD::AVGCEILU: return LowerAVG(Op, Subtarget, DAG);
case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
@@ -33298,6 +34615,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::GC_TRANSITION_END: return LowerGC_TRANSITION(Op, DAG);
case ISD::ADDRSPACECAST: return LowerADDRSPACECAST(Op, DAG);
case X86ISD::CVTPS2PH: return LowerCVTPS2PH(Op, DAG);
+ case ISD::PREFETCH: return LowerPREFETCH(Op, Subtarget, DAG);
}
}
@@ -33505,15 +34823,15 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT();
SDValue In = N->getOperand(0);
EVT InVT = In.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+ EVT EltVT = VT.getVectorElementType();
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
unsigned InBits = InVT.getSizeInBits();
if (128 % InBits == 0) {
      // 128-bit and smaller inputs should avoid truncate altogether and
// just use a build_vector that will become a shuffle.
// TODO: Widen and use a shuffle directly?
- MVT InEltVT = InVT.getSimpleVT().getVectorElementType();
- EVT EltVT = VT.getVectorElementType();
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
// Use the original element count so we don't do more scalar opts than
// necessary.
@@ -33560,6 +34878,18 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
+ // Attempt to widen the truncation input vector to let LowerTRUNCATE handle
+ // this via type legalization.
+ if ((InEltVT == MVT::i16 || InEltVT == MVT::i32 || InEltVT == MVT::i64) &&
+ (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32) &&
+ (!Subtarget.hasSSSE3() || (InVT == MVT::v8i64 && VT == MVT::v8i8) ||
+ (InVT == MVT::v4i64 && VT == MVT::v4i16 && !Subtarget.hasAVX()))) {
+ SDValue WidenIn = widenSubVector(In, false, Subtarget, DAG, dl,
+ InEltVT.getSizeInBits() * WidenNumElts);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, WidenVT, WidenIn));
+ return;
+ }
+
return;
}
case ISD::ANY_EXTEND:
@@ -33999,8 +35329,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, Src);
- SDValue VBias =
- DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, MVT::v2f64);
+ SDValue VBias = DAG.getConstantFP(
+ llvm::bit_cast<double>(0x4330000000000000ULL), dl, MVT::v2f64);
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
DAG.getBitcast(MVT::v2i64, VBias));
Or = DAG.getBitcast(MVT::v2f64, Or);
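
The OR with 0x4330000000000000 above is the classic unsigned-to-double trick: that bit pattern is 2^52, so OR-ing a 32-bit value into its low mantissa bits yields the double 2^52 + u exactly, and the surrounding lowering (outside this hunk) presumably subtracts the same bias to recover u as a double. A standalone C++20 sketch of the identity (illustrative, not the patch code):

#include <bit>
#include <cassert>
#include <cstdint>

static double u32_to_double_via_bias(uint32_t u) {
  uint64_t bits = 0x4330000000000000ULL | uint64_t(u);  // mantissa of 2^52 now holds u
  return std::bit_cast<double>(bits) - 0x1p52;          // subtract the bias to get u exactly
}

int main() {
  for (uint32_t u : {0u, 1u, 12345u, 0x80000000u, 0xFFFFFFFFu})
    assert(u32_to_double_via_bias(u) == double(u));
}
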
@@ -34121,21 +35451,16 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
"64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS requires CMPXCHG16B");
MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32;
SDValue cpInL, cpInH;
- cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
- DAG.getConstant(0, dl, HalfT));
- cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2),
- DAG.getConstant(1, dl, HalfT));
+ std::tie(cpInL, cpInH) =
+ DAG.SplitScalar(N->getOperand(2), dl, HalfT, HalfT);
cpInL = DAG.getCopyToReg(N->getOperand(0), dl,
- Regs64bit ? X86::RAX : X86::EAX,
- cpInL, SDValue());
- cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl,
- Regs64bit ? X86::RDX : X86::EDX,
- cpInH, cpInL.getValue(1));
+ Regs64bit ? X86::RAX : X86::EAX, cpInL, SDValue());
+ cpInH =
+ DAG.getCopyToReg(cpInL.getValue(0), dl, Regs64bit ? X86::RDX : X86::EDX,
+ cpInH, cpInL.getValue(1));
SDValue swapInL, swapInH;
- swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
- DAG.getConstant(0, dl, HalfT));
- swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3),
- DAG.getConstant(1, dl, HalfT));
+ std::tie(swapInL, swapInH) =
+ DAG.SplitScalar(N->getOperand(3), dl, HalfT, HalfT);
swapInH =
DAG.getCopyToReg(cpInH.getValue(0), dl, Regs64bit ? X86::RCX : X86::ECX,
swapInH, cpInH.getValue(1));
@@ -34433,7 +35758,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FSETCCM_SAE)
NODE_NAME_CASE(CMOV)
NODE_NAME_CASE(BRCOND)
- NODE_NAME_CASE(RET_FLAG)
+ NODE_NAME_CASE(RET_GLUE)
NODE_NAME_CASE(IRET)
NODE_NAME_CASE(REP_STOS)
NODE_NAME_CASE(REP_MOVS)
@@ -34482,6 +35807,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(TC_RETURN)
NODE_NAME_CASE(FNSTCW16m)
NODE_NAME_CASE(FLDCW16m)
+ NODE_NAME_CASE(FNSTENVm)
+ NODE_NAME_CASE(FLDENVm)
NODE_NAME_CASE(LCMPXCHG_DAG)
NODE_NAME_CASE(LCMPXCHG8_DAG)
NODE_NAME_CASE(LCMPXCHG16_DAG)
@@ -34804,6 +36131,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(AESDECWIDE256KL)
NODE_NAME_CASE(CMPCCXADD)
NODE_NAME_CASE(TESTUI)
+ NODE_NAME_CASE(FP80_ADD)
+ NODE_NAME_CASE(STRICT_FP80_ADD)
}
return nullptr;
#undef NODE_NAME_CASE
@@ -35099,9 +36428,9 @@ bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
return false;
}
-bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
+bool X86TargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
// i16 instructions are longer (0x66 prefix) and potentially slower.
- return !(VT1 == MVT::i32 && VT2 == MVT::i16);
+ return !(SrcVT == MVT::i32 && DestVT == MVT::i16);
}
bool X86TargetLowering::shouldFoldSelectWithIdentityConstant(unsigned Opcode,
@@ -35199,7 +36528,7 @@ static bool isEFLAGSLiveAfter(MachineBasicBlock::iterator Itr,
/// Utility function to emit xbegin specifying the start of an RTM region.
static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
const TargetInstrInfo *TII) {
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
const BasicBlock *BB = MBB->getBasicBlock();
MachineFunction::iterator I = ++MBB->getIterator();
@@ -35249,28 +36578,28 @@ static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
// xbegin fallMBB
// # fallthrough to mainMBB
  // # abort to fallMBB
- BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(fallMBB);
+ BuildMI(thisMBB, MIMD, TII->get(X86::XBEGIN_4)).addMBB(fallMBB);
thisMBB->addSuccessor(mainMBB);
thisMBB->addSuccessor(fallMBB);
// mainMBB:
// mainDstReg := -1
- BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), mainDstReg).addImm(-1);
- BuildMI(mainMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB);
+ BuildMI(mainMBB, MIMD, TII->get(X86::MOV32ri), mainDstReg).addImm(-1);
+ BuildMI(mainMBB, MIMD, TII->get(X86::JMP_1)).addMBB(sinkMBB);
mainMBB->addSuccessor(sinkMBB);
// fallMBB:
// ; pseudo instruction to model hardware's definition from XABORT
// EAX := XABORT_DEF
// fallDstReg := EAX
- BuildMI(fallMBB, DL, TII->get(X86::XABORT_DEF));
- BuildMI(fallMBB, DL, TII->get(TargetOpcode::COPY), fallDstReg)
+ BuildMI(fallMBB, MIMD, TII->get(X86::XABORT_DEF));
+ BuildMI(fallMBB, MIMD, TII->get(TargetOpcode::COPY), fallDstReg)
.addReg(X86::EAX);
fallMBB->addSuccessor(sinkMBB);
// sinkMBB:
// DstReg := phi(mainDstReg/mainBB, fallDstReg/fallBB)
- BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), DstReg)
+ BuildMI(*sinkMBB, sinkMBB->begin(), MIMD, TII->get(X86::PHI), DstReg)
.addReg(mainDstReg).addMBB(mainMBB)
.addReg(fallDstReg).addMBB(fallMBB);
@@ -35323,7 +36652,7 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
const TargetRegisterClass *AddrRegClass =
getRegClassFor(getPointerTy(MBB->getParent()->getDataLayout()));
const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
// struct va_list {
// i32 gp_offset
@@ -35407,7 +36736,7 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// Load the offset value into a register
OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
- BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
+ BuildMI(thisMBB, MIMD, TII->get(X86::MOV32rm), OffsetReg)
.add(Base)
.add(Scale)
.add(Index)
@@ -35416,13 +36745,13 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
.setMemRefs(LoadOnlyMMO);
// Check if there is enough room left to pull this argument.
- BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
+ BuildMI(thisMBB, MIMD, TII->get(X86::CMP32ri))
.addReg(OffsetReg)
.addImm(MaxOffset + 8 - ArgSizeA8);
// Branch to "overflowMBB" if offset >= max
// Fall through to "offsetMBB" otherwise
- BuildMI(thisMBB, DL, TII->get(X86::JCC_1))
+ BuildMI(thisMBB, MIMD, TII->get(X86::JCC_1))
.addMBB(overflowMBB).addImm(X86::COND_AE);
}
@@ -35433,7 +36762,7 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// Read the reg_save_area address.
Register RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(
- offsetMBB, DL,
+ offsetMBB, MIMD,
TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64rm : X86::MOV32rm),
RegSaveReg)
.add(Base)
@@ -35446,30 +36775,30 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
if (Subtarget.isTarget64BitLP64()) {
// Zero-extend the offset
Register OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
- BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
+ BuildMI(offsetMBB, MIMD, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
.addImm(0)
.addReg(OffsetReg)
.addImm(X86::sub_32bit);
// Add the offset to the reg_save_area to get the final address.
- BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
+ BuildMI(offsetMBB, MIMD, TII->get(X86::ADD64rr), OffsetDestReg)
.addReg(OffsetReg64)
.addReg(RegSaveReg);
} else {
// Add the offset to the reg_save_area to get the final address.
- BuildMI(offsetMBB, DL, TII->get(X86::ADD32rr), OffsetDestReg)
+ BuildMI(offsetMBB, MIMD, TII->get(X86::ADD32rr), OffsetDestReg)
.addReg(OffsetReg)
.addReg(RegSaveReg);
}
// Compute the offset for the next argument
Register NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
- BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
+ BuildMI(offsetMBB, MIMD, TII->get(X86::ADD32ri), NextOffsetReg)
.addReg(OffsetReg)
.addImm(UseFPOffset ? 16 : 8);
// Store it back into the va_list.
- BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
+ BuildMI(offsetMBB, MIMD, TII->get(X86::MOV32mr))
.add(Base)
.add(Scale)
.add(Index)
@@ -35479,7 +36808,7 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
.setMemRefs(StoreOnlyMMO);
// Jump to endMBB
- BuildMI(offsetMBB, DL, TII->get(X86::JMP_1))
+ BuildMI(offsetMBB, MIMD, TII->get(X86::JMP_1))
.addMBB(endMBB);
}
@@ -35489,7 +36818,7 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// Load the overflow_area address into a register.
Register OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
- BuildMI(overflowMBB, DL,
+ BuildMI(overflowMBB, MIMD,
TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64rm : X86::MOV32rm),
OverflowAddrReg)
.add(Base)
@@ -35507,20 +36836,20 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// aligned_addr = (addr + (align-1)) & ~(align-1)
BuildMI(
- overflowMBB, DL,
+ overflowMBB, MIMD,
TII->get(Subtarget.isTarget64BitLP64() ? X86::ADD64ri32 : X86::ADD32ri),
TmpReg)
.addReg(OverflowAddrReg)
.addImm(Alignment.value() - 1);
BuildMI(
- overflowMBB, DL,
+ overflowMBB, MIMD,
TII->get(Subtarget.isTarget64BitLP64() ? X86::AND64ri32 : X86::AND32ri),
OverflowDestReg)
.addReg(TmpReg)
.addImm(~(uint64_t)(Alignment.value() - 1));
} else {
- BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
+ BuildMI(overflowMBB, MIMD, TII->get(TargetOpcode::COPY), OverflowDestReg)
.addReg(OverflowAddrReg);
}
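
The two instructions above implement the usual power-of-two round-up, aligned = (addr + (align - 1)) & ~(align - 1). A trivial C++ sketch (not from the patch):

#include <cassert>
#include <cstdint>

// align must be a power of two for the mask form to be valid.
static uint64_t align_up(uint64_t addr, uint64_t align) {
  return (addr + (align - 1)) & ~(align - 1);
}

int main() {
  assert(align_up(0, 16) == 0);
  assert(align_up(1, 16) == 16);
  assert(align_up(16, 16) == 16);
  assert(align_up(17, 16) == 32);
}
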
@@ -35528,14 +36857,14 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// (the overflow address should be kept 8-byte aligned)
Register NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
BuildMI(
- overflowMBB, DL,
+ overflowMBB, MIMD,
TII->get(Subtarget.isTarget64BitLP64() ? X86::ADD64ri32 : X86::ADD32ri),
NextAddrReg)
.addReg(OverflowDestReg)
.addImm(ArgSizeA8);
// Store the new overflow address.
- BuildMI(overflowMBB, DL,
+ BuildMI(overflowMBB, MIMD,
TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64mr : X86::MOV32mr))
.add(Base)
.add(Scale)
@@ -35547,7 +36876,7 @@ X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
// If we branched, emit the PHI to the front of endMBB.
if (offsetMBB) {
- BuildMI(*endMBB, endMBB->begin(), DL,
+ BuildMI(*endMBB, endMBB->begin(), MIMD,
TII->get(X86::PHI), DestReg)
.addReg(OffsetDestReg).addMBB(offsetMBB)
.addReg(OverflowDestReg).addMBB(overflowMBB);
@@ -35624,7 +36953,7 @@ static MachineInstrBuilder createPHIsForCMOVsInSinkBB(
MachineBasicBlock *SinkMBB) {
MachineFunction *MF = TrueMBB->getParent();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
- const DebugLoc &DL = MIItBegin->getDebugLoc();
+ const MIMetadata MIMD(*MIItBegin);
X86::CondCode CC = X86::CondCode(MIItBegin->getOperand(3).getImm());
X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
@@ -35651,17 +36980,18 @@ static MachineInstrBuilder createPHIsForCMOVsInSinkBB(
if (MIIt->getOperand(3).getImm() == OppCC)
std::swap(Op1Reg, Op2Reg);
- if (RegRewriteTable.find(Op1Reg) != RegRewriteTable.end())
+ if (RegRewriteTable.contains(Op1Reg))
Op1Reg = RegRewriteTable[Op1Reg].first;
- if (RegRewriteTable.find(Op2Reg) != RegRewriteTable.end())
+ if (RegRewriteTable.contains(Op2Reg))
Op2Reg = RegRewriteTable[Op2Reg].second;
- MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg)
- .addReg(Op1Reg)
- .addMBB(FalseMBB)
- .addReg(Op2Reg)
- .addMBB(TrueMBB);
+ MIB =
+ BuildMI(*SinkMBB, SinkInsertionPoint, MIMD, TII->get(X86::PHI), DestReg)
+ .addReg(Op1Reg)
+ .addMBB(FalseMBB)
+ .addReg(Op2Reg)
+ .addMBB(TrueMBB);
// Add this PHI to the rewrite table.
RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
@@ -35676,7 +37006,7 @@ X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV,
MachineInstr &SecondCascadedCMOV,
MachineBasicBlock *ThisMBB) const {
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- const DebugLoc &DL = FirstCMOV.getDebugLoc();
+ const MIMetadata MIMD(FirstCMOV);
// We lower cascaded CMOVs such as
//
@@ -35793,11 +37123,13 @@ X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV,
// Create the conditional branch instructions.
X86::CondCode FirstCC = X86::CondCode(FirstCMOV.getOperand(3).getImm());
- BuildMI(ThisMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(FirstCC);
+ BuildMI(ThisMBB, MIMD, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(FirstCC);
X86::CondCode SecondCC =
X86::CondCode(SecondCascadedCMOV.getOperand(3).getImm());
- BuildMI(FirstInsertedMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(SecondCC);
+ BuildMI(FirstInsertedMBB, MIMD, TII->get(X86::JCC_1))
+ .addMBB(SinkMBB)
+ .addImm(SecondCC);
// SinkMBB:
// %Result = phi [ %FalseValue, SecondInsertedMBB ], [ %TrueValue, ThisMBB ]
@@ -35805,7 +37137,7 @@ X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV,
Register Op1Reg = FirstCMOV.getOperand(1).getReg();
Register Op2Reg = FirstCMOV.getOperand(2).getReg();
MachineInstrBuilder MIB =
- BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(X86::PHI), DestReg)
+ BuildMI(*SinkMBB, SinkMBB->begin(), MIMD, TII->get(X86::PHI), DestReg)
.addReg(Op1Reg)
.addMBB(SecondInsertedMBB)
.addReg(Op2Reg)
@@ -35826,7 +37158,7 @@ MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
MachineBasicBlock *ThisMBB) const {
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@@ -35944,7 +37276,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
FalseMBB->addSuccessor(SinkMBB);
// Create the conditional branch instruction.
- BuildMI(ThisMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC);
+ BuildMI(ThisMBB, MIMD, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC);
// SinkMBB:
// %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, ThisMBB ]
@@ -35960,16 +37292,11 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
return SinkMBB;
}
-static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) {
- if (IsLP64) {
- if (isInt<8>(Imm))
- return X86::SUB64ri8;
+static unsigned getSUBriOpcode(bool IsLP64) {
+ if (IsLP64)
return X86::SUB64ri32;
- } else {
- if (isInt<8>(Imm))
- return X86::SUB32ri8;
+ else
return X86::SUB32ri;
- }
}
MachineBasicBlock *
@@ -35978,7 +37305,7 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const X86FrameLowering &TFI = *Subtarget.getFrameLowering();
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
const unsigned ProbeSize = getStackProbeSize(*MF);
@@ -36002,23 +37329,23 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
Register FinalStackPtr = MRI.createVirtualRegister(
TFI.Uses64BitFramePtr ? &X86::GR64RegClass : &X86::GR32RegClass);
- BuildMI(*MBB, {MI}, DL, TII->get(TargetOpcode::COPY), TmpStackPtr)
+ BuildMI(*MBB, {MI}, MIMD, TII->get(TargetOpcode::COPY), TmpStackPtr)
.addReg(physSPReg);
{
const unsigned Opc = TFI.Uses64BitFramePtr ? X86::SUB64rr : X86::SUB32rr;
- BuildMI(*MBB, {MI}, DL, TII->get(Opc), FinalStackPtr)
+ BuildMI(*MBB, {MI}, MIMD, TII->get(Opc), FinalStackPtr)
.addReg(TmpStackPtr)
.addReg(sizeVReg);
}
// test rsp size
- BuildMI(testMBB, DL,
+ BuildMI(testMBB, MIMD,
TII->get(TFI.Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
.addReg(FinalStackPtr)
.addReg(physSPReg);
- BuildMI(testMBB, DL, TII->get(X86::JCC_1))
+ BuildMI(testMBB, MIMD, TII->get(X86::JCC_1))
.addMBB(tailMBB)
.addImm(X86::COND_GE);
testMBB->addSuccessor(blockMBB);
@@ -36037,21 +37364,21 @@ X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
// The property we want to enforce is to never have more than [page alloc] between two probes.
const unsigned XORMIOpc =
- TFI.Uses64BitFramePtr ? X86::XOR64mi8 : X86::XOR32mi8;
- addRegOffset(BuildMI(blockMBB, DL, TII->get(XORMIOpc)), physSPReg, false, 0)
+ TFI.Uses64BitFramePtr ? X86::XOR64mi32 : X86::XOR32mi;
+ addRegOffset(BuildMI(blockMBB, MIMD, TII->get(XORMIOpc)), physSPReg, false, 0)
.addImm(0);
- BuildMI(blockMBB, DL,
- TII->get(getSUBriOpcode(TFI.Uses64BitFramePtr, ProbeSize)), physSPReg)
+ BuildMI(blockMBB, MIMD, TII->get(getSUBriOpcode(TFI.Uses64BitFramePtr)),
+ physSPReg)
.addReg(physSPReg)
.addImm(ProbeSize);
-
- BuildMI(blockMBB, DL, TII->get(X86::JMP_1)).addMBB(testMBB);
+ BuildMI(blockMBB, MIMD, TII->get(X86::JMP_1)).addMBB(testMBB);
blockMBB->addSuccessor(testMBB);
// Replace original instruction by the expected stack ptr
- BuildMI(tailMBB, DL, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
+ BuildMI(tailMBB, MIMD, TII->get(TargetOpcode::COPY),
+ MI.getOperand(0).getReg())
.addReg(FinalStackPtr);
tailMBB->splice(tailMBB->end(), MBB,
@@ -36071,7 +37398,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
const BasicBlock *LLVM_BB = BB->getBasicBlock();
assert(MF->shouldSplitStack());
@@ -36126,58 +37453,58 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
// Add code to the main basic block to check if the stack limit has been hit,
// and if so, jump to mallocMBB otherwise to bumpMBB.
- BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
- BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
+ BuildMI(BB, MIMD, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
+ BuildMI(BB, MIMD, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
.addReg(tmpSPVReg).addReg(sizeVReg);
- BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
+ BuildMI(BB, MIMD, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
.addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
.addReg(SPLimitVReg);
- BuildMI(BB, DL, TII->get(X86::JCC_1)).addMBB(mallocMBB).addImm(X86::COND_G);
+ BuildMI(BB, MIMD, TII->get(X86::JCC_1)).addMBB(mallocMBB).addImm(X86::COND_G);
// bumpMBB simply decreases the stack pointer, since we know the current
// stacklet has enough space.
- BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg)
+ BuildMI(bumpMBB, MIMD, TII->get(TargetOpcode::COPY), physSPReg)
.addReg(SPLimitVReg);
- BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
+ BuildMI(bumpMBB, MIMD, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
.addReg(SPLimitVReg);
- BuildMI(bumpMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);
+ BuildMI(bumpMBB, MIMD, TII->get(X86::JMP_1)).addMBB(continueMBB);
// Calls into a routine in libgcc to allocate more space from the heap.
const uint32_t *RegMask =
Subtarget.getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C);
if (IsLP64) {
- BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
+ BuildMI(mallocMBB, MIMD, TII->get(X86::MOV64rr), X86::RDI)
.addReg(sizeVReg);
- BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
+ BuildMI(mallocMBB, MIMD, TII->get(X86::CALL64pcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::RDI, RegState::Implicit)
.addReg(X86::RAX, RegState::ImplicitDefine);
} else if (Is64Bit) {
- BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI)
+ BuildMI(mallocMBB, MIMD, TII->get(X86::MOV32rr), X86::EDI)
.addReg(sizeVReg);
- BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
+ BuildMI(mallocMBB, MIMD, TII->get(X86::CALL64pcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::EDI, RegState::Implicit)
.addReg(X86::EAX, RegState::ImplicitDefine);
} else {
- BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
+ BuildMI(mallocMBB, MIMD, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
.addImm(12);
- BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg);
- BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32))
+ BuildMI(mallocMBB, MIMD, TII->get(X86::PUSH32r)).addReg(sizeVReg);
+ BuildMI(mallocMBB, MIMD, TII->get(X86::CALLpcrel32))
.addExternalSymbol("__morestack_allocate_stack_space")
.addRegMask(RegMask)
.addReg(X86::EAX, RegState::ImplicitDefine);
}
if (!Is64Bit)
- BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)
+ BuildMI(mallocMBB, MIMD, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)
.addImm(16);
- BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg)
+ BuildMI(mallocMBB, MIMD, TII->get(TargetOpcode::COPY), mallocPtrVReg)
.addReg(IsLP64 ? X86::RAX : X86::EAX);
- BuildMI(mallocMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);
+ BuildMI(mallocMBB, MIMD, TII->get(X86::JMP_1)).addMBB(continueMBB);
// Set up the CFG correctly.
BB->addSuccessor(bumpMBB);
@@ -36186,7 +37513,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
bumpMBB->addSuccessor(continueMBB);
// Take care of the PHI nodes.
- BuildMI(*continueMBB, continueMBB->begin(), DL, TII->get(X86::PHI),
+ BuildMI(*continueMBB, continueMBB->begin(), MIMD, TII->get(X86::PHI),
MI.getOperand(0).getReg())
.addReg(mallocPtrVReg)
.addMBB(mallocMBB)
@@ -36206,7 +37533,7 @@ X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
MachineFunction *MF = BB->getParent();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
assert(!isAsynchronousEHPersonality(
classifyEHPersonality(MF->getFunction().getPersonalityFn())) &&
@@ -36231,7 +37558,7 @@ X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
RestoreMBB->setIsEHPad(true);
auto RestoreMBBI = RestoreMBB->begin();
- BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::JMP_4)).addMBB(TargetMBB);
+ BuildMI(*RestoreMBB, RestoreMBBI, MIMD, TII.get(X86::JMP_4)).addMBB(TargetMBB);
return BB;
}
@@ -36244,13 +37571,13 @@ X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
// inside MC, therefore without the two markers shrink-wrapping
  // may push the prologue/epilogue past them.
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
MachineFunction &MF = *BB->getParent();
// Emit CALLSEQ_START right before the instruction.
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
MachineInstrBuilder CallseqStart =
- BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0);
+ BuildMI(MF, MIMD, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0);
BB->insert(MachineBasicBlock::iterator(MI), CallseqStart);
// Emit CALLSEQ_END right after the instruction.
@@ -36258,7 +37585,7 @@ X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
// original instruction around.
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
MachineInstrBuilder CallseqEnd =
- BuildMI(MF, DL, TII.get(AdjStackUp)).addImm(0).addImm(0);
+ BuildMI(MF, MIMD, TII.get(AdjStackUp)).addImm(0).addImm(0);
BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd);
return BB;
@@ -36273,7 +37600,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
// be in the normal return register.
MachineFunction *F = BB->getParent();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
assert(Subtarget.isTargetDarwin() && "Darwin only instr emitted?");
assert(MI.getOperand(3).isGlobal() && "This should be a global");
@@ -36287,38 +37614,38 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
if (Subtarget.is64Bit()) {
MachineInstrBuilder MIB =
- BuildMI(*BB, MI, DL, TII->get(X86::MOV64rm), X86::RDI)
+ BuildMI(*BB, MI, MIMD, TII->get(X86::MOV64rm), X86::RDI)
.addReg(X86::RIP)
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
- MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
+ MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL64m));
addDirectMem(MIB, X86::RDI);
MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else if (!isPositionIndependent()) {
MachineInstrBuilder MIB =
- BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX)
+ BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), X86::EAX)
.addReg(0)
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
- MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
+ MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
} else {
MachineInstrBuilder MIB =
- BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX)
+ BuildMI(*BB, MI, MIMD, TII->get(X86::MOV32rm), X86::EAX)
.addReg(TII->getGlobalBaseReg(F))
.addImm(0)
.addReg(0)
.addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
MI.getOperand(3).getTargetFlags())
.addReg(0);
- MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
+ MIB = BuildMI(*BB, MI, MIMD, TII->get(X86::CALL32m));
addDirectMem(MIB, X86::EAX);
MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
}
@@ -36412,7 +37739,7 @@ X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
MachineBasicBlock *BB) const {
// Copy the virtual register into the R11 physical register and
// call the retpoline thunk.
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
const X86InstrInfo *TII = Subtarget.getInstrInfo();
Register CalleeVReg = MI.getOperand(0).getReg();
unsigned Opc = getOpcodeForIndirectThunk(MI.getOpcode());
@@ -36451,7 +37778,7 @@ X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
const char *Symbol = getIndirectThunkSymbol(Subtarget, AvailableReg);
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), AvailableReg)
.addReg(CalleeVReg);
MI.getOperand(0).ChangeToES(Symbol);
MI.setDesc(TII->get(Opc));
@@ -36474,7 +37801,7 @@ X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
/// \param [in] MBB The Machine Basic Block that will be modified.
void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *MBB) const {
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -36489,7 +37816,7 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
Register ZReg = MRI.createVirtualRegister(PtrRC);
unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr;
- BuildMI(*MBB, MI, DL, TII->get(XorRROpc))
+ BuildMI(*MBB, MI, MIMD, TII->get(XorRROpc))
.addDef(ZReg)
.addReg(ZReg, RegState::Undef)
.addReg(ZReg, RegState::Undef);
@@ -36497,11 +37824,11 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
// Read the current SSP Register value to the zeroed register.
Register SSPCopyReg = MRI.createVirtualRegister(PtrRC);
unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
- BuildMI(*MBB, MI, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);
+ BuildMI(*MBB, MI, MIMD, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);
// Write the SSP register value to offset 3 in input memory buffer.
unsigned PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
- MIB = BuildMI(*MBB, MI, DL, TII->get(PtrStoreOpc));
+ MIB = BuildMI(*MBB, MI, MIMD, TII->get(PtrStoreOpc));
const int64_t SSPOffset = 3 * PVT.getStoreSize();
const unsigned MemOpndSlot = 1;
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
@@ -36517,7 +37844,7 @@ void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *
X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
@@ -36593,7 +37920,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
LabelReg = MRI.createVirtualRegister(PtrRC);
if (Subtarget.is64Bit()) {
- MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg)
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::LEA64r), LabelReg)
.addReg(X86::RIP)
.addImm(0)
.addReg(0)
@@ -36601,7 +37928,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
.addReg(0);
} else {
const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII);
- MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg)
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::LEA32r), LabelReg)
.addReg(XII->getGlobalBaseReg(MF))
.addImm(0)
.addReg(0)
@@ -36611,7 +37938,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
} else
PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
// Store IP
- MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrStoreOpc));
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrStoreOpc));
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(MemOpndSlot + i), LabelOffset);
@@ -36629,7 +37956,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
}
// Setup
- MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(X86::EH_SjLj_Setup))
.addMBB(restoreMBB);
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -36639,14 +37966,15 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
// mainMBB:
// EAX = 0
- BuildMI(mainMBB, DL, TII->get(X86::MOV32r0), mainDstReg);
+ BuildMI(mainMBB, MIMD, TII->get(X86::MOV32r0), mainDstReg);
mainMBB->addSuccessor(sinkMBB);
// sinkMBB:
- BuildMI(*sinkMBB, sinkMBB->begin(), DL,
- TII->get(X86::PHI), DstReg)
- .addReg(mainDstReg).addMBB(mainMBB)
- .addReg(restoreDstReg).addMBB(restoreMBB);
+ BuildMI(*sinkMBB, sinkMBB->begin(), MIMD, TII->get(X86::PHI), DstReg)
+ .addReg(mainDstReg)
+ .addMBB(mainMBB)
+ .addReg(restoreDstReg)
+ .addMBB(restoreMBB);
// restoreMBB:
if (RegInfo->hasBasePointer(*MF)) {
@@ -36657,12 +37985,12 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
Register FramePtr = RegInfo->getFrameRegister(*MF);
Register BasePtr = RegInfo->getBaseRegister();
unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm;
- addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr),
+ addRegOffset(BuildMI(restoreMBB, MIMD, TII->get(Opm), BasePtr),
FramePtr, true, X86FI->getRestoreBasePointerOffset())
.setMIFlag(MachineInstr::FrameSetup);
}
- BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
- BuildMI(restoreMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB);
+ BuildMI(restoreMBB, MIMD, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
+ BuildMI(restoreMBB, MIMD, TII->get(X86::JMP_1)).addMBB(sinkMBB);
restoreMBB->addSuccessor(sinkMBB);
MI.eraseFromParent();
@@ -36677,7 +38005,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
MachineBasicBlock *
X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *MBB) const {
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -36738,11 +38066,11 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
// Initialize a register with zero.
Register ZReg = MRI.createVirtualRegister(&X86::GR32RegClass);
- BuildMI(checkSspMBB, DL, TII->get(X86::MOV32r0), ZReg);
+ BuildMI(checkSspMBB, MIMD, TII->get(X86::MOV32r0), ZReg);
if (PVT == MVT::i64) {
Register TmpZReg = MRI.createVirtualRegister(PtrRC);
- BuildMI(checkSspMBB, DL, TII->get(X86::SUBREG_TO_REG), TmpZReg)
+ BuildMI(checkSspMBB, MIMD, TII->get(X86::SUBREG_TO_REG), TmpZReg)
.addImm(0)
.addReg(ZReg)
.addImm(X86::sub_32bit);
@@ -36752,15 +38080,17 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
// Read the current SSP Register value to the zeroed register.
Register SSPCopyReg = MRI.createVirtualRegister(PtrRC);
unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
- BuildMI(checkSspMBB, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);
+ BuildMI(checkSspMBB, MIMD, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);
// Check whether the result of the SSP register is zero and jump directly
// to the sink.
unsigned TestRROpc = (PVT == MVT::i64) ? X86::TEST64rr : X86::TEST32rr;
- BuildMI(checkSspMBB, DL, TII->get(TestRROpc))
+ BuildMI(checkSspMBB, MIMD, TII->get(TestRROpc))
.addReg(SSPCopyReg)
.addReg(SSPCopyReg);
- BuildMI(checkSspMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E);
+ BuildMI(checkSspMBB, MIMD, TII->get(X86::JCC_1))
+ .addMBB(sinkMBB)
+ .addImm(X86::COND_E);
checkSspMBB->addSuccessor(sinkMBB);
checkSspMBB->addSuccessor(fallMBB);
@@ -36769,7 +38099,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
const int64_t SPPOffset = 3 * PVT.getStoreSize();
MachineInstrBuilder MIB =
- BuildMI(fallMBB, DL, TII->get(PtrLoadOpc), PrevSSPReg);
+ BuildMI(fallMBB, MIMD, TII->get(PtrLoadOpc), PrevSSPReg);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (i == X86::AddrDisp)
@@ -36785,12 +38115,14 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
// Subtract the current SSP from the previous SSP.
Register SspSubReg = MRI.createVirtualRegister(PtrRC);
unsigned SubRROpc = (PVT == MVT::i64) ? X86::SUB64rr : X86::SUB32rr;
- BuildMI(fallMBB, DL, TII->get(SubRROpc), SspSubReg)
+ BuildMI(fallMBB, MIMD, TII->get(SubRROpc), SspSubReg)
.addReg(PrevSSPReg)
.addReg(SSPCopyReg);
// Jump to sink in case PrevSSPReg <= SSPCopyReg.
- BuildMI(fallMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_BE);
+ BuildMI(fallMBB, MIMD, TII->get(X86::JCC_1))
+ .addMBB(sinkMBB)
+ .addImm(X86::COND_BE);
fallMBB->addSuccessor(sinkMBB);
fallMBB->addSuccessor(fixShadowMBB);
@@ -36798,35 +38130,38 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
unsigned ShrRIOpc = (PVT == MVT::i64) ? X86::SHR64ri : X86::SHR32ri;
unsigned Offset = (PVT == MVT::i64) ? 3 : 2;
Register SspFirstShrReg = MRI.createVirtualRegister(PtrRC);
- BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspFirstShrReg)
+ BuildMI(fixShadowMBB, MIMD, TII->get(ShrRIOpc), SspFirstShrReg)
.addReg(SspSubReg)
.addImm(Offset);
// Increase SSP when looking only on the lower 8 bits of the delta.
unsigned IncsspOpc = (PVT == MVT::i64) ? X86::INCSSPQ : X86::INCSSPD;
- BuildMI(fixShadowMBB, DL, TII->get(IncsspOpc)).addReg(SspFirstShrReg);
+ BuildMI(fixShadowMBB, MIMD, TII->get(IncsspOpc)).addReg(SspFirstShrReg);
// Reset the lower 8 bits.
Register SspSecondShrReg = MRI.createVirtualRegister(PtrRC);
- BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspSecondShrReg)
+ BuildMI(fixShadowMBB, MIMD, TII->get(ShrRIOpc), SspSecondShrReg)
.addReg(SspFirstShrReg)
.addImm(8);
// Jump if the result of the shift is zero.
- BuildMI(fixShadowMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E);
+ BuildMI(fixShadowMBB, MIMD, TII->get(X86::JCC_1))
+ .addMBB(sinkMBB)
+ .addImm(X86::COND_E);
fixShadowMBB->addSuccessor(sinkMBB);
fixShadowMBB->addSuccessor(fixShadowLoopPrepareMBB);
// Do a single shift left.
- unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64r1 : X86::SHL32r1;
+ unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64ri : X86::SHL32ri;
Register SspAfterShlReg = MRI.createVirtualRegister(PtrRC);
- BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(ShlR1Opc), SspAfterShlReg)
- .addReg(SspSecondShrReg);
+ BuildMI(fixShadowLoopPrepareMBB, MIMD, TII->get(ShlR1Opc), SspAfterShlReg)
+ .addReg(SspSecondShrReg)
+ .addImm(1);
// Save the value 128 to a register (will be used next with incssp).
Register Value128InReg = MRI.createVirtualRegister(PtrRC);
unsigned MovRIOpc = (PVT == MVT::i64) ? X86::MOV64ri32 : X86::MOV32ri;
- BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(MovRIOpc), Value128InReg)
+ BuildMI(fixShadowLoopPrepareMBB, MIMD, TII->get(MovRIOpc), Value128InReg)
.addImm(128);
fixShadowLoopPrepareMBB->addSuccessor(fixShadowLoopMBB);
@@ -36834,21 +38169,23 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
// iterations of incssp until we finish fixing the shadow stack.
Register DecReg = MRI.createVirtualRegister(PtrRC);
Register CounterReg = MRI.createVirtualRegister(PtrRC);
- BuildMI(fixShadowLoopMBB, DL, TII->get(X86::PHI), CounterReg)
+ BuildMI(fixShadowLoopMBB, MIMD, TII->get(X86::PHI), CounterReg)
.addReg(SspAfterShlReg)
.addMBB(fixShadowLoopPrepareMBB)
.addReg(DecReg)
.addMBB(fixShadowLoopMBB);
// Every iteration we increase the SSP by 128.
- BuildMI(fixShadowLoopMBB, DL, TII->get(IncsspOpc)).addReg(Value128InReg);
+ BuildMI(fixShadowLoopMBB, MIMD, TII->get(IncsspOpc)).addReg(Value128InReg);
// Every iteration we decrement the counter by 1.
unsigned DecROpc = (PVT == MVT::i64) ? X86::DEC64r : X86::DEC32r;
- BuildMI(fixShadowLoopMBB, DL, TII->get(DecROpc), DecReg).addReg(CounterReg);
+ BuildMI(fixShadowLoopMBB, MIMD, TII->get(DecROpc), DecReg).addReg(CounterReg);
// Jump if the counter is not zero yet.
- BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JCC_1)).addMBB(fixShadowLoopMBB).addImm(X86::COND_NE);
+ BuildMI(fixShadowLoopMBB, MIMD, TII->get(X86::JCC_1))
+ .addMBB(fixShadowLoopMBB)
+ .addImm(X86::COND_NE);
fixShadowLoopMBB->addSuccessor(sinkMBB);
fixShadowLoopMBB->addSuccessor(fixShadowLoopMBB);
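
The fix-up arithmetic above works out as follows: the SSP delta is first turned into a slot count (shifted right by 3 on 64-bit targets, 2 on 32-bit), one INCSSP consumes the low 8 bits of that count, and the remaining count >> 8 groups of 256 slots are unwound by a loop that runs (count >> 8) << 1 iterations adding 128 slots each, since 2 * 128 == 256. A small C++ check of that bookkeeping (a sketch, not from the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t slots : {0ull, 5ull, 255ull, 256ull, 1000ull, 70000ull}) {
    uint64_t direct = slots & 0xff;          // one INCSSP with the low 8 bits
    uint64_t iterations = (slots >> 8) << 1; // loop counter after shr 8, shl 1
    assert(direct + iterations * 128 == slots);
  }
}
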
@@ -36858,7 +38195,7 @@ X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI,
MachineBasicBlock *
X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MachineBasicBlock *MBB) const {
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
MachineFunction *MF = MBB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -36895,7 +38232,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
}
// Reload FP
- MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), FP);
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrLoadOpc), FP);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (MO.isReg()) // Don't add the whole operand, we don't want to
@@ -36907,7 +38244,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MIB.setMemRefs(MMOs);
// Reload IP
- MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrLoadOpc), Tmp);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (i == X86::AddrDisp)
@@ -36921,7 +38258,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MIB.setMemRefs(MMOs);
// Reload SP
- MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), SP);
+ MIB = BuildMI(*thisMBB, MI, MIMD, TII->get(PtrLoadOpc), SP);
for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
if (i == X86::AddrDisp)
MIB.addDisp(MI.getOperand(i), SPOffset);
@@ -36932,7 +38269,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
MIB.setMemRefs(MMOs);
// Jump
- BuildMI(*thisMBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);
+ BuildMI(*thisMBB, MI, MIMD, TII->get(IJmpOpc)).addReg(Tmp);
MI.eraseFromParent();
return thisMBB;
@@ -36942,7 +38279,7 @@ void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB,
int FI) const {
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
@@ -36965,14 +38302,14 @@ void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
Op = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
if (Subtarget.is64Bit())
- BuildMI(*MBB, MI, DL, TII->get(X86::LEA64r), VR)
+ BuildMI(*MBB, MI, MIMD, TII->get(X86::LEA64r), VR)
.addReg(X86::RIP)
.addImm(1)
.addReg(0)
.addMBB(DispatchBB)
.addReg(0);
else
- BuildMI(*MBB, MI, DL, TII->get(X86::LEA32r), VR)
+ BuildMI(*MBB, MI, MIMD, TII->get(X86::LEA32r), VR)
.addReg(0) /* TII->getGlobalBaseReg(MF) */
.addImm(1)
.addReg(0)
@@ -36980,7 +38317,7 @@ void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
.addReg(0);
}
- MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(Op));
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, MIMD, TII->get(Op));
addFrameReference(MIB, FI, Subtarget.is64Bit() ? 56 : 36);
if (UseImmLabel)
MIB.addMBB(DispatchBB);
@@ -36991,7 +38328,7 @@ void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
MachineBasicBlock *
X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *BB) const {
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
MachineFunction *MF = BB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
@@ -37046,7 +38383,7 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
DispatchBB->setIsEHPad(true);
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
- BuildMI(TrapBB, DL, TII->get(X86::TRAP));
+ BuildMI(TrapBB, MIMD, TII->get(X86::TRAP));
DispatchBB->addSuccessor(TrapBB);
MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
@@ -37078,36 +38415,38 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
Register FP = RI.getFrameRegister(*MF);
Register BP = RI.getBaseRegister();
unsigned Op = FPIs64Bit ? X86::MOV64rm : X86::MOV32rm;
- addRegOffset(BuildMI(DispatchBB, DL, TII->get(Op), BP), FP, true,
+ addRegOffset(BuildMI(DispatchBB, MIMD, TII->get(Op), BP), FP, true,
MFI->getRestoreBasePointerOffset())
.addRegMask(RI.getNoPreservedMask());
} else {
- BuildMI(DispatchBB, DL, TII->get(X86::NOOP))
+ BuildMI(DispatchBB, MIMD, TII->get(X86::NOOP))
.addRegMask(RI.getNoPreservedMask());
}
// IReg is used as an index in a memory operand and therefore can't be SP
Register IReg = MRI->createVirtualRegister(&X86::GR32_NOSPRegClass);
- addFrameReference(BuildMI(DispatchBB, DL, TII->get(X86::MOV32rm), IReg), FI,
+ addFrameReference(BuildMI(DispatchBB, MIMD, TII->get(X86::MOV32rm), IReg), FI,
Subtarget.is64Bit() ? 8 : 4);
- BuildMI(DispatchBB, DL, TII->get(X86::CMP32ri))
+ BuildMI(DispatchBB, MIMD, TII->get(X86::CMP32ri))
.addReg(IReg)
.addImm(LPadList.size());
- BuildMI(DispatchBB, DL, TII->get(X86::JCC_1)).addMBB(TrapBB).addImm(X86::COND_AE);
+ BuildMI(DispatchBB, MIMD, TII->get(X86::JCC_1))
+ .addMBB(TrapBB)
+ .addImm(X86::COND_AE);
if (Subtarget.is64Bit()) {
Register BReg = MRI->createVirtualRegister(&X86::GR64RegClass);
Register IReg64 = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
// leaq .LJTI0_0(%rip), BReg
- BuildMI(DispContBB, DL, TII->get(X86::LEA64r), BReg)
+ BuildMI(DispContBB, MIMD, TII->get(X86::LEA64r), BReg)
.addReg(X86::RIP)
.addImm(1)
.addReg(0)
.addJumpTableIndex(MJTI)
.addReg(0);
// movzx IReg64, IReg
- BuildMI(DispContBB, DL, TII->get(TargetOpcode::SUBREG_TO_REG), IReg64)
+ BuildMI(DispContBB, MIMD, TII->get(TargetOpcode::SUBREG_TO_REG), IReg64)
.addImm(0)
.addReg(IReg)
.addImm(X86::sub_32bit);
@@ -37115,7 +38454,7 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
switch (JTE) {
case MachineJumpTableInfo::EK_BlockAddress:
// jmpq *(BReg,IReg64,8)
- BuildMI(DispContBB, DL, TII->get(X86::JMP64m))
+ BuildMI(DispContBB, MIMD, TII->get(X86::JMP64m))
.addReg(BReg)
.addImm(8)
.addReg(IReg64)
@@ -37128,20 +38467,21 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
Register TReg = MRI->createVirtualRegister(&X86::GR64RegClass);
// movl (BReg,IReg64,4), OReg
- BuildMI(DispContBB, DL, TII->get(X86::MOV32rm), OReg)
+ BuildMI(DispContBB, MIMD, TII->get(X86::MOV32rm), OReg)
.addReg(BReg)
.addImm(4)
.addReg(IReg64)
.addImm(0)
.addReg(0);
// movsx OReg64, OReg
- BuildMI(DispContBB, DL, TII->get(X86::MOVSX64rr32), OReg64).addReg(OReg);
+ BuildMI(DispContBB, MIMD, TII->get(X86::MOVSX64rr32), OReg64)
+ .addReg(OReg);
// addq BReg, OReg64, TReg
- BuildMI(DispContBB, DL, TII->get(X86::ADD64rr), TReg)
+ BuildMI(DispContBB, MIMD, TII->get(X86::ADD64rr), TReg)
.addReg(OReg64)
.addReg(BReg);
// jmpq *TReg
- BuildMI(DispContBB, DL, TII->get(X86::JMP64r)).addReg(TReg);
+ BuildMI(DispContBB, MIMD, TII->get(X86::JMP64r)).addReg(TReg);
break;
}
default:
@@ -37149,7 +38489,7 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
}
} else {
// jmpl *.LJTI0_0(,IReg,4)
- BuildMI(DispContBB, DL, TII->get(X86::JMP32m))
+ BuildMI(DispContBB, MIMD, TII->get(X86::JMP32m))
.addReg(0)
.addImm(4)
.addReg(IReg)
@@ -37221,7 +38561,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- const DebugLoc &DL = MI.getDebugLoc();
+ const MIMetadata MIMD(MI);
auto TMMImmToTMMReg = [](unsigned Imm) {
assert (Imm < 8 && "Illegal tmm index");
@@ -37279,38 +38619,66 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::CMOV_VK64:
return EmitLoweredSelect(MI, BB);
- case X86::RDFLAGS32:
- case X86::RDFLAGS64: {
- unsigned PushF =
- MI.getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64;
- unsigned Pop = MI.getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r;
- MachineInstr *Push = BuildMI(*BB, MI, DL, TII->get(PushF));
- // Permit reads of the EFLAGS and DF registers without them being defined.
- // This intrinsic exists to read external processor state in flags, such as
- // the trap flag, interrupt flag, and direction flag, none of which are
- // modeled by the backend.
- assert(Push->getOperand(2).getReg() == X86::EFLAGS &&
- "Unexpected register in operand!");
- Push->getOperand(2).setIsUndef();
- assert(Push->getOperand(3).getReg() == X86::DF &&
- "Unexpected register in operand!");
- Push->getOperand(3).setIsUndef();
- BuildMI(*BB, MI, DL, TII->get(Pop), MI.getOperand(0).getReg());
+ case X86::FP80_ADDr:
+ case X86::FP80_ADDm32: {
+ // Change the floating point control register to use double extended
+ // precision when performing the addition.
+ int OrigCWFrameIdx =
+ MF->getFrameInfo().CreateStackObject(2, Align(2), false);
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FNSTCW16m)),
+ OrigCWFrameIdx);
- MI.eraseFromParent(); // The pseudo is gone now.
- return BB;
- }
+ // Load the old value of the control word...
+ Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::MOVZX32rm16), OldCW),
+ OrigCWFrameIdx);
- case X86::WRFLAGS32:
- case X86::WRFLAGS64: {
- unsigned Push =
- MI.getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r;
- unsigned PopF =
- MI.getOpcode() == X86::WRFLAGS32 ? X86::POPF32 : X86::POPF64;
- BuildMI(*BB, MI, DL, TII->get(Push)).addReg(MI.getOperand(0).getReg());
- BuildMI(*BB, MI, DL, TII->get(PopF));
+ // OR 0b11 into bit 8 and 9. 0b11 is the encoding for double extended
+ // precision.
+ Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
+ BuildMI(*BB, MI, MIMD, TII->get(X86::OR32ri), NewCW)
+ .addReg(OldCW, RegState::Kill)
+ .addImm(0x300);
- MI.eraseFromParent(); // The pseudo is gone now.
+ // Extract to 16 bits.
+ Register NewCW16 =
+ MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), NewCW16)
+ .addReg(NewCW, RegState::Kill, X86::sub_16bit);
+
+ // Prepare memory for FLDCW.
+ int NewCWFrameIdx =
+ MF->getFrameInfo().CreateStackObject(2, Align(2), false);
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::MOV16mr)),
+ NewCWFrameIdx)
+ .addReg(NewCW16, RegState::Kill);
+
+ // Reload the modified control word now...
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FLDCW16m)),
+ NewCWFrameIdx);
+
+ // Do the addition.
+ if (MI.getOpcode() == X86::FP80_ADDr) {
+ BuildMI(*BB, MI, MIMD, TII->get(X86::ADD_Fp80))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2));
+ } else {
+ BuildMI(*BB, MI, MIMD, TII->get(X86::ADD_Fp80m32))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4))
+ .add(MI.getOperand(5))
+ .add(MI.getOperand(6));
+ }
+
+ // Reload the original control word now.
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FLDCW16m)),
+ OrigCWFrameIdx);
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
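For readers following the control-word juggling in the FP80_ADD block above and the FIST block below: bits 8-9 of the x87 control word select precision and bits 10-11 select rounding, so OR-ing in 0x300 forces double-extended precision and OR-ing in 0xC00 forces round-toward-zero. A minimal standalone sketch of just that bit arithmetic; the helper names and the sample value 0x027F are illustrative, not taken from the patch.

#include <cstdint>
#include <cstdio>

// x87 control word fields: bits 8-9 = precision control (0b11 = 64-bit
// significand, i.e. double extended), bits 10-11 = rounding control
// (0b11 = truncate / round toward zero).
static uint16_t forceExtendedPrecision(uint16_t CW) { return CW | 0x300; }
static uint16_t forceTruncation(uint16_t CW) { return CW | 0xC00; }

int main() {
  uint16_t CW = 0x027F; // arbitrary example value, not a claimed default
  std::printf("extended: %#x, truncating: %#x\n",
              (unsigned)forceExtendedPrecision(CW),
              (unsigned)forceTruncation(CW));
  return 0;
}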
@@ -37327,34 +38695,34 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// mode when truncating to an integer value.
int OrigCWFrameIdx =
MF->getFrameInfo().CreateStackObject(2, Align(2), false);
- addFrameReference(BuildMI(*BB, MI, DL,
- TII->get(X86::FNSTCW16m)), OrigCWFrameIdx);
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FNSTCW16m)),
+ OrigCWFrameIdx);
// Load the old value of the control word...
Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
- addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOVZX32rm16), OldCW),
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::MOVZX32rm16), OldCW),
OrigCWFrameIdx);
// OR 0b11 into bit 10 and 11. 0b11 is the encoding for round toward zero.
Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
- BuildMI(*BB, MI, DL, TII->get(X86::OR32ri), NewCW)
+ BuildMI(*BB, MI, MIMD, TII->get(X86::OR32ri), NewCW)
.addReg(OldCW, RegState::Kill).addImm(0xC00);
// Extract to 16 bits.
Register NewCW16 =
MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), NewCW16)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), NewCW16)
.addReg(NewCW, RegState::Kill, X86::sub_16bit);
// Prepare memory for FLDCW.
int NewCWFrameIdx =
MF->getFrameInfo().CreateStackObject(2, Align(2), false);
- addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)),
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::MOV16mr)),
NewCWFrameIdx)
.addReg(NewCW16, RegState::Kill);
// Reload the modified control word now...
- addFrameReference(BuildMI(*BB, MI, DL,
+ addFrameReference(BuildMI(*BB, MI, MIMD,
TII->get(X86::FLDCW16m)), NewCWFrameIdx);
// Get the X86 opcode to use.
@@ -37373,12 +38741,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
X86AddressMode AM = getAddressFromInstr(&MI, 0);
- addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM)
+ addFullAddress(BuildMI(*BB, MI, MIMD, TII->get(Opc)), AM)
.addReg(MI.getOperand(X86::AddrNumOperands).getReg());
// Reload the original control word now.
- addFrameReference(BuildMI(*BB, MI, DL,
- TII->get(X86::FLDCW16m)), OrigCWFrameIdx);
+ addFrameReference(BuildMI(*BB, MI, MIMD, TII->get(X86::FLDCW16m)),
+ OrigCWFrameIdx);
MI.eraseFromParent(); // The pseudo instruction is gone now.
return BB;
@@ -37463,7 +38831,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
MachineBasicBlock::iterator MBBI(RMBBI);
addFullAddress(
- BuildMI(*BB, *MBBI, DL, TII->get(X86::LEA32r), computedAddrVReg), AM);
+ BuildMI(*BB, *MBBI, MIMD, TII->get(X86::LEA32r), computedAddrVReg), AM);
setDirectAddressInInstr(&MI, 0, computedAddrVReg);
@@ -37479,21 +38847,21 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// Save RBX into a virtual register.
Register SaveRBX =
MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), SaveRBX)
.addReg(X86::RBX);
Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
MachineInstrBuilder MIB =
- BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst);
+ BuildMI(*BB, MI, MIMD, TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst);
for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx)
MIB.add(MI.getOperand(Idx));
MIB.add(MI.getOperand(X86::AddrNumOperands));
MIB.addReg(SaveRBX);
} else {
// Simple case, just copy the virtual register to RBX.
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::RBX)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::RBX)
.add(MI.getOperand(X86::AddrNumOperands));
MachineInstrBuilder MIB =
- BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B));
+ BuildMI(*BB, MI, MIMD, TII->get(X86::LCMPXCHG16B));
for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx)
MIB.add(MI.getOperand(Idx));
}
@@ -37507,32 +38875,32 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// If no need to save the base pointer, we generate MWAITXrrr,
// else we generate pseudo MWAITX_SAVE_RBX.
if (!IsRBX || !TRI->hasBasePointer(*MF)) {
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::ECX)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::ECX)
.addReg(MI.getOperand(0).getReg());
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EAX)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::EAX)
.addReg(MI.getOperand(1).getReg());
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EBX)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::EBX)
.addReg(MI.getOperand(2).getReg());
- BuildMI(*BB, MI, DL, TII->get(X86::MWAITXrrr));
+ BuildMI(*BB, MI, MIMD, TII->get(X86::MWAITXrrr));
MI.eraseFromParent();
} else {
if (!BB->isLiveIn(BasePtr)) {
BB->addLiveIn(BasePtr);
}
// Parameters can be copied into ECX and EAX but not EBX yet.
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::ECX)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::ECX)
.addReg(MI.getOperand(0).getReg());
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EAX)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), X86::EAX)
.addReg(MI.getOperand(1).getReg());
assert(Subtarget.is64Bit() && "Expected 64-bit mode!");
// Save RBX into a virtual register.
Register SaveRBX =
MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX)
+ BuildMI(*BB, MI, MIMD, TII->get(TargetOpcode::COPY), SaveRBX)
.addReg(X86::RBX);
// Generate mwaitx pseudo.
Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
- BuildMI(*BB, MI, DL, TII->get(X86::MWAITX_SAVE_RBX))
+ BuildMI(*BB, MI, MIMD, TII->get(X86::MWAITX_SAVE_RBX))
.addDef(Dst) // Destination tied in with SaveRBX.
.addReg(MI.getOperand(2).getReg()) // input value of EBX.
.addUse(SaveRBX); // Save of base pointer.
@@ -37549,7 +38917,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
assert(StackAdjustment != 0 && "0 stack adjustment");
LLVM_DEBUG(dbgs() << "PREALLOCATED_SETUP stack adjustment "
<< StackAdjustment << "\n");
- BuildMI(*BB, MI, DL, TII->get(X86::SUB32ri), X86::ESP)
+ BuildMI(*BB, MI, MIMD, TII->get(X86::SUB32ri), X86::ESP)
.addReg(X86::ESP)
.addImm(StackAdjustment);
MI.eraseFromParent();
@@ -37564,9 +38932,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx
<< ", arg offset " << ArgOffset << "\n");
// stack pointer + offset
- addRegOffset(
- BuildMI(*BB, MI, DL, TII->get(X86::LEA32r), MI.getOperand(0).getReg()),
- X86::ESP, false, ArgOffset);
+ addRegOffset(BuildMI(*BB, MI, MIMD, TII->get(X86::LEA32r),
+ MI.getOperand(0).getReg()),
+ X86::ESP, false, ArgOffset);
MI.eraseFromParent();
return BB;
}
@@ -37587,7 +38955,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTDPFP16PS: Opc = X86::TDPFP16PS; break;
}
- MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));
MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Define);
MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Undef);
MIB.addReg(TMMImmToTMMReg(MI.getOperand(1).getImm()), RegState::Undef);
@@ -37598,7 +38966,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
case X86::PTILEZERO: {
unsigned Imm = MI.getOperand(0).getImm();
- BuildMI(*BB, MI, DL, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm));
+ BuildMI(*BB, MI, MIMD, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm));
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
@@ -37613,7 +38981,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::PTILESTORED: Opc = X86::TILESTORED; break;
}
- MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));
unsigned CurOp = 0;
if (Opc != X86::TILESTORED)
MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()),
@@ -37632,6 +39000,23 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.eraseFromParent(); // The pseudo is gone now.
return BB;
}
+ case X86::PTCMMIMFP16PS:
+ case X86::PTCMMRLFP16PS: {
+ const MIMetadata MIMD(MI);
+ unsigned Opc;
+ switch (MI.getOpcode()) {
+ default: llvm_unreachable("Unexpected instruction!");
+ case X86::PTCMMIMFP16PS: Opc = X86::TCMMIMFP16PS; break;
+ case X86::PTCMMRLFP16PS: Opc = X86::TCMMRLFP16PS; break;
+ }
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, MIMD, TII->get(Opc));
+ MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Define);
+ MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Undef);
+ MIB.addReg(TMMImmToTMMReg(MI.getOperand(1).getImm()), RegState::Undef);
+ MIB.addReg(TMMImmToTMMReg(MI.getOperand(2).getImm()), RegState::Undef);
+ MI.eraseFromParent(); // The pseudo is gone now.
+ return BB;
+ }
}
}
@@ -37666,14 +39051,14 @@ X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
return false;
};
// For vectors - if we have a constant, then try to sign extend.
- // TODO: Handle AND/ANDN cases.
+ // TODO: Handle AND cases.
unsigned ActiveBits = DemandedBits.getActiveBits();
if (EltSize > ActiveBits && EltSize > 1 && isTypeLegal(VT) &&
- (Opcode == ISD::OR || Opcode == ISD::XOR) &&
+ (Opcode == ISD::OR || Opcode == ISD::XOR || Opcode == X86ISD::ANDNP) &&
NeedsSignExtension(Op.getOperand(1), ActiveBits)) {
EVT ExtSVT = EVT::getIntegerVT(*TLO.DAG.getContext(), ActiveBits);
EVT ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtSVT,
- VT.getVectorNumElements());
+ VT.getVectorNumElements());
SDValue NewC =
TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(Op), VT,
Op.getOperand(1), TLO.DAG.getValueType(ExtVT));
@@ -37707,7 +39092,7 @@ X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
return false;
// Find the next power of 2 width, rounding up to a byte.
- Width = PowerOf2Ceil(std::max(Width, 8U));
+ Width = llvm::bit_ceil(std::max(Width, 8U));
// Truncate the width to size to handle illegal types.
Width = std::min(Width, EltSize);
@@ -37750,6 +39135,13 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.resetAll();
switch (Opc) {
default: break;
+ case X86ISD::MUL_IMM: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known = KnownBits::mul(Known, Known2);
+ break;
+ }
case X86ISD::SETCC:
Known.Zero.setBitsFrom(1);
break;
@@ -37812,11 +39204,11 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits Known2;
if (!!DemandedLHS) {
Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedLHS, Depth + 1);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
if (!!DemandedRHS) {
Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedRHS, Depth + 1);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
if (Known.countMinLeadingZeros() < BitWidth)
@@ -37868,6 +39260,23 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.Zero.setBitsFrom(16);
break;
}
+ case X86ISD::PCMPGT:
+ case X86ISD::PCMPEQ: {
+ KnownBits KnownLhs =
+ DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ KnownBits KnownRhs =
+ DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ std::optional<bool> Res = Opc == X86ISD::PCMPEQ
+ ? KnownBits::eq(KnownLhs, KnownRhs)
+ : KnownBits::sgt(KnownLhs, KnownRhs);
+ if (Res) {
+ if (*Res)
+ Known.setAllOnes();
+ else
+ Known.setAllZero();
+ }
+ break;
+ }
case X86ISD::PMULUDQ: {
KnownBits Known2;
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
@@ -37886,7 +39295,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
// Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
break;
}
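The commonBits -> intersectWith replacements in these known-bits hunks keep the same semantics under the newer member-function spelling: a bit stays known only if it is known, with the same value, on both sides. A tiny model of that intersection using plain masks; KB is a stand-in, not llvm::KnownBits.

#include <cstdint>

// Toy stand-in for llvm::KnownBits: Zero marks bits known to be 0, One marks
// bits known to be 1 (the two masks never overlap).
struct KB { uint64_t Zero = 0, One = 0; };

// What Known.intersectWith(Known2) computes: only facts common to both
// operands survive.
static KB intersectWith(KB A, KB B) { return {A.Zero & B.Zero, A.One & B.One}; }

int main() {
  KB A{0xF0, 0x0F}; // value known to be exactly 0x0F
  KB B{0xF0, 0x0C}; // bits 2-3 known 1, high nibble known 0, low two bits unknown
  KB R = intersectWith(A, B);
  return (R.Zero == 0xF0 && R.One == 0x0C) ? 0 : 1;
}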
case X86ISD::BEXTR:
@@ -37926,7 +39335,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case X86ISD::PEXT: {
Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
// The result has as many leading zeros as the number of zeroes in the mask.
- unsigned Count = Known.Zero.countPopulation();
+ unsigned Count = Known.Zero.popcount();
Known.Zero = APInt::getHighBitsSet(BitWidth, Count);
Known.One.clearAllBits();
break;
@@ -37954,8 +39363,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
EVT SrcVT = Op.getOperand(0).getValueType();
if (SrcVT.isVector()) {
unsigned NumSrcElts = SrcVT.getVectorNumElements();
- if (NumElts > NumSrcElts &&
- DemandedElts.countTrailingZeros() >= NumSrcElts)
+ if (NumElts > NumSrcElts && DemandedElts.countr_zero() >= NumSrcElts)
Known.setAllZero();
}
break;
@@ -37970,15 +39378,14 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
EVT SrcVT = Op.getOperand(1).getValueType();
if (SrcVT.isVector()) {
unsigned NumSrcElts = SrcVT.getVectorNumElements();
- if (NumElts > NumSrcElts &&
- DemandedElts.countTrailingZeros() >= NumSrcElts)
+ if (NumElts > NumSrcElts && DemandedElts.countr_zero() >= NumSrcElts)
Known.setAllZero();
}
break;
}
case X86ISD::MOVQ2DQ: {
// Move from MMX to XMM. Upper half of XMM should be 0.
- if (DemandedElts.countTrailingZeros() >= (NumElts / 2))
+ if (DemandedElts.countr_zero() >= (NumElts / 2))
Known.setAllZero();
break;
}
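Many of the one-line changes throughout these hunks are pure renames of the bit-counting helpers (countTrailingZeros -> countr_zero, countLeadingZeros -> countl_zero, countPopulation -> popcount, PowerOf2Ceil -> bit_ceil, isPowerOf2_32 -> has_single_bit), aligning them with the C++20 <bit> spellings. The standard-library equivalents behave the same way, as this small check illustrates:

#include <bit>
#include <cstdint>

int main() {
  uint32_t X = 0x00F0u;
  // 4 trailing zeros, 24 leading zeros, 4 set bits, next power of two is 256,
  // and 0xF0 is not itself a power of two.
  bool OK = std::countr_zero(X) == 4 && std::countl_zero(X) == 24 &&
            std::popcount(X) == 4 && std::bit_ceil(X) == 256u &&
            !std::has_single_bit(X);
  return OK ? 0 : 1;
}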
@@ -37998,7 +39405,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
}
KnownBits Known2 = KnownBits::makeConstant(EltBits[I]);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
return;
}
@@ -38049,7 +39456,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
continue;
KnownBits Known2 =
DAG.computeKnownBits(Ops[i], DemandedOps[i], Depth + 1);
- Known = KnownBits::commonBits(Known, Known2);
+ Known = Known.intersectWith(Known2);
}
}
}
@@ -38085,12 +39492,30 @@ unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
getPackDemandedElts(Op.getValueType(), DemandedElts, DemandedLHS,
DemandedRHS);
+ // Helper to detect PACKSSDW(BITCAST(PACKSSDW(X)),BITCAST(PACKSSDW(Y)))
+ // patterns often used to compact vXi64 allsignbit patterns.
+ auto NumSignBitsPACKSS = [&](SDValue V, const APInt &Elts) -> unsigned {
+ SDValue BC = peekThroughBitcasts(V);
+ if (BC.getOpcode() == X86ISD::PACKSS &&
+ BC.getScalarValueSizeInBits() == 16 &&
+ V.getScalarValueSizeInBits() == 32) {
+ SDValue BC0 = peekThroughBitcasts(BC.getOperand(0));
+ SDValue BC1 = peekThroughBitcasts(BC.getOperand(1));
+ if (BC0.getScalarValueSizeInBits() == 64 &&
+ BC1.getScalarValueSizeInBits() == 64 &&
+ DAG.ComputeNumSignBits(BC0, Depth + 1) == 64 &&
+ DAG.ComputeNumSignBits(BC1, Depth + 1) == 64)
+ return 32;
+ }
+ return DAG.ComputeNumSignBits(V, Elts, Depth + 1);
+ };
+
unsigned SrcBits = Op.getOperand(0).getScalarValueSizeInBits();
unsigned Tmp0 = SrcBits, Tmp1 = SrcBits;
if (!!DemandedLHS)
- Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
+ Tmp0 = NumSignBitsPACKSS(Op.getOperand(0), DemandedLHS);
if (!!DemandedRHS)
- Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1);
+ Tmp1 = NumSignBitsPACKSS(Op.getOperand(1), DemandedRHS);
unsigned Tmp = std::min(Tmp0, Tmp1);
if (Tmp > (SrcBits - VTBits))
return Tmp - (SrcBits - VTBits);
@@ -38254,31 +39679,41 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
}
- // Match against a ANY/ZERO_EXTEND_VECTOR_INREG instruction.
+ // Match against a ANY/SIGN/ZERO_EXTEND_VECTOR_INREG instruction.
// TODO: Add 512-bit vector support (split AVX512F and AVX512BW).
if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
(MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
unsigned MaxScale = 64 / MaskEltSize;
+ bool UseSign = V1.getScalarValueSizeInBits() == MaskEltSize &&
+ DAG.ComputeNumSignBits(V1) == MaskEltSize;
for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
bool MatchAny = true;
bool MatchZero = true;
+ bool MatchSign = UseSign;
unsigned NumDstElts = NumMaskElts / Scale;
- for (unsigned i = 0; i != NumDstElts && (MatchAny || MatchZero); ++i) {
+ for (unsigned i = 0;
+ i != NumDstElts && (MatchAny || MatchSign || MatchZero); ++i) {
if (!isUndefOrEqual(Mask[i * Scale], (int)i)) {
- MatchAny = MatchZero = false;
+ MatchAny = MatchSign = MatchZero = false;
break;
}
- MatchAny &= isUndefInRange(Mask, (i * Scale) + 1, Scale - 1);
- MatchZero &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
- }
- if (MatchAny || MatchZero) {
- assert(MatchZero && "Failed to match zext but matched aext?");
+ unsigned Pos = (i * Scale) + 1;
+ unsigned Len = Scale - 1;
+ MatchAny &= isUndefInRange(Mask, Pos, Len);
+ MatchZero &= isUndefOrZeroInRange(Mask, Pos, Len);
+ MatchSign &= isUndefOrEqualInRange(Mask, (int)i, Pos, Len);
+ }
+ if (MatchAny || MatchSign || MatchZero) {
+ assert((MatchSign || MatchZero) &&
+ "Failed to match sext/zext but matched aext?");
unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
- MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType() :
- MVT::getIntegerVT(MaskEltSize);
+ MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType()
+ : MVT::getIntegerVT(MaskEltSize);
SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize);
- Shuffle = unsigned(MatchAny ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND);
+ Shuffle = unsigned(
+ MatchAny ? ISD::ANY_EXTEND
+ : (MatchSign ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND));
if (SrcVT.getVectorNumElements() != NumDstElts)
Shuffle = DAG.getOpcode_EXTEND_VECTOR_INREG(Shuffle);
@@ -38424,85 +39859,97 @@ static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
}
}
- // Handle PSHUFD/VPERMILPI vXi32/vXf32 repeated patterns.
- // AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we
- // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).
- if ((MaskScalarSizeInBits == 64 || MaskScalarSizeInBits == 32) &&
- !ContainsZeros && (AllowIntDomain || Subtarget.hasAVX())) {
- SmallVector<int, 4> RepeatedMask;
- if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
- // Narrow the repeated mask to create 32-bit element permutes.
- SmallVector<int, 4> WordMask = RepeatedMask;
- if (MaskScalarSizeInBits == 64)
- narrowShuffleMaskElts(2, RepeatedMask, WordMask);
-
- Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI);
- ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32);
- ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32);
- PermuteImm = getV4X86ShuffleImm(WordMask);
- return true;
- }
- }
-
- // Handle PSHUFLW/PSHUFHW vXi16 repeated patterns.
- if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16 &&
- ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
- (MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
- (MaskVT.is512BitVector() && Subtarget.hasBWI()))) {
- SmallVector<int, 4> RepeatedMask;
- if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
- ArrayRef<int> LoMask(RepeatedMask.data() + 0, 4);
- ArrayRef<int> HiMask(RepeatedMask.data() + 4, 4);
-
- // PSHUFLW: permute lower 4 elements only.
- if (isUndefOrInRange(LoMask, 0, 4) &&
- isSequentialOrUndefInRange(HiMask, 0, 4, 4)) {
- Shuffle = X86ISD::PSHUFLW;
- ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16);
- PermuteImm = getV4X86ShuffleImm(LoMask);
- return true;
+ // We are checking for shuffle match or shift match. Loop twice so we can
+ // order which we try and match first depending on target preference.
+ for (unsigned Order = 0; Order < 2; ++Order) {
+ if (Subtarget.preferLowerShuffleAsShift() ? (Order == 1) : (Order == 0)) {
+ // Handle PSHUFD/VPERMILPI vXi32/vXf32 repeated patterns.
+ // AVX introduced the VPERMILPD/VPERMILPS float permutes, before then we
+ // had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).
+ if ((MaskScalarSizeInBits == 64 || MaskScalarSizeInBits == 32) &&
+ !ContainsZeros && (AllowIntDomain || Subtarget.hasAVX())) {
+ SmallVector<int, 4> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
+ // Narrow the repeated mask to create 32-bit element permutes.
+ SmallVector<int, 4> WordMask = RepeatedMask;
+ if (MaskScalarSizeInBits == 64)
+ narrowShuffleMaskElts(2, RepeatedMask, WordMask);
+
+ Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI);
+ ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32);
+ ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32);
+ PermuteImm = getV4X86ShuffleImm(WordMask);
+ return true;
+ }
}
- // PSHUFHW: permute upper 4 elements only.
- if (isUndefOrInRange(HiMask, 4, 8) &&
- isSequentialOrUndefInRange(LoMask, 0, 4, 0)) {
- // Offset the HiMask so that we can create the shuffle immediate.
- int OffsetHiMask[4];
- for (int i = 0; i != 4; ++i)
- OffsetHiMask[i] = (HiMask[i] < 0 ? HiMask[i] : HiMask[i] - 4);
+ // Handle PSHUFLW/PSHUFHW vXi16 repeated patterns.
+ if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16 &&
+ ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
+ (MaskVT.is512BitVector() && Subtarget.hasBWI()))) {
+ SmallVector<int, 4> RepeatedMask;
+ if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
+ ArrayRef<int> LoMask(RepeatedMask.data() + 0, 4);
+ ArrayRef<int> HiMask(RepeatedMask.data() + 4, 4);
+
+ // PSHUFLW: permute lower 4 elements only.
+ if (isUndefOrInRange(LoMask, 0, 4) &&
+ isSequentialOrUndefInRange(HiMask, 0, 4, 4)) {
+ Shuffle = X86ISD::PSHUFLW;
+ ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16);
+ PermuteImm = getV4X86ShuffleImm(LoMask);
+ return true;
+ }
- Shuffle = X86ISD::PSHUFHW;
- ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16);
- PermuteImm = getV4X86ShuffleImm(OffsetHiMask);
- return true;
+ // PSHUFHW: permute upper 4 elements only.
+ if (isUndefOrInRange(HiMask, 4, 8) &&
+ isSequentialOrUndefInRange(LoMask, 0, 4, 0)) {
+ // Offset the HiMask so that we can create the shuffle immediate.
+ int OffsetHiMask[4];
+ for (int i = 0; i != 4; ++i)
+ OffsetHiMask[i] = (HiMask[i] < 0 ? HiMask[i] : HiMask[i] - 4);
+
+ Shuffle = X86ISD::PSHUFHW;
+ ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16);
+ PermuteImm = getV4X86ShuffleImm(OffsetHiMask);
+ return true;
+ }
+ }
+ }
+ } else {
+ // Attempt to match against bit rotates.
+ if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits < 64 &&
+ ((MaskVT.is128BitVector() && Subtarget.hasXOP()) ||
+ Subtarget.hasAVX512())) {
+ int RotateAmt = matchShuffleAsBitRotate(ShuffleVT, MaskScalarSizeInBits,
+ Subtarget, Mask);
+ if (0 < RotateAmt) {
+ Shuffle = X86ISD::VROTLI;
+ PermuteImm = (unsigned)RotateAmt;
+ return true;
+ }
}
}
- }
+ // Attempt to match against byte/bit shifts.
+ if (AllowIntDomain &&
+ ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
+ (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
+ int ShiftAmt =
+ matchShuffleAsShift(ShuffleVT, Shuffle, MaskScalarSizeInBits, Mask, 0,
+ Zeroable, Subtarget);
+ if (0 < ShiftAmt && (!ShuffleVT.is512BitVector() || Subtarget.hasBWI() ||
+ 32 <= ShuffleVT.getScalarSizeInBits())) {
+ // Byte shifts can be slower so only match them on second attempt.
+ if (Order == 0 &&
+ (Shuffle == X86ISD::VSHLDQ || Shuffle == X86ISD::VSRLDQ))
+ continue;
- // Attempt to match against byte/bit shifts.
- if (AllowIntDomain &&
- ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
- (MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
- (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
- int ShiftAmt = matchShuffleAsShift(ShuffleVT, Shuffle, MaskScalarSizeInBits,
- Mask, 0, Zeroable, Subtarget);
- if (0 < ShiftAmt && (!ShuffleVT.is512BitVector() || Subtarget.hasBWI() ||
- 32 <= ShuffleVT.getScalarSizeInBits())) {
- PermuteImm = (unsigned)ShiftAmt;
- return true;
- }
- }
+ PermuteImm = (unsigned)ShiftAmt;
+ return true;
+ }
- // Attempt to match against bit rotates.
- if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits < 64 &&
- ((MaskVT.is128BitVector() && Subtarget.hasXOP()) ||
- Subtarget.hasAVX512())) {
- int RotateAmt = matchShuffleAsBitRotate(ShuffleVT, MaskScalarSizeInBits,
- Subtarget, Mask);
- if (0 < RotateAmt) {
- Shuffle = X86ISD::VROTLI;
- PermuteImm = (unsigned)RotateAmt;
- return true;
}
}
@@ -38570,6 +40017,35 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
return true;
}
}
+ // TODO: Can we handle this inside matchShuffleWithPACK?
+ if (MaskVT == MVT::v4i32 && Subtarget.hasSSE2() &&
+ isTargetShuffleEquivalent(MaskVT, Mask, {0, 2, 4, 6}, DAG) &&
+ V1.getScalarValueSizeInBits() == 64 &&
+ V2.getScalarValueSizeInBits() == 64) {
+ // Use (SSE41) PACKUSDW if the leading zero bits reach down to the lowest 16 bits.
+ unsigned MinLZV1 = DAG.computeKnownBits(V1).countMinLeadingZeros();
+ unsigned MinLZV2 = DAG.computeKnownBits(V2).countMinLeadingZeros();
+ if (Subtarget.hasSSE41() && MinLZV1 >= 48 && MinLZV2 >= 48) {
+ SrcVT = MVT::v4i32;
+ DstVT = MVT::v8i16;
+ Shuffle = X86ISD::PACKUS;
+ return true;
+ }
+ // Use PACKUSWB if the leading zero bits reach down to the lowest 8 bits.
+ if (MinLZV1 >= 56 && MinLZV2 >= 56) {
+ SrcVT = MVT::v8i16;
+ DstVT = MVT::v16i8;
+ Shuffle = X86ISD::PACKUS;
+ return true;
+ }
+ // Use PACKSSDW if the sign bits extend to the lowest 16 bits.
+ if (DAG.ComputeNumSignBits(V1) > 48 && DAG.ComputeNumSignBits(V2) > 48) {
+ SrcVT = MVT::v4i32;
+ DstVT = MVT::v8i16;
+ Shuffle = X86ISD::PACKSS;
+ return true;
+ }
+ }
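The countMinLeadingZeros() / ComputeNumSignBits() guards in the block above exist because the pack nodes saturate: taking the even 32-bit lanes of two 64-bit-element vectors is only equivalent to an unsigned pack when every source value already fits the narrower lane, so no clamping can occur. A small scalar model of the unsigned 32-to-16 pack step; the helper name is made up for the example.

#include <algorithm>
#include <cstdint>

// Scalar model of a PACKUSDW-style lane: the signed 32-bit input is clamped
// to the unsigned 16-bit range before truncation.
static uint16_t packusLane(int32_t V) {
  return static_cast<uint16_t>(std::clamp<int32_t>(V, 0, 0xFFFF));
}

int main() {
  // With >= 48 known leading zero bits the 64-bit element fits in 16 bits, so
  // the pack is a plain truncate: 0xABCD survives unchanged. Without that
  // guarantee (e.g. 0x12345) the lane would clamp to 0xFFFF, which is why the
  // combine checks the known leading zeros first.
  return (packusLane(0xABCD) == 0xABCD && packusLane(0x12345) == 0xFFFF) ? 0 : 1;
}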
// Attempt to match against either a unary or binary UNPCKL/UNPCKH shuffle.
if ((MaskVT == MVT::v4f32 && Subtarget.hasSSE1()) ||
@@ -38725,7 +40201,7 @@ static bool matchBinaryPermuteShuffle(
uint64_t BlendMask = 0;
bool ForceV1Zero = false, ForceV2Zero = false;
SmallVector<int, 8> TargetMask(Mask);
- if (matchShuffleAsBlend(V1, V2, TargetMask, Zeroable, ForceV1Zero,
+ if (matchShuffleAsBlend(MaskVT, V1, V2, TargetMask, Zeroable, ForceV1Zero,
ForceV2Zero, BlendMask)) {
if (MaskVT == MVT::v16i16) {
// We can only use v16i16 PBLENDW if the lanes are repeated.
@@ -39616,77 +41092,92 @@ static SDValue combineX86ShuffleChainWithExtract(
EVT RootVT = Root.getValueType();
unsigned RootSizeInBits = RootVT.getSizeInBits();
+ unsigned RootEltSizeInBits = RootSizeInBits / NumMaskElts;
assert((RootSizeInBits % NumMaskElts) == 0 && "Unexpected root shuffle mask");
- // Bail if we have any smaller inputs.
- if (llvm::any_of(Inputs, [RootSizeInBits](SDValue Input) {
- return Input.getValueSizeInBits() < RootSizeInBits;
- }))
- return SDValue();
-
- SmallVector<SDValue, 4> WideInputs(Inputs.begin(), Inputs.end());
- SmallVector<unsigned, 4> Offsets(NumInputs, 0);
-
- // Peek through subvectors.
- // TODO: Support inter-mixed EXTRACT_SUBVECTORs + BITCASTs?
+ // Peek through extract_subvector to find widest legal vector.
+ // TODO: Handle ISD::TRUNCATE
unsigned WideSizeInBits = RootSizeInBits;
- for (unsigned i = 0; i != NumInputs; ++i) {
- SDValue &Src = WideInputs[i];
- unsigned &Offset = Offsets[i];
- Src = peekThroughBitcasts(Src);
- EVT BaseVT = Src.getValueType();
- while (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
- Offset += Src.getConstantOperandVal(1);
- Src = Src.getOperand(0);
- }
- WideSizeInBits = std::max(WideSizeInBits,
- (unsigned)Src.getValueSizeInBits());
- assert((Offset % BaseVT.getVectorNumElements()) == 0 &&
- "Unexpected subvector extraction");
- Offset /= BaseVT.getVectorNumElements();
- Offset *= NumMaskElts;
+ for (unsigned I = 0; I != NumInputs; ++I) {
+ SDValue Input = peekThroughBitcasts(Inputs[I]);
+ while (Input.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ Input = peekThroughBitcasts(Input.getOperand(0));
+ if (DAG.getTargetLoweringInfo().isTypeLegal(Input.getValueType()) &&
+ WideSizeInBits < Input.getValueSizeInBits())
+ WideSizeInBits = Input.getValueSizeInBits();
}
- // Bail if we're always extracting from the lowest subvectors,
- // combineX86ShuffleChain should match this for the current width.
- if (llvm::all_of(Offsets, [](unsigned Offset) { return Offset == 0; }))
- return SDValue();
-
+ // Bail if we fail to find a source larger than the existing root.
unsigned Scale = WideSizeInBits / RootSizeInBits;
- assert((WideSizeInBits % RootSizeInBits) == 0 &&
- "Unexpected subvector extraction");
-
- // If the src vector types aren't the same, see if we can extend
- // them to match each other.
- // TODO: Support different scalar types?
- EVT WideSVT = WideInputs[0].getValueType().getScalarType();
- if (llvm::any_of(WideInputs, [&WideSVT, &DAG](SDValue Op) {
- return !DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()) ||
- Op.getValueType().getScalarType() != WideSVT;
- }))
+ if (WideSizeInBits <= RootSizeInBits ||
+ (WideSizeInBits % RootSizeInBits) != 0)
return SDValue();
// Create new mask for larger type.
- for (unsigned i = 1; i != NumInputs; ++i)
- Offsets[i] += i * Scale * NumMaskElts;
-
SmallVector<int, 64> WideMask(BaseMask);
for (int &M : WideMask) {
if (M < 0)
continue;
- M = (M % NumMaskElts) + Offsets[M / NumMaskElts];
+ M = (M % NumMaskElts) + ((M / NumMaskElts) * Scale * NumMaskElts);
}
WideMask.append((Scale - 1) * NumMaskElts, SM_SentinelUndef);
+ // Attempt to peek through inputs and adjust mask when we extract from an
+ // upper subvector.
+ int AdjustedMasks = 0;
+ SmallVector<SDValue, 4> WideInputs(Inputs.begin(), Inputs.end());
+ for (unsigned I = 0; I != NumInputs; ++I) {
+ SDValue &Input = WideInputs[I];
+ Input = peekThroughBitcasts(Input);
+ while (Input.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ Input.getOperand(0).getValueSizeInBits() <= WideSizeInBits) {
+ uint64_t Idx = Input.getConstantOperandVal(1);
+ if (Idx != 0) {
+ ++AdjustedMasks;
+ unsigned InputEltSizeInBits = Input.getScalarValueSizeInBits();
+ Idx = (Idx * InputEltSizeInBits) / RootEltSizeInBits;
+
+ int lo = I * WideMask.size();
+ int hi = (I + 1) * WideMask.size();
+ for (int &M : WideMask)
+ if (lo <= M && M < hi)
+ M += Idx;
+ }
+ Input = peekThroughBitcasts(Input.getOperand(0));
+ }
+ }
+
// Remove unused/repeated shuffle source ops.
resolveTargetShuffleInputsAndMask(WideInputs, WideMask);
assert(!WideInputs.empty() && "Shuffle with no inputs detected");
- if (WideInputs.size() > 2)
- return SDValue();
+ // Bail if we're always extracting from the lowest subvectors,
+ // combineX86ShuffleChain should match this for the current width, or the
+ // shuffle still references too many inputs.
+ if (AdjustedMasks == 0 || WideInputs.size() > 2)
+ return SDValue();
+
+ // Minor canonicalization of the accumulated shuffle mask to make it easier
+ // to match below. All this does is detect masks with sequential pairs of
+ // elements, and shrink them to the half-width mask. It does this in a loop
+ // so it will reduce the size of the mask to the minimal width mask which
+ // performs an equivalent shuffle.
+ while (WideMask.size() > 1) {
+ SmallVector<int, 64> WidenedMask;
+ if (!canWidenShuffleElements(WideMask, WidenedMask))
+ break;
+ WideMask = std::move(WidenedMask);
+ }
+
+ // Canonicalization of binary shuffle masks to improve pattern matching by
+ // commuting the inputs.
+ if (WideInputs.size() == 2 && canonicalizeShuffleMaskWithCommute(WideMask)) {
+ ShuffleVectorSDNode::commuteMask(WideMask);
+ std::swap(WideInputs[0], WideInputs[1]);
+ }
// Increase depth for every upper subvector we've peeked through.
- Depth += count_if(Offsets, [](unsigned Offset) { return Offset > 0; });
+ Depth += AdjustedMasks;
// Attempt to combine wider chain.
// TODO: Can we use a better Root?
@@ -39694,6 +41185,9 @@ static SDValue combineX86ShuffleChainWithExtract(
WideInputs.back().getValueSizeInBits()
? WideInputs.front()
: WideInputs.back();
+ assert(WideRoot.getValueSizeInBits() == WideSizeInBits &&
+ "WideRootSize mismatch");
+
if (SDValue WideShuffle =
combineX86ShuffleChain(WideInputs, WideRoot, WideMask, Depth,
HasVariableMask, AllowVariableCrossLaneMask,
@@ -39702,6 +41196,7 @@ static SDValue combineX86ShuffleChainWithExtract(
extractSubVector(WideShuffle, 0, DAG, SDLoc(Root), RootSizeInBits);
return DAG.getBitcast(RootVT, WideShuffle);
}
+
return SDValue();
}
@@ -39806,7 +41301,7 @@ static SDValue canonicalizeShuffleMaskWithHorizOp(
LHS = DAG.getBitcast(SrcVT, LHS);
RHS = DAG.getBitcast(SrcVT, RHS ? RHS : LHS);
SDValue Res = DAG.getNode(Opcode0, DL, VT0, LHS, RHS);
- // Use SHUFPS for the permute so this will work on SSE3 targets,
+ // Use SHUFPS for the permute so this will work on SSE2 targets,
// shuffle combining and domain handling will simplify this later on.
MVT ShuffleVT = MVT::getVectorVT(MVT::f32, RootSizeInBits / 32);
Res = DAG.getBitcast(ShuffleVT, Res);
@@ -39891,6 +41386,25 @@ static SDValue canonicalizeShuffleMaskWithHorizOp(
}
}
+ // If we are post-shuffling a 256-bit hop and not requiring the upper
+ // elements, then try to narrow to a 128-bit hop directly.
+ SmallVector<int, 16> WideMask64;
+ if (Ops.size() == 1 && NumLanes == 2 &&
+ scaleShuffleElements(Mask, 4, WideMask64) &&
+ isUndefInRange(WideMask64, 2, 2)) {
+ int M0 = WideMask64[0];
+ int M1 = WideMask64[1];
+ if (isInRange(M0, 0, 4) && isInRange(M1, 0, 4)) {
+ MVT HalfVT = VT0.getSimpleVT().getHalfNumVectorElementsVT();
+ unsigned Idx0 = (M0 & 2) ? (SrcVT.getVectorNumElements() / 2) : 0;
+ unsigned Idx1 = (M1 & 2) ? (SrcVT.getVectorNumElements() / 2) : 0;
+ SDValue V0 = extract128BitVector(BC[0].getOperand(M0 & 1), Idx0, DAG, DL);
+ SDValue V1 = extract128BitVector(BC[0].getOperand(M1 & 1), Idx1, DAG, DL);
+ SDValue Res = DAG.getNode(Opcode0, DL, HalfVT, V0, V1);
+ return widenSubVector(Res, false, Subtarget, DAG, DL, 256);
+ }
+ }
+
return SDValue();
}
@@ -39910,22 +41424,19 @@ static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
unsigned NumOps = Ops.size();
// Extract constant bits from each source op.
- bool OneUseConstantOp = false;
SmallVector<APInt, 16> UndefEltsOps(NumOps);
SmallVector<SmallVector<APInt, 16>, 16> RawBitsOps(NumOps);
- for (unsigned i = 0; i != NumOps; ++i) {
- SDValue SrcOp = Ops[i];
- OneUseConstantOp |= SrcOp.hasOneUse();
- if (!getTargetConstantBitsFromNode(SrcOp, MaskSizeInBits, UndefEltsOps[i],
- RawBitsOps[i]))
+ for (unsigned I = 0; I != NumOps; ++I)
+ if (!getTargetConstantBitsFromNode(Ops[I], MaskSizeInBits, UndefEltsOps[I],
+ RawBitsOps[I]))
return SDValue();
- }
// If we're optimizing for size, only fold if at least one of the constants is
// only used once or the combined shuffle has included a variable mask
// shuffle, this is to avoid constant pool bloat.
bool IsOptimizingSize = DAG.shouldOptForSize();
- if (IsOptimizingSize && !OneUseConstantOp && !HasVariableMask)
+ if (IsOptimizingSize && !HasVariableMask &&
+ llvm::none_of(Ops, [](SDValue SrcOp) { return SrcOp->hasOneUse(); }))
return SDValue();
// Shuffle the constant bits according to the mask.
@@ -39991,7 +41502,7 @@ namespace llvm {
enum {
MaxShuffleCombineDepth = 8
};
- }
+ } // namespace X86
} // namespace llvm
/// Fully generic combining of x86 shuffle instructions.
@@ -40029,7 +41540,7 @@ static SDValue combineX86ShufflesRecursively(
unsigned MaxDepth, bool HasVariableMask, bool AllowVariableCrossLaneMask,
bool AllowVariablePerLaneMask, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- assert(RootMask.size() > 0 &&
+ assert(!RootMask.empty() &&
(RootMask.size() > 1 || (RootMask[0] == 0 && SrcOpIndex == 0)) &&
"Illegal shuffle root mask");
MVT RootVT = Root.getSimpleValueType();
@@ -40099,7 +41610,7 @@ static SDValue combineX86ShufflesRecursively(
OpInputs.assign({SrcVec});
OpMask.assign(NumElts, SM_SentinelUndef);
std::iota(OpMask.begin(), OpMask.end(), ExtractIdx);
- OpZero = OpUndef = APInt::getNullValue(NumElts);
+ OpZero = OpUndef = APInt::getZero(NumElts);
} else {
return SDValue();
}
@@ -40188,11 +41699,12 @@ static SDValue combineX86ShufflesRecursively(
// This function can be performance-critical, so we rely on the power-of-2
// knowledge that we have about the mask sizes to replace div/rem ops with
// bit-masks and shifts.
- assert(isPowerOf2_32(RootMask.size()) &&
+ assert(llvm::has_single_bit<uint32_t>(RootMask.size()) &&
"Non-power-of-2 shuffle mask sizes");
- assert(isPowerOf2_32(OpMask.size()) && "Non-power-of-2 shuffle mask sizes");
- unsigned RootMaskSizeLog2 = countTrailingZeros(RootMask.size());
- unsigned OpMaskSizeLog2 = countTrailingZeros(OpMask.size());
+ assert(llvm::has_single_bit<uint32_t>(OpMask.size()) &&
+ "Non-power-of-2 shuffle mask sizes");
+ unsigned RootMaskSizeLog2 = llvm::countr_zero(RootMask.size());
+ unsigned OpMaskSizeLog2 = llvm::countr_zero(OpMask.size());
unsigned MaskWidth = std::max<unsigned>(OpMask.size(), RootMask.size());
unsigned RootRatio =
@@ -40204,8 +41716,8 @@ static SDValue combineX86ShufflesRecursively(
assert(isPowerOf2_32(MaskWidth) && "Non-power-of-2 shuffle mask sizes");
assert(isPowerOf2_32(RootRatio) && "Non-power-of-2 shuffle mask sizes");
assert(isPowerOf2_32(OpRatio) && "Non-power-of-2 shuffle mask sizes");
- unsigned RootRatioLog2 = countTrailingZeros(RootRatio);
- unsigned OpRatioLog2 = countTrailingZeros(OpRatio);
+ unsigned RootRatioLog2 = llvm::countr_zero(RootRatio);
+ unsigned OpRatioLog2 = llvm::countr_zero(OpRatio);
Mask.resize(MaskWidth, SM_SentinelUndef);
@@ -40257,6 +41769,31 @@ static SDValue combineX86ShufflesRecursively(
}
}
+ // Peek through vector widenings and set out of bounds mask indices to undef.
+ // TODO: Can resolveTargetShuffleInputsAndMask do some of this?
+ for (unsigned I = 0, E = Ops.size(); I != E; ++I) {
+ SDValue &Op = Ops[I];
+ if (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op.getOperand(0).isUndef() &&
+ isNullConstant(Op.getOperand(2))) {
+ Op = Op.getOperand(1);
+ unsigned Scale = RootSizeInBits / Op.getValueSizeInBits();
+ int Lo = I * Mask.size();
+ int Hi = (I + 1) * Mask.size();
+ int NewHi = Lo + (Mask.size() / Scale);
+ for (int &M : Mask) {
+ if (Lo <= M && NewHi <= M && M < Hi)
+ M = SM_SentinelUndef;
+ }
+ }
+ }
+
+ // Peek through any free extract_subvector nodes back to root size.
+ for (SDValue &Op : Ops)
+ while (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ (RootSizeInBits % Op.getOperand(0).getValueSizeInBits()) == 0 &&
+ isNullConstant(Op.getOperand(1)))
+ Op = Op.getOperand(0);
+
// Remove unused/repeated shuffle source ops.
resolveTargetShuffleInputsAndMask(Ops, Mask);
@@ -40685,6 +42222,7 @@ static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
ISD::isBuildVectorAllZeros(Op.getNode()) ||
ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()) ||
+ (Op.getOpcode() == ISD::INSERT_SUBVECTOR && Op->hasOneUse()) ||
(isTargetShuffle(Op.getOpcode()) && Op->hasOneUse()) ||
(FoldLoad && isShuffleFoldableLoad(Op)) ||
DAG.isSplatValue(Op, /*AllowUndefs*/ false);
@@ -40766,6 +42304,7 @@ static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1));
unsigned SrcOpcode = N0.getOpcode();
if (TLI.isBinOp(SrcOpcode) && N1.getOpcode() == SrcOpcode &&
+ N0.getValueType() == N1.getValueType() &&
IsSafeToMoveShuffle(N0, SrcOpcode) &&
IsSafeToMoveShuffle(N1, SrcOpcode)) {
SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
@@ -41320,10 +42859,37 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
}
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
- case X86ISD::PSHUFHW:
+ case X86ISD::PSHUFHW: {
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ if (N0->hasOneUse()) {
+ SDValue V = peekThroughOneUseBitcasts(N0);
+ switch (V.getOpcode()) {
+ case X86ISD::VSHL:
+ case X86ISD::VSRL:
+ case X86ISD::VSRA:
+ case X86ISD::VSHLI:
+ case X86ISD::VSRLI:
+ case X86ISD::VSRAI:
+ case X86ISD::VROTLI:
+ case X86ISD::VROTRI: {
+ MVT InnerVT = V.getSimpleValueType();
+ if (InnerVT.getScalarSizeInBits() <= VT.getScalarSizeInBits()) {
+ SDValue Res = DAG.getNode(Opcode, DL, VT,
+ DAG.getBitcast(VT, V.getOperand(0)), N1);
+ Res = DAG.getBitcast(InnerVT, Res);
+ Res = DAG.getNode(V.getOpcode(), DL, InnerVT, Res, V.getOperand(1));
+ return DAG.getBitcast(VT, Res);
+ }
+ break;
+ }
+ }
+ }
+
Mask = getPSHUFShuffleMask(N);
assert(Mask.size() == 4);
break;
+ }
case X86ISD::MOVSD:
case X86ISD::MOVSH:
case X86ISD::MOVSS: {
@@ -41770,9 +43336,9 @@ static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
/// low half of each source vector and does not set any high half elements in
/// the destination vector, narrow the shuffle to half its original size.
static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) {
- if (!Shuf->getValueType(0).isSimple())
+ EVT VT = Shuf->getValueType(0);
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(Shuf->getValueType(0)))
return SDValue();
- MVT VT = Shuf->getSimpleValueType(0);
if (!VT.is256BitVector() && !VT.is512BitVector())
return SDValue();
@@ -41796,7 +43362,7 @@ static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) {
// the wide shuffle that we started with.
return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0),
Shuf->getOperand(1), HalfMask, HalfIdx1,
- HalfIdx2, false, DAG, /*UseConcat*/true);
+ HalfIdx2, false, DAG, /*UseConcat*/ true);
}
static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
@@ -42710,6 +44276,31 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
}
break;
}
+ case X86ISD::ANDNP: {
+ KnownBits Known2;
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (SimplifyDemandedBits(Op1, OriginalDemandedBits, OriginalDemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+
+ if (SimplifyDemandedBits(Op0, ~Known.Zero & OriginalDemandedBits,
+ OriginalDemandedElts, Known2, TLO, Depth + 1))
+ return true;
+ assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(Op, ~Known2.One & OriginalDemandedBits,
+ OriginalDemandedElts, TLO))
+ return true;
+
+ // ANDNP = (~Op0 & Op1);
+ Known.One &= Known2.Zero;
+ Known.Zero |= Known2.One;
+ break;
+ }
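The ANDNP case added above folds knowledge through ~Op0 & Op1: a result bit is known one only where Op1 is known one and Op0 is known zero, and known zero wherever Op1 is known zero or Op0 is known one, which is exactly what the final two mask updates encode. A toy version of that propagation with plain masks; KB is a stand-in, not llvm::KnownBits.

#include <cstdint>

// Stand-in for known-bits info: Zero marks bits known 0, One marks bits known 1.
struct KB { uint8_t Zero, One; };

// Known bits of andn(x, y) = ~x & y, mirroring "Known.One &= Known2.Zero;
// Known.Zero |= Known2.One;" where Known tracks y and Known2 tracks x.
static KB andnKnown(KB X, KB Y) {
  return {static_cast<uint8_t>(Y.Zero | X.One),
          static_cast<uint8_t>(Y.One & X.Zero)};
}

int main() {
  KB X{0x0F, 0xF0};       // x known to be exactly 0xF0
  KB Y{0x00, 0xFF};       // y known to be exactly 0xFF
  KB R = andnKnown(X, Y); // ~0xF0 & 0xFF = 0x0F
  return (R.One == 0x0F && R.Zero == 0xF0) ? 0 : 1;
}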
case X86ISD::VSHLI: {
SDValue Op0 = Op.getOperand(0);
@@ -42723,7 +44314,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
// single shift. We can do this if the bottom bits (which are shifted
// out) are never demanded.
if (Op0.getOpcode() == X86ISD::VSRLI &&
- OriginalDemandedBits.countTrailingZeros() >= ShAmt) {
+ OriginalDemandedBits.countr_zero() >= ShAmt) {
unsigned Shift2Amt = Op0.getConstantOperandVal(1);
if (Shift2Amt < BitWidth) {
int Diff = ShAmt - Shift2Amt;
@@ -42741,8 +44332,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
// If we are only demanding sign bits then we can use the shift source directly.
unsigned NumSignBits =
TLO.DAG.ComputeNumSignBits(Op0, OriginalDemandedElts, Depth + 1);
- unsigned UpperDemandedBits =
- BitWidth - OriginalDemandedBits.countTrailingZeros();
+ unsigned UpperDemandedBits = BitWidth - OriginalDemandedBits.countr_zero();
if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits)
return TLO.CombineTo(Op, Op0);
@@ -42803,7 +44393,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
// If any of the demanded bits are produced by the sign extension, we also
// demand the input sign bit.
- if (OriginalDemandedBits.countLeadingZeros() < ShAmt)
+ if (OriginalDemandedBits.countl_zero() < ShAmt)
DemandedMask.setSignBit();
if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known,
@@ -42817,7 +44407,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
if (Known.Zero[BitWidth - ShAmt - 1] ||
- OriginalDemandedBits.countLeadingZeros() >= ShAmt)
+ OriginalDemandedBits.countl_zero() >= ShAmt)
return TLO.CombineTo(
Op, TLO.DAG.getNode(X86ISD::VSRLI, SDLoc(Op), VT, Op0, Op1));
@@ -42912,7 +44502,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
return true;
KnownScl = KnownScl.trunc(VecVT.getScalarSizeInBits());
- Known = KnownBits::commonBits(KnownVec, KnownScl);
+ Known = KnownVec.intersectWith(KnownScl);
return false;
}
break;
@@ -42959,7 +44549,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
// Don't attempt this on AVX512 as it might affect broadcast folding.
// TODO: Should we attempt this for i32/i16 splats? They tend to be slower.
if ((BitWidth == 64) && SrcVT.isScalarInteger() && !Subtarget.hasAVX512() &&
- OriginalDemandedBits.countLeadingZeros() >= (BitWidth / 2) &&
+ OriginalDemandedBits.countl_zero() >= (BitWidth / 2) &&
Src->hasOneUse()) {
MVT NewSrcVT = MVT::getIntegerVT(BitWidth / 2);
SDValue NewSrc =
@@ -42985,7 +44575,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
unsigned NumElts = SrcVT.getVectorNumElements();
// If we don't need the sign bits at all just return zero.
- if (OriginalDemandedBits.countTrailingZeros() >= NumElts)
+ if (OriginalDemandedBits.countr_zero() >= NumElts)
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
// See if we only demand bits from the lower 128-bit vector.
@@ -43023,6 +44613,23 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc));
return false;
}
+ case X86ISD::TESTP: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ MVT OpVT = Op0.getSimpleValueType();
+ assert((OpVT.getVectorElementType() == MVT::f32 ||
+ OpVT.getVectorElementType() == MVT::f64) &&
+ "Illegal vector type for X86ISD::TESTP");
+
+ // TESTPS/TESTPD only demands the sign bits of ALL the elements.
+ KnownBits KnownSrc;
+ APInt SignMask = APInt::getSignMask(OpVT.getScalarSizeInBits());
+ bool AssumeSingleUse = (Op0 == Op1) && Op->isOnlyUserOf(Op0.getNode());
+ return SimplifyDemandedBits(Op0, SignMask, KnownSrc, TLO, Depth + 1,
+ AssumeSingleUse) ||
+ SimplifyDemandedBits(Op1, SignMask, KnownSrc, TLO, Depth + 1,
+ AssumeSingleUse);
+ }
case X86ISD::BEXTR:
case X86ISD::BEXTRI: {
SDValue Op0 = Op.getOperand(0);
@@ -43077,7 +44684,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
- unsigned DemandedBitsLZ = OriginalDemandedBits.countLeadingZeros();
+ unsigned DemandedBitsLZ = OriginalDemandedBits.countl_zero();
APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
// If the demanded bits has leading zeroes, we don't demand those from the
@@ -43089,7 +44696,7 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
// operand 0 used. Undemanded bits from the mask don't matter so filter
// them before counting.
KnownBits Known2;
- uint64_t Count = (~Known.Zero & LoMask).countPopulation();
+ uint64_t Count = (~Known.Zero & LoMask).popcount();
APInt DemandedMask(APInt::getLowBitsSet(BitWidth, Count));
if (SimplifyDemandedBits(Op0, DemandedMask, Known2, TLO, Depth + 1))
return true;
@@ -43133,7 +44740,7 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
unsigned ShAmt = Op.getConstantOperandVal(1);
unsigned BitWidth = DemandedBits.getBitWidth();
unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
- unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
+ unsigned UpperDemandedBits = BitWidth - DemandedBits.countr_zero();
if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits)
return Op0;
break;
@@ -43202,11 +44809,11 @@ SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
if (IdentityOp == 0)
break;
}
- assert((IdentityOp == 0 || IdentityOp.countPopulation() == 1) &&
+ assert((IdentityOp == 0 || IdentityOp.popcount() == 1) &&
"Multiple identity shuffles detected");
if (IdentityOp != 0)
- return DAG.getBitcast(VT, ShuffleOps[IdentityOp.countTrailingZeros()]);
+ return DAG.getBitcast(VT, ShuffleOps[IdentityOp.countr_zero()]);
}
}
@@ -43265,7 +44872,7 @@ bool X86TargetLowering::isSplatValueForTargetNode(SDValue Op,
switch (Opc) {
case X86ISD::VBROADCAST:
case X86ISD::VBROADCAST_LOAD:
- UndefElts = APInt::getNullValue(NumElts);
+ UndefElts = APInt::getZero(NumElts);
return true;
}
@@ -43289,13 +44896,14 @@ static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size,
case ISD::OR:
return checkBitcastSrcVectorSize(Src.getOperand(0), Size, AllowTruncate) &&
checkBitcastSrcVectorSize(Src.getOperand(1), Size, AllowTruncate);
+ case ISD::SELECT:
case ISD::VSELECT:
return Src.getOperand(0).getScalarValueSizeInBits() == 1 &&
checkBitcastSrcVectorSize(Src.getOperand(1), Size, AllowTruncate) &&
checkBitcastSrcVectorSize(Src.getOperand(2), Size, AllowTruncate);
case ISD::BUILD_VECTOR:
- return ISD::isBuildVectorAllZeros(Src.getNode());
-
+ return ISD::isBuildVectorAllZeros(Src.getNode()) ||
+ ISD::isBuildVectorAllOnes(Src.getNode());
}
return false;
}
@@ -43360,6 +44968,7 @@ static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT,
Src.getOpcode(), DL, SExtVT,
signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL),
signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL));
+ case ISD::SELECT:
case ISD::VSELECT:
return DAG.getSelect(
DL, SExtVT, Src.getOperand(0),
@@ -43418,6 +45027,24 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
if (!Subtarget.hasSSE2() || (Subtarget.hasAVX512() && !PreferMovMsk))
return SDValue();
+ // If the upper ops of a concatenation are undef, then try to bitcast the
+ // lower op and extend.
+ SmallVector<SDValue, 4> SubSrcOps;
+ if (collectConcatOps(Src.getNode(), SubSrcOps, DAG) &&
+ SubSrcOps.size() >= 2) {
+ SDValue LowerOp = SubSrcOps[0];
+ ArrayRef<SDValue> UpperOps(std::next(SubSrcOps.begin()), SubSrcOps.end());
+ if (LowerOp.getOpcode() == ISD::SETCC &&
+ all_of(UpperOps, [](SDValue Op) { return Op.isUndef(); })) {
+ EVT SubVT = VT.getIntegerVT(
+ *DAG.getContext(), LowerOp.getValueType().getVectorMinNumElements());
+ if (SDValue V = combineBitcastvxi1(DAG, SubVT, LowerOp, DL, Subtarget)) {
+ EVT IntVT = VT.getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ return DAG.getBitcast(VT, DAG.getNode(ISD::ANY_EXTEND, DL, IntVT, V));
+ }
+ }
+ }
+
// There are MOVMSK flavors for types v16i8, v32i8, v4f32, v8f32, v4f64 and
// v8f64. So all legal 128-bit and 256-bit vectors are covered except for
// v8i16 and v16i16.
@@ -43492,9 +45119,10 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8 || SExtVT == MVT::v64i8) {
V = getPMOVMSKB(DL, V, DAG, Subtarget);
} else {
- if (SExtVT == MVT::v8i16)
- V = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, V,
- DAG.getUNDEF(MVT::v8i16));
+ if (SExtVT == MVT::v8i16) {
+ V = widenSubVector(V, false, Subtarget, DAG, DL, 256);
+ V = DAG.getNode(ISD::TRUNCATE, DL, MVT::v16i8, V);
+ }
V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
}
@@ -43845,7 +45473,7 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
if (getTargetConstantBitsFromNode(N0, 64, UndefElts, EltBits)) {
SDLoc DL(N0);
// Handle zero-extension of i32 with MOVD.
- if (EltBits[0].countLeadingZeros() >= 32)
+ if (EltBits[0].countl_zero() >= 32)
return DAG.getNode(X86ISD::MMX_MOVW2D, DL, VT,
DAG.getConstant(EltBits[0].trunc(32), DL, MVT::i32));
// Else, bitcast to a double.
@@ -44245,31 +45873,33 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
unsigned NumElts = MatchVT.getVectorNumElements();
unsigned MaxElts = Subtarget.hasInt256() ? 32 : 16;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ LLVMContext &Ctx = *DAG.getContext();
if (ExtractVT == MVT::i1) {
// Special case for (pre-legalization) vXi1 reductions.
if (NumElts > 64 || !isPowerOf2_32(NumElts))
return SDValue();
+ if (Match.getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Match.getOperand(2))->get();
+ if ((BinOp == ISD::AND && CC == ISD::CondCode::SETEQ) ||
+ (BinOp == ISD::OR && CC == ISD::CondCode::SETNE)) {
+ // For all_of(setcc(x,y,eq)) - use (iX)x == (iX)y.
+ // For any_of(setcc(x,y,ne)) - use (iX)x != (iX)y.
+ X86::CondCode X86CC;
+ SDValue LHS = DAG.getFreeze(Match.getOperand(0));
+ SDValue RHS = DAG.getFreeze(Match.getOperand(1));
+ APInt Mask = APInt::getAllOnes(LHS.getScalarValueSizeInBits());
+ if (SDValue V = LowerVectorAllEqual(DL, LHS, RHS, CC, Mask, Subtarget,
+ DAG, X86CC))
+ return DAG.getNode(ISD::TRUNCATE, DL, ExtractVT,
+ getSETCC(X86CC, V, DL, DAG));
+ }
+ }
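
The all_of/any_of special case above rests on a simple fact: every lane of two vectors compares equal exactly when their whole bit patterns, viewed as one wide integer, compare equal. A minimal standalone C++ sketch of that idea (illustration only, not code from this patch):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint8_t a[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      uint8_t b[8] = {1, 2, 3, 4, 5, 6, 7, 8};
      auto allOfEq = [&] {
        bool all = true;
        for (int i = 0; i < 8; ++i)
          all &= (a[i] == b[i]);
        return all;
      };
      auto asWideInt = [](const uint8_t (&v)[8]) {
        uint64_t w;
        std::memcpy(&w, v, sizeof(w)); // the "(iX)x" of the comment above
        return w;
      };
      assert(allOfEq() == (asWideInt(a) == asWideInt(b)));
      b[5] = 99; // perturb one lane; both sides must now disagree together
      assert(allOfEq() == (asWideInt(a) == asWideInt(b)));
      return 0;
    }
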
if (TLI.isTypeLegal(MatchVT)) {
// If this is a legal AVX512 predicate type then we can just bitcast.
- EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
+ EVT MovmskVT = EVT::getIntegerVT(Ctx, NumElts);
Movmsk = DAG.getBitcast(MovmskVT, Match);
} else {
- // For all_of(setcc(x,y,eq)) - use PMOVMSKB(PCMPEQB()).
- if (BinOp == ISD::AND && Match.getOpcode() == ISD::SETCC &&
- cast<CondCodeSDNode>(Match.getOperand(2))->get() ==
- ISD::CondCode::SETEQ) {
- EVT VecSVT = Match.getOperand(0).getValueType().getScalarType();
- if (VecSVT != MVT::i8 && (VecSVT.getSizeInBits() % 8) == 0) {
- NumElts *= VecSVT.getSizeInBits() / 8;
- EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, NumElts);
- MatchVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
- Match = DAG.getSetCC(
- DL, MatchVT, DAG.getBitcast(CmpVT, Match.getOperand(0)),
- DAG.getBitcast(CmpVT, Match.getOperand(1)), ISD::CondCode::SETEQ);
- }
- }
-
// Use combineBitcastvxi1 to create the MOVMSK.
while (NumElts > MaxElts) {
SDValue Lo, Hi;
@@ -44277,7 +45907,7 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi);
NumElts /= 2;
}
- EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
+ EVT MovmskVT = EVT::getIntegerVT(Ctx, NumElts);
Movmsk = combineBitcastvxi1(DAG, MovmskVT, Match, DL, Subtarget);
}
if (!Movmsk)
@@ -44345,8 +45975,7 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
// The setcc produces an i8 of 0/1, so extend that to the result width and
// negate to get the final 0/-1 mask value.
- EVT SetccVT =
- TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
+ EVT SetccVT = TLI.getSetCCResultType(DAG.getDataLayout(), Ctx, CmpVT);
SDValue Setcc = DAG.getSetCC(DL, SetccVT, Movmsk, CmpC, CondCode);
SDValue Zext = DAG.getZExtOrTrunc(Setcc, DL, ExtractVT);
SDValue Zero = DAG.getConstant(0, DL, ExtractVT);
@@ -45058,24 +46687,19 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
}
// Detect mmx extraction of all bits as a i64. It works better as a bitcast.
- if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
- VT == MVT::i64 && SrcVT == MVT::v1i64 && isNullConstant(EltIdx)) {
- SDValue MMXSrc = InputVector.getOperand(0);
-
- // The bitcast source is a direct mmx result.
- if (MMXSrc.getValueType() == MVT::x86mmx)
- return DAG.getBitcast(VT, InputVector);
- }
+ if (VT == MVT::i64 && SrcVT == MVT::v1i64 &&
+ InputVector.getOpcode() == ISD::BITCAST &&
+ InputVector.getOperand(0).getValueType() == MVT::x86mmx &&
+ isNullConstant(EltIdx) && InputVector.hasOneUse())
+ return DAG.getBitcast(VT, InputVector);
// Detect mmx to i32 conversion through a v2i32 elt extract.
- if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
- VT == MVT::i32 && SrcVT == MVT::v2i32 && isNullConstant(EltIdx)) {
- SDValue MMXSrc = InputVector.getOperand(0);
-
- // The bitcast source is a direct mmx result.
- if (MMXSrc.getValueType() == MVT::x86mmx)
- return DAG.getNode(X86ISD::MMX_MOVD2W, dl, MVT::i32, MMXSrc);
- }
+ if (VT == MVT::i32 && SrcVT == MVT::v2i32 &&
+ InputVector.getOpcode() == ISD::BITCAST &&
+ InputVector.getOperand(0).getValueType() == MVT::x86mmx &&
+ isNullConstant(EltIdx) && InputVector.hasOneUse())
+ return DAG.getNode(X86ISD::MMX_MOVD2W, dl, MVT::i32,
+ InputVector.getOperand(0));
// Check whether this extract is the root of a sum of absolute differences
// pattern. This has to be done here because we really want it to happen
@@ -45401,10 +47025,9 @@ static SDValue narrowVectorSelect(SDNode *N, SelectionDAG &DAG,
SDValue Cond = N->getOperand(0);
SDValue TVal = N->getOperand(1);
SDValue FVal = N->getOperand(2);
- SmallVector<SDValue, 4> CatOpsT, CatOpsF;
if (!TVal.hasOneUse() || !FVal.hasOneUse() ||
- !collectConcatOps(TVal.getNode(), CatOpsT, DAG) ||
- !collectConcatOps(FVal.getNode(), CatOpsF, DAG))
+ !isFreeToSplitVector(TVal.getNode(), DAG) ||
+ !isFreeToSplitVector(FVal.getNode(), DAG))
return SDValue();
auto makeBlend = [Opcode](SelectionDAG &DAG, const SDLoc &DL,
@@ -45645,6 +47268,37 @@ static SDValue combineLogicBlendIntoConditionalNegate(
return DAG.getBitcast(VT, Res);
}
+static SDValue commuteSelect(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasAVX512())
+ return SDValue();
+ if (N->getOpcode() != ISD::VSELECT)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue Cond = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+
+ if (canCombineAsMaskOperation(LHS, Subtarget))
+ return SDValue();
+
+ if (!canCombineAsMaskOperation(RHS, Subtarget))
+ return SDValue();
+
+ if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
+ return SDValue();
+
+  // Commute LHS and RHS to create an opportunity to select a mask instruction.
+ // (vselect M, L, R) -> (vselect ~M, R, L)
+ ISD::CondCode NewCC =
+ ISD::getSetCCInverse(cast<CondCodeSDNode>(Cond.getOperand(2))->get(),
+ Cond.getOperand(0).getValueType());
+ Cond = DAG.getSetCC(SDLoc(Cond), Cond.getValueType(), Cond.getOperand(0),
+ Cond.getOperand(1), NewCC);
+ return DAG.getSelect(DL, LHS.getValueType(), Cond, RHS, LHS);
+}
+
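
A scalar C++ model of the identity commuteSelect relies on, namely that selecting with the inverted compare and swapped operands is equivalent (illustration only, not code from this patch):

    #include <array>
    #include <cassert>
    #include <cstddef>

    int main() {
      std::array<int, 4> a{1, 5, 3, 7}, b{4, 2, 6, 0};
      std::array<int, 4> l{10, 20, 30, 40}, r{50, 60, 70, 80};
      for (std::size_t i = 0; i < a.size(); ++i) {
        int sel = (a[i] < b[i]) ? l[i] : r[i];        // vselect M, L, R
        int commuted = (a[i] >= b[i]) ? r[i] : l[i];  // vselect ~M, R, L
        assert(sel == commuted);
      }
      return 0;
    }
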
/// Do target-specific dag combines on SELECT and VSELECT nodes.
static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -45659,6 +47313,13 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
if (SDValue V = DAG.simplifySelect(Cond, LHS, RHS))
return V;
+  // When AVX512 is available, the LHS operand of a select can be folded
+  // into a masked instruction, while the RHS operand can't. Commute the
+  // LHS and RHS of the select instruction to create the opportunity for
+  // folding.
+ if (SDValue V = commuteSelect(N, DAG, Subtarget))
+ return V;
+
EVT VT = LHS.getValueType();
EVT CondVT = Cond.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -46028,7 +47689,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Attempt to convert a (vXi1 bitcast(iX Cond)) selection mask before it might
// get split by legalization.
if (N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::BITCAST &&
- CondVT.getVectorElementType() == MVT::i1 && Cond.hasOneUse() &&
+ CondVT.getVectorElementType() == MVT::i1 &&
TLI.isTypeLegal(VT.getScalarType())) {
EVT ExtCondVT = VT.changeVectorElementTypeToInteger();
if (SDValue ExtCond = combineToExtendBoolVectorInReg(
@@ -46057,28 +47718,14 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(N->getOpcode(), DL, VT,
DAG.getBitcast(CondVT, CondNot), RHS, LHS);
- if (Cond.getOpcode() == X86ISD::PCMPGT && Cond.hasOneUse()) {
- // pcmpgt(X, -1) -> pcmpgt(0, X) to help select/blendv just use the
- // signbit.
- if (ISD::isBuildVectorAllOnes(Cond.getOperand(1).getNode())) {
- Cond = DAG.getNode(X86ISD::PCMPGT, DL, CondVT,
- DAG.getConstant(0, DL, CondVT), Cond.getOperand(0));
- return DAG.getNode(N->getOpcode(), DL, VT, Cond, RHS, LHS);
- }
-
- // smin(LHS, RHS) : select(pcmpgt(RHS, LHS), LHS, RHS)
- // -> select(pcmpgt(LHS, RHS), RHS, LHS)
- // iff the commuted pcmpgt() already exists.
- // TODO: Could DAGCombiner::combine cse search for SETCC nodes, like it
- // does for commutative binops?
- if (Cond.getOperand(0) == RHS && Cond.getOperand(1) == LHS) {
- if (SDNode *FlipCond =
- DAG.getNodeIfExists(X86ISD::PCMPGT, DAG.getVTList(CondVT),
- {Cond.getOperand(1), Cond.getOperand(0)})) {
- return DAG.getNode(N->getOpcode(), DL, VT, SDValue(FlipCond, 0), RHS,
- LHS);
- }
- }
+ // pcmpgt(X, -1) -> pcmpgt(0, X) to help select/blendv just use the
+ // signbit.
+ if (Cond.getOpcode() == X86ISD::PCMPGT &&
+ ISD::isBuildVectorAllOnes(Cond.getOperand(1).getNode()) &&
+ Cond.hasOneUse()) {
+ Cond = DAG.getNode(X86ISD::PCMPGT, DL, CondVT,
+ DAG.getConstant(0, DL, CondVT), Cond.getOperand(0));
+ return DAG.getNode(N->getOpcode(), DL, VT, Cond, RHS, LHS);
}
}
@@ -46091,27 +47738,27 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
VT.getVectorElementType() == MVT::i1 &&
(DCI.isBeforeLegalize() || (VT != MVT::v64i1 || Subtarget.is64Bit()))) {
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());
- bool LHSIsConst = ISD::isBuildVectorOfConstantSDNodes(LHS.getNode());
- bool RHSIsConst = ISD::isBuildVectorOfConstantSDNodes(RHS.getNode());
-
- if ((LHSIsConst ||
- (LHS.getOpcode() == ISD::BITCAST &&
- LHS.getOperand(0).getValueType() == IntVT)) &&
- (RHSIsConst ||
- (RHS.getOpcode() == ISD::BITCAST &&
- RHS.getOperand(0).getValueType() == IntVT))) {
- if (LHSIsConst)
- LHS = combinevXi1ConstantToInteger(LHS, DAG);
- else
- LHS = LHS.getOperand(0);
+ if (DCI.isBeforeLegalize() || TLI.isTypeLegal(IntVT)) {
+ bool LHSIsConst = ISD::isBuildVectorOfConstantSDNodes(LHS.getNode());
+ bool RHSIsConst = ISD::isBuildVectorOfConstantSDNodes(RHS.getNode());
+
+ if ((LHSIsConst || (LHS.getOpcode() == ISD::BITCAST &&
+ LHS.getOperand(0).getValueType() == IntVT)) &&
+ (RHSIsConst || (RHS.getOpcode() == ISD::BITCAST &&
+ RHS.getOperand(0).getValueType() == IntVT))) {
+ if (LHSIsConst)
+ LHS = combinevXi1ConstantToInteger(LHS, DAG);
+ else
+ LHS = LHS.getOperand(0);
- if (RHSIsConst)
- RHS = combinevXi1ConstantToInteger(RHS, DAG);
- else
- RHS = RHS.getOperand(0);
+ if (RHSIsConst)
+ RHS = combinevXi1ConstantToInteger(RHS, DAG);
+ else
+ RHS = RHS.getOperand(0);
- SDValue Select = DAG.getSelect(DL, IntVT, Cond, LHS, RHS);
- return DAG.getBitcast(VT, Select);
+ SDValue Select = DAG.getSelect(DL, IntVT, Cond, LHS, RHS);
+ return DAG.getBitcast(VT, Select);
+ }
}
}
@@ -46535,10 +48182,10 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
// TESTZ: ZF = (Op0 & Op1) == 0
// TESTC: CF = (~Op0 & Op1) == 0
// TESTNZC: ZF == 0 && CF == 0
- EVT VT = EFLAGS.getValueType();
+ MVT VT = EFLAGS.getSimpleValueType();
SDValue Op0 = EFLAGS.getOperand(0);
SDValue Op1 = EFLAGS.getOperand(1);
- EVT OpVT = Op0.getValueType();
+ MVT OpVT = Op0.getSimpleValueType();
// TEST*(~X,Y) == TEST*(X,Y)
if (SDValue NotOp0 = IsNOT(Op0, DAG)) {
@@ -46577,6 +48224,19 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
}
}
+ if (CC == X86::COND_B || CC == X86::COND_AE) {
+ // TESTC(X,~X) == TESTC(X,-1)
+ if (SDValue NotOp1 = IsNOT(Op1, DAG)) {
+ if (peekThroughBitcasts(NotOp1) == peekThroughBitcasts(Op0)) {
+ SDLoc DL(EFLAGS);
+ return DAG.getNode(
+ EFLAGS.getOpcode(), DL, VT, DAG.getBitcast(OpVT, NotOp1),
+ DAG.getBitcast(OpVT,
+ DAG.getAllOnesConstant(DL, NotOp1.getValueType())));
+ }
+ }
+ }
+
if (CC == X86::COND_E || CC == X86::COND_NE) {
// TESTZ(X,~Y) == TESTC(Y,X)
if (SDValue NotOp1 = IsNOT(Op1, DAG)) {
@@ -46588,8 +48248,6 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
if (Op0 == Op1) {
SDValue BC = peekThroughBitcasts(Op0);
EVT BCVT = BC.getValueType();
- assert(BCVT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(BCVT) &&
- "Unexpected vector type");
// TESTZ(AND(X,Y),AND(X,Y)) == TESTZ(X,Y)
if (BC.getOpcode() == ISD::AND || BC.getOpcode() == X86ISD::FAND) {
@@ -46606,32 +48264,40 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
DAG.getBitcast(OpVT, BC.getOperand(1)));
}
- // If every element is an all-sign value, see if we can use MOVMSK to
- // more efficiently extract the sign bits and compare that.
+ // If every element is an all-sign value, see if we can use TESTP/MOVMSK
+ // to more efficiently extract the sign bits and compare that.
// TODO: Handle TESTC with comparison inversion.
// TODO: Can we remove SimplifyMultipleUseDemandedBits and rely on
- // MOVMSK combines to make sure its never worse than PTEST?
- unsigned EltBits = BCVT.getScalarSizeInBits();
- if (DAG.ComputeNumSignBits(BC) == EltBits) {
- assert(VT == MVT::i32 && "Expected i32 EFLAGS comparison result");
- APInt SignMask = APInt::getSignMask(EltBits);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (SDValue Res =
- TLI.SimplifyMultipleUseDemandedBits(BC, SignMask, DAG)) {
- // For vXi16 cases we need to use pmovmksb and extract every other
- // sign bit.
- SDLoc DL(EFLAGS);
- if (EltBits == 16) {
- MVT MovmskVT = BCVT.is128BitVector() ? MVT::v16i8 : MVT::v32i8;
- Res = DAG.getBitcast(MovmskVT, Res);
- Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
- Res = DAG.getNode(ISD::AND, DL, MVT::i32, Res,
- DAG.getConstant(0xAAAAAAAA, DL, MVT::i32));
- } else {
- Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
+  // TESTP/MOVMSK combines to make sure it's never worse than PTEST?
+ if (BCVT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(BCVT)) {
+ unsigned EltBits = BCVT.getScalarSizeInBits();
+ if (DAG.ComputeNumSignBits(BC) == EltBits) {
+ assert(VT == MVT::i32 && "Expected i32 EFLAGS comparison result");
+ APInt SignMask = APInt::getSignMask(EltBits);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (SDValue Res =
+ TLI.SimplifyMultipleUseDemandedBits(BC, SignMask, DAG)) {
+          // For vXi16 cases we need to use pmovmskb and extract every other
+ // sign bit.
+ SDLoc DL(EFLAGS);
+ if ((EltBits == 32 || EltBits == 64) && Subtarget.hasAVX()) {
+ MVT FloatSVT = MVT::getFloatingPointVT(EltBits);
+ MVT FloatVT =
+ MVT::getVectorVT(FloatSVT, OpVT.getSizeInBits() / EltBits);
+ Res = DAG.getBitcast(FloatVT, Res);
+ return DAG.getNode(X86ISD::TESTP, SDLoc(EFLAGS), VT, Res, Res);
+ } else if (EltBits == 16) {
+ MVT MovmskVT = BCVT.is128BitVector() ? MVT::v16i8 : MVT::v32i8;
+ Res = DAG.getBitcast(MovmskVT, Res);
+ Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
+ Res = DAG.getNode(ISD::AND, DL, MVT::i32, Res,
+ DAG.getConstant(0xAAAAAAAA, DL, MVT::i32));
+ } else {
+ Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
+ }
+ return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Res,
+ DAG.getConstant(0, DL, MVT::i32));
}
- return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Res,
- DAG.getConstant(0, DL, MVT::i32));
}
}
}
@@ -46654,10 +48320,12 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
peekThroughBitcasts(Src0.getOperand(1)), true);
Src1 = getSplitVectorSrc(peekThroughBitcasts(Src1.getOperand(0)),
peekThroughBitcasts(Src1.getOperand(1)), true);
- if (Src0 && Src1)
+ if (Src0 && Src1) {
+ MVT OpVT2 = OpVT.getDoubleNumVectorElementsVT();
return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
- DAG.getBitcast(MVT::v4i64, Src0),
- DAG.getBitcast(MVT::v4i64, Src1));
+ DAG.getBitcast(OpVT2, Src0),
+ DAG.getBitcast(OpVT2, Src1));
+ }
}
}
}
@@ -46761,15 +48429,15 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
// MOVMSK(PCMPEQ(X,0)) == -1 -> PTESTZ(X,X).
// MOVMSK(PCMPEQ(X,0)) != -1 -> !PTESTZ(X,X).
- // MOVMSK(PCMPEQ(X,Y)) == -1 -> PTESTZ(SUB(X,Y),SUB(X,Y)).
- // MOVMSK(PCMPEQ(X,Y)) != -1 -> !PTESTZ(SUB(X,Y),SUB(X,Y)).
+ // MOVMSK(PCMPEQ(X,Y)) == -1 -> PTESTZ(XOR(X,Y),XOR(X,Y)).
+ // MOVMSK(PCMPEQ(X,Y)) != -1 -> !PTESTZ(XOR(X,Y),XOR(X,Y)).
if (IsAllOf && Subtarget.hasSSE41() && IsOneUse) {
MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
SDValue BC = peekThroughBitcasts(Vec);
// Ensure MOVMSK was testing every signbit of BC.
if (BC.getValueType().getVectorNumElements() <= NumElts) {
if (BC.getOpcode() == X86ISD::PCMPEQ) {
- SDValue V = DAG.getNode(ISD::SUB, SDLoc(BC), BC.getValueType(),
+ SDValue V = DAG.getNode(ISD::XOR, SDLoc(BC), BC.getValueType(),
BC.getOperand(0), BC.getOperand(1));
V = DAG.getBitcast(TestVT, V);
return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
@@ -46780,9 +48448,9 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
BC.getOperand(1).getOpcode() == X86ISD::PCMPEQ) {
SDValue LHS = BC.getOperand(0);
SDValue RHS = BC.getOperand(1);
- LHS = DAG.getNode(ISD::SUB, SDLoc(LHS), LHS.getValueType(),
+ LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), LHS.getValueType(),
LHS.getOperand(0), LHS.getOperand(1));
- RHS = DAG.getNode(ISD::SUB, SDLoc(RHS), RHS.getValueType(),
+ RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), RHS.getValueType(),
RHS.getOperand(0), RHS.getOperand(1));
LHS = DAG.getBitcast(TestVT, LHS);
RHS = DAG.getBitcast(TestVT, RHS);
@@ -46824,7 +48492,7 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
SDValue Result = peekThroughBitcasts(Src);
if (IsAllOf && Result.getOpcode() == X86ISD::PCMPEQ &&
Result.getValueType().getVectorNumElements() <= NumElts) {
- SDValue V = DAG.getNode(ISD::SUB, DL, Result.getValueType(),
+ SDValue V = DAG.getNode(ISD::XOR, DL, Result.getValueType(),
Result.getOperand(0), Result.getOperand(1));
V = DAG.getBitcast(MVT::v4i64, V);
return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
@@ -46869,6 +48537,26 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
}
}
+ // MOVMSKPS(V) !=/== 0 -> TESTPS(V,V)
+ // MOVMSKPD(V) !=/== 0 -> TESTPD(V,V)
+ // MOVMSKPS(V) !=/== -1 -> TESTPS(V,V)
+ // MOVMSKPD(V) !=/== -1 -> TESTPD(V,V)
+ // iff every element is referenced.
+ if (NumElts <= CmpBits && Subtarget.hasAVX() &&
+ !Subtarget.preferMovmskOverVTest() && IsOneUse &&
+ (NumEltBits == 32 || NumEltBits == 64)) {
+ SDLoc DL(EFLAGS);
+ MVT FloatSVT = MVT::getFloatingPointVT(NumEltBits);
+ MVT FloatVT = MVT::getVectorVT(FloatSVT, NumElts);
+ MVT IntVT = FloatVT.changeVectorElementTypeToInteger();
+ SDValue LHS = Vec;
+ SDValue RHS = IsAnyOf ? Vec : DAG.getAllOnesConstant(DL, IntVT);
+ CC = IsAnyOf ? CC : (CC == X86::COND_E ? X86::COND_B : X86::COND_AE);
+ return DAG.getNode(X86ISD::TESTP, DL, MVT::i32,
+ DAG.getBitcast(FloatVT, LHS),
+ DAG.getBitcast(FloatVT, RHS));
+ }
+
return SDValue();
}
@@ -47044,6 +48732,32 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
}
}
+ // Transform:
+ //
+ // (cmov 1 T (uge T 2))
+ //
+ // to:
+ //
+ // (adc T 0 (sub T 1))
+ if (CC == X86::COND_AE && isOneConstant(FalseOp) &&
+ Cond.getOpcode() == X86ISD::SUB && Cond->hasOneUse()) {
+ SDValue Cond0 = Cond.getOperand(0);
+ if (Cond0.getOpcode() == ISD::TRUNCATE)
+ Cond0 = Cond0.getOperand(0);
+ auto *Sub1C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (Cond0 == TrueOp && Sub1C && Sub1C->getZExtValue() == 2) {
+ EVT CondVT = Cond->getValueType(0);
+ EVT OuterVT = N->getValueType(0);
+ // Subtract 1 and generate a carry.
+ SDValue NewSub =
+ DAG.getNode(X86ISD::SUB, DL, Cond->getVTList(), Cond.getOperand(0),
+ DAG.getConstant(1, DL, CondVT));
+ SDValue EFLAGS(NewSub.getNode(), 1);
+ return DAG.getNode(X86ISD::ADC, DL, DAG.getVTList(OuterVT, MVT::i32),
+ TrueOp, DAG.getConstant(0, DL, OuterVT), EFLAGS);
+ }
+ }
+
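
A standalone C++ check of the arithmetic behind this transform (illustration only, not code from this patch): SUB T, 1 borrows only when T == 0, and ADC T, 0 with that borrow reproduces the (uge T 2) ? T : 1 select.

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t t = 0; t < 1000; ++t) {
        uint32_t cmov = (t >= 2) ? t : 1;     // (cmov 1 T (uge T 2))
        uint32_t borrow = (t < 1) ? 1u : 0u;  // CF set by SUB T, 1
        uint32_t adc = t + 0 + borrow;        // (adc T 0 <carry>)
        assert(cmov == adc);
      }
      return 0;
    }
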
// Fold and/or of setcc's to double CMOV:
// (CMOV F, T, ((cc1 | cc2) != 0)) -> (CMOV (CMOV F, T, cc1), T, cc2)
// (CMOV F, T, ((cc1 & cc2) != 0)) -> (CMOV (CMOV T, F, !cc1), F, !cc2)
@@ -47325,7 +49039,7 @@ static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
// count how many zeros are up to the first bit.
// TODO: We can do this even without LEA at a cost of two shifts and an add.
if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
- unsigned ScaleShift = countTrailingZeros(MulAmt);
+ unsigned ScaleShift = llvm::countr_zero(MulAmt);
if (ScaleShift >= 1 && ScaleShift < 4) {
unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
@@ -47522,12 +49236,25 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
- if (VT != MVT::i64 && VT != MVT::i32)
+ if (VT != MVT::i64 && VT != MVT::i32 &&
+ (!VT.isVector() || !VT.isSimple() || !VT.isInteger()))
return SDValue();
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!C)
- return SDValue();
+ ConstantSDNode *CNode = isConstOrConstSplat(
+ N->getOperand(1), /*AllowUndefs*/ true, /*AllowTrunc*/ false);
+ const APInt *C = nullptr;
+ if (!CNode) {
+ if (VT.isVector())
+ if (auto *RawC = getTargetConstantFromNode(N->getOperand(1)))
+ if (auto *SplatC = RawC->getSplatValue())
+ C = &(SplatC->getUniqueInteger());
+
+ if (!C || C->getBitWidth() != VT.getScalarSizeInBits())
+ return SDValue();
+ } else {
+ C = &(CNode->getAPIntValue());
+ }
+
if (isPowerOf2_64(C->getZExtValue()))
return SDValue();
@@ -47536,68 +49263,69 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;
SDLoc DL(N);
- if (AbsMulAmt == 3 || AbsMulAmt == 5 || AbsMulAmt == 9) {
- SDValue NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
- DAG.getConstant(AbsMulAmt, DL, VT));
- if (SignMulAmt < 0)
- NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
- NewMul);
-
- return NewMul;
- }
-
- uint64_t MulAmt1 = 0;
- uint64_t MulAmt2 = 0;
- if ((AbsMulAmt % 9) == 0) {
- MulAmt1 = 9;
- MulAmt2 = AbsMulAmt / 9;
- } else if ((AbsMulAmt % 5) == 0) {
- MulAmt1 = 5;
- MulAmt2 = AbsMulAmt / 5;
- } else if ((AbsMulAmt % 3) == 0) {
- MulAmt1 = 3;
- MulAmt2 = AbsMulAmt / 3;
- }
-
- SDValue NewMul;
- // For negative multiply amounts, only allow MulAmt2 to be a power of 2.
- if (MulAmt2 &&
- (isPowerOf2_64(MulAmt2) ||
- (SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) {
-
- if (isPowerOf2_64(MulAmt2) &&
- !(SignMulAmt >= 0 && N->hasOneUse() &&
- N->use_begin()->getOpcode() == ISD::ADD))
- // If second multiplifer is pow2, issue it first. We want the multiply by
- // 3, 5, or 9 to be folded into the addressing mode unless the lone use
- // is an add. Only do this for positive multiply amounts since the
- // negate would prevent it from being used as an address mode anyway.
- std::swap(MulAmt1, MulAmt2);
-
- if (isPowerOf2_64(MulAmt1))
- NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
- DAG.getConstant(Log2_64(MulAmt1), DL, MVT::i8));
- else
+ SDValue NewMul = SDValue();
+ if (VT == MVT::i64 || VT == MVT::i32) {
+ if (AbsMulAmt == 3 || AbsMulAmt == 5 || AbsMulAmt == 9) {
NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
- DAG.getConstant(MulAmt1, DL, VT));
-
- if (isPowerOf2_64(MulAmt2))
- NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
- DAG.getConstant(Log2_64(MulAmt2), DL, MVT::i8));
- else
- NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
- DAG.getConstant(MulAmt2, DL, VT));
+ DAG.getConstant(AbsMulAmt, DL, VT));
+ if (SignMulAmt < 0)
+ NewMul =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul);
+
+ return NewMul;
+ }
+
+ uint64_t MulAmt1 = 0;
+ uint64_t MulAmt2 = 0;
+ if ((AbsMulAmt % 9) == 0) {
+ MulAmt1 = 9;
+ MulAmt2 = AbsMulAmt / 9;
+ } else if ((AbsMulAmt % 5) == 0) {
+ MulAmt1 = 5;
+ MulAmt2 = AbsMulAmt / 5;
+ } else if ((AbsMulAmt % 3) == 0) {
+ MulAmt1 = 3;
+ MulAmt2 = AbsMulAmt / 3;
+ }
+
+ // For negative multiply amounts, only allow MulAmt2 to be a power of 2.
+ if (MulAmt2 &&
+ (isPowerOf2_64(MulAmt2) ||
+ (SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) {
+
+ if (isPowerOf2_64(MulAmt2) && !(SignMulAmt >= 0 && N->hasOneUse() &&
+ N->use_begin()->getOpcode() == ISD::ADD))
+        // If the second multiplier is pow2, issue it first. We want the multiply
+ // by 3, 5, or 9 to be folded into the addressing mode unless the lone
+ // use is an add. Only do this for positive multiply amounts since the
+ // negate would prevent it from being used as an address mode anyway.
+ std::swap(MulAmt1, MulAmt2);
+
+ if (isPowerOf2_64(MulAmt1))
+ NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(MulAmt1), DL, MVT::i8));
+ else
+ NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
+ DAG.getConstant(MulAmt1, DL, VT));
- // Negate the result.
- if (SignMulAmt < 0)
- NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
- NewMul);
- } else if (!Subtarget.slowLEA())
- NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL);
+ if (isPowerOf2_64(MulAmt2))
+ NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
+ DAG.getConstant(Log2_64(MulAmt2), DL, MVT::i8));
+ else
+ NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
+ DAG.getConstant(MulAmt2, DL, VT));
+ // Negate the result.
+ if (SignMulAmt < 0)
+ NewMul =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul);
+ } else if (!Subtarget.slowLEA())
+ NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL);
+ }
if (!NewMul) {
+ EVT ShiftVT = VT.isVector() ? VT : MVT::i8;
assert(C->getZExtValue() != 0 &&
- C->getZExtValue() != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
+ C->getZExtValue() != maxUIntN(VT.getScalarSizeInBits()) &&
"Both cases that could cause potential overflows should have "
"already been handled.");
if (isPowerOf2_64(AbsMulAmt - 1)) {
@@ -47605,38 +49333,61 @@ static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
NewMul = DAG.getNode(
ISD::ADD, DL, VT, N->getOperand(0),
DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
- DAG.getConstant(Log2_64(AbsMulAmt - 1), DL,
- MVT::i8)));
+ DAG.getConstant(Log2_64(AbsMulAmt - 1), DL, ShiftVT)));
// To negate, subtract the number from zero
if (SignMulAmt < 0)
- NewMul = DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(0, DL, VT), NewMul);
+ NewMul =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), NewMul);
} else if (isPowerOf2_64(AbsMulAmt + 1)) {
// (mul x, 2^N - 1) => (sub (shl x, N), x)
- NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
- DAG.getConstant(Log2_64(AbsMulAmt + 1),
- DL, MVT::i8));
+ NewMul =
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(AbsMulAmt + 1), DL, ShiftVT));
// To negate, reverse the operands of the subtract.
if (SignMulAmt < 0)
NewMul = DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), NewMul);
else
NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0));
- } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt - 2)) {
+ } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt - 2) &&
+ (!VT.isVector() || Subtarget.fastImmVectorShift())) {
// (mul x, 2^N + 2) => (add (shl x, N), (add x, x))
- NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
- DAG.getConstant(Log2_64(AbsMulAmt - 2),
- DL, MVT::i8));
+ NewMul =
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(AbsMulAmt - 2), DL, ShiftVT));
NewMul = DAG.getNode(
ISD::ADD, DL, VT, NewMul,
DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), N->getOperand(0)));
- } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt + 2)) {
+ } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt + 2) &&
+ (!VT.isVector() || Subtarget.fastImmVectorShift())) {
// (mul x, 2^N - 2) => (sub (shl x, N), (add x, x))
- NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
- DAG.getConstant(Log2_64(AbsMulAmt + 2),
- DL, MVT::i8));
+ NewMul =
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(AbsMulAmt + 2), DL, ShiftVT));
NewMul = DAG.getNode(
ISD::SUB, DL, VT, NewMul,
DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), N->getOperand(0)));
+ } else if (SignMulAmt >= 0 && VT.isVector() &&
+ Subtarget.fastImmVectorShift()) {
+ uint64_t AbsMulAmtLowBit = AbsMulAmt & (-AbsMulAmt);
+ uint64_t ShiftAmt1;
+ std::optional<unsigned> Opc;
+ if (isPowerOf2_64(AbsMulAmt - AbsMulAmtLowBit)) {
+ ShiftAmt1 = AbsMulAmt - AbsMulAmtLowBit;
+ Opc = ISD::ADD;
+ } else if (isPowerOf2_64(AbsMulAmt + AbsMulAmtLowBit)) {
+ ShiftAmt1 = AbsMulAmt + AbsMulAmtLowBit;
+ Opc = ISD::SUB;
+ }
+
+ if (Opc) {
+ SDValue Shift1 =
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(ShiftAmt1), DL, ShiftVT));
+ SDValue Shift2 =
+ DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
+ DAG.getConstant(Log2_64(AbsMulAmtLowBit), DL, ShiftVT));
+ NewMul = DAG.getNode(*Opc, DL, VT, Shift1, Shift2);
+ }
}
}
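
The decompositions chosen above follow from elementary identities on the multiplier; a standalone C++ spot-check of a few representative amounts (illustration only, not code from this patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < 1000; ++x) {
        assert(x * 9u == (x << 3) + x);          // 2^N + 1
        assert(x * 7u == (x << 3) - x);          // 2^N - 1
        assert(x * 6u == (x << 2) + (x + x));    // 2^N + 2
        assert(x * 14u == (x << 4) - (x + x));   // 2^N - 2
        assert(x * 20u == (x << 4) + (x << 2));  // lowbit split, ADD form
        assert(x * 28u == (x << 5) - (x << 2));  // lowbit split, SUB form
      }
      return 0;
    }
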
@@ -47834,14 +49585,14 @@ static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG,
// If this can be matched by a zero extend, don't optimize.
if (MaskVal.isMask()) {
- unsigned TO = MaskVal.countTrailingOnes();
+ unsigned TO = MaskVal.countr_one();
if (TO >= 8 && isPowerOf2_32(TO))
return SDValue();
}
APInt NewMaskVal = MaskVal.lshr(ShiftC->getAPIntValue());
- unsigned OldMaskSize = MaskVal.getMinSignedBits();
- unsigned NewMaskSize = NewMaskVal.getMinSignedBits();
+ unsigned OldMaskSize = MaskVal.getSignificantBits();
+ unsigned NewMaskSize = NewMaskVal.getSignificantBits();
if ((OldMaskSize > 8 && NewMaskSize <= 8) ||
(OldMaskSize > 32 && NewMaskSize <= 32)) {
// srl (and X, AndC), ShiftC --> and (srl X, ShiftC), (AndC >> ShiftC)
@@ -48074,6 +49825,23 @@ static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineHorizOpWithShuffle(N, DAG, Subtarget))
return V;
+ // Try to fold PACKSS(NOT(X),NOT(Y)) -> NOT(PACKSS(X,Y)).
+ // Currently limit this to allsignbits cases only.
+ if (IsSigned &&
+ (N0.isUndef() || DAG.ComputeNumSignBits(N0) == SrcBitsPerElt) &&
+ (N1.isUndef() || DAG.ComputeNumSignBits(N1) == SrcBitsPerElt)) {
+ SDValue Not0 = N0.isUndef() ? N0 : IsNOT(N0, DAG);
+ SDValue Not1 = N1.isUndef() ? N1 : IsNOT(N1, DAG);
+ if (Not0 && Not1) {
+ SDLoc DL(N);
+ MVT SrcVT = N0.getSimpleValueType();
+ SDValue Pack =
+ DAG.getNode(X86ISD::PACKSS, DL, VT, DAG.getBitcast(SrcVT, Not0),
+ DAG.getBitcast(SrcVT, Not1));
+ return DAG.getNOT(DL, Pack, VT);
+ }
+ }
+
// Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular
// truncate to create a larger truncate.
if (Subtarget.hasAVX512() &&
@@ -48220,11 +49988,11 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
bool LogicalShift = X86ISD::VSHLI == Opcode || X86ISD::VSRLI == Opcode;
EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
assert(VT == N0.getValueType() && (NumBitsPerElt % 8) == 0 &&
"Unexpected value type");
- assert(N->getOperand(1).getValueType() == MVT::i8 &&
- "Unexpected shift amount type");
+ assert(N1.getValueType() == MVT::i8 && "Unexpected shift amount type");
// (shift undef, X) -> 0
if (N0.isUndef())
@@ -48284,11 +50052,34 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
return Res;
}
- // Constant Folding.
- APInt UndefElts;
- SmallVector<APInt, 32> EltBits;
- if (N->isOnlyUserOf(N0.getNode()) &&
- getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) {
+ // Attempt to detect an expanded vXi64 SIGN_EXTEND_INREG vXi1 pattern, and
+ // convert to a splatted v2Xi32 SIGN_EXTEND_INREG pattern:
+ // psrad(pshufd(psllq(X,63),1,1,3,3),31) ->
+ // pshufd(psrad(pslld(X,31),31),0,0,2,2).
+ if (Opcode == X86ISD::VSRAI && NumBitsPerElt == 32 && ShiftVal == 31 &&
+ N0.getOpcode() == X86ISD::PSHUFD &&
+ N0.getConstantOperandVal(1) == getV4X86ShuffleImm({1, 1, 3, 3}) &&
+ N0->hasOneUse()) {
+ SDValue BC = peekThroughOneUseBitcasts(N0.getOperand(0));
+ if (BC.getOpcode() == X86ISD::VSHLI &&
+ BC.getScalarValueSizeInBits() == 64 &&
+ BC.getConstantOperandVal(1) == 63) {
+ SDLoc DL(N);
+ SDValue Src = BC.getOperand(0);
+ Src = DAG.getBitcast(VT, Src);
+ Src = DAG.getNode(X86ISD::PSHUFD, DL, VT, Src,
+ getV4X86ShuffleImm8ForMask({0, 0, 2, 2}, DL, DAG));
+ Src = DAG.getNode(X86ISD::VSHLI, DL, VT, Src, N1);
+ Src = DAG.getNode(X86ISD::VSRAI, DL, VT, Src, N1);
+ return Src;
+ }
+ }
+
+ auto TryConstantFold = [&](SDValue V) {
+ APInt UndefElts;
+ SmallVector<APInt, 32> EltBits;
+ if (!getTargetConstantBitsFromNode(V, NumBitsPerElt, UndefElts, EltBits))
+ return SDValue();
assert(EltBits.size() == VT.getVectorNumElements() &&
"Unexpected shift value type");
// Undef elements need to fold to 0. It's possible SimplifyDemandedBits
@@ -48308,6 +50099,26 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
// Reset undef elements since they were zeroed above.
UndefElts = 0;
return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
+ };
+
+ // Constant Folding.
+ if (N->isOnlyUserOf(N0.getNode())) {
+ if (SDValue C = TryConstantFold(N0))
+ return C;
+
+ // Fold (shift (logic X, C2), C1) -> (logic (shift X, C1), (shift C2, C1))
+ // Don't break NOT patterns.
+ SDValue BC = peekThroughOneUseBitcasts(N0);
+ if (ISD::isBitwiseLogicOp(BC.getOpcode()) &&
+ BC->isOnlyUserOf(BC.getOperand(1).getNode()) &&
+ !ISD::isBuildVectorAllOnes(BC.getOperand(1).getNode())) {
+ if (SDValue RHS = TryConstantFold(BC.getOperand(1))) {
+ SDLoc DL(N);
+ SDValue LHS = DAG.getNode(Opcode, DL, VT,
+ DAG.getBitcast(VT, BC.getOperand(0)), N1);
+ return DAG.getNode(BC.getOpcode(), DL, VT, LHS, RHS);
+ }
+ }
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -48328,10 +50139,13 @@ static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG,
Opcode == ISD::INSERT_VECTOR_ELT) &&
"Unexpected vector insertion");
+ SDValue Vec = N->getOperand(0);
+ SDValue Scl = N->getOperand(1);
+ SDValue Idx = N->getOperand(2);
+
// Fold insert_vector_elt(undef, elt, 0) --> scalar_to_vector(elt).
- if (Opcode == ISD::INSERT_VECTOR_ELT && N->getOperand(0).isUndef() &&
- isNullConstant(N->getOperand(2)))
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, N->getOperand(1));
+ if (Opcode == ISD::INSERT_VECTOR_ELT && Vec.isUndef() && isNullConstant(Idx))
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Scl);
if (Opcode == X86ISD::PINSRB || Opcode == X86ISD::PINSRW) {
unsigned NumBitsPerElt = VT.getScalarSizeInBits();
@@ -48809,6 +50623,49 @@ static SDValue combineBitOpWithShift(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+// Attempt to fold:
+// BITOP(PACKSS(X,Z),PACKSS(Y,W)) --> PACKSS(BITOP(X,Y),BITOP(Z,W)).
+// TODO: Add PACKUS handling.
+static SDValue combineBitOpWithPACK(SDNode *N, SelectionDAG &DAG) {
+ unsigned Opc = N->getOpcode();
+ assert((Opc == ISD::OR || Opc == ISD::AND || Opc == ISD::XOR) &&
+ "Unexpected bit opcode");
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+
+ // Both operands must be single use.
+ if (!N0.hasOneUse() || !N1.hasOneUse())
+ return SDValue();
+
+ // Search for matching packs.
+ N0 = peekThroughOneUseBitcasts(N0);
+ N1 = peekThroughOneUseBitcasts(N1);
+
+ if (N0.getOpcode() != X86ISD::PACKSS || N1.getOpcode() != X86ISD::PACKSS)
+ return SDValue();
+
+ MVT DstVT = N0.getSimpleValueType();
+ if (DstVT != N1.getSimpleValueType())
+ return SDValue();
+
+ MVT SrcVT = N0.getOperand(0).getSimpleValueType();
+ unsigned NumSrcBits = SrcVT.getScalarSizeInBits();
+
+ // Limit to allsignbits packing.
+ if (DAG.ComputeNumSignBits(N0.getOperand(0)) != NumSrcBits ||
+ DAG.ComputeNumSignBits(N0.getOperand(1)) != NumSrcBits ||
+ DAG.ComputeNumSignBits(N1.getOperand(0)) != NumSrcBits ||
+ DAG.ComputeNumSignBits(N1.getOperand(1)) != NumSrcBits)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue LHS = DAG.getNode(Opc, DL, SrcVT, N0.getOperand(0), N1.getOperand(0));
+ SDValue RHS = DAG.getNode(Opc, DL, SrcVT, N0.getOperand(1), N1.getOperand(1));
+ return DAG.getBitcast(VT, DAG.getNode(X86ISD::PACKSS, DL, DstVT, LHS, RHS));
+}
+
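
Because the fold is restricted to all-sign-bits inputs (every source lane is 0 or -1), signed saturation degenerates to a plain lane narrowing and therefore commutes with the bitwise op. A scalar C++ model, where packss is a hand-written stand-in for the per-lane saturation (illustration only, not code from this patch):

    #include <array>
    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    static int8_t packss(int16_t v) {
      return v > 127 ? 127 : v < -128 ? -128 : (int8_t)v; // signed saturate to i8
    }

    int main() {
      std::array<int16_t, 4> x{0, -1, -1, 0}, y{-1, -1, 0, 0}; // all-sign-bits lanes
      for (std::size_t i = 0; i < x.size(); ++i) {
        assert((int8_t)(packss(x[i]) & packss(y[i])) == packss(x[i] & y[i]));
        assert((int8_t)(packss(x[i]) | packss(y[i])) == packss(x[i] | y[i]));
        assert((int8_t)(packss(x[i]) ^ packss(y[i])) == packss(x[i] ^ y[i]));
      }
      return 0;
    }
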
/// If this is a zero/all-bits result that is bitwise-anded with a low bits
/// mask. (Mask == 1 for the x86 lowering of a SETCC + ZEXT), replace the 'and'
/// with a shift-right to eliminate loading the vector constant mask value.
@@ -48830,14 +50687,14 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
// If the bitcasts can't be eliminated, then it is unlikely that this fold
// will be profitable.
if (N->getValueType(0) == VT &&
- supportedVectorShiftWithImm(VT.getSimpleVT(), Subtarget, ISD::SRA)) {
+ supportedVectorShiftWithImm(VT, Subtarget, ISD::SRA)) {
SDValue X, Y;
- if (Op1.hasOneUse() && Op1.getOpcode() == X86ISD::PCMPGT &&
- isAllOnesOrAllOnesSplat(Op1.getOperand(1))) {
+ if (Op1.getOpcode() == X86ISD::PCMPGT &&
+ isAllOnesOrAllOnesSplat(Op1.getOperand(1)) && Op1.hasOneUse()) {
X = Op1.getOperand(0);
Y = Op0;
- } else if (Op0.hasOneUse() && Op0.getOpcode() == X86ISD::PCMPGT &&
- isAllOnesOrAllOnesSplat(Op0.getOperand(1))) {
+ } else if (Op0.getOpcode() == X86ISD::PCMPGT &&
+ isAllOnesOrAllOnesSplat(Op0.getOperand(1)) && Op0.hasOneUse()) {
X = Op0.getOperand(0);
Y = Op1;
}
@@ -48851,15 +50708,14 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
}
APInt SplatVal;
- if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) ||
- !SplatVal.isMask())
+ if (!X86::isConstantSplat(Op1, SplatVal, false) || !SplatVal.isMask())
return SDValue();
// Don't prevent creation of ANDN.
if (isBitwiseNot(Op0))
return SDValue();
- if (!supportedVectorShiftWithImm(VT.getSimpleVT(), Subtarget, ISD::SRL))
+ if (!supportedVectorShiftWithImm(VT, Subtarget, ISD::SRL))
return SDValue();
unsigned EltBitWidth = VT.getScalarSizeInBits();
@@ -48867,7 +50723,7 @@ static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
return SDValue();
SDLoc DL(N);
- unsigned ShiftVal = SplatVal.countTrailingOnes();
+ unsigned ShiftVal = SplatVal.countr_one();
SDValue ShAmt = DAG.getTargetConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT, Op0, ShAmt);
return DAG.getBitcast(N->getValueType(0), Shift);
@@ -49076,6 +50932,70 @@ static SDValue combineScalarAndWithMaskSetcc(SDNode *N, SelectionDAG &DAG,
return DAG.getZExtOrTrunc(DAG.getBitcast(IntVT, Concat), dl, VT);
}
+static SDValue getBMIMatchingOp(unsigned Opc, SelectionDAG &DAG,
+ SDValue OpMustEq, SDValue Op, unsigned Depth) {
+ // We don't want to go crazy with the recursion here. This isn't a super
+ // important optimization.
+ static constexpr unsigned kMaxDepth = 2;
+
+ // Only do this re-ordering if op has one use.
+ if (!Op.hasOneUse())
+ return SDValue();
+
+ SDLoc DL(Op);
+  // If we hit another associative op, recurse further.
+ if (Op.getOpcode() == Opc) {
+ // Done recursing.
+ if (Depth++ >= kMaxDepth)
+ return SDValue();
+
+ for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx)
+ if (SDValue R =
+ getBMIMatchingOp(Opc, DAG, OpMustEq, Op.getOperand(OpIdx), Depth))
+ return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(), R,
+ Op.getOperand(1 - OpIdx));
+
+ } else if (Op.getOpcode() == ISD::SUB) {
+ if (Opc == ISD::AND) {
+ // BLSI: (and x, (sub 0, x))
+ if (isNullConstant(Op.getOperand(0)) && Op.getOperand(1) == OpMustEq)
+ return DAG.getNode(Opc, DL, Op.getValueType(), OpMustEq, Op);
+ }
+ // Opc must be ISD::AND or ISD::XOR
+ // BLSR: (and x, (sub x, 1))
+ // BLSMSK: (xor x, (sub x, 1))
+ if (isOneConstant(Op.getOperand(1)) && Op.getOperand(0) == OpMustEq)
+ return DAG.getNode(Opc, DL, Op.getValueType(), OpMustEq, Op);
+
+ } else if (Op.getOpcode() == ISD::ADD) {
+ // Opc must be ISD::AND or ISD::XOR
+ // BLSR: (and x, (add x, -1))
+ // BLSMSK: (xor x, (add x, -1))
+ if (isAllOnesConstant(Op.getOperand(1)) && Op.getOperand(0) == OpMustEq)
+ return DAG.getNode(Opc, DL, Op.getValueType(), OpMustEq, Op);
+ }
+ return SDValue();
+}
+
+static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ EVT VT = N->getValueType(0);
+ // Make sure this node is a candidate for BMI instructions.
+ if (!Subtarget.hasBMI() || !VT.isScalarInteger() ||
+ (VT != MVT::i32 && VT != MVT::i64))
+ return SDValue();
+
+ assert(N->getOpcode() == ISD::AND || N->getOpcode() == ISD::XOR);
+
+ // Try and match LHS and RHS.
+ for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx)
+ if (SDValue OpMatch =
+ getBMIMatchingOp(N->getOpcode(), DAG, N->getOperand(OpIdx),
+ N->getOperand(1 - OpIdx), 0))
+ return OpMatch;
+ return SDValue();
+}
+
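
The patterns matched here are the classic BMI bit identities; a standalone C++ spot-check (illustration only, not code from this patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 1; x < 100000; ++x) {
        uint32_t low = x & (0u - x);           // BLSI: isolate lowest set bit
        assert((low & (low - 1)) == 0);        // it is a power of two
        assert((x & (x - 1)) == x - low);      // BLSR: clear lowest set bit
        assert((x ^ (x - 1)) == 2 * low - 1);  // BLSMSK: mask through lowest set bit
      }
      return 0;
    }
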
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -49127,6 +51047,39 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
}
}
+ // InstCombine converts:
+ // `(-x << C0) & C1`
+ // to
+ // `(x * (Pow2_Ceil(C1) - (1 << C0))) & C1`
+  // This saves an IR instruction, but on x86 the neg/shift version is preferable,
+ // so undo the transform.
+
+ if (N0.getOpcode() == ISD::MUL && N0.hasOneUse()) {
+ // TODO: We don't actually need a splat for this, we just need the checks to
+ // hold for each element.
+ ConstantSDNode *N1C = isConstOrConstSplat(N1, /*AllowUndefs*/ true,
+ /*AllowTruncation*/ false);
+ ConstantSDNode *N01C =
+ isConstOrConstSplat(N0.getOperand(1), /*AllowUndefs*/ true,
+ /*AllowTruncation*/ false);
+ if (N1C && N01C) {
+ const APInt &MulC = N01C->getAPIntValue();
+ const APInt &AndC = N1C->getAPIntValue();
+ APInt MulCLowBit = MulC & (-MulC);
+ if (MulC.uge(AndC) && !MulC.isPowerOf2() &&
+ (MulCLowBit + MulC).isPowerOf2()) {
+ SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT),
+ N0.getOperand(0));
+ int32_t MulCLowBitLog = MulCLowBit.exactLogBase2();
+ assert(MulCLowBitLog != -1 &&
+ "Isolated lowbit is somehow not a power of 2!");
+ SDValue Shift = DAG.getNode(ISD::SHL, dl, VT, Neg,
+ DAG.getConstant(MulCLowBitLog, dl, VT));
+ return DAG.getNode(ISD::AND, dl, VT, Shift, N1);
+ }
+ }
+ }
+
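
A standalone C++ sanity check of the equivalence being undone, instantiated for one concrete pair C0 = 2, C1 = 0xFF, where Pow2_Ceil(C1) = 256 and the multiplier is 256 - (1 << 2) = 252 (illustration only, not code from this patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C0 = 2, C1 = 0xFF;
      const uint32_t MulC = 256u - (1u << C0);   // 252
      for (uint32_t x = 0; x < 100000; ++x) {
        uint32_t negShl = ((0u - x) << C0) & C1; // (-x << C0) & C1
        uint32_t mulForm = (x * MulC) & C1;      // (x * 252) & C1
        assert(negShl == mulForm);
      }
      return 0;
    }
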
if (SDValue V = combineScalarAndWithMaskSetcc(N, DAG, Subtarget))
return V;
@@ -49136,6 +51089,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineBitOpWithShift(N, DAG))
return R;
+ if (SDValue R = combineBitOpWithPACK(N, DAG))
+ return R;
+
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
return FPLogic;
@@ -49297,6 +51253,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
}
}
+ if (SDValue R = combineBMILogicOp(N, DAG, Subtarget))
+ return R;
+
return SDValue();
}
@@ -49538,7 +51497,7 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Try to lower nodes matching the or(or, setcc(eq, cmp 0)) pattern.
- while (ORNodes.size() > 0) {
+ while (!ORNodes.empty()) {
OR = ORNodes.pop_back_val();
LHS = OR->getOperand(0);
RHS = OR->getOperand(1);
@@ -49607,6 +51566,243 @@ static SDValue foldMaskedMerge(SDNode *Node, SelectionDAG &DAG) {
return SDValue();
}
+/// If this is an add or subtract where one operand is produced by a cmp+setcc,
+/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
+/// with CMP+{ADC, SBB}.
+/// Also try (ADD/SUB)+(AND(SRL,1)) bit extraction pattern with BT+{ADC, SBB}.
+static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
+ SDValue X, SDValue Y,
+ SelectionDAG &DAG,
+ bool ZeroSecondOpOnly = false) {
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
+ return SDValue();
+
+ // Look through a one-use zext.
+ if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse())
+ Y = Y.getOperand(0);
+
+ X86::CondCode CC;
+ SDValue EFLAGS;
+ if (Y.getOpcode() == X86ISD::SETCC && Y.hasOneUse()) {
+ CC = (X86::CondCode)Y.getConstantOperandVal(0);
+ EFLAGS = Y.getOperand(1);
+ } else if (Y.getOpcode() == ISD::AND && isOneConstant(Y.getOperand(1)) &&
+ Y.hasOneUse()) {
+ EFLAGS = LowerAndToBT(Y, ISD::SETNE, DL, DAG, CC);
+ }
+
+ if (!EFLAGS)
+ return SDValue();
+
+ // If X is -1 or 0, then we have an opportunity to avoid constants required in
+ // the general case below.
+ auto *ConstantX = dyn_cast<ConstantSDNode>(X);
+ if (ConstantX && !ZeroSecondOpOnly) {
+ if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnes()) ||
+ (IsSub && CC == X86::COND_B && ConstantX->isZero())) {
+ // This is a complicated way to get -1 or 0 from the carry flag:
+ // -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax
+ // 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax
+ return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
+ EFLAGS);
+ }
+
+ if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnes()) ||
+ (IsSub && CC == X86::COND_A && ConstantX->isZero())) {
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
+ EFLAGS.getValueType().isInteger() &&
+ !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
+ // Swap the operands of a SUB, and we have the same pattern as above.
+ // -1 + SETBE (SUB A, B) --> -1 + SETAE (SUB B, A) --> SUB + SBB
+ // 0 - SETA (SUB A, B) --> 0 - SETB (SUB B, A) --> SUB + SBB
+ SDValue NewSub = DAG.getNode(
+ X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
+ EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+ SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
+ return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
+ NewEFLAGS);
+ }
+ }
+ }
+
+ if (CC == X86::COND_B) {
+ // X + SETB Z --> adc X, 0
+ // X - SETB Z --> sbb X, 0
+ return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
+ DAG.getVTList(VT, MVT::i32), X,
+ DAG.getConstant(0, DL, VT), EFLAGS);
+ }
+
+ if (ZeroSecondOpOnly)
+ return SDValue();
+
+ if (CC == X86::COND_A) {
+ // Try to convert COND_A into COND_B in an attempt to facilitate
+ // materializing "setb reg".
+ //
+ // Do not flip "e > c", where "c" is a constant, because Cmp instruction
+ // cannot take an immediate as its first operand.
+ //
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
+ EFLAGS.getValueType().isInteger() &&
+ !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
+ SDValue NewSub =
+ DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
+ EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+ SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
+ return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
+ DAG.getVTList(VT, MVT::i32), X,
+ DAG.getConstant(0, DL, VT), NewEFLAGS);
+ }
+ }
+
+ if (CC == X86::COND_AE) {
+ // X + SETAE --> sbb X, -1
+ // X - SETAE --> adc X, -1
+ return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL,
+ DAG.getVTList(VT, MVT::i32), X,
+ DAG.getConstant(-1, DL, VT), EFLAGS);
+ }
+
+ if (CC == X86::COND_BE) {
+ // X + SETBE --> sbb X, -1
+ // X - SETBE --> adc X, -1
+ // Try to convert COND_BE into COND_AE in an attempt to facilitate
+ // materializing "setae reg".
+ //
+ // Do not flip "e <= c", where "c" is a constant, because Cmp instruction
+ // cannot take an immediate as its first operand.
+ //
+ if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
+ EFLAGS.getValueType().isInteger() &&
+ !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
+ SDValue NewSub =
+ DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
+ EFLAGS.getOperand(1), EFLAGS.getOperand(0));
+ SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
+ return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL,
+ DAG.getVTList(VT, MVT::i32), X,
+ DAG.getConstant(-1, DL, VT), NewEFLAGS);
+ }
+ }
+
+ if (CC != X86::COND_E && CC != X86::COND_NE)
+ return SDValue();
+
+ if (EFLAGS.getOpcode() != X86ISD::CMP || !EFLAGS.hasOneUse() ||
+ !X86::isZeroNode(EFLAGS.getOperand(1)) ||
+ !EFLAGS.getOperand(0).getValueType().isInteger())
+ return SDValue();
+
+ SDValue Z = EFLAGS.getOperand(0);
+ EVT ZVT = Z.getValueType();
+
+ // If X is -1 or 0, then we have an opportunity to avoid constants required in
+ // the general case below.
+ if (ConstantX) {
+ // 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with
+ // fake operands:
+ // 0 - (Z != 0) --> sbb %eax, %eax, (neg Z)
+ // -1 + (Z == 0) --> sbb %eax, %eax, (neg Z)
+ if ((IsSub && CC == X86::COND_NE && ConstantX->isZero()) ||
+ (!IsSub && CC == X86::COND_E && ConstantX->isAllOnes())) {
+ SDValue Zero = DAG.getConstant(0, DL, ZVT);
+ SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
+ SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z);
+ return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
+ SDValue(Neg.getNode(), 1));
+ }
+
+ // cmp with 1 sets the carry flag when Z == 0, so create 0 or -1 using 'sbb'
+ // with fake operands:
+ // 0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1)
+ // -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1)
+ if ((IsSub && CC == X86::COND_E && ConstantX->isZero()) ||
+ (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnes())) {
+ SDValue One = DAG.getConstant(1, DL, ZVT);
+ SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
+ SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
+ return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
+ DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
+ Cmp1.getValue(1));
+ }
+ }
+
+ // (cmp Z, 1) sets the carry flag if Z is 0.
+ SDValue One = DAG.getConstant(1, DL, ZVT);
+ SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
+ SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
+
+ // Add the flags type for ADC/SBB nodes.
+ SDVTList VTs = DAG.getVTList(VT, MVT::i32);
+
+ // X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1)
+ // X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1)
+ if (CC == X86::COND_NE)
+ return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X,
+ DAG.getConstant(-1ULL, DL, VT), Cmp1.getValue(1));
+
+ // X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1)
+ // X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1)
+ return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X,
+ DAG.getConstant(0, DL, VT), Cmp1.getValue(1));
+}
+
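
A scalar C++ check of the carry-flag arithmetic behind the "X + SETAE --> sbb X, -1" case (illustration only, not code from this patch): CMP a, b leaves CF = (a < b), and SBB then computes X - (-1) - CF, i.e. X + (a >= b).

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t a = 0; a < 32; ++a)
        for (uint32_t b = 0; b < 32; ++b)
          for (uint32_t x = 0; x < 32; ++x) {
            uint32_t cf = (a < b) ? 1u : 0u;          // CF after CMP a, b
            uint32_t sbb = x - uint32_t(-1) - cf;     // sbb x, -1
            assert(sbb == x + ((a >= b) ? 1u : 0u));  // x + setae
          }
      return 0;
    }
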
+/// If this is an add or subtract where one operand is produced by a cmp+setcc,
+/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
+/// with CMP+{ADC, SBB}.
+static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
+ bool IsSub = N->getOpcode() == ISD::SUB;
+ SDValue X = N->getOperand(0);
+ SDValue Y = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ if (SDValue ADCOrSBB = combineAddOrSubToADCOrSBB(IsSub, DL, VT, X, Y, DAG))
+ return ADCOrSBB;
+
+ // Commute and try again (negate the result for subtracts).
+ if (SDValue ADCOrSBB = combineAddOrSubToADCOrSBB(IsSub, DL, VT, Y, X, DAG)) {
+ if (IsSub)
+ ADCOrSBB =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), ADCOrSBB);
+ return ADCOrSBB;
+ }
+
+ return SDValue();
+}
+
+static SDValue combineOrXorWithSETCC(SDNode *N, SDValue N0, SDValue N1,
+ SelectionDAG &DAG) {
+ assert((N->getOpcode() == ISD::XOR || N->getOpcode() == ISD::OR) &&
+ "Unexpected opcode");
+
+ // Delegate to combineAddOrSubToADCOrSBB if we have:
+ //
+ // (xor/or (zero_extend (setcc)) imm)
+ //
+ // where imm is odd if and only if we have xor, in which case the XOR/OR are
+ // equivalent to a SUB/ADD, respectively.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND &&
+ N0.getOperand(0).getOpcode() == X86ISD::SETCC && N0.hasOneUse()) {
+ if (auto *N1C = dyn_cast<ConstantSDNode>(N1)) {
+ bool IsSub = N->getOpcode() == ISD::XOR;
+ bool N1COdd = N1C->getZExtValue() & 1;
+ if (IsSub ? N1COdd : !N1COdd) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ if (SDValue R = combineAddOrSubToADCOrSBB(IsSub, DL, VT, N1, N0, DAG))
+ return R;
+ }
+ }
+ }
+
+ return SDValue();
+}
+
static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -49653,6 +51849,9 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineBitOpWithShift(N, DAG))
return R;
+ if (SDValue R = combineBitOpWithPACK(N, DAG))
+ return R;
+
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
return FPLogic;
@@ -49754,6 +51953,9 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
if (SDValue R = foldMaskedMerge(N, DAG))
return R;
+ if (SDValue R = combineOrXorWithSETCC(N, N0, N1, DAG))
+ return R;
+
return SDValue();
}
@@ -50330,7 +52532,7 @@ static int getOneTrueElt(SDValue V) {
auto *ConstNode = dyn_cast<ConstantSDNode>(Op);
if (!ConstNode)
return -1;
- if (ConstNode->getAPIntValue().countTrailingOnes() >= 1) {
+ if (ConstNode->getAPIntValue().countr_one() >= 1) {
// If we already found a one, this is too many.
if (TrueIndex >= 0)
return -1;
@@ -50945,7 +53147,7 @@ static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS,
resolveTargetShuffleInputsAndMask(SrcOps, SrcMask);
if (!UseSubVector && SrcOps.size() <= 2 &&
scaleShuffleElements(SrcMask, NumElts, ScaledMask)) {
- N0 = SrcOps.size() > 0 ? SrcOps[0] : SDValue();
+ N0 = !SrcOps.empty() ? SrcOps[0] : SDValue();
N1 = SrcOps.size() > 1 ? SrcOps[1] : SDValue();
ShuffleMask.assign(ScaledMask.begin(), ScaledMask.end());
}
@@ -51180,7 +53382,7 @@ static SDValue combineFMulcFCMulc(SDNode *N, SelectionDAG &DAG,
}
}
if (const auto *CF = dyn_cast<ConstantFP>(c))
- return CF->isNegativeZeroValue();
+ return CF->getType()->isFloatTy() && CF->isNegativeZeroValue();
return false;
};
auto combineConjugation = [&](SDValue &r) {
@@ -51391,88 +53593,10 @@ static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// Truncate using ISD::AND mask and X86ISD::PACKUS.
-/// e.g. trunc <8 x i32> X to <8 x i16> -->
-/// MaskX = X & 0xffff (clear high bits to prevent saturation)
-/// packus (extract_subv MaskX, 0), (extract_subv MaskX, 1)
-static SDValue combineVectorTruncationWithPACKUS(SDNode *N, const SDLoc &DL,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- SDValue In = N->getOperand(0);
- EVT InVT = In.getValueType();
- EVT OutVT = N->getValueType(0);
-
- APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
- OutVT.getScalarSizeInBits());
- In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
- return truncateVectorWithPACK(X86ISD::PACKUS, OutVT, In, DL, DAG, Subtarget);
-}
-
-/// Truncate a group of v4i32 into v8i16 using X86ISD::PACKSS.
-static SDValue combineVectorTruncationWithPACKSS(SDNode *N, const SDLoc &DL,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- SDValue In = N->getOperand(0);
- EVT InVT = In.getValueType();
- EVT OutVT = N->getValueType(0);
- In = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, InVT, In,
- DAG.getValueType(OutVT));
- return truncateVectorWithPACK(X86ISD::PACKSS, OutVT, In, DL, DAG, Subtarget);
-}
-
-/// This function transforms truncation from vXi32/vXi64 to vXi8/vXi16 into
-/// X86ISD::PACKUS/X86ISD::PACKSS operations. We do it here because after type
-/// legalization the truncation will be translated into a BUILD_VECTOR with each
-/// element that is extracted from a vector and then truncated, and it is
-/// difficult to do this optimization based on them.
-static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- EVT OutVT = N->getValueType(0);
- if (!OutVT.isVector())
- return SDValue();
-
- SDValue In = N->getOperand(0);
- if (!In.getValueType().isSimple())
- return SDValue();
-
- EVT InVT = In.getValueType();
- unsigned NumElems = OutVT.getVectorNumElements();
-
- // AVX512 provides fast truncate ops.
- if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
- return SDValue();
-
- EVT OutSVT = OutVT.getVectorElementType();
- EVT InSVT = InVT.getVectorElementType();
- if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
- (OutSVT == MVT::i8 || OutSVT == MVT::i16) && isPowerOf2_32(NumElems) &&
- NumElems >= 8))
- return SDValue();
-
- // SSSE3's pshufb results in less instructions in the cases below.
- if (Subtarget.hasSSSE3() && NumElems == 8) {
- if (InSVT == MVT::i16)
- return SDValue();
- if (InSVT == MVT::i32 &&
- (OutSVT == MVT::i8 || !Subtarget.hasSSE41() || Subtarget.hasInt256()))
- return SDValue();
- }
-
- SDLoc DL(N);
- // SSE2 provides PACKUS for only 2 x v8i16 -> v16i8 and SSE4.1 provides PACKUS
- // for 2 x v4i32 -> v8i16. For SSSE3 and below, we need to use PACKSS to
- // truncate 2 x v4i32 to v8i16.
- if (Subtarget.hasSSE41() || OutSVT == MVT::i8)
- return combineVectorTruncationWithPACKUS(N, DL, Subtarget, DAG);
- if (InSVT == MVT::i32)
- return combineVectorTruncationWithPACKSS(N, DL, Subtarget, DAG);
-
- return SDValue();
-}
-
/// This function transforms vector truncation of 'extended sign-bits' or
/// 'extended zero-bits' values.
/// vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32 into X86ISD::PACKSS/PACKUS operations.
+/// TODO: Remove this and just use LowerTruncateVecPackWithSignBits.
static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
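For readers unfamiliar with the PACK-based lowering that the removed helpers (and the retained sign-bit variant) build on, here is a minimal scalar sketch, not taken from the patch and with invented helper names: once the bits that would saturate are cleared, an unsigned-saturating pack reduces to a plain truncation.

// Illustrative sketch only; the helper name and values are made up.
#include <cassert>
#include <cstdint>

// Unsigned-saturating narrowing of one 32-bit lane to 16 bits (PACKUSDW-like).
static uint16_t packus32to16(uint32_t X) {
  return X > 0xFFFFu ? 0xFFFFu : static_cast<uint16_t>(X);
}

int main() {
  uint32_t X = 0xDEADBEEFu;
  // Masking off the high bits first keeps the value in [0, 0xFFFF], so the
  // saturating pack behaves exactly like an ordinary truncation.
  assert(packus32to16(X & 0xFFFFu) == static_cast<uint16_t>(X));
  return 0;
}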
@@ -51512,9 +53636,7 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
InVT.is512BitVector())) {
// PACK should still be worth it for 128-bit vectors if the sources were
// originally concatenated from subvectors.
- SmallVector<SDValue> ConcatOps;
- if (VT.getSizeInBits() > 128 ||
- !collectConcatOps(In.getNode(), ConcatOps, DAG))
+ if (VT.getSizeInBits() > 128 || !isFreeToSplitVector(In.getNode(), DAG))
return SDValue();
}
@@ -51818,7 +53940,7 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineVectorSignBitsTruncation(N, DL, DAG, Subtarget))
return V;
- return combineVectorTruncation(N, DAG, Subtarget);
+ return SDValue();
}
static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG,
@@ -52136,6 +54258,63 @@ static SDValue foldXor1SetCC(SDNode *N, SelectionDAG &DAG) {
return getSETCC(NewCC, LHS->getOperand(1), DL, DAG);
}
+static SDValue combineXorSubCTLZ(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ assert((N->getOpcode() == ISD::XOR || N->getOpcode() == ISD::SUB) &&
+         "Invalid opcode for combining with CTLZ");
+ if (Subtarget.hasFastLZCNT())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32 &&
+ (VT != MVT::i64 || !Subtarget.is64Bit()))
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ if (N0.getOpcode() != ISD::CTLZ_ZERO_UNDEF &&
+ N1.getOpcode() != ISD::CTLZ_ZERO_UNDEF)
+ return SDValue();
+
+ SDValue OpCTLZ;
+ SDValue OpSizeTM1;
+
+ if (N1.getOpcode() == ISD::CTLZ_ZERO_UNDEF) {
+ OpCTLZ = N1;
+ OpSizeTM1 = N0;
+ } else if (N->getOpcode() == ISD::SUB) {
+ return SDValue();
+ } else {
+ OpCTLZ = N0;
+ OpSizeTM1 = N1;
+ }
+
+ if (!OpCTLZ.hasOneUse())
+ return SDValue();
+ auto *C = dyn_cast<ConstantSDNode>(OpSizeTM1);
+ if (!C)
+ return SDValue();
+
+ if (C->getZExtValue() != uint64_t(OpCTLZ.getValueSizeInBits() - 1))
+ return SDValue();
+ SDLoc DL(N);
+ EVT OpVT = VT;
+ SDValue Op = OpCTLZ.getOperand(0);
+ if (VT == MVT::i8) {
+    // Zero extend to i32 since there is no i8 BSR.
+ OpVT = MVT::i32;
+ Op = DAG.getNode(ISD::ZERO_EXTEND, DL, OpVT, Op);
+ }
+
+ SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
+ Op = DAG.getNode(X86ISD::BSR, DL, VTs, Op);
+ if (VT == MVT::i8)
+ Op = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Op);
+
+ return Op;
+}
+
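A minimal standalone sketch (illustrative only; the helper name is invented) of the identity combineXorSubCTLZ relies on: for a non-zero value and a power-of-two bit width W, both `ctlz(x) ^ (W - 1)` and `(W - 1) - ctlz(x)` give the index of the highest set bit, which is what BSR computes.

#include <cassert>
#include <cstdint>

static unsigned ctlz32(uint32_t X) { // models ISD::CTLZ_ZERO_UNDEF, X != 0
  unsigned N = 0;
  while (!(X & 0x80000000u)) { X <<= 1; ++N; }
  return N;
}

int main() {
  const unsigned W = 32;
  for (uint32_t X : {1u, 2u, 0x80u, 0xDEADBEEFu, 0x80000000u}) {
    unsigned Bsr = W - 1 - ctlz32(X);     // what X86ISD::BSR computes
    assert((ctlz32(X) ^ (W - 1)) == Bsr); // the XOR form handled above
    assert(((W - 1) - ctlz32(X)) == Bsr); // the SUB form handled above
  }
  return 0;
}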
static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -52160,15 +54339,24 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineBitOpWithShift(N, DAG))
return R;
+ if (SDValue R = combineBitOpWithPACK(N, DAG))
+ return R;
+
if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
return FPLogic;
+ if (SDValue R = combineXorSubCTLZ(N, DAG, Subtarget))
+ return R;
+
if (DCI.isBeforeLegalizeOps())
return SDValue();
if (SDValue SetCC = foldXor1SetCC(N, DAG))
return SetCC;
+ if (SDValue R = combineOrXorWithSETCC(N, N0, N1, DAG))
+ return R;
+
if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
return RV;
@@ -52211,6 +54399,9 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
}
}
+ if (SDValue R = combineBMILogicOp(N, DAG, Subtarget))
+ return R;
+
return combineFneg(N, DAG, DCI, Subtarget);
}
@@ -52500,11 +54691,12 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
MVT VT = N->getSimpleValueType(0);
int NumElts = VT.getVectorNumElements();
unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ SDLoc DL(N);
// ANDNP(undef, x) -> 0
// ANDNP(x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// ANDNP(0, x) -> x
if (ISD::isBuildVectorAllZeros(N0.getNode()))
@@ -52512,24 +54704,32 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
// ANDNP(x, 0) -> 0
if (ISD::isBuildVectorAllZeros(N1.getNode()))
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
+
+ // ANDNP(x, -1) -> NOT(x) -> XOR(x, -1)
+ if (ISD::isBuildVectorAllOnes(N1.getNode()))
+ return DAG.getNOT(DL, N0, VT);
// Turn ANDNP back to AND if input is inverted.
if (SDValue Not = IsNOT(N0, DAG))
- return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not), N1);
+ return DAG.getNode(ISD::AND, DL, VT, DAG.getBitcast(VT, Not), N1);
+
+  // Fold for better commutativity:
+ // ANDNP(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)).
+ if (N1->hasOneUse())
+ if (SDValue Not = IsNOT(N1, DAG))
+ return DAG.getNOT(
+ DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT);
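Both new ANDNP folds are instances of De Morgan's laws on the per-lane bit pattern; a scalar check of the identities (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

static uint32_t andnp(uint32_t X, uint32_t Y) { return ~X & Y; } // ANDNP(x, y)

int main() {
  uint32_t X = 0x12345678u, Y = 0x0F0F0F0Fu;
  assert(andnp(X, ~0u) == (X ^ ~0u)); // ANDNP(x, -1) == NOT(x) == XOR(x, -1)
  assert(andnp(X, ~Y) == ~(X | Y));   // ANDNP(x, NOT(y)) == NOT(OR(x, y))
  return 0;
}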
// Constant Folding
APInt Undefs0, Undefs1;
SmallVector<APInt> EltBits0, EltBits1;
if (getTargetConstantBitsFromNode(N0, EltSizeInBits, Undefs0, EltBits0)) {
- SDLoc DL(N);
- APInt ResultUndefs = APInt::getZero(NumElts);
-
if (getTargetConstantBitsFromNode(N1, EltSizeInBits, Undefs1, EltBits1)) {
SmallVector<APInt> ResultBits;
for (int I = 0; I != NumElts; ++I)
ResultBits.push_back(~EltBits0[I] & EltBits1[I]);
- return getConstVector(ResultBits, ResultUndefs, VT, DAG, DL);
+ return getConstVector(ResultBits, VT, DAG, DL);
}
// Constant fold NOT(N0) to allow us to use AND.
@@ -52540,7 +54740,7 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG,
if (BC0.getOpcode() != ISD::BITCAST) {
for (APInt &Elt : EltBits0)
Elt = ~Elt;
- SDValue Not = getConstVector(EltBits0, ResultUndefs, VT, DAG, DL);
+ SDValue Not = getConstVector(EltBits0, VT, DAG, DL);
return DAG.getNode(ISD::AND, DL, VT, Not, N1);
}
}
@@ -52976,6 +55176,59 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// Inverting a constant vector is profitable if it can be eliminated and the
+// inverted vector is already present in the DAG. Otherwise, it will be loaded
+// anyway.
+//
+// We determine which of the values can be completely eliminated and invert it.
+// If both are eliminable, select the vector whose first element is negative.
+static SDValue getInvertedVectorForFMA(SDValue V, SelectionDAG &DAG) {
+ assert(ISD::isBuildVectorOfConstantFPSDNodes(V.getNode()) &&
+ "ConstantFP build vector expected");
+  // Check if we can eliminate V. We assume that a value can be eliminated if
+  // it is only used in FMAs, since this function is invoked for each FMA that
+  // uses this vector.
+ auto IsNotFMA = [](SDNode *Use) {
+ return Use->getOpcode() != ISD::FMA && Use->getOpcode() != ISD::STRICT_FMA;
+ };
+ if (llvm::any_of(V->uses(), IsNotFMA))
+ return SDValue();
+
+ SmallVector<SDValue, 8> Ops;
+ EVT VT = V.getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ for (auto Op : V->op_values()) {
+ if (auto *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
+ Ops.push_back(DAG.getConstantFP(-Cst->getValueAPF(), SDLoc(Op), EltVT));
+ } else {
+ assert(Op.isUndef());
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ }
+ }
+
+ SDNode *NV = DAG.getNodeIfExists(ISD::BUILD_VECTOR, DAG.getVTList(VT), Ops);
+ if (!NV)
+ return SDValue();
+
+ // If an inverted version cannot be eliminated, choose it instead of the
+ // original version.
+ if (llvm::any_of(NV->uses(), IsNotFMA))
+ return SDValue(NV, 0);
+
+  // If the inverted version can also be eliminated, we have to consistently
+  // prefer one of the two values; we keep the one whose first constant
+  // element is negative.
+ // N.B. We need to skip undefs that may precede a value.
+ for (auto op : V->op_values()) {
+ if (auto *Cst = dyn_cast<ConstantFPSDNode>(op)) {
+ if (Cst->isNegative())
+ return SDValue();
+ break;
+ }
+ }
+ return SDValue(NV, 0);
+}
+
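The profitability reasoning above boils down to the fact that an FMA can absorb a negated addend (FMADD with C is FMSUB with -C), so reusing whichever of C / -C already exists in the DAG avoids materializing both constants. A small hedged sketch of that arithmetic fact, with an invented fmsub helper:

#include <cassert>
#include <cmath>

// FMSUB(a, b, c) = a*b - c, modeled here with std::fma.
static double fmsub(double A, double B, double C) { return std::fma(A, B, -C); }

int main() {
  double A = 1.5, B = -2.25, C = 4.0, NegC = -C;
  // If -C is the constant that already lives in the DAG, FMADD(A, B, C) can be
  // rewritten as FMSUB(A, B, -C), reusing NegC instead of also loading C.
  assert(std::fma(A, B, C) == fmsub(A, B, NegC));
  return 0;
}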
static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -53027,7 +55280,13 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
return true;
}
}
-
+ // Lookup if there is an inverted version of constant vector V in DAG.
+ if (ISD::isBuildVectorOfConstantFPSDNodes(V.getNode())) {
+ if (SDValue NegV = getInvertedVectorForFMA(V, DAG)) {
+ V = NegV;
+ return true;
+ }
+ }
return false;
};
@@ -53143,192 +55402,20 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-/// Recursive helper for combineVectorSizedSetCCEquality() to see if we have a
-/// recognizable memcmp expansion.
-static bool isOrXorXorTree(SDValue X, bool Root = true) {
- if (X.getOpcode() == ISD::OR)
- return isOrXorXorTree(X.getOperand(0), false) &&
- isOrXorXorTree(X.getOperand(1), false);
- if (Root)
- return false;
- return X.getOpcode() == ISD::XOR;
-}
-
-/// Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp
-/// expansion.
-template <typename F>
-static SDValue emitOrXorXorTree(SDValue X, SDLoc &DL, SelectionDAG &DAG,
- EVT VecVT, EVT CmpVT, bool HasPT, F SToV) {
- SDValue Op0 = X.getOperand(0);
- SDValue Op1 = X.getOperand(1);
- if (X.getOpcode() == ISD::OR) {
- SDValue A = emitOrXorXorTree(Op0, DL, DAG, VecVT, CmpVT, HasPT, SToV);
- SDValue B = emitOrXorXorTree(Op1, DL, DAG, VecVT, CmpVT, HasPT, SToV);
- if (VecVT != CmpVT)
- return DAG.getNode(ISD::OR, DL, CmpVT, A, B);
- if (HasPT)
- return DAG.getNode(ISD::OR, DL, VecVT, A, B);
- return DAG.getNode(ISD::AND, DL, CmpVT, A, B);
- }
- if (X.getOpcode() == ISD::XOR) {
- SDValue A = SToV(Op0);
- SDValue B = SToV(Op1);
- if (VecVT != CmpVT)
- return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETNE);
- if (HasPT)
- return DAG.getNode(ISD::XOR, DL, VecVT, A, B);
- return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ);
- }
- llvm_unreachable("Impossible");
-}
-
-/// Try to map a 128-bit or larger integer comparison to vector instructions
-/// before type legalization splits it up into chunks.
-static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
- assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");
-
- // We're looking for an oversized integer equality comparison.
- SDValue X = SetCC->getOperand(0);
- SDValue Y = SetCC->getOperand(1);
- EVT OpVT = X.getValueType();
- unsigned OpSize = OpVT.getSizeInBits();
- if (!OpVT.isScalarInteger() || OpSize < 128)
- return SDValue();
-
- // Ignore a comparison with zero because that gets special treatment in
- // EmitTest(). But make an exception for the special case of a pair of
- // logically-combined vector-sized operands compared to zero. This pattern may
- // be generated by the memcmp expansion pass with oversized integer compares
- // (see PR33325).
- bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X);
- if (isNullConstant(Y) && !IsOrXorXorTreeCCZero)
- return SDValue();
-
- // Don't perform this combine if constructing the vector will be expensive.
- auto IsVectorBitCastCheap = [](SDValue X) {
- X = peekThroughBitcasts(X);
- return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
- X.getOpcode() == ISD::LOAD;
- };
- if ((!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) &&
- !IsOrXorXorTreeCCZero)
- return SDValue();
-
- EVT VT = SetCC->getValueType(0);
- SDLoc DL(SetCC);
-
- // Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands.
- // Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands.
- // Otherwise use PCMPEQ (plus AND) and mask testing.
- bool NoImplicitFloatOps =
- DAG.getMachineFunction().getFunction().hasFnAttribute(
- Attribute::NoImplicitFloat);
- if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps &&
- ((OpSize == 128 && Subtarget.hasSSE2()) ||
- (OpSize == 256 && Subtarget.hasAVX()) ||
- (OpSize == 512 && Subtarget.useAVX512Regs()))) {
- bool HasPT = Subtarget.hasSSE41();
-
- // PTEST and MOVMSK are slow on Knights Landing and Knights Mill and widened
- // vector registers are essentially free. (Technically, widening registers
- // prevents load folding, but the tradeoff is worth it.)
- bool PreferKOT = Subtarget.preferMaskRegisters();
- bool NeedZExt = PreferKOT && !Subtarget.hasVLX() && OpSize != 512;
-
- EVT VecVT = MVT::v16i8;
- EVT CmpVT = PreferKOT ? MVT::v16i1 : VecVT;
- if (OpSize == 256) {
- VecVT = MVT::v32i8;
- CmpVT = PreferKOT ? MVT::v32i1 : VecVT;
- }
- EVT CastVT = VecVT;
- bool NeedsAVX512FCast = false;
- if (OpSize == 512 || NeedZExt) {
- if (Subtarget.hasBWI()) {
- VecVT = MVT::v64i8;
- CmpVT = MVT::v64i1;
- if (OpSize == 512)
- CastVT = VecVT;
- } else {
- VecVT = MVT::v16i32;
- CmpVT = MVT::v16i1;
- CastVT = OpSize == 512 ? VecVT :
- OpSize == 256 ? MVT::v8i32 : MVT::v4i32;
- NeedsAVX512FCast = true;
- }
- }
-
- auto ScalarToVector = [&](SDValue X) -> SDValue {
- bool TmpZext = false;
- EVT TmpCastVT = CastVT;
- if (X.getOpcode() == ISD::ZERO_EXTEND) {
- SDValue OrigX = X.getOperand(0);
- unsigned OrigSize = OrigX.getScalarValueSizeInBits();
- if (OrigSize < OpSize) {
- if (OrigSize == 128) {
- TmpCastVT = NeedsAVX512FCast ? MVT::v4i32 : MVT::v16i8;
- X = OrigX;
- TmpZext = true;
- } else if (OrigSize == 256) {
- TmpCastVT = NeedsAVX512FCast ? MVT::v8i32 : MVT::v32i8;
- X = OrigX;
- TmpZext = true;
- }
- }
- }
- X = DAG.getBitcast(TmpCastVT, X);
- if (!NeedZExt && !TmpZext)
- return X;
- return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT,
- DAG.getConstant(0, DL, VecVT), X,
- DAG.getVectorIdxConstant(0, DL));
- };
-
- SDValue Cmp;
- if (IsOrXorXorTreeCCZero) {
- // This is a bitwise-combined equality comparison of 2 pairs of vectors:
- // setcc i128 (or (xor A, B), (xor C, D)), 0, eq|ne
- // Use 2 vector equality compares and 'and' the results before doing a
- // MOVMSK.
- Cmp = emitOrXorXorTree(X, DL, DAG, VecVT, CmpVT, HasPT, ScalarToVector);
- } else {
- SDValue VecX = ScalarToVector(X);
- SDValue VecY = ScalarToVector(Y);
- if (VecVT != CmpVT) {
- Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETNE);
- } else if (HasPT) {
- Cmp = DAG.getNode(ISD::XOR, DL, VecVT, VecX, VecY);
- } else {
- Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ);
- }
- }
- // AVX512 should emit a setcc that will lower to kortest.
- if (VecVT != CmpVT) {
- EVT KRegVT = CmpVT == MVT::v64i1 ? MVT::i64 :
- CmpVT == MVT::v32i1 ? MVT::i32 : MVT::i16;
- return DAG.getSetCC(DL, VT, DAG.getBitcast(KRegVT, Cmp),
- DAG.getConstant(0, DL, KRegVT), CC);
- }
- if (HasPT) {
- SDValue BCCmp = DAG.getBitcast(OpSize == 256 ? MVT::v4i64 : MVT::v2i64,
- Cmp);
- SDValue PT = DAG.getNode(X86ISD::PTEST, DL, MVT::i32, BCCmp, BCCmp);
- X86::CondCode X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
- SDValue X86SetCC = getSETCC(X86CC, PT, DL, DAG);
- return DAG.getNode(ISD::TRUNCATE, DL, VT, X86SetCC.getValue(0));
- }
- // If all bytes match (bitmask is 0x(FFFF)FFFF), that's equality.
- // setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq
- // setcc i128 X, Y, ne --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, ne
- assert(Cmp.getValueType() == MVT::v16i8 &&
- "Non 128-bit vector on pre-SSE41 target");
- SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp);
- SDValue FFFFs = DAG.getConstant(0xFFFF, DL, MVT::i32);
- return DAG.getSetCC(DL, VT, MovMsk, FFFFs, CC);
+/// If we have AVX512, but not BWI and this is a vXi16/vXi8 setcc, just
+/// pre-promote its result type since vXi1 vectors don't get promoted
+/// during type legalization.
+static SDValue truncateAVX512SetCCNoBWI(EVT VT, EVT OpVT, SDValue LHS,
+ SDValue RHS, ISD::CondCode CC,
+ const SDLoc &DL, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() &&
+ VT.getVectorElementType() == MVT::i1 &&
+ (OpVT.getVectorElementType() == MVT::i8 ||
+ OpVT.getVectorElementType() == MVT::i16)) {
+ SDValue Setcc = DAG.getSetCC(DL, OpVT, LHS, RHS, CC);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc);
}
-
return SDValue();
}
@@ -53343,15 +55430,15 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
SDLoc DL(N);
if (CC == ISD::SETNE || CC == ISD::SETEQ) {
- if (SDValue V = combineVectorSizedSetCCEquality(N, DAG, Subtarget))
+ if (SDValue V = combineVectorSizedSetCCEquality(VT, LHS, RHS, CC, DL, DAG,
+ Subtarget))
return V;
- if (VT == MVT::i1 && isNullConstant(RHS)) {
- SDValue X86CC;
+ if (VT == MVT::i1) {
+ X86::CondCode X86CC;
if (SDValue V =
- MatchVectorAllZeroTest(LHS, CC, DL, Subtarget, DAG, X86CC))
- return DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getNode(X86ISD::SETCC, DL, MVT::i8, X86CC, V));
+ MatchVectorAllEqualTest(LHS, RHS, CC, DL, Subtarget, DAG, X86CC))
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, getSETCC(X86CC, V, DL, DAG));
}
if (OpVT.isScalarInteger()) {
@@ -53391,21 +55478,47 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
if (SDValue AndN = MatchAndCmpEq(RHS, LHS))
return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
- // cmpeq(trunc(x),0) --> cmpeq(x,0)
- // cmpne(trunc(x),0) --> cmpne(x,0)
+ // cmpeq(trunc(x),C) --> cmpeq(x,C)
+ // cmpne(trunc(x),C) --> cmpne(x,C)
// iff x upper bits are zero.
- // TODO: Add support for RHS to be truncate as well?
if (LHS.getOpcode() == ISD::TRUNCATE &&
LHS.getOperand(0).getScalarValueSizeInBits() >= 32 &&
- isNullConstant(RHS) && !DCI.isBeforeLegalize()) {
+ isa<ConstantSDNode>(RHS) && !DCI.isBeforeLegalize()) {
EVT SrcVT = LHS.getOperand(0).getValueType();
APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
OpVT.getScalarSizeInBits());
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ auto *C = cast<ConstantSDNode>(RHS);
if (DAG.MaskedValueIsZero(LHS.getOperand(0), UpperBits) &&
TLI.isTypeLegal(LHS.getOperand(0).getValueType()))
return DAG.getSetCC(DL, VT, LHS.getOperand(0),
- DAG.getConstant(0, DL, SrcVT), CC);
+ DAG.getConstant(C->getAPIntValue().zextOrTrunc(
+ SrcVT.getScalarSizeInBits()),
+ DL, SrcVT),
+ CC);
+ }
+
+  // With C as a power of 2 and C != 0 and C != INT_MIN:
+  //    icmp eq Abs(X), C ->
+  //        (icmp eq X, C) | (icmp eq X, -C)
+  //    icmp ne Abs(X), C ->
+  //        (icmp ne X, C) & (icmp ne X, -C)
+  // Both of these patterns can be better optimized in
+  // DAGCombiner::foldAndOrOfSETCC. Note that this only applies to scalar
+  // integers, which is checked above.
+ if (LHS.getOpcode() == ISD::ABS && LHS.hasOneUse()) {
+ if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
+ const APInt &CInt = C->getAPIntValue();
+ // We can better optimize this case in DAGCombiner::foldAndOrOfSETCC.
+ if (CInt.isPowerOf2() && !CInt.isMinSignedValue()) {
+ SDValue BaseOp = LHS.getOperand(0);
+ SDValue SETCC0 = DAG.getSetCC(DL, VT, BaseOp, RHS, CC);
+ SDValue SETCC1 = DAG.getSetCC(
+ DL, VT, BaseOp, DAG.getConstant(-CInt, DL, OpVT), CC);
+ return DAG.getNode(CC == ISD::SETEQ ? ISD::OR : ISD::AND, DL, VT,
+ SETCC0, SETCC1);
+ }
+ }
}
}
}
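A quick standalone check (not from the patch) of the split performed above: for a positive C other than INT_MIN, |X| == C holds exactly when X == C or X == -C.

#include <cassert>
#include <cstdlib>

int main() {
  const int C = 8; // power of 2, != 0, != INT_MIN, as required by the combine
  for (int X : {8, -8, 7, -9, 0}) {
    bool ViaAbs = (std::abs(X) == C);
    bool ViaSplit = (X == C) || (X == -C);
    assert(ViaAbs == ViaSplit);
  }
  return 0;
}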
@@ -53444,19 +55557,79 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
}
}
- // If we have AVX512, but not BWI and this is a vXi16/vXi8 setcc, just
- // pre-promote its result type since vXi1 vectors don't get promoted
- // during type legalization.
- // NOTE: The element count check is to ignore operand types that need to
- // go through type promotion to a 128-bit vector.
- if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() &&
- VT.getVectorElementType() == MVT::i1 &&
- (OpVT.getVectorElementType() == MVT::i8 ||
- OpVT.getVectorElementType() == MVT::i16)) {
- SDValue Setcc = DAG.getSetCC(DL, OpVT, LHS, RHS, CC);
- return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc);
+  // Try to make an unsigned vector comparison signed. On pre-AVX512 targets
+  // there are only signed comparisons (`PCMPGT`), and on AVX512 it's often
+  // better to use `PCMPGT` if the result is meant to stay in a vector (and if
+  // it's going to a mask, there are signed AVX512 comparisons).
+ if (VT.isVector() && OpVT.isVector() && OpVT.isInteger()) {
+ bool CanMakeSigned = false;
+ if (ISD::isUnsignedIntSetCC(CC)) {
+ KnownBits CmpKnown =
+ DAG.computeKnownBits(LHS).intersectWith(DAG.computeKnownBits(RHS));
+ // If we know LHS/RHS share the same sign bit at each element we can
+ // make this signed.
+ // NOTE: `computeKnownBits` on a vector type aggregates common bits
+ // across all lanes. So a pattern where the sign varies from lane to
+ // lane, but at each lane Sign(LHS) is known to equal Sign(RHS), will be
+ // missed. We could get around this by demanding each lane
+ // independently, but this isn't the most important optimization and
+ // that may eat into compile time.
+ CanMakeSigned =
+ CmpKnown.Zero.isSignBitSet() || CmpKnown.One.isSignBitSet();
+ }
+ if (CanMakeSigned || ISD::isSignedIntSetCC(CC)) {
+ SDValue LHSOut = LHS;
+ SDValue RHSOut = RHS;
+ ISD::CondCode NewCC = CC;
+ switch (CC) {
+ case ISD::SETGE:
+ case ISD::SETUGE:
+ if (SDValue NewLHS = incDecVectorConstant(LHS, DAG, /*IsInc*/ true,
+ /*NSW*/ true))
+ LHSOut = NewLHS;
+ else if (SDValue NewRHS = incDecVectorConstant(
+ RHS, DAG, /*IsInc*/ false, /*NSW*/ true))
+ RHSOut = NewRHS;
+ else
+ break;
+
+ [[fallthrough]];
+ case ISD::SETUGT:
+ NewCC = ISD::SETGT;
+ break;
+
+ case ISD::SETLE:
+ case ISD::SETULE:
+ if (SDValue NewLHS = incDecVectorConstant(LHS, DAG, /*IsInc*/ false,
+ /*NSW*/ true))
+ LHSOut = NewLHS;
+ else if (SDValue NewRHS = incDecVectorConstant(RHS, DAG, /*IsInc*/ true,
+ /*NSW*/ true))
+ RHSOut = NewRHS;
+ else
+ break;
+
+ [[fallthrough]];
+ case ISD::SETULT:
+ // Will be swapped to SETGT in LowerVSETCC*.
+ NewCC = ISD::SETLT;
+ break;
+ default:
+ break;
+ }
+ if (NewCC != CC) {
+ if (SDValue R = truncateAVX512SetCCNoBWI(VT, OpVT, LHSOut, RHSOut,
+ NewCC, DL, DAG, Subtarget))
+ return R;
+ return DAG.getSetCC(DL, VT, LHSOut, RHSOut, NewCC);
+ }
+ }
}
+ if (SDValue R =
+ truncateAVX512SetCCNoBWI(VT, OpVT, LHS, RHS, CC, DL, DAG, Subtarget))
+ return R;
+
// For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP early
// to avoid scalarization via legalization because v4i32 is not a legal type.
if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 &&
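The sign-bit reasoning used above can be checked in isolation: when two values are known to share the same sign bit, unsigned and signed orderings agree, so e.g. SETULT can become SETLT. An illustrative sketch, not taken from the patch:

#include <cassert>
#include <cstdint>

int main() {
  // Both sign bits clear: unsigned and signed compares agree.
  uint32_t A = 0x00001234u, B = 0x7FFFFFFFu;
  assert((A < B) == (static_cast<int32_t>(A) < static_cast<int32_t>(B)));
  // Both sign bits set: they still agree.
  uint32_t C = 0x80000001u, D = 0xFFFFFFF0u;
  assert((C < D) == (static_cast<int32_t>(C) < static_cast<int32_t>(D)));
  return 0;
}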
@@ -53527,27 +55700,58 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
DAG.getConstant(NotMask, DL, VT));
}
+ // Fold movmsk(icmp_eq(and(x,c1),c1)) -> movmsk(shl(x,c2))
// Fold movmsk(icmp_eq(and(x,c1),0)) -> movmsk(not(shl(x,c2)))
// iff pow2splat(c1).
- if (Src.getOpcode() == X86ISD::PCMPEQ &&
- Src.getOperand(0).getOpcode() == ISD::AND &&
- ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode())) {
- SDValue LHS = Src.getOperand(0).getOperand(0);
- SDValue RHS = Src.getOperand(0).getOperand(1);
- KnownBits KnownRHS = DAG.computeKnownBits(RHS);
- if (KnownRHS.isConstant() && KnownRHS.getConstant().isPowerOf2()) {
+ // Use KnownBits to determine if only a single bit is non-zero
+ // in each element (pow2 or zero), and shift that bit to the msb.
+ if (Src.getOpcode() == X86ISD::PCMPEQ) {
+ KnownBits KnownLHS = DAG.computeKnownBits(Src.getOperand(0));
+ KnownBits KnownRHS = DAG.computeKnownBits(Src.getOperand(1));
+ unsigned ShiftAmt = KnownLHS.countMinLeadingZeros();
+ if (KnownLHS.countMaxPopulation() == 1 &&
+ (KnownRHS.isZero() || (KnownRHS.countMaxPopulation() == 1 &&
+ ShiftAmt == KnownRHS.countMinLeadingZeros()))) {
SDLoc DL(N);
MVT ShiftVT = SrcVT;
+ SDValue ShiftLHS = Src.getOperand(0);
+ SDValue ShiftRHS = Src.getOperand(1);
if (ShiftVT.getScalarType() == MVT::i8) {
// vXi8 shifts - we only care about the signbit so can use PSLLW.
ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
- LHS = DAG.getBitcast(ShiftVT, LHS);
+ ShiftLHS = DAG.getBitcast(ShiftVT, ShiftLHS);
+ ShiftRHS = DAG.getBitcast(ShiftVT, ShiftRHS);
+ }
+ ShiftLHS = getTargetVShiftByConstNode(X86ISD::VSHLI, DL, ShiftVT,
+ ShiftLHS, ShiftAmt, DAG);
+ ShiftRHS = getTargetVShiftByConstNode(X86ISD::VSHLI, DL, ShiftVT,
+ ShiftRHS, ShiftAmt, DAG);
+ ShiftLHS = DAG.getBitcast(SrcVT, ShiftLHS);
+ ShiftRHS = DAG.getBitcast(SrcVT, ShiftRHS);
+ SDValue Res = DAG.getNode(ISD::XOR, DL, SrcVT, ShiftLHS, ShiftRHS);
+ return DAG.getNode(X86ISD::MOVMSK, DL, VT, DAG.getNOT(DL, Res, SrcVT));
+ }
+ }
+
+ // Fold movmsk(logic(X,C)) -> logic(movmsk(X),C)
+ if (N->isOnlyUserOf(Src.getNode())) {
+ SDValue SrcBC = peekThroughOneUseBitcasts(Src);
+ if (ISD::isBitwiseLogicOp(SrcBC.getOpcode())) {
+ APInt UndefElts;
+ SmallVector<APInt, 32> EltBits;
+ if (getTargetConstantBitsFromNode(SrcBC.getOperand(1), NumBitsPerElt,
+ UndefElts, EltBits)) {
+ APInt Mask = APInt::getZero(NumBits);
+ for (unsigned Idx = 0; Idx != NumElts; ++Idx) {
+ if (!UndefElts[Idx] && EltBits[Idx].isNegative())
+ Mask.setBit(Idx);
+ }
+ SDLoc DL(N);
+ SDValue NewSrc = DAG.getBitcast(SrcVT, SrcBC.getOperand(0));
+ SDValue NewMovMsk = DAG.getNode(X86ISD::MOVMSK, DL, VT, NewSrc);
+ return DAG.getNode(SrcBC.getOpcode(), DL, VT, NewMovMsk,
+ DAG.getConstant(Mask, DL, VT));
}
- unsigned ShiftAmt = KnownRHS.getConstant().countLeadingZeros();
- LHS = getTargetVShiftByConstNode(X86ISD::VSHLI, DL, ShiftVT, LHS,
- ShiftAmt, DAG);
- LHS = DAG.getNOT(DL, DAG.getBitcast(SrcVT, LHS), SrcVT);
- return DAG.getNode(X86ISD::MOVMSK, DL, VT, LHS);
}
}
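A scalar model (illustrative only; the movmsk helper below is invented) of why the logic op can be moved past MOVMSK: MOVMSK keeps only the per-lane sign bits, and the sign bit of `x op c` is `signbit(x) op signbit(c)` for AND/OR/XOR, so the constant collapses to a mask of which elements are negative.

#include <array>
#include <cassert>
#include <cstdint>

static unsigned movmsk(const std::array<int32_t, 4> &V) {
  unsigned Mask = 0;
  for (unsigned I = 0; I != V.size(); ++I)
    Mask |= (static_cast<uint32_t>(V[I]) >> 31) << I; // sign bit of each lane
  return Mask;
}

int main() {
  std::array<int32_t, 4> X = {-1, 5, -7, 8};
  std::array<int32_t, 4> C = {-1, -1, 0, -1}; // constant operand of the AND
  std::array<int32_t, 4> AndXC;
  for (unsigned I = 0; I != 4; ++I)
    AndXC[I] = X[I] & C[I];
  assert(movmsk(AndXC) == (movmsk(X) & movmsk(C)));
  return 0;
}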
@@ -53560,6 +55764,21 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue combineTESTP(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ MVT VT = N->getSimpleValueType(0);
+ unsigned NumBits = VT.getScalarSizeInBits();
+
+ // Simplify the inputs.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ APInt DemandedMask(APInt::getAllOnes(NumBits));
+ if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
+ return SDValue(N, 0);
+
+ return SDValue();
+}
+
static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -54100,215 +56319,6 @@ static bool onlyZeroFlagUsed(SDValue Flags) {
return true;
}
-/// If this is an add or subtract where one operand is produced by a cmp+setcc,
-/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
-/// with CMP+{ADC, SBB}.
-/// Also try (ADD/SUB)+(AND(SRL,1)) bit extraction pattern with BT+{ADC, SBB}.
-static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
- SDValue X, SDValue Y,
- SelectionDAG &DAG,
- bool ZeroSecondOpOnly = false) {
- if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
- return SDValue();
-
- // Look through a one-use zext.
- if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse())
- Y = Y.getOperand(0);
-
- X86::CondCode CC;
- SDValue EFLAGS;
- if (Y.getOpcode() == X86ISD::SETCC && Y.hasOneUse()) {
- CC = (X86::CondCode)Y.getConstantOperandVal(0);
- EFLAGS = Y.getOperand(1);
- } else if (Y.getOpcode() == ISD::AND && isOneConstant(Y.getOperand(1)) &&
- Y.hasOneUse()) {
- EFLAGS = LowerAndToBT(Y, ISD::SETNE, DL, DAG, CC);
- }
-
- if (!EFLAGS)
- return SDValue();
-
- // If X is -1 or 0, then we have an opportunity to avoid constants required in
- // the general case below.
- auto *ConstantX = dyn_cast<ConstantSDNode>(X);
- if (ConstantX && !ZeroSecondOpOnly) {
- if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnes()) ||
- (IsSub && CC == X86::COND_B && ConstantX->isZero())) {
- // This is a complicated way to get -1 or 0 from the carry flag:
- // -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax
- // 0 - SETB --> 0 - (CF) --> CF ? -1 : 0 --> SBB %eax, %eax
- return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
- DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
- EFLAGS);
- }
-
- if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnes()) ||
- (IsSub && CC == X86::COND_A && ConstantX->isZero())) {
- if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
- EFLAGS.getValueType().isInteger() &&
- !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
- // Swap the operands of a SUB, and we have the same pattern as above.
- // -1 + SETBE (SUB A, B) --> -1 + SETAE (SUB B, A) --> SUB + SBB
- // 0 - SETA (SUB A, B) --> 0 - SETB (SUB B, A) --> SUB + SBB
- SDValue NewSub = DAG.getNode(
- X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
- EFLAGS.getOperand(1), EFLAGS.getOperand(0));
- SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
- return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
- DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
- NewEFLAGS);
- }
- }
- }
-
- if (CC == X86::COND_B) {
- // X + SETB Z --> adc X, 0
- // X - SETB Z --> sbb X, 0
- return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
- DAG.getVTList(VT, MVT::i32), X,
- DAG.getConstant(0, DL, VT), EFLAGS);
- }
-
- if (ZeroSecondOpOnly)
- return SDValue();
-
- if (CC == X86::COND_A) {
- // Try to convert COND_A into COND_B in an attempt to facilitate
- // materializing "setb reg".
- //
- // Do not flip "e > c", where "c" is a constant, because Cmp instruction
- // cannot take an immediate as its first operand.
- //
- if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
- EFLAGS.getValueType().isInteger() &&
- !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
- SDValue NewSub =
- DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
- EFLAGS.getOperand(1), EFLAGS.getOperand(0));
- SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
- return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
- DAG.getVTList(VT, MVT::i32), X,
- DAG.getConstant(0, DL, VT), NewEFLAGS);
- }
- }
-
- if (CC == X86::COND_AE) {
- // X + SETAE --> sbb X, -1
- // X - SETAE --> adc X, -1
- return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL,
- DAG.getVTList(VT, MVT::i32), X,
- DAG.getConstant(-1, DL, VT), EFLAGS);
- }
-
- if (CC == X86::COND_BE) {
- // X + SETBE --> sbb X, -1
- // X - SETBE --> adc X, -1
- // Try to convert COND_BE into COND_AE in an attempt to facilitate
- // materializing "setae reg".
- //
- // Do not flip "e <= c", where "c" is a constant, because Cmp instruction
- // cannot take an immediate as its first operand.
- //
- if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
- EFLAGS.getValueType().isInteger() &&
- !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
- SDValue NewSub =
- DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
- EFLAGS.getOperand(1), EFLAGS.getOperand(0));
- SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
- return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL,
- DAG.getVTList(VT, MVT::i32), X,
- DAG.getConstant(-1, DL, VT), NewEFLAGS);
- }
- }
-
- if (CC != X86::COND_E && CC != X86::COND_NE)
- return SDValue();
-
- if (EFLAGS.getOpcode() != X86ISD::CMP || !EFLAGS.hasOneUse() ||
- !X86::isZeroNode(EFLAGS.getOperand(1)) ||
- !EFLAGS.getOperand(0).getValueType().isInteger())
- return SDValue();
-
- SDValue Z = EFLAGS.getOperand(0);
- EVT ZVT = Z.getValueType();
-
- // If X is -1 or 0, then we have an opportunity to avoid constants required in
- // the general case below.
- if (ConstantX) {
- // 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb' with
- // fake operands:
- // 0 - (Z != 0) --> sbb %eax, %eax, (neg Z)
- // -1 + (Z == 0) --> sbb %eax, %eax, (neg Z)
- if ((IsSub && CC == X86::COND_NE && ConstantX->isZero()) ||
- (!IsSub && CC == X86::COND_E && ConstantX->isAllOnes())) {
- SDValue Zero = DAG.getConstant(0, DL, ZVT);
- SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
- SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z);
- return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
- DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
- SDValue(Neg.getNode(), 1));
- }
-
- // cmp with 1 sets the carry flag when Z == 0, so create 0 or -1 using 'sbb'
- // with fake operands:
- // 0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1)
- // -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1)
- if ((IsSub && CC == X86::COND_E && ConstantX->isZero()) ||
- (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnes())) {
- SDValue One = DAG.getConstant(1, DL, ZVT);
- SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
- SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
- return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
- DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
- Cmp1.getValue(1));
- }
- }
-
- // (cmp Z, 1) sets the carry flag if Z is 0.
- SDValue One = DAG.getConstant(1, DL, ZVT);
- SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
- SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
-
- // Add the flags type for ADC/SBB nodes.
- SDVTList VTs = DAG.getVTList(VT, MVT::i32);
-
- // X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1)
- // X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1)
- if (CC == X86::COND_NE)
- return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X,
- DAG.getConstant(-1ULL, DL, VT), Cmp1.getValue(1));
-
- // X - (Z == 0) --> sub X, (zext(sete Z, 0)) --> sbb X, 0, (cmp Z, 1)
- // X + (Z == 0) --> add X, (zext(sete Z, 0)) --> adc X, 0, (cmp Z, 1)
- return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X,
- DAG.getConstant(0, DL, VT), Cmp1.getValue(1));
-}
-
-/// If this is an add or subtract where one operand is produced by a cmp+setcc,
-/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
-/// with CMP+{ADC, SBB}.
-static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
- bool IsSub = N->getOpcode() == ISD::SUB;
- SDValue X = N->getOperand(0);
- SDValue Y = N->getOperand(1);
- EVT VT = N->getValueType(0);
- SDLoc DL(N);
-
- if (SDValue ADCOrSBB = combineAddOrSubToADCOrSBB(IsSub, DL, VT, X, Y, DAG))
- return ADCOrSBB;
-
- // Commute and try again (negate the result for subtracts).
- if (SDValue ADCOrSBB = combineAddOrSubToADCOrSBB(IsSub, DL, VT, Y, X, DAG)) {
- if (IsSub)
- ADCOrSBB =
- DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), ADCOrSBB);
- return ADCOrSBB;
- }
-
- return SDValue();
-}
-
static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
// Only handle test patterns.
if (!isNullConstant(N->getOperand(1)))
@@ -54991,6 +57001,33 @@ static SDValue combineSubABS(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::ADD, DL, VT, N0, Cmov);
}
+static SDValue combineSubSetcc(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // (sub C (zero_extend (setcc)))
+ // =>
+  // (add (zero_extend (setcc inverted)) C-1) if C is a nonzero immediate
+ // Don't disturb (sub 0 setcc), which is easily done with neg.
+ EVT VT = N->getValueType(0);
+ auto *Op0C = dyn_cast<ConstantSDNode>(Op0);
+ if (Op1.getOpcode() == ISD::ZERO_EXTEND && Op1.hasOneUse() && Op0C &&
+ !Op0C->isZero() && Op1.getOperand(0).getOpcode() == X86ISD::SETCC &&
+ Op1.getOperand(0).hasOneUse()) {
+ SDValue SetCC = Op1.getOperand(0);
+ X86::CondCode CC = (X86::CondCode)SetCC.getConstantOperandVal(0);
+ X86::CondCode NewCC = X86::GetOppositeBranchCondition(CC);
+ uint64_t NewImm = Op0C->getZExtValue() - 1;
+ SDLoc DL(Op1);
+ SDValue NewSetCC = getSETCC(NewCC, SetCC.getOperand(1), DL, DAG);
+ NewSetCC = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NewSetCC);
+ return DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(VT, VT), NewSetCC,
+ DAG.getConstant(NewImm, DL, VT));
+ }
+
+ return SDValue();
+}
+
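The rewrite in combineSubSetcc is plain boolean arithmetic: for b in {0,1} and a non-zero C, C - b == (C - 1) + !b, and !b is exactly the inverted SETCC. A minimal check, not from the patch:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t C = 42; // any non-zero immediate
  for (uint64_t B : {0u, 1u}) {
    uint64_t Sub = C - B;
    uint64_t Add = (C - 1) + (B ^ 1); // zext(setcc inverted) + (C - 1)
    assert(Sub == Add);
  }
  return 0;
}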
static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -55010,9 +57047,11 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
// X86 can't encode an immediate LHS of a sub. See if we can push the
// negation into a preceding instruction. If the RHS of the sub is a XOR with
// one use and a constant, invert the immediate, saving one register.
+  // However, ignore cases where C1 is 0, as those will become a NEG.
// sub(C1, xor(X, C2)) -> add(xor(X, ~C2), C1+1)
if (Op1.getOpcode() == ISD::XOR && IsNonOpaqueConstant(Op0) &&
- IsNonOpaqueConstant(Op1.getOperand(1)) && Op1->hasOneUse()) {
+ !isNullConstant(Op0) && IsNonOpaqueConstant(Op1.getOperand(1)) &&
+ Op1->hasOneUse()) {
SDLoc DL(N);
EVT VT = Op0.getValueType();
SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT, Op1.getOperand(0),
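The identity behind this fold, checked in isolation (illustrative only): in two's-complement arithmetic -y == ~y + 1 and ~(X ^ C2) == X ^ ~C2, hence C1 - (X ^ C2) == (X ^ ~C2) + (C1 + 1).

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C1 = 100, C2 = 0x0F0F0F0Fu;
  for (uint32_t X : {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu}) {
    uint32_t Sub = C1 - (X ^ C2);        // sub(C1, xor(X, C2))
    uint32_t Add = (X ^ ~C2) + (C1 + 1); // add(xor(X, ~C2), C1+1)
    assert(Sub == Add);
  }
  return 0;
}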
@@ -55048,7 +57087,13 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
Op1.getOperand(0));
}
- return combineAddOrSubToADCOrSBB(N, DAG);
+ if (SDValue V = combineXorSubCTLZ(N, DAG, Subtarget))
+ return V;
+
+ if (SDValue V = combineAddOrSubToADCOrSBB(N, DAG))
+ return V;
+
+ return combineSubSetcc(N, DAG);
}
static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
@@ -55170,7 +57215,7 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
if (llvm::all_of(Ops, [Op0](SDValue Op) {
return Op.getOpcode() == Op0.getOpcode();
})) {
- auto ConcatSubOperand = [&](MVT VT, ArrayRef<SDValue> SubOps, unsigned I) {
+ auto ConcatSubOperand = [&](EVT VT, ArrayRef<SDValue> SubOps, unsigned I) {
SmallVector<SDValue> Subs;
for (SDValue SubOp : SubOps)
Subs.push_back(SubOp.getOperand(I));
@@ -55227,6 +57272,24 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
}
break;
}
+ case X86ISD::UNPCKH:
+ case X86ISD::UNPCKL: {
+ // Don't concatenate build_vector patterns.
+ if (!IsSplat && VT.getScalarSizeInBits() >= 32 &&
+ ((VT.is256BitVector() && Subtarget.hasInt256()) ||
+ (VT.is512BitVector() && Subtarget.useAVX512Regs())) &&
+ none_of(Ops, [](SDValue Op) {
+ return peekThroughBitcasts(Op.getOperand(0)).getOpcode() ==
+ ISD::SCALAR_TO_VECTOR ||
+ peekThroughBitcasts(Op.getOperand(1)).getOpcode() ==
+ ISD::SCALAR_TO_VECTOR;
+ })) {
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ ConcatSubOperand(VT, Ops, 0),
+ ConcatSubOperand(VT, Ops, 1));
+ }
+ break;
+ }
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFD:
@@ -55237,11 +57300,16 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
}
[[fallthrough]];
case X86ISD::VPERMILPI:
- if (!IsSplat && NumOps == 2 && (VT == MVT::v8f32 || VT == MVT::v8i32) &&
- Op0.getOperand(1) == Ops[1].getOperand(1)) {
- SDValue Res = DAG.getBitcast(MVT::v8f32, ConcatSubOperand(VT, Ops, 0));
- Res = DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, Res,
- Op0.getOperand(1));
+ if (!IsSplat && VT.getScalarSizeInBits() == 32 &&
+ (VT.is256BitVector() ||
+ (VT.is512BitVector() && Subtarget.useAVX512Regs())) &&
+ all_of(Ops, [&Op0](SDValue Op) {
+ return Op0.getOperand(1) == Op.getOperand(1);
+ })) {
+ MVT FloatVT = VT.changeVectorElementType(MVT::f32);
+ SDValue Res = DAG.getBitcast(FloatVT, ConcatSubOperand(VT, Ops, 0));
+ Res =
+ DAG.getNode(X86ISD::VPERMILPI, DL, FloatVT, Res, Op0.getOperand(1));
return DAG.getBitcast(VT, Res);
}
if (!IsSplat && NumOps == 2 && VT == MVT::v4f64) {
@@ -55254,11 +57322,43 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
}
break;
case X86ISD::PSHUFB:
+ case X86ISD::PSADBW:
if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) ||
(VT.is512BitVector() && Subtarget.useBWIRegs()))) {
+ MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
+ SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
+ NumOps * SrcVT.getVectorNumElements());
return DAG.getNode(Op0.getOpcode(), DL, VT,
- ConcatSubOperand(VT, Ops, 0),
- ConcatSubOperand(VT, Ops, 1));
+ ConcatSubOperand(SrcVT, Ops, 0),
+ ConcatSubOperand(SrcVT, Ops, 1));
+ }
+ break;
+ case X86ISD::VPERMV:
+ if (!IsSplat && NumOps == 2 &&
+ (VT.is512BitVector() && Subtarget.useAVX512Regs())) {
+ MVT OpVT = Op0.getSimpleValueType();
+ int NumSrcElts = OpVT.getVectorNumElements();
+ SmallVector<int, 64> ConcatMask;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SmallVector<int, 64> SubMask;
+ SmallVector<SDValue, 2> SubOps;
+ if (!getTargetShuffleMask(Ops[i].getNode(), OpVT, false, SubOps,
+ SubMask))
+ break;
+ for (int M : SubMask) {
+ if (0 <= M)
+ M += i * NumSrcElts;
+ ConcatMask.push_back(M);
+ }
+ }
+ if (ConcatMask.size() == (NumOps * NumSrcElts)) {
+ SDValue Src = concatSubVectors(Ops[0].getOperand(1),
+ Ops[1].getOperand(1), DAG, DL);
+ MVT IntMaskSVT = MVT::getIntegerVT(EltSizeInBits);
+ MVT IntMaskVT = MVT::getVectorVT(IntMaskSVT, NumOps * NumSrcElts);
+ SDValue Mask = getConstVector(ConcatMask, IntMaskVT, DAG, DL, true);
+ return DAG.getNode(X86ISD::VPERMV, DL, VT, Mask, Src);
+ }
}
break;
case X86ISD::VPERMV3:
@@ -55285,13 +57385,27 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
Ops[1].getOperand(0), DAG, DL);
SDValue Src1 = concatSubVectors(Ops[0].getOperand(2),
Ops[1].getOperand(2), DAG, DL);
- MVT IntMaskSVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
+ MVT IntMaskSVT = MVT::getIntegerVT(EltSizeInBits);
MVT IntMaskVT = MVT::getVectorVT(IntMaskSVT, NumOps * NumSrcElts);
SDValue Mask = getConstVector(ConcatMask, IntMaskVT, DAG, DL, true);
return DAG.getNode(X86ISD::VPERMV3, DL, VT, Src0, Mask, Src1);
}
}
break;
+ case ISD::TRUNCATE:
+ if (!IsSplat && NumOps == 2 && VT.is256BitVector()) {
+ EVT SrcVT = Ops[0].getOperand(0).getValueType();
+ if (SrcVT.is256BitVector() && SrcVT.isSimple() &&
+ SrcVT == Ops[1].getOperand(0).getValueType() &&
+ Subtarget.useAVX512Regs() &&
+ Subtarget.getPreferVectorWidth() >= 512 &&
+ (SrcVT.getScalarSizeInBits() > 16 || Subtarget.useBWIRegs())) {
+ EVT NewSrcVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext());
+ return DAG.getNode(ISD::TRUNCATE, DL, VT,
+ ConcatSubOperand(NewSrcVT, Ops, 0));
+ }
+ }
+ break;
case X86ISD::VSHLI:
case X86ISD::VSRLI:
// Special case: SHL/SRL AVX1 V4i64 by 32-bits can lower as a shuffle.
@@ -55341,8 +57455,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
case ISD::OR:
case ISD::XOR:
case X86ISD::ANDNP:
- // TODO: Add 256-bit support.
- if (!IsSplat && VT.is512BitVector()) {
+ if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) ||
+ (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
NumOps * SrcVT.getVectorNumElements());
@@ -55351,6 +57465,17 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
ConcatSubOperand(SrcVT, Ops, 1));
}
break;
+ case ISD::CTPOP:
+ case ISD::CTTZ:
+ case ISD::CTLZ:
+ case ISD::CTTZ_ZERO_UNDEF:
+ case ISD::CTLZ_ZERO_UNDEF:
+ if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) ||
+ (VT.is512BitVector() && Subtarget.useBWIRegs()))) {
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ ConcatSubOperand(VT, Ops, 0));
+ }
+ break;
case X86ISD::GF2P8AFFINEQB:
if (!IsSplat &&
(VT.is256BitVector() ||
@@ -55363,6 +57488,33 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));
}
break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ if (!IsSplat && ((VT.is256BitVector() && Subtarget.hasInt256()) ||
+ (VT.is512BitVector() && Subtarget.useAVX512Regs() &&
+ (EltSizeInBits >= 32 || Subtarget.useBWIRegs())))) {
+ MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
+ SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
+ NumOps * SrcVT.getVectorNumElements());
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ ConcatSubOperand(SrcVT, Ops, 0),
+ ConcatSubOperand(SrcVT, Ops, 1));
+ }
+ break;
+  // Because VADD, VSUB and VMUL can execute on more ports than VINSERT and
+  // their latencies are short, we don't replace them here.
+ case ISD::FDIV:
+ if (!IsSplat && (VT.is256BitVector() ||
+ (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
+ MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
+ SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
+ NumOps * SrcVT.getVectorNumElements());
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ ConcatSubOperand(SrcVT, Ops, 0),
+ ConcatSubOperand(SrcVT, Ops, 1));
+ }
+ break;
case X86ISD::HADD:
case X86ISD::HSUB:
case X86ISD::FHADD:
@@ -55392,9 +57544,25 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
}
break;
case ISD::VSELECT:
+ if (!IsSplat && Subtarget.hasAVX512() &&
+ (VT.is256BitVector() ||
+ (VT.is512BitVector() && Subtarget.useAVX512Regs())) &&
+ (EltSizeInBits >= 32 || Subtarget.hasBWI())) {
+ EVT SelVT = Ops[0].getOperand(0).getValueType();
+ if (SelVT.getVectorElementType() == MVT::i1) {
+ SelVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+ Ops.size() * SelVT.getVectorNumElements());
+ if (DAG.getTargetLoweringInfo().isTypeLegal(SelVT))
+ return DAG.getNode(Op0.getOpcode(), DL, VT,
+ ConcatSubOperand(SelVT.getSimpleVT(), Ops, 0),
+ ConcatSubOperand(VT, Ops, 1),
+ ConcatSubOperand(VT, Ops, 2));
+ }
+ }
+ [[fallthrough]];
case X86ISD::BLENDV:
if (!IsSplat && VT.is256BitVector() && Ops.size() == 2 &&
- (VT.getScalarSizeInBits() >= 32 || Subtarget.hasInt256()) &&
+ (EltSizeInBits >= 32 || Subtarget.hasInt256()) &&
IsConcatFree(VT, Ops, 1) && IsConcatFree(VT, Ops, 2)) {
EVT SelVT = Ops[0].getOperand(0).getValueType();
SelVT = SelVT.getDoubleNumVectorElementsVT(*DAG.getContext());
@@ -55425,7 +57593,7 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
// Attempt to fold target constant loads.
if (all_of(Ops, [](SDValue Op) { return getTargetConstantFromNode(Op); })) {
SmallVector<APInt> EltBits;
- APInt UndefElts = APInt::getNullValue(VT.getVectorNumElements());
+ APInt UndefElts = APInt::getZero(VT.getVectorNumElements());
for (unsigned I = 0, E = Ops.size(); I != E; ++I) {
APInt OpUndefElts;
SmallVector<APInt> OpEltBits;
@@ -55448,13 +57616,28 @@ static SDValue combineCONCAT_VECTORS(SDNode *N, SelectionDAG &DAG,
EVT VT = N->getValueType(0);
EVT SrcVT = N->getOperand(0).getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
- // Don't do anything for i1 vectors.
- if (VT.getVectorElementType() == MVT::i1)
+ if (VT.getVectorElementType() == MVT::i1) {
+ // Attempt to constant fold.
+ unsigned SubSizeInBits = SrcVT.getSizeInBits();
+ APInt Constant = APInt::getZero(VT.getSizeInBits());
+ for (unsigned I = 0, E = Ops.size(); I != E; ++I) {
+ auto *C = dyn_cast<ConstantSDNode>(peekThroughBitcasts(Ops[I]));
+ if (!C) break;
+ Constant.insertBits(C->getAPIntValue(), I * SubSizeInBits);
+ if (I == (E - 1)) {
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (TLI.isTypeLegal(IntVT))
+ return DAG.getBitcast(VT, DAG.getConstant(Constant, SDLoc(N), IntVT));
+ }
+ }
+
+ // Don't do anything else for i1 vectors.
return SDValue();
+ }
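The i1 constant folding above amounts to packing each sub-operand's bits into one immediate at its lane offset; a minimal integer sketch (not from the patch, values invented):

#include <cassert>
#include <cstdint>

int main() {
  const uint16_t Op0 = 0xB1; // low  v8i1 constant, lanes 0..7
  const uint16_t Op1 = 0x07; // high v8i1 constant, lanes 8..15
  const unsigned SubSizeInBits = 8;
  uint16_t Constant = 0;
  Constant |= Op0 << (0 * SubSizeInBits); // insertBits(Op0, 0)
  Constant |= Op1 << (1 * SubSizeInBits); // insertBits(Op1, 8)
  assert(Constant == 0x07B1);
  return 0;
}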
if (Subtarget.hasAVX() && TLI.isTypeLegal(VT) && TLI.isTypeLegal(SrcVT)) {
- SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
if (SDValue R = combineConcatVectorOps(SDLoc(N), VT.getSimpleVT(), Ops, DAG,
DCI, Subtarget))
return R;
@@ -55522,6 +57705,16 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
if (IsI1Vector)
return SDValue();
+ // Eliminate an intermediate vector widening:
+ // insert_subvector X, (insert_subvector undef, Y, 0), Idx -->
+ // insert_subvector X, Y, Idx
+  // TODO: This is a more general version of a DAGCombiner fold; can we move
+  // it there?
+ if (SubVec.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ SubVec.getOperand(0).isUndef() && isNullConstant(SubVec.getOperand(2)))
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Vec,
+ SubVec.getOperand(1), N->getOperand(2));
+
// If this is an insert of an extract, combine to a shuffle. Don't do this
// if the insert or extract can be represented with a subregister operation.
if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
@@ -55606,9 +57799,8 @@ static SDValue combineINSERT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
/// to get simple value types will assert).
static SDValue narrowExtractedVectorSelect(SDNode *Ext, SelectionDAG &DAG) {
SDValue Sel = Ext->getOperand(0);
- SmallVector<SDValue, 4> CatOps;
if (Sel.getOpcode() != ISD::VSELECT ||
- !collectConcatOps(Sel.getOperand(0).getNode(), CatOps, DAG))
+ !isFreeToSplitVector(Sel.getOperand(0).getNode(), DAG))
return SDValue();
// Note: We assume simple value types because this should only be called with
@@ -55796,12 +57988,7 @@ static SDValue combineEXTRACT_SUBVECTOR(SDNode *N, SelectionDAG &DAG,
}
}
if (IdxVal == 0 &&
- (InOpcode == ISD::ANY_EXTEND ||
- InOpcode == ISD::ANY_EXTEND_VECTOR_INREG ||
- InOpcode == ISD::ZERO_EXTEND ||
- InOpcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
- InOpcode == ISD::SIGN_EXTEND ||
- InOpcode == ISD::SIGN_EXTEND_VECTOR_INREG) &&
+ (ISD::isExtOpcode(InOpcode) || ISD::isExtVecInRegOpcode(InOpcode)) &&
(SizeInBits == 128 || SizeInBits == 256) &&
InVec.getOperand(0).getValueSizeInBits() >= SizeInBits) {
SDLoc DL(N);
@@ -55861,11 +58048,9 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
// This occurs frequently in our masked scalar intrinsic code and our
// floating point select lowering with AVX512.
// TODO: SimplifyDemandedBits instead?
- if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse())
- if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
- if (C->getAPIntValue().isOne())
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1,
- Src.getOperand(0));
+ if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse() &&
+ isOneConstant(Src.getOperand(1)))
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, Src.getOperand(0));
// Combine scalar_to_vector of an extract_vector_elt into an extract_subvec.
if (VT == MVT::v1i1 && Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -56069,9 +58254,7 @@ static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
}
// Attempt to combine as a shuffle on SSE41+ targets.
- if ((Opcode == ISD::ANY_EXTEND_VECTOR_INREG ||
- Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) &&
- Subtarget.hasSSE41()) {
+ if (Subtarget.hasSSE41()) {
SDValue Op(N, 0);
if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType()))
if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
@@ -56237,9 +58420,6 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
return SDValue();
- if (Subtarget.hasFP16())
- return SDValue();
-
bool IsStrict = N->isStrictFPOpcode();
EVT VT = N->getValueType(0);
SDValue Src = N->getOperand(IsStrict ? 1 : 0);
@@ -56249,11 +58429,47 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
SrcVT.getVectorElementType() != MVT::f32)
return SDValue();
+ SDLoc dl(N);
+
+ SDValue Cvt, Chain;
unsigned NumElts = VT.getVectorNumElements();
- if (NumElts == 1 || !isPowerOf2_32(NumElts))
+ if (Subtarget.hasFP16()) {
+ // Combine (v8f16 fp_round(concat_vectors(v4f32 (xint_to_fp v4i64), ..)))
+ // into (v8f16 vector_shuffle(v8f16 (CVTXI2P v4i64), ..))
+ if (NumElts == 8 && Src.getOpcode() == ISD::CONCAT_VECTORS) {
+ SDValue Cvt0, Cvt1;
+ SDValue Op0 = Src.getOperand(0);
+ SDValue Op1 = Src.getOperand(1);
+ bool IsOp0Strict = Op0->isStrictFPOpcode();
+ if (Op0.getOpcode() != Op1.getOpcode() ||
+ Op0.getOperand(IsOp0Strict ? 1 : 0).getValueType() != MVT::v4i64 ||
+ Op1.getOperand(IsOp0Strict ? 1 : 0).getValueType() != MVT::v4i64) {
+ return SDValue();
+ }
+ int Mask[8] = {0, 1, 2, 3, 8, 9, 10, 11};
+ if (IsStrict) {
+ assert(IsOp0Strict && "Op0 must be strict node");
+ unsigned Opc = Op0.getOpcode() == ISD::STRICT_SINT_TO_FP
+ ? X86ISD::STRICT_CVTSI2P
+ : X86ISD::STRICT_CVTUI2P;
+ Cvt0 = DAG.getNode(Opc, dl, {MVT::v8f16, MVT::Other},
+ {Op0.getOperand(0), Op0.getOperand(1)});
+ Cvt1 = DAG.getNode(Opc, dl, {MVT::v8f16, MVT::Other},
+ {Op1.getOperand(0), Op1.getOperand(1)});
+ Cvt = DAG.getVectorShuffle(MVT::v8f16, dl, Cvt0, Cvt1, Mask);
+ return DAG.getMergeValues({Cvt, Cvt0.getValue(1)}, dl);
+ }
+ unsigned Opc = Op0.getOpcode() == ISD::SINT_TO_FP ? X86ISD::CVTSI2P
+ : X86ISD::CVTUI2P;
+ Cvt0 = DAG.getNode(Opc, dl, MVT::v8f16, Op0.getOperand(0));
+ Cvt1 = DAG.getNode(Opc, dl, MVT::v8f16, Op1.getOperand(0));
+ return Cvt = DAG.getVectorShuffle(MVT::v8f16, dl, Cvt0, Cvt1, Mask);
+ }
return SDValue();
+ }
- SDLoc dl(N);
+ if (NumElts == 1 || !isPowerOf2_32(NumElts))
+ return SDValue();
// Widen to at least 4 input elements.
if (NumElts < 4)
@@ -56261,9 +58477,8 @@ static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
DAG.getConstantFP(0.0, dl, SrcVT));
// Destination is v8i16 with at least 8 elements.
- EVT CvtVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
- std::max(8U, NumElts));
- SDValue Cvt, Chain;
+ EVT CvtVT =
+ EVT::getVectorVT(*DAG.getContext(), MVT::i16, std::max(8U, NumElts));
SDValue Rnd = DAG.getTargetConstant(4, dl, MVT::i32);
if (IsStrict) {
Cvt = DAG.getNode(X86ISD::STRICT_CVTPS2PH, dl, {CvtVT, MVT::Other},
@@ -56477,6 +58692,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FMADDSUB:
case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, DCI);
case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI, Subtarget);
+ case X86ISD::TESTP: return combineTESTP(N, DAG, DCI, Subtarget);
case X86ISD::MGATHER:
case X86ISD::MSCATTER:
return combineX86GatherScatter(N, DAG, DCI, Subtarget);
@@ -56504,6 +58720,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
+bool X86TargetLowering::preferABDSToABSWithNSW(EVT VT) const {
+ return false;
+}
+
bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
if (!isTypeLegal(VT))
return false;
@@ -56566,6 +58786,29 @@ SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl,
return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, DAG);
}
+TargetLowering::AndOrSETCCFoldKind
+X86TargetLowering::isDesirableToCombineLogicOpOfSETCC(
+ const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const {
+ using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind;
+ EVT VT = LogicOp->getValueType(0);
+ EVT OpVT = SETCC0->getOperand(0).getValueType();
+ if (!VT.isInteger())
+ return AndOrSETCCFoldKind::None;
+
+ if (VT.isVector())
+ return AndOrSETCCFoldKind(AndOrSETCCFoldKind::NotAnd |
+ (isOperationLegal(ISD::ABS, OpVT)
+ ? AndOrSETCCFoldKind::ABS
+ : AndOrSETCCFoldKind::None));
+
+  // Don't use `NotAnd`: even though `not` is generally shorter code size than
+  // `add`, `add` can lower to LEA, which can save moves / spills. In any case
+  // where `NotAnd` applies, `AddAnd` does as well.
+  // TODO: Currently we lower (icmp eq/ne (and ~X, Y), 0) -> `test (not X), Y`;
+  // if we change that to `andn Y, X` it may be worth preferring `NotAnd` here.
+ return AndOrSETCCFoldKind::AddAnd;
+}
+
bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
EVT VT = Op.getValueType();
bool Is8BitMulByConstant = VT == MVT::i8 && Op.getOpcode() == ISD::MUL &&
@@ -57023,7 +59266,7 @@ LowerXConstraint(EVT ConstraintVT) const {
// Lower @cc targets via setcc.
SDValue X86TargetLowering::LowerAsmOutputForConstraint(
- SDValue &Chain, SDValue &Flag, const SDLoc &DL,
+ SDValue &Chain, SDValue &Glue, const SDLoc &DL,
const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
X86::CondCode Cond = parseConstraintCode(OpInfo.ConstraintCode);
if (Cond == X86::COND_INVALID)
@@ -57031,16 +59274,16 @@ SDValue X86TargetLowering::LowerAsmOutputForConstraint(
// Check that return type is valid.
if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
OpInfo.ConstraintVT.getSizeInBits() < 8)
- report_fatal_error("Flag output operand is of invalid type");
+ report_fatal_error("Glue output operand is of invalid type");
// Get EFLAGS register. Only update chain when copyfrom is glued.
- if (Flag.getNode()) {
- Flag = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32, Flag);
- Chain = Flag.getValue(1);
+ if (Glue.getNode()) {
+ Glue = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32, Glue);
+ Chain = Glue.getValue(1);
} else
- Flag = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32);
+ Glue = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32);
// Extract CC code.
- SDValue CC = getSETCC(Cond, Flag, DL, DAG);
+ SDValue CC = getSETCC(Cond, Glue, DL, DAG);
// Extend to 32-bits
SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, OpInfo.ConstraintVT, CC);
@@ -57569,15 +59812,16 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (isGRClass(*Class)) {
unsigned Size = VT.getSizeInBits();
if (Size == 1) Size = 8;
- Register DestReg = getX86SubSuperRegisterOrZero(Res.first, Size);
- if (DestReg > 0) {
+ if (Size != 8 && Size != 16 && Size != 32 && Size != 64)
+ return std::make_pair(0, nullptr);
+ Register DestReg = getX86SubSuperRegister(Res.first, Size);
+ if (DestReg.isValid()) {
bool is64Bit = Subtarget.is64Bit();
const TargetRegisterClass *RC =
Size == 8 ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass)
: Size == 16 ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass)
: Size == 32 ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass)
- : Size == 64 ? (is64Bit ? &X86::GR64RegClass : nullptr)
- : nullptr;
+ : /*Size == 64*/ (is64Bit ? &X86::GR64RegClass : nullptr);
if (Size == 64 && !is64Bit) {
// Model GCC's behavior here and select a fixed pair of 32-bit
// registers.
@@ -57701,12 +59945,12 @@ void X86TargetLowering::insertCopiesSplitCSR(
Entry->getParent()->getFunction().hasFnAttribute(Attribute::NoUnwind) &&
"Function should be nounwind in insertCopiesSplitCSR!");
Entry->addLiveIn(*I);
- BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
+ BuildMI(*Entry, MBBI, MIMetadata(), TII->get(TargetOpcode::COPY), NewVR)
.addReg(*I);
// Insert the copy-back instructions right before the terminator.
for (auto *Exit : Exits)
- BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
+ BuildMI(*Exit, Exit->getFirstTerminator(), MIMetadata(),
TII->get(TargetOpcode::COPY), *I)
.addReg(NewVR);
}
@@ -57716,6 +59960,71 @@ bool X86TargetLowering::supportSwiftError() const {
return Subtarget.is64Bit();
}
+MachineInstr *
+X86TargetLowering::EmitKCFICheck(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator &MBBI,
+ const TargetInstrInfo *TII) const {
+ assert(MBBI->isCall() && MBBI->getCFIType() &&
+ "Invalid call instruction for a KCFI check");
+
+ MachineFunction &MF = *MBB.getParent();
+ // If the call target is a memory operand, unfold it and use R11 for the
+ // call, so KCFI_CHECK won't have to recompute the address.
+ switch (MBBI->getOpcode()) {
+ case X86::CALL64m:
+ case X86::CALL64m_NT:
+ case X86::TAILJMPm64:
+ case X86::TAILJMPm64_REX: {
+ MachineBasicBlock::instr_iterator OrigCall = MBBI;
+ SmallVector<MachineInstr *, 2> NewMIs;
+ if (!TII->unfoldMemoryOperand(MF, *OrigCall, X86::R11, /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false, NewMIs))
+ report_fatal_error("Failed to unfold memory operand for a KCFI check");
+ for (auto *NewMI : NewMIs)
+ MBBI = MBB.insert(OrigCall, NewMI);
+ assert(MBBI->isCall() &&
+ "Unexpected instruction after memory operand unfolding");
+ if (OrigCall->shouldUpdateCallSiteInfo())
+ MF.moveCallSiteInfo(&*OrigCall, &*MBBI);
+ MBBI->setCFIType(MF, OrigCall->getCFIType());
+ OrigCall->eraseFromParent();
+ break;
+ }
+ default:
+ break;
+ }
+
+ MachineOperand &Target = MBBI->getOperand(0);
+ Register TargetReg;
+ switch (MBBI->getOpcode()) {
+ case X86::CALL64r:
+ case X86::CALL64r_NT:
+ case X86::TAILJMPr64:
+ case X86::TAILJMPr64_REX:
+ assert(Target.isReg() && "Unexpected target operand for an indirect call");
+ Target.setIsRenamable(false);
+ TargetReg = Target.getReg();
+ break;
+ case X86::CALL64pcrel32:
+ case X86::TAILJMPd64:
+ assert(Target.isSymbol() && "Unexpected target operand for a direct call");
+ // X86TargetLowering::EmitLoweredIndirectThunk always uses r11 for
+ // 64-bit indirect thunk calls.
+ assert(StringRef(Target.getSymbolName()).endswith("_r11") &&
+ "Unexpected register for an indirect thunk call");
+ TargetReg = X86::R11;
+ break;
+ default:
+ llvm_unreachable("Unexpected CFI call opcode");
+ break;
+ }
+
+ return BuildMI(MBB, MBBI, MIMetadata(*MBBI), TII->get(X86::KCFI_CHECK))
+ .addReg(TargetReg)
+ .addImm(MBBI->getCFIType())
+ .getInstr();
+}
+
/// Returns true if stack probing through a function call is requested.
bool X86TargetLowering::hasStackProbeSymbol(const MachineFunction &MF) const {
return !getStackProbeSymbolName(MF).empty();
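As a rough mental model of the KCFI_CHECK emitted above (a conceptual sketch only, not the actual pseudo-instruction expansion; the -4 offset and the trap call are assumptions made for illustration): the caller reads the 32-bit type hash stored just before the callee's entry point and traps when it does not match the hash recorded at the call site via setCFIType.

    #include <cstdint>
    #include <cstring>

    // Conceptual sketch of a KCFI-style type check; the hash offset and
    // __builtin_trap stand in for the real X86::KCFI_CHECK expansion.
    static void kcfiCheck(const void *Target, std::uint32_t ExpectedHash) {
      std::uint32_t ActualHash;
      std::memcpy(&ActualHash, static_cast<const char *>(Target) - 4,
                  sizeof(ActualHash)); // hash emitted ahead of the entry point
      if (ActualHash != ExpectedHash)
        __builtin_trap(); // control-flow-integrity violation
    }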
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index c5c115047271..250df82a30c2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -126,9 +126,9 @@ namespace llvm {
/// operand 1 is the target address.
NT_BRIND,
- /// Return with a flag operand. Operand 0 is the chain operand, operand
+ /// Return with a glue operand. Operand 0 is the chain operand, operand
/// 1 is the number of bytes of stack to pop.
- RET_FLAG,
+ RET_GLUE,
/// Return from interrupt. Operand 0 is the number of bytes to pop.
IRET,
@@ -740,6 +740,9 @@ namespace llvm {
// User level interrupts - testui
TESTUI,
+ // Perform an FP80 add after changing precision control in FPCW.
+ FP80_ADD,
+
/// X86 strict FP compare instructions.
STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
STRICT_FCMPS,
@@ -779,6 +782,9 @@ namespace llvm {
STRICT_CVTPS2PH,
STRICT_CVTPH2PS,
+ // Perform an FP80 add after changing precision control in FPCW.
+ STRICT_FP80_ADD,
+
// WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
// non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
@@ -827,6 +833,12 @@ namespace llvm {
// Load FP control word from i16 memory.
FLDCW16m,
+ // Store x87 FPU environment into memory.
+ FNSTENVm,
+
+ // Load x87 FPU environment from memory.
+ FLDENVm,
+
/// This instruction implements FP_TO_SINT with the
/// integer destination in memory and a FP reg source. This corresponds
/// to the X86::FIST*m instructions and the rounding mode change stuff. It
@@ -886,8 +898,8 @@ namespace llvm {
AESDECWIDE256KL,
/// Compare and Add if Condition is Met. Compare value in operand 2 with
- /// value in memory of operand 1. If condition of operand 4 is met, add value
- /// operand 3 to m32 and write new value in operand 1. Operand 2 is
+ /// value in memory of operand 1. If the condition of operand 4 is met, add the
+ /// value of operand 3 to m32 and write the new value to operand 1. Operand 2 is
/// always updated with the original value from operand 1.
CMPCCXADD,
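Restating the semantics documented above as a scalar sketch (an illustrative pseudo-implementation, not the instruction definition; the condition predicate is a stand-in for the hardware condition codes):

    #include <cstdint>

    // Mem = operand 1, Cmp = operand 2, Add = operand 3, Cond = operand 4.
    // The return value models operand 2 always receiving the original memory
    // value, per the comment above.
    static std::uint32_t cmpccxadd(std::uint32_t *Mem, std::uint32_t Cmp,
                                   std::uint32_t Add,
                                   bool (*Cond)(std::uint32_t, std::uint32_t)) {
      std::uint32_t Orig = *Mem;   // value in memory of operand 1
      if (Cond(Orig, Cmp))         // condition of operand 4 on the comparison
        *Mem = Orig + Add;         // add operand 3 and write back to operand 1
      return Orig;
    }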
@@ -1040,6 +1052,8 @@ namespace llvm {
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
+ bool preferABDSToABSWithNSW(EVT VT) const override;
+
/// Return true if the target has native support for
/// the specified value type and it is 'desirable' to use the type for the
/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
@@ -1052,6 +1066,13 @@ namespace llvm {
/// and some i16 instructions are slow.
bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
+ /// Return the preferred fold type: ABS if this is a vector, AddAnd if it's an
+ /// integer, and None otherwise.
+ TargetLowering::AndOrSETCCFoldKind
+ isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
+ const SDNode *SETCC0,
+ const SDNode *SETCC1) const override;
+
/// Return the newly negated expression if the cost is not expensive and
/// set the cost in \p Cost to indicate that if it is cheaper or neutral to
/// do the negation.
@@ -1082,8 +1103,6 @@ namespace llvm {
bool isCtlzFast() const override;
- bool hasBitPreservingFPLogic(EVT VT) const override;
-
bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
// If the pair to store is a mixture of float and int values, we will
// save two bitwise instructions and one float-to-int instruction and
@@ -1113,7 +1132,7 @@ namespace llvm {
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
SelectionDAG &DAG) const override;
- bool preferScalarizeSplat(unsigned Opc) const override;
+ bool preferScalarizeSplat(SDNode *N) const override;
bool shouldFoldConstantShiftPairToMask(const SDNode *N,
CombineLevel Level) const override;
@@ -1339,10 +1358,10 @@ namespace llvm {
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT VT) const override;
- /// Return true if it's profitable to narrow
- /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
- /// from i32 to i8 but not from i32 to i16.
- bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
+ /// Return true if it's profitable to narrow operations of type SrcVT to
+ /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not
+ /// from i32 to i16.
+ bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override;
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
EVT VT) const override;
@@ -1426,11 +1445,11 @@ namespace llvm {
bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
bool MathUsed) const override;
- bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
+ bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
unsigned AddrSpace) const override {
// If we can replace more than 2 scalar stores, there will be a reduction
// in instructions even after we add a vector constant load.
- return NumElem > 2;
+ return IsZero || NumElem > 2;
}
bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
@@ -1508,6 +1527,10 @@ namespace llvm {
bool supportKCFIBundles() const override { return true; }
+ MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
+ MachineBasicBlock::instr_iterator &MBBI,
+ const TargetInstrInfo *TII) const override;
+
bool hasStackProbeSymbol(const MachineFunction &MF) const override;
bool hasInlineStackProbe(const MachineFunction &MF) const override;
StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;
@@ -1564,7 +1587,7 @@ namespace llvm {
LegalFPImmediates.push_back(Imm);
}
- SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
@@ -1646,6 +1669,9 @@ namespace llvm {
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
SDValue &Chain) const;
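The three new lowering hooks above serve the generic floating-point-environment operations; at the source level they correspond roughly to saving and restoring the FP environment, as in this standalone sketch (illustrative only; the x87 side is what the new FNSTENVm/FLDENVm nodes model):

    #include <cfenv>

    // Save the floating-point environment, run code that may perturb it,
    // then restore the saved state.
    void withSavedFPEnv(void (*Body)()) {
      std::fenv_t SavedEnv;
      std::fegetenv(&SavedEnv); // capture control and status state
      Body();                   // may change rounding mode or raise exceptions
      std::fesetenv(&SavedEnv); // restore the captured environment
    }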
@@ -1702,6 +1728,7 @@ namespace llvm {
LLVMContext &Context) const override;
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
+ ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
TargetLoweringBase::AtomicExpansionKind
shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
diff --git a/llvm/lib/Target/X86/X86InsertPrefetch.cpp b/llvm/lib/Target/X86/X86InsertPrefetch.cpp
index 08dc514a6476..29ae05bf0c94 100644
--- a/llvm/lib/Target/X86/X86InsertPrefetch.cpp
+++ b/llvm/lib/Target/X86/X86InsertPrefetch.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/ProfileData/SampleProfReader.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Transforms/IPO/SampleProfile.h"
using namespace llvm;
using namespace sampleprof;
@@ -159,8 +160,10 @@ bool X86InsertPrefetch::doInitialization(Module &M) {
return false;
LLVMContext &Ctx = M.getContext();
+ // TODO: Propagate virtual file system into LLVM targets.
+ auto FS = vfs::getRealFileSystem();
ErrorOr<std::unique_ptr<SampleProfileReader>> ReaderOrErr =
- SampleProfileReader::create(Filename, Ctx);
+ SampleProfileReader::create(Filename, Ctx, *FS);
if (std::error_code EC = ReaderOrErr.getError()) {
std::string Msg = "Could not open profile: " + EC.message();
Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg,
diff --git a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
index 3587353f7df1..e46fc034cc26 100644
--- a/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp
@@ -557,6 +557,1074 @@ static Value *simplifyX86addcarry(const IntrinsicInst &II,
return nullptr;
}
+static Value *simplifyTernarylogic(const IntrinsicInst &II,
+ InstCombiner::BuilderTy &Builder) {
+
+ auto *ArgImm = dyn_cast<ConstantInt>(II.getArgOperand(3));
+ if (!ArgImm || ArgImm->getValue().uge(256))
+ return nullptr;
+
+ Value *ArgA = II.getArgOperand(0);
+ Value *ArgB = II.getArgOperand(1);
+ Value *ArgC = II.getArgOperand(2);
+
+ Type *Ty = II.getType();
+
+ auto Or = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
+ return {Builder.CreateOr(Lhs.first, Rhs.first), Lhs.second | Rhs.second};
+ };
+ auto Xor = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
+ return {Builder.CreateXor(Lhs.first, Rhs.first), Lhs.second ^ Rhs.second};
+ };
+ auto And = [&](auto Lhs, auto Rhs) -> std::pair<Value *, uint8_t> {
+ return {Builder.CreateAnd(Lhs.first, Rhs.first), Lhs.second & Rhs.second};
+ };
+ auto Not = [&](auto V) -> std::pair<Value *, uint8_t> {
+ return {Builder.CreateNot(V.first), ~V.second};
+ };
+ auto Nor = [&](auto Lhs, auto Rhs) { return Not(Or(Lhs, Rhs)); };
+ auto Xnor = [&](auto Lhs, auto Rhs) { return Not(Xor(Lhs, Rhs)); };
+ auto Nand = [&](auto Lhs, auto Rhs) { return Not(And(Lhs, Rhs)); };
+
+ bool AIsConst = match(ArgA, PatternMatch::m_ImmConstant());
+ bool BIsConst = match(ArgB, PatternMatch::m_ImmConstant());
+ bool CIsConst = match(ArgC, PatternMatch::m_ImmConstant());
+
+ bool ABIsConst = AIsConst && BIsConst;
+ bool ACIsConst = AIsConst && CIsConst;
+ bool BCIsConst = BIsConst && CIsConst;
+ bool ABCIsConst = AIsConst && BIsConst && CIsConst;
+
+ // Used for verification. It's a big table: it's difficult to go from Imm ->
+ // logic ops, but easy to verify that a set of logic ops is correct. We track
+ // the logic ops through the second value in the pair. At the end it should
+ // equal Imm.
+ std::pair<Value *, uint8_t> A = {ArgA, 0xf0};
+ std::pair<Value *, uint8_t> B = {ArgB, 0xcc};
+ std::pair<Value *, uint8_t> C = {ArgC, 0xaa};
+ std::pair<Value *, uint8_t> Res = {nullptr, 0};
+
+ // Currently we only handle cases that convert directly to another instruction
+ // or cases where all the ops are constant. This is because we don't properly
+ // handle creating ternary ops in the backend, so splitting them here may
+ // cause regressions. As the backend improves, more of the guarded cases below
+ // can be enabled.
+
+ uint8_t Imm = ArgImm->getValue().getZExtValue();
+ switch (Imm) {
+ case 0x0:
+ Res = {Constant::getNullValue(Ty), 0};
+ break;
+ case 0x1:
+ if (ABCIsConst)
+ Res = Nor(Or(A, B), C);
+ break;
+ case 0x2:
+ if (ABCIsConst)
+ Res = And(Nor(A, B), C);
+ break;
+ case 0x3:
+ if (ABIsConst)
+ Res = Nor(A, B);
+ break;
+ case 0x4:
+ if (ABCIsConst)
+ Res = And(Nor(A, C), B);
+ break;
+ case 0x5:
+ if (ACIsConst)
+ Res = Nor(A, C);
+ break;
+ case 0x6:
+ if (ABCIsConst)
+ Res = Nor(A, Xnor(B, C));
+ break;
+ case 0x7:
+ if (ABCIsConst)
+ Res = Nor(A, And(B, C));
+ break;
+ case 0x8:
+ if (ABCIsConst)
+ Res = Nor(A, Nand(B, C));
+ break;
+ case 0x9:
+ if (ABCIsConst)
+ Res = Nor(A, Xor(B, C));
+ break;
+ case 0xa:
+ if (ACIsConst)
+ Res = Nor(A, Not(C));
+ break;
+ case 0xb:
+ if (ABCIsConst)
+ Res = Nor(A, Nor(C, Not(B)));
+ break;
+ case 0xc:
+ if (ABIsConst)
+ Res = Nor(A, Not(B));
+ break;
+ case 0xd:
+ if (ABCIsConst)
+ Res = Nor(A, Nor(B, Not(C)));
+ break;
+ case 0xe:
+ if (ABCIsConst)
+ Res = Nor(A, Nor(B, C));
+ break;
+ case 0xf:
+ Res = Not(A);
+ break;
+ case 0x10:
+ if (ABCIsConst)
+ Res = And(A, Nor(B, C));
+ break;
+ case 0x11:
+ if (BCIsConst)
+ Res = Nor(B, C);
+ break;
+ case 0x12:
+ if (ABCIsConst)
+ Res = Nor(Xnor(A, C), B);
+ break;
+ case 0x13:
+ if (ABCIsConst)
+ Res = Nor(And(A, C), B);
+ break;
+ case 0x14:
+ if (ABCIsConst)
+ Res = Nor(Xnor(A, B), C);
+ break;
+ case 0x15:
+ if (ABCIsConst)
+ Res = Nor(And(A, B), C);
+ break;
+ case 0x16:
+ if (ABCIsConst)
+ Res = Xor(Xor(A, B), And(Nand(A, B), C));
+ break;
+ case 0x17:
+ if (ABCIsConst)
+ Res = Xor(Or(A, B), Or(Xnor(A, B), C));
+ break;
+ case 0x18:
+ if (ABCIsConst)
+ Res = Nor(Xnor(A, B), Xnor(A, C));
+ break;
+ case 0x19:
+ if (ABCIsConst)
+ Res = And(Nand(A, B), Xnor(B, C));
+ break;
+ case 0x1a:
+ if (ABCIsConst)
+ Res = Xor(A, Or(And(A, B), C));
+ break;
+ case 0x1b:
+ if (ABCIsConst)
+ Res = Xor(A, Or(Xnor(A, B), C));
+ break;
+ case 0x1c:
+ if (ABCIsConst)
+ Res = Xor(A, Or(And(A, C), B));
+ break;
+ case 0x1d:
+ if (ABCIsConst)
+ Res = Xor(A, Or(Xnor(A, C), B));
+ break;
+ case 0x1e:
+ if (ABCIsConst)
+ Res = Xor(A, Or(B, C));
+ break;
+ case 0x1f:
+ if (ABCIsConst)
+ Res = Nand(A, Or(B, C));
+ break;
+ case 0x20:
+ if (ABCIsConst)
+ Res = Nor(Nand(A, C), B);
+ break;
+ case 0x21:
+ if (ABCIsConst)
+ Res = Nor(Xor(A, C), B);
+ break;
+ case 0x22:
+ if (BCIsConst)
+ Res = Nor(B, Not(C));
+ break;
+ case 0x23:
+ if (ABCIsConst)
+ Res = Nor(B, Nor(C, Not(A)));
+ break;
+ case 0x24:
+ if (ABCIsConst)
+ Res = Nor(Xnor(A, B), Xor(A, C));
+ break;
+ case 0x25:
+ if (ABCIsConst)
+ Res = Xor(A, Nand(Nand(A, B), C));
+ break;
+ case 0x26:
+ if (ABCIsConst)
+ Res = And(Nand(A, B), Xor(B, C));
+ break;
+ case 0x27:
+ if (ABCIsConst)
+ Res = Xor(Or(Xnor(A, B), C), B);
+ break;
+ case 0x28:
+ if (ABCIsConst)
+ Res = And(Xor(A, B), C);
+ break;
+ case 0x29:
+ if (ABCIsConst)
+ Res = Xor(Xor(A, B), Nor(And(A, B), C));
+ break;
+ case 0x2a:
+ if (ABCIsConst)
+ Res = And(Nand(A, B), C);
+ break;
+ case 0x2b:
+ if (ABCIsConst)
+ Res = Xor(Or(Xnor(A, B), Xor(A, C)), A);
+ break;
+ case 0x2c:
+ if (ABCIsConst)
+ Res = Nor(Xnor(A, B), Nor(B, C));
+ break;
+ case 0x2d:
+ if (ABCIsConst)
+ Res = Xor(A, Or(B, Not(C)));
+ break;
+ case 0x2e:
+ if (ABCIsConst)
+ Res = Xor(A, Or(Xor(A, C), B));
+ break;
+ case 0x2f:
+ if (ABCIsConst)
+ Res = Nand(A, Or(B, Not(C)));
+ break;
+ case 0x30:
+ if (ABIsConst)
+ Res = Nor(B, Not(A));
+ break;
+ case 0x31:
+ if (ABCIsConst)
+ Res = Nor(Nor(A, Not(C)), B);
+ break;
+ case 0x32:
+ if (ABCIsConst)
+ Res = Nor(Nor(A, C), B);
+ break;
+ case 0x33:
+ Res = Not(B);
+ break;
+ case 0x34:
+ if (ABCIsConst)
+ Res = And(Xor(A, B), Nand(B, C));
+ break;
+ case 0x35:
+ if (ABCIsConst)
+ Res = Xor(B, Or(A, Xnor(B, C)));
+ break;
+ case 0x36:
+ if (ABCIsConst)
+ Res = Xor(Or(A, C), B);
+ break;
+ case 0x37:
+ if (ABCIsConst)
+ Res = Nand(Or(A, C), B);
+ break;
+ case 0x38:
+ if (ABCIsConst)
+ Res = Nor(Xnor(A, B), Nor(A, C));
+ break;
+ case 0x39:
+ if (ABCIsConst)
+ Res = Xor(Or(A, Not(C)), B);
+ break;
+ case 0x3a:
+ if (ABCIsConst)
+ Res = Xor(B, Or(A, Xor(B, C)));
+ break;
+ case 0x3b:
+ if (ABCIsConst)
+ Res = Nand(Or(A, Not(C)), B);
+ break;
+ case 0x3c:
+ Res = Xor(A, B);
+ break;
+ case 0x3d:
+ if (ABCIsConst)
+ Res = Xor(A, Or(Nor(A, C), B));
+ break;
+ case 0x3e:
+ if (ABCIsConst)
+ Res = Xor(A, Or(Nor(A, Not(C)), B));
+ break;
+ case 0x3f:
+ if (ABIsConst)
+ Res = Nand(A, B);
+ break;
+ case 0x40:
+ if (ABCIsConst)
+ Res = Nor(Nand(A, B), C);
+ break;
+ case 0x41:
+ if (ABCIsConst)
+ Res = Nor(Xor(A, B), C);
+ break;
+ case 0x42:
+ if (ABCIsConst)
+ Res = Nor(Xor(A, B), Xnor(A, C));
+ break;
+ case 0x43:
+ if (ABCIsConst)
+ Res = Xor(A, Nand(Nand(A, C), B));
+ break;
+ case 0x44:
+ if (BCIsConst)
+ Res = Nor(C, Not(B));
+ break;
+ case 0x45:
+ if (ABCIsConst)
+ Res = Nor(Nor(B, Not(A)), C);
+ break;
+ case 0x46:
+ if (ABCIsConst)
+ Res = Xor(Or(And(A, C), B), C);
+ break;
+ case 0x47:
+ if (ABCIsConst)
+ Res = Xor(Or(Xnor(A, C), B), C);
+ break;
+ case 0x48:
+ if (ABCIsConst)
+ Res = And(Xor(A, C), B);
+ break;
+ case 0x49:
+ if (ABCIsConst)
+ Res = Xor(Or(Xnor(A, B), And(A, C)), C);
+ break;
+ case 0x4a:
+ if (ABCIsConst)
+ Res = Nor(Xnor(A, C), Nor(B, C));
+ break;
+ case 0x4b:
+ if (ABCIsConst)
+ Res = Xor(A, Or(C, Not(B)));
+ break;
+ case 0x4c:
+ if (ABCIsConst)
+ Res = And(Nand(A, C), B);
+ break;
+ case 0x4d:
+ if (ABCIsConst)
+ Res = Xor(Or(Xor(A, B), Xnor(A, C)), A);
+ break;
+ case 0x4e:
+ if (ABCIsConst)
+ Res = Xor(A, Or(Xor(A, B), C));
+ break;
+ case 0x4f:
+ if (ABCIsConst)
+ Res = Nand(A, Nand(B, Not(C)));
+ break;
+ case 0x50:
+ if (ACIsConst)
+ Res = Nor(C, Not(A));
+ break;
+ case 0x51:
+ if (ABCIsConst)
+ Res = Nor(Nor(A, Not(B)), C);
+ break;
+ case 0x52:
+ if (ABCIsConst)
+ Res = And(Xor(A, C), Nand(B, C));
+ break;
+ case 0x53:
+ if (ABCIsConst)
+ Res = Xor(Or(Xnor(B, C), A), C);
+ break;
+ case 0x54:
+ if (ABCIsConst)
+ Res = Nor(Nor(A, B), C);
+ break;
+ case 0x55:
+ Res = Not(C);
+ break;
+ case 0x56:
+ if (ABCIsConst)
+ Res = Xor(Or(A, B), C);
+ break;
+ case 0x57:
+ if (ABCIsConst)
+ Res = Nand(Or(A, B), C);
+ break;
+ case 0x58:
+ if (ABCIsConst)
+ Res = Nor(Nor(A, B), Xnor(A, C));
+ break;
+ case 0x59:
+ if (ABCIsConst)
+ Res = Xor(Or(A, Not(B)), C);
+ break;
+ case 0x5a:
+ Res = Xor(A, C);
+ break;
+ case 0x5b:
+ if (ABCIsConst)
+ Res = Xor(A, Or(Nor(A, B), C));
+ break;
+ case 0x5c:
+ if (ABCIsConst)
+ Res = Xor(Or(Xor(B, C), A), C);
+ break;
+ case 0x5d:
+ if (ABCIsConst)
+ Res = Nand(Or(A, Not(B)), C);
+ break;
+ case 0x5e:
+ if (ABCIsConst)
+ Res = Xor(A, Or(Nor(A, Not(B)), C));
+ break;
+ case 0x5f:
+ if (ACIsConst)
+ Res = Nand(A, C);
+ break;
+ case 0x60:
+ if (ABCIsConst)
+ Res = And(A, Xor(B, C));
+ break;
+ case 0x61:
+ if (ABCIsConst)
+ Res = Xor(Or(Xnor(A, B), And(B, C)), C);
+ break;
+ case 0x62:
+ if (ABCIsConst)
+ Res = Nor(Nor(A, C), Xnor(B, C));
+ break;
+ case 0x63:
+ if (ABCIsConst)
+ Res = Xor(B, Or(C, Not(A)));
+ break;
+ case 0x64:
+ if (ABCIsConst)
+ Res = Nor(Nor(A, B), Xnor(B, C));
+ break;
+ case 0x65:
+ if (ABCIsConst)
+ Res = Xor(Or(B, Not(A)), C);
+ break;
+ case 0x66:
+ Res = Xor(B, C);
+ break;
+ case 0x67:
+ if (ABCIsConst)
+ Res = Or(Nor(A, B), Xor(B, C));
+ break;
+ case 0x68:
+ if (ABCIsConst)
+ Res = Xor(Xor(A, B), Nor(Nor(A, B), C));
+ break;
+ case 0x69:
+ if (ABCIsConst)
+ Res = Xor(Xnor(A, B), C);
+ break;
+ case 0x6a:
+ if (ABCIsConst)
+ Res = Xor(And(A, B), C);
+ break;
+ case 0x6b:
+ if (ABCIsConst)
+ Res = Or(Nor(A, B), Xor(Xnor(A, B), C));
+ break;
+ case 0x6c:
+ if (ABCIsConst)
+ Res = Xor(And(A, C), B);
+ break;
+ case 0x6d:
+ if (ABCIsConst)
+ Res = Xor(Or(Xnor(A, B), Nor(A, C)), C);
+ break;
+ case 0x6e:
+ if (ABCIsConst)
+ Res = Or(Nor(A, Not(B)), Xor(B, C));
+ break;
+ case 0x6f:
+ if (ABCIsConst)
+ Res = Nand(A, Xnor(B, C));
+ break;
+ case 0x70:
+ if (ABCIsConst)
+ Res = And(A, Nand(B, C));
+ break;
+ case 0x71:
+ if (ABCIsConst)
+ Res = Xor(Nor(Xor(A, B), Xor(A, C)), A);
+ break;
+ case 0x72:
+ if (ABCIsConst)
+ Res = Xor(Or(Xor(A, B), C), B);
+ break;
+ case 0x73:
+ if (ABCIsConst)
+ Res = Nand(Nand(A, Not(C)), B);
+ break;
+ case 0x74:
+ if (ABCIsConst)
+ Res = Xor(Or(Xor(A, C), B), C);
+ break;
+ case 0x75:
+ if (ABCIsConst)
+ Res = Nand(Nand(A, Not(B)), C);
+ break;
+ case 0x76:
+ if (ABCIsConst)
+ Res = Xor(B, Or(Nor(B, Not(A)), C));
+ break;
+ case 0x77:
+ if (BCIsConst)
+ Res = Nand(B, C);
+ break;
+ case 0x78:
+ if (ABCIsConst)
+ Res = Xor(A, And(B, C));
+ break;
+ case 0x79:
+ if (ABCIsConst)
+ Res = Xor(Or(Xnor(A, B), Nor(B, C)), C);
+ break;
+ case 0x7a:
+ if (ABCIsConst)
+ Res = Or(Xor(A, C), Nor(B, Not(A)));
+ break;
+ case 0x7b:
+ if (ABCIsConst)
+ Res = Nand(Xnor(A, C), B);
+ break;
+ case 0x7c:
+ if (ABCIsConst)
+ Res = Or(Xor(A, B), Nor(C, Not(A)));
+ break;
+ case 0x7d:
+ if (ABCIsConst)
+ Res = Nand(Xnor(A, B), C);
+ break;
+ case 0x7e:
+ if (ABCIsConst)
+ Res = Or(Xor(A, B), Xor(A, C));
+ break;
+ case 0x7f:
+ if (ABCIsConst)
+ Res = Nand(And(A, B), C);
+ break;
+ case 0x80:
+ if (ABCIsConst)
+ Res = And(And(A, B), C);
+ break;
+ case 0x81:
+ if (ABCIsConst)
+ Res = Nor(Xor(A, B), Xor(A, C));
+ break;
+ case 0x82:
+ if (ABCIsConst)
+ Res = And(Xnor(A, B), C);
+ break;
+ case 0x83:
+ if (ABCIsConst)
+ Res = Nor(Xor(A, B), Nor(C, Not(A)));
+ break;
+ case 0x84:
+ if (ABCIsConst)
+ Res = And(Xnor(A, C), B);
+ break;
+ case 0x85:
+ if (ABCIsConst)
+ Res = Nor(Xor(A, C), Nor(B, Not(A)));
+ break;
+ case 0x86:
+ if (ABCIsConst)
+ Res = Xor(Nor(Xnor(A, B), Nor(B, C)), C);
+ break;
+ case 0x87:
+ if (ABCIsConst)
+ Res = Xor(A, Nand(B, C));
+ break;
+ case 0x88:
+ Res = And(B, C);
+ break;
+ case 0x89:
+ if (ABCIsConst)
+ Res = Xor(B, Nor(Nor(B, Not(A)), C));
+ break;
+ case 0x8a:
+ if (ABCIsConst)
+ Res = And(Nand(A, Not(B)), C);
+ break;
+ case 0x8b:
+ if (ABCIsConst)
+ Res = Xor(Nor(Xor(A, C), B), C);
+ break;
+ case 0x8c:
+ if (ABCIsConst)
+ Res = And(Nand(A, Not(C)), B);
+ break;
+ case 0x8d:
+ if (ABCIsConst)
+ Res = Xor(Nor(Xor(A, B), C), B);
+ break;
+ case 0x8e:
+ if (ABCIsConst)
+ Res = Xor(Or(Xor(A, B), Xor(A, C)), A);
+ break;
+ case 0x8f:
+ if (ABCIsConst)
+ Res = Nand(A, Nand(B, C));
+ break;
+ case 0x90:
+ if (ABCIsConst)
+ Res = And(A, Xnor(B, C));
+ break;
+ case 0x91:
+ if (ABCIsConst)
+ Res = Nor(Nor(A, Not(B)), Xor(B, C));
+ break;
+ case 0x92:
+ if (ABCIsConst)
+ Res = Xor(Nor(Xnor(A, B), Nor(A, C)), C);
+ break;
+ case 0x93:
+ if (ABCIsConst)
+ Res = Xor(Nand(A, C), B);
+ break;
+ case 0x94:
+ if (ABCIsConst)
+ Res = Nor(Nor(A, B), Xor(Xnor(A, B), C));
+ break;
+ case 0x95:
+ if (ABCIsConst)
+ Res = Xor(Nand(A, B), C);
+ break;
+ case 0x96:
+ if (ABCIsConst)
+ Res = Xor(Xor(A, B), C);
+ break;
+ case 0x97:
+ if (ABCIsConst)
+ Res = Xor(Xor(A, B), Or(Nor(A, B), C));
+ break;
+ case 0x98:
+ if (ABCIsConst)
+ Res = Nor(Nor(A, B), Xor(B, C));
+ break;
+ case 0x99:
+ if (BCIsConst)
+ Res = Xnor(B, C);
+ break;
+ case 0x9a:
+ if (ABCIsConst)
+ Res = Xor(Nor(B, Not(A)), C);
+ break;
+ case 0x9b:
+ if (ABCIsConst)
+ Res = Or(Nor(A, B), Xnor(B, C));
+ break;
+ case 0x9c:
+ if (ABCIsConst)
+ Res = Xor(B, Nor(C, Not(A)));
+ break;
+ case 0x9d:
+ if (ABCIsConst)
+ Res = Or(Nor(A, C), Xnor(B, C));
+ break;
+ case 0x9e:
+ if (ABCIsConst)
+ Res = Xor(And(Xor(A, B), Nand(B, C)), C);
+ break;
+ case 0x9f:
+ if (ABCIsConst)
+ Res = Nand(A, Xor(B, C));
+ break;
+ case 0xa0:
+ Res = And(A, C);
+ break;
+ case 0xa1:
+ if (ABCIsConst)
+ Res = Xor(A, Nor(Nor(A, Not(B)), C));
+ break;
+ case 0xa2:
+ if (ABCIsConst)
+ Res = And(Or(A, Not(B)), C);
+ break;
+ case 0xa3:
+ if (ABCIsConst)
+ Res = Xor(Nor(Xor(B, C), A), C);
+ break;
+ case 0xa4:
+ if (ABCIsConst)
+ Res = Xor(A, Nor(Nor(A, B), C));
+ break;
+ case 0xa5:
+ if (ACIsConst)
+ Res = Xnor(A, C);
+ break;
+ case 0xa6:
+ if (ABCIsConst)
+ Res = Xor(Nor(A, Not(B)), C);
+ break;
+ case 0xa7:
+ if (ABCIsConst)
+ Res = Or(Nor(A, B), Xnor(A, C));
+ break;
+ case 0xa8:
+ if (ABCIsConst)
+ Res = And(Or(A, B), C);
+ break;
+ case 0xa9:
+ if (ABCIsConst)
+ Res = Xor(Nor(A, B), C);
+ break;
+ case 0xaa:
+ Res = C;
+ break;
+ case 0xab:
+ if (ABCIsConst)
+ Res = Or(Nor(A, B), C);
+ break;
+ case 0xac:
+ if (ABCIsConst)
+ Res = Xor(Nor(Xnor(B, C), A), C);
+ break;
+ case 0xad:
+ if (ABCIsConst)
+ Res = Or(Xnor(A, C), And(B, C));
+ break;
+ case 0xae:
+ if (ABCIsConst)
+ Res = Or(Nor(A, Not(B)), C);
+ break;
+ case 0xaf:
+ if (ACIsConst)
+ Res = Or(C, Not(A));
+ break;
+ case 0xb0:
+ if (ABCIsConst)
+ Res = And(A, Nand(B, Not(C)));
+ break;
+ case 0xb1:
+ if (ABCIsConst)
+ Res = Xor(A, Nor(Xor(A, B), C));
+ break;
+ case 0xb2:
+ if (ABCIsConst)
+ Res = Xor(Nor(Xor(A, B), Xnor(A, C)), A);
+ break;
+ case 0xb3:
+ if (ABCIsConst)
+ Res = Nand(Nand(A, C), B);
+ break;
+ case 0xb4:
+ if (ABCIsConst)
+ Res = Xor(A, Nor(C, Not(B)));
+ break;
+ case 0xb5:
+ if (ABCIsConst)
+ Res = Or(Xnor(A, C), Nor(B, C));
+ break;
+ case 0xb6:
+ if (ABCIsConst)
+ Res = Xor(And(Xor(A, B), Nand(A, C)), C);
+ break;
+ case 0xb7:
+ if (ABCIsConst)
+ Res = Nand(Xor(A, C), B);
+ break;
+ case 0xb8:
+ if (ABCIsConst)
+ Res = Xor(Nor(Xnor(A, C), B), C);
+ break;
+ case 0xb9:
+ if (ABCIsConst)
+ Res = Xor(Nor(And(A, C), B), C);
+ break;
+ case 0xba:
+ if (ABCIsConst)
+ Res = Or(Nor(B, Not(A)), C);
+ break;
+ case 0xbb:
+ if (BCIsConst)
+ Res = Or(C, Not(B));
+ break;
+ case 0xbc:
+ if (ABCIsConst)
+ Res = Xor(A, And(Nand(A, C), B));
+ break;
+ case 0xbd:
+ if (ABCIsConst)
+ Res = Or(Xor(A, B), Xnor(A, C));
+ break;
+ case 0xbe:
+ if (ABCIsConst)
+ Res = Or(Xor(A, B), C);
+ break;
+ case 0xbf:
+ if (ABCIsConst)
+ Res = Or(Nand(A, B), C);
+ break;
+ case 0xc0:
+ Res = And(A, B);
+ break;
+ case 0xc1:
+ if (ABCIsConst)
+ Res = Xor(A, Nor(Nor(A, Not(C)), B));
+ break;
+ case 0xc2:
+ if (ABCIsConst)
+ Res = Xor(A, Nor(Nor(A, C), B));
+ break;
+ case 0xc3:
+ if (ABIsConst)
+ Res = Xnor(A, B);
+ break;
+ case 0xc4:
+ if (ABCIsConst)
+ Res = And(Or(A, Not(C)), B);
+ break;
+ case 0xc5:
+ if (ABCIsConst)
+ Res = Xor(B, Nor(A, Xor(B, C)));
+ break;
+ case 0xc6:
+ if (ABCIsConst)
+ Res = Xor(Nor(A, Not(C)), B);
+ break;
+ case 0xc7:
+ if (ABCIsConst)
+ Res = Or(Xnor(A, B), Nor(A, C));
+ break;
+ case 0xc8:
+ if (ABCIsConst)
+ Res = And(Or(A, C), B);
+ break;
+ case 0xc9:
+ if (ABCIsConst)
+ Res = Xor(Nor(A, C), B);
+ break;
+ case 0xca:
+ if (ABCIsConst)
+ Res = Xor(B, Nor(A, Xnor(B, C)));
+ break;
+ case 0xcb:
+ if (ABCIsConst)
+ Res = Or(Xnor(A, B), And(B, C));
+ break;
+ case 0xcc:
+ Res = B;
+ break;
+ case 0xcd:
+ if (ABCIsConst)
+ Res = Or(Nor(A, C), B);
+ break;
+ case 0xce:
+ if (ABCIsConst)
+ Res = Or(Nor(A, Not(C)), B);
+ break;
+ case 0xcf:
+ if (ABIsConst)
+ Res = Or(B, Not(A));
+ break;
+ case 0xd0:
+ if (ABCIsConst)
+ Res = And(A, Or(B, Not(C)));
+ break;
+ case 0xd1:
+ if (ABCIsConst)
+ Res = Xor(A, Nor(Xor(A, C), B));
+ break;
+ case 0xd2:
+ if (ABCIsConst)
+ Res = Xor(A, Nor(B, Not(C)));
+ break;
+ case 0xd3:
+ if (ABCIsConst)
+ Res = Or(Xnor(A, B), Nor(B, C));
+ break;
+ case 0xd4:
+ if (ABCIsConst)
+ Res = Xor(Nor(Xnor(A, B), Xor(A, C)), A);
+ break;
+ case 0xd5:
+ if (ABCIsConst)
+ Res = Nand(Nand(A, B), C);
+ break;
+ case 0xd6:
+ if (ABCIsConst)
+ Res = Xor(Xor(A, B), Or(And(A, B), C));
+ break;
+ case 0xd7:
+ if (ABCIsConst)
+ Res = Nand(Xor(A, B), C);
+ break;
+ case 0xd8:
+ if (ABCIsConst)
+ Res = Xor(Nor(Xnor(A, B), C), B);
+ break;
+ case 0xd9:
+ if (ABCIsConst)
+ Res = Or(And(A, B), Xnor(B, C));
+ break;
+ case 0xda:
+ if (ABCIsConst)
+ Res = Xor(A, And(Nand(A, B), C));
+ break;
+ case 0xdb:
+ if (ABCIsConst)
+ Res = Or(Xnor(A, B), Xor(A, C));
+ break;
+ case 0xdc:
+ if (ABCIsConst)
+ Res = Or(B, Nor(C, Not(A)));
+ break;
+ case 0xdd:
+ if (BCIsConst)
+ Res = Or(B, Not(C));
+ break;
+ case 0xde:
+ if (ABCIsConst)
+ Res = Or(Xor(A, C), B);
+ break;
+ case 0xdf:
+ if (ABCIsConst)
+ Res = Or(Nand(A, C), B);
+ break;
+ case 0xe0:
+ if (ABCIsConst)
+ Res = And(A, Or(B, C));
+ break;
+ case 0xe1:
+ if (ABCIsConst)
+ Res = Xor(A, Nor(B, C));
+ break;
+ case 0xe2:
+ if (ABCIsConst)
+ Res = Xor(A, Nor(Xnor(A, C), B));
+ break;
+ case 0xe3:
+ if (ABCIsConst)
+ Res = Xor(A, Nor(And(A, C), B));
+ break;
+ case 0xe4:
+ if (ABCIsConst)
+ Res = Xor(A, Nor(Xnor(A, B), C));
+ break;
+ case 0xe5:
+ if (ABCIsConst)
+ Res = Xor(A, Nor(And(A, B), C));
+ break;
+ case 0xe6:
+ if (ABCIsConst)
+ Res = Or(And(A, B), Xor(B, C));
+ break;
+ case 0xe7:
+ if (ABCIsConst)
+ Res = Or(Xnor(A, B), Xnor(A, C));
+ break;
+ case 0xe8:
+ if (ABCIsConst)
+ Res = Xor(Or(A, B), Nor(Xnor(A, B), C));
+ break;
+ case 0xe9:
+ if (ABCIsConst)
+ Res = Xor(Xor(A, B), Nand(Nand(A, B), C));
+ break;
+ case 0xea:
+ if (ABCIsConst)
+ Res = Or(And(A, B), C);
+ break;
+ case 0xeb:
+ if (ABCIsConst)
+ Res = Or(Xnor(A, B), C);
+ break;
+ case 0xec:
+ if (ABCIsConst)
+ Res = Or(And(A, C), B);
+ break;
+ case 0xed:
+ if (ABCIsConst)
+ Res = Or(Xnor(A, C), B);
+ break;
+ case 0xee:
+ Res = Or(B, C);
+ break;
+ case 0xef:
+ if (ABCIsConst)
+ Res = Nand(A, Nor(B, C));
+ break;
+ case 0xf0:
+ Res = A;
+ break;
+ case 0xf1:
+ if (ABCIsConst)
+ Res = Or(A, Nor(B, C));
+ break;
+ case 0xf2:
+ if (ABCIsConst)
+ Res = Or(A, Nor(B, Not(C)));
+ break;
+ case 0xf3:
+ if (ABIsConst)
+ Res = Or(A, Not(B));
+ break;
+ case 0xf4:
+ if (ABCIsConst)
+ Res = Or(A, Nor(C, Not(B)));
+ break;
+ case 0xf5:
+ if (ACIsConst)
+ Res = Or(A, Not(C));
+ break;
+ case 0xf6:
+ if (ABCIsConst)
+ Res = Or(A, Xor(B, C));
+ break;
+ case 0xf7:
+ if (ABCIsConst)
+ Res = Or(A, Nand(B, C));
+ break;
+ case 0xf8:
+ if (ABCIsConst)
+ Res = Or(A, And(B, C));
+ break;
+ case 0xf9:
+ if (ABCIsConst)
+ Res = Or(A, Xnor(B, C));
+ break;
+ case 0xfa:
+ Res = Or(A, C);
+ break;
+ case 0xfb:
+ if (ABCIsConst)
+ Res = Nand(Nor(A, C), B);
+ break;
+ case 0xfc:
+ Res = Or(A, B);
+ break;
+ case 0xfd:
+ if (ABCIsConst)
+ Res = Nand(Nor(A, B), C);
+ break;
+ case 0xfe:
+ if (ABCIsConst)
+ Res = Or(Or(A, B), C);
+ break;
+ case 0xff:
+ Res = {Constant::getAllOnesValue(Ty), 0xff};
+ break;
+ }
+
+ assert((Res.first == nullptr || Res.second == Imm) &&
+ "Simplification of ternary logic does not verify!");
+ return Res.first;
+}
+
static Value *simplifyX86insertps(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder) {
auto *CInt = dyn_cast<ConstantInt>(II.getArgOperand(2));
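The verification trick in simplifyTernarylogic above works because, under the masks A = 0xf0, B = 0xcc, C = 0xaa, evaluating a boolean expression bitwise over those bytes yields exactly the VPTERNLOG immediate that selects it. A tiny standalone check of a few table entries (illustrative only):

    #include <cstdint>

    // Each mask lists that input's value across the 8 rows of the truth table,
    // so a bitwise expression over the masks reproduces its immediate.
    constexpr std::uint8_t A = 0xf0, B = 0xcc, C = 0xaa;
    static_assert((A & B & C) == 0x80, "imm 0x80 is and(and(A, B), C)");
    static_assert((A ^ B ^ C) == 0x96, "imm 0x96 is xor(xor(A, B), C)");
    static_assert((B ^ C) == 0x66, "imm 0x66 is xor(B, C)");
    static_assert(static_cast<std::uint8_t>(~(A | B | C)) == 0x01,
                  "imm 0x01 is nor(or(A, B), C)");

So, for example, a pternlog call with immediate 0x66 is rewritten to a single xor of its second and third operands, while immediates with no single-instruction equivalent are only folded when all three inputs are constants.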
@@ -1728,6 +2796,16 @@ X86TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
}
break;
+ case Intrinsic::x86_avx512_pternlog_d_128:
+ case Intrinsic::x86_avx512_pternlog_d_256:
+ case Intrinsic::x86_avx512_pternlog_d_512:
+ case Intrinsic::x86_avx512_pternlog_q_128:
+ case Intrinsic::x86_avx512_pternlog_q_256:
+ case Intrinsic::x86_avx512_pternlog_q_512:
+ if (Value *V = simplifyTernarylogic(II, IC.Builder)) {
+ return IC.replaceInstUsesWith(II, V);
+ }
+ break;
default:
break;
}
diff --git a/llvm/lib/Target/X86/X86InstrAMX.td b/llvm/lib/Target/X86/X86InstrAMX.td
index f47a06869796..2dbb3e5ee316 100644
--- a/llvm/lib/Target/X86/X86InstrAMX.td
+++ b/llvm/lib/Target/X86/X86InstrAMX.td
@@ -215,3 +215,45 @@ let Predicates = [HasAMXFP16, In64BitMode] in {
}
}
} // HasAMXTILE, HasAMXFP16
+
+let Predicates = [HasAMXCOMPLEX, In64BitMode] in {
+ let SchedRW = [WriteSystem] in {
+ let Constraints = "$src1 = $dst" in {
+ def TCMMIMFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
+ (ins TILE:$src1, TILE:$src2, TILE:$src3),
+ "tcmmimfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
+ []>, T8PD, VEX_4V;
+ def TCMMRLFP16PS : I<0x6c, MRMSrcReg4VOp3, (outs TILE:$dst),
+ (ins TILE:$src1, TILE:$src2, TILE:$src3),
+ "tcmmrlfp16ps\t{$src3, $src2, $src1|$src1, $src2, $src3}",
+ []>, VEX_4V, WIG, T8PS;
+
+ } // Constraints = "$src1 = $dst"
+
+ let Constraints = "$src4 = $dst" in {
+ def PTCMMIMFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
+ GR16:$src2, GR16:$src3, TILE:$src4,
+ TILE:$src5, TILE:$src6),
+ [(set TILE: $dst,
+ (int_x86_tcmmimfp16ps_internal GR16:$src1, GR16:$src2,
+ GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
+ def PTCMMRLFP16PSV : PseudoI<(outs TILE:$dst), (ins GR16:$src1,
+ GR16:$src2, GR16:$src3, TILE:$src4,
+ TILE:$src5, TILE:$src6),
+ [(set TILE: $dst,
+ (int_x86_tcmmrlfp16ps_internal GR16:$src1, GR16:$src2,
+ GR16:$src3, TILE:$src4, TILE:$src5, TILE:$src6))]>;
+ }
+
+ let usesCustomInserter = 1 in {
+ def PTCMMIMFP16PS : PseudoI<(outs), (ins u8imm:$src1,
+ u8imm:$src2, u8imm:$src3),
+ [(int_x86_tcmmimfp16ps timm:$src1,
+ timm:$src2, timm:$src3)]>;
+ def PTCMMRLFP16PS : PseudoI<(outs), (ins u8imm:$src1,
+ u8imm:$src2, u8imm:$src3),
+ [(int_x86_tcmmrlfp16ps timm:$src1,
+ timm:$src2, timm:$src3)]>;
+ }
+ } // SchedRW = [WriteSystem]
+}
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 6da4dd2b942c..ecb5c3e91240 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -628,7 +628,7 @@ multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
X86VectorVTInfo< 4, EltVT64, VR256X>,
X86VectorVTInfo< 8, EltVT64, VR512>,
- vinsert256_insert, sched>, VEX_W, EVEX_V512;
+ vinsert256_insert, sched>, REX_W, EVEX_V512;
// Even with DQI we'd like to only use these instructions for masking.
let Predicates = [HasVLX, HasDQI] in
@@ -644,7 +644,7 @@ multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
X86VectorVTInfo< 2, EltVT64, VR128X>,
X86VectorVTInfo< 8, EltVT64, VR512>,
null_frag, vinsert128_insert, sched>,
- VEX_W, EVEX_V512;
+ REX_W, EVEX_V512;
defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
X86VectorVTInfo< 8, EltVT32, VR256X>,
@@ -883,7 +883,7 @@ multiclass vextract_for_size_split<int Opcode,
"vextract" # To.EltTypeName # "x" # To.NumElts #
"\t{$idx, $src1, $dst {${mask}}|"
"$dst {${mask}}, $src1, $idx}", []>,
- EVEX_K, EVEX, Sched<[SchedMR]>, NotMemoryFoldable;
+ EVEX_K, EVEX, Sched<[SchedMR]>;
}
}
@@ -923,7 +923,7 @@ multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
X86VectorVTInfo< 8, EltVT64, VR512>,
X86VectorVTInfo< 4, EltVT64, VR256X>,
vextract256_extract, SchedRR, SchedMR>,
- VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
+ REX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
}
let Predicates = [HasVLX] in
defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
@@ -946,7 +946,7 @@ multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
X86VectorVTInfo< 8, EltVT64, VR512>,
X86VectorVTInfo< 2, EltVT64, VR128X>,
null_frag, vextract128_extract, SchedRR, SchedMR>,
- VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
+ REX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
X86VectorVTInfo<16, EltVT32, VR512>,
X86VectorVTInfo< 8, EltVT32, VR256X>,
@@ -1178,14 +1178,14 @@ def VEXTRACTPSZrr : AVX512AIi8<0x17, MRMDestReg, (outs GR32orGR64:$dst),
(ins VR128X:$src1, u8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2))]>,
- EVEX, VEX_WIG, Sched<[WriteVecExtract]>;
+ EVEX, WIG, Sched<[WriteVecExtract]>;
def VEXTRACTPSZmr : AVX512AIi8<0x17, MRMDestMem, (outs),
(ins f32mem:$dst, VR128X:$src1, u8imm:$src2),
"vextractps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (extractelt (bc_v4i32 (v4f32 VR128X:$src1)), imm:$src2),
addr:$dst)]>,
- EVEX, VEX_WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
+ EVEX, WIG, EVEX_CD8<32, CD8VT1>, Sched<[WriteVecExtractSt]>;
//===---------------------------------------------------------------------===//
// AVX-512 BROADCAST
@@ -1425,7 +1425,7 @@ defm VPBROADCASTWr : avx512_int_broadcastbw_reg_vl<0x7B, "VPBROADCASTWr",
defm VPBROADCASTDr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i32_info,
X86VBroadcast, GR32, HasAVX512>;
defm VPBROADCASTQr : avx512_int_broadcast_reg_vl<0x7C, avx512vl_i64_info,
- X86VBroadcast, GR64, HasAVX512>, VEX_W;
+ X86VBroadcast, GR64, HasAVX512>, REX_W;
multiclass avx512_int_broadcast_rm_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _, Predicate prd,
@@ -1520,10 +1520,10 @@ defm VBROADCASTF32X4 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
X86SubVBroadcastld128, v16f32_info, v4f32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm VBROADCASTI64X4 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti64x4",
- X86SubVBroadcastld256, v8i64_info, v4i64x_info>, VEX_W,
+ X86SubVBroadcastld256, v8i64_info, v4i64x_info>, REX_W,
EVEX_V512, EVEX_CD8<64, CD8VT4>;
defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
- X86SubVBroadcastld256, v8f64_info, v4f64x_info>, VEX_W,
+ X86SubVBroadcastld256, v8f64_info, v4f64x_info>, REX_W,
EVEX_V512, EVEX_CD8<64, CD8VT4>;
let Predicates = [HasAVX512] in {
@@ -1664,13 +1664,13 @@ def : Pat<(vselect_mask VK4WM:$mask,
let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
- X86SubVBroadcastld128, v8i64_info, v2i64x_info>, VEX_W,
+ X86SubVBroadcastld128, v8i64_info, v2i64x_info>, REX_W,
EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
X86SubVBroadcastld256, v16i32_info, v8i32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
- X86SubVBroadcastld128, v8f64_info, v2f64x_info>, VEX_W,
+ X86SubVBroadcastld128, v8f64_info, v2f64x_info>, REX_W,
EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
X86SubVBroadcastld256, v16f32_info, v8f32x_info>,
@@ -1768,7 +1768,7 @@ multiclass avx512_mask_broadcast<bits<8> opc, string OpcodeStr,
defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d",
avx512vl_i32_info, VK16>;
defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
- avx512vl_i64_info, VK8>, VEX_W;
+ avx512vl_i64_info, VK8>, REX_W;
//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
@@ -1847,17 +1847,17 @@ multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d", WriteVarShuffle256,
avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q", WriteVarShuffle256,
- avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+ avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w", WriteVarShuffle256,
avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
- VEX_W, EVEX_CD8<16, CD8VF>;
+ REX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b", WriteVarShuffle256,
avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", WriteFVarShuffle256,
avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", WriteFVarShuffle256,
- avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+ avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
// Extra patterns to deal with extra bitcasts due to passthru and index being
// different types on the fp versions.
@@ -1965,17 +1965,17 @@ multiclass avx512_perm_t_sizes_bw<bits<8> opc, string OpcodeStr,
defm VPERMT2D : avx512_perm_t_sizes<0x7E, "vpermt2d", WriteVarShuffle256,
avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2Q : avx512_perm_t_sizes<0x7E, "vpermt2q", WriteVarShuffle256,
- avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+ avx512vl_i64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
defm VPERMT2W : avx512_perm_t_sizes_bw<0x7D, "vpermt2w", WriteVarShuffle256,
avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
- VEX_W, EVEX_CD8<16, CD8VF>;
+ REX_W, EVEX_CD8<16, CD8VF>;
defm VPERMT2B : avx512_perm_t_sizes_bw<0x7D, "vpermt2b", WriteVarShuffle256,
avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
EVEX_CD8<8, CD8VF>;
defm VPERMT2PS : avx512_perm_t_sizes<0x7F, "vpermt2ps", WriteFVarShuffle256,
avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMT2PD : avx512_perm_t_sizes<0x7F, "vpermt2pd", WriteFVarShuffle256,
- avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+ avx512vl_f64_info, avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
//===----------------------------------------------------------------------===//
// AVX-512 - BLEND using mask
@@ -1998,7 +1998,7 @@ multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
- []>, EVEX_4V, EVEX_KZ, Sched<[sched]>, NotMemoryFoldable;
+ []>, EVEX_4V, EVEX_KZ, Sched<[sched]>;
let mayLoad = 1 in {
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2),
@@ -2017,7 +2017,7 @@ multiclass WriteFVarBlendask<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_KZ, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
}
@@ -2038,7 +2038,7 @@ multiclass WriteFVarBlendask_rmb<bits<8> opc, string OpcodeStr,
"\t{${src2}", _.BroadcastStr, ", $src1, $dst {${mask}} {z}|",
"$dst {${mask}} {z}, $src1, ${src2}", _.BroadcastStr, "}"), []>,
EVEX_4V, EVEX_KZ, EVEX_B, EVEX_CD8<_.EltSize, CD8VF>,
- Sched<[sched.Folded, sched.ReadAfterFold]>, NotMemoryFoldable;
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
def rmb : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2),
@@ -2083,15 +2083,15 @@ multiclass blendmask_bw<bits<8> opc, string OpcodeStr, X86SchedWriteWidths sched
defm VBLENDMPS : blendmask_dq<0x65, "vblendmps", SchedWriteFVarBlend,
avx512vl_f32_info>;
defm VBLENDMPD : blendmask_dq<0x65, "vblendmpd", SchedWriteFVarBlend,
- avx512vl_f64_info>, VEX_W;
+ avx512vl_f64_info>, REX_W;
defm VPBLENDMD : blendmask_dq<0x64, "vpblendmd", SchedWriteVarBlend,
avx512vl_i32_info>;
defm VPBLENDMQ : blendmask_dq<0x64, "vpblendmq", SchedWriteVarBlend,
- avx512vl_i64_info>, VEX_W;
+ avx512vl_i64_info>, REX_W;
defm VPBLENDMB : blendmask_bw<0x66, "vpblendmb", SchedWriteVarBlend,
avx512vl_i8_info>;
defm VPBLENDMW : blendmask_bw<0x66, "vpblendmw", SchedWriteVarBlend,
- avx512vl_i16_info>, VEX_W;
+ avx512vl_i16_info>, REX_W;
//===----------------------------------------------------------------------===//
// Compare Instructions
@@ -2174,7 +2174,7 @@ let Predicates = [HasAVX512] in {
let ExeDomain = SSEPackedDouble in
defm VCMPSDZ : avx512_cmp_scalar<f64x_info, X86cmpms, X86cmpmsSAE,
X86cmpms_su, X86cmpmsSAE_su,
- SchedWriteFCmp.Scl>, AVX512XDIi8Base, VEX_W;
+ SchedWriteFCmp.Scl>, AVX512XDIi8Base, REX_W;
}
let Predicates = [HasFP16], ExeDomain = SSEPackedSingle in
defm VCMPSHZ : avx512_cmp_scalar<f16x_info, X86cmpms, X86cmpmsSAE,
@@ -2273,11 +2273,11 @@ let AddedComplexity = 2 in {
// FIXME: Is there a better scheduler class for VPCMP?
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb",
SchedWriteVecALU, avx512vl_i8_info, HasBWI, 1>,
- EVEX_CD8<8, CD8VF>, VEX_WIG;
+ EVEX_CD8<8, CD8VF>, WIG;
defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw",
SchedWriteVecALU, avx512vl_i16_info, HasBWI, 1>,
- EVEX_CD8<16, CD8VF>, VEX_WIG;
+ EVEX_CD8<16, CD8VF>, WIG;
defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
SchedWriteVecALU, avx512vl_i32_info, HasAVX512, 1>,
@@ -2285,15 +2285,15 @@ defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd",
defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq",
SchedWriteVecALU, avx512vl_i64_info, HasAVX512, 1>,
- T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
+ T8PD, REX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb",
SchedWriteVecALU, avx512vl_i8_info, HasBWI>,
- EVEX_CD8<8, CD8VF>, VEX_WIG;
+ EVEX_CD8<8, CD8VF>, WIG;
defm VPCMPGTW : avx512_icmp_packed_vl<0x65, "vpcmpgtw",
SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
- EVEX_CD8<16, CD8VF>, VEX_WIG;
+ EVEX_CD8<16, CD8VF>, WIG;
defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
SchedWriteVecALU, avx512vl_i32_info, HasAVX512>,
@@ -2301,7 +2301,7 @@ defm VPCMPGTD : avx512_icmp_packed_rmb_vl<0x66, "vpcmpgtd",
defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq",
SchedWriteVecALU, avx512vl_i64_info, HasAVX512>,
- T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
+ T8PD, REX_W, EVEX_CD8<64, CD8VF>;
}
def X86pcmpm_imm : SDNodeXForm<setcc, [{
@@ -2485,10 +2485,10 @@ defm VPCMPUB : avx512_icmp_cc_vl<0x3E, "ub", X86pcmpum, X86pcmpum_su,
defm VPCMPW : avx512_icmp_cc_vl<0x3F, "w", X86pcmpm, X86pcmpm_su,
SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
- VEX_W, EVEX_CD8<16, CD8VF>;
+ REX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPUW : avx512_icmp_cc_vl<0x3E, "uw", X86pcmpum, X86pcmpum_su,
SchedWriteVecALU, avx512vl_i16_info, HasBWI>,
- VEX_W, EVEX_CD8<16, CD8VF>;
+ REX_W, EVEX_CD8<16, CD8VF>;
defm VPCMPD : avx512_icmp_cc_rmb_vl<0x1F, "d", X86pcmpm, X86pcmpm_su,
SchedWriteVecALU, avx512vl_i32_info,
@@ -2499,10 +2499,10 @@ defm VPCMPUD : avx512_icmp_cc_rmb_vl<0x1E, "ud", X86pcmpum, X86pcmpum_su,
defm VPCMPQ : avx512_icmp_cc_rmb_vl<0x1F, "q", X86pcmpm, X86pcmpm_su,
SchedWriteVecALU, avx512vl_i64_info,
- HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
+ HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPUQ : avx512_icmp_cc_rmb_vl<0x1E, "uq", X86pcmpum, X86pcmpum_su,
SchedWriteVecALU, avx512vl_i64_info,
- HasAVX512>, VEX_W, EVEX_CD8<64, CD8VF>;
+ HasAVX512>, REX_W, EVEX_CD8<64, CD8VF>;
def X86cmpm_su : PatFrag<(ops node:$src1, node:$src2, node:$cc),
(X86cmpm node:$src1, node:$src2, node:$cc), [{
@@ -2656,7 +2656,7 @@ multiclass avx512_vcmp<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _,
}
defm VCMPPD : avx512_vcmp<SchedWriteFCmp, avx512vl_f64_info>,
- AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+ AVX512PDIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
defm VCMPPS : avx512_vcmp<SchedWriteFCmp, avx512vl_f32_info>,
AVX512PSIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VCMPPH : avx512_vcmp<SchedWriteFCmp, avx512vl_f16_info, HasFP16>,
@@ -2835,13 +2835,13 @@ multiclass avx512_fp_fpclass_all<string OpcodeStr, bits<8> opcVec,
EVEX_CD8<32, CD8VF>, AVX512AIi8Base;
defm PD : avx512_vector_fpclass_all<OpcodeStr, avx512vl_f64_info, opcVec,
sched, HasDQI>,
- EVEX_CD8<64, CD8VF>, AVX512AIi8Base, VEX_W;
+ EVEX_CD8<64, CD8VF>, AVX512AIi8Base, REX_W;
defm SSZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
sched.Scl, f32x_info, HasDQI>, VEX_LIG,
EVEX_CD8<32, CD8VT1>, AVX512AIi8Base;
defm SDZ : avx512_scalar_fpclass<opcScalar, OpcodeStr,
sched.Scl, f64x_info, HasDQI>, VEX_LIG,
- EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, VEX_W;
+ EVEX_CD8<64, CD8VT1>, AVX512AIi8Base, REX_W;
}
defm VFPCLASS : avx512_fp_fpclass_all<"vfpclass", 0x66, 0x67, SchedWriteFCmp>, EVEX;
@@ -2894,13 +2894,13 @@ let Predicates = [HasAVX512] in
let Predicates = [HasBWI] in {
defm KMOVD : avx512_mask_mov<0x90, 0x90, 0x91, "kmovd", VK32, v32i1,i32mem>,
- VEX, PD, VEX_W;
+ VEX, PD, REX_W;
defm KMOVD : avx512_mask_mov_gpr<0x92, 0x93, "kmovd", VK32, GR32>,
VEX, XD;
defm KMOVQ : avx512_mask_mov<0x90, 0x90, 0x91, "kmovq", VK64, v64i1, i64mem>,
- VEX, PS, VEX_W;
+ VEX, PS, REX_W;
defm KMOVQ : avx512_mask_mov_gpr<0x92, 0x93, "kmovq", VK64, GR64>,
- VEX, XD, VEX_W;
+ VEX, XD, REX_W;
}
// GR from/to mask register
@@ -2990,7 +2990,7 @@ let Predicates = [HasAVX512] in {
def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
- (KMOVWkr (AND32ri8
+ (KMOVWkr (AND32ri
(INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
(i32 1)))>;
}
@@ -3015,9 +3015,9 @@ multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
defm W : avx512_mask_unop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
sched, HasAVX512>, VEX, PS;
defm D : avx512_mask_unop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
- sched, HasBWI>, VEX, PD, VEX_W;
+ sched, HasBWI>, VEX, PD, REX_W;
defm Q : avx512_mask_unop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
- sched, HasBWI>, VEX, PS, VEX_W;
+ sched, HasBWI>, VEX, PS, REX_W;
}
// TODO - do we need a X86SchedWriteWidths::KMASK type?
@@ -3058,9 +3058,9 @@ multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
defm W : avx512_mask_binop<opc, !strconcat(OpcodeStr, "w"), VK16, OpNode,
sched, prdW, IsCommutable>, VEX_4V, VEX_L, PS;
defm D : avx512_mask_binop<opc, !strconcat(OpcodeStr, "d"), VK32, OpNode,
- sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PD;
+ sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, REX_W, PD;
defm Q : avx512_mask_binop<opc, !strconcat(OpcodeStr, "q"), VK64, OpNode,
- sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, VEX_W, PS;
+ sched, HasBWI, IsCommutable>, VEX_4V, VEX_L, REX_W, PS;
}
// These nodes use 'vnot' instead of 'not' to support vectors.
@@ -3124,7 +3124,7 @@ multiclass avx512_mask_unpck<string Suffix, X86KVectorVTInfo Dst,
defm KUNPCKBW : avx512_mask_unpck<"bw", v16i1_info, v8i1_info, WriteShuffle, HasAVX512>, PD;
defm KUNPCKWD : avx512_mask_unpck<"wd", v32i1_info, v16i1_info, WriteShuffle, HasBWI>, PS;
-defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, VEX_W;
+defm KUNPCKDQ : avx512_mask_unpck<"dq", v64i1_info, v32i1_info, WriteShuffle, HasBWI>, PS, REX_W;
// Mask bit testing
multiclass avx512_mask_testop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
@@ -3145,9 +3145,9 @@ multiclass avx512_mask_testop_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm W : avx512_mask_testop<opc, OpcodeStr#"w", VK16, OpNode, sched, prdW>,
VEX, PS;
defm Q : avx512_mask_testop<opc, OpcodeStr#"q", VK64, OpNode, sched, HasBWI>,
- VEX, PS, VEX_W;
+ VEX, PS, REX_W;
defm D : avx512_mask_testop<opc, OpcodeStr#"d", VK32, OpNode, sched, HasBWI>,
- VEX, PD, VEX_W;
+ VEX, PD, REX_W;
}
// TODO - do we need a X86SchedWriteWidths::KMASK type?
@@ -3168,13 +3168,13 @@ multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
multiclass avx512_mask_shiftop_w<bits<8> opc1, bits<8> opc2, string OpcodeStr,
SDNode OpNode, X86FoldableSchedWrite sched> {
defm W : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "w"), VK16, OpNode,
- sched>, VEX, TAPD, VEX_W;
+ sched>, VEX, TAPD, REX_W;
let Predicates = [HasDQI] in
defm B : avx512_mask_shiftop<opc1, !strconcat(OpcodeStr, "b"), VK8, OpNode,
sched>, VEX, TAPD;
let Predicates = [HasBWI] in {
defm Q : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "q"), VK64, OpNode,
- sched>, VEX, TAPD, VEX_W;
+ sched>, VEX, TAPD, REX_W;
defm D : avx512_mask_shiftop<opc2, !strconcat(OpcodeStr, "d"), VK32, OpNode,
sched>, VEX, TAPD;
}
@@ -3533,21 +3533,19 @@ multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
def rr_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst), (ins _.RC:$src),
OpcodeStr # "\t{$src, $dst|$dst, $src}",
[], _.ExeDomain>, EVEX,
- FoldGenData<BaseName#_.ZSuffix#rr>, Sched<[Sched.RR]>,
+ Sched<[Sched.RR]>,
EVEX2VEXOverride<EVEX2VEXOvrd#"rr_REV">;
def rrk_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, ${dst} {${mask}}|"#
"${dst} {${mask}}, $src}",
[], _.ExeDomain>, EVEX, EVEX_K,
- FoldGenData<BaseName#_.ZSuffix#rrk>,
Sched<[Sched.RR]>;
def rrkz_REV : AVX512PI<opc, MRMDestReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, ${dst} {${mask}} {z}|" #
"${dst} {${mask}} {z}, $src}",
[], _.ExeDomain>, EVEX, EVEX_KZ,
- FoldGenData<BaseName#_.ZSuffix#rrkz>,
Sched<[Sched.RR]>;
}
@@ -3561,8 +3559,7 @@ multiclass avx512_store<bits<8> opc, string OpcodeStr, string BaseName,
def mrk : AVX512PI<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}",
- [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>,
- NotMemoryFoldable;
+ [], _.ExeDomain>, EVEX, EVEX_K, Sched<[Sched.MR]>;
def: Pat<(mstore (_.VT _.RC:$src), addr:$ptr, _.KRCWM:$mask),
(!cast<Instruction>(BaseName#_.ZSuffix#mrk) addr:$ptr,
@@ -3626,7 +3623,7 @@ defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
avx512_alignedstore_vl<0x29, "vmovapd", avx512vl_f64_info,
HasAVX512, SchedWriteFMoveLS, "VMOVAPD">,
- PD, VEX_W, EVEX_CD8<64, CD8VF>;
+ PD, REX_W, EVEX_CD8<64, CD8VF>;
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
SchedWriteFMoveLS, "VMOVUPS", 0, null_frag>,
@@ -3638,7 +3635,7 @@ defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
SchedWriteFMoveLS, "VMOVUPD", 0, null_frag>,
avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512,
SchedWriteFMoveLS, "VMOVUPD">,
- PD, VEX_W, EVEX_CD8<64, CD8VF>;
+ PD, REX_W, EVEX_CD8<64, CD8VF>;
defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info,
HasAVX512, SchedWriteVecMoveLS,
@@ -3654,7 +3651,7 @@ defm VMOVDQA64 : avx512_alignedload_vl<0x6F, "vmovdqa64", avx512vl_i64_info,
avx512_alignedstore_vl<0x7F, "vmovdqa64", avx512vl_i64_info,
HasAVX512, SchedWriteVecMoveLS,
"VMOVDQA">,
- PD, VEX_W, EVEX_CD8<64, CD8VF>;
+ PD, REX_W, EVEX_CD8<64, CD8VF>;
defm VMOVDQU8 : avx512_load_vl<0x6F, "vmovdqu8", avx512vl_i8_info, HasBWI,
SchedWriteVecMoveLS, "VMOVDQU", 1>,
@@ -3666,7 +3663,7 @@ defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI,
SchedWriteVecMoveLS, "VMOVDQU", 1>,
avx512_store_vl<0x7F, "vmovdqu16", avx512vl_i16_info, HasBWI,
SchedWriteVecMoveLS, "VMOVDQU", 1>,
- XD, VEX_W, EVEX_CD8<16, CD8VF>;
+ XD, REX_W, EVEX_CD8<16, CD8VF>;
defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
SchedWriteVecMoveLS, "VMOVDQU", 1, null_frag>,
@@ -3678,7 +3675,7 @@ defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
SchedWriteVecMoveLS, "VMOVDQU", 0, null_frag>,
avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
SchedWriteVecMoveLS, "VMOVDQU">,
- XS, VEX_W, EVEX_CD8<64, CD8VF>;
+ XS, REX_W, EVEX_CD8<64, CD8VF>;
// Special instructions to help with spilling when we don't have VLX. We need
// to load or store from a ZMM register instead. These are converted in
@@ -4015,21 +4012,21 @@ def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$sr
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v2i64 (scalar_to_vector GR64:$src)))]>,
- EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
+ EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst),
(ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>,
- EVEX, VEX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
+ EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecLoad]>;
let isCodeGenOnly = 1 in {
def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set FR64X:$dst, (bitconvert GR64:$src))]>,
- EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
+ EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (bitconvert FR64X:$src))]>,
- EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
+ EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
}
} // ExeDomain = SSEPackedInt
@@ -4065,13 +4062,13 @@ def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
(iPTR 0)))]>,
- PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>,
+ PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>,
Requires<[HasAVX512]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64Zmr : I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>, PD,
- EVEX, VEX_W, Sched<[WriteVecStore]>,
+ EVEX, REX_W, EVEX_CD8<64, CD8VT1>, Sched<[WriteVecStore]>,
Requires<[HasAVX512, In64BitMode]>;
def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
@@ -4079,14 +4076,14 @@ def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
"vmovq\t{$src, $dst|$dst, $src}",
[(store (extractelt (v2i64 VR128X:$src), (iPTR 0)),
addr:$dst)]>,
- EVEX, PD, VEX_W, EVEX_CD8<64, CD8VT1>,
+ EVEX, PD, REX_W, EVEX_CD8<64, CD8VT1>,
Sched<[WriteVecStore]>, Requires<[HasAVX512]>;
let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>,
- EVEX, VEX_W, Sched<[SchedWriteVecLogic.XMM]>;
+ EVEX, REX_W, Sched<[SchedWriteVecLogic.XMM]>;
} // ExeDomain = SSEPackedInt
def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
@@ -4115,7 +4112,7 @@ def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst,
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
- EVEX, VEX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
+ EVEX, REX_W, EVEX_CD8<8, CD8VT8>, Sched<[WriteVecLoad]>;
} // ExeDomain = SSEPackedInt
// Allow "vmovd" but print "vmovq".
@@ -4198,8 +4195,7 @@ multiclass avx512_move_scalar<string asm, SDNode OpNode, PatFrag vzload_frag,
def mrk: AVX512PI<0x11, MRMDestMem, (outs),
(ins _.ScalarMemOp:$dst, VK1WM:$mask, _.RC:$src),
!strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
- [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>,
- NotMemoryFoldable;
+ [], _.ExeDomain>, EVEX, EVEX_K, Sched<[WriteFStore]>;
}
}
@@ -4207,7 +4203,7 @@ defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, X86vzload32, f32x_info>,
VEX_LIG, XS, EVEX_CD8<32, CD8VT1>;
defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, X86vzload64, f64x_info>,
- VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+ VEX_LIG, XD, REX_W, EVEX_CD8<64, CD8VT1>;
defm VMOVSHZ : avx512_move_scalar<"vmovsh", X86Movsh, X86vzload16, f16x_info,
HasFP16>,
@@ -4537,7 +4533,6 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
(ins VR128X:$src1, VR128X:$src2),
"vmovsh\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, T_MAP5XS, EVEX_4V, VEX_LIG,
- FoldGenData<"VMOVSHZrr">,
Sched<[SchedWriteFShuffle.XMM]>;
let Constraints = "$src0 = $dst" in
@@ -4547,7 +4542,6 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
"vmovsh\t{$src2, $src1, $dst {${mask}}|"#
"$dst {${mask}}, $src1, $src2}",
[]>, T_MAP5XS, EVEX_K, EVEX_4V, VEX_LIG,
- FoldGenData<"VMOVSHZrrk">,
Sched<[SchedWriteFShuffle.XMM]>;
def VMOVSHZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
@@ -4555,14 +4549,12 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
"vmovsh\t{$src2, $src1, $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, $src1, $src2}",
[]>, EVEX_KZ, T_MAP5XS, EVEX_4V, VEX_LIG,
- FoldGenData<"VMOVSHZrrkz">,
Sched<[SchedWriteFShuffle.XMM]>;
}
def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XS, EVEX_4V, VEX_LIG,
- FoldGenData<"VMOVSSZrr">,
Sched<[SchedWriteFShuffle.XMM]>;
let Constraints = "$src0 = $dst" in
@@ -4572,7 +4564,6 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
"vmovss\t{$src2, $src1, $dst {${mask}}|"#
"$dst {${mask}}, $src1, $src2}",
[]>, EVEX_K, XS, EVEX_4V, VEX_LIG,
- FoldGenData<"VMOVSSZrrk">,
Sched<[SchedWriteFShuffle.XMM]>;
def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
@@ -4580,14 +4571,12 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
"vmovss\t{$src2, $src1, $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, $src1, $src2}",
[]>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
- FoldGenData<"VMOVSSZrrkz">,
Sched<[SchedWriteFShuffle.XMM]>;
def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XD, EVEX_4V, VEX_LIG, VEX_W,
- FoldGenData<"VMOVSDZrr">,
+ []>, XD, EVEX_4V, VEX_LIG, REX_W,
Sched<[SchedWriteFShuffle.XMM]>;
let Constraints = "$src0 = $dst" in
@@ -4597,8 +4586,7 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
"vmovsd\t{$src2, $src1, $dst {${mask}}|"#
"$dst {${mask}}, $src1, $src2}",
[]>, EVEX_K, XD, EVEX_4V, VEX_LIG,
- VEX_W, FoldGenData<"VMOVSDZrrk">,
- Sched<[SchedWriteFShuffle.XMM]>;
+ REX_W, Sched<[SchedWriteFShuffle.XMM]>;
def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
(ins f64x_info.KRCWM:$mask, VR128X:$src1,
@@ -4606,8 +4594,7 @@ let hasSideEffects = 0, isCodeGenOnly = 1, ForceDisassemble = 1 in {
"vmovsd\t{$src2, $src1, $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, $src1, $src2}",
[]>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
- VEX_W, FoldGenData<"VMOVSDZrrkz">,
- Sched<[SchedWriteFShuffle.XMM]>;
+ REX_W, Sched<[SchedWriteFShuffle.XMM]>;
}
def : InstAlias<"vmovsh.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -4744,7 +4731,7 @@ def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128X:$dst, (v2i64 (X86vzmovl
(v2i64 VR128X:$src))))]>,
- EVEX, VEX_W;
+ EVEX, REX_W;
}
let Predicates = [HasAVX512] in {
@@ -4846,7 +4833,7 @@ multiclass avx512_movnt_vl<bits<8> opc, string OpcodeStr,
defm VMOVNTDQ : avx512_movnt_vl<0xE7, "vmovntdq", avx512vl_i64_info,
SchedWriteVecMoveLSNT>, PD;
defm VMOVNTPD : avx512_movnt_vl<0x2B, "vmovntpd", avx512vl_f64_info,
- SchedWriteFMoveLSNT>, PD, VEX_W;
+ SchedWriteFMoveLSNT>, PD, REX_W;
defm VMOVNTPS : avx512_movnt_vl<0x2B, "vmovntps", avx512vl_f32_info,
SchedWriteFMoveLSNT>, PS;
@@ -4986,7 +4973,7 @@ multiclass avx512_binop_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
sched, prd, IsCommutable>,
- VEX_W, EVEX_CD8<64, CD8VF>;
+ REX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_binop_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -5001,7 +4988,7 @@ multiclass avx512_binop_rm_vl_w<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i16_info,
sched, prd, IsCommutable>, EVEX_CD8<16, CD8VF>,
- VEX_WIG;
+ WIG;
}
multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -5009,7 +4996,7 @@ multiclass avx512_binop_rm_vl_b<bits<8> opc, string OpcodeStr, SDNode OpNode,
bit IsCommutable = 0> {
defm NAME : avx512_binop_rm_vl<opc, OpcodeStr, OpNode, avx512vl_i8_info,
sched, prd, IsCommutable>, EVEX_CD8<8, CD8VF>,
- VEX_WIG;
+ WIG;
}
multiclass avx512_binop_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
@@ -5116,16 +5103,16 @@ multiclass avx512_binop_all<bits<8> opc, string OpcodeStr,
defm NAME#Z : avx512_binop_rm2<opc, OpcodeStr, sched.ZMM, OpNode,
_SrcVTInfo.info512, _DstVTInfo.info512,
v8i64_info, IsCommutable>,
- EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_W;
+ EVEX_V512, EVEX_CD8<64, CD8VF>, REX_W;
let Predicates = [HasVLX, prd] in {
defm NAME#Z256 : avx512_binop_rm2<opc, OpcodeStr, sched.YMM, OpNode,
_SrcVTInfo.info256, _DstVTInfo.info256,
v4i64x_info, IsCommutable>,
- EVEX_V256, EVEX_CD8<64, CD8VF>, VEX_W;
+ EVEX_V256, EVEX_CD8<64, CD8VF>, REX_W;
defm NAME#Z128 : avx512_binop_rm2<opc, OpcodeStr, sched.XMM, OpNode,
_SrcVTInfo.info128, _DstVTInfo.info128,
v2i64x_info, IsCommutable>,
- EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_W;
+ EVEX_V128, EVEX_CD8<64, CD8VF>, REX_W;
}
}
@@ -5192,14 +5179,14 @@ multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
let Predicates = [HasBWI] in
defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, v32i16_info, v64i8_info,
- SchedWriteShuffle.ZMM>, EVEX_V512, VEX_WIG;
+ SchedWriteShuffle.ZMM>, EVEX_V512, WIG;
let Predicates = [HasBWI, HasVLX] in {
defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, v16i16x_info,
v32i8x_info, SchedWriteShuffle.YMM>,
- EVEX_V256, VEX_WIG;
+ EVEX_V256, WIG;
defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, v8i16x_info,
v16i8x_info, SchedWriteShuffle.XMM>,
- EVEX_V128, VEX_WIG;
+ EVEX_V128, WIG;
}
}
@@ -5226,9 +5213,9 @@ defm VPACKSSWB : avx512_packs_all_i16_i8 <0x63, "vpacksswb", X86Packss>, AVX512B
defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512BIBase;
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
- avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, VEX_WIG;
+ avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD, WIG;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
- avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, VEX_WIG;
+ avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase, WIG;
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
SchedWriteVecALU, HasBWI, 1>, T8PD;
@@ -5709,7 +5696,7 @@ multiclass avx512_binop_s_round<bits<8> opc, string OpcodeStr, SDPatternOperator
sched.PD.Scl, IsCommutable>,
avx512_fp_scalar_round<opc, OpcodeStr#"sd", f64x_info, RndNode,
sched.PD.Scl>,
- XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+ XD, REX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
let Predicates = [HasFP16] in
defm SHZ : avx512_fp_scalar<opc, OpcodeStr#"sh", f16x_info, OpNode,
VecNode, sched.PH.Scl, IsCommutable>,
@@ -5728,7 +5715,7 @@ multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm SDZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sd", f64x_info, OpNode,
VecNode, SaeNode, sched.PD.Scl, IsCommutable,
NAME#"SD">,
- XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
+ XD, REX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
let Predicates = [HasFP16] in {
defm SHZ : avx512_fp_scalar_sae<opc, OpcodeStr#"sh", f16x_info, OpNode,
VecNode, SaeNode, sched.PH.Scl, IsCommutable,
@@ -5779,7 +5766,7 @@ defm VMINCSSZ : avx512_comutable_binop_s<0x5D, "vminss", f32x_info, X86fminc,
defm VMINCSDZ : avx512_comutable_binop_s<0x5D, "vminsd", f64x_info, X86fminc,
SchedWriteFCmp.Scl, "VMINCSD">, XD,
- VEX_W, EVEX_4V, VEX_LIG,
+ REX_W, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>, SIMD_EXC;
defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
@@ -5788,7 +5775,7 @@ defm VMAXCSSZ : avx512_comutable_binop_s<0x5F, "vmaxss", f32x_info, X86fmaxc,
defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
SchedWriteFCmp.Scl, "VMAXCSD">, XD,
- VEX_W, EVEX_4V, VEX_LIG,
+ REX_W, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>, SIMD_EXC;
defm VMINCSHZ : avx512_comutable_binop_s<0x5D, "vminsh", f16x_info, X86fminc,
@@ -5869,7 +5856,7 @@ multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator Op
sched.PS.ZMM, IsCommutable>, EVEX_V512, PS,
EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v8f64_info,
- sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, VEX_W,
+ sched.PD.ZMM, IsCommutable>, EVEX_V512, PD, REX_W,
EVEX_CD8<64, CD8VF>;
}
@@ -5883,10 +5870,10 @@ multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator Op
EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v2f64x_info,
sched.PD.XMM, IsPD128Commutable,
- IsCommutable>, EVEX_V128, PD, VEX_W,
+ IsCommutable>, EVEX_V128, PD, REX_W,
EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, MaskOpNode, v4f64x_info,
- sched.PD.YMM, IsCommutable>, EVEX_V256, PD, VEX_W,
+ sched.PD.YMM, IsCommutable>, EVEX_V256, PD, REX_W,
EVEX_CD8<64, CD8VF>;
}
}
@@ -5922,7 +5909,7 @@ multiclass avx512_fp_binop_p_round<bits<8> opc, string OpcodeStr, SDNode OpNodeR
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_round_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
v8f64_info>,
- EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
+ EVEX_V512, PD, REX_W,EVEX_CD8<64, CD8VF>;
}
let Uses = [MXCSR] in
@@ -5938,7 +5925,7 @@ multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_sae_packed<opc, OpcodeStr, OpNodeRnd, sched.PD.ZMM,
v8f64_info>,
- EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
+ EVEX_V512, PD, REX_W,EVEX_CD8<64, CD8VF>;
}
defm VADD : avx512_fp_binop_p<0x58, "vadd", any_fadd, fadd, HasAVX512,
@@ -6039,7 +6026,7 @@ multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr
EVEX_V512, EVEX_CD8<32, CD8VF>, T8PD;
defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.ZMM, v8f64_info>,
avx512_fp_round_packed<opc, OpcodeStr, X86scalefRnd, sched.ZMM, v8f64_info>,
- EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
+ EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>, T8PD;
defm SSZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f32x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr#"ss", f32x_info,
X86scalefsRnd, sched.Scl>,
@@ -6047,7 +6034,7 @@ multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr
defm SDZ : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, X86scalefs, sched.Scl, f64x_info>,
avx512_fp_scalar_round<opcScaler, OpcodeStr#"sd", f64x_info,
X86scalefsRnd, sched.Scl>,
- EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, VEX_W, T8PD;
+ EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>, REX_W, T8PD;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
@@ -6056,9 +6043,9 @@ multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr
defm PSZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v8f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VF>, T8PD;
defm PDZ128 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.XMM, v2f64x_info>,
- EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
+ EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>, T8PD;
defm PDZ256 : avx512_fp_scalef_p<opc, OpcodeStr, X86scalef, sched.YMM, v4f64x_info>,
- EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>, T8PD;
+ EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>, T8PD;
}
let Predicates = [HasFP16, HasVLX] in {
@@ -6127,23 +6114,23 @@ multiclass avx512_vptest_dq<bits<8> opc, string OpcodeStr,
defm D : avx512_vptest_dq_sizes<opc, OpcodeStr#"d", sched,
avx512vl_i32_info>;
defm Q : avx512_vptest_dq_sizes<opc, OpcodeStr#"q", sched,
- avx512vl_i64_info>, VEX_W;
+ avx512vl_i64_info>, REX_W;
}
multiclass avx512_vptest_wb<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
let Predicates = [HasBWI] in {
defm WZ: avx512_vptest<opc, OpcodeStr#"w", sched.ZMM,
- v32i16_info>, EVEX_V512, VEX_W;
+ v32i16_info>, EVEX_V512, REX_W;
defm BZ: avx512_vptest<opc, OpcodeStr#"b", sched.ZMM,
v64i8_info>, EVEX_V512;
}
let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_vptest<opc, OpcodeStr#"w", sched.YMM,
- v16i16x_info>, EVEX_V256, VEX_W;
+ v16i16x_info>, EVEX_V256, REX_W;
defm WZ128: avx512_vptest<opc, OpcodeStr#"w", sched.XMM,
- v8i16x_info>, EVEX_V128, VEX_W;
+ v8i16x_info>, EVEX_V128, REX_W;
defm BZ256: avx512_vptest<opc, OpcodeStr#"b", sched.YMM,
v32i8x_info>, EVEX_V256;
defm BZ128: avx512_vptest<opc, OpcodeStr#"b", sched.XMM,
@@ -6239,7 +6226,7 @@ multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw,
avx512vl_i32_info, HasAVX512>;
let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64,
- avx512vl_i64_info, HasAVX512>, VEX_W;
+ avx512vl_i64_info, HasAVX512>, REX_W;
defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16,
avx512vl_i16_info, HasBWI>;
}
@@ -6270,12 +6257,12 @@ multiclass avx512_shift_rmi_w<bits<8> opcw, Format ImmFormR, Format ImmFormM,
X86SchedWriteWidths sched> {
let Predicates = [HasBWI] in
defm WZ: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
- sched.ZMM, v32i16_info>, EVEX_V512, VEX_WIG;
+ sched.ZMM, v32i16_info>, EVEX_V512, WIG;
let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
- sched.YMM, v16i16x_info>, EVEX_V256, VEX_WIG;
+ sched.YMM, v16i16x_info>, EVEX_V256, WIG;
defm WZ128: avx512_shift_rmi<opcw, ImmFormR, ImmFormM, OpcodeStr, OpNode,
- sched.XMM, v8i16x_info>, EVEX_V128, VEX_WIG;
+ sched.XMM, v8i16x_info>, EVEX_V128, WIG;
}
}
@@ -6288,7 +6275,7 @@ multiclass avx512_shift_rmi_dq<bits<8> opcd, bits<8> opcq,
sched, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in
defm Q: avx512_shift_rmi_sizes<opcq, ImmFormR, ImmFormM, OpcodeStr#"q", OpNode,
- sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
+ sched, avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
}
defm VPSRL : avx512_shift_rmi_dq<0x72, 0x73, MRM2r, MRM2m, "vpsrl", X86vsrli,
@@ -6398,7 +6385,7 @@ multiclass avx512_var_shift_types<bits<8> opc, string OpcodeStr,
defm D : avx512_var_shift_sizes<opc, OpcodeStr#"d", OpNode, sched,
avx512vl_i32_info>;
defm Q : avx512_var_shift_sizes<opc, OpcodeStr#"q", OpNode, sched,
- avx512vl_i64_info>, VEX_W;
+ avx512vl_i64_info>, REX_W;
}
// Use 512bit version to implement 128/256 bit in case NoVLX.
@@ -6426,13 +6413,13 @@ multiclass avx512_var_shift_w<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86SchedWriteWidths sched> {
let Predicates = [HasBWI] in
defm WZ: avx512_var_shift<opc, OpcodeStr, OpNode, sched.ZMM, v32i16_info>,
- EVEX_V512, VEX_W;
+ EVEX_V512, REX_W;
let Predicates = [HasVLX, HasBWI] in {
defm WZ256: avx512_var_shift<opc, OpcodeStr, OpNode, sched.YMM, v16i16x_info>,
- EVEX_V256, VEX_W;
+ EVEX_V256, REX_W;
defm WZ128: avx512_var_shift<opc, OpcodeStr, OpNode, sched.XMM, v8i16x_info>,
- EVEX_V128, VEX_W;
+ EVEX_V128, REX_W;
}
}
@@ -6601,25 +6588,25 @@ multiclass avx512_vperm_bw<bits<8> opc, string OpcodeStr,
}
defm VPERMW : avx512_vperm_bw<0x8D, "vpermw", HasBWI, X86VPermv,
- WriteVarShuffle256, avx512vl_i16_info>, VEX_W;
+ WriteVarShuffle256, avx512vl_i16_info>, REX_W;
defm VPERMB : avx512_vperm_bw<0x8D, "vpermb", HasVBMI, X86VPermv,
WriteVarShuffle256, avx512vl_i8_info>;
defm VPERMD : avx512_vperm_dq_sizes<0x36, "vpermd", X86VPermv,
WriteVarShuffle256, avx512vl_i32_info>;
defm VPERMQ : avx512_vperm_dq_sizes<0x36, "vpermq", X86VPermv,
- WriteVarShuffle256, avx512vl_i64_info>, VEX_W;
+ WriteVarShuffle256, avx512vl_i64_info>, REX_W;
defm VPERMPS : avx512_vperm_dq_sizes<0x16, "vpermps", X86VPermv,
WriteFVarShuffle256, avx512vl_f32_info>;
defm VPERMPD : avx512_vperm_dq_sizes<0x16, "vpermpd", X86VPermv,
- WriteFVarShuffle256, avx512vl_f64_info>, VEX_W;
+ WriteFVarShuffle256, avx512vl_f64_info>, REX_W;
defm VPERMQ : avx512_vpermi_dq_sizes<0x00, MRMSrcReg, MRMSrcMem, "vpermq",
X86VPermi, WriteShuffle256, avx512vl_i64_info>,
- EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
+ EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
defm VPERMPD : avx512_vpermi_dq_sizes<0x01, MRMSrcReg, MRMSrcMem, "vpermpd",
X86VPermi, WriteFShuffle256, avx512vl_f64_info>,
- EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, VEX_W;
+ EVEX, AVX512AIi8Base, EVEX_CD8<64, CD8VF>, REX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - VPERMIL
@@ -6718,7 +6705,7 @@ multiclass avx512_pshufb_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
defm VPSHUFB: avx512_pshufb_sizes<0x00, "vpshufb", X86pshufb,
- SchedWriteVarShuffle>, VEX_WIG;
+ SchedWriteVarShuffle>, WIG;
//===----------------------------------------------------------------------===//
// Move Low to High and High to Low packed FP Instructions
@@ -6734,7 +6721,7 @@ def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2),
"vmovhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128X:$dst, (v4f32 (X86Movhlps VR128X:$src1, VR128X:$src2)))]>,
- Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V, NotMemoryFoldable;
+ Sched<[SchedWriteFShuffle.XMM]>, EVEX_4V;
//===----------------------------------------------------------------------===//
// VMOVHPS/PD VMOVLPS Instructions
@@ -6761,11 +6748,11 @@ multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", null_frag,
v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
- v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
+ v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, REX_W;
defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movsd,
- v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
+ v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, REX_W;
let Predicates = [HasAVX512] in {
// VMOVHPD patterns
@@ -6789,7 +6776,7 @@ def VMOVHPDZ128mr : AVX512PDI<0x17, MRMDestMem, (outs),
[(store (f64 (extractelt
(v2f64 (X86Unpckh VR128X:$src, VR128X:$src)),
(iPTR 0))), addr:$dst)]>,
- EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
+ EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSZ128mr : AVX512PSI<0x13, MRMDestMem, (outs),
(ins f64mem:$dst, VR128X:$src),
@@ -6800,7 +6787,7 @@ def VMOVLPDZ128mr : AVX512PDI<0x13, MRMDestMem, (outs),
"vmovlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt (v2f64 VR128X:$src),
(iPTR 0))), addr:$dst)]>,
- EVEX, EVEX_CD8<64, CD8VT1>, VEX_W;
+ EVEX, EVEX_CD8<64, CD8VT1>, REX_W;
} // SchedRW
let Predicates = [HasAVX512] in {
@@ -6831,7 +6818,8 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))),
(_.VT (MaskOpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
+ sched.ReadAfterFold]>;
defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -6841,7 +6829,8 @@ multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
_.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))),
(MaskOpNode _.RC:$src2,
_.RC:$src1,(_.VT (_.BroadcastLdFrag addr:$src3))), 1, 0>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
+ sched.ReadAfterFold]>;
}
}
@@ -6890,7 +6879,7 @@ multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDPatternOperator O
avx512vl_f32_info>, T8PD;
defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f64_info>, T8PD, VEX_W;
+ avx512vl_f64_info>, T8PD, REX_W;
}
defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", any_fma,
@@ -6924,7 +6913,8 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)),
(_.VT (MaskOpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
+ sched.ReadAfterFold]>;
defm mb: AVX512_maskable_fma<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -6936,7 +6926,8 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
(_.VT (MaskOpNode _.RC:$src2,
(_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1)), 1, 0>, EVEX_4V, EVEX_B,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ Sched<[sched.Folded, sched.ReadAfterFold,
+ sched.ReadAfterFold]>;
}
}
@@ -6985,7 +6976,7 @@ multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDPatternOperator O
avx512vl_f32_info>, T8PD;
defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f64_info>, T8PD, VEX_W;
+ avx512vl_f64_info>, T8PD, REX_W;
}
defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", any_fma,
@@ -7020,7 +7011,8 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)),
(_.VT (MaskOpNode (_.LdFrag addr:$src3), _.RC:$src1, _.RC:$src2)), 1, 0>,
- EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
+ sched.ReadAfterFold]>;
// Pattern is 312 order so that the load is in a different place from the
// 213 and 231 patterns this helps tablegen's duplicate pattern detection.
@@ -7032,7 +7024,8 @@ multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDPatternOperator
_.RC:$src1, _.RC:$src2)),
(_.VT (MaskOpNode (_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1, _.RC:$src2)), 1, 0>,
- EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
+ sched.ReadAfterFold]>;
}
}
@@ -7081,7 +7074,7 @@ multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDPatternOperator O
avx512vl_f32_info>, T8PD;
defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, MaskOpNode,
OpNodeRnd, SchedWriteFMA,
- avx512vl_f64_info>, T8PD, VEX_W;
+ avx512vl_f64_info>, T8PD, REX_W;
}
defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", any_fma,
@@ -7110,7 +7103,8 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.IntScalarMemOp:$src3), OpcodeStr,
"$src3, $src2", "$src2, $src3", (null_frag), 1, 1>,
- EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
+ EVEX_4V, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
+ SchedWriteFMA.Scl.ReadAfterFold]>, SIMD_EXC;
let Uses = [MXCSR] in
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
@@ -7128,7 +7122,8 @@ let Constraints = "$src1 = $dst", hasSideEffects = 0 in {
(ins _.FRC:$src1, _.FRC:$src2, _.ScalarMemOp:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
+ [RHS_m]>, Sched<[SchedWriteFMA.Scl.Folded, SchedWriteFMA.Scl.ReadAfterFold,
+ SchedWriteFMA.Scl.ReadAfterFold]>, EVEX_4V, SIMD_EXC;
let Uses = [MXCSR] in
def rb : AVX512<opc, MRMSrcReg, (outs _.FRC:$dst),
@@ -7183,7 +7178,7 @@ multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD;
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
OpNodeRnd, f64x_info, "SD">,
- EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD;
+ EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8PD;
}
let Predicates = [HasFP16] in {
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
@@ -7446,7 +7441,8 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
- T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ T8PD, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold,
+ sched.ReadAfterFold]>;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
@@ -7455,7 +7451,8 @@ multiclass avx512_pmadd52_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode _.RC:$src2,
(_.VT (_.BroadcastLdFrag addr:$src3)),
_.RC:$src1)>,
- T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ T8PD, EVEX_4V, EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold,
+ sched.ReadAfterFold]>;
}
}
} // Constraints = "$src1 = $dst"
@@ -7476,10 +7473,10 @@ multiclass avx512_pmadd52_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm VPMADD52LUQ : avx512_pmadd52_common<0xb4, "vpmadd52luq", x86vpmadd52l,
SchedWriteVecIMul, avx512vl_i64_info>,
- VEX_W;
+ REX_W;
defm VPMADD52HUQ : avx512_pmadd52_common<0xb5, "vpmadd52huq", x86vpmadd52h,
SchedWriteVecIMul, avx512vl_i64_info>,
- VEX_W;
+ REX_W;
//===----------------------------------------------------------------------===//
// AVX-512 Scalar convert from sign integer to float/double
@@ -7560,14 +7557,14 @@ defm VCVTSI2SSZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
defm VCVTSI642SSZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
WriteCvtI2SS, GR64,
v4f32x_info, i64mem, loadi64, "cvtsi2ss", "q">,
- XS, VEX_W, EVEX_CD8<64, CD8VT1>;
+ XS, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSI2SDZ : avx512_vcvtsi<0x2A, null_frag, WriteCvtI2SD, GR32,
v2f64x_info, i32mem, loadi32, "cvtsi2sd", "l", [], 0>,
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd,
WriteCvtI2SD, GR64,
v2f64x_info, i64mem, loadi64, "cvtsi2sd", "q">,
- XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+ XD, REX_W, EVEX_CD8<64, CD8VT1>;
def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
@@ -7599,14 +7596,14 @@ defm VCVTUSI2SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
defm VCVTUSI642SSZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
WriteCvtI2SS, GR64,
v4f32x_info, i64mem, loadi64, "cvtusi2ss", "q">,
- XS, VEX_W, EVEX_CD8<64, CD8VT1>;
+ XS, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SDZ : avx512_vcvtsi<0x7B, null_frag, WriteCvtI2SD, GR32, v2f64x_info,
i32mem, loadi32, "cvtusi2sd", "l", [], 0>,
XD, VEX_LIG, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd,
WriteCvtI2SD, GR64,
v2f64x_info, i64mem, loadi64, "cvtusi2sd", "q">,
- XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+ XD, REX_W, EVEX_CD8<64, CD8VT1>;
def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTUSI2SSZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
@@ -7674,25 +7671,25 @@ defm VCVTSS2SIZ: avx512_cvt_s_int_round<0x2D, f32x_info, i32x_info,X86cvts2si,
XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s_int_round<0x2D, f32x_info, i64x_info, X86cvts2si,
X86cvts2siRnd, WriteCvtSS2I, "cvtss2si", "{q}">,
- XS, VEX_W, EVEX_CD8<32, CD8VT1>;
+ XS, REX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USIZ: avx512_cvt_s_int_round<0x79, f32x_info, i32x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{l}">,
XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2USI64Z: avx512_cvt_s_int_round<0x79, f32x_info, i64x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSS2I, "cvtss2usi", "{q}">,
- XS, VEX_W, EVEX_CD8<32, CD8VT1>;
+ XS, REX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s_int_round<0x2D, f64x_info, i32x_info, X86cvts2si,
X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{l}">,
XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s_int_round<0x2D, f64x_info, i64x_info, X86cvts2si,
X86cvts2siRnd, WriteCvtSD2I, "cvtsd2si", "{q}">,
- XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+ XD, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USIZ: avx512_cvt_s_int_round<0x79, f64x_info, i32x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{l}">,
XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSD2I, "cvtsd2usi", "{q}">,
- XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+ XD, REX_W, EVEX_CD8<64, CD8VT1>;
multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
X86VectorVTInfo DstVT, SDNode OpNode,
@@ -7714,11 +7711,11 @@ multiclass avx512_cvt_s<bits<8> opc, string asm, X86VectorVTInfo SrcVT,
defm VCVTSS2SIZ: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i32x_info,
lrint, WriteCvtSS2I>, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSS2SI64Z: avx512_cvt_s<0x2D, "vcvtss2si", f32x_info, i64x_info,
- llrint, WriteCvtSS2I>, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
+ llrint, WriteCvtSS2I>, REX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSD2SIZ: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i32x_info,
lrint, WriteCvtSD2I>, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTSD2SI64Z: avx512_cvt_s<0x2D, "vcvtsd2si", f64x_info, i64x_info,
- llrint, WriteCvtSD2I>, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
+ llrint, WriteCvtSD2I>, REX_W, XD, EVEX_CD8<64, CD8VT1>;
let Predicates = [HasAVX512] in {
def : Pat<(i64 (lrint FR32:$src)), (VCVTSS2SI64Zrr FR32:$src)>;
@@ -7861,26 +7858,26 @@ defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
"{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
- "{q}">, VEX_W, XS, EVEX_CD8<32, CD8VT1>;
+ "{q}">, REX_W, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
"{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSD2I,
- "{q}">, VEX_W, XD, EVEX_CD8<64, CD8VT1>;
+ "{q}">, REX_W, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
"{l}">, XS, EVEX_CD8<32, CD8VT1>;
defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
- "{q}">, XS,VEX_W, EVEX_CD8<32, CD8VT1>;
+ "{q}">, XS,REX_W, EVEX_CD8<32, CD8VT1>;
defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
"{l}">, XD, EVEX_CD8<64, CD8VT1>;
defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSD2I,
- "{q}">, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+ "{q}">, XD, REX_W, EVEX_CD8<64, CD8VT1>;
//===----------------------------------------------------------------------===//
// AVX-512 Convert form float to double and back
@@ -7968,13 +7965,13 @@ multiclass avx512_cvt_fp_scalar_extend<bits<8> opc, string OpcodeStr,
}
defm VCVTSD2SS : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2ss", X86frounds,
X86froundsRnd, WriteCvtSD2SS, f64x_info,
- f32x_info>, XD, VEX_W;
+ f32x_info>, XD, REX_W;
defm VCVTSS2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtss2sd", X86fpexts,
X86fpextsSAE, WriteCvtSS2SD, f32x_info,
f64x_info>, XS;
defm VCVTSD2SH : avx512_cvt_fp_scalar_trunc<0x5A, "vcvtsd2sh", X86frounds,
X86froundsRnd, WriteCvtSD2SS, f64x_info,
- f16x_info, HasFP16>, T_MAP5XD, VEX_W;
+ f16x_info, HasFP16>, T_MAP5XD, REX_W;
defm VCVTSH2SD : avx512_cvt_fp_scalar_extend<0x5A, "vcvtsh2sd", X86fpexts,
X86fpextsSAE, WriteCvtSS2SD, f16x_info,
f64x_info, HasFP16>, T_MAP5XS;
@@ -8245,7 +8242,7 @@ multiclass avx512_cvt_trunc<bits<8> opc, string OpcodeStr,
defm VCVTPD2PS : avx512_cvt_trunc<0x5A, "vcvtpd2ps",
avx512vl_f32_info, avx512vl_f64_info, SchedWriteCvtPD2PS>,
- VEX_W, PD, EVEX_CD8<64, CD8VF>;
+ REX_W, PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2PD : avx512_cvt_extend<0x5A, "vcvtps2pd",
avx512vl_f64_info, avx512vl_f32_info, SchedWriteCvtPS2PD>,
PS, EVEX_CD8<32, CD8VH>;
@@ -8362,7 +8359,7 @@ defm VCVTPH2PSX : avx512_cvt_extend<0x13, "vcvtph2psx", avx512vl_f32_info,
avx512vl_f16_info, SchedWriteCvtPS2PD,
HasFP16>, T_MAP6PD, EVEX_CD8<16, CD8VH>;
defm VCVTPD2PH : avx512_cvtpd2ph<0x5A, "vcvtpd2ph", SchedWriteCvtPD2PS>,
- VEX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
+ REX_W, T_MAP5PD, EVEX_CD8<64, CD8VF>;
defm VCVTPH2PD : avx512_cvtph2pd<0x5A, "vcvtph2pd", SchedWriteCvtPS2PD>,
T_MAP5PS, EVEX_CD8<16, CD8VQ>;
@@ -8858,7 +8855,7 @@ defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", X86any_cvttp2si,
defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", X86any_cvttp2si,
X86cvttp2si, X86cvttp2siSAE,
SchedWriteCvtPD2DQ>,
- PD, VEX_W, EVEX_CD8<64, CD8VF>;
+ PD, REX_W, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
X86cvttp2ui, X86cvttp2uiSAE,
@@ -8867,7 +8864,7 @@ defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", X86any_cvttp2ui,
defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", X86any_cvttp2ui,
X86cvttp2ui, X86cvttp2uiSAE,
SchedWriteCvtPD2DQ>,
- PS, VEX_W, EVEX_CD8<64, CD8VF>;
+ PS, REX_W, EVEX_CD8<64, CD8VF>;
defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", any_uint_to_fp,
uint_to_fp, X86any_VUintToFP, X86VUintToFP,
@@ -8883,18 +8880,18 @@ defm VCVTPS2DQ : avx512_cvtps2dq<0x5B, "vcvtps2dq", X86cvtp2Int, X86cvtp2Int,
defm VCVTPD2DQ : avx512_cvtpd2dq<0xE6, "vcvtpd2dq", X86cvtp2Int, X86cvtp2Int,
X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, XD,
- VEX_W, EVEX_CD8<64, CD8VF>;
+ REX_W, EVEX_CD8<64, CD8VF>;
defm VCVTPS2UDQ : avx512_cvtps2dq<0x79, "vcvtps2udq", X86cvtp2UInt, X86cvtp2UInt,
X86cvtp2UIntRnd, SchedWriteCvtPS2DQ>,
PS, EVEX_CD8<32, CD8VF>;
defm VCVTPD2UDQ : avx512_cvtpd2dq<0x79, "vcvtpd2udq", X86cvtp2UInt, X86cvtp2UInt,
- X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
+ X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
PS, EVEX_CD8<64, CD8VF>;
defm VCVTPD2QQ : avx512_cvtpd2qq<0x7B, "vcvtpd2qq", X86cvtp2Int, X86cvtp2Int,
- X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, VEX_W,
+ X86cvtp2IntRnd, SchedWriteCvtPD2DQ>, REX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
@@ -8902,7 +8899,7 @@ defm VCVTPS2QQ : avx512_cvtps2qq<0x7B, "vcvtps2qq", X86cvtp2Int, X86cvtp2Int,
EVEX_CD8<32, CD8VH>;
defm VCVTPD2UQQ : avx512_cvtpd2qq<0x79, "vcvtpd2uqq", X86cvtp2UInt, X86cvtp2UInt,
- X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, VEX_W,
+ X86cvtp2UIntRnd, SchedWriteCvtPD2DQ>, REX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt,
@@ -8911,7 +8908,7 @@ defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt, X86cvtp2UInt
defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", X86any_cvttp2si,
X86cvttp2si, X86cvttp2siSAE,
- SchedWriteCvtPD2DQ>, VEX_W,
+ SchedWriteCvtPD2DQ>, REX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
@@ -8921,7 +8918,7 @@ defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", X86any_cvttp2si,
defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", X86any_cvttp2ui,
X86cvttp2ui, X86cvttp2uiSAE,
- SchedWriteCvtPD2DQ>, VEX_W,
+ SchedWriteCvtPD2DQ>, REX_W,
PD, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
@@ -8931,11 +8928,11 @@ defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", X86any_cvttp2ui,
defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", any_sint_to_fp,
sint_to_fp, X86VSintToFpRnd,
- SchedWriteCvtDQ2PD>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
+ SchedWriteCvtDQ2PD>, REX_W, XS, EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", any_uint_to_fp,
uint_to_fp, X86VUintToFpRnd, SchedWriteCvtDQ2PD>,
- VEX_W, XS, EVEX_CD8<64, CD8VF>;
+ REX_W, XS, EVEX_CD8<64, CD8VF>;
defm VCVTDQ2PH : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtdq2ph", any_sint_to_fp, sint_to_fp,
X86any_VSintToFP, X86VMSintToFP,
@@ -8952,13 +8949,13 @@ defm VCVTUDQ2PH : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtudq2ph", any_uint_to_fp, uint
defm VCVTQQ2PS : avx512_cvtqq2ps_dq2ph<0x5B, "vcvtqq2ps", any_sint_to_fp, sint_to_fp,
X86any_VSintToFP, X86VMSintToFP,
X86VSintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
- SchedWriteCvtDQ2PS>, VEX_W, PS,
+ SchedWriteCvtDQ2PS>, REX_W, PS,
EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PS : avx512_cvtqq2ps_dq2ph<0x7A, "vcvtuqq2ps", any_uint_to_fp, uint_to_fp,
X86any_VUintToFP, X86VMUintToFP,
X86VUintToFpRnd, avx512vl_f32_info, avx512vl_i64_info,
- SchedWriteCvtDQ2PS>, VEX_W, XD,
+ SchedWriteCvtDQ2PS>, REX_W, XD,
EVEX_CD8<64, CD8VF>;
let Predicates = [HasVLX] in {
@@ -9232,7 +9229,7 @@ let ExeDomain = GenericDomain, Uses = [MXCSR], mayRaiseFPException = 1 in {
def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>,
- EVEX_K, Sched<[MR]>, NotMemoryFoldable;
+ EVEX_K, Sched<[MR]>;
}
}
}
@@ -9308,11 +9305,11 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss", SSEPackedSingle>,
AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd", SSEPackedDouble>,
- AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
+ AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss", SSEPackedSingle>,
AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd", SSEPackedDouble>,
- AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
+ AVX512PDIi8Base, REX_W, EVEX_CD8<64, CD8VT1>;
}
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
@@ -9321,27 +9318,27 @@ let Defs = [EFLAGS], Predicates = [HasAVX512] in {
EVEX_CD8<32, CD8VT1>;
defm VUCOMISDZ : sse12_ord_cmp<0x2E, FR64X, X86any_fcmp, f64, f64mem, loadf64,
"ucomisd", SSEPackedDouble>, PD, EVEX,
- VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+ VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCOMISSZ : sse12_ord_cmp<0x2F, FR32X, X86strict_fcmps, f32, f32mem, loadf32,
"comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VCOMISDZ : sse12_ord_cmp<0x2F, FR64X, X86strict_fcmps, f64, f64mem, loadf64,
"comisd", SSEPackedDouble>, PD, EVEX,
- VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+ VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
let isCodeGenOnly = 1 in {
defm VUCOMISSZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v4f32, ssmem,
sse_load_f32, "ucomiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VUCOMISDZ : sse12_ord_cmp_int<0x2E, VR128X, X86ucomi, v2f64, sdmem,
sse_load_f64, "ucomisd", SSEPackedDouble>, PD, EVEX,
- VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+ VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCOMISSZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v4f32, ssmem,
sse_load_f32, "comiss", SSEPackedSingle>, PS, EVEX, VEX_LIG,
EVEX_CD8<32, CD8VT1>;
defm VCOMISDZ : sse12_ord_cmp_int<0x2F, VR128X, X86comi, v2f64, sdmem,
sse_load_f64, "comisd", SSEPackedDouble>, PD, EVEX,
- VEX_LIG, VEX_W, EVEX_CD8<64, CD8VT1>;
+ VEX_LIG, REX_W, EVEX_CD8<64, CD8VT1>;
}
}
@@ -9399,13 +9396,13 @@ defm VRCP14SSZ : avx512_fp14_s<0x4D, "vrcp14ss", X86rcp14s, SchedWriteFRcp.Scl,
f32x_info>, EVEX_CD8<32, CD8VT1>,
T8PD;
defm VRCP14SDZ : avx512_fp14_s<0x4D, "vrcp14sd", X86rcp14s, SchedWriteFRcp.Scl,
- f64x_info>, VEX_W, EVEX_CD8<64, CD8VT1>,
+ f64x_info>, REX_W, EVEX_CD8<64, CD8VT1>,
T8PD;
defm VRSQRT14SSZ : avx512_fp14_s<0x4F, "vrsqrt14ss", X86rsqrt14s,
SchedWriteFRsqrt.Scl, f32x_info>,
EVEX_CD8<32, CD8VT1>, T8PD;
defm VRSQRT14SDZ : avx512_fp14_s<0x4F, "vrsqrt14sd", X86rsqrt14s,
- SchedWriteFRsqrt.Scl, f64x_info>, VEX_W,
+ SchedWriteFRsqrt.Scl, f64x_info>, REX_W,
EVEX_CD8<64, CD8VT1>, T8PD;
}
@@ -9437,7 +9434,7 @@ multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm 14PSZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14ps"), OpNode, sched.ZMM,
v16f32_info>, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm 14PDZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"), OpNode, sched.ZMM,
- v8f64_info>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ v8f64_info>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
}
let Predicates = [HasFP16] in
defm PHZ : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"), OpNode, sched.ZMM,
@@ -9453,10 +9450,10 @@ multiclass avx512_fp14_p_vl_all<bits<8> opc, string OpcodeStr, SDNode OpNode,
EVEX_V256, EVEX_CD8<32, CD8VF>;
defm 14PDZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
OpNode, sched.XMM, v2f64x_info>,
- EVEX_V128, VEX_W, EVEX_CD8<64, CD8VF>;
+ EVEX_V128, REX_W, EVEX_CD8<64, CD8VF>;
defm 14PDZ256 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "14pd"),
OpNode, sched.YMM, v4f64x_info>,
- EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;
+ EVEX_V256, REX_W, EVEX_CD8<64, CD8VF>;
}
let Predicates = [HasFP16, HasVLX] in {
defm PHZ128 : avx512_fp14_p<opc, !strconcat(OpcodeStr, "ph"),
@@ -9501,7 +9498,7 @@ multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
sched>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V;
defm SDZ : avx512_fp28_s<opc, OpcodeStr#"sd", f64x_info, OpNode, OpNodeSAE,
- sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W, T8PD, EVEX_4V;
+ sched>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8PD, EVEX_4V;
}
multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -9563,7 +9560,7 @@ multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp28_p<opc, OpcodeStr#"pd", v8f64_info, OpNode, sched.ZMM>,
avx512_fp28_p_sae<opc, OpcodeStr#"pd", v8f64_info, OpNodeSAE, sched.ZMM>,
- T8PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>;
+ T8PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
@@ -9578,10 +9575,10 @@ multiclass avx512_fp_unaryop_packed<bits<8> opc, string OpcodeStr,
EVEX_V256, T8PD, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp28_p<opc, OpcodeStr#"pd", v2f64x_info, OpNode,
sched.XMM>,
- EVEX_V128, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
+ EVEX_V128, REX_W, T8PD, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp28_p<opc, OpcodeStr#"pd", v4f64x_info, OpNode,
sched.YMM>,
- EVEX_V256, VEX_W, T8PD, EVEX_CD8<64, CD8VF>;
+ EVEX_V256, REX_W, T8PD, EVEX_CD8<64, CD8VF>;
}
}
@@ -9664,7 +9661,7 @@ multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
sched.PD.ZMM, v8f64_info>,
- EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
+ EVEX_V512, REX_W, PD, EVEX_CD8<64, CD8VF>;
// Define only if AVX512VL feature is present.
let Predicates = [HasVLX] in {
defm PSZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "ps"),
@@ -9675,10 +9672,10 @@ multiclass avx512_sqrt_packed_all<bits<8> opc, string OpcodeStr,
EVEX_V256, PS, EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
sched.PD.XMM, v2f64x_info>,
- EVEX_V128, VEX_W, PD, EVEX_CD8<64, CD8VF>;
+ EVEX_V128, REX_W, PD, EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_sqrt_packed<opc, !strconcat(OpcodeStr, "pd"),
sched.PD.YMM, v4f64x_info>,
- EVEX_V256, VEX_W, PD, EVEX_CD8<64, CD8VF>;
+ EVEX_V256, REX_W, PD, EVEX_CD8<64, CD8VF>;
}
}
@@ -9694,7 +9691,7 @@ multiclass avx512_sqrt_packed_all_round<bits<8> opc, string OpcodeStr,
EVEX_V512, PS, EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_sqrt_packed_round<opc, !strconcat(OpcodeStr, "pd"),
sched.PD.ZMM, v8f64_info>,
- EVEX_V512, VEX_W, PD, EVEX_CD8<64, CD8VF>;
+ EVEX_V512, REX_W, PD, EVEX_CD8<64, CD8VF>;
}
multiclass avx512_sqrt_scalar<bits<8> opc, string OpcodeStr, X86FoldableSchedWrite sched,
@@ -9754,7 +9751,7 @@ multiclass avx512_sqrt_scalar_all<bits<8> opc, string OpcodeStr,
defm SSZ : avx512_sqrt_scalar<opc, OpcodeStr#"ss", sched.PS.Scl, f32x_info, NAME#"SS">,
EVEX_CD8<32, CD8VT1>, EVEX_4V, XS;
defm SDZ : avx512_sqrt_scalar<opc, OpcodeStr#"sd", sched.PD.Scl, f64x_info, NAME#"SD">,
- EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, VEX_W;
+ EVEX_CD8<64, CD8VT1>, EVEX_4V, XD, REX_W;
}
defm VSQRT : avx512_sqrt_packed_all<0x51, "vsqrt", SchedWriteFSqrtSizes>,
@@ -9828,7 +9825,7 @@ defm VRNDSCALESSZ : avx512_rndscale_scalar<0x0A, "vrndscaless",
defm VRNDSCALESDZ : avx512_rndscale_scalar<0x0B, "vrndscalesd",
SchedWriteFRnd.Scl, f64x_info>,
- VEX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
+ REX_W, AVX512AIi8Base, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>;
multiclass avx512_masked_scalar<SDNode OpNode, string OpcPrefix, SDNode Move,
@@ -9915,7 +9912,7 @@ multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
(ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>,
- EVEX, EVEX_K, Sched<[sched.Folded]>, NotMemoryFoldable;
+ EVEX, EVEX_K, Sched<[sched.Folded]>;
}//mayStore = 1, hasSideEffects = 0
}
@@ -10185,16 +10182,16 @@ multiclass avx512_pmovx_bw<bits<8> opc, string OpcodeStr,
let Predicates = [HasVLX, HasBWI] in {
defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v8i16x_info,
v16i8x_info, i64mem, LdFrag, InVecNode>,
- EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
+ EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, WIG;
defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v16i16x_info,
v16i8x_info, i128mem, LdFrag, OpNode>,
- EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
+ EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, WIG;
}
let Predicates = [HasBWI] in {
defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v32i16_info,
v32i8x_info, i256mem, LdFrag, OpNode>,
- EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
+ EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, WIG;
}
}
@@ -10205,16 +10202,16 @@ multiclass avx512_pmovx_bd<bits<8> opc, string OpcodeStr,
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
v16i8x_info, i32mem, LdFrag, InVecNode>,
- EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
+ EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, WIG;
defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
v16i8x_info, i64mem, LdFrag, InVecNode>,
- EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
+ EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, WIG;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
v16i8x_info, i128mem, LdFrag, OpNode>,
- EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
+ EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, WIG;
}
}
@@ -10225,16 +10222,16 @@ multiclass avx512_pmovx_bq<bits<8> opc, string OpcodeStr,
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
v16i8x_info, i16mem, LdFrag, InVecNode>,
- EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG;
+ EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, WIG;
defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
v16i8x_info, i32mem, LdFrag, InVecNode>,
- EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG;
+ EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, WIG;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
v16i8x_info, i64mem, LdFrag, InVecNode>,
- EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG;
+ EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, WIG;
}
}
@@ -10245,16 +10242,16 @@ multiclass avx512_pmovx_wd<bits<8> opc, string OpcodeStr,
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v4i32x_info,
v8i16x_info, i64mem, LdFrag, InVecNode>,
- EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG;
+ EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, WIG;
defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v8i32x_info,
v8i16x_info, i128mem, LdFrag, OpNode>,
- EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG;
+ EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, WIG;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v16i32_info,
v16i16x_info, i256mem, LdFrag, OpNode>,
- EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG;
+ EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, WIG;
}
}
@@ -10265,16 +10262,16 @@ multiclass avx512_pmovx_wq<bits<8> opc, string OpcodeStr,
let Predicates = [HasVLX, HasAVX512] in {
defm Z128: avx512_pmovx_common<opc, OpcodeStr, sched.XMM, v2i64x_info,
v8i16x_info, i32mem, LdFrag, InVecNode>,
- EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG;
+ EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, WIG;
defm Z256: avx512_pmovx_common<opc, OpcodeStr, sched.YMM, v4i64x_info,
v8i16x_info, i64mem, LdFrag, InVecNode>,
- EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG;
+ EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, WIG;
}
let Predicates = [HasAVX512] in {
defm Z : avx512_pmovx_common<opc, OpcodeStr, sched.ZMM, v8i64_info,
v8i16x_info, i128mem, LdFrag, OpNode>,
- EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG;
+ EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, WIG;
}
}
@@ -10453,18 +10450,18 @@ multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME#D#SUFF#Z: avx512_gather<dopc, OpcodeStr#"d", _.info512,
- vy512xmem>, EVEX_V512, VEX_W;
+ vy512xmem>, EVEX_V512, REX_W;
defm NAME#Q#SUFF#Z: avx512_gather<qopc, OpcodeStr#"q", _.info512,
- vz512mem>, EVEX_V512, VEX_W;
+ vz512mem>, EVEX_V512, REX_W;
let Predicates = [HasVLX] in {
defm NAME#D#SUFF#Z256: avx512_gather<dopc, OpcodeStr#"d", _.info256,
- vx256xmem>, EVEX_V256, VEX_W;
+ vx256xmem>, EVEX_V256, REX_W;
defm NAME#Q#SUFF#Z256: avx512_gather<qopc, OpcodeStr#"q", _.info256,
- vy256xmem>, EVEX_V256, VEX_W;
+ vy256xmem>, EVEX_V256, REX_W;
defm NAME#D#SUFF#Z128: avx512_gather<dopc, OpcodeStr#"d", _.info128,
- vx128xmem>, EVEX_V128, VEX_W;
+ vx128xmem>, EVEX_V128, REX_W;
defm NAME#Q#SUFF#Z128: avx512_gather<qopc, OpcodeStr#"q", _.info128,
- vx128xmem>, EVEX_V128, VEX_W;
+ vx128xmem>, EVEX_V128, REX_W;
}
}
@@ -10496,7 +10493,7 @@ defm VPGATHER : avx512_gather_q_pd<0x90, 0x91, avx512vl_i64_info, "vpgather", "Q
multiclass avx512_scatter<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86MemOperand memop, RegisterClass MaskRC = _.KRCWM> {
-let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
+let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
hasSideEffects = 0 in
def mr : AVX5128I<opc, MRMDestMem, (outs MaskRC:$mask_wb),
@@ -10510,18 +10507,18 @@ let mayStore = 1, Constraints = "$mask = $mask_wb", ExeDomain = _.ExeDomain,
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
AVX512VLVectorVTInfo _, string OpcodeStr, string SUFF> {
defm NAME#D#SUFF#Z: avx512_scatter<dopc, OpcodeStr#"d", _.info512,
- vy512xmem>, EVEX_V512, VEX_W;
+ vy512xmem>, EVEX_V512, REX_W;
defm NAME#Q#SUFF#Z: avx512_scatter<qopc, OpcodeStr#"q", _.info512,
- vz512mem>, EVEX_V512, VEX_W;
+ vz512mem>, EVEX_V512, REX_W;
let Predicates = [HasVLX] in {
defm NAME#D#SUFF#Z256: avx512_scatter<dopc, OpcodeStr#"d", _.info256,
- vx256xmem>, EVEX_V256, VEX_W;
+ vx256xmem>, EVEX_V256, REX_W;
defm NAME#Q#SUFF#Z256: avx512_scatter<qopc, OpcodeStr#"q", _.info256,
- vy256xmem>, EVEX_V256, VEX_W;
+ vy256xmem>, EVEX_V256, REX_W;
defm NAME#D#SUFF#Z128: avx512_scatter<dopc, OpcodeStr#"d", _.info128,
- vx128xmem>, EVEX_V128, VEX_W;
+ vx128xmem>, EVEX_V128, REX_W;
defm NAME#Q#SUFF#Z128: avx512_scatter<qopc, OpcodeStr#"q", _.info128,
- vx128xmem>, EVEX_V128, VEX_W;
+ vx128xmem>, EVEX_V128, REX_W;
}
}
@@ -10565,10 +10562,10 @@ defm VGATHERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qps",
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dpd",
- VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
+ VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM1m, "vgatherpf0qpd",
- VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+ VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
@@ -10577,10 +10574,10 @@ defm VGATHERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qps",
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VGATHERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM2m, "vgatherpf1dpd",
- VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
+ VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
defm VGATHERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM2m, "vgatherpf1qpd",
- VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+ VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
@@ -10589,10 +10586,10 @@ defm VSCATTERPF0QPS: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qps
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF0DPD: avx512_gather_scatter_prefetch<0xC6, MRM5m, "vscatterpf0dpd",
- VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
+ VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF0QPD: avx512_gather_scatter_prefetch<0xC7, MRM5m, "vscatterpf0qpd",
- VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+ VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF1DPS: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dps",
VK16WM, vz512mem>, EVEX_V512, EVEX_CD8<32, CD8VT1>;
@@ -10601,10 +10598,10 @@ defm VSCATTERPF1QPS: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qps
VK8WM, vz256mem>, EVEX_V512, EVEX_CD8<64, CD8VT1>;
defm VSCATTERPF1DPD: avx512_gather_scatter_prefetch<0xC6, MRM6m, "vscatterpf1dpd",
- VK8WM, vy512xmem>, EVEX_V512, VEX_W, EVEX_CD8<32, CD8VT1>;
+ VK8WM, vy512xmem>, EVEX_V512, REX_W, EVEX_CD8<32, CD8VT1>;
defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd",
- VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
+ VK8WM, vz512mem>, EVEX_V512, REX_W, EVEX_CD8<64, CD8VT1>;
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr, SchedWrite Sched> {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
@@ -10625,9 +10622,9 @@ let Predicates = [prd] in
}
defm VPMOVM2B : cvt_mask_by_elt_width<0x28, avx512vl_i8_info, "vpmovm2" , HasBWI>;
-defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , VEX_W;
+defm VPMOVM2W : cvt_mask_by_elt_width<0x28, avx512vl_i16_info, "vpmovm2", HasBWI> , REX_W;
defm VPMOVM2D : cvt_mask_by_elt_width<0x38, avx512vl_i32_info, "vpmovm2", HasDQI>;
-defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , VEX_W;
+defm VPMOVM2Q : cvt_mask_by_elt_width<0x38, avx512vl_i64_info, "vpmovm2", HasDQI> , REX_W;
multiclass convert_vector_to_mask_common<bits<8> opc, X86VectorVTInfo _, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs _.KRC:$dst), (ins _.RC:$src),
@@ -10670,11 +10667,11 @@ multiclass avx512_convert_vector_to_mask<bits<8> opc, string OpcodeStr,
defm VPMOVB2M : avx512_convert_vector_to_mask<0x29, "vpmovb2m",
avx512vl_i8_info, HasBWI>;
defm VPMOVW2M : avx512_convert_vector_to_mask<0x29, "vpmovw2m",
- avx512vl_i16_info, HasBWI>, VEX_W;
+ avx512vl_i16_info, HasBWI>, REX_W;
defm VPMOVD2M : avx512_convert_vector_to_mask<0x39, "vpmovd2m",
avx512vl_i32_info, HasDQI>;
defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
- avx512vl_i64_info, HasDQI>, VEX_W;
+ avx512vl_i64_info, HasDQI>, REX_W;
// Patterns for handling sext from a mask register to v16i8/v16i16 when DQI
// is available, but BWI is not. We can't handle this in lowering because
@@ -10748,13 +10745,13 @@ multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
// FIXME: Is there a better scheduler class for VPCOMPRESS?
defm VPCOMPRESSD : compress_by_elt_width <0x8B, "vpcompressd", WriteVarShuffle256,
- avx512vl_i32_info>, EVEX, NotMemoryFoldable;
+ avx512vl_i32_info>, EVEX;
defm VPCOMPRESSQ : compress_by_elt_width <0x8B, "vpcompressq", WriteVarShuffle256,
- avx512vl_i64_info>, EVEX, VEX_W, NotMemoryFoldable;
+ avx512vl_i64_info>, EVEX, REX_W;
defm VCOMPRESSPS : compress_by_elt_width <0x8A, "vcompressps", WriteVarShuffle256,
- avx512vl_f32_info>, EVEX, NotMemoryFoldable;
+ avx512vl_f32_info>, EVEX;
defm VCOMPRESSPD : compress_by_elt_width <0x8A, "vcompresspd", WriteVarShuffle256,
- avx512vl_f64_info>, EVEX, VEX_W, NotMemoryFoldable;
+ avx512vl_f64_info>, EVEX, REX_W;
// expand
multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
@@ -10814,11 +10811,11 @@ multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
defm VPEXPANDD : expand_by_elt_width <0x89, "vpexpandd", WriteVarShuffle256,
avx512vl_i32_info>, EVEX;
defm VPEXPANDQ : expand_by_elt_width <0x89, "vpexpandq", WriteVarShuffle256,
- avx512vl_i64_info>, EVEX, VEX_W;
+ avx512vl_i64_info>, EVEX, REX_W;
defm VEXPANDPS : expand_by_elt_width <0x88, "vexpandps", WriteVarShuffle256,
avx512vl_f32_info>, EVEX;
defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", WriteVarShuffle256,
- avx512vl_f64_info>, EVEX, VEX_W;
+ avx512vl_f64_info>, EVEX, REX_W;
//handle instruction reg_vec1 = op(reg_vec,imm)
// op(mem_vec,imm)
@@ -11081,7 +11078,7 @@ multiclass avx512_common_unary_fp_sae_packed_imm_all<string OpcodeStr,
AVX512AIi8Base, EVEX, EVEX_CD8<32, CD8VF>;
defm PD : avx512_common_unary_fp_sae_packed_imm<OpcodeStr, avx512vl_f64_info,
opcPd, OpNode, MaskOpNode, OpNodeSAE, sched, prd>,
- AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, VEX_W;
+ AVX512AIi8Base, EVEX, EVEX_CD8<64, CD8VF>, REX_W;
}
defm VREDUCE : avx512_common_unary_fp_sae_packed_imm_all<"vreduce", 0x56, 0x56,
@@ -11097,7 +11094,7 @@ defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26
defm VRANGEPD : avx512_common_fp_sae_packed_imm<"vrangepd", avx512vl_f64_info,
0x50, X86VRange, X86VRangeSAE,
SchedWriteFAdd, HasDQI>,
- AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+ AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
0x50, X86VRange, X86VRangeSAE,
SchedWriteFAdd, HasDQI>,
@@ -11105,14 +11102,14 @@ defm VRANGEPS : avx512_common_fp_sae_packed_imm<"vrangeps", avx512vl_f32_info,
defm VRANGESD: avx512_common_fp_sae_scalar_imm<"vrangesd",
f64x_info, 0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
- AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
defm VRANGESS: avx512_common_fp_sae_scalar_imm<"vrangess", f32x_info,
0x51, X86Ranges, X86RangesSAE, SchedWriteFAdd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VREDUCESD: avx512_common_fp_sae_scalar_imm<"vreducesd", f64x_info,
0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
- AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
defm VREDUCESS: avx512_common_fp_sae_scalar_imm<"vreducess", f32x_info,
0x57, X86Reduces, X86ReducesSAE, SchedWriteFRnd, HasDQI>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
@@ -11122,7 +11119,7 @@ defm VREDUCESH: avx512_common_fp_sae_scalar_imm<"vreducesh", f16x_info,
defm VGETMANTSD: avx512_common_fp_sae_scalar_imm<"vgetmantsd", f64x_info,
0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
- AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
0x27, X86GetMants, X86GetMantsSAE, SchedWriteFRnd, HasAVX512>,
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
@@ -11184,11 +11181,11 @@ multiclass avx512_shuff_packed_128<string OpcodeStr, X86FoldableSchedWrite sched
defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", WriteFShuffle256,
avx512vl_f32_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", WriteFShuffle256,
- avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+ avx512vl_f64_info, avx512vl_f64_info, 0x23, "VPERM2F128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", WriteFShuffle256,
avx512vl_i32_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", WriteFShuffle256,
- avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+ avx512vl_i64_info, avx512vl_i64_info, 0x43, "VPERM2I128">, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
multiclass avx512_valign<bits<8> opc, string OpcodeStr,
X86FoldableSchedWrite sched, X86VectorVTInfo _>{
@@ -11240,7 +11237,7 @@ defm VALIGND: avx512_valign_common<"valignd", SchedWriteShuffle,
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VALIGNQ: avx512_valign_common<"valignq", SchedWriteShuffle,
avx512vl_i64_info>, EVEX_CD8<64, CD8VF>,
- VEX_W;
+ REX_W;
defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr",
SchedWriteShuffle, avx512vl_i8_info,
@@ -11428,7 +11425,7 @@ multiclass avx512_unary_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
SDNode OpNode, X86SchedWriteWidths sched,
Predicate prd> {
defm Q : avx512_unary_rmb_vl<opc_q, OpcodeStr#"q", OpNode, sched,
- avx512vl_i64_info, prd>, VEX_W;
+ avx512vl_i64_info, prd>, REX_W;
defm D : avx512_unary_rmb_vl<opc_d, OpcodeStr#"d", OpNode, sched,
avx512vl_i32_info, prd>;
}
@@ -11437,9 +11434,9 @@ multiclass avx512_unary_rm_vl_bw<bits<8> opc_b, bits<8> opc_w, string OpcodeStr,
SDNode OpNode, X86SchedWriteWidths sched,
Predicate prd> {
defm W : avx512_unary_rm_vl<opc_w, OpcodeStr#"w", OpNode, sched,
- avx512vl_i16_info, prd>, VEX_WIG;
+ avx512vl_i16_info, prd>, WIG;
defm B : avx512_unary_rm_vl<opc_b, OpcodeStr#"b", OpNode, sched,
- avx512vl_i8_info, prd>, VEX_WIG;
+ avx512vl_i8_info, prd>, WIG;
}
multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
@@ -11563,7 +11560,7 @@ multiclass avx512_movddup_common<bits<8> opc, string OpcodeStr,
multiclass avx512_movddup<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched> {
defm NAME: avx512_movddup_common<opc, OpcodeStr, sched,
- avx512vl_f64_info>, XD, VEX_W;
+ avx512vl_f64_info>, XD, REX_W;
}
defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", SchedWriteFShuffle>;
@@ -11650,8 +11647,7 @@ multiclass avx512_extract_elt_w<string OpcodeStr, X86VectorVTInfo _> {
def rr_REV : AVX512Ii8<0x15, MRMDestReg, (outs GR32orGR64:$dst),
(ins _.RC:$src1, u8imm:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- EVEX, TAPD, FoldGenData<NAME#rr>,
- Sched<[WriteVecExtract]>;
+ EVEX, TAPD, Sched<[WriteVecExtract]>;
defm NAME : avx512_extract_elt_bw_m<0x15, OpcodeStr, X86pextrw, _>, TAPD;
}
@@ -11677,10 +11673,10 @@ multiclass avx512_extract_elt_dq<string OpcodeStr, X86VectorVTInfo _,
}
}
-defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, VEX_WIG;
-defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, VEX_WIG;
+defm VPEXTRBZ : avx512_extract_elt_b<"vpextrb", v16i8x_info>, WIG;
+defm VPEXTRWZ : avx512_extract_elt_w<"vpextrw", v8i16x_info>, WIG;
defm VPEXTRDZ : avx512_extract_elt_dq<"vpextrd", v4i32x_info, GR32>;
-defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, VEX_W;
+defm VPEXTRQZ : avx512_extract_elt_dq<"vpextrq", v2i64x_info, GR64>, REX_W;
multiclass avx512_insert_elt_m<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, PatFrag LdFrag,
@@ -11723,11 +11719,11 @@ multiclass avx512_insert_elt_dq<bits<8> opc, string OpcodeStr,
}
defm VPINSRBZ : avx512_insert_elt_bw<0x20, "vpinsrb", X86pinsrb, v16i8x_info,
- extloadi8>, TAPD, VEX_WIG;
+ extloadi8>, TAPD, WIG;
defm VPINSRWZ : avx512_insert_elt_bw<0xC4, "vpinsrw", X86pinsrw, v8i16x_info,
- extloadi16>, PD, VEX_WIG;
+ extloadi16>, PD, WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
-defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
+defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, REX_W;
let Predicates = [HasAVX512, NoBWI] in {
def : Pat<(X86pinsrb VR128:$src1,
@@ -11768,7 +11764,7 @@ multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_FP>{
}
defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_f32_info>, PS;
-defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, VEX_W;
+defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_f64_info>, PD, REX_W;
//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
@@ -11806,10 +11802,10 @@ multiclass avx512_shift_packed_all<bits<8> opc, SDNode OpNode, Format MRMr,
}
defm VPSLLDQ : avx512_shift_packed_all<0x73, X86vshldq, MRM7r, MRM7m, "vpslldq",
SchedWriteShuffle, HasBWI>,
- AVX512PDIi8Base, EVEX_4V, VEX_WIG;
+ AVX512PDIi8Base, EVEX_4V, WIG;
defm VPSRLDQ : avx512_shift_packed_all<0x73, X86vshrdq, MRM3r, MRM3m, "vpsrldq",
SchedWriteShuffle, HasBWI>,
- AVX512PDIi8Base, EVEX_4V, VEX_WIG;
+ AVX512PDIi8Base, EVEX_4V, WIG;
multiclass avx512_psadbw_packed<bits<8> opc, SDNode OpNode,
string OpcodeStr, X86FoldableSchedWrite sched,
@@ -11847,7 +11843,7 @@ multiclass avx512_psadbw_packed_all<bits<8> opc, SDNode OpNode,
}
defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
- SchedWritePSADBW, HasBWI>, EVEX_4V, VEX_WIG;
+ SchedWritePSADBW, HasBWI>, EVEX_4V, WIG;
// Transforms to swizzle an immediate to enable better matching when
// memory operand isn't in the right place.
@@ -12073,7 +12069,7 @@ multiclass avx512_common_ternlog<string OpcodeStr, X86SchedWriteWidths sched,
defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", SchedWriteVecALU,
avx512vl_i32_info>;
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
- avx512vl_i64_info>, VEX_W;
+ avx512vl_i64_info>, REX_W;
// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
@@ -12281,11 +12277,11 @@ defm VFIXUPIMMSSZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<32, CD8VT1>;
defm VFIXUPIMMSDZ : avx512_fixupimm_scalar<0x55, "vfixupimm",
SchedWriteFAdd.Scl, f64x_info, v2i64x_info>,
- AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;
+ AVX512AIi8Base, VEX_LIG, EVEX_4V, EVEX_CD8<64, CD8VT1>, REX_W;
defm VFIXUPIMMPS : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f32_info,
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<SchedWriteFAdd, avx512vl_f64_info,
- avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, VEX_W;
+ avx512vl_i64_info>, EVEX_CD8<64, CD8VF>, REX_W;
// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
@@ -12428,17 +12424,17 @@ multiclass avx512_vaes<bits<8> Op, string OpStr, string IntPrefix> {
defm Z128 : AESI_binop_rm_int<Op, OpStr,
!cast<Intrinsic>(IntPrefix),
loadv2i64, 0, VR128X, i128mem>,
- EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, VEX_WIG;
+ EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V128, WIG;
defm Z256 : AESI_binop_rm_int<Op, OpStr,
!cast<Intrinsic>(IntPrefix#"_256"),
loadv4i64, 0, VR256X, i256mem>,
- EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, VEX_WIG;
+ EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V256, WIG;
}
let Predicates = [HasAVX512, HasVAES] in
defm Z : AESI_binop_rm_int<Op, OpStr,
!cast<Intrinsic>(IntPrefix#"_512"),
loadv8i64, 0, VR512, i512mem>,
- EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, VEX_WIG;
+ EVEX_4V, EVEX_CD8<64, CD8VF>, EVEX_V512, WIG;
}
defm VAESENC : avx512_vaes<0xDC, "vaesenc", "int_x86_aesni_aesenc">;
@@ -12452,15 +12448,15 @@ defm VAESDECLAST : avx512_vaes<0xDF, "vaesdeclast", "int_x86_aesni_aesdeclast">
let Predicates = [HasAVX512, HasVPCLMULQDQ] in
defm VPCLMULQDQZ : vpclmulqdq<VR512, i512mem, loadv8i64, int_x86_pclmulqdq_512>,
- EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, VEX_WIG;
+ EVEX_4V, EVEX_V512, EVEX_CD8<64, CD8VF>, WIG;
let Predicates = [HasVLX, HasVPCLMULQDQ] in {
defm VPCLMULQDQZ128 : vpclmulqdq<VR128X, i128mem, loadv2i64, int_x86_pclmulqdq>,
- EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, VEX_WIG;
+ EVEX_4V, EVEX_V128, EVEX_CD8<64, CD8VF>, WIG;
defm VPCLMULQDQZ256: vpclmulqdq<VR256X, i256mem, loadv4i64,
int_x86_pclmulqdq_256>, EVEX_4V, EVEX_V256,
- EVEX_CD8<64, CD8VF>, VEX_WIG;
+ EVEX_CD8<64, CD8VF>, WIG;
}
// Aliases
@@ -12534,22 +12530,22 @@ multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
SDNode OpNode, X86SchedWriteWidths sched> {
defm W : VBMI2_shift_var_rm_common<wOp, Prefix#"w", OpNode, sched,
- avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
+ avx512vl_i16_info>, REX_W, EVEX_CD8<16, CD8VF>;
defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix#"d", OpNode, sched,
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix#"q", OpNode, sched,
- avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+ avx512vl_i64_info>, REX_W, EVEX_CD8<64, CD8VF>;
}
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
SDNode OpNode, X86SchedWriteWidths sched> {
defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix#"w", sched,
avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
- VEX_W, EVEX_CD8<16, CD8VF>;
+ REX_W, EVEX_CD8<16, CD8VF>;
defm D : avx512_common_3Op_imm8<Prefix#"d", avx512vl_i32_info, dqOp,
OpNode, sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm Q : avx512_common_3Op_imm8<Prefix#"q", avx512vl_i64_info, dqOp, OpNode,
- sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+ sched, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, REX_W;
}
// Concat & Shift
@@ -12560,16 +12556,14 @@ defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SchedWriteVecIMul
// Compress
defm VPCOMPRESSB : compress_by_elt_width<0x63, "vpcompressb", WriteVarShuffle256,
- avx512vl_i8_info, HasVBMI2>, EVEX,
- NotMemoryFoldable;
+ avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", WriteVarShuffle256,
- avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W,
- NotMemoryFoldable;
+ avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
// Expand
defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", WriteVarShuffle256,
avx512vl_i8_info, HasVBMI2>, EVEX;
defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", WriteVarShuffle256,
- avx512vl_i16_info, HasVBMI2>, EVEX, VEX_W;
+ avx512vl_i16_info, HasVBMI2>, EVEX, REX_W;
//===----------------------------------------------------------------------===//
// VNNI
@@ -12593,7 +12587,8 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.LdFrag addr:$src3))))>,
EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD,
- Sched<[sched.Folded, sched.ReadAfterFold]>;
+ Sched<[sched.Folded, sched.ReadAfterFold,
+ sched.ReadAfterFold]>;
defm mb : AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
OpStr, "${src3}"#VTI.BroadcastStr#", $src2",
@@ -12601,7 +12596,8 @@ multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
(OpNode VTI.RC:$src1, VTI.RC:$src2,
(VTI.VT (VTI.BroadcastLdFrag addr:$src3)))>,
EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B,
- T8PD, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ T8PD, Sched<[sched.Folded, sched.ReadAfterFold,
+ sched.ReadAfterFold]>;
}
}
@@ -12656,7 +12652,7 @@ let Predicates = [HasVNNI,HasVLX] in {
defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop, SchedWriteVecALU,
avx512vl_i8_info, HasBITALG>;
defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop, SchedWriteVecALU,
- avx512vl_i16_info, HasBITALG>, VEX_W;
+ avx512vl_i16_info, HasBITALG>, REX_W;
defm : avx512_unary_lowering<"VPOPCNTB", ctpop, avx512vl_i8_info, HasBITALG>;
defm : avx512_unary_lowering<"VPOPCNTW", ctpop, avx512vl_i16_info, HasBITALG>;
@@ -12751,10 +12747,10 @@ multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
defm VGF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
X86GF2P8affineinvqb, SchedWriteVecIMul>,
- EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
+ EVEX_4V, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
defm VGF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
X86GF2P8affineqb, SchedWriteVecIMul>,
- EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
+ EVEX_4V, EVEX_CD8<8, CD8VF>, REX_W, AVX512AIi8Base;
//===----------------------------------------------------------------------===//
@@ -12860,7 +12856,7 @@ multiclass avx512_vp2intersect<X86SchedWriteWidths sched, AVX512VLVectorVTInfo _
}
defm VP2INTERSECTD : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i32_info>;
-defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, VEX_W;
+defm VP2INTERSECTQ : avx512_vp2intersect<SchedWriteVecALU, avx512vl_i64_info>, REX_W;
multiclass avx512_binop_all2<bits<8> opc, string OpcodeStr,
X86SchedWriteWidths sched,
@@ -12969,6 +12965,27 @@ let Predicates = [HasBF16, HasVLX] in {
(VCVTNEPS2BF16Z256rr VR256X:$src)>;
def : Pat<(v8bf16 (int_x86_vcvtneps2bf16256 (loadv8f32 addr:$src))),
(VCVTNEPS2BF16Z256rm addr:$src)>;
+
+ def : Pat<(v8bf16 (X86VBroadcastld16 addr:$src)),
+ (VPBROADCASTWZ128rm addr:$src)>;
+ def : Pat<(v16bf16 (X86VBroadcastld16 addr:$src)),
+ (VPBROADCASTWZ256rm addr:$src)>;
+
+ def : Pat<(v8bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
+ (VPBROADCASTWZ128rr VR128X:$src)>;
+ def : Pat<(v16bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
+ (VPBROADCASTWZ256rr VR128X:$src)>;
+
+ // TODO: No scalar broadcast yet, since we don't support a legal scalar bf16 type so far.
+}
+
+let Predicates = [HasBF16] in {
+ def : Pat<(v32bf16 (X86VBroadcastld16 addr:$src)),
+ (VPBROADCASTWZrm addr:$src)>;
+
+ def : Pat<(v32bf16 (X86VBroadcast (v8bf16 VR128X:$src))),
+ (VPBROADCASTWZrr VR128X:$src)>;
+ // TODO: No scalar broadcast yet, since we don't support a legal scalar bf16 type so far.
}
let Constraints = "$src1 = $dst" in {
@@ -13107,9 +13124,9 @@ def : Pat<(i16 (extractelt (v8i16 VR128X:$src), (iPTR 0))),
// Allow "vmovw" to use GR64
let hasSideEffects = 0 in {
def VMOVW64toSHrr : AVX512<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$src),
- "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>;
+ "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, REX_W, Sched<[WriteVecMoveFromGpr]>;
def VMOVSHtoW64rr : AVX512<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
- "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>;
+ "vmovw\t{$src, $dst|$dst, $src}", []>, T_MAP5PD, EVEX, REX_W, Sched<[WriteVecMoveToGpr]>;
}
}
@@ -13389,11 +13406,11 @@ multiclass avx512_cvtqq2ph<bits<8> opc, string OpcodeStr, SDPatternOperator OpNo
}
defm VCVTQQ2PH : avx512_cvtqq2ph<0x5B, "vcvtqq2ph", any_sint_to_fp, sint_to_fp,
- X86VSintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5PS,
+ X86VSintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5PS,
EVEX_CD8<64, CD8VF>;
defm VCVTUQQ2PH : avx512_cvtqq2ph<0x7A, "vcvtuqq2ph", any_uint_to_fp, uint_to_fp,
- X86VUintToFpRnd, SchedWriteCvtDQ2PS>, VEX_W, T_MAP5XD,
+ X86VUintToFpRnd, SchedWriteCvtDQ2PS>, REX_W, T_MAP5XD,
EVEX_CD8<64, CD8VF>;
// Convert half to signed/unsigned int 32/64
@@ -13402,26 +13419,26 @@ defm VCVTSH2SIZ: avx512_cvt_s_int_round<0x2D, f16x_info, i32x_info, X86cvts2si,
T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2SI64Z: avx512_cvt_s_int_round<0x2D, f16x_info, i64x_info, X86cvts2si,
X86cvts2siRnd, WriteCvtSS2I, "cvtsh2si", "{q}", HasFP16>,
- T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
+ T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USIZ: avx512_cvt_s_int_round<0x79, f16x_info, i32x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{l}", HasFP16>,
T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTSH2USI64Z: avx512_cvt_s_int_round<0x79, f16x_info, i64x_info, X86cvts2usi,
X86cvts2usiRnd, WriteCvtSS2I, "cvtsh2usi", "{q}", HasFP16>,
- T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
+ T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2SIZ: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i32x_info,
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
"{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsh2si", f16x_info, i64x_info,
any_fp_to_sint, X86cvtts2Int, X86cvtts2IntSAE, WriteCvtSS2I,
- "{q}", HasFP16>, VEX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
+ "{q}", HasFP16>, REX_W, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USIZ: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i32x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
"{l}", HasFP16>, T_MAP5XS, EVEX_CD8<16, CD8VT1>;
defm VCVTTSH2USI64Z: avx512_cvt_s_all<0x78, "vcvttsh2usi", f16x_info, i64x_info,
any_fp_to_uint, X86cvtts2UInt, X86cvtts2UIntSAE, WriteCvtSS2I,
- "{q}", HasFP16>, T_MAP5XS, VEX_W, EVEX_CD8<16, CD8VT1>;
+ "{q}", HasFP16>, T_MAP5XS, REX_W, EVEX_CD8<16, CD8VT1>;
let Predicates = [HasFP16] in {
defm VCVTSI2SHZ : avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR32,
@@ -13429,13 +13446,13 @@ let Predicates = [HasFP16] in {
T_MAP5XS, EVEX_CD8<32, CD8VT1>;
defm VCVTSI642SHZ: avx512_vcvtsi_common<0x2A, X86SintToFp, X86SintToFpRnd, WriteCvtI2SS, GR64,
v8f16x_info, i64mem, loadi64, "cvtsi2sh","q">,
- T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
+ T_MAP5XS, REX_W, EVEX_CD8<64, CD8VT1>;
defm VCVTUSI2SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR32,
v8f16x_info, i32mem, loadi32,
"cvtusi2sh","l">, T_MAP5XS, EVEX_CD8<32, CD8VT1>;
defm VCVTUSI642SHZ : avx512_vcvtsi_common<0x7B, X86UintToFp, X86UintToFpRnd, WriteCvtI2SS, GR64,
v8f16x_info, i64mem, loadi64, "cvtusi2sh", "q">,
- T_MAP5XS, VEX_W, EVEX_CD8<64, CD8VT1>;
+ T_MAP5XS, REX_W, EVEX_CD8<64, CD8VT1>;
def : InstAlias<"vcvtsi2sh\t{$src, $src1, $dst|$dst, $src1, $src}",
(VCVTSI2SHZrm_Int VR128X:$dst, VR128X:$src1, i32mem:$src), 0, "att">;
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index f08ecdf6afc9..9cde6f559886 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -37,6 +37,13 @@ def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
[(set GR64:$dst, lea64addr:$src)]>;
} // SchedRW
+// Pseudo instruction for LEA that prevents the optimizer from eliminating
+// the instruction.
+let SchedRW = [WriteLEA], isPseudo = true, hasSideEffects = 1 in {
+def PLEA32r : PseudoI<(outs GR32:$dst), (ins anymem:$src), []>;
+def PLEA64r : PseudoI<(outs GR64:$dst), (ins anymem:$src), []>;
+}
+
//===----------------------------------------------------------------------===//
// Fixed-Register Multiplication and Division Instructions.
//
@@ -51,593 +58,7 @@ class SchedLoadReg<X86FoldableSchedWrite Sched> : Sched<[Sched.Folded,
// Register reads (implicit or explicit).
Sched.ReadAfterFold, Sched.ReadAfterFold]>;
-// Extra precision multiplication
-
-// AL is really implied by AX, but the registers in Defs must match the
-// SDNode results (i8, i32).
-// AL,AH = AL*GR8
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src",
- // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
- // This probably ought to be moved to a def : Pat<> if the
- // syntax can be accepted.
- [(set AL, (mul AL, GR8:$src)),
- (implicit EFLAGS)]>, Sched<[WriteIMul8]>;
-// AX,DX = AX*GR16
-let Defs = [AX,DX,EFLAGS], Uses = [AX], hasSideEffects = 0 in
-def MUL16r : I<0xF7, MRM4r, (outs), (ins GR16:$src),
- "mul{w}\t$src",
- []>, OpSize16, Sched<[WriteIMul16]>;
-// EAX,EDX = EAX*GR32
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], hasSideEffects = 0 in
-def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src),
- "mul{l}\t$src",
- [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>,
- OpSize32, Sched<[WriteIMul32]>;
-// RAX,RDX = RAX*GR64
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], hasSideEffects = 0 in
-def MUL64r : RI<0xF7, MRM4r, (outs), (ins GR64:$src),
- "mul{q}\t$src",
- [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>,
- Sched<[WriteIMul64]>;
-// AL,AH = AL*[mem8]
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src),
- "mul{b}\t$src",
- // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
- // This probably ought to be moved to a def : Pat<> if the
- // syntax can be accepted.
- [(set AL, (mul AL, (loadi8 addr:$src))),
- (implicit EFLAGS)]>, SchedLoadReg<WriteIMul8>;
-// AX,DX = AX*[mem16]
-let mayLoad = 1, hasSideEffects = 0 in {
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def MUL16m : I<0xF7, MRM4m, (outs), (ins i16mem:$src),
- "mul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul16>;
-// EAX,EDX = EAX*[mem32]
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src),
- "mul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul32>;
-// RAX,RDX = RAX*[mem64]
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
-def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src),
- "mul{q}\t$src", []>, SchedLoadReg<WriteIMul64>,
- Requires<[In64BitMode]>;
-}
-
-let hasSideEffects = 0 in {
-// AL,AH = AL*GR8
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>,
- Sched<[WriteIMul8]>;
-// AX,DX = AX*GR16
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>,
- OpSize16, Sched<[WriteIMul16]>;
-// EAX,EDX = EAX*GR32
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>,
- OpSize32, Sched<[WriteIMul32]>;
-// RAX,RDX = RAX*GR64
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
-def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>,
- Sched<[WriteIMul64]>;
-
-let mayLoad = 1 in {
-// AL,AH = AL*[mem8]
-let Defs = [AL,EFLAGS,AX], Uses = [AL] in
-def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src),
- "imul{b}\t$src", []>, SchedLoadReg<WriteIMul8>;
-// AX,DX = AX*[mem16]
-let Defs = [AX,DX,EFLAGS], Uses = [AX] in
-def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src),
- "imul{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIMul16>;
-// EAX,EDX = EAX*[mem32]
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
-def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
- "imul{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIMul32>;
-// RAX,RDX = RAX*[mem64]
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
-def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
- "imul{q}\t$src", []>, SchedLoadReg<WriteIMul64>,
- Requires<[In64BitMode]>;
-}
-} // hasSideEffects
-
-
-let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst" in {
-
-let isCommutable = 1 in {
-// X = IMUL Y, Z --> X = IMUL Z, Y
-// Register-Register Signed Integer Multiply
-def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
- "imul{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, GR16:$src2))]>,
- Sched<[WriteIMul16Reg]>, TB, OpSize16;
-def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
- "imul{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, GR32:$src2))]>,
- Sched<[WriteIMul32Reg]>, TB, OpSize32;
-def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "imul{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, GR64:$src2))]>,
- Sched<[WriteIMul64Reg]>, TB;
-} // isCommutable
-
-// Register-Memory Signed Integer Multiply
-def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$src1, i16mem:$src2),
- "imul{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, (loadi16 addr:$src2)))]>,
- Sched<[WriteIMul16Reg.Folded, WriteIMul16Reg.ReadAfterFold]>, TB, OpSize16;
-def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "imul{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, (loadi32 addr:$src2)))]>,
- Sched<[WriteIMul32Reg.Folded, WriteIMul32Reg.ReadAfterFold]>, TB, OpSize32;
-def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "imul{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, (loadi64 addr:$src2)))]>,
- Sched<[WriteIMul64Reg.Folded, WriteIMul32Reg.ReadAfterFold]>, TB;
-} // Constraints = "$src1 = $dst"
-
-} // Defs = [EFLAGS]
-
-// Surprisingly enough, these are not two address instructions!
-let Defs = [EFLAGS] in {
-// NOTE: These are order specific, we want the ri8 forms to be listed
-// first so that they are slightly preferred to the ri forms.
-
-// Register-Integer Signed Integer Multiply
-def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
- (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
- "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
- Sched<[WriteIMul16Imm]>, OpSize16;
-def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
- "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag GR16:$src1, imm:$src2))]>,
- Sched<[WriteIMul16Imm]>, OpSize16;
-def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
- (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
- "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, imm:$src2))]>,
- Sched<[WriteIMul32Imm]>, OpSize32;
-def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
- (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
- "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>,
- Sched<[WriteIMul32Imm]>, OpSize32;
-def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
- (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
- "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>,
- Sched<[WriteIMul64Imm]>;
-def IMUL64rri32 : RIi32S<0x69, MRMSrcReg, // GR64 = GR64*I32
- (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
- "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>,
- Sched<[WriteIMul64Imm]>;
-
-// Memory-Integer Signed Integer Multiply
-def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
- (outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
- "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag (loadi16 addr:$src1),
- i16immSExt8:$src2))]>,
- Sched<[WriteIMul16Imm.Folded]>, OpSize16;
-def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
- (outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
- "imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, EFLAGS,
- (X86smul_flag (loadi16 addr:$src1), imm:$src2))]>,
- Sched<[WriteIMul16Imm.Folded]>, OpSize16;
-def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
- (outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
- "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag (loadi32 addr:$src1),
- i32immSExt8:$src2))]>,
- Sched<[WriteIMul32Imm.Folded]>, OpSize32;
-def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
- (outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
- "imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, EFLAGS,
- (X86smul_flag (loadi32 addr:$src1), imm:$src2))]>,
- Sched<[WriteIMul32Imm.Folded]>, OpSize32;
-def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
- (outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
- "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag (loadi64 addr:$src1),
- i64immSExt8:$src2))]>,
- Sched<[WriteIMul64Imm.Folded]>;
-def IMUL64rmi32 : RIi32S<0x69, MRMSrcMem, // GR64 = [mem64]*I32
- (outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
- "imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, EFLAGS,
- (X86smul_flag (loadi64 addr:$src1),
- i64immSExt32:$src2))]>,
- Sched<[WriteIMul64Imm.Folded]>;
-} // Defs = [EFLAGS]
-
-// unsigned division/remainder
-let hasSideEffects = 1 in { // so that we don't speculatively execute
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in
-def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
- "div{b}\t$src", []>, Sched<[WriteDiv8]>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def DIV16r : I<0xF7, MRM6r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
- "div{w}\t$src", []>, Sched<[WriteDiv16]>, OpSize16;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
- "div{l}\t$src", []>, Sched<[WriteDiv32]>, OpSize32;
-// RDX:RAX/r64 = RAX,RDX
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
-def DIV64r : RI<0xF7, MRM6r, (outs), (ins GR64:$src),
- "div{q}\t$src", []>, Sched<[WriteDiv64]>;
-
-let mayLoad = 1 in {
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in
-def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "div{b}\t$src", []>, SchedLoadReg<WriteDiv8>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def DIV16m : I<0xF7, MRM6m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "div{w}\t$src", []>, OpSize16, SchedLoadReg<WriteDiv16>;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
-def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src),
- "div{l}\t$src", []>, SchedLoadReg<WriteDiv32>, OpSize32;
-// RDX:RAX/[mem64] = RAX,RDX
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
-def DIV64m : RI<0xF7, MRM6m, (outs), (ins i64mem:$src),
- "div{q}\t$src", []>, SchedLoadReg<WriteDiv64>,
- Requires<[In64BitMode]>;
-}
-
-// Signed division/remainder.
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in
-def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
- "idiv{b}\t$src", []>, Sched<[WriteIDiv8]>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def IDIV16r: I<0xF7, MRM7r, (outs), (ins GR16:$src), // DX:AX/r16 = AX,DX
- "idiv{w}\t$src", []>, Sched<[WriteIDiv16]>, OpSize16;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
-def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX
- "idiv{l}\t$src", []>, Sched<[WriteIDiv32]>, OpSize32;
-// RDX:RAX/r64 = RAX,RDX
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
-def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src),
- "idiv{q}\t$src", []>, Sched<[WriteIDiv64]>;
-
-let mayLoad = 1 in {
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in
-def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH
- "idiv{b}\t$src", []>, SchedLoadReg<WriteIDiv8>;
-let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
-def IDIV16m: I<0xF7, MRM7m, (outs), (ins i16mem:$src), // DX:AX/[mem16] = AX,DX
- "idiv{w}\t$src", []>, OpSize16, SchedLoadReg<WriteIDiv16>;
-let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in // EDX:EAX/[mem32] = EAX,EDX
-def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
- "idiv{l}\t$src", []>, OpSize32, SchedLoadReg<WriteIDiv32>;
-let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in // RDX:RAX/[mem64] = RAX,RDX
-def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
- "idiv{q}\t$src", []>, SchedLoadReg<WriteIDiv64>,
- Requires<[In64BitMode]>;
-}
-} // hasSideEffects = 0
-
-//===----------------------------------------------------------------------===//
-// Two address Instructions.
-//
-
-// unary instructions
-let CodeSize = 2 in {
-let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
-def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
- "neg{b}\t$dst",
- [(set GR8:$dst, (ineg GR8:$src1)),
- (implicit EFLAGS)]>;
-def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
- "neg{w}\t$dst",
- [(set GR16:$dst, (ineg GR16:$src1)),
- (implicit EFLAGS)]>, OpSize16;
-def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
- "neg{l}\t$dst",
- [(set GR32:$dst, (ineg GR32:$src1)),
- (implicit EFLAGS)]>, OpSize32;
-def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src1), "neg{q}\t$dst",
- [(set GR64:$dst, (ineg GR64:$src1)),
- (implicit EFLAGS)]>;
-} // Constraints = "$src1 = $dst", SchedRW
-
-// Read-modify-write negate.
-let SchedRW = [WriteALURMW] in {
-def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
- "neg{b}\t$dst",
- [(store (ineg (loadi8 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>;
-def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
- "neg{w}\t$dst",
- [(store (ineg (loadi16 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>, OpSize16;
-def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
- "neg{l}\t$dst",
- [(store (ineg (loadi32 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>, OpSize32;
-def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
- [(store (ineg (loadi64 addr:$dst)), addr:$dst),
- (implicit EFLAGS)]>,
- Requires<[In64BitMode]>;
-} // SchedRW
-} // Defs = [EFLAGS]
-
-
-// Note: NOT does not set EFLAGS!
-
-let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
-def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
- "not{b}\t$dst",
- [(set GR8:$dst, (not GR8:$src1))]>;
-def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
- "not{w}\t$dst",
- [(set GR16:$dst, (not GR16:$src1))]>, OpSize16;
-def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
- "not{l}\t$dst",
- [(set GR32:$dst, (not GR32:$src1))]>, OpSize32;
-def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src1), "not{q}\t$dst",
- [(set GR64:$dst, (not GR64:$src1))]>;
-} // Constraints = "$src1 = $dst", SchedRW
-
-let SchedRW = [WriteALURMW] in {
-def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
- "not{b}\t$dst",
- [(store (not (loadi8 addr:$dst)), addr:$dst)]>;
-def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
- "not{w}\t$dst",
- [(store (not (loadi16 addr:$dst)), addr:$dst)]>,
- OpSize16;
-def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
- "not{l}\t$dst",
- [(store (not (loadi32 addr:$dst)), addr:$dst)]>,
- OpSize32;
-def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
- [(store (not (loadi64 addr:$dst)), addr:$dst)]>,
- Requires<[In64BitMode]>;
-} // SchedRW
-} // CodeSize
-
-def X86add_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
- (X86add_flag node:$lhs, node:$rhs), [{
- return hasNoCarryFlagUses(SDValue(N, 1));
-}]>;
-
-def X86sub_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
- (X86sub_flag node:$lhs, node:$rhs), [{
- // Only use DEC if the result is used.
- return !SDValue(N, 0).use_empty() && hasNoCarryFlagUses(SDValue(N, 1));
-}]>;
-
-// TODO: inc/dec is slow for P4, but fast for Pentium-M.
-let Defs = [EFLAGS] in {
-let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
-let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
-def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
- "inc{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86add_flag_nocf GR8:$src1, 1))]>;
-def INC16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
- "inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86add_flag_nocf GR16:$src1, 1))]>,
- OpSize16;
-def INC32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
- "inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86add_flag_nocf GR32:$src1, 1))]>,
- OpSize32;
-def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "inc{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86add_flag_nocf GR64:$src1, 1))]>;
-} // isConvertibleToThreeAddress = 1, CodeSize = 2
-
-// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
-let CodeSize = 1, hasSideEffects = 0 in {
-def INC16r_alt : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
- "inc{w}\t$dst", []>,
- OpSize16, Requires<[Not64BitMode]>;
-def INC32r_alt : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
- "inc{l}\t$dst", []>,
- OpSize32, Requires<[Not64BitMode]>;
-} // CodeSize = 1, hasSideEffects = 0
-} // Constraints = "$src1 = $dst", SchedRW
-
-let CodeSize = 2, SchedRW = [WriteALURMW] in {
-let Predicates = [UseIncDec] in {
- def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
- [(store (add (loadi8 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>;
- def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
- [(store (add (loadi16 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>, OpSize16;
- def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
- [(store (add (loadi32 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>, OpSize32;
-} // Predicates
-let Predicates = [UseIncDec, In64BitMode] in {
- def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
- [(store (add (loadi64 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>;
-} // Predicates
-} // CodeSize = 2, SchedRW
-
-let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
-let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
-def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
- "dec{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86sub_flag_nocf GR8:$src1, 1))]>;
-def DEC16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
- "dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86sub_flag_nocf GR16:$src1, 1))]>,
- OpSize16;
-def DEC32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
- "dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86sub_flag_nocf GR32:$src1, 1))]>,
- OpSize32;
-def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "dec{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86sub_flag_nocf GR64:$src1, 1))]>;
-} // isConvertibleToThreeAddress = 1, CodeSize = 2
-
-// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
-let CodeSize = 1, hasSideEffects = 0 in {
-def DEC16r_alt : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
- "dec{w}\t$dst", []>,
- OpSize16, Requires<[Not64BitMode]>;
-def DEC32r_alt : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
- "dec{l}\t$dst", []>,
- OpSize32, Requires<[Not64BitMode]>;
-} // CodeSize = 1, hasSideEffects = 0
-} // Constraints = "$src1 = $dst", SchedRW
-
-
-let CodeSize = 2, SchedRW = [WriteALURMW] in {
-let Predicates = [UseIncDec] in {
- def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
- [(store (add (loadi8 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>;
- def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
- [(store (add (loadi16 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>, OpSize16;
- def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
- [(store (add (loadi32 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>, OpSize32;
-} // Predicates
-let Predicates = [UseIncDec, In64BitMode] in {
- def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
- [(store (add (loadi64 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>;
-} // Predicates
-} // CodeSize = 2, SchedRW
-} // Defs = [EFLAGS]
-
-/// X86TypeInfo - This is a bunch of information that describes relevant X86
-/// information about value types. For example, it can tell you what the
-/// register class and preferred load to use.
-class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
- PatFrag loadnode, X86MemOperand memoperand, ImmType immkind,
- Operand immoperand, SDPatternOperator immoperator,
- Operand imm8operand, SDPatternOperator imm8operator,
- bit hasOddOpcode, OperandSize opSize,
- bit hasREX_W> {
- /// VT - This is the value type itself.
- ValueType VT = vt;
-
- /// InstrSuffix - This is the suffix used on instructions with this type. For
- /// example, i8 -> "b", i16 -> "w", i32 -> "l", i64 -> "q".
- string InstrSuffix = instrsuffix;
-
- /// RegClass - This is the register class associated with this type. For
- /// example, i8 -> GR8, i16 -> GR16, i32 -> GR32, i64 -> GR64.
- RegisterClass RegClass = regclass;
-
- /// LoadNode - This is the load node associated with this type. For
- /// example, i8 -> loadi8, i16 -> loadi16, i32 -> loadi32, i64 -> loadi64.
- PatFrag LoadNode = loadnode;
-
- /// MemOperand - This is the memory operand associated with this type. For
- /// example, i8 -> i8mem, i16 -> i16mem, i32 -> i32mem, i64 -> i64mem.
- X86MemOperand MemOperand = memoperand;
-
- /// ImmEncoding - This is the encoding of an immediate of this type. For
- /// example, i8 -> Imm8, i16 -> Imm16, i32 -> Imm32. Note that i64 -> Imm32
- /// since the immediate fields of i64 instructions is a 32-bit sign extended
- /// value.
- ImmType ImmEncoding = immkind;
-
- /// ImmOperand - This is the operand kind of an immediate of this type. For
- /// example, i8 -> i8imm, i16 -> i16imm, i32 -> i32imm. Note that i64 ->
- /// i64i32imm since the immediate fields of i64 instructions is a 32-bit sign
- /// extended value.
- Operand ImmOperand = immoperand;
-
- /// ImmOperator - This is the operator that should be used to match an
- /// immediate of this kind in a pattern (e.g. imm, or i64immSExt32).
- SDPatternOperator ImmOperator = immoperator;
-
- /// Imm8Operand - This is the operand kind to use for an imm8 of this type.
- /// For example, i8 -> <invalid>, i16 -> i16i8imm, i32 -> i32i8imm. This is
- /// only used for instructions that have a sign-extended imm8 field form.
- Operand Imm8Operand = imm8operand;
-
- /// Imm8Operator - This is the operator that should be used to match an 8-bit
- /// sign extended immediate of this kind in a pattern (e.g. imm16immSExt8).
- SDPatternOperator Imm8Operator = imm8operator;
-
- /// HasOddOpcode - This bit is true if the instruction should have an odd (as
- /// opposed to even) opcode. Operations on i8 are usually even, operations on
- /// other datatypes are odd.
- bit HasOddOpcode = hasOddOpcode;
-
- /// OpSize - Selects whether the instruction needs a 0x66 prefix based on
- /// 16-bit vs 32-bit mode. i8/i64 set this to OpSizeFixed. i16 sets this
- /// to Opsize16. i32 sets this to OpSize32.
- OperandSize OpSize = opSize;
-
- /// HasREX_W - This bit is set to true if the instruction should have
- /// the 0x40 REX prefix. This is set for i64 types.
- bit HasREX_W = hasREX_W;
-}
-
-def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
-
-
-def Xi8 : X86TypeInfo<i8, "b", GR8, loadi8, i8mem,
- Imm8, i8imm, imm_su, i8imm, invalid_node,
- 0, OpSizeFixed, 0>;
-def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem,
- Imm16, i16imm, imm_su, i16i8imm, i16immSExt8_su,
- 1, OpSize16, 0>;
-def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem,
- Imm32, i32imm, imm_su, i32i8imm, i32immSExt8_su,
- 1, OpSize32, 0>;
-def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
- Imm32S, i64i32imm, i64immSExt32_su, i64i8imm, i64immSExt8_su,
- 1, OpSizeFixed, 1>;
-
-/// ITy - This instruction base class takes the type info for the instruction.
-/// Using this, it:
-/// 1. Concatenates together the instruction mnemonic with the appropriate
-/// suffix letter, a tab, and the arguments.
-/// 2. Infers whether the instruction should have a 0x66 prefix byte.
-/// 3. Infers whether the instruction should have a 0x40 REX_W prefix.
-/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations)
-/// or 1 (for i16,i32,i64 operations).
-class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
- string mnemonic, string args, list<dag> pattern>
- : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
- opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode },
- f, outs, ins,
- !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> {
-
- // Infer instruction prefixes from type info.
- let OpSize = typeinfo.OpSize;
- let hasREX_W = typeinfo.HasREX_W;
-}
-
-// BinOpRR - Instructions like "add reg, reg, reg".
+// BinOpRR - Binary instructions with inputs "reg, reg".
class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
: ITy<opcode, MRMDestReg, typeinfo, outlist,
@@ -645,24 +66,24 @@ class BinOpRR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
Sched<[sched]>;
-// BinOpRR_F - Instructions like "cmp reg, Reg", where the pattern has
-// just a EFLAGS as a result.
+// BinOpRR_F - Binary instructions with inputs "reg, reg", where the pattern
+// has just EFLAGS as a result.
class BinOpRR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDPatternOperator opnode>
: BinOpRR<opcode, mnemonic, typeinfo, (outs), WriteALU,
[(set EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
-// BinOpRR_RF - Instructions like "add reg, reg, reg", where the pattern has
-// both a regclass and EFLAGS as a result.
+// BinOpRR_RF - Binary instructions with inputs "reg, reg", where the pattern
+// has both a regclass and EFLAGS as a result.
class BinOpRR_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
: BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteALU,
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2))]>;
-// BinOpRR_RFF - Instructions like "adc reg, reg, reg", where the pattern has
-// both a regclass and EFLAGS as a result, and has EFLAGS as input.
+// BinOpRR_RFF - Binary instructions with inputs "reg, reg", where the pattern
+// has both a regclass and EFLAGS as a result, and has EFLAGS as input.
class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
: BinOpRR<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC,
@@ -670,7 +91,7 @@ class BinOpRR_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
(opnode typeinfo.RegClass:$src1, typeinfo.RegClass:$src2,
EFLAGS))]>;
-// BinOpRR_Rev - Instructions like "add reg, reg, reg" (reversed encoding).
+// BinOpRR_Rev - Binary instructions with inputs "reg, reg" (reversed encoding).
class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
X86FoldableSchedWrite sched = WriteALU>
: ITy<opcode, MRMSrcReg, typeinfo,
@@ -684,11 +105,13 @@ class BinOpRR_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
let hasSideEffects = 0;
}
-// BinOpRR_RDD_Rev - Instructions like "adc reg, reg, reg" (reversed encoding).
+// BinOpRR_RFF_Rev - Binary instructions with inputs "reg, reg" (reversed
+// encoding), with sched = WriteADC.
class BinOpRR_RFF_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: BinOpRR_Rev<opcode, mnemonic, typeinfo, WriteADC>;
-// BinOpRR_F_Rev - Instructions like "cmp reg, reg" (reversed encoding).
+// BinOpRR_F_Rev - Binary instructions with inputs "reg, reg" (reversed
+// encoding), without outlist dag.
class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
: ITy<opcode, MRMSrcReg, typeinfo, (outs),
(ins typeinfo.RegClass:$src1, typeinfo.RegClass:$src2),
@@ -700,7 +123,7 @@ class BinOpRR_F_Rev<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo>
let hasSideEffects = 0;
}
-// BinOpRM - Instructions like "add reg, reg, [mem]".
+// BinOpRM - Binary instructions with inputs "reg, [mem]".
class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
: ITy<opcode, MRMSrcMem, typeinfo, outlist,
@@ -708,10 +131,10 @@ class BinOpRM<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
-// BinOpRM - Instructions like "adc reg, reg, [mem]".
+// BinOpRM_ImplicitUse - Binary instructions with inputs "reg, [mem]".
// There is an implicit register read at the end of the operand sequence.
class BinOpRM_ImplicitUse<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
+ dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
: ITy<opcode, MRMSrcMem, typeinfo, outlist,
(ins typeinfo.RegClass:$src1, typeinfo.MemOperand:$src2),
mnemonic, "{$src2, $src1|$src1, $src2}", pattern>,
@@ -721,29 +144,33 @@ class BinOpRM_ImplicitUse<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
// implicit register read.
sched.ReadAfterFold]>;
-// BinOpRM_F - Instructions like "cmp reg, [mem]".
+// BinOpRM_F - Binary instructions with inputs "reg, [mem]", where the pattern
+// has just EFLAGS as a result.
class BinOpRM_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
: BinOpRM<opcode, mnemonic, typeinfo, (outs), WriteALU,
[(set EFLAGS,
(opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
-// BinOpRM_RF - Instructions like "add reg, reg, [mem]".
+// BinOpRM_RF - Binary instructions with inputs "reg, [mem]", where the pattern
+// has both a regclass and EFLAGS as a result.
class BinOpRM_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
: BinOpRM<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteALU,
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2)))]>;
-// BinOpRM_RFF - Instructions like "adc reg, reg, [mem]".
+// BinOpRM_RFF - Binary instructions with inputs "reg, [mem]", where the pattern
+// has both a regclass and EFLAGS as a result, and has EFLAGS as input.
class BinOpRM_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode>
- : BinOpRM_ImplicitUse<opcode, mnemonic, typeinfo, (outs typeinfo.RegClass:$dst), WriteADC,
- [(set typeinfo.RegClass:$dst, EFLAGS,
- (opnode typeinfo.RegClass:$src1, (typeinfo.LoadNode addr:$src2),
- EFLAGS))]>;
+ SDNode opnode>
+ : BinOpRM_ImplicitUse<opcode, mnemonic, typeinfo,
+ (outs typeinfo.RegClass:$dst), WriteADC,
+ [(set typeinfo.RegClass:$dst, EFLAGS,
+ (opnode typeinfo.RegClass:$src1,
+ (typeinfo.LoadNode addr:$src2), EFLAGS))]>;
-// BinOpRI - Instructions like "add reg, reg, imm".
+// BinOpRI - Binary instructions with inputs "reg, imm".
class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
@@ -753,28 +180,32 @@ class BinOpRI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
let ImmT = typeinfo.ImmEncoding;
}
-// BinOpRI_F - Instructions like "cmp reg, imm".
+// BinOpRI_F - Binary instructions with inputs "reg, imm", where the pattern
+// has EFLAGS as a result.
class BinOpRI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDPatternOperator opnode, Format f>
: BinOpRI<opcode, mnemonic, typeinfo, f, (outs), WriteALU,
[(set EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
-// BinOpRI_RF - Instructions like "add reg, reg, imm".
+// BinOpRI_RF - Binary instructions with inputs "reg, imm", where the pattern
+// has both a regclass and EFLAGS as a result.
class BinOpRI_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
: BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), WriteALU,
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2))]>;
-// BinOpRI_RFF - Instructions like "adc reg, reg, imm".
+
+// BinOpRI_RFF - Binary instructions with inputs "reg, imm", where the pattern
+// has both a regclass and EFLAGS as a result, and has EFLAGS as input.
class BinOpRI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode, Format f>
+ SDNode opnode, Format f>
: BinOpRI<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), WriteADC,
[(set typeinfo.RegClass:$dst, EFLAGS,
(opnode typeinfo.RegClass:$src1, typeinfo.ImmOperator:$src2,
EFLAGS))]>;
-// BinOpRI8 - Instructions like "add reg, reg, imm8".
+// BinOpRI8 - Binary instructions with inputs "reg, imm8".
class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, dag outlist, X86FoldableSchedWrite sched, list<dag> pattern>
: ITy<opcode, f, typeinfo, outlist,
@@ -784,69 +215,64 @@ class BinOpRI8<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
let ImmT = Imm8; // Always 8-bit immediate.
}
-// BinOpRI8_F - Instructions like "cmp reg, imm8".
-class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDPatternOperator opnode, Format f>
- : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs), WriteALU,
- [(set EFLAGS,
- (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
-
-// BinOpRI8_RF - Instructions like "add reg, reg, imm8".
-class BinOpRI8_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDPatternOperator opnode, Format f>
- : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), WriteALU,
- [(set typeinfo.RegClass:$dst, EFLAGS,
- (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2))]>;
-
-// BinOpRI8_RFF - Instructions like "adc reg, reg, imm8".
-class BinOpRI8_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDPatternOperator opnode, Format f>
- : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), WriteADC,
- [(set typeinfo.RegClass:$dst, EFLAGS,
- (opnode typeinfo.RegClass:$src1, typeinfo.Imm8Operator:$src2,
- EFLAGS))]>;
-
-// BinOpMR - Instructions like "add [mem], reg".
+// BinOpRI8_F - Binary instructions with inputs "reg, imm8" that produce only
+// EFLAGS.
+class BinOpRI8_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs), WriteALU, []>;
+
+// BinOpRI8_RF - Binary instructions with inputs "reg, imm8" that produce a
+// register result and EFLAGS.
+class BinOpRI8_RF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), WriteALU, []>;
+
+// BinOpRI8_RFF - Binary instructions with inputs "reg, imm8" that produce a
+// register result and EFLAGS and also read EFLAGS.
+class BinOpRI8_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo, Format f>
+ : BinOpRI8<opcode, mnemonic, typeinfo, f, (outs typeinfo.RegClass:$dst), WriteADC, []>;
+
+// BinOpMR - Binary instructions with inputs "[mem], reg".
class BinOpMR<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
list<dag> pattern>
: ITy<opcode, MRMDestMem, typeinfo,
(outs), (ins typeinfo.MemOperand:$dst, typeinfo.RegClass:$src),
mnemonic, "{$src, $dst|$dst, $src}", pattern>;
-// BinOpMR_RMW - Instructions like "add [mem], reg".
+// BinOpMR_RMW - Binary instructions with inputs "[mem], reg", where the
+// pattern implicitly defines EFLAGS.
class BinOpMR_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode>
: BinOpMR<opcode, mnemonic, typeinfo,
- [(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst),
- (implicit EFLAGS)]>, Sched<[WriteALURMW,
- // base, scale, index, offset, segment
- ReadDefault, ReadDefault, ReadDefault,
- ReadDefault, ReadDefault,
- WriteALU.ReadAfterFold]>; // reg
-
-// BinOpMR_RMW_FF - Instructions like "adc [mem], reg".
+ [(store (opnode (load addr:$dst), typeinfo.RegClass:$src), addr:$dst),
+ (implicit EFLAGS)]>,
+ Sched<[WriteALURMW,
+ // base, scale, index, offset, segment
+ ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault, ReadDefault,
+ WriteALU.ReadAfterFold]>; // reg
+
+// BinOpMR_RMW_FF - Binary instructions with inputs "[mem], reg", where the
+// pattern reads EFLAGS and implicitly defines EFLAGS.
class BinOpMR_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
- SDNode opnode>
+ SDNode opnode>
: BinOpMR<opcode, mnemonic, typeinfo,
[(store (opnode (load addr:$dst), typeinfo.RegClass:$src, EFLAGS),
addr:$dst),
- (implicit EFLAGS)]>, Sched<[WriteADCRMW,
- // base, scale, index, offset, segment
- ReadDefault, ReadDefault, ReadDefault,
- ReadDefault, ReadDefault,
- WriteALU.ReadAfterFold, // reg
- WriteALU.ReadAfterFold]>; // EFLAGS
-
-// BinOpMR_F - Instructions like "cmp [mem], reg".
+ (implicit EFLAGS)]>,
+ Sched<[WriteADCRMW,
+ // base, scale, index, offset, segment
+ ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault, ReadDefault,
+ WriteALU.ReadAfterFold, // reg
+ WriteALU.ReadAfterFold]>; // EFLAGS
+
+// BinOpMR_F - Binary instructions with inputs "[mem], reg", where the pattern
+// has EFLAGS as a result.
class BinOpMR_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDPatternOperator opnode>
: BinOpMR<opcode, mnemonic, typeinfo,
[(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst),
typeinfo.RegClass:$src))]>,
- Sched<[WriteALU.Folded, ReadDefault, ReadDefault, ReadDefault,
- ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>;
+ Sched<[WriteALU.Folded, ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>;
-// BinOpMI - Instructions like "add [mem], imm".
+// BinOpMI - Binary instructions with inputs "[mem], imm".
class BinOpMI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Format f, list<dag> pattern>
: ITy<opcode, f, typeinfo,
@@ -855,30 +281,36 @@ class BinOpMI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
let ImmT = typeinfo.ImmEncoding;
}
-// BinOpMI_RMW - Instructions like "add [mem], imm".
+// BinOpMI_RMW - Binary instructions with inputs "[mem], imm", where the
+// pattern implicitly defines EFLAGS.
class BinOpMI_RMW<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
: BinOpMI<opcode, mnemonic, typeinfo, f,
[(store (opnode (typeinfo.VT (load addr:$dst)),
typeinfo.ImmOperator:$src), addr:$dst),
- (implicit EFLAGS)]>, Sched<[WriteALURMW]>;
-// BinOpMI_RMW_FF - Instructions like "adc [mem], imm".
+ (implicit EFLAGS)]>,
+ Sched<[WriteALURMW]>;
+
+// BinOpMI_RMW_FF - Binary instructions with inputs "[mem], imm", where the
+// pattern reads EFLAGS and implicitly defines EFLAGS.
class BinOpMI_RMW_FF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDNode opnode, Format f>
: BinOpMI<opcode, mnemonic, typeinfo, f,
[(store (opnode (typeinfo.VT (load addr:$dst)),
typeinfo.ImmOperator:$src, EFLAGS), addr:$dst),
- (implicit EFLAGS)]>, Sched<[WriteADCRMW]>;
+ (implicit EFLAGS)]>,
+ Sched<[WriteADCRMW]>;
-// BinOpMI_F - Instructions like "cmp [mem], imm".
+// BinOpMI_F - Binary instructions with inputs "[mem], imm", where the pattern
+// has EFLAGS as a result.
class BinOpMI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
SDPatternOperator opnode, Format f>
: BinOpMI<opcode, mnemonic, typeinfo, f,
[(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst),
typeinfo.ImmOperator:$src))]>,
- Sched<[WriteALU.Folded]>;
+ Sched<[WriteALU.Folded]>;
-// BinOpMI8 - Instructions like "add [mem], imm8".
+// BinOpMI8 - Binary instructions with inputs "[mem], imm8".
class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
Format f, list<dag> pattern>
: ITy<0x82, f, typeinfo,
@@ -887,57 +319,487 @@ class BinOpMI8<string mnemonic, X86TypeInfo typeinfo,
let ImmT = Imm8; // Always 8-bit immediate.
}
-// BinOpMI8_RMW - Instructions like "add [mem], imm8".
-class BinOpMI8_RMW<string mnemonic, X86TypeInfo typeinfo,
- SDPatternOperator opnode, Format f>
- : BinOpMI8<mnemonic, typeinfo, f,
- [(store (opnode (load addr:$dst),
- typeinfo.Imm8Operator:$src), addr:$dst),
- (implicit EFLAGS)]>, Sched<[WriteALURMW]>;
-
-// BinOpMI8_RMW_FF - Instructions like "adc [mem], imm8".
-class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo,
- SDPatternOperator opnode, Format f>
- : BinOpMI8<mnemonic, typeinfo, f,
- [(store (opnode (load addr:$dst),
- typeinfo.Imm8Operator:$src, EFLAGS), addr:$dst),
- (implicit EFLAGS)]>, Sched<[WriteADCRMW]>;
-
-// BinOpMI8_F - Instructions like "cmp [mem], imm8".
-class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo,
- SDPatternOperator opnode, Format f>
- : BinOpMI8<mnemonic, typeinfo, f,
- [(set EFLAGS, (opnode (typeinfo.LoadNode addr:$dst),
- typeinfo.Imm8Operator:$src))]>,
- Sched<[WriteALU.Folded]>;
-
-// BinOpAI - Instructions like "add %eax, %eax, imm", that imp-def EFLAGS.
+// BinOpMI8_RMW - Binary read-modify-write instructions with inputs
+// "[mem], imm8".
+class BinOpMI8_RMW<string mnemonic, X86TypeInfo typeinfo, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f, []>, Sched<[WriteALURMW]>;
+
+// BinOpMI8_RMW_FF - Binary read-modify-write instructions with inputs
+// "[mem], imm8" that also read EFLAGS.
+class BinOpMI8_RMW_FF<string mnemonic, X86TypeInfo typeinfo, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f, []>, Sched<[WriteADCRMW]>;
+
+// BinOpMI8_F - Binary instructions with inputs "[mem], imm8" that produce
+// only EFLAGS.
+class BinOpMI8_F<string mnemonic, X86TypeInfo typeinfo, Format f>
+ : BinOpMI8<mnemonic, typeinfo, f, []>, Sched<[WriteALU.Folded]>;
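+
+// Note: like the "reg, imm8" classes, the "[mem], imm8" classes above have no
+// patterns; they are instantiated from the ArithBinOp_* multiclasses below
+// (NAME#16mi8 and friends), with mayLoad/mayStore/hasSideEffects supplied at
+// the instantiation site.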
+
+// BinOpAI - Binary instructions with an immediate input, which implicitly use
+// the accumulator register areg and implicitly define areg and EFLAGS.
class BinOpAI<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Register areg, string operands, X86FoldableSchedWrite sched = WriteALU>
: ITy<opcode, RawFrm, typeinfo,
(outs), (ins typeinfo.ImmOperand:$src),
- mnemonic, operands, []>, Sched<[sched]> {
+ mnemonic, operands, []>,
+ Sched<[sched]> {
let ImmT = typeinfo.ImmEncoding;
let Uses = [areg];
let Defs = [areg, EFLAGS];
let hasSideEffects = 0;
}
-// BinOpAI_RFF - Instructions like "adc %eax, %eax, imm", that implicitly define
-// and use EFLAGS.
+// BinOpAI_RFF - Binary instructions with an immediate input, which implicitly
+// use and define areg and EFLAGS.
class BinOpAI_RFF<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Register areg, string operands>
: BinOpAI<opcode, mnemonic, typeinfo, areg, operands, WriteADC> {
let Uses = [areg, EFLAGS];
}
-// BinOpAI_F - Instructions like "cmp %eax, %eax, imm", that imp-def EFLAGS.
+// BinOpAI_F - Binary instructions with an immediate input, which implicitly
+// use areg and implicitly define only EFLAGS.
class BinOpAI_F<bits<8> opcode, string mnemonic, X86TypeInfo typeinfo,
Register areg, string operands>
: BinOpAI<opcode, mnemonic, typeinfo, areg, operands> {
let Defs = [EFLAGS];
}
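+
+// For illustration only (a hypothetical sketch; the real defs come from the
+// ArithBinOp_* multiclasses), an accumulator form pairs an explicit A register
+// with its operand string, e.g.
+//   def ADD8i8 : BinOpAI<0x04, "add", Xi8, AL, "{$src, %al|al, $src}">;
+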
+// UnaryOpM - Unary instructions with a memory operand.
+class UnaryOpM<bits<8> opcode, Format f, string mnemonic, X86TypeInfo info,
+ list<dag> pattern>
+ : ITy<opcode, f, info, (outs), (ins info.MemOperand:$dst), mnemonic,
+ "$dst", pattern>;
+
+// UnaryOpR - Unary instructions with a register operand.
+class UnaryOpR<bits<8> opcode, Format f, string mnemonic, X86TypeInfo info,
+ list<dag> pattern>
+ : ITy<opcode, f, info, (outs info.RegClass:$dst),
+ (ins info.RegClass:$src1), mnemonic, "$dst", pattern>;
+
+// INCDECR - Instructions like "inc reg"/"dec reg".
+class INCDECR<Format f, string mnemonic, X86TypeInfo info,
+ SDPatternOperator node>
+ : UnaryOpR<0xFE, f, mnemonic, info,
+ [(set info.RegClass:$dst, EFLAGS,
+ (node info.RegClass:$src1, 1))]>;
+
+// INCDECM - Instructions like "inc [mem]"/"dec [mem]".
+class INCDECM<Format f, string mnemonic, X86TypeInfo info, int num>
+ : UnaryOpM<0xFE, f, mnemonic, info,
+ [(store (add (info.LoadNode addr:$dst), num), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// INCDECR_ALT - Short forms of instructions like "inc reg"/"dec reg".
+class INCDECR_ALT<bits<8> opcode, string mnemonic, X86TypeInfo info>
+  : UnaryOpR<opcode, AddRegFrm, mnemonic, info, []> {
+ let Predicates = [Not64BitMode];
+ let Opcode = opcode;
+}
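+
+// The one-byte 0x40-0x4F inc/dec encodings are reused as REX prefixes in
+// 64-bit mode, which is why the short forms above are limited to
+// Not64BitMode.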
+
+// MulOpR - Instructions like "mul reg".
+class MulOpR<bits<8> opcode, Format f, string mnemonic, X86TypeInfo info,
+ X86FoldableSchedWrite sched, list<dag> pattern>
+ : ITy<opcode, f, info, (outs), (ins info.RegClass:$src), mnemonic,
+ "$src", pattern>,
+ Sched<[sched]>;
+
+// MulOpM - Instructions like "mul [mem]".
+class MulOpM<bits<8> opcode, Format f, string mnemonic, X86TypeInfo info,
+ X86FoldableSchedWrite sched, list<dag> pattern>
+ : ITy<opcode, f, info, (outs), (ins info.MemOperand:$src), mnemonic,
+ "$src", pattern>, SchedLoadReg<sched>;
+
+// NegOpR - Instructions like "neg reg", with implicit EFLAGS.
+class NegOpR<bits<8> opcode, string mnemonic, X86TypeInfo info>
+ : UnaryOpR<opcode, MRM3r, mnemonic, info,
+ [(set info.RegClass:$dst, (ineg info.RegClass:$src1)),
+ (implicit EFLAGS)]>;
+
+// NotOpR - Instructions like "not reg".
+class NotOpR<bits<8> opcode, string mnemonic, X86TypeInfo info>
+ : UnaryOpR<opcode, MRM2r, mnemonic, info,
+ [(set info.RegClass:$dst,
+ (not info.RegClass:$src1))]>;
+
+// NegOpM - Instructions like "neg [mem]", with implicit EFLAGS.
+class NegOpM<bits<8> opcode, string mnemonic, X86TypeInfo info>
+ : UnaryOpM<opcode, MRM3m, mnemonic, info,
+ [(store (ineg (info.LoadNode addr:$dst)), addr:$dst),
+ (implicit EFLAGS)]>;
+
+// NotOpM - Instructions like "not [mem]".
+class NotOpM<bits<8> opcode, string mnemonic, X86TypeInfo info>
+ : UnaryOpM<opcode, MRM2m, mnemonic, info,
+ [(store (not (info.LoadNode addr:$dst)), addr:$dst)]>;
+
+// BinOpRR_C - Binary instructions with inputs "reg, reg", which are used
+// mainly with Constraints = "$src1 = $dst".
+class BinOpRR_C<bits<8> opcode, Format f, string mnemonic, X86TypeInfo info,
+ list<dag> pattern>
+ : ITy<opcode, f, info, (outs info.RegClass:$dst),
+ (ins info.RegClass:$src1, info.RegClass:$src2),
+ mnemonic, "{$src2, $dst|$dst, $src2}", pattern>;
+
+// BinOpRM_C - Binary instructions with inputs "reg, [mem]", which are used
+// mainly with Constraints = "$src1 = $dst".
+class BinOpRM_C<bits<8> opcode, Format f, string mnemonic, X86TypeInfo info,
+ list<dag> pattern>
+ : ITy<opcode, f, info, (outs info.RegClass:$dst),
+ (ins info.RegClass:$src1, info.MemOperand:$src2),
+ mnemonic, "{$src2, $dst|$dst, $src2}", pattern>;
+
+// IMulOpRR - Instructions like "imul reg, reg".
+class IMulOpRR<bits<8> opcode, string mnemonic, X86TypeInfo info,
+ X86FoldableSchedWrite sched>
+ : BinOpRR_C<opcode, MRMSrcReg, mnemonic, info,
+ [(set info.RegClass:$dst, EFLAGS,
+ (X86smul_flag info.RegClass:$src1,
+ info.RegClass:$src2))]>,
+ Sched<[sched]>, TB;
+
+// IMulOpRM - Instructions like "imul reg, reg, [mem]".
+class IMulOpRM<bits<8> opcode, string mnemonic, X86TypeInfo info,
+ X86FoldableSchedWrite sched>
+ : BinOpRM_C<opcode, MRMSrcMem, mnemonic, info,
+ [(set info.RegClass:$dst, EFLAGS,
+ (X86smul_flag info.RegClass:$src1, (info.LoadNode addr:$src2)))]>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>, TB;
+
+// IMulOpRRI8 - Instructions like "imul reg, reg, i8".
+class IMulOpRRI8<bits<8> opcode, string mnemonic, X86TypeInfo info,
+ X86FoldableSchedWrite sched>
+ : ITy<opcode, MRMSrcReg, info, (outs info.RegClass:$dst),
+ (ins info.RegClass:$src1, info.Imm8Operand:$src2), mnemonic,
+ "{$src2, $src1, $dst|$dst, $src1, $src2}", []>, Sched<[sched]> {
+ let ImmT = Imm8;
+}
+
+// IMulOpRRI - Instructions like "imul reg, reg, i16/i32/i64".
+class IMulOpRRI<bits<8> opcode, string mnemonic, X86TypeInfo info,
+ X86FoldableSchedWrite sched>
+ : ITy<opcode, MRMSrcReg, info, (outs info.RegClass:$dst),
+ (ins info.RegClass:$src1, info.ImmOperand:$src2), mnemonic,
+ "{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set info.RegClass:$dst, EFLAGS,
+ (X86smul_flag info.RegClass:$src1,
+ info.ImmNoSuOperator:$src2))]>,
+ Sched<[sched]>{
+ let ImmT = info.ImmEncoding;
+}
+
+// IMulOpRMI8 - Instructions like "imul reg, [mem], i8".
+class IMulOpRMI8<bits<8> opcode, string mnemonic, X86TypeInfo info,
+ X86FoldableSchedWrite sched>
+ : ITy<opcode, MRMSrcMem, info, (outs info.RegClass:$dst),
+ (ins info.MemOperand:$src1, info.Imm8Operand:$src2), mnemonic,
+ "{$src2, $src1, $dst|$dst, $src1, $src2}", []>, Sched<[sched.Folded]> {
+ let ImmT = Imm8;
+}
+
+// IMulOpRMI - Instructions like "imul reg, [mem], i16/i32/i64".
+class IMulOpRMI<bits<8> opcode, string mnemonic, X86TypeInfo info,
+ X86FoldableSchedWrite sched>
+ : ITy<opcode, MRMSrcMem, info, (outs info.RegClass:$dst),
+ (ins info.MemOperand:$src1, info.ImmOperand:$src2), mnemonic,
+ "{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set info.RegClass:$dst, EFLAGS,
+ (X86smul_flag (info.LoadNode addr:$src1),
+ info.ImmNoSuOperator:$src2))]>,
+ Sched<[sched.Folded]>{
+ let ImmT = info.ImmEncoding;
+}
+
+def X86add_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86add_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+def X86sub_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86sub_flag node:$lhs, node:$rhs), [{
+ // Only use DEC if the result is used.
+ return !SDValue(N, 0).use_empty() && hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
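+
+// These "nocf" fragments only match when the carry-flag result of the add/sub
+// is unused, so e.g. "x + 1" whose CF is never read can be selected as INC
+// below; INC/DEC update the other arithmetic flags but leave CF untouched.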
+
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
+// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
+let CodeSize = 1, hasSideEffects = 0 in {
+def INC16r_alt : INCDECR_ALT<0x40, "inc", Xi16>;
+def INC32r_alt : INCDECR_ALT<0x40, "inc", Xi32>;
+} // CodeSize = 1, hasSideEffects = 0
+
+let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
+def INC8r : INCDECR<MRM0r, "inc", Xi8, X86add_flag_nocf>;
+def INC16r : INCDECR<MRM0r, "inc", Xi16, X86add_flag_nocf>;
+def INC32r : INCDECR<MRM0r, "inc", Xi32, X86add_flag_nocf>;
+def INC64r : INCDECR<MRM0r, "inc", Xi64, X86add_flag_nocf>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 2
+} // Constraints = "$src1 = $dst", SchedRW
+
+let CodeSize = 2, SchedRW = [WriteALURMW] in {
+let Predicates = [UseIncDec] in {
+ def INC8m : INCDECM<MRM0m, "inc", Xi8, 1>;
+ def INC16m : INCDECM<MRM0m, "inc", Xi16, 1>;
+ def INC32m : INCDECM<MRM0m, "inc", Xi32, 1>;
+} // Predicates
+let Predicates = [UseIncDec, In64BitMode] in {
+ def INC64m : INCDECM<MRM0m, "inc", Xi64, 1>;
+} // Predicates
+} // CodeSize = 2, SchedRW
+
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
+// Short forms only valid in 32-bit mode. Selected during MCInst lowering.
+let CodeSize = 1, hasSideEffects = 0 in {
+def DEC16r_alt : INCDECR_ALT<0x48, "dec", Xi16>;
+def DEC32r_alt : INCDECR_ALT<0x48, "dec", Xi32>;
+} // CodeSize = 1, hasSideEffects = 0
+
+let isConvertibleToThreeAddress = 1, CodeSize = 2 in { // Can xform into LEA.
+def DEC8r : INCDECR<MRM1r, "dec", Xi8, X86sub_flag_nocf>;
+def DEC16r : INCDECR<MRM1r, "dec", Xi16, X86sub_flag_nocf>;
+def DEC32r : INCDECR<MRM1r, "dec", Xi32, X86sub_flag_nocf>;
+def DEC64r : INCDECR<MRM1r, "dec", Xi64, X86sub_flag_nocf>;
+} // isConvertibleToThreeAddress = 1, CodeSize = 2
+} // Constraints = "$src1 = $dst", SchedRW
+
+let CodeSize = 2, SchedRW = [WriteALURMW] in {
+let Predicates = [UseIncDec] in {
+ def DEC8m : INCDECM<MRM1m, "dec", Xi8, -1>;
+ def DEC16m : INCDECM<MRM1m, "dec", Xi16, -1>;
+ def DEC32m : INCDECM<MRM1m, "dec", Xi32, -1>;
+} // Predicates
+let Predicates = [UseIncDec, In64BitMode] in {
+ def DEC64m : INCDECM<MRM1m, "dec", Xi64, -1>;
+} // Predicates
+} // CodeSize = 2, SchedRW
+} // Defs = [EFLAGS]
+
+// Extra precision multiplication
+
+// AL is really implied by AX, but the registers in Defs must match the
+// SDNode results (i8, i32).
+// AL,AH = AL*GR8
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def MUL8r : MulOpR<0xF6, MRM4r, "mul", Xi8, WriteIMul8,
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ [(set AL, (mul AL, GR8:$src)), (implicit EFLAGS)]>;
+// AX,DX = AX*GR16
+let Defs = [AX,DX,EFLAGS], Uses = [AX], hasSideEffects = 0 in
+def MUL16r : MulOpR<0xF7, MRM4r, "mul", Xi16, WriteIMul16, []>;
+// EAX,EDX = EAX*GR32
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX], hasSideEffects = 0 in
+def MUL32r : MulOpR<0xF7, MRM4r, "mul", Xi32, WriteIMul32,
+ [/*(set EAX, EDX, EFLAGS, (X86umul_flag EAX, GR32:$src))*/]>;
+// RAX,RDX = RAX*GR64
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], hasSideEffects = 0 in
+def MUL64r : MulOpR<0xF7, MRM4r, "mul", Xi64, WriteIMul64,
+ [/*(set RAX, RDX, EFLAGS, (X86umul_flag RAX, GR64:$src))*/]>;
+// AL,AH = AL*[mem8]
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def MUL8m : MulOpM<0xF6, MRM4m, "mul", Xi8, WriteIMul8,
+ // FIXME: Used for 8-bit mul, ignore result upper 8 bits.
+ // This probably ought to be moved to a def : Pat<> if the
+ // syntax can be accepted.
+ [(set AL, (mul AL, (loadi8 addr:$src))),
+ (implicit EFLAGS)]>;
+// AX,DX = AX*[mem16]
+let mayLoad = 1, hasSideEffects = 0 in {
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def MUL16m : MulOpM<0xF7, MRM4m, "mul", Xi16, WriteIMul16, []>;
+// EAX,EDX = EAX*[mem32]
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def MUL32m : MulOpM<0xF7, MRM4m, "mul", Xi32, WriteIMul32, []>;
+// RAX,RDX = RAX*[mem64]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
+def MUL64m : MulOpM<0xF7, MRM4m, "mul", Xi64, WriteIMul64, []>,
+ Requires<[In64BitMode]>;
+}
+
+let hasSideEffects = 0 in {
+// AL,AH = AL*GR8
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def IMUL8r : MulOpR<0xF6, MRM5r, "imul", Xi8, WriteIMul8, []>;
+// AX,DX = AX*GR16
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16r : MulOpR<0xF7, MRM5r, "imul", Xi16, WriteIMul16, []>;
+// EAX,EDX = EAX*GR32
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32r : MulOpR<0xF7, MRM5r, "imul", Xi32, WriteIMul32, []>;
+// RAX,RDX = RAX*GR64
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
+def IMUL64r : MulOpR<0xF7, MRM5r, "imul", Xi64, WriteIMul64, []>;
+
+let mayLoad = 1 in {
+// AL,AH = AL*[mem8]
+let Defs = [AL,EFLAGS,AX], Uses = [AL] in
+def IMUL8m : MulOpM<0xF6, MRM5m, "imul", Xi8, WriteIMul8, []>;
+// AX,DX = AX*[mem16]
+let Defs = [AX,DX,EFLAGS], Uses = [AX] in
+def IMUL16m : MulOpM<0xF7, MRM5m, "imul", Xi16, WriteIMul16, []>;
+// EAX,EDX = EAX*[mem32]
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in
+def IMUL32m : MulOpM<0xF7, MRM5m, "imul", Xi32, WriteIMul32, []>;
+// RAX,RDX = RAX*[mem64]
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in
+def IMUL64m : MulOpM<0xF7, MRM5m, "imul", Xi64, WriteIMul64, []>,
+ Requires<[In64BitMode]>;
+}
+
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst" in {
+let isCommutable = 1 in {
+// X = IMUL Y, Z --> X = IMUL Z, Y
+// Register-Register Signed Integer Multiply
+def IMUL16rr : IMulOpRR<0xAF, "imul", Xi16, WriteIMul16Reg>;
+def IMUL32rr : IMulOpRR<0xAF, "imul", Xi32, WriteIMul32Reg>;
+def IMUL64rr : IMulOpRR<0xAF, "imul", Xi64, WriteIMul64Reg>;
+} // isCommutable
+
+// Register-Memory Signed Integer Multiply
+def IMUL16rm : IMulOpRM<0xAF, "imul", Xi16, WriteIMul16Reg>;
+def IMUL32rm : IMulOpRM<0xAF, "imul", Xi32, WriteIMul32Reg>;
+def IMUL64rm : IMulOpRM<0xAF, "imul", Xi64, WriteIMul64Reg>;
+} // Constraints = "$src1 = $dst"
+} // Defs = [EFLAGS]
+
+// Surprisingly enough, these are not two address instructions!
+let Defs = [EFLAGS] in {
+// NOTE: These are order specific, we want the ri8 forms to be listed
+// first so that they are slightly preferred to the ri forms.
+
+// Register-Integer Signed Integer Multiply
+// GR16 = GR16*I8
+def IMUL16rri8 : IMulOpRRI8<0x6B, "imul", Xi16, WriteIMul16Imm>;
+// GR16 = GR16*I16
+def IMUL16rri : IMulOpRRI<0x69, "imul", Xi16, WriteIMul16Imm>;
+// GR32 = GR32*I8
+def IMUL32rri8 : IMulOpRRI8<0x6B, "imul", Xi32, WriteIMul32Imm>;
+// GR32 = GR32*I32
+def IMUL32rri : IMulOpRRI<0x69, "imul", Xi32, WriteIMul32Imm>;
+// GR64 = GR64*I8
+def IMUL64rri8 : IMulOpRRI8<0x6B, "imul", Xi64, WriteIMul64Imm>;
+// GR64 = GR64*I32
+def IMUL64rri32 : IMulOpRRI<0x69, "imul", Xi64, WriteIMul64Imm>;
+
+// Memory-Integer Signed Integer Multiply
+// GR16 = [mem16]*I8
+let mayLoad = 1 in {
+def IMUL16rmi8 : IMulOpRMI8<0x6B, "imul", Xi16, WriteIMul16Imm>;
+// GR16 = [mem16]*I16
+def IMUL16rmi : IMulOpRMI<0x69, "imul", Xi16, WriteIMul16Imm>;
+// GR32 = [mem32]*I8
+def IMUL32rmi8 : IMulOpRMI8<0x6B, "imul", Xi32, WriteIMul32Imm>;
+// GR32 = [mem32]*I32
+def IMUL32rmi : IMulOpRMI<0x69, "imul", Xi32, WriteIMul32Imm>;
+// GR64 = [mem64]*I8
+def IMUL64rmi8 : IMulOpRMI8<0x6B, "imul", Xi64, WriteIMul64Imm>;
+// GR64 = [mem64]*I32
+def IMUL64rmi32 : IMulOpRMI<0x69, "imul", Xi64, WriteIMul64Imm>;
+} // mayLoad
+} // Defs = [EFLAGS]
+} // hasSideEffects
+
+// Unsigned division/remainder.
+let hasSideEffects = 1 in { // so that we don't speculatively execute
+let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+// AX/r8 = AL,AH
+def DIV8r : MulOpR<0xF6, MRM6r, "div", Xi8, WriteDiv8, []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+// DX:AX/r16 = AX,DX
+def DIV16r : MulOpR<0xF7, MRM6r, "div", Xi16, WriteDiv16, []>;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+// EDX:EAX/r32 = EAX,EDX
+def DIV32r : MulOpR<0xF7, MRM6r, "div", Xi32, WriteDiv32, []>;
+// RDX:RAX/r64 = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def DIV64r : MulOpR<0xF7, MRM6r, "div", Xi64, WriteDiv64, []>;
+
+let mayLoad = 1 in {
+let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+// AX/[mem8] = AL,AH
+def DIV8m : MulOpM<0xF6, MRM6m, "div", Xi8, WriteDiv8, []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+// DX:AX/[mem16] = AX,DX
+def DIV16m : MulOpM<0xF7, MRM6m, "div", Xi16, WriteDiv16, []>;
+// EDX:EAX/[mem32] = EAX,EDX
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+def DIV32m : MulOpM<0xF7, MRM6m, "div", Xi32, WriteDiv32, []>;
+// RDX:RAX/[mem64] = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def DIV64m : MulOpM<0xF7, MRM6m, "div", Xi64, WriteDiv64, []>,
+ Requires<[In64BitMode]>;
+}
+
+// Signed division/remainder.
+let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+// AX/r8 = AL,AH
+def IDIV8r : MulOpR<0xF6, MRM7r, "idiv", Xi8, WriteIDiv8, []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+// DX:AX/r16 = AX,DX
+def IDIV16r: MulOpR<0xF7, MRM7r, "idiv", Xi16, WriteIDiv16, []>;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+// EDX:EAX/r32 = EAX,EDX
+def IDIV32r: MulOpR<0xF7, MRM7r, "idiv", Xi32, WriteIDiv32, []>;
+// RDX:RAX/r64 = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def IDIV64r: MulOpR<0xF7, MRM7r, "idiv", Xi64, WriteIDiv64, []>;
+
+let mayLoad = 1 in {
+let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+// AX/[mem8] = AL,AH
+def IDIV8m : MulOpM<0xF6, MRM7m, "idiv", Xi8, WriteIDiv8, []>;
+let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
+// DX:AX/[mem16] = AX,DX
+def IDIV16m: MulOpM<0xF7, MRM7m, "idiv", Xi16, WriteIDiv16, []>;
+let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in
+// EDX:EAX/[mem32] = EAX,EDX
+def IDIV32m: MulOpM<0xF7, MRM7m, "idiv", Xi32, WriteIDiv32, []>;
+// RDX:RAX/[mem64] = RAX,RDX
+let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in
+def IDIV64m: MulOpM<0xF7, MRM7m, "idiv", Xi64, WriteIDiv64, []>,
+ Requires<[In64BitMode]>;
+}
+} // hasSideEffects = 1
+
+//===----------------------------------------------------------------------===//
+// Two address Instructions.
+//
+
+// Unary instructions.
+let CodeSize = 2 in {
+let Defs = [EFLAGS] in {
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
+def NEG8r : NegOpR<0xF6, "neg", Xi8>;
+def NEG16r : NegOpR<0xF7, "neg", Xi16>;
+def NEG32r : NegOpR<0xF7, "neg", Xi32>;
+def NEG64r : NegOpR<0xF7, "neg", Xi64>;
+} // Constraints = "$src1 = $dst", SchedRW
+
+// Read-modify-write negate.
+let SchedRW = [WriteALURMW] in {
+def NEG8m : NegOpM<0xF6, "neg", Xi8>;
+def NEG16m : NegOpM<0xF7, "neg", Xi16>;
+def NEG32m : NegOpM<0xF7, "neg", Xi32>;
+def NEG64m : NegOpM<0xF7, "neg", Xi64>, Requires<[In64BitMode]>;
+} // SchedRW
+} // Defs = [EFLAGS]
+
+// Note: NOT does not set EFLAGS!
+
+let Constraints = "$src1 = $dst", SchedRW = [WriteALU] in {
+def NOT8r : NotOpR<0xF6, "not", Xi8>;
+def NOT16r : NotOpR<0xF7, "not", Xi16>;
+def NOT32r : NotOpR<0xF7, "not", Xi32>;
+def NOT64r : NotOpR<0xF7, "not", Xi64>;
+} // Constraints = "$src1 = $dst", SchedRW
+
+let SchedRW = [WriteALURMW] in {
+def NOT8m : NotOpM<0xF6, "not", Xi8>;
+def NOT16m : NotOpM<0xF7, "not", Xi16>;
+def NOT32m : NotOpM<0xF7, "not", Xi32>;
+def NOT64m : NotOpM<0xF7, "not", Xi64>, Requires<[In64BitMode]>;
+} // SchedRW
+} // CodeSize
+
/// ArithBinOp_RF - This is an arithmetic binary operator where the pattern is
/// defined with "(set GPR:$dst, EFLAGS, (...".
///
@@ -959,24 +821,24 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
} // isConvertibleToThreeAddress
} // isCommutable
- def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>, FoldGenData<NAME#8rr>;
- def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>, FoldGenData<NAME#16rr>;
- def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>, FoldGenData<NAME#32rr>;
- def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>, FoldGenData<NAME#64rr>;
+ def NAME#8rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi16>;
+ def NAME#32rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi32>;
+ def NAME#64rr_REV : BinOpRR_Rev<BaseOpc2, mnemonic, Xi64>;
def NAME#8rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi8 , opnodeflag>;
def NAME#16rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi16, opnodeflag>;
def NAME#32rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi32, opnodeflag>;
def NAME#64rm : BinOpRM_RF<BaseOpc2, mnemonic, Xi64, opnodeflag>;
- let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+  let isConvertibleToThreeAddress = ConvertibleToThreeAddress, hasSideEffects = 0 in {
def NAME#8ri : BinOpRI_RF<0x80, mnemonic, Xi8 , opnodeflag, RegMRM>;
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
- def NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, opnodeflag, RegMRM>;
- def NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, opnodeflag, RegMRM>;
- def NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, opnodeflag, RegMRM>;
+ def NAME#16ri8 : BinOpRI8_RF<0x82, mnemonic, Xi16, RegMRM>;
+ def NAME#32ri8 : BinOpRI8_RF<0x82, mnemonic, Xi32, RegMRM>;
+ def NAME#64ri8 : BinOpRI8_RF<0x82, mnemonic, Xi64, RegMRM>;
def NAME#16ri : BinOpRI_RF<0x80, mnemonic, Xi16, opnodeflag, RegMRM>;
def NAME#32ri : BinOpRI_RF<0x80, mnemonic, Xi32, opnodeflag, RegMRM>;
@@ -984,34 +846,34 @@ multiclass ArithBinOp_RF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
}
} // Constraints = "$src1 = $dst"
- let mayLoad = 1, mayStore = 1 in {
+ let mayLoad = 1, mayStore = 1, hasSideEffects = 0 in {
def NAME#8mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi8 , opnode>;
def NAME#16mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi16, opnode>;
def NAME#32mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi32, opnode>;
def NAME#64mr : BinOpMR_RMW<BaseOpc, mnemonic, Xi64, opnode>;
- }
- // NOTE: These are order specific, we want the mi8 forms to be listed
- // first so that they are slightly preferred to the mi forms.
- def NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, opnode, MemMRM>;
- def NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, opnode, MemMRM>;
- let Predicates = [In64BitMode] in
- def NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, opnode, MemMRM>;
-
- def NAME#8mi : BinOpMI_RMW<0x80, mnemonic, Xi8 , opnode, MemMRM>;
- def NAME#16mi : BinOpMI_RMW<0x80, mnemonic, Xi16, opnode, MemMRM>;
- def NAME#32mi : BinOpMI_RMW<0x80, mnemonic, Xi32, opnode, MemMRM>;
- let Predicates = [In64BitMode] in
- def NAME#64mi32 : BinOpMI_RMW<0x80, mnemonic, Xi64, opnode, MemMRM>;
+ // NOTE: These are order specific, we want the mi8 forms to be listed
+ // first so that they are slightly preferred to the mi forms.
+ def NAME#16mi8 : BinOpMI8_RMW<mnemonic, Xi16, MemMRM>;
+ def NAME#32mi8 : BinOpMI8_RMW<mnemonic, Xi32, MemMRM>;
+ let Predicates = [In64BitMode] in
+ def NAME#64mi8 : BinOpMI8_RMW<mnemonic, Xi64, MemMRM>;
+
+ def NAME#8mi : BinOpMI_RMW<0x80, mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_RMW<0x80, mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi : BinOpMI_RMW<0x80, mnemonic, Xi32, opnode, MemMRM>;
+ let Predicates = [In64BitMode] in
+ def NAME#64mi32 : BinOpMI_RMW<0x80, mnemonic, Xi64, opnode, MemMRM>;
+ }
// These are for the disassembler since 0x82 opcode behaves like 0x80, but
// not in 64-bit mode.
let Predicates = [Not64BitMode], isCodeGenOnly = 1, ForceDisassemble = 1,
hasSideEffects = 0 in {
let Constraints = "$src1 = $dst" in
- def NAME#8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, null_frag, RegMRM>;
+ def NAME#8ri8 : BinOpRI8_RF<0x82, mnemonic, Xi8, RegMRM>;
let mayLoad = 1, mayStore = 1 in
- def NAME#8mi8 : BinOpMI8_RMW<mnemonic, Xi8, null_frag, MemMRM>;
+ def NAME#8mi8 : BinOpMI8_RMW<mnemonic, Xi8, MemMRM>;
}
} // Defs = [EFLAGS]
@@ -1046,10 +908,10 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
} // isConvertibleToThreeAddress
} // isCommutable
- def NAME#8rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi8>, FoldGenData<NAME#8rr>;
- def NAME#16rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi16>, FoldGenData<NAME#16rr>;
- def NAME#32rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi32>, FoldGenData<NAME#32rr>;
- def NAME#64rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi64>, FoldGenData<NAME#64rr>;
+ def NAME#8rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi16>;
+ def NAME#32rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi32>;
+ def NAME#64rr_REV : BinOpRR_RFF_Rev<BaseOpc2, mnemonic, Xi64>;
def NAME#8rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi8 , opnode>;
def NAME#16rm : BinOpRM_RFF<BaseOpc2, mnemonic, Xi16, opnode>;
@@ -1058,12 +920,12 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def NAME#8ri : BinOpRI_RFF<0x80, mnemonic, Xi8 , opnode, RegMRM>;
- let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress, hasSideEffects = 0 in {
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
- def NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, opnode, RegMRM>;
- def NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, opnode, RegMRM>;
- def NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, opnode, RegMRM>;
+ def NAME#16ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi16, RegMRM>;
+ def NAME#32ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi32, RegMRM>;
+ def NAME#64ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi64, RegMRM>;
def NAME#16ri : BinOpRI_RFF<0x80, mnemonic, Xi16, opnode, RegMRM>;
def NAME#32ri : BinOpRI_RFF<0x80, mnemonic, Xi32, opnode, RegMRM>;
@@ -1078,25 +940,27 @@ multiclass ArithBinOp_RFF<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
// NOTE: These are order specific, we want the mi8 forms to be listed
// first so that they are slightly preferred to the mi forms.
- def NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, opnode, MemMRM>;
- def NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, opnode, MemMRM>;
+ let mayLoad = 1, mayStore = 1, hasSideEffects = 0 in {
+ def NAME#16mi8 : BinOpMI8_RMW_FF<mnemonic, Xi16, MemMRM>;
+ def NAME#32mi8 : BinOpMI8_RMW_FF<mnemonic, Xi32, MemMRM>;
let Predicates = [In64BitMode] in
- def NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, opnode, MemMRM>;
+ def NAME#64mi8 : BinOpMI8_RMW_FF<mnemonic, Xi64, MemMRM>;
def NAME#8mi : BinOpMI_RMW_FF<0x80, mnemonic, Xi8 , opnode, MemMRM>;
def NAME#16mi : BinOpMI_RMW_FF<0x80, mnemonic, Xi16, opnode, MemMRM>;
def NAME#32mi : BinOpMI_RMW_FF<0x80, mnemonic, Xi32, opnode, MemMRM>;
let Predicates = [In64BitMode] in
def NAME#64mi32 : BinOpMI_RMW_FF<0x80, mnemonic, Xi64, opnode, MemMRM>;
+ }
// These are for the disassembler since 0x82 opcode behaves like 0x80, but
// not in 64-bit mode.
let Predicates = [Not64BitMode], isCodeGenOnly = 1, ForceDisassemble = 1,
hasSideEffects = 0 in {
let Constraints = "$src1 = $dst" in
- def NAME#8ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi8, null_frag, RegMRM>;
+ def NAME#8ri8 : BinOpRI8_RFF<0x82, mnemonic, Xi8, RegMRM>;
let mayLoad = 1, mayStore = 1 in
- def NAME#8mi8 : BinOpMI8_RMW_FF<mnemonic, Xi8, null_frag, MemMRM>;
+ def NAME#8mi8 : BinOpMI8_RMW_FF<mnemonic, Xi8, MemMRM>;
}
} // Uses = [EFLAGS], Defs = [EFLAGS]
@@ -1128,10 +992,10 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
}
} // isCommutable
- def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>, FoldGenData<NAME#8rr>;
- def NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>, FoldGenData<NAME#16rr>;
- def NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>, FoldGenData<NAME#32rr>;
- def NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>, FoldGenData<NAME#64rr>;
+ def NAME#8rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi8>;
+ def NAME#16rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi16>;
+ def NAME#32rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi32>;
+ def NAME#64rr_REV : BinOpRR_F_Rev<BaseOpc2, mnemonic, Xi64>;
def NAME#8rm : BinOpRM_F<BaseOpc2, mnemonic, Xi8 , opnode>;
def NAME#16rm : BinOpRM_F<BaseOpc2, mnemonic, Xi16, opnode>;
@@ -1140,12 +1004,12 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
def NAME#8ri : BinOpRI_F<0x80, mnemonic, Xi8 , opnode, RegMRM>;
- let isConvertibleToThreeAddress = ConvertibleToThreeAddress in {
+ let isConvertibleToThreeAddress = ConvertibleToThreeAddress, hasSideEffects = 0 in {
// NOTE: These are order specific, we want the ri8 forms to be listed
// first so that they are slightly preferred to the ri forms.
- def NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, opnode, RegMRM>;
- def NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, opnode, RegMRM>;
- def NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, opnode, RegMRM>;
+ def NAME#16ri8 : BinOpRI8_F<0x82, mnemonic, Xi16, RegMRM>;
+ def NAME#32ri8 : BinOpRI8_F<0x82, mnemonic, Xi32, RegMRM>;
+ def NAME#64ri8 : BinOpRI8_F<0x82, mnemonic, Xi64, RegMRM>;
def NAME#16ri : BinOpRI_F<0x80, mnemonic, Xi16, opnode, RegMRM>;
def NAME#32ri : BinOpRI_F<0x80, mnemonic, Xi32, opnode, RegMRM>;
@@ -1159,24 +1023,26 @@ multiclass ArithBinOp_F<bits<8> BaseOpc, bits<8> BaseOpc2, bits<8> BaseOpc4,
// NOTE: These are order specific, we want the mi8 forms to be listed
// first so that they are slightly preferred to the mi forms.
- def NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, opnode, MemMRM>;
- def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, opnode, MemMRM>;
- let Predicates = [In64BitMode] in
- def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, opnode, MemMRM>;
-
- def NAME#8mi : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>;
- def NAME#16mi : BinOpMI_F<0x80, mnemonic, Xi16, opnode, MemMRM>;
- def NAME#32mi : BinOpMI_F<0x80, mnemonic, Xi32, opnode, MemMRM>;
- let Predicates = [In64BitMode] in
- def NAME#64mi32 : BinOpMI_F<0x80, mnemonic, Xi64, opnode, MemMRM>;
+ let mayLoad = 1, hasSideEffects = 0 in {
+ def NAME#16mi8 : BinOpMI8_F<mnemonic, Xi16, MemMRM>;
+ def NAME#32mi8 : BinOpMI8_F<mnemonic, Xi32, MemMRM>;
+ let Predicates = [In64BitMode] in
+ def NAME#64mi8 : BinOpMI8_F<mnemonic, Xi64, MemMRM>;
+
+ def NAME#8mi : BinOpMI_F<0x80, mnemonic, Xi8 , opnode, MemMRM>;
+ def NAME#16mi : BinOpMI_F<0x80, mnemonic, Xi16, opnode, MemMRM>;
+ def NAME#32mi : BinOpMI_F<0x80, mnemonic, Xi32, opnode, MemMRM>;
+ let Predicates = [In64BitMode] in
+ def NAME#64mi32 : BinOpMI_F<0x80, mnemonic, Xi64, opnode, MemMRM>;
+ }
// These are for the disassembler since 0x82 opcode behaves like 0x80, but
// not in 64-bit mode.
let Predicates = [Not64BitMode], isCodeGenOnly = 1, ForceDisassemble = 1,
hasSideEffects = 0 in {
- def NAME#8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, null_frag, RegMRM>;
+ def NAME#8ri8 : BinOpRI8_F<0x82, mnemonic, Xi8, RegMRM>;
let mayLoad = 1 in
- def NAME#8mi8 : BinOpMI8_F<mnemonic, Xi8, null_frag, MemMRM>;
+ def NAME#8mi8 : BinOpMI8_F<mnemonic, Xi8, MemMRM>;
}
} // Defs = [EFLAGS]
@@ -1252,31 +1118,19 @@ def : Pat<(store (X86adc_flag GR64:$src, (loadi64 addr:$dst), EFLAGS),
multiclass ArithBinOp_RF_relocImm_Pats<SDNode OpNodeFlag, SDNode OpNode> {
def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2),
(!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
- def : Pat<(OpNodeFlag GR16:$src1, i16relocImmSExt8_su:$src2),
- (!cast<Instruction>(NAME#"16ri8") GR16:$src1, i16relocImmSExt8_su:$src2)>;
def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2),
(!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
- def : Pat<(OpNodeFlag GR32:$src1, i32relocImmSExt8_su:$src2),
- (!cast<Instruction>(NAME#"32ri8") GR32:$src1, i32relocImmSExt8_su:$src2)>;
def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2),
(!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
- def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt8_su:$src2),
- (!cast<Instruction>(NAME#"64ri8") GR64:$src1, i64relocImmSExt8_su:$src2)>;
def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2),
(!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;
def : Pat<(store (OpNode (load addr:$dst), relocImm8_su:$src), addr:$dst),
(!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>;
- def : Pat<(store (OpNode (load addr:$dst), i16relocImmSExt8_su:$src), addr:$dst),
- (!cast<Instruction>(NAME#"16mi8") addr:$dst, i16relocImmSExt8_su:$src)>;
def : Pat<(store (OpNode (load addr:$dst), relocImm16_su:$src), addr:$dst),
(!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>;
- def : Pat<(store (OpNode (load addr:$dst), i32relocImmSExt8_su:$src), addr:$dst),
- (!cast<Instruction>(NAME#"32mi8") addr:$dst, i32relocImmSExt8_su:$src)>;
def : Pat<(store (OpNode (load addr:$dst), relocImm32_su:$src), addr:$dst),
(!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>;
- def : Pat<(store (OpNode (load addr:$dst), i64relocImmSExt8_su:$src), addr:$dst),
- (!cast<Instruction>(NAME#"64mi8") addr:$dst, i64relocImmSExt8_su:$src)>;
def : Pat<(store (OpNode (load addr:$dst), i64relocImmSExt32_su:$src), addr:$dst),
(!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>;
}
@@ -1284,31 +1138,19 @@ multiclass ArithBinOp_RF_relocImm_Pats<SDNode OpNodeFlag, SDNode OpNode> {
multiclass ArithBinOp_RFF_relocImm_Pats<SDNode OpNodeFlag> {
def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2, EFLAGS),
(!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
- def : Pat<(OpNodeFlag GR16:$src1, i16relocImmSExt8_su:$src2, EFLAGS),
- (!cast<Instruction>(NAME#"16ri8") GR16:$src1, i16relocImmSExt8_su:$src2)>;
def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS),
(!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
- def : Pat<(OpNodeFlag GR32:$src1, i32relocImmSExt8_su:$src2, EFLAGS),
- (!cast<Instruction>(NAME#"32ri8") GR32:$src1, i32relocImmSExt8_su:$src2)>;
def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS),
(!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
- def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt8_su:$src2, EFLAGS),
- (!cast<Instruction>(NAME#"64ri8") GR64:$src1, i64relocImmSExt8_su:$src2)>;
def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS),
(!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;
def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS), addr:$dst),
(!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>;
- def : Pat<(store (OpNodeFlag (load addr:$dst), i16relocImmSExt8_su:$src, EFLAGS), addr:$dst),
- (!cast<Instruction>(NAME#"16mi8") addr:$dst, i16relocImmSExt8_su:$src)>;
def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS), addr:$dst),
(!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>;
- def : Pat<(store (OpNodeFlag (load addr:$dst), i32relocImmSExt8_su:$src, EFLAGS), addr:$dst),
- (!cast<Instruction>(NAME#"32mi8") addr:$dst, i32relocImmSExt8_su:$src)>;
def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS), addr:$dst),
(!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>;
- def : Pat<(store (OpNodeFlag (load addr:$dst), i64relocImmSExt8_su:$src, EFLAGS), addr:$dst),
- (!cast<Instruction>(NAME#"64mi8") addr:$dst, i64relocImmSExt8_su:$src)>;
def : Pat<(store (OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS), addr:$dst),
(!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>;
}
@@ -1316,31 +1158,19 @@ multiclass ArithBinOp_RFF_relocImm_Pats<SDNode OpNodeFlag> {
multiclass ArithBinOp_F_relocImm_Pats<SDNode OpNodeFlag> {
def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2),
(!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
- def : Pat<(OpNodeFlag GR16:$src1, i16relocImmSExt8_su:$src2),
- (!cast<Instruction>(NAME#"16ri8") GR16:$src1, i16relocImmSExt8_su:$src2)>;
def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2),
(!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
- def : Pat<(OpNodeFlag GR32:$src1, i32relocImmSExt8_su:$src2),
- (!cast<Instruction>(NAME#"32ri8") GR32:$src1, i32relocImmSExt8_su:$src2)>;
def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2),
(!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
- def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt8_su:$src2),
- (!cast<Instruction>(NAME#"64ri8") GR64:$src1, i64relocImmSExt8_su:$src2)>;
def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2),
(!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;
def : Pat<(OpNodeFlag (loadi8 addr:$src1), relocImm8_su:$src2),
(!cast<Instruction>(NAME#"8mi") addr:$src1, relocImm8_su:$src2)>;
- def : Pat<(OpNodeFlag (loadi16 addr:$src1), i16relocImmSExt8_su:$src2),
- (!cast<Instruction>(NAME#"16mi8") addr:$src1, i16relocImmSExt8_su:$src2)>;
def : Pat<(OpNodeFlag (loadi16 addr:$src1), relocImm16_su:$src2),
(!cast<Instruction>(NAME#"16mi") addr:$src1, relocImm16_su:$src2)>;
- def : Pat<(OpNodeFlag (loadi32 addr:$src1), i32relocImmSExt8_su:$src2),
- (!cast<Instruction>(NAME#"32mi8") addr:$src1, i32relocImmSExt8_su:$src2)>;
def : Pat<(OpNodeFlag (loadi32 addr:$src1), relocImm32_su:$src2),
(!cast<Instruction>(NAME#"32mi") addr:$src1, relocImm32_su:$src2)>;
- def : Pat<(OpNodeFlag (loadi64 addr:$src1), i64relocImmSExt8_su:$src2),
- (!cast<Instruction>(NAME#"64mi8") addr:$src1, i64relocImmSExt8_su:$src2)>;
def : Pat<(OpNodeFlag (loadi64 addr:$src1), i64relocImmSExt32_su:$src2),
(!cast<Instruction>(NAME#"64mi32") addr:$src1, i64relocImmSExt32_su:$src2)>;
}
@@ -1475,7 +1305,7 @@ multiclass bmi_andn<string mnemonic, RegisterClass RC, X86MemOperand x86memop,
// Complexity is reduced to give and with immediate a chance to match first.
let Predicates = [HasBMI], Defs = [EFLAGS], AddedComplexity = -6 in {
defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32, WriteALU>, T8PS, VEX_4V;
- defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8PS, VEX_4V, VEX_W;
+ defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64, WriteALU>, T8PS, VEX_4V, REX_W;
}
let Predicates = [HasBMI], AddedComplexity = -6 in {
@@ -1524,7 +1354,7 @@ let Predicates = [HasBMI2] in {
let Uses = [EDX] in
defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem, WriteMULX32>;
let Uses = [RDX] in
- defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, VEX_W;
+ defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem, WriteMULX64>, REX_W;
}
//===----------------------------------------------------------------------===//
@@ -1532,23 +1362,26 @@ let Predicates = [HasBMI2] in {
//
// We don't have patterns for these as there is no advantage over ADC for
// most code.
+class ADCOXOpRR<bits<8> opcode, string mnemonic, X86TypeInfo info>
+  : BinOpRR_C<opcode, MRMSrcReg, mnemonic, info, []> {
+ let Opcode = opcode;
+ let OpSize = OpSizeFixed;
+}
+
+class ADCOXOpRM<bits<8> opcode, string mnemonic, X86TypeInfo info>
+  : BinOpRM_C<opcode, MRMSrcMem, mnemonic, info, []> {
+ let Opcode = opcode;
+ let OpSize = OpSizeFixed;
+}
+
let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
Constraints = "$src1 = $dst", hasSideEffects = 0 in {
let SchedRW = [WriteADC], isCommutable = 1 in {
- def ADCX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "adcx{l}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
- def ADCX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "adcx{q}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
-
- def ADOX32rr : I<0xF6, MRMSrcReg, (outs GR32:$dst),
- (ins GR32:$src1, GR32:$src2),
- "adox{l}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
-
- def ADOX64rr : RI<0xF6, MRMSrcReg, (outs GR64:$dst),
- (ins GR64:$src1, GR64:$src2),
- "adox{q}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
+ def ADCX32rr : ADCOXOpRR<0xF6, "adcx", Xi32>, T8PD;
+ def ADCX64rr : ADCOXOpRR<0xF6, "adcx", Xi64>, T8PD;
+
+ def ADOX32rr : ADCOXOpRR<0xF6, "adox", Xi32>, T8XS;
+ def ADOX64rr : ADCOXOpRR<0xF6, "adox", Xi64>, T8XS;
} // SchedRW
let mayLoad = 1,
@@ -1557,20 +1390,10 @@ let Predicates = [HasADX], Defs = [EFLAGS], Uses = [EFLAGS],
ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault,
// Implicit read of EFLAGS
WriteADC.ReadAfterFold] in {
- def ADCX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "adcx{l}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
-
- def ADCX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "adcx{q}\t{$src2, $dst|$dst, $src2}", []>, T8PD;
-
- def ADOX32rm : I<0xF6, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$src1, i32mem:$src2),
- "adox{l}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
+ def ADCX32rm : ADCOXOpRM<0xF6, "adcx", Xi32>, T8PD;
+ def ADCX64rm : ADCOXOpRM<0xF6, "adcx", Xi64>, T8PD;
- def ADOX64rm : RI<0xF6, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$src1, i64mem:$src2),
- "adox{q}\t{$src2, $dst|$dst, $src2}", []>, T8XS;
+ def ADOX32rm : ADCOXOpRM<0xF6, "adox", Xi32>, T8XS;
+ def ADOX64rm : ADCOXOpRM<0xF6, "adox", Xi64>, T8XS;
} // mayLoad, SchedRW
}
diff --git a/llvm/lib/Target/X86/X86InstrAsmAlias.td b/llvm/lib/Target/X86/X86InstrAsmAlias.td
new file mode 100644
index 000000000000..9d0735c9cbba
--- /dev/null
+++ b/llvm/lib/Target/X86/X86InstrAsmAlias.td
@@ -0,0 +1,688 @@
+//==- X86InstrAsmAlias.td - Assembler Instruction Aliases --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the assembler mnemonic/instruction aliases in the X86
+// architecture.
+//
+//===----------------------------------------------------------------------===//
+
+// Reversed version with ".s" suffix for GAS compatibility.
+def : InstAlias<"mov{b}.s\t{$src, $dst|$dst, $src}",
+ (MOV8rr_REV GR8:$dst, GR8:$src), 0>;
+def : InstAlias<"mov{w}.s\t{$src, $dst|$dst, $src}",
+ (MOV16rr_REV GR16:$dst, GR16:$src), 0>;
+def : InstAlias<"mov{l}.s\t{$src, $dst|$dst, $src}",
+ (MOV32rr_REV GR32:$dst, GR32:$src), 0>;
+def : InstAlias<"mov{q}.s\t{$src, $dst|$dst, $src}",
+ (MOV64rr_REV GR64:$dst, GR64:$src), 0>;
+def : InstAlias<"mov.s\t{$src, $dst|$dst, $src}",
+ (MOV8rr_REV GR8:$dst, GR8:$src), 0, "att">;
+def : InstAlias<"mov.s\t{$src, $dst|$dst, $src}",
+ (MOV16rr_REV GR16:$dst, GR16:$src), 0, "att">;
+def : InstAlias<"mov.s\t{$src, $dst|$dst, $src}",
+ (MOV32rr_REV GR32:$dst, GR32:$src), 0, "att">;
+def : InstAlias<"mov.s\t{$src, $dst|$dst, $src}",
+ (MOV64rr_REV GR64:$dst, GR64:$src), 0, "att">;
+
+// MWAITX Instruction Aliases
+def : InstAlias<"mwaitx\t{%eax, %ecx, %ebx|ebx, ecx, eax}", (MWAITXrrr)>,
+ Requires<[ Not64BitMode ]>;
+def : InstAlias<"mwaitx\t{%rax, %rcx, %rbx|rbx, rcx, rax}", (MWAITXrrr)>,
+ Requires<[ In64BitMode ]>;
+
+// MONITORX Instruction Aliases
+def : InstAlias<"monitorx\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORX32rrr)>,
+ Requires<[ Not64BitMode ]>;
+def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORX64rrr)>,
+ Requires<[ In64BitMode ]>;
+
+// CLZERO Instruction Aliases
+def : InstAlias<"clzero\t{%eax|eax}", (CLZERO32r)>, Requires<[Not64BitMode]>;
+def : InstAlias<"clzero\t{%rax|rax}", (CLZERO64r)>, Requires<[In64BitMode]>;
+
+// INVLPGB Instruction Aliases
+def : InstAlias<"invlpgb\t{%eax, %edx|eax, edx}", (INVLPGB32)>, Requires<[Not64BitMode]>;
+def : InstAlias<"invlpgb\t{%rax, %edx|rax, edx}", (INVLPGB64)>, Requires<[In64BitMode]>;
+
+// CMPCCXADD Instruction Aliases
+multiclass CMPCCXADD_Aliases<string Cond, int CC> {
+ def : InstAlias<"cmp"#Cond#"xadd"#"\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}",
+ (CMPCCXADDmr32 GR32:$dst, i32mem:$dstsrc2, GR32:$src3, CC), 0>;
+ def : InstAlias<"cmp"#Cond#"xadd"#"\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}",
+ (CMPCCXADDmr64 GR64:$dst, i64mem:$dstsrc2, GR64:$src3, CC), 0>;
+}
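+
+// The CC operand follows the usual X86 condition-code encoding. As a
+// hypothetical example, AT&T "cmpbexadd %edx, %ecx, (%rdi)" maps to
+// CMPCCXADDmr32 with $src3 = %edx, $dst = %ecx, $dstsrc2 = (%rdi) and CC = 6.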
+
+defm : CMPCCXADD_Aliases<"o" , 0>;
+defm : CMPCCXADD_Aliases<"no", 1>;
+defm : CMPCCXADD_Aliases<"b" , 2>;
+defm : CMPCCXADD_Aliases<"ae", 3>;
+defm : CMPCCXADD_Aliases<"nb", 3>;
+defm : CMPCCXADD_Aliases<"e" , 4>;
+defm : CMPCCXADD_Aliases<"z" , 4>;
+defm : CMPCCXADD_Aliases<"ne", 5>;
+defm : CMPCCXADD_Aliases<"nz", 5>;
+defm : CMPCCXADD_Aliases<"be", 6>;
+defm : CMPCCXADD_Aliases<"nbe", 7>;
+defm : CMPCCXADD_Aliases<"a", 7>;
+defm : CMPCCXADD_Aliases<"s" , 8>;
+defm : CMPCCXADD_Aliases<"ns", 9>;
+defm : CMPCCXADD_Aliases<"p" , 10>;
+defm : CMPCCXADD_Aliases<"np", 11>;
+defm : CMPCCXADD_Aliases<"l" , 12>;
+defm : CMPCCXADD_Aliases<"ge", 13>;
+defm : CMPCCXADD_Aliases<"nl", 13>;
+defm : CMPCCXADD_Aliases<"le", 14>;
+defm : CMPCCXADD_Aliases<"g", 15>;
+defm : CMPCCXADD_Aliases<"nle",15>;
+
+//===----------------------------------------------------------------------===//
+// Assembler Mnemonic Aliases
+//===----------------------------------------------------------------------===//
+
+def : MnemonicAlias<"call", "callw", "att">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"call", "calll", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"call", "callq", "att">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"cbw", "cbtw", "att">;
+def : MnemonicAlias<"cwde", "cwtl", "att">;
+def : MnemonicAlias<"cwd", "cwtd", "att">;
+def : MnemonicAlias<"cdq", "cltd", "att">;
+def : MnemonicAlias<"cdqe", "cltq", "att">;
+def : MnemonicAlias<"cqo", "cqto", "att">;
+
+// In 64-bit mode lret maps to lretl; it is not ambiguous with lretq.
+def : MnemonicAlias<"lret", "lretw", "att">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"lret", "lretl", "att">, Requires<[Not16BitMode]>;
+
+def : MnemonicAlias<"leavel", "leave", "att">, Requires<[Not64BitMode]>;
+def : MnemonicAlias<"leaveq", "leave", "att">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"loopz", "loope">;
+def : MnemonicAlias<"loopnz", "loopne">;
+
+def : MnemonicAlias<"pop", "popw", "att">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"pop", "popl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pop", "popq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popf", "popfw", "att">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"popf", "popfl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"popf", "popfq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popf", "popfq", "intel">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"popfd", "popfl", "att">;
+def : MnemonicAlias<"popfw", "popf", "intel">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"popfw", "popf", "intel">, Requires<[In64BitMode]>;
+
+// FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in
+// all modes. However: "push (addr)" and "push $42" should default to
+// pushl/pushq depending on the current mode. Similar for "pop %bx"
+def : MnemonicAlias<"push", "pushw", "att">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"push", "pushl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"push", "pushq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushf", "pushfw", "att">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"pushf", "pushfl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pushf", "pushfq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushf", "pushfq", "intel">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"pushfd", "pushfl", "att">;
+def : MnemonicAlias<"pushfw", "pushf", "intel">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pushfw", "pushf", "intel">, Requires<[In64BitMode]>;
+
+def : MnemonicAlias<"popad", "popal", "intel">, Requires<[Not64BitMode]>;
+def : MnemonicAlias<"pushad", "pushal", "intel">, Requires<[Not64BitMode]>;
+def : MnemonicAlias<"popa", "popaw", "intel">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"pusha", "pushaw", "intel">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"popa", "popal", "intel">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pusha", "pushal", "intel">, Requires<[In32BitMode]>;
+
+def : MnemonicAlias<"popa", "popaw", "att">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"pusha", "pushaw", "att">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"popa", "popal", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"pusha", "pushal", "att">, Requires<[In32BitMode]>;
+
+def : MnemonicAlias<"repe", "rep">;
+def : MnemonicAlias<"repz", "rep">;
+def : MnemonicAlias<"repnz", "repne">;
+
+def : MnemonicAlias<"ret", "retw", "att">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"ret", "retl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"ret", "retq", "att">, Requires<[In64BitMode]>;
+
+// Apply 'ret' behavior to 'retn'
+def : MnemonicAlias<"retn", "retw", "att">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"retn", "retl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"retn", "retq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"retn", "ret", "intel">;
+
+def : MnemonicAlias<"sal", "shl", "intel">;
+def : MnemonicAlias<"salb", "shlb", "att">;
+def : MnemonicAlias<"salw", "shlw", "att">;
+def : MnemonicAlias<"sall", "shll", "att">;
+def : MnemonicAlias<"salq", "shlq", "att">;
+
+def : MnemonicAlias<"smovb", "movsb", "att">;
+def : MnemonicAlias<"smovw", "movsw", "att">;
+def : MnemonicAlias<"smovl", "movsl", "att">;
+def : MnemonicAlias<"smovq", "movsq", "att">;
+
+def : MnemonicAlias<"ud2a", "ud2", "att">;
+def : MnemonicAlias<"ud2bw", "ud1w", "att">;
+def : MnemonicAlias<"ud2bl", "ud1l", "att">;
+def : MnemonicAlias<"ud2bq", "ud1q", "att">;
+def : MnemonicAlias<"verrw", "verr", "att">;
+
+// MS recognizes 'xacquire'/'xrelease' as 'acquire'/'release'
+def : MnemonicAlias<"acquire", "xacquire", "intel">;
+def : MnemonicAlias<"release", "xrelease", "intel">;
+
+// System instruction aliases.
+def : MnemonicAlias<"iret", "iretw", "att">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"iret", "iretl", "att">, Requires<[Not16BitMode]>;
+def : MnemonicAlias<"sysret", "sysretl", "att">;
+def : MnemonicAlias<"sysexit", "sysexitl", "att">;
+
+def : MnemonicAlias<"lgdt", "lgdtw", "att">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"lgdt", "lgdtl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lgdt", "lgdtq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"lidt", "lidtw", "att">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"lidt", "lidtl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lidt", "lidtq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sgdt", "sgdtw", "att">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"sgdt", "sgdtl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sgdt", "sgdtq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"sidt", "sidtw", "att">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"sidt", "sidtl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sidt", "sidtq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"lgdt", "lgdtw", "intel">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"lgdt", "lgdtd", "intel">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"lidt", "lidtw", "intel">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"lidt", "lidtd", "intel">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sgdt", "sgdtw", "intel">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"sgdt", "sgdtd", "intel">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"sidt", "sidtw", "intel">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"sidt", "sidtd", "intel">, Requires<[In32BitMode]>;
+
+
+// Floating point stack aliases.
+def : MnemonicAlias<"fcmovz", "fcmove", "att">;
+def : MnemonicAlias<"fcmova", "fcmovnbe", "att">;
+def : MnemonicAlias<"fcmovnae", "fcmovb", "att">;
+def : MnemonicAlias<"fcmovna", "fcmovbe", "att">;
+def : MnemonicAlias<"fcmovae", "fcmovnb", "att">;
+def : MnemonicAlias<"fcomip", "fcompi">;
+def : MnemonicAlias<"fildq", "fildll", "att">;
+def : MnemonicAlias<"fistpq", "fistpll", "att">;
+def : MnemonicAlias<"fisttpq", "fisttpll", "att">;
+def : MnemonicAlias<"fldcww", "fldcw", "att">;
+def : MnemonicAlias<"fnstcww", "fnstcw", "att">;
+def : MnemonicAlias<"fnstsww", "fnstsw", "att">;
+def : MnemonicAlias<"fucomip", "fucompi">;
+def : MnemonicAlias<"fwait", "wait">;
+
+def : MnemonicAlias<"fxsaveq", "fxsave64", "att">;
+def : MnemonicAlias<"fxrstorq", "fxrstor64", "att">;
+def : MnemonicAlias<"xsaveq", "xsave64", "att">;
+def : MnemonicAlias<"xrstorq", "xrstor64", "att">;
+def : MnemonicAlias<"xsaveoptq", "xsaveopt64", "att">;
+def : MnemonicAlias<"xrstorsq", "xrstors64", "att">;
+def : MnemonicAlias<"xsavecq", "xsavec64", "att">;
+def : MnemonicAlias<"xsavesq", "xsaves64", "att">;
+
+class CondCodeAlias<string Prefix, string Suffix, string OldCond, string NewCond,
+ string VariantName>
+ : MnemonicAlias<!strconcat(Prefix, OldCond, Suffix),
+ !strconcat(Prefix, NewCond, Suffix), VariantName>;
+
+/// IntegerCondCodeMnemonicAlias - This multiclass defines a bunch of
+/// MnemonicAlias's that canonicalize the condition code in a mnemonic, for
+/// example "setz" -> "sete".
+multiclass IntegerCondCodeMnemonicAlias<string Prefix, string Suffix,
+ string V = ""> {
+ def C : CondCodeAlias<Prefix, Suffix, "c", "b", V>; // setc -> setb
+ def Z : CondCodeAlias<Prefix, Suffix, "z" , "e", V>; // setz -> sete
+ def NA : CondCodeAlias<Prefix, Suffix, "na", "be", V>; // setna -> setbe
+ def NB : CondCodeAlias<Prefix, Suffix, "nb", "ae", V>; // setnb -> setae
+ def NC : CondCodeAlias<Prefix, Suffix, "nc", "ae", V>; // setnc -> setae
+ def NG : CondCodeAlias<Prefix, Suffix, "ng", "le", V>; // setng -> setle
+ def NL : CondCodeAlias<Prefix, Suffix, "nl", "ge", V>; // setnl -> setge
+ def NZ : CondCodeAlias<Prefix, Suffix, "nz", "ne", V>; // setnz -> setne
+ def PE : CondCodeAlias<Prefix, Suffix, "pe", "p", V>; // setpe -> setp
+ def PO : CondCodeAlias<Prefix, Suffix, "po", "np", V>; // setpo -> setnp
+
+ def NAE : CondCodeAlias<Prefix, Suffix, "nae", "b", V>; // setnae -> setb
+ def NBE : CondCodeAlias<Prefix, Suffix, "nbe", "a", V>; // setnbe -> seta
+ def NGE : CondCodeAlias<Prefix, Suffix, "nge", "l", V>; // setnge -> setl
+ def NLE : CondCodeAlias<Prefix, Suffix, "nle", "g", V>; // setnle -> setg
+}
+
+// Aliases for set<CC>
+defm : IntegerCondCodeMnemonicAlias<"set", "">;
+// Aliases for j<CC>
+defm : IntegerCondCodeMnemonicAlias<"j", "">;
+// Aliases for cmov<CC>{w,l,q}
+defm : IntegerCondCodeMnemonicAlias<"cmov", "w", "att">;
+defm : IntegerCondCodeMnemonicAlias<"cmov", "l", "att">;
+defm : IntegerCondCodeMnemonicAlias<"cmov", "q", "att">;
+// No size suffix for intel-style asm.
+defm : IntegerCondCodeMnemonicAlias<"cmov", "", "intel">;
+
+//===----------------------------------------------------------------------===//
+// Assembler Instruction Aliases
+//===----------------------------------------------------------------------===//
+
+// aad/aam default to base 10 if no operand is specified.
+def : InstAlias<"aad", (AAD8i8 10)>, Requires<[Not64BitMode]>;
+def : InstAlias<"aam", (AAM8i8 10)>, Requires<[Not64BitMode]>;
+
+// Disambiguate the mem/imm form of bt-without-a-suffix as btl.
+// Likewise for btc/btr/bts.
+def : InstAlias<"bt\t{$imm, $mem|$mem, $imm}",
+ (BT32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">;
+def : InstAlias<"btc\t{$imm, $mem|$mem, $imm}",
+ (BTC32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">;
+def : InstAlias<"btr\t{$imm, $mem|$mem, $imm}",
+ (BTR32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">;
+def : InstAlias<"bts\t{$imm, $mem|$mem, $imm}",
+ (BTS32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">;
+
+// clr aliases.
+def : InstAlias<"clr{b}\t$reg", (XOR8rr GR8 :$reg, GR8 :$reg), 0>;
+def : InstAlias<"clr{w}\t$reg", (XOR16rr GR16:$reg, GR16:$reg), 0>;
+def : InstAlias<"clr{l}\t$reg", (XOR32rr GR32:$reg, GR32:$reg), 0>;
+def : InstAlias<"clr{q}\t$reg", (XOR64rr GR64:$reg, GR64:$reg), 0>;
+
+// lods aliases. Accept the destination being omitted because it's implicit
+// in the mnemonic, or the mnemonic suffix being omitted because it's implicit
+// in the destination.
+def : InstAlias<"lodsb\t$src", (LODSB srcidx8:$src), 0>;
+def : InstAlias<"lodsw\t$src", (LODSW srcidx16:$src), 0>;
+def : InstAlias<"lods{l|d}\t$src", (LODSL srcidx32:$src), 0>;
+def : InstAlias<"lodsq\t$src", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"lods\t{$src, %al|al, $src}", (LODSB srcidx8:$src), 0>;
+def : InstAlias<"lods\t{$src, %ax|ax, $src}", (LODSW srcidx16:$src), 0>;
+def : InstAlias<"lods\t{$src, %eax|eax, $src}", (LODSL srcidx32:$src), 0>;
+def : InstAlias<"lods\t{$src, %rax|rax, $src}", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"lods\t$src", (LODSB srcidx8:$src), 0, "intel">;
+def : InstAlias<"lods\t$src", (LODSW srcidx16:$src), 0, "intel">;
+def : InstAlias<"lods\t$src", (LODSL srcidx32:$src), 0, "intel">;
+def : InstAlias<"lods\t$src", (LODSQ srcidx64:$src), 0, "intel">, Requires<[In64BitMode]>;
+
+
+// stos aliases. Accept the source being omitted because it's implicit in
+// the mnemonic, or the mnemonic suffix being omitted because it's implicit
+// in the source.
+def : InstAlias<"stosb\t$dst", (STOSB dstidx8:$dst), 0>;
+def : InstAlias<"stosw\t$dst", (STOSW dstidx16:$dst), 0>;
+def : InstAlias<"stos{l|d}\t$dst", (STOSL dstidx32:$dst), 0>;
+def : InstAlias<"stosq\t$dst", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"stos\t{%al, $dst|$dst, al}", (STOSB dstidx8:$dst), 0>;
+def : InstAlias<"stos\t{%ax, $dst|$dst, ax}", (STOSW dstidx16:$dst), 0>;
+def : InstAlias<"stos\t{%eax, $dst|$dst, eax}", (STOSL dstidx32:$dst), 0>;
+def : InstAlias<"stos\t{%rax, $dst|$dst, rax}", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"stos\t$dst", (STOSB dstidx8:$dst), 0, "intel">;
+def : InstAlias<"stos\t$dst", (STOSW dstidx16:$dst), 0, "intel">;
+def : InstAlias<"stos\t$dst", (STOSL dstidx32:$dst), 0, "intel">;
+def : InstAlias<"stos\t$dst", (STOSQ dstidx64:$dst), 0, "intel">, Requires<[In64BitMode]>;
+
+
+// scas aliases. Accept the destination being omitted because it's implicit
+// in the mnemonic, or the mnemonic suffix being omitted because it's implicit
+// in the destination.
+def : InstAlias<"scasb\t$dst", (SCASB dstidx8:$dst), 0>;
+def : InstAlias<"scasw\t$dst", (SCASW dstidx16:$dst), 0>;
+def : InstAlias<"scas{l|d}\t$dst", (SCASL dstidx32:$dst), 0>;
+def : InstAlias<"scasq\t$dst", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"scas\t{$dst, %al|al, $dst}", (SCASB dstidx8:$dst), 0>;
+def : InstAlias<"scas\t{$dst, %ax|ax, $dst}", (SCASW dstidx16:$dst), 0>;
+def : InstAlias<"scas\t{$dst, %eax|eax, $dst}", (SCASL dstidx32:$dst), 0>;
+def : InstAlias<"scas\t{$dst, %rax|rax, $dst}", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"scas\t$dst", (SCASB dstidx8:$dst), 0, "intel">;
+def : InstAlias<"scas\t$dst", (SCASW dstidx16:$dst), 0, "intel">;
+def : InstAlias<"scas\t$dst", (SCASL dstidx32:$dst), 0, "intel">;
+def : InstAlias<"scas\t$dst", (SCASQ dstidx64:$dst), 0, "intel">, Requires<[In64BitMode]>;
+
+// cmps aliases. Accept the mnemonic suffix being omitted because it's
+// implicit in the operands.
+def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSB dstidx8:$dst, srcidx8:$src), 0, "intel">;
+def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSW dstidx16:$dst, srcidx16:$src), 0, "intel">;
+def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSL dstidx32:$dst, srcidx32:$src), 0, "intel">;
+def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSQ dstidx64:$dst, srcidx64:$src), 0, "intel">, Requires<[In64BitMode]>;
+
+// movs aliases. Accept the mnemonic suffix being omitted because it's
+// implicit in the operands.
+def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSB dstidx8:$dst, srcidx8:$src), 0, "intel">;
+def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSW dstidx16:$dst, srcidx16:$src), 0, "intel">;
+def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSL dstidx32:$dst, srcidx32:$src), 0, "intel">;
+def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSQ dstidx64:$dst, srcidx64:$src), 0, "intel">, Requires<[In64BitMode]>;
+
+// div and idiv aliases for explicit A register.
+def : InstAlias<"div{b}\t{$src, %al|al, $src}", (DIV8r GR8 :$src)>;
+def : InstAlias<"div{w}\t{$src, %ax|ax, $src}", (DIV16r GR16:$src)>;
+def : InstAlias<"div{l}\t{$src, %eax|eax, $src}", (DIV32r GR32:$src)>;
+def : InstAlias<"div{q}\t{$src, %rax|rax, $src}", (DIV64r GR64:$src)>;
+def : InstAlias<"div{b}\t{$src, %al|al, $src}", (DIV8m i8mem :$src)>;
+def : InstAlias<"div{w}\t{$src, %ax|ax, $src}", (DIV16m i16mem:$src)>;
+def : InstAlias<"div{l}\t{$src, %eax|eax, $src}", (DIV32m i32mem:$src)>;
+def : InstAlias<"div{q}\t{$src, %rax|rax, $src}", (DIV64m i64mem:$src)>;
+def : InstAlias<"idiv{b}\t{$src, %al|al, $src}", (IDIV8r GR8 :$src)>;
+def : InstAlias<"idiv{w}\t{$src, %ax|ax, $src}", (IDIV16r GR16:$src)>;
+def : InstAlias<"idiv{l}\t{$src, %eax|eax, $src}", (IDIV32r GR32:$src)>;
+def : InstAlias<"idiv{q}\t{$src, %rax|rax, $src}", (IDIV64r GR64:$src)>;
+def : InstAlias<"idiv{b}\t{$src, %al|al, $src}", (IDIV8m i8mem :$src)>;
+def : InstAlias<"idiv{w}\t{$src, %ax|ax, $src}", (IDIV16m i16mem:$src)>;
+def : InstAlias<"idiv{l}\t{$src, %eax|eax, $src}", (IDIV32m i32mem:$src)>;
+def : InstAlias<"idiv{q}\t{$src, %rax|rax, $src}", (IDIV64m i64mem:$src)>;
+
+
+
+// Various unary fpstack operations default to operating on ST1.
+// For example, "fxch" -> "fxch %st(1)"
+def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>;
+def: InstAlias<"fadd", (ADD_FPrST0 ST1), 0>;
+def : InstAlias<"fsub{|r}p", (SUBR_FPrST0 ST1), 0>;
+def : InstAlias<"fsub{r|}p", (SUB_FPrST0 ST1), 0>;
+def : InstAlias<"fmul", (MUL_FPrST0 ST1), 0>;
+def : InstAlias<"fmulp", (MUL_FPrST0 ST1), 0>;
+def : InstAlias<"fdiv{|r}p", (DIVR_FPrST0 ST1), 0>;
+def : InstAlias<"fdiv{r|}p", (DIV_FPrST0 ST1), 0>;
+def : InstAlias<"fxch", (XCH_F ST1), 0>;
+def : InstAlias<"fcom", (COM_FST0r ST1), 0>;
+def : InstAlias<"fcomp", (COMP_FST0r ST1), 0>;
+def : InstAlias<"fcomi", (COM_FIr ST1), 0>;
+def : InstAlias<"fcompi", (COM_FIPr ST1), 0>;
+def : InstAlias<"fucom", (UCOM_Fr ST1), 0>;
+def : InstAlias<"fucomp", (UCOM_FPr ST1), 0>;
+def : InstAlias<"fucomi", (UCOM_FIr ST1), 0>;
+def : InstAlias<"fucompi", (UCOM_FIPr ST1), 0>;
+
+// Handle fmul/fadd/fsub/fdiv instructions with explicitly written st(0) op.
+// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate
+// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
+// gas.
+multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
+ def : InstAlias<!strconcat(Mnemonic, "\t$op"),
+ (Inst RSTi:$op), EmitAlias>;
+ def : InstAlias<!strconcat(Mnemonic, "\t{%st, %st|st, st}"),
+ (Inst ST0), EmitAlias>;
+}
+
+defm : FpUnaryAlias<"fadd", ADD_FST0r, 0>;
+defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>;
+defm : FpUnaryAlias<"fsub", SUB_FST0r, 0>;
+defm : FpUnaryAlias<"fsub{|r}p", SUBR_FPrST0, 0>;
+defm : FpUnaryAlias<"fsubr", SUBR_FST0r, 0>;
+defm : FpUnaryAlias<"fsub{r|}p", SUB_FPrST0, 0>;
+defm : FpUnaryAlias<"fmul", MUL_FST0r, 0>;
+defm : FpUnaryAlias<"fmulp", MUL_FPrST0, 0>;
+defm : FpUnaryAlias<"fdiv", DIV_FST0r, 0>;
+defm : FpUnaryAlias<"fdiv{|r}p", DIVR_FPrST0, 0>;
+defm : FpUnaryAlias<"fdivr", DIVR_FST0r, 0>;
+defm : FpUnaryAlias<"fdiv{r|}p", DIV_FPrST0, 0>;
+defm : FpUnaryAlias<"fcomi", COM_FIr, 0>;
+defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>;
+defm : FpUnaryAlias<"fcompi", COM_FIPr, 0>;
+defm : FpUnaryAlias<"fucompi", UCOM_FIPr, 0>;
+
+
+// Handle "f{mulp,addp} $op, %st(0)" the same as "f{mulp,addp} $op", since they
+// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
+// solely because gas supports it.
+def : InstAlias<"faddp\t{$op, %st|st, $op}", (ADD_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fmulp\t{$op, %st|st, $op}", (MUL_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fsub{|r}p\t{$op, %st|st, $op}", (SUBR_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fsub{r|}p\t{$op, %st|st, $op}", (SUB_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fdiv{|r}p\t{$op, %st|st, $op}", (DIVR_FPrST0 RSTi:$op), 0>;
+def : InstAlias<"fdiv{r|}p\t{$op, %st|st, $op}", (DIV_FPrST0 RSTi:$op), 0>;
+
+def : InstAlias<"fnstsw" , (FNSTSW16r), 0>;
+
+// lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but
+// this is compatible with what GAS does.
+def : InstAlias<"lcall\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg), 0>, Requires<[In32BitMode]>;
+def : InstAlias<"ljmp\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg), 0>, Requires<[In32BitMode]>;
+def : InstAlias<"lcall\t{*}$dst", (FARCALL32m opaquemem:$dst), 0>, Requires<[Not16BitMode]>;
+def : InstAlias<"ljmp\t{*}$dst", (FARJMP32m opaquemem:$dst), 0>, Requires<[Not16BitMode]>;
+def : InstAlias<"lcall\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"ljmp\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"lcall\t{*}$dst", (FARCALL16m opaquemem:$dst), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"ljmp\t{*}$dst", (FARJMP16m opaquemem:$dst), 0>, Requires<[In16BitMode]>;
+
+def : InstAlias<"jmp\t{*}$dst", (JMP64m i64mem:$dst), 0, "att">, Requires<[In64BitMode]>;
+def : InstAlias<"jmp\t{*}$dst", (JMP32m i32mem:$dst), 0, "att">, Requires<[In32BitMode]>;
+def : InstAlias<"jmp\t{*}$dst", (JMP16m i16mem:$dst), 0, "att">, Requires<[In16BitMode]>;
+
+
+// "imul <imm>, B" is an alias for "imul <imm>, B, B".
+def : InstAlias<"imul{w}\t{$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>;
+def : InstAlias<"imul{w}\t{$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>;
+def : InstAlias<"imul{l}\t{$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>;
+def : InstAlias<"imul{l}\t{$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>;
+def : InstAlias<"imul{q}\t{$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>;
+def : InstAlias<"imul{q}\t{$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>;
+
+// ins aliases. Accept the mnemonic suffix being omitted because it's implicit
+// in the destination.
+def : InstAlias<"ins\t{%dx, $dst|$dst, dx}", (INSB dstidx8:$dst), 0, "intel">;
+def : InstAlias<"ins\t{%dx, $dst|$dst, dx}", (INSW dstidx16:$dst), 0, "intel">;
+def : InstAlias<"ins\t{%dx, $dst|$dst, dx}", (INSL dstidx32:$dst), 0, "intel">;
+
+// outs aliases. Accept the mnemonic suffix being omitted because it's implicit
+// in the source.
+def : InstAlias<"outs\t{$src, %dx|dx, $src}", (OUTSB srcidx8:$src), 0, "intel">;
+def : InstAlias<"outs\t{$src, %dx|dx, $src}", (OUTSW srcidx16:$src), 0, "intel">;
+def : InstAlias<"outs\t{$src, %dx|dx, $src}", (OUTSL srcidx32:$src), 0, "intel">;
+
+// inb %dx -> inb %al, %dx
+def : InstAlias<"inb\t{%dx|dx}", (IN8rr), 0>;
+def : InstAlias<"inw\t{%dx|dx}", (IN16rr), 0>;
+def : InstAlias<"inl\t{%dx|dx}", (IN32rr), 0>;
+def : InstAlias<"inb\t$port", (IN8ri u8imm:$port), 0>;
+def : InstAlias<"inw\t$port", (IN16ri u8imm:$port), 0>;
+def : InstAlias<"inl\t$port", (IN32ri u8imm:$port), 0>;
+
+
+// jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp
+def : InstAlias<"call\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>;
+def : InstAlias<"jmp\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>;
+def : InstAlias<"call\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>, Requires<[In32BitMode]>;
+def : InstAlias<"jmp\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>, Requires<[In32BitMode]>;
+def : InstAlias<"callw\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>, Requires<[Not64BitMode]>;
+def : InstAlias<"jmpw\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>, Requires<[Not64BitMode]>;
+def : InstAlias<"calll\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>, Requires<[Not64BitMode]>;
+def : InstAlias<"jmpl\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>, Requires<[Not64BitMode]>;
+
+// Match 'movq <largeimm>, <reg>' as an alias for movabsq.
+def : InstAlias<"mov{q}\t{$imm, $reg|$reg, $imm}", (MOV64ri GR64:$reg, i64imm:$imm), 0>;
+
+// Match 'movd GR64, MMX' as an alias for movq to be compatible with gas,
+// which supports this due to an old AMD documentation bug when 64-bit mode was
+// created.
+def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
+ (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
+def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
+ (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;
+
+// movsx aliases
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX16rr8 GR16:$dst, GR8:$src), 0, "att">;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0, "att">;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX32rr8 GR32:$dst, GR8:$src), 0, "att">;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX32rr16 GR32:$dst, GR16:$src), 0, "att">;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr8 GR64:$dst, GR8:$src), 0, "att">;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr16 GR64:$dst, GR16:$src), 0, "att">;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr32 GR64:$dst, GR32:$src), 0, "att">;
+
+// movzx aliases
+def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX16rr8 GR16:$dst, GR8:$src), 0, "att">;
+def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0, "att">;
+def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX32rr8 GR32:$dst, GR8:$src), 0, "att">;
+def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX32rr16 GR32:$dst, GR16:$src), 0, "att">;
+def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX64rr8 GR64:$dst, GR8:$src), 0, "att">;
+def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX64rr16 GR64:$dst, GR16:$src), 0, "att">;
+// Note: No GR32->GR64 movzx form.
+
+// outb %dx -> outb %al, %dx
+def : InstAlias<"outb\t{%dx|dx}", (OUT8rr), 0>;
+def : InstAlias<"outw\t{%dx|dx}", (OUT16rr), 0>;
+def : InstAlias<"outl\t{%dx|dx}", (OUT32rr), 0>;
+def : InstAlias<"outb\t$port", (OUT8ir u8imm:$port), 0>;
+def : InstAlias<"outw\t$port", (OUT16ir u8imm:$port), 0>;
+def : InstAlias<"outl\t$port", (OUT32ir u8imm:$port), 0>;
+
+// 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
+// effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity
+// errors, since its encoding is the most compact.
+def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem), 0>;
+
+// shld/shrd op,op -> shld op, op, CL
+def : InstAlias<"shld{w}\t{$r2, $r1|$r1, $r2}", (SHLD16rrCL GR16:$r1, GR16:$r2), 0>;
+def : InstAlias<"shld{l}\t{$r2, $r1|$r1, $r2}", (SHLD32rrCL GR32:$r1, GR32:$r2), 0>;
+def : InstAlias<"shld{q}\t{$r2, $r1|$r1, $r2}", (SHLD64rrCL GR64:$r1, GR64:$r2), 0>;
+def : InstAlias<"shrd{w}\t{$r2, $r1|$r1, $r2}", (SHRD16rrCL GR16:$r1, GR16:$r2), 0>;
+def : InstAlias<"shrd{l}\t{$r2, $r1|$r1, $r2}", (SHRD32rrCL GR32:$r1, GR32:$r2), 0>;
+def : InstAlias<"shrd{q}\t{$r2, $r1|$r1, $r2}", (SHRD64rrCL GR64:$r1, GR64:$r2), 0>;
+
+def : InstAlias<"shld{w}\t{$reg, $mem|$mem, $reg}", (SHLD16mrCL i16mem:$mem, GR16:$reg), 0>;
+def : InstAlias<"shld{l}\t{$reg, $mem|$mem, $reg}", (SHLD32mrCL i32mem:$mem, GR32:$reg), 0>;
+def : InstAlias<"shld{q}\t{$reg, $mem|$mem, $reg}", (SHLD64mrCL i64mem:$mem, GR64:$reg), 0>;
+def : InstAlias<"shrd{w}\t{$reg, $mem|$mem, $reg}", (SHRD16mrCL i16mem:$mem, GR16:$reg), 0>;
+def : InstAlias<"shrd{l}\t{$reg, $mem|$mem, $reg}", (SHRD32mrCL i32mem:$mem, GR32:$reg), 0>;
+def : InstAlias<"shrd{q}\t{$reg, $mem|$mem, $reg}", (SHRD64mrCL i64mem:$mem, GR64:$reg), 0>;
+
+// test: We accept "testX <reg>, <mem>" and "testX <mem>, <reg>" as synonyms.
+def : InstAlias<"test{b}\t{$mem, $val|$val, $mem}",
+ (TEST8mr i8mem :$mem, GR8 :$val), 0>;
+def : InstAlias<"test{w}\t{$mem, $val|$val, $mem}",
+ (TEST16mr i16mem:$mem, GR16:$val), 0>;
+def : InstAlias<"test{l}\t{$mem, $val|$val, $mem}",
+ (TEST32mr i32mem:$mem, GR32:$val), 0>;
+def : InstAlias<"test{q}\t{$mem, $val|$val, $mem}",
+ (TEST64mr i64mem:$mem, GR64:$val), 0>;
+
+// xchg: We accept "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as synonyms.
+def : InstAlias<"xchg{b}\t{$mem, $val|$val, $mem}",
+ (XCHG8rm GR8 :$val, i8mem :$mem), 0>;
+def : InstAlias<"xchg{w}\t{$mem, $val|$val, $mem}",
+ (XCHG16rm GR16:$val, i16mem:$mem), 0>;
+def : InstAlias<"xchg{l}\t{$mem, $val|$val, $mem}",
+ (XCHG32rm GR32:$val, i32mem:$mem), 0>;
+def : InstAlias<"xchg{q}\t{$mem, $val|$val, $mem}",
+ (XCHG64rm GR64:$val, i64mem:$mem), 0>;
+
+// xchg: We accept "xchgX <reg>, %eax" and "xchgX %eax, <reg>" as synonyms.
+def : InstAlias<"xchg{w}\t{%ax, $src|$src, ax}", (XCHG16ar GR16:$src), 0>;
+def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", (XCHG32ar GR32:$src), 0>;
+def : InstAlias<"xchg{q}\t{%rax, $src|$src, rax}", (XCHG64ar GR64:$src), 0>;
+
+// In 64-bit mode, xchg %eax, %eax can't be encoded with the 0x90 opcode we
+// would get by default because that encoding is defined as NOP. But
+// xchg %eax, %eax implicitly zeroes the upper 32 bits, so alias to the
+// longer encoding.
+def : InstAlias<"xchg{l}\t{%eax, %eax|eax, eax}",
+ (XCHG32rr EAX, EAX), 0>, Requires<[In64BitMode]>;
+
+// xchg %rax, %rax is a nop in x86-64 and can be encoded as such. Without this
+// we emit an unneeded REX.w prefix.
+def : InstAlias<"xchg{q}\t{%rax, %rax|rax, rax}", (NOOP), 0>;
+
+// These aliases exist to get the parser to prioritize matching 8-bit
+// immediate encodings over matching the implicit ax/eax/rax encodings. By
+// explicitly mentioning the A register here, these entries will be ordered
+// first due to the more explicit immediate type.
+def : InstAlias<"adc{w}\t{$imm, %ax|ax, $imm}", (ADC16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"add{w}\t{$imm, %ax|ax, $imm}", (ADD16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"and{w}\t{$imm, %ax|ax, $imm}", (AND16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"cmp{w}\t{$imm, %ax|ax, $imm}", (CMP16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"or{w}\t{$imm, %ax|ax, $imm}", (OR16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"sbb{w}\t{$imm, %ax|ax, $imm}", (SBB16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"sub{w}\t{$imm, %ax|ax, $imm}", (SUB16ri8 AX, i16i8imm:$imm), 0>;
+def : InstAlias<"xor{w}\t{$imm, %ax|ax, $imm}", (XOR16ri8 AX, i16i8imm:$imm), 0>;
+
+def : InstAlias<"adc{l}\t{$imm, %eax|eax, $imm}", (ADC32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"add{l}\t{$imm, %eax|eax, $imm}", (ADD32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"and{l}\t{$imm, %eax|eax, $imm}", (AND32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"cmp{l}\t{$imm, %eax|eax, $imm}", (CMP32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"or{l}\t{$imm, %eax|eax, $imm}", (OR32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"sbb{l}\t{$imm, %eax|eax, $imm}", (SBB32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"sub{l}\t{$imm, %eax|eax, $imm}", (SUB32ri8 EAX, i32i8imm:$imm), 0>;
+def : InstAlias<"xor{l}\t{$imm, %eax|eax, $imm}", (XOR32ri8 EAX, i32i8imm:$imm), 0>;
+
+def : InstAlias<"adc{q}\t{$imm, %rax|rax, $imm}", (ADC64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"add{q}\t{$imm, %rax|rax, $imm}", (ADD64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"and{q}\t{$imm, %rax|rax, $imm}", (AND64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"cmp{q}\t{$imm, %rax|rax, $imm}", (CMP64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"or{q}\t{$imm, %rax|rax, $imm}", (OR64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"sbb{q}\t{$imm, %rax|rax, $imm}", (SBB64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"sub{q}\t{$imm, %rax|rax, $imm}", (SUB64ri8 RAX, i64i8imm:$imm), 0>;
+def : InstAlias<"xor{q}\t{$imm, %rax|rax, $imm}", (XOR64ri8 RAX, i64i8imm:$imm), 0>;
+
+// MMX instruction alias.
+def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
+ (MMX_MOVQ64rr_REV VR64:$dst, VR64:$src), 0>;
+
+// CMOV SETCC Aliases
+multiclass CMOV_SETCC_Aliases<string Cond, int CC> {
+ def : InstAlias<"cmov"#Cond#"{w}\t{$src, $dst|$dst, $src}",
+ (CMOV16rr GR16:$dst, GR16:$src, CC), 0>;
+ def : InstAlias<"cmov"#Cond#"{w}\t{$src, $dst|$dst, $src}",
+ (CMOV16rm GR16:$dst, i16mem:$src, CC), 0>;
+ def : InstAlias<"cmov"#Cond#"{l}\t{$src, $dst|$dst, $src}",
+ (CMOV32rr GR32:$dst, GR32:$src, CC), 0>;
+ def : InstAlias<"cmov"#Cond#"{l}\t{$src, $dst|$dst, $src}",
+ (CMOV32rm GR32:$dst, i32mem:$src, CC), 0>;
+ def : InstAlias<"cmov"#Cond#"{q}\t{$src, $dst|$dst, $src}",
+ (CMOV64rr GR64:$dst, GR64:$src, CC), 0>;
+ def : InstAlias<"cmov"#Cond#"{q}\t{$src, $dst|$dst, $src}",
+ (CMOV64rm GR64:$dst, i64mem:$src, CC), 0>;
+
+ def : InstAlias<"set"#Cond#"\t$dst", (SETCCr GR8:$dst, CC), 0>;
+ def : InstAlias<"set"#Cond#"\t$dst", (SETCCm i8mem:$dst, CC), 0>;
+}
+
+defm : CMOV_SETCC_Aliases<"o" , 0>;
+defm : CMOV_SETCC_Aliases<"no", 1>;
+defm : CMOV_SETCC_Aliases<"b" , 2>;
+defm : CMOV_SETCC_Aliases<"ae", 3>;
+defm : CMOV_SETCC_Aliases<"e" , 4>;
+defm : CMOV_SETCC_Aliases<"ne", 5>;
+defm : CMOV_SETCC_Aliases<"be", 6>;
+defm : CMOV_SETCC_Aliases<"a" , 7>;
+defm : CMOV_SETCC_Aliases<"s" , 8>;
+defm : CMOV_SETCC_Aliases<"ns", 9>;
+defm : CMOV_SETCC_Aliases<"p" , 10>;
+defm : CMOV_SETCC_Aliases<"np", 11>;
+defm : CMOV_SETCC_Aliases<"l" , 12>;
+defm : CMOV_SETCC_Aliases<"ge", 13>;
+defm : CMOV_SETCC_Aliases<"le", 14>;
+defm : CMOV_SETCC_Aliases<"g" , 15>;
+
+// Conditional jump instruction aliases.
+def : InstAlias<"jo\t$dst", (JCC_1 brtarget8:$dst, 0), 0>;
+def : InstAlias<"jno\t$dst", (JCC_1 brtarget8:$dst, 1), 0>;
+def : InstAlias<"jb\t$dst", (JCC_1 brtarget8:$dst, 2), 0>;
+def : InstAlias<"jae\t$dst", (JCC_1 brtarget8:$dst, 3), 0>;
+def : InstAlias<"je\t$dst", (JCC_1 brtarget8:$dst, 4), 0>;
+def : InstAlias<"jne\t$dst", (JCC_1 brtarget8:$dst, 5), 0>;
+def : InstAlias<"jbe\t$dst", (JCC_1 brtarget8:$dst, 6), 0>;
+def : InstAlias<"ja\t$dst", (JCC_1 brtarget8:$dst, 7), 0>;
+def : InstAlias<"js\t$dst", (JCC_1 brtarget8:$dst, 8), 0>;
+def : InstAlias<"jns\t$dst", (JCC_1 brtarget8:$dst, 9), 0>;
+def : InstAlias<"jp\t$dst", (JCC_1 brtarget8:$dst, 10), 0>;
+def : InstAlias<"jnp\t$dst", (JCC_1 brtarget8:$dst, 11), 0>;
+def : InstAlias<"jl\t$dst", (JCC_1 brtarget8:$dst, 12), 0>;
+def : InstAlias<"jge\t$dst", (JCC_1 brtarget8:$dst, 13), 0>;
+def : InstAlias<"jle\t$dst", (JCC_1 brtarget8:$dst, 14), 0>;
+def : InstAlias<"jg\t$dst", (JCC_1 brtarget8:$dst, 15), 0>;
+
+// SVM instruction aliases.
+def : InstAlias<"skinit\t{%eax|eax}", (SKINIT), 0>;
+def : InstAlias<"vmrun\t{%eax|eax}", (VMRUN32), 0>, Requires<[Not64BitMode]>;
+def : InstAlias<"vmrun\t{%rax|rax}", (VMRUN64), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"vmload\t{%eax|eax}", (VMLOAD32), 0>, Requires<[Not64BitMode]>;
+def : InstAlias<"vmload\t{%rax|rax}", (VMLOAD64), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"vmsave\t{%eax|eax}", (VMSAVE32), 0>, Requires<[Not64BitMode]>;
+def : InstAlias<"vmsave\t{%rax|rax}", (VMSAVE64), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"invlpga\t{%eax, %ecx|eax, ecx}", (INVLPGA32), 0>, Requires<[Not64BitMode]>;
+def : InstAlias<"invlpga\t{%rax, %ecx|rax, ecx}", (INVLPGA64), 0>, Requires<[In64BitMode]>;
+
diff --git a/llvm/lib/Target/X86/X86InstrCMovSetCC.td b/llvm/lib/Target/X86/X86InstrCMovSetCC.td
index 79ac2a2d8019..2e31c05cd687 100644
--- a/llvm/lib/Target/X86/X86InstrCMovSetCC.td
+++ b/llvm/lib/Target/X86/X86InstrCMovSetCC.td
@@ -84,41 +84,6 @@ let Uses = [EFLAGS], isCodeGenOnly = 1, ForceDisassemble = 1 in {
TB, Sched<[WriteSETCCStore]>;
} // Uses = [EFLAGS]
-multiclass CMOV_SETCC_Aliases<string Cond, int CC> {
- def : InstAlias<"cmov"#Cond#"{w}\t{$src, $dst|$dst, $src}",
- (CMOV16rr GR16:$dst, GR16:$src, CC), 0>;
- def : InstAlias<"cmov"#Cond#"{w}\t{$src, $dst|$dst, $src}",
- (CMOV16rm GR16:$dst, i16mem:$src, CC), 0>;
- def : InstAlias<"cmov"#Cond#"{l}\t{$src, $dst|$dst, $src}",
- (CMOV32rr GR32:$dst, GR32:$src, CC), 0>;
- def : InstAlias<"cmov"#Cond#"{l}\t{$src, $dst|$dst, $src}",
- (CMOV32rm GR32:$dst, i32mem:$src, CC), 0>;
- def : InstAlias<"cmov"#Cond#"{q}\t{$src, $dst|$dst, $src}",
- (CMOV64rr GR64:$dst, GR64:$src, CC), 0>;
- def : InstAlias<"cmov"#Cond#"{q}\t{$src, $dst|$dst, $src}",
- (CMOV64rm GR64:$dst, i64mem:$src, CC), 0>;
-
- def : InstAlias<"set"#Cond#"\t$dst", (SETCCr GR8:$dst, CC), 0>;
- def : InstAlias<"set"#Cond#"\t$dst", (SETCCm i8mem:$dst, CC), 0>;
-}
-
-defm : CMOV_SETCC_Aliases<"o" , 0>;
-defm : CMOV_SETCC_Aliases<"no", 1>;
-defm : CMOV_SETCC_Aliases<"b" , 2>;
-defm : CMOV_SETCC_Aliases<"ae", 3>;
-defm : CMOV_SETCC_Aliases<"e" , 4>;
-defm : CMOV_SETCC_Aliases<"ne", 5>;
-defm : CMOV_SETCC_Aliases<"be", 6>;
-defm : CMOV_SETCC_Aliases<"a" , 7>;
-defm : CMOV_SETCC_Aliases<"s" , 8>;
-defm : CMOV_SETCC_Aliases<"ns", 9>;
-defm : CMOV_SETCC_Aliases<"p" , 10>;
-defm : CMOV_SETCC_Aliases<"np", 11>;
-defm : CMOV_SETCC_Aliases<"l" , 12>;
-defm : CMOV_SETCC_Aliases<"ge", 13>;
-defm : CMOV_SETCC_Aliases<"le", 14>;
-defm : CMOV_SETCC_Aliases<"g" , 15>;
-
// SALC is an undocumented instruction. Information for this instruction can be found
// here http://www.rcollins.org/secrets/opcodes/SALC.html
// Set AL if carry.
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index 8fddd0037999..52750937c425 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -896,15 +896,15 @@ multiclass ATOMIC_LOGIC_OP<Format Form, string s> {
multiclass ATOMIC_LOGIC_OP_RM<bits<8> Opc8, string s> {
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
SchedRW = [WriteBitTestSetRegRMW] in {
- def 16rm : Ii8<Opc8, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ def 16rm : I<Opc8, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
!strconcat(s, "{w}\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (!cast<SDNode>("x86_rm_" # s) addr:$src1, GR16:$src2))]>,
OpSize16, TB, LOCK;
- def 32rm : Ii8<Opc8, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ def 32rm : I<Opc8, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
!strconcat(s, "{l}\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (!cast<SDNode>("x86_rm_" # s) addr:$src1, GR32:$src2))]>,
OpSize32, TB, LOCK;
- def 64rm : RIi8<Opc8, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ def 64rm : RI<Opc8, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
!strconcat(s, "{q}\t{$src2, $src1|$src1, $src2}"),
[(set EFLAGS, (!cast<SDNode>("x86_rm_" # s) addr:$src1, GR64:$src2))]>,
TB, LOCK;
@@ -1225,12 +1225,12 @@ def : Pat<(f64 (bitconvert (i64 (atomic_load_64 addr:$src)))),
// binary size compared to a regular MOV, but it introduces an unnecessary
// load, so is not suitable for regular or optsize functions.
let Predicates = [OptForMinSize] in {
-def : Pat<(simple_store (i16 0), addr:$dst), (AND16mi8 addr:$dst, 0)>;
-def : Pat<(simple_store (i32 0), addr:$dst), (AND32mi8 addr:$dst, 0)>;
-def : Pat<(simple_store (i64 0), addr:$dst), (AND64mi8 addr:$dst, 0)>;
-def : Pat<(simple_store (i16 -1), addr:$dst), (OR16mi8 addr:$dst, -1)>;
-def : Pat<(simple_store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>;
-def : Pat<(simple_store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>;
+def : Pat<(simple_store (i16 0), addr:$dst), (AND16mi addr:$dst, 0)>;
+def : Pat<(simple_store (i32 0), addr:$dst), (AND32mi addr:$dst, 0)>;
+def : Pat<(simple_store (i64 0), addr:$dst), (AND64mi32 addr:$dst, 0)>;
+def : Pat<(simple_store (i16 -1), addr:$dst), (OR16mi addr:$dst, -1)>;
+def : Pat<(simple_store (i32 -1), addr:$dst), (OR32mi addr:$dst, -1)>;
+def : Pat<(simple_store (i64 -1), addr:$dst), (OR64mi32 addr:$dst, -1)>;
}
// In kernel code model, we can get the address of a label
@@ -1526,35 +1526,16 @@ def ADD64rr_DB : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
[(set GR64:$dst, (or_is_add GR64:$src1, GR64:$src2))]>;
} // isCommutable
-// NOTE: These are order specific, we want the ri8 forms to be listed
-// first so that they are slightly preferred to the ri forms.
-
def ADD8ri_DB : I<0, Pseudo,
(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
"", // orb/addb REG, imm8
[(set GR8:$dst, (or_is_add GR8:$src1, imm:$src2))]>;
-def ADD16ri8_DB : I<0, Pseudo,
- (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
- "", // orw/addw REG, imm8
- [(set GR16:$dst,(or_is_add GR16:$src1,i16immSExt8:$src2))]>;
def ADD16ri_DB : I<0, Pseudo, (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
"", // orw/addw REG, imm
[(set GR16:$dst, (or_is_add GR16:$src1, imm:$src2))]>;
-
-def ADD32ri8_DB : I<0, Pseudo,
- (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
- "", // orl/addl REG, imm8
- [(set GR32:$dst,(or_is_add GR32:$src1,i32immSExt8:$src2))]>;
def ADD32ri_DB : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
"", // orl/addl REG, imm
[(set GR32:$dst, (or_is_add GR32:$src1, imm:$src2))]>;
-
-
-def ADD64ri8_DB : I<0, Pseudo,
- (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
- "", // orq/addq REG, imm8
- [(set GR64:$dst, (or_is_add GR64:$src1,
- i64immSExt8:$src2))]>;
def ADD64ri32_DB : I<0, Pseudo,
(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
"", // orq/addq REG, imm
@@ -1585,26 +1566,26 @@ def : Pat<(xor GR32:$src1, -2147483648),
// Odd encoding trick: -128 fits into an 8-bit immediate field while
// +128 doesn't, so in this special case use a sub instead of an add.
def : Pat<(add GR16:$src1, 128),
- (SUB16ri8 GR16:$src1, -128)>;
+ (SUB16ri GR16:$src1, -128)>;
def : Pat<(store (add (loadi16 addr:$dst), 128), addr:$dst),
- (SUB16mi8 addr:$dst, -128)>;
+ (SUB16mi addr:$dst, -128)>;
def : Pat<(add GR32:$src1, 128),
- (SUB32ri8 GR32:$src1, -128)>;
+ (SUB32ri GR32:$src1, -128)>;
def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
- (SUB32mi8 addr:$dst, -128)>;
+ (SUB32mi addr:$dst, -128)>;
def : Pat<(add GR64:$src1, 128),
- (SUB64ri8 GR64:$src1, -128)>;
+ (SUB64ri32 GR64:$src1, -128)>;
def : Pat<(store (add (loadi64 addr:$dst), 128), addr:$dst),
- (SUB64mi8 addr:$dst, -128)>;
+ (SUB64mi32 addr:$dst, -128)>;
def : Pat<(X86add_flag_nocf GR16:$src1, 128),
- (SUB16ri8 GR16:$src1, -128)>;
+ (SUB16ri GR16:$src1, -128)>;
def : Pat<(X86add_flag_nocf GR32:$src1, 128),
- (SUB32ri8 GR32:$src1, -128)>;
+ (SUB32ri GR32:$src1, -128)>;
def : Pat<(X86add_flag_nocf GR64:$src1, 128),
- (SUB64ri8 GR64:$src1, -128)>;
+ (SUB64ri32 GR64:$src1, -128)>;
// The same trick applies for 32-bit immediate fields in 64-bit
// instructions.
@@ -1612,7 +1593,6 @@ def : Pat<(add GR64:$src1, 0x0000000080000000),
(SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
def : Pat<(store (add (loadi64 addr:$dst), 0x0000000080000000), addr:$dst),
(SUB64mi32 addr:$dst, 0xffffffff80000000)>;
-
def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000),
(SUB64ri32 GR64:$src1, 0xffffffff80000000)>;
@@ -1625,14 +1605,6 @@ def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000),
// AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32.
let AddedComplexity = 1 in {
-def : Pat<(and GR64:$src, i64immZExt32SExt8:$imm),
- (SUBREG_TO_REG
- (i64 0),
- (AND32ri8
- (EXTRACT_SUBREG GR64:$src, sub_32bit),
- (i32 (GetLo32XForm imm:$imm))),
- sub_32bit)>;
-
def : Pat<(and GR64:$src, i64immZExt32:$imm),
(SUBREG_TO_REG
(i64 0),
@@ -1680,12 +1652,12 @@ def : Pat<(and GR64:$src, 0xff),
def BTRXForm : SDNodeXForm<imm, [{
// Transformation function: Find the lowest 0.
- return getI64Imm((uint8_t)N->getAPIntValue().countTrailingOnes(), SDLoc(N));
+ return getI64Imm((uint8_t)N->getAPIntValue().countr_one(), SDLoc(N));
}]>;
def BTCBTSXForm : SDNodeXForm<imm, [{
// Transformation function: Find the lowest 1.
- return getI64Imm((uint8_t)N->getAPIntValue().countTrailingZeros(), SDLoc(N));
+ return getI64Imm((uint8_t)N->getAPIntValue().countr_zero(), SDLoc(N));
}]>;
def BTRMask64 : ImmLeaf<i64, [{
@@ -2057,14 +2029,7 @@ def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(add GR16:$src1, i16immSExt8:$src2),
- (ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(add GR32:$src1, i32immSExt8:$src2),
- (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(add GR64:$src1, i64immSExt8:$src2),
- (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(add GR64:$src1, i64immSExt32:$src2),
- (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+def : Pat<(add GR64:$src1, i64immSExt32:$src2), (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
// sub reg, reg
def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
@@ -2089,12 +2054,6 @@ def : Pat<(sub GR16:$src1, imm:$src2),
(SUB16ri GR16:$src1, imm:$src2)>;
def : Pat<(sub GR32:$src1, imm:$src2),
(SUB32ri GR32:$src1, imm:$src2)>;
-def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
- (SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
- (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
- (SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
(SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
@@ -2125,12 +2084,6 @@ def : Pat<(mul GR16:$src1, imm:$src2),
(IMUL16rri GR16:$src1, imm:$src2)>;
def : Pat<(mul GR32:$src1, imm:$src2),
(IMUL32rri GR32:$src1, imm:$src2)>;
-def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
- (IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
- (IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
- (IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
(IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
@@ -2139,12 +2092,6 @@ def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
(IMUL16rmi addr:$src1, imm:$src2)>;
def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
(IMUL32rmi addr:$src1, imm:$src2)>;
-def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
- (IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
-def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
- (IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
-def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
- (IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
(IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
@@ -2190,12 +2137,6 @@ def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(or GR16:$src1, i16immSExt8:$src2),
- (OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(or GR32:$src1, i32immSExt8:$src2),
- (OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(or GR64:$src1, i64immSExt8:$src2),
- (OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(or GR64:$src1, i64immSExt32:$src2),
(OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
@@ -2222,12 +2163,6 @@ def : Pat<(xor GR16:$src1, imm:$src2),
(XOR16ri GR16:$src1, imm:$src2)>;
def : Pat<(xor GR32:$src1, imm:$src2),
(XOR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
- (XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
- (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
- (XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
(XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
@@ -2254,12 +2189,6 @@ def : Pat<(and GR16:$src1, imm:$src2),
(AND16ri GR16:$src1, imm:$src2)>;
def : Pat<(and GR32:$src1, imm:$src2),
(AND32ri GR32:$src1, imm:$src2)>;
-def : Pat<(and GR16:$src1, i16immSExt8:$src2),
- (AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(and GR32:$src1, i32immSExt8:$src2),
- (AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(and GR64:$src1, i64immSExt8:$src2),
- (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
def : Pat<(and GR64:$src1, i64immSExt32:$src2),
(AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
diff --git a/llvm/lib/Target/X86/X86InstrControl.td b/llvm/lib/Target/X86/X86InstrControl.td
index aa89a6f0ff9d..fd996603476d 100644
--- a/llvm/lib/Target/X86/X86InstrControl.td
+++ b/llvm/lib/Target/X86/X86InstrControl.td
@@ -16,7 +16,7 @@
// Return instructions.
//
-// The X86retflag return instructions are variadic because we may add ST0 and
+// The X86retglue return instructions are variadic because we may add ST0 and
// ST1 arguments when returning values on the x87 stack.
let isTerminator = 1, isReturn = 1, isBarrier = 1,
hasCtrlDep = 1, FPForm = SpecialFP, SchedRW = [WriteJumpLd] in {
@@ -54,7 +54,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
def IRET64 : RI <0xcf, RawFrm, (outs), (ins), "iretq", []>, Requires<[In64BitMode]>;
let isCodeGenOnly = 1 in
def IRET : PseudoI<(outs), (ins i32imm:$adj), [(X86iret timm:$adj)]>;
- def RET : PseudoI<(outs), (ins i32imm:$adj, variable_ops), [(X86retflag timm:$adj)]>;
+ def RET : PseudoI<(outs), (ins i32imm:$adj, variable_ops), [(X86retglue timm:$adj)]>;
}
// Unconditional branches.
@@ -88,23 +88,6 @@ let isBranch = 1, isTerminator = 1, Uses = [EFLAGS], SchedRW = [WriteJump],
}
}
-def : InstAlias<"jo\t$dst", (JCC_1 brtarget8:$dst, 0), 0>;
-def : InstAlias<"jno\t$dst", (JCC_1 brtarget8:$dst, 1), 0>;
-def : InstAlias<"jb\t$dst", (JCC_1 brtarget8:$dst, 2), 0>;
-def : InstAlias<"jae\t$dst", (JCC_1 brtarget8:$dst, 3), 0>;
-def : InstAlias<"je\t$dst", (JCC_1 brtarget8:$dst, 4), 0>;
-def : InstAlias<"jne\t$dst", (JCC_1 brtarget8:$dst, 5), 0>;
-def : InstAlias<"jbe\t$dst", (JCC_1 brtarget8:$dst, 6), 0>;
-def : InstAlias<"ja\t$dst", (JCC_1 brtarget8:$dst, 7), 0>;
-def : InstAlias<"js\t$dst", (JCC_1 brtarget8:$dst, 8), 0>;
-def : InstAlias<"jns\t$dst", (JCC_1 brtarget8:$dst, 9), 0>;
-def : InstAlias<"jp\t$dst", (JCC_1 brtarget8:$dst, 10), 0>;
-def : InstAlias<"jnp\t$dst", (JCC_1 brtarget8:$dst, 11), 0>;
-def : InstAlias<"jl\t$dst", (JCC_1 brtarget8:$dst, 12), 0>;
-def : InstAlias<"jge\t$dst", (JCC_1 brtarget8:$dst, 13), 0>;
-def : InstAlias<"jle\t$dst", (JCC_1 brtarget8:$dst, 14), 0>;
-def : InstAlias<"jg\t$dst", (JCC_1 brtarget8:$dst, 15), 0>;
-
// jcx/jecx/jrcx instructions.
let isBranch = 1, isTerminator = 1, hasSideEffects = 0, SchedRW = [WriteJump] in {
// These are the 32-bit versions of this instruction for the asmparser. In
@@ -229,7 +212,7 @@ let isCall = 1 in
def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
(outs), (ins i16imm_brtarget:$dst),
"call{w}\t$dst", []>, OpSize16,
- Sched<[WriteJump]>;
+ Requires<[Not64BitMode]>, Sched<[WriteJump]>;
def CALL16r : I<0xFF, MRM2r, (outs), (ins GR16:$dst),
"call{w}\t{*}$dst", [(X86call GR16:$dst)]>,
OpSize16, Requires<[Not64BitMode]>, Sched<[WriteJump]>;
@@ -290,9 +273,9 @@ let isCall = 1 in
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1, Uses = [ESP, SSP] in {
def TCRETURNdi : PseudoI<(outs), (ins i32imm_brtarget:$dst, i32imm:$offset),
- []>, Sched<[WriteJump]>, NotMemoryFoldable;
+ []>, Sched<[WriteJump]>;
def TCRETURNri : PseudoI<(outs), (ins ptr_rc_tailcall:$dst, i32imm:$offset),
- []>, Sched<[WriteJump]>, NotMemoryFoldable;
+ []>, Sched<[WriteJump]>;
let mayLoad = 1 in
def TCRETURNmi : PseudoI<(outs), (ins i32mem_TC:$dst, i32imm:$offset),
[]>, Sched<[WriteJumpLd]>;
@@ -367,11 +350,11 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
[]>, Sched<[WriteJump]>;
def TCRETURNri64 : PseudoI<(outs),
(ins ptr_rc_tailcall:$dst, i32imm:$offset),
- []>, Sched<[WriteJump]>, NotMemoryFoldable;
+ []>, Sched<[WriteJump]>;
let mayLoad = 1 in
def TCRETURNmi64 : PseudoI<(outs),
(ins i64mem_TC:$dst, i32imm:$offset),
- []>, Sched<[WriteJumpLd]>, NotMemoryFoldable;
+ []>, Sched<[WriteJumpLd]>;
def TAILJMPd64 : PseudoI<(outs), (ins i64i32imm_brtarget:$dst),
[]>, Sched<[WriteJump]>;
diff --git a/llvm/lib/Target/X86/X86InstrExtension.td b/llvm/lib/Target/X86/X86InstrExtension.td
index 8d3fce7f55bc..46554dfc5167 100644
--- a/llvm/lib/Target/X86/X86InstrExtension.td
+++ b/llvm/lib/Target/X86/X86InstrExtension.td
@@ -93,17 +93,17 @@ def MOVZX32rm16: I<0xB7, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def MOVSX16rr16: I<0xBF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"movs{ww|x}\t{$src, $dst|$dst, $src}",
- []>, TB, OpSize16, Sched<[WriteALU]>, NotMemoryFoldable;
+ []>, TB, OpSize16, Sched<[WriteALU]>;
def MOVZX16rr16: I<0xB7, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"movz{ww|x}\t{$src, $dst|$dst, $src}",
- []>, TB, OpSize16, Sched<[WriteALU]>, NotMemoryFoldable;
+ []>, TB, OpSize16, Sched<[WriteALU]>;
let mayLoad = 1 in {
def MOVSX16rm16: I<0xBF, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movs{ww|x}\t{$src, $dst|$dst, $src}",
- []>, OpSize16, TB, Sched<[WriteLoad]>, NotMemoryFoldable;
+ []>, OpSize16, TB, Sched<[WriteLoad]>;
def MOVZX16rm16: I<0xB7, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"movz{ww|x}\t{$src, $dst|$dst, $src}",
- []>, TB, OpSize16, Sched<[WriteLoad]>, NotMemoryFoldable;
+ []>, TB, OpSize16, Sched<[WriteLoad]>;
} // mayLoad = 1
} // isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0
diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td
index 1f92293fa73f..03e1225ad9a0 100644
--- a/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/llvm/lib/Target/X86/X86InstrFMA.td
@@ -139,16 +139,16 @@ let ExeDomain = SSEPackedSingle in {
let ExeDomain = SSEPackedDouble in {
defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", "PD",
loadv2f64, loadv4f64, any_fma, v2f64,
- v4f64, SchedWriteFMA>, VEX_W;
+ v4f64, SchedWriteFMA>, REX_W;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", "PD",
loadv2f64, loadv4f64, X86any_Fmsub, v2f64,
- v4f64, SchedWriteFMA>, VEX_W;
+ v4f64, SchedWriteFMA>, REX_W;
defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", "PD",
loadv2f64, loadv4f64, X86Fmaddsub,
- v2f64, v4f64, SchedWriteFMA>, VEX_W;
+ v2f64, v4f64, SchedWriteFMA>, REX_W;
defm VFMSUBADD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", "PD",
loadv2f64, loadv4f64, X86Fmsubadd,
- v2f64, v4f64, SchedWriteFMA>, VEX_W;
+ v2f64, v4f64, SchedWriteFMA>, REX_W;
}
// Fused Negative Multiply-Add
@@ -160,9 +160,9 @@ let ExeDomain = SSEPackedSingle in {
}
let ExeDomain = SSEPackedDouble in {
defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", "PD", loadv2f64,
- loadv4f64, X86any_Fnmadd, v2f64, v4f64, SchedWriteFMA>, VEX_W;
+ loadv4f64, X86any_Fnmadd, v2f64, v4f64, SchedWriteFMA>, REX_W;
defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", "PD", loadv2f64,
- loadv4f64, X86any_Fnmsub, v2f64, v4f64, SchedWriteFMA>, VEX_W;
+ loadv4f64, X86any_Fnmsub, v2f64, v4f64, SchedWriteFMA>, REX_W;
}
// All source register operands of FMA opcodes defined in fma3s_rm multiclass
@@ -316,7 +316,7 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", "SD", OpNode,
FR64, f64mem, sched>,
fma3s_int_forms<opc132, opc213, opc231, OpStr, "sd", "SD",
- VR128, sdmem, sched>, VEX_W;
+ VR128, sdmem, sched>, REX_W;
}
defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", any_fma,
@@ -396,14 +396,14 @@ multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, VEX_LIG,
+ (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, REX_W, VEX_LIG,
Sched<[sched]>;
def rm : FMA4S<opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2,
- (mem_frag addr:$src3)))]>, VEX_W, VEX_LIG,
+ (mem_frag addr:$src3)))]>, REX_W, VEX_LIG,
Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def mr : FMA4S<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
@@ -423,7 +423,7 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
- VEX_LIG, FoldGenData<NAME#rr>, Sched<[sched]>;
+ VEX_LIG, Sched<[sched]>;
}
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
@@ -434,13 +434,13 @@ let isCodeGenOnly = 1, hasSideEffects = 0,
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, VEX_W, VEX_LIG, Sched<[sched]>;
+ []>, REX_W, VEX_LIG, Sched<[sched]>;
let mayLoad = 1 in
def rm_Int : FMA4S_Int<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, VEX_W, VEX_LIG,
+ []>, REX_W, VEX_LIG,
Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
let mayLoad = 1 in
def mr_Int : FMA4S_Int<opc, MRMSrcMem, (outs VR128:$dst),
@@ -458,7 +458,7 @@ let isCodeGenOnly = 1, hasSideEffects = 0,
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, VEX_LIG, FoldGenData<NAME#rr_Int>, Sched<[sched]>;
+ []>, VEX_LIG, Sched<[sched]>;
} // isCodeGenOnly = 1
}
@@ -474,13 +474,13 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(OpVT128 (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>,
- VEX_W, Sched<[sched.XMM]>;
+ REX_W, Sched<[sched.XMM]>;
def rm : FMA4<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, f128mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2,
- (ld_frag128 addr:$src3)))]>, VEX_W,
+ (ld_frag128 addr:$src3)))]>, REX_W,
Sched<[sched.XMM.Folded, sched.XMM.ReadAfterFold, sched.XMM.ReadAfterFold]>;
def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3),
@@ -501,13 +501,13 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst,
(OpVT256 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>,
- VEX_W, VEX_L, Sched<[sched.YMM]>;
+ REX_W, VEX_L, Sched<[sched.YMM]>;
def Yrm : FMA4<opc, MRMSrcMemOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2,
- (ld_frag256 addr:$src3)))]>, VEX_W, VEX_L,
+ (ld_frag256 addr:$src3)))]>, REX_W, VEX_L,
Sched<[sched.YMM.Folded, sched.YMM.ReadAfterFold, sched.YMM.ReadAfterFold]>;
def Ymr : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3),
@@ -527,12 +527,12 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
- Sched<[sched.XMM]>, FoldGenData<NAME#rr>;
+ Sched<[sched.XMM]>;
def Yrr_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
- VEX_L, Sched<[sched.YMM]>, FoldGenData<NAME#Yrr>;
+ VEX_L, Sched<[sched.YMM]>;
} // isCodeGenOnly = 1
}
diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td
index a68d61043c5c..66a2d27abf86 100644
--- a/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -25,6 +25,14 @@ def SDTX86Fist : SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisPtrTy<1>]>;
def SDTX86CwdStore : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDTX86CwdLoad : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDTX86FPEnv : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
+def X86fp80_add : SDNode<"X86ISD::FP80_ADD", SDTFPBinOp, [SDNPCommutative]>;
+def X86strict_fp80_add : SDNode<"X86ISD::STRICT_FP80_ADD", SDTFPBinOp,
+ [SDNPHasChain,SDNPCommutative]>;
+def any_X86fp80_add : PatFrags<(ops node:$lhs, node:$rhs),
+ [(X86strict_fp80_add node:$lhs, node:$rhs),
+ (X86fp80_add node:$lhs, node:$rhs)]>;
def X86fld : SDNode<"X86ISD::FLD", SDTX86Fld,
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
@@ -42,6 +50,12 @@ def X86fp_cwd_get16 : SDNode<"X86ISD::FNSTCW16m", SDTX86CwdStore,
def X86fp_cwd_set16 : SDNode<"X86ISD::FLDCW16m", SDTX86CwdLoad,
[SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
SDNPMemOperand]>;
+def X86fpenv_get : SDNode<"X86ISD::FNSTENVm", SDTX86FPEnv,
+ [SDNPHasChain, SDNPMayStore, SDNPSideEffect,
+ SDNPMemOperand]>;
+def X86fpenv_set : SDNode<"X86ISD::FLDENVm", SDTX86FPEnv,
+ [SDNPHasChain, SDNPMayLoad, SDNPSideEffect,
+ SDNPMemOperand]>;
def X86fstf32 : PatFrag<(ops node:$val, node:$ptr),
(X86fst node:$val, node:$ptr), [{
@@ -141,6 +155,14 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [EFLAGS] in {
[(X86fp_to_i32mem RFP80:$src, addr:$dst)]>;
def FP80_TO_INT64_IN_MEM : PseudoI<(outs), (ins i64mem:$dst, RFP80:$src),
[(X86fp_to_i64mem RFP80:$src, addr:$dst)]>;
+
+ def FP80_ADDr : PseudoI<(outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2),
+ [(set RFP80:$dst,
+ (any_X86fp80_add RFP80:$src1, RFP80:$src2))]>;
+ def FP80_ADDm32 : PseudoI<(outs RFP80:$dst), (ins RFP80:$src1, f32mem:$src2),
+ [(set RFP80:$dst,
+ (any_X86fp80_add RFP80:$src1,
+ (f80 (extloadf32 addr:$src2))))]>;
}
// All FP Stack operations are represented with four instructions here. The
@@ -403,13 +425,17 @@ def FICOMP32m: FPI<0xDA, MRM3m, (outs), (ins i32mem:$src), "ficomp{l}\t$src">;
let SchedRW = [WriteMicrocoded] in {
let Defs = [FPSW, FPCW], mayLoad = 1 in {
-def FLDENVm : FPI<0xD9, MRM4m, (outs), (ins anymem:$src), "fldenv\t$src">;
def FRSTORm : FPI<0xDD, MRM4m, (outs), (ins anymem:$src), "frstor\t$src">;
+let Predicates = [HasX87] in
+def FLDENVm : I<0xD9, MRM4m, (outs), (ins anymem:$src), "fldenv\t$src",
+ [(X86fpenv_set addr:$src)]>;
}
let Defs = [FPSW, FPCW], Uses = [FPSW, FPCW], mayStore = 1 in {
-def FSTENVm : FPI<0xD9, MRM6m, (outs), (ins anymem:$dst), "fnstenv\t$dst">;
def FSAVEm : FPI<0xDD, MRM6m, (outs), (ins anymem:$dst), "fnsave\t$dst">;
+let Predicates = [HasX87] in
+def FSTENVm : I<0xD9, MRM6m, (outs), (ins anymem:$dst), "fnstenv\t$dst",
+ [(X86fpenv_get addr:$dst)]>;
}
let Uses = [FPSW], mayStore = 1 in
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.cpp b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
index 4abce1fcae5a..e1feca25469b 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.cpp
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.cpp
@@ -22,5749 +22,7 @@ using namespace llvm;
// searched at runtime without the need for additional storage. The enum values
// are currently emitted in X86GenInstrInfo.inc in alphabetical order, which
// makes sorting these tables a simple matter of alphabetizing the table.
-//
-// We also have a tablegen emitter that tries to autogenerate these tables
-// by comparing encoding information. This can be enabled by passing
-// X86_GEN_FOLD_TABLES=ON to cmake which will produce X86GenFoldTables.inc
-// in the build area. There are currently some bugs in the autogenerated table
-// that require a manual review to copy them from the autogenerated table into
-// this table. It is unclear if we will ever be able to fully automate this
-// because as new instructions are added into holes in the X86 opcode map they
-// potentially pair up with old instructions and create new entries in the
-// tables that would be incorrect. The manual review process allows us a chance
-// to catch these before they become observable bugs.
-static const X86MemoryFoldTableEntry MemoryFoldTable2Addr[] = {
- { X86::ADD16ri8_DB, X86::ADD16mi8, TB_NO_REVERSE },
- { X86::ADD16ri_DB, X86::ADD16mi, TB_NO_REVERSE },
- { X86::ADD16rr_DB, X86::ADD16mr, TB_NO_REVERSE },
- { X86::ADD32ri8_DB, X86::ADD32mi8, TB_NO_REVERSE },
- { X86::ADD32ri_DB, X86::ADD32mi, TB_NO_REVERSE },
- { X86::ADD32rr_DB, X86::ADD32mr, TB_NO_REVERSE },
- { X86::ADD64ri32_DB,X86::ADD64mi32, TB_NO_REVERSE },
- { X86::ADD64ri8_DB, X86::ADD64mi8, TB_NO_REVERSE },
- { X86::ADD64rr_DB, X86::ADD64mr, TB_NO_REVERSE },
- { X86::ADD8ri_DB, X86::ADD8mi, TB_NO_REVERSE },
- { X86::ADD8rr_DB, X86::ADD8mr, TB_NO_REVERSE },
- { X86::ADC16ri, X86::ADC16mi, 0 },
- { X86::ADC16ri8, X86::ADC16mi8, 0 },
- { X86::ADC16rr, X86::ADC16mr, 0 },
- { X86::ADC32ri, X86::ADC32mi, 0 },
- { X86::ADC32ri8, X86::ADC32mi8, 0 },
- { X86::ADC32rr, X86::ADC32mr, 0 },
- { X86::ADC64ri32, X86::ADC64mi32, 0 },
- { X86::ADC64ri8, X86::ADC64mi8, 0 },
- { X86::ADC64rr, X86::ADC64mr, 0 },
- { X86::ADC8ri, X86::ADC8mi, 0 },
- { X86::ADC8ri8, X86::ADC8mi8, 0 },
- { X86::ADC8rr, X86::ADC8mr, 0 },
- { X86::ADD16ri, X86::ADD16mi, 0 },
- { X86::ADD16ri8, X86::ADD16mi8, 0 },
- { X86::ADD16rr, X86::ADD16mr, 0 },
- { X86::ADD32ri, X86::ADD32mi, 0 },
- { X86::ADD32ri8, X86::ADD32mi8, 0 },
- { X86::ADD32rr, X86::ADD32mr, 0 },
- { X86::ADD64ri32, X86::ADD64mi32, 0 },
- { X86::ADD64ri8, X86::ADD64mi8, 0 },
- { X86::ADD64rr, X86::ADD64mr, 0 },
- { X86::ADD8ri, X86::ADD8mi, 0 },
- { X86::ADD8ri8, X86::ADD8mi8, 0 },
- { X86::ADD8rr, X86::ADD8mr, 0 },
- { X86::AND16ri, X86::AND16mi, 0 },
- { X86::AND16ri8, X86::AND16mi8, 0 },
- { X86::AND16rr, X86::AND16mr, 0 },
- { X86::AND32ri, X86::AND32mi, 0 },
- { X86::AND32ri8, X86::AND32mi8, 0 },
- { X86::AND32rr, X86::AND32mr, 0 },
- { X86::AND64ri32, X86::AND64mi32, 0 },
- { X86::AND64ri8, X86::AND64mi8, 0 },
- { X86::AND64rr, X86::AND64mr, 0 },
- { X86::AND8ri, X86::AND8mi, 0 },
- { X86::AND8ri8, X86::AND8mi8, 0 },
- { X86::AND8rr, X86::AND8mr, 0 },
- { X86::BTC16ri8, X86::BTC16mi8, 0 },
- { X86::BTC32ri8, X86::BTC32mi8, 0 },
- { X86::BTC64ri8, X86::BTC64mi8, 0 },
- { X86::BTR16ri8, X86::BTR16mi8, 0 },
- { X86::BTR32ri8, X86::BTR32mi8, 0 },
- { X86::BTR64ri8, X86::BTR64mi8, 0 },
- { X86::BTS16ri8, X86::BTS16mi8, 0 },
- { X86::BTS32ri8, X86::BTS32mi8, 0 },
- { X86::BTS64ri8, X86::BTS64mi8, 0 },
- { X86::DEC16r, X86::DEC16m, 0 },
- { X86::DEC32r, X86::DEC32m, 0 },
- { X86::DEC64r, X86::DEC64m, 0 },
- { X86::DEC8r, X86::DEC8m, 0 },
- { X86::INC16r, X86::INC16m, 0 },
- { X86::INC32r, X86::INC32m, 0 },
- { X86::INC64r, X86::INC64m, 0 },
- { X86::INC8r, X86::INC8m, 0 },
- { X86::NEG16r, X86::NEG16m, 0 },
- { X86::NEG32r, X86::NEG32m, 0 },
- { X86::NEG64r, X86::NEG64m, 0 },
- { X86::NEG8r, X86::NEG8m, 0 },
- { X86::NOT16r, X86::NOT16m, 0 },
- { X86::NOT32r, X86::NOT32m, 0 },
- { X86::NOT64r, X86::NOT64m, 0 },
- { X86::NOT8r, X86::NOT8m, 0 },
- { X86::OR16ri, X86::OR16mi, 0 },
- { X86::OR16ri8, X86::OR16mi8, 0 },
- { X86::OR16rr, X86::OR16mr, 0 },
- { X86::OR32ri, X86::OR32mi, 0 },
- { X86::OR32ri8, X86::OR32mi8, 0 },
- { X86::OR32rr, X86::OR32mr, 0 },
- { X86::OR64ri32, X86::OR64mi32, 0 },
- { X86::OR64ri8, X86::OR64mi8, 0 },
- { X86::OR64rr, X86::OR64mr, 0 },
- { X86::OR8ri, X86::OR8mi, 0 },
- { X86::OR8ri8, X86::OR8mi8, 0 },
- { X86::OR8rr, X86::OR8mr, 0 },
- { X86::RCL16r1, X86::RCL16m1, 0 },
- { X86::RCL16rCL, X86::RCL16mCL, 0 },
- { X86::RCL16ri, X86::RCL16mi, 0 },
- { X86::RCL32r1, X86::RCL32m1, 0 },
- { X86::RCL32rCL, X86::RCL32mCL, 0 },
- { X86::RCL32ri, X86::RCL32mi, 0 },
- { X86::RCL64r1, X86::RCL64m1, 0 },
- { X86::RCL64rCL, X86::RCL64mCL, 0 },
- { X86::RCL64ri, X86::RCL64mi, 0 },
- { X86::RCL8r1, X86::RCL8m1, 0 },
- { X86::RCL8rCL, X86::RCL8mCL, 0 },
- { X86::RCL8ri, X86::RCL8mi, 0 },
- { X86::RCR16r1, X86::RCR16m1, 0 },
- { X86::RCR16rCL, X86::RCR16mCL, 0 },
- { X86::RCR16ri, X86::RCR16mi, 0 },
- { X86::RCR32r1, X86::RCR32m1, 0 },
- { X86::RCR32rCL, X86::RCR32mCL, 0 },
- { X86::RCR32ri, X86::RCR32mi, 0 },
- { X86::RCR64r1, X86::RCR64m1, 0 },
- { X86::RCR64rCL, X86::RCR64mCL, 0 },
- { X86::RCR64ri, X86::RCR64mi, 0 },
- { X86::RCR8r1, X86::RCR8m1, 0 },
- { X86::RCR8rCL, X86::RCR8mCL, 0 },
- { X86::RCR8ri, X86::RCR8mi, 0 },
- { X86::ROL16r1, X86::ROL16m1, 0 },
- { X86::ROL16rCL, X86::ROL16mCL, 0 },
- { X86::ROL16ri, X86::ROL16mi, 0 },
- { X86::ROL32r1, X86::ROL32m1, 0 },
- { X86::ROL32rCL, X86::ROL32mCL, 0 },
- { X86::ROL32ri, X86::ROL32mi, 0 },
- { X86::ROL64r1, X86::ROL64m1, 0 },
- { X86::ROL64rCL, X86::ROL64mCL, 0 },
- { X86::ROL64ri, X86::ROL64mi, 0 },
- { X86::ROL8r1, X86::ROL8m1, 0 },
- { X86::ROL8rCL, X86::ROL8mCL, 0 },
- { X86::ROL8ri, X86::ROL8mi, 0 },
- { X86::ROR16r1, X86::ROR16m1, 0 },
- { X86::ROR16rCL, X86::ROR16mCL, 0 },
- { X86::ROR16ri, X86::ROR16mi, 0 },
- { X86::ROR32r1, X86::ROR32m1, 0 },
- { X86::ROR32rCL, X86::ROR32mCL, 0 },
- { X86::ROR32ri, X86::ROR32mi, 0 },
- { X86::ROR64r1, X86::ROR64m1, 0 },
- { X86::ROR64rCL, X86::ROR64mCL, 0 },
- { X86::ROR64ri, X86::ROR64mi, 0 },
- { X86::ROR8r1, X86::ROR8m1, 0 },
- { X86::ROR8rCL, X86::ROR8mCL, 0 },
- { X86::ROR8ri, X86::ROR8mi, 0 },
- { X86::SAR16r1, X86::SAR16m1, 0 },
- { X86::SAR16rCL, X86::SAR16mCL, 0 },
- { X86::SAR16ri, X86::SAR16mi, 0 },
- { X86::SAR32r1, X86::SAR32m1, 0 },
- { X86::SAR32rCL, X86::SAR32mCL, 0 },
- { X86::SAR32ri, X86::SAR32mi, 0 },
- { X86::SAR64r1, X86::SAR64m1, 0 },
- { X86::SAR64rCL, X86::SAR64mCL, 0 },
- { X86::SAR64ri, X86::SAR64mi, 0 },
- { X86::SAR8r1, X86::SAR8m1, 0 },
- { X86::SAR8rCL, X86::SAR8mCL, 0 },
- { X86::SAR8ri, X86::SAR8mi, 0 },
- { X86::SBB16ri, X86::SBB16mi, 0 },
- { X86::SBB16ri8, X86::SBB16mi8, 0 },
- { X86::SBB16rr, X86::SBB16mr, 0 },
- { X86::SBB32ri, X86::SBB32mi, 0 },
- { X86::SBB32ri8, X86::SBB32mi8, 0 },
- { X86::SBB32rr, X86::SBB32mr, 0 },
- { X86::SBB64ri32, X86::SBB64mi32, 0 },
- { X86::SBB64ri8, X86::SBB64mi8, 0 },
- { X86::SBB64rr, X86::SBB64mr, 0 },
- { X86::SBB8ri, X86::SBB8mi, 0 },
- { X86::SBB8ri8, X86::SBB8mi8, 0 },
- { X86::SBB8rr, X86::SBB8mr, 0 },
- { X86::SHL16r1, X86::SHL16m1, 0 },
- { X86::SHL16rCL, X86::SHL16mCL, 0 },
- { X86::SHL16ri, X86::SHL16mi, 0 },
- { X86::SHL32r1, X86::SHL32m1, 0 },
- { X86::SHL32rCL, X86::SHL32mCL, 0 },
- { X86::SHL32ri, X86::SHL32mi, 0 },
- { X86::SHL64r1, X86::SHL64m1, 0 },
- { X86::SHL64rCL, X86::SHL64mCL, 0 },
- { X86::SHL64ri, X86::SHL64mi, 0 },
- { X86::SHL8r1, X86::SHL8m1, 0 },
- { X86::SHL8rCL, X86::SHL8mCL, 0 },
- { X86::SHL8ri, X86::SHL8mi, 0 },
- { X86::SHLD16rrCL, X86::SHLD16mrCL, 0 },
- { X86::SHLD16rri8, X86::SHLD16mri8, 0 },
- { X86::SHLD32rrCL, X86::SHLD32mrCL, 0 },
- { X86::SHLD32rri8, X86::SHLD32mri8, 0 },
- { X86::SHLD64rrCL, X86::SHLD64mrCL, 0 },
- { X86::SHLD64rri8, X86::SHLD64mri8, 0 },
- { X86::SHR16r1, X86::SHR16m1, 0 },
- { X86::SHR16rCL, X86::SHR16mCL, 0 },
- { X86::SHR16ri, X86::SHR16mi, 0 },
- { X86::SHR32r1, X86::SHR32m1, 0 },
- { X86::SHR32rCL, X86::SHR32mCL, 0 },
- { X86::SHR32ri, X86::SHR32mi, 0 },
- { X86::SHR64r1, X86::SHR64m1, 0 },
- { X86::SHR64rCL, X86::SHR64mCL, 0 },
- { X86::SHR64ri, X86::SHR64mi, 0 },
- { X86::SHR8r1, X86::SHR8m1, 0 },
- { X86::SHR8rCL, X86::SHR8mCL, 0 },
- { X86::SHR8ri, X86::SHR8mi, 0 },
- { X86::SHRD16rrCL, X86::SHRD16mrCL, 0 },
- { X86::SHRD16rri8, X86::SHRD16mri8, 0 },
- { X86::SHRD32rrCL, X86::SHRD32mrCL, 0 },
- { X86::SHRD32rri8, X86::SHRD32mri8, 0 },
- { X86::SHRD64rrCL, X86::SHRD64mrCL, 0 },
- { X86::SHRD64rri8, X86::SHRD64mri8, 0 },
- { X86::SUB16ri, X86::SUB16mi, 0 },
- { X86::SUB16ri8, X86::SUB16mi8, 0 },
- { X86::SUB16rr, X86::SUB16mr, 0 },
- { X86::SUB32ri, X86::SUB32mi, 0 },
- { X86::SUB32ri8, X86::SUB32mi8, 0 },
- { X86::SUB32rr, X86::SUB32mr, 0 },
- { X86::SUB64ri32, X86::SUB64mi32, 0 },
- { X86::SUB64ri8, X86::SUB64mi8, 0 },
- { X86::SUB64rr, X86::SUB64mr, 0 },
- { X86::SUB8ri, X86::SUB8mi, 0 },
- { X86::SUB8ri8, X86::SUB8mi8, 0 },
- { X86::SUB8rr, X86::SUB8mr, 0 },
- { X86::XOR16ri, X86::XOR16mi, 0 },
- { X86::XOR16ri8, X86::XOR16mi8, 0 },
- { X86::XOR16rr, X86::XOR16mr, 0 },
- { X86::XOR32ri, X86::XOR32mi, 0 },
- { X86::XOR32ri8, X86::XOR32mi8, 0 },
- { X86::XOR32rr, X86::XOR32mr, 0 },
- { X86::XOR64ri32, X86::XOR64mi32, 0 },
- { X86::XOR64ri8, X86::XOR64mi8, 0 },
- { X86::XOR64rr, X86::XOR64mr, 0 },
- { X86::XOR8ri, X86::XOR8mi, 0 },
- { X86::XOR8ri8, X86::XOR8mi8, 0 },
- { X86::XOR8rr, X86::XOR8mr, 0 },
-};
-
-static const X86MemoryFoldTableEntry MemoryFoldTable0[] = {
- { X86::BT16ri8, X86::BT16mi8, TB_FOLDED_LOAD },
- { X86::BT32ri8, X86::BT32mi8, TB_FOLDED_LOAD },
- { X86::BT64ri8, X86::BT64mi8, TB_FOLDED_LOAD },
- { X86::CALL16r, X86::CALL16m, TB_FOLDED_LOAD },
- { X86::CALL16r_NT, X86::CALL16m_NT, TB_FOLDED_LOAD },
- { X86::CALL32r, X86::CALL32m, TB_FOLDED_LOAD },
- { X86::CALL32r_NT, X86::CALL32m_NT, TB_FOLDED_LOAD },
- { X86::CALL64r, X86::CALL64m, TB_FOLDED_LOAD },
- { X86::CALL64r_NT, X86::CALL64m_NT, TB_FOLDED_LOAD },
- { X86::CMP16ri, X86::CMP16mi, TB_FOLDED_LOAD },
- { X86::CMP16ri8, X86::CMP16mi8, TB_FOLDED_LOAD },
- { X86::CMP16rr, X86::CMP16mr, TB_FOLDED_LOAD },
- { X86::CMP32ri, X86::CMP32mi, TB_FOLDED_LOAD },
- { X86::CMP32ri8, X86::CMP32mi8, TB_FOLDED_LOAD },
- { X86::CMP32rr, X86::CMP32mr, TB_FOLDED_LOAD },
- { X86::CMP64ri32, X86::CMP64mi32, TB_FOLDED_LOAD },
- { X86::CMP64ri8, X86::CMP64mi8, TB_FOLDED_LOAD },
- { X86::CMP64rr, X86::CMP64mr, TB_FOLDED_LOAD },
- { X86::CMP8ri, X86::CMP8mi, TB_FOLDED_LOAD },
- { X86::CMP8ri8, X86::CMP8mi8, TB_FOLDED_LOAD },
- { X86::CMP8rr, X86::CMP8mr, TB_FOLDED_LOAD },
- { X86::DIV16r, X86::DIV16m, TB_FOLDED_LOAD },
- { X86::DIV32r, X86::DIV32m, TB_FOLDED_LOAD },
- { X86::DIV64r, X86::DIV64m, TB_FOLDED_LOAD },
- { X86::DIV8r, X86::DIV8m, TB_FOLDED_LOAD },
- { X86::EXTRACTPSrr, X86::EXTRACTPSmr, TB_FOLDED_STORE },
- { X86::IDIV16r, X86::IDIV16m, TB_FOLDED_LOAD },
- { X86::IDIV32r, X86::IDIV32m, TB_FOLDED_LOAD },
- { X86::IDIV64r, X86::IDIV64m, TB_FOLDED_LOAD },
- { X86::IDIV8r, X86::IDIV8m, TB_FOLDED_LOAD },
- { X86::IMUL16r, X86::IMUL16m, TB_FOLDED_LOAD },
- { X86::IMUL32r, X86::IMUL32m, TB_FOLDED_LOAD },
- { X86::IMUL64r, X86::IMUL64m, TB_FOLDED_LOAD },
- { X86::IMUL8r, X86::IMUL8m, TB_FOLDED_LOAD },
- { X86::JMP16r, X86::JMP16m, TB_FOLDED_LOAD },
- { X86::JMP16r_NT, X86::JMP16m_NT, TB_FOLDED_LOAD },
- { X86::JMP32r, X86::JMP32m, TB_FOLDED_LOAD },
- { X86::JMP32r_NT, X86::JMP32m_NT, TB_FOLDED_LOAD },
- { X86::JMP64r, X86::JMP64m, TB_FOLDED_LOAD },
- { X86::JMP64r_NT, X86::JMP64m_NT, TB_FOLDED_LOAD },
- { X86::MMX_MOVD64from64rr, X86::MMX_MOVQ64mr, TB_FOLDED_STORE },
- { X86::MMX_MOVD64grr, X86::MMX_MOVD64mr, TB_FOLDED_STORE },
- { X86::MOV16ri, X86::MOV16mi, TB_FOLDED_STORE },
- { X86::MOV16rr, X86::MOV16mr, TB_FOLDED_STORE },
- { X86::MOV32ri, X86::MOV32mi, TB_FOLDED_STORE },
- { X86::MOV32rr, X86::MOV32mr, TB_FOLDED_STORE },
- { X86::MOV64ri32, X86::MOV64mi32, TB_FOLDED_STORE },
- { X86::MOV64rr, X86::MOV64mr, TB_FOLDED_STORE },
- { X86::MOV64toSDrr, X86::MOV64mr, TB_FOLDED_STORE | TB_NO_REVERSE },
- { X86::MOV8ri, X86::MOV8mi, TB_FOLDED_STORE },
- { X86::MOV8rr, X86::MOV8mr, TB_FOLDED_STORE },
- { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, TB_FOLDED_STORE },
- { X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::MOVDI2SSrr, X86::MOV32mr, TB_FOLDED_STORE | TB_NO_REVERSE },
- { X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::MOVDQUrr, X86::MOVDQUmr, TB_FOLDED_STORE },
- { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE },
- { X86::MOVPQIto64rr, X86::MOVPQI2QImr, TB_FOLDED_STORE | TB_NO_REVERSE },
- { X86::MOVSDto64rr, X86::MOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
- { X86::MOVSS2DIrr, X86::MOVSSmr, TB_FOLDED_STORE },
- { X86::MOVUPDrr, X86::MOVUPDmr, TB_FOLDED_STORE },
- { X86::MOVUPSrr, X86::MOVUPSmr, TB_FOLDED_STORE },
- { X86::MUL16r, X86::MUL16m, TB_FOLDED_LOAD },
- { X86::MUL32r, X86::MUL32m, TB_FOLDED_LOAD },
- { X86::MUL64r, X86::MUL64m, TB_FOLDED_LOAD },
- { X86::MUL8r, X86::MUL8m, TB_FOLDED_LOAD },
- { X86::PEXTRDrr, X86::PEXTRDmr, TB_FOLDED_STORE },
- { X86::PEXTRQrr, X86::PEXTRQmr, TB_FOLDED_STORE },
- { X86::PTWRITE64r, X86::PTWRITE64m, TB_FOLDED_LOAD },
- { X86::PTWRITEr, X86::PTWRITEm, TB_FOLDED_LOAD },
- { X86::PUSH16r, X86::PUSH16rmm, TB_FOLDED_LOAD },
- { X86::PUSH32r, X86::PUSH32rmm, TB_FOLDED_LOAD },
- { X86::PUSH64r, X86::PUSH64rmm, TB_FOLDED_LOAD },
- { X86::SETCCr, X86::SETCCm, TB_FOLDED_STORE },
- { X86::TAILJMPr, X86::TAILJMPm, TB_FOLDED_LOAD },
- { X86::TAILJMPr64, X86::TAILJMPm64, TB_FOLDED_LOAD },
- { X86::TAILJMPr64_REX, X86::TAILJMPm64_REX, TB_FOLDED_LOAD },
- { X86::TCRETURNri, X86::TCRETURNmi, TB_FOLDED_LOAD | TB_NO_FORWARD },
- { X86::TCRETURNri64, X86::TCRETURNmi64, TB_FOLDED_LOAD | TB_NO_FORWARD },
- { X86::TEST16ri, X86::TEST16mi, TB_FOLDED_LOAD },
- { X86::TEST16rr, X86::TEST16mr, TB_FOLDED_LOAD },
- { X86::TEST32ri, X86::TEST32mi, TB_FOLDED_LOAD },
- { X86::TEST32rr, X86::TEST32mr, TB_FOLDED_LOAD },
- { X86::TEST64ri32, X86::TEST64mi32, TB_FOLDED_LOAD },
- { X86::TEST64rr, X86::TEST64mr, TB_FOLDED_LOAD },
- { X86::TEST8ri, X86::TEST8mi, TB_FOLDED_LOAD },
- { X86::TEST8rr, X86::TEST8mr, TB_FOLDED_LOAD },
- { X86::VCVTPS2PHYrr, X86::VCVTPS2PHYmr, TB_FOLDED_STORE },
- { X86::VCVTPS2PHZ256rr, X86::VCVTPS2PHZ256mr, TB_FOLDED_STORE },
- { X86::VCVTPS2PHZrr, X86::VCVTPS2PHZmr, TB_FOLDED_STORE },
- { X86::VEXTRACTF128rr, X86::VEXTRACTF128mr, TB_FOLDED_STORE },
- { X86::VEXTRACTF32x4Z256rr, X86::VEXTRACTF32x4Z256mr, TB_FOLDED_STORE },
- { X86::VEXTRACTF32x4Zrr, X86::VEXTRACTF32x4Zmr, TB_FOLDED_STORE },
- { X86::VEXTRACTF32x8Zrr, X86::VEXTRACTF32x8Zmr, TB_FOLDED_STORE },
- { X86::VEXTRACTF64x2Z256rr, X86::VEXTRACTF64x2Z256mr, TB_FOLDED_STORE },
- { X86::VEXTRACTF64x2Zrr, X86::VEXTRACTF64x2Zmr, TB_FOLDED_STORE },
- { X86::VEXTRACTF64x4Zrr, X86::VEXTRACTF64x4Zmr, TB_FOLDED_STORE },
- { X86::VEXTRACTI128rr, X86::VEXTRACTI128mr, TB_FOLDED_STORE },
- { X86::VEXTRACTI32x4Z256rr, X86::VEXTRACTI32x4Z256mr, TB_FOLDED_STORE },
- { X86::VEXTRACTI32x4Zrr, X86::VEXTRACTI32x4Zmr, TB_FOLDED_STORE },
- { X86::VEXTRACTI32x8Zrr, X86::VEXTRACTI32x8Zmr, TB_FOLDED_STORE },
- { X86::VEXTRACTI64x2Z256rr, X86::VEXTRACTI64x2Z256mr, TB_FOLDED_STORE },
- { X86::VEXTRACTI64x2Zrr, X86::VEXTRACTI64x2Zmr, TB_FOLDED_STORE },
- { X86::VEXTRACTI64x4Zrr, X86::VEXTRACTI64x4Zmr, TB_FOLDED_STORE },
- { X86::VEXTRACTPSZrr, X86::VEXTRACTPSZmr, TB_FOLDED_STORE },
- { X86::VEXTRACTPSrr, X86::VEXTRACTPSmr, TB_FOLDED_STORE },
- { X86::VMOV64toSDZrr, X86::MOV64mr, TB_FOLDED_STORE | TB_NO_REVERSE },
- { X86::VMOV64toSDrr, X86::MOV64mr, TB_FOLDED_STORE | TB_NO_REVERSE },
- { X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
- { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
- { X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
- { X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
- { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
- { X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
- { X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::VMOVDI2SSZrr, X86::MOV32mr, TB_FOLDED_STORE | TB_NO_REVERSE },
- { X86::VMOVDI2SSrr, X86::MOV32mr, TB_FOLDED_STORE | TB_NO_REVERSE },
- { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
- { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
- { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
- { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zmr, TB_FOLDED_STORE | TB_ALIGN_64 },
- { X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
- { X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
- { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128mr, TB_FOLDED_STORE },
- { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256mr, TB_FOLDED_STORE },
- { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE },
- { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128mr, TB_FOLDED_STORE },
- { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256mr, TB_FOLDED_STORE },
- { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE },
- { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128mr, TB_FOLDED_STORE },
- { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256mr, TB_FOLDED_STORE },
- { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE },
- { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128mr, TB_FOLDED_STORE },
- { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256mr, TB_FOLDED_STORE },
- { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zmr, TB_FOLDED_STORE },
- { X86::VMOVDQUYrr, X86::VMOVDQUYmr, TB_FOLDED_STORE },
- { X86::VMOVDQUrr, X86::VMOVDQUmr, TB_FOLDED_STORE },
- { X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE },
- { X86::VMOVPDI2DIrr, X86::VMOVPDI2DImr, TB_FOLDED_STORE },
- { X86::VMOVPQIto64Zrr, X86::VMOVPQI2QIZmr, TB_FOLDED_STORE | TB_NO_REVERSE },
- { X86::VMOVPQIto64rr, X86::VMOVPQI2QImr, TB_FOLDED_STORE | TB_NO_REVERSE },
- { X86::VMOVSDto64Zrr, X86::VMOVSDZmr, TB_FOLDED_STORE | TB_NO_REVERSE },
- { X86::VMOVSDto64rr, X86::VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE },
- { X86::VMOVSS2DIZrr, X86::VMOVSSZmr, TB_FOLDED_STORE },
- { X86::VMOVSS2DIrr, X86::VMOVSSmr, TB_FOLDED_STORE },
- { X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
- { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128mr, TB_FOLDED_STORE },
- { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256mr, TB_FOLDED_STORE },
- { X86::VMOVUPDZrr, X86::VMOVUPDZmr, TB_FOLDED_STORE },
- { X86::VMOVUPDrr, X86::VMOVUPDmr, TB_FOLDED_STORE },
- { X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE },
- { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128mr, TB_FOLDED_STORE },
- { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256mr, TB_FOLDED_STORE },
- { X86::VMOVUPSZrr, X86::VMOVUPSZmr, TB_FOLDED_STORE },
- { X86::VMOVUPSrr, X86::VMOVUPSmr, TB_FOLDED_STORE },
- { X86::VPEXTRDZrr, X86::VPEXTRDZmr, TB_FOLDED_STORE },
- { X86::VPEXTRDrr, X86::VPEXTRDmr, TB_FOLDED_STORE },
- { X86::VPEXTRQZrr, X86::VPEXTRQZmr, TB_FOLDED_STORE },
- { X86::VPEXTRQrr, X86::VPEXTRQmr, TB_FOLDED_STORE },
- { X86::VPMOVDBZrr, X86::VPMOVDBZmr, TB_FOLDED_STORE },
- { X86::VPMOVDWZ256rr, X86::VPMOVDWZ256mr, TB_FOLDED_STORE },
- { X86::VPMOVDWZrr, X86::VPMOVDWZmr, TB_FOLDED_STORE },
- { X86::VPMOVQDZ256rr, X86::VPMOVQDZ256mr, TB_FOLDED_STORE },
- { X86::VPMOVQDZrr, X86::VPMOVQDZmr, TB_FOLDED_STORE },
- { X86::VPMOVQWZrr, X86::VPMOVQWZmr, TB_FOLDED_STORE },
- { X86::VPMOVSDBZrr, X86::VPMOVSDBZmr, TB_FOLDED_STORE },
- { X86::VPMOVSDWZ256rr, X86::VPMOVSDWZ256mr, TB_FOLDED_STORE },
- { X86::VPMOVSDWZrr, X86::VPMOVSDWZmr, TB_FOLDED_STORE },
- { X86::VPMOVSQDZ256rr, X86::VPMOVSQDZ256mr, TB_FOLDED_STORE },
- { X86::VPMOVSQDZrr, X86::VPMOVSQDZmr, TB_FOLDED_STORE },
- { X86::VPMOVSQWZrr, X86::VPMOVSQWZmr, TB_FOLDED_STORE },
- { X86::VPMOVSWBZ256rr, X86::VPMOVSWBZ256mr, TB_FOLDED_STORE },
- { X86::VPMOVSWBZrr, X86::VPMOVSWBZmr, TB_FOLDED_STORE },
- { X86::VPMOVUSDBZrr, X86::VPMOVUSDBZmr, TB_FOLDED_STORE },
- { X86::VPMOVUSDWZ256rr, X86::VPMOVUSDWZ256mr, TB_FOLDED_STORE },
- { X86::VPMOVUSDWZrr, X86::VPMOVUSDWZmr, TB_FOLDED_STORE },
- { X86::VPMOVUSQDZ256rr, X86::VPMOVUSQDZ256mr, TB_FOLDED_STORE },
- { X86::VPMOVUSQDZrr, X86::VPMOVUSQDZmr, TB_FOLDED_STORE },
- { X86::VPMOVUSQWZrr, X86::VPMOVUSQWZmr, TB_FOLDED_STORE },
- { X86::VPMOVUSWBZ256rr, X86::VPMOVUSWBZ256mr, TB_FOLDED_STORE },
- { X86::VPMOVUSWBZrr, X86::VPMOVUSWBZmr, TB_FOLDED_STORE },
- { X86::VPMOVWBZ256rr, X86::VPMOVWBZ256mr, TB_FOLDED_STORE },
- { X86::VPMOVWBZrr, X86::VPMOVWBZmr, TB_FOLDED_STORE },
-};
-
-static const X86MemoryFoldTableEntry MemoryFoldTable1[] = {
- { X86::AESIMCrr, X86::AESIMCrm, TB_ALIGN_16 },
- { X86::AESKEYGENASSIST128rr, X86::AESKEYGENASSIST128rm, TB_ALIGN_16 },
- { X86::BEXTR32rr, X86::BEXTR32rm, 0 },
- { X86::BEXTR64rr, X86::BEXTR64rm, 0 },
- { X86::BEXTRI32ri, X86::BEXTRI32mi, 0 },
- { X86::BEXTRI64ri, X86::BEXTRI64mi, 0 },
- { X86::BLCFILL32rr, X86::BLCFILL32rm, 0 },
- { X86::BLCFILL64rr, X86::BLCFILL64rm, 0 },
- { X86::BLCI32rr, X86::BLCI32rm, 0 },
- { X86::BLCI64rr, X86::BLCI64rm, 0 },
- { X86::BLCIC32rr, X86::BLCIC32rm, 0 },
- { X86::BLCIC64rr, X86::BLCIC64rm, 0 },
- { X86::BLCMSK32rr, X86::BLCMSK32rm, 0 },
- { X86::BLCMSK64rr, X86::BLCMSK64rm, 0 },
- { X86::BLCS32rr, X86::BLCS32rm, 0 },
- { X86::BLCS64rr, X86::BLCS64rm, 0 },
- { X86::BLSFILL32rr, X86::BLSFILL32rm, 0 },
- { X86::BLSFILL64rr, X86::BLSFILL64rm, 0 },
- { X86::BLSI32rr, X86::BLSI32rm, 0 },
- { X86::BLSI64rr, X86::BLSI64rm, 0 },
- { X86::BLSIC32rr, X86::BLSIC32rm, 0 },
- { X86::BLSIC64rr, X86::BLSIC64rm, 0 },
- { X86::BLSMSK32rr, X86::BLSMSK32rm, 0 },
- { X86::BLSMSK64rr, X86::BLSMSK64rm, 0 },
- { X86::BLSR32rr, X86::BLSR32rm, 0 },
- { X86::BLSR64rr, X86::BLSR64rm, 0 },
- { X86::BSF16rr, X86::BSF16rm, 0 },
- { X86::BSF32rr, X86::BSF32rm, 0 },
- { X86::BSF64rr, X86::BSF64rm, 0 },
- { X86::BSR16rr, X86::BSR16rm, 0 },
- { X86::BSR32rr, X86::BSR32rm, 0 },
- { X86::BSR64rr, X86::BSR64rm, 0 },
- { X86::BZHI32rr, X86::BZHI32rm, 0 },
- { X86::BZHI64rr, X86::BZHI64rm, 0 },
- { X86::CMP16rr, X86::CMP16rm, 0 },
- { X86::CMP32rr, X86::CMP32rm, 0 },
- { X86::CMP64rr, X86::CMP64rm, 0 },
- { X86::CMP8rr, X86::CMP8rm, 0 },
- { X86::COMISDrr, X86::COMISDrm, 0 },
- { X86::COMISDrr_Int, X86::COMISDrm_Int, TB_NO_REVERSE },
- { X86::COMISSrr, X86::COMISSrm, 0 },
- { X86::COMISSrr_Int, X86::COMISSrm_Int, TB_NO_REVERSE },
- { X86::CVTDQ2PDrr, X86::CVTDQ2PDrm, TB_NO_REVERSE },
- { X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 },
- { X86::CVTPD2DQrr, X86::CVTPD2DQrm, TB_ALIGN_16 },
- { X86::CVTPD2PSrr, X86::CVTPD2PSrm, TB_ALIGN_16 },
- { X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 },
- { X86::CVTPS2PDrr, X86::CVTPS2PDrm, TB_NO_REVERSE },
- { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
- { X86::CVTSD2SI64rr_Int, X86::CVTSD2SI64rm_Int, TB_NO_REVERSE },
- { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
- { X86::CVTSD2SIrr_Int, X86::CVTSD2SIrm_Int, TB_NO_REVERSE },
- { X86::CVTSD2SSrr, X86::CVTSD2SSrm, 0 },
- { X86::CVTSI2SDrr, X86::CVTSI2SDrm, 0 },
- { X86::CVTSI2SSrr, X86::CVTSI2SSrm, 0 },
- { X86::CVTSI642SDrr, X86::CVTSI642SDrm, 0 },
- { X86::CVTSI642SSrr, X86::CVTSI642SSrm, 0 },
- { X86::CVTSS2SDrr, X86::CVTSS2SDrm, 0 },
- { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
- { X86::CVTSS2SI64rr_Int, X86::CVTSS2SI64rm_Int, TB_NO_REVERSE },
- { X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
- { X86::CVTSS2SIrr_Int, X86::CVTSS2SIrm_Int, TB_NO_REVERSE },
- { X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
- { X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
- { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm, 0 },
- { X86::CVTTSD2SI64rr_Int, X86::CVTTSD2SI64rm_Int, TB_NO_REVERSE },
- { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm, 0 },
- { X86::CVTTSD2SIrr_Int, X86::CVTTSD2SIrm_Int, TB_NO_REVERSE },
- { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm, 0 },
- { X86::CVTTSS2SI64rr_Int, X86::CVTTSS2SI64rm_Int, TB_NO_REVERSE },
- { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm, 0 },
- { X86::CVTTSS2SIrr_Int, X86::CVTTSS2SIrm_Int, TB_NO_REVERSE },
- { X86::IMUL16rri, X86::IMUL16rmi, 0 },
- { X86::IMUL16rri8, X86::IMUL16rmi8, 0 },
- { X86::IMUL32rri, X86::IMUL32rmi, 0 },
- { X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
- { X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
- { X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
- { X86::LWPINS32rri, X86::LWPINS32rmi, 0 },
- { X86::LWPINS64rri, X86::LWPINS64rmi, 0 },
- { X86::LWPVAL32rri, X86::LWPVAL32rmi, 0 },
- { X86::LWPVAL64rri, X86::LWPVAL64rmi, 0 },
- { X86::LZCNT16rr, X86::LZCNT16rm, 0 },
- { X86::LZCNT32rr, X86::LZCNT32rm, 0 },
- { X86::LZCNT64rr, X86::LZCNT64rm, 0 },
- { X86::MMX_CVTPD2PIrr, X86::MMX_CVTPD2PIrm, TB_ALIGN_16 },
- { X86::MMX_CVTPI2PDrr, X86::MMX_CVTPI2PDrm, 0 },
- { X86::MMX_CVTPS2PIrr, X86::MMX_CVTPS2PIrm, TB_NO_REVERSE },
- { X86::MMX_CVTTPD2PIrr, X86::MMX_CVTTPD2PIrm, TB_ALIGN_16 },
- { X86::MMX_CVTTPS2PIrr, X86::MMX_CVTTPS2PIrm, TB_NO_REVERSE },
- { X86::MMX_MOVD64to64rr, X86::MMX_MOVQ64rm, 0 },
- { X86::MMX_PABSBrr, X86::MMX_PABSBrm, 0 },
- { X86::MMX_PABSDrr, X86::MMX_PABSDrm, 0 },
- { X86::MMX_PABSWrr, X86::MMX_PABSWrm, 0 },
- { X86::MMX_PSHUFWri, X86::MMX_PSHUFWmi, 0 },
- { X86::MOV16rr, X86::MOV16rm, 0 },
- { X86::MOV32rr, X86::MOV32rm, 0 },
- { X86::MOV64rr, X86::MOV64rm, 0 },
- { X86::MOV64toPQIrr, X86::MOVQI2PQIrm, TB_NO_REVERSE },
- { X86::MOV64toSDrr, X86::MOVSDrm_alt, TB_NO_REVERSE },
- { X86::MOV8rr, X86::MOV8rm, 0 },
- { X86::MOVAPDrr, X86::MOVAPDrm, TB_ALIGN_16 },
- { X86::MOVAPSrr, X86::MOVAPSrm, TB_ALIGN_16 },
- { X86::MOVDDUPrr, X86::MOVDDUPrm, TB_NO_REVERSE },
- { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
- { X86::MOVDI2SSrr, X86::MOVSSrm_alt, 0 },
- { X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 },
- { X86::MOVDQUrr, X86::MOVDQUrm, 0 },
- { X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 },
- { X86::MOVSLDUPrr, X86::MOVSLDUPrm, TB_ALIGN_16 },
- { X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
- { X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
- { X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
- { X86::MOVSX32rr8_NOREX, X86::MOVSX32rm8_NOREX, 0 },
- { X86::MOVSX64rr16, X86::MOVSX64rm16, 0 },
- { X86::MOVSX64rr32, X86::MOVSX64rm32, 0 },
- { X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
- { X86::MOVUPDrr, X86::MOVUPDrm, 0 },
- { X86::MOVUPSrr, X86::MOVUPSrm, 0 },
- { X86::MOVZPQILo2PQIrr, X86::MOVQI2PQIrm, TB_NO_REVERSE },
- { X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
- { X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
- { X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
- { X86::MOVZX32rr8_NOREX, X86::MOVZX32rm8_NOREX, 0 },
- { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 },
- { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 },
- { X86::PABSBrr, X86::PABSBrm, TB_ALIGN_16 },
- { X86::PABSDrr, X86::PABSDrm, TB_ALIGN_16 },
- { X86::PABSWrr, X86::PABSWrm, TB_ALIGN_16 },
- { X86::PCMPESTRIrr, X86::PCMPESTRIrm, 0 },
- { X86::PCMPESTRMrr, X86::PCMPESTRMrm, 0 },
- { X86::PCMPISTRIrr, X86::PCMPISTRIrm, 0 },
- { X86::PCMPISTRMrr, X86::PCMPISTRMrm, 0 },
- { X86::PF2IDrr, X86::PF2IDrm, 0 },
- { X86::PF2IWrr, X86::PF2IWrm, 0 },
- { X86::PFRCPrr, X86::PFRCPrm, 0 },
- { X86::PFRSQRTrr, X86::PFRSQRTrm, 0 },
- { X86::PHMINPOSUWrr, X86::PHMINPOSUWrm, TB_ALIGN_16 },
- { X86::PI2FDrr, X86::PI2FDrm, 0 },
- { X86::PI2FWrr, X86::PI2FWrm, 0 },
- { X86::PMOVSXBDrr, X86::PMOVSXBDrm, TB_NO_REVERSE },
- { X86::PMOVSXBQrr, X86::PMOVSXBQrm, TB_NO_REVERSE },
- { X86::PMOVSXBWrr, X86::PMOVSXBWrm, TB_NO_REVERSE },
- { X86::PMOVSXDQrr, X86::PMOVSXDQrm, TB_NO_REVERSE },
- { X86::PMOVSXWDrr, X86::PMOVSXWDrm, TB_NO_REVERSE },
- { X86::PMOVSXWQrr, X86::PMOVSXWQrm, TB_NO_REVERSE },
- { X86::PMOVZXBDrr, X86::PMOVZXBDrm, TB_NO_REVERSE },
- { X86::PMOVZXBQrr, X86::PMOVZXBQrm, TB_NO_REVERSE },
- { X86::PMOVZXBWrr, X86::PMOVZXBWrm, TB_NO_REVERSE },
- { X86::PMOVZXDQrr, X86::PMOVZXDQrm, TB_NO_REVERSE },
- { X86::PMOVZXWDrr, X86::PMOVZXWDrm, TB_NO_REVERSE },
- { X86::PMOVZXWQrr, X86::PMOVZXWQrm, TB_NO_REVERSE },
- { X86::POPCNT16rr, X86::POPCNT16rm, 0 },
- { X86::POPCNT32rr, X86::POPCNT32rm, 0 },
- { X86::POPCNT64rr, X86::POPCNT64rm, 0 },
- { X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 },
- { X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 },
- { X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 },
- { X86::PSWAPDrr, X86::PSWAPDrm, 0 },
- { X86::PTESTrr, X86::PTESTrm, TB_ALIGN_16 },
- { X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16 },
- { X86::RCPSSr, X86::RCPSSm, 0 },
- { X86::RORX32ri, X86::RORX32mi, 0 },
- { X86::RORX64ri, X86::RORX64mi, 0 },
- { X86::ROUNDPDr, X86::ROUNDPDm, TB_ALIGN_16 },
- { X86::ROUNDPSr, X86::ROUNDPSm, TB_ALIGN_16 },
- { X86::ROUNDSDr, X86::ROUNDSDm, 0 },
- { X86::ROUNDSSr, X86::ROUNDSSm, 0 },
- { X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16 },
- { X86::RSQRTSSr, X86::RSQRTSSm, 0 },
- { X86::SARX32rr, X86::SARX32rm, 0 },
- { X86::SARX64rr, X86::SARX64rm, 0 },
- { X86::SHLX32rr, X86::SHLX32rm, 0 },
- { X86::SHLX64rr, X86::SHLX64rm, 0 },
- { X86::SHRX32rr, X86::SHRX32rm, 0 },
- { X86::SHRX64rr, X86::SHRX64rm, 0 },
- { X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 },
- { X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 },
- { X86::SQRTSDr, X86::SQRTSDm, 0 },
- { X86::SQRTSSr, X86::SQRTSSm, 0 },
- { X86::T1MSKC32rr, X86::T1MSKC32rm, 0 },
- { X86::T1MSKC64rr, X86::T1MSKC64rm, 0 },
- { X86::TZCNT16rr, X86::TZCNT16rm, 0 },
- { X86::TZCNT32rr, X86::TZCNT32rm, 0 },
- { X86::TZCNT64rr, X86::TZCNT64rm, 0 },
- { X86::TZMSK32rr, X86::TZMSK32rm, 0 },
- { X86::TZMSK64rr, X86::TZMSK64rm, 0 },
- { X86::UCOMISDrr, X86::UCOMISDrm, 0 },
- { X86::UCOMISDrr_Int, X86::UCOMISDrm_Int, TB_NO_REVERSE },
- { X86::UCOMISSrr, X86::UCOMISSrm, 0 },
- { X86::UCOMISSrr_Int, X86::UCOMISSrm_Int, TB_NO_REVERSE },
- { X86::VAESIMCrr, X86::VAESIMCrm, 0 },
- { X86::VAESKEYGENASSIST128rr,X86::VAESKEYGENASSIST128rm,0 },
- { X86::VBROADCASTF32X2Z256rr,X86::VBROADCASTF32X2Z256rm,TB_NO_REVERSE },
- { X86::VBROADCASTF32X2Zrr, X86::VBROADCASTF32X2Zrm, TB_NO_REVERSE },
- { X86::VBROADCASTI32X2Z128rr,X86::VBROADCASTI32X2Z128rm,TB_NO_REVERSE },
- { X86::VBROADCASTI32X2Z256rr,X86::VBROADCASTI32X2Z256rm,TB_NO_REVERSE },
- { X86::VBROADCASTI32X2Zrr, X86::VBROADCASTI32X2Zrm, TB_NO_REVERSE },
- { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256rr, X86::VBROADCASTSDZ256rm, TB_NO_REVERSE },
- { X86::VBROADCASTSDZrr, X86::VBROADCASTSDZrm, TB_NO_REVERSE },
- { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ128rr, X86::VBROADCASTSSZ128rm, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ256rr, X86::VBROADCASTSSZ256rm, TB_NO_REVERSE },
- { X86::VBROADCASTSSZrr, X86::VBROADCASTSSZrm, TB_NO_REVERSE },
- { X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE },
- { X86::VCOMISDZrr, X86::VCOMISDZrm, 0 },
- { X86::VCOMISDZrr_Int, X86::VCOMISDZrm_Int, TB_NO_REVERSE },
- { X86::VCOMISDrr, X86::VCOMISDrm, 0 },
- { X86::VCOMISDrr_Int, X86::VCOMISDrm_Int, TB_NO_REVERSE },
- { X86::VCOMISSZrr, X86::VCOMISSZrm, 0 },
- { X86::VCOMISSZrr_Int, X86::VCOMISSZrm_Int, TB_NO_REVERSE },
- { X86::VCOMISSrr, X86::VCOMISSrm, 0 },
- { X86::VCOMISSrr_Int, X86::VCOMISSrm_Int, TB_NO_REVERSE },
- { X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, 0 },
- { X86::VCVTDQ2PDZ128rr, X86::VCVTDQ2PDZ128rm, TB_NO_REVERSE },
- { X86::VCVTDQ2PDZ256rr, X86::VCVTDQ2PDZ256rm, 0 },
- { X86::VCVTDQ2PDZrr, X86::VCVTDQ2PDZrm, 0 },
- { X86::VCVTDQ2PDrr, X86::VCVTDQ2PDrm, TB_NO_REVERSE },
- { X86::VCVTDQ2PSYrr, X86::VCVTDQ2PSYrm, 0 },
- { X86::VCVTDQ2PSZ128rr, X86::VCVTDQ2PSZ128rm, 0 },
- { X86::VCVTDQ2PSZ256rr, X86::VCVTDQ2PSZ256rm, 0 },
- { X86::VCVTDQ2PSZrr, X86::VCVTDQ2PSZrm, 0 },
- { X86::VCVTDQ2PSrr, X86::VCVTDQ2PSrm, 0 },
- { X86::VCVTNEPS2BF16Z128rr, X86::VCVTNEPS2BF16Z128rm, 0 },
- { X86::VCVTNEPS2BF16Z256rr, X86::VCVTNEPS2BF16Z256rm, 0 },
- { X86::VCVTNEPS2BF16Zrr, X86::VCVTNEPS2BF16Zrm, 0 },
- { X86::VCVTPD2DQYrr, X86::VCVTPD2DQYrm, 0 },
- { X86::VCVTPD2DQZ128rr, X86::VCVTPD2DQZ128rm, 0 },
- { X86::VCVTPD2DQZ256rr, X86::VCVTPD2DQZ256rm, 0 },
- { X86::VCVTPD2DQZrr, X86::VCVTPD2DQZrm, 0 },
- { X86::VCVTPD2DQrr, X86::VCVTPD2DQrm, 0 },
- { X86::VCVTPD2PSYrr, X86::VCVTPD2PSYrm, 0 },
- { X86::VCVTPD2PSZ128rr, X86::VCVTPD2PSZ128rm, 0 },
- { X86::VCVTPD2PSZ256rr, X86::VCVTPD2PSZ256rm, 0 },
- { X86::VCVTPD2PSZrr, X86::VCVTPD2PSZrm, 0 },
- { X86::VCVTPD2PSrr, X86::VCVTPD2PSrm, 0 },
- { X86::VCVTPD2QQZ128rr, X86::VCVTPD2QQZ128rm, 0 },
- { X86::VCVTPD2QQZ256rr, X86::VCVTPD2QQZ256rm, 0 },
- { X86::VCVTPD2QQZrr, X86::VCVTPD2QQZrm, 0 },
- { X86::VCVTPD2UDQZ128rr, X86::VCVTPD2UDQZ128rm, 0 },
- { X86::VCVTPD2UDQZ256rr, X86::VCVTPD2UDQZ256rm, 0 },
- { X86::VCVTPD2UDQZrr, X86::VCVTPD2UDQZrm, 0 },
- { X86::VCVTPD2UQQZ128rr, X86::VCVTPD2UQQZ128rm, 0 },
- { X86::VCVTPD2UQQZ256rr, X86::VCVTPD2UQQZ256rm, 0 },
- { X86::VCVTPD2UQQZrr, X86::VCVTPD2UQQZrm, 0 },
- { X86::VCVTPH2PSYrr, X86::VCVTPH2PSYrm, 0 },
- { X86::VCVTPH2PSZ128rr, X86::VCVTPH2PSZ128rm, TB_NO_REVERSE },
- { X86::VCVTPH2PSZ256rr, X86::VCVTPH2PSZ256rm, 0 },
- { X86::VCVTPH2PSZrr, X86::VCVTPH2PSZrm, 0 },
- { X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, TB_NO_REVERSE },
- { X86::VCVTPS2DQYrr, X86::VCVTPS2DQYrm, 0 },
- { X86::VCVTPS2DQZ128rr, X86::VCVTPS2DQZ128rm, 0 },
- { X86::VCVTPS2DQZ256rr, X86::VCVTPS2DQZ256rm, 0 },
- { X86::VCVTPS2DQZrr, X86::VCVTPS2DQZrm, 0 },
- { X86::VCVTPS2DQrr, X86::VCVTPS2DQrm, 0 },
- { X86::VCVTPS2PDYrr, X86::VCVTPS2PDYrm, 0 },
- { X86::VCVTPS2PDZ128rr, X86::VCVTPS2PDZ128rm, TB_NO_REVERSE },
- { X86::VCVTPS2PDZ256rr, X86::VCVTPS2PDZ256rm, 0 },
- { X86::VCVTPS2PDZrr, X86::VCVTPS2PDZrm, 0 },
- { X86::VCVTPS2PDrr, X86::VCVTPS2PDrm, TB_NO_REVERSE },
- { X86::VCVTPS2QQZ128rr, X86::VCVTPS2QQZ128rm, TB_NO_REVERSE },
- { X86::VCVTPS2QQZ256rr, X86::VCVTPS2QQZ256rm, 0 },
- { X86::VCVTPS2QQZrr, X86::VCVTPS2QQZrm, 0 },
- { X86::VCVTPS2UDQZ128rr, X86::VCVTPS2UDQZ128rm, 0 },
- { X86::VCVTPS2UDQZ256rr, X86::VCVTPS2UDQZ256rm, 0 },
- { X86::VCVTPS2UDQZrr, X86::VCVTPS2UDQZrm, 0 },
- { X86::VCVTPS2UQQZ128rr, X86::VCVTPS2UQQZ128rm, TB_NO_REVERSE },
- { X86::VCVTPS2UQQZ256rr, X86::VCVTPS2UQQZ256rm, 0 },
- { X86::VCVTPS2UQQZrr, X86::VCVTPS2UQQZrm, 0 },
- { X86::VCVTQQ2PDZ128rr, X86::VCVTQQ2PDZ128rm, 0 },
- { X86::VCVTQQ2PDZ256rr, X86::VCVTQQ2PDZ256rm, 0 },
- { X86::VCVTQQ2PDZrr, X86::VCVTQQ2PDZrm, 0 },
- { X86::VCVTQQ2PSZ128rr, X86::VCVTQQ2PSZ128rm, 0 },
- { X86::VCVTQQ2PSZ256rr, X86::VCVTQQ2PSZ256rm, 0 },
- { X86::VCVTQQ2PSZrr, X86::VCVTQQ2PSZrm, 0 },
- { X86::VCVTSD2SI64Zrr, X86::VCVTSD2SI64Zrm, 0 },
- { X86::VCVTSD2SI64Zrr_Int, X86::VCVTSD2SI64Zrm_Int, TB_NO_REVERSE },
- { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 },
- { X86::VCVTSD2SI64rr_Int, X86::VCVTSD2SI64rm_Int, TB_NO_REVERSE },
- { X86::VCVTSD2SIZrr, X86::VCVTSD2SIZrm, 0 },
- { X86::VCVTSD2SIZrr_Int, X86::VCVTSD2SIZrm_Int, TB_NO_REVERSE },
- { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
- { X86::VCVTSD2SIrr_Int, X86::VCVTSD2SIrm_Int, TB_NO_REVERSE },
- { X86::VCVTSD2USI64Zrr_Int, X86::VCVTSD2USI64Zrm_Int, TB_NO_REVERSE },
- { X86::VCVTSD2USIZrr_Int, X86::VCVTSD2USIZrm_Int, TB_NO_REVERSE },
- { X86::VCVTSS2SI64Zrr, X86::VCVTSS2SI64Zrm, 0 },
- { X86::VCVTSS2SI64Zrr_Int, X86::VCVTSS2SI64Zrm_Int, TB_NO_REVERSE },
- { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 },
- { X86::VCVTSS2SI64rr_Int, X86::VCVTSS2SI64rm_Int, TB_NO_REVERSE },
- { X86::VCVTSS2SIZrr, X86::VCVTSS2SIZrm, 0 },
- { X86::VCVTSS2SIZrr_Int, X86::VCVTSS2SIZrm_Int, TB_NO_REVERSE },
- { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
- { X86::VCVTSS2SIrr_Int, X86::VCVTSS2SIrm_Int, TB_NO_REVERSE },
- { X86::VCVTSS2USI64Zrr_Int, X86::VCVTSS2USI64Zrm_Int, TB_NO_REVERSE },
- { X86::VCVTSS2USIZrr_Int, X86::VCVTSS2USIZrm_Int, TB_NO_REVERSE },
- { X86::VCVTTPD2DQYrr, X86::VCVTTPD2DQYrm, 0 },
- { X86::VCVTTPD2DQZ128rr, X86::VCVTTPD2DQZ128rm, 0 },
- { X86::VCVTTPD2DQZ256rr, X86::VCVTTPD2DQZ256rm, 0 },
- { X86::VCVTTPD2DQZrr, X86::VCVTTPD2DQZrm, 0 },
- { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, 0 },
- { X86::VCVTTPD2QQZ128rr, X86::VCVTTPD2QQZ128rm, 0 },
- { X86::VCVTTPD2QQZ256rr, X86::VCVTTPD2QQZ256rm, 0 },
- { X86::VCVTTPD2QQZrr, X86::VCVTTPD2QQZrm, 0 },
- { X86::VCVTTPD2UDQZ128rr, X86::VCVTTPD2UDQZ128rm, 0 },
- { X86::VCVTTPD2UDQZ256rr, X86::VCVTTPD2UDQZ256rm, 0 },
- { X86::VCVTTPD2UDQZrr, X86::VCVTTPD2UDQZrm, 0 },
- { X86::VCVTTPD2UQQZ128rr, X86::VCVTTPD2UQQZ128rm, 0 },
- { X86::VCVTTPD2UQQZ256rr, X86::VCVTTPD2UQQZ256rm, 0 },
- { X86::VCVTTPD2UQQZrr, X86::VCVTTPD2UQQZrm, 0 },
- { X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0 },
- { X86::VCVTTPS2DQZ128rr, X86::VCVTTPS2DQZ128rm, 0 },
- { X86::VCVTTPS2DQZ256rr, X86::VCVTTPS2DQZ256rm, 0 },
- { X86::VCVTTPS2DQZrr, X86::VCVTTPS2DQZrm, 0 },
- { X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
- { X86::VCVTTPS2QQZ128rr, X86::VCVTTPS2QQZ128rm, TB_NO_REVERSE },
- { X86::VCVTTPS2QQZ256rr, X86::VCVTTPS2QQZ256rm, 0 },
- { X86::VCVTTPS2QQZrr, X86::VCVTTPS2QQZrm, 0 },
- { X86::VCVTTPS2UDQZ128rr, X86::VCVTTPS2UDQZ128rm, 0 },
- { X86::VCVTTPS2UDQZ256rr, X86::VCVTTPS2UDQZ256rm, 0 },
- { X86::VCVTTPS2UDQZrr, X86::VCVTTPS2UDQZrm, 0 },
- { X86::VCVTTPS2UQQZ128rr, X86::VCVTTPS2UQQZ128rm, TB_NO_REVERSE },
- { X86::VCVTTPS2UQQZ256rr, X86::VCVTTPS2UQQZ256rm, 0 },
- { X86::VCVTTPS2UQQZrr, X86::VCVTTPS2UQQZrm, 0 },
- { X86::VCVTTSD2SI64Zrr, X86::VCVTTSD2SI64Zrm, 0 },
- { X86::VCVTTSD2SI64Zrr_Int, X86::VCVTTSD2SI64Zrm_Int, TB_NO_REVERSE },
- { X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
- { X86::VCVTTSD2SI64rr_Int, X86::VCVTTSD2SI64rm_Int, TB_NO_REVERSE },
- { X86::VCVTTSD2SIZrr, X86::VCVTTSD2SIZrm, 0 },
- { X86::VCVTTSD2SIZrr_Int, X86::VCVTTSD2SIZrm_Int, TB_NO_REVERSE },
- { X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
- { X86::VCVTTSD2SIrr_Int, X86::VCVTTSD2SIrm_Int, TB_NO_REVERSE },
- { X86::VCVTTSD2USI64Zrr, X86::VCVTTSD2USI64Zrm, 0 },
- { X86::VCVTTSD2USI64Zrr_Int, X86::VCVTTSD2USI64Zrm_Int, TB_NO_REVERSE },
- { X86::VCVTTSD2USIZrr, X86::VCVTTSD2USIZrm, 0 },
- { X86::VCVTTSD2USIZrr_Int, X86::VCVTTSD2USIZrm_Int, TB_NO_REVERSE },
- { X86::VCVTTSS2SI64Zrr, X86::VCVTTSS2SI64Zrm, 0 },
- { X86::VCVTTSS2SI64Zrr_Int, X86::VCVTTSS2SI64Zrm_Int, TB_NO_REVERSE },
- { X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
- { X86::VCVTTSS2SI64rr_Int, X86::VCVTTSS2SI64rm_Int, TB_NO_REVERSE },
- { X86::VCVTTSS2SIZrr, X86::VCVTTSS2SIZrm, 0 },
- { X86::VCVTTSS2SIZrr_Int, X86::VCVTTSS2SIZrm_Int, TB_NO_REVERSE },
- { X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
- { X86::VCVTTSS2SIrr_Int, X86::VCVTTSS2SIrm_Int, TB_NO_REVERSE },
- { X86::VCVTTSS2USI64Zrr, X86::VCVTTSS2USI64Zrm, 0 },
- { X86::VCVTTSS2USI64Zrr_Int, X86::VCVTTSS2USI64Zrm_Int, TB_NO_REVERSE },
- { X86::VCVTTSS2USIZrr, X86::VCVTTSS2USIZrm, 0 },
- { X86::VCVTTSS2USIZrr_Int, X86::VCVTTSS2USIZrm_Int, TB_NO_REVERSE },
- { X86::VCVTUDQ2PDZ128rr, X86::VCVTUDQ2PDZ128rm, TB_NO_REVERSE },
- { X86::VCVTUDQ2PDZ256rr, X86::VCVTUDQ2PDZ256rm, 0 },
- { X86::VCVTUDQ2PDZrr, X86::VCVTUDQ2PDZrm, 0 },
- { X86::VCVTUDQ2PSZ128rr, X86::VCVTUDQ2PSZ128rm, 0 },
- { X86::VCVTUDQ2PSZ256rr, X86::VCVTUDQ2PSZ256rm, 0 },
- { X86::VCVTUDQ2PSZrr, X86::VCVTUDQ2PSZrm, 0 },
- { X86::VCVTUQQ2PDZ128rr, X86::VCVTUQQ2PDZ128rm, 0 },
- { X86::VCVTUQQ2PDZ256rr, X86::VCVTUQQ2PDZ256rm, 0 },
- { X86::VCVTUQQ2PDZrr, X86::VCVTUQQ2PDZrm, 0 },
- { X86::VCVTUQQ2PSZ128rr, X86::VCVTUQQ2PSZ128rm, 0 },
- { X86::VCVTUQQ2PSZ256rr, X86::VCVTUQQ2PSZ256rm, 0 },
- { X86::VCVTUQQ2PSZrr, X86::VCVTUQQ2PSZrm, 0 },
- { X86::VEXP2PDZr, X86::VEXP2PDZm, 0 },
- { X86::VEXP2PSZr, X86::VEXP2PSZm, 0 },
- { X86::VEXPANDPDZ128rr, X86::VEXPANDPDZ128rm, TB_NO_REVERSE },
- { X86::VEXPANDPDZ256rr, X86::VEXPANDPDZ256rm, TB_NO_REVERSE },
- { X86::VEXPANDPDZrr, X86::VEXPANDPDZrm, TB_NO_REVERSE },
- { X86::VEXPANDPSZ128rr, X86::VEXPANDPSZ128rm, TB_NO_REVERSE },
- { X86::VEXPANDPSZ256rr, X86::VEXPANDPSZ256rm, TB_NO_REVERSE },
- { X86::VEXPANDPSZrr, X86::VEXPANDPSZrm, TB_NO_REVERSE },
- { X86::VFPCLASSPDZ128rr, X86::VFPCLASSPDZ128rm, 0 },
- { X86::VFPCLASSPDZ256rr, X86::VFPCLASSPDZ256rm, 0 },
- { X86::VFPCLASSPDZrr, X86::VFPCLASSPDZrm, 0 },
- { X86::VFPCLASSPHZ128rr, X86::VFPCLASSPHZ128rm, 0 },
- { X86::VFPCLASSPHZ256rr, X86::VFPCLASSPHZ256rm, 0 },
- { X86::VFPCLASSPHZrr, X86::VFPCLASSPHZrm, 0 },
- { X86::VFPCLASSPSZ128rr, X86::VFPCLASSPSZ128rm, 0 },
- { X86::VFPCLASSPSZ256rr, X86::VFPCLASSPSZ256rm, 0 },
- { X86::VFPCLASSPSZrr, X86::VFPCLASSPSZrm, 0 },
- { X86::VFPCLASSSDZrr, X86::VFPCLASSSDZrm, TB_NO_REVERSE },
- { X86::VFPCLASSSHZrr, X86::VFPCLASSSHZrm, TB_NO_REVERSE },
- { X86::VFPCLASSSSZrr, X86::VFPCLASSSSZrm, TB_NO_REVERSE },
- { X86::VFRCZPDYrr, X86::VFRCZPDYrm, 0 },
- { X86::VFRCZPDrr, X86::VFRCZPDrm, 0 },
- { X86::VFRCZPSYrr, X86::VFRCZPSYrm, 0 },
- { X86::VFRCZPSrr, X86::VFRCZPSrm, 0 },
- { X86::VFRCZSDrr, X86::VFRCZSDrm, TB_NO_REVERSE },
- { X86::VFRCZSSrr, X86::VFRCZSSrm, TB_NO_REVERSE },
- { X86::VGETEXPPDZ128r, X86::VGETEXPPDZ128m, 0 },
- { X86::VGETEXPPDZ256r, X86::VGETEXPPDZ256m, 0 },
- { X86::VGETEXPPDZr, X86::VGETEXPPDZm, 0 },
- { X86::VGETEXPPHZ128r, X86::VGETEXPPHZ128m, 0 },
- { X86::VGETEXPPHZ256r, X86::VGETEXPPHZ256m, 0 },
- { X86::VGETEXPPHZr, X86::VGETEXPPHZm, 0 },
- { X86::VGETEXPPSZ128r, X86::VGETEXPPSZ128m, 0 },
- { X86::VGETEXPPSZ256r, X86::VGETEXPPSZ256m, 0 },
- { X86::VGETEXPPSZr, X86::VGETEXPPSZm, 0 },
- { X86::VGETMANTPDZ128rri, X86::VGETMANTPDZ128rmi, 0 },
- { X86::VGETMANTPDZ256rri, X86::VGETMANTPDZ256rmi, 0 },
- { X86::VGETMANTPDZrri, X86::VGETMANTPDZrmi, 0 },
- { X86::VGETMANTPHZ128rri, X86::VGETMANTPHZ128rmi, 0 },
- { X86::VGETMANTPHZ256rri, X86::VGETMANTPHZ256rmi, 0 },
- { X86::VGETMANTPHZrri, X86::VGETMANTPHZrmi, 0 },
- { X86::VGETMANTPSZ128rri, X86::VGETMANTPSZ128rmi, 0 },
- { X86::VGETMANTPSZ256rri, X86::VGETMANTPSZ256rmi, 0 },
- { X86::VGETMANTPSZrri, X86::VGETMANTPSZrmi, 0 },
- { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, TB_NO_REVERSE },
- { X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, TB_NO_REVERSE },
- { X86::VMOV64toSDZrr, X86::VMOVSDZrm_alt, TB_NO_REVERSE },
- { X86::VMOV64toSDrr, X86::VMOVSDrm_alt, TB_NO_REVERSE },
- { X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
- { X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
- { X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
- { X86::VMOVAPDZrr, X86::VMOVAPDZrm, TB_ALIGN_64 },
- { X86::VMOVAPDrr, X86::VMOVAPDrm, TB_ALIGN_16 },
- { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
- { X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 },
- { X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 },
- { X86::VMOVAPSZrr, X86::VMOVAPSZrm, TB_ALIGN_64 },
- { X86::VMOVAPSrr, X86::VMOVAPSrm, TB_ALIGN_16 },
- { X86::VMOVDDUPYrr, X86::VMOVDDUPYrm, 0 },
- { X86::VMOVDDUPZ128rr, X86::VMOVDDUPZ128rm, TB_NO_REVERSE },
- { X86::VMOVDDUPZ256rr, X86::VMOVDDUPZ256rm, 0 },
- { X86::VMOVDDUPZrr, X86::VMOVDDUPZrm, 0 },
- { X86::VMOVDDUPrr, X86::VMOVDDUPrm, TB_NO_REVERSE },
- { X86::VMOVDI2PDIZrr, X86::VMOVDI2PDIZrm, 0 },
- { X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
- { X86::VMOVDI2SSZrr, X86::VMOVSSZrm_alt, 0 },
- { X86::VMOVDI2SSrr, X86::VMOVSSrm_alt, 0 },
- { X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
- { X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
- { X86::VMOVDQA32Zrr, X86::VMOVDQA32Zrm, TB_ALIGN_64 },
- { X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128rm, TB_ALIGN_16 },
- { X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256rm, TB_ALIGN_32 },
- { X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrm, TB_ALIGN_64 },
- { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
- { X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 },
- { X86::VMOVDQU16Z128rr, X86::VMOVDQU16Z128rm, 0 },
- { X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256rm, 0 },
- { X86::VMOVDQU16Zrr, X86::VMOVDQU16Zrm, 0 },
- { X86::VMOVDQU32Z128rr, X86::VMOVDQU32Z128rm, 0 },
- { X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256rm, 0 },
- { X86::VMOVDQU32Zrr, X86::VMOVDQU32Zrm, 0 },
- { X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 },
- { X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 },
- { X86::VMOVDQU64Zrr, X86::VMOVDQU64Zrm, 0 },
- { X86::VMOVDQU8Z128rr, X86::VMOVDQU8Z128rm, 0 },
- { X86::VMOVDQU8Z256rr, X86::VMOVDQU8Z256rm, 0 },
- { X86::VMOVDQU8Zrr, X86::VMOVDQU8Zrm, 0 },
- { X86::VMOVDQUYrr, X86::VMOVDQUYrm, 0 },
- { X86::VMOVDQUrr, X86::VMOVDQUrm, 0 },
- { X86::VMOVSHDUPYrr, X86::VMOVSHDUPYrm, 0 },
- { X86::VMOVSHDUPZ128rr, X86::VMOVSHDUPZ128rm, 0 },
- { X86::VMOVSHDUPZ256rr, X86::VMOVSHDUPZ256rm, 0 },
- { X86::VMOVSHDUPZrr, X86::VMOVSHDUPZrm, 0 },
- { X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, 0 },
- { X86::VMOVSLDUPYrr, X86::VMOVSLDUPYrm, 0 },
- { X86::VMOVSLDUPZ128rr, X86::VMOVSLDUPZ128rm, 0 },
- { X86::VMOVSLDUPZ256rr, X86::VMOVSLDUPZ256rm, 0 },
- { X86::VMOVSLDUPZrr, X86::VMOVSLDUPZrm, 0 },
- { X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, 0 },
- { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
- { X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 },
- { X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 },
- { X86::VMOVUPDZrr, X86::VMOVUPDZrm, 0 },
- { X86::VMOVUPDrr, X86::VMOVUPDrm, 0 },
- { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
- { X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 },
- { X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 },
- { X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 },
- { X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
- { X86::VMOVZPQILo2PQIZrr, X86::VMOVQI2PQIZrm, TB_NO_REVERSE },
- { X86::VMOVZPQILo2PQIrr, X86::VMOVQI2PQIrm, TB_NO_REVERSE },
- { X86::VPABSBYrr, X86::VPABSBYrm, 0 },
- { X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0 },
- { X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0 },
- { X86::VPABSBZrr, X86::VPABSBZrm, 0 },
- { X86::VPABSBrr, X86::VPABSBrm, 0 },
- { X86::VPABSDYrr, X86::VPABSDYrm, 0 },
- { X86::VPABSDZ128rr, X86::VPABSDZ128rm, 0 },
- { X86::VPABSDZ256rr, X86::VPABSDZ256rm, 0 },
- { X86::VPABSDZrr, X86::VPABSDZrm, 0 },
- { X86::VPABSDrr, X86::VPABSDrm, 0 },
- { X86::VPABSQZ128rr, X86::VPABSQZ128rm, 0 },
- { X86::VPABSQZ256rr, X86::VPABSQZ256rm, 0 },
- { X86::VPABSQZrr, X86::VPABSQZrm, 0 },
- { X86::VPABSWYrr, X86::VPABSWYrm, 0 },
- { X86::VPABSWZ128rr, X86::VPABSWZ128rm, 0 },
- { X86::VPABSWZ256rr, X86::VPABSWZ256rm, 0 },
- { X86::VPABSWZrr, X86::VPABSWZrm, 0 },
- { X86::VPABSWrr, X86::VPABSWrm, 0 },
- { X86::VPBROADCASTBYrr, X86::VPBROADCASTBYrm, TB_NO_REVERSE },
- { X86::VPBROADCASTBZ128rr, X86::VPBROADCASTBZ128rm, TB_NO_REVERSE },
- { X86::VPBROADCASTBZ256rr, X86::VPBROADCASTBZ256rm, TB_NO_REVERSE },
- { X86::VPBROADCASTBZrr, X86::VPBROADCASTBZrm, TB_NO_REVERSE },
- { X86::VPBROADCASTBrr , X86::VPBROADCASTBrm, TB_NO_REVERSE },
- { X86::VPBROADCASTDYrr, X86::VPBROADCASTDYrm, TB_NO_REVERSE },
- { X86::VPBROADCASTDZ128rr, X86::VPBROADCASTDZ128rm, TB_NO_REVERSE },
- { X86::VPBROADCASTDZ256rr, X86::VPBROADCASTDZ256rm, TB_NO_REVERSE },
- { X86::VPBROADCASTDZrr, X86::VPBROADCASTDZrm, TB_NO_REVERSE },
- { X86::VPBROADCASTDrr, X86::VPBROADCASTDrm, TB_NO_REVERSE },
- { X86::VPBROADCASTQYrr, X86::VPBROADCASTQYrm, TB_NO_REVERSE },
- { X86::VPBROADCASTQZ128rr, X86::VPBROADCASTQZ128rm, TB_NO_REVERSE },
- { X86::VPBROADCASTQZ256rr, X86::VPBROADCASTQZ256rm, TB_NO_REVERSE },
- { X86::VPBROADCASTQZrr, X86::VPBROADCASTQZrm, TB_NO_REVERSE },
- { X86::VPBROADCASTQrr, X86::VPBROADCASTQrm, TB_NO_REVERSE },
- { X86::VPBROADCASTWYrr, X86::VPBROADCASTWYrm, TB_NO_REVERSE },
- { X86::VPBROADCASTWZ128rr, X86::VPBROADCASTWZ128rm, TB_NO_REVERSE },
- { X86::VPBROADCASTWZ256rr, X86::VPBROADCASTWZ256rm, TB_NO_REVERSE },
- { X86::VPBROADCASTWZrr, X86::VPBROADCASTWZrm, TB_NO_REVERSE },
- { X86::VPBROADCASTWrr, X86::VPBROADCASTWrm, TB_NO_REVERSE },
- { X86::VPCMPESTRIrr, X86::VPCMPESTRIrm, 0 },
- { X86::VPCMPESTRMrr, X86::VPCMPESTRMrm, 0 },
- { X86::VPCMPISTRIrr, X86::VPCMPISTRIrm, 0 },
- { X86::VPCMPISTRMrr, X86::VPCMPISTRMrm, 0 },
- { X86::VPCONFLICTDZ128rr, X86::VPCONFLICTDZ128rm, 0 },
- { X86::VPCONFLICTDZ256rr, X86::VPCONFLICTDZ256rm, 0 },
- { X86::VPCONFLICTDZrr, X86::VPCONFLICTDZrm, 0 },
- { X86::VPCONFLICTQZ128rr, X86::VPCONFLICTQZ128rm, 0 },
- { X86::VPCONFLICTQZ256rr, X86::VPCONFLICTQZ256rm, 0 },
- { X86::VPCONFLICTQZrr, X86::VPCONFLICTQZrm, 0 },
- { X86::VPERMILPDYri, X86::VPERMILPDYmi, 0 },
- { X86::VPERMILPDZ128ri, X86::VPERMILPDZ128mi, 0 },
- { X86::VPERMILPDZ256ri, X86::VPERMILPDZ256mi, 0 },
- { X86::VPERMILPDZri, X86::VPERMILPDZmi, 0 },
- { X86::VPERMILPDri, X86::VPERMILPDmi, 0 },
- { X86::VPERMILPSYri, X86::VPERMILPSYmi, 0 },
- { X86::VPERMILPSZ128ri, X86::VPERMILPSZ128mi, 0 },
- { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256mi, 0 },
- { X86::VPERMILPSZri, X86::VPERMILPSZmi, 0 },
- { X86::VPERMILPSri, X86::VPERMILPSmi, 0 },
- { X86::VPERMPDYri, X86::VPERMPDYmi, 0 },
- { X86::VPERMPDZ256ri, X86::VPERMPDZ256mi, 0 },
- { X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
- { X86::VPERMQYri, X86::VPERMQYmi, 0 },
- { X86::VPERMQZ256ri, X86::VPERMQZ256mi, 0 },
- { X86::VPERMQZri, X86::VPERMQZmi, 0 },
- { X86::VPEXPANDBZ128rr, X86::VPEXPANDBZ128rm, TB_NO_REVERSE },
- { X86::VPEXPANDBZ256rr, X86::VPEXPANDBZ256rm, TB_NO_REVERSE },
- { X86::VPEXPANDBZrr, X86::VPEXPANDBZrm, TB_NO_REVERSE },
- { X86::VPEXPANDDZ128rr, X86::VPEXPANDDZ128rm, TB_NO_REVERSE },
- { X86::VPEXPANDDZ256rr, X86::VPEXPANDDZ256rm, TB_NO_REVERSE },
- { X86::VPEXPANDDZrr, X86::VPEXPANDDZrm, TB_NO_REVERSE },
- { X86::VPEXPANDQZ128rr, X86::VPEXPANDQZ128rm, TB_NO_REVERSE },
- { X86::VPEXPANDQZ256rr, X86::VPEXPANDQZ256rm, TB_NO_REVERSE },
- { X86::VPEXPANDQZrr, X86::VPEXPANDQZrm, TB_NO_REVERSE },
- { X86::VPEXPANDWZ128rr, X86::VPEXPANDWZ128rm, TB_NO_REVERSE },
- { X86::VPEXPANDWZ256rr, X86::VPEXPANDWZ256rm, TB_NO_REVERSE },
- { X86::VPEXPANDWZrr, X86::VPEXPANDWZrm, TB_NO_REVERSE },
- { X86::VPHADDBDrr, X86::VPHADDBDrm, 0 },
- { X86::VPHADDBQrr, X86::VPHADDBQrm, 0 },
- { X86::VPHADDBWrr, X86::VPHADDBWrm, 0 },
- { X86::VPHADDDQrr, X86::VPHADDDQrm, 0 },
- { X86::VPHADDUBDrr, X86::VPHADDUBDrm, 0 },
- { X86::VPHADDUBQrr, X86::VPHADDUBQrm, 0 },
- { X86::VPHADDUBWrr, X86::VPHADDUBWrm, 0 },
- { X86::VPHADDUDQrr, X86::VPHADDUDQrm, 0 },
- { X86::VPHADDUWDrr, X86::VPHADDUWDrm, 0 },
- { X86::VPHADDUWQrr, X86::VPHADDUWQrm, 0 },
- { X86::VPHADDWDrr, X86::VPHADDWDrm, 0 },
- { X86::VPHADDWQrr, X86::VPHADDWQrm, 0 },
- { X86::VPHMINPOSUWrr, X86::VPHMINPOSUWrm, 0 },
- { X86::VPHSUBBWrr, X86::VPHSUBBWrm, 0 },
- { X86::VPHSUBDQrr, X86::VPHSUBDQrm, 0 },
- { X86::VPHSUBWDrr, X86::VPHSUBWDrm, 0 },
- { X86::VPLZCNTDZ128rr, X86::VPLZCNTDZ128rm, 0 },
- { X86::VPLZCNTDZ256rr, X86::VPLZCNTDZ256rm, 0 },
- { X86::VPLZCNTDZrr, X86::VPLZCNTDZrm, 0 },
- { X86::VPLZCNTQZ128rr, X86::VPLZCNTQZ128rm, 0 },
- { X86::VPLZCNTQZ256rr, X86::VPLZCNTQZ256rm, 0 },
- { X86::VPLZCNTQZrr, X86::VPLZCNTQZrm, 0 },
- { X86::VPMOVSXBDYrr, X86::VPMOVSXBDYrm, TB_NO_REVERSE },
- { X86::VPMOVSXBDZ128rr, X86::VPMOVSXBDZ128rm, TB_NO_REVERSE },
- { X86::VPMOVSXBDZ256rr, X86::VPMOVSXBDZ256rm, TB_NO_REVERSE },
- { X86::VPMOVSXBDZrr, X86::VPMOVSXBDZrm, 0 },
- { X86::VPMOVSXBDrr, X86::VPMOVSXBDrm, TB_NO_REVERSE },
- { X86::VPMOVSXBQYrr, X86::VPMOVSXBQYrm, TB_NO_REVERSE },
- { X86::VPMOVSXBQZ128rr, X86::VPMOVSXBQZ128rm, TB_NO_REVERSE },
- { X86::VPMOVSXBQZ256rr, X86::VPMOVSXBQZ256rm, TB_NO_REVERSE },
- { X86::VPMOVSXBQZrr, X86::VPMOVSXBQZrm, TB_NO_REVERSE },
- { X86::VPMOVSXBQrr, X86::VPMOVSXBQrm, TB_NO_REVERSE },
- { X86::VPMOVSXBWYrr, X86::VPMOVSXBWYrm, 0 },
- { X86::VPMOVSXBWZ128rr, X86::VPMOVSXBWZ128rm, TB_NO_REVERSE },
- { X86::VPMOVSXBWZ256rr, X86::VPMOVSXBWZ256rm, 0 },
- { X86::VPMOVSXBWZrr, X86::VPMOVSXBWZrm, 0 },
- { X86::VPMOVSXBWrr, X86::VPMOVSXBWrm, TB_NO_REVERSE },
- { X86::VPMOVSXDQYrr, X86::VPMOVSXDQYrm, 0 },
- { X86::VPMOVSXDQZ128rr, X86::VPMOVSXDQZ128rm, TB_NO_REVERSE },
- { X86::VPMOVSXDQZ256rr, X86::VPMOVSXDQZ256rm, 0 },
- { X86::VPMOVSXDQZrr, X86::VPMOVSXDQZrm, 0 },
- { X86::VPMOVSXDQrr, X86::VPMOVSXDQrm, TB_NO_REVERSE },
- { X86::VPMOVSXWDYrr, X86::VPMOVSXWDYrm, 0 },
- { X86::VPMOVSXWDZ128rr, X86::VPMOVSXWDZ128rm, TB_NO_REVERSE },
- { X86::VPMOVSXWDZ256rr, X86::VPMOVSXWDZ256rm, 0 },
- { X86::VPMOVSXWDZrr, X86::VPMOVSXWDZrm, 0 },
- { X86::VPMOVSXWDrr, X86::VPMOVSXWDrm, TB_NO_REVERSE },
- { X86::VPMOVSXWQYrr, X86::VPMOVSXWQYrm, TB_NO_REVERSE },
- { X86::VPMOVSXWQZ128rr, X86::VPMOVSXWQZ128rm, TB_NO_REVERSE },
- { X86::VPMOVSXWQZ256rr, X86::VPMOVSXWQZ256rm, TB_NO_REVERSE },
- { X86::VPMOVSXWQZrr, X86::VPMOVSXWQZrm, 0 },
- { X86::VPMOVSXWQrr, X86::VPMOVSXWQrm, TB_NO_REVERSE },
- { X86::VPMOVZXBDYrr, X86::VPMOVZXBDYrm, TB_NO_REVERSE },
- { X86::VPMOVZXBDZ128rr, X86::VPMOVZXBDZ128rm, TB_NO_REVERSE },
- { X86::VPMOVZXBDZ256rr, X86::VPMOVZXBDZ256rm, TB_NO_REVERSE },
- { X86::VPMOVZXBDZrr, X86::VPMOVZXBDZrm, 0 },
- { X86::VPMOVZXBDrr, X86::VPMOVZXBDrm, TB_NO_REVERSE },
- { X86::VPMOVZXBQYrr, X86::VPMOVZXBQYrm, TB_NO_REVERSE },
- { X86::VPMOVZXBQZ128rr, X86::VPMOVZXBQZ128rm, TB_NO_REVERSE },
- { X86::VPMOVZXBQZ256rr, X86::VPMOVZXBQZ256rm, TB_NO_REVERSE },
- { X86::VPMOVZXBQZrr, X86::VPMOVZXBQZrm, TB_NO_REVERSE },
- { X86::VPMOVZXBQrr, X86::VPMOVZXBQrm, TB_NO_REVERSE },
- { X86::VPMOVZXBWYrr, X86::VPMOVZXBWYrm, 0 },
- { X86::VPMOVZXBWZ128rr, X86::VPMOVZXBWZ128rm, TB_NO_REVERSE },
- { X86::VPMOVZXBWZ256rr, X86::VPMOVZXBWZ256rm, 0 },
- { X86::VPMOVZXBWZrr, X86::VPMOVZXBWZrm, 0 },
- { X86::VPMOVZXBWrr, X86::VPMOVZXBWrm, TB_NO_REVERSE },
- { X86::VPMOVZXDQYrr, X86::VPMOVZXDQYrm, 0 },
- { X86::VPMOVZXDQZ128rr, X86::VPMOVZXDQZ128rm, TB_NO_REVERSE },
- { X86::VPMOVZXDQZ256rr, X86::VPMOVZXDQZ256rm, 0 },
- { X86::VPMOVZXDQZrr, X86::VPMOVZXDQZrm, 0 },
- { X86::VPMOVZXDQrr, X86::VPMOVZXDQrm, TB_NO_REVERSE },
- { X86::VPMOVZXWDYrr, X86::VPMOVZXWDYrm, 0 },
- { X86::VPMOVZXWDZ128rr, X86::VPMOVZXWDZ128rm, TB_NO_REVERSE },
- { X86::VPMOVZXWDZ256rr, X86::VPMOVZXWDZ256rm, 0 },
- { X86::VPMOVZXWDZrr, X86::VPMOVZXWDZrm, 0 },
- { X86::VPMOVZXWDrr, X86::VPMOVZXWDrm, TB_NO_REVERSE },
- { X86::VPMOVZXWQYrr, X86::VPMOVZXWQYrm, TB_NO_REVERSE },
- { X86::VPMOVZXWQZ128rr, X86::VPMOVZXWQZ128rm, TB_NO_REVERSE },
- { X86::VPMOVZXWQZ256rr, X86::VPMOVZXWQZ256rm, TB_NO_REVERSE },
- { X86::VPMOVZXWQZrr, X86::VPMOVZXWQZrm, 0 },
- { X86::VPMOVZXWQrr, X86::VPMOVZXWQrm, TB_NO_REVERSE },
- { X86::VPOPCNTBZ128rr, X86::VPOPCNTBZ128rm, 0 },
- { X86::VPOPCNTBZ256rr, X86::VPOPCNTBZ256rm, 0 },
- { X86::VPOPCNTBZrr, X86::VPOPCNTBZrm, 0 },
- { X86::VPOPCNTDZ128rr, X86::VPOPCNTDZ128rm, 0 },
- { X86::VPOPCNTDZ256rr, X86::VPOPCNTDZ256rm, 0 },
- { X86::VPOPCNTDZrr, X86::VPOPCNTDZrm, 0 },
- { X86::VPOPCNTQZ128rr, X86::VPOPCNTQZ128rm, 0 },
- { X86::VPOPCNTQZ256rr, X86::VPOPCNTQZ256rm, 0 },
- { X86::VPOPCNTQZrr, X86::VPOPCNTQZrm, 0 },
- { X86::VPOPCNTWZ128rr, X86::VPOPCNTWZ128rm, 0 },
- { X86::VPOPCNTWZ256rr, X86::VPOPCNTWZ256rm, 0 },
- { X86::VPOPCNTWZrr, X86::VPOPCNTWZrm, 0 },
- { X86::VPROLDZ128ri, X86::VPROLDZ128mi, 0 },
- { X86::VPROLDZ256ri, X86::VPROLDZ256mi, 0 },
- { X86::VPROLDZri, X86::VPROLDZmi, 0 },
- { X86::VPROLQZ128ri, X86::VPROLQZ128mi, 0 },
- { X86::VPROLQZ256ri, X86::VPROLQZ256mi, 0 },
- { X86::VPROLQZri, X86::VPROLQZmi, 0 },
- { X86::VPRORDZ128ri, X86::VPRORDZ128mi, 0 },
- { X86::VPRORDZ256ri, X86::VPRORDZ256mi, 0 },
- { X86::VPRORDZri, X86::VPRORDZmi, 0 },
- { X86::VPRORQZ128ri, X86::VPRORQZ128mi, 0 },
- { X86::VPRORQZ256ri, X86::VPRORQZ256mi, 0 },
- { X86::VPRORQZri, X86::VPRORQZmi, 0 },
- { X86::VPROTBri, X86::VPROTBmi, 0 },
- { X86::VPROTBrr, X86::VPROTBmr, 0 },
- { X86::VPROTDri, X86::VPROTDmi, 0 },
- { X86::VPROTDrr, X86::VPROTDmr, 0 },
- { X86::VPROTQri, X86::VPROTQmi, 0 },
- { X86::VPROTQrr, X86::VPROTQmr, 0 },
- { X86::VPROTWri, X86::VPROTWmi, 0 },
- { X86::VPROTWrr, X86::VPROTWmr, 0 },
- { X86::VPSHABrr, X86::VPSHABmr, 0 },
- { X86::VPSHADrr, X86::VPSHADmr, 0 },
- { X86::VPSHAQrr, X86::VPSHAQmr, 0 },
- { X86::VPSHAWrr, X86::VPSHAWmr, 0 },
- { X86::VPSHLBrr, X86::VPSHLBmr, 0 },
- { X86::VPSHLDrr, X86::VPSHLDmr, 0 },
- { X86::VPSHLQrr, X86::VPSHLQmr, 0 },
- { X86::VPSHLWrr, X86::VPSHLWmr, 0 },
- { X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 },
- { X86::VPSHUFDZ128ri, X86::VPSHUFDZ128mi, 0 },
- { X86::VPSHUFDZ256ri, X86::VPSHUFDZ256mi, 0 },
- { X86::VPSHUFDZri, X86::VPSHUFDZmi, 0 },
- { X86::VPSHUFDri, X86::VPSHUFDmi, 0 },
- { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 },
- { X86::VPSHUFHWZ128ri, X86::VPSHUFHWZ128mi, 0 },
- { X86::VPSHUFHWZ256ri, X86::VPSHUFHWZ256mi, 0 },
- { X86::VPSHUFHWZri, X86::VPSHUFHWZmi, 0 },
- { X86::VPSHUFHWri, X86::VPSHUFHWmi, 0 },
- { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 },
- { X86::VPSHUFLWZ128ri, X86::VPSHUFLWZ128mi, 0 },
- { X86::VPSHUFLWZ256ri, X86::VPSHUFLWZ256mi, 0 },
- { X86::VPSHUFLWZri, X86::VPSHUFLWZmi, 0 },
- { X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 },
- { X86::VPSLLDQZ128ri, X86::VPSLLDQZ128mi, 0 },
- { X86::VPSLLDQZ256ri, X86::VPSLLDQZ256mi, 0 },
- { X86::VPSLLDQZri, X86::VPSLLDQZmi, 0 },
- { X86::VPSLLDZ128ri, X86::VPSLLDZ128mi, 0 },
- { X86::VPSLLDZ256ri, X86::VPSLLDZ256mi, 0 },
- { X86::VPSLLDZri, X86::VPSLLDZmi, 0 },
- { X86::VPSLLQZ128ri, X86::VPSLLQZ128mi, 0 },
- { X86::VPSLLQZ256ri, X86::VPSLLQZ256mi, 0 },
- { X86::VPSLLQZri, X86::VPSLLQZmi, 0 },
- { X86::VPSLLWZ128ri, X86::VPSLLWZ128mi, 0 },
- { X86::VPSLLWZ256ri, X86::VPSLLWZ256mi, 0 },
- { X86::VPSLLWZri, X86::VPSLLWZmi, 0 },
- { X86::VPSRADZ128ri, X86::VPSRADZ128mi, 0 },
- { X86::VPSRADZ256ri, X86::VPSRADZ256mi, 0 },
- { X86::VPSRADZri, X86::VPSRADZmi, 0 },
- { X86::VPSRAQZ128ri, X86::VPSRAQZ128mi, 0 },
- { X86::VPSRAQZ256ri, X86::VPSRAQZ256mi, 0 },
- { X86::VPSRAQZri, X86::VPSRAQZmi, 0 },
- { X86::VPSRAWZ128ri, X86::VPSRAWZ128mi, 0 },
- { X86::VPSRAWZ256ri, X86::VPSRAWZ256mi, 0 },
- { X86::VPSRAWZri, X86::VPSRAWZmi, 0 },
- { X86::VPSRLDQZ128ri, X86::VPSRLDQZ128mi, 0 },
- { X86::VPSRLDQZ256ri, X86::VPSRLDQZ256mi, 0 },
- { X86::VPSRLDQZri, X86::VPSRLDQZmi, 0 },
- { X86::VPSRLDZ128ri, X86::VPSRLDZ128mi, 0 },
- { X86::VPSRLDZ256ri, X86::VPSRLDZ256mi, 0 },
- { X86::VPSRLDZri, X86::VPSRLDZmi, 0 },
- { X86::VPSRLQZ128ri, X86::VPSRLQZ128mi, 0 },
- { X86::VPSRLQZ256ri, X86::VPSRLQZ256mi, 0 },
- { X86::VPSRLQZri, X86::VPSRLQZmi, 0 },
- { X86::VPSRLWZ128ri, X86::VPSRLWZ128mi, 0 },
- { X86::VPSRLWZ256ri, X86::VPSRLWZ256mi, 0 },
- { X86::VPSRLWZri, X86::VPSRLWZmi, 0 },
- { X86::VPTESTYrr, X86::VPTESTYrm, 0 },
- { X86::VPTESTrr, X86::VPTESTrm, 0 },
- { X86::VRCP14PDZ128r, X86::VRCP14PDZ128m, 0 },
- { X86::VRCP14PDZ256r, X86::VRCP14PDZ256m, 0 },
- { X86::VRCP14PDZr, X86::VRCP14PDZm, 0 },
- { X86::VRCP14PSZ128r, X86::VRCP14PSZ128m, 0 },
- { X86::VRCP14PSZ256r, X86::VRCP14PSZ256m, 0 },
- { X86::VRCP14PSZr, X86::VRCP14PSZm, 0 },
- { X86::VRCP28PDZr, X86::VRCP28PDZm, 0 },
- { X86::VRCP28PSZr, X86::VRCP28PSZm, 0 },
- { X86::VRCPPHZ128r, X86::VRCPPHZ128m, 0 },
- { X86::VRCPPHZ256r, X86::VRCPPHZ256m, 0 },
- { X86::VRCPPHZr, X86::VRCPPHZm, 0 },
- { X86::VRCPPSYr, X86::VRCPPSYm, 0 },
- { X86::VRCPPSr, X86::VRCPPSm, 0 },
- { X86::VREDUCEPDZ128rri, X86::VREDUCEPDZ128rmi, 0 },
- { X86::VREDUCEPDZ256rri, X86::VREDUCEPDZ256rmi, 0 },
- { X86::VREDUCEPDZrri, X86::VREDUCEPDZrmi, 0 },
- { X86::VREDUCEPHZ128rri, X86::VREDUCEPHZ128rmi, 0 },
- { X86::VREDUCEPHZ256rri, X86::VREDUCEPHZ256rmi, 0 },
- { X86::VREDUCEPHZrri, X86::VREDUCEPHZrmi, 0 },
- { X86::VREDUCEPSZ128rri, X86::VREDUCEPSZ128rmi, 0 },
- { X86::VREDUCEPSZ256rri, X86::VREDUCEPSZ256rmi, 0 },
- { X86::VREDUCEPSZrri, X86::VREDUCEPSZrmi, 0 },
- { X86::VRNDSCALEPDZ128rri, X86::VRNDSCALEPDZ128rmi, 0 },
- { X86::VRNDSCALEPDZ256rri, X86::VRNDSCALEPDZ256rmi, 0 },
- { X86::VRNDSCALEPDZrri, X86::VRNDSCALEPDZrmi, 0 },
- { X86::VRNDSCALEPHZ128rri, X86::VRNDSCALEPHZ128rmi, 0 },
- { X86::VRNDSCALEPHZ256rri, X86::VRNDSCALEPHZ256rmi, 0 },
- { X86::VRNDSCALEPHZrri, X86::VRNDSCALEPHZrmi, 0 },
- { X86::VRNDSCALEPSZ128rri, X86::VRNDSCALEPSZ128rmi, 0 },
- { X86::VRNDSCALEPSZ256rri, X86::VRNDSCALEPSZ256rmi, 0 },
- { X86::VRNDSCALEPSZrri, X86::VRNDSCALEPSZrmi, 0 },
- { X86::VROUNDPDYr, X86::VROUNDPDYm, 0 },
- { X86::VROUNDPDr, X86::VROUNDPDm, 0 },
- { X86::VROUNDPSYr, X86::VROUNDPSYm, 0 },
- { X86::VROUNDPSr, X86::VROUNDPSm, 0 },
- { X86::VRSQRT14PDZ128r, X86::VRSQRT14PDZ128m, 0 },
- { X86::VRSQRT14PDZ256r, X86::VRSQRT14PDZ256m, 0 },
- { X86::VRSQRT14PDZr, X86::VRSQRT14PDZm, 0 },
- { X86::VRSQRT14PSZ128r, X86::VRSQRT14PSZ128m, 0 },
- { X86::VRSQRT14PSZ256r, X86::VRSQRT14PSZ256m, 0 },
- { X86::VRSQRT14PSZr, X86::VRSQRT14PSZm, 0 },
- { X86::VRSQRT28PDZr, X86::VRSQRT28PDZm, 0 },
- { X86::VRSQRT28PSZr, X86::VRSQRT28PSZm, 0 },
- { X86::VRSQRTPHZ128r, X86::VRSQRTPHZ128m, 0 },
- { X86::VRSQRTPHZ256r, X86::VRSQRTPHZ256m, 0 },
- { X86::VRSQRTPHZr, X86::VRSQRTPHZm, 0 },
- { X86::VRSQRTPSYr, X86::VRSQRTPSYm, 0 },
- { X86::VRSQRTPSr, X86::VRSQRTPSm, 0 },
- { X86::VSQRTPDYr, X86::VSQRTPDYm, 0 },
- { X86::VSQRTPDZ128r, X86::VSQRTPDZ128m, 0 },
- { X86::VSQRTPDZ256r, X86::VSQRTPDZ256m, 0 },
- { X86::VSQRTPDZr, X86::VSQRTPDZm, 0 },
- { X86::VSQRTPDr, X86::VSQRTPDm, 0 },
- { X86::VSQRTPHZ128r, X86::VSQRTPHZ128m, 0 },
- { X86::VSQRTPHZ256r, X86::VSQRTPHZ256m, 0 },
- { X86::VSQRTPHZr, X86::VSQRTPHZm, 0 },
- { X86::VSQRTPSYr, X86::VSQRTPSYm, 0 },
- { X86::VSQRTPSZ128r, X86::VSQRTPSZ128m, 0 },
- { X86::VSQRTPSZ256r, X86::VSQRTPSZ256m, 0 },
- { X86::VSQRTPSZr, X86::VSQRTPSZm, 0 },
- { X86::VSQRTPSr, X86::VSQRTPSm, 0 },
- { X86::VTESTPDYrr, X86::VTESTPDYrm, 0 },
- { X86::VTESTPDrr, X86::VTESTPDrm, 0 },
- { X86::VTESTPSYrr, X86::VTESTPSYrm, 0 },
- { X86::VTESTPSrr, X86::VTESTPSrm, 0 },
- { X86::VUCOMISDZrr, X86::VUCOMISDZrm, 0 },
- { X86::VUCOMISDZrr_Int, X86::VUCOMISDZrm_Int, TB_NO_REVERSE },
- { X86::VUCOMISDrr, X86::VUCOMISDrm, 0 },
- { X86::VUCOMISDrr_Int, X86::VUCOMISDrm_Int, TB_NO_REVERSE },
- { X86::VUCOMISSZrr, X86::VUCOMISSZrm, 0 },
- { X86::VUCOMISSZrr_Int, X86::VUCOMISSZrm_Int, TB_NO_REVERSE },
- { X86::VUCOMISSrr, X86::VUCOMISSrm, 0 },
- { X86::VUCOMISSrr_Int, X86::VUCOMISSrm_Int, TB_NO_REVERSE },
-};
-
-static const X86MemoryFoldTableEntry MemoryFoldTable2[] = {
- { X86::ADD16rr_DB, X86::ADD16rm, TB_NO_REVERSE },
- { X86::ADD32rr_DB, X86::ADD32rm, TB_NO_REVERSE },
- { X86::ADD64rr_DB, X86::ADD64rm, TB_NO_REVERSE },
- { X86::ADD8rr_DB, X86::ADD8rm, TB_NO_REVERSE },
- { X86::ADC16rr, X86::ADC16rm, 0 },
- { X86::ADC32rr, X86::ADC32rm, 0 },
- { X86::ADC64rr, X86::ADC64rm, 0 },
- { X86::ADC8rr, X86::ADC8rm, 0 },
- { X86::ADCX32rr, X86::ADCX32rm, 0 },
- { X86::ADCX64rr, X86::ADCX64rm, 0 },
- { X86::ADD16rr, X86::ADD16rm, 0 },
- { X86::ADD32rr, X86::ADD32rm, 0 },
- { X86::ADD64rr, X86::ADD64rm, 0 },
- { X86::ADD8rr, X86::ADD8rm, 0 },
- { X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 },
- { X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 },
- { X86::ADDSDrr, X86::ADDSDrm, 0 },
- { X86::ADDSDrr_Int, X86::ADDSDrm_Int, TB_NO_REVERSE },
- { X86::ADDSSrr, X86::ADDSSrm, 0 },
- { X86::ADDSSrr_Int, X86::ADDSSrm_Int, TB_NO_REVERSE },
- { X86::ADDSUBPDrr, X86::ADDSUBPDrm, TB_ALIGN_16 },
- { X86::ADDSUBPSrr, X86::ADDSUBPSrm, TB_ALIGN_16 },
- { X86::ADOX32rr, X86::ADOX32rm, 0 },
- { X86::ADOX64rr, X86::ADOX64rm, 0 },
- { X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
- { X86::AESDECrr, X86::AESDECrm, TB_ALIGN_16 },
- { X86::AESENCLASTrr, X86::AESENCLASTrm, TB_ALIGN_16 },
- { X86::AESENCrr, X86::AESENCrm, TB_ALIGN_16 },
- { X86::AND16rr, X86::AND16rm, 0 },
- { X86::AND32rr, X86::AND32rm, 0 },
- { X86::AND64rr, X86::AND64rm, 0 },
- { X86::AND8rr, X86::AND8rm, 0 },
- { X86::ANDN32rr, X86::ANDN32rm, 0 },
- { X86::ANDN64rr, X86::ANDN64rm, 0 },
- { X86::ANDNPDrr, X86::ANDNPDrm, TB_ALIGN_16 },
- { X86::ANDNPSrr, X86::ANDNPSrm, TB_ALIGN_16 },
- { X86::ANDPDrr, X86::ANDPDrm, TB_ALIGN_16 },
- { X86::ANDPSrr, X86::ANDPSrm, TB_ALIGN_16 },
- { X86::BLENDPDrri, X86::BLENDPDrmi, TB_ALIGN_16 },
- { X86::BLENDPSrri, X86::BLENDPSrmi, TB_ALIGN_16 },
- { X86::BLENDVPDrr0, X86::BLENDVPDrm0, TB_ALIGN_16 },
- { X86::BLENDVPSrr0, X86::BLENDVPSrm0, TB_ALIGN_16 },
- { X86::CMOV16rr, X86::CMOV16rm, 0 },
- { X86::CMOV32rr, X86::CMOV32rm, 0 },
- { X86::CMOV64rr, X86::CMOV64rm, 0 },
- { X86::CMPPDrri, X86::CMPPDrmi, TB_ALIGN_16 },
- { X86::CMPPSrri, X86::CMPPSrmi, TB_ALIGN_16 },
- { X86::CMPSDrr, X86::CMPSDrm, 0 },
- { X86::CMPSDrr_Int, X86::CMPSDrm_Int, TB_NO_REVERSE },
- { X86::CMPSSrr, X86::CMPSSrm, 0 },
- { X86::CMPSSrr_Int, X86::CMPSSrm_Int, TB_NO_REVERSE },
- { X86::CRC32r32r16, X86::CRC32r32m16, 0 },
- { X86::CRC32r32r32, X86::CRC32r32m32, 0 },
- { X86::CRC32r32r8, X86::CRC32r32m8, 0 },
- { X86::CRC32r64r64, X86::CRC32r64m64, 0 },
- { X86::CRC32r64r8, X86::CRC32r64m8, 0 },
- { X86::CVTSD2SSrr_Int, X86::CVTSD2SSrm_Int, TB_NO_REVERSE },
- { X86::CVTSI2SDrr_Int, X86::CVTSI2SDrm_Int, 0 },
- { X86::CVTSI2SSrr_Int, X86::CVTSI2SSrm_Int, 0 },
- { X86::CVTSI642SDrr_Int, X86::CVTSI642SDrm_Int, 0 },
- { X86::CVTSI642SSrr_Int, X86::CVTSI642SSrm_Int, 0 },
- { X86::CVTSS2SDrr_Int, X86::CVTSS2SDrm_Int, TB_NO_REVERSE },
- { X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 },
- { X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 },
- { X86::DIVSDrr, X86::DIVSDrm, 0 },
- { X86::DIVSDrr_Int, X86::DIVSDrm_Int, TB_NO_REVERSE },
- { X86::DIVSSrr, X86::DIVSSrm, 0 },
- { X86::DIVSSrr_Int, X86::DIVSSrm_Int, TB_NO_REVERSE },
- { X86::DPPDrri, X86::DPPDrmi, TB_ALIGN_16 },
- { X86::DPPSrri, X86::DPPSrmi, TB_ALIGN_16 },
- { X86::GF2P8AFFINEINVQBrri, X86::GF2P8AFFINEINVQBrmi, TB_ALIGN_16 },
- { X86::GF2P8AFFINEQBrri, X86::GF2P8AFFINEQBrmi, TB_ALIGN_16 },
- { X86::GF2P8MULBrr, X86::GF2P8MULBrm, TB_ALIGN_16 },
- { X86::HADDPDrr, X86::HADDPDrm, TB_ALIGN_16 },
- { X86::HADDPSrr, X86::HADDPSrm, TB_ALIGN_16 },
- { X86::HSUBPDrr, X86::HSUBPDrm, TB_ALIGN_16 },
- { X86::HSUBPSrr, X86::HSUBPSrm, TB_ALIGN_16 },
- { X86::IMUL16rr, X86::IMUL16rm, 0 },
- { X86::IMUL32rr, X86::IMUL32rm, 0 },
- { X86::IMUL64rr, X86::IMUL64rm, 0 },
- { X86::MAXCPDrr, X86::MAXCPDrm, TB_ALIGN_16 },
- { X86::MAXCPSrr, X86::MAXCPSrm, TB_ALIGN_16 },
- { X86::MAXCSDrr, X86::MAXCSDrm, 0 },
- { X86::MAXCSSrr, X86::MAXCSSrm, 0 },
- { X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
- { X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
- { X86::MAXSDrr, X86::MAXSDrm, 0 },
- { X86::MAXSDrr_Int, X86::MAXSDrm_Int, TB_NO_REVERSE },
- { X86::MAXSSrr, X86::MAXSSrm, 0 },
- { X86::MAXSSrr_Int, X86::MAXSSrm_Int, TB_NO_REVERSE },
- { X86::MINCPDrr, X86::MINCPDrm, TB_ALIGN_16 },
- { X86::MINCPSrr, X86::MINCPSrm, TB_ALIGN_16 },
- { X86::MINCSDrr, X86::MINCSDrm, 0 },
- { X86::MINCSSrr, X86::MINCSSrm, 0 },
- { X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 },
- { X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 },
- { X86::MINSDrr, X86::MINSDrm, 0 },
- { X86::MINSDrr_Int, X86::MINSDrm_Int, TB_NO_REVERSE },
- { X86::MINSSrr, X86::MINSSrm, 0 },
- { X86::MINSSrr_Int, X86::MINSSrm_Int, TB_NO_REVERSE },
- { X86::MMX_CVTPI2PSrr, X86::MMX_CVTPI2PSrm, 0 },
- { X86::MMX_PACKSSDWrr, X86::MMX_PACKSSDWrm, 0 },
- { X86::MMX_PACKSSWBrr, X86::MMX_PACKSSWBrm, 0 },
- { X86::MMX_PACKUSWBrr, X86::MMX_PACKUSWBrm, 0 },
- { X86::MMX_PADDBrr, X86::MMX_PADDBrm, 0 },
- { X86::MMX_PADDDrr, X86::MMX_PADDDrm, 0 },
- { X86::MMX_PADDQrr, X86::MMX_PADDQrm, 0 },
- { X86::MMX_PADDSBrr, X86::MMX_PADDSBrm, 0 },
- { X86::MMX_PADDSWrr, X86::MMX_PADDSWrm, 0 },
- { X86::MMX_PADDUSBrr, X86::MMX_PADDUSBrm, 0 },
- { X86::MMX_PADDUSWrr, X86::MMX_PADDUSWrm, 0 },
- { X86::MMX_PADDWrr, X86::MMX_PADDWrm, 0 },
- { X86::MMX_PALIGNRrri, X86::MMX_PALIGNRrmi, 0 },
- { X86::MMX_PANDNrr, X86::MMX_PANDNrm, 0 },
- { X86::MMX_PANDrr, X86::MMX_PANDrm, 0 },
- { X86::MMX_PAVGBrr, X86::MMX_PAVGBrm, 0 },
- { X86::MMX_PAVGWrr, X86::MMX_PAVGWrm, 0 },
- { X86::MMX_PCMPEQBrr, X86::MMX_PCMPEQBrm, 0 },
- { X86::MMX_PCMPEQDrr, X86::MMX_PCMPEQDrm, 0 },
- { X86::MMX_PCMPEQWrr, X86::MMX_PCMPEQWrm, 0 },
- { X86::MMX_PCMPGTBrr, X86::MMX_PCMPGTBrm, 0 },
- { X86::MMX_PCMPGTDrr, X86::MMX_PCMPGTDrm, 0 },
- { X86::MMX_PCMPGTWrr, X86::MMX_PCMPGTWrm, 0 },
- { X86::MMX_PHADDDrr, X86::MMX_PHADDDrm, 0 },
- { X86::MMX_PHADDSWrr, X86::MMX_PHADDSWrm, 0 },
- { X86::MMX_PHADDWrr, X86::MMX_PHADDWrm, 0 },
- { X86::MMX_PHSUBDrr, X86::MMX_PHSUBDrm, 0 },
- { X86::MMX_PHSUBSWrr, X86::MMX_PHSUBSWrm, 0 },
- { X86::MMX_PHSUBWrr, X86::MMX_PHSUBWrm, 0 },
- { X86::MMX_PINSRWrr, X86::MMX_PINSRWrm, TB_NO_REVERSE },
- { X86::MMX_PMADDUBSWrr, X86::MMX_PMADDUBSWrm, 0 },
- { X86::MMX_PMADDWDrr, X86::MMX_PMADDWDrm, 0 },
- { X86::MMX_PMAXSWrr, X86::MMX_PMAXSWrm, 0 },
- { X86::MMX_PMAXUBrr, X86::MMX_PMAXUBrm, 0 },
- { X86::MMX_PMINSWrr, X86::MMX_PMINSWrm, 0 },
- { X86::MMX_PMINUBrr, X86::MMX_PMINUBrm, 0 },
- { X86::MMX_PMULHRSWrr, X86::MMX_PMULHRSWrm, 0 },
- { X86::MMX_PMULHUWrr, X86::MMX_PMULHUWrm, 0 },
- { X86::MMX_PMULHWrr, X86::MMX_PMULHWrm, 0 },
- { X86::MMX_PMULLWrr, X86::MMX_PMULLWrm, 0 },
- { X86::MMX_PMULUDQrr, X86::MMX_PMULUDQrm, 0 },
- { X86::MMX_PORrr, X86::MMX_PORrm, 0 },
- { X86::MMX_PSADBWrr, X86::MMX_PSADBWrm, 0 },
- { X86::MMX_PSHUFBrr, X86::MMX_PSHUFBrm, 0 },
- { X86::MMX_PSIGNBrr, X86::MMX_PSIGNBrm, 0 },
- { X86::MMX_PSIGNDrr, X86::MMX_PSIGNDrm, 0 },
- { X86::MMX_PSIGNWrr, X86::MMX_PSIGNWrm, 0 },
- { X86::MMX_PSLLDrr, X86::MMX_PSLLDrm, 0 },
- { X86::MMX_PSLLQrr, X86::MMX_PSLLQrm, 0 },
- { X86::MMX_PSLLWrr, X86::MMX_PSLLWrm, 0 },
- { X86::MMX_PSRADrr, X86::MMX_PSRADrm, 0 },
- { X86::MMX_PSRAWrr, X86::MMX_PSRAWrm, 0 },
- { X86::MMX_PSRLDrr, X86::MMX_PSRLDrm, 0 },
- { X86::MMX_PSRLQrr, X86::MMX_PSRLQrm, 0 },
- { X86::MMX_PSRLWrr, X86::MMX_PSRLWrm, 0 },
- { X86::MMX_PSUBBrr, X86::MMX_PSUBBrm, 0 },
- { X86::MMX_PSUBDrr, X86::MMX_PSUBDrm, 0 },
- { X86::MMX_PSUBQrr, X86::MMX_PSUBQrm, 0 },
- { X86::MMX_PSUBSBrr, X86::MMX_PSUBSBrm, 0 },
- { X86::MMX_PSUBSWrr, X86::MMX_PSUBSWrm, 0 },
- { X86::MMX_PSUBUSBrr, X86::MMX_PSUBUSBrm, 0 },
- { X86::MMX_PSUBUSWrr, X86::MMX_PSUBUSWrm, 0 },
- { X86::MMX_PSUBWrr, X86::MMX_PSUBWrm, 0 },
- { X86::MMX_PUNPCKHBWrr, X86::MMX_PUNPCKHBWrm, 0 },
- { X86::MMX_PUNPCKHDQrr, X86::MMX_PUNPCKHDQrm, 0 },
- { X86::MMX_PUNPCKHWDrr, X86::MMX_PUNPCKHWDrm, 0 },
- { X86::MMX_PUNPCKLBWrr, X86::MMX_PUNPCKLBWrm, TB_NO_REVERSE },
- { X86::MMX_PUNPCKLDQrr, X86::MMX_PUNPCKLDQrm, TB_NO_REVERSE },
- { X86::MMX_PUNPCKLWDrr, X86::MMX_PUNPCKLWDrm, TB_NO_REVERSE },
- { X86::MMX_PXORrr, X86::MMX_PXORrm, 0 },
- { X86::MOVLHPSrr, X86::MOVHPSrm, TB_NO_REVERSE },
- { X86::MOVSDrr, X86::MOVLPDrm, TB_NO_REVERSE },
- { X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
- { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
- { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
- { X86::MULSDrr, X86::MULSDrm, 0 },
- { X86::MULSDrr_Int, X86::MULSDrm_Int, TB_NO_REVERSE },
- { X86::MULSSrr, X86::MULSSrm, 0 },
- { X86::MULSSrr_Int, X86::MULSSrm_Int, TB_NO_REVERSE },
- { X86::MULX32rr, X86::MULX32rm, 0 },
- { X86::MULX64rr, X86::MULX64rm, 0 },
- { X86::OR16rr, X86::OR16rm, 0 },
- { X86::OR32rr, X86::OR32rm, 0 },
- { X86::OR64rr, X86::OR64rm, 0 },
- { X86::OR8rr, X86::OR8rm, 0 },
- { X86::ORPDrr, X86::ORPDrm, TB_ALIGN_16 },
- { X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 },
- { X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 },
- { X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 },
- { X86::PACKUSDWrr, X86::PACKUSDWrm, TB_ALIGN_16 },
- { X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 },
- { X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 },
- { X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 },
- { X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 },
- { X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 },
- { X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 },
- { X86::PADDUSBrr, X86::PADDUSBrm, TB_ALIGN_16 },
- { X86::PADDUSWrr, X86::PADDUSWrm, TB_ALIGN_16 },
- { X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 },
- { X86::PALIGNRrri, X86::PALIGNRrmi, TB_ALIGN_16 },
- { X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 },
- { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 },
- { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 },
- { X86::PAVGUSBrr, X86::PAVGUSBrm, 0 },
- { X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 },
- { X86::PBLENDVBrr0, X86::PBLENDVBrm0, TB_ALIGN_16 },
- { X86::PBLENDWrri, X86::PBLENDWrmi, TB_ALIGN_16 },
- { X86::PCLMULQDQrr, X86::PCLMULQDQrm, TB_ALIGN_16 },
- { X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 },
- { X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 },
- { X86::PCMPEQQrr, X86::PCMPEQQrm, TB_ALIGN_16 },
- { X86::PCMPEQWrr, X86::PCMPEQWrm, TB_ALIGN_16 },
- { X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 },
- { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 },
- { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 },
- { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 },
- { X86::PDEP32rr, X86::PDEP32rm, 0 },
- { X86::PDEP64rr, X86::PDEP64rm, 0 },
- { X86::PEXT32rr, X86::PEXT32rm, 0 },
- { X86::PEXT64rr, X86::PEXT64rm, 0 },
- { X86::PFACCrr, X86::PFACCrm, 0 },
- { X86::PFADDrr, X86::PFADDrm, 0 },
- { X86::PFCMPEQrr, X86::PFCMPEQrm, 0 },
- { X86::PFCMPGErr, X86::PFCMPGErm, 0 },
- { X86::PFCMPGTrr, X86::PFCMPGTrm, 0 },
- { X86::PFMAXrr, X86::PFMAXrm, 0 },
- { X86::PFMINrr, X86::PFMINrm, 0 },
- { X86::PFMULrr, X86::PFMULrm, 0 },
- { X86::PFNACCrr, X86::PFNACCrm, 0 },
- { X86::PFPNACCrr, X86::PFPNACCrm, 0 },
- { X86::PFRCPIT1rr, X86::PFRCPIT1rm, 0 },
- { X86::PFRCPIT2rr, X86::PFRCPIT2rm, 0 },
- { X86::PFRSQIT1rr, X86::PFRSQIT1rm, 0 },
- { X86::PFSUBRrr, X86::PFSUBRrm, 0 },
- { X86::PFSUBrr, X86::PFSUBrm, 0 },
- { X86::PHADDDrr, X86::PHADDDrm, TB_ALIGN_16 },
- { X86::PHADDSWrr, X86::PHADDSWrm, TB_ALIGN_16 },
- { X86::PHADDWrr, X86::PHADDWrm, TB_ALIGN_16 },
- { X86::PHSUBDrr, X86::PHSUBDrm, TB_ALIGN_16 },
- { X86::PHSUBSWrr, X86::PHSUBSWrm, TB_ALIGN_16 },
- { X86::PHSUBWrr, X86::PHSUBWrm, TB_ALIGN_16 },
- { X86::PINSRBrr, X86::PINSRBrm, TB_NO_REVERSE },
- { X86::PINSRDrr, X86::PINSRDrm, 0 },
- { X86::PINSRQrr, X86::PINSRQrm, 0 },
- { X86::PINSRWrr, X86::PINSRWrm, TB_NO_REVERSE },
- { X86::PMADDUBSWrr, X86::PMADDUBSWrm, TB_ALIGN_16 },
- { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
- { X86::PMAXSBrr, X86::PMAXSBrm, TB_ALIGN_16 },
- { X86::PMAXSDrr, X86::PMAXSDrm, TB_ALIGN_16 },
- { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 },
- { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
- { X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 },
- { X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 },
- { X86::PMINSBrr, X86::PMINSBrm, TB_ALIGN_16 },
- { X86::PMINSDrr, X86::PMINSDrm, TB_ALIGN_16 },
- { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 },
- { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 },
- { X86::PMINUDrr, X86::PMINUDrm, TB_ALIGN_16 },
- { X86::PMINUWrr, X86::PMINUWrm, TB_ALIGN_16 },
- { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
- { X86::PMULHRSWrr, X86::PMULHRSWrm, TB_ALIGN_16 },
- { X86::PMULHRWrr, X86::PMULHRWrm, 0 },
- { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
- { X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 },
- { X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 },
- { X86::PMULLWrr, X86::PMULLWrm, TB_ALIGN_16 },
- { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 },
- { X86::PORrr, X86::PORrm, TB_ALIGN_16 },
- { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 },
- { X86::PSHUFBrr, X86::PSHUFBrm, TB_ALIGN_16 },
- { X86::PSIGNBrr, X86::PSIGNBrm, TB_ALIGN_16 },
- { X86::PSIGNDrr, X86::PSIGNDrm, TB_ALIGN_16 },
- { X86::PSIGNWrr, X86::PSIGNWrm, TB_ALIGN_16 },
- { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 },
- { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 },
- { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 },
- { X86::PSRADrr, X86::PSRADrm, TB_ALIGN_16 },
- { X86::PSRAWrr, X86::PSRAWrm, TB_ALIGN_16 },
- { X86::PSRLDrr, X86::PSRLDrm, TB_ALIGN_16 },
- { X86::PSRLQrr, X86::PSRLQrm, TB_ALIGN_16 },
- { X86::PSRLWrr, X86::PSRLWrm, TB_ALIGN_16 },
- { X86::PSUBBrr, X86::PSUBBrm, TB_ALIGN_16 },
- { X86::PSUBDrr, X86::PSUBDrm, TB_ALIGN_16 },
- { X86::PSUBQrr, X86::PSUBQrm, TB_ALIGN_16 },
- { X86::PSUBSBrr, X86::PSUBSBrm, TB_ALIGN_16 },
- { X86::PSUBSWrr, X86::PSUBSWrm, TB_ALIGN_16 },
- { X86::PSUBUSBrr, X86::PSUBUSBrm, TB_ALIGN_16 },
- { X86::PSUBUSWrr, X86::PSUBUSWrm, TB_ALIGN_16 },
- { X86::PSUBWrr, X86::PSUBWrm, TB_ALIGN_16 },
- { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm, TB_ALIGN_16 },
- { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm, TB_ALIGN_16 },
- { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm, TB_ALIGN_16 },
- { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm, TB_ALIGN_16 },
- { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm, TB_ALIGN_16 },
- { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm, TB_ALIGN_16 },
- { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 },
- { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 },
- { X86::PXORrr, X86::PXORrm, TB_ALIGN_16 },
- { X86::RCPSSr_Int, X86::RCPSSm_Int, TB_NO_REVERSE },
- { X86::ROUNDSDr_Int, X86::ROUNDSDm_Int, TB_NO_REVERSE },
- { X86::ROUNDSSr_Int, X86::ROUNDSSm_Int, TB_NO_REVERSE },
- { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, TB_NO_REVERSE },
- { X86::SBB16rr, X86::SBB16rm, 0 },
- { X86::SBB32rr, X86::SBB32rm, 0 },
- { X86::SBB64rr, X86::SBB64rm, 0 },
- { X86::SBB8rr, X86::SBB8rm, 0 },
- { X86::SHA1MSG1rr, X86::SHA1MSG1rm, TB_ALIGN_16 },
- { X86::SHA1MSG2rr, X86::SHA1MSG2rm, TB_ALIGN_16 },
- { X86::SHA1NEXTErr, X86::SHA1NEXTErm, TB_ALIGN_16 },
- { X86::SHA1RNDS4rri, X86::SHA1RNDS4rmi, TB_ALIGN_16 },
- { X86::SHA256MSG1rr, X86::SHA256MSG1rm, TB_ALIGN_16 },
- { X86::SHA256MSG2rr, X86::SHA256MSG2rm, TB_ALIGN_16 },
- { X86::SHA256RNDS2rr, X86::SHA256RNDS2rm, TB_ALIGN_16 },
- { X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 },
- { X86::SHUFPSrri, X86::SHUFPSrmi, TB_ALIGN_16 },
- { X86::SQRTSDr_Int, X86::SQRTSDm_Int, TB_NO_REVERSE },
- { X86::SQRTSSr_Int, X86::SQRTSSm_Int, TB_NO_REVERSE },
- { X86::SUB16rr, X86::SUB16rm, 0 },
- { X86::SUB32rr, X86::SUB32rm, 0 },
- { X86::SUB64rr, X86::SUB64rm, 0 },
- { X86::SUB8rr, X86::SUB8rm, 0 },
- { X86::SUBPDrr, X86::SUBPDrm, TB_ALIGN_16 },
- { X86::SUBPSrr, X86::SUBPSrm, TB_ALIGN_16 },
- { X86::SUBSDrr, X86::SUBSDrm, 0 },
- { X86::SUBSDrr_Int, X86::SUBSDrm_Int, TB_NO_REVERSE },
- { X86::SUBSSrr, X86::SUBSSrm, 0 },
- { X86::SUBSSrr_Int, X86::SUBSSrm_Int, TB_NO_REVERSE },
- { X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 },
- { X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 },
- { X86::UNPCKLPDrr, X86::UNPCKLPDrm, TB_ALIGN_16 },
- { X86::UNPCKLPSrr, X86::UNPCKLPSrm, TB_ALIGN_16 },
- { X86::VADDPDYrr, X86::VADDPDYrm, 0 },
- { X86::VADDPDZ128rr, X86::VADDPDZ128rm, 0 },
- { X86::VADDPDZ256rr, X86::VADDPDZ256rm, 0 },
- { X86::VADDPDZrr, X86::VADDPDZrm, 0 },
- { X86::VADDPDrr, X86::VADDPDrm, 0 },
- { X86::VADDPHZ128rr, X86::VADDPHZ128rm, 0 },
- { X86::VADDPHZ256rr, X86::VADDPHZ256rm, 0 },
- { X86::VADDPHZrr, X86::VADDPHZrm, 0 },
- { X86::VADDPSYrr, X86::VADDPSYrm, 0 },
- { X86::VADDPSZ128rr, X86::VADDPSZ128rm, 0 },
- { X86::VADDPSZ256rr, X86::VADDPSZ256rm, 0 },
- { X86::VADDPSZrr, X86::VADDPSZrm, 0 },
- { X86::VADDPSrr, X86::VADDPSrm, 0 },
- { X86::VADDSDZrr, X86::VADDSDZrm, 0 },
- { X86::VADDSDZrr_Int, X86::VADDSDZrm_Int, TB_NO_REVERSE },
- { X86::VADDSDrr, X86::VADDSDrm, 0 },
- { X86::VADDSDrr_Int, X86::VADDSDrm_Int, TB_NO_REVERSE },
- { X86::VADDSHZrr, X86::VADDSHZrm, 0 },
- { X86::VADDSHZrr_Int, X86::VADDSHZrm_Int, TB_NO_REVERSE },
- { X86::VADDSSZrr, X86::VADDSSZrm, 0 },
- { X86::VADDSSZrr_Int, X86::VADDSSZrm_Int, TB_NO_REVERSE },
- { X86::VADDSSrr, X86::VADDSSrm, 0 },
- { X86::VADDSSrr_Int, X86::VADDSSrm_Int, TB_NO_REVERSE },
- { X86::VADDSUBPDYrr, X86::VADDSUBPDYrm, 0 },
- { X86::VADDSUBPDrr, X86::VADDSUBPDrm, 0 },
- { X86::VADDSUBPSYrr, X86::VADDSUBPSYrm, 0 },
- { X86::VADDSUBPSrr, X86::VADDSUBPSrm, 0 },
- { X86::VAESDECLASTYrr, X86::VAESDECLASTYrm, 0 },
- { X86::VAESDECLASTZ128rr, X86::VAESDECLASTZ128rm, 0 },
- { X86::VAESDECLASTZ256rr, X86::VAESDECLASTZ256rm, 0 },
- { X86::VAESDECLASTZrr, X86::VAESDECLASTZrm, 0 },
- { X86::VAESDECLASTrr, X86::VAESDECLASTrm, 0 },
- { X86::VAESDECYrr, X86::VAESDECYrm, 0 },
- { X86::VAESDECZ128rr, X86::VAESDECZ128rm, 0 },
- { X86::VAESDECZ256rr, X86::VAESDECZ256rm, 0 },
- { X86::VAESDECZrr, X86::VAESDECZrm, 0 },
- { X86::VAESDECrr, X86::VAESDECrm, 0 },
- { X86::VAESENCLASTYrr, X86::VAESENCLASTYrm, 0 },
- { X86::VAESENCLASTZ128rr, X86::VAESENCLASTZ128rm, 0 },
- { X86::VAESENCLASTZ256rr, X86::VAESENCLASTZ256rm, 0 },
- { X86::VAESENCLASTZrr, X86::VAESENCLASTZrm, 0 },
- { X86::VAESENCLASTrr, X86::VAESENCLASTrm, 0 },
- { X86::VAESENCYrr, X86::VAESENCYrm, 0 },
- { X86::VAESENCZ128rr, X86::VAESENCZ128rm, 0 },
- { X86::VAESENCZ256rr, X86::VAESENCZ256rm, 0 },
- { X86::VAESENCZrr, X86::VAESENCZrm, 0 },
- { X86::VAESENCrr, X86::VAESENCrm, 0 },
- { X86::VALIGNDZ128rri, X86::VALIGNDZ128rmi, 0 },
- { X86::VALIGNDZ256rri, X86::VALIGNDZ256rmi, 0 },
- { X86::VALIGNDZrri, X86::VALIGNDZrmi, 0 },
- { X86::VALIGNQZ128rri, X86::VALIGNQZ128rmi, 0 },
- { X86::VALIGNQZ256rri, X86::VALIGNQZ256rmi, 0 },
- { X86::VALIGNQZrri, X86::VALIGNQZrmi, 0 },
- { X86::VANDNPDYrr, X86::VANDNPDYrm, 0 },
- { X86::VANDNPDZ128rr, X86::VANDNPDZ128rm, 0 },
- { X86::VANDNPDZ256rr, X86::VANDNPDZ256rm, 0 },
- { X86::VANDNPDZrr, X86::VANDNPDZrm, 0 },
- { X86::VANDNPDrr, X86::VANDNPDrm, 0 },
- { X86::VANDNPSYrr, X86::VANDNPSYrm, 0 },
- { X86::VANDNPSZ128rr, X86::VANDNPSZ128rm, 0 },
- { X86::VANDNPSZ256rr, X86::VANDNPSZ256rm, 0 },
- { X86::VANDNPSZrr, X86::VANDNPSZrm, 0 },
- { X86::VANDNPSrr, X86::VANDNPSrm, 0 },
- { X86::VANDPDYrr, X86::VANDPDYrm, 0 },
- { X86::VANDPDZ128rr, X86::VANDPDZ128rm, 0 },
- { X86::VANDPDZ256rr, X86::VANDPDZ256rm, 0 },
- { X86::VANDPDZrr, X86::VANDPDZrm, 0 },
- { X86::VANDPDrr, X86::VANDPDrm, 0 },
- { X86::VANDPSYrr, X86::VANDPSYrm, 0 },
- { X86::VANDPSZ128rr, X86::VANDPSZ128rm, 0 },
- { X86::VANDPSZ256rr, X86::VANDPSZ256rm, 0 },
- { X86::VANDPSZrr, X86::VANDPSZrm, 0 },
- { X86::VANDPSrr, X86::VANDPSrm, 0 },
- { X86::VBLENDMPDZ128rr, X86::VBLENDMPDZ128rm, 0 },
- { X86::VBLENDMPDZ256rr, X86::VBLENDMPDZ256rm, 0 },
- { X86::VBLENDMPDZrr, X86::VBLENDMPDZrm, 0 },
- { X86::VBLENDMPSZ128rr, X86::VBLENDMPSZ128rm, 0 },
- { X86::VBLENDMPSZ256rr, X86::VBLENDMPSZ256rm, 0 },
- { X86::VBLENDMPSZrr, X86::VBLENDMPSZrm, 0 },
- { X86::VBLENDPDYrri, X86::VBLENDPDYrmi, 0 },
- { X86::VBLENDPDrri, X86::VBLENDPDrmi, 0 },
- { X86::VBLENDPSYrri, X86::VBLENDPSYrmi, 0 },
- { X86::VBLENDPSrri, X86::VBLENDPSrmi, 0 },
- { X86::VBLENDVPDYrr, X86::VBLENDVPDYrm, 0 },
- { X86::VBLENDVPDrr, X86::VBLENDVPDrm, 0 },
- { X86::VBLENDVPSYrr, X86::VBLENDVPSYrm, 0 },
- { X86::VBLENDVPSrr, X86::VBLENDVPSrm, 0 },
- { X86::VBROADCASTF32X2Z256rrkz, X86::VBROADCASTF32X2Z256rmkz, TB_NO_REVERSE },
- { X86::VBROADCASTF32X2Zrrkz, X86::VBROADCASTF32X2Zrmkz, TB_NO_REVERSE },
- { X86::VBROADCASTI32X2Z128rrkz, X86::VBROADCASTI32X2Z128rmkz, TB_NO_REVERSE },
- { X86::VBROADCASTI32X2Z256rrkz, X86::VBROADCASTI32X2Z256rmkz, TB_NO_REVERSE },
- { X86::VBROADCASTI32X2Zrrkz, X86::VBROADCASTI32X2Zrmkz, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256rrkz, X86::VBROADCASTSDZ256rmkz, TB_NO_REVERSE },
- { X86::VBROADCASTSDZrrkz, X86::VBROADCASTSDZrmkz, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ128rrkz, X86::VBROADCASTSSZ128rmkz, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ256rrkz, X86::VBROADCASTSSZ256rmkz, TB_NO_REVERSE },
- { X86::VBROADCASTSSZrrkz, X86::VBROADCASTSSZrmkz, TB_NO_REVERSE },
- { X86::VCMPPDYrri, X86::VCMPPDYrmi, 0 },
- { X86::VCMPPDZ128rri, X86::VCMPPDZ128rmi, 0 },
- { X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0 },
- { X86::VCMPPDZrri, X86::VCMPPDZrmi, 0 },
- { X86::VCMPPDrri, X86::VCMPPDrmi, 0 },
- { X86::VCMPPHZ128rri, X86::VCMPPHZ128rmi, 0 },
- { X86::VCMPPHZ256rri, X86::VCMPPHZ256rmi, 0 },
- { X86::VCMPPHZrri, X86::VCMPPHZrmi, 0 },
- { X86::VCMPPSYrri, X86::VCMPPSYrmi, 0 },
- { X86::VCMPPSZ128rri, X86::VCMPPSZ128rmi, 0 },
- { X86::VCMPPSZ256rri, X86::VCMPPSZ256rmi, 0 },
- { X86::VCMPPSZrri, X86::VCMPPSZrmi, 0 },
- { X86::VCMPPSrri, X86::VCMPPSrmi, 0 },
- { X86::VCMPSDZrr, X86::VCMPSDZrm, 0 },
- { X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, TB_NO_REVERSE },
- { X86::VCMPSDrr, X86::VCMPSDrm, 0 },
- { X86::VCMPSDrr_Int, X86::VCMPSDrm_Int, TB_NO_REVERSE },
- { X86::VCMPSHZrr, X86::VCMPSHZrm, 0 },
- { X86::VCMPSHZrr_Int, X86::VCMPSHZrm_Int, TB_NO_REVERSE },
- { X86::VCMPSSZrr, X86::VCMPSSZrm, 0 },
- { X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, TB_NO_REVERSE },
- { X86::VCMPSSrr, X86::VCMPSSrm, 0 },
- { X86::VCMPSSrr_Int, X86::VCMPSSrm_Int, TB_NO_REVERSE },
- { X86::VCVTDQ2PDZ128rrkz, X86::VCVTDQ2PDZ128rmkz, TB_NO_REVERSE },
- { X86::VCVTDQ2PDZ256rrkz, X86::VCVTDQ2PDZ256rmkz, 0 },
- { X86::VCVTDQ2PDZrrkz, X86::VCVTDQ2PDZrmkz, 0 },
- { X86::VCVTDQ2PSZ128rrkz, X86::VCVTDQ2PSZ128rmkz, 0 },
- { X86::VCVTDQ2PSZ256rrkz, X86::VCVTDQ2PSZ256rmkz, 0 },
- { X86::VCVTDQ2PSZrrkz, X86::VCVTDQ2PSZrmkz, 0 },
- { X86::VCVTNE2PS2BF16Z128rr, X86::VCVTNE2PS2BF16Z128rm, 0 },
- { X86::VCVTNE2PS2BF16Z256rr, X86::VCVTNE2PS2BF16Z256rm, 0 },
- { X86::VCVTNE2PS2BF16Zrr, X86::VCVTNE2PS2BF16Zrm, 0 },
- { X86::VCVTNEPS2BF16Z128rrkz, X86::VCVTNEPS2BF16Z128rmkz, 0 },
- { X86::VCVTNEPS2BF16Z256rrkz, X86::VCVTNEPS2BF16Z256rmkz, 0 },
- { X86::VCVTNEPS2BF16Zrrkz, X86::VCVTNEPS2BF16Zrmkz, 0 },
- { X86::VCVTPD2DQZ128rrkz, X86::VCVTPD2DQZ128rmkz, 0 },
- { X86::VCVTPD2DQZ256rrkz, X86::VCVTPD2DQZ256rmkz, 0 },
- { X86::VCVTPD2DQZrrkz, X86::VCVTPD2DQZrmkz, 0 },
- { X86::VCVTPD2PSZ128rrkz, X86::VCVTPD2PSZ128rmkz, 0 },
- { X86::VCVTPD2PSZ256rrkz, X86::VCVTPD2PSZ256rmkz, 0 },
- { X86::VCVTPD2PSZrrkz, X86::VCVTPD2PSZrmkz, 0 },
- { X86::VCVTPD2QQZ128rrkz, X86::VCVTPD2QQZ128rmkz, 0 },
- { X86::VCVTPD2QQZ256rrkz, X86::VCVTPD2QQZ256rmkz, 0 },
- { X86::VCVTPD2QQZrrkz, X86::VCVTPD2QQZrmkz, 0 },
- { X86::VCVTPD2UDQZ128rrkz, X86::VCVTPD2UDQZ128rmkz, 0 },
- { X86::VCVTPD2UDQZ256rrkz, X86::VCVTPD2UDQZ256rmkz, 0 },
- { X86::VCVTPD2UDQZrrkz, X86::VCVTPD2UDQZrmkz, 0 },
- { X86::VCVTPD2UQQZ128rrkz, X86::VCVTPD2UQQZ128rmkz, 0 },
- { X86::VCVTPD2UQQZ256rrkz, X86::VCVTPD2UQQZ256rmkz, 0 },
- { X86::VCVTPD2UQQZrrkz, X86::VCVTPD2UQQZrmkz, 0 },
- { X86::VCVTPH2PSZ128rrkz, X86::VCVTPH2PSZ128rmkz, TB_NO_REVERSE },
- { X86::VCVTPH2PSZ256rrkz, X86::VCVTPH2PSZ256rmkz, 0 },
- { X86::VCVTPH2PSZrrkz, X86::VCVTPH2PSZrmkz, 0 },
- { X86::VCVTPS2DQZ128rrkz, X86::VCVTPS2DQZ128rmkz, 0 },
- { X86::VCVTPS2DQZ256rrkz, X86::VCVTPS2DQZ256rmkz, 0 },
- { X86::VCVTPS2DQZrrkz, X86::VCVTPS2DQZrmkz, 0 },
- { X86::VCVTPS2PDZ128rrkz, X86::VCVTPS2PDZ128rmkz, TB_NO_REVERSE },
- { X86::VCVTPS2PDZ256rrkz, X86::VCVTPS2PDZ256rmkz, 0 },
- { X86::VCVTPS2PDZrrkz, X86::VCVTPS2PDZrmkz, 0 },
- { X86::VCVTPS2QQZ128rrkz, X86::VCVTPS2QQZ128rmkz, TB_NO_REVERSE },
- { X86::VCVTPS2QQZ256rrkz, X86::VCVTPS2QQZ256rmkz, 0 },
- { X86::VCVTPS2QQZrrkz, X86::VCVTPS2QQZrmkz, 0 },
- { X86::VCVTPS2UDQZ128rrkz, X86::VCVTPS2UDQZ128rmkz, 0 },
- { X86::VCVTPS2UDQZ256rrkz, X86::VCVTPS2UDQZ256rmkz, 0 },
- { X86::VCVTPS2UDQZrrkz, X86::VCVTPS2UDQZrmkz, 0 },
- { X86::VCVTPS2UQQZ128rrkz, X86::VCVTPS2UQQZ128rmkz, TB_NO_REVERSE },
- { X86::VCVTPS2UQQZ256rrkz, X86::VCVTPS2UQQZ256rmkz, 0 },
- { X86::VCVTPS2UQQZrrkz, X86::VCVTPS2UQQZrmkz, 0 },
- { X86::VCVTQQ2PDZ128rrkz, X86::VCVTQQ2PDZ128rmkz, 0 },
- { X86::VCVTQQ2PDZ256rrkz, X86::VCVTQQ2PDZ256rmkz, 0 },
- { X86::VCVTQQ2PDZrrkz, X86::VCVTQQ2PDZrmkz, 0 },
- { X86::VCVTQQ2PSZ128rrkz, X86::VCVTQQ2PSZ128rmkz, 0 },
- { X86::VCVTQQ2PSZ256rrkz, X86::VCVTQQ2PSZ256rmkz, 0 },
- { X86::VCVTQQ2PSZrrkz, X86::VCVTQQ2PSZrmkz, 0 },
- { X86::VCVTSD2SSZrr, X86::VCVTSD2SSZrm, 0 },
- { X86::VCVTSD2SSZrr_Int, X86::VCVTSD2SSZrm_Int, TB_NO_REVERSE },
- { X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 },
- { X86::VCVTSD2SSrr_Int, X86::VCVTSD2SSrm_Int, TB_NO_REVERSE },
- { X86::VCVTSI2SDZrr, X86::VCVTSI2SDZrm, 0 },
- { X86::VCVTSI2SDZrr_Int, X86::VCVTSI2SDZrm_Int, 0 },
- { X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 },
- { X86::VCVTSI2SDrr_Int, X86::VCVTSI2SDrm_Int, 0 },
- { X86::VCVTSI2SSZrr, X86::VCVTSI2SSZrm, 0 },
- { X86::VCVTSI2SSZrr_Int, X86::VCVTSI2SSZrm_Int, 0 },
- { X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 },
- { X86::VCVTSI2SSrr_Int, X86::VCVTSI2SSrm_Int, 0 },
- { X86::VCVTSI642SDZrr, X86::VCVTSI642SDZrm, 0 },
- { X86::VCVTSI642SDZrr_Int, X86::VCVTSI642SDZrm_Int, 0 },
- { X86::VCVTSI642SDrr, X86::VCVTSI642SDrm, 0 },
- { X86::VCVTSI642SDrr_Int, X86::VCVTSI642SDrm_Int, 0 },
- { X86::VCVTSI642SSZrr, X86::VCVTSI642SSZrm, 0 },
- { X86::VCVTSI642SSZrr_Int, X86::VCVTSI642SSZrm_Int, 0 },
- { X86::VCVTSI642SSrr, X86::VCVTSI642SSrm, 0 },
- { X86::VCVTSI642SSrr_Int, X86::VCVTSI642SSrm_Int, 0 },
- { X86::VCVTSS2SDZrr, X86::VCVTSS2SDZrm, 0 },
- { X86::VCVTSS2SDZrr_Int, X86::VCVTSS2SDZrm_Int, TB_NO_REVERSE },
- { X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
- { X86::VCVTSS2SDrr_Int, X86::VCVTSS2SDrm_Int, TB_NO_REVERSE },
- { X86::VCVTTPD2DQZ128rrkz, X86::VCVTTPD2DQZ128rmkz, 0 },
- { X86::VCVTTPD2DQZ256rrkz, X86::VCVTTPD2DQZ256rmkz, 0 },
- { X86::VCVTTPD2DQZrrkz, X86::VCVTTPD2DQZrmkz, 0 },
- { X86::VCVTTPD2QQZ128rrkz, X86::VCVTTPD2QQZ128rmkz, 0 },
- { X86::VCVTTPD2QQZ256rrkz, X86::VCVTTPD2QQZ256rmkz, 0 },
- { X86::VCVTTPD2QQZrrkz, X86::VCVTTPD2QQZrmkz, 0 },
- { X86::VCVTTPD2UDQZ128rrkz, X86::VCVTTPD2UDQZ128rmkz, 0 },
- { X86::VCVTTPD2UDQZ256rrkz, X86::VCVTTPD2UDQZ256rmkz, 0 },
- { X86::VCVTTPD2UDQZrrkz, X86::VCVTTPD2UDQZrmkz, 0 },
- { X86::VCVTTPD2UQQZ128rrkz, X86::VCVTTPD2UQQZ128rmkz, 0 },
- { X86::VCVTTPD2UQQZ256rrkz, X86::VCVTTPD2UQQZ256rmkz, 0 },
- { X86::VCVTTPD2UQQZrrkz, X86::VCVTTPD2UQQZrmkz, 0 },
- { X86::VCVTTPS2DQZ128rrkz, X86::VCVTTPS2DQZ128rmkz, 0 },
- { X86::VCVTTPS2DQZ256rrkz, X86::VCVTTPS2DQZ256rmkz, 0 },
- { X86::VCVTTPS2DQZrrkz, X86::VCVTTPS2DQZrmkz, 0 },
- { X86::VCVTTPS2QQZ128rrkz, X86::VCVTTPS2QQZ128rmkz, TB_NO_REVERSE },
- { X86::VCVTTPS2QQZ256rrkz, X86::VCVTTPS2QQZ256rmkz, 0 },
- { X86::VCVTTPS2QQZrrkz, X86::VCVTTPS2QQZrmkz, 0 },
- { X86::VCVTTPS2UDQZ128rrkz, X86::VCVTTPS2UDQZ128rmkz, 0 },
- { X86::VCVTTPS2UDQZ256rrkz, X86::VCVTTPS2UDQZ256rmkz, 0 },
- { X86::VCVTTPS2UDQZrrkz, X86::VCVTTPS2UDQZrmkz, 0 },
- { X86::VCVTTPS2UQQZ128rrkz, X86::VCVTTPS2UQQZ128rmkz, TB_NO_REVERSE },
- { X86::VCVTTPS2UQQZ256rrkz, X86::VCVTTPS2UQQZ256rmkz, 0 },
- { X86::VCVTTPS2UQQZrrkz, X86::VCVTTPS2UQQZrmkz, 0 },
- { X86::VCVTUDQ2PDZ128rrkz, X86::VCVTUDQ2PDZ128rmkz, TB_NO_REVERSE },
- { X86::VCVTUDQ2PDZ256rrkz, X86::VCVTUDQ2PDZ256rmkz, 0 },
- { X86::VCVTUDQ2PDZrrkz, X86::VCVTUDQ2PDZrmkz, 0 },
- { X86::VCVTUDQ2PSZ128rrkz, X86::VCVTUDQ2PSZ128rmkz, 0 },
- { X86::VCVTUDQ2PSZ256rrkz, X86::VCVTUDQ2PSZ256rmkz, 0 },
- { X86::VCVTUDQ2PSZrrkz, X86::VCVTUDQ2PSZrmkz, 0 },
- { X86::VCVTUQQ2PDZ128rrkz, X86::VCVTUQQ2PDZ128rmkz, 0 },
- { X86::VCVTUQQ2PDZ256rrkz, X86::VCVTUQQ2PDZ256rmkz, 0 },
- { X86::VCVTUQQ2PDZrrkz, X86::VCVTUQQ2PDZrmkz, 0 },
- { X86::VCVTUQQ2PSZ128rrkz, X86::VCVTUQQ2PSZ128rmkz, 0 },
- { X86::VCVTUQQ2PSZ256rrkz, X86::VCVTUQQ2PSZ256rmkz, 0 },
- { X86::VCVTUQQ2PSZrrkz, X86::VCVTUQQ2PSZrmkz, 0 },
- { X86::VCVTUSI2SDZrr, X86::VCVTUSI2SDZrm, 0 },
- { X86::VCVTUSI2SDZrr_Int, X86::VCVTUSI2SDZrm_Int, 0 },
- { X86::VCVTUSI2SSZrr, X86::VCVTUSI2SSZrm, 0 },
- { X86::VCVTUSI2SSZrr_Int, X86::VCVTUSI2SSZrm_Int, 0 },
- { X86::VCVTUSI642SDZrr, X86::VCVTUSI642SDZrm, 0 },
- { X86::VCVTUSI642SDZrr_Int, X86::VCVTUSI642SDZrm_Int, 0 },
- { X86::VCVTUSI642SSZrr, X86::VCVTUSI642SSZrm, 0 },
- { X86::VCVTUSI642SSZrr_Int, X86::VCVTUSI642SSZrm_Int, 0 },
- { X86::VDBPSADBWZ128rri, X86::VDBPSADBWZ128rmi, 0 },
- { X86::VDBPSADBWZ256rri, X86::VDBPSADBWZ256rmi, 0 },
- { X86::VDBPSADBWZrri, X86::VDBPSADBWZrmi, 0 },
- { X86::VDIVPDYrr, X86::VDIVPDYrm, 0 },
- { X86::VDIVPDZ128rr, X86::VDIVPDZ128rm, 0 },
- { X86::VDIVPDZ256rr, X86::VDIVPDZ256rm, 0 },
- { X86::VDIVPDZrr, X86::VDIVPDZrm, 0 },
- { X86::VDIVPDrr, X86::VDIVPDrm, 0 },
- { X86::VDIVPHZ128rr, X86::VDIVPHZ128rm, 0 },
- { X86::VDIVPHZ256rr, X86::VDIVPHZ256rm, 0 },
- { X86::VDIVPHZrr, X86::VDIVPHZrm, 0 },
- { X86::VDIVPSYrr, X86::VDIVPSYrm, 0 },
- { X86::VDIVPSZ128rr, X86::VDIVPSZ128rm, 0 },
- { X86::VDIVPSZ256rr, X86::VDIVPSZ256rm, 0 },
- { X86::VDIVPSZrr, X86::VDIVPSZrm, 0 },
- { X86::VDIVPSrr, X86::VDIVPSrm, 0 },
- { X86::VDIVSDZrr, X86::VDIVSDZrm, 0 },
- { X86::VDIVSDZrr_Int, X86::VDIVSDZrm_Int, TB_NO_REVERSE },
- { X86::VDIVSDrr, X86::VDIVSDrm, 0 },
- { X86::VDIVSDrr_Int, X86::VDIVSDrm_Int, TB_NO_REVERSE },
- { X86::VDIVSHZrr, X86::VDIVSHZrm, 0 },
- { X86::VDIVSHZrr_Int, X86::VDIVSHZrm_Int, TB_NO_REVERSE },
- { X86::VDIVSSZrr, X86::VDIVSSZrm, 0 },
- { X86::VDIVSSZrr_Int, X86::VDIVSSZrm_Int, TB_NO_REVERSE },
- { X86::VDIVSSrr, X86::VDIVSSrm, 0 },
- { X86::VDIVSSrr_Int, X86::VDIVSSrm_Int, TB_NO_REVERSE },
- { X86::VDPPDrri, X86::VDPPDrmi, 0 },
- { X86::VDPPSYrri, X86::VDPPSYrmi, 0 },
- { X86::VDPPSrri, X86::VDPPSrmi, 0 },
- { X86::VEXP2PDZrkz, X86::VEXP2PDZmkz, 0 },
- { X86::VEXP2PSZrkz, X86::VEXP2PSZmkz, 0 },
- { X86::VEXPANDPDZ128rrkz, X86::VEXPANDPDZ128rmkz, TB_NO_REVERSE },
- { X86::VEXPANDPDZ256rrkz, X86::VEXPANDPDZ256rmkz, TB_NO_REVERSE },
- { X86::VEXPANDPDZrrkz, X86::VEXPANDPDZrmkz, TB_NO_REVERSE },
- { X86::VEXPANDPSZ128rrkz, X86::VEXPANDPSZ128rmkz, TB_NO_REVERSE },
- { X86::VEXPANDPSZ256rrkz, X86::VEXPANDPSZ256rmkz, TB_NO_REVERSE },
- { X86::VEXPANDPSZrrkz, X86::VEXPANDPSZrmkz, TB_NO_REVERSE },
- { X86::VFCMULCPHZ128rr, X86::VFCMULCPHZ128rm, 0 },
- { X86::VFCMULCPHZ256rr, X86::VFCMULCPHZ256rm, 0 },
- { X86::VFCMULCPHZrr, X86::VFCMULCPHZrm, 0 },
- { X86::VFCMULCSHZrr, X86::VFCMULCSHZrm, TB_NO_REVERSE },
- { X86::VFMADDPD4Yrr, X86::VFMADDPD4Ymr, 0 },
- { X86::VFMADDPD4rr, X86::VFMADDPD4mr, 0 },
- { X86::VFMADDPS4Yrr, X86::VFMADDPS4Ymr, 0 },
- { X86::VFMADDPS4rr, X86::VFMADDPS4mr, 0 },
- { X86::VFMADDSD4rr, X86::VFMADDSD4mr, 0 },
- { X86::VFMADDSD4rr_Int, X86::VFMADDSD4mr_Int, TB_NO_REVERSE },
- { X86::VFMADDSS4rr, X86::VFMADDSS4mr, 0 },
- { X86::VFMADDSS4rr_Int, X86::VFMADDSS4mr_Int, TB_NO_REVERSE },
- { X86::VFMADDSUBPD4Yrr, X86::VFMADDSUBPD4Ymr, 0 },
- { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, 0 },
- { X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Ymr, 0 },
- { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, 0 },
- { X86::VFMSUBADDPD4Yrr, X86::VFMSUBADDPD4Ymr, 0 },
- { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, 0 },
- { X86::VFMSUBADDPS4Yrr, X86::VFMSUBADDPS4Ymr, 0 },
- { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, 0 },
- { X86::VFMSUBPD4Yrr, X86::VFMSUBPD4Ymr, 0 },
- { X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, 0 },
- { X86::VFMSUBPS4Yrr, X86::VFMSUBPS4Ymr, 0 },
- { X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, 0 },
- { X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, 0 },
- { X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4mr_Int, TB_NO_REVERSE },
- { X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, 0 },
- { X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4mr_Int, TB_NO_REVERSE },
- { X86::VFMULCPHZ128rr, X86::VFMULCPHZ128rm, 0 },
- { X86::VFMULCPHZ256rr, X86::VFMULCPHZ256rm, 0 },
- { X86::VFMULCPHZrr, X86::VFMULCPHZrm, 0 },
- { X86::VFMULCSHZrr, X86::VFMULCSHZrm, TB_NO_REVERSE },
- { X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Ymr, 0 },
- { X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, 0 },
- { X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Ymr, 0 },
- { X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, 0 },
- { X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, 0 },
- { X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4mr_Int, TB_NO_REVERSE },
- { X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, 0 },
- { X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4mr_Int, TB_NO_REVERSE },
- { X86::VFNMSUBPD4Yrr, X86::VFNMSUBPD4Ymr, 0 },
- { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, 0 },
- { X86::VFNMSUBPS4Yrr, X86::VFNMSUBPS4Ymr, 0 },
- { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, 0 },
- { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, 0 },
- { X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4mr_Int, TB_NO_REVERSE },
- { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, 0 },
- { X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4mr_Int, TB_NO_REVERSE },
- { X86::VFPCLASSPDZ128rrk, X86::VFPCLASSPDZ128rmk, 0 },
- { X86::VFPCLASSPDZ256rrk, X86::VFPCLASSPDZ256rmk, 0 },
- { X86::VFPCLASSPDZrrk, X86::VFPCLASSPDZrmk, 0 },
- { X86::VFPCLASSPHZ128rrk, X86::VFPCLASSPHZ128rmk, 0 },
- { X86::VFPCLASSPHZ256rrk, X86::VFPCLASSPHZ256rmk, 0 },
- { X86::VFPCLASSPHZrrk, X86::VFPCLASSPHZrmk, 0 },
- { X86::VFPCLASSPSZ128rrk, X86::VFPCLASSPSZ128rmk, 0 },
- { X86::VFPCLASSPSZ256rrk, X86::VFPCLASSPSZ256rmk, 0 },
- { X86::VFPCLASSPSZrrk, X86::VFPCLASSPSZrmk, 0 },
- { X86::VFPCLASSSDZrrk, X86::VFPCLASSSDZrmk, TB_NO_REVERSE },
- { X86::VFPCLASSSHZrrk, X86::VFPCLASSSHZrmk, TB_NO_REVERSE },
- { X86::VFPCLASSSSZrrk, X86::VFPCLASSSSZrmk, TB_NO_REVERSE },
- { X86::VGETEXPPDZ128rkz, X86::VGETEXPPDZ128mkz, 0 },
- { X86::VGETEXPPDZ256rkz, X86::VGETEXPPDZ256mkz, 0 },
- { X86::VGETEXPPDZrkz, X86::VGETEXPPDZmkz, 0 },
- { X86::VGETEXPPHZ128rkz, X86::VGETEXPPHZ128mkz, 0 },
- { X86::VGETEXPPHZ256rkz, X86::VGETEXPPHZ256mkz, 0 },
- { X86::VGETEXPPHZrkz, X86::VGETEXPPHZmkz, 0 },
- { X86::VGETEXPPSZ128rkz, X86::VGETEXPPSZ128mkz, 0 },
- { X86::VGETEXPPSZ256rkz, X86::VGETEXPPSZ256mkz, 0 },
- { X86::VGETEXPPSZrkz, X86::VGETEXPPSZmkz, 0 },
- { X86::VGETEXPSDZr, X86::VGETEXPSDZm, TB_NO_REVERSE },
- { X86::VGETEXPSHZr, X86::VGETEXPSHZm, TB_NO_REVERSE },
- { X86::VGETEXPSSZr, X86::VGETEXPSSZm, TB_NO_REVERSE },
- { X86::VGETMANTPDZ128rrikz, X86::VGETMANTPDZ128rmikz, 0 },
- { X86::VGETMANTPDZ256rrikz, X86::VGETMANTPDZ256rmikz, 0 },
- { X86::VGETMANTPDZrrikz, X86::VGETMANTPDZrmikz, 0 },
- { X86::VGETMANTPHZ128rrikz, X86::VGETMANTPHZ128rmikz, 0 },
- { X86::VGETMANTPHZ256rrikz, X86::VGETMANTPHZ256rmikz, 0 },
- { X86::VGETMANTPHZrrikz, X86::VGETMANTPHZrmikz, 0 },
- { X86::VGETMANTPSZ128rrikz, X86::VGETMANTPSZ128rmikz, 0 },
- { X86::VGETMANTPSZ256rrikz, X86::VGETMANTPSZ256rmikz, 0 },
- { X86::VGETMANTPSZrrikz, X86::VGETMANTPSZrmikz, 0 },
- { X86::VGETMANTSDZrri, X86::VGETMANTSDZrmi, TB_NO_REVERSE },
- { X86::VGETMANTSHZrri, X86::VGETMANTSHZrmi, TB_NO_REVERSE },
- { X86::VGETMANTSSZrri, X86::VGETMANTSSZrmi, TB_NO_REVERSE },
- { X86::VGF2P8AFFINEINVQBYrri, X86::VGF2P8AFFINEINVQBYrmi, 0 },
- { X86::VGF2P8AFFINEINVQBZ128rri, X86::VGF2P8AFFINEINVQBZ128rmi, 0 },
- { X86::VGF2P8AFFINEINVQBZ256rri, X86::VGF2P8AFFINEINVQBZ256rmi, 0 },
- { X86::VGF2P8AFFINEINVQBZrri, X86::VGF2P8AFFINEINVQBZrmi, 0 },
- { X86::VGF2P8AFFINEINVQBrri, X86::VGF2P8AFFINEINVQBrmi, 0 },
- { X86::VGF2P8AFFINEQBYrri, X86::VGF2P8AFFINEQBYrmi, 0 },
- { X86::VGF2P8AFFINEQBZ128rri, X86::VGF2P8AFFINEQBZ128rmi, 0 },
- { X86::VGF2P8AFFINEQBZ256rri, X86::VGF2P8AFFINEQBZ256rmi, 0 },
- { X86::VGF2P8AFFINEQBZrri, X86::VGF2P8AFFINEQBZrmi, 0 },
- { X86::VGF2P8AFFINEQBrri, X86::VGF2P8AFFINEQBrmi, 0 },
- { X86::VGF2P8MULBYrr, X86::VGF2P8MULBYrm, 0 },
- { X86::VGF2P8MULBZ128rr, X86::VGF2P8MULBZ128rm, 0 },
- { X86::VGF2P8MULBZ256rr, X86::VGF2P8MULBZ256rm, 0 },
- { X86::VGF2P8MULBZrr, X86::VGF2P8MULBZrm, 0 },
- { X86::VGF2P8MULBrr, X86::VGF2P8MULBrm, 0 },
- { X86::VHADDPDYrr, X86::VHADDPDYrm, 0 },
- { X86::VHADDPDrr, X86::VHADDPDrm, 0 },
- { X86::VHADDPSYrr, X86::VHADDPSYrm, 0 },
- { X86::VHADDPSrr, X86::VHADDPSrm, 0 },
- { X86::VHSUBPDYrr, X86::VHSUBPDYrm, 0 },
- { X86::VHSUBPDrr, X86::VHSUBPDrm, 0 },
- { X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 },
- { X86::VHSUBPSrr, X86::VHSUBPSrm, 0 },
- { X86::VINSERTF128rr, X86::VINSERTF128rm, 0 },
- { X86::VINSERTF32x4Z256rr, X86::VINSERTF32x4Z256rm, 0 },
- { X86::VINSERTF32x4Zrr, X86::VINSERTF32x4Zrm, 0 },
- { X86::VINSERTF32x8Zrr, X86::VINSERTF32x8Zrm, 0 },
- { X86::VINSERTF64x2Z256rr, X86::VINSERTF64x2Z256rm, 0 },
- { X86::VINSERTF64x2Zrr, X86::VINSERTF64x2Zrm, 0 },
- { X86::VINSERTF64x4Zrr, X86::VINSERTF64x4Zrm, 0 },
- { X86::VINSERTI128rr, X86::VINSERTI128rm, 0 },
- { X86::VINSERTI32x4Z256rr, X86::VINSERTI32x4Z256rm, 0 },
- { X86::VINSERTI32x4Zrr, X86::VINSERTI32x4Zrm, 0 },
- { X86::VINSERTI32x8Zrr, X86::VINSERTI32x8Zrm, 0 },
- { X86::VINSERTI64x2Z256rr, X86::VINSERTI64x2Z256rm, 0 },
- { X86::VINSERTI64x2Zrr, X86::VINSERTI64x2Zrm, 0 },
- { X86::VINSERTI64x4Zrr, X86::VINSERTI64x4Zrm, 0 },
- { X86::VMAXCPDYrr, X86::VMAXCPDYrm, 0 },
- { X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rm, 0 },
- { X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rm, 0 },
- { X86::VMAXCPDZrr, X86::VMAXCPDZrm, 0 },
- { X86::VMAXCPDrr, X86::VMAXCPDrm, 0 },
- { X86::VMAXCPHZ128rr, X86::VMAXCPHZ128rm, 0 },
- { X86::VMAXCPHZ256rr, X86::VMAXCPHZ256rm, 0 },
- { X86::VMAXCPHZrr, X86::VMAXCPHZrm, 0 },
- { X86::VMAXCPSYrr, X86::VMAXCPSYrm, 0 },
- { X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rm, 0 },
- { X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rm, 0 },
- { X86::VMAXCPSZrr, X86::VMAXCPSZrm, 0 },
- { X86::VMAXCPSrr, X86::VMAXCPSrm, 0 },
- { X86::VMAXCSDZrr, X86::VMAXCSDZrm, 0 },
- { X86::VMAXCSDrr, X86::VMAXCSDrm, 0 },
- { X86::VMAXCSHZrr, X86::VMAXCSHZrm, 0 },
- { X86::VMAXCSSZrr, X86::VMAXCSSZrm, 0 },
- { X86::VMAXCSSrr, X86::VMAXCSSrm, 0 },
- { X86::VMAXPDYrr, X86::VMAXPDYrm, 0 },
- { X86::VMAXPDZ128rr, X86::VMAXPDZ128rm, 0 },
- { X86::VMAXPDZ256rr, X86::VMAXPDZ256rm, 0 },
- { X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
- { X86::VMAXPDrr, X86::VMAXPDrm, 0 },
- { X86::VMAXPHZ128rr, X86::VMAXPHZ128rm, 0 },
- { X86::VMAXPHZ256rr, X86::VMAXPHZ256rm, 0 },
- { X86::VMAXPHZrr, X86::VMAXPHZrm, 0 },
- { X86::VMAXPSYrr, X86::VMAXPSYrm, 0 },
- { X86::VMAXPSZ128rr, X86::VMAXPSZ128rm, 0 },
- { X86::VMAXPSZ256rr, X86::VMAXPSZ256rm, 0 },
- { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
- { X86::VMAXPSrr, X86::VMAXPSrm, 0 },
- { X86::VMAXSDZrr, X86::VMAXSDZrm, 0 },
- { X86::VMAXSDZrr_Int, X86::VMAXSDZrm_Int, TB_NO_REVERSE },
- { X86::VMAXSDrr, X86::VMAXSDrm, 0 },
- { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, TB_NO_REVERSE },
- { X86::VMAXSHZrr, X86::VMAXSHZrm, 0 },
- { X86::VMAXSHZrr_Int, X86::VMAXSHZrm_Int, TB_NO_REVERSE },
- { X86::VMAXSSZrr, X86::VMAXSSZrm, 0 },
- { X86::VMAXSSZrr_Int, X86::VMAXSSZrm_Int, TB_NO_REVERSE },
- { X86::VMAXSSrr, X86::VMAXSSrm, 0 },
- { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, TB_NO_REVERSE },
- { X86::VMINCPDYrr, X86::VMINCPDYrm, 0 },
- { X86::VMINCPDZ128rr, X86::VMINCPDZ128rm, 0 },
- { X86::VMINCPDZ256rr, X86::VMINCPDZ256rm, 0 },
- { X86::VMINCPDZrr, X86::VMINCPDZrm, 0 },
- { X86::VMINCPDrr, X86::VMINCPDrm, 0 },
- { X86::VMINCPHZ128rr, X86::VMINCPHZ128rm, 0 },
- { X86::VMINCPHZ256rr, X86::VMINCPHZ256rm, 0 },
- { X86::VMINCPHZrr, X86::VMINCPHZrm, 0 },
- { X86::VMINCPSYrr, X86::VMINCPSYrm, 0 },
- { X86::VMINCPSZ128rr, X86::VMINCPSZ128rm, 0 },
- { X86::VMINCPSZ256rr, X86::VMINCPSZ256rm, 0 },
- { X86::VMINCPSZrr, X86::VMINCPSZrm, 0 },
- { X86::VMINCPSrr, X86::VMINCPSrm, 0 },
- { X86::VMINCSDZrr, X86::VMINCSDZrm, 0 },
- { X86::VMINCSDrr, X86::VMINCSDrm, 0 },
- { X86::VMINCSHZrr, X86::VMINCSHZrm, 0 },
- { X86::VMINCSSZrr, X86::VMINCSSZrm, 0 },
- { X86::VMINCSSrr, X86::VMINCSSrm, 0 },
- { X86::VMINPDYrr, X86::VMINPDYrm, 0 },
- { X86::VMINPDZ128rr, X86::VMINPDZ128rm, 0 },
- { X86::VMINPDZ256rr, X86::VMINPDZ256rm, 0 },
- { X86::VMINPDZrr, X86::VMINPDZrm, 0 },
- { X86::VMINPDrr, X86::VMINPDrm, 0 },
- { X86::VMINPHZ128rr, X86::VMINPHZ128rm, 0 },
- { X86::VMINPHZ256rr, X86::VMINPHZ256rm, 0 },
- { X86::VMINPHZrr, X86::VMINPHZrm, 0 },
- { X86::VMINPSYrr, X86::VMINPSYrm, 0 },
- { X86::VMINPSZ128rr, X86::VMINPSZ128rm, 0 },
- { X86::VMINPSZ256rr, X86::VMINPSZ256rm, 0 },
- { X86::VMINPSZrr, X86::VMINPSZrm, 0 },
- { X86::VMINPSrr, X86::VMINPSrm, 0 },
- { X86::VMINSDZrr, X86::VMINSDZrm, 0 },
- { X86::VMINSDZrr_Int, X86::VMINSDZrm_Int, TB_NO_REVERSE },
- { X86::VMINSDrr, X86::VMINSDrm, 0 },
- { X86::VMINSDrr_Int, X86::VMINSDrm_Int, TB_NO_REVERSE },
- { X86::VMINSHZrr, X86::VMINSHZrm, 0 },
- { X86::VMINSHZrr_Int, X86::VMINSHZrm_Int, TB_NO_REVERSE },
- { X86::VMINSSZrr, X86::VMINSSZrm, 0 },
- { X86::VMINSSZrr_Int, X86::VMINSSZrm_Int, TB_NO_REVERSE },
- { X86::VMINSSrr, X86::VMINSSrm, 0 },
- { X86::VMINSSrr_Int, X86::VMINSSrm_Int, TB_NO_REVERSE },
- { X86::VMOVAPDZ128rrkz, X86::VMOVAPDZ128rmkz, TB_NO_REVERSE | TB_ALIGN_16 },
- { X86::VMOVAPDZ256rrkz, X86::VMOVAPDZ256rmkz, TB_NO_REVERSE | TB_ALIGN_32 },
- { X86::VMOVAPDZrrkz, X86::VMOVAPDZrmkz, TB_NO_REVERSE | TB_ALIGN_64 },
- { X86::VMOVAPSZ128rrkz, X86::VMOVAPSZ128rmkz, TB_NO_REVERSE | TB_ALIGN_16 },
- { X86::VMOVAPSZ256rrkz, X86::VMOVAPSZ256rmkz, TB_NO_REVERSE | TB_ALIGN_32 },
- { X86::VMOVAPSZrrkz, X86::VMOVAPSZrmkz, TB_NO_REVERSE | TB_ALIGN_64 },
- { X86::VMOVDDUPZ128rrkz, X86::VMOVDDUPZ128rmkz, TB_NO_REVERSE },
- { X86::VMOVDDUPZ256rrkz, X86::VMOVDDUPZ256rmkz, 0 },
- { X86::VMOVDDUPZrrkz, X86::VMOVDDUPZrmkz, 0 },
- { X86::VMOVDQA32Z128rrkz, X86::VMOVDQA32Z128rmkz, TB_NO_REVERSE | TB_ALIGN_16 },
- { X86::VMOVDQA32Z256rrkz, X86::VMOVDQA32Z256rmkz, TB_NO_REVERSE | TB_ALIGN_32 },
- { X86::VMOVDQA32Zrrkz, X86::VMOVDQA32Zrmkz, TB_NO_REVERSE | TB_ALIGN_64 },
- { X86::VMOVDQA64Z128rrkz, X86::VMOVDQA64Z128rmkz, TB_NO_REVERSE | TB_ALIGN_16 },
- { X86::VMOVDQA64Z256rrkz, X86::VMOVDQA64Z256rmkz, TB_NO_REVERSE | TB_ALIGN_32 },
- { X86::VMOVDQA64Zrrkz, X86::VMOVDQA64Zrmkz, TB_NO_REVERSE | TB_ALIGN_64 },
- { X86::VMOVDQU16Z128rrkz, X86::VMOVDQU16Z128rmkz, TB_NO_REVERSE },
- { X86::VMOVDQU16Z256rrkz, X86::VMOVDQU16Z256rmkz, TB_NO_REVERSE },
- { X86::VMOVDQU16Zrrkz, X86::VMOVDQU16Zrmkz, TB_NO_REVERSE },
- { X86::VMOVDQU32Z128rrkz, X86::VMOVDQU32Z128rmkz, TB_NO_REVERSE },
- { X86::VMOVDQU32Z256rrkz, X86::VMOVDQU32Z256rmkz, TB_NO_REVERSE },
- { X86::VMOVDQU32Zrrkz, X86::VMOVDQU32Zrmkz, TB_NO_REVERSE },
- { X86::VMOVDQU64Z128rrkz, X86::VMOVDQU64Z128rmkz, TB_NO_REVERSE },
- { X86::VMOVDQU64Z256rrkz, X86::VMOVDQU64Z256rmkz, TB_NO_REVERSE },
- { X86::VMOVDQU64Zrrkz, X86::VMOVDQU64Zrmkz, TB_NO_REVERSE },
- { X86::VMOVDQU8Z128rrkz, X86::VMOVDQU8Z128rmkz, TB_NO_REVERSE },
- { X86::VMOVDQU8Z256rrkz, X86::VMOVDQU8Z256rmkz, TB_NO_REVERSE },
- { X86::VMOVDQU8Zrrkz, X86::VMOVDQU8Zrmkz, TB_NO_REVERSE },
- { X86::VMOVLHPSZrr, X86::VMOVHPSZ128rm, TB_NO_REVERSE },
- { X86::VMOVLHPSrr, X86::VMOVHPSrm, TB_NO_REVERSE },
- { X86::VMOVSDZrr, X86::VMOVLPDZ128rm, TB_NO_REVERSE },
- { X86::VMOVSDrr, X86::VMOVLPDrm, TB_NO_REVERSE },
- { X86::VMOVSHDUPZ128rrkz, X86::VMOVSHDUPZ128rmkz, 0 },
- { X86::VMOVSHDUPZ256rrkz, X86::VMOVSHDUPZ256rmkz, 0 },
- { X86::VMOVSHDUPZrrkz, X86::VMOVSHDUPZrmkz, 0 },
- { X86::VMOVSLDUPZ128rrkz, X86::VMOVSLDUPZ128rmkz, 0 },
- { X86::VMOVSLDUPZ256rrkz, X86::VMOVSLDUPZ256rmkz, 0 },
- { X86::VMOVSLDUPZrrkz, X86::VMOVSLDUPZrmkz, 0 },
- { X86::VMOVUPDZ128rrkz, X86::VMOVUPDZ128rmkz, TB_NO_REVERSE },
- { X86::VMOVUPDZ256rrkz, X86::VMOVUPDZ256rmkz, TB_NO_REVERSE },
- { X86::VMOVUPDZrrkz, X86::VMOVUPDZrmkz, TB_NO_REVERSE },
- { X86::VMOVUPSZ128rrkz, X86::VMOVUPSZ128rmkz, TB_NO_REVERSE },
- { X86::VMOVUPSZ256rrkz, X86::VMOVUPSZ256rmkz, TB_NO_REVERSE },
- { X86::VMOVUPSZrrkz, X86::VMOVUPSZrmkz, TB_NO_REVERSE },
- { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 },
- { X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 },
- { X86::VMULPDYrr, X86::VMULPDYrm, 0 },
- { X86::VMULPDZ128rr, X86::VMULPDZ128rm, 0 },
- { X86::VMULPDZ256rr, X86::VMULPDZ256rm, 0 },
- { X86::VMULPDZrr, X86::VMULPDZrm, 0 },
- { X86::VMULPDrr, X86::VMULPDrm, 0 },
- { X86::VMULPHZ128rr, X86::VMULPHZ128rm, 0 },
- { X86::VMULPHZ256rr, X86::VMULPHZ256rm, 0 },
- { X86::VMULPHZrr, X86::VMULPHZrm, 0 },
- { X86::VMULPSYrr, X86::VMULPSYrm, 0 },
- { X86::VMULPSZ128rr, X86::VMULPSZ128rm, 0 },
- { X86::VMULPSZ256rr, X86::VMULPSZ256rm, 0 },
- { X86::VMULPSZrr, X86::VMULPSZrm, 0 },
- { X86::VMULPSrr, X86::VMULPSrm, 0 },
- { X86::VMULSDZrr, X86::VMULSDZrm, 0 },
- { X86::VMULSDZrr_Int, X86::VMULSDZrm_Int, TB_NO_REVERSE },
- { X86::VMULSDrr, X86::VMULSDrm, 0 },
- { X86::VMULSDrr_Int, X86::VMULSDrm_Int, TB_NO_REVERSE },
- { X86::VMULSHZrr, X86::VMULSHZrm, 0 },
- { X86::VMULSHZrr_Int, X86::VMULSHZrm_Int, TB_NO_REVERSE },
- { X86::VMULSSZrr, X86::VMULSSZrm, 0 },
- { X86::VMULSSZrr_Int, X86::VMULSSZrm_Int, TB_NO_REVERSE },
- { X86::VMULSSrr, X86::VMULSSrm, 0 },
- { X86::VMULSSrr_Int, X86::VMULSSrm_Int, TB_NO_REVERSE },
- { X86::VORPDYrr, X86::VORPDYrm, 0 },
- { X86::VORPDZ128rr, X86::VORPDZ128rm, 0 },
- { X86::VORPDZ256rr, X86::VORPDZ256rm, 0 },
- { X86::VORPDZrr, X86::VORPDZrm, 0 },
- { X86::VORPDrr, X86::VORPDrm, 0 },
- { X86::VORPSYrr, X86::VORPSYrm, 0 },
- { X86::VORPSZ128rr, X86::VORPSZ128rm, 0 },
- { X86::VORPSZ256rr, X86::VORPSZ256rm, 0 },
- { X86::VORPSZrr, X86::VORPSZrm, 0 },
- { X86::VORPSrr, X86::VORPSrm, 0 },
- { X86::VP2INTERSECTDZ128rr, X86::VP2INTERSECTDZ128rm, 0 },
- { X86::VP2INTERSECTDZ256rr, X86::VP2INTERSECTDZ256rm, 0 },
- { X86::VP2INTERSECTDZrr, X86::VP2INTERSECTDZrm, 0 },
- { X86::VP2INTERSECTQZ128rr, X86::VP2INTERSECTQZ128rm, 0 },
- { X86::VP2INTERSECTQZ256rr, X86::VP2INTERSECTQZ256rm, 0 },
- { X86::VP2INTERSECTQZrr, X86::VP2INTERSECTQZrm, 0 },
- { X86::VPABSBZ128rrkz, X86::VPABSBZ128rmkz, 0 },
- { X86::VPABSBZ256rrkz, X86::VPABSBZ256rmkz, 0 },
- { X86::VPABSBZrrkz, X86::VPABSBZrmkz, 0 },
- { X86::VPABSDZ128rrkz, X86::VPABSDZ128rmkz, 0 },
- { X86::VPABSDZ256rrkz, X86::VPABSDZ256rmkz, 0 },
- { X86::VPABSDZrrkz, X86::VPABSDZrmkz, 0 },
- { X86::VPABSQZ128rrkz, X86::VPABSQZ128rmkz, 0 },
- { X86::VPABSQZ256rrkz, X86::VPABSQZ256rmkz, 0 },
- { X86::VPABSQZrrkz, X86::VPABSQZrmkz, 0 },
- { X86::VPABSWZ128rrkz, X86::VPABSWZ128rmkz, 0 },
- { X86::VPABSWZ256rrkz, X86::VPABSWZ256rmkz, 0 },
- { X86::VPABSWZrrkz, X86::VPABSWZrmkz, 0 },
- { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, 0 },
- { X86::VPACKSSDWZ128rr, X86::VPACKSSDWZ128rm, 0 },
- { X86::VPACKSSDWZ256rr, X86::VPACKSSDWZ256rm, 0 },
- { X86::VPACKSSDWZrr, X86::VPACKSSDWZrm, 0 },
- { X86::VPACKSSDWrr, X86::VPACKSSDWrm, 0 },
- { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, 0 },
- { X86::VPACKSSWBZ128rr, X86::VPACKSSWBZ128rm, 0 },
- { X86::VPACKSSWBZ256rr, X86::VPACKSSWBZ256rm, 0 },
- { X86::VPACKSSWBZrr, X86::VPACKSSWBZrm, 0 },
- { X86::VPACKSSWBrr, X86::VPACKSSWBrm, 0 },
- { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, 0 },
- { X86::VPACKUSDWZ128rr, X86::VPACKUSDWZ128rm, 0 },
- { X86::VPACKUSDWZ256rr, X86::VPACKUSDWZ256rm, 0 },
- { X86::VPACKUSDWZrr, X86::VPACKUSDWZrm, 0 },
- { X86::VPACKUSDWrr, X86::VPACKUSDWrm, 0 },
- { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, 0 },
- { X86::VPACKUSWBZ128rr, X86::VPACKUSWBZ128rm, 0 },
- { X86::VPACKUSWBZ256rr, X86::VPACKUSWBZ256rm, 0 },
- { X86::VPACKUSWBZrr, X86::VPACKUSWBZrm, 0 },
- { X86::VPACKUSWBrr, X86::VPACKUSWBrm, 0 },
- { X86::VPADDBYrr, X86::VPADDBYrm, 0 },
- { X86::VPADDBZ128rr, X86::VPADDBZ128rm, 0 },
- { X86::VPADDBZ256rr, X86::VPADDBZ256rm, 0 },
- { X86::VPADDBZrr, X86::VPADDBZrm, 0 },
- { X86::VPADDBrr, X86::VPADDBrm, 0 },
- { X86::VPADDDYrr, X86::VPADDDYrm, 0 },
- { X86::VPADDDZ128rr, X86::VPADDDZ128rm, 0 },
- { X86::VPADDDZ256rr, X86::VPADDDZ256rm, 0 },
- { X86::VPADDDZrr, X86::VPADDDZrm, 0 },
- { X86::VPADDDrr, X86::VPADDDrm, 0 },
- { X86::VPADDQYrr, X86::VPADDQYrm, 0 },
- { X86::VPADDQZ128rr, X86::VPADDQZ128rm, 0 },
- { X86::VPADDQZ256rr, X86::VPADDQZ256rm, 0 },
- { X86::VPADDQZrr, X86::VPADDQZrm, 0 },
- { X86::VPADDQrr, X86::VPADDQrm, 0 },
- { X86::VPADDSBYrr, X86::VPADDSBYrm, 0 },
- { X86::VPADDSBZ128rr, X86::VPADDSBZ128rm, 0 },
- { X86::VPADDSBZ256rr, X86::VPADDSBZ256rm, 0 },
- { X86::VPADDSBZrr, X86::VPADDSBZrm, 0 },
- { X86::VPADDSBrr, X86::VPADDSBrm, 0 },
- { X86::VPADDSWYrr, X86::VPADDSWYrm, 0 },
- { X86::VPADDSWZ128rr, X86::VPADDSWZ128rm, 0 },
- { X86::VPADDSWZ256rr, X86::VPADDSWZ256rm, 0 },
- { X86::VPADDSWZrr, X86::VPADDSWZrm, 0 },
- { X86::VPADDSWrr, X86::VPADDSWrm, 0 },
- { X86::VPADDUSBYrr, X86::VPADDUSBYrm, 0 },
- { X86::VPADDUSBZ128rr, X86::VPADDUSBZ128rm, 0 },
- { X86::VPADDUSBZ256rr, X86::VPADDUSBZ256rm, 0 },
- { X86::VPADDUSBZrr, X86::VPADDUSBZrm, 0 },
- { X86::VPADDUSBrr, X86::VPADDUSBrm, 0 },
- { X86::VPADDUSWYrr, X86::VPADDUSWYrm, 0 },
- { X86::VPADDUSWZ128rr, X86::VPADDUSWZ128rm, 0 },
- { X86::VPADDUSWZ256rr, X86::VPADDUSWZ256rm, 0 },
- { X86::VPADDUSWZrr, X86::VPADDUSWZrm, 0 },
- { X86::VPADDUSWrr, X86::VPADDUSWrm, 0 },
- { X86::VPADDWYrr, X86::VPADDWYrm, 0 },
- { X86::VPADDWZ128rr, X86::VPADDWZ128rm, 0 },
- { X86::VPADDWZ256rr, X86::VPADDWZ256rm, 0 },
- { X86::VPADDWZrr, X86::VPADDWZrm, 0 },
- { X86::VPADDWrr, X86::VPADDWrm, 0 },
- { X86::VPALIGNRYrri, X86::VPALIGNRYrmi, 0 },
- { X86::VPALIGNRZ128rri, X86::VPALIGNRZ128rmi, 0 },
- { X86::VPALIGNRZ256rri, X86::VPALIGNRZ256rmi, 0 },
- { X86::VPALIGNRZrri, X86::VPALIGNRZrmi, 0 },
- { X86::VPALIGNRrri, X86::VPALIGNRrmi, 0 },
- { X86::VPANDDZ128rr, X86::VPANDDZ128rm, 0 },
- { X86::VPANDDZ256rr, X86::VPANDDZ256rm, 0 },
- { X86::VPANDDZrr, X86::VPANDDZrm, 0 },
- { X86::VPANDNDZ128rr, X86::VPANDNDZ128rm, 0 },
- { X86::VPANDNDZ256rr, X86::VPANDNDZ256rm, 0 },
- { X86::VPANDNDZrr, X86::VPANDNDZrm, 0 },
- { X86::VPANDNQZ128rr, X86::VPANDNQZ128rm, 0 },
- { X86::VPANDNQZ256rr, X86::VPANDNQZ256rm, 0 },
- { X86::VPANDNQZrr, X86::VPANDNQZrm, 0 },
- { X86::VPANDNYrr, X86::VPANDNYrm, 0 },
- { X86::VPANDNrr, X86::VPANDNrm, 0 },
- { X86::VPANDQZ128rr, X86::VPANDQZ128rm, 0 },
- { X86::VPANDQZ256rr, X86::VPANDQZ256rm, 0 },
- { X86::VPANDQZrr, X86::VPANDQZrm, 0 },
- { X86::VPANDYrr, X86::VPANDYrm, 0 },
- { X86::VPANDrr, X86::VPANDrm, 0 },
- { X86::VPAVGBYrr, X86::VPAVGBYrm, 0 },
- { X86::VPAVGBZ128rr, X86::VPAVGBZ128rm, 0 },
- { X86::VPAVGBZ256rr, X86::VPAVGBZ256rm, 0 },
- { X86::VPAVGBZrr, X86::VPAVGBZrm, 0 },
- { X86::VPAVGBrr, X86::VPAVGBrm, 0 },
- { X86::VPAVGWYrr, X86::VPAVGWYrm, 0 },
- { X86::VPAVGWZ128rr, X86::VPAVGWZ128rm, 0 },
- { X86::VPAVGWZ256rr, X86::VPAVGWZ256rm, 0 },
- { X86::VPAVGWZrr, X86::VPAVGWZrm, 0 },
- { X86::VPAVGWrr, X86::VPAVGWrm, 0 },
- { X86::VPBLENDDYrri, X86::VPBLENDDYrmi, 0 },
- { X86::VPBLENDDrri, X86::VPBLENDDrmi, 0 },
- { X86::VPBLENDMBZ128rr, X86::VPBLENDMBZ128rm, 0 },
- { X86::VPBLENDMBZ256rr, X86::VPBLENDMBZ256rm, 0 },
- { X86::VPBLENDMBZrr, X86::VPBLENDMBZrm, 0 },
- { X86::VPBLENDMDZ128rr, X86::VPBLENDMDZ128rm, 0 },
- { X86::VPBLENDMDZ256rr, X86::VPBLENDMDZ256rm, 0 },
- { X86::VPBLENDMDZrr, X86::VPBLENDMDZrm, 0 },
- { X86::VPBLENDMQZ128rr, X86::VPBLENDMQZ128rm, 0 },
- { X86::VPBLENDMQZ256rr, X86::VPBLENDMQZ256rm, 0 },
- { X86::VPBLENDMQZrr, X86::VPBLENDMQZrm, 0 },
- { X86::VPBLENDMWZ128rr, X86::VPBLENDMWZ128rm, 0 },
- { X86::VPBLENDMWZ256rr, X86::VPBLENDMWZ256rm, 0 },
- { X86::VPBLENDMWZrr, X86::VPBLENDMWZrm, 0 },
- { X86::VPBLENDVBYrr, X86::VPBLENDVBYrm, 0 },
- { X86::VPBLENDVBrr, X86::VPBLENDVBrm, 0 },
- { X86::VPBLENDWYrri, X86::VPBLENDWYrmi, 0 },
- { X86::VPBLENDWrri, X86::VPBLENDWrmi, 0 },
- { X86::VPBROADCASTBZ128rrkz, X86::VPBROADCASTBZ128rmkz, TB_NO_REVERSE },
- { X86::VPBROADCASTBZ256rrkz, X86::VPBROADCASTBZ256rmkz, TB_NO_REVERSE },
- { X86::VPBROADCASTBZrrkz, X86::VPBROADCASTBZrmkz, TB_NO_REVERSE },
- { X86::VPBROADCASTDZ128rrkz, X86::VPBROADCASTDZ128rmkz, TB_NO_REVERSE },
- { X86::VPBROADCASTDZ256rrkz, X86::VPBROADCASTDZ256rmkz, TB_NO_REVERSE },
- { X86::VPBROADCASTDZrrkz, X86::VPBROADCASTDZrmkz, TB_NO_REVERSE },
- { X86::VPBROADCASTQZ128rrkz, X86::VPBROADCASTQZ128rmkz, TB_NO_REVERSE },
- { X86::VPBROADCASTQZ256rrkz, X86::VPBROADCASTQZ256rmkz, TB_NO_REVERSE },
- { X86::VPBROADCASTQZrrkz, X86::VPBROADCASTQZrmkz, TB_NO_REVERSE },
- { X86::VPBROADCASTWZ128rrkz, X86::VPBROADCASTWZ128rmkz, TB_NO_REVERSE },
- { X86::VPBROADCASTWZ256rrkz, X86::VPBROADCASTWZ256rmkz, TB_NO_REVERSE },
- { X86::VPBROADCASTWZrrkz, X86::VPBROADCASTWZrmkz, TB_NO_REVERSE },
- { X86::VPCLMULQDQYrr, X86::VPCLMULQDQYrm, 0 },
- { X86::VPCLMULQDQZ128rr, X86::VPCLMULQDQZ128rm, 0 },
- { X86::VPCLMULQDQZ256rr, X86::VPCLMULQDQZ256rm, 0 },
- { X86::VPCLMULQDQZrr, X86::VPCLMULQDQZrm, 0 },
- { X86::VPCLMULQDQrr, X86::VPCLMULQDQrm, 0 },
- { X86::VPCMOVYrrr, X86::VPCMOVYrmr, 0 },
- { X86::VPCMOVrrr, X86::VPCMOVrmr, 0 },
- { X86::VPCMPBZ128rri, X86::VPCMPBZ128rmi, 0 },
- { X86::VPCMPBZ256rri, X86::VPCMPBZ256rmi, 0 },
- { X86::VPCMPBZrri, X86::VPCMPBZrmi, 0 },
- { X86::VPCMPDZ128rri, X86::VPCMPDZ128rmi, 0 },
- { X86::VPCMPDZ256rri, X86::VPCMPDZ256rmi, 0 },
- { X86::VPCMPDZrri, X86::VPCMPDZrmi, 0 },
- { X86::VPCMPEQBYrr, X86::VPCMPEQBYrm, 0 },
- { X86::VPCMPEQBZ128rr, X86::VPCMPEQBZ128rm, 0 },
- { X86::VPCMPEQBZ256rr, X86::VPCMPEQBZ256rm, 0 },
- { X86::VPCMPEQBZrr, X86::VPCMPEQBZrm, 0 },
- { X86::VPCMPEQBrr, X86::VPCMPEQBrm, 0 },
- { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, 0 },
- { X86::VPCMPEQDZ128rr, X86::VPCMPEQDZ128rm, 0 },
- { X86::VPCMPEQDZ256rr, X86::VPCMPEQDZ256rm, 0 },
- { X86::VPCMPEQDZrr, X86::VPCMPEQDZrm, 0 },
- { X86::VPCMPEQDrr, X86::VPCMPEQDrm, 0 },
- { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, 0 },
- { X86::VPCMPEQQZ128rr, X86::VPCMPEQQZ128rm, 0 },
- { X86::VPCMPEQQZ256rr, X86::VPCMPEQQZ256rm, 0 },
- { X86::VPCMPEQQZrr, X86::VPCMPEQQZrm, 0 },
- { X86::VPCMPEQQrr, X86::VPCMPEQQrm, 0 },
- { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, 0 },
- { X86::VPCMPEQWZ128rr, X86::VPCMPEQWZ128rm, 0 },
- { X86::VPCMPEQWZ256rr, X86::VPCMPEQWZ256rm, 0 },
- { X86::VPCMPEQWZrr, X86::VPCMPEQWZrm, 0 },
- { X86::VPCMPEQWrr, X86::VPCMPEQWrm, 0 },
- { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, 0 },
- { X86::VPCMPGTBZ128rr, X86::VPCMPGTBZ128rm, 0 },
- { X86::VPCMPGTBZ256rr, X86::VPCMPGTBZ256rm, 0 },
- { X86::VPCMPGTBZrr, X86::VPCMPGTBZrm, 0 },
- { X86::VPCMPGTBrr, X86::VPCMPGTBrm, 0 },
- { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, 0 },
- { X86::VPCMPGTDZ128rr, X86::VPCMPGTDZ128rm, 0 },
- { X86::VPCMPGTDZ256rr, X86::VPCMPGTDZ256rm, 0 },
- { X86::VPCMPGTDZrr, X86::VPCMPGTDZrm, 0 },
- { X86::VPCMPGTDrr, X86::VPCMPGTDrm, 0 },
- { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, 0 },
- { X86::VPCMPGTQZ128rr, X86::VPCMPGTQZ128rm, 0 },
- { X86::VPCMPGTQZ256rr, X86::VPCMPGTQZ256rm, 0 },
- { X86::VPCMPGTQZrr, X86::VPCMPGTQZrm, 0 },
- { X86::VPCMPGTQrr, X86::VPCMPGTQrm, 0 },
- { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, 0 },
- { X86::VPCMPGTWZ128rr, X86::VPCMPGTWZ128rm, 0 },
- { X86::VPCMPGTWZ256rr, X86::VPCMPGTWZ256rm, 0 },
- { X86::VPCMPGTWZrr, X86::VPCMPGTWZrm, 0 },
- { X86::VPCMPGTWrr, X86::VPCMPGTWrm, 0 },
- { X86::VPCMPQZ128rri, X86::VPCMPQZ128rmi, 0 },
- { X86::VPCMPQZ256rri, X86::VPCMPQZ256rmi, 0 },
- { X86::VPCMPQZrri, X86::VPCMPQZrmi, 0 },
- { X86::VPCMPUBZ128rri, X86::VPCMPUBZ128rmi, 0 },
- { X86::VPCMPUBZ256rri, X86::VPCMPUBZ256rmi, 0 },
- { X86::VPCMPUBZrri, X86::VPCMPUBZrmi, 0 },
- { X86::VPCMPUDZ128rri, X86::VPCMPUDZ128rmi, 0 },
- { X86::VPCMPUDZ256rri, X86::VPCMPUDZ256rmi, 0 },
- { X86::VPCMPUDZrri, X86::VPCMPUDZrmi, 0 },
- { X86::VPCMPUQZ128rri, X86::VPCMPUQZ128rmi, 0 },
- { X86::VPCMPUQZ256rri, X86::VPCMPUQZ256rmi, 0 },
- { X86::VPCMPUQZrri, X86::VPCMPUQZrmi, 0 },
- { X86::VPCMPUWZ128rri, X86::VPCMPUWZ128rmi, 0 },
- { X86::VPCMPUWZ256rri, X86::VPCMPUWZ256rmi, 0 },
- { X86::VPCMPUWZrri, X86::VPCMPUWZrmi, 0 },
- { X86::VPCMPWZ128rri, X86::VPCMPWZ128rmi, 0 },
- { X86::VPCMPWZ256rri, X86::VPCMPWZ256rmi, 0 },
- { X86::VPCMPWZrri, X86::VPCMPWZrmi, 0 },
- { X86::VPCOMBri, X86::VPCOMBmi, 0 },
- { X86::VPCOMDri, X86::VPCOMDmi, 0 },
- { X86::VPCOMQri, X86::VPCOMQmi, 0 },
- { X86::VPCOMUBri, X86::VPCOMUBmi, 0 },
- { X86::VPCOMUDri, X86::VPCOMUDmi, 0 },
- { X86::VPCOMUQri, X86::VPCOMUQmi, 0 },
- { X86::VPCOMUWri, X86::VPCOMUWmi, 0 },
- { X86::VPCOMWri, X86::VPCOMWmi, 0 },
- { X86::VPCONFLICTDZ128rrkz, X86::VPCONFLICTDZ128rmkz, 0 },
- { X86::VPCONFLICTDZ256rrkz, X86::VPCONFLICTDZ256rmkz, 0 },
- { X86::VPCONFLICTDZrrkz, X86::VPCONFLICTDZrmkz, 0 },
- { X86::VPCONFLICTQZ128rrkz, X86::VPCONFLICTQZ128rmkz, 0 },
- { X86::VPCONFLICTQZ256rrkz, X86::VPCONFLICTQZ256rmkz, 0 },
- { X86::VPCONFLICTQZrrkz, X86::VPCONFLICTQZrmkz, 0 },
- { X86::VPERM2F128rr, X86::VPERM2F128rm, 0 },
- { X86::VPERM2I128rr, X86::VPERM2I128rm, 0 },
- { X86::VPERMBZ128rr, X86::VPERMBZ128rm, 0 },
- { X86::VPERMBZ256rr, X86::VPERMBZ256rm, 0 },
- { X86::VPERMBZrr, X86::VPERMBZrm, 0 },
- { X86::VPERMDYrr, X86::VPERMDYrm, 0 },
- { X86::VPERMDZ256rr, X86::VPERMDZ256rm, 0 },
- { X86::VPERMDZrr, X86::VPERMDZrm, 0 },
- { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYmr, 0 },
- { X86::VPERMIL2PDrr, X86::VPERMIL2PDmr, 0 },
- { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYmr, 0 },
- { X86::VPERMIL2PSrr, X86::VPERMIL2PSmr, 0 },
- { X86::VPERMILPDYrr, X86::VPERMILPDYrm, 0 },
- { X86::VPERMILPDZ128rikz, X86::VPERMILPDZ128mikz, 0 },
- { X86::VPERMILPDZ128rr, X86::VPERMILPDZ128rm, 0 },
- { X86::VPERMILPDZ256rikz, X86::VPERMILPDZ256mikz, 0 },
- { X86::VPERMILPDZ256rr, X86::VPERMILPDZ256rm, 0 },
- { X86::VPERMILPDZrikz, X86::VPERMILPDZmikz, 0 },
- { X86::VPERMILPDZrr, X86::VPERMILPDZrm, 0 },
- { X86::VPERMILPDrr, X86::VPERMILPDrm, 0 },
- { X86::VPERMILPSYrr, X86::VPERMILPSYrm, 0 },
- { X86::VPERMILPSZ128rikz, X86::VPERMILPSZ128mikz, 0 },
- { X86::VPERMILPSZ128rr, X86::VPERMILPSZ128rm, 0 },
- { X86::VPERMILPSZ256rikz, X86::VPERMILPSZ256mikz, 0 },
- { X86::VPERMILPSZ256rr, X86::VPERMILPSZ256rm, 0 },
- { X86::VPERMILPSZrikz, X86::VPERMILPSZmikz, 0 },
- { X86::VPERMILPSZrr, X86::VPERMILPSZrm, 0 },
- { X86::VPERMILPSrr, X86::VPERMILPSrm, 0 },
- { X86::VPERMPDZ256rikz, X86::VPERMPDZ256mikz, 0 },
- { X86::VPERMPDZ256rr, X86::VPERMPDZ256rm, 0 },
- { X86::VPERMPDZrikz, X86::VPERMPDZmikz, 0 },
- { X86::VPERMPDZrr, X86::VPERMPDZrm, 0 },
- { X86::VPERMPSYrr, X86::VPERMPSYrm, 0 },
- { X86::VPERMPSZ256rr, X86::VPERMPSZ256rm, 0 },
- { X86::VPERMPSZrr, X86::VPERMPSZrm, 0 },
- { X86::VPERMQZ256rikz, X86::VPERMQZ256mikz, 0 },
- { X86::VPERMQZ256rr, X86::VPERMQZ256rm, 0 },
- { X86::VPERMQZrikz, X86::VPERMQZmikz, 0 },
- { X86::VPERMQZrr, X86::VPERMQZrm, 0 },
- { X86::VPERMWZ128rr, X86::VPERMWZ128rm, 0 },
- { X86::VPERMWZ256rr, X86::VPERMWZ256rm, 0 },
- { X86::VPERMWZrr, X86::VPERMWZrm, 0 },
- { X86::VPEXPANDBZ128rrkz, X86::VPEXPANDBZ128rmkz, TB_NO_REVERSE },
- { X86::VPEXPANDBZ256rrkz, X86::VPEXPANDBZ256rmkz, TB_NO_REVERSE },
- { X86::VPEXPANDBZrrkz, X86::VPEXPANDBZrmkz, TB_NO_REVERSE },
- { X86::VPEXPANDDZ128rrkz, X86::VPEXPANDDZ128rmkz, TB_NO_REVERSE },
- { X86::VPEXPANDDZ256rrkz, X86::VPEXPANDDZ256rmkz, TB_NO_REVERSE },
- { X86::VPEXPANDDZrrkz, X86::VPEXPANDDZrmkz, TB_NO_REVERSE },
- { X86::VPEXPANDQZ128rrkz, X86::VPEXPANDQZ128rmkz, TB_NO_REVERSE },
- { X86::VPEXPANDQZ256rrkz, X86::VPEXPANDQZ256rmkz, TB_NO_REVERSE },
- { X86::VPEXPANDQZrrkz, X86::VPEXPANDQZrmkz, TB_NO_REVERSE },
- { X86::VPEXPANDWZ128rrkz, X86::VPEXPANDWZ128rmkz, TB_NO_REVERSE },
- { X86::VPEXPANDWZ256rrkz, X86::VPEXPANDWZ256rmkz, TB_NO_REVERSE },
- { X86::VPEXPANDWZrrkz, X86::VPEXPANDWZrmkz, TB_NO_REVERSE },
- { X86::VPHADDDYrr, X86::VPHADDDYrm, 0 },
- { X86::VPHADDDrr, X86::VPHADDDrm, 0 },
- { X86::VPHADDSWYrr, X86::VPHADDSWYrm, 0 },
- { X86::VPHADDSWrr, X86::VPHADDSWrm, 0 },
- { X86::VPHADDWYrr, X86::VPHADDWYrm, 0 },
- { X86::VPHADDWrr, X86::VPHADDWrm, 0 },
- { X86::VPHSUBDYrr, X86::VPHSUBDYrm, 0 },
- { X86::VPHSUBDrr, X86::VPHSUBDrm, 0 },
- { X86::VPHSUBSWYrr, X86::VPHSUBSWYrm, 0 },
- { X86::VPHSUBSWrr, X86::VPHSUBSWrm, 0 },
- { X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 },
- { X86::VPHSUBWrr, X86::VPHSUBWrm, 0 },
- { X86::VPINSRBZrr, X86::VPINSRBZrm, TB_NO_REVERSE },
- { X86::VPINSRBrr, X86::VPINSRBrm, TB_NO_REVERSE },
- { X86::VPINSRDZrr, X86::VPINSRDZrm, 0 },
- { X86::VPINSRDrr, X86::VPINSRDrm, 0 },
- { X86::VPINSRQZrr, X86::VPINSRQZrm, 0 },
- { X86::VPINSRQrr, X86::VPINSRQrm, 0 },
- { X86::VPINSRWZrr, X86::VPINSRWZrm, TB_NO_REVERSE },
- { X86::VPINSRWrr, X86::VPINSRWrm, TB_NO_REVERSE },
- { X86::VPLZCNTDZ128rrkz, X86::VPLZCNTDZ128rmkz, 0 },
- { X86::VPLZCNTDZ256rrkz, X86::VPLZCNTDZ256rmkz, 0 },
- { X86::VPLZCNTDZrrkz, X86::VPLZCNTDZrmkz, 0 },
- { X86::VPLZCNTQZ128rrkz, X86::VPLZCNTQZ128rmkz, 0 },
- { X86::VPLZCNTQZ256rrkz, X86::VPLZCNTQZ256rmkz, 0 },
- { X86::VPLZCNTQZrrkz, X86::VPLZCNTQZrmkz, 0 },
- { X86::VPMACSDDrr, X86::VPMACSDDrm, 0 },
- { X86::VPMACSDQHrr, X86::VPMACSDQHrm, 0 },
- { X86::VPMACSDQLrr, X86::VPMACSDQLrm, 0 },
- { X86::VPMACSSDDrr, X86::VPMACSSDDrm, 0 },
- { X86::VPMACSSDQHrr, X86::VPMACSSDQHrm, 0 },
- { X86::VPMACSSDQLrr, X86::VPMACSSDQLrm, 0 },
- { X86::VPMACSSWDrr, X86::VPMACSSWDrm, 0 },
- { X86::VPMACSSWWrr, X86::VPMACSSWWrm, 0 },
- { X86::VPMACSWDrr, X86::VPMACSWDrm, 0 },
- { X86::VPMACSWWrr, X86::VPMACSWWrm, 0 },
- { X86::VPMADCSSWDrr, X86::VPMADCSSWDrm, 0 },
- { X86::VPMADCSWDrr, X86::VPMADCSWDrm, 0 },
- { X86::VPMADDUBSWYrr, X86::VPMADDUBSWYrm, 0 },
- { X86::VPMADDUBSWZ128rr, X86::VPMADDUBSWZ128rm, 0 },
- { X86::VPMADDUBSWZ256rr, X86::VPMADDUBSWZ256rm, 0 },
- { X86::VPMADDUBSWZrr, X86::VPMADDUBSWZrm, 0 },
- { X86::VPMADDUBSWrr, X86::VPMADDUBSWrm, 0 },
- { X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 },
- { X86::VPMADDWDZ128rr, X86::VPMADDWDZ128rm, 0 },
- { X86::VPMADDWDZ256rr, X86::VPMADDWDZ256rm, 0 },
- { X86::VPMADDWDZrr, X86::VPMADDWDZrm, 0 },
- { X86::VPMADDWDrr, X86::VPMADDWDrm, 0 },
- { X86::VPMAXSBYrr, X86::VPMAXSBYrm, 0 },
- { X86::VPMAXSBZ128rr, X86::VPMAXSBZ128rm, 0 },
- { X86::VPMAXSBZ256rr, X86::VPMAXSBZ256rm, 0 },
- { X86::VPMAXSBZrr, X86::VPMAXSBZrm, 0 },
- { X86::VPMAXSBrr, X86::VPMAXSBrm, 0 },
- { X86::VPMAXSDYrr, X86::VPMAXSDYrm, 0 },
- { X86::VPMAXSDZ128rr, X86::VPMAXSDZ128rm, 0 },
- { X86::VPMAXSDZ256rr, X86::VPMAXSDZ256rm, 0 },
- { X86::VPMAXSDZrr, X86::VPMAXSDZrm, 0 },
- { X86::VPMAXSDrr, X86::VPMAXSDrm, 0 },
- { X86::VPMAXSQZ128rr, X86::VPMAXSQZ128rm, 0 },
- { X86::VPMAXSQZ256rr, X86::VPMAXSQZ256rm, 0 },
- { X86::VPMAXSQZrr, X86::VPMAXSQZrm, 0 },
- { X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 },
- { X86::VPMAXSWZ128rr, X86::VPMAXSWZ128rm, 0 },
- { X86::VPMAXSWZ256rr, X86::VPMAXSWZ256rm, 0 },
- { X86::VPMAXSWZrr, X86::VPMAXSWZrm, 0 },
- { X86::VPMAXSWrr, X86::VPMAXSWrm, 0 },
- { X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 },
- { X86::VPMAXUBZ128rr, X86::VPMAXUBZ128rm, 0 },
- { X86::VPMAXUBZ256rr, X86::VPMAXUBZ256rm, 0 },
- { X86::VPMAXUBZrr, X86::VPMAXUBZrm, 0 },
- { X86::VPMAXUBrr, X86::VPMAXUBrm, 0 },
- { X86::VPMAXUDYrr, X86::VPMAXUDYrm, 0 },
- { X86::VPMAXUDZ128rr, X86::VPMAXUDZ128rm, 0 },
- { X86::VPMAXUDZ256rr, X86::VPMAXUDZ256rm, 0 },
- { X86::VPMAXUDZrr, X86::VPMAXUDZrm, 0 },
- { X86::VPMAXUDrr, X86::VPMAXUDrm, 0 },
- { X86::VPMAXUQZ128rr, X86::VPMAXUQZ128rm, 0 },
- { X86::VPMAXUQZ256rr, X86::VPMAXUQZ256rm, 0 },
- { X86::VPMAXUQZrr, X86::VPMAXUQZrm, 0 },
- { X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 },
- { X86::VPMAXUWZ128rr, X86::VPMAXUWZ128rm, 0 },
- { X86::VPMAXUWZ256rr, X86::VPMAXUWZ256rm, 0 },
- { X86::VPMAXUWZrr, X86::VPMAXUWZrm, 0 },
- { X86::VPMAXUWrr, X86::VPMAXUWrm, 0 },
- { X86::VPMINSBYrr, X86::VPMINSBYrm, 0 },
- { X86::VPMINSBZ128rr, X86::VPMINSBZ128rm, 0 },
- { X86::VPMINSBZ256rr, X86::VPMINSBZ256rm, 0 },
- { X86::VPMINSBZrr, X86::VPMINSBZrm, 0 },
- { X86::VPMINSBrr, X86::VPMINSBrm, 0 },
- { X86::VPMINSDYrr, X86::VPMINSDYrm, 0 },
- { X86::VPMINSDZ128rr, X86::VPMINSDZ128rm, 0 },
- { X86::VPMINSDZ256rr, X86::VPMINSDZ256rm, 0 },
- { X86::VPMINSDZrr, X86::VPMINSDZrm, 0 },
- { X86::VPMINSDrr, X86::VPMINSDrm, 0 },
- { X86::VPMINSQZ128rr, X86::VPMINSQZ128rm, 0 },
- { X86::VPMINSQZ256rr, X86::VPMINSQZ256rm, 0 },
- { X86::VPMINSQZrr, X86::VPMINSQZrm, 0 },
- { X86::VPMINSWYrr, X86::VPMINSWYrm, 0 },
- { X86::VPMINSWZ128rr, X86::VPMINSWZ128rm, 0 },
- { X86::VPMINSWZ256rr, X86::VPMINSWZ256rm, 0 },
- { X86::VPMINSWZrr, X86::VPMINSWZrm, 0 },
- { X86::VPMINSWrr, X86::VPMINSWrm, 0 },
- { X86::VPMINUBYrr, X86::VPMINUBYrm, 0 },
- { X86::VPMINUBZ128rr, X86::VPMINUBZ128rm, 0 },
- { X86::VPMINUBZ256rr, X86::VPMINUBZ256rm, 0 },
- { X86::VPMINUBZrr, X86::VPMINUBZrm, 0 },
- { X86::VPMINUBrr, X86::VPMINUBrm, 0 },
- { X86::VPMINUDYrr, X86::VPMINUDYrm, 0 },
- { X86::VPMINUDZ128rr, X86::VPMINUDZ128rm, 0 },
- { X86::VPMINUDZ256rr, X86::VPMINUDZ256rm, 0 },
- { X86::VPMINUDZrr, X86::VPMINUDZrm, 0 },
- { X86::VPMINUDrr, X86::VPMINUDrm, 0 },
- { X86::VPMINUQZ128rr, X86::VPMINUQZ128rm, 0 },
- { X86::VPMINUQZ256rr, X86::VPMINUQZ256rm, 0 },
- { X86::VPMINUQZrr, X86::VPMINUQZrm, 0 },
- { X86::VPMINUWYrr, X86::VPMINUWYrm, 0 },
- { X86::VPMINUWZ128rr, X86::VPMINUWZ128rm, 0 },
- { X86::VPMINUWZ256rr, X86::VPMINUWZ256rm, 0 },
- { X86::VPMINUWZrr, X86::VPMINUWZrm, 0 },
- { X86::VPMINUWrr, X86::VPMINUWrm, 0 },
- { X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVSXBDZ256rrkz, X86::VPMOVSXBDZ256rmkz, TB_NO_REVERSE },
- { X86::VPMOVSXBDZrrkz, X86::VPMOVSXBDZrmkz, 0 },
- { X86::VPMOVSXBQZ128rrkz, X86::VPMOVSXBQZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVSXBQZ256rrkz, X86::VPMOVSXBQZ256rmkz, TB_NO_REVERSE },
- { X86::VPMOVSXBQZrrkz, X86::VPMOVSXBQZrmkz, TB_NO_REVERSE },
- { X86::VPMOVSXBWZ128rrkz, X86::VPMOVSXBWZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVSXBWZ256rrkz, X86::VPMOVSXBWZ256rmkz, 0 },
- { X86::VPMOVSXBWZrrkz, X86::VPMOVSXBWZrmkz, 0 },
- { X86::VPMOVSXDQZ128rrkz, X86::VPMOVSXDQZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVSXDQZ256rrkz, X86::VPMOVSXDQZ256rmkz, 0 },
- { X86::VPMOVSXDQZrrkz, X86::VPMOVSXDQZrmkz, 0 },
- { X86::VPMOVSXWDZ128rrkz, X86::VPMOVSXWDZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVSXWDZ256rrkz, X86::VPMOVSXWDZ256rmkz, 0 },
- { X86::VPMOVSXWDZrrkz, X86::VPMOVSXWDZrmkz, 0 },
- { X86::VPMOVSXWQZ128rrkz, X86::VPMOVSXWQZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVSXWQZ256rrkz, X86::VPMOVSXWQZ256rmkz, TB_NO_REVERSE },
- { X86::VPMOVSXWQZrrkz, X86::VPMOVSXWQZrmkz, 0 },
- { X86::VPMOVZXBDZ128rrkz, X86::VPMOVZXBDZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVZXBDZ256rrkz, X86::VPMOVZXBDZ256rmkz, TB_NO_REVERSE },
- { X86::VPMOVZXBDZrrkz, X86::VPMOVZXBDZrmkz, 0 },
- { X86::VPMOVZXBQZ128rrkz, X86::VPMOVZXBQZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVZXBQZ256rrkz, X86::VPMOVZXBQZ256rmkz, TB_NO_REVERSE },
- { X86::VPMOVZXBQZrrkz, X86::VPMOVZXBQZrmkz, TB_NO_REVERSE },
- { X86::VPMOVZXBWZ128rrkz, X86::VPMOVZXBWZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVZXBWZ256rrkz, X86::VPMOVZXBWZ256rmkz, 0 },
- { X86::VPMOVZXBWZrrkz, X86::VPMOVZXBWZrmkz, 0 },
- { X86::VPMOVZXDQZ128rrkz, X86::VPMOVZXDQZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVZXDQZ256rrkz, X86::VPMOVZXDQZ256rmkz, 0 },
- { X86::VPMOVZXDQZrrkz, X86::VPMOVZXDQZrmkz, 0 },
- { X86::VPMOVZXWDZ128rrkz, X86::VPMOVZXWDZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVZXWDZ256rrkz, X86::VPMOVZXWDZ256rmkz, 0 },
- { X86::VPMOVZXWDZrrkz, X86::VPMOVZXWDZrmkz, 0 },
- { X86::VPMOVZXWQZ128rrkz, X86::VPMOVZXWQZ128rmkz, TB_NO_REVERSE },
- { X86::VPMOVZXWQZ256rrkz, X86::VPMOVZXWQZ256rmkz, TB_NO_REVERSE },
- { X86::VPMOVZXWQZrrkz, X86::VPMOVZXWQZrmkz, 0 },
- { X86::VPMULDQYrr, X86::VPMULDQYrm, 0 },
- { X86::VPMULDQZ128rr, X86::VPMULDQZ128rm, 0 },
- { X86::VPMULDQZ256rr, X86::VPMULDQZ256rm, 0 },
- { X86::VPMULDQZrr, X86::VPMULDQZrm, 0 },
- { X86::VPMULDQrr, X86::VPMULDQrm, 0 },
- { X86::VPMULHRSWYrr, X86::VPMULHRSWYrm, 0 },
- { X86::VPMULHRSWZ128rr, X86::VPMULHRSWZ128rm, 0 },
- { X86::VPMULHRSWZ256rr, X86::VPMULHRSWZ256rm, 0 },
- { X86::VPMULHRSWZrr, X86::VPMULHRSWZrm, 0 },
- { X86::VPMULHRSWrr, X86::VPMULHRSWrm, 0 },
- { X86::VPMULHUWYrr, X86::VPMULHUWYrm, 0 },
- { X86::VPMULHUWZ128rr, X86::VPMULHUWZ128rm, 0 },
- { X86::VPMULHUWZ256rr, X86::VPMULHUWZ256rm, 0 },
- { X86::VPMULHUWZrr, X86::VPMULHUWZrm, 0 },
- { X86::VPMULHUWrr, X86::VPMULHUWrm, 0 },
- { X86::VPMULHWYrr, X86::VPMULHWYrm, 0 },
- { X86::VPMULHWZ128rr, X86::VPMULHWZ128rm, 0 },
- { X86::VPMULHWZ256rr, X86::VPMULHWZ256rm, 0 },
- { X86::VPMULHWZrr, X86::VPMULHWZrm, 0 },
- { X86::VPMULHWrr, X86::VPMULHWrm, 0 },
- { X86::VPMULLDYrr, X86::VPMULLDYrm, 0 },
- { X86::VPMULLDZ128rr, X86::VPMULLDZ128rm, 0 },
- { X86::VPMULLDZ256rr, X86::VPMULLDZ256rm, 0 },
- { X86::VPMULLDZrr, X86::VPMULLDZrm, 0 },
- { X86::VPMULLDrr, X86::VPMULLDrm, 0 },
- { X86::VPMULLQZ128rr, X86::VPMULLQZ128rm, 0 },
- { X86::VPMULLQZ256rr, X86::VPMULLQZ256rm, 0 },
- { X86::VPMULLQZrr, X86::VPMULLQZrm, 0 },
- { X86::VPMULLWYrr, X86::VPMULLWYrm, 0 },
- { X86::VPMULLWZ128rr, X86::VPMULLWZ128rm, 0 },
- { X86::VPMULLWZ256rr, X86::VPMULLWZ256rm, 0 },
- { X86::VPMULLWZrr, X86::VPMULLWZrm, 0 },
- { X86::VPMULLWrr, X86::VPMULLWrm, 0 },
- { X86::VPMULTISHIFTQBZ128rr, X86::VPMULTISHIFTQBZ128rm, 0 },
- { X86::VPMULTISHIFTQBZ256rr, X86::VPMULTISHIFTQBZ256rm, 0 },
- { X86::VPMULTISHIFTQBZrr, X86::VPMULTISHIFTQBZrm, 0 },
- { X86::VPMULUDQYrr, X86::VPMULUDQYrm, 0 },
- { X86::VPMULUDQZ128rr, X86::VPMULUDQZ128rm, 0 },
- { X86::VPMULUDQZ256rr, X86::VPMULUDQZ256rm, 0 },
- { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 },
- { X86::VPMULUDQrr, X86::VPMULUDQrm, 0 },
- { X86::VPOPCNTBZ128rrkz, X86::VPOPCNTBZ128rmkz, 0 },
- { X86::VPOPCNTBZ256rrkz, X86::VPOPCNTBZ256rmkz, 0 },
- { X86::VPOPCNTBZrrkz, X86::VPOPCNTBZrmkz, 0 },
- { X86::VPOPCNTDZ128rrkz, X86::VPOPCNTDZ128rmkz, 0 },
- { X86::VPOPCNTDZ256rrkz, X86::VPOPCNTDZ256rmkz, 0 },
- { X86::VPOPCNTDZrrkz, X86::VPOPCNTDZrmkz, 0 },
- { X86::VPOPCNTQZ128rrkz, X86::VPOPCNTQZ128rmkz, 0 },
- { X86::VPOPCNTQZ256rrkz, X86::VPOPCNTQZ256rmkz, 0 },
- { X86::VPOPCNTQZrrkz, X86::VPOPCNTQZrmkz, 0 },
- { X86::VPOPCNTWZ128rrkz, X86::VPOPCNTWZ128rmkz, 0 },
- { X86::VPOPCNTWZ256rrkz, X86::VPOPCNTWZ256rmkz, 0 },
- { X86::VPOPCNTWZrrkz, X86::VPOPCNTWZrmkz, 0 },
- { X86::VPORDZ128rr, X86::VPORDZ128rm, 0 },
- { X86::VPORDZ256rr, X86::VPORDZ256rm, 0 },
- { X86::VPORDZrr, X86::VPORDZrm, 0 },
- { X86::VPORQZ128rr, X86::VPORQZ128rm, 0 },
- { X86::VPORQZ256rr, X86::VPORQZ256rm, 0 },
- { X86::VPORQZrr, X86::VPORQZrm, 0 },
- { X86::VPORYrr, X86::VPORYrm, 0 },
- { X86::VPORrr, X86::VPORrm, 0 },
- { X86::VPPERMrrr, X86::VPPERMrmr, 0 },
- { X86::VPROLDZ128rikz, X86::VPROLDZ128mikz, 0 },
- { X86::VPROLDZ256rikz, X86::VPROLDZ256mikz, 0 },
- { X86::VPROLDZrikz, X86::VPROLDZmikz, 0 },
- { X86::VPROLQZ128rikz, X86::VPROLQZ128mikz, 0 },
- { X86::VPROLQZ256rikz, X86::VPROLQZ256mikz, 0 },
- { X86::VPROLQZrikz, X86::VPROLQZmikz, 0 },
- { X86::VPROLVDZ128rr, X86::VPROLVDZ128rm, 0 },
- { X86::VPROLVDZ256rr, X86::VPROLVDZ256rm, 0 },
- { X86::VPROLVDZrr, X86::VPROLVDZrm, 0 },
- { X86::VPROLVQZ128rr, X86::VPROLVQZ128rm, 0 },
- { X86::VPROLVQZ256rr, X86::VPROLVQZ256rm, 0 },
- { X86::VPROLVQZrr, X86::VPROLVQZrm, 0 },
- { X86::VPRORDZ128rikz, X86::VPRORDZ128mikz, 0 },
- { X86::VPRORDZ256rikz, X86::VPRORDZ256mikz, 0 },
- { X86::VPRORDZrikz, X86::VPRORDZmikz, 0 },
- { X86::VPRORQZ128rikz, X86::VPRORQZ128mikz, 0 },
- { X86::VPRORQZ256rikz, X86::VPRORQZ256mikz, 0 },
- { X86::VPRORQZrikz, X86::VPRORQZmikz, 0 },
- { X86::VPRORVDZ128rr, X86::VPRORVDZ128rm, 0 },
- { X86::VPRORVDZ256rr, X86::VPRORVDZ256rm, 0 },
- { X86::VPRORVDZrr, X86::VPRORVDZrm, 0 },
- { X86::VPRORVQZ128rr, X86::VPRORVQZ128rm, 0 },
- { X86::VPRORVQZ256rr, X86::VPRORVQZ256rm, 0 },
- { X86::VPRORVQZrr, X86::VPRORVQZrm, 0 },
- { X86::VPROTBrr, X86::VPROTBrm, 0 },
- { X86::VPROTDrr, X86::VPROTDrm, 0 },
- { X86::VPROTQrr, X86::VPROTQrm, 0 },
- { X86::VPROTWrr, X86::VPROTWrm, 0 },
- { X86::VPSADBWYrr, X86::VPSADBWYrm, 0 },
- { X86::VPSADBWZ128rr, X86::VPSADBWZ128rm, 0 },
- { X86::VPSADBWZ256rr, X86::VPSADBWZ256rm, 0 },
- { X86::VPSADBWZrr, X86::VPSADBWZrm, 0 },
- { X86::VPSADBWrr, X86::VPSADBWrm, 0 },
- { X86::VPSHABrr, X86::VPSHABrm, 0 },
- { X86::VPSHADrr, X86::VPSHADrm, 0 },
- { X86::VPSHAQrr, X86::VPSHAQrm, 0 },
- { X86::VPSHAWrr, X86::VPSHAWrm, 0 },
- { X86::VPSHLBrr, X86::VPSHLBrm, 0 },
- { X86::VPSHLDDZ128rri, X86::VPSHLDDZ128rmi, 0 },
- { X86::VPSHLDDZ256rri, X86::VPSHLDDZ256rmi, 0 },
- { X86::VPSHLDDZrri, X86::VPSHLDDZrmi, 0 },
- { X86::VPSHLDQZ128rri, X86::VPSHLDQZ128rmi, 0 },
- { X86::VPSHLDQZ256rri, X86::VPSHLDQZ256rmi, 0 },
- { X86::VPSHLDQZrri, X86::VPSHLDQZrmi, 0 },
- { X86::VPSHLDWZ128rri, X86::VPSHLDWZ128rmi, 0 },
- { X86::VPSHLDWZ256rri, X86::VPSHLDWZ256rmi, 0 },
- { X86::VPSHLDWZrri, X86::VPSHLDWZrmi, 0 },
- { X86::VPSHLDrr, X86::VPSHLDrm, 0 },
- { X86::VPSHLQrr, X86::VPSHLQrm, 0 },
- { X86::VPSHLWrr, X86::VPSHLWrm, 0 },
- { X86::VPSHRDDZ128rri, X86::VPSHRDDZ128rmi, 0 },
- { X86::VPSHRDDZ256rri, X86::VPSHRDDZ256rmi, 0 },
- { X86::VPSHRDDZrri, X86::VPSHRDDZrmi, 0 },
- { X86::VPSHRDQZ128rri, X86::VPSHRDQZ128rmi, 0 },
- { X86::VPSHRDQZ256rri, X86::VPSHRDQZ256rmi, 0 },
- { X86::VPSHRDQZrri, X86::VPSHRDQZrmi, 0 },
- { X86::VPSHRDWZ128rri, X86::VPSHRDWZ128rmi, 0 },
- { X86::VPSHRDWZ256rri, X86::VPSHRDWZ256rmi, 0 },
- { X86::VPSHRDWZrri, X86::VPSHRDWZrmi, 0 },
- { X86::VPSHUFBITQMBZ128rr, X86::VPSHUFBITQMBZ128rm, 0 },
- { X86::VPSHUFBITQMBZ256rr, X86::VPSHUFBITQMBZ256rm, 0 },
- { X86::VPSHUFBITQMBZrr, X86::VPSHUFBITQMBZrm, 0 },
- { X86::VPSHUFBYrr, X86::VPSHUFBYrm, 0 },
- { X86::VPSHUFBZ128rr, X86::VPSHUFBZ128rm, 0 },
- { X86::VPSHUFBZ256rr, X86::VPSHUFBZ256rm, 0 },
- { X86::VPSHUFBZrr, X86::VPSHUFBZrm, 0 },
- { X86::VPSHUFBrr, X86::VPSHUFBrm, 0 },
- { X86::VPSHUFDZ128rikz, X86::VPSHUFDZ128mikz, 0 },
- { X86::VPSHUFDZ256rikz, X86::VPSHUFDZ256mikz, 0 },
- { X86::VPSHUFDZrikz, X86::VPSHUFDZmikz, 0 },
- { X86::VPSHUFHWZ128rikz, X86::VPSHUFHWZ128mikz, 0 },
- { X86::VPSHUFHWZ256rikz, X86::VPSHUFHWZ256mikz, 0 },
- { X86::VPSHUFHWZrikz, X86::VPSHUFHWZmikz, 0 },
- { X86::VPSHUFLWZ128rikz, X86::VPSHUFLWZ128mikz, 0 },
- { X86::VPSHUFLWZ256rikz, X86::VPSHUFLWZ256mikz, 0 },
- { X86::VPSHUFLWZrikz, X86::VPSHUFLWZmikz, 0 },
- { X86::VPSIGNBYrr, X86::VPSIGNBYrm, 0 },
- { X86::VPSIGNBrr, X86::VPSIGNBrm, 0 },
- { X86::VPSIGNDYrr, X86::VPSIGNDYrm, 0 },
- { X86::VPSIGNDrr, X86::VPSIGNDrm, 0 },
- { X86::VPSIGNWYrr, X86::VPSIGNWYrm, 0 },
- { X86::VPSIGNWrr, X86::VPSIGNWrm, 0 },
- { X86::VPSLLDYrr, X86::VPSLLDYrm, 0 },
- { X86::VPSLLDZ128rikz, X86::VPSLLDZ128mikz, 0 },
- { X86::VPSLLDZ128rr, X86::VPSLLDZ128rm, 0 },
- { X86::VPSLLDZ256rikz, X86::VPSLLDZ256mikz, 0 },
- { X86::VPSLLDZ256rr, X86::VPSLLDZ256rm, 0 },
- { X86::VPSLLDZrikz, X86::VPSLLDZmikz, 0 },
- { X86::VPSLLDZrr, X86::VPSLLDZrm, 0 },
- { X86::VPSLLDrr, X86::VPSLLDrm, 0 },
- { X86::VPSLLQYrr, X86::VPSLLQYrm, 0 },
- { X86::VPSLLQZ128rikz, X86::VPSLLQZ128mikz, 0 },
- { X86::VPSLLQZ128rr, X86::VPSLLQZ128rm, 0 },
- { X86::VPSLLQZ256rikz, X86::VPSLLQZ256mikz, 0 },
- { X86::VPSLLQZ256rr, X86::VPSLLQZ256rm, 0 },
- { X86::VPSLLQZrikz, X86::VPSLLQZmikz, 0 },
- { X86::VPSLLQZrr, X86::VPSLLQZrm, 0 },
- { X86::VPSLLQrr, X86::VPSLLQrm, 0 },
- { X86::VPSLLVDYrr, X86::VPSLLVDYrm, 0 },
- { X86::VPSLLVDZ128rr, X86::VPSLLVDZ128rm, 0 },
- { X86::VPSLLVDZ256rr, X86::VPSLLVDZ256rm, 0 },
- { X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 },
- { X86::VPSLLVDrr, X86::VPSLLVDrm, 0 },
- { X86::VPSLLVQYrr, X86::VPSLLVQYrm, 0 },
- { X86::VPSLLVQZ128rr, X86::VPSLLVQZ128rm, 0 },
- { X86::VPSLLVQZ256rr, X86::VPSLLVQZ256rm, 0 },
- { X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 },
- { X86::VPSLLVQrr, X86::VPSLLVQrm, 0 },
- { X86::VPSLLVWZ128rr, X86::VPSLLVWZ128rm, 0 },
- { X86::VPSLLVWZ256rr, X86::VPSLLVWZ256rm, 0 },
- { X86::VPSLLVWZrr, X86::VPSLLVWZrm, 0 },
- { X86::VPSLLWYrr, X86::VPSLLWYrm, 0 },
- { X86::VPSLLWZ128rikz, X86::VPSLLWZ128mikz, 0 },
- { X86::VPSLLWZ128rr, X86::VPSLLWZ128rm, 0 },
- { X86::VPSLLWZ256rikz, X86::VPSLLWZ256mikz, 0 },
- { X86::VPSLLWZ256rr, X86::VPSLLWZ256rm, 0 },
- { X86::VPSLLWZrikz, X86::VPSLLWZmikz, 0 },
- { X86::VPSLLWZrr, X86::VPSLLWZrm, 0 },
- { X86::VPSLLWrr, X86::VPSLLWrm, 0 },
- { X86::VPSRADYrr, X86::VPSRADYrm, 0 },
- { X86::VPSRADZ128rikz, X86::VPSRADZ128mikz, 0 },
- { X86::VPSRADZ128rr, X86::VPSRADZ128rm, 0 },
- { X86::VPSRADZ256rikz, X86::VPSRADZ256mikz, 0 },
- { X86::VPSRADZ256rr, X86::VPSRADZ256rm, 0 },
- { X86::VPSRADZrikz, X86::VPSRADZmikz, 0 },
- { X86::VPSRADZrr, X86::VPSRADZrm, 0 },
- { X86::VPSRADrr, X86::VPSRADrm, 0 },
- { X86::VPSRAQZ128rikz, X86::VPSRAQZ128mikz, 0 },
- { X86::VPSRAQZ128rr, X86::VPSRAQZ128rm, 0 },
- { X86::VPSRAQZ256rikz, X86::VPSRAQZ256mikz, 0 },
- { X86::VPSRAQZ256rr, X86::VPSRAQZ256rm, 0 },
- { X86::VPSRAQZrikz, X86::VPSRAQZmikz, 0 },
- { X86::VPSRAQZrr, X86::VPSRAQZrm, 0 },
- { X86::VPSRAVDYrr, X86::VPSRAVDYrm, 0 },
- { X86::VPSRAVDZ128rr, X86::VPSRAVDZ128rm, 0 },
- { X86::VPSRAVDZ256rr, X86::VPSRAVDZ256rm, 0 },
- { X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 },
- { X86::VPSRAVDrr, X86::VPSRAVDrm, 0 },
- { X86::VPSRAVQZ128rr, X86::VPSRAVQZ128rm, 0 },
- { X86::VPSRAVQZ256rr, X86::VPSRAVQZ256rm, 0 },
- { X86::VPSRAVQZrr, X86::VPSRAVQZrm, 0 },
- { X86::VPSRAVWZ128rr, X86::VPSRAVWZ128rm, 0 },
- { X86::VPSRAVWZ256rr, X86::VPSRAVWZ256rm, 0 },
- { X86::VPSRAVWZrr, X86::VPSRAVWZrm, 0 },
- { X86::VPSRAWYrr, X86::VPSRAWYrm, 0 },
- { X86::VPSRAWZ128rikz, X86::VPSRAWZ128mikz, 0 },
- { X86::VPSRAWZ128rr, X86::VPSRAWZ128rm, 0 },
- { X86::VPSRAWZ256rikz, X86::VPSRAWZ256mikz, 0 },
- { X86::VPSRAWZ256rr, X86::VPSRAWZ256rm, 0 },
- { X86::VPSRAWZrikz, X86::VPSRAWZmikz, 0 },
- { X86::VPSRAWZrr, X86::VPSRAWZrm, 0 },
- { X86::VPSRAWrr, X86::VPSRAWrm, 0 },
- { X86::VPSRLDYrr, X86::VPSRLDYrm, 0 },
- { X86::VPSRLDZ128rikz, X86::VPSRLDZ128mikz, 0 },
- { X86::VPSRLDZ128rr, X86::VPSRLDZ128rm, 0 },
- { X86::VPSRLDZ256rikz, X86::VPSRLDZ256mikz, 0 },
- { X86::VPSRLDZ256rr, X86::VPSRLDZ256rm, 0 },
- { X86::VPSRLDZrikz, X86::VPSRLDZmikz, 0 },
- { X86::VPSRLDZrr, X86::VPSRLDZrm, 0 },
- { X86::VPSRLDrr, X86::VPSRLDrm, 0 },
- { X86::VPSRLQYrr, X86::VPSRLQYrm, 0 },
- { X86::VPSRLQZ128rikz, X86::VPSRLQZ128mikz, 0 },
- { X86::VPSRLQZ128rr, X86::VPSRLQZ128rm, 0 },
- { X86::VPSRLQZ256rikz, X86::VPSRLQZ256mikz, 0 },
- { X86::VPSRLQZ256rr, X86::VPSRLQZ256rm, 0 },
- { X86::VPSRLQZrikz, X86::VPSRLQZmikz, 0 },
- { X86::VPSRLQZrr, X86::VPSRLQZrm, 0 },
- { X86::VPSRLQrr, X86::VPSRLQrm, 0 },
- { X86::VPSRLVDYrr, X86::VPSRLVDYrm, 0 },
- { X86::VPSRLVDZ128rr, X86::VPSRLVDZ128rm, 0 },
- { X86::VPSRLVDZ256rr, X86::VPSRLVDZ256rm, 0 },
- { X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 },
- { X86::VPSRLVDrr, X86::VPSRLVDrm, 0 },
- { X86::VPSRLVQYrr, X86::VPSRLVQYrm, 0 },
- { X86::VPSRLVQZ128rr, X86::VPSRLVQZ128rm, 0 },
- { X86::VPSRLVQZ256rr, X86::VPSRLVQZ256rm, 0 },
- { X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 },
- { X86::VPSRLVQrr, X86::VPSRLVQrm, 0 },
- { X86::VPSRLVWZ128rr, X86::VPSRLVWZ128rm, 0 },
- { X86::VPSRLVWZ256rr, X86::VPSRLVWZ256rm, 0 },
- { X86::VPSRLVWZrr, X86::VPSRLVWZrm, 0 },
- { X86::VPSRLWYrr, X86::VPSRLWYrm, 0 },
- { X86::VPSRLWZ128rikz, X86::VPSRLWZ128mikz, 0 },
- { X86::VPSRLWZ128rr, X86::VPSRLWZ128rm, 0 },
- { X86::VPSRLWZ256rikz, X86::VPSRLWZ256mikz, 0 },
- { X86::VPSRLWZ256rr, X86::VPSRLWZ256rm, 0 },
- { X86::VPSRLWZrikz, X86::VPSRLWZmikz, 0 },
- { X86::VPSRLWZrr, X86::VPSRLWZrm, 0 },
- { X86::VPSRLWrr, X86::VPSRLWrm, 0 },
- { X86::VPSUBBYrr, X86::VPSUBBYrm, 0 },
- { X86::VPSUBBZ128rr, X86::VPSUBBZ128rm, 0 },
- { X86::VPSUBBZ256rr, X86::VPSUBBZ256rm, 0 },
- { X86::VPSUBBZrr, X86::VPSUBBZrm, 0 },
- { X86::VPSUBBrr, X86::VPSUBBrm, 0 },
- { X86::VPSUBDYrr, X86::VPSUBDYrm, 0 },
- { X86::VPSUBDZ128rr, X86::VPSUBDZ128rm, 0 },
- { X86::VPSUBDZ256rr, X86::VPSUBDZ256rm, 0 },
- { X86::VPSUBDZrr, X86::VPSUBDZrm, 0 },
- { X86::VPSUBDrr, X86::VPSUBDrm, 0 },
- { X86::VPSUBQYrr, X86::VPSUBQYrm, 0 },
- { X86::VPSUBQZ128rr, X86::VPSUBQZ128rm, 0 },
- { X86::VPSUBQZ256rr, X86::VPSUBQZ256rm, 0 },
- { X86::VPSUBQZrr, X86::VPSUBQZrm, 0 },
- { X86::VPSUBQrr, X86::VPSUBQrm, 0 },
- { X86::VPSUBSBYrr, X86::VPSUBSBYrm, 0 },
- { X86::VPSUBSBZ128rr, X86::VPSUBSBZ128rm, 0 },
- { X86::VPSUBSBZ256rr, X86::VPSUBSBZ256rm, 0 },
- { X86::VPSUBSBZrr, X86::VPSUBSBZrm, 0 },
- { X86::VPSUBSBrr, X86::VPSUBSBrm, 0 },
- { X86::VPSUBSWYrr, X86::VPSUBSWYrm, 0 },
- { X86::VPSUBSWZ128rr, X86::VPSUBSWZ128rm, 0 },
- { X86::VPSUBSWZ256rr, X86::VPSUBSWZ256rm, 0 },
- { X86::VPSUBSWZrr, X86::VPSUBSWZrm, 0 },
- { X86::VPSUBSWrr, X86::VPSUBSWrm, 0 },
- { X86::VPSUBUSBYrr, X86::VPSUBUSBYrm, 0 },
- { X86::VPSUBUSBZ128rr, X86::VPSUBUSBZ128rm, 0 },
- { X86::VPSUBUSBZ256rr, X86::VPSUBUSBZ256rm, 0 },
- { X86::VPSUBUSBZrr, X86::VPSUBUSBZrm, 0 },
- { X86::VPSUBUSBrr, X86::VPSUBUSBrm, 0 },
- { X86::VPSUBUSWYrr, X86::VPSUBUSWYrm, 0 },
- { X86::VPSUBUSWZ128rr, X86::VPSUBUSWZ128rm, 0 },
- { X86::VPSUBUSWZ256rr, X86::VPSUBUSWZ256rm, 0 },
- { X86::VPSUBUSWZrr, X86::VPSUBUSWZrm, 0 },
- { X86::VPSUBUSWrr, X86::VPSUBUSWrm, 0 },
- { X86::VPSUBWYrr, X86::VPSUBWYrm, 0 },
- { X86::VPSUBWZ128rr, X86::VPSUBWZ128rm, 0 },
- { X86::VPSUBWZ256rr, X86::VPSUBWZ256rm, 0 },
- { X86::VPSUBWZrr, X86::VPSUBWZrm, 0 },
- { X86::VPSUBWrr, X86::VPSUBWrm, 0 },
- { X86::VPTESTMBZ128rr, X86::VPTESTMBZ128rm, 0 },
- { X86::VPTESTMBZ256rr, X86::VPTESTMBZ256rm, 0 },
- { X86::VPTESTMBZrr, X86::VPTESTMBZrm, 0 },
- { X86::VPTESTMDZ128rr, X86::VPTESTMDZ128rm, 0 },
- { X86::VPTESTMDZ256rr, X86::VPTESTMDZ256rm, 0 },
- { X86::VPTESTMDZrr, X86::VPTESTMDZrm, 0 },
- { X86::VPTESTMQZ128rr, X86::VPTESTMQZ128rm, 0 },
- { X86::VPTESTMQZ256rr, X86::VPTESTMQZ256rm, 0 },
- { X86::VPTESTMQZrr, X86::VPTESTMQZrm, 0 },
- { X86::VPTESTMWZ128rr, X86::VPTESTMWZ128rm, 0 },
- { X86::VPTESTMWZ256rr, X86::VPTESTMWZ256rm, 0 },
- { X86::VPTESTMWZrr, X86::VPTESTMWZrm, 0 },
- { X86::VPTESTNMBZ128rr, X86::VPTESTNMBZ128rm, 0 },
- { X86::VPTESTNMBZ256rr, X86::VPTESTNMBZ256rm, 0 },
- { X86::VPTESTNMBZrr, X86::VPTESTNMBZrm, 0 },
- { X86::VPTESTNMDZ128rr, X86::VPTESTNMDZ128rm, 0 },
- { X86::VPTESTNMDZ256rr, X86::VPTESTNMDZ256rm, 0 },
- { X86::VPTESTNMDZrr, X86::VPTESTNMDZrm, 0 },
- { X86::VPTESTNMQZ128rr, X86::VPTESTNMQZ128rm, 0 },
- { X86::VPTESTNMQZ256rr, X86::VPTESTNMQZ256rm, 0 },
- { X86::VPTESTNMQZrr, X86::VPTESTNMQZrm, 0 },
- { X86::VPTESTNMWZ128rr, X86::VPTESTNMWZ128rm, 0 },
- { X86::VPTESTNMWZ256rr, X86::VPTESTNMWZ256rm, 0 },
- { X86::VPTESTNMWZrr, X86::VPTESTNMWZrm, 0 },
- { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, 0 },
- { X86::VPUNPCKHBWZ128rr, X86::VPUNPCKHBWZ128rm, 0 },
- { X86::VPUNPCKHBWZ256rr, X86::VPUNPCKHBWZ256rm, 0 },
- { X86::VPUNPCKHBWZrr, X86::VPUNPCKHBWZrm, 0 },
- { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrm, 0 },
- { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, 0 },
- { X86::VPUNPCKHDQZ128rr, X86::VPUNPCKHDQZ128rm, 0 },
- { X86::VPUNPCKHDQZ256rr, X86::VPUNPCKHDQZ256rm, 0 },
- { X86::VPUNPCKHDQZrr, X86::VPUNPCKHDQZrm, 0 },
- { X86::VPUNPCKHDQrr, X86::VPUNPCKHDQrm, 0 },
- { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, 0 },
- { X86::VPUNPCKHQDQZ128rr, X86::VPUNPCKHQDQZ128rm, 0 },
- { X86::VPUNPCKHQDQZ256rr, X86::VPUNPCKHQDQZ256rm, 0 },
- { X86::VPUNPCKHQDQZrr, X86::VPUNPCKHQDQZrm, 0 },
- { X86::VPUNPCKHQDQrr, X86::VPUNPCKHQDQrm, 0 },
- { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, 0 },
- { X86::VPUNPCKHWDZ128rr, X86::VPUNPCKHWDZ128rm, 0 },
- { X86::VPUNPCKHWDZ256rr, X86::VPUNPCKHWDZ256rm, 0 },
- { X86::VPUNPCKHWDZrr, X86::VPUNPCKHWDZrm, 0 },
- { X86::VPUNPCKHWDrr, X86::VPUNPCKHWDrm, 0 },
- { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, 0 },
- { X86::VPUNPCKLBWZ128rr, X86::VPUNPCKLBWZ128rm, 0 },
- { X86::VPUNPCKLBWZ256rr, X86::VPUNPCKLBWZ256rm, 0 },
- { X86::VPUNPCKLBWZrr, X86::VPUNPCKLBWZrm, 0 },
- { X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrm, 0 },
- { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, 0 },
- { X86::VPUNPCKLDQZ128rr, X86::VPUNPCKLDQZ128rm, 0 },
- { X86::VPUNPCKLDQZ256rr, X86::VPUNPCKLDQZ256rm, 0 },
- { X86::VPUNPCKLDQZrr, X86::VPUNPCKLDQZrm, 0 },
- { X86::VPUNPCKLDQrr, X86::VPUNPCKLDQrm, 0 },
- { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, 0 },
- { X86::VPUNPCKLQDQZ128rr, X86::VPUNPCKLQDQZ128rm, 0 },
- { X86::VPUNPCKLQDQZ256rr, X86::VPUNPCKLQDQZ256rm, 0 },
- { X86::VPUNPCKLQDQZrr, X86::VPUNPCKLQDQZrm, 0 },
- { X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 },
- { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, 0 },
- { X86::VPUNPCKLWDZ128rr, X86::VPUNPCKLWDZ128rm, 0 },
- { X86::VPUNPCKLWDZ256rr, X86::VPUNPCKLWDZ256rm, 0 },
- { X86::VPUNPCKLWDZrr, X86::VPUNPCKLWDZrm, 0 },
- { X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 },
- { X86::VPXORDZ128rr, X86::VPXORDZ128rm, 0 },
- { X86::VPXORDZ256rr, X86::VPXORDZ256rm, 0 },
- { X86::VPXORDZrr, X86::VPXORDZrm, 0 },
- { X86::VPXORQZ128rr, X86::VPXORQZ128rm, 0 },
- { X86::VPXORQZ256rr, X86::VPXORQZ256rm, 0 },
- { X86::VPXORQZrr, X86::VPXORQZrm, 0 },
- { X86::VPXORYrr, X86::VPXORYrm, 0 },
- { X86::VPXORrr, X86::VPXORrm, 0 },
- { X86::VRANGEPDZ128rri, X86::VRANGEPDZ128rmi, 0 },
- { X86::VRANGEPDZ256rri, X86::VRANGEPDZ256rmi, 0 },
- { X86::VRANGEPDZrri, X86::VRANGEPDZrmi, 0 },
- { X86::VRANGEPSZ128rri, X86::VRANGEPSZ128rmi, 0 },
- { X86::VRANGEPSZ256rri, X86::VRANGEPSZ256rmi, 0 },
- { X86::VRANGEPSZrri, X86::VRANGEPSZrmi, 0 },
- { X86::VRANGESDZrri, X86::VRANGESDZrmi, TB_NO_REVERSE },
- { X86::VRANGESSZrri, X86::VRANGESSZrmi, TB_NO_REVERSE },
- { X86::VRCP14PDZ128rkz, X86::VRCP14PDZ128mkz, 0 },
- { X86::VRCP14PDZ256rkz, X86::VRCP14PDZ256mkz, 0 },
- { X86::VRCP14PDZrkz, X86::VRCP14PDZmkz, 0 },
- { X86::VRCP14PSZ128rkz, X86::VRCP14PSZ128mkz, 0 },
- { X86::VRCP14PSZ256rkz, X86::VRCP14PSZ256mkz, 0 },
- { X86::VRCP14PSZrkz, X86::VRCP14PSZmkz, 0 },
- { X86::VRCP14SDZrr, X86::VRCP14SDZrm, TB_NO_REVERSE },
- { X86::VRCP14SSZrr, X86::VRCP14SSZrm, TB_NO_REVERSE },
- { X86::VRCP28PDZrkz, X86::VRCP28PDZmkz, 0 },
- { X86::VRCP28PSZrkz, X86::VRCP28PSZmkz, 0 },
- { X86::VRCP28SDZr, X86::VRCP28SDZm, TB_NO_REVERSE },
- { X86::VRCP28SSZr, X86::VRCP28SSZm, TB_NO_REVERSE },
- { X86::VRCPPHZ128rkz, X86::VRCPPHZ128mkz, 0 },
- { X86::VRCPPHZ256rkz, X86::VRCPPHZ256mkz, 0 },
- { X86::VRCPPHZrkz, X86::VRCPPHZmkz, 0 },
- { X86::VRCPSHZrr, X86::VRCPSHZrm, TB_NO_REVERSE },
- { X86::VRCPSSr, X86::VRCPSSm, 0 },
- { X86::VRCPSSr_Int, X86::VRCPSSm_Int, TB_NO_REVERSE },
- { X86::VREDUCEPDZ128rrikz, X86::VREDUCEPDZ128rmikz, 0 },
- { X86::VREDUCEPDZ256rrikz, X86::VREDUCEPDZ256rmikz, 0 },
- { X86::VREDUCEPDZrrikz, X86::VREDUCEPDZrmikz, 0 },
- { X86::VREDUCEPHZ128rrikz, X86::VREDUCEPHZ128rmikz, 0 },
- { X86::VREDUCEPHZ256rrikz, X86::VREDUCEPHZ256rmikz, 0 },
- { X86::VREDUCEPHZrrikz, X86::VREDUCEPHZrmikz, 0 },
- { X86::VREDUCEPSZ128rrikz, X86::VREDUCEPSZ128rmikz, 0 },
- { X86::VREDUCEPSZ256rrikz, X86::VREDUCEPSZ256rmikz, 0 },
- { X86::VREDUCEPSZrrikz, X86::VREDUCEPSZrmikz, 0 },
- { X86::VREDUCESDZrri, X86::VREDUCESDZrmi, TB_NO_REVERSE },
- { X86::VREDUCESHZrri, X86::VREDUCESHZrmi, TB_NO_REVERSE },
- { X86::VREDUCESSZrri, X86::VREDUCESSZrmi, TB_NO_REVERSE },
- { X86::VRNDSCALEPDZ128rrikz, X86::VRNDSCALEPDZ128rmikz, 0 },
- { X86::VRNDSCALEPDZ256rrikz, X86::VRNDSCALEPDZ256rmikz, 0 },
- { X86::VRNDSCALEPDZrrikz, X86::VRNDSCALEPDZrmikz, 0 },
- { X86::VRNDSCALEPHZ128rrikz, X86::VRNDSCALEPHZ128rmikz, 0 },
- { X86::VRNDSCALEPHZ256rrikz, X86::VRNDSCALEPHZ256rmikz, 0 },
- { X86::VRNDSCALEPHZrrikz, X86::VRNDSCALEPHZrmikz, 0 },
- { X86::VRNDSCALEPSZ128rrikz, X86::VRNDSCALEPSZ128rmikz, 0 },
- { X86::VRNDSCALEPSZ256rrikz, X86::VRNDSCALEPSZ256rmikz, 0 },
- { X86::VRNDSCALEPSZrrikz, X86::VRNDSCALEPSZrmikz, 0 },
- { X86::VRNDSCALESDZr, X86::VRNDSCALESDZm, 0 },
- { X86::VRNDSCALESDZr_Int, X86::VRNDSCALESDZm_Int, TB_NO_REVERSE },
- { X86::VRNDSCALESHZr, X86::VRNDSCALESHZm, 0 },
- { X86::VRNDSCALESHZr_Int, X86::VRNDSCALESHZm_Int, TB_NO_REVERSE },
- { X86::VRNDSCALESSZr, X86::VRNDSCALESSZm, 0 },
- { X86::VRNDSCALESSZr_Int, X86::VRNDSCALESSZm_Int, TB_NO_REVERSE },
- { X86::VROUNDSDr, X86::VROUNDSDm, 0 },
- { X86::VROUNDSDr_Int, X86::VROUNDSDm_Int, TB_NO_REVERSE },
- { X86::VROUNDSSr, X86::VROUNDSSm, 0 },
- { X86::VROUNDSSr_Int, X86::VROUNDSSm_Int, TB_NO_REVERSE },
- { X86::VRSQRT14PDZ128rkz, X86::VRSQRT14PDZ128mkz, 0 },
- { X86::VRSQRT14PDZ256rkz, X86::VRSQRT14PDZ256mkz, 0 },
- { X86::VRSQRT14PDZrkz, X86::VRSQRT14PDZmkz, 0 },
- { X86::VRSQRT14PSZ128rkz, X86::VRSQRT14PSZ128mkz, 0 },
- { X86::VRSQRT14PSZ256rkz, X86::VRSQRT14PSZ256mkz, 0 },
- { X86::VRSQRT14PSZrkz, X86::VRSQRT14PSZmkz, 0 },
- { X86::VRSQRT14SDZrr, X86::VRSQRT14SDZrm, TB_NO_REVERSE },
- { X86::VRSQRT14SSZrr, X86::VRSQRT14SSZrm, TB_NO_REVERSE },
- { X86::VRSQRT28PDZrkz, X86::VRSQRT28PDZmkz, 0 },
- { X86::VRSQRT28PSZrkz, X86::VRSQRT28PSZmkz, 0 },
- { X86::VRSQRT28SDZr, X86::VRSQRT28SDZm, TB_NO_REVERSE },
- { X86::VRSQRT28SSZr, X86::VRSQRT28SSZm, TB_NO_REVERSE },
- { X86::VRSQRTPHZ128rkz, X86::VRSQRTPHZ128mkz, 0 },
- { X86::VRSQRTPHZ256rkz, X86::VRSQRTPHZ256mkz, 0 },
- { X86::VRSQRTPHZrkz, X86::VRSQRTPHZmkz, 0 },
- { X86::VRSQRTSHZrr, X86::VRSQRTSHZrm, TB_NO_REVERSE },
- { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
- { X86::VRSQRTSSr_Int, X86::VRSQRTSSm_Int, TB_NO_REVERSE },
- { X86::VSCALEFPDZ128rr, X86::VSCALEFPDZ128rm, 0 },
- { X86::VSCALEFPDZ256rr, X86::VSCALEFPDZ256rm, 0 },
- { X86::VSCALEFPDZrr, X86::VSCALEFPDZrm, 0 },
- { X86::VSCALEFPHZ128rr, X86::VSCALEFPHZ128rm, 0 },
- { X86::VSCALEFPHZ256rr, X86::VSCALEFPHZ256rm, 0 },
- { X86::VSCALEFPHZrr, X86::VSCALEFPHZrm, 0 },
- { X86::VSCALEFPSZ128rr, X86::VSCALEFPSZ128rm, 0 },
- { X86::VSCALEFPSZ256rr, X86::VSCALEFPSZ256rm, 0 },
- { X86::VSCALEFPSZrr, X86::VSCALEFPSZrm, 0 },
- { X86::VSCALEFSDZrr, X86::VSCALEFSDZrm, TB_NO_REVERSE },
- { X86::VSCALEFSHZrr, X86::VSCALEFSHZrm, TB_NO_REVERSE },
- { X86::VSCALEFSSZrr, X86::VSCALEFSSZrm, TB_NO_REVERSE },
- { X86::VSHUFF32X4Z256rri, X86::VSHUFF32X4Z256rmi, 0 },
- { X86::VSHUFF32X4Zrri, X86::VSHUFF32X4Zrmi, 0 },
- { X86::VSHUFF64X2Z256rri, X86::VSHUFF64X2Z256rmi, 0 },
- { X86::VSHUFF64X2Zrri, X86::VSHUFF64X2Zrmi, 0 },
- { X86::VSHUFI32X4Z256rri, X86::VSHUFI32X4Z256rmi, 0 },
- { X86::VSHUFI32X4Zrri, X86::VSHUFI32X4Zrmi, 0 },
- { X86::VSHUFI64X2Z256rri, X86::VSHUFI64X2Z256rmi, 0 },
- { X86::VSHUFI64X2Zrri, X86::VSHUFI64X2Zrmi, 0 },
- { X86::VSHUFPDYrri, X86::VSHUFPDYrmi, 0 },
- { X86::VSHUFPDZ128rri, X86::VSHUFPDZ128rmi, 0 },
- { X86::VSHUFPDZ256rri, X86::VSHUFPDZ256rmi, 0 },
- { X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 },
- { X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 },
- { X86::VSHUFPSYrri, X86::VSHUFPSYrmi, 0 },
- { X86::VSHUFPSZ128rri, X86::VSHUFPSZ128rmi, 0 },
- { X86::VSHUFPSZ256rri, X86::VSHUFPSZ256rmi, 0 },
- { X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 },
- { X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 },
- { X86::VSQRTPDZ128rkz, X86::VSQRTPDZ128mkz, 0 },
- { X86::VSQRTPDZ256rkz, X86::VSQRTPDZ256mkz, 0 },
- { X86::VSQRTPDZrkz, X86::VSQRTPDZmkz, 0 },
- { X86::VSQRTPHZ128rkz, X86::VSQRTPHZ128mkz, 0 },
- { X86::VSQRTPHZ256rkz, X86::VSQRTPHZ256mkz, 0 },
- { X86::VSQRTPHZrkz, X86::VSQRTPHZmkz, 0 },
- { X86::VSQRTPSZ128rkz, X86::VSQRTPSZ128mkz, 0 },
- { X86::VSQRTPSZ256rkz, X86::VSQRTPSZ256mkz, 0 },
- { X86::VSQRTPSZrkz, X86::VSQRTPSZmkz, 0 },
- { X86::VSQRTSDZr, X86::VSQRTSDZm, 0 },
- { X86::VSQRTSDZr_Int, X86::VSQRTSDZm_Int, TB_NO_REVERSE },
- { X86::VSQRTSDr, X86::VSQRTSDm, 0 },
- { X86::VSQRTSDr_Int, X86::VSQRTSDm_Int, TB_NO_REVERSE },
- { X86::VSQRTSHZr, X86::VSQRTSHZm, 0 },
- { X86::VSQRTSHZr_Int, X86::VSQRTSHZm_Int, TB_NO_REVERSE },
- { X86::VSQRTSSZr, X86::VSQRTSSZm, 0 },
- { X86::VSQRTSSZr_Int, X86::VSQRTSSZm_Int, TB_NO_REVERSE },
- { X86::VSQRTSSr, X86::VSQRTSSm, 0 },
- { X86::VSQRTSSr_Int, X86::VSQRTSSm_Int, TB_NO_REVERSE },
- { X86::VSUBPDYrr, X86::VSUBPDYrm, 0 },
- { X86::VSUBPDZ128rr, X86::VSUBPDZ128rm, 0 },
- { X86::VSUBPDZ256rr, X86::VSUBPDZ256rm, 0 },
- { X86::VSUBPDZrr, X86::VSUBPDZrm, 0 },
- { X86::VSUBPDrr, X86::VSUBPDrm, 0 },
- { X86::VSUBPHZ128rr, X86::VSUBPHZ128rm, 0 },
- { X86::VSUBPHZ256rr, X86::VSUBPHZ256rm, 0 },
- { X86::VSUBPHZrr, X86::VSUBPHZrm, 0 },
- { X86::VSUBPSYrr, X86::VSUBPSYrm, 0 },
- { X86::VSUBPSZ128rr, X86::VSUBPSZ128rm, 0 },
- { X86::VSUBPSZ256rr, X86::VSUBPSZ256rm, 0 },
- { X86::VSUBPSZrr, X86::VSUBPSZrm, 0 },
- { X86::VSUBPSrr, X86::VSUBPSrm, 0 },
- { X86::VSUBSDZrr, X86::VSUBSDZrm, 0 },
- { X86::VSUBSDZrr_Int, X86::VSUBSDZrm_Int, TB_NO_REVERSE },
- { X86::VSUBSDrr, X86::VSUBSDrm, 0 },
- { X86::VSUBSDrr_Int, X86::VSUBSDrm_Int, TB_NO_REVERSE },
- { X86::VSUBSHZrr, X86::VSUBSHZrm, 0 },
- { X86::VSUBSHZrr_Int, X86::VSUBSHZrm_Int, TB_NO_REVERSE },
- { X86::VSUBSSZrr, X86::VSUBSSZrm, 0 },
- { X86::VSUBSSZrr_Int, X86::VSUBSSZrm_Int, TB_NO_REVERSE },
- { X86::VSUBSSrr, X86::VSUBSSrm, 0 },
- { X86::VSUBSSrr_Int, X86::VSUBSSrm_Int, TB_NO_REVERSE },
- { X86::VUNPCKHPDYrr, X86::VUNPCKHPDYrm, 0 },
- { X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rm, 0 },
- { X86::VUNPCKHPDZ256rr, X86::VUNPCKHPDZ256rm, 0 },
- { X86::VUNPCKHPDZrr, X86::VUNPCKHPDZrm, 0 },
- { X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, 0 },
- { X86::VUNPCKHPSYrr, X86::VUNPCKHPSYrm, 0 },
- { X86::VUNPCKHPSZ128rr, X86::VUNPCKHPSZ128rm, 0 },
- { X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rm, 0 },
- { X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrm, 0 },
- { X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, 0 },
- { X86::VUNPCKLPDYrr, X86::VUNPCKLPDYrm, 0 },
- { X86::VUNPCKLPDZ128rr, X86::VUNPCKLPDZ128rm, 0 },
- { X86::VUNPCKLPDZ256rr, X86::VUNPCKLPDZ256rm, 0 },
- { X86::VUNPCKLPDZrr, X86::VUNPCKLPDZrm, 0 },
- { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, 0 },
- { X86::VUNPCKLPSYrr, X86::VUNPCKLPSYrm, 0 },
- { X86::VUNPCKLPSZ128rr, X86::VUNPCKLPSZ128rm, 0 },
- { X86::VUNPCKLPSZ256rr, X86::VUNPCKLPSZ256rm, 0 },
- { X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrm, 0 },
- { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, 0 },
- { X86::VXORPDYrr, X86::VXORPDYrm, 0 },
- { X86::VXORPDZ128rr, X86::VXORPDZ128rm, 0 },
- { X86::VXORPDZ256rr, X86::VXORPDZ256rm, 0 },
- { X86::VXORPDZrr, X86::VXORPDZrm, 0 },
- { X86::VXORPDrr, X86::VXORPDrm, 0 },
- { X86::VXORPSYrr, X86::VXORPSYrm, 0 },
- { X86::VXORPSZ128rr, X86::VXORPSZ128rm, 0 },
- { X86::VXORPSZ256rr, X86::VXORPSZ256rm, 0 },
- { X86::VXORPSZrr, X86::VXORPSZrm, 0 },
- { X86::VXORPSrr, X86::VXORPSrm, 0 },
- { X86::XOR16rr, X86::XOR16rm, 0 },
- { X86::XOR32rr, X86::XOR32rm, 0 },
- { X86::XOR64rr, X86::XOR64rm, 0 },
- { X86::XOR8rr, X86::XOR8rm, 0 },
- { X86::XORPDrr, X86::XORPDrm, TB_ALIGN_16 },
- { X86::XORPSrr, X86::XORPSrm, TB_ALIGN_16 },
-};
-
-static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
- { X86::VADDPDZ128rrkz, X86::VADDPDZ128rmkz, 0 },
- { X86::VADDPDZ256rrkz, X86::VADDPDZ256rmkz, 0 },
- { X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 },
- { X86::VADDPHZ128rrkz, X86::VADDPHZ128rmkz, 0 },
- { X86::VADDPHZ256rrkz, X86::VADDPHZ256rmkz, 0 },
- { X86::VADDPHZrrkz, X86::VADDPHZrmkz, 0 },
- { X86::VADDPSZ128rrkz, X86::VADDPSZ128rmkz, 0 },
- { X86::VADDPSZ256rrkz, X86::VADDPSZ256rmkz, 0 },
- { X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 },
- { X86::VADDSDZrr_Intkz, X86::VADDSDZrm_Intkz, TB_NO_REVERSE },
- { X86::VADDSHZrr_Intkz, X86::VADDSHZrm_Intkz, TB_NO_REVERSE },
- { X86::VADDSSZrr_Intkz, X86::VADDSSZrm_Intkz, TB_NO_REVERSE },
- { X86::VALIGNDZ128rrikz, X86::VALIGNDZ128rmikz, 0 },
- { X86::VALIGNDZ256rrikz, X86::VALIGNDZ256rmikz, 0 },
- { X86::VALIGNDZrrikz, X86::VALIGNDZrmikz, 0 },
- { X86::VALIGNQZ128rrikz, X86::VALIGNQZ128rmikz, 0 },
- { X86::VALIGNQZ256rrikz, X86::VALIGNQZ256rmikz, 0 },
- { X86::VALIGNQZrrikz, X86::VALIGNQZrmikz, 0 },
- { X86::VANDNPDZ128rrkz, X86::VANDNPDZ128rmkz, 0 },
- { X86::VANDNPDZ256rrkz, X86::VANDNPDZ256rmkz, 0 },
- { X86::VANDNPDZrrkz, X86::VANDNPDZrmkz, 0 },
- { X86::VANDNPSZ128rrkz, X86::VANDNPSZ128rmkz, 0 },
- { X86::VANDNPSZ256rrkz, X86::VANDNPSZ256rmkz, 0 },
- { X86::VANDNPSZrrkz, X86::VANDNPSZrmkz, 0 },
- { X86::VANDPDZ128rrkz, X86::VANDPDZ128rmkz, 0 },
- { X86::VANDPDZ256rrkz, X86::VANDPDZ256rmkz, 0 },
- { X86::VANDPDZrrkz, X86::VANDPDZrmkz, 0 },
- { X86::VANDPSZ128rrkz, X86::VANDPSZ128rmkz, 0 },
- { X86::VANDPSZ256rrkz, X86::VANDPSZ256rmkz, 0 },
- { X86::VANDPSZrrkz, X86::VANDPSZrmkz, 0 },
- { X86::VBLENDMPDZ128rrk, X86::VBLENDMPDZ128rmk, 0 },
- { X86::VBLENDMPDZ256rrk, X86::VBLENDMPDZ256rmk, 0 },
- { X86::VBLENDMPDZrrk, X86::VBLENDMPDZrmk, 0 },
- { X86::VBLENDMPSZ128rrk, X86::VBLENDMPSZ128rmk, 0 },
- { X86::VBLENDMPSZ256rrk, X86::VBLENDMPSZ256rmk, 0 },
- { X86::VBLENDMPSZrrk, X86::VBLENDMPSZrmk, 0 },
- { X86::VBROADCASTF32X2Z256rrk, X86::VBROADCASTF32X2Z256rmk, TB_NO_REVERSE },
- { X86::VBROADCASTF32X2Zrrk, X86::VBROADCASTF32X2Zrmk, TB_NO_REVERSE },
- { X86::VBROADCASTI32X2Z128rrk, X86::VBROADCASTI32X2Z128rmk, TB_NO_REVERSE },
- { X86::VBROADCASTI32X2Z256rrk, X86::VBROADCASTI32X2Z256rmk, TB_NO_REVERSE },
- { X86::VBROADCASTI32X2Zrrk, X86::VBROADCASTI32X2Zrmk, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256rrk, X86::VBROADCASTSDZ256rmk, TB_NO_REVERSE },
- { X86::VBROADCASTSDZrrk, X86::VBROADCASTSDZrmk, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ128rrk, X86::VBROADCASTSSZ128rmk, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ256rrk, X86::VBROADCASTSSZ256rmk, TB_NO_REVERSE },
- { X86::VBROADCASTSSZrrk, X86::VBROADCASTSSZrmk, TB_NO_REVERSE },
- { X86::VCMPPDZ128rrik, X86::VCMPPDZ128rmik, 0 },
- { X86::VCMPPDZ256rrik, X86::VCMPPDZ256rmik, 0 },
- { X86::VCMPPDZrrik, X86::VCMPPDZrmik, 0 },
- { X86::VCMPPHZ128rrik, X86::VCMPPHZ128rmik, 0 },
- { X86::VCMPPHZ256rrik, X86::VCMPPHZ256rmik, 0 },
- { X86::VCMPPHZrrik, X86::VCMPPHZrmik, 0 },
- { X86::VCMPPSZ128rrik, X86::VCMPPSZ128rmik, 0 },
- { X86::VCMPPSZ256rrik, X86::VCMPPSZ256rmik, 0 },
- { X86::VCMPPSZrrik, X86::VCMPPSZrmik, 0 },
- { X86::VCMPSDZrr_Intk, X86::VCMPSDZrm_Intk, TB_NO_REVERSE },
- { X86::VCMPSHZrr_Intk, X86::VCMPSHZrm_Intk, TB_NO_REVERSE },
- { X86::VCMPSSZrr_Intk, X86::VCMPSSZrm_Intk, TB_NO_REVERSE },
- { X86::VCVTDQ2PDZ128rrk, X86::VCVTDQ2PDZ128rmk, TB_NO_REVERSE },
- { X86::VCVTDQ2PDZ256rrk, X86::VCVTDQ2PDZ256rmk, 0 },
- { X86::VCVTDQ2PDZrrk, X86::VCVTDQ2PDZrmk, 0 },
- { X86::VCVTDQ2PSZ128rrk, X86::VCVTDQ2PSZ128rmk, 0 },
- { X86::VCVTDQ2PSZ256rrk, X86::VCVTDQ2PSZ256rmk, 0 },
- { X86::VCVTDQ2PSZrrk, X86::VCVTDQ2PSZrmk, 0 },
- { X86::VCVTNE2PS2BF16Z128rrkz, X86::VCVTNE2PS2BF16Z128rmkz, 0 },
- { X86::VCVTNE2PS2BF16Z256rrkz, X86::VCVTNE2PS2BF16Z256rmkz, 0 },
- { X86::VCVTNE2PS2BF16Zrrkz, X86::VCVTNE2PS2BF16Zrmkz, 0 },
- { X86::VCVTNEPS2BF16Z128rrk, X86::VCVTNEPS2BF16Z128rmk, 0 },
- { X86::VCVTNEPS2BF16Z256rrk, X86::VCVTNEPS2BF16Z256rmk, 0 },
- { X86::VCVTNEPS2BF16Zrrk, X86::VCVTNEPS2BF16Zrmk, 0 },
- { X86::VCVTPD2DQZ128rrk, X86::VCVTPD2DQZ128rmk, 0 },
- { X86::VCVTPD2DQZ256rrk, X86::VCVTPD2DQZ256rmk, 0 },
- { X86::VCVTPD2DQZrrk, X86::VCVTPD2DQZrmk, 0 },
- { X86::VCVTPD2PSZ128rrk, X86::VCVTPD2PSZ128rmk, 0 },
- { X86::VCVTPD2PSZ256rrk, X86::VCVTPD2PSZ256rmk, 0 },
- { X86::VCVTPD2PSZrrk, X86::VCVTPD2PSZrmk, 0 },
- { X86::VCVTPD2QQZ128rrk, X86::VCVTPD2QQZ128rmk, 0 },
- { X86::VCVTPD2QQZ256rrk, X86::VCVTPD2QQZ256rmk, 0 },
- { X86::VCVTPD2QQZrrk, X86::VCVTPD2QQZrmk, 0 },
- { X86::VCVTPD2UDQZ128rrk, X86::VCVTPD2UDQZ128rmk, 0 },
- { X86::VCVTPD2UDQZ256rrk, X86::VCVTPD2UDQZ256rmk, 0 },
- { X86::VCVTPD2UDQZrrk, X86::VCVTPD2UDQZrmk, 0 },
- { X86::VCVTPD2UQQZ128rrk, X86::VCVTPD2UQQZ128rmk, 0 },
- { X86::VCVTPD2UQQZ256rrk, X86::VCVTPD2UQQZ256rmk, 0 },
- { X86::VCVTPD2UQQZrrk, X86::VCVTPD2UQQZrmk, 0 },
- { X86::VCVTPH2PSZ128rrk, X86::VCVTPH2PSZ128rmk, TB_NO_REVERSE },
- { X86::VCVTPH2PSZ256rrk, X86::VCVTPH2PSZ256rmk, 0 },
- { X86::VCVTPH2PSZrrk, X86::VCVTPH2PSZrmk, 0 },
- { X86::VCVTPS2DQZ128rrk, X86::VCVTPS2DQZ128rmk, 0 },
- { X86::VCVTPS2DQZ256rrk, X86::VCVTPS2DQZ256rmk, 0 },
- { X86::VCVTPS2DQZrrk, X86::VCVTPS2DQZrmk, 0 },
- { X86::VCVTPS2PDZ128rrk, X86::VCVTPS2PDZ128rmk, TB_NO_REVERSE },
- { X86::VCVTPS2PDZ256rrk, X86::VCVTPS2PDZ256rmk, 0 },
- { X86::VCVTPS2PDZrrk, X86::VCVTPS2PDZrmk, 0 },
- { X86::VCVTPS2QQZ128rrk, X86::VCVTPS2QQZ128rmk, TB_NO_REVERSE },
- { X86::VCVTPS2QQZ256rrk, X86::VCVTPS2QQZ256rmk, 0 },
- { X86::VCVTPS2QQZrrk, X86::VCVTPS2QQZrmk, 0 },
- { X86::VCVTPS2UDQZ128rrk, X86::VCVTPS2UDQZ128rmk, 0 },
- { X86::VCVTPS2UDQZ256rrk, X86::VCVTPS2UDQZ256rmk, 0 },
- { X86::VCVTPS2UDQZrrk, X86::VCVTPS2UDQZrmk, 0 },
- { X86::VCVTPS2UQQZ128rrk, X86::VCVTPS2UQQZ128rmk, TB_NO_REVERSE },
- { X86::VCVTPS2UQQZ256rrk, X86::VCVTPS2UQQZ256rmk, 0 },
- { X86::VCVTPS2UQQZrrk, X86::VCVTPS2UQQZrmk, 0 },
- { X86::VCVTQQ2PDZ128rrk, X86::VCVTQQ2PDZ128rmk, 0 },
- { X86::VCVTQQ2PDZ256rrk, X86::VCVTQQ2PDZ256rmk, 0 },
- { X86::VCVTQQ2PDZrrk, X86::VCVTQQ2PDZrmk, 0 },
- { X86::VCVTQQ2PSZ128rrk, X86::VCVTQQ2PSZ128rmk, 0 },
- { X86::VCVTQQ2PSZ256rrk, X86::VCVTQQ2PSZ256rmk, 0 },
- { X86::VCVTQQ2PSZrrk, X86::VCVTQQ2PSZrmk, 0 },
- { X86::VCVTSD2SSZrr_Intkz, X86::VCVTSD2SSZrm_Intkz, TB_NO_REVERSE },
- { X86::VCVTSS2SDZrr_Intkz, X86::VCVTSS2SDZrm_Intkz, TB_NO_REVERSE },
- { X86::VCVTTPD2DQZ128rrk, X86::VCVTTPD2DQZ128rmk, 0 },
- { X86::VCVTTPD2DQZ256rrk, X86::VCVTTPD2DQZ256rmk, 0 },
- { X86::VCVTTPD2DQZrrk, X86::VCVTTPD2DQZrmk, 0 },
- { X86::VCVTTPD2QQZ128rrk, X86::VCVTTPD2QQZ128rmk, 0 },
- { X86::VCVTTPD2QQZ256rrk, X86::VCVTTPD2QQZ256rmk, 0 },
- { X86::VCVTTPD2QQZrrk, X86::VCVTTPD2QQZrmk, 0 },
- { X86::VCVTTPD2UDQZ128rrk, X86::VCVTTPD2UDQZ128rmk, 0 },
- { X86::VCVTTPD2UDQZ256rrk, X86::VCVTTPD2UDQZ256rmk, 0 },
- { X86::VCVTTPD2UDQZrrk, X86::VCVTTPD2UDQZrmk, 0 },
- { X86::VCVTTPD2UQQZ128rrk, X86::VCVTTPD2UQQZ128rmk, 0 },
- { X86::VCVTTPD2UQQZ256rrk, X86::VCVTTPD2UQQZ256rmk, 0 },
- { X86::VCVTTPD2UQQZrrk, X86::VCVTTPD2UQQZrmk, 0 },
- { X86::VCVTTPS2DQZ128rrk, X86::VCVTTPS2DQZ128rmk, 0 },
- { X86::VCVTTPS2DQZ256rrk, X86::VCVTTPS2DQZ256rmk, 0 },
- { X86::VCVTTPS2DQZrrk, X86::VCVTTPS2DQZrmk, 0 },
- { X86::VCVTTPS2QQZ128rrk, X86::VCVTTPS2QQZ128rmk, TB_NO_REVERSE },
- { X86::VCVTTPS2QQZ256rrk, X86::VCVTTPS2QQZ256rmk, 0 },
- { X86::VCVTTPS2QQZrrk, X86::VCVTTPS2QQZrmk, 0 },
- { X86::VCVTTPS2UDQZ128rrk, X86::VCVTTPS2UDQZ128rmk, 0 },
- { X86::VCVTTPS2UDQZ256rrk, X86::VCVTTPS2UDQZ256rmk, 0 },
- { X86::VCVTTPS2UDQZrrk, X86::VCVTTPS2UDQZrmk, 0 },
- { X86::VCVTTPS2UQQZ128rrk, X86::VCVTTPS2UQQZ128rmk, TB_NO_REVERSE },
- { X86::VCVTTPS2UQQZ256rrk, X86::VCVTTPS2UQQZ256rmk, 0 },
- { X86::VCVTTPS2UQQZrrk, X86::VCVTTPS2UQQZrmk, 0 },
- { X86::VCVTUDQ2PDZ128rrk, X86::VCVTUDQ2PDZ128rmk, TB_NO_REVERSE },
- { X86::VCVTUDQ2PDZ256rrk, X86::VCVTUDQ2PDZ256rmk, 0 },
- { X86::VCVTUDQ2PDZrrk, X86::VCVTUDQ2PDZrmk, 0 },
- { X86::VCVTUDQ2PSZ128rrk, X86::VCVTUDQ2PSZ128rmk, 0 },
- { X86::VCVTUDQ2PSZ256rrk, X86::VCVTUDQ2PSZ256rmk, 0 },
- { X86::VCVTUDQ2PSZrrk, X86::VCVTUDQ2PSZrmk, 0 },
- { X86::VCVTUQQ2PDZ128rrk, X86::VCVTUQQ2PDZ128rmk, 0 },
- { X86::VCVTUQQ2PDZ256rrk, X86::VCVTUQQ2PDZ256rmk, 0 },
- { X86::VCVTUQQ2PDZrrk, X86::VCVTUQQ2PDZrmk, 0 },
- { X86::VCVTUQQ2PSZ128rrk, X86::VCVTUQQ2PSZ128rmk, 0 },
- { X86::VCVTUQQ2PSZ256rrk, X86::VCVTUQQ2PSZ256rmk, 0 },
- { X86::VCVTUQQ2PSZrrk, X86::VCVTUQQ2PSZrmk, 0 },
- { X86::VDBPSADBWZ128rrikz, X86::VDBPSADBWZ128rmikz, 0 },
- { X86::VDBPSADBWZ256rrikz, X86::VDBPSADBWZ256rmikz, 0 },
- { X86::VDBPSADBWZrrikz, X86::VDBPSADBWZrmikz, 0 },
- { X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0 },
- { X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0 },
- { X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 },
- { X86::VDIVPHZ128rrkz, X86::VDIVPHZ128rmkz, 0 },
- { X86::VDIVPHZ256rrkz, X86::VDIVPHZ256rmkz, 0 },
- { X86::VDIVPHZrrkz, X86::VDIVPHZrmkz, 0 },
- { X86::VDIVPSZ128rrkz, X86::VDIVPSZ128rmkz, 0 },
- { X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 },
- { X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
- { X86::VDIVSDZrr_Intkz, X86::VDIVSDZrm_Intkz, TB_NO_REVERSE },
- { X86::VDIVSHZrr_Intkz, X86::VDIVSHZrm_Intkz, TB_NO_REVERSE },
- { X86::VDIVSSZrr_Intkz, X86::VDIVSSZrm_Intkz, TB_NO_REVERSE },
- { X86::VDPBF16PSZ128r, X86::VDPBF16PSZ128m, 0 },
- { X86::VDPBF16PSZ256r, X86::VDPBF16PSZ256m, 0 },
- { X86::VDPBF16PSZr, X86::VDPBF16PSZm, 0 },
- { X86::VEXP2PDZrk, X86::VEXP2PDZmk, 0 },
- { X86::VEXP2PSZrk, X86::VEXP2PSZmk, 0 },
- { X86::VEXPANDPDZ128rrk, X86::VEXPANDPDZ128rmk, TB_NO_REVERSE },
- { X86::VEXPANDPDZ256rrk, X86::VEXPANDPDZ256rmk, TB_NO_REVERSE },
- { X86::VEXPANDPDZrrk, X86::VEXPANDPDZrmk, TB_NO_REVERSE },
- { X86::VEXPANDPSZ128rrk, X86::VEXPANDPSZ128rmk, TB_NO_REVERSE },
- { X86::VEXPANDPSZ256rrk, X86::VEXPANDPSZ256rmk, TB_NO_REVERSE },
- { X86::VEXPANDPSZrrk, X86::VEXPANDPSZrmk, TB_NO_REVERSE },
- { X86::VFCMADDCPHZ128r, X86::VFCMADDCPHZ128m, 0 },
- { X86::VFCMADDCPHZ256r, X86::VFCMADDCPHZ256m, 0 },
- { X86::VFCMADDCPHZr, X86::VFCMADDCPHZm, 0 },
- { X86::VFCMADDCSHZr, X86::VFCMADDCSHZm, TB_NO_REVERSE },
- { X86::VFCMULCPHZ128rrkz, X86::VFCMULCPHZ128rmkz, 0 },
- { X86::VFCMULCPHZ256rrkz, X86::VFCMULCPHZ256rmkz, 0 },
- { X86::VFCMULCPHZrrkz, X86::VFCMULCPHZrmkz, 0 },
- { X86::VFCMULCSHZrrkz, X86::VFCMULCSHZrmkz, TB_NO_REVERSE },
- { X86::VFIXUPIMMPDZ128rri, X86::VFIXUPIMMPDZ128rmi, 0 },
- { X86::VFIXUPIMMPDZ256rri, X86::VFIXUPIMMPDZ256rmi, 0 },
- { X86::VFIXUPIMMPDZrri, X86::VFIXUPIMMPDZrmi, 0 },
- { X86::VFIXUPIMMPSZ128rri, X86::VFIXUPIMMPSZ128rmi, 0 },
- { X86::VFIXUPIMMPSZ256rri, X86::VFIXUPIMMPSZ256rmi, 0 },
- { X86::VFIXUPIMMPSZrri, X86::VFIXUPIMMPSZrmi, 0 },
- { X86::VFIXUPIMMSDZrri, X86::VFIXUPIMMSDZrmi, TB_NO_REVERSE },
- { X86::VFIXUPIMMSSZrri, X86::VFIXUPIMMSSZrmi, TB_NO_REVERSE },
- { X86::VFMADD132PDYr, X86::VFMADD132PDYm, 0 },
- { X86::VFMADD132PDZ128r, X86::VFMADD132PDZ128m, 0 },
- { X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256m, 0 },
- { X86::VFMADD132PDZr, X86::VFMADD132PDZm, 0 },
- { X86::VFMADD132PDr, X86::VFMADD132PDm, 0 },
- { X86::VFMADD132PHZ128r, X86::VFMADD132PHZ128m, 0 },
- { X86::VFMADD132PHZ256r, X86::VFMADD132PHZ256m, 0 },
- { X86::VFMADD132PHZr, X86::VFMADD132PHZm, 0 },
- { X86::VFMADD132PSYr, X86::VFMADD132PSYm, 0 },
- { X86::VFMADD132PSZ128r, X86::VFMADD132PSZ128m, 0 },
- { X86::VFMADD132PSZ256r, X86::VFMADD132PSZ256m, 0 },
- { X86::VFMADD132PSZr, X86::VFMADD132PSZm, 0 },
- { X86::VFMADD132PSr, X86::VFMADD132PSm, 0 },
- { X86::VFMADD132SDZr, X86::VFMADD132SDZm, 0 },
- { X86::VFMADD132SDZr_Int, X86::VFMADD132SDZm_Int, TB_NO_REVERSE },
- { X86::VFMADD132SDr, X86::VFMADD132SDm, 0 },
- { X86::VFMADD132SDr_Int, X86::VFMADD132SDm_Int, TB_NO_REVERSE },
- { X86::VFMADD132SHZr, X86::VFMADD132SHZm, 0 },
- { X86::VFMADD132SHZr_Int, X86::VFMADD132SHZm_Int, TB_NO_REVERSE },
- { X86::VFMADD132SSZr, X86::VFMADD132SSZm, 0 },
- { X86::VFMADD132SSZr_Int, X86::VFMADD132SSZm_Int, TB_NO_REVERSE },
- { X86::VFMADD132SSr, X86::VFMADD132SSm, 0 },
- { X86::VFMADD132SSr_Int, X86::VFMADD132SSm_Int, TB_NO_REVERSE },
- { X86::VFMADD213PDYr, X86::VFMADD213PDYm, 0 },
- { X86::VFMADD213PDZ128r, X86::VFMADD213PDZ128m, 0 },
- { X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256m, 0 },
- { X86::VFMADD213PDZr, X86::VFMADD213PDZm, 0 },
- { X86::VFMADD213PDr, X86::VFMADD213PDm, 0 },
- { X86::VFMADD213PHZ128r, X86::VFMADD213PHZ128m, 0 },
- { X86::VFMADD213PHZ256r, X86::VFMADD213PHZ256m, 0 },
- { X86::VFMADD213PHZr, X86::VFMADD213PHZm, 0 },
- { X86::VFMADD213PSYr, X86::VFMADD213PSYm, 0 },
- { X86::VFMADD213PSZ128r, X86::VFMADD213PSZ128m, 0 },
- { X86::VFMADD213PSZ256r, X86::VFMADD213PSZ256m, 0 },
- { X86::VFMADD213PSZr, X86::VFMADD213PSZm, 0 },
- { X86::VFMADD213PSr, X86::VFMADD213PSm, 0 },
- { X86::VFMADD213SDZr, X86::VFMADD213SDZm, 0 },
- { X86::VFMADD213SDZr_Int, X86::VFMADD213SDZm_Int, TB_NO_REVERSE },
- { X86::VFMADD213SDr, X86::VFMADD213SDm, 0 },
- { X86::VFMADD213SDr_Int, X86::VFMADD213SDm_Int, TB_NO_REVERSE },
- { X86::VFMADD213SHZr, X86::VFMADD213SHZm, 0 },
- { X86::VFMADD213SHZr_Int, X86::VFMADD213SHZm_Int, TB_NO_REVERSE },
- { X86::VFMADD213SSZr, X86::VFMADD213SSZm, 0 },
- { X86::VFMADD213SSZr_Int, X86::VFMADD213SSZm_Int, TB_NO_REVERSE },
- { X86::VFMADD213SSr, X86::VFMADD213SSm, 0 },
- { X86::VFMADD213SSr_Int, X86::VFMADD213SSm_Int, TB_NO_REVERSE },
- { X86::VFMADD231PDYr, X86::VFMADD231PDYm, 0 },
- { X86::VFMADD231PDZ128r, X86::VFMADD231PDZ128m, 0 },
- { X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256m, 0 },
- { X86::VFMADD231PDZr, X86::VFMADD231PDZm, 0 },
- { X86::VFMADD231PDr, X86::VFMADD231PDm, 0 },
- { X86::VFMADD231PHZ128r, X86::VFMADD231PHZ128m, 0 },
- { X86::VFMADD231PHZ256r, X86::VFMADD231PHZ256m, 0 },
- { X86::VFMADD231PHZr, X86::VFMADD231PHZm, 0 },
- { X86::VFMADD231PSYr, X86::VFMADD231PSYm, 0 },
- { X86::VFMADD231PSZ128r, X86::VFMADD231PSZ128m, 0 },
- { X86::VFMADD231PSZ256r, X86::VFMADD231PSZ256m, 0 },
- { X86::VFMADD231PSZr, X86::VFMADD231PSZm, 0 },
- { X86::VFMADD231PSr, X86::VFMADD231PSm, 0 },
- { X86::VFMADD231SDZr, X86::VFMADD231SDZm, 0 },
- { X86::VFMADD231SDZr_Int, X86::VFMADD231SDZm_Int, TB_NO_REVERSE },
- { X86::VFMADD231SDr, X86::VFMADD231SDm, 0 },
- { X86::VFMADD231SDr_Int, X86::VFMADD231SDm_Int, TB_NO_REVERSE },
- { X86::VFMADD231SHZr, X86::VFMADD231SHZm, 0 },
- { X86::VFMADD231SHZr_Int, X86::VFMADD231SHZm_Int, TB_NO_REVERSE },
- { X86::VFMADD231SSZr, X86::VFMADD231SSZm, 0 },
- { X86::VFMADD231SSZr_Int, X86::VFMADD231SSZm_Int, TB_NO_REVERSE },
- { X86::VFMADD231SSr, X86::VFMADD231SSm, 0 },
- { X86::VFMADD231SSr_Int, X86::VFMADD231SSm_Int, TB_NO_REVERSE },
- { X86::VFMADDCPHZ128r, X86::VFMADDCPHZ128m, 0 },
- { X86::VFMADDCPHZ256r, X86::VFMADDCPHZ256m, 0 },
- { X86::VFMADDCPHZr, X86::VFMADDCPHZm, 0 },
- { X86::VFMADDCSHZr, X86::VFMADDCSHZm, TB_NO_REVERSE },
- { X86::VFMADDPD4Yrr, X86::VFMADDPD4Yrm, 0 },
- { X86::VFMADDPD4rr, X86::VFMADDPD4rm, 0 },
- { X86::VFMADDPS4Yrr, X86::VFMADDPS4Yrm, 0 },
- { X86::VFMADDPS4rr, X86::VFMADDPS4rm, 0 },
- { X86::VFMADDSD4rr, X86::VFMADDSD4rm, 0 },
- { X86::VFMADDSD4rr_Int, X86::VFMADDSD4rm_Int, TB_NO_REVERSE },
- { X86::VFMADDSS4rr, X86::VFMADDSS4rm, 0 },
- { X86::VFMADDSS4rr_Int, X86::VFMADDSS4rm_Int, TB_NO_REVERSE },
- { X86::VFMADDSUB132PDYr, X86::VFMADDSUB132PDYm, 0 },
- { X86::VFMADDSUB132PDZ128r, X86::VFMADDSUB132PDZ128m, 0 },
- { X86::VFMADDSUB132PDZ256r, X86::VFMADDSUB132PDZ256m, 0 },
- { X86::VFMADDSUB132PDZr, X86::VFMADDSUB132PDZm, 0 },
- { X86::VFMADDSUB132PDr, X86::VFMADDSUB132PDm, 0 },
- { X86::VFMADDSUB132PHZ128r, X86::VFMADDSUB132PHZ128m, 0 },
- { X86::VFMADDSUB132PHZ256r, X86::VFMADDSUB132PHZ256m, 0 },
- { X86::VFMADDSUB132PHZr, X86::VFMADDSUB132PHZm, 0 },
- { X86::VFMADDSUB132PSYr, X86::VFMADDSUB132PSYm, 0 },
- { X86::VFMADDSUB132PSZ128r, X86::VFMADDSUB132PSZ128m, 0 },
- { X86::VFMADDSUB132PSZ256r, X86::VFMADDSUB132PSZ256m, 0 },
- { X86::VFMADDSUB132PSZr, X86::VFMADDSUB132PSZm, 0 },
- { X86::VFMADDSUB132PSr, X86::VFMADDSUB132PSm, 0 },
- { X86::VFMADDSUB213PDYr, X86::VFMADDSUB213PDYm, 0 },
- { X86::VFMADDSUB213PDZ128r, X86::VFMADDSUB213PDZ128m, 0 },
- { X86::VFMADDSUB213PDZ256r, X86::VFMADDSUB213PDZ256m, 0 },
- { X86::VFMADDSUB213PDZr, X86::VFMADDSUB213PDZm, 0 },
- { X86::VFMADDSUB213PDr, X86::VFMADDSUB213PDm, 0 },
- { X86::VFMADDSUB213PHZ128r, X86::VFMADDSUB213PHZ128m, 0 },
- { X86::VFMADDSUB213PHZ256r, X86::VFMADDSUB213PHZ256m, 0 },
- { X86::VFMADDSUB213PHZr, X86::VFMADDSUB213PHZm, 0 },
- { X86::VFMADDSUB213PSYr, X86::VFMADDSUB213PSYm, 0 },
- { X86::VFMADDSUB213PSZ128r, X86::VFMADDSUB213PSZ128m, 0 },
- { X86::VFMADDSUB213PSZ256r, X86::VFMADDSUB213PSZ256m, 0 },
- { X86::VFMADDSUB213PSZr, X86::VFMADDSUB213PSZm, 0 },
- { X86::VFMADDSUB213PSr, X86::VFMADDSUB213PSm, 0 },
- { X86::VFMADDSUB231PDYr, X86::VFMADDSUB231PDYm, 0 },
- { X86::VFMADDSUB231PDZ128r, X86::VFMADDSUB231PDZ128m, 0 },
- { X86::VFMADDSUB231PDZ256r, X86::VFMADDSUB231PDZ256m, 0 },
- { X86::VFMADDSUB231PDZr, X86::VFMADDSUB231PDZm, 0 },
- { X86::VFMADDSUB231PDr, X86::VFMADDSUB231PDm, 0 },
- { X86::VFMADDSUB231PHZ128r, X86::VFMADDSUB231PHZ128m, 0 },
- { X86::VFMADDSUB231PHZ256r, X86::VFMADDSUB231PHZ256m, 0 },
- { X86::VFMADDSUB231PHZr, X86::VFMADDSUB231PHZm, 0 },
- { X86::VFMADDSUB231PSYr, X86::VFMADDSUB231PSYm, 0 },
- { X86::VFMADDSUB231PSZ128r, X86::VFMADDSUB231PSZ128m, 0 },
- { X86::VFMADDSUB231PSZ256r, X86::VFMADDSUB231PSZ256m, 0 },
- { X86::VFMADDSUB231PSZr, X86::VFMADDSUB231PSZm, 0 },
- { X86::VFMADDSUB231PSr, X86::VFMADDSUB231PSm, 0 },
- { X86::VFMADDSUBPD4Yrr, X86::VFMADDSUBPD4Yrm, 0 },
- { X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, 0 },
- { X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Yrm, 0 },
- { X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, 0 },
- { X86::VFMSUB132PDYr, X86::VFMSUB132PDYm, 0 },
- { X86::VFMSUB132PDZ128r, X86::VFMSUB132PDZ128m, 0 },
- { X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256m, 0 },
- { X86::VFMSUB132PDZr, X86::VFMSUB132PDZm, 0 },
- { X86::VFMSUB132PDr, X86::VFMSUB132PDm, 0 },
- { X86::VFMSUB132PHZ128r, X86::VFMSUB132PHZ128m, 0 },
- { X86::VFMSUB132PHZ256r, X86::VFMSUB132PHZ256m, 0 },
- { X86::VFMSUB132PHZr, X86::VFMSUB132PHZm, 0 },
- { X86::VFMSUB132PSYr, X86::VFMSUB132PSYm, 0 },
- { X86::VFMSUB132PSZ128r, X86::VFMSUB132PSZ128m, 0 },
- { X86::VFMSUB132PSZ256r, X86::VFMSUB132PSZ256m, 0 },
- { X86::VFMSUB132PSZr, X86::VFMSUB132PSZm, 0 },
- { X86::VFMSUB132PSr, X86::VFMSUB132PSm, 0 },
- { X86::VFMSUB132SDZr, X86::VFMSUB132SDZm, 0 },
- { X86::VFMSUB132SDZr_Int, X86::VFMSUB132SDZm_Int, TB_NO_REVERSE },
- { X86::VFMSUB132SDr, X86::VFMSUB132SDm, 0 },
- { X86::VFMSUB132SDr_Int, X86::VFMSUB132SDm_Int, TB_NO_REVERSE },
- { X86::VFMSUB132SHZr, X86::VFMSUB132SHZm, 0 },
- { X86::VFMSUB132SHZr_Int, X86::VFMSUB132SHZm_Int, TB_NO_REVERSE },
- { X86::VFMSUB132SSZr, X86::VFMSUB132SSZm, 0 },
- { X86::VFMSUB132SSZr_Int, X86::VFMSUB132SSZm_Int, TB_NO_REVERSE },
- { X86::VFMSUB132SSr, X86::VFMSUB132SSm, 0 },
- { X86::VFMSUB132SSr_Int, X86::VFMSUB132SSm_Int, TB_NO_REVERSE },
- { X86::VFMSUB213PDYr, X86::VFMSUB213PDYm, 0 },
- { X86::VFMSUB213PDZ128r, X86::VFMSUB213PDZ128m, 0 },
- { X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256m, 0 },
- { X86::VFMSUB213PDZr, X86::VFMSUB213PDZm, 0 },
- { X86::VFMSUB213PDr, X86::VFMSUB213PDm, 0 },
- { X86::VFMSUB213PHZ128r, X86::VFMSUB213PHZ128m, 0 },
- { X86::VFMSUB213PHZ256r, X86::VFMSUB213PHZ256m, 0 },
- { X86::VFMSUB213PHZr, X86::VFMSUB213PHZm, 0 },
- { X86::VFMSUB213PSYr, X86::VFMSUB213PSYm, 0 },
- { X86::VFMSUB213PSZ128r, X86::VFMSUB213PSZ128m, 0 },
- { X86::VFMSUB213PSZ256r, X86::VFMSUB213PSZ256m, 0 },
- { X86::VFMSUB213PSZr, X86::VFMSUB213PSZm, 0 },
- { X86::VFMSUB213PSr, X86::VFMSUB213PSm, 0 },
- { X86::VFMSUB213SDZr, X86::VFMSUB213SDZm, 0 },
- { X86::VFMSUB213SDZr_Int, X86::VFMSUB213SDZm_Int, TB_NO_REVERSE },
- { X86::VFMSUB213SDr, X86::VFMSUB213SDm, 0 },
- { X86::VFMSUB213SDr_Int, X86::VFMSUB213SDm_Int, TB_NO_REVERSE },
- { X86::VFMSUB213SHZr, X86::VFMSUB213SHZm, 0 },
- { X86::VFMSUB213SHZr_Int, X86::VFMSUB213SHZm_Int, TB_NO_REVERSE },
- { X86::VFMSUB213SSZr, X86::VFMSUB213SSZm, 0 },
- { X86::VFMSUB213SSZr_Int, X86::VFMSUB213SSZm_Int, TB_NO_REVERSE },
- { X86::VFMSUB213SSr, X86::VFMSUB213SSm, 0 },
- { X86::VFMSUB213SSr_Int, X86::VFMSUB213SSm_Int, TB_NO_REVERSE },
- { X86::VFMSUB231PDYr, X86::VFMSUB231PDYm, 0 },
- { X86::VFMSUB231PDZ128r, X86::VFMSUB231PDZ128m, 0 },
- { X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256m, 0 },
- { X86::VFMSUB231PDZr, X86::VFMSUB231PDZm, 0 },
- { X86::VFMSUB231PDr, X86::VFMSUB231PDm, 0 },
- { X86::VFMSUB231PHZ128r, X86::VFMSUB231PHZ128m, 0 },
- { X86::VFMSUB231PHZ256r, X86::VFMSUB231PHZ256m, 0 },
- { X86::VFMSUB231PHZr, X86::VFMSUB231PHZm, 0 },
- { X86::VFMSUB231PSYr, X86::VFMSUB231PSYm, 0 },
- { X86::VFMSUB231PSZ128r, X86::VFMSUB231PSZ128m, 0 },
- { X86::VFMSUB231PSZ256r, X86::VFMSUB231PSZ256m, 0 },
- { X86::VFMSUB231PSZr, X86::VFMSUB231PSZm, 0 },
- { X86::VFMSUB231PSr, X86::VFMSUB231PSm, 0 },
- { X86::VFMSUB231SDZr, X86::VFMSUB231SDZm, 0 },
- { X86::VFMSUB231SDZr_Int, X86::VFMSUB231SDZm_Int, TB_NO_REVERSE },
- { X86::VFMSUB231SDr, X86::VFMSUB231SDm, 0 },
- { X86::VFMSUB231SDr_Int, X86::VFMSUB231SDm_Int, TB_NO_REVERSE },
- { X86::VFMSUB231SHZr, X86::VFMSUB231SHZm, 0 },
- { X86::VFMSUB231SHZr_Int, X86::VFMSUB231SHZm_Int, TB_NO_REVERSE },
- { X86::VFMSUB231SSZr, X86::VFMSUB231SSZm, 0 },
- { X86::VFMSUB231SSZr_Int, X86::VFMSUB231SSZm_Int, TB_NO_REVERSE },
- { X86::VFMSUB231SSr, X86::VFMSUB231SSm, 0 },
- { X86::VFMSUB231SSr_Int, X86::VFMSUB231SSm_Int, TB_NO_REVERSE },
- { X86::VFMSUBADD132PDYr, X86::VFMSUBADD132PDYm, 0 },
- { X86::VFMSUBADD132PDZ128r, X86::VFMSUBADD132PDZ128m, 0 },
- { X86::VFMSUBADD132PDZ256r, X86::VFMSUBADD132PDZ256m, 0 },
- { X86::VFMSUBADD132PDZr, X86::VFMSUBADD132PDZm, 0 },
- { X86::VFMSUBADD132PDr, X86::VFMSUBADD132PDm, 0 },
- { X86::VFMSUBADD132PHZ128r, X86::VFMSUBADD132PHZ128m, 0 },
- { X86::VFMSUBADD132PHZ256r, X86::VFMSUBADD132PHZ256m, 0 },
- { X86::VFMSUBADD132PHZr, X86::VFMSUBADD132PHZm, 0 },
- { X86::VFMSUBADD132PSYr, X86::VFMSUBADD132PSYm, 0 },
- { X86::VFMSUBADD132PSZ128r, X86::VFMSUBADD132PSZ128m, 0 },
- { X86::VFMSUBADD132PSZ256r, X86::VFMSUBADD132PSZ256m, 0 },
- { X86::VFMSUBADD132PSZr, X86::VFMSUBADD132PSZm, 0 },
- { X86::VFMSUBADD132PSr, X86::VFMSUBADD132PSm, 0 },
- { X86::VFMSUBADD213PDYr, X86::VFMSUBADD213PDYm, 0 },
- { X86::VFMSUBADD213PDZ128r, X86::VFMSUBADD213PDZ128m, 0 },
- { X86::VFMSUBADD213PDZ256r, X86::VFMSUBADD213PDZ256m, 0 },
- { X86::VFMSUBADD213PDZr, X86::VFMSUBADD213PDZm, 0 },
- { X86::VFMSUBADD213PDr, X86::VFMSUBADD213PDm, 0 },
- { X86::VFMSUBADD213PHZ128r, X86::VFMSUBADD213PHZ128m, 0 },
- { X86::VFMSUBADD213PHZ256r, X86::VFMSUBADD213PHZ256m, 0 },
- { X86::VFMSUBADD213PHZr, X86::VFMSUBADD213PHZm, 0 },
- { X86::VFMSUBADD213PSYr, X86::VFMSUBADD213PSYm, 0 },
- { X86::VFMSUBADD213PSZ128r, X86::VFMSUBADD213PSZ128m, 0 },
- { X86::VFMSUBADD213PSZ256r, X86::VFMSUBADD213PSZ256m, 0 },
- { X86::VFMSUBADD213PSZr, X86::VFMSUBADD213PSZm, 0 },
- { X86::VFMSUBADD213PSr, X86::VFMSUBADD213PSm, 0 },
- { X86::VFMSUBADD231PDYr, X86::VFMSUBADD231PDYm, 0 },
- { X86::VFMSUBADD231PDZ128r, X86::VFMSUBADD231PDZ128m, 0 },
- { X86::VFMSUBADD231PDZ256r, X86::VFMSUBADD231PDZ256m, 0 },
- { X86::VFMSUBADD231PDZr, X86::VFMSUBADD231PDZm, 0 },
- { X86::VFMSUBADD231PDr, X86::VFMSUBADD231PDm, 0 },
- { X86::VFMSUBADD231PHZ128r, X86::VFMSUBADD231PHZ128m, 0 },
- { X86::VFMSUBADD231PHZ256r, X86::VFMSUBADD231PHZ256m, 0 },
- { X86::VFMSUBADD231PHZr, X86::VFMSUBADD231PHZm, 0 },
- { X86::VFMSUBADD231PSYr, X86::VFMSUBADD231PSYm, 0 },
- { X86::VFMSUBADD231PSZ128r, X86::VFMSUBADD231PSZ128m, 0 },
- { X86::VFMSUBADD231PSZ256r, X86::VFMSUBADD231PSZ256m, 0 },
- { X86::VFMSUBADD231PSZr, X86::VFMSUBADD231PSZm, 0 },
- { X86::VFMSUBADD231PSr, X86::VFMSUBADD231PSm, 0 },
- { X86::VFMSUBADDPD4Yrr, X86::VFMSUBADDPD4Yrm, 0 },
- { X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, 0 },
- { X86::VFMSUBADDPS4Yrr, X86::VFMSUBADDPS4Yrm, 0 },
- { X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, 0 },
- { X86::VFMSUBPD4Yrr, X86::VFMSUBPD4Yrm, 0 },
- { X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, 0 },
- { X86::VFMSUBPS4Yrr, X86::VFMSUBPS4Yrm, 0 },
- { X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, 0 },
- { X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, 0 },
- { X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4rm_Int, TB_NO_REVERSE },
- { X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, 0 },
- { X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4rm_Int, TB_NO_REVERSE },
- { X86::VFMULCPHZ128rrkz, X86::VFMULCPHZ128rmkz, 0 },
- { X86::VFMULCPHZ256rrkz, X86::VFMULCPHZ256rmkz, 0 },
- { X86::VFMULCPHZrrkz, X86::VFMULCPHZrmkz, 0 },
- { X86::VFMULCSHZrrkz, X86::VFMULCSHZrmkz, TB_NO_REVERSE },
- { X86::VFNMADD132PDYr, X86::VFNMADD132PDYm, 0 },
- { X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128m, 0 },
- { X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256m, 0 },
- { X86::VFNMADD132PDZr, X86::VFNMADD132PDZm, 0 },
- { X86::VFNMADD132PDr, X86::VFNMADD132PDm, 0 },
- { X86::VFNMADD132PHZ128r, X86::VFNMADD132PHZ128m, 0 },
- { X86::VFNMADD132PHZ256r, X86::VFNMADD132PHZ256m, 0 },
- { X86::VFNMADD132PHZr, X86::VFNMADD132PHZm, 0 },
- { X86::VFNMADD132PSYr, X86::VFNMADD132PSYm, 0 },
- { X86::VFNMADD132PSZ128r, X86::VFNMADD132PSZ128m, 0 },
- { X86::VFNMADD132PSZ256r, X86::VFNMADD132PSZ256m, 0 },
- { X86::VFNMADD132PSZr, X86::VFNMADD132PSZm, 0 },
- { X86::VFNMADD132PSr, X86::VFNMADD132PSm, 0 },
- { X86::VFNMADD132SDZr, X86::VFNMADD132SDZm, 0 },
- { X86::VFNMADD132SDZr_Int, X86::VFNMADD132SDZm_Int, TB_NO_REVERSE },
- { X86::VFNMADD132SDr, X86::VFNMADD132SDm, 0 },
- { X86::VFNMADD132SDr_Int, X86::VFNMADD132SDm_Int, TB_NO_REVERSE },
- { X86::VFNMADD132SHZr, X86::VFNMADD132SHZm, 0 },
- { X86::VFNMADD132SHZr_Int, X86::VFNMADD132SHZm_Int, TB_NO_REVERSE },
- { X86::VFNMADD132SSZr, X86::VFNMADD132SSZm, 0 },
- { X86::VFNMADD132SSZr_Int, X86::VFNMADD132SSZm_Int, TB_NO_REVERSE },
- { X86::VFNMADD132SSr, X86::VFNMADD132SSm, 0 },
- { X86::VFNMADD132SSr_Int, X86::VFNMADD132SSm_Int, TB_NO_REVERSE },
- { X86::VFNMADD213PDYr, X86::VFNMADD213PDYm, 0 },
- { X86::VFNMADD213PDZ128r, X86::VFNMADD213PDZ128m, 0 },
- { X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256m, 0 },
- { X86::VFNMADD213PDZr, X86::VFNMADD213PDZm, 0 },
- { X86::VFNMADD213PDr, X86::VFNMADD213PDm, 0 },
- { X86::VFNMADD213PHZ128r, X86::VFNMADD213PHZ128m, 0 },
- { X86::VFNMADD213PHZ256r, X86::VFNMADD213PHZ256m, 0 },
- { X86::VFNMADD213PHZr, X86::VFNMADD213PHZm, 0 },
- { X86::VFNMADD213PSYr, X86::VFNMADD213PSYm, 0 },
- { X86::VFNMADD213PSZ128r, X86::VFNMADD213PSZ128m, 0 },
- { X86::VFNMADD213PSZ256r, X86::VFNMADD213PSZ256m, 0 },
- { X86::VFNMADD213PSZr, X86::VFNMADD213PSZm, 0 },
- { X86::VFNMADD213PSr, X86::VFNMADD213PSm, 0 },
- { X86::VFNMADD213SDZr, X86::VFNMADD213SDZm, 0 },
- { X86::VFNMADD213SDZr_Int, X86::VFNMADD213SDZm_Int, TB_NO_REVERSE },
- { X86::VFNMADD213SDr, X86::VFNMADD213SDm, 0 },
- { X86::VFNMADD213SDr_Int, X86::VFNMADD213SDm_Int, TB_NO_REVERSE },
- { X86::VFNMADD213SHZr, X86::VFNMADD213SHZm, 0 },
- { X86::VFNMADD213SHZr_Int, X86::VFNMADD213SHZm_Int, TB_NO_REVERSE },
- { X86::VFNMADD213SSZr, X86::VFNMADD213SSZm, 0 },
- { X86::VFNMADD213SSZr_Int, X86::VFNMADD213SSZm_Int, TB_NO_REVERSE },
- { X86::VFNMADD213SSr, X86::VFNMADD213SSm, 0 },
- { X86::VFNMADD213SSr_Int, X86::VFNMADD213SSm_Int, TB_NO_REVERSE },
- { X86::VFNMADD231PDYr, X86::VFNMADD231PDYm, 0 },
- { X86::VFNMADD231PDZ128r, X86::VFNMADD231PDZ128m, 0 },
- { X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256m, 0 },
- { X86::VFNMADD231PDZr, X86::VFNMADD231PDZm, 0 },
- { X86::VFNMADD231PDr, X86::VFNMADD231PDm, 0 },
- { X86::VFNMADD231PHZ128r, X86::VFNMADD231PHZ128m, 0 },
- { X86::VFNMADD231PHZ256r, X86::VFNMADD231PHZ256m, 0 },
- { X86::VFNMADD231PHZr, X86::VFNMADD231PHZm, 0 },
- { X86::VFNMADD231PSYr, X86::VFNMADD231PSYm, 0 },
- { X86::VFNMADD231PSZ128r, X86::VFNMADD231PSZ128m, 0 },
- { X86::VFNMADD231PSZ256r, X86::VFNMADD231PSZ256m, 0 },
- { X86::VFNMADD231PSZr, X86::VFNMADD231PSZm, 0 },
- { X86::VFNMADD231PSr, X86::VFNMADD231PSm, 0 },
- { X86::VFNMADD231SDZr, X86::VFNMADD231SDZm, 0 },
- { X86::VFNMADD231SDZr_Int, X86::VFNMADD231SDZm_Int, TB_NO_REVERSE },
- { X86::VFNMADD231SDr, X86::VFNMADD231SDm, 0 },
- { X86::VFNMADD231SDr_Int, X86::VFNMADD231SDm_Int, TB_NO_REVERSE },
- { X86::VFNMADD231SHZr, X86::VFNMADD231SHZm, 0 },
- { X86::VFNMADD231SHZr_Int, X86::VFNMADD231SHZm_Int, TB_NO_REVERSE },
- { X86::VFNMADD231SSZr, X86::VFNMADD231SSZm, 0 },
- { X86::VFNMADD231SSZr_Int, X86::VFNMADD231SSZm_Int, TB_NO_REVERSE },
- { X86::VFNMADD231SSr, X86::VFNMADD231SSm, 0 },
- { X86::VFNMADD231SSr_Int, X86::VFNMADD231SSm_Int, TB_NO_REVERSE },
- { X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Yrm, 0 },
- { X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, 0 },
- { X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Yrm, 0 },
- { X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, 0 },
- { X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, 0 },
- { X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4rm_Int, TB_NO_REVERSE },
- { X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, 0 },
- { X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4rm_Int, TB_NO_REVERSE },
- { X86::VFNMSUB132PDYr, X86::VFNMSUB132PDYm, 0 },
- { X86::VFNMSUB132PDZ128r, X86::VFNMSUB132PDZ128m, 0 },
- { X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256m, 0 },
- { X86::VFNMSUB132PDZr, X86::VFNMSUB132PDZm, 0 },
- { X86::VFNMSUB132PDr, X86::VFNMSUB132PDm, 0 },
- { X86::VFNMSUB132PHZ128r, X86::VFNMSUB132PHZ128m, 0 },
- { X86::VFNMSUB132PHZ256r, X86::VFNMSUB132PHZ256m, 0 },
- { X86::VFNMSUB132PHZr, X86::VFNMSUB132PHZm, 0 },
- { X86::VFNMSUB132PSYr, X86::VFNMSUB132PSYm, 0 },
- { X86::VFNMSUB132PSZ128r, X86::VFNMSUB132PSZ128m, 0 },
- { X86::VFNMSUB132PSZ256r, X86::VFNMSUB132PSZ256m, 0 },
- { X86::VFNMSUB132PSZr, X86::VFNMSUB132PSZm, 0 },
- { X86::VFNMSUB132PSr, X86::VFNMSUB132PSm, 0 },
- { X86::VFNMSUB132SDZr, X86::VFNMSUB132SDZm, 0 },
- { X86::VFNMSUB132SDZr_Int, X86::VFNMSUB132SDZm_Int, TB_NO_REVERSE },
- { X86::VFNMSUB132SDr, X86::VFNMSUB132SDm, 0 },
- { X86::VFNMSUB132SDr_Int, X86::VFNMSUB132SDm_Int, TB_NO_REVERSE },
- { X86::VFNMSUB132SHZr, X86::VFNMSUB132SHZm, 0 },
- { X86::VFNMSUB132SHZr_Int, X86::VFNMSUB132SHZm_Int, TB_NO_REVERSE },
- { X86::VFNMSUB132SSZr, X86::VFNMSUB132SSZm, 0 },
- { X86::VFNMSUB132SSZr_Int, X86::VFNMSUB132SSZm_Int, TB_NO_REVERSE },
- { X86::VFNMSUB132SSr, X86::VFNMSUB132SSm, 0 },
- { X86::VFNMSUB132SSr_Int, X86::VFNMSUB132SSm_Int, TB_NO_REVERSE },
- { X86::VFNMSUB213PDYr, X86::VFNMSUB213PDYm, 0 },
- { X86::VFNMSUB213PDZ128r, X86::VFNMSUB213PDZ128m, 0 },
- { X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256m, 0 },
- { X86::VFNMSUB213PDZr, X86::VFNMSUB213PDZm, 0 },
- { X86::VFNMSUB213PDr, X86::VFNMSUB213PDm, 0 },
- { X86::VFNMSUB213PHZ128r, X86::VFNMSUB213PHZ128m, 0 },
- { X86::VFNMSUB213PHZ256r, X86::VFNMSUB213PHZ256m, 0 },
- { X86::VFNMSUB213PHZr, X86::VFNMSUB213PHZm, 0 },
- { X86::VFNMSUB213PSYr, X86::VFNMSUB213PSYm, 0 },
- { X86::VFNMSUB213PSZ128r, X86::VFNMSUB213PSZ128m, 0 },
- { X86::VFNMSUB213PSZ256r, X86::VFNMSUB213PSZ256m, 0 },
- { X86::VFNMSUB213PSZr, X86::VFNMSUB213PSZm, 0 },
- { X86::VFNMSUB213PSr, X86::VFNMSUB213PSm, 0 },
- { X86::VFNMSUB213SDZr, X86::VFNMSUB213SDZm, 0 },
- { X86::VFNMSUB213SDZr_Int, X86::VFNMSUB213SDZm_Int, TB_NO_REVERSE },
- { X86::VFNMSUB213SDr, X86::VFNMSUB213SDm, 0 },
- { X86::VFNMSUB213SDr_Int, X86::VFNMSUB213SDm_Int, TB_NO_REVERSE },
- { X86::VFNMSUB213SHZr, X86::VFNMSUB213SHZm, 0 },
- { X86::VFNMSUB213SHZr_Int, X86::VFNMSUB213SHZm_Int, TB_NO_REVERSE },
- { X86::VFNMSUB213SSZr, X86::VFNMSUB213SSZm, 0 },
- { X86::VFNMSUB213SSZr_Int, X86::VFNMSUB213SSZm_Int, TB_NO_REVERSE },
- { X86::VFNMSUB213SSr, X86::VFNMSUB213SSm, 0 },
- { X86::VFNMSUB213SSr_Int, X86::VFNMSUB213SSm_Int, TB_NO_REVERSE },
- { X86::VFNMSUB231PDYr, X86::VFNMSUB231PDYm, 0 },
- { X86::VFNMSUB231PDZ128r, X86::VFNMSUB231PDZ128m, 0 },
- { X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256m, 0 },
- { X86::VFNMSUB231PDZr, X86::VFNMSUB231PDZm, 0 },
- { X86::VFNMSUB231PDr, X86::VFNMSUB231PDm, 0 },
- { X86::VFNMSUB231PHZ128r, X86::VFNMSUB231PHZ128m, 0 },
- { X86::VFNMSUB231PHZ256r, X86::VFNMSUB231PHZ256m, 0 },
- { X86::VFNMSUB231PHZr, X86::VFNMSUB231PHZm, 0 },
- { X86::VFNMSUB231PSYr, X86::VFNMSUB231PSYm, 0 },
- { X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128m, 0 },
- { X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256m, 0 },
- { X86::VFNMSUB231PSZr, X86::VFNMSUB231PSZm, 0 },
- { X86::VFNMSUB231PSr, X86::VFNMSUB231PSm, 0 },
- { X86::VFNMSUB231SDZr, X86::VFNMSUB231SDZm, 0 },
- { X86::VFNMSUB231SDZr_Int, X86::VFNMSUB231SDZm_Int, TB_NO_REVERSE },
- { X86::VFNMSUB231SDr, X86::VFNMSUB231SDm, 0 },
- { X86::VFNMSUB231SDr_Int, X86::VFNMSUB231SDm_Int, TB_NO_REVERSE },
- { X86::VFNMSUB231SHZr, X86::VFNMSUB231SHZm, 0 },
- { X86::VFNMSUB231SHZr_Int, X86::VFNMSUB231SHZm_Int, TB_NO_REVERSE },
- { X86::VFNMSUB231SSZr, X86::VFNMSUB231SSZm, 0 },
- { X86::VFNMSUB231SSZr_Int, X86::VFNMSUB231SSZm_Int, TB_NO_REVERSE },
- { X86::VFNMSUB231SSr, X86::VFNMSUB231SSm, 0 },
- { X86::VFNMSUB231SSr_Int, X86::VFNMSUB231SSm_Int, TB_NO_REVERSE },
- { X86::VFNMSUBPD4Yrr, X86::VFNMSUBPD4Yrm, 0 },
- { X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, 0 },
- { X86::VFNMSUBPS4Yrr, X86::VFNMSUBPS4Yrm, 0 },
- { X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, 0 },
- { X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, 0 },
- { X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4rm_Int, TB_NO_REVERSE },
- { X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, 0 },
- { X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4rm_Int, TB_NO_REVERSE },
- { X86::VGETEXPPDZ128rk, X86::VGETEXPPDZ128mk, 0 },
- { X86::VGETEXPPDZ256rk, X86::VGETEXPPDZ256mk, 0 },
- { X86::VGETEXPPDZrk, X86::VGETEXPPDZmk, 0 },
- { X86::VGETEXPPHZ128rk, X86::VGETEXPPHZ128mk, 0 },
- { X86::VGETEXPPHZ256rk, X86::VGETEXPPHZ256mk, 0 },
- { X86::VGETEXPPHZrk, X86::VGETEXPPHZmk, 0 },
- { X86::VGETEXPPSZ128rk, X86::VGETEXPPSZ128mk, 0 },
- { X86::VGETEXPPSZ256rk, X86::VGETEXPPSZ256mk, 0 },
- { X86::VGETEXPPSZrk, X86::VGETEXPPSZmk, 0 },
- { X86::VGETEXPSDZrkz, X86::VGETEXPSDZmkz, TB_NO_REVERSE },
- { X86::VGETEXPSHZrkz, X86::VGETEXPSHZmkz, TB_NO_REVERSE },
- { X86::VGETEXPSSZrkz, X86::VGETEXPSSZmkz, TB_NO_REVERSE },
- { X86::VGETMANTPDZ128rrik, X86::VGETMANTPDZ128rmik, 0 },
- { X86::VGETMANTPDZ256rrik, X86::VGETMANTPDZ256rmik, 0 },
- { X86::VGETMANTPDZrrik, X86::VGETMANTPDZrmik, 0 },
- { X86::VGETMANTPHZ128rrik, X86::VGETMANTPHZ128rmik, 0 },
- { X86::VGETMANTPHZ256rrik, X86::VGETMANTPHZ256rmik, 0 },
- { X86::VGETMANTPHZrrik, X86::VGETMANTPHZrmik, 0 },
- { X86::VGETMANTPSZ128rrik, X86::VGETMANTPSZ128rmik, 0 },
- { X86::VGETMANTPSZ256rrik, X86::VGETMANTPSZ256rmik, 0 },
- { X86::VGETMANTPSZrrik, X86::VGETMANTPSZrmik, 0 },
- { X86::VGETMANTSDZrrikz, X86::VGETMANTSDZrmikz, TB_NO_REVERSE },
- { X86::VGETMANTSHZrrikz, X86::VGETMANTSHZrmikz, TB_NO_REVERSE },
- { X86::VGETMANTSSZrrikz, X86::VGETMANTSSZrmikz, TB_NO_REVERSE },
- { X86::VGF2P8AFFINEINVQBZ128rrikz, X86::VGF2P8AFFINEINVQBZ128rmikz, 0 },
- { X86::VGF2P8AFFINEINVQBZ256rrikz, X86::VGF2P8AFFINEINVQBZ256rmikz, 0 },
- { X86::VGF2P8AFFINEINVQBZrrikz, X86::VGF2P8AFFINEINVQBZrmikz, 0 },
- { X86::VGF2P8AFFINEQBZ128rrikz, X86::VGF2P8AFFINEQBZ128rmikz, 0 },
- { X86::VGF2P8AFFINEQBZ256rrikz, X86::VGF2P8AFFINEQBZ256rmikz, 0 },
- { X86::VGF2P8AFFINEQBZrrikz, X86::VGF2P8AFFINEQBZrmikz, 0 },
- { X86::VGF2P8MULBZ128rrkz, X86::VGF2P8MULBZ128rmkz, 0 },
- { X86::VGF2P8MULBZ256rrkz, X86::VGF2P8MULBZ256rmkz, 0 },
- { X86::VGF2P8MULBZrrkz, X86::VGF2P8MULBZrmkz, 0 },
- { X86::VINSERTF32x4Z256rrkz, X86::VINSERTF32x4Z256rmkz, 0 },
- { X86::VINSERTF32x4Zrrkz, X86::VINSERTF32x4Zrmkz, 0 },
- { X86::VINSERTF32x8Zrrkz, X86::VINSERTF32x8Zrmkz, 0 },
- { X86::VINSERTF64x2Z256rrkz, X86::VINSERTF64x2Z256rmkz, 0 },
- { X86::VINSERTF64x2Zrrkz, X86::VINSERTF64x2Zrmkz, 0 },
- { X86::VINSERTF64x4Zrrkz, X86::VINSERTF64x4Zrmkz, 0 },
- { X86::VINSERTI32x4Z256rrkz, X86::VINSERTI32x4Z256rmkz, 0 },
- { X86::VINSERTI32x4Zrrkz, X86::VINSERTI32x4Zrmkz, 0 },
- { X86::VINSERTI32x8Zrrkz, X86::VINSERTI32x8Zrmkz, 0 },
- { X86::VINSERTI64x2Z256rrkz, X86::VINSERTI64x2Z256rmkz, 0 },
- { X86::VINSERTI64x2Zrrkz, X86::VINSERTI64x2Zrmkz, 0 },
- { X86::VINSERTI64x4Zrrkz, X86::VINSERTI64x4Zrmkz, 0 },
- { X86::VMAXCPDZ128rrkz, X86::VMAXCPDZ128rmkz, 0 },
- { X86::VMAXCPDZ256rrkz, X86::VMAXCPDZ256rmkz, 0 },
- { X86::VMAXCPDZrrkz, X86::VMAXCPDZrmkz, 0 },
- { X86::VMAXCPHZ128rrkz, X86::VMAXCPHZ128rmkz, 0 },
- { X86::VMAXCPHZ256rrkz, X86::VMAXCPHZ256rmkz, 0 },
- { X86::VMAXCPHZrrkz, X86::VMAXCPHZrmkz, 0 },
- { X86::VMAXCPSZ128rrkz, X86::VMAXCPSZ128rmkz, 0 },
- { X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmkz, 0 },
- { X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0 },
- { X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 },
- { X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0 },
- { X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 },
- { X86::VMAXPHZ128rrkz, X86::VMAXPHZ128rmkz, 0 },
- { X86::VMAXPHZ256rrkz, X86::VMAXPHZ256rmkz, 0 },
- { X86::VMAXPHZrrkz, X86::VMAXPHZrmkz, 0 },
- { X86::VMAXPSZ128rrkz, X86::VMAXPSZ128rmkz, 0 },
- { X86::VMAXPSZ256rrkz, X86::VMAXPSZ256rmkz, 0 },
- { X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 },
- { X86::VMAXSDZrr_Intkz, X86::VMAXSDZrm_Intkz, TB_NO_REVERSE },
- { X86::VMAXSHZrr_Intkz, X86::VMAXSHZrm_Intkz, TB_NO_REVERSE },
- { X86::VMAXSSZrr_Intkz, X86::VMAXSSZrm_Intkz, TB_NO_REVERSE },
- { X86::VMINCPDZ128rrkz, X86::VMINCPDZ128rmkz, 0 },
- { X86::VMINCPDZ256rrkz, X86::VMINCPDZ256rmkz, 0 },
- { X86::VMINCPDZrrkz, X86::VMINCPDZrmkz, 0 },
- { X86::VMINCPHZ128rrkz, X86::VMINCPHZ128rmkz, 0 },
- { X86::VMINCPHZ256rrkz, X86::VMINCPHZ256rmkz, 0 },
- { X86::VMINCPHZrrkz, X86::VMINCPHZrmkz, 0 },
- { X86::VMINCPSZ128rrkz, X86::VMINCPSZ128rmkz, 0 },
- { X86::VMINCPSZ256rrkz, X86::VMINCPSZ256rmkz, 0 },
- { X86::VMINCPSZrrkz, X86::VMINCPSZrmkz, 0 },
- { X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0 },
- { X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0 },
- { X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 },
- { X86::VMINPHZ128rrkz, X86::VMINPHZ128rmkz, 0 },
- { X86::VMINPHZ256rrkz, X86::VMINPHZ256rmkz, 0 },
- { X86::VMINPHZrrkz, X86::VMINPHZrmkz, 0 },
- { X86::VMINPSZ128rrkz, X86::VMINPSZ128rmkz, 0 },
- { X86::VMINPSZ256rrkz, X86::VMINPSZ256rmkz, 0 },
- { X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 },
- { X86::VMINSDZrr_Intkz, X86::VMINSDZrm_Intkz, TB_NO_REVERSE },
- { X86::VMINSHZrr_Intkz, X86::VMINSHZrm_Intkz, TB_NO_REVERSE },
- { X86::VMINSSZrr_Intkz, X86::VMINSSZrm_Intkz, TB_NO_REVERSE },
- { X86::VMOVAPDZ128rrk, X86::VMOVAPDZ128rmk, TB_NO_REVERSE | TB_ALIGN_16 },
- { X86::VMOVAPDZ256rrk, X86::VMOVAPDZ256rmk, TB_NO_REVERSE | TB_ALIGN_32 },
- { X86::VMOVAPDZrrk, X86::VMOVAPDZrmk, TB_NO_REVERSE | TB_ALIGN_64 },
- { X86::VMOVAPSZ128rrk, X86::VMOVAPSZ128rmk, TB_NO_REVERSE | TB_ALIGN_16 },
- { X86::VMOVAPSZ256rrk, X86::VMOVAPSZ256rmk, TB_NO_REVERSE | TB_ALIGN_32 },
- { X86::VMOVAPSZrrk, X86::VMOVAPSZrmk, TB_NO_REVERSE | TB_ALIGN_64 },
- { X86::VMOVDDUPZ128rrk, X86::VMOVDDUPZ128rmk, TB_NO_REVERSE },
- { X86::VMOVDDUPZ256rrk, X86::VMOVDDUPZ256rmk, 0 },
- { X86::VMOVDDUPZrrk, X86::VMOVDDUPZrmk, 0 },
- { X86::VMOVDQA32Z128rrk, X86::VMOVDQA32Z128rmk, TB_NO_REVERSE | TB_ALIGN_16 },
- { X86::VMOVDQA32Z256rrk, X86::VMOVDQA32Z256rmk, TB_NO_REVERSE | TB_ALIGN_32 },
- { X86::VMOVDQA32Zrrk, X86::VMOVDQA32Zrmk, TB_NO_REVERSE | TB_ALIGN_64 },
- { X86::VMOVDQA64Z128rrk, X86::VMOVDQA64Z128rmk, TB_NO_REVERSE | TB_ALIGN_16 },
- { X86::VMOVDQA64Z256rrk, X86::VMOVDQA64Z256rmk, TB_NO_REVERSE | TB_ALIGN_32 },
- { X86::VMOVDQA64Zrrk, X86::VMOVDQA64Zrmk, TB_NO_REVERSE | TB_ALIGN_64 },
- { X86::VMOVDQU16Z128rrk, X86::VMOVDQU16Z128rmk, TB_NO_REVERSE },
- { X86::VMOVDQU16Z256rrk, X86::VMOVDQU16Z256rmk, TB_NO_REVERSE },
- { X86::VMOVDQU16Zrrk, X86::VMOVDQU16Zrmk, TB_NO_REVERSE },
- { X86::VMOVDQU32Z128rrk, X86::VMOVDQU32Z128rmk, TB_NO_REVERSE },
- { X86::VMOVDQU32Z256rrk, X86::VMOVDQU32Z256rmk, TB_NO_REVERSE },
- { X86::VMOVDQU32Zrrk, X86::VMOVDQU32Zrmk, TB_NO_REVERSE },
- { X86::VMOVDQU64Z128rrk, X86::VMOVDQU64Z128rmk, TB_NO_REVERSE },
- { X86::VMOVDQU64Z256rrk, X86::VMOVDQU64Z256rmk, TB_NO_REVERSE },
- { X86::VMOVDQU64Zrrk, X86::VMOVDQU64Zrmk, TB_NO_REVERSE },
- { X86::VMOVDQU8Z128rrk, X86::VMOVDQU8Z128rmk, TB_NO_REVERSE },
- { X86::VMOVDQU8Z256rrk, X86::VMOVDQU8Z256rmk, TB_NO_REVERSE },
- { X86::VMOVDQU8Zrrk, X86::VMOVDQU8Zrmk, TB_NO_REVERSE },
- { X86::VMOVSHDUPZ128rrk, X86::VMOVSHDUPZ128rmk, 0 },
- { X86::VMOVSHDUPZ256rrk, X86::VMOVSHDUPZ256rmk, 0 },
- { X86::VMOVSHDUPZrrk, X86::VMOVSHDUPZrmk, 0 },
- { X86::VMOVSLDUPZ128rrk, X86::VMOVSLDUPZ128rmk, 0 },
- { X86::VMOVSLDUPZ256rrk, X86::VMOVSLDUPZ256rmk, 0 },
- { X86::VMOVSLDUPZrrk, X86::VMOVSLDUPZrmk, 0 },
- { X86::VMOVUPDZ128rrk, X86::VMOVUPDZ128rmk, TB_NO_REVERSE },
- { X86::VMOVUPDZ256rrk, X86::VMOVUPDZ256rmk, TB_NO_REVERSE },
- { X86::VMOVUPDZrrk, X86::VMOVUPDZrmk, TB_NO_REVERSE },
- { X86::VMOVUPSZ128rrk, X86::VMOVUPSZ128rmk, TB_NO_REVERSE },
- { X86::VMOVUPSZ256rrk, X86::VMOVUPSZ256rmk, TB_NO_REVERSE },
- { X86::VMOVUPSZrrk, X86::VMOVUPSZrmk, TB_NO_REVERSE },
- { X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0 },
- { X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0 },
- { X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 },
- { X86::VMULPHZ128rrkz, X86::VMULPHZ128rmkz, 0 },
- { X86::VMULPHZ256rrkz, X86::VMULPHZ256rmkz, 0 },
- { X86::VMULPHZrrkz, X86::VMULPHZrmkz, 0 },
- { X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 },
- { X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 },
- { X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 },
- { X86::VMULSDZrr_Intkz, X86::VMULSDZrm_Intkz, TB_NO_REVERSE },
- { X86::VMULSHZrr_Intkz, X86::VMULSHZrm_Intkz, TB_NO_REVERSE },
- { X86::VMULSSZrr_Intkz, X86::VMULSSZrm_Intkz, TB_NO_REVERSE },
- { X86::VORPDZ128rrkz, X86::VORPDZ128rmkz, 0 },
- { X86::VORPDZ256rrkz, X86::VORPDZ256rmkz, 0 },
- { X86::VORPDZrrkz, X86::VORPDZrmkz, 0 },
- { X86::VORPSZ128rrkz, X86::VORPSZ128rmkz, 0 },
- { X86::VORPSZ256rrkz, X86::VORPSZ256rmkz, 0 },
- { X86::VORPSZrrkz, X86::VORPSZrmkz, 0 },
- { X86::VPABSBZ128rrk, X86::VPABSBZ128rmk, 0 },
- { X86::VPABSBZ256rrk, X86::VPABSBZ256rmk, 0 },
- { X86::VPABSBZrrk, X86::VPABSBZrmk, 0 },
- { X86::VPABSDZ128rrk, X86::VPABSDZ128rmk, 0 },
- { X86::VPABSDZ256rrk, X86::VPABSDZ256rmk, 0 },
- { X86::VPABSDZrrk, X86::VPABSDZrmk, 0 },
- { X86::VPABSQZ128rrk, X86::VPABSQZ128rmk, 0 },
- { X86::VPABSQZ256rrk, X86::VPABSQZ256rmk, 0 },
- { X86::VPABSQZrrk, X86::VPABSQZrmk, 0 },
- { X86::VPABSWZ128rrk, X86::VPABSWZ128rmk, 0 },
- { X86::VPABSWZ256rrk, X86::VPABSWZ256rmk, 0 },
- { X86::VPABSWZrrk, X86::VPABSWZrmk, 0 },
- { X86::VPACKSSDWZ128rrkz, X86::VPACKSSDWZ128rmkz, 0 },
- { X86::VPACKSSDWZ256rrkz, X86::VPACKSSDWZ256rmkz, 0 },
- { X86::VPACKSSDWZrrkz, X86::VPACKSSDWZrmkz, 0 },
- { X86::VPACKSSWBZ128rrkz, X86::VPACKSSWBZ128rmkz, 0 },
- { X86::VPACKSSWBZ256rrkz, X86::VPACKSSWBZ256rmkz, 0 },
- { X86::VPACKSSWBZrrkz, X86::VPACKSSWBZrmkz, 0 },
- { X86::VPACKUSDWZ128rrkz, X86::VPACKUSDWZ128rmkz, 0 },
- { X86::VPACKUSDWZ256rrkz, X86::VPACKUSDWZ256rmkz, 0 },
- { X86::VPACKUSDWZrrkz, X86::VPACKUSDWZrmkz, 0 },
- { X86::VPACKUSWBZ128rrkz, X86::VPACKUSWBZ128rmkz, 0 },
- { X86::VPACKUSWBZ256rrkz, X86::VPACKUSWBZ256rmkz, 0 },
- { X86::VPACKUSWBZrrkz, X86::VPACKUSWBZrmkz, 0 },
- { X86::VPADDBZ128rrkz, X86::VPADDBZ128rmkz, 0 },
- { X86::VPADDBZ256rrkz, X86::VPADDBZ256rmkz, 0 },
- { X86::VPADDBZrrkz, X86::VPADDBZrmkz, 0 },
- { X86::VPADDDZ128rrkz, X86::VPADDDZ128rmkz, 0 },
- { X86::VPADDDZ256rrkz, X86::VPADDDZ256rmkz, 0 },
- { X86::VPADDDZrrkz, X86::VPADDDZrmkz, 0 },
- { X86::VPADDQZ128rrkz, X86::VPADDQZ128rmkz, 0 },
- { X86::VPADDQZ256rrkz, X86::VPADDQZ256rmkz, 0 },
- { X86::VPADDQZrrkz, X86::VPADDQZrmkz, 0 },
- { X86::VPADDSBZ128rrkz, X86::VPADDSBZ128rmkz, 0 },
- { X86::VPADDSBZ256rrkz, X86::VPADDSBZ256rmkz, 0 },
- { X86::VPADDSBZrrkz, X86::VPADDSBZrmkz, 0 },
- { X86::VPADDSWZ128rrkz, X86::VPADDSWZ128rmkz, 0 },
- { X86::VPADDSWZ256rrkz, X86::VPADDSWZ256rmkz, 0 },
- { X86::VPADDSWZrrkz, X86::VPADDSWZrmkz, 0 },
- { X86::VPADDUSBZ128rrkz, X86::VPADDUSBZ128rmkz, 0 },
- { X86::VPADDUSBZ256rrkz, X86::VPADDUSBZ256rmkz, 0 },
- { X86::VPADDUSBZrrkz, X86::VPADDUSBZrmkz, 0 },
- { X86::VPADDUSWZ128rrkz, X86::VPADDUSWZ128rmkz, 0 },
- { X86::VPADDUSWZ256rrkz, X86::VPADDUSWZ256rmkz, 0 },
- { X86::VPADDUSWZrrkz, X86::VPADDUSWZrmkz, 0 },
- { X86::VPADDWZ128rrkz, X86::VPADDWZ128rmkz, 0 },
- { X86::VPADDWZ256rrkz, X86::VPADDWZ256rmkz, 0 },
- { X86::VPADDWZrrkz, X86::VPADDWZrmkz, 0 },
- { X86::VPALIGNRZ128rrikz, X86::VPALIGNRZ128rmikz, 0 },
- { X86::VPALIGNRZ256rrikz, X86::VPALIGNRZ256rmikz, 0 },
- { X86::VPALIGNRZrrikz, X86::VPALIGNRZrmikz, 0 },
- { X86::VPANDDZ128rrkz, X86::VPANDDZ128rmkz, 0 },
- { X86::VPANDDZ256rrkz, X86::VPANDDZ256rmkz, 0 },
- { X86::VPANDDZrrkz, X86::VPANDDZrmkz, 0 },
- { X86::VPANDNDZ128rrkz, X86::VPANDNDZ128rmkz, 0 },
- { X86::VPANDNDZ256rrkz, X86::VPANDNDZ256rmkz, 0 },
- { X86::VPANDNDZrrkz, X86::VPANDNDZrmkz, 0 },
- { X86::VPANDNQZ128rrkz, X86::VPANDNQZ128rmkz, 0 },
- { X86::VPANDNQZ256rrkz, X86::VPANDNQZ256rmkz, 0 },
- { X86::VPANDNQZrrkz, X86::VPANDNQZrmkz, 0 },
- { X86::VPANDQZ128rrkz, X86::VPANDQZ128rmkz, 0 },
- { X86::VPANDQZ256rrkz, X86::VPANDQZ256rmkz, 0 },
- { X86::VPANDQZrrkz, X86::VPANDQZrmkz, 0 },
- { X86::VPAVGBZ128rrkz, X86::VPAVGBZ128rmkz, 0 },
- { X86::VPAVGBZ256rrkz, X86::VPAVGBZ256rmkz, 0 },
- { X86::VPAVGBZrrkz, X86::VPAVGBZrmkz, 0 },
- { X86::VPAVGWZ128rrkz, X86::VPAVGWZ128rmkz, 0 },
- { X86::VPAVGWZ256rrkz, X86::VPAVGWZ256rmkz, 0 },
- { X86::VPAVGWZrrkz, X86::VPAVGWZrmkz, 0 },
- { X86::VPBLENDMBZ128rrk, X86::VPBLENDMBZ128rmk, 0 },
- { X86::VPBLENDMBZ256rrk, X86::VPBLENDMBZ256rmk, 0 },
- { X86::VPBLENDMBZrrk, X86::VPBLENDMBZrmk, 0 },
- { X86::VPBLENDMDZ128rrk, X86::VPBLENDMDZ128rmk, 0 },
- { X86::VPBLENDMDZ256rrk, X86::VPBLENDMDZ256rmk, 0 },
- { X86::VPBLENDMDZrrk, X86::VPBLENDMDZrmk, 0 },
- { X86::VPBLENDMQZ128rrk, X86::VPBLENDMQZ128rmk, 0 },
- { X86::VPBLENDMQZ256rrk, X86::VPBLENDMQZ256rmk, 0 },
- { X86::VPBLENDMQZrrk, X86::VPBLENDMQZrmk, 0 },
- { X86::VPBLENDMWZ128rrk, X86::VPBLENDMWZ128rmk, 0 },
- { X86::VPBLENDMWZ256rrk, X86::VPBLENDMWZ256rmk, 0 },
- { X86::VPBLENDMWZrrk, X86::VPBLENDMWZrmk, 0 },
- { X86::VPBROADCASTBZ128rrk, X86::VPBROADCASTBZ128rmk, TB_NO_REVERSE },
- { X86::VPBROADCASTBZ256rrk, X86::VPBROADCASTBZ256rmk, TB_NO_REVERSE },
- { X86::VPBROADCASTBZrrk, X86::VPBROADCASTBZrmk, TB_NO_REVERSE },
- { X86::VPBROADCASTDZ128rrk, X86::VPBROADCASTDZ128rmk, TB_NO_REVERSE },
- { X86::VPBROADCASTDZ256rrk, X86::VPBROADCASTDZ256rmk, TB_NO_REVERSE },
- { X86::VPBROADCASTDZrrk, X86::VPBROADCASTDZrmk, TB_NO_REVERSE },
- { X86::VPBROADCASTQZ128rrk, X86::VPBROADCASTQZ128rmk, TB_NO_REVERSE },
- { X86::VPBROADCASTQZ256rrk, X86::VPBROADCASTQZ256rmk, TB_NO_REVERSE },
- { X86::VPBROADCASTQZrrk, X86::VPBROADCASTQZrmk, TB_NO_REVERSE },
- { X86::VPBROADCASTWZ128rrk, X86::VPBROADCASTWZ128rmk, TB_NO_REVERSE },
- { X86::VPBROADCASTWZ256rrk, X86::VPBROADCASTWZ256rmk, TB_NO_REVERSE },
- { X86::VPBROADCASTWZrrk, X86::VPBROADCASTWZrmk, TB_NO_REVERSE },
- { X86::VPCMOVYrrr, X86::VPCMOVYrrm, 0 },
- { X86::VPCMOVrrr, X86::VPCMOVrrm, 0 },
- { X86::VPCMPBZ128rrik, X86::VPCMPBZ128rmik, 0 },
- { X86::VPCMPBZ256rrik, X86::VPCMPBZ256rmik, 0 },
- { X86::VPCMPBZrrik, X86::VPCMPBZrmik, 0 },
- { X86::VPCMPDZ128rrik, X86::VPCMPDZ128rmik, 0 },
- { X86::VPCMPDZ256rrik, X86::VPCMPDZ256rmik, 0 },
- { X86::VPCMPDZrrik, X86::VPCMPDZrmik, 0 },
- { X86::VPCMPEQBZ128rrk, X86::VPCMPEQBZ128rmk, 0 },
- { X86::VPCMPEQBZ256rrk, X86::VPCMPEQBZ256rmk, 0 },
- { X86::VPCMPEQBZrrk, X86::VPCMPEQBZrmk, 0 },
- { X86::VPCMPEQDZ128rrk, X86::VPCMPEQDZ128rmk, 0 },
- { X86::VPCMPEQDZ256rrk, X86::VPCMPEQDZ256rmk, 0 },
- { X86::VPCMPEQDZrrk, X86::VPCMPEQDZrmk, 0 },
- { X86::VPCMPEQQZ128rrk, X86::VPCMPEQQZ128rmk, 0 },
- { X86::VPCMPEQQZ256rrk, X86::VPCMPEQQZ256rmk, 0 },
- { X86::VPCMPEQQZrrk, X86::VPCMPEQQZrmk, 0 },
- { X86::VPCMPEQWZ128rrk, X86::VPCMPEQWZ128rmk, 0 },
- { X86::VPCMPEQWZ256rrk, X86::VPCMPEQWZ256rmk, 0 },
- { X86::VPCMPEQWZrrk, X86::VPCMPEQWZrmk, 0 },
- { X86::VPCMPGTBZ128rrk, X86::VPCMPGTBZ128rmk, 0 },
- { X86::VPCMPGTBZ256rrk, X86::VPCMPGTBZ256rmk, 0 },
- { X86::VPCMPGTBZrrk, X86::VPCMPGTBZrmk, 0 },
- { X86::VPCMPGTDZ128rrk, X86::VPCMPGTDZ128rmk, 0 },
- { X86::VPCMPGTDZ256rrk, X86::VPCMPGTDZ256rmk, 0 },
- { X86::VPCMPGTDZrrk, X86::VPCMPGTDZrmk, 0 },
- { X86::VPCMPGTQZ128rrk, X86::VPCMPGTQZ128rmk, 0 },
- { X86::VPCMPGTQZ256rrk, X86::VPCMPGTQZ256rmk, 0 },
- { X86::VPCMPGTQZrrk, X86::VPCMPGTQZrmk, 0 },
- { X86::VPCMPGTWZ128rrk, X86::VPCMPGTWZ128rmk, 0 },
- { X86::VPCMPGTWZ256rrk, X86::VPCMPGTWZ256rmk, 0 },
- { X86::VPCMPGTWZrrk, X86::VPCMPGTWZrmk, 0 },
- { X86::VPCMPQZ128rrik, X86::VPCMPQZ128rmik, 0 },
- { X86::VPCMPQZ256rrik, X86::VPCMPQZ256rmik, 0 },
- { X86::VPCMPQZrrik, X86::VPCMPQZrmik, 0 },
- { X86::VPCMPUBZ128rrik, X86::VPCMPUBZ128rmik, 0 },
- { X86::VPCMPUBZ256rrik, X86::VPCMPUBZ256rmik, 0 },
- { X86::VPCMPUBZrrik, X86::VPCMPUBZrmik, 0 },
- { X86::VPCMPUDZ128rrik, X86::VPCMPUDZ128rmik, 0 },
- { X86::VPCMPUDZ256rrik, X86::VPCMPUDZ256rmik, 0 },
- { X86::VPCMPUDZrrik, X86::VPCMPUDZrmik, 0 },
- { X86::VPCMPUQZ128rrik, X86::VPCMPUQZ128rmik, 0 },
- { X86::VPCMPUQZ256rrik, X86::VPCMPUQZ256rmik, 0 },
- { X86::VPCMPUQZrrik, X86::VPCMPUQZrmik, 0 },
- { X86::VPCMPUWZ128rrik, X86::VPCMPUWZ128rmik, 0 },
- { X86::VPCMPUWZ256rrik, X86::VPCMPUWZ256rmik, 0 },
- { X86::VPCMPUWZrrik, X86::VPCMPUWZrmik, 0 },
- { X86::VPCMPWZ128rrik, X86::VPCMPWZ128rmik, 0 },
- { X86::VPCMPWZ256rrik, X86::VPCMPWZ256rmik, 0 },
- { X86::VPCMPWZrrik, X86::VPCMPWZrmik, 0 },
- { X86::VPCONFLICTDZ128rrk, X86::VPCONFLICTDZ128rmk, 0 },
- { X86::VPCONFLICTDZ256rrk, X86::VPCONFLICTDZ256rmk, 0 },
- { X86::VPCONFLICTDZrrk, X86::VPCONFLICTDZrmk, 0 },
- { X86::VPCONFLICTQZ128rrk, X86::VPCONFLICTQZ128rmk, 0 },
- { X86::VPCONFLICTQZ256rrk, X86::VPCONFLICTQZ256rmk, 0 },
- { X86::VPCONFLICTQZrrk, X86::VPCONFLICTQZrmk, 0 },
- { X86::VPDPBSSDSYrr, X86::VPDPBSSDSYrm, 0 },
- { X86::VPDPBSSDSrr, X86::VPDPBSSDSrm, 0 },
- { X86::VPDPBSSDYrr, X86::VPDPBSSDYrm, 0 },
- { X86::VPDPBSSDrr, X86::VPDPBSSDrm, 0 },
- { X86::VPDPBSUDSYrr, X86::VPDPBSUDSYrm, 0 },
- { X86::VPDPBSUDSrr, X86::VPDPBSUDSrm, 0 },
- { X86::VPDPBSUDYrr, X86::VPDPBSUDYrm, 0 },
- { X86::VPDPBSUDrr, X86::VPDPBSUDrm, 0 },
- { X86::VPDPBUSDSYrr, X86::VPDPBUSDSYrm, 0 },
- { X86::VPDPBUSDSZ128r, X86::VPDPBUSDSZ128m, 0 },
- { X86::VPDPBUSDSZ256r, X86::VPDPBUSDSZ256m, 0 },
- { X86::VPDPBUSDSZr, X86::VPDPBUSDSZm, 0 },
- { X86::VPDPBUSDSrr, X86::VPDPBUSDSrm, 0 },
- { X86::VPDPBUSDYrr, X86::VPDPBUSDYrm, 0 },
- { X86::VPDPBUSDZ128r, X86::VPDPBUSDZ128m, 0 },
- { X86::VPDPBUSDZ256r, X86::VPDPBUSDZ256m, 0 },
- { X86::VPDPBUSDZr, X86::VPDPBUSDZm, 0 },
- { X86::VPDPBUSDrr, X86::VPDPBUSDrm, 0 },
- { X86::VPDPBUUDSYrr, X86::VPDPBUUDSYrm, 0 },
- { X86::VPDPBUUDSrr, X86::VPDPBUUDSrm, 0 },
- { X86::VPDPBUUDYrr, X86::VPDPBUUDYrm, 0 },
- { X86::VPDPBUUDrr, X86::VPDPBUUDrm, 0 },
- { X86::VPDPWSSDSYrr, X86::VPDPWSSDSYrm, 0 },
- { X86::VPDPWSSDSZ128r, X86::VPDPWSSDSZ128m, 0 },
- { X86::VPDPWSSDSZ256r, X86::VPDPWSSDSZ256m, 0 },
- { X86::VPDPWSSDSZr, X86::VPDPWSSDSZm, 0 },
- { X86::VPDPWSSDSrr, X86::VPDPWSSDSrm, 0 },
- { X86::VPDPWSSDYrr, X86::VPDPWSSDYrm, 0 },
- { X86::VPDPWSSDZ128r, X86::VPDPWSSDZ128m, 0 },
- { X86::VPDPWSSDZ256r, X86::VPDPWSSDZ256m, 0 },
- { X86::VPDPWSSDZr, X86::VPDPWSSDZm, 0 },
- { X86::VPDPWSSDrr, X86::VPDPWSSDrm, 0 },
- { X86::VPERMBZ128rrkz, X86::VPERMBZ128rmkz, 0 },
- { X86::VPERMBZ256rrkz, X86::VPERMBZ256rmkz, 0 },
- { X86::VPERMBZrrkz, X86::VPERMBZrmkz, 0 },
- { X86::VPERMDZ256rrkz, X86::VPERMDZ256rmkz, 0 },
- { X86::VPERMDZrrkz, X86::VPERMDZrmkz, 0 },
- { X86::VPERMI2B128rr, X86::VPERMI2B128rm, 0 },
- { X86::VPERMI2B256rr, X86::VPERMI2B256rm, 0 },
- { X86::VPERMI2Brr, X86::VPERMI2Brm, 0 },
- { X86::VPERMI2D128rr, X86::VPERMI2D128rm, 0 },
- { X86::VPERMI2D256rr, X86::VPERMI2D256rm, 0 },
- { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
- { X86::VPERMI2PD128rr, X86::VPERMI2PD128rm, 0 },
- { X86::VPERMI2PD256rr, X86::VPERMI2PD256rm, 0 },
- { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 },
- { X86::VPERMI2PS128rr, X86::VPERMI2PS128rm, 0 },
- { X86::VPERMI2PS256rr, X86::VPERMI2PS256rm, 0 },
- { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
- { X86::VPERMI2Q128rr, X86::VPERMI2Q128rm, 0 },
- { X86::VPERMI2Q256rr, X86::VPERMI2Q256rm, 0 },
- { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 },
- { X86::VPERMI2W128rr, X86::VPERMI2W128rm, 0 },
- { X86::VPERMI2W256rr, X86::VPERMI2W256rm, 0 },
- { X86::VPERMI2Wrr, X86::VPERMI2Wrm, 0 },
- { X86::VPERMIL2PDYrr, X86::VPERMIL2PDYrm, 0 },
- { X86::VPERMIL2PDrr, X86::VPERMIL2PDrm, 0 },
- { X86::VPERMIL2PSYrr, X86::VPERMIL2PSYrm, 0 },
- { X86::VPERMIL2PSrr, X86::VPERMIL2PSrm, 0 },
- { X86::VPERMILPDZ128rik, X86::VPERMILPDZ128mik, 0 },
- { X86::VPERMILPDZ128rrkz, X86::VPERMILPDZ128rmkz, 0 },
- { X86::VPERMILPDZ256rik, X86::VPERMILPDZ256mik, 0 },
- { X86::VPERMILPDZ256rrkz, X86::VPERMILPDZ256rmkz, 0 },
- { X86::VPERMILPDZrik, X86::VPERMILPDZmik, 0 },
- { X86::VPERMILPDZrrkz, X86::VPERMILPDZrmkz, 0 },
- { X86::VPERMILPSZ128rik, X86::VPERMILPSZ128mik, 0 },
- { X86::VPERMILPSZ128rrkz, X86::VPERMILPSZ128rmkz, 0 },
- { X86::VPERMILPSZ256rik, X86::VPERMILPSZ256mik, 0 },
- { X86::VPERMILPSZ256rrkz, X86::VPERMILPSZ256rmkz, 0 },
- { X86::VPERMILPSZrik, X86::VPERMILPSZmik, 0 },
- { X86::VPERMILPSZrrkz, X86::VPERMILPSZrmkz, 0 },
- { X86::VPERMPDZ256rik, X86::VPERMPDZ256mik, 0 },
- { X86::VPERMPDZ256rrkz, X86::VPERMPDZ256rmkz, 0 },
- { X86::VPERMPDZrik, X86::VPERMPDZmik, 0 },
- { X86::VPERMPDZrrkz, X86::VPERMPDZrmkz, 0 },
- { X86::VPERMPSZ256rrkz, X86::VPERMPSZ256rmkz, 0 },
- { X86::VPERMPSZrrkz, X86::VPERMPSZrmkz, 0 },
- { X86::VPERMQZ256rik, X86::VPERMQZ256mik, 0 },
- { X86::VPERMQZ256rrkz, X86::VPERMQZ256rmkz, 0 },
- { X86::VPERMQZrik, X86::VPERMQZmik, 0 },
- { X86::VPERMQZrrkz, X86::VPERMQZrmkz, 0 },
- { X86::VPERMT2B128rr, X86::VPERMT2B128rm, 0 },
- { X86::VPERMT2B256rr, X86::VPERMT2B256rm, 0 },
- { X86::VPERMT2Brr, X86::VPERMT2Brm, 0 },
- { X86::VPERMT2D128rr, X86::VPERMT2D128rm, 0 },
- { X86::VPERMT2D256rr, X86::VPERMT2D256rm, 0 },
- { X86::VPERMT2Drr, X86::VPERMT2Drm, 0 },
- { X86::VPERMT2PD128rr, X86::VPERMT2PD128rm, 0 },
- { X86::VPERMT2PD256rr, X86::VPERMT2PD256rm, 0 },
- { X86::VPERMT2PDrr, X86::VPERMT2PDrm, 0 },
- { X86::VPERMT2PS128rr, X86::VPERMT2PS128rm, 0 },
- { X86::VPERMT2PS256rr, X86::VPERMT2PS256rm, 0 },
- { X86::VPERMT2PSrr, X86::VPERMT2PSrm, 0 },
- { X86::VPERMT2Q128rr, X86::VPERMT2Q128rm, 0 },
- { X86::VPERMT2Q256rr, X86::VPERMT2Q256rm, 0 },
- { X86::VPERMT2Qrr, X86::VPERMT2Qrm, 0 },
- { X86::VPERMT2W128rr, X86::VPERMT2W128rm, 0 },
- { X86::VPERMT2W256rr, X86::VPERMT2W256rm, 0 },
- { X86::VPERMT2Wrr, X86::VPERMT2Wrm, 0 },
- { X86::VPERMWZ128rrkz, X86::VPERMWZ128rmkz, 0 },
- { X86::VPERMWZ256rrkz, X86::VPERMWZ256rmkz, 0 },
- { X86::VPERMWZrrkz, X86::VPERMWZrmkz, 0 },
- { X86::VPEXPANDBZ128rrk, X86::VPEXPANDBZ128rmk, TB_NO_REVERSE },
- { X86::VPEXPANDBZ256rrk, X86::VPEXPANDBZ256rmk, TB_NO_REVERSE },
- { X86::VPEXPANDBZrrk, X86::VPEXPANDBZrmk, TB_NO_REVERSE },
- { X86::VPEXPANDDZ128rrk, X86::VPEXPANDDZ128rmk, TB_NO_REVERSE },
- { X86::VPEXPANDDZ256rrk, X86::VPEXPANDDZ256rmk, TB_NO_REVERSE },
- { X86::VPEXPANDDZrrk, X86::VPEXPANDDZrmk, TB_NO_REVERSE },
- { X86::VPEXPANDQZ128rrk, X86::VPEXPANDQZ128rmk, TB_NO_REVERSE },
- { X86::VPEXPANDQZ256rrk, X86::VPEXPANDQZ256rmk, TB_NO_REVERSE },
- { X86::VPEXPANDQZrrk, X86::VPEXPANDQZrmk, TB_NO_REVERSE },
- { X86::VPEXPANDWZ128rrk, X86::VPEXPANDWZ128rmk, TB_NO_REVERSE },
- { X86::VPEXPANDWZ256rrk, X86::VPEXPANDWZ256rmk, TB_NO_REVERSE },
- { X86::VPEXPANDWZrrk, X86::VPEXPANDWZrmk, TB_NO_REVERSE },
- { X86::VPLZCNTDZ128rrk, X86::VPLZCNTDZ128rmk, 0 },
- { X86::VPLZCNTDZ256rrk, X86::VPLZCNTDZ256rmk, 0 },
- { X86::VPLZCNTDZrrk, X86::VPLZCNTDZrmk, 0 },
- { X86::VPLZCNTQZ128rrk, X86::VPLZCNTQZ128rmk, 0 },
- { X86::VPLZCNTQZ256rrk, X86::VPLZCNTQZ256rmk, 0 },
- { X86::VPLZCNTQZrrk, X86::VPLZCNTQZrmk, 0 },
- { X86::VPMADD52HUQYrr, X86::VPMADD52HUQYrm, 0 },
- { X86::VPMADD52HUQZ128r, X86::VPMADD52HUQZ128m, 0 },
- { X86::VPMADD52HUQZ256r, X86::VPMADD52HUQZ256m, 0 },
- { X86::VPMADD52HUQZr, X86::VPMADD52HUQZm, 0 },
- { X86::VPMADD52HUQrr, X86::VPMADD52HUQrm, 0 },
- { X86::VPMADD52LUQYrr, X86::VPMADD52LUQYrm, 0 },
- { X86::VPMADD52LUQZ128r, X86::VPMADD52LUQZ128m, 0 },
- { X86::VPMADD52LUQZ256r, X86::VPMADD52LUQZ256m, 0 },
- { X86::VPMADD52LUQZr, X86::VPMADD52LUQZm, 0 },
- { X86::VPMADD52LUQrr, X86::VPMADD52LUQrm, 0 },
- { X86::VPMADDUBSWZ128rrkz, X86::VPMADDUBSWZ128rmkz, 0 },
- { X86::VPMADDUBSWZ256rrkz, X86::VPMADDUBSWZ256rmkz, 0 },
- { X86::VPMADDUBSWZrrkz, X86::VPMADDUBSWZrmkz, 0 },
- { X86::VPMADDWDZ128rrkz, X86::VPMADDWDZ128rmkz, 0 },
- { X86::VPMADDWDZ256rrkz, X86::VPMADDWDZ256rmkz, 0 },
- { X86::VPMADDWDZrrkz, X86::VPMADDWDZrmkz, 0 },
- { X86::VPMAXSBZ128rrkz, X86::VPMAXSBZ128rmkz, 0 },
- { X86::VPMAXSBZ256rrkz, X86::VPMAXSBZ256rmkz, 0 },
- { X86::VPMAXSBZrrkz, X86::VPMAXSBZrmkz, 0 },
- { X86::VPMAXSDZ128rrkz, X86::VPMAXSDZ128rmkz, 0 },
- { X86::VPMAXSDZ256rrkz, X86::VPMAXSDZ256rmkz, 0 },
- { X86::VPMAXSDZrrkz, X86::VPMAXSDZrmkz, 0 },
- { X86::VPMAXSQZ128rrkz, X86::VPMAXSQZ128rmkz, 0 },
- { X86::VPMAXSQZ256rrkz, X86::VPMAXSQZ256rmkz, 0 },
- { X86::VPMAXSQZrrkz, X86::VPMAXSQZrmkz, 0 },
- { X86::VPMAXSWZ128rrkz, X86::VPMAXSWZ128rmkz, 0 },
- { X86::VPMAXSWZ256rrkz, X86::VPMAXSWZ256rmkz, 0 },
- { X86::VPMAXSWZrrkz, X86::VPMAXSWZrmkz, 0 },
- { X86::VPMAXUBZ128rrkz, X86::VPMAXUBZ128rmkz, 0 },
- { X86::VPMAXUBZ256rrkz, X86::VPMAXUBZ256rmkz, 0 },
- { X86::VPMAXUBZrrkz, X86::VPMAXUBZrmkz, 0 },
- { X86::VPMAXUDZ128rrkz, X86::VPMAXUDZ128rmkz, 0 },
- { X86::VPMAXUDZ256rrkz, X86::VPMAXUDZ256rmkz, 0 },
- { X86::VPMAXUDZrrkz, X86::VPMAXUDZrmkz, 0 },
- { X86::VPMAXUQZ128rrkz, X86::VPMAXUQZ128rmkz, 0 },
- { X86::VPMAXUQZ256rrkz, X86::VPMAXUQZ256rmkz, 0 },
- { X86::VPMAXUQZrrkz, X86::VPMAXUQZrmkz, 0 },
- { X86::VPMAXUWZ128rrkz, X86::VPMAXUWZ128rmkz, 0 },
- { X86::VPMAXUWZ256rrkz, X86::VPMAXUWZ256rmkz, 0 },
- { X86::VPMAXUWZrrkz, X86::VPMAXUWZrmkz, 0 },
- { X86::VPMINSBZ128rrkz, X86::VPMINSBZ128rmkz, 0 },
- { X86::VPMINSBZ256rrkz, X86::VPMINSBZ256rmkz, 0 },
- { X86::VPMINSBZrrkz, X86::VPMINSBZrmkz, 0 },
- { X86::VPMINSDZ128rrkz, X86::VPMINSDZ128rmkz, 0 },
- { X86::VPMINSDZ256rrkz, X86::VPMINSDZ256rmkz, 0 },
- { X86::VPMINSDZrrkz, X86::VPMINSDZrmkz, 0 },
- { X86::VPMINSQZ128rrkz, X86::VPMINSQZ128rmkz, 0 },
- { X86::VPMINSQZ256rrkz, X86::VPMINSQZ256rmkz, 0 },
- { X86::VPMINSQZrrkz, X86::VPMINSQZrmkz, 0 },
- { X86::VPMINSWZ128rrkz, X86::VPMINSWZ128rmkz, 0 },
- { X86::VPMINSWZ256rrkz, X86::VPMINSWZ256rmkz, 0 },
- { X86::VPMINSWZrrkz, X86::VPMINSWZrmkz, 0 },
- { X86::VPMINUBZ128rrkz, X86::VPMINUBZ128rmkz, 0 },
- { X86::VPMINUBZ256rrkz, X86::VPMINUBZ256rmkz, 0 },
- { X86::VPMINUBZrrkz, X86::VPMINUBZrmkz, 0 },
- { X86::VPMINUDZ128rrkz, X86::VPMINUDZ128rmkz, 0 },
- { X86::VPMINUDZ256rrkz, X86::VPMINUDZ256rmkz, 0 },
- { X86::VPMINUDZrrkz, X86::VPMINUDZrmkz, 0 },
- { X86::VPMINUQZ128rrkz, X86::VPMINUQZ128rmkz, 0 },
- { X86::VPMINUQZ256rrkz, X86::VPMINUQZ256rmkz, 0 },
- { X86::VPMINUQZrrkz, X86::VPMINUQZrmkz, 0 },
- { X86::VPMINUWZ128rrkz, X86::VPMINUWZ128rmkz, 0 },
- { X86::VPMINUWZ256rrkz, X86::VPMINUWZ256rmkz, 0 },
- { X86::VPMINUWZrrkz, X86::VPMINUWZrmkz, 0 },
- { X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVSXBDZ256rrk, X86::VPMOVSXBDZ256rmk, TB_NO_REVERSE },
- { X86::VPMOVSXBDZrrk, X86::VPMOVSXBDZrmk, 0 },
- { X86::VPMOVSXBQZ128rrk, X86::VPMOVSXBQZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVSXBQZ256rrk, X86::VPMOVSXBQZ256rmk, TB_NO_REVERSE },
- { X86::VPMOVSXBQZrrk, X86::VPMOVSXBQZrmk, TB_NO_REVERSE },
- { X86::VPMOVSXBWZ128rrk, X86::VPMOVSXBWZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVSXBWZ256rrk, X86::VPMOVSXBWZ256rmk, 0 },
- { X86::VPMOVSXBWZrrk, X86::VPMOVSXBWZrmk, 0 },
- { X86::VPMOVSXDQZ128rrk, X86::VPMOVSXDQZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVSXDQZ256rrk, X86::VPMOVSXDQZ256rmk, 0 },
- { X86::VPMOVSXDQZrrk, X86::VPMOVSXDQZrmk, 0 },
- { X86::VPMOVSXWDZ128rrk, X86::VPMOVSXWDZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVSXWDZ256rrk, X86::VPMOVSXWDZ256rmk, 0 },
- { X86::VPMOVSXWDZrrk, X86::VPMOVSXWDZrmk, 0 },
- { X86::VPMOVSXWQZ128rrk, X86::VPMOVSXWQZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVSXWQZ256rrk, X86::VPMOVSXWQZ256rmk, TB_NO_REVERSE },
- { X86::VPMOVSXWQZrrk, X86::VPMOVSXWQZrmk, 0 },
- { X86::VPMOVZXBDZ128rrk, X86::VPMOVZXBDZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVZXBDZ256rrk, X86::VPMOVZXBDZ256rmk, TB_NO_REVERSE },
- { X86::VPMOVZXBDZrrk, X86::VPMOVZXBDZrmk, 0 },
- { X86::VPMOVZXBQZ128rrk, X86::VPMOVZXBQZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVZXBQZ256rrk, X86::VPMOVZXBQZ256rmk, TB_NO_REVERSE },
- { X86::VPMOVZXBQZrrk, X86::VPMOVZXBQZrmk, TB_NO_REVERSE },
- { X86::VPMOVZXBWZ128rrk, X86::VPMOVZXBWZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVZXBWZ256rrk, X86::VPMOVZXBWZ256rmk, 0 },
- { X86::VPMOVZXBWZrrk, X86::VPMOVZXBWZrmk, 0 },
- { X86::VPMOVZXDQZ128rrk, X86::VPMOVZXDQZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVZXDQZ256rrk, X86::VPMOVZXDQZ256rmk, 0 },
- { X86::VPMOVZXDQZrrk, X86::VPMOVZXDQZrmk, 0 },
- { X86::VPMOVZXWDZ128rrk, X86::VPMOVZXWDZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVZXWDZ256rrk, X86::VPMOVZXWDZ256rmk, 0 },
- { X86::VPMOVZXWDZrrk, X86::VPMOVZXWDZrmk, 0 },
- { X86::VPMOVZXWQZ128rrk, X86::VPMOVZXWQZ128rmk, TB_NO_REVERSE },
- { X86::VPMOVZXWQZ256rrk, X86::VPMOVZXWQZ256rmk, TB_NO_REVERSE },
- { X86::VPMOVZXWQZrrk, X86::VPMOVZXWQZrmk, 0 },
- { X86::VPMULDQZ128rrkz, X86::VPMULDQZ128rmkz, 0 },
- { X86::VPMULDQZ256rrkz, X86::VPMULDQZ256rmkz, 0 },
- { X86::VPMULDQZrrkz, X86::VPMULDQZrmkz, 0 },
- { X86::VPMULHRSWZ128rrkz, X86::VPMULHRSWZ128rmkz, 0 },
- { X86::VPMULHRSWZ256rrkz, X86::VPMULHRSWZ256rmkz, 0 },
- { X86::VPMULHRSWZrrkz, X86::VPMULHRSWZrmkz, 0 },
- { X86::VPMULHUWZ128rrkz, X86::VPMULHUWZ128rmkz, 0 },
- { X86::VPMULHUWZ256rrkz, X86::VPMULHUWZ256rmkz, 0 },
- { X86::VPMULHUWZrrkz, X86::VPMULHUWZrmkz, 0 },
- { X86::VPMULHWZ128rrkz, X86::VPMULHWZ128rmkz, 0 },
- { X86::VPMULHWZ256rrkz, X86::VPMULHWZ256rmkz, 0 },
- { X86::VPMULHWZrrkz, X86::VPMULHWZrmkz, 0 },
- { X86::VPMULLDZ128rrkz, X86::VPMULLDZ128rmkz, 0 },
- { X86::VPMULLDZ256rrkz, X86::VPMULLDZ256rmkz, 0 },
- { X86::VPMULLDZrrkz, X86::VPMULLDZrmkz, 0 },
- { X86::VPMULLQZ128rrkz, X86::VPMULLQZ128rmkz, 0 },
- { X86::VPMULLQZ256rrkz, X86::VPMULLQZ256rmkz, 0 },
- { X86::VPMULLQZrrkz, X86::VPMULLQZrmkz, 0 },
- { X86::VPMULLWZ128rrkz, X86::VPMULLWZ128rmkz, 0 },
- { X86::VPMULLWZ256rrkz, X86::VPMULLWZ256rmkz, 0 },
- { X86::VPMULLWZrrkz, X86::VPMULLWZrmkz, 0 },
- { X86::VPMULTISHIFTQBZ128rrkz, X86::VPMULTISHIFTQBZ128rmkz, 0 },
- { X86::VPMULTISHIFTQBZ256rrkz, X86::VPMULTISHIFTQBZ256rmkz, 0 },
- { X86::VPMULTISHIFTQBZrrkz, X86::VPMULTISHIFTQBZrmkz, 0 },
- { X86::VPMULUDQZ128rrkz, X86::VPMULUDQZ128rmkz, 0 },
- { X86::VPMULUDQZ256rrkz, X86::VPMULUDQZ256rmkz, 0 },
- { X86::VPMULUDQZrrkz, X86::VPMULUDQZrmkz, 0 },
- { X86::VPOPCNTBZ128rrk, X86::VPOPCNTBZ128rmk, 0 },
- { X86::VPOPCNTBZ256rrk, X86::VPOPCNTBZ256rmk, 0 },
- { X86::VPOPCNTBZrrk, X86::VPOPCNTBZrmk, 0 },
- { X86::VPOPCNTDZ128rrk, X86::VPOPCNTDZ128rmk, 0 },
- { X86::VPOPCNTDZ256rrk, X86::VPOPCNTDZ256rmk, 0 },
- { X86::VPOPCNTDZrrk, X86::VPOPCNTDZrmk, 0 },
- { X86::VPOPCNTQZ128rrk, X86::VPOPCNTQZ128rmk, 0 },
- { X86::VPOPCNTQZ256rrk, X86::VPOPCNTQZ256rmk, 0 },
- { X86::VPOPCNTQZrrk, X86::VPOPCNTQZrmk, 0 },
- { X86::VPOPCNTWZ128rrk, X86::VPOPCNTWZ128rmk, 0 },
- { X86::VPOPCNTWZ256rrk, X86::VPOPCNTWZ256rmk, 0 },
- { X86::VPOPCNTWZrrk, X86::VPOPCNTWZrmk, 0 },
- { X86::VPORDZ128rrkz, X86::VPORDZ128rmkz, 0 },
- { X86::VPORDZ256rrkz, X86::VPORDZ256rmkz, 0 },
- { X86::VPORDZrrkz, X86::VPORDZrmkz, 0 },
- { X86::VPORQZ128rrkz, X86::VPORQZ128rmkz, 0 },
- { X86::VPORQZ256rrkz, X86::VPORQZ256rmkz, 0 },
- { X86::VPORQZrrkz, X86::VPORQZrmkz, 0 },
- { X86::VPPERMrrr, X86::VPPERMrrm, 0 },
- { X86::VPROLDZ128rik, X86::VPROLDZ128mik, 0 },
- { X86::VPROLDZ256rik, X86::VPROLDZ256mik, 0 },
- { X86::VPROLDZrik, X86::VPROLDZmik, 0 },
- { X86::VPROLQZ128rik, X86::VPROLQZ128mik, 0 },
- { X86::VPROLQZ256rik, X86::VPROLQZ256mik, 0 },
- { X86::VPROLQZrik, X86::VPROLQZmik, 0 },
- { X86::VPROLVDZ128rrkz, X86::VPROLVDZ128rmkz, 0 },
- { X86::VPROLVDZ256rrkz, X86::VPROLVDZ256rmkz, 0 },
- { X86::VPROLVDZrrkz, X86::VPROLVDZrmkz, 0 },
- { X86::VPROLVQZ128rrkz, X86::VPROLVQZ128rmkz, 0 },
- { X86::VPROLVQZ256rrkz, X86::VPROLVQZ256rmkz, 0 },
- { X86::VPROLVQZrrkz, X86::VPROLVQZrmkz, 0 },
- { X86::VPRORDZ128rik, X86::VPRORDZ128mik, 0 },
- { X86::VPRORDZ256rik, X86::VPRORDZ256mik, 0 },
- { X86::VPRORDZrik, X86::VPRORDZmik, 0 },
- { X86::VPRORQZ128rik, X86::VPRORQZ128mik, 0 },
- { X86::VPRORQZ256rik, X86::VPRORQZ256mik, 0 },
- { X86::VPRORQZrik, X86::VPRORQZmik, 0 },
- { X86::VPRORVDZ128rrkz, X86::VPRORVDZ128rmkz, 0 },
- { X86::VPRORVDZ256rrkz, X86::VPRORVDZ256rmkz, 0 },
- { X86::VPRORVDZrrkz, X86::VPRORVDZrmkz, 0 },
- { X86::VPRORVQZ128rrkz, X86::VPRORVQZ128rmkz, 0 },
- { X86::VPRORVQZ256rrkz, X86::VPRORVQZ256rmkz, 0 },
- { X86::VPRORVQZrrkz, X86::VPRORVQZrmkz, 0 },
- { X86::VPSHLDDZ128rrikz, X86::VPSHLDDZ128rmikz, 0 },
- { X86::VPSHLDDZ256rrikz, X86::VPSHLDDZ256rmikz, 0 },
- { X86::VPSHLDDZrrikz, X86::VPSHLDDZrmikz, 0 },
- { X86::VPSHLDQZ128rrikz, X86::VPSHLDQZ128rmikz, 0 },
- { X86::VPSHLDQZ256rrikz, X86::VPSHLDQZ256rmikz, 0 },
- { X86::VPSHLDQZrrikz, X86::VPSHLDQZrmikz, 0 },
- { X86::VPSHLDVDZ128r, X86::VPSHLDVDZ128m, 0 },
- { X86::VPSHLDVDZ256r, X86::VPSHLDVDZ256m, 0 },
- { X86::VPSHLDVDZr, X86::VPSHLDVDZm, 0 },
- { X86::VPSHLDVQZ128r, X86::VPSHLDVQZ128m, 0 },
- { X86::VPSHLDVQZ256r, X86::VPSHLDVQZ256m, 0 },
- { X86::VPSHLDVQZr, X86::VPSHLDVQZm, 0 },
- { X86::VPSHLDVWZ128r, X86::VPSHLDVWZ128m, 0 },
- { X86::VPSHLDVWZ256r, X86::VPSHLDVWZ256m, 0 },
- { X86::VPSHLDVWZr, X86::VPSHLDVWZm, 0 },
- { X86::VPSHLDWZ128rrikz, X86::VPSHLDWZ128rmikz, 0 },
- { X86::VPSHLDWZ256rrikz, X86::VPSHLDWZ256rmikz, 0 },
- { X86::VPSHLDWZrrikz, X86::VPSHLDWZrmikz, 0 },
- { X86::VPSHRDDZ128rrikz, X86::VPSHRDDZ128rmikz, 0 },
- { X86::VPSHRDDZ256rrikz, X86::VPSHRDDZ256rmikz, 0 },
- { X86::VPSHRDDZrrikz, X86::VPSHRDDZrmikz, 0 },
- { X86::VPSHRDQZ128rrikz, X86::VPSHRDQZ128rmikz, 0 },
- { X86::VPSHRDQZ256rrikz, X86::VPSHRDQZ256rmikz, 0 },
- { X86::VPSHRDQZrrikz, X86::VPSHRDQZrmikz, 0 },
- { X86::VPSHRDVDZ128r, X86::VPSHRDVDZ128m, 0 },
- { X86::VPSHRDVDZ256r, X86::VPSHRDVDZ256m, 0 },
- { X86::VPSHRDVDZr, X86::VPSHRDVDZm, 0 },
- { X86::VPSHRDVQZ128r, X86::VPSHRDVQZ128m, 0 },
- { X86::VPSHRDVQZ256r, X86::VPSHRDVQZ256m, 0 },
- { X86::VPSHRDVQZr, X86::VPSHRDVQZm, 0 },
- { X86::VPSHRDVWZ128r, X86::VPSHRDVWZ128m, 0 },
- { X86::VPSHRDVWZ256r, X86::VPSHRDVWZ256m, 0 },
- { X86::VPSHRDVWZr, X86::VPSHRDVWZm, 0 },
- { X86::VPSHRDWZ128rrikz, X86::VPSHRDWZ128rmikz, 0 },
- { X86::VPSHRDWZ256rrikz, X86::VPSHRDWZ256rmikz, 0 },
- { X86::VPSHRDWZrrikz, X86::VPSHRDWZrmikz, 0 },
- { X86::VPSHUFBITQMBZ128rrk, X86::VPSHUFBITQMBZ128rmk, 0 },
- { X86::VPSHUFBITQMBZ256rrk, X86::VPSHUFBITQMBZ256rmk, 0 },
- { X86::VPSHUFBITQMBZrrk, X86::VPSHUFBITQMBZrmk, 0 },
- { X86::VPSHUFBZ128rrkz, X86::VPSHUFBZ128rmkz, 0 },
- { X86::VPSHUFBZ256rrkz, X86::VPSHUFBZ256rmkz, 0 },
- { X86::VPSHUFBZrrkz, X86::VPSHUFBZrmkz, 0 },
- { X86::VPSHUFDZ128rik, X86::VPSHUFDZ128mik, 0 },
- { X86::VPSHUFDZ256rik, X86::VPSHUFDZ256mik, 0 },
- { X86::VPSHUFDZrik, X86::VPSHUFDZmik, 0 },
- { X86::VPSHUFHWZ128rik, X86::VPSHUFHWZ128mik, 0 },
- { X86::VPSHUFHWZ256rik, X86::VPSHUFHWZ256mik, 0 },
- { X86::VPSHUFHWZrik, X86::VPSHUFHWZmik, 0 },
- { X86::VPSHUFLWZ128rik, X86::VPSHUFLWZ128mik, 0 },
- { X86::VPSHUFLWZ256rik, X86::VPSHUFLWZ256mik, 0 },
- { X86::VPSHUFLWZrik, X86::VPSHUFLWZmik, 0 },
- { X86::VPSLLDZ128rik, X86::VPSLLDZ128mik, 0 },
- { X86::VPSLLDZ128rrkz, X86::VPSLLDZ128rmkz, 0 },
- { X86::VPSLLDZ256rik, X86::VPSLLDZ256mik, 0 },
- { X86::VPSLLDZ256rrkz, X86::VPSLLDZ256rmkz, 0 },
- { X86::VPSLLDZrik, X86::VPSLLDZmik, 0 },
- { X86::VPSLLDZrrkz, X86::VPSLLDZrmkz, 0 },
- { X86::VPSLLQZ128rik, X86::VPSLLQZ128mik, 0 },
- { X86::VPSLLQZ128rrkz, X86::VPSLLQZ128rmkz, 0 },
- { X86::VPSLLQZ256rik, X86::VPSLLQZ256mik, 0 },
- { X86::VPSLLQZ256rrkz, X86::VPSLLQZ256rmkz, 0 },
- { X86::VPSLLQZrik, X86::VPSLLQZmik, 0 },
- { X86::VPSLLQZrrkz, X86::VPSLLQZrmkz, 0 },
- { X86::VPSLLVDZ128rrkz, X86::VPSLLVDZ128rmkz, 0 },
- { X86::VPSLLVDZ256rrkz, X86::VPSLLVDZ256rmkz, 0 },
- { X86::VPSLLVDZrrkz, X86::VPSLLVDZrmkz, 0 },
- { X86::VPSLLVQZ128rrkz, X86::VPSLLVQZ128rmkz, 0 },
- { X86::VPSLLVQZ256rrkz, X86::VPSLLVQZ256rmkz, 0 },
- { X86::VPSLLVQZrrkz, X86::VPSLLVQZrmkz, 0 },
- { X86::VPSLLVWZ128rrkz, X86::VPSLLVWZ128rmkz, 0 },
- { X86::VPSLLVWZ256rrkz, X86::VPSLLVWZ256rmkz, 0 },
- { X86::VPSLLVWZrrkz, X86::VPSLLVWZrmkz, 0 },
- { X86::VPSLLWZ128rik, X86::VPSLLWZ128mik, 0 },
- { X86::VPSLLWZ128rrkz, X86::VPSLLWZ128rmkz, 0 },
- { X86::VPSLLWZ256rik, X86::VPSLLWZ256mik, 0 },
- { X86::VPSLLWZ256rrkz, X86::VPSLLWZ256rmkz, 0 },
- { X86::VPSLLWZrik, X86::VPSLLWZmik, 0 },
- { X86::VPSLLWZrrkz, X86::VPSLLWZrmkz, 0 },
- { X86::VPSRADZ128rik, X86::VPSRADZ128mik, 0 },
- { X86::VPSRADZ128rrkz, X86::VPSRADZ128rmkz, 0 },
- { X86::VPSRADZ256rik, X86::VPSRADZ256mik, 0 },
- { X86::VPSRADZ256rrkz, X86::VPSRADZ256rmkz, 0 },
- { X86::VPSRADZrik, X86::VPSRADZmik, 0 },
- { X86::VPSRADZrrkz, X86::VPSRADZrmkz, 0 },
- { X86::VPSRAQZ128rik, X86::VPSRAQZ128mik, 0 },
- { X86::VPSRAQZ128rrkz, X86::VPSRAQZ128rmkz, 0 },
- { X86::VPSRAQZ256rik, X86::VPSRAQZ256mik, 0 },
- { X86::VPSRAQZ256rrkz, X86::VPSRAQZ256rmkz, 0 },
- { X86::VPSRAQZrik, X86::VPSRAQZmik, 0 },
- { X86::VPSRAQZrrkz, X86::VPSRAQZrmkz, 0 },
- { X86::VPSRAVDZ128rrkz, X86::VPSRAVDZ128rmkz, 0 },
- { X86::VPSRAVDZ256rrkz, X86::VPSRAVDZ256rmkz, 0 },
- { X86::VPSRAVDZrrkz, X86::VPSRAVDZrmkz, 0 },
- { X86::VPSRAVQZ128rrkz, X86::VPSRAVQZ128rmkz, 0 },
- { X86::VPSRAVQZ256rrkz, X86::VPSRAVQZ256rmkz, 0 },
- { X86::VPSRAVQZrrkz, X86::VPSRAVQZrmkz, 0 },
- { X86::VPSRAVWZ128rrkz, X86::VPSRAVWZ128rmkz, 0 },
- { X86::VPSRAVWZ256rrkz, X86::VPSRAVWZ256rmkz, 0 },
- { X86::VPSRAVWZrrkz, X86::VPSRAVWZrmkz, 0 },
- { X86::VPSRAWZ128rik, X86::VPSRAWZ128mik, 0 },
- { X86::VPSRAWZ128rrkz, X86::VPSRAWZ128rmkz, 0 },
- { X86::VPSRAWZ256rik, X86::VPSRAWZ256mik, 0 },
- { X86::VPSRAWZ256rrkz, X86::VPSRAWZ256rmkz, 0 },
- { X86::VPSRAWZrik, X86::VPSRAWZmik, 0 },
- { X86::VPSRAWZrrkz, X86::VPSRAWZrmkz, 0 },
- { X86::VPSRLDZ128rik, X86::VPSRLDZ128mik, 0 },
- { X86::VPSRLDZ128rrkz, X86::VPSRLDZ128rmkz, 0 },
- { X86::VPSRLDZ256rik, X86::VPSRLDZ256mik, 0 },
- { X86::VPSRLDZ256rrkz, X86::VPSRLDZ256rmkz, 0 },
- { X86::VPSRLDZrik, X86::VPSRLDZmik, 0 },
- { X86::VPSRLDZrrkz, X86::VPSRLDZrmkz, 0 },
- { X86::VPSRLQZ128rik, X86::VPSRLQZ128mik, 0 },
- { X86::VPSRLQZ128rrkz, X86::VPSRLQZ128rmkz, 0 },
- { X86::VPSRLQZ256rik, X86::VPSRLQZ256mik, 0 },
- { X86::VPSRLQZ256rrkz, X86::VPSRLQZ256rmkz, 0 },
- { X86::VPSRLQZrik, X86::VPSRLQZmik, 0 },
- { X86::VPSRLQZrrkz, X86::VPSRLQZrmkz, 0 },
- { X86::VPSRLVDZ128rrkz, X86::VPSRLVDZ128rmkz, 0 },
- { X86::VPSRLVDZ256rrkz, X86::VPSRLVDZ256rmkz, 0 },
- { X86::VPSRLVDZrrkz, X86::VPSRLVDZrmkz, 0 },
- { X86::VPSRLVQZ128rrkz, X86::VPSRLVQZ128rmkz, 0 },
- { X86::VPSRLVQZ256rrkz, X86::VPSRLVQZ256rmkz, 0 },
- { X86::VPSRLVQZrrkz, X86::VPSRLVQZrmkz, 0 },
- { X86::VPSRLVWZ128rrkz, X86::VPSRLVWZ128rmkz, 0 },
- { X86::VPSRLVWZ256rrkz, X86::VPSRLVWZ256rmkz, 0 },
- { X86::VPSRLVWZrrkz, X86::VPSRLVWZrmkz, 0 },
- { X86::VPSRLWZ128rik, X86::VPSRLWZ128mik, 0 },
- { X86::VPSRLWZ128rrkz, X86::VPSRLWZ128rmkz, 0 },
- { X86::VPSRLWZ256rik, X86::VPSRLWZ256mik, 0 },
- { X86::VPSRLWZ256rrkz, X86::VPSRLWZ256rmkz, 0 },
- { X86::VPSRLWZrik, X86::VPSRLWZmik, 0 },
- { X86::VPSRLWZrrkz, X86::VPSRLWZrmkz, 0 },
- { X86::VPSUBBZ128rrkz, X86::VPSUBBZ128rmkz, 0 },
- { X86::VPSUBBZ256rrkz, X86::VPSUBBZ256rmkz, 0 },
- { X86::VPSUBBZrrkz, X86::VPSUBBZrmkz, 0 },
- { X86::VPSUBDZ128rrkz, X86::VPSUBDZ128rmkz, 0 },
- { X86::VPSUBDZ256rrkz, X86::VPSUBDZ256rmkz, 0 },
- { X86::VPSUBDZrrkz, X86::VPSUBDZrmkz, 0 },
- { X86::VPSUBQZ128rrkz, X86::VPSUBQZ128rmkz, 0 },
- { X86::VPSUBQZ256rrkz, X86::VPSUBQZ256rmkz, 0 },
- { X86::VPSUBQZrrkz, X86::VPSUBQZrmkz, 0 },
- { X86::VPSUBSBZ128rrkz, X86::VPSUBSBZ128rmkz, 0 },
- { X86::VPSUBSBZ256rrkz, X86::VPSUBSBZ256rmkz, 0 },
- { X86::VPSUBSBZrrkz, X86::VPSUBSBZrmkz, 0 },
- { X86::VPSUBSWZ128rrkz, X86::VPSUBSWZ128rmkz, 0 },
- { X86::VPSUBSWZ256rrkz, X86::VPSUBSWZ256rmkz, 0 },
- { X86::VPSUBSWZrrkz, X86::VPSUBSWZrmkz, 0 },
- { X86::VPSUBUSBZ128rrkz, X86::VPSUBUSBZ128rmkz, 0 },
- { X86::VPSUBUSBZ256rrkz, X86::VPSUBUSBZ256rmkz, 0 },
- { X86::VPSUBUSBZrrkz, X86::VPSUBUSBZrmkz, 0 },
- { X86::VPSUBUSWZ128rrkz, X86::VPSUBUSWZ128rmkz, 0 },
- { X86::VPSUBUSWZ256rrkz, X86::VPSUBUSWZ256rmkz, 0 },
- { X86::VPSUBUSWZrrkz, X86::VPSUBUSWZrmkz, 0 },
- { X86::VPSUBWZ128rrkz, X86::VPSUBWZ128rmkz, 0 },
- { X86::VPSUBWZ256rrkz, X86::VPSUBWZ256rmkz, 0 },
- { X86::VPSUBWZrrkz, X86::VPSUBWZrmkz, 0 },
- { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGDZ128rmi, 0 },
- { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGDZ256rmi, 0 },
- { X86::VPTERNLOGDZrri, X86::VPTERNLOGDZrmi, 0 },
- { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGQZ128rmi, 0 },
- { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGQZ256rmi, 0 },
- { X86::VPTERNLOGQZrri, X86::VPTERNLOGQZrmi, 0 },
- { X86::VPTESTMBZ128rrk, X86::VPTESTMBZ128rmk, 0 },
- { X86::VPTESTMBZ256rrk, X86::VPTESTMBZ256rmk, 0 },
- { X86::VPTESTMBZrrk, X86::VPTESTMBZrmk, 0 },
- { X86::VPTESTMDZ128rrk, X86::VPTESTMDZ128rmk, 0 },
- { X86::VPTESTMDZ256rrk, X86::VPTESTMDZ256rmk, 0 },
- { X86::VPTESTMDZrrk, X86::VPTESTMDZrmk, 0 },
- { X86::VPTESTMQZ128rrk, X86::VPTESTMQZ128rmk, 0 },
- { X86::VPTESTMQZ256rrk, X86::VPTESTMQZ256rmk, 0 },
- { X86::VPTESTMQZrrk, X86::VPTESTMQZrmk, 0 },
- { X86::VPTESTMWZ128rrk, X86::VPTESTMWZ128rmk, 0 },
- { X86::VPTESTMWZ256rrk, X86::VPTESTMWZ256rmk, 0 },
- { X86::VPTESTMWZrrk, X86::VPTESTMWZrmk, 0 },
- { X86::VPTESTNMBZ128rrk, X86::VPTESTNMBZ128rmk, 0 },
- { X86::VPTESTNMBZ256rrk, X86::VPTESTNMBZ256rmk, 0 },
- { X86::VPTESTNMBZrrk, X86::VPTESTNMBZrmk, 0 },
- { X86::VPTESTNMDZ128rrk, X86::VPTESTNMDZ128rmk, 0 },
- { X86::VPTESTNMDZ256rrk, X86::VPTESTNMDZ256rmk, 0 },
- { X86::VPTESTNMDZrrk, X86::VPTESTNMDZrmk, 0 },
- { X86::VPTESTNMQZ128rrk, X86::VPTESTNMQZ128rmk, 0 },
- { X86::VPTESTNMQZ256rrk, X86::VPTESTNMQZ256rmk, 0 },
- { X86::VPTESTNMQZrrk, X86::VPTESTNMQZrmk, 0 },
- { X86::VPTESTNMWZ128rrk, X86::VPTESTNMWZ128rmk, 0 },
- { X86::VPTESTNMWZ256rrk, X86::VPTESTNMWZ256rmk, 0 },
- { X86::VPTESTNMWZrrk, X86::VPTESTNMWZrmk, 0 },
- { X86::VPUNPCKHBWZ128rrkz, X86::VPUNPCKHBWZ128rmkz, 0 },
- { X86::VPUNPCKHBWZ256rrkz, X86::VPUNPCKHBWZ256rmkz, 0 },
- { X86::VPUNPCKHBWZrrkz, X86::VPUNPCKHBWZrmkz, 0 },
- { X86::VPUNPCKHDQZ128rrkz, X86::VPUNPCKHDQZ128rmkz, 0 },
- { X86::VPUNPCKHDQZ256rrkz, X86::VPUNPCKHDQZ256rmkz, 0 },
- { X86::VPUNPCKHDQZrrkz, X86::VPUNPCKHDQZrmkz, 0 },
- { X86::VPUNPCKHQDQZ128rrkz, X86::VPUNPCKHQDQZ128rmkz, 0 },
- { X86::VPUNPCKHQDQZ256rrkz, X86::VPUNPCKHQDQZ256rmkz, 0 },
- { X86::VPUNPCKHQDQZrrkz, X86::VPUNPCKHQDQZrmkz, 0 },
- { X86::VPUNPCKHWDZ128rrkz, X86::VPUNPCKHWDZ128rmkz, 0 },
- { X86::VPUNPCKHWDZ256rrkz, X86::VPUNPCKHWDZ256rmkz, 0 },
- { X86::VPUNPCKHWDZrrkz, X86::VPUNPCKHWDZrmkz, 0 },
- { X86::VPUNPCKLBWZ128rrkz, X86::VPUNPCKLBWZ128rmkz, 0 },
- { X86::VPUNPCKLBWZ256rrkz, X86::VPUNPCKLBWZ256rmkz, 0 },
- { X86::VPUNPCKLBWZrrkz, X86::VPUNPCKLBWZrmkz, 0 },
- { X86::VPUNPCKLDQZ128rrkz, X86::VPUNPCKLDQZ128rmkz, 0 },
- { X86::VPUNPCKLDQZ256rrkz, X86::VPUNPCKLDQZ256rmkz, 0 },
- { X86::VPUNPCKLDQZrrkz, X86::VPUNPCKLDQZrmkz, 0 },
- { X86::VPUNPCKLQDQZ128rrkz, X86::VPUNPCKLQDQZ128rmkz, 0 },
- { X86::VPUNPCKLQDQZ256rrkz, X86::VPUNPCKLQDQZ256rmkz, 0 },
- { X86::VPUNPCKLQDQZrrkz, X86::VPUNPCKLQDQZrmkz, 0 },
- { X86::VPUNPCKLWDZ128rrkz, X86::VPUNPCKLWDZ128rmkz, 0 },
- { X86::VPUNPCKLWDZ256rrkz, X86::VPUNPCKLWDZ256rmkz, 0 },
- { X86::VPUNPCKLWDZrrkz, X86::VPUNPCKLWDZrmkz, 0 },
- { X86::VPXORDZ128rrkz, X86::VPXORDZ128rmkz, 0 },
- { X86::VPXORDZ256rrkz, X86::VPXORDZ256rmkz, 0 },
- { X86::VPXORDZrrkz, X86::VPXORDZrmkz, 0 },
- { X86::VPXORQZ128rrkz, X86::VPXORQZ128rmkz, 0 },
- { X86::VPXORQZ256rrkz, X86::VPXORQZ256rmkz, 0 },
- { X86::VPXORQZrrkz, X86::VPXORQZrmkz, 0 },
- { X86::VRANGEPDZ128rrikz, X86::VRANGEPDZ128rmikz, 0 },
- { X86::VRANGEPDZ256rrikz, X86::VRANGEPDZ256rmikz, 0 },
- { X86::VRANGEPDZrrikz, X86::VRANGEPDZrmikz, 0 },
- { X86::VRANGEPSZ128rrikz, X86::VRANGEPSZ128rmikz, 0 },
- { X86::VRANGEPSZ256rrikz, X86::VRANGEPSZ256rmikz, 0 },
- { X86::VRANGEPSZrrikz, X86::VRANGEPSZrmikz, 0 },
- { X86::VRANGESDZrrikz, X86::VRANGESDZrmikz, TB_NO_REVERSE },
- { X86::VRANGESSZrrikz, X86::VRANGESSZrmikz, TB_NO_REVERSE },
- { X86::VRCP14PDZ128rk, X86::VRCP14PDZ128mk, 0 },
- { X86::VRCP14PDZ256rk, X86::VRCP14PDZ256mk, 0 },
- { X86::VRCP14PDZrk, X86::VRCP14PDZmk, 0 },
- { X86::VRCP14PSZ128rk, X86::VRCP14PSZ128mk, 0 },
- { X86::VRCP14PSZ256rk, X86::VRCP14PSZ256mk, 0 },
- { X86::VRCP14PSZrk, X86::VRCP14PSZmk, 0 },
- { X86::VRCP14SDZrrkz, X86::VRCP14SDZrmkz, TB_NO_REVERSE },
- { X86::VRCP14SSZrrkz, X86::VRCP14SSZrmkz, TB_NO_REVERSE },
- { X86::VRCP28PDZrk, X86::VRCP28PDZmk, 0 },
- { X86::VRCP28PSZrk, X86::VRCP28PSZmk, 0 },
- { X86::VRCP28SDZrkz, X86::VRCP28SDZmkz, TB_NO_REVERSE },
- { X86::VRCP28SSZrkz, X86::VRCP28SSZmkz, TB_NO_REVERSE },
- { X86::VRCPPHZ128rk, X86::VRCPPHZ128mk, 0 },
- { X86::VRCPPHZ256rk, X86::VRCPPHZ256mk, 0 },
- { X86::VRCPPHZrk, X86::VRCPPHZmk, 0 },
- { X86::VRCPSHZrrkz, X86::VRCPSHZrmkz, TB_NO_REVERSE },
- { X86::VREDUCEPDZ128rrik, X86::VREDUCEPDZ128rmik, 0 },
- { X86::VREDUCEPDZ256rrik, X86::VREDUCEPDZ256rmik, 0 },
- { X86::VREDUCEPDZrrik, X86::VREDUCEPDZrmik, 0 },
- { X86::VREDUCEPHZ128rrik, X86::VREDUCEPHZ128rmik, 0 },
- { X86::VREDUCEPHZ256rrik, X86::VREDUCEPHZ256rmik, 0 },
- { X86::VREDUCEPHZrrik, X86::VREDUCEPHZrmik, 0 },
- { X86::VREDUCEPSZ128rrik, X86::VREDUCEPSZ128rmik, 0 },
- { X86::VREDUCEPSZ256rrik, X86::VREDUCEPSZ256rmik, 0 },
- { X86::VREDUCEPSZrrik, X86::VREDUCEPSZrmik, 0 },
- { X86::VREDUCESDZrrikz, X86::VREDUCESDZrmikz, TB_NO_REVERSE },
- { X86::VREDUCESHZrrikz, X86::VREDUCESHZrmikz, TB_NO_REVERSE },
- { X86::VREDUCESSZrrikz, X86::VREDUCESSZrmikz, TB_NO_REVERSE },
- { X86::VRNDSCALEPDZ128rrik, X86::VRNDSCALEPDZ128rmik, 0 },
- { X86::VRNDSCALEPDZ256rrik, X86::VRNDSCALEPDZ256rmik, 0 },
- { X86::VRNDSCALEPDZrrik, X86::VRNDSCALEPDZrmik, 0 },
- { X86::VRNDSCALEPHZ128rrik, X86::VRNDSCALEPHZ128rmik, 0 },
- { X86::VRNDSCALEPHZ256rrik, X86::VRNDSCALEPHZ256rmik, 0 },
- { X86::VRNDSCALEPHZrrik, X86::VRNDSCALEPHZrmik, 0 },
- { X86::VRNDSCALEPSZ128rrik, X86::VRNDSCALEPSZ128rmik, 0 },
- { X86::VRNDSCALEPSZ256rrik, X86::VRNDSCALEPSZ256rmik, 0 },
- { X86::VRNDSCALEPSZrrik, X86::VRNDSCALEPSZrmik, 0 },
- { X86::VRNDSCALESDZr_Intkz, X86::VRNDSCALESDZm_Intkz, TB_NO_REVERSE },
- { X86::VRNDSCALESHZr_Intkz, X86::VRNDSCALESHZm_Intkz, TB_NO_REVERSE },
- { X86::VRNDSCALESSZr_Intkz, X86::VRNDSCALESSZm_Intkz, TB_NO_REVERSE },
- { X86::VRSQRT14PDZ128rk, X86::VRSQRT14PDZ128mk, 0 },
- { X86::VRSQRT14PDZ256rk, X86::VRSQRT14PDZ256mk, 0 },
- { X86::VRSQRT14PDZrk, X86::VRSQRT14PDZmk, 0 },
- { X86::VRSQRT14PSZ128rk, X86::VRSQRT14PSZ128mk, 0 },
- { X86::VRSQRT14PSZ256rk, X86::VRSQRT14PSZ256mk, 0 },
- { X86::VRSQRT14PSZrk, X86::VRSQRT14PSZmk, 0 },
- { X86::VRSQRT14SDZrrkz, X86::VRSQRT14SDZrmkz, TB_NO_REVERSE },
- { X86::VRSQRT14SSZrrkz, X86::VRSQRT14SSZrmkz, TB_NO_REVERSE },
- { X86::VRSQRT28PDZrk, X86::VRSQRT28PDZmk, 0 },
- { X86::VRSQRT28PSZrk, X86::VRSQRT28PSZmk, 0 },
- { X86::VRSQRT28SDZrkz, X86::VRSQRT28SDZmkz, TB_NO_REVERSE },
- { X86::VRSQRT28SSZrkz, X86::VRSQRT28SSZmkz, TB_NO_REVERSE },
- { X86::VRSQRTPHZ128rk, X86::VRSQRTPHZ128mk, 0 },
- { X86::VRSQRTPHZ256rk, X86::VRSQRTPHZ256mk, 0 },
- { X86::VRSQRTPHZrk, X86::VRSQRTPHZmk, 0 },
- { X86::VRSQRTSHZrrkz, X86::VRSQRTSHZrmkz, TB_NO_REVERSE },
- { X86::VSCALEFPDZ128rrkz, X86::VSCALEFPDZ128rmkz, 0 },
- { X86::VSCALEFPDZ256rrkz, X86::VSCALEFPDZ256rmkz, 0 },
- { X86::VSCALEFPDZrrkz, X86::VSCALEFPDZrmkz, 0 },
- { X86::VSCALEFPHZ128rrkz, X86::VSCALEFPHZ128rmkz, 0 },
- { X86::VSCALEFPHZ256rrkz, X86::VSCALEFPHZ256rmkz, 0 },
- { X86::VSCALEFPHZrrkz, X86::VSCALEFPHZrmkz, 0 },
- { X86::VSCALEFPSZ128rrkz, X86::VSCALEFPSZ128rmkz, 0 },
- { X86::VSCALEFPSZ256rrkz, X86::VSCALEFPSZ256rmkz, 0 },
- { X86::VSCALEFPSZrrkz, X86::VSCALEFPSZrmkz, 0 },
- { X86::VSCALEFSDZrrkz, X86::VSCALEFSDZrmkz, TB_NO_REVERSE },
- { X86::VSCALEFSHZrrkz, X86::VSCALEFSHZrmkz, TB_NO_REVERSE },
- { X86::VSCALEFSSZrrkz, X86::VSCALEFSSZrmkz, TB_NO_REVERSE },
- { X86::VSHUFF32X4Z256rrikz, X86::VSHUFF32X4Z256rmikz, 0 },
- { X86::VSHUFF32X4Zrrikz, X86::VSHUFF32X4Zrmikz, 0 },
- { X86::VSHUFF64X2Z256rrikz, X86::VSHUFF64X2Z256rmikz, 0 },
- { X86::VSHUFF64X2Zrrikz, X86::VSHUFF64X2Zrmikz, 0 },
- { X86::VSHUFI32X4Z256rrikz, X86::VSHUFI32X4Z256rmikz, 0 },
- { X86::VSHUFI32X4Zrrikz, X86::VSHUFI32X4Zrmikz, 0 },
- { X86::VSHUFI64X2Z256rrikz, X86::VSHUFI64X2Z256rmikz, 0 },
- { X86::VSHUFI64X2Zrrikz, X86::VSHUFI64X2Zrmikz, 0 },
- { X86::VSHUFPDZ128rrikz, X86::VSHUFPDZ128rmikz, 0 },
- { X86::VSHUFPDZ256rrikz, X86::VSHUFPDZ256rmikz, 0 },
- { X86::VSHUFPDZrrikz, X86::VSHUFPDZrmikz, 0 },
- { X86::VSHUFPSZ128rrikz, X86::VSHUFPSZ128rmikz, 0 },
- { X86::VSHUFPSZ256rrikz, X86::VSHUFPSZ256rmikz, 0 },
- { X86::VSHUFPSZrrikz, X86::VSHUFPSZrmikz, 0 },
- { X86::VSQRTPDZ128rk, X86::VSQRTPDZ128mk, 0 },
- { X86::VSQRTPDZ256rk, X86::VSQRTPDZ256mk, 0 },
- { X86::VSQRTPDZrk, X86::VSQRTPDZmk, 0 },
- { X86::VSQRTPHZ128rk, X86::VSQRTPHZ128mk, 0 },
- { X86::VSQRTPHZ256rk, X86::VSQRTPHZ256mk, 0 },
- { X86::VSQRTPHZrk, X86::VSQRTPHZmk, 0 },
- { X86::VSQRTPSZ128rk, X86::VSQRTPSZ128mk, 0 },
- { X86::VSQRTPSZ256rk, X86::VSQRTPSZ256mk, 0 },
- { X86::VSQRTPSZrk, X86::VSQRTPSZmk, 0 },
- { X86::VSQRTSDZr_Intkz, X86::VSQRTSDZm_Intkz, TB_NO_REVERSE },
- { X86::VSQRTSHZr_Intkz, X86::VSQRTSHZm_Intkz, TB_NO_REVERSE },
- { X86::VSQRTSSZr_Intkz, X86::VSQRTSSZm_Intkz, TB_NO_REVERSE },
- { X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 },
- { X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 },
- { X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 },
- { X86::VSUBPHZ128rrkz, X86::VSUBPHZ128rmkz, 0 },
- { X86::VSUBPHZ256rrkz, X86::VSUBPHZ256rmkz, 0 },
- { X86::VSUBPHZrrkz, X86::VSUBPHZrmkz, 0 },
- { X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 },
- { X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 },
- { X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 },
- { X86::VSUBSDZrr_Intkz, X86::VSUBSDZrm_Intkz, TB_NO_REVERSE },
- { X86::VSUBSHZrr_Intkz, X86::VSUBSHZrm_Intkz, TB_NO_REVERSE },
- { X86::VSUBSSZrr_Intkz, X86::VSUBSSZrm_Intkz, TB_NO_REVERSE },
- { X86::VUNPCKHPDZ128rrkz, X86::VUNPCKHPDZ128rmkz, 0 },
- { X86::VUNPCKHPDZ256rrkz, X86::VUNPCKHPDZ256rmkz, 0 },
- { X86::VUNPCKHPDZrrkz, X86::VUNPCKHPDZrmkz, 0 },
- { X86::VUNPCKHPSZ128rrkz, X86::VUNPCKHPSZ128rmkz, 0 },
- { X86::VUNPCKHPSZ256rrkz, X86::VUNPCKHPSZ256rmkz, 0 },
- { X86::VUNPCKHPSZrrkz, X86::VUNPCKHPSZrmkz, 0 },
- { X86::VUNPCKLPDZ128rrkz, X86::VUNPCKLPDZ128rmkz, 0 },
- { X86::VUNPCKLPDZ256rrkz, X86::VUNPCKLPDZ256rmkz, 0 },
- { X86::VUNPCKLPDZrrkz, X86::VUNPCKLPDZrmkz, 0 },
- { X86::VUNPCKLPSZ128rrkz, X86::VUNPCKLPSZ128rmkz, 0 },
- { X86::VUNPCKLPSZ256rrkz, X86::VUNPCKLPSZ256rmkz, 0 },
- { X86::VUNPCKLPSZrrkz, X86::VUNPCKLPSZrmkz, 0 },
- { X86::VXORPDZ128rrkz, X86::VXORPDZ128rmkz, 0 },
- { X86::VXORPDZ256rrkz, X86::VXORPDZ256rmkz, 0 },
- { X86::VXORPDZrrkz, X86::VXORPDZrmkz, 0 },
- { X86::VXORPSZ128rrkz, X86::VXORPSZ128rmkz, 0 },
- { X86::VXORPSZ256rrkz, X86::VXORPSZ256rmkz, 0 },
- { X86::VXORPSZrrkz, X86::VXORPSZrmkz, 0 },
-};
-
-static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
- { X86::VADDPDZ128rrk, X86::VADDPDZ128rmk, 0 },
- { X86::VADDPDZ256rrk, X86::VADDPDZ256rmk, 0 },
- { X86::VADDPDZrrk, X86::VADDPDZrmk, 0 },
- { X86::VADDPHZ128rrk, X86::VADDPHZ128rmk, 0 },
- { X86::VADDPHZ256rrk, X86::VADDPHZ256rmk, 0 },
- { X86::VADDPHZrrk, X86::VADDPHZrmk, 0 },
- { X86::VADDPSZ128rrk, X86::VADDPSZ128rmk, 0 },
- { X86::VADDPSZ256rrk, X86::VADDPSZ256rmk, 0 },
- { X86::VADDPSZrrk, X86::VADDPSZrmk, 0 },
- { X86::VADDSDZrr_Intk, X86::VADDSDZrm_Intk, TB_NO_REVERSE },
- { X86::VADDSHZrr_Intk, X86::VADDSHZrm_Intk, TB_NO_REVERSE },
- { X86::VADDSSZrr_Intk, X86::VADDSSZrm_Intk, TB_NO_REVERSE },
- { X86::VALIGNDZ128rrik, X86::VALIGNDZ128rmik, 0 },
- { X86::VALIGNDZ256rrik, X86::VALIGNDZ256rmik, 0 },
- { X86::VALIGNDZrrik, X86::VALIGNDZrmik, 0 },
- { X86::VALIGNQZ128rrik, X86::VALIGNQZ128rmik, 0 },
- { X86::VALIGNQZ256rrik, X86::VALIGNQZ256rmik, 0 },
- { X86::VALIGNQZrrik, X86::VALIGNQZrmik, 0 },
- { X86::VANDNPDZ128rrk, X86::VANDNPDZ128rmk, 0 },
- { X86::VANDNPDZ256rrk, X86::VANDNPDZ256rmk, 0 },
- { X86::VANDNPDZrrk, X86::VANDNPDZrmk, 0 },
- { X86::VANDNPSZ128rrk, X86::VANDNPSZ128rmk, 0 },
- { X86::VANDNPSZ256rrk, X86::VANDNPSZ256rmk, 0 },
- { X86::VANDNPSZrrk, X86::VANDNPSZrmk, 0 },
- { X86::VANDPDZ128rrk, X86::VANDPDZ128rmk, 0 },
- { X86::VANDPDZ256rrk, X86::VANDPDZ256rmk, 0 },
- { X86::VANDPDZrrk, X86::VANDPDZrmk, 0 },
- { X86::VANDPSZ128rrk, X86::VANDPSZ128rmk, 0 },
- { X86::VANDPSZ256rrk, X86::VANDPSZ256rmk, 0 },
- { X86::VANDPSZrrk, X86::VANDPSZrmk, 0 },
- { X86::VCVTNE2PS2BF16Z128rrk, X86::VCVTNE2PS2BF16Z128rmk, 0 },
- { X86::VCVTNE2PS2BF16Z256rrk, X86::VCVTNE2PS2BF16Z256rmk, 0 },
- { X86::VCVTNE2PS2BF16Zrrk, X86::VCVTNE2PS2BF16Zrmk, 0 },
- { X86::VCVTSD2SHZrr_Intk, X86::VCVTSD2SHZrm_Intk, TB_NO_REVERSE },
- { X86::VCVTSD2SSZrr_Intk, X86::VCVTSD2SSZrm_Intk, TB_NO_REVERSE },
- { X86::VCVTSH2SDZrr_Intk, X86::VCVTSH2SDZrm_Intk, TB_NO_REVERSE },
- { X86::VCVTSH2SSZrr_Intk, X86::VCVTSH2SSZrm_Intk, TB_NO_REVERSE },
- { X86::VCVTSS2SDZrr_Intk, X86::VCVTSS2SDZrm_Intk, TB_NO_REVERSE },
- { X86::VCVTSS2SHZrr_Intk, X86::VCVTSS2SHZrm_Intk, TB_NO_REVERSE },
- { X86::VDBPSADBWZ128rrik, X86::VDBPSADBWZ128rmik, 0 },
- { X86::VDBPSADBWZ256rrik, X86::VDBPSADBWZ256rmik, 0 },
- { X86::VDBPSADBWZrrik, X86::VDBPSADBWZrmik, 0 },
- { X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0 },
- { X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0 },
- { X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 },
- { X86::VDIVPHZ128rrk, X86::VDIVPHZ128rmk, 0 },
- { X86::VDIVPHZ256rrk, X86::VDIVPHZ256rmk, 0 },
- { X86::VDIVPHZrrk, X86::VDIVPHZrmk, 0 },
- { X86::VDIVPSZ128rrk, X86::VDIVPSZ128rmk, 0 },
- { X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 },
- { X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
- { X86::VDIVSDZrr_Intk, X86::VDIVSDZrm_Intk, TB_NO_REVERSE },
- { X86::VDIVSHZrr_Intk, X86::VDIVSHZrm_Intk, TB_NO_REVERSE },
- { X86::VDIVSSZrr_Intk, X86::VDIVSSZrm_Intk, TB_NO_REVERSE },
- { X86::VDPBF16PSZ128rk, X86::VDPBF16PSZ128mk, 0 },
- { X86::VDPBF16PSZ128rkz, X86::VDPBF16PSZ128mkz, 0 },
- { X86::VDPBF16PSZ256rk, X86::VDPBF16PSZ256mk, 0 },
- { X86::VDPBF16PSZ256rkz, X86::VDPBF16PSZ256mkz, 0 },
- { X86::VDPBF16PSZrk, X86::VDPBF16PSZmk, 0 },
- { X86::VDPBF16PSZrkz, X86::VDPBF16PSZmkz, 0 },
- { X86::VFCMADDCPHZ128rk, X86::VFCMADDCPHZ128mk, 0 },
- { X86::VFCMADDCPHZ128rkz, X86::VFCMADDCPHZ128mkz, 0 },
- { X86::VFCMADDCPHZ256rk, X86::VFCMADDCPHZ256mk, 0 },
- { X86::VFCMADDCPHZ256rkz, X86::VFCMADDCPHZ256mkz, 0 },
- { X86::VFCMADDCPHZrk, X86::VFCMADDCPHZmk, 0 },
- { X86::VFCMADDCPHZrkz, X86::VFCMADDCPHZmkz, 0 },
- { X86::VFCMADDCSHZrk, X86::VFCMADDCSHZmk, TB_NO_REVERSE },
- { X86::VFCMADDCSHZrkz, X86::VFCMADDCSHZmkz, TB_NO_REVERSE },
- { X86::VFCMULCPHZ128rrk, X86::VFCMULCPHZ128rmk, 0 },
- { X86::VFCMULCPHZ256rrk, X86::VFCMULCPHZ256rmk, 0 },
- { X86::VFCMULCPHZrrk, X86::VFCMULCPHZrmk, 0 },
- { X86::VFCMULCSHZrrk, X86::VFCMULCSHZrmk, TB_NO_REVERSE },
- { X86::VFIXUPIMMPDZ128rrik, X86::VFIXUPIMMPDZ128rmik, 0 },
- { X86::VFIXUPIMMPDZ128rrikz, X86::VFIXUPIMMPDZ128rmikz, 0 },
- { X86::VFIXUPIMMPDZ256rrik, X86::VFIXUPIMMPDZ256rmik, 0 },
- { X86::VFIXUPIMMPDZ256rrikz, X86::VFIXUPIMMPDZ256rmikz, 0 },
- { X86::VFIXUPIMMPDZrrik, X86::VFIXUPIMMPDZrmik, 0 },
- { X86::VFIXUPIMMPDZrrikz, X86::VFIXUPIMMPDZrmikz, 0 },
- { X86::VFIXUPIMMPSZ128rrik, X86::VFIXUPIMMPSZ128rmik, 0 },
- { X86::VFIXUPIMMPSZ128rrikz, X86::VFIXUPIMMPSZ128rmikz, 0 },
- { X86::VFIXUPIMMPSZ256rrik, X86::VFIXUPIMMPSZ256rmik, 0 },
- { X86::VFIXUPIMMPSZ256rrikz, X86::VFIXUPIMMPSZ256rmikz, 0 },
- { X86::VFIXUPIMMPSZrrik, X86::VFIXUPIMMPSZrmik, 0 },
- { X86::VFIXUPIMMPSZrrikz, X86::VFIXUPIMMPSZrmikz, 0 },
- { X86::VFIXUPIMMSDZrrik, X86::VFIXUPIMMSDZrmik, TB_NO_REVERSE },
- { X86::VFIXUPIMMSDZrrikz, X86::VFIXUPIMMSDZrmikz, TB_NO_REVERSE },
- { X86::VFIXUPIMMSSZrrik, X86::VFIXUPIMMSSZrmik, TB_NO_REVERSE },
- { X86::VFIXUPIMMSSZrrikz, X86::VFIXUPIMMSSZrmikz, TB_NO_REVERSE },
- { X86::VFMADD132PDZ128rk, X86::VFMADD132PDZ128mk, 0 },
- { X86::VFMADD132PDZ128rkz, X86::VFMADD132PDZ128mkz, 0 },
- { X86::VFMADD132PDZ256rk, X86::VFMADD132PDZ256mk, 0 },
- { X86::VFMADD132PDZ256rkz, X86::VFMADD132PDZ256mkz, 0 },
- { X86::VFMADD132PDZrk, X86::VFMADD132PDZmk, 0 },
- { X86::VFMADD132PDZrkz, X86::VFMADD132PDZmkz, 0 },
- { X86::VFMADD132PHZ128rk, X86::VFMADD132PHZ128mk, 0 },
- { X86::VFMADD132PHZ128rkz, X86::VFMADD132PHZ128mkz, 0 },
- { X86::VFMADD132PHZ256rk, X86::VFMADD132PHZ256mk, 0 },
- { X86::VFMADD132PHZ256rkz, X86::VFMADD132PHZ256mkz, 0 },
- { X86::VFMADD132PHZrk, X86::VFMADD132PHZmk, 0 },
- { X86::VFMADD132PHZrkz, X86::VFMADD132PHZmkz, 0 },
- { X86::VFMADD132PSZ128rk, X86::VFMADD132PSZ128mk, 0 },
- { X86::VFMADD132PSZ128rkz, X86::VFMADD132PSZ128mkz, 0 },
- { X86::VFMADD132PSZ256rk, X86::VFMADD132PSZ256mk, 0 },
- { X86::VFMADD132PSZ256rkz, X86::VFMADD132PSZ256mkz, 0 },
- { X86::VFMADD132PSZrk, X86::VFMADD132PSZmk, 0 },
- { X86::VFMADD132PSZrkz, X86::VFMADD132PSZmkz, 0 },
- { X86::VFMADD132SDZr_Intk, X86::VFMADD132SDZm_Intk, TB_NO_REVERSE },
- { X86::VFMADD132SDZr_Intkz, X86::VFMADD132SDZm_Intkz, TB_NO_REVERSE },
- { X86::VFMADD132SHZr_Intk, X86::VFMADD132SHZm_Intk, TB_NO_REVERSE },
- { X86::VFMADD132SHZr_Intkz, X86::VFMADD132SHZm_Intkz, TB_NO_REVERSE },
- { X86::VFMADD132SSZr_Intk, X86::VFMADD132SSZm_Intk, TB_NO_REVERSE },
- { X86::VFMADD132SSZr_Intkz, X86::VFMADD132SSZm_Intkz, TB_NO_REVERSE },
- { X86::VFMADD213PDZ128rk, X86::VFMADD213PDZ128mk, 0 },
- { X86::VFMADD213PDZ128rkz, X86::VFMADD213PDZ128mkz, 0 },
- { X86::VFMADD213PDZ256rk, X86::VFMADD213PDZ256mk, 0 },
- { X86::VFMADD213PDZ256rkz, X86::VFMADD213PDZ256mkz, 0 },
- { X86::VFMADD213PDZrk, X86::VFMADD213PDZmk, 0 },
- { X86::VFMADD213PDZrkz, X86::VFMADD213PDZmkz, 0 },
- { X86::VFMADD213PHZ128rk, X86::VFMADD213PHZ128mk, 0 },
- { X86::VFMADD213PHZ128rkz, X86::VFMADD213PHZ128mkz, 0 },
- { X86::VFMADD213PHZ256rk, X86::VFMADD213PHZ256mk, 0 },
- { X86::VFMADD213PHZ256rkz, X86::VFMADD213PHZ256mkz, 0 },
- { X86::VFMADD213PHZrk, X86::VFMADD213PHZmk, 0 },
- { X86::VFMADD213PHZrkz, X86::VFMADD213PHZmkz, 0 },
- { X86::VFMADD213PSZ128rk, X86::VFMADD213PSZ128mk, 0 },
- { X86::VFMADD213PSZ128rkz, X86::VFMADD213PSZ128mkz, 0 },
- { X86::VFMADD213PSZ256rk, X86::VFMADD213PSZ256mk, 0 },
- { X86::VFMADD213PSZ256rkz, X86::VFMADD213PSZ256mkz, 0 },
- { X86::VFMADD213PSZrk, X86::VFMADD213PSZmk, 0 },
- { X86::VFMADD213PSZrkz, X86::VFMADD213PSZmkz, 0 },
- { X86::VFMADD213SDZr_Intk, X86::VFMADD213SDZm_Intk, TB_NO_REVERSE },
- { X86::VFMADD213SDZr_Intkz, X86::VFMADD213SDZm_Intkz, TB_NO_REVERSE },
- { X86::VFMADD213SHZr_Intk, X86::VFMADD213SHZm_Intk, TB_NO_REVERSE },
- { X86::VFMADD213SHZr_Intkz, X86::VFMADD213SHZm_Intkz, TB_NO_REVERSE },
- { X86::VFMADD213SSZr_Intk, X86::VFMADD213SSZm_Intk, TB_NO_REVERSE },
- { X86::VFMADD213SSZr_Intkz, X86::VFMADD213SSZm_Intkz, TB_NO_REVERSE },
- { X86::VFMADD231PDZ128rk, X86::VFMADD231PDZ128mk, 0 },
- { X86::VFMADD231PDZ128rkz, X86::VFMADD231PDZ128mkz, 0 },
- { X86::VFMADD231PDZ256rk, X86::VFMADD231PDZ256mk, 0 },
- { X86::VFMADD231PDZ256rkz, X86::VFMADD231PDZ256mkz, 0 },
- { X86::VFMADD231PDZrk, X86::VFMADD231PDZmk, 0 },
- { X86::VFMADD231PDZrkz, X86::VFMADD231PDZmkz, 0 },
- { X86::VFMADD231PHZ128rk, X86::VFMADD231PHZ128mk, 0 },
- { X86::VFMADD231PHZ128rkz, X86::VFMADD231PHZ128mkz, 0 },
- { X86::VFMADD231PHZ256rk, X86::VFMADD231PHZ256mk, 0 },
- { X86::VFMADD231PHZ256rkz, X86::VFMADD231PHZ256mkz, 0 },
- { X86::VFMADD231PHZrk, X86::VFMADD231PHZmk, 0 },
- { X86::VFMADD231PHZrkz, X86::VFMADD231PHZmkz, 0 },
- { X86::VFMADD231PSZ128rk, X86::VFMADD231PSZ128mk, 0 },
- { X86::VFMADD231PSZ128rkz, X86::VFMADD231PSZ128mkz, 0 },
- { X86::VFMADD231PSZ256rk, X86::VFMADD231PSZ256mk, 0 },
- { X86::VFMADD231PSZ256rkz, X86::VFMADD231PSZ256mkz, 0 },
- { X86::VFMADD231PSZrk, X86::VFMADD231PSZmk, 0 },
- { X86::VFMADD231PSZrkz, X86::VFMADD231PSZmkz, 0 },
- { X86::VFMADD231SDZr_Intk, X86::VFMADD231SDZm_Intk, TB_NO_REVERSE },
- { X86::VFMADD231SDZr_Intkz, X86::VFMADD231SDZm_Intkz, TB_NO_REVERSE },
- { X86::VFMADD231SHZr_Intk, X86::VFMADD231SHZm_Intk, TB_NO_REVERSE },
- { X86::VFMADD231SHZr_Intkz, X86::VFMADD231SHZm_Intkz, TB_NO_REVERSE },
- { X86::VFMADD231SSZr_Intk, X86::VFMADD231SSZm_Intk, TB_NO_REVERSE },
- { X86::VFMADD231SSZr_Intkz, X86::VFMADD231SSZm_Intkz, TB_NO_REVERSE },
- { X86::VFMADDCPHZ128rk, X86::VFMADDCPHZ128mk, 0 },
- { X86::VFMADDCPHZ128rkz, X86::VFMADDCPHZ128mkz, 0 },
- { X86::VFMADDCPHZ256rk, X86::VFMADDCPHZ256mk, 0 },
- { X86::VFMADDCPHZ256rkz, X86::VFMADDCPHZ256mkz, 0 },
- { X86::VFMADDCPHZrk, X86::VFMADDCPHZmk, 0 },
- { X86::VFMADDCPHZrkz, X86::VFMADDCPHZmkz, 0 },
- { X86::VFMADDCSHZrk, X86::VFMADDCSHZmk, TB_NO_REVERSE },
- { X86::VFMADDCSHZrkz, X86::VFMADDCSHZmkz, TB_NO_REVERSE },
- { X86::VFMADDSUB132PDZ128rk, X86::VFMADDSUB132PDZ128mk, 0 },
- { X86::VFMADDSUB132PDZ128rkz, X86::VFMADDSUB132PDZ128mkz, 0 },
- { X86::VFMADDSUB132PDZ256rk, X86::VFMADDSUB132PDZ256mk, 0 },
- { X86::VFMADDSUB132PDZ256rkz, X86::VFMADDSUB132PDZ256mkz, 0 },
- { X86::VFMADDSUB132PDZrk, X86::VFMADDSUB132PDZmk, 0 },
- { X86::VFMADDSUB132PDZrkz, X86::VFMADDSUB132PDZmkz, 0 },
- { X86::VFMADDSUB132PHZ128rk, X86::VFMADDSUB132PHZ128mk, 0 },
- { X86::VFMADDSUB132PHZ128rkz, X86::VFMADDSUB132PHZ128mkz, 0 },
- { X86::VFMADDSUB132PHZ256rk, X86::VFMADDSUB132PHZ256mk, 0 },
- { X86::VFMADDSUB132PHZ256rkz, X86::VFMADDSUB132PHZ256mkz, 0 },
- { X86::VFMADDSUB132PHZrk, X86::VFMADDSUB132PHZmk, 0 },
- { X86::VFMADDSUB132PHZrkz, X86::VFMADDSUB132PHZmkz, 0 },
- { X86::VFMADDSUB132PSZ128rk, X86::VFMADDSUB132PSZ128mk, 0 },
- { X86::VFMADDSUB132PSZ128rkz, X86::VFMADDSUB132PSZ128mkz, 0 },
- { X86::VFMADDSUB132PSZ256rk, X86::VFMADDSUB132PSZ256mk, 0 },
- { X86::VFMADDSUB132PSZ256rkz, X86::VFMADDSUB132PSZ256mkz, 0 },
- { X86::VFMADDSUB132PSZrk, X86::VFMADDSUB132PSZmk, 0 },
- { X86::VFMADDSUB132PSZrkz, X86::VFMADDSUB132PSZmkz, 0 },
- { X86::VFMADDSUB213PDZ128rk, X86::VFMADDSUB213PDZ128mk, 0 },
- { X86::VFMADDSUB213PDZ128rkz, X86::VFMADDSUB213PDZ128mkz, 0 },
- { X86::VFMADDSUB213PDZ256rk, X86::VFMADDSUB213PDZ256mk, 0 },
- { X86::VFMADDSUB213PDZ256rkz, X86::VFMADDSUB213PDZ256mkz, 0 },
- { X86::VFMADDSUB213PDZrk, X86::VFMADDSUB213PDZmk, 0 },
- { X86::VFMADDSUB213PDZrkz, X86::VFMADDSUB213PDZmkz, 0 },
- { X86::VFMADDSUB213PHZ128rk, X86::VFMADDSUB213PHZ128mk, 0 },
- { X86::VFMADDSUB213PHZ128rkz, X86::VFMADDSUB213PHZ128mkz, 0 },
- { X86::VFMADDSUB213PHZ256rk, X86::VFMADDSUB213PHZ256mk, 0 },
- { X86::VFMADDSUB213PHZ256rkz, X86::VFMADDSUB213PHZ256mkz, 0 },
- { X86::VFMADDSUB213PHZrk, X86::VFMADDSUB213PHZmk, 0 },
- { X86::VFMADDSUB213PHZrkz, X86::VFMADDSUB213PHZmkz, 0 },
- { X86::VFMADDSUB213PSZ128rk, X86::VFMADDSUB213PSZ128mk, 0 },
- { X86::VFMADDSUB213PSZ128rkz, X86::VFMADDSUB213PSZ128mkz, 0 },
- { X86::VFMADDSUB213PSZ256rk, X86::VFMADDSUB213PSZ256mk, 0 },
- { X86::VFMADDSUB213PSZ256rkz, X86::VFMADDSUB213PSZ256mkz, 0 },
- { X86::VFMADDSUB213PSZrk, X86::VFMADDSUB213PSZmk, 0 },
- { X86::VFMADDSUB213PSZrkz, X86::VFMADDSUB213PSZmkz, 0 },
- { X86::VFMADDSUB231PDZ128rk, X86::VFMADDSUB231PDZ128mk, 0 },
- { X86::VFMADDSUB231PDZ128rkz, X86::VFMADDSUB231PDZ128mkz, 0 },
- { X86::VFMADDSUB231PDZ256rk, X86::VFMADDSUB231PDZ256mk, 0 },
- { X86::VFMADDSUB231PDZ256rkz, X86::VFMADDSUB231PDZ256mkz, 0 },
- { X86::VFMADDSUB231PDZrk, X86::VFMADDSUB231PDZmk, 0 },
- { X86::VFMADDSUB231PDZrkz, X86::VFMADDSUB231PDZmkz, 0 },
- { X86::VFMADDSUB231PHZ128rk, X86::VFMADDSUB231PHZ128mk, 0 },
- { X86::VFMADDSUB231PHZ128rkz, X86::VFMADDSUB231PHZ128mkz, 0 },
- { X86::VFMADDSUB231PHZ256rk, X86::VFMADDSUB231PHZ256mk, 0 },
- { X86::VFMADDSUB231PHZ256rkz, X86::VFMADDSUB231PHZ256mkz, 0 },
- { X86::VFMADDSUB231PHZrk, X86::VFMADDSUB231PHZmk, 0 },
- { X86::VFMADDSUB231PHZrkz, X86::VFMADDSUB231PHZmkz, 0 },
- { X86::VFMADDSUB231PSZ128rk, X86::VFMADDSUB231PSZ128mk, 0 },
- { X86::VFMADDSUB231PSZ128rkz, X86::VFMADDSUB231PSZ128mkz, 0 },
- { X86::VFMADDSUB231PSZ256rk, X86::VFMADDSUB231PSZ256mk, 0 },
- { X86::VFMADDSUB231PSZ256rkz, X86::VFMADDSUB231PSZ256mkz, 0 },
- { X86::VFMADDSUB231PSZrk, X86::VFMADDSUB231PSZmk, 0 },
- { X86::VFMADDSUB231PSZrkz, X86::VFMADDSUB231PSZmkz, 0 },
- { X86::VFMSUB132PDZ128rk, X86::VFMSUB132PDZ128mk, 0 },
- { X86::VFMSUB132PDZ128rkz, X86::VFMSUB132PDZ128mkz, 0 },
- { X86::VFMSUB132PDZ256rk, X86::VFMSUB132PDZ256mk, 0 },
- { X86::VFMSUB132PDZ256rkz, X86::VFMSUB132PDZ256mkz, 0 },
- { X86::VFMSUB132PDZrk, X86::VFMSUB132PDZmk, 0 },
- { X86::VFMSUB132PDZrkz, X86::VFMSUB132PDZmkz, 0 },
- { X86::VFMSUB132PHZ128rk, X86::VFMSUB132PHZ128mk, 0 },
- { X86::VFMSUB132PHZ128rkz, X86::VFMSUB132PHZ128mkz, 0 },
- { X86::VFMSUB132PHZ256rk, X86::VFMSUB132PHZ256mk, 0 },
- { X86::VFMSUB132PHZ256rkz, X86::VFMSUB132PHZ256mkz, 0 },
- { X86::VFMSUB132PHZrk, X86::VFMSUB132PHZmk, 0 },
- { X86::VFMSUB132PHZrkz, X86::VFMSUB132PHZmkz, 0 },
- { X86::VFMSUB132PSZ128rk, X86::VFMSUB132PSZ128mk, 0 },
- { X86::VFMSUB132PSZ128rkz, X86::VFMSUB132PSZ128mkz, 0 },
- { X86::VFMSUB132PSZ256rk, X86::VFMSUB132PSZ256mk, 0 },
- { X86::VFMSUB132PSZ256rkz, X86::VFMSUB132PSZ256mkz, 0 },
- { X86::VFMSUB132PSZrk, X86::VFMSUB132PSZmk, 0 },
- { X86::VFMSUB132PSZrkz, X86::VFMSUB132PSZmkz, 0 },
- { X86::VFMSUB132SDZr_Intk, X86::VFMSUB132SDZm_Intk, TB_NO_REVERSE },
- { X86::VFMSUB132SDZr_Intkz, X86::VFMSUB132SDZm_Intkz, TB_NO_REVERSE },
- { X86::VFMSUB132SHZr_Intk, X86::VFMSUB132SHZm_Intk, TB_NO_REVERSE },
- { X86::VFMSUB132SHZr_Intkz, X86::VFMSUB132SHZm_Intkz, TB_NO_REVERSE },
- { X86::VFMSUB132SSZr_Intk, X86::VFMSUB132SSZm_Intk, TB_NO_REVERSE },
- { X86::VFMSUB132SSZr_Intkz, X86::VFMSUB132SSZm_Intkz, TB_NO_REVERSE },
- { X86::VFMSUB213PDZ128rk, X86::VFMSUB213PDZ128mk, 0 },
- { X86::VFMSUB213PDZ128rkz, X86::VFMSUB213PDZ128mkz, 0 },
- { X86::VFMSUB213PDZ256rk, X86::VFMSUB213PDZ256mk, 0 },
- { X86::VFMSUB213PDZ256rkz, X86::VFMSUB213PDZ256mkz, 0 },
- { X86::VFMSUB213PDZrk, X86::VFMSUB213PDZmk, 0 },
- { X86::VFMSUB213PDZrkz, X86::VFMSUB213PDZmkz, 0 },
- { X86::VFMSUB213PHZ128rk, X86::VFMSUB213PHZ128mk, 0 },
- { X86::VFMSUB213PHZ128rkz, X86::VFMSUB213PHZ128mkz, 0 },
- { X86::VFMSUB213PHZ256rk, X86::VFMSUB213PHZ256mk, 0 },
- { X86::VFMSUB213PHZ256rkz, X86::VFMSUB213PHZ256mkz, 0 },
- { X86::VFMSUB213PHZrk, X86::VFMSUB213PHZmk, 0 },
- { X86::VFMSUB213PHZrkz, X86::VFMSUB213PHZmkz, 0 },
- { X86::VFMSUB213PSZ128rk, X86::VFMSUB213PSZ128mk, 0 },
- { X86::VFMSUB213PSZ128rkz, X86::VFMSUB213PSZ128mkz, 0 },
- { X86::VFMSUB213PSZ256rk, X86::VFMSUB213PSZ256mk, 0 },
- { X86::VFMSUB213PSZ256rkz, X86::VFMSUB213PSZ256mkz, 0 },
- { X86::VFMSUB213PSZrk, X86::VFMSUB213PSZmk, 0 },
- { X86::VFMSUB213PSZrkz, X86::VFMSUB213PSZmkz, 0 },
- { X86::VFMSUB213SDZr_Intk, X86::VFMSUB213SDZm_Intk, TB_NO_REVERSE },
- { X86::VFMSUB213SDZr_Intkz, X86::VFMSUB213SDZm_Intkz, TB_NO_REVERSE },
- { X86::VFMSUB213SHZr_Intk, X86::VFMSUB213SHZm_Intk, TB_NO_REVERSE },
- { X86::VFMSUB213SHZr_Intkz, X86::VFMSUB213SHZm_Intkz, TB_NO_REVERSE },
- { X86::VFMSUB213SSZr_Intk, X86::VFMSUB213SSZm_Intk, TB_NO_REVERSE },
- { X86::VFMSUB213SSZr_Intkz, X86::VFMSUB213SSZm_Intkz, TB_NO_REVERSE },
- { X86::VFMSUB231PDZ128rk, X86::VFMSUB231PDZ128mk, 0 },
- { X86::VFMSUB231PDZ128rkz, X86::VFMSUB231PDZ128mkz, 0 },
- { X86::VFMSUB231PDZ256rk, X86::VFMSUB231PDZ256mk, 0 },
- { X86::VFMSUB231PDZ256rkz, X86::VFMSUB231PDZ256mkz, 0 },
- { X86::VFMSUB231PDZrk, X86::VFMSUB231PDZmk, 0 },
- { X86::VFMSUB231PDZrkz, X86::VFMSUB231PDZmkz, 0 },
- { X86::VFMSUB231PHZ128rk, X86::VFMSUB231PHZ128mk, 0 },
- { X86::VFMSUB231PHZ128rkz, X86::VFMSUB231PHZ128mkz, 0 },
- { X86::VFMSUB231PHZ256rk, X86::VFMSUB231PHZ256mk, 0 },
- { X86::VFMSUB231PHZ256rkz, X86::VFMSUB231PHZ256mkz, 0 },
- { X86::VFMSUB231PHZrk, X86::VFMSUB231PHZmk, 0 },
- { X86::VFMSUB231PHZrkz, X86::VFMSUB231PHZmkz, 0 },
- { X86::VFMSUB231PSZ128rk, X86::VFMSUB231PSZ128mk, 0 },
- { X86::VFMSUB231PSZ128rkz, X86::VFMSUB231PSZ128mkz, 0 },
- { X86::VFMSUB231PSZ256rk, X86::VFMSUB231PSZ256mk, 0 },
- { X86::VFMSUB231PSZ256rkz, X86::VFMSUB231PSZ256mkz, 0 },
- { X86::VFMSUB231PSZrk, X86::VFMSUB231PSZmk, 0 },
- { X86::VFMSUB231PSZrkz, X86::VFMSUB231PSZmkz, 0 },
- { X86::VFMSUB231SDZr_Intk, X86::VFMSUB231SDZm_Intk, TB_NO_REVERSE },
- { X86::VFMSUB231SDZr_Intkz, X86::VFMSUB231SDZm_Intkz, TB_NO_REVERSE },
- { X86::VFMSUB231SHZr_Intk, X86::VFMSUB231SHZm_Intk, TB_NO_REVERSE },
- { X86::VFMSUB231SHZr_Intkz, X86::VFMSUB231SHZm_Intkz, TB_NO_REVERSE },
- { X86::VFMSUB231SSZr_Intk, X86::VFMSUB231SSZm_Intk, TB_NO_REVERSE },
- { X86::VFMSUB231SSZr_Intkz, X86::VFMSUB231SSZm_Intkz, TB_NO_REVERSE },
- { X86::VFMSUBADD132PDZ128rk, X86::VFMSUBADD132PDZ128mk, 0 },
- { X86::VFMSUBADD132PDZ128rkz, X86::VFMSUBADD132PDZ128mkz, 0 },
- { X86::VFMSUBADD132PDZ256rk, X86::VFMSUBADD132PDZ256mk, 0 },
- { X86::VFMSUBADD132PDZ256rkz, X86::VFMSUBADD132PDZ256mkz, 0 },
- { X86::VFMSUBADD132PDZrk, X86::VFMSUBADD132PDZmk, 0 },
- { X86::VFMSUBADD132PDZrkz, X86::VFMSUBADD132PDZmkz, 0 },
- { X86::VFMSUBADD132PHZ128rk, X86::VFMSUBADD132PHZ128mk, 0 },
- { X86::VFMSUBADD132PHZ128rkz, X86::VFMSUBADD132PHZ128mkz, 0 },
- { X86::VFMSUBADD132PHZ256rk, X86::VFMSUBADD132PHZ256mk, 0 },
- { X86::VFMSUBADD132PHZ256rkz, X86::VFMSUBADD132PHZ256mkz, 0 },
- { X86::VFMSUBADD132PHZrk, X86::VFMSUBADD132PHZmk, 0 },
- { X86::VFMSUBADD132PHZrkz, X86::VFMSUBADD132PHZmkz, 0 },
- { X86::VFMSUBADD132PSZ128rk, X86::VFMSUBADD132PSZ128mk, 0 },
- { X86::VFMSUBADD132PSZ128rkz, X86::VFMSUBADD132PSZ128mkz, 0 },
- { X86::VFMSUBADD132PSZ256rk, X86::VFMSUBADD132PSZ256mk, 0 },
- { X86::VFMSUBADD132PSZ256rkz, X86::VFMSUBADD132PSZ256mkz, 0 },
- { X86::VFMSUBADD132PSZrk, X86::VFMSUBADD132PSZmk, 0 },
- { X86::VFMSUBADD132PSZrkz, X86::VFMSUBADD132PSZmkz, 0 },
- { X86::VFMSUBADD213PDZ128rk, X86::VFMSUBADD213PDZ128mk, 0 },
- { X86::VFMSUBADD213PDZ128rkz, X86::VFMSUBADD213PDZ128mkz, 0 },
- { X86::VFMSUBADD213PDZ256rk, X86::VFMSUBADD213PDZ256mk, 0 },
- { X86::VFMSUBADD213PDZ256rkz, X86::VFMSUBADD213PDZ256mkz, 0 },
- { X86::VFMSUBADD213PDZrk, X86::VFMSUBADD213PDZmk, 0 },
- { X86::VFMSUBADD213PDZrkz, X86::VFMSUBADD213PDZmkz, 0 },
- { X86::VFMSUBADD213PHZ128rk, X86::VFMSUBADD213PHZ128mk, 0 },
- { X86::VFMSUBADD213PHZ128rkz, X86::VFMSUBADD213PHZ128mkz, 0 },
- { X86::VFMSUBADD213PHZ256rk, X86::VFMSUBADD213PHZ256mk, 0 },
- { X86::VFMSUBADD213PHZ256rkz, X86::VFMSUBADD213PHZ256mkz, 0 },
- { X86::VFMSUBADD213PHZrk, X86::VFMSUBADD213PHZmk, 0 },
- { X86::VFMSUBADD213PHZrkz, X86::VFMSUBADD213PHZmkz, 0 },
- { X86::VFMSUBADD213PSZ128rk, X86::VFMSUBADD213PSZ128mk, 0 },
- { X86::VFMSUBADD213PSZ128rkz, X86::VFMSUBADD213PSZ128mkz, 0 },
- { X86::VFMSUBADD213PSZ256rk, X86::VFMSUBADD213PSZ256mk, 0 },
- { X86::VFMSUBADD213PSZ256rkz, X86::VFMSUBADD213PSZ256mkz, 0 },
- { X86::VFMSUBADD213PSZrk, X86::VFMSUBADD213PSZmk, 0 },
- { X86::VFMSUBADD213PSZrkz, X86::VFMSUBADD213PSZmkz, 0 },
- { X86::VFMSUBADD231PDZ128rk, X86::VFMSUBADD231PDZ128mk, 0 },
- { X86::VFMSUBADD231PDZ128rkz, X86::VFMSUBADD231PDZ128mkz, 0 },
- { X86::VFMSUBADD231PDZ256rk, X86::VFMSUBADD231PDZ256mk, 0 },
- { X86::VFMSUBADD231PDZ256rkz, X86::VFMSUBADD231PDZ256mkz, 0 },
- { X86::VFMSUBADD231PDZrk, X86::VFMSUBADD231PDZmk, 0 },
- { X86::VFMSUBADD231PDZrkz, X86::VFMSUBADD231PDZmkz, 0 },
- { X86::VFMSUBADD231PHZ128rk, X86::VFMSUBADD231PHZ128mk, 0 },
- { X86::VFMSUBADD231PHZ128rkz, X86::VFMSUBADD231PHZ128mkz, 0 },
- { X86::VFMSUBADD231PHZ256rk, X86::VFMSUBADD231PHZ256mk, 0 },
- { X86::VFMSUBADD231PHZ256rkz, X86::VFMSUBADD231PHZ256mkz, 0 },
- { X86::VFMSUBADD231PHZrk, X86::VFMSUBADD231PHZmk, 0 },
- { X86::VFMSUBADD231PHZrkz, X86::VFMSUBADD231PHZmkz, 0 },
- { X86::VFMSUBADD231PSZ128rk, X86::VFMSUBADD231PSZ128mk, 0 },
- { X86::VFMSUBADD231PSZ128rkz, X86::VFMSUBADD231PSZ128mkz, 0 },
- { X86::VFMSUBADD231PSZ256rk, X86::VFMSUBADD231PSZ256mk, 0 },
- { X86::VFMSUBADD231PSZ256rkz, X86::VFMSUBADD231PSZ256mkz, 0 },
- { X86::VFMSUBADD231PSZrk, X86::VFMSUBADD231PSZmk, 0 },
- { X86::VFMSUBADD231PSZrkz, X86::VFMSUBADD231PSZmkz, 0 },
- { X86::VFMULCPHZ128rrk, X86::VFMULCPHZ128rmk, 0 },
- { X86::VFMULCPHZ256rrk, X86::VFMULCPHZ256rmk, 0 },
- { X86::VFMULCPHZrrk, X86::VFMULCPHZrmk, 0 },
- { X86::VFMULCSHZrrk, X86::VFMULCSHZrmk, TB_NO_REVERSE },
- { X86::VFNMADD132PDZ128rk, X86::VFNMADD132PDZ128mk, 0 },
- { X86::VFNMADD132PDZ128rkz, X86::VFNMADD132PDZ128mkz, 0 },
- { X86::VFNMADD132PDZ256rk, X86::VFNMADD132PDZ256mk, 0 },
- { X86::VFNMADD132PDZ256rkz, X86::VFNMADD132PDZ256mkz, 0 },
- { X86::VFNMADD132PDZrk, X86::VFNMADD132PDZmk, 0 },
- { X86::VFNMADD132PDZrkz, X86::VFNMADD132PDZmkz, 0 },
- { X86::VFNMADD132PHZ128rk, X86::VFNMADD132PHZ128mk, 0 },
- { X86::VFNMADD132PHZ128rkz, X86::VFNMADD132PHZ128mkz, 0 },
- { X86::VFNMADD132PHZ256rk, X86::VFNMADD132PHZ256mk, 0 },
- { X86::VFNMADD132PHZ256rkz, X86::VFNMADD132PHZ256mkz, 0 },
- { X86::VFNMADD132PHZrk, X86::VFNMADD132PHZmk, 0 },
- { X86::VFNMADD132PHZrkz, X86::VFNMADD132PHZmkz, 0 },
- { X86::VFNMADD132PSZ128rk, X86::VFNMADD132PSZ128mk, 0 },
- { X86::VFNMADD132PSZ128rkz, X86::VFNMADD132PSZ128mkz, 0 },
- { X86::VFNMADD132PSZ256rk, X86::VFNMADD132PSZ256mk, 0 },
- { X86::VFNMADD132PSZ256rkz, X86::VFNMADD132PSZ256mkz, 0 },
- { X86::VFNMADD132PSZrk, X86::VFNMADD132PSZmk, 0 },
- { X86::VFNMADD132PSZrkz, X86::VFNMADD132PSZmkz, 0 },
- { X86::VFNMADD132SDZr_Intk, X86::VFNMADD132SDZm_Intk, TB_NO_REVERSE },
- { X86::VFNMADD132SDZr_Intkz, X86::VFNMADD132SDZm_Intkz, TB_NO_REVERSE },
- { X86::VFNMADD132SHZr_Intk, X86::VFNMADD132SHZm_Intk, TB_NO_REVERSE },
- { X86::VFNMADD132SHZr_Intkz, X86::VFNMADD132SHZm_Intkz, TB_NO_REVERSE },
- { X86::VFNMADD132SSZr_Intk, X86::VFNMADD132SSZm_Intk, TB_NO_REVERSE },
- { X86::VFNMADD132SSZr_Intkz, X86::VFNMADD132SSZm_Intkz, TB_NO_REVERSE },
- { X86::VFNMADD213PDZ128rk, X86::VFNMADD213PDZ128mk, 0 },
- { X86::VFNMADD213PDZ128rkz, X86::VFNMADD213PDZ128mkz, 0 },
- { X86::VFNMADD213PDZ256rk, X86::VFNMADD213PDZ256mk, 0 },
- { X86::VFNMADD213PDZ256rkz, X86::VFNMADD213PDZ256mkz, 0 },
- { X86::VFNMADD213PDZrk, X86::VFNMADD213PDZmk, 0 },
- { X86::VFNMADD213PDZrkz, X86::VFNMADD213PDZmkz, 0 },
- { X86::VFNMADD213PHZ128rk, X86::VFNMADD213PHZ128mk, 0 },
- { X86::VFNMADD213PHZ128rkz, X86::VFNMADD213PHZ128mkz, 0 },
- { X86::VFNMADD213PHZ256rk, X86::VFNMADD213PHZ256mk, 0 },
- { X86::VFNMADD213PHZ256rkz, X86::VFNMADD213PHZ256mkz, 0 },
- { X86::VFNMADD213PHZrk, X86::VFNMADD213PHZmk, 0 },
- { X86::VFNMADD213PHZrkz, X86::VFNMADD213PHZmkz, 0 },
- { X86::VFNMADD213PSZ128rk, X86::VFNMADD213PSZ128mk, 0 },
- { X86::VFNMADD213PSZ128rkz, X86::VFNMADD213PSZ128mkz, 0 },
- { X86::VFNMADD213PSZ256rk, X86::VFNMADD213PSZ256mk, 0 },
- { X86::VFNMADD213PSZ256rkz, X86::VFNMADD213PSZ256mkz, 0 },
- { X86::VFNMADD213PSZrk, X86::VFNMADD213PSZmk, 0 },
- { X86::VFNMADD213PSZrkz, X86::VFNMADD213PSZmkz, 0 },
- { X86::VFNMADD213SDZr_Intk, X86::VFNMADD213SDZm_Intk, TB_NO_REVERSE },
- { X86::VFNMADD213SDZr_Intkz, X86::VFNMADD213SDZm_Intkz, TB_NO_REVERSE },
- { X86::VFNMADD213SHZr_Intk, X86::VFNMADD213SHZm_Intk, TB_NO_REVERSE },
- { X86::VFNMADD213SHZr_Intkz, X86::VFNMADD213SHZm_Intkz, TB_NO_REVERSE },
- { X86::VFNMADD213SSZr_Intk, X86::VFNMADD213SSZm_Intk, TB_NO_REVERSE },
- { X86::VFNMADD213SSZr_Intkz, X86::VFNMADD213SSZm_Intkz, TB_NO_REVERSE },
- { X86::VFNMADD231PDZ128rk, X86::VFNMADD231PDZ128mk, 0 },
- { X86::VFNMADD231PDZ128rkz, X86::VFNMADD231PDZ128mkz, 0 },
- { X86::VFNMADD231PDZ256rk, X86::VFNMADD231PDZ256mk, 0 },
- { X86::VFNMADD231PDZ256rkz, X86::VFNMADD231PDZ256mkz, 0 },
- { X86::VFNMADD231PDZrk, X86::VFNMADD231PDZmk, 0 },
- { X86::VFNMADD231PDZrkz, X86::VFNMADD231PDZmkz, 0 },
- { X86::VFNMADD231PHZ128rk, X86::VFNMADD231PHZ128mk, 0 },
- { X86::VFNMADD231PHZ128rkz, X86::VFNMADD231PHZ128mkz, 0 },
- { X86::VFNMADD231PHZ256rk, X86::VFNMADD231PHZ256mk, 0 },
- { X86::VFNMADD231PHZ256rkz, X86::VFNMADD231PHZ256mkz, 0 },
- { X86::VFNMADD231PHZrk, X86::VFNMADD231PHZmk, 0 },
- { X86::VFNMADD231PHZrkz, X86::VFNMADD231PHZmkz, 0 },
- { X86::VFNMADD231PSZ128rk, X86::VFNMADD231PSZ128mk, 0 },
- { X86::VFNMADD231PSZ128rkz, X86::VFNMADD231PSZ128mkz, 0 },
- { X86::VFNMADD231PSZ256rk, X86::VFNMADD231PSZ256mk, 0 },
- { X86::VFNMADD231PSZ256rkz, X86::VFNMADD231PSZ256mkz, 0 },
- { X86::VFNMADD231PSZrk, X86::VFNMADD231PSZmk, 0 },
- { X86::VFNMADD231PSZrkz, X86::VFNMADD231PSZmkz, 0 },
- { X86::VFNMADD231SDZr_Intk, X86::VFNMADD231SDZm_Intk, TB_NO_REVERSE },
- { X86::VFNMADD231SDZr_Intkz, X86::VFNMADD231SDZm_Intkz, TB_NO_REVERSE },
- { X86::VFNMADD231SHZr_Intk, X86::VFNMADD231SHZm_Intk, TB_NO_REVERSE },
- { X86::VFNMADD231SHZr_Intkz, X86::VFNMADD231SHZm_Intkz, TB_NO_REVERSE },
- { X86::VFNMADD231SSZr_Intk, X86::VFNMADD231SSZm_Intk, TB_NO_REVERSE },
- { X86::VFNMADD231SSZr_Intkz, X86::VFNMADD231SSZm_Intkz, TB_NO_REVERSE },
- { X86::VFNMSUB132PDZ128rk, X86::VFNMSUB132PDZ128mk, 0 },
- { X86::VFNMSUB132PDZ128rkz, X86::VFNMSUB132PDZ128mkz, 0 },
- { X86::VFNMSUB132PDZ256rk, X86::VFNMSUB132PDZ256mk, 0 },
- { X86::VFNMSUB132PDZ256rkz, X86::VFNMSUB132PDZ256mkz, 0 },
- { X86::VFNMSUB132PDZrk, X86::VFNMSUB132PDZmk, 0 },
- { X86::VFNMSUB132PDZrkz, X86::VFNMSUB132PDZmkz, 0 },
- { X86::VFNMSUB132PHZ128rk, X86::VFNMSUB132PHZ128mk, 0 },
- { X86::VFNMSUB132PHZ128rkz, X86::VFNMSUB132PHZ128mkz, 0 },
- { X86::VFNMSUB132PHZ256rk, X86::VFNMSUB132PHZ256mk, 0 },
- { X86::VFNMSUB132PHZ256rkz, X86::VFNMSUB132PHZ256mkz, 0 },
- { X86::VFNMSUB132PHZrk, X86::VFNMSUB132PHZmk, 0 },
- { X86::VFNMSUB132PHZrkz, X86::VFNMSUB132PHZmkz, 0 },
- { X86::VFNMSUB132PSZ128rk, X86::VFNMSUB132PSZ128mk, 0 },
- { X86::VFNMSUB132PSZ128rkz, X86::VFNMSUB132PSZ128mkz, 0 },
- { X86::VFNMSUB132PSZ256rk, X86::VFNMSUB132PSZ256mk, 0 },
- { X86::VFNMSUB132PSZ256rkz, X86::VFNMSUB132PSZ256mkz, 0 },
- { X86::VFNMSUB132PSZrk, X86::VFNMSUB132PSZmk, 0 },
- { X86::VFNMSUB132PSZrkz, X86::VFNMSUB132PSZmkz, 0 },
- { X86::VFNMSUB132SDZr_Intk, X86::VFNMSUB132SDZm_Intk, TB_NO_REVERSE },
- { X86::VFNMSUB132SDZr_Intkz, X86::VFNMSUB132SDZm_Intkz, TB_NO_REVERSE },
- { X86::VFNMSUB132SHZr_Intk, X86::VFNMSUB132SHZm_Intk, TB_NO_REVERSE },
- { X86::VFNMSUB132SHZr_Intkz, X86::VFNMSUB132SHZm_Intkz, TB_NO_REVERSE },
- { X86::VFNMSUB132SSZr_Intk, X86::VFNMSUB132SSZm_Intk, TB_NO_REVERSE },
- { X86::VFNMSUB132SSZr_Intkz, X86::VFNMSUB132SSZm_Intkz, TB_NO_REVERSE },
- { X86::VFNMSUB213PDZ128rk, X86::VFNMSUB213PDZ128mk, 0 },
- { X86::VFNMSUB213PDZ128rkz, X86::VFNMSUB213PDZ128mkz, 0 },
- { X86::VFNMSUB213PDZ256rk, X86::VFNMSUB213PDZ256mk, 0 },
- { X86::VFNMSUB213PDZ256rkz, X86::VFNMSUB213PDZ256mkz, 0 },
- { X86::VFNMSUB213PDZrk, X86::VFNMSUB213PDZmk, 0 },
- { X86::VFNMSUB213PDZrkz, X86::VFNMSUB213PDZmkz, 0 },
- { X86::VFNMSUB213PHZ128rk, X86::VFNMSUB213PHZ128mk, 0 },
- { X86::VFNMSUB213PHZ128rkz, X86::VFNMSUB213PHZ128mkz, 0 },
- { X86::VFNMSUB213PHZ256rk, X86::VFNMSUB213PHZ256mk, 0 },
- { X86::VFNMSUB213PHZ256rkz, X86::VFNMSUB213PHZ256mkz, 0 },
- { X86::VFNMSUB213PHZrk, X86::VFNMSUB213PHZmk, 0 },
- { X86::VFNMSUB213PHZrkz, X86::VFNMSUB213PHZmkz, 0 },
- { X86::VFNMSUB213PSZ128rk, X86::VFNMSUB213PSZ128mk, 0 },
- { X86::VFNMSUB213PSZ128rkz, X86::VFNMSUB213PSZ128mkz, 0 },
- { X86::VFNMSUB213PSZ256rk, X86::VFNMSUB213PSZ256mk, 0 },
- { X86::VFNMSUB213PSZ256rkz, X86::VFNMSUB213PSZ256mkz, 0 },
- { X86::VFNMSUB213PSZrk, X86::VFNMSUB213PSZmk, 0 },
- { X86::VFNMSUB213PSZrkz, X86::VFNMSUB213PSZmkz, 0 },
- { X86::VFNMSUB213SDZr_Intk, X86::VFNMSUB213SDZm_Intk, TB_NO_REVERSE },
- { X86::VFNMSUB213SDZr_Intkz, X86::VFNMSUB213SDZm_Intkz, TB_NO_REVERSE },
- { X86::VFNMSUB213SHZr_Intk, X86::VFNMSUB213SHZm_Intk, TB_NO_REVERSE },
- { X86::VFNMSUB213SHZr_Intkz, X86::VFNMSUB213SHZm_Intkz, TB_NO_REVERSE },
- { X86::VFNMSUB213SSZr_Intk, X86::VFNMSUB213SSZm_Intk, TB_NO_REVERSE },
- { X86::VFNMSUB213SSZr_Intkz, X86::VFNMSUB213SSZm_Intkz, TB_NO_REVERSE },
- { X86::VFNMSUB231PDZ128rk, X86::VFNMSUB231PDZ128mk, 0 },
- { X86::VFNMSUB231PDZ128rkz, X86::VFNMSUB231PDZ128mkz, 0 },
- { X86::VFNMSUB231PDZ256rk, X86::VFNMSUB231PDZ256mk, 0 },
- { X86::VFNMSUB231PDZ256rkz, X86::VFNMSUB231PDZ256mkz, 0 },
- { X86::VFNMSUB231PDZrk, X86::VFNMSUB231PDZmk, 0 },
- { X86::VFNMSUB231PDZrkz, X86::VFNMSUB231PDZmkz, 0 },
- { X86::VFNMSUB231PHZ128rk, X86::VFNMSUB231PHZ128mk, 0 },
- { X86::VFNMSUB231PHZ128rkz, X86::VFNMSUB231PHZ128mkz, 0 },
- { X86::VFNMSUB231PHZ256rk, X86::VFNMSUB231PHZ256mk, 0 },
- { X86::VFNMSUB231PHZ256rkz, X86::VFNMSUB231PHZ256mkz, 0 },
- { X86::VFNMSUB231PHZrk, X86::VFNMSUB231PHZmk, 0 },
- { X86::VFNMSUB231PHZrkz, X86::VFNMSUB231PHZmkz, 0 },
- { X86::VFNMSUB231PSZ128rk, X86::VFNMSUB231PSZ128mk, 0 },
- { X86::VFNMSUB231PSZ128rkz, X86::VFNMSUB231PSZ128mkz, 0 },
- { X86::VFNMSUB231PSZ256rk, X86::VFNMSUB231PSZ256mk, 0 },
- { X86::VFNMSUB231PSZ256rkz, X86::VFNMSUB231PSZ256mkz, 0 },
- { X86::VFNMSUB231PSZrk, X86::VFNMSUB231PSZmk, 0 },
- { X86::VFNMSUB231PSZrkz, X86::VFNMSUB231PSZmkz, 0 },
- { X86::VFNMSUB231SDZr_Intk, X86::VFNMSUB231SDZm_Intk, TB_NO_REVERSE },
- { X86::VFNMSUB231SDZr_Intkz, X86::VFNMSUB231SDZm_Intkz, TB_NO_REVERSE },
- { X86::VFNMSUB231SHZr_Intk, X86::VFNMSUB231SHZm_Intk, TB_NO_REVERSE },
- { X86::VFNMSUB231SHZr_Intkz, X86::VFNMSUB231SHZm_Intkz, TB_NO_REVERSE },
- { X86::VFNMSUB231SSZr_Intk, X86::VFNMSUB231SSZm_Intk, TB_NO_REVERSE },
- { X86::VFNMSUB231SSZr_Intkz, X86::VFNMSUB231SSZm_Intkz, TB_NO_REVERSE },
- { X86::VGETEXPSDZrk, X86::VGETEXPSDZmk, TB_NO_REVERSE },
- { X86::VGETEXPSHZrk, X86::VGETEXPSHZmk, TB_NO_REVERSE },
- { X86::VGETEXPSSZrk, X86::VGETEXPSSZmk, TB_NO_REVERSE },
- { X86::VGETMANTSDZrrik, X86::VGETMANTSDZrmik, TB_NO_REVERSE },
- { X86::VGETMANTSHZrrik, X86::VGETMANTSHZrmik, TB_NO_REVERSE },
- { X86::VGETMANTSSZrrik, X86::VGETMANTSSZrmik, TB_NO_REVERSE },
- { X86::VGF2P8AFFINEINVQBZ128rrik, X86::VGF2P8AFFINEINVQBZ128rmik, 0 },
- { X86::VGF2P8AFFINEINVQBZ256rrik, X86::VGF2P8AFFINEINVQBZ256rmik, 0 },
- { X86::VGF2P8AFFINEINVQBZrrik, X86::VGF2P8AFFINEINVQBZrmik, 0 },
- { X86::VGF2P8AFFINEQBZ128rrik, X86::VGF2P8AFFINEQBZ128rmik, 0 },
- { X86::VGF2P8AFFINEQBZ256rrik, X86::VGF2P8AFFINEQBZ256rmik, 0 },
- { X86::VGF2P8AFFINEQBZrrik, X86::VGF2P8AFFINEQBZrmik, 0 },
- { X86::VGF2P8MULBZ128rrk, X86::VGF2P8MULBZ128rmk, 0 },
- { X86::VGF2P8MULBZ256rrk, X86::VGF2P8MULBZ256rmk, 0 },
- { X86::VGF2P8MULBZrrk, X86::VGF2P8MULBZrmk, 0 },
- { X86::VINSERTF32x4Z256rrk, X86::VINSERTF32x4Z256rmk, 0 },
- { X86::VINSERTF32x4Zrrk, X86::VINSERTF32x4Zrmk, 0 },
- { X86::VINSERTF32x8Zrrk, X86::VINSERTF32x8Zrmk, 0 },
- { X86::VINSERTF64x2Z256rrk, X86::VINSERTF64x2Z256rmk, 0 },
- { X86::VINSERTF64x2Zrrk, X86::VINSERTF64x2Zrmk, 0 },
- { X86::VINSERTF64x4Zrrk, X86::VINSERTF64x4Zrmk, 0 },
- { X86::VINSERTI32x4Z256rrk, X86::VINSERTI32x4Z256rmk, 0 },
- { X86::VINSERTI32x4Zrrk, X86::VINSERTI32x4Zrmk, 0 },
- { X86::VINSERTI32x8Zrrk, X86::VINSERTI32x8Zrmk, 0 },
- { X86::VINSERTI64x2Z256rrk, X86::VINSERTI64x2Z256rmk, 0 },
- { X86::VINSERTI64x2Zrrk, X86::VINSERTI64x2Zrmk, 0 },
- { X86::VINSERTI64x4Zrrk, X86::VINSERTI64x4Zrmk, 0 },
- { X86::VMAXCPDZ128rrk, X86::VMAXCPDZ128rmk, 0 },
- { X86::VMAXCPDZ256rrk, X86::VMAXCPDZ256rmk, 0 },
- { X86::VMAXCPDZrrk, X86::VMAXCPDZrmk, 0 },
- { X86::VMAXCPHZ128rrk, X86::VMAXCPHZ128rmk, 0 },
- { X86::VMAXCPHZ256rrk, X86::VMAXCPHZ256rmk, 0 },
- { X86::VMAXCPHZrrk, X86::VMAXCPHZrmk, 0 },
- { X86::VMAXCPSZ128rrk, X86::VMAXCPSZ128rmk, 0 },
- { X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmk, 0 },
- { X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0 },
- { X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 },
- { X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0 },
- { X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 },
- { X86::VMAXPHZ128rrk, X86::VMAXPHZ128rmk, 0 },
- { X86::VMAXPHZ256rrk, X86::VMAXPHZ256rmk, 0 },
- { X86::VMAXPHZrrk, X86::VMAXPHZrmk, 0 },
- { X86::VMAXPSZ128rrk, X86::VMAXPSZ128rmk, 0 },
- { X86::VMAXPSZ256rrk, X86::VMAXPSZ256rmk, 0 },
- { X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 },
- { X86::VMAXSDZrr_Intk, X86::VMAXSDZrm_Intk, TB_NO_REVERSE },
- { X86::VMAXSHZrr_Intk, X86::VMAXSHZrm_Intk, TB_NO_REVERSE },
- { X86::VMAXSSZrr_Intk, X86::VMAXSSZrm_Intk, TB_NO_REVERSE },
- { X86::VMINCPDZ128rrk, X86::VMINCPDZ128rmk, 0 },
- { X86::VMINCPDZ256rrk, X86::VMINCPDZ256rmk, 0 },
- { X86::VMINCPDZrrk, X86::VMINCPDZrmk, 0 },
- { X86::VMINCPHZ128rrk, X86::VMINCPHZ128rmk, 0 },
- { X86::VMINCPHZ256rrk, X86::VMINCPHZ256rmk, 0 },
- { X86::VMINCPHZrrk, X86::VMINCPHZrmk, 0 },
- { X86::VMINCPSZ128rrk, X86::VMINCPSZ128rmk, 0 },
- { X86::VMINCPSZ256rrk, X86::VMINCPSZ256rmk, 0 },
- { X86::VMINCPSZrrk, X86::VMINCPSZrmk, 0 },
- { X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0 },
- { X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0 },
- { X86::VMINPDZrrk, X86::VMINPDZrmk, 0 },
- { X86::VMINPHZ128rrk, X86::VMINPHZ128rmk, 0 },
- { X86::VMINPHZ256rrk, X86::VMINPHZ256rmk, 0 },
- { X86::VMINPHZrrk, X86::VMINPHZrmk, 0 },
- { X86::VMINPSZ128rrk, X86::VMINPSZ128rmk, 0 },
- { X86::VMINPSZ256rrk, X86::VMINPSZ256rmk, 0 },
- { X86::VMINPSZrrk, X86::VMINPSZrmk, 0 },
- { X86::VMINSDZrr_Intk, X86::VMINSDZrm_Intk, TB_NO_REVERSE },
- { X86::VMINSHZrr_Intk, X86::VMINSHZrm_Intk, TB_NO_REVERSE },
- { X86::VMINSSZrr_Intk, X86::VMINSSZrm_Intk, TB_NO_REVERSE },
- { X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0 },
- { X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0 },
- { X86::VMULPDZrrk, X86::VMULPDZrmk, 0 },
- { X86::VMULPHZ128rrk, X86::VMULPHZ128rmk, 0 },
- { X86::VMULPHZ256rrk, X86::VMULPHZ256rmk, 0 },
- { X86::VMULPHZrrk, X86::VMULPHZrmk, 0 },
- { X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 },
- { X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 },
- { X86::VMULPSZrrk, X86::VMULPSZrmk, 0 },
- { X86::VMULSDZrr_Intk, X86::VMULSDZrm_Intk, TB_NO_REVERSE },
- { X86::VMULSHZrr_Intk, X86::VMULSHZrm_Intk, TB_NO_REVERSE },
- { X86::VMULSSZrr_Intk, X86::VMULSSZrm_Intk, TB_NO_REVERSE },
- { X86::VORPDZ128rrk, X86::VORPDZ128rmk, 0 },
- { X86::VORPDZ256rrk, X86::VORPDZ256rmk, 0 },
- { X86::VORPDZrrk, X86::VORPDZrmk, 0 },
- { X86::VORPSZ128rrk, X86::VORPSZ128rmk, 0 },
- { X86::VORPSZ256rrk, X86::VORPSZ256rmk, 0 },
- { X86::VORPSZrrk, X86::VORPSZrmk, 0 },
- { X86::VPACKSSDWZ128rrk, X86::VPACKSSDWZ128rmk, 0 },
- { X86::VPACKSSDWZ256rrk, X86::VPACKSSDWZ256rmk, 0 },
- { X86::VPACKSSDWZrrk, X86::VPACKSSDWZrmk, 0 },
- { X86::VPACKSSWBZ128rrk, X86::VPACKSSWBZ128rmk, 0 },
- { X86::VPACKSSWBZ256rrk, X86::VPACKSSWBZ256rmk, 0 },
- { X86::VPACKSSWBZrrk, X86::VPACKSSWBZrmk, 0 },
- { X86::VPACKUSDWZ128rrk, X86::VPACKUSDWZ128rmk, 0 },
- { X86::VPACKUSDWZ256rrk, X86::VPACKUSDWZ256rmk, 0 },
- { X86::VPACKUSDWZrrk, X86::VPACKUSDWZrmk, 0 },
- { X86::VPACKUSWBZ128rrk, X86::VPACKUSWBZ128rmk, 0 },
- { X86::VPACKUSWBZ256rrk, X86::VPACKUSWBZ256rmk, 0 },
- { X86::VPACKUSWBZrrk, X86::VPACKUSWBZrmk, 0 },
- { X86::VPADDBZ128rrk, X86::VPADDBZ128rmk, 0 },
- { X86::VPADDBZ256rrk, X86::VPADDBZ256rmk, 0 },
- { X86::VPADDBZrrk, X86::VPADDBZrmk, 0 },
- { X86::VPADDDZ128rrk, X86::VPADDDZ128rmk, 0 },
- { X86::VPADDDZ256rrk, X86::VPADDDZ256rmk, 0 },
- { X86::VPADDDZrrk, X86::VPADDDZrmk, 0 },
- { X86::VPADDQZ128rrk, X86::VPADDQZ128rmk, 0 },
- { X86::VPADDQZ256rrk, X86::VPADDQZ256rmk, 0 },
- { X86::VPADDQZrrk, X86::VPADDQZrmk, 0 },
- { X86::VPADDSBZ128rrk, X86::VPADDSBZ128rmk, 0 },
- { X86::VPADDSBZ256rrk, X86::VPADDSBZ256rmk, 0 },
- { X86::VPADDSBZrrk, X86::VPADDSBZrmk, 0 },
- { X86::VPADDSWZ128rrk, X86::VPADDSWZ128rmk, 0 },
- { X86::VPADDSWZ256rrk, X86::VPADDSWZ256rmk, 0 },
- { X86::VPADDSWZrrk, X86::VPADDSWZrmk, 0 },
- { X86::VPADDUSBZ128rrk, X86::VPADDUSBZ128rmk, 0 },
- { X86::VPADDUSBZ256rrk, X86::VPADDUSBZ256rmk, 0 },
- { X86::VPADDUSBZrrk, X86::VPADDUSBZrmk, 0 },
- { X86::VPADDUSWZ128rrk, X86::VPADDUSWZ128rmk, 0 },
- { X86::VPADDUSWZ256rrk, X86::VPADDUSWZ256rmk, 0 },
- { X86::VPADDUSWZrrk, X86::VPADDUSWZrmk, 0 },
- { X86::VPADDWZ128rrk, X86::VPADDWZ128rmk, 0 },
- { X86::VPADDWZ256rrk, X86::VPADDWZ256rmk, 0 },
- { X86::VPADDWZrrk, X86::VPADDWZrmk, 0 },
- { X86::VPALIGNRZ128rrik, X86::VPALIGNRZ128rmik, 0 },
- { X86::VPALIGNRZ256rrik, X86::VPALIGNRZ256rmik, 0 },
- { X86::VPALIGNRZrrik, X86::VPALIGNRZrmik, 0 },
- { X86::VPANDDZ128rrk, X86::VPANDDZ128rmk, 0 },
- { X86::VPANDDZ256rrk, X86::VPANDDZ256rmk, 0 },
- { X86::VPANDDZrrk, X86::VPANDDZrmk, 0 },
- { X86::VPANDNDZ128rrk, X86::VPANDNDZ128rmk, 0 },
- { X86::VPANDNDZ256rrk, X86::VPANDNDZ256rmk, 0 },
- { X86::VPANDNDZrrk, X86::VPANDNDZrmk, 0 },
- { X86::VPANDNQZ128rrk, X86::VPANDNQZ128rmk, 0 },
- { X86::VPANDNQZ256rrk, X86::VPANDNQZ256rmk, 0 },
- { X86::VPANDNQZrrk, X86::VPANDNQZrmk, 0 },
- { X86::VPANDQZ128rrk, X86::VPANDQZ128rmk, 0 },
- { X86::VPANDQZ256rrk, X86::VPANDQZ256rmk, 0 },
- { X86::VPANDQZrrk, X86::VPANDQZrmk, 0 },
- { X86::VPAVGBZ128rrk, X86::VPAVGBZ128rmk, 0 },
- { X86::VPAVGBZ256rrk, X86::VPAVGBZ256rmk, 0 },
- { X86::VPAVGBZrrk, X86::VPAVGBZrmk, 0 },
- { X86::VPAVGWZ128rrk, X86::VPAVGWZ128rmk, 0 },
- { X86::VPAVGWZ256rrk, X86::VPAVGWZ256rmk, 0 },
- { X86::VPAVGWZrrk, X86::VPAVGWZrmk, 0 },
- { X86::VPDPBUSDSZ128rk, X86::VPDPBUSDSZ128mk, 0 },
- { X86::VPDPBUSDSZ128rkz, X86::VPDPBUSDSZ128mkz, 0 },
- { X86::VPDPBUSDSZ256rk, X86::VPDPBUSDSZ256mk, 0 },
- { X86::VPDPBUSDSZ256rkz, X86::VPDPBUSDSZ256mkz, 0 },
- { X86::VPDPBUSDSZrk, X86::VPDPBUSDSZmk, 0 },
- { X86::VPDPBUSDSZrkz, X86::VPDPBUSDSZmkz, 0 },
- { X86::VPDPBUSDZ128rk, X86::VPDPBUSDZ128mk, 0 },
- { X86::VPDPBUSDZ128rkz, X86::VPDPBUSDZ128mkz, 0 },
- { X86::VPDPBUSDZ256rk, X86::VPDPBUSDZ256mk, 0 },
- { X86::VPDPBUSDZ256rkz, X86::VPDPBUSDZ256mkz, 0 },
- { X86::VPDPBUSDZrk, X86::VPDPBUSDZmk, 0 },
- { X86::VPDPBUSDZrkz, X86::VPDPBUSDZmkz, 0 },
- { X86::VPDPWSSDSZ128rk, X86::VPDPWSSDSZ128mk, 0 },
- { X86::VPDPWSSDSZ128rkz, X86::VPDPWSSDSZ128mkz, 0 },
- { X86::VPDPWSSDSZ256rk, X86::VPDPWSSDSZ256mk, 0 },
- { X86::VPDPWSSDSZ256rkz, X86::VPDPWSSDSZ256mkz, 0 },
- { X86::VPDPWSSDSZrk, X86::VPDPWSSDSZmk, 0 },
- { X86::VPDPWSSDSZrkz, X86::VPDPWSSDSZmkz, 0 },
- { X86::VPDPWSSDZ128rk, X86::VPDPWSSDZ128mk, 0 },
- { X86::VPDPWSSDZ128rkz, X86::VPDPWSSDZ128mkz, 0 },
- { X86::VPDPWSSDZ256rk, X86::VPDPWSSDZ256mk, 0 },
- { X86::VPDPWSSDZ256rkz, X86::VPDPWSSDZ256mkz, 0 },
- { X86::VPDPWSSDZrk, X86::VPDPWSSDZmk, 0 },
- { X86::VPDPWSSDZrkz, X86::VPDPWSSDZmkz, 0 },
- { X86::VPERMBZ128rrk, X86::VPERMBZ128rmk, 0 },
- { X86::VPERMBZ256rrk, X86::VPERMBZ256rmk, 0 },
- { X86::VPERMBZrrk, X86::VPERMBZrmk, 0 },
- { X86::VPERMDZ256rrk, X86::VPERMDZ256rmk, 0 },
- { X86::VPERMDZrrk, X86::VPERMDZrmk, 0 },
- { X86::VPERMI2B128rrk, X86::VPERMI2B128rmk, 0 },
- { X86::VPERMI2B128rrkz, X86::VPERMI2B128rmkz, 0 },
- { X86::VPERMI2B256rrk, X86::VPERMI2B256rmk, 0 },
- { X86::VPERMI2B256rrkz, X86::VPERMI2B256rmkz, 0 },
- { X86::VPERMI2Brrk, X86::VPERMI2Brmk, 0 },
- { X86::VPERMI2Brrkz, X86::VPERMI2Brmkz, 0 },
- { X86::VPERMI2D128rrk, X86::VPERMI2D128rmk, 0 },
- { X86::VPERMI2D128rrkz, X86::VPERMI2D128rmkz, 0 },
- { X86::VPERMI2D256rrk, X86::VPERMI2D256rmk, 0 },
- { X86::VPERMI2D256rrkz, X86::VPERMI2D256rmkz, 0 },
- { X86::VPERMI2Drrk, X86::VPERMI2Drmk, 0 },
- { X86::VPERMI2Drrkz, X86::VPERMI2Drmkz, 0 },
- { X86::VPERMI2PD128rrk, X86::VPERMI2PD128rmk, 0 },
- { X86::VPERMI2PD128rrkz, X86::VPERMI2PD128rmkz, 0 },
- { X86::VPERMI2PD256rrk, X86::VPERMI2PD256rmk, 0 },
- { X86::VPERMI2PD256rrkz, X86::VPERMI2PD256rmkz, 0 },
- { X86::VPERMI2PDrrk, X86::VPERMI2PDrmk, 0 },
- { X86::VPERMI2PDrrkz, X86::VPERMI2PDrmkz, 0 },
- { X86::VPERMI2PS128rrk, X86::VPERMI2PS128rmk, 0 },
- { X86::VPERMI2PS128rrkz, X86::VPERMI2PS128rmkz, 0 },
- { X86::VPERMI2PS256rrk, X86::VPERMI2PS256rmk, 0 },
- { X86::VPERMI2PS256rrkz, X86::VPERMI2PS256rmkz, 0 },
- { X86::VPERMI2PSrrk, X86::VPERMI2PSrmk, 0 },
- { X86::VPERMI2PSrrkz, X86::VPERMI2PSrmkz, 0 },
- { X86::VPERMI2Q128rrk, X86::VPERMI2Q128rmk, 0 },
- { X86::VPERMI2Q128rrkz, X86::VPERMI2Q128rmkz, 0 },
- { X86::VPERMI2Q256rrk, X86::VPERMI2Q256rmk, 0 },
- { X86::VPERMI2Q256rrkz, X86::VPERMI2Q256rmkz, 0 },
- { X86::VPERMI2Qrrk, X86::VPERMI2Qrmk, 0 },
- { X86::VPERMI2Qrrkz, X86::VPERMI2Qrmkz, 0 },
- { X86::VPERMI2W128rrk, X86::VPERMI2W128rmk, 0 },
- { X86::VPERMI2W128rrkz, X86::VPERMI2W128rmkz, 0 },
- { X86::VPERMI2W256rrk, X86::VPERMI2W256rmk, 0 },
- { X86::VPERMI2W256rrkz, X86::VPERMI2W256rmkz, 0 },
- { X86::VPERMI2Wrrk, X86::VPERMI2Wrmk, 0 },
- { X86::VPERMI2Wrrkz, X86::VPERMI2Wrmkz, 0 },
- { X86::VPERMILPDZ128rrk, X86::VPERMILPDZ128rmk, 0 },
- { X86::VPERMILPDZ256rrk, X86::VPERMILPDZ256rmk, 0 },
- { X86::VPERMILPDZrrk, X86::VPERMILPDZrmk, 0 },
- { X86::VPERMILPSZ128rrk, X86::VPERMILPSZ128rmk, 0 },
- { X86::VPERMILPSZ256rrk, X86::VPERMILPSZ256rmk, 0 },
- { X86::VPERMILPSZrrk, X86::VPERMILPSZrmk, 0 },
- { X86::VPERMPDZ256rrk, X86::VPERMPDZ256rmk, 0 },
- { X86::VPERMPDZrrk, X86::VPERMPDZrmk, 0 },
- { X86::VPERMPSZ256rrk, X86::VPERMPSZ256rmk, 0 },
- { X86::VPERMPSZrrk, X86::VPERMPSZrmk, 0 },
- { X86::VPERMQZ256rrk, X86::VPERMQZ256rmk, 0 },
- { X86::VPERMQZrrk, X86::VPERMQZrmk, 0 },
- { X86::VPERMT2B128rrk, X86::VPERMT2B128rmk, 0 },
- { X86::VPERMT2B128rrkz, X86::VPERMT2B128rmkz, 0 },
- { X86::VPERMT2B256rrk, X86::VPERMT2B256rmk, 0 },
- { X86::VPERMT2B256rrkz, X86::VPERMT2B256rmkz, 0 },
- { X86::VPERMT2Brrk, X86::VPERMT2Brmk, 0 },
- { X86::VPERMT2Brrkz, X86::VPERMT2Brmkz, 0 },
- { X86::VPERMT2D128rrk, X86::VPERMT2D128rmk, 0 },
- { X86::VPERMT2D128rrkz, X86::VPERMT2D128rmkz, 0 },
- { X86::VPERMT2D256rrk, X86::VPERMT2D256rmk, 0 },
- { X86::VPERMT2D256rrkz, X86::VPERMT2D256rmkz, 0 },
- { X86::VPERMT2Drrk, X86::VPERMT2Drmk, 0 },
- { X86::VPERMT2Drrkz, X86::VPERMT2Drmkz, 0 },
- { X86::VPERMT2PD128rrk, X86::VPERMT2PD128rmk, 0 },
- { X86::VPERMT2PD128rrkz, X86::VPERMT2PD128rmkz, 0 },
- { X86::VPERMT2PD256rrk, X86::VPERMT2PD256rmk, 0 },
- { X86::VPERMT2PD256rrkz, X86::VPERMT2PD256rmkz, 0 },
- { X86::VPERMT2PDrrk, X86::VPERMT2PDrmk, 0 },
- { X86::VPERMT2PDrrkz, X86::VPERMT2PDrmkz, 0 },
- { X86::VPERMT2PS128rrk, X86::VPERMT2PS128rmk, 0 },
- { X86::VPERMT2PS128rrkz, X86::VPERMT2PS128rmkz, 0 },
- { X86::VPERMT2PS256rrk, X86::VPERMT2PS256rmk, 0 },
- { X86::VPERMT2PS256rrkz, X86::VPERMT2PS256rmkz, 0 },
- { X86::VPERMT2PSrrk, X86::VPERMT2PSrmk, 0 },
- { X86::VPERMT2PSrrkz, X86::VPERMT2PSrmkz, 0 },
- { X86::VPERMT2Q128rrk, X86::VPERMT2Q128rmk, 0 },
- { X86::VPERMT2Q128rrkz, X86::VPERMT2Q128rmkz, 0 },
- { X86::VPERMT2Q256rrk, X86::VPERMT2Q256rmk, 0 },
- { X86::VPERMT2Q256rrkz, X86::VPERMT2Q256rmkz, 0 },
- { X86::VPERMT2Qrrk, X86::VPERMT2Qrmk, 0 },
- { X86::VPERMT2Qrrkz, X86::VPERMT2Qrmkz, 0 },
- { X86::VPERMT2W128rrk, X86::VPERMT2W128rmk, 0 },
- { X86::VPERMT2W128rrkz, X86::VPERMT2W128rmkz, 0 },
- { X86::VPERMT2W256rrk, X86::VPERMT2W256rmk, 0 },
- { X86::VPERMT2W256rrkz, X86::VPERMT2W256rmkz, 0 },
- { X86::VPERMT2Wrrk, X86::VPERMT2Wrmk, 0 },
- { X86::VPERMT2Wrrkz, X86::VPERMT2Wrmkz, 0 },
- { X86::VPERMWZ128rrk, X86::VPERMWZ128rmk, 0 },
- { X86::VPERMWZ256rrk, X86::VPERMWZ256rmk, 0 },
- { X86::VPERMWZrrk, X86::VPERMWZrmk, 0 },
- { X86::VPMADD52HUQZ128rk, X86::VPMADD52HUQZ128mk, 0 },
- { X86::VPMADD52HUQZ128rkz, X86::VPMADD52HUQZ128mkz, 0 },
- { X86::VPMADD52HUQZ256rk, X86::VPMADD52HUQZ256mk, 0 },
- { X86::VPMADD52HUQZ256rkz, X86::VPMADD52HUQZ256mkz, 0 },
- { X86::VPMADD52HUQZrk, X86::VPMADD52HUQZmk, 0 },
- { X86::VPMADD52HUQZrkz, X86::VPMADD52HUQZmkz, 0 },
- { X86::VPMADD52LUQZ128rk, X86::VPMADD52LUQZ128mk, 0 },
- { X86::VPMADD52LUQZ128rkz, X86::VPMADD52LUQZ128mkz, 0 },
- { X86::VPMADD52LUQZ256rk, X86::VPMADD52LUQZ256mk, 0 },
- { X86::VPMADD52LUQZ256rkz, X86::VPMADD52LUQZ256mkz, 0 },
- { X86::VPMADD52LUQZrk, X86::VPMADD52LUQZmk, 0 },
- { X86::VPMADD52LUQZrkz, X86::VPMADD52LUQZmkz, 0 },
- { X86::VPMADDUBSWZ128rrk, X86::VPMADDUBSWZ128rmk, 0 },
- { X86::VPMADDUBSWZ256rrk, X86::VPMADDUBSWZ256rmk, 0 },
- { X86::VPMADDUBSWZrrk, X86::VPMADDUBSWZrmk, 0 },
- { X86::VPMADDWDZ128rrk, X86::VPMADDWDZ128rmk, 0 },
- { X86::VPMADDWDZ256rrk, X86::VPMADDWDZ256rmk, 0 },
- { X86::VPMADDWDZrrk, X86::VPMADDWDZrmk, 0 },
- { X86::VPMAXSBZ128rrk, X86::VPMAXSBZ128rmk, 0 },
- { X86::VPMAXSBZ256rrk, X86::VPMAXSBZ256rmk, 0 },
- { X86::VPMAXSBZrrk, X86::VPMAXSBZrmk, 0 },
- { X86::VPMAXSDZ128rrk, X86::VPMAXSDZ128rmk, 0 },
- { X86::VPMAXSDZ256rrk, X86::VPMAXSDZ256rmk, 0 },
- { X86::VPMAXSDZrrk, X86::VPMAXSDZrmk, 0 },
- { X86::VPMAXSQZ128rrk, X86::VPMAXSQZ128rmk, 0 },
- { X86::VPMAXSQZ256rrk, X86::VPMAXSQZ256rmk, 0 },
- { X86::VPMAXSQZrrk, X86::VPMAXSQZrmk, 0 },
- { X86::VPMAXSWZ128rrk, X86::VPMAXSWZ128rmk, 0 },
- { X86::VPMAXSWZ256rrk, X86::VPMAXSWZ256rmk, 0 },
- { X86::VPMAXSWZrrk, X86::VPMAXSWZrmk, 0 },
- { X86::VPMAXUBZ128rrk, X86::VPMAXUBZ128rmk, 0 },
- { X86::VPMAXUBZ256rrk, X86::VPMAXUBZ256rmk, 0 },
- { X86::VPMAXUBZrrk, X86::VPMAXUBZrmk, 0 },
- { X86::VPMAXUDZ128rrk, X86::VPMAXUDZ128rmk, 0 },
- { X86::VPMAXUDZ256rrk, X86::VPMAXUDZ256rmk, 0 },
- { X86::VPMAXUDZrrk, X86::VPMAXUDZrmk, 0 },
- { X86::VPMAXUQZ128rrk, X86::VPMAXUQZ128rmk, 0 },
- { X86::VPMAXUQZ256rrk, X86::VPMAXUQZ256rmk, 0 },
- { X86::VPMAXUQZrrk, X86::VPMAXUQZrmk, 0 },
- { X86::VPMAXUWZ128rrk, X86::VPMAXUWZ128rmk, 0 },
- { X86::VPMAXUWZ256rrk, X86::VPMAXUWZ256rmk, 0 },
- { X86::VPMAXUWZrrk, X86::VPMAXUWZrmk, 0 },
- { X86::VPMINSBZ128rrk, X86::VPMINSBZ128rmk, 0 },
- { X86::VPMINSBZ256rrk, X86::VPMINSBZ256rmk, 0 },
- { X86::VPMINSBZrrk, X86::VPMINSBZrmk, 0 },
- { X86::VPMINSDZ128rrk, X86::VPMINSDZ128rmk, 0 },
- { X86::VPMINSDZ256rrk, X86::VPMINSDZ256rmk, 0 },
- { X86::VPMINSDZrrk, X86::VPMINSDZrmk, 0 },
- { X86::VPMINSQZ128rrk, X86::VPMINSQZ128rmk, 0 },
- { X86::VPMINSQZ256rrk, X86::VPMINSQZ256rmk, 0 },
- { X86::VPMINSQZrrk, X86::VPMINSQZrmk, 0 },
- { X86::VPMINSWZ128rrk, X86::VPMINSWZ128rmk, 0 },
- { X86::VPMINSWZ256rrk, X86::VPMINSWZ256rmk, 0 },
- { X86::VPMINSWZrrk, X86::VPMINSWZrmk, 0 },
- { X86::VPMINUBZ128rrk, X86::VPMINUBZ128rmk, 0 },
- { X86::VPMINUBZ256rrk, X86::VPMINUBZ256rmk, 0 },
- { X86::VPMINUBZrrk, X86::VPMINUBZrmk, 0 },
- { X86::VPMINUDZ128rrk, X86::VPMINUDZ128rmk, 0 },
- { X86::VPMINUDZ256rrk, X86::VPMINUDZ256rmk, 0 },
- { X86::VPMINUDZrrk, X86::VPMINUDZrmk, 0 },
- { X86::VPMINUQZ128rrk, X86::VPMINUQZ128rmk, 0 },
- { X86::VPMINUQZ256rrk, X86::VPMINUQZ256rmk, 0 },
- { X86::VPMINUQZrrk, X86::VPMINUQZrmk, 0 },
- { X86::VPMINUWZ128rrk, X86::VPMINUWZ128rmk, 0 },
- { X86::VPMINUWZ256rrk, X86::VPMINUWZ256rmk, 0 },
- { X86::VPMINUWZrrk, X86::VPMINUWZrmk, 0 },
- { X86::VPMULDQZ128rrk, X86::VPMULDQZ128rmk, 0 },
- { X86::VPMULDQZ256rrk, X86::VPMULDQZ256rmk, 0 },
- { X86::VPMULDQZrrk, X86::VPMULDQZrmk, 0 },
- { X86::VPMULHRSWZ128rrk, X86::VPMULHRSWZ128rmk, 0 },
- { X86::VPMULHRSWZ256rrk, X86::VPMULHRSWZ256rmk, 0 },
- { X86::VPMULHRSWZrrk, X86::VPMULHRSWZrmk, 0 },
- { X86::VPMULHUWZ128rrk, X86::VPMULHUWZ128rmk, 0 },
- { X86::VPMULHUWZ256rrk, X86::VPMULHUWZ256rmk, 0 },
- { X86::VPMULHUWZrrk, X86::VPMULHUWZrmk, 0 },
- { X86::VPMULHWZ128rrk, X86::VPMULHWZ128rmk, 0 },
- { X86::VPMULHWZ256rrk, X86::VPMULHWZ256rmk, 0 },
- { X86::VPMULHWZrrk, X86::VPMULHWZrmk, 0 },
- { X86::VPMULLDZ128rrk, X86::VPMULLDZ128rmk, 0 },
- { X86::VPMULLDZ256rrk, X86::VPMULLDZ256rmk, 0 },
- { X86::VPMULLDZrrk, X86::VPMULLDZrmk, 0 },
- { X86::VPMULLQZ128rrk, X86::VPMULLQZ128rmk, 0 },
- { X86::VPMULLQZ256rrk, X86::VPMULLQZ256rmk, 0 },
- { X86::VPMULLQZrrk, X86::VPMULLQZrmk, 0 },
- { X86::VPMULLWZ128rrk, X86::VPMULLWZ128rmk, 0 },
- { X86::VPMULLWZ256rrk, X86::VPMULLWZ256rmk, 0 },
- { X86::VPMULLWZrrk, X86::VPMULLWZrmk, 0 },
- { X86::VPMULTISHIFTQBZ128rrk, X86::VPMULTISHIFTQBZ128rmk, 0 },
- { X86::VPMULTISHIFTQBZ256rrk, X86::VPMULTISHIFTQBZ256rmk, 0 },
- { X86::VPMULTISHIFTQBZrrk, X86::VPMULTISHIFTQBZrmk, 0 },
- { X86::VPMULUDQZ128rrk, X86::VPMULUDQZ128rmk, 0 },
- { X86::VPMULUDQZ256rrk, X86::VPMULUDQZ256rmk, 0 },
- { X86::VPMULUDQZrrk, X86::VPMULUDQZrmk, 0 },
- { X86::VPORDZ128rrk, X86::VPORDZ128rmk, 0 },
- { X86::VPORDZ256rrk, X86::VPORDZ256rmk, 0 },
- { X86::VPORDZrrk, X86::VPORDZrmk, 0 },
- { X86::VPORQZ128rrk, X86::VPORQZ128rmk, 0 },
- { X86::VPORQZ256rrk, X86::VPORQZ256rmk, 0 },
- { X86::VPORQZrrk, X86::VPORQZrmk, 0 },
- { X86::VPROLVDZ128rrk, X86::VPROLVDZ128rmk, 0 },
- { X86::VPROLVDZ256rrk, X86::VPROLVDZ256rmk, 0 },
- { X86::VPROLVDZrrk, X86::VPROLVDZrmk, 0 },
- { X86::VPROLVQZ128rrk, X86::VPROLVQZ128rmk, 0 },
- { X86::VPROLVQZ256rrk, X86::VPROLVQZ256rmk, 0 },
- { X86::VPROLVQZrrk, X86::VPROLVQZrmk, 0 },
- { X86::VPRORVDZ128rrk, X86::VPRORVDZ128rmk, 0 },
- { X86::VPRORVDZ256rrk, X86::VPRORVDZ256rmk, 0 },
- { X86::VPRORVDZrrk, X86::VPRORVDZrmk, 0 },
- { X86::VPRORVQZ128rrk, X86::VPRORVQZ128rmk, 0 },
- { X86::VPRORVQZ256rrk, X86::VPRORVQZ256rmk, 0 },
- { X86::VPRORVQZrrk, X86::VPRORVQZrmk, 0 },
- { X86::VPSHLDDZ128rrik, X86::VPSHLDDZ128rmik, 0 },
- { X86::VPSHLDDZ256rrik, X86::VPSHLDDZ256rmik, 0 },
- { X86::VPSHLDDZrrik, X86::VPSHLDDZrmik, 0 },
- { X86::VPSHLDQZ128rrik, X86::VPSHLDQZ128rmik, 0 },
- { X86::VPSHLDQZ256rrik, X86::VPSHLDQZ256rmik, 0 },
- { X86::VPSHLDQZrrik, X86::VPSHLDQZrmik, 0 },
- { X86::VPSHLDVDZ128rk, X86::VPSHLDVDZ128mk, 0 },
- { X86::VPSHLDVDZ128rkz, X86::VPSHLDVDZ128mkz, 0 },
- { X86::VPSHLDVDZ256rk, X86::VPSHLDVDZ256mk, 0 },
- { X86::VPSHLDVDZ256rkz, X86::VPSHLDVDZ256mkz, 0 },
- { X86::VPSHLDVDZrk, X86::VPSHLDVDZmk, 0 },
- { X86::VPSHLDVDZrkz, X86::VPSHLDVDZmkz, 0 },
- { X86::VPSHLDVQZ128rk, X86::VPSHLDVQZ128mk, 0 },
- { X86::VPSHLDVQZ128rkz, X86::VPSHLDVQZ128mkz, 0 },
- { X86::VPSHLDVQZ256rk, X86::VPSHLDVQZ256mk, 0 },
- { X86::VPSHLDVQZ256rkz, X86::VPSHLDVQZ256mkz, 0 },
- { X86::VPSHLDVQZrk, X86::VPSHLDVQZmk, 0 },
- { X86::VPSHLDVQZrkz, X86::VPSHLDVQZmkz, 0 },
- { X86::VPSHLDVWZ128rk, X86::VPSHLDVWZ128mk, 0 },
- { X86::VPSHLDVWZ128rkz, X86::VPSHLDVWZ128mkz, 0 },
- { X86::VPSHLDVWZ256rk, X86::VPSHLDVWZ256mk, 0 },
- { X86::VPSHLDVWZ256rkz, X86::VPSHLDVWZ256mkz, 0 },
- { X86::VPSHLDVWZrk, X86::VPSHLDVWZmk, 0 },
- { X86::VPSHLDVWZrkz, X86::VPSHLDVWZmkz, 0 },
- { X86::VPSHLDWZ128rrik, X86::VPSHLDWZ128rmik, 0 },
- { X86::VPSHLDWZ256rrik, X86::VPSHLDWZ256rmik, 0 },
- { X86::VPSHLDWZrrik, X86::VPSHLDWZrmik, 0 },
- { X86::VPSHRDDZ128rrik, X86::VPSHRDDZ128rmik, 0 },
- { X86::VPSHRDDZ256rrik, X86::VPSHRDDZ256rmik, 0 },
- { X86::VPSHRDDZrrik, X86::VPSHRDDZrmik, 0 },
- { X86::VPSHRDQZ128rrik, X86::VPSHRDQZ128rmik, 0 },
- { X86::VPSHRDQZ256rrik, X86::VPSHRDQZ256rmik, 0 },
- { X86::VPSHRDQZrrik, X86::VPSHRDQZrmik, 0 },
- { X86::VPSHRDVDZ128rk, X86::VPSHRDVDZ128mk, 0 },
- { X86::VPSHRDVDZ128rkz, X86::VPSHRDVDZ128mkz, 0 },
- { X86::VPSHRDVDZ256rk, X86::VPSHRDVDZ256mk, 0 },
- { X86::VPSHRDVDZ256rkz, X86::VPSHRDVDZ256mkz, 0 },
- { X86::VPSHRDVDZrk, X86::VPSHRDVDZmk, 0 },
- { X86::VPSHRDVDZrkz, X86::VPSHRDVDZmkz, 0 },
- { X86::VPSHRDVQZ128rk, X86::VPSHRDVQZ128mk, 0 },
- { X86::VPSHRDVQZ128rkz, X86::VPSHRDVQZ128mkz, 0 },
- { X86::VPSHRDVQZ256rk, X86::VPSHRDVQZ256mk, 0 },
- { X86::VPSHRDVQZ256rkz, X86::VPSHRDVQZ256mkz, 0 },
- { X86::VPSHRDVQZrk, X86::VPSHRDVQZmk, 0 },
- { X86::VPSHRDVQZrkz, X86::VPSHRDVQZmkz, 0 },
- { X86::VPSHRDVWZ128rk, X86::VPSHRDVWZ128mk, 0 },
- { X86::VPSHRDVWZ128rkz, X86::VPSHRDVWZ128mkz, 0 },
- { X86::VPSHRDVWZ256rk, X86::VPSHRDVWZ256mk, 0 },
- { X86::VPSHRDVWZ256rkz, X86::VPSHRDVWZ256mkz, 0 },
- { X86::VPSHRDVWZrk, X86::VPSHRDVWZmk, 0 },
- { X86::VPSHRDVWZrkz, X86::VPSHRDVWZmkz, 0 },
- { X86::VPSHRDWZ128rrik, X86::VPSHRDWZ128rmik, 0 },
- { X86::VPSHRDWZ256rrik, X86::VPSHRDWZ256rmik, 0 },
- { X86::VPSHRDWZrrik, X86::VPSHRDWZrmik, 0 },
- { X86::VPSHUFBZ128rrk, X86::VPSHUFBZ128rmk, 0 },
- { X86::VPSHUFBZ256rrk, X86::VPSHUFBZ256rmk, 0 },
- { X86::VPSHUFBZrrk, X86::VPSHUFBZrmk, 0 },
- { X86::VPSLLDZ128rrk, X86::VPSLLDZ128rmk, 0 },
- { X86::VPSLLDZ256rrk, X86::VPSLLDZ256rmk, 0 },
- { X86::VPSLLDZrrk, X86::VPSLLDZrmk, 0 },
- { X86::VPSLLQZ128rrk, X86::VPSLLQZ128rmk, 0 },
- { X86::VPSLLQZ256rrk, X86::VPSLLQZ256rmk, 0 },
- { X86::VPSLLQZrrk, X86::VPSLLQZrmk, 0 },
- { X86::VPSLLVDZ128rrk, X86::VPSLLVDZ128rmk, 0 },
- { X86::VPSLLVDZ256rrk, X86::VPSLLVDZ256rmk, 0 },
- { X86::VPSLLVDZrrk, X86::VPSLLVDZrmk, 0 },
- { X86::VPSLLVQZ128rrk, X86::VPSLLVQZ128rmk, 0 },
- { X86::VPSLLVQZ256rrk, X86::VPSLLVQZ256rmk, 0 },
- { X86::VPSLLVQZrrk, X86::VPSLLVQZrmk, 0 },
- { X86::VPSLLVWZ128rrk, X86::VPSLLVWZ128rmk, 0 },
- { X86::VPSLLVWZ256rrk, X86::VPSLLVWZ256rmk, 0 },
- { X86::VPSLLVWZrrk, X86::VPSLLVWZrmk, 0 },
- { X86::VPSLLWZ128rrk, X86::VPSLLWZ128rmk, 0 },
- { X86::VPSLLWZ256rrk, X86::VPSLLWZ256rmk, 0 },
- { X86::VPSLLWZrrk, X86::VPSLLWZrmk, 0 },
- { X86::VPSRADZ128rrk, X86::VPSRADZ128rmk, 0 },
- { X86::VPSRADZ256rrk, X86::VPSRADZ256rmk, 0 },
- { X86::VPSRADZrrk, X86::VPSRADZrmk, 0 },
- { X86::VPSRAQZ128rrk, X86::VPSRAQZ128rmk, 0 },
- { X86::VPSRAQZ256rrk, X86::VPSRAQZ256rmk, 0 },
- { X86::VPSRAQZrrk, X86::VPSRAQZrmk, 0 },
- { X86::VPSRAVDZ128rrk, X86::VPSRAVDZ128rmk, 0 },
- { X86::VPSRAVDZ256rrk, X86::VPSRAVDZ256rmk, 0 },
- { X86::VPSRAVDZrrk, X86::VPSRAVDZrmk, 0 },
- { X86::VPSRAVQZ128rrk, X86::VPSRAVQZ128rmk, 0 },
- { X86::VPSRAVQZ256rrk, X86::VPSRAVQZ256rmk, 0 },
- { X86::VPSRAVQZrrk, X86::VPSRAVQZrmk, 0 },
- { X86::VPSRAVWZ128rrk, X86::VPSRAVWZ128rmk, 0 },
- { X86::VPSRAVWZ256rrk, X86::VPSRAVWZ256rmk, 0 },
- { X86::VPSRAVWZrrk, X86::VPSRAVWZrmk, 0 },
- { X86::VPSRAWZ128rrk, X86::VPSRAWZ128rmk, 0 },
- { X86::VPSRAWZ256rrk, X86::VPSRAWZ256rmk, 0 },
- { X86::VPSRAWZrrk, X86::VPSRAWZrmk, 0 },
- { X86::VPSRLDZ128rrk, X86::VPSRLDZ128rmk, 0 },
- { X86::VPSRLDZ256rrk, X86::VPSRLDZ256rmk, 0 },
- { X86::VPSRLDZrrk, X86::VPSRLDZrmk, 0 },
- { X86::VPSRLQZ128rrk, X86::VPSRLQZ128rmk, 0 },
- { X86::VPSRLQZ256rrk, X86::VPSRLQZ256rmk, 0 },
- { X86::VPSRLQZrrk, X86::VPSRLQZrmk, 0 },
- { X86::VPSRLVDZ128rrk, X86::VPSRLVDZ128rmk, 0 },
- { X86::VPSRLVDZ256rrk, X86::VPSRLVDZ256rmk, 0 },
- { X86::VPSRLVDZrrk, X86::VPSRLVDZrmk, 0 },
- { X86::VPSRLVQZ128rrk, X86::VPSRLVQZ128rmk, 0 },
- { X86::VPSRLVQZ256rrk, X86::VPSRLVQZ256rmk, 0 },
- { X86::VPSRLVQZrrk, X86::VPSRLVQZrmk, 0 },
- { X86::VPSRLVWZ128rrk, X86::VPSRLVWZ128rmk, 0 },
- { X86::VPSRLVWZ256rrk, X86::VPSRLVWZ256rmk, 0 },
- { X86::VPSRLVWZrrk, X86::VPSRLVWZrmk, 0 },
- { X86::VPSRLWZ128rrk, X86::VPSRLWZ128rmk, 0 },
- { X86::VPSRLWZ256rrk, X86::VPSRLWZ256rmk, 0 },
- { X86::VPSRLWZrrk, X86::VPSRLWZrmk, 0 },
- { X86::VPSUBBZ128rrk, X86::VPSUBBZ128rmk, 0 },
- { X86::VPSUBBZ256rrk, X86::VPSUBBZ256rmk, 0 },
- { X86::VPSUBBZrrk, X86::VPSUBBZrmk, 0 },
- { X86::VPSUBDZ128rrk, X86::VPSUBDZ128rmk, 0 },
- { X86::VPSUBDZ256rrk, X86::VPSUBDZ256rmk, 0 },
- { X86::VPSUBDZrrk, X86::VPSUBDZrmk, 0 },
- { X86::VPSUBQZ128rrk, X86::VPSUBQZ128rmk, 0 },
- { X86::VPSUBQZ256rrk, X86::VPSUBQZ256rmk, 0 },
- { X86::VPSUBQZrrk, X86::VPSUBQZrmk, 0 },
- { X86::VPSUBSBZ128rrk, X86::VPSUBSBZ128rmk, 0 },
- { X86::VPSUBSBZ256rrk, X86::VPSUBSBZ256rmk, 0 },
- { X86::VPSUBSBZrrk, X86::VPSUBSBZrmk, 0 },
- { X86::VPSUBSWZ128rrk, X86::VPSUBSWZ128rmk, 0 },
- { X86::VPSUBSWZ256rrk, X86::VPSUBSWZ256rmk, 0 },
- { X86::VPSUBSWZrrk, X86::VPSUBSWZrmk, 0 },
- { X86::VPSUBUSBZ128rrk, X86::VPSUBUSBZ128rmk, 0 },
- { X86::VPSUBUSBZ256rrk, X86::VPSUBUSBZ256rmk, 0 },
- { X86::VPSUBUSBZrrk, X86::VPSUBUSBZrmk, 0 },
- { X86::VPSUBUSWZ128rrk, X86::VPSUBUSWZ128rmk, 0 },
- { X86::VPSUBUSWZ256rrk, X86::VPSUBUSWZ256rmk, 0 },
- { X86::VPSUBUSWZrrk, X86::VPSUBUSWZrmk, 0 },
- { X86::VPSUBWZ128rrk, X86::VPSUBWZ128rmk, 0 },
- { X86::VPSUBWZ256rrk, X86::VPSUBWZ256rmk, 0 },
- { X86::VPSUBWZrrk, X86::VPSUBWZrmk, 0 },
- { X86::VPTERNLOGDZ128rrik, X86::VPTERNLOGDZ128rmik, 0 },
- { X86::VPTERNLOGDZ128rrikz, X86::VPTERNLOGDZ128rmikz, 0 },
- { X86::VPTERNLOGDZ256rrik, X86::VPTERNLOGDZ256rmik, 0 },
- { X86::VPTERNLOGDZ256rrikz, X86::VPTERNLOGDZ256rmikz, 0 },
- { X86::VPTERNLOGDZrrik, X86::VPTERNLOGDZrmik, 0 },
- { X86::VPTERNLOGDZrrikz, X86::VPTERNLOGDZrmikz, 0 },
- { X86::VPTERNLOGQZ128rrik, X86::VPTERNLOGQZ128rmik, 0 },
- { X86::VPTERNLOGQZ128rrikz, X86::VPTERNLOGQZ128rmikz, 0 },
- { X86::VPTERNLOGQZ256rrik, X86::VPTERNLOGQZ256rmik, 0 },
- { X86::VPTERNLOGQZ256rrikz, X86::VPTERNLOGQZ256rmikz, 0 },
- { X86::VPTERNLOGQZrrik, X86::VPTERNLOGQZrmik, 0 },
- { X86::VPTERNLOGQZrrikz, X86::VPTERNLOGQZrmikz, 0 },
- { X86::VPUNPCKHBWZ128rrk, X86::VPUNPCKHBWZ128rmk, 0 },
- { X86::VPUNPCKHBWZ256rrk, X86::VPUNPCKHBWZ256rmk, 0 },
- { X86::VPUNPCKHBWZrrk, X86::VPUNPCKHBWZrmk, 0 },
- { X86::VPUNPCKHDQZ128rrk, X86::VPUNPCKHDQZ128rmk, 0 },
- { X86::VPUNPCKHDQZ256rrk, X86::VPUNPCKHDQZ256rmk, 0 },
- { X86::VPUNPCKHDQZrrk, X86::VPUNPCKHDQZrmk, 0 },
- { X86::VPUNPCKHQDQZ128rrk, X86::VPUNPCKHQDQZ128rmk, 0 },
- { X86::VPUNPCKHQDQZ256rrk, X86::VPUNPCKHQDQZ256rmk, 0 },
- { X86::VPUNPCKHQDQZrrk, X86::VPUNPCKHQDQZrmk, 0 },
- { X86::VPUNPCKHWDZ128rrk, X86::VPUNPCKHWDZ128rmk, 0 },
- { X86::VPUNPCKHWDZ256rrk, X86::VPUNPCKHWDZ256rmk, 0 },
- { X86::VPUNPCKHWDZrrk, X86::VPUNPCKHWDZrmk, 0 },
- { X86::VPUNPCKLBWZ128rrk, X86::VPUNPCKLBWZ128rmk, 0 },
- { X86::VPUNPCKLBWZ256rrk, X86::VPUNPCKLBWZ256rmk, 0 },
- { X86::VPUNPCKLBWZrrk, X86::VPUNPCKLBWZrmk, 0 },
- { X86::VPUNPCKLDQZ128rrk, X86::VPUNPCKLDQZ128rmk, 0 },
- { X86::VPUNPCKLDQZ256rrk, X86::VPUNPCKLDQZ256rmk, 0 },
- { X86::VPUNPCKLDQZrrk, X86::VPUNPCKLDQZrmk, 0 },
- { X86::VPUNPCKLQDQZ128rrk, X86::VPUNPCKLQDQZ128rmk, 0 },
- { X86::VPUNPCKLQDQZ256rrk, X86::VPUNPCKLQDQZ256rmk, 0 },
- { X86::VPUNPCKLQDQZrrk, X86::VPUNPCKLQDQZrmk, 0 },
- { X86::VPUNPCKLWDZ128rrk, X86::VPUNPCKLWDZ128rmk, 0 },
- { X86::VPUNPCKLWDZ256rrk, X86::VPUNPCKLWDZ256rmk, 0 },
- { X86::VPUNPCKLWDZrrk, X86::VPUNPCKLWDZrmk, 0 },
- { X86::VPXORDZ128rrk, X86::VPXORDZ128rmk, 0 },
- { X86::VPXORDZ256rrk, X86::VPXORDZ256rmk, 0 },
- { X86::VPXORDZrrk, X86::VPXORDZrmk, 0 },
- { X86::VPXORQZ128rrk, X86::VPXORQZ128rmk, 0 },
- { X86::VPXORQZ256rrk, X86::VPXORQZ256rmk, 0 },
- { X86::VPXORQZrrk, X86::VPXORQZrmk, 0 },
- { X86::VRANGEPDZ128rrik, X86::VRANGEPDZ128rmik, 0 },
- { X86::VRANGEPDZ256rrik, X86::VRANGEPDZ256rmik, 0 },
- { X86::VRANGEPDZrrik, X86::VRANGEPDZrmik, 0 },
- { X86::VRANGEPSZ128rrik, X86::VRANGEPSZ128rmik, 0 },
- { X86::VRANGEPSZ256rrik, X86::VRANGEPSZ256rmik, 0 },
- { X86::VRANGEPSZrrik, X86::VRANGEPSZrmik, 0 },
- { X86::VRANGESDZrrik, X86::VRANGESDZrmik, TB_NO_REVERSE },
- { X86::VRANGESSZrrik, X86::VRANGESSZrmik, TB_NO_REVERSE },
- { X86::VRCP14SDZrrk, X86::VRCP14SDZrmk, TB_NO_REVERSE },
- { X86::VRCP14SSZrrk, X86::VRCP14SSZrmk, TB_NO_REVERSE },
- { X86::VRCP28SDZrk, X86::VRCP28SDZmk, TB_NO_REVERSE },
- { X86::VRCP28SSZrk, X86::VRCP28SSZmk, TB_NO_REVERSE },
- { X86::VRCPSHZrrk, X86::VRCPSHZrmk, TB_NO_REVERSE },
- { X86::VREDUCESDZrrik, X86::VREDUCESDZrmik, TB_NO_REVERSE },
- { X86::VREDUCESHZrrik, X86::VREDUCESHZrmik, TB_NO_REVERSE },
- { X86::VREDUCESSZrrik, X86::VREDUCESSZrmik, TB_NO_REVERSE },
- { X86::VRNDSCALESDZr_Intk, X86::VRNDSCALESDZm_Intk, TB_NO_REVERSE },
- { X86::VRNDSCALESHZr_Intk, X86::VRNDSCALESHZm_Intk, TB_NO_REVERSE },
- { X86::VRNDSCALESSZr_Intk, X86::VRNDSCALESSZm_Intk, TB_NO_REVERSE },
- { X86::VRSQRT14SDZrrk, X86::VRSQRT14SDZrmk, TB_NO_REVERSE },
- { X86::VRSQRT14SSZrrk, X86::VRSQRT14SSZrmk, TB_NO_REVERSE },
- { X86::VRSQRT28SDZrk, X86::VRSQRT28SDZmk, TB_NO_REVERSE },
- { X86::VRSQRT28SSZrk, X86::VRSQRT28SSZmk, TB_NO_REVERSE },
- { X86::VRSQRTSHZrrk, X86::VRSQRTSHZrmk, TB_NO_REVERSE },
- { X86::VSCALEFPDZ128rrk, X86::VSCALEFPDZ128rmk, 0 },
- { X86::VSCALEFPDZ256rrk, X86::VSCALEFPDZ256rmk, 0 },
- { X86::VSCALEFPDZrrk, X86::VSCALEFPDZrmk, 0 },
- { X86::VSCALEFPHZ128rrk, X86::VSCALEFPHZ128rmk, 0 },
- { X86::VSCALEFPHZ256rrk, X86::VSCALEFPHZ256rmk, 0 },
- { X86::VSCALEFPHZrrk, X86::VSCALEFPHZrmk, 0 },
- { X86::VSCALEFPSZ128rrk, X86::VSCALEFPSZ128rmk, 0 },
- { X86::VSCALEFPSZ256rrk, X86::VSCALEFPSZ256rmk, 0 },
- { X86::VSCALEFPSZrrk, X86::VSCALEFPSZrmk, 0 },
- { X86::VSCALEFSDZrrk, X86::VSCALEFSDZrmk, TB_NO_REVERSE },
- { X86::VSCALEFSHZrrk, X86::VSCALEFSHZrmk, TB_NO_REVERSE },
- { X86::VSCALEFSSZrrk, X86::VSCALEFSSZrmk, TB_NO_REVERSE },
- { X86::VSHUFF32X4Z256rrik, X86::VSHUFF32X4Z256rmik, 0 },
- { X86::VSHUFF32X4Zrrik, X86::VSHUFF32X4Zrmik, 0 },
- { X86::VSHUFF64X2Z256rrik, X86::VSHUFF64X2Z256rmik, 0 },
- { X86::VSHUFF64X2Zrrik, X86::VSHUFF64X2Zrmik, 0 },
- { X86::VSHUFI32X4Z256rrik, X86::VSHUFI32X4Z256rmik, 0 },
- { X86::VSHUFI32X4Zrrik, X86::VSHUFI32X4Zrmik, 0 },
- { X86::VSHUFI64X2Z256rrik, X86::VSHUFI64X2Z256rmik, 0 },
- { X86::VSHUFI64X2Zrrik, X86::VSHUFI64X2Zrmik, 0 },
- { X86::VSHUFPDZ128rrik, X86::VSHUFPDZ128rmik, 0 },
- { X86::VSHUFPDZ256rrik, X86::VSHUFPDZ256rmik, 0 },
- { X86::VSHUFPDZrrik, X86::VSHUFPDZrmik, 0 },
- { X86::VSHUFPSZ128rrik, X86::VSHUFPSZ128rmik, 0 },
- { X86::VSHUFPSZ256rrik, X86::VSHUFPSZ256rmik, 0 },
- { X86::VSHUFPSZrrik, X86::VSHUFPSZrmik, 0 },
- { X86::VSQRTSDZr_Intk, X86::VSQRTSDZm_Intk, TB_NO_REVERSE },
- { X86::VSQRTSHZr_Intk, X86::VSQRTSHZm_Intk, TB_NO_REVERSE },
- { X86::VSQRTSSZr_Intk, X86::VSQRTSSZm_Intk, TB_NO_REVERSE },
- { X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 },
- { X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 },
- { X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 },
- { X86::VSUBPHZ128rrk, X86::VSUBPHZ128rmk, 0 },
- { X86::VSUBPHZ256rrk, X86::VSUBPHZ256rmk, 0 },
- { X86::VSUBPHZrrk, X86::VSUBPHZrmk, 0 },
- { X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 },
- { X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 },
- { X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 },
- { X86::VSUBSDZrr_Intk, X86::VSUBSDZrm_Intk, TB_NO_REVERSE },
- { X86::VSUBSHZrr_Intk, X86::VSUBSHZrm_Intk, TB_NO_REVERSE },
- { X86::VSUBSSZrr_Intk, X86::VSUBSSZrm_Intk, TB_NO_REVERSE },
- { X86::VUNPCKHPDZ128rrk, X86::VUNPCKHPDZ128rmk, 0 },
- { X86::VUNPCKHPDZ256rrk, X86::VUNPCKHPDZ256rmk, 0 },
- { X86::VUNPCKHPDZrrk, X86::VUNPCKHPDZrmk, 0 },
- { X86::VUNPCKHPSZ128rrk, X86::VUNPCKHPSZ128rmk, 0 },
- { X86::VUNPCKHPSZ256rrk, X86::VUNPCKHPSZ256rmk, 0 },
- { X86::VUNPCKHPSZrrk, X86::VUNPCKHPSZrmk, 0 },
- { X86::VUNPCKLPDZ128rrk, X86::VUNPCKLPDZ128rmk, 0 },
- { X86::VUNPCKLPDZ256rrk, X86::VUNPCKLPDZ256rmk, 0 },
- { X86::VUNPCKLPDZrrk, X86::VUNPCKLPDZrmk, 0 },
- { X86::VUNPCKLPSZ128rrk, X86::VUNPCKLPSZ128rmk, 0 },
- { X86::VUNPCKLPSZ256rrk, X86::VUNPCKLPSZ256rmk, 0 },
- { X86::VUNPCKLPSZrrk, X86::VUNPCKLPSZrmk, 0 },
- { X86::VXORPDZ128rrk, X86::VXORPDZ128rmk, 0 },
- { X86::VXORPDZ256rrk, X86::VXORPDZ256rmk, 0 },
- { X86::VXORPDZrrk, X86::VXORPDZrmk, 0 },
- { X86::VXORPSZ128rrk, X86::VXORPSZ128rmk, 0 },
- { X86::VXORPSZ256rrk, X86::VXORPSZ256rmk, 0 },
- { X86::VXORPSZrrk, X86::VXORPSZrmk, 0 },
-};
-
+#include "X86GenFoldTables.inc"
static const X86MemoryFoldTableEntry BroadcastFoldTable2[] = {
{ X86::VADDPDZ128rr, X86::VADDPDZ128rmb, TB_BCAST_SD },
{ X86::VADDPDZ256rr, X86::VADDPDZ256rmb, TB_BCAST_SD },
@@ -5772,6 +30,18 @@ static const X86MemoryFoldTableEntry BroadcastFoldTable2[] = {
{ X86::VADDPSZ128rr, X86::VADDPSZ128rmb, TB_BCAST_SS },
{ X86::VADDPSZ256rr, X86::VADDPSZ256rmb, TB_BCAST_SS },
{ X86::VADDPSZrr, X86::VADDPSZrmb, TB_BCAST_SS },
+ { X86::VANDNPDZ128rr, X86::VANDNPDZ128rmb, TB_BCAST_SD },
+ { X86::VANDNPDZ256rr, X86::VANDNPDZ256rmb, TB_BCAST_SD },
+ { X86::VANDNPDZrr, X86::VANDNPDZrmb, TB_BCAST_SD },
+ { X86::VANDNPSZ128rr, X86::VANDNPSZ128rmb, TB_BCAST_SS },
+ { X86::VANDNPSZ256rr, X86::VANDNPSZ256rmb, TB_BCAST_SS },
+ { X86::VANDNPSZrr, X86::VANDNPSZrmb, TB_BCAST_SS },
+ { X86::VANDPDZ128rr, X86::VANDPDZ128rmb, TB_BCAST_SD },
+ { X86::VANDPDZ256rr, X86::VANDPDZ256rmb, TB_BCAST_SD },
+ { X86::VANDPDZrr, X86::VANDPDZrmb, TB_BCAST_SD },
+ { X86::VANDPSZ128rr, X86::VANDPSZ128rmb, TB_BCAST_SS },
+ { X86::VANDPSZ256rr, X86::VANDPSZ256rmb, TB_BCAST_SS },
+ { X86::VANDPSZrr, X86::VANDPSZrmb, TB_BCAST_SS },
{ X86::VCMPPDZ128rri, X86::VCMPPDZ128rmbi, TB_BCAST_SD },
{ X86::VCMPPDZ256rri, X86::VCMPPDZ256rmbi, TB_BCAST_SD },
{ X86::VCMPPDZrri, X86::VCMPPDZrmbi, TB_BCAST_SD },
@@ -5814,6 +84,12 @@ static const X86MemoryFoldTableEntry BroadcastFoldTable2[] = {
{ X86::VMULPSZ128rr, X86::VMULPSZ128rmb, TB_BCAST_SS },
{ X86::VMULPSZ256rr, X86::VMULPSZ256rmb, TB_BCAST_SS },
{ X86::VMULPSZrr, X86::VMULPSZrmb, TB_BCAST_SS },
+ { X86::VORPDZ128rr, X86::VORPDZ128rmb, TB_BCAST_SD },
+ { X86::VORPDZ256rr, X86::VORPDZ256rmb, TB_BCAST_SD },
+ { X86::VORPDZrr, X86::VORPDZrmb, TB_BCAST_SD },
+ { X86::VORPSZ128rr, X86::VORPSZ128rmb, TB_BCAST_SS },
+ { X86::VORPSZ256rr, X86::VORPSZ256rmb, TB_BCAST_SS },
+ { X86::VORPSZrr, X86::VORPSZrmb, TB_BCAST_SS },
{ X86::VPADDDZ128rr, X86::VPADDDZ128rmb, TB_BCAST_D },
{ X86::VPADDDZ256rr, X86::VPADDDZ256rmb, TB_BCAST_D },
{ X86::VPADDDZrr, X86::VPADDDZrmb, TB_BCAST_D },
@@ -5916,6 +192,12 @@ static const X86MemoryFoldTableEntry BroadcastFoldTable2[] = {
{ X86::VSUBPSZ128rr, X86::VSUBPSZ128rmb, TB_BCAST_SS },
{ X86::VSUBPSZ256rr, X86::VSUBPSZ256rmb, TB_BCAST_SS },
{ X86::VSUBPSZrr, X86::VSUBPSZrmb, TB_BCAST_SS },
+ { X86::VXORPDZ128rr, X86::VXORPDZ128rmb, TB_BCAST_SD },
+ { X86::VXORPDZ256rr, X86::VXORPDZ256rmb, TB_BCAST_SD },
+ { X86::VXORPDZrr, X86::VXORPDZrmb, TB_BCAST_SD },
+ { X86::VXORPSZ128rr, X86::VXORPSZ128rmb, TB_BCAST_SS },
+ { X86::VXORPSZ256rr, X86::VXORPSZ256rmb, TB_BCAST_SS },
+ { X86::VXORPSZrr, X86::VXORPSZrmb, TB_BCAST_SS },
};
static const X86MemoryFoldTableEntry BroadcastFoldTable3[] = {
@@ -6035,6 +317,68 @@ static const X86MemoryFoldTableEntry BroadcastFoldTable3[] = {
{ X86::VPTERNLOGQZrri, X86::VPTERNLOGQZrmbi, TB_BCAST_Q },
};
+// Table mapping instructions that can safely use a broadcast whose width
+// differs from the element width.
+static const X86MemoryFoldTableEntry BroadcastSizeFoldTable2[] = {
+ { X86::VANDNPDZ128rr, X86::VANDNPSZ128rmb, TB_BCAST_SS },
+ { X86::VANDNPDZ256rr, X86::VANDNPSZ256rmb, TB_BCAST_SS },
+ { X86::VANDNPDZrr, X86::VANDNPSZrmb, TB_BCAST_SS },
+ { X86::VANDNPSZ128rr, X86::VANDNPDZ128rmb, TB_BCAST_SD },
+ { X86::VANDNPSZ256rr, X86::VANDNPDZ256rmb, TB_BCAST_SD },
+ { X86::VANDNPSZrr, X86::VANDNPDZrmb, TB_BCAST_SD },
+ { X86::VANDPDZ128rr, X86::VANDPSZ128rmb, TB_BCAST_SS },
+ { X86::VANDPDZ256rr, X86::VANDPSZ256rmb, TB_BCAST_SS },
+ { X86::VANDPDZrr, X86::VANDPSZrmb, TB_BCAST_SS },
+ { X86::VANDPSZ128rr, X86::VANDPDZ128rmb, TB_BCAST_SD },
+ { X86::VANDPSZ256rr, X86::VANDPDZ256rmb, TB_BCAST_SD },
+ { X86::VANDPSZrr, X86::VANDPDZrmb, TB_BCAST_SD },
+ { X86::VORPDZ128rr, X86::VORPSZ128rmb, TB_BCAST_SS },
+ { X86::VORPDZ256rr, X86::VORPSZ256rmb, TB_BCAST_SS },
+ { X86::VORPDZrr, X86::VORPSZrmb, TB_BCAST_SS },
+ { X86::VORPSZ128rr, X86::VORPDZ128rmb, TB_BCAST_SD },
+ { X86::VORPSZ256rr, X86::VORPDZ256rmb, TB_BCAST_SD },
+ { X86::VORPSZrr, X86::VORPDZrmb, TB_BCAST_SD },
+ { X86::VPANDDZ128rr, X86::VPANDQZ128rmb, TB_BCAST_Q },
+ { X86::VPANDDZ256rr, X86::VPANDQZ256rmb, TB_BCAST_Q },
+ { X86::VPANDDZrr, X86::VPANDQZrmb, TB_BCAST_Q },
+ { X86::VPANDNDZ128rr, X86::VPANDNQZ128rmb, TB_BCAST_Q },
+ { X86::VPANDNDZ256rr, X86::VPANDNQZ256rmb, TB_BCAST_Q },
+ { X86::VPANDNDZrr, X86::VPANDNQZrmb, TB_BCAST_Q },
+ { X86::VPANDNQZ128rr, X86::VPANDNDZ128rmb, TB_BCAST_D },
+ { X86::VPANDNQZ256rr, X86::VPANDNDZ256rmb, TB_BCAST_D },
+ { X86::VPANDNQZrr, X86::VPANDNDZrmb, TB_BCAST_D },
+ { X86::VPANDQZ128rr, X86::VPANDDZ128rmb, TB_BCAST_D },
+ { X86::VPANDQZ256rr, X86::VPANDDZ256rmb, TB_BCAST_D },
+ { X86::VPANDQZrr, X86::VPANDDZrmb, TB_BCAST_D },
+ { X86::VPORDZ128rr, X86::VPORQZ128rmb, TB_BCAST_Q },
+ { X86::VPORDZ256rr, X86::VPORQZ256rmb, TB_BCAST_Q },
+ { X86::VPORDZrr, X86::VPORQZrmb, TB_BCAST_Q },
+ { X86::VPORQZ128rr, X86::VPORDZ128rmb, TB_BCAST_D },
+ { X86::VPORQZ256rr, X86::VPORDZ256rmb, TB_BCAST_D },
+ { X86::VPORQZrr, X86::VPORDZrmb, TB_BCAST_D },
+ { X86::VPXORDZ128rr, X86::VPXORQZ128rmb, TB_BCAST_Q },
+ { X86::VPXORDZ256rr, X86::VPXORQZ256rmb, TB_BCAST_Q },
+ { X86::VPXORDZrr, X86::VPXORQZrmb, TB_BCAST_Q },
+ { X86::VPXORQZ128rr, X86::VPXORDZ128rmb, TB_BCAST_D },
+ { X86::VPXORQZ256rr, X86::VPXORDZ256rmb, TB_BCAST_D },
+ { X86::VPXORQZrr, X86::VPXORDZrmb, TB_BCAST_D },
+ { X86::VXORPDZ128rr, X86::VXORPSZ128rmb, TB_BCAST_SS },
+ { X86::VXORPDZ256rr, X86::VXORPSZ256rmb, TB_BCAST_SS },
+ { X86::VXORPDZrr, X86::VXORPSZrmb, TB_BCAST_SS },
+ { X86::VXORPSZ128rr, X86::VXORPDZ128rmb, TB_BCAST_SD },
+ { X86::VXORPSZ256rr, X86::VXORPDZ256rmb, TB_BCAST_SD },
+ { X86::VXORPSZrr, X86::VXORPDZrmb, TB_BCAST_SD },
+};
+
+static const X86MemoryFoldTableEntry BroadcastSizeFoldTable3[] = {
+ { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGQZ128rmbi, TB_BCAST_Q },
+ { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGQZ256rmbi, TB_BCAST_Q },
+ { X86::VPTERNLOGDZrri, X86::VPTERNLOGQZrmbi, TB_BCAST_Q },
+ { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGDZ128rmbi, TB_BCAST_D },
+ { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGDZ256rmbi, TB_BCAST_D },
+ { X86::VPTERNLOGQZrri, X86::VPTERNLOGDZrmbi, TB_BCAST_D },
+};
+
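The size-changing entries above rely on AND/ANDN/OR/XOR (and the VPTERNLOG entries) being pure bit operations: element boundaries are irrelevant, so a broadcast of one width can stand in for the other whenever it expands to the same bit pattern in memory. A minimal standalone model of that equivalence, in plain C++ rather than LLVM code, with an arbitrary mask value:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // A 32-bit pattern broadcast into every dword of a 64-bit lane is the same
  // bits as broadcasting the repeated 64-bit pattern into every qword.
  uint32_t Mask32 = 0x7FFFFFFFu;                       // e.g. an fabs-style mask
  uint64_t Mask64 = (uint64_t(Mask32) << 32) | Mask32;

  uint64_t Vec[2] = {0x8000000112345678ull, 0xFFFFFFFF00000000ull};

  // AND performed per 64-bit element (the VANDPD view).
  uint64_t PerQword[2];
  for (int i = 0; i < 2; ++i)
    PerQword[i] = Vec[i] & Mask64;

  // AND performed per 32-bit element (the VANDPS view).
  uint32_t Dwords[4];
  std::memcpy(Dwords, Vec, sizeof(Vec));
  for (int i = 0; i < 4; ++i)
    Dwords[i] &= Mask32;

  // Identical bytes either way: only the bit pattern of the broadcast matters.
  assert(std::memcmp(PerQword, Dwords, sizeof(Vec)) == 0);
  return 0;
}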
static const X86MemoryFoldTableEntry *
lookupFoldTableImpl(ArrayRef<X86MemoryFoldTableEntry> Table, unsigned RegOp) {
#ifndef NDEBUG
@@ -6081,6 +425,16 @@ lookupFoldTableImpl(ArrayRef<X86MemoryFoldTableEntry> Table, unsigned RegOp) {
std::end(BroadcastFoldTable3)) ==
std::end(BroadcastFoldTable3) &&
"BroadcastFoldTable3 is not sorted and unique!");
+ assert(llvm::is_sorted(BroadcastSizeFoldTable2) &&
+ std::adjacent_find(std::begin(BroadcastSizeFoldTable2),
+ std::end(BroadcastSizeFoldTable2)) ==
+ std::end(BroadcastSizeFoldTable2) &&
+ "BroadcastSizeFoldTable2 is not sorted and unique!");
+ assert(llvm::is_sorted(BroadcastSizeFoldTable3) &&
+ std::adjacent_find(std::begin(BroadcastSizeFoldTable3),
+ std::end(BroadcastSizeFoldTable3)) ==
+ std::end(BroadcastSizeFoldTable3) &&
+ "BroadcastSizeFoldTable3 is not sorted and unique!");
FoldTablesChecked.store(true, std::memory_order_relaxed);
}
#endif
@@ -6186,3 +540,85 @@ llvm::lookupUnfoldTable(unsigned MemOp) {
return nullptr;
}
+namespace {
+
+// This class stores the memory -> broadcast folding tables. It is instantiated
+// as a function-scope static variable to lazily initialize the folding table.
+struct X86MemBroadcastFoldTable {
+ // Stores memory broadcast folding tables entries sorted by opcode.
+ std::vector<X86MemoryFoldTableEntry> Table;
+
+ X86MemBroadcastFoldTable() {
+ // Broadcast tables.
+ for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastFoldTable2) {
+ unsigned RegOp = Reg2Bcst.KeyOp;
+ unsigned BcstOp = Reg2Bcst.DstOp;
+ if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
+ unsigned MemOp = Reg2Mem->DstOp;
+ uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 |
+ TB_FOLDED_LOAD | TB_FOLDED_BCAST;
+ Table.push_back({MemOp, BcstOp, Flags});
+ }
+ }
+ for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastSizeFoldTable2) {
+ unsigned RegOp = Reg2Bcst.KeyOp;
+ unsigned BcstOp = Reg2Bcst.DstOp;
+ if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 2)) {
+ unsigned MemOp = Reg2Mem->DstOp;
+ uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_2 |
+ TB_FOLDED_LOAD | TB_FOLDED_BCAST;
+ Table.push_back({MemOp, BcstOp, Flags});
+ }
+ }
+
+ for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastFoldTable3) {
+ unsigned RegOp = Reg2Bcst.KeyOp;
+ unsigned BcstOp = Reg2Bcst.DstOp;
+ if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
+ unsigned MemOp = Reg2Mem->DstOp;
+ uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 |
+ TB_FOLDED_LOAD | TB_FOLDED_BCAST;
+ Table.push_back({MemOp, BcstOp, Flags});
+ }
+ }
+ for (const X86MemoryFoldTableEntry &Reg2Bcst : BroadcastSizeFoldTable3) {
+ unsigned RegOp = Reg2Bcst.KeyOp;
+ unsigned BcstOp = Reg2Bcst.DstOp;
+ if (const X86MemoryFoldTableEntry *Reg2Mem = lookupFoldTable(RegOp, 3)) {
+ unsigned MemOp = Reg2Mem->DstOp;
+ uint16_t Flags = Reg2Mem->Flags | Reg2Bcst.Flags | TB_INDEX_3 |
+ TB_FOLDED_LOAD | TB_FOLDED_BCAST;
+ Table.push_back({MemOp, BcstOp, Flags});
+ }
+ }
+
+ // Sort the memory->broadcast fold table.
+ array_pod_sort(Table.begin(), Table.end());
+ }
+};
+} // namespace
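As a reference point for the constructor above, here is what one composed entry looks like once a reg->mem entry and a reg->broadcast entry for the same register-form opcode have been merged. The TB_* constants use the bit layout of the enum this change relocates into llvm/Support/X86FoldTablesUtils.h (visible being removed from X86InstrFoldTables.h further down in this diff; the relocated header may renumber them), and the opcode numbers are placeholders, not real opcode values:

#include <cassert>
#include <cstdint>
#include <cstdio>

namespace {
// Bit layout as in the removed enum; illustrative only.
constexpr uint16_t TB_INDEX_2 = 2;           // bits 0-2: folded operand index
constexpr uint16_t TB_FOLDED_LOAD = 1 << 5;
constexpr uint16_t TB_FOLDED_BCAST = 1 << 7;
constexpr uint16_t TB_BCAST_TYPE_SHIFT = 12; // bits 12-13: broadcast element type
constexpr uint16_t TB_BCAST_SD = 3 << TB_BCAST_TYPE_SHIFT;

struct Entry {
  unsigned KeyOp, DstOp;
  uint16_t Flags;
};
} // namespace

int main() {
  // Placeholder opcodes standing in for VADDPDZrr / VADDPDZrm / VADDPDZrmb.
  Entry Reg2Mem = {100, 101, 0};
  Entry Reg2Bcst = {100, 102, TB_BCAST_SD};

  // The composed entry maps the regular memory form to the broadcast form and
  // merges the flags, mirroring the loops in the constructor above.
  Entry MemToBcst = {Reg2Mem.DstOp, Reg2Bcst.DstOp,
                     uint16_t(Reg2Mem.Flags | Reg2Bcst.Flags | TB_INDEX_2 |
                              TB_FOLDED_LOAD | TB_FOLDED_BCAST)};

  assert(MemToBcst.KeyOp == 101 && MemToBcst.DstOp == 102);
  assert(MemToBcst.Flags & TB_FOLDED_BCAST);
  std::printf("composed flags = 0x%x\n", MemToBcst.Flags);
  return 0;
}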
+
+static bool matchBroadcastSize(const X86MemoryFoldTableEntry &Entry,
+ unsigned BroadcastBits) {
+ switch (Entry.Flags & TB_BCAST_MASK) {
+ case TB_BCAST_SD:
+ case TB_BCAST_Q:
+ return BroadcastBits == 64;
+ case TB_BCAST_SS:
+ case TB_BCAST_D:
+ return BroadcastBits == 32;
+ }
+ return false;
+}
+
+const X86MemoryFoldTableEntry *
+llvm::lookupBroadcastFoldTable(unsigned MemOp, unsigned BroadcastBits) {
+ static X86MemBroadcastFoldTable MemBroadcastFoldTable;
+ auto &Table = MemBroadcastFoldTable.Table;
+ for (auto I = llvm::lower_bound(Table, MemOp);
+ I != Table.end() && I->KeyOp == MemOp; ++I) {
+ if (matchBroadcastSize(*I, BroadcastBits))
+ return &*I;
+ }
+ return nullptr;
+}
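And a self-contained sketch of the lookup contract: the composed table is sorted by the regular memory-form opcode, a given opcode may have several candidates, and the broadcast width recorded in the flags picks between them, mirroring the lower_bound-plus-scan in lookupBroadcastFoldTable above. All numbers and the reduced flag set are invented for illustration:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

namespace {
constexpr uint16_t TB_BCAST_TYPE_SHIFT = 12;
constexpr uint16_t TB_BCAST_D = 0 << TB_BCAST_TYPE_SHIFT; // 32-bit broadcast
constexpr uint16_t TB_BCAST_Q = 1 << TB_BCAST_TYPE_SHIFT; // 64-bit broadcast
constexpr uint16_t TB_BCAST_MASK = 0x3 << TB_BCAST_TYPE_SHIFT;

struct Entry {
  unsigned KeyOp, DstOp;
  uint16_t Flags;
  bool operator<(const Entry &RHS) const { return KeyOp < RHS.KeyOp; }
};

unsigned broadcastBits(const Entry &E) {
  return (E.Flags & TB_BCAST_MASK) == TB_BCAST_Q ? 64 : 32;
}

const Entry *lookup(const std::vector<Entry> &Table, unsigned MemOp,
                    unsigned Bits) {
  auto I = std::lower_bound(Table.begin(), Table.end(), Entry{MemOp, 0, 0});
  for (; I != Table.end() && I->KeyOp == MemOp; ++I)
    if (broadcastBits(*I) == Bits)
      return &*I;
  return nullptr;
}
} // namespace

int main() {
  // Two candidates for one memory opcode, differing only in broadcast width.
  std::vector<Entry> Table = {{101, 201, TB_BCAST_D}, {101, 202, TB_BCAST_Q}};
  assert(lookup(Table, 101, 32)->DstOp == 201);
  assert(lookup(Table, 101, 64)->DstOp == 202);
  assert(lookup(Table, 999, 32) == nullptr);
  return 0;
}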
diff --git a/llvm/lib/Target/X86/X86InstrFoldTables.h b/llvm/lib/Target/X86/X86InstrFoldTables.h
index b7aca27ab2bb..28db61d9a3f8 100644
--- a/llvm/lib/Target/X86/X86InstrFoldTables.h
+++ b/llvm/lib/Target/X86/X86InstrFoldTables.h
@@ -14,60 +14,15 @@
#define LLVM_LIB_TARGET_X86_X86INSTRFOLDTABLES_H
#include <cstdint>
+#include "llvm/Support/X86FoldTablesUtils.h"
namespace llvm {
-enum {
- // Select which memory operand is being unfolded.
- // (stored in bits 0 - 2)
- TB_INDEX_0 = 0,
- TB_INDEX_1 = 1,
- TB_INDEX_2 = 2,
- TB_INDEX_3 = 3,
- TB_INDEX_4 = 4,
- TB_INDEX_MASK = 0x7,
-
- // Do not insert the reverse map (MemOp -> RegOp) into the table.
- // This may be needed because there is a many -> one mapping.
- TB_NO_REVERSE = 1 << 3,
-
- // Do not insert the forward map (RegOp -> MemOp) into the table.
- // This is needed for Native Client, which prohibits branch
- // instructions from using a memory operand.
- TB_NO_FORWARD = 1 << 4,
-
- TB_FOLDED_LOAD = 1 << 5,
- TB_FOLDED_STORE = 1 << 6,
- TB_FOLDED_BCAST = 1 << 7,
-
- // Minimum alignment required for load/store.
- // Used for RegOp->MemOp conversion. Encoded as Log2(Align) + 1 to allow 0
- // to mean align of 0.
- // (stored in bits 8 - 11)
- TB_ALIGN_SHIFT = 8,
- TB_ALIGN_NONE = 0 << TB_ALIGN_SHIFT,
- TB_ALIGN_16 = 5 << TB_ALIGN_SHIFT,
- TB_ALIGN_32 = 6 << TB_ALIGN_SHIFT,
- TB_ALIGN_64 = 7 << TB_ALIGN_SHIFT,
- TB_ALIGN_MASK = 0xf << TB_ALIGN_SHIFT,
-
- // Broadcast type.
- // (stored in bits 12 - 13)
- TB_BCAST_TYPE_SHIFT = 12,
- TB_BCAST_D = 0 << TB_BCAST_TYPE_SHIFT,
- TB_BCAST_Q = 1 << TB_BCAST_TYPE_SHIFT,
- TB_BCAST_SS = 2 << TB_BCAST_TYPE_SHIFT,
- TB_BCAST_SD = 3 << TB_BCAST_TYPE_SHIFT,
- TB_BCAST_MASK = 0x3 << TB_BCAST_TYPE_SHIFT,
-
- // Unused bits 14-15
-};
-
// This struct is used for both the folding and unfolding tables. The KeyOp
// is used to determine the sorting order.
struct X86MemoryFoldTableEntry {
- uint16_t KeyOp;
- uint16_t DstOp;
+ unsigned KeyOp;
+ unsigned DstOp;
uint16_t Flags;
bool operator<(const X86MemoryFoldTableEntry &RHS) const {
@@ -92,6 +47,11 @@ const X86MemoryFoldTableEntry *lookupFoldTable(unsigned RegOp, unsigned OpNum);
// Look up the memory unfolding table entry for this instruction.
const X86MemoryFoldTableEntry *lookupUnfoldTable(unsigned MemOp);
+// Look up the broadcast memory folding table entry for this instruction from
+// the regular memory instruction.
+const X86MemoryFoldTableEntry *lookupBroadcastFoldTable(unsigned MemOp,
+ unsigned BroadcastBits);
+
} // namespace llvm
#endif
diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td
index 165dbb85c8e7..f45869e15267 100644
--- a/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/llvm/lib/Target/X86/X86InstrFormats.td
@@ -231,10 +231,9 @@ class TAPD : TA { Prefix OpPrefix = PD; }
class TAXD : TA { Prefix OpPrefix = XD; }
class TAXS : TA { Prefix OpPrefix = XS; }
class VEX { Encoding OpEnc = EncVEX; }
-class VEX_W { bit HasVEX_W = 1; }
-class VEX_WIG { bit IgnoresVEX_W = 1; }
-// Special version of VEX_W that can be changed to VEX.W==0 for EVEX2VEX.
-class VEX_W1X { bit HasVEX_W = 1; bit EVEX_W1_VEX_W0 = 1; }
+class WIG { bit IgnoresW = 1; }
+// Special version of REX_W that can be changed to VEX.W==0 for EVEX2VEX.
+class VEX_W1X { bit hasREX_W = 1; bit EVEX_W1_VEX_W0 = 1; }
class VEX_4V : VEX { bit hasVEX_4V = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
@@ -260,20 +259,11 @@ class EVEX_CD8<int esize, CD8VForm form> {
class XOP { Encoding OpEnc = EncXOP; }
class XOP_4V : XOP { bit hasVEX_4V = 1; }
-// Specify the alternative register form instruction to replace the current
-// instruction in case it was picked during generation of memory folding tables
-class FoldGenData<string _RegisterForm> {
- string FoldGenRegForm = _RegisterForm;
-}
-
// Provide a specific instruction to be used by the EVEX2VEX conversion.
class EVEX2VEXOverride<string VEXInstrName> {
string EVEX2VEXOverride = VEXInstrName;
}
-// Mark the instruction as "illegal to memory fold/unfold"
-class NotMemoryFoldable { bit isMemoryFoldable = 0; }
-
// Prevent EVEX->VEX conversion from considering this instruction.
class NotEVEX2VEXConvertible { bit notEVEX2VEXConvertible = 1; }
@@ -324,8 +314,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasREPPrefix = 0; // Does this inst have a REP prefix?
Encoding OpEnc = EncNormal; // Encoding used by this instruction
bits<2> OpEncBits = OpEnc.Value;
- bit HasVEX_W = 0; // Does this inst set the VEX_W field?
- bit IgnoresVEX_W = 0; // Does this inst ignore VEX_W field?
+ bit IgnoresW = 0; // Does this inst ignore REX_W field?
bit EVEX_W1_VEX_W0 = 0; // This EVEX inst with VEX.W==1 can become a VEX
// instruction with VEX.W == 0.
bit hasVEX_4V = 0; // Does this inst require the VEX.VVVV field?
@@ -357,14 +346,9 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
CD8_EltSize,
!srl(VectSize, CD8_Form{1-0}))), 0);
- // Used in the memory folding generation (TableGen backend) to point to an alternative
- // instruction to replace the current one in case it got picked during generation.
- string FoldGenRegForm = ?;
-
// Used to prevent an explicit EVEX2VEX override for this instruction.
string EVEX2VEXOverride = ?;
- bit isMemoryFoldable = 1; // Is it allowed to memory fold/unfold this instruction?
bit notEVEX2VEXConvertible = 0; // Prevent EVEX->VEX conversion.
bit ExplicitVEXPrefix = 0; // Force the instruction to use VEX encoding.
// Force to check predicate before compress EVEX to VEX encoding.
@@ -384,19 +368,16 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{28-27} = ExeDomain.Value;
let TSFlags{30-29} = OpEncBits;
let TSFlags{38-31} = Opcode;
- // Currently no need for second bit in TSFlags - W Ignore is equivalent to 0.
- let TSFlags{39} = HasVEX_W;
- let TSFlags{40} = hasVEX_4V;
- let TSFlags{41} = hasVEX_L;
- let TSFlags{42} = hasEVEX_K;
- let TSFlags{43} = hasEVEX_Z;
- let TSFlags{44} = hasEVEX_L2;
- let TSFlags{45} = hasEVEX_B;
- // If we run out of TSFlags bits, it's possible to encode this in 3 bits.
- let TSFlags{52-46} = CD8_Scale;
- let TSFlags{53} = hasEVEX_RC;
- let TSFlags{54} = hasNoTrackPrefix;
- let TSFlags{55} = ExplicitVEXPrefix;
+ let TSFlags{39} = hasVEX_4V;
+ let TSFlags{40} = hasVEX_L;
+ let TSFlags{41} = hasEVEX_K;
+ let TSFlags{42} = hasEVEX_Z;
+ let TSFlags{43} = hasEVEX_L2;
+ let TSFlags{44} = hasEVEX_B;
+ let TSFlags{47-45} = !if(!eq(CD8_Scale, 0), 0, !add(!logtwo(CD8_Scale), 1));
+ let TSFlags{48} = hasEVEX_RC;
+ let TSFlags{49} = hasNoTrackPrefix;
+ let TSFlags{50} = ExplicitVEXPrefix;
}
class PseudoI<dag oops, dag iops, list<dag> pattern>
@@ -990,7 +971,7 @@ class RS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
: S2I<o, F, outs, ins, asm, pattern>, REX_W;
class VRS2I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : VS2I<o, F, outs, ins, asm, pattern>, VEX_W;
+ : VS2I<o, F, outs, ins, asm, pattern>, REX_W;
// MMX Instruction templates
//
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index e804122adae3..10a0ccdcb023 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -43,6 +44,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
+#include <optional>
using namespace llvm;
@@ -216,14 +218,11 @@ bool X86InstrInfo::isDataInvariantLoad(MachineInstr &MI) {
// However, they set flags and are perhaps the most surprisingly constant
// time operations so we call them out here separately.
case X86::IMUL16rm:
- case X86::IMUL16rmi8:
case X86::IMUL16rmi:
case X86::IMUL32rm:
- case X86::IMUL32rmi8:
case X86::IMUL32rmi:
case X86::IMUL64rm:
case X86::IMUL64rmi32:
- case X86::IMUL64rmi8:
// Bit scanning and counting instructions that are somewhat surprisingly
// constant time as they scan across bits and do other fairly complex
@@ -433,13 +432,11 @@ int X86InstrInfo::getSPAdjust(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
return 0;
- case X86::PUSH32i8:
case X86::PUSH32r:
case X86::PUSH32rmm:
case X86::PUSH32rmr:
- case X86::PUSHi32:
+ case X86::PUSH32i:
return 4;
- case X86::PUSH64i8:
case X86::PUSH64r:
case X86::PUSH64rmm:
case X86::PUSH64rmr:
@@ -974,18 +971,19 @@ static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
MachineInstr **AndInstr,
const TargetRegisterInfo *TRI,
bool &NoSignFlag, bool &ClearsOverflowFlag) {
- if (CmpValDefInstr.getOpcode() != X86::SUBREG_TO_REG)
+ if (!(CmpValDefInstr.getOpcode() == X86::SUBREG_TO_REG &&
+ CmpInstr.getOpcode() == X86::TEST64rr) &&
+ !(CmpValDefInstr.getOpcode() == X86::COPY &&
+ CmpInstr.getOpcode() == X86::TEST16rr))
return false;
- if (CmpInstr.getOpcode() != X86::TEST64rr)
- return false;
-
- // CmpInstr is a TEST64rr instruction, and `X86InstrInfo::analyzeCompare`
- // guarantees that it's analyzable only if two registers are identical.
- assert(
- (CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) &&
- "CmpInstr is an analyzable TEST64rr, and `X86InstrInfo::analyzeCompare` "
- "requires two reg operands are the same.");
+ // CmpInstr is a TEST16rr/TEST64rr instruction, and
+ // `X86InstrInfo::analyzeCompare` guarantees that it's analyzable only if two
+ // registers are identical.
+ assert((CmpInstr.getOperand(0).getReg() == CmpInstr.getOperand(1).getReg()) &&
+ "CmpInstr is an analyzable TEST16rr/TEST64rr, and "
+         "`X86InstrInfo::analyzeCompare` requires the two reg operands to be "
+         "the same.");
// Caller (`X86InstrInfo::optimizeCompareInstr`) guarantees that
// `CmpValDefInstr` defines the value that's used by `CmpInstr`; in this case
@@ -993,20 +991,37 @@ static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
// redundant.
assert(
(MRI->getVRegDef(CmpInstr.getOperand(0).getReg()) == &CmpValDefInstr) &&
- "Caller guarantees that TEST64rr is a user of SUBREG_TO_REG.");
+ "Caller guarantees that TEST64rr is a user of SUBREG_TO_REG or TEST16rr "
+ "is a user of COPY sub16bit.");
+ MachineInstr *VregDefInstr = nullptr;
+ if (CmpInstr.getOpcode() == X86::TEST16rr) {
+ if (!CmpValDefInstr.getOperand(1).getReg().isVirtual())
+ return false;
+ VregDefInstr = MRI->getVRegDef(CmpValDefInstr.getOperand(1).getReg());
+ if (!VregDefInstr)
+ return false;
+    // We can only remove the TEST when the AND is an AND32ri or AND64ri32
+    // whose immediate fits in 16 bits; wider AND immediates would set flags
+    // based on higher bits that TEST16rr does not examine.
+ if (!((VregDefInstr->getOpcode() == X86::AND32ri ||
+ VregDefInstr->getOpcode() == X86::AND64ri32) &&
+ isUInt<16>(VregDefInstr->getOperand(2).getImm())))
+ return false;
+ }
- // As seen in X86 td files, CmpValDefInstr.getOperand(1).getImm() is typically
- // 0.
- if (CmpValDefInstr.getOperand(1).getImm() != 0)
- return false;
+ if (CmpInstr.getOpcode() == X86::TEST64rr) {
+ // As seen in X86 td files, CmpValDefInstr.getOperand(1).getImm() is
+ // typically 0.
+ if (CmpValDefInstr.getOperand(1).getImm() != 0)
+ return false;
- // As seen in X86 td files, CmpValDefInstr.getOperand(3) is typically
- // sub_32bit or sub_xmm.
- if (CmpValDefInstr.getOperand(3).getImm() != X86::sub_32bit)
- return false;
+ // As seen in X86 td files, CmpValDefInstr.getOperand(3) is typically
+ // sub_32bit or sub_xmm.
+ if (CmpValDefInstr.getOperand(3).getImm() != X86::sub_32bit)
+ return false;
- MachineInstr *VregDefInstr =
- MRI->getVRegDef(CmpValDefInstr.getOperand(2).getReg());
+ VregDefInstr = MRI->getVRegDef(CmpValDefInstr.getOperand(2).getReg());
+ }
assert(VregDefInstr && "Must have a definition (SSA)");
@@ -1024,6 +1039,11 @@ static bool findRedundantFlagInstr(MachineInstr &CmpInstr,
// ... // EFLAGS not changed
// %extended_reg = subreg_to_reg 0, %reg, %subreg.sub_32bit
// test64rr %extended_reg, %extended_reg, implicit-def $eflags
+ // or
+ // %reg = and32* ...
+ // ... // EFLAGS not changed.
+ // %src_reg = copy %reg.sub_16bit:gr32
+ // test16rr %src_reg, %src_reg, implicit-def $eflags
//
// If subsequent readers use a subset of bits that don't change
// after `and*` instructions, it's likely that the test64rr could
@@ -1098,6 +1118,7 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
ImplicitOp.setImplicit();
NewSrc = getX86SubSuperRegister(SrcReg, 64);
+ assert(NewSrc.isValid() && "Invalid Operand");
assert(!Src.isUndef() && "Undef op doesn't need optimization");
} else {
// Virtual register of the wrong class, we have to create a temporary 64-bit
@@ -1204,9 +1225,7 @@ MachineInstr *X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
case X86::ADD8ri:
case X86::ADD8ri_DB:
case X86::ADD16ri:
- case X86::ADD16ri8:
case X86::ADD16ri_DB:
- case X86::ADD16ri8_DB:
addRegOffset(MIB, InRegLEA, true, MI.getOperand(2).getImm());
break;
case X86::ADD8rr:
@@ -1517,18 +1536,14 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
case X86::ADD16rr_DB:
return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
case X86::ADD64ri32:
- case X86::ADD64ri8:
case X86::ADD64ri32_DB:
- case X86::ADD64ri8_DB:
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
NewMI = addOffset(
BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r)).add(Dest).add(Src),
MI.getOperand(2));
break;
case X86::ADD32ri:
- case X86::ADD32ri8:
- case X86::ADD32ri_DB:
- case X86::ADD32ri8_DB: {
+ case X86::ADD32ri_DB: {
assert(MI.getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc = Is64Bit ? X86::LEA64_32r : X86::LEA32r;
@@ -1556,16 +1571,12 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
Is8BitOp = true;
[[fallthrough]];
case X86::ADD16ri:
- case X86::ADD16ri8:
case X86::ADD16ri_DB:
- case X86::ADD16ri8_DB:
return convertToThreeAddressWithLEA(MIOpc, MI, LV, LIS, Is8BitOp);
case X86::SUB8ri:
- case X86::SUB16ri8:
case X86::SUB16ri:
/// FIXME: Support these similar to ADD8ri/ADD16ri*.
return nullptr;
- case X86::SUB32ri8:
case X86::SUB32ri: {
if (!MI.getOperand(2).isImm())
return nullptr;
@@ -1596,7 +1607,6 @@ MachineInstr *X86InstrInfo::convertToThreeAddress(MachineInstr &MI,
break;
}
- case X86::SUB64ri8:
case X86::SUB64ri32: {
if (!MI.getOperand(2).isImm())
return nullptr;
@@ -2096,7 +2106,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
// "inline" and we don't override the insertion with a zero.
if (DstIdx == SrcIdx && (ZMask & (1 << DstIdx)) == 0 &&
llvm::popcount(ZMask) == 2) {
- unsigned AltIdx = findFirstSet((ZMask | (1 << DstIdx)) ^ 15);
+ unsigned AltIdx = llvm::countr_zero((ZMask | (1 << DstIdx)) ^ 15);
assert(AltIdx < 4 && "Illegal insertion index");
unsigned AltImm = (AltIdx << 6) | (AltIdx << 4) | ZMask;
auto &WorkingMI = cloneIfNew(MI);
@@ -2555,6 +2565,10 @@ bool X86InstrInfo::findCommutedOpIndices(const MachineInstr &MI,
case X86::VPDPWSSDrr:
case X86::VPDPWSSDSYrr:
case X86::VPDPWSSDSrr:
+ case X86::VPDPWUUDrr:
+ case X86::VPDPWUUDYrr:
+ case X86::VPDPWUUDSrr:
+ case X86::VPDPWUUDSYrr:
case X86::VPDPBSSDSrr:
case X86::VPDPBSSDSYrr:
case X86::VPDPBSSDrr:
@@ -3201,6 +3215,65 @@ bool X86InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return AnalyzeBranchImpl(MBB, TBB, FBB, Cond, CondBranches, AllowModify);
}
+static int getJumpTableIndexFromAddr(const MachineInstr &MI) {
+ const MCInstrDesc &Desc = MI.getDesc();
+ int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
+ assert(MemRefBegin >= 0 && "instr should have memory operand");
+ MemRefBegin += X86II::getOperandBias(Desc);
+
+ const MachineOperand &MO = MI.getOperand(MemRefBegin + X86::AddrDisp);
+ if (!MO.isJTI())
+ return -1;
+
+ return MO.getIndex();
+}
+
+static int getJumpTableIndexFromReg(const MachineRegisterInfo &MRI,
+ Register Reg) {
+ if (!Reg.isVirtual())
+ return -1;
+ MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
+ if (MI == nullptr)
+ return -1;
+ unsigned Opcode = MI->getOpcode();
+ if (Opcode != X86::LEA64r && Opcode != X86::LEA32r)
+ return -1;
+ return getJumpTableIndexFromAddr(*MI);
+}
+
+int X86InstrInfo::getJumpTableIndex(const MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
+ // Switch-jump pattern for non-PIC code looks like:
+ // JMP64m $noreg, 8, %X, %jump-table.X, $noreg
+ if (Opcode == X86::JMP64m || Opcode == X86::JMP32m) {
+ return getJumpTableIndexFromAddr(MI);
+ }
+ // The pattern for PIC code looks like:
+ // %0 = LEA64r $rip, 1, $noreg, %jump-table.X
+ // %1 = MOVSX64rm32 %0, 4, XX, 0, $noreg
+ // %2 = ADD64rr %1, %0
+ // JMP64r %2
+ if (Opcode == X86::JMP64r || Opcode == X86::JMP32r) {
+ Register Reg = MI.getOperand(0).getReg();
+ if (!Reg.isVirtual())
+ return -1;
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineInstr *Add = MRI.getUniqueVRegDef(Reg);
+ if (Add == nullptr)
+ return -1;
+ if (Add->getOpcode() != X86::ADD64rr && Add->getOpcode() != X86::ADD32rr)
+ return -1;
+ int JTI1 = getJumpTableIndexFromReg(MRI, Add->getOperand(1).getReg());
+ if (JTI1 >= 0)
+ return JTI1;
+ int JTI2 = getJumpTableIndexFromReg(MRI, Add->getOperand(2).getReg());
+ if (JTI2 >= 0)
+ return JTI2;
+ }
+ return -1;
+}
+
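The two shapes spelled out in the comments above are the only ones recognized. As a shape-only illustration of the PIC case, a toy struct graph (not real MachineInstr/MachineRegisterInfo code): the jump's address is an ADD, and whichever ADD operand is directly defined by an LEA of a jump table supplies the index:

#include <cassert>

namespace {
struct Inst {
  enum Kind { LeaJumpTable, MovsxLoad, Add, Other } K;
  int JTIndex = -1;                 // only meaningful for LeaJumpTable
  const Inst *Ops[2] = {nullptr, nullptr};
};

int jumpTableFromOperand(const Inst *I) {
  return (I && I->K == Inst::LeaJumpTable) ? I->JTIndex : -1;
}

int jumpTableFromJumpAddress(const Inst &AddI) {
  if (AddI.K != Inst::Add)
    return -1;
  int JTI = jumpTableFromOperand(AddI.Ops[0]);
  return JTI >= 0 ? JTI : jumpTableFromOperand(AddI.Ops[1]);
}
} // namespace

int main() {
  Inst Lea{Inst::LeaJumpTable, /*JTIndex=*/3};   // LEA64r ... %jump-table.3
  Inst Load{Inst::MovsxLoad};                    // MOVSX64rm32 off the table
  Load.Ops[0] = &Lea;
  Inst AddI{Inst::Add};                          // ADD64rr %index, %base
  AddI.Ops[0] = &Load;
  AddI.Ops[1] = &Lea;
  assert(jumpTableFromJumpAddress(AddI) == 3);
  return 0;
}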
bool X86InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
MachineBranchPredicate &MBP,
bool AllowModify) const {
@@ -3601,6 +3674,7 @@ static unsigned getLoadStoreRegOpcode(Register Reg,
bool HasAVX512 = STI.hasAVX512();
bool HasVLX = STI.hasVLX();
+ assert(RC != nullptr && "Invalid target register class");
switch (STI.getRegisterInfo()->getSpillSize(*RC)) {
default:
llvm_unreachable("Unknown spill size");
@@ -3977,11 +4051,8 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
switch (MI.getOpcode()) {
default: break;
case X86::CMP64ri32:
- case X86::CMP64ri8:
case X86::CMP32ri:
- case X86::CMP32ri8:
case X86::CMP16ri:
- case X86::CMP16ri8:
case X86::CMP8ri:
SrcReg = MI.getOperand(0).getReg();
SrcReg2 = 0;
@@ -4012,11 +4083,8 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
CmpValue = 0;
return true;
case X86::SUB64ri32:
- case X86::SUB64ri8:
case X86::SUB32ri:
- case X86::SUB32ri8:
case X86::SUB16ri:
- case X86::SUB16ri8:
case X86::SUB8ri:
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
@@ -4084,18 +4152,12 @@ bool X86InstrInfo::isRedundantFlagInstr(const MachineInstr &FlagI,
return false;
}
case X86::CMP64ri32:
- case X86::CMP64ri8:
case X86::CMP32ri:
- case X86::CMP32ri8:
case X86::CMP16ri:
- case X86::CMP16ri8:
case X86::CMP8ri:
case X86::SUB64ri32:
- case X86::SUB64ri8:
case X86::SUB32ri:
- case X86::SUB32ri8:
case X86::SUB16ri:
- case X86::SUB16ri8:
case X86::SUB8ri:
case X86::TEST64rr:
case X86::TEST32rr:
@@ -4170,32 +4232,25 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
case X86::SHLD16rri8:case X86::SHLD32rri8:case X86::SHLD64rri8:
return getTruncatedShiftCount(MI, 3) != 0;
- case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB32ri:
- case X86::SUB32ri8: case X86::SUB16ri: case X86::SUB16ri8:
+ case X86::SUB64ri32: case X86::SUB32ri: case X86::SUB16ri:
case X86::SUB8ri: case X86::SUB64rr: case X86::SUB32rr:
case X86::SUB16rr: case X86::SUB8rr: case X86::SUB64rm:
case X86::SUB32rm: case X86::SUB16rm: case X86::SUB8rm:
case X86::DEC64r: case X86::DEC32r: case X86::DEC16r: case X86::DEC8r:
- case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD32ri:
- case X86::ADD32ri8: case X86::ADD16ri: case X86::ADD16ri8:
+ case X86::ADD64ri32: case X86::ADD32ri: case X86::ADD16ri:
case X86::ADD8ri: case X86::ADD64rr: case X86::ADD32rr:
case X86::ADD16rr: case X86::ADD8rr: case X86::ADD64rm:
case X86::ADD32rm: case X86::ADD16rm: case X86::ADD8rm:
case X86::INC64r: case X86::INC32r: case X86::INC16r: case X86::INC8r:
- case X86::ADC64ri32: case X86::ADC64ri8: case X86::ADC32ri:
- case X86::ADC32ri8: case X86::ADC16ri: case X86::ADC16ri8:
+ case X86::ADC64ri32: case X86::ADC32ri: case X86::ADC16ri:
case X86::ADC8ri: case X86::ADC64rr: case X86::ADC32rr:
case X86::ADC16rr: case X86::ADC8rr: case X86::ADC64rm:
case X86::ADC32rm: case X86::ADC16rm: case X86::ADC8rm:
- case X86::SBB64ri32: case X86::SBB64ri8: case X86::SBB32ri:
- case X86::SBB32ri8: case X86::SBB16ri: case X86::SBB16ri8:
+ case X86::SBB64ri32: case X86::SBB32ri: case X86::SBB16ri:
case X86::SBB8ri: case X86::SBB64rr: case X86::SBB32rr:
case X86::SBB16rr: case X86::SBB8rr: case X86::SBB64rm:
case X86::SBB32rm: case X86::SBB16rm: case X86::SBB8rm:
case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r:
- case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1:case X86::SAR64r1:
- case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1:case X86::SHR64r1:
- case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1:case X86::SHL64r1:
case X86::LZCNT16rr: case X86::LZCNT16rm:
case X86::LZCNT32rr: case X86::LZCNT32rm:
case X86::LZCNT64rr: case X86::LZCNT64rm:
@@ -4206,18 +4261,15 @@ inline static bool isDefConvertible(const MachineInstr &MI, bool &NoSignFlag,
case X86::TZCNT32rr: case X86::TZCNT32rm:
case X86::TZCNT64rr: case X86::TZCNT64rm:
return true;
- case X86::AND64ri32: case X86::AND64ri8: case X86::AND32ri:
- case X86::AND32ri8: case X86::AND16ri: case X86::AND16ri8:
+ case X86::AND64ri32: case X86::AND32ri: case X86::AND16ri:
case X86::AND8ri: case X86::AND64rr: case X86::AND32rr:
case X86::AND16rr: case X86::AND8rr: case X86::AND64rm:
case X86::AND32rm: case X86::AND16rm: case X86::AND8rm:
- case X86::XOR64ri32: case X86::XOR64ri8: case X86::XOR32ri:
- case X86::XOR32ri8: case X86::XOR16ri: case X86::XOR16ri8:
+ case X86::XOR64ri32: case X86::XOR32ri: case X86::XOR16ri:
case X86::XOR8ri: case X86::XOR64rr: case X86::XOR32rr:
case X86::XOR16rr: case X86::XOR8rr: case X86::XOR64rm:
case X86::XOR32rm: case X86::XOR16rm: case X86::XOR8rm:
- case X86::OR64ri32: case X86::OR64ri8: case X86::OR32ri:
- case X86::OR32ri8: case X86::OR16ri: case X86::OR16ri8:
+ case X86::OR64ri32: case X86::OR32ri: case X86::OR16ri:
case X86::OR8ri: case X86::OR64rr: case X86::OR32rr:
case X86::OR16rr: case X86::OR8rr: case X86::OR64rm:
case X86::OR32rm: case X86::OR16rm: case X86::OR8rm:
@@ -4316,11 +4368,8 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
switch (CmpInstr.getOpcode()) {
default: break;
case X86::SUB64ri32:
- case X86::SUB64ri8:
case X86::SUB32ri:
- case X86::SUB32ri8:
case X86::SUB16ri:
- case X86::SUB16ri8:
case X86::SUB8ri:
case X86::SUB64rm:
case X86::SUB32rm:
@@ -4345,11 +4394,8 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
case X86::SUB16rr: NewOpcode = X86::CMP16rr; break;
case X86::SUB8rr: NewOpcode = X86::CMP8rr; break;
case X86::SUB64ri32: NewOpcode = X86::CMP64ri32; break;
- case X86::SUB64ri8: NewOpcode = X86::CMP64ri8; break;
case X86::SUB32ri: NewOpcode = X86::CMP32ri; break;
- case X86::SUB32ri8: NewOpcode = X86::CMP32ri8; break;
case X86::SUB16ri: NewOpcode = X86::CMP16ri; break;
- case X86::SUB16ri8: NewOpcode = X86::CMP16ri8; break;
case X86::SUB8ri: NewOpcode = X86::CMP8ri; break;
}
CmpInstr.setDesc(get(NewOpcode));
@@ -4402,10 +4448,15 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,
break;
}
- // Look back for the following pattern, in which case the test64rr
- // instruction could be erased.
+ // Look back for the following pattern, in which case the
+ // test16rr/test64rr instruction could be erased.
//
- // Example:
+ // Example for test16rr:
+ // %reg = and32ri %in_reg, 5
+ // ... // EFLAGS not changed.
+ // %src_reg = copy %reg.sub_16bit:gr32
+ // test16rr %src_reg, %src_reg, implicit-def $eflags
+ // Example for test64rr:
// %reg = and32ri %in_reg, 5
// ... // EFLAGS not changed.
// %src_reg = subreg_to_reg 0, %reg, %subreg.sub_index
@@ -4793,14 +4844,14 @@ static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
// 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and
// widen the register if necessary.
StackAdjustment = 8;
- BuildMI(MBB, I, DL, TII.get(X86::PUSH64i8)).addImm(Imm);
+ BuildMI(MBB, I, DL, TII.get(X86::PUSH64i32)).addImm(Imm);
MIB->setDesc(TII.get(X86::POP64r));
MIB->getOperand(0)
.setReg(getX86SubSuperRegister(MIB.getReg(0), 64));
} else {
assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
StackAdjustment = 4;
- BuildMI(MBB, I, DL, TII.get(X86::PUSH32i8)).addImm(Imm);
+ BuildMI(MBB, I, DL, TII.get(X86::PUSH32i)).addImm(Imm);
MIB->setDesc(TII.get(X86::POP32r));
}
MIB->removeOperand(1);
@@ -5060,6 +5111,45 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
+ case X86::RDFLAGS32:
+ case X86::RDFLAGS64: {
+ unsigned Is64Bit = MI.getOpcode() == X86::RDFLAGS64;
+ MachineBasicBlock &MBB = *MIB->getParent();
+
+ MachineInstr *NewMI =
+ BuildMI(MBB, MI, MIB->getDebugLoc(),
+ get(Is64Bit ? X86::PUSHF64 : X86::PUSHF32))
+ .getInstr();
+
+ // Permit reads of the EFLAGS and DF registers without them being defined.
+ // This intrinsic exists to read external processor state in flags, such as
+ // the trap flag, interrupt flag, and direction flag, none of which are
+ // modeled by the backend.
+ assert(NewMI->getOperand(2).getReg() == X86::EFLAGS &&
+ "Unexpected register in operand! Should be EFLAGS.");
+ NewMI->getOperand(2).setIsUndef();
+ assert(NewMI->getOperand(3).getReg() == X86::DF &&
+ "Unexpected register in operand! Should be DF.");
+ NewMI->getOperand(3).setIsUndef();
+
+ MIB->setDesc(get(Is64Bit ? X86::POP64r : X86::POP32r));
+ return true;
+ }
+
+ case X86::WRFLAGS32:
+ case X86::WRFLAGS64: {
+ unsigned Is64Bit = MI.getOpcode() == X86::WRFLAGS64;
+ MachineBasicBlock &MBB = *MIB->getParent();
+
+ BuildMI(MBB, MI, MIB->getDebugLoc(),
+ get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
+ .addReg(MI.getOperand(0).getReg());
+ BuildMI(MBB, MI, MIB->getDebugLoc(),
+ get(Is64Bit ? X86::POPF64 : X86::POPF32));
+ MI.eraseFromParent();
+ return true;
+ }
+
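These pseudos back the EFLAGS read/write intrinsics; on the source side that is typically __readeflags()/__writeeflags() from <x86intrin.h> (an assumption about the usual frontend entry point, not something this diff states). A tiny x86-64-only probe that only reads the flags, since writing them back can set hazardous bits such as TF:

#include <cstdio>
#include <x86intrin.h>

int main() {
  // Reads the live EFLAGS; per the expansion above this becomes PUSHF64 + POP64r.
  unsigned long long Flags = __readeflags();
  std::printf("CF=%llu ZF=%llu IF=%llu DF=%llu\n",
              Flags & 1, (Flags >> 6) & 1, (Flags >> 9) & 1, (Flags >> 10) & 1);
  return 0;
}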
// KNL does not recognize dependency-breaking idioms for mask registers,
// so kxnor %k1, %k1, %k2 has a RAW dependence on %k1.
// Using %k0 as the undef input register is a performance heuristic based
@@ -5091,9 +5181,6 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
case X86::ADD16ri_DB: MIB->setDesc(get(X86::OR16ri)); break;
case X86::ADD32ri_DB: MIB->setDesc(get(X86::OR32ri)); break;
case X86::ADD64ri32_DB: MIB->setDesc(get(X86::OR64ri32)); break;
- case X86::ADD16ri8_DB: MIB->setDesc(get(X86::OR16ri8)); break;
- case X86::ADD32ri8_DB: MIB->setDesc(get(X86::OR32ri8)); break;
- case X86::ADD64ri8_DB: MIB->setDesc(get(X86::OR64ri8)); break;
}
return false;
}
@@ -6126,7 +6213,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
return nullptr;
// Don't fold loads into indirect calls that need a KCFI check as we'll
- // have to unfold these in X86KCFIPass anyway.
+ // have to unfold these in X86TargetLowering::EmitKCFICheck anyway.
if (MI.isCall() && MI.getCFIType())
return nullptr;
@@ -6165,9 +6252,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
isTwoAddrFold || (OpNum == 0 && I->Flags & TB_FOLDED_LOAD) || OpNum > 0;
bool FoldedStore =
isTwoAddrFold || (OpNum == 0 && I->Flags & TB_FOLDED_STORE);
- MaybeAlign MinAlign =
- decodeMaybeAlign((I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT);
- if (MinAlign && Alignment < *MinAlign)
+ if (Alignment < Align(1ULL << ((I->Flags & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT)))
return nullptr;
bool NarrowToMOV32rm = false;
if (Size) {
@@ -6314,9 +6399,9 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
switch (MI.getOpcode()) {
default: return nullptr;
case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break;
- case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
- case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
- case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break;
}
// Check if it's safe to fold the load. If the size of the object is
// narrower than the load width, then it's not.
@@ -6693,9 +6778,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
switch (MI.getOpcode()) {
default: return nullptr;
case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
- case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
- case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
- case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
}
// Change to CMPXXri r, 0 first.
MI.setDesc(get(NewOpc));
@@ -6991,11 +7076,8 @@ bool X86InstrInfo::unfoldMemoryOperand(
switch (DataMI->getOpcode()) {
default: break;
case X86::CMP64ri32:
- case X86::CMP64ri8:
case X86::CMP32ri:
- case X86::CMP32ri8:
case X86::CMP16ri:
- case X86::CMP16ri8:
case X86::CMP8ri: {
MachineOperand &MO0 = DataMI->getOperand(0);
MachineOperand &MO1 = DataMI->getOperand(1);
@@ -7003,11 +7085,8 @@ bool X86InstrInfo::unfoldMemoryOperand(
unsigned NewOpc;
switch (DataMI->getOpcode()) {
default: llvm_unreachable("Unreachable!");
- case X86::CMP64ri8:
case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
- case X86::CMP32ri8:
case X86::CMP32ri: NewOpc = X86::TEST32rr; break;
- case X86::CMP16ri8:
case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
}
@@ -7120,20 +7199,14 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
switch (Opc) {
default: break;
case X86::CMP64ri32:
- case X86::CMP64ri8:
case X86::CMP32ri:
- case X86::CMP32ri8:
case X86::CMP16ri:
- case X86::CMP16ri8:
case X86::CMP8ri:
if (isNullConstant(BeforeOps[1])) {
switch (Opc) {
default: llvm_unreachable("Unreachable!");
- case X86::CMP64ri8:
case X86::CMP64ri32: Opc = X86::TEST64rr; break;
- case X86::CMP32ri8:
case X86::CMP32ri: Opc = X86::TEST32rr; break;
- case X86::CMP16ri8:
case X86::CMP16ri: Opc = X86::TEST16rr; break;
case X86::CMP8ri: Opc = X86::TEST8rr; break;
}
@@ -9158,7 +9231,7 @@ X86InstrInfo::describeLoadedValue(const MachineInstr &MI, Register Reg) const {
DIExpression::appendOffset(Ops, Offset);
Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), Ops);
- return ParamLoadedValue(*Op, Expr);;
+ return ParamLoadedValue(*Op, Expr);
}
case X86::MOV8ri:
case X86::MOV16ri:
@@ -9226,7 +9299,7 @@ void X86InstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1,
// Propagate FP flags from the original instructions.
// But clear poison-generating flags because those may not be valid now.
// TODO: There should be a helper function for copying only fast-math-flags.
- uint16_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
+ uint32_t IntersectedFlags = OldMI1.getFlags() & OldMI2.getFlags();
NewMI1.setFlags(IntersectedFlags);
NewMI1.clearFlag(MachineInstr::MIFlag::NoSWrap);
NewMI1.clearFlag(MachineInstr::MIFlag::NoUWrap);
@@ -9558,7 +9631,8 @@ enum MachineOutlinerClass {
MachineOutlinerTailCall
};
-outliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo(
+std::optional<outliner::OutlinedFunction>
+X86InstrInfo::getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
unsigned SequenceSize =
std::accumulate(RepeatedSequenceLocs[0].front(),
@@ -9591,7 +9665,7 @@ outliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo(
C.getMF()->getFrameInstructions();
if (CFICount > 0 && CFICount != CFIInstructions.size())
- return outliner::OutlinedFunction();
+ return std::nullopt;
}
// FIXME: Use real size in bytes for call and ret instructions.
@@ -9606,7 +9680,7 @@ outliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo(
}
if (CFICount > 0)
- return outliner::OutlinedFunction();
+ return std::nullopt;
for (outliner::Candidate &C : RepeatedSequenceLocs)
C.setCallInfo(MachineOutlinerDefault, 1);
@@ -9638,32 +9712,15 @@ bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF,
}
outliner::InstrType
-X86InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const {
+X86InstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT, unsigned Flags) const {
MachineInstr &MI = *MIT;
- // Don't allow debug values to impact outlining type.
- if (MI.isDebugInstr() || MI.isIndirectDebugValue())
- return outliner::InstrType::Invisible;
-
- // At this point, KILL instructions don't really tell us much so we can go
- // ahead and skip over them.
- if (MI.isKill())
- return outliner::InstrType::Invisible;
- // Is this a tail call? If yes, we can outline as a tail call.
- if (isTailCall(MI))
+ // Is this a terminator for a basic block?
+ if (MI.isTerminator())
+ // TargetInstrInfo::getOutliningType has already filtered out anything
+ // that would break this, so we can allow it here.
return outliner::InstrType::Legal;
- // Is this the terminator of a basic block?
- if (MI.isTerminator() || MI.isReturn()) {
-
- // Does its parent have any successors in its MachineFunction?
- if (MI.getParent()->succ_empty())
- return outliner::InstrType::Legal;
-
- // It does, so we can't tail call it.
- return outliner::InstrType::Illegal;
- }
-
// Don't outline anything that modifies or reads from the stack pointer.
//
// FIXME: There are instructions which are being manually built without
@@ -9684,16 +9741,10 @@ X86InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags
MI.getDesc().hasImplicitDefOfPhysReg(X86::RIP))
return outliner::InstrType::Illegal;
- // Positions can't safely be outlined.
- if (MI.isPosition())
+ // Don't outline CFI instructions.
+ if (MI.isCFIInstruction())
return outliner::InstrType::Illegal;
- // Make sure none of the operands of this instruction do anything tricky.
- for (const MachineOperand &MOP : MI.operands())
- if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
- MOP.isTargetIndex())
- return outliner::InstrType::Illegal;
-
return outliner::InstrType::Legal;
}
@@ -9732,5 +9783,141 @@ X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB,
return It;
}
+bool X86InstrInfo::getMachineCombinerPatterns(
+ MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ bool DoRegPressureReduce) const {
+ unsigned Opc = Root.getOpcode();
+ switch (Opc) {
+ default:
+ return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
+ DoRegPressureReduce);
+ case X86::VPDPWSSDrr:
+ case X86::VPDPWSSDrm:
+ case X86::VPDPWSSDYrr:
+ case X86::VPDPWSSDYrm: {
+ Patterns.push_back(MachineCombinerPattern::DPWSSD);
+ return true;
+ }
+ case X86::VPDPWSSDZ128r:
+ case X86::VPDPWSSDZ128m:
+ case X86::VPDPWSSDZ256r:
+ case X86::VPDPWSSDZ256m:
+ case X86::VPDPWSSDZr:
+ case X86::VPDPWSSDZm: {
+ if (Subtarget.hasBWI())
+ Patterns.push_back(MachineCombinerPattern::DPWSSD);
+ return true;
+ }
+ }
+}
+
+static void
+genAlternativeDpCodeSequence(MachineInstr &Root, const TargetInstrInfo &TII,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
+ MachineFunction *MF = Root.getMF();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+
+ unsigned Opc = Root.getOpcode();
+ unsigned AddOpc = 0;
+ unsigned MaddOpc = 0;
+ switch (Opc) {
+ default:
+ assert(false && "It should not reach here");
+ break;
+ // vpdpwssd xmm2,xmm3,xmm1
+ // -->
+ // vpmaddwd xmm3,xmm3,xmm1
+ // vpaddd xmm2,xmm2,xmm3
+ case X86::VPDPWSSDrr:
+ MaddOpc = X86::VPMADDWDrr;
+ AddOpc = X86::VPADDDrr;
+ break;
+ case X86::VPDPWSSDrm:
+ MaddOpc = X86::VPMADDWDrm;
+ AddOpc = X86::VPADDDrr;
+ break;
+ case X86::VPDPWSSDZ128r:
+ MaddOpc = X86::VPMADDWDZ128rr;
+ AddOpc = X86::VPADDDZ128rr;
+ break;
+ case X86::VPDPWSSDZ128m:
+ MaddOpc = X86::VPMADDWDZ128rm;
+ AddOpc = X86::VPADDDZ128rr;
+ break;
+ // vpdpwssd ymm2,ymm3,ymm1
+ // -->
+ // vpmaddwd ymm3,ymm3,ymm1
+ // vpaddd ymm2,ymm2,ymm3
+ case X86::VPDPWSSDYrr:
+ MaddOpc = X86::VPMADDWDYrr;
+ AddOpc = X86::VPADDDYrr;
+ break;
+ case X86::VPDPWSSDYrm:
+ MaddOpc = X86::VPMADDWDYrm;
+ AddOpc = X86::VPADDDYrr;
+ break;
+ case X86::VPDPWSSDZ256r:
+ MaddOpc = X86::VPMADDWDZ256rr;
+ AddOpc = X86::VPADDDZ256rr;
+ break;
+ case X86::VPDPWSSDZ256m:
+ MaddOpc = X86::VPMADDWDZ256rm;
+ AddOpc = X86::VPADDDZ256rr;
+ break;
+ // vpdpwssd zmm2,zmm3,zmm1
+ // -->
+ // vpmaddwd zmm3,zmm3,zmm1
+ // vpaddd zmm2,zmm2,zmm3
+ case X86::VPDPWSSDZr:
+ MaddOpc = X86::VPMADDWDZrr;
+ AddOpc = X86::VPADDDZrr;
+ break;
+ case X86::VPDPWSSDZm:
+ MaddOpc = X86::VPMADDWDZrm;
+ AddOpc = X86::VPADDDZrr;
+ break;
+ }
+ // Create vpmaddwd.
+ const TargetRegisterClass *RC =
+ RegInfo.getRegClass(Root.getOperand(0).getReg());
+ Register NewReg = RegInfo.createVirtualRegister(RC);
+ MachineInstr *Madd = Root.getMF()->CloneMachineInstr(&Root);
+ Madd->setDesc(TII.get(MaddOpc));
+ Madd->untieRegOperand(1);
+ Madd->removeOperand(1);
+ Madd->getOperand(0).setReg(NewReg);
+ InstrIdxForVirtReg.insert(std::make_pair(NewReg, 0));
+ // Create vpaddd.
+ Register DstReg = Root.getOperand(0).getReg();
+ bool IsKill = Root.getOperand(1).isKill();
+ MachineInstr *Add =
+ BuildMI(*MF, MIMetadata(Root), TII.get(AddOpc), DstReg)
+ .addReg(Root.getOperand(1).getReg(), getKillRegState(IsKill))
+ .addReg(Madd->getOperand(0).getReg(), getKillRegState(true));
+ InsInstrs.push_back(Madd);
+ InsInstrs.push_back(Add);
+ DelInstrs.push_back(&Root);
+}
+
+void X86InstrInfo::genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
+ switch (Pattern) {
+ default:
+ // Reassociate instructions.
+ TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
+ DelInstrs, InstrIdxForVirtReg);
+ return;
+ case MachineCombinerPattern::DPWSSD:
+ genAlternativeDpCodeSequence(Root, *this, InsInstrs, DelInstrs,
+ InstrIdxForVirtReg);
+ return;
+ }
+}
+
#define GET_INSTRINFO_HELPERS
#include "X86GenInstrInfo.inc"
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 02cb5f761747..82554032ebd6 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -327,6 +327,8 @@ public:
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
+ int getJumpTableIndex(const MachineInstr &MI) const override;
+
std::optional<ExtAddrMode>
getAddrModeFromMemoryOp(const MachineInstr &MemI,
const TargetRegisterInfo *TRI) const override;
@@ -551,14 +553,14 @@ public:
ArrayRef<std::pair<unsigned, const char *>>
getSerializableDirectMachineOperandTargetFlags() const override;
- outliner::OutlinedFunction getOutliningCandidateInfo(
+ std::optional<outliner::OutlinedFunction> getOutliningCandidateInfo(
std::vector<outliner::Candidate> &RepeatedSequenceLocs) const override;
bool isFunctionSafeToOutlineFrom(MachineFunction &MF,
bool OutlineFromLinkOnceODRs) const override;
outliner::InstrType
- getOutliningType(MachineBasicBlock::iterator &MIT, unsigned Flags) const override;
+ getOutliningTypeImpl(MachineBasicBlock::iterator &MIT, unsigned Flags) const override;
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF,
const outliner::OutlinedFunction &OF) const override;
@@ -602,6 +604,34 @@ protected:
std::optional<DestSourcePair>
isCopyInstrImpl(const MachineInstr &MI) const override;
+ /// Return true when there is potentially a faster code sequence for an
+ /// instruction chain ending in \p Root. All potential patterns are listed in
+ /// the \p Patterns vector. Patterns should be sorted in priority order since
+ /// the pattern evaluator stops checking as soon as it finds a faster
+ /// sequence.
+ bool
+ getMachineCombinerPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns,
+ bool DoRegPressureReduce) const override;
+
+ /// When getMachineCombinerPatterns() finds potential patterns,
+ /// this function generates the instructions that could replace the
+ /// original code sequence.
+ void genAlternativeCodeSequence(
+ MachineInstr &Root, MachineCombinerPattern Pattern,
+ SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
+ DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const override;
+
+ /// When calculating the latency of the root instruction, decide whether to
+ /// accumulate the latency of the whole generated sequence into the root
+ /// latency.
+ /// \param Root - Instruction that could be combined with one of its operands.
+ /// For the X86 pattern vpdpwssd -> (vpmaddwd + vpaddd), the vpmaddwd is not
+ /// on the critical path, so the root latency does not need to accumulate the
+ /// vpmaddwd latency.
+ bool accumulateInstrSeqToRootLatency(MachineInstr &Root) const override {
+ return false;
+ }
+
private:
/// This is a helper for convertToThreeAddress for 8 and 16-bit instructions.
/// We use 32-bit LEA to form 3-address code by promoting to a 32-bit
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index f26b6d7a588a..08e6e4e0627b 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -1,4 +1,4 @@
-//===-- X86InstrInfo.td - Main X86 Instruction Definition --*- tablegen -*-===//
+//===-- X86InstrInfo.td - Main X86 Instruction Properties --*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -6,9 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file describes the X86 instruction set, defining the instructions, and
-// properties of the instructions which are needed for code generation, machine
-// code emission, and analysis.
+// This file describes the X86 properties of the instructions which are needed
+// for code generation, machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//
@@ -174,7 +173,7 @@ def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86cas16pair,
[SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore,
SDNPMayLoad, SDNPMemOperand]>;
-def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret,
+def X86retglue : SDNode<"X86ISD::RET_GLUE", SDTX86Ret,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def X86iret : SDNode<"X86ISD::IRET", SDTX86Ret,
[SDNPHasChain, SDNPOptInGlue]>;
@@ -381,6 +380,9 @@ let RenderMethod = "addMemOperands", SuperClasses = [X86MemAsmOperand] in {
def X86Mem512_RC256XOperand : AsmOperandClass { let Name = "Mem512_RC256X"; }
def X86Mem256_RC512Operand : AsmOperandClass { let Name = "Mem256_RC512"; }
def X86Mem512_RC512Operand : AsmOperandClass { let Name = "Mem512_RC512"; }
+ def X86Mem512_GR16Operand : AsmOperandClass { let Name = "Mem512_GR16"; }
+ def X86Mem512_GR32Operand : AsmOperandClass { let Name = "Mem512_GR32"; }
+ def X86Mem512_GR64Operand : AsmOperandClass { let Name = "Mem512_GR64"; }
def X86SibMemOperand : AsmOperandClass { let Name = "SibMem"; }
}
@@ -433,6 +435,11 @@ def f128mem : X86MemOperand<"printxmmwordmem", X86Mem128AsmOperand, 128>;
def f256mem : X86MemOperand<"printymmwordmem", X86Mem256AsmOperand, 256>;
def f512mem : X86MemOperand<"printzmmwordmem", X86Mem512AsmOperand, 512>;
+// 32/64 mode specific mem operands
+def i512mem_GR16 : X86MemOperand<"printzmmwordmem", X86Mem512_GR16Operand, 512>;
+def i512mem_GR32 : X86MemOperand<"printzmmwordmem", X86Mem512_GR32Operand, 512>;
+def i512mem_GR64 : X86MemOperand<"printzmmwordmem", X86Mem512_GR64Operand, 512>;
+
// Gather mem operands
def vx64mem : X86VMemOperand<VR128, "printqwordmem", X86Mem64_RC128Operand, 64>;
def vx128mem : X86VMemOperand<VR128, "printxmmwordmem", X86Mem128_RC128Operand, 128>;
@@ -917,6 +924,7 @@ def HasVNNI : Predicate<"Subtarget->hasVNNI()">;
def HasVP2INTERSECT : Predicate<"Subtarget->hasVP2INTERSECT()">;
def HasBF16 : Predicate<"Subtarget->hasBF16()">;
def HasFP16 : Predicate<"Subtarget->hasFP16()">;
+def HasAVXVNNIINT16 : Predicate<"Subtarget->hasAVXVNNIINT16()">;
def HasAVXVNNIINT8 : Predicate<"Subtarget->hasAVXVNNIINT8()">;
def HasAVXVNNI : Predicate <"Subtarget->hasAVXVNNI()">;
def NoVLX_Or_NoVNNI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasVNNI()">;
@@ -960,7 +968,9 @@ def NoVLX_Or_NoIFMA : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasIFMA()">
def HasRTM : Predicate<"Subtarget->hasRTM()">;
def HasADX : Predicate<"Subtarget->hasADX()">;
def HasSHA : Predicate<"Subtarget->hasSHA()">;
+def HasSHA512 : Predicate<"Subtarget->hasSHA512()">;
def HasSGX : Predicate<"Subtarget->hasSGX()">;
+def HasSM3 : Predicate<"Subtarget->hasSM3()">;
def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
def HasSSEPrefetch : Predicate<"Subtarget->hasSSEPrefetch()">;
def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">;
@@ -979,6 +989,7 @@ def HasPTWRITE : Predicate<"Subtarget->hasPTWRITE()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def HasSHSTK : Predicate<"Subtarget->hasSHSTK()">;
+def HasSM4 : Predicate<"Subtarget->hasSM4()">;
def HasCLFLUSH : Predicate<"Subtarget->hasCLFLUSH()">;
def HasCLFLUSHOPT : Predicate<"Subtarget->hasCLFLUSHOPT()">;
def HasCLWB : Predicate<"Subtarget->hasCLWB()">;
@@ -1003,6 +1014,7 @@ def HasTSXLDTRK : Predicate<"Subtarget->hasTSXLDTRK()">;
def HasAMXTILE : Predicate<"Subtarget->hasAMXTILE()">;
def HasAMXBF16 : Predicate<"Subtarget->hasAMXBF16()">;
def HasAMXINT8 : Predicate<"Subtarget->hasAMXINT8()">;
+def HasAMXCOMPLEX : Predicate<"Subtarget->hasAMXCOMPLEX()">;
def HasUINTR : Predicate<"Subtarget->hasUINTR()">;
def HasCRC32 : Predicate<"Subtarget->hasCRC32()">;
@@ -1263,1950 +1275,146 @@ def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (unindexedload node:$ptr)), [
return LD->getAlign() >= 4 && LD->isSimple();
}]>;
-
-// An 'and' node with a single use.
-def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
- return N->hasOneUse();
-}]>;
-// An 'srl' node with a single use.
-def srl_su : PatFrag<(ops node:$lhs, node:$rhs), (srl node:$lhs, node:$rhs), [{
+// binary op with only one user
+class binop_oneuse<SDPatternOperator operator>
+ : PatFrag<(ops node:$A, node:$B),
+ (operator node:$A, node:$B), [{
return N->hasOneUse();
}]>;
-// An 'trunc' node with a single use.
-def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
- return N->hasOneUse();
-}]>;
-
-//===----------------------------------------------------------------------===//
-// Instruction list.
-//
-
-// Nop
-let hasSideEffects = 0, SchedRW = [WriteNop] in {
- def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
- def NOOPW : I<0x1f, MRMXm, (outs), (ins i16mem:$zero),
- "nop{w}\t$zero", []>, TB, OpSize16, NotMemoryFoldable;
- def NOOPL : I<0x1f, MRMXm, (outs), (ins i32mem:$zero),
- "nop{l}\t$zero", []>, TB, OpSize32, NotMemoryFoldable;
- def NOOPQ : RI<0x1f, MRMXm, (outs), (ins i64mem:$zero),
- "nop{q}\t$zero", []>, TB, NotMemoryFoldable,
- Requires<[In64BitMode]>;
- // Also allow register so we can assemble/disassemble
- def NOOPWr : I<0x1f, MRMXr, (outs), (ins GR16:$zero),
- "nop{w}\t$zero", []>, TB, OpSize16, NotMemoryFoldable;
- def NOOPLr : I<0x1f, MRMXr, (outs), (ins GR32:$zero),
- "nop{l}\t$zero", []>, TB, OpSize32, NotMemoryFoldable;
- def NOOPQr : RI<0x1f, MRMXr, (outs), (ins GR64:$zero),
- "nop{q}\t$zero", []>, TB, NotMemoryFoldable,
- Requires<[In64BitMode]>;
-}
-
-
-// Constructing a stack frame.
-def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl),
- "enter\t$len, $lvl", []>, Sched<[WriteMicrocoded]>;
-
-let SchedRW = [WriteALU] in {
-let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, hasSideEffects=0 in
-def LEAVE : I<0xC9, RawFrm, (outs), (ins), "leave", []>,
- Requires<[Not64BitMode]>;
-
-let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, hasSideEffects = 0 in
-def LEAVE64 : I<0xC9, RawFrm, (outs), (ins), "leave", []>,
- Requires<[In64BitMode]>;
-} // SchedRW
-
-//===----------------------------------------------------------------------===//
-// Miscellaneous Instructions.
-//
-
-let isBarrier = 1, hasSideEffects = 1, usesCustomInserter = 1,
- SchedRW = [WriteSystem] in
- def Int_eh_sjlj_setup_dispatch
- : PseudoI<(outs), (ins), [(X86eh_sjlj_setup_dispatch)]>;
-
-let Defs = [ESP], Uses = [ESP], hasSideEffects=0 in {
-let mayLoad = 1, SchedRW = [WriteLoad] in {
-def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>,
- OpSize16;
-def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>,
- OpSize32, Requires<[Not64BitMode]>;
-// Long form for the disassembler.
-let isCodeGenOnly = 1, ForceDisassemble = 1 in {
-def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>,
- OpSize16, NotMemoryFoldable;
-def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>,
- OpSize32, Requires<[Not64BitMode]>, NotMemoryFoldable;
-} // isCodeGenOnly = 1, ForceDisassemble = 1
-} // mayLoad, SchedRW
-let mayStore = 1, mayLoad = 1, SchedRW = [WriteCopy] in {
-def POP16rmm: I<0x8F, MRM0m, (outs), (ins i16mem:$dst), "pop{w}\t$dst", []>,
- OpSize16;
-def POP32rmm: I<0x8F, MRM0m, (outs), (ins i32mem:$dst), "pop{l}\t$dst", []>,
- OpSize32, Requires<[Not64BitMode]>;
-} // mayStore, mayLoad, SchedRW
-
-let mayStore = 1, SchedRW = [WriteStore] in {
-def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
- OpSize16;
-def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>,
- OpSize32, Requires<[Not64BitMode]>;
-// Long form for the disassembler.
-let isCodeGenOnly = 1, ForceDisassemble = 1 in {
-def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
- OpSize16, NotMemoryFoldable;
-def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>,
- OpSize32, Requires<[Not64BitMode]>, NotMemoryFoldable;
-} // isCodeGenOnly = 1, ForceDisassemble = 1
-
-def PUSH16i8 : Ii8<0x6a, RawFrm, (outs), (ins i16i8imm:$imm),
- "push{w}\t$imm", []>, OpSize16;
-def PUSHi16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
- "push{w}\t$imm", []>, OpSize16;
-
-def PUSH32i8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm),
- "push{l}\t$imm", []>, OpSize32,
- Requires<[Not64BitMode]>;
-def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
- "push{l}\t$imm", []>, OpSize32,
- Requires<[Not64BitMode]>;
-} // mayStore, SchedRW
-
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in {
-def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src", []>,
- OpSize16;
-def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src", []>,
- OpSize32, Requires<[Not64BitMode]>;
-} // mayLoad, mayStore, SchedRW
-
-}
-
-let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
- SchedRW = [WriteRMW], Defs = [ESP] in {
- let Uses = [ESP] in
- def RDFLAGS32 : PseudoI<(outs GR32:$dst), (ins),
- [(set GR32:$dst, (int_x86_flags_read_u32))]>,
- Requires<[Not64BitMode]>;
-
- let Uses = [RSP] in
- def RDFLAGS64 : PseudoI<(outs GR64:$dst), (ins),
- [(set GR64:$dst, (int_x86_flags_read_u64))]>,
- Requires<[In64BitMode]>;
-}
-
-let mayLoad = 1, mayStore = 1, usesCustomInserter = 1,
- SchedRW = [WriteRMW] in {
- let Defs = [ESP, EFLAGS, DF], Uses = [ESP] in
- def WRFLAGS32 : PseudoI<(outs), (ins GR32:$src),
- [(int_x86_flags_write_u32 GR32:$src)]>,
- Requires<[Not64BitMode]>;
-
- let Defs = [RSP, EFLAGS, DF], Uses = [RSP] in
- def WRFLAGS64 : PseudoI<(outs), (ins GR64:$src),
- [(int_x86_flags_write_u64 GR64:$src)]>,
- Requires<[In64BitMode]>;
-}
-
-let Defs = [ESP, EFLAGS, DF], Uses = [ESP], mayLoad = 1, hasSideEffects=0,
- SchedRW = [WriteLoad] in {
-def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize16;
-def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>, OpSize32,
- Requires<[Not64BitMode]>;
-}
-
-let Defs = [ESP], Uses = [ESP, EFLAGS, DF], mayStore = 1, hasSideEffects=0,
- SchedRW = [WriteStore] in {
-def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize16;
-def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>, OpSize32,
- Requires<[Not64BitMode]>;
-}
-
-let Defs = [RSP], Uses = [RSP], hasSideEffects=0 in {
-let mayLoad = 1, SchedRW = [WriteLoad] in {
-def POP64r : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>,
- OpSize32, Requires<[In64BitMode]>;
-// Long form for the disassembler.
-let isCodeGenOnly = 1, ForceDisassemble = 1 in {
-def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>,
- OpSize32, Requires<[In64BitMode]>, NotMemoryFoldable;
-} // isCodeGenOnly = 1, ForceDisassemble = 1
-} // mayLoad, SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in
-def POP64rmm: I<0x8F, MRM0m, (outs), (ins i64mem:$dst), "pop{q}\t$dst", []>,
- OpSize32, Requires<[In64BitMode]>;
-let mayStore = 1, SchedRW = [WriteStore] in {
-def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", []>,
- OpSize32, Requires<[In64BitMode]>;
-// Long form for the disassembler.
-let isCodeGenOnly = 1, ForceDisassemble = 1 in {
-def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>,
- OpSize32, Requires<[In64BitMode]>, NotMemoryFoldable;
-} // isCodeGenOnly = 1, ForceDisassemble = 1
-} // mayStore, SchedRW
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in {
-def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>,
- OpSize32, Requires<[In64BitMode]>;
-} // mayLoad, mayStore, SchedRW
-}
-
-let Defs = [RSP], Uses = [RSP], hasSideEffects = 0, mayStore = 1,
- SchedRW = [WriteStore] in {
-def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
- "push{q}\t$imm", []>, OpSize32,
- Requires<[In64BitMode]>;
-def PUSH64i32 : Ii32S<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
- "push{q}\t$imm", []>, OpSize32,
- Requires<[In64BitMode]>;
-}
-
-let Defs = [RSP, EFLAGS, DF], Uses = [RSP], mayLoad = 1, hasSideEffects=0 in
-def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
- OpSize32, Requires<[In64BitMode]>, Sched<[WriteLoad]>;
-let Defs = [RSP], Uses = [RSP, EFLAGS, DF], mayStore = 1, hasSideEffects=0 in
-def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
- OpSize32, Requires<[In64BitMode]>, Sched<[WriteStore]>;
-
-let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
- mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteLoad] in {
-def POPA32 : I<0x61, RawFrm, (outs), (ins), "popal", []>,
- OpSize32, Requires<[Not64BitMode]>;
-def POPA16 : I<0x61, RawFrm, (outs), (ins), "popaw", []>,
- OpSize16, Requires<[Not64BitMode]>;
-}
-let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
- mayStore = 1, hasSideEffects = 0, SchedRW = [WriteStore] in {
-def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pushal", []>,
- OpSize32, Requires<[Not64BitMode]>;
-def PUSHA16 : I<0x60, RawFrm, (outs), (ins), "pushaw", []>,
- OpSize16, Requires<[Not64BitMode]>;
-}
-
-let Constraints = "$src = $dst", SchedRW = [WriteBSWAP32] in {
-// This instruction is a consequence of BSWAP32r observing operand size. The
-// encoding is valid, but the behavior is undefined.
-let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
-def BSWAP16r_BAD : I<0xC8, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
- "bswap{w}\t$dst", []>, OpSize16, TB;
-// GR32 = bswap GR32
-def BSWAP32r : I<0xC8, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
- "bswap{l}\t$dst",
- [(set GR32:$dst, (bswap GR32:$src))]>, OpSize32, TB;
-
-let SchedRW = [WriteBSWAP64] in
-def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
- "bswap{q}\t$dst",
- [(set GR64:$dst, (bswap GR64:$src))]>, TB;
-} // Constraints = "$src = $dst", SchedRW
-
-// Bit scan instructions.
-let Defs = [EFLAGS] in {
-def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "bsf{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>,
- PS, OpSize16, Sched<[WriteBSF]>;
-def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "bsf{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>,
- PS, OpSize16, Sched<[WriteBSFLd]>;
-def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "bsf{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))]>,
- PS, OpSize32, Sched<[WriteBSF]>;
-def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "bsf{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>,
- PS, OpSize32, Sched<[WriteBSFLd]>;
-def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "bsf{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>,
- PS, Sched<[WriteBSF]>;
-def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "bsf{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>,
- PS, Sched<[WriteBSFLd]>;
-
-def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "bsr{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>,
- PS, OpSize16, Sched<[WriteBSR]>;
-def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "bsr{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>,
- PS, OpSize16, Sched<[WriteBSRLd]>;
-def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "bsr{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))]>,
- PS, OpSize32, Sched<[WriteBSR]>;
-def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "bsr{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>,
- PS, OpSize32, Sched<[WriteBSRLd]>;
-def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "bsr{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>,
- PS, Sched<[WriteBSR]>;
-def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "bsr{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>,
- PS, Sched<[WriteBSRLd]>;
-} // Defs = [EFLAGS]
-
-let SchedRW = [WriteMicrocoded] in {
-let Defs = [EDI,ESI], Uses = [EDI,ESI,DF] in {
-def MOVSB : I<0xA4, RawFrmDstSrc, (outs), (ins dstidx8:$dst, srcidx8:$src),
- "movsb\t{$src, $dst|$dst, $src}", []>;
-def MOVSW : I<0xA5, RawFrmDstSrc, (outs), (ins dstidx16:$dst, srcidx16:$src),
- "movsw\t{$src, $dst|$dst, $src}", []>, OpSize16;
-def MOVSL : I<0xA5, RawFrmDstSrc, (outs), (ins dstidx32:$dst, srcidx32:$src),
- "movs{l|d}\t{$src, $dst|$dst, $src}", []>, OpSize32;
-def MOVSQ : RI<0xA5, RawFrmDstSrc, (outs), (ins dstidx64:$dst, srcidx64:$src),
- "movsq\t{$src, $dst|$dst, $src}", []>,
- Requires<[In64BitMode]>;
-}
-
-let Defs = [EDI], Uses = [AL,EDI,DF] in
-def STOSB : I<0xAA, RawFrmDst, (outs), (ins dstidx8:$dst),
- "stosb\t{%al, $dst|$dst, al}", []>;
-let Defs = [EDI], Uses = [AX,EDI,DF] in
-def STOSW : I<0xAB, RawFrmDst, (outs), (ins dstidx16:$dst),
- "stosw\t{%ax, $dst|$dst, ax}", []>, OpSize16;
-let Defs = [EDI], Uses = [EAX,EDI,DF] in
-def STOSL : I<0xAB, RawFrmDst, (outs), (ins dstidx32:$dst),
- "stos{l|d}\t{%eax, $dst|$dst, eax}", []>, OpSize32;
-let Defs = [RDI], Uses = [RAX,RDI,DF] in
-def STOSQ : RI<0xAB, RawFrmDst, (outs), (ins dstidx64:$dst),
- "stosq\t{%rax, $dst|$dst, rax}", []>,
- Requires<[In64BitMode]>;
-
-let Defs = [EDI,EFLAGS], Uses = [AL,EDI,DF] in
-def SCASB : I<0xAE, RawFrmDst, (outs), (ins dstidx8:$dst),
- "scasb\t{$dst, %al|al, $dst}", []>;
-let Defs = [EDI,EFLAGS], Uses = [AX,EDI,DF] in
-def SCASW : I<0xAF, RawFrmDst, (outs), (ins dstidx16:$dst),
- "scasw\t{$dst, %ax|ax, $dst}", []>, OpSize16;
-let Defs = [EDI,EFLAGS], Uses = [EAX,EDI,DF] in
-def SCASL : I<0xAF, RawFrmDst, (outs), (ins dstidx32:$dst),
- "scas{l|d}\t{$dst, %eax|eax, $dst}", []>, OpSize32;
-let Defs = [EDI,EFLAGS], Uses = [RAX,EDI,DF] in
-def SCASQ : RI<0xAF, RawFrmDst, (outs), (ins dstidx64:$dst),
- "scasq\t{$dst, %rax|rax, $dst}", []>,
- Requires<[In64BitMode]>;
-
-let Defs = [EDI,ESI,EFLAGS], Uses = [EDI,ESI,DF] in {
-def CMPSB : I<0xA6, RawFrmDstSrc, (outs), (ins dstidx8:$dst, srcidx8:$src),
- "cmpsb\t{$dst, $src|$src, $dst}", []>;
-def CMPSW : I<0xA7, RawFrmDstSrc, (outs), (ins dstidx16:$dst, srcidx16:$src),
- "cmpsw\t{$dst, $src|$src, $dst}", []>, OpSize16;
-def CMPSL : I<0xA7, RawFrmDstSrc, (outs), (ins dstidx32:$dst, srcidx32:$src),
- "cmps{l|d}\t{$dst, $src|$src, $dst}", []>, OpSize32;
-def CMPSQ : RI<0xA7, RawFrmDstSrc, (outs), (ins dstidx64:$dst, srcidx64:$src),
- "cmpsq\t{$dst, $src|$src, $dst}", []>,
- Requires<[In64BitMode]>;
-}
-} // SchedRW
-
-//===----------------------------------------------------------------------===//
-// Move Instructions.
-//
-let SchedRW = [WriteMove] in {
-let hasSideEffects = 0, isMoveReg = 1 in {
-def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
- "mov{b}\t{$src, $dst|$dst, $src}", []>;
-def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize16;
-def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, OpSize32;
-def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>;
-}
-
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
-def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
- "mov{b}\t{$src, $dst|$dst, $src}",
- [(set GR8:$dst, imm:$src)]>;
-def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src),
- "mov{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, imm:$src)]>, OpSize16;
-def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
- "mov{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, imm:$src)]>, OpSize32;
-def MOV64ri32 : RIi32S<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
- "mov{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, i64immSExt32:$src)]>;
-}
-let isReMaterializable = 1, isMoveImm = 1 in {
-def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
- "movabs{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, imm:$src)]>;
-}
-
-// Longer forms that use a ModR/M byte. Needed for disassembler
-let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
-def MOV8ri_alt : Ii8 <0xC6, MRM0r, (outs GR8 :$dst), (ins i8imm :$src),
- "mov{b}\t{$src, $dst|$dst, $src}", []>,
- FoldGenData<"MOV8ri">;
-def MOV16ri_alt : Ii16<0xC7, MRM0r, (outs GR16:$dst), (ins i16imm:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize16,
- FoldGenData<"MOV16ri">;
-def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, OpSize32,
- FoldGenData<"MOV32ri">;
-}
-} // SchedRW
-
-let SchedRW = [WriteStore] in {
-def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
- "mov{b}\t{$src, $dst|$dst, $src}",
- [(store (i8 imm_su:$src), addr:$dst)]>;
-def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
- "mov{w}\t{$src, $dst|$dst, $src}",
- [(store (i16 imm_su:$src), addr:$dst)]>, OpSize16;
-def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
- "mov{l}\t{$src, $dst|$dst, $src}",
- [(store (i32 imm_su:$src), addr:$dst)]>, OpSize32;
-def MOV64mi32 : RIi32S<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
- "mov{q}\t{$src, $dst|$dst, $src}",
- [(store i64immSExt32_su:$src, addr:$dst)]>,
- Requires<[In64BitMode]>;
-} // SchedRW
-
-def : Pat<(i32 relocImm:$src), (MOV32ri relocImm:$src)>;
-def : Pat<(i64 relocImm:$src), (MOV64ri relocImm:$src)>;
-
-def : Pat<(store (i8 relocImm8_su:$src), addr:$dst),
- (MOV8mi addr:$dst, relocImm8_su:$src)>;
-def : Pat<(store (i16 relocImm16_su:$src), addr:$dst),
- (MOV16mi addr:$dst, relocImm16_su:$src)>;
-def : Pat<(store (i32 relocImm32_su:$src), addr:$dst),
- (MOV32mi addr:$dst, relocImm32_su:$src)>;
-def : Pat<(store (i64 i64relocImmSExt32_su:$src), addr:$dst),
- (MOV64mi32 addr:$dst, i64immSExt32_su:$src)>;
-
-let hasSideEffects = 0 in {
-
-/// Memory offset versions of moves. The immediate is an address mode sized
-/// offset from the segment base.
-let SchedRW = [WriteALU] in {
-let mayLoad = 1 in {
-let Defs = [AL] in
-def MOV8ao32 : Ii32<0xA0, RawFrmMemOffs, (outs), (ins offset32_8:$src),
- "mov{b}\t{$src, %al|al, $src}", []>,
- AdSize32;
-let Defs = [AX] in
-def MOV16ao32 : Ii32<0xA1, RawFrmMemOffs, (outs), (ins offset32_16:$src),
- "mov{w}\t{$src, %ax|ax, $src}", []>,
- OpSize16, AdSize32;
-let Defs = [EAX] in
-def MOV32ao32 : Ii32<0xA1, RawFrmMemOffs, (outs), (ins offset32_32:$src),
- "mov{l}\t{$src, %eax|eax, $src}", []>,
- OpSize32, AdSize32;
-let Defs = [RAX] in
-def MOV64ao32 : RIi32<0xA1, RawFrmMemOffs, (outs), (ins offset32_64:$src),
- "mov{q}\t{$src, %rax|rax, $src}", []>,
- AdSize32;
-
-let Defs = [AL] in
-def MOV8ao16 : Ii16<0xA0, RawFrmMemOffs, (outs), (ins offset16_8:$src),
- "mov{b}\t{$src, %al|al, $src}", []>, AdSize16;
-let Defs = [AX] in
-def MOV16ao16 : Ii16<0xA1, RawFrmMemOffs, (outs), (ins offset16_16:$src),
- "mov{w}\t{$src, %ax|ax, $src}", []>,
- OpSize16, AdSize16;
-let Defs = [EAX] in
-def MOV32ao16 : Ii16<0xA1, RawFrmMemOffs, (outs), (ins offset16_32:$src),
- "mov{l}\t{$src, %eax|eax, $src}", []>,
- AdSize16, OpSize32;
-} // mayLoad
-let mayStore = 1 in {
-let Uses = [AL] in
-def MOV8o32a : Ii32<0xA2, RawFrmMemOffs, (outs), (ins offset32_8:$dst),
- "mov{b}\t{%al, $dst|$dst, al}", []>, AdSize32;
-let Uses = [AX] in
-def MOV16o32a : Ii32<0xA3, RawFrmMemOffs, (outs), (ins offset32_16:$dst),
- "mov{w}\t{%ax, $dst|$dst, ax}", []>,
- OpSize16, AdSize32;
-let Uses = [EAX] in
-def MOV32o32a : Ii32<0xA3, RawFrmMemOffs, (outs), (ins offset32_32:$dst),
- "mov{l}\t{%eax, $dst|$dst, eax}", []>,
- OpSize32, AdSize32;
-let Uses = [RAX] in
-def MOV64o32a : RIi32<0xA3, RawFrmMemOffs, (outs), (ins offset32_64:$dst),
- "mov{q}\t{%rax, $dst|$dst, rax}", []>,
- AdSize32;
-
-let Uses = [AL] in
-def MOV8o16a : Ii16<0xA2, RawFrmMemOffs, (outs), (ins offset16_8:$dst),
- "mov{b}\t{%al, $dst|$dst, al}", []>, AdSize16;
-let Uses = [AX] in
-def MOV16o16a : Ii16<0xA3, RawFrmMemOffs, (outs), (ins offset16_16:$dst),
- "mov{w}\t{%ax, $dst|$dst, ax}", []>,
- OpSize16, AdSize16;
-let Uses = [EAX] in
-def MOV32o16a : Ii16<0xA3, RawFrmMemOffs, (outs), (ins offset16_32:$dst),
- "mov{l}\t{%eax, $dst|$dst, eax}", []>,
- OpSize32, AdSize16;
-} // mayStore
-
-// These forms all have full 64-bit absolute addresses in their instructions
-// and use the movabs mnemonic to indicate this specific form.
-let mayLoad = 1 in {
-let Defs = [AL] in
-def MOV8ao64 : Ii64<0xA0, RawFrmMemOffs, (outs), (ins offset64_8:$src),
- "movabs{b}\t{$src, %al|al, $src}", []>,
- AdSize64;
-let Defs = [AX] in
-def MOV16ao64 : Ii64<0xA1, RawFrmMemOffs, (outs), (ins offset64_16:$src),
- "movabs{w}\t{$src, %ax|ax, $src}", []>,
- OpSize16, AdSize64;
-let Defs = [EAX] in
-def MOV32ao64 : Ii64<0xA1, RawFrmMemOffs, (outs), (ins offset64_32:$src),
- "movabs{l}\t{$src, %eax|eax, $src}", []>,
- OpSize32, AdSize64;
-let Defs = [RAX] in
-def MOV64ao64 : RIi64<0xA1, RawFrmMemOffs, (outs), (ins offset64_64:$src),
- "movabs{q}\t{$src, %rax|rax, $src}", []>,
- AdSize64;
-} // mayLoad
-
-let mayStore = 1 in {
-let Uses = [AL] in
-def MOV8o64a : Ii64<0xA2, RawFrmMemOffs, (outs), (ins offset64_8:$dst),
- "movabs{b}\t{%al, $dst|$dst, al}", []>,
- AdSize64;
-let Uses = [AX] in
-def MOV16o64a : Ii64<0xA3, RawFrmMemOffs, (outs), (ins offset64_16:$dst),
- "movabs{w}\t{%ax, $dst|$dst, ax}", []>,
- OpSize16, AdSize64;
-let Uses = [EAX] in
-def MOV32o64a : Ii64<0xA3, RawFrmMemOffs, (outs), (ins offset64_32:$dst),
- "movabs{l}\t{%eax, $dst|$dst, eax}", []>,
- OpSize32, AdSize64;
-let Uses = [RAX] in
-def MOV64o64a : RIi64<0xA3, RawFrmMemOffs, (outs), (ins offset64_64:$dst),
- "movabs{q}\t{%rax, $dst|$dst, rax}", []>,
- AdSize64;
-} // mayStore
-} // SchedRW
-} // hasSideEffects = 0
-
-let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
- SchedRW = [WriteMove], isMoveReg = 1 in {
-def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
- "mov{b}\t{$src, $dst|$dst, $src}", []>,
- FoldGenData<"MOV8rr">;
-def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize16,
- FoldGenData<"MOV16rr">;
-def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}", []>, OpSize32,
- FoldGenData<"MOV32rr">;
-def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>,
- FoldGenData<"MOV64rr">;
-}
-
-// Reversed version with ".s" suffix for GAS compatibility.
-def : InstAlias<"mov{b}.s\t{$src, $dst|$dst, $src}",
- (MOV8rr_REV GR8:$dst, GR8:$src), 0>;
-def : InstAlias<"mov{w}.s\t{$src, $dst|$dst, $src}",
- (MOV16rr_REV GR16:$dst, GR16:$src), 0>;
-def : InstAlias<"mov{l}.s\t{$src, $dst|$dst, $src}",
- (MOV32rr_REV GR32:$dst, GR32:$src), 0>;
-def : InstAlias<"mov{q}.s\t{$src, $dst|$dst, $src}",
- (MOV64rr_REV GR64:$dst, GR64:$src), 0>;
-def : InstAlias<"mov.s\t{$src, $dst|$dst, $src}",
- (MOV8rr_REV GR8:$dst, GR8:$src), 0, "att">;
-def : InstAlias<"mov.s\t{$src, $dst|$dst, $src}",
- (MOV16rr_REV GR16:$dst, GR16:$src), 0, "att">;
-def : InstAlias<"mov.s\t{$src, $dst|$dst, $src}",
- (MOV32rr_REV GR32:$dst, GR32:$src), 0, "att">;
-def : InstAlias<"mov.s\t{$src, $dst|$dst, $src}",
- (MOV64rr_REV GR64:$dst, GR64:$src), 0, "att">;
-
-let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
-def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
- "mov{b}\t{$src, $dst|$dst, $src}",
- [(set GR8:$dst, (loadi8 addr:$src))]>;
-def MOV16rm : I<0x8B, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "mov{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (loadi16 addr:$src))]>, OpSize16;
-def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "mov{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (loadi32 addr:$src))]>, OpSize32;
-def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "mov{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (load addr:$src))]>;
-}
-
-let SchedRW = [WriteStore] in {
-def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
- "mov{b}\t{$src, $dst|$dst, $src}",
- [(store GR8:$src, addr:$dst)]>;
-def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
- "mov{w}\t{$src, $dst|$dst, $src}",
- [(store GR16:$src, addr:$dst)]>, OpSize16;
-def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "mov{l}\t{$src, $dst|$dst, $src}",
- [(store GR32:$src, addr:$dst)]>, OpSize32;
-def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "mov{q}\t{$src, $dst|$dst, $src}",
- [(store GR64:$src, addr:$dst)]>;
-} // SchedRW
-
-// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
-// that they can be used for copying and storing h registers, which can't be
-// encoded when a REX prefix is present.
-let isCodeGenOnly = 1 in {
-let hasSideEffects = 0, isMoveReg = 1 in
-def MOV8rr_NOREX : I<0x88, MRMDestReg,
- (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
- "mov{b}\t{$src, $dst|$dst, $src}", []>,
- Sched<[WriteMove]>;
-let mayStore = 1, hasSideEffects = 0 in
-def MOV8mr_NOREX : I<0x88, MRMDestMem,
- (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
- "mov{b}\t{$src, $dst|$dst, $src}", []>,
- Sched<[WriteStore]>;
-let mayLoad = 1, hasSideEffects = 0,
- canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
- (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
- "mov{b}\t{$src, $dst|$dst, $src}", []>,
- Sched<[WriteLoad]>;
-}
-
-
-// Condition code ops, incl. set if equal/not equal/...
-let SchedRW = [WriteLAHFSAHF] in {
-let Defs = [EFLAGS], Uses = [AH], hasSideEffects = 0 in
-def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>, // flags = AH
- Requires<[HasLAHFSAHF]>;
-let Defs = [AH], Uses = [EFLAGS], hasSideEffects = 0 in
-def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>, // AH = flags
- Requires<[HasLAHFSAHF]>;
-} // SchedRW
-
-//===----------------------------------------------------------------------===//
-// Bit tests instructions: BT, BTS, BTR, BTC.
-
-let Defs = [EFLAGS] in {
-let SchedRW = [WriteBitTest] in {
-def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
- "bt{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>,
- OpSize16, TB, NotMemoryFoldable;
-def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
- "bt{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))]>,
- OpSize32, TB, NotMemoryFoldable;
-def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
- "bt{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB,
- NotMemoryFoldable;
-} // SchedRW
-
-// Unlike with the register+register form, the memory+register form of the
-// bt instruction does not ignore the high bits of the index. From ISel's
-// perspective, this is pretty bizarre. Make these instructions disassembly
-// only for now. These instructions are also slow on modern CPUs so that's
-// another reason to avoid generating them.
-
-let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteBitTestRegLd] in {
- def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
- "bt{w}\t{$src2, $src1|$src1, $src2}",
- []>, OpSize16, TB, NotMemoryFoldable;
- def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
- "bt{l}\t{$src2, $src1|$src1, $src2}",
- []>, OpSize32, TB, NotMemoryFoldable;
- def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "bt{q}\t{$src2, $src1|$src1, $src2}",
- []>, TB, NotMemoryFoldable;
-}
-
-let SchedRW = [WriteBitTest] in {
-def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16u8imm:$src2),
- "bt{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR16:$src1, imm:$src2))]>,
- OpSize16, TB;
-def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32u8imm:$src2),
- "bt{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR32:$src1, imm:$src2))]>,
- OpSize32, TB;
-def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64u8imm:$src2),
- "bt{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR64:$src1, imm:$src2))]>, TB;
-} // SchedRW
-
-// Note that these instructions aren't slow because that only applies when the
-// other operand is in a register. When it's an immediate, bt is still fast.
-let SchedRW = [WriteBitTestImmLd] in {
-def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16u8imm:$src2),
- "bt{w}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt (loadi16 addr:$src1),
- imm:$src2))]>,
- OpSize16, TB;
-def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32u8imm:$src2),
- "bt{l}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt (loadi32 addr:$src1),
- imm:$src2))]>,
- OpSize32, TB;
-def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64u8imm:$src2),
- "bt{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt (loadi64 addr:$src1),
- imm:$src2))]>, TB,
- Requires<[In64BitMode]>;
-} // SchedRW
-
-let hasSideEffects = 0 in {
-let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
-def BTC16rr : I<0xBB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "btc{w}\t{$src2, $src1|$src1, $src2}", []>,
- OpSize16, TB, NotMemoryFoldable;
-def BTC32rr : I<0xBB, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "btc{l}\t{$src2, $src1|$src1, $src2}", []>,
- OpSize32, TB, NotMemoryFoldable;
-def BTC64rr : RI<0xBB, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB,
- NotMemoryFoldable;
-} // SchedRW
-
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetRegRMW] in {
-def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
- "btc{w}\t{$src2, $src1|$src1, $src2}", []>,
- OpSize16, TB, NotMemoryFoldable;
-def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
- "btc{l}\t{$src2, $src1|$src1, $src2}", []>,
- OpSize32, TB, NotMemoryFoldable;
-def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB,
- NotMemoryFoldable;
-}
-let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
-def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16u8imm:$src2),
- "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
-def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32u8imm:$src2),
- "btc{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB;
-def BTC64ri8 : RIi8<0xBA, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i64u8imm:$src2),
- "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-} // SchedRW
-
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
-def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16u8imm:$src2),
- "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
-def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32u8imm:$src2),
- "btc{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB;
-def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64u8imm:$src2),
- "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB,
- Requires<[In64BitMode]>;
-}
-
-let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
-def BTR16rr : I<0xB3, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "btr{w}\t{$src2, $src1|$src1, $src2}", []>,
- OpSize16, TB, NotMemoryFoldable;
-def BTR32rr : I<0xB3, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "btr{l}\t{$src2, $src1|$src1, $src2}", []>,
- OpSize32, TB, NotMemoryFoldable;
-def BTR64rr : RI<0xB3, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB,
- NotMemoryFoldable;
-} // SchedRW
-
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetRegRMW] in {
-def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
- "btr{w}\t{$src2, $src1|$src1, $src2}", []>,
- OpSize16, TB, NotMemoryFoldable;
-def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
- "btr{l}\t{$src2, $src1|$src1, $src2}", []>,
- OpSize32, TB, NotMemoryFoldable;
-def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB,
- NotMemoryFoldable;
-}
-
-let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
-def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16u8imm:$src2),
- "btr{w}\t{$src2, $src1|$src1, $src2}", []>,
- OpSize16, TB;
-def BTR32ri8 : Ii8<0xBA, MRM6r, (outs GR32:$dst), (ins GR32:$src1, i32u8imm:$src2),
- "btr{l}\t{$src2, $src1|$src1, $src2}", []>,
- OpSize32, TB;
-def BTR64ri8 : RIi8<0xBA, MRM6r, (outs GR64:$dst), (ins GR64:$src1, i64u8imm:$src2),
- "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-} // SchedRW
-
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
-def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16u8imm:$src2),
- "btr{w}\t{$src2, $src1|$src1, $src2}", []>,
- OpSize16, TB;
-def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32u8imm:$src2),
- "btr{l}\t{$src2, $src1|$src1, $src2}", []>,
- OpSize32, TB;
-def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64u8imm:$src2),
- "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB,
- Requires<[In64BitMode]>;
-}
-
-let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
-def BTS16rr : I<0xAB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "bts{w}\t{$src2, $src1|$src1, $src2}", []>,
- OpSize16, TB, NotMemoryFoldable;
-def BTS32rr : I<0xAB, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "bts{l}\t{$src2, $src1|$src1, $src2}", []>,
- OpSize32, TB, NotMemoryFoldable;
-def BTS64rr : RI<0xAB, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
- "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB,
- NotMemoryFoldable;
-} // SchedRW
-
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetRegRMW] in {
-def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
- "bts{w}\t{$src2, $src1|$src1, $src2}", []>,
- OpSize16, TB, NotMemoryFoldable;
-def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
- "bts{l}\t{$src2, $src1|$src1, $src2}", []>,
- OpSize32, TB, NotMemoryFoldable;
-def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
- "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB,
- NotMemoryFoldable;
-}
-
-let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
-def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16u8imm:$src2),
- "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
-def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32u8imm:$src2),
- "bts{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB;
-def BTS64ri8 : RIi8<0xBA, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64u8imm:$src2),
- "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-} // SchedRW
-
-let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
-def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16u8imm:$src2),
- "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
-def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32u8imm:$src2),
- "bts{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB;
-def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64u8imm:$src2),
- "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB,
- Requires<[In64BitMode]>;
-}
-} // hasSideEffects = 0
-} // Defs = [EFLAGS]
-
-
-//===----------------------------------------------------------------------===//
-// Atomic support
-//
-
-// Atomic swap. These are just normal xchg instructions. But since a memory
-// operand is referenced, the atomicity is ensured.
-multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag> {
- let Constraints = "$val = $dst", SchedRW = [WriteALULd, WriteRMW] in {
- def NAME#8rm : I<opc8, MRMSrcMem, (outs GR8:$dst),
- (ins GR8:$val, i8mem:$ptr),
- !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR8:$dst,
- (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))]>;
- def NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$val, i16mem:$ptr),
- !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR16:$dst,
- (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))]>,
- OpSize16;
- def NAME#32rm : I<opc, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$val, i32mem:$ptr),
- !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR32:$dst,
- (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))]>,
- OpSize32;
- def NAME#64rm : RI<opc, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$val, i64mem:$ptr),
- !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
- [(set
- GR64:$dst,
- (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))]>;
- }
-}
-
-defm XCHG : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap">, NotMemoryFoldable;
-
-// Swap between registers.
-let SchedRW = [WriteXCHG] in {
-let Constraints = "$src1 = $dst1, $src2 = $dst2", hasSideEffects = 0 in {
-def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst1, GR8:$dst2),
- (ins GR8:$src1, GR8:$src2),
- "xchg{b}\t{$src2, $src1|$src1, $src2}", []>, NotMemoryFoldable;
-def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst1, GR16:$dst2),
- (ins GR16:$src1, GR16:$src2),
- "xchg{w}\t{$src2, $src1|$src1, $src2}", []>,
- OpSize16, NotMemoryFoldable;
-def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst1, GR32:$dst2),
- (ins GR32:$src1, GR32:$src2),
- "xchg{l}\t{$src2, $src1|$src1, $src2}", []>,
- OpSize32, NotMemoryFoldable;
-def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst1, GR64:$dst2),
- (ins GR64:$src1 ,GR64:$src2),
- "xchg{q}\t{$src2, $src1|$src1, $src2}", []>, NotMemoryFoldable;
-}
-
-// Swap between EAX and other registers.
-let Constraints = "$src = $dst", hasSideEffects = 0 in {
-let Uses = [AX], Defs = [AX] in
-def XCHG16ar : I<0x90, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
- "xchg{w}\t{$src, %ax|ax, $src}", []>, OpSize16;
-let Uses = [EAX], Defs = [EAX] in
-def XCHG32ar : I<0x90, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
- "xchg{l}\t{$src, %eax|eax, $src}", []>, OpSize32;
-let Uses = [RAX], Defs = [RAX] in
-def XCHG64ar : RI<0x90, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
- "xchg{q}\t{$src, %rax|rax, $src}", []>;
-}
-} // SchedRW
-
-let hasSideEffects = 0, Constraints = "$src1 = $dst1, $src2 = $dst2",
- Defs = [EFLAGS], SchedRW = [WriteXCHG] in {
-def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst1, GR8:$dst2),
- (ins GR8:$src1, GR8:$src2),
- "xadd{b}\t{$src2, $src1|$src1, $src2}", []>, TB;
-def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst1, GR16:$dst2),
- (ins GR16:$src1, GR16:$src2),
- "xadd{w}\t{$src2, $src1|$src1, $src2}", []>, TB, OpSize16;
-def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst1, GR32:$dst2),
- (ins GR32:$src1, GR32:$src2),
- "xadd{l}\t{$src2, $src1|$src1, $src2}", []>, TB, OpSize32;
-def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst1, GR64:$dst2),
- (ins GR64:$src1, GR64:$src2),
- "xadd{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
-} // SchedRW
-
-let mayLoad = 1, mayStore = 1, hasSideEffects = 0, Constraints = "$val = $dst",
- Defs = [EFLAGS], SchedRW = [WriteALULd, WriteRMW] in {
-def XADD8rm : I<0xC0, MRMSrcMem, (outs GR8:$dst),
- (ins GR8:$val, i8mem:$ptr),
- "xadd{b}\t{$val, $ptr|$ptr, $val}", []>, TB;
-def XADD16rm : I<0xC1, MRMSrcMem, (outs GR16:$dst),
- (ins GR16:$val, i16mem:$ptr),
- "xadd{w}\t{$val, $ptr|$ptr, $val}", []>, TB,
- OpSize16;
-def XADD32rm : I<0xC1, MRMSrcMem, (outs GR32:$dst),
- (ins GR32:$val, i32mem:$ptr),
- "xadd{l}\t{$val, $ptr|$ptr, $val}", []>, TB,
- OpSize32;
-def XADD64rm : RI<0xC1, MRMSrcMem, (outs GR64:$dst),
- (ins GR64:$val, i64mem:$ptr),
- "xadd{q}\t{$val, $ptr|$ptr, $val}", []>, TB;
-
-}
-
-let SchedRW = [WriteCMPXCHG], hasSideEffects = 0 in {
-let Defs = [AL, EFLAGS], Uses = [AL] in
-def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
- "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB,
- NotMemoryFoldable;
-let Defs = [AX, EFLAGS], Uses = [AX] in
-def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
- "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16,
- NotMemoryFoldable;
-let Defs = [EAX, EFLAGS], Uses = [EAX] in
-def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
- "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32,
- NotMemoryFoldable;
-let Defs = [RAX, EFLAGS], Uses = [RAX] in
-def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
- "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB,
- NotMemoryFoldable;
-} // SchedRW, hasSideEffects
-
-let SchedRW = [WriteCMPXCHGRMW], mayLoad = 1, mayStore = 1,
- hasSideEffects = 0 in {
-let Defs = [AL, EFLAGS], Uses = [AL] in
-def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
- "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB,
- NotMemoryFoldable;
-let Defs = [AX, EFLAGS], Uses = [AX] in
-def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
- "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16,
- NotMemoryFoldable;
-let Defs = [EAX, EFLAGS], Uses = [EAX] in
-def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32,
- NotMemoryFoldable;
-let Defs = [RAX, EFLAGS], Uses = [RAX] in
-def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB,
- NotMemoryFoldable;
-
-let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
-def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
- "cmpxchg8b\t$dst", []>, TB, Requires<[HasCX8]>;
-
-let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
-// NOTE: In64BitMode check needed for the AssemblerPredicate.
-def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
- "cmpxchg16b\t$dst", []>,
- TB, Requires<[HasCX16,In64BitMode]>;
-} // SchedRW, mayLoad, mayStore, hasSideEffects
-
-
-// Lock instruction prefix
-let SchedRW = [WriteMicrocoded] in
-def LOCK_PREFIX : I<0xF0, PrefixByte, (outs), (ins), "lock", []>;
-
-let SchedRW = [WriteNop] in {
-
-// Rex64 instruction prefix
-def REX64_PREFIX : I<0x48, PrefixByte, (outs), (ins), "rex64", []>,
- Requires<[In64BitMode]>;
-
-// Data16 instruction prefix
-def DATA16_PREFIX : I<0x66, PrefixByte, (outs), (ins), "data16", []>;
-} // SchedRW
-
-// Repeat string operation instruction prefixes
-let Defs = [ECX], Uses = [ECX,DF], SchedRW = [WriteMicrocoded] in {
-// Repeat (used with INS, OUTS, MOVS, LODS and STOS)
-def REP_PREFIX : I<0xF3, PrefixByte, (outs), (ins), "rep", []>;
-// Repeat while not equal (used with CMPS and SCAS)
-def REPNE_PREFIX : I<0xF2, PrefixByte, (outs), (ins), "repne", []>;
-}
-
-// String manipulation instructions
-let SchedRW = [WriteMicrocoded] in {
-let Defs = [AL,ESI], Uses = [ESI,DF] in
-def LODSB : I<0xAC, RawFrmSrc, (outs), (ins srcidx8:$src),
- "lodsb\t{$src, %al|al, $src}", []>;
-let Defs = [AX,ESI], Uses = [ESI,DF] in
-def LODSW : I<0xAD, RawFrmSrc, (outs), (ins srcidx16:$src),
- "lodsw\t{$src, %ax|ax, $src}", []>, OpSize16;
-let Defs = [EAX,ESI], Uses = [ESI,DF] in
-def LODSL : I<0xAD, RawFrmSrc, (outs), (ins srcidx32:$src),
- "lods{l|d}\t{$src, %eax|eax, $src}", []>, OpSize32;
-let Defs = [RAX,ESI], Uses = [ESI,DF] in
-def LODSQ : RI<0xAD, RawFrmSrc, (outs), (ins srcidx64:$src),
- "lodsq\t{$src, %rax|rax, $src}", []>,
- Requires<[In64BitMode]>;
-}
-
-let SchedRW = [WriteSystem] in {
-let Defs = [ESI], Uses = [DX,ESI,DF] in {
-def OUTSB : I<0x6E, RawFrmSrc, (outs), (ins srcidx8:$src),
- "outsb\t{$src, %dx|dx, $src}", []>;
-def OUTSW : I<0x6F, RawFrmSrc, (outs), (ins srcidx16:$src),
- "outsw\t{$src, %dx|dx, $src}", []>, OpSize16;
-def OUTSL : I<0x6F, RawFrmSrc, (outs), (ins srcidx32:$src),
- "outs{l|d}\t{$src, %dx|dx, $src}", []>, OpSize32;
-}
-
-let Defs = [EDI], Uses = [DX,EDI,DF] in {
-def INSB : I<0x6C, RawFrmDst, (outs), (ins dstidx8:$dst),
- "insb\t{%dx, $dst|$dst, dx}", []>;
-def INSW : I<0x6D, RawFrmDst, (outs), (ins dstidx16:$dst),
- "insw\t{%dx, $dst|$dst, dx}", []>, OpSize16;
-def INSL : I<0x6D, RawFrmDst, (outs), (ins dstidx32:$dst),
- "ins{l|d}\t{%dx, $dst|$dst, dx}", []>, OpSize32;
-}
-}
-
-// EFLAGS management instructions.
-let SchedRW = [WriteALU], Defs = [EFLAGS], Uses = [EFLAGS] in {
-def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", []>;
-def STC : I<0xF9, RawFrm, (outs), (ins), "stc", []>;
-def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", []>;
-}
-
-// DF management instructions.
-let SchedRW = [WriteALU], Defs = [DF] in {
-def CLD : I<0xFC, RawFrm, (outs), (ins), "cld", []>;
-def STD : I<0xFD, RawFrm, (outs), (ins), "std", []>;
-}
-
-// Table lookup instructions
-let Uses = [AL,EBX], Defs = [AL], hasSideEffects = 0, mayLoad = 1 in
-def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", []>, Sched<[WriteLoad]>;
-
-let SchedRW = [WriteMicrocoded] in {
-// ASCII Adjust After Addition
-let Uses = [AL,EFLAGS], Defs = [AX,EFLAGS], hasSideEffects = 0 in
-def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", []>,
- Requires<[Not64BitMode]>;
-
-// ASCII Adjust AX Before Division
-let Uses = [AX], Defs = [AX,EFLAGS], hasSideEffects = 0 in
-def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src),
- "aad\t$src", []>, Requires<[Not64BitMode]>;
-
-// ASCII Adjust AX After Multiply
-let Uses = [AL], Defs = [AX,EFLAGS], hasSideEffects = 0 in
-def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src),
- "aam\t$src", []>, Requires<[Not64BitMode]>;
-
-// ASCII Adjust AL After Subtraction - sets AF and CF on a decimal borrow
-let Uses = [AL,EFLAGS], Defs = [AX,EFLAGS], hasSideEffects = 0 in
-def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", []>,
- Requires<[Not64BitMode]>;
-
-// Decimal Adjust AL after Addition
-let Uses = [AL,EFLAGS], Defs = [AL,EFLAGS], hasSideEffects = 0 in
-def DAA : I<0x27, RawFrm, (outs), (ins), "daa", []>,
- Requires<[Not64BitMode]>;
-
-// Decimal Adjust AL after Subtraction
-let Uses = [AL,EFLAGS], Defs = [AL,EFLAGS], hasSideEffects = 0 in
-def DAS : I<0x2F, RawFrm, (outs), (ins), "das", []>,
- Requires<[Not64BitMode]>;
-} // SchedRW
-
-let SchedRW = [WriteSystem] in {
-// Check Array Index Against Bounds
-// Note: "bound" does not have reversed operands in at&t syntax.
-def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "bound\t$dst, $src", []>, OpSize16,
- Requires<[Not64BitMode]>;
-def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "bound\t$dst, $src", []>, OpSize32,
- Requires<[Not64BitMode]>;
-
-// Adjust RPL Field of Segment Selector
-def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
- "arpl\t{$src, $dst|$dst, $src}", []>,
- Requires<[Not64BitMode]>, NotMemoryFoldable;
-let mayStore = 1 in
-def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
- "arpl\t{$src, $dst|$dst, $src}", []>,
- Requires<[Not64BitMode]>, NotMemoryFoldable;
-} // SchedRW
-
-//===----------------------------------------------------------------------===//
-// MOVBE Instructions
-//
-let Predicates = [HasMOVBE] in {
- let SchedRW = [WriteALULd] in {
- def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "movbe{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (bswap (loadi16 addr:$src)))]>,
- OpSize16, T8PS;
- def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "movbe{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (bswap (loadi32 addr:$src)))]>,
- OpSize32, T8PS;
- def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "movbe{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (bswap (loadi64 addr:$src)))]>,
- T8PS;
- }
- let SchedRW = [WriteStore] in {
- def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
- "movbe{w}\t{$src, $dst|$dst, $src}",
- [(store (bswap GR16:$src), addr:$dst)]>,
- OpSize16, T8PS;
- def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "movbe{l}\t{$src, $dst|$dst, $src}",
- [(store (bswap GR32:$src), addr:$dst)]>,
- OpSize32, T8PS;
- def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "movbe{q}\t{$src, $dst|$dst, $src}",
- [(store (bswap GR64:$src), addr:$dst)]>,
- T8PS;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// RDRAND Instruction
-//
-let Predicates = [HasRDRAND], Defs = [EFLAGS], SchedRW = [WriteSystem] in {
- def RDRAND16r : I<0xC7, MRM6r, (outs GR16:$dst), (ins),
- "rdrand{w}\t$dst", [(set GR16:$dst, EFLAGS, (X86rdrand))]>,
- OpSize16, PS;
- def RDRAND32r : I<0xC7, MRM6r, (outs GR32:$dst), (ins),
- "rdrand{l}\t$dst", [(set GR32:$dst, EFLAGS, (X86rdrand))]>,
- OpSize32, PS;
- def RDRAND64r : RI<0xC7, MRM6r, (outs GR64:$dst), (ins),
- "rdrand{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86rdrand))]>,
- PS;
-}
-
-//===----------------------------------------------------------------------===//
-// RDSEED Instruction
-//
-let Predicates = [HasRDSEED], Defs = [EFLAGS], SchedRW = [WriteSystem] in {
- def RDSEED16r : I<0xC7, MRM7r, (outs GR16:$dst), (ins), "rdseed{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86rdseed))]>, OpSize16, PS;
- def RDSEED32r : I<0xC7, MRM7r, (outs GR32:$dst), (ins), "rdseed{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86rdseed))]>, OpSize32, PS;
- def RDSEED64r : RI<0xC7, MRM7r, (outs GR64:$dst), (ins), "rdseed{q}\t$dst",
- [(set GR64:$dst, EFLAGS, (X86rdseed))]>, PS;
-}
-
-//===----------------------------------------------------------------------===//
-// LZCNT Instruction
-//
-let Predicates = [HasLZCNT], Defs = [EFLAGS] in {
- def LZCNT16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "lzcnt{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (ctlz GR16:$src)), (implicit EFLAGS)]>,
- XS, OpSize16, Sched<[WriteLZCNT]>;
- def LZCNT16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "lzcnt{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (ctlz (loadi16 addr:$src))),
- (implicit EFLAGS)]>, XS, OpSize16, Sched<[WriteLZCNTLd]>;
-
- def LZCNT32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "lzcnt{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (ctlz GR32:$src)), (implicit EFLAGS)]>,
- XS, OpSize32, Sched<[WriteLZCNT]>;
- def LZCNT32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "lzcnt{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (ctlz (loadi32 addr:$src))),
- (implicit EFLAGS)]>, XS, OpSize32, Sched<[WriteLZCNTLd]>;
-
- def LZCNT64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "lzcnt{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (ctlz GR64:$src)), (implicit EFLAGS)]>,
- XS, Sched<[WriteLZCNT]>;
- def LZCNT64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "lzcnt{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (ctlz (loadi64 addr:$src))),
- (implicit EFLAGS)]>, XS, Sched<[WriteLZCNTLd]>;
-}
-
-//===----------------------------------------------------------------------===//
-// BMI Instructions
-//
-let Predicates = [HasBMI], Defs = [EFLAGS] in {
- def TZCNT16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
- "tzcnt{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (cttz GR16:$src)), (implicit EFLAGS)]>,
- XS, OpSize16, Sched<[WriteTZCNT]>;
- def TZCNT16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
- "tzcnt{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (cttz (loadi16 addr:$src))),
- (implicit EFLAGS)]>, XS, OpSize16, Sched<[WriteTZCNTLd]>;
-
- def TZCNT32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "tzcnt{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (cttz GR32:$src)), (implicit EFLAGS)]>,
- XS, OpSize32, Sched<[WriteTZCNT]>;
- def TZCNT32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "tzcnt{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (cttz (loadi32 addr:$src))),
- (implicit EFLAGS)]>, XS, OpSize32, Sched<[WriteTZCNTLd]>;
-
- def TZCNT64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "tzcnt{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (cttz GR64:$src)), (implicit EFLAGS)]>,
- XS, Sched<[WriteTZCNT]>;
- def TZCNT64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "tzcnt{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (cttz (loadi64 addr:$src))),
- (implicit EFLAGS)]>, XS, Sched<[WriteTZCNTLd]>;
-}
-
-multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
- RegisterClass RC, X86MemOperand x86memop,
- X86FoldableSchedWrite sched> {
-let hasSideEffects = 0 in {
- def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
- !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
- T8PS, VEX_4V, Sched<[sched]>;
- let mayLoad = 1 in
- def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
- T8PS, VEX_4V, Sched<[sched.Folded]>;
-}
-}
+def add_su : binop_oneuse<add>;
+def and_su : binop_oneuse<and>;
+def srl_su : binop_oneuse<srl>;
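// Illustrative sketch (editorial, not part of this patch): like the
// unop_oneuse class further down, binop_oneuse wraps a node so that a source
// pattern only matches when the node's result has a single user. Any other
// one-use binary fragment would be declared the same way, e.g.:
def example_xor_su : binop_oneuse<xor>;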
-let Predicates = [HasBMI], Defs = [EFLAGS] in {
- defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS>;
- defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS>, VEX_W;
- defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS>;
- defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS>, VEX_W;
- defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS>;
- defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS>, VEX_W;
-}
-
-//===----------------------------------------------------------------------===//
-// Pattern fragments to auto generate BMI instructions.
-//===----------------------------------------------------------------------===//
-
-def or_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
- (X86or_flag node:$lhs, node:$rhs), [{
- return hasNoCarryFlagUses(SDValue(N, 1));
-}]>;
-
-def xor_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
- (X86xor_flag node:$lhs, node:$rhs), [{
- return hasNoCarryFlagUses(SDValue(N, 1));
-}]>;
-
-def and_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
- (X86and_flag node:$lhs, node:$rhs), [{
- return hasNoCarryFlagUses(SDValue(N, 1));
-}]>;
-
-let Predicates = [HasBMI] in {
- // FIXME: patterns for the load versions are not implemented
- def : Pat<(and GR32:$src, (add GR32:$src, -1)),
- (BLSR32rr GR32:$src)>;
- def : Pat<(and GR64:$src, (add GR64:$src, -1)),
- (BLSR64rr GR64:$src)>;
-
- def : Pat<(xor GR32:$src, (add GR32:$src, -1)),
- (BLSMSK32rr GR32:$src)>;
- def : Pat<(xor GR64:$src, (add GR64:$src, -1)),
- (BLSMSK64rr GR64:$src)>;
-
- def : Pat<(and GR32:$src, (ineg GR32:$src)),
- (BLSI32rr GR32:$src)>;
- def : Pat<(and GR64:$src, (ineg GR64:$src)),
- (BLSI64rr GR64:$src)>;
-
- // Versions to match flag producing ops.
- def : Pat<(and_flag_nocf GR32:$src, (add GR32:$src, -1)),
- (BLSR32rr GR32:$src)>;
- def : Pat<(and_flag_nocf GR64:$src, (add GR64:$src, -1)),
- (BLSR64rr GR64:$src)>;
-
- def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, -1)),
- (BLSMSK32rr GR32:$src)>;
- def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, -1)),
- (BLSMSK64rr GR64:$src)>;
-
- def : Pat<(and_flag_nocf GR32:$src, (ineg GR32:$src)),
- (BLSI32rr GR32:$src)>;
- def : Pat<(and_flag_nocf GR64:$src, (ineg GR64:$src)),
- (BLSI64rr GR64:$src)>;
-}
-
-multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
- X86MemOperand x86memop, SDNode OpNode,
- PatFrag ld_frag, X86FoldableSchedWrite Sched> {
- def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
- T8PS, VEX, Sched<[Sched]>;
- def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)),
- (implicit EFLAGS)]>, T8PS, VEX,
- Sched<[Sched.Folded,
- // x86memop:$src1
- ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- ReadDefault,
- // RC:$src2
- Sched.ReadAfterFold]>;
-}
-
-let Predicates = [HasBMI], Defs = [EFLAGS] in {
- defm BEXTR32 : bmi_bextr<0xF7, "bextr{l}", GR32, i32mem,
- X86bextr, loadi32, WriteBEXTR>;
- defm BEXTR64 : bmi_bextr<0xF7, "bextr{q}", GR64, i64mem,
- X86bextr, loadi64, WriteBEXTR>, VEX_W;
-}
-
-multiclass bmi_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
- X86MemOperand x86memop, SDNode Int,
- PatFrag ld_frag, X86FoldableSchedWrite Sched> {
- def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (Int RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
- T8PS, VEX, Sched<[Sched]>;
- def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (Int (ld_frag addr:$src1), RC:$src2)),
- (implicit EFLAGS)]>, T8PS, VEX,
- Sched<[Sched.Folded,
- // x86memop:$src1
- ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- ReadDefault,
- // RC:$src2
- Sched.ReadAfterFold]>;
-}
-
-let Predicates = [HasBMI2], Defs = [EFLAGS] in {
- defm BZHI32 : bmi_bzhi<0xF5, "bzhi{l}", GR32, i32mem,
- X86bzhi, loadi32, WriteBZHI>;
- defm BZHI64 : bmi_bzhi<0xF5, "bzhi{q}", GR64, i64mem,
- X86bzhi, loadi64, WriteBZHI>, VEX_W;
-}
-
-def CountTrailingOnes : SDNodeXForm<imm, [{
- // Count the trailing ones in the immediate.
- return getI8Imm(countTrailingOnes(N->getZExtValue()), SDLoc(N));
-}]>;
-
-def BEXTRMaskXForm : SDNodeXForm<imm, [{
- unsigned Length = countTrailingOnes(N->getZExtValue());
- return getI32Imm(Length << 8, SDLoc(N));
-}]>;
-
-def AndMask64 : ImmLeaf<i64, [{
- return isMask_64(Imm) && !isUInt<32>(Imm);
+// unary op with only one user
+class unop_oneuse<SDPatternOperator operator>
+ : PatFrag<(ops node:$A),
+ (operator node:$A), [{
+ return N->hasOneUse();
}]>;
-// Use BEXTR for 64-bit 'and' with large immediate 'mask'.
-let Predicates = [HasBMI, NoBMI2, NoTBM] in {
- def : Pat<(and GR64:$src, AndMask64:$mask),
- (BEXTR64rr GR64:$src,
- (SUBREG_TO_REG (i64 0),
- (MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
- def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
- (BEXTR64rm addr:$src,
- (SUBREG_TO_REG (i64 0),
- (MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
-}
-// Use BZHI for 64-bit 'and' with large immediate 'mask'.
-let Predicates = [HasBMI2, NoTBM] in {
- def : Pat<(and GR64:$src, AndMask64:$mask),
- (BZHI64rr GR64:$src,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
- def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
- (BZHI64rm addr:$src,
- (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
- (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
-}
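// Worked example (editorial, not part of this patch): for an AndMask64 such
// as 0x3FFFFFFFFFF (42 trailing ones, wider than 32 bits), BEXTRMaskXForm
// yields 42 << 8 = 0x2A00 (start 0, length 42) and CountTrailingOnes yields
// 42, so either BEXTR with that control word or BZHI with index 42 reproduces
// the original 64-bit 'and'.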
-
-multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
- X86MemOperand x86memop, SDNode OpNode,
- PatFrag ld_frag> {
- def rr : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>,
- VEX_4V, Sched<[WriteALU]>;
- def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
- !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set RC:$dst, (OpNode RC:$src1, (ld_frag addr:$src2)))]>,
- VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
-}
-
-let Predicates = [HasBMI2] in {
- defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
- X86pdep, loadi32>, T8XD;
- defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
- X86pdep, loadi64>, T8XD, VEX_W;
- defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem,
- X86pext, loadi32>, T8XS;
- defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem,
- X86pext, loadi64>, T8XS, VEX_W;
-}
+def ineg_su : unop_oneuse<ineg>;
+def trunc_su : unop_oneuse<trunc>;
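// Illustrative sketch (editorial, not part of this patch): wrapping a node in
// a one-use fragment restricts selection to single-user values, e.g. a BLSI
// pattern written against ineg_su rather than ineg, roughly
//   def : Pat<(and GR32:$src, (ineg_su GR32:$src)), (BLSI32rr GR32:$src)>;
// would no longer fire when the negated value is also used elsewhere.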
//===----------------------------------------------------------------------===//
-// TBM Instructions
-//
-let Predicates = [HasTBM], Defs = [EFLAGS] in {
-
-multiclass tbm_bextri<bits<8> opc, RegisterClass RC, string OpcodeStr,
- X86MemOperand x86memop, PatFrag ld_frag,
- SDNode OpNode, Operand immtype,
- SDPatternOperator immoperator,
- X86FoldableSchedWrite Sched> {
- def ri : Ii32<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, immtype:$cntl),
- !strconcat(OpcodeStr,
- "\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
- [(set RC:$dst, (OpNode RC:$src1, immoperator:$cntl))]>,
- XOP, XOPA, Sched<[Sched]>;
- def mi : Ii32<opc, MRMSrcMem, (outs RC:$dst),
- (ins x86memop:$src1, immtype:$cntl),
- !strconcat(OpcodeStr,
- "\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
- [(set RC:$dst, (OpNode (ld_frag addr:$src1), immoperator:$cntl))]>,
- XOP, XOPA, Sched<[Sched.Folded]>;
-}
-
-defm BEXTRI32 : tbm_bextri<0x10, GR32, "bextr{l}", i32mem, loadi32,
- X86bextri, i32imm, timm, WriteBEXTR>;
-let ImmT = Imm32S in
-defm BEXTRI64 : tbm_bextri<0x10, GR64, "bextr{q}", i64mem, loadi64,
- X86bextri, i64i32imm,
- i64timmSExt32, WriteBEXTR>, VEX_W;
-
-multiclass tbm_binary_rm<bits<8> opc, Format FormReg, Format FormMem,
- RegisterClass RC, string OpcodeStr,
- X86MemOperand x86memop, X86FoldableSchedWrite Sched> {
-let hasSideEffects = 0 in {
- def rr : I<opc, FormReg, (outs RC:$dst), (ins RC:$src),
- !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"), []>,
- XOP_4V, XOP9, Sched<[Sched]>;
- let mayLoad = 1 in
- def rm : I<opc, FormMem, (outs RC:$dst), (ins x86memop:$src),
- !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"), []>,
- XOP_4V, XOP9, Sched<[Sched.Folded]>;
-}
-}
-
-multiclass tbm_binary_intr<bits<8> opc, string OpcodeStr,
- X86FoldableSchedWrite Sched,
- Format FormReg, Format FormMem> {
- defm NAME#32 : tbm_binary_rm<opc, FormReg, FormMem, GR32, OpcodeStr#"{l}",
- i32mem, Sched>;
- defm NAME#64 : tbm_binary_rm<opc, FormReg, FormMem, GR64, OpcodeStr#"{q}",
- i64mem, Sched>, VEX_W;
-}
-
-defm BLCFILL : tbm_binary_intr<0x01, "blcfill", WriteALU, MRM1r, MRM1m>;
-defm BLCI : tbm_binary_intr<0x02, "blci", WriteALU, MRM6r, MRM6m>;
-defm BLCIC : tbm_binary_intr<0x01, "blcic", WriteALU, MRM5r, MRM5m>;
-defm BLCMSK : tbm_binary_intr<0x02, "blcmsk", WriteALU, MRM1r, MRM1m>;
-defm BLCS : tbm_binary_intr<0x01, "blcs", WriteALU, MRM3r, MRM3m>;
-defm BLSFILL : tbm_binary_intr<0x01, "blsfill", WriteALU, MRM2r, MRM2m>;
-defm BLSIC : tbm_binary_intr<0x01, "blsic", WriteALU, MRM6r, MRM6m>;
-defm T1MSKC : tbm_binary_intr<0x01, "t1mskc", WriteALU, MRM7r, MRM7m>;
-defm TZMSK : tbm_binary_intr<0x01, "tzmsk", WriteALU, MRM4r, MRM4m>;
-} // HasTBM, EFLAGS
-
-// Use BEXTRI for 64-bit 'and' with large immediate 'mask'.
-let Predicates = [HasTBM] in {
- def : Pat<(and GR64:$src, AndMask64:$mask),
- (BEXTRI64ri GR64:$src, (BEXTRMaskXForm imm:$mask))>;
-
- def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
- (BEXTRI64mi addr:$src, (BEXTRMaskXForm imm:$mask))>;
-}
-
+// X86 Type information definitions
//===----------------------------------------------------------------------===//
-// Lightweight Profiling Instructions
-
-let Predicates = [HasLWP], SchedRW = [WriteSystem] in {
-
-def LLWPCB : I<0x12, MRM0r, (outs), (ins GR32:$src), "llwpcb\t$src",
- [(int_x86_llwpcb GR32:$src)]>, XOP, XOP9;
-def SLWPCB : I<0x12, MRM1r, (outs GR32:$dst), (ins), "slwpcb\t$dst",
- [(set GR32:$dst, (int_x86_slwpcb))]>, XOP, XOP9;
-
-def LLWPCB64 : I<0x12, MRM0r, (outs), (ins GR64:$src), "llwpcb\t$src",
- [(int_x86_llwpcb GR64:$src)]>, XOP, XOP9, VEX_W;
-def SLWPCB64 : I<0x12, MRM1r, (outs GR64:$dst), (ins), "slwpcb\t$dst",
- [(set GR64:$dst, (int_x86_slwpcb))]>, XOP, XOP9, VEX_W;
-
-multiclass lwpins_intr<RegisterClass RC> {
- def rri : Ii32<0x12, MRM0r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl),
- "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
- [(set EFLAGS, (X86lwpins RC:$src0, GR32:$src1, timm:$cntl))]>,
- XOP_4V, XOPA;
- let mayLoad = 1 in
- def rmi : Ii32<0x12, MRM0m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl),
- "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
- [(set EFLAGS, (X86lwpins RC:$src0, (loadi32 addr:$src1), timm:$cntl))]>,
- XOP_4V, XOPA;
-}
-let Defs = [EFLAGS] in {
- defm LWPINS32 : lwpins_intr<GR32>;
- defm LWPINS64 : lwpins_intr<GR64>, VEX_W;
-} // EFLAGS
-
-multiclass lwpval_intr<RegisterClass RC, Intrinsic Int> {
- def rri : Ii32<0x12, MRM1r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl),
- "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
- [(Int RC:$src0, GR32:$src1, timm:$cntl)]>, XOP_4V, XOPA;
- let mayLoad = 1 in
- def rmi : Ii32<0x12, MRM1m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl),
- "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
- [(Int RC:$src0, (loadi32 addr:$src1), timm:$cntl)]>,
- XOP_4V, XOPA;
+/// X86TypeInfo - This describes the X86-relevant properties of a value type.
+/// For example, it can tell you which register class and preferred load node
+/// to use.
+class X86TypeInfo<ValueType vt, string instrsuffix, RegisterClass regclass,
+ PatFrag loadnode, X86MemOperand memoperand, ImmType immkind,
+ Operand immoperand, SDPatternOperator immoperator,
+ SDPatternOperator immnosuoperator, Operand imm8operand,
+ SDPatternOperator imm8operator, SDPatternOperator imm8nosuoperator,
+ bit hasOddOpcode, OperandSize opSize,
+ bit hasREX_W> {
+ /// VT - This is the value type itself.
+ ValueType VT = vt;
+
+ /// InstrSuffix - This is the suffix used on instructions with this type. For
+ /// example, i8 -> "b", i16 -> "w", i32 -> "l", i64 -> "q".
+ string InstrSuffix = instrsuffix;
+
+ /// RegClass - This is the register class associated with this type. For
+ /// example, i8 -> GR8, i16 -> GR16, i32 -> GR32, i64 -> GR64.
+ RegisterClass RegClass = regclass;
+
+ /// LoadNode - This is the load node associated with this type. For
+ /// example, i8 -> loadi8, i16 -> loadi16, i32 -> loadi32, i64 -> loadi64.
+ PatFrag LoadNode = loadnode;
+
+ /// MemOperand - This is the memory operand associated with this type. For
+ /// example, i8 -> i8mem, i16 -> i16mem, i32 -> i32mem, i64 -> i64mem.
+ X86MemOperand MemOperand = memoperand;
+
+ /// ImmEncoding - This is the encoding of an immediate of this type. For
+ /// example, i8 -> Imm8, i16 -> Imm16, i32 -> Imm32. Note that i64 -> Imm32
+  /// since the immediate field of i64 instructions is a 32-bit sign-extended
+  /// value.
+ ImmType ImmEncoding = immkind;
+
+ /// ImmOperand - This is the operand kind of an immediate of this type. For
+ /// example, i8 -> i8imm, i16 -> i16imm, i32 -> i32imm. Note that i64 ->
+  /// i64i32imm since the immediate field of i64 instructions is a 32-bit
+  /// sign-extended value.
+ Operand ImmOperand = immoperand;
+
+ /// ImmOperator - This is the operator that should be used to match an
+ /// immediate of this kind in a pattern (e.g. imm, or i64immSExt32).
+ SDPatternOperator ImmOperator = immoperator;
+
+  /// ImmNoSuOperator - The same operator as ImmOperator but without the
+  /// one-use ("_su") restriction.
+  SDPatternOperator ImmNoSuOperator = immnosuoperator;
+
+ /// Imm8Operand - This is the operand kind to use for an imm8 of this type.
+ /// For example, i8 -> <invalid>, i16 -> i16i8imm, i32 -> i32i8imm. This is
+ /// only used for instructions that have a sign-extended imm8 field form.
+ Operand Imm8Operand = imm8operand;
+
+ /// Imm8Operator - This is the operator that should be used to match an 8-bit
+  /// sign-extended immediate of this kind in a pattern (e.g. i16immSExt8).
+ SDPatternOperator Imm8Operator = imm8operator;
+
+  /// Imm8NoSuOperator - The same operator as Imm8Operator but without the
+  /// one-use ("_su") restriction.
+  SDPatternOperator Imm8NoSuOperator = imm8nosuoperator;
+
+ /// HasOddOpcode - This bit is true if the instruction should have an odd (as
+  /// opposed to even) opcode. Operations on i8 are usually even; operations
+  /// on other data types are odd.
+ bit HasOddOpcode = hasOddOpcode;
+
+ /// OpSize - Selects whether the instruction needs a 0x66 prefix based on
+ /// 16-bit vs 32-bit mode. i8/i64 set this to OpSizeFixed. i16 sets this
+  /// to OpSize16. i32 sets this to OpSize32.
+ OperandSize OpSize = opSize;
+
+ /// HasREX_W - This bit is set to true if the instruction should have
+  /// the REX.W prefix (0x48). This is set for i64 types.
+ bit HasREX_W = hasREX_W;
+}
+
+// Placeholder operator for slots that do not apply to a type; for example,
+// Xi8 uses it for the imm8 operators because i8 has no sign-extended imm8
+// form.
+def invalid_node : SDNode<"<<invalid_node>>", SDTIntLeaf,[],"<<invalid_node>>">;
+
+def Xi8 : X86TypeInfo<i8, "b", GR8, loadi8, i8mem, Imm8, i8imm,
+ imm_su, imm, i8imm, invalid_node, invalid_node,
+ 0, OpSizeFixed, 0>;
+def Xi16 : X86TypeInfo<i16, "w", GR16, loadi16, i16mem, Imm16, i16imm,
+ imm_su, imm, i16i8imm, i16immSExt8_su, i16immSExt8,
+ 1, OpSize16, 0>;
+def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem, Imm32, i32imm,
+ imm_su, imm, i32i8imm, i32immSExt8_su, i32immSExt8,
+ 1, OpSize32, 0>;
+def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem, Imm32S, i64i32imm,
+ i64immSExt32_su, i64immSExt32, i64i8imm, i64immSExt8_su,
+ i64immSExt8, 1, OpSizeFixed, 1>;
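// Illustrative note (editorial, not part of this patch): downstream classes
// read these records by field, e.g. Xi32.RegClass resolves to GR32,
// Xi32.LoadNode to loadi32, Xi32.MemOperand to i32mem and Xi32.InstrSuffix to
// "l", so one parameterized class body can be stamped out for every width.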
+
+/// ITy - This instruction base class takes the type info for the instruction.
+/// Using this, it:
+/// 1. Concatenates the instruction mnemonic with the appropriate suffix
+///    letter, a tab, and the arguments.
+/// 2. Infers whether the instruction should have a 0x66 prefix byte.
+/// 3. Infers whether the instruction should have a REX.W (0x48) prefix.
+/// 4. Infers whether the low bit of the opcode should be 0 (for i8 operations)
+/// or 1 (for i16,i32,i64 operations).
+class ITy<bits<8> opcode, Format f, X86TypeInfo typeinfo, dag outs, dag ins,
+ string mnemonic, string args, list<dag> pattern>
+ : I<{opcode{7}, opcode{6}, opcode{5}, opcode{4},
+ opcode{3}, opcode{2}, opcode{1}, typeinfo.HasOddOpcode },
+ f, outs, ins,
+ !strconcat(mnemonic, "{", typeinfo.InstrSuffix, "}\t", args), pattern> {
+
+ // Infer instruction prefixes from type info.
+ let OpSize = typeinfo.OpSize;
+ let hasREX_W = typeinfo.HasREX_W;
}
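// Illustrative sketch (editorial, hypothetical, not part of this patch): a
// class built on ITy pulls everything width-specific from the X86TypeInfo
// record. With t = Xi32 the sketch below would print "exdef{l}", take GR32
// operands and use OpSize32 with hasREX_W clear; with t = Xi64 it would use
// GR64 and set hasREX_W.
class ExampleRR_Sketch<bits<8> o, X86TypeInfo t>
  : ITy<o, MRMDestReg, t, (outs t.RegClass:$dst),
        (ins t.RegClass:$src1, t.RegClass:$src2),
        "exdef", "{$src2, $src1, $dst|$dst, $src1, $src2}", []>;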
-defm LWPVAL32 : lwpval_intr<GR32, int_x86_lwpval32>;
-defm LWPVAL64 : lwpval_intr<GR64, int_x86_lwpval64>, VEX_W;
-
-} // HasLWP, SchedRW
-
-//===----------------------------------------------------------------------===//
-// MONITORX/MWAITX Instructions
-//
-let SchedRW = [ WriteSystem ] in {
- let Uses = [ EAX, ECX, EDX ] in
- def MONITORX32rrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>,
- TB, Requires<[ HasMWAITX, Not64BitMode ]>;
- let Uses = [ RAX, ECX, EDX ] in
- def MONITORX64rrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>,
- TB, Requires<[ HasMWAITX, In64BitMode ]>;
-
- let Uses = [ ECX, EAX, EBX ] in {
- def MWAITXrrr : I<0x01, MRM_FB, (outs), (ins), "mwaitx",
- []>, TB, Requires<[ HasMWAITX ]>;
- }
-} // SchedRW
-
-def : InstAlias<"mwaitx\t{%eax, %ecx, %ebx|ebx, ecx, eax}", (MWAITXrrr)>,
- Requires<[ Not64BitMode ]>;
-def : InstAlias<"mwaitx\t{%rax, %rcx, %rbx|rbx, rcx, rax}", (MWAITXrrr)>,
- Requires<[ In64BitMode ]>;
-
-def : InstAlias<"monitorx\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORX32rrr)>,
- Requires<[ Not64BitMode ]>;
-def : InstAlias<"monitorx\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORX64rrr)>,
- Requires<[ In64BitMode ]>;
-
-//===----------------------------------------------------------------------===//
-// WAITPKG Instructions
-//
-let SchedRW = [WriteSystem] in {
- def UMONITOR16 : I<0xAE, MRM6r, (outs), (ins GR16:$src),
- "umonitor\t$src", [(int_x86_umonitor GR16:$src)]>,
- XS, AdSize16, Requires<[HasWAITPKG, Not64BitMode]>;
- def UMONITOR32 : I<0xAE, MRM6r, (outs), (ins GR32:$src),
- "umonitor\t$src", [(int_x86_umonitor GR32:$src)]>,
- XS, AdSize32, Requires<[HasWAITPKG]>;
- def UMONITOR64 : I<0xAE, MRM6r, (outs), (ins GR64:$src),
- "umonitor\t$src", [(int_x86_umonitor GR64:$src)]>,
- XS, AdSize64, Requires<[HasWAITPKG, In64BitMode]>;
- let Uses = [EAX, EDX], Defs = [EFLAGS] in {
- def UMWAIT : I<0xAE, MRM6r,
- (outs), (ins GR32orGR64:$src), "umwait\t$src",
- [(set EFLAGS, (X86umwait GR32orGR64:$src, EDX, EAX))]>,
- XD, Requires<[HasWAITPKG]>;
- def TPAUSE : I<0xAE, MRM6r,
- (outs), (ins GR32orGR64:$src), "tpause\t$src",
- [(set EFLAGS, (X86tpause GR32orGR64:$src, EDX, EAX))]>,
- PD, Requires<[HasWAITPKG]>;
- }
-} // SchedRW
-
-//===----------------------------------------------------------------------===//
-// MOVDIRI - Move doubleword/quadword as direct store
-//
-let SchedRW = [WriteStore] in {
-def MOVDIRI32 : I<0xF9, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "movdiri\t{$src, $dst|$dst, $src}",
- [(int_x86_directstore32 addr:$dst, GR32:$src)]>,
- T8PS, Requires<[HasMOVDIRI]>;
-def MOVDIRI64 : RI<0xF9, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "movdiri\t{$src, $dst|$dst, $src}",
- [(int_x86_directstore64 addr:$dst, GR64:$src)]>,
- T8PS, Requires<[In64BitMode, HasMOVDIRI]>;
-} // SchedRW
-
-//===----------------------------------------------------------------------===//
-// MOVDIR64B - Move 64 bytes as direct store
-//
-let SchedRW = [WriteStore] in {
-def MOVDIR64B16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src),
- "movdir64b\t{$src, $dst|$dst, $src}", []>,
- T8PD, AdSize16, Requires<[HasMOVDIR64B, Not64BitMode]>;
-def MOVDIR64B32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src),
- "movdir64b\t{$src, $dst|$dst, $src}",
- [(int_x86_movdir64b GR32:$dst, addr:$src)]>,
- T8PD, AdSize32, Requires<[HasMOVDIR64B]>;
-def MOVDIR64B64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
- "movdir64b\t{$src, $dst|$dst, $src}",
- [(int_x86_movdir64b GR64:$dst, addr:$src)]>,
- T8PD, AdSize64, Requires<[HasMOVDIR64B, In64BitMode]>;
-} // SchedRW
-
-//===----------------------------------------------------------------------===//
-// ENQCMD/S - Enqueue 64-byte command as user with 64-byte write atomicity
-//
-let SchedRW = [WriteStore], Defs = [EFLAGS] in {
- def ENQCMD16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src),
- "enqcmd\t{$src, $dst|$dst, $src}",
- [(set EFLAGS, (X86enqcmd GR16:$dst, addr:$src))]>,
- T8XD, AdSize16, Requires<[HasENQCMD, Not64BitMode]>;
- def ENQCMD32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src),
- "enqcmd\t{$src, $dst|$dst, $src}",
- [(set EFLAGS, (X86enqcmd GR32:$dst, addr:$src))]>,
- T8XD, AdSize32, Requires<[HasENQCMD]>;
- def ENQCMD64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
- "enqcmd\t{$src, $dst|$dst, $src}",
- [(set EFLAGS, (X86enqcmd GR64:$dst, addr:$src))]>,
- T8XD, AdSize64, Requires<[HasENQCMD, In64BitMode]>;
-
- def ENQCMDS16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src),
- "enqcmds\t{$src, $dst|$dst, $src}",
- [(set EFLAGS, (X86enqcmds GR16:$dst, addr:$src))]>,
- T8XS, AdSize16, Requires<[HasENQCMD, Not64BitMode]>;
- def ENQCMDS32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src),
- "enqcmds\t{$src, $dst|$dst, $src}",
- [(set EFLAGS, (X86enqcmds GR32:$dst, addr:$src))]>,
- T8XS, AdSize32, Requires<[HasENQCMD]>;
- def ENQCMDS64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
- "enqcmds\t{$src, $dst|$dst, $src}",
- [(set EFLAGS, (X86enqcmds GR64:$dst, addr:$src))]>,
- T8XS, AdSize64, Requires<[HasENQCMD, In64BitMode]>;
-}
-
-//===----------------------------------------------------------------------===//
-// CLZERO Instruction
-//
-let SchedRW = [WriteLoad] in {
- let Uses = [EAX] in
- def CLZERO32r : I<0x01, MRM_FC, (outs), (ins), "clzero", []>,
- TB, Requires<[HasCLZERO, Not64BitMode]>;
- let Uses = [RAX] in
- def CLZERO64r : I<0x01, MRM_FC, (outs), (ins), "clzero", []>,
- TB, Requires<[HasCLZERO, In64BitMode]>;
-} // SchedRW
-
-def : InstAlias<"clzero\t{%eax|eax}", (CLZERO32r)>, Requires<[Not64BitMode]>;
-def : InstAlias<"clzero\t{%rax|rax}", (CLZERO64r)>, Requires<[In64BitMode]>;
-
-//===----------------------------------------------------------------------===//
-// INVLPGB Instruction
-// OPCODE 0F 01 FE
-//
-let SchedRW = [WriteSystem] in {
- let Uses = [EAX, EDX] in
- def INVLPGB32 : I<0x01, MRM_FE, (outs), (ins),
- "invlpgb", []>,
- PS, Requires<[Not64BitMode]>;
- let Uses = [RAX, EDX] in
- def INVLPGB64 : I<0x01, MRM_FE, (outs), (ins),
- "invlpgb", []>,
- PS, Requires<[In64BitMode]>;
-} // SchedRW
-
-def : InstAlias<"invlpgb\t{%eax, %edx|eax, edx}", (INVLPGB32)>, Requires<[Not64BitMode]>;
-def : InstAlias<"invlpgb\t{%rax, %edx|rax, edx}", (INVLPGB64)>, Requires<[In64BitMode]>;
-
-//===----------------------------------------------------------------------===//
-// TLBSYNC Instruction
-// OPCODE 0F 01 FF
-//
-let SchedRW = [WriteSystem] in {
- def TLBSYNC : I<0x01, MRM_FF, (outs), (ins),
- "tlbsync", []>,
- PS, Requires<[]>;
-} // SchedRW
-
-//===----------------------------------------------------------------------===//
-// HRESET Instruction
-//
-let Uses = [EAX], SchedRW = [WriteSystem] in
- def HRESET : Ii8<0xF0, MRM_C0, (outs), (ins i32u8imm:$imm), "hreset\t$imm", []>,
- Requires<[HasHRESET]>, TAXS;
-
-//===----------------------------------------------------------------------===//
-// SERIALIZE Instruction
-//
-let SchedRW = [WriteSystem] in
- def SERIALIZE : I<0x01, MRM_E8, (outs), (ins), "serialize",
- [(int_x86_serialize)]>, PS,
- Requires<[HasSERIALIZE]>;
-
-//===----------------------------------------------------------------------===//
-// TSXLDTRK - TSX Suspend Load Address Tracking
-//
-let Predicates = [HasTSXLDTRK], SchedRW = [WriteSystem] in {
- def XSUSLDTRK : I<0x01, MRM_E8, (outs), (ins), "xsusldtrk",
- [(int_x86_xsusldtrk)]>, XD;
- def XRESLDTRK : I<0x01, MRM_E9, (outs), (ins), "xresldtrk",
- [(int_x86_xresldtrk)]>, XD;
-}
-
-//===----------------------------------------------------------------------===//
-// UINTR Instructions
-//
-let Predicates = [HasUINTR, In64BitMode], SchedRW = [WriteSystem] in {
- def UIRET : I<0x01, MRM_EC, (outs), (ins), "uiret",
- []>, XS;
- def CLUI : I<0x01, MRM_EE, (outs), (ins), "clui",
- [(int_x86_clui)]>, XS;
- def STUI : I<0x01, MRM_EF, (outs), (ins), "stui",
- [(int_x86_stui)]>, XS;
-
- def SENDUIPI : I<0xC7, MRM6r, (outs), (ins GR64:$arg), "senduipi\t$arg",
- [(int_x86_senduipi GR64:$arg)]>, XS;
-
- let Defs = [EFLAGS] in
- def TESTUI : I<0x01, MRM_ED, (outs), (ins), "testui",
- [(set EFLAGS, (X86testui))]>, XS;
-}
-
-//===----------------------------------------------------------------------===//
-// PREFETCHIT0 and PREFETCHIT1 Instructions
-// prefetch ADDR, RW, Locality, Data
-let Predicates = [HasPREFETCHI, In64BitMode], SchedRW = [WriteLoad] in {
- def PREFETCHIT0 : I<0x18, MRM7m, (outs), (ins i8mem:$src),
- "prefetchit0\t$src", [(prefetch addr:$src, (i32 0), (i32 3), (i32 0))]>, TB;
- def PREFETCHIT1 : I<0x18, MRM6m, (outs), (ins i8mem:$src),
- "prefetchit1\t$src", [(prefetch addr:$src, (i32 0), (i32 2), (i32 0))]>, TB;
-}
-
-//===----------------------------------------------------------------------===//
-// CMPCCXADD Instructions
-//
-let isCodeGenOnly = 1, ForceDisassemble = 1, mayLoad = 1, mayStore = 1,
- Predicates = [HasCMPCCXADD, In64BitMode], Defs = [EFLAGS],
- Constraints = "$dstsrc1 = $dst" in {
-def CMPCCXADDmr32 : I<0xe0, MRMDestMem4VOp3CC, (outs GR32:$dst),
- (ins GR32:$dstsrc1, i32mem:$dstsrc2, GR32:$src3, ccode:$cond),
- "cmp${cond}xadd\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}",
- [(set GR32:$dst, (X86cmpccxadd addr:$dstsrc2,
- GR32:$dstsrc1, GR32:$src3, timm:$cond))]>,
- VEX_4V, T8PD, Sched<[WriteXCHG]>;
-
-def CMPCCXADDmr64 : I<0xe0, MRMDestMem4VOp3CC, (outs GR64:$dst),
- (ins GR64:$dstsrc1, i64mem:$dstsrc2, GR64:$src3, ccode:$cond),
- "cmp${cond}xadd\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}",
- [(set GR64:$dst, (X86cmpccxadd addr:$dstsrc2,
- GR64:$dstsrc1, GR64:$src3, timm:$cond))]>,
- VEX_4V, VEX_W, T8PD, Sched<[WriteXCHG]>;
-}
-
-multiclass CMPCCXADD_Aliases<string Cond, int CC> {
- def : InstAlias<"cmp"#Cond#"xadd"#"\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}",
- (CMPCCXADDmr32 GR32:$dst, i32mem:$dstsrc2, GR32:$src3, CC), 0>;
- def : InstAlias<"cmp"#Cond#"xadd"#"\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}",
- (CMPCCXADDmr64 GR64:$dst, i64mem:$dstsrc2, GR64:$src3, CC), 0>;
-}
-
-defm : CMPCCXADD_Aliases<"o" , 0>;
-defm : CMPCCXADD_Aliases<"no", 1>;
-defm : CMPCCXADD_Aliases<"b" , 2>;
-defm : CMPCCXADD_Aliases<"ae", 3>;
-defm : CMPCCXADD_Aliases<"nb", 3>;
-defm : CMPCCXADD_Aliases<"e" , 4>;
-defm : CMPCCXADD_Aliases<"z" , 4>;
-defm : CMPCCXADD_Aliases<"ne", 5>;
-defm : CMPCCXADD_Aliases<"nz", 5>;
-defm : CMPCCXADD_Aliases<"be", 6>;
-defm : CMPCCXADD_Aliases<"nbe", 7>;
-defm : CMPCCXADD_Aliases<"a", 7>;
-defm : CMPCCXADD_Aliases<"s" , 8>;
-defm : CMPCCXADD_Aliases<"ns", 9>;
-defm : CMPCCXADD_Aliases<"p" , 10>;
-defm : CMPCCXADD_Aliases<"np", 11>;
-defm : CMPCCXADD_Aliases<"l" , 12>;
-defm : CMPCCXADD_Aliases<"ge", 13>;
-defm : CMPCCXADD_Aliases<"nl", 13>;
-defm : CMPCCXADD_Aliases<"le", 14>;
-defm : CMPCCXADD_Aliases<"g", 15>;
-defm : CMPCCXADD_Aliases<"nle",15>;
-
-//===----------------------------------------------------------------------===//
-// Pattern fragments to auto generate TBM instructions.
-//===----------------------------------------------------------------------===//
-
-let Predicates = [HasTBM] in {
- // FIXME: patterns for the load versions are not implemented
- def : Pat<(and GR32:$src, (add GR32:$src, 1)),
- (BLCFILL32rr GR32:$src)>;
- def : Pat<(and GR64:$src, (add GR64:$src, 1)),
- (BLCFILL64rr GR64:$src)>;
-
- def : Pat<(or GR32:$src, (not (add GR32:$src, 1))),
- (BLCI32rr GR32:$src)>;
- def : Pat<(or GR64:$src, (not (add GR64:$src, 1))),
- (BLCI64rr GR64:$src)>;
-
-  // Extra patterns because opt can optimize the above patterns to this,
-  // since ~(x + 1) == -x - 2 == (-2 - x).
- def : Pat<(or GR32:$src, (sub -2, GR32:$src)),
- (BLCI32rr GR32:$src)>;
- def : Pat<(or GR64:$src, (sub -2, GR64:$src)),
- (BLCI64rr GR64:$src)>;
-
- def : Pat<(and (not GR32:$src), (add GR32:$src, 1)),
- (BLCIC32rr GR32:$src)>;
- def : Pat<(and (not GR64:$src), (add GR64:$src, 1)),
- (BLCIC64rr GR64:$src)>;
-
- def : Pat<(xor GR32:$src, (add GR32:$src, 1)),
- (BLCMSK32rr GR32:$src)>;
- def : Pat<(xor GR64:$src, (add GR64:$src, 1)),
- (BLCMSK64rr GR64:$src)>;
-
- def : Pat<(or GR32:$src, (add GR32:$src, 1)),
- (BLCS32rr GR32:$src)>;
- def : Pat<(or GR64:$src, (add GR64:$src, 1)),
- (BLCS64rr GR64:$src)>;
-
- def : Pat<(or GR32:$src, (add GR32:$src, -1)),
- (BLSFILL32rr GR32:$src)>;
- def : Pat<(or GR64:$src, (add GR64:$src, -1)),
- (BLSFILL64rr GR64:$src)>;
-
- def : Pat<(or (not GR32:$src), (add GR32:$src, -1)),
- (BLSIC32rr GR32:$src)>;
- def : Pat<(or (not GR64:$src), (add GR64:$src, -1)),
- (BLSIC64rr GR64:$src)>;
-
- def : Pat<(or (not GR32:$src), (add GR32:$src, 1)),
- (T1MSKC32rr GR32:$src)>;
- def : Pat<(or (not GR64:$src), (add GR64:$src, 1)),
- (T1MSKC64rr GR64:$src)>;
-
- def : Pat<(and (not GR32:$src), (add GR32:$src, -1)),
- (TZMSK32rr GR32:$src)>;
- def : Pat<(and (not GR64:$src), (add GR64:$src, -1)),
- (TZMSK64rr GR64:$src)>;
-
- // Patterns to match flag producing ops.
- def : Pat<(and_flag_nocf GR32:$src, (add GR32:$src, 1)),
- (BLCFILL32rr GR32:$src)>;
- def : Pat<(and_flag_nocf GR64:$src, (add GR64:$src, 1)),
- (BLCFILL64rr GR64:$src)>;
-
- def : Pat<(or_flag_nocf GR32:$src, (not (add GR32:$src, 1))),
- (BLCI32rr GR32:$src)>;
- def : Pat<(or_flag_nocf GR64:$src, (not (add GR64:$src, 1))),
- (BLCI64rr GR64:$src)>;
-
-  // Extra patterns because opt can optimize the above patterns to this,
-  // since ~(x + 1) == -x - 2 == (-2 - x).
- def : Pat<(or_flag_nocf GR32:$src, (sub -2, GR32:$src)),
- (BLCI32rr GR32:$src)>;
- def : Pat<(or_flag_nocf GR64:$src, (sub -2, GR64:$src)),
- (BLCI64rr GR64:$src)>;
-
- def : Pat<(and_flag_nocf (not GR32:$src), (add GR32:$src, 1)),
- (BLCIC32rr GR32:$src)>;
- def : Pat<(and_flag_nocf (not GR64:$src), (add GR64:$src, 1)),
- (BLCIC64rr GR64:$src)>;
-
- def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, 1)),
- (BLCMSK32rr GR32:$src)>;
- def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, 1)),
- (BLCMSK64rr GR64:$src)>;
-
- def : Pat<(or_flag_nocf GR32:$src, (add GR32:$src, 1)),
- (BLCS32rr GR32:$src)>;
- def : Pat<(or_flag_nocf GR64:$src, (add GR64:$src, 1)),
- (BLCS64rr GR64:$src)>;
-
- def : Pat<(or_flag_nocf GR32:$src, (add GR32:$src, -1)),
- (BLSFILL32rr GR32:$src)>;
- def : Pat<(or_flag_nocf GR64:$src, (add GR64:$src, -1)),
- (BLSFILL64rr GR64:$src)>;
-
- def : Pat<(or_flag_nocf (not GR32:$src), (add GR32:$src, -1)),
- (BLSIC32rr GR32:$src)>;
- def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, -1)),
- (BLSIC64rr GR64:$src)>;
-
- def : Pat<(or_flag_nocf (not GR32:$src), (add GR32:$src, 1)),
- (T1MSKC32rr GR32:$src)>;
- def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, 1)),
- (T1MSKC64rr GR64:$src)>;
-
- def : Pat<(and_flag_nocf (not GR32:$src), (add GR32:$src, -1)),
- (TZMSK32rr GR32:$src)>;
- def : Pat<(and_flag_nocf (not GR64:$src), (add GR64:$src, -1)),
- (TZMSK64rr GR64:$src)>;
-} // HasTBM
-
-//===----------------------------------------------------------------------===//
-// Memory Instructions
-//
-
-let Predicates = [HasCLFLUSHOPT], SchedRW = [WriteLoad] in
-def CLFLUSHOPT : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
- "clflushopt\t$src", [(int_x86_clflushopt addr:$src)]>, PD;
-
-let Predicates = [HasCLWB], SchedRW = [WriteLoad] in
-def CLWB : I<0xAE, MRM6m, (outs), (ins i8mem:$src), "clwb\t$src",
- [(int_x86_clwb addr:$src)]>, PD;
-
-let Predicates = [HasCLDEMOTE], SchedRW = [WriteLoad] in
-def CLDEMOTE : I<0x1C, MRM0m, (outs), (ins i8mem:$src), "cldemote\t$src",
- [(int_x86_cldemote addr:$src)]>, PS;
-
//===----------------------------------------------------------------------===//
// Subsystems.
//===----------------------------------------------------------------------===//
+include "X86InstrMisc.td"
+include "X86InstrTBM.td"
include "X86InstrArithmetic.td"
include "X86InstrCMovSetCC.td"
include "X86InstrExtension.td"
@@ -3256,567 +1464,6 @@ include "X86InstrSystem.td"
include "X86InstrCompiler.td"
include "X86InstrVecCompiler.td"
-//===----------------------------------------------------------------------===//
-// Assembler Mnemonic Aliases
-//===----------------------------------------------------------------------===//
-
-def : MnemonicAlias<"call", "callw", "att">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"call", "calll", "att">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"call", "callq", "att">, Requires<[In64BitMode]>;
-
-def : MnemonicAlias<"cbw", "cbtw", "att">;
-def : MnemonicAlias<"cwde", "cwtl", "att">;
-def : MnemonicAlias<"cwd", "cwtd", "att">;
-def : MnemonicAlias<"cdq", "cltd", "att">;
-def : MnemonicAlias<"cdqe", "cltq", "att">;
-def : MnemonicAlias<"cqo", "cqto", "att">;
-
-// In 64-bit mode lret maps to lretl; it is not ambiguous with lretq.
-def : MnemonicAlias<"lret", "lretw", "att">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"lret", "lretl", "att">, Requires<[Not16BitMode]>;
-
-def : MnemonicAlias<"leavel", "leave", "att">, Requires<[Not64BitMode]>;
-def : MnemonicAlias<"leaveq", "leave", "att">, Requires<[In64BitMode]>;
-
-def : MnemonicAlias<"loopz", "loope">;
-def : MnemonicAlias<"loopnz", "loopne">;
-
-def : MnemonicAlias<"pop", "popw", "att">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"pop", "popl", "att">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"pop", "popq", "att">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"popf", "popfw", "att">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"popf", "popfl", "att">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"popf", "popfq", "att">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"popf", "popfq", "intel">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"popfd", "popfl", "att">;
-def : MnemonicAlias<"popfw", "popf", "intel">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"popfw", "popf", "intel">, Requires<[In64BitMode]>;
-
-// FIXME: This is wrong for "push reg". "push %bx" should turn into pushw in
-// all modes. However: "push (addr)" and "push $42" should default to
-// pushl/pushq depending on the current mode. Similar for "pop %bx"
-def : MnemonicAlias<"push", "pushw", "att">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"push", "pushl", "att">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"push", "pushq", "att">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"pushf", "pushfw", "att">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"pushf", "pushfl", "att">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"pushf", "pushfq", "att">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"pushf", "pushfq", "intel">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"pushfd", "pushfl", "att">;
-def : MnemonicAlias<"pushfw", "pushf", "intel">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"pushfw", "pushf", "intel">, Requires<[In64BitMode]>;
-
-def : MnemonicAlias<"popad", "popal", "intel">, Requires<[Not64BitMode]>;
-def : MnemonicAlias<"pushad", "pushal", "intel">, Requires<[Not64BitMode]>;
-def : MnemonicAlias<"popa", "popaw", "intel">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"pusha", "pushaw", "intel">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"popa", "popal", "intel">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"pusha", "pushal", "intel">, Requires<[In32BitMode]>;
-
-def : MnemonicAlias<"popa", "popaw", "att">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"pusha", "pushaw", "att">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"popa", "popal", "att">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"pusha", "pushal", "att">, Requires<[In32BitMode]>;
-
-def : MnemonicAlias<"repe", "rep">;
-def : MnemonicAlias<"repz", "rep">;
-def : MnemonicAlias<"repnz", "repne">;
-
-def : MnemonicAlias<"ret", "retw", "att">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"ret", "retl", "att">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"ret", "retq", "att">, Requires<[In64BitMode]>;
-
-// Apply 'ret' behavior to 'retn'
-def : MnemonicAlias<"retn", "retw", "att">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"retn", "retl", "att">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"retn", "retq", "att">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"retn", "ret", "intel">;
-
-def : MnemonicAlias<"sal", "shl", "intel">;
-def : MnemonicAlias<"salb", "shlb", "att">;
-def : MnemonicAlias<"salw", "shlw", "att">;
-def : MnemonicAlias<"sall", "shll", "att">;
-def : MnemonicAlias<"salq", "shlq", "att">;
-
-def : MnemonicAlias<"smovb", "movsb", "att">;
-def : MnemonicAlias<"smovw", "movsw", "att">;
-def : MnemonicAlias<"smovl", "movsl", "att">;
-def : MnemonicAlias<"smovq", "movsq", "att">;
-
-def : MnemonicAlias<"ud2a", "ud2", "att">;
-def : MnemonicAlias<"ud2bw", "ud1w", "att">;
-def : MnemonicAlias<"ud2bl", "ud1l", "att">;
-def : MnemonicAlias<"ud2bq", "ud1q", "att">;
-def : MnemonicAlias<"verrw", "verr", "att">;
-
-// MS recognizes 'xacquire'/'xrelease' as 'acquire'/'release'
-def : MnemonicAlias<"acquire", "xacquire", "intel">;
-def : MnemonicAlias<"release", "xrelease", "intel">;
-
-// System instruction aliases.
-def : MnemonicAlias<"iret", "iretw", "att">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"iret", "iretl", "att">, Requires<[Not16BitMode]>;
-def : MnemonicAlias<"sysret", "sysretl", "att">;
-def : MnemonicAlias<"sysexit", "sysexitl", "att">;
-
-def : MnemonicAlias<"lgdt", "lgdtw", "att">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"lgdt", "lgdtl", "att">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"lgdt", "lgdtq", "att">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"lidt", "lidtw", "att">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"lidt", "lidtl", "att">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"lidt", "lidtq", "att">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"sgdt", "sgdtw", "att">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"sgdt", "sgdtl", "att">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"sgdt", "sgdtq", "att">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"sidt", "sidtw", "att">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"sidt", "sidtl", "att">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"sidt", "sidtq", "att">, Requires<[In64BitMode]>;
-def : MnemonicAlias<"lgdt", "lgdtw", "intel">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"lgdt", "lgdtd", "intel">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"lidt", "lidtw", "intel">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"lidt", "lidtd", "intel">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"sgdt", "sgdtw", "intel">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"sgdt", "sgdtd", "intel">, Requires<[In32BitMode]>;
-def : MnemonicAlias<"sidt", "sidtw", "intel">, Requires<[In16BitMode]>;
-def : MnemonicAlias<"sidt", "sidtd", "intel">, Requires<[In32BitMode]>;
-
-
-// Floating point stack aliases.
-def : MnemonicAlias<"fcmovz", "fcmove", "att">;
-def : MnemonicAlias<"fcmova", "fcmovnbe", "att">;
-def : MnemonicAlias<"fcmovnae", "fcmovb", "att">;
-def : MnemonicAlias<"fcmovna", "fcmovbe", "att">;
-def : MnemonicAlias<"fcmovae", "fcmovnb", "att">;
-def : MnemonicAlias<"fcomip", "fcompi">;
-def : MnemonicAlias<"fildq", "fildll", "att">;
-def : MnemonicAlias<"fistpq", "fistpll", "att">;
-def : MnemonicAlias<"fisttpq", "fisttpll", "att">;
-def : MnemonicAlias<"fldcww", "fldcw", "att">;
-def : MnemonicAlias<"fnstcww", "fnstcw", "att">;
-def : MnemonicAlias<"fnstsww", "fnstsw", "att">;
-def : MnemonicAlias<"fucomip", "fucompi">;
-def : MnemonicAlias<"fwait", "wait">;
-
-def : MnemonicAlias<"fxsaveq", "fxsave64", "att">;
-def : MnemonicAlias<"fxrstorq", "fxrstor64", "att">;
-def : MnemonicAlias<"xsaveq", "xsave64", "att">;
-def : MnemonicAlias<"xrstorq", "xrstor64", "att">;
-def : MnemonicAlias<"xsaveoptq", "xsaveopt64", "att">;
-def : MnemonicAlias<"xrstorsq", "xrstors64", "att">;
-def : MnemonicAlias<"xsavecq", "xsavec64", "att">;
-def : MnemonicAlias<"xsavesq", "xsaves64", "att">;
-
-class CondCodeAlias<string Prefix,string Suffix, string OldCond, string NewCond,
- string VariantName>
- : MnemonicAlias<!strconcat(Prefix, OldCond, Suffix),
- !strconcat(Prefix, NewCond, Suffix), VariantName>;
-
-/// IntegerCondCodeMnemonicAlias - This multiclass defines a bunch of
-/// MnemonicAlias's that canonicalize the condition code in a mnemonic, for
-/// example "setz" -> "sete".
-multiclass IntegerCondCodeMnemonicAlias<string Prefix, string Suffix,
- string V = ""> {
- def C : CondCodeAlias<Prefix, Suffix, "c", "b", V>; // setc -> setb
- def Z : CondCodeAlias<Prefix, Suffix, "z" , "e", V>; // setz -> sete
- def NA : CondCodeAlias<Prefix, Suffix, "na", "be", V>; // setna -> setbe
- def NB : CondCodeAlias<Prefix, Suffix, "nb", "ae", V>; // setnb -> setae
- def NC : CondCodeAlias<Prefix, Suffix, "nc", "ae", V>; // setnc -> setae
- def NG : CondCodeAlias<Prefix, Suffix, "ng", "le", V>; // setng -> setle
- def NL : CondCodeAlias<Prefix, Suffix, "nl", "ge", V>; // setnl -> setge
- def NZ : CondCodeAlias<Prefix, Suffix, "nz", "ne", V>; // setnz -> setne
- def PE : CondCodeAlias<Prefix, Suffix, "pe", "p", V>; // setpe -> setp
- def PO : CondCodeAlias<Prefix, Suffix, "po", "np", V>; // setpo -> setnp
-
- def NAE : CondCodeAlias<Prefix, Suffix, "nae", "b", V>; // setnae -> setb
- def NBE : CondCodeAlias<Prefix, Suffix, "nbe", "a", V>; // setnbe -> seta
- def NGE : CondCodeAlias<Prefix, Suffix, "nge", "l", V>; // setnge -> setl
- def NLE : CondCodeAlias<Prefix, Suffix, "nle", "g", V>; // setnle -> setg
-}
-
-// Aliases for set<CC>
-defm : IntegerCondCodeMnemonicAlias<"set", "">;
-// Aliases for j<CC>
-defm : IntegerCondCodeMnemonicAlias<"j", "">;
-// Aliases for cmov<CC>{w,l,q}
-defm : IntegerCondCodeMnemonicAlias<"cmov", "w", "att">;
-defm : IntegerCondCodeMnemonicAlias<"cmov", "l", "att">;
-defm : IntegerCondCodeMnemonicAlias<"cmov", "q", "att">;
-// No size suffix for intel-style asm.
-defm : IntegerCondCodeMnemonicAlias<"cmov", "", "intel">;
-
-
-//===----------------------------------------------------------------------===//
-// Assembler Instruction Aliases
-//===----------------------------------------------------------------------===//
-
-// aad/aam default to base 10 if no operand is specified.
-def : InstAlias<"aad", (AAD8i8 10)>, Requires<[Not64BitMode]>;
-def : InstAlias<"aam", (AAM8i8 10)>, Requires<[Not64BitMode]>;
-
-// Disambiguate the mem/imm form of bt-without-a-suffix as btl.
-// Likewise for btc/btr/bts.
-def : InstAlias<"bt\t{$imm, $mem|$mem, $imm}",
- (BT32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">;
-def : InstAlias<"btc\t{$imm, $mem|$mem, $imm}",
- (BTC32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">;
-def : InstAlias<"btr\t{$imm, $mem|$mem, $imm}",
- (BTR32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">;
-def : InstAlias<"bts\t{$imm, $mem|$mem, $imm}",
- (BTS32mi8 i32mem:$mem, i32u8imm:$imm), 0, "att">;
-
-// clr aliases.
-def : InstAlias<"clr{b}\t$reg", (XOR8rr GR8 :$reg, GR8 :$reg), 0>;
-def : InstAlias<"clr{w}\t$reg", (XOR16rr GR16:$reg, GR16:$reg), 0>;
-def : InstAlias<"clr{l}\t$reg", (XOR32rr GR32:$reg, GR32:$reg), 0>;
-def : InstAlias<"clr{q}\t$reg", (XOR64rr GR64:$reg, GR64:$reg), 0>;
-
-// lods aliases. Accept the destination being omitted because it's implicit
-// in the mnemonic, or the mnemonic suffix being omitted because it's implicit
-// in the destination.
-def : InstAlias<"lodsb\t$src", (LODSB srcidx8:$src), 0>;
-def : InstAlias<"lodsw\t$src", (LODSW srcidx16:$src), 0>;
-def : InstAlias<"lods{l|d}\t$src", (LODSL srcidx32:$src), 0>;
-def : InstAlias<"lodsq\t$src", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"lods\t{$src, %al|al, $src}", (LODSB srcidx8:$src), 0>;
-def : InstAlias<"lods\t{$src, %ax|ax, $src}", (LODSW srcidx16:$src), 0>;
-def : InstAlias<"lods\t{$src, %eax|eax, $src}", (LODSL srcidx32:$src), 0>;
-def : InstAlias<"lods\t{$src, %rax|rax, $src}", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"lods\t$src", (LODSB srcidx8:$src), 0, "intel">;
-def : InstAlias<"lods\t$src", (LODSW srcidx16:$src), 0, "intel">;
-def : InstAlias<"lods\t$src", (LODSL srcidx32:$src), 0, "intel">;
-def : InstAlias<"lods\t$src", (LODSQ srcidx64:$src), 0, "intel">, Requires<[In64BitMode]>;
-
-
-// stos aliases. Accept the source being omitted because it's implicit in
-// the mnemonic, or the mnemonic suffix being omitted because it's implicit
-// in the source.
-def : InstAlias<"stosb\t$dst", (STOSB dstidx8:$dst), 0>;
-def : InstAlias<"stosw\t$dst", (STOSW dstidx16:$dst), 0>;
-def : InstAlias<"stos{l|d}\t$dst", (STOSL dstidx32:$dst), 0>;
-def : InstAlias<"stosq\t$dst", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"stos\t{%al, $dst|$dst, al}", (STOSB dstidx8:$dst), 0>;
-def : InstAlias<"stos\t{%ax, $dst|$dst, ax}", (STOSW dstidx16:$dst), 0>;
-def : InstAlias<"stos\t{%eax, $dst|$dst, eax}", (STOSL dstidx32:$dst), 0>;
-def : InstAlias<"stos\t{%rax, $dst|$dst, rax}", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"stos\t$dst", (STOSB dstidx8:$dst), 0, "intel">;
-def : InstAlias<"stos\t$dst", (STOSW dstidx16:$dst), 0, "intel">;
-def : InstAlias<"stos\t$dst", (STOSL dstidx32:$dst), 0, "intel">;
-def : InstAlias<"stos\t$dst", (STOSQ dstidx64:$dst), 0, "intel">, Requires<[In64BitMode]>;
-
-
-// scas aliases. Accept the destination being omitted because it's implicit
-// in the mnemonic, or the mnemonic suffix being omitted because it's implicit
-// in the destination.
-def : InstAlias<"scasb\t$dst", (SCASB dstidx8:$dst), 0>;
-def : InstAlias<"scasw\t$dst", (SCASW dstidx16:$dst), 0>;
-def : InstAlias<"scas{l|d}\t$dst", (SCASL dstidx32:$dst), 0>;
-def : InstAlias<"scasq\t$dst", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"scas\t{$dst, %al|al, $dst}", (SCASB dstidx8:$dst), 0>;
-def : InstAlias<"scas\t{$dst, %ax|ax, $dst}", (SCASW dstidx16:$dst), 0>;
-def : InstAlias<"scas\t{$dst, %eax|eax, $dst}", (SCASL dstidx32:$dst), 0>;
-def : InstAlias<"scas\t{$dst, %rax|rax, $dst}", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"scas\t$dst", (SCASB dstidx8:$dst), 0, "intel">;
-def : InstAlias<"scas\t$dst", (SCASW dstidx16:$dst), 0, "intel">;
-def : InstAlias<"scas\t$dst", (SCASL dstidx32:$dst), 0, "intel">;
-def : InstAlias<"scas\t$dst", (SCASQ dstidx64:$dst), 0, "intel">, Requires<[In64BitMode]>;
-
-// cmps aliases. Mnemonic suffix being omitted because it's implicit
-// in the destination.
-def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSB dstidx8:$dst, srcidx8:$src), 0, "intel">;
-def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSW dstidx16:$dst, srcidx16:$src), 0, "intel">;
-def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSL dstidx32:$dst, srcidx32:$src), 0, "intel">;
-def : InstAlias<"cmps\t{$dst, $src|$src, $dst}", (CMPSQ dstidx64:$dst, srcidx64:$src), 0, "intel">, Requires<[In64BitMode]>;
-
-// movs aliases. Mnemonic suffix being omitted because it's implicit
-// in the destination.
-def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSB dstidx8:$dst, srcidx8:$src), 0, "intel">;
-def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSW dstidx16:$dst, srcidx16:$src), 0, "intel">;
-def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSL dstidx32:$dst, srcidx32:$src), 0, "intel">;
-def : InstAlias<"movs\t{$src, $dst|$dst, $src}", (MOVSQ dstidx64:$dst, srcidx64:$src), 0, "intel">, Requires<[In64BitMode]>;
-
-// div and idiv aliases for explicit A register.
-def : InstAlias<"div{b}\t{$src, %al|al, $src}", (DIV8r GR8 :$src)>;
-def : InstAlias<"div{w}\t{$src, %ax|ax, $src}", (DIV16r GR16:$src)>;
-def : InstAlias<"div{l}\t{$src, %eax|eax, $src}", (DIV32r GR32:$src)>;
-def : InstAlias<"div{q}\t{$src, %rax|rax, $src}", (DIV64r GR64:$src)>;
-def : InstAlias<"div{b}\t{$src, %al|al, $src}", (DIV8m i8mem :$src)>;
-def : InstAlias<"div{w}\t{$src, %ax|ax, $src}", (DIV16m i16mem:$src)>;
-def : InstAlias<"div{l}\t{$src, %eax|eax, $src}", (DIV32m i32mem:$src)>;
-def : InstAlias<"div{q}\t{$src, %rax|rax, $src}", (DIV64m i64mem:$src)>;
-def : InstAlias<"idiv{b}\t{$src, %al|al, $src}", (IDIV8r GR8 :$src)>;
-def : InstAlias<"idiv{w}\t{$src, %ax|ax, $src}", (IDIV16r GR16:$src)>;
-def : InstAlias<"idiv{l}\t{$src, %eax|eax, $src}", (IDIV32r GR32:$src)>;
-def : InstAlias<"idiv{q}\t{$src, %rax|rax, $src}", (IDIV64r GR64:$src)>;
-def : InstAlias<"idiv{b}\t{$src, %al|al, $src}", (IDIV8m i8mem :$src)>;
-def : InstAlias<"idiv{w}\t{$src, %ax|ax, $src}", (IDIV16m i16mem:$src)>;
-def : InstAlias<"idiv{l}\t{$src, %eax|eax, $src}", (IDIV32m i32mem:$src)>;
-def : InstAlias<"idiv{q}\t{$src, %rax|rax, $src}", (IDIV64m i64mem:$src)>;
-
-
-
-// Various unary fpstack operations default to operating on ST1.
-// For example, "fxch" -> "fxch %st(1)"
-def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>;
-def: InstAlias<"fadd", (ADD_FPrST0 ST1), 0>;
-def : InstAlias<"fsub{|r}p", (SUBR_FPrST0 ST1), 0>;
-def : InstAlias<"fsub{r|}p", (SUB_FPrST0 ST1), 0>;
-def : InstAlias<"fmul", (MUL_FPrST0 ST1), 0>;
-def : InstAlias<"fmulp", (MUL_FPrST0 ST1), 0>;
-def : InstAlias<"fdiv{|r}p", (DIVR_FPrST0 ST1), 0>;
-def : InstAlias<"fdiv{r|}p", (DIV_FPrST0 ST1), 0>;
-def : InstAlias<"fxch", (XCH_F ST1), 0>;
-def : InstAlias<"fcom", (COM_FST0r ST1), 0>;
-def : InstAlias<"fcomp", (COMP_FST0r ST1), 0>;
-def : InstAlias<"fcomi", (COM_FIr ST1), 0>;
-def : InstAlias<"fcompi", (COM_FIPr ST1), 0>;
-def : InstAlias<"fucom", (UCOM_Fr ST1), 0>;
-def : InstAlias<"fucomp", (UCOM_FPr ST1), 0>;
-def : InstAlias<"fucomi", (UCOM_FIr ST1), 0>;
-def : InstAlias<"fucompi", (UCOM_FIPr ST1), 0>;
-
-// Handle fmul/fadd/fsub/fdiv instructions with explicitly written st(0) op.
-// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate
-// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
-// gas.
-multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
- def : InstAlias<!strconcat(Mnemonic, "\t$op"),
- (Inst RSTi:$op), EmitAlias>;
- def : InstAlias<!strconcat(Mnemonic, "\t{%st, %st|st, st}"),
- (Inst ST0), EmitAlias>;
-}
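-// For illustration (a rough sketch, not the exact records TableGen generates),
-// "defm : FpUnaryAlias<"fadd", ADD_FST0r, 0>" below expands to roughly:
-//   def : InstAlias<"fadd\t$op", (ADD_FST0r RSTi:$op), 0>;
-//   def : InstAlias<"fadd\t{%st, %st|st, st}", (ADD_FST0r ST0), 0>;
-// so both "fadd %st(4)" and "fadd %st, %st" are accepted for ADD_FST0r.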
-
-defm : FpUnaryAlias<"fadd", ADD_FST0r, 0>;
-defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>;
-defm : FpUnaryAlias<"fsub", SUB_FST0r, 0>;
-defm : FpUnaryAlias<"fsub{|r}p", SUBR_FPrST0, 0>;
-defm : FpUnaryAlias<"fsubr", SUBR_FST0r, 0>;
-defm : FpUnaryAlias<"fsub{r|}p", SUB_FPrST0, 0>;
-defm : FpUnaryAlias<"fmul", MUL_FST0r, 0>;
-defm : FpUnaryAlias<"fmulp", MUL_FPrST0, 0>;
-defm : FpUnaryAlias<"fdiv", DIV_FST0r, 0>;
-defm : FpUnaryAlias<"fdiv{|r}p", DIVR_FPrST0, 0>;
-defm : FpUnaryAlias<"fdivr", DIVR_FST0r, 0>;
-defm : FpUnaryAlias<"fdiv{r|}p", DIV_FPrST0, 0>;
-defm : FpUnaryAlias<"fcomi", COM_FIr, 0>;
-defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>;
-defm : FpUnaryAlias<"fcompi", COM_FIPr, 0>;
-defm : FpUnaryAlias<"fucompi", UCOM_FIPr, 0>;
-
-
-// Handle "f{mulp,addp} $op, %st(0)" the same as "f{mulp,addp} $op", since they
-// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
-// solely because gas supports it.
-def : InstAlias<"faddp\t{$op, %st|st, $op}", (ADD_FPrST0 RSTi:$op), 0>;
-def : InstAlias<"fmulp\t{$op, %st|st, $op}", (MUL_FPrST0 RSTi:$op), 0>;
-def : InstAlias<"fsub{|r}p\t{$op, %st|st, $op}", (SUBR_FPrST0 RSTi:$op), 0>;
-def : InstAlias<"fsub{r|}p\t{$op, %st|st, $op}", (SUB_FPrST0 RSTi:$op), 0>;
-def : InstAlias<"fdiv{|r}p\t{$op, %st|st, $op}", (DIVR_FPrST0 RSTi:$op), 0>;
-def : InstAlias<"fdiv{r|}p\t{$op, %st|st, $op}", (DIV_FPrST0 RSTi:$op), 0>;
-
-def : InstAlias<"fnstsw" , (FNSTSW16r), 0>;
-
-// lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but
-// this is compatible with what GAS does.
-def : InstAlias<"lcall\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg), 0>, Requires<[In32BitMode]>;
-def : InstAlias<"ljmp\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg), 0>, Requires<[In32BitMode]>;
-def : InstAlias<"lcall\t{*}$dst", (FARCALL32m opaquemem:$dst), 0>, Requires<[Not16BitMode]>;
-def : InstAlias<"ljmp\t{*}$dst", (FARJMP32m opaquemem:$dst), 0>, Requires<[Not16BitMode]>;
-def : InstAlias<"lcall\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>;
-def : InstAlias<"ljmp\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>;
-def : InstAlias<"lcall\t{*}$dst", (FARCALL16m opaquemem:$dst), 0>, Requires<[In16BitMode]>;
-def : InstAlias<"ljmp\t{*}$dst", (FARJMP16m opaquemem:$dst), 0>, Requires<[In16BitMode]>;
-
-def : InstAlias<"jmp\t{*}$dst", (JMP64m i64mem:$dst), 0, "att">, Requires<[In64BitMode]>;
-def : InstAlias<"jmp\t{*}$dst", (JMP32m i32mem:$dst), 0, "att">, Requires<[In32BitMode]>;
-def : InstAlias<"jmp\t{*}$dst", (JMP16m i16mem:$dst), 0, "att">, Requires<[In16BitMode]>;
-
-
-// "imul <imm>, B" is an alias for "imul <imm>, B, B".
-def : InstAlias<"imul{w}\t{$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>;
-def : InstAlias<"imul{w}\t{$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>;
-def : InstAlias<"imul{l}\t{$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>;
-def : InstAlias<"imul{l}\t{$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>;
-def : InstAlias<"imul{q}\t{$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>;
-def : InstAlias<"imul{q}\t{$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>;
-
-// ins aliases. Accept the mnemonic suffix being omitted because it's implicit
-// in the destination.
-def : InstAlias<"ins\t{%dx, $dst|$dst, dx}", (INSB dstidx8:$dst), 0, "intel">;
-def : InstAlias<"ins\t{%dx, $dst|$dst, dx}", (INSW dstidx16:$dst), 0, "intel">;
-def : InstAlias<"ins\t{%dx, $dst|$dst, dx}", (INSL dstidx32:$dst), 0, "intel">;
-
-// outs aliases. Accept the mnemonic suffix being omitted because it's implicit
-// in the source.
-def : InstAlias<"outs\t{$src, %dx|dx, $src}", (OUTSB srcidx8:$src), 0, "intel">;
-def : InstAlias<"outs\t{$src, %dx|dx, $src}", (OUTSW srcidx16:$src), 0, "intel">;
-def : InstAlias<"outs\t{$src, %dx|dx, $src}", (OUTSL srcidx32:$src), 0, "intel">;
-
-// inb %dx -> inb %al, %dx
-def : InstAlias<"inb\t{%dx|dx}", (IN8rr), 0>;
-def : InstAlias<"inw\t{%dx|dx}", (IN16rr), 0>;
-def : InstAlias<"inl\t{%dx|dx}", (IN32rr), 0>;
-def : InstAlias<"inb\t$port", (IN8ri u8imm:$port), 0>;
-def : InstAlias<"inw\t$port", (IN16ri u8imm:$port), 0>;
-def : InstAlias<"inl\t$port", (IN32ri u8imm:$port), 0>;
-
-
-// jmp and call aliases for lcall and ljmp, e.g. "jmp $42, $5" -> ljmp.
-def : InstAlias<"call\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>;
-def : InstAlias<"jmp\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>;
-def : InstAlias<"call\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>, Requires<[In32BitMode]>;
-def : InstAlias<"jmp\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>, Requires<[In32BitMode]>;
-def : InstAlias<"callw\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>, Requires<[Not64BitMode]>;
-def : InstAlias<"jmpw\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>, Requires<[Not64BitMode]>;
-def : InstAlias<"calll\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>, Requires<[Not64BitMode]>;
-def : InstAlias<"jmpl\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>, Requires<[Not64BitMode]>;
-
-// Match 'movq <largeimm>, <reg>' as an alias for movabsq.
-def : InstAlias<"mov{q}\t{$imm, $reg|$reg, $imm}", (MOV64ri GR64:$reg, i64imm:$imm), 0>;
-
-// Match 'movd GR64, MMX' as an alias for movq to be compatible with gas,
-// which supports this due to an old AMD documentation bug when 64-bit mode was
-// created.
-def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
- (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
-def : InstAlias<"movd\t{$src, $dst|$dst, $src}",
- (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;
-
-// movsx aliases
-def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX16rr8 GR16:$dst, GR8:$src), 0, "att">;
-def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0, "att">;
-def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX32rr8 GR32:$dst, GR8:$src), 0, "att">;
-def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX32rr16 GR32:$dst, GR16:$src), 0, "att">;
-def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr8 GR64:$dst, GR8:$src), 0, "att">;
-def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr16 GR64:$dst, GR16:$src), 0, "att">;
-def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr32 GR64:$dst, GR32:$src), 0, "att">;
-
-// movzx aliases
-def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX16rr8 GR16:$dst, GR8:$src), 0, "att">;
-def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0, "att">;
-def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX32rr8 GR32:$dst, GR8:$src), 0, "att">;
-def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX32rr16 GR32:$dst, GR16:$src), 0, "att">;
-def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX64rr8 GR64:$dst, GR8:$src), 0, "att">;
-def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX64rr16 GR64:$dst, GR16:$src), 0, "att">;
-// Note: No GR32->GR64 movzx form.
-
-// outb %dx -> outb %al, %dx
-def : InstAlias<"outb\t{%dx|dx}", (OUT8rr), 0>;
-def : InstAlias<"outw\t{%dx|dx}", (OUT16rr), 0>;
-def : InstAlias<"outl\t{%dx|dx}", (OUT32rr), 0>;
-def : InstAlias<"outb\t$port", (OUT8ir u8imm:$port), 0>;
-def : InstAlias<"outw\t$port", (OUT16ir u8imm:$port), 0>;
-def : InstAlias<"outl\t$port", (OUT32ir u8imm:$port), 0>;
-
-// 'sldt <mem>' can be encoded with either sldtw or sldtq with the same
-// effect (both store to a 16-bit mem). Force to sldtw to avoid ambiguity
-// errors, since its encoding is the most compact.
-def : InstAlias<"sldt $mem", (SLDT16m i16mem:$mem), 0>;
-
-// shld/shrd op,op -> shld op, op, CL
-def : InstAlias<"shld{w}\t{$r2, $r1|$r1, $r2}", (SHLD16rrCL GR16:$r1, GR16:$r2), 0>;
-def : InstAlias<"shld{l}\t{$r2, $r1|$r1, $r2}", (SHLD32rrCL GR32:$r1, GR32:$r2), 0>;
-def : InstAlias<"shld{q}\t{$r2, $r1|$r1, $r2}", (SHLD64rrCL GR64:$r1, GR64:$r2), 0>;
-def : InstAlias<"shrd{w}\t{$r2, $r1|$r1, $r2}", (SHRD16rrCL GR16:$r1, GR16:$r2), 0>;
-def : InstAlias<"shrd{l}\t{$r2, $r1|$r1, $r2}", (SHRD32rrCL GR32:$r1, GR32:$r2), 0>;
-def : InstAlias<"shrd{q}\t{$r2, $r1|$r1, $r2}", (SHRD64rrCL GR64:$r1, GR64:$r2), 0>;
-
-def : InstAlias<"shld{w}\t{$reg, $mem|$mem, $reg}", (SHLD16mrCL i16mem:$mem, GR16:$reg), 0>;
-def : InstAlias<"shld{l}\t{$reg, $mem|$mem, $reg}", (SHLD32mrCL i32mem:$mem, GR32:$reg), 0>;
-def : InstAlias<"shld{q}\t{$reg, $mem|$mem, $reg}", (SHLD64mrCL i64mem:$mem, GR64:$reg), 0>;
-def : InstAlias<"shrd{w}\t{$reg, $mem|$mem, $reg}", (SHRD16mrCL i16mem:$mem, GR16:$reg), 0>;
-def : InstAlias<"shrd{l}\t{$reg, $mem|$mem, $reg}", (SHRD32mrCL i32mem:$mem, GR32:$reg), 0>;
-def : InstAlias<"shrd{q}\t{$reg, $mem|$mem, $reg}", (SHRD64mrCL i64mem:$mem, GR64:$reg), 0>;
-
-/* FIXME: This is disabled because the asm matcher is currently incapable of
- * matching a fixed immediate like $1.
-// "shl X, $1" is an alias for "shl X".
-multiclass ShiftRotateByOneAlias<string Mnemonic, string Opc> {
- def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"),
- (!cast<Instruction>(!strconcat(Opc, "8r1")) GR8:$op)>;
- def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"),
- (!cast<Instruction>(!strconcat(Opc, "16r1")) GR16:$op)>;
- def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"),
- (!cast<Instruction>(!strconcat(Opc, "32r1")) GR32:$op)>;
- def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"),
- (!cast<Instruction>(!strconcat(Opc, "64r1")) GR64:$op)>;
- def : InstAlias<!strconcat(Mnemonic, "b $op, $$1"),
- (!cast<Instruction>(!strconcat(Opc, "8m1")) i8mem:$op)>;
- def : InstAlias<!strconcat(Mnemonic, "w $op, $$1"),
- (!cast<Instruction>(!strconcat(Opc, "16m1")) i16mem:$op)>;
- def : InstAlias<!strconcat(Mnemonic, "l $op, $$1"),
- (!cast<Instruction>(!strconcat(Opc, "32m1")) i32mem:$op)>;
- def : InstAlias<!strconcat(Mnemonic, "q $op, $$1"),
- (!cast<Instruction>(!strconcat(Opc, "64m1")) i64mem:$op)>;
-}
+// Assembler mnemonic/instruction aliases
+include "X86InstrAsmAlias.td"
-defm : ShiftRotateByOneAlias<"rcl", "RCL">;
-defm : ShiftRotateByOneAlias<"rcr", "RCR">;
-defm : ShiftRotateByOneAlias<"rol", "ROL">;
-defm : ShiftRotateByOneAlias<"ror", "ROR">;
-FIXME */
-
-// test: We accept "testX <reg>, <mem>" and "testX <mem>, <reg>" as synonyms.
-def : InstAlias<"test{b}\t{$mem, $val|$val, $mem}",
- (TEST8mr i8mem :$mem, GR8 :$val), 0>;
-def : InstAlias<"test{w}\t{$mem, $val|$val, $mem}",
- (TEST16mr i16mem:$mem, GR16:$val), 0>;
-def : InstAlias<"test{l}\t{$mem, $val|$val, $mem}",
- (TEST32mr i32mem:$mem, GR32:$val), 0>;
-def : InstAlias<"test{q}\t{$mem, $val|$val, $mem}",
- (TEST64mr i64mem:$mem, GR64:$val), 0>;
-
-// xchg: We accept "xchgX <reg>, <mem>" and "xchgX <mem>, <reg>" as synonyms.
-def : InstAlias<"xchg{b}\t{$mem, $val|$val, $mem}",
- (XCHG8rm GR8 :$val, i8mem :$mem), 0>;
-def : InstAlias<"xchg{w}\t{$mem, $val|$val, $mem}",
- (XCHG16rm GR16:$val, i16mem:$mem), 0>;
-def : InstAlias<"xchg{l}\t{$mem, $val|$val, $mem}",
- (XCHG32rm GR32:$val, i32mem:$mem), 0>;
-def : InstAlias<"xchg{q}\t{$mem, $val|$val, $mem}",
- (XCHG64rm GR64:$val, i64mem:$mem), 0>;
-
-// xchg: We accept "xchgX <reg>, %eax" and "xchgX %eax, <reg>" as synonyms.
-def : InstAlias<"xchg{w}\t{%ax, $src|$src, ax}", (XCHG16ar GR16:$src), 0>;
-def : InstAlias<"xchg{l}\t{%eax, $src|$src, eax}", (XCHG32ar GR32:$src), 0>;
-def : InstAlias<"xchg{q}\t{%rax, $src|$src, rax}", (XCHG64ar GR64:$src), 0>;
-
-// In 64-bit mode, xchg %eax, %eax can't be encoded with the 0x90 opcode we
-// would get by default, because that encoding is defined as NOP. But
-// xchg %eax, %eax implicitly zeroes the upper 32 bits, so alias to the longer
-// encoding.
-def : InstAlias<"xchg{l}\t{%eax, %eax|eax, eax}",
- (XCHG32rr EAX, EAX), 0>, Requires<[In64BitMode]>;
-
-// xchg %rax, %rax is a nop in x86-64 and can be encoded as such. Without this
-// we emit an unneeded REX.w prefix.
-def : InstAlias<"xchg{q}\t{%rax, %rax|rax, rax}", (NOOP), 0>;
-
-// These aliases exist to get the parser to prioritize matching 8-bit
-// immediate encodings over matching the implicit ax/eax/rax encodings. By
-// explicitly mentioning the A register here, these entries will be ordered
-// first due to the more explicit immediate type.
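-// For example (illustrative): with these entries, "add $4, %ax" matches
-// ADD16ri8 (sign-extended 8-bit immediate) rather than the implicit-AX form
-// that carries a full 16-bit immediate.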
-def : InstAlias<"adc{w}\t{$imm, %ax|ax, $imm}", (ADC16ri8 AX, i16i8imm:$imm), 0>;
-def : InstAlias<"add{w}\t{$imm, %ax|ax, $imm}", (ADD16ri8 AX, i16i8imm:$imm), 0>;
-def : InstAlias<"and{w}\t{$imm, %ax|ax, $imm}", (AND16ri8 AX, i16i8imm:$imm), 0>;
-def : InstAlias<"cmp{w}\t{$imm, %ax|ax, $imm}", (CMP16ri8 AX, i16i8imm:$imm), 0>;
-def : InstAlias<"or{w}\t{$imm, %ax|ax, $imm}", (OR16ri8 AX, i16i8imm:$imm), 0>;
-def : InstAlias<"sbb{w}\t{$imm, %ax|ax, $imm}", (SBB16ri8 AX, i16i8imm:$imm), 0>;
-def : InstAlias<"sub{w}\t{$imm, %ax|ax, $imm}", (SUB16ri8 AX, i16i8imm:$imm), 0>;
-def : InstAlias<"xor{w}\t{$imm, %ax|ax, $imm}", (XOR16ri8 AX, i16i8imm:$imm), 0>;
-
-def : InstAlias<"adc{l}\t{$imm, %eax|eax, $imm}", (ADC32ri8 EAX, i32i8imm:$imm), 0>;
-def : InstAlias<"add{l}\t{$imm, %eax|eax, $imm}", (ADD32ri8 EAX, i32i8imm:$imm), 0>;
-def : InstAlias<"and{l}\t{$imm, %eax|eax, $imm}", (AND32ri8 EAX, i32i8imm:$imm), 0>;
-def : InstAlias<"cmp{l}\t{$imm, %eax|eax, $imm}", (CMP32ri8 EAX, i32i8imm:$imm), 0>;
-def : InstAlias<"or{l}\t{$imm, %eax|eax, $imm}", (OR32ri8 EAX, i32i8imm:$imm), 0>;
-def : InstAlias<"sbb{l}\t{$imm, %eax|eax, $imm}", (SBB32ri8 EAX, i32i8imm:$imm), 0>;
-def : InstAlias<"sub{l}\t{$imm, %eax|eax, $imm}", (SUB32ri8 EAX, i32i8imm:$imm), 0>;
-def : InstAlias<"xor{l}\t{$imm, %eax|eax, $imm}", (XOR32ri8 EAX, i32i8imm:$imm), 0>;
-
-def : InstAlias<"adc{q}\t{$imm, %rax|rax, $imm}", (ADC64ri8 RAX, i64i8imm:$imm), 0>;
-def : InstAlias<"add{q}\t{$imm, %rax|rax, $imm}", (ADD64ri8 RAX, i64i8imm:$imm), 0>;
-def : InstAlias<"and{q}\t{$imm, %rax|rax, $imm}", (AND64ri8 RAX, i64i8imm:$imm), 0>;
-def : InstAlias<"cmp{q}\t{$imm, %rax|rax, $imm}", (CMP64ri8 RAX, i64i8imm:$imm), 0>;
-def : InstAlias<"or{q}\t{$imm, %rax|rax, $imm}", (OR64ri8 RAX, i64i8imm:$imm), 0>;
-def : InstAlias<"sbb{q}\t{$imm, %rax|rax, $imm}", (SBB64ri8 RAX, i64i8imm:$imm), 0>;
-def : InstAlias<"sub{q}\t{$imm, %rax|rax, $imm}", (SUB64ri8 RAX, i64i8imm:$imm), 0>;
-def : InstAlias<"xor{q}\t{$imm, %rax|rax, $imm}", (XOR64ri8 RAX, i64i8imm:$imm), 0>;
diff --git a/llvm/lib/Target/X86/X86InstrKL.td b/llvm/lib/Target/X86/X86InstrKL.td
index a716aab4260b..a3392b691c0a 100644
--- a/llvm/lib/Target/X86/X86InstrKL.td
+++ b/llvm/lib/Target/X86/X86InstrKL.td
@@ -19,20 +19,17 @@ let SchedRW = [WriteSystem], Predicates = [HasKL] in {
let Uses = [XMM0, EAX], Defs = [EFLAGS] in {
def LOADIWKEY : I<0xDC, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"loadiwkey\t{$src2, $src1|$src1, $src2}",
- [(int_x86_loadiwkey XMM0, VR128:$src1, VR128:$src2, EAX)]>, T8XS,
- NotMemoryFoldable;
+ [(int_x86_loadiwkey XMM0, VR128:$src1, VR128:$src2, EAX)]>, T8XS;
}
let Uses = [XMM0], Defs = [XMM0, XMM1, XMM2, XMM4, XMM5, XMM6, EFLAGS] in {
def ENCODEKEY128 : I<0xFA, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "encodekey128\t{$src, $dst|$dst, $src}", []>, T8XS,
- NotMemoryFoldable;
+ "encodekey128\t{$src, $dst|$dst, $src}", []>, T8XS;
}
let Uses = [XMM0, XMM1], Defs = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, EFLAGS] in {
def ENCODEKEY256 : I<0xFB, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "encodekey256\t{$src, $dst|$dst, $src}", []>, T8XS,
- NotMemoryFoldable;
+ "encodekey256\t{$src, $dst|$dst, $src}", []>, T8XS;
}
let Constraints = "$src1 = $dst",
@@ -40,26 +37,22 @@ let SchedRW = [WriteSystem], Predicates = [HasKL] in {
def AESENC128KL : I<0xDC, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2),
"aesenc128kl\t{$src2, $src1|$src1, $src2}",
[(set VR128:$dst, EFLAGS,
- (X86aesenc128kl VR128:$src1, addr:$src2))]>, T8XS,
- NotMemoryFoldable;
+ (X86aesenc128kl VR128:$src1, addr:$src2))]>, T8XS;
def AESDEC128KL : I<0xDD, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2),
"aesdec128kl\t{$src2, $src1|$src1, $src2}",
[(set VR128:$dst, EFLAGS,
- (X86aesdec128kl VR128:$src1, addr:$src2))]>, T8XS,
- NotMemoryFoldable;
+ (X86aesdec128kl VR128:$src1, addr:$src2))]>, T8XS;
def AESENC256KL : I<0xDE, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2),
"aesenc256kl\t{$src2, $src1|$src1, $src2}",
[(set VR128:$dst, EFLAGS,
- (X86aesenc256kl VR128:$src1, addr:$src2))]>, T8XS,
- NotMemoryFoldable;
+ (X86aesenc256kl VR128:$src1, addr:$src2))]>, T8XS;
def AESDEC256KL : I<0xDF, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, opaquemem:$src2),
"aesdec256kl\t{$src2, $src1|$src1, $src2}",
[(set VR128:$dst, EFLAGS,
- (X86aesdec256kl VR128:$src1, addr:$src2))]>, T8XS,
- NotMemoryFoldable;
+ (X86aesdec256kl VR128:$src1, addr:$src2))]>, T8XS;
}
} // SchedRW, Predicates
@@ -69,17 +62,13 @@ let SchedRW = [WriteSystem], Predicates = [HasWIDEKL] in {
Defs = [EFLAGS, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7],
mayLoad = 1 in {
def AESENCWIDE128KL : I<0xD8, MRM0m, (outs), (ins opaquemem:$src),
- "aesencwide128kl\t$src", []>, T8XS,
- NotMemoryFoldable;
+ "aesencwide128kl\t$src", []>, T8XS;
def AESDECWIDE128KL : I<0xD8, MRM1m, (outs), (ins opaquemem:$src),
- "aesdecwide128kl\t$src", []>, T8XS,
- NotMemoryFoldable;
+ "aesdecwide128kl\t$src", []>, T8XS;
def AESENCWIDE256KL : I<0xD8, MRM2m, (outs), (ins opaquemem:$src),
- "aesencwide256kl\t$src", []>, T8XS,
- NotMemoryFoldable;
+ "aesencwide256kl\t$src", []>, T8XS;
def AESDECWIDE256KL : I<0xD8, MRM3m, (outs), (ins opaquemem:$src),
- "aesdecwide256kl\t$src", []>, T8XS,
- NotMemoryFoldable;
+ "aesdecwide256kl\t$src", []>, T8XS;
}
} // SchedRW, Predicates
diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td
index 4196aff240c4..acf7605b3f53 100644
--- a/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/llvm/lib/Target/X86/X86InstrMMX.td
@@ -178,7 +178,7 @@ def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst,
(MMX_X86movd2w (x86mmx VR64:$src)))]>,
- Sched<[WriteVecMoveToGpr]>, FoldGenData<"MMX_MOVD64rr">;
+ Sched<[WriteVecMoveToGpr]>;
let isBitcast = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
@@ -202,19 +202,15 @@ def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>;
let isCodeGenOnly = 1, ForceDisassemble = 1 in
def MMX_MOVQ64rr_REV : MMXI<0x7F, MRMDestReg, (outs VR64:$dst), (ins VR64:$src),
- "movq\t{$src, $dst|$dst, $src}", []>,
- FoldGenData<"MMX_MOVQ64rr">;
+ "movq\t{$src, $dst|$dst, $src}", []>;
} // SchedRW, hasSideEffects, isMoveReg
} // isBitcast
-def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
- (MMX_MOVQ64rr_REV VR64:$dst, VR64:$src), 0>;
-
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def MMX_MOVD64from64mr : MMXRI<0x7E, MRMDestMem,
(outs), (ins i64mem:$dst, VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteVecMoveLS.MMX.MR]>, NotMemoryFoldable;
+ Sched<[SchedWriteVecMoveLS.MMX.MR]>;
let SchedRW = [SchedWriteVecMoveLS.MMX.RM] in {
let canFoldAsLoad = 1 in
diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
new file mode 100644
index 000000000000..88e7a388713f
--- /dev/null
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -0,0 +1,1670 @@
+//===-- X86InstrMisc.td - Misc X86 Instruction Definition -*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the miscellaneous X86 instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction list.
+//
+
+// Nop
+let hasSideEffects = 0, SchedRW = [WriteNop] in {
+ def NOOP : I<0x90, RawFrm, (outs), (ins), "nop", []>;
+ def NOOPW : I<0x1f, MRMXm, (outs), (ins i16mem:$zero),
+ "nop{w}\t$zero", []>, TB, OpSize16;
+ def NOOPL : I<0x1f, MRMXm, (outs), (ins i32mem:$zero),
+ "nop{l}\t$zero", []>, TB, OpSize32;
+ def NOOPQ : RI<0x1f, MRMXm, (outs), (ins i64mem:$zero),
+ "nop{q}\t$zero", []>, TB, Requires<[In64BitMode]>;
+ // Also allow register so we can assemble/disassemble
+ def NOOPWr : I<0x1f, MRMXr, (outs), (ins GR16:$zero),
+ "nop{w}\t$zero", []>, TB, OpSize16;
+ def NOOPLr : I<0x1f, MRMXr, (outs), (ins GR32:$zero),
+ "nop{l}\t$zero", []>, TB, OpSize32;
+ def NOOPQr : RI<0x1f, MRMXr, (outs), (ins GR64:$zero),
+ "nop{q}\t$zero", []>, TB, Requires<[In64BitMode]>;
+}
+
+
+// Constructing a stack frame.
+def ENTER : Ii16<0xC8, RawFrmImm8, (outs), (ins i16imm:$len, i8imm:$lvl),
+ "enter\t$len, $lvl", []>, Sched<[WriteMicrocoded]>;
+
+let SchedRW = [WriteALU] in {
+let Defs = [EBP, ESP], Uses = [EBP, ESP], mayLoad = 1, hasSideEffects=0 in
+def LEAVE : I<0xC9, RawFrm, (outs), (ins), "leave", []>,
+ Requires<[Not64BitMode]>;
+
+let Defs = [RBP,RSP], Uses = [RBP,RSP], mayLoad = 1, hasSideEffects = 0 in
+def LEAVE64 : I<0xC9, RawFrm, (outs), (ins), "leave", []>,
+ Requires<[In64BitMode]>;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//
+
+let isBarrier = 1, hasSideEffects = 1, usesCustomInserter = 1,
+ SchedRW = [WriteSystem] in
+ def Int_eh_sjlj_setup_dispatch
+ : PseudoI<(outs), (ins), [(X86eh_sjlj_setup_dispatch)]>;
+
+let Defs = [ESP], Uses = [ESP], hasSideEffects=0 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
+def POP16r : I<0x58, AddRegFrm, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>,
+ OpSize16;
+def POP32r : I<0x58, AddRegFrm, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>,
+ OpSize32, Requires<[Not64BitMode]>;
+// Long form for the disassembler.
+let isCodeGenOnly = 1, ForceDisassemble = 1 in {
+def POP16rmr: I<0x8F, MRM0r, (outs GR16:$reg), (ins), "pop{w}\t$reg", []>,
+ OpSize16;
+def POP32rmr: I<0x8F, MRM0r, (outs GR32:$reg), (ins), "pop{l}\t$reg", []>,
+ OpSize32, Requires<[Not64BitMode]>;
+} // isCodeGenOnly = 1, ForceDisassemble = 1
+} // mayLoad, SchedRW
+let mayStore = 1, mayLoad = 1, SchedRW = [WriteCopy] in {
+def POP16rmm: I<0x8F, MRM0m, (outs), (ins i16mem:$dst), "pop{w}\t$dst", []>,
+ OpSize16;
+def POP32rmm: I<0x8F, MRM0m, (outs), (ins i32mem:$dst), "pop{l}\t$dst", []>,
+ OpSize32, Requires<[Not64BitMode]>;
+} // mayStore, mayLoad, SchedRW
+
+let mayStore = 1, SchedRW = [WriteStore] in {
+def PUSH16r : I<0x50, AddRegFrm, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
+ OpSize16;
+def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>,
+ OpSize32, Requires<[Not64BitMode]>;
+// Long form for the disassembler.
+let isCodeGenOnly = 1, ForceDisassemble = 1 in {
+def PUSH16rmr: I<0xFF, MRM6r, (outs), (ins GR16:$reg), "push{w}\t$reg",[]>,
+ OpSize16;
+def PUSH32rmr: I<0xFF, MRM6r, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>,
+ OpSize32, Requires<[Not64BitMode]>;
+} // isCodeGenOnly = 1, ForceDisassemble = 1
+
+def PUSH16i8 : Ii8<0x6a, RawFrm, (outs), (ins i16i8imm:$imm),
+ "push{w}\t$imm", []>, OpSize16;
+def PUSH16i : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{w}\t$imm", []>, OpSize16;
+
+def PUSH32i8 : Ii8<0x6a, RawFrm, (outs), (ins i32i8imm:$imm),
+ "push{l}\t$imm", []>, OpSize32,
+ Requires<[Not64BitMode]>;
+def PUSH32i : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
+ "push{l}\t$imm", []>, OpSize32,
+ Requires<[Not64BitMode]>;
+} // mayStore, SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in {
+def PUSH16rmm: I<0xFF, MRM6m, (outs), (ins i16mem:$src), "push{w}\t$src", []>,
+ OpSize16;
+def PUSH32rmm: I<0xFF, MRM6m, (outs), (ins i32mem:$src), "push{l}\t$src", []>,
+ OpSize32, Requires<[Not64BitMode]>;
+} // mayLoad, mayStore, SchedRW
+
+}
+
+let isPseudo = 1, mayLoad = 1, mayStore = 1,
+ SchedRW = [WriteRMW], Defs = [ESP] in {
+ let Uses = [ESP] in
+ def RDFLAGS32 : PseudoI<(outs GR32:$dst), (ins),
+ [(set GR32:$dst, (int_x86_flags_read_u32))]>,
+ Requires<[Not64BitMode]>;
+
+ let Uses = [RSP] in
+ def RDFLAGS64 : PseudoI<(outs GR64:$dst), (ins),
+ [(set GR64:$dst, (int_x86_flags_read_u64))]>,
+ Requires<[In64BitMode]>;
+}
+
+let isPseudo = 1, mayLoad = 1, mayStore = 1,
+ SchedRW = [WriteRMW] in {
+ let Defs = [ESP, EFLAGS, DF], Uses = [ESP] in
+ def WRFLAGS32 : PseudoI<(outs), (ins GR32:$src),
+ [(int_x86_flags_write_u32 GR32:$src)]>,
+ Requires<[Not64BitMode]>;
+
+ let Defs = [RSP, EFLAGS, DF], Uses = [RSP] in
+ def WRFLAGS64 : PseudoI<(outs), (ins GR64:$src),
+ [(int_x86_flags_write_u64 GR64:$src)]>,
+ Requires<[In64BitMode]>;
+}
+
+let Defs = [ESP, EFLAGS, DF], Uses = [ESP], mayLoad = 1, hasSideEffects=0,
+ SchedRW = [WriteLoad] in {
+def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize16;
+def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>, OpSize32,
+ Requires<[Not64BitMode]>;
+}
+
+let Defs = [ESP], Uses = [ESP, EFLAGS, DF], mayStore = 1, hasSideEffects=0,
+ SchedRW = [WriteStore] in {
+def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize16;
+def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>, OpSize32,
+ Requires<[Not64BitMode]>;
+}
+
+let Defs = [RSP], Uses = [RSP], hasSideEffects=0 in {
+let mayLoad = 1, SchedRW = [WriteLoad] in {
+def POP64r : I<0x58, AddRegFrm, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>,
+ OpSize32, Requires<[In64BitMode]>;
+// Long form for the disassembler.
+let isCodeGenOnly = 1, ForceDisassemble = 1 in {
+def POP64rmr: I<0x8F, MRM0r, (outs GR64:$reg), (ins), "pop{q}\t$reg", []>,
+ OpSize32, Requires<[In64BitMode]>;
+} // isCodeGenOnly = 1, ForceDisassemble = 1
+} // mayLoad, SchedRW
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in
+def POP64rmm: I<0x8F, MRM0m, (outs), (ins i64mem:$dst), "pop{q}\t$dst", []>,
+ OpSize32, Requires<[In64BitMode]>;
+let mayStore = 1, SchedRW = [WriteStore] in {
+def PUSH64r : I<0x50, AddRegFrm, (outs), (ins GR64:$reg), "push{q}\t$reg", []>,
+ OpSize32, Requires<[In64BitMode]>;
+// Long form for the disassembler.
+let isCodeGenOnly = 1, ForceDisassemble = 1 in {
+def PUSH64rmr: I<0xFF, MRM6r, (outs), (ins GR64:$reg), "push{q}\t$reg", []>,
+ OpSize32, Requires<[In64BitMode]>;
+} // isCodeGenOnly = 1, ForceDisassemble = 1
+} // mayStore, SchedRW
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteCopy] in {
+def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>,
+ OpSize32, Requires<[In64BitMode]>;
+} // mayLoad, mayStore, SchedRW
+}
+
+let Defs = [RSP], Uses = [RSP], hasSideEffects = 0, mayStore = 1,
+ SchedRW = [WriteStore] in {
+def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
+ "push{q}\t$imm", []>, OpSize32,
+ Requires<[In64BitMode]>;
+def PUSH64i32 : Ii32S<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
+ "push{q}\t$imm", []>, OpSize32,
+ Requires<[In64BitMode]>;
+}
+
+let Defs = [RSP, EFLAGS, DF], Uses = [RSP], mayLoad = 1, hasSideEffects=0 in
+def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
+ OpSize32, Requires<[In64BitMode]>, Sched<[WriteLoad]>;
+let Defs = [RSP], Uses = [RSP, EFLAGS, DF], mayStore = 1, hasSideEffects=0 in
+def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
+ OpSize32, Requires<[In64BitMode]>, Sched<[WriteStore]>;
+
+let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
+ mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteLoad] in {
+def POPA32 : I<0x61, RawFrm, (outs), (ins), "popal", []>,
+ OpSize32, Requires<[Not64BitMode]>;
+def POPA16 : I<0x61, RawFrm, (outs), (ins), "popaw", []>,
+ OpSize16, Requires<[Not64BitMode]>;
+}
+let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
+ mayStore = 1, hasSideEffects = 0, SchedRW = [WriteStore] in {
+def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pushal", []>,
+ OpSize32, Requires<[Not64BitMode]>;
+def PUSHA16 : I<0x60, RawFrm, (outs), (ins), "pushaw", []>,
+ OpSize16, Requires<[Not64BitMode]>;
+}
+
+let Constraints = "$src = $dst", SchedRW = [WriteBSWAP32] in {
+// This instruction is a consequence of BSWAP32r observing operand size. The
+// encoding is valid, but the behavior is undefined.
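+// For example, the disassembler still decodes 66 0F C8+r as "bswap %ax", but
+// the resulting value of AX is architecturally undefined.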
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+def BSWAP16r_BAD : I<0xC8, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
+ "bswap{w}\t$dst", []>, OpSize16, TB;
+// GR32 = bswap GR32
+def BSWAP32r : I<0xC8, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
+ "bswap{l}\t$dst",
+ [(set GR32:$dst, (bswap GR32:$src))]>, OpSize32, TB;
+
+let SchedRW = [WriteBSWAP64] in
+def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
+ "bswap{q}\t$dst",
+ [(set GR64:$dst, (bswap GR64:$src))]>, TB;
+} // Constraints = "$src = $dst", SchedRW
+
+// Bit scan instructions.
+let Defs = [EFLAGS] in {
+def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "bsf{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>,
+ PS, OpSize16, Sched<[WriteBSF]>;
+def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bsf{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>,
+ PS, OpSize16, Sched<[WriteBSFLd]>;
+def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "bsf{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))]>,
+ PS, OpSize32, Sched<[WriteBSF]>;
+def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bsf{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>,
+ PS, OpSize32, Sched<[WriteBSFLd]>;
+def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>,
+ PS, Sched<[WriteBSF]>;
+def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsf{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>,
+ PS, Sched<[WriteBSFLd]>;
+
+def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "bsr{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>,
+ PS, OpSize16, Sched<[WriteBSR]>;
+def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bsr{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>,
+ PS, OpSize16, Sched<[WriteBSRLd]>;
+def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "bsr{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))]>,
+ PS, OpSize32, Sched<[WriteBSR]>;
+def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bsr{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>,
+ PS, OpSize32, Sched<[WriteBSRLd]>;
+def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>,
+ PS, Sched<[WriteBSR]>;
+def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "bsr{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>,
+ PS, Sched<[WriteBSRLd]>;
+} // Defs = [EFLAGS]
+
+let SchedRW = [WriteMicrocoded] in {
+let Defs = [EDI,ESI], Uses = [EDI,ESI,DF] in {
+def MOVSB : I<0xA4, RawFrmDstSrc, (outs), (ins dstidx8:$dst, srcidx8:$src),
+ "movsb\t{$src, $dst|$dst, $src}", []>;
+def MOVSW : I<0xA5, RawFrmDstSrc, (outs), (ins dstidx16:$dst, srcidx16:$src),
+ "movsw\t{$src, $dst|$dst, $src}", []>, OpSize16;
+def MOVSL : I<0xA5, RawFrmDstSrc, (outs), (ins dstidx32:$dst, srcidx32:$src),
+ "movs{l|d}\t{$src, $dst|$dst, $src}", []>, OpSize32;
+def MOVSQ : RI<0xA5, RawFrmDstSrc, (outs), (ins dstidx64:$dst, srcidx64:$src),
+ "movsq\t{$src, $dst|$dst, $src}", []>,
+ Requires<[In64BitMode]>;
+}
+
+let Defs = [EDI], Uses = [AL,EDI,DF] in
+def STOSB : I<0xAA, RawFrmDst, (outs), (ins dstidx8:$dst),
+ "stosb\t{%al, $dst|$dst, al}", []>;
+let Defs = [EDI], Uses = [AX,EDI,DF] in
+def STOSW : I<0xAB, RawFrmDst, (outs), (ins dstidx16:$dst),
+ "stosw\t{%ax, $dst|$dst, ax}", []>, OpSize16;
+let Defs = [EDI], Uses = [EAX,EDI,DF] in
+def STOSL : I<0xAB, RawFrmDst, (outs), (ins dstidx32:$dst),
+ "stos{l|d}\t{%eax, $dst|$dst, eax}", []>, OpSize32;
+let Defs = [RDI], Uses = [RAX,RDI,DF] in
+def STOSQ : RI<0xAB, RawFrmDst, (outs), (ins dstidx64:$dst),
+ "stosq\t{%rax, $dst|$dst, rax}", []>,
+ Requires<[In64BitMode]>;
+
+let Defs = [EDI,EFLAGS], Uses = [AL,EDI,DF] in
+def SCASB : I<0xAE, RawFrmDst, (outs), (ins dstidx8:$dst),
+ "scasb\t{$dst, %al|al, $dst}", []>;
+let Defs = [EDI,EFLAGS], Uses = [AX,EDI,DF] in
+def SCASW : I<0xAF, RawFrmDst, (outs), (ins dstidx16:$dst),
+ "scasw\t{$dst, %ax|ax, $dst}", []>, OpSize16;
+let Defs = [EDI,EFLAGS], Uses = [EAX,EDI,DF] in
+def SCASL : I<0xAF, RawFrmDst, (outs), (ins dstidx32:$dst),
+ "scas{l|d}\t{$dst, %eax|eax, $dst}", []>, OpSize32;
+let Defs = [EDI,EFLAGS], Uses = [RAX,EDI,DF] in
+def SCASQ : RI<0xAF, RawFrmDst, (outs), (ins dstidx64:$dst),
+ "scasq\t{$dst, %rax|rax, $dst}", []>,
+ Requires<[In64BitMode]>;
+
+let Defs = [EDI,ESI,EFLAGS], Uses = [EDI,ESI,DF] in {
+def CMPSB : I<0xA6, RawFrmDstSrc, (outs), (ins dstidx8:$dst, srcidx8:$src),
+ "cmpsb\t{$dst, $src|$src, $dst}", []>;
+def CMPSW : I<0xA7, RawFrmDstSrc, (outs), (ins dstidx16:$dst, srcidx16:$src),
+ "cmpsw\t{$dst, $src|$src, $dst}", []>, OpSize16;
+def CMPSL : I<0xA7, RawFrmDstSrc, (outs), (ins dstidx32:$dst, srcidx32:$src),
+ "cmps{l|d}\t{$dst, $src|$src, $dst}", []>, OpSize32;
+def CMPSQ : RI<0xA7, RawFrmDstSrc, (outs), (ins dstidx64:$dst, srcidx64:$src),
+ "cmpsq\t{$dst, $src|$src, $dst}", []>,
+ Requires<[In64BitMode]>;
+}
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+let SchedRW = [WriteMove] in {
+let hasSideEffects = 0, isMoveReg = 1 in {
+def MOV8rr : I<0x88, MRMDestReg, (outs GR8 :$dst), (ins GR8 :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}", []>;
+def MOV16rr : I<0x89, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize16;
+def MOV32rr : I<0x89, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, OpSize32;
+def MOV64rr : RI<0x89, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+}
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
+def MOV8ri : Ii8 <0xB0, AddRegFrm, (outs GR8 :$dst), (ins i8imm :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(set GR8:$dst, imm:$src)]>;
+def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, imm:$src)]>, OpSize16;
+def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, imm:$src)]>, OpSize32;
+def MOV64ri32 : RIi32S<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, i64immSExt32:$src)]>;
+}
+let isReMaterializable = 1, isMoveImm = 1 in {
+def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
+ "movabs{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, imm:$src)]>;
+}
+
+// Longer forms that use a ModR/M byte. Needed for the disassembler.
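+// For example, MOV8ri_alt below lets C6 /0 ib be disassembled as a plain
+// "mov $imm, <r8>", even though the shorter B0+rb encoding (MOV8ri above) is
+// what gets assembled.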
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
+def MOV8ri_alt : Ii8 <0xC6, MRM0r, (outs GR8 :$dst), (ins i8imm :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}", []>;
+def MOV16ri_alt : Ii16<0xC7, MRM0r, (outs GR16:$dst), (ins i16imm:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize16;
+def MOV32ri_alt : Ii32<0xC7, MRM0r, (outs GR32:$dst), (ins i32imm:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, OpSize32;
+}
+} // SchedRW
+
+let SchedRW = [WriteStore] in {
+def MOV8mi : Ii8 <0xC6, MRM0m, (outs), (ins i8mem :$dst, i8imm :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(store (i8 imm_su:$src), addr:$dst)]>;
+def MOV16mi : Ii16<0xC7, MRM0m, (outs), (ins i16mem:$dst, i16imm:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(store (i16 imm_su:$src), addr:$dst)]>, OpSize16;
+def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(store (i32 imm_su:$src), addr:$dst)]>, OpSize32;
+def MOV64mi32 : RIi32S<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store i64immSExt32_su:$src, addr:$dst)]>,
+ Requires<[In64BitMode]>;
+} // SchedRW
+
+def : Pat<(i32 relocImm:$src), (MOV32ri relocImm:$src)>;
+def : Pat<(i64 relocImm:$src), (MOV64ri relocImm:$src)>;
+
+def : Pat<(store (i8 relocImm8_su:$src), addr:$dst),
+ (MOV8mi addr:$dst, relocImm8_su:$src)>;
+def : Pat<(store (i16 relocImm16_su:$src), addr:$dst),
+ (MOV16mi addr:$dst, relocImm16_su:$src)>;
+def : Pat<(store (i32 relocImm32_su:$src), addr:$dst),
+ (MOV32mi addr:$dst, relocImm32_su:$src)>;
+def : Pat<(store (i64 i64relocImmSExt32_su:$src), addr:$dst),
+ (MOV64mi32 addr:$dst, i64immSExt32_su:$src)>;
+
+let hasSideEffects = 0 in {
+
+/// Memory-offset versions of moves. The immediate is an address-mode-sized
+/// offset from the segment base.
+let SchedRW = [WriteALU] in {
+let mayLoad = 1 in {
+let Defs = [AL] in
+def MOV8ao32 : Ii32<0xA0, RawFrmMemOffs, (outs), (ins offset32_8:$src),
+ "mov{b}\t{$src, %al|al, $src}", []>,
+ AdSize32;
+let Defs = [AX] in
+def MOV16ao32 : Ii32<0xA1, RawFrmMemOffs, (outs), (ins offset32_16:$src),
+ "mov{w}\t{$src, %ax|ax, $src}", []>,
+ OpSize16, AdSize32;
+let Defs = [EAX] in
+def MOV32ao32 : Ii32<0xA1, RawFrmMemOffs, (outs), (ins offset32_32:$src),
+ "mov{l}\t{$src, %eax|eax, $src}", []>,
+ OpSize32, AdSize32;
+let Defs = [RAX] in
+def MOV64ao32 : RIi32<0xA1, RawFrmMemOffs, (outs), (ins offset32_64:$src),
+ "mov{q}\t{$src, %rax|rax, $src}", []>,
+ AdSize32;
+
+let Defs = [AL] in
+def MOV8ao16 : Ii16<0xA0, RawFrmMemOffs, (outs), (ins offset16_8:$src),
+ "mov{b}\t{$src, %al|al, $src}", []>, AdSize16;
+let Defs = [AX] in
+def MOV16ao16 : Ii16<0xA1, RawFrmMemOffs, (outs), (ins offset16_16:$src),
+ "mov{w}\t{$src, %ax|ax, $src}", []>,
+ OpSize16, AdSize16;
+let Defs = [EAX] in
+def MOV32ao16 : Ii16<0xA1, RawFrmMemOffs, (outs), (ins offset16_32:$src),
+ "mov{l}\t{$src, %eax|eax, $src}", []>,
+ AdSize16, OpSize32;
+} // mayLoad
+let mayStore = 1 in {
+let Uses = [AL] in
+def MOV8o32a : Ii32<0xA2, RawFrmMemOffs, (outs), (ins offset32_8:$dst),
+ "mov{b}\t{%al, $dst|$dst, al}", []>, AdSize32;
+let Uses = [AX] in
+def MOV16o32a : Ii32<0xA3, RawFrmMemOffs, (outs), (ins offset32_16:$dst),
+ "mov{w}\t{%ax, $dst|$dst, ax}", []>,
+ OpSize16, AdSize32;
+let Uses = [EAX] in
+def MOV32o32a : Ii32<0xA3, RawFrmMemOffs, (outs), (ins offset32_32:$dst),
+ "mov{l}\t{%eax, $dst|$dst, eax}", []>,
+ OpSize32, AdSize32;
+let Uses = [RAX] in
+def MOV64o32a : RIi32<0xA3, RawFrmMemOffs, (outs), (ins offset32_64:$dst),
+ "mov{q}\t{%rax, $dst|$dst, rax}", []>,
+ AdSize32;
+
+let Uses = [AL] in
+def MOV8o16a : Ii16<0xA2, RawFrmMemOffs, (outs), (ins offset16_8:$dst),
+ "mov{b}\t{%al, $dst|$dst, al}", []>, AdSize16;
+let Uses = [AX] in
+def MOV16o16a : Ii16<0xA3, RawFrmMemOffs, (outs), (ins offset16_16:$dst),
+ "mov{w}\t{%ax, $dst|$dst, ax}", []>,
+ OpSize16, AdSize16;
+let Uses = [EAX] in
+def MOV32o16a : Ii16<0xA3, RawFrmMemOffs, (outs), (ins offset16_32:$dst),
+ "mov{l}\t{%eax, $dst|$dst, eax}", []>,
+ OpSize32, AdSize16;
+} // mayStore
+
+// These forms all have full 64-bit absolute addresses in their instructions
+// and use the movabs mnemonic to indicate this specific form.
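+// For example, MOV8ao64 below loads AL from a full 8-byte absolute address
+// that is encoded directly in the instruction (AdSize64).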
+let mayLoad = 1 in {
+let Defs = [AL] in
+def MOV8ao64 : Ii64<0xA0, RawFrmMemOffs, (outs), (ins offset64_8:$src),
+ "movabs{b}\t{$src, %al|al, $src}", []>,
+ AdSize64;
+let Defs = [AX] in
+def MOV16ao64 : Ii64<0xA1, RawFrmMemOffs, (outs), (ins offset64_16:$src),
+ "movabs{w}\t{$src, %ax|ax, $src}", []>,
+ OpSize16, AdSize64;
+let Defs = [EAX] in
+def MOV32ao64 : Ii64<0xA1, RawFrmMemOffs, (outs), (ins offset64_32:$src),
+ "movabs{l}\t{$src, %eax|eax, $src}", []>,
+ OpSize32, AdSize64;
+let Defs = [RAX] in
+def MOV64ao64 : RIi64<0xA1, RawFrmMemOffs, (outs), (ins offset64_64:$src),
+ "movabs{q}\t{$src, %rax|rax, $src}", []>,
+ AdSize64;
+} // mayLoad
+
+let mayStore = 1 in {
+let Uses = [AL] in
+def MOV8o64a : Ii64<0xA2, RawFrmMemOffs, (outs), (ins offset64_8:$dst),
+ "movabs{b}\t{%al, $dst|$dst, al}", []>,
+ AdSize64;
+let Uses = [AX] in
+def MOV16o64a : Ii64<0xA3, RawFrmMemOffs, (outs), (ins offset64_16:$dst),
+ "movabs{w}\t{%ax, $dst|$dst, ax}", []>,
+ OpSize16, AdSize64;
+let Uses = [EAX] in
+def MOV32o64a : Ii64<0xA3, RawFrmMemOffs, (outs), (ins offset64_32:$dst),
+ "movabs{l}\t{%eax, $dst|$dst, eax}", []>,
+ OpSize32, AdSize64;
+let Uses = [RAX] in
+def MOV64o64a : RIi64<0xA3, RawFrmMemOffs, (outs), (ins offset64_64:$dst),
+ "movabs{q}\t{%rax, $dst|$dst, rax}", []>,
+ AdSize64;
+} // mayStore
+} // SchedRW
+} // hasSideEffects = 0
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
+ SchedRW = [WriteMove], isMoveReg = 1 in {
+def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
+ "mov{b}\t{$src, $dst|$dst, $src}", []>;
+def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize16;
+def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, OpSize32;
+def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
+def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(set GR8:$dst, (loadi8 addr:$src))]>;
+def MOV16rm : I<0x8B, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (loadi16 addr:$src))]>, OpSize16;
+def MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (loadi32 addr:$src))]>, OpSize32;
+def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (load addr:$src))]>;
+}
+
+let SchedRW = [WriteStore] in {
+def MOV8mr : I<0x88, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src),
+ "mov{b}\t{$src, $dst|$dst, $src}",
+ [(store GR8:$src, addr:$dst)]>;
+def MOV16mr : I<0x89, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "mov{w}\t{$src, $dst|$dst, $src}",
+ [(store GR16:$src, addr:$dst)]>, OpSize16;
+def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}",
+ [(store GR32:$src, addr:$dst)]>, OpSize32;
+def MOV64mr : RI<0x89, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}",
+ [(store GR64:$src, addr:$dst)]>;
+} // SchedRW
+
+// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
+// that they can be used for copying and storing h registers, which can't be
+// encoded when a REX prefix is present.
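+// For example, AH cannot be encoded together with a REX prefix, so a byte
+// store of %ah through a REX-requiring address such as (%r8) is not
+// representable; the _NOREX operand classes keep such combinations from
+// arising in the first place.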
+let isCodeGenOnly = 1 in {
+let hasSideEffects = 0, isMoveReg = 1 in
+def MOV8rr_NOREX : I<0x88, MRMDestReg,
+ (outs GR8_NOREX:$dst), (ins GR8_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src}", []>,
+ Sched<[WriteMove]>;
+let mayStore = 1, hasSideEffects = 0 in
+def MOV8mr_NOREX : I<0x88, MRMDestMem,
+ (outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src}", []>,
+ Sched<[WriteStore]>;
+let mayLoad = 1, hasSideEffects = 0,
+ canFoldAsLoad = 1, isReMaterializable = 1 in
+def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
+ (outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
+ "mov{b}\t{$src, $dst|$dst, $src}", []>,
+ Sched<[WriteLoad]>;
+}
+
+
+// Condition code ops, incl. set if equal/not equal/...
+let SchedRW = [WriteLAHFSAHF] in {
+let Defs = [EFLAGS], Uses = [AH], hasSideEffects = 0 in
+def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", []>, // flags = AH
+ Requires<[HasLAHFSAHF]>;
+let Defs = [AH], Uses = [EFLAGS], hasSideEffects = 0 in
+def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>, // AH = flags
+ Requires<[HasLAHFSAHF]>;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// Bit test instructions: BT, BTS, BTR, BTC.
+
+let Defs = [EFLAGS] in {
+let SchedRW = [WriteBitTest] in {
+def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>,
+ OpSize16, TB;
+def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))]>,
+ OpSize32, TB;
+def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB;
+} // SchedRW
+
+// Unlike with the register+register form, the memory+register form of the
+// bt instruction does not ignore the high bits of the index. From ISel's
+// perspective, this is pretty bizarre. Make these instructions disassembly-only
+// for now. These instructions are also slow on modern CPUs, so that's another
+// reason to avoid generating them.
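+// For example, "bt %eax, (%rdi)" treats EAX as a full (signed) bit index, so
+// it may access memory well outside the dword at (%rdi).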
+
+let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteBitTestRegLd] in {
+ def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ []>, OpSize16, TB;
+ def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ []>, OpSize32, TB;
+ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ []>, TB;
+}
+
+let SchedRW = [WriteBitTest] in {
+def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16u8imm:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR16:$src1, imm:$src2))]>,
+ OpSize16, TB;
+def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32u8imm:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR32:$src1, imm:$src2))]>,
+ OpSize32, TB;
+def BT64ri8 : RIi8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64u8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt GR64:$src1, imm:$src2))]>, TB;
+} // SchedRW
+
+// Note that these instructions aren't slow here: the slowness noted above only
+// applies when the other operand is in a register. When it's an immediate,
+// bt is still fast.
+let SchedRW = [WriteBitTestImmLd] in {
+def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16u8imm:$src2),
+ "bt{w}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt (loadi16 addr:$src1),
+ imm:$src2))]>,
+ OpSize16, TB;
+def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32u8imm:$src2),
+ "bt{l}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt (loadi32 addr:$src1),
+ imm:$src2))]>,
+ OpSize32, TB;
+def BT64mi8 : RIi8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64u8imm:$src2),
+ "bt{q}\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86bt (loadi64 addr:$src1),
+ imm:$src2))]>, TB,
+ Requires<[In64BitMode]>;
+} // SchedRW
+
+let hasSideEffects = 0 in {
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
+def BTC16rr : I<0xBB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", []>,
+ OpSize16, TB;
+def BTC32rr : I<0xBB, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", []>,
+ OpSize32, TB;
+def BTC64rr : RI<0xBB, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetRegRMW] in {
+def BTC16mr : I<0xBB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", []>,
+ OpSize16, TB;
+def BTC32mr : I<0xBB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", []>,
+ OpSize32, TB;
+def BTC64mr : RI<0xBB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+}
+
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
+def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16u8imm:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
+def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32u8imm:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB;
+def BTC64ri8 : RIi8<0xBA, MRM7r, (outs GR64:$dst), (ins GR64:$src1, i64u8imm:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
+def BTC16mi8 : Ii8<0xBA, MRM7m, (outs), (ins i16mem:$src1, i16u8imm:$src2),
+ "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
+def BTC32mi8 : Ii8<0xBA, MRM7m, (outs), (ins i32mem:$src1, i32u8imm:$src2),
+ "btc{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB;
+def BTC64mi8 : RIi8<0xBA, MRM7m, (outs), (ins i64mem:$src1, i64u8imm:$src2),
+ "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB,
+ Requires<[In64BitMode]>;
+}
+
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
+def BTR16rr : I<0xB3, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", []>,
+ OpSize16, TB;
+def BTR32rr : I<0xB3, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", []>,
+ OpSize32, TB;
+def BTR64rr : RI<0xB3, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetRegRMW] in {
+def BTR16mr : I<0xB3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", []>,
+ OpSize16, TB;
+def BTR32mr : I<0xB3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", []>,
+ OpSize32, TB;
+def BTR64mr : RI<0xB3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+}
+
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
+def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16u8imm:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", []>,
+ OpSize16, TB;
+def BTR32ri8 : Ii8<0xBA, MRM6r, (outs GR32:$dst), (ins GR32:$src1, i32u8imm:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", []>,
+ OpSize32, TB;
+def BTR64ri8 : RIi8<0xBA, MRM6r, (outs GR64:$dst), (ins GR64:$src1, i64u8imm:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
+def BTR16mi8 : Ii8<0xBA, MRM6m, (outs), (ins i16mem:$src1, i16u8imm:$src2),
+ "btr{w}\t{$src2, $src1|$src1, $src2}", []>,
+ OpSize16, TB;
+def BTR32mi8 : Ii8<0xBA, MRM6m, (outs), (ins i32mem:$src1, i32u8imm:$src2),
+ "btr{l}\t{$src2, $src1|$src1, $src2}", []>,
+ OpSize32, TB;
+def BTR64mi8 : RIi8<0xBA, MRM6m, (outs), (ins i64mem:$src1, i64u8imm:$src2),
+ "btr{q}\t{$src2, $src1|$src1, $src2}", []>, TB,
+ Requires<[In64BitMode]>;
+}
+
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
+def BTS16rr : I<0xAB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", []>,
+ OpSize16, TB;
+def BTS32rr : I<0xAB, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", []>,
+ OpSize32, TB;
+def BTS64rr : RI<0xAB, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetRegRMW] in {
+def BTS16mr : I<0xAB, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", []>,
+ OpSize16, TB;
+def BTS32mr : I<0xAB, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", []>,
+ OpSize32, TB;
+def BTS64mr : RI<0xAB, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+}
+
+let SchedRW = [WriteBitTestSet], Constraints = "$src1 = $dst" in {
+def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16u8imm:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
+def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32u8imm:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB;
+def BTS64ri8 : RIi8<0xBA, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i64u8imm:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, SchedRW = [WriteBitTestSetImmRMW] in {
+def BTS16mi8 : Ii8<0xBA, MRM5m, (outs), (ins i16mem:$src1, i16u8imm:$src2),
+ "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB;
+def BTS32mi8 : Ii8<0xBA, MRM5m, (outs), (ins i32mem:$src1, i32u8imm:$src2),
+ "bts{l}\t{$src2, $src1|$src1, $src2}", []>, OpSize32, TB;
+def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64u8imm:$src2),
+ "bts{q}\t{$src2, $src1|$src1, $src2}", []>, TB,
+ Requires<[In64BitMode]>;
+}
+} // hasSideEffects = 0
+} // Defs = [EFLAGS]
+
+
+//===----------------------------------------------------------------------===//
+// Atomic support
+//
+
+// Atomic swap. These are just normal xchg instructions. But since a memory
+// operand is referenced, the atomicity is ensured.
+multiclass ATOMIC_SWAP<bits<8> opc8, bits<8> opc, string mnemonic, string frag> {
+ let Constraints = "$val = $dst", SchedRW = [WriteALULd, WriteRMW] in {
+ def NAME#8rm : I<opc8, MRMSrcMem, (outs GR8:$dst),
+ (ins GR8:$val, i8mem:$ptr),
+ !strconcat(mnemonic, "{b}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR8:$dst,
+ (!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val))]>;
+ def NAME#16rm : I<opc, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$val, i16mem:$ptr),
+ !strconcat(mnemonic, "{w}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR16:$dst,
+ (!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val))]>,
+ OpSize16;
+ def NAME#32rm : I<opc, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$val, i32mem:$ptr),
+ !strconcat(mnemonic, "{l}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR32:$dst,
+ (!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val))]>,
+ OpSize32;
+ def NAME#64rm : RI<opc, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$val, i64mem:$ptr),
+ !strconcat(mnemonic, "{q}\t{$val, $ptr|$ptr, $val}"),
+ [(set
+ GR64:$dst,
+ (!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val))]>;
+ }
+}
+
+defm XCHG : ATOMIC_SWAP<0x86, 0x87, "xchg", "atomic_swap">;
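
As the comment above notes, an xchg whose second operand is memory is implicitly locked, so no LOCK prefix is required. A minimal C++ sketch (assuming a GCC/Clang-style toolchain targeting x86-64; names are illustrative) of the source-level operation these ATOMIC_SWAP patterns typically cover:

    #include <atomic>
    #include <cstdio>

    int main() {
      std::atomic<int> slot{1};
      // A seq_cst exchange on x86-64 normally lowers to a single memory-operand
      // xchg, which is atomic without an explicit LOCK prefix.
      int old = slot.exchange(42);
      std::printf("old=%d new=%d\n", old, slot.load());
    }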
+
+// Swap between registers.
+let SchedRW = [WriteXCHG] in {
+let Constraints = "$src1 = $dst1, $src2 = $dst2", hasSideEffects = 0 in {
+def XCHG8rr : I<0x86, MRMSrcReg, (outs GR8:$dst1, GR8:$dst2),
+ (ins GR8:$src1, GR8:$src2),
+ "xchg{b}\t{$src2, $src1|$src1, $src2}", []>;
+def XCHG16rr : I<0x87, MRMSrcReg, (outs GR16:$dst1, GR16:$dst2),
+ (ins GR16:$src1, GR16:$src2),
+ "xchg{w}\t{$src2, $src1|$src1, $src2}", []>,
+ OpSize16;
+def XCHG32rr : I<0x87, MRMSrcReg, (outs GR32:$dst1, GR32:$dst2),
+ (ins GR32:$src1, GR32:$src2),
+ "xchg{l}\t{$src2, $src1|$src1, $src2}", []>,
+ OpSize32;
+def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst1, GR64:$dst2),
+ (ins GR64:$src1 ,GR64:$src2),
+ "xchg{q}\t{$src2, $src1|$src1, $src2}", []>;
+}
+
+// Swap between EAX and other registers.
+let Constraints = "$src = $dst", hasSideEffects = 0 in {
+let Uses = [AX], Defs = [AX] in
+def XCHG16ar : I<0x90, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
+ "xchg{w}\t{$src, %ax|ax, $src}", []>, OpSize16;
+let Uses = [EAX], Defs = [EAX] in
+def XCHG32ar : I<0x90, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
+ "xchg{l}\t{$src, %eax|eax, $src}", []>, OpSize32;
+let Uses = [RAX], Defs = [RAX] in
+def XCHG64ar : RI<0x90, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
+ "xchg{q}\t{$src, %rax|rax, $src}", []>;
+}
+} // SchedRW
+
+let hasSideEffects = 0, Constraints = "$src1 = $dst1, $src2 = $dst2",
+ Defs = [EFLAGS], SchedRW = [WriteXCHG] in {
+def XADD8rr : I<0xC0, MRMDestReg, (outs GR8:$dst1, GR8:$dst2),
+ (ins GR8:$src1, GR8:$src2),
+ "xadd{b}\t{$src2, $src1|$src1, $src2}", []>, TB;
+def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst1, GR16:$dst2),
+ (ins GR16:$src1, GR16:$src2),
+ "xadd{w}\t{$src2, $src1|$src1, $src2}", []>, TB, OpSize16;
+def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst1, GR32:$dst2),
+ (ins GR32:$src1, GR32:$src2),
+ "xadd{l}\t{$src2, $src1|$src1, $src2}", []>, TB, OpSize32;
+def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst1, GR64:$dst2),
+ (ins GR64:$src1, GR64:$src2),
+ "xadd{q}\t{$src2, $src1|$src1, $src2}", []>, TB;
+} // SchedRW
+
+let mayLoad = 1, mayStore = 1, hasSideEffects = 0, Constraints = "$val = $dst",
+ Defs = [EFLAGS], SchedRW = [WriteALULd, WriteRMW] in {
+def XADD8rm : I<0xC0, MRMSrcMem, (outs GR8:$dst),
+ (ins GR8:$val, i8mem:$ptr),
+ "xadd{b}\t{$val, $ptr|$ptr, $val}", []>, TB;
+def XADD16rm : I<0xC1, MRMSrcMem, (outs GR16:$dst),
+ (ins GR16:$val, i16mem:$ptr),
+ "xadd{w}\t{$val, $ptr|$ptr, $val}", []>, TB,
+ OpSize16;
+def XADD32rm : I<0xC1, MRMSrcMem, (outs GR32:$dst),
+ (ins GR32:$val, i32mem:$ptr),
+ "xadd{l}\t{$val, $ptr|$ptr, $val}", []>, TB,
+ OpSize32;
+def XADD64rm : RI<0xC1, MRMSrcMem, (outs GR64:$dst),
+ (ins GR64:$val, i64mem:$ptr),
+ "xadd{q}\t{$val, $ptr|$ptr, $val}", []>, TB;
+
+}
+
+let SchedRW = [WriteCMPXCHG], hasSideEffects = 0 in {
+let Defs = [AL, EFLAGS], Uses = [AL] in
+def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
+ "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB;
+let Defs = [AX, EFLAGS], Uses = [AX] in
+def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+ "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16;
+let Defs = [EAX, EFLAGS], Uses = [EAX] in
+def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
+ "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32;
+let Defs = [RAX, EFLAGS], Uses = [RAX] in
+def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
+ "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
+} // SchedRW, hasSideEffects
+
+let SchedRW = [WriteCMPXCHGRMW], mayLoad = 1, mayStore = 1,
+ hasSideEffects = 0 in {
+let Defs = [AL, EFLAGS], Uses = [AL] in
+def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
+ "cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB;
+let Defs = [AX, EFLAGS], Uses = [AX] in
+def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize16;
+let Defs = [EAX, EFLAGS], Uses = [EAX] in
+def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB, OpSize32;
+let Defs = [RAX, EFLAGS], Uses = [RAX] in
+def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
+
+let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
+def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
+ "cmpxchg8b\t$dst", []>, TB, Requires<[HasCX8]>;
+
+let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX] in
+// NOTE: In64BitMode check needed for the AssemblerPredicate.
+def CMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$dst),
+ "cmpxchg16b\t$dst", []>,
+ TB, Requires<[HasCX16,In64BitMode]>;
+} // SchedRW, mayLoad, mayStore, hasSideEffects
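
For orientation, the operation that ultimately reaches a LOCK-prefixed cmpxchg on x86-64 is a compare-and-swap; a hedged C++ sketch (the function name is illustrative):

    #include <atomic>

    bool try_swap(std::atomic<int>& slot, int expected, int desired) {
      // Typically compiles to: lock cmpxchg desired, (slot), with the expected
      // value held in EAX, matching the implicit AL/AX/EAX/RAX uses above.
      return slot.compare_exchange_strong(expected, desired);
    }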
+
+
+// Lock instruction prefix
+let SchedRW = [WriteMicrocoded] in
+def LOCK_PREFIX : I<0xF0, PrefixByte, (outs), (ins), "lock", []>;
+
+let SchedRW = [WriteNop] in {
+
+// Rex64 instruction prefix
+def REX64_PREFIX : I<0x48, PrefixByte, (outs), (ins), "rex64", []>,
+ Requires<[In64BitMode]>;
+
+// Data16 instruction prefix
+def DATA16_PREFIX : I<0x66, PrefixByte, (outs), (ins), "data16", []>;
+} // SchedRW
+
+// Repeat string operation instruction prefixes
+let Defs = [ECX], Uses = [ECX,DF], SchedRW = [WriteMicrocoded] in {
+// Repeat (used with INS, OUTS, MOVS, LODS and STOS)
+def REP_PREFIX : I<0xF3, PrefixByte, (outs), (ins), "rep", []>;
+// Repeat while not equal (used with CMPS and SCAS)
+def REPNE_PREFIX : I<0xF2, PrefixByte, (outs), (ins), "repne", []>;
+}
+
+// String manipulation instructions
+let SchedRW = [WriteMicrocoded] in {
+let Defs = [AL,ESI], Uses = [ESI,DF] in
+def LODSB : I<0xAC, RawFrmSrc, (outs), (ins srcidx8:$src),
+ "lodsb\t{$src, %al|al, $src}", []>;
+let Defs = [AX,ESI], Uses = [ESI,DF] in
+def LODSW : I<0xAD, RawFrmSrc, (outs), (ins srcidx16:$src),
+ "lodsw\t{$src, %ax|ax, $src}", []>, OpSize16;
+let Defs = [EAX,ESI], Uses = [ESI,DF] in
+def LODSL : I<0xAD, RawFrmSrc, (outs), (ins srcidx32:$src),
+ "lods{l|d}\t{$src, %eax|eax, $src}", []>, OpSize32;
+let Defs = [RAX,ESI], Uses = [ESI,DF] in
+def LODSQ : RI<0xAD, RawFrmSrc, (outs), (ins srcidx64:$src),
+ "lodsq\t{$src, %rax|rax, $src}", []>,
+ Requires<[In64BitMode]>;
+}
+
+let SchedRW = [WriteSystem] in {
+let Defs = [ESI], Uses = [DX,ESI,DF] in {
+def OUTSB : I<0x6E, RawFrmSrc, (outs), (ins srcidx8:$src),
+ "outsb\t{$src, %dx|dx, $src}", []>;
+def OUTSW : I<0x6F, RawFrmSrc, (outs), (ins srcidx16:$src),
+ "outsw\t{$src, %dx|dx, $src}", []>, OpSize16;
+def OUTSL : I<0x6F, RawFrmSrc, (outs), (ins srcidx32:$src),
+ "outs{l|d}\t{$src, %dx|dx, $src}", []>, OpSize32;
+}
+
+let Defs = [EDI], Uses = [DX,EDI,DF] in {
+def INSB : I<0x6C, RawFrmDst, (outs), (ins dstidx8:$dst),
+ "insb\t{%dx, $dst|$dst, dx}", []>;
+def INSW : I<0x6D, RawFrmDst, (outs), (ins dstidx16:$dst),
+ "insw\t{%dx, $dst|$dst, dx}", []>, OpSize16;
+def INSL : I<0x6D, RawFrmDst, (outs), (ins dstidx32:$dst),
+ "ins{l|d}\t{%dx, $dst|$dst, dx}", []>, OpSize32;
+}
+}
+
+// EFLAGS management instructions.
+let SchedRW = [WriteALU], Defs = [EFLAGS], Uses = [EFLAGS] in {
+def CLC : I<0xF8, RawFrm, (outs), (ins), "clc", []>;
+def STC : I<0xF9, RawFrm, (outs), (ins), "stc", []>;
+def CMC : I<0xF5, RawFrm, (outs), (ins), "cmc", []>;
+}
+
+// DF management instructions.
+let SchedRW = [WriteALU], Defs = [DF] in {
+def CLD : I<0xFC, RawFrm, (outs), (ins), "cld", []>;
+def STD : I<0xFD, RawFrm, (outs), (ins), "std", []>;
+}
+
+// Table lookup instructions
+let Uses = [AL,EBX], Defs = [AL], hasSideEffects = 0, mayLoad = 1 in
+def XLAT : I<0xD7, RawFrm, (outs), (ins), "xlatb", []>, Sched<[WriteLoad]>;
+
+let SchedRW = [WriteMicrocoded] in {
+// ASCII Adjust After Addition
+let Uses = [AL,EFLAGS], Defs = [AX,EFLAGS], hasSideEffects = 0 in
+def AAA : I<0x37, RawFrm, (outs), (ins), "aaa", []>,
+ Requires<[Not64BitMode]>;
+
+// ASCII Adjust AX Before Division
+let Uses = [AX], Defs = [AX,EFLAGS], hasSideEffects = 0 in
+def AAD8i8 : Ii8<0xD5, RawFrm, (outs), (ins i8imm:$src),
+ "aad\t$src", []>, Requires<[Not64BitMode]>;
+
+// ASCII Adjust AX After Multiply
+let Uses = [AL], Defs = [AX,EFLAGS], hasSideEffects = 0 in
+def AAM8i8 : Ii8<0xD4, RawFrm, (outs), (ins i8imm:$src),
+ "aam\t$src", []>, Requires<[Not64BitMode]>;
+
+// ASCII Adjust AL After Subtraction - sets AF and CF on a decimal borrow.
+let Uses = [AL,EFLAGS], Defs = [AX,EFLAGS], hasSideEffects = 0 in
+def AAS : I<0x3F, RawFrm, (outs), (ins), "aas", []>,
+ Requires<[Not64BitMode]>;
+
+// Decimal Adjust AL after Addition
+let Uses = [AL,EFLAGS], Defs = [AL,EFLAGS], hasSideEffects = 0 in
+def DAA : I<0x27, RawFrm, (outs), (ins), "daa", []>,
+ Requires<[Not64BitMode]>;
+
+// Decimal Adjust AL after Subtraction
+let Uses = [AL,EFLAGS], Defs = [AL,EFLAGS], hasSideEffects = 0 in
+def DAS : I<0x2F, RawFrm, (outs), (ins), "das", []>,
+ Requires<[Not64BitMode]>;
+} // SchedRW
+
+let SchedRW = [WriteSystem] in {
+// Check Array Index Against Bounds
+// Note: "bound" does not have reversed operands in at&t syntax.
+def BOUNDS16rm : I<0x62, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "bound\t$dst, $src", []>, OpSize16,
+ Requires<[Not64BitMode]>;
+def BOUNDS32rm : I<0x62, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "bound\t$dst, $src", []>, OpSize32,
+ Requires<[Not64BitMode]>;
+
+// Adjust RPL Field of Segment Selector
+def ARPL16rr : I<0x63, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
+ "arpl\t{$src, $dst|$dst, $src}", []>,
+ Requires<[Not64BitMode]>;
+let mayStore = 1 in
+def ARPL16mr : I<0x63, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "arpl\t{$src, $dst|$dst, $src}", []>,
+ Requires<[Not64BitMode]>;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// MOVBE Instructions
+//
+let Predicates = [HasMOVBE] in {
+ let SchedRW = [WriteALULd] in {
+ def MOVBE16rm : I<0xF0, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "movbe{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (bswap (loadi16 addr:$src)))]>,
+ OpSize16, T8PS;
+ def MOVBE32rm : I<0xF0, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "movbe{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bswap (loadi32 addr:$src)))]>,
+ OpSize32, T8PS;
+ def MOVBE64rm : RI<0xF0, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "movbe{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (bswap (loadi64 addr:$src)))]>,
+ T8PS;
+ }
+ let SchedRW = [WriteStore] in {
+ def MOVBE16mr : I<0xF1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
+ "movbe{w}\t{$src, $dst|$dst, $src}",
+ [(store (bswap GR16:$src), addr:$dst)]>,
+ OpSize16, T8PS;
+ def MOVBE32mr : I<0xF1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movbe{l}\t{$src, $dst|$dst, $src}",
+ [(store (bswap GR32:$src), addr:$dst)]>,
+ OpSize32, T8PS;
+ def MOVBE64mr : RI<0xF1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "movbe{q}\t{$src, $dst|$dst, $src}",
+ [(store (bswap GR64:$src), addr:$dst)]>,
+ T8PS;
+ }
+}
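
As a rough illustration of what the MOVBE load/store patterns above match: a byte-swapped load or store written with the GCC/Clang __builtin_bswap* builtins is usually selected as a single movbe when the feature is enabled (e.g. -mmovbe); function names are illustrative:

    #include <cstdint>

    // Read a big-endian 64-bit value: matches (bswap (load ...)) above.
    uint64_t load_be64(const uint64_t* p) { return __builtin_bswap64(*p); }

    // Store a value big-endian: matches (store (bswap ...), addr) above.
    void store_be32(uint32_t* p, uint32_t v) { *p = __builtin_bswap32(v); }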
+
+//===----------------------------------------------------------------------===//
+// RDRAND Instruction
+//
+let Predicates = [HasRDRAND], Defs = [EFLAGS], SchedRW = [WriteSystem] in {
+ def RDRAND16r : I<0xC7, MRM6r, (outs GR16:$dst), (ins),
+ "rdrand{w}\t$dst", [(set GR16:$dst, EFLAGS, (X86rdrand))]>,
+ OpSize16, PS;
+ def RDRAND32r : I<0xC7, MRM6r, (outs GR32:$dst), (ins),
+ "rdrand{l}\t$dst", [(set GR32:$dst, EFLAGS, (X86rdrand))]>,
+ OpSize32, PS;
+ def RDRAND64r : RI<0xC7, MRM6r, (outs GR64:$dst), (ins),
+ "rdrand{q}\t$dst", [(set GR64:$dst, EFLAGS, (X86rdrand))]>,
+ PS;
+}
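
A short usage sketch for RDRAND via the compiler intrinsic (assuming <immintrin.h> and -mrdrnd): the intrinsic reports the carry flag, which is why the patterns above also set EFLAGS:

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      unsigned int value;
      // _rdrand32_step returns the carry flag: 1 if the hardware produced a value.
      if (_rdrand32_step(&value))
        std::printf("rdrand: %u\n", value);
      else
        std::printf("rdrand: no entropy available, retry\n");
    }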
+
+//===----------------------------------------------------------------------===//
+// RDSEED Instruction
+//
+let Predicates = [HasRDSEED], Defs = [EFLAGS], SchedRW = [WriteSystem] in {
+ def RDSEED16r : I<0xC7, MRM7r, (outs GR16:$dst), (ins), "rdseed{w}\t$dst",
+ [(set GR16:$dst, EFLAGS, (X86rdseed))]>, OpSize16, PS;
+ def RDSEED32r : I<0xC7, MRM7r, (outs GR32:$dst), (ins), "rdseed{l}\t$dst",
+ [(set GR32:$dst, EFLAGS, (X86rdseed))]>, OpSize32, PS;
+ def RDSEED64r : RI<0xC7, MRM7r, (outs GR64:$dst), (ins), "rdseed{q}\t$dst",
+ [(set GR64:$dst, EFLAGS, (X86rdseed))]>, PS;
+}
+
+//===----------------------------------------------------------------------===//
+// LZCNT Instruction
+//
+let Predicates = [HasLZCNT], Defs = [EFLAGS] in {
+ def LZCNT16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "lzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctlz GR16:$src)), (implicit EFLAGS)]>,
+ XS, OpSize16, Sched<[WriteLZCNT]>;
+ def LZCNT16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "lzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (ctlz (loadi16 addr:$src))),
+ (implicit EFLAGS)]>, XS, OpSize16, Sched<[WriteLZCNTLd]>;
+
+ def LZCNT32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "lzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctlz GR32:$src)), (implicit EFLAGS)]>,
+ XS, OpSize32, Sched<[WriteLZCNT]>;
+ def LZCNT32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "lzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (ctlz (loadi32 addr:$src))),
+ (implicit EFLAGS)]>, XS, OpSize32, Sched<[WriteLZCNTLd]>;
+
+ def LZCNT64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "lzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctlz GR64:$src)), (implicit EFLAGS)]>,
+ XS, Sched<[WriteLZCNT]>;
+ def LZCNT64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "lzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (ctlz (loadi64 addr:$src))),
+ (implicit EFLAGS)]>, XS, Sched<[WriteLZCNTLd]>;
+}
+
+//===----------------------------------------------------------------------===//
+// BMI Instructions
+//
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+ def TZCNT16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
+ "tzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (cttz GR16:$src)), (implicit EFLAGS)]>,
+ XS, OpSize16, Sched<[WriteTZCNT]>;
+ def TZCNT16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
+ "tzcnt{w}\t{$src, $dst|$dst, $src}",
+ [(set GR16:$dst, (cttz (loadi16 addr:$src))),
+ (implicit EFLAGS)]>, XS, OpSize16, Sched<[WriteTZCNTLd]>;
+
+ def TZCNT32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
+ "tzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (cttz GR32:$src)), (implicit EFLAGS)]>,
+ XS, OpSize32, Sched<[WriteTZCNT]>;
+ def TZCNT32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
+ "tzcnt{l}\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (cttz (loadi32 addr:$src))),
+ (implicit EFLAGS)]>, XS, OpSize32, Sched<[WriteTZCNTLd]>;
+
+ def TZCNT64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
+ "tzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (cttz GR64:$src)), (implicit EFLAGS)]>,
+ XS, Sched<[WriteTZCNT]>;
+ def TZCNT64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
+ "tzcnt{q}\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (cttz (loadi64 addr:$src))),
+ (implicit EFLAGS)]>, XS, Sched<[WriteTZCNTLd]>;
+}
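
A sketch of the source idioms that normally select these instructions (assuming a C++20 toolchain built with -mlzcnt -mbmi; function names are illustrative):

    #include <bit>
    #include <cstdint>

    // With LZCNT, countl_zero is a single lzcnt (defined for 0, unlike bsr).
    int leading_zeros(uint32_t x)  { return std::countl_zero(x); }

    // With BMI, countr_zero is a single tzcnt (defined for 0, unlike bsf).
    int trailing_zeros(uint64_t x) { return std::countr_zero(x); }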
+
+multiclass bmi_bls<string mnemonic, Format RegMRM, Format MemMRM,
+ RegisterClass RC, X86MemOperand x86memop,
+ X86FoldableSchedWrite sched> {
+let hasSideEffects = 0 in {
+ def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src),
+ !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
+ T8PS, VEX_4V, Sched<[sched]>;
+ let mayLoad = 1 in
+ def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), []>,
+ T8PS, VEX_4V, Sched<[sched.Folded]>;
+}
+}
+
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+ defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, WriteBLS>;
+ defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, WriteBLS>, REX_W;
+ defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, WriteBLS>;
+ defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, WriteBLS>, REX_W;
+ defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, WriteBLS>;
+ defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, WriteBLS>, REX_W;
+}
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments to auto generate BMI instructions.
+//===----------------------------------------------------------------------===//
+
+def or_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86or_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+def xor_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86xor_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+def and_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
+ (X86and_flag node:$lhs, node:$rhs), [{
+ return hasNoCarryFlagUses(SDValue(N, 1));
+}]>;
+
+
+let Predicates = [HasBMI] in {
+ // FIXME(1): patterns for the load versions are not implemented
+ // FIXME(2): By only matching `add_su` and `ineg_su` we may emit
+ // extra `mov` instructions if `src` has future uses. It may be better
+ // to always match if `src` has more users.
+ def : Pat<(and GR32:$src, (add_su GR32:$src, -1)),
+ (BLSR32rr GR32:$src)>;
+ def : Pat<(and GR64:$src, (add_su GR64:$src, -1)),
+ (BLSR64rr GR64:$src)>;
+
+ def : Pat<(xor GR32:$src, (add_su GR32:$src, -1)),
+ (BLSMSK32rr GR32:$src)>;
+ def : Pat<(xor GR64:$src, (add_su GR64:$src, -1)),
+ (BLSMSK64rr GR64:$src)>;
+
+ def : Pat<(and GR32:$src, (ineg_su GR32:$src)),
+ (BLSI32rr GR32:$src)>;
+ def : Pat<(and GR64:$src, (ineg_su GR64:$src)),
+ (BLSI64rr GR64:$src)>;
+
+ // Versions to match flag producing ops.
+ def : Pat<(and_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
+ (BLSR32rr GR32:$src)>;
+ def : Pat<(and_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
+ (BLSR64rr GR64:$src)>;
+
+ def : Pat<(xor_flag_nocf GR32:$src, (add_su GR32:$src, -1)),
+ (BLSMSK32rr GR32:$src)>;
+ def : Pat<(xor_flag_nocf GR64:$src, (add_su GR64:$src, -1)),
+ (BLSMSK64rr GR64:$src)>;
+
+ def : Pat<(and_flag_nocf GR32:$src, (ineg_su GR32:$src)),
+ (BLSI32rr GR32:$src)>;
+ def : Pat<(and_flag_nocf GR64:$src, (ineg_su GR64:$src)),
+ (BLSI64rr GR64:$src)>;
+}
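
The patterns above key off standard bit-manipulation idioms; a C++ sketch of the forms that typically map onto BLSR/BLSMSK/BLSI when compiled with -mbmi (function names are illustrative):

    #include <cstdint>

    uint64_t clear_lowest(uint64_t x)   { return x & (x - 1); } // BLSR: reset lowest set bit
    uint64_t mask_through(uint64_t x)   { return x ^ (x - 1); } // BLSMSK: mask up to lowest set bit
    uint64_t isolate_lowest(uint64_t x) { return x & -x;      } // BLSI: isolate lowest set bit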
+
+multiclass bmi_bextr<bits<8> opc, string mnemonic, RegisterClass RC,
+ X86MemOperand x86memop, SDNode OpNode,
+ PatFrag ld_frag, X86FoldableSchedWrite Sched> {
+ def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
+ T8PS, VEX, Sched<[Sched]>;
+ def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpNode (ld_frag addr:$src1), RC:$src2)),
+ (implicit EFLAGS)]>, T8PS, VEX,
+ Sched<[Sched.Folded,
+ // x86memop:$src1
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault,
+ // RC:$src2
+ Sched.ReadAfterFold]>;
+}
+
+let Predicates = [HasBMI], Defs = [EFLAGS] in {
+ defm BEXTR32 : bmi_bextr<0xF7, "bextr{l}", GR32, i32mem,
+ X86bextr, loadi32, WriteBEXTR>;
+ defm BEXTR64 : bmi_bextr<0xF7, "bextr{q}", GR64, i64mem,
+ X86bextr, loadi64, WriteBEXTR>, REX_W;
+}
+
+multiclass bmi_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
+ X86MemOperand x86memop, SDNode Int,
+ PatFrag ld_frag, X86FoldableSchedWrite Sched> {
+ def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (Int RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
+ T8PS, VEX, Sched<[Sched]>;
+ def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (Int (ld_frag addr:$src1), RC:$src2)),
+ (implicit EFLAGS)]>, T8PS, VEX,
+ Sched<[Sched.Folded,
+ // x86memop:$src1
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault,
+ // RC:$src2
+ Sched.ReadAfterFold]>;
+}
+
+let Predicates = [HasBMI2], Defs = [EFLAGS] in {
+ defm BZHI32 : bmi_bzhi<0xF5, "bzhi{l}", GR32, i32mem,
+ X86bzhi, loadi32, WriteBZHI>;
+ defm BZHI64 : bmi_bzhi<0xF5, "bzhi{q}", GR64, i64mem,
+ X86bzhi, loadi64, WriteBZHI>, REX_W;
+}
+
+def CountTrailingOnes : SDNodeXForm<imm, [{
+ // Count the trailing ones in the immediate.
+ return getI8Imm(llvm::countr_one(N->getZExtValue()), SDLoc(N));
+}]>;
+
+def BEXTRMaskXForm : SDNodeXForm<imm, [{
+ unsigned Length = llvm::countr_one(N->getZExtValue());
+ return getI32Imm(Length << 8, SDLoc(N));
+}]>;
+
+def AndMask64 : ImmLeaf<i64, [{
+ return isMask_64(Imm) && !isUInt<32>(Imm);
+}]>;
+
+// Use BEXTR for 64-bit 'and' with large immediate 'mask'.
+let Predicates = [HasBMI, NoBMI2, NoTBM] in {
+ def : Pat<(and GR64:$src, AndMask64:$mask),
+ (BEXTR64rr GR64:$src,
+ (SUBREG_TO_REG (i64 0),
+ (MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
+ def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
+ (BEXTR64rm addr:$src,
+ (SUBREG_TO_REG (i64 0),
+ (MOV32ri (BEXTRMaskXForm imm:$mask)), sub_32bit))>;
+}
+
+// Use BZHI for 64-bit 'and' with large immediate 'mask'.
+let Predicates = [HasBMI2, NoTBM] in {
+ def : Pat<(and GR64:$src, AndMask64:$mask),
+ (BZHI64rr GR64:$src,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
+ def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
+ (BZHI64rm addr:$src,
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (MOV8ri (CountTrailingOnes imm:$mask)), sub_8bit))>;
+}
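
Both pattern groups above exist because a contiguous low-bit mask wider than 32 bits cannot be encoded as an imm32; a minimal sketch of the kind of code they target (the function name is illustrative):

    #include <cstdint>

    // (1ULL << 42) - 1 is a mask but not a 32-bit immediate, so instead of
    // materializing a 64-bit constant the 'and' can be selected as bextr/bzhi.
    uint64_t low42(uint64_t x) { return x & ((1ULL << 42) - 1); }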
+
+multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
+ X86MemOperand x86memop, SDNode OpNode,
+ PatFrag ld_frag> {
+ def rr : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>,
+ VEX_4V, Sched<[WriteALU]>;
+ def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (OpNode RC:$src1, (ld_frag addr:$src2)))]>,
+ VEX_4V, Sched<[WriteALU.Folded, WriteALU.ReadAfterFold]>;
+}
+
+let Predicates = [HasBMI2] in {
+ defm PDEP32 : bmi_pdep_pext<"pdep{l}", GR32, i32mem,
+ X86pdep, loadi32>, T8XD;
+ defm PDEP64 : bmi_pdep_pext<"pdep{q}", GR64, i64mem,
+ X86pdep, loadi64>, T8XD, REX_W;
+ defm PEXT32 : bmi_pdep_pext<"pext{l}", GR32, i32mem,
+ X86pext, loadi32>, T8XS;
+ defm PEXT64 : bmi_pdep_pext<"pext{q}", GR64, i64mem,
+ X86pext, loadi64>, T8XS, REX_W;
+}
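
A brief usage sketch for PDEP/PEXT via the compiler intrinsics (assuming <immintrin.h> and -mbmi2):

    #include <immintrin.h>
    #include <cstdint>
    #include <cstdio>

    int main() {
      // pext gathers the source bits selected by the mask into the low bits;
      // pdep scatters low source bits back out to the mask positions.
      uint32_t mask      = 0x0F0Fu;
      uint32_t packed    = _pext_u32(0x12345678u, mask);
      uint32_t scattered = _pdep_u32(packed, mask);
      std::printf("packed=%#x scattered=%#x\n", packed, scattered);
    }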
+
+//===----------------------------------------------------------------------===//
+// Lightweight Profiling Instructions
+
+let Predicates = [HasLWP], SchedRW = [WriteSystem] in {
+
+def LLWPCB : I<0x12, MRM0r, (outs), (ins GR32:$src), "llwpcb\t$src",
+ [(int_x86_llwpcb GR32:$src)]>, XOP, XOP9;
+def SLWPCB : I<0x12, MRM1r, (outs GR32:$dst), (ins), "slwpcb\t$dst",
+ [(set GR32:$dst, (int_x86_slwpcb))]>, XOP, XOP9;
+
+def LLWPCB64 : I<0x12, MRM0r, (outs), (ins GR64:$src), "llwpcb\t$src",
+ [(int_x86_llwpcb GR64:$src)]>, XOP, XOP9, REX_W;
+def SLWPCB64 : I<0x12, MRM1r, (outs GR64:$dst), (ins), "slwpcb\t$dst",
+ [(set GR64:$dst, (int_x86_slwpcb))]>, XOP, XOP9, REX_W;
+
+multiclass lwpins_intr<RegisterClass RC> {
+ def rri : Ii32<0x12, MRM0r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl),
+ "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
+ [(set EFLAGS, (X86lwpins RC:$src0, GR32:$src1, timm:$cntl))]>,
+ XOP_4V, XOPA;
+ let mayLoad = 1 in
+ def rmi : Ii32<0x12, MRM0m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl),
+ "lwpins\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
+ [(set EFLAGS, (X86lwpins RC:$src0, (loadi32 addr:$src1), timm:$cntl))]>,
+ XOP_4V, XOPA;
+}
+
+let Defs = [EFLAGS] in {
+ defm LWPINS32 : lwpins_intr<GR32>;
+ defm LWPINS64 : lwpins_intr<GR64>, REX_W;
+} // EFLAGS
+
+multiclass lwpval_intr<RegisterClass RC, Intrinsic Int> {
+ def rri : Ii32<0x12, MRM1r, (outs), (ins RC:$src0, GR32:$src1, i32imm:$cntl),
+ "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
+ [(Int RC:$src0, GR32:$src1, timm:$cntl)]>, XOP_4V, XOPA;
+ let mayLoad = 1 in
+ def rmi : Ii32<0x12, MRM1m, (outs), (ins RC:$src0, i32mem:$src1, i32imm:$cntl),
+ "lwpval\t{$cntl, $src1, $src0|$src0, $src1, $cntl}",
+ [(Int RC:$src0, (loadi32 addr:$src1), timm:$cntl)]>,
+ XOP_4V, XOPA;
+}
+
+defm LWPVAL32 : lwpval_intr<GR32, int_x86_lwpval32>;
+defm LWPVAL64 : lwpval_intr<GR64, int_x86_lwpval64>, REX_W;
+
+} // HasLWP, SchedRW
+
+//===----------------------------------------------------------------------===//
+// MONITORX/MWAITX Instructions
+//
+let SchedRW = [ WriteSystem ] in {
+ let Uses = [ EAX, ECX, EDX ] in
+ def MONITORX32rrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>,
+ TB, Requires<[ HasMWAITX, Not64BitMode ]>;
+ let Uses = [ RAX, ECX, EDX ] in
+ def MONITORX64rrr : I<0x01, MRM_FA, (outs), (ins), "monitorx", []>,
+ TB, Requires<[ HasMWAITX, In64BitMode ]>;
+
+ let Uses = [ ECX, EAX, EBX ] in {
+ def MWAITXrrr : I<0x01, MRM_FB, (outs), (ins), "mwaitx",
+ []>, TB, Requires<[ HasMWAITX ]>;
+ }
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// WAITPKG Instructions
+//
+let SchedRW = [WriteSystem] in {
+ def UMONITOR16 : I<0xAE, MRM6r, (outs), (ins GR16:$src),
+ "umonitor\t$src", [(int_x86_umonitor GR16:$src)]>,
+ XS, AdSize16, Requires<[HasWAITPKG, Not64BitMode]>;
+ def UMONITOR32 : I<0xAE, MRM6r, (outs), (ins GR32:$src),
+ "umonitor\t$src", [(int_x86_umonitor GR32:$src)]>,
+ XS, AdSize32, Requires<[HasWAITPKG]>;
+ def UMONITOR64 : I<0xAE, MRM6r, (outs), (ins GR64:$src),
+ "umonitor\t$src", [(int_x86_umonitor GR64:$src)]>,
+ XS, AdSize64, Requires<[HasWAITPKG, In64BitMode]>;
+ let Uses = [EAX, EDX], Defs = [EFLAGS] in {
+ def UMWAIT : I<0xAE, MRM6r,
+ (outs), (ins GR32orGR64:$src), "umwait\t$src",
+ [(set EFLAGS, (X86umwait GR32orGR64:$src, EDX, EAX))]>,
+ XD, Requires<[HasWAITPKG]>;
+ def TPAUSE : I<0xAE, MRM6r,
+ (outs), (ins GR32orGR64:$src), "tpause\t$src",
+ [(set EFLAGS, (X86tpause GR32orGR64:$src, EDX, EAX))]>,
+ PD, Requires<[HasWAITPKG]>;
+ }
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// MOVDIRI - Move doubleword/quadword as direct store
+//
+let SchedRW = [WriteStore] in {
+def MOVDIRI32 : I<0xF9, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movdiri\t{$src, $dst|$dst, $src}",
+ [(int_x86_directstore32 addr:$dst, GR32:$src)]>,
+ T8PS, Requires<[HasMOVDIRI]>;
+def MOVDIRI64 : RI<0xF9, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "movdiri\t{$src, $dst|$dst, $src}",
+ [(int_x86_directstore64 addr:$dst, GR64:$src)]>,
+ T8PS, Requires<[In64BitMode, HasMOVDIRI]>;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// MOVDIR64B - Move 64 bytes as direct store
+//
+let SchedRW = [WriteStore] in {
+def MOVDIR64B16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem_GR16:$src),
+ "movdir64b\t{$src, $dst|$dst, $src}", []>,
+ T8PD, AdSize16, Requires<[HasMOVDIR64B, Not64BitMode]>;
+def MOVDIR64B32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem_GR32:$src),
+ "movdir64b\t{$src, $dst|$dst, $src}",
+ [(int_x86_movdir64b GR32:$dst, addr:$src)]>,
+ T8PD, AdSize32, Requires<[HasMOVDIR64B]>;
+def MOVDIR64B64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$src),
+ "movdir64b\t{$src, $dst|$dst, $src}",
+ [(int_x86_movdir64b GR64:$dst, addr:$src)]>,
+ T8PD, AdSize64, Requires<[HasMOVDIR64B, In64BitMode]>;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// ENQCMD/S - Enqueue 64-byte command as user with 64-byte write atomicity
+//
+let SchedRW = [WriteStore], Defs = [EFLAGS] in {
+ def ENQCMD16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src),
+ "enqcmd\t{$src, $dst|$dst, $src}",
+ [(set EFLAGS, (X86enqcmd GR16:$dst, addr:$src))]>,
+ T8XD, AdSize16, Requires<[HasENQCMD, Not64BitMode]>;
+ def ENQCMD32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src),
+ "enqcmd\t{$src, $dst|$dst, $src}",
+ [(set EFLAGS, (X86enqcmd GR32:$dst, addr:$src))]>,
+ T8XD, AdSize32, Requires<[HasENQCMD]>;
+ def ENQCMD64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
+ "enqcmd\t{$src, $dst|$dst, $src}",
+ [(set EFLAGS, (X86enqcmd GR64:$dst, addr:$src))]>,
+ T8XD, AdSize64, Requires<[HasENQCMD, In64BitMode]>;
+
+ def ENQCMDS16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src),
+ "enqcmds\t{$src, $dst|$dst, $src}",
+ [(set EFLAGS, (X86enqcmds GR16:$dst, addr:$src))]>,
+ T8XS, AdSize16, Requires<[HasENQCMD, Not64BitMode]>;
+ def ENQCMDS32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src),
+ "enqcmds\t{$src, $dst|$dst, $src}",
+ [(set EFLAGS, (X86enqcmds GR32:$dst, addr:$src))]>,
+ T8XS, AdSize32, Requires<[HasENQCMD]>;
+ def ENQCMDS64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
+ "enqcmds\t{$src, $dst|$dst, $src}",
+ [(set EFLAGS, (X86enqcmds GR64:$dst, addr:$src))]>,
+ T8XS, AdSize64, Requires<[HasENQCMD, In64BitMode]>;
+}
+
+//===----------------------------------------------------------------------===//
+// CLZERO Instruction
+//
+let SchedRW = [WriteLoad] in {
+ let Uses = [EAX] in
+ def CLZERO32r : I<0x01, MRM_FC, (outs), (ins), "clzero", []>,
+ TB, Requires<[HasCLZERO, Not64BitMode]>;
+ let Uses = [RAX] in
+ def CLZERO64r : I<0x01, MRM_FC, (outs), (ins), "clzero", []>,
+ TB, Requires<[HasCLZERO, In64BitMode]>;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// INVLPGB Instruction
+// OPCODE 0F 01 FE
+//
+let SchedRW = [WriteSystem] in {
+ let Uses = [EAX, EDX] in
+ def INVLPGB32 : I<0x01, MRM_FE, (outs), (ins),
+ "invlpgb", []>,
+ PS, Requires<[Not64BitMode]>;
+ let Uses = [RAX, EDX] in
+ def INVLPGB64 : I<0x01, MRM_FE, (outs), (ins),
+ "invlpgb", []>,
+ PS, Requires<[In64BitMode]>;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// TLBSYNC Instruction
+// OPCODE 0F 01 FF
+//
+let SchedRW = [WriteSystem] in {
+ def TLBSYNC : I<0x01, MRM_FF, (outs), (ins),
+ "tlbsync", []>,
+ PS, Requires<[]>;
+} // SchedRW
+
+//===----------------------------------------------------------------------===//
+// HRESET Instruction
+//
+let Uses = [EAX], SchedRW = [WriteSystem] in
+ def HRESET : Ii8<0xF0, MRM_C0, (outs), (ins i32u8imm:$imm), "hreset\t$imm", []>,
+ Requires<[HasHRESET]>, TAXS;
+
+//===----------------------------------------------------------------------===//
+// SERIALIZE Instruction
+//
+let SchedRW = [WriteSystem] in
+ def SERIALIZE : I<0x01, MRM_E8, (outs), (ins), "serialize",
+ [(int_x86_serialize)]>, PS,
+ Requires<[HasSERIALIZE]>;
+
+//===----------------------------------------------------------------------===//
+// TSXLDTRK - TSX Suspend Load Address Tracking
+//
+let Predicates = [HasTSXLDTRK], SchedRW = [WriteSystem] in {
+ def XSUSLDTRK : I<0x01, MRM_E8, (outs), (ins), "xsusldtrk",
+ [(int_x86_xsusldtrk)]>, XD;
+ def XRESLDTRK : I<0x01, MRM_E9, (outs), (ins), "xresldtrk",
+ [(int_x86_xresldtrk)]>, XD;
+}
+
+//===----------------------------------------------------------------------===//
+// UINTR Instructions
+//
+let Predicates = [HasUINTR, In64BitMode], SchedRW = [WriteSystem] in {
+ def UIRET : I<0x01, MRM_EC, (outs), (ins), "uiret",
+ []>, XS;
+ def CLUI : I<0x01, MRM_EE, (outs), (ins), "clui",
+ [(int_x86_clui)]>, XS;
+ def STUI : I<0x01, MRM_EF, (outs), (ins), "stui",
+ [(int_x86_stui)]>, XS;
+
+ def SENDUIPI : I<0xC7, MRM6r, (outs), (ins GR64:$arg), "senduipi\t$arg",
+ [(int_x86_senduipi GR64:$arg)]>, XS;
+
+ let Defs = [EFLAGS] in
+ def TESTUI : I<0x01, MRM_ED, (outs), (ins), "testui",
+ [(set EFLAGS, (X86testui))]>, XS;
+}
+
+//===----------------------------------------------------------------------===//
+// PREFETCHIT0 and PREFETCHIT1 Instructions
+// prefetch ADDR, RW, Locality, Data
+let Predicates = [HasPREFETCHI, In64BitMode], SchedRW = [WriteLoad] in {
+ def PREFETCHIT0 : I<0x18, MRM7m, (outs), (ins i8mem:$src),
+ "prefetchit0\t$src", [(prefetch addr:$src, (i32 0), (i32 3), (i32 0))]>, TB;
+ def PREFETCHIT1 : I<0x18, MRM6m, (outs), (ins i8mem:$src),
+ "prefetchit1\t$src", [(prefetch addr:$src, (i32 0), (i32 2), (i32 0))]>, TB;
+}
+
+//===----------------------------------------------------------------------===//
+// CMPCCXADD Instructions
+//
+let isCodeGenOnly = 1, ForceDisassemble = 1, mayLoad = 1, mayStore = 1,
+ Predicates = [HasCMPCCXADD, In64BitMode], Defs = [EFLAGS],
+ Constraints = "$dstsrc1 = $dst" in {
+def CMPCCXADDmr32 : I<0xe0, MRMDestMem4VOp3CC, (outs GR32:$dst),
+ (ins GR32:$dstsrc1, i32mem:$dstsrc2, GR32:$src3, ccode:$cond),
+ "cmp${cond}xadd\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}",
+ [(set GR32:$dst, (X86cmpccxadd addr:$dstsrc2,
+ GR32:$dstsrc1, GR32:$src3, timm:$cond))]>,
+ VEX_4V, T8PD, Sched<[WriteXCHG]>;
+
+def CMPCCXADDmr64 : I<0xe0, MRMDestMem4VOp3CC, (outs GR64:$dst),
+ (ins GR64:$dstsrc1, i64mem:$dstsrc2, GR64:$src3, ccode:$cond),
+ "cmp${cond}xadd\t{$src3, $dst, $dstsrc2|$dstsrc2, $dst, $src3}",
+ [(set GR64:$dst, (X86cmpccxadd addr:$dstsrc2,
+ GR64:$dstsrc1, GR64:$src3, timm:$cond))]>,
+ VEX_4V, REX_W, T8PD, Sched<[WriteXCHG]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Memory Instructions
+//
+
+let Predicates = [HasCLFLUSHOPT], SchedRW = [WriteLoad] in
+def CLFLUSHOPT : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
+ "clflushopt\t$src", [(int_x86_clflushopt addr:$src)]>, PD;
+
+let Predicates = [HasCLWB], SchedRW = [WriteLoad] in
+def CLWB : I<0xAE, MRM6m, (outs), (ins i8mem:$src), "clwb\t$src",
+ [(int_x86_clwb addr:$src)]>, PD;
+
+let Predicates = [HasCLDEMOTE], SchedRW = [WriteLoad] in
+def CLDEMOTE : I<0x1C, MRM0m, (outs), (ins i8mem:$src), "cldemote\t$src",
+ [(int_x86_cldemote addr:$src)]>, PS;
diff --git a/llvm/lib/Target/X86/X86InstrSNP.td b/llvm/lib/Target/X86/X86InstrSNP.td
index de59f3fe2750..ab13fa43c92d 100644
--- a/llvm/lib/Target/X86/X86InstrSNP.td
+++ b/llvm/lib/Target/X86/X86InstrSNP.td
@@ -16,32 +16,38 @@
let SchedRW = [WriteSystem] in {
// F3 0F 01 FF
-let Uses = [RAX] in
+let Uses = [RAX], Defs = [EAX, EFLAGS] in
def PSMASH: I<0x01, MRM_FF, (outs), (ins), "psmash", []>, XS,
Requires<[In64BitMode]>;
// F2 0F 01 FF
-let Uses = [RAX] in
+let Uses = [RAX, RCX, RDX], Defs = [EAX, EFLAGS] in
def PVALIDATE64: I<0x01, MRM_FF, (outs), (ins), "pvalidate",[]>,
XD, Requires<[In64BitMode]>;
-let Uses = [EAX] in
+let Uses = [EAX, ECX, EDX], Defs = [EAX, EFLAGS] in
def PVALIDATE32: I<0x01, MRM_FF, (outs), (ins), "pvalidate",[]>,
XD, Requires<[Not64BitMode]>;
// F2 0F 01 FE
-let Uses = [RAX] in
+let Uses = [RAX, RCX], Defs = [EAX, EFLAGS] in
def RMPUPDATE: I<0x01, MRM_FE, (outs), (ins), "rmpupdate", []>, XD,
Requires<[In64BitMode]>;
// F3 0F 01 FE
-let Uses = [RAX] in
+let Uses = [RAX, RCX, RDX], Defs = [EAX, EFLAGS] in
def RMPADJUST: I<0x01, MRM_FE, (outs), (ins), "rmpadjust", []>, XS,
Requires<[In64BitMode]>;
+
+// F3 0F 01 FD
+let Uses = [RAX, RDX], Defs = [RAX, RCX, RDX, EFLAGS] in
+def RMPQUERY: I<0x01, MRM_FD, (outs), (ins), "rmpquery", []>, XS,
+ Requires<[In64BitMode]>;
} // SchedRW
def : InstAlias<"psmash\t{%rax|rax}", (PSMASH)>, Requires<[In64BitMode]>;
-def : InstAlias<"pvalidate\t{%rax|rax}", (PVALIDATE64)>, Requires<[In64BitMode]>;
-def : InstAlias<"pvalidate\t{%eax|eax}", (PVALIDATE32)>, Requires<[Not64BitMode]>;
-def : InstAlias<"rmpupdate\t{%rax|rax}", (RMPUPDATE)>, Requires<[In64BitMode]>;
-def : InstAlias<"rmpadjust\t{%rax|rax}", (RMPADJUST)>, Requires<[In64BitMode]>;
+def : InstAlias<"pvalidate\t{%rax, %rcx, %rdx|rdx, rcx, rax|}", (PVALIDATE64)>, Requires<[In64BitMode]>;
+def : InstAlias<"pvalidate\t{%eax, %ecx, %edx|edx, ecx, eax|}", (PVALIDATE32)>, Requires<[Not64BitMode]>;
+def : InstAlias<"rmpupdate\t{%rax, %rcx|rcx, rax|}", (RMPUPDATE)>, Requires<[In64BitMode]>;
+def : InstAlias<"rmpadjust\t{%rax, %rcx, %rdx|rdx, rcx, rax|}", (RMPADJUST)>, Requires<[In64BitMode]>;
+def : InstAlias<"rmpquery\t{%rax, %rdx|rdx, rax|}", (RMPQUERY)>, Requires<[In64BitMode]>;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 561ba99db4af..6c57eceab376 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -192,7 +192,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
//===----------------------------------------------------------------------===//
multiclass sse12_move_rr<SDNode OpNode, ValueType vt, string base_opc,
- string asm_opr, Domain d, string Name> {
+ string asm_opr, Domain d> {
let isCommutable = 1 in
def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -205,28 +205,27 @@ multiclass sse12_move_rr<SDNode OpNode, ValueType vt, string base_opc,
def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(base_opc, asm_opr), []>,
- Sched<[SchedWriteFShuffle.XMM]>, FoldGenData<Name#rr>;
+ Sched<[SchedWriteFShuffle.XMM]>;
}
multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
X86MemOperand x86memop, string OpcodeStr,
- Domain d, string Name, Predicate pred> {
+ Domain d, Predicate pred> {
// AVX
let Predicates = [UseAVX, OptForSize] in
defm V#NAME : sse12_move_rr<OpNode, vt, OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d,
- "V"#Name>,
- VEX_4V, VEX_LIG, VEX_WIG;
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d>,
+ VEX_4V, VEX_LIG, WIG;
def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(store RC:$src, addr:$dst)], d>,
- VEX, VEX_LIG, Sched<[WriteFStore]>, VEX_WIG;
+ VEX, VEX_LIG, Sched<[WriteFStore]>, WIG;
// SSE1 & 2
let Constraints = "$src1 = $dst" in {
let Predicates = [pred, NoSSE41_Or_OptForSize] in
defm NAME : sse12_move_rr<OpNode, vt, OpcodeStr,
- "\t{$src2, $dst|$dst, $src2}", d, Name>;
+ "\t{$src2, $dst|$dst, $src2}", d>;
}
def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
@@ -249,7 +248,7 @@ multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
def V#NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
- VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
+ VEX, VEX_LIG, Sched<[WriteFLoad]>, WIG;
def NAME#rm : SI<0x10, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (vt (vzloadfrag addr:$src)))], d>,
@@ -260,7 +259,7 @@ multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
def V#NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))], d>,
- VEX, VEX_LIG, Sched<[WriteFLoad]>, VEX_WIG;
+ VEX, VEX_LIG, Sched<[WriteFLoad]>, WIG;
def NAME#rm_alt : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (mem_pat addr:$src))], d>,
@@ -269,9 +268,9 @@ multiclass sse12_move_rm<RegisterClass RC, ValueType vt, X86MemOperand x86memop,
}
defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
- SSEPackedSingle, "MOVSS", UseSSE1>, XS;
+ SSEPackedSingle, UseSSE1>, XS;
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
- SSEPackedDouble, "MOVSD", UseSSE2>, XD;
+ SSEPackedDouble, UseSSE2>, XD;
let canFoldAsLoad = 1, isReMaterializable = 1 in {
defm MOVSS : sse12_move_rm<FR32, v4f32, f32mem, loadf32, X86vzload32, "movss",
@@ -353,29 +352,29 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in
let Predicates = [HasAVX, NoVLX] in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps",
SSEPackedSingle, SchedWriteFMoveLS.XMM>,
- PS, VEX, VEX_WIG;
+ PS, VEX, WIG;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, "movapd",
SSEPackedDouble, SchedWriteFMoveLS.XMM>,
- PD, VEX, VEX_WIG;
+ PD, VEX, WIG;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, "movups",
SSEPackedSingle, SchedWriteFMoveLS.XMM>,
- PS, VEX, VEX_WIG;
+ PS, VEX, WIG;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd",
SSEPackedDouble, SchedWriteFMoveLS.XMM>,
- PD, VEX, VEX_WIG;
+ PD, VEX, WIG;
defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, "movaps",
SSEPackedSingle, SchedWriteFMoveLS.YMM>,
- PS, VEX, VEX_L, VEX_WIG;
+ PS, VEX, VEX_L, WIG;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, "movapd",
SSEPackedDouble, SchedWriteFMoveLS.YMM>,
- PD, VEX, VEX_L, VEX_WIG;
+ PD, VEX, VEX_L, WIG;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups",
SSEPackedSingle, SchedWriteFMoveLS.YMM>,
- PS, VEX, VEX_L, VEX_WIG;
+ PS, VEX, VEX_L, WIG;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd",
SSEPackedDouble, SchedWriteFMoveLS.YMM>,
- PD, VEX, VEX_L, VEX_WIG;
+ PD, VEX, VEX_L, WIG;
}
let Predicates = [UseSSE1] in {
@@ -400,38 +399,38 @@ let SchedRW = [SchedWriteFMoveLS.XMM.MR] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)]>,
- VEX, VEX_WIG;
+ VEX, WIG;
def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(alignedstore (v2f64 VR128:$src), addr:$dst)]>,
- VEX, VEX_WIG;
+ VEX, WIG;
def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movups\t{$src, $dst|$dst, $src}",
[(store (v4f32 VR128:$src), addr:$dst)]>,
- VEX, VEX_WIG;
+ VEX, WIG;
def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v2f64 VR128:$src), addr:$dst)]>,
- VEX, VEX_WIG;
+ VEX, WIG;
} // SchedRW
let SchedRW = [SchedWriteFMoveLS.YMM.MR] in {
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v8f32 VR256:$src), addr:$dst)]>,
- VEX, VEX_L, VEX_WIG;
+ VEX, VEX_L, WIG;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f64 VR256:$src), addr:$dst)]>,
- VEX, VEX_L, VEX_WIG;
+ VEX, VEX_L, WIG;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movups\t{$src, $dst|$dst, $src}",
[(store (v8f32 VR256:$src), addr:$dst)]>,
- VEX, VEX_L, VEX_WIG;
+ VEX, VEX_L, WIG;
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)]>,
- VEX, VEX_L, VEX_WIG;
+ VEX, VEX_L, WIG;
} // SchedRW
} // Predicate
@@ -442,38 +441,38 @@ let SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", []>,
- VEX, VEX_WIG, FoldGenData<"VMOVAPSrr">;
+ VEX, WIG;
def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movapd\t{$src, $dst|$dst, $src}", []>,
- VEX, VEX_WIG, FoldGenData<"VMOVAPDrr">;
+ VEX, WIG;
def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movups\t{$src, $dst|$dst, $src}", []>,
- VEX, VEX_WIG, FoldGenData<"VMOVUPSrr">;
+ VEX, WIG;
def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
(ins VR128:$src),
"movupd\t{$src, $dst|$dst, $src}", []>,
- VEX, VEX_WIG, FoldGenData<"VMOVUPDrr">;
+ VEX, WIG;
} // SchedRW
let SchedRW = [SchedWriteFMoveLS.YMM.RR] in {
def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movaps\t{$src, $dst|$dst, $src}", []>,
- VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPSYrr">;
+ VEX, VEX_L, WIG;
def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movapd\t{$src, $dst|$dst, $src}", []>,
- VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVAPDYrr">;
+ VEX, VEX_L, WIG;
def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movups\t{$src, $dst|$dst, $src}", []>,
- VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPSYrr">;
+ VEX, VEX_L, WIG;
def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
(ins VR256:$src),
"movupd\t{$src, $dst|$dst, $src}", []>,
- VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVUPDYrr">;
+ VEX, VEX_L, WIG;
} // SchedRW
} // Predicate
@@ -514,17 +513,13 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
isMoveReg = 1, SchedRW = [SchedWriteFMoveLS.XMM.RR] in {
def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>,
- FoldGenData<"MOVAPSrr">;
+ "movaps\t{$src, $dst|$dst, $src}", []>;
def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>,
- FoldGenData<"MOVAPDrr">;
+ "movapd\t{$src, $dst|$dst, $src}", []>;
def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movups\t{$src, $dst|$dst, $src}", []>,
- FoldGenData<"MOVUPSrr">;
+ "movups\t{$src, $dst|$dst, $src}", []>;
def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}", []>,
- FoldGenData<"MOVUPDrr">;
+ "movupd\t{$src, $dst|$dst, $src}", []>;
}
// Reversed version with ".s" suffix for GAS compatibility.
@@ -577,20 +572,37 @@ let Predicates = [HasAVX, NoVLX] in {
def : Pat<(alignedloadv8f16 addr:$src),
(VMOVAPSrm addr:$src)>;
+ def : Pat<(alignedloadv8bf16 addr:$src),
+ (VMOVAPSrm addr:$src)>;
def : Pat<(loadv8f16 addr:$src),
(VMOVUPSrm addr:$src)>;
+ def : Pat<(loadv8bf16 addr:$src),
+ (VMOVUPSrm addr:$src)>;
def : Pat<(alignedstore (v8f16 VR128:$src), addr:$dst),
(VMOVAPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(alignedstore (v8bf16 VR128:$src), addr:$dst),
+ (VMOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v8f16 VR128:$src), addr:$dst),
(VMOVUPSmr addr:$dst, VR128:$src)>;
+ def : Pat<(store (v8bf16 VR128:$src), addr:$dst),
+ (VMOVUPSmr addr:$dst, VR128:$src)>;
+
def : Pat<(alignedloadv16f16 addr:$src),
(VMOVAPSYrm addr:$src)>;
+ def : Pat<(alignedloadv16bf16 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
def : Pat<(loadv16f16 addr:$src),
(VMOVUPSYrm addr:$src)>;
+ def : Pat<(loadv16bf16 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
def : Pat<(alignedstore (v16f16 VR256:$src), addr:$dst),
(VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore (v16bf16 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
def : Pat<(store (v16f16 VR256:$src), addr:$dst),
(VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v16bf16 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
}
// Use movaps / movups for SSE integer load / store (one byte shorter).
@@ -671,7 +683,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator pdnode,
let Predicates = [UseAVX] in
defm V#NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
let Constraints = "$src1 = $dst" in
defm NAME : sse12_mov_hilo_packed_base<opc, pdnode, base_opc,
@@ -686,12 +698,12 @@ let mayStore = 1, hasSideEffects = 0 in
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[]>,
- VEX, VEX_WIG;
+ VEX, WIG;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)]>,
- VEX, VEX_WIG;
+ VEX, WIG;
}// UseAVX
let mayStore = 1, hasSideEffects = 0 in
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
@@ -732,12 +744,12 @@ let Predicates = [UseAVX] in {
let mayStore = 1, hasSideEffects = 0 in
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
- []>, VEX, VEX_WIG;
+ []>, VEX, WIG;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhpd\t{$src, $dst|$dst, $src}",
[(store (f64 (extractelt
(v2f64 (X86Unpckh VR128:$src, VR128:$src)),
- (iPTR 0))), addr:$dst)]>, VEX, VEX_WIG;
+ (iPTR 0))), addr:$dst)]>, VEX, WIG;
} // UseAVX
let mayStore = 1, hasSideEffects = 0 in
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
@@ -811,15 +823,14 @@ let Predicates = [UseAVX] in {
"movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))]>,
- VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG;
+ VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, WIG;
let isCommutable = 1 in
def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
- VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, VEX_WIG,
- NotMemoryFoldable;
+ VEX_4V, Sched<[SchedWriteFShuffle.XMM]>, WIG;
}
let Constraints = "$src1 = $dst" in {
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
@@ -834,7 +845,7 @@ let Constraints = "$src1 = $dst" in {
"movhlps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))]>,
- Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
+ Sched<[SchedWriteFShuffle.XMM]>;
}
//===----------------------------------------------------------------------===//
@@ -896,7 +907,7 @@ defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, any_fp_to_sint, f32mem, loadf3
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, any_fp_to_sint, f32mem, loadf32,
"cvttss2si", "cvttss2si",
WriteCvtSS2I, SSEPackedSingle>,
- XS, VEX, VEX_W, VEX_LIG;
+ XS, VEX, REX_W, VEX_LIG;
defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf64,
"cvttsd2si", "cvttsd2si",
WriteCvtSD2I, SSEPackedDouble>,
@@ -904,7 +915,7 @@ defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, any_fp_to_sint, f64mem, loadf6
defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, any_fp_to_sint, f64mem, loadf64,
"cvttsd2si", "cvttsd2si",
WriteCvtSD2I, SSEPackedDouble>,
- XD, VEX, VEX_W, VEX_LIG;
+ XD, VEX, REX_W, VEX_LIG;
defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
"cvtss2si", "cvtss2si",
@@ -913,7 +924,7 @@ defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, lrint, f32mem, loadf32,
defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, llrint, f32mem, loadf32,
"cvtss2si", "cvtss2si",
WriteCvtSS2I, SSEPackedSingle>,
- XS, VEX, VEX_W, VEX_LIG;
+ XS, VEX, REX_W, VEX_LIG;
defm VCVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
"cvtsd2si", "cvtsd2si",
WriteCvtSD2I, SSEPackedDouble>,
@@ -921,7 +932,7 @@ defm VCVTSD2SI : sse12_cvt_s<0x2D, FR64, GR32, lrint, f64mem, loadf64,
defm VCVTSD2SI64 : sse12_cvt_s<0x2D, FR64, GR64, llrint, f64mem, loadf64,
"cvtsd2si", "cvtsd2si",
WriteCvtSD2I, SSEPackedDouble>,
- XD, VEX, VEX_W, VEX_LIG;
+ XD, VEX, REX_W, VEX_LIG;
}
// The assembler can recognize rr 64-bit instructions by seeing a rxx
@@ -934,13 +945,13 @@ defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss", "l",
VEX_LIG, SIMD_EXC;
defm VCVTSI642SS : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss", "q",
WriteCvtI2SS, SSEPackedSingle>, XS, VEX_4V,
- VEX_W, VEX_LIG, SIMD_EXC;
+ REX_W, VEX_LIG, SIMD_EXC;
defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd", "l",
WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V,
VEX_LIG;
defm VCVTSI642SD : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd", "q",
WriteCvtI2SD, SSEPackedDouble>, XD, VEX_4V,
- VEX_W, VEX_LIG, SIMD_EXC;
+ REX_W, VEX_LIG, SIMD_EXC;
} // isCodeGenOnly = 1
let Predicates = [UseAVX] in {
@@ -1066,7 +1077,7 @@ defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64,
WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_LIG;
defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v2f64,
X86cvts2si, sdmem, sse_load_f64, "cvtsd2si",
- WriteCvtSD2I, SSEPackedDouble>, XD, VEX, VEX_W, VEX_LIG;
+ WriteCvtSD2I, SSEPackedDouble>, XD, VEX, REX_W, VEX_LIG;
}
defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v2f64, X86cvts2si,
sdmem, sse_load_f64, "cvtsd2si", WriteCvtSD2I,
@@ -1082,13 +1093,13 @@ defm VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
XS, VEX_4V, VEX_LIG, SIMD_EXC;
defm VCVTSI642SS : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
i64mem, "cvtsi2ss", "q", WriteCvtI2SS, SSEPackedSingle, 0>,
- XS, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
+ XS, VEX_4V, VEX_LIG, REX_W, SIMD_EXC;
defm VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
i32mem, "cvtsi2sd", "l", WriteCvtI2SD, SSEPackedDouble, 0>,
XD, VEX_4V, VEX_LIG;
defm VCVTSI642SD : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
i64mem, "cvtsi2sd", "q", WriteCvtI2SD, SSEPackedDouble, 0>,
- XD, VEX_4V, VEX_LIG, VEX_W, SIMD_EXC;
+ XD, VEX_4V, VEX_LIG, REX_W, SIMD_EXC;
}
let Constraints = "$src1 = $dst" in {
defm CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
@@ -1143,14 +1154,14 @@ defm VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
defm VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v4f32,
X86cvtts2Int, ssmem, sse_load_f32,
"cvttss2si", WriteCvtSS2I, SSEPackedSingle>,
- XS, VEX, VEX_LIG, VEX_W;
+ XS, VEX, VEX_LIG, REX_W;
defm VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v2f64, X86cvtts2Int,
sdmem, sse_load_f64, "cvttsd2si",
WriteCvtSS2I, SSEPackedDouble>, XD, VEX, VEX_LIG;
defm VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, i64, v2f64,
X86cvtts2Int, sdmem, sse_load_f64,
"cvttsd2si", WriteCvtSS2I, SSEPackedDouble>,
- XD, VEX, VEX_LIG, VEX_W;
+ XD, VEX, VEX_LIG, REX_W;
}
let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, i32, v4f32, X86cvtts2Int,
@@ -1209,7 +1220,7 @@ defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_LIG;
defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
ssmem, sse_load_f32, "cvtss2si",
- WriteCvtSS2I, SSEPackedSingle>, XS, VEX, VEX_W, VEX_LIG;
+ WriteCvtSS2I, SSEPackedSingle>, XS, VEX, REX_W, VEX_LIG;
}
let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, i32, v4f32, X86cvts2si,
@@ -1222,11 +1233,11 @@ defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, i64, v4f32, X86cvts2si,
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, load,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, WriteCvtI2PS>,
- PS, VEX, Requires<[HasAVX, NoVLX]>, VEX_WIG;
+ PS, VEX, Requires<[HasAVX, NoVLX]>, WIG;
defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, load,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, WriteCvtI2PSY>,
- PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, VEX_WIG;
+ PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>, WIG;
defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memop,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
@@ -1278,13 +1289,13 @@ let isCodeGenOnly = 1, hasSideEffects = 0, Predicates = [UseAVX],
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR32:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- VEX_4V, VEX_LIG, VEX_WIG,
+ VEX_4V, VEX_LIG, WIG,
Sched<[WriteCvtSD2SS]>, SIMD_EXC;
let mayLoad = 1 in
def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR32:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- XD, VEX_4V, VEX_LIG, VEX_WIG,
+ XD, VEX_4V, VEX_LIG, WIG,
Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>, SIMD_EXC;
}
@@ -1310,14 +1321,14 @@ def VCVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86frounds VR128:$src1, (v2f64 VR128:$src2))))]>,
- XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
+ XD, VEX_4V, VEX_LIG, WIG, Requires<[UseAVX]>,
Sched<[WriteCvtSD2SS]>;
def VCVTSD2SSrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(v4f32 (X86frounds VR128:$src1, (sse_load_f64 addr:$src2))))]>,
- XD, VEX_4V, VEX_LIG, VEX_WIG, Requires<[UseAVX]>,
+ XD, VEX_4V, VEX_LIG, WIG, Requires<[UseAVX]>,
Sched<[WriteCvtSD2SS.Folded, WriteCvtSD2SS.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in {
def CVTSD2SSrr_Int: I<0x5A, MRMSrcReg,
@@ -1342,13 +1353,13 @@ let isCodeGenOnly = 1, hasSideEffects = 0, ExeDomain = SSEPackedSingle in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR64:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- XS, VEX_4V, VEX_LIG, VEX_WIG,
+ XS, VEX_4V, VEX_LIG, WIG,
Sched<[WriteCvtSS2SD]>, Requires<[UseAVX]>, SIMD_EXC;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR64:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
- XS, VEX_4V, VEX_LIG, VEX_WIG,
+ XS, VEX_4V, VEX_LIG, WIG,
Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>,
Requires<[UseAVX, OptForSize]>, SIMD_EXC;
} // isCodeGenOnly = 1, hasSideEffects = 0
@@ -1375,13 +1386,13 @@ let hasSideEffects = 0, Uses = [MXCSR], mayRaiseFPException = 1,
def VCVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XS, VEX_4V, VEX_LIG, VEX_WIG,
+ []>, XS, VEX_4V, VEX_LIG, WIG,
Requires<[HasAVX]>, Sched<[WriteCvtSS2SD]>;
let mayLoad = 1 in
def VCVTSS2SDrm_Int: I<0x5A, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- []>, XS, VEX_4V, VEX_LIG, VEX_WIG, Requires<[HasAVX]>,
+ []>, XS, VEX_4V, VEX_LIG, WIG, Requires<[HasAVX]>,
Sched<[WriteCvtSS2SD.Folded, WriteCvtSS2SD.ReadAfterFold]>;
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def CVTSS2SDrr_Int: I<0x5A, MRMSrcReg,
@@ -1516,22 +1527,22 @@ let Predicates = [HasAVX, NoVLX] in {
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4i32 (X86cvtp2Int (v4f32 VR128:$src))))]>,
- VEX, Sched<[WriteCvtPS2I]>, VEX_WIG, SIMD_EXC;
+ VEX, Sched<[WriteCvtPS2I]>, WIG, SIMD_EXC;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (loadv4f32 addr:$src))))]>,
- VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG, SIMD_EXC;
+ VEX, Sched<[WriteCvtPS2ILd]>, WIG, SIMD_EXC;
def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v8i32 (X86cvtp2Int (v8f32 VR256:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG, SIMD_EXC;
+ VEX, VEX_L, Sched<[WriteCvtPS2IY]>, WIG, SIMD_EXC;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v8i32 (X86cvtp2Int (loadv8f32 addr:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, VEX_WIG, SIMD_EXC;
+ VEX, VEX_L, Sched<[WriteCvtPS2IYLd]>, WIG, SIMD_EXC;
}
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
@@ -1553,26 +1564,26 @@ def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
- VEX, Sched<[WriteCvtPD2I]>, VEX_WIG;
+ VEX, Sched<[WriteCvtPD2I]>, WIG;
// XMM only
def VCVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtpd2dq{x}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX,
- Sched<[WriteCvtPD2ILd]>, VEX_WIG;
+ Sched<[WriteCvtPD2ILd]>, WIG;
// YMM only
def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (v4f64 VR256:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG;
+ VEX, VEX_L, Sched<[WriteCvtPD2IY]>, WIG;
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
+ VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, WIG;
}
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
@@ -1599,23 +1610,23 @@ def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86any_cvttp2si (v4f32 VR128:$src))))]>,
- VEX, Sched<[WriteCvtPS2I]>, VEX_WIG;
+ VEX, Sched<[WriteCvtPS2I]>, WIG;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86any_cvttp2si (loadv4f32 addr:$src))))]>,
- VEX, Sched<[WriteCvtPS2ILd]>, VEX_WIG;
+ VEX, Sched<[WriteCvtPS2ILd]>, WIG;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v8i32 (X86any_cvttp2si (v8f32 VR256:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPS2IY]>, VEX_WIG;
+ VEX, VEX_L, Sched<[WriteCvtPS2IY]>, WIG;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v8i32 (X86any_cvttp2si (loadv8f32 addr:$src))))]>,
VEX, VEX_L,
- Sched<[WriteCvtPS2IYLd]>, VEX_WIG;
+ Sched<[WriteCvtPS2IYLd]>, WIG;
}
def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -1639,24 +1650,24 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86any_cvttp2si (v2f64 VR128:$src))))]>,
- VEX, Sched<[WriteCvtPD2I]>, VEX_WIG;
+ VEX, Sched<[WriteCvtPD2I]>, WIG;
def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttpd2dq{x}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86any_cvttp2si (loadv2f64 addr:$src))))]>,
- VEX, Sched<[WriteCvtPD2ILd]>, VEX_WIG;
+ VEX, Sched<[WriteCvtPD2ILd]>, WIG;
// YMM only
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86any_cvttp2si (v4f64 VR256:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPD2IY]>, VEX_WIG;
+ VEX, VEX_L, Sched<[WriteCvtPD2IY]>, WIG;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4i32 (X86any_cvttp2si (loadv4f64 addr:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, VEX_WIG;
+ VEX, VEX_L, Sched<[WriteCvtPD2IYLd]>, WIG;
} // Predicates = [HasAVX, NoVLX]
def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
@@ -1688,19 +1699,19 @@ let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (X86any_vfpext (v4f32 VR128:$src))))]>,
- PS, VEX, Sched<[WriteCvtPS2PD]>, VEX_WIG;
+ PS, VEX, Sched<[WriteCvtPS2PD]>, WIG;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))]>,
- PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, VEX_WIG;
+ PS, VEX, Sched<[WriteCvtPS2PD.Folded]>, WIG;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (v4f64 (any_fpextend (v4f32 VR128:$src))))]>,
- PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, VEX_WIG;
+ PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY]>, WIG;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))]>,
- PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, VEX_WIG;
+ PS, VEX, VEX_L, Sched<[WriteCvtPS2PDY.Folded]>, WIG;
}
let Predicates = [UseSSE2], Uses = [MXCSR], mayRaiseFPException = 1 in {
@@ -1724,23 +1735,23 @@ def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(bc_v4i32
(v2i64 (scalar_to_vector
(loadi64 addr:$src)))))))]>,
- VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG;
+ VEX, Sched<[WriteCvtI2PDLd]>, WIG;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2f64 (X86any_VSintToFP (v4i32 VR128:$src))))]>,
- VEX, Sched<[WriteCvtI2PD]>, VEX_WIG;
+ VEX, Sched<[WriteCvtI2PD]>, WIG;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v4f64 (any_sint_to_fp (loadv4i32 addr:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtI2PDYLd]>,
- VEX_WIG;
+ WIG;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
(v4f64 (any_sint_to_fp (v4i32 VR128:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtI2PDY]>, VEX_WIG;
+ VEX, VEX_L, Sched<[WriteCvtI2PDY]>, WIG;
}
let hasSideEffects = 0, mayLoad = 1 in
@@ -1779,20 +1790,20 @@ let Predicates = [HasAVX, NoVLX], Uses = [MXCSR], mayRaiseFPException = 1 in {
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4f32 (X86any_vfpround (v2f64 VR128:$src))))]>,
- VEX, Sched<[WriteCvtPD2PS]>, VEX_WIG;
+ VEX, Sched<[WriteCvtPD2PS]>, WIG;
def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4f32 (X86any_vfpround (loadv2f64 addr:$src))))]>,
- VEX, Sched<[WriteCvtPD2PS.Folded]>, VEX_WIG;
+ VEX, Sched<[WriteCvtPD2PS.Folded]>, WIG;
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4f32 (X86any_vfpround (v4f64 VR256:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, VEX_WIG;
+ VEX, VEX_L, Sched<[WriteCvtPD2PSY]>, WIG;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4f32 (X86any_vfpround (loadv4f64 addr:$src))))]>,
- VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, VEX_WIG;
+ VEX, VEX_L, Sched<[WriteCvtPD2PSY.Folded]>, WIG;
} // Predicates = [HasAVX, NoVLX]
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
@@ -1849,12 +1860,12 @@ let ExeDomain = SSEPackedSingle in
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, ssmem, X86cmps, v4f32, loadf32,
"cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SchedWriteFCmpSizes.PS.Scl, sse_load_f32>,
- XS, VEX_4V, VEX_LIG, VEX_WIG;
+ XS, VEX_4V, VEX_LIG, WIG;
let ExeDomain = SSEPackedDouble in
defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, sdmem, X86cmps, v2f64, loadf64,
"cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
SchedWriteFCmpSizes.PD.Scl, sse_load_f64>,
- XD, VEX_4V, VEX_LIG, VEX_WIG;
+ XD, VEX_4V, VEX_LIG, WIG;
let Constraints = "$src1 = $dst" in {
let ExeDomain = SSEPackedSingle in
@@ -1908,24 +1919,24 @@ let mayLoad = 1 in
let Defs = [EFLAGS] in {
defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32,
- "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
+ "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, WIG;
defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86any_fcmp, f64, f64mem, loadf64,
- "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;
+ "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, WIG;
defm VCOMISS : sse12_ord_cmp<0x2F, FR32, X86strict_fcmps, f32, f32mem, loadf32,
- "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
+ "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, WIG;
defm VCOMISD : sse12_ord_cmp<0x2F, FR64, X86strict_fcmps, f64, f64mem, loadf64,
- "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;
+ "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, WIG;
let isCodeGenOnly = 1 in {
defm VUCOMISS : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v4f32, ssmem,
- sse_load_f32, "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
+ sse_load_f32, "ucomiss", SSEPackedSingle>, PS, VEX, VEX_LIG, WIG;
defm VUCOMISD : sse12_ord_cmp_int<0x2E, VR128, X86ucomi, v2f64, sdmem,
- sse_load_f64, "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;
+ sse_load_f64, "ucomisd", SSEPackedDouble>, PD, VEX, VEX_LIG, WIG;
defm VCOMISS : sse12_ord_cmp_int<0x2F, VR128, X86comi, v4f32, ssmem,
- sse_load_f32, "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, VEX_WIG;
+ sse_load_f32, "comiss", SSEPackedSingle>, PS, VEX, VEX_LIG, WIG;
defm VCOMISD : sse12_ord_cmp_int<0x2F, VR128, X86comi, v2f64, sdmem,
- sse_load_f64, "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, VEX_WIG;
+ sse_load_f64, "comisd", SSEPackedDouble>, PD, VEX, VEX_LIG, WIG;
}
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86any_fcmp, f32, f32mem, loadf32,
"ucomiss", SSEPackedSingle>, PS;
@@ -1968,16 +1979,16 @@ multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
defm VCMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, VEX_WIG;
+ SchedWriteFCmpSizes.PS.XMM, SSEPackedSingle, loadv4f32>, PS, VEX_4V, WIG;
defm VCMPPD : sse12_cmp_packed<VR128, f128mem, v2f64,
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, VEX_WIG;
+ SchedWriteFCmpSizes.PD.XMM, SSEPackedDouble, loadv2f64>, PD, VEX_4V, WIG;
defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, v8f32,
"cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteFCmpSizes.PS.YMM, SSEPackedSingle, loadv8f32>, PS, VEX_4V, VEX_L, WIG;
defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, v4f64,
"cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}",
- SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteFCmpSizes.PD.YMM, SSEPackedDouble, loadv4f64>, PD, VEX_4V, VEX_L, WIG;
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, v4f32,
"cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}",
@@ -2065,19 +2076,19 @@ let Predicates = [HasAVX, NoVLX] in {
defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
"shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
loadv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>,
- PS, VEX_4V, VEX_WIG;
+ PS, VEX_4V, WIG;
defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
"shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
loadv8f32, SchedWriteFShuffle.YMM, SSEPackedSingle>,
- PS, VEX_4V, VEX_L, VEX_WIG;
+ PS, VEX_4V, VEX_L, WIG;
defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
"shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
loadv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>,
- PD, VEX_4V, VEX_WIG;
+ PD, VEX_4V, WIG;
defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
"shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
loadv4f64, SchedWriteFShuffle.YMM, SSEPackedDouble>,
- PD, VEX_4V, VEX_L, VEX_WIG;
+ PD, VEX_4V, VEX_L, WIG;
}
let Constraints = "$src1 = $dst" in {
defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -2115,29 +2126,29 @@ multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
let Predicates = [HasAVX, NoVLX] in {
defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, load,
VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
+ SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, WIG;
defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, load,
VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD, VEX_4V, VEX_WIG;
+ SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD, VEX_4V, WIG;
defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, load,
VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, VEX_WIG;
+ SchedWriteFShuffle.XMM, SSEPackedSingle>, PS, VEX_4V, WIG;
defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, load,
VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, VEX_WIG;
+ SchedWriteFShuffle.XMM, SSEPackedDouble>, PD, VEX_4V, WIG;
defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, load,
VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, WIG;
defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, load,
VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, WIG;
defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, load,
VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteFShuffle.YMM, SSEPackedSingle>, PS, VEX_4V, VEX_L, WIG;
defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, load,
VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteFShuffle.YMM, SSEPackedDouble>, PD, VEX_4V, VEX_L, WIG;
}// Predicates = [HasAVX, NoVLX]
let Constraints = "$src1 = $dst" in {
@@ -2197,13 +2208,13 @@ multiclass sse12_extr_sign_mask<RegisterClass RC, ValueType vt,
let Predicates = [HasAVX] in {
defm VMOVMSKPS : sse12_extr_sign_mask<VR128, v4f32, "movmskps",
- SSEPackedSingle>, PS, VEX, VEX_WIG;
+ SSEPackedSingle>, PS, VEX, WIG;
defm VMOVMSKPD : sse12_extr_sign_mask<VR128, v2f64, "movmskpd",
- SSEPackedDouble>, PD, VEX, VEX_WIG;
+ SSEPackedDouble>, PD, VEX, WIG;
defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, v8f32, "movmskps",
- SSEPackedSingle>, PS, VEX, VEX_L, VEX_WIG;
+ SSEPackedSingle>, PS, VEX, VEX_L, WIG;
defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, v4f64, "movmskpd",
- SSEPackedDouble>, PD, VEX, VEX_L, VEX_WIG;
+ SSEPackedDouble>, PD, VEX, VEX_L, WIG;
// Also support integer VTs to avoid a int->fp bitcast in the DAG.
def : Pat<(X86movmsk (v4i32 VR128:$src)),
@@ -2265,7 +2276,7 @@ multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
VR128, load, i128mem, sched.XMM,
- IsCommutable, 0>, VEX_4V, VEX_WIG;
+ IsCommutable, 0>, VEX_4V, WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
@@ -2274,7 +2285,7 @@ let Constraints = "$src1 = $dst" in
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
OpVT256, VR256, load, i256mem, sched.YMM,
- IsCommutable, 0>, VEX_4V, VEX_L, VEX_WIG;
+ IsCommutable, 0>, VEX_4V, VEX_L, WIG;
}
// These are ordered here for pattern ordering requirements with the fp versions
@@ -2301,19 +2312,19 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f256mem, sched.YMM,
- [], [], 0>, PS, VEX_4V, VEX_L, VEX_WIG;
+ [], [], 0>, PS, VEX_4V, VEX_L, WIG;
defm V#NAME#PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f256mem, sched.YMM,
- [], [], 0>, PD, VEX_4V, VEX_L, VEX_WIG;
+ [], [], 0>, PD, VEX_4V, VEX_L, WIG;
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem, sched.XMM,
- [], [], 0>, PS, VEX_4V, VEX_WIG;
+ [], [], 0>, PS, VEX_4V, WIG;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem, sched.XMM,
- [], [], 0>, PD, VEX_4V, VEX_WIG;
+ [], [], 0>, PD, VEX_4V, WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -2625,17 +2636,17 @@ let Uses = [MXCSR], mayRaiseFPException = 1 in {
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
VR128, v4f32, f128mem, loadv4f32,
- SSEPackedSingle, sched.PS.XMM, 0>, PS, VEX_4V, VEX_WIG;
+ SSEPackedSingle, sched.PS.XMM, 0>, PS, VEX_4V, WIG;
defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
VR128, v2f64, f128mem, loadv2f64,
- SSEPackedDouble, sched.PD.XMM, 0>, PD, VEX_4V, VEX_WIG;
+ SSEPackedDouble, sched.PD.XMM, 0>, PD, VEX_4V, WIG;
defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"),
OpNode, VR256, v8f32, f256mem, loadv8f32,
- SSEPackedSingle, sched.PS.YMM, 0>, PS, VEX_4V, VEX_L, VEX_WIG;
+ SSEPackedSingle, sched.PS.YMM, 0>, PS, VEX_4V, VEX_L, WIG;
defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"),
OpNode, VR256, v4f64, f256mem, loadv4f64,
- SSEPackedDouble, sched.PD.YMM, 0>, PD, VEX_4V, VEX_L, VEX_WIG;
+ SSEPackedDouble, sched.PD.YMM, 0>, PD, VEX_4V, VEX_L, WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -2654,10 +2665,10 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDPatternOperat
let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
OpNode, FR32, f32mem, SSEPackedSingle, sched.PS.Scl, 0>,
- XS, VEX_4V, VEX_LIG, VEX_WIG;
+ XS, VEX_4V, VEX_LIG, WIG;
defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
OpNode, FR64, f64mem, SSEPackedDouble, sched.PD.Scl, 0>,
- XD, VEX_4V, VEX_LIG, VEX_WIG;
+ XD, VEX_4V, VEX_LIG, WIG;
let Constraints = "$src1 = $dst" in {
defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
@@ -2676,10 +2687,10 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
let Uses = [MXCSR], mayRaiseFPException = 1 in {
defm V#NAME#SS : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32,
!strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
- SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, VEX_WIG;
+ SSEPackedSingle, sched.PS.Scl, 0>, XS, VEX_4V, VEX_LIG, WIG;
defm V#NAME#SD : sse12_fp_scalar_int<opc, OpNode, VR128, v2f64,
!strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
- SSEPackedDouble, sched.PD.Scl, 0>, XD, VEX_4V, VEX_LIG, VEX_WIG;
+ SSEPackedDouble, sched.PD.Scl, 0>, XD, VEX_4V, VEX_LIG, WIG;
let Constraints = "$src1 = $dst" in {
defm SS : sse12_fp_scalar_int<opc, OpNode, VR128, v4f32,
@@ -2938,22 +2949,22 @@ let Predicates = prds in {
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>,
- VEX, Sched<[sched.XMM]>, VEX_WIG;
+ VEX, Sched<[sched.XMM]>, WIG;
def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))]>,
- VEX, Sched<[sched.XMM.Folded]>, VEX_WIG;
+ VEX, Sched<[sched.XMM.Folded]>, WIG;
def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>,
- VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
+ VEX, VEX_L, Sched<[sched.YMM]>, WIG;
def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat("v", OpcodeStr,
"ps\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))]>,
- VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG;
+ VEX, VEX_L, Sched<[sched.YMM.Folded]>, WIG;
}
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -2974,22 +2985,22 @@ let Predicates = [HasAVX, NoVLX] in {
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>,
- VEX, Sched<[sched.XMM]>, VEX_WIG;
+ VEX, Sched<[sched.XMM]>, WIG;
def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))]>,
- VEX, Sched<[sched.XMM.Folded]>, VEX_WIG;
+ VEX, Sched<[sched.XMM.Folded]>, WIG;
def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>,
- VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
+ VEX, VEX_L, Sched<[sched.YMM]>, WIG;
def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat("v", OpcodeStr,
"pd\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))]>,
- VEX, VEX_L, Sched<[sched.YMM.Folded]>, VEX_WIG;
+ VEX, VEX_L, Sched<[sched.YMM.Folded]>, WIG;
}
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -3009,7 +3020,7 @@ multiclass sse1_fp_unop_s_intr<string OpcodeStr, Predicate AVXTarget> {
defm V#NAME#SS : avx_fp_unop_s_intr<v4f32, sse_load_f32,
!cast<Intrinsic>("int_x86_sse_"#OpcodeStr#_ss),
AVXTarget>,
- XS, VEX_4V, VEX_LIG, VEX_WIG, NotMemoryFoldable;
+ XS, VEX_4V, VEX_LIG, WIG;
}
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
@@ -3018,7 +3029,7 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNod
ssmem, OpNode, SSEPackedSingle, sched.Scl, UseSSE1>, XS;
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr#ss, FR32, f32,
f32mem, ssmem, OpNode, SSEPackedSingle, sched.Scl, AVXTarget>,
- XS, VEX_4V, VEX_LIG, VEX_WIG;
+ XS, VEX_4V, VEX_LIG, WIG;
}
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
@@ -3027,7 +3038,7 @@ multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDPatternOperator OpNod
sdmem, OpNode, SSEPackedDouble, sched.Scl, UseSSE2>, XD;
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr#sd, FR64, f64,
f64mem, sdmem, OpNode, SSEPackedDouble, sched.Scl, AVXTarget>,
- XD, VEX_4V, VEX_LIG, VEX_WIG;
+ XD, VEX_4V, VEX_LIG, WIG;
}
// Square root.
@@ -3098,12 +3109,12 @@ def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src),
- addr:$dst)]>, VEX, VEX_WIG;
+ addr:$dst)]>, VEX, WIG;
def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntpd\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2f64 VR128:$src),
- addr:$dst)]>, VEX, VEX_WIG;
+ addr:$dst)]>, VEX, WIG;
} // SchedRW
let SchedRW = [SchedWriteFMoveLSNT.YMM.MR] in {
@@ -3111,12 +3122,12 @@ def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v8f32 VR256:$src),
- addr:$dst)]>, VEX, VEX_L, VEX_WIG;
+ addr:$dst)]>, VEX, VEX_L, WIG;
def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntpd\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f64 VR256:$src),
- addr:$dst)]>, VEX, VEX_L, VEX_WIG;
+ addr:$dst)]>, VEX, VEX_L, WIG;
} // SchedRW
let ExeDomain = SSEPackedInt in {
@@ -3124,13 +3135,13 @@ def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2i64 VR128:$src),
- addr:$dst)]>, VEX, VEX_WIG,
+ addr:$dst)]>, VEX, WIG,
Sched<[SchedWriteVecMoveLSNT.XMM.MR]>;
def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4i64 VR256:$src),
- addr:$dst)]>, VEX, VEX_L, VEX_WIG,
+ addr:$dst)]>, VEX, VEX_L, WIG,
Sched<[SchedWriteVecMoveLSNT.YMM.MR]>;
} // ExeDomain
} // Predicates
@@ -3246,11 +3257,11 @@ def : Pat<(X86MFence), (MFENCE)>;
let mayLoad=1, hasSideEffects=1, Defs=[MXCSR] in
def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
"ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>,
- VEX, Sched<[WriteLDMXCSR]>, VEX_WIG;
+ VEX, Sched<[WriteLDMXCSR]>, WIG;
let mayStore=1, hasSideEffects=1, Uses=[MXCSR] in
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
"stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>,
- VEX, Sched<[WriteSTMXCSR]>, VEX_WIG;
+ VEX, Sched<[WriteSTMXCSR]>, WIG;
let mayLoad=1, hasSideEffects=1, Defs=[MXCSR] in
def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
@@ -3270,16 +3281,16 @@ let ExeDomain = SSEPackedInt in { // SSE integer instructions
let hasSideEffects = 0 in {
def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, WIG;
def VMOVDQUrr : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.XMM.RR]>, VEX, WIG;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"movdqa\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, WIG;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"movdqu\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.YMM.RR]>, VEX, VEX_L, WIG;
}
// For Disassembler
@@ -3287,19 +3298,19 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def VMOVDQArr_REV : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", []>,
Sched<[SchedWriteVecMoveLS.XMM.RR]>,
- VEX, VEX_WIG, FoldGenData<"VMOVDQArr">;
+ VEX, WIG;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
"movdqa\t{$src, $dst|$dst, $src}", []>,
Sched<[SchedWriteVecMoveLS.YMM.RR]>,
- VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQAYrr">;
+ VEX, VEX_L, WIG;
def VMOVDQUrr_REV : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}", []>,
Sched<[SchedWriteVecMoveLS.XMM.RR]>,
- VEX, VEX_WIG, FoldGenData<"VMOVDQUrr">;
+ VEX, WIG;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
"movdqu\t{$src, $dst|$dst, $src}", []>,
Sched<[SchedWriteVecMoveLS.YMM.RR]>,
- VEX, VEX_L, VEX_WIG, FoldGenData<"VMOVDQUYrr">;
+ VEX, VEX_L, WIG;
}
let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
@@ -3307,20 +3318,20 @@ let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (alignedloadv2i64 addr:$src))]>,
- Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, WIG;
def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"movdqa\t{$src, $dst|$dst, $src}", []>,
Sched<[SchedWriteVecMoveLS.YMM.RM]>,
- VEX, VEX_L, VEX_WIG;
+ VEX, VEX_L, WIG;
def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (loadv2i64 addr:$src))]>,
Sched<[SchedWriteVecMoveLS.XMM.RM]>,
- XS, VEX, VEX_WIG;
+ XS, VEX, WIG;
def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vmovdqu\t{$src, $dst|$dst, $src}", []>,
Sched<[SchedWriteVecMoveLS.YMM.RM]>,
- XS, VEX, VEX_L, VEX_WIG;
+ XS, VEX, VEX_L, WIG;
}
let mayStore = 1, hasSideEffects = 0, Predicates = [HasAVX,NoVLX] in {
@@ -3328,18 +3339,18 @@ def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i128mem:$dst, VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[(alignedstore (v2i64 VR128:$src), addr:$dst)]>,
- Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.XMM.MR]>, VEX, WIG;
def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
(ins i256mem:$dst, VR256:$src),
"movdqa\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteVecMoveLS.YMM.MR]>, VEX, VEX_L, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.YMM.MR]>, VEX, VEX_L, WIG;
def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",
[(store (v2i64 VR128:$src), addr:$dst)]>,
- Sched<[SchedWriteVecMoveLS.XMM.MR]>, XS, VEX, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.XMM.MR]>, XS, VEX, WIG;
def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",[]>,
- Sched<[SchedWriteVecMoveLS.YMM.MR]>, XS, VEX, VEX_L, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.YMM.MR]>, XS, VEX, VEX_L, WIG;
}
let SchedRW = [SchedWriteVecMoveLS.XMM.RR] in {
@@ -3355,12 +3366,11 @@ def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
// For Disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>,
- FoldGenData<"MOVDQArr">;
+ "movdqa\t{$src, $dst|$dst, $src}", []>;
def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}", []>,
- XS, Requires<[UseSSE2]>, FoldGenData<"MOVDQUrr">;
+ XS, Requires<[UseSSE2]>;
}
} // SchedRW
@@ -3527,12 +3537,12 @@ defm PMULUDQ : PDI_binop_all<0xF4, "pmuludq", X86pmuludq, v2i64, v4i64,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
load, i128mem, SchedWriteVecIMul.XMM, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16,
VR256, load, i256mem, SchedWriteVecIMul.YMM,
- 0>, VEX_4V, VEX_L, VEX_WIG;
+ 0>, VEX_4V, VEX_L, WIG;
let Constraints = "$src1 = $dst" in
defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
memop, i128mem, SchedWriteVecIMul.XMM>;
@@ -3540,11 +3550,11 @@ defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128,
load, i128mem, SchedWritePSADBW.XMM, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256,
load, i256mem, SchedWritePSADBW.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
let Constraints = "$src1 = $dst" in
defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128,
memop, i128mem, SchedWritePSADBW.XMM>;
@@ -3594,12 +3604,12 @@ multiclass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm,
let Predicates = [HasAVX, prd] in
defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
OpNode, OpNode2, VR128, sched.XMM, schedImm.XMM,
- DstVT128, SrcVT, load, 0>, VEX_4V, VEX_WIG;
+ DstVT128, SrcVT, load, 0>, VEX_4V, WIG;
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
OpNode, OpNode2, VR256, sched.YMM, schedImm.YMM,
DstVT256, SrcVT, load, 0>, VEX_4V, VEX_L,
- VEX_WIG;
+ WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2,
VR128, sched.XMM, schedImm.XMM, DstVT128, SrcVT,
@@ -3621,11 +3631,11 @@ multiclass PDI_binop_ri_all<bits<8> opc, Format ImmForm, string OpcodeStr,
SDNode OpNode, X86SchedWriteWidths sched> {
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm V#NAME : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode,
- VR128, v16i8, sched.XMM, 0>, VEX_4V, VEX_WIG;
+ VR128, v16i8, sched.XMM, 0>, VEX_4V, WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm V#NAME#Y : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode,
VR256, v32i8, sched.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
let Constraints = "$src1 = $dst" in
defm NAME : PDI_binop_ri<opc, ImmForm, OpcodeStr, OpNode, VR128, v16i8,
sched.XMM>;
@@ -3697,7 +3707,7 @@ let Predicates = [HasAVX, prd] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode VR128:$src1, (i8 timm:$src2))))]>,
- VEX, Sched<[sched.XMM]>, VEX_WIG;
+ VEX, Sched<[sched.XMM]>, WIG;
def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
!strconcat("v", OpcodeStr,
@@ -3705,7 +3715,7 @@ let Predicates = [HasAVX, prd] in {
[(set VR128:$dst,
(vt128 (OpNode (load addr:$src1),
(i8 timm:$src2))))]>, VEX,
- Sched<[sched.XMM.Folded]>, VEX_WIG;
+ Sched<[sched.XMM.Folded]>, WIG;
}
let Predicates = [HasAVX2, prd] in {
@@ -3715,7 +3725,7 @@ let Predicates = [HasAVX2, prd] in {
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR256:$dst,
(vt256 (OpNode VR256:$src1, (i8 timm:$src2))))]>,
- VEX, VEX_L, Sched<[sched.YMM]>, VEX_WIG;
+ VEX, VEX_L, Sched<[sched.YMM]>, WIG;
def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
(ins i256mem:$src1, u8imm:$src2),
!strconcat("v", OpcodeStr,
@@ -3723,7 +3733,7 @@ let Predicates = [HasAVX2, prd] in {
[(set VR256:$dst,
(vt256 (OpNode (load addr:$src1),
(i8 timm:$src2))))]>, VEX, VEX_L,
- Sched<[sched.YMM.Folded]>, VEX_WIG;
+ Sched<[sched.YMM.Folded]>, WIG;
}
let Predicates = [UseSSE2] in {
@@ -3811,33 +3821,33 @@ multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPACKSSWBY : sse2_pack<0x63, "vpacksswb", v32i8, v16i16, X86Packss, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
defm VPACKSSDWY : sse2_pack<0x6B, "vpackssdw", v16i16, v8i32, X86Packss, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
defm VPACKUSWBY : sse2_pack<0x67, "vpackuswb", v32i8, v16i16, X86Packus, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
defm VPACKUSDWY : sse4_pack<0x2B, "vpackusdw", v16i16, v8i32, X86Packus, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -3882,61 +3892,61 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
}
let Predicates = [HasAVX, NoVLX] in {
defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, VR128,
i128mem, SchedWriteShuffle.XMM, load, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPUNPCKLBWY : sse2_unpack<0x60, "vpunpcklbw", v32i8, X86Unpckl, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
defm VPUNPCKLWDY : sse2_unpack<0x61, "vpunpcklwd", v16i16, X86Unpckl, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
defm VPUNPCKHBWY : sse2_unpack<0x68, "vpunpckhbw", v32i8, X86Unpckh, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
defm VPUNPCKHWDY : sse2_unpack<0x69, "vpunpckhwd", v16i16, X86Unpckh, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
defm VPUNPCKLDQY : sse2_unpack<0x62, "vpunpckldq", v8i32, X86Unpckl, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
defm VPUNPCKLQDQY : sse2_unpack<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
defm VPUNPCKHDQY : sse2_unpack<0x6A, "vpunpckhdq", v8i32, X86Unpckh, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
defm VPUNPCKHQDQY : sse2_unpack<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, VR256,
i256mem, SchedWriteShuffle.YMM, load, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -3994,7 +4004,7 @@ def VPEXTRWrr : Ii8<0xC5, MRMSrcReg,
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1),
timm:$src2))]>,
- PD, VEX, VEX_WIG, Sched<[WriteVecExtract]>;
+ PD, VEX, WIG, Sched<[WriteVecExtract]>;
def PEXTRWrr : PDIi8<0xC5, MRMSrcReg,
(outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -4004,7 +4014,7 @@ def PEXTRWrr : PDIi8<0xC5, MRMSrcReg,
// Insert
let Predicates = [HasAVX, NoBWI] in
-defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V, VEX_WIG;
+defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V, WIG;
let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in
defm PINSRW : sse2_pinsrw, PD;
@@ -4035,14 +4045,14 @@ def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
(ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32orGR64:$dst, (X86movmsk (v16i8 VR128:$src)))]>,
- Sched<[WriteVecMOVMSK]>, VEX, VEX_WIG;
+ Sched<[WriteVecMOVMSK]>, VEX, WIG;
let Predicates = [HasAVX2] in {
def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst),
(ins VR256:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32orGR64:$dst, (X86movmsk (v32i8 VR256:$src)))]>,
- Sched<[WriteVecMOVMSKY]>, VEX, VEX_L, VEX_WIG;
+ Sched<[WriteVecMOVMSKY]>, VEX, VEX_L, WIG;
}
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src),
@@ -4060,27 +4070,27 @@ let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecMoveLS.XMM.MR] in {
// As VEX does not have separate instruction contexts for address size
// overrides, VMASKMOVDQU and VMASKMOVDQU64 would have a decode conflict.
// Prefer VMASKMODDQU64.
-let Uses = [EDI], Predicates = [HasAVX], isAsmParserOnly = 1 in
-def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
- (ins VR128:$src, VR128:$mask),
- "maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
- VEX, VEX_WIG;
let Uses = [RDI], Predicates = [HasAVX,In64BitMode] in
def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>,
- VEX, VEX_WIG;
-
-let Uses = [EDI], Predicates = [UseSSE2] in
-def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ VEX, WIG;
+let Uses = [EDI], Predicates = [HasAVX], isAsmParserOnly = 1 in
+def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
+ (ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>,
+ VEX, WIG;
+
let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in
def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+let Uses = [EDI], Predicates = [UseSSE2] in
+def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
} // ExeDomain = SSEPackedInt
@@ -4297,7 +4307,7 @@ def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
- VEX, Requires<[UseAVX]>, VEX_WIG;
+ VEX, Requires<[UseAVX]>, WIG;
def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -4313,7 +4323,7 @@ def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (extractelt (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)]>,
- VEX, VEX_WIG;
+ VEX, WIG;
def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (extractelt (v2i64 VR128:$src),
@@ -4324,7 +4334,7 @@ def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVPQI2QIrr : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
- "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_WIG;
+ "movq\t{$src, $dst|$dst, $src}", []>, VEX, WIG;
def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", []>;
}
@@ -4359,7 +4369,7 @@ let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
- XS, VEX, Requires<[UseAVX]>, VEX_WIG;
+ XS, VEX, Requires<[UseAVX]>, WIG;
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
@@ -4408,16 +4418,16 @@ def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
let Predicates = [HasAVX, NoVLX] in {
defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
v4f32, VR128, loadv4f32, f128mem,
- SchedWriteFShuffle.XMM>, VEX, VEX_WIG;
+ SchedWriteFShuffle.XMM>, VEX, WIG;
defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
v4f32, VR128, loadv4f32, f128mem,
- SchedWriteFShuffle.XMM>, VEX, VEX_WIG;
+ SchedWriteFShuffle.XMM>, VEX, WIG;
defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup",
v8f32, VR256, loadv8f32, f256mem,
- SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG;
+ SchedWriteFShuffle.YMM>, VEX, VEX_L, WIG;
defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup",
v8f32, VR256, loadv8f32, f256mem,
- SchedWriteFShuffle.YMM>, VEX, VEX_L, VEX_WIG;
+ SchedWriteFShuffle.YMM>, VEX, VEX_L, WIG;
}
defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128,
memopv4f32, f128mem, SchedWriteFShuffle.XMM>;
@@ -4486,9 +4496,9 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
let Predicates = [HasAVX, NoVLX] in {
defm VMOVDDUP : sse3_replicate_dfp<"vmovddup", SchedWriteFShuffle>,
- VEX, VEX_WIG;
+ VEX, WIG;
defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup", SchedWriteFShuffle>,
- VEX, VEX_L, VEX_WIG;
+ VEX, VEX_L, WIG;
}
defm MOVDDUP : sse3_replicate_dfp<"movddup", SchedWriteFShuffle>;
@@ -4512,11 +4522,11 @@ let Predicates = [HasAVX] in {
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>,
- Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.XMM.RM]>, VEX, WIG;
def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
[(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
- Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, VEX_WIG;
+ Sched<[SchedWriteVecMoveLS.YMM.RM]>, VEX, VEX_L, WIG;
} // Predicates
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
@@ -4553,18 +4563,18 @@ let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VADDSUBPS : sse3_addsub<"vaddsubps", v4f32, VR128, f128mem,
SchedWriteFAddSizes.PS.XMM, loadv4f32, 0>,
- XD, VEX_4V, VEX_WIG;
+ XD, VEX_4V, WIG;
defm VADDSUBPSY : sse3_addsub<"vaddsubps", v8f32, VR256, f256mem,
SchedWriteFAddSizes.PS.YMM, loadv8f32, 0>,
- XD, VEX_4V, VEX_L, VEX_WIG;
+ XD, VEX_4V, VEX_L, WIG;
}
let ExeDomain = SSEPackedDouble in {
defm VADDSUBPD : sse3_addsub<"vaddsubpd", v2f64, VR128, f128mem,
SchedWriteFAddSizes.PD.XMM, loadv2f64, 0>,
- PD, VEX_4V, VEX_WIG;
+ PD, VEX_4V, WIG;
defm VADDSUBPDY : sse3_addsub<"vaddsubpd", v4f64, VR256, f256mem,
SchedWriteFAddSizes.PD.YMM, loadv4f64, 0>,
- PD, VEX_4V, VEX_L, VEX_WIG;
+ PD, VEX_4V, VEX_L, WIG;
}
}
let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
@@ -4625,23 +4635,23 @@ let Uses = [MXCSR], mayRaiseFPException = 1 in {
let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
- X86fhadd, WriteFHAdd, loadv4f32, 0>, VEX_4V, VEX_WIG;
+ X86fhadd, WriteFHAdd, loadv4f32, 0>, VEX_4V, WIG;
defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem,
- X86fhsub, WriteFHAdd, loadv4f32, 0>, VEX_4V, VEX_WIG;
+ X86fhsub, WriteFHAdd, loadv4f32, 0>, VEX_4V, WIG;
defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem,
- X86fhadd, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
+ X86fhadd, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, WIG;
defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem,
- X86fhsub, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, VEX_WIG;
+ X86fhsub, WriteFHAddY, loadv8f32, 0>, VEX_4V, VEX_L, WIG;
}
let ExeDomain = SSEPackedDouble in {
defm VHADDPD : S3_Int<0x7C, "vhaddpd", v2f64, VR128, f128mem,
- X86fhadd, WriteFHAdd, loadv2f64, 0>, VEX_4V, VEX_WIG;
+ X86fhadd, WriteFHAdd, loadv2f64, 0>, VEX_4V, WIG;
defm VHSUBPD : S3_Int<0x7D, "vhsubpd", v2f64, VR128, f128mem,
- X86fhsub, WriteFHAdd, loadv2f64, 0>, VEX_4V, VEX_WIG;
+ X86fhsub, WriteFHAdd, loadv2f64, 0>, VEX_4V, WIG;
defm VHADDPDY : S3_Int<0x7C, "vhaddpd", v4f64, VR256, f256mem,
- X86fhadd, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
+ X86fhadd, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, WIG;
defm VHSUBPDY : S3_Int<0x7D, "vhsubpd", v4f64, VR256, f256mem,
- X86fhsub, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, VEX_WIG;
+ X86fhsub, WriteFHAddY, loadv4f64, 0>, VEX_4V, VEX_L, WIG;
}
}
@@ -4700,23 +4710,23 @@ multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPABSB : SS3I_unop_rm<0x1C, "vpabsb", v16i8, abs, SchedWriteVecALU,
- load>, VEX, VEX_WIG;
+ load>, VEX, WIG;
defm VPABSW : SS3I_unop_rm<0x1D, "vpabsw", v8i16, abs, SchedWriteVecALU,
- load>, VEX, VEX_WIG;
+ load>, VEX, WIG;
}
let Predicates = [HasAVX, NoVLX] in {
defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, abs, SchedWriteVecALU,
- load>, VEX, VEX_WIG;
+ load>, VEX, WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPABSB : SS3I_unop_rm_y<0x1C, "vpabsb", v32i8, abs, SchedWriteVecALU>,
- VEX, VEX_L, VEX_WIG;
+ VEX, VEX_L, WIG;
defm VPABSW : SS3I_unop_rm_y<0x1D, "vpabsw", v16i16, abs, SchedWriteVecALU>,
- VEX, VEX_L, VEX_WIG;
+ VEX, VEX_L, WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, abs, SchedWriteVecALU>,
- VEX, VEX_L, VEX_WIG;
+ VEX, VEX_L, WIG;
}
defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, abs, SchedWriteVecALU,
@@ -4796,45 +4806,45 @@ let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8,
VR128, load, i128mem,
- SchedWriteVarShuffle.XMM, 0>, VEX_4V, VEX_WIG;
+ SchedWriteVarShuffle.XMM, 0>, VEX_4V, WIG;
defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16,
v16i8, VR128, load, i128mem,
- SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG;
+ SchedWriteVecIMul.XMM, 0>, VEX_4V, WIG;
}
defm VPMULHRSW : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16,
VR128, load, i128mem,
- SchedWriteVecIMul.XMM, 0>, VEX_4V, VEX_WIG;
+ SchedWriteVecIMul.XMM, 0>, VEX_4V, WIG;
}
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
load, i128mem,
- SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, 0>, VEX_4V, WIG;
defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
load, i128mem,
- SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, 0>, VEX_4V, WIG;
defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
load, i128mem,
- SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, 0>, VEX_4V, WIG;
defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
load, i128mem,
- SchedWritePHAdd.XMM, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, 0>, VEX_4V, WIG;
defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
int_x86_ssse3_psign_b_128,
- SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
+ SchedWriteVecALU.XMM, load, 0>, VEX_4V, WIG;
defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw",
int_x86_ssse3_psign_w_128,
- SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
+ SchedWriteVecALU.XMM, load, 0>, VEX_4V, WIG;
defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd",
int_x86_ssse3_psign_d_128,
- SchedWriteVecALU.XMM, load, 0>, VEX_4V, VEX_WIG;
+ SchedWriteVecALU.XMM, load, 0>, VEX_4V, WIG;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128,
- SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, load, 0>, VEX_4V, WIG;
defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128,
- SchedWritePHAdd.XMM, load, 0>, VEX_4V, VEX_WIG;
+ SchedWritePHAdd.XMM, load, 0>, VEX_4V, WIG;
}
}
@@ -4842,42 +4852,42 @@ let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
let isCommutable = 0 in {
defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8,
VR256, load, i256mem,
- SchedWriteVarShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteVarShuffle.YMM, 0>, VEX_4V, VEX_L, WIG;
defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16,
v32i8, VR256, load, i256mem,
- SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, WIG;
}
defm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16,
VR256, load, i256mem,
- SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteVecIMul.YMM, 0>, VEX_4V, VEX_L, WIG;
}
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
VR256, load, i256mem,
- SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, WIG;
defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
load, i256mem,
- SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, WIG;
defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
VR256, load, i256mem,
- SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, WIG;
defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
load, i256mem,
- SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWritePHAdd.YMM, 0>, VEX_4V, VEX_L, WIG;
defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
- SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteVecALU.YMM>, VEX_4V, VEX_L, WIG;
defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", int_x86_avx2_psign_w,
- SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteVecALU.YMM>, VEX_4V, VEX_L, WIG;
defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d,
- SchedWriteVecALU.YMM>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteVecALU.YMM>, VEX_4V, VEX_L, WIG;
defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
int_x86_avx2_phadd_sw,
- SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWritePHAdd.YMM>, VEX_4V, VEX_L, WIG;
defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
int_x86_avx2_phsub_sw,
- SchedWritePHAdd.YMM>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWritePHAdd.YMM>, VEX_4V, VEX_L, WIG;
}
}
@@ -4946,10 +4956,10 @@ multiclass ssse3_palignr<string asm, ValueType VT, RegisterClass RC,
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPALIGNR : ssse3_palignr<"vpalignr", v16i8, VR128, load, i128mem,
- SchedWriteShuffle.XMM, 0>, VEX_4V, VEX_WIG;
+ SchedWriteShuffle.XMM, 0>, VEX_4V, WIG;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPALIGNRY : ssse3_palignr<"vpalignr", v32i8, VR256, load, i256mem,
- SchedWriteShuffle.YMM, 0>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteShuffle.YMM, 0>, VEX_4V, VEX_L, WIG;
let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in
defm PALIGNR : ssse3_palignr<"palignr", v16i8, VR128, memop, i128mem,
SchedWriteShuffle.XMM>;
@@ -5004,11 +5014,11 @@ multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX, prd] in
defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
VR128, VR128, SchedWriteVecExtend.XMM>,
- VEX, VEX_WIG;
+ VEX, WIG;
let Predicates = [HasAVX2, prd] in
defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
VR256, VR128, SchedWriteVecExtend.YMM>,
- VEX, VEX_L, VEX_WIG;
+ VEX, VEX_L, WIG;
}
multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
@@ -5228,7 +5238,7 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
}
let Predicates = [HasAVX, NoBWI] in
- defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX, VEX_WIG;
+ defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX, WIG;
defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
@@ -5240,7 +5250,7 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
(ins VR128:$src1, u8imm:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- Sched<[WriteVecExtract]>, FoldGenData<NAME#rr>;
+ Sched<[WriteVecExtract]>;
let hasSideEffects = 0, mayStore = 1 in
def mr : SS4AIi8<opc, MRMDestMem, (outs),
@@ -5252,7 +5262,7 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
}
let Predicates = [HasAVX, NoBWI] in
- defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX, VEX_WIG;
+ defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX, WIG;
defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
@@ -5303,7 +5313,7 @@ multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
}
let Predicates = [HasAVX, NoDQI] in
- defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
+ defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, REX_W;
defm PEXTRQ : SS41I_extract64<0x16, "pextrq">, REX_W;
@@ -5327,7 +5337,7 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
let ExeDomain = SSEPackedSingle in {
let Predicates = [UseAVX] in
- defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX, VEX_WIG;
+ defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX, WIG;
defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
}
@@ -5357,7 +5367,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
}
let Predicates = [HasAVX, NoBWI] in {
- defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V, VEX_WIG;
+ defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V, WIG;
def : Pat<(X86pinsrb VR128:$src1, (i32 (anyext (i8 GR8:$src2))), timm:$src3),
(VPINSRBrr VR128:$src1, (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
GR8:$src2, sub_8bit), timm:$src3)>;
@@ -5414,7 +5424,7 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
}
let Predicates = [HasAVX, NoDQI] in
- defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
+ defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, REX_W;
let Constraints = "$src1 = $dst" in
defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
@@ -5449,7 +5459,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
let ExeDomain = SSEPackedSingle in {
let Predicates = [UseAVX] in
defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
let Constraints = "$src1 = $dst" in
defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1>;
}
@@ -5610,27 +5620,27 @@ let Predicates = [HasAVX, NoVLX] in {
// Intrinsic form
defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32,
loadv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>,
- VEX, VEX_WIG;
+ VEX, WIG;
defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32,
loadv8f32, X86any_VRndScale, SchedWriteFRnd.YMM>,
- VEX, VEX_L, VEX_WIG;
+ VEX, VEX_L, WIG;
}
let ExeDomain = SSEPackedDouble, Uses = [MXCSR], mayRaiseFPException = 1 in {
defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64,
loadv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>,
- VEX, VEX_WIG;
+ VEX, WIG;
defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64,
loadv4f64, X86any_VRndScale, SchedWriteFRnd.YMM>,
- VEX, VEX_L, VEX_WIG;
+ VEX, VEX_L, WIG;
}
}
let Predicates = [UseAVX] in {
defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl,
v4f32, v2f64, X86RndScales, 0>,
- VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC;
+ VEX_4V, VEX_LIG, WIG, SIMD_EXC;
defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>,
- VEX_4V, VEX_LIG, VEX_WIG, SIMD_EXC;
+ VEX_4V, VEX_LIG, WIG, SIMD_EXC;
}
let Predicates = [UseAVX] in {
@@ -5684,22 +5694,22 @@ let Defs = [EFLAGS], Predicates = [HasAVX] in {
def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
- Sched<[SchedWriteVecTest.XMM]>, VEX, VEX_WIG;
+ Sched<[SchedWriteVecTest.XMM]>, VEX, WIG;
def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
Sched<[SchedWriteVecTest.XMM.Folded, SchedWriteVecTest.XMM.ReadAfterFold]>,
- VEX, VEX_WIG;
+ VEX, WIG;
def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
- Sched<[SchedWriteVecTest.YMM]>, VEX, VEX_L, VEX_WIG;
+ Sched<[SchedWriteVecTest.YMM]>, VEX, VEX_L, WIG;
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
Sched<[SchedWriteVecTest.YMM.Folded, SchedWriteVecTest.YMM.ReadAfterFold]>,
- VEX, VEX_L, VEX_WIG;
+ VEX, VEX_L, WIG;
}
let Defs = [EFLAGS] in {
@@ -5801,7 +5811,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "vphminposuw",
X86phminpos, load,
- WritePHMINPOS>, VEX, VEX_WIG;
+ WritePHMINPOS>, VEX, WIG;
defm PHMINPOSUW : SS41I_unop_rm_int_v16<0x41, "phminposuw",
X86phminpos, memop,
WritePHMINPOS>;
@@ -5832,65 +5842,65 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasAVX, NoVLX] in {
defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", smin, v4i32, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", umin, v4i32, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v4i32, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", umax, v4i32, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
defm VPMULDQ : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v2i64, VR128,
load, i128mem, SchedWriteVecIMul.XMM, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
}
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", smin, v16i8, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", umin, v8i16, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v16i8, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v8i16, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
}
let Predicates = [HasAVX2, NoVLX] in {
defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", smin, v8i32, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", umin, v8i32, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", smax, v8i32, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", umax, v8i32, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
defm VPMULDQY : SS48I_binop_rm<0x28, "vpmuldq", X86pmuldq, v4i64, VR256,
load, i256mem, SchedWriteVecIMul.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", smin, v32i8, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", umin, v16i16, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", smax, v32i8, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", umax, v16i16, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -5917,20 +5927,20 @@ let Constraints = "$src1 = $dst" in {
let Predicates = [HasAVX, NoVLX] in
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
load, i128mem, SchedWritePMULLD.XMM, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
let Predicates = [HasAVX] in
defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
let Predicates = [HasAVX2, NoVLX] in
defm VPMULLDY : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
load, i256mem, SchedWritePMULLD.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
let Predicates = [HasAVX2] in
defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
let Constraints = "$src1 = $dst" in {
defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
@@ -6078,22 +6088,22 @@ let Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
VR128, load, i128mem, 0,
- SchedWriteMPSAD.XMM>, VEX_4V, VEX_WIG;
+ SchedWriteMPSAD.XMM>, VEX_4V, WIG;
}
let Uses = [MXCSR], mayRaiseFPException = 1 in {
let ExeDomain = SSEPackedSingle in
defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
VR128, load, f128mem, 0,
- SchedWriteDPPS.XMM>, VEX_4V, VEX_WIG;
+ SchedWriteDPPS.XMM>, VEX_4V, WIG;
let ExeDomain = SSEPackedDouble in
defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
VR128, load, f128mem, 0,
- SchedWriteDPPD.XMM>, VEX_4V, VEX_WIG;
+ SchedWriteDPPD.XMM>, VEX_4V, WIG;
let ExeDomain = SSEPackedSingle in
defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
VR256, load, i256mem, 0,
- SchedWriteDPPS.YMM>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteDPPS.YMM>, VEX_4V, VEX_L, WIG;
}
}
@@ -6101,7 +6111,7 @@ let Predicates = [HasAVX2] in {
let isCommutable = 0 in {
defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
VR256, load, i256mem, 0,
- SchedWriteMPSAD.YMM>, VEX_4V, VEX_L, VEX_WIG;
+ SchedWriteMPSAD.YMM>, VEX_4V, VEX_L, WIG;
}
}
@@ -6160,30 +6170,30 @@ let Predicates = [HasAVX] in {
defm VBLENDPS : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v4f32,
VR128, load, f128mem, 0, SSEPackedSingle,
SchedWriteFBlend.XMM, BlendCommuteImm4>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
defm VBLENDPSY : SS41I_blend_rmi<0x0C, "vblendps", X86Blendi, v8f32,
VR256, load, f256mem, 0, SSEPackedSingle,
SchedWriteFBlend.YMM, BlendCommuteImm8>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
defm VBLENDPD : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v2f64,
VR128, load, f128mem, 0, SSEPackedDouble,
SchedWriteFBlend.XMM, BlendCommuteImm2>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
defm VBLENDPDY : SS41I_blend_rmi<0x0D, "vblendpd", X86Blendi, v4f64,
VR256, load, f256mem, 0, SSEPackedDouble,
SchedWriteFBlend.YMM, BlendCommuteImm4>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
defm VPBLENDW : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v8i16,
VR128, load, i128mem, 0, SSEPackedInt,
SchedWriteBlend.XMM, BlendCommuteImm8>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
}
let Predicates = [HasAVX2] in {
defm VPBLENDWY : SS41I_blend_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
VR256, load, i256mem, 0, SSEPackedInt,
SchedWriteBlend.YMM, BlendCommuteImm8>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
}
// Emulate vXi32/vXi64 blends with vXf32/vXf64 or pblendw.
@@ -6463,11 +6473,11 @@ let AddedComplexity = 400 in { // Prefer non-temporal versions
let Predicates = [HasAVX, NoVLX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, VEX_WIG;
+ Sched<[SchedWriteVecMoveLSNT.XMM.RM]>, VEX, WIG;
let Predicates = [HasAVX2, NoVLX] in
def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}", []>,
- Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, VEX_WIG;
+ Sched<[SchedWriteVecMoveLSNT.YMM.RM]>, VEX, VEX_L, WIG;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movntdqa\t{$src, $dst|$dst, $src}", []>,
Sched<[SchedWriteVecMoveLSNT.XMM.RM]>;
@@ -6554,12 +6564,12 @@ multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasAVX] in
defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128,
load, i128mem, SchedWriteVecALU.XMM, 0>,
- VEX_4V, VEX_WIG;
+ VEX_4V, WIG;
let Predicates = [HasAVX2] in
defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256,
load, i256mem, SchedWriteVecALU.YMM, 0>,
- VEX_4V, VEX_L, VEX_WIG;
+ VEX_4V, VEX_L, WIG;
let Constraints = "$src1 = $dst" in
defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128,
@@ -6583,7 +6593,7 @@ multiclass pcmpistrm_SS42AI<string asm> {
let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
- defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX, VEX_WIG;
+ defm VPCMPISTRM : pcmpistrm_SS42AI<"vpcmpistrm">, VEX, WIG;
defm PCMPISTRM : pcmpistrm_SS42AI<"pcmpistrm"> ;
}
@@ -6601,7 +6611,7 @@ multiclass SS42AI_pcmpestrm<string asm> {
let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
- defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX, VEX_WIG;
+ defm VPCMPESTRM : SS42AI_pcmpestrm<"vpcmpestrm">, VEX, WIG;
defm PCMPESTRM : SS42AI_pcmpestrm<"pcmpestrm">;
}
@@ -6619,7 +6629,7 @@ multiclass SS42AI_pcmpistri<string asm> {
let Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
- defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX, VEX_WIG;
+ defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX, WIG;
defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
}
@@ -6637,7 +6647,7 @@ multiclass SS42AI_pcmpestri<string asm> {
let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
let Predicates = [HasAVX] in
- defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX, VEX_WIG;
+ defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX, WIG;
defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
}
@@ -6786,28 +6796,28 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
// Perform One Round of an AES Encryption/Decryption Flow
let Predicates = [HasAVX, NoVLX_Or_NoVAES, HasAES] in {
defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
- int_x86_aesni_aesenc, load>, VEX_4V, VEX_WIG;
+ int_x86_aesni_aesenc, load>, VEX_4V, WIG;
defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
- int_x86_aesni_aesenclast, load>, VEX_4V, VEX_WIG;
+ int_x86_aesni_aesenclast, load>, VEX_4V, WIG;
defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec",
- int_x86_aesni_aesdec, load>, VEX_4V, VEX_WIG;
+ int_x86_aesni_aesdec, load>, VEX_4V, WIG;
defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
- int_x86_aesni_aesdeclast, load>, VEX_4V, VEX_WIG;
+ int_x86_aesni_aesdeclast, load>, VEX_4V, WIG;
}
let Predicates = [NoVLX, HasVAES] in {
defm VAESENCY : AESI_binop_rm_int<0xDC, "vaesenc",
int_x86_aesni_aesenc_256, load, 0, VR256,
- i256mem>, VEX_4V, VEX_L, VEX_WIG;
+ i256mem>, VEX_4V, VEX_L, WIG;
defm VAESENCLASTY : AESI_binop_rm_int<0xDD, "vaesenclast",
int_x86_aesni_aesenclast_256, load, 0, VR256,
- i256mem>, VEX_4V, VEX_L, VEX_WIG;
+ i256mem>, VEX_4V, VEX_L, WIG;
defm VAESDECY : AESI_binop_rm_int<0xDE, "vaesdec",
int_x86_aesni_aesdec_256, load, 0, VR256,
- i256mem>, VEX_4V, VEX_L, VEX_WIG;
+ i256mem>, VEX_4V, VEX_L, WIG;
defm VAESDECLASTY : AESI_binop_rm_int<0xDF, "vaesdeclast",
int_x86_aesni_aesdeclast_256, load, 0, VR256,
- i256mem>, VEX_4V, VEX_L, VEX_WIG;
+ i256mem>, VEX_4V, VEX_L, WIG;
}
let Constraints = "$src1 = $dst" in {
@@ -6828,12 +6838,12 @@ let Predicates = [HasAVX, HasAES] in {
"vaesimc\t{$src1, $dst|$dst, $src1}",
[(set VR128:$dst,
(int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>,
- VEX, VEX_WIG;
+ VEX, WIG;
def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1),
"vaesimc\t{$src1, $dst|$dst, $src1}",
[(set VR128:$dst, (int_x86_aesni_aesimc (load addr:$src1)))]>,
- Sched<[WriteAESIMC.Folded]>, VEX, VEX_WIG;
+ Sched<[WriteAESIMC.Folded]>, VEX, WIG;
}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1),
@@ -6853,13 +6863,13 @@ let Predicates = [HasAVX, HasAES] in {
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist VR128:$src1, timm:$src2))]>,
- Sched<[WriteAESKeyGen]>, VEX, VEX_WIG;
+ Sched<[WriteAESKeyGen]>, VEX, WIG;
def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, u8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist (load addr:$src1), timm:$src2))]>,
- Sched<[WriteAESKeyGen.Folded]>, VEX, VEX_WIG;
+ Sched<[WriteAESKeyGen.Folded]>, VEX, WIG;
}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, u8imm:$src2),
@@ -6948,11 +6958,11 @@ multiclass vpclmulqdq<RegisterClass RC, X86MemOperand MemOp,
let Predicates = [HasAVX, NoVLX_Or_NoVPCLMULQDQ, HasPCLMUL] in
defm VPCLMULQDQ : vpclmulqdq<VR128, i128mem, load,
- int_x86_pclmulqdq>, VEX_4V, VEX_WIG;
+ int_x86_pclmulqdq>, VEX_4V, WIG;
let Predicates = [NoVLX, HasVPCLMULQDQ] in
defm VPCLMULQDQY : vpclmulqdq<VR256, i256mem, load,
- int_x86_pclmulqdq_256>, VEX_4V, VEX_L, VEX_WIG;
+ int_x86_pclmulqdq_256>, VEX_4V, VEX_L, WIG;
multiclass vpclmulqdq_aliases_impl<string InstStr, RegisterClass RC,
X86MemOperand MemOp, string Hi, string Lo> {
@@ -7322,7 +7332,9 @@ multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR128:$dst, (v4i32 (OpNode VR128:$src1, VR128:$src2,
(loadv4i32 addr:$src3))))]>,
- VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
+ VEX_4V, Sched<[SchedWriteVecIMul.XMM.Folded,
+ SchedWriteVecIMul.XMM.ReadAfterFold,
+ SchedWriteVecIMul.XMM.ReadAfterFold]>;
let isCommutable = IsCommutable in
def Yrr : AVX8I<opc, MRMSrcReg, (outs VR256:$dst),
@@ -7330,14 +7342,16 @@ multiclass avx_vnni_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR256:$dst, (v8i32 (OpNode VR256:$src1,
VR256:$src2, VR256:$src3)))]>,
- VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>;
+ VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
def Yrm : AVX8I<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i256mem:$src3),
!strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
[(set VR256:$dst, (v8i32 (OpNode VR256:$src1, VR256:$src2,
(loadv8i32 addr:$src3))))]>,
- VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.XMM]>;
+ VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM.Folded,
+ SchedWriteVecIMul.YMM.ReadAfterFold,
+ SchedWriteVecIMul.YMM.ReadAfterFold]>;
}
defm VPDPBUSD : avx_vnni_rm<0x50, "vpdpbusd", X86Vpdpbusd, 0>;
@@ -7430,12 +7444,12 @@ let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
// Zero All YMM registers
def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
[(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L,
- Requires<[HasAVX]>, VEX_WIG;
+ Requires<[HasAVX]>, WIG;
// Zero Upper bits of YMM registers
def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
[(int_x86_avx_vzeroupper)]>, PS, VEX,
- Requires<[HasAVX]>, VEX_WIG;
+ Requires<[HasAVX]>, WIG;
} // Defs
} // SchedRW
@@ -7809,10 +7823,10 @@ multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
}
defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64,
- WriteShuffle256, i256mem>, VEX_W;
+ WriteShuffle256, i256mem>, REX_W;
let ExeDomain = SSEPackedDouble in
defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
- WriteFShuffle256, f256mem>, VEX_W;
+ WriteFShuffle256, f256mem>, REX_W;
//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Integer vector Values in 128-bit chunks
@@ -7923,7 +7937,7 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
int_x86_avx2_maskload_q_256,
int_x86_avx2_maskstore_q,
int_x86_avx2_maskstore_q_256,
- WriteVecMaskMove64, WriteVecMaskMove64Y>, VEX_W;
+ WriteVecMaskMove64, WriteVecMaskMove64Y>, REX_W;
multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
ValueType MaskVT> {
@@ -7994,9 +8008,9 @@ multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasAVX2, NoVLX] in {
defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", X86vshlv, v4i32, v8i32>;
- defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", X86vshlv, v2i64, v4i64>, VEX_W;
+ defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", X86vshlv, v2i64, v4i64>, REX_W;
defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", X86vsrlv, v4i32, v8i32>;
- defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", X86vsrlv, v2i64, v4i64>, VEX_W;
+ defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", X86vsrlv, v2i64, v4i64>, REX_W;
defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", X86vsrav, v4i32, v8i32>;
}
@@ -8025,9 +8039,9 @@ let Predicates = [HasAVX2] in {
= "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
in {
defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq",
- VR256, vx128mem, vx256mem>, VEX_W;
+ VR256, vx128mem, vx256mem>, REX_W;
defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq",
- VR256, vx128mem, vy256mem>, VEX_W;
+ VR256, vx128mem, vy256mem>, REX_W;
defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd",
VR256, vx128mem, vy256mem>;
defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd",
@@ -8035,9 +8049,9 @@ let Predicates = [HasAVX2] in {
let ExeDomain = SSEPackedDouble in {
defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd",
- VR256, vx128mem, vx256mem>, VEX_W;
+ VR256, vx128mem, vx256mem>, REX_W;
defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd",
- VR256, vx128mem, vy256mem>, VEX_W;
+ VR256, vx128mem, vy256mem>, REX_W;
}
let ExeDomain = SSEPackedSingle in {
@@ -8101,10 +8115,10 @@ multiclass GF2P8AFFINE_common<bits<8> Op, string OpStr, SDNode OpNode> {
let Predicates = [HasGFNI, HasAVX, NoVLX] in {
defm V#NAME : GF2P8AFFINE_rmi<Op, "v"#OpStr, v16i8, OpNode, VR128,
load, i128mem, SchedWriteVecIMul.XMM>,
- VEX_4V, VEX_W;
+ VEX_4V, REX_W;
defm V#NAME#Y : GF2P8AFFINE_rmi<Op, "v"#OpStr, v32i8, OpNode, VR256,
load, i256mem, SchedWriteVecIMul.YMM>,
- VEX_4V, VEX_L, VEX_W;
+ VEX_4V, VEX_L, REX_W;
}
}
@@ -8163,8 +8177,8 @@ multiclass avx_ifma_rm<bits<8> opc, string OpcodeStr, SDNode OpNode> {
VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
}
-defm VPMADD52HUQ : avx_ifma_rm<0xb5, "vpmadd52huq", x86vpmadd52h>, VEX_W, ExplicitVEXPrefix;
-defm VPMADD52LUQ : avx_ifma_rm<0xb4, "vpmadd52luq", x86vpmadd52l>, VEX_W, ExplicitVEXPrefix;
+defm VPMADD52HUQ : avx_ifma_rm<0xb5, "vpmadd52huq", x86vpmadd52h>, REX_W, ExplicitVEXPrefix;
+defm VPMADD52LUQ : avx_ifma_rm<0xb4, "vpmadd52luq", x86vpmadd52l>, REX_W, ExplicitVEXPrefix;
// AVX-VNNI-INT8
let Constraints = "$src1 = $dst" in
@@ -8281,3 +8295,134 @@ def : InstAlias<"vcvtneps2bf16x\t{$src, $dst|$dst, $src}",
(VCVTNEPS2BF16rr VR128:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtneps2bf16y\t{$src, $dst|$dst, $src}",
(VCVTNEPS2BF16Yrr VR128:$dst, VR256:$src), 0, "att">;
+
+// FIXME: Is there a better scheduler class for SHA512 than WriteVecIMul?
+let Predicates = [HasSHA512], Constraints = "$src1 = $dst" in {
+def VSHA512MSG1rr : I<0xcc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR128:$src2),
+ "vsha512msg1\t{$src2, $dst|$dst, $src2}",
+ [(set VR256:$dst,
+ (int_x86_vsha512msg1 VR256:$src1, VR128:$src2))]>, VEX_L,
+ VEX, T8XD, Sched<[WriteVecIMul]>;
+def VSHA512MSG2rr : I<0xcd, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ "vsha512msg2\t{$src2, $dst|$dst, $src2}",
+ [(set VR256:$dst,
+ (int_x86_vsha512msg2 VR256:$src1, VR256:$src2))]>, VEX_L,
+ VEX, T8XD, Sched<[WriteVecIMul]>;
+def VSHA512RNDS2rr : I<0xcb, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR128:$src3),
+ "vsha512rnds2\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR256:$dst,
+ (int_x86_vsha512rnds2 VR256:$src1, VR256:$src2, VR128:$src3))]>,
+ VEX_L, VEX_4V, T8XD, Sched<[WriteVecIMul]>;
+}
+
+// FIXME: Is there a better scheduler class for SM3 than WriteVecIMul?
+let Predicates = [HasSM3], Constraints = "$src1 = $dst" in {
+ multiclass SM3_Base<string OpStr> {
+ def rr : I<0xda, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (!cast<Intrinsic>("int_x86_"#OpStr) VR128:$src1,
+ VR128:$src2, VR128:$src3))]>,
+ Sched<[WriteVecIMul]>, VEX_4V;
+ def rm : I<0xda, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i128mem:$src3),
+ !strconcat(OpStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (!cast<Intrinsic>("int_x86_"#OpStr) VR128:$src1,
+ VR128:$src2, (loadv4i32 addr:$src3)))]>,
+ Sched<[WriteVecIMul]>, VEX_4V;
+ }
+
+ multiclass VSM3RNDS2_Base {
+ def rr : Ii8<0xde, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3, i32u8imm:$src4),
+ "vsm3rnds2\t{$src4, $src3, $src2, $dst|$dst, $src2, $src3, $src4}",
+ [(set VR128:$dst,
+ (int_x86_vsm3rnds2 VR128:$src1,
+ VR128:$src2, VR128:$src3, timm:$src4))]>,
+ Sched<[WriteVecIMul]>;
+ def rm : Ii8<0xde, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i128mem:$src3, i32u8imm:$src4),
+ "vsm3rnds2\t{$src4, $src3, $src2, $dst|$dst, $src2, $src3, $src4}",
+ [(set VR128:$dst,
+ (int_x86_vsm3rnds2 VR128:$src1,
+ VR128:$src2, (loadv4i32 addr:$src3), timm:$src4))]>,
+ Sched<[WriteVecIMul]>;
+ }
+}
+
+defm VSM3MSG1 : SM3_Base<"vsm3msg1">, T8PS;
+defm VSM3MSG2 : SM3_Base<"vsm3msg2">, T8PD;
+defm VSM3RNDS2 : VSM3RNDS2_Base, VEX_4V, TAPD;
+
+// FIXME: Is there a better scheduler class for SM4 than WriteVecIMul?
+let Predicates = [HasSM4] in {
+ multiclass SM4_Base<string OpStr, RegisterClass RC, string VL,
+ PatFrag LD, X86MemOperand MemOp> {
+ def rr : I<0xda, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, RC:$src2),
+ !strconcat(OpStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (!cast<Intrinsic>("int_x86_"#OpStr#VL) RC:$src1,
+ RC:$src2))]>,
+ Sched<[WriteVecIMul]>;
+ def rm : I<0xda, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, MemOp:$src2),
+ !strconcat(OpStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set RC:$dst, (!cast<Intrinsic>("int_x86_"#OpStr#VL) RC:$src1,
+ (LD addr:$src2)))]>,
+ Sched<[WriteVecIMul]>;
+ }
+}
+
+defm VSM4KEY4 : SM4_Base<"vsm4key4", VR128, "128", loadv4i32, i128mem>, T8XS, VEX_4V;
+defm VSM4KEY4Y : SM4_Base<"vsm4key4", VR256, "256", loadv8i32, i256mem>, T8XS, VEX_L, VEX_4V;
+defm VSM4RNDS4 : SM4_Base<"vsm4rnds4", VR128, "128", loadv4i32, i128mem>, T8XD, VEX_4V;
+defm VSM4RNDS4Y : SM4_Base<"vsm4rnds4", VR256, "256", loadv8i32, i256mem>, T8XD, VEX_L, VEX_4V;
+
+let Predicates = [HasAVXVNNIINT16], Constraints = "$src1 = $dst" in
+multiclass avx_vnni_int16<bits<8> opc, string OpcodeStr, bit IsCommutable> {
+ let isCommutable = IsCommutable in
+ def rr : I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (v4i32 (!cast<Intrinsic>("int_x86_avx2_"#OpcodeStr#"_128")
+ VR128:$src1, VR128:$src2, VR128:$src3)))]>,
+ VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
+
+ def rm : I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i128mem:$src3),
+ !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR128:$dst,
+ (v4i32 (!cast<Intrinsic>("int_x86_avx2_"#OpcodeStr#"_128")
+ VR128:$src1, VR128:$src2, (loadv4i32 addr:$src3))))]>,
+ VEX_4V, Sched<[SchedWriteVecIMul.XMM]>;
+
+ let isCommutable = IsCommutable in
+ def Yrr : I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR256:$src3),
+ !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR256:$dst,
+ (v8i32 (!cast<Intrinsic>("int_x86_avx2_"#OpcodeStr#"_256")
+ VR256:$src1, VR256:$src2, VR256:$src3)))]>,
+ VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
+
+ def Yrm : I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, i256mem:$src3),
+ !strconcat(OpcodeStr, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ [(set VR256:$dst,
+ (v8i32 (!cast<Intrinsic>("int_x86_avx2_"#OpcodeStr#"_256")
+ VR256:$src1, VR256:$src2, (loadv8i32 addr:$src3))))]>,
+ VEX_4V, VEX_L, Sched<[SchedWriteVecIMul.YMM]>;
+}
+
+defm VPDPWSUD : avx_vnni_int16<0xd2, "vpdpwsud", 0>, T8XS;
+defm VPDPWSUDS : avx_vnni_int16<0xd3, "vpdpwsuds", 0>, T8XS;
+defm VPDPWUSD : avx_vnni_int16<0xd2, "vpdpwusd", 0>, T8PD;
+defm VPDPWUSDS : avx_vnni_int16<0xd3, "vpdpwusds", 0>, T8PD;
+defm VPDPWUUD : avx_vnni_int16<0xd2, "vpdpwuud", 1>, T8PS;
+defm VPDPWUUDS : avx_vnni_int16<0xd3, "vpdpwuuds", 1>, T8PS;
diff --git a/llvm/lib/Target/X86/X86InstrSVM.td b/llvm/lib/Target/X86/X86InstrSVM.td
index d8f70b016c7b..799f2416cca6 100644
--- a/llvm/lib/Target/X86/X86InstrSVM.td
+++ b/llvm/lib/Target/X86/X86InstrSVM.td
@@ -60,13 +60,3 @@ let Uses = [RAX, ECX] in
def INVLPGA64 : I<0x01, MRM_DF, (outs), (ins),
"invlpga", []>, TB, Requires<[In64BitMode]>;
} // SchedRW
-
-def : InstAlias<"skinit\t{%eax|eax}", (SKINIT), 0>;
-def : InstAlias<"vmrun\t{%eax|eax}", (VMRUN32), 0>, Requires<[Not64BitMode]>;
-def : InstAlias<"vmrun\t{%rax|rax}", (VMRUN64), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"vmload\t{%eax|eax}", (VMLOAD32), 0>, Requires<[Not64BitMode]>;
-def : InstAlias<"vmload\t{%rax|rax}", (VMLOAD64), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"vmsave\t{%eax|eax}", (VMSAVE32), 0>, Requires<[Not64BitMode]>;
-def : InstAlias<"vmsave\t{%rax|rax}", (VMSAVE64), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"invlpga\t{%eax, %ecx|eax, ecx}", (INVLPGA32), 0>, Requires<[Not64BitMode]>;
-def : InstAlias<"invlpga\t{%rax, %ecx|rax, ecx}", (INVLPGA64), 0>, Requires<[In64BitMode]>;
diff --git a/llvm/lib/Target/X86/X86InstrShiftRotate.td b/llvm/lib/Target/X86/X86InstrShiftRotate.td
index e57169db7b1d..e416e4495e22 100644
--- a/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -12,7 +12,7 @@
// FIXME: Someone needs to smear multipattern goodness all over this file.
-let Defs = [EFLAGS] in {
+let Defs = [EFLAGS], hasSideEffects = 0 in {
let Constraints = "$src1 = $dst" in {
let Uses = [CL], SchedRW = [WriteShiftCL] in {
@@ -50,9 +50,6 @@ def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
[(set GR64:$dst, (shl GR64:$src1, (i8 imm:$src2)))]>;
} // isConvertibleToThreeAddress = 1
-// NOTE: We don't include patterns for shifts of a register by one, because
-// 'add reg,reg' is cheaper (and we have a Pat pattern for shift-by-one).
-let hasSideEffects = 0 in {
def SHL8r1 : I<0xD0, MRM4r, (outs GR8:$dst), (ins GR8:$src1),
"shl{b}\t$dst", []>;
def SHL16r1 : I<0xD1, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
@@ -61,7 +58,6 @@ def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
"shl{l}\t$dst", []>, OpSize32;
def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
"shl{q}\t$dst", []>;
-} // hasSideEffects = 0
} // SchedRW
} // Constraints = "$src = $dst"
@@ -85,7 +81,7 @@ def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
Requires<[In64BitMode]>;
} // Uses, SchedRW
-let SchedRW = [WriteShiftLd, WriteRMW] in {
+let SchedRW = [WriteShiftLd, WriteRMW], mayLoad = 1, mayStore = 1 in {
def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, u8imm:$src),
"shl{b}\t{$src, $dst|$dst, $src}",
[(store (shl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
@@ -104,21 +100,14 @@ def SHL64mi : RIi8<0xC1, MRM4m, (outs), (ins i64mem:$dst, u8imm:$src),
// Shift by 1
def SHL8m1 : I<0xD0, MRM4m, (outs), (ins i8mem :$dst),
- "shl{b}\t$dst",
- [(store (shl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ "shl{b}\t$dst", []>;
def SHL16m1 : I<0xD1, MRM4m, (outs), (ins i16mem:$dst),
- "shl{w}\t$dst",
- [(store (shl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize16;
+ "shl{w}\t$dst", []>, OpSize16;
def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
- "shl{l}\t$dst",
- [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize32;
+ "shl{l}\t$dst", []>, OpSize32;
def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
- "shl{q}\t$dst",
- [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>,
- Requires<[In64BitMode]>;
-} // SchedRW
+ "shl{q}\t$dst", []>, Requires<[In64BitMode]>;
+} // SchedRW, mayLoad, mayStore
let Constraints = "$src1 = $dst" in {
let Uses = [CL], SchedRW = [WriteShiftCL] in {
@@ -154,17 +143,13 @@ def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$src2),
// Shift right by 1
def SHR8r1 : I<0xD0, MRM5r, (outs GR8:$dst), (ins GR8:$src1),
- "shr{b}\t$dst",
- [(set GR8:$dst, (srl GR8:$src1, (i8 1)))]>;
+ "shr{b}\t$dst", []>;
def SHR16r1 : I<0xD1, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
- "shr{w}\t$dst",
- [(set GR16:$dst, (srl GR16:$src1, (i8 1)))]>, OpSize16;
+ "shr{w}\t$dst", []>, OpSize16;
def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
- "shr{l}\t$dst",
- [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>, OpSize32;
+ "shr{l}\t$dst", []>, OpSize32;
def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
- "shr{q}\t$dst",
- [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
+ "shr{q}\t$dst", []>;
} // SchedRW
} // Constraints = "$src = $dst"
@@ -187,7 +172,7 @@ def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
Requires<[In64BitMode]>;
} // Uses, SchedRW
-let SchedRW = [WriteShiftLd, WriteRMW] in {
+let SchedRW = [WriteShiftLd, WriteRMW], mayLoad = 1, mayStore = 1 in {
def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, u8imm:$src),
"shr{b}\t{$src, $dst|$dst, $src}",
[(store (srl (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
@@ -206,21 +191,15 @@ def SHR64mi : RIi8<0xC1, MRM5m, (outs), (ins i64mem:$dst, u8imm:$src),
// Shift by 1
def SHR8m1 : I<0xD0, MRM5m, (outs), (ins i8mem :$dst),
- "shr{b}\t$dst",
- [(store (srl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ "shr{b}\t$dst", []>;
def SHR16m1 : I<0xD1, MRM5m, (outs), (ins i16mem:$dst),
- "shr{w}\t$dst",
- [(store (srl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize16;
+ "shr{w}\t$dst", []>, OpSize16;
def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
- "shr{l}\t$dst",
- [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize32;
+ "shr{l}\t$dst", []>, OpSize32;
def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
- "shr{q}\t$dst",
- [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>,
- Requires<[In64BitMode]>;
-} // SchedRW
+ "shr{q}\t$dst", []>, Requires<[In64BitMode]>;
+} // SchedRW, mayLoad, mayStore
+
let Constraints = "$src1 = $dst" in {
let Uses = [CL], SchedRW = [WriteShiftCL] in {
@@ -259,17 +238,13 @@ def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst),
// Shift by 1
def SAR8r1 : I<0xD0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
- "sar{b}\t$dst",
- [(set GR8:$dst, (sra GR8:$src1, (i8 1)))]>;
+ "sar{b}\t$dst", []>;
def SAR16r1 : I<0xD1, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
- "sar{w}\t$dst",
- [(set GR16:$dst, (sra GR16:$src1, (i8 1)))]>, OpSize16;
+ "sar{w}\t$dst", []>, OpSize16;
def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
- "sar{l}\t$dst",
- [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>, OpSize32;
+ "sar{l}\t$dst", []>, OpSize32;
def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
- "sar{q}\t$dst",
- [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
+ "sar{q}\t$dst", []>;
} // SchedRW
} // Constraints = "$src = $dst"
@@ -292,7 +267,7 @@ def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
Requires<[In64BitMode]>;
} // Uses, SchedRW
-let SchedRW = [WriteShiftLd, WriteRMW] in {
+let SchedRW = [WriteShiftLd, WriteRMW], mayLoad = 1, mayStore = 1 in {
def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, u8imm:$src),
"sar{b}\t{$src, $dst|$dst, $src}",
[(store (sra (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
@@ -311,27 +286,19 @@ def SAR64mi : RIi8<0xC1, MRM7m, (outs), (ins i64mem:$dst, u8imm:$src),
// Shift by 1
def SAR8m1 : I<0xD0, MRM7m, (outs), (ins i8mem :$dst),
- "sar{b}\t$dst",
- [(store (sra (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ "sar{b}\t$dst", []>;
def SAR16m1 : I<0xD1, MRM7m, (outs), (ins i16mem:$dst),
- "sar{w}\t$dst",
- [(store (sra (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize16;
+ "sar{w}\t$dst", []>, OpSize16;
def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
- "sar{l}\t$dst",
- [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize32;
+ "sar{l}\t$dst", []>, OpSize32;
def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
- "sar{q}\t$dst",
- [(store (sra (loadi64 addr:$dst), (i8 1)), addr:$dst)]>,
- Requires<[In64BitMode]>;
+ "sar{q}\t$dst", []>, Requires<[In64BitMode]>;
} // SchedRW
//===----------------------------------------------------------------------===//
// Rotate instructions
//===----------------------------------------------------------------------===//
-let hasSideEffects = 0 in {
let Constraints = "$src1 = $dst" in {
let Uses = [CL, EFLAGS], SchedRW = [WriteRotateCL] in {
@@ -393,10 +360,9 @@ def RCR64r1 : RI<0xD1, MRM3r, (outs GR64:$dst), (ins GR64:$src1),
def RCR64ri : RIi8<0xC1, MRM3r, (outs GR64:$dst), (ins GR64:$src1, u8imm:$cnt),
"rcr{q}\t{$cnt, $dst|$dst, $cnt}", []>;
} // Uses = [EFLAGS], SchedRW
-
} // Constraints = "$src = $dst"
-let mayStore = 1 in {
+let mayLoad = 1, mayStore = 1 in {
let Uses = [EFLAGS], SchedRW = [WriteRotateLd, WriteRMW] in {
def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t$dst", []>;
@@ -456,8 +422,7 @@ def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
"rcr{q}\t{%cl, $dst|$dst, cl}", []>,
Requires<[In64BitMode]>;
} // Uses = [CL, EFLAGS], SchedRW
-} // mayStore
-} // hasSideEffects = 0
+} // mayLoad, mayStore
let Constraints = "$src1 = $dst" in {
// FIXME: provide shorter instructions when imm8 == 1
@@ -495,17 +460,13 @@ def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
// Rotate by 1
def ROL8r1 : I<0xD0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
- "rol{b}\t$dst",
- [(set GR8:$dst, (rotl GR8:$src1, (i8 1)))]>;
+ "rol{b}\t$dst", []>;
def ROL16r1 : I<0xD1, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
- "rol{w}\t$dst",
- [(set GR16:$dst, (rotl GR16:$src1, (i8 1)))]>, OpSize16;
+ "rol{w}\t$dst", []>, OpSize16;
def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
- "rol{l}\t$dst",
- [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>, OpSize32;
+ "rol{l}\t$dst", []>, OpSize32;
def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
- "rol{q}\t$dst",
- [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
+ "rol{q}\t$dst", []>;
} // SchedRW
} // Constraints = "$src = $dst"
@@ -525,7 +486,7 @@ def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
Requires<[In64BitMode]>;
} // Uses, SchedRW
-let SchedRW = [WriteRotateLd, WriteRMW] in {
+let SchedRW = [WriteRotateLd, WriteRMW], mayLoad = 1, mayStore = 1 in {
def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, u8imm:$src1),
"rol{b}\t{$src1, $dst|$dst, $src1}",
[(store (rotl (loadi8 addr:$dst), (i8 imm:$src1)), addr:$dst)]>;
@@ -544,21 +505,14 @@ def ROL64mi : RIi8<0xC1, MRM0m, (outs), (ins i64mem:$dst, u8imm:$src1),
// Rotate by 1
def ROL8m1 : I<0xD0, MRM0m, (outs), (ins i8mem :$dst),
- "rol{b}\t$dst",
- [(store (rotl (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ "rol{b}\t$dst", []>;
def ROL16m1 : I<0xD1, MRM0m, (outs), (ins i16mem:$dst),
- "rol{w}\t$dst",
- [(store (rotl (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize16;
+ "rol{w}\t$dst", []>, OpSize16;
def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
- "rol{l}\t$dst",
- [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize32;
+ "rol{l}\t$dst", []>, OpSize32;
def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
- "rol{q}\t$dst",
- [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>,
- Requires<[In64BitMode]>;
-} // SchedRW
+ "rol{q}\t$dst", []>, Requires<[In64BitMode]>;
+} // SchedRW, mayLoad, mayStore
let Constraints = "$src1 = $dst" in {
let Uses = [CL], SchedRW = [WriteRotateCL] in {
@@ -595,17 +549,13 @@ def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
// Rotate by 1
def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
- "ror{b}\t$dst",
- [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))]>;
+ "ror{b}\t$dst", []>;
def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
- "ror{w}\t$dst",
- [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))]>, OpSize16;
+ "ror{w}\t$dst", []>, OpSize16;
def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
- "ror{l}\t$dst",
- [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>, OpSize32;
+ "ror{l}\t$dst", []>, OpSize32;
def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
- "ror{q}\t$dst",
- [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
+ "ror{q}\t$dst", []>;
} // SchedRW
} // Constraints = "$src = $dst", SchedRW
@@ -625,7 +575,7 @@ def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
Requires<[In64BitMode]>;
} // Uses, SchedRW
-let SchedRW = [WriteRotateLd, WriteRMW] in {
+let SchedRW = [WriteRotateLd, WriteRMW], mayLoad = 1, mayStore = 1 in {
def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, u8imm:$src),
"ror{b}\t{$src, $dst|$dst, $src}",
[(store (rotr (loadi8 addr:$dst), (i8 imm:$src)), addr:$dst)]>;
@@ -644,21 +594,15 @@ def ROR64mi : RIi8<0xC1, MRM1m, (outs), (ins i64mem:$dst, u8imm:$src),
// Rotate by 1
def ROR8m1 : I<0xD0, MRM1m, (outs), (ins i8mem :$dst),
- "ror{b}\t$dst",
- [(store (rotr (loadi8 addr:$dst), (i8 1)), addr:$dst)]>;
+ "ror{b}\t$dst", []>;
def ROR16m1 : I<0xD1, MRM1m, (outs), (ins i16mem:$dst),
- "ror{w}\t$dst",
- [(store (rotr (loadi16 addr:$dst), (i8 1)), addr:$dst)]>,
- OpSize16;
+ "ror{w}\t$dst", []>, OpSize16;
def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
- "ror{l}\t$dst",
- [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>,
+ "ror{l}\t$dst", []>,
OpSize32;
def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
- "ror{q}\t$dst",
- [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>,
- Requires<[In64BitMode]>;
-} // SchedRW
+ "ror{q}\t$dst", []>, Requires<[In64BitMode]>;
+} // SchedRW, mayLoad, mayStore
//===----------------------------------------------------------------------===//
@@ -816,7 +760,7 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
TB;
} // SchedRW
-} // Defs = [EFLAGS]
+} // Defs = [EFLAGS], hasSideEffects
// Use the opposite rotate if allows us to use the rotate by 1 instruction.
def : Pat<(rotl GR8:$src1, (i8 7)), (ROR8r1 GR8:$src1)>;
@@ -913,13 +857,13 @@ let hasSideEffects = 0 in {
let Predicates = [HasBMI2] in {
defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem>;
- defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem>, VEX_W;
+ defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem>, REX_W;
defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem>, T8XS;
- defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem>, T8XS, VEX_W;
+ defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem>, T8XS, REX_W;
defm SHRX32 : bmi_shift<"shrx{l}", GR32, i32mem>, T8XD;
- defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, VEX_W;
+ defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, REX_W;
defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8PD;
- defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8PD, VEX_W;
+ defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8PD, REX_W;
// Prefer RORX which is non-destructive and doesn't update EFLAGS.
let AddedComplexity = 10 in {
diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td
index ca981f58908e..0272f7de0f9e 100644
--- a/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/llvm/lib/Target/X86/X86InstrSystem.td
@@ -216,43 +216,43 @@ def SWAPGS : I<0x01, MRM_F8, (outs), (ins), "swapgs", []>, TB;
let mayLoad = 1 in
def LAR16rm : I<0x02, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"lar{w}\t{$src, $dst|$dst, $src}", []>, TB,
- OpSize16, NotMemoryFoldable;
+ OpSize16;
def LAR16rr : I<0x02, MRMSrcReg, (outs GR16:$dst), (ins GR16orGR32orGR64:$src),
"lar{w}\t{$src, $dst|$dst, $src}", []>, TB,
- OpSize16, NotMemoryFoldable;
+ OpSize16;
let mayLoad = 1 in
def LAR32rm : I<0x02, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"lar{l}\t{$src, $dst|$dst, $src}", []>, TB,
- OpSize32, NotMemoryFoldable;
+ OpSize32;
def LAR32rr : I<0x02, MRMSrcReg, (outs GR32:$dst), (ins GR16orGR32orGR64:$src),
"lar{l}\t{$src, $dst|$dst, $src}", []>, TB,
- OpSize32, NotMemoryFoldable;
+ OpSize32;
let mayLoad = 1 in
def LAR64rm : RI<0x02, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
- "lar{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
+ "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
def LAR64rr : RI<0x02, MRMSrcReg, (outs GR64:$dst), (ins GR16orGR32orGR64:$src),
- "lar{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
+ "lar{q}\t{$src, $dst|$dst, $src}", []>, TB;
let mayLoad = 1 in
def LSL16rm : I<0x03, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"lsl{w}\t{$src, $dst|$dst, $src}", []>, TB,
- OpSize16, NotMemoryFoldable;
+ OpSize16;
def LSL16rr : I<0x03, MRMSrcReg, (outs GR16:$dst), (ins GR16orGR32orGR64:$src),
"lsl{w}\t{$src, $dst|$dst, $src}", []>, TB,
- OpSize16, NotMemoryFoldable;
+ OpSize16;
let mayLoad = 1 in
def LSL32rm : I<0x03, MRMSrcMem, (outs GR32:$dst), (ins i16mem:$src),
"lsl{l}\t{$src, $dst|$dst, $src}", []>, TB,
- OpSize32, NotMemoryFoldable;
+ OpSize32;
def LSL32rr : I<0x03, MRMSrcReg, (outs GR32:$dst), (ins GR16orGR32orGR64:$src),
"lsl{l}\t{$src, $dst|$dst, $src}", []>, TB,
- OpSize32, NotMemoryFoldable;
+ OpSize32;
let mayLoad = 1 in
def LSL64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
- "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
+ "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR16orGR32orGR64:$src),
- "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB, NotMemoryFoldable;
+ "lsl{q}\t{$src, $dst|$dst, $src}", []>, TB;
def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
@@ -265,9 +265,9 @@ def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
let mayStore = 1 in
def STRm : I<0x00, MRM1m, (outs), (ins i16mem:$dst), "str{w}\t$dst", []>, TB;
-def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), "ltr{w}\t$src", []>, TB, NotMemoryFoldable;
+def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), "ltr{w}\t$src", []>, TB;
let mayLoad = 1 in
-def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), "ltr{w}\t$src", []>, TB, NotMemoryFoldable;
+def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), "ltr{w}\t$src", []>, TB;
def PUSHCS16 : I<0x0E, RawFrm, (outs), (ins), "push{w}\t{%cs|cs}", []>,
OpSize16, Requires<[Not64BitMode]>;
@@ -364,11 +364,11 @@ def LGS32rm : I<0xb5, MRMSrcMem, (outs GR32:$dst), (ins opaquemem:$src),
def LGS64rm : RI<0xb5, MRMSrcMem, (outs GR64:$dst), (ins opaquemem:$src),
"lgs{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg), "verr\t$seg", []>, TB, NotMemoryFoldable;
-def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), "verw\t$seg", []>, TB, NotMemoryFoldable;
+def VERRr : I<0x00, MRM4r, (outs), (ins GR16:$seg), "verr\t$seg", []>, TB;
+def VERWr : I<0x00, MRM5r, (outs), (ins GR16:$seg), "verw\t$seg", []>, TB;
let mayLoad = 1 in {
-def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg), "verr\t$seg", []>, TB, NotMemoryFoldable;
-def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), "verw\t$seg", []>, TB, NotMemoryFoldable;
+def VERRm : I<0x00, MRM4m, (outs), (ins i16mem:$seg), "verr\t$seg", []>, TB;
+def VERWm : I<0x00, MRM5m, (outs), (ins i16mem:$seg), "verw\t$seg", []>, TB;
}
} // SchedRW
@@ -414,10 +414,10 @@ def LIDT32m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
def LIDT64m : I<0x01, MRM3m, (outs), (ins opaquemem:$src),
"lidt{q}\t$src", []>, TB, Requires<[In64BitMode]>;
def LLDT16r : I<0x00, MRM2r, (outs), (ins GR16:$src),
- "lldt{w}\t$src", []>, TB, NotMemoryFoldable;
+ "lldt{w}\t$src", []>, TB;
let mayLoad = 1 in
def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src),
- "lldt{w}\t$src", []>, TB, NotMemoryFoldable;
+ "lldt{w}\t$src", []>, TB;
} // SchedRW
//===----------------------------------------------------------------------===//
@@ -429,7 +429,8 @@ let Uses = [EAX, ECX, EDX] in
def WRMSRNS : I<0x01, MRM_C6, (outs), (ins), "wrmsrns", []>, PS;
let Defs = [EAX, EDX], Uses = [ECX] in
def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", []>, TB;
-
+let Defs = [RAX, EFLAGS], Uses = [RBX, RCX], Predicates = [In64BitMode] in
+def PBNDKB : I<0x01, MRM_C7, (outs), (ins), "pbndkb", []>, PS;
let Uses = [RSI, RDI, RCX], Predicates = [In64BitMode] in {
def WRMSRLIST : I<0x01, MRM_C6, (outs), (ins), "wrmsrlist", []>, XS;
def RDMSRLIST : I<0x01, MRM_C6, (outs), (ins), "rdmsrlist", []>, XD;
@@ -451,10 +452,10 @@ def SMSW16m : I<0x01, MRM4m, (outs), (ins i16mem:$dst),
"smsw{w}\t$dst", []>, TB;
def LMSW16r : I<0x01, MRM6r, (outs), (ins GR16:$src),
- "lmsw{w}\t$src", []>, TB, NotMemoryFoldable;
+ "lmsw{w}\t$src", []>, TB;
let mayLoad = 1 in
def LMSW16m : I<0x01, MRM6m, (outs), (ins i16mem:$src),
- "lmsw{w}\t$src", []>, TB, NotMemoryFoldable;
+ "lmsw{w}\t$src", []>, TB;
let Defs = [EAX, EBX, ECX, EDX], Uses = [EAX, ECX] in
def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB;
diff --git a/llvm/lib/Target/X86/X86InstrTBM.td b/llvm/lib/Target/X86/X86InstrTBM.td
new file mode 100644
index 000000000000..ed514038a12e
--- /dev/null
+++ b/llvm/lib/Target/X86/X86InstrTBM.td
@@ -0,0 +1,194 @@
+//====-- X86InstrTBM.td - TBM X86 Instruction Definition -*- tablegen -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the TBM X86 instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// TBM Instructions
+//
+let Predicates = [HasTBM], Defs = [EFLAGS] in {
+
+multiclass tbm_bextri<bits<8> opc, RegisterClass RC, string OpcodeStr,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ SDNode OpNode, Operand immtype,
+ SDPatternOperator immoperator,
+ X86FoldableSchedWrite Sched> {
+ def ri : Ii32<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, immtype:$cntl),
+ !strconcat(OpcodeStr,
+ "\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
+ [(set RC:$dst, (OpNode RC:$src1, immoperator:$cntl))]>,
+ XOP, XOPA, Sched<[Sched]>;
+ def mi : Ii32<opc, MRMSrcMem, (outs RC:$dst),
+ (ins x86memop:$src1, immtype:$cntl),
+ !strconcat(OpcodeStr,
+ "\t{$cntl, $src1, $dst|$dst, $src1, $cntl}"),
+ [(set RC:$dst, (OpNode (ld_frag addr:$src1), immoperator:$cntl))]>,
+ XOP, XOPA, Sched<[Sched.Folded]>;
+}
+
+defm BEXTRI32 : tbm_bextri<0x10, GR32, "bextr{l}", i32mem, loadi32,
+ X86bextri, i32imm, timm, WriteBEXTR>;
+let ImmT = Imm32S in
+defm BEXTRI64 : tbm_bextri<0x10, GR64, "bextr{q}", i64mem, loadi64,
+ X86bextri, i64i32imm,
+ i64timmSExt32, WriteBEXTR>, REX_W;
+
+multiclass tbm_binary_rm<bits<8> opc, Format FormReg, Format FormMem,
+ RegisterClass RC, string OpcodeStr,
+ X86MemOperand x86memop, X86FoldableSchedWrite Sched> {
+let hasSideEffects = 0 in {
+ def rr : I<opc, FormReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"), []>,
+ XOP_4V, XOP9, Sched<[Sched]>;
+ let mayLoad = 1 in
+ def rm : I<opc, FormMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"), []>,
+ XOP_4V, XOP9, Sched<[Sched.Folded]>;
+}
+}
+
+multiclass tbm_binary_intr<bits<8> opc, string OpcodeStr,
+ X86FoldableSchedWrite Sched,
+ Format FormReg, Format FormMem> {
+ defm NAME#32 : tbm_binary_rm<opc, FormReg, FormMem, GR32, OpcodeStr#"{l}",
+ i32mem, Sched>;
+ defm NAME#64 : tbm_binary_rm<opc, FormReg, FormMem, GR64, OpcodeStr#"{q}",
+ i64mem, Sched>, REX_W;
+}
+
+defm BLCFILL : tbm_binary_intr<0x01, "blcfill", WriteALU, MRM1r, MRM1m>;
+defm BLCI : tbm_binary_intr<0x02, "blci", WriteALU, MRM6r, MRM6m>;
+defm BLCIC : tbm_binary_intr<0x01, "blcic", WriteALU, MRM5r, MRM5m>;
+defm BLCMSK : tbm_binary_intr<0x02, "blcmsk", WriteALU, MRM1r, MRM1m>;
+defm BLCS : tbm_binary_intr<0x01, "blcs", WriteALU, MRM3r, MRM3m>;
+defm BLSFILL : tbm_binary_intr<0x01, "blsfill", WriteALU, MRM2r, MRM2m>;
+defm BLSIC : tbm_binary_intr<0x01, "blsic", WriteALU, MRM6r, MRM6m>;
+defm T1MSKC : tbm_binary_intr<0x01, "t1mskc", WriteALU, MRM7r, MRM7m>;
+defm TZMSK : tbm_binary_intr<0x01, "tzmsk", WriteALU, MRM4r, MRM4m>;
+} // HasTBM, EFLAGS
+
+// Use BEXTRI for 64-bit 'and' with large immediate 'mask'.
+let Predicates = [HasTBM] in {
+ def : Pat<(and GR64:$src, AndMask64:$mask),
+ (BEXTRI64ri GR64:$src, (BEXTRMaskXForm imm:$mask))>;
+
+ def : Pat<(and (loadi64 addr:$src), AndMask64:$mask),
+ (BEXTRI64mi addr:$src, (BEXTRMaskXForm imm:$mask))>;
+}
+
+//===----------------------------------------------------------------------===//
+// Pattern fragments to auto generate TBM instructions.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasTBM] in {
+ // FIXME: patterns for the load versions are not implemented
+ def : Pat<(and GR32:$src, (add GR32:$src, 1)),
+ (BLCFILL32rr GR32:$src)>;
+ def : Pat<(and GR64:$src, (add GR64:$src, 1)),
+ (BLCFILL64rr GR64:$src)>;
+
+ def : Pat<(or GR32:$src, (not (add GR32:$src, 1))),
+ (BLCI32rr GR32:$src)>;
+ def : Pat<(or GR64:$src, (not (add GR64:$src, 1))),
+ (BLCI64rr GR64:$src)>;
+
+ // Extra patterns because opt can optimize the above patterns to this.
+ def : Pat<(or GR32:$src, (sub -2, GR32:$src)),
+ (BLCI32rr GR32:$src)>;
+ def : Pat<(or GR64:$src, (sub -2, GR64:$src)),
+ (BLCI64rr GR64:$src)>;
+
+ def : Pat<(and (not GR32:$src), (add GR32:$src, 1)),
+ (BLCIC32rr GR32:$src)>;
+ def : Pat<(and (not GR64:$src), (add GR64:$src, 1)),
+ (BLCIC64rr GR64:$src)>;
+
+ def : Pat<(xor GR32:$src, (add GR32:$src, 1)),
+ (BLCMSK32rr GR32:$src)>;
+ def : Pat<(xor GR64:$src, (add GR64:$src, 1)),
+ (BLCMSK64rr GR64:$src)>;
+
+ def : Pat<(or GR32:$src, (add GR32:$src, 1)),
+ (BLCS32rr GR32:$src)>;
+ def : Pat<(or GR64:$src, (add GR64:$src, 1)),
+ (BLCS64rr GR64:$src)>;
+
+ def : Pat<(or GR32:$src, (add GR32:$src, -1)),
+ (BLSFILL32rr GR32:$src)>;
+ def : Pat<(or GR64:$src, (add GR64:$src, -1)),
+ (BLSFILL64rr GR64:$src)>;
+
+ def : Pat<(or (not GR32:$src), (add GR32:$src, -1)),
+ (BLSIC32rr GR32:$src)>;
+ def : Pat<(or (not GR64:$src), (add GR64:$src, -1)),
+ (BLSIC64rr GR64:$src)>;
+
+ def : Pat<(or (not GR32:$src), (add GR32:$src, 1)),
+ (T1MSKC32rr GR32:$src)>;
+ def : Pat<(or (not GR64:$src), (add GR64:$src, 1)),
+ (T1MSKC64rr GR64:$src)>;
+
+ def : Pat<(and (not GR32:$src), (add GR32:$src, -1)),
+ (TZMSK32rr GR32:$src)>;
+ def : Pat<(and (not GR64:$src), (add GR64:$src, -1)),
+ (TZMSK64rr GR64:$src)>;
+
+ // Patterns to match flag producing ops.
+ def : Pat<(and_flag_nocf GR32:$src, (add GR32:$src, 1)),
+ (BLCFILL32rr GR32:$src)>;
+ def : Pat<(and_flag_nocf GR64:$src, (add GR64:$src, 1)),
+ (BLCFILL64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf GR32:$src, (not (add GR32:$src, 1))),
+ (BLCI32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (not (add GR64:$src, 1))),
+ (BLCI64rr GR64:$src)>;
+
+ // Extra patterns because opt can optimize the above patterns to this.
+ def : Pat<(or_flag_nocf GR32:$src, (sub -2, GR32:$src)),
+ (BLCI32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (sub -2, GR64:$src)),
+ (BLCI64rr GR64:$src)>;
+
+ def : Pat<(and_flag_nocf (not GR32:$src), (add GR32:$src, 1)),
+ (BLCIC32rr GR32:$src)>;
+ def : Pat<(and_flag_nocf (not GR64:$src), (add GR64:$src, 1)),
+ (BLCIC64rr GR64:$src)>;
+
+ def : Pat<(xor_flag_nocf GR32:$src, (add GR32:$src, 1)),
+ (BLCMSK32rr GR32:$src)>;
+ def : Pat<(xor_flag_nocf GR64:$src, (add GR64:$src, 1)),
+ (BLCMSK64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf GR32:$src, (add GR32:$src, 1)),
+ (BLCS32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (add GR64:$src, 1)),
+ (BLCS64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf GR32:$src, (add GR32:$src, -1)),
+ (BLSFILL32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf GR64:$src, (add GR64:$src, -1)),
+ (BLSFILL64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf (not GR32:$src), (add GR32:$src, -1)),
+ (BLSIC32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, -1)),
+ (BLSIC64rr GR64:$src)>;
+
+ def : Pat<(or_flag_nocf (not GR32:$src), (add GR32:$src, 1)),
+ (T1MSKC32rr GR32:$src)>;
+ def : Pat<(or_flag_nocf (not GR64:$src), (add GR64:$src, 1)),
+ (T1MSKC64rr GR64:$src)>;
+
+ def : Pat<(and_flag_nocf (not GR32:$src), (add GR32:$src, -1)),
+ (TZMSK32rr GR32:$src)>;
+ def : Pat<(and_flag_nocf (not GR64:$src), (add GR64:$src, -1)),
+ (TZMSK64rr GR64:$src)>;
+} // HasTBM
diff --git a/llvm/lib/Target/X86/X86InstrVMX.td b/llvm/lib/Target/X86/X86InstrVMX.td
index d204a33358ea..cfeddbccccac 100644
--- a/llvm/lib/Target/X86/X86InstrVMX.td
+++ b/llvm/lib/Target/X86/X86InstrVMX.td
@@ -49,35 +49,27 @@ def VMPTRLDm : I<0xC7, MRM6m, (outs), (ins i64mem:$vmcs),
def VMPTRSTm : I<0xC7, MRM7m, (outs), (ins i64mem:$vmcs),
"vmptrst\t$vmcs", []>, PS;
def VMREAD64rr : I<0x78, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
- "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
- NotMemoryFoldable;
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
def VMREAD32rr : I<0x78, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
- "vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
- NotMemoryFoldable;
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
let mayStore = 1 in {
def VMREAD64mr : I<0x78, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
- NotMemoryFoldable;
+ "vmread{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
def VMREAD32mr : I<0x78, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
- NotMemoryFoldable;
+ "vmread{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
} // mayStore
def VMWRITE64rr : I<0x79, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
- "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
- NotMemoryFoldable;
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
def VMWRITE32rr : I<0x79, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
- "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
- NotMemoryFoldable;
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
let mayLoad = 1 in {
def VMWRITE64rm : I<0x79, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
- "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>,
- NotMemoryFoldable;
+ "vmwrite{q}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[In64BitMode]>;
def VMWRITE32rm : I<0x79, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
- "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>,
- NotMemoryFoldable;
+ "vmwrite{l}\t{$src, $dst|$dst, $src}", []>, PS, Requires<[Not64BitMode]>;
} // mayLoad
// 0F 01 C4
diff --git a/llvm/lib/Target/X86/X86InstrXOP.td b/llvm/lib/Target/X86/X86InstrXOP.td
index d89e481f4522..a62bb2e855c9 100644
--- a/llvm/lib/Target/X86/X86InstrXOP.td
+++ b/llvm/lib/Target/X86/X86InstrXOP.td
@@ -105,7 +105,7 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1),
(vt128 (load addr:$src2)))))]>,
- XOP_4V, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold]>;
+ XOP_4V, REX_W, Sched<[sched.Folded, sched.ReadAfterFold]>;
def mr : IXOP<opc, MRMSrcMem4VOp3, (outs VR128:$dst),
(ins i128mem:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -119,7 +119,7 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>,
- XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#rr>;
+ XOP_4V, REX_W, Sched<[sched]>;
}
let ExeDomain = SSEPackedInt in {
@@ -296,7 +296,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 (load addr:$src3)))))]>,
- XOP_4V, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
+ XOP_4V, REX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -316,7 +316,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#rrr>;
+ []>, XOP_4V, REX_W, Sched<[sched]>;
}
let ExeDomain = SSEPackedInt in {
@@ -342,7 +342,7 @@ multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>,
- XOP_4V, VEX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
+ XOP_4V, REX_W, Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
@@ -361,7 +361,7 @@ multiclass xop4op_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, VEX_W, Sched<[sched]>, FoldGenData<NAME#rrr>;
+ []>, XOP_4V, REX_W, Sched<[sched]>;
}
let ExeDomain = SSEPackedInt in {
@@ -430,7 +430,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set RC:$dst,
(VT (X86vpermil2 RC:$src1, RC:$src2, (IntLdFrag addr:$src3),
- (i8 timm:$src4))))]>, VEX_W,
+ (i8 timm:$src4))))]>, REX_W,
Sched<[sched.Folded, sched.ReadAfterFold, sched.ReadAfterFold]>;
def mr : IXOP5<Opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, fpmemop:$src2, RC:$src3, u4imm:$src4),
@@ -450,7 +450,7 @@ multiclass xop_vpermil2<bits<8> Opc, string OpcodeStr, RegisterClass RC,
(ins RC:$src1, RC:$src2, RC:$src3, u4imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- []>, VEX_W, Sched<[sched]>, FoldGenData<NAME#rr>;
+ []>, REX_W, Sched<[sched]>;
}
let ExeDomain = SSEPackedDouble in {
diff --git a/llvm/lib/Target/X86/X86InstructionSelector.cpp b/llvm/lib/Target/X86/X86InstructionSelector.cpp
index 0f95e5c142f9..6157dafb5c51 100644
--- a/llvm/lib/Target/X86/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/X86InstructionSelector.cpp
@@ -19,9 +19,10 @@
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
-#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -40,7 +41,6 @@
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
@@ -92,8 +92,8 @@ private:
MachineFunction &MF) const;
bool selectFCmp(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
- bool selectUadde(MachineInstr &I, MachineRegisterInfo &MRI,
- MachineFunction &MF) const;
+ bool selectUAddSub(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
@@ -114,8 +114,8 @@ private:
bool materializeFP(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const;
- bool selectDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
- MachineFunction &MF) const;
+ bool selectMulDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
+ MachineFunction &MF) const;
bool selectIntrinsicWSideEffects(MachineInstr &I, MachineRegisterInfo &MRI,
MachineFunction &MF) const;
@@ -247,9 +247,9 @@ bool X86InstructionSelector::selectDebugInstr(MachineInstr &I,
LLT Ty = MRI.getType(Reg);
const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
const TargetRegisterClass *RC =
- RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
+ dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank);
if (!RC) {
- const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
+ const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
RC = getRegClass(Ty, RB);
if (!RC) {
LLVM_DEBUG(
@@ -403,7 +403,10 @@ bool X86InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_FCMP:
return selectFCmp(I, MRI, MF);
case TargetOpcode::G_UADDE:
- return selectUadde(I, MRI, MF);
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_USUBE:
+ case TargetOpcode::G_USUBO:
+ return selectUAddSub(I, MRI, MF);
case TargetOpcode::G_UNMERGE_VALUES:
return selectUnmergeValues(I, MRI, MF);
case TargetOpcode::G_MERGE_VALUES:
@@ -418,11 +421,14 @@ bool X86InstructionSelector::select(MachineInstr &I) {
case TargetOpcode::G_IMPLICIT_DEF:
case TargetOpcode::G_PHI:
return selectImplicitDefOrPHI(I, MRI);
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_SMULH:
+ case TargetOpcode::G_UMULH:
case TargetOpcode::G_SDIV:
case TargetOpcode::G_UDIV:
case TargetOpcode::G_SREM:
case TargetOpcode::G_UREM:
- return selectDivRem(I, MRI, MF);
+ return selectMulDivRem(I, MRI, MF);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
return selectIntrinsicWSideEffects(I, MRI, MF);
}
@@ -838,11 +844,11 @@ bool X86InstructionSelector::selectZext(MachineInstr &I,
if (DstTy == LLT::scalar(8))
AndOpc = X86::AND8ri;
else if (DstTy == LLT::scalar(16))
- AndOpc = X86::AND16ri8;
+ AndOpc = X86::AND16ri;
else if (DstTy == LLT::scalar(32))
- AndOpc = X86::AND32ri8;
+ AndOpc = X86::AND32ri;
else if (DstTy == LLT::scalar(64))
- AndOpc = X86::AND64ri8;
+ AndOpc = X86::AND64ri32;
else
return false;
@@ -1069,50 +1075,97 @@ bool X86InstructionSelector::selectFCmp(MachineInstr &I,
return true;
}
-bool X86InstructionSelector::selectUadde(MachineInstr &I,
- MachineRegisterInfo &MRI,
- MachineFunction &MF) const {
- assert((I.getOpcode() == TargetOpcode::G_UADDE) && "unexpected instruction");
+bool X86InstructionSelector::selectUAddSub(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ assert((I.getOpcode() == TargetOpcode::G_UADDE ||
+ I.getOpcode() == TargetOpcode::G_UADDO ||
+ I.getOpcode() == TargetOpcode::G_USUBE ||
+ I.getOpcode() == TargetOpcode::G_USUBO) &&
+ "unexpected instruction");
const Register DstReg = I.getOperand(0).getReg();
const Register CarryOutReg = I.getOperand(1).getReg();
const Register Op0Reg = I.getOperand(2).getReg();
const Register Op1Reg = I.getOperand(3).getReg();
- Register CarryInReg = I.getOperand(4).getReg();
+ bool IsSub = I.getOpcode() == TargetOpcode::G_USUBE ||
+ I.getOpcode() == TargetOpcode::G_USUBO;
+ bool HasCarryIn = I.getOpcode() == TargetOpcode::G_UADDE ||
+ I.getOpcode() == TargetOpcode::G_USUBE;
const LLT DstTy = MRI.getType(DstReg);
+ assert(DstTy.isScalar() && "selectUAddSub only supported for scalar types");
- if (DstTy != LLT::scalar(32))
- return false;
-
- // find CarryIn def instruction.
- MachineInstr *Def = MRI.getVRegDef(CarryInReg);
- while (Def->getOpcode() == TargetOpcode::G_TRUNC) {
- CarryInReg = Def->getOperand(1).getReg();
- Def = MRI.getVRegDef(CarryInReg);
+ // TODO: Handle immediate argument variants?
+ unsigned OpADC, OpADD, OpSBB, OpSUB;
+ switch (DstTy.getSizeInBits()) {
+ case 8:
+ OpADC = X86::ADC8rr;
+ OpADD = X86::ADD8rr;
+ OpSBB = X86::SBB8rr;
+ OpSUB = X86::SUB8rr;
+ break;
+ case 16:
+ OpADC = X86::ADC16rr;
+ OpADD = X86::ADD16rr;
+ OpSBB = X86::SBB16rr;
+ OpSUB = X86::SUB16rr;
+ break;
+ case 32:
+ OpADC = X86::ADC32rr;
+ OpADD = X86::ADD32rr;
+ OpSBB = X86::SBB32rr;
+ OpSUB = X86::SUB32rr;
+ break;
+ case 64:
+ OpADC = X86::ADC64rr;
+ OpADD = X86::ADD64rr;
+ OpSBB = X86::SBB64rr;
+ OpSUB = X86::SUB64rr;
+ break;
+ default:
+ llvm_unreachable("selectUAddSub unsupported type.");
}
- unsigned Opcode;
- if (Def->getOpcode() == TargetOpcode::G_UADDE) {
- // carry set by prev ADD.
+ const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
+ const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), X86::EFLAGS)
- .addReg(CarryInReg);
+ unsigned Opcode = IsSub ? OpSUB : OpADD;
- if (!RBI.constrainGenericRegister(CarryInReg, X86::GR32RegClass, MRI))
- return false;
+ // G_UADDE/G_USUBE - find CarryIn def instruction.
+ if (HasCarryIn) {
+ Register CarryInReg = I.getOperand(4).getReg();
+ MachineInstr *Def = MRI.getVRegDef(CarryInReg);
+ while (Def->getOpcode() == TargetOpcode::G_TRUNC) {
+ CarryInReg = Def->getOperand(1).getReg();
+ Def = MRI.getVRegDef(CarryInReg);
+ }
- Opcode = X86::ADC32rr;
- } else if (auto val = getIConstantVRegVal(CarryInReg, MRI)) {
- // carry is constant, support only 0.
- if (*val != 0)
+ // TODO - handle more CF generating instructions
+ if (Def->getOpcode() == TargetOpcode::G_UADDE ||
+ Def->getOpcode() == TargetOpcode::G_UADDO ||
+ Def->getOpcode() == TargetOpcode::G_USUBE ||
+ Def->getOpcode() == TargetOpcode::G_USUBO) {
+ // carry set by prev ADD/SUB.
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY),
+ X86::EFLAGS)
+ .addReg(CarryInReg);
+
+ if (!RBI.constrainGenericRegister(CarryInReg, *DstRC, MRI))
+ return false;
+
+ Opcode = IsSub ? OpSBB : OpADC;
+ } else if (auto val = getIConstantVRegVal(CarryInReg, MRI)) {
+ // carry is constant, support only 0.
+ if (*val != 0)
+ return false;
+
+ Opcode = IsSub ? OpSUB : OpADD;
+ } else
return false;
+ }
- Opcode = X86::ADD32rr;
- } else
- return false;
-
- MachineInstr &AddInst =
+ MachineInstr &Inst =
*BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg)
.addReg(Op0Reg)
.addReg(Op1Reg);
@@ -1120,8 +1173,8 @@ bool X86InstructionSelector::selectUadde(MachineInstr &I,
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg)
.addReg(X86::EFLAGS);
- if (!constrainSelectedInstRegOperands(AddInst, TII, TRI, RBI) ||
- !RBI.constrainGenericRegister(CarryOutReg, X86::GR32RegClass, MRI))
+ if (!constrainSelectedInstRegOperands(Inst, TII, TRI, RBI) ||
+ !RBI.constrainGenericRegister(CarryOutReg, *DstRC, MRI))
return false;
I.eraseFromParent();
@@ -1508,11 +1561,14 @@ bool X86InstructionSelector::selectImplicitDefOrPHI(
return true;
}
-bool X86InstructionSelector::selectDivRem(MachineInstr &I,
- MachineRegisterInfo &MRI,
- MachineFunction &MF) const {
- // The implementation of this function is taken from X86FastISel.
- assert((I.getOpcode() == TargetOpcode::G_SDIV ||
+bool X86InstructionSelector::selectMulDivRem(MachineInstr &I,
+ MachineRegisterInfo &MRI,
+ MachineFunction &MF) const {
+ // The implementation of this function is adapted from X86FastISel.
+ assert((I.getOpcode() == TargetOpcode::G_MUL ||
+ I.getOpcode() == TargetOpcode::G_SMULH ||
+ I.getOpcode() == TargetOpcode::G_UMULH ||
+ I.getOpcode() == TargetOpcode::G_SDIV ||
I.getOpcode() == TargetOpcode::G_SREM ||
I.getOpcode() == TargetOpcode::G_UDIV ||
I.getOpcode() == TargetOpcode::G_UREM) &&
@@ -1531,10 +1587,11 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
return false;
const static unsigned NumTypes = 4; // i8, i16, i32, i64
- const static unsigned NumOps = 4; // SDiv, SRem, UDiv, URem
+ const static unsigned NumOps = 7; // SDiv/SRem/UDiv/URem/Mul/SMulH/UMulH
const static bool S = true; // IsSigned
const static bool U = false; // !IsSigned
const static unsigned Copy = TargetOpcode::COPY;
+
// For the X86 IDIV instruction, in most cases the dividend
// (numerator) must be in a specific register pair highreg:lowreg,
// producing the quotient in lowreg and the remainder in highreg.
@@ -1543,19 +1600,19 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
// exception is i8, where the dividend is defined as a single register rather
// than a register pair, and we therefore directly sign-extend the dividend
// into lowreg, instead of copying, and ignore the highreg.
- const static struct DivRemEntry {
+ const static struct MulDivRemEntry {
// The following portion depends only on the data type.
unsigned SizeInBits;
unsigned LowInReg; // low part of the register pair
unsigned HighInReg; // high part of the register pair
// The following portion depends on both the data type and the operation.
- struct DivRemResult {
- unsigned OpDivRem; // The specific DIV/IDIV opcode to use.
+ struct MulDivRemResult {
+ unsigned OpMulDivRem; // The specific MUL/DIV opcode to use.
unsigned OpSignExtend; // Opcode for sign-extending lowreg into
// highreg, or copying a zero into highreg.
unsigned OpCopy; // Opcode for copying dividend into lowreg, or
// zero/sign-extending into lowreg for i8.
- unsigned DivRemResultReg; // Register containing the desired result.
+ unsigned ResultReg; // Register containing the desired result.
bool IsOpSigned; // Whether to use signed or unsigned form.
} ResultTable[NumOps];
} OpTable[NumTypes] = {
@@ -1567,25 +1624,34 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
{X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SRem
{X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U}, // UDiv
{X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U}, // URem
+ {X86::IMUL8r, 0, X86::MOVSX16rr8, X86::AL, S}, // Mul
+ {X86::IMUL8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SMulH
+ {X86::MUL8r, 0, X86::MOVZX16rr8, X86::AH, U}, // UMulH
}}, // i8
{16,
X86::AX,
X86::DX,
{
- {X86::IDIV16r, X86::CWD, Copy, X86::AX, S}, // SDiv
- {X86::IDIV16r, X86::CWD, Copy, X86::DX, S}, // SRem
- {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U}, // UDiv
- {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U}, // URem
- }}, // i16
+ {X86::IDIV16r, X86::CWD, Copy, X86::AX, S}, // SDiv
+ {X86::IDIV16r, X86::CWD, Copy, X86::DX, S}, // SRem
+ {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U}, // UDiv
+ {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U}, // URem
+ {X86::IMUL16r, X86::MOV32r0, Copy, X86::AX, S}, // Mul
+ {X86::IMUL16r, X86::MOV32r0, Copy, X86::DX, S}, // SMulH
+ {X86::MUL16r, X86::MOV32r0, Copy, X86::DX, U}, // UMulH
+ }}, // i16
{32,
X86::EAX,
X86::EDX,
{
- {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S}, // SDiv
- {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S}, // SRem
- {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U}, // UDiv
- {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U}, // URem
- }}, // i32
+ {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S}, // SDiv
+ {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S}, // SRem
+ {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U}, // UDiv
+ {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U}, // URem
+ {X86::IMUL32r, X86::MOV32r0, Copy, X86::EAX, S}, // Mul
+ {X86::IMUL32r, X86::MOV32r0, Copy, X86::EDX, S}, // SMulH
+ {X86::MUL32r, X86::MOV32r0, Copy, X86::EDX, U}, // UMulH
+ }}, // i32
{64,
X86::RAX,
X86::RDX,
@@ -1594,10 +1660,13 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
{X86::IDIV64r, X86::CQO, Copy, X86::RDX, S}, // SRem
{X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U}, // UDiv
{X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U}, // URem
- }}, // i64
+ {X86::IMUL64r, X86::MOV32r0, Copy, X86::RAX, S}, // Mul
+ {X86::IMUL64r, X86::MOV32r0, Copy, X86::RDX, S}, // SMulH
+ {X86::MUL64r, X86::MOV32r0, Copy, X86::RDX, U}, // UMulH
+ }}, // i64
};
- auto OpEntryIt = llvm::find_if(OpTable, [RegTy](const DivRemEntry &El) {
+ auto OpEntryIt = llvm::find_if(OpTable, [RegTy](const MulDivRemEntry &El) {
return El.SizeInBits == RegTy.getSizeInBits();
});
if (OpEntryIt == std::end(OpTable))
@@ -1606,7 +1675,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
unsigned OpIndex;
switch (I.getOpcode()) {
default:
- llvm_unreachable("Unexpected div/rem opcode");
+ llvm_unreachable("Unexpected mul/div/rem opcode");
case TargetOpcode::G_SDIV:
OpIndex = 0;
break;
@@ -1619,10 +1688,20 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
case TargetOpcode::G_UREM:
OpIndex = 3;
break;
+ case TargetOpcode::G_MUL:
+ OpIndex = 4;
+ break;
+ case TargetOpcode::G_SMULH:
+ OpIndex = 5;
+ break;
+ case TargetOpcode::G_UMULH:
+ OpIndex = 6;
+ break;
}
- const DivRemEntry &TypeEntry = *OpEntryIt;
- const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
+ const MulDivRemEntry &TypeEntry = *OpEntryIt;
+ const MulDivRemEntry::MulDivRemResult &OpEntry =
+ TypeEntry.ResultTable[OpIndex];
const TargetRegisterClass *RegRC = getRegClass(RegTy, *RegRB);
if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
@@ -1637,6 +1716,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpCopy),
TypeEntry.LowInReg)
.addReg(Op1Reg);
+
// Zero-extend or sign-extend into high-order input register.
if (OpEntry.OpSignExtend) {
if (OpEntry.IsOpSigned)
@@ -1667,9 +1747,11 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
}
}
}
- // Generate the DIV/IDIV instruction.
- BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpDivRem))
+
+ // Generate the DIV/IDIV/MUL/IMUL instruction.
+ BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpMulDivRem))
.addReg(Op2Reg);
+
// For i8 remainder, we can't reference ah directly, as we'll end
// up with bogus copies like %r9b = COPY %ah. Reference ax
// instead to prevent ah references in a rex instruction.
@@ -1678,9 +1760,7 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
// won't generate explicit references to the GR8_NOREX registers. If
// the allocator and/or the backend get enhanced to be more robust in
// that regard, this can be, and should be, removed.
- if ((I.getOpcode() == Instruction::SRem ||
- I.getOpcode() == Instruction::URem) &&
- OpEntry.DivRemResultReg == X86::AH && STI.is64Bit()) {
+ if (OpEntry.ResultReg == X86::AH && STI.is64Bit()) {
Register SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
Register ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg)
@@ -1702,9 +1782,10 @@ bool X86InstructionSelector::selectDivRem(MachineInstr &I,
} else {
BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
DstReg)
- .addReg(OpEntry.DivRemResultReg);
+ .addReg(OpEntry.ResultReg);
}
I.eraseFromParent();
+
return true;
}
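
The selectUAddSub rewrite above chooses ADD/SUB when the carry-in is the constant 0 and ADC/SBB when the carry-in is produced by a previous G_UADDO/G_UADDE/G_USUBO/G_USUBO, i.e. it lowers a wide add or sub that was split into a carry chain; selectMulDivRem extends the old table-driven DIV/IDIV scheme so MUL/IMUL reuse the same fixed AL/AX/EAX/RAX and AH/DX/EDX/RDX result registers. A small self-contained C++ sketch of the carry-chain shape that G_UADDO plus G_UADDE model (a hypothetical source-level example, not code taken from the patch):

#include <cstdint>
#include <cstdio>

// A 128-bit addition built from two 64-bit halves: the low half is a plain
// add whose carry-out feeds the high half's add-with-carry.  GlobalISel
// models this as G_UADDO (low) feeding G_UADDE (high), which the selector
// now lowers to ADD64rr followed by ADC64rr.
static void add128(uint64_t ALo, uint64_t AHi, uint64_t BLo, uint64_t BHi,
                   uint64_t &Lo, uint64_t &Hi) {
  Lo = ALo + BLo;             // G_UADDO -> ADD, sets the carry flag
  uint64_t Carry = Lo < ALo;  // carry out of the low 64 bits
  Hi = AHi + BHi + Carry;     // G_UADDE -> ADC, consumes that carry
}

int main() {
  uint64_t Lo, Hi;
  add128(~0ull, 0, 1, 0, Lo, Hi); // (2^64 - 1) + 1 == 2^64
  std::printf("hi=%llu lo=%llu\n", (unsigned long long)Hi,
              (unsigned long long)Lo); // prints hi=1 lo=0
  return 0;
}

The same EFLAGS carry bit is what lets G_USUBO/G_USUBE map onto SUB/SBB in the subtraction case.
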
diff --git a/llvm/lib/Target/X86/X86InterleavedAccess.cpp b/llvm/lib/Target/X86/X86InterleavedAccess.cpp
index df4437397f4c..47c3eca7b6bd 100644
--- a/llvm/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/llvm/lib/Target/X86/X86InterleavedAccess.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -28,7 +29,6 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/MachineValueType.h"
#include <algorithm>
#include <cassert>
#include <cmath>
@@ -193,22 +193,18 @@ void X86InterleavedAccessGroup::decompose(
// Decompose the load instruction.
LoadInst *LI = cast<LoadInst>(VecInst);
- Type *VecBaseTy, *VecBasePtrTy;
- Value *VecBasePtr;
+ Type *VecBaseTy;
unsigned int NumLoads = NumSubVectors;
// In the case of stride 3 with a vector of 32 elements load the information
// in the following way:
// [0,1...,VF/2-1,VF/2+VF,VF/2+VF+1,...,2VF-1]
unsigned VecLength = DL.getTypeSizeInBits(VecWidth);
+ Value *VecBasePtr = LI->getPointerOperand();
if (VecLength == 768 || VecLength == 1536) {
VecBaseTy = FixedVectorType::get(Type::getInt8Ty(LI->getContext()), 16);
- VecBasePtrTy = VecBaseTy->getPointerTo(LI->getPointerAddressSpace());
- VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
NumLoads = NumSubVectors * (VecLength / 384);
} else {
VecBaseTy = SubVecTy;
- VecBasePtrTy = VecBaseTy->getPointerTo(LI->getPointerAddressSpace());
- VecBasePtr = Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
}
// Generate N loads of T type.
assert(VecBaseTy->getPrimitiveSizeInBits().isKnownMultipleOf(8) &&
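
The decompose() change above is an opaque-pointer cleanup: the sub-vector loads no longer need the pointer operand bitcast to a typed vector pointer, because the loaded type is given to the load itself. A hedged IRBuilder-style sketch of the resulting shape (assumes the usual LLVM headers are available; loadSubVector and its parameters are illustrative names, not code from the patch):

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// With opaque pointers the value type is supplied to the load directly and
// the pointer operand stays a plain 'ptr'; a byte offset is applied with an
// i8 GEP instead of bitcasting to a "<16 x i8>*"-style pointer first.
static Value *loadSubVector(IRBuilder<> &Builder, LoadInst *LI,
                            FixedVectorType *SubVecTy, uint64_t ByteOff) {
  Value *Base = LI->getPointerOperand();   // no CreateBitCast required
  Value *Addr =
      Builder.CreateConstGEP1_64(Builder.getInt8Ty(), Base, ByteOff);
  return Builder.CreateAlignedLoad(SubVecTy, Addr, Align(1));
}
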
diff --git a/llvm/lib/Target/X86/X86KCFI.cpp b/llvm/lib/Target/X86/X86KCFI.cpp
deleted file mode 100644
index 4086f28804fc..000000000000
--- a/llvm/lib/Target/X86/X86KCFI.cpp
+++ /dev/null
@@ -1,150 +0,0 @@
-//===---- X86KCFI.cpp - Implements KCFI -----------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements KCFI indirect call checking.
-//
-//===----------------------------------------------------------------------===//
-
-#include "X86.h"
-#include "X86InstrInfo.h"
-#include "X86Subtarget.h"
-#include "X86TargetMachine.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineInstrBundle.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "x86-kcfi"
-#define X86_KCFI_PASS_NAME "Insert KCFI indirect call checks"
-
-STATISTIC(NumKCFIChecksAdded, "Number of indirect call checks added");
-
-namespace {
-class X86KCFI : public MachineFunctionPass {
-public:
- static char ID;
-
- X86KCFI() : MachineFunctionPass(ID) {}
-
- StringRef getPassName() const override { return X86_KCFI_PASS_NAME; }
- bool runOnMachineFunction(MachineFunction &MF) override;
-
-private:
- /// Machine instruction info used throughout the class.
- const X86InstrInfo *TII = nullptr;
-
- /// Emits a KCFI check before an indirect call.
- /// \returns true if the check was added and false otherwise.
- bool emitCheck(MachineBasicBlock &MBB,
- MachineBasicBlock::instr_iterator I) const;
-};
-
-char X86KCFI::ID = 0;
-} // end anonymous namespace
-
-INITIALIZE_PASS(X86KCFI, DEBUG_TYPE, X86_KCFI_PASS_NAME, false, false)
-
-FunctionPass *llvm::createX86KCFIPass() { return new X86KCFI(); }
-
-bool X86KCFI::emitCheck(MachineBasicBlock &MBB,
- MachineBasicBlock::instr_iterator MBBI) const {
- assert(TII && "Target instruction info was not initialized");
-
- // If the call instruction is bundled, we can only emit a check safely if
- // it's the first instruction in the bundle.
- if (MBBI->isBundled() && !std::prev(MBBI)->isBundle())
- report_fatal_error("Cannot emit a KCFI check for a bundled call");
-
- MachineFunction &MF = *MBB.getParent();
- // If the call target is a memory operand, unfold it and use R11 for the
- // call, so KCFI_CHECK won't have to recompute the address.
- switch (MBBI->getOpcode()) {
- case X86::CALL64m:
- case X86::CALL64m_NT:
- case X86::TAILJMPm64:
- case X86::TAILJMPm64_REX: {
- MachineBasicBlock::instr_iterator OrigCall = MBBI;
- SmallVector<MachineInstr *, 2> NewMIs;
- if (!TII->unfoldMemoryOperand(MF, *OrigCall, X86::R11, /*UnfoldLoad=*/true,
- /*UnfoldStore=*/false, NewMIs))
- report_fatal_error("Failed to unfold memory operand for a KCFI check");
- for (auto *NewMI : NewMIs)
- MBBI = MBB.insert(OrigCall, NewMI);
- assert(MBBI->isCall() &&
- "Unexpected instruction after memory operand unfolding");
- if (OrigCall->shouldUpdateCallSiteInfo())
- MF.moveCallSiteInfo(&*OrigCall, &*MBBI);
- MBBI->setCFIType(MF, OrigCall->getCFIType());
- OrigCall->eraseFromParent();
- break;
- }
- default:
- break;
- }
-
- MachineInstr *Check =
- BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(X86::KCFI_CHECK))
- .getInstr();
- MachineOperand &Target = MBBI->getOperand(0);
- switch (MBBI->getOpcode()) {
- case X86::CALL64r:
- case X86::CALL64r_NT:
- case X86::TAILJMPr64:
- case X86::TAILJMPr64_REX:
- assert(Target.isReg() && "Unexpected target operand for an indirect call");
- Check->addOperand(MachineOperand::CreateReg(Target.getReg(), false));
- Target.setIsRenamable(false);
- break;
- case X86::CALL64pcrel32:
- case X86::TAILJMPd64:
- assert(Target.isSymbol() && "Unexpected target operand for a direct call");
- // X86TargetLowering::EmitLoweredIndirectThunk always uses r11 for
- // 64-bit indirect thunk calls.
- assert(StringRef(Target.getSymbolName()).endswith("_r11") &&
- "Unexpected register for an indirect thunk call");
- Check->addOperand(MachineOperand::CreateReg(X86::R11, false));
- break;
- default:
- llvm_unreachable("Unexpected CFI call opcode");
- }
-
- Check->addOperand(MachineOperand::CreateImm(MBBI->getCFIType()));
- MBBI->setCFIType(MF, 0);
-
- // If not already bundled, bundle the check and the call to prevent
- // further changes.
- if (!MBBI->isBundled())
- finalizeBundle(MBB, Check->getIterator(), std::next(MBBI->getIterator()));
-
- ++NumKCFIChecksAdded;
- return true;
-}
-
-bool X86KCFI::runOnMachineFunction(MachineFunction &MF) {
- const Module *M = MF.getMMI().getModule();
- if (!M->getModuleFlag("kcfi"))
- return false;
-
- const auto &SubTarget = MF.getSubtarget<X86Subtarget>();
- TII = SubTarget.getInstrInfo();
-
- bool Changed = false;
- for (MachineBasicBlock &MBB : MF) {
- for (MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),
- MIE = MBB.instr_end();
- MII != MIE; ++MII) {
- if (MII->isCall() && MII->getCFIType())
- Changed |= emitCheck(MBB, MII);
- }
- }
-
- return Changed;
-}
diff --git a/llvm/lib/Target/X86/X86LegalizerInfo.cpp b/llvm/lib/Target/X86/X86LegalizerInfo.cpp
index 2fd740573d24..a4a247f85f3d 100644
--- a/llvm/lib/Target/X86/X86LegalizerInfo.cpp
+++ b/llvm/lib/Target/X86/X86LegalizerInfo.cpp
@@ -22,86 +22,22 @@
using namespace llvm;
using namespace TargetOpcode;
using namespace LegalizeActions;
-
-/// FIXME: The following static functions are SizeChangeStrategy functions
-/// that are meant to temporarily mimic the behaviour of the old legalization
-/// based on doubling/halving non-legal types as closely as possible. This is
-/// not entirly possible as only legalizing the types that are exactly a power
-/// of 2 times the size of the legal types would require specifying all those
-/// sizes explicitly.
-/// In practice, not specifying those isn't a problem, and the below functions
-/// should disappear quickly as we add support for legalizing non-power-of-2
-/// sized types further.
-static void addAndInterleaveWithUnsupported(
- LegacyLegalizerInfo::SizeAndActionsVec &result,
- const LegacyLegalizerInfo::SizeAndActionsVec &v) {
- for (unsigned i = 0; i < v.size(); ++i) {
- result.push_back(v[i]);
- if (i + 1 < v[i].first && i + 1 < v.size() &&
- v[i + 1].first != v[i].first + 1)
- result.push_back({v[i].first + 1, LegacyLegalizeActions::Unsupported});
- }
-}
-
-static LegacyLegalizerInfo::SizeAndActionsVec
-widen_1(const LegacyLegalizerInfo::SizeAndActionsVec &v) {
- assert(v.size() >= 1);
- assert(v[0].first > 1);
- LegacyLegalizerInfo::SizeAndActionsVec result = {
- {1, LegacyLegalizeActions::WidenScalar},
- {2, LegacyLegalizeActions::Unsupported}};
- addAndInterleaveWithUnsupported(result, v);
- auto Largest = result.back().first;
- result.push_back({Largest + 1, LegacyLegalizeActions::Unsupported});
- return result;
-}
+using namespace LegalityPredicates;
X86LegalizerInfo::X86LegalizerInfo(const X86Subtarget &STI,
const X86TargetMachine &TM)
- : Subtarget(STI), TM(TM) {
-
- setLegalizerInfo32bit();
- setLegalizerInfo64bit();
- setLegalizerInfoSSE1();
- setLegalizerInfoSSE2();
- setLegalizerInfoSSE41();
- setLegalizerInfoAVX();
- setLegalizerInfoAVX2();
- setLegalizerInfoAVX512();
- setLegalizerInfoAVX512DQ();
- setLegalizerInfoAVX512BW();
-
- getActionDefinitionsBuilder(G_INTRINSIC_ROUNDEVEN)
- .scalarize(0)
- .minScalar(0, LLT::scalar(32))
- .libcall();
-
- auto &LegacyInfo = getLegacyLegalizerInfo();
- LegacyInfo.setLegalizeScalarToDifferentSizeStrategy(G_PHI, 0, widen_1);
- for (unsigned BinOp : {G_SUB, G_MUL, G_AND, G_OR, G_XOR})
- LegacyInfo.setLegalizeScalarToDifferentSizeStrategy(BinOp, 0, widen_1);
- for (unsigned MemOp : {G_LOAD, G_STORE})
- LegacyInfo.setLegalizeScalarToDifferentSizeStrategy(
- MemOp, 0, LegacyLegalizerInfo::narrowToSmallerAndWidenToSmallest);
- LegacyInfo.setLegalizeScalarToDifferentSizeStrategy(
- G_PTR_ADD, 1,
- LegacyLegalizerInfo::widenToLargerTypesUnsupportedOtherwise);
- LegacyInfo.setLegalizeScalarToDifferentSizeStrategy(
- G_CONSTANT, 0,
- LegacyLegalizerInfo::widenToLargerTypesAndNarrowToLargest);
-
- getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
-
- LegacyInfo.computeTables();
- verify(*STI.getInstrInfo());
-}
-
-bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
- MachineInstr &MI) const {
- return true;
-}
-
-void X86LegalizerInfo::setLegalizerInfo32bit() {
+ : Subtarget(STI) {
+
+ bool Is64Bit = Subtarget.is64Bit();
+ bool HasSSE1 = Subtarget.hasSSE1();
+ bool HasSSE2 = Subtarget.hasSSE2();
+ bool HasSSE41 = Subtarget.hasSSE41();
+ bool HasAVX = Subtarget.hasAVX();
+ bool HasAVX2 = Subtarget.hasAVX2();
+ bool HasAVX512 = Subtarget.hasAVX512();
+ bool HasVLX = Subtarget.hasVLX();
+ bool HasDQI = Subtarget.hasAVX512() && Subtarget.hasDQI();
+ bool HasBWI = Subtarget.hasAVX512() && Subtarget.hasBWI();
const LLT p0 = LLT::pointer(0, TM.getPointerSizeInBits(0));
const LLT s1 = LLT::scalar(1);
@@ -109,455 +45,505 @@ void X86LegalizerInfo::setLegalizerInfo32bit() {
const LLT s16 = LLT::scalar(16);
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
+ const LLT s80 = LLT::scalar(80);
const LLT s128 = LLT::scalar(128);
+ const LLT sMaxScalar = Subtarget.is64Bit() ? s64 : s32;
+ const LLT v2s32 = LLT::fixed_vector(2, 32);
+ const LLT v4s8 = LLT::fixed_vector(4, 8);
- auto &LegacyInfo = getLegacyLegalizerInfo();
-
- for (auto Ty : {p0, s1, s8, s16, s32})
- LegacyInfo.setAction({G_IMPLICIT_DEF, Ty}, LegacyLegalizeActions::Legal);
-
- for (auto Ty : {s8, s16, s32, p0})
- LegacyInfo.setAction({G_PHI, Ty}, LegacyLegalizeActions::Legal);
-
- for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
- for (auto Ty : {s8, s16, s32})
- LegacyInfo.setAction({BinOp, Ty}, LegacyLegalizeActions::Legal);
-
- for (unsigned Op : {G_UADDE}) {
- LegacyInfo.setAction({Op, s32}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({Op, 1, s1}, LegacyLegalizeActions::Legal);
- }
-
- for (unsigned MemOp : {G_LOAD, G_STORE}) {
- for (auto Ty : {s8, s16, s32, p0})
- LegacyInfo.setAction({MemOp, Ty}, LegacyLegalizeActions::Legal);
-
- // And everything's fine in addrspace 0.
- LegacyInfo.setAction({MemOp, 1, p0}, LegacyLegalizeActions::Legal);
- }
-
- // Pointer-handling
- LegacyInfo.setAction({G_FRAME_INDEX, p0}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_GLOBAL_VALUE, p0}, LegacyLegalizeActions::Legal);
-
- LegacyInfo.setAction({G_PTR_ADD, p0}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_PTR_ADD, 1, s32}, LegacyLegalizeActions::Legal);
-
- if (!Subtarget.is64Bit()) {
- getActionDefinitionsBuilder(G_PTRTOINT)
- .legalForCartesianProduct({s1, s8, s16, s32}, {p0})
- .maxScalar(0, s32)
- .widenScalarToNextPow2(0, /*Min*/ 8);
- getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, s32}});
-
- // Shifts and SDIV
- getActionDefinitionsBuilder(
- {G_SDIV, G_SREM, G_UDIV, G_UREM})
- .legalFor({s8, s16, s32})
- .clampScalar(0, s8, s32);
-
- getActionDefinitionsBuilder(
- {G_SHL, G_LSHR, G_ASHR})
- .legalFor({{s8, s8}, {s16, s8}, {s32, s8}})
- .clampScalar(0, s8, s32)
- .clampScalar(1, s8, s8);
-
- // Comparison
- getActionDefinitionsBuilder(G_ICMP)
- .legalForCartesianProduct({s8}, {s8, s16, s32, p0})
- .clampScalar(0, s8, s8);
- }
-
- // Control-flow
- LegacyInfo.setAction({G_BRCOND, s1}, LegacyLegalizeActions::Legal);
-
- // Constants
- for (auto Ty : {s8, s16, s32, p0})
- LegacyInfo.setAction({TargetOpcode::G_CONSTANT, Ty},
- LegacyLegalizeActions::Legal);
-
- // Extensions
- for (auto Ty : {s8, s16, s32}) {
- LegacyInfo.setAction({G_ZEXT, Ty}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_SEXT, Ty}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_ANYEXT, Ty}, LegacyLegalizeActions::Legal);
- }
- LegacyInfo.setAction({G_ANYEXT, s128}, LegacyLegalizeActions::Legal);
- getActionDefinitionsBuilder(G_SEXT_INREG).lower();
-
- // Merge/Unmerge
- for (const auto &Ty : {s16, s32, s64}) {
- LegacyInfo.setAction({G_MERGE_VALUES, Ty}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_UNMERGE_VALUES, 1, Ty},
- LegacyLegalizeActions::Legal);
- }
- for (const auto &Ty : {s8, s16, s32}) {
- LegacyInfo.setAction({G_MERGE_VALUES, 1, Ty}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_UNMERGE_VALUES, Ty}, LegacyLegalizeActions::Legal);
- }
-}
-
-void X86LegalizerInfo::setLegalizerInfo64bit() {
-
- if (!Subtarget.is64Bit())
- return;
-
- const LLT p0 = LLT::pointer(0, TM.getPointerSizeInBits(0));
- const LLT s1 = LLT::scalar(1);
- const LLT s8 = LLT::scalar(8);
- const LLT s16 = LLT::scalar(16);
- const LLT s32 = LLT::scalar(32);
- const LLT s64 = LLT::scalar(64);
- const LLT s128 = LLT::scalar(128);
- auto &LegacyInfo = getLegacyLegalizerInfo();
-
- LegacyInfo.setAction({G_IMPLICIT_DEF, s64}, LegacyLegalizeActions::Legal);
- // Need to have that, as tryFoldImplicitDef will create this pattern:
- // s128 = EXTEND (G_IMPLICIT_DEF s32/s64) -> s128 = G_IMPLICIT_DEF
- LegacyInfo.setAction({G_IMPLICIT_DEF, s128}, LegacyLegalizeActions::Legal);
-
- LegacyInfo.setAction({G_PHI, s64}, LegacyLegalizeActions::Legal);
-
- for (unsigned BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR})
- LegacyInfo.setAction({BinOp, s64}, LegacyLegalizeActions::Legal);
-
- for (unsigned MemOp : {G_LOAD, G_STORE})
- LegacyInfo.setAction({MemOp, s64}, LegacyLegalizeActions::Legal);
-
- // Pointer-handling
- LegacyInfo.setAction({G_PTR_ADD, 1, s64}, LegacyLegalizeActions::Legal);
- getActionDefinitionsBuilder(G_PTRTOINT)
- .legalForCartesianProduct({s1, s8, s16, s32, s64}, {p0})
- .maxScalar(0, s64)
- .widenScalarToNextPow2(0, /*Min*/ 8);
- getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, s64}});
-
- // Constants
- LegacyInfo.setAction({TargetOpcode::G_CONSTANT, s64},
- LegacyLegalizeActions::Legal);
-
- // Extensions
- for (unsigned extOp : {G_ZEXT, G_SEXT, G_ANYEXT}) {
- LegacyInfo.setAction({extOp, s64}, LegacyLegalizeActions::Legal);
- }
-
- getActionDefinitionsBuilder(G_SITOFP)
- .legalForCartesianProduct({s32, s64})
- .clampScalar(1, s32, s64)
- .widenScalarToNextPow2(1)
- .clampScalar(0, s32, s64)
- .widenScalarToNextPow2(0);
-
- getActionDefinitionsBuilder(G_FPTOSI)
- .legalForCartesianProduct({s32, s64})
- .clampScalar(1, s32, s64)
- .widenScalarToNextPow2(0)
- .clampScalar(0, s32, s64)
- .widenScalarToNextPow2(1);
-
- // Comparison
- getActionDefinitionsBuilder(G_ICMP)
- .legalForCartesianProduct({s8}, {s8, s16, s32, s64, p0})
- .clampScalar(0, s8, s8);
-
- getActionDefinitionsBuilder(G_FCMP)
- .legalForCartesianProduct({s8}, {s32, s64})
- .clampScalar(0, s8, s8)
- .clampScalar(1, s32, s64)
- .widenScalarToNextPow2(1);
-
- // Divisions
- getActionDefinitionsBuilder(
- {G_SDIV, G_SREM, G_UDIV, G_UREM})
- .legalFor({s8, s16, s32, s64})
- .clampScalar(0, s8, s64);
-
- // Shifts
- getActionDefinitionsBuilder(
- {G_SHL, G_LSHR, G_ASHR})
- .legalFor({{s8, s8}, {s16, s8}, {s32, s8}, {s64, s8}})
- .clampScalar(0, s8, s64)
- .clampScalar(1, s8, s8);
-
- // Merge/Unmerge
- LegacyInfo.setAction({G_MERGE_VALUES, s128}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_UNMERGE_VALUES, 1, s128},
- LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_MERGE_VALUES, 1, s128}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_UNMERGE_VALUES, s128}, LegacyLegalizeActions::Legal);
-}
-
-void X86LegalizerInfo::setLegalizerInfoSSE1() {
- if (!Subtarget.hasSSE1())
- return;
-
- const LLT s32 = LLT::scalar(32);
- const LLT s64 = LLT::scalar(64);
- const LLT v4s32 = LLT::fixed_vector(4, 32);
- const LLT v2s64 = LLT::fixed_vector(2, 64);
-
- auto &LegacyInfo = getLegacyLegalizerInfo();
-
- for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
- for (auto Ty : {s32, v4s32})
- LegacyInfo.setAction({BinOp, Ty}, LegacyLegalizeActions::Legal);
-
- for (unsigned MemOp : {G_LOAD, G_STORE})
- for (auto Ty : {v4s32, v2s64})
- LegacyInfo.setAction({MemOp, Ty}, LegacyLegalizeActions::Legal);
-
- // Constants
- LegacyInfo.setAction({TargetOpcode::G_FCONSTANT, s32},
- LegacyLegalizeActions::Legal);
-
- // Merge/Unmerge
- for (const auto &Ty : {v4s32, v2s64}) {
- LegacyInfo.setAction({G_CONCAT_VECTORS, Ty}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_UNMERGE_VALUES, 1, Ty},
- LegacyLegalizeActions::Legal);
- }
- LegacyInfo.setAction({G_MERGE_VALUES, 1, s64}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_UNMERGE_VALUES, s64}, LegacyLegalizeActions::Legal);
-}
-
-void X86LegalizerInfo::setLegalizerInfoSSE2() {
- if (!Subtarget.hasSSE2())
- return;
-
- const LLT s32 = LLT::scalar(32);
- const LLT s64 = LLT::scalar(64);
const LLT v16s8 = LLT::fixed_vector(16, 8);
const LLT v8s16 = LLT::fixed_vector(8, 16);
const LLT v4s32 = LLT::fixed_vector(4, 32);
const LLT v2s64 = LLT::fixed_vector(2, 64);
+ const LLT v2p0 = LLT::fixed_vector(2, p0);
const LLT v32s8 = LLT::fixed_vector(32, 8);
const LLT v16s16 = LLT::fixed_vector(16, 16);
const LLT v8s32 = LLT::fixed_vector(8, 32);
const LLT v4s64 = LLT::fixed_vector(4, 64);
+ const LLT v4p0 = LLT::fixed_vector(4, p0);
- auto &LegacyInfo = getLegacyLegalizerInfo();
-
- for (unsigned BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
- for (auto Ty : {s64, v2s64})
- LegacyInfo.setAction({BinOp, Ty}, LegacyLegalizeActions::Legal);
-
- for (unsigned BinOp : {G_ADD, G_SUB})
- for (auto Ty : {v16s8, v8s16, v4s32, v2s64})
- LegacyInfo.setAction({BinOp, Ty}, LegacyLegalizeActions::Legal);
-
- LegacyInfo.setAction({G_MUL, v8s16}, LegacyLegalizeActions::Legal);
-
- LegacyInfo.setAction({G_FPEXT, s64}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_FPEXT, 1, s32}, LegacyLegalizeActions::Legal);
-
- LegacyInfo.setAction({G_FPTRUNC, s32}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_FPTRUNC, 1, s64}, LegacyLegalizeActions::Legal);
-
- // Constants
- LegacyInfo.setAction({TargetOpcode::G_FCONSTANT, s64},
- LegacyLegalizeActions::Legal);
-
- // Merge/Unmerge
- for (const auto &Ty :
- {v16s8, v32s8, v8s16, v16s16, v4s32, v8s32, v2s64, v4s64}) {
- LegacyInfo.setAction({G_CONCAT_VECTORS, Ty}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_UNMERGE_VALUES, 1, Ty},
- LegacyLegalizeActions::Legal);
- }
- for (const auto &Ty : {v16s8, v8s16, v4s32, v2s64}) {
- LegacyInfo.setAction({G_CONCAT_VECTORS, 1, Ty},
- LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_UNMERGE_VALUES, Ty}, LegacyLegalizeActions::Legal);
- }
-}
-
-void X86LegalizerInfo::setLegalizerInfoSSE41() {
- if (!Subtarget.hasSSE41())
- return;
-
- const LLT v4s32 = LLT::fixed_vector(4, 32);
-
- auto &LegacyInfo = getLegacyLegalizerInfo();
-
- LegacyInfo.setAction({G_MUL, v4s32}, LegacyLegalizeActions::Legal);
-}
-
-void X86LegalizerInfo::setLegalizerInfoAVX() {
- if (!Subtarget.hasAVX())
- return;
-
- const LLT v16s8 = LLT::fixed_vector(16, 8);
- const LLT v8s16 = LLT::fixed_vector(8, 16);
- const LLT v4s32 = LLT::fixed_vector(4, 32);
- const LLT v2s64 = LLT::fixed_vector(2, 64);
-
- const LLT v32s8 = LLT::fixed_vector(32, 8);
const LLT v64s8 = LLT::fixed_vector(64, 8);
- const LLT v16s16 = LLT::fixed_vector(16, 16);
const LLT v32s16 = LLT::fixed_vector(32, 16);
- const LLT v8s32 = LLT::fixed_vector(8, 32);
const LLT v16s32 = LLT::fixed_vector(16, 32);
- const LLT v4s64 = LLT::fixed_vector(4, 64);
const LLT v8s64 = LLT::fixed_vector(8, 64);
- auto &LegacyInfo = getLegacyLegalizerInfo();
-
- for (unsigned MemOp : {G_LOAD, G_STORE})
- for (auto Ty : {v8s32, v4s64})
- LegacyInfo.setAction({MemOp, Ty}, LegacyLegalizeActions::Legal);
-
- for (auto Ty : {v32s8, v16s16, v8s32, v4s64}) {
- LegacyInfo.setAction({G_INSERT, Ty}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_EXTRACT, 1, Ty}, LegacyLegalizeActions::Legal);
+ // TODO: AVX512 bool vector predicate types
+
+ // implicit/constants
+ getActionDefinitionsBuilder(G_IMPLICIT_DEF)
+ .legalIf([=](const LegalityQuery &Query) -> bool {
+ // 32/64-bits needs support for s64/s128 to handle cases:
+ // s64 = EXTEND (G_IMPLICIT_DEF s32) -> s64 = G_IMPLICIT_DEF
+ // s128 = EXTEND (G_IMPLICIT_DEF s32/s64) -> s128 = G_IMPLICIT_DEF
+ return typeInSet(0, {p0, s1, s8, s16, s32, s64})(Query) ||
+ (Is64Bit && typeInSet(0, {s128})(Query));
+ });
+
+ getActionDefinitionsBuilder(G_CONSTANT)
+ .legalIf([=](const LegalityQuery &Query) -> bool {
+ return typeInSet(0, {p0, s8, s16, s32})(Query) ||
+ (Is64Bit && typeInSet(0, {s64})(Query));
+ })
+ .widenScalarToNextPow2(0, /*Min=*/8)
+ .clampScalar(0, s8, sMaxScalar);
+
+ // merge/unmerge
+ for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
+ unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
+ unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
+ getActionDefinitionsBuilder(Op)
+ .widenScalarToNextPow2(LitTyIdx, /*Min=*/8)
+ .widenScalarToNextPow2(BigTyIdx, /*Min=*/16)
+ .minScalar(LitTyIdx, s8)
+ .minScalar(BigTyIdx, s32)
+ .legalIf([=](const LegalityQuery &Q) {
+ switch (Q.Types[BigTyIdx].getSizeInBits()) {
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ case 256:
+ case 512:
+ break;
+ default:
+ return false;
+ }
+ switch (Q.Types[LitTyIdx].getSizeInBits()) {
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ case 256:
+ return true;
+ default:
+ return false;
+ }
+ });
}
- for (auto Ty : {v16s8, v8s16, v4s32, v2s64}) {
- LegacyInfo.setAction({G_INSERT, 1, Ty}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_EXTRACT, Ty}, LegacyLegalizeActions::Legal);
- }
- // Merge/Unmerge
- for (const auto &Ty :
- {v32s8, v64s8, v16s16, v32s16, v8s32, v16s32, v4s64, v8s64}) {
- LegacyInfo.setAction({G_CONCAT_VECTORS, Ty}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_UNMERGE_VALUES, 1, Ty},
- LegacyLegalizeActions::Legal);
- }
- for (const auto &Ty :
- {v16s8, v32s8, v8s16, v16s16, v4s32, v8s32, v2s64, v4s64}) {
- LegacyInfo.setAction({G_CONCAT_VECTORS, 1, Ty},
- LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_UNMERGE_VALUES, Ty}, LegacyLegalizeActions::Legal);
- }
-}
-
-void X86LegalizerInfo::setLegalizerInfoAVX2() {
- if (!Subtarget.hasAVX2())
- return;
- const LLT v32s8 = LLT::fixed_vector(32, 8);
- const LLT v16s16 = LLT::fixed_vector(16, 16);
- const LLT v8s32 = LLT::fixed_vector(8, 32);
- const LLT v4s64 = LLT::fixed_vector(4, 64);
+ // integer addition/subtraction
+ getActionDefinitionsBuilder({G_ADD, G_SUB})
+ .legalIf([=](const LegalityQuery &Query) -> bool {
+ if (typeInSet(0, {s8, s16, s32})(Query))
+ return true;
+ if (Is64Bit && typeInSet(0, {s64})(Query))
+ return true;
+ if (HasSSE2 && typeInSet(0, {v16s8, v8s16, v4s32, v2s64})(Query))
+ return true;
+ if (HasAVX2 && typeInSet(0, {v32s8, v16s16, v8s32, v4s64})(Query))
+ return true;
+ if (HasAVX512 && typeInSet(0, {v16s32, v8s64})(Query))
+ return true;
+ if (HasBWI && typeInSet(0, {v64s8, v32s16})(Query))
+ return true;
+ return false;
+ })
+ .clampMinNumElements(0, s8, 16)
+ .clampMinNumElements(0, s16, 8)
+ .clampMinNumElements(0, s32, 4)
+ .clampMinNumElements(0, s64, 2)
+ .clampMaxNumElements(0, s8, HasBWI ? 64 : (HasAVX2 ? 32 : 16))
+ .clampMaxNumElements(0, s16, HasBWI ? 32 : (HasAVX2 ? 16 : 8))
+ .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX2 ? 8 : 4))
+ .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX2 ? 4 : 2))
+ .widenScalarToNextPow2(0, /*Min=*/32)
+ .clampScalar(0, s8, sMaxScalar)
+ .scalarize(0);
+
+ getActionDefinitionsBuilder({G_UADDE, G_UADDO, G_USUBE, G_USUBO})
+ .legalIf([=](const LegalityQuery &Query) -> bool {
+ return typePairInSet(0, 1, {{s8, s1}, {s16, s1}, {s32, s1}})(Query) ||
+ (Is64Bit && typePairInSet(0, 1, {{s64, s1}})(Query));
+ })
+ .widenScalarToNextPow2(0, /*Min=*/32)
+ .clampScalar(0, s8, sMaxScalar)
+ .clampScalar(1, s1, s1)
+ .scalarize(0);
+
+ // integer multiply
+ getActionDefinitionsBuilder(G_MUL)
+ .legalIf([=](const LegalityQuery &Query) -> bool {
+ if (typeInSet(0, {s8, s16, s32})(Query))
+ return true;
+ if (Is64Bit && typeInSet(0, {s64})(Query))
+ return true;
+ if (HasSSE2 && typeInSet(0, {v8s16})(Query))
+ return true;
+ if (HasSSE41 && typeInSet(0, {v4s32})(Query))
+ return true;
+ if (HasAVX2 && typeInSet(0, {v16s16, v8s32})(Query))
+ return true;
+ if (HasAVX512 && typeInSet(0, {v16s32})(Query))
+ return true;
+ if (HasDQI && typeInSet(0, {v8s64})(Query))
+ return true;
+ if (HasDQI && HasVLX && typeInSet(0, {v2s64, v4s64})(Query))
+ return true;
+ if (HasBWI && typeInSet(0, {v32s16})(Query))
+ return true;
+ return false;
+ })
+ .clampMinNumElements(0, s16, 8)
+ .clampMinNumElements(0, s32, 4)
+ .clampMinNumElements(0, s64, HasVLX ? 2 : 8)
+ .clampMaxNumElements(0, s16, HasBWI ? 32 : (HasAVX2 ? 16 : 8))
+ .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX2 ? 8 : 4))
+ .clampMaxNumElements(0, s64, 8)
+ .widenScalarToNextPow2(0, /*Min=*/32)
+ .clampScalar(0, s8, sMaxScalar)
+ .scalarize(0);
+
+ getActionDefinitionsBuilder({G_SMULH, G_UMULH})
+ .legalIf([=](const LegalityQuery &Query) -> bool {
+ return typeInSet(0, {s8, s16, s32})(Query) ||
+ (Is64Bit && typeInSet(0, {s64})(Query));
+ })
+ .widenScalarToNextPow2(0, /*Min=*/32)
+ .clampScalar(0, s8, sMaxScalar)
+ .scalarize(0);
+
+ // integer divisions
+ getActionDefinitionsBuilder({G_SDIV, G_SREM, G_UDIV, G_UREM})
+ .legalIf([=](const LegalityQuery &Query) -> bool {
+ return typeInSet(0, {s8, s16, s32})(Query) ||
+ (Is64Bit && typeInSet(0, {s64})(Query));
+ })
+ .clampScalar(0, s8, sMaxScalar);
+
+ // integer shifts
+ getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR})
+ .legalIf([=](const LegalityQuery &Query) -> bool {
+ return typePairInSet(0, 1, {{s8, s8}, {s16, s8}, {s32, s8}})(Query) ||
+ (Is64Bit && typePairInSet(0, 1, {{s64, s8}})(Query));
+ })
+ .clampScalar(0, s8, sMaxScalar)
+ .clampScalar(1, s8, s8);
- const LLT v64s8 = LLT::fixed_vector(64, 8);
- const LLT v32s16 = LLT::fixed_vector(32, 16);
- const LLT v16s32 = LLT::fixed_vector(16, 32);
- const LLT v8s64 = LLT::fixed_vector(8, 64);
+ // integer logic
+ getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
+ .legalIf([=](const LegalityQuery &Query) -> bool {
+ if (typeInSet(0, {s8, s16, s32})(Query))
+ return true;
+ if (Is64Bit && typeInSet(0, {s64})(Query))
+ return true;
+ if (HasSSE2 && typeInSet(0, {v16s8, v8s16, v4s32, v2s64})(Query))
+ return true;
+ if (HasAVX && typeInSet(0, {v32s8, v16s16, v8s32, v4s64})(Query))
+ return true;
+ if (HasAVX512 && typeInSet(0, {v64s8, v32s16, v16s32, v8s64})(Query))
+ return true;
+ return false;
+ })
+ .clampMinNumElements(0, s8, 16)
+ .clampMinNumElements(0, s16, 8)
+ .clampMinNumElements(0, s32, 4)
+ .clampMinNumElements(0, s64, 2)
+ .clampMaxNumElements(0, s8, HasAVX512 ? 64 : (HasAVX ? 32 : 16))
+ .clampMaxNumElements(0, s16, HasAVX512 ? 32 : (HasAVX ? 16 : 8))
+ .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX ? 8 : 4))
+ .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX ? 4 : 2))
+ .widenScalarToNextPow2(0, /*Min=*/32)
+ .clampScalar(0, s8, sMaxScalar)
+ .scalarize(0);
+
+ // integer comparison
+ const std::initializer_list<LLT> IntTypes32 = {s8, s16, s32, p0};
+ const std::initializer_list<LLT> IntTypes64 = {s8, s16, s32, s64, p0};
- auto &LegacyInfo = getLegacyLegalizerInfo();
+ getActionDefinitionsBuilder(G_ICMP)
+ .legalForCartesianProduct({s8}, Is64Bit ? IntTypes64 : IntTypes32)
+ .clampScalar(0, s8, s8);
- for (unsigned BinOp : {G_ADD, G_SUB})
- for (auto Ty : {v32s8, v16s16, v8s32, v4s64})
- LegacyInfo.setAction({BinOp, Ty}, LegacyLegalizeActions::Legal);
+ // bswap
+ getActionDefinitionsBuilder(G_BSWAP)
+ .legalIf([=](const LegalityQuery &Query) {
+ return Query.Types[0] == s32 ||
+ (Subtarget.is64Bit() && Query.Types[0] == s64);
+ })
+ .widenScalarToNextPow2(0, /*Min=*/32)
+ .clampScalar(0, s32, sMaxScalar);
+
+ // popcount
+ getActionDefinitionsBuilder(G_CTPOP)
+ .legalIf([=](const LegalityQuery &Query) -> bool {
+ return Subtarget.hasPOPCNT() &&
+ (typePairInSet(0, 1, {{s16, s16}, {s32, s32}})(Query) ||
+ (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query)));
+ })
+ .widenScalarToNextPow2(1, /*Min=*/16)
+ .clampScalar(1, s16, sMaxScalar)
+ .scalarSameSizeAs(0, 1);
+
+ // count leading zeros (LZCNT)
+ getActionDefinitionsBuilder(G_CTLZ)
+ .legalIf([=](const LegalityQuery &Query) -> bool {
+ return Subtarget.hasLZCNT() &&
+ (typePairInSet(0, 1, {{s16, s16}, {s32, s32}})(Query) ||
+ (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query)));
+ })
+ .widenScalarToNextPow2(1, /*Min=*/16)
+ .clampScalar(1, s16, sMaxScalar)
+ .scalarSameSizeAs(0, 1);
+
+ // count trailing zeros
+ getActionDefinitionsBuilder({G_CTTZ_ZERO_UNDEF, G_CTTZ})
+ .legalIf([=](const LegalityQuery &Query) -> bool {
+ return (Query.Opcode == G_CTTZ_ZERO_UNDEF || Subtarget.hasBMI()) &&
+ (typePairInSet(0, 1, {{s16, s16}, {s32, s32}})(Query) ||
+ (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query)));
+ })
+ .widenScalarToNextPow2(1, /*Min=*/16)
+ .clampScalar(1, s16, sMaxScalar)
+ .scalarSameSizeAs(0, 1);
+
+ // control flow
+ getActionDefinitionsBuilder(G_PHI)
+ .legalIf([=](const LegalityQuery &Query) -> bool {
+ return typeInSet(0, {s8, s16, s32, p0})(Query) ||
+ (Is64Bit && typeInSet(0, {s64})(Query)) ||
+ (HasSSE1 && typeInSet(0, {v16s8, v8s16, v4s32, v2s64})(Query)) ||
+ (HasAVX && typeInSet(0, {v32s8, v16s16, v8s32, v4s64})(Query)) ||
+ (HasAVX512 &&
+ typeInSet(0, {v64s8, v32s16, v16s32, v8s64})(Query));
+ })
+ .clampMinNumElements(0, s8, 16)
+ .clampMinNumElements(0, s16, 8)
+ .clampMinNumElements(0, s32, 4)
+ .clampMinNumElements(0, s64, 2)
+ .clampMaxNumElements(0, s8, HasAVX512 ? 64 : (HasAVX ? 32 : 16))
+ .clampMaxNumElements(0, s16, HasAVX512 ? 32 : (HasAVX ? 16 : 8))
+ .clampMaxNumElements(0, s32, HasAVX512 ? 16 : (HasAVX ? 8 : 4))
+ .clampMaxNumElements(0, s64, HasAVX512 ? 8 : (HasAVX ? 4 : 2))
+ .widenScalarToNextPow2(0, /*Min=*/32)
+ .clampScalar(0, s8, sMaxScalar)
+ .scalarize(0);
+
+ getActionDefinitionsBuilder(G_BRCOND).legalFor({s1});
+
+ // pointer handling
+ const std::initializer_list<LLT> PtrTypes32 = {s1, s8, s16, s32};
+ const std::initializer_list<LLT> PtrTypes64 = {s1, s8, s16, s32, s64};
- for (auto Ty : {v16s16, v8s32})
- LegacyInfo.setAction({G_MUL, Ty}, LegacyLegalizeActions::Legal);
+ getActionDefinitionsBuilder(G_PTRTOINT)
+ .legalForCartesianProduct(Is64Bit ? PtrTypes64 : PtrTypes32, {p0})
+ .maxScalar(0, sMaxScalar)
+ .widenScalarToNextPow2(0, /*Min*/ 8);
- // Merge/Unmerge
- for (const auto &Ty : {v64s8, v32s16, v16s32, v8s64}) {
- LegacyInfo.setAction({G_CONCAT_VECTORS, Ty}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_UNMERGE_VALUES, 1, Ty},
- LegacyLegalizeActions::Legal);
+ getActionDefinitionsBuilder(G_INTTOPTR).legalFor({{p0, sMaxScalar}});
+
+ getActionDefinitionsBuilder(G_PTR_ADD)
+ .legalIf([=](const LegalityQuery &Query) -> bool {
+ return typePairInSet(0, 1, {{p0, s32}})(Query) ||
+ (Is64Bit && typePairInSet(0, 1, {{p0, s64}})(Query));
+ })
+ .widenScalarToNextPow2(1, /*Min*/ 32)
+ .clampScalar(1, s32, sMaxScalar);
+
+ getActionDefinitionsBuilder({G_FRAME_INDEX, G_GLOBAL_VALUE}).legalFor({p0});
+
+ // load/store: add more corner cases
+ for (unsigned Op : {G_LOAD, G_STORE}) {
+ auto &Action = getActionDefinitionsBuilder(Op);
+ Action.legalForTypesWithMemDesc({{s8, p0, s1, 1},
+ {s8, p0, s8, 1},
+ {s16, p0, s8, 1},
+ {s16, p0, s16, 1},
+ {s32, p0, s8, 1},
+ {s32, p0, s16, 1},
+ {s32, p0, s32, 1},
+ {s80, p0, s80, 1},
+ {p0, p0, p0, 1},
+ {v4s8, p0, v4s8, 1}});
+ if (Is64Bit)
+ Action.legalForTypesWithMemDesc({{s64, p0, s8, 1},
+ {s64, p0, s16, 1},
+ {s64, p0, s32, 1},
+ {s64, p0, s64, 1},
+ {v2s32, p0, v2s32, 1}});
+ if (HasSSE1)
+ Action.legalForTypesWithMemDesc({{v16s8, p0, v16s8, 1},
+ {v8s16, p0, v8s16, 1},
+ {v4s32, p0, v4s32, 1},
+ {v2s64, p0, v2s64, 1},
+ {v2p0, p0, v2p0, 1}});
+ if (HasAVX)
+ Action.legalForTypesWithMemDesc({{v32s8, p0, v32s8, 1},
+ {v16s16, p0, v16s16, 1},
+ {v8s32, p0, v8s32, 1},
+ {v4s64, p0, v4s64, 1},
+ {v4p0, p0, v4p0, 1}});
+ if (HasAVX512)
+ Action.legalForTypesWithMemDesc({{v64s8, p0, v64s8, 1},
+ {v32s16, p0, v32s16, 1},
+ {v16s32, p0, v16s32, 1},
+ {v8s64, p0, v8s64, 1}});
+ Action.widenScalarToNextPow2(0, /*Min=*/8).clampScalar(0, s8, sMaxScalar);
}
- for (const auto &Ty : {v32s8, v16s16, v8s32, v4s64}) {
- LegacyInfo.setAction({G_CONCAT_VECTORS, 1, Ty},
- LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_UNMERGE_VALUES, Ty}, LegacyLegalizeActions::Legal);
- }
-}
-
-void X86LegalizerInfo::setLegalizerInfoAVX512() {
- if (!Subtarget.hasAVX512())
- return;
- const LLT v16s8 = LLT::fixed_vector(16, 8);
- const LLT v8s16 = LLT::fixed_vector(8, 16);
- const LLT v4s32 = LLT::fixed_vector(4, 32);
- const LLT v2s64 = LLT::fixed_vector(2, 64);
-
- const LLT v32s8 = LLT::fixed_vector(32, 8);
- const LLT v16s16 = LLT::fixed_vector(16, 16);
- const LLT v8s32 = LLT::fixed_vector(8, 32);
- const LLT v4s64 = LLT::fixed_vector(4, 64);
-
- const LLT v64s8 = LLT::fixed_vector(64, 8);
- const LLT v32s16 = LLT::fixed_vector(32, 16);
- const LLT v16s32 = LLT::fixed_vector(16, 32);
- const LLT v8s64 = LLT::fixed_vector(8, 64);
-
- auto &LegacyInfo = getLegacyLegalizerInfo();
-
- for (unsigned BinOp : {G_ADD, G_SUB})
- for (auto Ty : {v16s32, v8s64})
- LegacyInfo.setAction({BinOp, Ty}, LegacyLegalizeActions::Legal);
-
- LegacyInfo.setAction({G_MUL, v16s32}, LegacyLegalizeActions::Legal);
-
- for (unsigned MemOp : {G_LOAD, G_STORE})
- for (auto Ty : {v16s32, v8s64})
- LegacyInfo.setAction({MemOp, Ty}, LegacyLegalizeActions::Legal);
-
- for (auto Ty : {v64s8, v32s16, v16s32, v8s64}) {
- LegacyInfo.setAction({G_INSERT, Ty}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_EXTRACT, 1, Ty}, LegacyLegalizeActions::Legal);
- }
- for (auto Ty : {v32s8, v16s16, v8s32, v4s64, v16s8, v8s16, v4s32, v2s64}) {
- LegacyInfo.setAction({G_INSERT, 1, Ty}, LegacyLegalizeActions::Legal);
- LegacyInfo.setAction({G_EXTRACT, Ty}, LegacyLegalizeActions::Legal);
+ for (unsigned Op : {G_SEXTLOAD, G_ZEXTLOAD}) {
+ auto &Action = getActionDefinitionsBuilder(Op);
+ Action.legalForTypesWithMemDesc({{s16, p0, s8, 1},
+ {s32, p0, s8, 1},
+ {s32, p0, s16, 1}});
+ if (Is64Bit)
+ Action.legalForTypesWithMemDesc({{s64, p0, s8, 1},
+ {s64, p0, s16, 1},
+ {s64, p0, s32, 1}});
+ // TODO - SSE41/AVX2/AVX512F/AVX512BW vector extensions
}
- /************ VLX *******************/
- if (!Subtarget.hasVLX())
- return;
-
- for (auto Ty : {v4s32, v8s32})
- LegacyInfo.setAction({G_MUL, Ty}, LegacyLegalizeActions::Legal);
-}
-
-void X86LegalizerInfo::setLegalizerInfoAVX512DQ() {
- if (!(Subtarget.hasAVX512() && Subtarget.hasDQI()))
- return;
-
- const LLT v8s64 = LLT::fixed_vector(8, 64);
-
- auto &LegacyInfo = getLegacyLegalizerInfo();
-
- LegacyInfo.setAction({G_MUL, v8s64}, LegacyLegalizeActions::Legal);
+ // sext, zext, and anyext
+ getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
+ .legalIf([=](const LegalityQuery &Query) {
+ return typeInSet(0, {s8, s16, s32})(Query) ||
+ (Query.Opcode == G_ANYEXT && Query.Types[0] == s128) ||
+ (Is64Bit && Query.Types[0] == s64);
+ })
+ .widenScalarToNextPow2(0, /*Min=*/8)
+ .clampScalar(0, s8, sMaxScalar)
+ .widenScalarToNextPow2(1, /*Min=*/8)
+ .clampScalar(1, s8, sMaxScalar);
- /************ VLX *******************/
- if (!Subtarget.hasVLX())
- return;
+ getActionDefinitionsBuilder(G_SEXT_INREG).lower();
- const LLT v2s64 = LLT::fixed_vector(2, 64);
- const LLT v4s64 = LLT::fixed_vector(4, 64);
+ // fp constants
+ getActionDefinitionsBuilder(G_FCONSTANT)
+ .legalIf([=](const LegalityQuery &Query) -> bool {
+ return (HasSSE1 && typeInSet(0, {s32})(Query)) ||
+ (HasSSE2 && typeInSet(0, {s64})(Query));
+ });
+
+ // fp arithmetic
+ getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV})
+ .legalIf([=](const LegalityQuery &Query) {
+ return (HasSSE1 && typeInSet(0, {s32, v4s32})(Query)) ||
+ (HasSSE2 && typeInSet(0, {s64, v2s64})(Query)) ||
+ (HasAVX && typeInSet(0, {v8s32, v4s64})(Query)) ||
+ (HasAVX512 && typeInSet(0, {v16s32, v8s64})(Query));
+ });
+
+ // fp comparison
+ getActionDefinitionsBuilder(G_FCMP)
+ .legalIf([=](const LegalityQuery &Query) {
+ return (HasSSE1 && typePairInSet(0, 1, {{s8, s32}})(Query)) ||
+ (HasSSE2 && typePairInSet(0, 1, {{s8, s64}})(Query));
+ })
+ .clampScalar(0, s8, s8)
+ .clampScalar(1, s32, HasSSE2 ? s64 : s32)
+ .widenScalarToNextPow2(1);
- for (auto Ty : {v2s64, v4s64})
- LegacyInfo.setAction({G_MUL, Ty}, LegacyLegalizeActions::Legal);
-}
+ // fp conversions
+ getActionDefinitionsBuilder(G_FPEXT).legalIf([=](const LegalityQuery &Query) {
+ return (HasSSE2 && typePairInSet(0, 1, {{s64, s32}})(Query)) ||
+ (HasAVX && typePairInSet(0, 1, {{v4s64, v4s32}})(Query)) ||
+ (HasAVX512 && typePairInSet(0, 1, {{v8s64, v8s32}})(Query));
+ });
-void X86LegalizerInfo::setLegalizerInfoAVX512BW() {
- if (!(Subtarget.hasAVX512() && Subtarget.hasBWI()))
- return;
+ getActionDefinitionsBuilder(G_FPTRUNC).legalIf(
+ [=](const LegalityQuery &Query) {
+ return (HasSSE2 && typePairInSet(0, 1, {{s32, s64}})(Query)) ||
+ (HasAVX && typePairInSet(0, 1, {{v4s32, v4s64}})(Query)) ||
+ (HasAVX512 && typePairInSet(0, 1, {{v8s32, v8s64}})(Query));
+ });
- const LLT v64s8 = LLT::fixed_vector(64, 8);
- const LLT v32s16 = LLT::fixed_vector(32, 16);
+ getActionDefinitionsBuilder(G_SITOFP)
+ .legalIf([=](const LegalityQuery &Query) {
+ return (HasSSE1 &&
+ (typePairInSet(0, 1, {{s32, s32}})(Query) ||
+ (Is64Bit && typePairInSet(0, 1, {{s32, s64}})(Query)))) ||
+ (HasSSE2 &&
+ (typePairInSet(0, 1, {{s64, s32}})(Query) ||
+ (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query))));
+ })
+ .clampScalar(1, s32, sMaxScalar)
+ .widenScalarToNextPow2(1)
+ .clampScalar(0, s32, HasSSE2 ? s64 : s32)
+ .widenScalarToNextPow2(0);
- auto &LegacyInfo = getLegacyLegalizerInfo();
+ getActionDefinitionsBuilder(G_FPTOSI)
+ .legalIf([=](const LegalityQuery &Query) {
+ return (HasSSE1 &&
+ (typePairInSet(0, 1, {{s32, s32}})(Query) ||
+ (Is64Bit && typePairInSet(0, 1, {{s64, s32}})(Query)))) ||
+ (HasSSE2 &&
+ (typePairInSet(0, 1, {{s32, s64}})(Query) ||
+ (Is64Bit && typePairInSet(0, 1, {{s64, s64}})(Query))));
+ })
+ .clampScalar(1, s32, HasSSE2 ? s64 : s32)
+ .widenScalarToNextPow2(0)
+ .clampScalar(0, s32, sMaxScalar)
+ .widenScalarToNextPow2(1);
- for (unsigned BinOp : {G_ADD, G_SUB})
- for (auto Ty : {v64s8, v32s16})
- LegacyInfo.setAction({BinOp, Ty}, LegacyLegalizeActions::Legal);
+ // vector ops
+ getActionDefinitionsBuilder({G_EXTRACT, G_INSERT})
+ .legalIf([=](const LegalityQuery &Query) {
+ unsigned SubIdx = Query.Opcode == G_EXTRACT ? 0 : 1;
+ unsigned FullIdx = Query.Opcode == G_EXTRACT ? 1 : 0;
+ return (HasAVX && typePairInSet(SubIdx, FullIdx,
+ {{v16s8, v32s8},
+ {v8s16, v16s16},
+ {v4s32, v8s32},
+ {v2s64, v4s64}})(Query)) ||
+ (HasAVX512 && typePairInSet(SubIdx, FullIdx,
+ {{v16s8, v64s8},
+ {v32s8, v64s8},
+ {v8s16, v32s16},
+ {v16s16, v32s16},
+ {v4s32, v16s32},
+ {v8s32, v16s32},
+ {v2s64, v8s64},
+ {v4s64, v8s64}})(Query));
+ });
+
+ // todo: only permit dst types up to max legal vector register size?
+ getActionDefinitionsBuilder(G_CONCAT_VECTORS)
+ .legalIf([=](const LegalityQuery &Query) {
+ return (HasSSE1 && typePairInSet(1, 0,
+ {{v16s8, v32s8},
+ {v8s16, v16s16},
+ {v4s32, v8s32},
+ {v2s64, v4s64}})(Query)) ||
+ (HasAVX && typePairInSet(1, 0,
+ {{v16s8, v64s8},
+ {v32s8, v64s8},
+ {v8s16, v32s16},
+ {v16s16, v32s16},
+ {v4s32, v16s32},
+ {v8s32, v16s32},
+ {v2s64, v8s64},
+ {v4s64, v8s64}})(Query));
+ });
+
+ // todo: vectors and address spaces
+ getActionDefinitionsBuilder(G_SELECT)
+ .legalFor({{s8, s32}, {s16, s32}, {s32, s32}, {s64, s32},
+ {p0, s32}})
+ .widenScalarToNextPow2(0, /*Min=*/8)
+ .clampScalar(0, s8, sMaxScalar)
+ .clampScalar(1, s32, s32);
+
+ // memory intrinsics
+ getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();
- LegacyInfo.setAction({G_MUL, v32s16}, LegacyLegalizeActions::Legal);
+ // fp intrinsics
+ getActionDefinitionsBuilder(G_INTRINSIC_ROUNDEVEN)
+ .scalarize(0)
+ .minScalar(0, LLT::scalar(32))
+ .libcall();
- /************ VLX *******************/
- if (!Subtarget.hasVLX())
- return;
+ getActionDefinitionsBuilder({G_FREEZE, G_CONSTANT_FOLD_BARRIER})
+ .legalFor({s8, s16, s32, s64, p0})
+ .widenScalarToNextPow2(0, /*Min=*/8)
+ .clampScalar(0, s8, sMaxScalar);
- const LLT v8s16 = LLT::fixed_vector(8, 16);
- const LLT v16s16 = LLT::fixed_vector(16, 16);
+ getLegacyLegalizerInfo().computeTables();
+ verify(*STI.getInstrInfo());
+}
- for (auto Ty : {v8s16, v16s16})
- LegacyInfo.setAction({G_MUL, Ty}, LegacyLegalizeActions::Legal);
+bool X86LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
+ MachineInstr &MI) const {
+ return true;
}
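The hunk above replaces the legacy per-feature setAction calls with LLVM's chained LegalizeRuleSet builder: each getActionDefinitionsBuilder(...) call accumulates legality predicates and clamping/widening rules, and the first rule that matches a query decides how the operation is legalized. Below is a minimal standalone C++ sketch of that fluent-builder idea; it is not LLVM's actual API, and the class, enum, and method names are hypothetical.

// Minimal standalone sketch (hypothetical names, not LLVM's API) of the fluent
// rule-set style used above: each chained call appends a rule, and the first
// rule whose predicate matches a query decides the action.
#include <cstdio>
#include <functional>
#include <utility>
#include <vector>

enum class Action { Legal, Clamp, Unsupported };

struct Query { unsigned TypeBits; };

class RuleSet {
  // Each rule pairs a predicate with the action taken when it matches.
  std::vector<std::pair<std::function<bool(const Query &)>, Action>> Rules;

public:
  RuleSet &legalIf(std::function<bool(const Query &)> Pred) {
    Rules.push_back({std::move(Pred), Action::Legal});
    return *this;
  }
  RuleSet &clampScalar(unsigned MinBits, unsigned MaxBits) {
    Rules.push_back({[=](const Query &Q) {
                       return Q.TypeBits < MinBits || Q.TypeBits > MaxBits;
                     },
                     Action::Clamp});
    return *this;
  }
  Action decide(const Query &Q) const {
    for (const auto &R : Rules)
      if (R.first(Q))
        return R.second;
    return Action::Unsupported;
  }
};

int main() {
  // Roughly analogous in spirit to:
  //   getActionDefinitionsBuilder(G_ADD).legalFor({s32, s64}).clampScalar(0, s8, s64)
  RuleSet AddRules;
  AddRules.legalIf([](const Query &Q) { return Q.TypeBits == 32 || Q.TypeBits == 64; })
      .clampScalar(8, 64);

  std::printf("s32  -> %d\n", static_cast<int>(AddRules.decide({32})));  // 0 == Legal
  std::printf("s128 -> %d\n", static_cast<int>(AddRules.decide({128}))); // 1 == Clamp
}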
diff --git a/llvm/lib/Target/X86/X86LegalizerInfo.h b/llvm/lib/Target/X86/X86LegalizerInfo.h
index 72d25096d72b..1f69feceae27 100644
--- a/llvm/lib/Target/X86/X86LegalizerInfo.h
+++ b/llvm/lib/Target/X86/X86LegalizerInfo.h
@@ -27,25 +27,12 @@ private:
/// Keep a reference to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
const X86Subtarget &Subtarget;
- const X86TargetMachine &TM;
public:
X86LegalizerInfo(const X86Subtarget &STI, const X86TargetMachine &TM);
bool legalizeIntrinsic(LegalizerHelper &Helper,
MachineInstr &MI) const override;
-
-private:
- void setLegalizerInfo32bit();
- void setLegalizerInfo64bit();
- void setLegalizerInfoSSE1();
- void setLegalizerInfoSSE2();
- void setLegalizerInfoSSE41();
- void setLegalizerInfoAVX();
- void setLegalizerInfoAVX2();
- void setLegalizerInfoAVX512();
- void setLegalizerInfoAVX512DQ();
- void setLegalizerInfoAVX512BW();
};
} // namespace llvm
#endif
diff --git a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
index 5d9a9231fea9..c17b96cc9fdd 100644
--- a/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
+++ b/llvm/lib/Target/X86/X86LoadValueInjectionLoadHardening.cpp
@@ -154,9 +154,9 @@ private:
using EdgeSet = MachineGadgetGraph::EdgeSet;
using NodeSet = MachineGadgetGraph::NodeSet;
- const X86Subtarget *STI;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
+ const X86Subtarget *STI = nullptr;
+ const TargetInstrInfo *TII = nullptr;
+ const TargetRegisterInfo *TRI = nullptr;
std::unique_ptr<MachineGadgetGraph>
getGadgetGraph(MachineFunction &MF, const MachineLoopInfo &MLI,
@@ -362,7 +362,7 @@ X86LoadValueInjectionLoadHardeningPass::getGadgetGraph(
SmallSet<NodeId, 8> UsesVisited, DefsVisited;
std::function<void(NodeAddr<DefNode *>)> AnalyzeDefUseChain =
[&](NodeAddr<DefNode *> Def) {
- if (Transmitters.find(Def.Id) != Transmitters.end())
+ if (Transmitters.contains(Def.Id))
return; // Already analyzed `Def`
// Use RDF to find all the uses of `Def`
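The change above swaps Transmitters.find(Def.Id) != Transmitters.end() for Transmitters.contains(Def.Id); LLVM's DenseMap provides a contains() member much like the C++20 standard containers. A tiny standalone sketch of the same idiom using std::map (compile with -std=c++20):

// Sketch of the membership-check rewrite above: `find() != end()` versus
// `contains()`. Both return the same answer; the latter states the intent.
#include <cassert>
#include <map>

int main() {
  std::map<int, const char *> Transmitters{{1, "def"}};

  // Old spelling: explicit iterator comparison.
  bool FoundOld = Transmitters.find(1) != Transmitters.end();
  // New spelling: direct membership query (C++20 std::map::contains).
  bool FoundNew = Transmitters.contains(1);

  assert(FoundOld && FoundNew);
  assert(!Transmitters.contains(2));
  return 0;
}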
diff --git a/llvm/lib/Target/X86/X86LowerAMXType.cpp b/llvm/lib/Target/X86/X86LowerAMXType.cpp
index 325bc3af83e8..0416f0f0d2ec 100644
--- a/llvm/lib/Target/X86/X86LowerAMXType.cpp
+++ b/llvm/lib/Target/X86/X86LowerAMXType.cpp
@@ -129,6 +129,8 @@ static std::pair<Value *, Value *> getShape(IntrinsicInst *II, unsigned OpNo) {
}
// a * b + c
// The shape depends on which operand.
+ case Intrinsic::x86_tcmmimfp16ps_internal:
+ case Intrinsic::x86_tcmmrlfp16ps_internal:
case Intrinsic::x86_tdpbssd_internal:
case Intrinsic::x86_tdpbsud_internal:
case Intrinsic::x86_tdpbusd_internal:
@@ -486,7 +488,7 @@ static void replaceWithTileLoad(Use &U, Value *Ptr, bool IsPHI = false) {
// Get tile shape.
IntrinsicInst *II = nullptr;
if (IsPHI) {
- Value *PhiOp = dyn_cast<PHINode>(V)->getIncomingValue(0);
+ Value *PhiOp = cast<PHINode>(V)->getIncomingValue(0);
II = cast<IntrinsicInst>(PhiOp);
} else {
II = cast<IntrinsicInst>(V);
@@ -525,7 +527,7 @@ public:
SmallVector<Instruction *, 2> &Incomings);
void replacePhiDefWithLoad(Instruction *PHI, Value *StorePtr);
bool volatileTileData();
- void volatileTilePHI(PHINode *Inst);
+ void volatileTilePHI(PHINode *PHI);
void volatileTileNonPHI(Instruction *I);
};
@@ -707,7 +709,7 @@ class X86LowerAMXCast {
public:
X86LowerAMXCast(Function &F) : Func(F), DT(nullptr) {}
- void combineCastStore(IntrinsicInst *Cast, StoreInst *ST);
+ bool combineCastStore(IntrinsicInst *Cast, StoreInst *ST);
bool combineLoadCast(IntrinsicInst *Cast, LoadInst *LD);
bool combineLdSt(SmallVectorImpl<Instruction *> &Casts);
bool combineAMXcast(TargetLibraryInfo *TLI);
@@ -920,26 +922,26 @@ bool X86LowerAMXCast::optimizeAMXCastFromPhi(
// -->
// call void @llvm.x86.tilestored64.internal(i16 %row, i16 %col, i8* %p,
// i64 64, x86_amx %42)
-void X86LowerAMXCast::combineCastStore(IntrinsicInst *Cast, StoreInst *ST) {
+bool X86LowerAMXCast::combineCastStore(IntrinsicInst *Cast, StoreInst *ST) {
Value *Tile = Cast->getOperand(0);
// TODO: If it is cast intrinsic or phi node, we can propagate the
// shape information through def-use chain.
if (!isAMXIntrinsic(Tile))
- return;
+ return false;
auto *II = cast<IntrinsicInst>(Tile);
// Tile is output from AMX intrinsic. The first operand of the
// intrinsic is row, the second operand of the intrinsic is column.
Value *Row = II->getOperand(0);
Value *Col = II->getOperand(1);
IRBuilder<> Builder(ST);
- // Use the maximum column as stride. It must be the same with load
- // stride.
- Value *Stride = Builder.getInt64(64);
+ // Stride should be equal to col (measured in bytes)
+ Value *Stride = Builder.CreateSExt(Col, Builder.getInt64Ty());
Value *I8Ptr =
Builder.CreateBitCast(ST->getOperand(1), Builder.getInt8PtrTy());
std::array<Value *, 5> Args = {Row, Col, I8Ptr, Stride, Tile};
Builder.CreateIntrinsic(Intrinsic::x86_tilestored64_internal, std::nullopt,
Args);
+ return true;
}
// %65 = load <256 x i32>, <256 x i32>* %p, align 64
@@ -959,8 +961,8 @@ bool X86LowerAMXCast::combineLoadCast(IntrinsicInst *Cast, LoadInst *LD) {
return false;
std::tie(Row, Col) = getShape(II, OpNo);
IRBuilder<> Builder(LD);
- // Use the maximun column as stride.
- Value *Stride = Builder.getInt64(64);
+ // Stride should be equal to col (measured in bytes)
+ Value *Stride = Builder.CreateSExt(Col, Builder.getInt64Ty());
Value *I8Ptr;
// To save compiling time, we create dominator tree when it is really
@@ -1004,9 +1006,10 @@ bool X86LowerAMXCast::combineLdSt(SmallVectorImpl<Instruction *> &Casts) {
StoreInst *Store = dyn_cast<StoreInst>(U);
if (!Store)
continue;
- combineCastStore(cast<IntrinsicInst>(Cast), Store);
- DeadStores.push_back(Store);
- Change = true;
+ if (combineCastStore(cast<IntrinsicInst>(Cast), Store)) {
+ DeadStores.push_back(Store);
+ Change = true;
+ }
}
for (auto *Store : DeadStores)
Store->eraseFromParent();
@@ -1085,8 +1088,14 @@ bool X86LowerAMXCast::combineAMXcast(TargetLibraryInfo *TLI) {
EraseInst(Vec2TileInsts);
EraseInst(Tile2VecInsts);
+ LLVM_DEBUG(dbgs() << "[LowerAMXType][combineAMXcast] IR dump after combining "
+ "Vec2Tile and Tile2Vec:\n";
+ Func.dump());
Change |= combineLdSt(LiveCasts);
EraseInst(LiveCasts);
+ LLVM_DEBUG(dbgs() << "[LowerAMXType][combineAMXcast] IR dump after combining "
+ "AMXCast and load/store:\n";
+ Func.dump());
// Handle the A->B->A cast, and there is an intervening PHI node.
for (BasicBlock &BB : Func) {
@@ -1114,6 +1123,9 @@ bool X86LowerAMXCast::combineAMXcast(TargetLibraryInfo *TLI) {
Instruction *I = DeadInst.pop_back_val();
Change |= DCEInstruction(I, DeadInst, TLI);
}
+ LLVM_DEBUG(dbgs() << "[LowerAMXType][combineAMXcast] IR dump after "
+ "optimizeAMXCastFromPhi:\n";
+ Func.dump());
return Change;
}
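One behavioral change in this file: combineCastStore and combineLoadCast now derive the tile stride from the column operand (sign-extended to i64) instead of hard-coding 64 bytes. A small standalone sketch of the row-major addressing that stride feeds into; this is a plain C++ stand-in for illustration, not the IR-building code itself.

// Plain C++ stand-in for the stride change above: the tile load/store now uses
// the tile's column width in bytes (the Col operand, sign-extended to i64) as
// the row stride instead of a hard-coded 64. Illustrative only.
#include <cstdint>
#include <cstdio>

// Byte offset of element (Row, Col) in a row-major tile whose rows are laid
// out StrideBytes apart in memory.
int64_t tileElementOffset(int64_t StrideBytes, int Row, int Col) {
  return StrideBytes * Row + Col;
}

int main() {
  int16_t ColBytes = 40;                            // tile row width in bytes
  int64_t Stride = static_cast<int64_t>(ColBytes);  // mirrors the sext(Col) above
  std::printf("offset of (2, 8) = %lld\n",
              static_cast<long long>(tileElementOffset(Stride, 2, 8)));  // 88
}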
diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp
index 6f89b2e79c45..ecab0c7e6179 100644
--- a/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -13,14 +13,17 @@
#include "MCTargetDesc/X86ATTInstPrinter.h"
#include "MCTargetDesc/X86BaseInfo.h"
+#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86InstComments.h"
#include "MCTargetDesc/X86ShuffleDecode.h"
#include "MCTargetDesc/X86TargetStreamer.h"
#include "X86AsmPrinter.h"
+#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "X86Subtarget.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -108,8 +111,7 @@ void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst,
if (InShadow) {
SmallString<256> Code;
SmallVector<MCFixup, 4> Fixups;
- raw_svector_ostream VecOS(Code);
- CodeEmitter->encodeInstruction(Inst, VecOS, Fixups, STI);
+ CodeEmitter->encodeInstruction(Inst, Code, Fixups, STI);
CurrentShadowSize += Code.size();
if (CurrentShadowSize >= RequiredShadowSize)
InShadow = false; // The shadow is big enough. Stop counting.
@@ -320,109 +322,6 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
return MCOperand::createExpr(Expr);
}
-/// Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with
-/// a short fixed-register form.
-static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
- unsigned ImmOp = Inst.getNumOperands() - 1;
- assert(Inst.getOperand(0).isReg() &&
- (Inst.getOperand(ImmOp).isImm() || Inst.getOperand(ImmOp).isExpr()) &&
- ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
- Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
- Inst.getNumOperands() == 2) &&
- "Unexpected instruction!");
-
- // Check whether the destination register can be fixed.
- unsigned Reg = Inst.getOperand(0).getReg();
- if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
- return;
-
- // If so, rewrite the instruction.
- MCOperand Saved = Inst.getOperand(ImmOp);
- Inst = MCInst();
- Inst.setOpcode(Opcode);
- Inst.addOperand(Saved);
-}
-
-/// If a movsx instruction has a shorter encoding for the used register
-/// simplify the instruction to use it instead.
-static void SimplifyMOVSX(MCInst &Inst) {
- unsigned NewOpcode = 0;
- unsigned Op0 = Inst.getOperand(0).getReg(), Op1 = Inst.getOperand(1).getReg();
- switch (Inst.getOpcode()) {
- default:
- llvm_unreachable("Unexpected instruction!");
- case X86::MOVSX16rr8: // movsbw %al, %ax --> cbtw
- if (Op0 == X86::AX && Op1 == X86::AL)
- NewOpcode = X86::CBW;
- break;
- case X86::MOVSX32rr16: // movswl %ax, %eax --> cwtl
- if (Op0 == X86::EAX && Op1 == X86::AX)
- NewOpcode = X86::CWDE;
- break;
- case X86::MOVSX64rr32: // movslq %eax, %rax --> cltq
- if (Op0 == X86::RAX && Op1 == X86::EAX)
- NewOpcode = X86::CDQE;
- break;
- }
-
- if (NewOpcode != 0) {
- Inst = MCInst();
- Inst.setOpcode(NewOpcode);
- }
-}
-
-/// Simplify things like MOV32rm to MOV32o32a.
-static void SimplifyShortMoveForm(X86AsmPrinter &Printer, MCInst &Inst,
- unsigned Opcode) {
- // Don't make these simplifications in 64-bit mode; other assemblers don't
- // perform them because they make the code larger.
- if (Printer.getSubtarget().is64Bit())
- return;
-
- bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
- unsigned AddrBase = IsStore;
- unsigned RegOp = IsStore ? 0 : 5;
- unsigned AddrOp = AddrBase + 3;
- assert(
- Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
- Inst.getOperand(AddrBase + X86::AddrBaseReg).isReg() &&
- Inst.getOperand(AddrBase + X86::AddrScaleAmt).isImm() &&
- Inst.getOperand(AddrBase + X86::AddrIndexReg).isReg() &&
- Inst.getOperand(AddrBase + X86::AddrSegmentReg).isReg() &&
- (Inst.getOperand(AddrOp).isExpr() || Inst.getOperand(AddrOp).isImm()) &&
- "Unexpected instruction!");
-
- // Check whether the destination register can be fixed.
- unsigned Reg = Inst.getOperand(RegOp).getReg();
- if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
- return;
-
- // Check whether this is an absolute address.
- // FIXME: We know TLVP symbol refs aren't, but there should be a better way
- // to do this here.
- bool Absolute = true;
- if (Inst.getOperand(AddrOp).isExpr()) {
- const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
- if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
- if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
- Absolute = false;
- }
-
- if (Absolute &&
- (Inst.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 0 ||
- Inst.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1 ||
- Inst.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0))
- return;
-
- // If so, rewrite the instruction.
- MCOperand Saved = Inst.getOperand(AddrOp);
- MCOperand Seg = Inst.getOperand(AddrBase + X86::AddrSegmentReg);
- Inst = MCInst();
- Inst.setOpcode(Opcode);
- Inst.addOperand(Saved);
- Inst.addOperand(Seg);
-}
-
static unsigned getRetOpcode(const X86Subtarget &Subtarget) {
return Subtarget.is64Bit() ? X86::RET64 : X86::RET32;
}
@@ -502,6 +401,15 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
if (auto MaybeMCOp = LowerMachineOperand(MI, MO))
OutMI.addOperand(*MaybeMCOp);
+ bool In64BitMode = AsmPrinter.getSubtarget().is64Bit();
+ if (X86::optimizeInstFromVEX3ToVEX2(OutMI, MI->getDesc()) ||
+ X86::optimizeShiftRotateWithImmediateOne(OutMI) ||
+ X86::optimizeVPCMPWithImmediateOneOrSix(OutMI) ||
+ X86::optimizeMOVSX(OutMI) || X86::optimizeINCDEC(OutMI, In64BitMode) ||
+ X86::optimizeMOV(OutMI, In64BitMode) ||
+ X86::optimizeToFixedRegisterOrShortImmediateForm(OutMI))
+ return;
+
// Handle a few special cases to eliminate operand modifiers.
switch (OutMI.getOpcode()) {
case X86::LEA64_32r:
@@ -514,7 +422,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
assert(OutMI.getOperand(1 + X86::AddrSegmentReg).getReg() == 0 &&
"LEA has segment specified!");
break;
-
case X86::MULX32Hrr:
case X86::MULX32Hrm:
case X86::MULX64Hrr:
@@ -534,237 +441,6 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.insert(OutMI.begin(), MCOperand::createReg(DestReg));
break;
}
-
- // Commute operands to get a smaller encoding by using VEX.R instead of VEX.B
- // if one of the registers is extended, but other isn't.
- case X86::VMOVZPQILo2PQIrr:
- case X86::VMOVAPDrr:
- case X86::VMOVAPDYrr:
- case X86::VMOVAPSrr:
- case X86::VMOVAPSYrr:
- case X86::VMOVDQArr:
- case X86::VMOVDQAYrr:
- case X86::VMOVDQUrr:
- case X86::VMOVDQUYrr:
- case X86::VMOVUPDrr:
- case X86::VMOVUPDYrr:
- case X86::VMOVUPSrr:
- case X86::VMOVUPSYrr: {
- if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
- X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg())) {
- unsigned NewOpc;
- switch (OutMI.getOpcode()) {
- default: llvm_unreachable("Invalid opcode");
- case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr; break;
- case X86::VMOVAPDrr: NewOpc = X86::VMOVAPDrr_REV; break;
- case X86::VMOVAPDYrr: NewOpc = X86::VMOVAPDYrr_REV; break;
- case X86::VMOVAPSrr: NewOpc = X86::VMOVAPSrr_REV; break;
- case X86::VMOVAPSYrr: NewOpc = X86::VMOVAPSYrr_REV; break;
- case X86::VMOVDQArr: NewOpc = X86::VMOVDQArr_REV; break;
- case X86::VMOVDQAYrr: NewOpc = X86::VMOVDQAYrr_REV; break;
- case X86::VMOVDQUrr: NewOpc = X86::VMOVDQUrr_REV; break;
- case X86::VMOVDQUYrr: NewOpc = X86::VMOVDQUYrr_REV; break;
- case X86::VMOVUPDrr: NewOpc = X86::VMOVUPDrr_REV; break;
- case X86::VMOVUPDYrr: NewOpc = X86::VMOVUPDYrr_REV; break;
- case X86::VMOVUPSrr: NewOpc = X86::VMOVUPSrr_REV; break;
- case X86::VMOVUPSYrr: NewOpc = X86::VMOVUPSYrr_REV; break;
- }
- OutMI.setOpcode(NewOpc);
- }
- break;
- }
- case X86::VMOVSDrr:
- case X86::VMOVSSrr: {
- if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(0).getReg()) &&
- X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
- unsigned NewOpc;
- switch (OutMI.getOpcode()) {
- default: llvm_unreachable("Invalid opcode");
- case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
- case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
- }
- OutMI.setOpcode(NewOpc);
- }
- break;
- }
-
- case X86::VPCMPBZ128rmi: case X86::VPCMPBZ128rmik:
- case X86::VPCMPBZ128rri: case X86::VPCMPBZ128rrik:
- case X86::VPCMPBZ256rmi: case X86::VPCMPBZ256rmik:
- case X86::VPCMPBZ256rri: case X86::VPCMPBZ256rrik:
- case X86::VPCMPBZrmi: case X86::VPCMPBZrmik:
- case X86::VPCMPBZrri: case X86::VPCMPBZrrik:
- case X86::VPCMPDZ128rmi: case X86::VPCMPDZ128rmik:
- case X86::VPCMPDZ128rmib: case X86::VPCMPDZ128rmibk:
- case X86::VPCMPDZ128rri: case X86::VPCMPDZ128rrik:
- case X86::VPCMPDZ256rmi: case X86::VPCMPDZ256rmik:
- case X86::VPCMPDZ256rmib: case X86::VPCMPDZ256rmibk:
- case X86::VPCMPDZ256rri: case X86::VPCMPDZ256rrik:
- case X86::VPCMPDZrmi: case X86::VPCMPDZrmik:
- case X86::VPCMPDZrmib: case X86::VPCMPDZrmibk:
- case X86::VPCMPDZrri: case X86::VPCMPDZrrik:
- case X86::VPCMPQZ128rmi: case X86::VPCMPQZ128rmik:
- case X86::VPCMPQZ128rmib: case X86::VPCMPQZ128rmibk:
- case X86::VPCMPQZ128rri: case X86::VPCMPQZ128rrik:
- case X86::VPCMPQZ256rmi: case X86::VPCMPQZ256rmik:
- case X86::VPCMPQZ256rmib: case X86::VPCMPQZ256rmibk:
- case X86::VPCMPQZ256rri: case X86::VPCMPQZ256rrik:
- case X86::VPCMPQZrmi: case X86::VPCMPQZrmik:
- case X86::VPCMPQZrmib: case X86::VPCMPQZrmibk:
- case X86::VPCMPQZrri: case X86::VPCMPQZrrik:
- case X86::VPCMPWZ128rmi: case X86::VPCMPWZ128rmik:
- case X86::VPCMPWZ128rri: case X86::VPCMPWZ128rrik:
- case X86::VPCMPWZ256rmi: case X86::VPCMPWZ256rmik:
- case X86::VPCMPWZ256rri: case X86::VPCMPWZ256rrik:
- case X86::VPCMPWZrmi: case X86::VPCMPWZrmik:
- case X86::VPCMPWZrri: case X86::VPCMPWZrrik: {
- // Turn immediate 0 into the VPCMPEQ instruction.
- if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 0) {
- unsigned NewOpc;
- switch (OutMI.getOpcode()) {
- default: llvm_unreachable("Invalid opcode");
- case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPEQBZ128rm; break;
- case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPEQBZ128rmk; break;
- case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPEQBZ128rr; break;
- case X86::VPCMPBZ128rrik: NewOpc = X86::VPCMPEQBZ128rrk; break;
- case X86::VPCMPBZ256rmi: NewOpc = X86::VPCMPEQBZ256rm; break;
- case X86::VPCMPBZ256rmik: NewOpc = X86::VPCMPEQBZ256rmk; break;
- case X86::VPCMPBZ256rri: NewOpc = X86::VPCMPEQBZ256rr; break;
- case X86::VPCMPBZ256rrik: NewOpc = X86::VPCMPEQBZ256rrk; break;
- case X86::VPCMPBZrmi: NewOpc = X86::VPCMPEQBZrm; break;
- case X86::VPCMPBZrmik: NewOpc = X86::VPCMPEQBZrmk; break;
- case X86::VPCMPBZrri: NewOpc = X86::VPCMPEQBZrr; break;
- case X86::VPCMPBZrrik: NewOpc = X86::VPCMPEQBZrrk; break;
- case X86::VPCMPDZ128rmi: NewOpc = X86::VPCMPEQDZ128rm; break;
- case X86::VPCMPDZ128rmib: NewOpc = X86::VPCMPEQDZ128rmb; break;
- case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPEQDZ128rmbk; break;
- case X86::VPCMPDZ128rmik: NewOpc = X86::VPCMPEQDZ128rmk; break;
- case X86::VPCMPDZ128rri: NewOpc = X86::VPCMPEQDZ128rr; break;
- case X86::VPCMPDZ128rrik: NewOpc = X86::VPCMPEQDZ128rrk; break;
- case X86::VPCMPDZ256rmi: NewOpc = X86::VPCMPEQDZ256rm; break;
- case X86::VPCMPDZ256rmib: NewOpc = X86::VPCMPEQDZ256rmb; break;
- case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPEQDZ256rmbk; break;
- case X86::VPCMPDZ256rmik: NewOpc = X86::VPCMPEQDZ256rmk; break;
- case X86::VPCMPDZ256rri: NewOpc = X86::VPCMPEQDZ256rr; break;
- case X86::VPCMPDZ256rrik: NewOpc = X86::VPCMPEQDZ256rrk; break;
- case X86::VPCMPDZrmi: NewOpc = X86::VPCMPEQDZrm; break;
- case X86::VPCMPDZrmib: NewOpc = X86::VPCMPEQDZrmb; break;
- case X86::VPCMPDZrmibk: NewOpc = X86::VPCMPEQDZrmbk; break;
- case X86::VPCMPDZrmik: NewOpc = X86::VPCMPEQDZrmk; break;
- case X86::VPCMPDZrri: NewOpc = X86::VPCMPEQDZrr; break;
- case X86::VPCMPDZrrik: NewOpc = X86::VPCMPEQDZrrk; break;
- case X86::VPCMPQZ128rmi: NewOpc = X86::VPCMPEQQZ128rm; break;
- case X86::VPCMPQZ128rmib: NewOpc = X86::VPCMPEQQZ128rmb; break;
- case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPEQQZ128rmbk; break;
- case X86::VPCMPQZ128rmik: NewOpc = X86::VPCMPEQQZ128rmk; break;
- case X86::VPCMPQZ128rri: NewOpc = X86::VPCMPEQQZ128rr; break;
- case X86::VPCMPQZ128rrik: NewOpc = X86::VPCMPEQQZ128rrk; break;
- case X86::VPCMPQZ256rmi: NewOpc = X86::VPCMPEQQZ256rm; break;
- case X86::VPCMPQZ256rmib: NewOpc = X86::VPCMPEQQZ256rmb; break;
- case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPEQQZ256rmbk; break;
- case X86::VPCMPQZ256rmik: NewOpc = X86::VPCMPEQQZ256rmk; break;
- case X86::VPCMPQZ256rri: NewOpc = X86::VPCMPEQQZ256rr; break;
- case X86::VPCMPQZ256rrik: NewOpc = X86::VPCMPEQQZ256rrk; break;
- case X86::VPCMPQZrmi: NewOpc = X86::VPCMPEQQZrm; break;
- case X86::VPCMPQZrmib: NewOpc = X86::VPCMPEQQZrmb; break;
- case X86::VPCMPQZrmibk: NewOpc = X86::VPCMPEQQZrmbk; break;
- case X86::VPCMPQZrmik: NewOpc = X86::VPCMPEQQZrmk; break;
- case X86::VPCMPQZrri: NewOpc = X86::VPCMPEQQZrr; break;
- case X86::VPCMPQZrrik: NewOpc = X86::VPCMPEQQZrrk; break;
- case X86::VPCMPWZ128rmi: NewOpc = X86::VPCMPEQWZ128rm; break;
- case X86::VPCMPWZ128rmik: NewOpc = X86::VPCMPEQWZ128rmk; break;
- case X86::VPCMPWZ128rri: NewOpc = X86::VPCMPEQWZ128rr; break;
- case X86::VPCMPWZ128rrik: NewOpc = X86::VPCMPEQWZ128rrk; break;
- case X86::VPCMPWZ256rmi: NewOpc = X86::VPCMPEQWZ256rm; break;
- case X86::VPCMPWZ256rmik: NewOpc = X86::VPCMPEQWZ256rmk; break;
- case X86::VPCMPWZ256rri: NewOpc = X86::VPCMPEQWZ256rr; break;
- case X86::VPCMPWZ256rrik: NewOpc = X86::VPCMPEQWZ256rrk; break;
- case X86::VPCMPWZrmi: NewOpc = X86::VPCMPEQWZrm; break;
- case X86::VPCMPWZrmik: NewOpc = X86::VPCMPEQWZrmk; break;
- case X86::VPCMPWZrri: NewOpc = X86::VPCMPEQWZrr; break;
- case X86::VPCMPWZrrik: NewOpc = X86::VPCMPEQWZrrk; break;
- }
-
- OutMI.setOpcode(NewOpc);
- OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
- break;
- }
-
- // Turn immediate 6 into the VPCMPGT instruction.
- if (OutMI.getOperand(OutMI.getNumOperands() - 1).getImm() == 6) {
- unsigned NewOpc;
- switch (OutMI.getOpcode()) {
- default: llvm_unreachable("Invalid opcode");
- case X86::VPCMPBZ128rmi: NewOpc = X86::VPCMPGTBZ128rm; break;
- case X86::VPCMPBZ128rmik: NewOpc = X86::VPCMPGTBZ128rmk; break;
- case X86::VPCMPBZ128rri: NewOpc = X86::VPCMPGTBZ128rr; break;
- case X86::VPCMPBZ128rrik: NewOpc = X86::VPCMPGTBZ128rrk; break;
- case X86::VPCMPBZ256rmi: NewOpc = X86::VPCMPGTBZ256rm; break;
- case X86::VPCMPBZ256rmik: NewOpc = X86::VPCMPGTBZ256rmk; break;
- case X86::VPCMPBZ256rri: NewOpc = X86::VPCMPGTBZ256rr; break;
- case X86::VPCMPBZ256rrik: NewOpc = X86::VPCMPGTBZ256rrk; break;
- case X86::VPCMPBZrmi: NewOpc = X86::VPCMPGTBZrm; break;
- case X86::VPCMPBZrmik: NewOpc = X86::VPCMPGTBZrmk; break;
- case X86::VPCMPBZrri: NewOpc = X86::VPCMPGTBZrr; break;
- case X86::VPCMPBZrrik: NewOpc = X86::VPCMPGTBZrrk; break;
- case X86::VPCMPDZ128rmi: NewOpc = X86::VPCMPGTDZ128rm; break;
- case X86::VPCMPDZ128rmib: NewOpc = X86::VPCMPGTDZ128rmb; break;
- case X86::VPCMPDZ128rmibk: NewOpc = X86::VPCMPGTDZ128rmbk; break;
- case X86::VPCMPDZ128rmik: NewOpc = X86::VPCMPGTDZ128rmk; break;
- case X86::VPCMPDZ128rri: NewOpc = X86::VPCMPGTDZ128rr; break;
- case X86::VPCMPDZ128rrik: NewOpc = X86::VPCMPGTDZ128rrk; break;
- case X86::VPCMPDZ256rmi: NewOpc = X86::VPCMPGTDZ256rm; break;
- case X86::VPCMPDZ256rmib: NewOpc = X86::VPCMPGTDZ256rmb; break;
- case X86::VPCMPDZ256rmibk: NewOpc = X86::VPCMPGTDZ256rmbk; break;
- case X86::VPCMPDZ256rmik: NewOpc = X86::VPCMPGTDZ256rmk; break;
- case X86::VPCMPDZ256rri: NewOpc = X86::VPCMPGTDZ256rr; break;
- case X86::VPCMPDZ256rrik: NewOpc = X86::VPCMPGTDZ256rrk; break;
- case X86::VPCMPDZrmi: NewOpc = X86::VPCMPGTDZrm; break;
- case X86::VPCMPDZrmib: NewOpc = X86::VPCMPGTDZrmb; break;
- case X86::VPCMPDZrmibk: NewOpc = X86::VPCMPGTDZrmbk; break;
- case X86::VPCMPDZrmik: NewOpc = X86::VPCMPGTDZrmk; break;
- case X86::VPCMPDZrri: NewOpc = X86::VPCMPGTDZrr; break;
- case X86::VPCMPDZrrik: NewOpc = X86::VPCMPGTDZrrk; break;
- case X86::VPCMPQZ128rmi: NewOpc = X86::VPCMPGTQZ128rm; break;
- case X86::VPCMPQZ128rmib: NewOpc = X86::VPCMPGTQZ128rmb; break;
- case X86::VPCMPQZ128rmibk: NewOpc = X86::VPCMPGTQZ128rmbk; break;
- case X86::VPCMPQZ128rmik: NewOpc = X86::VPCMPGTQZ128rmk; break;
- case X86::VPCMPQZ128rri: NewOpc = X86::VPCMPGTQZ128rr; break;
- case X86::VPCMPQZ128rrik: NewOpc = X86::VPCMPGTQZ128rrk; break;
- case X86::VPCMPQZ256rmi: NewOpc = X86::VPCMPGTQZ256rm; break;
- case X86::VPCMPQZ256rmib: NewOpc = X86::VPCMPGTQZ256rmb; break;
- case X86::VPCMPQZ256rmibk: NewOpc = X86::VPCMPGTQZ256rmbk; break;
- case X86::VPCMPQZ256rmik: NewOpc = X86::VPCMPGTQZ256rmk; break;
- case X86::VPCMPQZ256rri: NewOpc = X86::VPCMPGTQZ256rr; break;
- case X86::VPCMPQZ256rrik: NewOpc = X86::VPCMPGTQZ256rrk; break;
- case X86::VPCMPQZrmi: NewOpc = X86::VPCMPGTQZrm; break;
- case X86::VPCMPQZrmib: NewOpc = X86::VPCMPGTQZrmb; break;
- case X86::VPCMPQZrmibk: NewOpc = X86::VPCMPGTQZrmbk; break;
- case X86::VPCMPQZrmik: NewOpc = X86::VPCMPGTQZrmk; break;
- case X86::VPCMPQZrri: NewOpc = X86::VPCMPGTQZrr; break;
- case X86::VPCMPQZrrik: NewOpc = X86::VPCMPGTQZrrk; break;
- case X86::VPCMPWZ128rmi: NewOpc = X86::VPCMPGTWZ128rm; break;
- case X86::VPCMPWZ128rmik: NewOpc = X86::VPCMPGTWZ128rmk; break;
- case X86::VPCMPWZ128rri: NewOpc = X86::VPCMPGTWZ128rr; break;
- case X86::VPCMPWZ128rrik: NewOpc = X86::VPCMPGTWZ128rrk; break;
- case X86::VPCMPWZ256rmi: NewOpc = X86::VPCMPGTWZ256rm; break;
- case X86::VPCMPWZ256rmik: NewOpc = X86::VPCMPGTWZ256rmk; break;
- case X86::VPCMPWZ256rri: NewOpc = X86::VPCMPGTWZ256rr; break;
- case X86::VPCMPWZ256rrik: NewOpc = X86::VPCMPGTWZ256rrk; break;
- case X86::VPCMPWZrmi: NewOpc = X86::VPCMPGTWZrm; break;
- case X86::VPCMPWZrmik: NewOpc = X86::VPCMPGTWZrmk; break;
- case X86::VPCMPWZrri: NewOpc = X86::VPCMPGTWZrr; break;
- case X86::VPCMPWZrrik: NewOpc = X86::VPCMPGTWZrrk; break;
- }
-
- OutMI.setOpcode(NewOpc);
- OutMI.erase(&OutMI.getOperand(OutMI.getNumOperands() - 1));
- break;
- }
-
- break;
- }
-
// CALL64r, CALL64pcrel32 - These instructions used to have
// register inputs modeled as normal uses instead of implicit uses. As such,
// we used to truncate off all but the first operand (the callee). This
@@ -773,31 +449,27 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case X86::CALL64pcrel32:
assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
break;
-
case X86::EH_RETURN:
case X86::EH_RETURN64: {
OutMI = MCInst();
OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
break;
}
-
case X86::CLEANUPRET: {
// Replace CLEANUPRET with the appropriate RET.
OutMI = MCInst();
OutMI.setOpcode(getRetOpcode(AsmPrinter.getSubtarget()));
break;
}
-
case X86::CATCHRET: {
// Replace CATCHRET with the appropriate RET.
const X86Subtarget &Subtarget = AsmPrinter.getSubtarget();
- unsigned ReturnReg = Subtarget.is64Bit() ? X86::RAX : X86::EAX;
+ unsigned ReturnReg = In64BitMode ? X86::RAX : X86::EAX;
OutMI = MCInst();
OutMI.setOpcode(getRetOpcode(Subtarget));
OutMI.addOperand(MCOperand::createReg(ReturnReg));
break;
}
-
// TAILJMPd, TAILJMPd64, TailJMPd_cc - Lower to the correct jump
// instruction.
case X86::TAILJMPr:
@@ -808,13 +480,11 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
assert(OutMI.getNumOperands() == 1 && "Unexpected number of operands!");
OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
break;
-
case X86::TAILJMPd_CC:
case X86::TAILJMPd64_CC:
assert(OutMI.getNumOperands() == 2 && "Unexpected number of operands!");
OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
break;
-
case X86::TAILJMPm:
case X86::TAILJMPm64:
case X86::TAILJMPm64_REX:
@@ -822,176 +492,28 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
"Unexpected number of operands!");
OutMI.setOpcode(convertTailJumpOpcode(OutMI.getOpcode()));
break;
-
- case X86::DEC16r:
- case X86::DEC32r:
- case X86::INC16r:
- case X86::INC32r:
- // If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.
- if (!AsmPrinter.getSubtarget().is64Bit()) {
- unsigned Opcode;
- switch (OutMI.getOpcode()) {
- default: llvm_unreachable("Invalid opcode");
- case X86::DEC16r: Opcode = X86::DEC16r_alt; break;
- case X86::DEC32r: Opcode = X86::DEC32r_alt; break;
- case X86::INC16r: Opcode = X86::INC16r_alt; break;
- case X86::INC32r: Opcode = X86::INC32r_alt; break;
- }
- OutMI.setOpcode(Opcode);
- }
- break;
-
- // We don't currently select the correct instruction form for instructions
- // which have a short %eax, etc. form. Handle this by custom lowering, for
- // now.
- //
- // Note, we are currently not handling the following instructions:
- // MOV64ao8, MOV64o8a
- // XCHG16ar, XCHG32ar, XCHG64ar
- case X86::MOV8mr_NOREX:
- case X86::MOV8mr:
- case X86::MOV8rm_NOREX:
- case X86::MOV8rm:
- case X86::MOV16mr:
- case X86::MOV16rm:
- case X86::MOV32mr:
- case X86::MOV32rm: {
- unsigned NewOpc;
- switch (OutMI.getOpcode()) {
- default: llvm_unreachable("Invalid opcode");
- case X86::MOV8mr_NOREX:
- case X86::MOV8mr: NewOpc = X86::MOV8o32a; break;
- case X86::MOV8rm_NOREX:
- case X86::MOV8rm: NewOpc = X86::MOV8ao32; break;
- case X86::MOV16mr: NewOpc = X86::MOV16o32a; break;
- case X86::MOV16rm: NewOpc = X86::MOV16ao32; break;
- case X86::MOV32mr: NewOpc = X86::MOV32o32a; break;
- case X86::MOV32rm: NewOpc = X86::MOV32ao32; break;
- }
- SimplifyShortMoveForm(AsmPrinter, OutMI, NewOpc);
- break;
- }
-
- case X86::ADC8ri: case X86::ADC16ri: case X86::ADC32ri: case X86::ADC64ri32:
- case X86::ADD8ri: case X86::ADD16ri: case X86::ADD32ri: case X86::ADD64ri32:
- case X86::AND8ri: case X86::AND16ri: case X86::AND32ri: case X86::AND64ri32:
- case X86::CMP8ri: case X86::CMP16ri: case X86::CMP32ri: case X86::CMP64ri32:
- case X86::OR8ri: case X86::OR16ri: case X86::OR32ri: case X86::OR64ri32:
- case X86::SBB8ri: case X86::SBB16ri: case X86::SBB32ri: case X86::SBB64ri32:
- case X86::SUB8ri: case X86::SUB16ri: case X86::SUB32ri: case X86::SUB64ri32:
- case X86::TEST8ri:case X86::TEST16ri:case X86::TEST32ri:case X86::TEST64ri32:
- case X86::XOR8ri: case X86::XOR16ri: case X86::XOR32ri: case X86::XOR64ri32: {
- unsigned NewOpc;
- switch (OutMI.getOpcode()) {
- default: llvm_unreachable("Invalid opcode");
- case X86::ADC8ri: NewOpc = X86::ADC8i8; break;
- case X86::ADC16ri: NewOpc = X86::ADC16i16; break;
- case X86::ADC32ri: NewOpc = X86::ADC32i32; break;
- case X86::ADC64ri32: NewOpc = X86::ADC64i32; break;
- case X86::ADD8ri: NewOpc = X86::ADD8i8; break;
- case X86::ADD16ri: NewOpc = X86::ADD16i16; break;
- case X86::ADD32ri: NewOpc = X86::ADD32i32; break;
- case X86::ADD64ri32: NewOpc = X86::ADD64i32; break;
- case X86::AND8ri: NewOpc = X86::AND8i8; break;
- case X86::AND16ri: NewOpc = X86::AND16i16; break;
- case X86::AND32ri: NewOpc = X86::AND32i32; break;
- case X86::AND64ri32: NewOpc = X86::AND64i32; break;
- case X86::CMP8ri: NewOpc = X86::CMP8i8; break;
- case X86::CMP16ri: NewOpc = X86::CMP16i16; break;
- case X86::CMP32ri: NewOpc = X86::CMP32i32; break;
- case X86::CMP64ri32: NewOpc = X86::CMP64i32; break;
- case X86::OR8ri: NewOpc = X86::OR8i8; break;
- case X86::OR16ri: NewOpc = X86::OR16i16; break;
- case X86::OR32ri: NewOpc = X86::OR32i32; break;
- case X86::OR64ri32: NewOpc = X86::OR64i32; break;
- case X86::SBB8ri: NewOpc = X86::SBB8i8; break;
- case X86::SBB16ri: NewOpc = X86::SBB16i16; break;
- case X86::SBB32ri: NewOpc = X86::SBB32i32; break;
- case X86::SBB64ri32: NewOpc = X86::SBB64i32; break;
- case X86::SUB8ri: NewOpc = X86::SUB8i8; break;
- case X86::SUB16ri: NewOpc = X86::SUB16i16; break;
- case X86::SUB32ri: NewOpc = X86::SUB32i32; break;
- case X86::SUB64ri32: NewOpc = X86::SUB64i32; break;
- case X86::TEST8ri: NewOpc = X86::TEST8i8; break;
- case X86::TEST16ri: NewOpc = X86::TEST16i16; break;
- case X86::TEST32ri: NewOpc = X86::TEST32i32; break;
- case X86::TEST64ri32: NewOpc = X86::TEST64i32; break;
- case X86::XOR8ri: NewOpc = X86::XOR8i8; break;
- case X86::XOR16ri: NewOpc = X86::XOR16i16; break;
- case X86::XOR32ri: NewOpc = X86::XOR32i32; break;
- case X86::XOR64ri32: NewOpc = X86::XOR64i32; break;
- }
- SimplifyShortImmForm(OutMI, NewOpc);
- break;
- }
-
- // Try to shrink some forms of movsx.
- case X86::MOVSX16rr8:
- case X86::MOVSX32rr16:
- case X86::MOVSX64rr32:
- SimplifyMOVSX(OutMI);
- break;
-
- case X86::VCMPPDrri:
- case X86::VCMPPDYrri:
- case X86::VCMPPSrri:
- case X86::VCMPPSYrri:
- case X86::VCMPSDrr:
- case X86::VCMPSSrr: {
- // Swap the operands if it will enable a 2 byte VEX encoding.
- // FIXME: Change the immediate to improve opportunities?
- if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
- X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg())) {
- unsigned Imm = MI->getOperand(3).getImm() & 0x7;
- switch (Imm) {
- default: break;
- case 0x00: // EQUAL
- case 0x03: // UNORDERED
- case 0x04: // NOT EQUAL
- case 0x07: // ORDERED
- std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
- break;
- }
- }
- break;
- }
-
- case X86::VMOVHLPSrr:
- case X86::VUNPCKHPDrr:
- // These are not truly commutable so hide them from the default case.
- break;
-
case X86::MASKMOVDQU:
case X86::VMASKMOVDQU:
- if (AsmPrinter.getSubtarget().is64Bit())
+ if (In64BitMode)
OutMI.setFlags(X86::IP_HAS_AD_SIZE);
break;
-
- default: {
- // If the instruction is a commutable arithmetic instruction we might be
- // able to commute the operands to get a 2 byte VEX prefix.
- uint64_t TSFlags = MI->getDesc().TSFlags;
- if (MI->getDesc().isCommutable() &&
- (TSFlags & X86II::EncodingMask) == X86II::VEX &&
- (TSFlags & X86II::OpMapMask) == X86II::TB &&
- (TSFlags & X86II::FormMask) == X86II::MRMSrcReg &&
- !(TSFlags & X86II::VEX_W) && (TSFlags & X86II::VEX_4V) &&
- OutMI.getNumOperands() == 3) {
- if (!X86II::isX86_64ExtendedReg(OutMI.getOperand(1).getReg()) &&
- X86II::isX86_64ExtendedReg(OutMI.getOperand(2).getReg()))
- std::swap(OutMI.getOperand(1), OutMI.getOperand(2));
- }
+ case X86::BSF16rm:
+ case X86::BSF16rr:
+ case X86::BSF32rm:
+ case X86::BSF32rr:
+ case X86::BSF64rm:
+ case X86::BSF64rr: {
// Add a REP prefix to BSF instructions so that new processors can
// recognize them as TZCNT, which has better performance than BSF.
- if (X86::isBSF(OutMI.getOpcode()) && !MF.getFunction().hasOptSize()) {
- // BSF and TZCNT have different interpretations on ZF bit. So make sure
- // it won't be used later.
- const MachineOperand *FlagDef = MI->findRegisterDefOperand(X86::EFLAGS);
- if (FlagDef && FlagDef->isDead())
- OutMI.setFlags(X86::IP_HAS_REPEAT);
- }
+ // BSF and TZCNT have different interpretations of the ZF bit, so make sure
+ // it won't be used later.
+ const MachineOperand *FlagDef = MI->findRegisterDefOperand(X86::EFLAGS);
+ if (!MF.getFunction().hasOptSize() && FlagDef && FlagDef->isDead())
+ OutMI.setFlags(X86::IP_HAS_REPEAT);
break;
}
+ default:
+ break;
}
}
@@ -1446,8 +968,7 @@ void X86AsmPrinter::LowerPATCHABLE_OP(const MachineInstr &MI,
SmallString<256> Code;
if (!EmptyInst) {
SmallVector<MCFixup, 4> Fixups;
- raw_svector_ostream VecOS(Code);
- CodeEmitter->encodeInstruction(MCI, VecOS, Fixups, getSubtargetInfo());
+ CodeEmitter->encodeInstruction(MCI, Code, Fixups, getSubtargetInfo());
}
if (Code.size() < MinSize) {
@@ -1608,6 +1129,7 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
assert(Op->isReg() && "Only support arguments in registers");
SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
+ assert(SrcRegs[I].isValid() && "Invalid operand");
if (SrcRegs[I] != DestRegs[I]) {
UsedMask[I] = true;
EmitAndCountInstruction(
@@ -1706,6 +1228,7 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI,
// TODO: Is register only support adequate?
assert(Op->isReg() && "Only supports arguments in registers");
SrcRegs[I] = getX86SubSuperRegister(Op->getReg(), 64);
+ assert(SrcRegs[I].isValid() && "Invalid operand");
if (SrcRegs[I] != DestRegs[I]) {
UsedMask[I] = true;
EmitAndCountInstruction(
@@ -2001,13 +1524,31 @@ static void printConstant(const APFloat &Flt, raw_ostream &CS) {
CS << Str;
}
-static void printConstant(const Constant *COp, raw_ostream &CS) {
+static void printConstant(const Constant *COp, unsigned BitWidth,
+ raw_ostream &CS) {
if (isa<UndefValue>(COp)) {
CS << "u";
} else if (auto *CI = dyn_cast<ConstantInt>(COp)) {
printConstant(CI->getValue(), CS);
} else if (auto *CF = dyn_cast<ConstantFP>(COp)) {
printConstant(CF->getValueAPF(), CS);
+ } else if (auto *CDS = dyn_cast<ConstantDataSequential>(COp)) {
+ Type *EltTy = CDS->getElementType();
+ bool IsInteger = EltTy->isIntegerTy();
+ bool IsFP = EltTy->isHalfTy() || EltTy->isFloatTy() || EltTy->isDoubleTy();
+ unsigned EltBits = EltTy->getPrimitiveSizeInBits();
+ unsigned E = std::min(BitWidth / EltBits, CDS->getNumElements());
+ assert((BitWidth % EltBits) == 0 && "Broadcast element size mismatch");
+ for (unsigned I = 0; I != E; ++I) {
+ if (I != 0)
+ CS << ",";
+ if (IsInteger)
+ printConstant(CDS->getElementAsAPInt(I), CS);
+ else if (IsFP)
+ printConstant(CDS->getElementAsAPFloat(I), CS);
+ else
+ CS << "?";
+ }
} else {
CS << "?";
}
@@ -2015,7 +1556,8 @@ static void printConstant(const Constant *COp, raw_ostream &CS) {
void X86AsmPrinter::EmitSEHInstruction(const MachineInstr *MI) {
assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
- assert(getSubtarget().isOSWindows() && "SEH_ instruction Windows only");
+ assert((getSubtarget().isOSWindows() || TM.getTargetTriple().isUEFI()) &&
+ "SEH_ instruction Windows and UEFI only");
// Use the .cv_fpo directives if we're emitting CodeView on 32-bit x86.
if (EmitFPOData) {
@@ -2378,7 +1920,8 @@ static void addConstantComments(const MachineInstr *MI,
++i) {
if (i != 0 || l != 0)
CS << ",";
- printConstant(CV->getOperand(i), CS);
+ printConstant(CV->getOperand(i),
+ CV->getType()->getPrimitiveSizeInBits(), CS);
}
}
CS << ">";
@@ -2421,40 +1964,40 @@ static void addConstantComments(const MachineInstr *MI,
assert(MI->getNumOperands() >= (1 + X86::AddrNumOperands) &&
"Unexpected number of operands!");
if (auto *C = getConstantFromPool(*MI, MI->getOperand(1 + X86::AddrDisp))) {
- int NumElts;
+ int NumElts, EltBits;
switch (MI->getOpcode()) {
default: llvm_unreachable("Invalid opcode");
- case X86::MOVDDUPrm: NumElts = 2; break;
- case X86::VMOVDDUPrm: NumElts = 2; break;
- case X86::VMOVDDUPZ128rm: NumElts = 2; break;
- case X86::VBROADCASTSSrm: NumElts = 4; break;
- case X86::VBROADCASTSSYrm: NumElts = 8; break;
- case X86::VBROADCASTSSZ128rm: NumElts = 4; break;
- case X86::VBROADCASTSSZ256rm: NumElts = 8; break;
- case X86::VBROADCASTSSZrm: NumElts = 16; break;
- case X86::VBROADCASTSDYrm: NumElts = 4; break;
- case X86::VBROADCASTSDZ256rm: NumElts = 4; break;
- case X86::VBROADCASTSDZrm: NumElts = 8; break;
- case X86::VPBROADCASTBrm: NumElts = 16; break;
- case X86::VPBROADCASTBYrm: NumElts = 32; break;
- case X86::VPBROADCASTBZ128rm: NumElts = 16; break;
- case X86::VPBROADCASTBZ256rm: NumElts = 32; break;
- case X86::VPBROADCASTBZrm: NumElts = 64; break;
- case X86::VPBROADCASTDrm: NumElts = 4; break;
- case X86::VPBROADCASTDYrm: NumElts = 8; break;
- case X86::VPBROADCASTDZ128rm: NumElts = 4; break;
- case X86::VPBROADCASTDZ256rm: NumElts = 8; break;
- case X86::VPBROADCASTDZrm: NumElts = 16; break;
- case X86::VPBROADCASTQrm: NumElts = 2; break;
- case X86::VPBROADCASTQYrm: NumElts = 4; break;
- case X86::VPBROADCASTQZ128rm: NumElts = 2; break;
- case X86::VPBROADCASTQZ256rm: NumElts = 4; break;
- case X86::VPBROADCASTQZrm: NumElts = 8; break;
- case X86::VPBROADCASTWrm: NumElts = 8; break;
- case X86::VPBROADCASTWYrm: NumElts = 16; break;
- case X86::VPBROADCASTWZ128rm: NumElts = 8; break;
- case X86::VPBROADCASTWZ256rm: NumElts = 16; break;
- case X86::VPBROADCASTWZrm: NumElts = 32; break;
+ case X86::MOVDDUPrm: NumElts = 2; EltBits = 64; break;
+ case X86::VMOVDDUPrm: NumElts = 2; EltBits = 64; break;
+ case X86::VMOVDDUPZ128rm: NumElts = 2; EltBits = 64; break;
+ case X86::VBROADCASTSSrm: NumElts = 4; EltBits = 32; break;
+ case X86::VBROADCASTSSYrm: NumElts = 8; EltBits = 32; break;
+ case X86::VBROADCASTSSZ128rm: NumElts = 4; EltBits = 32; break;
+ case X86::VBROADCASTSSZ256rm: NumElts = 8; EltBits = 32; break;
+ case X86::VBROADCASTSSZrm: NumElts = 16; EltBits = 32; break;
+ case X86::VBROADCASTSDYrm: NumElts = 4; EltBits = 64; break;
+ case X86::VBROADCASTSDZ256rm: NumElts = 4; EltBits = 64; break;
+ case X86::VBROADCASTSDZrm: NumElts = 8; EltBits = 64; break;
+ case X86::VPBROADCASTBrm: NumElts = 16; EltBits = 8; break;
+ case X86::VPBROADCASTBYrm: NumElts = 32; EltBits = 8; break;
+ case X86::VPBROADCASTBZ128rm: NumElts = 16; EltBits = 8; break;
+ case X86::VPBROADCASTBZ256rm: NumElts = 32; EltBits = 8; break;
+ case X86::VPBROADCASTBZrm: NumElts = 64; EltBits = 8; break;
+ case X86::VPBROADCASTDrm: NumElts = 4; EltBits = 32; break;
+ case X86::VPBROADCASTDYrm: NumElts = 8; EltBits = 32; break;
+ case X86::VPBROADCASTDZ128rm: NumElts = 4; EltBits = 32; break;
+ case X86::VPBROADCASTDZ256rm: NumElts = 8; EltBits = 32; break;
+ case X86::VPBROADCASTDZrm: NumElts = 16; EltBits = 32; break;
+ case X86::VPBROADCASTQrm: NumElts = 2; EltBits = 64; break;
+ case X86::VPBROADCASTQYrm: NumElts = 4; EltBits = 64; break;
+ case X86::VPBROADCASTQZ128rm: NumElts = 2; EltBits = 64; break;
+ case X86::VPBROADCASTQZ256rm: NumElts = 4; EltBits = 64; break;
+ case X86::VPBROADCASTQZrm: NumElts = 8; EltBits = 64; break;
+ case X86::VPBROADCASTWrm: NumElts = 8; EltBits = 16; break;
+ case X86::VPBROADCASTWYrm: NumElts = 16; EltBits = 16; break;
+ case X86::VPBROADCASTWZ128rm: NumElts = 8; EltBits = 16; break;
+ case X86::VPBROADCASTWZ256rm: NumElts = 16; EltBits = 16; break;
+ case X86::VPBROADCASTWZrm: NumElts = 32; EltBits = 16; break;
}
std::string Comment;
@@ -2465,7 +2008,7 @@ static void addConstantComments(const MachineInstr *MI,
for (int i = 0; i != NumElts; ++i) {
if (i != 0)
CS << ",";
- printConstant(C, CS);
+ printConstant(C, EltBits, CS);
}
CS << "]";
OutStreamer.AddComment(CS.str());
@@ -2597,6 +2140,7 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
if (HasActiveDwarfFrame && !hasFP) {
OutStreamer->emitCFIAdjustCfaOffset(-stackGrowth);
+ MF->getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);
}
// Emit the label.
@@ -2712,9 +2256,10 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
for (MBBI = PrevCrossBBInst(MBBI);
MBBI != MachineBasicBlock::const_iterator();
MBBI = PrevCrossBBInst(MBBI)) {
- // Conservatively assume that pseudo instructions don't emit code and keep
- // looking for a call. We may emit an unnecessary nop in some cases.
- if (!MBBI->isPseudo()) {
+ // Pseudo instructions that aren't a call are assumed not to emit any
+ // code. If they do, the worst case is that we emit unnecessary nops
+ // after a call.
+ if (MBBI->isCall() || !MBBI->isPseudo()) {
if (MBBI->isCall())
EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
break;
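The lowering rewrite above folds the old per-opcode simplifications (SimplifyShortImmForm, SimplifyMOVSX, the VEX3-to-VEX2 commutes, the VPCMP-to-VPCMPEQ/VPCMPGT rewrites, and so on) into the X86EncodingOptimization helpers and chains them with ||, so the first helper that rewrites OutMI ends the lowering early. A standalone sketch of that short-circuit dispatch pattern follows; the helper names below are made up for illustration.

// Standalone sketch (made-up helper names) of the short-circuit dispatch used
// above: each peephole returns true if it rewrote the instruction, and `||`
// stops at the first one that did.
#include <cstdio>

struct Inst { int Opcode; };

bool shrinkMovsx(Inst &I)     { if (I.Opcode == 1) { I.Opcode = 100; return true; } return false; }
bool shrinkIncDec(Inst &I)    { if (I.Opcode == 2) { I.Opcode = 200; return true; } return false; }
bool useShortImmForm(Inst &I) { if (I.Opcode == 3) { I.Opcode = 300; return true; } return false; }

void lower(Inst &I) {
  // Mirrors: if (optimizeA(OutMI) || optimizeB(OutMI) || ...) return;
  if (shrinkMovsx(I) || shrinkIncDec(I) || useShortImmForm(I))
    return;
  // Otherwise fall through to the opcode-specific special cases.
}

int main() {
  Inst I{2};
  lower(I);
  std::printf("opcode after lowering: %d\n", I.Opcode); // 200
}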
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index 6d7cf165699b..9b2cc35c57e0 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -117,6 +117,13 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// determine if we should insert tilerelease in frame lowering.
bool HasVirtualTileReg = false;
+ /// True if this function has CFI directives that adjust the CFA.
+ /// This is used to determine if we should direct the debugger to use
+ /// the CFA instead of the stack pointer.
+ bool HasCFIAdjustCfa = false;
+
+ MachineInstr *StackPtrSaveMI = nullptr;
+
std::optional<int> SwiftAsyncContextFrameIdx;
// Preallocated fields are only used during isel.
@@ -149,6 +156,9 @@ public:
bool getRestoreBasePointer() const { return RestoreBasePointerOffset!=0; }
void setRestoreBasePointer(const MachineFunction *MF);
+ void setRestoreBasePointer(unsigned CalleeSavedFrameSize) {
+ RestoreBasePointerOffset = -CalleeSavedFrameSize;
+ }
int getRestoreBasePointerOffset() const {return RestoreBasePointerOffset; }
DenseMap<int, unsigned>& getWinEHXMMSlotInfo() { return WinEHXMMSlotInfo; }
@@ -222,6 +232,12 @@ public:
bool hasVirtualTileReg() const { return HasVirtualTileReg; }
void setHasVirtualTileReg(bool v) { HasVirtualTileReg = v; }
+ bool hasCFIAdjustCfa() const { return HasCFIAdjustCfa; }
+ void setHasCFIAdjustCfa(bool v) { HasCFIAdjustCfa = v; }
+
+ void setStackPtrSaveMI(MachineInstr *MI) { StackPtrSaveMI = MI; }
+ MachineInstr *getStackPtrSaveMI() const { return StackPtrSaveMI; }
+
std::optional<int> getSwiftAsyncContextFrameIdx() const {
return SwiftAsyncContextFrameIdx;
}
diff --git a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
index e0018a0ea58b..3172896a8f60 100644
--- a/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -321,8 +321,7 @@ int X86OptimizeLEAPass::calcInstrDist(const MachineInstr &First,
// presented in InstrPos.
assert(Last.getParent() == First.getParent() &&
"Instructions are in different basic blocks");
- assert(InstrPos.find(&First) != InstrPos.end() &&
- InstrPos.find(&Last) != InstrPos.end() &&
+ assert(InstrPos.contains(&First) && InstrPos.contains(&Last) &&
"Instructions' positions are undefined");
return InstrPos[&Last] - InstrPos[&First];
diff --git a/llvm/lib/Target/X86/X86PartialReduction.cpp b/llvm/lib/Target/X86/X86PartialReduction.cpp
index c760a32e2579..a11be9507cea 100644
--- a/llvm/lib/Target/X86/X86PartialReduction.cpp
+++ b/llvm/lib/Target/X86/X86PartialReduction.cpp
@@ -33,8 +33,8 @@ using namespace llvm;
namespace {
class X86PartialReduction : public FunctionPass {
- const DataLayout *DL;
- const X86Subtarget *ST;
+ const DataLayout *DL = nullptr;
+ const X86Subtarget *ST = nullptr;
public:
static char ID; // Pass identification, replacement for typeid.
diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td
index d2460e12b005..49ef6efc6aec 100644
--- a/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/llvm/lib/Target/X86/X86PfmCounters.td
@@ -290,4 +290,17 @@ def ZnVer3PfmCounters : ProcPfmCounters {
];
}
def : PfmCountersBinding<"znver3", ZnVer3PfmCounters>;
-def : PfmCountersBinding<"znver4", ZnVer3PfmCounters>;
+
+def ZnVer4PfmCounters : ProcPfmCounters {
+ let CycleCounter = PfmCounter<"cycles_not_in_halt">;
+ let UopsCounter = PfmCounter<"retired_ops">;
+ let IssueCounters = [
+ PfmIssueCounter<"Zn4Int", "ops_type_dispatched_from_decoder:int_disp_retire_mode">,
+ PfmIssueCounter<"Zn4FPU", "ops_type_dispatched_from_decoder:fp_disp_retire_mode">,
+ PfmIssueCounter<"Zn4Load", "ls_dispatch:ld_dispatch">,
+ PfmIssueCounter<"Zn4Store", "ls_dispatch:store_dispatch">,
+ PfmIssueCounter<"Zn4Divider", "div_op_count">,
+ PfmIssueCounter<"Zn4AGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">
+ ];
+}
+def : PfmCountersBinding<"znver4", ZnVer4PfmCounters>;
diff --git a/llvm/lib/Target/X86/X86PreAMXConfig.cpp b/llvm/lib/Target/X86/X86PreAMXConfig.cpp
index 2429b85cf868..c9c59af8d6d7 100644
--- a/llvm/lib/Target/X86/X86PreAMXConfig.cpp
+++ b/llvm/lib/Target/X86/X86PreAMXConfig.cpp
@@ -36,6 +36,7 @@
//
#include "X86.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
diff --git a/llvm/lib/Target/X86/X86PreTileConfig.cpp b/llvm/lib/Target/X86/X86PreTileConfig.cpp
index 479db8585ca0..a382db493fd4 100644
--- a/llvm/lib/Target/X86/X86PreTileConfig.cpp
+++ b/llvm/lib/Target/X86/X86PreTileConfig.cpp
@@ -43,12 +43,10 @@ using namespace llvm;
#define DEBUG_TYPE "tile-pre-config"
static void emitErrorMsg(MachineFunction &MF) {
- SmallString<32> Str;
- Twine ErrorMsg =
- MF.getName() +
- ": Failed to config tile register, please define the shape earlier";
LLVMContext &Context = MF.getMMI().getModule()->getContext();
- Context.emitError(ErrorMsg);
+ Context.emitError(
+ MF.getName() +
+ ": Failed to config tile register, please define the shape earlier");
}
namespace {
@@ -98,8 +96,8 @@ struct BBInfo {
};
class X86PreTileConfig : public MachineFunctionPass {
- MachineRegisterInfo *MRI;
- const MachineLoopInfo *MLI;
+ MachineRegisterInfo *MRI = nullptr;
+ const MachineLoopInfo *MLI = nullptr;
SmallSet<MachineInstr *, 8> DefVisited;
DenseMap<MachineBasicBlock *, BBInfo> BBVisitedInfo;
DenseMap<MachineBasicBlock *, SmallVector<MIRef, 8>> ShapeBBs;
diff --git a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
index 733db70f14a2..3160969e81e4 100644
--- a/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterBankInfo.cpp
@@ -36,7 +36,8 @@ X86RegisterBankInfo::X86RegisterBankInfo(const TargetRegisterInfo &TRI) {
// GR64 + its subclasses.
assert(RBGPR.covers(*TRI.getRegClass(X86::GR64RegClassID)) &&
"Subclass not added?");
- assert(RBGPR.getSize() == 64 && "GPRs should hold up to 64-bit");
+ assert(getMaximumSize(RBGPR.getID()) == 64 &&
+ "GPRs should hold up to 64-bit");
}
const RegisterBank &
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 7ce3dca7f3a7..bd29e9317ca5 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -329,8 +329,6 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_64_Intel_OCL_BI_SaveList;
break;
}
- case CallingConv::HHVM:
- return CSR_64_HHVM_SaveList;
case CallingConv::X86_RegCall:
if (Is64Bit) {
if (IsWin64) {
@@ -451,8 +449,6 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
return CSR_64_Intel_OCL_BI_RegMask;
break;
}
- case CallingConv::HHVM:
- return CSR_64_HHVM_RegMask;
case CallingConv::X86_RegCall:
if (Is64Bit) {
if (IsWin64) {
@@ -706,6 +702,11 @@ static bool CantUseSP(const MachineFrameInfo &MFI) {
bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ // We have a virtual register to reference the argument, so we don't need a
+ // base pointer.
+ if (X86FI->getStackPtrSaveMI() != nullptr)
+ return false;
+
if (X86FI->hasPreallocatedCall())
return true;
@@ -742,6 +743,13 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
return true;
}
+bool X86RegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
+ if (TargetRegisterInfo::shouldRealignStack(MF))
+ return true;
+
+ return !Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR;
+}
+
// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
// TODO: In this case we should be really trying first to entirely eliminate
@@ -782,6 +790,45 @@ static bool isFuncletReturnInstr(MachineInstr &MI) {
llvm_unreachable("impossible");
}
+void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ unsigned FIOperandNum,
+ Register BaseReg,
+ int FIOffset) const {
+ MachineInstr &MI = *II;
+ unsigned Opc = MI.getOpcode();
+ if (Opc == TargetOpcode::LOCAL_ESCAPE) {
+ MachineOperand &FI = MI.getOperand(FIOperandNum);
+ FI.ChangeToImmediate(FIOffset);
+ return;
+ }
+
+ MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);
+
+ // The frame index format for stackmaps and patchpoints is different from the
+ // X86 format. It only has a FI and an offset.
+ if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
+ assert(BasePtr == FramePtr && "Expected the FP as base register");
+ int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
+ MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+ return;
+ }
+
+ if (MI.getOperand(FIOperandNum + 3).isImm()) {
+ // Offset is a 32-bit integer.
+ int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
+ int Offset = FIOffset + Imm;
+ assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
+ "Requesting 64-bit offset in 32-bit immediate!");
+ if (Offset != 0)
+ MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
+ } else {
+ // Offset is symbolic. This is extremely rare.
+ uint64_t Offset =
+ FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
+ MI.getOperand(FIOperandNum + 3).setOffset(Offset);
+ }
+}
+
bool
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
@@ -892,8 +939,7 @@ unsigned X86RegisterInfo::findDeadCallerSavedReg(
case X86::EH_RETURN:
case X86::EH_RETURN64: {
SmallSet<uint16_t, 8> Uses;
- for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
- MachineOperand &MO = MBBI->getOperand(I);
+ for (MachineOperand &MO : MBBI->operands()) {
if (!MO.isReg() || MO.isDef())
continue;
Register Reg = MO.getReg();
@@ -963,6 +1009,8 @@ static ShapeT getTileShape(Register VirtReg, VirtRegMap *VRM,
case X86::PTILEZEROV:
case X86::PTDPBF16PSV:
case X86::PTDPFP16PSV:
+ case X86::PTCMMIMFP16PSV:
+ case X86::PTCMMRLFP16PSV:
MachineOperand &MO1 = MI->getOperand(1);
MachineOperand &MO2 = MI->getOperand(2);
ShapeT Shape(&MO1, &MO2, MRI);
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index f88d4b18f1d8..da7b171e4cf6 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -133,6 +133,12 @@ public:
bool canRealignStack(const MachineFunction &MF) const override;
+ bool shouldRealignStack(const MachineFunction &MF) const override;
+
+ void eliminateFrameIndex(MachineBasicBlock::iterator II,
+ unsigned FIOperandNum, Register BaseReg,
+ int FIOffset) const;
+
bool eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index aa4b83a6aaee..1e6477e658b9 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -320,12 +320,12 @@ def DF : X86Reg<"dirflag", 0>;
// Segment registers
-def CS : X86Reg<"cs", 1>;
-def DS : X86Reg<"ds", 3>;
-def SS : X86Reg<"ss", 2>;
-def ES : X86Reg<"es", 0>;
-def FS : X86Reg<"fs", 4>;
-def GS : X86Reg<"gs", 5>;
+def CS : X86Reg<"cs", 1>, DwarfRegNum<[51, -2, 41]>;
+def DS : X86Reg<"ds", 3>, DwarfRegNum<[53, -2, 43]>;
+def SS : X86Reg<"ss", 2>, DwarfRegNum<[52, -2, 42]>;
+def ES : X86Reg<"es", 0>, DwarfRegNum<[50, -2, 40]>;
+def FS : X86Reg<"fs", 4>, DwarfRegNum<[54, -2, 44]>;
+def GS : X86Reg<"gs", 5>, DwarfRegNum<[55, -2, 45]>;
def FS_BASE : X86Reg<"fs.base", 0>, DwarfRegNum<[58, -2, -2]>;
def GS_BASE : X86Reg<"gs.base", 0>, DwarfRegNum<[59, -2, -2]>;
@@ -433,6 +433,18 @@ def GR64PLTSafe : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, R8, R9,
RBX, R14, R15, R12, R13, RBP)>;
+// It includes the GPRs that are used as scratch registers for the Linux64
+// calling convention.
+def GR64_ArgRef: RegisterClass<"X86", [i64], 64, (add R10, R11)> {
+ let GeneratePressureSet = 0;
+}
+
+// It includes the GPRs that are used as scratch registers for the Linux32
+// calling convention.
+def GR32_ArgRef: RegisterClass<"X86", [i32], 32, (add ECX, EDX)> {
+ let GeneratePressureSet = 0;
+}
+
// Segment registers for use by MOV instructions (and others) that have a
// segment register as one operand. Always contain a 16-bit segment
// descriptor.
diff --git a/llvm/lib/Target/X86/X86ReturnThunks.cpp b/llvm/lib/Target/X86/X86ReturnThunks.cpp
index aaa204597797..a65e9bfe26b0 100644
--- a/llvm/lib/Target/X86/X86ReturnThunks.cpp
+++ b/llvm/lib/Target/X86/X86ReturnThunks.cpp
@@ -28,7 +28,6 @@
#include "X86Subtarget.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -37,6 +36,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Debug.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/lib/Target/X86/X86SchedAlderlakeP.td b/llvm/lib/Target/X86/X86SchedAlderlakeP.td
index 2cf5c6fe9f46..eb7dcfc6108b 100644
--- a/llvm/lib/Target/X86/X86SchedAlderlakeP.td
+++ b/llvm/lib/Target/X86/X86SchedAlderlakeP.td
@@ -2304,13 +2304,6 @@ def ADLPWriteResGroup264 : SchedWriteRes<[ADLPPort01_05, ADLPPort02_03_11]> {
def : InstRW<[ADLPWriteResGroup264, ReadAfterVecYLd], (instregex "^VSHUFP(D|S)Yrmi$")>;
def : InstRW<[ADLPWriteResGroup264, ReadAfterVecYLd], (instrs VPBLENDWYrmi)>;
-def ADLPWriteResGroup265 : SchedWriteRes<[ADLPPort00_01, ADLPPort02_03_11]> {
- let Latency = 13;
- let NumMicroOps = 2;
-}
-def : InstRW<[ADLPWriteResGroup265], (instregex "^VPDP(BU|WS)SD((SY)?)rm$",
- "^VPDP(BU|WS)SD(S|Y)rm$")>;
-
def ADLPWriteResGroup266 : SchedWriteRes<[ADLPPort00_01, ADLPPort01_05, ADLPPort02_03_11]> {
let ResourceCycles = [1, 2, 1];
let Latency = 10;
diff --git a/llvm/lib/Target/X86/X86SchedIceLake.td b/llvm/lib/Target/X86/X86SchedIceLake.td
index 4dfeafbca793..283995f8203b 100644
--- a/llvm/lib/Target/X86/X86SchedIceLake.td
+++ b/llvm/lib/Target/X86/X86SchedIceLake.td
@@ -19,7 +19,7 @@ def IceLakeModel : SchedMachineModel {
// All x86 instructions are modeled as a single micro-op, and Ice Lake can
// decode 6 instructions per cycle.
let IssueWidth = 6;
- let MicroOpBufferSize = 224; // Based on the reorder buffer.
+ let MicroOpBufferSize = 352; // Based on the reorder buffer.
let LoadLatency = 5;
let MispredictPenalty = 14;
@@ -55,7 +55,6 @@ def ICXPort9 : ProcResource<1>;
// Many micro-ops are capable of issuing on multiple ports.
def ICXPort01 : ProcResGroup<[ICXPort0, ICXPort1]>;
def ICXPort23 : ProcResGroup<[ICXPort2, ICXPort3]>;
-def ICXPort237 : ProcResGroup<[ICXPort2, ICXPort3, ICXPort7]>;
def ICXPort04 : ProcResGroup<[ICXPort0, ICXPort4]>;
def ICXPort05 : ProcResGroup<[ICXPort0, ICXPort5]>;
def ICXPort06 : ProcResGroup<[ICXPort0, ICXPort6]>;
@@ -117,7 +116,7 @@ multiclass ICXWriteResPair<X86FoldableSchedWrite SchedRW,
// A folded store needs a cycle on port 4 for the store data, and an extra port
// 2/3/7 cycle to recompute the address.
-def : WriteRes<WriteRMW, [ICXPort237,ICXPort4]>;
+def : WriteRes<WriteRMW, [ICXPort78,ICXPort49]>;
// Arithmetic.
defm : ICXWriteResPair<WriteALU, [ICXPort0156], 1>; // Simple integer ALU op.
@@ -146,7 +145,7 @@ def : WriteRes<WriteIMulHLd, []> {
defm : X86WriteRes<WriteBSWAP32, [ICXPort15], 1, [1], 1>;
defm : X86WriteRes<WriteBSWAP64, [ICXPort06, ICXPort15], 2, [1,1], 2>;
defm : X86WriteRes<WriteCMPXCHG,[ICXPort06, ICXPort0156], 5, [2,3], 5>;
-defm : X86WriteRes<WriteCMPXCHGRMW,[ICXPort23,ICXPort06,ICXPort0156,ICXPort237,ICXPort4], 8, [1,2,1,1,1], 6>;
+defm : X86WriteRes<WriteCMPXCHGRMW,[ICXPort23,ICXPort06,ICXPort0156,ICXPort78,ICXPort49], 8, [1,2,1,1,1], 6>;
defm : X86WriteRes<WriteXCHG, [ICXPort0156], 2, [3], 3>;
// TODO: Why isn't the ICXDivider used?
@@ -174,7 +173,7 @@ def : WriteRes<WriteLEA, [ICXPort15]>; // LEA instructions can't fold loads.
defm : ICXWriteResPair<WriteCMOV, [ICXPort06], 1, [1], 1>; // Conditional move.
defm : X86WriteRes<WriteFCMOV, [ICXPort1], 3, [1], 1>; // x87 conditional move.
def : WriteRes<WriteSETCC, [ICXPort06]>; // Setcc.
-def : WriteRes<WriteSETCCStore, [ICXPort06,ICXPort4,ICXPort237]> {
+def : WriteRes<WriteSETCCStore, [ICXPort06,ICXPort49,ICXPort78]> {
let Latency = 2;
let NumMicroOps = 3;
}
@@ -195,8 +194,8 @@ defm : ICXWriteResPair<WriteRotateCL, [ICXPort06], 3, [3], 3>;
// SHLD/SHRD.
defm : X86WriteRes<WriteSHDrri, [ICXPort1], 3, [1], 1>;
defm : X86WriteRes<WriteSHDrrcl,[ICXPort1,ICXPort06,ICXPort0156], 6, [1, 2, 1], 4>;
-defm : X86WriteRes<WriteSHDmri, [ICXPort1,ICXPort23,ICXPort237,ICXPort0156], 9, [1, 1, 1, 1], 4>;
-defm : X86WriteRes<WriteSHDmrcl,[ICXPort1,ICXPort23,ICXPort237,ICXPort06,ICXPort0156], 11, [1, 1, 1, 2, 1], 6>;
+defm : X86WriteRes<WriteSHDmri, [ICXPort1,ICXPort23,ICXPort78,ICXPort0156], 9, [1, 1, 1, 1], 4>;
+defm : X86WriteRes<WriteSHDmrcl,[ICXPort1,ICXPort23,ICXPort78,ICXPort06,ICXPort0156], 11, [1, 1, 1, 2, 1], 6>;
// Bit counts.
defm : ICXWriteResPair<WriteBSF, [ICXPort1], 3>;
@@ -212,8 +211,8 @@ defm : ICXWriteResPair<WriteBZHI, [ICXPort15], 1>;
// Loads, stores, and moves, not folded with other operations.
defm : X86WriteRes<WriteLoad, [ICXPort23], 5, [1], 1>;
-defm : X86WriteRes<WriteStore, [ICXPort237, ICXPort4], 1, [1,1], 1>;
-defm : X86WriteRes<WriteStoreNT, [ICXPort237, ICXPort4], 1, [1,1], 2>;
+defm : X86WriteRes<WriteStore, [ICXPort78, ICXPort49], 1, [1,1], 1>;
+defm : X86WriteRes<WriteStoreNT, [ICXPort78, ICXPort49], 1, [1,1], 2>;
defm : X86WriteRes<WriteMove, [ICXPort0156], 1, [1], 1>;
// Model the effect of clobbering the read-write mask operand of the GATHER operation.
@@ -237,17 +236,17 @@ defm : X86WriteRes<WriteFLoadX, [ICXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteFLoadY, [ICXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteFMaskedLoad, [ICXPort23,ICXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteFMaskedLoadY, [ICXPort23,ICXPort015], 8, [1,1], 2>;
-defm : X86WriteRes<WriteFStore, [ICXPort237,ICXPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteFStoreX, [ICXPort237,ICXPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteFStoreY, [ICXPort237,ICXPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteFStoreNT, [ICXPort237,ICXPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteFStoreNTX, [ICXPort237,ICXPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteFStoreNTY, [ICXPort237,ICXPort4], 1, [1,1], 2>;
-
-defm : X86WriteRes<WriteFMaskedStore32, [ICXPort237,ICXPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore32Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore64, [ICXPort237,ICXPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore64Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFStore, [ICXPort78,ICXPort49], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreX, [ICXPort78,ICXPort49], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreY, [ICXPort78,ICXPort49], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNT, [ICXPort78,ICXPort49], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTX, [ICXPort78,ICXPort49], 1, [1,1], 2>;
+defm : X86WriteRes<WriteFStoreNTY, [ICXPort78,ICXPort49], 1, [1,1], 2>;
+
+defm : X86WriteRes<WriteFMaskedStore32, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore64, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>;
defm : X86WriteRes<WriteFMove, [ICXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [ICXPort015], 1, [1], 1>;
@@ -355,15 +354,15 @@ defm : X86WriteRes<WriteVecLoadNT, [ICXPort23], 6, [1], 1>;
defm : X86WriteRes<WriteVecLoadNTY, [ICXPort23], 7, [1], 1>;
defm : X86WriteRes<WriteVecMaskedLoad, [ICXPort23,ICXPort015], 7, [1,1], 2>;
defm : X86WriteRes<WriteVecMaskedLoadY, [ICXPort23,ICXPort015], 8, [1,1], 2>;
-defm : X86WriteRes<WriteVecStore, [ICXPort237,ICXPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteVecStoreX, [ICXPort237,ICXPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteVecStoreY, [ICXPort237,ICXPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteVecStoreNT, [ICXPort237,ICXPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteVecStoreNTY, [ICXPort237,ICXPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore32, [ICXPort237,ICXPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore32Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore64, [ICXPort237,ICXPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore64Y, [ICXPort237,ICXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecStore, [ICXPort78,ICXPort49], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreX, [ICXPort78,ICXPort49], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreY, [ICXPort78,ICXPort49], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNT, [ICXPort78,ICXPort49], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecStoreNTY, [ICXPort78,ICXPort49], 1, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [ICXPort78,ICXPort49,ICXPort0], 2, [1,1,1], 2>;
defm : X86WriteRes<WriteVecMove, [ICXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [ICXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [ICXPort015], 1, [1], 1>;
@@ -445,7 +444,7 @@ def : WriteRes<WriteVecExtract, [ICXPort0,ICXPort5]> {
let Latency = 3;
let NumMicroOps = 2;
}
-def : WriteRes<WriteVecExtractSt, [ICXPort4,ICXPort5,ICXPort237]> {
+def : WriteRes<WriteVecExtractSt, [ICXPort49,ICXPort5,ICXPort78]> {
let Latency = 2;
let NumMicroOps = 3;
}
@@ -488,9 +487,9 @@ defm : X86WriteRes<WriteCvtPH2PSZLd, [ICXPort23,ICXPort05], 10, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PH, [ICXPort5,ICXPort01], 5, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHY, [ICXPort5,ICXPort01], 7, [1,1], 2>;
defm : X86WriteRes<WriteCvtPS2PHZ, [ICXPort5,ICXPort05], 7, [1,1], 2>;
-defm : X86WriteRes<WriteCvtPS2PHSt, [ICXPort4,ICXPort5,ICXPort237,ICXPort01], 6, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteCvtPS2PHYSt, [ICXPort4,ICXPort5,ICXPort237,ICXPort01], 8, [1,1,1,1], 4>;
-defm : X86WriteRes<WriteCvtPS2PHZSt, [ICXPort4,ICXPort5,ICXPort237,ICXPort05], 8, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [ICXPort49,ICXPort5,ICXPort78,ICXPort01], 6, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [ICXPort49,ICXPort5,ICXPort78,ICXPort01], 8, [1,1,1,1], 4>;
+defm : X86WriteRes<WriteCvtPS2PHZSt, [ICXPort49,ICXPort5,ICXPort78,ICXPort05], 8, [1,1,1,1], 4>;
// Strings instructions.
@@ -608,11 +607,11 @@ defm : ICXWriteResPair<WriteVarShuffle256, [ICXPort5], 3, [1], 1, 7>; // 256-bi
def : WriteRes<WriteMicrocoded, [ICXPort0156]> { let Latency = 100; } // def WriteMicrocoded : SchedWrite;
// Fence instructions.
-def : WriteRes<WriteFence, [ICXPort23, ICXPort4]>;
+def : WriteRes<WriteFence, [ICXPort78, ICXPort49]>;
// Load/store MXCSR.
def : WriteRes<WriteLDMXCSR, [ICXPort0,ICXPort23,ICXPort0156]> { let Latency = 7; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
-def : WriteRes<WriteSTMXCSR, [ICXPort4,ICXPort5,ICXPort237]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
+def : WriteRes<WriteSTMXCSR, [ICXPort49,ICXPort5,ICXPort78]> { let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; }
// Nop, not very useful except it provides a model for nops!
def : WriteRes<WriteNop, []>;
@@ -732,7 +731,7 @@ def: InstRW<[ICXWriteResGroup10], (instrs SGDT64m,
STRm,
SYSCALL)>;
-def ICXWriteResGroup11 : SchedWriteRes<[ICXPort4,ICXPort237]> {
+def ICXWriteResGroup11 : SchedWriteRes<[ICXPort49,ICXPort78]> {
let Latency = 1;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
@@ -772,7 +771,7 @@ def ICXWriteResGroup20 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
}
def: InstRW<[ICXWriteResGroup20], (instregex "CLFLUSH")>;
-def ICXWriteResGroup21 : SchedWriteRes<[ICXPort237,ICXPort0156]> {
+def ICXWriteResGroup21 : SchedWriteRes<[ICXPort49,ICXPort78]> {
let Latency = 2;
let NumMicroOps = 2;
let ResourceCycles = [1,1];
@@ -791,21 +790,21 @@ def: InstRW<[ICXWriteResGroup23], (instrs CWD,
ADC32i32, SBB32i32,
ADC64i32, SBB64i32)>;
-def ICXWriteResGroup25 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort237]> {
+def ICXWriteResGroup25 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort78]> {
let Latency = 2;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup25], (instrs FNSTCW16m)>;
-def ICXWriteResGroup27 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort15]> {
+def ICXWriteResGroup27 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort15]> {
let Latency = 2;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup27], (instregex "MOVBE(16|32|64)mr")>;
-def ICXWriteResGroup28 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort0156]> {
+def ICXWriteResGroup28 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort0156]> {
let Latency = 2;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
@@ -814,7 +813,7 @@ def: InstRW<[ICXWriteResGroup28], (instrs PUSH16r, PUSH32r, PUSH64r, PUSH64i8,
STOSB, STOSL, STOSQ, STOSW)>;
def: InstRW<[ICXWriteResGroup28], (instregex "PUSH(16|32|64)rmr")>;
-def ICXWriteResGroup29 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort15]> {
+def ICXWriteResGroup29 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort15]> {
let Latency = 2;
let NumMicroOps = 5;
let ResourceCycles = [2,2,1];
@@ -910,7 +909,7 @@ def ICXWriteResGroup42 : SchedWriteRes<[ICXPort6,ICXPort0156]> {
}
def: InstRW<[ICXWriteResGroup42], (instregex "CLD")>;
-def ICXWriteResGroup43 : SchedWriteRes<[ICXPort237,ICXPort0156]> {
+def ICXWriteResGroup43 : SchedWriteRes<[ICXPort49,ICXPort78]> {
let Latency = 3;
let NumMicroOps = 3;
let ResourceCycles = [1,2];
@@ -939,21 +938,21 @@ def ICXWriteResGroup44c : SchedWriteRes<[ICXPort1,ICXPort06,ICXPort0156]> {
}
def: InstRW<[ICXWriteResGroup44c], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
-def ICXWriteResGroup45 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort237]> {
+def ICXWriteResGroup45 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78]> {
let Latency = 3;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
}
def: InstRW<[ICXWriteResGroup45], (instrs FNSTSWm)>;
-def ICXWriteResGroup47 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort237,ICXPort0156]> {
+def ICXWriteResGroup47 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort78,ICXPort0156]> {
let Latency = 3;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
}
def: InstRW<[ICXWriteResGroup47], (instregex "CALL(16|32|64)r")>;
-def ICXWriteResGroup48 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort06,ICXPort0156]> {
+def ICXWriteResGroup48 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort06,ICXPort0156]> {
let Latency = 3;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
@@ -1023,7 +1022,7 @@ def: InstRW<[ICXWriteResGroup51], (instregex "VEXPANDPD(Z|Z128|Z256)rr",
"VPMOVUSWB(Z|Z128|Z256)rr",
"VPMOVWB(Z|Z128|Z256)rr")>;
-def ICXWriteResGroup54 : SchedWriteRes<[ICXPort4,ICXPort5,ICXPort237]> {
+def ICXWriteResGroup54 : SchedWriteRes<[ICXPort49,ICXPort5,ICXPort78]> {
let Latency = 4;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
@@ -1098,7 +1097,7 @@ def ICXWriteResGroup63 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort06]> {
}
def: InstRW<[ICXWriteResGroup63], (instregex "STR(16|32|64)r")>;
-def ICXWriteResGroup65 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort01]> {
+def ICXWriteResGroup65 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort01]> {
let Latency = 5;
let NumMicroOps = 3;
let ResourceCycles = [1,1,1];
@@ -1107,7 +1106,7 @@ def: InstRW<[ICXWriteResGroup65], (instregex "VCVTPS2PHZ128mr(b?)",
"VCVTPS2PHZ256mr(b?)",
"VCVTPS2PHZmr(b?)")>;
-def ICXWriteResGroup66 : SchedWriteRes<[ICXPort4,ICXPort5,ICXPort237]> {
+def ICXWriteResGroup66 : SchedWriteRes<[ICXPort49,ICXPort5,ICXPort78]> {
let Latency = 5;
let NumMicroOps = 4;
let ResourceCycles = [1,2,1];
@@ -1137,7 +1136,7 @@ def ICXWriteResGroup67 : SchedWriteRes<[ICXPort06,ICXPort0156]> {
}
def: InstRW<[ICXWriteResGroup67], (instrs XSETBV)>;
-def ICXWriteResGroup69 : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort0156]> {
+def ICXWriteResGroup69 : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort0156]> {
let Latency = 5;
let NumMicroOps = 6;
let ResourceCycles = [1,1,4];
@@ -1245,7 +1244,7 @@ def ICXWriteResGroup84 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort06,ICXPort0156]
}
def: InstRW<[ICXWriteResGroup84], (instregex "SLDT(16|32|64)r")>;
-def ICXWriteResGroup86 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06]> {
+def ICXWriteResGroup86 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06]> {
let Latency = 6;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
@@ -1254,7 +1253,7 @@ def: InstRW<[ICXWriteResGroup86], (instregex "SAR(8|16|32|64)m(1|i)",
"SHL(8|16|32|64)m(1|i)",
"SHR(8|16|32|64)m(1|i)")>;
-def ICXWriteResGroup87 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort0156]> {
+def ICXWriteResGroup87 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort0156]> {
let Latency = 6;
let NumMicroOps = 4;
let ResourceCycles = [1,1,1,1];
@@ -1437,7 +1436,7 @@ def ICXWriteResGroup104 : SchedWriteRes<[ICXPort6,ICXPort23,ICXPort0156]> {
}
def: InstRW<[ICXWriteResGroup104], (instrs LRET64, RET64)>;
-def ICXWriteResGroup106 : SchedWriteRes<[ICXPort4,ICXPort5,ICXPort237]> {
+def ICXWriteResGroup106 : SchedWriteRes<[ICXPort49,ICXPort5,ICXPort78]> {
let Latency = 7;
let NumMicroOps = 4;
let ResourceCycles = [1,2,1];
@@ -1447,7 +1446,7 @@ def: InstRW<[ICXWriteResGroup106], (instregex "VCOMPRESSPD(Z|Z128|Z256)mr(b?)",
"VPCOMPRESSD(Z|Z128|Z256)mr(b?)",
"VPCOMPRESSQ(Z|Z128|Z256)mr(b?)")>;
-def ICXWriteResGroup107 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06]> {
+def ICXWriteResGroup107 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06]> {
let Latency = 7;
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
@@ -1463,14 +1462,14 @@ def ICXWriteResGroup107_1 : SchedWriteRes<[ICXPort06]> {
def: InstRW<[ICXWriteResGroup107_1], (instrs ROL8r1, ROL16r1, ROL32r1, ROL64r1,
ROR8r1, ROR16r1, ROR32r1, ROR64r1)>;
-def ICXWriteResGroup108 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort0156]> {
+def ICXWriteResGroup108 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort0156]> {
let Latency = 7;
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
}
def: InstRW<[ICXWriteResGroup108], (instregex "XADD(8|16|32|64)rm")>;
-def ICXWriteResGroup109 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort23,ICXPort237,ICXPort0156]> {
+def ICXWriteResGroup109 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort0156]> {
let Latency = 7;
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,1,1];
@@ -1478,7 +1477,7 @@ def ICXWriteResGroup109 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort23,ICXPort237,
def: InstRW<[ICXWriteResGroup109], (instregex "CALL(16|32|64)m")>;
def: InstRW<[ICXWriteResGroup109], (instrs FARCALL64m)>;
-def ICXWriteResGroup110 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort237,ICXPort0156]> {
+def ICXWriteResGroup110 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort0156]> {
let Latency = 7;
let NumMicroOps = 7;
let ResourceCycles = [1,2,2,2];
@@ -1495,7 +1494,7 @@ def ICXWriteResGroup111 : SchedWriteRes<[ICXPort6,ICXPort06,ICXPort15,ICXPort015
}
def: InstRW<[ICXWriteResGroup111], (instrs LOOP)>;
-def ICXWriteResGroup112 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort237,ICXPort0156]> {
+def ICXWriteResGroup112 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort0156]> {
let Latency = 7;
let NumMicroOps = 11;
let ResourceCycles = [1,4,4,2];
@@ -1505,7 +1504,7 @@ def: InstRW<[ICXWriteResGroup112], (instrs VPSCATTERDQZ256mr,
VSCATTERDPDZ256mr,
VSCATTERQPDZ256mr)>;
-def ICXWriteResGroup113 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort237,ICXPort0156]> {
+def ICXWriteResGroup113 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort78,ICXPort0156]> {
let Latency = 7;
let NumMicroOps = 19;
let ResourceCycles = [1,8,8,2];
@@ -1515,7 +1514,7 @@ def: InstRW<[ICXWriteResGroup113], (instrs VPSCATTERDQZmr,
VSCATTERDPDZmr,
VSCATTERQPDZmr)>;
-def ICXWriteResGroup114 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
+def ICXWriteResGroup114 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
let Latency = 7;
let NumMicroOps = 36;
let ResourceCycles = [1,16,1,16,2];
@@ -1614,7 +1613,7 @@ def ICXWriteResGroup123 : SchedWriteRes<[ICXPort0,ICXPort5,ICXPort23]> {
}
def: InstRW<[ICXWriteResGroup123], (instregex "MMX_PH(ADD|SUB)SWrm")>;
-def ICXWriteResGroup127 : SchedWriteRes<[ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
+def ICXWriteResGroup127 : SchedWriteRes<[ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> {
let Latency = 8;
let NumMicroOps = 5;
let ResourceCycles = [1,1,1,2];
@@ -1622,7 +1621,7 @@ def ICXWriteResGroup127 : SchedWriteRes<[ICXPort23,ICXPort237,ICXPort06,ICXPort0
def: InstRW<[ICXWriteResGroup127], (instregex "RCL(8|16|32|64)m(1|i)",
"RCR(8|16|32|64)m(1|i)")>;
-def ICXWriteResGroup128 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06]> {
+def ICXWriteResGroup128 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06]> {
let Latency = 8;
let NumMicroOps = 6;
let ResourceCycles = [1,1,1,3];
@@ -1633,14 +1632,14 @@ def: InstRW<[ICXWriteResGroup128], (instregex "ROL(8|16|32|64)mCL",
"SHL(8|16|32|64)mCL",
"SHR(8|16|32|64)mCL")>;
-def ICXWriteResGroup130 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
+def ICXWriteResGroup130 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> {
let Latency = 8;
let NumMicroOps = 6;
let ResourceCycles = [1,1,1,2,1];
}
def: SchedAlias<WriteADCRMW, ICXWriteResGroup130>;
-def ICXWriteResGroup131 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
+def ICXWriteResGroup131 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
let Latency = 8;
let NumMicroOps = 8;
let ResourceCycles = [1,2,1,2,2];
@@ -1650,7 +1649,7 @@ def: InstRW<[ICXWriteResGroup131], (instrs VPSCATTERQDZ128mr,
VSCATTERQPSZ128mr,
VSCATTERQPSZ256mr)>;
-def ICXWriteResGroup132 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
+def ICXWriteResGroup132 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
let Latency = 8;
let NumMicroOps = 12;
let ResourceCycles = [1,4,1,4,2];
@@ -1658,7 +1657,7 @@ def ICXWriteResGroup132 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,I
def: InstRW<[ICXWriteResGroup132], (instrs VPSCATTERDDZ128mr,
VSCATTERDPSZ128mr)>;
-def ICXWriteResGroup133 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
+def ICXWriteResGroup133 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
let Latency = 8;
let NumMicroOps = 20;
let ResourceCycles = [1,8,1,8,2];
@@ -1666,7 +1665,7 @@ def ICXWriteResGroup133 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,I
def: InstRW<[ICXWriteResGroup133], (instrs VPSCATTERDDZ256mr,
VSCATTERDPSZ256mr)>;
-def ICXWriteResGroup134 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort237,ICXPort0156]> {
+def ICXWriteResGroup134 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort78,ICXPort0156]> {
let Latency = 8;
let NumMicroOps = 36;
let ResourceCycles = [1,16,1,16,2];
@@ -1851,7 +1850,7 @@ def ICXWriteResGroup154 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort23]> {
def: InstRW<[ICXWriteResGroup154], (instrs VPHADDSWYrm,
VPHSUBSWYrm)>;
-def ICXWriteResGroup157 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
+def ICXWriteResGroup157 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> {
let Latency = 10;
let NumMicroOps = 8;
let ResourceCycles = [1,1,1,1,1,3];
@@ -2056,14 +2055,14 @@ def ICXWriteResGroup194 : SchedWriteRes<[ICXPort1,ICXPort5,ICXPort01,ICXPort23,I
}
def: InstRW<[ICXWriteResGroup194], (instregex "VPCONFLICTDZ128rm(b?)")>;
-def ICXWriteResGroup195 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort237,ICXPort06,ICXPort15,ICXPort0156]> {
+def ICXWriteResGroup195 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort78,ICXPort06,ICXPort15,ICXPort0156]> {
let Latency = 15;
let NumMicroOps = 10;
let ResourceCycles = [1,1,1,5,1,1];
}
def: InstRW<[ICXWriteResGroup195], (instregex "RCL(8|16|32|64)mCL")>;
-def ICXWriteResGroup199 : SchedWriteRes<[ICXPort4,ICXPort23,ICXPort237,ICXPort06,ICXPort15,ICXPort0156]> {
+def ICXWriteResGroup199 : SchedWriteRes<[ICXPort49,ICXPort23,ICXPort78,ICXPort06,ICXPort15,ICXPort0156]> {
let Latency = 16;
let NumMicroOps = 14;
let ResourceCycles = [1,1,1,4,2,5];
@@ -2098,7 +2097,7 @@ def ICXWriteResGroup207 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort06,ICXPort0156
}
def: InstRW<[ICXWriteResGroup207], (instrs CPUID, RDTSC)>;
-def ICXWriteResGroup208 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort237,ICXPort06,ICXPort15,ICXPort0156]> {
+def ICXWriteResGroup208 : SchedWriteRes<[ICXPort1,ICXPort23,ICXPort78,ICXPort06,ICXPort15,ICXPort0156]> {
let Latency = 18;
let NumMicroOps = 11;
let ResourceCycles = [2,1,1,4,1,2];
@@ -2162,7 +2161,7 @@ def ICXWriteGatherEVEX16 : SchedWriteRes<[ICXPort0,ICXPort23,ICXPort015,ICXPort0
}
def: InstRW<[ICXWriteGatherEVEX16], (instrs VGATHERDPSZrm, VPGATHERDDZrm)>;
-def ICXWriteResGroup219 : SchedWriteRes<[ICXPort4,ICXPort5,ICXPort6,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
+def ICXWriteResGroup219 : SchedWriteRes<[ICXPort49,ICXPort5,ICXPort6,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> {
let Latency = 20;
let NumMicroOps = 8;
let ResourceCycles = [1,1,1,1,1,1,2];
@@ -2217,7 +2216,7 @@ def ICXWriteResGroup225 : SchedWriteRes<[ICXPort5,ICXPort01,ICXPort015]> {
def: InstRW<[ICXWriteResGroup225], (instregex "VPCONFLICTDZ128rr",
"VPCONFLICTQZ256rr")>;
-def ICXWriteResGroup228 : SchedWriteRes<[ICXPort0,ICXPort4,ICXPort5,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
+def ICXWriteResGroup228 : SchedWriteRes<[ICXPort0,ICXPort49,ICXPort5,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> {
let Latency = 23;
let NumMicroOps = 19;
let ResourceCycles = [2,1,4,1,1,4,6];
@@ -2260,7 +2259,7 @@ def ICXWriteResGroup247 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort23,ICXPort06,I
def: InstRW<[ICXWriteResGroup247], (instregex "IN(8|16|32)ri",
"IN(8|16|32)rr")>;
-def ICXWriteResGroup248 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort23,ICXPort237,ICXPort06,ICXPort0156]> {
+def ICXWriteResGroup248 : SchedWriteRes<[ICXPort5,ICXPort6,ICXPort23,ICXPort78,ICXPort06,ICXPort0156]> {
let Latency = 35;
let NumMicroOps = 23;
let ResourceCycles = [1,5,2,1,4,10];
@@ -2283,14 +2282,14 @@ def ICXWriteResGroup250 : SchedWriteRes<[ICXPort1,ICXPort6,ICXPort23,ICXPort0156
}
def: InstRW<[ICXWriteResGroup250], (instregex "XRSTOR(64)?")>;
-def ICXWriteResGroup252 : SchedWriteRes<[ICXPort1,ICXPort4,ICXPort5,ICXPort6,ICXPort23,ICXPort237,ICXPort15,ICXPort0156]> {
+def ICXWriteResGroup252 : SchedWriteRes<[ICXPort1,ICXPort49,ICXPort5,ICXPort6,ICXPort23,ICXPort78,ICXPort15,ICXPort0156]> {
let Latency = 40;
let NumMicroOps = 18;
let ResourceCycles = [1,1,2,3,1,1,1,8];
}
def: InstRW<[ICXWriteResGroup252], (instrs VMCLEARm)>;
-def ICXWriteResGroup253 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort23,ICXPort237,ICXPort0156]> {
+def ICXWriteResGroup253 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort0156]> {
let Latency = 41;
let NumMicroOps = 39;
let ResourceCycles = [1,10,1,1,26];
@@ -2304,7 +2303,7 @@ def ICXWriteResGroup254 : SchedWriteRes<[ICXPort5,ICXPort0156]> {
}
def: InstRW<[ICXWriteResGroup254], (instrs RDTSCP)>;
-def ICXWriteResGroup255 : SchedWriteRes<[ICXPort4,ICXPort6,ICXPort23,ICXPort237,ICXPort0156]> {
+def ICXWriteResGroup255 : SchedWriteRes<[ICXPort49,ICXPort6,ICXPort23,ICXPort78,ICXPort0156]> {
let Latency = 42;
let NumMicroOps = 40;
let ResourceCycles = [1,11,1,1,26];
@@ -2362,7 +2361,7 @@ def ICXWriteResGroup263 : SchedWriteRes<[ICXPort5,ICXPort05,ICXPort0156]> {
}
def: InstRW<[ICXWriteResGroup263], (instrs FNINIT)>;
-def ICXWriteResGroup266 : SchedWriteRes<[ICXPort0,ICXPort1,ICXPort4,ICXPort5,ICXPort6,ICXPort237,ICXPort06,ICXPort0156]> {
+def ICXWriteResGroup266 : SchedWriteRes<[ICXPort0,ICXPort1,ICXPort49,ICXPort5,ICXPort6,ICXPort78,ICXPort06,ICXPort0156]> {
let Latency = 106;
let NumMicroOps = 100;
let ResourceCycles = [9,1,11,16,1,11,21,30];
@@ -2526,7 +2525,7 @@ def ICXWriteSETA_SETBEr : SchedWriteRes<[ICXPort06]> {
let NumMicroOps = 2;
}
-def ICXWriteSETA_SETBEm : SchedWriteRes<[ICXPort4,ICXPort237,ICXPort06]> {
+def ICXWriteSETA_SETBEm : SchedWriteRes<[ICXPort49,ICXPort78,ICXPort06]> {
let Latency = 3;
let ResourceCycles = [1,1,2];
let NumMicroOps = 4;
diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
new file mode 100644
index 000000000000..bcf1601f26bb
--- /dev/null
+++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td
@@ -0,0 +1,5202 @@
+//=- X86SchedSapphireRapids.td - X86 SapphireRapids Scheduling *- tablegen -*=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for SapphireRapids to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def SapphireRapidsModel : SchedMachineModel {
+ // SapphireRapids can allocate 6 uops per cycle.
+ let IssueWidth = 6; // Based on allocator width.
+ let MicroOpBufferSize = 512; // Based on the reorder buffer.
+ let LoadLatency = 5;
+ let MispredictPenalty = 14;
+
+ // Latency for microcoded instructions or instructions without latency info.
+ int MaxLatency = 100;
+
+ // Based on the LSD (loop-stream detector) queue size (ST).
+ let LoopMicroOpBufferSize = 72;
+
+ // This flag is set to allow the scheduler to assign a default model to
+ // unrecognized opcodes.
+ let CompleteModel = 0;
+}
+
+let SchedModel = SapphireRapidsModel in {
+
+// SapphireRapids can issue micro-ops to 12 different ports in one cycle.
+def SPRPort00 : ProcResource<1>;
+def SPRPort01 : ProcResource<1>;
+def SPRPort02 : ProcResource<1>;
+def SPRPort03 : ProcResource<1>;
+def SPRPort04 : ProcResource<1>;
+def SPRPort05 : ProcResource<1>;
+def SPRPort06 : ProcResource<1>;
+def SPRPort07 : ProcResource<1>;
+def SPRPort08 : ProcResource<1>;
+def SPRPort09 : ProcResource<1>;
+def SPRPort10 : ProcResource<1>;
+def SPRPort11 : ProcResource<1>;
+
+// Workaround to represent invalid ports. WriteRes shouldn't use this resource.
+def SPRPortInvalid :ProcResource<1>;
+
+// Many micro-ops are capable of issuing on multiple ports.
+def SPRPort00_01 : ProcResGroup<[SPRPort00, SPRPort01]>;
+def SPRPort00_01_05 : ProcResGroup<[SPRPort00, SPRPort01, SPRPort05]>;
+def SPRPort00_01_05_06 : ProcResGroup<[SPRPort00, SPRPort01, SPRPort05, SPRPort06]>;
+def SPRPort00_05 : ProcResGroup<[SPRPort00, SPRPort05]>;
+def SPRPort00_05_06 : ProcResGroup<[SPRPort00, SPRPort05, SPRPort06]>;
+def SPRPort00_06 : ProcResGroup<[SPRPort00, SPRPort06]>;
+def SPRPort01_05 : ProcResGroup<[SPRPort01, SPRPort05]>;
+def SPRPort01_05_10 : ProcResGroup<[SPRPort01, SPRPort05, SPRPort10]>;
+def SPRPort02_03 : ProcResGroup<[SPRPort02, SPRPort03]>;
+def SPRPort02_03_11 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort11]>;
+def SPRPort07_08 : ProcResGroup<[SPRPort07, SPRPort08]>;
+
+// EU has 112 reservation stations.
+def SPRPort00_01_05_06_10 : ProcResGroup<[SPRPort00, SPRPort01, SPRPort05,
+ SPRPort06, SPRPort10]> {
+ let BufferSize = 112;
+}
+
+// STD has 48 reservation stations.
+def SPRPort04_09 : ProcResGroup<[SPRPort04, SPRPort09]> {
+ let BufferSize = 48;
+}
+
+// MEM has 72 reservation stations.
+def SPRPort02_03_07_08_11 : ProcResGroup<[SPRPort02, SPRPort03, SPRPort07,
+ SPRPort08, SPRPort11]> {
+ let BufferSize = 72;
+}
+
+// Integer loads are 5 cycles, so ReadAfterLd registers needn't be available
+// until 5 cycles after the memory operand.
+def : ReadAdvance<ReadAfterLd, 5>;
+
+// Vector loads are 6 cycles, so ReadAfterVec*Ld registers needn't be available
+// until 6 cycles after the memory operand.
+def : ReadAdvance<ReadAfterVecLd, 6>;
+def : ReadAdvance<ReadAfterVecXLd, 6>;
+def : ReadAdvance<ReadAfterVecYLd, 6>;
+
+def : ReadAdvance<ReadInt2Fpu, 0>;
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when queued in the reservation station.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+multiclass SPRWriteResPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts,
+ int Lat, list<int> Res = [1], int UOps = 1,
+ int LoadLat = 5, int LoadUOps = 1> {
+ // Register variant uses a single cycle on ExePort.
+ def : WriteRes<SchedRW, ExePorts> {
+ let Latency = Lat;
+ let ResourceCycles = Res;
+ let NumMicroOps = UOps;
+ }
+
+ // Memory variant also uses a cycle on port 2/3/11 and adds LoadLat cycles to
+ // the latency (default = 5).
+ def : WriteRes<SchedRW.Folded, !listconcat([SPRPort02_03_11], ExePorts)> {
+ let Latency = !add(Lat, LoadLat);
+ let ResourceCycles = !listconcat([1], Res);
+ let NumMicroOps = !add(UOps, LoadUOps);
+ }
+}
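+
+// A rough sketch of what a single instance expands to, assuming the
+// WriteCRC32/WriteCRC32Ld pair from X86Schedule.td: the use
+//   defm : SPRWriteResPair<WriteCRC32, [SPRPort01], 3, [1]>;
+// is roughly equivalent to
+//   def : WriteRes<WriteCRC32, [SPRPort01]> {
+//     let Latency = 3; let ResourceCycles = [1]; let NumMicroOps = 1;
+//   }
+//   def : WriteRes<WriteCRC32Ld, [SPRPort02_03_11, SPRPort01]> {
+//     let Latency = 8; let ResourceCycles = [1, 1]; let NumMicroOps = 2;
+//   }
+// i.e. the folded-load variant adds the load port group, 5 cycles of latency
+// and one extra micro-op.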
+
+//===----------------------------------------------------------------------===//
+// The following definitions are inferred by smg.
+//===----------------------------------------------------------------------===//
+
+// Inferred SchedWrite definitions.
+def : WriteRes<WriteADC, [SPRPort00_06]>;
+defm : X86WriteRes<WriteADCLd, [SPRPort00_01_05_06_10, SPRPort00_06], 11, [1, 1], 2>;
+defm : SPRWriteResPair<WriteAESDecEnc, [SPRPort00_01], 5, [1], 1, 7>;
+defm : SPRWriteResPair<WriteAESIMC, [SPRPort00_01], 8, [2], 2, 7>;
+defm : X86WriteRes<WriteAESKeyGen, [SPRPort00, SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort01_05, SPRPort05], 7, [4, 1, 1, 2, 3, 3], 14>;
+defm : X86WriteRes<WriteAESKeyGenLd, [SPRPort00, SPRPort00_01, SPRPort00_06, SPRPort01_05, SPRPort02_03_11, SPRPort05], 12, [4, 1, 2, 3, 1, 3], 14>;
+def : WriteRes<WriteALU, [SPRPort00_01_05_06_10]>;
+def : WriteRes<WriteALULd, [SPRPort00_01_05_06_10]> {
+ let Latency = 11;
+}
+defm : SPRWriteResPair<WriteBEXTR, [SPRPort00_06, SPRPort01], 6, [1, 1], 2>;
+defm : SPRWriteResPair<WriteBLS, [SPRPort01_05_10], 2, [1]>;
+defm : SPRWriteResPair<WriteBSF, [SPRPort01], 3, [1]>;
+defm : SPRWriteResPair<WriteBSR, [SPRPort01], 3, [1]>;
+def : WriteRes<WriteBSWAP32, [SPRPort01]>;
+defm : X86WriteRes<WriteBSWAP64, [SPRPort00_06, SPRPort01], 2, [1, 1], 2>;
+defm : SPRWriteResPair<WriteBZHI, [SPRPort01], 3, [1]>;
+def : WriteRes<WriteBitTest, [SPRPort01]>;
+defm : X86WriteRes<WriteBitTestImmLd, [SPRPort01, SPRPort02_03_11], 6, [1, 1], 2>;
+defm : X86WriteRes<WriteBitTestRegLd, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11], 11, [4, 2, 1, 2, 1], 10>;
+def : WriteRes<WriteBitTestSet, [SPRPort01]>;
+def : WriteRes<WriteBitTestSetImmLd, [SPRPort01]> {
+ let Latency = 11;
+}
+defm : X86WriteRes<WriteBitTestSetRegLd, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10], 17, [3, 2, 1, 2], 8>;
+defm : SPRWriteResPair<WriteBlend, [SPRPort01_05], 1, [1], 1, 7>;
+defm : SPRWriteResPair<WriteBlendY, [SPRPort00_01_05], 1, [1], 1, 8>;
+defm : SPRWriteResPair<WriteCLMul, [SPRPort05], 3, [1], 1, 7>;
+defm : SPRWriteResPair<WriteCMOV, [SPRPort00_06], 1, [1], 1, 6>;
+defm : X86WriteRes<WriteCMPXCHG, [SPRPort00_01_05_06_10, SPRPort00_06], 3, [3, 2], 5>;
+defm : X86WriteRes<WriteCMPXCHGRMW, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08], 12, [1, 2, 1, 1, 1], 6>;
+defm : SPRWriteResPair<WriteCRC32, [SPRPort01], 3, [1]>;
+defm : X86WriteRes<WriteCvtI2PD, [SPRPort00_01, SPRPort05], 5, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtI2PDLd, [SPRPort00_01, SPRPort02_03_11], 11, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtI2PDY, [SPRPort00_01, SPRPort05], 7, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtI2PDYLd, [SPRPort00_01, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : SPRWriteResPair<WriteCvtI2PDZ, [SPRPort00], 4, [1], 1, 8>;
+defm : SPRWriteResPair<WriteCvtI2PS, [SPRPort00_01], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteCvtI2PSY, [SPRPort00_01], 4, [1], 1, 8>;
+defm : SPRWriteResPair<WriteCvtI2PSZ, [SPRPort00], 4, [1], 1, 8>;
+defm : X86WriteRes<WriteCvtI2SD, [SPRPort00_01, SPRPort05], 7, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtI2SDLd, [SPRPort00_01, SPRPort02_03_11], 11, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtI2SS, [SPRPort00_01, SPRPort00_01_05, SPRPort05], 9, [1, 1, 1], 3>;
+defm : X86WriteRes<WriteCvtI2SSLd, [SPRPort00_01, SPRPort02_03_11], 11, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPD2I, [SPRPort00_01, SPRPort05], 5, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPD2ILd, [SPRPort00_01, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPD2IY, [SPRPort00_01, SPRPort05], 7, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPD2IYLd, [SPRPort00_01, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPD2IZ, [SPRPort00, SPRPort05], 7, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPD2IZLd, [SPRPort00, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : SPRWriteResPair<WriteCvtPD2PS, [SPRPort00_01, SPRPort05], 5, [1, 1], 2, 7>;
+defm : SPRWriteResPair<WriteCvtPD2PSY, [SPRPort00_01, SPRPort05], 7, [1, 1], 2, 8>;
+defm : SPRWriteResPair<WriteCvtPD2PSZ, [SPRPort00, SPRPort05], 7, [1, 1], 2, 8>;
+defm : X86WriteRes<WriteCvtPH2PS, [SPRPort00_01, SPRPort05], 6, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSLd, [SPRPort00_01, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSY, [SPRPort00_01, SPRPort05], 8, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPH2PSYLd, [SPRPort00_01, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : SPRWriteResPair<WriteCvtPH2PSZ, [SPRPort00, SPRPort05], 11, [1, 1], 2>;
+defm : SPRWriteResPair<WriteCvtPS2I, [SPRPort00_01], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteCvtPS2IY, [SPRPort00_01], 4, [1], 1, 8>;
+defm : X86WriteRes<WriteCvtPS2IZ, [SPRPort00, SPRPort00_05, SPRPort05], 10, [1, 2, 1], 4>;
+defm : X86WriteRes<WriteCvtPS2IZLd, [SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05], 18, [1, 2, 1, 1, 1], 6>;
+defm : X86WriteRes<WriteCvtPS2PD, [SPRPort00_01, SPRPort05], 5, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDLd, [SPRPort00_01, SPRPort02_03_11], 11, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDY, [SPRPort00_01, SPRPort05], 7, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPS2PDYLd, [SPRPort00_01, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : SPRWriteResPair<WriteCvtPS2PDZ, [SPRPort00, SPRPort05], 7, [1, 1], 2, 6>;
+defm : X86WriteRes<WriteCvtPS2PH, [SPRPort00_01, SPRPort05], 6, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHSt, [SPRPort00_01, SPRPort04_09, SPRPort07_08], 12, [1, 1, 1], 3>;
+defm : X86WriteRes<WriteCvtPS2PHY, [SPRPort00_01, SPRPort05], 8, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHYSt, [SPRPort00_01, SPRPort04_09, SPRPort07_08], 12, [1, 1, 1], 3>;
+defm : X86WriteRes<WriteCvtPS2PHZ, [SPRPort00, SPRPort05], 11, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtPS2PHZSt, [SPRPort00, SPRPort04_09, SPRPort07_08], 12, [1, 1, 1], 3>;
+defm : SPRWriteResPair<WriteCvtSD2I, [SPRPort00, SPRPort00_01], 7, [1, 1], 2>;
+defm : SPRWriteResPair<WriteCvtSD2SS, [SPRPort00_01, SPRPort05], 5, [1, 1], 2, 7>;
+defm : SPRWriteResPair<WriteCvtSS2I, [SPRPort00, SPRPort00_01], 7, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtSS2SD, [SPRPort00_01, SPRPort05], 5, [1, 1], 2>;
+defm : X86WriteRes<WriteCvtSS2SDLd, [SPRPort00_01, SPRPort02_03_11], 11, [1, 1], 2>;
+defm : SPRWriteResPair<WriteDPPD, [SPRPort00_01, SPRPort01_05], 9, [2, 1], 3, 7>;
+defm : SPRWriteResPair<WriteDPPS, [SPRPort00_01, SPRPort00_06, SPRPort01_05, SPRPort05], 14, [2, 1, 2, 1], 6, 7>;
+defm : SPRWriteResPair<WriteDPPSY, [SPRPort00_01, SPRPort00_06, SPRPort01_05, SPRPort05], 14, [2, 1, 2, 1], 6, 8>;
+defm : SPRWriteResPair<WriteDiv16, [SPRPort00_01_05_06_10, SPRPort01], 16, [1, 3], 4, 4>;
+defm : SPRWriteResPair<WriteDiv32, [SPRPort00_01_05_06_10, SPRPort01], 15, [1, 3], 4, 4>;
+defm : SPRWriteResPair<WriteDiv64, [SPRPort01], 18, [3], 3>;
+defm : X86WriteRes<WriteDiv8, [SPRPort01], 17, [3], 3>;
+defm : X86WriteRes<WriteDiv8Ld, [SPRPort01], 22, [3], 3>;
+defm : X86WriteRes<WriteEMMS, [SPRPort00, SPRPort00_05, SPRPort00_06], 10, [1, 8, 1], 10>;
+defm : SPRWriteResPair<WriteFAdd, [SPRPort01_05], 3, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFAdd64, [SPRPort01_05], 3, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFAdd64X, [SPRPort01_05], 3, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFAdd64Y, [SPRPort01_05], 3, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFAdd64Z, [SPRPort00_05], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFAddX, [SPRPort00_01], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFAddY, [SPRPort00_01], 4, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFAddZ, [SPRPort00], 4, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFBlend, [SPRPort00_01_05], 1, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFBlendY, [SPRPort00_01_05], 1, [1], 1, 8>;
+def : WriteRes<WriteFCMOV, [SPRPort01]> {
+ let Latency = 3;
+}
+defm : SPRWriteResPair<WriteFCmp, [SPRPort00_01], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFCmp64, [SPRPort00_01], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFCmp64X, [SPRPort00_01], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFCmp64Y, [SPRPort00_01], 4, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFCmp64Z, [SPRPort00], 4, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFCmpX, [SPRPort00_01], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFCmpY, [SPRPort00_01], 4, [1], 1, 8>;
+def : WriteRes<WriteFCmpZ, [SPRPort05]> {
+ let Latency = 3;
+}
+defm : X86WriteRes<WriteFCmpZLd, [SPRPort00, SPRPort02_03_11], 12, [1, 1], 2>;
+defm : SPRWriteResPair<WriteFCom, [SPRPort05], 1, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFComX, [SPRPort00], 3, [1]>;
+defm : SPRWriteResPair<WriteFDiv, [SPRPort00], 11, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFDiv64, [SPRPort00], 14, [1], 1, 6>;
+defm : SPRWriteResPair<WriteFDiv64X, [SPRPort00], 14, [1], 1, 6>;
+defm : SPRWriteResPair<WriteFDiv64Y, [SPRPort00], 14, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFDiv64Z, [SPRPort00, SPRPort00_05], 23, [2, 1], 3, 7>;
+defm : SPRWriteResPair<WriteFDivX, [SPRPort00], 11, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFDivY, [SPRPort00], 11, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFDivZ, [SPRPort00, SPRPort00_05], 18, [2, 1], 3, 7>;
+defm : SPRWriteResPair<WriteFHAdd, [SPRPort01_05, SPRPort05], 6, [1, 2], 3, 6>;
+defm : SPRWriteResPair<WriteFHAddY, [SPRPort01_05, SPRPort05], 5, [1, 2], 3, 8>;
+def : WriteRes<WriteFLD0, [SPRPort00_05]>;
+defm : X86WriteRes<WriteFLD1, [SPRPort00_05], 1, [2], 2>;
+defm : X86WriteRes<WriteFLDC, [SPRPort00_05], 1, [2], 2>;
+def : WriteRes<WriteFLoad, [SPRPort02_03_11]> {
+ let Latency = 7;
+}
+def : WriteRes<WriteFLoadX, [SPRPort02_03_11]> {
+ let Latency = 7;
+}
+def : WriteRes<WriteFLoadY, [SPRPort02_03_11]> {
+ let Latency = 8;
+}
+defm : SPRWriteResPair<WriteFLogic, [SPRPort00_01_05], 1, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFLogicY, [SPRPort00_01_05], 1, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFLogicZ, [SPRPort00_05], 1, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFMA, [SPRPort00_01], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFMAX, [SPRPort00_01], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFMAY, [SPRPort00_01], 4, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFMAZ, [SPRPort00], 4, [1], 1, 8>;
+def : WriteRes<WriteFMOVMSK, [SPRPort00]> {
+ let Latency = 3;
+}
+defm : X86WriteRes<WriteFMaskedLoad, [SPRPort00_01_05, SPRPort02_03_11], 8, [1, 1], 2>;
+defm : X86WriteRes<WriteFMaskedLoadY, [SPRPort00_01_05, SPRPort02_03_11], 9, [1, 1], 2>;
+defm : X86WriteRes<WriteFMaskedStore32, [SPRPort00, SPRPort04_09, SPRPort07_08], 14, [1, 1, 1], 3>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [SPRPort00, SPRPort04_09, SPRPort07_08], 14, [1, 1, 1], 3>;
+defm : X86WriteRes<WriteFMaskedStore64, [SPRPort00, SPRPort04_09, SPRPort07_08], 14, [1, 1, 1], 3>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [SPRPort00, SPRPort04_09, SPRPort07_08], 14, [1, 1, 1], 3>;
+defm : X86WriteRes<WriteFMoveX, [], 1, [], 0>;
+defm : X86WriteRes<WriteFMoveY, [], 1, [], 0>;
+def : WriteRes<WriteFMoveZ, [SPRPort00_05]>;
+defm : SPRWriteResPair<WriteFMul, [SPRPort00_01], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFMul64, [SPRPort00_01], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFMul64X, [SPRPort00_01], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFMul64Y, [SPRPort00_01], 4, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFMul64Z, [SPRPort00], 4, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFMulX, [SPRPort00_01], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFMulY, [SPRPort00_01], 4, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFMulZ, [SPRPort00], 4, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFRcp, [SPRPort00], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFRcpX, [SPRPort00], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFRcpY, [SPRPort00], 4, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFRcpZ, [SPRPort00, SPRPort00_05], 7, [2, 1], 3, 7>;
+defm : SPRWriteResPair<WriteFRnd, [SPRPort00_01], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFRndY, [SPRPort00_01], 4, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFRndZ, [SPRPort00], 4, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFRsqrt, [SPRPort00], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFRsqrtX, [SPRPort00], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFRsqrtY, [SPRPort00], 4, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFRsqrtZ, [SPRPort00, SPRPort00_05], 9, [2, 1], 3>;
+defm : SPRWriteResPair<WriteFShuffle, [SPRPort05], 1, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFShuffle256, [SPRPort05], 3, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFShuffleY, [SPRPort05], 1, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFShuffleZ, [SPRPort05], 1, [1], 1, 8>;
+def : WriteRes<WriteFSign, [SPRPort00]>;
+defm : SPRWriteResPair<WriteFSqrt, [SPRPort00], 12, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFSqrt64, [SPRPort00], 18, [1]>;
+defm : SPRWriteResPair<WriteFSqrt64X, [SPRPort00], 18, [1], 1, 6>;
+defm : SPRWriteResPair<WriteFSqrt64Y, [SPRPort00], 18, [1], 1, 3>;
+// Warning: negative load latency.
+defm : SPRWriteResPair<WriteFSqrt64Z, [SPRPort00, SPRPort00_05], 32, [2, 1], 3, -1>;
+def : WriteRes<WriteFSqrt80, [SPRPortInvalid, SPRPort00]> {
+ let ResourceCycles = [7, 1];
+ let Latency = 21;
+}
+defm : SPRWriteResPair<WriteFSqrtX, [SPRPort00], 12, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFSqrtY, [SPRPort00], 12, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFSqrtZ, [SPRPort00, SPRPort00_05], 20, [2, 1], 3, 7>;
+defm : X86WriteRes<WriteFStore, [SPRPort04_09, SPRPort07_08], 12, [1, 1], 2>;
+defm : X86WriteResUnsupported<WriteFStoreNT>;
+defm : X86WriteRes<WriteFStoreNTX, [SPRPort04_09, SPRPort07_08], 518, [1, 1], 2>;
+defm : X86WriteRes<WriteFStoreNTY, [SPRPort04_09, SPRPort07_08], 542, [1, 1], 2>;
+defm : X86WriteRes<WriteFStoreX, [SPRPort04_09, SPRPort07_08], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteFStoreY, [SPRPort04_09, SPRPort07_08], 12, [1, 1], 2>;
+defm : SPRWriteResPair<WriteFTest, [SPRPort00], 3, [1]>;
+defm : SPRWriteResPair<WriteFTestY, [SPRPort00], 5, [1], 1, 6>;
+defm : SPRWriteResPair<WriteFVarBlend, [SPRPort00_01_05], 1, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFVarBlendY, [SPRPort00_01_05], 1, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFVarBlendZ, [SPRPort00_05], 1, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFVarShuffle, [SPRPort05], 1, [1], 1, 7>;
+defm : SPRWriteResPair<WriteFVarShuffle256, [SPRPort05], 3, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFVarShuffleY, [SPRPort05], 1, [1], 1, 8>;
+defm : SPRWriteResPair<WriteFVarShuffleZ, [SPRPort05], 1, [1], 1, 8>;
+def : WriteRes<WriteFence, [SPRPort00_06]> {
+ let Latency = 2;
+}
+defm : SPRWriteResPair<WriteIDiv16, [SPRPort00_01_05_06_10, SPRPort01], 16, [1, 3], 4, 4>;
+defm : SPRWriteResPair<WriteIDiv32, [SPRPort00_01_05_06_10, SPRPort01], 15, [1, 3], 4, 4>;
+defm : SPRWriteResPair<WriteIDiv64, [SPRPort01], 18, [3], 3>;
+defm : X86WriteRes<WriteIDiv8, [SPRPort01], 17, [3], 3>;
+defm : X86WriteRes<WriteIDiv8Ld, [SPRPort01], 22, [3], 3>;
+defm : SPRWriteResPair<WriteIMul16, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01], 5, [2, 1, 1], 4>;
+defm : SPRWriteResPair<WriteIMul16Imm, [SPRPort00_01_05_06_10, SPRPort01], 4, [1, 1], 2>;
+defm : SPRWriteResPair<WriteIMul16Reg, [SPRPort01], 3, [1]>;
+defm : SPRWriteResPair<WriteIMul32, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01], 4, [1, 1, 1], 3>;
+defm : SPRWriteResPair<WriteIMul32Imm, [SPRPort01], 3, [1]>;
+defm : SPRWriteResPair<WriteIMul32Reg, [SPRPort01], 3, [1]>;
+defm : SPRWriteResPair<WriteIMul64, [SPRPort01, SPRPort05], 4, [1, 1], 2>;
+defm : SPRWriteResPair<WriteIMul64Imm, [SPRPort01], 3, [1]>;
+defm : SPRWriteResPair<WriteIMul64Reg, [SPRPort01], 3, [1]>;
+defm : SPRWriteResPair<WriteIMul8, [SPRPort01], 3, [1]>;
+def : WriteRes<WriteIMulH, []> {
+ let Latency = 3;
+}
+def : WriteRes<WriteIMulHLd, []> {
+ let Latency = 3;
+}
+defm : SPRWriteResPair<WriteJump, [SPRPort00_06], 1, [1]>;
+def : WriteRes<WriteLAHFSAHF, [SPRPort00_06]> {
+ let Latency = 3;
+}
+defm : X86WriteRes<WriteLDMXCSR, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11], 7, [1, 1, 1, 1], 4>;
+def : WriteRes<WriteLEA, [SPRPort01]>;
+defm : SPRWriteResPair<WriteLZCNT, [SPRPort01], 3, [1]>;
+def : WriteRes<WriteLoad, [SPRPort02_03_11]> {
+ let Latency = 5;
+}
+def : WriteRes<WriteMMXMOVMSK, [SPRPort00]> {
+ let Latency = 3;
+}
+defm : SPRWriteResPair<WriteMPSAD, [SPRPort01_05, SPRPort05], 4, [1, 1], 2, 7>;
+defm : SPRWriteResPair<WriteMPSADY, [SPRPort01_05, SPRPort05], 4, [1, 1], 2, 8>;
+defm : SPRWriteResPair<WriteMULX32, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01], 4, [1, 1, 1], 2>;
+defm : SPRWriteResPair<WriteMULX64, [SPRPort01, SPRPort05], 4, [1, 1]>;
+def : WriteRes<WriteMicrocoded, [SPRPort00_01_05_06]> {
+ let Latency = SapphireRapidsModel.MaxLatency;
+}
+def : WriteRes<WriteMove, [SPRPort00]> {
+ let Latency = 3;
+}
+defm : X86WriteRes<WriteNop, [], 1, [], 0>;
+defm : X86WriteRes<WritePCmpEStrI, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort01, SPRPort05], 16, [3, 2, 1, 1, 1], 8>;
+defm : X86WriteRes<WritePCmpEStrILd, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05], 31, [3, 1, 1, 1, 1, 1], 8>;
+defm : X86WriteRes<WritePCmpEStrM, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort01, SPRPort05], 16, [3, 3, 1, 1, 1], 9>;
+defm : X86WriteRes<WritePCmpEStrMLd, [SPRPort00, SPRPort00_01_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05], 17, [3, 2, 1, 1, 1, 1], 9>;
+defm : SPRWriteResPair<WritePCmpIStrI, [SPRPort00], 11, [3], 3, 20>;
+defm : SPRWriteResPair<WritePCmpIStrM, [SPRPort00], 11, [3], 3>;
+defm : SPRWriteResPair<WritePHAdd, [SPRPort00_05, SPRPort05], 3, [1, 2], 3, 8>;
+defm : SPRWriteResPair<WritePHAddX, [SPRPort00_01_05, SPRPort01_05], 2, [1, 2], 3, 7>;
+defm : SPRWriteResPair<WritePHAddY, [SPRPort00_01_05, SPRPort01_05], 2, [1, 2], 3, 8>;
+defm : SPRWriteResPair<WritePHMINPOS, [SPRPort00], 4, [1], 1, 7>;
+defm : SPRWriteResPair<WritePMULLD, [SPRPort00_01], 10, [2], 2, 8>;
+defm : SPRWriteResPair<WritePMULLDY, [SPRPort00_01], 10, [2], 2, 8>;
+defm : SPRWriteResPair<WritePMULLDZ, [SPRPort00], 10, [2], 2, 8>;
+defm : SPRWriteResPair<WritePOPCNT, [SPRPort01], 3, [1]>;
+defm : SPRWriteResPair<WritePSADBW, [SPRPort05], 3, [1], 1, 8>;
+defm : SPRWriteResPair<WritePSADBWX, [SPRPort05], 3, [1], 1, 7>;
+defm : SPRWriteResPair<WritePSADBWY, [SPRPort05], 3, [1], 1, 8>;
+defm : SPRWriteResPair<WritePSADBWZ, [SPRPort05], 3, [1], 1, 8>;
+defm : X86WriteRes<WriteRMW, [SPRPort02_03_11, SPRPort04_09, SPRPort07_08], 1, [1, 1, 1], 3>;
+defm : X86WriteRes<WriteRotate, [SPRPort00_01_05_06_10, SPRPort00_06], 2, [1, 2], 3>;
+defm : X86WriteRes<WriteRotateLd, [SPRPort00_01_05_06_10, SPRPort00_06], 12, [1, 2], 3>;
+defm : X86WriteRes<WriteRotateCL, [SPRPort00_06], 2, [2], 2>;
+defm : X86WriteRes<WriteRotateCLLd, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01], 19, [2, 3, 2], 7>;
+defm : X86WriteRes<WriteSETCC, [SPRPort00_06], 2, [2], 2>;
+defm : X86WriteRes<WriteSETCCStore, [SPRPort00_06, SPRPort04_09, SPRPort07_08], 13, [2, 1, 1], 4>;
+defm : X86WriteRes<WriteSHDmrcl, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08], 12, [1, 1, 1, 1, 1, 1], 6>;
+defm : X86WriteRes<WriteSHDmri, [SPRPort00_01_05_06_10, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08], 12, [1, 1, 1, 1, 1], 5>;
+defm : X86WriteRes<WriteSHDrrcl, [SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01], 5, [1, 1, 1], 3>;
+def : WriteRes<WriteSHDrri, [SPRPort01]> {
+ let Latency = 3;
+}
+defm : X86WriteRes<WriteSTMXCSR, [SPRPort00, SPRPort00_06, SPRPort04_09, SPRPort07_08], 12, [1, 1, 1, 1], 4>;
+def : WriteRes<WriteShift, [SPRPort00_06]>;
+def : WriteRes<WriteShiftLd, [SPRPort00_06]> {
+ let Latency = 12;
+}
+defm : X86WriteRes<WriteShiftCL, [SPRPort00_06], 2, [2], 2>;
+defm : X86WriteRes<WriteShiftCLLd, [SPRPort00_06], 12, [2], 2>;
+defm : SPRWriteResPair<WriteShuffle, [SPRPort05], 1, [1], 1, 8>;
+defm : SPRWriteResPair<WriteShuffle256, [SPRPort05], 3, [1], 1, 8>;
+defm : SPRWriteResPair<WriteShuffleX, [SPRPort01_05], 1, [1], 1, 7>;
+defm : SPRWriteResPair<WriteShuffleY, [SPRPort01_05], 1, [1], 1, 8>;
+defm : SPRWriteResPair<WriteShuffleZ, [SPRPort05], 3, [1], 1, 6>;
+defm : X86WriteRes<WriteStore, [SPRPort04_09, SPRPort07_08], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteStoreNT, [SPRPort04_09, SPRPort07_08], 512, [1, 1], 2>;
+def : WriteRes<WriteSystem, [SPRPort00_01_05_06]> {
+ let Latency = SapphireRapidsModel.MaxLatency;
+}
+defm : SPRWriteResPair<WriteTZCNT, [SPRPort01], 3, [1]>;
+defm : SPRWriteResPair<WriteVPMOV256, [SPRPort05], 3, [1], 1, 8>;
+defm : SPRWriteResPair<WriteVarBlend, [SPRPort00_01_05], 1, [1], 1, 7>;
+defm : SPRWriteResPair<WriteVarBlendY, [SPRPort00_01_05], 1, [1], 1, 8>;
+defm : SPRWriteResPair<WriteVarBlendZ, [SPRPort00_05], 1, [1], 1, 8>;
+defm : SPRWriteResPair<WriteVarShuffle, [SPRPort00, SPRPort05], 3, [1, 1], 2, 8>;
+defm : X86WriteRes<WriteVarShuffle256, [SPRPort05], 6, [2], 2>;
+defm : X86WriteRes<WriteVarShuffle256Ld, [SPRPort02_03_11, SPRPort05], 11, [1, 1], 2>;
+defm : SPRWriteResPair<WriteVarShuffleX, [SPRPort01_05], 1, [1], 1, 7>;
+defm : SPRWriteResPair<WriteVarShuffleY, [SPRPort01_05], 1, [1], 1, 8>;
+defm : SPRWriteResPair<WriteVarShuffleZ, [SPRPort05], 3, [1], 1, 8>;
+defm : SPRWriteResPair<WriteVarVecShift, [SPRPort00_01], 1, [1], 1, 7>;
+defm : SPRWriteResPair<WriteVarVecShiftY, [SPRPort00_01], 1, [1], 1, 8>;
+defm : SPRWriteResPair<WriteVarVecShiftZ, [SPRPort00], 1, [1], 1, 8>;
+defm : SPRWriteResPair<WriteVecALU, [SPRPort00], 1, [1], 1, 8>;
+defm : SPRWriteResPair<WriteVecALUX, [SPRPort00_01], 1, [1], 1, 7>;
+defm : SPRWriteResPair<WriteVecALUY, [SPRPort00_01], 1, [1], 1, 8>;
+def : WriteRes<WriteVecALUZ, [SPRPort05]> {
+ let Latency = 3;
+}
+defm : X86WriteRes<WriteVecALUZLd, [SPRPort00, SPRPort02_03_11], 9, [1, 1], 2>;
+defm : X86WriteRes<WriteVecExtract, [SPRPort00, SPRPort01_05], 4, [1, 1], 2>;
+defm : X86WriteRes<WriteVecExtractSt, [SPRPort01_05, SPRPort04_09, SPRPort07_08], 19, [1, 1, 1], 3>;
+defm : SPRWriteResPair<WriteVecIMul, [SPRPort00], 5, [1], 1, 8>;
+defm : SPRWriteResPair<WriteVecIMulX, [SPRPort00_01], 5, [1], 1, 8>;
+defm : SPRWriteResPair<WriteVecIMulY, [SPRPort00_01], 5, [1], 1, 8>;
+defm : SPRWriteResPair<WriteVecIMulZ, [SPRPort00], 5, [1], 1, 8>;
+defm : X86WriteRes<WriteVecInsert, [SPRPort01_05, SPRPort05], 4, [1, 1], 2>;
+defm : X86WriteRes<WriteVecInsertLd, [SPRPort01_05, SPRPort02_03_11], 8, [1, 1], 2>;
+def : WriteRes<WriteVecLoad, [SPRPort02_03_11]> {
+ let Latency = 7;
+}
+def : WriteRes<WriteVecLoadNT, [SPRPort02_03_11]> {
+ let Latency = 7;
+}
+def : WriteRes<WriteVecLoadNTY, [SPRPort02_03_11]> {
+ let Latency = 8;
+}
+def : WriteRes<WriteVecLoadX, [SPRPort02_03_11]> {
+ let Latency = 7;
+}
+def : WriteRes<WriteVecLoadY, [SPRPort02_03_11]> {
+ let Latency = 8;
+}
+defm : SPRWriteResPair<WriteVecLogic, [SPRPort00_05], 1, [1], 1, 8>;
+defm : SPRWriteResPair<WriteVecLogicX, [SPRPort00_01_05], 1, [1], 1, 7>;
+defm : SPRWriteResPair<WriteVecLogicY, [SPRPort00_01_05], 1, [1], 1, 8>;
+defm : SPRWriteResPair<WriteVecLogicZ, [SPRPort00_05], 1, [1], 1, 8>;
+def : WriteRes<WriteVecMOVMSK, [SPRPort00]> {
+ let Latency = 3;
+}
+def : WriteRes<WriteVecMOVMSKY, [SPRPort00]> {
+ let Latency = 4;
+}
+defm : X86WriteRes<WriteVecMaskedGatherWriteback, [], 5, [], 0>;
+defm : X86WriteRes<WriteVecMaskedLoad, [SPRPort00_01_05, SPRPort02_03_11], 8, [1, 1], 2>;
+defm : X86WriteRes<WriteVecMaskedLoadY, [SPRPort00_01_05, SPRPort02_03_11], 9, [1, 1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32, [SPRPort00, SPRPort04_09, SPRPort07_08], 14, [1, 1, 1], 3>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [SPRPort00, SPRPort04_09, SPRPort07_08], 14, [1, 1, 1], 3>;
+defm : X86WriteRes<WriteVecMaskedStore64, [SPRPort00, SPRPort04_09, SPRPort07_08], 14, [1, 1, 1], 3>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [SPRPort00, SPRPort04_09, SPRPort07_08], 14, [1, 1, 1], 3>;
+def : WriteRes<WriteVecMove, [SPRPort00_05]>;
+def : WriteRes<WriteVecMoveFromGpr, [SPRPort05]> {
+ let Latency = 3;
+}
+def : WriteRes<WriteVecMoveToGpr, [SPRPort00]> {
+ let Latency = 3;
+}
+defm : X86WriteRes<WriteVecMoveX, [], 1, [], 0>;
+def : WriteRes<WriteVecMoveY, [SPRPort00_01_05]>;
+def : WriteRes<WriteVecMoveZ, [SPRPort00_05]>;
+defm : SPRWriteResPair<WriteVecShift, [SPRPort00], 1, [1], 1, 8>;
+def : WriteRes<WriteVecShiftImm, [SPRPort00]>;
+defm : SPRWriteResPair<WriteVecShiftImmX, [SPRPort00_01], 1, [1], 1, 7>;
+defm : SPRWriteResPair<WriteVecShiftImmY, [SPRPort00_01], 1, [1], 1, 8>;
+defm : SPRWriteResPair<WriteVecShiftImmZ, [SPRPort00], 1, [1], 1, 8>;
+defm : X86WriteRes<WriteVecShiftX, [SPRPort00_01, SPRPort01_05], 2, [1, 1], 2>;
+defm : X86WriteRes<WriteVecShiftXLd, [SPRPort00_01, SPRPort02_03_11], 8, [1, 1], 2>;
+defm : X86WriteRes<WriteVecShiftY, [SPRPort00_01, SPRPort05], 4, [1, 1], 2>;
+defm : X86WriteRes<WriteVecShiftYLd, [SPRPort00_01, SPRPort02_03_11], 9, [1, 1], 2>;
+defm : X86WriteRes<WriteVecShiftZ, [SPRPort00, SPRPort05], 4, [1, 1], 2>;
+defm : X86WriteRes<WriteVecShiftZLd, [SPRPort00, SPRPort02_03_11], 9, [1, 1], 2>;
+defm : X86WriteRes<WriteVecStore, [SPRPort04_09, SPRPort07_08], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteVecStoreNT, [SPRPort04_09, SPRPort07_08], 511, [1, 1], 2>;
+defm : X86WriteRes<WriteVecStoreNTY, [SPRPort04_09, SPRPort07_08], 507, [1, 1], 2>;
+defm : X86WriteRes<WriteVecStoreX, [SPRPort04_09, SPRPort07_08], 12, [1, 1], 2>;
+defm : X86WriteRes<WriteVecStoreY, [SPRPort04_09, SPRPort07_08], 12, [1, 1], 2>;
+defm : SPRWriteResPair<WriteVecTest, [SPRPort00, SPRPort05], 4, [1, 1], 2>;
+defm : SPRWriteResPair<WriteVecTestY, [SPRPort00, SPRPort05], 6, [1, 1], 2, 6>;
+defm : X86WriteRes<WriteXCHG, [SPRPort00_01_05_06_10], 2, [3], 3>;
+def : WriteRes<WriteZero, []>;
+
+// Inferred SchedWriteRes and InstRW definitions.
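+//
+// Each SPRWriteResGroup<N> below is a SchedWriteRes that lists the SPR issue
+// ports it occupies, together with its ResourceCycles, Latency and
+// NumMicroOps; the InstRW entries then bind that group (plus any SchedRead
+// operands such as ReadAfterLd) to the matching instructions, either by
+// explicit opcode list (instrs) or by name pattern (instregex).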
+
+def SPRWriteResGroup0 : SchedWriteRes<[SPRPort02_03, SPRPort02_03_11, SPRPort04, SPRPort04_09]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup0], (instregex "^AA(D|N)D64mr$",
+ "^A(X?)OR64mr$")>;
+
+def SPRWriteResGroup1 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [2, 1, 1, 1, 1];
+ let Latency = 12;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup1, ReadAfterLd, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^(ADC|SBB)(16|32|64)mr$")>;
+
+def SPRWriteResGroup2 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup2], (instregex "^RORX(32|64)mi$")>;
+def : InstRW<[SPRWriteResGroup2, ReadAfterLd, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^(ADC|SBB)(8|16|32|64)rm$",
+ "^AD(C|O)X(32|64)rm$")>;
+
+def SPRWriteResGroup3 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let Latency = 13;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup3], (instregex "^(ADC|SBB)8mi(8?)$")>;
+
+def SPRWriteResGroup4 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [2, 1, 1, 1, 1];
+ let Latency = 13;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup4, ReadAfterLd, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^(ADC|SBB)8mr$")>;
+
+def SPRWriteResGroup5 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup5], (instregex "^CMP(8|16|32)mi$",
+ "^CMP(8|16|32|64)mi8$",
+ "^MOV(8|16)rm$",
+ "^POP(16|32)r((mr)?)$")>;
+def : InstRW<[SPRWriteResGroup5], (instrs CMP64mi32,
+ MOV8rm_NOREX,
+ MOVZX16rm8)>;
+def : InstRW<[SPRWriteResGroup5, ReadAfterLd], (instregex "^(ADD|CMP|SUB)(8|16|32|64)rm$",
+ "^AND(8|16|32)rm$",
+ "^(X?)OR(8|16|32)rm$")>;
+def : InstRW<[SPRWriteResGroup5, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^CMP(8|16|32|64)mr$")>;
+
+def SPRWriteResGroup6 : SchedWriteRes<[]> {
+ let NumMicroOps = 0;
+}
+def : InstRW<[SPRWriteResGroup6], (instregex "^(ADD|SUB)64ri8$",
+ "^(DE|IN)C64r$",
+ "^MOV64rr((_REV)?)$",
+ "^VMOV(A|U)P(D|S)Zrr((_REV)?)$",
+ "^VMOVDQA(32|64)Z((256)?)rr((_REV)?)$",
+ "^VMOVDQ(A|U)Yrr((_REV)?)$",
+ "^VMOVDQU(8|16|32|64)Z((256)?)rr((_REV)?)$")>;
+def : InstRW<[SPRWriteResGroup6], (instrs CLC,
+ JMP_2)>;
+
+def SPRWriteResGroup7 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let Latency = 13;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup7], (instregex "^A(D|N)D8mi(8?)$",
+ "^(DE|IN)C8m$",
+ "^N(EG|OT)8m$",
+ "^(X?)OR8mi(8?)$",
+ "^SUB8mi(8?)$")>;
+def : InstRW<[SPRWriteResGroup7, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^A(D|N)D8mr$",
+ "^(X?)OR8mr$")>;
+def : InstRW<[SPRWriteResGroup7, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs SUB8mr)>;
+
+def SPRWriteResGroup8 : SchedWriteRes<[SPRPort01_05, SPRPort02_03_11]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup8, ReadAfterVecXLd], (instregex "^(V?)(ADD|SUB)PSrm$",
+ "^(V?)ADDSUBPSrm$",
+ "^V(ADD|SUB)PSZ128rm((b|k|bk|kz)?)$",
+ "^V(ADD|SUB)PSZ128rmbkz$")>;
+
+def SPRWriteResGroup9 : SchedWriteRes<[SPRPort01_05]> {
+ let Latency = 3;
+}
+def : InstRW<[SPRWriteResGroup9], (instregex "^(V?)(ADD|SUB)PSrr$",
+ "^(V?)ADDSUBPSrr$",
+ "^V(ADD|SUB)PSYrr$",
+ "^V(ADD|SUB)PSZ(128|256)rr(k?)$",
+ "^VPMOV(S|Z)XBWZ128rrk(z?)$",
+ "^VPSHUFBZ(128|256)rrk(z?)$",
+ "^VPSHUF(H|L)WZ(128|256)rik(z?)$",
+ "^VPUNPCK(H|L)(BW|WD)Z(128|256)rrk(z?)$")>;
+def : InstRW<[SPRWriteResGroup9], (instrs VADDSUBPSYrr)>;
+
+def SPRWriteResGroup10 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup10], (instregex "^ADD_F(32|64)m$",
+ "^ILD_F(16|32|64)m$",
+ "^SUB(R?)_F(32|64)m$",
+ "^VPOPCNT(B|D|Q|W)Z128rm$",
+ "^VPOPCNT(D|Q)Z128rm(b|k|kz)$",
+ "^VPOPCNT(D|Q)Z128rmbk(z?)$")>;
+def : InstRW<[SPRWriteResGroup10, ReadAfterVecXLd], (instregex "^(V?)PACK(S|U)S(DW|WB)rm$",
+ "^(V?)PCMPGTQrm$",
+ "^VFPCLASSP(D|H|S)Z128rmb$",
+ "^VPACK(S|U)S(DW|WB)Z128rm$",
+ "^VPACK(S|U)SDWZ128rmb$",
+ "^VPM(AX|IN)(S|U)QZ128rm((b|k|bk|kz)?)$",
+ "^VPM(AX|IN)(S|U)QZ128rmbkz$",
+ "^VPMULTISHIFTQBZ128rm(b?)$")>;
+def : InstRW<[SPRWriteResGroup10, ReadAfterVecXLd], (instrs VFPCLASSPHZ128rm)>;
+def : InstRW<[SPRWriteResGroup10, ReadAfterVecYLd], (instregex "^VFPCLASSP(D|H|S)Z((256)?)rm$",
+ "^VPERM(I|T)2(D|Q|PS)128rm((b|k|bk|kz)?)$",
+ "^VPERM(I|T)2(D|Q|PS)128rmbkz$",
+ "^VPERM(I|T)2PD128rm((b|k|bk|kz)?)$",
+ "^VPERM(I|T)2PD128rmbkz$")>;
+def : InstRW<[SPRWriteResGroup10, ReadAfterVecYLd], (instrs VPERMBZ128rm)>;
+
+def SPRWriteResGroup11 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 13;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup11], (instregex "^ADD_FI(16|32)m$",
+ "^SUB(R?)_FI(16|32)m$")>;
+def : InstRW<[SPRWriteResGroup11, ReadAfterVecXLd], (instrs SHA256MSG2rm)>;
+def : InstRW<[SPRWriteResGroup11, ReadAfterVecYLd], (instregex "^VPEXPAND(B|W)Z(128|256)rmk(z?)$",
+ "^VPEXPAND(B|W)Zrmk(z?)$")>;
+
+def SPRWriteResGroup12 : SchedWriteRes<[SPRPort05]> {
+ let Latency = 3;
+}
+def : InstRW<[SPRWriteResGroup12], (instregex "^ADD_F(P?)rST0$",
+ "^KMOV(B|D|W)kr$",
+ "^(V?)PACK(S|U)S(DW|WB)rr$",
+ "^(V?)PCMPGTQrr$",
+ "^SUB(R?)_F(P?)rST0$",
+ "^SUB(R?)_FST0r$",
+ "^VALIGN(D|Q)Z256rri((k|kz)?)$",
+ "^VCMPP(D|H|S)Z(128|256)rri(k?)$",
+ "^VCMPS(D|H|S)Zrr$",
+ "^VCMPS(D|H|S)Zrr(b?)_Int(k?)$",
+ "^VFPCLASSP(D|H|S)Z(128|256)rr(k?)$",
+ "^VFPCLASSS(D|H|S)Zrr(k?)$",
+ "^VPACK(S|U)S(DW|WB)Yrr$",
+ "^VPACK(S|U)S(DW|WB)Z(128|256)rr$",
+ "^VPALIGNRZ(128|256)rrik(z?)$",
+ "^VPBROADCAST(B|W)Z128rrk(z?)$",
+ "^VPCMP(B|D|Q|W|UD|UQ|UW)Z(128|256)rri(k?)$",
+ "^VPCMP(EQ|GT)(B|D|Q|W)Z(128|256)rr(k?)$",
+ "^VPCMPUBZ(128|256)rri(k?)$",
+ "^VPERMBZ(128|256)rr$",
+ "^VPERM(B|D|Q)Zrr$",
+ "^VPERM(D|Q)Z256rr((k|kz)?)$",
+ "^VPERM(D|Q)Zrrk(z?)$",
+ "^VPERM(I|T)2(D|Q)(128|256)rr((k|kz)?)$",
+ "^VPERM(I|T)2(D|Q)rr((k|kz)?)$",
+ "^VPM(AX|IN)(S|U)QZ(128|256)rr((k|kz)?)$",
+ "^VPMULTISHIFTQBZ(128|256)rr$",
+ "^VPOPCNT(B|D|Q|W)Z(128|256)rr$",
+ "^VPOPCNT(D|Q)Z(128|256)rrk(z?)$",
+ "^VPTEST(N?)M(B|D|Q|W)Z(128|256)rr(k?)$",
+ "^VPTEST(N?)M(B|D|Q|W)Zrr(k?)$")>;
+def : InstRW<[SPRWriteResGroup12], (instrs ADD_FST0r,
+ VPCMPGTQYrr,
+ VPERMDYrr)>;
+
+def SPRWriteResGroup13 : SchedWriteRes<[SPRPort00_01_05_06_10]> {
+ let Latency = 2;
+}
+def : InstRW<[SPRWriteResGroup13], (instregex "^AND(8|16|32|64)r(r|i8)$",
+ "^AND(8|16|32|64)rr_REV$",
+ "^(AND|TEST)(32|64)i32$",
+ "^(AND|TEST)(8|32)ri$",
+ "^(AND|TEST)64ri32$",
+ "^(AND|TEST)8i8$",
+ "^(X?)OR(8|16|32|64)r(r|i8)$",
+ "^(X?)OR(8|16|32|64)rr_REV$",
+ "^(X?)OR(32|64)i32$",
+ "^(X?)OR(8|32)ri$",
+ "^(X?)OR64ri32$",
+ "^(X?)OR8i8$",
+ "^TEST(8|16|32|64)rr$")>;
+def : InstRW<[SPRWriteResGroup13], (instrs XOR8rr_NOREX)>;
+
+def SPRWriteResGroup14 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup14], (instregex "^TEST(8|16|32)mi$")>;
+def : InstRW<[SPRWriteResGroup14], (instrs TEST64mi32)>;
+def : InstRW<[SPRWriteResGroup14, ReadAfterLd], (instregex "^(X?)OR64rm$")>;
+def : InstRW<[SPRWriteResGroup14, ReadAfterLd], (instrs AND64rm)>;
+def : InstRW<[SPRWriteResGroup14, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^TEST(8|16|32|64)mr$")>;
+
+def SPRWriteResGroup15 : SchedWriteRes<[SPRPort01_05_10, SPRPort02_03_11]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup15, ReadAfterLd], (instregex "^ANDN(32|64)rm$")>;
+
+def SPRWriteResGroup16 : SchedWriteRes<[SPRPort01_05_10]> {
+ let Latency = 2;
+}
+def : InstRW<[SPRWriteResGroup16], (instregex "^ANDN(32|64)rr$")>;
+
+def SPRWriteResGroup17 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11]> {
+ let ResourceCycles = [5, 2, 1, 1];
+ let Latency = 10;
+ let NumMicroOps = 9;
+}
+def : InstRW<[SPRWriteResGroup17], (instrs BT64mr)>;
+
+def SPRWriteResGroup18 : SchedWriteRes<[SPRPort01]> {
+ let Latency = 3;
+}
+def : InstRW<[SPRWriteResGroup18], (instregex "^BT((C|R|S)?)64rr$",
+ "^P(DEP|EXT)(32|64)rr$")>;
+
+def SPRWriteResGroup19 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [4, 2, 1, 1, 1, 1];
+ let Latency = 17;
+ let NumMicroOps = 10;
+}
+def : InstRW<[SPRWriteResGroup19], (instregex "^BT(C|R|S)64mr$")>;
+
+def SPRWriteResGroup20 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup20], (instregex "^CALL(16|32|64)m((_NT)?)$")>;
+
+def SPRWriteResGroup21 : SchedWriteRes<[SPRPort00_06, SPRPort04_09, SPRPort07_08]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup21], (instregex "^CALL(16|32|64)r((_NT)?)$")>;
+
+def SPRWriteResGroup22 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup22], (instrs CALL64pcrel32,
+ MFENCE)>;
+
+def SPRWriteResGroup23 : SchedWriteRes<[SPRPort01_05]>;
+def : InstRW<[SPRWriteResGroup23], (instregex "^C(DQ|WD)E$",
+ "^(V?)MOVS(H|L)DUPrr$",
+ "^(V?)SHUFP(D|S)rri$",
+ "^VMOVS(H|L)DUPYrr$",
+ "^VMOVS(H|L)DUPZ(128|256)rr((k|kz)?)$",
+ "^VPMOVQDZ128rr((k|kz)?)$",
+ "^VSHUFP(D|S)Yrri$",
+ "^VSHUFP(D|S)Z(128|256)rri((k|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup23], (instrs CBW,
+ VPBLENDWYrri)>;
+
+def SPRWriteResGroup24 : SchedWriteRes<[SPRPort00_06]>;
+def : InstRW<[SPRWriteResGroup24], (instregex "^C(DQ|QO)$",
+ "^(CL|ST)AC$")>;
+
+def SPRWriteResGroup25 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup25], (instrs CLD)>;
+
+def SPRWriteResGroup26 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup26], (instrs CLDEMOTE)>;
+
+def SPRWriteResGroup27 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort04_09, SPRPort07_08]> {
+ let Latency = 2;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup27], (instrs CLFLUSH)>;
+
+def SPRWriteResGroup28 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup28], (instrs CLFLUSHOPT)>;
+
+def SPRWriteResGroup29 : SchedWriteRes<[SPRPort00_06, SPRPort01]> {
+ let ResourceCycles = [2, 1];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup29], (instrs CLI)>;
+
+def SPRWriteResGroup30 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort05]> {
+ let ResourceCycles = [6, 1, 3];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 10;
+}
+def : InstRW<[SPRWriteResGroup30], (instrs CLTS)>;
+
+def SPRWriteResGroup31 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup31], (instregex "^MOV16o(16|32|64)a$")>;
+def : InstRW<[SPRWriteResGroup31], (instrs CLWB)>;
+
+def SPRWriteResGroup32 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> {
+ let ResourceCycles = [5, 2];
+ let Latency = 6;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup32], (instregex "^CMPS(B|L|Q|W)$")>;
+
+def SPRWriteResGroup33 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [2, 7, 6, 2, 1, 1, 2, 1];
+ let Latency = 32;
+ let NumMicroOps = 22;
+}
+def : InstRW<[SPRWriteResGroup33], (instrs CMPXCHG16B)>;
+
+def SPRWriteResGroup34 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [4, 7, 2, 1, 1, 1];
+ let Latency = 25;
+ let NumMicroOps = 16;
+}
+def : InstRW<[SPRWriteResGroup34], (instrs CMPXCHG8B)>;
+
+def SPRWriteResGroup35 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [1, 2, 1, 1, 1];
+ let Latency = 13;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup35], (instrs CMPXCHG8rm)>;
+
+def SPRWriteResGroup36 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_06, SPRPort01, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [2, 1, 10, 6, 1, 5, 1];
+ let Latency = 18;
+ let NumMicroOps = 26;
+}
+def : InstRW<[SPRWriteResGroup36], (instrs CPUID)>;
+
+def SPRWriteResGroup37 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 12;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup37], (instregex "^(V?)CVT(T?)PD2DQrm$",
+ "^VCVT(T?)PD2(U?)DQZ128rm((b|k|bk|kz)?)$",
+ "^VCVT(T?)PD2(U?)DQZ128rmbkz$",
+ "^VCVTPH2PSXZ128rm(b?)$",
+ "^VCVT(U?)QQ2PSZ128rm((b|k|bk|kz)?)$",
+ "^VCVT(U?)QQ2PSZ128rmbkz$")>;
+def : InstRW<[SPRWriteResGroup37], (instrs CVTSI642SSrm)>;
+def : InstRW<[SPRWriteResGroup37, ReadAfterVecLd], (instregex "^(V?)CVTSI642SSrm_Int$",
+ "^VCVT(U?)SI642SSZrm((_Int)?)$")>;
+def : InstRW<[SPRWriteResGroup37, ReadAfterVecLd], (instrs VCVTSI642SSrm)>;
+
+def SPRWriteResGroup38 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort02_03_11]> {
+ let Latency = 26;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup38], (instregex "^(V?)CVT(T?)SD2SIrm((_Int)?)$")>;
+def : InstRW<[SPRWriteResGroup38, ReadAfterVecLd], (instregex "^VCVT(T?)SD2SIZrm$",
+ "^VCVT(T?)SD2(U?)SIZrm_Int$")>;
+def : InstRW<[SPRWriteResGroup38, ReadAfterVecLd], (instrs VCVTTSD2USIZrm)>;
+
+def SPRWriteResGroup39 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup39], (instregex "^VCVT(T?)PS2(U?)QQZ256rr((k|kz)?)$",
+ "^VCVT(U?)QQ2PSZ256rr((k|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup39, ReadInt2Fpu], (instrs CVTSI2SSrr)>;
+def : InstRW<[SPRWriteResGroup39, ReadDefault, ReadInt2Fpu], (instregex "^(V?)CVTSI2SSrr_Int$",
+ "^VCVT(U?)SI2SSZrr$",
+ "^VCVT(U?)SI2SSZrr(b?)_Int$")>;
+def : InstRW<[SPRWriteResGroup39, ReadDefault, ReadInt2Fpu], (instrs VCVTSI2SSrr)>;
+
+def SPRWriteResGroup40 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup40, ReadInt2Fpu], (instrs CVTSI642SSrr)>;
+def : InstRW<[SPRWriteResGroup40, ReadDefault, ReadInt2Fpu], (instregex "^(V?)CVTSI642SSrr_Int$",
+ "^VCVT(U?)SI642SSZrr$",
+ "^VCVT(U?)SI642SSZrr(b?)_Int$")>;
+def : InstRW<[SPRWriteResGroup40, ReadDefault, ReadInt2Fpu], (instrs VCVTSI642SSrr)>;
+
+def SPRWriteResGroup41 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort05]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup41], (instregex "^(V?)CVT(T?)SS2SI64rr_Int$",
+ "^VCVT(T?)SS2SI64Zrr$",
+ "^VCVT(T?)SS2(U?)SI64Zrr(b?)_Int$")>;
+def : InstRW<[SPRWriteResGroup41], (instrs VCVTTSS2USI64Zrr)>;
+def : InstRW<[SPRWriteResGroup41, ReadDefault], (instregex "^(V?)CVT(T?)SS2SI64rr$")>;
+
+def SPRWriteResGroup42 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup42], (instregex "^J(E|R)CXZ$")>;
+def : InstRW<[SPRWriteResGroup42], (instrs CWD)>;
+
+def SPRWriteResGroup43 : SchedWriteRes<[SPRPort00_01_05_06]>;
+def : InstRW<[SPRWriteResGroup43], (instregex "^(LD|ST)_Frr$",
+ "^MOV16s(m|r)$",
+ "^MOV(32|64)sr$")>;
+def : InstRW<[SPRWriteResGroup43], (instrs DEC16r_alt,
+ SALC,
+ ST_FPrr,
+ SYSCALL)>;
+
+def SPRWriteResGroup44 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let Latency = 7;
+}
+def : InstRW<[SPRWriteResGroup44], (instrs DEC32r_alt)>;
+
+def SPRWriteResGroup45 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let Latency = 27;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup45], (instregex "^DIVR_F(32|64)m$")>;
+
+def SPRWriteResGroup46 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 30;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup46], (instregex "^DIVR_FI(16|32)m$")>;
+
+def SPRWriteResGroup47 : SchedWriteRes<[SPRPort00]> {
+ let Latency = 15;
+}
+def : InstRW<[SPRWriteResGroup47], (instregex "^DIVR_F(P?)rST0$")>;
+def : InstRW<[SPRWriteResGroup47], (instrs DIVR_FST0r)>;
+
+def SPRWriteResGroup48 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let Latency = 19;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup48, ReadAfterVecLd], (instregex "^(V?)DIVSDrm$")>;
+def : InstRW<[SPRWriteResGroup48, ReadAfterVecLd], (instrs VDIVSDZrm)>;
+
+def SPRWriteResGroup49 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let Latency = 22;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup49], (instregex "^DIV_F(32|64)m$")>;
+def : InstRW<[SPRWriteResGroup49, ReadAfterVecLd], (instregex "^VSQRTSHZm_Int((k|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup49, ReadAfterVecLd], (instrs VSQRTSHZm)>;
+
+def SPRWriteResGroup50 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 25;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup50], (instregex "^DIV_FI(16|32)m$")>;
+
+def SPRWriteResGroup51 : SchedWriteRes<[SPRPort00]> {
+ let Latency = 20;
+}
+def : InstRW<[SPRWriteResGroup51], (instregex "^DIV_F(P?)rST0$")>;
+def : InstRW<[SPRWriteResGroup51], (instrs DIV_FST0r)>;
+
+def SPRWriteResGroup52 : SchedWriteRes<[SPRPort04, SPRPort04_09]>;
+def : InstRW<[SPRWriteResGroup52], (instregex "^ENQCMD(S?)(16|32|64)$",
+ "^PUSHA(16|32)$",
+ "^ST_F(32|64)m$")>;
+def : InstRW<[SPRWriteResGroup52], (instrs PUSHF32)>;
+
+def SPRWriteResGroup53 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [2, 21, 2, 14, 4, 9, 5];
+ let Latency = 126;
+ let NumMicroOps = 57;
+}
+def : InstRW<[SPRWriteResGroup53], (instrs ENTER)>;
+
+def SPRWriteResGroup54 : SchedWriteRes<[SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let Latency = 12;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup54], (instregex "^(V?)EXTRACTPSmr$",
+ "^VPMOVQDZ((256)?)mr$")>;
+def : InstRW<[SPRWriteResGroup54], (instrs SMSW16m,
+ VEXTRACTPSZmr)>;
+
+def SPRWriteResGroup55 : SchedWriteRes<[SPRPort00, SPRPort05]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup55], (instregex "^(V?)EXTRACTPSrr$")>;
+def : InstRW<[SPRWriteResGroup55], (instrs MMX_PEXTRWrr,
+ VEXTRACTPSZrr,
+ VPERMWZrr)>;
+
+def SPRWriteResGroup56 : SchedWriteRes<[SPRPort02_03, SPRPort02_03_11, SPRPort04, SPRPort04_09, SPRPort06]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup56], (instrs FARCALL64m)>;
+
+def SPRWriteResGroup57 : SchedWriteRes<[SPRPort02_03_11, SPRPort06]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup57], (instrs FARJMP64m,
+ JMP64m_REX)>;
+
+def SPRWriteResGroup58 : SchedWriteRes<[SPRPort04, SPRPort04_09]> {
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup58], (instregex "^(V?)MASKMOVDQU((64)?)$",
+ "^ST_FP(32|64|80)m$")>;
+def : InstRW<[SPRWriteResGroup58], (instrs FBSTPm,
+ VMPTRSTm)>;
+
+def SPRWriteResGroup59 : SchedWriteRes<[SPRPort00_05]> {
+ let ResourceCycles = [2];
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup59], (instrs FDECSTP)>;
+
+def SPRWriteResGroup60 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 11;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup60], (instregex "^FICOM(P?)(16|32)m$")>;
+def : InstRW<[SPRWriteResGroup60, ReadAfterVecYLd], (instregex "^VEXPANDP(D|S)Z((256)?)rm((k|kz)?)$",
+ "^VPEXPAND(B|D|Q|W)Z((256)?)rm$",
+ "^VPEXPAND(D|Q)Z((256)?)rmk(z?)$")>;
+
+def SPRWriteResGroup61 : SchedWriteRes<[SPRPort00_05]>;
+def : InstRW<[SPRWriteResGroup61], (instregex "^MMX_P(ADD|SUB)(B|D|Q|W)rr$",
+ "^VP(ADD|SUB)(B|D|Q|W)Zrr$",
+ "^VP(ADD|SUB)(D|Q)Zrrk(z?)$",
+ "^VPTERNLOG(D|Q)Zrri((k|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup61], (instrs FINCSTP,
+ FNOP)>;
+
+def SPRWriteResGroup62 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup62], (instrs FLDCW16m)>;
+
+def SPRWriteResGroup63 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 5, 10, 39, 8];
+ let Latency = 62;
+ let NumMicroOps = 64;
+}
+def : InstRW<[SPRWriteResGroup63], (instrs FLDENVm)>;
+
+def SPRWriteResGroup64 : SchedWriteRes<[SPRPort00_01_05_06]> {
+ let ResourceCycles = [4];
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup64], (instrs FNCLEX)>;
+
+def SPRWriteResGroup65 : SchedWriteRes<[SPRPort00_01_05_06, SPRPort00_05, SPRPort05]> {
+ let ResourceCycles = [6, 3, 6];
+ let Latency = 75;
+ let NumMicroOps = 15;
+}
+def : InstRW<[SPRWriteResGroup65], (instrs FNINIT)>;
+
+def SPRWriteResGroup66 : SchedWriteRes<[SPRPort04, SPRPort04_09, SPRPort06]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup66], (instrs FNSTCW16m)>;
+
+def SPRWriteResGroup67 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup67], (instrs FNSTSW16r)>;
+
+def SPRWriteResGroup68 : SchedWriteRes<[SPRPort00, SPRPort04, SPRPort04_09]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup68], (instrs FNSTSWm)>;
+
+def SPRWriteResGroup69 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06, SPRPort00_06, SPRPort01, SPRPort04, SPRPort04_09, SPRPort05, SPRPort06]> {
+ let ResourceCycles = [9, 11, 21, 1, 30, 11, 16, 1];
+ let Latency = 106;
+ let NumMicroOps = 100;
+}
+def : InstRW<[SPRWriteResGroup69], (instrs FSTENVm)>;
+
+def SPRWriteResGroup70 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort01_05, SPRPort02_03, SPRPort02_03_11, SPRPort06]> {
+ let ResourceCycles = [4, 1, 2, 1, 47, 33, 2];
+ let Latency = 63;
+ let NumMicroOps = 90;
+}
+def : InstRW<[SPRWriteResGroup70], (instrs FXRSTOR)>;
+
+def SPRWriteResGroup71 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort01_05, SPRPort02_03, SPRPort02_03_11, SPRPort06]> {
+ let ResourceCycles = [4, 1, 2, 1, 45, 31, 4];
+ let Latency = 63;
+ let NumMicroOps = 88;
+}
+def : InstRW<[SPRWriteResGroup71], (instrs FXRSTOR64)>;
+
+def SPRWriteResGroup72 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [2, 5, 10, 10, 2, 38, 5, 38];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 110;
+}
+def : InstRW<[SPRWriteResGroup72], (instregex "^FXSAVE((64)?)$")>;
+
+def SPRWriteResGroup73 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
+ let Latency = 12;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup73], (instregex "^VPLZCNT(D|Q)Z256rm((b|k|bk|kz)?)$",
+ "^VPLZCNT(D|Q)Z256rmbkz$")>;
+def : InstRW<[SPRWriteResGroup73, ReadAfterVecXLd], (instregex "^(V?)GF2P8AFFINE((INV)?)QBrmi$",
+ "^(V?)GF2P8MULBrm$",
+ "^V(ADD|SUB)PHZ128rm((b|k|bk|kz)?)$",
+ "^V(ADD|SUB)PHZ128rmbkz$",
+ "^VGETEXPPHZ128m((b|k|bk|kz)?)$",
+ "^VGETEXPSHZm((k|kz)?)$",
+ "^VGETMANTPHZ128rm(bi|ik)$",
+ "^VGETMANTPHZ128rmbik(z?)$",
+ "^VGETMANTPHZ128rmi((kz)?)$",
+ "^VGETMANTSHZrmi((k|kz)?)$",
+ "^VGF2P8AFFINE((INV)?)QBZ128rm(b?)i$",
+ "^VM(AX|IN)CPHZ128rm((b|k|bk|kz)?)$",
+ "^VM(AX|IN)CPHZ128rmbkz$",
+ "^VM(AX|IN|UL)PHZ128rm((b|k|bk|kz)?)$",
+ "^VM(AX|IN|UL)PHZ128rmbkz$")>;
+def : InstRW<[SPRWriteResGroup73, ReadAfterVecXLd], (instrs VGETEXPPHZ128mbkz,
+ VGF2P8MULBZ128rm)>;
+def : InstRW<[SPRWriteResGroup73, ReadAfterVecLd], (instregex "^V(ADD|SUB)SHZrm$",
+ "^V(ADD|SUB)SHZrm_Int((k|kz)?)$",
+ "^VCVTSH2SSZrm((_Int)?)$",
+ "^VM(AX|IN)CSHZrm$",
+ "^VM(AX|IN|UL)SHZrm$",
+ "^VM(AX|IN|UL)SHZrm_Int((k|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup73, ReadAfterVecYLd], (instregex "^VGF2P8AFFINE((INV)?)QBYrmi$",
+ "^VGF2P8AFFINE((INV)?)QBZ256rm(b?)i$",
+ "^VGF2P8MULB(Y|Z256)rm$")>;
+def : InstRW<[SPRWriteResGroup73, ReadAfterVecXLd, ReadAfterVecXLd], (instregex "^VF(N?)M(ADD|SUB)(132|213|231)PHZ128m((b|k|bk|kz)?)$",
+ "^VF(N?)M(ADD|SUB)(132|213|231)PHZ128mbkz$",
+ "^VFMADDSUB(132|213|231)PHZ128m((b|k|bk|kz)?)$",
+ "^VFMADDSUB(132|213|231)PHZ128mbkz$",
+ "^VFMSUBADD(132|213|231)PHZ128m((b|k|bk|kz)?)$",
+ "^VFMSUBADD(132|213|231)PHZ128mbkz$")>;
+def : InstRW<[SPRWriteResGroup73, ReadAfterVecLd, ReadAfterVecLd], (instregex "^VF(N?)M(ADD|SUB)(132|213|231)SHZm$",
+ "^VF(N?)M(ADD|SUB)(132|213|231)SHZm_Int((k|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup73, ReadAfterVecYLd, ReadAfterVecYLd], (instregex "^VPMADD52(H|L)UQZ256m((b|k|bk|kz)?)$",
+ "^VPMADD52(H|L)UQZ256mbkz$")>;
+
+def SPRWriteResGroup74 : SchedWriteRes<[SPRPort00_01]> {
+ let Latency = 5;
+}
+def : InstRW<[SPRWriteResGroup74], (instregex "^(V?)GF2P8MULBrr$",
+ "^V(ADD|SUB)PHZ(128|256)rr$",
+ "^V(ADD|SUB)SHZrr$",
+ "^V(ADD|SUB)SHZrr(b?)_Int$",
+ "^VCVT(T?)PH2(U?)WZ(128|256)rr$",
+ "^VCVTSH2SSZrr(b?)_Int$",
+ "^VCVT(U?)W2PHZ(128|256)rr$",
+ "^VF(N?)M(ADD|SUB)(132|213|231)PHZ(128|256)r$",
+ "^VF(N?)M(ADD|SUB)(132|213|231)SHZr(b?)((_Int)?)$",
+ "^VFMADDSUB(132|213|231)PHZ(128|256)r$",
+ "^VFMSUBADD(132|213|231)PHZ(128|256)r$",
+ "^VGETEXPPHZ(128|256)r$",
+ "^VGETEXPSHZr(b?)$",
+ "^VGETMANTPHZ(128|256)rri$",
+ "^VGETMANTSHZrri(b?)$",
+ "^VGF2P8MULBZ(128|256)rr$",
+ "^VM(AX|IN)CPHZ(128|256)rr$",
+ "^VM(AX|IN)CSHZrr$",
+ "^VM(AX|IN|UL)PHZ(128|256)rr$",
+ "^VM(AX|IN|UL)SHZrr$",
+ "^VM(AX|IN|UL)SHZrr(b?)_Int$")>;
+def : InstRW<[SPRWriteResGroup74], (instrs VCVTSH2SSZrr,
+ VGF2P8MULBYrr)>;
+
+def SPRWriteResGroup75 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [7, 5, 26, 19, 2, 7, 21];
+ let Latency = 35;
+ let NumMicroOps = 87;
+}
+def : InstRW<[SPRWriteResGroup75], (instrs IN16ri)>;
+
+def SPRWriteResGroup76 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [7, 1, 4, 26, 19, 3, 7, 20];
+ let Latency = 35;
+ let NumMicroOps = 87;
+}
+def : InstRW<[SPRWriteResGroup76], (instrs IN16rr)>;
+
+def SPRWriteResGroup77 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [7, 6, 28, 21, 2, 10, 20];
+ let Latency = 35;
+ let NumMicroOps = 94;
+}
+def : InstRW<[SPRWriteResGroup77], (instrs IN32ri)>;
+
+def SPRWriteResGroup78 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [7, 9, 28, 21, 2, 11, 21];
+ let NumMicroOps = 99;
+}
+def : InstRW<[SPRWriteResGroup78], (instrs IN32rr)>;
+
+def SPRWriteResGroup79 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [7, 6, 25, 19, 2, 8, 20];
+ let Latency = 35;
+ let NumMicroOps = 87;
+}
+def : InstRW<[SPRWriteResGroup79], (instrs IN8ri)>;
+
+def SPRWriteResGroup80 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [7, 6, 25, 19, 2, 7, 20];
+ let Latency = 35;
+ let NumMicroOps = 86;
+}
+def : InstRW<[SPRWriteResGroup80], (instrs IN8rr)>;
+
+def SPRWriteResGroup81 : SchedWriteRes<[SPRPort00_06]> {
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup81], (instrs INC16r_alt)>;
+
+def SPRWriteResGroup82 : SchedWriteRes<[SPRPort02_03_11]> {
+ let Latency = 7;
+}
+def : InstRW<[SPRWriteResGroup82], (instregex "^LD_F(32|64|80)m$",
+ "^(V?)MOV(D|SH|SL)DUPrm$",
+ "^VBROADCASTSS((Z128)?)rm$",
+ "^VMOV(D|SH|SL)DUPZ128rm$",
+ "^VPBROADCAST(D|Q)((Z128)?)rm$")>;
+def : InstRW<[SPRWriteResGroup82], (instrs INC32r_alt,
+ VBROADCASTI32X2Z128rm)>;
+
+def SPRWriteResGroup83 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [7, 6, 24, 17, 8, 1, 19, 1];
+ let Latency = 20;
+ let NumMicroOps = 83;
+}
+def : InstRW<[SPRWriteResGroup83], (instrs INSB)>;
+
+def SPRWriteResGroup84 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [7, 1, 5, 1, 27, 17, 11, 1, 21, 1];
+ let Latency = 20;
+ let NumMicroOps = 92;
+}
+def : InstRW<[SPRWriteResGroup84], (instrs INSL)>;
+
+def SPRWriteResGroup85 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [7, 1, 4, 1, 25, 17, 1, 9, 1, 19, 1];
+ let Latency = 20;
+ let NumMicroOps = 86;
+}
+def : InstRW<[SPRWriteResGroup85], (instrs INSW)>;
+
+def SPRWriteResGroup86 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [5, 4, 8, 6, 2, 5, 7, 5];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 42;
+}
+def : InstRW<[SPRWriteResGroup86], (instrs INVLPG)>;
+
+def SPRWriteResGroup87 : SchedWriteRes<[SPRPort04, SPRPort04_09, SPRPort05]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup87], (instregex "^IST(T?)_FP(16|32|64)m$",
+ "^IST_F(16|32)m$")>;
+
+def SPRWriteResGroup88 : SchedWriteRes<[SPRPort00_01_05_06, SPRPort00_06]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup88], (instrs JCXZ)>;
+
+def SPRWriteResGroup89 : SchedWriteRes<[SPRPort06]>;
+def : InstRW<[SPRWriteResGroup89], (instrs JMP64r_REX)>;
+
+def SPRWriteResGroup90 : SchedWriteRes<[]> {
+ let Latency = 0;
+ let NumMicroOps = 0;
+}
+def : InstRW<[SPRWriteResGroup90], (instregex "^JMP_(1|4)$")>;
+def : InstRW<[SPRWriteResGroup90], (instrs VZEROUPPER)>;
+
+def SPRWriteResGroup91 : SchedWriteRes<[SPRPort05]> {
+ let Latency = 4;
+}
+def : InstRW<[SPRWriteResGroup91], (instregex "^KADD(B|D|Q|W)rr$",
+ "^KSHIFT(LB|RD|RQ|RW)ri$",
+ "^KSHIFT(LD|RB)ri$",
+ "^KSHIFTL(Q|W)ri$",
+ "^KUNPCK(BW|DQ|WD)rr$")>;
+
+def SPRWriteResGroup92 : SchedWriteRes<[SPRPort00]>;
+def : InstRW<[SPRWriteResGroup92], (instregex "^KAND(B|D|Q|W|ND|NQ|NW)rr$",
+ "^KMOV(B|D|Q|W)kk$",
+ "^KNOT(B|D|Q|W)rr$",
+ "^K((X|XN)?)OR(B|D|Q|W)rr$",
+ "^VP(A|SU)BSBZrr$",
+ "^VPABS(D|Q|W)Zrr$",
+ "^VPABS(D|Q)Zrrk(z?)$",
+ "^VPADD(U?)S(B|W)Zrr$",
+ "^VPAVG(B|W)Zrr$",
+ "^VPM(AX|IN)(SB|UD|UW)Zrr$",
+ "^VPM(AX|IN)(SD|UB)Zrr$",
+ "^VPM(AX|IN)(S|U)DZrrk(z?)$",
+ "^VPM(AX|IN)SWZrr$",
+ "^VPSH(L|R)D(D|Q|W)Zrri$",
+ "^VPSH(L|R)DV(D|Q|W)Zr$",
+ "^VPSH(L|R)DV(D|Q)Zrk(z?)$",
+ "^VPSUB(U?)SWZrr$")>;
+def : InstRW<[SPRWriteResGroup92], (instrs KANDNBrr,
+ VPSUBUSBZrr)>;
+
+def SPRWriteResGroup93 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup93], (instregex "^KMOV(B|D|Q|W)km$")>;
+
+def SPRWriteResGroup94 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 13;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup94], (instregex "^MOV8m(i|r)$")>;
+def : InstRW<[SPRWriteResGroup94], (instrs KMOVBmk,
+ MOV8mr_NOREX)>;
+
+def SPRWriteResGroup95 : SchedWriteRes<[SPRPort05]>;
+def : InstRW<[SPRWriteResGroup95], (instregex "^(V?)PALIGNRrri$",
+ "^VALIGN(D|Q)Z128rri((k|kz)?)$",
+ "^VBROADCASTSSZ128rr((k|kz)?)$",
+ "^VPALIGNR(Y|Z)rri$",
+ "^VPALIGNRZ(128|256)rri$",
+ "^VPBROADCAST(B|D|Q|W)rr$",
+ "^VPSHUF(D|HW|LW)Zri$",
+ "^VPSHUFDZrik(z?)$",
+ "^VPS(L|R)LDQZri$",
+ "^VPUNPCK(H|L)(BW|WD)Zrr$",
+ "^VPUNPCK(H|L|LQ)DQZrr((k|kz)?)$",
+ "^VPUNPCKHQDQZrr((k|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup95], (instrs KMOVQkr,
+ VPSHUFBZrr)>;
+
+def SPRWriteResGroup96 : SchedWriteRes<[SPRPort00]> {
+ let Latency = 3;
+}
+def : InstRW<[SPRWriteResGroup96], (instregex "^K((OR)?)TEST(B|D|Q|W)rr$",
+ "^VP(A|SU)BS(B|W)Zrrk(z?)$",
+ "^VPADD(U?)S(B|W)Zrrk(z?)$",
+ "^VPAVG(B|W)Zrrk(z?)$",
+ "^VPM(AX|IN)(SB|UW)Zrrk(z?)$",
+ "^VPM(AX|IN)(SW|UB)Zrrk(z?)$",
+ "^VPSH(L|R)DVWZrk(z?)$",
+ "^VPS(L|R)LVWZrrk(z?)$",
+ "^VPS(L|R)LWZrik(z?)$",
+ "^VPSRAVWZrrk(z?)$",
+ "^VPSRAWZrik(z?)$",
+ "^VPSUBUS(B|W)Zrrk(z?)$")>;
+def : InstRW<[SPRWriteResGroup96], (instrs VMOVSDto64Zrr)>;
+
+def SPRWriteResGroup97 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [8, 2, 14, 3, 1];
+ let Latency = 198;
+ let NumMicroOps = 81;
+}
+def : InstRW<[SPRWriteResGroup97], (instrs LAR16rm)>;
+
+def SPRWriteResGroup98 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 3, 1, 8, 5, 1, 2, 1];
+ let Latency = 66;
+ let NumMicroOps = 22;
+}
+def : InstRW<[SPRWriteResGroup98], (instrs LAR16rr)>;
+
+def SPRWriteResGroup99 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 2, 2, 9, 5, 3, 1];
+ let Latency = 71;
+ let NumMicroOps = 85;
+}
+def : InstRW<[SPRWriteResGroup99], (instrs LAR32rm)>;
+
+def SPRWriteResGroup100 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 3, 1, 8, 5, 1, 2, 1];
+ let Latency = 65;
+ let NumMicroOps = 22;
+}
+def : InstRW<[SPRWriteResGroup100], (instregex "^LAR(32|64)rr$")>;
+
+def SPRWriteResGroup101 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 2, 2, 9, 5, 3, 1];
+ let Latency = 71;
+ let NumMicroOps = 87;
+}
+def : InstRW<[SPRWriteResGroup101], (instrs LAR64rm)>;
+
+def SPRWriteResGroup102 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup102], (instrs LEA16r)>;
+
+def SPRWriteResGroup103 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> {
+ let ResourceCycles = [3, 1];
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup103], (instregex "^LODS(B|W)$",
+ "^SCAS(B|L|Q|W)$")>;
+def : InstRW<[SPRWriteResGroup103], (instrs LEAVE)>;
+
+def SPRWriteResGroup104 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup104], (instrs LEAVE64)>;
+
+def SPRWriteResGroup105 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [1, 2, 4, 3, 2, 1, 1];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 14;
+}
+def : InstRW<[SPRWriteResGroup105], (instrs LGDT64m)>;
+
+def SPRWriteResGroup106 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [1, 1, 5, 3, 2, 1, 1];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 14;
+}
+def : InstRW<[SPRWriteResGroup106], (instrs LIDT64m)>;
+
+def SPRWriteResGroup107 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [5, 3, 2, 1, 1];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 12;
+}
+def : InstRW<[SPRWriteResGroup107], (instrs LLDT16m)>;
+
+def SPRWriteResGroup108 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [1, 4, 3, 1, 1, 1];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 11;
+}
+def : InstRW<[SPRWriteResGroup108], (instrs LLDT16r)>;
+
+def SPRWriteResGroup109 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [1, 1, 2, 8, 3, 1, 2, 7, 2];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 27;
+}
+def : InstRW<[SPRWriteResGroup109], (instrs LMSW16m)>;
+
+def SPRWriteResGroup110 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [5, 7, 1, 2, 5, 2];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 22;
+}
+def : InstRW<[SPRWriteResGroup110], (instrs LMSW16r)>;
+
+def SPRWriteResGroup111 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup111], (instregex "^LODS(L|Q)$")>;
+
+def SPRWriteResGroup112 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
+ let ResourceCycles = [2, 4, 1];
+ let Latency = 3;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup112], (instrs LOOP)>;
+
+def SPRWriteResGroup113 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
+ let ResourceCycles = [4, 6, 1];
+ let Latency = 3;
+ let NumMicroOps = 11;
+}
+def : InstRW<[SPRWriteResGroup113], (instrs LOOPE)>;
+
+def SPRWriteResGroup114 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
+ let ResourceCycles = [4, 6, 1];
+ let Latency = 2;
+ let NumMicroOps = 11;
+}
+def : InstRW<[SPRWriteResGroup114], (instrs LOOPNE)>;
+
+def SPRWriteResGroup115 : SchedWriteRes<[SPRPort02_03, SPRPort02_03_11, SPRPort06]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup115], (instrs LRET64)>;
+
+def SPRWriteResGroup116 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 5, 3, 3, 1];
+ let Latency = 70;
+ let NumMicroOps = 13;
+}
+def : InstRW<[SPRWriteResGroup116], (instregex "^LSL(16|32|64)rm$")>;
+
+def SPRWriteResGroup117 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 4, 4, 3, 2, 1];
+ let Latency = 63;
+ let NumMicroOps = 15;
+}
+def : InstRW<[SPRWriteResGroup117], (instregex "^LSL(16|32|64)rr$")>;
+
+def SPRWriteResGroup118 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 24;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup118], (instregex "^MMX_CVT(T?)PD2PIrm$")>;
+
+def SPRWriteResGroup119 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup119], (instregex "^MMX_CVT(T?)PD2PIrr$",
+ "^VCVT(T?)PH2(U?)DQZ(128|256)rr$",
+ "^VCVTP(H2PS|S2PH)XZ256rr$")>;
+
+def SPRWriteResGroup120 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup120], (instregex "^VCVTP(H2PS|S2PH)XZ128rr$",
+ "^VPERMWZ(128|256)rrk(z?)$",
+ "^VPS(L|R)LWZ256rrk(z?)$",
+ "^VPSRAWZ256rrk(z?)$")>;
+def : InstRW<[SPRWriteResGroup120], (instrs MMX_CVTPI2PDrr)>;
+
+def SPRWriteResGroup121 : SchedWriteRes<[SPRPort00, SPRPort00_01]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup121], (instrs MMX_CVTPI2PSrr)>;
+
+def SPRWriteResGroup122 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let Latency = 13;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup122], (instregex "^MMX_CVT(T?)PS2PIrm$")>;
+
+def SPRWriteResGroup123 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup123], (instregex "^MMX_CVT(T?)PS2PIrr$")>;
+
+def SPRWriteResGroup124 : SchedWriteRes<[SPRPort00, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 12;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup124], (instregex "^MMX_MASKMOVQ((64)?)$")>;
+
+def SPRWriteResGroup125 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 18;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup125], (instregex "^VMOV(W|SHZ)mr$")>;
+def : InstRW<[SPRWriteResGroup125], (instrs MMX_MOVD64mr)>;
+
+def SPRWriteResGroup126 : SchedWriteRes<[SPRPort02_03_11]> {
+ let Latency = 8;
+}
+def : InstRW<[SPRWriteResGroup126], (instregex "^MMX_MOV(D|Q)64rm$",
+ "^VBROADCAST(F|I)128$",
+ "^VBROADCAST(F|I)32X(2|4)Z256rm$",
+ "^VBROADCAST(F|I)32X(8|2Z)rm$",
+ "^VBROADCAST(F|I)(32|64)X4rm$",
+ "^VBROADCAST(F|I)64X2((Z128)?)rm$",
+ "^VBROADCASTS(DY|SZ)rm$",
+ "^VBROADCASTS(D|S)Z256rm$",
+ "^VBROADCASTS(DZ|SY)rm$",
+ "^VMOV(D|SH|SL)DUP(Y|Z)rm$",
+ "^VMOV(D|SH|SL)DUPZ256rm$",
+ "^VPBROADCAST(DY|QZ)rm$",
+ "^VPBROADCAST(D|Q)Z256rm$",
+ "^VPBROADCAST(DZ|QY)rm$")>;
+def : InstRW<[SPRWriteResGroup126], (instrs MMX_MOVD64to64rm)>;
+
+def SPRWriteResGroup127 : SchedWriteRes<[SPRPort00_01_05, SPRPort00_05]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup127], (instregex "^MMX_MOV(DQ|FR64)2Qrr$")>;
+
+def SPRWriteResGroup128 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup128], (instregex "^MMX_MOVQ2(DQ|FR64)rr$")>;
+
+def SPRWriteResGroup129 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 12;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup129, ReadAfterVecLd], (instregex "^MMX_PACKSS(DW|WB)rm$")>;
+def : InstRW<[SPRWriteResGroup129, ReadAfterVecLd], (instrs MMX_PACKUSWBrm)>;
+
+def SPRWriteResGroup130 : SchedWriteRes<[SPRPort05]> {
+ let ResourceCycles = [2];
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup130], (instregex "^MMX_PACKSS(DW|WB)rr$",
+ "^VPMOV(D|Q|W|SQ|SW)BZrr$",
+ "^VPMOV((S|US)?)(D|Q)WZrr$",
+ "^VPMOV(U?)S(DB|QD)Zrr$",
+ "^VPMOV(U?)SQDZrrk(z?)$",
+ "^VPMOVUS(Q|W)BZrr$")>;
+def : InstRW<[SPRWriteResGroup130], (instrs MMX_PACKUSWBrr)>;
+def : InstRW<[SPRWriteResGroup130, ReadDefault, ReadInt2Fpu], (instrs MMX_PINSRWrr)>;
+
+def SPRWriteResGroup131 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup131], (instregex "^VBROADCAST(F|I)32X(8|2Z)rmk(z?)$",
+ "^VBROADCAST(F|I)(32|64)X4rmk(z?)$",
+ "^VBROADCAST(F|I)64X2rmk(z?)$",
+ "^VBROADCASTS(D|S)Zrmk(z?)$",
+ "^VMOV(A|U)P(D|S)Zrmk(z?)$",
+ "^VMOV(D|SH|SL)DUPZrmk(z?)$",
+ "^VMOVDQ(A|U)(32|64)Zrmk(z?)$",
+ "^VPBROADCAST(D|Q)Zrmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup131, ReadAfterVecLd], (instregex "^MMX_P(ADD|SUB)(B|D|Q|W)rm$")>;
+def : InstRW<[SPRWriteResGroup131, ReadAfterVecYLd], (instregex "^VINSERT(F|I)(32|64)x4Zrm((k|kz)?)$",
+ "^VINSERT(F|I)(32x8|64x2)Zrm((k|kz)?)$",
+ "^VP(ADD|SUB)(B|D|Q|W)Zrm$",
+ "^VP(ADD|SUB)(D|Q)Zrm(b|k|kz)$",
+ "^VP(ADD|SUB)(D|Q)Zrmbk(z?)$",
+ "^VPTERNLOG(D|Q)Zrm(bi|ik)$",
+ "^VPTERNLOG(D|Q)Zrmbik(z?)$",
+ "^VPTERNLOG(D|Q)Zrmi((kz)?)$")>;
+
+def SPRWriteResGroup132 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 11;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup132, ReadAfterVecLd], (instregex "^MMX_PH(ADD|SUB)SWrm$")>;
+
+def SPRWriteResGroup133 : SchedWriteRes<[SPRPort00, SPRPort05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup133], (instregex "^MMX_PH(ADD|SUB)SWrr$")>;
+
+def SPRWriteResGroup134 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup134], (instregex "^VPBROADCAST(BY|WZ)rm$",
+ "^VPBROADCAST(B|W)Z256rm$",
+ "^VPBROADCAST(BZ|WY)rm$")>;
+def : InstRW<[SPRWriteResGroup134, ReadAfterLd], (instrs MMX_PINSRWrm)>;
+def : InstRW<[SPRWriteResGroup134, ReadAfterVecXLd], (instregex "^VFPCLASSP(D|S)Z128rm$")>;
+def : InstRW<[SPRWriteResGroup134, ReadAfterVecLd], (instregex "^VFPCLASSS(D|H|S)Zrm$")>;
+def : InstRW<[SPRWriteResGroup134, ReadAfterVecYLd], (instregex "^VPALIGNR(Y|Z256)rmi$")>;
+def : InstRW<[SPRWriteResGroup134, ReadAfterVecYLd], (instrs VPSHUFBZrm)>;
+
+def SPRWriteResGroup135 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup135], (instregex "^MOV16ao(16|32|64)$")>;
+
+def SPRWriteResGroup136 : SchedWriteRes<[SPRPort01, SPRPort04_09, SPRPort07_08]> {
+ let Latency = 12;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup136], (instregex "^PUSH(F|G)S(16|32)$")>;
+def : InstRW<[SPRWriteResGroup136], (instrs MOV16ms,
+ MOVBE32mr)>;
+
+def SPRWriteResGroup137 : SchedWriteRes<[SPRPort00_01_05_06_10]>;
+def : InstRW<[SPRWriteResGroup137], (instregex "^MOV(8|16|32|64)ri$",
+ "^MOV(8|16|32)ri_alt$",
+ "^MOV(8|16)rr((_REV)?)$")>;
+def : InstRW<[SPRWriteResGroup137], (instrs MOV64ri32,
+ MOV8rr_NOREX)>;
+
+def SPRWriteResGroup138 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01]> {
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup138], (instregex "^MOV(16|32|64)rs$",
+ "^S(TR|LDT)16r$")>;
+
+def SPRWriteResGroup139 : SchedWriteRes<[SPRPort02_03_11]>;
+def : InstRW<[SPRWriteResGroup139], (instregex "^MOV32ao(16|32|64)$")>;
+def : InstRW<[SPRWriteResGroup139], (instrs MOV64ao64)>;
+
+def SPRWriteResGroup140 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup140], (instregex "^MOV(8|32)o(16|32)a$",
+ "^MOV(8|32|64)o64a$")>;
+
+def SPRWriteResGroup141 : SchedWriteRes<[SPRPort00_01_05_06_10]> {
+ let Latency = 0;
+}
+def : InstRW<[SPRWriteResGroup141], (instregex "^MOV32rr((_REV)?)$",
+ "^MOVZX(32|64)rr8$")>;
+def : InstRW<[SPRWriteResGroup141], (instrs MOVZX32rr8_NOREX)>;
+
+def SPRWriteResGroup142 : SchedWriteRes<[SPRPort02_03_11]> {
+ let Latency = 5;
+}
+def : InstRW<[SPRWriteResGroup142], (instrs MOV64ao32)>;
+
+def SPRWriteResGroup143 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [1, 2, 4, 16, 7, 2, 2, 12, 2];
+ let Latency = 217;
+ let NumMicroOps = 48;
+}
+def : InstRW<[SPRWriteResGroup143], (instrs MOV64dr)>;
+
+def SPRWriteResGroup144 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 12;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup144], (instrs MOV64o32a)>;
+
+def SPRWriteResGroup145 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort05]> {
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup145], (instrs MOV64rc)>;
+
+def SPRWriteResGroup146 : SchedWriteRes<[SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort05]> {
+ let ResourceCycles = [3, 4, 8, 4, 2, 3];
+ let Latency = 181;
+ let NumMicroOps = 24;
+}
+def : InstRW<[SPRWriteResGroup146], (instrs MOV64rd)>;
+
+def SPRWriteResGroup147 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> {
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup147], (instregex "^MOV8ao(16|32|64)$")>;
+
+def SPRWriteResGroup148 : SchedWriteRes<[SPRPort00_06, SPRPort04_09, SPRPort07_08]> {
+ let Latency = 12;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup148], (instrs MOVBE16mr)>;
+
+def SPRWriteResGroup149 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort02_03_11]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup149], (instrs MOVBE16rm)>;
+
+def SPRWriteResGroup150 : SchedWriteRes<[SPRPort01, SPRPort02_03_11]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup150], (instrs MOVBE32rm)>;
+
+def SPRWriteResGroup151 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort04_09, SPRPort07_08]> {
+ let Latency = 12;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup151], (instrs MOVBE64mr,
+ PUSHF16,
+ SLDT16m,
+ STRm)>;
+
+def SPRWriteResGroup152 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort02_03_11]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup152], (instrs MOVBE64rm)>;
+
+def SPRWriteResGroup153 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup153], (instregex "^MOVDIR64B(16|32|64)$")>;
+
+def SPRWriteResGroup154 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 511;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup154], (instrs MOVDIRI32)>;
+
+def SPRWriteResGroup155 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 514;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup155], (instrs MOVDIRI64)>;
+
+def SPRWriteResGroup156 : SchedWriteRes<[SPRPort01_05, SPRPort02_03_11]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup156, ReadAfterVecXLd], (instregex "^(V?)MOVLP(D|S)rm$",
+ "^(V?)SHUFP(D|S)rmi$",
+ "^VMOVLP(D|S)Z128rm$",
+ "^VSHUFP(D|S)Z128rm(bi|ik)$",
+ "^VSHUFP(D|S)Z128rmbik(z?)$",
+ "^VSHUFP(D|S)Z128rmi((kz)?)$")>;
+
+def SPRWriteResGroup157 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 512;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup157], (instrs MOVNTDQmr)>;
+
+def SPRWriteResGroup158 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 518;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup158], (instrs MOVNTImr)>;
+
+def SPRWriteResGroup159 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [4, 1, 1, 1];
+ let Latency = 8;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup159], (instrs MOVSB)>;
+
+def SPRWriteResGroup160 : SchedWriteRes<[SPRPort00_01_05]>;
+def : InstRW<[SPRWriteResGroup160], (instregex "^(V?)MOVS(D|S)rr((_REV)?)$",
+ "^(V?)P(ADD|SUB)(B|D|Q|W)rr$",
+ "^VMOV(A|U)P(D|S)Z(128|256)rrk(z?)((_REV)?)$",
+ "^VMOVDQ(A|U)(32|64)Z128rrk(z?)((_REV)?)$",
+ "^VMOVS(D|H|S)Zrr((_REV)?)$",
+ "^VMOVS(D|S)Zrrk(z?)((_REV)?)$",
+ "^VP(ADD|SUB)(B|D|Q|W)Yrr$",
+ "^VP(ADD|SUB)(B|D|Q|W)Z(128|256)rr$",
+ "^VP(ADD|SUB)(D|Q)Z(128|256)rrk(z?)$",
+ "^VPMOVM2(D|Q)Z128rr$",
+ "^VPTERNLOG(D|Q)Z(128|256)rri((k|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup160], (instrs VPBLENDDrri)>;
+
+def SPRWriteResGroup161 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [4, 1, 1, 1];
+ let Latency = 7;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup161], (instregex "^MOVS(L|Q|W)$")>;
+
+def SPRWriteResGroup162 : SchedWriteRes<[SPRPort02_03_11]> {
+ let Latency = 6;
+}
+def : InstRW<[SPRWriteResGroup162], (instregex "^MOVSX(16|32|64)rm(16|32)$",
+ "^MOVSX(32|64)rm8$")>;
+def : InstRW<[SPRWriteResGroup162], (instrs MOVSX32rm8_NOREX)>;
+
+def SPRWriteResGroup163 : SchedWriteRes<[SPRPort01_05_10, SPRPort02_03_11]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup163], (instrs MOVSX16rm8)>;
+
+def SPRWriteResGroup164 : SchedWriteRes<[SPRPort01_05_10]>;
+def : InstRW<[SPRWriteResGroup164], (instregex "^MOVSX(16|32|64)rr(8|16|32)$")>;
+def : InstRW<[SPRWriteResGroup164], (instrs MOVSX32rr8_NOREX)>;
+
+def SPRWriteResGroup165 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup165], (instregex "^MUL_F(32|64)m$",
+ "^VPABS(B|W)Zrmk(z?)$",
+ "^VPS(L|R)LWZmik(z?)$",
+ "^VPSRAWZmik(z?)$")>;
+def : InstRW<[SPRWriteResGroup165, ReadAfterVecYLd], (instregex "^VP(ADD|SUB)(U?)S(B|W)Zrmk(z?)$",
+ "^VPAVG(B|W)Zrmk(z?)$",
+ "^VPM(AX|IN)(SB|UW)Zrmk(z?)$",
+ "^VPM(AX|IN)(SW|UB)Zrmk(z?)$",
+ "^VPSH(L|R)DVWZmk(z?)$",
+ "^VPS(L|R)L(V?)WZrmk(z?)$",
+ "^VPSRA(V?)WZrmk(z?)$")>;
+
+def SPRWriteResGroup166 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 14;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup166], (instregex "^MUL_FI(16|32)m$")>;
+
+def SPRWriteResGroup167 : SchedWriteRes<[SPRPort00]> {
+ let Latency = 4;
+}
+def : InstRW<[SPRWriteResGroup167], (instregex "^MUL_F(P?)rST0$",
+ "^V(U?)COMISHZrr(b?)$",
+ "^V(U?)COMISHZrr_Int$",
+ "^VCVT(T?)PD2(U?)QQZrr((b|k|bk|kz)?)$",
+ "^VCVT(T?)PD2(U?)QQZrrbkz$",
+ "^VCVT(T?)PS2(U?)DQZrr((b|k|bk|kz)?)$",
+ "^VCVT(T?)PS2(U?)DQZrrbkz$",
+ "^VM(AX|IN)(C?)PSZrr((k|kz)?)$",
+ "^VM(AX|IN)PSZrrb((k|kz)?)$",
+ "^VPLZCNT(D|Q)Zrr((k|kz)?)$",
+ "^VPMADD52(H|L)UQZr((k|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup167], (instrs MUL_FST0r)>;
+
+def SPRWriteResGroup168 : SchedWriteRes<[SPRPort00_01_05_06, SPRPort05, SPRPort06]> {
+ let ResourceCycles = [7, 1, 2];
+ let Latency = 20;
+ let NumMicroOps = 10;
+}
+def : InstRW<[SPRWriteResGroup168], (instrs MWAITrr)>;
+
+def SPRWriteResGroup169 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [6, 4, 1, 28, 15, 7, 1, 16, 1];
+ let Latency = 35;
+ let NumMicroOps = 79;
+}
+def : InstRW<[SPRWriteResGroup169], (instrs OUT16ir)>;
+
+def SPRWriteResGroup170 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [6, 6, 27, 15, 7, 1, 16, 1];
+ let Latency = 35;
+ let NumMicroOps = 79;
+}
+def : InstRW<[SPRWriteResGroup170], (instrs OUT16rr)>;
+
+def SPRWriteResGroup171 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [6, 4, 1, 30, 15, 9, 1, 18, 1];
+ let Latency = 35;
+ let NumMicroOps = 85;
+}
+def : InstRW<[SPRWriteResGroup171], (instrs OUT32ir)>;
+
+def SPRWriteResGroup172 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [6, 6, 29, 15, 9, 1, 18, 1];
+ let Latency = 35;
+ let NumMicroOps = 85;
+}
+def : InstRW<[SPRWriteResGroup172], (instrs OUT32rr)>;
+
+def SPRWriteResGroup173 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [5, 5, 1, 25, 15, 5, 1, 15, 1];
+ let Latency = 35;
+ let NumMicroOps = 73;
+}
+def : InstRW<[SPRWriteResGroup173], (instrs OUT8ir)>;
+
+def SPRWriteResGroup174 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [5, 5, 26, 15, 5, 1, 15, 1];
+ let Latency = 35;
+ let NumMicroOps = 73;
+}
+def : InstRW<[SPRWriteResGroup174], (instrs OUT8rr)>;
+
+def SPRWriteResGroup175 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [7, 6, 25, 16, 7, 1, 17, 1];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 80;
+}
+def : InstRW<[SPRWriteResGroup175], (instrs OUTSB)>;
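+
+// SapphireRapidsModel.MaxLatency stands in for a concrete cycle count in the
+// groups that use it; it appears to be reserved for microcoded or
+// serializing operations (the OUTS forms, PTWRITE, RDMSR and similar) whose
+// latency is not usefully modelled as a fixed number of cycles.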
+
+def SPRWriteResGroup176 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [7, 6, 28, 16, 10, 1, 20, 1];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 89;
+}
+def : InstRW<[SPRWriteResGroup176], (instrs OUTSL)>;
+
+def SPRWriteResGroup177 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [6, 1, 5, 27, 16, 8, 1, 18, 1];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 83;
+}
+def : InstRW<[SPRWriteResGroup177], (instrs OUTSW)>;
+
+def SPRWriteResGroup178 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup178], (instregex "^VBROADCASTI32X2Z128rmk(z?)$",
+ "^VBROADCASTSSZ128rmk(z?)$",
+ "^VMOV(A|U)P(D|S)Z128rmk(z?)$",
+ "^VMOV(D|SH|SL)DUPZ128rmk(z?)$",
+ "^VMOVDQ(A|U)(32|64)Z128rmk(z?)$",
+ "^VMOVS(D|S)Zrmk(z?)$",
+ "^VPBROADCAST(D|Q)Z128rmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup178, ReadAfterVecXLd], (instregex "^(V?)P(ADD|SUB)(B|D|Q|W)rm$",
+ "^VP(ADD|SUB)(B|D|Q|W)Z128rm$",
+ "^VP(ADD|SUB)(D|Q)Z128rm(b|k|kz)$",
+ "^VP(ADD|SUB)(D|Q)Z128rmbk(z?)$",
+ "^VPTERNLOG(D|Q)Z128rm(bi|ik)$",
+ "^VPTERNLOG(D|Q)Z128rmbik(z?)$",
+ "^VPTERNLOG(D|Q)Z128rmi((kz)?)$")>;
+def : InstRW<[SPRWriteResGroup178, ReadAfterVecXLd], (instrs VPBLENDDrmi)>;
+
+def SPRWriteResGroup179 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup179], (instregex "^VPBROADCAST(B|W)((Z128)?)rm$")>;
+def : InstRW<[SPRWriteResGroup179, ReadAfterVecXLd], (instregex "^(V?)PALIGNRrmi$",
+ "^VALIGN(D|Q)Z128rm(bi|ik)$",
+ "^VALIGN(D|Q)Z128rmbik(z?)$",
+ "^VALIGN(D|Q)Z128rmi((kz)?)$")>;
+def : InstRW<[SPRWriteResGroup179, ReadAfterVecXLd], (instrs VPALIGNRZ128rmi)>;
+
+def SPRWriteResGroup180 : SchedWriteRes<[SPRPort00_06, SPRPort05]> {
+ let Latency = 140;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup180], (instrs PAUSE)>;
+
+def SPRWriteResGroup181 : SchedWriteRes<[SPRPort01, SPRPort02_03_11]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup181, ReadAfterLd], (instregex "^P(DEP|EXT)(32|64)rm$")>;
+
+def SPRWriteResGroup182 : SchedWriteRes<[SPRPort01_05, SPRPort04_09, SPRPort07_08]> {
+ let Latency = 12;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup182], (instregex "^(V?)PEXTR(D|Q)mr$",
+ "^VPEXTR(D|Q)Zmr$",
+ "^VPMOVQDZ128mr(k?)$")>;
+
+def SPRWriteResGroup183 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [1, 2, 1];
+ let Latency = 9;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup183, ReadAfterVecXLd], (instregex "^(V?)PH(ADD|SUB)SWrm$")>;
+
+def SPRWriteResGroup184 : SchedWriteRes<[SPRPort00_01, SPRPort01_05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup184], (instregex "^(V?)PH(ADD|SUB)SWrr$",
+ "^VPH(ADD|SUB)SWYrr$")>;
+
+def SPRWriteResGroup185 : SchedWriteRes<[SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let Latency = 12;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup185], (instregex "^POP(16|32|64)rmm$",
+ "^PUSH(16|32)rmm$")>;
+
+def SPRWriteResGroup186 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort02_03_11]> {
+ let ResourceCycles = [6, 2, 1, 1];
+ let Latency = 5;
+ let NumMicroOps = 10;
+}
+def : InstRW<[SPRWriteResGroup186], (instrs POPF16)>;
+
+def SPRWriteResGroup187 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 5;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup187], (instrs POPF64)>;
+
+def SPRWriteResGroup188 : SchedWriteRes<[SPRPort02_03_11]> {
+ let Latency = 0;
+}
+def : InstRW<[SPRWriteResGroup188], (instregex "^PREFETCHT(0|1|2)$")>;
+def : InstRW<[SPRWriteResGroup188], (instrs PREFETCHNTA)>;
+
+def SPRWriteResGroup189 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11, SPRPort06]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup189], (instregex "^PTWRITE((64)?)m$")>;
+
+def SPRWriteResGroup190 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort06]> {
+ let ResourceCycles = [1, 2];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup190], (instrs PTWRITE64r)>;
+
+def SPRWriteResGroup191 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort06]> {
+ let ResourceCycles = [2, 2];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup191], (instrs PTWRITEr)>;
+
+def SPRWriteResGroup192 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup192], (instregex "^PUSH64r((mr)?)$")>;
+
+def SPRWriteResGroup193 : SchedWriteRes<[SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup193], (instrs PUSH64rmm)>;
+
+def SPRWriteResGroup194 : SchedWriteRes<[SPRPort00_06, SPRPort01, SPRPort04_09, SPRPort07_08]> {
+ let Latency = 4;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup194], (instrs PUSHF64)>;
+
+def SPRWriteResGroup195 : SchedWriteRes<[SPRPort01, SPRPort04_09, SPRPort07_08]> {
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup195], (instregex "^PUSH(F|G)S64$")>;
+
+def SPRWriteResGroup196 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
+ let ResourceCycles = [2, 3, 2];
+ let Latency = 8;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup196], (instregex "^RC(L|R)(16|32|64)rCL$")>;
+
+def SPRWriteResGroup197 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 13;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup197, WriteRMW], (instregex "^RC(L|R)8m(1|i)$")>;
+
+def SPRWriteResGroup198 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
+ let ResourceCycles = [1, 5, 2];
+ let Latency = 20;
+ let NumMicroOps = 8;
+}
+def : InstRW<[SPRWriteResGroup198, WriteRMW], (instrs RCL8mCL)>;
+
+def SPRWriteResGroup199 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
+ let ResourceCycles = [2, 5, 2];
+ let Latency = 7;
+ let NumMicroOps = 9;
+}
+def : InstRW<[SPRWriteResGroup199], (instrs RCL8rCL)>;
+
+def SPRWriteResGroup200 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
+ let ResourceCycles = [2, 4, 3];
+ let Latency = 20;
+ let NumMicroOps = 9;
+}
+def : InstRW<[SPRWriteResGroup200, WriteRMW], (instrs RCR8mCL)>;
+
+def SPRWriteResGroup201 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
+ let ResourceCycles = [3, 4, 3];
+ let Latency = 9;
+ let NumMicroOps = 10;
+}
+def : InstRW<[SPRWriteResGroup201], (instrs RCR8rCL)>;
+
+def SPRWriteResGroup202 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort01_05_10, SPRPort05]> {
+ let ResourceCycles = [1, 6, 1, 10, 20, 8, 5, 1, 2];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 54;
+}
+def : InstRW<[SPRWriteResGroup202], (instrs RDMSR)>;
+
+def SPRWriteResGroup203 : SchedWriteRes<[SPRPort01]> {
+ let Latency = SapphireRapidsModel.MaxLatency;
+}
+def : InstRW<[SPRWriteResGroup203], (instrs RDPID64)>;
+
+def SPRWriteResGroup204 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup204], (instrs RDPKRUr)>;
+
+def SPRWriteResGroup205 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort05]> {
+ let ResourceCycles = [9, 6, 2, 1];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 18;
+}
+def : InstRW<[SPRWriteResGroup205], (instrs RDPMC)>;
+
+def SPRWriteResGroup206 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 3, 2, 5, 7, 3, 1, 2];
+ let Latency = 1386;
+ let NumMicroOps = 25;
+}
+def : InstRW<[SPRWriteResGroup206], (instrs RDRAND16r)>;
+
+def SPRWriteResGroup207 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 3, 2, 5, 7, 3, 1, 2];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 25;
+}
+def : InstRW<[SPRWriteResGroup207], (instregex "^RDRAND(32|64)r$")>;
+
+def SPRWriteResGroup208 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 3, 3, 5, 7, 1, 4];
+ let Latency = 1381;
+ let NumMicroOps = 25;
+}
+def : InstRW<[SPRWriteResGroup208], (instrs RDSEED16r)>;
+
+def SPRWriteResGroup209 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 3, 3, 5, 7, 1, 4];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 25;
+}
+def : InstRW<[SPRWriteResGroup209], (instregex "^RDSEED(32|64)r$")>;
+
+def SPRWriteResGroup210 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort05]> {
+ let ResourceCycles = [5, 6, 3, 1];
+ let Latency = 18;
+ let NumMicroOps = 15;
+}
+def : InstRW<[SPRWriteResGroup210], (instrs RDTSC)>;
+
+def SPRWriteResGroup211 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort05]> {
+ let ResourceCycles = [2, 2, 1, 2, 7, 4, 3];
+ let Latency = 42;
+ let NumMicroOps = 21;
+}
+def : InstRW<[SPRWriteResGroup211], (instrs RDTSCP)>;
+
+def SPRWriteResGroup212 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup212], (instrs RET64)>;
+
+def SPRWriteResGroup213 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup213], (instregex "^RETI(16|32|64)$")>;
+
+def SPRWriteResGroup214 : SchedWriteRes<[]>;
+def : InstRW<[SPRWriteResGroup214], (instrs REX64_PREFIX)>;
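+
+// The empty resource list on SPRWriteResGroup214 models a write that
+// consumes no port resources: the standalone REX prefix is treated as free
+// by this scheduling model.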
+
+def SPRWriteResGroup215 : SchedWriteRes<[SPRPort00_06]> {
+ let ResourceCycles = [2];
+ let Latency = 12;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup215, WriteRMW], (instregex "^RO(L|R)(16|32|64)m(1|i|CL)$")>;
+
+def SPRWriteResGroup216 : SchedWriteRes<[SPRPort00_06]> {
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup216], (instregex "^RO(L|R)(8|16|32|64)r(1|i)$")>;
+
+def SPRWriteResGroup217 : SchedWriteRes<[SPRPort00_06]> {
+ let ResourceCycles = [2];
+ let Latency = 13;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup217, WriteRMW], (instregex "^RO(L|R)8m(1|i)$",
+ "^(RO|SH)L8mCL$",
+ "^(RO|SA|SH)R8mCL$")>;
+
+def SPRWriteResGroup218 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 15;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup218], (instregex "^(V?)ROUNDP(D|S)m$")>;
+def : InstRW<[SPRWriteResGroup218, ReadAfterVecXLd], (instregex "^(V?)ROUNDS(D|S)m((_Int)?)$",
+ "^VRNDSCALEP(D|S)Z128rm(bi|ik)$",
+ "^VRNDSCALEP(D|S)Z128rmbik(z?)$",
+ "^VRNDSCALEP(D|S)Z128rmi((kz)?)$",
+ "^VRNDSCALES(D|S)Zm$",
+ "^VRNDSCALES(D|S)Zm_Int((k|kz)?)$")>;
+
+def SPRWriteResGroup219 : SchedWriteRes<[SPRPort00_01]> {
+ let ResourceCycles = [2];
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup219], (instregex "^(V?)ROUND(PD|SS)r$",
+ "^(V?)ROUND(PS|SD)r$",
+ "^(V?)ROUNDS(D|S)r_Int$",
+ "^VRNDSCALEP(D|S)Z(128|256)rri((k|kz)?)$",
+ "^VRNDSCALES(D|S)Zr$",
+ "^VRNDSCALES(D|S)Zr(b?)_Int((k|kz)?)$",
+ "^VROUNDP(D|S)Yr$")>;
+
+def SPRWriteResGroup220 : SchedWriteRes<[SPRPort00_06]> {
+ let ResourceCycles = [2];
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup220], (instrs SAHF)>;
+
+def SPRWriteResGroup221 : SchedWriteRes<[SPRPort00_06]> {
+ let Latency = 13;
+}
+def : InstRW<[SPRWriteResGroup221, WriteRMW], (instregex "^S(A|H)R8m(1|i)$",
+ "^SHL8m(1|i)$")>;
+
+def SPRWriteResGroup222 : SchedWriteRes<[SPRPort00_06, SPRPort02_03_11]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup222, ReadAfterLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^S(A|H)RX(32|64)rm$",
+ "^SHLX(32|64)rm$")>;
+
+def SPRWriteResGroup223 : SchedWriteRes<[SPRPort00_06]> {
+ let Latency = 3;
+}
+def : InstRW<[SPRWriteResGroup223], (instregex "^S(A|H)RX(32|64)rr$",
+ "^SHLX(32|64)rr$")>;
+
+def SPRWriteResGroup224 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [2, 2, 1, 1, 1];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup224], (instrs SERIALIZE)>;
+
+def SPRWriteResGroup225 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup225], (instrs SFENCE)>;
+
+def SPRWriteResGroup226 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [1, 2, 2, 2];
+ let Latency = 21;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup226], (instregex "^S(G|I)DT64m$")>;
+
+def SPRWriteResGroup227 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup227, ReadAfterVecXLd], (instrs SHA1MSG1rm)>;
+
+def SPRWriteResGroup228 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup228], (instrs SHA1MSG1rr)>;
+
+def SPRWriteResGroup229 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 2, 1, 2, 1];
+ let Latency = 13;
+ let NumMicroOps = 8;
+}
+def : InstRW<[SPRWriteResGroup229, ReadAfterVecXLd], (instrs SHA1MSG2rm)>;
+
+def SPRWriteResGroup230 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort01_05]> {
+ let ResourceCycles = [2, 2, 1, 2];
+ let Latency = 6;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup230], (instrs SHA1MSG2rr)>;
+
+def SPRWriteResGroup231 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup231, ReadAfterVecXLd], (instrs SHA1NEXTErm)>;
+
+def SPRWriteResGroup232 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup232], (instrs SHA1NEXTErr)>;
+
+def SPRWriteResGroup233 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> {
+ let Latency = 13;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup233], (instregex "^VPMOV(S|Z)XBWZ((256)?)rmk(z?)$",
+ "^VPOPCNT(B|W)Z(128|256)rmk(z?)$",
+ "^VPOPCNT(B|W)Zrmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup233, ReadAfterVecXLd], (instregex "^VDBPSADBWZ128rmik(z?)$",
+ "^VPACK(S|U)SDWZ128rm(bk|kz)$",
+ "^VPACK(S|U)SDWZ128rmbkz$",
+ "^VPACK(S|U)S(DW|WB)Z128rmk$",
+ "^VPACK(S|U)SWBZ128rmkz$",
+ "^VPMULTISHIFTQBZ128rm(bk|kz)$",
+ "^VPMULTISHIFTQBZ128rm(k|bkz)$")>;
+def : InstRW<[SPRWriteResGroup233, ReadAfterVecXLd], (instrs SHA1RNDS4rmi,
+ SHA256RNDS2rm)>;
+def : InstRW<[SPRWriteResGroup233, ReadAfterVecYLd], (instregex "^VDBPSADBWZ((256)?)rmik(z?)$",
+ "^VPACK(S|U)SDWZ((256)?)rm(bk|kz)$",
+ "^VPACK(S|U)SDWZ((256)?)rmbkz$",
+ "^VPACK(S|U)S(DW|WB)Z((256)?)rmk$",
+ "^VPACK(S|U)SWBZ((256)?)rmkz$",
+ "^VPERMBZ(128|256)rmk(z?)$",
+ "^VPERMBZrmk(z?)$",
+ "^VPMULTISHIFTQBZ((256)?)rm(bk|kz)$",
+ "^VPMULTISHIFTQBZ((256)?)rm(k|bkz)$")>;
+
+def SPRWriteResGroup234 : SchedWriteRes<[SPRPort05]> {
+ let Latency = 6;
+}
+def : InstRW<[SPRWriteResGroup234], (instrs SHA1RNDS4rri,
+ SHA256RNDS2rr)>;
+
+def SPRWriteResGroup235 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [3, 2, 1, 1, 1];
+ let Latency = 12;
+ let NumMicroOps = 8;
+}
+def : InstRW<[SPRWriteResGroup235, ReadAfterVecXLd], (instrs SHA256MSG1rm)>;
+
+def SPRWriteResGroup236 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort05]> {
+ let ResourceCycles = [3, 2, 1, 1];
+ let Latency = 5;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup236], (instrs SHA256MSG1rr)>;
+
+def SPRWriteResGroup237 : SchedWriteRes<[SPRPort05]> {
+ let ResourceCycles = [2];
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup237], (instregex "^VPMOV(D|Q|W|SQ|SW)BZrrk(z?)$",
+ "^VPMOV((S|US)?)(D|Q)WZrrk(z?)$",
+ "^VPMOV(U?)SDBZrrk(z?)$",
+ "^VPMOVUS(Q|W)BZrrk(z?)$")>;
+def : InstRW<[SPRWriteResGroup237], (instrs SHA256MSG2rr)>;
+
+def SPRWriteResGroup238 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort07_08]> {
+ let Latency = 13;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup238], (instrs SHRD16mri8)>;
+
+def SPRWriteResGroup239 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup239], (instregex "^SLDT(32|64)r$")>;
+
+def SPRWriteResGroup240 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort05]> {
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup240], (instrs SMSW16r)>;
+
+def SPRWriteResGroup241 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort05]> {
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup241], (instregex "^SMSW(32|64)r$")>;
+
+def SPRWriteResGroup242 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let Latency = 24;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup242, ReadAfterVecLd], (instregex "^(V?)SQRTSDm_Int$")>;
+def : InstRW<[SPRWriteResGroup242, ReadAfterVecLd], (instrs VSQRTSDZm_Int)>;
+
+def SPRWriteResGroup243 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup243], (instrs STD)>;
+
+def SPRWriteResGroup244 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01]> {
+ let ResourceCycles = [1, 4, 1];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup244], (instrs STI)>;
+
+def SPRWriteResGroup245 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup245], (instrs STOSB)>;
+
+def SPRWriteResGroup246 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 7;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup246], (instregex "^STOS(L|Q|W)$")>;
+
+def SPRWriteResGroup247 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort01]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup247], (instregex "^STR(32|64)r$")>;
+
+def SPRWriteResGroup248 : SchedWriteRes<[SPRPort00]> {
+ let Latency = 2;
+}
+def : InstRW<[SPRWriteResGroup248], (instregex "^(TST|XAM)_F$")>;
+def : InstRW<[SPRWriteResGroup248], (instrs UCOM_FPPr)>;
+
+def SPRWriteResGroup249 : SchedWriteRes<[SPRPort01_05]> {
+ let Latency = 4;
+}
+def : InstRW<[SPRWriteResGroup249], (instregex "^V(ADD|SUB)P(D|S)Z(128|256)rrkz$",
+ "^V(ADD|SUB)S(D|S)Zrr(b?)_Intkz$")>;
+
+def SPRWriteResGroup250 : SchedWriteRes<[SPRPort00_05]> {
+ let Latency = 3;
+}
+def : InstRW<[SPRWriteResGroup250], (instregex "^V(ADD|SUB)P(D|S)Zrr(b?)$",
+ "^VMOVDQU(8|16)Zrrk(z?)((_REV)?)$",
+ "^VP(ADD|SUB)(B|W)Zrrk(z?)$",
+ "^VPBLENDM(B|W)Zrrk(z?)$",
+ "^VPMOVM2(B|W)Zrr$")>;
+
+def SPRWriteResGroup251 : SchedWriteRes<[SPRPort00_01]> {
+ let Latency = 6;
+}
+def : InstRW<[SPRWriteResGroup251], (instregex "^V(ADD|SUB)PHZ(128|256)rrk(z?)$",
+ "^V(ADD|SUB)SHZrr(b?)_Intk(z?)$",
+ "^VCVT(T?)PH2(U?)WZ(128|256)rrk(z?)$",
+ "^VCVT(U?)W2PHZ(128|256)rrk(z?)$",
+ "^VF(N?)M(ADD|SUB)(132|213|231)PHZ(128|256)rk(z?)$",
+ "^VF(N?)M(ADD|SUB)(132|213|231)SHZr(b?)_Intk(z?)$",
+ "^VFMADDSUB(132|213|231)PHZ(128|256)rk(z?)$",
+ "^VFMSUBADD(132|213|231)PHZ(128|256)rk(z?)$",
+ "^VGETEXPPHZ(128|256)rk(z?)$",
+ "^VGETEXPSHZr(bk|kz)$",
+ "^VGETEXPSHZr(k|bkz)$",
+ "^VGETMANTPHZ(128|256)rrik(z?)$",
+ "^VGETMANTSHZrri(bk|kz)$",
+ "^VGETMANTSHZrri(k|bkz)$",
+ "^VM(AX|IN)CPHZ(128|256)rrk(z?)$",
+ "^VM(AX|IN|UL)PHZ(128|256)rrk(z?)$",
+ "^VM(AX|IN|UL)SHZrr(b?)_Intk(z?)$")>;
+
+def SPRWriteResGroup252 : SchedWriteRes<[SPRPort00]> {
+ let Latency = 5;
+}
+def : InstRW<[SPRWriteResGroup252], (instregex "^V(ADD|SUB)PHZrr(b?)$",
+ "^VAES(DE|EN)C((LAST)?)Zrr$",
+ "^VCVT(T?)PH2(U?)WZrr(b?)$",
+ "^VCVT(U?)W2PHZrr(b?)$",
+ "^VF(N?)M(ADD|SUB)(132|213|231)PHZr(b?)$",
+ "^VFMADDSUB(132|213|231)PHZr(b?)$",
+ "^VFMSUBADD(132|213|231)PHZr(b?)$",
+ "^VGETEXPPHZr(b?)$",
+ "^VGETMANTPHZrri(b?)$",
+ "^VM(AX|IN)CPHZrr$",
+ "^VM(AX|IN|UL)PHZrr(b?)$",
+ "^VMOVMSKP(D|S)Yrr$")>;
+def : InstRW<[SPRWriteResGroup252], (instrs VGF2P8MULBZrr)>;
+
+def SPRWriteResGroup253 : SchedWriteRes<[SPRPort00]> {
+ let Latency = 6;
+}
+def : InstRW<[SPRWriteResGroup253], (instregex "^V(ADD|SUB)PHZrr(bk|kz)$",
+ "^V(ADD|SUB)PHZrr(k|bkz)$",
+ "^VCVT(T?)PH2(U?)WZrr(bk|kz)$",
+ "^VCVT(T?)PH2(U?)WZrr(k|bkz)$",
+ "^VCVT(U?)W2PHZrr(bk|kz)$",
+ "^VCVT(U?)W2PHZrr(k|bkz)$",
+ "^VF(N?)M(ADD|SUB)(132|213|231)PHZr(bk|kz)$",
+ "^VF(N?)M(ADD|SUB)(132|213|231)PHZr(k|bkz)$",
+ "^VFMADDSUB(132|213|231)PHZr(bk|kz)$",
+ "^VFMADDSUB(132|213|231)PHZr(k|bkz)$",
+ "^VFMSUBADD(132|213|231)PHZr(bk|kz)$",
+ "^VFMSUBADD(132|213|231)PHZr(k|bkz)$",
+ "^VGETEXPPHZr(bk|kz)$",
+ "^VGETEXPPHZr(k|bkz)$",
+ "^VGETMANTPHZrri(bk|kz)$",
+ "^VGETMANTPHZrri(k|bkz)$",
+ "^VM(AX|IN)CPHZrrk(z?)$",
+ "^VM(AX|IN|UL)PHZrr(bk|kz)$",
+ "^VM(AX|IN|UL)PHZrr(k|bkz)$")>;
+
+def SPRWriteResGroup254 : SchedWriteRes<[SPRPort01_05, SPRPort02_03_11]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup254], (instregex "^VPMOV(S|Z)XBWZ128rmk(z?)$",
+ "^VPSHUF(H|L)WZ(128|256)mik(z?)$")>;
+def : InstRW<[SPRWriteResGroup254, ReadAfterVecYLd], (instregex "^V(ADD|SUB)PSYrm$",
+ "^V(ADD|SUB)PSZ256rm((b|k|bk|kz)?)$",
+ "^V(ADD|SUB)PSZ256rmbkz$",
+ "^VPSHUFBZ256rmk(z?)$",
+ "^VPUNPCK(H|L)(BW|WD)Z256rmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup254, ReadAfterVecYLd], (instrs VADDSUBPSYrm)>;
+def : InstRW<[SPRWriteResGroup254, ReadAfterVecXLd], (instregex "^VPSHUFBZ128rmk(z?)$",
+ "^VPUNPCK(H|L)(BW|WD)Z128rmk(z?)$")>;
+
+def SPRWriteResGroup255 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup255], (instregex "^VMOVDQU(8|16)Zrmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup255, ReadAfterVecYLd], (instregex "^V(ADD|SUB)PSZrm((b|k|bk|kz)?)$",
+ "^V(ADD|SUB)PSZrmbkz$",
+ "^VP(ADD|SUB)(B|W)Zrmk(z?)$",
+ "^VPBLENDM(B|W)Zrmk(z?)$")>;
+
+def SPRWriteResGroup256 : SchedWriteRes<[SPRPort00_05]> {
+ let Latency = 4;
+}
+def : InstRW<[SPRWriteResGroup256], (instregex "^V(ADD|SUB)PSZrr(bk|kz)$",
+ "^V(ADD|SUB)PSZrr(k|bkz)$")>;
+
+def SPRWriteResGroup257 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let Latency = 12;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup257], (instregex "^VCVT(T?)PS2(U?)DQZrm((b|k|bk|kz)?)$",
+ "^VCVT(T?)PS2(U?)DQZrmbkz$",
+ "^VPLZCNT(D|Q)Zrm((b|k|bk|kz)?)$",
+ "^VPLZCNT(D|Q)Zrmbkz$")>;
+def : InstRW<[SPRWriteResGroup257, ReadAfterVecXLd], (instregex "^VAES(DE|EN)C((LAST)?)Zrm$")>;
+def : InstRW<[SPRWriteResGroup257, ReadAfterVecYLd], (instregex "^VGF2P8AFFINE((INV)?)QBZrm(b?)i$")>;
+def : InstRW<[SPRWriteResGroup257, ReadAfterVecYLd], (instrs VGF2P8MULBZrm)>;
+def : InstRW<[SPRWriteResGroup257, ReadAfterVecYLd, ReadAfterVecYLd], (instregex "^VPMADD52(H|L)UQZm((b|k|bk|kz)?)$",
+ "^VPMADD52(H|L)UQZmbkz$")>;
+
+def SPRWriteResGroup258 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup258], (instregex "^VPBROADCAST(B|W)Z128rmk(z?)$",
+ "^VPOPCNT(B|D|Q|W)Z((256)?)rm$",
+ "^VPOPCNT(D|Q)Z((256)?)rm(b|k|kz)$",
+ "^VPOPCNT(D|Q)Z((256)?)rmbk(z?)$",
+ "^VPSHUF(H|L)WZmik(z?)$")>;
+def : InstRW<[SPRWriteResGroup258, ReadAfterVecYLd], (instregex "^VALIGN(D|Q)Z((256)?)rm(bi|ik)$",
+ "^VALIGN(D|Q)Z((256)?)rmbik(z?)$",
+ "^VALIGN(D|Q)Z((256)?)rmi((kz)?)$",
+ "^VFPCLASSP(D|H|S)Z((256)?)rmb$",
+ "^VPACK(S|U)S(DW|WB)(Y|Z)rm$",
+ "^VPACK(S|U)S(DW|WB)Z256rm$",
+ "^VPACK(S|U)SDWZ((256)?)rmb$",
+ "^VPALIGNRZ((256)?)rmik(z?)$",
+ "^VPM(AX|IN)(S|U)QZ((256)?)rm((b|k|bk|kz)?)$",
+ "^VPM(AX|IN)(S|U)QZ((256)?)rmbkz$",
+ "^VPMULTISHIFTQBZ((256)?)rm(b?)$",
+ "^VPUNPCK(H|L)(BW|WD)Zrmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup258, ReadAfterVecYLd], (instrs VPCMPGTQYrm)>;
+def : InstRW<[SPRWriteResGroup258, ReadAfterVecXLd], (instregex "^VPALIGNRZ128rmik(z?)$",
+ "^VPCLMULQDQ(Y|Z)rm$")>;
+def : InstRW<[SPRWriteResGroup258, ReadAfterVecXLd], (instrs VPCLMULQDQZ256rm)>;
+
+def SPRWriteResGroup259 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [3, 1];
+ let Latency = 10;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup259, ReadAfterVecYLd, ReadAfterVecYLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)Yrm$")>;
+def : InstRW<[SPRWriteResGroup259, ReadAfterVecYLd, ReadAfterVecYLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBYrm)>;
+
+def SPRWriteResGroup260 : SchedWriteRes<[SPRPort00_01_05]> {
+ let ResourceCycles = [3];
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup260], (instregex "^VBLENDVP(S|DY)rr$",
+ "^VBLENDVP(D|SY)rr$",
+ "^VPBLENDVB(Y?)rr$")>;
+
+def SPRWriteResGroup261 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [3, 1];
+ let Latency = 9;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup261, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instregex "^VBLENDVP(D|S)rm$")>;
+def : InstRW<[SPRWriteResGroup261, ReadAfterVecXLd, ReadAfterVecXLd, ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault], (instrs VPBLENDVBrm)>;
+
+def SPRWriteResGroup262 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup262], (instregex "^VBROADCAST(F|I)32X(2|4)Z256rmk(z?)$",
+ "^VBROADCAST(F|I)64X2Z128rmk(z?)$",
+ "^VBROADCASTS(D|S)Z256rmk(z?)$",
+ "^VMOV(A|U)P(D|S)Z256rmk(z?)$",
+ "^VMOV(D|SH|SL)DUPZ256rmk(z?)$",
+ "^VMOVDQ(A|U)(32|64)Z256rmk(z?)$",
+ "^VPBROADCAST(D|Q)Z256rmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup262, ReadAfterVecYLd], (instregex "^VINSERT(F|I)128rm$",
+ "^VINSERT(F|I)(32x4|64x2)Z256rm((k|kz)?)$",
+ "^VP(ADD|SUB)(B|D|Q|W)(Y|Z256)rm$",
+ "^VP(ADD|SUB)(D|Q)Z256rm(b|k|kz)$",
+ "^VP(ADD|SUB)(D|Q)Z256rmbk(z?)$",
+ "^VPTERNLOG(D|Q)Z256rm(bi|ik)$",
+ "^VPTERNLOG(D|Q)Z256rmbik(z?)$",
+ "^VPTERNLOG(D|Q)Z256rmi((kz)?)$")>;
+
+def SPRWriteResGroup263 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup263, ReadAfterVecXLd], (instregex "^VCMPP(D|H|S)Z128rm(bi|ik)$",
+ "^VCMPP(D|H|S)Z128rm(i|bik)$",
+ "^VFPCLASSP(D|H|S)Z128rm(b?)k$",
+ "^VPCMP(B|D|Q|W|UD|UQ|UW)Z128rmi(k?)$",
+ "^VPCMP(D|Q|UQ)Z128rmib(k?)$",
+ "^VPCMP(EQ|GT)(B|D|Q|W)Z128rm(k?)$",
+ "^VPCMP(EQ|GT)(D|Q)Z128rmb(k?)$",
+ "^VPCMPUBZ128rmi(k?)$",
+ "^VPCMPUDZ128rmib(k?)$",
+ "^VPTEST(N?)M(B|D|Q|W)Z128rm(k?)$",
+ "^VPTEST(N?)M(D|Q)Z128rmb(k?)$")>;
+def : InstRW<[SPRWriteResGroup263, ReadAfterVecYLd], (instregex "^VCMPP(D|H|S)Z((256)?)rm(bi|ik)$",
+ "^VCMPP(D|H|S)Z((256)?)rm(i|bik)$",
+ "^VFPCLASSP(D|H|S)Z((256)?)rm(b?)k$",
+ "^VPCMP(B|D|Q|W|UD|UQ|UW)Z((256)?)rmi(k?)$",
+ "^VPCMP(D|Q|UQ)Z((256)?)rmib(k?)$",
+ "^VPCMP(EQ|GT)(B|D|Q|W)Z((256)?)rm(k?)$",
+ "^VPCMP(EQ|GT)(D|Q)Z((256)?)rmb(k?)$",
+ "^VPCMPUBZ((256)?)rmi(k?)$",
+ "^VPCMPUDZ((256)?)rmib(k?)$",
+ "^VPTEST(N?)M(B|D|Q|W)Z((256)?)rm(k?)$",
+ "^VPTEST(N?)M(D|Q)Z((256)?)rmb(k?)$")>;
+def : InstRW<[SPRWriteResGroup263, ReadAfterVecLd], (instregex "^VCMPS(D|H|S)Zrm$",
+ "^VCMPS(D|H|S)Zrm_Int(k?)$",
+ "^VFPCLASSS(D|H|S)Zrmk$")>;
+
+def SPRWriteResGroup264 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup264, ReadAfterVecLd], (instregex "^V(U?)COMISHZrm((_Int)?)$")>;
+
+def SPRWriteResGroup265 : SchedWriteRes<[SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [1, 2, 1];
+ let Latency = 12;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup265], (instregex "^VCOMPRESSP(D|S)Z(128|256)mr$",
+ "^VCOMPRESSP(D|S)Zmr$",
+ "^VPCOMPRESS(D|Q)Z(128|256)mr$",
+ "^VPCOMPRESS(D|Q)Zmr$",
+ "^VPMOV(D|Q|W|SQ|SW)BZmr$",
+ "^VPMOV((S|US)?)(D|Q)WZmr$",
+ "^VPMOV(U?)S(DB|QD)Zmr$",
+ "^VPMOVUS(Q|W)BZmr$")>;
+
+def SPRWriteResGroup266 : SchedWriteRes<[SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [1, 2, 1];
+ let Latency = 15;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup266], (instregex "^VCOMPRESSP(D|S)Z(128|256)mrk$",
+ "^VCOMPRESSP(D|S)Zmrk$",
+ "^VPCOMPRESS(D|Q)Z(128|256)mrk$",
+ "^VPCOMPRESS(D|Q)Zmrk$",
+ "^VPMOV(D|Q|W|SQ|SW)BZmrk$",
+ "^VPMOV((S|US)?)(D|Q)WZmrk$",
+ "^VPMOV(U?)S(DB|QD)Zmrk$",
+ "^VPMOVUS(Q|W)BZmrk$")>;
+
+def SPRWriteResGroup267 : SchedWriteRes<[SPRPort05]> {
+ let ResourceCycles = [2];
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup267], (instregex "^VCOMPRESSP(D|S)Z(128|256)rr$",
+ "^VCOMPRESSP(D|S)Zrr$",
+ "^VEXPANDP(D|S)Z(128|256)rr$",
+ "^VEXPANDP(D|S)Zrr$",
+ "^VPCOMPRESS(B|D|Q|W)Z(128|256)rr$",
+ "^VPCOMPRESS(B|D|Q|W)Zrr$",
+ "^VPEXPAND(B|D|Q|W)Z(128|256)rr$",
+ "^VPEXPAND(B|D|Q|W)Zrr$")>;
+
+def SPRWriteResGroup268 : SchedWriteRes<[SPRPort00, SPRPort05]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup268], (instregex "^VCVT(U?)DQ2PDZrr((k|kz)?)$",
+ "^VCVT(T?)PS2(U?)QQZrr((b|k|bk|kz)?)$",
+ "^VCVT(T?)PS2(U?)QQZrrbkz$",
+ "^VCVT(U?)QQ2PSZrr((b|k|bk|kz)?)$",
+ "^VCVT(U?)QQ2PSZrrbkz$")>;
+
+def SPRWriteResGroup269 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 15;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup269], (instregex "^VCVT(U?)DQ2PHZ128rm(b?)$",
+ "^VCVTNEPS2BF16Z128rm(b?)$")>;
+
+def SPRWriteResGroup270 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 19;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup270], (instregex "^VCVT(U?)DQ2PHZ128rm(bk|kz)$",
+ "^VCVT(U?)DQ2PHZ128rm(k|bkz)$")>;
+
+def SPRWriteResGroup271 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup271], (instregex "^VCVT(U?)DQ2PHZ128rr$")>;
+
+def SPRWriteResGroup272 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]> {
+ let Latency = 12;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup272], (instregex "^VCVT(U?)DQ2PHZ128rrk(z?)$")>;
+
+def SPRWriteResGroup273 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 17;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup273], (instregex "^VCVT(U?)DQ2PHZ256rm(b?)$",
+ "^VCVTNEPS2BF16Z128rm(bk|kz)$",
+ "^VCVTNEPS2BF16Z128rm(k|bkz)$")>;
+
+def SPRWriteResGroup274 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 21;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup274], (instregex "^VCVT(U?)DQ2PHZ256rm(bk|kz)$",
+ "^VCVT(U?)DQ2PHZ256rm(k|bkz)$")>;
+
+def SPRWriteResGroup275 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup275], (instregex "^VCVT(U?)DQ2PHZ256rr$")>;
+
+def SPRWriteResGroup276 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]> {
+ let Latency = 14;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup276], (instregex "^VCVT(U?)DQ2PHZ256rrk(z?)$")>;
+
+def SPRWriteResGroup277 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 17;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup277], (instregex "^VCVT(U?)DQ2PHZrm(b?)$")>;
+
+def SPRWriteResGroup278 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 21;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup278], (instregex "^VCVT(U?)DQ2PHZrm(bk|kz)$",
+ "^VCVT(U?)DQ2PHZrm(k|bkz)$")>;
+
+def SPRWriteResGroup279 : SchedWriteRes<[SPRPort00, SPRPort05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup279], (instregex "^VCVT(U?)DQ2PHZrr(b?)$")>;
+
+def SPRWriteResGroup280 : SchedWriteRes<[SPRPort00, SPRPort05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 14;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup280], (instregex "^VCVT(U?)DQ2PHZrr(bk|kz)$",
+ "^VCVT(U?)DQ2PHZrr(k|bkz)$")>;
+
+def SPRWriteResGroup281 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1, 1];
+ let Latency = 15;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup281, ReadAfterVecXLd], (instregex "^VCVTNE2PS2BF16Z128rm(b?)$")>;
+
+def SPRWriteResGroup282 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1, 1];
+ let Latency = 17;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup282, ReadAfterVecXLd], (instregex "^VCVTNE2PS2BF16Z128rm(bk|kz)$",
+ "^VCVTNE2PS2BF16Z128rm(k|bkz)$")>;
+
+def SPRWriteResGroup283 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup283], (instregex "^VCVTNE2PS2BF16Z(128|256)rr$")>;
+
+def SPRWriteResGroup284 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 10;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup284], (instregex "^VCVTNE2PS2BF16Z(128|256)rrk(z?)$")>;
+
+def SPRWriteResGroup285 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1, 1];
+ let Latency = 16;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup285, ReadAfterVecYLd], (instregex "^VCVTNE2PS2BF16Z256rm(b?)$")>;
+
+def SPRWriteResGroup286 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1, 1];
+ let Latency = 18;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup286, ReadAfterVecYLd], (instregex "^VCVTNE2PS2BF16Z256rm(bk|kz)$",
+ "^VCVTNE2PS2BF16Z256rm(k|bkz)$")>;
+
+def SPRWriteResGroup287 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 2];
+ let Latency = 16;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup287, ReadAfterVecYLd], (instregex "^VCVTNE2PS2BF16Zrm(b?)$",
+ "^VDPBF16PSZm((b|k|bk|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup287, ReadAfterVecYLd], (instrs VDPBF16PSZmbkz)>;
+
+def SPRWriteResGroup288 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 2];
+ let Latency = 18;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup288, ReadAfterVecYLd], (instregex "^VCVTNE2PS2BF16Zrm(bk|kz)$",
+ "^VCVTNE2PS2BF16Zrm(k|bkz)$")>;
+
+def SPRWriteResGroup289 : SchedWriteRes<[SPRPort00, SPRPort05]> {
+ let ResourceCycles = [2, 2];
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup289], (instregex "^VDPBF16PSZr((k|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup289], (instrs VCVTNE2PS2BF16Zrr)>;
+
+def SPRWriteResGroup290 : SchedWriteRes<[SPRPort00, SPRPort05]> {
+ let ResourceCycles = [2, 2];
+ let Latency = 10;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup290], (instregex "^VCVTNE2PS2BF16Zrrk(z?)$")>;
+
+def SPRWriteResGroup291 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup291], (instregex "^VCVTNEPS2BF16Z(128|256)rr$")>;
+
+def SPRWriteResGroup292 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort05]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup292], (instregex "^VCVTNEPS2BF16Z(128|256)rrk(z?)$")>;
+
+def SPRWriteResGroup293 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 16;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup293], (instregex "^VCVTNEPS2BF16Z256rm(b?)$")>;
+
+def SPRWriteResGroup294 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 18;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup294], (instregex "^VCVTNEPS2BF16Z256rm(bk|kz)$",
+ "^VCVTNEPS2BF16Z256rm(k|bkz)$")>;
+
+def SPRWriteResGroup295 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 16;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup295], (instregex "^VCVTNEPS2BF16Zrm(b?)$")>;
+
+def SPRWriteResGroup296 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 18;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup296], (instregex "^VCVTNEPS2BF16Zrm(bk|kz)$",
+ "^VCVTNEPS2BF16Zrm(k|bkz)$")>;
+
+def SPRWriteResGroup297 : SchedWriteRes<[SPRPort00, SPRPort05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup297], (instrs VCVTNEPS2BF16Zrr)>;
+
+def SPRWriteResGroup298 : SchedWriteRes<[SPRPort00, SPRPort05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup298], (instregex "^VCVTNEPS2BF16Zrrk(z?)$")>;
+
+def SPRWriteResGroup299 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 15;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup299], (instregex "^VCVT(T?)PD2DQYrm$",
+ "^VCVT(T?)P(D|H)2(U?)DQZ256rm(b?)$",
+ "^VCVT(T?)PD2(U?)DQZ256rm(bk|kz)$",
+ "^VCVT(T?)PD2(U?)DQZ256rm(k|bkz)$",
+ "^VCVTPH2PSXZ128rm(bk|kz)$",
+ "^VCVTPH2PSXZ128rm(k|bkz)$",
+ "^VCVTPH2PSXZ256rm(b?)$",
+ "^VCVT(U?)QQ2PSZ256rm((b|k|bk|kz)?)$",
+ "^VCVT(U?)QQ2PSZ256rmbkz$")>;
+
+def SPRWriteResGroup300 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 15;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup300], (instregex "^VCVT(T?)P(D|H)2(U?)DQZrm(b?)$",
+ "^VCVT(T?)PD2(U?)DQZrm(bk|kz)$",
+ "^VCVT(T?)PD2(U?)DQZrm(k|bkz)$",
+ "^VCVTPH2PSXZrm(b?)$",
+ "^VCVT(U?)QQ2PSZrm((b|k|bk|kz)?)$",
+ "^VCVT(U?)QQ2PSZrmbkz$")>;
+
+def SPRWriteResGroup301 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1, 1, 2];
+ let Latency = 19;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup301], (instregex "^VCVTPD2PHZ128rm(b?)$")>;
+
+def SPRWriteResGroup302 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1, 1, 2];
+ let Latency = 22;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup302], (instregex "^VCVTPD2PHZ128rm(bk|kz)$",
+ "^VCVTPD2PHZ128rm(k|bkz)$")>;
+
+def SPRWriteResGroup303 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort05]> {
+ let ResourceCycles = [2, 1, 2];
+ let Latency = 12;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup303], (instrs VCVTPD2PHZ128rr)>;
+
+def SPRWriteResGroup304 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort05]> {
+ let ResourceCycles = [2, 1, 2];
+ let Latency = 15;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup304], (instregex "^VCVTPD2PHZ128rrk(z?)$")>;
+
+def SPRWriteResGroup305 : SchedWriteRes<[SPRPort00_01, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1, 2];
+ let Latency = 21;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup305], (instregex "^VCVTPD2PHZ256rm(b?)$")>;
+
+def SPRWriteResGroup306 : SchedWriteRes<[SPRPort00_01, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1, 2];
+ let Latency = 24;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup306], (instregex "^VCVTPD2PHZ256rm(bk|kz)$",
+ "^VCVTPD2PHZ256rm(k|bkz)$")>;
+
+def SPRWriteResGroup307 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
+ let ResourceCycles = [2, 2];
+ let Latency = 13;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup307], (instrs VCVTPD2PHZ256rr)>;
+
+def SPRWriteResGroup308 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
+ let ResourceCycles = [2, 2];
+ let Latency = 16;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup308], (instregex "^VCVTPD2PHZ256rrk(z?)$")>;
+
+def SPRWriteResGroup309 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1, 2];
+ let Latency = 23;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup309], (instregex "^VCVTP(D2PH|H2PD)Zrm(b?)$")>;
+
+def SPRWriteResGroup310 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1, 2];
+ let Latency = 26;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup310], (instregex "^VCVTP(D2PH|H2PD)Zrm(bk|kz)$",
+ "^VCVTP(D2PH|H2PD)Zrm(k|bkz)$")>;
+
+def SPRWriteResGroup311 : SchedWriteRes<[SPRPort00, SPRPort05]> {
+ let ResourceCycles = [2, 2];
+ let Latency = 15;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup311], (instregex "^VCVTP(D2PH|H2PD)Zrr(b?)$")>;
+
+def SPRWriteResGroup312 : SchedWriteRes<[SPRPort00, SPRPort05]> {
+ let ResourceCycles = [2, 2];
+ let Latency = 18;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup312], (instregex "^VCVTP(D2PH|H2PD)Zrr(bk|kz)$",
+ "^VCVTP(D2PH|H2PD)Zrr(k|bkz)$")>;
+
+def SPRWriteResGroup313 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup313], (instregex "^VCVT(T?)PD2(U?)QQZ128rm((b|k|bk|kz)?)$",
+ "^VCVT(T?)PD2(U?)QQZ128rmbkz$",
+ "^VPABS(B|W)Z(128|256)rmk(z?)$",
+ "^VPLZCNT(D|Q)Z128rm((b|k|bk|kz)?)$",
+ "^VPLZCNT(D|Q)Z128rmbkz$",
+ "^VPS(L|R)LWZ(128|256)mik(z?)$",
+ "^VPSRAWZ(128|256)mik(z?)$")>;
+def : InstRW<[SPRWriteResGroup313, ReadAfterVecLd], (instregex "^VFIXUPIMMS(D|S)Zrmi((k|kz)?)$",
+ "^VSCALEFS(D|S)Zrm((k|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup313, ReadAfterVecXLd], (instregex "^VP(ADD|SUB)(U?)S(B|W)Z128rmk(z?)$",
+ "^VPAVG(B|W)Z128rmk(z?)$",
+ "^VPM(AX|IN)(SB|UW)Z128rmk(z?)$",
+ "^VPM(AX|IN)(SW|UB)Z128rmk(z?)$",
+ "^VPSH(L|R)DVWZ128mk(z?)$",
+ "^VPS(L|R)L(V?)WZ128rmk(z?)$",
+ "^VPSRA(V?)WZ128rmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup313, ReadAfterVecYLd], (instregex "^VP(ADD|SUB)(U?)S(B|W)Z256rmk(z?)$",
+ "^VPAVG(B|W)Z256rmk(z?)$",
+ "^VPM(AX|IN)(SB|UW)Z256rmk(z?)$",
+ "^VPM(AX|IN)(SW|UB)Z256rmk(z?)$",
+ "^VPSH(L|R)DVWZ256mk(z?)$",
+ "^VPS(L|R)L(V?)WZ256rmk(z?)$",
+ "^VPSRA(V?)WZ256rmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup313, ReadAfterVecXLd, ReadAfterVecXLd], (instregex "^VPMADD52(H|L)UQZ128m((b|k|bk|kz)?)$",
+ "^VPMADD52(H|L)UQZ128mbkz$")>;
+
+def SPRWriteResGroup314 : SchedWriteRes<[SPRPort00_01]> {
+ let Latency = 4;
+}
+def : InstRW<[SPRWriteResGroup314], (instregex "^VCVT(T?)PD2(U?)QQZ(128|256)rr((k|kz)?)$",
+ "^VCVT(U?)QQ2PDZ(128|256)rr((k|kz)?)$",
+ "^VFIXUPIMMS(D|S)Zrri((k|kz)?)$",
+ "^VPLZCNT(D|Q)Z(128|256)rr((k|kz)?)$",
+ "^VPMADD52(H|L)UQZ(128|256)r((k|kz)?)$",
+ "^VSCALEFS(D|S)Zrr((k|kz)?)$",
+ "^VSCALEFS(D|S)Zrrb_Int((k|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup314, ReadAfterVecLd], (instregex "^VFIXUPIMMS(D|S)Zrrib((k|kz)?)$")>;
+
+def SPRWriteResGroup315 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 14;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup315], (instregex "^VCVT(T?)PH2(U?)DQZ128rm(b?)$",
+ "^VCVTPS2PHXZ128rm(b?)$")>;
+
+def SPRWriteResGroup316 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 17;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup316], (instregex "^VCVT(T?)PH2(U?)DQZ128rm(bk|kz)$",
+ "^VCVT(T?)PH2(U?)DQZ128rm(k|bkz)$")>;
+
+def SPRWriteResGroup317 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup317], (instregex "^VCVT(T?)PH2(U?)DQZ(128|256)rrk(z?)$",
+ "^VCVTP(H2PS|S2PH)(X?)Z256rrk(z?)$")>;
+
+def SPRWriteResGroup318 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 18;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup318], (instregex "^VCVT(T?)PH2(U?)DQZ256rm(bk|kz)$",
+ "^VCVT(T?)PH2(U?)DQZ256rm(k|bkz)$",
+ "^VCVTP(H2PS|S2PH)XZ256rm(bk|kz)$",
+ "^VCVTP(H2PS|S2PH)XZ256rm(k|bkz)$")>;
+
+def SPRWriteResGroup319 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 18;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup319], (instregex "^VCVT(T?)PH2(U?)DQZrm(bk|kz)$",
+ "^VCVT(T?)PH2(U?)DQZrm(k|bkz)$",
+ "^VCVTP(H2PS|S2PH)XZrm(bk|kz)$",
+ "^VCVTP(H2PS|S2PH)XZrm(k|bkz)$")>;
+
+def SPRWriteResGroup320 : SchedWriteRes<[SPRPort00, SPRPort05]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup320], (instregex "^VCVT(T?)PH2(U?)DQZrr(b?)$",
+ "^VCVTP(H2PS|S2PH)(X?)Zrr(b?)$",
+ "^VPSHUFBITQMBZ(128|256)rrk$")>;
+def : InstRW<[SPRWriteResGroup320], (instrs VPSHUFBITQMBZrrk)>;
+
+def SPRWriteResGroup321 : SchedWriteRes<[SPRPort00, SPRPort05]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup321], (instregex "^VCVT(T?)PH2(U?)DQZrr(bk|kz)$",
+ "^VCVT(T?)PH2(U?)DQZrr(k|bkz)$",
+ "^VCVTP(H2PS|S2PH)XZrr(bk|kz)$",
+ "^VCVTP(H2PS|S2PH)XZrr(k|bkz)$")>;
+
+def SPRWriteResGroup322 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1, 1, 2];
+ let Latency = 23;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup322], (instregex "^VCVTPH2PDZ128rm(b?)$")>;
+
+def SPRWriteResGroup323 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1, 1, 2];
+ let Latency = 26;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup323], (instregex "^VCVTPH2PDZ128rm(bk|kz)$",
+ "^VCVTPH2PDZ128rm(k|bkz)$")>;
+
+def SPRWriteResGroup324 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1, 2];
+ let Latency = 16;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup324], (instrs VCVTPH2PDZ128rr)>;
+
+def SPRWriteResGroup325 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1, 2];
+ let Latency = 19;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup325], (instregex "^VCVTPH2PDZ128rrk(z?)$")>;
+
+def SPRWriteResGroup326 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 2];
+ let Latency = 22;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup326], (instregex "^VCVTPH2PDZ256rm(b?)$")>;
+
+def SPRWriteResGroup327 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 2];
+ let Latency = 25;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup327], (instregex "^VCVTPH2PDZ256rm(bk|kz)$",
+ "^VCVTPH2PDZ256rm(k|bkz)$")>;
+
+def SPRWriteResGroup328 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
+ let ResourceCycles = [2, 2];
+ let Latency = 15;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup328], (instrs VCVTPH2PDZ256rr)>;
+
+def SPRWriteResGroup329 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
+ let ResourceCycles = [2, 2];
+ let Latency = 18;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup329], (instregex "^VCVTPH2PDZ256rrk(z?)$")>;
+
+def SPRWriteResGroup330 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup330], (instregex "^VCVTP(H2PS|S2PH)(X?)Z128rrk(z?)$")>;
+
+def SPRWriteResGroup331 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
+ let Latency = 14;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup331], (instregex "^VCVTPH2PSZ(128|256)rmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup331, ReadAfterVecLd], (instregex "^VCVTSH2SSZrm_Intk(z?)$")>;
+def : InstRW<[SPRWriteResGroup331, ReadAfterVecXLd], (instregex "^VPMADDUBSWZ128rmk(z?)$",
+ "^VPMULH((U|RS)?)WZ128rmk(z?)$",
+ "^VPMULLWZ128rmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup331, ReadAfterVecYLd], (instregex "^VPMADDUBSWZ256rmk(z?)$",
+ "^VPMULH((U|RS)?)WZ256rmk(z?)$",
+ "^VPMULLWZ256rmk(z?)$")>;
+
+def SPRWriteResGroup332 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 13;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup332], (instregex "^VCVT(T?)PS2(U?)QQZrm((b|k|bk|kz)?)$",
+ "^VCVT(T?)PS2(U?)QQZrmbkz$")>;
+def : InstRW<[SPRWriteResGroup332], (instrs VCVTPH2PSZrm)>;
+def : InstRW<[SPRWriteResGroup332, ReadAfterVecYLd], (instregex "^VPERMWZrmk(z?)$")>;
+
+def SPRWriteResGroup333 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 2, 1, 1, 1];
+ let Latency = 17;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup333], (instregex "^VCVT(T?)PH2(U?)QQZ128rm((b|k|bk|kz)?)$",
+ "^VCVT(T?)PH2(U?)QQZ128rmbkz$")>;
+
+def SPRWriteResGroup334 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort05]> {
+ let ResourceCycles = [1, 2, 1];
+ let Latency = 10;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup334], (instregex "^VCVT(T?)PH2(U?)QQZ(128|256)rr((k|kz)?)$")>;
+
+def SPRWriteResGroup335 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 2, 1, 1, 1];
+ let Latency = 18;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup335], (instregex "^VCVT(T?)PH2(U?)QQZ256rm((b|k|bk|kz)?)$",
+ "^VCVT(T?)PH2(U?)QQZ256rmbkz$")>;
+
+def SPRWriteResGroup336 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 16;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup336], (instregex "^VCVTPS2PHXZ128rm(bk|kz)$",
+ "^VCVTPS2PHXZ128rm(k|bkz)$",
+ "^VCVTPS2PHXZ256rm(b?)$")>;
+
+def SPRWriteResGroup337 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 16;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup337], (instregex "^VCVTPS2PHXZrm(b?)$")>;
+
+def SPRWriteResGroup338 : SchedWriteRes<[SPRPort00_01, SPRPort04_09, SPRPort07_08]> {
+ let Latency = 16;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup338], (instregex "^VCVTPS2PHZ(128|256)mrk$")>;
+
+def SPRWriteResGroup339 : SchedWriteRes<[SPRPort00, SPRPort04_09, SPRPort07_08]> {
+ let Latency = 16;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup339], (instrs VCVTPS2PHZmrk)>;
+
+def SPRWriteResGroup340 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup340], (instregex "^VCVT(T?)PS2(U?)QQZ128rr((k|kz)?)$",
+ "^VCVT(U?)QQ2PSZ128rr((k|kz)?)$")>;
+
+def SPRWriteResGroup341 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 15;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup341], (instregex "^VCVT(U?)QQ2PHZ128rm(b?)$")>;
+
+def SPRWriteResGroup342 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 17;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup342], (instregex "^VCVT(U?)QQ2PHZ128rm(bk|kz)$",
+ "^VCVT(U?)QQ2PHZ128rm(k|bkz)$")>;
+
+def SPRWriteResGroup343 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort05]> {
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup343], (instregex "^VCVT(U?)QQ2PHZ128rr$")>;
+
+def SPRWriteResGroup344 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort05]> {
+ let Latency = 10;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup344], (instregex "^VCVT(U?)QQ2PHZ128rrk(z?)$",
+ "^VCVT(U?)QQ2PHZ256rr$")>;
+
+def SPRWriteResGroup345 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 18;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup345], (instregex "^VCVT(U?)QQ2PHZ256rm(b?)$")>;
+
+def SPRWriteResGroup346 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 20;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup346], (instregex "^VCVT(U?)QQ2PHZ256rm(bk|kz)$",
+ "^VCVT(U?)QQ2PHZ256rm(k|bkz)$")>;
+
+def SPRWriteResGroup347 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort05]> {
+ let Latency = 12;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup347], (instregex "^VCVT(U?)QQ2PHZ256rrk(z?)$")>;
+
+def SPRWriteResGroup348 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 1, 2];
+ let Latency = 18;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup348], (instregex "^VCVT(U?)QQ2PHZrm(b?)$")>;
+
+def SPRWriteResGroup349 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 1, 2];
+ let Latency = 20;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup349], (instregex "^VCVT(U?)QQ2PHZrm(bk|kz)$",
+ "^VCVT(U?)QQ2PHZrm(k|bkz)$")>;
+
+def SPRWriteResGroup350 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 10;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup350], (instregex "^VCVT(U?)QQ2PHZrr(b?)$")>;
+
+def SPRWriteResGroup351 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 12;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup351], (instregex "^VCVT(U?)QQ2PHZrr(bk|kz)$",
+ "^VCVT(U?)QQ2PHZrr(k|bkz)$")>;
+
+def SPRWriteResGroup352 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 2, 1, 1, 1];
+ let Latency = 18;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup352, ReadAfterVecLd], (instregex "^VCVTSD2SHZrm((_Int)?)$")>;
+
+def SPRWriteResGroup353 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 2, 1, 1, 1];
+ let Latency = 21;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup353, ReadAfterVecLd], (instregex "^VCVTSD2SHZrm_Intk(z?)$")>;
+
+def SPRWriteResGroup354 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 11;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup354], (instregex "^VCVTSD2SHZrr(b?)_Int$")>;
+def : InstRW<[SPRWriteResGroup354], (instrs VCVTSD2SHZrr)>;
+
+def SPRWriteResGroup355 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 14;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup355], (instregex "^VCVTSD2SHZrr(b?)_Intk(z?)$")>;
+
+def SPRWriteResGroup356 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 18;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup356, ReadAfterVecLd], (instregex "^VCVTSH2SDZrm((_Int)?)$")>;
+
+def SPRWriteResGroup357 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 20;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup357, ReadAfterVecLd], (instregex "^VCVTSH2SDZrm_Intk(z?)$")>;
+
+def SPRWriteResGroup358 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup358], (instregex "^VCVTSH2SDZrr(b?)_Int$")>;
+def : InstRW<[SPRWriteResGroup358], (instrs VCVTSH2SDZrr)>;
+
+def SPRWriteResGroup359 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 13;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup359], (instregex "^VCVTSH2SDZrr(b?)_Intk(z?)$")>;
+
+def SPRWriteResGroup360 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort02_03_11]> {
+ let Latency = 13;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup360, ReadAfterVecLd], (instregex "^VCVT(T?)SH2(U?)SI((64)?)Zrm_Int$",
+ "^VCVTTSH2(U?)SI((64)?)Zrm$")>;
+
+def SPRWriteResGroup361 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup361], (instregex "^VCVT(T?)SH2(U?)SI((64)?)Zrr(b?)_Int$",
+ "^VCVTTSH2(U?)SI((64)?)Zrr$")>;
+
+def SPRWriteResGroup362 : SchedWriteRes<[SPRPort00_01]> {
+ let Latency = 8;
+}
+def : InstRW<[SPRWriteResGroup362], (instregex "^VCVTSH2SSZrr(b?)_Intk(z?)$")>;
+
+def SPRWriteResGroup363 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11]> {
+ let Latency = 14;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup363, ReadAfterVecLd], (instregex "^VCVT(U?)SI((64)?)2SHZrm((_Int)?)$",
+ "^VCVTSS2SHZrm((_Int)?)$")>;
+
+def SPRWriteResGroup364 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11]> {
+ let Latency = 16;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup364, ReadAfterVecLd], (instregex "^VCVTSS2SHZrm_Intk(z?)$")>;
+
+def SPRWriteResGroup365 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup365], (instregex "^VCVTSS2SHZrr(b?)_Int$")>;
+def : InstRW<[SPRWriteResGroup365], (instrs VCVTSS2SHZrr)>;
+
+def SPRWriteResGroup366 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup366], (instregex "^VCVTSS2SHZrr(b?)_Intk(z?)$")>;
+
+def SPRWriteResGroup367 : SchedWriteRes<[SPRPort05]> {
+ let Latency = 5;
+}
+def : InstRW<[SPRWriteResGroup367], (instregex "^VDBPSADBWZ(128|256)rrik(z?)$",
+ "^VDBPSADBWZrrik(z?)$",
+ "^VPACK(S|U)S(DW|WB)Z(128|256)rrk(z?)$",
+ "^VPACK(S|U)S(DW|WB)Zrrk(z?)$",
+ "^VPBROADCAST(B|W|Dr|Qr|Wr)Z((256)?)rrk(z?)$",
+ "^VPBROADCAST(B|D|Q|W)rZ(128|256)rr$",
+ "^VPBROADCASTBrZ(128|256)rrk(z?)$",
+ "^VPBROADCAST(B|D|Q|W)rZrr$",
+ "^VPBROADCASTBrZrrk(z?)$",
+ "^VPBROADCAST(D|Q|W)rZ128rrk(z?)$",
+ "^VPERMBZ(128|256)rrk(z?)$",
+ "^VPERMBZrrk(z?)$",
+ "^VPMOV(S|Z)XBWZ((256)?)rrk(z?)$",
+ "^VPMULTISHIFTQBZ(128|256)rrk(z?)$",
+ "^VPMULTISHIFTQBZrrk(z?)$",
+ "^VPOPCNT(B|W)Z(128|256)rrk(z?)$",
+ "^VPOPCNT(B|W)Zrrk(z?)$")>;
+
+def SPRWriteResGroup368 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 36;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup368, ReadAfterVecXLd], (instregex "^VDIVPHZ128rm(b?)$")>;
+
+def SPRWriteResGroup369 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 38;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup369, ReadAfterVecXLd], (instregex "^VDIVPHZ128rm(bk|kz)$",
+ "^VDIVPHZ128rm(k|bkz)$")>;
+
+def SPRWriteResGroup370 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 31;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup370], (instregex "^VDIVPHZ(128|256)rr$")>;
+
+def SPRWriteResGroup371 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 33;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup371], (instregex "^VDIVPHZ(128|256)rrk$",
+ "^VSQRTPHZ(128|256)r$")>;
+def : InstRW<[SPRWriteResGroup371], (instrs VDIVPHZ128rrkz)>;
+
+def SPRWriteResGroup372 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 37;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup372, ReadAfterVecYLd], (instregex "^VDIVPHZ256rm(b?)$")>;
+
+def SPRWriteResGroup373 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 39;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup373, ReadAfterVecYLd], (instregex "^VDIVPHZ256rm(bk|kz)$",
+ "^VDIVPHZ256rm(k|bkz)$")>;
+def : InstRW<[SPRWriteResGroup373, ReadAfterVecXLd], (instregex "^VSQRTPHZ128m(b?)$")>;
+
+def SPRWriteResGroup374 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 11;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup374], (instrs VDIVPHZ256rrkz)>;
+
+def SPRWriteResGroup375 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [4, 2, 1, 1, 1];
+ let Latency = 49;
+ let NumMicroOps = 9;
+}
+def : InstRW<[SPRWriteResGroup375, ReadAfterVecYLd], (instregex "^VDIVPHZrm(b?)$")>;
+
+def SPRWriteResGroup376 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [4, 2, 1, 1, 1];
+ let Latency = 51;
+ let NumMicroOps = 9;
+}
+def : InstRW<[SPRWriteResGroup376, ReadAfterVecYLd], (instregex "^VDIVPHZrm(bk|kz)$",
+ "^VDIVPHZrm(k|bkz)$")>;
+
+def SPRWriteResGroup377 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort05]> {
+ let ResourceCycles = [4, 1, 1];
+ let Latency = 41;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup377], (instregex "^VDIVPHZrr(b?)$")>;
+
+def SPRWriteResGroup378 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort05]> {
+ let ResourceCycles = [4, 1, 1];
+ let Latency = 43;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup378], (instregex "^VDIVPHZrr(bk|kz)$",
+ "^VDIVPHZrr(k|bkz)$")>;
+
+def SPRWriteResGroup379 : SchedWriteRes<[SPRPort00, SPRPort00_05]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 17;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup379], (instrs VDIVPSZrr)>;
+
+def SPRWriteResGroup380 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let Latency = 21;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup380, ReadAfterVecLd], (instregex "^VDIVSHZrm_Int((k|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup380, ReadAfterVecLd], (instrs VDIVSHZrm)>;
+
+def SPRWriteResGroup381 : SchedWriteRes<[SPRPort00]> {
+ let Latency = 14;
+}
+def : InstRW<[SPRWriteResGroup381], (instrs VDIVSHZrr_Int,
+ VSQRTSHZr_Int)>;
+
+def SPRWriteResGroup382 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 2];
+ let Latency = 15;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup382, ReadAfterVecXLd], (instregex "^VDPBF16PSZ128m((b|k|bk|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup382, ReadAfterVecXLd], (instrs VDPBF16PSZ128mbkz)>;
+
+def SPRWriteResGroup383 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
+ let ResourceCycles = [2, 2];
+ let Latency = 8;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup383], (instregex "^VDPBF16PSZ(128|256)r((k|kz)?)$")>;
+
+def SPRWriteResGroup384 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [2, 1, 2];
+ let Latency = 16;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup384, ReadAfterVecYLd], (instregex "^VDPBF16PSZ256m((b|k|bk|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup384, ReadAfterVecYLd], (instrs VDPBF16PSZ256mbkz)>;
+
+def SPRWriteResGroup385 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_11]> {
+ let ResourceCycles = [6, 7, 18];
+ let Latency = 81;
+ let NumMicroOps = 31;
+}
+def : InstRW<[SPRWriteResGroup385], (instrs VERRm)>;
+
+def SPRWriteResGroup386 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_11]> {
+ let ResourceCycles = [6, 7, 17];
+ let Latency = 74;
+ let NumMicroOps = 30;
+}
+def : InstRW<[SPRWriteResGroup386], (instrs VERRr)>;
+
+def SPRWriteResGroup387 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_11]> {
+ let ResourceCycles = [5, 8, 21];
+ let Latency = 81;
+ let NumMicroOps = 34;
+}
+def : InstRW<[SPRWriteResGroup387], (instrs VERWm)>;
+
+def SPRWriteResGroup388 : SchedWriteRes<[SPRPort00, SPRPort01, SPRPort02_03_11]> {
+ let ResourceCycles = [5, 8, 20];
+ let Latency = 74;
+ let NumMicroOps = 33;
+}
+def : InstRW<[SPRWriteResGroup388], (instrs VERWr)>;
+
+def SPRWriteResGroup389 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup389, ReadAfterVecYLd], (instregex "^VEXPANDP(D|S)Z128rm((k|kz)?)$",
+ "^VPEXPAND(B|D|Q|W)Z128rm$",
+ "^VPEXPAND(D|Q)Z128rmk(z?)$")>;
+
+def SPRWriteResGroup390 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 16;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup390], (instregex "^VF(C?)MADDCPHZ(128|256)m(b?)$",
+ "^VROUNDP(D|S)Ym$")>;
+def : InstRW<[SPRWriteResGroup390, ReadAfterVecXLd], (instregex "^VF(C?)MADDCSHZm$",
+ "^VF(C?)MULCPHZ128rm(b?)$",
+ "^VF(C?)MULCSHZrm$",
+ "^VRNDSCALEPHZ128rm(b?)i$",
+ "^VRNDSCALESHZm((_Int)?)$",
+ "^VSCALEFPHZ128rm(b?)$")>;
+def : InstRW<[SPRWriteResGroup390, ReadAfterVecYLd], (instregex "^VF(C?)MULCPHZ256rm(b?)$",
+ "^VRNDSCALEP(D|H|S)Z256rm(b?)i$",
+ "^VRNDSCALEP(D|S)Z256rm(b?)ik(z?)$",
+ "^VSCALEFPHZ256rm(b?)$")>;
+def : InstRW<[SPRWriteResGroup390, ReadAfterVecLd], (instrs VSCALEFSHZrm)>;
+
+def SPRWriteResGroup391 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 21;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup391], (instregex "^VF(C?)MADDCPHZ(128|256)m(bk|kz)$",
+ "^VF(C?)MADDCPHZ(128|256)m(k|bkz)$")>;
+def : InstRW<[SPRWriteResGroup391, ReadAfterVecXLd], (instregex "^VF(C?)MADDCSHZmk(z?)$",
+ "^VF(C?)MULCPHZ128rm(bk|kz)$",
+ "^VF(C?)MULCPHZ128rm(k|bkz)$",
+ "^VF(C?)MULCSHZrmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup391, ReadAfterVecYLd], (instregex "^VF(C?)MULCPHZ256rm(bk|kz)$",
+ "^VF(C?)MULCPHZ256rm(k|bkz)$")>;
+
+def SPRWriteResGroup392 : SchedWriteRes<[SPRPort00_01]> {
+ let ResourceCycles = [2];
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup392], (instregex "^VF(C?)MADDCPHZ(128|256)r$",
+ "^VF(C?)MADDCSHZr(b?)$",
+ "^VF(C?)MULCPHZ(128|256)rr$",
+ "^VF(C?)MULCSHZrr(b?)$",
+ "^VRNDSCALEPHZ(128|256)rri$",
+ "^VRNDSCALESHZr(b?)_Int$",
+ "^VSCALEFPHZ(128|256)rr$")>;
+def : InstRW<[SPRWriteResGroup392], (instrs VRNDSCALESHZr,
+ VSCALEFSHZrr,
+ VSCALEFSHZrrb_Int)>;
+
+def SPRWriteResGroup393 : SchedWriteRes<[SPRPort00_01]> {
+ let ResourceCycles = [2];
+ let Latency = 15;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup393], (instregex "^VF(C?)MADDCPHZ(128|256)rk(z?)$",
+ "^VF(C?)MADDCSHZr(bk|kz)$",
+ "^VF(C?)MADDCSHZr(k|bkz)$",
+ "^VF(C?)MULCPHZ(128|256)rrk(z?)$",
+ "^VF(C?)MULCSHZrr(bk|kz)$",
+ "^VF(C?)MULCSHZrr(k|bkz)$")>;
+
+def SPRWriteResGroup394 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 16;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup394], (instregex "^VF(C?)MADDCPHZm(b?)$")>;
+def : InstRW<[SPRWriteResGroup394, ReadAfterVecYLd], (instregex "^VF(C?)MULCPHZrm(b?)$",
+ "^VRNDSCALEP(D|H|S)Zrm(b?)i$",
+ "^VRNDSCALEP(D|S)Zrm(b?)ik(z?)$",
+ "^VSCALEFPHZrm(b?)$")>;
+
+def SPRWriteResGroup395 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 21;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup395], (instregex "^VF(C?)MADDCPHZm(bk|kz)$",
+ "^VF(C?)MADDCPHZm(k|bkz)$")>;
+def : InstRW<[SPRWriteResGroup395, ReadAfterVecYLd], (instregex "^VF(C?)MULCPHZrm(bk|kz)$",
+ "^VF(C?)MULCPHZrm(k|bkz)$")>;
+
+def SPRWriteResGroup396 : SchedWriteRes<[SPRPort00]> {
+ let ResourceCycles = [2];
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup396], (instregex "^VF(C?)MADDCPHZr(b?)$",
+ "^VF(C?)MULCPHZrr(b?)$",
+ "^VRNDSCALEPHZrri(b?)$",
+ "^VSCALEFPHZrr(b?)$")>;
+
+def SPRWriteResGroup397 : SchedWriteRes<[SPRPort00]> {
+ let ResourceCycles = [2];
+ let Latency = 15;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup397], (instregex "^VF(C?)MADDCPHZr(bk|kz)$",
+ "^VF(C?)MADDCPHZr(k|bkz)$",
+ "^VF(C?)MULCPHZrr(bk|kz)$",
+ "^VF(C?)MULCPHZrr(k|bkz)$")>;
+
+def SPRWriteResGroup398 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [1, 1, 2, 4];
+ let Latency = 29;
+ let NumMicroOps = 8;
+}
+def : InstRW<[SPRWriteResGroup398, WriteVecMaskedGatherWriteback], (instregex "^VGATHER(D|Q)PDYrm$",
+ "^VPGATHER(D|Q)QYrm$")>;
+def : InstRW<[SPRWriteResGroup398, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSYrm,
+ VPGATHERQDYrm)>;
+
+def SPRWriteResGroup399 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 20;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup399, WriteVecMaskedGatherWriteback], (instregex "^VGATHER(D|Q)PDZ128rm$",
+ "^VPGATHER(D|Q)QZ128rm$")>;
+def : InstRW<[SPRWriteResGroup399, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSZ128rm,
+ VPGATHERQDZ128rm)>;
+
+def SPRWriteResGroup400 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [1, 2, 4];
+ let Latency = 28;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup400, WriteVecMaskedGatherWriteback], (instregex "^VGATHER(D|Q)PDZ256rm$",
+ "^VPGATHER(D|Q)QZ256rm$")>;
+def : InstRW<[SPRWriteResGroup400, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSZ256rm,
+ VPGATHERQDZ256rm)>;
+
+def SPRWriteResGroup401 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 8, 2];
+ let Latency = 28;
+ let NumMicroOps = 11;
+}
+def : InstRW<[SPRWriteResGroup401, WriteVecMaskedGatherWriteback], (instregex "^VGATHER(D|Q)PDZrm$",
+ "^VPGATHER(D|Q)QZrm$")>;
+def : InstRW<[SPRWriteResGroup401, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSZrm,
+ VPGATHERQDZrm)>;
+
+def SPRWriteResGroup402 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [1, 1, 1, 2];
+ let Latency = 20;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup402, WriteVecMaskedGatherWriteback], (instregex "^VGATHER(D|Q)PDrm$",
+ "^VPGATHER(D|Q)Qrm$")>;
+def : InstRW<[SPRWriteResGroup402, WriteVecMaskedGatherWriteback], (instrs VGATHERQPSrm,
+ VPGATHERQDrm)>;
+
+def SPRWriteResGroup403 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [1, 1, 2, 8];
+ let Latency = 30;
+ let NumMicroOps = 12;
+}
+def : InstRW<[SPRWriteResGroup403, WriteVecMaskedGatherWriteback], (instrs VGATHERDPSYrm,
+ VPGATHERDDYrm)>;
+
+def SPRWriteResGroup404 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [1, 2, 4];
+ let Latency = 27;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup404, WriteVecMaskedGatherWriteback], (instrs VGATHERDPSZ128rm,
+ VPGATHERDDZ128rm)>;
+
+def SPRWriteResGroup405 : SchedWriteRes<[SPRPort00, SPRPort01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [1, 2, 8];
+ let Latency = 29;
+ let NumMicroOps = 11;
+}
+def : InstRW<[SPRWriteResGroup405, WriteVecMaskedGatherWriteback], (instrs VGATHERDPSZ256rm,
+ VPGATHERDDZ256rm)>;
+
+def SPRWriteResGroup406 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 16, 2];
+ let Latency = 30;
+ let NumMicroOps = 19;
+}
+def : InstRW<[SPRWriteResGroup406, WriteVecMaskedGatherWriteback], (instrs VGATHERDPSZrm,
+ VPGATHERDDZrm)>;
+
+def SPRWriteResGroup407 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [1, 1, 2, 4];
+ let Latency = 28;
+ let NumMicroOps = 8;
+}
+def : InstRW<[SPRWriteResGroup407, WriteVecMaskedGatherWriteback], (instrs VGATHERDPSrm,
+ VPGATHERDDrm)>;
+
+def SPRWriteResGroup408 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
+ let Latency = 15;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup408, ReadAfterVecXLd], (instregex "^VGF2P8AFFINE((INV)?)QBZ128rm(b?)ik(z?)$",
+ "^VGF2P8MULBZ128rmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup408, ReadAfterVecYLd], (instregex "^VGF2P8AFFINE((INV)?)QBZ256rm(b?)ik(z?)$",
+ "^VGF2P8MULBZ256rmk(z?)$")>;
+
+def SPRWriteResGroup409 : SchedWriteRes<[SPRPort00_01]> {
+ let Latency = 9;
+}
+def : InstRW<[SPRWriteResGroup409], (instregex "^VGF2P8AFFINE((INV)?)QBZ(128|256)rrik$",
+ "^VGF2P8MULBZ(128|256)rrk$")>;
+
+def SPRWriteResGroup410 : SchedWriteRes<[SPRPort00_01]> {
+ let Latency = 10;
+}
+def : InstRW<[SPRWriteResGroup410], (instregex "^VGF2P8AFFINE((INV)?)QBZ(128|256)rrikz$",
+ "^VGF2P8MULBZ(128|256)rrkz$")>;
+
+def SPRWriteResGroup411 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let Latency = 15;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup411, ReadAfterVecYLd], (instregex "^VGF2P8AFFINE((INV)?)QBZrm(b?)ik(z?)$",
+ "^VGF2P8MULBZrmk(z?)$")>;
+
+def SPRWriteResGroup412 : SchedWriteRes<[SPRPort00]> {
+ let Latency = 9;
+}
+def : InstRW<[SPRWriteResGroup412], (instregex "^VGF2P8AFFINE((INV)?)QBZrrik$")>;
+def : InstRW<[SPRWriteResGroup412], (instrs VGF2P8MULBZrrk)>;
+
+def SPRWriteResGroup413 : SchedWriteRes<[SPRPort00]> {
+ let Latency = 10;
+}
+def : InstRW<[SPRWriteResGroup413], (instregex "^VGF2P8AFFINE((INV)?)QBZrrikz$")>;
+def : InstRW<[SPRWriteResGroup413], (instrs VGF2P8MULBZrrkz)>;
+
+def SPRWriteResGroup414 : SchedWriteRes<[SPRPort01_05, SPRPort05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup414], (instregex "^VH(ADD|SUB)P(D|S)rr$")>;
+
+def SPRWriteResGroup415 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort02_03_11]> {
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup415], (instrs VLDMXCSR)>;
+
+def SPRWriteResGroup416 : SchedWriteRes<[SPRPort01, SPRPort01_05, SPRPort02_03, SPRPort02_03_11, SPRPort04, SPRPort04_09, SPRPort05, SPRPort06]> {
+ let ResourceCycles = [1, 1, 1, 8, 1, 1, 2, 3];
+ let Latency = 40;
+ let NumMicroOps = 18;
+}
+def : InstRW<[SPRWriteResGroup416], (instrs VMCLEARm)>;
+
+def SPRWriteResGroup417 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11]> {
+ let Latency = 11;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup417], (instregex "^VMOVDQU(8|16)Z(128|256)rmk(z?)$",
+ "^VMOVSHZrmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup417, ReadAfterVecXLd], (instregex "^VP(ADD|SUB)(B|W)Z128rmk(z?)$",
+ "^VPBLENDM(B|W)Z128rmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup417, ReadAfterVecYLd], (instregex "^VP(ADD|SUB)(B|W)Z256rmk(z?)$",
+ "^VPBLENDM(B|W)Z256rmk(z?)$")>;
+
+def SPRWriteResGroup418 : SchedWriteRes<[SPRPort00_01_05]> {
+ let Latency = 3;
+}
+def : InstRW<[SPRWriteResGroup418], (instregex "^VMOVDQU(8|16)Z(128|256)rrk(z?)((_REV)?)$",
+ "^VMOVSHZrrk(z?)((_REV)?)$",
+ "^VP(ADD|SUB)(B|W)Z(128|256)rrk(z?)$",
+ "^VPBLENDM(B|W)Z(128|256)rrk(z?)$",
+ "^VPMOVM2(B|W)Z(128|256)rr$")>;
+
+def SPRWriteResGroup419 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [1, 2, 2];
+ let Latency = 12;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup419], (instrs VMOVDQU8Zmrk)>;
+
+def SPRWriteResGroup420 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 477;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup420], (instrs VMOVNTDQZ128mr)>;
+
+def SPRWriteResGroup421 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 470;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup421], (instrs VMOVNTDQZ256mr,
+ VMOVNTPSmr)>;
+
+def SPRWriteResGroup422 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 473;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup422], (instregex "^VMOVNT(PD|DQZ)mr$")>;
+
+def SPRWriteResGroup423 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 521;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup423], (instrs VMOVNTDQmr)>;
+
+def SPRWriteResGroup424 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 550;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup424], (instrs VMOVNTPDZ128mr)>;
+
+def SPRWriteResGroup425 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 474;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup425], (instrs VMOVNTPDZ256mr)>;
+
+def SPRWriteResGroup426 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 464;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup426], (instrs VMOVNTPDZmr)>;
+
+def SPRWriteResGroup427 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 494;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup427], (instrs VMOVNTPSYmr)>;
+
+def SPRWriteResGroup428 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 475;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup428], (instrs VMOVNTPSZ128mr)>;
+
+def SPRWriteResGroup429 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 476;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup429], (instrs VMOVNTPSZ256mr)>;
+
+def SPRWriteResGroup430 : SchedWriteRes<[SPRPort04_09, SPRPort07_08]> {
+ let Latency = 471;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup430], (instrs VMOVNTPSZmr)>;
+
+def SPRWriteResGroup431 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [3, 1, 8];
+ let Latency = 10;
+ let NumMicroOps = 12;
+}
+def : InstRW<[SPRWriteResGroup431, ReadAfterVecXLd], (instregex "^VP2INTERSECTDZ128rm(b?)$")>;
+def : InstRW<[SPRWriteResGroup431, ReadAfterVecYLd], (instregex "^VP2INTERSECTQZ256rm(b?)$")>;
+
+def SPRWriteResGroup432 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> {
+ let ResourceCycles = [4, 8];
+ let Latency = 10;
+ let NumMicroOps = 12;
+}
+def : InstRW<[SPRWriteResGroup432], (instrs VP2INTERSECTDZ128rr,
+ VP2INTERSECTQZ256rr)>;
+
+def SPRWriteResGroup433 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 8, 7, 2, 1, 11];
+ let Latency = 27;
+ let NumMicroOps = 30;
+}
+def : InstRW<[SPRWriteResGroup433, ReadAfterVecYLd], (instregex "^VP2INTERSECTDZ256rm(b?)$")>;
+
+def SPRWriteResGroup434 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05, SPRPort01_05, SPRPort05]> {
+ let ResourceCycles = [1, 8, 8, 2, 11];
+ let Latency = 27;
+ let NumMicroOps = 30;
+}
+def : InstRW<[SPRWriteResGroup434], (instrs VP2INTERSECTDZ256rr)>;
+
+def SPRWriteResGroup435 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [13, 9, 1, 23];
+ let Latency = 40;
+ let NumMicroOps = 46;
+}
+def : InstRW<[SPRWriteResGroup435, ReadAfterVecYLd], (instregex "^VP2INTERSECTDZrm(b?)$")>;
+
+def SPRWriteResGroup436 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> {
+ let ResourceCycles = [13, 10, 23];
+ let Latency = 40;
+ let NumMicroOps = 46;
+}
+def : InstRW<[SPRWriteResGroup436], (instrs VP2INTERSECTDZrr)>;
+
+def SPRWriteResGroup437 : SchedWriteRes<[SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 4];
+ let Latency = 6;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup437, ReadAfterVecXLd], (instregex "^VP2INTERSECTQZ128rm(b?)$")>;
+
+def SPRWriteResGroup438 : SchedWriteRes<[SPRPort05]> {
+ let ResourceCycles = [4];
+ let Latency = 6;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup438], (instrs VP2INTERSECTQZ128rr)>;
+
+def SPRWriteResGroup439 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [8, 7, 1, 14];
+ let Latency = 29;
+ let NumMicroOps = 30;
+}
+def : InstRW<[SPRWriteResGroup439, ReadAfterVecYLd], (instregex "^VP2INTERSECTQZrm(b?)$")>;
+
+def SPRWriteResGroup440 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> {
+ let ResourceCycles = [8, 8, 14];
+ let Latency = 30;
+ let NumMicroOps = 30;
+}
+def : InstRW<[SPRWriteResGroup440], (instrs VP2INTERSECTQZrr)>;
+
+def SPRWriteResGroup441 : SchedWriteRes<[SPRPort00_01]> {
+ let Latency = 3;
+}
+def : InstRW<[SPRWriteResGroup441], (instregex "^VP(A|SU)BS(B|W)Z(128|256)rrk(z?)$",
+ "^VPADD(U?)S(B|W)Z(128|256)rrk(z?)$",
+ "^VPAVG(B|W)Z(128|256)rrk(z?)$",
+ "^VPM(AX|IN)(SB|UW)Z(128|256)rrk(z?)$",
+ "^VPM(AX|IN)(SW|UB)Z(128|256)rrk(z?)$",
+ "^VPSH(L|R)DVWZ(128|256)rk(z?)$",
+ "^VPS(L|R)LVWZ(128|256)rrk(z?)$",
+ "^VPS(L|R)LWZ(128|256)rik(z?)$",
+ "^VPSRAVWZ(128|256)rrk(z?)$",
+ "^VPSRAWZ(128|256)rik(z?)$",
+ "^VPSUBUS(B|W)Z(128|256)rrk(z?)$")>;
+
+def SPRWriteResGroup442 : SchedWriteRes<[SPRPort01_05, SPRPort02_03_11]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup442, ReadAfterVecYLd], (instregex "^VSHUFP(D|S)Yrmi$",
+ "^VSHUFP(D|S)Z256rm(bi|ik)$",
+ "^VSHUFP(D|S)Z256rmbik(z?)$",
+ "^VSHUFP(D|S)Z256rmi((kz)?)$")>;
+def : InstRW<[SPRWriteResGroup442, ReadAfterVecYLd], (instrs VPBLENDWYrmi)>;
+
+def SPRWriteResGroup443 : SchedWriteRes<[SPRPort00, SPRPort05]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup443], (instregex "^VPBROADCASTM(B2Q|W2D)Z(128|256)rr$",
+ "^VPBROADCASTM(B2Q|W2D)Zrr$",
+ "^VP(ERM|SRA)WZrrk(z?)$",
+ "^VPSHUFBITQMBZ(128|256)rr$",
+ "^VPS(L|R)LWZrrk(z?)$")>;
+def : InstRW<[SPRWriteResGroup443], (instrs VPSHUFBITQMBZrr)>;
+
+def SPRWriteResGroup444 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [1, 1, 1, 2, 1];
+ let Latency = 12;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup444], (instregex "^VPCOMPRESS(B|W)Z(128|256)mr$")>;
+def : InstRW<[SPRWriteResGroup444], (instrs VPCOMPRESSWZmr)>;
+
+def SPRWriteResGroup445 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [1, 1, 1, 2, 1];
+ let Latency = 14;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup445], (instregex "^VPCOMPRESS(B|W)Z(128|256)mrk$")>;
+def : InstRW<[SPRWriteResGroup445], (instrs VPCOMPRESSWZmrk)>;
+
+def SPRWriteResGroup446 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [1, 1, 2, 2, 2];
+ let Latency = 12;
+ let NumMicroOps = 8;
+}
+def : InstRW<[SPRWriteResGroup446], (instrs VPCOMPRESSBZmr)>;
+
+def SPRWriteResGroup447 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [1, 1, 2, 2, 2];
+ let Latency = 14;
+ let NumMicroOps = 8;
+}
+def : InstRW<[SPRWriteResGroup447], (instrs VPCOMPRESSBZmrk)>;
+
+def SPRWriteResGroup448 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [5, 4, 1, 5];
+ let Latency = 17;
+ let NumMicroOps = 15;
+}
+def : InstRW<[SPRWriteResGroup448], (instregex "^VPCONFLICTDZ128rm((b|k|bk|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup448], (instrs VPCONFLICTDZ128rmbkz)>;
+
+def SPRWriteResGroup449 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort05]> {
+ let ResourceCycles = [5, 5, 5];
+ let Latency = 12;
+ let NumMicroOps = 15;
+}
+def : InstRW<[SPRWriteResGroup449], (instregex "^VPCONFLICTDZ128rr((k|kz)?)$")>;
+
+def SPRWriteResGroup450 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [7, 5, 1, 1, 9];
+ let Latency = 24;
+ let NumMicroOps = 23;
+}
+def : InstRW<[SPRWriteResGroup450], (instregex "^VPCONFLICTDZ256rm((b|k|bk|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup450], (instrs VPCONFLICTDZ256rmbkz)>;
+
+def SPRWriteResGroup451 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort00_06, SPRPort05]> {
+ let ResourceCycles = [7, 6, 1, 9];
+ let Latency = 17;
+ let NumMicroOps = 23;
+}
+def : InstRW<[SPRWriteResGroup451], (instregex "^VPCONFLICTDZ256rr((k|kz)?)$")>;
+
+def SPRWriteResGroup452 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [11, 8, 1, 17];
+ let Latency = 33;
+ let NumMicroOps = 37;
+}
+def : InstRW<[SPRWriteResGroup452], (instregex "^VPCONFLICTDZrm((b|k|bk|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup452], (instrs VPCONFLICTDZrmbkz)>;
+
+def SPRWriteResGroup453 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> {
+ let ResourceCycles = [11, 9, 17];
+ let Latency = 26;
+ let NumMicroOps = 37;
+}
+def : InstRW<[SPRWriteResGroup453], (instregex "^VPCONFLICTDZrr((kz)?)$")>;
+
+def SPRWriteResGroup454 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> {
+ let ResourceCycles = [11, 9, 17];
+ let Latency = 25;
+ let NumMicroOps = 37;
+}
+def : InstRW<[SPRWriteResGroup454], (instrs VPCONFLICTDZrrk)>;
+
+def SPRWriteResGroup455 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 11;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup455], (instregex "^VPCONFLICTQZ128rm((b|k|bk|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup455], (instrs VPCONFLICTQZ128rmbkz)>;
+def : InstRW<[SPRWriteResGroup455, ReadAfterVecYLd], (instregex "^VPERM(I|T)2B128rm$")>;
+
+def SPRWriteResGroup456 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup456], (instregex "^VPCONFLICTQZ128rr((k|kz)?)$")>;
+
+def SPRWriteResGroup457 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [5, 4, 1, 5];
+ let Latency = 20;
+ let NumMicroOps = 15;
+}
+def : InstRW<[SPRWriteResGroup457], (instregex "^VPCONFLICTQZ256rm((b|k|bk|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup457], (instrs VPCONFLICTQZ256rmbkz)>;
+
+def SPRWriteResGroup458 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort05]> {
+ let ResourceCycles = [5, 5, 5];
+ let Latency = 13;
+ let NumMicroOps = 15;
+}
+def : InstRW<[SPRWriteResGroup458], (instregex "^VPCONFLICTQZ256rr((k|kz)?)$")>;
+
+def SPRWriteResGroup459 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [7, 5, 1, 9];
+ let Latency = 23;
+ let NumMicroOps = 22;
+}
+def : InstRW<[SPRWriteResGroup459], (instregex "^VPCONFLICTQZrm((b|k|bk|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup459], (instrs VPCONFLICTQZrmbkz)>;
+
+def SPRWriteResGroup460 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> {
+ let ResourceCycles = [7, 6, 9];
+ let Latency = 17;
+ let NumMicroOps = 22;
+}
+def : InstRW<[SPRWriteResGroup460], (instregex "^VPCONFLICTQZrr((kz)?)$")>;
+
+def SPRWriteResGroup461 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort05]> {
+ let ResourceCycles = [7, 6, 9];
+ let Latency = 16;
+ let NumMicroOps = 22;
+}
+def : InstRW<[SPRWriteResGroup461], (instrs VPCONFLICTQZrrk)>;
+
+def SPRWriteResGroup462 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 13;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup462, ReadAfterVecYLd], (instregex "^VPERM(I|T)2B128rmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup462, ReadAfterVecYLd], (instrs VPERMT2W128rm)>;
+
+def SPRWriteResGroup463 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup463], (instregex "^VPERM(I|T)2B(128|256)rr$")>;
+
+def SPRWriteResGroup464 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup464], (instregex "^VPERM(I|T)2B(128|256)rrk(z?)$",
+ "^VPERM(I|T)2W(128|256)rr$")>;
+
+def SPRWriteResGroup465 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 12;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup465, ReadAfterVecYLd], (instregex "^VPERM(I|T)2B256rm$")>;
+
+def SPRWriteResGroup466 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 14;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup466, ReadAfterVecYLd], (instregex "^VPERM(I|T)2B256rmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup466, ReadAfterVecYLd], (instrs VPERMI2W128rm,
+ VPERMT2W256rm)>;
+
+def SPRWriteResGroup467 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 12;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup467, ReadAfterVecYLd], (instregex "^VPERM(I|T)2Brm$")>;
+
+def SPRWriteResGroup468 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 14;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup468, ReadAfterVecYLd], (instregex "^VPERM(I|T)2Brmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup468, ReadAfterVecYLd], (instrs VPERMT2Wrm)>;
+
+def SPRWriteResGroup469 : SchedWriteRes<[SPRPort00_05, SPRPort05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 5;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup469], (instregex "^VPERM(I|T)2Brr$")>;
+
+def SPRWriteResGroup470 : SchedWriteRes<[SPRPort00_05, SPRPort05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup470], (instregex "^VPERM(I|T)2Brrk(z?)$",
+ "^VPERM(I|T)2Wrr$")>;
+
+def SPRWriteResGroup471 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 16;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup471, ReadAfterVecYLd], (instregex "^VPERMI2W128rmk(z?)$",
+ "^VPERMT2W256rmk(z?)$")>;
+
+def SPRWriteResGroup472 : SchedWriteRes<[SPRPort00_01_05, SPRPort05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup472], (instregex "^VPERM(I|T)2W(128|256)rrk(z?)$")>;
+
+def SPRWriteResGroup473 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 15;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup473, ReadAfterVecYLd], (instregex "^VPERMT2W128rmk(z?)$")>;
+def : InstRW<[SPRWriteResGroup473, ReadAfterVecYLd], (instrs VPERMI2W256rm)>;
+
+def SPRWriteResGroup474 : SchedWriteRes<[SPRPort00_01_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 17;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup474, ReadAfterVecYLd], (instregex "^VPERMI2W256rmk(z?)$")>;
+
+def SPRWriteResGroup475 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 15;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup475, ReadAfterVecYLd], (instrs VPERMI2Wrm)>;
+
+def SPRWriteResGroup476 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 17;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup476, ReadAfterVecYLd], (instregex "^VPERMI2Wrmk(z?)$")>;
+
+def SPRWriteResGroup477 : SchedWriteRes<[SPRPort00_05, SPRPort05]> {
+ let ResourceCycles = [1, 2];
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup477], (instregex "^VPERM(I|T)2Wrrk(z?)$")>;
+
+def SPRWriteResGroup478 : SchedWriteRes<[SPRPort00_05, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [1, 1, 2];
+ let Latency = 16;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup478, ReadAfterVecYLd], (instregex "^VPERMT2Wrmk(z?)$")>;
+
+def SPRWriteResGroup479 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup479, ReadAfterVecYLd], (instrs VPERMWZ128rm)>;
+
+def SPRWriteResGroup480 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 13;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup480, ReadAfterVecYLd], (instregex "^VPERMWZ(128|256)rmk(z?)$")>;
+
+def SPRWriteResGroup481 : SchedWriteRes<[SPRPort00_01, SPRPort05]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup481], (instregex "^VPERMWZ(128|256)rr$")>;
+
+def SPRWriteResGroup482 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup482, ReadAfterVecYLd], (instrs VPERMWZ256rm)>;
+
+def SPRWriteResGroup483 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup483, ReadAfterVecYLd], (instrs VPERMWZrm)>;
+
+def SPRWriteResGroup484 : SchedWriteRes<[SPRPort05]> {
+ let ResourceCycles = [2];
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup484], (instregex "^VPEXPAND(B|W)Z(128|256)rrk(z?)$",
+ "^VPEXPAND(B|W)Zrrk(z?)$")>;
+
+def SPRWriteResGroup485 : SchedWriteRes<[SPRPort00_01, SPRPort01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [1, 2, 1];
+ let Latency = 10;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup485, ReadAfterVecYLd], (instregex "^VPH(ADD|SUB)SWYrm$")>;
+
+def SPRWriteResGroup486 : SchedWriteRes<[SPRPort00_01]> {
+ let Latency = 7;
+}
+def : InstRW<[SPRWriteResGroup486], (instregex "^VPMADDUBSWZ(128|256)rrk(z?)$",
+ "^VPMULH((U|RS)?)WZ(128|256)rrk(z?)$",
+ "^VPMULLWZ(128|256)rrk(z?)$")>;
+
+def SPRWriteResGroup487 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let Latency = 14;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup487, ReadAfterVecYLd], (instregex "^VPMADDUBSWZrmk(z?)$",
+ "^VPMULH((U|RS)?)WZrmk(z?)$",
+ "^VPMULLWZrmk(z?)$")>;
+
+def SPRWriteResGroup488 : SchedWriteRes<[SPRPort00]> {
+ let Latency = 7;
+}
+def : InstRW<[SPRWriteResGroup488], (instregex "^VPMADDUBSWZrrk(z?)$",
+ "^VPMULH((U|RS)?)WZrrk(z?)$",
+ "^VPMULLWZrrk(z?)$")>;
+
+def SPRWriteResGroup489 : SchedWriteRes<[SPRPort01_05, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let Latency = 12;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup489], (instregex "^VPMOV((US)?)DBZ(128|256)mr$",
+ "^VPMOV((S|US)?)(D|Q)WZ(128|256)mr$",
+ "^VPMOV(Q|W|SD|SW)BZ256mr$",
+ "^VPMOV(W|SD)BZ128mr$",
+ "^VPMOV(U?)SQBZ256mr$",
+ "^VPMOV(U?)SQDZ(128|256)mr$",
+ "^VPMOV(U?)SWBZ128mr$")>;
+def : InstRW<[SPRWriteResGroup489], (instrs VPMOVUSWBZ256mr)>;
+
+def SPRWriteResGroup490 : SchedWriteRes<[SPRPort01_05, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let Latency = 13;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup490], (instregex "^VPMOV(D|Q|W|SQ|SW)BZ128mrk$",
+ "^VPMOV((S|US)?)(D|Q)WZ128mrk$",
+ "^VPMOV(U?)S(DB|QD)Z128mrk$",
+ "^VPMOVUS(Q|W)BZ128mrk$")>;
+
+def SPRWriteResGroup491 : SchedWriteRes<[SPRPort01_05, SPRPort05]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup491], (instregex "^VPMOV(D|Q|W|SQ|SW)BZ128rr$",
+ "^VPMOV((S|US)?)(D|Q)WZ128rr$",
+ "^VPMOV(U?)S(DB|QD)Z128rr$",
+ "^VPMOV(U?)SQDZ128rrk(z?)$",
+ "^VPMOVUS(Q|W)BZ128rr$")>;
+
+def SPRWriteResGroup492 : SchedWriteRes<[SPRPort01_05, SPRPort05]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup492], (instregex "^VPMOV(D|Q|W|SQ|SW)BZ128rrk(z?)$",
+ "^VPMOV(D|Q|W|SQ|SW)BZ256rr$",
+ "^VPMOV((S|US)?)(D|Q)WZ128rrk(z?)$",
+ "^VPMOV((S|US)?)(D|Q)WZ256rr$",
+ "^VPMOV(U?)SDBZ128rrk(z?)$",
+ "^VPMOV(U?)S(DB|QD)Z256rr$",
+ "^VPMOV(U?)SQDZ256rrk(z?)$",
+ "^VPMOVUS(Q|W)BZ128rrk(z?)$",
+ "^VPMOVUS(Q|W)BZ256rr$")>;
+
+def SPRWriteResGroup493 : SchedWriteRes<[SPRPort01_05, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let Latency = 15;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup493], (instregex "^VPMOV(D|Q|W|SQ|SW)BZ256mrk$",
+ "^VPMOV((S|US)?)(D|Q)WZ256mrk$",
+ "^VPMOV(U?)S(DB|QD)Z256mrk$",
+ "^VPMOVUS(Q|W)BZ256mrk$")>;
+
+def SPRWriteResGroup494 : SchedWriteRes<[SPRPort01_05, SPRPort05]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup494], (instregex "^VPMOV(D|Q|W|SQ|SW)BZ256rrk(z?)$",
+ "^VPMOV((S|US)?)(D|Q)WZ256rrk(z?)$",
+ "^VPMOV(U?)SDBZ256rrk(z?)$",
+ "^VPMOVUS(Q|W)BZ256rrk(z?)$")>;
+
+def SPRWriteResGroup495 : SchedWriteRes<[SPRPort01_05, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let Latency = 20;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup495], (instregex "^VPMOV((S|US)?)QBZ128mr$")>;
+
+def SPRWriteResGroup496 : SchedWriteRes<[SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let Latency = 14;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup496], (instregex "^VPMOVQDZ((256)?)mrk$")>;
+
+def SPRWriteResGroup497 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
+ let ResourceCycles = [3, 1];
+ let Latency = 23;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup497, ReadAfterVecXLd], (instregex "^VPMULLQZ128rm((b|k|bk|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup497, ReadAfterVecXLd], (instrs VPMULLQZ128rmbkz)>;
+def : InstRW<[SPRWriteResGroup497, ReadAfterVecYLd], (instregex "^VPMULLQZ256rm((b|k|bk|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup497, ReadAfterVecYLd], (instrs VPMULLQZ256rmbkz)>;
+
+def SPRWriteResGroup498 : SchedWriteRes<[SPRPort00_01]> {
+ let ResourceCycles = [3];
+ let Latency = 15;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup498], (instregex "^VPMULLQZ(128|256)rr((k|kz)?)$")>;
+
+def SPRWriteResGroup499 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let ResourceCycles = [3, 1];
+ let Latency = 23;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup499, ReadAfterVecYLd], (instregex "^VPMULLQZrm((b|k|bk|kz)?)$")>;
+def : InstRW<[SPRWriteResGroup499, ReadAfterVecYLd], (instrs VPMULLQZrmbkz)>;
+
+def SPRWriteResGroup500 : SchedWriteRes<[SPRPort00]> {
+ let ResourceCycles = [3];
+ let Latency = 15;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup500], (instregex "^VPMULLQZrr((k|kz)?)$")>;
+
+def SPRWriteResGroup501 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [1, 1, 1, 4, 4];
+ let Latency = 12;
+ let NumMicroOps = 11;
+}
+def : InstRW<[SPRWriteResGroup501], (instregex "^VPSCATTER(D|Q)QZ256mr$",
+ "^VSCATTER(D|Q)PDZ256mr$")>;
+def : InstRW<[SPRWriteResGroup501], (instrs VPSCATTERDDZ128mr,
+ VPSCATTERQDZ256mr,
+ VSCATTERDPSZ128mr,
+ VSCATTERQPSZ256mr)>;
+
+def SPRWriteResGroup502 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [1, 1, 1, 8, 8];
+ let Latency = 12;
+ let NumMicroOps = 19;
+}
+def : InstRW<[SPRWriteResGroup502], (instrs VPSCATTERDDZ256mr,
+ VSCATTERDPSZ256mr)>;
+
+def SPRWriteResGroup503 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [2, 1, 16, 16];
+ let Latency = 19;
+ let NumMicroOps = 35;
+}
+def : InstRW<[SPRWriteResGroup503], (instrs VPSCATTERDDZmr,
+ VSCATTERDPSZmr)>;
+
+def SPRWriteResGroup504 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [1, 1, 1, 2, 2];
+ let Latency = 12;
+ let NumMicroOps = 7;
+}
+def : InstRW<[SPRWriteResGroup504], (instregex "^VPSCATTER(D|Q)QZ128mr$",
+ "^VSCATTER(D|Q)PDZ128mr$")>;
+def : InstRW<[SPRWriteResGroup504], (instrs VPSCATTERQDZ128mr,
+ VSCATTERQPSZ128mr)>;
+
+def SPRWriteResGroup505 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06_10, SPRPort04_09, SPRPort07_08]> {
+ let ResourceCycles = [2, 1, 8, 8];
+ let Latency = 12;
+ let NumMicroOps = 19;
+}
+def : InstRW<[SPRWriteResGroup505], (instregex "^VPSCATTER(D|Q)QZmr$",
+ "^VSCATTER(D|Q)PDZmr$")>;
+def : InstRW<[SPRWriteResGroup505], (instrs VPSCATTERQDZmr,
+ VSCATTERQPSZmr)>;
+
+def SPRWriteResGroup506 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup506, ReadAfterVecXLd], (instregex "^VPSH(L|R)D(D|Q)Z128rmbi$",
+ "^VPSH(L|R)D(D|Q|W)Z128rmi$",
+ "^VPSH(L|R)DV(D|Q|W)Z128m$",
+ "^VPSH(L|R)DV(D|Q)Z128m(b|k|kz)$",
+ "^VPSH(L|R)DV(D|Q)Z128mbk(z?)$")>;
+
+def SPRWriteResGroup507 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup507, ReadAfterVecXLd], (instregex "^VPSH(L|R)D(D|Q)Z128rm(b?)ik(z?)$")>;
+
+def SPRWriteResGroup508 : SchedWriteRes<[SPRPort00_01]>;
+def : InstRW<[SPRWriteResGroup508], (instregex "^VPSH(L|R)D(D|Q|W)Z(128|256)rri$",
+ "^VPSH(L|R)DV(D|Q|W)Z(128|256)r$",
+ "^VPSH(L|R)DV(D|Q)Z(128|256)rk(z?)$")>;
+
+def SPRWriteResGroup509 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup509], (instregex "^VPSH(L|R)D(D|Q)Z(128|256)rrik(z?)$")>;
+
+def SPRWriteResGroup510 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup510, ReadAfterVecYLd], (instregex "^VPSH(L|R)D(D|Q)Z256rmbi$",
+ "^VPSH(L|R)D(D|Q|W)Z256rmi$",
+ "^VPSH(L|R)DV(D|Q|W)Z256m$",
+ "^VPSH(L|R)DV(D|Q)Z256m(b|k|kz)$",
+ "^VPSH(L|R)DV(D|Q)Z256mbk(z?)$")>;
+
+def SPRWriteResGroup511 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup511, ReadAfterVecYLd], (instregex "^VPSH(L|R)D(D|Q)Z256rm(b?)ik(z?)$")>;
+
+def SPRWriteResGroup512 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup512, ReadAfterVecYLd], (instregex "^VPSH(L|R)D(D|Q)Zrmbi$",
+ "^VPSH(L|R)D(D|Q|W)Zrmi$",
+ "^VPSH(L|R)DV(D|Q|W)Zm$",
+ "^VPSH(L|R)DV(D|Q)Zm(b|k|kz)$",
+ "^VPSH(L|R)DV(D|Q)Zmbk(z?)$")>;
+
+def SPRWriteResGroup513 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup513, ReadAfterVecYLd], (instregex "^VPSH(L|R)D(D|Q)Zrm(b?)ik(z?)$")>;
+
+def SPRWriteResGroup514 : SchedWriteRes<[SPRPort00, SPRPort00_05]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup514], (instregex "^VPSH(L|R)D(D|Q)Zrrik(z?)$")>;
+
+def SPRWriteResGroup515 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11]> {
+ let Latency = 11;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup515, ReadAfterVecXLd], (instregex "^VPSH(L|R)DWZ128rmik(z?)$")>;
+
+def SPRWriteResGroup516 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup516], (instregex "^VPSH(L|R)DWZ(128|256)rrik(z?)$")>;
+
+def SPRWriteResGroup517 : SchedWriteRes<[SPRPort00_01, SPRPort00_01_05, SPRPort02_03_11]> {
+ let Latency = 12;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup517, ReadAfterVecYLd], (instregex "^VPSH(L|R)DWZ256rmik(z?)$")>;
+
+def SPRWriteResGroup518 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> {
+ let Latency = 12;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup518, ReadAfterVecYLd], (instregex "^VPSH(L|R)DWZrmik(z?)$")>;
+
+def SPRWriteResGroup519 : SchedWriteRes<[SPRPort00, SPRPort00_05]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup519], (instregex "^VPSH(L|R)DWZrrik(z?)$")>;
+
+def SPRWriteResGroup520 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup520, ReadAfterVecXLd], (instrs VPSHUFBITQMBZ128rm)>;
+def : InstRW<[SPRWriteResGroup520, ReadAfterVecYLd], (instregex "^VPSHUFBITQMBZ((256)?)rm$")>;
+
+def SPRWriteResGroup521 : SchedWriteRes<[SPRPort00, SPRPort02_03_11, SPRPort05]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup521, ReadAfterVecXLd], (instrs VPSHUFBITQMBZ128rmk)>;
+def : InstRW<[SPRWriteResGroup521, ReadAfterVecYLd], (instregex "^VPSHUFBITQMBZ((256)?)rmk$")>;
+
+def SPRWriteResGroup522 : SchedWriteRes<[SPRPort00_01, SPRPort01_05]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup522], (instregex "^VPS(L|R)LWZ128rrk(z?)$",
+ "^VPSRAWZ128rrk(z?)$")>;
+
+def SPRWriteResGroup523 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 16;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup523, ReadAfterVecYLd], (instregex "^VR(CP|SQRT)PHZm(bk|kz)$",
+ "^VR(CP|SQRT)PHZm(k|bkz)$")>;
+
+def SPRWriteResGroup524 : SchedWriteRes<[SPRPort00, SPRPort00_05]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup524], (instregex "^VRCPPHZrk(z?)$")>;
+
+def SPRWriteResGroup525 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
+ let ResourceCycles = [3, 1];
+ let Latency = 20;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup525, ReadAfterVecXLd], (instregex "^VREDUCEPHZ128rm(b?)i$")>;
+def : InstRW<[SPRWriteResGroup525, ReadAfterVecXLd], (instrs VREDUCESHZrmi)>;
+def : InstRW<[SPRWriteResGroup525, ReadAfterVecYLd], (instregex "^VREDUCEPHZ256rm(b?)i$")>;
+
+def SPRWriteResGroup526 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
+ let ResourceCycles = [3, 1];
+ let Latency = 22;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup526, ReadAfterVecXLd], (instregex "^VREDUCEPHZ128rm(b?)ik(z?)$",
+ "^VREDUCESHZrmik(z?)$")>;
+def : InstRW<[SPRWriteResGroup526, ReadAfterVecYLd], (instregex "^VREDUCEPHZ256rm(b?)ik(z?)$")>;
+
+def SPRWriteResGroup527 : SchedWriteRes<[SPRPort00_01]> {
+ let ResourceCycles = [3];
+ let Latency = 13;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup527], (instregex "^VREDUCEPHZ(128|256)rri$",
+ "^VREDUCESHZrri(b?)$")>;
+
+def SPRWriteResGroup528 : SchedWriteRes<[SPRPort00_01]> {
+ let ResourceCycles = [3];
+ let Latency = 16;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup528], (instregex "^VREDUCEPHZ(128|256)rrik(z?)$",
+ "^VREDUCESHZrri(bk|kz)$",
+ "^VREDUCESHZrri(k|bkz)$")>;
+
+def SPRWriteResGroup529 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let ResourceCycles = [3, 1];
+ let Latency = 20;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup529, ReadAfterVecYLd], (instregex "^VREDUCEPHZrm(b?)i$")>;
+
+def SPRWriteResGroup530 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let ResourceCycles = [3, 1];
+ let Latency = 22;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup530, ReadAfterVecYLd], (instregex "^VREDUCEPHZrm(b?)ik(z?)$")>;
+
+def SPRWriteResGroup531 : SchedWriteRes<[SPRPort00]> {
+ let ResourceCycles = [3];
+ let Latency = 13;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup531], (instregex "^VREDUCEPHZrri(b?)$")>;
+
+def SPRWriteResGroup532 : SchedWriteRes<[SPRPort00]> {
+ let ResourceCycles = [3];
+ let Latency = 16;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup532], (instregex "^VREDUCEPHZrri(bk|kz)$",
+ "^VREDUCEPHZrri(k|bkz)$")>;
+
+def SPRWriteResGroup533 : SchedWriteRes<[SPRPort00]> {
+ let ResourceCycles = [2];
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup533], (instregex "^VRNDSCALEP(D|S)Zrri((b|k|bk|kz)?)$",
+ "^VRNDSCALEP(D|S)Zrribkz$")>;
+
+def SPRWriteResGroup534 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 17;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup534, ReadAfterVecXLd], (instregex "^VRNDSCALEPHZ128rm(b?)ik(z?)$",
+ "^VRNDSCALESHZm_Intk(z?)$",
+ "^VSCALEFPHZ128rm(bk|kz)$",
+ "^VSCALEFPHZ128rm(k|bkz)$")>;
+def : InstRW<[SPRWriteResGroup534, ReadAfterVecYLd], (instregex "^VRNDSCALEPHZ256rm(b?)ik(z?)$",
+ "^VSCALEFPHZ256rm(bk|kz)$",
+ "^VSCALEFPHZ256rm(k|bkz)$")>;
+def : InstRW<[SPRWriteResGroup534, ReadAfterVecLd], (instregex "^VSCALEFSHZrmk(z?)$")>;
+
+def SPRWriteResGroup535 : SchedWriteRes<[SPRPort00_01]> {
+ let ResourceCycles = [2];
+ let Latency = 11;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup535], (instregex "^VRNDSCALEPHZ(128|256)rrik(z?)$",
+ "^VRNDSCALESHZr(b?)_Intk(z?)$",
+ "^VSCALEFPHZ(128|256)rrk(z?)$",
+ "^VSCALEFSHZrrb_Intk(z?)$",
+ "^VSCALEFSHZrrk(z?)$")>;
+
+def SPRWriteResGroup536 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 17;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup536, ReadAfterVecYLd], (instregex "^VRNDSCALEPHZrm(b?)ik(z?)$",
+ "^VSCALEFPHZrm(bk|kz)$",
+ "^VSCALEFPHZrm(k|bkz)$")>;
+
+def SPRWriteResGroup537 : SchedWriteRes<[SPRPort00]> {
+ let ResourceCycles = [2];
+ let Latency = 11;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup537], (instregex "^VRNDSCALEPHZrri(bk|kz)$",
+ "^VRNDSCALEPHZrri(k|bkz)$",
+ "^VSCALEFPHZrr(bk|kz)$",
+ "^VSCALEFPHZrr(k|bkz)$")>;
+
+def SPRWriteResGroup538 : SchedWriteRes<[SPRPort00, SPRPort00_05]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 6;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup538], (instregex "^VRSQRT14P(D|S)Zr$")>;
+def : InstRW<[SPRWriteResGroup538], (instrs VRSQRT14PSZrk,
+ VRSQRTPHZr)>;
+
+def SPRWriteResGroup539 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let Latency = 25;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup539], (instrs VSQRTPDYm)>;
+def : InstRW<[SPRWriteResGroup539, ReadAfterVecYLd], (instregex "^VSQRTPDZ256m(b?)$")>;
+
+def SPRWriteResGroup540 : SchedWriteRes<[SPRPort00, SPRPort02_03_11]> {
+ let Latency = 20;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup540, ReadAfterVecXLd], (instregex "^VSQRTPDZ128m(bk|kz)$",
+ "^VSQRTPDZ128m(k|bkz)$")>;
+def : InstRW<[SPRWriteResGroup540, ReadAfterVecLd], (instregex "^VSQRTSDZm_Intk(z?)$")>;
+
+def SPRWriteResGroup541 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 38;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup541, ReadAfterVecYLd], (instrs VSQRTPDZm)>;
+
+def SPRWriteResGroup542 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 39;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup542, ReadAfterVecYLd], (instrs VSQRTPDZmb)>;
+
+def SPRWriteResGroup543 : SchedWriteRes<[SPRPort00, SPRPort00_05]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 31;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup543], (instrs VSQRTPDZr)>;
+
+def SPRWriteResGroup544 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 41;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup544, ReadAfterVecXLd], (instregex "^VSQRTPHZ128m(bk|kz)$",
+ "^VSQRTPHZ128m(k|bkz)$")>;
+
+def SPRWriteResGroup545 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 35;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup545], (instregex "^VSQRTPHZ(128|256)rk$")>;
+def : InstRW<[SPRWriteResGroup545], (instrs VSQRTPHZ256rkz)>;
+
+def SPRWriteResGroup546 : SchedWriteRes<[SPRPort00, SPRPort00_01_05]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 12;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup546], (instrs VSQRTPHZ128rkz)>;
+
+def SPRWriteResGroup547 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 40;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup547, ReadAfterVecYLd], (instregex "^VSQRTPHZ256m(b?)$")>;
+
+def SPRWriteResGroup548 : SchedWriteRes<[SPRPort00, SPRPort00_01_05, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1, 1];
+ let Latency = 42;
+ let NumMicroOps = 4;
+}
+def : InstRW<[SPRWriteResGroup548, ReadAfterVecYLd], (instregex "^VSQRTPHZ256m(bk|kz)$",
+ "^VSQRTPHZ256m(k|bkz)$")>;
+
+def SPRWriteResGroup549 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [4, 2, 1, 1, 1];
+ let Latency = 53;
+ let NumMicroOps = 9;
+}
+def : InstRW<[SPRWriteResGroup549, ReadAfterVecYLd], (instregex "^VSQRTPHZm(b?)$")>;
+
+def SPRWriteResGroup550 : SchedWriteRes<[SPRPort00, SPRPort00_05, SPRPort00_06, SPRPort02_03_11, SPRPort05]> {
+ let ResourceCycles = [4, 2, 1, 1, 1];
+ let Latency = 55;
+ let NumMicroOps = 9;
+}
+def : InstRW<[SPRWriteResGroup550, ReadAfterVecYLd], (instregex "^VSQRTPHZm(bk|kz)$",
+ "^VSQRTPHZm(k|bkz)$")>;
+
+def SPRWriteResGroup551 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort05]> {
+ let ResourceCycles = [4, 1, 1];
+ let Latency = 45;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup551], (instregex "^VSQRTPHZr(b?)$")>;
+
+def SPRWriteResGroup552 : SchedWriteRes<[SPRPort00, SPRPort00_06, SPRPort05]> {
+ let ResourceCycles = [4, 1, 1];
+ let Latency = 47;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup552], (instregex "^VSQRTPHZr(bk|kz)$",
+ "^VSQRTPHZr(k|bkz)$")>;
+
+def SPRWriteResGroup553 : SchedWriteRes<[SPRPort00, SPRPort00_05]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 19;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup553], (instrs VSQRTPSZr)>;
+
+def SPRWriteResGroup554 : SchedWriteRes<[SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort01_05_10]> {
+ let ResourceCycles = [1, 2, 3, 3, 1];
+ let Latency = 12;
+ let NumMicroOps = 10;
+}
+def : InstRW<[SPRWriteResGroup554], (instrs VZEROALL)>;
+
+def SPRWriteResGroup555 : SchedWriteRes<[SPRPort00_01_05_06]> {
+ let ResourceCycles = [2];
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup555], (instrs WAIT)>;
+
+def SPRWriteResGroup556 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [8, 6, 19, 63, 21, 15, 1, 10, 1];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 144;
+}
+def : InstRW<[SPRWriteResGroup556], (instrs WRMSR)>;
+
+def SPRWriteResGroup557 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06, SPRPort01, SPRPort05]> {
+ let ResourceCycles = [2, 1, 4, 1];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 8;
+}
+def : InstRW<[SPRWriteResGroup557], (instrs WRPKRUr)>;
+
+def SPRWriteResGroup558 : SchedWriteRes<[SPRPort00_01_05_06_10]> {
+ let ResourceCycles = [2];
+ let Latency = 12;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup558, WriteRMW], (instregex "^XADD(16|32|64)rm$")>;
+
+def SPRWriteResGroup559 : SchedWriteRes<[SPRPort00_01_05_06_10]> {
+ let ResourceCycles = [2];
+ let Latency = 13;
+ let NumMicroOps = 2;
+}
+def : InstRW<[SPRWriteResGroup559, WriteRMW], (instrs XADD8rm)>;
+
+def SPRWriteResGroup560 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> {
+ let ResourceCycles = [4, 1];
+ let Latency = 39;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup560, WriteRMW], (instregex "^XCHG(16|32)rm$")>;
+
+def SPRWriteResGroup561 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> {
+ let ResourceCycles = [5, 1];
+ let Latency = 39;
+ let NumMicroOps = 6;
+}
+def : InstRW<[SPRWriteResGroup561, WriteRMW], (instrs XCHG64rm)>;
+
+def SPRWriteResGroup562 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> {
+ let ResourceCycles = [4, 1];
+ let Latency = 40;
+ let NumMicroOps = 5;
+}
+def : InstRW<[SPRWriteResGroup562, WriteRMW], (instrs XCHG8rm)>;
+
+def SPRWriteResGroup563 : SchedWriteRes<[SPRPort00, SPRPort00_01_05_06, SPRPort00_05, SPRPort01, SPRPort05, SPRPort06]> {
+ let ResourceCycles = [2, 4, 2, 1, 2, 4];
+ let Latency = 17;
+ let NumMicroOps = 15;
+}
+def : InstRW<[SPRWriteResGroup563], (instrs XCH_F)>;
+
+def SPRWriteResGroup564 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01]> {
+ let ResourceCycles = [7, 3, 8, 5];
+ let Latency = 4;
+ let NumMicroOps = 23;
+}
+def : InstRW<[SPRWriteResGroup564], (instrs XGETBV)>;
+
+def SPRWriteResGroup565 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort02_03_11]> {
+ let ResourceCycles = [2, 1];
+ let Latency = 7;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup565], (instrs XLAT)>;
+
+def SPRWriteResGroup566 : SchedWriteRes<[SPRPort01, SPRPort02_03, SPRPort02_03_11, SPRPort06]> {
+ let ResourceCycles = [1, 21, 1, 8];
+ let Latency = 37;
+ let NumMicroOps = 31;
+}
+def : InstRW<[SPRWriteResGroup566], (instregex "^XRSTOR((S|64)?)$")>;
+def : InstRW<[SPRWriteResGroup566], (instrs XRSTORS64)>;
+
+def SPRWriteResGroup567 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [14, 25, 44, 21, 21, 4, 1, 9, 1];
+ let Latency = 42;
+ let NumMicroOps = 140;
+}
+def : InstRW<[SPRWriteResGroup567], (instrs XSAVE)>;
+
+def SPRWriteResGroup568 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [14, 25, 44, 21, 21, 4, 1, 9, 1];
+ let Latency = 41;
+ let NumMicroOps = 140;
+}
+def : InstRW<[SPRWriteResGroup568], (instrs XSAVE64)>;
+
+def SPRWriteResGroup569 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [1, 19, 36, 52, 23, 4, 2, 12, 2];
+ let Latency = 42;
+ let NumMicroOps = 151;
+}
+def : InstRW<[SPRWriteResGroup569], (instrs XSAVEC)>;
+
+def SPRWriteResGroup570 : SchedWriteRes<[SPRPort00, SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [1, 19, 36, 53, 23, 4, 2, 12, 2];
+ let Latency = 42;
+ let NumMicroOps = 152;
+}
+def : InstRW<[SPRWriteResGroup570], (instrs XSAVEC64)>;
+
+def SPRWriteResGroup571 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [25, 35, 52, 27, 4, 1, 10, 1];
+ let Latency = 42;
+ let NumMicroOps = 155;
+}
+def : InstRW<[SPRWriteResGroup571], (instrs XSAVEOPT)>;
+
+def SPRWriteResGroup572 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [25, 35, 53, 27, 4, 1, 10, 1];
+ let Latency = 42;
+ let NumMicroOps = 156;
+}
+def : InstRW<[SPRWriteResGroup572], (instrs XSAVEOPT64)>;
+
+def SPRWriteResGroup573 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [23, 32, 53, 29, 30, 4, 2, 9, 2];
+ let Latency = 42;
+ let NumMicroOps = 184;
+}
+def : InstRW<[SPRWriteResGroup573], (instrs XSAVES)>;
+
+def SPRWriteResGroup574 : SchedWriteRes<[SPRPort00_01, SPRPort00_05, SPRPort00_06, SPRPort01, SPRPort01_05, SPRPort02_03_11, SPRPort04_09, SPRPort05, SPRPort07_08]> {
+ let ResourceCycles = [23, 33, 53, 29, 32, 4, 2, 8, 2];
+ let Latency = 42;
+ let NumMicroOps = 186;
+}
+def : InstRW<[SPRWriteResGroup574], (instrs XSAVES64)>;
+
+def SPRWriteResGroup575 : SchedWriteRes<[SPRPort00_01_05, SPRPort00_01_05_06_10, SPRPort00_05_06, SPRPort00_06, SPRPort01, SPRPort01_05_10, SPRPort05]> {
+ let ResourceCycles = [4, 23, 2, 14, 8, 1, 2];
+ let Latency = 5;
+ let NumMicroOps = 54;
+}
+def : InstRW<[SPRWriteResGroup575], (instrs XSETBV)>;
+
+def SPRWriteResGroup576 : SchedWriteRes<[SPRPort00_01_05_06_10, SPRPort00_06]> {
+ let ResourceCycles = [2, 1];
+ let Latency = SapphireRapidsModel.MaxLatency;
+ let NumMicroOps = 3;
+}
+def : InstRW<[SPRWriteResGroup576], (instrs XTEST)>;
+
+}
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
index 114e9d1f5a56..9412a40be48c 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td
@@ -236,10 +236,10 @@ defm : X86WriteRes<WriteFStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore32, [SKLPort237,SKLPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore32Y, [SKLPort237,SKLPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore64, [SKLPort237,SKLPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore64Y, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore32, [SKLPort23,SKLPort0,SKLPort4], 2, [1,1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [SKLPort23,SKLPort0,SKLPort4], 2, [1,1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore64, [SKLPort23,SKLPort0,SKLPort4], 2, [1,1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [SKLPort23,SKLPort0,SKLPort4], 2, [1,1,1], 2>;
defm : X86WriteRes<WriteFMove, [SKLPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SKLPort015], 1, [1], 1>;
@@ -352,10 +352,10 @@ defm : X86WriteRes<WriteVecStoreX, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore32, [SKLPort237,SKLPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore32Y, [SKLPort237,SKLPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore64, [SKLPort237,SKLPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore64Y, [SKLPort237,SKLPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32, [SKLPort23,SKLPort0,SKLPort4], 2, [1,1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [SKLPort23,SKLPort0,SKLPort4], 2, [1,1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64, [SKLPort23,SKLPort0,SKLPort4], 2, [1,1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [SKLPort23,SKLPort0,SKLPort4], 2, [1,1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKLPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SKLPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SKLPort015], 1, [1], 1>;
diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
index 36d5c76a1e50..2f9d075891f8 100644
--- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td
+++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td
@@ -236,10 +236,10 @@ defm : X86WriteRes<WriteFStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteFStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore32, [SKXPort237,SKXPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore32Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore64, [SKXPort237,SKXPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteFMaskedStore64Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore32, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore32Y, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore64, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 2>;
+defm : X86WriteRes<WriteFMaskedStore64Y, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 2>;
defm : X86WriteRes<WriteFMove, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteFMoveX, [SKXPort015], 1, [1], 1>;
@@ -352,10 +352,10 @@ defm : X86WriteRes<WriteVecStoreX, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>;
defm : X86WriteRes<WriteVecStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore32, [SKXPort237,SKXPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore32Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore64, [SKXPort237,SKXPort0], 2, [1,1], 2>;
-defm : X86WriteRes<WriteVecMaskedStore64Y, [SKXPort237,SKXPort0], 2, [1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore32Y, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 2>;
+defm : X86WriteRes<WriteVecMaskedStore64Y, [SKXPort23,SKXPort0,SKXPort4], 2, [1,1,1], 2>;
defm : X86WriteRes<WriteVecMove, [SKXPort05], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveX, [SKXPort015], 1, [1], 1>;
defm : X86WriteRes<WriteVecMoveY, [SKXPort015], 1, [1], 1>;
diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td
index 09a09185b3bc..b7336161b2e0 100644
--- a/llvm/lib/Target/X86/X86ScheduleAtom.td
+++ b/llvm/lib/Target/X86/X86ScheduleAtom.td
@@ -537,7 +537,7 @@ def AtomWrite0_1_1 : SchedWriteRes<[AtomPort0, AtomPort1]> {
def : InstRW<[AtomWrite0_1_1], (instrs POP32r, POP64r,
POP16rmr, POP32rmr, POP64rmr,
PUSH16r, PUSH32r, PUSH64r,
- PUSHi16, PUSHi32,
+ PUSH16i, PUSH32i,
PUSH16rmr, PUSH32rmr, PUSH64rmr,
PUSH16i8, PUSH32i8, PUSH64i8, PUSH64i32,
XCH_F)>;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td
index d5cce17de12b..0e001638d03d 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver1.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td
@@ -1315,14 +1315,6 @@ def : SchedAlias<WriteDPPD, ZnWriteMicrocoded>;
// x,m,i.
def : SchedAlias<WriteDPPDLd, ZnWriteMicrocoded>;
-//-- Other instructions --//
-
-// VZEROUPPER.
-def : InstRW<[WriteMicrocoded], (instrs VZEROUPPER)>;
-
-// VZEROALL.
-def : InstRW<[WriteMicrocoded], (instrs VZEROALL)>;
-
///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver2.td b/llvm/lib/Target/X86/X86ScheduleZnver2.td
index b6c29eebeb79..117885406a0a 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver2.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver2.td
@@ -1321,9 +1321,6 @@ def : SchedAlias<WriteDPPDLd, Zn2WriteMicrocoded>;
// VZEROUPPER.
def : InstRW<[WriteALU], (instrs VZEROUPPER)>;
-// VZEROALL.
-def : InstRW<[WriteMicrocoded], (instrs VZEROALL)>;
-
///////////////////////////////////////////////////////////////////////////////
// Dependency breaking instructions.
///////////////////////////////////////////////////////////////////////////////
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
new file mode 100644
index 000000000000..269d77374beb
--- /dev/null
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -0,0 +1,1957 @@
+//=- X86ScheduleZnver4.td - X86 Znver4 Scheduling ------------*- tablegen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Znver4 to support instruction
+// scheduling and other instruction cost heuristics.
+// Based on:
+// * AMD Software Optimization Guide for AMD Family 19h Processors.
+// https://www.amd.com/system/files/TechDocs/56665.zip
+//===----------------------------------------------------------------------===//
+
+def Znver4Model : SchedMachineModel {
+ // AMD SOG 19h, 2.9.6 Dispatch
+ // The processor may dispatch up to 6 macro ops per cycle
+ // into the execution engine.
+ let IssueWidth = 6;
+ // AMD SOG 19h, 2.10.3
+ // The retire control unit (RCU) tracks the completion status of all
+ // outstanding operations (integer, load/store, and floating-point) and is
+ // the final arbiter for exception processing and recovery.
+ // The unit can receive up to 6 macro ops dispatched per cycle and track up
+ // to 320 macro ops in-flight in non-SMT mode or 160 per thread in SMT mode.
+ let MicroOpBufferSize = 320;
+ // AMD SOG 19h, 2.9.1 Op Cache
+ // The op cache is organized as an associative cache with 64 sets and 8 ways.
+ // At each set-way intersection is an entry containing up to 8 macro ops.
+ // The maximum capacity of the op cache is 4K ops.
+ // Agner, 22.5 µop cache
+ // The size of the µop cache is big enough for holding most critical loops.
+ // FIXME: PR50584: MachineScheduler/PostRAScheduler have quadratic complexity,
+ // with large values here the compilation of certain loops
+ // ends up taking way too long.
+ // Ideally for znver4, we should have 6.75K. However, we don't add that,
+ // considering the compile-time impact, and prefer using the default values
+ // instead.
+ // let LoopMicroOpBufferSize = 6750;
+ // AMD SOG 19h, 2.6.2 L1 Data Cache
+ // The L1 data cache has a 4- or 5-cycle integer load-to-use latency.
+ // AMD SOG 19h, 2.12 L1 Data Cache
+ // The AGU and LS pipelines are optimized for simple address generation modes.
+ // <...> and can achieve 4-cycle load-to-use integer load latency.
+ let LoadLatency = 4;
+ // AMD SOG 19h, 2.12 L1 Data Cache
+ // The AGU and LS pipelines are optimized for simple address generation modes.
+ // <...> and can achieve <...> 7-cycle load-to-use FP load latency.
+ int VecLoadLatency = 7;
+ // Latency of a simple store operation.
+ int StoreLatency = 1;
+ // FIXME:
+ let HighLatency = 25; // FIXME: any better choice?
+ // AMD SOG 19h, 2.8 Optimizing Branching
+ // The branch misprediction penalty is in the range from 11 to 18 cycles,
+ // <...>. The common case penalty is 13 cycles.
+ let MispredictPenalty = 13;
+
+ let PostRAScheduler = 1; // Enable Post RegAlloc Scheduler pass.
+
+ let CompleteModel = 1;
+}
+
+let SchedModel = Znver4Model in {
+
+
+//===----------------------------------------------------------------------===//
+// RCU
+//===----------------------------------------------------------------------===//
+
+// AMD SOG 19h, 2.10.3 Retire Control Unit
+// The unit can receive up to 6 macro ops dispatched per cycle and track up to
+// 320 macro ops in-flight in non-SMT mode or 128 per thread in SMT mode. <...>
+// The retire unit handles in-order commit of up to nine macro ops per cycle.
+def Zn4RCU : RetireControlUnit<Znver4Model.MicroOpBufferSize, 9>;
+
+//===----------------------------------------------------------------------===//
+// Integer Execution Unit
+//
+
+// AMD SOG 19h, 2.4 Superscalar Organization
+// The processor uses four decoupled independent integer scheduler queues,
+// each one servicing one ALU pipeline and one or two other pipelines
+
+//
+// Execution pipes
+//===----------------------------------------------------------------------===//
+
+// AMD SOG 19h, 2.10.2 Execution Units
+// The processor contains 4 general purpose integer execution pipes.
+// Each pipe has an ALU capable of general purpose integer operations.
+def Zn4ALU0 : ProcResource<1>;
+def Zn4ALU1 : ProcResource<1>;
+def Zn4ALU2 : ProcResource<1>;
+def Zn4ALU3 : ProcResource<1>;
+
+// AMD SOG 19h, 2.10.2 Execution Units
+// There is also a separate branch execution unit.
+def Zn4BRU1 : ProcResource<1>;
+
+// AMD SOG 19h, 2.10.2 Execution Units
+// There are three Address Generation Units (AGUs) for all load and store
+// address generation. There are also 3 store data movement units
+// associated with the same schedulers as the AGUs.
+def Zn4AGU0 : ProcResource<1>;
+def Zn4AGU1 : ProcResource<1>;
+def Zn4AGU2 : ProcResource<1>;
+
+//
+// Execution Units
+//===----------------------------------------------------------------------===//
+
+// AMD SOG 19h, 2.10.2 Execution Units
+// ALU0 additionally has divide <...> execution capability.
+defvar Zn4Divider = Zn4ALU0;
+
+// AMD SOG 19h, 2.10.2 Execution Units
+// ALU0 additionally has <...> branch execution capability.
+defvar Zn4BRU0 = Zn4ALU0;
+
+// Integer Multiplication issued on ALU1.
+defvar Zn4Multiplier = Zn4ALU1;
+
+// Execution pipeline grouping
+//===----------------------------------------------------------------------===//
+
+// General ALU operations
+def Zn4ALU0123 : ProcResGroup<[Zn4ALU0, Zn4ALU1, Zn4ALU2, Zn4ALU3]>;
+
+// General AGU operations
+def Zn4AGU012 : ProcResGroup<[Zn4AGU0, Zn4AGU1, Zn4AGU2]>;
+
+// Control flow: jumps, calls
+def Zn4BRU01 : ProcResGroup<[Zn4BRU0, Zn4BRU1]>;
+
+// Everything that isn't control flow, but still needs to access CC register,
+// namely: conditional moves, SETcc.
+def Zn4ALU03 : ProcResGroup<[Zn4ALU0, Zn4ALU3]>;
+
+// Zn4ALU1 handles complex bit twiddling: CRC/PDEP/PEXT
+
+// Simple bit twiddling: bit test, shift/rotate, bit extraction
+def Zn4ALU12 : ProcResGroup<[Zn4ALU1, Zn4ALU2]>;
+
+
+//
+// Scheduling
+//===----------------------------------------------------------------------===//
+
+// AMD SOG 19h, 2.10.3 Retire Control Unit
+// The integer physical register file (PRF) consists of 224 registers.
+def Zn4IntegerPRF : RegisterFile<224, [GR64, CCR], [1, 1], [1, 0],
+ 6, // Max moves that can be eliminated per cycle.
+ 0>; // Restrict move elimination to zero regs.
+
+// AnandTech: The integer scheduler has a 4*24 entry macro op capacity.
+// AMD SOG 19h, 2.10.1 Schedulers
+// The schedulers can receive up to six macro ops per cycle, with a limit of
+// two per scheduler. Each scheduler can issue one micro op per cycle into
+// each of its associated pipelines
+def Zn4Int : ProcResGroup<[Zn4ALU0, Zn4AGU0, Zn4BRU0, // scheduler 0
+ Zn4ALU1, Zn4AGU1, // scheduler 1
+ Zn4ALU2, Zn4AGU2, // scheduler 2
+ Zn4ALU3, Zn4BRU1 // scheduler 3
+ ]> {
+ let BufferSize = !mul(4, 24);
+}
+
+
+//===----------------------------------------------------------------------===//
+// Floating-Point Unit
+//
+
+// AMD SOG 19h, 2.4 Superscalar Organization
+// The processor uses <...> two decoupled independent floating point schedulers
+// each servicing two FP pipelines and one store or FP-to-integer pipeline.
+
+//
+// Execution pipes
+//===----------------------------------------------------------------------===//
+
+// AMD SOG 19h, 2.10.1 Schedulers
+// <...>, and six FPU pipes.
+// Agner, 22.10 Floating point execution pipes
+// There are six floating point/vector execution pipes,
+def Zn4FP0 : ProcResource<1>;
+def Zn4FP1 : ProcResource<1>;
+def Zn4FP2 : ProcResource<1>;
+def Zn4FP3 : ProcResource<1>;
+def Zn4FP45 : ProcResource<2>;
+
+//
+// Execution Units
+//===----------------------------------------------------------------------===//
+// AMD SOG 19h, 2.11.1 Floating Point Execution Resources
+
+// (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ)
+defvar Zn4FPFMul0 = Zn4FP0;
+defvar Zn4FPFMul1 = Zn4FP1;
+
+// (v)FADD*
+defvar Zn4FPFAdd0 = Zn4FP2;
+defvar Zn4FPFAdd1 = Zn4FP3;
+
+// All convert operations except pack/unpack
+defvar Zn4FPFCvt0 = Zn4FP2;
+defvar Zn4FPFCvt1 = Zn4FP3;
+
+// All Divide and Square Root except Reciprocal Approximation
+// AMD SOG 19h, 2.11.1 Floating Point Execution Resources
+// FDIV unit can support 2 simultaneous operations in flight
+// even though it occupies a single pipe.
+// FIXME: BufferSize=2 ?
+defvar Zn4FPFDiv = Zn4FP1;
+
+// Moves and Logical operations on Floating Point Data Types
+defvar Zn4FPFMisc0 = Zn4FP0;
+defvar Zn4FPFMisc1 = Zn4FP1;
+defvar Zn4FPFMisc2 = Zn4FP2;
+defvar Zn4FPFMisc3 = Zn4FP3;
+
+// Integer Adds, Subtracts, and Compares
+// Some complex VADD operations are not available in all pipes.
+defvar Zn4FPVAdd0 = Zn4FP0;
+defvar Zn4FPVAdd1 = Zn4FP1;
+defvar Zn4FPVAdd2 = Zn4FP2;
+defvar Zn4FPVAdd3 = Zn4FP3;
+
+// Integer Multiplies, SAD, Blendvb
+defvar Zn4FPVMul0 = Zn4FP0;
+defvar Zn4FPVMul1 = Zn4FP3;
+
+// Data Shuffles, Packs, Unpacks, Permute
+// Some complex shuffle operations are only available in pipe1.
+defvar Zn4FPVShuf = Zn4FP1;
+defvar Zn4FPVShufAux = Zn4FP2;
+
+// Bit Shift Left/Right operations
+defvar Zn4FPVShift0 = Zn4FP1;
+defvar Zn4FPVShift1 = Zn4FP2;
+
+// Moves and Logical operations on Packed Integer Data Types
+defvar Zn4FPVMisc0 = Zn4FP0;
+defvar Zn4FPVMisc1 = Zn4FP1;
+defvar Zn4FPVMisc2 = Zn4FP2;
+defvar Zn4FPVMisc3 = Zn4FP3;
+
+// *AES*
+defvar Zn4FPAES0 = Zn4FP0;
+defvar Zn4FPAES1 = Zn4FP1;
+
+// *CLM*
+defvar Zn4FPCLM0 = Zn4FP0;
+defvar Zn4FPCLM1 = Zn4FP1;
+
+// Execution pipeline grouping
+//===----------------------------------------------------------------------===//
+
+// AMD SOG 19h, 2.11 Floating-Point Unit
+// Stores and floating point to general purpose register transfer
+// have 2 dedicated pipelines (pipe 5 and 6).
+def Zn4FPU0123 : ProcResGroup<[Zn4FP0, Zn4FP1, Zn4FP2, Zn4FP3]>;
+
+// (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ)
+def Zn4FPFMul01 : ProcResGroup<[Zn4FPFMul0, Zn4FPFMul1]>;
+
+// (v)FADD*
+// Some complex VADD operations are not available in all pipes.
+def Zn4FPFAdd01 : ProcResGroup<[Zn4FPFAdd0, Zn4FPFAdd1]>;
+
+// All convert operations except pack/unpack
+def Zn4FPFCvt01 : ProcResGroup<[Zn4FPFCvt0, Zn4FPFCvt1]>;
+
+// All Divide and Square Root except Reciprocal Approximation
+// def Zn4FPFDiv : ProcResGroup<[Zn4FPFDiv]>;
+
+// Moves and Logical operations on Floating Point Data Types
+def Zn4FPFMisc0123 : ProcResGroup<[Zn4FPFMisc0, Zn4FPFMisc1, Zn4FPFMisc2, Zn4FPFMisc3]>;
+
+// FIXUP and RANGE use FP01 pipelines
+def Zn4FPFMisc01 : ProcResGroup<[Zn4FPFMisc0, Zn4FPFMisc1]>;
+def Zn4FPFMisc12 : ProcResGroup<[Zn4FPFMisc1, Zn4FPFMisc2]>;
+// SCALE instructions use FP23 pipelines
+def Zn4FPFMisc23 : ProcResGroup<[Zn4FPFMisc2, Zn4FPFMisc3]>;
+def Zn4FPFMisc123 : ProcResGroup<[Zn4FPFMisc1, Zn4FPFMisc2, Zn4FPFMisc3]>;
+
+// Loads, Stores and Move to General Register (EX) Operations
+// AMD SOG 19h, 2.11 Floating-Point Unit
+// Stores and floating point to general purpose register transfer
+// have 2 dedicated pipelines (pipe 5 and 6).
+defvar Zn4FPLd01 = Zn4FP45;
+
+// AMD SOG 19h, 2.11 Floating-Point Unit
+// Note that FP stores are supported on two pipelines,
+// but throughput is limited to one per cycle.
+let Super = Zn4FP45 in
+def Zn4FPSt : ProcResource<1>;
+
+// Integer Adds, Subtracts, and Compares
+// Some complex VADD operations are not available in all pipes.
+def Zn4FPVAdd0123 : ProcResGroup<[Zn4FPVAdd0, Zn4FPVAdd1, Zn4FPVAdd2, Zn4FPVAdd3]>;
+
+def Zn4FPVAdd01: ProcResGroup<[Zn4FPVAdd0, Zn4FPVAdd1]>;
+def Zn4FPVAdd12: ProcResGroup<[Zn4FPVAdd1, Zn4FPVAdd2]>;
+
+// AVX512 Opmask pipelines
+def Zn4FPOpMask01: ProcResGroup<[Zn4FP2, Zn4FP3]>;
+def Zn4FPOpMask4: ProcResGroup<[Zn4FP45]>;
+
+// Integer Multiplies, SAD, Blendvb
+def Zn4FPVMul01 : ProcResGroup<[Zn4FPVMul0, Zn4FPVMul1]>;
+
+// Data Shuffles, Packs, Unpacks, Permute
+// Some complex shuffle operations are only available in pipe1.
+def Zn4FPVShuf01 : ProcResGroup<[Zn4FPVShuf, Zn4FPVShufAux]>;
+
+// Bit Shift Left/Right operations
+def Zn4FPVShift01 : ProcResGroup<[Zn4FPVShift0, Zn4FPVShift1]>;
+
+// Moves and Logical operations on Packed Integer Data Types
+def Zn4FPVMisc0123 : ProcResGroup<[Zn4FPVMisc0, Zn4FPVMisc1, Zn4FPVMisc2, Zn4FPVMisc3]>;
+
+// *AES*
+def Zn4FPAES01 : ProcResGroup<[Zn4FPAES0, Zn4FPAES1]>;
+
+// *CLM*
+def Zn4FPCLM01 : ProcResGroup<[Zn4FPCLM0, Zn4FPCLM1]>;
+
+
+//
+// Scheduling
+//===----------------------------------------------------------------------===//
+
+// Agner, 21.8 Register renaming and out-of-order schedulers
+// The floating point register file has 192 vector registers
+// of 512b each in zen4.
+def Zn4FpPRF : RegisterFile<192, [VR64, VR128, VR256, VR512], [1, 1, 1, 1], [0, 1, 1],
+ 6, // Max moves that can be eliminated per cycle.
+ 0>; // Restrict move elimination to zero regs.
+
+// AMD SOG 19h, 2.11 Floating-Point Unit
+// The floating-point scheduler has a 2*32 entry macro op capacity.
+// AMD SOG 19h, 2.11 Floating-Point Unit
+// <...> the scheduler can issue 1 micro op per cycle for each pipe.
+// FIXME: those are two separate schedulers, not a single big one.
+def Zn4FP : ProcResGroup<[Zn4FP0, Zn4FP2, /*Zn4FP4,*/ // scheduler 0
+ Zn4FP1, Zn4FP3, Zn4FP45 /*Zn4FP5*/ // scheduler 1
+ ]> {
+ let BufferSize = !mul(2, 32);
+}
+
+// AMD SOG 19h, 2.11 Floating-Point Unit
+// Macro ops can be dispatched to the 64-entry Non-Scheduling Queue (NSQ)
+// even if the floating-point scheduler is full.
+// FIXME: how to model this properly?
+
+
+//===----------------------------------------------------------------------===//
+// Load-Store Unit
+//
+
+// AMD SOG 19h, 2.12 Load-Store Unit
+// The LS unit contains three largely independent pipelines
+// enabling the execution of three 256-bit memory operations per cycle.
+def Zn4LSU : ProcResource<3>;
+
+// AMD SOG 19h, 2.12 Load-Store Unit
+// All three memory operations can be loads.
+let Super = Zn4LSU in
+def Zn4Load : ProcResource<3> {
+ // AMD SOG 19h, 2.12 Load-Store Unit
+ // The LS unit can process up to 72 out-of-order loads.
+ let BufferSize = 72;
+}
+
+def Zn4LoadQueue : LoadQueue<Zn4Load>;
+
+// AMD SOG 19h, 2.12 Load-Store Unit
+// A maximum of two of the memory operations can be stores.
+let Super = Zn4LSU in
+def Zn4Store : ProcResource<2> {
+ // AMD SOG 19h, 2.12 Load-Store Unit
+ // The LS unit utilizes a 64-entry store queue (STQ).
+ let BufferSize = 64;
+}
+
+def Zn4StoreQueue : StoreQueue<Zn4Store>;
+
+//===----------------------------------------------------------------------===//
+// Basic helper classes.
+//===----------------------------------------------------------------------===//
+
+// Many SchedWrites are defined in pairs with and without a folded load.
+// Instructions with folded loads are usually micro-fused, so they only appear
+// as two micro-ops when dispatched by the schedulers.
+// This multiclass defines the resource usage for variants with and without
+// folded loads.
+
+multiclass __Zn4WriteRes<SchedWrite SchedRW, list<ProcResourceKind> ExePorts,
+ int Lat = 1, list<int> Res = [], int UOps = 1> {
+ def : WriteRes<SchedRW, ExePorts> {
+ let Latency = Lat;
+ let ResourceCycles = Res;
+ let NumMicroOps = UOps;
+ }
+}
+
+multiclass __Zn4WriteResPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat,
+ list<int> Res, int UOps, int LoadLat, int LoadUOps,
+ ProcResourceKind AGU, int LoadRes> {
+ defm : __Zn4WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
+
+ defm : __Zn4WriteRes<SchedRW.Folded,
+ !listconcat([AGU, Zn4Load], ExePorts),
+ !add(Lat, LoadLat),
+ !if(!and(!empty(Res), !eq(LoadRes, 1)),
+ [],
+ !listconcat([1, LoadRes],
+ !if(!empty(Res),
+ !listsplat(1, !size(ExePorts)),
+ Res))),
+ !add(UOps, LoadUOps)>;
+}
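+
+// A minimal illustration (comment only, using the defaults above): the pair
+//   Zn4WriteResIntPair<WriteALU, [Zn4ALU0123], 1, [1], 1>
+// defined further below expands into roughly the following two classes, where
+// the folded-load form (WriteALU.Folded, i.e. WriteALULd) picks up
+// Zn4AGU012 + Zn4Load and Znver4Model.LoadLatency extra cycles while staying
+// micro-fused (no additional micro-ops):
+//   def : WriteRes<WriteALU, [Zn4ALU0123]> {
+//     let Latency = 1; let ResourceCycles = [1]; let NumMicroOps = 1;
+//   }
+//   def : WriteRes<WriteALULd, [Zn4AGU012, Zn4Load, Zn4ALU0123]> {
+//     let Latency = 5; // !add(1, Znver4Model.LoadLatency)
+//     let ResourceCycles = [1, 1, 1]; let NumMicroOps = 1;
+//   }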
+
+// For classes without folded loads.
+multiclass Zn4WriteResInt<SchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat = 1,
+ list<int> Res = [], int UOps = 1> {
+ defm : __Zn4WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
+}
+
+multiclass Zn4WriteResXMM<SchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat = 1,
+ list<int> Res = [], int UOps = 1> {
+ defm : __Zn4WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
+}
+
+multiclass Zn4WriteResYMM<SchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat = 1,
+ list<int> Res = [], int UOps = 1> {
+ defm : __Zn4WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
+}
+
+multiclass Zn4WriteResZMM<SchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat = 1,
+ list<int> Res = [], int UOps = 1> {
+ defm : __Zn4WriteRes<SchedRW, ExePorts, Lat, Res, UOps>;
+}
+
+// For classes with folded loads.
+multiclass Zn4WriteResIntPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat = 1,
+ list<int> Res = [], int UOps = 1,
+ int LoadUOps = 0, int LoadRes = 1> {
+ defm : __Zn4WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
+ Znver4Model.LoadLatency,
+ LoadUOps, Zn4AGU012, LoadRes>;
+}
+
+multiclass Zn4WriteResXMMPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat = 1,
+ list<int> Res = [], int UOps = 1,
+ int LoadUOps = 0, int LoadRes = 1> {
+ defm : __Zn4WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
+ Znver4Model.VecLoadLatency,
+ LoadUOps, Zn4FPLd01, LoadRes>;
+}
+
+multiclass Zn4WriteResYMMPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat = 1,
+ list<int> Res = [], int UOps = 1,
+ int LoadUOps = 0, int LoadRes = 1> {
+ defm : __Zn4WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
+ Znver4Model.VecLoadLatency,
+ LoadUOps, Zn4FPLd01, LoadRes>;
+}
+
+multiclass Zn4WriteResZMMPair<X86FoldableSchedWrite SchedRW,
+ list<ProcResourceKind> ExePorts, int Lat = 1,
+ list<int> Res = [], int UOps = 2,
+ int LoadUOps = 0, int LoadRes = 1> {
+ defm : __Zn4WriteResPair<SchedRW, ExePorts, Lat, Res, UOps,
+ Znver4Model.VecLoadLatency,
+ LoadUOps, Zn4FPLd01, LoadRes>;
+}
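+
+// Worked example (comment only): the vector pairs differ from the integer
+// pairs only on the load side. E.g. Zn4WriteResXMMPair<WriteFAddX,
+// [Zn4FPFAdd01], 3, [1], 1> below gives the register form 3 cycles on
+// Zn4FPFAdd01, while its folded form uses Zn4FPLd01 + Zn4Load and
+// Znver4Model.VecLoadLatency, i.e. !add(3, 7) = 10 cycles, still 1 micro-op.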
+
+//===----------------------------------------------------------------------===//
+// Here be dragons.
+//===----------------------------------------------------------------------===//
+
+def : ReadAdvance<ReadAfterLd, Znver4Model.LoadLatency>;
+
+def : ReadAdvance<ReadAfterVecLd, Znver4Model.VecLoadLatency>;
+def : ReadAdvance<ReadAfterVecXLd, Znver4Model.VecLoadLatency>;
+def : ReadAdvance<ReadAfterVecYLd, Znver4Model.VecLoadLatency>;
+
+// AMD SOG 19h, 2.11 Floating-Point Unit
+// There is 1 cycle of added latency for a result to cross
+// from F to I or I to F domain.
+def : ReadAdvance<ReadInt2Fpu, -1>;
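+// Illustrative note: a negative ReadAdvance delays the read, so an operand
+// consumed through ReadInt2Fpu is seen one cycle after the producer's nominal
+// latency, matching the 1-cycle domain-crossing penalty quoted above.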
+
+// Instructions with both a load and a store folded are modeled as a folded
+// load + WriteRMW.
+defm : Zn4WriteResInt<WriteRMW, [Zn4AGU012, Zn4Store], Znver4Model.StoreLatency, [1, 1], 0>;
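+// For example (sketch, assuming the usual WriteALURMW = WriteALULd + WriteRMW
+// sequence from X86Schedule.td): a load-op-store such as ADD32mr is costed as
+// the folded-load ALU write plus this WriteRMW entry, so the store side only
+// contributes the Zn4AGU012 + Zn4Store resources at Znver4Model.StoreLatency
+// (its micro-ops are counted on the load-op side, hence NumMicroOps = 0 here).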
+
+// Loads, stores, and moves, not folded with other operations.
+defm : Zn4WriteResInt<WriteLoad, [Zn4AGU012, Zn4Load], !add(Znver4Model.LoadLatency, 1), [1, 1], 1>;
+
+// Model the effect of clobbering the read-write mask operand of the GATHER operation.
+// Does not cost anything by itself, only has latency, matching that of the WriteLoad,
+defm : Zn4WriteResInt<WriteVecMaskedGatherWriteback, [], !add(Znver4Model.LoadLatency, 1), [], 0>;
+
+def Zn4WriteMOVSlow : SchedWriteRes<[Zn4AGU012, Zn4Load]> {
+ let Latency = !add(Znver4Model.LoadLatency, 1);
+ let ResourceCycles = [3, 1];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteMOVSlow], (instrs MOV8rm, MOV8rm_NOREX, MOV16rm, MOVSX16rm16, MOVSX16rm32, MOVZX16rm16, MOVSX16rm8, MOVZX16rm8)>;
+
+defm : Zn4WriteResInt<WriteStore, [Zn4AGU012, Zn4Store], Znver4Model.StoreLatency, [1, 2], 1>;
+defm : Zn4WriteResInt<WriteStoreNT, [Zn4AGU012, Zn4Store], Znver4Model.StoreLatency, [1, 2], 1>;
+defm : Zn4WriteResInt<WriteMove, [Zn4ALU0123], 1, [4], 1>;
+
+// Treat misc copies as a move.
+def : InstRW<[WriteMove], (instrs COPY)>;
+
+def Zn4WriteMOVBE16rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123]> {
+ let Latency = Znver4Model.LoadLatency;
+ let ResourceCycles = [1, 1, 4];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteMOVBE16rm], (instrs MOVBE16rm)>;
+
+def Zn4WriteMOVBEmr : SchedWriteRes<[Zn4ALU0123, Zn4AGU012, Zn4Store]> {
+ let Latency = Znver4Model.StoreLatency;
+ let ResourceCycles = [4, 1, 1];
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn4WriteMOVBEmr], (instrs MOVBE16mr, MOVBE32mr, MOVBE64mr)>;
+
+// Arithmetic.
+defm : Zn4WriteResIntPair<WriteALU, [Zn4ALU0123], 1, [1], 1>; // Simple integer ALU op.
+
+def Zn4WriteALUSlow : SchedWriteRes<[Zn4ALU0123]> {
+ let Latency = 1;
+ let ResourceCycles = [4];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteALUSlow], (instrs ADD8i8, ADD16i16, ADD32i32, ADD64i32,
+ AND8i8, AND16i16, AND32i32, AND64i32,
+ OR8i8, OR16i16, OR32i32, OR64i32,
+ SUB8i8, SUB16i16, SUB32i32, SUB64i32,
+ XOR8i8, XOR16i16, XOR32i32, XOR64i32)>;
+
+def Zn4WriteMoveExtend : SchedWriteRes<[Zn4ALU0123]> {
+ let Latency = 1;
+ let ResourceCycles = [4];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteMoveExtend], (instrs MOVSX16rr16, MOVSX16rr32, MOVZX16rr16, MOVSX16rr8, MOVZX16rr8)>;
+
+def Zn4WriteMaterialize32bitImm: SchedWriteRes<[Zn4ALU0123]> {
+ let Latency = 1;
+ let ResourceCycles = [2];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteMaterialize32bitImm], (instrs MOV32ri, MOV32ri_alt, MOV64ri32)>;
+
+def Zn4WritePDEP_PEXT : SchedWriteRes<[Zn4ALU1]> {
+ let Latency = 3;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WritePDEP_PEXT], (instrs PDEP32rr, PDEP64rr,
+ PEXT32rr, PEXT64rr)>;
+
+defm : Zn4WriteResIntPair<WriteADC, [Zn4ALU0123], 1, [4], 1>; // Integer ALU + flags op.
+
+def Zn4WriteADC8mr_SBB8mr : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123, Zn4Store]> {
+ let Latency = 1;
+ let ResourceCycles = [1, 1, 7, 1];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteADC8mr_SBB8mr], (instrs ADC8mr, SBB8mr)>;
+
+// This is for simple LEAs with one or two input operands.
+defm : Zn4WriteResInt<WriteLEA, [Zn4AGU012], 1, [1], 1>; // LEA instructions can't fold loads.
+
+// This write is used for slow LEA instructions.
+def Zn4Write3OpsLEA : SchedWriteRes<[Zn4ALU0123]> {
+ let Latency = 2;
+ let ResourceCycles = [1];
+ let NumMicroOps = 2;
+}
+
+// On Znver4, a slow LEA is either a 3Ops LEA (base, index, offset),
+// or an LEA with a `Scale` value different than 1.
+def Zn4SlowLEAPredicate : MCSchedPredicate<
+ CheckAny<[
+ // A 3-operand LEA (base, index, offset).
+ IsThreeOperandsLEAFn,
+ // An LEA with a "Scale" different than 1.
+ CheckAll<[
+ CheckIsImmOperand<2>,
+ CheckNot<CheckImmOperand<2, 1>>
+ ]>
+ ]>
+>;
+
+def Zn4WriteLEA : SchedWriteVariant<[
+ SchedVar<Zn4SlowLEAPredicate, [Zn4Write3OpsLEA]>,
+ SchedVar<NoSchedPred, [WriteLEA]>
+]>;
+
+def : InstRW<[Zn4WriteLEA], (instrs LEA32r, LEA64r, LEA64_32r)>;
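+
+// Illustrative examples (comments only): `lea eax, [rdi + 8]` has only a base
+// and a displacement with scale 1, so it keeps the default WriteLEA cost
+// (1 cycle on Zn4AGU012); `lea eax, [rdi + 4*rsi + 8]` is a three-operand LEA
+// with scale != 1, matches Zn4SlowLEAPredicate, and is costed as
+// Zn4Write3OpsLEA (2 cycles, 2 micro-ops on Zn4ALU0123).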
+
+def Zn4SlowLEA16r : SchedWriteRes<[Zn4ALU0123]> {
+ let Latency = 2; // FIXME: not from llvm-exegesis
+ let ResourceCycles = [4];
+ let NumMicroOps = 2;
+}
+
+def : InstRW<[Zn4SlowLEA16r], (instrs LEA16r)>;
+
+// Integer multiplication
+defm : Zn4WriteResIntPair<WriteIMul8, [Zn4Multiplier], 3, [3], 1>; // Integer 8-bit multiplication.
+defm : Zn4WriteResIntPair<WriteIMul16, [Zn4Multiplier], 3, [3], 3, /*LoadUOps=*/1>; // Integer 16-bit multiplication.
+defm : Zn4WriteResIntPair<WriteIMul16Imm, [Zn4Multiplier], 4, [4], 2>; // Integer 16-bit multiplication by immediate.
+defm : Zn4WriteResIntPair<WriteIMul16Reg, [Zn4Multiplier], 3, [1], 1>; // Integer 16-bit multiplication by register.
+defm : Zn4WriteResIntPair<WriteIMul32, [Zn4Multiplier], 3, [3], 2>; // Integer 32-bit multiplication.
+defm : Zn4WriteResIntPair<WriteMULX32, [Zn4Multiplier], 3, [1], 2>; // Integer 32-bit Unsigned Multiply Without Affecting Flags.
+defm : Zn4WriteResIntPair<WriteIMul32Imm, [Zn4Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by immediate.
+defm : Zn4WriteResIntPair<WriteIMul32Reg, [Zn4Multiplier], 3, [1], 1>; // Integer 32-bit multiplication by register.
+defm : Zn4WriteResIntPair<WriteIMul64, [Zn4Multiplier], 3, [3], 2>; // Integer 64-bit multiplication.
+defm : Zn4WriteResIntPair<WriteMULX64, [Zn4Multiplier], 3, [1], 2>; // Integer 64-bit Unsigned Multiply Without Affecting Flags.
+defm : Zn4WriteResIntPair<WriteIMul64Imm, [Zn4Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by immediate.
+defm : Zn4WriteResIntPair<WriteIMul64Reg, [Zn4Multiplier], 3, [1], 1>; // Integer 64-bit multiplication by register.
+defm : Zn4WriteResInt<WriteIMulHLd, [], !add(4, Znver4Model.LoadLatency), [], 0>; // Integer multiplication, high part.
+defm : Zn4WriteResInt<WriteIMulH, [], 4, [], 0>; // Integer multiplication, high part.
+
+defm : Zn4WriteResInt<WriteBSWAP32, [Zn4ALU0123], 1, [1], 1>; // Byte Order (Endianness) 32-bit Swap.
+defm : Zn4WriteResInt<WriteBSWAP64, [Zn4ALU0123], 1, [1], 1>; // Byte Order (Endianness) 64-bit Swap.
+
+defm : Zn4WriteResIntPair<WriteCMPXCHG, [Zn4ALU0123], 3, [12], 5>; // Compare and set, compare and swap.
+
+def Zn4WriteCMPXCHG8rr : SchedWriteRes<[Zn4ALU0123]> {
+ let Latency = 3;
+ let ResourceCycles = [12];
+ let NumMicroOps = 3;
+}
+def : InstRW<[Zn4WriteCMPXCHG8rr], (instrs CMPXCHG8rr)>;
+
+defm : Zn4WriteResInt<WriteCMPXCHGRMW, [Zn4ALU0123], 3, [12], 6>; // Compare and set, compare and swap.
+
+def Zn4WriteCMPXCHG8rm_LCMPXCHG8 : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123]> {
+ let Latency = !add(Znver4Model.LoadLatency, Zn4WriteCMPXCHG8rr.Latency);
+ let ResourceCycles = [1, 1, 12];
+ let NumMicroOps = !add(Zn4WriteCMPXCHG8rr.NumMicroOps, 2);
+}
+def : InstRW<[Zn4WriteCMPXCHG8rm_LCMPXCHG8], (instrs CMPXCHG8rm, LCMPXCHG8)>;
+
+def Zn4WriteCMPXCHG8B : SchedWriteRes<[Zn4ALU0123]> {
+ let Latency = 3; // FIXME: not from llvm-exegesis
+ let ResourceCycles = [24];
+ let NumMicroOps = 19;
+}
+def : InstRW<[Zn4WriteCMPXCHG8B], (instrs CMPXCHG8B)>;
+
+def Zn4WriteCMPXCHG16B_LCMPXCHG16B : SchedWriteRes<[Zn4ALU0123]> {
+ let Latency = 4; // FIXME: not from llvm-exegesis
+ let ResourceCycles = [59];
+ let NumMicroOps = 28;
+}
+def : InstRW<[Zn4WriteCMPXCHG16B_LCMPXCHG16B], (instrs CMPXCHG16B, LCMPXCHG16B)>;
+
+def Zn4WriteWriteXCHGUnrenameable : SchedWriteRes<[Zn4ALU0123]> {
+ let Latency = 1;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn4WriteWriteXCHGUnrenameable], (instrs XCHG8rr, XCHG16rr, XCHG16ar)>;
+
+def Zn4WriteXCHG8rm_XCHG16rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123]> {
+ let Latency = !add(Znver4Model.LoadLatency, 3); // FIXME: not from llvm-exegesis
+ let ResourceCycles = [1, 1, 2];
+ let NumMicroOps = 5;
+}
+def : InstRW<[Zn4WriteXCHG8rm_XCHG16rm], (instrs XCHG8rm, XCHG16rm)>;
+
+def Zn4WriteXCHG32rm_XCHG64rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU0123]> {
+ let Latency = !add(Znver4Model.LoadLatency, 2); // FIXME: not from llvm-exegesis
+ let ResourceCycles = [1, 1, 2];
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn4WriteXCHG32rm_XCHG64rm], (instrs XCHG32rm, XCHG64rm)>;
+
+// Integer division.
+// FIXME: uops for 8-bit division measure as 2; for the others it's a guess.
+// FIXME: latency for 8-bit division measures as 10; for the others it's a guess.
+defm : Zn4WriteResIntPair<WriteDiv8, [Zn4Divider], 10, [10], 2>;
+defm : Zn4WriteResIntPair<WriteDiv16, [Zn4Divider], 11, [11], 2>;
+defm : Zn4WriteResIntPair<WriteDiv32, [Zn4Divider], 13, [13], 2>;
+defm : Zn4WriteResIntPair<WriteDiv64, [Zn4Divider], 17, [17], 2>;
+defm : Zn4WriteResIntPair<WriteIDiv8, [Zn4Divider], 10, [10], 2>;
+defm : Zn4WriteResIntPair<WriteIDiv16, [Zn4Divider], 11, [11], 2>;
+defm : Zn4WriteResIntPair<WriteIDiv32, [Zn4Divider], 13, [13], 2>;
+defm : Zn4WriteResIntPair<WriteIDiv64, [Zn4Divider], 17, [17], 2>;
+
+defm : Zn4WriteResIntPair<WriteBSF, [Zn4ALU1], 1, [1], 6, /*LoadUOps=*/1>; // Bit scan forward.
+defm : Zn4WriteResIntPair<WriteBSR, [Zn4ALU1], 1, [1], 6, /*LoadUOps=*/1>; // Bit scan reverse.
+
+defm : Zn4WriteResIntPair<WritePOPCNT, [Zn4ALU0123], 1, [1], 1>; // Bit population count.
+
+def Zn4WritePOPCNT16rr : SchedWriteRes<[Zn4ALU0123]> {
+ let Latency = 1;
+ let ResourceCycles = [4];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WritePOPCNT16rr], (instrs POPCNT16rr)>;
+
+defm : Zn4WriteResIntPair<WriteLZCNT, [Zn4ALU0123], 1, [1], 1>; // Leading zero count.
+
+def Zn4WriteLZCNT16rr : SchedWriteRes<[Zn4ALU0123]> {
+ let Latency = 1;
+ let ResourceCycles = [4];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteLZCNT16rr], (instrs LZCNT16rr)>;
+
+defm : Zn4WriteResIntPair<WriteTZCNT, [Zn4ALU12], 2, [1], 2>; // Trailing zero count.
+
+def Zn4WriteTZCNT16rr : SchedWriteRes<[Zn4ALU0123]> {
+ let Latency = 2;
+ let ResourceCycles = [4];
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn4WriteTZCNT16rr], (instrs TZCNT16rr)>;
+
+defm : Zn4WriteResIntPair<WriteCMOV, [Zn4ALU03], 1, [1], 1>; // Conditional move.
+defm : Zn4WriteResInt<WriteFCMOV, [Zn4ALU0123], 7, [28], 7>; // FIXME: not from llvm-exegesis // X87 conditional move.
+defm : Zn4WriteResInt<WriteSETCC, [Zn4ALU03], 1, [2], 1>; // Set register based on condition code.
+defm : Zn4WriteResInt<WriteSETCCStore, [Zn4ALU03, Zn4AGU012, Zn4Store], 2, [2, 1, 1], 2>; // FIXME: latency not from llvm-exegesis
+defm : Zn4WriteResInt<WriteLAHFSAHF, [Zn4ALU3], 1, [1], 1>; // Load/Store flags in AH.
+
+defm : Zn4WriteResInt<WriteBitTest, [Zn4ALU12], 1, [1], 1>; // Bit Test
+defm : Zn4WriteResInt<WriteBitTestImmLd, [Zn4AGU012, Zn4Load, Zn4ALU12], !add(Znver4Model.LoadLatency, 1), [1, 1, 1], 2>;
+defm : Zn4WriteResInt<WriteBitTestRegLd, [Zn4AGU012, Zn4Load, Zn4ALU12], !add(Znver4Model.LoadLatency, 1), [1, 1, 1], 7>;
+
+defm : Zn4WriteResInt<WriteBitTestSet, [Zn4ALU12], 2, [2], 2>; // Bit Test + Set
+defm : Zn4WriteResInt<WriteBitTestSetImmLd, [Zn4AGU012, Zn4Load, Zn4ALU12], !add(Znver4Model.LoadLatency, 2), [1, 1, 1], 4>;
+defm : Zn4WriteResInt<WriteBitTestSetRegLd, [Zn4AGU012, Zn4Load, Zn4ALU12], !add(Znver4Model.LoadLatency, 2), [1, 1, 1], 9>;
+
+// Integer shifts and rotates.
+defm : Zn4WriteResIntPair<WriteShift, [Zn4ALU12], 1, [1], 1, /*LoadUOps=*/1>;
+defm : Zn4WriteResIntPair<WriteShiftCL, [Zn4ALU12], 1, [1], 1, /*LoadUOps=*/1>;
+defm : Zn4WriteResIntPair<WriteRotate, [Zn4ALU12], 1, [1], 1, /*LoadUOps=*/1>;
+
+def Zn4WriteRotateR1 : SchedWriteRes<[Zn4ALU12]> {
+ let Latency = 1;
+ let ResourceCycles = [2];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteRotateR1], (instrs RCL8r1, RCL16r1, RCL32r1, RCL64r1,
+ RCR8r1, RCR16r1, RCR32r1, RCR64r1)>;
+
+def Zn4WriteRotateM1 : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
+ let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateR1.Latency);
+ let ResourceCycles = [1, 1, 2];
+ let NumMicroOps = !add(Zn4WriteRotateR1.NumMicroOps, 1);
+}
+def : InstRW<[Zn4WriteRotateM1], (instrs RCL8m1, RCL16m1, RCL32m1, RCL64m1,
+ RCR8m1, RCR16m1, RCR32m1, RCR64m1)>;
+
+def Zn4WriteRotateRightRI : SchedWriteRes<[Zn4ALU12]> {
+ let Latency = 3;
+ let ResourceCycles = [6];
+ let NumMicroOps = 7;
+}
+def : InstRW<[Zn4WriteRotateRightRI], (instrs RCR8ri, RCR16ri, RCR32ri, RCR64ri)>;
+
+def Zn4WriteRotateRightMI : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
+ let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateRightRI.Latency);
+ let ResourceCycles = [1, 1, 8];
+ let NumMicroOps = !add(Zn4WriteRotateRightRI.NumMicroOps, 3);
+}
+def : InstRW<[Zn4WriteRotateRightMI], (instrs RCR8mi, RCR16mi, RCR32mi, RCR64mi)>;
+
+def Zn4WriteRotateLeftRI : SchedWriteRes<[Zn4ALU12]> {
+ let Latency = 4;
+ let ResourceCycles = [8];
+ let NumMicroOps = 9;
+}
+def : InstRW<[Zn4WriteRotateLeftRI], (instrs RCL8ri, RCL16ri, RCL32ri, RCL64ri)>;
+
+def Zn4WriteRotateLeftMI : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
+ let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateLeftRI.Latency);
+ let ResourceCycles = [1, 1, 8];
+ let NumMicroOps = !add(Zn4WriteRotateLeftRI.NumMicroOps, 2);
+}
+def : InstRW<[Zn4WriteRotateLeftMI], (instrs RCL8mi, RCL16mi, RCL32mi, RCL64mi)>;
+
+defm : Zn4WriteResIntPair<WriteRotateCL, [Zn4ALU12], 1, [1], 1, /*LoadUOps=*/1>;
+
+def Zn4WriteRotateRightRCL : SchedWriteRes<[Zn4ALU12]> {
+ let Latency = 3;
+ let ResourceCycles = [6];
+ let NumMicroOps = 7;
+}
+def : InstRW<[Zn4WriteRotateRightRCL], (instrs RCR8rCL, RCR16rCL, RCR32rCL, RCR64rCL)>;
+
+def Zn4WriteRotateRightMCL : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
+ let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateRightRCL.Latency);
+ let ResourceCycles = [1, 1, 8];
+ let NumMicroOps = !add(Zn4WriteRotateRightRCL.NumMicroOps, 2);
+}
+def : InstRW<[Zn4WriteRotateRightMCL], (instrs RCR8mCL, RCR16mCL, RCR32mCL, RCR64mCL)>;
+
+def Zn4WriteRotateLeftRCL : SchedWriteRes<[Zn4ALU12]> {
+ let Latency = 4;
+ let ResourceCycles = [8];
+ let NumMicroOps = 9;
+}
+def : InstRW<[Zn4WriteRotateLeftRCL], (instrs RCL8rCL, RCL16rCL, RCL32rCL, RCL64rCL)>;
+
+def Zn4WriteRotateLeftMCL : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4ALU12]> {
+ let Latency = !add(Znver4Model.LoadLatency, Zn4WriteRotateLeftRCL.Latency);
+ let ResourceCycles = [1, 1, 8];
+ let NumMicroOps = !add(Zn4WriteRotateLeftRCL.NumMicroOps, 2);
+}
+def : InstRW<[Zn4WriteRotateLeftMCL], (instrs RCL8mCL, RCL16mCL, RCL32mCL, RCL64mCL)>;
+
+// Double shift instructions.
+defm : Zn4WriteResInt<WriteSHDrri, [Zn4ALU12], 2, [3], 4>;
+defm : Zn4WriteResInt<WriteSHDrrcl, [Zn4ALU12], 2, [3], 5>;
+defm : Zn4WriteResInt<WriteSHDmri, [Zn4AGU012, Zn4Load, Zn4ALU12], !add(Znver4Model.LoadLatency, 2), [1, 1, 4], 6>;
+defm : Zn4WriteResInt<WriteSHDmrcl, [Zn4AGU012, Zn4Load, Zn4ALU12], !add(Znver4Model.LoadLatency, 2), [1, 1, 4], 6>;
+
+// BMI1 BEXTR/BLS, BMI2 BZHI
+defm : Zn4WriteResIntPair<WriteBEXTR, [Zn4ALU12], 1, [1], 1, /*LoadUOps=*/1>;
+defm : Zn4WriteResIntPair<WriteBLS, [Zn4ALU0123], 1, [1], 1, /*LoadUOps=*/1>;
+defm : Zn4WriteResIntPair<WriteBZHI, [Zn4ALU12], 1, [1], 1, /*LoadUOps=*/1>;
+
+// Idioms that clear a register, like xorps %xmm0, %xmm0.
+// These can often bypass execution ports completely.
+defm : Zn4WriteResInt<WriteZero, [Zn4ALU0123], 0, [0], 1>;
+
+// Branches don't produce values, so they have no latency, but they still
+// consume resources. Indirect branches can fold loads.
+defm : Zn4WriteResIntPair<WriteJump, [Zn4BRU01], 1, [1], 1>; // FIXME: not from llvm-exegesis
+
+// Floating point. This covers both scalar and vector operations.
+defm : Zn4WriteResInt<WriteFLD0, [Zn4FPLd01, Zn4Load, Zn4FP1], !add(Znver4Model.LoadLatency, 4), [1, 1, 1], 1>;
+defm : Zn4WriteResInt<WriteFLD1, [Zn4FPLd01, Zn4Load, Zn4FP1], !add(Znver4Model.LoadLatency, 7), [1, 1, 1], 1>;
+defm : Zn4WriteResInt<WriteFLDC, [Zn4FPLd01, Zn4Load, Zn4FP1], !add(Znver4Model.LoadLatency, 7), [1, 1, 1], 1>;
+defm : Zn4WriteResXMM<WriteFLoad, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
+defm : Zn4WriteResXMM<WriteFLoadX, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
+defm : Zn4WriteResYMM<WriteFLoadY, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
+defm : Zn4WriteResXMM<WriteFMaskedLoad, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
+defm : Zn4WriteResYMM<WriteFMaskedLoadY, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
+defm : Zn4WriteResXMM<WriteFStore, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
+
+def Zn4WriteWriteFStoreMMX : SchedWriteRes<[Zn4FPSt, Zn4Store]> {
+ let Latency = 2; // FIXME: not from llvm-exegesis
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn4WriteWriteFStoreMMX], (instrs MOVHPDmr, MOVHPSmr,
+ VMOVHPDmr, VMOVHPSmr)>;
+
+defm : Zn4WriteResXMM<WriteFStoreX, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
+defm : Zn4WriteResYMM<WriteFStoreY, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
+defm : Zn4WriteResXMM<WriteFStoreNT, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
+defm : Zn4WriteResXMM<WriteFStoreNTX, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
+defm : Zn4WriteResYMM<WriteFStoreNTY, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
+
+defm : Zn4WriteResXMM<WriteFMaskedStore32, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [6, 1], 18>;
+defm : Zn4WriteResXMM<WriteFMaskedStore64, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [4, 1], 10>;
+defm : Zn4WriteResYMM<WriteFMaskedStore32Y, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [12, 1], 42>;
+defm : Zn4WriteResYMM<WriteFMaskedStore64Y, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [6, 1], 18>;
+
+defm : Zn4WriteResXMMPair<WriteFAdd, [Zn4FPFAdd01], 3, [1], 1>; // Floating point add/sub.
+
+def Zn4WriteX87Arith : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
+ let Latency = !add(Znver4Model.LoadLatency, 1); // FIXME: not from llvm-exegesis
+ let ResourceCycles = [1, 1, 24];
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn4WriteX87Arith], (instrs ADD_FI16m, ADD_FI32m,
+ SUB_FI16m, SUB_FI32m,
+ SUBR_FI16m, SUBR_FI32m,
+ MUL_FI16m, MUL_FI32m)>;
+
+def Zn4WriteX87Div : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
+ let Latency = !add(Znver4Model.LoadLatency, 1); // FIXME: not from llvm-exegesis
+ let ResourceCycles = [1, 1, 62];
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn4WriteX87Div], (instrs DIV_FI16m, DIV_FI32m,
+ DIVR_FI16m, DIVR_FI32m)>;
+
+defm : Zn4WriteResXMMPair<WriteFAddX, [Zn4FPFAdd01], 3, [1], 1>; // Floating point add/sub (XMM).
+defm : Zn4WriteResYMMPair<WriteFAddY, [Zn4FPFAdd01], 3, [1], 1>; // Floating point add/sub (YMM).
+defm : Zn4WriteResZMMPair<WriteFAddZ, [Zn4FPFAdd01], 3, [2], 1>; // Floating point add/sub (ZMM).
+defm : Zn4WriteResXMMPair<WriteFAdd64, [Zn4FPFAdd01], 3, [1], 1>; // Floating point double add/sub.
+defm : Zn4WriteResXMMPair<WriteFAdd64X, [Zn4FPFAdd01], 3, [1], 1>; // Floating point double add/sub (XMM).
+defm : Zn4WriteResYMMPair<WriteFAdd64Y, [Zn4FPFAdd01], 3, [1], 1>; // Floating point double add/sub (YMM).
+defm : Zn4WriteResZMMPair<WriteFAdd64Z, [Zn4FPFAdd01], 3, [2], 1>; // Floating point double add/sub (ZMM).
+defm : Zn4WriteResXMMPair<WriteFCmp, [Zn4FPFMul01], 2, [2], 1>; // Floating point compare.
+defm : Zn4WriteResXMMPair<WriteFCmpX, [Zn4FPFMul01], 2, [1], 1>; // Floating point compare (XMM).
+defm : Zn4WriteResYMMPair<WriteFCmpY, [Zn4FPFMul01], 2, [1], 1>; // Floating point compare (YMM).
+defm : Zn4WriteResZMMPair<WriteFCmpZ, [Zn4FPFMul01], 2, [2], 1>; // Floating point compare (ZMM).
+defm : Zn4WriteResXMMPair<WriteFCmp64, [Zn4FPFMul01], 1, [1], 1>; // Floating point double compare.
+defm : Zn4WriteResXMMPair<WriteFCmp64X, [Zn4FPFMul01], 2, [1], 1>; // Floating point double compare (XMM).
+defm : Zn4WriteResYMMPair<WriteFCmp64Y, [Zn4FPFMul01], 2, [1], 1>; // Floating point double compare (YMM).
+defm : Zn4WriteResZMMPair<WriteFCmp64Z, [Zn4FPFMul01], 2, [2], 1>; // Floating point double compare (ZMM).
+defm : Zn4WriteResXMMPair<WriteFCom, [Zn4FPFMul01], 3, [2], 1>; // FIXME: latency not from llvm-exegesis // Floating point compare to flags (X87).
+defm : Zn4WriteResXMMPair<WriteFComX, [Zn4FPFMul01], 4, [2], 2>; // FIXME: latency not from llvm-exegesis // Floating point compare to flags (SSE).
+defm : Zn4WriteResXMMPair<WriteFMul, [Zn4FPFMul01], 3, [1], 1>; // Floating point multiplication.
+defm : Zn4WriteResXMMPair<WriteFMulX, [Zn4FPFMul01], 3, [1], 1>; // Floating point multiplication (XMM).
+defm : Zn4WriteResYMMPair<WriteFMulY, [Zn4FPFMul01], 3, [1], 1>; // Floating point multiplication (YMM).
+defm : Zn4WriteResZMMPair<WriteFMulZ, [Zn4FPFMul01], 3, [2], 1>; // Floating point multiplication (ZMM).
+defm : Zn4WriteResXMMPair<WriteFMul64, [Zn4FPFMul01], 3, [1], 1>; // Floating point double multiplication.
+defm : Zn4WriteResXMMPair<WriteFMul64X, [Zn4FPFMul01], 3, [1], 1>; // Floating point double multiplication (XMM).
+defm : Zn4WriteResYMMPair<WriteFMul64Y, [Zn4FPFMul01], 3, [1], 1>; // Floating point double multiplication (YMM).
+defm : Zn4WriteResZMMPair<WriteFMul64Z, [Zn4FPFMul01], 3, [2], 1>; // Floating point double multiplication (ZMM).
+defm : Zn4WriteResXMMPair<WriteFDiv, [Zn4FPFDiv], 11, [3], 1>; // Floating point division.
+defm : Zn4WriteResXMMPair<WriteFDivX, [Zn4FPFDiv], 11, [3], 1>; // Floating point division (XMM).
+defm : Zn4WriteResYMMPair<WriteFDivY, [Zn4FPFDiv], 11, [3], 1>; // Floating point division (YMM).
+defm : Zn4WriteResZMMPair<WriteFDivZ, [Zn4FPFDiv], 11, [6], 1>; // Floating point division (ZMM).
+defm : Zn4WriteResXMMPair<WriteFDiv64, [Zn4FPFDiv], 13, [5], 1>; // Floating point double division.
+defm : Zn4WriteResXMMPair<WriteFDiv64X, [Zn4FPFDiv], 13, [5], 1>; // Floating point double division (XMM).
+defm : Zn4WriteResYMMPair<WriteFDiv64Y, [Zn4FPFDiv], 13, [5], 1>; // Floating point double division (YMM).
+defm : Zn4WriteResZMMPair<WriteFDiv64Z, [Zn4FPFDiv], 13, [10], 1>; // Floating point double division (ZMM).
+defm : Zn4WriteResXMMPair<WriteFSqrt, [Zn4FPFDiv], 15, [5], 1>; // Floating point square root.
+defm : Zn4WriteResXMMPair<WriteFSqrtX, [Zn4FPFDiv], 15, [5], 1>; // Floating point square root (XMM).
+defm : Zn4WriteResYMMPair<WriteFSqrtY, [Zn4FPFDiv], 15, [5], 1>; // Floating point square root (YMM).
+defm : Zn4WriteResZMMPair<WriteFSqrtZ, [Zn4FPFDiv], 15, [10], 1>; // Floating point square root (ZMM).
+defm : Zn4WriteResXMMPair<WriteFSqrt64, [Zn4FPFDiv], 21, [9], 1>; // Floating point double square root.
+defm : Zn4WriteResXMMPair<WriteFSqrt64X, [Zn4FPFDiv], 21, [9], 1>; // Floating point double square root (XMM).
+defm : Zn4WriteResYMMPair<WriteFSqrt64Y, [Zn4FPFDiv], 21, [9], 1>; // Floating point double square root (YMM).
+defm : Zn4WriteResZMMPair<WriteFSqrt64Z, [Zn4FPFDiv], 21, [18], 1>; // Floating point double square root (ZMM).
+defm : Zn4WriteResXMMPair<WriteFSqrt80, [Zn4FPFDiv], 22, [23], 1>; // FIXME: latency not from llvm-exegesis // Floating point long double square root.
+defm : Zn4WriteResXMMPair<WriteFRcp, [Zn4FPFMul01], 4, [1], 1>; // Floating point reciprocal estimate.
+defm : Zn4WriteResXMMPair<WriteFRcpX, [Zn4FPFMul01], 4, [1], 1>; // Floating point reciprocal estimate (XMM).
+defm : Zn4WriteResYMMPair<WriteFRcpY, [Zn4FPFMul01], 5, [1], 1>; // Floating point reciprocal estimate (YMM).
+defm : Zn4WriteResZMMPair<WriteFRcpZ, [Zn4FPFMul01], 5, [2], 1>; // Floating point reciprocal estimate (ZMM).
+defm : Zn4WriteResXMMPair<WriteFRsqrt, [Zn4FPFDiv], 4, [1], 1>; // Floating point reciprocal square root estimate.
+defm : Zn4WriteResXMMPair<WriteFRsqrtX, [Zn4FPFDiv], 4, [1], 1>; // Floating point reciprocal square root estimate (XMM).
+defm : Zn4WriteResYMMPair<WriteFRsqrtY, [Zn4FPFDiv], 4, [1], 1>; // Floating point reciprocal square root estimate (YMM).
+defm : Zn4WriteResZMMPair<WriteFRsqrtZ, [Zn4FPFDiv], 5, [2], 1>; // Floating point reciprocal square root estimate (ZMM).
+defm : Zn4WriteResXMMPair<WriteFMA, [Zn4FPFMul01], 4, [2], 1>; // Fused Multiply Add.
+defm : Zn4WriteResXMMPair<WriteFMAX, [Zn4FPFMul01], 4, [1], 1>; // Fused Multiply Add (XMM).
+defm : Zn4WriteResYMMPair<WriteFMAY, [Zn4FPFMul01], 4, [1], 1>; // Fused Multiply Add (YMM).
+defm : Zn4WriteResZMMPair<WriteFMAZ, [Zn4FPFMul01], 4, [2], 1>; // Fused Multiply Add (ZMM).
+defm : Zn4WriteResXMMPair<WriteDPPD, [Zn4FPFMul01], 7, [6], 3, /*LoadUOps=*/2>; // Floating point double dot product.
+defm : Zn4WriteResXMMPair<WriteDPPS, [Zn4FPFMul01], 11, [8], 8, /*LoadUOps=*/2>; // Floating point single dot product.
+defm : Zn4WriteResYMMPair<WriteDPPSY, [Zn4FPFMul01], 11, [8], 7, /*LoadUOps=*/1>; // Floating point single dot product (YMM).
+defm : Zn4WriteResXMMPair<WriteFSign, [Zn4FPFMul01], 1, [2], 1>; // FIXME: latency not from llvm-exegesis // Floating point fabs/fchs.
+defm : Zn4WriteResXMMPair<WriteFRnd, [Zn4FPFCvt01], 3, [1], 1>; // Floating point rounding.
+defm : Zn4WriteResYMMPair<WriteFRndY, [Zn4FPFCvt01], 3, [1], 1>; // Floating point rounding (YMM).
+defm : Zn4WriteResZMMPair<WriteFRndZ, [Zn4FPFCvt01], 3, [2], 1>; // Floating point rounding (ZMM).
+
+defm : Zn4WriteResXMMPair<WriteFLogic, [Zn4FPVMisc0123], 1, [1], 1>; // Floating point and/or/xor logicals.
+defm : Zn4WriteResYMMPair<WriteFLogicY, [Zn4FPVMisc0123], 1, [1], 1>; // Floating point and/or/xor logicals (YMM).
+defm : Zn4WriteResZMMPair<WriteFLogicZ, [Zn4FPVMisc0123], 1, [2], 1>; // Floating point and/or/xor logicals (ZMM).
+defm : Zn4WriteResXMMPair<WriteFTest, [Zn4FPFMisc12], 1, [2], 2>; // FIXME: latency not from llvm-exegesis // Floating point TEST instructions.
+defm : Zn4WriteResYMMPair<WriteFTestY, [Zn4FPFMisc12], 1, [2], 2>; // FIXME: latency not from llvm-exegesis // Floating point TEST instructions (YMM).
+defm : Zn4WriteResZMMPair<WriteFTestZ, [Zn4FPFMisc12], 1, [4], 1>; // FIXME: latency not from llvm-exegesis // Floating point TEST instructions (ZMM).
+defm : Zn4WriteResXMMPair<WriteFShuffle, [Zn4FPVShuf01], 1, [1], 1>; // Floating point vector shuffles.
+defm : Zn4WriteResYMMPair<WriteFShuffleY, [Zn4FPVShuf01], 1, [1], 1>; // Floating point vector shuffles (YMM).
+defm : Zn4WriteResZMMPair<WriteFShuffleZ, [Zn4FPVShuf01], 1, [2], 1>; // Floating point vector shuffles (ZMM).
+defm : Zn4WriteResXMMPair<WriteFVarShuffle, [Zn4FPVShuf01], 3, [1], 1>; // Floating point vector variable shuffles.
+defm : Zn4WriteResYMMPair<WriteFVarShuffleY, [Zn4FPVShuf01], 3, [1], 1>; // Floating point vector variable shuffles (YMM).
+defm : Zn4WriteResZMMPair<WriteFVarShuffleZ, [Zn4FPVShuf01], 3, [2], 1>; // Floating point vector variable shuffles (ZMM).
+defm : Zn4WriteResXMMPair<WriteFBlend, [Zn4FPFMul01], 1, [1], 1>; // Floating point vector blends.
+defm : Zn4WriteResYMMPair<WriteFBlendY, [Zn4FPFMul01], 1, [1], 1>; // Floating point vector blends (YMM).
+defm : Zn4WriteResZMMPair<WriteFBlendZ, [Zn4FPFMul01], 1, [2], 1>; // Floating point vector blends (ZMM).
+defm : Zn4WriteResXMMPair<WriteFVarBlend, [Zn4FPFMul01], 1, [1], 1>; // Fp vector variable blends.
+defm : Zn4WriteResYMMPair<WriteFVarBlendY, [Zn4FPFMul01], 1, [1], 1>; // Fp vector variable blends (YMM).
+defm : Zn4WriteResZMMPair<WriteFVarBlendZ, [Zn4FPFMul01], 1, [2], 1>; // Fp vector variable blends (ZMM).
+
+// Horizontal Add/Sub (float and integer)
+defm : Zn4WriteResXMMPair<WriteFHAdd, [Zn4FPFAdd0], 4, [2], 3>;
+defm : Zn4WriteResYMMPair<WriteFHAddY, [Zn4FPFAdd0], 4, [2], 3, /*LoadUOps=*/1>;
+defm : Zn4WriteResZMMPair<WriteFHAddZ, [Zn4FPFAdd0], 6, [4], 3, /*LoadUOps=*/1>;
+defm : Zn4WriteResXMMPair<WritePHAdd, [Zn4FPVAdd0], 2, [2], 3, /*LoadUOps=*/1>;
+defm : Zn4WriteResXMMPair<WritePHAddX, [Zn4FPVAdd0], 2, [2], 3>;
+defm : Zn4WriteResYMMPair<WritePHAddY, [Zn4FPVAdd0], 3, [3], 3, /*LoadUOps=*/1>;
+defm : Zn4WriteResZMMPair<WritePHAddZ, [Zn4FPVAdd0], 2, [4], 3, /*LoadUOps=*/1>;
+
+// Vector integer operations.
+defm : Zn4WriteResXMM<WriteVecLoad, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
+defm : Zn4WriteResXMM<WriteVecLoadX, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
+defm : Zn4WriteResYMM<WriteVecLoadY, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
+defm : Zn4WriteResXMM<WriteVecLoadNT, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
+defm : Zn4WriteResYMM<WriteVecLoadNTY, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
+defm : Zn4WriteResXMM<WriteVecMaskedLoad, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
+defm : Zn4WriteResYMM<WriteVecMaskedLoadY, [Zn4FPLd01, Zn4Load], !add(Znver4Model.VecLoadLatency, 1), [1, 1], 1>;
+defm : Zn4WriteResXMM<WriteVecStore, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
+defm : Zn4WriteResXMM<WriteVecStoreX, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
+
+def Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr : SchedWriteRes<[Zn4FPFMisc0]> {
+ let Latency = 4;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr], (instrs VEXTRACTF128rr, VEXTRACTI128rr)>;
+
+def Zn4WriteVEXTRACTI128mr : SchedWriteRes<[Zn4FPFMisc0, Zn4FPSt, Zn4Store]> {
+ let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
+ let ResourceCycles = [1, 1, 1];
+ let NumMicroOps = !add(Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 1);
+}
+def : InstRW<[Zn4WriteVEXTRACTI128mr], (instrs VEXTRACTI128mr, VEXTRACTF128mr)>;
+
+def Zn4WriteVINSERTF128rmr : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPFMisc0]> {
+ let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.Latency);
+ let ResourceCycles = [1, 1, 1];
+ let NumMicroOps = !add(Zn4WriteVEXTRACTF128rr_VEXTRACTI128rr.NumMicroOps, 0);
+}
+def : InstRW<[Zn4WriteVINSERTF128rmr], (instrs VINSERTF128rm)>;
+
+defm : Zn4WriteResYMM<WriteVecStoreY, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
+defm : Zn4WriteResXMM<WriteVecStoreNT, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
+defm : Zn4WriteResYMM<WriteVecStoreNTY, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [1, 1], 1>;
+defm : Zn4WriteResXMM<WriteVecMaskedStore32, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [6, 1], 18>;
+defm : Zn4WriteResXMM<WriteVecMaskedStore64, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [4, 1], 10>;
+defm : Zn4WriteResYMM<WriteVecMaskedStore32Y, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [12, 1], 42>;
+defm : Zn4WriteResYMM<WriteVecMaskedStore64Y, [Zn4FPSt, Zn4Store], Znver4Model.StoreLatency, [6, 1], 18>;
+
+defm : Zn4WriteResXMM<WriteVecMoveToGpr, [Zn4FPLd01], 1, [2], 1>;
+defm : Zn4WriteResXMM<WriteVecMoveFromGpr, [Zn4FPLd01], 1, [2], 1>;
+
+def Zn4WriteMOVMMX : SchedWriteRes<[Zn4FPLd01, Zn4FPFMisc0123]> {
+ let Latency = 1;
+ let ResourceCycles = [1, 2];
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn4WriteMOVMMX], (instrs MMX_MOVQ2FR64rr, MMX_MOVQ2DQrr)>;
+
+def Zn4WriteMOVMMXSlow : SchedWriteRes<[Zn4FPLd01, Zn4FPFMisc0123]> {
+ let Latency = 1;
+ let ResourceCycles = [1, 4];
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn4WriteMOVMMXSlow], (instrs MMX_MOVD64rr, MMX_MOVD64to64rr)>;
+
+defm : Zn4WriteResXMMPair<WriteVecALU, [Zn4FPVAdd0123], 1, [1], 1>; // Vector integer ALU op, no logicals.
+
+def Zn4WriteEXTRQ_INSERTQ : SchedWriteRes<[Zn4FPVShuf01, Zn4FPLd01]> {
+ let Latency = 3;
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteEXTRQ_INSERTQ], (instrs EXTRQ, INSERTQ)>;
+
+def Zn4WriteEXTRQI_INSERTQI : SchedWriteRes<[Zn4FPVShuf01, Zn4FPLd01]> {
+ let Latency = 3;
+ let ResourceCycles = [1, 1];
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn4WriteEXTRQI_INSERTQI], (instrs EXTRQI, INSERTQI)>;
+
+defm : Zn4WriteResXMMPair<WriteVecALUX, [Zn4FPVAdd0123], 1, [1], 1>; // Vector integer ALU op, no logicals (XMM).
+
+def Zn4WriteVecALUXSlow : SchedWriteRes<[Zn4FPVAdd01]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteVecALUXSlow], (instrs PABSBrr, PABSDrr, PABSWrr,
+ PADDSBrr, PADDSWrr, PADDUSBrr, PADDUSWrr,
+ PAVGBrr, PAVGWrr,
+ PSIGNBrr, PSIGNDrr, PSIGNWrr,
+ VPABSBrr, VPABSDrr, VPABSWrr,
+ VPADDSBrr, VPADDSWrr, VPADDUSBrr, VPADDUSWrr,
+ VPAVGBrr, VPAVGWrr,
+ VPCMPEQQrr,
+ VPSIGNBrr, VPSIGNDrr, VPSIGNWrr,
+ PSUBSBrr, PSUBSWrr, PSUBUSBrr, PSUBUSWrr, VPSUBSBrr, VPSUBSWrr, VPSUBUSBrr, VPSUBUSWrr)>;
+
+def Zn4WriteVecOpMask : SchedWriteRes<[Zn4FPOpMask01]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteVecOpMask], (instrs KADDBrr, KADDDrr, KADDQrr, KADDWrr,
+ KANDBrr, KANDDrr, KANDQrr, KANDWrr,
+ KANDNBrr, KANDNDrr, KANDNQrr, KANDNWrr,
+ KMOVBkk, KMOVDkk, KMOVQkk, KMOVWkk,
+ KMOVBrk, KMOVDrk, KMOVQrk, KMOVWrk,
+ KNOTBrr, KNOTDrr, KNOTQrr, KNOTWrr,
+ KORBrr, KORDrr, KORQrr, KORWrr,
+ KORTESTBrr, KORTESTDrr, KORTESTQrr, KORTESTWrr,
+ KTESTBrr, KTESTDrr, KTESTQrr, KTESTWrr,
+ KUNPCKBWrr, KUNPCKDQrr, KUNPCKWDrr,
+ KXNORBrr, KXNORDrr, KXNORQrr, KXNORWrr,
+ KXORBrr, KXORDrr, KXORQrr, KXORWrr)>;
+
+def Zn4WriteVecOpMaskMemMov : SchedWriteRes<[Zn4FPOpMask4]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteVecOpMaskMemMov], (instrs KMOVBmk, KMOVDmk, KMOVQmk, KMOVWmk)>;
+
+def Zn4WriteVecOpMaskKRMov : SchedWriteRes<[Zn4FPOpMask4]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteVecOpMaskKRMov], (instrs KMOVBkr, KMOVDkr, KMOVQkr, KMOVWkr)>;
+
+def Zn4WriteVecALU2Slow : SchedWriteRes<[Zn4FPVAdd12]> {
+  // TODO: All align instructions are expected to have 4-cycle latency
+ let Latency = 4;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteVecALU2Slow], (instrs VALIGNDZrri, VALIGNDZ128rri, VALIGNDZ256rri,
+ VALIGNQZrri, VALIGNQZ128rri, VALIGNQZ256rri)
+ >;
+defm : Zn4WriteResYMMPair<WriteVecALUY, [Zn4FPVAdd0123], 1, [1], 1>; // Vector integer ALU op, no logicals (YMM).
+
+def Zn4WriteVecALUYSlow : SchedWriteRes<[Zn4FPVAdd01]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteVecALUYSlow], (instrs VPABSBYrr, VPABSDYrr, VPABSWYrr,
+ VPADDSBYrr, VPADDSWYrr, VPADDUSBYrr, VPADDUSWYrr,
+ VPSUBSBYrr, VPSUBSWYrr, VPSUBUSBYrr, VPSUBUSWYrr,
+ VPAVGBYrr, VPAVGWYrr,
+ VPCMPEQQYrr,
+ VPSIGNBYrr, VPSIGNDYrr, VPSIGNWYrr)>;
+
+defm : Zn4WriteResZMMPair<WriteVecALUZ, [Zn4FPVAdd0123], 1, [2], 1>; // Vector integer ALU op, no logicals (ZMM).
+
+defm : Zn4WriteResXMMPair<WriteVecLogic, [Zn4FPVMisc0123], 1, [1], 1>; // Vector integer and/or/xor logicals.
+defm : Zn4WriteResXMMPair<WriteVecLogicX, [Zn4FPVMisc0123], 1, [1], 1>; // Vector integer and/or/xor logicals (XMM).
+defm : Zn4WriteResYMMPair<WriteVecLogicY, [Zn4FPVMisc0123], 1, [1], 1>; // Vector integer and/or/xor logicals (YMM).
+defm : Zn4WriteResZMMPair<WriteVecLogicZ, [Zn4FPVMisc0123], 1, [2], 1>; // Vector integer and/or/xor logicals (ZMM).
+defm : Zn4WriteResXMMPair<WriteVecTest, [Zn4FPVAdd12, Zn4FPSt], 1, [1, 1], 2>; // FIXME: latency not from llvm-exegesis // Vector integer TEST instructions.
+defm : Zn4WriteResYMMPair<WriteVecTestY, [Zn4FPVAdd12, Zn4FPSt], 1, [1, 1], 2>; // FIXME: latency not from llvm-exegesis // Vector integer TEST instructions (YMM).
+defm : Zn4WriteResZMMPair<WriteVecTestZ, [Zn4FPVAdd12, Zn4FPSt], 1, [2, 2], 2>; // FIXME: latency not from llvm-exegesis // Vector integer TEST instructions (ZMM).
+defm : Zn4WriteResXMMPair<WriteVecShift, [Zn4FPVShift01], 1, [1], 1>; // Vector integer shifts (default).
+defm : Zn4WriteResXMMPair<WriteVecShiftX, [Zn4FPVShift01], 2, [2], 1>; // Vector integer shifts (XMM).
+defm : Zn4WriteResYMMPair<WriteVecShiftY, [Zn4FPVShift01], 1, [1], 1>; // Vector integer shifts (YMM).
+defm : Zn4WriteResZMMPair<WriteVecShiftZ, [Zn4FPVShift01], 1, [2], 1>; // Vector integer shifts (ZMM).
+defm : Zn4WriteResXMMPair<WriteVecShiftImm, [Zn4FPVShift01], 1, [1], 1>; // Vector integer immediate shifts (default).
+defm : Zn4WriteResXMMPair<WriteVecShiftImmX, [Zn4FPVShift01], 1, [1], 1>; // Vector integer immediate shifts (XMM).
+defm : Zn4WriteResYMMPair<WriteVecShiftImmY, [Zn4FPVShift01], 1, [1], 1>; // Vector integer immediate shifts (YMM).
+defm : Zn4WriteResZMMPair<WriteVecShiftImmZ, [Zn4FPVShift01], 1, [2], 1>; // Vector integer immediate shifts (ZMM).
+defm : Zn4WriteResXMMPair<WriteVecIMul, [Zn4FPVMul01], 3, [1], 1>; // Vector integer multiply (default).
+defm : Zn4WriteResXMMPair<WriteVecIMulX, [Zn4FPVMul01], 3, [1], 1>; // Vector integer multiply (XMM).
+defm : Zn4WriteResYMMPair<WriteVecIMulY, [Zn4FPVMul01], 3, [1], 1>; // Vector integer multiply (YMM).
+defm : Zn4WriteResZMMPair<WriteVecIMulZ, [Zn4FPVMul01], 3, [2], 1>; // Vector integer multiply (ZMM).
+defm : Zn4WriteResXMMPair<WritePMULLD, [Zn4FPVMul01], 3, [1], 1>; // Vector PMULLD.
+defm : Zn4WriteResYMMPair<WritePMULLDY, [Zn4FPVMul01], 3, [1], 1>; // Vector PMULLD (YMM).
+defm : Zn4WriteResZMMPair<WritePMULLDZ, [Zn4FPVMul01], 3, [2], 1>; // Vector PMULLD (ZMM).
+defm : Zn4WriteResXMMPair<WriteShuffle, [Zn4FPVShuf01], 1, [1], 1>; // Vector shuffles.
+defm : Zn4WriteResXMMPair<WriteShuffleX, [Zn4FPVShuf01], 1, [1], 1>; // Vector shuffles (XMM).
+defm : Zn4WriteResYMMPair<WriteShuffleY, [Zn4FPVShuf01], 1, [1], 1>; // Vector shuffles (YMM).
+defm : Zn4WriteResZMMPair<WriteShuffleZ, [Zn4FPVShuf01], 1, [2], 1>; // Vector shuffles (ZMM).
+defm : Zn4WriteResXMMPair<WriteVarShuffle, [Zn4FPVShuf01], 1, [1], 1>; // Vector variable shuffles.
+defm : Zn4WriteResXMMPair<WriteVarShuffleX, [Zn4FPVShuf01], 1, [1], 1>; // Vector variable shuffles (XMM).
+defm : Zn4WriteResYMMPair<WriteVarShuffleY, [Zn4FPVShuf01], 1, [1], 1>; // Vector variable shuffles (YMM).
+defm : Zn4WriteResZMMPair<WriteVarShuffleZ, [Zn4FPVShuf01], 1, [2], 1>; // Vector variable shuffles (ZMM).
+defm : Zn4WriteResXMMPair<WriteBlend, [Zn4FPVMisc0123], 1, [1], 1>; // Vector blends.
+defm : Zn4WriteResYMMPair<WriteBlendY, [Zn4FPVMisc0123], 1, [1], 1>; // Vector blends (YMM).
+defm : Zn4WriteResZMMPair<WriteBlendZ, [Zn4FPVMisc0123], 1, [2], 1>; // Vector blends (ZMM).
+defm : Zn4WriteResXMMPair<WriteVarBlend, [Zn4FPVMul01], 1, [1], 1>; // Vector variable blends.
+defm : Zn4WriteResYMMPair<WriteVarBlendY, [Zn4FPVMul01], 1, [1], 1>; // Vector variable blends (YMM).
+defm : Zn4WriteResZMMPair<WriteVarBlendZ, [Zn4FPVMul01], 1, [2], 1>; // Vector variable blends (ZMM).
+defm : Zn4WriteResXMMPair<WritePSADBW, [Zn4FPVAdd0123], 3, [2], 1>; // Vector PSADBW.
+defm : Zn4WriteResXMMPair<WritePSADBWX, [Zn4FPVAdd0123], 3, [2], 1>; // Vector PSADBW (XMM).
+defm : Zn4WriteResYMMPair<WritePSADBWY, [Zn4FPVAdd0123], 3, [2], 1>; // Vector PSADBW (YMM).
+defm : Zn4WriteResZMMPair<WritePSADBWZ, [Zn4FPVAdd0123], 4, [4], 1>; // Vector PSADBW (ZMM).
+defm : Zn4WriteResXMMPair<WriteMPSAD, [Zn4FPVAdd0123], 4, [8], 4, /*LoadUOps=*/2>; // Vector MPSAD.
+defm : Zn4WriteResYMMPair<WriteMPSADY, [Zn4FPVAdd0123], 4, [8], 3, /*LoadUOps=*/1>; // Vector MPSAD (YMM).
+defm : Zn4WriteResZMMPair<WriteMPSADZ, [Zn4FPVAdd0123], 4, [16], 3, /*LoadUOps=*/1>; // Vector MPSAD (ZMM).
+defm : Zn4WriteResXMMPair<WritePHMINPOS, [Zn4FPVAdd01], 3, [1], 1>; // Vector PHMINPOS.
+
+// Vector insert/extract operations.
+defm : Zn4WriteResXMMPair<WriteVecInsert, [Zn4FPLd01], 1, [2], 2, /*LoadUOps=*/-1>; // Insert gpr to vector element.
+defm : Zn4WriteResXMM<WriteVecExtract, [Zn4FPLd01], 1, [2], 2>; // Extract vector element to gpr.
+defm : Zn4WriteResXMM<WriteVecExtractSt, [Zn4FPSt, Zn4Store], !add(1, Znver4Model.StoreLatency), [1, 1], 2>; // Extract vector element and store.
+
+// MOVMSK operations.
+defm : Zn4WriteResXMM<WriteFMOVMSK, [Zn4FPVMisc2], 1, [1], 1>;
+defm : Zn4WriteResXMM<WriteVecMOVMSK, [Zn4FPVMisc2], 1, [1], 1>;
+defm : Zn4WriteResYMM<WriteVecMOVMSKY, [Zn4FPVMisc2], 1, [1], 1>;
+defm : Zn4WriteResXMM<WriteMMXMOVMSK, [Zn4FPVMisc2], 1, [1], 1>;
+
+// Conversion between integer and float.
+defm : Zn4WriteResXMMPair<WriteCvtSD2I, [Zn4FPFCvt01], 1, [1], 1>; // Double -> Integer.
+defm : Zn4WriteResXMMPair<WriteCvtPD2I, [Zn4FPFCvt01], 3, [2], 1>; // Double -> Integer (XMM).
+defm : Zn4WriteResYMMPair<WriteCvtPD2IY, [Zn4FPFCvt01], 3, [2], 2>; // Double -> Integer (YMM).
+defm : Zn4WriteResZMMPair<WriteCvtPD2IZ, [Zn4FPFCvt01], 3, [4], 2>; // Double -> Integer (ZMM).
+
+def Zn4WriteCvtPD2IMMX : SchedWriteRes<[Zn4FPFCvt01]> {
+ let Latency = 1;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+defm : Zn4WriteResXMMPair<WriteCvtSS2I, [Zn4FPFCvt01], 5, [5], 2>; // Float -> Integer.
+
+defm : Zn4WriteResXMMPair<WriteCvtPS2I, [Zn4FPFCvt01], 3, [1], 1>; // Float -> Integer (XMM).
+defm : Zn4WriteResYMMPair<WriteCvtPS2IY, [Zn4FPFCvt01], 4, [1], 1>; // Float -> Integer (YMM).
+defm : Zn4WriteResZMMPair<WriteCvtPS2IZ, [Zn4FPFCvt01], 4, [2], 2>; // Float -> Integer (ZMM).
+
+defm : Zn4WriteResXMMPair<WriteCvtI2SD, [Zn4FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // Integer -> Double.
+defm : Zn4WriteResXMMPair<WriteCvtI2PD, [Zn4FPFCvt01], 3, [1], 1>; // Integer -> Double (XMM).
+defm : Zn4WriteResYMMPair<WriteCvtI2PDY, [Zn4FPFCvt01], 3, [2], 2, /*LoadUOps=*/-1>; // Integer -> Double (YMM).
+defm : Zn4WriteResZMMPair<WriteCvtI2PDZ, [Zn4FPFCvt01], 4, [4], 4, /*LoadUOps=*/-1>; // Integer -> Double (ZMM).
+
+def Zn4WriteCvtI2PDMMX : SchedWriteRes<[Zn4FPFCvt01]> {
+ let Latency = 2;
+ let ResourceCycles = [6];
+ let NumMicroOps = 2;
+}
+
+defm : Zn4WriteResXMMPair<WriteCvtI2SS, [Zn4FPFCvt01], 3, [2], 2, /*LoadUOps=*/-1>; // Integer -> Float.
+defm : Zn4WriteResXMMPair<WriteCvtI2PS, [Zn4FPFCvt01], 3, [1], 1>; // Integer -> Float (XMM).
+defm : Zn4WriteResYMMPair<WriteCvtI2PSY, [Zn4FPFCvt01], 3, [1], 1>; // Integer -> Float (YMM).
+defm : Zn4WriteResZMMPair<WriteCvtI2PSZ, [Zn4FPFCvt01], 3, [2], 2>; // Integer -> Float (ZMM).
+
+def Zn4WriteCvtI2PSMMX : SchedWriteRes<[Zn4FPFCvt01]> {
+ let Latency = 3;
+ let ResourceCycles = [1];
+ let NumMicroOps = 2;
+}
+
+defm : Zn4WriteResXMMPair<WriteCvtSS2SD, [Zn4FPFCvt01], 3, [1], 1>; // Float -> Double size conversion.
+defm : Zn4WriteResXMMPair<WriteCvtPS2PD, [Zn4FPFCvt01], 3, [1], 1>; // Float -> Double size conversion (XMM).
+defm : Zn4WriteResYMMPair<WriteCvtPS2PDY, [Zn4FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // Float -> Double size conversion (YMM).
+defm : Zn4WriteResZMMPair<WriteCvtPS2PDZ, [Zn4FPFCvt01], 6, [4], 4, /*LoadUOps=*/-1>; // Float -> Double size conversion (ZMM).
+
+defm : Zn4WriteResXMMPair<WriteCvtSD2SS, [Zn4FPFCvt01], 3, [1], 1>; // Double -> Float size conversion.
+defm : Zn4WriteResXMMPair<WriteCvtPD2PS, [Zn4FPFCvt01], 3, [1], 1>; // Double -> Float size conversion (XMM).
+defm : Zn4WriteResYMMPair<WriteCvtPD2PSY, [Zn4FPFCvt01], 6, [2], 2>; // Double -> Float size conversion (YMM).
+defm : Zn4WriteResZMMPair<WriteCvtPD2PSZ, [Zn4FPFCvt01], 6, [4], 4>; // Double -> Float size conversion (ZMM).
+
+defm : Zn4WriteResXMMPair<WriteCvtPH2PS, [Zn4FPFCvt01], 3, [1], 1>; // Half -> Float size conversion.
+defm : Zn4WriteResYMMPair<WriteCvtPH2PSY, [Zn4FPFCvt01], 4, [2], 2, /*LoadUOps=*/-1>; // Half -> Float size conversion (YMM).
+defm : Zn4WriteResZMMPair<WriteCvtPH2PSZ, [Zn4FPFCvt01], 4, [4], 4, /*LoadUOps=*/-1>; // Half -> Float size conversion (ZMM).
+
+defm : Zn4WriteResXMM<WriteCvtPS2PH, [Zn4FPFCvt01], 3, [2], 1>; // Float -> Half size conversion.
+defm : Zn4WriteResYMM<WriteCvtPS2PHY, [Zn4FPFCvt01], 6, [2], 2>; // Float -> Half size conversion (YMM).
+defm : Zn4WriteResZMM<WriteCvtPS2PHZ, [Zn4FPFCvt01], 6, [2], 2>; // Float -> Half size conversion (ZMM).
+
+defm : Zn4WriteResXMM<WriteCvtPS2PHSt, [Zn4FPFCvt01, Zn4FPSt, Zn4Store], !add(3, Znver4Model.StoreLatency), [1, 1, 1], 2>; // Float -> Half + store size conversion.
+defm : Zn4WriteResYMM<WriteCvtPS2PHYSt, [Zn4FPFCvt01, Zn4FPSt, Zn4Store], !add(6, Znver4Model.StoreLatency), [2, 1, 1], 3>; // Float -> Half + store size conversion (YMM).
+defm : Zn4WriteResYMM<WriteCvtPS2PHZSt, [Zn4FPFCvt01, Zn4FPSt, Zn4Store], !add(6, Znver4Model.StoreLatency), [2, 1, 1], 3>; // Float -> Half + store size conversion (ZMM).
+
+// CRC32 instruction.
+defm : Zn4WriteResIntPair<WriteCRC32, [Zn4ALU1], 3, [1], 1>;
+
+def Zn4WriteSHA1MSG1rr : SchedWriteRes<[Zn4FPU0123]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn4WriteSHA1MSG1rr], (instrs SHA1MSG1rr)>;
+
+def Zn4WriteSHA1MSG1rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
+ let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA1MSG1rr.Latency);
+ let ResourceCycles = [1, 1, 2];
+ let NumMicroOps = !add(Zn4WriteSHA1MSG1rr.NumMicroOps, 0);
+}
+def : InstRW<[Zn4WriteSHA1MSG1rm], (instrs SHA1MSG1rm)>;
+
+def Zn4WriteSHA1MSG2rr_SHA1NEXTErr : SchedWriteRes<[Zn4FPU0123]> {
+ let Latency = 1;
+ let ResourceCycles = [2];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteSHA1MSG2rr_SHA1NEXTErr], (instrs SHA1MSG2rr, SHA1NEXTErr)>;
+
+def Zn4Writerm_SHA1MSG2rm_SHA1NEXTErm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
+ let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA1MSG2rr_SHA1NEXTErr.Latency);
+ let ResourceCycles = [1, 1, 2];
+ let NumMicroOps = !add(Zn4WriteSHA1MSG2rr_SHA1NEXTErr.NumMicroOps, 0);
+}
+def : InstRW<[Zn4Writerm_SHA1MSG2rm_SHA1NEXTErm], (instrs SHA1MSG2rm, SHA1NEXTErm)>;
+
+def Zn4WriteSHA256MSG1rr : SchedWriteRes<[Zn4FPU0123]> {
+ let Latency = 2;
+ let ResourceCycles = [3];
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn4WriteSHA256MSG1rr], (instrs SHA256MSG1rr)>;
+
+def Zn4Writerm_SHA256MSG1rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
+ let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA256MSG1rr.Latency);
+ let ResourceCycles = [1, 1, 3];
+ let NumMicroOps = !add(Zn4WriteSHA256MSG1rr.NumMicroOps, 0);
+}
+def : InstRW<[Zn4Writerm_SHA256MSG1rm], (instrs SHA256MSG1rm)>;
+
+def Zn4WriteSHA256MSG2rr : SchedWriteRes<[Zn4FPU0123]> {
+ let Latency = 3;
+ let ResourceCycles = [8];
+ let NumMicroOps = 4;
+}
+def : InstRW<[Zn4WriteSHA256MSG2rr], (instrs SHA256MSG2rr)>;
+
+def Zn4WriteSHA256MSG2rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPU0123]> {
+ let Latency = !add(Znver4Model.LoadLatency, Zn4WriteSHA256MSG2rr.Latency);
+ let ResourceCycles = [1, 1, 8];
+ let NumMicroOps = !add(Zn4WriteSHA256MSG2rr.NumMicroOps, 1);
+}
+def : InstRW<[Zn4WriteSHA256MSG2rm], (instrs SHA256MSG2rm)>;
+
+def Zn4WriteSHA1RNDS4rri : SchedWriteRes<[Zn4FPU0123]> {
+ let Latency = 6;
+ let ResourceCycles = [8];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteSHA1RNDS4rri], (instrs SHA1RNDS4rri)>;
+
+def Zn4WriteSHA256RNDS2rr : SchedWriteRes<[Zn4FPU0123]> {
+ let Latency = 4;
+ let ResourceCycles = [8];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteSHA256RNDS2rr], (instrs SHA256RNDS2rr)>;
+
+// String instructions.
+// Packed Compare Implicit Length Strings, Return Mask
+defm : Zn4WriteResXMMPair<WritePCmpIStrM, [Zn4FPVAdd0123], 6, [8], 3, /*LoadUOps=*/1>;
+// Packed Compare Explicit Length Strings, Return Mask
+defm : Zn4WriteResXMMPair<WritePCmpEStrM, [Zn4FPVAdd0123], 6, [12], 7, /*LoadUOps=*/5>;
+// Packed Compare Implicit Length Strings, Return Index
+defm : Zn4WriteResXMMPair<WritePCmpIStrI, [Zn4FPVAdd0123], 2, [8], 4>;
+// Packed Compare Explicit Length Strings, Return Index
+defm : Zn4WriteResXMMPair<WritePCmpEStrI, [Zn4FPVAdd0123], 6, [12], 8, /*LoadUOps=*/4>;
+
+// AES instructions.
+defm : Zn4WriteResXMMPair<WriteAESDecEnc, [Zn4FPAES01], 4, [1], 1>; // Decryption, encryption.
+defm : Zn4WriteResXMMPair<WriteAESIMC, [Zn4FPAES01], 4, [1], 1>; // InvMixColumn.
+defm : Zn4WriteResXMMPair<WriteAESKeyGen, [Zn4FPAES01], 4, [1], 1>; // Key Generation.
+
+// Carry-less multiplication instructions.
+defm : Zn4WriteResXMMPair<WriteCLMul, [Zn4FPCLM01], 4, [4], 4>;
+
+// EMMS/FEMMS
+defm : Zn4WriteResInt<WriteEMMS, [Zn4ALU0123], 2, [1], 1>; // FIXME: latency not from llvm-exegesis
+
+// Load/store MXCSR
+defm : Zn4WriteResInt<WriteLDMXCSR, [Zn4AGU012, Zn4Load, Zn4ALU0123], !add(Znver4Model.LoadLatency, 1), [1, 1, 6], 1>; // FIXME: latency not from llvm-exegesis
+defm : Zn4WriteResInt<WriteSTMXCSR, [Zn4ALU0123, Zn4AGU012, Zn4Store], !add(1, Znver4Model.StoreLatency), [60, 1, 1], 2>; // FIXME: latency not from llvm-exegesis
+
+// Catch-all for expensive system instructions.
+defm : Zn4WriteResInt<WriteSystem, [Zn4ALU0123], 100, [100], 100>;
+
+def Zn4WriteVZEROUPPER : SchedWriteRes<[Zn4FPU0123]> {
+ let Latency = 0; // FIXME: not from llvm-exegesis
+ let ResourceCycles = [1];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteVZEROUPPER], (instrs VZEROUPPER)>;
+
+def Zn4WriteVZEROALL : SchedWriteRes<[Zn4FPU0123]> {
+ let Latency = 10; // FIXME: not from llvm-exegesis
+ let ResourceCycles = [24];
+ let NumMicroOps = 18;
+}
+def : InstRW<[Zn4WriteVZEROALL], (instrs VZEROALL)>;
+
+// AVX2.
+defm : Zn4WriteResYMMPair<WriteFShuffle256, [Zn4FPVShuf], 2, [1], 1, /*LoadUOps=*/2>; // Fp 256-bit width vector shuffles.
+defm : Zn4WriteResYMMPair<WriteFVarShuffle256, [Zn4FPVShuf], 7, [1], 2, /*LoadUOps=*/1>; // Fp 256-bit width variable shuffles.
+defm : Zn4WriteResYMMPair<WriteShuffle256, [Zn4FPVShuf], 1, [1], 1>; // 256-bit width vector shuffles.
+
+def Zn4WriteVPERM2I128rr_VPERM2F128rr : SchedWriteRes<[Zn4FPVShuf]> {
+ let Latency = 3;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteVPERM2I128rr_VPERM2F128rr], (instrs VPERM2I128rr, VPERM2F128rr)>;
+
+def Zn4WriteVPERM2F128rm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
+ let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERM2I128rr_VPERM2F128rr.Latency);
+ let ResourceCycles = [1, 1, 1];
+ let NumMicroOps = !add(Zn4WriteVPERM2I128rr_VPERM2F128rr.NumMicroOps, 0);
+}
+def : InstRW<[Zn4WriteVPERM2F128rm], (instrs VPERM2F128rm)>;
+
+def Zn4WriteVPERMPSYrr : SchedWriteRes<[Zn4FPVShuf]> {
+ let Latency = 7;
+ let ResourceCycles = [1];
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn4WriteVPERMPSYrr], (instrs VPERMPSYrr)>;
+
+def Zn4WriteVPERMPSYrm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
+ let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERMPSYrr.Latency);
+ let ResourceCycles = [1, 1, 2];
+ let NumMicroOps = !add(Zn4WriteVPERMPSYrr.NumMicroOps, 1);
+}
+def : InstRW<[Zn4WriteVPERMPSYrm], (instrs VPERMPSYrm)>;
+
+def Zn4WriteVPERMYri : SchedWriteRes<[Zn4FPVShuf]> {
+ let Latency = 6;
+ let ResourceCycles = [1];
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn4WriteVPERMYri], (instrs VPERMPDYri, VPERMQYri)>;
+
+def Zn4WriteVPERMPDYmi : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
+ let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERMYri.Latency);
+ let ResourceCycles = [1, 1, 2];
+ let NumMicroOps = !add(Zn4WriteVPERMYri.NumMicroOps, 1);
+}
+def : InstRW<[Zn4WriteVPERMPDYmi], (instrs VPERMPDYmi)>;
+
+def Zn4WriteVPERMDYrr : SchedWriteRes<[Zn4FPVShuf]> {
+ let Latency = 5;
+ let ResourceCycles = [1];
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn4WriteVPERMDYrr], (instrs VPERMDYrr)>;
+
+def Zn4WriteVPERMYm : SchedWriteRes<[Zn4AGU012, Zn4Load, Zn4FPVShuf]> {
+ let Latency = !add(Znver4Model.LoadLatency, Zn4WriteVPERMDYrr.Latency);
+ let ResourceCycles = [1, 1, 2];
+ let NumMicroOps = !add(Zn4WriteVPERMDYrr.NumMicroOps, 0);
+}
+def : InstRW<[Zn4WriteVPERMYm], (instrs VPERMQYmi, VPERMDYrm)>;
+
+defm : Zn4WriteResYMMPair<WriteVPMOV256, [Zn4FPVShuf01], 4, [3], 2, /*LoadUOps=*/-1>; // 256-bit width packed vector width-changing move.
+defm : Zn4WriteResYMMPair<WriteVarShuffle256, [Zn4FPVShuf01], 1, [1], 2>; // 256-bit width vector variable shuffles.
+defm : Zn4WriteResXMMPair<WriteVarVecShift, [Zn4FPVShift01], 1, [1], 1>; // Variable vector shifts.
+defm : Zn4WriteResYMMPair<WriteVarVecShiftY, [Zn4FPVShift01], 1, [1], 1>; // Variable vector shifts (YMM).
+defm : Zn4WriteResZMMPair<WriteVarVecShiftZ, [Zn4FPVShift01], 1, [2], 2>; // Variable vector shifts (ZMM).
+
+// Old microcoded instructions that nobody uses.
+defm : Zn4WriteResInt<WriteMicrocoded, [Zn4ALU0123], 100, [100], 100>;
+
+// Fence instructions.
+defm : Zn4WriteResInt<WriteFence, [Zn4ALU0123], 1, [100], 1>;
+
+def Zn4WriteLFENCE : SchedWriteRes<[Zn4LSU]> {
+ let Latency = 1;
+ let ResourceCycles = [30];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteLFENCE], (instrs LFENCE)>;
+
+def Zn4WriteSFENCE : SchedWriteRes<[Zn4LSU]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteSFENCE], (instrs SFENCE)>;
+
+// Nop, not very useful except that it provides a model for nops!
+defm : Zn4WriteResInt<WriteNop, [Zn4ALU0123], 0, [1], 1>; // FIXME: latency not from llvm-exegesis
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Zero Cycle Move
+///////////////////////////////////////////////////////////////////////////////
+
+def Zn4WriteZeroLatency : SchedWriteRes<[]> {
+ let Latency = 0;
+ let ResourceCycles = [];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteZeroLatency], (instrs MOV32rr, MOV32rr_REV,
+ MOV64rr, MOV64rr_REV,
+ MOVSX32rr32)>;
+
+def Zn4WriteSwapRenameable : SchedWriteRes<[]> {
+ let Latency = 0;
+ let ResourceCycles = [];
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn4WriteSwapRenameable], (instrs XCHG32rr, XCHG32ar,
+ XCHG64rr, XCHG64ar)>;
+
+defm : Zn4WriteResInt<WriteXCHG, [Zn4ALU0123], 0, [8], 2>; // Compare+Exchange - TODO RMW support.
+
+defm : Zn4WriteResXMM<WriteFMoveX, [], 0, [], 1>;
+defm : Zn4WriteResYMM<WriteFMoveY, [], 0, [], 1>;
+defm : Zn4WriteResYMM<WriteFMoveZ, [], 0, [], 1>;
+
+defm : Zn4WriteResXMM<WriteVecMove, [Zn4FPFMisc0123], 1, [1], 1>; // MMX
+defm : Zn4WriteResXMM<WriteVecMoveX, [], 0, [], 1>;
+defm : Zn4WriteResYMM<WriteVecMoveY, [], 0, [], 1>;
+defm : Zn4WriteResYMM<WriteVecMoveZ, [], 0, [], 1>;
+
+def : IsOptimizableRegisterMove<[
+ InstructionEquivalenceClass<[
+ // GPR variants.
+ MOV32rr, MOV32rr_REV,
+ MOV64rr, MOV64rr_REV,
+ MOVSX32rr32,
+ XCHG32rr, XCHG32ar,
+ XCHG64rr, XCHG64ar,
+
+ // MMX variants.
+ // MMX moves are *NOT* eliminated.
+
+ // SSE variants.
+ MOVAPSrr, MOVAPSrr_REV,
+ MOVUPSrr, MOVUPSrr_REV,
+ MOVAPDrr, MOVAPDrr_REV,
+ MOVUPDrr, MOVUPDrr_REV,
+ MOVDQArr, MOVDQArr_REV,
+ MOVDQUrr, MOVDQUrr_REV,
+
+ // AVX variants.
+ VMOVAPSrr, VMOVAPSrr_REV,
+ VMOVUPSrr, VMOVUPSrr_REV,
+ VMOVAPDrr, VMOVAPDrr_REV,
+ VMOVUPDrr, VMOVUPDrr_REV,
+ VMOVDQArr, VMOVDQArr_REV,
+ VMOVDQUrr, VMOVDQUrr_REV,
+
+ // AVX YMM variants.
+ VMOVAPSYrr, VMOVAPSYrr_REV,
+ VMOVUPSYrr, VMOVUPSYrr_REV,
+ VMOVAPDYrr, VMOVAPDYrr_REV,
+ VMOVUPDYrr, VMOVUPDYrr_REV,
+ VMOVDQAYrr, VMOVDQAYrr_REV,
+ VMOVDQUYrr, VMOVDQUYrr_REV,
+ ], TruePred >
+]>;
+
+// FIXUP and RANGE Instructions
+def Zn4WriteVFIXUPIMMPDZrr_VRANGESDrr : SchedWriteRes<[Zn4FPFMisc01]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteVFIXUPIMMPDZrr_VRANGESDrr], (instregex
+ "VFIXUPIMM(S|P)(S|D)(Z|Z128|Z256?)rrik", "VFIXUPIMM(S|P)(S|D)(Z?|Z128?|Z256?)rrikz",
+ "VFIXUPIMM(S|P)(S|D)(Z128|Z256?)rri", "VRANGE(S|P)(S|D)(Z?|Z128?|Z256?)rri(b?)",
+ "VRANGE(S|P)(S|D)(Z|Z128|Z256?)rri(b?)k","VRANGE(S|P)(S|D)(Z?|Z128?|Z256?)rri(b?)kz"
+ )>;
+
+// SCALE & REDUCE instructions
+def Zn4WriteSCALErr: SchedWriteRes<[Zn4FPFMisc23]> {
+ let Latency = 6;
+ let ResourceCycles = [6];
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn4WriteSCALErr], (instregex
+ "V(SCALEF|REDUCE)(S|P)(S|D)(Z?|Z128?|Z256?)(rr|rrb|rrkz|rrik|rrikz|rri)(_Int?|_Intkz?)",
+ "(V?)REDUCE(PD|PS|SD|SS)(Z?|Z128?)(rri|rrikz|rrib)"
+ )>;
+
+// BF16PS Instructions
+def Zn4WriteBF16: SchedWriteRes<[Zn4FPFMisc23]> {
+ let Latency = 6;
+ let ResourceCycles = [6];
+ let NumMicroOps = 2;
+}
+def : InstRW<[Zn4WriteBF16], (instregex
+ "(V?)DPBF16PS(Z?|Z128?|Z256?)(r|rk|rkz)"
+ )>;
+
+// BUSD and VPMADD Instructions
+def Zn4WriteBUSDr_VPMADDr: SchedWriteRes<[Zn4FPFMisc01]> {
+ let Latency = 4;
+ let ResourceCycles = [4];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteBUSDr_VPMADDr], (instregex
+ "VPDP(BU|WS)(S|P)(S|D|DS)(Z|Z128|Z256)(r|rk|rkz)",
+ "VPMADD52(H|L)UQ(Z|Z128|Z256)(r|rk|rkz)"
+ )>;
+
+// SHIFT instructions
+def Zn4WriteSHIFTrr: SchedWriteRes<[Zn4FPFMisc01]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteSHIFTrr], (instregex
+ "VP(LZCNT|SHLD|SHRD?)(D|Q|W|VD|VQ|VW?)(Z?|Z128?|Z256?)(rr|rk|rrk|rrkz|rri|rrik|rrikz)",
+ "(V?)P(SLL|SRL|SRA)(D|Q|W|DQ)(Y?|Z?|Z128?|Z256?)(rr|rrk|rrkz)",
+ "(V?)P(SLL|SRL|SRA)DQYri",
+ "(V?)P(SLL|SRL)DQ(Z?|Z256?)ri",
+ "(V?)P(SHUFB)(Y|Z|Z128|Z256?)(rr|rrk|rrkz)",
+ "(V?)P(ROL|ROR)(D|Q|VD|VQ)(Z?|Z128?|Z256?)(rr|rrk|rrkz)",
+ "(V?)P(ROL|ROR)(D|Q|VD|VQ)(Z256?)(ri|rik|rikz)",
+ "(V?)P(ROL|ROR)(D|Q)(Z?|Z128?)(ri|rik|rikz)",
+ "VPSHUFBITQMBZ128rr", "VFMSUB231SSZr_Intkz"
+ )>;
+
+def Zn4WriteSHIFTri: SchedWriteRes<[Zn4FPFMisc01]> {
+ let Latency = 1;
+ let ResourceCycles = [1];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteSHIFTri], (instregex
+ "VP(SLL|SRL|SRA)(D|Q|W)(Z|Z128|Z256?)(ri|rik|rikz)"
+ )>;
+
+// ALIGN Instructions
+def Zn4WriteALIGN: SchedWriteRes<[Zn4FPFMisc12]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteALIGN], (instregex
+ "(V?)PALIGNR(Z?|Z128?|Z256?)(rri|rrik|rrikz)"
+ )>;
+
+// PACK Instructions
+def Zn4WritePACK: SchedWriteRes<[Zn4FPFMisc12]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WritePACK], (instregex
+ "(V?)PACK(SS|US)(DW|WB)(Z?|Z128?|Z256?)(rr|rrk|rrkz)"
+ )>;
+
+// MAX and MIN Instructions
+def Zn4WriteFCmp64: SchedWriteRes<[Zn4FPFMisc01]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4WriteFCmp64], (instregex
+ "(V?)CMP(S|P)(S|D)(rr|rri|rr_Int)",
+ "(V?|VP?)(MAX|MIN|MINC|MAXC)(S|P|U)(S|D|Q)(Z?|Z128?|Z256?)(rr|rri|rrk|rrkz)(_Int?)",
+ "VP(MAX|MIN)(SQ|UQ)(Z|Z128|Z256)(rr|rrk|rrkz)",
+ "(V?)(MAX|MAXC|MIN|MINC)PD(Z|Z128|Z256?)(rr|rrk|rrkz)"
+ )>;
+
+// MOV Instructions
+def Zn4MOVS: SchedWriteRes<[Zn4FPFMisc12]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4MOVS], (instregex
+ "(V?)PMOV(SX|ZX)(BD|BQ|BW|WD|WQ|DQ)(Z128?|Z256?)(rr|rrk|rrkz)",
+ "(V?)PMOV(SX|QD|UZ|ZX)(BD|BQ|BW?)(Y|Z128?)(rr|rrk|rrkz)",
+ "(V?)PMOV(SX|US|ZX)(DQ|WD|QW|WQ?)(Y|Z128?)(rr|rrk|rrkz)",
+ "(V?)VMOVDDUP(Z|Z128|Z256)(rr|rrk|rrkz)",
+ "VPMOV(DB|DW|QB|QD|QW|SDB|SDW|SQB|SQD|SQW|SWB|USDB|USDW|USQB|USQD|USWB|WB)(Z128?)(rr|rrk|rrkz)"
+ )>;
+
+def Zn4MOVSZ: SchedWriteRes<[Zn4FPFMisc12]> {
+ let Latency = 4;
+ let ResourceCycles = [4];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4MOVSZ], (instregex
+ "(V?)PMOV(SX|ZX)(BD|BQ|BW|WD|WQ|DQ)(Z?)(rr|rrk|rrkz)"
+ )>;
+
+def Zn4MOVSrr: SchedWriteRes<[Zn4FPFMisc12]> {
+ let Latency = 5;
+ let ResourceCycles = [5];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4MOVSrr], (instregex
+ "(V?)PMOV(DB|QB|QW|SDB|SQB|SQW|USDB|USQB|USQW)(Z?)(rr|rrk|rrkz)"
+ )>;
+
+
+// VPTEST Instructions
+def Zn4VPTESTZ128: SchedWriteRes<[Zn4FPFMisc01]> {
+ let Latency = 3;
+ let ResourceCycles = [3];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4VPTESTZ128], (instregex
+ "(V?)PTEST(N?)(MB|MD|MQ|MW)(Z128?)(rrk)"
+ )>;
+
+def Zn4VPTESTZ256: SchedWriteRes<[Zn4FPFMisc01]> {
+ let Latency = 4;
+ let ResourceCycles = [4];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4VPTESTZ256], (instregex
+ "(V?)PTEST(N?)(MB|MD|MQ|MW)(Z256?)(rr|rrk)"
+ )>;
+
+def Zn4VPTESTZ: SchedWriteRes<[Zn4FPFMisc01]> {
+ let Latency = 5;
+ let ResourceCycles = [5];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4VPTESTZ], (instregex
+ "(V?)PTEST(N?)(MB|MD|MQ|MW)(Z?)(rrk)"
+ )>;
+
+// CONFLICT Instructions
+def Zn4CONFLICTZ128: SchedWriteRes<[Zn4FPFMisc01]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4CONFLICTZ128], (instregex
+ "VPCONFLICT(D|Q)(Z128)(rr|rrk|rrkz)"
+ )>;
+
+def Zn4CONFLICTrr: SchedWriteRes<[Zn4FPFMisc01,Zn4FPFMisc12,Zn4FPFMisc23]> {
+ let Latency = 6;
+ let ResourceCycles = [2,2,2];
+ let NumMicroOps = 4;
+}
+def : InstRW<[Zn4CONFLICTrr], (instregex
+ "VPCONFLICT(D|Q)(Z|Z256)(rr|rrkz)"
+ )>;
+
+// RSQRT Instructions
+def Zn4VRSQRT14PDZ256: SchedWriteRes<[Zn4FPFMisc01]> {
+ let Latency = 5;
+ let ResourceCycles = [2];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4VRSQRT14PDZ256], (instregex
+ "VRSQRT14(PD|PS)(Z?|Z128?|Z256?)(r|rr|rk|rrk|rkz|rrkz)"
+ )>;
+
+
+// PERM Instructions
+def Zn4PERMILP: SchedWriteRes<[Zn4FPFMisc123]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4PERMILP], (instregex
+ "VPERMILP(S|D)(Y|Z|Z128|Z256)(rr|rrk|rrkz)"
+ )>;
+
+def Zn4PERMIT2_128: SchedWriteRes<[Zn4FPFMisc12]> {
+ let Latency = 3;
+ let ResourceCycles = [2];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4PERMIT2_128], (instregex
+ "VPERM(I2|T2)(PS|PD|W)128(rr|rrk|rrkz)",
+ "VPERM(I2|T2)(B|D|Q)128(rr|rrk|rrkz)"
+ )>;
+
+def Zn4PERMIT2_128rr:SchedWriteRes<[Zn4FPFMisc12]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4PERMIT2_128rr], (instregex
+ "V(P?)COMPRESS(B|W|D|Q|PD|PS|SD|SQ)Z128(rr|rrk|rrkz)",
+ "VPERM(B|D|Q|W)(Z128?)(rr|rrk|rrkz)"
+ )>;
+
+def Zn4PERMIT2_256: SchedWriteRes<[Zn4FPFMisc12]> {
+ let Latency = 4;
+ let ResourceCycles = [2];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4PERMIT2_256], (instregex
+ "VPERM(I2|T2)(PS|PD|W)256(rr|rrk|rrkz)",
+ "VPERMP(S|D)Z256(rr|rrk|rrkz)",
+ "V(P?)COMPRESS(B|W|D|Q|PD|PS|SD|SQ)Z256(rr|rrk|rrkz)",
+ "VPERM(B|D|Q|W)Z256(rr|rrk|rrkz)",
+ "VPERM(I2|Q|T2)(B|D|Q)(Z?)256(rr|rrk|rrkz)",
+ "VPEXPAND(B|W)Z256(rr|rrk|rrkz)"
+ )>;
+
+def Zn4PERMIT2Z: SchedWriteRes<[Zn4FPFMisc12]> {
+ let Latency = 5;
+ let ResourceCycles = [2];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4PERMIT2Z], (instregex
+ "VPERM(I2|T2)(PS|PD|W)(rr|rrk|rrkz)",
+ "VPERM(B|D|W)Z(rr|rrk|rrkz)",
+ "VPERM(I2|Q|T2)(B|D|Q)(Z?)(rr|rrk|rrkz)",
+ "V(P?)COMPRESS(B|W|D|Q|PD|PS|SD|SQ)Z(rr|rrk|rrkz)",
+ "VPEXPAND(B|W)Z(rr|rrk|rrkz)",
+ "VPERMP(S|D)Z(rr|rrk|rrkz)"
+ )>;
+
+// ALU SLOW Misc Instructions
+def Zn4VecALUZSlow: SchedWriteRes<[Zn4FPFMisc01]> {
+ let Latency = 2;
+ let ResourceCycles = [2];
+ let NumMicroOps = 1;
+}
+def : InstRW<[Zn4VecALUZSlow], (instrs
+ VPABSBZ128rr, VPABSBZ128rrk, VPABSBZ128rrkz, VPABSDZ128rr,
+ VPABSDZ128rrk, VPABSDZ128rrkz, VPABSQZ128rr, VPABSQZ128rrk,
+ VPABSQZ128rrkz, VPABSWZ128rr, VPABSWZ128rrk, VPABSWZ128rrkz,
+ VPADDSBZ128rr, VPADDSBZ128rrk, VPADDSBZ128rrkz, VPADDSWZ128rr,
+ VPADDSWZ128rrk, VPADDSWZ128rrkz,VPADDUSBZ128rr, VPADDUSBZ128rrk,
+ VPADDUSBZ128rrkz, VPADDUSWZ128rr, VPADDUSWZ128rrk, VPADDUSWZ128rrkz,
+ VPAVGBZ128rr, VPAVGBZ128rrk, VPAVGBZ128rrkz, VPAVGWZ128rr,
+ VPAVGWZ128rrk, VPAVGWZ128rrkz, VPOPCNTBZ128rr, VPOPCNTBZ128rrk,
+ VPOPCNTBZ128rrkz, VPOPCNTDZ128rr, VPOPCNTDZ128rrk, VPOPCNTDZ128rrkz,
+ VPOPCNTQZ128rr, VPOPCNTQZ128rrk,VPOPCNTQZ128rrkz, VPOPCNTWZ128rr,
+ VPOPCNTWZ128rrk, VPOPCNTWZ128rrkz,VPSUBSBZ128rr, VPSUBSBZ128rrk,
+ VPSUBSBZ128rrkz, VPSUBSWZ128rr, VPSUBSWZ128rrk, VPSUBSWZ128rrkz,
+ VPSUBUSBZ128rr, VPSUBUSBZ128rrk, VPSUBUSBZ128rrkz,VPSUBUSWZ128rr,
+ VPSUBUSWZ128rrk, VPSUBUSWZ128rrkz
+ )>;
+
+
+///////////////////////////////////////////////////////////////////////////////
+// Dependency breaking instructions.
+///////////////////////////////////////////////////////////////////////////////
+
+def Zn4WriteZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteALU]>
+]>;
+def : InstRW<[Zn4WriteZeroIdiom], (instrs XOR32rr, XOR32rr_REV,
+ XOR64rr, XOR64rr_REV,
+ SUB32rr, SUB32rr_REV,
+ SUB64rr, SUB64rr_REV)>;
+
+def Zn4WriteZeroIdiomEFLAGS : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<CheckSameRegOperand<0, 1>>, [Zn4WriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteALU]>
+]>;
+def : InstRW<[Zn4WriteZeroIdiomEFLAGS], (instrs CMP8rr, CMP8rr_REV,
+ CMP16rr, CMP16rr_REV,
+ CMP32rr, CMP32rr_REV,
+ CMP64rr, CMP64rr_REV)>;
+
+def Zn4WriteFZeroIdiom : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogic]>
+]>;
+// NOTE: XORPSrr, XORPDrr are not zero-cycle!
+def : InstRW<[Zn4WriteFZeroIdiom], (instrs VXORPSrr, VXORPDrr,
+ VANDNPSrr, VANDNPDrr)>;
+
+def Zn4WriteFZeroIdiomY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteFLogicY]>
+]>;
+def : InstRW<[Zn4WriteFZeroIdiomY], (instrs VXORPSYrr, VXORPDYrr,
+ VANDNPSYrr, VANDNPDYrr)>;
+
+def Zn4WriteVZeroIdiomLogicX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicX]>
+]>;
+// NOTE: PXORrr,PANDNrr are not zero-cycle!
+def : InstRW<[Zn4WriteVZeroIdiomLogicX], (instrs VPXORrr, VPANDNrr)>;
+
+def Zn4WriteVZeroIdiomLogicY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecLogicY]>
+]>;
+def : InstRW<[Zn4WriteVZeroIdiomLogicY], (instrs VPXORYrr, VPANDNYrr)>;
+
+def Zn4WriteVZeroIdiomALUX : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecALUX]>
+]>;
+// NOTE: PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
+// PCMPGTBrr, PCMPGTWrr, PCMPGTDrr, PCMPGTQrr are not zero-cycle!
+def : InstRW<[Zn4WriteVZeroIdiomALUX],
+ (instrs VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
+ VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr)>;
+
+def Zn4WriteVZeroIdiomALUY : SchedWriteVariant<[
+ SchedVar<MCSchedPredicate<ZeroIdiomPredicate>, [Zn4WriteZeroLatency]>,
+ SchedVar<NoSchedPred, [WriteVecALUY]>
+]>;
+def : InstRW<[Zn4WriteVZeroIdiomALUY],
+ (instrs VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
+ VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr)>;
+
+def : IsZeroIdiomFunction<[
+ // GPR Zero-idioms.
+ DepBreakingClass<[ XOR32rr, XOR32rr_REV,
+ XOR64rr, XOR64rr_REV,
+ SUB32rr, SUB32rr_REV,
+ SUB64rr, SUB64rr_REV ], ZeroIdiomPredicate>,
+
+ // SSE XMM Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ XORPSrr, XORPDrr,
+ ANDNPSrr, ANDNPDrr,
+
+ // int variants.
+ PXORrr,
+ PANDNrr,
+ PSUBBrr, PSUBWrr, PSUBDrr, PSUBQrr,
+ PSUBSBrr, PSUBSWrr,
+ PSUBUSBrr, PSUBUSWrr,
+ PCMPGTBrr, PCMPGTWrr, PCMPGTDrr, PCMPGTQrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX XMM Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ VXORPSrr, VXORPDrr,
+ VANDNPSrr, VANDNPDrr,
+
+ // int variants.
+ VPXORrr,
+ VPANDNrr,
+ VPSUBBrr, VPSUBWrr, VPSUBDrr, VPSUBQrr,
+ VPSUBSBrr, VPSUBSWrr,
+ VPSUBUSBrr, VPSUBUSWrr,
+ VPCMPGTBrr, VPCMPGTWrr, VPCMPGTDrr, VPCMPGTQrr,
+ ], ZeroIdiomPredicate>,
+
+ // AVX YMM Zero-idioms.
+ DepBreakingClass<[
+ // fp variants.
+ VXORPSYrr, VXORPDYrr,
+ VANDNPSYrr, VANDNPDYrr,
+
+ // int variants.
+ VPXORYrr,
+ VPANDNYrr,
+ VPSUBBYrr, VPSUBWYrr, VPSUBDYrr, VPSUBQYrr,
+ VPSUBSBYrr, VPSUBSWYrr,
+ VPSUBUSBYrr, VPSUBUSWYrr,
+ VPCMPGTBYrr, VPCMPGTWYrr, VPCMPGTDYrr, VPCMPGTQYrr
+ ], ZeroIdiomPredicate>,
+]>;
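
The zero-idiom classes above match the register-XOR-itself patterns compilers emit to materialise a zero vector; per the NOTE comments, only the VEX forms are also modelled as zero cycle here. A minimal C++ sketch of the idiom, assuming GCC or Clang targeting x86-64 (illustration only, not part of the scheduling model):

#include <xmmintrin.h>

// _mm_setzero_ps() is normally lowered to "xorps %xmm0, %xmm0" (or vxorps
// under AVX), i.e. exactly the XORPSrr / VXORPSrr zero idiom listed above:
// the result does not depend on the register's previous contents, so the
// dependency can be broken even though only the VEX form is additionally
// treated as zero latency in this model.
__m128 makeZeroVector() {
  return _mm_setzero_ps();
}
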
+
+def : IsDepBreakingFunction<[
+ // GPR
+ DepBreakingClass<[ SBB32rr, SBB32rr_REV,
+ SBB64rr, SBB64rr_REV ], ZeroIdiomPredicate>,
+ DepBreakingClass<[ CMP8rr, CMP8rr_REV,
+ CMP16rr, CMP16rr_REV,
+ CMP32rr, CMP32rr_REV,
+ CMP64rr, CMP64rr_REV ], CheckSameRegOperand<0, 1> >,
+ // SSE
+ DepBreakingClass<[
+ PCMPEQBrr, PCMPEQWrr, PCMPEQDrr, PCMPEQQrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX XMM
+ DepBreakingClass<[
+ VPCMPEQBrr, VPCMPEQWrr, VPCMPEQDrr, VPCMPEQQrr
+ ], ZeroIdiomPredicate>,
+
+ // AVX YMM
+ DepBreakingClass<[
+ VPCMPEQBYrr, VPCMPEQWYrr, VPCMPEQDYrr, VPCMPEQQYrr
+ ], ZeroIdiomPredicate>,
+]>;
+
+} // SchedModel
+
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index f47d8a6e8348..7c630a2b0da0 100644
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -71,7 +71,7 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
return SDValue();
uint64_t SizeVal = ConstantSize->getZExtValue();
- SDValue InFlag;
+ SDValue InGlue;
EVT AVT;
SDValue Count;
unsigned BytesLeft = 0;
@@ -110,25 +110,25 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
}
Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
- InFlag);
- InFlag = Chain.getValue(1);
+ InGlue);
+ InGlue = Chain.getValue(1);
} else {
AVT = MVT::i8;
Count = DAG.getIntPtrConstant(SizeVal, dl);
- Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InFlag);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InGlue);
+ InGlue = Chain.getValue(1);
}
bool Use64BitRegs = Subtarget.isTarget64BitLP64();
Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
- Count, InFlag);
- InFlag = Chain.getValue(1);
+ Count, InGlue);
+ InGlue = Chain.getValue(1);
Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
- Dst, InFlag);
- InFlag = Chain.getValue(1);
+ Dst, InGlue);
+ InGlue = Chain.getValue(1);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+ SDValue Ops[] = { Chain, DAG.getValueType(AVT), InGlue };
Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
if (BytesLeft) {
@@ -159,16 +159,16 @@ static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG,
const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;
- SDValue InFlag;
- Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InFlag);
- InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InFlag);
- InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, SI, Src, InFlag);
- InFlag = Chain.getValue(1);
+ SDValue InGlue;
+ Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InGlue);
+ InGlue = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InGlue);
+ InGlue = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, SI, Src, InGlue);
+ InGlue = Chain.getValue(1);
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
- SDValue Ops[] = {Chain, DAG.getValueType(AVT), InFlag};
+ SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
return DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
}
@@ -182,7 +182,8 @@ static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
/// Returns the best type to use with repmovs depending on alignment.
static MVT getOptimalRepmovsType(const X86Subtarget &Subtarget,
- uint64_t Align) {
+ Align Alignment) {
+ uint64_t Align = Alignment.value();
assert((Align != 0) && "Align is normalized");
assert(isPowerOf2_64(Align) && "Align is a power of 2");
switch (Align) {
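
A standalone sketch of the mapping this hunk is about, with illustrative cutoffs (the real helper returns an MVT rather than a byte count): the known destination alignment caps the element width a rep-movs expansion may use.

#include <cassert>
#include <cstdint>

// Hedged sketch, not the LLVM helper itself: pick the widest rep-movs block
// size, in bytes, usable for a copy with the given destination alignment.
static unsigned repmovsBlockBytes(uint64_t Alignment, bool Is64Bit) {
  assert(Alignment != 0 && (Alignment & (Alignment - 1)) == 0 &&
         "Align is normalized to a non-zero power of two");
  if (Alignment >= 8)
    return Is64Bit ? 8 : 4; // rep movsq on 64-bit targets, else rep movsd
  if (Alignment == 4)
    return 4;               // rep movsd
  if (Alignment == 2)
    return 2;               // rep movsw
  return 1;                 // rep movsb
}
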
@@ -204,7 +205,7 @@ static MVT getOptimalRepmovsType(const X86Subtarget &Subtarget,
static SDValue emitConstantSizeRepmov(
SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
- unsigned Align, bool isVolatile, bool AlwaysInline,
+ Align Alignment, bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {
/// TODO: Revisit next line: big copy with ERMSB on march >= haswell are very
@@ -219,10 +220,10 @@ static SDValue emitConstantSizeRepmov(
assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
/// We assume runtime memcpy will do a better job for unaligned copies when
/// ERMS is not present.
- if (!AlwaysInline && (Align & 3) != 0)
+ if (!AlwaysInline && (Alignment.value() & 3) != 0)
return SDValue();
- const MVT BlockType = getOptimalRepmovsType(Subtarget, Align);
+ const MVT BlockType = getOptimalRepmovsType(Subtarget, Alignment);
const uint64_t BlockBytes = BlockType.getSizeInBits() / 8;
const uint64_t BlockCount = Size / BlockBytes;
const uint64_t BytesLeft = Size % BlockBytes;
@@ -251,7 +252,7 @@ static SDValue emitConstantSizeRepmov(
Chain, dl,
DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)),
DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)),
- DAG.getConstant(BytesLeft, dl, SizeVT), llvm::Align(Align), isVolatile,
+ DAG.getConstant(BytesLeft, dl, SizeVT), Alignment, isVolatile,
/*AlwaysInline*/ true, /*isTailCall*/ false,
DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset)));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
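
The splitting done in the hunk above reduces to simple arithmetic; a tiny sketch with hypothetical numbers shows how a constant-size copy becomes BlockCount rep-movs elements plus a BytesLeft tail handled by the follow-up memcpy at Dst+Offset.

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t Size = 1003;     // hypothetical constant copy size
  const uint64_t BlockBytes = 8;  // element width picked from the alignment
  const uint64_t BlockCount = Size / BlockBytes; // 125 rep-movs iterations
  const uint64_t BytesLeft  = Size % BlockBytes; // 3 tail bytes, copied by the
                                                 // extra memcpy at Dst+Offset
  std::printf("blocks=%llu tail=%llu\n",
              static_cast<unsigned long long>(BlockCount),
              static_cast<unsigned long long>(BytesLeft));
  return 0;
}
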
@@ -281,10 +282,10 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
/// Handle constant sizes,
if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
- return emitConstantSizeRepmov(
- DAG, Subtarget, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
- Size.getValueType(), Alignment.value(), isVolatile, AlwaysInline,
- DstPtrInfo, SrcPtrInfo);
+ return emitConstantSizeRepmov(DAG, Subtarget, dl, Chain, Dst, Src,
+ ConstantSize->getZExtValue(),
+ Size.getValueType(), Alignment, isVolatile,
+ AlwaysInline, DstPtrInfo, SrcPtrInfo);
return SDValue();
}
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index e991cde5ffbf..c2fe6690479e 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -18,7 +18,6 @@
#include "X86MacroFusion.h"
#include "X86RegisterBankInfo.h"
#include "X86TargetMachine.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
@@ -34,6 +33,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Triple.h"
#if defined(_MSC_VER)
#include <intrin.h>
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 4c7123a1f955..4c11a4212c31 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -17,9 +17,9 @@
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86SelectionDAGInfo.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/TargetParser/Triple.h"
#include <climits>
#include <memory>
@@ -249,6 +249,17 @@ public:
return hasBWI() && canExtendTo512DQ();
}
+ bool hasNoDomainDelay() const { return NoDomainDelay; }
+ bool hasNoDomainDelayMov() const {
+ return hasNoDomainDelay() || NoDomainDelayMov;
+ }
+ bool hasNoDomainDelayBlend() const {
+ return hasNoDomainDelay() || NoDomainDelayBlend;
+ }
+ bool hasNoDomainDelayShuffle() const {
+ return hasNoDomainDelay() || NoDomainDelayShuffle;
+ }
+
// If there are no 512-bit vectors and we prefer not to use 512-bit registers,
// disable them in the legalizer.
bool useAVX512Regs() const {
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 3e8e8af7c2cc..c096e6dd9686 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -24,7 +24,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/ExecutionDomainFix.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
@@ -49,6 +48,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/CFGuard.h"
#include <memory>
#include <optional>
@@ -87,7 +87,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
initializeX86TileConfigPass(PR);
initializeX86FastPreTileConfigPass(PR);
initializeX86FastTileConfigPass(PR);
- initializeX86KCFIPass(PR);
+ initializeKCFIPass(PR);
initializeX86LowerTileCopyPass(PR);
initializeX86ExpandPseudoPass(PR);
initializeX86ExecutionDomainFixPass(PR);
@@ -104,6 +104,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
initializePseudoProbeInserterPass(PR);
initializeX86ReturnThunksPass(PR);
initializeX86DAGToDAGISelPass(PR);
+ initializeX86ArgumentStackSlotPassPass(PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -478,6 +479,7 @@ bool X86PassConfig::addInstSelector() {
addPass(createCleanupLocalDynamicTLSPass());
addPass(createX86GlobalBaseRegPass());
+ addPass(createX86ArgumentStackSlotPass());
return false;
}
@@ -554,7 +556,7 @@ void X86PassConfig::addPostRegAlloc() {
void X86PassConfig::addPreSched2() {
addPass(createX86ExpandPseudoPass());
- addPass(createX86KCFIPass());
+ addPass(createKCFIPass());
}
void X86PassConfig::addPreEmitPass() {
@@ -571,6 +573,8 @@ void X86PassConfig::addPreEmitPass() {
addPass(createX86FixupBWInsts());
addPass(createX86PadShortFunctions());
addPass(createX86FixupLEAs());
+ addPass(createX86FixupInstTuning());
+ addPass(createX86FixupVectorConstants());
}
addPass(createX86EvexToVexInsts());
addPass(createX86DiscriminateMemOpsPass());
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 9366c1b3d0d9..17981b3b9374 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -199,11 +199,11 @@ unsigned X86TTIImpl::getLoadStoreVecRegBitWidth(unsigned) const {
.getFixedValue();
}
-unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
+unsigned X86TTIImpl::getMaxInterleaveFactor(ElementCount VF) {
// If the loop will not be vectorized, don't interleave the loop.
// Let regular unroll to unroll the loop, which saves the overflow
// check and memory check cost.
- if (VF == 1)
+ if (VF.isScalar())
return 1;
if (ST->isAtom())
@@ -224,8 +224,9 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
const Instruction *CxtI) {
// vXi8 multiplications are always promoted to vXi16.
+ // Sub-128-bit types can be extended/packed more efficiently.
if (Opcode == Instruction::Mul && Ty->isVectorTy() &&
- Ty->getScalarSizeInBits() == 8) {
+ Ty->getPrimitiveSizeInBits() <= 64 && Ty->getScalarSizeInBits() == 8) {
Type *WideVecTy =
VectorType::getExtendedElementVectorType(cast<VectorType>(Ty));
return getCastInstrCost(Instruction::ZExt, WideVecTy, Ty,
@@ -244,7 +245,8 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
assert(ISD && "Invalid opcode");
if (ISD == ISD::MUL && Args.size() == 2 && LT.second.isVector() &&
- LT.second.getScalarType() == MVT::i32) {
+ (LT.second.getScalarType() == MVT::i32 ||
+ LT.second.getScalarType() == MVT::i64)) {
// Check if the operands can be represented as a smaller datatype.
bool Op1Signed = false, Op2Signed = false;
unsigned Op1MinSize = BaseT::minRequiredElementSize(Args[0], Op1Signed);
@@ -252,10 +254,11 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize);
bool SignedMode = Op1Signed || Op2Signed;
- // If both are representable as i15 and at least one is constant,
+ // If both vXi32 are representable as i15 and at least one is constant,
// zero-extended, or sign-extended from vXi16 (or less pre-SSE41) then we
// can treat this as PMADDWD which has the same costs as a vXi16 multiply.
- if (OpMinSize <= 15 && !ST->isPMADDWDSlow()) {
+ if (OpMinSize <= 15 && !ST->isPMADDWDSlow() &&
+ LT.second.getScalarType() == MVT::i32) {
bool Op1Constant =
isa<ConstantDataVector>(Args[0]) || isa<ConstantVector>(Args[0]);
bool Op2Constant =
@@ -286,6 +289,12 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
if (!SignedMode && OpMinSize <= 16)
return LT.first * 5; // pmullw/pmulhw/pshuf
}
+
+ // If both vXi64 are representable as (unsigned) i32, then we can perform
+ // the multiply with a single PMULUDQ instruction.
+ // TODO: Add (SSE41+) PMULDQ handling for signed extensions.
+ if (!SignedMode && OpMinSize <= 32 && LT.second.getScalarType() == MVT::i64)
+ ISD = X86ISD::PMULUDQ;
}
// Vector multiply by pow2 will be simplified to shifts.
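
The two cost-model special cases added above correspond to SSE tricks that can be shown directly with intrinsics; a short hedged sketch with made-up values (SSE2 only, not taken from the patch):

#include <emmintrin.h>
#include <cstdint>
#include <cstdio>

int main() {
  // vXi32 multiply where both operands fit in i15: every 32-bit lane is
  // {lo16 = value, hi16 = 0}, so PMADDWD's lo*lo + hi*hi collapses to a
  // per-lane multiply, which is why it can be costed like a vXi16 multiply.
  __m128i A32 = _mm_set_epi32(3, 5, 7, 1000);
  __m128i B32 = _mm_set_epi32(9, 11, 13, 2000);
  __m128i P32 = _mm_madd_epi16(A32, B32); // lanes, low to high: 2000000, 91, 55, 27

  // vXi64 multiply where both operands fit in an unsigned i32: one PMULUDQ
  // already yields the full 64-bit product of the low dwords of each lane,
  // instead of the generic 3*pmuludq/3*shift/2*add sequence.
  __m128i A64 = _mm_set_epi64x(100000, 7);
  __m128i B64 = _mm_set_epi64x(300000, 6);
  __m128i P64 = _mm_mul_epu32(A64, B64);  // lanes, low to high: 42, 30000000000

  uint32_t R32[4];
  uint64_t R64[2];
  _mm_storeu_si128(reinterpret_cast<__m128i *>(R32), P32);
  _mm_storeu_si128(reinterpret_cast<__m128i *>(R64), P64);
  std::printf("%u %u %u %u | %llu %llu\n", R32[0], R32[1], R32[2], R32[3],
              static_cast<unsigned long long>(R64[0]),
              static_cast<unsigned long long>(R64[1]));
  return 0;
}
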
@@ -821,6 +830,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
{ ISD::SUB, MVT::v64i8, { 1, 1, 1, 1 } }, // psubb
{ ISD::SUB, MVT::v32i16, { 1, 1, 1, 1 } }, // psubw
+ { ISD::MUL, MVT::v64i8, { 5, 10,10,11 } },
{ ISD::MUL, MVT::v32i16, { 1, 5, 1, 1 } }, // pmullw
{ ISD::SUB, MVT::v32i8, { 1, 1, 1, 1 } }, // psubb
@@ -891,6 +901,8 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
{ ISD::MUL, MVT::v8i64, { 6, 9, 8, 8 } }, // 3*pmuludq/3*shift/2*add
{ ISD::MUL, MVT::i64, { 1 } }, // Skylake from http://www.agner.org/
+ { X86ISD::PMULUDQ, MVT::v8i64, { 1, 5, 1, 1 } },
+
{ ISD::FNEG, MVT::v8f64, { 1, 1, 1, 2 } }, // Skylake from http://www.agner.org/
{ ISD::FADD, MVT::v8f64, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/
{ ISD::FADD, MVT::v4f64, { 1, 4, 1, 1 } }, // Skylake from http://www.agner.org/
@@ -1083,12 +1095,16 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
{ ISD::SUB, MVT::v4i64, { 1, 1, 1, 2 } }, // psubq
{ ISD::ADD, MVT::v4i64, { 1, 1, 1, 2 } }, // paddq
- { ISD::MUL, MVT::v16i16, { 2, 5, 1, 1 } }, // pmullw
+ { ISD::MUL, MVT::v16i8, { 5, 18, 6,12 } }, // extend/pmullw/pack
+ { ISD::MUL, MVT::v32i8, { 6, 11,10,19 } }, // unpack/pmullw
+ { ISD::MUL, MVT::v16i16, { 2, 5, 1, 2 } }, // pmullw
{ ISD::MUL, MVT::v8i32, { 4, 10, 1, 2 } }, // pmulld
{ ISD::MUL, MVT::v4i32, { 2, 10, 1, 2 } }, // pmulld
{ ISD::MUL, MVT::v4i64, { 6, 10, 8,13 } }, // 3*pmuludq/3*shift/2*add
{ ISD::MUL, MVT::v2i64, { 6, 10, 8, 8 } }, // 3*pmuludq/3*shift/2*add
+ { X86ISD::PMULUDQ, MVT::v4i64, { 1, 5, 1, 1 } },
+
{ ISD::FNEG, MVT::v4f64, { 1, 1, 1, 2 } }, // vxorpd
{ ISD::FNEG, MVT::v8f32, { 1, 1, 1, 2 } }, // vxorps
@@ -1131,6 +1147,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
// We don't have to scalarize unsupported ops. We can issue two half-sized
// operations and we only need to extract the upper YMM half.
// Two ops + 1 extract + 1 insert = 4.
+ { ISD::MUL, MVT::v32i8, { 12, 13, 22, 23 } }, // unpack/pmullw + split
{ ISD::MUL, MVT::v16i16, { 4, 8, 5, 6 } }, // pmullw + split
{ ISD::MUL, MVT::v8i32, { 5, 8, 5, 10 } }, // pmulld + split
{ ISD::MUL, MVT::v4i32, { 2, 5, 1, 3 } }, // pmulld
@@ -1270,6 +1287,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v4i32, { 16, 17,15,19 } }, // Shift each lane + blend.
{ ISD::SRA, MVT::v2i64, { 8, 17, 5, 7 } }, // splat+shuffle sequence.
+ { ISD::MUL, MVT::v16i8, { 5, 18,10,12 } }, // 2*unpack/2*pmullw/2*and/pack
{ ISD::MUL, MVT::v4i32, { 2, 11, 1, 1 } } // pmulld (Nehalem from agner.org)
};
@@ -1314,9 +1332,12 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
{ ISD::ADD, MVT::v2i64, { 1, 2, 1, 2 } }, // paddq
{ ISD::SUB, MVT::v2i64, { 1, 2, 1, 2 } }, // psubq
+ { ISD::MUL, MVT::v16i8, { 5, 18,12,12 } }, // 2*unpack/2*pmullw/2*and/pack
{ ISD::MUL, MVT::v8i16, { 1, 5, 1, 1 } }, // pmullw
{ ISD::MUL, MVT::v4i32, { 6, 8, 7, 7 } }, // 3*pmuludq/4*shuffle
- { ISD::MUL, MVT::v2i64, { 8, 10, 8, 8 } }, // 3*pmuludq/3*shift/2*add
+ { ISD::MUL, MVT::v2i64, { 7, 10,10,10 } }, // 3*pmuludq/3*shift/2*add
+
+ { X86ISD::PMULUDQ, MVT::v2i64, { 1, 5, 1, 1 } },
{ ISD::FDIV, MVT::f32, { 23, 23, 1, 1 } }, // Pentium IV from http://www.agner.org/
{ ISD::FDIV, MVT::v4f32, { 39, 39, 1, 1 } }, // Pentium IV from http://www.agner.org/
@@ -1370,7 +1391,7 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
static const CostKindTblEntry X64CostTbl[] = { // 64-bit targets
{ ISD::ADD, MVT::i64, { 1 } }, // Core (Merom) from http://www.agner.org/
{ ISD::SUB, MVT::i64, { 1 } }, // Core (Merom) from http://www.agner.org/
- { ISD::MUL, MVT::i64, { 2 } }, // Nehalem from http://www.agner.org/
+ { ISD::MUL, MVT::i64, { 2, 6, 1, 2 } },
};
if (ST->is64Bit())
@@ -1387,6 +1408,10 @@ InstructionCost X86TTIImpl::getArithmeticInstrCost(
{ ISD::SUB, MVT::i16, { 1 } }, // Pentium III from http://www.agner.org/
{ ISD::SUB, MVT::i32, { 1 } }, // Pentium III from http://www.agner.org/
+ { ISD::MUL, MVT::i8, { 3, 4, 1, 1 } },
+ { ISD::MUL, MVT::i16, { 2, 4, 1, 1 } },
+ { ISD::MUL, MVT::i32, { 1, 4, 1, 1 } },
+
{ ISD::FNEG, MVT::f64, { 2, 2, 1, 3 } }, // (x87)
{ ISD::FADD, MVT::f64, { 2, 3, 1, 1 } }, // (x87)
{ ISD::FSUB, MVT::f64, { 2, 3, 1, 1 } }, // (x87)
@@ -1601,7 +1626,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
LegalVT.getVectorNumElements() * std::max(NumOfSrcs, E);
unsigned NumOfSrcRegs = NormalizedVF / LegalVT.getVectorNumElements();
unsigned NumOfDestRegs = NormalizedVF / LegalVT.getVectorNumElements();
- SmallVector<int> NormalizedMask(NormalizedVF, UndefMaskElem);
+ SmallVector<int> NormalizedMask(NormalizedVF, PoisonMaskElem);
copy(Mask, NormalizedMask.begin());
unsigned PrevSrcReg = 0;
ArrayRef<int> PrevRegMask;
@@ -1623,7 +1648,7 @@ InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
return;
}
if (SrcReg != DestReg &&
- any_of(RegMask, [](int I) { return I != UndefMaskElem; })) {
+ any_of(RegMask, [](int I) { return I != PoisonMaskElem; })) {
// Just a copy of the source register.
Cost += TTI::TCC_Basic;
}
@@ -3388,13 +3413,27 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
static const CostKindTblEntry AVX512BWCostTbl[] = {
{ ISD::ABS, MVT::v32i16, { 1, 1, 1, 1 } },
{ ISD::ABS, MVT::v64i8, { 1, 1, 1, 1 } },
- { ISD::BITREVERSE, MVT::v8i64, { 3 } },
- { ISD::BITREVERSE, MVT::v16i32, { 3 } },
- { ISD::BITREVERSE, MVT::v32i16, { 3 } },
- { ISD::BITREVERSE, MVT::v64i8, { 2 } },
- { ISD::BSWAP, MVT::v8i64, { 1 } },
- { ISD::BSWAP, MVT::v16i32, { 1 } },
- { ISD::BSWAP, MVT::v32i16, { 1 } },
+ { ISD::BITREVERSE, MVT::v2i64, { 3, 10, 10, 11 } },
+ { ISD::BITREVERSE, MVT::v4i64, { 3, 11, 10, 11 } },
+ { ISD::BITREVERSE, MVT::v8i64, { 3, 12, 10, 14 } },
+ { ISD::BITREVERSE, MVT::v4i32, { 3, 10, 10, 11 } },
+ { ISD::BITREVERSE, MVT::v8i32, { 3, 11, 10, 11 } },
+ { ISD::BITREVERSE, MVT::v16i32, { 3, 12, 10, 14 } },
+ { ISD::BITREVERSE, MVT::v8i16, { 3, 10, 10, 11 } },
+ { ISD::BITREVERSE, MVT::v16i16, { 3, 11, 10, 11 } },
+ { ISD::BITREVERSE, MVT::v32i16, { 3, 12, 10, 14 } },
+ { ISD::BITREVERSE, MVT::v16i8, { 2, 5, 9, 9 } },
+ { ISD::BITREVERSE, MVT::v32i8, { 2, 5, 9, 9 } },
+ { ISD::BITREVERSE, MVT::v64i8, { 2, 5, 9, 12 } },
+ { ISD::BSWAP, MVT::v2i64, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v4i64, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v8i64, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v4i32, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v8i32, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v16i32, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v8i16, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v16i16, { 1, 1, 1, 2 } },
+ { ISD::BSWAP, MVT::v32i16, { 1, 1, 1, 2 } },
{ ISD::CTLZ, MVT::v8i64, { 8, 22, 23, 23 } },
{ ISD::CTLZ, MVT::v16i32, { 8, 23, 25, 25 } },
{ ISD::CTLZ, MVT::v32i16, { 4, 15, 15, 16 } },
@@ -3456,13 +3495,13 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::ABS, MVT::v16i16, { 1, 1, 1, 1 } },
{ ISD::ABS, MVT::v64i8, { 2, 7, 4, 4 } },
{ ISD::ABS, MVT::v32i8, { 1, 1, 1, 1 } },
- { ISD::BITREVERSE, MVT::v8i64, { 36 } },
- { ISD::BITREVERSE, MVT::v16i32, { 24 } },
- { ISD::BITREVERSE, MVT::v32i16, { 10 } },
- { ISD::BITREVERSE, MVT::v64i8, { 10 } },
- { ISD::BSWAP, MVT::v8i64, { 4 } },
- { ISD::BSWAP, MVT::v16i32, { 4 } },
- { ISD::BSWAP, MVT::v32i16, { 4 } },
+ { ISD::BITREVERSE, MVT::v8i64, { 9, 13, 20, 20 } },
+ { ISD::BITREVERSE, MVT::v16i32, { 9, 13, 20, 20 } },
+ { ISD::BITREVERSE, MVT::v32i16, { 9, 13, 20, 20 } },
+ { ISD::BITREVERSE, MVT::v64i8, { 6, 11, 17, 17 } },
+ { ISD::BSWAP, MVT::v8i64, { 4, 7, 5, 5 } },
+ { ISD::BSWAP, MVT::v16i32, { 4, 7, 5, 5 } },
+ { ISD::BSWAP, MVT::v32i16, { 4, 7, 5, 5 } },
{ ISD::CTLZ, MVT::v8i64, { 10, 28, 32, 32 } },
{ ISD::CTLZ, MVT::v16i32, { 12, 30, 38, 38 } },
{ ISD::CTLZ, MVT::v32i16, { 8, 15, 29, 29 } },
@@ -3527,36 +3566,36 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::UADDSAT, MVT::v64i8, { 2 } },
{ ISD::USUBSAT, MVT::v32i16, { 2 } },
{ ISD::USUBSAT, MVT::v64i8, { 2 } },
- { ISD::FMAXNUM, MVT::f32, { 2 } },
- { ISD::FMAXNUM, MVT::v4f32, { 2 } },
- { ISD::FMAXNUM, MVT::v8f32, { 2 } },
- { ISD::FMAXNUM, MVT::v16f32, { 2 } },
- { ISD::FMAXNUM, MVT::f64, { 2 } },
- { ISD::FMAXNUM, MVT::v2f64, { 2 } },
- { ISD::FMAXNUM, MVT::v4f64, { 2 } },
- { ISD::FMAXNUM, MVT::v8f64, { 2 } },
- { ISD::FSQRT, MVT::f32, { 3, 12, 1, 1 } }, // Skylake from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f32, { 3, 12, 1, 1 } }, // Skylake from http://www.agner.org/
- { ISD::FSQRT, MVT::v8f32, { 6, 12, 1, 1 } }, // Skylake from http://www.agner.org/
- { ISD::FSQRT, MVT::v16f32, { 12, 20, 1, 3 } }, // Skylake from http://www.agner.org/
- { ISD::FSQRT, MVT::f64, { 6, 18, 1, 1 } }, // Skylake from http://www.agner.org/
- { ISD::FSQRT, MVT::v2f64, { 6, 18, 1, 1 } }, // Skylake from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f64, { 12, 18, 1, 1 } }, // Skylake from http://www.agner.org/
- { ISD::FSQRT, MVT::v8f64, { 24, 32, 1, 3 } }, // Skylake from http://www.agner.org/
+ { ISD::FMAXNUM, MVT::f32, { 2, 2, 3, 3 } },
+ { ISD::FMAXNUM, MVT::v4f32, { 1, 1, 3, 3 } },
+ { ISD::FMAXNUM, MVT::v8f32, { 2, 2, 3, 3 } },
+ { ISD::FMAXNUM, MVT::v16f32, { 4, 4, 3, 3 } },
+ { ISD::FMAXNUM, MVT::f64, { 2, 2, 3, 3 } },
+ { ISD::FMAXNUM, MVT::v2f64, { 1, 1, 3, 3 } },
+ { ISD::FMAXNUM, MVT::v4f64, { 2, 2, 3, 3 } },
+ { ISD::FMAXNUM, MVT::v8f64, { 3, 3, 3, 3 } },
+ { ISD::FSQRT, MVT::f32, { 3, 12, 1, 1 } }, // Skylake from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f32, { 3, 12, 1, 1 } }, // Skylake from http://www.agner.org/
+ { ISD::FSQRT, MVT::v8f32, { 6, 12, 1, 1 } }, // Skylake from http://www.agner.org/
+ { ISD::FSQRT, MVT::v16f32, { 12, 20, 1, 3 } }, // Skylake from http://www.agner.org/
+ { ISD::FSQRT, MVT::f64, { 6, 18, 1, 1 } }, // Skylake from http://www.agner.org/
+ { ISD::FSQRT, MVT::v2f64, { 6, 18, 1, 1 } }, // Skylake from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f64, { 12, 18, 1, 1 } }, // Skylake from http://www.agner.org/
+ { ISD::FSQRT, MVT::v8f64, { 24, 32, 1, 3 } }, // Skylake from http://www.agner.org/
};
static const CostKindTblEntry XOPCostTbl[] = {
- { ISD::BITREVERSE, MVT::v4i64, { 4 } },
- { ISD::BITREVERSE, MVT::v8i32, { 4 } },
- { ISD::BITREVERSE, MVT::v16i16, { 4 } },
- { ISD::BITREVERSE, MVT::v32i8, { 4 } },
- { ISD::BITREVERSE, MVT::v2i64, { 1 } },
- { ISD::BITREVERSE, MVT::v4i32, { 1 } },
- { ISD::BITREVERSE, MVT::v8i16, { 1 } },
- { ISD::BITREVERSE, MVT::v16i8, { 1 } },
- { ISD::BITREVERSE, MVT::i64, { 3 } },
- { ISD::BITREVERSE, MVT::i32, { 3 } },
- { ISD::BITREVERSE, MVT::i16, { 3 } },
- { ISD::BITREVERSE, MVT::i8, { 3 } },
+ { ISD::BITREVERSE, MVT::v4i64, { 3, 6, 5, 6 } },
+ { ISD::BITREVERSE, MVT::v8i32, { 3, 6, 5, 6 } },
+ { ISD::BITREVERSE, MVT::v16i16, { 3, 6, 5, 6 } },
+ { ISD::BITREVERSE, MVT::v32i8, { 3, 6, 5, 6 } },
+ { ISD::BITREVERSE, MVT::v2i64, { 2, 7, 1, 1 } },
+ { ISD::BITREVERSE, MVT::v4i32, { 2, 7, 1, 1 } },
+ { ISD::BITREVERSE, MVT::v8i16, { 2, 7, 1, 1 } },
+ { ISD::BITREVERSE, MVT::v16i8, { 2, 7, 1, 1 } },
+ { ISD::BITREVERSE, MVT::i64, { 2, 2, 3, 4 } },
+ { ISD::BITREVERSE, MVT::i32, { 2, 2, 3, 4 } },
+ { ISD::BITREVERSE, MVT::i16, { 2, 2, 3, 4 } },
+ { ISD::BITREVERSE, MVT::i8, { 2, 2, 3, 4 } },
// XOP: ROTL = VPROT(X,Y), ROTR = VPROT(X,SUB(0,Y))
{ ISD::ROTL, MVT::v4i64, { 4, 7, 5, 6 } },
{ ISD::ROTL, MVT::v8i32, { 4, 7, 5, 6 } },
@@ -3584,17 +3623,20 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::ABS, MVT::v16i16, { 1, 1, 1, 2 } },
{ ISD::ABS, MVT::v16i8, { 1, 1, 1, 1 } },
{ ISD::ABS, MVT::v32i8, { 1, 1, 1, 2 } },
- { ISD::BITREVERSE, MVT::v2i64, { 3 } },
- { ISD::BITREVERSE, MVT::v4i64, { 3 } },
- { ISD::BITREVERSE, MVT::v4i32, { 3 } },
- { ISD::BITREVERSE, MVT::v8i32, { 3 } },
- { ISD::BITREVERSE, MVT::v8i16, { 3 } },
- { ISD::BITREVERSE, MVT::v16i16, { 3 } },
- { ISD::BITREVERSE, MVT::v16i8, { 3 } },
- { ISD::BITREVERSE, MVT::v32i8, { 3 } },
- { ISD::BSWAP, MVT::v4i64, { 1 } },
- { ISD::BSWAP, MVT::v8i32, { 1 } },
- { ISD::BSWAP, MVT::v16i16, { 1 } },
+ { ISD::BITREVERSE, MVT::v2i64, { 3, 11, 10, 11 } },
+ { ISD::BITREVERSE, MVT::v4i64, { 5, 11, 10, 17 } },
+ { ISD::BITREVERSE, MVT::v4i32, { 3, 11, 10, 11 } },
+ { ISD::BITREVERSE, MVT::v8i32, { 5, 11, 10, 17 } },
+ { ISD::BITREVERSE, MVT::v8i16, { 3, 11, 10, 11 } },
+ { ISD::BITREVERSE, MVT::v16i16, { 5, 11, 10, 17 } },
+ { ISD::BITREVERSE, MVT::v16i8, { 3, 6, 9, 9 } },
+ { ISD::BITREVERSE, MVT::v32i8, { 4, 5, 9, 15 } },
+ { ISD::BSWAP, MVT::v2i64, { 1, 2, 1, 2 } },
+ { ISD::BSWAP, MVT::v4i64, { 1, 3, 1, 2 } },
+ { ISD::BSWAP, MVT::v4i32, { 1, 2, 1, 2 } },
+ { ISD::BSWAP, MVT::v8i32, { 1, 3, 1, 2 } },
+ { ISD::BSWAP, MVT::v8i16, { 1, 2, 1, 2 } },
+ { ISD::BSWAP, MVT::v16i16, { 1, 3, 1, 2 } },
{ ISD::CTLZ, MVT::v2i64, { 7, 18, 24, 25 } },
{ ISD::CTLZ, MVT::v4i64, { 14, 18, 24, 44 } },
{ ISD::CTLZ, MVT::v4i32, { 5, 16, 19, 20 } },
@@ -3649,27 +3691,38 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::USUBSAT, MVT::v16i16, { 1 } },
{ ISD::USUBSAT, MVT::v32i8, { 1 } },
{ ISD::USUBSAT, MVT::v8i32, { 2 } }, // pmaxud + psubd
- { ISD::FMAXNUM, MVT::v8f32, { 3 } }, // MAXPS + CMPUNORDPS + BLENDVPS
- { ISD::FMAXNUM, MVT::v4f64, { 3 } }, // MAXPD + CMPUNORDPD + BLENDVPD
- { ISD::FSQRT, MVT::f32, { 7, 15, 1, 1 } }, // vsqrtss
- { ISD::FSQRT, MVT::v4f32, { 7, 15, 1, 1 } }, // vsqrtps
- { ISD::FSQRT, MVT::v8f32, { 14, 21, 1, 3 } }, // vsqrtps
- { ISD::FSQRT, MVT::f64, { 14, 21, 1, 1 } }, // vsqrtsd
- { ISD::FSQRT, MVT::v2f64, { 14, 21, 1, 1 } }, // vsqrtpd
- { ISD::FSQRT, MVT::v4f64, { 28, 35, 1, 3 } }, // vsqrtpd
+ { ISD::FMAXNUM, MVT::f32, { 2, 7, 3, 5 } }, // MAXSS + CMPUNORDSS + BLENDVPS
+ { ISD::FMAXNUM, MVT::v4f32, { 2, 7, 3, 5 } }, // MAXPS + CMPUNORDPS + BLENDVPS
+ { ISD::FMAXNUM, MVT::v8f32, { 3, 7, 3, 6 } }, // MAXPS + CMPUNORDPS + BLENDVPS
+ { ISD::FMAXNUM, MVT::f64, { 2, 7, 3, 5 } }, // MAXSD + CMPUNORDSD + BLENDVPD
+ { ISD::FMAXNUM, MVT::v2f64, { 2, 7, 3, 5 } }, // MAXPD + CMPUNORDPD + BLENDVPD
+ { ISD::FMAXNUM, MVT::v4f64, { 3, 7, 3, 6 } }, // MAXPD + CMPUNORDPD + BLENDVPD
+ { ISD::FSQRT, MVT::f32, { 7, 15, 1, 1 } }, // vsqrtss
+ { ISD::FSQRT, MVT::v4f32, { 7, 15, 1, 1 } }, // vsqrtps
+ { ISD::FSQRT, MVT::v8f32, { 14, 21, 1, 3 } }, // vsqrtps
+ { ISD::FSQRT, MVT::f64, { 14, 21, 1, 1 } }, // vsqrtsd
+ { ISD::FSQRT, MVT::v2f64, { 14, 21, 1, 1 } }, // vsqrtpd
+ { ISD::FSQRT, MVT::v4f64, { 28, 35, 1, 3 } }, // vsqrtpd
};
static const CostKindTblEntry AVX1CostTbl[] = {
{ ISD::ABS, MVT::v4i64, { 6, 8, 6, 12 } }, // VBLENDVPD(X,VPSUBQ(0,X),X)
{ ISD::ABS, MVT::v8i32, { 3, 6, 4, 5 } },
{ ISD::ABS, MVT::v16i16, { 3, 6, 4, 5 } },
{ ISD::ABS, MVT::v32i8, { 3, 6, 4, 5 } },
- { ISD::BITREVERSE, MVT::v4i64, { 12 } }, // 2 x 128-bit Op + extract/insert
- { ISD::BITREVERSE, MVT::v8i32, { 12 } }, // 2 x 128-bit Op + extract/insert
- { ISD::BITREVERSE, MVT::v16i16, { 12 } }, // 2 x 128-bit Op + extract/insert
- { ISD::BITREVERSE, MVT::v32i8, { 12 } }, // 2 x 128-bit Op + extract/insert
- { ISD::BSWAP, MVT::v4i64, { 4 } },
- { ISD::BSWAP, MVT::v8i32, { 4 } },
- { ISD::BSWAP, MVT::v16i16, { 4 } },
+ { ISD::BITREVERSE, MVT::v4i64, { 17, 20, 20, 33 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::BITREVERSE, MVT::v2i64, { 8, 13, 10, 16 } },
+ { ISD::BITREVERSE, MVT::v8i32, { 17, 20, 20, 33 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::BITREVERSE, MVT::v4i32, { 8, 13, 10, 16 } },
+ { ISD::BITREVERSE, MVT::v16i16, { 17, 20, 20, 33 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::BITREVERSE, MVT::v8i16, { 8, 13, 10, 16 } },
+ { ISD::BITREVERSE, MVT::v32i8, { 13, 15, 17, 26 } }, // 2 x 128-bit Op + extract/insert
+ { ISD::BITREVERSE, MVT::v16i8, { 7, 7, 9, 13 } },
+ { ISD::BSWAP, MVT::v4i64, { 5, 7, 5, 10 } },
+ { ISD::BSWAP, MVT::v2i64, { 2, 3, 1, 3 } },
+ { ISD::BSWAP, MVT::v8i32, { 5, 7, 5, 10 } },
+ { ISD::BSWAP, MVT::v4i32, { 2, 3, 1, 3 } },
+ { ISD::BSWAP, MVT::v16i16, { 5, 6, 5, 10 } },
+ { ISD::BSWAP, MVT::v8i16, { 2, 2, 1, 3 } },
{ ISD::CTLZ, MVT::v4i64, { 29, 33, 49, 58 } }, // 2 x 128-bit Op + extract/insert
{ ISD::CTLZ, MVT::v2i64, { 14, 24, 24, 28 } },
{ ISD::CTLZ, MVT::v8i32, { 24, 28, 39, 48 } }, // 2 x 128-bit Op + extract/insert
@@ -3724,18 +3777,18 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::USUBSAT, MVT::v16i16, { 4 } }, // 2 x 128-bit Op + extract/insert
{ ISD::USUBSAT, MVT::v32i8, { 4 } }, // 2 x 128-bit Op + extract/insert
{ ISD::USUBSAT, MVT::v8i32, { 6 } }, // 2 x 128-bit Op + extract/insert
- { ISD::FMAXNUM, MVT::f32, { 3 } }, // MAXSS + CMPUNORDSS + BLENDVPS
- { ISD::FMAXNUM, MVT::v4f32, { 3 } }, // MAXPS + CMPUNORDPS + BLENDVPS
- { ISD::FMAXNUM, MVT::v8f32, { 5 } }, // MAXPS + CMPUNORDPS + BLENDVPS + ?
- { ISD::FMAXNUM, MVT::f64, { 3 } }, // MAXSD + CMPUNORDSD + BLENDVPD
- { ISD::FMAXNUM, MVT::v2f64, { 3 } }, // MAXPD + CMPUNORDPD + BLENDVPD
- { ISD::FMAXNUM, MVT::v4f64, { 5 } }, // MAXPD + CMPUNORDPD + BLENDVPD + ?
- { ISD::FSQRT, MVT::f32, { 21, 21, 1, 1 } }, // vsqrtss
- { ISD::FSQRT, MVT::v4f32, { 21, 21, 1, 1 } }, // vsqrtps
- { ISD::FSQRT, MVT::v8f32, { 42, 42, 1, 3 } }, // vsqrtps
- { ISD::FSQRT, MVT::f64, { 27, 27, 1, 1 } }, // vsqrtsd
- { ISD::FSQRT, MVT::v2f64, { 27, 27, 1, 1 } }, // vsqrtpd
- { ISD::FSQRT, MVT::v4f64, { 54, 54, 1, 3 } }, // vsqrtpd
+ { ISD::FMAXNUM, MVT::f32, { 3, 6, 3, 5 } }, // MAXSS + CMPUNORDSS + BLENDVPS
+ { ISD::FMAXNUM, MVT::v4f32, { 3, 6, 3, 5 } }, // MAXPS + CMPUNORDPS + BLENDVPS
+ { ISD::FMAXNUM, MVT::v8f32, { 5, 7, 3, 10 } }, // MAXPS + CMPUNORDPS + BLENDVPS
+ { ISD::FMAXNUM, MVT::f64, { 3, 6, 3, 5 } }, // MAXSD + CMPUNORDSD + BLENDVPD
+ { ISD::FMAXNUM, MVT::v2f64, { 3, 6, 3, 5 } }, // MAXPD + CMPUNORDPD + BLENDVPD
+ { ISD::FMAXNUM, MVT::v4f64, { 5, 7, 3, 10 } }, // MAXPD + CMPUNORDPD + BLENDVPD
+ { ISD::FSQRT, MVT::f32, { 21, 21, 1, 1 } }, // vsqrtss
+ { ISD::FSQRT, MVT::v4f32, { 21, 21, 1, 1 } }, // vsqrtps
+ { ISD::FSQRT, MVT::v8f32, { 42, 42, 1, 3 } }, // vsqrtps
+ { ISD::FSQRT, MVT::f64, { 27, 27, 1, 1 } }, // vsqrtsd
+ { ISD::FSQRT, MVT::v2f64, { 27, 27, 1, 1 } }, // vsqrtpd
+ { ISD::FSQRT, MVT::v4f64, { 54, 54, 1, 3 } }, // vsqrtpd
};
static const CostKindTblEntry GLMCostTbl[] = {
{ ISD::FSQRT, MVT::f32, { 19, 20, 1, 1 } }, // sqrtss
@@ -3752,8 +3805,12 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
static const CostKindTblEntry SSE42CostTbl[] = {
{ ISD::USUBSAT, MVT::v4i32, { 2 } }, // pmaxud + psubd
{ ISD::UADDSAT, MVT::v4i32, { 3 } }, // not + pminud + paddd
- { ISD::FSQRT, MVT::f32, { 18, 18, 1, 1 } }, // Nehalem from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f32, { 18, 18, 1, 1 } }, // Nehalem from http://www.agner.org/
+ { ISD::FMAXNUM, MVT::f32, { 5, 5, 7, 7 } }, // MAXSS + CMPUNORDSS + BLENDVPS
+ { ISD::FMAXNUM, MVT::v4f32, { 4, 4, 4, 5 } }, // MAXPS + CMPUNORDPS + BLENDVPS
+ { ISD::FMAXNUM, MVT::f64, { 5, 5, 7, 7 } }, // MAXSD + CMPUNORDSD + BLENDVPD
+ { ISD::FMAXNUM, MVT::v2f64, { 4, 4, 4, 5 } }, // MAXPD + CMPUNORDPD + BLENDVPD
+ { ISD::FSQRT, MVT::f32, { 18, 18, 1, 1 } }, // Nehalem from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f32, { 18, 18, 1, 1 } }, // Nehalem from http://www.agner.org/
};
static const CostKindTblEntry SSE41CostTbl[] = {
{ ISD::ABS, MVT::v2i64, { 3, 4, 3, 5 } }, // BLENDVPD(X,PSUBQ(0,X),X)
@@ -3774,13 +3831,13 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::ABS, MVT::v4i32, { 1, 2, 1, 1 } },
{ ISD::ABS, MVT::v8i16, { 1, 2, 1, 1 } },
{ ISD::ABS, MVT::v16i8, { 1, 2, 1, 1 } },
- { ISD::BITREVERSE, MVT::v2i64, { 5 } },
- { ISD::BITREVERSE, MVT::v4i32, { 5 } },
- { ISD::BITREVERSE, MVT::v8i16, { 5 } },
- { ISD::BITREVERSE, MVT::v16i8, { 5 } },
- { ISD::BSWAP, MVT::v2i64, { 1 } },
- { ISD::BSWAP, MVT::v4i32, { 1 } },
- { ISD::BSWAP, MVT::v8i16, { 1 } },
+ { ISD::BITREVERSE, MVT::v2i64, { 16, 20, 11, 21 } },
+ { ISD::BITREVERSE, MVT::v4i32, { 16, 20, 11, 21 } },
+ { ISD::BITREVERSE, MVT::v8i16, { 16, 20, 11, 21 } },
+ { ISD::BITREVERSE, MVT::v16i8, { 11, 12, 10, 16 } },
+ { ISD::BSWAP, MVT::v2i64, { 5, 5, 1, 5 } },
+ { ISD::BSWAP, MVT::v4i32, { 5, 5, 1, 5 } },
+ { ISD::BSWAP, MVT::v8i16, { 5, 5, 1, 5 } },
{ ISD::CTLZ, MVT::v2i64, { 18, 28, 28, 35 } },
{ ISD::CTLZ, MVT::v4i32, { 15, 20, 22, 28 } },
{ ISD::CTLZ, MVT::v8i16, { 13, 17, 16, 22 } },
@@ -3799,13 +3856,13 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::ABS, MVT::v4i32, { 1, 4, 4, 4 } },
{ ISD::ABS, MVT::v8i16, { 1, 2, 3, 3 } },
{ ISD::ABS, MVT::v16i8, { 1, 2, 3, 3 } },
- { ISD::BITREVERSE, MVT::v2i64, { 29 } },
- { ISD::BITREVERSE, MVT::v4i32, { 27 } },
- { ISD::BITREVERSE, MVT::v8i16, { 27 } },
- { ISD::BITREVERSE, MVT::v16i8, { 20 } },
- { ISD::BSWAP, MVT::v2i64, { 7 } },
- { ISD::BSWAP, MVT::v4i32, { 7 } },
- { ISD::BSWAP, MVT::v8i16, { 7 } },
+ { ISD::BITREVERSE, MVT::v2i64, { 16, 20, 32, 32 } },
+ { ISD::BITREVERSE, MVT::v4i32, { 16, 20, 30, 30 } },
+ { ISD::BITREVERSE, MVT::v8i16, { 16, 20, 25, 25 } },
+ { ISD::BITREVERSE, MVT::v16i8, { 11, 12, 21, 21 } },
+ { ISD::BSWAP, MVT::v2i64, { 5, 6, 11, 11 } },
+ { ISD::BSWAP, MVT::v4i32, { 5, 5, 9, 9 } },
+ { ISD::BSWAP, MVT::v8i16, { 5, 5, 4, 5 } },
{ ISD::CTLZ, MVT::v2i64, { 10, 45, 36, 38 } },
{ ISD::CTLZ, MVT::v4i32, { 10, 45, 38, 40 } },
{ ISD::CTLZ, MVT::v8i16, { 9, 38, 32, 34 } },
@@ -3842,16 +3899,16 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::UMIN, MVT::v16i8, { 1, 1, 1, 1 } },
{ ISD::USUBSAT, MVT::v8i16, { 1 } },
{ ISD::USUBSAT, MVT::v16i8, { 1 } },
- { ISD::FMAXNUM, MVT::f64, { 4 } },
- { ISD::FMAXNUM, MVT::v2f64, { 4 } },
- { ISD::FSQRT, MVT::f64, { 32, 32, 1, 1 } }, // Nehalem from http://www.agner.org/
- { ISD::FSQRT, MVT::v2f64, { 32, 32, 1, 1 } }, // Nehalem from http://www.agner.org/
+ { ISD::FMAXNUM, MVT::f64, { 5, 5, 7, 7 } },
+ { ISD::FMAXNUM, MVT::v2f64, { 4, 6, 6, 6 } },
+ { ISD::FSQRT, MVT::f64, { 32, 32, 1, 1 } }, // Nehalem from http://www.agner.org/
+ { ISD::FSQRT, MVT::v2f64, { 32, 32, 1, 1 } }, // Nehalem from http://www.agner.org/
};
static const CostKindTblEntry SSE1CostTbl[] = {
- { ISD::FMAXNUM, MVT::f32, { 4 } },
- { ISD::FMAXNUM, MVT::v4f32, { 4 } },
- { ISD::FSQRT, MVT::f32, { 28, 30, 1, 2 } }, // Pentium III from http://www.agner.org/
- { ISD::FSQRT, MVT::v4f32, { 56, 56, 1, 2 } }, // Pentium III from http://www.agner.org/
+ { ISD::FMAXNUM, MVT::f32, { 5, 5, 7, 7 } },
+ { ISD::FMAXNUM, MVT::v4f32, { 4, 6, 6, 6 } },
+ { ISD::FSQRT, MVT::f32, { 28, 30, 1, 2 } }, // Pentium III from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f32, { 56, 56, 1, 2 } }, // Pentium III from http://www.agner.org/
};
static const CostKindTblEntry BMI64CostTbl[] = { // 64-bit targets
{ ISD::CTTZ, MVT::i64, { 1 } },
@@ -3879,8 +3936,8 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
};
static const CostKindTblEntry X64CostTbl[] = { // 64-bit targets
{ ISD::ABS, MVT::i64, { 1, 2, 3, 4 } }, // SUB+CMOV
- { ISD::BITREVERSE, MVT::i64, { 14 } },
- { ISD::BSWAP, MVT::i64, { 1 } },
+ { ISD::BITREVERSE, MVT::i64, { 10, 12, 20, 22 } },
+ { ISD::BSWAP, MVT::i64, { 1, 2, 1, 2 } },
{ ISD::CTLZ, MVT::i64, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ_ZERO_UNDEF, MVT::i64,{ 1, 1, 1, 1 } }, // BSR+XOR
{ ISD::CTTZ, MVT::i64, { 3 } }, // TEST+BSF+CMOV/BRANCH
@@ -3901,11 +3958,11 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::ABS, MVT::i32, { 1, 2, 3, 4 } }, // SUB+XOR+SRA or SUB+CMOV
{ ISD::ABS, MVT::i16, { 2, 2, 3, 4 } }, // SUB+XOR+SRA or SUB+CMOV
{ ISD::ABS, MVT::i8, { 2, 4, 4, 4 } }, // SUB+XOR+SRA
- { ISD::BITREVERSE, MVT::i32, { 14 } },
- { ISD::BITREVERSE, MVT::i16, { 14 } },
- { ISD::BITREVERSE, MVT::i8, { 11 } },
- { ISD::BSWAP, MVT::i32, { 1 } },
- { ISD::BSWAP, MVT::i16, { 1 } }, // ROL
+ { ISD::BITREVERSE, MVT::i32, { 9, 12, 17, 19 } },
+ { ISD::BITREVERSE, MVT::i16, { 9, 12, 17, 19 } },
+ { ISD::BITREVERSE, MVT::i8, { 7, 9, 13, 14 } },
+ { ISD::BSWAP, MVT::i32, { 1, 1, 1, 1 } },
+ { ISD::BSWAP, MVT::i16, { 1, 2, 1, 2 } }, // ROL
{ ISD::CTLZ, MVT::i32, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i16, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i8, { 4 } }, // BSR+XOR or BSR+XOR+CMOV
@@ -4345,7 +4402,9 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
// Floating point scalars are already located in index #0.
// Many insertions to #0 can fold away for scalar fp-ops, so let's assume
// true for all.
- if (ScalarType->isFloatingPointTy())
+ if (ScalarType->isFloatingPointTy() &&
+ (Opcode != Instruction::InsertElement || !Op0 ||
+ isa<UndefValue>(Op0)))
return RegisterFileMoveCost;
if (Opcode == Instruction::InsertElement &&
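The tightened condition above keeps the "free" treatment only for extracts and for inserts into an undef base vector: a scalar float already lives in lane 0 of an XMM register, so building a vector from it costs nothing, while merging it into an existing vector still needs a movss or blend. Rough intrinsics illustration (not part of the patch):

#include <xmmintrin.h>

// insertelement <4 x float> undef, float %s, i32 0: the scalar is already
// element 0 of an XMM register, so no extra register-file move is needed.
static inline __m128 build_from_lane0(float s) {
  return _mm_set_ss(s); // lane 0 = s, upper lanes zeroed
}

// Inserting into a *defined* vector is no longer assumed free: merging s
// into lane 0 of v takes a movss (or a blend), hence the Op0/undef check.
static inline __m128 insert_lane0(__m128 v, float s) {
  return _mm_move_ss(v, _mm_set_ss(s));
}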
@@ -4396,11 +4455,6 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
return ShuffleCost + IntOrFpCost + RegisterFileMoveCost;
}
- // Add to the base cost if we know that the extracted element of a vector is
- // destined to be moved to and used in the integer register file.
- if (Opcode == Instruction::ExtractElement && ScalarType->isPointerTy())
- RegisterFileMoveCost += 1;
-
return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1) +
RegisterFileMoveCost;
}
@@ -4468,7 +4522,7 @@ X86TTIImpl::getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
for (unsigned I = 0; I != NumLanesTotal; ++I) {
APInt LaneEltMask = WidenedDemandedElts.extractBits(
NumEltsPerLane, NumEltsPerLane * I);
- if (LaneEltMask.isNullValue())
+ if (LaneEltMask.isZero())
continue;
// FIXME: we don't need to extract if all non-demanded elements
// are legalization-inserted padding.
@@ -4502,7 +4556,7 @@ X86TTIImpl::getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
// series of UNPCK followed by CONCAT_VECTORS - all of these can be
// considered cheap.
if (Ty->isIntOrIntVectorTy())
- Cost += DemandedElts.countPopulation();
+ Cost += DemandedElts.popcount();
// Get the smaller of the legalized or original pow2-extended number of
// vector elements, which represents the number of unpacks we'll end up
@@ -4549,7 +4603,7 @@ X86TTIImpl::getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
for (unsigned I = 0; I != NumLanesTotal; ++I) {
APInt LaneEltMask = WidenedDemandedElts.extractBits(
NumEltsPerLane, I * NumEltsPerLane);
- if (LaneEltMask.isNullValue())
+ if (LaneEltMask.isZero())
continue;
Cost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt,
CostKind, I * NumEltsPerLane, LaneTy);
@@ -4667,7 +4721,7 @@ X86TTIImpl::getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
// then we won't need to do that shuffle, so adjust the cost accordingly.
APInt DemandedDstVectors = APIntOps::ScaleBitMask(
DemandedDstElts.zext(NumDstVectors * NumEltsPerDstVec), NumDstVectors);
- unsigned NumDstVectorsDemanded = DemandedDstVectors.countPopulation();
+ unsigned NumDstVectorsDemanded = DemandedDstVectors.popcount();
InstructionCost SingleShuffleCost = getShuffleCost(
TTI::SK_PermuteSingleSrc, SingleDstVecTy, /*Mask=*/std::nullopt, CostKind,
@@ -4813,7 +4867,7 @@ InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
APInt DemandedElts =
APInt::getBitsSet(CoalescedVecTy->getNumElements(),
CoalescedVecEltIdx, CoalescedVecEltIdx + 1);
- assert(DemandedElts.countPopulation() == 1 && "Inserting single value");
+ assert(DemandedElts.popcount() == 1 && "Inserting single value");
Cost += getScalarizationOverhead(CoalescedVecTy, DemandedElts, IsLoad,
!IsLoad, CostKind);
}
@@ -4821,8 +4875,12 @@ InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
// This isn't exactly right. We're using slow unaligned 32-byte accesses
// as a proxy for a double-pumped AVX memory interface such as on
// Sandybridge.
+  // Sub-32-bit loads/stores are slower: they either go through PINSR*/PEXTR*
+  // or get scalarized.
if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow())
Cost += 2;
+ else if (CurrOpSizeBytes < 4)
+ Cost += 2;
else
Cost += 1;
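Pulled out as a standalone helper, the per-chunk increment applied above looks roughly like this (hypothetical function, written only to make the branch explicit):

static unsigned chunkCostIncrement(unsigned CurrOpSizeBytes,
                                   bool SlowUnalignedMem32) {
  if (CurrOpSizeBytes == 32 && SlowUnalignedMem32)
    return 2; // double-pumped 32-byte access
  if (CurrOpSizeBytes < 4)
    return 2; // sub-dword chunk: PINSR*/PEXTR* or scalarized
  return 1;   // one full-width load/store
}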
@@ -4899,6 +4957,26 @@ X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment,
return Cost + LT.first;
}
+InstructionCost
+X86TTIImpl::getPointersChainCost(ArrayRef<const Value *> Ptrs,
+ const Value *Base,
+ const TTI::PointersChainInfo &Info,
+ Type *AccessTy, TTI::TargetCostKind CostKind) {
+ if (Info.isSameBase() && Info.isKnownStride()) {
+ // If all the pointers have known stride all the differences are translated
+ // into constants. X86 memory addressing allows encoding it into
+ // displacement. So we just need to take the base GEP cost.
+ if (const auto *BaseGEP = dyn_cast<GetElementPtrInst>(Base)) {
+ SmallVector<const Value *> Indices(BaseGEP->indices());
+ return getGEPCost(BaseGEP->getSourceElementType(),
+ BaseGEP->getPointerOperand(), Indices, nullptr,
+ CostKind);
+ }
+ return TTI::TCC_Free;
+ }
+ return BaseT::getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
+}
+
InstructionCost X86TTIImpl::getAddressComputationCost(Type *Ty,
ScalarEvolution *SE,
const SCEV *Ptr) {
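The reasoning behind returning only the base GEP cost: when every pointer in the chain shares a base and the stride is known, the differences are compile-time constants that x86 folds into the displacement of each memory operand. A tiny illustration (not from the patch; the codegen shown is approximate, for x86-64 SysV with p in rdi):

// All three loads use [rdi + constant] addressing, roughly:
//   mov eax, dword ptr [rdi]
//   add eax, dword ptr [rdi + 4]
//   add eax, dword ptr [rdi + 8]
// so only the base address computation carries a cost.
static int sumFirstThree(const int *p) {
  return p[0] + p[1] + p[2];
}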
@@ -4937,12 +5015,12 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
// We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
// and make it as the cost.
- static const CostTblEntry SLMCostTblNoPairWise[] = {
+ static const CostTblEntry SLMCostTbl[] = {
{ ISD::FADD, MVT::v2f64, 3 },
{ ISD::ADD, MVT::v2i64, 5 },
};
- static const CostTblEntry SSE2CostTblNoPairWise[] = {
+ static const CostTblEntry SSE2CostTbl[] = {
{ ISD::FADD, MVT::v2f64, 2 },
{ ISD::FADD, MVT::v2f32, 2 },
{ ISD::FADD, MVT::v4f32, 4 },
@@ -4958,7 +5036,7 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
{ ISD::ADD, MVT::v16i8, 3 },
};
- static const CostTblEntry AVX1CostTblNoPairWise[] = {
+ static const CostTblEntry AVX1CostTbl[] = {
{ ISD::FADD, MVT::v4f64, 3 },
{ ISD::FADD, MVT::v4f32, 3 },
{ ISD::FADD, MVT::v8f32, 4 },
@@ -4979,15 +5057,15 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
if (VT.isSimple()) {
MVT MTy = VT.getSimpleVT();
if (ST->useSLMArithCosts())
- if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy))
+ if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
return Entry->Cost;
if (ST->hasAVX())
- if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
+ if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
return Entry->Cost;
if (ST->hasSSE2())
- if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
+ if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
return Entry->Cost;
}
@@ -5018,15 +5096,15 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
}
if (ST->useSLMArithCosts())
- if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy))
+ if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
return ArithmeticCost + Entry->Cost;
if (ST->hasAVX())
- if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
+ if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
return ArithmeticCost + Entry->Cost;
if (ST->hasSSE2())
- if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
+ if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
return ArithmeticCost + Entry->Cost;
// FIXME: These assume a naive kshift+binop lowering, which is probably
@@ -5178,137 +5256,16 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
CostKind, 0, nullptr, nullptr);
}
-InstructionCost X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy,
- bool IsUnsigned) {
- std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
-
- MVT MTy = LT.second;
-
- int ISD;
- if (Ty->isIntOrIntVectorTy()) {
- ISD = IsUnsigned ? ISD::UMIN : ISD::SMIN;
- } else {
- assert(Ty->isFPOrFPVectorTy() &&
- "Expected float point or integer vector type.");
- ISD = ISD::FMINNUM;
- }
-
- static const CostTblEntry SSE1CostTbl[] = {
- {ISD::FMINNUM, MVT::v4f32, 1},
- };
-
- static const CostTblEntry SSE2CostTbl[] = {
- {ISD::FMINNUM, MVT::v2f64, 1},
- {ISD::SMIN, MVT::v8i16, 1},
- {ISD::UMIN, MVT::v16i8, 1},
- };
-
- static const CostTblEntry SSE41CostTbl[] = {
- {ISD::SMIN, MVT::v4i32, 1},
- {ISD::UMIN, MVT::v4i32, 1},
- {ISD::UMIN, MVT::v8i16, 1},
- {ISD::SMIN, MVT::v16i8, 1},
- };
-
- static const CostTblEntry SSE42CostTbl[] = {
- {ISD::UMIN, MVT::v2i64, 3}, // xor+pcmpgtq+blendvpd
- };
-
- static const CostTblEntry AVX1CostTbl[] = {
- {ISD::FMINNUM, MVT::v8f32, 1},
- {ISD::FMINNUM, MVT::v4f64, 1},
- {ISD::SMIN, MVT::v8i32, 3},
- {ISD::UMIN, MVT::v8i32, 3},
- {ISD::SMIN, MVT::v16i16, 3},
- {ISD::UMIN, MVT::v16i16, 3},
- {ISD::SMIN, MVT::v32i8, 3},
- {ISD::UMIN, MVT::v32i8, 3},
- };
-
- static const CostTblEntry AVX2CostTbl[] = {
- {ISD::SMIN, MVT::v8i32, 1},
- {ISD::UMIN, MVT::v8i32, 1},
- {ISD::SMIN, MVT::v16i16, 1},
- {ISD::UMIN, MVT::v16i16, 1},
- {ISD::SMIN, MVT::v32i8, 1},
- {ISD::UMIN, MVT::v32i8, 1},
- };
-
- static const CostTblEntry AVX512CostTbl[] = {
- {ISD::FMINNUM, MVT::v16f32, 1},
- {ISD::FMINNUM, MVT::v8f64, 1},
- {ISD::SMIN, MVT::v2i64, 1},
- {ISD::UMIN, MVT::v2i64, 1},
- {ISD::SMIN, MVT::v4i64, 1},
- {ISD::UMIN, MVT::v4i64, 1},
- {ISD::SMIN, MVT::v8i64, 1},
- {ISD::UMIN, MVT::v8i64, 1},
- {ISD::SMIN, MVT::v16i32, 1},
- {ISD::UMIN, MVT::v16i32, 1},
- };
-
- static const CostTblEntry AVX512BWCostTbl[] = {
- {ISD::SMIN, MVT::v32i16, 1},
- {ISD::UMIN, MVT::v32i16, 1},
- {ISD::SMIN, MVT::v64i8, 1},
- {ISD::UMIN, MVT::v64i8, 1},
- };
-
- // If we have a native MIN/MAX instruction for this type, use it.
- if (ST->hasBWI())
- if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
-
- if (ST->hasAVX512())
- if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
-
- if (ST->hasAVX2())
- if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
-
- if (ST->hasAVX())
- if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
-
- if (ST->hasSSE42())
- if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
-
- if (ST->hasSSE41())
- if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
-
- if (ST->hasSSE2())
- if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
-
- if (ST->hasSSE1())
- if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
- return LT.first * Entry->Cost;
-
- unsigned CmpOpcode;
- if (Ty->isFPOrFPVectorTy()) {
- CmpOpcode = Instruction::FCmp;
- } else {
- assert(Ty->isIntOrIntVectorTy() &&
- "expecting floating point or integer type for min/max reduction");
- CmpOpcode = Instruction::ICmp;
- }
-
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
- // Otherwise fall back to cmp+select.
- InstructionCost Result =
- getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CmpInst::BAD_ICMP_PREDICATE,
- CostKind) +
- getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
- CmpInst::BAD_ICMP_PREDICATE, CostKind);
- return Result;
+InstructionCost X86TTIImpl::getMinMaxCost(Intrinsic::ID IID, Type *Ty,
+ TTI::TargetCostKind CostKind,
+ FastMathFlags FMF) {
+ IntrinsicCostAttributes ICA(IID, Ty, {Ty, Ty}, FMF);
+ return getIntrinsicInstrCost(ICA, CostKind);
}
InstructionCost
-X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
- bool IsUnsigned,
+X86TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *ValTy,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind) {
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);
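With getMinMaxCost reduced to a thin wrapper, a single min/max step is now priced exactly like the matching intrinsic call. A hedged usage sketch (priceSMinStep is a hypothetical helper; the constructor and query signatures match what the patch itself uses):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/FMF.h"
#include "llvm/IR/Intrinsics.h"

using namespace llvm;

// Cost of one vector smin step on VecTy, the way the refactored helper now
// computes it: build IntrinsicCostAttributes and defer to the generic
// intrinsic cost query.
static InstructionCost priceSMinStep(const TargetTransformInfo &TTI,
                                     VectorType *VecTy) {
  IntrinsicCostAttributes ICA(Intrinsic::smin, VecTy, {VecTy, VecTy},
                              FastMathFlags());
  return TTI.getIntrinsicInstrCost(ICA,
                                   TargetTransformInfo::TCK_RecipThroughput);
}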
@@ -5316,23 +5273,26 @@ X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
int ISD;
if (ValTy->isIntOrIntVectorTy()) {
- ISD = IsUnsigned ? ISD::UMIN : ISD::SMIN;
+ ISD = (IID == Intrinsic::umin || IID == Intrinsic::umax) ? ISD::UMIN
+ : ISD::SMIN;
} else {
assert(ValTy->isFPOrFPVectorTy() &&
"Expected float point or integer vector type.");
- ISD = ISD::FMINNUM;
+ ISD = (IID == Intrinsic::minnum || IID == Intrinsic::maxnum)
+ ? ISD::FMINNUM
+ : ISD::FMINIMUM;
}
// We use the Intel Architecture Code Analyzer(IACA) to measure the throughput
// and make it as the cost.
- static const CostTblEntry SSE2CostTblNoPairWise[] = {
+ static const CostTblEntry SSE2CostTbl[] = {
{ISD::UMIN, MVT::v2i16, 5}, // need pxors to use pminsw/pmaxsw
{ISD::UMIN, MVT::v4i16, 7}, // need pxors to use pminsw/pmaxsw
{ISD::UMIN, MVT::v8i16, 9}, // need pxors to use pminsw/pmaxsw
};
- static const CostTblEntry SSE41CostTblNoPairWise[] = {
+ static const CostTblEntry SSE41CostTbl[] = {
{ISD::SMIN, MVT::v2i16, 3}, // same as sse2
{ISD::SMIN, MVT::v4i16, 5}, // same as sse2
{ISD::UMIN, MVT::v2i16, 5}, // same as sse2
@@ -5349,14 +5309,14 @@ X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
{ISD::UMIN, MVT::v16i8, 6}, // FIXME: umin is cheaper than umax
};
- static const CostTblEntry AVX1CostTblNoPairWise[] = {
+ static const CostTblEntry AVX1CostTbl[] = {
{ISD::SMIN, MVT::v16i16, 6},
{ISD::UMIN, MVT::v16i16, 6}, // FIXME: umin is cheaper than umax
{ISD::SMIN, MVT::v32i8, 8},
{ISD::UMIN, MVT::v32i8, 8},
};
- static const CostTblEntry AVX512BWCostTblNoPairWise[] = {
+ static const CostTblEntry AVX512BWCostTbl[] = {
{ISD::SMIN, MVT::v32i16, 8},
{ISD::UMIN, MVT::v32i16, 8}, // FIXME: umin is cheaper than umax
{ISD::SMIN, MVT::v64i8, 10},
@@ -5370,19 +5330,19 @@ X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
if (VT.isSimple()) {
MVT MTy = VT.getSimpleVT();
if (ST->hasBWI())
- if (const auto *Entry = CostTableLookup(AVX512BWCostTblNoPairWise, ISD, MTy))
+ if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
return Entry->Cost;
if (ST->hasAVX())
- if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
+ if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
return Entry->Cost;
if (ST->hasSSE41())
- if (const auto *Entry = CostTableLookup(SSE41CostTblNoPairWise, ISD, MTy))
+ if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
return Entry->Cost;
if (ST->hasSSE2())
- if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
+ if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
return Entry->Cost;
}
@@ -5396,27 +5356,25 @@ X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
// Type needs to be split. We need LT.first - 1 operations ops.
Ty = FixedVectorType::get(ValVTy->getElementType(),
MTy.getVectorNumElements());
- auto *SubCondTy = FixedVectorType::get(CondTy->getElementType(),
- MTy.getVectorNumElements());
- MinMaxCost = getMinMaxCost(Ty, SubCondTy, IsUnsigned);
+ MinMaxCost = getMinMaxCost(IID, Ty, CostKind, FMF);
MinMaxCost *= LT.first - 1;
NumVecElts = MTy.getVectorNumElements();
}
if (ST->hasBWI())
- if (const auto *Entry = CostTableLookup(AVX512BWCostTblNoPairWise, ISD, MTy))
+ if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy))
return MinMaxCost + Entry->Cost;
if (ST->hasAVX())
- if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy))
+ if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
return MinMaxCost + Entry->Cost;
if (ST->hasSSE41())
- if (const auto *Entry = CostTableLookup(SSE41CostTblNoPairWise, ISD, MTy))
+ if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy))
return MinMaxCost + Entry->Cost;
if (ST->hasSSE2())
- if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy))
+ if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
return MinMaxCost + Entry->Cost;
unsigned ScalarSize = ValTy->getScalarSizeInBits();
@@ -5425,7 +5383,7 @@ X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
// by type legalization.
if (!isPowerOf2_32(ValVTy->getNumElements()) ||
ScalarSize != MTy.getScalarSizeInBits())
- return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsUnsigned, CostKind);
+ return BaseT::getMinMaxReductionCost(IID, ValTy, FMF, CostKind);
// Now handle reduction with the legal type, taking into account size changes
// at each level.
@@ -5469,9 +5427,7 @@ X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy,
}
// Add the arithmetic op for this level.
- auto *SubCondTy =
- FixedVectorType::get(CondTy->getElementType(), Ty->getNumElements());
- MinMaxCost += getMinMaxCost(Ty, SubCondTy, IsUnsigned);
+ MinMaxCost += getMinMaxCost(IID, Ty, CostKind, FMF);
}
// Add the final extract element to the cost.
@@ -5616,7 +5572,7 @@ InstructionCost X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
}
if (Idx == ImmIdx) {
- int NumConstants = divideCeil(BitSize, 64);
+ uint64_t NumConstants = divideCeil(BitSize, 64);
InstructionCost Cost = X86TTIImpl::getIntImmCost(Imm, Ty, CostKind);
return (Cost <= NumConstants * TTI::TCC_Basic)
? static_cast<int>(TTI::TCC_Free)
@@ -5715,15 +5671,15 @@ InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy,
const Value *Ptrs = GEP->getPointerOperand();
if (Ptrs->getType()->isVectorTy() && !getSplatValue(Ptrs))
return IndexSize;
- for (unsigned i = 1; i < GEP->getNumOperands(); ++i) {
- if (isa<Constant>(GEP->getOperand(i)))
+ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I) {
+ if (isa<Constant>(GEP->getOperand(I)))
continue;
- Type *IndxTy = GEP->getOperand(i)->getType();
+ Type *IndxTy = GEP->getOperand(I)->getType();
if (auto *IndexVTy = dyn_cast<VectorType>(IndxTy))
IndxTy = IndexVTy->getElementType();
if ((IndxTy->getPrimitiveSizeInBits() == 64 &&
- !isa<SExtInst>(GEP->getOperand(i))) ||
- ++NumOfVarIndices > 1)
+ !isa<SExtInst>(GEP->getOperand(I))) ||
+ ++NumOfVarIndices > 1)
return IndexSize; // 64
}
return (unsigned)32;
@@ -5883,6 +5839,9 @@ bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment) {
if (ScalarTy->isHalfTy() && ST->hasBWI())
return true;
+ if (ScalarTy->isBFloatTy() && ST->hasBF16())
+ return true;
+
if (!ScalarTy->isIntegerTy())
return false;
@@ -6338,16 +6297,18 @@ InstructionCost X86TTIImpl::getInterleavedMemoryOpCost(
bool UseMaskForCond, bool UseMaskForGaps) {
auto *VecTy = cast<FixedVectorType>(BaseTy);
- auto isSupportedOnAVX512 = [&](Type *VecTy, bool HasBW) {
+ auto isSupportedOnAVX512 = [&](Type *VecTy) {
Type *EltTy = cast<VectorType>(VecTy)->getElementType();
if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) ||
EltTy->isIntegerTy(32) || EltTy->isPointerTy())
return true;
if (EltTy->isIntegerTy(16) || EltTy->isIntegerTy(8) || EltTy->isHalfTy())
- return HasBW;
+ return ST->hasBWI();
+ if (EltTy->isBFloatTy())
+ return ST->hasBF16();
return false;
};
- if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI()))
+ if (ST->hasAVX512() && isSupportedOnAVX512(VecTy))
return getInterleavedMemoryOpCostAVX512(
Opcode, VecTy, Factor, Indices, Alignment,
AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index ef8c4a1d533e..89c7916260a4 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -88,6 +88,12 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
X86::TuningInsertVZEROUPPER,
X86::TuningUseSLMArithCosts,
X86::TuningUseGLMDivSqrtCosts,
+ X86::TuningNoDomainDelay,
+ X86::TuningNoDomainDelayMov,
+ X86::TuningNoDomainDelayShuffle,
+ X86::TuningNoDomainDelayBlend,
+ X86::TuningPreferShiftShuffle,
+ X86::TuningFastImmVectorShift,
// Perf-tuning flags.
X86::TuningFastGather,
@@ -127,7 +133,7 @@ public:
unsigned getNumberOfRegisters(unsigned ClassID) const;
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const;
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
- unsigned getMaxInterleaveFactor(unsigned VF);
+ unsigned getMaxInterleaveFactor(ElementCount VF);
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None},
@@ -172,6 +178,11 @@ public:
Align Alignment,
TTI::TargetCostKind CostKind,
const Instruction *I);
+ InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
+ const Value *Base,
+ const TTI::PointersChainInfo &Info,
+ Type *AccessTy,
+ TTI::TargetCostKind CostKind);
InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
const SCEV *Ptr);
@@ -196,10 +207,12 @@ public:
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind);
- InstructionCost getMinMaxCost(Type *Ty, Type *CondTy, bool IsUnsigned);
+ InstructionCost getMinMaxCost(Intrinsic::ID IID, Type *Ty,
+ TTI::TargetCostKind CostKind,
+ FastMathFlags FMF);
- InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
- bool IsUnsigned,
+ InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
+ FastMathFlags FMF,
TTI::TargetCostKind CostKind);
InstructionCost getInterleavedMemoryOpCost(
@@ -261,6 +274,11 @@ public:
const Function *Callee) const;
bool areTypesABICompatible(const Function *Caller, const Function *Callee,
const ArrayRef<Type *> &Type) const;
+
+ uint64_t getMaxMemIntrinsicInlineSizeThreshold() const {
+ return ST->getMaxInlineSizeThreshold();
+ }
+
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
bool prefersVectorizedAddressing() const;
diff --git a/llvm/lib/Target/X86/X86WinEHState.cpp b/llvm/lib/Target/X86/X86WinEHState.cpp
index 085876a19d0a..fe9088ec1ec6 100644
--- a/llvm/lib/Target/X86/X86WinEHState.cpp
+++ b/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -16,10 +16,10 @@
#include "X86.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -212,7 +212,8 @@ Type *WinEHStatePass::getEHLinkRegistrationType() {
LLVMContext &Context = TheModule->getContext();
EHLinkRegistrationTy = StructType::create(Context, "EHRegistrationNode");
Type *FieldTys[] = {
- EHLinkRegistrationTy->getPointerTo(0), // EHRegistrationNode *Next
+ PointerType::getUnqual(
+ EHLinkRegistrationTy->getContext()), // EHRegistrationNode *Next
Type::getInt8PtrTy(Context) // EXCEPTION_DISPOSITION (*Handler)(...)
};
EHLinkRegistrationTy->setBody(FieldTys, false);
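The switch from getPointerTo() to PointerType::getUnqual(Context) reflects opaque pointers: a self-referential field no longer needs the struct's own pointer type, just an opaque "ptr". Minimal sketch (not from the patch; Node is a made-up example type):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

static StructType *makeNodeType(LLVMContext &Ctx) {
  StructType *Node = StructType::create(Ctx, "Node");
  Type *Fields[] = {
      PointerType::getUnqual(Ctx), // Node *Next, emitted as opaque "ptr"
      Type::getInt32Ty(Ctx)        // payload
  };
  Node->setBody(Fields, /*isPacked=*/false);
  return Node;
}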
@@ -404,11 +405,9 @@ Function *WinEHStatePass::generateLSDAInEAXThunk(Function *ParentFunc) {
BasicBlock *EntryBB = BasicBlock::Create(Context, "entry", Trampoline);
IRBuilder<> Builder(EntryBB);
Value *LSDA = emitEHLSDA(Builder, ParentFunc);
- Value *CastPersonality =
- Builder.CreateBitCast(PersonalityFn, TargetFuncTy->getPointerTo());
auto AI = Trampoline->arg_begin();
Value *Args[5] = {LSDA, &*AI++, &*AI++, &*AI++, &*AI++};
- CallInst *Call = Builder.CreateCall(TargetFuncTy, CastPersonality, Args);
+ CallInst *Call = Builder.CreateCall(TargetFuncTy, PersonalityFn, Args);
// Can't use musttail due to prototype mismatch, but we can use tail.
Call->setTailCall(true);
// Set inreg so we pass it in EAX.
diff --git a/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index c686a2324daf..34f2a0576e7c 100644
--- a/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1054,7 +1054,7 @@ XCoreTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers / memory locations.
-static SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+static SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
const SmallVectorImpl<CCValAssign> &RVLocs,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) {
@@ -1064,8 +1064,8 @@ static SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
const CCValAssign &VA = RVLocs[i];
if (VA.isRegLoc()) {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getValVT(),
- InFlag).getValue(1);
- InFlag = Chain.getValue(2);
+ InGlue).getValue(1);
+ InGlue = Chain.getValue(2);
InVals.push_back(Chain.getValue(0));
} else {
assert(VA.isMemLoc());
@@ -1122,11 +1122,11 @@ SDValue XCoreTargetLowering::LowerCCCCallTo(
// Analyze return values to determine the number of bytes of stack required.
CCState RetCCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext());
- RetCCInfo.AllocateStack(CCInfo.getNextStackOffset(), Align(4));
+ RetCCInfo.AllocateStack(CCInfo.getStackSize(), Align(4));
RetCCInfo.AnalyzeCallResult(Ins, RetCC_XCore);
// Get a count of how many bytes are to be pushed on the stack.
- unsigned NumBytes = RetCCInfo.getNextStackOffset();
+ unsigned NumBytes = RetCCInfo.getStackSize();
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
@@ -1176,13 +1176,13 @@ SDValue XCoreTargetLowering::LowerCCCCallTo(
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
- // The InFlag in necessary since all emitted instructions must be
+  // The InGlue is necessary since all emitted instructions must be
// stuck together.
- SDValue InFlag;
+ SDValue InGlue;
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
+ RegsToPass[i].second, InGlue);
+ InGlue = Chain.getValue(1);
}
// If the callee is a GlobalAddress node (quite common, every direct call is)
@@ -1208,19 +1208,19 @@ SDValue XCoreTargetLowering::LowerCCCCallTo(
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ if (InGlue.getNode())
+ Ops.push_back(InGlue);
Chain = DAG.getNode(XCoreISD::BL, dl, NodeTys, Ops);
- InFlag = Chain.getValue(1);
+ InGlue = Chain.getValue(1);
// Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InFlag, dl);
- InFlag = Chain.getValue(1);
+ Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
+ InGlue = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, RVLocs, dl, DAG, InVals);
+ return LowerCallResult(Chain, InGlue, RVLocs, dl, DAG, InVals);
}
//===----------------------------------------------------------------------===//
@@ -1272,7 +1272,7 @@ SDValue XCoreTargetLowering::LowerCCCArguments(
unsigned LRSaveSize = StackSlotSize;
if (!isVarArg)
- XFI->setReturnStackOffset(CCInfo.getNextStackOffset() + LRSaveSize);
+ XFI->setReturnStackOffset(CCInfo.getStackSize() + LRSaveSize);
// All getCopyFromReg ops must precede any getMemcpys to prevent the
// scheduler clobbering a register before it has been copied.
@@ -1299,7 +1299,7 @@ SDValue XCoreTargetLowering::LowerCCCArguments(
{
#ifndef NDEBUG
errs() << "LowerFormalArguments Unhandled argument type: "
- << RegVT.getEVTString() << "\n";
+ << RegVT << "\n";
#endif
llvm_unreachable(nullptr);
}
@@ -1316,8 +1316,7 @@ SDValue XCoreTargetLowering::LowerCCCArguments(
unsigned ObjSize = VA.getLocVT().getSizeInBits()/8;
if (ObjSize > StackSlotSize) {
errs() << "LowerFormalArguments Unhandled argument type: "
- << EVT(VA.getLocVT()).getEVTString()
- << "\n";
+ << VA.getLocVT() << "\n";
}
// Create the frame index object for this incoming parameter...
int FI = MFI.CreateFixedObject(ObjSize,
@@ -1367,8 +1366,7 @@ SDValue XCoreTargetLowering::LowerCCCArguments(
} else {
// This will point to the next argument passed via stack.
XFI->setVarArgsFrameIndex(
- MFI.CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset(),
- true));
+ MFI.CreateFixedObject(4, LRSaveSize + CCInfo.getStackSize(), true));
}
}
@@ -1420,7 +1418,7 @@ CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
if (!CCInfo.CheckReturn(Outs, RetCC_XCore))
return false;
- if (CCInfo.getNextStackOffset() != 0 && isVarArg)
+ if (CCInfo.getStackSize() != 0 && isVarArg)
return false;
return true;
}
@@ -1450,7 +1448,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
CCInfo.AnalyzeReturn(Outs, RetCC_XCore);
- SDValue Flag;
+ SDValue Glue;
SmallVector<SDValue, 4> RetOps(1, Chain);
// Return on XCore is always a "retsp 0"
@@ -1491,19 +1489,19 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
if (!VA.isRegLoc())
continue;
// Copy the result values into the output registers.
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Glue);
// guarantee that all emitted copies are
// stuck together, avoiding something bad
- Flag = Chain.getValue(1);
+ Glue = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
RetOps[0] = Chain; // Update chain.
- // Add the flag if we have it.
- if (Flag.getNode())
- RetOps.push_back(Flag);
+ // Add the glue if we have it.
+ if (Glue.getNode())
+ RetOps.push_back(Glue);
return DAG.getNode(XCoreISD::RETSP, dl, MVT::Other, RetOps);
}
diff --git a/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
index ed5a0ad5d4b8..7c11ec06d635 100644
--- a/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -97,7 +97,8 @@ static void InsertFPConstInst(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc dl = MI.getDebugLoc();
- Register ScratchOffset = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0);
+ Register ScratchOffset =
+ RS->scavengeRegisterBackwards(XCore::GRRegsRegClass, II, false, 0);
RS->setRegUsed(ScratchOffset);
TII.loadImmediate(MBB, II, ScratchOffset, Offset);
@@ -169,12 +170,14 @@ static void InsertSPConstInst(MachineBasicBlock::iterator II,
unsigned ScratchBase;
if (OpCode==XCore::STWFI) {
- ScratchBase = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0);
+ ScratchBase =
+ RS->scavengeRegisterBackwards(XCore::GRRegsRegClass, II, false, 0);
RS->setRegUsed(ScratchBase);
} else
ScratchBase = Reg;
BuildMI(MBB, II, dl, TII.get(XCore::LDAWSP_ru6), ScratchBase).addImm(0);
- Register ScratchOffset = RS->scavengeRegister(&XCore::GRRegsRegClass, II, 0);
+ Register ScratchOffset =
+ RS->scavengeRegisterBackwards(XCore::GRRegsRegClass, II, false, 0);
RS->setRegUsed(ScratchOffset);
TII.loadImmediate(MBB, II, ScratchOffset, Offset);
diff --git a/llvm/lib/Target/XCore/XCoreRegisterInfo.h b/llvm/lib/Target/XCore/XCoreRegisterInfo.h
index b72875c29c34..8d420ab712f1 100644
--- a/llvm/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/llvm/lib/Target/XCore/XCoreRegisterInfo.h
@@ -34,6 +34,8 @@ public:
bool useFPForScavengingIndex(const MachineFunction &MF) const override;
+ bool supportsBackwardScavenger() const override { return true; }
+
bool eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
diff --git a/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp b/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp
index 1bf2f3cbc284..8ffe1253aa01 100644
--- a/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp
+++ b/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp
@@ -35,8 +35,6 @@ class XtensaAsmParser : public MCTargetAsmParser {
SMLoc getLoc() const { return getParser().getTok().getLoc(); }
- // Override MCTargetAsmParser.
- bool ParseDirective(AsmToken DirectiveID) override;
bool parseRegister(MCRegister &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) override;
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
@@ -642,14 +640,12 @@ bool XtensaAsmParser::ParseInstructionWithSR(ParseInstructionInfo &Info,
if (parseOperand(Operands, Name))
return true;
- if (!getLexer().is(AsmToken::Comma)) {
+ if (!parseOptionalToken(AsmToken::Comma)) {
SMLoc Loc = getLexer().getLoc();
getParser().eatToEndOfStatement();
return Error(Loc, "unexpected token");
}
- getLexer().Lex();
-
// Parse second operand
if (parseOperand(Operands, Name, true))
return true;
@@ -685,14 +681,9 @@ bool XtensaAsmParser::ParseInstruction(ParseInstructionInfo &Info,
return true;
// Parse until end of statement, consuming commas between operands
- while (getLexer().is(AsmToken::Comma)) {
- // Consume comma token
- getLexer().Lex();
-
- // Parse next operand
+ while (parseOptionalToken(AsmToken::Comma))
if (parseOperand(Operands, Name))
return true;
- }
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
@@ -704,8 +695,6 @@ bool XtensaAsmParser::ParseInstruction(ParseInstructionInfo &Info,
return false;
}
-bool XtensaAsmParser::ParseDirective(AsmToken DirectiveID) { return true; }
-
// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeXtensaAsmParser() {
RegisterMCAsmParser<XtensaAsmParser> X(getTheXtensaTarget());
diff --git a/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp b/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp
index 3e68a955daa4..2d36b94dd40c 100644
--- a/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp
+++ b/llvm/lib/Target/Xtensa/Disassembler/XtensaDisassembler.cpp
@@ -39,7 +39,7 @@ public:
: MCDisassembler(STI, Ctx), IsLittleEndian(isLE) {}
bool hasDensity() const {
- return STI.getFeatureBits()[Xtensa::FeatureDensity];
+ return STI.hasFeature(Xtensa::FeatureDensity);
}
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCAsmInfo.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCAsmInfo.cpp
index ce80722230bb..28764d369247 100644
--- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCAsmInfo.cpp
+++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCAsmInfo.cpp
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
#include "XtensaMCAsmInfo.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCCodeEmitter.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCCodeEmitter.cpp
index 1ef5b110c927..1afdbb38f957 100644
--- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCCodeEmitter.cpp
@@ -43,7 +43,7 @@ public:
~XtensaMCCodeEmitter() {}
// Override MCCodeEmitter.
- void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ void encodeInstruction(const MCInst &MI, SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
@@ -122,7 +122,8 @@ MCCodeEmitter *llvm::createXtensaMCCodeEmitter(const MCInstrInfo &MCII,
return new XtensaMCCodeEmitter(MCII, Ctx, true);
}
-void XtensaMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+void XtensaMCCodeEmitter::encodeInstruction(const MCInst &MI,
+ SmallVectorImpl<char> &CB,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
@@ -132,7 +133,7 @@ void XtensaMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
// Little-endian insertion of Size bytes.
unsigned ShiftValue = 0;
for (unsigned I = 0; I != Size; ++I) {
- OS << uint8_t(Bits >> ShiftValue);
+ CB.push_back(char(Bits >> ShiftValue));
ShiftValue += 8;
}
} else {
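
The encodeInstruction change above follows the LLVM-wide move from streaming encoded bytes into a raw_ostream to appending them to a SmallVectorImpl<char> code buffer; the little-endian loop now push_back()s one byte per 8-bit shift. A standalone sketch of that insertion with std::vector standing in for the small vector (the 24-bit instruction word is a made-up value):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Append `Size` bytes of `Bits` to `CB`, least-significant byte first,
    // mirroring the loop in XtensaMCCodeEmitter::encodeInstruction above.
    static void emitLittleEndian(uint64_t Bits, unsigned Size,
                                 std::vector<char> &CB) {
      unsigned ShiftValue = 0;
      for (unsigned I = 0; I != Size; ++I) {
        CB.push_back(char(Bits >> ShiftValue));
        ShiftValue += 8;
      }
    }

    int main() {
      std::vector<char> CB;
      emitLittleEndian(0x00123456, /*Size=*/3, CB);  // a 24-bit instruction word
      for (unsigned char C : CB)
        std::printf("%02x ", C);                     // prints: 56 34 12
      std::printf("\n");
    }
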
diff --git a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp
index 31e63461176d..561ff4f0d5bb 100644
--- a/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp
+++ b/llvm/lib/Target/Xtensa/XtensaTargetMachine.cpp
@@ -17,9 +17,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/MC/TargetRegistry.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Scalar.h"
#include <optional>
diff --git a/llvm/lib/TargetParser/AArch64TargetParser.cpp b/llvm/lib/TargetParser/AArch64TargetParser.cpp
index 41d5c2544f29..3a1f549b2803 100644
--- a/llvm/lib/TargetParser/AArch64TargetParser.cpp
+++ b/llvm/lib/TargetParser/AArch64TargetParser.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/TargetParser/AArch64TargetParser.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/TargetParser/ARMTargetParserCommon.h"
#include "llvm/TargetParser/Triple.h"
#include <cctype>
@@ -25,40 +24,22 @@ static unsigned checkArchVersion(llvm::StringRef Arch) {
return 0;
}
-uint64_t AArch64::getDefaultExtensions(StringRef CPU,
- const AArch64::ArchInfo &AI) {
- if (CPU == "generic")
- return AI.DefaultExts;
-
- // Note: this now takes cpu aliases into account
- const CpuInfo &Cpu = parseCpu(CPU);
- return Cpu.Arch.DefaultExts | Cpu.DefaultExtensions;
-}
-
-void AArch64::getFeatureOption(StringRef Name, std::string &Feature) {
- for (const auto &E : llvm::AArch64::Extensions) {
- if (Name == E.Name) {
- Feature = E.Feature;
- return;
- }
- }
- Feature = Name.str();
-}
-
-const AArch64::ArchInfo &AArch64::getArchForCpu(StringRef CPU) {
+std::optional<AArch64::ArchInfo> AArch64::getArchForCpu(StringRef CPU) {
if (CPU == "generic")
return ARMV8A;
// Note: this now takes cpu aliases into account
- const CpuInfo &Cpu = parseCpu(CPU);
- return Cpu.Arch;
+ std::optional<CpuInfo> Cpu = parseCpu(CPU);
+ if (!Cpu)
+ return {};
+ return Cpu->Arch;
}
-const AArch64::ArchInfo &AArch64::ArchInfo::findBySubArch(StringRef SubArch) {
+std::optional<AArch64::ArchInfo> AArch64::ArchInfo::findBySubArch(StringRef SubArch) {
for (const auto *A : AArch64::ArchInfos)
if (A->getSubArch() == SubArch)
return *A;
- return AArch64::INVALID;
+ return {};
}
uint64_t AArch64::getCpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
@@ -75,9 +56,6 @@ uint64_t AArch64::getCpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
bool AArch64::getExtensionFeatures(uint64_t InputExts,
std::vector<StringRef> &Features) {
- if (InputExts == AArch64::AEK_INVALID)
- return false;
-
for (const auto &E : Extensions)
/* INVALID and NONE have no feature name. */
if ((InputExts & E.ID) && !E.Feature.empty())
@@ -110,7 +88,6 @@ StringRef AArch64::getArchExtFeature(StringRef ArchExt) {
void AArch64::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values) {
for (const auto &C : CpuInfos)
- if (C.Arch != INVALID)
Values.push_back(C.Name);
for (const auto &Alias : CpuAliases)
@@ -119,32 +96,32 @@ void AArch64::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values) {
bool AArch64::isX18ReservedByDefault(const Triple &TT) {
return TT.isAndroid() || TT.isOSDarwin() || TT.isOSFuchsia() ||
- TT.isOSWindows();
+ TT.isOSWindows() || TT.isOHOSFamily();
}
// Allows partial match, ex. "v8a" matches "armv8a".
-const AArch64::ArchInfo &AArch64::parseArch(StringRef Arch) {
+std::optional<AArch64::ArchInfo> AArch64::parseArch(StringRef Arch) {
Arch = llvm::ARM::getCanonicalArchName(Arch);
if (checkArchVersion(Arch) < 8)
- return AArch64::INVALID;
+ return {};
StringRef Syn = llvm::ARM::getArchSynonym(Arch);
for (const auto *A : ArchInfos) {
if (A->Name.endswith(Syn))
return *A;
}
- return AArch64::INVALID;
+ return {};
}
-AArch64::ArchExtKind AArch64::parseArchExt(StringRef ArchExt) {
+std::optional<AArch64::ExtensionInfo> AArch64::parseArchExtension(StringRef ArchExt) {
for (const auto &A : Extensions) {
if (ArchExt == A.Name)
- return static_cast<ArchExtKind>(A.ID);
+ return A;
}
- return AArch64::AEK_INVALID;
+ return {};
}
-const AArch64::CpuInfo &AArch64::parseCpu(StringRef Name) {
+std::optional<AArch64::CpuInfo> AArch64::parseCpu(StringRef Name) {
// Resolve aliases first.
Name = resolveCPUAlias(Name);
@@ -153,7 +130,5 @@ const AArch64::CpuInfo &AArch64::parseCpu(StringRef Name) {
if (Name == C.Name)
return C;
- // "generic" returns invalid.
- assert(Name != "invalid" && "Unexpected recursion.");
- return parseCpu("invalid");
+ return {};
}
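
The AArch64 hunks above drop the AArch64::INVALID and AEK_INVALID sentinels in favour of std::optional return values, so callers test the optional instead of comparing against a sentinel object. A standalone sketch of that calling convention (ArchInfo, parseArch and the two table entries are illustrative stand-ins, not the real TargetParser tables):

    #include <cstdio>
    #include <optional>
    #include <string_view>

    struct ArchInfo {                    // stand-in for AArch64::ArchInfo
      std::string_view Name;
      unsigned Version;
    };

    static constexpr ArchInfo ArchInfos[] = {{"armv8-a", 8}, {"armv9-a", 9}};

    // Sentinel-free lookup: an empty optional plays the role of the removed
    // AArch64::INVALID object.
    static std::optional<ArchInfo> parseArch(std::string_view Arch) {
      for (const ArchInfo &A : ArchInfos)
        if (A.Name == Arch)
          return A;
      return {};
    }

    int main() {
      if (std::optional<ArchInfo> AI = parseArch("armv9-a"))
        std::printf("matched %s (v%u)\n", AI->Name.data(), AI->Version);
      if (!parseArch("armv7-a"))
        std::printf("armv7-a rejected (previously: == AArch64::INVALID)\n");
    }
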
diff --git a/llvm/lib/TargetParser/ARMTargetParser.cpp b/llvm/lib/TargetParser/ARMTargetParser.cpp
index af98ecb122d6..785e9a4fe3fb 100644
--- a/llvm/lib/TargetParser/ARMTargetParser.cpp
+++ b/llvm/lib/TargetParser/ARMTargetParser.cpp
@@ -147,7 +147,8 @@ ARM::ProfileKind ARM::parseArchProfile(StringRef Arch) {
return getProfileKind(parseArch(Arch));
}
-bool ARM::getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features) {
+bool ARM::getFPUFeatures(ARM::FPUKind FPUKind,
+ std::vector<StringRef> &Features) {
if (FPUKind >= FK_LAST || FPUKind == FK_INVALID)
return false;
@@ -211,7 +212,7 @@ bool ARM::getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features) {
return true;
}
-unsigned ARM::parseFPU(StringRef FPU) {
+ARM::FPUKind ARM::parseFPU(StringRef FPU) {
StringRef Syn = getFPUSynonym(FPU);
for (const auto &F : FPUNames) {
if (Syn == F.Name)
@@ -220,7 +221,7 @@ unsigned ARM::parseFPU(StringRef FPU) {
return FK_INVALID;
}
-ARM::NeonSupportLevel ARM::getFPUNeonSupportLevel(unsigned FPUKind) {
+ARM::NeonSupportLevel ARM::getFPUNeonSupportLevel(ARM::FPUKind FPUKind) {
if (FPUKind >= FK_LAST)
return NeonSupportLevel::None;
return FPUNames[FPUKind].NeonSupport;
@@ -243,33 +244,33 @@ StringRef ARM::getFPUSynonym(StringRef FPU) {
.Default(FPU);
}
-StringRef ARM::getFPUName(unsigned FPUKind) {
+StringRef ARM::getFPUName(ARM::FPUKind FPUKind) {
if (FPUKind >= FK_LAST)
return StringRef();
return FPUNames[FPUKind].Name;
}
-ARM::FPUVersion ARM::getFPUVersion(unsigned FPUKind) {
+ARM::FPUVersion ARM::getFPUVersion(ARM::FPUKind FPUKind) {
if (FPUKind >= FK_LAST)
return FPUVersion::NONE;
return FPUNames[FPUKind].FPUVer;
}
-ARM::FPURestriction ARM::getFPURestriction(unsigned FPUKind) {
+ARM::FPURestriction ARM::getFPURestriction(ARM::FPUKind FPUKind) {
if (FPUKind >= FK_LAST)
return FPURestriction::None;
return FPUNames[FPUKind].Restriction;
}
-unsigned ARM::getDefaultFPU(StringRef CPU, ARM::ArchKind AK) {
+ARM::FPUKind ARM::getDefaultFPU(StringRef CPU, ARM::ArchKind AK) {
if (CPU == "generic")
return ARM::ARMArchNames[static_cast<unsigned>(AK)].DefaultFPU;
- return StringSwitch<unsigned>(CPU)
+ return StringSwitch<ARM::FPUKind>(CPU)
#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
.Case(NAME, DEFAULT_FPU)
#include "llvm/TargetParser/ARMTargetParser.def"
- .Default(ARM::FK_INVALID);
+ .Default(ARM::FK_INVALID);
}
uint64_t ARM::getDefaultExtensions(StringRef CPU, ARM::ArchKind AK) {
@@ -362,7 +363,7 @@ StringRef ARM::getArchExtFeature(StringRef ArchExt) {
return StringRef();
}
-static unsigned findDoublePrecisionFPU(unsigned InputFPUKind) {
+static ARM::FPUKind findDoublePrecisionFPU(ARM::FPUKind InputFPUKind) {
const ARM::FPUName &InputFPU = ARM::FPUNames[InputFPUKind];
// If the input FPU already supports double-precision, then there
@@ -394,7 +395,7 @@ static unsigned findDoublePrecisionFPU(unsigned InputFPUKind) {
bool ARM::appendArchExtFeatures(StringRef CPU, ARM::ArchKind AK,
StringRef ArchExt,
std::vector<StringRef> &Features,
- unsigned &ArgFPUID) {
+ ARM::FPUKind &ArgFPUKind) {
size_t StartingNumFeatures = Features.size();
const bool Negated = stripNegationPrefix(ArchExt);
@@ -417,7 +418,7 @@ bool ARM::appendArchExtFeatures(StringRef CPU, ARM::ArchKind AK,
CPU = "generic";
if (ArchExt == "fp" || ArchExt == "fp.dp") {
- unsigned FPUKind;
+ ARM::FPUKind FPUKind;
if (ArchExt == "fp.dp") {
if (Negated) {
Features.push_back("-fp64");
@@ -429,7 +430,7 @@ bool ARM::appendArchExtFeatures(StringRef CPU, ARM::ArchKind AK,
} else {
FPUKind = getDefaultFPU(CPU, AK);
}
- ArgFPUID = FPUKind;
+ ArgFPUKind = FPUKind;
return ARM::getFPUFeatures(FPUKind, Features);
}
return StartingNumFeatures != Features.size();
@@ -523,7 +524,7 @@ StringRef ARM::computeDefaultTargetABI(const Triple &TT, StringRef CPU) {
default:
if (TT.isOSNetBSD())
return "apcs-gnu";
- if (TT.isOSOpenBSD())
+ if (TT.isOSFreeBSD() || TT.isOSOpenBSD() || TT.isOHOSFamily())
return "aapcs-linux";
return "aapcs";
}
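
The ARM hunks above thread a dedicated ARM::FPUKind type through interfaces that previously took plain unsigned values, so an arbitrary integer no longer type-checks as an FPU index. A minimal sketch of the idea, modelling the kind as an unscoped enum (an assumption about the exact definition; FPUName and getFPUName below are stand-ins):

    #include <cstdio>
    #include <string_view>

    // Stand-in for the ARM::FPUKind type the diff introduces in place of
    // plain `unsigned` parameters.
    enum FPUKind : unsigned { FK_INVALID = 0, FK_VFPV2, FK_NEON, FK_LAST };

    struct FPUName { std::string_view Name; };
    static constexpr FPUName FPUNames[FK_LAST] = {{"invalid"}, {"vfpv2"}, {"neon"}};

    static std::string_view getFPUName(FPUKind Kind) {
      if (Kind >= FK_LAST)          // same guard pattern as ARM::getFPUName above
        return {};
      return FPUNames[Kind].Name;
    }

    int main() {
      FPUKind K = FK_NEON;          // callers now carry the named type around
      std::printf("%s\n", getFPUName(K).data());
      // getFPUName(42);            // does not compile: int does not implicitly
                                    // convert to FPUKind, unlike an `unsigned`
                                    // parameter which accepted any integer.
    }
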
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 80ebe0fa57d4..518c859b11cc 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -833,13 +833,19 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
break;
// Graniterapids:
- case 0xae:
case 0xad:
CPU = "graniterapids";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_GRANITERAPIDS;
break;
+ // Granite Rapids D:
+ case 0xae:
+ CPU = "graniterapids-d";
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_GRANITERAPIDS_D;
+ break;
+
// Icelake Xeon:
case 0x6a:
case 0x6c:
@@ -1448,6 +1454,20 @@ StringRef sys::getHostCPUName() {
return "generic";
}
}
+#elif defined(__loongarch__)
+StringRef sys::getHostCPUName() {
+ // Use processor id to detect cpu name.
+ uint32_t processor_id;
+ __asm__("cpucfg %[prid], $zero\n\t" : [prid] "=r"(processor_id));
+ switch (processor_id & 0xff00) {
+ case 0xc000: // Loongson 64bit, 4-issue
+ return "la464";
+ // TODO: Others.
+ default:
+ break;
+ }
+ return "generic";
+}
#elif defined(__riscv)
StringRef sys::getHostCPUName() {
#if defined(__linux__)
@@ -1732,6 +1752,9 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
bool HasLeaf7Subleaf1 =
MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+ Features["sha512"] = HasLeaf7Subleaf1 && ((EAX >> 0) & 1);
+ Features["sm3"] = HasLeaf7Subleaf1 && ((EAX >> 1) & 1);
+ Features["sm4"] = HasLeaf7Subleaf1 && ((EAX >> 2) & 1);
Features["raoint"] = HasLeaf7Subleaf1 && ((EAX >> 3) & 1);
Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
@@ -1741,6 +1764,8 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["avxifma"] = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave;
+ Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave;
+ Features["avxvnniint16"] = HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave;
Features["prefetchi"] = HasLeaf7Subleaf1 && ((EDX >> 14) & 1);
bool HasLeafD = MaxLevel >= 0xd &&
@@ -1842,14 +1867,65 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
return true;
}
+#elif defined(__linux__) && defined(__loongarch__)
+#include <sys/auxv.h>
+bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
+ unsigned long hwcap = getauxval(AT_HWCAP);
+ bool HasFPU = hwcap & (1UL << 3); // HWCAP_LOONGARCH_FPU
+ uint32_t cpucfg2 = 0x2;
+ __asm__("cpucfg %[cpucfg2], %[cpucfg2]\n\t" : [cpucfg2] "+r"(cpucfg2));
+
+ Features["f"] = HasFPU && (cpucfg2 & (1U << 1)); // CPUCFG.2.FP_SP
+ Features["d"] = HasFPU && (cpucfg2 & (1U << 2)); // CPUCFG.2.FP_DP
+
+ Features["lsx"] = hwcap & (1UL << 4); // HWCAP_LOONGARCH_LSX
+ Features["lasx"] = hwcap & (1UL << 5); // HWCAP_LOONGARCH_LASX
+ Features["lvz"] = hwcap & (1UL << 9); // HWCAP_LOONGARCH_LVZ
+
+ return true;
+}
#else
bool sys::getHostCPUFeatures(StringMap<bool> &Features) { return false; }
#endif
+#if __APPLE__
+/// \returns the \p triple, but with the Host's arch spliced in.
+static Triple withHostArch(Triple T) {
+#if defined(__arm__)
+ T.setArch(Triple::arm);
+ T.setArchName("arm");
+#elif defined(__arm64e__)
+ T.setArch(Triple::aarch64, Triple::AArch64SubArch_arm64e);
+ T.setArchName("arm64e");
+#elif defined(__aarch64__)
+ T.setArch(Triple::aarch64);
+ T.setArchName("arm64");
+#elif defined(__x86_64h__)
+ T.setArch(Triple::x86_64);
+ T.setArchName("x86_64h");
+#elif defined(__x86_64__)
+ T.setArch(Triple::x86_64);
+ T.setArchName("x86_64");
+#elif defined(__powerpc__)
+ T.setArch(Triple::ppc);
+ T.setArchName("powerpc");
+#else
+# error "Unimplemented host arch fixup"
+#endif
+ return T;
+}
+#endif
+
std::string sys::getProcessTriple() {
std::string TargetTripleString = updateTripleOSVersion(LLVM_HOST_TRIPLE);
Triple PT(Triple::normalize(TargetTripleString));
+#if __APPLE__
+ /// In Universal builds, LLVM_HOST_TRIPLE will have the wrong arch in one of
+ /// the slices. This fixes that up.
+ PT = withHostArch(PT);
+#endif
+
if (sizeof(void *) == 8 && PT.isArch32Bit())
PT = PT.get64BitArchVariant();
if (sizeof(void *) == 4 && PT.isArch64Bit())
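
The Host.cpp hunks above add LoongArch host detection (CPU name via cpucfg, features via AT_HWCAP and CPUCFG word 2), split Granite Rapids D (model 0xae) out of Granite Rapids, expose the new leaf-7 subleaf-1 x86 feature bits, and splice the host arch into the triple for Apple universal builds. A Linux-only standalone sketch of the hwcap probe, using the LSX/LASX bit positions quoted in the diff (everything else is illustrative):

    // Build/run on Linux only; getauxval(AT_HWCAP) is a glibc/musl interface.
    #include <cstdio>
    #if defined(__linux__)
    #include <sys/auxv.h>
    #endif

    int main() {
    #if defined(__linux__)
      unsigned long hwcap = getauxval(AT_HWCAP);
      // Bit positions as used in the LoongArch hunk above; on non-LoongArch
      // hosts these bits mean something else, so this is purely illustrative.
      bool HasLSX = hwcap & (1UL << 4);   // HWCAP_LOONGARCH_LSX
      bool HasLASX = hwcap & (1UL << 5);  // HWCAP_LOONGARCH_LASX
      std::printf("lsx=%d lasx=%d\n", HasLSX, HasLASX);
    #else
      std::printf("AT_HWCAP probing sketch: Linux only\n");
    #endif
    }
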
diff --git a/llvm/lib/TargetParser/LoongArchTargetParser.cpp b/llvm/lib/TargetParser/LoongArchTargetParser.cpp
index faa8c314fc00..18b04600dbc6 100644
--- a/llvm/lib/TargetParser/LoongArchTargetParser.cpp
+++ b/llvm/lib/TargetParser/LoongArchTargetParser.cpp
@@ -1,4 +1,4 @@
-//==-- LoongArch64TargetParser - Parser for LoongArch64 features --*- C++ -*-=//
+//===-- LoongArchTargetParser - Parser for LoongArch features --*- C++ -*-====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -27,12 +27,11 @@ const ArchInfo AllArchs[] = {
#include "llvm/TargetParser/LoongArchTargetParser.def"
};
-LoongArch::ArchKind LoongArch::parseArch(StringRef Arch) {
+bool LoongArch::isValidArchName(StringRef Arch) {
for (const auto A : AllArchs)
if (A.Name == Arch)
- return A.Kind;
-
- return LoongArch::ArchKind::AK_INVALID;
+ return true;
+ return false;
}
bool LoongArch::getArchFeatures(StringRef Arch,
@@ -40,7 +39,7 @@ bool LoongArch::getArchFeatures(StringRef Arch,
for (const auto A : AllArchs) {
if (A.Name == Arch) {
for (const auto F : AllFeatures)
- if ((A.Features & F.Kind) == F.Kind && F.Kind != FK_INVALID)
+ if ((A.Features & F.Kind) == F.Kind)
Features.push_back(F.Name);
return true;
}
diff --git a/llvm/lib/TargetParser/RISCVTargetParser.cpp b/llvm/lib/TargetParser/RISCVTargetParser.cpp
index 89cd5c082d72..30a1023c0673 100644
--- a/llvm/lib/TargetParser/RISCVTargetParser.cpp
+++ b/llvm/lib/TargetParser/RISCVTargetParser.cpp
@@ -7,98 +7,87 @@
//===----------------------------------------------------------------------===//
//
// This file implements a target parser to recognise hardware features
-// FOR RISC-V CPUS.
+// for RISC-V CPUs.
//
//===----------------------------------------------------------------------===//
#include "llvm/TargetParser/RISCVTargetParser.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
namespace RISCV {
+enum CPUKind : unsigned {
+#define PROC(ENUM, NAME, DEFAULT_MARCH) CK_##ENUM,
+#define TUNE_PROC(ENUM, NAME) CK_##ENUM,
+#include "llvm/TargetParser/RISCVTargetParserDef.inc"
+};
+
struct CPUInfo {
StringLiteral Name;
- CPUKind Kind;
StringLiteral DefaultMarch;
- bool isInvalid() const { return DefaultMarch.empty(); }
bool is64Bit() const { return DefaultMarch.starts_with("rv64"); }
};
constexpr CPUInfo RISCVCPUInfo[] = {
#define PROC(ENUM, NAME, DEFAULT_MARCH) \
- {NAME, CK_##ENUM, DEFAULT_MARCH},
+ {NAME, DEFAULT_MARCH},
#include "llvm/TargetParser/RISCVTargetParserDef.inc"
};
-bool checkCPUKind(CPUKind Kind, bool IsRV64) {
- if (Kind == CK_INVALID)
- return false;
- return RISCVCPUInfo[static_cast<unsigned>(Kind)].is64Bit() == IsRV64;
+static const CPUInfo *getCPUInfoByName(StringRef CPU) {
+ for (auto &C : RISCVCPUInfo)
+ if (C.Name == CPU)
+ return &C;
+ return nullptr;
}
-bool checkTuneCPUKind(CPUKind Kind, bool IsRV64) {
- if (Kind == CK_INVALID)
- return false;
-#define TUNE_PROC(ENUM, NAME) \
- if (Kind == CK_##ENUM) \
- return true;
-#include "llvm/TargetParser/RISCVTargetParserDef.inc"
- return RISCVCPUInfo[static_cast<unsigned>(Kind)].is64Bit() == IsRV64;
-}
+bool parseCPU(StringRef CPU, bool IsRV64) {
+ const CPUInfo *Info = getCPUInfoByName(CPU);
-CPUKind parseCPUKind(StringRef CPU) {
- return llvm::StringSwitch<CPUKind>(CPU)
-#define PROC(ENUM, NAME, DEFAULT_MARCH) .Case(NAME, CK_##ENUM)
-#include "llvm/TargetParser/RISCVTargetParserDef.inc"
- .Default(CK_INVALID);
+ if (!Info)
+ return false;
+ return Info->is64Bit() == IsRV64;
}
-CPUKind parseTuneCPUKind(StringRef TuneCPU, bool IsRV64) {
- return llvm::StringSwitch<CPUKind>(TuneCPU)
-#define PROC(ENUM, NAME, DEFAULT_MARCH) .Case(NAME, CK_##ENUM)
+bool parseTuneCPU(StringRef TuneCPU, bool IsRV64) {
+ std::optional<CPUKind> Kind =
+ llvm::StringSwitch<std::optional<CPUKind>>(TuneCPU)
#define TUNE_PROC(ENUM, NAME) .Case(NAME, CK_##ENUM)
-#include "llvm/TargetParser/RISCVTargetParserDef.inc"
- .Default(CK_INVALID);
+ #include "llvm/TargetParser/RISCVTargetParserDef.inc"
+ .Default(std::nullopt);
+
+ if (Kind.has_value())
+ return true;
+
+ // Fallback to parsing as a CPU.
+ return parseCPU(TuneCPU, IsRV64);
}
StringRef getMArchFromMcpu(StringRef CPU) {
- CPUKind Kind = parseCPUKind(CPU);
- return RISCVCPUInfo[static_cast<unsigned>(Kind)].DefaultMarch;
+ const CPUInfo *Info = getCPUInfoByName(CPU);
+ if (!Info)
+ return "";
+ return Info->DefaultMarch;
}
void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values, bool IsRV64) {
for (const auto &C : RISCVCPUInfo) {
- if (C.Kind != CK_INVALID && IsRV64 == C.is64Bit())
+ if (IsRV64 == C.is64Bit())
Values.emplace_back(C.Name);
}
}
void fillValidTuneCPUArchList(SmallVectorImpl<StringRef> &Values, bool IsRV64) {
for (const auto &C : RISCVCPUInfo) {
- if (C.Kind != CK_INVALID && IsRV64 == C.is64Bit())
+ if (IsRV64 == C.is64Bit())
Values.emplace_back(C.Name);
}
#define TUNE_PROC(ENUM, NAME) Values.emplace_back(StringRef(NAME));
#include "llvm/TargetParser/RISCVTargetParserDef.inc"
}
-// Get all features except standard extension feature
-bool getCPUFeaturesExceptStdExt(CPUKind Kind,
- std::vector<StringRef> &Features) {
- const CPUInfo &Info = RISCVCPUInfo[static_cast<unsigned>(Kind)];
-
- if (Info.isInvalid())
- return false;
-
- if (Info.is64Bit())
- Features.push_back("+64bit");
- else
- Features.push_back("-64bit");
-
- return true;
-}
-
} // namespace RISCV
} // namespace llvm
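
The RISC-V rewrite above retires the CPUKind sentinel plumbing in favour of a plain table lookup that returns a pointer or nullptr, with parseTuneCPU falling back to parseCPU for names that are not tune-only. A standalone sketch of that lookup shape (the two table rows are stand-ins, not the contents of RISCVTargetParserDef.inc):

    #include <cstdio>
    #include <string_view>

    struct CPUInfo {                               // mirrors the trimmed struct above
      std::string_view Name;
      std::string_view DefaultMarch;
      bool is64Bit() const { return DefaultMarch.substr(0, 4) == "rv64"; }
    };

    static constexpr CPUInfo RISCVCPUInfo[] = {
        {"rocket-rv32", "rv32imafdc"},             // illustrative entries only
        {"rocket-rv64", "rv64imafdc"},
    };

    static const CPUInfo *getCPUInfoByName(std::string_view CPU) {
      for (const CPUInfo &C : RISCVCPUInfo)
        if (C.Name == CPU)
          return &C;
      return nullptr;                              // replaces CK_INVALID
    }

    static bool parseCPU(std::string_view CPU, bool IsRV64) {
      const CPUInfo *Info = getCPUInfoByName(CPU);
      return Info && Info->is64Bit() == IsRV64;
    }

    int main() {
      std::printf("%d %d %d\n",
                  parseCPU("rocket-rv64", /*IsRV64=*/true),   // 1
                  parseCPU("rocket-rv64", /*IsRV64=*/false),  // 0: wrong XLEN
                  parseCPU("no-such-cpu", /*IsRV64=*/true));  // 0: unknown name
    }
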
diff --git a/llvm/lib/MC/SubtargetFeature.cpp b/llvm/lib/TargetParser/SubtargetFeature.cpp
index d0ddfc789ba5..7c8bd44f7885 100644
--- a/llvm/lib/MC/SubtargetFeature.cpp
+++ b/llvm/lib/TargetParser/SubtargetFeature.cpp
@@ -10,15 +10,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <string>
#include <vector>
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index e9fccef0433e..7faa992e472e 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -13,7 +13,6 @@
#include "llvm/TargetParser/TargetParser.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/TargetParser/Triple.h"
using namespace llvm;
@@ -105,21 +104,25 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx90a"}, {"gfx90a"}, GK_GFX90A, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
{{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
- {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
- {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
- {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
- {{"gfx1013"}, {"gfx1013"}, GK_GFX1013, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
- {{"gfx1030"}, {"gfx1030"}, GK_GFX1030, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
- {{"gfx1031"}, {"gfx1031"}, GK_GFX1031, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
- {{"gfx1032"}, {"gfx1032"}, GK_GFX1032, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
- {{"gfx1033"}, {"gfx1033"}, GK_GFX1033, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
- {{"gfx1034"}, {"gfx1034"}, GK_GFX1034, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
- {{"gfx1035"}, {"gfx1035"}, GK_GFX1035, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
- {{"gfx1036"}, {"gfx1036"}, GK_GFX1036, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
- {{"gfx1100"}, {"gfx1100"}, GK_GFX1100, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
- {{"gfx1101"}, {"gfx1101"}, GK_GFX1101, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
- {{"gfx1102"}, {"gfx1102"}, GK_GFX1102, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
- {{"gfx1103"}, {"gfx1103"}, GK_GFX1103, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32},
+ {{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
+ {{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
+ {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
+ {{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
+ {{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
+ {{"gfx1013"}, {"gfx1013"}, GK_GFX1013, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
+ {{"gfx1030"}, {"gfx1030"}, GK_GFX1030, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1031"}, {"gfx1031"}, GK_GFX1031, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1032"}, {"gfx1032"}, GK_GFX1032, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1033"}, {"gfx1033"}, GK_GFX1033, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1034"}, {"gfx1034"}, GK_GFX1034, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1035"}, {"gfx1035"}, GK_GFX1035, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1036"}, {"gfx1036"}, GK_GFX1036, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1100"}, {"gfx1100"}, GK_GFX1100, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1101"}, {"gfx1101"}, GK_GFX1101, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1102"}, {"gfx1102"}, GK_GFX1102, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1103"}, {"gfx1103"}, GK_GFX1103, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1150"}, {"gfx1150"}, GK_GFX1150, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx1151"}, {"gfx1151"}, GK_GFX1151, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
};
const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
@@ -224,6 +227,8 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX90A: return {9, 0, 10};
case GK_GFX90C: return {9, 0, 12};
case GK_GFX940: return {9, 4, 0};
+ case GK_GFX941: return {9, 4, 1};
+ case GK_GFX942: return {9, 4, 2};
case GK_GFX1010: return {10, 1, 0};
case GK_GFX1011: return {10, 1, 1};
case GK_GFX1012: return {10, 1, 2};
@@ -239,6 +244,8 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX1101: return {11, 0, 1};
case GK_GFX1102: return {11, 0, 2};
case GK_GFX1103: return {11, 0, 3};
+ case GK_GFX1150: return {11, 5, 0};
+ case GK_GFX1151: return {11, 5, 1};
default: return {0, 0, 0};
}
}
@@ -251,3 +258,248 @@ StringRef AMDGPU::getCanonicalArchName(const Triple &T, StringRef Arch) {
return T.isAMDGCN() ? getArchNameAMDGCN(ProcKind) : getArchNameR600(ProcKind);
}
+
+void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
+ StringMap<bool> &Features) {
+ // XXX - What does the member GPU mean if a device name string is passed here?
+ if (T.isAMDGCN()) {
+ switch (parseArchAMDGCN(GPU)) {
+ case GK_GFX1151:
+ case GK_GFX1150:
+ case GK_GFX1103:
+ case GK_GFX1102:
+ case GK_GFX1101:
+ case GK_GFX1100:
+ Features["ci-insts"] = true;
+ Features["dot5-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot8-insts"] = true;
+ Features["dot9-insts"] = true;
+ Features["dot10-insts"] = true;
+ Features["dl-insts"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["gfx8-insts"] = true;
+ Features["gfx9-insts"] = true;
+ Features["gfx10-insts"] = true;
+ Features["gfx10-3-insts"] = true;
+ Features["gfx11-insts"] = true;
+ Features["atomic-fadd-rtn-insts"] = true;
+ Features["image-insts"] = true;
+ break;
+ case GK_GFX1036:
+ case GK_GFX1035:
+ case GK_GFX1034:
+ case GK_GFX1033:
+ case GK_GFX1032:
+ case GK_GFX1031:
+ case GK_GFX1030:
+ Features["ci-insts"] = true;
+ Features["dot1-insts"] = true;
+ Features["dot2-insts"] = true;
+ Features["dot5-insts"] = true;
+ Features["dot6-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot10-insts"] = true;
+ Features["dl-insts"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["gfx8-insts"] = true;
+ Features["gfx9-insts"] = true;
+ Features["gfx10-insts"] = true;
+ Features["gfx10-3-insts"] = true;
+ Features["image-insts"] = true;
+ Features["s-memrealtime"] = true;
+ Features["s-memtime-inst"] = true;
+ break;
+ case GK_GFX1012:
+ case GK_GFX1011:
+ Features["dot1-insts"] = true;
+ Features["dot2-insts"] = true;
+ Features["dot5-insts"] = true;
+ Features["dot6-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot10-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX1013:
+ case GK_GFX1010:
+ Features["dl-insts"] = true;
+ Features["ci-insts"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["gfx8-insts"] = true;
+ Features["gfx9-insts"] = true;
+ Features["gfx10-insts"] = true;
+ Features["image-insts"] = true;
+ Features["s-memrealtime"] = true;
+ Features["s-memtime-inst"] = true;
+ break;
+ case GK_GFX942:
+ case GK_GFX941:
+ case GK_GFX940:
+ Features["gfx940-insts"] = true;
+ Features["fp8-insts"] = true;
+ Features["atomic-ds-pk-add-16-insts"] = true;
+ Features["atomic-flat-pk-add-16-insts"] = true;
+ Features["atomic-global-pk-add-bf16-inst"] = true;
+ Features["gfx90a-insts"] = true;
+ Features["atomic-buffer-global-pk-add-f16-insts"] = true;
+ Features["atomic-fadd-rtn-insts"] = true;
+ Features["dot3-insts"] = true;
+ Features["dot4-insts"] = true;
+ Features["dot5-insts"] = true;
+ Features["dot6-insts"] = true;
+ Features["mai-insts"] = true;
+ Features["dl-insts"] = true;
+ Features["dot1-insts"] = true;
+ Features["dot2-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot10-insts"] = true;
+ Features["gfx9-insts"] = true;
+ Features["gfx8-insts"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["s-memrealtime"] = true;
+ Features["ci-insts"] = true;
+ Features["s-memtime-inst"] = true;
+ break;
+ case GK_GFX90A:
+ Features["gfx90a-insts"] = true;
+ Features["atomic-buffer-global-pk-add-f16-insts"] = true;
+ Features["atomic-fadd-rtn-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX908:
+ Features["dot3-insts"] = true;
+ Features["dot4-insts"] = true;
+ Features["dot5-insts"] = true;
+ Features["dot6-insts"] = true;
+ Features["mai-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX906:
+ Features["dl-insts"] = true;
+ Features["dot1-insts"] = true;
+ Features["dot2-insts"] = true;
+ Features["dot7-insts"] = true;
+ Features["dot10-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX90C:
+ case GK_GFX909:
+ case GK_GFX904:
+ case GK_GFX902:
+ case GK_GFX900:
+ Features["gfx9-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX810:
+ case GK_GFX805:
+ case GK_GFX803:
+ case GK_GFX802:
+ case GK_GFX801:
+ Features["gfx8-insts"] = true;
+ Features["16-bit-insts"] = true;
+ Features["dpp"] = true;
+ Features["s-memrealtime"] = true;
+ [[fallthrough]];
+ case GK_GFX705:
+ case GK_GFX704:
+ case GK_GFX703:
+ case GK_GFX702:
+ case GK_GFX701:
+ case GK_GFX700:
+ Features["ci-insts"] = true;
+ [[fallthrough]];
+ case GK_GFX602:
+ case GK_GFX601:
+ case GK_GFX600:
+ Features["image-insts"] = true;
+ Features["s-memtime-inst"] = true;
+ break;
+ case GK_NONE:
+ break;
+ default:
+ llvm_unreachable("Unhandled GPU!");
+ }
+ } else {
+ if (GPU.empty())
+ GPU = "r600";
+
+ switch (llvm::AMDGPU::parseArchR600(GPU)) {
+ case GK_CAYMAN:
+ case GK_CYPRESS:
+ case GK_RV770:
+ case GK_RV670:
+ // TODO: Add fp64 when implemented.
+ break;
+ case GK_TURKS:
+ case GK_CAICOS:
+ case GK_BARTS:
+ case GK_SUMO:
+ case GK_REDWOOD:
+ case GK_JUNIPER:
+ case GK_CEDAR:
+ case GK_RV730:
+ case GK_RV710:
+ case GK_RS880:
+ case GK_R630:
+ case GK_R600:
+ break;
+ default:
+ llvm_unreachable("Unhandled GPU!");
+ }
+ }
+}
+
+static bool isWave32Capable(StringRef GPU, const Triple &T) {
+ bool IsWave32Capable = false;
+ // XXX - What does the member GPU mean if a device name string is passed here?
+ if (T.isAMDGCN()) {
+ switch (parseArchAMDGCN(GPU)) {
+ case GK_GFX1151:
+ case GK_GFX1150:
+ case GK_GFX1103:
+ case GK_GFX1102:
+ case GK_GFX1101:
+ case GK_GFX1100:
+ case GK_GFX1036:
+ case GK_GFX1035:
+ case GK_GFX1034:
+ case GK_GFX1033:
+ case GK_GFX1032:
+ case GK_GFX1031:
+ case GK_GFX1030:
+ case GK_GFX1012:
+ case GK_GFX1011:
+ case GK_GFX1013:
+ case GK_GFX1010:
+ IsWave32Capable = true;
+ break;
+ default:
+ break;
+ }
+ }
+ return IsWave32Capable;
+}
+
+bool AMDGPU::insertWaveSizeFeature(StringRef GPU, const Triple &T,
+ StringMap<bool> &Features,
+ std::string &ErrorMsg) {
+ bool IsWave32Capable = isWave32Capable(GPU, T);
+ const bool IsNullGPU = GPU.empty();
+ // FIXME: Not diagnosing wavefrontsize32 on wave64 only targets.
+ const bool HaveWave32 =
+ (IsWave32Capable || IsNullGPU) && Features.count("wavefrontsize32");
+ const bool HaveWave64 = Features.count("wavefrontsize64");
+ if (HaveWave32 && HaveWave64) {
+ ErrorMsg = "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive";
+ return false;
+ }
+ // Don't assume any wavesize with an unknown subtarget.
+ if (!IsNullGPU) {
+ // Default to wave32 if available, or wave64 if not
+ if (!HaveWave32 && !HaveWave64) {
+ StringRef DefaultWaveSizeFeature =
+ IsWave32Capable ? "wavefrontsize32" : "wavefrontsize64";
+ Features.insert(std::make_pair(DefaultWaveSizeFeature, true));
+ }
+ }
+ return true;
+}
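
The new AMDGPU helpers above build a per-GPU feature map and then settle the wavefront size: wavefrontsize32 and wavefrontsize64 together are an error, an empty GPU name gets no default, and otherwise wave32 is preferred when the chip is wave32-capable. A standalone sketch of just that decision, with std::map standing in for llvm::StringMap and the capability flag passed in directly (both assumptions):

    #include <cstdio>
    #include <map>
    #include <string>

    // Mirrors the decision logic of AMDGPU::insertWaveSizeFeature above, minus
    // the real target tables; IsWave32Capable would come from the GPU name.
    static bool insertWaveSizeFeature(bool IsWave32Capable, bool IsNullGPU,
                                      std::map<std::string, bool> &Features,
                                      std::string &ErrorMsg) {
      const bool HaveWave32 =
          (IsWave32Capable || IsNullGPU) && Features.count("wavefrontsize32");
      const bool HaveWave64 = Features.count("wavefrontsize64");
      if (HaveWave32 && HaveWave64) {
        ErrorMsg = "'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive";
        return false;
      }
      // Don't assume any wavesize with an unknown subtarget.
      if (!IsNullGPU && !HaveWave32 && !HaveWave64)
        Features[IsWave32Capable ? "wavefrontsize32" : "wavefrontsize64"] = true;
      return true;
    }

    int main() {
      std::map<std::string, bool> Features;
      std::string Err;
      insertWaveSizeFeature(/*IsWave32Capable=*/true, /*IsNullGPU=*/false,
                            Features, Err);
      for (const auto &F : Features)
        std::printf("%s=%d\n", F.first.c_str(), F.second); // wavefrontsize32=1
    }
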
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index a68035989a93..a3d6a06af976 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -238,11 +238,13 @@ StringRef Triple::getOSTypeName(OSType Kind) {
case RTEMS: return "rtems";
case Solaris: return "solaris";
case TvOS: return "tvos";
+ case UEFI: return "uefi";
case WASI: return "wasi";
case WatchOS: return "watchos";
case Win32: return "windows";
case ZOS: return "zos";
case ShaderModel: return "shadermodel";
+ case LiteOS: return "liteos";
}
llvm_unreachable("Invalid OSType");
@@ -290,11 +292,27 @@ StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) {
case Callable: return "callable";
case Mesh: return "mesh";
case Amplification: return "amplification";
+ case OpenHOS: return "ohos";
}
llvm_unreachable("Invalid EnvironmentType!");
}
+StringRef Triple::getObjectFormatTypeName(ObjectFormatType Kind) {
+ switch (Kind) {
+ case UnknownObjectFormat: return "";
+ case COFF: return "coff";
+ case ELF: return "elf";
+ case GOFF: return "goff";
+ case MachO: return "macho";
+ case Wasm: return "wasm";
+ case XCOFF: return "xcoff";
+ case DXContainer: return "dxcontainer";
+ case SPIRV: return "spirv";
+ }
+ llvm_unreachable("unknown object format type");
+}
+
static Triple::ArchType parseBPFArch(StringRef ArchName) {
if (ArchName.equals("bpf")) {
if (sys::IsLittleEndianHost)
@@ -571,6 +589,7 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("netbsd", Triple::NetBSD)
.StartsWith("openbsd", Triple::OpenBSD)
.StartsWith("solaris", Triple::Solaris)
+ .StartsWith("uefi", Triple::UEFI)
.StartsWith("win32", Triple::Win32)
.StartsWith("windows", Triple::Win32)
.StartsWith("zos", Triple::ZOS)
@@ -596,6 +615,7 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("wasi", Triple::WASI)
.StartsWith("emscripten", Triple::Emscripten)
.StartsWith("shadermodel", Triple::ShaderModel)
+ .StartsWith("liteos", Triple::LiteOS)
.Default(Triple::UnknownOS);
}
@@ -640,6 +660,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
.StartsWith("callable", Triple::Callable)
.StartsWith("mesh", Triple::Mesh)
.StartsWith("amplification", Triple::Amplification)
+ .StartsWith("ohos", Triple::OpenHOS)
.Default(Triple::UnknownEnvironment);
}
@@ -770,30 +791,6 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
}
}
-static StringRef getObjectFormatTypeName(Triple::ObjectFormatType Kind) {
- switch (Kind) {
- case Triple::UnknownObjectFormat:
- return "";
- case Triple::COFF:
- return "coff";
- case Triple::ELF:
- return "elf";
- case Triple::GOFF:
- return "goff";
- case Triple::MachO:
- return "macho";
- case Triple::Wasm:
- return "wasm";
- case Triple::XCOFF:
- return "xcoff";
- case Triple::DXContainer:
- return "dxcontainer";
- case Triple::SPIRV:
- return "spirv";
- }
- llvm_unreachable("unknown object format type");
-}
-
static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
switch (T.getArch()) {
case Triple::UnknownArch:
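
The Triple hunks above add the uefi and liteos OS names and the ohos environment, and promote the object-format-to-name helper from a file-local static into Triple::getObjectFormatTypeName so other code can reuse it. The helper is an exhaustive switch with no default, so a new enumerator cannot be forgotten silently; a standalone sketch of that pattern (a trimmed enum and an unreachable() stand-in for llvm_unreachable, both assumptions):

    #include <cstdio>
    #include <cstdlib>
    #include <string_view>

    enum class ObjectFormatType { COFF, ELF, GOFF, MachO, Wasm, XCOFF,
                                  DXContainer, SPIRV };

    [[noreturn]] static void unreachable(const char *Msg) { // llvm_unreachable stand-in
      std::fprintf(stderr, "%s\n", Msg);
      std::abort();
    }

    // Exhaustive switch, no default: a new enumerator triggers a -Wswitch
    // warning here instead of silently mapping to some fallback name.
    static std::string_view getObjectFormatTypeName(ObjectFormatType Kind) {
      switch (Kind) {
      case ObjectFormatType::COFF:        return "coff";
      case ObjectFormatType::ELF:         return "elf";
      case ObjectFormatType::GOFF:        return "goff";
      case ObjectFormatType::MachO:       return "macho";
      case ObjectFormatType::Wasm:        return "wasm";
      case ObjectFormatType::XCOFF:       return "xcoff";
      case ObjectFormatType::DXContainer: return "dxcontainer";
      case ObjectFormatType::SPIRV:       return "spirv";
      }
      unreachable("unknown object format type");
    }

    int main() {
      std::printf("%s\n", getObjectFormatTypeName(ObjectFormatType::ELF).data());
    }
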
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 20770a49f5c6..8bd063116cf6 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -107,6 +107,8 @@ struct ProcInfo {
X86::CPUKind Kind;
unsigned KeyFeature;
FeatureBitset Features;
+ char Mangling;
+ bool OnlyForCPUDispatchSpecific;
};
struct FeatureInfo {
@@ -126,7 +128,7 @@ constexpr FeatureBitset FeaturesPentiumMMX =
// Pentium 2 and 3.
constexpr FeatureBitset FeaturesPentium2 =
- FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | FeatureFXSR;
+ FeatureX87 | FeatureCMPXCHG8B | FeatureMMX | FeatureFXSR | FeatureCMOV;
constexpr FeatureBitset FeaturesPentium3 = FeaturesPentium2 | FeatureSSE;
// Pentium 4 CPUs
@@ -231,8 +233,8 @@ constexpr FeatureBitset FeaturesAlderlake =
FeatureCLDEMOTE | FeatureMOVDIR64B | FeatureMOVDIRI | FeatureWAITPKG |
FeatureAVXVNNI | FeatureHRESET | FeatureWIDEKL;
constexpr FeatureBitset FeaturesSierraforest =
- FeaturesAlderlake | FeatureCMPCCXADD | FeatureAVXIFMA |
- FeatureAVXNECONVERT | FeatureAVXVNNIINT8;
+ FeaturesAlderlake | FeatureCMPCCXADD | FeatureAVXIFMA | FeatureUINTR |
+ FeatureENQCMD | FeatureAVXNECONVERT | FeatureAVXVNNIINT8;
constexpr FeatureBitset FeaturesGrandridge =
FeaturesSierraforest | FeatureRAOINT;
@@ -305,149 +307,187 @@ static constexpr FeatureBitset FeaturesZNVER4 =
FeatureAVX512VPOPCNTDQ | FeatureAVX512BF16 | FeatureGFNI |
FeatureSHSTK;
+// D151696 transplanted Mangling and OnlyForCPUDispatchSpecific from
+// X86TargetParser.def to here. They are assigned as follows:
+// 1. Copy the mangling from the original CPU_SPECIFIC macros. If there is
+//    none, assign '\0' by default, which means the CPU does not support the
+//    cpu_specific/cpu_dispatch feature.
+// 2. Set OnlyForCPUDispatchSpecific to true if the CPU name was not listed
+//    here before, which means it does not support -march, -mtune, and so on.
+// FIXME: Remove OnlyForCPUDispatchSpecific once every CPU here supports both
+// the cpu_dispatch/cpu_specific feature and -march, -mtune, and so on.
constexpr ProcInfo Processors[] = {
- // Empty processor. Include X87 and CMPXCHG8 for backwards compatibility.
- { {""}, CK_None, ~0U, FeatureX87 | FeatureCMPXCHG8B },
+ // Empty processor. Include X87 and CMPXCHG8 for backwards compatibility.
+ { {""}, CK_None, ~0U, FeatureX87 | FeatureCMPXCHG8B, '\0', false },
+ { {"generic"}, CK_None, ~0U, FeatureX87 | FeatureCMPXCHG8B | Feature64BIT, 'A', true },
// i386-generation processors.
- { {"i386"}, CK_i386, ~0U, FeatureX87 },
+ { {"i386"}, CK_i386, ~0U, FeatureX87, '\0', false },
// i486-generation processors.
- { {"i486"}, CK_i486, ~0U, FeatureX87 },
- { {"winchip-c6"}, CK_WinChipC6, ~0U, FeaturesPentiumMMX },
- { {"winchip2"}, CK_WinChip2, ~0U, FeaturesPentiumMMX | Feature3DNOW },
- { {"c3"}, CK_C3, ~0U, FeaturesPentiumMMX | Feature3DNOW },
+ { {"i486"}, CK_i486, ~0U, FeatureX87, '\0', false },
+ { {"winchip-c6"}, CK_WinChipC6, ~0U, FeaturesPentiumMMX, '\0', false },
+ { {"winchip2"}, CK_WinChip2, ~0U, FeaturesPentiumMMX | Feature3DNOW, '\0', false },
+ { {"c3"}, CK_C3, ~0U, FeaturesPentiumMMX | Feature3DNOW, '\0', false },
// i586-generation processors, P5 microarchitecture based.
- { {"i586"}, CK_i586, ~0U, FeatureX87 | FeatureCMPXCHG8B },
- { {"pentium"}, CK_Pentium, ~0U, FeatureX87 | FeatureCMPXCHG8B },
- { {"pentium-mmx"}, CK_PentiumMMX, ~0U, FeaturesPentiumMMX },
+ { {"i586"}, CK_i586, ~0U, FeatureX87 | FeatureCMPXCHG8B, '\0', false },
+ { {"pentium"}, CK_Pentium, ~0U, FeatureX87 | FeatureCMPXCHG8B, 'B', false },
+ { {"pentium-mmx"}, CK_PentiumMMX, ~0U, FeaturesPentiumMMX, '\0', false },
+ { {"pentium_mmx"}, CK_PentiumMMX, ~0U, FeaturesPentiumMMX, 'D', true },
// i686-generation processors, P6 / Pentium M microarchitecture based.
- { {"pentiumpro"}, CK_PentiumPro, ~0U, FeatureX87 | FeatureCMPXCHG8B },
- { {"i686"}, CK_i686, ~0U, FeatureX87 | FeatureCMPXCHG8B },
- { {"pentium2"}, CK_Pentium2, ~0U, FeaturesPentium2 },
- { {"pentium3"}, CK_Pentium3, ~0U, FeaturesPentium3 },
- { {"pentium3m"}, CK_Pentium3, ~0U, FeaturesPentium3 },
- { {"pentium-m"}, CK_PentiumM, ~0U, FeaturesPentium4 },
- { {"c3-2"}, CK_C3_2, ~0U, FeaturesPentium3 },
- { {"yonah"}, CK_Yonah, ~0U, FeaturesPrescott },
+ { {"pentiumpro"}, CK_PentiumPro, ~0U, FeatureCMOV | FeatureX87 | FeatureCMPXCHG8B, 'C', false },
+ { {"pentium_pro"}, CK_PentiumPro, ~0U, FeatureCMOV | FeatureX87 | FeatureCMPXCHG8B, 'C', true },
+ { {"i686"}, CK_i686, ~0U, FeatureCMOV | FeatureX87 | FeatureCMPXCHG8B, '\0', false },
+ { {"pentium2"}, CK_Pentium2, ~0U, FeaturesPentium2, 'E', false },
+ { {"pentium_ii"}, CK_Pentium2, ~0U, FeaturesPentium2, 'E', true },
+ { {"pentium3"}, CK_Pentium3, ~0U, FeaturesPentium3, 'H', false },
+ { {"pentium3m"}, CK_Pentium3, ~0U, FeaturesPentium3, 'H', false },
+ { {"pentium_iii"}, CK_Pentium3, ~0U, FeaturesPentium3, 'H', true },
+ { {"pentium_iii_no_xmm_regs"}, CK_Pentium3, ~0U, FeaturesPentium3, 'H', true },
+ { {"pentium-m"}, CK_PentiumM, ~0U, FeaturesPentium4, '\0', false },
+ { {"pentium_m"}, CK_PentiumM, ~0U, FeaturesPentium4, 'K', true },
+ { {"c3-2"}, CK_C3_2, ~0U, FeaturesPentium3, '\0', false },
+ { {"yonah"}, CK_Yonah, ~0U, FeaturesPrescott, 'L', false },
// Netburst microarchitecture based processors.
- { {"pentium4"}, CK_Pentium4, ~0U, FeaturesPentium4 },
- { {"pentium4m"}, CK_Pentium4, ~0U, FeaturesPentium4 },
- { {"prescott"}, CK_Prescott, ~0U, FeaturesPrescott },
- { {"nocona"}, CK_Nocona, ~0U, FeaturesNocona },
+ { {"pentium4"}, CK_Pentium4, ~0U, FeaturesPentium4, 'J', false },
+ { {"pentium4m"}, CK_Pentium4, ~0U, FeaturesPentium4, 'J', false },
+ { {"pentium_4"}, CK_Pentium4, ~0U, FeaturesPentium4, 'J', true },
+ { {"pentium_4_sse3"}, CK_Prescott, ~0U, FeaturesPrescott, 'L', true },
+ { {"prescott"}, CK_Prescott, ~0U, FeaturesPrescott, 'L', false },
+ { {"nocona"}, CK_Nocona, ~0U, FeaturesNocona, 'L', false },
// Core microarchitecture based processors.
- { {"core2"}, CK_Core2, FEATURE_SSSE3, FeaturesCore2 },
- { {"penryn"}, CK_Penryn, ~0U, FeaturesPenryn },
+ { {"core2"}, CK_Core2, FEATURE_SSSE3, FeaturesCore2, 'M', false },
+ { {"core_2_duo_ssse3"}, CK_Core2, ~0U, FeaturesCore2, 'M', true },
+ { {"penryn"}, CK_Penryn, ~0U, FeaturesPenryn, 'N', false },
+ { {"core_2_duo_sse4_1"}, CK_Penryn, ~0U, FeaturesPenryn, 'N', true },
// Atom processors
- { {"bonnell"}, CK_Bonnell, FEATURE_SSSE3, FeaturesBonnell },
- { {"atom"}, CK_Bonnell, FEATURE_SSSE3, FeaturesBonnell },
- { {"silvermont"}, CK_Silvermont, FEATURE_SSE4_2, FeaturesSilvermont },
- { {"slm"}, CK_Silvermont, FEATURE_SSE4_2, FeaturesSilvermont },
- { {"goldmont"}, CK_Goldmont, FEATURE_SSE4_2, FeaturesGoldmont },
- { {"goldmont-plus"}, CK_GoldmontPlus, FEATURE_SSE4_2, FeaturesGoldmontPlus },
- { {"tremont"}, CK_Tremont, FEATURE_SSE4_2, FeaturesTremont },
+ { {"bonnell"}, CK_Bonnell, FEATURE_SSSE3, FeaturesBonnell, 'O', false },
+ { {"atom"}, CK_Bonnell, FEATURE_SSSE3, FeaturesBonnell, 'O', false },
+ { {"silvermont"}, CK_Silvermont, FEATURE_SSE4_2, FeaturesSilvermont, 'c', false },
+ { {"slm"}, CK_Silvermont, FEATURE_SSE4_2, FeaturesSilvermont, 'c', false },
+ { {"atom_sse4_2"}, CK_Nehalem, FEATURE_SSE4_2, FeaturesNehalem, 'c', true },
+ { {"atom_sse4_2_movbe"}, CK_Goldmont, FEATURE_SSE4_2, FeaturesGoldmont, 'd', true },
+ { {"goldmont"}, CK_Goldmont, FEATURE_SSE4_2, FeaturesGoldmont, 'i', false },
+ { {"goldmont-plus"}, CK_GoldmontPlus, FEATURE_SSE4_2, FeaturesGoldmontPlus, '\0', false },
+ { {"goldmont_plus"}, CK_GoldmontPlus, FEATURE_SSE4_2, FeaturesGoldmontPlus, 'd', true },
+ { {"tremont"}, CK_Tremont, FEATURE_SSE4_2, FeaturesTremont, 'd', false },
// Nehalem microarchitecture based processors.
- { {"nehalem"}, CK_Nehalem, FEATURE_SSE4_2, FeaturesNehalem },
- { {"corei7"}, CK_Nehalem, FEATURE_SSE4_2, FeaturesNehalem },
+ { {"nehalem"}, CK_Nehalem, FEATURE_SSE4_2, FeaturesNehalem, 'P', false },
+ { {"core_i7_sse4_2"}, CK_Nehalem, FEATURE_SSE4_2, FeaturesNehalem, 'P', true },
+ { {"corei7"}, CK_Nehalem, FEATURE_SSE4_2, FeaturesNehalem, 'P', false },
// Westmere microarchitecture based processors.
- { {"westmere"}, CK_Westmere, FEATURE_PCLMUL, FeaturesWestmere },
+ { {"westmere"}, CK_Westmere, FEATURE_PCLMUL, FeaturesWestmere, 'Q', false },
+ { {"core_aes_pclmulqdq"}, CK_Nehalem, FEATURE_SSE4_2, FeaturesNehalem, 'Q', true },
// Sandy Bridge microarchitecture based processors.
- { {"sandybridge"}, CK_SandyBridge, FEATURE_AVX, FeaturesSandyBridge },
- { {"corei7-avx"}, CK_SandyBridge, FEATURE_AVX, FeaturesSandyBridge },
+ { {"sandybridge"}, CK_SandyBridge, FEATURE_AVX, FeaturesSandyBridge, 'R', false },
+ { {"core_2nd_gen_avx"}, CK_SandyBridge, FEATURE_AVX, FeaturesSandyBridge, 'R', true },
+ { {"corei7-avx"}, CK_SandyBridge, FEATURE_AVX, FeaturesSandyBridge, '\0', false },
// Ivy Bridge microarchitecture based processors.
- { {"ivybridge"}, CK_IvyBridge, FEATURE_AVX, FeaturesIvyBridge },
- { {"core-avx-i"}, CK_IvyBridge, FEATURE_AVX, FeaturesIvyBridge },
+ { {"ivybridge"}, CK_IvyBridge, FEATURE_AVX, FeaturesIvyBridge, 'S', false },
+ { {"core_3rd_gen_avx"}, CK_IvyBridge, FEATURE_AVX, FeaturesIvyBridge, 'S', true },
+ { {"core-avx-i"}, CK_IvyBridge, FEATURE_AVX, FeaturesIvyBridge, '\0', false },
// Haswell microarchitecture based processors.
- { {"haswell"}, CK_Haswell, FEATURE_AVX2, FeaturesHaswell },
- { {"core-avx2"}, CK_Haswell, FEATURE_AVX2, FeaturesHaswell },
+ { {"haswell"}, CK_Haswell, FEATURE_AVX2, FeaturesHaswell, 'V', false },
+ { {"core-avx2"}, CK_Haswell, FEATURE_AVX2, FeaturesHaswell, '\0', false },
+ { {"core_4th_gen_avx"}, CK_Haswell, FEATURE_AVX2, FeaturesHaswell, 'V', true },
+ { {"core_4th_gen_avx_tsx"}, CK_Haswell, FEATURE_AVX2, FeaturesHaswell, 'W', true },
// Broadwell microarchitecture based processors.
- { {"broadwell"}, CK_Broadwell, FEATURE_AVX2, FeaturesBroadwell },
+ { {"broadwell"}, CK_Broadwell, FEATURE_AVX2, FeaturesBroadwell, 'X', false },
+ { {"core_5th_gen_avx"}, CK_Broadwell, FEATURE_AVX2, FeaturesBroadwell, 'X', true },
+ { {"core_5th_gen_avx_tsx"}, CK_Broadwell, FEATURE_AVX2, FeaturesBroadwell, 'Y', true },
// Skylake client microarchitecture based processors.
- { {"skylake"}, CK_SkylakeClient, FEATURE_AVX2, FeaturesSkylakeClient },
+ { {"skylake"}, CK_SkylakeClient, FEATURE_AVX2, FeaturesSkylakeClient, 'b', false },
// Skylake server microarchitecture based processors.
- { {"skylake-avx512"}, CK_SkylakeServer, FEATURE_AVX512F, FeaturesSkylakeServer },
- { {"skx"}, CK_SkylakeServer, FEATURE_AVX512F, FeaturesSkylakeServer },
+ { {"skylake-avx512"}, CK_SkylakeServer, FEATURE_AVX512F, FeaturesSkylakeServer, '\0', false },
+ { {"skx"}, CK_SkylakeServer, FEATURE_AVX512F, FeaturesSkylakeServer, 'a', false },
+ { {"skylake_avx512"}, CK_SkylakeServer, FEATURE_AVX512F, FeaturesSkylakeServer, 'a', true },
// Cascadelake Server microarchitecture based processors.
- { {"cascadelake"}, CK_Cascadelake, FEATURE_AVX512VNNI, FeaturesCascadeLake },
+ { {"cascadelake"}, CK_Cascadelake, FEATURE_AVX512VNNI, FeaturesCascadeLake, 'o', false },
// Cooperlake Server microarchitecture based processors.
- { {"cooperlake"}, CK_Cooperlake, FEATURE_AVX512BF16, FeaturesCooperLake },
+ { {"cooperlake"}, CK_Cooperlake, FEATURE_AVX512BF16, FeaturesCooperLake, 'f', false },
// Cannonlake client microarchitecture based processors.
- { {"cannonlake"}, CK_Cannonlake, FEATURE_AVX512VBMI, FeaturesCannonlake },
+ { {"cannonlake"}, CK_Cannonlake, FEATURE_AVX512VBMI, FeaturesCannonlake, 'e', false },
// Icelake client microarchitecture based processors.
- { {"icelake-client"}, CK_IcelakeClient, FEATURE_AVX512VBMI2, FeaturesICLClient },
+ { {"icelake-client"}, CK_IcelakeClient, FEATURE_AVX512VBMI2, FeaturesICLClient, '\0', false },
+ { {"icelake_client"}, CK_IcelakeClient, FEATURE_AVX512VBMI2, FeaturesICLClient, 'k', true },
// Rocketlake microarchitecture based processors.
- { {"rocketlake"}, CK_Rocketlake, FEATURE_AVX512VBMI2, FeaturesRocketlake },
+ { {"rocketlake"}, CK_Rocketlake, FEATURE_AVX512VBMI2, FeaturesRocketlake, 'k', false },
// Icelake server microarchitecture based processors.
- { {"icelake-server"}, CK_IcelakeServer, FEATURE_AVX512VBMI2, FeaturesICLServer },
+ { {"icelake-server"}, CK_IcelakeServer, FEATURE_AVX512VBMI2, FeaturesICLServer, '\0', false },
+ { {"icelake_server"}, CK_IcelakeServer, FEATURE_AVX512VBMI2, FeaturesICLServer, 'k', true },
// Tigerlake microarchitecture based processors.
- { {"tigerlake"}, CK_Tigerlake, FEATURE_AVX512VP2INTERSECT, FeaturesTigerlake },
+ { {"tigerlake"}, CK_Tigerlake, FEATURE_AVX512VP2INTERSECT, FeaturesTigerlake, 'l', false },
// Sapphire Rapids microarchitecture based processors.
- { {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512BF16, FeaturesSapphireRapids },
+ { {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512BF16, FeaturesSapphireRapids, 'n', false },
// Alderlake microarchitecture based processors.
- { {"alderlake"}, CK_Alderlake, FEATURE_AVX2, FeaturesAlderlake },
+ { {"alderlake"}, CK_Alderlake, FEATURE_AVX2, FeaturesAlderlake, 'p', false },
// Raptorlake microarchitecture based processors.
- { {"raptorlake"}, CK_Raptorlake, FEATURE_AVX2, FeaturesAlderlake },
+ { {"raptorlake"}, CK_Raptorlake, FEATURE_AVX2, FeaturesAlderlake, 'p', false },
// Meteorlake microarchitecture based processors.
- { {"meteorlake"}, CK_Meteorlake, FEATURE_AVX2, FeaturesAlderlake },
+ { {"meteorlake"}, CK_Meteorlake, FEATURE_AVX2, FeaturesAlderlake, 'p', false },
// Sierraforest microarchitecture based processors.
- { {"sierraforest"}, CK_Sierraforest, FEATURE_AVX2, FeaturesSierraforest },
+ { {"sierraforest"}, CK_Sierraforest, FEATURE_AVX2, FeaturesSierraforest, 'p', false },
// Grandridge microarchitecture based processors.
- { {"grandridge"}, CK_Grandridge, FEATURE_AVX2, FeaturesGrandridge },
+ { {"grandridge"}, CK_Grandridge, FEATURE_AVX2, FeaturesGrandridge, 'p', false },
// Granite Rapids microarchitecture based processors.
- { {"graniterapids"}, CK_Graniterapids, FEATURE_AVX512BF16, FeaturesGraniteRapids },
+ { {"graniterapids"}, CK_Graniterapids, FEATURE_AVX512BF16, FeaturesGraniteRapids, 'n', false },
+ // Granite Rapids D microarchitecture based processors.
+ { {"graniterapids-d"}, CK_GraniterapidsD, FEATURE_AVX512BF16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, '\0', false },
+ { {"graniterapids_d"}, CK_GraniterapidsD, FEATURE_AVX512BF16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, 'n', true },
// Emerald Rapids microarchitecture based processors.
- { {"emeraldrapids"}, CK_Emeraldrapids, FEATURE_AVX512BF16, FeaturesSapphireRapids },
+ { {"emeraldrapids"}, CK_Emeraldrapids, FEATURE_AVX512BF16, FeaturesSapphireRapids, 'n', false },
// Knights Landing processor.
- { {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL },
+ { {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL, 'Z', false },
+ { {"mic_avx512"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL, 'Z', true },
// Knights Mill processor.
- { {"knm"}, CK_KNM, FEATURE_AVX5124FMAPS, FeaturesKNM },
+ { {"knm"}, CK_KNM, FEATURE_AVX5124FMAPS, FeaturesKNM, 'j', false },
// Lakemont microarchitecture based processors.
- { {"lakemont"}, CK_Lakemont, ~0U, FeatureCMPXCHG8B },
+ { {"lakemont"}, CK_Lakemont, ~0U, FeatureCMPXCHG8B, '\0', false },
// K6 architecture processors.
- { {"k6"}, CK_K6, ~0U, FeaturesK6 },
- { {"k6-2"}, CK_K6_2, ~0U, FeaturesK6 | Feature3DNOW },
- { {"k6-3"}, CK_K6_3, ~0U, FeaturesK6 | Feature3DNOW },
+ { {"k6"}, CK_K6, ~0U, FeaturesK6, '\0', false },
+ { {"k6-2"}, CK_K6_2, ~0U, FeaturesK6 | Feature3DNOW, '\0', false },
+ { {"k6-3"}, CK_K6_3, ~0U, FeaturesK6 | Feature3DNOW, '\0', false },
// K7 architecture processors.
- { {"athlon"}, CK_Athlon, ~0U, FeaturesAthlon },
- { {"athlon-tbird"}, CK_Athlon, ~0U, FeaturesAthlon },
- { {"athlon-xp"}, CK_AthlonXP, ~0U, FeaturesAthlonXP },
- { {"athlon-mp"}, CK_AthlonXP, ~0U, FeaturesAthlonXP },
- { {"athlon-4"}, CK_AthlonXP, ~0U, FeaturesAthlonXP },
+ { {"athlon"}, CK_Athlon, ~0U, FeaturesAthlon, '\0', false },
+ { {"athlon-tbird"}, CK_Athlon, ~0U, FeaturesAthlon, '\0', false },
+ { {"athlon-xp"}, CK_AthlonXP, ~0U, FeaturesAthlonXP, '\0', false },
+ { {"athlon-mp"}, CK_AthlonXP, ~0U, FeaturesAthlonXP, '\0', false },
+ { {"athlon-4"}, CK_AthlonXP, ~0U, FeaturesAthlonXP, '\0', false },
// K8 architecture processors.
- { {"k8"}, CK_K8, ~0U, FeaturesK8 },
- { {"athlon64"}, CK_K8, ~0U, FeaturesK8 },
- { {"athlon-fx"}, CK_K8, ~0U, FeaturesK8 },
- { {"opteron"}, CK_K8, ~0U, FeaturesK8 },
- { {"k8-sse3"}, CK_K8SSE3, ~0U, FeaturesK8SSE3 },
- { {"athlon64-sse3"}, CK_K8SSE3, ~0U, FeaturesK8SSE3 },
- { {"opteron-sse3"}, CK_K8SSE3, ~0U, FeaturesK8SSE3 },
- { {"amdfam10"}, CK_AMDFAM10, FEATURE_SSE4_A, FeaturesAMDFAM10 },
- { {"barcelona"}, CK_AMDFAM10, FEATURE_SSE4_A, FeaturesAMDFAM10 },
+ { {"k8"}, CK_K8, ~0U, FeaturesK8, '\0', false },
+ { {"athlon64"}, CK_K8, ~0U, FeaturesK8, '\0', false },
+ { {"athlon-fx"}, CK_K8, ~0U, FeaturesK8, '\0', false },
+ { {"opteron"}, CK_K8, ~0U, FeaturesK8, '\0', false },
+ { {"k8-sse3"}, CK_K8SSE3, ~0U, FeaturesK8SSE3, '\0', false },
+ { {"athlon64-sse3"}, CK_K8SSE3, ~0U, FeaturesK8SSE3, '\0', false },
+ { {"opteron-sse3"}, CK_K8SSE3, ~0U, FeaturesK8SSE3, '\0', false },
+ { {"amdfam10"}, CK_AMDFAM10, FEATURE_SSE4_A, FeaturesAMDFAM10, '\0', false },
+ { {"barcelona"}, CK_AMDFAM10, FEATURE_SSE4_A, FeaturesAMDFAM10, '\0', false },
// Bobcat architecture processors.
- { {"btver1"}, CK_BTVER1, FEATURE_SSE4_A, FeaturesBTVER1 },
- { {"btver2"}, CK_BTVER2, FEATURE_BMI, FeaturesBTVER2 },
+ { {"btver1"}, CK_BTVER1, FEATURE_SSE4_A, FeaturesBTVER1, '\0', false },
+ { {"btver2"}, CK_BTVER2, FEATURE_BMI, FeaturesBTVER2, '\0', false },
// Bulldozer architecture processors.
- { {"bdver1"}, CK_BDVER1, FEATURE_XOP, FeaturesBDVER1 },
- { {"bdver2"}, CK_BDVER2, FEATURE_FMA, FeaturesBDVER2 },
- { {"bdver3"}, CK_BDVER3, FEATURE_FMA, FeaturesBDVER3 },
- { {"bdver4"}, CK_BDVER4, FEATURE_AVX2, FeaturesBDVER4 },
+ { {"bdver1"}, CK_BDVER1, FEATURE_XOP, FeaturesBDVER1, '\0', false },
+ { {"bdver2"}, CK_BDVER2, FEATURE_FMA, FeaturesBDVER2, '\0', false },
+ { {"bdver3"}, CK_BDVER3, FEATURE_FMA, FeaturesBDVER3, '\0', false },
+ { {"bdver4"}, CK_BDVER4, FEATURE_AVX2, FeaturesBDVER4, '\0', false },
// Zen architecture processors.
- { {"znver1"}, CK_ZNVER1, FEATURE_AVX2, FeaturesZNVER1 },
- { {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2 },
- { {"znver3"}, CK_ZNVER3, FEATURE_AVX2, FeaturesZNVER3 },
- { {"znver4"}, CK_ZNVER4, FEATURE_AVX512VBMI2, FeaturesZNVER4 },
+ { {"znver1"}, CK_ZNVER1, FEATURE_AVX2, FeaturesZNVER1, '\0', false },
+ { {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2, '\0', false },
+ { {"znver3"}, CK_ZNVER3, FEATURE_AVX2, FeaturesZNVER3, '\0', false },
+ { {"znver4"}, CK_ZNVER4, FEATURE_AVX512VBMI2, FeaturesZNVER4, '\0', false },
// Generic 64-bit processor.
- { {"x86-64"}, CK_x86_64, ~0U, FeaturesX86_64 },
- { {"x86-64-v2"}, CK_x86_64_v2, ~0U, FeaturesX86_64_V2 },
- { {"x86-64-v3"}, CK_x86_64_v3, ~0U, FeaturesX86_64_V3 },
- { {"x86-64-v4"}, CK_x86_64_v4, ~0U, FeaturesX86_64_V4 },
+ { {"x86-64"}, CK_x86_64, ~0U, FeaturesX86_64, '\0', false },
+ { {"x86-64-v2"}, CK_x86_64_v2, ~0U, FeaturesX86_64_V2, '\0', false },
+ { {"x86-64-v3"}, CK_x86_64_v3, ~0U, FeaturesX86_64_V3, '\0', false },
+ { {"x86-64-v4"}, CK_x86_64_v4, ~0U, FeaturesX86_64_V4, '\0', false },
// Geode processors.
- { {"geode"}, CK_Geode, ~0U, FeaturesGeode },
+ { {"geode"}, CK_Geode, ~0U, FeaturesGeode, '\0', false },
};
constexpr const char *NoTuneList[] = {"x86-64-v2", "x86-64-v3", "x86-64-v4"};
X86::CPUKind llvm::X86::parseArchX86(StringRef CPU, bool Only64Bit) {
for (const auto &P : Processors)
- if (P.Name == CPU && (P.Features[FEATURE_64BIT] || !Only64Bit))
+ if (!P.OnlyForCPUDispatchSpecific && P.Name == CPU &&
+ (P.Features[FEATURE_64BIT] || !Only64Bit))
return P.Kind;
return CK_None;
@@ -462,14 +502,16 @@ X86::CPUKind llvm::X86::parseTuneCPU(StringRef CPU, bool Only64Bit) {
void llvm::X86::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values,
bool Only64Bit) {
for (const auto &P : Processors)
- if (!P.Name.empty() && (P.Features[FEATURE_64BIT] || !Only64Bit))
+ if (!P.OnlyForCPUDispatchSpecific && !P.Name.empty() &&
+ (P.Features[FEATURE_64BIT] || !Only64Bit))
Values.emplace_back(P.Name);
}
void llvm::X86::fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values,
bool Only64Bit) {
for (const ProcInfo &P : Processors)
- if (!P.Name.empty() && (P.Features[FEATURE_64BIT] || !Only64Bit) &&
+ if (!P.OnlyForCPUDispatchSpecific && !P.Name.empty() &&
+ (P.Features[FEATURE_64BIT] || !Only64Bit) &&
!llvm::is_contained(NoTuneList, P.Name))
Values.emplace_back(P.Name);
}
@@ -573,6 +615,8 @@ constexpr FeatureBitset ImpliedFeaturesPCLMUL = FeatureSSE2;
constexpr FeatureBitset ImpliedFeaturesSHA = FeatureSSE2;
constexpr FeatureBitset ImpliedFeaturesVAES = FeatureAES | FeatureAVX;
constexpr FeatureBitset ImpliedFeaturesVPCLMULQDQ = FeatureAVX | FeaturePCLMUL;
+constexpr FeatureBitset ImpliedFeaturesSM3 = FeatureAVX;
+constexpr FeatureBitset ImpliedFeaturesSM4 = FeatureAVX;
// AVX512 features.
constexpr FeatureBitset ImpliedFeaturesAVX512CD = FeatureAVX512F;
@@ -606,14 +650,17 @@ constexpr FeatureBitset ImpliedFeaturesAMX_TILE = {};
constexpr FeatureBitset ImpliedFeaturesAMX_BF16 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_FP16 = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesAMX_INT8 = FeatureAMX_TILE;
+constexpr FeatureBitset ImpliedFeaturesAMX_COMPLEX = FeatureAMX_TILE;
constexpr FeatureBitset ImpliedFeaturesHRESET = {};
constexpr FeatureBitset ImpliedFeaturesPREFETCHI = {};
constexpr FeatureBitset ImpliedFeaturesCMPCCXADD = {};
constexpr FeatureBitset ImpliedFeaturesRAOINT = {};
+constexpr FeatureBitset ImpliedFeaturesAVXVNNIINT16 = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVXVNNIINT8 = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVXIFMA = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVXNECONVERT = FeatureAVX2;
+constexpr FeatureBitset ImpliedFeaturesSHA512 = FeatureAVX;
constexpr FeatureBitset ImpliedFeaturesAVX512FP16 =
FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL;
// Key Locker Features
@@ -628,8 +675,14 @@ constexpr FeatureInfo FeatureInfos[X86::CPU_FEATURE_MAX] = {
#include "llvm/TargetParser/X86TargetParser.def"
};
+constexpr FeatureInfo FeatureInfos_WithPLUS[X86::CPU_FEATURE_MAX] = {
+#define X86_FEATURE(ENUM, STR) {{"+" STR}, ImpliedFeatures##ENUM},
+#include "llvm/TargetParser/X86TargetParser.def"
+};
+
void llvm::X86::getFeaturesForCPU(StringRef CPU,
- SmallVectorImpl<StringRef> &EnabledFeatures) {
+ SmallVectorImpl<StringRef> &EnabledFeatures,
+ bool IfNeedPlus) {
auto I = llvm::find_if(Processors,
[&](const ProcInfo &P) { return P.Name == CPU; });
assert(I != std::end(Processors) && "Processor not found!");
@@ -642,8 +695,11 @@ void llvm::X86::getFeaturesForCPU(StringRef CPU,
// Add the string version of all set bits.
for (unsigned i = 0; i != CPU_FEATURE_MAX; ++i)
- if (Bits[i] && !FeatureInfos[i].Name.empty())
- EnabledFeatures.push_back(FeatureInfos[i].Name);
+ if (Bits[i] && !FeatureInfos[i].Name.empty() &&
+ !FeatureInfos_WithPLUS[i].Name.empty()){
+ EnabledFeatures.push_back(IfNeedPlus ? FeatureInfos_WithPLUS[i].Name
+ : FeatureInfos[i].Name);
+ }
}
// For each feature that is (transitively) implied by this feature, set it.
@@ -701,6 +757,20 @@ void llvm::X86::updateImpliedFeatures(
Features[FeatureInfos[i].Name] = Enabled;
}
+char llvm::X86::getCPUDispatchMangling(StringRef CPU) {
+ auto I = llvm::find_if(Processors,
+ [&](const ProcInfo &P) { return P.Name == CPU; });
+ assert(I != std::end(Processors) && "Processor not found!");
+  assert(I->Mangling != '\0' && "Processor doesn't support function multiversioning!");
+ return I->Mangling;
+}
+
+bool llvm::X86::validateCPUSpecificCPUDispatch(StringRef Name) {
+ auto I = llvm::find_if(Processors,
+ [&](const ProcInfo &P) { return P.Name == Name; });
+ return I != std::end(Processors);
+}
+
uint64_t llvm::X86::getCpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
// Processor features and mapping to processor feature value.
uint64_t FeaturesMask = 0;
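
A minimal usage sketch (not part of this import) for the new X86 target-parser entry points added above; the translation unit, the guard logic, and the example CPU name "znver4" are illustrative assumptions rather than code from the diff:

// Illustrative sketch only; not part of the vendor import.
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/X86TargetParser.h"

// Print the feature strings implied by a CPU name, using the new
// IfNeedPlus mode that yields "+feature" spellings (e.g. "+avx2").
static void dumpCPUFeatures(llvm::StringRef CPU) {
  // validateCPUSpecificCPUDispatch only checks that the name is a known
  // entry in the Processors table, which also satisfies the assertion
  // inside getFeaturesForCPU.
  if (!llvm::X86::validateCPUSpecificCPUDispatch(CPU))
    return;

  llvm::SmallVector<llvm::StringRef, 32> Features;
  llvm::X86::getFeaturesForCPU(CPU, Features, /*IfNeedPlus=*/true);
  for (llvm::StringRef Feature : Features)
    llvm::outs() << Feature << '\n';
}

For example, dumpCPUFeatures("znver4") would list the znver4 feature set with "+" prefixes, matching the strings stored in FeatureInfos_WithPLUS above.
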
diff --git a/llvm/lib/TextAPI/Architecture.cpp b/llvm/lib/TextAPI/Architecture.cpp
index bb349b21774e..51ca91db1300 100644
--- a/llvm/lib/TextAPI/Architecture.cpp
+++ b/llvm/lib/TextAPI/Architecture.cpp
@@ -12,10 +12,10 @@
#include "llvm/TextAPI/Architecture.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/MachO.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
namespace MachO {
diff --git a/llvm/lib/TextAPI/InterfaceFile.cpp b/llvm/lib/TextAPI/InterfaceFile.cpp
index 1156a39228e7..b7f967aa754e 100644
--- a/llvm/lib/TextAPI/InterfaceFile.cpp
+++ b/llvm/lib/TextAPI/InterfaceFile.cpp
@@ -17,31 +17,6 @@
using namespace llvm;
using namespace llvm::MachO;
-namespace {
-template <typename C>
-typename C::iterator addEntry(C &Container, StringRef InstallName) {
- auto I = partition_point(Container, [=](const InterfaceFileRef &O) {
- return O.getInstallName() < InstallName;
- });
- if (I != Container.end() && I->getInstallName() == InstallName)
- return I;
-
- return Container.emplace(I, InstallName);
-}
-
-template <typename C>
-typename C::iterator addEntry(C &Container, const Target &Target_) {
- auto Iter =
- lower_bound(Container, Target_, [](const Target &LHS, const Target &RHS) {
- return LHS < RHS;
- });
- if ((Iter != std::end(Container)) && !(Target_ < *Iter))
- return Iter;
-
- return Container.insert(Iter, Target_);
-}
-} // end namespace
-
void InterfaceFileRef::addTarget(const Target &Target) {
addEntry(Targets, Target);
}
@@ -71,28 +46,17 @@ void InterfaceFile::addParentUmbrella(const Target &Target_, StringRef Parent) {
ParentUmbrellas.emplace(Iter, Target_, std::string(Parent));
}
-void InterfaceFile::addUUID(const Target &Target_, StringRef UUID) {
- auto Iter = lower_bound(UUIDs, Target_,
+void InterfaceFile::addRPath(const Target &InputTarget, StringRef RPath) {
+ auto Iter = lower_bound(RPaths, InputTarget,
[](const std::pair<Target, std::string> &LHS,
Target RHS) { return LHS.first < RHS; });
- if ((Iter != UUIDs.end()) && !(Target_ < Iter->first)) {
- Iter->second = std::string(UUID);
+ if ((Iter != RPaths.end()) && !(InputTarget < Iter->first)) {
+ Iter->second = std::string(RPath);
return;
}
- UUIDs.emplace(Iter, Target_, std::string(UUID));
-}
-
-void InterfaceFile::addUUID(const Target &Target, uint8_t UUID[16]) {
- std::stringstream Stream;
- for (unsigned i = 0; i < 16; ++i) {
- if (i == 4 || i == 6 || i == 8 || i == 10)
- Stream << '-';
- Stream << std::setfill('0') << std::setw(2) << std::uppercase << std::hex
- << static_cast<int>(UUID[i]);
- }
- addUUID(Target, Stream.str());
+ RPaths.emplace(Iter, InputTarget, std::string(RPath));
}
void InterfaceFile::addTarget(const Target &Target) {
@@ -107,17 +71,6 @@ InterfaceFile::targets(ArchitectureSet Archs) const {
return make_filter_range(Targets, fn);
}
-void InterfaceFile::addSymbol(SymbolKind Kind, StringRef Name,
- const TargetList &Targets, SymbolFlags Flags) {
- Name = copyString(Name);
- auto result = Symbols.try_emplace(SymbolsMapKey{Kind, Name}, nullptr);
- if (result.second)
- result.first->second = new (Allocator) Symbol{Kind, Name, Targets, Flags};
- else
- for (const auto &Target : Targets)
- result.first->second->addTarget(Target);
-}
-
void InterfaceFile::addDocument(std::shared_ptr<InterfaceFile> &&Document) {
auto Pos = llvm::lower_bound(Documents, Document,
[](const std::shared_ptr<InterfaceFile> &LHS,
@@ -128,6 +81,10 @@ void InterfaceFile::addDocument(std::shared_ptr<InterfaceFile> &&Document) {
Documents.insert(Pos, Document);
}
+static bool isYAMLTextStub(const FileType &Kind) {
+ return (Kind >= FileType::TBD_V1) && (Kind < FileType::TBD_V5);
+}
+
bool InterfaceFile::operator==(const InterfaceFile &O) const {
if (Targets != O.Targets)
return false;
@@ -142,16 +99,23 @@ bool InterfaceFile::operator==(const InterfaceFile &O) const {
return false;
if (IsAppExtensionSafe != O.IsAppExtensionSafe)
return false;
- if (IsInstallAPI != O.IsInstallAPI)
- return false;
if (ParentUmbrellas != O.ParentUmbrellas)
return false;
if (AllowableClients != O.AllowableClients)
return false;
if (ReexportedLibraries != O.ReexportedLibraries)
return false;
- if (Symbols != O.Symbols)
+ if (*SymbolsSet != *O.SymbolsSet)
return false;
+ // Don't compare run search paths for older filetypes that cannot express
+ // them.
+ if (!(isYAMLTextStub(FileKind)) && !(isYAMLTextStub(O.FileKind))) {
+ if (RPaths != O.RPaths)
+ return false;
+ if (mapToPlatformVersionSet(Targets) != mapToPlatformVersionSet(O.Targets))
+ return false;
+ }
+
if (!std::equal(Documents.begin(), Documents.end(), O.Documents.begin(),
O.Documents.end(),
[](const std::shared_ptr<InterfaceFile> LHS,
diff --git a/llvm/lib/TextAPI/PackedVersion.cpp b/llvm/lib/TextAPI/PackedVersion.cpp
index 67fb30aeb127..22960c33e9ee 100644
--- a/llvm/lib/TextAPI/PackedVersion.cpp
+++ b/llvm/lib/TextAPI/PackedVersion.cpp
@@ -100,6 +100,13 @@ std::pair<bool, bool> PackedVersion::parse64(StringRef Str) {
return std::make_pair(true, Truncated);
}
+PackedVersion::operator std::string() const {
+ SmallString<32> Str;
+ raw_svector_ostream OS(Str);
+ print(OS);
+ return std::string(Str);
+}
+
void PackedVersion::print(raw_ostream &OS) const {
OS << format("%d", getMajor());
if (getMinor() || getSubminor())
diff --git a/llvm/lib/TextAPI/Platform.cpp b/llvm/lib/TextAPI/Platform.cpp
index c3c74252301e..d0575847a876 100644
--- a/llvm/lib/TextAPI/Platform.cpp
+++ b/llvm/lib/TextAPI/Platform.cpp
@@ -13,7 +13,7 @@
#include "llvm/TextAPI/Platform.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
namespace MachO {
@@ -90,6 +90,7 @@ StringRef getPlatformName(PlatformType Platform) {
PlatformType getPlatformFromName(StringRef Name) {
return StringSwitch<PlatformType>(Name)
+ .Case("osx", PLATFORM_MACOS)
.Case("macos", PLATFORM_MACOS)
.Case("ios", PLATFORM_IOS)
.Case("tvos", PLATFORM_TVOS)
@@ -132,5 +133,12 @@ std::string getOSAndEnvironmentName(PlatformType Platform,
llvm_unreachable("Unknown llvm::MachO::PlatformType enum");
}
+VersionTuple mapToSupportedOSVersion(const Triple &Triple) {
+ const VersionTuple MinSupportedOS = Triple.getMinimumSupportedOSVersion();
+ if (MinSupportedOS > Triple.getOSVersion())
+ return MinSupportedOS;
+ return Triple.getOSVersion();
+}
+
} // end namespace MachO.
} // end namespace llvm.
diff --git a/llvm/lib/TextAPI/Symbol.cpp b/llvm/lib/TextAPI/Symbol.cpp
index 041f553c66f3..20fa6362716a 100644
--- a/llvm/lib/TextAPI/Symbol.cpp
+++ b/llvm/lib/TextAPI/Symbol.cpp
@@ -54,5 +54,23 @@ Symbol::targets(ArchitectureSet Architectures) const {
return make_filter_range(Targets, FN);
}
+bool Symbol::operator==(const Symbol &O) const {
+ // Older Tapi files do not express all these symbol flags. In those
+ // cases, ignore those differences.
+ auto RemoveFlag = [](const Symbol &Sym, SymbolFlags &Flag) {
+ if (Sym.isData())
+ Flag &= ~SymbolFlags::Data;
+ if (Sym.isText())
+ Flag &= ~SymbolFlags::Text;
+ };
+ SymbolFlags LHSFlags = Flags;
+ SymbolFlags RHSFlags = O.Flags;
+ // Ignore Text and Data for now.
+ RemoveFlag(*this, LHSFlags);
+ RemoveFlag(O, RHSFlags);
+ return std::tie(Name, Kind, Targets, LHSFlags) ==
+ std::tie(O.Name, O.Kind, O.Targets, RHSFlags);
+}
+
} // end namespace MachO.
} // end namespace llvm.
diff --git a/llvm/lib/TextAPI/SymbolSet.cpp b/llvm/lib/TextAPI/SymbolSet.cpp
new file mode 100644
index 000000000000..157e13749729
--- /dev/null
+++ b/llvm/lib/TextAPI/SymbolSet.cpp
@@ -0,0 +1,36 @@
+//===- lib/TextAPI/SymbolSet.cpp - TAPI Symbol Set ------------*- C++-*----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/TextAPI/SymbolSet.h"
+
+using namespace llvm;
+using namespace llvm::MachO;
+
+Symbol *SymbolSet::addGlobalImpl(SymbolKind Kind, StringRef Name,
+ SymbolFlags Flags) {
+ Name = copyString(Name);
+ auto Result = Symbols.try_emplace(SymbolsMapKey{Kind, Name}, nullptr);
+ if (Result.second)
+ Result.first->second =
+ new (Allocator) Symbol{Kind, Name, TargetList(), Flags};
+ return Result.first->second;
+}
+
+Symbol *SymbolSet::addGlobal(SymbolKind Kind, StringRef Name, SymbolFlags Flags,
+ const Target &Targ) {
+ auto *Sym = addGlobalImpl(Kind, Name, Flags);
+ Sym->addTarget(Targ);
+ return Sym;
+}
+
+const Symbol *SymbolSet::findSymbol(SymbolKind Kind, StringRef Name) const {
+ auto It = Symbols.find({Kind, Name});
+ if (It != Symbols.end())
+ return It->second;
+ return nullptr;
+}
diff --git a/llvm/lib/TextAPI/Target.cpp b/llvm/lib/TextAPI/Target.cpp
index c54c3bd66b9d..e20842498331 100644
--- a/llvm/lib/TextAPI/Target.cpp
+++ b/llvm/lib/TextAPI/Target.cpp
@@ -46,7 +46,10 @@ Expected<Target> Target::create(StringRef TargetValue) {
}
Target::operator std::string() const {
- return (getArchitectureName(Arch) + " (" + getPlatformName(Platform) + ")")
+ auto Version = MinDeployment.empty() ? "" : MinDeployment.getAsString();
+
+ return (getArchitectureName(Arch) + " (" + getPlatformName(Platform) +
+ Version + ")")
.str();
}
@@ -55,6 +58,13 @@ raw_ostream &operator<<(raw_ostream &OS, const Target &Target) {
return OS;
}
+PlatformVersionSet mapToPlatformVersionSet(ArrayRef<Target> Targets) {
+ PlatformVersionSet Result;
+ for (const auto &Target : Targets)
+ Result.insert({Target.Platform, Target.MinDeployment});
+ return Result;
+}
+
PlatformSet mapToPlatformSet(ArrayRef<Target> Targets) {
PlatformSet Result;
for (const auto &Target : Targets)
@@ -70,8 +80,11 @@ ArchitectureSet mapToArchitectureSet(ArrayRef<Target> Targets) {
}
std::string getTargetTripleName(const Target &Targ) {
+ auto Version =
+ Targ.MinDeployment.empty() ? "" : Targ.MinDeployment.getAsString();
+
return (getArchitectureName(Targ.Arch) + "-apple-" +
- getOSAndEnvironmentName(Targ.Platform))
+ getOSAndEnvironmentName(Targ.Platform, Version))
.str();
}
diff --git a/llvm/lib/TextAPI/TextStub.cpp b/llvm/lib/TextAPI/TextStub.cpp
index ff93e43356f7..78de3ebf3f3a 100644
--- a/llvm/lib/TextAPI/TextStub.cpp
+++ b/llvm/lib/TextAPI/TextStub.cpp
@@ -258,16 +258,6 @@ struct UUIDv4 {
UUIDv4(const Target &TargetID, const std::string &Value)
: TargetID(TargetID), Value(Value) {}
};
-
-// clang-format off
-enum TBDFlags : unsigned {
- None = 0U,
- FlatNamespace = 1U << 0,
- NotApplicationExtensionSafe = 1U << 1,
- InstallAPI = 1U << 2,
- LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/InstallAPI),
-};
-// clang-format on
} // end anonymous namespace.
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(Architecture)
@@ -437,7 +427,6 @@ template <> struct MappingTraits<const InterfaceFile *> {
explicit NormalizedTBD(IO &IO) {}
NormalizedTBD(IO &IO, const InterfaceFile *&File) {
Architectures = File->getArchitectures();
- UUIDs = File->uuids();
Platforms = File->getPlatforms();
InstallName = File->getInstallName();
CurrentVersion = PackedVersion(File->getCurrentVersion());
@@ -452,9 +441,6 @@ template <> struct MappingTraits<const InterfaceFile *> {
if (!File->isTwoLevelNamespace())
Flags |= TBDFlags::FlatNamespace;
- if (File->isInstallAPI())
- Flags |= TBDFlags::InstallAPI;
-
if (!File->umbrellas().empty())
ParentUmbrella = File->umbrellas().begin()->second;
@@ -466,7 +452,7 @@ template <> struct MappingTraits<const InterfaceFile *> {
ArchSet.insert(Library.getArchitectures());
std::map<const Symbol *, ArchitectureSet> SymbolToArchSet;
- for (const auto *Symbol : File->exports()) {
+ for (const auto *Symbol : File->symbols()) {
auto Architectures = Symbol->getArchitectures();
SymbolToArchSet[Symbol] = Architectures;
ArchSet.insert(Architectures);
@@ -617,8 +603,6 @@ template <> struct MappingTraits<const InterfaceFile *> {
File->setPath(Ctx->Path);
File->setFileType(Ctx->FileKind);
File->addTargets(synthesizeTargets(Architectures, Platforms));
- for (auto &ID : UUIDs)
- File->addUUID(ID.first, ID.second);
File->setInstallName(InstallName);
File->setCurrentVersion(CurrentVersion);
File->setCompatibilityVersion(CompatibilityVersion);
@@ -634,7 +618,6 @@ template <> struct MappingTraits<const InterfaceFile *> {
File->setTwoLevelNamespace(!(Flags & TBDFlags::FlatNamespace));
File->setApplicationExtensionSafe(
!(Flags & TBDFlags::NotApplicationExtensionSafe));
- File->setInstallAPI(Flags & TBDFlags::InstallAPI);
}
for (const auto &Section : Exports) {
@@ -807,8 +790,6 @@ template <> struct MappingTraits<const InterfaceFile *> {
TBDVersion = Ctx->FileKind >> 1;
Targets.insert(Targets.begin(), File->targets().begin(),
File->targets().end());
- for (const auto &IT : File->uuids())
- UUIDs.emplace_back(IT.first, IT.second);
InstallName = File->getInstallName();
CurrentVersion = File->getCurrentVersion();
CompatibilityVersion = File->getCompatibilityVersion();
@@ -821,9 +802,6 @@ template <> struct MappingTraits<const InterfaceFile *> {
if (!File->isTwoLevelNamespace())
Flags |= TBDFlags::FlatNamespace;
- if (File->isInstallAPI())
- Flags |= TBDFlags::InstallAPI;
-
{
std::map<std::string, TargetList> valueToTargetList;
for (const auto &it : File->umbrellas())
@@ -843,13 +821,10 @@ template <> struct MappingTraits<const InterfaceFile *> {
auto handleSymbols =
[](SectionList &CurrentSections,
- InterfaceFile::const_filtered_symbol_range Symbols,
- std::function<bool(const Symbol *)> Pred) {
+ InterfaceFile::const_filtered_symbol_range Symbols) {
std::set<TargetList> TargetSet;
std::map<const Symbol *, TargetList> SymbolToTargetList;
for (const auto *Symbol : Symbols) {
- if (!Pred(Symbol))
- continue;
TargetList Targets(Symbol->targets());
SymbolToTargetList[Symbol] = Targets;
TargetSet.emplace(std::move(Targets));
@@ -894,14 +869,9 @@ template <> struct MappingTraits<const InterfaceFile *> {
}
};
- handleSymbols(Exports, File->exports(), [](const Symbol *Symbol) {
- return !Symbol->isReexported();
- });
- handleSymbols(Reexports, File->exports(), [](const Symbol *Symbol) {
- return Symbol->isReexported();
- });
- handleSymbols(Undefineds, File->undefineds(),
- [](const Symbol *Symbol) { return true; });
+ handleSymbols(Exports, File->exports());
+ handleSymbols(Reexports, File->reexports());
+ handleSymbols(Undefineds, File->undefineds());
}
const InterfaceFile *denormalize(IO &IO) {
@@ -911,8 +881,6 @@ template <> struct MappingTraits<const InterfaceFile *> {
auto *File = new InterfaceFile;
File->setPath(Ctx->Path);
File->setFileType(Ctx->FileKind);
- for (auto &id : UUIDs)
- File->addUUID(id.TargetID, id.Value);
File->addTargets(Targets);
File->setInstallName(InstallName);
File->setCurrentVersion(CurrentVersion);
@@ -924,7 +892,6 @@ template <> struct MappingTraits<const InterfaceFile *> {
File->setTwoLevelNamespace(!(Flags & TBDFlags::FlatNamespace));
File->setApplicationExtensionSafe(
!(Flags & TBDFlags::NotApplicationExtensionSafe));
- File->setInstallAPI(Flags & TBDFlags::InstallAPI);
for (const auto &CurrentSection : AllowableClients) {
for (const auto &lib : CurrentSection.Values)
@@ -947,24 +914,28 @@ template <> struct MappingTraits<const InterfaceFile *> {
for (auto &sym : CurrentSection.Classes)
File->addSymbol(SymbolKind::ObjectiveCClass, sym,
- CurrentSection.Targets);
+ CurrentSection.Targets, Flag);
for (auto &sym : CurrentSection.ClassEHs)
File->addSymbol(SymbolKind::ObjectiveCClassEHType, sym,
- CurrentSection.Targets);
+ CurrentSection.Targets, Flag);
for (auto &sym : CurrentSection.Ivars)
File->addSymbol(SymbolKind::ObjectiveCInstanceVariable, sym,
- CurrentSection.Targets);
+ CurrentSection.Targets, Flag);
- for (auto &sym : CurrentSection.WeakSymbols)
+ SymbolFlags SymFlag = (Flag == SymbolFlags::Undefined)
+ ? SymbolFlags::WeakReferenced
+ : SymbolFlags::WeakDefined;
+ for (auto &sym : CurrentSection.WeakSymbols) {
File->addSymbol(SymbolKind::GlobalSymbol, sym,
- CurrentSection.Targets, SymbolFlags::WeakDefined);
+ CurrentSection.Targets, Flag | SymFlag);
+ }
for (auto &sym : CurrentSection.TlvSymbols)
File->addSymbol(SymbolKind::GlobalSymbol, sym,
CurrentSection.Targets,
- SymbolFlags::ThreadLocalValue);
+ Flag | SymbolFlags::ThreadLocalValue);
}
};
@@ -1021,9 +992,10 @@ template <> struct MappingTraits<const InterfaceFile *> {
static void mapKeysToValues(FileType FileKind, IO &IO,
const InterfaceFile *&File) {
MappingNormalization<NormalizedTBD, const InterfaceFile *> Keys(IO, File);
+ std::vector<UUID> EmptyUUID;
IO.mapRequired("archs", Keys->Architectures);
if (FileKind != FileType::TBD_V1)
- IO.mapOptional("uuids", Keys->UUIDs);
+ IO.mapOptional("uuids", EmptyUUID);
IO.mapRequired("platform", Keys->Platforms);
if (FileKind != FileType::TBD_V1)
IO.mapOptional("flags", Keys->Flags, TBDFlags::None);
@@ -1051,10 +1023,11 @@ template <> struct MappingTraits<const InterfaceFile *> {
static void mapKeysToValuesV4(IO &IO, const InterfaceFile *&File) {
MappingNormalization<NormalizedTBD_V4, const InterfaceFile *> Keys(IO,
File);
+ std::vector<UUIDv4> EmptyUUID;
IO.mapTag("!tapi-tbd", true);
IO.mapRequired("tbd-version", Keys->TBDVersion);
IO.mapRequired("targets", Keys->Targets);
- IO.mapOptional("uuids", Keys->UUIDs);
+ IO.mapOptional("uuids", EmptyUUID);
IO.mapOptional("flags", Keys->Flags, TBDFlags::None);
IO.mapRequired("install-name", Keys->InstallName);
IO.mapOptional("current-version", Keys->CurrentVersion,
@@ -1105,10 +1078,49 @@ static void DiagHandler(const SMDiagnostic &Diag, void *Context) {
File->ErrorMessage = ("malformed file\n" + Message).str();
}
+namespace {
+
+Expected<FileType> canReadFileType(MemoryBufferRef InputBuffer) {
+ auto TAPIFile = InputBuffer.getBuffer().trim();
+ if (TAPIFile.startswith("{") && TAPIFile.endswith("}"))
+ return FileType::TBD_V5;
+
+ if (!TAPIFile.endswith("..."))
+ return createStringError(std::errc::not_supported, "unsupported file type");
+
+ if (TAPIFile.startswith("--- !tapi-tbd\n"))
+ return FileType::TBD_V4;
+
+ if (TAPIFile.startswith("--- !tapi-tbd-v3\n"))
+ return FileType::TBD_V3;
+
+ if (TAPIFile.startswith("--- !tapi-tbd-v2\n"))
+ return FileType::TBD_V2;
+
+ if (TAPIFile.startswith("--- !tapi-tbd-v1\n") ||
+ TAPIFile.startswith("---\narchs:"))
+ return FileType::TBD_V1;
+
+ return createStringError(std::errc::not_supported, "unsupported file type");
+}
+} // namespace
+
Expected<std::unique_ptr<InterfaceFile>>
TextAPIReader::get(MemoryBufferRef InputBuffer) {
TextAPIContext Ctx;
Ctx.Path = std::string(InputBuffer.getBufferIdentifier());
+ if (auto FTOrErr = canReadFileType(InputBuffer))
+ Ctx.FileKind = *FTOrErr;
+ else
+ return FTOrErr.takeError();
+
+ // Handle JSON Format.
+ if (Ctx.FileKind >= FileType::TBD_V5) {
+ auto FileOrErr = getInterfaceFileFromJSON(InputBuffer.getBuffer());
+ if (!FileOrErr)
+ return FileOrErr.takeError();
+ return std::move(*FileOrErr);
+ }
yaml::Input YAMLIn(InputBuffer.getBuffer(), &Ctx, DiagHandler, &Ctx);
// Fill vector with interface file objects created by parsing the YAML file.
@@ -1130,10 +1142,17 @@ TextAPIReader::get(MemoryBufferRef InputBuffer) {
return std::move(File);
}
-Error TextAPIWriter::writeToStream(raw_ostream &OS, const InterfaceFile &File) {
+Error TextAPIWriter::writeToStream(raw_ostream &OS, const InterfaceFile &File,
+ bool Compact) {
TextAPIContext Ctx;
Ctx.Path = std::string(File.getPath());
Ctx.FileKind = File.getFileType();
+
+ // Write out in JSON format.
+ if (Ctx.FileKind >= FileType::TBD_V5) {
+ return serializeInterfaceFileToJSON(OS, File, Compact);
+ }
+
llvm::yaml::Output YAMLOut(OS, &Ctx, /*WrapColumn=*/80);
std::vector<const InterfaceFile *> Files;
diff --git a/llvm/lib/TextAPI/TextStubCommon.cpp b/llvm/lib/TextAPI/TextStubCommon.cpp
index 01a90078e150..0b710b0790b3 100644
--- a/llvm/lib/TextAPI/TextStubCommon.cpp
+++ b/llvm/lib/TextAPI/TextStubCommon.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// Implememts common Text Stub YAML mappings.
+// Implements common Text Stub YAML mappings.
//
//===----------------------------------------------------------------------===//
@@ -82,7 +82,7 @@ void ScalarTraits<PlatformSet>::output(const PlatformSet &Values, void *IO,
OS << "bridgeos";
break;
case PLATFORM_MACCATALYST:
- OS << "iosmac";
+ OS << "maccatalyst";
break;
case PLATFORM_DRIVERKIT:
OS << "driverkit";
@@ -112,6 +112,7 @@ StringRef ScalarTraits<PlatformSet>::input(StringRef Scalar, void *IO,
.Case("tvos", PLATFORM_TVOS)
.Case("bridgeos", PLATFORM_BRIDGEOS)
.Case("iosmac", PLATFORM_MACCATALYST)
+ .Case("maccatalyst", PLATFORM_MACCATALYST)
.Case("driverkit", PLATFORM_DRIVERKIT)
.Default(PLATFORM_UNKNOWN);
@@ -216,17 +217,10 @@ QuotingType ScalarTraits<SwiftVersion>::mustQuote(StringRef) {
return QuotingType::None;
}
-void ScalarTraits<UUID>::output(const UUID &Value, void *, raw_ostream &OS) {
- OS << Value.first << ": " << Value.second;
-}
+void ScalarTraits<UUID>::output(const UUID &Value, void *, raw_ostream &OS) {}
+
StringRef ScalarTraits<UUID>::input(StringRef Scalar, void *, UUID &Value) {
- auto Split = Scalar.split(':');
- auto Arch = Split.first.trim();
- auto UUID = Split.second.trim();
- if (UUID.empty())
- return "invalid uuid string pair";
- Value.second = std::string(UUID);
- Value.first = Target{getArchitectureFromName(Arch), PLATFORM_UNKNOWN};
+ Value = {};
return {};
}
diff --git a/llvm/lib/TextAPI/TextStubCommon.h b/llvm/lib/TextAPI/TextStubCommon.h
index aac27221b5ff..d4dcd3af447a 100644
--- a/llvm/lib/TextAPI/TextStubCommon.h
+++ b/llvm/lib/TextAPI/TextStubCommon.h
@@ -22,6 +22,16 @@
using UUID = std::pair<llvm::MachO::Target, std::string>;
+// clang-format off
+enum TBDFlags : unsigned {
+ None = 0U,
+ FlatNamespace = 1U << 0,
+ NotApplicationExtensionSafe = 1U << 1,
+ InstallAPI = 1U << 2,
+ LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/InstallAPI),
+};
+// clang-format on
+
LLVM_YAML_STRONG_TYPEDEF(llvm::StringRef, FlowStringRef)
LLVM_YAML_STRONG_TYPEDEF(uint8_t, SwiftVersion)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(UUID)
@@ -30,9 +40,16 @@ LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(FlowStringRef)
namespace llvm {
namespace MachO {
- class ArchitectureSet;
- class PackedVersion;
-}
+class ArchitectureSet;
+class PackedVersion;
+
+Expected<std::unique_ptr<InterfaceFile>>
+getInterfaceFileFromJSON(StringRef JSON);
+
+Error serializeInterfaceFileToJSON(raw_ostream &OS, const InterfaceFile &File,
+ bool Compact);
+} // namespace MachO
+
namespace yaml {
template <> struct ScalarTraits<FlowStringRef> {
@@ -73,6 +90,8 @@ template <> struct ScalarTraits<SwiftVersion> {
static QuotingType mustQuote(StringRef);
};
+// UUIDs are no longer respected, but the YAML parser still accepts them so
+// that older TBD files can be read.
template <> struct ScalarTraits<UUID> {
static void output(const UUID &, void *, raw_ostream &);
static StringRef input(StringRef, void *, UUID &);
diff --git a/llvm/lib/TextAPI/TextStubV5.cpp b/llvm/lib/TextAPI/TextStubV5.cpp
new file mode 100644
index 000000000000..5b3d69b8d94a
--- /dev/null
+++ b/llvm/lib/TextAPI/TextStubV5.cpp
@@ -0,0 +1,1021 @@
+//===- TextStubV5.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements Text Stub JSON mappings.
+//
+//===----------------------------------------------------------------------===//
+#include "TextStubCommon.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/JSON.h"
+#include <utility>
+
+// clang-format off
+/*
+
+JSON Format specification.
+
+All library-level keys accept target values and are defaulted if not specified.
+
+{
+"tapi_tbd_version": 5, # Required: TBD version for all documents in file
+"main_library": { # Required: top level library
+ "target_info": [ # Required: target information
+ {
+ "target": "x86_64-macos",
+ "min_deployment": "10.14" # Required: minimum OS deployment version
+ },
+ {
+ "target": "arm64-macos",
+ "min_deployment": "10.14"
+ },
+ {
+ "target": "arm64-maccatalyst",
+ "min_deployment": "12.1"
+ }],
+ "flags":[{"attributes": ["flat_namespace"]}], # Optional:
+ "install_names":[{"name":"/S/L/F/Foo.fwk/Foo"}], # Required: library install name
+ "current_versions":[{"version": "1.2"}], # Optional: defaults to 1
+ "compatibility_versions":[{ "version": "1.1"}], # Optional: defaults to 1
+ "rpaths": [ # Optional:
+ {
+ "targets": ["x86_64-macos"], # Optional: defaults to targets in `target-info`
+ "paths": ["@executable_path/.../Frameworks"]
+ }],
+ "parent_umbrellas": [{"umbrella": "System"}],
+ "allowable_clients": [{"clients": ["ClientA"]}],
+ "reexported_libraries": [{"names": ["/u/l/l/foo.dylib"]}],
+ "exported_symbols": [{ # List of export symbols section
+ "targets": ["x86_64-macos", "arm64-macos"], # Optional: defaults to targets in `target-info`
+ "text": { # List of Text segment symbols
+ "global": [ "_func" ],
+ "weak": [],
+ "thread_local": []
+ },
+ "data": { ... }, # List of Data segment symbols
+ }],
+ "reexported_symbols": [{ ... }], # List of reexported symbols section
+ "undefined_symbols": [{ ... }] # List of undefined symbols section
+},
+"libraries": [ # Optional: Array of inlined libraries
+ {...}, {...}, {...}
+]
+}
+*/
+// clang-format on
+
+using namespace llvm;
+using namespace llvm::json;
+using namespace llvm::MachO;
+
+namespace {
+struct JSONSymbol {
+ SymbolKind Kind;
+ std::string Name;
+ SymbolFlags Flags;
+};
+
+using AttrToTargets = std::map<std::string, TargetList>;
+using TargetsToSymbols =
+ SmallVector<std::pair<TargetList, std::vector<JSONSymbol>>>;
+
+enum TBDKey : size_t {
+ TBDVersion = 0U,
+ MainLibrary,
+ Documents,
+ TargetInfo,
+ Targets,
+ Target,
+ Deployment,
+ Flags,
+ Attributes,
+ InstallName,
+ CurrentVersion,
+ CompatibilityVersion,
+ Version,
+ SwiftABI,
+ ABI,
+ ParentUmbrella,
+ Umbrella,
+ AllowableClients,
+ Clients,
+ ReexportLibs,
+ Names,
+ Name,
+ Exports,
+ Reexports,
+ Undefineds,
+ Data,
+ Text,
+ Weak,
+ ThreadLocal,
+ Globals,
+ ObjCClass,
+ ObjCEHType,
+ ObjCIvar,
+ RPath,
+ Paths,
+};
+
+std::array<StringRef, 64> Keys = {
+ "tapi_tbd_version",
+ "main_library",
+ "libraries",
+ "target_info",
+ "targets",
+ "target",
+ "min_deployment",
+ "flags",
+ "attributes",
+ "install_names",
+ "current_versions",
+ "compatibility_versions",
+ "version",
+ "swift_abi",
+ "abi",
+ "parent_umbrellas",
+ "umbrella",
+ "allowable_clients",
+ "clients",
+ "reexported_libraries",
+ "names",
+ "name",
+ "exported_symbols",
+ "reexported_symbols",
+ "undefined_symbols",
+ "data",
+ "text",
+ "weak",
+ "thread_local",
+ "global",
+ "objc_class",
+ "objc_eh_type",
+ "objc_ivar",
+ "rpaths",
+ "paths",
+};
+
+static llvm::SmallString<128> getParseErrorMsg(TBDKey Key) {
+ return {"invalid ", Keys[Key], " section"};
+}
+
+static llvm::SmallString<128> getSerializeErrorMsg(TBDKey Key) {
+ return {"missing ", Keys[Key], " information"};
+}
+
+class JSONStubError : public llvm::ErrorInfo<llvm::json::ParseError> {
+public:
+ JSONStubError(Twine ErrMsg) : Message(ErrMsg.str()) {}
+
+ void log(llvm::raw_ostream &OS) const override { OS << Message << "\n"; }
+ std::error_code convertToErrorCode() const override {
+ return llvm::inconvertibleErrorCode();
+ }
+
+private:
+ std::string Message;
+};
+
+template <typename JsonT, typename StubT = JsonT>
+Expected<StubT> getRequiredValue(
+ TBDKey Key, const Object *Obj,
+ std::function<std::optional<JsonT>(const Object *, StringRef)> GetValue,
+ std::function<std::optional<StubT>(JsonT)> Validate = nullptr) {
+ std::optional<JsonT> Val = GetValue(Obj, Keys[Key]);
+ if (!Val)
+ return make_error<JSONStubError>(getParseErrorMsg(Key));
+
+ if (Validate == nullptr)
+ return static_cast<StubT>(*Val);
+
+ std::optional<StubT> Result = Validate(*Val);
+ if (!Result.has_value())
+ return make_error<JSONStubError>(getParseErrorMsg(Key));
+ return Result.value();
+}
+
+template <typename JsonT, typename StubT = JsonT>
+Expected<StubT> getRequiredValue(
+ TBDKey Key, const Object *Obj,
+ std::function<std::optional<JsonT>(const Object *, StringRef)> GetValue,
+ StubT DefaultValue, std::function<std::optional<StubT>(JsonT)> Validate) {
+ std::optional<JsonT> Val = GetValue(Obj, Keys[Key]);
+ if (!Val)
+ return DefaultValue;
+
+ std::optional<StubT> Result;
+ Result = Validate(*Val);
+ if (!Result.has_value())
+ return make_error<JSONStubError>(getParseErrorMsg(Key));
+ return Result.value();
+}
+
+Error collectFromArray(TBDKey Key, const Object *Obj,
+ std::function<void(StringRef)> Append,
+ bool IsRequired = false) {
+ const auto *Values = Obj->getArray(Keys[Key]);
+ if (!Values) {
+ if (IsRequired)
+ return make_error<JSONStubError>(getParseErrorMsg(Key));
+ return Error::success();
+ }
+
+ for (const Value &Val : *Values) {
+ auto ValStr = Val.getAsString();
+ if (!ValStr.has_value())
+ return make_error<JSONStubError>(getParseErrorMsg(Key));
+ Append(ValStr.value());
+ }
+
+ return Error::success();
+}
+
+namespace StubParser {
+
+Expected<FileType> getVersion(const Object *File) {
+ auto VersionOrErr = getRequiredValue<int64_t, FileType>(
+ TBDKey::TBDVersion, File, &Object::getInteger,
+ [](int64_t Val) -> std::optional<FileType> {
+ unsigned Result = Val;
+ if (Result != 5)
+ return std::nullopt;
+ return FileType::TBD_V5;
+ });
+
+ if (!VersionOrErr)
+ return VersionOrErr.takeError();
+ return *VersionOrErr;
+}
+
+Expected<TargetList> getTargets(const Object *Section) {
+ const auto *Targets = Section->getArray(Keys[TBDKey::Targets]);
+ if (!Targets)
+ return make_error<JSONStubError>(getParseErrorMsg(TBDKey::Targets));
+
+ TargetList IFTargets;
+ for (const Value &JSONTarget : *Targets) {
+ auto TargetStr = JSONTarget.getAsString();
+ if (!TargetStr.has_value())
+ return make_error<JSONStubError>(getParseErrorMsg(TBDKey::Target));
+ auto TargetOrErr = Target::create(TargetStr.value());
+ if (!TargetOrErr)
+ return make_error<JSONStubError>(getParseErrorMsg(TBDKey::Target));
+ IFTargets.push_back(*TargetOrErr);
+ }
+ return std::move(IFTargets);
+}
+
+Expected<TargetList> getTargetsSection(const Object *Section) {
+ const Array *Targets = Section->getArray(Keys[TBDKey::TargetInfo]);
+ if (!Targets)
+ return make_error<JSONStubError>(getParseErrorMsg(TBDKey::Targets));
+
+ TargetList IFTargets;
+ for (const Value &JSONTarget : *Targets) {
+ const auto *Obj = JSONTarget.getAsObject();
+ if (!Obj)
+ return make_error<JSONStubError>(getParseErrorMsg(TBDKey::Target));
+ auto TargetStr =
+ getRequiredValue<StringRef>(TBDKey::Target, Obj, &Object::getString);
+ if (!TargetStr)
+ return make_error<JSONStubError>(getParseErrorMsg(TBDKey::Target));
+ auto VersionStr = getRequiredValue<StringRef>(TBDKey::Deployment, Obj,
+ &Object::getString);
+ if (!VersionStr)
+ return make_error<JSONStubError>(getParseErrorMsg(TBDKey::Deployment));
+ VersionTuple Version;
+ if (Version.tryParse(*VersionStr))
+ return make_error<JSONStubError>(getParseErrorMsg(TBDKey::Deployment));
+ auto TargetOrErr = Target::create(*TargetStr);
+ if (!TargetOrErr)
+ return make_error<JSONStubError>(getParseErrorMsg(TBDKey::Target));
+ TargetOrErr->MinDeployment = Version;
+ // Convert to LLVM::Triple to accurately compute minOS + platform + arch
+ // pairing.
+ IFTargets.push_back(
+ MachO::Target(Triple(getTargetTripleName(*TargetOrErr))));
+ }
+ return std::move(IFTargets);
+}
+
+Error collectSymbolsFromSegment(const Object *Segment, TargetsToSymbols &Result,
+ SymbolFlags SectionFlag) {
+ auto Err = collectFromArray(
+ TBDKey::Globals, Segment, [&Result, &SectionFlag](StringRef Name) {
+ JSONSymbol Sym = {SymbolKind::GlobalSymbol, Name.str(), SectionFlag};
+ Result.back().second.emplace_back(Sym);
+ });
+ if (Err)
+ return Err;
+
+ Err = collectFromArray(
+ TBDKey::ObjCClass, Segment, [&Result, &SectionFlag](StringRef Name) {
+ JSONSymbol Sym = {SymbolKind::ObjectiveCClass, Name.str(), SectionFlag};
+ Result.back().second.emplace_back(Sym);
+ });
+ if (Err)
+ return Err;
+
+ Err = collectFromArray(TBDKey::ObjCEHType, Segment,
+ [&Result, &SectionFlag](StringRef Name) {
+ JSONSymbol Sym = {SymbolKind::ObjectiveCClassEHType,
+ Name.str(), SectionFlag};
+ Result.back().second.emplace_back(Sym);
+ });
+ if (Err)
+ return Err;
+
+ Err = collectFromArray(
+ TBDKey::ObjCIvar, Segment, [&Result, &SectionFlag](StringRef Name) {
+ JSONSymbol Sym = {SymbolKind::ObjectiveCInstanceVariable, Name.str(),
+ SectionFlag};
+ Result.back().second.emplace_back(Sym);
+ });
+ if (Err)
+ return Err;
+
+ SymbolFlags WeakFlag =
+ SectionFlag |
+ (((SectionFlag & SymbolFlags::Undefined) == SymbolFlags::Undefined)
+ ? SymbolFlags::WeakReferenced
+ : SymbolFlags::WeakDefined);
+ Err = collectFromArray(
+ TBDKey::Weak, Segment, [&Result, WeakFlag](StringRef Name) {
+ JSONSymbol Sym = {SymbolKind::GlobalSymbol, Name.str(), WeakFlag};
+ Result.back().second.emplace_back(Sym);
+ });
+ if (Err)
+ return Err;
+
+ Err = collectFromArray(
+ TBDKey::ThreadLocal, Segment, [&Result, SectionFlag](StringRef Name) {
+ JSONSymbol Sym = {SymbolKind::GlobalSymbol, Name.str(),
+ SymbolFlags::ThreadLocalValue | SectionFlag};
+ Result.back().second.emplace_back(Sym);
+ });
+ if (Err)
+ return Err;
+
+ return Error::success();
+}
+
+Expected<StringRef> getNameSection(const Object *File) {
+ const Array *Section = File->getArray(Keys[TBDKey::InstallName]);
+ if (!Section)
+ return make_error<JSONStubError>(getParseErrorMsg(TBDKey::InstallName));
+
+ assert(!Section->empty() && "unexpected missing install name");
+ // TODO: Just take first for now.
+ const auto *Obj = Section->front().getAsObject();
+ if (!Obj)
+ return make_error<JSONStubError>(getParseErrorMsg(TBDKey::InstallName));
+
+ return getRequiredValue<StringRef>(TBDKey::Name, Obj, &Object::getString);
+}
+
+Expected<TargetsToSymbols> getSymbolSection(const Object *File, TBDKey Key,
+ TargetList &Targets) {
+
+ const Array *Section = File->getArray(Keys[Key]);
+ if (!Section)
+ return TargetsToSymbols();
+
+ SymbolFlags SectionFlag;
+ switch (Key) {
+ case TBDKey::Reexports:
+ SectionFlag = SymbolFlags::Rexported;
+ break;
+ case TBDKey::Undefineds:
+ SectionFlag = SymbolFlags::Undefined;
+ break;
+ default:
+ SectionFlag = SymbolFlags::None;
+ break;
+ };
+
+ TargetsToSymbols Result;
+ TargetList MappedTargets;
+ for (auto Val : *Section) {
+ auto *Obj = Val.getAsObject();
+ if (!Obj)
+ continue;
+
+ auto TargetsOrErr = getTargets(Obj);
+ if (!TargetsOrErr) {
+ MappedTargets = Targets;
+ consumeError(TargetsOrErr.takeError());
+ } else {
+ MappedTargets = *TargetsOrErr;
+ }
+ Result.emplace_back(
+ std::make_pair(std::move(MappedTargets), std::vector<JSONSymbol>()));
+
+ auto *DataSection = Obj->getObject(Keys[TBDKey::Data]);
+ auto *TextSection = Obj->getObject(Keys[TBDKey::Text]);
+ // There should be at least one valid section.
+ if (!DataSection && !TextSection)
+ return make_error<JSONStubError>(getParseErrorMsg(Key));
+
+ if (DataSection) {
+ auto Err = collectSymbolsFromSegment(DataSection, Result,
+ SectionFlag | SymbolFlags::Data);
+ if (Err)
+ return std::move(Err);
+ }
+ if (TextSection) {
+ auto Err = collectSymbolsFromSegment(TextSection, Result,
+ SectionFlag | SymbolFlags::Text);
+ if (Err)
+ return std::move(Err);
+ }
+ }
+
+ return std::move(Result);
+}
+
+Expected<AttrToTargets> getLibSection(const Object *File, TBDKey Key,
+ TBDKey SubKey,
+ const TargetList &Targets) {
+ auto *Section = File->getArray(Keys[Key]);
+ if (!Section)
+ return AttrToTargets();
+
+ AttrToTargets Result;
+ TargetList MappedTargets;
+ for (auto Val : *Section) {
+ auto *Obj = Val.getAsObject();
+ if (!Obj)
+ continue;
+
+ auto TargetsOrErr = getTargets(Obj);
+ if (!TargetsOrErr) {
+ MappedTargets = Targets;
+ consumeError(TargetsOrErr.takeError());
+ } else {
+ MappedTargets = *TargetsOrErr;
+ }
+ auto Err =
+ collectFromArray(SubKey, Obj, [&Result, &MappedTargets](StringRef Key) {
+ Result[Key.str()] = MappedTargets;
+ });
+ if (Err)
+ return std::move(Err);
+ }
+
+ return std::move(Result);
+}
+
+Expected<AttrToTargets> getUmbrellaSection(const Object *File,
+ const TargetList &Targets) {
+ const auto *Umbrella = File->getArray(Keys[TBDKey::ParentUmbrella]);
+ if (!Umbrella)
+ return AttrToTargets();
+
+ AttrToTargets Result;
+ TargetList MappedTargets;
+ for (auto Val : *Umbrella) {
+ auto *Obj = Val.getAsObject();
+ if (!Obj)
+ return make_error<JSONStubError>(
+ getParseErrorMsg(TBDKey::ParentUmbrella));
+
+ // Get Targets section.
+ auto TargetsOrErr = getTargets(Obj);
+ if (!TargetsOrErr) {
+ MappedTargets = Targets;
+ consumeError(TargetsOrErr.takeError());
+ } else {
+ MappedTargets = *TargetsOrErr;
+ }
+
+ auto UmbrellaOrErr =
+ getRequiredValue<StringRef>(TBDKey::Umbrella, Obj, &Object::getString);
+ if (!UmbrellaOrErr)
+ return UmbrellaOrErr.takeError();
+ Result[UmbrellaOrErr->str()] = Targets;
+ }
+ return std::move(Result);
+}
+
+Expected<uint8_t> getSwiftVersion(const Object *File) {
+ const Array *Versions = File->getArray(Keys[TBDKey::SwiftABI]);
+ if (!Versions)
+ return 0;
+
+ for (const auto &Val : *Versions) {
+ const auto *Obj = Val.getAsObject();
+ if (!Obj)
+ return make_error<JSONStubError>(getParseErrorMsg(TBDKey::SwiftABI));
+
+ // TODO: Take first for now.
+ return getRequiredValue<int64_t, uint8_t>(TBDKey::ABI, Obj,
+ &Object::getInteger);
+ }
+
+ return 0;
+}
+
+Expected<PackedVersion> getPackedVersion(const Object *File, TBDKey Key) {
+ const Array *Versions = File->getArray(Keys[Key]);
+ if (!Versions)
+ return PackedVersion(1, 0, 0);
+
+ for (const auto &Val : *Versions) {
+ const auto *Obj = Val.getAsObject();
+ if (!Obj)
+ return make_error<JSONStubError>(getParseErrorMsg(Key));
+
+ auto ValidatePV = [](StringRef Version) -> std::optional<PackedVersion> {
+ PackedVersion PV;
+ auto [success, truncated] = PV.parse64(Version);
+ if (!success || truncated)
+ return std::nullopt;
+ return PV;
+ };
+ // TODO: Take first for now.
+ return getRequiredValue<StringRef, PackedVersion>(
+ TBDKey::Version, Obj, &Object::getString, PackedVersion(1, 0, 0),
+ ValidatePV);
+ }
+
+ return PackedVersion(1, 0, 0);
+}
+
+Expected<TBDFlags> getFlags(const Object *File) {
+ TBDFlags Flags = TBDFlags::None;
+ const Array *Section = File->getArray(Keys[TBDKey::Flags]);
+ if (!Section)
+ return Flags;
+
+ for (auto &Val : *Section) {
+ // TODO: Just take first for now.
+ const auto *Obj = Val.getAsObject();
+ if (!Obj)
+ return make_error<JSONStubError>(getParseErrorMsg(TBDKey::Flags));
+
+ auto FlagsOrErr =
+ collectFromArray(TBDKey::Attributes, Obj, [&Flags](StringRef Flag) {
+ TBDFlags TBDFlag =
+ StringSwitch<TBDFlags>(Flag)
+ .Case("flat_namespace", TBDFlags::FlatNamespace)
+ .Case("not_app_extension_safe",
+ TBDFlags::NotApplicationExtensionSafe)
+ .Default(TBDFlags::None);
+ Flags |= TBDFlag;
+ });
+
+ if (FlagsOrErr)
+ return std::move(FlagsOrErr);
+
+ return Flags;
+ }
+
+ return Flags;
+}
+
+using IFPtr = std::unique_ptr<InterfaceFile>;
+Expected<IFPtr> parseToInterfaceFile(const Object *File) {
+ auto TargetsOrErr = getTargetsSection(File);
+ if (!TargetsOrErr)
+ return TargetsOrErr.takeError();
+ TargetList Targets = *TargetsOrErr;
+
+ auto NameOrErr = getNameSection(File);
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+ StringRef Name = *NameOrErr;
+
+ auto CurrVersionOrErr = getPackedVersion(File, TBDKey::CurrentVersion);
+ if (!CurrVersionOrErr)
+ return CurrVersionOrErr.takeError();
+ PackedVersion CurrVersion = *CurrVersionOrErr;
+
+ auto CompVersionOrErr = getPackedVersion(File, TBDKey::CompatibilityVersion);
+ if (!CompVersionOrErr)
+ return CompVersionOrErr.takeError();
+ PackedVersion CompVersion = *CompVersionOrErr;
+
+ auto SwiftABIOrErr = getSwiftVersion(File);
+ if (!SwiftABIOrErr)
+ return SwiftABIOrErr.takeError();
+ uint8_t SwiftABI = *SwiftABIOrErr;
+
+ auto FlagsOrErr = getFlags(File);
+ if (!FlagsOrErr)
+ return FlagsOrErr.takeError();
+ TBDFlags Flags = *FlagsOrErr;
+
+ auto UmbrellasOrErr = getUmbrellaSection(File, Targets);
+ if (!UmbrellasOrErr)
+ return UmbrellasOrErr.takeError();
+ AttrToTargets Umbrellas = *UmbrellasOrErr;
+
+ auto ClientsOrErr =
+ getLibSection(File, TBDKey::AllowableClients, TBDKey::Clients, Targets);
+ if (!ClientsOrErr)
+ return ClientsOrErr.takeError();
+ AttrToTargets Clients = *ClientsOrErr;
+
+ auto RLOrErr =
+ getLibSection(File, TBDKey::ReexportLibs, TBDKey::Names, Targets);
+ if (!RLOrErr)
+ return RLOrErr.takeError();
+ AttrToTargets ReexportLibs = std::move(*RLOrErr);
+
+ auto RPathsOrErr = getLibSection(File, TBDKey::RPath, TBDKey::Paths, Targets);
+ if (!RPathsOrErr)
+ return RPathsOrErr.takeError();
+ AttrToTargets RPaths = std::move(*RPathsOrErr);
+
+ auto ExportsOrErr = getSymbolSection(File, TBDKey::Exports, Targets);
+ if (!ExportsOrErr)
+ return ExportsOrErr.takeError();
+ TargetsToSymbols Exports = std::move(*ExportsOrErr);
+
+ auto ReexportsOrErr = getSymbolSection(File, TBDKey::Reexports, Targets);
+ if (!ReexportsOrErr)
+ return ReexportsOrErr.takeError();
+ TargetsToSymbols Reexports = std::move(*ReexportsOrErr);
+
+ auto UndefinedsOrErr = getSymbolSection(File, TBDKey::Undefineds, Targets);
+ if (!UndefinedsOrErr)
+ return UndefinedsOrErr.takeError();
+ TargetsToSymbols Undefineds = std::move(*UndefinedsOrErr);
+
+ IFPtr F(new InterfaceFile);
+ F->setInstallName(Name);
+ F->setCurrentVersion(CurrVersion);
+ F->setCompatibilityVersion(CompVersion);
+ F->setSwiftABIVersion(SwiftABI);
+ F->setTwoLevelNamespace(!(Flags & TBDFlags::FlatNamespace));
+ F->setApplicationExtensionSafe(
+ !(Flags & TBDFlags::NotApplicationExtensionSafe));
+ for (auto &T : Targets)
+ F->addTarget(T);
+ for (auto &[Lib, Targets] : Clients)
+ for (auto Target : Targets)
+ F->addAllowableClient(Lib, Target);
+ for (auto &[Lib, Targets] : ReexportLibs)
+ for (auto Target : Targets)
+ F->addReexportedLibrary(Lib, Target);
+ for (auto &[Lib, Targets] : Umbrellas)
+ for (auto Target : Targets)
+ F->addParentUmbrella(Target, Lib);
+ for (auto &[Path, Targets] : RPaths)
+ for (auto Target : Targets)
+ F->addRPath(Target, Path);
+ for (auto &[Targets, Symbols] : Exports)
+ for (auto &Sym : Symbols)
+ F->addSymbol(Sym.Kind, Sym.Name, Targets, Sym.Flags);
+ for (auto &[Targets, Symbols] : Reexports)
+ for (auto &Sym : Symbols)
+ F->addSymbol(Sym.Kind, Sym.Name, Targets, Sym.Flags);
+ for (auto &[Targets, Symbols] : Undefineds)
+ for (auto &Sym : Symbols)
+ F->addSymbol(Sym.Kind, Sym.Name, Targets, Sym.Flags);
+
+ return std::move(F);
+}
+
+Expected<std::vector<IFPtr>> getInlinedLibs(const Object *File) {
+ std::vector<IFPtr> IFs;
+ const Array *Files = File->getArray(Keys[TBDKey::Documents]);
+ if (!Files)
+ return std::move(IFs);
+
+ for (auto Lib : *Files) {
+ auto IFOrErr = parseToInterfaceFile(Lib.getAsObject());
+ if (!IFOrErr)
+ return IFOrErr.takeError();
+ auto IF = std::move(*IFOrErr);
+ IFs.emplace_back(std::move(IF));
+ }
+ return std::move(IFs);
+}
+
+} // namespace StubParser
+} // namespace
+
+Expected<std::unique_ptr<InterfaceFile>>
+MachO::getInterfaceFileFromJSON(StringRef JSON) {
+ auto ValOrErr = parse(JSON);
+ if (!ValOrErr)
+ return ValOrErr.takeError();
+
+ auto *Root = ValOrErr->getAsObject();
+ auto VersionOrErr = StubParser::getVersion(Root);
+ if (!VersionOrErr)
+ return VersionOrErr.takeError();
+ FileType Version = *VersionOrErr;
+
+ Object *MainLib = Root->getObject(Keys[TBDKey::MainLibrary]);
+ auto IFOrErr = StubParser::parseToInterfaceFile(MainLib);
+ if (!IFOrErr)
+ return IFOrErr.takeError();
+ (*IFOrErr)->setFileType(Version);
+ std::unique_ptr<InterfaceFile> IF(std::move(*IFOrErr));
+
+ auto IFsOrErr = StubParser::getInlinedLibs(Root);
+ if (!IFsOrErr)
+ return IFsOrErr.takeError();
+ for (auto &File : *IFsOrErr) {
+ File->setFileType(Version);
+ IF->addDocument(std::shared_ptr<InterfaceFile>(std::move(File)));
+ }
+ return std::move(IF);
+}
+
+namespace {
+
+template <typename ContainerT = Array>
+bool insertNonEmptyValues(Object &Obj, TBDKey Key, ContainerT &&Contents) {
+ if (Contents.empty())
+ return false;
+ Obj[Keys[Key]] = std::move(Contents);
+ return true;
+}
+
+std::string getFormattedStr(const MachO::Target &Targ) {
+ std::string PlatformStr = Targ.Platform == PLATFORM_MACCATALYST
+ ? "maccatalyst"
+ : getOSAndEnvironmentName(Targ.Platform);
+ return (getArchitectureName(Targ.Arch) + "-" + PlatformStr).str();
+}
+
+template <typename AggregateT>
+std::vector<std::string> serializeTargets(const AggregateT Targets,
+ const TargetList &ActiveTargets) {
+ std::vector<std::string> TargetsStr;
+ if (Targets.size() == ActiveTargets.size())
+ return TargetsStr;
+
+ llvm::for_each(Targets, [&TargetsStr](const MachO::Target &Target) {
+ TargetsStr.emplace_back(getFormattedStr(Target));
+ });
+ return TargetsStr;
+}
+
+Array serializeTargetInfo(const TargetList &ActiveTargets) {
+ Array Targets;
+ for (const auto Targ : ActiveTargets) {
+ Object TargetInfo;
+ TargetInfo[Keys[TBDKey::Deployment]] = Targ.MinDeployment.getAsString();
+ TargetInfo[Keys[TBDKey::Target]] = getFormattedStr(Targ);
+ Targets.emplace_back(std::move(TargetInfo));
+ }
+ return Targets;
+}
+
+template <typename ValueT, typename EntryT = ValueT>
+Array serializeScalar(TBDKey Key, ValueT Value, ValueT Default = ValueT()) {
+ if (Value == Default)
+ return {};
+ Array Container;
+ Object ScalarObj({Object::KV({Keys[Key], EntryT(Value)})});
+
+ Container.emplace_back(std::move(ScalarObj));
+ return Container;
+}
+
+using TargetsToValuesMap =
+ std::map<std::vector<std::string>, std::vector<std::string>>;
+
+template <typename AggregateT = TargetsToValuesMap>
+Array serializeAttrToTargets(AggregateT &Entries, TBDKey Key) {
+ Array Container;
+ for (const auto &[Targets, Values] : Entries) {
+ Object Obj;
+ insertNonEmptyValues(Obj, TBDKey::Targets, std::move(Targets));
+ Obj[Keys[Key]] = Values;
+ Container.emplace_back(std::move(Obj));
+ }
+ return Container;
+}
+
+template <typename ValueT = std::string,
+ typename AggregateT = std::vector<std::pair<MachO::Target, ValueT>>>
+Array serializeField(TBDKey Key, const AggregateT &Values,
+ const TargetList &ActiveTargets, bool IsArray = true) {
+ std::map<ValueT, std::set<MachO::Target>> Entries;
+ for (const auto &[Target, Val] : Values)
+ Entries[Val].insert(Target);
+
+ if (!IsArray) {
+ std::map<std::vector<std::string>, std::string> FinalEntries;
+ for (const auto &[Val, Targets] : Entries)
+ FinalEntries[serializeTargets(Targets, ActiveTargets)] = Val;
+ return serializeAttrToTargets(FinalEntries, Key);
+ }
+
+ TargetsToValuesMap FinalEntries;
+ for (const auto &[Val, Targets] : Entries)
+ FinalEntries[serializeTargets(Targets, ActiveTargets)].emplace_back(Val);
+ return serializeAttrToTargets(FinalEntries, Key);
+}
+
+Array serializeField(TBDKey Key, const std::vector<InterfaceFileRef> &Values,
+ const TargetList &ActiveTargets) {
+ TargetsToValuesMap FinalEntries;
+ for (const auto &Ref : Values) {
+ TargetList Targets{Ref.targets().begin(), Ref.targets().end()};
+ FinalEntries[serializeTargets(Targets, ActiveTargets)].emplace_back(
+ Ref.getInstallName());
+ }
+ return serializeAttrToTargets(FinalEntries, Key);
+}
+
+struct SymbolFields {
+ struct SymbolTypes {
+ std::vector<StringRef> Weaks;
+ std::vector<StringRef> Globals;
+ std::vector<StringRef> TLV;
+ std::vector<StringRef> ObjCClasses;
+ std::vector<StringRef> IVars;
+ std::vector<StringRef> EHTypes;
+
+ bool empty() const {
+ return Weaks.empty() && Globals.empty() && TLV.empty() &&
+ ObjCClasses.empty() && IVars.empty() && EHTypes.empty();
+ }
+ };
+ SymbolTypes Data;
+ SymbolTypes Text;
+};
+
+Array serializeSymbols(InterfaceFile::const_filtered_symbol_range Symbols,
+ const TargetList &ActiveTargets) {
+ auto AssignForSymbolType = [](SymbolFields::SymbolTypes &Assignment,
+ const Symbol *Sym) {
+ switch (Sym->getKind()) {
+ case SymbolKind::ObjectiveCClass:
+ Assignment.ObjCClasses.emplace_back(Sym->getName());
+ return;
+ case SymbolKind::ObjectiveCClassEHType:
+ Assignment.EHTypes.emplace_back(Sym->getName());
+ return;
+ case SymbolKind::ObjectiveCInstanceVariable:
+ Assignment.IVars.emplace_back(Sym->getName());
+ return;
+ case SymbolKind::GlobalSymbol: {
+ if (Sym->isWeakReferenced() || Sym->isWeakDefined())
+ Assignment.Weaks.emplace_back(Sym->getName());
+ else if (Sym->isThreadLocalValue())
+ Assignment.TLV.emplace_back(Sym->getName());
+ else
+ Assignment.Globals.emplace_back(Sym->getName());
+ return;
+ }
+ }
+ };
+
+ std::map<std::vector<std::string>, SymbolFields> Entries;
+ for (const auto *Sym : Symbols) {
+ std::set<MachO::Target> Targets{Sym->targets().begin(),
+ Sym->targets().end()};
+ auto JSONTargets = serializeTargets(Targets, ActiveTargets);
+ if (Sym->isData())
+ AssignForSymbolType(Entries[std::move(JSONTargets)].Data, Sym);
+ else if (Sym->isText())
+ AssignForSymbolType(Entries[std::move(JSONTargets)].Text, Sym);
+ else
+ llvm_unreachable("unexpected symbol type");
+ }
+
+ auto InsertSymbolsToJSON = [](Object &SymSection, TBDKey SegmentKey,
+ SymbolFields::SymbolTypes &SymField) {
+ if (SymField.empty())
+ return;
+ Object Segment;
+ insertNonEmptyValues(Segment, TBDKey::Globals, std::move(SymField.Globals));
+ insertNonEmptyValues(Segment, TBDKey::ThreadLocal, std::move(SymField.TLV));
+ insertNonEmptyValues(Segment, TBDKey::Weak, std::move(SymField.Weaks));
+ insertNonEmptyValues(Segment, TBDKey::ObjCClass,
+ std::move(SymField.ObjCClasses));
+ insertNonEmptyValues(Segment, TBDKey::ObjCEHType,
+ std::move(SymField.EHTypes));
+ insertNonEmptyValues(Segment, TBDKey::ObjCIvar, std::move(SymField.IVars));
+ insertNonEmptyValues(SymSection, SegmentKey, std::move(Segment));
+ };
+
+ Array SymbolSection;
+ for (auto &[Targets, Fields] : Entries) {
+ Object AllSyms;
+ insertNonEmptyValues(AllSyms, TBDKey::Targets, std::move(Targets));
+ InsertSymbolsToJSON(AllSyms, TBDKey::Data, Fields.Data);
+ InsertSymbolsToJSON(AllSyms, TBDKey::Text, Fields.Text);
+ SymbolSection.emplace_back(std::move(AllSyms));
+ }
+
+ return SymbolSection;
+}
+
+Array serializeFlags(const InterfaceFile *File) {
+ // TODO: Give all Targets the same flags for now.
+ Array Flags;
+ if (!File->isTwoLevelNamespace())
+ Flags.emplace_back("flat_namespace");
+ if (!File->isApplicationExtensionSafe())
+ Flags.emplace_back("not_app_extension_safe");
+ return serializeScalar(TBDKey::Attributes, std::move(Flags));
+}
+
+Expected<Object> serializeIF(const InterfaceFile *File) {
+ Object Library;
+
+ // Handle required keys.
+ TargetList ActiveTargets{File->targets().begin(), File->targets().end()};
+ if (!insertNonEmptyValues(Library, TBDKey::TargetInfo,
+ serializeTargetInfo(ActiveTargets)))
+ return make_error<JSONStubError>(getSerializeErrorMsg(TBDKey::TargetInfo));
+
+ Array Name = serializeScalar<StringRef>(TBDKey::Name, File->getInstallName());
+ if (!insertNonEmptyValues(Library, TBDKey::InstallName, std::move(Name)))
+ return make_error<JSONStubError>(getSerializeErrorMsg(TBDKey::InstallName));
+
+ // Handle optional keys.
+ Array Flags = serializeFlags(File);
+ insertNonEmptyValues(Library, TBDKey::Flags, std::move(Flags));
+
+ Array CurrentV = serializeScalar<PackedVersion, std::string>(
+ TBDKey::Version, File->getCurrentVersion(), PackedVersion(1, 0, 0));
+ insertNonEmptyValues(Library, TBDKey::CurrentVersion, std::move(CurrentV));
+
+ Array CompatV = serializeScalar<PackedVersion, std::string>(
+ TBDKey::Version, File->getCompatibilityVersion(), PackedVersion(1, 0, 0));
+ insertNonEmptyValues(Library, TBDKey::CompatibilityVersion,
+ std::move(CompatV));
+
+ Array SwiftABI = serializeScalar<uint8_t, int64_t>(
+ TBDKey::ABI, File->getSwiftABIVersion(), 0u);
+ insertNonEmptyValues(Library, TBDKey::SwiftABI, std::move(SwiftABI));
+
+ Array RPaths = serializeField(TBDKey::Paths, File->rpaths(), ActiveTargets);
+ insertNonEmptyValues(Library, TBDKey::RPath, std::move(RPaths));
+
+ Array Umbrellas = serializeField(TBDKey::Umbrella, File->umbrellas(),
+ ActiveTargets, /*IsArray=*/false);
+ insertNonEmptyValues(Library, TBDKey::ParentUmbrella, std::move(Umbrellas));
+
+ Array Clients =
+ serializeField(TBDKey::Clients, File->allowableClients(), ActiveTargets);
+ insertNonEmptyValues(Library, TBDKey::AllowableClients, std::move(Clients));
+
+ Array ReexportLibs =
+ serializeField(TBDKey::Names, File->reexportedLibraries(), ActiveTargets);
+ insertNonEmptyValues(Library, TBDKey::ReexportLibs, std::move(ReexportLibs));
+
+ // Handle symbols.
+ Array Exports = serializeSymbols(File->exports(), ActiveTargets);
+ insertNonEmptyValues(Library, TBDKey::Exports, std::move(Exports));
+
+ Array Reexports = serializeSymbols(File->reexports(), ActiveTargets);
+ insertNonEmptyValues(Library, TBDKey::Reexports, std::move(Reexports));
+
+ if (!File->isTwoLevelNamespace()) {
+ Array Undefineds = serializeSymbols(File->undefineds(), ActiveTargets);
+ insertNonEmptyValues(Library, TBDKey::Undefineds, std::move(Undefineds));
+ }
+
+ return std::move(Library);
+}
+
+Expected<Object> getJSON(const InterfaceFile *File) {
+ assert(File->getFileType() == FileType::TBD_V5 &&
+ "unexpected json file format version");
+ Object Root;
+
+ auto MainLibOrErr = serializeIF(File);
+ if (!MainLibOrErr)
+ return MainLibOrErr;
+ Root[Keys[TBDKey::MainLibrary]] = std::move(*MainLibOrErr);
+ Array Documents;
+ for (const auto &Doc : File->documents()) {
+ auto LibOrErr = serializeIF(Doc.get());
+ if (!LibOrErr)
+ return LibOrErr;
+ Documents.emplace_back(std::move(*LibOrErr));
+ }
+
+ Root[Keys[TBDKey::TBDVersion]] = 5;
+ insertNonEmptyValues(Root, TBDKey::Documents, std::move(Documents));
+ return std::move(Root);
+}
+
+} // namespace
+
+Error MachO::serializeInterfaceFileToJSON(raw_ostream &OS,
+ const InterfaceFile &File,
+ bool Compact) {
+ auto TextFile = getJSON(&File);
+ if (!TextFile)
+ return TextFile.takeError();
+ if (Compact)
+ OS << formatv("{0}", Value(std::move(*TextFile))) << "\n";
+ else
+ OS << formatv("{0:2}", Value(std::move(*TextFile))) << "\n";
+ return Error::success();
+}
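
    The Compact flag above only changes the formatv style used to print the
    resulting llvm::json::Value: an empty style prints a single compact line,
    while an integer style is taken as the indent width. A minimal sketch,
    using a toy object rather than the real TBD v5 key names:

    #include "llvm/Support/FormatVariadic.h"
    #include "llvm/Support/JSON.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      // Toy JSON value; the real serializer builds the library object above.
      json::Value V = json::Object{{"tbd_version", 5},
                                   {"flags", json::Array{"flat_namespace"}}};
      outs() << formatv("{0}", V) << "\n";   // compact: one line
      outs() << formatv("{0:2}", V) << "\n"; // pretty-printed, 2-space indent
      return 0;
    }
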
diff --git a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
index d4b9c4b5f92b..39bb8dd8ec85 100644
--- a/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
+++ b/llvm/lib/ToolDrivers/llvm-dlltool/DlltoolDriver.cpp
@@ -18,8 +18,8 @@
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/Path.h"
+#include "llvm/TargetParser/Host.h"
#include <optional>
#include <vector>
@@ -165,8 +165,9 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
return 1;
}
- Expected<COFFModuleDefinition> Def =
- parseCOFFModuleDefinition(*MB, Machine, true);
+ bool AddUnderscores = !Args.hasArg(OPT_no_leading_underscore);
+ Expected<COFFModuleDefinition> Def = parseCOFFModuleDefinition(
+ *MB, Machine, /*MingwDef=*/true, AddUnderscores);
if (!Def) {
llvm::errs() << "error parsing definition\n"
@@ -197,7 +198,7 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
}
}
- if (Machine == IMAGE_FILE_MACHINE_I386 && Args.getLastArg(OPT_k)) {
+ if (Machine == IMAGE_FILE_MACHINE_I386 && Args.hasArg(OPT_k)) {
for (COFFShortExport& E : Def->Exports) {
if (!E.AliasTarget.empty() || (!E.Name.empty() && E.Name[0] == '?'))
continue;
@@ -214,8 +215,8 @@ int llvm::dlltoolDriverMain(llvm::ArrayRef<const char *> ArgsArr) {
}
}
- if (!Path.empty() &&
- writeImportLibrary(Def->OutputFile, Path, Def->Exports, Machine, true))
+ if (!Path.empty() && writeImportLibrary(Def->OutputFile, Path, Def->Exports,
+ Machine, /*MinGW=*/true))
return 1;
return 0;
}
diff --git a/llvm/lib/ToolDrivers/llvm-dlltool/Options.td b/llvm/lib/ToolDrivers/llvm-dlltool/Options.td
index e78182ab8130..fee408fd0e9a 100644
--- a/llvm/lib/ToolDrivers/llvm-dlltool/Options.td
+++ b/llvm/lib/ToolDrivers/llvm-dlltool/Options.td
@@ -15,6 +15,9 @@ def d_long : JoinedOrSeparate<["--"], "input-def">, Alias<d>;
def k: Flag<["-"], "k">, HelpText<"Kill @n Symbol from export">;
def k_alias: Flag<["--"], "kill-at">, Alias<k>;
+def no_leading_underscore: Flag<["--"], "no-leading-underscore">,
+ HelpText<"Don't add leading underscores on symbols">;
+
//==============================================================================
// The flags below do nothing. They are defined only for dlltool compatibility.
//==============================================================================
@@ -24,3 +27,6 @@ def S_alias: JoinedOrSeparate<["--"], "as">, Alias<S>;
def f: JoinedOrSeparate<["-"], "f">, HelpText<"Assembler Flags">;
def f_alias: JoinedOrSeparate<["--"], "as-flags">, Alias<f>;
+
+def t: JoinedOrSeparate<["-"], "t">, HelpText<"Prefix for temporary files (ignored)">;
+def t_alias: JoinedOrSeparate<["--"], "temp-prefix">, Alias<t>;
diff --git a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
index ade753ad8918..747e4c5928ea 100644
--- a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
+++ b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
@@ -19,6 +19,7 @@
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Object/ArchiveWriter.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Object/COFFModuleDefinition.h"
#include "llvm/Object/WindowsMachineFlag.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
@@ -31,6 +32,7 @@
#include <optional>
using namespace llvm;
+using namespace llvm::object;
namespace {
@@ -60,7 +62,7 @@ class LibOptTable : public opt::GenericOptTable {
public:
LibOptTable() : opt::GenericOptTable(InfoTable, true) {}
};
-}
+} // namespace
static std::string getDefaultOutputPath(const NewArchiveMember &FirstMember) {
SmallString<128> Val = StringRef(FirstMember.Buf->getBufferIdentifier());
@@ -91,6 +93,18 @@ static std::vector<StringRef> getSearchPaths(opt::InputArgList *Args,
return Ret;
}
+// Opens a file. Path has to be resolved already. (used for def file)
+std::unique_ptr<MemoryBuffer> openFile(const Twine &Path) {
+ ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MB = MemoryBuffer::getFile(Path);
+
+ if (std::error_code EC = MB.getError()) {
+ llvm::errs() << "cannot open file " << Path << ": " << EC.message() << "\n";
+ return nullptr;
+ }
+
+ return std::move(*MB);
+}
+
static std::string findInputFile(StringRef File, ArrayRef<StringRef> Paths) {
for (StringRef Dir : Paths) {
SmallString<128> Path = Dir;
@@ -110,7 +124,7 @@ static void fatalOpenError(llvm::Error E, Twine File) {
});
}
-static void doList(opt::InputArgList& Args) {
+static void doList(opt::InputArgList &Args) {
// lib.exe prints the contents of the first archive file.
std::unique_ptr<MemoryBuffer> B;
for (auto *Arg : Args.filtered(OPT_INPUT)) {
@@ -133,12 +147,14 @@ static void doList(opt::InputArgList& Args) {
object::Archive Archive(B.get()->getMemBufferRef(), Err);
fatalOpenError(std::move(Err), B->getBufferIdentifier());
+ std::vector<StringRef> Names;
for (auto &C : Archive.children(Err)) {
Expected<StringRef> NameOrErr = C.getName();
fatalOpenError(NameOrErr.takeError(), B->getBufferIdentifier());
- StringRef Name = NameOrErr.get();
- llvm::outs() << Name << '\n';
+ Names.push_back(NameOrErr.get());
}
+ for (auto Name : reverse(Names))
+ llvm::outs() << Name << '\n';
fatalOpenError(std::move(Err), B->getBufferIdentifier());
}
@@ -151,8 +167,7 @@ static Expected<COFF::MachineTypes> getCOFFFileMachine(MemoryBufferRef MB) {
uint16_t Machine = (*Obj)->getMachine();
if (Machine != COFF::IMAGE_FILE_MACHINE_I386 &&
Machine != COFF::IMAGE_FILE_MACHINE_AMD64 &&
- Machine != COFF::IMAGE_FILE_MACHINE_ARMNT &&
- Machine != COFF::IMAGE_FILE_MACHINE_ARM64) {
+ Machine != COFF::IMAGE_FILE_MACHINE_ARMNT && !COFF::isAnyArm64(Machine)) {
return createStringError(inconvertibleErrorCode(),
"unknown machine: " + std::to_string(Machine));
}
@@ -165,7 +180,8 @@ static Expected<COFF::MachineTypes> getBitcodeFileMachine(MemoryBufferRef MB) {
if (!TripleStr)
return TripleStr.takeError();
- switch (Triple(*TripleStr).getArch()) {
+ Triple T(*TripleStr);
+ switch (T.getArch()) {
case Triple::x86:
return COFF::IMAGE_FILE_MACHINE_I386;
case Triple::x86_64:
@@ -173,13 +189,32 @@ static Expected<COFF::MachineTypes> getBitcodeFileMachine(MemoryBufferRef MB) {
case Triple::arm:
return COFF::IMAGE_FILE_MACHINE_ARMNT;
case Triple::aarch64:
- return COFF::IMAGE_FILE_MACHINE_ARM64;
+ return T.isWindowsArm64EC() ? COFF::IMAGE_FILE_MACHINE_ARM64EC
+ : COFF::IMAGE_FILE_MACHINE_ARM64;
default:
return createStringError(inconvertibleErrorCode(),
"unknown arch in target triple: " + *TripleStr);
}
}
+static bool machineMatches(COFF::MachineTypes LibMachine,
+ COFF::MachineTypes FileMachine) {
+ if (LibMachine == FileMachine)
+ return true;
+ // ARM64EC mode allows pure ARM64, ARM64EC, and X64 objects to be mixed in
+ // the archive.
+ switch (LibMachine) {
+ case COFF::IMAGE_FILE_MACHINE_ARM64:
+ return FileMachine == COFF::IMAGE_FILE_MACHINE_ARM64X;
+ case COFF::IMAGE_FILE_MACHINE_ARM64EC:
+ case COFF::IMAGE_FILE_MACHINE_ARM64X:
+ return COFF::isAnyArm64(FileMachine) ||
+ FileMachine == COFF::IMAGE_FILE_MACHINE_AMD64;
+ default:
+ return false;
+ }
+}
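
    A rough standalone illustration of the acceptance matrix encoded by
    machineMatches above; the enum constants come from
    llvm/BinaryFormat/COFF.h, while the re-stated helper and the test values
    are only a sketch of the same logic, not the llvm-lib function itself:

    #include "llvm/BinaryFormat/COFF.h"
    #include <cassert>
    using namespace llvm::COFF;

    // Same rule as above: an ARM64EC/ARM64X library accepts any ARM64 flavour
    // plus AMD64 objects; a plain ARM64 library additionally accepts ARM64X.
    static bool machineMatchesSketch(MachineTypes Lib, MachineTypes File) {
      if (Lib == File)
        return true;
      if (Lib == IMAGE_FILE_MACHINE_ARM64)
        return File == IMAGE_FILE_MACHINE_ARM64X;
      if (Lib == IMAGE_FILE_MACHINE_ARM64EC || Lib == IMAGE_FILE_MACHINE_ARM64X)
        return File == IMAGE_FILE_MACHINE_ARM64 ||
               File == IMAGE_FILE_MACHINE_ARM64EC ||
               File == IMAGE_FILE_MACHINE_ARM64X ||
               File == IMAGE_FILE_MACHINE_AMD64;
      return false;
    }

    int main() {
      assert(machineMatchesSketch(IMAGE_FILE_MACHINE_ARM64EC,
                                  IMAGE_FILE_MACHINE_AMD64));
      assert(!machineMatchesSketch(IMAGE_FILE_MACHINE_I386,
                                   IMAGE_FILE_MACHINE_AMD64));
      return 0;
    }
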
+
static void appendFile(std::vector<NewArchiveMember> &Members,
COFF::MachineTypes &LibMachine,
std::string &LibMachineSource, MemoryBufferRef MB) {
@@ -247,11 +282,18 @@ static void appendFile(std::vector<NewArchiveMember> &Members,
// this check. See PR42180.
if (FileMachine != COFF::IMAGE_FILE_MACHINE_UNKNOWN) {
if (LibMachine == COFF::IMAGE_FILE_MACHINE_UNKNOWN) {
+ if (FileMachine == COFF::IMAGE_FILE_MACHINE_ARM64EC) {
+ llvm::errs() << MB.getBufferIdentifier() << ": file machine type "
+ << machineToStr(FileMachine)
+ << " conflicts with inferred library machine type,"
+ << " use /machine:arm64ec or /machine:arm64x\n";
+ exit(1);
+ }
LibMachine = FileMachine;
LibMachineSource =
(" (inferred from earlier file '" + MB.getBufferIdentifier() + "')")
.str();
- } else if (LibMachine != FileMachine) {
+ } else if (!machineMatches(LibMachine, FileMachine)) {
llvm::errs() << MB.getBufferIdentifier() << ": file machine type "
<< machineToStr(FileMachine)
<< " conflicts with library machine type "
@@ -300,6 +342,63 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
for (auto *Arg : Args.filtered(OPT_ignore))
IgnoredWarnings.insert(Arg->getValue());
+ // Get the output library path, if any.
+ std::string OutputPath;
+ if (auto *Arg = Args.getLastArg(OPT_out)) {
+ OutputPath = Arg->getValue();
+ }
+
+ COFF::MachineTypes LibMachine = COFF::IMAGE_FILE_MACHINE_UNKNOWN;
+ std::string LibMachineSource;
+ if (auto *Arg = Args.getLastArg(OPT_machine)) {
+ LibMachine = getMachineType(Arg->getValue());
+ if (LibMachine == COFF::IMAGE_FILE_MACHINE_UNKNOWN) {
+ llvm::errs() << "unknown /machine: arg " << Arg->getValue() << '\n';
+ return 1;
+ }
+ LibMachineSource =
+ std::string(" (from '/machine:") + Arg->getValue() + "' flag)";
+ }
+
+ // Create an import library.
+ if (Args.hasArg(OPT_deffile)) {
+
+ if (OutputPath.empty()) {
+ llvm::errs() << "no output path given\n";
+ return 1;
+ }
+
+ if (LibMachine == COFF::IMAGE_FILE_MACHINE_UNKNOWN) {
+ llvm::errs() << "/def option requires /machine to be specified" << '\n';
+ return 1;
+ }
+
+ std::unique_ptr<MemoryBuffer> MB =
+ openFile(Args.getLastArg(OPT_deffile)->getValue());
+ if (!MB)
+ return 1;
+
+ if (!MB->getBufferSize()) {
+ llvm::errs() << "definition file empty\n";
+ return 1;
+ }
+
+ Expected<COFFModuleDefinition> Def =
+ parseCOFFModuleDefinition(*MB, LibMachine, /*MingwDef=*/false);
+
+ if (!Def) {
+ llvm::errs() << "error parsing definition\n"
+ << errorToErrorCode(Def.takeError()).message();
+ return 1;
+ }
+
+ return writeImportLibrary(Def->OutputFile, OutputPath, Def->Exports,
+ LibMachine,
+ /*MinGW=*/false)
+ ? 1
+ : 0;
+ }
+
// If no input files and not told otherwise, silently do nothing to match
// lib.exe
if (!Args.hasArgNoClaim(OPT_INPUT) && !Args.hasArg(OPT_llvmlibempty)) {
@@ -322,18 +421,6 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
std::vector<StringRef> SearchPaths = getSearchPaths(&Args, Saver);
- COFF::MachineTypes LibMachine = COFF::IMAGE_FILE_MACHINE_UNKNOWN;
- std::string LibMachineSource;
- if (auto *Arg = Args.getLastArg(OPT_machine)) {
- LibMachine = getMachineType(Arg->getValue());
- if (LibMachine == COFF::IMAGE_FILE_MACHINE_UNKNOWN) {
- llvm::errs() << "unknown /machine: arg " << Arg->getValue() << '\n';
- return 1;
- }
- LibMachineSource =
- std::string(" (from '/machine:") + Arg->getValue() + "' flag)";
- }
-
std::vector<std::unique_ptr<MemoryBuffer>> MBs;
StringSet<> Seen;
std::vector<NewArchiveMember> Members;
@@ -371,14 +458,13 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
}
// Create an archive file.
- std::string OutputPath;
- if (auto *Arg = Args.getLastArg(OPT_out)) {
- OutputPath = Arg->getValue();
- } else if (!Members.empty()) {
- OutputPath = getDefaultOutputPath(Members[0]);
- } else {
- llvm::errs() << "no output path given, and cannot infer with no inputs\n";
- return 1;
+ if (OutputPath.empty()) {
+ if (!Members.empty()) {
+ OutputPath = getDefaultOutputPath(Members[0]);
+ } else {
+ llvm::errs() << "no output path given, and cannot infer with no inputs\n";
+ return 1;
+ }
}
// llvm-lib uses relative paths for both regular and thin archives, unlike
// standard GNU ar, which only uses relative paths for thin archives and
@@ -392,10 +478,16 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
}
}
+ // For compatibility with MSVC, reverse member vector after de-duplication.
+ std::reverse(Members.begin(), Members.end());
+
+ bool Thin = Args.hasArg(OPT_llvmlibthin);
if (Error E =
writeArchive(OutputPath, Members,
- /*WriteSymtab=*/true, object::Archive::K_GNU,
- /*Deterministic*/ true, Args.hasArg(OPT_llvmlibthin))) {
+ /*WriteSymtab=*/true,
+ Thin ? object::Archive::K_GNU : object::Archive::K_COFF,
+ /*Deterministic*/ true, Thin, nullptr,
+ COFF::isArm64EC(LibMachine))) {
handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
llvm::errs() << OutputPath << ": " << EI.message() << "\n";
});
diff --git a/llvm/lib/ToolDrivers/llvm-lib/Options.td b/llvm/lib/ToolDrivers/llvm-lib/Options.td
index 4af250e8ad73..22ac1fb842e4 100644
--- a/llvm/lib/ToolDrivers/llvm-lib/Options.td
+++ b/llvm/lib/ToolDrivers/llvm-lib/Options.td
@@ -22,6 +22,7 @@ def libpath: P<"libpath", "Object file search path">;
// Can't be called "list" since that's a keyword.
def lst : F<"list">, HelpText<"List contents of .lib file on stdout">;
def out : P<"out", "Path to file to write output">;
+def deffile : P<"def", "def file to use to generate import library">;
def llvmlibthin : F<"llvmlibthin">,
HelpText<"Make .lib point to .obj files instead of copying their contents">;
diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 473b41241b8a..34c8a380448e 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -18,6 +18,8 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -27,6 +29,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -64,7 +67,6 @@ static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
// shift amount.
auto matchFunnelShift = [](Value *V, Value *&ShVal0, Value *&ShVal1,
Value *&ShAmt) {
- Value *SubAmt;
unsigned Width = V->getType()->getScalarSizeInBits();
// fshl(ShVal0, ShVal1, ShAmt)
@@ -72,8 +74,7 @@ static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
if (match(V, m_OneUse(m_c_Or(
m_Shl(m_Value(ShVal0), m_Value(ShAmt)),
m_LShr(m_Value(ShVal1),
- m_Sub(m_SpecificInt(Width), m_Value(SubAmt))))))) {
- if (ShAmt == SubAmt) // TODO: Use m_Specific
+ m_Sub(m_SpecificInt(Width), m_Deferred(ShAmt))))))) {
return Intrinsic::fshl;
}
@@ -81,9 +82,8 @@ static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
// == (ShVal0 >> ShAmt) | (ShVal1 << (Width - ShAmt))
if (match(V,
m_OneUse(m_c_Or(m_Shl(m_Value(ShVal0), m_Sub(m_SpecificInt(Width),
- m_Value(SubAmt))),
- m_LShr(m_Value(ShVal1), m_Value(ShAmt)))))) {
- if (ShAmt == SubAmt) // TODO: Use m_Specific
+ m_Value(ShAmt))),
+ m_LShr(m_Value(ShVal1), m_Deferred(ShAmt)))))) {
return Intrinsic::fshr;
}
@@ -305,7 +305,7 @@ static bool tryToRecognizePopCount(Instruction &I) {
Value *MulOp0;
// Matching "(i * 0x01010101...) >> 24".
if ((match(Op0, m_Mul(m_Value(MulOp0), m_SpecificInt(Mask01)))) &&
- match(Op1, m_SpecificInt(MaskShift))) {
+ match(Op1, m_SpecificInt(MaskShift))) {
Value *ShiftOp0;
// Matching "((i + (i >> 4)) & 0x0F0F0F0F...)".
if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)),
@@ -398,51 +398,6 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
return true;
}
-/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
-/// pessimistic codegen that has to account for setting errno and can enable
-/// vectorization.
-static bool
-foldSqrt(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI) {
- // Match a call to sqrt mathlib function.
- auto *Call = dyn_cast<CallInst>(&I);
- if (!Call)
- return false;
-
- Module *M = Call->getModule();
- LibFunc Func;
- if (!TLI.getLibFunc(*Call, Func) || !isLibFuncEmittable(M, &TLI, Func))
- return false;
-
- if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
- return false;
-
- // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
- // (because NNAN or the operand arg must not be less than -0.0) and (2) we
- // would not end up lowering to a libcall anyway (which could change the value
- // of errno), then:
- // (1) errno won't be set.
- // (2) it is safe to convert this to an intrinsic call.
- Type *Ty = Call->getType();
- Value *Arg = Call->getArgOperand(0);
- if (TTI.haveFastSqrt(Ty) &&
- (Call->hasNoNaNs() || CannotBeOrderedLessThanZero(Arg, &TLI))) {
- IRBuilder<> Builder(&I);
- IRBuilderBase::FastMathFlagGuard Guard(Builder);
- Builder.setFastMathFlags(Call->getFastMathFlags());
-
- Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
- Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
- I.replaceAllUsesWith(NewSqrt);
-
- // Explicitly erase the old call because a call with side effects is not
- // trivially dead.
- I.eraseFromParent();
- return true;
- }
-
- return false;
-}
-
// Check if this array of constants represents a cttz table.
// Iterate over the elements from \p Table by trying to find/match all
// the numbers from 0 to \p InputBits that should represent cttz results.
@@ -613,7 +568,7 @@ struct LoadOps {
LoadInst *RootInsert = nullptr;
bool FoundRoot = false;
uint64_t LoadSize = 0;
- Value *Shift = nullptr;
+ const APInt *Shift = nullptr;
Type *ZextType;
AAMDNodes AATags;
};
@@ -623,7 +578,7 @@ struct LoadOps {
// (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)
static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
AliasAnalysis &AA) {
- Value *ShAmt2 = nullptr;
+ const APInt *ShAmt2 = nullptr;
Value *X;
Instruction *L1, *L2;
@@ -631,7 +586,7 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
if (match(V, m_OneUse(m_c_Or(
m_Value(X),
m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))),
- m_Value(ShAmt2)))))) ||
+ m_APInt(ShAmt2)))))) ||
match(V, m_OneUse(m_Or(m_Value(X),
m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))))))) {
if (!foldLoadsRecursive(X, LOps, DL, AA) && LOps.FoundRoot)
@@ -642,11 +597,11 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
// Check if the pattern has loads
LoadInst *LI1 = LOps.Root;
- Value *ShAmt1 = LOps.Shift;
+ const APInt *ShAmt1 = LOps.Shift;
if (LOps.FoundRoot == false &&
(match(X, m_OneUse(m_ZExt(m_Instruction(L1)))) ||
match(X, m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1)))),
- m_Value(ShAmt1)))))) {
+ m_APInt(ShAmt1)))))) {
LI1 = dyn_cast<LoadInst>(L1);
}
LoadInst *LI2 = dyn_cast<LoadInst>(L2);
@@ -721,12 +676,11 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
std::swap(ShAmt1, ShAmt2);
// Find Shifts values.
- const APInt *Temp;
uint64_t Shift1 = 0, Shift2 = 0;
- if (ShAmt1 && match(ShAmt1, m_APInt(Temp)))
- Shift1 = Temp->getZExtValue();
- if (ShAmt2 && match(ShAmt2, m_APInt(Temp)))
- Shift2 = Temp->getZExtValue();
+ if (ShAmt1)
+ Shift1 = ShAmt1->getZExtValue();
+ if (ShAmt2)
+ Shift2 = ShAmt2->getZExtValue();
// First load is always LI1. This is where we put the new load.
// Use the merged load size available from LI1 for forward loads.
@@ -768,7 +722,8 @@ static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
// pattern which suggests that the loads can be combined. The one and only use
// of the loads is to form a wider load.
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
- TargetTransformInfo &TTI, AliasAnalysis &AA) {
+ TargetTransformInfo &TTI, AliasAnalysis &AA,
+ const DominatorTree &DT) {
// Only consider load chains of scalar values.
if (isa<VectorType>(I.getType()))
return false;
@@ -793,17 +748,18 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
if (!Allowed || !Fast)
return false;
- // Make sure the Load pointer of type GEP/non-GEP is above insert point
- Instruction *Inst = dyn_cast<Instruction>(LI1->getPointerOperand());
- if (Inst && Inst->getParent() == LI1->getParent() &&
- !Inst->comesBefore(LOps.RootInsert))
- Inst->moveBefore(LOps.RootInsert);
-
- // New load can be generated
+ // Get the Index and Ptr for the new GEP.
Value *Load1Ptr = LI1->getPointerOperand();
Builder.SetInsertPoint(LOps.RootInsert);
- Value *NewPtr = Builder.CreateBitCast(Load1Ptr, WiderType->getPointerTo(AS));
- NewLoad = Builder.CreateAlignedLoad(WiderType, NewPtr, LI1->getAlign(),
+ if (!DT.dominates(Load1Ptr, LOps.RootInsert)) {
+ APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
+ Load1Ptr = Load1Ptr->stripAndAccumulateConstantOffsets(
+ DL, Offset1, /* AllowNonInbounds */ true);
+ Load1Ptr = Builder.CreateGEP(Builder.getInt8Ty(), Load1Ptr,
+ Builder.getInt32(Offset1.getZExtValue()));
+ }
+ // Generate wider load.
+ NewLoad = Builder.CreateAlignedLoad(WiderType, Load1Ptr, LI1->getAlign(),
LI1->isVolatile(), "");
NewLoad->takeName(LI1);
// Set the New Load AATags Metadata.
@@ -818,18 +774,254 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
// Check if shift needed. We need to shift with the amount of load1
// shift if not zero.
if (LOps.Shift)
- NewOp = Builder.CreateShl(NewOp, LOps.Shift);
+ NewOp = Builder.CreateShl(NewOp, ConstantInt::get(I.getContext(), *LOps.Shift));
I.replaceAllUsesWith(NewOp);
return true;
}
+// Calculate the GEP stride and the accumulated constant ModOffset. Return the
+// Stride and ModOffset.
+static std::pair<APInt, APInt>
+getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL) {
+ unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
+ std::optional<APInt> Stride;
+ APInt ModOffset(BW, 0);
+ // Return a minimum GEP stride, the greatest common divisor of consecutive GEP
+ // index scales (cf. Bézout's identity).
+ while (auto *GEP = dyn_cast<GEPOperator>(PtrOp)) {
+ MapVector<Value *, APInt> VarOffsets;
+ if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset))
+ break;
+
+ for (auto [V, Scale] : VarOffsets) {
+ // Only keep a power of two factor for non-inbounds
+ if (!GEP->isInBounds())
+ Scale = APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());
+
+ if (!Stride)
+ Stride = Scale;
+ else
+ Stride = APIntOps::GreatestCommonDivisor(*Stride, Scale);
+ }
+
+ PtrOp = GEP->getPointerOperand();
+ }
+
+ // Check whether pointer arrives back at Global Variable via at least one GEP.
+ // Even if it doesn't, we can check by alignment.
+ if (!isa<GlobalVariable>(PtrOp) || !Stride)
+ return {APInt(BW, 1), APInt(BW, 0)};
+
+ // To account for signed GEP indices, reduce the offset to the non-negative
+ // remainder of division by the minimum GEP stride.
+ ModOffset = ModOffset.srem(*Stride);
+ if (ModOffset.isNegative())
+ ModOffset += *Stride;
+
+ return {*Stride, ModOffset};
+}
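
    A small worked example of the computation above, using APInt directly; the
    GEP scales and the constant offset are made up for illustration:

    #include "llvm/ADT/APInt.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      // Hypothetical inbounds GEP chain: base + 12*i + 8*j + 22
      // (index scales 12 and 8, accumulated constant offset 22).
      APInt Stride =
          APIntOps::GreatestCommonDivisor(APInt(64, 12), APInt(64, 8)); // 4
      APInt ModOffset = APInt(64, 22).srem(Stride);                     // 22 mod 4
      if (ModOffset.isNegative())
        ModOffset += Stride; // signed indices can make the remainder negative
      assert(Stride == 4 && ModOffset == 2);
      // Every address reachable through the GEP is offset 2 modulo 4 from the
      // global's base, which is what foldPatternedLoads below relies on.
      return 0;
    }
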
+
+/// If C is a constant patterned array and all valid loaded results for the
+/// given alignment are equal to the same constant, fold the load to that
+/// constant and return true.
+static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
+ auto *LI = dyn_cast<LoadInst>(&I);
+ if (!LI || LI->isVolatile())
+ return false;
+
+ // We can only fold the load if it is from a constant global with definitive
+ // initializer. Skip expensive logic if this is not the case.
+ auto *PtrOp = LI->getPointerOperand();
+ auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(PtrOp));
+ if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+ return false;
+
+ // Bail for large initializers in excess of 4K to avoid too many scans.
+ Constant *C = GV->getInitializer();
+ uint64_t GVSize = DL.getTypeAllocSize(C->getType());
+ if (!GVSize || 4096 < GVSize)
+ return false;
+
+ Type *LoadTy = LI->getType();
+ unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
+ auto [Stride, ConstOffset] = getStrideAndModOffsetOfGEP(PtrOp, DL);
+
+ // Any possible offset is a multiple of the GEP stride, and any valid offset
+ // is a multiple of the load alignment, so checking only multiples of the
+ // larger of the two is sufficient to establish that the results are equal.
+ if (auto LA = LI->getAlign();
+ LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value()) {
+ ConstOffset = APInt(BW, 0);
+ Stride = APInt(BW, LA.value());
+ }
+
+ Constant *Ca = ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL);
+ if (!Ca)
+ return false;
+
+ unsigned E = GVSize - DL.getTypeStoreSize(LoadTy);
+ for (; ConstOffset.getZExtValue() <= E; ConstOffset += Stride)
+ if (Ca != ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL))
+ return false;
+
+ I.replaceAllUsesWith(Ca);
+
+ return true;
+}
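
    A sketch of the kind of source the new fold targets; the table and the
    lookup function are invented for illustration:

    #include <cstdint>

    // Every element is the same, so any in-bounds, naturally aligned load from
    // the table yields the same constant no matter what the index is.
    static const uint16_t Table[8] = {7, 7, 7, 7, 7, 7, 7, 7};

    uint16_t lookup(unsigned I) {
      // With the fold above, this load may be replaced by the constant 7: the
      // stride/offset analysis shows all reachable offsets read the same value.
      return Table[I & 7];
    }
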
+
+/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
+/// pessimistic codegen that has to account for setting errno and can enable
+/// vectorization.
+static bool foldSqrt(CallInst *Call, TargetTransformInfo &TTI,
+ TargetLibraryInfo &TLI, AssumptionCache &AC,
+ DominatorTree &DT) {
+ Module *M = Call->getModule();
+
+ // If (1) this is a sqrt libcall, (2) we can assume that NaN is not created
+ // (because NNAN or the operand arg must not be less than -0.0) and (3) we
+ // would not end up lowering to a libcall anyway (which could change the value
+ // of errno), then:
+ // (1) errno won't be set.
+ // (2) it is safe to convert this to an intrinsic call.
+ Type *Ty = Call->getType();
+ Value *Arg = Call->getArgOperand(0);
+ if (TTI.haveFastSqrt(Ty) &&
+ (Call->hasNoNaNs() ||
+ cannotBeOrderedLessThanZero(Arg, M->getDataLayout(), &TLI, 0, &AC, Call,
+ &DT))) {
+ IRBuilder<> Builder(Call);
+ IRBuilderBase::FastMathFlagGuard Guard(Builder);
+ Builder.setFastMathFlags(Call->getFastMathFlags());
+
+ Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
+ Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
+ Call->replaceAllUsesWith(NewSqrt);
+
+ // Explicitly erase the old call because a call with side effects is not
+ // trivially dead.
+ Call->eraseFromParent();
+ return true;
+ }
+
+ return false;
+}
+
+/// Try to expand strcmp(P, "x") calls.
+static bool expandStrcmp(CallInst *CI, DominatorTree &DT, bool &MadeCFGChange) {
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
+
+ // Trivial cases are optimized during inst combine
+ if (Str1P == Str2P)
+ return false;
+
+ StringRef Str1, Str2;
+ bool HasStr1 = getConstantStringInfo(Str1P, Str1);
+ bool HasStr2 = getConstantStringInfo(Str2P, Str2);
+
+ Value *NonConstantP = nullptr;
+ StringRef ConstantStr;
+
+ if (!HasStr1 && HasStr2 && Str2.size() == 1) {
+ NonConstantP = Str1P;
+ ConstantStr = Str2;
+ } else if (!HasStr2 && HasStr1 && Str1.size() == 1) {
+ NonConstantP = Str2P;
+ ConstantStr = Str1;
+ } else {
+ return false;
+ }
+
+ // Check if strcmp result is only used in a comparison with zero
+ if (!isOnlyUsedInZeroComparison(CI))
+ return false;
+
+ // For strcmp(P, "x") do the following transformation:
+ //
+ // (before)
+ // dst = strcmp(P, "x")
+ //
+ // (after)
+ // v0 = P[0] - 'x'
+ // [if v0 == 0]
+ // v1 = P[1]
+ // dst = phi(v0, v1)
+ //
+
+ IRBuilder<> B(CI->getParent());
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+
+ Type *RetType = CI->getType();
+
+ B.SetInsertPoint(CI);
+ BasicBlock *InitialBB = B.GetInsertBlock();
+ Value *Str1FirstCharacterValue =
+ B.CreateZExt(B.CreateLoad(B.getInt8Ty(), NonConstantP), RetType);
+ Value *Str2FirstCharacterValue =
+ ConstantInt::get(RetType, static_cast<unsigned char>(ConstantStr[0]));
+ Value *FirstCharacterSub =
+ B.CreateNSWSub(Str1FirstCharacterValue, Str2FirstCharacterValue);
+ Value *IsFirstCharacterSubZero =
+ B.CreateICmpEQ(FirstCharacterSub, ConstantInt::get(RetType, 0));
+ Instruction *IsFirstCharacterSubZeroBBTerminator = SplitBlockAndInsertIfThen(
+ IsFirstCharacterSubZero, CI, /*Unreachable*/ false,
+ /*BranchWeights*/ nullptr, &DTU);
+
+ B.SetInsertPoint(IsFirstCharacterSubZeroBBTerminator);
+ B.GetInsertBlock()->setName("strcmp_expand_sub_is_zero");
+ BasicBlock *IsFirstCharacterSubZeroBB = B.GetInsertBlock();
+ Value *Str1SecondCharacterValue = B.CreateZExt(
+ B.CreateLoad(B.getInt8Ty(), B.CreateConstInBoundsGEP1_64(
+ B.getInt8Ty(), NonConstantP, 1)),
+ RetType);
+
+ B.SetInsertPoint(CI);
+ B.GetInsertBlock()->setName("strcmp_expand_sub_join");
+
+ PHINode *Result = B.CreatePHI(RetType, 2);
+ Result->addIncoming(FirstCharacterSub, InitialBB);
+ Result->addIncoming(Str1SecondCharacterValue, IsFirstCharacterSubZeroBB);
+
+ CI->replaceAllUsesWith(Result);
+ CI->eraseFromParent();
+
+ MadeCFGChange = true;
+
+ return true;
+}
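
    At the C level, the expansion above computes roughly the following for
    strcmp(P, "x") when the result is only compared against zero; the helper
    name and the test strings are illustrative:

    #include <cassert>

    static int strcmp_x_expanded(const char *P) {
      int V0 = (unsigned char)P[0] - (unsigned char)'x';
      // Only when the first characters match does the second byte matter; it
      // is zero exactly when P is the one-character string "x".
      return V0 == 0 ? (unsigned char)P[1] : V0;
    }

    int main() {
      assert(strcmp_x_expanded("x") == 0);
      assert(strcmp_x_expanded("xy") != 0);
      assert(strcmp_x_expanded("a") != 0);
      return 0;
    }
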
+
+static bool foldLibraryCalls(Instruction &I, TargetTransformInfo &TTI,
+ TargetLibraryInfo &TLI, DominatorTree &DT,
+ AssumptionCache &AC, bool &MadeCFGChange) {
+ CallInst *CI = dyn_cast<CallInst>(&I);
+ if (!CI)
+ return false;
+
+ LibFunc Func;
+ Module *M = I.getModule();
+ if (!TLI.getLibFunc(*CI, Func) || !isLibFuncEmittable(M, &TLI, Func))
+ return false;
+
+ switch (Func) {
+ case LibFunc_sqrt:
+ case LibFunc_sqrtf:
+ case LibFunc_sqrtl:
+ return foldSqrt(CI, TTI, TLI, AC, DT);
+ case LibFunc_strcmp:
+ return expandStrcmp(CI, DT, MadeCFGChange);
+ default:
+ break;
+ }
+
+ return false;
+}
+
/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
TargetTransformInfo &TTI,
- TargetLibraryInfo &TLI, AliasAnalysis &AA) {
+ TargetLibraryInfo &TLI, AliasAnalysis &AA,
+ AssumptionCache &AC, bool &MadeCFGChange) {
bool MadeChange = false;
for (BasicBlock &BB : F) {
// Ignore unreachable basic blocks.
@@ -849,11 +1041,12 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
MadeChange |= tryToRecognizePopCount(I);
MadeChange |= tryToFPToSat(I, TTI);
MadeChange |= tryToRecognizeTableBasedCttz(I);
- MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA);
+ MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT);
+ MadeChange |= foldPatternedLoads(I, DL);
// NOTE: This function introduces erasing of the instruction `I`, so it
// needs to be called at the end of this sequence, otherwise we may make
// bugs.
- MadeChange |= foldSqrt(I, TTI, TLI);
+ MadeChange |= foldLibraryCalls(I, TTI, TLI, DT, AC, MadeCFGChange);
}
}
@@ -869,12 +1062,12 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
/// handled in the callers of this function.
static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
TargetLibraryInfo &TLI, DominatorTree &DT,
- AliasAnalysis &AA) {
+ AliasAnalysis &AA, bool &ChangedCFG) {
bool MadeChange = false;
const DataLayout &DL = F.getParent()->getDataLayout();
TruncInstCombine TIC(AC, TLI, DL, DT);
MadeChange |= TIC.run(F);
- MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA);
+ MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, ChangedCFG);
return MadeChange;
}
@@ -885,12 +1078,21 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
- if (!runImpl(F, AC, TTI, TLI, DT, AA)) {
+
+ bool MadeCFGChange = false;
+
+ if (!runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
}
+
// Mark all the analyses that instcombine updates as preserved.
PreservedAnalyses PA;
- PA.preserveSet<CFGAnalyses>();
+
+ if (MadeCFGChange)
+ PA.preserve<DominatorTreeAnalysis>();
+ else
+ PA.preserveSet<CFGAnalyses>();
+
return PA;
}
diff --git a/llvm/lib/Transforms/CFGuard/CFGuard.cpp b/llvm/lib/Transforms/CFGuard/CFGuard.cpp
index bebaa6cb5969..bf823ac55497 100644
--- a/llvm/lib/Transforms/CFGuard/CFGuard.cpp
+++ b/llvm/lib/Transforms/CFGuard/CFGuard.cpp
@@ -15,12 +15,12 @@
#include "llvm/Transforms/CFGuard.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
index 81b43a2ab2c2..29978bef661c 100644
--- a/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -127,10 +127,16 @@ PreservedAnalyses CoroCleanupPass::run(Module &M,
FunctionPassManager FPM;
FPM.addPass(SimplifyCFGPass());
+ PreservedAnalyses FuncPA;
+ FuncPA.preserveSet<CFGAnalyses>();
+
Lowerer L(M);
- for (auto &F : M)
- if (L.lower(F))
+ for (auto &F : M) {
+ if (L.lower(F)) {
+ FAM.invalidate(F, FuncPA);
FPM.run(F, FAM);
+ }
+ }
return PreservedAnalyses::none();
}
diff --git a/llvm/lib/Transforms/Coroutines/CoroConditionalWrapper.cpp b/llvm/lib/Transforms/Coroutines/CoroConditionalWrapper.cpp
index 974123fe36a1..3e71e58bb1de 100644
--- a/llvm/lib/Transforms/Coroutines/CoroConditionalWrapper.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroConditionalWrapper.cpp
@@ -26,7 +26,7 @@ PreservedAnalyses CoroConditionalWrapper::run(Module &M,
void CoroConditionalWrapper::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
OS << "coro-cond";
- OS << "(";
+ OS << '(';
PM.printPipeline(OS, MapClassName2PassName);
- OS << ")";
+ OS << ')';
}
diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp
index f032c568449b..d78ab1c1ea28 100644
--- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/Support/ErrorHandling.h"
@@ -46,7 +47,8 @@ struct Lowerer : coro::LowererBase {
AAResults &AA);
bool shouldElide(Function *F, DominatorTree &DT) const;
void collectPostSplitCoroIds(Function *F);
- bool processCoroId(CoroIdInst *, AAResults &AA, DominatorTree &DT);
+ bool processCoroId(CoroIdInst *, AAResults &AA, DominatorTree &DT,
+ OptimizationRemarkEmitter &ORE);
bool hasEscapePath(const CoroBeginInst *,
const SmallPtrSetImpl<BasicBlock *> &) const;
};
@@ -299,7 +301,7 @@ void Lowerer::collectPostSplitCoroIds(Function *F) {
}
bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA,
- DominatorTree &DT) {
+ DominatorTree &DT, OptimizationRemarkEmitter &ORE) {
CoroBegins.clear();
CoroAllocs.clear();
ResumeAddr.clear();
@@ -343,6 +345,24 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA,
replaceWithConstant(ResumeAddrConstant, ResumeAddr);
bool ShouldElide = shouldElide(CoroId->getFunction(), DT);
+ if (!ShouldElide)
+ ORE.emit([&]() {
+ if (auto FrameSizeAndAlign =
+ getFrameLayout(cast<Function>(ResumeAddrConstant)))
+ return OptimizationRemarkMissed(DEBUG_TYPE, "CoroElide", CoroId)
+ << "'" << ore::NV("callee", CoroId->getCoroutine()->getName())
+ << "' not elided in '"
+ << ore::NV("caller", CoroId->getFunction()->getName())
+ << "' (frame_size="
+ << ore::NV("frame_size", FrameSizeAndAlign->first) << ", align="
+ << ore::NV("align", FrameSizeAndAlign->second.value()) << ")";
+ else
+ return OptimizationRemarkMissed(DEBUG_TYPE, "CoroElide", CoroId)
+ << "'" << ore::NV("callee", CoroId->getCoroutine()->getName())
+ << "' not elided in '"
+ << ore::NV("caller", CoroId->getFunction()->getName())
+ << "' (frame_size=unknown, align=unknown)";
+ });
auto *DestroyAddrConstant = Resumers->getAggregateElement(
ShouldElide ? CoroSubFnInst::CleanupIndex : CoroSubFnInst::DestroyIndex);
@@ -363,6 +383,23 @@ bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA,
<< "Elide " << CoroId->getCoroutine()->getName() << " in "
<< CoroId->getFunction()->getName() << "\n";
#endif
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "CoroElide", CoroId)
+ << "'" << ore::NV("callee", CoroId->getCoroutine()->getName())
+ << "' elided in '"
+ << ore::NV("caller", CoroId->getFunction()->getName())
+ << "' (frame_size="
+ << ore::NV("frame_size", FrameSizeAndAlign->first) << ", align="
+ << ore::NV("align", FrameSizeAndAlign->second.value()) << ")";
+ });
+ } else {
+ ORE.emit([&]() {
+ return OptimizationRemarkMissed(DEBUG_TYPE, "CoroElide", CoroId)
+ << "'" << ore::NV("callee", CoroId->getCoroutine()->getName())
+ << "' not elided in '"
+ << ore::NV("caller", CoroId->getFunction()->getName())
+ << "' (frame_size=unknown, align=unknown)";
+ });
}
}
@@ -387,10 +424,11 @@ PreservedAnalyses CoroElidePass::run(Function &F, FunctionAnalysisManager &AM) {
AAResults &AA = AM.getResult<AAManager>(F);
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
bool Changed = false;
for (auto *CII : L.CoroIds)
- Changed |= L.processCoroId(CII, AA, DT);
+ Changed |= L.processCoroId(CII, AA, DT, ORE);
return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index e98c601648e0..1f373270f951 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -16,6 +16,7 @@
#include "CoroInternal.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/PtrUseVisitor.h"
@@ -37,6 +38,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
+#include <deque>
#include <optional>
using namespace llvm;
@@ -87,7 +89,7 @@ public:
// crosses a suspend point.
//
namespace {
-struct SuspendCrossingInfo {
+class SuspendCrossingInfo {
BlockToIndexMapping Mapping;
struct BlockData {
@@ -96,20 +98,30 @@ struct SuspendCrossingInfo {
bool Suspend = false;
bool End = false;
bool KillLoop = false;
+ bool Changed = false;
};
SmallVector<BlockData, SmallVectorThreshold> Block;
- iterator_range<succ_iterator> successors(BlockData const &BD) const {
+ iterator_range<pred_iterator> predecessors(BlockData const &BD) const {
BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
- return llvm::successors(BB);
+ return llvm::predecessors(BB);
}
BlockData &getBlockData(BasicBlock *BB) {
return Block[Mapping.blockToIndex(BB)];
}
+ /// Compute the BlockData for the current function in one iteration.
+ /// Returns whether the BlockData changes in this iteration.
+ /// Initialize - Whether this is the first iteration; the initial case is
+ /// optimized a little bit by switching the loop body at compile time.
+ template <bool Initialize = false> bool computeBlockData();
+
+public:
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void dump() const;
void dump(StringRef Label, BitVector const &BV) const;
+#endif
SuspendCrossingInfo(Function &F, coro::Shape &Shape);
@@ -211,6 +223,72 @@ LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
}
#endif
+template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
+ const size_t N = Mapping.size();
+ bool Changed = false;
+
+ for (size_t I = 0; I < N; ++I) {
+ auto &B = Block[I];
+
+ // We don't need to check the predecessors on the initial iteration.
+ if constexpr (!Initialize)
+ // If all the predecessors of the current Block don't change,
+ // the BlockData for the current block must not change too.
+ if (all_of(predecessors(B), [this](BasicBlock *BB) {
+ return !Block[Mapping.blockToIndex(BB)].Changed;
+ })) {
+ B.Changed = false;
+ continue;
+ }
+
+ // Save the Consumes and Kills bitsets so that it is easy to see
+ // if anything changed after propagation.
+ auto SavedConsumes = B.Consumes;
+ auto SavedKills = B.Kills;
+
+ for (BasicBlock *PI : predecessors(B)) {
+ auto PrevNo = Mapping.blockToIndex(PI);
+ auto &P = Block[PrevNo];
+
+ // Propagate Kills and Consumes from predecessors into B.
+ B.Consumes |= P.Consumes;
+ B.Kills |= P.Kills;
+
+ // If block P is a suspend block, it should propagate kills into block
+ // B for every block P consumes.
+ if (P.Suspend)
+ B.Kills |= P.Consumes;
+ }
+
+ if (B.Suspend) {
+ // If block S is a suspend block, it should kill all of the blocks it
+ // consumes.
+ B.Kills |= B.Consumes;
+ } else if (B.End) {
+ // If block B is an end block, it should not propagate kills as the
+ // blocks following coro.end() are reached during initial invocation
+ // of the coroutine while all the data are still available on the
+ // stack or in the registers.
+ B.Kills.reset();
+ } else {
+ // This is reached when block B is neither a suspend block nor coro.end,
+ // and we need to make sure that it is not in its own kill set.
+ B.KillLoop |= B.Kills[I];
+ B.Kills.reset(I);
+ }
+
+ if constexpr (!Initialize) {
+ B.Changed = (B.Kills != SavedKills) || (B.Consumes != SavedConsumes);
+ Changed |= B.Changed;
+ }
+ }
+
+ if constexpr (Initialize)
+ return true;
+
+ return Changed;
+}
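
    The constructor below drives this to a fixed point: one Initialize pass,
    then while (computeBlockData());. The same iterate-until-stable shape,
    reduced to a toy predecessor-based propagation problem with an invented
    graph, looks like this:

    #include <array>
    #include <vector>

    int main() {
      // Toy CFG: Preds[B] lists the predecessors of block B.
      std::array<std::vector<int>, 4> Preds = {{{}, {0}, {0, 1}, {2}}};
      std::array<bool, 4> Reached = {true, false, false, false};
      bool Changed = true;
      while (Changed) { // analogous to: while (computeBlockData());
        Changed = false;
        for (int B = 0; B < 4; ++B)
          for (int P : Preds[B])
            if (Reached[P] && !Reached[B]) {
              Reached[B] = true;
              Changed = true;
            }
      }
      // After the loop every block is marked reached, and the final iteration
      // changed nothing: the fixed point.
      return 0;
    }
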
+
SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
: Mapping(F) {
const size_t N = Mapping.size();
@@ -222,6 +300,7 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
B.Consumes.resize(N);
B.Kills.resize(N);
B.Consumes.set(I);
+ B.Changed = true;
}
// Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
@@ -246,73 +325,123 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
markSuspendBlock(Save);
}
- // Iterate propagating consumes and kills until they stop changing.
- int Iteration = 0;
- (void)Iteration;
+ computeBlockData</*Initialize=*/true>();
- bool Changed;
- do {
- LLVM_DEBUG(dbgs() << "iteration " << ++Iteration);
- LLVM_DEBUG(dbgs() << "==============\n");
-
- Changed = false;
- for (size_t I = 0; I < N; ++I) {
- auto &B = Block[I];
- for (BasicBlock *SI : successors(B)) {
-
- auto SuccNo = Mapping.blockToIndex(SI);
-
- // Saved Consumes and Kills bitsets so that it is easy to see
- // if anything changed after propagation.
- auto &S = Block[SuccNo];
- auto SavedConsumes = S.Consumes;
- auto SavedKills = S.Kills;
-
- // Propagate Kills and Consumes from block B into its successor S.
- S.Consumes |= B.Consumes;
- S.Kills |= B.Kills;
-
- // If block B is a suspend block, it should propagate kills into the
- // its successor for every block B consumes.
- if (B.Suspend) {
- S.Kills |= B.Consumes;
- }
- if (S.Suspend) {
- // If block S is a suspend block, it should kill all of the blocks it
- // consumes.
- S.Kills |= S.Consumes;
- } else if (S.End) {
- // If block S is an end block, it should not propagate kills as the
- // blocks following coro.end() are reached during initial invocation
- // of the coroutine while all the data are still available on the
- // stack or in the registers.
- S.Kills.reset();
- } else {
- // This is reached when S block it not Suspend nor coro.end and it
- // need to make sure that it is not in the kill set.
- S.KillLoop |= S.Kills[SuccNo];
- S.Kills.reset(SuccNo);
- }
+ while (computeBlockData())
+ ;
+
+ LLVM_DEBUG(dump());
+}
- // See if anything changed.
- Changed |= (S.Kills != SavedKills) || (S.Consumes != SavedConsumes);
+namespace {
- if (S.Kills != SavedKills) {
- LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName()
- << "\n");
- LLVM_DEBUG(dump("S.Kills", S.Kills));
- LLVM_DEBUG(dump("SavedKills", SavedKills));
- }
- if (S.Consumes != SavedConsumes) {
- LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI << "\n");
- LLVM_DEBUG(dump("S.Consume", S.Consumes));
- LLVM_DEBUG(dump("SavedCons", SavedConsumes));
+// RematGraph is used to construct a DAG of rematerializable instructions.
+// When the constructor is invoked with a candidate instruction (which is
+// materializable), it builds a DAG of materializable instructions from that
+// point.
+// Typically, for each instruction identified as re-materializable across a
+// suspend point, a RematGraph will be created.
+struct RematGraph {
+ // Each RematNode in the graph contains the edges to instructions providing
+ // operands in the current node.
+ struct RematNode {
+ Instruction *Node;
+ SmallVector<RematNode *> Operands;
+ RematNode() = default;
+ RematNode(Instruction *V) : Node(V) {}
+ };
+
+ RematNode *EntryNode;
+ using RematNodeMap =
+ SmallMapVector<Instruction *, std::unique_ptr<RematNode>, 8>;
+ RematNodeMap Remats;
+ const std::function<bool(Instruction &)> &MaterializableCallback;
+ SuspendCrossingInfo &Checker;
+
+ RematGraph(const std::function<bool(Instruction &)> &MaterializableCallback,
+ Instruction *I, SuspendCrossingInfo &Checker)
+ : MaterializableCallback(MaterializableCallback), Checker(Checker) {
+ std::unique_ptr<RematNode> FirstNode = std::make_unique<RematNode>(I);
+ EntryNode = FirstNode.get();
+ std::deque<std::unique_ptr<RematNode>> WorkList;
+ addNode(std::move(FirstNode), WorkList, cast<User>(I));
+ while (WorkList.size()) {
+ std::unique_ptr<RematNode> N = std::move(WorkList.front());
+ WorkList.pop_front();
+ addNode(std::move(N), WorkList, cast<User>(I));
+ }
+ }
+
+ void addNode(std::unique_ptr<RematNode> NUPtr,
+ std::deque<std::unique_ptr<RematNode>> &WorkList,
+ User *FirstUse) {
+ RematNode *N = NUPtr.get();
+ if (Remats.count(N->Node))
+ return;
+
+ // We haven't seen this node yet - add it to the list.
+ Remats[N->Node] = std::move(NUPtr);
+ for (auto &Def : N->Node->operands()) {
+ Instruction *D = dyn_cast<Instruction>(Def.get());
+ if (!D || !MaterializableCallback(*D) ||
+ !Checker.isDefinitionAcrossSuspend(*D, FirstUse))
+ continue;
+
+ if (Remats.count(D)) {
+ // Already have this in the graph
+ N->Operands.push_back(Remats[D].get());
+ continue;
+ }
+
+ bool NoMatch = true;
+ for (auto &I : WorkList) {
+ if (I->Node == D) {
+ NoMatch = false;
+ N->Operands.push_back(I.get());
+ break;
}
}
+ if (NoMatch) {
+ // Create a new node
+ std::unique_ptr<RematNode> ChildNode = std::make_unique<RematNode>(D);
+ N->Operands.push_back(ChildNode.get());
+ WorkList.push_back(std::move(ChildNode));
+ }
}
- } while (Changed);
- LLVM_DEBUG(dump());
-}
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ void dump() const {
+ dbgs() << "Entry (";
+ if (EntryNode->Node->getParent()->hasName())
+ dbgs() << EntryNode->Node->getParent()->getName();
+ else
+ EntryNode->Node->getParent()->printAsOperand(dbgs(), false);
+ dbgs() << ") : " << *EntryNode->Node << "\n";
+ for (auto &E : Remats) {
+ dbgs() << *(E.first) << "\n";
+ for (RematNode *U : E.second->Operands)
+ dbgs() << " " << *U->Node << "\n";
+ }
+ }
+#endif
+};
+} // end anonymous namespace
+
+namespace llvm {
+
+template <> struct GraphTraits<RematGraph *> {
+ using NodeRef = RematGraph::RematNode *;
+ using ChildIteratorType = RematGraph::RematNode **;
+
+ static NodeRef getEntryNode(RematGraph *G) { return G->EntryNode; }
+ static ChildIteratorType child_begin(NodeRef N) {
+ return N->Operands.begin();
+ }
+ static ChildIteratorType child_end(NodeRef N) { return N->Operands.end(); }
+};
+
+} // end namespace llvm
#undef DEBUG_TYPE // "coro-suspend-crossing"
#define DEBUG_TYPE "coro-frame"
@@ -425,6 +554,15 @@ static void dumpSpills(StringRef Title, const SpillInfo &Spills) {
I->dump();
}
}
+static void dumpRemats(
+ StringRef Title,
+ const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> &RM) {
+ dbgs() << "------------- " << Title << "--------------\n";
+ for (const auto &E : RM) {
+ E.second->dump();
+ dbgs() << "--\n";
+ }
+}
static void dumpAllocas(const SmallVectorImpl<AllocaInfo> &Allocas) {
dbgs() << "------------- Allocas --------------\n";
@@ -637,10 +775,10 @@ void FrameTypeBuilder::addFieldForAllocas(const Function &F,
return;
}
- // Because there are pathes from the lifetime.start to coro.end
+ // Because there are paths from the lifetime.start to coro.end
// for each alloca, the liferanges for every alloca is overlaped
// in the blocks who contain coro.end and the successor blocks.
- // So we choose to skip there blocks when we calculates the liferange
+ // So we choose to skip there blocks when we calculate the liferange
// for each alloca. It should be reasonable since there shouldn't be uses
// in these blocks and the coroutine frame shouldn't be used outside the
// coroutine body.
@@ -820,7 +958,7 @@ void FrameTypeBuilder::finish(StructType *Ty) {
static void cacheDIVar(FrameDataInfo &FrameData,
DenseMap<Value *, DILocalVariable *> &DIVarCache) {
for (auto *V : FrameData.getAllDefs()) {
- if (DIVarCache.find(V) != DIVarCache.end())
+ if (DIVarCache.contains(V))
continue;
auto DDIs = FindDbgDeclareUses(V);
@@ -852,18 +990,8 @@ static StringRef solveTypeName(Type *Ty) {
return "__floating_type_";
}
- if (auto *PtrTy = dyn_cast<PointerType>(Ty)) {
- if (PtrTy->isOpaque())
- return "PointerType";
- Type *PointeeTy = PtrTy->getNonOpaquePointerElementType();
- auto Name = solveTypeName(PointeeTy);
- if (Name == "UnknownType")
- return "PointerType";
- SmallString<16> Buffer;
- Twine(Name + "_Ptr").toStringRef(Buffer);
- auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
- return MDName->getString();
- }
+ if (Ty->isPointerTy())
+ return "PointerType";
if (Ty->isStructTy()) {
if (!cast<StructType>(Ty)->hasName())
@@ -1043,7 +1171,7 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
dwarf::DW_ATE_unsigned_char)});
for (auto *V : FrameData.getAllDefs()) {
- if (DIVarCache.find(V) == DIVarCache.end())
+ if (!DIVarCache.contains(V))
continue;
auto Index = FrameData.getFieldIndex(V);
@@ -1075,7 +1203,7 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
// fields confilicts with each other.
unsigned UnknownTypeNum = 0;
for (unsigned Index = 0; Index < FrameTy->getNumElements(); Index++) {
- if (OffsetCache.find(Index) == OffsetCache.end())
+ if (!OffsetCache.contains(Index))
continue;
std::string Name;
@@ -1090,7 +1218,7 @@ static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
AlignInBits = OffsetCache[Index].first * 8;
OffsetInBits = OffsetCache[Index].second * 8;
- if (NameCache.find(Index) != NameCache.end()) {
+ if (NameCache.contains(Index)) {
Name = NameCache[Index].str();
DITy = TyCache[Index];
} else {
@@ -1282,7 +1410,7 @@ static StructType *buildFrameType(Function &F, coro::Shape &Shape,
// function call or any of the memory intrinsics, we check whether this
// instruction is prior to CoroBegin. To answer question 3, we track the offsets
// of all aliases created for the alloca prior to CoroBegin but used after
-// CoroBegin. llvm::Optional is used to be able to represent the case when the
+// CoroBegin. std::optional is used to be able to represent the case when the
// offset is unknown (e.g. when you have a PHINode that takes in different
// offset values). We cannot handle unknown offsets and will assert. This is the
// potential issue left out. An ideal solution would likely require a
@@ -1586,11 +1714,12 @@ static void createFramePtr(coro::Shape &Shape) {
static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
auto *CB = Shape.CoroBegin;
LLVMContext &C = CB->getContext();
+ Function *F = CB->getFunction();
IRBuilder<> Builder(C);
StructType *FrameTy = Shape.FrameTy;
Value *FramePtr = Shape.FramePtr;
- DominatorTree DT(*CB->getFunction());
- SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;
+ DominatorTree DT(*F);
+ SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
// Create a GEP with the given index into the coroutine frame for the original
// value Orig. Appends an extra 0 index for array-allocas, preserving the
@@ -1723,6 +1852,21 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
SpillAlignment, E.first->getName() + Twine(".reload"));
TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Def);
+ // Try our best to find the dbg.declare. If the spill is a temp, there may not
+ // be a direct dbg.declare. Walk up the load chain to find one from an
+ // alias.
+ if (F->getSubprogram()) {
+ auto *CurDef = Def;
+ while (DIs.empty() && isa<LoadInst>(CurDef)) {
+ auto *LdInst = cast<LoadInst>(CurDef);
+ // Only consider ptr to ptr same type load.
+ if (LdInst->getPointerOperandType() != LdInst->getType())
+ break;
+ CurDef = LdInst->getPointerOperand();
+ DIs = FindDbgDeclareUses(CurDef);
+ }
+ }
+
for (DbgDeclareInst *DDI : DIs) {
bool AllowUnresolved = false;
// This dbg.declare is preserved for all coro-split function
@@ -1734,16 +1878,10 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
&*Builder.GetInsertPoint());
// This dbg.declare is for the main function entry point. It
// will be deleted in all coro-split functions.
- coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.OptimizeFrame);
+ coro::salvageDebugInfo(ArgToAllocaMap, DDI, Shape.OptimizeFrame);
}
}
- // Salvage debug info on any dbg.addr that we see. We do not insert them
- // into each block where we have a use though.
- if (auto *DI = dyn_cast<DbgAddrIntrinsic>(U)) {
- coro::salvageDebugInfo(DbgPtrAllocaCache, DI, Shape.OptimizeFrame);
- }
-
// If we have a single edge PHINode, remove it and replace it with a
// reload from the coroutine frame. (We already took care of multi edge
// PHINodes by rewriting them in the rewritePHIs function).
@@ -1813,11 +1951,13 @@ static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
DVI->replaceUsesOfWith(Alloca, G);
for (Instruction *I : UsersToUpdate) {
- // It is meaningless to remain the lifetime intrinsics refer for the
+ // It is meaningless to retain lifetime intrinsics that refer to the
// member of coroutine frames and the meaningless lifetime intrinsics
// are possible to block further optimizations.
- if (I->isLifetimeStartOrEnd())
+ if (I->isLifetimeStartOrEnd()) {
+ I->eraseFromParent();
continue;
+ }
I->replaceUsesOfWith(Alloca, G);
}
@@ -2089,11 +2229,12 @@ static void rewritePHIs(Function &F) {
rewritePHIs(*BB);
}
+/// Default materializable callback
// Check for instructions that we can recreate on resume as opposed to spill
// the result into a coroutine frame.
-static bool materializable(Instruction &V) {
- return isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
- isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V);
+bool coro::defaultMaterializable(Instruction &V) {
+ return (isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
+ isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V));
}
// Check for structural coroutine intrinsics that should not be spilled into
@@ -2103,41 +2244,82 @@ static bool isCoroutineStructureIntrinsic(Instruction &I) {
isa<CoroSuspendInst>(&I);
}
-// For every use of the value that is across suspend point, recreate that value
-// after a suspend point.
-static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
- const SpillInfo &Spills) {
- for (const auto &E : Spills) {
- Value *Def = E.first;
- BasicBlock *CurrentBlock = nullptr;
+// For each instruction identified as materializable across the suspend point,
+// and its associated DAG of other rematerializable instructions,
+// recreate the DAG of instructions after the suspend point.
+static void rewriteMaterializableInstructions(
+ const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8>
+ &AllRemats) {
+ // This has to be done in 2 phases:
+ // 1. Do the remats and record the required defs to be replaced in the
+ //    original use instructions.
+ // 2. Once all the remats are complete, replace the uses in the final
+ //    instructions with the new defs.
+ typedef struct {
+ Instruction *Use;
+ Instruction *Def;
+ Instruction *Remat;
+ } ProcessNode;
+
+ SmallVector<ProcessNode> FinalInstructionsToProcess;
+
+ for (const auto &E : AllRemats) {
+ Instruction *Use = E.first;
Instruction *CurrentMaterialization = nullptr;
- for (Instruction *U : E.second) {
- // If we have not seen this block, materialize the value.
- if (CurrentBlock != U->getParent()) {
+ RematGraph *RG = E.second.get();
+ ReversePostOrderTraversal<RematGraph *> RPOT(RG);
+ SmallVector<Instruction *> InstructionsToProcess;
+
+ // If the target use is actually a suspend instruction then we have to
+ // insert the remats into the end of the predecessor (there should only be
+ // one). This is so that suspend blocks always have the suspend instruction
+ // as the first instruction.
+ auto InsertPoint = &*Use->getParent()->getFirstInsertionPt();
+ if (isa<AnyCoroSuspendInst>(Use)) {
+ BasicBlock *SuspendPredecessorBlock =
+ Use->getParent()->getSinglePredecessor();
+ assert(SuspendPredecessorBlock && "malformed coro suspend instruction");
+ InsertPoint = SuspendPredecessorBlock->getTerminator();
+ }
- bool IsInCoroSuspendBlock = isa<AnyCoroSuspendInst>(U);
- CurrentBlock = U->getParent();
- auto *InsertBlock = IsInCoroSuspendBlock
- ? CurrentBlock->getSinglePredecessor()
- : CurrentBlock;
- CurrentMaterialization = cast<Instruction>(Def)->clone();
- CurrentMaterialization->setName(Def->getName());
- CurrentMaterialization->insertBefore(
- IsInCoroSuspendBlock ? InsertBlock->getTerminator()
- : &*InsertBlock->getFirstInsertionPt());
- }
- if (auto *PN = dyn_cast<PHINode>(U)) {
- assert(PN->getNumIncomingValues() == 1 &&
- "unexpected number of incoming "
- "values in the PHINode");
- PN->replaceAllUsesWith(CurrentMaterialization);
- PN->eraseFromParent();
- continue;
- }
- // Replace all uses of Def in the current instruction with the
- // CurrentMaterialization for the block.
- U->replaceUsesOfWith(Def, CurrentMaterialization);
+ // Note: skip the first instruction as this is the actual use that we're
+ // rematerializing everything for.
+ auto I = RPOT.begin();
+ ++I;
+ for (; I != RPOT.end(); ++I) {
+ Instruction *D = (*I)->Node;
+ CurrentMaterialization = D->clone();
+ CurrentMaterialization->setName(D->getName());
+ CurrentMaterialization->insertBefore(InsertPoint);
+ InsertPoint = CurrentMaterialization;
+
+ // Replace all uses of Def in the instructions being added as part of this
+ // rematerialization group
+ for (auto &I : InstructionsToProcess)
+ I->replaceUsesOfWith(D, CurrentMaterialization);
+
+      // Don't replace the final use at this point as this can cause problems
+      // for other materializations. Instead, for any final use that uses a
+      // def that's being rematerialized, record the replacement values.
+ for (unsigned i = 0, E = Use->getNumOperands(); i != E; ++i)
+ if (Use->getOperand(i) == D) // Is this operand pointing to oldval?
+ FinalInstructionsToProcess.push_back(
+ {Use, D, CurrentMaterialization});
+
+ InstructionsToProcess.push_back(CurrentMaterialization);
+ }
+ }
+
+ // Finally, replace the uses with the defines that we've just rematerialized
+ for (auto &R : FinalInstructionsToProcess) {
+ if (auto *PN = dyn_cast<PHINode>(R.Use)) {
+ assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
+ "values in the PHINode");
+ PN->replaceAllUsesWith(R.Remat);
+ PN->eraseFromParent();
+ continue;
}
+ R.Use->replaceUsesOfWith(R.Def, R.Remat);
}
}
@@ -2407,10 +2589,7 @@ static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
auto ArgTy = cast<PointerType>(Arg.getType());
- // swifterror arguments are required to have pointer-to-pointer type,
- // so create a pointer-typed alloca with opaque pointers.
- auto ValueTy = ArgTy->isOpaque() ? PointerType::getUnqual(F.getContext())
- : ArgTy->getNonOpaquePointerElementType();
+ auto ValueTy = PointerType::getUnqual(F.getContext());
// Reduce to the alloca case:
@@ -2523,6 +2702,9 @@ static void sinkSpillUsesAfterCoroBegin(Function &F,
/// hence minimizing the amount of data we end up putting on the frame.
static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
SuspendCrossingInfo &Checker) {
+ if (F.hasOptNone())
+ return;
+
DominatorTree DT(F);
// Collect all possible basic blocks which may dominate all uses of allocas.
@@ -2635,7 +2817,7 @@ static void collectFrameAlloca(AllocaInst *AI, coro::Shape &Shape,
}
void coro::salvageDebugInfo(
- SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
+ SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
DbgVariableIntrinsic *DVI, bool OptimizeFrame) {
Function *F = DVI->getFunction();
IRBuilder<> Builder(F->getContext());
@@ -2652,7 +2834,7 @@ void coro::salvageDebugInfo(
while (auto *Inst = dyn_cast_or_null<Instruction>(Storage)) {
if (auto *LdInst = dyn_cast<LoadInst>(Inst)) {
- Storage = LdInst->getOperand(0);
+ Storage = LdInst->getPointerOperand();
// FIXME: This is a heuristic that works around the fact that
// LLVM IR debug intrinsics cannot yet distinguish between
// memory and value locations: Because a dbg.declare(alloca) is
@@ -2662,7 +2844,7 @@ void coro::salvageDebugInfo(
if (!SkipOutermostLoad)
Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
} else if (auto *StInst = dyn_cast<StoreInst>(Inst)) {
- Storage = StInst->getOperand(0);
+ Storage = StInst->getValueOperand();
} else {
SmallVector<uint64_t, 16> Ops;
SmallVector<Value *, 0> AdditionalValues;
@@ -2682,38 +2864,44 @@ void coro::salvageDebugInfo(
if (!Storage)
return;
- // Store a pointer to the coroutine frame object in an alloca so it
- // is available throughout the function when producing unoptimized
- // code. Extending the lifetime this way is correct because the
- // variable has been declared by a dbg.declare intrinsic.
- //
- // Avoid to create the alloca would be eliminated by optimization
- // passes and the corresponding dbg.declares would be invalid.
- if (!OptimizeFrame)
- if (auto *Arg = dyn_cast<llvm::Argument>(Storage)) {
- auto &Cached = DbgPtrAllocaCache[Storage];
- if (!Cached) {
- Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
- Arg->getName() + ".debug");
- Builder.CreateStore(Storage, Cached);
- }
- Storage = Cached;
- // FIXME: LLVM lacks nuanced semantics to differentiate between
- // memory and direct locations at the IR level. The backend will
- // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
- // location. Thus, if there are deref and offset operations in the
- // expression, we need to add a DW_OP_deref at the *start* of the
- // expression to first load the contents of the alloca before
- // adjusting it with the expression.
- Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
+ auto *StorageAsArg = dyn_cast<Argument>(Storage);
+ const bool IsSwiftAsyncArg =
+ StorageAsArg && StorageAsArg->hasAttribute(Attribute::SwiftAsync);
+
+ // Swift async arguments are described by an entry value of the ABI-defined
+ // register containing the coroutine context.
+ if (IsSwiftAsyncArg && !Expr->isEntryValue())
+ Expr = DIExpression::prepend(Expr, DIExpression::EntryValue);
+
+ // If the coroutine frame is an Argument, store it in an alloca to improve
+ // its availability (e.g. registers may be clobbered).
+ // Avoid this if optimizations are enabled (they would remove the alloca) or
+ // if the value is guaranteed to be available through other means (e.g. swift
+ // ABI guarantees).
+ if (StorageAsArg && !OptimizeFrame && !IsSwiftAsyncArg) {
+ auto &Cached = ArgToAllocaMap[StorageAsArg];
+ if (!Cached) {
+ Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
+ Storage->getName() + ".debug");
+ Builder.CreateStore(Storage, Cached);
}
+ Storage = Cached;
+ // FIXME: LLVM lacks nuanced semantics to differentiate between
+ // memory and direct locations at the IR level. The backend will
+ // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
+ // location. Thus, if there are deref and offset operations in the
+ // expression, we need to add a DW_OP_deref at the *start* of the
+ // expression to first load the contents of the alloca before
+ // adjusting it with the expression.
+ Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
+ }
DVI->replaceVariableLocationOp(OriginalStorage, Storage);
DVI->setExpression(Expr);
// We only hoist dbg.declare today since it doesn't make sense to hoist
- // dbg.value or dbg.addr since they do not have the same function wide
- // guarantees that dbg.declare does.
- if (!isa<DbgValueInst>(DVI) && !isa<DbgAddrIntrinsic>(DVI)) {
+ // dbg.value since it does not have the same function wide guarantees that
+ // dbg.declare does.
+ if (isa<DbgDeclareInst>(DVI)) {
Instruction *InsertPt = nullptr;
if (auto *I = dyn_cast<Instruction>(Storage))
InsertPt = I->getInsertionPointAfterDef();
@@ -2724,7 +2912,71 @@ void coro::salvageDebugInfo(
}
}
-void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
+static void doRematerializations(
+ Function &F, SuspendCrossingInfo &Checker,
+ const std::function<bool(Instruction &)> &MaterializableCallback) {
+ if (F.hasOptNone())
+ return;
+
+ SpillInfo Spills;
+
+  // See if there are materializable instructions across suspend points. We
+  // record these as the starting point, from which we also identify
+  // materializable defs used by these operations.
+ for (Instruction &I : instructions(F)) {
+ if (!MaterializableCallback(I))
+ continue;
+ for (User *U : I.users())
+ if (Checker.isDefinitionAcrossSuspend(I, U))
+ Spills[&I].push_back(cast<Instruction>(U));
+ }
+
+ // Process each of the identified rematerializable instructions
+ // and add predecessor instructions that can also be rematerialized.
+ // This is actually a graph of instructions since we could potentially
+ // have multiple uses of a def in the set of predecessor instructions.
+  // The approach here is to maintain a graph of instructions for each
+  // bottom-level instruction - a unique set of instructions (nodes) with
+  // edges between them. We then walk the graph in reverse post-order to
+  // insert the nodes past the suspend point while keeping their ordering
+  // correct. We also rely on CSE to remove duplicate defs for remats of
+  // different instructions that have a def in common (rather than
+  // maintaining more complex graphs for each suspend point).
+
+  // We can do this by adding new nodes to the list for each suspend point,
+  // then using standard GraphTraits to give a reverse post-order traversal
+  // when we insert the nodes after the suspend.
+ SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> AllRemats;
+ for (auto &E : Spills) {
+ for (Instruction *U : E.second) {
+ // Don't process a user twice (this can happen if the instruction uses
+ // more than one rematerializable def)
+ if (AllRemats.count(U))
+ continue;
+
+ // Constructor creates the whole RematGraph for the given Use
+ auto RematUPtr =
+ std::make_unique<RematGraph>(MaterializableCallback, U, Checker);
+
+ LLVM_DEBUG(dbgs() << "***** Next remat group *****\n";
+ ReversePostOrderTraversal<RematGraph *> RPOT(RematUPtr.get());
+ for (auto I = RPOT.begin(); I != RPOT.end();
+ ++I) { (*I)->Node->dump(); } dbgs()
+ << "\n";);
+
+ AllRemats[U] = std::move(RematUPtr);
+ }
+ }
+
+ // Rewrite materializable instructions to be materialized at the use
+ // point.
+ LLVM_DEBUG(dumpRemats("Materializations", AllRemats));
+ rewriteMaterializableInstructions(AllRemats);
+}
+
+void coro::buildCoroutineFrame(
+ Function &F, Shape &Shape,
+ const std::function<bool(Instruction &)> &MaterializableCallback) {
// Don't eliminate swifterror in async functions that won't be split.
if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
eliminateSwiftError(F, Shape);
@@ -2775,35 +3027,11 @@ void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
// Build suspend crossing info.
SuspendCrossingInfo Checker(F, Shape);
- IRBuilder<> Builder(F.getContext());
+ doRematerializations(F, Checker, MaterializableCallback);
+
FrameDataInfo FrameData;
SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas;
SmallVector<Instruction*, 4> DeadInstructions;
-
- {
- SpillInfo Spills;
- for (int Repeat = 0; Repeat < 4; ++Repeat) {
- // See if there are materializable instructions across suspend points.
- // FIXME: We can use a worklist to track the possible materialize
- // instructions instead of iterating the whole function again and again.
- for (Instruction &I : instructions(F))
- if (materializable(I)) {
- for (User *U : I.users())
- if (Checker.isDefinitionAcrossSuspend(I, U))
- Spills[&I].push_back(cast<Instruction>(U));
- }
-
- if (Spills.empty())
- break;
-
- // Rewrite materializable instructions to be materialized at the use
- // point.
- LLVM_DEBUG(dumpSpills("Materializations", Spills));
- rewriteMaterializableInstructions(Builder, Spills);
- Spills.clear();
- }
- }
-
if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
Shape.ABI != coro::ABI::RetconOnce)
sinkLifetimeStartMarkers(F, Shape, Checker);
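For illustration only (not part of the upstream change itself): buildCoroutineFrame now accepts any predicate with the signature bool(Instruction &), and coro::defaultMaterializable captures the previous built-in policy. A minimal sketch of a custom callback, assuming it lives inside the Coroutines library where CoroInternal.h and the IR headers are visible; the extra FreezeInst case is hypothetical and implies nothing about profitability:

  // Hypothetical policy: allow everything the default policy allows, plus
  // freeze instructions.
  static bool extendedMaterializable(llvm::Instruction &V) {
    return llvm::coro::defaultMaterializable(V) || llvm::isa<llvm::FreezeInst>(&V);
  }
  // ... later, e.g. from a splitting pass:
  // coro::buildCoroutineFrame(F, Shape, extendedMaterializable);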
diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h
index 032361c22045..067fb6bba47e 100644
--- a/llvm/lib/Transforms/Coroutines/CoroInternal.h
+++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -25,10 +25,13 @@ bool declaresIntrinsics(const Module &M,
const std::initializer_list<StringRef>);
void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
-/// Recover a dbg.declare prepared by the frontend and emit an alloca
-/// holding a pointer to the coroutine frame.
+/// Attempts to rewrite the location operand of debug intrinsics in terms of
+/// the coroutine frame pointer, folding pointer offsets into the DIExpression
+/// of the intrinsic.
+/// If the frame pointer is an Argument, store it into an alloca if
+/// OptimizeFrame is false.
void salvageDebugInfo(
- SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> &DbgPtrAllocaCache,
+ SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
DbgVariableIntrinsic *DVI, bool OptimizeFrame);
// Keeps data and helper functions for lowering coroutine intrinsics.
@@ -124,7 +127,6 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
};
struct AsyncLoweringStorage {
- FunctionType *AsyncFuncTy;
Value *Context;
CallingConv::ID AsyncCC;
unsigned ContextArgNo;
@@ -261,7 +263,10 @@ struct LLVM_LIBRARY_VISIBILITY Shape {
void buildFrom(Function &F);
};
-void buildCoroutineFrame(Function &F, Shape &Shape);
+bool defaultMaterializable(Instruction &V);
+void buildCoroutineFrame(
+ Function &F, Shape &Shape,
+ const std::function<bool(Instruction &)> &MaterializableCallback);
CallInst *createMustTailCall(DebugLoc Loc, Function *MustTailCallFn,
ArrayRef<Value *> Arguments, IRBuilder<> &);
} // End namespace coro.
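A minimal usage sketch for the updated salvageDebugInfo interface, mirroring what CoroSplit.cpp does further below; it assumes the caller sits inside the Coroutines library and has llvm/IR/InstIterator.h and llvm/IR/IntrinsicInst.h available:

  llvm::SmallDenseMap<llvm::Argument *, llvm::AllocaInst *, 4> ArgToAllocaMap;
  for (llvm::Instruction &I : llvm::instructions(F))
    if (auto *DVI = llvm::dyn_cast<llvm::DbgVariableIntrinsic>(&I))
      llvm::coro::salvageDebugInfo(ArgToAllocaMap, DVI, /*OptimizeFrame=*/false);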
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 1171878f749a..39e909bf3316 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -31,6 +31,7 @@
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/Argument.h"
@@ -299,6 +300,26 @@ static void markCoroutineAsDone(IRBuilder<> &Builder, const coro::Shape &Shape,
auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
Shape.FrameTy->getTypeAtIndex(coro::Shape::SwitchFieldIndex::Resume)));
Builder.CreateStore(NullPtr, GepIndex);
+
+  // If the coroutine doesn't have an unwind coro end, we could omit the
+  // store to the final suspend point, since we could infer that the
+  // coroutine is suspended at the final suspend point from the nullness of
+  // ResumeFnAddr.
+  // However, we can't skip it if the coroutine has an unwind coro end: a
+  // coroutine that reaches the unwind coro end is considered suspended at
+  // the final suspend point (ResumeFnAddr is null) even though it hasn't
+  // actually completed yet. We need the IndexVal for the final suspend
+  // point to make the states clear.
+ if (Shape.SwitchLowering.HasUnwindCoroEnd &&
+ Shape.SwitchLowering.HasFinalSuspend) {
+ assert(cast<CoroSuspendInst>(Shape.CoroSuspends.back())->isFinal() &&
+ "The final suspend should only live in the last position of "
+ "CoroSuspends.");
+ ConstantInt *IndexVal = Shape.getIndex(Shape.CoroSuspends.size() - 1);
+ auto *FinalIndex = Builder.CreateStructGEP(
+ Shape.FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
+
+ Builder.CreateStore(IndexVal, FinalIndex);
+ }
}
/// Replace an unwind call to llvm.coro.end.
@@ -396,17 +417,7 @@ static void createResumeEntryBlock(Function &F, coro::Shape &Shape) {
// The coroutine should be marked done if it reaches the final suspend
// point.
markCoroutineAsDone(Builder, Shape, FramePtr);
- }
-
- // If the coroutine don't have unwind coro end, we could omit the store to
- // the final suspend point since we could infer the coroutine is suspended
- // at the final suspend point by the nullness of ResumeFnAddr.
- // However, we can't skip it if the coroutine have unwind coro end. Since
- // the coroutine reaches unwind coro end is considered suspended at the
- // final suspend point (the ResumeFnAddr is null) but in fact the coroutine
- // didn't complete yet. We need the IndexVal for the final suspend point
- // to make the states clear.
- if (!S->isFinal() || Shape.SwitchLowering.HasUnwindCoroEnd) {
+ } else {
auto *GepIndex = Builder.CreateStructGEP(
FrameTy, FramePtr, Shape.getSwitchIndexField(), "index.addr");
Builder.CreateStore(IndexVal, GepIndex);
@@ -565,7 +576,7 @@ void CoroCloner::replaceRetconOrAsyncSuspendUses() {
if (NewS->use_empty()) return;
// Otherwise, we need to create an aggregate.
- Value *Agg = UndefValue::get(NewS->getType());
+ Value *Agg = PoisonValue::get(NewS->getType());
for (size_t I = 0, E = Args.size(); I != E; ++I)
Agg = Builder.CreateInsertValue(Agg, Args[I], I);
@@ -623,20 +634,13 @@ static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
return;
Value *CachedSlot = nullptr;
auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * {
- if (CachedSlot) {
- assert(cast<PointerType>(CachedSlot->getType())
- ->isOpaqueOrPointeeTypeMatches(ValueTy) &&
- "multiple swifterror slots in function with different types");
+ if (CachedSlot)
return CachedSlot;
- }
// Check if the function has a swifterror argument.
for (auto &Arg : F.args()) {
if (Arg.isSwiftError()) {
CachedSlot = &Arg;
- assert(cast<PointerType>(Arg.getType())
- ->isOpaqueOrPointeeTypeMatches(ValueTy) &&
- "swifterror argument does not have expected type");
return &Arg;
}
}
@@ -679,19 +683,26 @@ static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
}
}
+/// Returns all DbgVariableIntrinsic in F.
+static SmallVector<DbgVariableIntrinsic *, 8>
+collectDbgVariableIntrinsics(Function &F) {
+ SmallVector<DbgVariableIntrinsic *, 8> Intrinsics;
+ for (auto &I : instructions(F))
+ if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
+ Intrinsics.push_back(DVI);
+ return Intrinsics;
+}
+
void CoroCloner::replaceSwiftErrorOps() {
::replaceSwiftErrorOps(*NewF, Shape, &VMap);
}
void CoroCloner::salvageDebugInfo() {
- SmallVector<DbgVariableIntrinsic *, 8> Worklist;
- SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;
- for (auto &BB : *NewF)
- for (auto &I : BB)
- if (auto *DVI = dyn_cast<DbgVariableIntrinsic>(&I))
- Worklist.push_back(DVI);
+ SmallVector<DbgVariableIntrinsic *, 8> Worklist =
+ collectDbgVariableIntrinsics(*NewF);
+ SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
for (DbgVariableIntrinsic *DVI : Worklist)
- coro::salvageDebugInfo(DbgPtrAllocaCache, DVI, Shape.OptimizeFrame);
+ coro::salvageDebugInfo(ArgToAllocaMap, DVI, Shape.OptimizeFrame);
// Remove all salvaged dbg.declare intrinsics that became
// either unreachable or stale due to the CoroSplit transformation.
@@ -886,7 +897,7 @@ void CoroCloner::create() {
// frame.
SmallVector<Instruction *> DummyArgs;
for (Argument &A : OrigF.args()) {
- DummyArgs.push_back(new FreezeInst(UndefValue::get(A.getType())));
+ DummyArgs.push_back(new FreezeInst(PoisonValue::get(A.getType())));
VMap[&A] = DummyArgs.back();
}
@@ -1044,7 +1055,7 @@ void CoroCloner::create() {
// All uses of the arguments should have been resolved by this point,
// so we can safely remove the dummy values.
for (Instruction *DummyArg : DummyArgs) {
- DummyArg->replaceAllUsesWith(UndefValue::get(DummyArg->getType()));
+ DummyArg->replaceAllUsesWith(PoisonValue::get(DummyArg->getType()));
DummyArg->deleteValue();
}
@@ -1231,8 +1242,11 @@ scanPHIsAndUpdateValueMap(Instruction *Prev, BasicBlock *NewBlock,
// instruction. Suspend instruction represented by a switch, track the PHI
// values and select the correct case successor when possible.
static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
+ // There is nothing to simplify.
+ if (isa<ReturnInst>(InitialInst))
+ return false;
+
DenseMap<Value *, Value *> ResolvedValues;
- BasicBlock *UnconditionalSucc = nullptr;
assert(InitialInst->getModule());
const DataLayout &DL = InitialInst->getModule()->getDataLayout();
@@ -1262,39 +1276,35 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
Instruction *I = InitialInst;
while (I->isTerminator() || isa<CmpInst>(I)) {
if (isa<ReturnInst>(I)) {
- if (I != InitialInst) {
- // If InitialInst is an unconditional branch,
- // remove PHI values that come from basic block of InitialInst
- if (UnconditionalSucc)
- UnconditionalSucc->removePredecessor(InitialInst->getParent(), true);
- ReplaceInstWithInst(InitialInst, I->clone());
- }
+ ReplaceInstWithInst(InitialInst, I->clone());
return true;
}
+
if (auto *BR = dyn_cast<BranchInst>(I)) {
- if (BR->isUnconditional()) {
- BasicBlock *Succ = BR->getSuccessor(0);
- if (I == InitialInst)
- UnconditionalSucc = Succ;
- scanPHIsAndUpdateValueMap(I, Succ, ResolvedValues);
- I = GetFirstValidInstruction(Succ->getFirstNonPHIOrDbgOrLifetime());
- continue;
+ unsigned SuccIndex = 0;
+ if (BR->isConditional()) {
+        // Handle the case where the condition of the conditional branch is
+        // constant, e.g.,
+        //
+        //  br i1 false, label %cleanup, label %CoroEnd
+        //
+        // which is possible during the transformation. We can continue
+        // simplifying in this case.
+ ConstantInt *Cond = TryResolveConstant(BR->getCondition());
+ if (!Cond)
+ return false;
+
+ SuccIndex = Cond->isOne() ? 0 : 1;
}
- BasicBlock *BB = BR->getParent();
- // Handle the case the condition of the conditional branch is constant.
- // e.g.,
- //
- // br i1 false, label %cleanup, label %CoroEnd
- //
- // It is possible during the transformation. We could continue the
- // simplifying in this case.
- if (ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true)) {
- // Handle this branch in next iteration.
- I = BB->getTerminator();
- continue;
- }
- } else if (auto *CondCmp = dyn_cast<CmpInst>(I)) {
+ BasicBlock *Succ = BR->getSuccessor(SuccIndex);
+ scanPHIsAndUpdateValueMap(I, Succ, ResolvedValues);
+ I = GetFirstValidInstruction(Succ->getFirstNonPHIOrDbgOrLifetime());
+
+ continue;
+ }
+
+ if (auto *CondCmp = dyn_cast<CmpInst>(I)) {
// If the case number of suspended switch instruction is reduced to
// 1, then it is simplified to CmpInst in llvm::ConstantFoldTerminator.
auto *BR = dyn_cast<BranchInst>(
@@ -1318,13 +1328,14 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
if (!ConstResult)
return false;
- CondCmp->replaceAllUsesWith(ConstResult);
- CondCmp->eraseFromParent();
+ ResolvedValues[BR->getCondition()] = ConstResult;
// Handle this branch in next iteration.
I = BR;
continue;
- } else if (auto *SI = dyn_cast<SwitchInst>(I)) {
+ }
+
+ if (auto *SI = dyn_cast<SwitchInst>(I)) {
ConstantInt *Cond = TryResolveConstant(SI->getCondition());
if (!Cond)
return false;
@@ -1337,6 +1348,7 @@ static bool simplifyTerminatorLeadingToRet(Instruction *InitialInst) {
return false;
}
+
return false;
}
@@ -1889,7 +1901,7 @@ static void splitRetconCoroutine(Function &F, coro::Shape &Shape,
if (ReturnPHIs.size() == 1) {
RetV = CastedContinuation;
} else {
- RetV = UndefValue::get(RetTy);
+ RetV = PoisonValue::get(RetTy);
RetV = Builder.CreateInsertValue(RetV, CastedContinuation, 0);
for (size_t I = 1, E = ReturnPHIs.size(); I != E; ++I)
RetV = Builder.CreateInsertValue(RetV, ReturnPHIs[I], I);
@@ -1929,10 +1941,10 @@ namespace {
};
}
-static coro::Shape splitCoroutine(Function &F,
- SmallVectorImpl<Function *> &Clones,
- TargetTransformInfo &TTI,
- bool OptimizeFrame) {
+static coro::Shape
+splitCoroutine(Function &F, SmallVectorImpl<Function *> &Clones,
+ TargetTransformInfo &TTI, bool OptimizeFrame,
+ std::function<bool(Instruction &)> MaterializableCallback) {
PrettyStackTraceFunction prettyStackTrace(F);
// The suspend-crossing algorithm in buildCoroutineFrame get tripped
@@ -1944,7 +1956,7 @@ static coro::Shape splitCoroutine(Function &F,
return Shape;
simplifySuspendPoints(Shape);
- buildCoroutineFrame(F, Shape);
+ buildCoroutineFrame(F, Shape, MaterializableCallback);
replaceFrameSizeAndAlignment(Shape);
// If there are no suspend points, no split required, just remove
@@ -1970,25 +1982,12 @@ static coro::Shape splitCoroutine(Function &F,
// This invalidates SwiftErrorOps in the Shape.
replaceSwiftErrorOps(F, Shape, nullptr);
- // Finally, salvage the llvm.dbg.{declare,addr} in our original function that
- // point into the coroutine frame. We only do this for the current function
- // since the Cloner salvaged debug info for us in the new coroutine funclets.
- SmallVector<DbgVariableIntrinsic *, 8> Worklist;
- SmallDenseMap<llvm::Value *, llvm::AllocaInst *, 4> DbgPtrAllocaCache;
- for (auto &BB : F) {
- for (auto &I : BB) {
- if (auto *DDI = dyn_cast<DbgDeclareInst>(&I)) {
- Worklist.push_back(DDI);
- continue;
- }
- if (auto *DDI = dyn_cast<DbgAddrIntrinsic>(&I)) {
- Worklist.push_back(DDI);
- continue;
- }
- }
- }
- for (auto *DDI : Worklist)
- coro::salvageDebugInfo(DbgPtrAllocaCache, DDI, Shape.OptimizeFrame);
+ // Salvage debug intrinsics that point into the coroutine frame in the
+ // original function. The Cloner has already salvaged debug info in the new
+ // coroutine funclets.
+ SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
+ for (auto *DDI : collectDbgVariableIntrinsics(F))
+ coro::salvageDebugInfo(ArgToAllocaMap, DDI, Shape.OptimizeFrame);
return Shape;
}
@@ -2104,6 +2103,10 @@ static void addPrepareFunction(const Module &M,
Fns.push_back(PrepareFn);
}
+CoroSplitPass::CoroSplitPass(bool OptimizeFrame)
+ : MaterializableCallback(coro::defaultMaterializable),
+ OptimizeFrame(OptimizeFrame) {}
+
PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR) {
@@ -2142,10 +2145,19 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
F.setSplittedCoroutine();
SmallVector<Function *, 4> Clones;
- const coro::Shape Shape = splitCoroutine(
- F, Clones, FAM.getResult<TargetIRAnalysis>(F), OptimizeFrame);
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ const coro::Shape Shape =
+ splitCoroutine(F, Clones, FAM.getResult<TargetIRAnalysis>(F),
+ OptimizeFrame, MaterializableCallback);
updateCallGraphAfterCoroutineSplit(*N, Shape, Clones, C, CG, AM, UR, FAM);
+ ORE.emit([&]() {
+ return OptimizationRemark(DEBUG_TYPE, "CoroSplit", &F)
+ << "Split '" << ore::NV("function", F.getName())
+ << "' (frame_size=" << ore::NV("frame_size", Shape.FrameSize)
+ << ", align=" << ore::NV("align", Shape.FrameAlign.value()) << ")";
+ });
+
if (!Shape.CoroSuspends.empty()) {
// Run the CGSCC pipeline on the original and newly split functions.
UR.CWorklist.insert(&C);
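The remark emitted above goes through the standard OptimizationRemarkEmitter machinery, so it can be surfaced with the usual remark options; for example, an invocation along the lines of opt -passes=coro-split -pass-remarks=coro-split (flag spelling shown for illustration) should print one "Split '<function>' (frame_size=..., align=...)" remark per split coroutine, with the exact diagnostic format depending on the tool and on available debug locations.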
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index ce4262e593b6..cde74c5e693b 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -596,20 +596,6 @@ static void checkAsyncFuncPointer(const Instruction *I, Value *V) {
auto *AsyncFuncPtrAddr = dyn_cast<GlobalVariable>(V->stripPointerCasts());
if (!AsyncFuncPtrAddr)
fail(I, "llvm.coro.id.async async function pointer not a global", V);
-
- if (AsyncFuncPtrAddr->getType()->isOpaquePointerTy())
- return;
-
- auto *StructTy = cast<StructType>(
- AsyncFuncPtrAddr->getType()->getNonOpaquePointerElementType());
- if (StructTy->isOpaque() || !StructTy->isPacked() ||
- StructTy->getNumElements() != 2 ||
- !StructTy->getElementType(0)->isIntegerTy(32) ||
- !StructTy->getElementType(1)->isIntegerTy(32))
- fail(I,
- "llvm.coro.id.async async function pointer argument's type is not "
- "<{i32, i32}>",
- V);
}
void CoroIdAsyncInst::checkWellFormed() const {
@@ -625,19 +611,15 @@ void CoroIdAsyncInst::checkWellFormed() const {
static void checkAsyncContextProjectFunction(const Instruction *I,
Function *F) {
auto *FunTy = cast<FunctionType>(F->getValueType());
- Type *Int8Ty = Type::getInt8Ty(F->getContext());
- auto *RetPtrTy = dyn_cast<PointerType>(FunTy->getReturnType());
- if (!RetPtrTy || !RetPtrTy->isOpaqueOrPointeeTypeMatches(Int8Ty))
+ if (!FunTy->getReturnType()->isPointerTy())
fail(I,
"llvm.coro.suspend.async resume function projection function must "
- "return an i8* type",
+ "return a ptr type",
F);
- if (FunTy->getNumParams() != 1 || !FunTy->getParamType(0)->isPointerTy() ||
- !cast<PointerType>(FunTy->getParamType(0))
- ->isOpaqueOrPointeeTypeMatches(Int8Ty))
+ if (FunTy->getNumParams() != 1 || !FunTy->getParamType(0)->isPointerTy())
fail(I,
"llvm.coro.suspend.async resume function projection function must "
- "take one i8* type as parameter",
+ "take one ptr type as parameter",
F);
}
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 09286482edff..cc375f9badcd 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -28,16 +28,13 @@ using namespace llvm;
#define DEBUG_TYPE "inline"
-PreservedAnalyses AlwaysInlinerPass::run(Module &M,
- ModuleAnalysisManager &MAM) {
- // Add inline assumptions during code generation.
- FunctionAnalysisManager &FAM =
- MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
- return FAM.getResult<AssumptionAnalysis>(F);
- };
- auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
+namespace {
+bool AlwaysInlineImpl(
+ Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
+ function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
+ function_ref<AAResults &(Function &)> GetAAR,
+ function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
SmallSetVector<CallBase *, 16> Calls;
bool Changed = false;
SmallVector<Function *, 16> InlinedFunctions;
@@ -65,14 +62,12 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
DebugLoc DLoc = CB->getDebugLoc();
BasicBlock *Block = CB->getParent();
- InlineFunctionInfo IFI(
- /*cg=*/nullptr, GetAssumptionCache, &PSI,
- &FAM.getResult<BlockFrequencyAnalysis>(*Caller),
- &FAM.getResult<BlockFrequencyAnalysis>(F));
+ InlineFunctionInfo IFI(GetAssumptionCache, &PSI,
+ GetBFI ? &GetBFI(*Caller) : nullptr,
+ GetBFI ? &GetBFI(F) : nullptr);
- InlineResult Res =
- InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
- &FAM.getResult<AAManager>(F), InsertLifetime);
+ InlineResult Res = InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
+ &GetAAR(F), InsertLifetime);
if (!Res.isSuccess()) {
ORE.emit([&]() {
return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
@@ -127,48 +122,52 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
}
}
- return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+ return Changed;
}
-namespace {
-
-/// Inliner pass which only handles "always inline" functions.
-///
-/// Unlike the \c AlwaysInlinerPass, this uses the more heavyweight \c Inliner
-/// base class to provide several facilities such as array alloca merging.
-class AlwaysInlinerLegacyPass : public LegacyInlinerBase {
+struct AlwaysInlinerLegacyPass : public ModulePass {
+ bool InsertLifetime;
-public:
- AlwaysInlinerLegacyPass() : LegacyInlinerBase(ID, /*InsertLifetime*/ true) {
- initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
- }
+ AlwaysInlinerLegacyPass()
+ : AlwaysInlinerLegacyPass(/*InsertLifetime*/ true) {}
AlwaysInlinerLegacyPass(bool InsertLifetime)
- : LegacyInlinerBase(ID, InsertLifetime) {
+ : ModulePass(ID), InsertLifetime(InsertLifetime) {
initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
}
/// Main run interface method. We override here to avoid calling skipSCC().
- bool runOnSCC(CallGraphSCC &SCC) override { return inlineCalls(SCC); }
+ bool runOnModule(Module &M) override {
+
+ auto &PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto GetAAR = [&](Function &F) -> AAResults & {
+ return getAnalysis<AAResultsWrapperPass>(F).getAAResults();
+ };
+ auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
+ return getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ };
+
+ return AlwaysInlineImpl(M, InsertLifetime, PSI, GetAssumptionCache, GetAAR,
+ /*GetBFI*/ nullptr);
+ }
static char ID; // Pass identification, replacement for typeid
- InlineCost getInlineCost(CallBase &CB) override;
-
- using llvm::Pass::doFinalization;
- bool doFinalization(CallGraph &CG) override {
- return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true);
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
}
};
-}
+
+} // namespace
char AlwaysInlinerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(AlwaysInlinerLegacyPass, "always-inline",
"Inliner for always_inline functions", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(AlwaysInlinerLegacyPass, "always-inline",
"Inliner for always_inline functions", false, false)
@@ -176,46 +175,23 @@ Pass *llvm::createAlwaysInlinerLegacyPass(bool InsertLifetime) {
return new AlwaysInlinerLegacyPass(InsertLifetime);
}
-/// Get the inline cost for the always-inliner.
-///
-/// The always inliner *only* handles functions which are marked with the
-/// attribute to force inlining. As such, it is dramatically simpler and avoids
-/// using the powerful (but expensive) inline cost analysis. Instead it uses
-/// a very simple and boring direct walk of the instructions looking for
-/// impossible-to-inline constructs.
-///
-/// Note, it would be possible to go to some lengths to cache the information
-/// computed here, but as we only expect to do this for relatively few and
-/// small functions which have the explicit attribute to force inlining, it is
-/// likely not worth it in practice.
-InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallBase &CB) {
- Function *Callee = CB.getCalledFunction();
-
- // Only inline direct calls to functions with always-inline attributes
- // that are viable for inlining.
- if (!Callee)
- return InlineCost::getNever("indirect call");
-
- // When callee coroutine function is inlined into caller coroutine function
- // before coro-split pass,
- // coro-early pass can not handle this quiet well.
- // So we won't inline the coroutine function if it have not been unsplited
- if (Callee->isPresplitCoroutine())
- return InlineCost::getNever("unsplited coroutine call");
-
- // FIXME: We shouldn't even get here for declarations.
- if (Callee->isDeclaration())
- return InlineCost::getNever("no definition");
-
- if (!CB.hasFnAttr(Attribute::AlwaysInline))
- return InlineCost::getNever("no alwaysinline attribute");
-
- if (Callee->hasFnAttribute(Attribute::AlwaysInline) && CB.isNoInline())
- return InlineCost::getNever("noinline call site attribute");
-
- auto IsViable = isInlineViable(*Callee);
- if (!IsViable.isSuccess())
- return InlineCost::getNever(IsViable.getFailureReason());
-
- return InlineCost::getAlways("always inliner");
+PreservedAnalyses AlwaysInlinerPass::run(Module &M,
+ ModuleAnalysisManager &MAM) {
+ FunctionAnalysisManager &FAM =
+ MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(F);
+ };
+ auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & {
+ return FAM.getResult<BlockFrequencyAnalysis>(F);
+ };
+ auto GetAAR = [&](Function &F) -> AAResults & {
+ return FAM.getResult<AAManager>(F);
+ };
+ auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
+
+ bool Changed = AlwaysInlineImpl(M, InsertLifetime, PSI, GetAssumptionCache,
+ GetAAR, GetBFI);
+
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
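For context, a minimal sketch of driving the refactored always-inliner through the new pass manager; illustrative only, and it assumes a ModuleAnalysisManager (MAM) already registered via PassBuilder:

  #include "llvm/Transforms/IPO/AlwaysInliner.h"

  llvm::ModulePassManager MPM;
  MPM.addPass(llvm::AlwaysInlinerPass(/*InsertLifetime=*/true));
  MPM.run(M, MAM); // M is the llvm::Module being compiled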
diff --git a/llvm/lib/Transforms/IPO/Annotation2Metadata.cpp b/llvm/lib/Transforms/IPO/Annotation2Metadata.cpp
index 6cc04544cabc..40cc00d2c78c 100644
--- a/llvm/lib/Transforms/IPO/Annotation2Metadata.cpp
+++ b/llvm/lib/Transforms/IPO/Annotation2Metadata.cpp
@@ -17,8 +17,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
@@ -64,36 +62,8 @@ static bool convertAnnotation2Metadata(Module &M) {
return true;
}
-namespace {
-struct Annotation2MetadataLegacy : public ModulePass {
- static char ID;
-
- Annotation2MetadataLegacy() : ModulePass(ID) {
- initializeAnnotation2MetadataLegacyPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override { return convertAnnotation2Metadata(M); }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
-};
-
-} // end anonymous namespace
-
-char Annotation2MetadataLegacy::ID = 0;
-
-INITIALIZE_PASS_BEGIN(Annotation2MetadataLegacy, DEBUG_TYPE,
- "Annotation2Metadata", false, false)
-INITIALIZE_PASS_END(Annotation2MetadataLegacy, DEBUG_TYPE,
- "Annotation2Metadata", false, false)
-
-ModulePass *llvm::createAnnotation2MetadataLegacyPass() {
- return new Annotation2MetadataLegacy();
-}
-
PreservedAnalyses Annotation2MetadataPass::run(Module &M,
ModuleAnalysisManager &AM) {
- convertAnnotation2Metadata(M);
- return PreservedAnalyses::all();
+ return convertAnnotation2Metadata(M) ? PreservedAnalyses::none()
+ : PreservedAnalyses::all();
}
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index dd1a3b78a378..824da6395f2e 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -67,6 +67,7 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
@@ -97,49 +98,11 @@ using OffsetAndArgPart = std::pair<int64_t, ArgPart>;
static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,
Value *Ptr, Type *ResElemTy, int64_t Offset) {
- // For non-opaque pointers, try to create a "nice" GEP if possible, otherwise
- // fall back to an i8 GEP to a specific offset.
- unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
- APInt OrigOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
- if (!Ptr->getType()->isOpaquePointerTy()) {
- Type *OrigElemTy = Ptr->getType()->getNonOpaquePointerElementType();
- if (OrigOffset == 0 && OrigElemTy == ResElemTy)
- return Ptr;
-
- if (OrigElemTy->isSized()) {
- APInt TmpOffset = OrigOffset;
- Type *TmpTy = OrigElemTy;
- SmallVector<APInt> IntIndices =
- DL.getGEPIndicesForOffset(TmpTy, TmpOffset);
- if (TmpOffset == 0) {
- // Try to add trailing zero indices to reach the right type.
- while (TmpTy != ResElemTy) {
- Type *NextTy = GetElementPtrInst::getTypeAtIndex(TmpTy, (uint64_t)0);
- if (!NextTy)
- break;
-
- IntIndices.push_back(APInt::getZero(
- isa<StructType>(TmpTy) ? 32 : OrigOffset.getBitWidth()));
- TmpTy = NextTy;
- }
-
- SmallVector<Value *> Indices;
- for (const APInt &Index : IntIndices)
- Indices.push_back(IRB.getInt(Index));
-
- if (OrigOffset != 0 || TmpTy == ResElemTy) {
- Ptr = IRB.CreateGEP(OrigElemTy, Ptr, Indices);
- return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace));
- }
- }
- }
+ if (Offset != 0) {
+ APInt APOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);
+ Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(APOffset));
}
-
- if (OrigOffset != 0) {
- Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(AddrSpace));
- Ptr = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(OrigOffset));
- }
- return IRB.CreateBitCast(Ptr, ResElemTy->getPointerTo(AddrSpace));
+ return Ptr;
}
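With the typed-pointer paths gone, the helper above reduces to a single byte-addressed GEP. A minimal sketch of the equivalent IRBuilder call and, assuming a 64-bit index type, the IR it produces:

  // Equivalent to createByteGEP(IRB, DL, Ptr, ResElemTy, /*Offset=*/16):
  llvm::Value *Off = IRB.CreateGEP(IRB.getInt8Ty(), Ptr, IRB.getInt64(16));
  // roughly yields: %off = getelementptr i8, ptr %Ptr, i64 16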
/// DoPromotion - This method actually performs the promotion of the specified
@@ -220,6 +183,8 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
// pass in the loaded pointers.
SmallVector<Value *, 16> Args;
const DataLayout &DL = F->getParent()->getDataLayout();
+ SmallVector<WeakTrackingVH, 16> DeadArgs;
+
while (!F->use_empty()) {
CallBase &CB = cast<CallBase>(*F->user_back());
assert(CB.getCalledFunction() == F);
@@ -246,15 +211,25 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
if (Pair.second.MustExecInstr) {
LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());
LI->copyMetadata(*Pair.second.MustExecInstr,
- {LLVMContext::MD_range, LLVMContext::MD_nonnull,
- LLVMContext::MD_dereferenceable,
+ {LLVMContext::MD_dereferenceable,
LLVMContext::MD_dereferenceable_or_null,
- LLVMContext::MD_align, LLVMContext::MD_noundef,
+ LLVMContext::MD_noundef,
LLVMContext::MD_nontemporal});
+ // Only transfer poison-generating metadata if we also have
+ // !noundef.
+ // TODO: Without !noundef, we could merge this metadata across
+ // all promoted loads.
+ if (LI->hasMetadata(LLVMContext::MD_noundef))
+ LI->copyMetadata(*Pair.second.MustExecInstr,
+ {LLVMContext::MD_range, LLVMContext::MD_nonnull,
+ LLVMContext::MD_align});
}
Args.push_back(LI);
ArgAttrVec.push_back(AttributeSet());
}
+ } else {
+ assert(ArgsToPromote.count(&*I) && I->use_empty());
+ DeadArgs.emplace_back(AI->get());
}
}
@@ -297,6 +272,8 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
CB.eraseFromParent();
}
+ RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadArgs);
+
// Since we have now created the new function, splice the body of the old
// function right into the new function, leaving the old rotting hulk of the
// function empty.
@@ -766,6 +743,7 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
// Check to see which arguments are promotable. If an argument is promotable,
// add it to ArgsToPromote.
DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;
+ unsigned NumArgsAfterPromote = F->getFunctionType()->getNumParams();
for (Argument *PtrArg : PointerArgs) {
// Replace sret attribute with noalias. This reduces register pressure by
// avoiding a register copy.
@@ -789,6 +767,7 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
Types.push_back(Pair.second.Ty);
if (areTypesABICompatible(Types, *F, TTI)) {
+ NumArgsAfterPromote += ArgParts.size() - 1;
ArgsToPromote.insert({PtrArg, std::move(ArgParts)});
}
}
@@ -798,6 +777,9 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
if (ArgsToPromote.empty())
return nullptr;
+ if (NumArgsAfterPromote > TTI.getMaxNumArgs())
+ return nullptr;
+
return doPromotion(F, FAM, ArgsToPromote);
}
diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index b9134ce26e80..847d07a49dee 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -15,16 +15,17 @@
#include "llvm/Transforms/IPO/Attributor.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MustExecute.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantFold.h"
@@ -35,14 +36,15 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/ModRef.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -98,11 +100,6 @@ static cl::opt<unsigned, true> MaxInitializationChainLengthX(
cl::location(MaxInitializationChainLength), cl::init(1024));
unsigned llvm::MaxInitializationChainLength;
-static cl::opt<bool> VerifyMaxFixpointIterations(
- "attributor-max-iterations-verify", cl::Hidden,
- cl::desc("Verify that max-iterations is a tight bound for a fixpoint"),
- cl::init(false));
-
static cl::opt<bool> AnnotateDeclarationCallSites(
"attributor-annotate-decl-cs", cl::Hidden,
cl::desc("Annotate call sites of function declarations."), cl::init(false));
@@ -188,6 +185,11 @@ ChangeStatus &llvm::operator&=(ChangeStatus &L, ChangeStatus R) {
}
///}
+bool AA::isGPU(const Module &M) {
+ Triple T(M.getTargetTriple());
+ return T.isAMDGPU() || T.isNVPTX();
+}
+
bool AA::isNoSyncInst(Attributor &A, const Instruction &I,
const AbstractAttribute &QueryingAA) {
// We are looking for volatile instructions or non-relaxed atomics.
@@ -202,9 +204,10 @@ bool AA::isNoSyncInst(Attributor &A, const Instruction &I,
if (AANoSync::isNoSyncIntrinsic(&I))
return true;
- const auto &NoSyncAA = A.getAAFor<AANoSync>(
- QueryingAA, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL);
- return NoSyncAA.isAssumedNoSync();
+ bool IsKnownNoSync;
+ return AA::hasAssumedIRAttr<Attribute::NoSync>(
+ A, &QueryingAA, IRPosition::callsite_function(*CB),
+ DepClassTy::OPTIONAL, IsKnownNoSync);
}
if (!I.mayReadOrWriteMemory())
@@ -218,12 +221,12 @@ bool AA::isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
// TODO: See the AAInstanceInfo class comment.
if (!ForAnalysisOnly)
return false;
- auto &InstanceInfoAA = A.getAAFor<AAInstanceInfo>(
+ auto *InstanceInfoAA = A.getAAFor<AAInstanceInfo>(
QueryingAA, IRPosition::value(V), DepClassTy::OPTIONAL);
- return InstanceInfoAA.isAssumedUniqueForAnalysis();
+ return InstanceInfoAA && InstanceInfoAA->isAssumedUniqueForAnalysis();
}
-Constant *AA::getInitialValueForObj(Value &Obj, Type &Ty,
+Constant *AA::getInitialValueForObj(Attributor &A, Value &Obj, Type &Ty,
const TargetLibraryInfo *TLI,
const DataLayout &DL,
AA::RangeTy *RangePtr) {
@@ -234,17 +237,31 @@ Constant *AA::getInitialValueForObj(Value &Obj, Type &Ty,
auto *GV = dyn_cast<GlobalVariable>(&Obj);
if (!GV)
return nullptr;
- if (!GV->hasLocalLinkage() && !(GV->isConstant() && GV->hasInitializer()))
- return nullptr;
- if (!GV->hasInitializer())
- return UndefValue::get(&Ty);
+
+ bool UsedAssumedInformation = false;
+ Constant *Initializer = nullptr;
+ if (A.hasGlobalVariableSimplificationCallback(*GV)) {
+ auto AssumedGV = A.getAssumedInitializerFromCallBack(
+ *GV, /* const AbstractAttribute *AA */ nullptr, UsedAssumedInformation);
+ Initializer = *AssumedGV;
+ if (!Initializer)
+ return nullptr;
+ } else {
+ if (!GV->hasLocalLinkage() && !(GV->isConstant() && GV->hasInitializer()))
+ return nullptr;
+ if (!GV->hasInitializer())
+ return UndefValue::get(&Ty);
+
+ if (!Initializer)
+ Initializer = GV->getInitializer();
+ }
if (RangePtr && !RangePtr->offsetOrSizeAreUnknown()) {
APInt Offset = APInt(64, RangePtr->Offset);
- return ConstantFoldLoadFromConst(GV->getInitializer(), &Ty, Offset, DL);
+ return ConstantFoldLoadFromConst(Initializer, &Ty, Offset, DL);
}
- return ConstantFoldLoadFromUniformValue(GV->getInitializer(), &Ty);
+ return ConstantFoldLoadFromUniformValue(Initializer, &Ty);
}
bool AA::isValidInScope(const Value &V, const Function *Scope) {
@@ -396,6 +413,18 @@ static bool getPotentialCopiesOfMemoryValue(
NullOnly = false;
};
+ auto AdjustWrittenValueType = [&](const AAPointerInfo::Access &Acc,
+ Value &V) {
+ Value *AdjV = AA::getWithType(V, *I.getType());
+ if (!AdjV) {
+ LLVM_DEBUG(dbgs() << "Underlying object written but stored value "
+ "cannot be converted to read type: "
+ << *Acc.getRemoteInst() << " : " << *I.getType()
+ << "\n";);
+ }
+ return AdjV;
+ };
+
auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
if ((IsLoad && !Acc.isWriteOrAssumption()) || (!IsLoad && !Acc.isRead()))
return true;
@@ -417,7 +446,10 @@ static bool getPotentialCopiesOfMemoryValue(
if (IsLoad) {
assert(isa<LoadInst>(I) && "Expected load or store instruction only!");
if (!Acc.isWrittenValueUnknown()) {
- NewCopies.push_back(Acc.getWrittenValue());
+ Value *V = AdjustWrittenValueType(Acc, *Acc.getWrittenValue());
+ if (!V)
+ return false;
+ NewCopies.push_back(V);
NewCopyOrigins.push_back(Acc.getRemoteInst());
return true;
}
@@ -428,7 +460,10 @@ static bool getPotentialCopiesOfMemoryValue(
<< *Acc.getRemoteInst() << "\n";);
return false;
}
- NewCopies.push_back(SI->getValueOperand());
+ Value *V = AdjustWrittenValueType(Acc, *SI->getValueOperand());
+ if (!V)
+ return false;
+ NewCopies.push_back(V);
NewCopyOrigins.push_back(SI);
} else {
assert(isa<StoreInst>(I) && "Expected load or store instruction only!");
@@ -449,10 +484,13 @@ static bool getPotentialCopiesOfMemoryValue(
bool HasBeenWrittenTo = false;
AA::RangeTy Range;
- auto &PI = A.getAAFor<AAPointerInfo>(QueryingAA, IRPosition::value(Obj),
+ auto *PI = A.getAAFor<AAPointerInfo>(QueryingAA, IRPosition::value(Obj),
DepClassTy::NONE);
- if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess,
- HasBeenWrittenTo, Range)) {
+ if (!PI ||
+ !PI->forallInterferingAccesses(A, QueryingAA, I,
+ /* FindInterferingWrites */ IsLoad,
+ /* FindInterferingReads */ !IsLoad,
+ CheckAccess, HasBeenWrittenTo, Range)) {
LLVM_DEBUG(
dbgs()
<< "Failed to verify all interfering accesses for underlying object: "
@@ -463,7 +501,7 @@ static bool getPotentialCopiesOfMemoryValue(
if (IsLoad && !HasBeenWrittenTo && !Range.isUnassigned()) {
const DataLayout &DL = A.getDataLayout();
Value *InitialValue =
- AA::getInitialValueForObj(Obj, *I.getType(), TLI, DL, &Range);
+ AA::getInitialValueForObj(A, Obj, *I.getType(), TLI, DL, &Range);
if (!InitialValue) {
LLVM_DEBUG(dbgs() << "Could not determine required initial value of "
"underlying object, abort!\n");
@@ -480,14 +518,14 @@ static bool getPotentialCopiesOfMemoryValue(
NewCopyOrigins.push_back(nullptr);
}
- PIs.push_back(&PI);
+ PIs.push_back(PI);
return true;
};
- const auto &AAUO = A.getAAFor<AAUnderlyingObjects>(
+ const auto *AAUO = A.getAAFor<AAUnderlyingObjects>(
QueryingAA, IRPosition::value(Ptr), DepClassTy::OPTIONAL);
- if (!AAUO.forallUnderlyingObjects(Pred)) {
+ if (!AAUO || !AAUO->forallUnderlyingObjects(Pred)) {
LLVM_DEBUG(
dbgs() << "Underlying objects stored into could not be determined\n";);
return false;
@@ -530,27 +568,37 @@ bool AA::getPotentialCopiesOfStoredValue(
static bool isAssumedReadOnlyOrReadNone(Attributor &A, const IRPosition &IRP,
const AbstractAttribute &QueryingAA,
bool RequireReadNone, bool &IsKnown) {
+ if (RequireReadNone) {
+ if (AA::hasAssumedIRAttr<Attribute::ReadNone>(
+ A, &QueryingAA, IRP, DepClassTy::OPTIONAL, IsKnown,
+ /* IgnoreSubsumingPositions */ true))
+ return true;
+ } else if (AA::hasAssumedIRAttr<Attribute::ReadOnly>(
+ A, &QueryingAA, IRP, DepClassTy::OPTIONAL, IsKnown,
+ /* IgnoreSubsumingPositions */ true))
+ return true;
IRPosition::Kind Kind = IRP.getPositionKind();
if (Kind == IRPosition::IRP_FUNCTION || Kind == IRPosition::IRP_CALL_SITE) {
- const auto &MemLocAA =
+ const auto *MemLocAA =
A.getAAFor<AAMemoryLocation>(QueryingAA, IRP, DepClassTy::NONE);
- if (MemLocAA.isAssumedReadNone()) {
- IsKnown = MemLocAA.isKnownReadNone();
+ if (MemLocAA && MemLocAA->isAssumedReadNone()) {
+ IsKnown = MemLocAA->isKnownReadNone();
if (!IsKnown)
- A.recordDependence(MemLocAA, QueryingAA, DepClassTy::OPTIONAL);
+ A.recordDependence(*MemLocAA, QueryingAA, DepClassTy::OPTIONAL);
return true;
}
}
- const auto &MemBehaviorAA =
+ const auto *MemBehaviorAA =
A.getAAFor<AAMemoryBehavior>(QueryingAA, IRP, DepClassTy::NONE);
- if (MemBehaviorAA.isAssumedReadNone() ||
- (!RequireReadNone && MemBehaviorAA.isAssumedReadOnly())) {
- IsKnown = RequireReadNone ? MemBehaviorAA.isKnownReadNone()
- : MemBehaviorAA.isKnownReadOnly();
+ if (MemBehaviorAA &&
+ (MemBehaviorAA->isAssumedReadNone() ||
+ (!RequireReadNone && MemBehaviorAA->isAssumedReadOnly()))) {
+ IsKnown = RequireReadNone ? MemBehaviorAA->isKnownReadNone()
+ : MemBehaviorAA->isKnownReadOnly();
if (!IsKnown)
- A.recordDependence(MemBehaviorAA, QueryingAA, DepClassTy::OPTIONAL);
+ A.recordDependence(*MemBehaviorAA, QueryingAA, DepClassTy::OPTIONAL);
return true;
}
@@ -574,7 +622,7 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI,
const AbstractAttribute &QueryingAA,
const AA::InstExclusionSetTy *ExclusionSet,
std::function<bool(const Function &F)> GoBackwardsCB) {
- LLVM_DEBUG({
+ DEBUG_WITH_TYPE(VERBOSE_DEBUG_TYPE, {
dbgs() << "[AA] isPotentiallyReachable @" << ToFn.getName() << " from "
<< FromI << " [GBCB: " << bool(GoBackwardsCB) << "][#ExS: "
<< (ExclusionSet ? std::to_string(ExclusionSet->size()) : "none")
@@ -584,6 +632,19 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI,
dbgs() << *ES << "\n";
});
+ // We know kernels (generally) cannot be called from within the module. Thus,
+ // for reachability we would need to step back from a kernel which would allow
+ // us to reach anything anyway. Even if a kernel is invoked from another
+ // kernel, values like allocas and shared memory are not accessible. We
+ // implicitly check for this situation to avoid costly lookups.
+ if (GoBackwardsCB && &ToFn != FromI.getFunction() &&
+ !GoBackwardsCB(*FromI.getFunction()) && ToFn.hasFnAttribute("kernel") &&
+ FromI.getFunction()->hasFnAttribute("kernel")) {
+ LLVM_DEBUG(dbgs() << "[AA] assume kernel cannot be reached from within the "
+ "module; success\n";);
+ return false;
+ }
+
// If we can go arbitrarily backwards we will eventually reach an entry point
// that can reach ToI. Only if a set of blocks through which we cannot go is
// provided, or once we track internal functions not accessible from the
@@ -611,10 +672,10 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI,
return true;
LLVM_DEBUG(dbgs() << "[AA] check " << *ToI << " from " << *CurFromI
<< " intraprocedurally\n");
- const auto &ReachabilityAA = A.getAAFor<AAIntraFnReachability>(
+ const auto *ReachabilityAA = A.getAAFor<AAIntraFnReachability>(
QueryingAA, IRPosition::function(ToFn), DepClassTy::OPTIONAL);
- bool Result =
- ReachabilityAA.isAssumedReachable(A, *CurFromI, *ToI, ExclusionSet);
+ bool Result = !ReachabilityAA || ReachabilityAA->isAssumedReachable(
+ A, *CurFromI, *ToI, ExclusionSet);
LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " "
<< (Result ? "can potentially " : "cannot ") << "reach "
<< *ToI << " [Intra]\n");
@@ -624,11 +685,11 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI,
bool Result = true;
if (!ToFn.isDeclaration() && ToI) {
- const auto &ToReachabilityAA = A.getAAFor<AAIntraFnReachability>(
+ const auto *ToReachabilityAA = A.getAAFor<AAIntraFnReachability>(
QueryingAA, IRPosition::function(ToFn), DepClassTy::OPTIONAL);
const Instruction &EntryI = ToFn.getEntryBlock().front();
- Result =
- ToReachabilityAA.isAssumedReachable(A, EntryI, *ToI, ExclusionSet);
+ Result = !ToReachabilityAA || ToReachabilityAA->isAssumedReachable(
+ A, EntryI, *ToI, ExclusionSet);
LLVM_DEBUG(dbgs() << "[AA] Entry " << EntryI << " of @" << ToFn.getName()
<< " " << (Result ? "can potentially " : "cannot ")
<< "reach @" << *ToI << " [ToFn]\n");
@@ -637,10 +698,10 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI,
if (Result) {
// The entry of the ToFn can reach the instruction ToI. If the current
// instruction is already known to reach the ToFn.
- const auto &FnReachabilityAA = A.getAAFor<AAInterFnReachability>(
+ const auto *FnReachabilityAA = A.getAAFor<AAInterFnReachability>(
QueryingAA, IRPosition::function(*FromFn), DepClassTy::OPTIONAL);
- Result = FnReachabilityAA.instructionCanReach(A, *CurFromI, ToFn,
- ExclusionSet);
+ Result = !FnReachabilityAA || FnReachabilityAA->instructionCanReach(
+ A, *CurFromI, ToFn, ExclusionSet);
LLVM_DEBUG(dbgs() << "[AA] " << *CurFromI << " in @" << FromFn->getName()
<< " " << (Result ? "can potentially " : "cannot ")
<< "reach @" << ToFn.getName() << " [FromFn]\n");
@@ -649,11 +710,11 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI,
}
// TODO: Check assumed nounwind.
- const auto &ReachabilityAA = A.getAAFor<AAIntraFnReachability>(
+ const auto *ReachabilityAA = A.getAAFor<AAIntraFnReachability>(
QueryingAA, IRPosition::function(*FromFn), DepClassTy::OPTIONAL);
auto ReturnInstCB = [&](Instruction &Ret) {
- bool Result =
- ReachabilityAA.isAssumedReachable(A, *CurFromI, Ret, ExclusionSet);
+ bool Result = !ReachabilityAA || ReachabilityAA->isAssumedReachable(
+ A, *CurFromI, Ret, ExclusionSet);
LLVM_DEBUG(dbgs() << "[AA][Ret] " << *CurFromI << " "
<< (Result ? "can potentially " : "cannot ") << "reach "
<< Ret << " [Intra]\n");
@@ -743,14 +804,15 @@ bool AA::isAssumedThreadLocalObject(Attributor &A, Value &Obj,
<< "' is thread local; stack objects are thread local.\n");
return true;
}
- const auto &NoCaptureAA = A.getAAFor<AANoCapture>(
- QueryingAA, IRPosition::value(Obj), DepClassTy::OPTIONAL);
+ bool IsKnownNoCapture;
+ bool IsAssumedNoCapture = AA::hasAssumedIRAttr<Attribute::NoCapture>(
+ A, &QueryingAA, IRPosition::value(Obj), DepClassTy::OPTIONAL,
+ IsKnownNoCapture);
LLVM_DEBUG(dbgs() << "[AA] Object '" << Obj << "' is "
- << (NoCaptureAA.isAssumedNoCapture() ? "" : "not")
- << " thread local; "
- << (NoCaptureAA.isAssumedNoCapture() ? "non-" : "")
+ << (IsAssumedNoCapture ? "" : "not") << " thread local; "
+ << (IsAssumedNoCapture ? "non-" : "")
<< "captured stack object.\n");
- return NoCaptureAA.isAssumedNoCapture();
+ return IsAssumedNoCapture;
}
if (auto *GV = dyn_cast<GlobalVariable>(&Obj)) {
if (GV->isConstant()) {
@@ -831,9 +893,9 @@ bool AA::isPotentiallyAffectedByBarrier(Attributor &A,
return false;
};
- const auto &UnderlyingObjsAA = A.getAAFor<AAUnderlyingObjects>(
+ const auto *UnderlyingObjsAA = A.getAAFor<AAUnderlyingObjects>(
QueryingAA, IRPosition::value(*Ptr), DepClassTy::OPTIONAL);
- if (!UnderlyingObjsAA.forallUnderlyingObjects(Pred))
+ if (!UnderlyingObjsAA || !UnderlyingObjsAA->forallUnderlyingObjects(Pred))
return true;
}
return false;
@@ -848,38 +910,42 @@ static bool isEqualOrWorse(const Attribute &New, const Attribute &Old) {
}
/// Return true if the information provided by \p Attr was added to the
-/// attribute list \p Attrs. This is only the case if it was not already present
-/// in \p Attrs at the position describe by \p PK and \p AttrIdx.
+/// attribute set \p AttrSet. This is only the case if it was not already
+/// present in \p AttrSet.
static bool addIfNotExistent(LLVMContext &Ctx, const Attribute &Attr,
- AttributeList &Attrs, int AttrIdx,
- bool ForceReplace = false) {
+ AttributeSet AttrSet, bool ForceReplace,
+ AttrBuilder &AB) {
if (Attr.isEnumAttribute()) {
Attribute::AttrKind Kind = Attr.getKindAsEnum();
- if (Attrs.hasAttributeAtIndex(AttrIdx, Kind))
- if (!ForceReplace &&
- isEqualOrWorse(Attr, Attrs.getAttributeAtIndex(AttrIdx, Kind)))
- return false;
- Attrs = Attrs.addAttributeAtIndex(Ctx, AttrIdx, Attr);
+ if (AttrSet.hasAttribute(Kind))
+ return false;
+ AB.addAttribute(Kind);
return true;
}
if (Attr.isStringAttribute()) {
StringRef Kind = Attr.getKindAsString();
- if (Attrs.hasAttributeAtIndex(AttrIdx, Kind))
- if (!ForceReplace &&
- isEqualOrWorse(Attr, Attrs.getAttributeAtIndex(AttrIdx, Kind)))
+ if (AttrSet.hasAttribute(Kind)) {
+ if (!ForceReplace)
return false;
- Attrs = Attrs.addAttributeAtIndex(Ctx, AttrIdx, Attr);
+ }
+ AB.addAttribute(Kind, Attr.getValueAsString());
return true;
}
if (Attr.isIntAttribute()) {
Attribute::AttrKind Kind = Attr.getKindAsEnum();
- if (Attrs.hasAttributeAtIndex(AttrIdx, Kind))
- if (!ForceReplace &&
- isEqualOrWorse(Attr, Attrs.getAttributeAtIndex(AttrIdx, Kind)))
+ if (!ForceReplace && Kind == Attribute::Memory) {
+ MemoryEffects ME = Attr.getMemoryEffects() & AttrSet.getMemoryEffects();
+ if (ME == AttrSet.getMemoryEffects())
return false;
- Attrs = Attrs.removeAttributeAtIndex(Ctx, AttrIdx, Kind);
- Attrs = Attrs.addAttributeAtIndex(Ctx, AttrIdx, Attr);
+ AB.addMemoryAttr(ME);
+ return true;
+ }
+ if (AttrSet.hasAttribute(Kind)) {
+ if (!ForceReplace && isEqualOrWorse(Attr, AttrSet.getAttribute(Kind)))
+ return false;
+ }
+ AB.addAttribute(Attr);
return true;
}
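A minimal stand-alone sketch of the merge rule in the Attribute::Memory branch above, with a plain bitmask standing in for LLVM's MemoryEffects (the names below are illustrative, not from the patch): an update is only reported when intersecting the deduced effects with the existing ones actually shrinks them.

#include <cassert>
#include <cstdint>

// Bitmask stand-in for MemoryEffects: one bit per (location, access) pair.
using Effects = uint8_t;
constexpr Effects ArgMemRead = 1, ArgMemWrite = 2, OtherRead = 4, OtherWrite = 8;

// Report a change only when the intersection is strictly smaller than what is
// already recorded, mirroring the Attribute::Memory handling above.
static bool mergeMemoryEffects(Effects &Existing, Effects Deduced) {
  Effects Intersection = Existing & Deduced;
  if (Intersection == Existing)
    return false;          // Nothing new; skip the attribute update.
  Existing = Intersection; // The more restrictive effects win.
  return true;
}

int main() {
  Effects Existing = ArgMemRead | ArgMemWrite; // roughly memory(argmem: readwrite)
  Effects Deduced = ArgMemRead | OtherRead;    // roughly memory(read)
  bool Changed = mergeMemoryEffects(Existing, Deduced);
  assert(Changed && Existing == ArgMemRead);   // result: roughly memory(argmem: read)
  (void)Changed;
  return 0;
}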
@@ -933,7 +999,7 @@ Argument *IRPosition::getAssociatedArgument() const {
// If no callbacks were found, or none used the underlying call site operand
// exclusively, use the direct callee argument if available.
- const Function *Callee = CB.getCalledFunction();
+ auto *Callee = dyn_cast_if_present<Function>(CB.getCalledOperand());
if (Callee && Callee->arg_size() > unsigned(ArgNo))
return Callee->getArg(ArgNo);
@@ -955,63 +1021,168 @@ ChangeStatus AbstractAttribute::update(Attributor &A) {
return HasChanged;
}
+bool Attributor::getAttrsFromAssumes(const IRPosition &IRP,
+ Attribute::AttrKind AK,
+ SmallVectorImpl<Attribute> &Attrs) {
+ assert(IRP.getPositionKind() != IRPosition::IRP_INVALID &&
+ "Did expect a valid position!");
+ MustBeExecutedContextExplorer *Explorer =
+ getInfoCache().getMustBeExecutedContextExplorer();
+ if (!Explorer)
+ return false;
+
+ Value &AssociatedValue = IRP.getAssociatedValue();
+
+ const Assume2KnowledgeMap &A2K =
+ getInfoCache().getKnowledgeMap().lookup({&AssociatedValue, AK});
+
+ // Check if we found any potential assume use; if not, we don't need to create
+ // explorer iterators.
+ if (A2K.empty())
+ return false;
+
+ LLVMContext &Ctx = AssociatedValue.getContext();
+ unsigned AttrsSize = Attrs.size();
+ auto EIt = Explorer->begin(IRP.getCtxI()),
+ EEnd = Explorer->end(IRP.getCtxI());
+ for (const auto &It : A2K)
+ if (Explorer->findInContextOf(It.first, EIt, EEnd))
+ Attrs.push_back(Attribute::get(Ctx, AK, It.second.Max));
+ return AttrsSize != Attrs.size();
+}
+
+template <typename DescTy>
ChangeStatus
-IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP,
- const ArrayRef<Attribute> &DeducedAttrs,
- bool ForceReplace) {
- Function *ScopeFn = IRP.getAnchorScope();
- IRPosition::Kind PK = IRP.getPositionKind();
-
- // In the following some generic code that will manifest attributes in
- // DeducedAttrs if they improve the current IR. Due to the different
- // annotation positions we use the underlying AttributeList interface.
-
- AttributeList Attrs;
- switch (PK) {
- case IRPosition::IRP_INVALID:
+Attributor::updateAttrMap(const IRPosition &IRP,
+ const ArrayRef<DescTy> &AttrDescs,
+ function_ref<bool(const DescTy &, AttributeSet,
+ AttributeMask &, AttrBuilder &)>
+ CB) {
+ if (AttrDescs.empty())
+ return ChangeStatus::UNCHANGED;
+ switch (IRP.getPositionKind()) {
case IRPosition::IRP_FLOAT:
+ case IRPosition::IRP_INVALID:
return ChangeStatus::UNCHANGED;
- case IRPosition::IRP_ARGUMENT:
- case IRPosition::IRP_FUNCTION:
- case IRPosition::IRP_RETURNED:
- Attrs = ScopeFn->getAttributes();
- break;
- case IRPosition::IRP_CALL_SITE:
- case IRPosition::IRP_CALL_SITE_RETURNED:
- case IRPosition::IRP_CALL_SITE_ARGUMENT:
- Attrs = cast<CallBase>(IRP.getAnchorValue()).getAttributes();
+ default:
break;
- }
+ };
+
+ AttributeList AL;
+ Value *AttrListAnchor = IRP.getAttrListAnchor();
+ auto It = AttrsMap.find(AttrListAnchor);
+ if (It == AttrsMap.end())
+ AL = IRP.getAttrList();
+ else
+ AL = It->getSecond();
- ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
LLVMContext &Ctx = IRP.getAnchorValue().getContext();
- for (const Attribute &Attr : DeducedAttrs) {
- if (!addIfNotExistent(Ctx, Attr, Attrs, IRP.getAttrIdx(), ForceReplace))
- continue;
+ auto AttrIdx = IRP.getAttrIdx();
+ AttributeSet AS = AL.getAttributes(AttrIdx);
+ AttributeMask AM;
+ AttrBuilder AB(Ctx);
- HasChanged = ChangeStatus::CHANGED;
- }
+ ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
+ for (const DescTy &AttrDesc : AttrDescs)
+ if (CB(AttrDesc, AS, AM, AB))
+ HasChanged = ChangeStatus::CHANGED;
if (HasChanged == ChangeStatus::UNCHANGED)
- return HasChanged;
+ return ChangeStatus::UNCHANGED;
- switch (PK) {
- case IRPosition::IRP_ARGUMENT:
- case IRPosition::IRP_FUNCTION:
- case IRPosition::IRP_RETURNED:
- ScopeFn->setAttributes(Attrs);
- break;
- case IRPosition::IRP_CALL_SITE:
- case IRPosition::IRP_CALL_SITE_RETURNED:
- case IRPosition::IRP_CALL_SITE_ARGUMENT:
- cast<CallBase>(IRP.getAnchorValue()).setAttributes(Attrs);
- break;
- case IRPosition::IRP_INVALID:
- case IRPosition::IRP_FLOAT:
- break;
+ AL = AL.removeAttributesAtIndex(Ctx, AttrIdx, AM);
+ AL = AL.addAttributesAtIndex(Ctx, AttrIdx, AB);
+ AttrsMap[AttrListAnchor] = AL;
+ return ChangeStatus::CHANGED;
+}
+
+bool Attributor::hasAttr(const IRPosition &IRP,
+ ArrayRef<Attribute::AttrKind> AttrKinds,
+ bool IgnoreSubsumingPositions,
+ Attribute::AttrKind ImpliedAttributeKind) {
+ bool Implied = false;
+ bool HasAttr = false;
+ auto HasAttrCB = [&](const Attribute::AttrKind &Kind, AttributeSet AttrSet,
+ AttributeMask &, AttrBuilder &) {
+ if (AttrSet.hasAttribute(Kind)) {
+ Implied |= Kind != ImpliedAttributeKind;
+ HasAttr = true;
+ }
+ return false;
+ };
+ for (const IRPosition &EquivIRP : SubsumingPositionIterator(IRP)) {
+ updateAttrMap<Attribute::AttrKind>(EquivIRP, AttrKinds, HasAttrCB);
+ if (HasAttr)
+ break;
+ // The first position returned by the SubsumingPositionIterator is
+ // always the position itself. If we ignore subsuming positions we
+ // are done after the first iteration.
+ if (IgnoreSubsumingPositions)
+ break;
+ Implied = true;
+ }
+ if (!HasAttr) {
+ Implied = true;
+ SmallVector<Attribute> Attrs;
+ for (Attribute::AttrKind AK : AttrKinds)
+ if (getAttrsFromAssumes(IRP, AK, Attrs)) {
+ HasAttr = true;
+ break;
+ }
}
- return HasChanged;
+ // Check if we should manifest the implied attribute kind at the IRP.
+ if (ImpliedAttributeKind != Attribute::None && HasAttr && Implied)
+ manifestAttrs(IRP, {Attribute::get(IRP.getAnchorValue().getContext(),
+ ImpliedAttributeKind)});
+ return HasAttr;
+}
+
+void Attributor::getAttrs(const IRPosition &IRP,
+ ArrayRef<Attribute::AttrKind> AttrKinds,
+ SmallVectorImpl<Attribute> &Attrs,
+ bool IgnoreSubsumingPositions) {
+ auto CollectAttrCB = [&](const Attribute::AttrKind &Kind,
+ AttributeSet AttrSet, AttributeMask &,
+ AttrBuilder &) {
+ if (AttrSet.hasAttribute(Kind))
+ Attrs.push_back(AttrSet.getAttribute(Kind));
+ return false;
+ };
+ for (const IRPosition &EquivIRP : SubsumingPositionIterator(IRP)) {
+ updateAttrMap<Attribute::AttrKind>(EquivIRP, AttrKinds, CollectAttrCB);
+ // The first position returned by the SubsumingPositionIterator is
+ // always the position itself. If we ignore subsuming positions we
+ // are done after the first iteration.
+ if (IgnoreSubsumingPositions)
+ break;
+ }
+ for (Attribute::AttrKind AK : AttrKinds)
+ getAttrsFromAssumes(IRP, AK, Attrs);
+}
+
+ChangeStatus
+Attributor::removeAttrs(const IRPosition &IRP,
+ const ArrayRef<Attribute::AttrKind> &AttrKinds) {
+ auto RemoveAttrCB = [&](const Attribute::AttrKind &Kind, AttributeSet AttrSet,
+ AttributeMask &AM, AttrBuilder &) {
+ if (!AttrSet.hasAttribute(Kind))
+ return false;
+ AM.addAttribute(Kind);
+ return true;
+ };
+ return updateAttrMap<Attribute::AttrKind>(IRP, AttrKinds, RemoveAttrCB);
+}
+
+ChangeStatus Attributor::manifestAttrs(const IRPosition &IRP,
+ const ArrayRef<Attribute> &Attrs,
+ bool ForceReplace) {
+ LLVMContext &Ctx = IRP.getAnchorValue().getContext();
+ auto AddAttrCB = [&](const Attribute &Attr, AttributeSet AttrSet,
+ AttributeMask &, AttrBuilder &AB) {
+ return addIfNotExistent(Ctx, Attr, AttrSet, ForceReplace, AB);
+ };
+ return updateAttrMap<Attribute>(IRP, Attrs, AddAttrCB);
}
const IRPosition IRPosition::EmptyKey(DenseMapInfo<void *>::getEmptyKey());
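The hunk above funnels hasAttr, getAttrs, removeAttrs and manifestAttrs through a single traversal, updateAttrMap, which stages attribute changes in a cached list keyed by the attribute-list anchor and only writes them back later in manifestAttributes. A rough, dependency-free sketch of that callback-plus-cache shape; the types and names here are invented stand-ins for AttributeList and the anchor map, not LLVM APIs.

#include <cassert>
#include <functional>
#include <map>
#include <set>
#include <string>

// Invented stand-ins: an anchor plays the role of the Function/CallBase the
// attribute list hangs off, the set plays the role of the staged AttributeList.
using Anchor = const void *;
using AttrSetT = std::set<std::string>;

struct AttrCache {
  std::map<Anchor, AttrSetT> Staged;

  // One traversal with a pluggable callback, the shape of updateAttrMap: the
  // callback inspects or extends the staged set and reports whether it changed.
  bool update(Anchor A, const std::function<bool(AttrSetT &)> &CB) {
    return CB(Staged[A]);
  }

  // hasAttr-style query: read-only callback, never reports a change.
  bool has(Anchor A, const std::string &Kind) {
    bool Found = false;
    update(A, [&](AttrSetT &S) {
      Found = S.count(Kind) != 0;
      return false;
    });
    return Found;
  }

  // manifestAttrs-style addition: only a genuinely new kind counts as a change.
  bool add(Anchor A, const std::string &Kind) {
    return update(A, [&](AttrSetT &S) { return S.insert(Kind).second; });
  }
};

int main() {
  int FnToken = 0; // Pretend this is the Function the attributes belong to.
  AttrCache Cache;
  bool FirstAdd = Cache.add(&FnToken, "nounwind");  // Change: newly staged.
  bool SecondAdd = Cache.add(&FnToken, "nounwind"); // No change: already staged.
  assert(FirstAdd && !SecondAdd && Cache.has(&FnToken, "nounwind"));
  (void)FirstAdd;
  (void)SecondAdd;
  return 0;
}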
@@ -1021,7 +1192,7 @@ const IRPosition
SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
IRPositions.emplace_back(IRP);
- // Helper to determine if operand bundles on a call site are benin or
+ // Helper to determine if operand bundles on a call site are benign or
// potentially problematic. We handle only llvm.assume for now.
auto CanIgnoreOperandBundles = [](const CallBase &CB) {
return (isa<IntrinsicInst>(CB) &&
@@ -1043,7 +1214,7 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
// TODO: We need to look at the operand bundles similar to the redirection
// in CallBase.
if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB))
- if (const Function *Callee = CB->getCalledFunction())
+ if (auto *Callee = dyn_cast_if_present<Function>(CB->getCalledOperand()))
IRPositions.emplace_back(IRPosition::function(*Callee));
return;
case IRPosition::IRP_CALL_SITE_RETURNED:
@@ -1051,7 +1222,8 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
// TODO: We need to look at the operand bundles similar to the redirection
// in CallBase.
if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) {
- if (const Function *Callee = CB->getCalledFunction()) {
+ if (auto *Callee =
+ dyn_cast_if_present<Function>(CB->getCalledOperand())) {
IRPositions.emplace_back(IRPosition::returned(*Callee));
IRPositions.emplace_back(IRPosition::function(*Callee));
for (const Argument &Arg : Callee->args())
@@ -1071,7 +1243,7 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
// TODO: We need to look at the operand bundles similar to the redirection
// in CallBase.
if (!CB->hasOperandBundles() || CanIgnoreOperandBundles(*CB)) {
- const Function *Callee = CB->getCalledFunction();
+ auto *Callee = dyn_cast_if_present<Function>(CB->getCalledOperand());
if (Callee) {
if (Argument *Arg = IRP.getAssociatedArgument())
IRPositions.emplace_back(IRPosition::argument(*Arg));
@@ -1084,85 +1256,6 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) {
}
}
-bool IRPosition::hasAttr(ArrayRef<Attribute::AttrKind> AKs,
- bool IgnoreSubsumingPositions, Attributor *A) const {
- SmallVector<Attribute, 4> Attrs;
- for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) {
- for (Attribute::AttrKind AK : AKs)
- if (EquivIRP.getAttrsFromIRAttr(AK, Attrs))
- return true;
- // The first position returned by the SubsumingPositionIterator is
- // always the position itself. If we ignore subsuming positions we
- // are done after the first iteration.
- if (IgnoreSubsumingPositions)
- break;
- }
- if (A)
- for (Attribute::AttrKind AK : AKs)
- if (getAttrsFromAssumes(AK, Attrs, *A))
- return true;
- return false;
-}
-
-void IRPosition::getAttrs(ArrayRef<Attribute::AttrKind> AKs,
- SmallVectorImpl<Attribute> &Attrs,
- bool IgnoreSubsumingPositions, Attributor *A) const {
- for (const IRPosition &EquivIRP : SubsumingPositionIterator(*this)) {
- for (Attribute::AttrKind AK : AKs)
- EquivIRP.getAttrsFromIRAttr(AK, Attrs);
- // The first position returned by the SubsumingPositionIterator is
- // always the position itself. If we ignore subsuming positions we
- // are done after the first iteration.
- if (IgnoreSubsumingPositions)
- break;
- }
- if (A)
- for (Attribute::AttrKind AK : AKs)
- getAttrsFromAssumes(AK, Attrs, *A);
-}
-
-bool IRPosition::getAttrsFromIRAttr(Attribute::AttrKind AK,
- SmallVectorImpl<Attribute> &Attrs) const {
- if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT)
- return false;
-
- AttributeList AttrList;
- if (const auto *CB = dyn_cast<CallBase>(&getAnchorValue()))
- AttrList = CB->getAttributes();
- else
- AttrList = getAssociatedFunction()->getAttributes();
-
- bool HasAttr = AttrList.hasAttributeAtIndex(getAttrIdx(), AK);
- if (HasAttr)
- Attrs.push_back(AttrList.getAttributeAtIndex(getAttrIdx(), AK));
- return HasAttr;
-}
-
-bool IRPosition::getAttrsFromAssumes(Attribute::AttrKind AK,
- SmallVectorImpl<Attribute> &Attrs,
- Attributor &A) const {
- assert(getPositionKind() != IRP_INVALID && "Did expect a valid position!");
- Value &AssociatedValue = getAssociatedValue();
-
- const Assume2KnowledgeMap &A2K =
- A.getInfoCache().getKnowledgeMap().lookup({&AssociatedValue, AK});
-
- // Check if we found any potential assume use, if not we don't need to create
- // explorer iterators.
- if (A2K.empty())
- return false;
-
- LLVMContext &Ctx = AssociatedValue.getContext();
- unsigned AttrsSize = Attrs.size();
- MustBeExecutedContextExplorer &Explorer =
- A.getInfoCache().getMustBeExecutedContextExplorer();
- auto EIt = Explorer.begin(getCtxI()), EEnd = Explorer.end(getCtxI());
- for (const auto &It : A2K)
- if (Explorer.findInContextOf(It.first, EIt, EEnd))
- Attrs.push_back(Attribute::get(Ctx, AK, It.second.Max));
- return AttrsSize != Attrs.size();
-}
-
void IRPosition::verify() {
#ifdef EXPENSIVE_CHECKS
switch (getPositionKind()) {
@@ -1285,35 +1378,67 @@ std::optional<Value *> Attributor::getAssumedSimplified(
}
bool Attributor::getAssumedSimplifiedValues(
- const IRPosition &IRP, const AbstractAttribute *AA,
+ const IRPosition &InitialIRP, const AbstractAttribute *AA,
SmallVectorImpl<AA::ValueAndContext> &Values, AA::ValueScope S,
- bool &UsedAssumedInformation) {
- // First check all callbacks provided by outside AAs. If any of them returns
- // a non-null value that is different from the associated value, or
- // std::nullopt, we assume it's simplified.
- const auto &SimplificationCBs = SimplificationCallbacks.lookup(IRP);
- for (const auto &CB : SimplificationCBs) {
- std::optional<Value *> CBResult = CB(IRP, AA, UsedAssumedInformation);
- if (!CBResult.has_value())
- continue;
- Value *V = *CBResult;
- if (!V)
- return false;
- if ((S & AA::ValueScope::Interprocedural) ||
- AA::isValidInScope(*V, IRP.getAnchorScope()))
- Values.push_back(AA::ValueAndContext{*V, nullptr});
- else
- return false;
- }
- if (!SimplificationCBs.empty())
- return true;
+ bool &UsedAssumedInformation, bool RecurseForSelectAndPHI) {
+ SmallPtrSet<Value *, 8> Seen;
+ SmallVector<IRPosition, 8> Worklist;
+ Worklist.push_back(InitialIRP);
+ while (!Worklist.empty()) {
+ const IRPosition &IRP = Worklist.pop_back_val();
+
+ // First check all callbacks provided by outside AAs. If any of them returns
+ // a non-null value that is different from the associated value, or
+ // std::nullopt, we assume it's simplified.
+ int NV = Values.size();
+ const auto &SimplificationCBs = SimplificationCallbacks.lookup(IRP);
+ for (const auto &CB : SimplificationCBs) {
+ std::optional<Value *> CBResult = CB(IRP, AA, UsedAssumedInformation);
+ if (!CBResult.has_value())
+ continue;
+ Value *V = *CBResult;
+ if (!V)
+ return false;
+ if ((S & AA::ValueScope::Interprocedural) ||
+ AA::isValidInScope(*V, IRP.getAnchorScope()))
+ Values.push_back(AA::ValueAndContext{*V, nullptr});
+ else
+ return false;
+ }
+ if (SimplificationCBs.empty()) {
+ // If no high-level/outside simplification occurred, use
+ // AAPotentialValues.
+ const auto *PotentialValuesAA =
+ getOrCreateAAFor<AAPotentialValues>(IRP, AA, DepClassTy::OPTIONAL);
+ if (PotentialValuesAA && PotentialValuesAA->getAssumedSimplifiedValues(*this, Values, S)) {
+ UsedAssumedInformation |= !PotentialValuesAA->isAtFixpoint();
+ } else if (IRP.getPositionKind() != IRPosition::IRP_RETURNED) {
+ Values.push_back({IRP.getAssociatedValue(), IRP.getCtxI()});
+ } else {
+ // TODO: We could visit all returns and add the operands.
+ return false;
+ }
+ }
- // If no high-level/outside simplification occurred, use AAPotentialValues.
- const auto &PotentialValuesAA =
- getOrCreateAAFor<AAPotentialValues>(IRP, AA, DepClassTy::OPTIONAL);
- if (!PotentialValuesAA.getAssumedSimplifiedValues(*this, Values, S))
- return false;
- UsedAssumedInformation |= !PotentialValuesAA.isAtFixpoint();
+ if (!RecurseForSelectAndPHI)
+ break;
+
+ for (int I = NV, E = Values.size(); I < E; ++I) {
+ Value *V = Values[I].getValue();
+ if (!isa<PHINode>(V) && !isa<SelectInst>(V))
+ continue;
+ if (!Seen.insert(V).second)
+ continue;
+ // Move the last element to this slot.
+ Values[I] = Values[E - 1];
+ // Eliminate the last slot, adjust the indices.
+ Values.pop_back();
+ --E;
+ --I;
+ // Add a new value (select or phi) to the worklist.
+ Worklist.push_back(IRPosition::value(*V));
+ }
+ }
return true;
}
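The PHI/select handling above drops an already-expanded value by overwriting it with the last element, popping the back, and adjusting both the running index and the end bound before re-queuing the value. The same swap-and-pop bookkeeping over a plain std::vector, as a small self-contained sketch in which integers stand in for simplified values:

#include <cassert>
#include <vector>

// Remove every element matching Pred by swapping in the last element, using
// the same index bookkeeping as the worklist loop above.
template <typename T, typename PredT>
void swapRemoveIf(std::vector<T> &Values, PredT Pred) {
  for (int I = 0, E = static_cast<int>(Values.size()); I < E; ++I) {
    if (!Pred(Values[I]))
      continue;
    Values[I] = Values[E - 1]; // Move the last element into this slot.
    Values.pop_back();         // Drop the now-duplicated last slot.
    --E;                       // Shrink the bound...
    --I;                       // ...and revisit the swapped-in element.
  }
}

int main() {
  std::vector<int> V{1, 2, 3, 4, 5};
  swapRemoveIf(V, [](int X) { return X % 2 == 0; }); // Drop 2 and 4.
  assert(V.size() == 3);
  return 0;
}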
@@ -1325,7 +1450,8 @@ std::optional<Value *> Attributor::translateArgumentToCallSiteContent(
if (*V == nullptr || isa<Constant>(*V))
return V;
if (auto *Arg = dyn_cast<Argument>(*V))
- if (CB.getCalledFunction() == Arg->getParent())
+ if (CB.getCalledOperand() == Arg->getParent() &&
+ CB.arg_size() > Arg->getArgNo())
if (!Arg->hasPointeeInMemoryValueAttr())
return getAssumedSimplified(
IRPosition::callsite_argument(CB, Arg->getArgNo()), AA,
@@ -1346,6 +1472,8 @@ bool Attributor::isAssumedDead(const AbstractAttribute &AA,
const AAIsDead *FnLivenessAA,
bool &UsedAssumedInformation,
bool CheckBBLivenessOnly, DepClassTy DepClass) {
+ if (!Configuration.UseLiveness)
+ return false;
const IRPosition &IRP = AA.getIRPosition();
if (!Functions.count(IRP.getAnchorScope()))
return false;
@@ -1358,6 +1486,8 @@ bool Attributor::isAssumedDead(const Use &U,
const AAIsDead *FnLivenessAA,
bool &UsedAssumedInformation,
bool CheckBBLivenessOnly, DepClassTy DepClass) {
+ if (!Configuration.UseLiveness)
+ return false;
Instruction *UserI = dyn_cast<Instruction>(U.getUser());
if (!UserI)
return isAssumedDead(IRPosition::value(*U.get()), QueryingAA, FnLivenessAA,
@@ -1384,12 +1514,12 @@ bool Attributor::isAssumedDead(const Use &U,
} else if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
if (!CheckBBLivenessOnly && SI->getPointerOperand() != U.get()) {
const IRPosition IRP = IRPosition::inst(*SI);
- const AAIsDead &IsDeadAA =
+ const AAIsDead *IsDeadAA =
getOrCreateAAFor<AAIsDead>(IRP, QueryingAA, DepClassTy::NONE);
- if (IsDeadAA.isRemovableStore()) {
+ if (IsDeadAA && IsDeadAA->isRemovableStore()) {
if (QueryingAA)
- recordDependence(IsDeadAA, *QueryingAA, DepClass);
- if (!IsDeadAA.isKnown(AAIsDead::IS_REMOVABLE))
+ recordDependence(*IsDeadAA, *QueryingAA, DepClass);
+ if (!IsDeadAA->isKnown(AAIsDead::IS_REMOVABLE))
UsedAssumedInformation = true;
return true;
}
@@ -1406,6 +1536,8 @@ bool Attributor::isAssumedDead(const Instruction &I,
bool &UsedAssumedInformation,
bool CheckBBLivenessOnly, DepClassTy DepClass,
bool CheckForDeadStore) {
+ if (!Configuration.UseLiveness)
+ return false;
const IRPosition::CallBaseContext *CBCtx =
QueryingAA ? QueryingAA->getCallBaseContext() : nullptr;
@@ -1414,11 +1546,11 @@ bool Attributor::isAssumedDead(const Instruction &I,
const Function &F = *I.getFunction();
if (!FnLivenessAA || FnLivenessAA->getAnchorScope() != &F)
- FnLivenessAA = &getOrCreateAAFor<AAIsDead>(IRPosition::function(F, CBCtx),
- QueryingAA, DepClassTy::NONE);
+ FnLivenessAA = getOrCreateAAFor<AAIsDead>(IRPosition::function(F, CBCtx),
+ QueryingAA, DepClassTy::NONE);
// Don't use recursive reasoning.
- if (QueryingAA == FnLivenessAA)
+ if (!FnLivenessAA || QueryingAA == FnLivenessAA)
return false;
// If we have a context instruction and a liveness AA we use it.
@@ -1435,25 +1567,25 @@ bool Attributor::isAssumedDead(const Instruction &I,
return false;
const IRPosition IRP = IRPosition::inst(I, CBCtx);
- const AAIsDead &IsDeadAA =
+ const AAIsDead *IsDeadAA =
getOrCreateAAFor<AAIsDead>(IRP, QueryingAA, DepClassTy::NONE);
// Don't use recursive reasoning.
- if (QueryingAA == &IsDeadAA)
+ if (!IsDeadAA || QueryingAA == IsDeadAA)
return false;
- if (IsDeadAA.isAssumedDead()) {
+ if (IsDeadAA->isAssumedDead()) {
if (QueryingAA)
- recordDependence(IsDeadAA, *QueryingAA, DepClass);
- if (!IsDeadAA.isKnownDead())
+ recordDependence(*IsDeadAA, *QueryingAA, DepClass);
+ if (!IsDeadAA->isKnownDead())
UsedAssumedInformation = true;
return true;
}
- if (CheckForDeadStore && isa<StoreInst>(I) && IsDeadAA.isRemovableStore()) {
+ if (CheckForDeadStore && isa<StoreInst>(I) && IsDeadAA->isRemovableStore()) {
if (QueryingAA)
- recordDependence(IsDeadAA, *QueryingAA, DepClass);
- if (!IsDeadAA.isKnownDead())
+ recordDependence(*IsDeadAA, *QueryingAA, DepClass);
+ if (!IsDeadAA->isKnownDead())
UsedAssumedInformation = true;
return true;
}
@@ -1466,6 +1598,8 @@ bool Attributor::isAssumedDead(const IRPosition &IRP,
const AAIsDead *FnLivenessAA,
bool &UsedAssumedInformation,
bool CheckBBLivenessOnly, DepClassTy DepClass) {
+ if (!Configuration.UseLiveness)
+ return false;
// Don't check liveness for constants, e.g. functions, used as (floating)
// values since the context instruction and such is here meaningless.
if (IRP.getPositionKind() == IRPosition::IRP_FLOAT &&
@@ -1486,14 +1620,14 @@ bool Attributor::isAssumedDead(const IRPosition &IRP,
// If we haven't succeeded we query the specific liveness info for the IRP.
const AAIsDead *IsDeadAA;
if (IRP.getPositionKind() == IRPosition::IRP_CALL_SITE)
- IsDeadAA = &getOrCreateAAFor<AAIsDead>(
+ IsDeadAA = getOrCreateAAFor<AAIsDead>(
IRPosition::callsite_returned(cast<CallBase>(IRP.getAssociatedValue())),
QueryingAA, DepClassTy::NONE);
else
- IsDeadAA = &getOrCreateAAFor<AAIsDead>(IRP, QueryingAA, DepClassTy::NONE);
+ IsDeadAA = getOrCreateAAFor<AAIsDead>(IRP, QueryingAA, DepClassTy::NONE);
// Don't use recursive reasoning.
- if (QueryingAA == IsDeadAA)
+ if (!IsDeadAA || QueryingAA == IsDeadAA)
return false;
if (IsDeadAA->isAssumedDead()) {
@@ -1511,13 +1645,15 @@ bool Attributor::isAssumedDead(const BasicBlock &BB,
const AbstractAttribute *QueryingAA,
const AAIsDead *FnLivenessAA,
DepClassTy DepClass) {
+ if (!Configuration.UseLiveness)
+ return false;
const Function &F = *BB.getParent();
if (!FnLivenessAA || FnLivenessAA->getAnchorScope() != &F)
- FnLivenessAA = &getOrCreateAAFor<AAIsDead>(IRPosition::function(F),
- QueryingAA, DepClassTy::NONE);
+ FnLivenessAA = getOrCreateAAFor<AAIsDead>(IRPosition::function(F),
+ QueryingAA, DepClassTy::NONE);
// Don't use recursive reasoning.
- if (QueryingAA == FnLivenessAA)
+ if (!FnLivenessAA || QueryingAA == FnLivenessAA)
return false;
if (FnLivenessAA->isAssumedDead(&BB)) {
@@ -1570,8 +1706,8 @@ bool Attributor::checkForAllUses(
const Function *ScopeFn = IRP.getAnchorScope();
const auto *LivenessAA =
- ScopeFn ? &getAAFor<AAIsDead>(QueryingAA, IRPosition::function(*ScopeFn),
- DepClassTy::NONE)
+ ScopeFn ? getAAFor<AAIsDead>(QueryingAA, IRPosition::function(*ScopeFn),
+ DepClassTy::NONE)
: nullptr;
while (!Worklist.empty()) {
@@ -1777,49 +1913,26 @@ bool Attributor::shouldPropagateCallBaseContext(const IRPosition &IRP) {
return EnableCallSiteSpecific;
}
-bool Attributor::checkForAllReturnedValuesAndReturnInsts(
- function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred,
- const AbstractAttribute &QueryingAA) {
+bool Attributor::checkForAllReturnedValues(function_ref<bool(Value &)> Pred,
+ const AbstractAttribute &QueryingAA,
+ AA::ValueScope S,
+ bool RecurseForSelectAndPHI) {
const IRPosition &IRP = QueryingAA.getIRPosition();
- // Since we need to provide return instructions we have to have an exact
- // definition.
const Function *AssociatedFunction = IRP.getAssociatedFunction();
if (!AssociatedFunction)
return false;
- // If this is a call site query we use the call site specific return values
- // and liveness information.
- // TODO: use the function scope once we have call site AAReturnedValues.
- const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
- const auto &AARetVal =
- getAAFor<AAReturnedValues>(QueryingAA, QueryIRP, DepClassTy::REQUIRED);
- if (!AARetVal.getState().isValidState())
- return false;
-
- return AARetVal.checkForAllReturnedValuesAndReturnInsts(Pred);
-}
-
-bool Attributor::checkForAllReturnedValues(
- function_ref<bool(Value &)> Pred, const AbstractAttribute &QueryingAA) {
-
- const IRPosition &IRP = QueryingAA.getIRPosition();
- const Function *AssociatedFunction = IRP.getAssociatedFunction();
- if (!AssociatedFunction)
- return false;
-
- // TODO: use the function scope once we have call site AAReturnedValues.
- const IRPosition &QueryIRP = IRPosition::function(
- *AssociatedFunction, QueryingAA.getCallBaseContext());
- const auto &AARetVal =
- getAAFor<AAReturnedValues>(QueryingAA, QueryIRP, DepClassTy::REQUIRED);
- if (!AARetVal.getState().isValidState())
+ bool UsedAssumedInformation = false;
+ SmallVector<AA::ValueAndContext> Values;
+ if (!getAssumedSimplifiedValues(
+ IRPosition::returned(*AssociatedFunction), &QueryingAA, Values, S,
+ UsedAssumedInformation, RecurseForSelectAndPHI))
return false;
- return AARetVal.checkForAllReturnedValuesAndReturnInsts(
- [&](Value &RV, const SmallSetVector<ReturnInst *, 4> &) {
- return Pred(RV);
- });
+ return llvm::all_of(Values, [&](const AA::ValueAndContext &VAC) {
+ return Pred(*VAC.getValue());
+ });
}
static bool checkForAllInstructionsImpl(
@@ -1863,12 +1976,11 @@ bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
if (!Fn || Fn->isDeclaration())
return false;
- // TODO: use the function scope once we have call site AAReturnedValues.
const IRPosition &QueryIRP = IRPosition::function(*Fn);
const auto *LivenessAA =
- (CheckBBLivenessOnly || CheckPotentiallyDead)
+ CheckPotentiallyDead
? nullptr
- : &(getAAFor<AAIsDead>(QueryingAA, QueryIRP, DepClassTy::NONE));
+ : (getAAFor<AAIsDead>(QueryingAA, QueryIRP, DepClassTy::NONE));
auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*Fn);
if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, &QueryingAA,
@@ -1895,21 +2007,21 @@ bool Attributor::checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
bool Attributor::checkForAllReadWriteInstructions(
function_ref<bool(Instruction &)> Pred, AbstractAttribute &QueryingAA,
bool &UsedAssumedInformation) {
+ TimeTraceScope TS("checkForAllReadWriteInstructions");
const Function *AssociatedFunction =
QueryingAA.getIRPosition().getAssociatedFunction();
if (!AssociatedFunction)
return false;
- // TODO: use the function scope once we have call site AAReturnedValues.
const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction);
- const auto &LivenessAA =
+ const auto *LivenessAA =
getAAFor<AAIsDead>(QueryingAA, QueryIRP, DepClassTy::NONE);
for (Instruction *I :
InfoCache.getReadOrWriteInstsForFunction(*AssociatedFunction)) {
// Skip dead instructions.
- if (isAssumedDead(IRPosition::inst(*I), &QueryingAA, &LivenessAA,
+ if (isAssumedDead(IRPosition::inst(*I), &QueryingAA, LivenessAA,
UsedAssumedInformation))
continue;
@@ -1954,11 +2066,9 @@ void Attributor::runTillFixpoint() {
dbgs() << "[Attributor] InvalidAA: " << *InvalidAA
<< " has " << InvalidAA->Deps.size()
<< " required & optional dependences\n");
- while (!InvalidAA->Deps.empty()) {
- const auto &Dep = InvalidAA->Deps.back();
- InvalidAA->Deps.pop_back();
- AbstractAttribute *DepAA = cast<AbstractAttribute>(Dep.getPointer());
- if (Dep.getInt() == unsigned(DepClassTy::OPTIONAL)) {
+ for (auto &DepIt : InvalidAA->Deps) {
+ AbstractAttribute *DepAA = cast<AbstractAttribute>(DepIt.getPointer());
+ if (DepIt.getInt() == unsigned(DepClassTy::OPTIONAL)) {
DEBUG_WITH_TYPE(VERBOSE_DEBUG_TYPE,
dbgs() << " - recompute: " << *DepAA);
Worklist.insert(DepAA);
@@ -1973,16 +2083,16 @@ void Attributor::runTillFixpoint() {
else
ChangedAAs.push_back(DepAA);
}
+ InvalidAA->Deps.clear();
}
// Add all abstract attributes that are potentially dependent on one that
// changed to the work list.
- for (AbstractAttribute *ChangedAA : ChangedAAs)
- while (!ChangedAA->Deps.empty()) {
- Worklist.insert(
- cast<AbstractAttribute>(ChangedAA->Deps.back().getPointer()));
- ChangedAA->Deps.pop_back();
- }
+ for (AbstractAttribute *ChangedAA : ChangedAAs) {
+ for (auto &DepIt : ChangedAA->Deps)
+ Worklist.insert(cast<AbstractAttribute>(DepIt.getPointer()));
+ ChangedAA->Deps.clear();
+ }
LLVM_DEBUG(dbgs() << "[Attributor] #Iteration: " << IterationCounter
<< ", Worklist+Dependent size: " << Worklist.size()
@@ -2019,8 +2129,7 @@ void Attributor::runTillFixpoint() {
QueryAAsAwaitingUpdate.end());
QueryAAsAwaitingUpdate.clear();
- } while (!Worklist.empty() &&
- (IterationCounter++ < MaxIterations || VerifyMaxFixpointIterations));
+ } while (!Worklist.empty() && (IterationCounter++ < MaxIterations));
if (IterationCounter > MaxIterations && !Functions.empty()) {
auto Remark = [&](OptimizationRemarkMissed ORM) {
@@ -2053,11 +2162,9 @@ void Attributor::runTillFixpoint() {
NumAttributesTimedOut++;
}
- while (!ChangedAA->Deps.empty()) {
- ChangedAAs.push_back(
- cast<AbstractAttribute>(ChangedAA->Deps.back().getPointer()));
- ChangedAA->Deps.pop_back();
- }
+ for (auto &DepIt : ChangedAA->Deps)
+ ChangedAAs.push_back(cast<AbstractAttribute>(DepIt.getPointer()));
+ ChangedAA->Deps.clear();
}
LLVM_DEBUG({
@@ -2065,13 +2172,6 @@ void Attributor::runTillFixpoint() {
dbgs() << "\n[Attributor] Finalized " << Visited.size()
<< " abstract attributes.\n";
});
-
- if (VerifyMaxFixpointIterations && IterationCounter != MaxIterations) {
- errs() << "\n[Attributor] Fixpoint iteration done after: "
- << IterationCounter << "/" << MaxIterations << " iterations\n";
- llvm_unreachable("The fixpoint was not reached with exactly the number of "
- "specified iterations!");
- }
}
void Attributor::registerForUpdate(AbstractAttribute &AA) {
@@ -2141,17 +2241,31 @@ ChangeStatus Attributor::manifestAttributes() {
(void)NumFinalAAs;
if (NumFinalAAs != DG.SyntheticRoot.Deps.size()) {
- for (unsigned u = NumFinalAAs; u < DG.SyntheticRoot.Deps.size(); ++u)
+ auto DepIt = DG.SyntheticRoot.Deps.begin();
+ for (unsigned u = 0; u < NumFinalAAs; ++u)
+ ++DepIt;
+ for (unsigned u = NumFinalAAs; u < DG.SyntheticRoot.Deps.size();
+ ++u, ++DepIt) {
errs() << "Unexpected abstract attribute: "
- << cast<AbstractAttribute>(DG.SyntheticRoot.Deps[u].getPointer())
- << " :: "
- << cast<AbstractAttribute>(DG.SyntheticRoot.Deps[u].getPointer())
+ << cast<AbstractAttribute>(DepIt->getPointer()) << " :: "
+ << cast<AbstractAttribute>(DepIt->getPointer())
->getIRPosition()
.getAssociatedValue()
<< "\n";
+ }
llvm_unreachable("Expected the final number of abstract attributes to "
"remain unchanged!");
}
+
+ for (auto &It : AttrsMap) {
+ AttributeList &AL = It.getSecond();
+ const IRPosition &IRP =
+ isa<Function>(It.getFirst())
+ ? IRPosition::function(*cast<Function>(It.getFirst()))
+ : IRPosition::callsite_function(*cast<CallBase>(It.getFirst()));
+ IRP.setAttrList(AL);
+ }
+
return ManifestChange;
}
@@ -2271,9 +2385,9 @@ ChangeStatus Attributor::cleanupIR() {
if (CB->isArgOperand(U)) {
unsigned Idx = CB->getArgOperandNo(U);
CB->removeParamAttr(Idx, Attribute::NoUndef);
- Function *Fn = CB->getCalledFunction();
- if (Fn && Fn->arg_size() > Idx)
- Fn->removeParamAttr(Idx, Attribute::NoUndef);
+ auto *Callee = dyn_cast_if_present<Function>(CB->getCalledOperand());
+ if (Callee && Callee->arg_size() > Idx)
+ Callee->removeParamAttr(Idx, Attribute::NoUndef);
}
}
if (isa<Constant>(NewV) && isa<BranchInst>(U->getUser())) {
@@ -2484,9 +2598,9 @@ ChangeStatus Attributor::run() {
}
ChangeStatus Attributor::updateAA(AbstractAttribute &AA) {
- TimeTraceScope TimeScope(
- AA.getName() + std::to_string(AA.getIRPosition().getPositionKind()) +
- "::updateAA");
+ TimeTraceScope TimeScope("updateAA", [&]() {
+ return AA.getName() + std::to_string(AA.getIRPosition().getPositionKind());
+ });
assert(Phase == AttributorPhase::UPDATE &&
"We can update AA only in the update stage!");
@@ -2672,7 +2786,10 @@ bool Attributor::isValidFunctionSignatureRewrite(
ACS.getInstruction()->getType() !=
ACS.getCalledFunction()->getReturnType())
return false;
- if (ACS.getCalledOperand()->getType() != Fn->getType())
+ if (cast<CallBase>(ACS.getInstruction())->getCalledOperand()->getType() !=
+ Fn->getType())
+ return false;
+ if (ACS.getNumArgOperands() != Fn->arg_size())
return false;
// Forbid must-tail calls for now.
return !ACS.isCallbackCall() && !ACS.getInstruction()->isMustTailCall();
@@ -2698,7 +2815,8 @@ bool Attributor::isValidFunctionSignatureRewrite(
// Avoid callbacks for now.
bool UsedAssumedInformation = false;
if (!checkForAllCallSites(CallSiteCanBeChanged, *Fn, true, nullptr,
- UsedAssumedInformation)) {
+ UsedAssumedInformation,
+ /* CheckPotentiallyDead */ true)) {
LLVM_DEBUG(dbgs() << "[Attributor] Cannot rewrite all call sites\n");
return false;
}
@@ -3041,7 +3159,8 @@ void InformationCache::initializeInformationCache(const Function &CF,
AddToAssumeUsesMap(*Assume->getArgOperand(0));
} else if (cast<CallInst>(I).isMustTailCall()) {
FI.ContainsMustTailCall = true;
- if (const Function *Callee = cast<CallInst>(I).getCalledFunction())
+ if (auto *Callee = dyn_cast_if_present<Function>(
+ cast<CallInst>(I).getCalledOperand()))
getFunctionInfo(*Callee).CalledViaMustTail = true;
}
[[fallthrough]];
@@ -3077,10 +3196,6 @@ void InformationCache::initializeInformationCache(const Function &CF,
InlineableFunctions.insert(&F);
}
-AAResults *InformationCache::getAAResultsForFunction(const Function &F) {
- return AG.getAnalysis<AAManager>(F);
-}
-
InformationCache::FunctionInfo::~FunctionInfo() {
// The instruction vectors are allocated using a BumpPtrAllocator, we need to
// manually destroy them.
@@ -3111,11 +3226,21 @@ void Attributor::rememberDependences() {
DI.DepClass == DepClassTy::OPTIONAL) &&
"Expected required or optional dependence (1 bit)!");
auto &DepAAs = const_cast<AbstractAttribute &>(*DI.FromAA).Deps;
- DepAAs.push_back(AbstractAttribute::DepTy(
+ DepAAs.insert(AbstractAttribute::DepTy(
const_cast<AbstractAttribute *>(DI.ToAA), unsigned(DI.DepClass)));
}
}
+template <Attribute::AttrKind AK, typename AAType>
+void Attributor::checkAndQueryIRAttr(const IRPosition &IRP,
+ AttributeSet Attrs) {
+ bool IsKnown;
+ if (!Attrs.hasAttribute(AK))
+ if (!AA::hasAssumedIRAttr<AK>(*this, nullptr, IRP, DepClassTy::NONE,
+ IsKnown))
+ getOrCreateAAFor<AAType>(IRP);
+}
+
void Attributor::identifyDefaultAbstractAttributes(Function &F) {
if (!VisitedFunctions.insert(&F).second)
return;
@@ -3134,89 +3259,114 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
}
IRPosition FPos = IRPosition::function(F);
+ bool IsIPOAmendable = isFunctionIPOAmendable(F);
+ auto Attrs = F.getAttributes();
+ auto FnAttrs = Attrs.getFnAttrs();
// Check for dead BasicBlocks in every function.
// We need dead instruction detection because we do not want to deal with
// broken IR in which SSA rules do not apply.
getOrCreateAAFor<AAIsDead>(FPos);
- // Every function might be "will-return".
- getOrCreateAAFor<AAWillReturn>(FPos);
-
- // Every function might contain instructions that cause "undefined behavior".
+ // Every function might contain instructions that cause "undefined
+ // behavior".
getOrCreateAAFor<AAUndefinedBehavior>(FPos);
- // Every function can be nounwind.
- getOrCreateAAFor<AANoUnwind>(FPos);
+ // Every function might be applicable for Heap-To-Stack conversion.
+ if (EnableHeapToStack)
+ getOrCreateAAFor<AAHeapToStack>(FPos);
- // Every function might be marked "nosync"
- getOrCreateAAFor<AANoSync>(FPos);
+ // Every function might be "must-progress".
+ checkAndQueryIRAttr<Attribute::MustProgress, AAMustProgress>(FPos, FnAttrs);
// Every function might be "no-free".
- getOrCreateAAFor<AANoFree>(FPos);
+ checkAndQueryIRAttr<Attribute::NoFree, AANoFree>(FPos, FnAttrs);
- // Every function might be "no-return".
- getOrCreateAAFor<AANoReturn>(FPos);
+ // Every function might be "will-return".
+ checkAndQueryIRAttr<Attribute::WillReturn, AAWillReturn>(FPos, FnAttrs);
- // Every function might be "no-recurse".
- getOrCreateAAFor<AANoRecurse>(FPos);
+ // Everything that is visible from the outside (=function, argument, return
+ // positions) cannot be changed if the function is not IPO amendable. We can
+ // however analyse the code inside.
+ if (IsIPOAmendable) {
- // Every function might be "readnone/readonly/writeonly/...".
- getOrCreateAAFor<AAMemoryBehavior>(FPos);
+ // Every function can be nounwind.
+ checkAndQueryIRAttr<Attribute::NoUnwind, AANoUnwind>(FPos, FnAttrs);
- // Every function can be "readnone/argmemonly/inaccessiblememonly/...".
- getOrCreateAAFor<AAMemoryLocation>(FPos);
+ // Every function might be marked "nosync"
+ checkAndQueryIRAttr<Attribute::NoSync, AANoSync>(FPos, FnAttrs);
- // Every function can track active assumptions.
- getOrCreateAAFor<AAAssumptionInfo>(FPos);
+ // Every function might be "no-return".
+ checkAndQueryIRAttr<Attribute::NoReturn, AANoReturn>(FPos, FnAttrs);
- // Every function might be applicable for Heap-To-Stack conversion.
- if (EnableHeapToStack)
- getOrCreateAAFor<AAHeapToStack>(FPos);
+ // Every function might be "no-recurse".
+ checkAndQueryIRAttr<Attribute::NoRecurse, AANoRecurse>(FPos, FnAttrs);
- // Return attributes are only appropriate if the return type is non void.
- Type *ReturnType = F.getReturnType();
- if (!ReturnType->isVoidTy()) {
- // Argument attribute "returned" --- Create only one per function even
- // though it is an argument attribute.
- getOrCreateAAFor<AAReturnedValues>(FPos);
+ // Every function can be "non-convergent".
+ if (Attrs.hasFnAttr(Attribute::Convergent))
+ getOrCreateAAFor<AANonConvergent>(FPos);
- IRPosition RetPos = IRPosition::returned(F);
+ // Every function might be "readnone/readonly/writeonly/...".
+ getOrCreateAAFor<AAMemoryBehavior>(FPos);
- // Every returned value might be dead.
- getOrCreateAAFor<AAIsDead>(RetPos);
+ // Every function can be "readnone/argmemonly/inaccessiblememonly/...".
+ getOrCreateAAFor<AAMemoryLocation>(FPos);
- // Every function might be simplified.
- bool UsedAssumedInformation = false;
- getAssumedSimplified(RetPos, nullptr, UsedAssumedInformation,
- AA::Intraprocedural);
+ // Every function can track active assumptions.
+ getOrCreateAAFor<AAAssumptionInfo>(FPos);
- // Every returned value might be marked noundef.
- getOrCreateAAFor<AANoUndef>(RetPos);
+ // Return attributes are only appropriate if the return type is non void.
+ Type *ReturnType = F.getReturnType();
+ if (!ReturnType->isVoidTy()) {
+ IRPosition RetPos = IRPosition::returned(F);
+ AttributeSet RetAttrs = Attrs.getRetAttrs();
- if (ReturnType->isPointerTy()) {
+ // Every returned value might be dead.
+ getOrCreateAAFor<AAIsDead>(RetPos);
- // Every function with pointer return type might be marked align.
- getOrCreateAAFor<AAAlign>(RetPos);
+ // Every function might be simplified.
+ bool UsedAssumedInformation = false;
+ getAssumedSimplified(RetPos, nullptr, UsedAssumedInformation,
+ AA::Intraprocedural);
+
+ // Every returned value might be marked noundef.
+ checkAndQueryIRAttr<Attribute::NoUndef, AANoUndef>(RetPos, RetAttrs);
+
+ if (ReturnType->isPointerTy()) {
- // Every function with pointer return type might be marked nonnull.
- getOrCreateAAFor<AANonNull>(RetPos);
+ // Every function with pointer return type might be marked align.
+ getOrCreateAAFor<AAAlign>(RetPos);
- // Every function with pointer return type might be marked noalias.
- getOrCreateAAFor<AANoAlias>(RetPos);
+ // Every function with pointer return type might be marked nonnull.
+ checkAndQueryIRAttr<Attribute::NonNull, AANonNull>(RetPos, RetAttrs);
- // Every function with pointer return type might be marked
- // dereferenceable.
- getOrCreateAAFor<AADereferenceable>(RetPos);
+ // Every function with pointer return type might be marked noalias.
+ checkAndQueryIRAttr<Attribute::NoAlias, AANoAlias>(RetPos, RetAttrs);
+
+ // Every function with pointer return type might be marked
+ // dereferenceable.
+ getOrCreateAAFor<AADereferenceable>(RetPos);
+ } else if (AttributeFuncs::isNoFPClassCompatibleType(ReturnType)) {
+ getOrCreateAAFor<AANoFPClass>(RetPos);
+ }
}
}
for (Argument &Arg : F.args()) {
IRPosition ArgPos = IRPosition::argument(Arg);
+ auto ArgNo = Arg.getArgNo();
+ AttributeSet ArgAttrs = Attrs.getParamAttrs(ArgNo);
+
+ if (!IsIPOAmendable) {
+ if (Arg.getType()->isPointerTy())
+ // Every argument with pointer type might be marked nofree.
+ checkAndQueryIRAttr<Attribute::NoFree, AANoFree>(ArgPos, ArgAttrs);
+ continue;
+ }
- // Every argument might be simplified. We have to go through the Attributor
- // interface though as outside AAs can register custom simplification
- // callbacks.
+ // Every argument might be simplified. We have to go through the
+ // Attributor interface though as outside AAs can register custom
+ // simplification callbacks.
bool UsedAssumedInformation = false;
getAssumedSimplified(ArgPos, /* AA */ nullptr, UsedAssumedInformation,
AA::Intraprocedural);
@@ -3225,14 +3375,14 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
getOrCreateAAFor<AAIsDead>(ArgPos);
// Every argument might be marked noundef.
- getOrCreateAAFor<AANoUndef>(ArgPos);
+ checkAndQueryIRAttr<Attribute::NoUndef, AANoUndef>(ArgPos, ArgAttrs);
if (Arg.getType()->isPointerTy()) {
// Every argument with pointer type might be marked nonnull.
- getOrCreateAAFor<AANonNull>(ArgPos);
+ checkAndQueryIRAttr<Attribute::NonNull, AANonNull>(ArgPos, ArgAttrs);
// Every argument with pointer type might be marked noalias.
- getOrCreateAAFor<AANoAlias>(ArgPos);
+ checkAndQueryIRAttr<Attribute::NoAlias, AANoAlias>(ArgPos, ArgAttrs);
// Every argument with pointer type might be marked dereferenceable.
getOrCreateAAFor<AADereferenceable>(ArgPos);
@@ -3241,17 +3391,20 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
getOrCreateAAFor<AAAlign>(ArgPos);
// Every argument with pointer type might be marked nocapture.
- getOrCreateAAFor<AANoCapture>(ArgPos);
+ checkAndQueryIRAttr<Attribute::NoCapture, AANoCapture>(ArgPos, ArgAttrs);
// Every argument with pointer type might be marked
// "readnone/readonly/writeonly/..."
getOrCreateAAFor<AAMemoryBehavior>(ArgPos);
// Every argument with pointer type might be marked nofree.
- getOrCreateAAFor<AANoFree>(ArgPos);
+ checkAndQueryIRAttr<Attribute::NoFree, AANoFree>(ArgPos, ArgAttrs);
- // Every argument with pointer type might be privatizable (or promotable)
+ // Every argument with pointer type might be privatizable (or
+ // promotable)
getOrCreateAAFor<AAPrivatizablePtr>(ArgPos);
+ } else if (AttributeFuncs::isNoFPClassCompatibleType(Arg.getType())) {
+ getOrCreateAAFor<AANoFPClass>(ArgPos);
}
}
@@ -3264,7 +3417,7 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// users. The return value might be dead if there are no live users.
getOrCreateAAFor<AAIsDead>(CBInstPos);
- Function *Callee = CB.getCalledFunction();
+ Function *Callee = dyn_cast_if_present<Function>(CB.getCalledOperand());
// TODO: Even if the callee is not known now we might be able to simplify
// the call/callee.
if (!Callee)
@@ -3280,16 +3433,20 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
return true;
if (!Callee->getReturnType()->isVoidTy() && !CB.use_empty()) {
-
IRPosition CBRetPos = IRPosition::callsite_returned(CB);
bool UsedAssumedInformation = false;
getAssumedSimplified(CBRetPos, nullptr, UsedAssumedInformation,
AA::Intraprocedural);
+
+ if (AttributeFuncs::isNoFPClassCompatibleType(Callee->getReturnType()))
+ getOrCreateAAFor<AANoFPClass>(CBInstPos);
}
+ const AttributeList &CBAttrs = CBFnPos.getAttrList();
for (int I = 0, E = CB.arg_size(); I < E; ++I) {
IRPosition CBArgPos = IRPosition::callsite_argument(CB, I);
+ AttributeSet CBArgAttrs = CBAttrs.getParamAttrs(I);
// Every call site argument might be dead.
getOrCreateAAFor<AAIsDead>(CBArgPos);
@@ -3302,19 +3459,26 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
AA::Intraprocedural);
// Every call site argument might be marked "noundef".
- getOrCreateAAFor<AANoUndef>(CBArgPos);
+ checkAndQueryIRAttr<Attribute::NoUndef, AANoUndef>(CBArgPos, CBArgAttrs);
+
+ Type *ArgTy = CB.getArgOperand(I)->getType();
+
+ if (!ArgTy->isPointerTy()) {
+ if (AttributeFuncs::isNoFPClassCompatibleType(ArgTy))
+ getOrCreateAAFor<AANoFPClass>(CBArgPos);
- if (!CB.getArgOperand(I)->getType()->isPointerTy())
continue;
+ }
// Call site argument attribute "non-null".
- getOrCreateAAFor<AANonNull>(CBArgPos);
+ checkAndQueryIRAttr<Attribute::NonNull, AANonNull>(CBArgPos, CBArgAttrs);
// Call site argument attribute "nocapture".
- getOrCreateAAFor<AANoCapture>(CBArgPos);
+ checkAndQueryIRAttr<Attribute::NoCapture, AANoCapture>(CBArgPos,
+ CBArgAttrs);
// Call site argument attribute "no-alias".
- getOrCreateAAFor<AANoAlias>(CBArgPos);
+ checkAndQueryIRAttr<Attribute::NoAlias, AANoAlias>(CBArgPos, CBArgAttrs);
// Call site argument attribute "dereferenceable".
getOrCreateAAFor<AADereferenceable>(CBArgPos);
@@ -3324,10 +3488,11 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
// Call site argument attribute
// "readnone/readonly/writeonly/..."
- getOrCreateAAFor<AAMemoryBehavior>(CBArgPos);
+ if (!CBAttrs.hasParamAttr(I, Attribute::ReadNone))
+ getOrCreateAAFor<AAMemoryBehavior>(CBArgPos);
// Call site argument attribute "nofree".
- getOrCreateAAFor<AANoFree>(CBArgPos);
+ checkAndQueryIRAttr<Attribute::NoFree, AANoFree>(CBArgPos, CBArgAttrs);
}
return true;
};
@@ -3344,18 +3509,21 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
assert(Success && "Expected the check call to be successful!");
auto LoadStorePred = [&](Instruction &I) -> bool {
- if (isa<LoadInst>(I)) {
- getOrCreateAAFor<AAAlign>(
- IRPosition::value(*cast<LoadInst>(I).getPointerOperand()));
+ if (auto *LI = dyn_cast<LoadInst>(&I)) {
+ getOrCreateAAFor<AAAlign>(IRPosition::value(*LI->getPointerOperand()));
if (SimplifyAllLoads)
getAssumedSimplified(IRPosition::value(I), nullptr,
UsedAssumedInformation, AA::Intraprocedural);
+ getOrCreateAAFor<AAAddressSpace>(
+ IRPosition::value(*LI->getPointerOperand()));
} else {
auto &SI = cast<StoreInst>(I);
getOrCreateAAFor<AAIsDead>(IRPosition::inst(I));
getAssumedSimplified(IRPosition::value(*SI.getValueOperand()), nullptr,
UsedAssumedInformation, AA::Intraprocedural);
getOrCreateAAFor<AAAlign>(IRPosition::value(*SI.getPointerOperand()));
+ getOrCreateAAFor<AAAddressSpace>(
+ IRPosition::value(*SI.getPointerOperand()));
}
return true;
};
@@ -3461,7 +3629,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS,
return OS;
}
-void AbstractAttribute::print(raw_ostream &OS) const {
+void AbstractAttribute::print(Attributor *A, raw_ostream &OS) const {
OS << "[";
OS << getName();
OS << "] for CtxI ";
@@ -3473,7 +3641,7 @@ void AbstractAttribute::print(raw_ostream &OS) const {
} else
OS << "<<null inst>>";
- OS << " at position " << getIRPosition() << " with state " << getAsStr()
+ OS << " at position " << getIRPosition() << " with state " << getAsStr(A)
<< '\n';
}
@@ -3679,11 +3847,11 @@ template <> struct GraphTraits<AADepGraphNode *> {
using EdgeRef = PointerIntPair<AADepGraphNode *, 1>;
static NodeRef getEntryNode(AADepGraphNode *DGN) { return DGN; }
- static NodeRef DepGetVal(DepTy &DT) { return DT.getPointer(); }
+ static NodeRef DepGetVal(const DepTy &DT) { return DT.getPointer(); }
using ChildIteratorType =
- mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
- using ChildEdgeIteratorType = TinyPtrVector<DepTy>::iterator;
+ mapped_iterator<AADepGraphNode::DepSetTy::iterator, decltype(&DepGetVal)>;
+ using ChildEdgeIteratorType = AADepGraphNode::DepSetTy::iterator;
static ChildIteratorType child_begin(NodeRef N) { return N->child_begin(); }
@@ -3695,7 +3863,7 @@ struct GraphTraits<AADepGraph *> : public GraphTraits<AADepGraphNode *> {
static NodeRef getEntryNode(AADepGraph *DG) { return DG->GetEntryNode(); }
using nodes_iterator =
- mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
+ mapped_iterator<AADepGraphNode::DepSetTy::iterator, decltype(&DepGetVal)>;
static nodes_iterator nodes_begin(AADepGraph *DG) { return DG->begin(); }
@@ -3715,98 +3883,3 @@ template <> struct DOTGraphTraits<AADepGraph *> : public DefaultDOTGraphTraits {
};
} // end namespace llvm
-
-namespace {
-
-struct AttributorLegacyPass : public ModulePass {
- static char ID;
-
- AttributorLegacyPass() : ModulePass(ID) {
- initializeAttributorLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- if (skipModule(M))
- return false;
-
- AnalysisGetter AG;
- SetVector<Function *> Functions;
- for (Function &F : M)
- Functions.insert(&F);
-
- CallGraphUpdater CGUpdater;
- BumpPtrAllocator Allocator;
- InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ nullptr);
- return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater,
- /* DeleteFns*/ true,
- /* IsModulePass */ true);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- // FIXME: Think about passes we will preserve and add them here.
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
-};
-
-struct AttributorCGSCCLegacyPass : public CallGraphSCCPass {
- static char ID;
-
- AttributorCGSCCLegacyPass() : CallGraphSCCPass(ID) {
- initializeAttributorCGSCCLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnSCC(CallGraphSCC &SCC) override {
- if (skipSCC(SCC))
- return false;
-
- SetVector<Function *> Functions;
- for (CallGraphNode *CGN : SCC)
- if (Function *Fn = CGN->getFunction())
- if (!Fn->isDeclaration())
- Functions.insert(Fn);
-
- if (Functions.empty())
- return false;
-
- AnalysisGetter AG;
- CallGraph &CG = const_cast<CallGraph &>(SCC.getCallGraph());
- CallGraphUpdater CGUpdater;
- CGUpdater.initialize(CG, SCC);
- Module &M = *Functions.back()->getParent();
- BumpPtrAllocator Allocator;
- InformationCache InfoCache(M, AG, Allocator, /* CGSCC */ &Functions);
- return runAttributorOnFunctions(InfoCache, Functions, AG, CGUpdater,
- /* DeleteFns */ false,
- /* IsModulePass */ false);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- // FIXME: Think about passes we will preserve and add them here.
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- CallGraphSCCPass::getAnalysisUsage(AU);
- }
-};
-
-} // end anonymous namespace
-
-Pass *llvm::createAttributorLegacyPass() { return new AttributorLegacyPass(); }
-Pass *llvm::createAttributorCGSCCLegacyPass() {
- return new AttributorCGSCCLegacyPass();
-}
-
-char AttributorLegacyPass::ID = 0;
-char AttributorCGSCCLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(AttributorLegacyPass, "attributor",
- "Deduce and propagate attributes", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(AttributorLegacyPass, "attributor",
- "Deduce and propagate attributes", false, false)
-INITIALIZE_PASS_BEGIN(AttributorCGSCCLegacyPass, "attributor-cgscc",
- "Deduce and propagate attributes (CGSCC pass)", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_END(AttributorCGSCCLegacyPass, "attributor-cgscc",
- "Deduce and propagate attributes (CGSCC pass)", false,
- false)
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 001ef55ba472..3a9a89d61355 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -24,6 +24,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -38,6 +39,7 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Assumptions.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -52,6 +54,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/NoFolder.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
@@ -156,10 +159,11 @@ PIPE_OPERATOR(AAIsDead)
PIPE_OPERATOR(AANoUnwind)
PIPE_OPERATOR(AANoSync)
PIPE_OPERATOR(AANoRecurse)
+PIPE_OPERATOR(AANonConvergent)
PIPE_OPERATOR(AAWillReturn)
PIPE_OPERATOR(AANoReturn)
-PIPE_OPERATOR(AAReturnedValues)
PIPE_OPERATOR(AANonNull)
+PIPE_OPERATOR(AAMustProgress)
PIPE_OPERATOR(AANoAlias)
PIPE_OPERATOR(AADereferenceable)
PIPE_OPERATOR(AAAlign)
@@ -177,11 +181,13 @@ PIPE_OPERATOR(AAUndefinedBehavior)
PIPE_OPERATOR(AAPotentialConstantValues)
PIPE_OPERATOR(AAPotentialValues)
PIPE_OPERATOR(AANoUndef)
+PIPE_OPERATOR(AANoFPClass)
PIPE_OPERATOR(AACallEdges)
PIPE_OPERATOR(AAInterFnReachability)
PIPE_OPERATOR(AAPointerInfo)
PIPE_OPERATOR(AAAssumptionInfo)
PIPE_OPERATOR(AAUnderlyingObjects)
+PIPE_OPERATOR(AAAddressSpace)
#undef PIPE_OPERATOR
@@ -196,6 +202,19 @@ ChangeStatus clampStateAndIndicateChange<DerefState>(DerefState &S,
} // namespace llvm
+static bool mayBeInCycle(const CycleInfo *CI, const Instruction *I,
+ bool HeaderOnly, Cycle **CPtr = nullptr) {
+ if (!CI)
+ return true;
+ auto *BB = I->getParent();
+ auto *C = CI->getCycle(BB);
+ if (!C)
+ return false;
+ if (CPtr)
+ *CPtr = C;
+ return !HeaderOnly || BB == C->getHeader();
+}
+
/// Checks if a type could have padding bytes.
static bool isDenselyPacked(Type *Ty, const DataLayout &DL) {
// There is no size information, so be conservative.
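mayBeInCycle above is deliberately conservative: without CycleInfo it answers "maybe", without an enclosing cycle it answers "no", and otherwise HeaderOnly restricts the positive answer to the cycle header. A compact restatement of that decision with plain booleans; this is a sketch, not LLVM code.

// Decision table mirrored from mayBeInCycle:
//   CycleInfo? | enclosing cycle? | HeaderOnly? | result
//   none       | -                | -           | true  (assume the worst)
//   yes        | no               | -           | false
//   yes        | yes              | no          | true
//   yes        | yes              | yes         | true only for the header block
static bool mayBeInCycleSketch(bool HaveCycleInfo, bool InCycle, bool HeaderOnly,
                               bool IsHeaderBlock) {
  if (!HaveCycleInfo)
    return true;
  if (!InCycle)
    return false;
  return !HeaderOnly || IsHeaderBlock;
}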
@@ -317,12 +336,14 @@ stripAndAccumulateOffsets(Attributor &A, const AbstractAttribute &QueryingAA,
auto AttributorAnalysis = [&](Value &V, APInt &ROffset) -> bool {
const IRPosition &Pos = IRPosition::value(V);
// Only track dependence if we are going to use the assumed info.
- const AAValueConstantRange &ValueConstantRangeAA =
+ const AAValueConstantRange *ValueConstantRangeAA =
A.getAAFor<AAValueConstantRange>(QueryingAA, Pos,
UseAssumed ? DepClassTy::OPTIONAL
: DepClassTy::NONE);
- ConstantRange Range = UseAssumed ? ValueConstantRangeAA.getAssumed()
- : ValueConstantRangeAA.getKnown();
+ if (!ValueConstantRangeAA)
+ return false;
+ ConstantRange Range = UseAssumed ? ValueConstantRangeAA->getAssumed()
+ : ValueConstantRangeAA->getKnown();
if (Range.isFullSet())
return false;
@@ -355,7 +376,9 @@ getMinimalBaseOfPointer(Attributor &A, const AbstractAttribute &QueryingAA,
/// Clamp the information known for all returned values of a function
/// (identified by \p QueryingAA) into \p S.
-template <typename AAType, typename StateType = typename AAType::StateType>
+template <typename AAType, typename StateType = typename AAType::StateType,
+ Attribute::AttrKind IRAttributeKind = Attribute::None,
+ bool RecurseForSelectAndPHI = true>
static void clampReturnedValueStates(
Attributor &A, const AAType &QueryingAA, StateType &S,
const IRPosition::CallBaseContext *CBContext = nullptr) {
@@ -376,11 +399,20 @@ static void clampReturnedValueStates(
// Callback for each possibly returned value.
auto CheckReturnValue = [&](Value &RV) -> bool {
const IRPosition &RVPos = IRPosition::value(RV, CBContext);
- const AAType &AA =
+ // If possible, use the hasAssumedIRAttr interface.
+ if (IRAttributeKind != Attribute::None) {
+ bool IsKnown;
+ return AA::hasAssumedIRAttr<IRAttributeKind>(
+ A, &QueryingAA, RVPos, DepClassTy::REQUIRED, IsKnown);
+ }
+
+ const AAType *AA =
A.getAAFor<AAType>(QueryingAA, RVPos, DepClassTy::REQUIRED);
- LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV << " AA: " << AA.getAsStr()
- << " @ " << RVPos << "\n");
- const StateType &AAS = AA.getState();
+ if (!AA)
+ return false;
+ LLVM_DEBUG(dbgs() << "[Attributor] RV: " << RV
+ << " AA: " << AA->getAsStr(&A) << " @ " << RVPos << "\n");
+ const StateType &AAS = AA->getState();
if (!T)
T = StateType::getBestState(AAS);
*T &= AAS;
@@ -389,7 +421,9 @@ static void clampReturnedValueStates(
return T->isValidState();
};
- if (!A.checkForAllReturnedValues(CheckReturnValue, QueryingAA))
+ if (!A.checkForAllReturnedValues(CheckReturnValue, QueryingAA,
+ AA::ValueScope::Intraprocedural,
+ RecurseForSelectAndPHI))
S.indicatePessimisticFixpoint();
else if (T)
S ^= *T;
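// Sketch of the attribute query pattern introduced above (names taken from
// this file): when a concrete IRAttributeKind is supplied, the clamp helpers
// skip the full getAAFor<> lookup and use the boolean interface, whose
// return value is the assumed answer while the out-parameter reports whether
// that answer is already known.
//
//   bool IsKnown;
//   bool Assumed = AA::hasAssumedIRAttr<Attribute::NoUnwind>(
//       A, &QueryingAA, RVPos, DepClassTy::REQUIRED, IsKnown);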
@@ -399,7 +433,9 @@ namespace {
/// Helper class for generic deduction: return value -> returned position.
template <typename AAType, typename BaseType,
typename StateType = typename BaseType::StateType,
- bool PropagateCallBaseContext = false>
+ bool PropagateCallBaseContext = false,
+ Attribute::AttrKind IRAttributeKind = Attribute::None,
+ bool RecurseForSelectAndPHI = true>
struct AAReturnedFromReturnedValues : public BaseType {
AAReturnedFromReturnedValues(const IRPosition &IRP, Attributor &A)
: BaseType(IRP, A) {}
@@ -407,7 +443,7 @@ struct AAReturnedFromReturnedValues : public BaseType {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
StateType S(StateType::getBestState(this->getState()));
- clampReturnedValueStates<AAType, StateType>(
+ clampReturnedValueStates<AAType, StateType, IRAttributeKind, RecurseForSelectAndPHI>(
A, *this, S,
PropagateCallBaseContext ? this->getCallBaseContext() : nullptr);
// TODO: If we know we visited all returned values, thus none are assumed
@@ -418,7 +454,8 @@ struct AAReturnedFromReturnedValues : public BaseType {
/// Clamp the information known at all call sites for a given argument
/// (identified by \p QueryingAA) into \p S.
-template <typename AAType, typename StateType = typename AAType::StateType>
+template <typename AAType, typename StateType = typename AAType::StateType,
+ Attribute::AttrKind IRAttributeKind = Attribute::None>
static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
StateType &S) {
LLVM_DEBUG(dbgs() << "[Attributor] Clamp call site argument states for "
@@ -442,11 +479,21 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
return false;
- const AAType &AA =
+ // If possible, use the hasAssumedIRAttr interface.
+ if (IRAttributeKind != Attribute::None) {
+ bool IsKnown;
+ return AA::hasAssumedIRAttr<IRAttributeKind>(
+ A, &QueryingAA, ACSArgPos, DepClassTy::REQUIRED, IsKnown);
+ }
+
+ const AAType *AA =
A.getAAFor<AAType>(QueryingAA, ACSArgPos, DepClassTy::REQUIRED);
+ if (!AA)
+ return false;
LLVM_DEBUG(dbgs() << "[Attributor] ACS: " << *ACS.getInstruction()
- << " AA: " << AA.getAsStr() << " @" << ACSArgPos << "\n");
- const StateType &AAS = AA.getState();
+ << " AA: " << AA->getAsStr(&A) << " @" << ACSArgPos
+ << "\n");
+ const StateType &AAS = AA->getState();
if (!T)
T = StateType::getBestState(AAS);
*T &= AAS;
@@ -466,7 +513,8 @@ static void clampCallSiteArgumentStates(Attributor &A, const AAType &QueryingAA,
/// This function is the bridge between argument position and the call base
/// context.
template <typename AAType, typename BaseType,
- typename StateType = typename AAType::StateType>
+ typename StateType = typename AAType::StateType,
+ Attribute::AttrKind IRAttributeKind = Attribute::None>
bool getArgumentStateFromCallBaseContext(Attributor &A,
BaseType &QueryingAttribute,
IRPosition &Pos, StateType &State) {
@@ -478,12 +526,21 @@ bool getArgumentStateFromCallBaseContext(Attributor &A,
int ArgNo = Pos.getCallSiteArgNo();
assert(ArgNo >= 0 && "Invalid Arg No!");
+ const IRPosition CBArgPos = IRPosition::callsite_argument(*CBContext, ArgNo);
+
+ // If possible, use the hasAssumedIRAttr interface.
+ if (IRAttributeKind != Attribute::None) {
+ bool IsKnown;
+ return AA::hasAssumedIRAttr<IRAttributeKind>(
+ A, &QueryingAttribute, CBArgPos, DepClassTy::REQUIRED, IsKnown);
+ }
- const auto &AA = A.getAAFor<AAType>(
- QueryingAttribute, IRPosition::callsite_argument(*CBContext, ArgNo),
- DepClassTy::REQUIRED);
+ const auto *AA =
+ A.getAAFor<AAType>(QueryingAttribute, CBArgPos, DepClassTy::REQUIRED);
+ if (!AA)
+ return false;
const StateType &CBArgumentState =
- static_cast<const StateType &>(AA.getState());
+ static_cast<const StateType &>(AA->getState());
LLVM_DEBUG(dbgs() << "[Attributor] Briding Call site context to argument"
<< "Position:" << Pos << "CB Arg state:" << CBArgumentState
@@ -497,7 +554,8 @@ bool getArgumentStateFromCallBaseContext(Attributor &A,
/// Helper class for generic deduction: call site argument -> argument position.
template <typename AAType, typename BaseType,
typename StateType = typename AAType::StateType,
- bool BridgeCallBaseContext = false>
+ bool BridgeCallBaseContext = false,
+ Attribute::AttrKind IRAttributeKind = Attribute::None>
struct AAArgumentFromCallSiteArguments : public BaseType {
AAArgumentFromCallSiteArguments(const IRPosition &IRP, Attributor &A)
: BaseType(IRP, A) {}
@@ -508,12 +566,14 @@ struct AAArgumentFromCallSiteArguments : public BaseType {
if (BridgeCallBaseContext) {
bool Success =
- getArgumentStateFromCallBaseContext<AAType, BaseType, StateType>(
+ getArgumentStateFromCallBaseContext<AAType, BaseType, StateType,
+ IRAttributeKind>(
A, *this, this->getIRPosition(), S);
if (Success)
return clampStateAndIndicateChange<StateType>(this->getState(), S);
}
- clampCallSiteArgumentStates<AAType, StateType>(A, *this, S);
+ clampCallSiteArgumentStates<AAType, StateType, IRAttributeKind>(A, *this,
+ S);
// TODO: If we know we visited all incoming values, thus none are assumed
// dead, we can take the known information from the state T.
@@ -524,7 +584,8 @@ struct AAArgumentFromCallSiteArguments : public BaseType {
/// Helper class for generic replication: function returned -> cs returned.
template <typename AAType, typename BaseType,
typename StateType = typename BaseType::StateType,
- bool IntroduceCallBaseContext = false>
+ bool IntroduceCallBaseContext = false,
+ Attribute::AttrKind IRAttributeKind = Attribute::None>
struct AACallSiteReturnedFromReturned : public BaseType {
AACallSiteReturnedFromReturned(const IRPosition &IRP, Attributor &A)
: BaseType(IRP, A) {}
@@ -549,8 +610,20 @@ struct AACallSiteReturnedFromReturned : public BaseType {
IRPosition FnPos = IRPosition::returned(
*AssociatedFunction, IntroduceCallBaseContext ? &CBContext : nullptr);
- const AAType &AA = A.getAAFor<AAType>(*this, FnPos, DepClassTy::REQUIRED);
- return clampStateAndIndicateChange(S, AA.getState());
+
+ // If possible, use the hasAssumedIRAttr interface.
+ if (IRAttributeKind != Attribute::None) {
+ bool IsKnown;
+ if (!AA::hasAssumedIRAttr<IRAttributeKind>(A, this, FnPos,
+ DepClassTy::REQUIRED, IsKnown))
+ return S.indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
+ }
+
+ const AAType *AA = A.getAAFor<AAType>(*this, FnPos, DepClassTy::REQUIRED);
+ if (!AA)
+ return S.indicatePessimisticFixpoint();
+ return clampStateAndIndicateChange(S, AA->getState());
}
};
@@ -585,16 +658,17 @@ static void followUsesInContext(AAType &AA, Attributor &A,
template <class AAType, typename StateType = typename AAType::StateType>
static void followUsesInMBEC(AAType &AA, Attributor &A, StateType &S,
Instruction &CtxI) {
+ MustBeExecutedContextExplorer *Explorer =
+ A.getInfoCache().getMustBeExecutedContextExplorer();
+ if (!Explorer)
+ return;
// Container for (transitive) uses of the associated value.
SetVector<const Use *> Uses;
for (const Use &U : AA.getIRPosition().getAssociatedValue().uses())
Uses.insert(&U);
- MustBeExecutedContextExplorer &Explorer =
- A.getInfoCache().getMustBeExecutedContextExplorer();
-
- followUsesInContext<AAType>(AA, A, Explorer, &CtxI, Uses, S);
+ followUsesInContext<AAType>(AA, A, *Explorer, &CtxI, Uses, S);
if (S.isAtFixpoint())
return;
@@ -639,7 +713,7 @@ static void followUsesInMBEC(AAType &AA, Attributor &A, StateType &S,
// }
// }
- Explorer.checkForAllContext(&CtxI, Pred);
+ Explorer->checkForAllContext(&CtxI, Pred);
for (const BranchInst *Br : BrInsts) {
StateType ParentState;
@@ -651,7 +725,7 @@ static void followUsesInMBEC(AAType &AA, Attributor &A, StateType &S,
StateType ChildState;
size_t BeforeSize = Uses.size();
- followUsesInContext(AA, A, Explorer, &BB->front(), Uses, ChildState);
+ followUsesInContext(AA, A, *Explorer, &BB->front(), Uses, ChildState);
// Erase uses which only appear in the child.
for (auto It = Uses.begin() + BeforeSize; It != Uses.end();)
@@ -855,7 +929,7 @@ protected:
for (unsigned Index : LocalList->getSecond()) {
for (auto &R : AccessList[Index]) {
Range &= R;
- if (Range.offsetOrSizeAreUnknown())
+ if (Range.offsetAndSizeAreUnknown())
break;
}
}
@@ -887,10 +961,8 @@ ChangeStatus AA::PointerInfo::State::addAccess(
}
auto AddToBins = [&](const AAPointerInfo::RangeList &ToAdd) {
- LLVM_DEBUG(
- if (ToAdd.size())
- dbgs() << "[AAPointerInfo] Inserting access in new offset bins\n";
- );
+ LLVM_DEBUG(if (ToAdd.size()) dbgs()
+ << "[AAPointerInfo] Inserting access in new offset bins\n";);
for (auto Key : ToAdd) {
LLVM_DEBUG(dbgs() << " key " << Key << "\n");
@@ -923,10 +995,8 @@ ChangeStatus AA::PointerInfo::State::addAccess(
// from the offset bins.
AAPointerInfo::RangeList ToRemove;
AAPointerInfo::RangeList::set_difference(ExistingRanges, NewRanges, ToRemove);
- LLVM_DEBUG(
- if (ToRemove.size())
- dbgs() << "[AAPointerInfo] Removing access from old offset bins\n";
- );
+ LLVM_DEBUG(if (ToRemove.size()) dbgs()
+ << "[AAPointerInfo] Removing access from old offset bins\n";);
for (auto Key : ToRemove) {
LLVM_DEBUG(dbgs() << " key " << Key << "\n");
@@ -1011,7 +1081,7 @@ struct AAPointerInfoImpl
AAPointerInfoImpl(const IRPosition &IRP, Attributor &A) : BaseTy(IRP) {}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
return std::string("PointerInfo ") +
(isValidState() ? (std::string("#") +
std::to_string(OffsetBins.size()) + " bins")
@@ -1032,6 +1102,7 @@ struct AAPointerInfoImpl
bool forallInterferingAccesses(
Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I,
+ bool FindInterferingWrites, bool FindInterferingReads,
function_ref<bool(const Access &, bool)> UserCB, bool &HasBeenWrittenTo,
AA::RangeTy &Range) const override {
HasBeenWrittenTo = false;
@@ -1040,15 +1111,27 @@ struct AAPointerInfoImpl
SmallVector<std::pair<const Access *, bool>, 8> InterferingAccesses;
Function &Scope = *I.getFunction();
- const auto &NoSyncAA = A.getAAFor<AANoSync>(
- QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
+ bool IsKnownNoSync;
+ bool IsAssumedNoSync = AA::hasAssumedIRAttr<Attribute::NoSync>(
+ A, &QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL,
+ IsKnownNoSync);
const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
- IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL);
- bool AllInSameNoSyncFn = NoSyncAA.isAssumedNoSync();
+ IRPosition::function(Scope), &QueryingAA, DepClassTy::NONE);
+ bool AllInSameNoSyncFn = IsAssumedNoSync;
bool InstIsExecutedByInitialThreadOnly =
ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I);
+
+ // If the function is not ending in aligned barriers, we need the stores to
+ // be in aligned barriers. The load being in one is not sufficient since the
+ // store might be executed by a thread that disappears after, causing the
+ // aligned barrier guarding the load to unblock and the load to read a value
+ // that has no CFG path to the load.
bool InstIsExecutedInAlignedRegion =
- ExecDomainAA && ExecDomainAA->isExecutedInAlignedRegion(A, I);
+ FindInterferingReads && ExecDomainAA &&
+ ExecDomainAA->isExecutedInAlignedRegion(A, I);
+
+ if (InstIsExecutedInAlignedRegion || InstIsExecutedByInitialThreadOnly)
+ A.recordDependence(*ExecDomainAA, QueryingAA, DepClassTy::OPTIONAL);
InformationCache &InfoCache = A.getInfoCache();
bool IsThreadLocalObj =
@@ -1063,14 +1146,25 @@ struct AAPointerInfoImpl
auto CanIgnoreThreadingForInst = [&](const Instruction &I) -> bool {
if (IsThreadLocalObj || AllInSameNoSyncFn)
return true;
- if (!ExecDomainAA)
+ const auto *FnExecDomainAA =
+ I.getFunction() == &Scope
+ ? ExecDomainAA
+ : A.lookupAAFor<AAExecutionDomain>(
+ IRPosition::function(*I.getFunction()), &QueryingAA,
+ DepClassTy::NONE);
+ if (!FnExecDomainAA)
return false;
if (InstIsExecutedInAlignedRegion ||
- ExecDomainAA->isExecutedInAlignedRegion(A, I))
+ (FindInterferingWrites &&
+ FnExecDomainAA->isExecutedInAlignedRegion(A, I))) {
+ A.recordDependence(*FnExecDomainAA, QueryingAA, DepClassTy::OPTIONAL);
return true;
+ }
if (InstIsExecutedByInitialThreadOnly &&
- ExecDomainAA->isExecutedByInitialThreadOnly(I))
+ FnExecDomainAA->isExecutedByInitialThreadOnly(I)) {
+ A.recordDependence(*FnExecDomainAA, QueryingAA, DepClassTy::OPTIONAL);
return true;
+ }
return false;
};
@@ -1084,13 +1178,13 @@ struct AAPointerInfoImpl
};
// TODO: Use inter-procedural reachability and dominance.
- const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
- QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
+ bool IsKnownNoRecurse;
+ AA::hasAssumedIRAttr<Attribute::NoRecurse>(
+ A, this, IRPosition::function(Scope), DepClassTy::OPTIONAL,
+ IsKnownNoRecurse);
- const bool FindInterferingWrites = I.mayReadFromMemory();
- const bool FindInterferingReads = I.mayWriteToMemory();
const bool UseDominanceReasoning =
- FindInterferingWrites && NoRecurseAA.isKnownNoRecurse();
+ FindInterferingWrites && IsKnownNoRecurse;
const DominatorTree *DT =
InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(Scope);
@@ -1098,8 +1192,7 @@ struct AAPointerInfoImpl
// outlive a GPU kernel. This is true for shared, constant, and local
// globals on AMD and NVIDIA GPUs.
auto HasKernelLifetime = [&](Value *V, Module &M) {
- Triple T(M.getTargetTriple());
- if (!(T.isAMDGPU() || T.isNVPTX()))
+ if (!AA::isGPU(M))
return false;
switch (AA::GPUAddressSpace(V->getType()->getPointerAddressSpace())) {
case AA::GPUAddressSpace::Shared:
@@ -1122,9 +1215,10 @@ struct AAPointerInfoImpl
// If the alloca containing function is not recursive the alloca
// must be dead in the callee.
const Function *AIFn = AI->getFunction();
- const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
- *this, IRPosition::function(*AIFn), DepClassTy::OPTIONAL);
- if (NoRecurseAA.isAssumedNoRecurse()) {
+ bool IsKnownNoRecurse;
+ if (AA::hasAssumedIRAttr<Attribute::NoRecurse>(
+ A, this, IRPosition::function(*AIFn), DepClassTy::OPTIONAL,
+ IsKnownNoRecurse)) {
IsLiveInCalleeCB = [AIFn](const Function &Fn) { return AIFn != &Fn; };
}
} else if (auto *GV = dyn_cast<GlobalValue>(&getAssociatedValue())) {
@@ -1220,7 +1314,7 @@ struct AAPointerInfoImpl
if (!WriteChecked && HasBeenWrittenTo &&
Acc.getRemoteInst()->getFunction() != &Scope) {
- const auto &FnReachabilityAA = A.getAAFor<AAInterFnReachability>(
+ const auto *FnReachabilityAA = A.getAAFor<AAInterFnReachability>(
QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL);
// Without going backwards in the call tree, can we reach the access
@@ -1228,7 +1322,8 @@ struct AAPointerInfoImpl
// itself either.
bool Inserted = ExclusionSet.insert(&I).second;
- if (!FnReachabilityAA.instructionCanReach(
+ if (!FnReachabilityAA ||
+ !FnReachabilityAA->instructionCanReach(
A, *LeastDominatingWriteInst,
*Acc.getRemoteInst()->getFunction(), &ExclusionSet))
WriteChecked = true;
@@ -1337,7 +1432,10 @@ struct AAPointerInfoImpl
O << " --> " << *Acc.getRemoteInst()
<< "\n";
if (!Acc.isWrittenValueYetUndetermined()) {
- if (Acc.getWrittenValue())
+ if (isa_and_nonnull<Function>(Acc.getWrittenValue()))
+ O << " - c: func " << Acc.getWrittenValue()->getName()
+ << "\n";
+ else if (Acc.getWrittenValue())
O << " - c: " << *Acc.getWrittenValue() << "\n";
else
O << " - c: <unknown>\n";
@@ -1450,22 +1548,22 @@ bool AAPointerInfoFloating::collectConstantsForGEP(Attributor &A,
// combination of elements, picked one each from these sets, is separately
// added to the original set of offsets, thus resulting in more offsets.
for (const auto &VI : VariableOffsets) {
- auto &PotentialConstantsAA = A.getAAFor<AAPotentialConstantValues>(
+ auto *PotentialConstantsAA = A.getAAFor<AAPotentialConstantValues>(
*this, IRPosition::value(*VI.first), DepClassTy::OPTIONAL);
- if (!PotentialConstantsAA.isValidState()) {
+ if (!PotentialConstantsAA || !PotentialConstantsAA->isValidState()) {
UsrOI.setUnknown();
return true;
}
// UndefValue is treated as a zero, which leaves Union as is.
- if (PotentialConstantsAA.undefIsContained())
+ if (PotentialConstantsAA->undefIsContained())
continue;
// We need at least one constant in every set to compute an actual offset.
// Otherwise, we end up pessimizing AAPointerInfo by respecting offsets that
// don't actually exist. In other words, the absence of constant values
// implies that the operation can be assumed dead for now.
- auto &AssumedSet = PotentialConstantsAA.getAssumedSet();
+ auto &AssumedSet = PotentialConstantsAA->getAssumedSet();
if (AssumedSet.empty())
return false;
@@ -1602,16 +1700,6 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
return true;
}
- auto mayBeInCycleHeader = [](const CycleInfo *CI, const Instruction *I) {
- if (!CI)
- return true;
- auto *BB = I->getParent();
- auto *C = CI->getCycle(BB);
- if (!C)
- return false;
- return BB == C->getHeader();
- };
-
// Check if the PHI operand is not dependent on the PHI itself. Every
// recurrence is a cyclic net of PHIs in the data flow, and has an
// equivalent Cycle in the control flow. One of those PHIs must be in the
@@ -1619,7 +1707,7 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
// Cycles reported by CycleInfo. It is sufficient to check the PHIs in
// every Cycle header; if such a node is marked unknown, this will
// eventually propagate through the whole net of PHIs in the recurrence.
- if (mayBeInCycleHeader(CI, cast<Instruction>(Usr))) {
+ if (mayBeInCycle(CI, cast<Instruction>(Usr), /* HeaderOnly */ true)) {
auto BaseOI = It->getSecond();
BaseOI.addToAll(Offset.getZExtValue());
if (IsFirstPHIUser || BaseOI == UsrOI) {
@@ -1681,6 +1769,8 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
return false;
} else {
auto PredIt = pred_begin(IntrBB);
+ if (PredIt == pred_end(IntrBB))
+ return false;
if ((*PredIt) != BB)
return false;
if (++PredIt != pred_end(IntrBB))
@@ -1780,11 +1870,14 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
return true;
if (CB->isArgOperand(&U)) {
unsigned ArgNo = CB->getArgOperandNo(&U);
- const auto &CSArgPI = A.getAAFor<AAPointerInfo>(
+ const auto *CSArgPI = A.getAAFor<AAPointerInfo>(
*this, IRPosition::callsite_argument(*CB, ArgNo),
DepClassTy::REQUIRED);
- Changed = translateAndAddState(A, CSArgPI, OffsetInfoMap[CurPtr], *CB) |
- Changed;
+ if (!CSArgPI)
+ return false;
+ Changed =
+ translateAndAddState(A, *CSArgPI, OffsetInfoMap[CurPtr], *CB) |
+ Changed;
return isValidState();
}
LLVM_DEBUG(dbgs() << "[AAPointerInfo] Call user not handled " << *CB
@@ -1845,13 +1938,6 @@ struct AAPointerInfoArgument final : AAPointerInfoFloating {
AAPointerInfoArgument(const IRPosition &IRP, Attributor &A)
: AAPointerInfoFloating(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AAPointerInfoFloating::initialize(A);
- if (getAnchorScope()->isDeclaration())
- indicatePessimisticFixpoint();
- }
-
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition());
@@ -1900,19 +1986,18 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating {
Argument *Arg = getAssociatedArgument();
if (Arg) {
const IRPosition &ArgPos = IRPosition::argument(*Arg);
- auto &ArgAA =
+ auto *ArgAA =
A.getAAFor<AAPointerInfo>(*this, ArgPos, DepClassTy::REQUIRED);
- if (ArgAA.getState().isValidState())
- return translateAndAddStateFromCallee(A, ArgAA,
+ if (ArgAA && ArgAA->getState().isValidState())
+ return translateAndAddStateFromCallee(A, *ArgAA,
*cast<CallBase>(getCtxI()));
if (!Arg->getParent()->isDeclaration())
return indicatePessimisticFixpoint();
}
- const auto &NoCaptureAA =
- A.getAAFor<AANoCapture>(*this, getIRPosition(), DepClassTy::OPTIONAL);
-
- if (!NoCaptureAA.isAssumedNoCapture())
+ bool IsKnownNoCapture;
+ if (!AA::hasAssumedIRAttr<Attribute::NoCapture>(
+ A, this, getIRPosition(), DepClassTy::OPTIONAL, IsKnownNoCapture))
return indicatePessimisticFixpoint();
bool IsKnown = false;
@@ -1948,7 +2033,15 @@ namespace {
struct AANoUnwindImpl : AANoUnwind {
AANoUnwindImpl(const IRPosition &IRP, Attributor &A) : AANoUnwind(IRP, A) {}
- const std::string getAsStr() const override {
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ bool IsKnown;
+ assert(!AA::hasAssumedIRAttr<Attribute::NoUnwind>(
+ A, nullptr, getIRPosition(), DepClassTy::NONE, IsKnown));
+ (void)IsKnown;
+ }
+
+ const std::string getAsStr(Attributor *A) const override {
return getAssumed() ? "nounwind" : "may-unwind";
}
@@ -1960,13 +2053,14 @@ struct AANoUnwindImpl : AANoUnwind {
(unsigned)Instruction::CatchSwitch, (unsigned)Instruction::Resume};
auto CheckForNoUnwind = [&](Instruction &I) {
- if (!I.mayThrow())
+ if (!I.mayThrow(/* IncludePhaseOneUnwind */ true))
return true;
if (const auto *CB = dyn_cast<CallBase>(&I)) {
- const auto &NoUnwindAA = A.getAAFor<AANoUnwind>(
- *this, IRPosition::callsite_function(*CB), DepClassTy::REQUIRED);
- return NoUnwindAA.isAssumedNoUnwind();
+ bool IsKnownNoUnwind;
+ return AA::hasAssumedIRAttr<Attribute::NoUnwind>(
+ A, this, IRPosition::callsite_function(*CB), DepClassTy::REQUIRED,
+ IsKnownNoUnwind);
}
return false;
};
@@ -1993,14 +2087,6 @@ struct AANoUnwindCallSite final : AANoUnwindImpl {
AANoUnwindCallSite(const IRPosition &IRP, Attributor &A)
: AANoUnwindImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoUnwindImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
- indicatePessimisticFixpoint();
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
@@ -2009,263 +2095,15 @@ struct AANoUnwindCallSite final : AANoUnwindImpl {
// redirecting requests to the callee argument.
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
- auto &FnAA = A.getAAFor<AANoUnwind>(*this, FnPos, DepClassTy::REQUIRED);
- return clampStateAndIndicateChange(getState(), FnAA.getState());
- }
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nounwind); }
-};
-} // namespace
-
-/// --------------------- Function Return Values -------------------------------
-
-namespace {
-/// "Attribute" that collects all potential returned values and the return
-/// instructions that they arise from.
-///
-/// If there is a unique returned value R, the manifest method will:
-/// - mark R with the "returned" attribute, if R is an argument.
-class AAReturnedValuesImpl : public AAReturnedValues, public AbstractState {
-
- /// Mapping of values potentially returned by the associated function to the
- /// return instructions that might return them.
- MapVector<Value *, SmallSetVector<ReturnInst *, 4>> ReturnedValues;
-
- /// State flags
- ///
- ///{
- bool IsFixed = false;
- bool IsValidState = true;
- ///}
-
-public:
- AAReturnedValuesImpl(const IRPosition &IRP, Attributor &A)
- : AAReturnedValues(IRP, A) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- // Reset the state.
- IsFixed = false;
- IsValidState = true;
- ReturnedValues.clear();
-
- Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration()) {
- indicatePessimisticFixpoint();
- return;
- }
- assert(!F->getReturnType()->isVoidTy() &&
- "Did not expect a void return type!");
-
- // The map from instruction opcodes to those instructions in the function.
- auto &OpcodeInstMap = A.getInfoCache().getOpcodeInstMapForFunction(*F);
-
- // Look through all arguments, if one is marked as returned we are done.
- for (Argument &Arg : F->args()) {
- if (Arg.hasReturnedAttr()) {
- auto &ReturnInstSet = ReturnedValues[&Arg];
- if (auto *Insts = OpcodeInstMap.lookup(Instruction::Ret))
- for (Instruction *RI : *Insts)
- ReturnInstSet.insert(cast<ReturnInst>(RI));
-
- indicateOptimisticFixpoint();
- return;
- }
- }
-
- if (!A.isFunctionIPOAmendable(*F))
- indicatePessimisticFixpoint();
- }
-
- /// See AbstractAttribute::manifest(...).
- ChangeStatus manifest(Attributor &A) override;
-
- /// See AbstractAttribute::getState(...).
- AbstractState &getState() override { return *this; }
-
- /// See AbstractAttribute::getState(...).
- const AbstractState &getState() const override { return *this; }
-
- /// See AbstractAttribute::updateImpl(Attributor &A).
- ChangeStatus updateImpl(Attributor &A) override;
-
- llvm::iterator_range<iterator> returned_values() override {
- return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end());
- }
-
- llvm::iterator_range<const_iterator> returned_values() const override {
- return llvm::make_range(ReturnedValues.begin(), ReturnedValues.end());
- }
-
- /// Return the number of potential return values, -1 if unknown.
- size_t getNumReturnValues() const override {
- return isValidState() ? ReturnedValues.size() : -1;
- }
-
- /// Return an assumed unique return value if a single candidate is found. If
- /// there cannot be one, return a nullptr. If it is not clear yet, return
- /// std::nullopt.
- std::optional<Value *> getAssumedUniqueReturnValue(Attributor &A) const;
-
- /// See AbstractState::checkForAllReturnedValues(...).
- bool checkForAllReturnedValuesAndReturnInsts(
- function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred)
- const override;
-
- /// Pretty print the attribute similar to the IR representation.
- const std::string getAsStr() const override;
-
- /// See AbstractState::isAtFixpoint().
- bool isAtFixpoint() const override { return IsFixed; }
-
- /// See AbstractState::isValidState().
- bool isValidState() const override { return IsValidState; }
-
- /// See AbstractState::indicateOptimisticFixpoint(...).
- ChangeStatus indicateOptimisticFixpoint() override {
- IsFixed = true;
- return ChangeStatus::UNCHANGED;
- }
-
- ChangeStatus indicatePessimisticFixpoint() override {
- IsFixed = true;
- IsValidState = false;
- return ChangeStatus::CHANGED;
- }
-};
-
-ChangeStatus AAReturnedValuesImpl::manifest(Attributor &A) {
- ChangeStatus Changed = ChangeStatus::UNCHANGED;
-
- // Bookkeeping.
- assert(isValidState());
- STATS_DECLTRACK(KnownReturnValues, FunctionReturn,
- "Number of function with known return values");
-
- // Check if we have an assumed unique return value that we could manifest.
- std::optional<Value *> UniqueRV = getAssumedUniqueReturnValue(A);
-
- if (!UniqueRV || !*UniqueRV)
- return Changed;
-
- // Bookkeeping.
- STATS_DECLTRACK(UniqueReturnValue, FunctionReturn,
- "Number of function with unique return");
- // If the assumed unique return value is an argument, annotate it.
- if (auto *UniqueRVArg = dyn_cast<Argument>(*UniqueRV)) {
- if (UniqueRVArg->getType()->canLosslesslyBitCastTo(
- getAssociatedFunction()->getReturnType())) {
- getIRPosition() = IRPosition::argument(*UniqueRVArg);
- Changed = IRAttribute::manifest(A);
- }
- }
- return Changed;
-}
-
-const std::string AAReturnedValuesImpl::getAsStr() const {
- return (isAtFixpoint() ? "returns(#" : "may-return(#") +
- (isValidState() ? std::to_string(getNumReturnValues()) : "?") + ")";
-}
-
-std::optional<Value *>
-AAReturnedValuesImpl::getAssumedUniqueReturnValue(Attributor &A) const {
- // If checkForAllReturnedValues provides a unique value, ignoring potential
- // undef values that can also be present, it is assumed to be the actual
- // return value and forwarded to the caller of this method. If there are
- // multiple, a nullptr is returned indicating there cannot be a unique
- // returned value.
- std::optional<Value *> UniqueRV;
- Type *Ty = getAssociatedFunction()->getReturnType();
-
- auto Pred = [&](Value &RV) -> bool {
- UniqueRV = AA::combineOptionalValuesInAAValueLatice(UniqueRV, &RV, Ty);
- return UniqueRV != std::optional<Value *>(nullptr);
- };
-
- if (!A.checkForAllReturnedValues(Pred, *this))
- UniqueRV = nullptr;
-
- return UniqueRV;
-}
-
-bool AAReturnedValuesImpl::checkForAllReturnedValuesAndReturnInsts(
- function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred)
- const {
- if (!isValidState())
- return false;
-
- // Check all returned values but ignore call sites as long as we have not
- // encountered an overdefined one during an update.
- for (const auto &It : ReturnedValues) {
- Value *RV = It.first;
- if (!Pred(*RV, It.second))
- return false;
- }
-
- return true;
-}
-
-ChangeStatus AAReturnedValuesImpl::updateImpl(Attributor &A) {
- ChangeStatus Changed = ChangeStatus::UNCHANGED;
-
- SmallVector<AA::ValueAndContext> Values;
- bool UsedAssumedInformation = false;
- auto ReturnInstCB = [&](Instruction &I) {
- ReturnInst &Ret = cast<ReturnInst>(I);
- Values.clear();
- if (!A.getAssumedSimplifiedValues(IRPosition::value(*Ret.getReturnValue()),
- *this, Values, AA::Intraprocedural,
- UsedAssumedInformation))
- Values.push_back({*Ret.getReturnValue(), Ret});
-
- for (auto &VAC : Values) {
- assert(AA::isValidInScope(*VAC.getValue(), Ret.getFunction()) &&
- "Assumed returned value should be valid in function scope!");
- if (ReturnedValues[VAC.getValue()].insert(&Ret))
- Changed = ChangeStatus::CHANGED;
- }
- return true;
- };
-
- // Discover returned values from all live returned instructions in the
- // associated function.
- if (!A.checkForAllInstructions(ReturnInstCB, *this, {Instruction::Ret},
- UsedAssumedInformation))
- return indicatePessimisticFixpoint();
- return Changed;
-}
-
-struct AAReturnedValuesFunction final : public AAReturnedValuesImpl {
- AAReturnedValuesFunction(const IRPosition &IRP, Attributor &A)
- : AAReturnedValuesImpl(IRP, A) {}
-
- /// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(returned) }
-};
-
-/// Returned values information for a call sites.
-struct AAReturnedValuesCallSite final : AAReturnedValuesImpl {
- AAReturnedValuesCallSite(const IRPosition &IRP, Attributor &A)
- : AAReturnedValuesImpl(IRP, A) {}
-
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- // TODO: Once we have call site specific value information we can provide
- // call site specific liveness information and then it makes
- // sense to specialize attributes for call sites instead of
- // redirecting requests to the callee.
- llvm_unreachable("Abstract attributes for returned values are not "
- "supported for call sites yet!");
- }
-
- /// See AbstractAttribute::updateImpl(...).
- ChangeStatus updateImpl(Attributor &A) override {
+ bool IsKnownNoUnwind;
+ if (AA::hasAssumedIRAttr<Attribute::NoUnwind>(
+ A, this, FnPos, DepClassTy::REQUIRED, IsKnownNoUnwind))
+ return ChangeStatus::UNCHANGED;
return indicatePessimisticFixpoint();
}
/// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {}
+ void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nounwind); }
};
} // namespace
@@ -2334,7 +2172,15 @@ namespace {
struct AANoSyncImpl : AANoSync {
AANoSyncImpl(const IRPosition &IRP, Attributor &A) : AANoSync(IRP, A) {}
- const std::string getAsStr() const override {
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ bool IsKnown;
+ assert(!AA::hasAssumedIRAttr<Attribute::NoSync>(A, nullptr, getIRPosition(),
+ DepClassTy::NONE, IsKnown));
+ (void)IsKnown;
+ }
+
+ const std::string getAsStr(Attributor *A) const override {
return getAssumed() ? "nosync" : "may-sync";
}
@@ -2381,14 +2227,6 @@ struct AANoSyncCallSite final : AANoSyncImpl {
AANoSyncCallSite(const IRPosition &IRP, Attributor &A)
: AANoSyncImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoSyncImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
- indicatePessimisticFixpoint();
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
@@ -2397,8 +2235,11 @@ struct AANoSyncCallSite final : AANoSyncImpl {
// redirecting requests to the callee argument.
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
- auto &FnAA = A.getAAFor<AANoSync>(*this, FnPos, DepClassTy::REQUIRED);
- return clampStateAndIndicateChange(getState(), FnAA.getState());
+ bool IsKnownNoSync;
+ if (AA::hasAssumedIRAttr<Attribute::NoSync>(
+ A, this, FnPos, DepClassTy::REQUIRED, IsKnownNoSync))
+ return ChangeStatus::UNCHANGED;
+ return indicatePessimisticFixpoint();
}
/// See AbstractAttribute::trackStatistics()
@@ -2412,16 +2253,21 @@ namespace {
struct AANoFreeImpl : public AANoFree {
AANoFreeImpl(const IRPosition &IRP, Attributor &A) : AANoFree(IRP, A) {}
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ bool IsKnown;
+ assert(!AA::hasAssumedIRAttr<Attribute::NoFree>(A, nullptr, getIRPosition(),
+ DepClassTy::NONE, IsKnown));
+ (void)IsKnown;
+ }
+
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
auto CheckForNoFree = [&](Instruction &I) {
- const auto &CB = cast<CallBase>(I);
- if (CB.hasFnAttr(Attribute::NoFree))
- return true;
-
- const auto &NoFreeAA = A.getAAFor<AANoFree>(
- *this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
- return NoFreeAA.isAssumedNoFree();
+ bool IsKnown;
+ return AA::hasAssumedIRAttr<Attribute::NoFree>(
+ A, this, IRPosition::callsite_function(cast<CallBase>(I)),
+ DepClassTy::REQUIRED, IsKnown);
};
bool UsedAssumedInformation = false;
@@ -2432,7 +2278,7 @@ struct AANoFreeImpl : public AANoFree {
}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
return getAssumed() ? "nofree" : "may-free";
}
};
@@ -2450,14 +2296,6 @@ struct AANoFreeCallSite final : AANoFreeImpl {
AANoFreeCallSite(const IRPosition &IRP, Attributor &A)
: AANoFreeImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoFreeImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
- indicatePessimisticFixpoint();
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
@@ -2466,8 +2304,11 @@ struct AANoFreeCallSite final : AANoFreeImpl {
// redirecting requests to the callee argument.
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
- auto &FnAA = A.getAAFor<AANoFree>(*this, FnPos, DepClassTy::REQUIRED);
- return clampStateAndIndicateChange(getState(), FnAA.getState());
+ bool IsKnown;
+ if (AA::hasAssumedIRAttr<Attribute::NoFree>(A, this, FnPos,
+ DepClassTy::REQUIRED, IsKnown))
+ return ChangeStatus::UNCHANGED;
+ return indicatePessimisticFixpoint();
}
/// See AbstractAttribute::trackStatistics()
@@ -2486,9 +2327,10 @@ struct AANoFreeFloating : AANoFreeImpl {
ChangeStatus updateImpl(Attributor &A) override {
const IRPosition &IRP = getIRPosition();
- const auto &NoFreeAA = A.getAAFor<AANoFree>(
- *this, IRPosition::function_scope(IRP), DepClassTy::OPTIONAL);
- if (NoFreeAA.isAssumedNoFree())
+ bool IsKnown;
+ if (AA::hasAssumedIRAttr<Attribute::NoFree>(A, this,
+ IRPosition::function_scope(IRP),
+ DepClassTy::OPTIONAL, IsKnown))
return ChangeStatus::UNCHANGED;
Value &AssociatedValue = getIRPosition().getAssociatedValue();
@@ -2501,10 +2343,10 @@ struct AANoFreeFloating : AANoFreeImpl {
return true;
unsigned ArgNo = CB->getArgOperandNo(&U);
- const auto &NoFreeArg = A.getAAFor<AANoFree>(
- *this, IRPosition::callsite_argument(*CB, ArgNo),
- DepClassTy::REQUIRED);
- return NoFreeArg.isAssumedNoFree();
+ bool IsKnown;
+ return AA::hasAssumedIRAttr<Attribute::NoFree>(
+ A, this, IRPosition::callsite_argument(*CB, ArgNo),
+ DepClassTy::REQUIRED, IsKnown);
}
if (isa<GetElementPtrInst>(UserI) || isa<BitCastInst>(UserI) ||
@@ -2550,8 +2392,11 @@ struct AANoFreeCallSiteArgument final : AANoFreeFloating {
if (!Arg)
return indicatePessimisticFixpoint();
const IRPosition &ArgPos = IRPosition::argument(*Arg);
- auto &ArgAA = A.getAAFor<AANoFree>(*this, ArgPos, DepClassTy::REQUIRED);
- return clampStateAndIndicateChange(getState(), ArgAA.getState());
+ bool IsKnown;
+ if (AA::hasAssumedIRAttr<Attribute::NoFree>(A, this, ArgPos,
+ DepClassTy::REQUIRED, IsKnown))
+ return ChangeStatus::UNCHANGED;
+ return indicatePessimisticFixpoint();
}
/// See AbstractAttribute::trackStatistics()
@@ -2593,6 +2438,39 @@ struct AANoFreeCallSiteReturned final : AANoFreeFloating {
} // namespace
/// ------------------------ NonNull Argument Attribute ------------------------
+
+bool AANonNull::isImpliedByIR(Attributor &A, const IRPosition &IRP,
+ Attribute::AttrKind ImpliedAttributeKind,
+ bool IgnoreSubsumingPositions) {
+ SmallVector<Attribute::AttrKind, 2> AttrKinds;
+ AttrKinds.push_back(Attribute::NonNull);
+ if (!NullPointerIsDefined(IRP.getAnchorScope(),
+ IRP.getAssociatedType()->getPointerAddressSpace()))
+ AttrKinds.push_back(Attribute::Dereferenceable);
+ if (A.hasAttr(IRP, AttrKinds, IgnoreSubsumingPositions, Attribute::NonNull))
+ return true;
+
+ if (IRP.getPositionKind() == IRP_RETURNED)
+ return false;
+
+ DominatorTree *DT = nullptr;
+ AssumptionCache *AC = nullptr;
+ InformationCache &InfoCache = A.getInfoCache();
+ if (const Function *Fn = IRP.getAnchorScope()) {
+ if (!Fn->isDeclaration()) {
+ DT = InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*Fn);
+ AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*Fn);
+ }
+ }
+
+ if (!isKnownNonZero(&IRP.getAssociatedValue(), A.getDataLayout(), 0, AC,
+ IRP.getCtxI(), DT))
+ return false;
+ A.manifestAttrs(IRP, {Attribute::get(IRP.getAnchorValue().getContext(),
+ Attribute::NonNull)});
+ return true;
+}
+
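// Reading of isImpliedByIR above: 'dereferenceable' is only accepted as an
// implication of 'nonnull' when null is not a defined pointer value in the
// associated address space; otherwise an explicit 'nonnull' attribute or an
// isKnownNonZero() proof (which is then manifested as 'nonnull') is needed.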
namespace {
static int64_t getKnownNonNullAndDerefBytesForUse(
Attributor &A, const AbstractAttribute &QueryingAA, Value &AssociatedValue,
@@ -2641,10 +2519,13 @@ static int64_t getKnownNonNullAndDerefBytesForUse(
IRPosition IRP = IRPosition::callsite_argument(*CB, ArgNo);
// As long as we only use known information there is no need to track
// dependences here.
- auto &DerefAA =
+ bool IsKnownNonNull;
+ AA::hasAssumedIRAttr<Attribute::NonNull>(A, &QueryingAA, IRP,
+ DepClassTy::NONE, IsKnownNonNull);
+ IsNonNull |= IsKnownNonNull;
+ auto *DerefAA =
A.getAAFor<AADereferenceable>(QueryingAA, IRP, DepClassTy::NONE);
- IsNonNull |= DerefAA.isKnownNonNull();
- return DerefAA.getKnownDereferenceableBytes();
+ return DerefAA ? DerefAA->getKnownDereferenceableBytes() : 0;
}
std::optional<MemoryLocation> Loc = MemoryLocation::getOrNone(I);
@@ -2673,43 +2554,16 @@ static int64_t getKnownNonNullAndDerefBytesForUse(
}
struct AANonNullImpl : AANonNull {
- AANonNullImpl(const IRPosition &IRP, Attributor &A)
- : AANonNull(IRP, A),
- NullIsDefined(NullPointerIsDefined(
- getAnchorScope(),
- getAssociatedValue().getType()->getPointerAddressSpace())) {}
+ AANonNullImpl(const IRPosition &IRP, Attributor &A) : AANonNull(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
Value &V = *getAssociatedValue().stripPointerCasts();
- if (!NullIsDefined &&
- hasAttr({Attribute::NonNull, Attribute::Dereferenceable},
- /* IgnoreSubsumingPositions */ false, &A)) {
- indicateOptimisticFixpoint();
- return;
- }
-
if (isa<ConstantPointerNull>(V)) {
indicatePessimisticFixpoint();
return;
}
- AANonNull::initialize(A);
-
- bool CanBeNull, CanBeFreed;
- if (V.getPointerDereferenceableBytes(A.getDataLayout(), CanBeNull,
- CanBeFreed)) {
- if (!CanBeNull) {
- indicateOptimisticFixpoint();
- return;
- }
- }
-
- if (isa<GlobalValue>(V)) {
- indicatePessimisticFixpoint();
- return;
- }
-
if (Instruction *CtxI = getCtxI())
followUsesInMBEC(*this, A, getState(), *CtxI);
}
@@ -2726,13 +2580,9 @@ struct AANonNullImpl : AANonNull {
}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
return getAssumed() ? "nonnull" : "may-null";
}
-
- /// Flag to determine if the underlying value can be null and still allow
- /// valid accesses.
- const bool NullIsDefined;
};
/// NonNull attribute for a floating value.
@@ -2742,48 +2592,39 @@ struct AANonNullFloating : public AANonNullImpl {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
- const DataLayout &DL = A.getDataLayout();
+ auto CheckIRP = [&](const IRPosition &IRP) {
+ bool IsKnownNonNull;
+ return AA::hasAssumedIRAttr<Attribute::NonNull>(
+ A, *this, IRP, DepClassTy::OPTIONAL, IsKnownNonNull);
+ };
bool Stripped;
bool UsedAssumedInformation = false;
+ Value *AssociatedValue = &getAssociatedValue();
SmallVector<AA::ValueAndContext> Values;
if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values,
- AA::AnyScope, UsedAssumedInformation)) {
- Values.push_back({getAssociatedValue(), getCtxI()});
+ AA::AnyScope, UsedAssumedInformation))
Stripped = false;
- } else {
- Stripped = Values.size() != 1 ||
- Values.front().getValue() != &getAssociatedValue();
- }
-
- DominatorTree *DT = nullptr;
- AssumptionCache *AC = nullptr;
- InformationCache &InfoCache = A.getInfoCache();
- if (const Function *Fn = getAnchorScope()) {
- DT = InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*Fn);
- AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*Fn);
+ else
+ Stripped =
+ Values.size() != 1 || Values.front().getValue() != AssociatedValue;
+
+ if (!Stripped) {
+ // If we haven't stripped anything we might still be able to use a
+ // different AA, but only if the IRP changes. Effectively when we
+ // interpret this not as a call site value but as a floating/argument
+ // value.
+ const IRPosition AVIRP = IRPosition::value(*AssociatedValue);
+ if (AVIRP == getIRPosition() || !CheckIRP(AVIRP))
+ return indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
}
- AANonNull::StateType T;
- auto VisitValueCB = [&](Value &V, const Instruction *CtxI) -> bool {
- const auto &AA = A.getAAFor<AANonNull>(*this, IRPosition::value(V),
- DepClassTy::REQUIRED);
- if (!Stripped && this == &AA) {
- if (!isKnownNonZero(&V, DL, 0, AC, CtxI, DT))
- T.indicatePessimisticFixpoint();
- } else {
- // Use abstract attribute information.
- const AANonNull::StateType &NS = AA.getState();
- T ^= NS;
- }
- return T.isValidState();
- };
-
for (const auto &VAC : Values)
- if (!VisitValueCB(*VAC.getValue(), VAC.getCtxI()))
+ if (!CheckIRP(IRPosition::value(*VAC.getValue())))
return indicatePessimisticFixpoint();
- return clampStateAndIndicateChange(getState(), T);
+ return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
@@ -2792,12 +2633,14 @@ struct AANonNullFloating : public AANonNullImpl {
/// NonNull attribute for function return value.
struct AANonNullReturned final
- : AAReturnedFromReturnedValues<AANonNull, AANonNull> {
+ : AAReturnedFromReturnedValues<AANonNull, AANonNull, AANonNull::StateType,
+ false, AANonNull::IRAttributeKind> {
AANonNullReturned(const IRPosition &IRP, Attributor &A)
- : AAReturnedFromReturnedValues<AANonNull, AANonNull>(IRP, A) {}
+ : AAReturnedFromReturnedValues<AANonNull, AANonNull, AANonNull::StateType,
+ false, Attribute::NonNull>(IRP, A) {}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
return getAssumed() ? "nonnull" : "may-null";
}
@@ -2807,9 +2650,13 @@ struct AANonNullReturned final
/// NonNull attribute for function argument.
struct AANonNullArgument final
- : AAArgumentFromCallSiteArguments<AANonNull, AANonNullImpl> {
+ : AAArgumentFromCallSiteArguments<AANonNull, AANonNullImpl,
+ AANonNull::StateType, false,
+ AANonNull::IRAttributeKind> {
AANonNullArgument(const IRPosition &IRP, Attributor &A)
- : AAArgumentFromCallSiteArguments<AANonNull, AANonNullImpl>(IRP, A) {}
+ : AAArgumentFromCallSiteArguments<AANonNull, AANonNullImpl,
+ AANonNull::StateType, false,
+ AANonNull::IRAttributeKind>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nonnull) }
@@ -2825,23 +2672,118 @@ struct AANonNullCallSiteArgument final : AANonNullFloating {
/// NonNull attribute for a call site return position.
struct AANonNullCallSiteReturned final
- : AACallSiteReturnedFromReturned<AANonNull, AANonNullImpl> {
+ : AACallSiteReturnedFromReturned<AANonNull, AANonNullImpl,
+ AANonNull::StateType, false,
+ AANonNull::IRAttributeKind> {
AANonNullCallSiteReturned(const IRPosition &IRP, Attributor &A)
- : AACallSiteReturnedFromReturned<AANonNull, AANonNullImpl>(IRP, A) {}
+ : AACallSiteReturnedFromReturned<AANonNull, AANonNullImpl,
+ AANonNull::StateType, false,
+ AANonNull::IRAttributeKind>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) }
};
} // namespace
+/// ------------------------ Must-Progress Attributes --------------------------
+namespace {
+struct AAMustProgressImpl : public AAMustProgress {
+ AAMustProgressImpl(const IRPosition &IRP, Attributor &A)
+ : AAMustProgress(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ bool IsKnown;
+ assert(!AA::hasAssumedIRAttr<Attribute::MustProgress>(
+ A, nullptr, getIRPosition(), DepClassTy::NONE, IsKnown));
+ (void)IsKnown;
+ }
+
+ /// See AbstractAttribute::getAsStr()
+ const std::string getAsStr(Attributor *A) const override {
+ return getAssumed() ? "mustprogress" : "may-not-progress";
+ }
+};
+
+struct AAMustProgressFunction final : AAMustProgressImpl {
+ AAMustProgressFunction(const IRPosition &IRP, Attributor &A)
+ : AAMustProgressImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ bool IsKnown;
+ if (AA::hasAssumedIRAttr<Attribute::WillReturn>(
+ A, this, getIRPosition(), DepClassTy::OPTIONAL, IsKnown)) {
+ if (IsKnown)
+ return indicateOptimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
+ }
+
+ auto CheckForMustProgress = [&](AbstractCallSite ACS) {
+ IRPosition IPos = IRPosition::callsite_function(*ACS.getInstruction());
+ bool IsKnownMustProgress;
+ return AA::hasAssumedIRAttr<Attribute::MustProgress>(
+ A, this, IPos, DepClassTy::REQUIRED, IsKnownMustProgress,
+ /* IgnoreSubsumingPositions */ true);
+ };
+
+ bool AllCallSitesKnown = true;
+ if (!A.checkForAllCallSites(CheckForMustProgress, *this,
+ /* RequireAllCallSites */ true,
+ AllCallSitesKnown))
+ return indicatePessimisticFixpoint();
+
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FN_ATTR(mustprogress)
+ }
+};
+
+/// MustProgress attribute deduction for a call site.
+struct AAMustProgressCallSite final : AAMustProgressImpl {
+ AAMustProgressCallSite(const IRPosition &IRP, Attributor &A)
+ : AAMustProgressImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // TODO: Once we have call site specific value information we can provide
+ // call site specific liveness information and then it makes
+ // sense to specialize attributes for call site arguments instead of
+ // redirecting requests to the callee argument.
+ const IRPosition &FnPos = IRPosition::function(*getAnchorScope());
+ bool IsKnownMustProgress;
+ if (!AA::hasAssumedIRAttr<Attribute::MustProgress>(
+ A, this, FnPos, DepClassTy::REQUIRED, IsKnownMustProgress))
+ return indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CS_ATTR(mustprogress);
+ }
+};
+} // namespace
+
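// Note on the function-level deduction above: a function already assumed
// 'willreturn' is treated as making progress (a known 'willreturn' even
// fixes the state optimistically); otherwise every known call site of the
// function must carry an assumed 'mustprogress' for the state to survive.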
/// ------------------------ No-Recurse Attributes ----------------------------
namespace {
struct AANoRecurseImpl : public AANoRecurse {
AANoRecurseImpl(const IRPosition &IRP, Attributor &A) : AANoRecurse(IRP, A) {}
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ bool IsKnown;
+ assert(!AA::hasAssumedIRAttr<Attribute::NoRecurse>(
+ A, nullptr, getIRPosition(), DepClassTy::NONE, IsKnown));
+ (void)IsKnown;
+ }
+
/// See AbstractAttribute::getAsStr()
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
return getAssumed() ? "norecurse" : "may-recurse";
}
};
@@ -2855,10 +2797,13 @@ struct AANoRecurseFunction final : AANoRecurseImpl {
// If all live call sites are known to be no-recurse, we are as well.
auto CallSitePred = [&](AbstractCallSite ACS) {
- const auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
- *this, IRPosition::function(*ACS.getInstruction()->getFunction()),
- DepClassTy::NONE);
- return NoRecurseAA.isKnownNoRecurse();
+ bool IsKnownNoRecurse;
+ if (!AA::hasAssumedIRAttr<Attribute::NoRecurse>(
+ A, this,
+ IRPosition::function(*ACS.getInstruction()->getFunction()),
+ DepClassTy::NONE, IsKnownNoRecurse))
+ return false;
+ return IsKnownNoRecurse;
};
bool UsedAssumedInformation = false;
if (A.checkForAllCallSites(CallSitePred, *this, true,
@@ -2873,10 +2818,10 @@ struct AANoRecurseFunction final : AANoRecurseImpl {
return ChangeStatus::UNCHANGED;
}
- const AAInterFnReachability &EdgeReachability =
+ const AAInterFnReachability *EdgeReachability =
A.getAAFor<AAInterFnReachability>(*this, getIRPosition(),
DepClassTy::REQUIRED);
- if (EdgeReachability.canReach(A, *getAnchorScope()))
+ if (EdgeReachability && EdgeReachability->canReach(A, *getAnchorScope()))
return indicatePessimisticFixpoint();
return ChangeStatus::UNCHANGED;
}
@@ -2889,14 +2834,6 @@ struct AANoRecurseCallSite final : AANoRecurseImpl {
AANoRecurseCallSite(const IRPosition &IRP, Attributor &A)
: AANoRecurseImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoRecurseImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
- indicatePessimisticFixpoint();
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
@@ -2905,8 +2842,11 @@ struct AANoRecurseCallSite final : AANoRecurseImpl {
// redirecting requests to the callee argument.
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
- auto &FnAA = A.getAAFor<AANoRecurse>(*this, FnPos, DepClassTy::REQUIRED);
- return clampStateAndIndicateChange(getState(), FnAA.getState());
+ bool IsKnownNoRecurse;
+ if (!AA::hasAssumedIRAttr<Attribute::NoRecurse>(
+ A, this, FnPos, DepClassTy::REQUIRED, IsKnownNoRecurse))
+ return indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
@@ -2914,6 +2854,62 @@ struct AANoRecurseCallSite final : AANoRecurseImpl {
};
} // namespace
+/// ------------------------ No-Convergent Attribute --------------------------
+
+namespace {
+struct AANonConvergentImpl : public AANonConvergent {
+ AANonConvergentImpl(const IRPosition &IRP, Attributor &A)
+ : AANonConvergent(IRP, A) {}
+
+ /// See AbstractAttribute::getAsStr()
+ const std::string getAsStr(Attributor *A) const override {
+ return getAssumed() ? "non-convergent" : "may-be-convergent";
+ }
+};
+
+struct AANonConvergentFunction final : AANonConvergentImpl {
+ AANonConvergentFunction(const IRPosition &IRP, Attributor &A)
+ : AANonConvergentImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ // If all function calls are known to not be convergent, we are not
+ // convergent.
+ auto CalleeIsNotConvergent = [&](Instruction &Inst) {
+ CallBase &CB = cast<CallBase>(Inst);
+ auto *Callee = dyn_cast_if_present<Function>(CB.getCalledOperand());
+ if (!Callee || Callee->isIntrinsic()) {
+ return false;
+ }
+ if (Callee->isDeclaration()) {
+ return !Callee->hasFnAttribute(Attribute::Convergent);
+ }
+ const auto *ConvergentAA = A.getAAFor<AANonConvergent>(
+ *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
+ return ConvergentAA && ConvergentAA->isAssumedNotConvergent();
+ };
+
+ bool UsedAssumedInformation = false;
+ if (!A.checkForAllCallLikeInstructions(CalleeIsNotConvergent, *this,
+ UsedAssumedInformation)) {
+ return indicatePessimisticFixpoint();
+ }
+ return ChangeStatus::UNCHANGED;
+ }
+
+ ChangeStatus manifest(Attributor &A) override {
+ if (isKnownNotConvergent() &&
+ A.hasAttr(getIRPosition(), Attribute::Convergent)) {
+ A.removeAttrs(getIRPosition(), {Attribute::Convergent});
+ return ChangeStatus::CHANGED;
+ }
+ return ChangeStatus::UNCHANGED;
+ }
+
+ void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(convergent) }
+};
+} // namespace
+
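// Illustrative effect of the manifest() above (hypothetical IR): once every
// call-like instruction in @foo is proven to reach only non-convergent,
// non-intrinsic callees, an existing 'convergent' attribute is dropped:
//
//   define void @foo() convergent {   ; before
//   define void @foo() {              ; after manifestation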
/// -------------------- Undefined-Behavior Attributes ------------------------
namespace {
@@ -3009,7 +3005,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
// Check nonnull and noundef argument attribute violation for each
// callsite.
CallBase &CB = cast<CallBase>(I);
- Function *Callee = CB.getCalledFunction();
+ auto *Callee = dyn_cast_if_present<Function>(CB.getCalledOperand());
if (!Callee)
return true;
for (unsigned idx = 0; idx < CB.arg_size(); idx++) {
@@ -3030,9 +3026,10 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
// (3) Simplified to null pointer where known to be nonnull.
// The argument is a poison value and violate noundef attribute.
IRPosition CalleeArgumentIRP = IRPosition::callsite_argument(CB, idx);
- auto &NoUndefAA =
- A.getAAFor<AANoUndef>(*this, CalleeArgumentIRP, DepClassTy::NONE);
- if (!NoUndefAA.isKnownNoUndef())
+ bool IsKnownNoUndef;
+ AA::hasAssumedIRAttr<Attribute::NoUndef>(
+ A, this, CalleeArgumentIRP, DepClassTy::NONE, IsKnownNoUndef);
+ if (!IsKnownNoUndef)
continue;
bool UsedAssumedInformation = false;
std::optional<Value *> SimplifiedVal =
@@ -3049,9 +3046,10 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
if (!ArgVal->getType()->isPointerTy() ||
!isa<ConstantPointerNull>(**SimplifiedVal))
continue;
- auto &NonNullAA =
- A.getAAFor<AANonNull>(*this, CalleeArgumentIRP, DepClassTy::NONE);
- if (NonNullAA.isKnownNonNull())
+ bool IsKnownNonNull;
+ AA::hasAssumedIRAttr<Attribute::NonNull>(
+ A, this, CalleeArgumentIRP, DepClassTy::NONE, IsKnownNonNull);
+ if (IsKnownNonNull)
KnownUBInsts.insert(&I);
}
return true;
@@ -3081,9 +3079,11 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
// position has nonnull attribute (because the returned value is
// poison).
if (isa<ConstantPointerNull>(*SimplifiedRetValue)) {
- auto &NonNullAA = A.getAAFor<AANonNull>(
- *this, IRPosition::returned(*getAnchorScope()), DepClassTy::NONE);
- if (NonNullAA.isKnownNonNull())
+ bool IsKnownNonNull;
+ AA::hasAssumedIRAttr<Attribute::NonNull>(
+ A, this, IRPosition::returned(*getAnchorScope()), DepClassTy::NONE,
+ IsKnownNonNull);
+ if (IsKnownNonNull)
KnownUBInsts.insert(&I);
}
@@ -3108,9 +3108,10 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
if (!getAnchorScope()->getReturnType()->isVoidTy()) {
const IRPosition &ReturnIRP = IRPosition::returned(*getAnchorScope());
if (!A.isAssumedDead(ReturnIRP, this, nullptr, UsedAssumedInformation)) {
- auto &RetPosNoUndefAA =
- A.getAAFor<AANoUndef>(*this, ReturnIRP, DepClassTy::NONE);
- if (RetPosNoUndefAA.isKnownNoUndef())
+ bool IsKnownNoUndef;
+ AA::hasAssumedIRAttr<Attribute::NoUndef>(
+ A, this, ReturnIRP, DepClassTy::NONE, IsKnownNoUndef);
+ if (IsKnownNoUndef)
A.checkForAllInstructions(InspectReturnInstForUB, *this,
{Instruction::Ret}, UsedAssumedInformation,
/* CheckBBLivenessOnly */ true);
@@ -3161,7 +3162,7 @@ struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior {
}
/// See AbstractAttribute::getAsStr()
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
return getAssumed() ? "undefined-behavior" : "no-ub";
}
@@ -3284,20 +3285,15 @@ struct AAWillReturnImpl : public AAWillReturn {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- AAWillReturn::initialize(A);
-
- if (isImpliedByMustprogressAndReadonly(A, /* KnownOnly */ true)) {
- indicateOptimisticFixpoint();
- return;
- }
+ bool IsKnown;
+ assert(!AA::hasAssumedIRAttr<Attribute::WillReturn>(
+ A, nullptr, getIRPosition(), DepClassTy::NONE, IsKnown));
+ (void)IsKnown;
}
/// Check for `mustprogress` and `readonly` as they imply `willreturn`.
bool isImpliedByMustprogressAndReadonly(Attributor &A, bool KnownOnly) {
- // Check for `mustprogress` in the scope and the associated function which
- // might be different if this is a call site.
- if ((!getAnchorScope() || !getAnchorScope()->mustProgress()) &&
- (!getAssociatedFunction() || !getAssociatedFunction()->mustProgress()))
+ if (!A.hasAttr(getIRPosition(), {Attribute::MustProgress}))
return false;
bool IsKnown;
@@ -3313,15 +3309,17 @@ struct AAWillReturnImpl : public AAWillReturn {
auto CheckForWillReturn = [&](Instruction &I) {
IRPosition IPos = IRPosition::callsite_function(cast<CallBase>(I));
- const auto &WillReturnAA =
- A.getAAFor<AAWillReturn>(*this, IPos, DepClassTy::REQUIRED);
- if (WillReturnAA.isKnownWillReturn())
- return true;
- if (!WillReturnAA.isAssumedWillReturn())
+ bool IsKnown;
+ if (AA::hasAssumedIRAttr<Attribute::WillReturn>(
+ A, this, IPos, DepClassTy::REQUIRED, IsKnown)) {
+ if (IsKnown)
+ return true;
+ } else {
return false;
- const auto &NoRecurseAA =
- A.getAAFor<AANoRecurse>(*this, IPos, DepClassTy::REQUIRED);
- return NoRecurseAA.isAssumedNoRecurse();
+ }
+ bool IsKnownNoRecurse;
+ return AA::hasAssumedIRAttr<Attribute::NoRecurse>(
+ A, this, IPos, DepClassTy::REQUIRED, IsKnownNoRecurse);
};
bool UsedAssumedInformation = false;
@@ -3333,7 +3331,7 @@ struct AAWillReturnImpl : public AAWillReturn {
}
/// See AbstractAttribute::getAsStr()
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
return getAssumed() ? "willreturn" : "may-noreturn";
}
};
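Put together, the `willreturn` reasoning above has two paths: `mustprogress` plus known read-only behavior is already sufficient, otherwise every call must be assumed to return and not to recurse back, and (for the function variant below) the body must not contain unbounded cycles. A rough standalone sketch with the queries folded into precomputed booleans (assumed values, for illustration only):

#include <vector>

struct CallFacts {
  bool AssumedWillReturn = false;
  bool AssumedNoRecurse = false;
};

struct FnFacts {
  bool HasMustProgress = false;
  bool KnownReadOnly = false;
  bool MayContainUnboundedCycle = true;
  std::vector<CallFacts> Calls;
};

bool assumedWillReturn(const FnFacts &F) {
  // mustprogress + readonly already implies willreturn.
  if (F.HasMustProgress && F.KnownReadOnly)
    return true;
  if (F.MayContainUnboundedCycle)
    return false;
  // Otherwise every call must eventually return and must not recurse back.
  for (const CallFacts &C : F.Calls)
    if (!C.AssumedWillReturn || !C.AssumedNoRecurse)
      return false;
  return true;
}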
@@ -3347,7 +3345,8 @@ struct AAWillReturnFunction final : AAWillReturnImpl {
AAWillReturnImpl::initialize(A);
Function *F = getAnchorScope();
- if (!F || F->isDeclaration() || mayContainUnboundedCycle(*F, A))
+    assert(F && "Expected an anchor function");
+ if (F->isDeclaration() || mayContainUnboundedCycle(*F, A))
indicatePessimisticFixpoint();
}
@@ -3360,14 +3359,6 @@ struct AAWillReturnCallSite final : AAWillReturnImpl {
AAWillReturnCallSite(const IRPosition &IRP, Attributor &A)
: AAWillReturnImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AAWillReturnImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F || !A.isFunctionIPOAmendable(*F))
- indicatePessimisticFixpoint();
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
if (isImpliedByMustprogressAndReadonly(A, /* KnownOnly */ false))
@@ -3379,8 +3370,11 @@ struct AAWillReturnCallSite final : AAWillReturnImpl {
// redirecting requests to the callee argument.
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
- auto &FnAA = A.getAAFor<AAWillReturn>(*this, FnPos, DepClassTy::REQUIRED);
- return clampStateAndIndicateChange(getState(), FnAA.getState());
+ bool IsKnown;
+ if (AA::hasAssumedIRAttr<Attribute::WillReturn>(
+ A, this, FnPos, DepClassTy::REQUIRED, IsKnown))
+ return ChangeStatus::UNCHANGED;
+ return indicatePessimisticFixpoint();
}
/// See AbstractAttribute::trackStatistics()
@@ -3414,22 +3408,18 @@ template <typename ToTy> struct ReachabilityQueryInfo {
/// Constructor replacement to ensure unique and stable sets are used for the
/// cache.
ReachabilityQueryInfo(Attributor &A, const Instruction &From, const ToTy &To,
- const AA::InstExclusionSetTy *ES)
+ const AA::InstExclusionSetTy *ES, bool MakeUnique)
: From(&From), To(&To), ExclusionSet(ES) {
- if (ExclusionSet && !ExclusionSet->empty()) {
- ExclusionSet =
- A.getInfoCache().getOrCreateUniqueBlockExecutionSet(ExclusionSet);
- } else {
+ if (!ES || ES->empty()) {
ExclusionSet = nullptr;
+ } else if (MakeUnique) {
+ ExclusionSet = A.getInfoCache().getOrCreateUniqueBlockExecutionSet(ES);
}
}
ReachabilityQueryInfo(const ReachabilityQueryInfo &RQI)
- : From(RQI.From), To(RQI.To), ExclusionSet(RQI.ExclusionSet) {
- assert(RQI.Result == Reachable::No &&
- "Didn't expect to copy an explored RQI!");
- }
+ : From(RQI.From), To(RQI.To), ExclusionSet(RQI.ExclusionSet) {}
};
namespace llvm {
@@ -3482,8 +3472,7 @@ template <typename BaseTy, typename ToTy>
struct CachedReachabilityAA : public BaseTy {
using RQITy = ReachabilityQueryInfo<ToTy>;
- CachedReachabilityAA<BaseTy, ToTy>(const IRPosition &IRP, Attributor &A)
- : BaseTy(IRP, A) {}
+ CachedReachabilityAA(const IRPosition &IRP, Attributor &A) : BaseTy(IRP, A) {}
/// See AbstractAttribute::isQueryAA.
bool isQueryAA() const override { return true; }
@@ -3492,7 +3481,8 @@ struct CachedReachabilityAA : public BaseTy {
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
InUpdate = true;
- for (RQITy *RQI : QueryVector) {
+ for (unsigned u = 0, e = QueryVector.size(); u < e; ++u) {
+ RQITy *RQI = QueryVector[u];
if (RQI->Result == RQITy::Reachable::No && isReachableImpl(A, *RQI))
Changed = ChangeStatus::CHANGED;
}
@@ -3503,39 +3493,78 @@ struct CachedReachabilityAA : public BaseTy {
virtual bool isReachableImpl(Attributor &A, RQITy &RQI) = 0;
bool rememberResult(Attributor &A, typename RQITy::Reachable Result,
- RQITy &RQI) {
- if (Result == RQITy::Reachable::No) {
- if (!InUpdate)
- A.registerForUpdate(*this);
- return false;
- }
- assert(RQI.Result == RQITy::Reachable::No && "Already reachable?");
+ RQITy &RQI, bool UsedExclusionSet) {
RQI.Result = Result;
- return true;
+
+ // Remove the temporary RQI from the cache.
+ if (!InUpdate)
+ QueryCache.erase(&RQI);
+
+    // Insert a plain RQI (w/o exclusion set) if that makes sense. Two cases:
+    // 1) If it is reachable, it doesn't matter if we have an exclusion set
+    //    for this query.
+    // 2) We did not use the exclusion set, potentially because there is none.
+ if (Result == RQITy::Reachable::Yes || !UsedExclusionSet) {
+ RQITy PlainRQI(RQI.From, RQI.To);
+ if (!QueryCache.count(&PlainRQI)) {
+ RQITy *RQIPtr = new (A.Allocator) RQITy(RQI.From, RQI.To);
+ RQIPtr->Result = Result;
+ QueryVector.push_back(RQIPtr);
+ QueryCache.insert(RQIPtr);
+ }
+ }
+
+ // Check if we need to insert a new permanent RQI with the exclusion set.
+ if (!InUpdate && Result != RQITy::Reachable::Yes && UsedExclusionSet) {
+ assert((!RQI.ExclusionSet || !RQI.ExclusionSet->empty()) &&
+ "Did not expect empty set!");
+ RQITy *RQIPtr = new (A.Allocator)
+ RQITy(A, *RQI.From, *RQI.To, RQI.ExclusionSet, true);
+ assert(RQIPtr->Result == RQITy::Reachable::No && "Already reachable?");
+ RQIPtr->Result = Result;
+ assert(!QueryCache.count(RQIPtr));
+ QueryVector.push_back(RQIPtr);
+ QueryCache.insert(RQIPtr);
+ }
+
+ if (Result == RQITy::Reachable::No && !InUpdate)
+ A.registerForUpdate(*this);
+ return Result == RQITy::Reachable::Yes;
}
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
// TODO: Return the number of reachable queries.
return "#queries(" + std::to_string(QueryVector.size()) + ")";
}
- RQITy *checkQueryCache(Attributor &A, RQITy &StackRQI,
- typename RQITy::Reachable &Result) {
+ bool checkQueryCache(Attributor &A, RQITy &StackRQI,
+ typename RQITy::Reachable &Result) {
if (!this->getState().isValidState()) {
Result = RQITy::Reachable::Yes;
- return nullptr;
+ return true;
+ }
+
+ // If we have an exclusion set we might be able to find our answer by
+ // ignoring it first.
+ if (StackRQI.ExclusionSet) {
+ RQITy PlainRQI(StackRQI.From, StackRQI.To);
+ auto It = QueryCache.find(&PlainRQI);
+ if (It != QueryCache.end() && (*It)->Result == RQITy::Reachable::No) {
+ Result = RQITy::Reachable::No;
+ return true;
+ }
}
auto It = QueryCache.find(&StackRQI);
if (It != QueryCache.end()) {
Result = (*It)->Result;
- return nullptr;
+ return true;
}
- RQITy *RQIPtr = new (A.Allocator) RQITy(StackRQI);
- QueryVector.push_back(RQIPtr);
- QueryCache.insert(RQIPtr);
- return RQIPtr;
+ // Insert a temporary for recursive queries. We will replace it with a
+ // permanent entry later.
+ QueryCache.insert(&StackRQI);
+ return false;
}
private:
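The caching discipline in rememberResult/checkQueryCache amounts to this: a temporary entry guards against recursive queries, a plain (exclusion-set-free) entry is recorded when the result also holds without the set (reachable results, or results where the set was never consulted), and a set-specific entry is only kept when the exclusion set actually changed the outcome; on lookup, a cached plain "unreachable" answers any exclusion-set query for the same endpoints. A standalone sketch of that keying scheme, with the LLVM types replaced by plain standard containers (illustrative only, not the real cache):

#include <map>
#include <optional>
#include <set>
#include <utility>

enum class Reach { No, Yes };
using Endpoints = std::pair<int, int>;  // stand-ins for the From/To instructions
using ExclusionSet = std::set<int>;     // stand-in for the excluded instructions

struct ReachCache {
  std::map<Endpoints, Reach> Plain;                            // no exclusion set
  std::map<std::pair<Endpoints, ExclusionSet>, Reach> WithSet; // set-specific

  std::optional<Reach> lookup(Endpoints EP, const ExclusionSet &ES) const {
    if (!ES.empty()) {
      // A plain "unreachable" answers every exclusion-set query as well.
      auto PIt = Plain.find(EP);
      if (PIt != Plain.end() && PIt->second == Reach::No)
        return Reach::No;
      auto SIt = WithSet.find({EP, ES});
      if (SIt != WithSet.end())
        return SIt->second;
      return std::nullopt;
    }
    auto PIt = Plain.find(EP);
    if (PIt == Plain.end())
      return std::nullopt;
    return PIt->second;
  }

  void remember(Endpoints EP, const ExclusionSet &ES, Reach R, bool UsedSet) {
    // Reachable results, and results that never consulted the set, are also
    // valid plain results; keep the first one we see.
    if (R == Reach::Yes || !UsedSet)
      Plain.emplace(EP, R);
    // Only "unreachable because of this particular set" needs its own entry.
    if (R == Reach::No && UsedSet && !ES.empty())
      WithSet.emplace(std::make_pair(EP, ES), R);
  }
};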
@@ -3546,8 +3575,9 @@ private:
struct AAIntraFnReachabilityFunction final
: public CachedReachabilityAA<AAIntraFnReachability, Instruction> {
+ using Base = CachedReachabilityAA<AAIntraFnReachability, Instruction>;
AAIntraFnReachabilityFunction(const IRPosition &IRP, Attributor &A)
- : CachedReachabilityAA<AAIntraFnReachability, Instruction>(IRP, A) {}
+ : Base(IRP, A) {}
bool isAssumedReachable(
Attributor &A, const Instruction &From, const Instruction &To,
@@ -3556,23 +3586,39 @@ struct AAIntraFnReachabilityFunction final
if (&From == &To)
return true;
- RQITy StackRQI(A, From, To, ExclusionSet);
+ RQITy StackRQI(A, From, To, ExclusionSet, false);
typename RQITy::Reachable Result;
- if (RQITy *RQIPtr = NonConstThis->checkQueryCache(A, StackRQI, Result)) {
- return NonConstThis->isReachableImpl(A, *RQIPtr);
- }
+ if (!NonConstThis->checkQueryCache(A, StackRQI, Result))
+ return NonConstThis->isReachableImpl(A, StackRQI);
return Result == RQITy::Reachable::Yes;
}
+ ChangeStatus updateImpl(Attributor &A) override {
+    // We only depend on liveness. DeadEdges is all we care about; check if any
+    // of them changed.
+ auto *LivenessAA =
+ A.getAAFor<AAIsDead>(*this, getIRPosition(), DepClassTy::OPTIONAL);
+ if (LivenessAA && llvm::all_of(DeadEdges, [&](const auto &DeadEdge) {
+ return LivenessAA->isEdgeDead(DeadEdge.first, DeadEdge.second);
+ })) {
+ return ChangeStatus::UNCHANGED;
+ }
+ DeadEdges.clear();
+ return Base::updateImpl(A);
+ }
+
bool isReachableImpl(Attributor &A, RQITy &RQI) override {
const Instruction *Origin = RQI.From;
+ bool UsedExclusionSet = false;
- auto WillReachInBlock = [=](const Instruction &From, const Instruction &To,
+ auto WillReachInBlock = [&](const Instruction &From, const Instruction &To,
const AA::InstExclusionSetTy *ExclusionSet) {
const Instruction *IP = &From;
while (IP && IP != &To) {
- if (ExclusionSet && IP != Origin && ExclusionSet->count(IP))
+ if (ExclusionSet && IP != Origin && ExclusionSet->count(IP)) {
+ UsedExclusionSet = true;
break;
+ }
IP = IP->getNextNode();
}
return IP == &To;
@@ -3587,7 +3633,12 @@ struct AAIntraFnReachabilityFunction final
// possible.
if (FromBB == ToBB &&
WillReachInBlock(*RQI.From, *RQI.To, RQI.ExclusionSet))
- return rememberResult(A, RQITy::Reachable::Yes, RQI);
+ return rememberResult(A, RQITy::Reachable::Yes, RQI, UsedExclusionSet);
+
+ // Check if reaching the ToBB block is sufficient or if even that would not
+ // ensure reaching the target. In the latter case we are done.
+ if (!WillReachInBlock(ToBB->front(), *RQI.To, RQI.ExclusionSet))
+ return rememberResult(A, RQITy::Reachable::No, RQI, UsedExclusionSet);
SmallPtrSet<const BasicBlock *, 16> ExclusionBlocks;
if (RQI.ExclusionSet)
@@ -3598,40 +3649,80 @@ struct AAIntraFnReachabilityFunction final
if (ExclusionBlocks.count(FromBB) &&
!WillReachInBlock(*RQI.From, *FromBB->getTerminator(),
RQI.ExclusionSet))
- return rememberResult(A, RQITy::Reachable::No, RQI);
+ return rememberResult(A, RQITy::Reachable::No, RQI, UsedExclusionSet);
SmallPtrSet<const BasicBlock *, 16> Visited;
SmallVector<const BasicBlock *, 16> Worklist;
Worklist.push_back(FromBB);
- auto &LivenessAA =
+ DenseSet<std::pair<const BasicBlock *, const BasicBlock *>> LocalDeadEdges;
+ auto *LivenessAA =
A.getAAFor<AAIsDead>(*this, getIRPosition(), DepClassTy::OPTIONAL);
while (!Worklist.empty()) {
const BasicBlock *BB = Worklist.pop_back_val();
if (!Visited.insert(BB).second)
continue;
for (const BasicBlock *SuccBB : successors(BB)) {
- if (LivenessAA.isEdgeDead(BB, SuccBB))
+ if (LivenessAA && LivenessAA->isEdgeDead(BB, SuccBB)) {
+ LocalDeadEdges.insert({BB, SuccBB});
continue;
- if (SuccBB == ToBB &&
- WillReachInBlock(SuccBB->front(), *RQI.To, RQI.ExclusionSet))
- return rememberResult(A, RQITy::Reachable::Yes, RQI);
- if (ExclusionBlocks.count(SuccBB))
+ }
+        // We already verified above that reaching ToBB suffices to reach the
+        // target.
+ if (SuccBB == ToBB)
+ return rememberResult(A, RQITy::Reachable::Yes, RQI,
+ UsedExclusionSet);
+ if (ExclusionBlocks.count(SuccBB)) {
+ UsedExclusionSet = true;
continue;
+ }
Worklist.push_back(SuccBB);
}
}
- return rememberResult(A, RQITy::Reachable::No, RQI);
+ DeadEdges.insert(LocalDeadEdges.begin(), LocalDeadEdges.end());
+ return rememberResult(A, RQITy::Reachable::No, RQI, UsedExclusionSet);
}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {}
+
+private:
+  // Set of assumed dead edges we used in the last query. If any of them
+  // changes, we update the state.
+ DenseSet<std::pair<const BasicBlock *, const BasicBlock *>> DeadEdges;
};
} // namespace
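At the block level, the search above is a plain worklist traversal from the source block that skips assumed-dead edges and exclusion blocks and succeeds as soon as the target block is reached; the instruction-level checks inside the first and last block are handled separately before and after the traversal. A standalone sketch on a toy CFG, with all LLVM types replaced by integers and sets (illustrative only):

#include <set>
#include <utility>
#include <vector>

using Block = int;                 // blocks are dense indices 0..N-1
struct ToyCFG {
  std::vector<std::vector<Block>> Succs;        // successor lists per block
  std::set<std::pair<Block, Block>> DeadEdges;  // edges assumed dead
};

bool blockReaches(const ToyCFG &CFG, Block FromBB, Block ToBB,
                  const std::set<Block> &ExclusionBlocks) {
  if (FromBB == ToBB)
    return true;                   // intra-block case handled elsewhere
  std::set<Block> Visited;
  std::vector<Block> Worklist{FromBB};
  while (!Worklist.empty()) {
    Block BB = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(BB).second)
      continue;
    for (Block Succ : CFG.Succs[BB]) {
      if (CFG.DeadEdges.count({BB, Succ}))
        continue;                  // edge proven dead, ignore it
      if (Succ == ToBB)
        return true;               // target block reached
      if (ExclusionBlocks.count(Succ))
        continue;                  // may not pass through excluded blocks
      Worklist.push_back(Succ);
    }
  }
  return false;
}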
/// ------------------------ NoAlias Argument Attribute ------------------------
+bool AANoAlias::isImpliedByIR(Attributor &A, const IRPosition &IRP,
+ Attribute::AttrKind ImpliedAttributeKind,
+ bool IgnoreSubsumingPositions) {
+ assert(ImpliedAttributeKind == Attribute::NoAlias &&
+ "Unexpected attribute kind");
+ Value *Val = &IRP.getAssociatedValue();
+ if (IRP.getPositionKind() != IRP_CALL_SITE_ARGUMENT) {
+ if (isa<AllocaInst>(Val))
+ return true;
+ } else {
+ IgnoreSubsumingPositions = true;
+ }
+
+ if (isa<UndefValue>(Val))
+ return true;
+
+ if (isa<ConstantPointerNull>(Val) &&
+ !NullPointerIsDefined(IRP.getAnchorScope(),
+ Val->getType()->getPointerAddressSpace()))
+ return true;
+
+ if (A.hasAttr(IRP, {Attribute::ByVal, Attribute::NoAlias},
+ IgnoreSubsumingPositions, Attribute::NoAlias))
+ return true;
+
+ return false;
+}
+
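The helper above derives `noalias` directly from the IR in a handful of cases: fresh stack objects, undef, null in an address space where it is not a defined pointer, and positions that already carry `byval` or `noalias`. A compact restatement with the IR queries as flags (the call-site-argument handling is reduced to a single boolean; illustrative only):

struct NoAliasFacts {
  bool IsCallSiteArgument = false;
  bool IsAlloca = false;           // fresh stack object: trivially noalias
  bool IsUndef = false;
  bool IsNullPtr = false;
  bool NullIsDefinedInAS = true;   // address-space property
  bool HasByValOrNoAlias = false;  // existing attribute already present
};

bool noAliasImpliedByIR(const NoAliasFacts &F) {
  if (!F.IsCallSiteArgument && F.IsAlloca)
    return true;
  if (F.IsUndef)
    return true;
  if (F.IsNullPtr && !F.NullIsDefinedInAS)
    return true;
  return F.HasByValOrNoAlias;
}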
namespace {
struct AANoAliasImpl : AANoAlias {
AANoAliasImpl(const IRPosition &IRP, Attributor &A) : AANoAlias(IRP, A) {
@@ -3639,7 +3730,7 @@ struct AANoAliasImpl : AANoAlias {
"Noalias is a pointer attribute");
}
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
return getAssumed() ? "noalias" : "may-alias";
}
};
@@ -3649,39 +3740,6 @@ struct AANoAliasFloating final : AANoAliasImpl {
AANoAliasFloating(const IRPosition &IRP, Attributor &A)
: AANoAliasImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoAliasImpl::initialize(A);
- Value *Val = &getAssociatedValue();
- do {
- CastInst *CI = dyn_cast<CastInst>(Val);
- if (!CI)
- break;
- Value *Base = CI->getOperand(0);
- if (!Base->hasOneUse())
- break;
- Val = Base;
- } while (true);
-
- if (!Val->getType()->isPointerTy()) {
- indicatePessimisticFixpoint();
- return;
- }
-
- if (isa<AllocaInst>(Val))
- indicateOptimisticFixpoint();
- else if (isa<ConstantPointerNull>(Val) &&
- !NullPointerIsDefined(getAnchorScope(),
- Val->getType()->getPointerAddressSpace()))
- indicateOptimisticFixpoint();
- else if (Val != &getAssociatedValue()) {
- const auto &ValNoAliasAA = A.getAAFor<AANoAlias>(
- *this, IRPosition::value(*Val), DepClassTy::OPTIONAL);
- if (ValNoAliasAA.isKnownNoAlias())
- indicateOptimisticFixpoint();
- }
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Implement this.
@@ -3696,18 +3754,14 @@ struct AANoAliasFloating final : AANoAliasImpl {
/// NoAlias attribute for an argument.
struct AANoAliasArgument final
- : AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl> {
- using Base = AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl>;
+ : AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl,
+ AANoAlias::StateType, false,
+ Attribute::NoAlias> {
+ using Base = AAArgumentFromCallSiteArguments<AANoAlias, AANoAliasImpl,
+ AANoAlias::StateType, false,
+ Attribute::NoAlias>;
AANoAliasArgument(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- Base::initialize(A);
- // See callsite argument attribute and callee argument attribute.
- if (hasAttr({Attribute::ByVal}))
- indicateOptimisticFixpoint();
- }
-
/// See AbstractAttribute::update(...).
ChangeStatus updateImpl(Attributor &A) override {
// We have to make sure no-alias on the argument does not break
@@ -3716,10 +3770,10 @@ struct AANoAliasArgument final
// function, otherwise we give up for now.
// If the function is no-sync, no-alias cannot break synchronization.
- const auto &NoSyncAA =
- A.getAAFor<AANoSync>(*this, IRPosition::function_scope(getIRPosition()),
- DepClassTy::OPTIONAL);
- if (NoSyncAA.isAssumedNoSync())
+    bool IsKnownNoSync;
+    if (AA::hasAssumedIRAttr<Attribute::NoSync>(
+            A, this, IRPosition::function_scope(getIRPosition()),
+            DepClassTy::OPTIONAL, IsKnownNoSync))
return Base::updateImpl(A);
// If the argument is read-only, no-alias cannot break synchronization.
@@ -3752,19 +3806,6 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
AANoAliasCallSiteArgument(const IRPosition &IRP, Attributor &A)
: AANoAliasImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- // See callsite argument attribute and callee argument attribute.
- const auto &CB = cast<CallBase>(getAnchorValue());
- if (CB.paramHasAttr(getCallSiteArgNo(), Attribute::NoAlias))
- indicateOptimisticFixpoint();
- Value &Val = getAssociatedValue();
- if (isa<ConstantPointerNull>(Val) &&
- !NullPointerIsDefined(getAnchorScope(),
- Val.getType()->getPointerAddressSpace()))
- indicateOptimisticFixpoint();
- }
-
/// Determine if the underlying value may alias with the call site argument
/// \p OtherArgNo of \p ICS (= the underlying call site).
bool mayAliasWithArgument(Attributor &A, AAResults *&AAR,
@@ -3779,27 +3820,29 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
if (!ArgOp->getType()->isPtrOrPtrVectorTy())
return false;
- auto &CBArgMemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
+ auto *CBArgMemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
*this, IRPosition::callsite_argument(CB, OtherArgNo), DepClassTy::NONE);
// If the argument is readnone, there is no read-write aliasing.
- if (CBArgMemBehaviorAA.isAssumedReadNone()) {
- A.recordDependence(CBArgMemBehaviorAA, *this, DepClassTy::OPTIONAL);
+ if (CBArgMemBehaviorAA && CBArgMemBehaviorAA->isAssumedReadNone()) {
+ A.recordDependence(*CBArgMemBehaviorAA, *this, DepClassTy::OPTIONAL);
return false;
}
// If the argument is readonly and the underlying value is readonly, there
// is no read-write aliasing.
bool IsReadOnly = MemBehaviorAA.isAssumedReadOnly();
- if (CBArgMemBehaviorAA.isAssumedReadOnly() && IsReadOnly) {
+ if (CBArgMemBehaviorAA && CBArgMemBehaviorAA->isAssumedReadOnly() &&
+ IsReadOnly) {
A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL);
- A.recordDependence(CBArgMemBehaviorAA, *this, DepClassTy::OPTIONAL);
+ A.recordDependence(*CBArgMemBehaviorAA, *this, DepClassTy::OPTIONAL);
return false;
}
// We have to utilize actual alias analysis queries so we need the object.
if (!AAR)
- AAR = A.getInfoCache().getAAResultsForFunction(*getAnchorScope());
+ AAR = A.getInfoCache().getAnalysisResultForFunction<AAManager>(
+ *getAnchorScope());
// Try to rule it out at the call site.
bool IsAliasing = !AAR || !AAR->isNoAlias(&getAssociatedValue(), ArgOp);
@@ -3811,10 +3854,8 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
return IsAliasing;
}
- bool
- isKnownNoAliasDueToNoAliasPreservation(Attributor &A, AAResults *&AAR,
- const AAMemoryBehavior &MemBehaviorAA,
- const AANoAlias &NoAliasAA) {
+ bool isKnownNoAliasDueToNoAliasPreservation(
+ Attributor &A, AAResults *&AAR, const AAMemoryBehavior &MemBehaviorAA) {
// We can deduce "noalias" if the following conditions hold.
// (i) Associated value is assumed to be noalias in the definition.
// (ii) Associated value is assumed to be no-capture in all the uses
@@ -3822,24 +3863,14 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
// (iii) There is no other pointer argument which could alias with the
// value.
- bool AssociatedValueIsNoAliasAtDef = NoAliasAA.isAssumedNoAlias();
- if (!AssociatedValueIsNoAliasAtDef) {
- LLVM_DEBUG(dbgs() << "[AANoAlias] " << getAssociatedValue()
- << " is not no-alias at the definition\n");
- return false;
- }
-
auto IsDereferenceableOrNull = [&](Value *O, const DataLayout &DL) {
- const auto &DerefAA = A.getAAFor<AADereferenceable>(
+ const auto *DerefAA = A.getAAFor<AADereferenceable>(
*this, IRPosition::value(*O), DepClassTy::OPTIONAL);
- return DerefAA.getAssumedDereferenceableBytes();
+ return DerefAA ? DerefAA->getAssumedDereferenceableBytes() : 0;
};
- A.recordDependence(NoAliasAA, *this, DepClassTy::OPTIONAL);
-
const IRPosition &VIRP = IRPosition::value(getAssociatedValue());
const Function *ScopeFn = VIRP.getAnchorScope();
- auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, VIRP, DepClassTy::NONE);
// Check whether the value is captured in the scope using AANoCapture.
// Look at CFG and check only uses possibly executed before this
// callsite.
@@ -3859,11 +3890,10 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
unsigned ArgNo = CB->getArgOperandNo(&U);
- const auto &NoCaptureAA = A.getAAFor<AANoCapture>(
- *this, IRPosition::callsite_argument(*CB, ArgNo),
- DepClassTy::OPTIONAL);
-
- if (NoCaptureAA.isAssumedNoCapture())
+ bool IsKnownNoCapture;
+ if (AA::hasAssumedIRAttr<Attribute::NoCapture>(
+ A, this, IRPosition::callsite_argument(*CB, ArgNo),
+ DepClassTy::OPTIONAL, IsKnownNoCapture))
return true;
}
}
@@ -3891,7 +3921,12 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
llvm_unreachable("unknown UseCaptureKind");
};
- if (!NoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
+ bool IsKnownNoCapture;
+ const AANoCapture *NoCaptureAA = nullptr;
+ bool IsAssumedNoCapture = AA::hasAssumedIRAttr<Attribute::NoCapture>(
+ A, this, VIRP, DepClassTy::NONE, IsKnownNoCapture, false, &NoCaptureAA);
+ if (!IsAssumedNoCapture &&
+ (!NoCaptureAA || !NoCaptureAA->isAssumedNoCaptureMaybeReturned())) {
if (!A.checkForAllUses(UsePred, *this, getAssociatedValue())) {
LLVM_DEBUG(
dbgs() << "[AANoAliasCSArg] " << getAssociatedValue()
@@ -3899,7 +3934,8 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
return false;
}
}
- A.recordDependence(NoCaptureAA, *this, DepClassTy::OPTIONAL);
+ if (NoCaptureAA)
+ A.recordDependence(*NoCaptureAA, *this, DepClassTy::OPTIONAL);
// Check there is no other pointer argument which could alias with the
// value passed at this call site.
@@ -3916,20 +3952,25 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl {
ChangeStatus updateImpl(Attributor &A) override {
// If the argument is readnone we are done as there are no accesses via the
// argument.
- auto &MemBehaviorAA =
+ auto *MemBehaviorAA =
A.getAAFor<AAMemoryBehavior>(*this, getIRPosition(), DepClassTy::NONE);
- if (MemBehaviorAA.isAssumedReadNone()) {
- A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL);
+ if (MemBehaviorAA && MemBehaviorAA->isAssumedReadNone()) {
+ A.recordDependence(*MemBehaviorAA, *this, DepClassTy::OPTIONAL);
return ChangeStatus::UNCHANGED;
}
+ bool IsKnownNoAlias;
const IRPosition &VIRP = IRPosition::value(getAssociatedValue());
- const auto &NoAliasAA =
- A.getAAFor<AANoAlias>(*this, VIRP, DepClassTy::NONE);
+ if (!AA::hasAssumedIRAttr<Attribute::NoAlias>(
+ A, this, VIRP, DepClassTy::REQUIRED, IsKnownNoAlias)) {
+ LLVM_DEBUG(dbgs() << "[AANoAlias] " << getAssociatedValue()
+ << " is not no-alias at the definition\n");
+ return indicatePessimisticFixpoint();
+ }
AAResults *AAR = nullptr;
- if (isKnownNoAliasDueToNoAliasPreservation(A, AAR, MemBehaviorAA,
- NoAliasAA)) {
+ if (MemBehaviorAA &&
+ isKnownNoAliasDueToNoAliasPreservation(A, AAR, *MemBehaviorAA)) {
LLVM_DEBUG(
dbgs() << "[AANoAlias] No-Alias deduced via no-alias preservation\n");
return ChangeStatus::UNCHANGED;
@@ -3947,14 +3988,6 @@ struct AANoAliasReturned final : AANoAliasImpl {
AANoAliasReturned(const IRPosition &IRP, Attributor &A)
: AANoAliasImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoAliasImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
- indicatePessimisticFixpoint();
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
@@ -3969,14 +4002,18 @@ struct AANoAliasReturned final : AANoAliasImpl {
return false;
const IRPosition &RVPos = IRPosition::value(RV);
- const auto &NoAliasAA =
- A.getAAFor<AANoAlias>(*this, RVPos, DepClassTy::REQUIRED);
- if (!NoAliasAA.isAssumedNoAlias())
+ bool IsKnownNoAlias;
+ if (!AA::hasAssumedIRAttr<Attribute::NoAlias>(
+ A, this, RVPos, DepClassTy::REQUIRED, IsKnownNoAlias))
return false;
- const auto &NoCaptureAA =
- A.getAAFor<AANoCapture>(*this, RVPos, DepClassTy::REQUIRED);
- return NoCaptureAA.isAssumedNoCaptureMaybeReturned();
+ bool IsKnownNoCapture;
+ const AANoCapture *NoCaptureAA = nullptr;
+ bool IsAssumedNoCapture = AA::hasAssumedIRAttr<Attribute::NoCapture>(
+ A, this, RVPos, DepClassTy::REQUIRED, IsKnownNoCapture, false,
+ &NoCaptureAA);
+ return IsAssumedNoCapture ||
+ (NoCaptureAA && NoCaptureAA->isAssumedNoCaptureMaybeReturned());
};
if (!A.checkForAllReturnedValues(CheckReturnValue, *this))
@@ -3994,14 +4031,6 @@ struct AANoAliasCallSiteReturned final : AANoAliasImpl {
AANoAliasCallSiteReturned(const IRPosition &IRP, Attributor &A)
: AANoAliasImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoAliasImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
- indicatePessimisticFixpoint();
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
@@ -4010,8 +4039,11 @@ struct AANoAliasCallSiteReturned final : AANoAliasImpl {
// redirecting requests to the callee argument.
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::returned(*F);
- auto &FnAA = A.getAAFor<AANoAlias>(*this, FnPos, DepClassTy::REQUIRED);
- return clampStateAndIndicateChange(getState(), FnAA.getState());
+ bool IsKnownNoAlias;
+ if (!AA::hasAssumedIRAttr<Attribute::NoAlias>(
+ A, this, FnPos, DepClassTy::REQUIRED, IsKnownNoAlias))
+ return indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
@@ -4025,13 +4057,6 @@ namespace {
struct AAIsDeadValueImpl : public AAIsDead {
AAIsDeadValueImpl(const IRPosition &IRP, Attributor &A) : AAIsDead(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- if (auto *Scope = getAnchorScope())
- if (!A.isRunOn(*Scope))
- indicatePessimisticFixpoint();
- }
-
/// See AAIsDead::isAssumedDead().
bool isAssumedDead() const override { return isAssumed(IS_DEAD); }
@@ -4055,7 +4080,7 @@ struct AAIsDeadValueImpl : public AAIsDead {
}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
return isAssumedDead() ? "assumed-dead" : "assumed-live";
}
@@ -4097,12 +4122,11 @@ struct AAIsDeadValueImpl : public AAIsDead {
return false;
const IRPosition &CallIRP = IRPosition::callsite_function(*CB);
- const auto &NoUnwindAA =
- A.getAndUpdateAAFor<AANoUnwind>(*this, CallIRP, DepClassTy::NONE);
- if (!NoUnwindAA.isAssumedNoUnwind())
+
+ bool IsKnownNoUnwind;
+ if (!AA::hasAssumedIRAttr<Attribute::NoUnwind>(
+ A, this, CallIRP, DepClassTy::OPTIONAL, IsKnownNoUnwind))
return false;
- if (!NoUnwindAA.isKnownNoUnwind())
- A.recordDependence(NoUnwindAA, *this, DepClassTy::OPTIONAL);
bool IsKnown;
return AA::isAssumedReadOnly(A, CallIRP, *this, IsKnown);
@@ -4124,13 +4148,22 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
Instruction *I = dyn_cast<Instruction>(&getAssociatedValue());
if (!isAssumedSideEffectFree(A, I)) {
- if (!isa_and_nonnull<StoreInst>(I))
+ if (!isa_and_nonnull<StoreInst>(I) && !isa_and_nonnull<FenceInst>(I))
indicatePessimisticFixpoint();
else
removeAssumedBits(HAS_NO_EFFECT);
}
}
+ bool isDeadFence(Attributor &A, FenceInst &FI) {
+ const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>(
+ IRPosition::function(*FI.getFunction()), *this, DepClassTy::NONE);
+ if (!ExecDomainAA || !ExecDomainAA->isNoOpFence(FI))
+ return false;
+ A.recordDependence(*ExecDomainAA, *this, DepClassTy::OPTIONAL);
+ return true;
+ }
+
bool isDeadStore(Attributor &A, StoreInst &SI,
SmallSetVector<Instruction *, 8> *AssumeOnlyInst = nullptr) {
// Lang ref now states volatile store is not UB/dead, let's skip them.
@@ -4161,12 +4194,14 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
return true;
if (auto *LI = dyn_cast<LoadInst>(V)) {
if (llvm::all_of(LI->uses(), [&](const Use &U) {
- return InfoCache.isOnlyUsedByAssume(
- cast<Instruction>(*U.getUser())) ||
- A.isAssumedDead(U, this, nullptr, UsedAssumedInformation);
+ auto &UserI = cast<Instruction>(*U.getUser());
+ if (InfoCache.isOnlyUsedByAssume(UserI)) {
+ if (AssumeOnlyInst)
+ AssumeOnlyInst->insert(&UserI);
+ return true;
+ }
+ return A.isAssumedDead(U, this, nullptr, UsedAssumedInformation);
})) {
- if (AssumeOnlyInst)
- AssumeOnlyInst->insert(LI);
return true;
}
}
@@ -4177,12 +4212,15 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
Instruction *I = dyn_cast<Instruction>(&getAssociatedValue());
if (isa_and_nonnull<StoreInst>(I))
if (isValidState())
return "assumed-dead-store";
- return AAIsDeadValueImpl::getAsStr();
+ if (isa_and_nonnull<FenceInst>(I))
+ if (isValidState())
+ return "assumed-dead-fence";
+ return AAIsDeadValueImpl::getAsStr(A);
}
/// See AbstractAttribute::updateImpl(...).
@@ -4191,6 +4229,9 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
if (auto *SI = dyn_cast_or_null<StoreInst>(I)) {
if (!isDeadStore(A, *SI))
return indicatePessimisticFixpoint();
+ } else if (auto *FI = dyn_cast_or_null<FenceInst>(I)) {
+ if (!isDeadFence(A, *FI))
+ return indicatePessimisticFixpoint();
} else {
if (!isAssumedSideEffectFree(A, I))
return indicatePessimisticFixpoint();
@@ -4226,6 +4267,11 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl {
}
return ChangeStatus::CHANGED;
}
+ if (auto *FI = dyn_cast<FenceInst>(I)) {
+ assert(isDeadFence(A, *FI));
+ A.deleteAfterManifest(*FI);
+ return ChangeStatus::CHANGED;
+ }
if (isAssumedSideEffectFree(A, I) && !isa<InvokeInst>(I)) {
A.deleteAfterManifest(*I);
return ChangeStatus::CHANGED;
@@ -4248,13 +4294,6 @@ struct AAIsDeadArgument : public AAIsDeadFloating {
AAIsDeadArgument(const IRPosition &IRP, Attributor &A)
: AAIsDeadFloating(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AAIsDeadFloating::initialize(A);
- if (!A.isFunctionIPOAmendable(*getAnchorScope()))
- indicatePessimisticFixpoint();
- }
-
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
Argument &Arg = *getAssociatedArgument();
@@ -4293,8 +4332,10 @@ struct AAIsDeadCallSiteArgument : public AAIsDeadValueImpl {
if (!Arg)
return indicatePessimisticFixpoint();
const IRPosition &ArgPos = IRPosition::argument(*Arg);
- auto &ArgAA = A.getAAFor<AAIsDead>(*this, ArgPos, DepClassTy::REQUIRED);
- return clampStateAndIndicateChange(getState(), ArgAA.getState());
+ auto *ArgAA = A.getAAFor<AAIsDead>(*this, ArgPos, DepClassTy::REQUIRED);
+ if (!ArgAA)
+ return indicatePessimisticFixpoint();
+ return clampStateAndIndicateChange(getState(), ArgAA->getState());
}
/// See AbstractAttribute::manifest(...).
@@ -4355,7 +4396,7 @@ struct AAIsDeadCallSiteReturned : public AAIsDeadFloating {
}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
return isAssumedDead()
? "assumed-dead"
: (getAssumed() ? "assumed-dead-users" : "assumed-live");
@@ -4416,10 +4457,7 @@ struct AAIsDeadFunction : public AAIsDead {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
Function *F = getAnchorScope();
- if (!F || F->isDeclaration() || !A.isRunOn(*F)) {
- indicatePessimisticFixpoint();
- return;
- }
+    assert(F && "Expected an anchor function");
if (!isAssumedDeadInternalFunction(A)) {
ToBeExploredFrom.insert(&F->getEntryBlock().front());
assumeLive(A, F->getEntryBlock());
@@ -4435,7 +4473,7 @@ struct AAIsDeadFunction : public AAIsDead {
}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
return "Live[#BB " + std::to_string(AssumedLiveBlocks.size()) + "/" +
std::to_string(getAnchorScope()->size()) + "][#TBEP " +
std::to_string(ToBeExploredFrom.size()) + "][#KDE " +
@@ -4465,9 +4503,10 @@ struct AAIsDeadFunction : public AAIsDead {
auto *CB = dyn_cast<CallBase>(DeadEndI);
if (!CB)
continue;
- const auto &NoReturnAA = A.getAndUpdateAAFor<AANoReturn>(
- *this, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL);
- bool MayReturn = !NoReturnAA.isAssumedNoReturn();
+ bool IsKnownNoReturn;
+ bool MayReturn = !AA::hasAssumedIRAttr<Attribute::NoReturn>(
+ A, this, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL,
+ IsKnownNoReturn);
if (MayReturn && (!Invoke2CallAllowed || !isa<InvokeInst>(CB)))
continue;
@@ -4564,7 +4603,7 @@ struct AAIsDeadFunction : public AAIsDead {
// functions. It can however cause dead functions to be treated as live.
for (const Instruction &I : BB)
if (const auto *CB = dyn_cast<CallBase>(&I))
- if (const Function *F = CB->getCalledFunction())
+ if (auto *F = dyn_cast_if_present<Function>(CB->getCalledOperand()))
if (F->hasLocalLinkage())
A.markLiveInternalFunction(*F);
return true;
@@ -4590,10 +4629,10 @@ identifyAliveSuccessors(Attributor &A, const CallBase &CB,
SmallVectorImpl<const Instruction *> &AliveSuccessors) {
const IRPosition &IPos = IRPosition::callsite_function(CB);
- const auto &NoReturnAA =
- A.getAndUpdateAAFor<AANoReturn>(AA, IPos, DepClassTy::OPTIONAL);
- if (NoReturnAA.isAssumedNoReturn())
- return !NoReturnAA.isKnownNoReturn();
+ bool IsKnownNoReturn;
+ if (AA::hasAssumedIRAttr<Attribute::NoReturn>(
+ A, &AA, IPos, DepClassTy::OPTIONAL, IsKnownNoReturn))
+ return !IsKnownNoReturn;
if (CB.isTerminator())
AliveSuccessors.push_back(&CB.getSuccessor(0)->front());
else
@@ -4615,10 +4654,11 @@ identifyAliveSuccessors(Attributor &A, const InvokeInst &II,
AliveSuccessors.push_back(&II.getUnwindDest()->front());
} else {
const IRPosition &IPos = IRPosition::callsite_function(II);
- const auto &AANoUnw =
- A.getAndUpdateAAFor<AANoUnwind>(AA, IPos, DepClassTy::OPTIONAL);
- if (AANoUnw.isAssumedNoUnwind()) {
- UsedAssumedInformation |= !AANoUnw.isKnownNoUnwind();
+
+ bool IsKnownNoUnwind;
+ if (AA::hasAssumedIRAttr<Attribute::NoUnwind>(
+ A, &AA, IPos, DepClassTy::OPTIONAL, IsKnownNoUnwind)) {
+ UsedAssumedInformation |= !IsKnownNoUnwind;
} else {
AliveSuccessors.push_back(&II.getUnwindDest()->front());
}
@@ -4829,25 +4869,21 @@ struct AADereferenceableImpl : AADereferenceable {
void initialize(Attributor &A) override {
Value &V = *getAssociatedValue().stripPointerCasts();
SmallVector<Attribute, 4> Attrs;
- getAttrs({Attribute::Dereferenceable, Attribute::DereferenceableOrNull},
- Attrs, /* IgnoreSubsumingPositions */ false, &A);
+ A.getAttrs(getIRPosition(),
+ {Attribute::Dereferenceable, Attribute::DereferenceableOrNull},
+ Attrs, /* IgnoreSubsumingPositions */ false);
for (const Attribute &Attr : Attrs)
takeKnownDerefBytesMaximum(Attr.getValueAsInt());
- const IRPosition &IRP = this->getIRPosition();
- NonNullAA = &A.getAAFor<AANonNull>(*this, IRP, DepClassTy::NONE);
+ // Ensure we initialize the non-null AA (if necessary).
+ bool IsKnownNonNull;
+ AA::hasAssumedIRAttr<Attribute::NonNull>(
+ A, this, getIRPosition(), DepClassTy::OPTIONAL, IsKnownNonNull);
bool CanBeNull, CanBeFreed;
takeKnownDerefBytesMaximum(V.getPointerDereferenceableBytes(
A.getDataLayout(), CanBeNull, CanBeFreed));
- bool IsFnInterface = IRP.isFnInterfaceKind();
- Function *FnScope = IRP.getAnchorScope();
- if (IsFnInterface && (!FnScope || !A.isFunctionIPOAmendable(*FnScope))) {
- indicatePessimisticFixpoint();
- return;
- }
-
if (Instruction *CtxI = getCtxI())
followUsesInMBEC(*this, A, getState(), *CtxI);
}
@@ -4894,17 +4930,24 @@ struct AADereferenceableImpl : AADereferenceable {
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
ChangeStatus Change = AADereferenceable::manifest(A);
- if (isAssumedNonNull() && hasAttr(Attribute::DereferenceableOrNull)) {
- removeAttrs({Attribute::DereferenceableOrNull});
+ bool IsKnownNonNull;
+ bool IsAssumedNonNull = AA::hasAssumedIRAttr<Attribute::NonNull>(
+ A, this, getIRPosition(), DepClassTy::NONE, IsKnownNonNull);
+ if (IsAssumedNonNull &&
+ A.hasAttr(getIRPosition(), Attribute::DereferenceableOrNull)) {
+ A.removeAttrs(getIRPosition(), {Attribute::DereferenceableOrNull});
return ChangeStatus::CHANGED;
}
return Change;
}
- void getDeducedAttributes(LLVMContext &Ctx,
+ void getDeducedAttributes(Attributor &A, LLVMContext &Ctx,
SmallVectorImpl<Attribute> &Attrs) const override {
// TODO: Add *_globally support
- if (isAssumedNonNull())
+ bool IsKnownNonNull;
+ bool IsAssumedNonNull = AA::hasAssumedIRAttr<Attribute::NonNull>(
+ A, this, getIRPosition(), DepClassTy::NONE, IsKnownNonNull);
+ if (IsAssumedNonNull)
Attrs.emplace_back(Attribute::getWithDereferenceableBytes(
Ctx, getAssumedDereferenceableBytes()));
else
@@ -4913,14 +4956,20 @@ struct AADereferenceableImpl : AADereferenceable {
}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
if (!getAssumedDereferenceableBytes())
return "unknown-dereferenceable";
+ bool IsKnownNonNull;
+ bool IsAssumedNonNull = false;
+ if (A)
+ IsAssumedNonNull = AA::hasAssumedIRAttr<Attribute::NonNull>(
+ *A, this, getIRPosition(), DepClassTy::NONE, IsKnownNonNull);
return std::string("dereferenceable") +
- (isAssumedNonNull() ? "" : "_or_null") +
+ (IsAssumedNonNull ? "" : "_or_null") +
(isAssumedGlobal() ? "_globally" : "") + "<" +
std::to_string(getKnownDereferenceableBytes()) + "-" +
- std::to_string(getAssumedDereferenceableBytes()) + ">";
+ std::to_string(getAssumedDereferenceableBytes()) + ">" +
+ (!A ? " [non-null is unknown]" : "");
}
};
@@ -4931,7 +4980,6 @@ struct AADereferenceableFloating : AADereferenceableImpl {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
-
bool Stripped;
bool UsedAssumedInformation = false;
SmallVector<AA::ValueAndContext> Values;
@@ -4955,10 +5003,10 @@ struct AADereferenceableFloating : AADereferenceableImpl {
A, *this, &V, DL, Offset, /* GetMinOffset */ false,
/* AllowNonInbounds */ true);
- const auto &AA = A.getAAFor<AADereferenceable>(
+ const auto *AA = A.getAAFor<AADereferenceable>(
*this, IRPosition::value(*Base), DepClassTy::REQUIRED);
int64_t DerefBytes = 0;
- if (!Stripped && this == &AA) {
+ if (!AA || (!Stripped && this == AA)) {
// Use IR information if we did not strip anything.
// TODO: track globally.
bool CanBeNull, CanBeFreed;
@@ -4966,7 +5014,7 @@ struct AADereferenceableFloating : AADereferenceableImpl {
Base->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
T.GlobalState.indicatePessimisticFixpoint();
} else {
- const DerefState &DS = AA.getState();
+ const DerefState &DS = AA->getState();
DerefBytes = DS.DerefBytesState.getAssumed();
T.GlobalState &= DS.GlobalState;
}
@@ -4981,7 +5029,7 @@ struct AADereferenceableFloating : AADereferenceableImpl {
T.takeAssumedDerefBytesMinimum(
std::max(int64_t(0), DerefBytes - OffsetSExt));
- if (this == &AA) {
+ if (this == AA) {
if (!Stripped) {
// If nothing was stripped IR information is all we got.
T.takeKnownDerefBytesMaximum(
@@ -5016,9 +5064,10 @@ struct AADereferenceableFloating : AADereferenceableImpl {
/// Dereferenceable attribute for a return value.
struct AADereferenceableReturned final
: AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl> {
+ using Base =
+ AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl>;
AADereferenceableReturned(const IRPosition &IRP, Attributor &A)
- : AAReturnedFromReturnedValues<AADereferenceable, AADereferenceableImpl>(
- IRP, A) {}
+ : Base(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
@@ -5095,8 +5144,9 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
IRPosition IRP = IRPosition::callsite_argument(*CB, ArgNo);
// As long as we only use known information there is no need to track
// dependences here.
- auto &AlignAA = A.getAAFor<AAAlign>(QueryingAA, IRP, DepClassTy::NONE);
- MA = MaybeAlign(AlignAA.getKnownAlign());
+ auto *AlignAA = A.getAAFor<AAAlign>(QueryingAA, IRP, DepClassTy::NONE);
+ if (AlignAA)
+ MA = MaybeAlign(AlignAA->getKnownAlign());
}
const DataLayout &DL = A.getDataLayout();
@@ -5122,7 +5172,7 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
// gcd(Offset, Alignment) is an alignment.
uint32_t gcd = std::gcd(uint32_t(abs((int32_t)Offset)), Alignment);
- Alignment = llvm::PowerOf2Floor(gcd);
+ Alignment = llvm::bit_floor(gcd);
}
}
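The gcd step above rests on a small arithmetic fact: if Base is a multiple of BaseAlign and a constant Offset is added, then gcd(|Offset|, BaseAlign) divides Base + Offset, so it is a valid (if conservative) alignment; rounding down to a power of two keeps it a legal IR alignment. A worked example using the standard library instead of the LLVM helpers (C++20 for std::bit_floor; illustrative only):

#include <bit>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <numeric>

uint32_t alignAfterOffset(uint32_t BaseAlign, int32_t Offset) {
  // gcd(|Offset|, BaseAlign) divides every address Base + Offset with
  // Base % BaseAlign == 0, so it is a correct alignment for the result.
  uint32_t G = std::gcd(uint32_t(std::abs(Offset)), BaseAlign);
  // IR alignments must be powers of two; round down.
  return std::bit_floor(G);
}

int main() {
  assert(alignAfterOffset(16, 12) == 4);  // gcd(12, 16) == 4
  assert(alignAfterOffset(16, 6) == 2);   // gcd(6, 16)  == 2
  assert(alignAfterOffset(16, 0) == 16);  // gcd(0, 16)  == 16, align preserved
  return 0;
}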
@@ -5135,20 +5185,13 @@ struct AAAlignImpl : AAAlign {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
SmallVector<Attribute, 4> Attrs;
- getAttrs({Attribute::Alignment}, Attrs);
+ A.getAttrs(getIRPosition(), {Attribute::Alignment}, Attrs);
for (const Attribute &Attr : Attrs)
takeKnownMaximum(Attr.getValueAsInt());
Value &V = *getAssociatedValue().stripPointerCasts();
takeKnownMaximum(V.getPointerAlignment(A.getDataLayout()).value());
- if (getIRPosition().isFnInterfaceKind() &&
- (!getAnchorScope() ||
- !A.isFunctionIPOAmendable(*getAssociatedFunction()))) {
- indicatePessimisticFixpoint();
- return;
- }
-
if (Instruction *CtxI = getCtxI())
followUsesInMBEC(*this, A, getState(), *CtxI);
}
@@ -5193,7 +5236,7 @@ struct AAAlignImpl : AAAlign {
// to avoid making the alignment explicit if it did not improve.
/// See AbstractAttribute::getDeducedAttributes
- void getDeducedAttributes(LLVMContext &Ctx,
+ void getDeducedAttributes(Attributor &A, LLVMContext &Ctx,
SmallVectorImpl<Attribute> &Attrs) const override {
if (getAssumedAlign() > 1)
Attrs.emplace_back(
@@ -5213,7 +5256,7 @@ struct AAAlignImpl : AAAlign {
}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
return "align<" + std::to_string(getKnownAlign().value()) + "-" +
std::to_string(getAssumedAlign().value()) + ">";
}
@@ -5243,9 +5286,9 @@ struct AAAlignFloating : AAAlignImpl {
auto VisitValueCB = [&](Value &V) -> bool {
if (isa<UndefValue>(V) || isa<ConstantPointerNull>(V))
return true;
- const auto &AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V),
+ const auto *AA = A.getAAFor<AAAlign>(*this, IRPosition::value(V),
DepClassTy::REQUIRED);
- if (!Stripped && this == &AA) {
+ if (!AA || (!Stripped && this == AA)) {
int64_t Offset;
unsigned Alignment = 1;
if (const Value *Base =
@@ -5258,7 +5301,7 @@ struct AAAlignFloating : AAAlignImpl {
uint32_t gcd =
std::gcd(uint32_t(abs((int32_t)Offset)), uint32_t(PA.value()));
- Alignment = llvm::PowerOf2Floor(gcd);
+ Alignment = llvm::bit_floor(gcd);
} else {
Alignment = V.getPointerAlignment(DL).value();
}
@@ -5267,7 +5310,7 @@ struct AAAlignFloating : AAAlignImpl {
T.indicatePessimisticFixpoint();
} else {
// Use abstract attribute information.
- const AAAlign::StateType &DS = AA.getState();
+ const AAAlign::StateType &DS = AA->getState();
T ^= DS;
}
return T.isValidState();
@@ -5293,14 +5336,6 @@ struct AAAlignReturned final
using Base = AAReturnedFromReturnedValues<AAAlign, AAAlignImpl>;
AAAlignReturned(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- Base::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
- indicatePessimisticFixpoint();
- }
-
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(aligned) }
};
@@ -5351,9 +5386,10 @@ struct AAAlignCallSiteArgument final : AAAlignFloating {
if (Argument *Arg = getAssociatedArgument()) {
// We only take known information from the argument
// so we do not need to track a dependence.
- const auto &ArgAlignAA = A.getAAFor<AAAlign>(
+ const auto *ArgAlignAA = A.getAAFor<AAAlign>(
*this, IRPosition::argument(*Arg), DepClassTy::NONE);
- takeKnownMaximum(ArgAlignAA.getKnownAlign().value());
+ if (ArgAlignAA)
+ takeKnownMaximum(ArgAlignAA->getKnownAlign().value());
}
return Changed;
}
@@ -5369,14 +5405,6 @@ struct AAAlignCallSiteReturned final
AAAlignCallSiteReturned(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- Base::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
- indicatePessimisticFixpoint();
- }
-
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); }
};
@@ -5389,14 +5417,14 @@ struct AANoReturnImpl : public AANoReturn {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- AANoReturn::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
- indicatePessimisticFixpoint();
+ bool IsKnown;
+ assert(!AA::hasAssumedIRAttr<Attribute::NoReturn>(
+ A, nullptr, getIRPosition(), DepClassTy::NONE, IsKnown));
+ (void)IsKnown;
}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
return getAssumed() ? "noreturn" : "may-return";
}
@@ -5425,17 +5453,6 @@ struct AANoReturnCallSite final : AANoReturnImpl {
AANoReturnCallSite(const IRPosition &IRP, Attributor &A)
: AANoReturnImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AANoReturnImpl::initialize(A);
- if (Function *F = getAssociatedFunction()) {
- const IRPosition &FnPos = IRPosition::function(*F);
- auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos, DepClassTy::REQUIRED);
- if (!FnAA.isAssumedNoReturn())
- indicatePessimisticFixpoint();
- }
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
@@ -5444,8 +5461,11 @@ struct AANoReturnCallSite final : AANoReturnImpl {
// redirecting requests to the callee argument.
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
- auto &FnAA = A.getAAFor<AANoReturn>(*this, FnPos, DepClassTy::REQUIRED);
- return clampStateAndIndicateChange(getState(), FnAA.getState());
+ bool IsKnownNoReturn;
+ if (!AA::hasAssumedIRAttr<Attribute::NoReturn>(
+ A, this, FnPos, DepClassTy::REQUIRED, IsKnownNoReturn))
+ return indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
@@ -5477,6 +5497,15 @@ struct AAInstanceInfoImpl : public AAInstanceInfo {
indicateOptimisticFixpoint();
return;
}
+ if (auto *I = dyn_cast<Instruction>(&V)) {
+ const auto *CI =
+ A.getInfoCache().getAnalysisResultForFunction<CycleAnalysis>(
+ *I->getFunction());
+ if (mayBeInCycle(CI, I, /* HeaderOnly */ false)) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+ }
}
/// See AbstractAttribute::updateImpl(...).
@@ -5495,9 +5524,10 @@ struct AAInstanceInfoImpl : public AAInstanceInfo {
if (!Scope)
return indicateOptimisticFixpoint();
- auto &NoRecurseAA = A.getAAFor<AANoRecurse>(
- *this, IRPosition::function(*Scope), DepClassTy::OPTIONAL);
- if (NoRecurseAA.isAssumedNoRecurse())
+ bool IsKnownNoRecurse;
+ if (AA::hasAssumedIRAttr<Attribute::NoRecurse>(
+ A, this, IRPosition::function(*Scope), DepClassTy::OPTIONAL,
+ IsKnownNoRecurse))
return Changed;
auto UsePred = [&](const Use &U, bool &Follow) {
@@ -5514,15 +5544,16 @@ struct AAInstanceInfoImpl : public AAInstanceInfo {
if (auto *CB = dyn_cast<CallBase>(UserI)) {
      // This check does not guarantee uniqueness, but for now it ensures that
      // we cannot end up with two versions of \p U thinking it was one.
- if (!CB->getCalledFunction() ||
- !CB->getCalledFunction()->hasLocalLinkage())
+ auto *Callee = dyn_cast_if_present<Function>(CB->getCalledOperand());
+ if (!Callee || !Callee->hasLocalLinkage())
return true;
if (!CB->isArgOperand(&U))
return false;
- const auto &ArgInstanceInfoAA = A.getAAFor<AAInstanceInfo>(
+ const auto *ArgInstanceInfoAA = A.getAAFor<AAInstanceInfo>(
*this, IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U)),
DepClassTy::OPTIONAL);
- if (!ArgInstanceInfoAA.isAssumedUniqueForAnalysis())
+ if (!ArgInstanceInfoAA ||
+ !ArgInstanceInfoAA->isAssumedUniqueForAnalysis())
return false;
// If this call base might reach the scope again we might forward the
// argument back here. This is very conservative.
@@ -5554,7 +5585,7 @@ struct AAInstanceInfoImpl : public AAInstanceInfo {
}
/// See AbstractState::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
return isAssumedUniqueForAnalysis() ? "<unique [fAa]>" : "<unknown>";
}
@@ -5589,9 +5620,11 @@ struct AAInstanceInfoCallSiteArgument final : AAInstanceInfoImpl {
if (!Arg)
return indicatePessimisticFixpoint();
const IRPosition &ArgPos = IRPosition::argument(*Arg);
- auto &ArgAA =
+ auto *ArgAA =
A.getAAFor<AAInstanceInfo>(*this, ArgPos, DepClassTy::REQUIRED);
- return clampStateAndIndicateChange(getState(), ArgAA.getState());
+ if (!ArgAA)
+ return indicatePessimisticFixpoint();
+ return clampStateAndIndicateChange(getState(), ArgAA->getState());
}
};
@@ -5621,6 +5654,95 @@ struct AAInstanceInfoCallSiteReturned final : AAInstanceInfoFloating {
} // namespace
/// ----------------------- Variable Capturing ---------------------------------
+bool AANoCapture::isImpliedByIR(Attributor &A, const IRPosition &IRP,
+ Attribute::AttrKind ImpliedAttributeKind,
+ bool IgnoreSubsumingPositions) {
+ assert(ImpliedAttributeKind == Attribute::NoCapture &&
+ "Unexpected attribute kind");
+ Value &V = IRP.getAssociatedValue();
+ if (!IRP.isArgumentPosition())
+ return V.use_empty();
+
+ // You cannot "capture" null in the default address space.
+ if (isa<UndefValue>(V) || (isa<ConstantPointerNull>(V) &&
+ V.getType()->getPointerAddressSpace() == 0)) {
+ return true;
+ }
+
+ if (A.hasAttr(IRP, {Attribute::NoCapture},
+ /* IgnoreSubsumingPositions */ true, Attribute::NoCapture))
+ return true;
+
+ if (IRP.getPositionKind() == IRP_CALL_SITE_ARGUMENT)
+ if (Argument *Arg = IRP.getAssociatedArgument())
+ if (A.hasAttr(IRPosition::argument(*Arg),
+ {Attribute::NoCapture, Attribute::ByVal},
+ /* IgnoreSubsumingPositions */ true)) {
+ A.manifestAttrs(IRP,
+ Attribute::get(V.getContext(), Attribute::NoCapture));
+ return true;
+ }
+
+ if (const Function *F = IRP.getAssociatedFunction()) {
+ // Check what state the associated function can actually capture.
+ AANoCapture::StateType State;
+ determineFunctionCaptureCapabilities(IRP, *F, State);
+ if (State.isKnown(NO_CAPTURE)) {
+ A.manifestAttrs(IRP,
+ Attribute::get(V.getContext(), Attribute::NoCapture));
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// Set the NOT_CAPTURED_IN_MEM and NOT_CAPTURED_IN_RET bits in \p State
+/// depending on the ability of the function associated with \p IRP to capture
+/// state in memory and through "returning/throwing", respectively.
+void AANoCapture::determineFunctionCaptureCapabilities(const IRPosition &IRP,
+ const Function &F,
+ BitIntegerState &State) {
+ // TODO: Once we have memory behavior attributes we should use them here.
+
+ // If we know we cannot communicate or write to memory, we do not care about
+ // ptr2int anymore.
+ bool ReadOnly = F.onlyReadsMemory();
+ bool NoThrow = F.doesNotThrow();
+ bool IsVoidReturn = F.getReturnType()->isVoidTy();
+ if (ReadOnly && NoThrow && IsVoidReturn) {
+ State.addKnownBits(NO_CAPTURE);
+ return;
+ }
+
+  // A function cannot capture state in memory if it only reads memory; it
+  // can, however, return/throw state and the state might be influenced by the
+  // pointer value, e.g., loading from a returned pointer might reveal a bit.
+ if (ReadOnly)
+ State.addKnownBits(NOT_CAPTURED_IN_MEM);
+
+  // A function cannot communicate state back if it does not throw exceptions
+  // and does not return values.
+ if (NoThrow && IsVoidReturn)
+ State.addKnownBits(NOT_CAPTURED_IN_RET);
+
+ // Check existing "returned" attributes.
+ int ArgNo = IRP.getCalleeArgNo();
+ if (!NoThrow || ArgNo < 0 ||
+ !F.getAttributes().hasAttrSomewhere(Attribute::Returned))
+ return;
+
+ for (unsigned U = 0, E = F.arg_size(); U < E; ++U)
+ if (F.hasParamAttribute(U, Attribute::Returned)) {
+ if (U == unsigned(ArgNo))
+ State.removeAssumedBits(NOT_CAPTURED_IN_RET);
+ else if (ReadOnly)
+ State.addKnownBits(NO_CAPTURE);
+ else
+ State.addKnownBits(NOT_CAPTURED_IN_RET);
+ break;
+ }
+}
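determineFunctionCaptureCapabilities reasons about three escape channels: memory, return/unwind, and integers (ptr2int). Read-only behavior rules out the memory channel, nothrow plus a void return rules out the return channel, and the `returned` parameter attribute narrows things further. A standalone sketch of the known-bit computation below; the bit values and the single returned-argument index are illustrative simplifications, and the assumed-bit removal for the returned argument itself is omitted:

#include <cstdint>

enum CaptureBits : uint32_t {
  NOT_CAPTURED_IN_MEM = 1u << 0,
  NOT_CAPTURED_IN_INT = 1u << 1,
  NOT_CAPTURED_IN_RET = 1u << 2,
  NO_CAPTURE = NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT | NOT_CAPTURED_IN_RET,
};

uint32_t captureCapabilities(bool ReadOnly, bool NoThrow, bool VoidReturn,
                             int QueriedArgNo, int ReturnedArgNo) {
  if (ReadOnly && NoThrow && VoidReturn)
    return NO_CAPTURE;                 // no channel left for the pointer to escape
  uint32_t Known = 0;
  if (ReadOnly)
    Known |= NOT_CAPTURED_IN_MEM;      // cannot be stashed in memory
  if (NoThrow && VoidReturn)
    Known |= NOT_CAPTURED_IN_RET;      // no value/exception back to the caller
  // `returned` parameter attribute: only the returned argument itself can
  // escape through the return value.
  if (NoThrow && QueriedArgNo >= 0 && ReturnedArgNo >= 0 &&
      QueriedArgNo != ReturnedArgNo)
    Known |= ReadOnly ? NO_CAPTURE : NOT_CAPTURED_IN_RET;
  return Known;
}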
namespace {
/// A class to hold the state for no-capture attributes.
@@ -5629,39 +5751,17 @@ struct AANoCaptureImpl : public AANoCapture {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- if (hasAttr(getAttrKind(), /* IgnoreSubsumingPositions */ true)) {
- indicateOptimisticFixpoint();
- return;
- }
- Function *AnchorScope = getAnchorScope();
- if (isFnInterfaceKind() &&
- (!AnchorScope || !A.isFunctionIPOAmendable(*AnchorScope))) {
- indicatePessimisticFixpoint();
- return;
- }
-
- // You cannot "capture" null in the default address space.
- if (isa<ConstantPointerNull>(getAssociatedValue()) &&
- getAssociatedValue().getType()->getPointerAddressSpace() == 0) {
- indicateOptimisticFixpoint();
- return;
- }
-
- const Function *F =
- isArgumentPosition() ? getAssociatedFunction() : AnchorScope;
-
- // Check what state the associated function can actually capture.
- if (F)
- determineFunctionCaptureCapabilities(getIRPosition(), *F, *this);
- else
- indicatePessimisticFixpoint();
+ bool IsKnown;
+ assert(!AA::hasAssumedIRAttr<Attribute::NoCapture>(
+ A, nullptr, getIRPosition(), DepClassTy::NONE, IsKnown));
+ (void)IsKnown;
}
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override;
/// see AbstractAttribute::isAssumedNoCaptureMaybeReturned(...).
- void getDeducedAttributes(LLVMContext &Ctx,
+ void getDeducedAttributes(Attributor &A, LLVMContext &Ctx,
SmallVectorImpl<Attribute> &Attrs) const override {
if (!isAssumedNoCaptureMaybeReturned())
return;
@@ -5674,51 +5774,8 @@ struct AANoCaptureImpl : public AANoCapture {
}
}
- /// Set the NOT_CAPTURED_IN_MEM and NOT_CAPTURED_IN_RET bits in \p Known
- /// depending on the ability of the function associated with \p IRP to capture
- /// state in memory and through "returning/throwing", respectively.
- static void determineFunctionCaptureCapabilities(const IRPosition &IRP,
- const Function &F,
- BitIntegerState &State) {
- // TODO: Once we have memory behavior attributes we should use them here.
-
- // If we know we cannot communicate or write to memory, we do not care about
- // ptr2int anymore.
- if (F.onlyReadsMemory() && F.doesNotThrow() &&
- F.getReturnType()->isVoidTy()) {
- State.addKnownBits(NO_CAPTURE);
- return;
- }
-
- // A function cannot capture state in memory if it only reads memory, it can
- // however return/throw state and the state might be influenced by the
- // pointer value, e.g., loading from a returned pointer might reveal a bit.
- if (F.onlyReadsMemory())
- State.addKnownBits(NOT_CAPTURED_IN_MEM);
-
- // A function cannot communicate state back if it does not through
- // exceptions and doesn not return values.
- if (F.doesNotThrow() && F.getReturnType()->isVoidTy())
- State.addKnownBits(NOT_CAPTURED_IN_RET);
-
- // Check existing "returned" attributes.
- int ArgNo = IRP.getCalleeArgNo();
- if (F.doesNotThrow() && ArgNo >= 0) {
- for (unsigned u = 0, e = F.arg_size(); u < e; ++u)
- if (F.hasParamAttribute(u, Attribute::Returned)) {
- if (u == unsigned(ArgNo))
- State.removeAssumedBits(NOT_CAPTURED_IN_RET);
- else if (F.onlyReadsMemory())
- State.addKnownBits(NO_CAPTURE);
- else
- State.addKnownBits(NOT_CAPTURED_IN_RET);
- break;
- }
- }
- }
-
/// See AbstractState::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
if (isKnownNoCapture())
return "known not-captured";
if (isAssumedNoCapture())
@@ -5771,12 +5828,15 @@ struct AANoCaptureImpl : public AANoCapture {
const IRPosition &CSArgPos = IRPosition::callsite_argument(*CB, ArgNo);
// If we have an abstract no-capture attribute for the argument we can use
// it to justify a non-capture attribute here. This allows recursion!
- auto &ArgNoCaptureAA =
- A.getAAFor<AANoCapture>(*this, CSArgPos, DepClassTy::REQUIRED);
- if (ArgNoCaptureAA.isAssumedNoCapture())
+ bool IsKnownNoCapture;
+ const AANoCapture *ArgNoCaptureAA = nullptr;
+ bool IsAssumedNoCapture = AA::hasAssumedIRAttr<Attribute::NoCapture>(
+ A, this, CSArgPos, DepClassTy::REQUIRED, IsKnownNoCapture, false,
+ &ArgNoCaptureAA);
+ if (IsAssumedNoCapture)
return isCapturedIn(State, /* Memory */ false, /* Integer */ false,
/* Return */ false);
- if (ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
+ if (ArgNoCaptureAA && ArgNoCaptureAA->isAssumedNoCaptureMaybeReturned()) {
Follow = true;
return isCapturedIn(State, /* Memory */ false, /* Integer */ false,
/* Return */ false);
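The same mechanical rewrite recurs throughout the hunks below: instead of fetching an AA object and querying it directly, the code asks AA::hasAssumedIRAttr<> whether the attribute is assumed, and only falls back to the optionally returned AA pointer for the weaker "maybe returned" query. A hedged sketch of that shape, mirroring the call sites visible in this diff (header path and exact parameter types inferred from those call sites, not checked against the upstream header):

  #include "llvm/Transforms/IPO/Attributor.h"
  using namespace llvm;

  // Sketch: query the no-capture property the way the updated hunks do.
  // Old pattern (removed lines):
  //   const auto &AA = A.getAAFor<AANoCapture>(*this, Pos, DepClassTy::REQUIRED);
  //   if (AA.isAssumedNoCapture()) ...
  static bool queryNoCapture(Attributor &A, const AbstractAttribute &QueryingAA,
                             const IRPosition &Pos) {
    bool IsKnown = false;
    const AANoCapture *NoCaptureAA = nullptr;
    if (AA::hasAssumedIRAttr<Attribute::NoCapture>(
            A, &QueryingAA, Pos, DepClassTy::REQUIRED, IsKnown,
            /* boolean flag passed as `false` in the call sites above */ false,
            &NoCaptureAA))
      return true; // full no-capture is assumed; IsKnown tells if it is known
    // The weaker property is still reachable through the returned AA pointer.
    return NoCaptureAA && NoCaptureAA->isAssumedNoCaptureMaybeReturned();
  }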
@@ -5830,37 +5890,35 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
// TODO: we could do this in a more sophisticated way inside
// AAReturnedValues, e.g., track all values that escape through returns
// directly somehow.
- auto CheckReturnedArgs = [&](const AAReturnedValues &RVAA) {
- if (!RVAA.getState().isValidState())
+ auto CheckReturnedArgs = [&](bool &UsedAssumedInformation) {
+ SmallVector<AA::ValueAndContext> Values;
+ if (!A.getAssumedSimplifiedValues(IRPosition::returned(*F), this, Values,
+ AA::ValueScope::Intraprocedural,
+ UsedAssumedInformation))
return false;
bool SeenConstant = false;
- for (const auto &It : RVAA.returned_values()) {
- if (isa<Constant>(It.first)) {
+ for (const AA::ValueAndContext &VAC : Values) {
+ if (isa<Constant>(VAC.getValue())) {
if (SeenConstant)
return false;
SeenConstant = true;
- } else if (!isa<Argument>(It.first) ||
- It.first == getAssociatedArgument())
+ } else if (!isa<Argument>(VAC.getValue()) ||
+ VAC.getValue() == getAssociatedArgument())
return false;
}
return true;
};
- const auto &NoUnwindAA =
- A.getAAFor<AANoUnwind>(*this, FnPos, DepClassTy::OPTIONAL);
- if (NoUnwindAA.isAssumedNoUnwind()) {
+ bool IsKnownNoUnwind;
+ if (AA::hasAssumedIRAttr<Attribute::NoUnwind>(
+ A, this, FnPos, DepClassTy::OPTIONAL, IsKnownNoUnwind)) {
bool IsVoidTy = F->getReturnType()->isVoidTy();
- const AAReturnedValues *RVAA =
- IsVoidTy ? nullptr
- : &A.getAAFor<AAReturnedValues>(*this, FnPos,
-
- DepClassTy::OPTIONAL);
- if (IsVoidTy || CheckReturnedArgs(*RVAA)) {
+ bool UsedAssumedInformation = false;
+ if (IsVoidTy || CheckReturnedArgs(UsedAssumedInformation)) {
T.addKnownBits(NOT_CAPTURED_IN_RET);
if (T.isKnown(NOT_CAPTURED_IN_MEM))
return ChangeStatus::UNCHANGED;
- if (NoUnwindAA.isKnownNoUnwind() &&
- (IsVoidTy || RVAA->getState().isAtFixpoint())) {
+ if (IsKnownNoUnwind && (IsVoidTy || !UsedAssumedInformation)) {
addKnownBits(NOT_CAPTURED_IN_RET);
if (isKnown(NOT_CAPTURED_IN_MEM))
return indicateOptimisticFixpoint();
@@ -5869,9 +5927,9 @@ ChangeStatus AANoCaptureImpl::updateImpl(Attributor &A) {
}
auto IsDereferenceableOrNull = [&](Value *O, const DataLayout &DL) {
- const auto &DerefAA = A.getAAFor<AADereferenceable>(
+ const auto *DerefAA = A.getAAFor<AADereferenceable>(
*this, IRPosition::value(*O), DepClassTy::OPTIONAL);
- return DerefAA.getAssumedDereferenceableBytes();
+ return DerefAA && DerefAA->getAssumedDereferenceableBytes();
};
auto UseCheck = [&](const Use &U, bool &Follow) -> bool {
@@ -5913,14 +5971,6 @@ struct AANoCaptureCallSiteArgument final : AANoCaptureImpl {
AANoCaptureCallSiteArgument(const IRPosition &IRP, Attributor &A)
: AANoCaptureImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- if (Argument *Arg = getAssociatedArgument())
- if (Arg->hasByValAttr())
- indicateOptimisticFixpoint();
- AANoCaptureImpl::initialize(A);
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
@@ -5931,8 +5981,15 @@ struct AANoCaptureCallSiteArgument final : AANoCaptureImpl {
if (!Arg)
return indicatePessimisticFixpoint();
const IRPosition &ArgPos = IRPosition::argument(*Arg);
- auto &ArgAA = A.getAAFor<AANoCapture>(*this, ArgPos, DepClassTy::REQUIRED);
- return clampStateAndIndicateChange(getState(), ArgAA.getState());
+ bool IsKnownNoCapture;
+ const AANoCapture *ArgAA = nullptr;
+ if (AA::hasAssumedIRAttr<Attribute::NoCapture>(
+ A, this, ArgPos, DepClassTy::REQUIRED, IsKnownNoCapture, false,
+ &ArgAA))
+ return ChangeStatus::UNCHANGED;
+ if (!ArgAA || !ArgAA->isAssumedNoCaptureMaybeReturned())
+ return indicatePessimisticFixpoint();
+ return clampStateAndIndicateChange(getState(), ArgAA->getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -6023,7 +6080,7 @@ struct AAValueSimplifyImpl : AAValueSimplify {
}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
LLVM_DEBUG({
dbgs() << "SAV: " << (bool)SimplifiedAssociatedValue << " ";
if (SimplifiedAssociatedValue && *SimplifiedAssociatedValue)
@@ -6156,19 +6213,21 @@ struct AAValueSimplifyImpl : AAValueSimplify {
return false;
// This will also pass the call base context.
- const auto &AA =
+ const auto *AA =
A.getAAFor<AAType>(*this, getIRPosition(), DepClassTy::NONE);
+ if (!AA)
+ return false;
- std::optional<Constant *> COpt = AA.getAssumedConstant(A);
+ std::optional<Constant *> COpt = AA->getAssumedConstant(A);
if (!COpt) {
SimplifiedAssociatedValue = std::nullopt;
- A.recordDependence(AA, *this, DepClassTy::OPTIONAL);
+ A.recordDependence(*AA, *this, DepClassTy::OPTIONAL);
return true;
}
if (auto *C = *COpt) {
SimplifiedAssociatedValue = C;
- A.recordDependence(AA, *this, DepClassTy::OPTIONAL);
+ A.recordDependence(*AA, *this, DepClassTy::OPTIONAL);
return true;
}
return false;
@@ -6215,11 +6274,10 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
void initialize(Attributor &A) override {
AAValueSimplifyImpl::initialize(A);
- if (!getAnchorScope() || getAnchorScope()->isDeclaration())
- indicatePessimisticFixpoint();
- if (hasAttr({Attribute::InAlloca, Attribute::Preallocated,
- Attribute::StructRet, Attribute::Nest, Attribute::ByVal},
- /* IgnoreSubsumingPositions */ true))
+ if (A.hasAttr(getIRPosition(),
+ {Attribute::InAlloca, Attribute::Preallocated,
+ Attribute::StructRet, Attribute::Nest, Attribute::ByVal},
+ /* IgnoreSubsumingPositions */ true))
indicatePessimisticFixpoint();
}
@@ -6266,7 +6324,7 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
bool Success;
bool UsedAssumedInformation = false;
if (hasCallBaseContext() &&
- getCallBaseContext()->getCalledFunction() == Arg->getParent())
+ getCallBaseContext()->getCalledOperand() == Arg->getParent())
Success = PredForCallSite(
AbstractCallSite(&getCallBaseContext()->getCalledOperandUse()));
else
@@ -6401,10 +6459,7 @@ struct AAValueSimplifyCallSiteReturned : AAValueSimplifyImpl {
void initialize(Attributor &A) override {
AAValueSimplifyImpl::initialize(A);
Function *Fn = getAssociatedFunction();
- if (!Fn) {
- indicatePessimisticFixpoint();
- return;
- }
+ assert(Fn && "Did expect an associated function");
for (Argument &Arg : Fn->args()) {
if (Arg.hasReturnedAttr()) {
auto IRP = IRPosition::callsite_argument(*cast<CallBase>(getCtxI()),
@@ -6421,26 +6476,7 @@ struct AAValueSimplifyCallSiteReturned : AAValueSimplifyImpl {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
- auto Before = SimplifiedAssociatedValue;
- auto &RetAA = A.getAAFor<AAReturnedValues>(
- *this, IRPosition::function(*getAssociatedFunction()),
- DepClassTy::REQUIRED);
- auto PredForReturned =
- [&](Value &RetVal, const SmallSetVector<ReturnInst *, 4> &RetInsts) {
- bool UsedAssumedInformation = false;
- std::optional<Value *> CSRetVal =
- A.translateArgumentToCallSiteContent(
- &RetVal, *cast<CallBase>(getCtxI()), *this,
- UsedAssumedInformation);
- SimplifiedAssociatedValue = AA::combineOptionalValuesInAAValueLatice(
- SimplifiedAssociatedValue, CSRetVal, getAssociatedType());
- return SimplifiedAssociatedValue != std::optional<Value *>(nullptr);
- };
- if (!RetAA.checkForAllReturnedValuesAndReturnInsts(PredForReturned))
- if (!askSimplifiedValueForOtherAAs(A))
return indicatePessimisticFixpoint();
- return Before == SimplifiedAssociatedValue ? ChangeStatus::UNCHANGED
- : ChangeStatus ::CHANGED;
}
void trackStatistics() const override {
@@ -6581,7 +6617,7 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
SCB);
}
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
unsigned NumH2SMallocs = 0, NumInvalidMallocs = 0;
for (const auto &It : AllocationInfos) {
if (It.second->Status == AllocationInfo::INVALID)
@@ -6773,10 +6809,10 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
const Function *F = getAnchorScope();
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
- const auto &LivenessAA =
+ const auto *LivenessAA =
A.getAAFor<AAIsDead>(*this, IRPosition::function(*F), DepClassTy::NONE);
- MustBeExecutedContextExplorer &Explorer =
+ MustBeExecutedContextExplorer *Explorer =
A.getInfoCache().getMustBeExecutedContextExplorer();
bool StackIsAccessibleByOtherThreads =
@@ -6813,7 +6849,7 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
// No need to analyze dead calls, ignore them instead.
bool UsedAssumedInformation = false;
- if (A.isAssumedDead(*DI.CB, this, &LivenessAA, UsedAssumedInformation,
+ if (A.isAssumedDead(*DI.CB, this, LivenessAA, UsedAssumedInformation,
/* CheckBBLivenessOnly */ true))
continue;
@@ -6855,9 +6891,9 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
// doesn't apply as the pointer could be shared and needs to be placed in
// "shareable" memory.
if (!StackIsAccessibleByOtherThreads) {
- auto &NoSyncAA =
- A.getAAFor<AANoSync>(*this, getIRPosition(), DepClassTy::OPTIONAL);
- if (!NoSyncAA.isAssumedNoSync()) {
+ bool IsKnownNoSync;
+ if (!AA::hasAssumedIRAttr<Attribute::NoSync>(
+ A, this, getIRPosition(), DepClassTy::OPTIONAL, IsKnownNoSync)) {
LLVM_DEBUG(
dbgs() << "[H2S] found an escaping use, stack is not accessible by "
"other threads and function is not nosync:\n");
@@ -6902,7 +6938,7 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
return false;
}
Instruction *CtxI = isa<InvokeInst>(AI.CB) ? AI.CB : AI.CB->getNextNode();
- if (!Explorer.findInContextOf(UniqueFree, CtxI)) {
+ if (!Explorer || !Explorer->findInContextOf(UniqueFree, CtxI)) {
LLVM_DEBUG(
dbgs()
<< "[H2S] unique free call might not be executed with the allocation "
@@ -6938,22 +6974,21 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
}
unsigned ArgNo = CB->getArgOperandNo(&U);
+ auto CBIRP = IRPosition::callsite_argument(*CB, ArgNo);
- const auto &NoCaptureAA = A.getAAFor<AANoCapture>(
- *this, IRPosition::callsite_argument(*CB, ArgNo),
- DepClassTy::OPTIONAL);
+ bool IsKnownNoCapture;
+ bool IsAssumedNoCapture = AA::hasAssumedIRAttr<Attribute::NoCapture>(
+ A, this, CBIRP, DepClassTy::OPTIONAL, IsKnownNoCapture);
// If a call site argument use is nofree, we are fine.
- const auto &ArgNoFreeAA = A.getAAFor<AANoFree>(
- *this, IRPosition::callsite_argument(*CB, ArgNo),
- DepClassTy::OPTIONAL);
+ bool IsKnownNoFree;
+ bool IsAssumedNoFree = AA::hasAssumedIRAttr<Attribute::NoFree>(
+ A, this, CBIRP, DepClassTy::OPTIONAL, IsKnownNoFree);
- bool MaybeCaptured = !NoCaptureAA.isAssumedNoCapture();
- bool MaybeFreed = !ArgNoFreeAA.isAssumedNoFree();
- if (MaybeCaptured ||
+ if (!IsAssumedNoCapture ||
(AI.LibraryFunctionId != LibFunc___kmpc_alloc_shared &&
- MaybeFreed)) {
- AI.HasPotentiallyFreeingUnknownUses |= MaybeFreed;
+ !IsAssumedNoFree)) {
+ AI.HasPotentiallyFreeingUnknownUses |= !IsAssumedNoFree;
// Emit a missed remark if this is missed OpenMP globalization.
auto Remark = [&](OptimizationRemarkMissed ORM) {
@@ -6984,7 +7019,14 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
ValidUsesOnly = false;
return true;
};
- if (!A.checkForAllUses(Pred, *this, *AI.CB))
+ if (!A.checkForAllUses(Pred, *this, *AI.CB, /* CheckBBLivenessOnly */ false,
+ DepClassTy::OPTIONAL, /* IgnoreDroppableUses */ true,
+ [&](const Use &OldU, const Use &NewU) {
+ auto *SI = dyn_cast<StoreInst>(OldU.getUser());
+ return !SI || StackIsAccessibleByOtherThreads ||
+ AA::isAssumedThreadLocalObject(
+ A, *SI->getPointerOperand(), *this);
+ }))
return false;
return ValidUsesOnly;
};
@@ -7018,7 +7060,8 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
}
std::optional<APInt> Size = getSize(A, *this, AI);
- if (MaxHeapToStackSize != -1) {
+ if (AI.LibraryFunctionId != LibFunc___kmpc_alloc_shared &&
+ MaxHeapToStackSize != -1) {
if (!Size || Size->ugt(MaxHeapToStackSize)) {
LLVM_DEBUG({
if (!Size)
@@ -7078,7 +7121,8 @@ struct AAPrivatizablePtrImpl : public AAPrivatizablePtr {
}
/// Identify the type we can choose for a private copy of the underlying
- /// argument. None means it is not clear yet, nullptr means there is none.
+ /// argument. std::nullopt means it is not clear yet, nullptr means there is
+ /// none.
virtual std::optional<Type *> identifyPrivatizableType(Attributor &A) = 0;
/// Return a privatizable type that encloses both T0 and T1.
@@ -7098,7 +7142,7 @@ struct AAPrivatizablePtrImpl : public AAPrivatizablePtr {
return PrivatizableType;
}
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
return isAssumedPrivatizablePtr() ? "[priv]" : "[no-priv]";
}
@@ -7118,7 +7162,8 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
// rewrite them), there is no need to check them explicitly.
bool UsedAssumedInformation = false;
SmallVector<Attribute, 1> Attrs;
- getAttrs({Attribute::ByVal}, Attrs, /* IgnoreSubsumingPositions */ true);
+ A.getAttrs(getIRPosition(), {Attribute::ByVal}, Attrs,
+ /* IgnoreSubsumingPositions */ true);
if (!Attrs.empty() &&
A.checkForAllCallSites([](AbstractCallSite ACS) { return true; }, *this,
true, UsedAssumedInformation))
@@ -7141,9 +7186,11 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
return false;
// Check that all call sites agree on a type.
- auto &PrivCSArgAA =
+ auto *PrivCSArgAA =
A.getAAFor<AAPrivatizablePtr>(*this, ACSArgPos, DepClassTy::REQUIRED);
- std::optional<Type *> CSTy = PrivCSArgAA.getPrivatizableType();
+ if (!PrivCSArgAA)
+ return false;
+ std::optional<Type *> CSTy = PrivCSArgAA->getPrivatizableType();
LLVM_DEBUG({
dbgs() << "[AAPrivatizablePtr] ACSPos: " << ACSArgPos << ", CSTy: ";
@@ -7191,7 +7238,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
DepClassTy::OPTIONAL);
// Avoid arguments with padding for now.
- if (!getIRPosition().hasAttr(Attribute::ByVal) &&
+ if (!A.hasAttr(getIRPosition(), Attribute::ByVal) &&
!isDenselyPacked(*PrivatizableType, A.getInfoCache().getDL())) {
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Padding detected\n");
return indicatePessimisticFixpoint();
@@ -7216,7 +7263,9 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
auto CallSiteCheck = [&](AbstractCallSite ACS) {
CallBase *CB = ACS.getInstruction();
return TTI->areTypesABICompatible(
- CB->getCaller(), CB->getCalledFunction(), ReplacementTypes);
+ CB->getCaller(),
+ dyn_cast_if_present<Function>(CB->getCalledOperand()),
+ ReplacementTypes);
};
bool UsedAssumedInformation = false;
if (!A.checkForAllCallSites(CallSiteCheck, *this, true,
@@ -7264,10 +7313,10 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
if (CBArgNo != int(ArgNo))
continue;
- const auto &CBArgPrivAA = A.getAAFor<AAPrivatizablePtr>(
+ const auto *CBArgPrivAA = A.getAAFor<AAPrivatizablePtr>(
*this, IRPosition::argument(CBArg), DepClassTy::REQUIRED);
- if (CBArgPrivAA.isValidState()) {
- auto CBArgPrivTy = CBArgPrivAA.getPrivatizableType();
+ if (CBArgPrivAA && CBArgPrivAA->isValidState()) {
+ auto CBArgPrivTy = CBArgPrivAA->getPrivatizableType();
if (!CBArgPrivTy)
continue;
if (*CBArgPrivTy == PrivatizableType)
@@ -7298,23 +7347,23 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
assert(DCArgNo >= 0 && unsigned(DCArgNo) < DC->arg_size() &&
"Expected a direct call operand for callback call operand");
+ Function *DCCallee =
+ dyn_cast_if_present<Function>(DC->getCalledOperand());
LLVM_DEBUG({
dbgs() << "[AAPrivatizablePtr] Argument " << *Arg
<< " check if be privatized in the context of its parent ("
<< Arg->getParent()->getName()
<< ")\n[AAPrivatizablePtr] because it is an argument in a "
"direct call of ("
- << DCArgNo << "@" << DC->getCalledFunction()->getName()
- << ").\n";
+ << DCArgNo << "@" << DCCallee->getName() << ").\n";
});
- Function *DCCallee = DC->getCalledFunction();
if (unsigned(DCArgNo) < DCCallee->arg_size()) {
- const auto &DCArgPrivAA = A.getAAFor<AAPrivatizablePtr>(
+ const auto *DCArgPrivAA = A.getAAFor<AAPrivatizablePtr>(
*this, IRPosition::argument(*DCCallee->getArg(DCArgNo)),
DepClassTy::REQUIRED);
- if (DCArgPrivAA.isValidState()) {
- auto DCArgPrivTy = DCArgPrivAA.getPrivatizableType();
+ if (DCArgPrivAA && DCArgPrivAA->isValidState()) {
+ auto DCArgPrivTy = DCArgPrivAA->getPrivatizableType();
if (!DCArgPrivTy)
return true;
if (*DCArgPrivTy == PrivatizableType)
@@ -7328,7 +7377,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
<< Arg->getParent()->getName()
<< ")\n[AAPrivatizablePtr] because it is an argument in a "
"direct call of ("
- << ACS.getInstruction()->getCalledFunction()->getName()
+ << ACS.getInstruction()->getCalledOperand()->getName()
<< ").\n[AAPrivatizablePtr] for which the argument "
"privatization is not compatible.\n";
});
@@ -7479,7 +7528,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
Argument *Arg = getAssociatedArgument();
// Query AAAlign attribute for alignment of associated argument to
// determine the best alignment of loads.
- const auto &AlignAA =
+ const auto *AlignAA =
A.getAAFor<AAAlign>(*this, IRPosition::value(*Arg), DepClassTy::NONE);
// Callback to repair the associated function. A new alloca is placed at the
@@ -7510,13 +7559,13 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
// of the privatizable type are loaded prior to the call and passed to the
// new function version.
Attributor::ArgumentReplacementInfo::ACSRepairCBTy ACSRepairCB =
- [=, &AlignAA](const Attributor::ArgumentReplacementInfo &ARI,
- AbstractCallSite ACS,
- SmallVectorImpl<Value *> &NewArgOperands) {
+ [=](const Attributor::ArgumentReplacementInfo &ARI,
+ AbstractCallSite ACS, SmallVectorImpl<Value *> &NewArgOperands) {
// When no alignment is specified for the load instruction,
// natural alignment is assumed.
createReplacementValues(
- AlignAA.getAssumedAlign(), *PrivatizableType, ACS,
+ AlignAA ? AlignAA->getAssumedAlign() : Align(0),
+ *PrivatizableType, ACS,
ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()),
NewArgOperands);
};
@@ -7568,10 +7617,10 @@ struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl {
if (CI->isOne())
return AI->getAllocatedType();
if (auto *Arg = dyn_cast<Argument>(Obj)) {
- auto &PrivArgAA = A.getAAFor<AAPrivatizablePtr>(
+ auto *PrivArgAA = A.getAAFor<AAPrivatizablePtr>(
*this, IRPosition::argument(*Arg), DepClassTy::REQUIRED);
- if (PrivArgAA.isAssumedPrivatizablePtr())
- return PrivArgAA.getPrivatizableType();
+ if (PrivArgAA && PrivArgAA->isAssumedPrivatizablePtr())
+ return PrivArgAA->getPrivatizableType();
}
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Underlying object neither valid "
@@ -7593,7 +7642,7 @@ struct AAPrivatizablePtrCallSiteArgument final
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- if (getIRPosition().hasAttr(Attribute::ByVal))
+ if (A.hasAttr(getIRPosition(), Attribute::ByVal))
indicateOptimisticFixpoint();
}
@@ -7606,15 +7655,17 @@ struct AAPrivatizablePtrCallSiteArgument final
return indicatePessimisticFixpoint();
const IRPosition &IRP = getIRPosition();
- auto &NoCaptureAA =
- A.getAAFor<AANoCapture>(*this, IRP, DepClassTy::REQUIRED);
- if (!NoCaptureAA.isAssumedNoCapture()) {
+ bool IsKnownNoCapture;
+ bool IsAssumedNoCapture = AA::hasAssumedIRAttr<Attribute::NoCapture>(
+ A, this, IRP, DepClassTy::REQUIRED, IsKnownNoCapture);
+ if (!IsAssumedNoCapture) {
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might be captured!\n");
return indicatePessimisticFixpoint();
}
- auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, IRP, DepClassTy::REQUIRED);
- if (!NoAliasAA.isAssumedNoAlias()) {
+ bool IsKnownNoAlias;
+ if (!AA::hasAssumedIRAttr<Attribute::NoAlias>(
+ A, this, IRP, DepClassTy::REQUIRED, IsKnownNoAlias)) {
LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might alias!\n");
return indicatePessimisticFixpoint();
}
@@ -7679,16 +7730,16 @@ struct AAMemoryBehaviorImpl : public AAMemoryBehavior {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
intersectAssumedBits(BEST_STATE);
- getKnownStateFromValue(getIRPosition(), getState());
+ getKnownStateFromValue(A, getIRPosition(), getState());
AAMemoryBehavior::initialize(A);
}
/// Return the memory behavior information encoded in the IR for \p IRP.
- static void getKnownStateFromValue(const IRPosition &IRP,
+ static void getKnownStateFromValue(Attributor &A, const IRPosition &IRP,
BitIntegerState &State,
bool IgnoreSubsumingPositions = false) {
SmallVector<Attribute, 2> Attrs;
- IRP.getAttrs(AttrKinds, Attrs, IgnoreSubsumingPositions);
+ A.getAttrs(IRP, AttrKinds, Attrs, IgnoreSubsumingPositions);
for (const Attribute &Attr : Attrs) {
switch (Attr.getKindAsEnum()) {
case Attribute::ReadNone:
@@ -7714,7 +7765,7 @@ struct AAMemoryBehaviorImpl : public AAMemoryBehavior {
}
/// See AbstractAttribute::getDeducedAttributes(...).
- void getDeducedAttributes(LLVMContext &Ctx,
+ void getDeducedAttributes(Attributor &A, LLVMContext &Ctx,
SmallVectorImpl<Attribute> &Attrs) const override {
assert(Attrs.size() == 0);
if (isAssumedReadNone())
@@ -7728,29 +7779,30 @@ struct AAMemoryBehaviorImpl : public AAMemoryBehavior {
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
- if (hasAttr(Attribute::ReadNone, /* IgnoreSubsumingPositions */ true))
- return ChangeStatus::UNCHANGED;
-
const IRPosition &IRP = getIRPosition();
+ if (A.hasAttr(IRP, Attribute::ReadNone,
+ /* IgnoreSubsumingPositions */ true))
+ return ChangeStatus::UNCHANGED;
+
// Check if we would improve the existing attributes first.
SmallVector<Attribute, 4> DeducedAttrs;
- getDeducedAttributes(IRP.getAnchorValue().getContext(), DeducedAttrs);
+ getDeducedAttributes(A, IRP.getAnchorValue().getContext(), DeducedAttrs);
if (llvm::all_of(DeducedAttrs, [&](const Attribute &Attr) {
- return IRP.hasAttr(Attr.getKindAsEnum(),
- /* IgnoreSubsumingPositions */ true);
+ return A.hasAttr(IRP, Attr.getKindAsEnum(),
+ /* IgnoreSubsumingPositions */ true);
}))
return ChangeStatus::UNCHANGED;
// Clear existing attributes.
- IRP.removeAttrs(AttrKinds);
+ A.removeAttrs(IRP, AttrKinds);
// Use the generic manifest method.
return IRAttribute::manifest(A);
}
/// See AbstractState::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
if (isAssumedReadNone())
return "readnone";
if (isAssumedReadOnly())
@@ -7807,15 +7859,10 @@ struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating {
// TODO: Make IgnoreSubsumingPositions a property of an IRAttribute so we
// can query it when we use has/getAttr. That would allow us to reuse the
// initialize of the base class here.
- bool HasByVal =
- IRP.hasAttr({Attribute::ByVal}, /* IgnoreSubsumingPositions */ true);
- getKnownStateFromValue(IRP, getState(),
+ bool HasByVal = A.hasAttr(IRP, {Attribute::ByVal},
+ /* IgnoreSubsumingPositions */ true);
+ getKnownStateFromValue(A, IRP, getState(),
/* IgnoreSubsumingPositions */ HasByVal);
-
- // Initialize the use vector with all direct uses of the associated value.
- Argument *Arg = getAssociatedArgument();
- if (!Arg || !A.isFunctionIPOAmendable(*(Arg->getParent())))
- indicatePessimisticFixpoint();
}
ChangeStatus manifest(Attributor &A) override {
@@ -7825,10 +7872,12 @@ struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating {
// TODO: From readattrs.ll: "inalloca parameters are always
// considered written"
- if (hasAttr({Attribute::InAlloca, Attribute::Preallocated})) {
+ if (A.hasAttr(getIRPosition(),
+ {Attribute::InAlloca, Attribute::Preallocated})) {
removeKnownBits(NO_WRITES);
removeAssumedBits(NO_WRITES);
}
+ A.removeAttrs(getIRPosition(), AttrKinds);
return AAMemoryBehaviorFloating::manifest(A);
}
@@ -7874,9 +7923,11 @@ struct AAMemoryBehaviorCallSiteArgument final : AAMemoryBehaviorArgument {
// redirecting requests to the callee argument.
Argument *Arg = getAssociatedArgument();
const IRPosition &ArgPos = IRPosition::argument(*Arg);
- auto &ArgAA =
+ auto *ArgAA =
A.getAAFor<AAMemoryBehavior>(*this, ArgPos, DepClassTy::REQUIRED);
- return clampStateAndIndicateChange(getState(), ArgAA.getState());
+ if (!ArgAA)
+ return indicatePessimisticFixpoint();
+ return clampStateAndIndicateChange(getState(), ArgAA->getState());
}
/// See AbstractAttribute::trackStatistics()
@@ -7898,11 +7949,7 @@ struct AAMemoryBehaviorCallSiteReturned final : AAMemoryBehaviorFloating {
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
AAMemoryBehaviorImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
- indicatePessimisticFixpoint();
}
-
/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
// We do not annotate returned values.
@@ -7935,16 +7982,9 @@ struct AAMemoryBehaviorFunction final : public AAMemoryBehaviorImpl {
else if (isAssumedWriteOnly())
ME = MemoryEffects::writeOnly();
- // Intersect with existing memory attribute, as we currently deduce the
- // location and modref portion separately.
- MemoryEffects ExistingME = F.getMemoryEffects();
- ME &= ExistingME;
- if (ME == ExistingME)
- return ChangeStatus::UNCHANGED;
-
- return IRAttributeManifest::manifestAttrs(
- A, getIRPosition(), Attribute::getWithMemoryEffects(F.getContext(), ME),
- /*ForceReplace*/ true);
+ A.removeAttrs(getIRPosition(), AttrKinds);
+ return A.manifestAttrs(getIRPosition(),
+ Attribute::getWithMemoryEffects(F.getContext(), ME));
}
/// See AbstractAttribute::trackStatistics()
@@ -7963,14 +8003,6 @@ struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
AAMemoryBehaviorCallSite(const IRPosition &IRP, Attributor &A)
: AAMemoryBehaviorImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AAMemoryBehaviorImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
- indicatePessimisticFixpoint();
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
@@ -7979,9 +8011,11 @@ struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
// redirecting requests to the callee argument.
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
- auto &FnAA =
+ auto *FnAA =
A.getAAFor<AAMemoryBehavior>(*this, FnPos, DepClassTy::REQUIRED);
- return clampStateAndIndicateChange(getState(), FnAA.getState());
+ if (!FnAA)
+ return indicatePessimisticFixpoint();
+ return clampStateAndIndicateChange(getState(), FnAA->getState());
}
/// See AbstractAttribute::manifest(...).
@@ -7996,17 +8030,9 @@ struct AAMemoryBehaviorCallSite final : AAMemoryBehaviorImpl {
else if (isAssumedWriteOnly())
ME = MemoryEffects::writeOnly();
- // Intersect with existing memory attribute, as we currently deduce the
- // location and modref portion separately.
- MemoryEffects ExistingME = CB.getMemoryEffects();
- ME &= ExistingME;
- if (ME == ExistingME)
- return ChangeStatus::UNCHANGED;
-
- return IRAttributeManifest::manifestAttrs(
- A, getIRPosition(),
- Attribute::getWithMemoryEffects(CB.getContext(), ME),
- /*ForceReplace*/ true);
+ A.removeAttrs(getIRPosition(), AttrKinds);
+ return A.manifestAttrs(
+ getIRPosition(), Attribute::getWithMemoryEffects(CB.getContext(), ME));
}
/// See AbstractAttribute::trackStatistics()
@@ -8030,10 +8056,12 @@ ChangeStatus AAMemoryBehaviorFunction::updateImpl(Attributor &A) {
// the local state. No further analysis is required as the other memory
// state is as optimistic as it gets.
if (const auto *CB = dyn_cast<CallBase>(&I)) {
- const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
+ const auto *MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(
*this, IRPosition::callsite_function(*CB), DepClassTy::REQUIRED);
- intersectAssumedBits(MemBehaviorAA.getAssumed());
- return !isAtFixpoint();
+ if (MemBehaviorAA) {
+ intersectAssumedBits(MemBehaviorAA->getAssumed());
+ return !isAtFixpoint();
+ }
}
// Remove access kind modifiers if necessary.
@@ -8066,12 +8094,14 @@ ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) {
AAMemoryBehavior::base_t FnMemAssumedState =
AAMemoryBehavior::StateType::getWorstState();
if (!Arg || !Arg->hasByValAttr()) {
- const auto &FnMemAA =
+ const auto *FnMemAA =
A.getAAFor<AAMemoryBehavior>(*this, FnPos, DepClassTy::OPTIONAL);
- FnMemAssumedState = FnMemAA.getAssumed();
- S.addKnownBits(FnMemAA.getKnown());
- if ((S.getAssumed() & FnMemAA.getAssumed()) == S.getAssumed())
- return ChangeStatus::UNCHANGED;
+ if (FnMemAA) {
+ FnMemAssumedState = FnMemAA->getAssumed();
+ S.addKnownBits(FnMemAA->getKnown());
+ if ((S.getAssumed() & FnMemAA->getAssumed()) == S.getAssumed())
+ return ChangeStatus::UNCHANGED;
+ }
}
// The current assumed state used to determine a change.
@@ -8081,9 +8111,14 @@ ChangeStatus AAMemoryBehaviorFloating::updateImpl(Attributor &A) {
// it is, any information derived would be irrelevant anyway as we cannot
// check the potential aliases introduced by the capture. However, no need
// to fall back to anything less optimistic than the function state.
- const auto &ArgNoCaptureAA =
- A.getAAFor<AANoCapture>(*this, IRP, DepClassTy::OPTIONAL);
- if (!ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) {
+ bool IsKnownNoCapture;
+ const AANoCapture *ArgNoCaptureAA = nullptr;
+ bool IsAssumedNoCapture = AA::hasAssumedIRAttr<Attribute::NoCapture>(
+ A, this, IRP, DepClassTy::OPTIONAL, IsKnownNoCapture, false,
+ &ArgNoCaptureAA);
+
+ if (!IsAssumedNoCapture &&
+ (!ArgNoCaptureAA || !ArgNoCaptureAA->isAssumedNoCaptureMaybeReturned())) {
S.intersectAssumedBits(FnMemAssumedState);
return (AssumedState != getAssumed()) ? ChangeStatus::CHANGED
: ChangeStatus::UNCHANGED;
@@ -8137,9 +8172,10 @@ bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use &U,
// need to check call users.
if (U.get()->getType()->isPointerTy()) {
unsigned ArgNo = CB->getArgOperandNo(&U);
- const auto &ArgNoCaptureAA = A.getAAFor<AANoCapture>(
- *this, IRPosition::callsite_argument(*CB, ArgNo), DepClassTy::OPTIONAL);
- return !ArgNoCaptureAA.isAssumedNoCapture();
+ bool IsKnownNoCapture;
+ return !AA::hasAssumedIRAttr<Attribute::NoCapture>(
+ A, this, IRPosition::callsite_argument(*CB, ArgNo),
+ DepClassTy::OPTIONAL, IsKnownNoCapture);
}
return true;
@@ -8195,11 +8231,13 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use &U,
Pos = IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U));
else
Pos = IRPosition::callsite_function(*CB);
- const auto &MemBehaviorAA =
+ const auto *MemBehaviorAA =
A.getAAFor<AAMemoryBehavior>(*this, Pos, DepClassTy::OPTIONAL);
+ if (!MemBehaviorAA)
+ break;
// "assumed" has at most the same bits as the MemBehaviorAA assumed
// and at least "known".
- intersectAssumedBits(MemBehaviorAA.getAssumed());
+ intersectAssumedBits(MemBehaviorAA->getAssumed());
return;
}
};
@@ -8286,7 +8324,7 @@ struct AAMemoryLocationImpl : public AAMemoryLocation {
UseArgMemOnly = !AnchorFn->hasLocalLinkage();
SmallVector<Attribute, 2> Attrs;
- IRP.getAttrs({Attribute::Memory}, Attrs, IgnoreSubsumingPositions);
+ A.getAttrs(IRP, {Attribute::Memory}, Attrs, IgnoreSubsumingPositions);
for (const Attribute &Attr : Attrs) {
// TODO: We can map MemoryEffects to Attributor locations more precisely.
MemoryEffects ME = Attr.getMemoryEffects();
@@ -8304,11 +8342,10 @@ struct AAMemoryLocationImpl : public AAMemoryLocation {
else {
// Remove location information, only keep read/write info.
ME = MemoryEffects(ME.getModRef());
- IRAttributeManifest::manifestAttrs(
- A, IRP,
- Attribute::getWithMemoryEffects(IRP.getAnchorValue().getContext(),
- ME),
- /*ForceReplace*/ true);
+ A.manifestAttrs(IRP,
+ Attribute::getWithMemoryEffects(
+ IRP.getAnchorValue().getContext(), ME),
+ /*ForceReplace*/ true);
}
continue;
}
@@ -8319,11 +8356,10 @@ struct AAMemoryLocationImpl : public AAMemoryLocation {
else {
// Remove location information, only keep read/write info.
ME = MemoryEffects(ME.getModRef());
- IRAttributeManifest::manifestAttrs(
- A, IRP,
- Attribute::getWithMemoryEffects(IRP.getAnchorValue().getContext(),
- ME),
- /*ForceReplace*/ true);
+ A.manifestAttrs(IRP,
+ Attribute::getWithMemoryEffects(
+ IRP.getAnchorValue().getContext(), ME),
+ /*ForceReplace*/ true);
}
continue;
}
@@ -8331,7 +8367,7 @@ struct AAMemoryLocationImpl : public AAMemoryLocation {
}
/// See AbstractAttribute::getDeducedAttributes(...).
- void getDeducedAttributes(LLVMContext &Ctx,
+ void getDeducedAttributes(Attributor &A, LLVMContext &Ctx,
SmallVectorImpl<Attribute> &Attrs) const override {
// TODO: We can map Attributor locations to MemoryEffects more precisely.
assert(Attrs.size() == 0);
@@ -8359,27 +8395,13 @@ struct AAMemoryLocationImpl : public AAMemoryLocation {
const IRPosition &IRP = getIRPosition();
SmallVector<Attribute, 1> DeducedAttrs;
- getDeducedAttributes(IRP.getAnchorValue().getContext(), DeducedAttrs);
+ getDeducedAttributes(A, IRP.getAnchorValue().getContext(), DeducedAttrs);
if (DeducedAttrs.size() != 1)
return ChangeStatus::UNCHANGED;
MemoryEffects ME = DeducedAttrs[0].getMemoryEffects();
- // Intersect with existing memory attribute, as we currently deduce the
- // location and modref portion separately.
- SmallVector<Attribute, 1> ExistingAttrs;
- IRP.getAttrs({Attribute::Memory}, ExistingAttrs,
- /* IgnoreSubsumingPositions */ true);
- if (ExistingAttrs.size() == 1) {
- MemoryEffects ExistingME = ExistingAttrs[0].getMemoryEffects();
- ME &= ExistingME;
- if (ME == ExistingME)
- return ChangeStatus::UNCHANGED;
- }
-
- return IRAttributeManifest::manifestAttrs(
- A, IRP,
- Attribute::getWithMemoryEffects(IRP.getAnchorValue().getContext(), ME),
- /*ForceReplace*/ true);
+ return A.manifestAttrs(IRP, Attribute::getWithMemoryEffects(
+ IRP.getAnchorValue().getContext(), ME));
}
/// See AAMemoryLocation::checkForAllAccessesToMemoryKind(...).
@@ -8492,13 +8514,16 @@ protected:
if (!Accesses)
Accesses = new (Allocator) AccessSet();
Changed |= Accesses->insert(AccessInfo{I, Ptr, AK}).second;
+ if (MLK == NO_UNKOWN_MEM)
+ MLK = NO_LOCATIONS;
State.removeAssumedBits(MLK);
}
/// Determine the underlying locations kinds for \p Ptr, e.g., globals or
/// arguments, and update the state and access map accordingly.
void categorizePtrValue(Attributor &A, const Instruction &I, const Value &Ptr,
- AAMemoryLocation::StateType &State, bool &Changed);
+ AAMemoryLocation::StateType &State, bool &Changed,
+ unsigned AccessAS = 0);
/// Used to allocate access sets.
BumpPtrAllocator &Allocator;
@@ -8506,14 +8531,24 @@ protected:
void AAMemoryLocationImpl::categorizePtrValue(
Attributor &A, const Instruction &I, const Value &Ptr,
- AAMemoryLocation::StateType &State, bool &Changed) {
+ AAMemoryLocation::StateType &State, bool &Changed, unsigned AccessAS) {
LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Categorize pointer locations for "
<< Ptr << " ["
<< getMemoryLocationsAsStr(State.getAssumed()) << "]\n");
auto Pred = [&](Value &Obj) {
+ unsigned ObjectAS = Obj.getType()->getPointerAddressSpace();
// TODO: recognize the TBAA used for constant accesses.
MemoryLocationsKind MLK = NO_LOCATIONS;
+
+ // Filter accesses to constant (GPU) memory if we have an AS at the access
+ // site or the object is known to actually have the associated AS.
+ if ((AccessAS == (unsigned)AA::GPUAddressSpace::Constant ||
+ (ObjectAS == (unsigned)AA::GPUAddressSpace::Constant &&
+ isIdentifiedObject(&Obj))) &&
+ AA::isGPU(*I.getModule()))
+ return true;
+
if (isa<UndefValue>(&Obj))
return true;
if (isa<Argument>(&Obj)) {
@@ -8537,15 +8572,16 @@ void AAMemoryLocationImpl::categorizePtrValue(
else
MLK = NO_GLOBAL_EXTERNAL_MEM;
} else if (isa<ConstantPointerNull>(&Obj) &&
- !NullPointerIsDefined(getAssociatedFunction(),
- Ptr.getType()->getPointerAddressSpace())) {
+ (!NullPointerIsDefined(getAssociatedFunction(), AccessAS) ||
+ !NullPointerIsDefined(getAssociatedFunction(), ObjectAS))) {
return true;
} else if (isa<AllocaInst>(&Obj)) {
MLK = NO_LOCAL_MEM;
} else if (const auto *CB = dyn_cast<CallBase>(&Obj)) {
- const auto &NoAliasAA = A.getAAFor<AANoAlias>(
- *this, IRPosition::callsite_returned(*CB), DepClassTy::OPTIONAL);
- if (NoAliasAA.isAssumedNoAlias())
+ bool IsKnownNoAlias;
+ if (AA::hasAssumedIRAttr<Attribute::NoAlias>(
+ A, this, IRPosition::callsite_returned(*CB), DepClassTy::OPTIONAL,
+ IsKnownNoAlias))
MLK = NO_MALLOCED_MEM;
else
MLK = NO_UNKOWN_MEM;
@@ -8556,15 +8592,15 @@ void AAMemoryLocationImpl::categorizePtrValue(
assert(MLK != NO_LOCATIONS && "No location specified!");
LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Ptr value can be categorized: "
<< Obj << " -> " << getMemoryLocationsAsStr(MLK) << "\n");
- updateStateAndAccessesMap(getState(), MLK, &I, &Obj, Changed,
+ updateStateAndAccessesMap(State, MLK, &I, &Obj, Changed,
getAccessKindFromInst(&I));
return true;
};
- const auto &AA = A.getAAFor<AAUnderlyingObjects>(
+ const auto *AA = A.getAAFor<AAUnderlyingObjects>(
*this, IRPosition::value(Ptr), DepClassTy::OPTIONAL);
- if (!AA.forallUnderlyingObjects(Pred, AA::Intraprocedural)) {
+ if (!AA || !AA->forallUnderlyingObjects(Pred, AA::Intraprocedural)) {
LLVM_DEBUG(
dbgs() << "[AAMemoryLocation] Pointer locations not categorized\n");
updateStateAndAccessesMap(State, NO_UNKOWN_MEM, &I, nullptr, Changed,
@@ -8589,10 +8625,10 @@ void AAMemoryLocationImpl::categorizeArgumentPointerLocations(
// Skip readnone arguments.
const IRPosition &ArgOpIRP = IRPosition::callsite_argument(CB, ArgNo);
- const auto &ArgOpMemLocationAA =
+ const auto *ArgOpMemLocationAA =
A.getAAFor<AAMemoryBehavior>(*this, ArgOpIRP, DepClassTy::OPTIONAL);
- if (ArgOpMemLocationAA.isAssumedReadNone())
+ if (ArgOpMemLocationAA && ArgOpMemLocationAA->isAssumedReadNone())
continue;
// Categorize potentially accessed pointer arguments as if there was an
@@ -8613,22 +8649,27 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I,
if (auto *CB = dyn_cast<CallBase>(&I)) {
// First check if we assume any memory access is visible.
- const auto &CBMemLocationAA = A.getAAFor<AAMemoryLocation>(
+ const auto *CBMemLocationAA = A.getAAFor<AAMemoryLocation>(
*this, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL);
LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Categorize call site: " << I
<< " [" << CBMemLocationAA << "]\n");
+ if (!CBMemLocationAA) {
+ updateStateAndAccessesMap(AccessedLocs, NO_UNKOWN_MEM, &I, nullptr,
+ Changed, getAccessKindFromInst(&I));
+ return NO_UNKOWN_MEM;
+ }
- if (CBMemLocationAA.isAssumedReadNone())
+ if (CBMemLocationAA->isAssumedReadNone())
return NO_LOCATIONS;
- if (CBMemLocationAA.isAssumedInaccessibleMemOnly()) {
+ if (CBMemLocationAA->isAssumedInaccessibleMemOnly()) {
updateStateAndAccessesMap(AccessedLocs, NO_INACCESSIBLE_MEM, &I, nullptr,
Changed, getAccessKindFromInst(&I));
return AccessedLocs.getAssumed();
}
uint32_t CBAssumedNotAccessedLocs =
- CBMemLocationAA.getAssumedNotAccessedLocation();
+ CBMemLocationAA->getAssumedNotAccessedLocation();
// Set the argmemonly and global bit as we handle them separately below.
uint32_t CBAssumedNotAccessedLocsNoArgMem =
@@ -8651,7 +8692,7 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I,
getAccessKindFromInst(&I));
return true;
};
- if (!CBMemLocationAA.checkForAllAccessesToMemoryKind(
+ if (!CBMemLocationAA->checkForAllAccessesToMemoryKind(
AccessPred, inverseLocation(NO_GLOBAL_MEM, false, false)))
return AccessedLocs.getWorstState();
}
@@ -8676,7 +8717,8 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I,
LLVM_DEBUG(
dbgs() << "[AAMemoryLocation] Categorize memory access with pointer: "
<< I << " [" << *Ptr << "]\n");
- categorizePtrValue(A, I, *Ptr, AccessedLocs, Changed);
+ categorizePtrValue(A, I, *Ptr, AccessedLocs, Changed,
+ Ptr->getType()->getPointerAddressSpace());
return AccessedLocs.getAssumed();
}
@@ -8695,14 +8737,14 @@ struct AAMemoryLocationFunction final : public AAMemoryLocationImpl {
/// See AbstractAttribute::updateImpl(Attributor &A).
ChangeStatus updateImpl(Attributor &A) override {
- const auto &MemBehaviorAA =
+ const auto *MemBehaviorAA =
A.getAAFor<AAMemoryBehavior>(*this, getIRPosition(), DepClassTy::NONE);
- if (MemBehaviorAA.isAssumedReadNone()) {
- if (MemBehaviorAA.isKnownReadNone())
+ if (MemBehaviorAA && MemBehaviorAA->isAssumedReadNone()) {
+ if (MemBehaviorAA->isKnownReadNone())
return indicateOptimisticFixpoint();
assert(isAssumedReadNone() &&
"AAMemoryLocation was not read-none but AAMemoryBehavior was!");
- A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL);
+ A.recordDependence(*MemBehaviorAA, *this, DepClassTy::OPTIONAL);
return ChangeStatus::UNCHANGED;
}
@@ -8747,14 +8789,6 @@ struct AAMemoryLocationCallSite final : AAMemoryLocationImpl {
AAMemoryLocationCallSite(const IRPosition &IRP, Attributor &A)
: AAMemoryLocationImpl(IRP, A) {}
- /// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {
- AAMemoryLocationImpl::initialize(A);
- Function *F = getAssociatedFunction();
- if (!F || F->isDeclaration())
- indicatePessimisticFixpoint();
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
// TODO: Once we have call site specific value information we can provide
@@ -8763,8 +8797,10 @@ struct AAMemoryLocationCallSite final : AAMemoryLocationImpl {
// redirecting requests to the callee argument.
Function *F = getAssociatedFunction();
const IRPosition &FnPos = IRPosition::function(*F);
- auto &FnAA =
+ auto *FnAA =
A.getAAFor<AAMemoryLocation>(*this, FnPos, DepClassTy::REQUIRED);
+ if (!FnAA)
+ return indicatePessimisticFixpoint();
bool Changed = false;
auto AccessPred = [&](const Instruction *I, const Value *Ptr,
AccessKind Kind, MemoryLocationsKind MLK) {
@@ -8772,7 +8808,7 @@ struct AAMemoryLocationCallSite final : AAMemoryLocationImpl {
getAccessKindFromInst(I));
return true;
};
- if (!FnAA.checkForAllAccessesToMemoryKind(AccessPred, ALL_LOCATIONS))
+ if (!FnAA->checkForAllAccessesToMemoryKind(AccessPred, ALL_LOCATIONS))
return indicatePessimisticFixpoint();
return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
}
@@ -8808,7 +8844,7 @@ struct AAValueConstantRangeImpl : AAValueConstantRange {
}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
std::string Str;
llvm::raw_string_ostream OS(Str);
OS << "range(" << getBitWidth() << ")<";
@@ -9023,15 +9059,6 @@ struct AAValueConstantRangeArgument final
AAValueConstantRangeArgument(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
- /// See AbstractAttribute::initialize(..).
- void initialize(Attributor &A) override {
- if (!getAnchorScope() || getAnchorScope()->isDeclaration()) {
- indicatePessimisticFixpoint();
- } else {
- Base::initialize(A);
- }
- }
-
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_ARG_ATTR(value_range)
@@ -9052,7 +9079,10 @@ struct AAValueConstantRangeReturned
: Base(IRP, A) {}
/// See AbstractAttribute::initialize(...).
- void initialize(Attributor &A) override {}
+ void initialize(Attributor &A) override {
+ if (!A.isFunctionIPOAmendable(*getAssociatedFunction()))
+ indicatePessimisticFixpoint();
+ }
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
@@ -9141,17 +9171,21 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return false;
- auto &LHSAA = A.getAAFor<AAValueConstantRange>(
+ auto *LHSAA = A.getAAFor<AAValueConstantRange>(
*this, IRPosition::value(*LHS, getCallBaseContext()),
DepClassTy::REQUIRED);
- QuerriedAAs.push_back(&LHSAA);
- auto LHSAARange = LHSAA.getAssumedConstantRange(A, CtxI);
+ if (!LHSAA)
+ return false;
+ QuerriedAAs.push_back(LHSAA);
+ auto LHSAARange = LHSAA->getAssumedConstantRange(A, CtxI);
- auto &RHSAA = A.getAAFor<AAValueConstantRange>(
+ auto *RHSAA = A.getAAFor<AAValueConstantRange>(
*this, IRPosition::value(*RHS, getCallBaseContext()),
DepClassTy::REQUIRED);
- QuerriedAAs.push_back(&RHSAA);
- auto RHSAARange = RHSAA.getAssumedConstantRange(A, CtxI);
+ if (!RHSAA)
+ return false;
+ QuerriedAAs.push_back(RHSAA);
+ auto RHSAARange = RHSAA->getAssumedConstantRange(A, CtxI);
auto AssumedRange = LHSAARange.binaryOp(BinOp->getOpcode(), RHSAARange);
@@ -9184,12 +9218,14 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
if (!OpV->getType()->isIntegerTy())
return false;
- auto &OpAA = A.getAAFor<AAValueConstantRange>(
+ auto *OpAA = A.getAAFor<AAValueConstantRange>(
*this, IRPosition::value(*OpV, getCallBaseContext()),
DepClassTy::REQUIRED);
- QuerriedAAs.push_back(&OpAA);
- T.unionAssumed(
- OpAA.getAssumed().castOp(CastI->getOpcode(), getState().getBitWidth()));
+ if (!OpAA)
+ return false;
+ QuerriedAAs.push_back(OpAA);
+ T.unionAssumed(OpAA->getAssumed().castOp(CastI->getOpcode(),
+ getState().getBitWidth()));
return T.isValidState();
}
@@ -9224,16 +9260,20 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
return false;
- auto &LHSAA = A.getAAFor<AAValueConstantRange>(
+ auto *LHSAA = A.getAAFor<AAValueConstantRange>(
*this, IRPosition::value(*LHS, getCallBaseContext()),
DepClassTy::REQUIRED);
- QuerriedAAs.push_back(&LHSAA);
- auto &RHSAA = A.getAAFor<AAValueConstantRange>(
+ if (!LHSAA)
+ return false;
+ QuerriedAAs.push_back(LHSAA);
+ auto *RHSAA = A.getAAFor<AAValueConstantRange>(
*this, IRPosition::value(*RHS, getCallBaseContext()),
DepClassTy::REQUIRED);
- QuerriedAAs.push_back(&RHSAA);
- auto LHSAARange = LHSAA.getAssumedConstantRange(A, CtxI);
- auto RHSAARange = RHSAA.getAssumedConstantRange(A, CtxI);
+ if (!RHSAA)
+ return false;
+ QuerriedAAs.push_back(RHSAA);
+ auto LHSAARange = LHSAA->getAssumedConstantRange(A, CtxI);
+ auto RHSAARange = RHSAA->getAssumedConstantRange(A, CtxI);
// If one of them is empty set, we can't decide.
if (LHSAARange.isEmptySet() || RHSAARange.isEmptySet())
@@ -9260,8 +9300,10 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
else
T.unionAssumed(ConstantRange(/* BitWidth */ 1, /* isFullSet */ true));
- LLVM_DEBUG(dbgs() << "[AAValueConstantRange] " << *CmpI << " " << LHSAA
- << " " << RHSAA << "\n");
+ LLVM_DEBUG(dbgs() << "[AAValueConstantRange] " << *CmpI << " after "
+ << (MustTrue ? "true" : (MustFalse ? "false" : "unknown"))
+ << ": " << T << "\n\t" << *LHSAA << "\t<op>\n\t"
+ << *RHSAA);
// TODO: Track a known state too.
return T.isValidState();
@@ -9287,12 +9329,15 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
Value *VPtr = *SimplifiedOpV;
// If the value is not an instruction, we query the Attributor for the AA.
- const auto &AA = A.getAAFor<AAValueConstantRange>(
+ const auto *AA = A.getAAFor<AAValueConstantRange>(
*this, IRPosition::value(*VPtr, getCallBaseContext()),
DepClassTy::REQUIRED);
// Clamp operator is not used to utilize a program point CtxI.
- T.unionAssumed(AA.getAssumedConstantRange(A, CtxI));
+ if (AA)
+ T.unionAssumed(AA->getAssumedConstantRange(A, CtxI));
+ else
+ return false;
return T.isValidState();
}
@@ -9454,12 +9499,12 @@ struct AAPotentialConstantValuesImpl : AAPotentialConstantValues {
return false;
if (!IRP.getAssociatedType()->isIntegerTy())
return false;
- auto &PotentialValuesAA = A.getAAFor<AAPotentialConstantValues>(
+ auto *PotentialValuesAA = A.getAAFor<AAPotentialConstantValues>(
*this, IRP, DepClassTy::REQUIRED);
- if (!PotentialValuesAA.getState().isValidState())
+ if (!PotentialValuesAA || !PotentialValuesAA->getState().isValidState())
return false;
- ContainsUndef = PotentialValuesAA.getState().undefIsContained();
- S = PotentialValuesAA.getState().getAssumedSet();
+ ContainsUndef = PotentialValuesAA->getState().undefIsContained();
+ S = PotentialValuesAA->getState().getAssumedSet();
return true;
}
@@ -9483,7 +9528,7 @@ struct AAPotentialConstantValuesImpl : AAPotentialConstantValues {
}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
std::string Str;
llvm::raw_string_ostream OS(Str);
OS << getState();
@@ -9506,15 +9551,6 @@ struct AAPotentialConstantValuesArgument final
AAPotentialConstantValuesArgument(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
- /// See AbstractAttribute::initialize(..).
- void initialize(Attributor &A) override {
- if (!getAnchorScope() || getAnchorScope()->isDeclaration()) {
- indicatePessimisticFixpoint();
- } else {
- Base::initialize(A);
- }
- }
-
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_ARG_ATTR(potential_values)
@@ -9529,6 +9565,12 @@ struct AAPotentialConstantValuesReturned
AAPotentialConstantValuesReturned(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
+ void initialize(Attributor &A) override {
+ if (!A.isFunctionIPOAmendable(*getAssociatedFunction()))
+ indicatePessimisticFixpoint();
+ Base::initialize(A);
+ }
+
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override {
STATS_DECLTRACK_FNRET_ATTR(potential_values)
@@ -9958,9 +10000,11 @@ struct AAPotentialConstantValuesCallSiteArgument
ChangeStatus updateImpl(Attributor &A) override {
Value &V = getAssociatedValue();
auto AssumedBefore = getAssumed();
- auto &AA = A.getAAFor<AAPotentialConstantValues>(
+ auto *AA = A.getAAFor<AAPotentialConstantValues>(
*this, IRPosition::value(V), DepClassTy::REQUIRED);
- const auto &S = AA.getAssumed();
+ if (!AA)
+ return indicatePessimisticFixpoint();
+ const auto &S = AA->getAssumed();
unionAssumed(S);
return AssumedBefore == getAssumed() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
@@ -9971,27 +10015,39 @@ struct AAPotentialConstantValuesCallSiteArgument
STATS_DECLTRACK_CSARG_ATTR(potential_values)
}
};
+} // namespace
/// ------------------------ NoUndef Attribute ---------------------------------
+bool AANoUndef::isImpliedByIR(Attributor &A, const IRPosition &IRP,
+ Attribute::AttrKind ImpliedAttributeKind,
+ bool IgnoreSubsumingPositions) {
+ assert(ImpliedAttributeKind == Attribute::NoUndef &&
+ "Unexpected attribute kind");
+ if (A.hasAttr(IRP, {Attribute::NoUndef}, IgnoreSubsumingPositions,
+ Attribute::NoUndef))
+ return true;
+
+ Value &Val = IRP.getAssociatedValue();
+ if (IRP.getPositionKind() != IRPosition::IRP_RETURNED &&
+ isGuaranteedNotToBeUndefOrPoison(&Val)) {
+ LLVMContext &Ctx = Val.getContext();
+ A.manifestAttrs(IRP, Attribute::get(Ctx, Attribute::NoUndef));
+ return true;
+ }
+
+ return false;
+}
+
+namespace {
struct AANoUndefImpl : AANoUndef {
AANoUndefImpl(const IRPosition &IRP, Attributor &A) : AANoUndef(IRP, A) {}
/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
- if (getIRPosition().hasAttr({Attribute::NoUndef})) {
- indicateOptimisticFixpoint();
- return;
- }
Value &V = getAssociatedValue();
if (isa<UndefValue>(V))
indicatePessimisticFixpoint();
- else if (isa<FreezeInst>(V))
- indicateOptimisticFixpoint();
- else if (getPositionKind() != IRPosition::IRP_RETURNED &&
- isGuaranteedNotToBeUndefOrPoison(&V))
- indicateOptimisticFixpoint();
- else
- AANoUndef::initialize(A);
+ assert(!isImpliedByIR(A, getIRPosition(), Attribute::NoUndef));
}
/// See followUsesInMBEC
@@ -10015,7 +10071,7 @@ struct AANoUndefImpl : AANoUndef {
}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
return getAssumed() ? "noundef" : "may-undef-or-poison";
}
@@ -10052,33 +10108,39 @@ struct AANoUndefFloating : public AANoUndefImpl {
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
+ auto VisitValueCB = [&](const IRPosition &IRP) -> bool {
+ bool IsKnownNoUndef;
+ return AA::hasAssumedIRAttr<Attribute::NoUndef>(
+ A, this, IRP, DepClassTy::REQUIRED, IsKnownNoUndef);
+ };
- SmallVector<AA::ValueAndContext> Values;
+ bool Stripped;
bool UsedAssumedInformation = false;
+ Value *AssociatedValue = &getAssociatedValue();
+ SmallVector<AA::ValueAndContext> Values;
if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values,
- AA::AnyScope, UsedAssumedInformation)) {
- Values.push_back({getAssociatedValue(), getCtxI()});
+ AA::AnyScope, UsedAssumedInformation))
+ Stripped = false;
+ else
+ Stripped =
+ Values.size() != 1 || Values.front().getValue() != AssociatedValue;
+
+ if (!Stripped) {
+ // If we haven't stripped anything we might still be able to use a
+ // different AA, but only if the IRP changes, effectively interpreting
+ // this not as a call site value but as a floating/argument value.
+ const IRPosition AVIRP = IRPosition::value(*AssociatedValue);
+ if (AVIRP == getIRPosition() || !VisitValueCB(AVIRP))
+ return indicatePessimisticFixpoint();
+ return ChangeStatus::UNCHANGED;
}
- StateType T;
- auto VisitValueCB = [&](Value &V, const Instruction *CtxI) -> bool {
- const auto &AA = A.getAAFor<AANoUndef>(*this, IRPosition::value(V),
- DepClassTy::REQUIRED);
- if (this == &AA) {
- T.indicatePessimisticFixpoint();
- } else {
- const AANoUndef::StateType &S =
- static_cast<const AANoUndef::StateType &>(AA.getState());
- T ^= S;
- }
- return T.isValidState();
- };
-
for (const auto &VAC : Values)
- if (!VisitValueCB(*VAC.getValue(), VAC.getCtxI()))
+ if (!VisitValueCB(IRPosition::value(*VAC.getValue())))
return indicatePessimisticFixpoint();
- return clampStateAndIndicateChange(getState(), T);
+ return ChangeStatus::UNCHANGED;
}
/// See AbstractAttribute::trackStatistics()
@@ -10086,18 +10148,26 @@ struct AANoUndefFloating : public AANoUndefImpl {
};
struct AANoUndefReturned final
- : AAReturnedFromReturnedValues<AANoUndef, AANoUndefImpl> {
+ : AAReturnedFromReturnedValues<AANoUndef, AANoUndefImpl,
+ AANoUndef::StateType, false,
+ Attribute::NoUndef> {
AANoUndefReturned(const IRPosition &IRP, Attributor &A)
- : AAReturnedFromReturnedValues<AANoUndef, AANoUndefImpl>(IRP, A) {}
+ : AAReturnedFromReturnedValues<AANoUndef, AANoUndefImpl,
+ AANoUndef::StateType, false,
+ Attribute::NoUndef>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_FNRET_ATTR(noundef) }
};
struct AANoUndefArgument final
- : AAArgumentFromCallSiteArguments<AANoUndef, AANoUndefImpl> {
+ : AAArgumentFromCallSiteArguments<AANoUndef, AANoUndefImpl,
+ AANoUndef::StateType, false,
+ Attribute::NoUndef> {
AANoUndefArgument(const IRPosition &IRP, Attributor &A)
- : AAArgumentFromCallSiteArguments<AANoUndef, AANoUndefImpl>(IRP, A) {}
+ : AAArgumentFromCallSiteArguments<AANoUndef, AANoUndefImpl,
+ AANoUndef::StateType, false,
+ Attribute::NoUndef>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(noundef) }
@@ -10112,14 +10182,173 @@ struct AANoUndefCallSiteArgument final : AANoUndefFloating {
};
struct AANoUndefCallSiteReturned final
- : AACallSiteReturnedFromReturned<AANoUndef, AANoUndefImpl> {
+ : AACallSiteReturnedFromReturned<AANoUndef, AANoUndefImpl,
+ AANoUndef::StateType, false,
+ Attribute::NoUndef> {
AANoUndefCallSiteReturned(const IRPosition &IRP, Attributor &A)
- : AACallSiteReturnedFromReturned<AANoUndef, AANoUndefImpl>(IRP, A) {}
+ : AACallSiteReturnedFromReturned<AANoUndef, AANoUndefImpl,
+ AANoUndef::StateType, false,
+ Attribute::NoUndef>(IRP, A) {}
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noundef) }
};
+/// ------------------------ NoFPClass Attribute -------------------------------
+
+struct AANoFPClassImpl : AANoFPClass {
+ AANoFPClassImpl(const IRPosition &IRP, Attributor &A) : AANoFPClass(IRP, A) {}
+
+ void initialize(Attributor &A) override {
+ const IRPosition &IRP = getIRPosition();
+
+ Value &V = IRP.getAssociatedValue();
+ if (isa<UndefValue>(V)) {
+ indicateOptimisticFixpoint();
+ return;
+ }
+
+ SmallVector<Attribute> Attrs;
+ A.getAttrs(getIRPosition(), {Attribute::NoFPClass}, Attrs, false);
+ for (const auto &Attr : Attrs) {
+ addKnownBits(Attr.getNoFPClass());
+ return;
+ }
+
+ const DataLayout &DL = A.getDataLayout();
+ if (getPositionKind() != IRPosition::IRP_RETURNED) {
+ KnownFPClass KnownFPClass = computeKnownFPClass(&V, DL);
+ addKnownBits(~KnownFPClass.KnownFPClasses);
+ }
+
+ if (Instruction *CtxI = getCtxI())
+ followUsesInMBEC(*this, A, getState(), *CtxI);
+ }
+
+ /// See followUsesInMBEC
+ bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I,
+ AANoFPClass::StateType &State) {
+ const Value *UseV = U->get();
+ const DominatorTree *DT = nullptr;
+ AssumptionCache *AC = nullptr;
+ const TargetLibraryInfo *TLI = nullptr;
+ InformationCache &InfoCache = A.getInfoCache();
+
+ if (Function *F = getAnchorScope()) {
+ DT = InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*F);
+ AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*F);
+ TLI = InfoCache.getTargetLibraryInfoForFunction(*F);
+ }
+
+ const DataLayout &DL = A.getDataLayout();
+
+ KnownFPClass KnownFPClass =
+ computeKnownFPClass(UseV, DL,
+ /*InterestedClasses=*/fcAllFlags,
+ /*Depth=*/0, TLI, AC, I, DT);
+ State.addKnownBits(~KnownFPClass.KnownFPClasses);
+
+ bool TrackUse = false;
+ return TrackUse;
+ }
+
+ const std::string getAsStr(Attributor *A) const override {
+ std::string Result = "nofpclass";
+ raw_string_ostream OS(Result);
+ OS << getAssumedNoFPClass();
+ return Result;
+ }
+
+ void getDeducedAttributes(Attributor &A, LLVMContext &Ctx,
+ SmallVectorImpl<Attribute> &Attrs) const override {
+ Attrs.emplace_back(Attribute::getWithNoFPClass(Ctx, getAssumedNoFPClass()));
+ }
+};
+
+struct AANoFPClassFloating : public AANoFPClassImpl {
+ AANoFPClassFloating(const IRPosition &IRP, Attributor &A)
+ : AANoFPClassImpl(IRP, A) {}
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ SmallVector<AA::ValueAndContext> Values;
+ bool UsedAssumedInformation = false;
+ if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values,
+ AA::AnyScope, UsedAssumedInformation)) {
+ Values.push_back({getAssociatedValue(), getCtxI()});
+ }
+
+ StateType T;
+ auto VisitValueCB = [&](Value &V, const Instruction *CtxI) -> bool {
+ const auto *AA = A.getAAFor<AANoFPClass>(*this, IRPosition::value(V),
+ DepClassTy::REQUIRED);
+ if (!AA || this == AA) {
+ T.indicatePessimisticFixpoint();
+ } else {
+ const AANoFPClass::StateType &S =
+ static_cast<const AANoFPClass::StateType &>(AA->getState());
+ T ^= S;
+ }
+ return T.isValidState();
+ };
+
+ for (const auto &VAC : Values)
+ if (!VisitValueCB(*VAC.getValue(), VAC.getCtxI()))
+ return indicatePessimisticFixpoint();
+
+ return clampStateAndIndicateChange(getState(), T);
+ }
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FNRET_ATTR(nofpclass)
+ }
+};
+
+struct AANoFPClassReturned final
+ : AAReturnedFromReturnedValues<AANoFPClass, AANoFPClassImpl,
+ AANoFPClassImpl::StateType, false, Attribute::None, false> {
+ AANoFPClassReturned(const IRPosition &IRP, Attributor &A)
+ : AAReturnedFromReturnedValues<AANoFPClass, AANoFPClassImpl,
+ AANoFPClassImpl::StateType, false, Attribute::None, false>(
+ IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FNRET_ATTR(nofpclass)
+ }
+};
+
+struct AANoFPClassArgument final
+ : AAArgumentFromCallSiteArguments<AANoFPClass, AANoFPClassImpl> {
+ AANoFPClassArgument(const IRPosition &IRP, Attributor &A)
+ : AAArgumentFromCallSiteArguments<AANoFPClass, AANoFPClassImpl>(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nofpclass) }
+};
+
+struct AANoFPClassCallSiteArgument final : AANoFPClassFloating {
+ AANoFPClassCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AANoFPClassFloating(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSARG_ATTR(nofpclass)
+ }
+};
+
+struct AANoFPClassCallSiteReturned final
+ : AACallSiteReturnedFromReturned<AANoFPClass, AANoFPClassImpl> {
+ AANoFPClassCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AACallSiteReturnedFromReturned<AANoFPClass, AANoFPClassImpl>(IRP, A) {}
+
+ /// See AbstractAttribute::trackStatistics()
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSRET_ATTR(nofpclass)
+ }
+};
+
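The initialize/followUseInMBEC logic above hinges on one bit trick: computeKnownFPClass reports the FP classes a value may still be, and the complement of that mask is what nofpclass (via addKnownBits) records as excluded. A minimal, self-contained sketch of that relationship using a toy bitmask; the names below are illustrative stand-ins for LLVM's real FPClassTest values such as fcNan and fcInf:

#include <cstdint>
#include <cstdio>

// Toy stand-ins for a few FP class bits; not the real FPClassTest enum.
enum FPClassBits : uint32_t {
  BitNan = 1u << 0,
  BitInf = 1u << 1,
  BitNormal = 1u << 2,
  BitZero = 1u << 3,
  AllFPClasses = BitNan | BitInf | BitNormal | BitZero,
};

int main() {
  // Suppose the analysis proved the value can only be a normal number or zero.
  uint32_t MayBe = BitNormal | BitZero;
  // The excluded classes are the complement, i.e. what nofpclass would list.
  uint32_t CannotBe = ~MayBe & AllFPClasses; // nan and inf in this example
  std::printf("excluded class mask: 0x%x\n", CannotBe);
  return 0;
}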
struct AACallEdgesImpl : public AACallEdges {
AACallEdgesImpl(const IRPosition &IRP, Attributor &A) : AACallEdges(IRP, A) {}
@@ -10133,7 +10362,7 @@ struct AACallEdgesImpl : public AACallEdges {
return HasUnknownCalleeNonAsm;
}
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
return "CallEdges[" + std::to_string(HasUnknownCallee) + "," +
std::to_string(CalledFunctions.size()) + "]";
}
@@ -10191,6 +10420,11 @@ struct AACallEdgesCallSite : public AACallEdgesImpl {
SmallVector<AA::ValueAndContext> Values;
// Process any value that we might call.
auto ProcessCalledOperand = [&](Value *V, Instruction *CtxI) {
+ if (isa<Constant>(V)) {
+ VisitValue(*V, CtxI);
+ return;
+ }
+
bool UsedAssumedInformation = false;
Values.clear();
if (!A.getAssumedSimplifiedValues(IRPosition::value(*V), *this, Values,
@@ -10246,14 +10480,16 @@ struct AACallEdgesFunction : public AACallEdgesImpl {
auto ProcessCallInst = [&](Instruction &Inst) {
CallBase &CB = cast<CallBase>(Inst);
- auto &CBEdges = A.getAAFor<AACallEdges>(
+ auto *CBEdges = A.getAAFor<AACallEdges>(
*this, IRPosition::callsite_function(CB), DepClassTy::REQUIRED);
- if (CBEdges.hasNonAsmUnknownCallee())
+ if (!CBEdges)
+ return false;
+ if (CBEdges->hasNonAsmUnknownCallee())
setHasUnknownCallee(true, Change);
- if (CBEdges.hasUnknownCallee())
+ if (CBEdges->hasUnknownCallee())
setHasUnknownCallee(false, Change);
- for (Function *F : CBEdges.getOptimisticEdges())
+ for (Function *F : CBEdges->getOptimisticEdges())
addCalledFunction(F, Change);
return true;
@@ -10277,8 +10513,9 @@ struct AACallEdgesFunction : public AACallEdgesImpl {
struct AAInterFnReachabilityFunction
: public CachedReachabilityAA<AAInterFnReachability, Function> {
+ using Base = CachedReachabilityAA<AAInterFnReachability, Function>;
AAInterFnReachabilityFunction(const IRPosition &IRP, Attributor &A)
- : CachedReachabilityAA<AAInterFnReachability, Function>(IRP, A) {}
+ : Base(IRP, A) {}
bool instructionCanReach(
Attributor &A, const Instruction &From, const Function &To,
@@ -10287,10 +10524,10 @@ struct AAInterFnReachabilityFunction
assert(From.getFunction() == getAnchorScope() && "Queried the wrong AA!");
auto *NonConstThis = const_cast<AAInterFnReachabilityFunction *>(this);
- RQITy StackRQI(A, From, To, ExclusionSet);
+ RQITy StackRQI(A, From, To, ExclusionSet, false);
typename RQITy::Reachable Result;
- if (RQITy *RQIPtr = NonConstThis->checkQueryCache(A, StackRQI, Result))
- return NonConstThis->isReachableImpl(A, *RQIPtr);
+ if (!NonConstThis->checkQueryCache(A, StackRQI, Result))
+ return NonConstThis->isReachableImpl(A, StackRQI);
return Result == RQITy::Reachable::Yes;
}
@@ -10305,59 +10542,61 @@ struct AAInterFnReachabilityFunction
if (!Visited)
Visited = &LocalVisited;
- const auto &IntraFnReachability = A.getAAFor<AAIntraFnReachability>(
- *this, IRPosition::function(*RQI.From->getFunction()),
- DepClassTy::OPTIONAL);
-
- // Determine call like instructions that we can reach from the inst.
- SmallVector<CallBase *> ReachableCallBases;
- auto CheckCallBase = [&](Instruction &CBInst) {
- if (IntraFnReachability.isAssumedReachable(A, *RQI.From, CBInst,
- RQI.ExclusionSet))
- ReachableCallBases.push_back(cast<CallBase>(&CBInst));
- return true;
- };
-
- bool UsedAssumedInformation = false;
- if (!A.checkForAllCallLikeInstructions(CheckCallBase, *this,
- UsedAssumedInformation,
- /* CheckBBLivenessOnly */ true))
- return rememberResult(A, RQITy::Reachable::Yes, RQI);
-
- for (CallBase *CB : ReachableCallBases) {
- auto &CBEdges = A.getAAFor<AACallEdges>(
+ auto CheckReachableCallBase = [&](CallBase *CB) {
+ auto *CBEdges = A.getAAFor<AACallEdges>(
*this, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL);
- if (!CBEdges.getState().isValidState())
- return rememberResult(A, RQITy::Reachable::Yes, RQI);
+ if (!CBEdges || !CBEdges->getState().isValidState())
+ return false;
// TODO Check To backwards in this case.
- if (CBEdges.hasUnknownCallee())
- return rememberResult(A, RQITy::Reachable::Yes, RQI);
+ if (CBEdges->hasUnknownCallee())
+ return false;
- for (Function *Fn : CBEdges.getOptimisticEdges()) {
+ for (Function *Fn : CBEdges->getOptimisticEdges()) {
if (Fn == RQI.To)
- return rememberResult(A, RQITy::Reachable::Yes, RQI);
+ return false;
if (!Visited->insert(Fn).second)
continue;
if (Fn->isDeclaration()) {
if (Fn->hasFnAttribute(Attribute::NoCallback))
continue;
// TODO Check To backwards in this case.
- return rememberResult(A, RQITy::Reachable::Yes, RQI);
+ return false;
}
const AAInterFnReachability *InterFnReachability = this;
if (Fn != getAnchorScope())
- InterFnReachability = &A.getAAFor<AAInterFnReachability>(
+ InterFnReachability = A.getAAFor<AAInterFnReachability>(
*this, IRPosition::function(*Fn), DepClassTy::OPTIONAL);
const Instruction &FnFirstInst = Fn->getEntryBlock().front();
- if (InterFnReachability->instructionCanReach(A, FnFirstInst, *RQI.To,
+ if (!InterFnReachability ||
+ InterFnReachability->instructionCanReach(A, FnFirstInst, *RQI.To,
RQI.ExclusionSet, Visited))
- return rememberResult(A, RQITy::Reachable::Yes, RQI);
+ return false;
}
- }
+ return true;
+ };
+
+ const auto *IntraFnReachability = A.getAAFor<AAIntraFnReachability>(
+ *this, IRPosition::function(*RQI.From->getFunction()),
+ DepClassTy::OPTIONAL);
+
+ // Determine call like instructions that we can reach from the inst.
+ auto CheckCallBase = [&](Instruction &CBInst) {
+ if (!IntraFnReachability || !IntraFnReachability->isAssumedReachable(
+ A, *RQI.From, CBInst, RQI.ExclusionSet))
+ return true;
+ return CheckReachableCallBase(cast<CallBase>(&CBInst));
+ };
+
+ bool UsedExclusionSet = /* conservative */ true;
+ bool UsedAssumedInformation = false;
+ if (!A.checkForAllCallLikeInstructions(CheckCallBase, *this,
+ UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ true))
+ return rememberResult(A, RQITy::Reachable::Yes, RQI, UsedExclusionSet);
- return rememberResult(A, RQITy::Reachable::No, RQI);
+ return rememberResult(A, RQITy::Reachable::No, RQI, UsedExclusionSet);
}
void trackStatistics() const override {}
@@ -10376,16 +10615,18 @@ askForAssumedConstant(Attributor &A, const AbstractAttribute &QueryingAA,
return nullptr;
// This will also pass the call base context.
- const auto &AA = A.getAAFor<AAType>(QueryingAA, IRP, DepClassTy::NONE);
+ const auto *AA = A.getAAFor<AAType>(QueryingAA, IRP, DepClassTy::NONE);
+ if (!AA)
+ return nullptr;
- std::optional<Constant *> COpt = AA.getAssumedConstant(A);
+ std::optional<Constant *> COpt = AA->getAssumedConstant(A);
if (!COpt.has_value()) {
- A.recordDependence(AA, QueryingAA, DepClassTy::OPTIONAL);
+ A.recordDependence(*AA, QueryingAA, DepClassTy::OPTIONAL);
return std::nullopt;
}
if (auto *C = *COpt) {
- A.recordDependence(AA, QueryingAA, DepClassTy::OPTIONAL);
+ A.recordDependence(*AA, QueryingAA, DepClassTy::OPTIONAL);
return C;
}
return nullptr;
@@ -10432,7 +10673,7 @@ struct AAPotentialValuesImpl : AAPotentialValues {
}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
std::string Str;
llvm::raw_string_ostream OS(Str);
OS << getState();
@@ -10454,9 +10695,9 @@ struct AAPotentialValuesImpl : AAPotentialValues {
return nullptr;
}
- void addValue(Attributor &A, StateType &State, Value &V,
- const Instruction *CtxI, AA::ValueScope S,
- Function *AnchorScope) const {
+ virtual void addValue(Attributor &A, StateType &State, Value &V,
+ const Instruction *CtxI, AA::ValueScope S,
+ Function *AnchorScope) const {
IRPosition ValIRP = IRPosition::value(V);
if (auto *CB = dyn_cast_or_null<CallBase>(CtxI)) {
@@ -10474,12 +10715,12 @@ struct AAPotentialValuesImpl : AAPotentialValues {
std::optional<Value *> SimpleV =
askOtherAA<AAValueConstantRange>(A, *this, ValIRP, Ty);
if (SimpleV.has_value() && !*SimpleV) {
- auto &PotentialConstantsAA = A.getAAFor<AAPotentialConstantValues>(
+ auto *PotentialConstantsAA = A.getAAFor<AAPotentialConstantValues>(
*this, ValIRP, DepClassTy::OPTIONAL);
- if (PotentialConstantsAA.isValidState()) {
- for (const auto &It : PotentialConstantsAA.getAssumedSet())
+ if (PotentialConstantsAA && PotentialConstantsAA->isValidState()) {
+ for (const auto &It : PotentialConstantsAA->getAssumedSet())
State.unionAssumed({{*ConstantInt::get(&Ty, It), nullptr}, S});
- if (PotentialConstantsAA.undefIsContained())
+ if (PotentialConstantsAA->undefIsContained())
State.unionAssumed({{*UndefValue::get(&Ty), nullptr}, S});
return;
}
@@ -10586,14 +10827,23 @@ struct AAPotentialValuesImpl : AAPotentialValues {
return ChangeStatus::UNCHANGED;
}
- bool getAssumedSimplifiedValues(Attributor &A,
- SmallVectorImpl<AA::ValueAndContext> &Values,
- AA::ValueScope S) const override {
+ bool getAssumedSimplifiedValues(
+ Attributor &A, SmallVectorImpl<AA::ValueAndContext> &Values,
+ AA::ValueScope S, bool RecurseForSelectAndPHI = false) const override {
if (!isValidState())
return false;
+ bool UsedAssumedInformation = false;
for (const auto &It : getAssumedSet())
- if (It.second & S)
+ if (It.second & S) {
+ if (RecurseForSelectAndPHI && (isa<PHINode>(It.first.getValue()) ||
+ isa<SelectInst>(It.first.getValue()))) {
+ if (A.getAssumedSimplifiedValues(
+ IRPosition::inst(*cast<Instruction>(It.first.getValue())),
+ this, Values, S, UsedAssumedInformation))
+ continue;
+ }
Values.push_back(It.first);
+ }
assert(!undefIsContained() && "Undef should be an explicit value!");
return true;
}
@@ -10607,7 +10857,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
ChangeStatus updateImpl(Attributor &A) override {
auto AssumedBefore = getAssumed();
- genericValueTraversal(A);
+ genericValueTraversal(A, &getAssociatedValue());
return (AssumedBefore == getAssumed()) ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
@@ -10677,9 +10927,11 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
// The index is the operand that we assume is not null.
unsigned PtrIdx = LHSIsNull;
- auto &PtrNonNullAA = A.getAAFor<AANonNull>(
- *this, IRPosition::value(*(PtrIdx ? RHS : LHS)), DepClassTy::REQUIRED);
- if (!PtrNonNullAA.isAssumedNonNull())
+ bool IsKnownNonNull;
+ bool IsAssumedNonNull = AA::hasAssumedIRAttr<Attribute::NonNull>(
+ A, this, IRPosition::value(*(PtrIdx ? RHS : LHS)), DepClassTy::REQUIRED,
+ IsKnownNonNull);
+ if (!IsAssumedNonNull)
return false;
// The new value depends on the predicate, true for != and false for ==.
@@ -10743,7 +10995,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
InformationCache &InfoCache = A.getInfoCache();
if (InfoCache.isOnlyUsedByAssume(LI)) {
if (!llvm::all_of(PotentialValueOrigins, [&](Instruction *I) {
- if (!I)
+ if (!I || isa<AssumeInst>(I))
return true;
if (auto *SI = dyn_cast<StoreInst>(I))
return A.isAssumedDead(SI->getOperandUse(0), this,
@@ -10797,21 +11049,37 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
auto GetLivenessInfo = [&](const Function &F) -> LivenessInfo & {
LivenessInfo &LI = LivenessAAs[&F];
if (!LI.LivenessAA)
- LI.LivenessAA = &A.getAAFor<AAIsDead>(*this, IRPosition::function(F),
- DepClassTy::NONE);
+ LI.LivenessAA = A.getAAFor<AAIsDead>(*this, IRPosition::function(F),
+ DepClassTy::NONE);
return LI;
};
if (&PHI == &getAssociatedValue()) {
LivenessInfo &LI = GetLivenessInfo(*PHI.getFunction());
+ const auto *CI =
+ A.getInfoCache().getAnalysisResultForFunction<CycleAnalysis>(
+ *PHI.getFunction());
+
+ Cycle *C = nullptr;
+ bool CyclePHI = mayBeInCycle(CI, &PHI, /* HeaderOnly */ true, &C);
for (unsigned u = 0, e = PHI.getNumIncomingValues(); u < e; u++) {
BasicBlock *IncomingBB = PHI.getIncomingBlock(u);
- if (LI.LivenessAA->isEdgeDead(IncomingBB, PHI.getParent())) {
+ if (LI.LivenessAA &&
+ LI.LivenessAA->isEdgeDead(IncomingBB, PHI.getParent())) {
LI.AnyDead = true;
continue;
}
- Worklist.push_back(
- {{*PHI.getIncomingValue(u), IncomingBB->getTerminator()}, II.S});
+ Value *V = PHI.getIncomingValue(u);
+ if (V == &PHI)
+ continue;
+
+ // If the incoming value is not the PHI but an instruction in the same
+ // cycle we might have multiple versions of it flying around.
+ if (CyclePHI && isa<Instruction>(V) &&
+ (!C || C->contains(cast<Instruction>(V)->getParent())))
+ return false;
+
+ Worklist.push_back({{*V, IncomingBB->getTerminator()}, II.S});
}
return true;
}
@@ -10866,11 +11134,10 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
InfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(*F);
const auto *TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
auto *AC = InfoCache.getAnalysisResultForFunction<AssumptionAnalysis>(*F);
- OptimizationRemarkEmitter *ORE = nullptr;
const DataLayout &DL = I.getModule()->getDataLayout();
SimplifyQuery Q(DL, TLI, DT, AC, &I);
- Value *NewV = simplifyInstructionWithOperands(&I, NewOps, Q, ORE);
+ Value *NewV = simplifyInstructionWithOperands(&I, NewOps, Q);
if (!NewV || NewV == &I)
return false;
@@ -10902,10 +11169,9 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
return false;
}
- void genericValueTraversal(Attributor &A) {
+ void genericValueTraversal(Attributor &A, Value *InitialV) {
SmallMapVector<const Function *, LivenessInfo, 4> LivenessAAs;
- Value *InitialV = &getAssociatedValue();
SmallSet<ItemInfo, 16> Visited;
SmallVector<ItemInfo, 16> Worklist;
Worklist.push_back({{*InitialV, getCtxI()}, AA::AnyScope});
@@ -10937,14 +11203,15 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl {
if (V->getType()->isPointerTy()) {
NewV = AA::getWithType(*V->stripPointerCasts(), *V->getType());
} else {
- auto *CB = dyn_cast<CallBase>(V);
- if (CB && CB->getCalledFunction()) {
- for (Argument &Arg : CB->getCalledFunction()->args())
- if (Arg.hasReturnedAttr()) {
- NewV = CB->getArgOperand(Arg.getArgNo());
- break;
- }
- }
+ if (auto *CB = dyn_cast<CallBase>(V))
+ if (auto *Callee =
+ dyn_cast_if_present<Function>(CB->getCalledOperand())) {
+ for (Argument &Arg : Callee->args())
+ if (Arg.hasReturnedAttr()) {
+ NewV = CB->getArgOperand(Arg.getArgNo());
+ break;
+ }
+ }
}
if (NewV && NewV != V) {
Worklist.push_back({{*NewV, CtxI}, S});
@@ -11062,25 +11329,127 @@ struct AAPotentialValuesArgument final : AAPotentialValuesImpl {
}
};
-struct AAPotentialValuesReturned
- : AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl> {
- using Base =
- AAReturnedFromReturnedValues<AAPotentialValues, AAPotentialValuesImpl>;
+struct AAPotentialValuesReturned : public AAPotentialValuesFloating {
+ using Base = AAPotentialValuesFloating;
AAPotentialValuesReturned(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
/// See AbstractAttribute::initialize(..).
void initialize(Attributor &A) override {
- if (A.hasSimplificationCallback(getIRPosition()))
+ Function *F = getAssociatedFunction();
+ if (!F || F->isDeclaration() || F->getReturnType()->isVoidTy()) {
indicatePessimisticFixpoint();
- else
- AAPotentialValues::initialize(A);
+ return;
+ }
+
+ for (Argument &Arg : F->args())
+ if (Arg.hasReturnedAttr()) {
+ addValue(A, getState(), Arg, nullptr, AA::AnyScope, F);
+ ReturnedArg = &Arg;
+ break;
+ }
+ if (!A.isFunctionIPOAmendable(*F) ||
+ A.hasSimplificationCallback(getIRPosition())) {
+ if (!ReturnedArg)
+ indicatePessimisticFixpoint();
+ else
+ indicateOptimisticFixpoint();
+ }
+ }
+
+ /// See AbstractAttribute::updateImpl(...).
+ ChangeStatus updateImpl(Attributor &A) override {
+ auto AssumedBefore = getAssumed();
+ bool UsedAssumedInformation = false;
+
+ SmallVector<AA::ValueAndContext> Values;
+ Function *AnchorScope = getAnchorScope();
+ auto HandleReturnedValue = [&](Value &V, Instruction *CtxI,
+ bool AddValues) {
+ for (AA::ValueScope S : {AA::Interprocedural, AA::Intraprocedural}) {
+ Values.clear();
+ if (!A.getAssumedSimplifiedValues(IRPosition::value(V), this, Values, S,
+ UsedAssumedInformation,
+ /* RecurseForSelectAndPHI */ true))
+ return false;
+ if (!AddValues)
+ continue;
+ for (const AA::ValueAndContext &VAC : Values)
+ addValue(A, getState(), *VAC.getValue(),
+ VAC.getCtxI() ? VAC.getCtxI() : CtxI, S, AnchorScope);
+ }
+ return true;
+ };
+
+ if (ReturnedArg) {
+ HandleReturnedValue(*ReturnedArg, nullptr, true);
+ } else {
+ auto RetInstPred = [&](Instruction &RetI) {
+ bool AddValues = true;
+ if (isa<PHINode>(RetI.getOperand(0)) ||
+ isa<SelectInst>(RetI.getOperand(0))) {
+ addValue(A, getState(), *RetI.getOperand(0), &RetI, AA::AnyScope,
+ AnchorScope);
+ AddValues = false;
+ }
+ return HandleReturnedValue(*RetI.getOperand(0), &RetI, AddValues);
+ };
+
+ if (!A.checkForAllInstructions(RetInstPred, *this, {Instruction::Ret},
+ UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ true))
+ return indicatePessimisticFixpoint();
+ }
+
+ return (AssumedBefore == getAssumed()) ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ void addValue(Attributor &A, StateType &State, Value &V,
+ const Instruction *CtxI, AA::ValueScope S,
+ Function *AnchorScope) const override {
+ Function *F = getAssociatedFunction();
+ if (auto *CB = dyn_cast<CallBase>(&V))
+ if (CB->getCalledOperand() == F)
+ return;
+ Base::addValue(A, State, V, CtxI, S, AnchorScope);
}
ChangeStatus manifest(Attributor &A) override {
- // We queried AAValueSimplify for the returned values so they will be
- // replaced if a simplified form was found. Nothing to do here.
- return ChangeStatus::UNCHANGED;
+ if (ReturnedArg)
+ return ChangeStatus::UNCHANGED;
+ SmallVector<AA::ValueAndContext> Values;
+ if (!getAssumedSimplifiedValues(A, Values, AA::ValueScope::Intraprocedural,
+ /* RecurseForSelectAndPHI */ true))
+ return ChangeStatus::UNCHANGED;
+ Value *NewVal = getSingleValue(A, *this, getIRPosition(), Values);
+ if (!NewVal)
+ return ChangeStatus::UNCHANGED;
+
+ ChangeStatus Changed = ChangeStatus::UNCHANGED;
+ if (auto *Arg = dyn_cast<Argument>(NewVal)) {
+ STATS_DECLTRACK(UniqueReturnValue, FunctionReturn,
+ "Number of function with unique return");
+ Changed |= A.manifestAttrs(
+ IRPosition::argument(*Arg),
+ {Attribute::get(Arg->getContext(), Attribute::Returned)});
+ STATS_DECLTRACK_ARG_ATTR(returned);
+ }
+
+ auto RetInstPred = [&](Instruction &RetI) {
+ Value *RetOp = RetI.getOperand(0);
+ if (isa<UndefValue>(RetOp) || RetOp == NewVal)
+ return true;
+ if (AA::isValidAtPosition({*NewVal, RetI}, A.getInfoCache()))
+ if (A.changeUseAfterManifest(RetI.getOperandUse(0), *NewVal))
+ Changed = ChangeStatus::CHANGED;
+ return true;
+ };
+ bool UsedAssumedInformation = false;
+ (void)A.checkForAllInstructions(RetInstPred, *this, {Instruction::Ret},
+ UsedAssumedInformation,
+ /* CheckBBLivenessOnly */ true);
+ return Changed;
}
ChangeStatus indicatePessimisticFixpoint() override {
@@ -11088,9 +11457,11 @@ struct AAPotentialValuesReturned
}
/// See AbstractAttribute::trackStatistics()
- void trackStatistics() const override {
- STATS_DECLTRACK_FNRET_ATTR(potential_values)
- }
+ void trackStatistics() const override {
+   STATS_DECLTRACK_FNRET_ATTR(potential_values)
+ }
+
+ /// The argument with an existing `returned` attribute, if any.
+ Argument *ReturnedArg = nullptr;
};
struct AAPotentialValuesFunction : AAPotentialValuesImpl {
@@ -11162,7 +11533,7 @@ struct AAPotentialValuesCallSiteReturned : AAPotentialValuesImpl {
SmallVector<AA::ValueAndContext> ArgValues;
IRPosition IRP = IRPosition::value(*V);
if (auto *Arg = dyn_cast<Argument>(V))
- if (Arg->getParent() == CB->getCalledFunction())
+ if (Arg->getParent() == CB->getCalledOperand())
IRP = IRPosition::callsite_argument(*CB, Arg->getArgNo());
if (recurseForValue(A, IRP, AA::AnyScope))
continue;
@@ -11228,12 +11599,26 @@ struct AAAssumptionInfoImpl : public AAAssumptionInfo {
const DenseSet<StringRef> &Known)
: AAAssumptionInfo(IRP, A, Known) {}
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ // Don't manifest a universal set if it somehow made it here.
+ if (getKnown().isUniversal())
+ return ChangeStatus::UNCHANGED;
+
+ const IRPosition &IRP = getIRPosition();
+ return A.manifestAttrs(
+ IRP,
+ Attribute::get(IRP.getAnchorValue().getContext(), AssumptionAttrKey,
+ llvm::join(getAssumed().getSet(), ",")),
+ /* ForceReplace */ true);
+ }
+
bool hasAssumption(const StringRef Assumption) const override {
return isValidState() && setContains(Assumption);
}
/// See AbstractAttribute::getAsStr()
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
const SetContents &Known = getKnown();
const SetContents &Assumed = getAssumed();
@@ -11264,31 +11649,18 @@ struct AAAssumptionInfoFunction final : AAAssumptionInfoImpl {
: AAAssumptionInfoImpl(IRP, A,
getAssumptions(*IRP.getAssociatedFunction())) {}
- /// See AbstractAttribute::manifest(...).
- ChangeStatus manifest(Attributor &A) override {
- const auto &Assumptions = getKnown();
-
- // Don't manifest a universal set if it somehow made it here.
- if (Assumptions.isUniversal())
- return ChangeStatus::UNCHANGED;
-
- Function *AssociatedFunction = getAssociatedFunction();
-
- bool Changed = addAssumptions(*AssociatedFunction, Assumptions.getSet());
-
- return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
bool Changed = false;
auto CallSitePred = [&](AbstractCallSite ACS) {
- const auto &AssumptionAA = A.getAAFor<AAAssumptionInfo>(
+ const auto *AssumptionAA = A.getAAFor<AAAssumptionInfo>(
*this, IRPosition::callsite_function(*ACS.getInstruction()),
DepClassTy::REQUIRED);
+ if (!AssumptionAA)
+ return false;
// Get the set of assumptions shared by all of this function's callers.
- Changed |= getIntersection(AssumptionAA.getAssumed());
+ Changed |= getIntersection(AssumptionAA->getAssumed());
return !getAssumed().empty() || !getKnown().empty();
};
@@ -11319,24 +11691,14 @@ struct AAAssumptionInfoCallSite final : AAAssumptionInfoImpl {
A.getAAFor<AAAssumptionInfo>(*this, FnPos, DepClassTy::REQUIRED);
}
- /// See AbstractAttribute::manifest(...).
- ChangeStatus manifest(Attributor &A) override {
- // Don't manifest a universal set if it somehow made it here.
- if (getKnown().isUniversal())
- return ChangeStatus::UNCHANGED;
-
- CallBase &AssociatedCall = cast<CallBase>(getAssociatedValue());
- bool Changed = addAssumptions(AssociatedCall, getAssumed().getSet());
-
- return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
- }
-
/// See AbstractAttribute::updateImpl(...).
ChangeStatus updateImpl(Attributor &A) override {
const IRPosition &FnPos = IRPosition::function(*getAnchorScope());
- auto &AssumptionAA =
+ auto *AssumptionAA =
A.getAAFor<AAAssumptionInfo>(*this, FnPos, DepClassTy::REQUIRED);
- bool Changed = getIntersection(AssumptionAA.getAssumed());
+ if (!AssumptionAA)
+ return indicatePessimisticFixpoint();
+ bool Changed = getIntersection(AssumptionAA->getAssumed());
return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
}
@@ -11360,7 +11722,7 @@ private:
AACallGraphNode *AACallEdgeIterator::operator*() const {
return static_cast<AACallGraphNode *>(const_cast<AACallEdges *>(
- &A.getOrCreateAAFor<AACallEdges>(IRPosition::function(**I))));
+ A.getOrCreateAAFor<AACallEdges>(IRPosition::function(**I))));
}
void AttributorCallGraph::print() { llvm::WriteGraph(outs(), this); }
@@ -11374,7 +11736,7 @@ struct AAUnderlyingObjectsImpl
AAUnderlyingObjectsImpl(const IRPosition &IRP, Attributor &A) : BaseTy(IRP) {}
/// See AbstractAttribute::getAsStr().
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *A) const override {
return std::string("UnderlyingObjects ") +
(isValidState()
? (std::string("inter #") +
@@ -11409,24 +11771,33 @@ struct AAUnderlyingObjectsImpl
auto *Obj = VAC.getValue();
Value *UO = getUnderlyingObject(Obj);
if (UO && UO != VAC.getValue() && SeenObjects.insert(UO).second) {
- const auto &OtherAA = A.getAAFor<AAUnderlyingObjects>(
+ const auto *OtherAA = A.getAAFor<AAUnderlyingObjects>(
*this, IRPosition::value(*UO), DepClassTy::OPTIONAL);
auto Pred = [&Values](Value &V) {
Values.emplace_back(V, nullptr);
return true;
};
- if (!OtherAA.forallUnderlyingObjects(Pred, Scope))
+ if (!OtherAA || !OtherAA->forallUnderlyingObjects(Pred, Scope))
llvm_unreachable(
"The forall call should not return false at this position");
continue;
}
- if (isa<SelectInst>(Obj) || isa<PHINode>(Obj)) {
+ if (isa<SelectInst>(Obj)) {
Changed |= handleIndirect(A, *Obj, UnderlyingObjects, Scope);
continue;
}
+ if (auto *PHI = dyn_cast<PHINode>(Obj)) {
+ // Explicitly look through PHIs as we do not care about dynamic
+ // uniqueness.
+ for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) {
+ Changed |= handleIndirect(A, *PHI->getIncomingValue(u),
+ UnderlyingObjects, Scope);
+ }
+ continue;
+ }
Changed |= UnderlyingObjects.insert(Obj);
}
@@ -11464,13 +11835,13 @@ private:
SmallSetVector<Value *, 8> &UnderlyingObjects,
AA::ValueScope Scope) {
bool Changed = false;
- const auto &AA = A.getAAFor<AAUnderlyingObjects>(
+ const auto *AA = A.getAAFor<AAUnderlyingObjects>(
*this, IRPosition::value(V), DepClassTy::OPTIONAL);
auto Pred = [&](Value &V) {
Changed |= UnderlyingObjects.insert(&V);
return true;
};
- if (!AA.forallUnderlyingObjects(Pred, Scope))
+ if (!AA || !AA->forallUnderlyingObjects(Pred, Scope))
llvm_unreachable(
"The forall call should not return false at this position");
return Changed;
@@ -11516,14 +11887,190 @@ struct AAUnderlyingObjectsFunction final : AAUnderlyingObjectsImpl {
AAUnderlyingObjectsFunction(const IRPosition &IRP, Attributor &A)
: AAUnderlyingObjectsImpl(IRP, A) {}
};
-}
+} // namespace
+
+/// ------------------------ Address Space ------------------------------------
+namespace {
+struct AAAddressSpaceImpl : public AAAddressSpace {
+ AAAddressSpaceImpl(const IRPosition &IRP, Attributor &A)
+ : AAAddressSpace(IRP, A) {}
+
+ int32_t getAddressSpace() const override {
+ assert(isValidState() && "the AA is invalid");
+ return AssumedAddressSpace;
+ }
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ assert(getAssociatedType()->isPtrOrPtrVectorTy() &&
+ "Associated value is not a pointer");
+ }
+
+ ChangeStatus updateImpl(Attributor &A) override {
+ int32_t OldAddressSpace = AssumedAddressSpace;
+ auto *AUO = A.getOrCreateAAFor<AAUnderlyingObjects>(getIRPosition(), this,
+ DepClassTy::REQUIRED);
+ auto Pred = [&](Value &Obj) {
+ if (isa<UndefValue>(&Obj))
+ return true;
+ return takeAddressSpace(Obj.getType()->getPointerAddressSpace());
+ };
+
+ if (!AUO->forallUnderlyingObjects(Pred))
+ return indicatePessimisticFixpoint();
+
+ return OldAddressSpace == AssumedAddressSpace ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ }
+
+ /// See AbstractAttribute::manifest(...).
+ ChangeStatus manifest(Attributor &A) override {
+ Value *AssociatedValue = &getAssociatedValue();
+ Value *OriginalValue = peelAddrspacecast(AssociatedValue);
+ if (getAddressSpace() == NoAddressSpace ||
+ static_cast<uint32_t>(getAddressSpace()) ==
+ getAssociatedType()->getPointerAddressSpace())
+ return ChangeStatus::UNCHANGED;
+
+ Type *NewPtrTy = PointerType::get(getAssociatedType()->getContext(),
+ static_cast<uint32_t>(getAddressSpace()));
+ bool UseOriginalValue =
+ OriginalValue->getType()->getPointerAddressSpace() ==
+ static_cast<uint32_t>(getAddressSpace());
+
+ bool Changed = false;
+
+ auto MakeChange = [&](Instruction *I, Use &U) {
+ Changed = true;
+ if (UseOriginalValue) {
+ A.changeUseAfterManifest(U, *OriginalValue);
+ return;
+ }
+ Instruction *CastInst = new AddrSpaceCastInst(OriginalValue, NewPtrTy);
+ CastInst->insertBefore(cast<Instruction>(I));
+ A.changeUseAfterManifest(U, *CastInst);
+ };
+
+ auto Pred = [&](const Use &U, bool &) {
+ if (U.get() != AssociatedValue)
+ return true;
+ auto *Inst = dyn_cast<Instruction>(U.getUser());
+ if (!Inst)
+ return true;
+ // This is a workaround to make sure we only change uses from the
+ // corresponding CGSCC if the AA is run on a CGSCC instead of the entire
+ // module.
+ if (!A.isRunOn(Inst->getFunction()))
+ return true;
+ if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
+ MakeChange(Inst, const_cast<Use &>(U));
+ return true;
+ };
+
+ // It doesn't matter if we can't check all uses, as we can simply
+ // conservatively ignore those that cannot be visited.
+ (void)A.checkForAllUses(Pred, *this, getAssociatedValue(),
+ /* CheckBBLivenessOnly */ true);
+
+ return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
+ }
+
+ /// See AbstractAttribute::getAsStr().
+ const std::string getAsStr(Attributor *A) const override {
+ if (!isValidState())
+ return "addrspace(<invalid>)";
+ return "addrspace(" +
+ (AssumedAddressSpace == NoAddressSpace
+ ? "none"
+ : std::to_string(AssumedAddressSpace)) +
+ ")";
+ }
+
+private:
+ int32_t AssumedAddressSpace = NoAddressSpace;
+
+ bool takeAddressSpace(int32_t AS) {
+ if (AssumedAddressSpace == NoAddressSpace) {
+ AssumedAddressSpace = AS;
+ return true;
+ }
+ return AssumedAddressSpace == AS;
+ }
+
+ static Value *peelAddrspacecast(Value *V) {
+ if (auto *I = dyn_cast<AddrSpaceCastInst>(V))
+ return peelAddrspacecast(I->getPointerOperand());
+ if (auto *C = dyn_cast<ConstantExpr>(V))
+ if (C->getOpcode() == Instruction::AddrSpaceCast)
+ return peelAddrspacecast(C->getOperand(0));
+ return V;
+ }
+};
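The core of updateImpl/takeAddressSpace above is a tiny three-level lattice: no address space assumed yet, exactly one assumed, or invalid once two underlying objects disagree. A self-contained sketch of that meet operation with toy types (not the Attributor API):

#include <cstdint>
#include <optional>

// Returns std::nullopt when the two facts conflict, i.e. where the AA would
// fall back to the pessimistic fixpoint.
static std::optional<int32_t> meetAddressSpace(std::optional<int32_t> Assumed,
                                               int32_t NewAS) {
  if (!Assumed)          // nothing assumed yet: adopt the new address space
    return NewAS;
  if (*Assumed == NewAS) // consistent with the current assumption
    return Assumed;
  return std::nullopt;   // two different address spaces: no common refinement
}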
+
+struct AAAddressSpaceFloating final : AAAddressSpaceImpl {
+ AAAddressSpaceFloating(const IRPosition &IRP, Attributor &A)
+ : AAAddressSpaceImpl(IRP, A) {}
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FLOATING_ATTR(addrspace);
+ }
+};
+
+struct AAAddressSpaceReturned final : AAAddressSpaceImpl {
+ AAAddressSpaceReturned(const IRPosition &IRP, Attributor &A)
+ : AAAddressSpaceImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // TODO: we don't rewrite function arguments for now because it would
+ // require rewriting the function signature and all call sites.
+ (void)indicatePessimisticFixpoint();
+ }
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_FNRET_ATTR(addrspace);
+ }
+};
+
+struct AAAddressSpaceCallSiteReturned final : AAAddressSpaceImpl {
+ AAAddressSpaceCallSiteReturned(const IRPosition &IRP, Attributor &A)
+ : AAAddressSpaceImpl(IRP, A) {}
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSRET_ATTR(addrspace);
+ }
+};
+
+struct AAAddressSpaceArgument final : AAAddressSpaceImpl {
+ AAAddressSpaceArgument(const IRPosition &IRP, Attributor &A)
+ : AAAddressSpaceImpl(IRP, A) {}
+
+ void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(addrspace); }
+};
+
+struct AAAddressSpaceCallSiteArgument final : AAAddressSpaceImpl {
+ AAAddressSpaceCallSiteArgument(const IRPosition &IRP, Attributor &A)
+ : AAAddressSpaceImpl(IRP, A) {}
+
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+ // TODO: we don't rewrite call site arguments for now because it would
+ // require rewriting the function signature of the callee.
+ (void)indicatePessimisticFixpoint();
+ }
+
+ void trackStatistics() const override {
+ STATS_DECLTRACK_CSARG_ATTR(addrspace);
+ }
+};
+} // namespace
-const char AAReturnedValues::ID = 0;
const char AANoUnwind::ID = 0;
const char AANoSync::ID = 0;
const char AANoFree::ID = 0;
const char AANonNull::ID = 0;
+const char AAMustProgress::ID = 0;
const char AANoRecurse::ID = 0;
+const char AANonConvergent::ID = 0;
const char AAWillReturn::ID = 0;
const char AAUndefinedBehavior::ID = 0;
const char AANoAlias::ID = 0;
@@ -11543,11 +12090,13 @@ const char AAValueConstantRange::ID = 0;
const char AAPotentialConstantValues::ID = 0;
const char AAPotentialValues::ID = 0;
const char AANoUndef::ID = 0;
+const char AANoFPClass::ID = 0;
const char AACallEdges::ID = 0;
const char AAInterFnReachability::ID = 0;
const char AAPointerInfo::ID = 0;
const char AAAssumptionInfo::ID = 0;
const char AAUnderlyingObjects::ID = 0;
+const char AAAddressSpace::ID = 0;
// Macro magic to create the static generator function for attributes that
// follow the naming scheme.
@@ -11647,10 +12196,10 @@ CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoSync)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoRecurse)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAWillReturn)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoReturn)
-CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReturnedValues)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMemoryLocation)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AACallEdges)
CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAssumptionInfo)
+CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAMustProgress)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias)
@@ -11663,7 +12212,9 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialConstantValues)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoFPClass)
CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAddressSpace)
CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify)
CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead)
@@ -11672,6 +12223,7 @@ CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAUnderlyingObjects)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAUndefinedBehavior)
+CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonConvergent)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIntraFnReachability)
CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInterFnReachability)
diff --git a/llvm/lib/Transforms/IPO/BlockExtractor.cpp b/llvm/lib/Transforms/IPO/BlockExtractor.cpp
index a68cf7db7c85..0c406aa9822e 100644
--- a/llvm/lib/Transforms/IPO/BlockExtractor.cpp
+++ b/llvm/lib/Transforms/IPO/BlockExtractor.cpp
@@ -17,8 +17,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryBuffer.h"
diff --git a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
index 64bfcb2a9a9f..2c8756c07f87 100644
--- a/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
+++ b/llvm/lib/Transforms/IPO/CalledValuePropagation.cpp
@@ -21,8 +21,6 @@
#include "llvm/Analysis/ValueLatticeUtils.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/MDBuilder.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
@@ -405,33 +403,3 @@ PreservedAnalyses CalledValuePropagationPass::run(Module &M,
runCVP(M);
return PreservedAnalyses::all();
}
-
-namespace {
-class CalledValuePropagationLegacyPass : public ModulePass {
-public:
- static char ID;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
-
- CalledValuePropagationLegacyPass() : ModulePass(ID) {
- initializeCalledValuePropagationLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- if (skipModule(M))
- return false;
- return runCVP(M);
- }
-};
-} // namespace
-
-char CalledValuePropagationLegacyPass::ID = 0;
-INITIALIZE_PASS(CalledValuePropagationLegacyPass, "called-value-propagation",
- "Called Value Propagation", false, false)
-
-ModulePass *llvm::createCalledValuePropagationPass() {
- return new CalledValuePropagationLegacyPass();
-}
diff --git a/llvm/lib/Transforms/IPO/ConstantMerge.cpp b/llvm/lib/Transforms/IPO/ConstantMerge.cpp
index 77bc377f4514..29052c8d997e 100644
--- a/llvm/lib/Transforms/IPO/ConstantMerge.cpp
+++ b/llvm/lib/Transforms/IPO/ConstantMerge.cpp
@@ -28,8 +28,6 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/IPO.h"
#include <algorithm>
@@ -251,32 +249,3 @@ PreservedAnalyses ConstantMergePass::run(Module &M, ModuleAnalysisManager &) {
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
-
-namespace {
-
-struct ConstantMergeLegacyPass : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
-
- ConstantMergeLegacyPass() : ModulePass(ID) {
- initializeConstantMergeLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- // For this pass, process all of the globals in the module, eliminating
- // duplicate constants.
- bool runOnModule(Module &M) override {
- if (skipModule(M))
- return false;
- return mergeConstants(M);
- }
-};
-
-} // end anonymous namespace
-
-char ConstantMergeLegacyPass::ID = 0;
-
-INITIALIZE_PASS(ConstantMergeLegacyPass, "constmerge",
- "Merge Duplicate Global Constants", false, false)
-
-ModulePass *llvm::createConstantMergePass() {
- return new ConstantMergeLegacyPass();
-}
diff --git a/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp b/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
index 4fe7bb6c757c..93d15f59a036 100644
--- a/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
+++ b/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -14,7 +14,6 @@
#include "llvm/Transforms/IPO/CrossDSOCFI.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalObject.h"
@@ -23,8 +22,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
@@ -35,28 +33,16 @@ STATISTIC(NumTypeIds, "Number of unique type identifiers");
namespace {
-struct CrossDSOCFI : public ModulePass {
- static char ID;
- CrossDSOCFI() : ModulePass(ID) {
- initializeCrossDSOCFIPass(*PassRegistry::getPassRegistry());
- }
-
+struct CrossDSOCFI {
MDNode *VeryLikelyWeights;
ConstantInt *extractNumericTypeId(MDNode *MD);
void buildCFICheck(Module &M);
- bool runOnModule(Module &M) override;
+ bool runOnModule(Module &M);
};
} // anonymous namespace
-INITIALIZE_PASS_BEGIN(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false,
- false)
-INITIALIZE_PASS_END(CrossDSOCFI, "cross-dso-cfi", "Cross-DSO CFI", false, false)
-char CrossDSOCFI::ID = 0;
-
-ModulePass *llvm::createCrossDSOCFIPass() { return new CrossDSOCFI; }
-
/// Extracts a numeric type identifier from an MDNode containing type metadata.
ConstantInt *CrossDSOCFI::extractNumericTypeId(MDNode *MD) {
// This check excludes vtables for classes inside anonymous namespaces.
diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index bf2c65a2402c..01834015f3fd 100644
--- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -16,9 +16,11 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Argument.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -43,7 +45,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <cassert>
#include <utility>
@@ -85,6 +86,11 @@ public:
virtual bool shouldHackArguments() const { return false; }
};
+bool isMustTailCalleeAnalyzable(const CallBase &CB) {
+ assert(CB.isMustTailCall());
+ return CB.getCalledFunction() && !CB.getCalledFunction()->isDeclaration();
+}
+
} // end anonymous namespace
char DAE::ID = 0;
@@ -520,8 +526,16 @@ void DeadArgumentEliminationPass::surveyFunction(const Function &F) {
for (const BasicBlock &BB : F) {
// If we have any returns of `musttail` results - the signature can't
// change
- if (BB.getTerminatingMustTailCall() != nullptr)
+ if (const auto *TC = BB.getTerminatingMustTailCall()) {
HasMustTailCalls = true;
+ // In addition, if the called function is not locally defined (or unknown,
+ // if this is an indirect call), we can't change the callsite and thus
+ // can't change this function's signature either.
+ if (!isMustTailCalleeAnalyzable(*TC)) {
+ markLive(F);
+ return;
+ }
+ }
}
if (HasMustTailCalls) {
@@ -1081,6 +1095,26 @@ bool DeadArgumentEliminationPass::removeDeadStuffFromFunction(Function *F) {
return true;
}
+void DeadArgumentEliminationPass::propagateVirtMustcallLiveness(
+ const Module &M) {
+ // If a function was marked "live", and it has musttail callers, they in turn
+ // can't change either.
+ LiveFuncSet NewLiveFuncs(LiveFunctions);
+ while (!NewLiveFuncs.empty()) {
+ LiveFuncSet Temp;
+ for (const auto *F : NewLiveFuncs)
+ for (const auto *U : F->users())
+ if (const auto *CB = dyn_cast<CallBase>(U))
+ if (CB->isMustTailCall())
+ if (!LiveFunctions.count(CB->getParent()->getParent()))
+ Temp.insert(CB->getParent()->getParent());
+ NewLiveFuncs.clear();
+ NewLiveFuncs.insert(Temp.begin(), Temp.end());
+ for (const auto *F : Temp)
+ markLive(*F);
+ }
+}
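propagateVirtMustcallLiveness is a plain worklist fixed point: once a function is live (its signature must stay fixed), every musttail caller of it becomes live as well, transitively. A self-contained sketch of the same propagation over a toy call graph, using string names instead of Function pointers:

#include <map>
#include <set>
#include <string>
#include <vector>

using Fn = std::string;

// MustTailCallers maps a function to the functions that musttail-call it.
static void propagateLiveness(
    std::set<Fn> &Live,
    const std::map<Fn, std::vector<Fn>> &MustTailCallers) {
  std::set<Fn> Pending(Live);
  while (!Pending.empty()) {
    std::set<Fn> Next;
    for (const Fn &F : Pending) {
      auto It = MustTailCallers.find(F);
      if (It == MustTailCallers.end())
        continue;
      for (const Fn &Caller : It->second)
        if (Live.insert(Caller).second) // newly discovered live function
          Next.insert(Caller);
    }
    Pending = std::move(Next);
  }
}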
+
PreservedAnalyses DeadArgumentEliminationPass::run(Module &M,
ModuleAnalysisManager &) {
bool Changed = false;
@@ -1101,6 +1135,8 @@ PreservedAnalyses DeadArgumentEliminationPass::run(Module &M,
for (auto &F : M)
surveyFunction(F);
+ propagateVirtMustcallLiveness(M);
+
// Now, remove all dead arguments and return values from each function in
// turn. We use make_early_inc_range here because functions will probably get
// removed (i.e. replaced by new ones).
diff --git a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
index 7f138d206fac..2b34d3b5a56e 100644
--- a/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
+++ b/llvm/lib/Transforms/IPO/ElimAvailExtern.cpp
@@ -12,24 +12,82 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Constant.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/GlobalStatus.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
#define DEBUG_TYPE "elim-avail-extern"
-STATISTIC(NumFunctions, "Number of functions removed");
+cl::opt<bool> ConvertToLocal(
+ "avail-extern-to-local", cl::Hidden,
+ cl::desc("Convert available_externally into locals, renaming them "
+ "to avoid link-time clashes."));
+
+STATISTIC(NumRemovals, "Number of functions removed");
+STATISTIC(NumConversions, "Number of functions converted");
STATISTIC(NumVariables, "Number of global variables removed");
+void deleteFunction(Function &F) {
+ // This will set the linkage to external
+ F.deleteBody();
+ ++NumRemovals;
+}
+
+/// Create a copy of the thinlto import, mark it local, and redirect direct
+/// calls to the copy. Only direct calls are replaced, so that e.g. indirect
+/// call function pointer tests would use the global identity of the function.
+///
+/// Currently, Value Profiling ("VP") MD_prof data isn't updated to refer to the
+/// clone's GUID (which will be different, because the name and linkage are
+/// different), under the assumption that the last consumer of this data is
+/// upstream in the pipeline (e.g. ICP).
+static void convertToLocalCopy(Module &M, Function &F) {
+ assert(F.hasAvailableExternallyLinkage());
+ assert(!F.isDeclaration());
+ // If we can't find a single use that's a call, just delete the function.
+ if (F.uses().end() == llvm::find_if(F.uses(), [&](Use &U) {
+ return isa<CallBase>(U.getUser());
+ }))
+ return deleteFunction(F);
+
+ auto OrigName = F.getName().str();
+ // Build a new name. We still need the old name (see below).
+ // We could just rely on internal linkage allowing 2 modules to have internal
+ // functions with the same name, but that just creates more trouble than
+ // necessary, e.g. when distinguishing profiles or debugging. Instead, we
+ // append the module identifier.
+ auto NewName = OrigName + ".__uniq" + getUniqueModuleId(&M);
+ F.setName(NewName);
+ if (auto *SP = F.getSubprogram())
+ SP->replaceLinkageName(MDString::get(F.getParent()->getContext(), NewName));
+
+ F.setLinkage(GlobalValue::InternalLinkage);
+ // Now make a declaration for the old name. We'll use it if there are non-call
+ // uses. For those, it would be incorrect to replace them with the local copy:
+ // for example, one such use could be taking the address of the function and
+ // passing it to an external function, which, in turn, might compare the
+ // function pointer to the original (non-local) function pointer, e.g. as part
+ // of indirect call promotion.
+ auto *Decl =
+ Function::Create(F.getFunctionType(), GlobalValue::ExternalLinkage,
+ F.getAddressSpace(), OrigName, F.getParent());
+ F.replaceUsesWithIf(Decl,
+ [&](Use &U) { return !isa<CallBase>(U.getUser()); });
+ ++NumConversions;
+}
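The naming scheme is the subtle part of convertToLocalCopy: the internal clone needs a module-unique name so two modules importing the same function do not clash, while the original name survives as an external declaration for address-taken uses. A small sketch of just that name split; the ".__uniq" suffix format mirrors the code above, with the module identifier coming from getUniqueModuleId in the real pass:

#include <string>
#include <utility>

// Returns {name of the internal clone, name kept for the external declaration}.
static std::pair<std::string, std::string>
splitAvailExternName(const std::string &OrigName, const std::string &ModuleId) {
  // Direct calls are redirected to the renamed internal copy; everything else
  // (e.g. address comparisons) keeps referring to the original symbol.
  return {OrigName + ".__uniq" + ModuleId, OrigName};
}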
+
static bool eliminateAvailableExternally(Module &M) {
bool Changed = false;
@@ -45,19 +103,21 @@ static bool eliminateAvailableExternally(Module &M) {
}
GV.removeDeadConstantUsers();
GV.setLinkage(GlobalValue::ExternalLinkage);
- NumVariables++;
+ ++NumVariables;
Changed = true;
}
// Drop the bodies of available externally functions.
- for (Function &F : M) {
- if (!F.hasAvailableExternallyLinkage())
+ for (Function &F : llvm::make_early_inc_range(M)) {
+ if (F.isDeclaration() || !F.hasAvailableExternallyLinkage())
continue;
- if (!F.isDeclaration())
- // This will set the linkage to external
- F.deleteBody();
+
+ if (ConvertToLocal)
+ convertToLocalCopy(M, F);
+ else
+ deleteFunction(F);
+
F.removeDeadConstantUsers();
- NumFunctions++;
Changed = true;
}
@@ -70,33 +130,3 @@ EliminateAvailableExternallyPass::run(Module &M, ModuleAnalysisManager &) {
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
-
-namespace {
-
-struct EliminateAvailableExternallyLegacyPass : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
-
- EliminateAvailableExternallyLegacyPass() : ModulePass(ID) {
- initializeEliminateAvailableExternallyLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- // run - Do the EliminateAvailableExternally pass on the specified module,
- // optionally updating the specified callgraph to reflect the changes.
- bool runOnModule(Module &M) override {
- if (skipModule(M))
- return false;
- return eliminateAvailableExternally(M);
- }
-};
-
-} // end anonymous namespace
-
-char EliminateAvailableExternallyLegacyPass::ID = 0;
-
-INITIALIZE_PASS(EliminateAvailableExternallyLegacyPass, "elim-avail-extern",
- "Eliminate Available Externally Globals", false, false)
-
-ModulePass *llvm::createEliminateAvailableExternallyPass() {
- return new EliminateAvailableExternallyLegacyPass();
-}
diff --git a/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp b/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp
new file mode 100644
index 000000000000..fa56a5b564ae
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/EmbedBitcodePass.cpp
@@ -0,0 +1,52 @@
+//===- EmbedBitcodePass.cpp - Pass that embeds the bitcode into a global---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/Bitcode/BitcodeWriterPass.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+#include <memory>
+#include <string>
+
+using namespace llvm;
+
+PreservedAnalyses EmbedBitcodePass::run(Module &M, ModuleAnalysisManager &AM) {
+ if (M.getGlobalVariable("llvm.embedded.module", /*AllowInternal=*/true))
+ report_fatal_error("Can only embed the module once",
+ /*gen_crash_diag=*/false);
+
+ Triple T(M.getTargetTriple());
+ if (T.getObjectFormat() != Triple::ELF)
+ report_fatal_error(
+ "EmbedBitcode pass currently only supports ELF object format",
+ /*gen_crash_diag=*/false);
+
+ std::unique_ptr<Module> NewModule = CloneModule(M);
+ MPM.run(*NewModule, AM);
+
+ std::string Data;
+ raw_string_ostream OS(Data);
+ if (IsThinLTO)
+ ThinLTOBitcodeWriterPass(OS, /*ThinLinkOS=*/nullptr).run(*NewModule, AM);
+ else
+ BitcodeWriterPass(OS, /*ShouldPreserveUseListOrder=*/false, EmitLTOSummary)
+ .run(*NewModule, AM);
+
+ embedBufferInModule(M, MemoryBufferRef(Data, "ModuleData"), ".llvm.lto");
+
+ return PreservedAnalyses::all();
+}
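Stripped of the pass-manager plumbing, the run method above boils down to serializing a module into a string buffer and attaching that buffer to the host module's ".llvm.lto" section. A hedged sketch of those two steps using the plain bitcode writer (the pass itself goes through BitcodeWriterPass or ThinLTOBitcodeWriterPass instead):

#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBufferRef.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

using namespace llvm;

static void embedModuleCopy(Module &Host, const Module &Payload) {
  std::string Data;
  raw_string_ostream OS(Data);
  WriteBitcodeToFile(Payload, OS); // plain bitcode, no summary or thin link
  embedBufferInModule(Host, MemoryBufferRef(Data, "ModuleData"), ".llvm.lto");
}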
diff --git a/llvm/lib/Transforms/IPO/ExtractGV.cpp b/llvm/lib/Transforms/IPO/ExtractGV.cpp
index d5073eed2fef..6414ea69c9f7 100644
--- a/llvm/lib/Transforms/IPO/ExtractGV.cpp
+++ b/llvm/lib/Transforms/IPO/ExtractGV.cpp
@@ -36,7 +36,7 @@ static void makeVisible(GlobalValue &GV, bool Delete) {
}
// Map linkonce* to weak* so that llvm doesn't drop this GV.
- switch(GV.getLinkage()) {
+ switch (GV.getLinkage()) {
default:
llvm_unreachable("Unexpected linkage");
case GlobalValue::LinkOnceAnyLinkage:
@@ -48,10 +48,9 @@ static void makeVisible(GlobalValue &GV, bool Delete) {
}
}
-
- /// If deleteS is true, this pass deletes the specified global values.
- /// Otherwise, it deletes as much of the module as possible, except for the
- /// global values specified.
+/// If deleteS is true, this pass deletes the specified global values.
+/// Otherwise, it deletes as much of the module as possible, except for the
+/// global values specified.
ExtractGVPass::ExtractGVPass(std::vector<GlobalValue *> &GVs, bool deleteS,
bool keepConstInit)
: Named(GVs.begin(), GVs.end()), deleteStuff(deleteS),
@@ -129,5 +128,22 @@ PreservedAnalyses ExtractGVPass::run(Module &M, ModuleAnalysisManager &) {
}
}
+ // Visit the IFuncs.
+ for (GlobalIFunc &IF : llvm::make_early_inc_range(M.ifuncs())) {
+ bool Delete = deleteStuff == (bool)Named.count(&IF);
+ makeVisible(IF, Delete);
+
+ if (!Delete)
+ continue;
+
+ auto *FuncType = dyn_cast<FunctionType>(IF.getValueType());
+ IF.removeFromParent();
+ llvm::Value *Declaration =
+ Function::Create(FuncType, GlobalValue::ExternalLinkage,
+ IF.getAddressSpace(), IF.getName(), &M);
+ IF.replaceAllUsesWith(Declaration);
+ delete &IF;
+ }
+
return PreservedAnalyses::none();
}
diff --git a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index b10c2ea13469..74931e1032d1 100644
--- a/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -9,8 +9,6 @@
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -80,32 +78,3 @@ PreservedAnalyses ForceFunctionAttrsPass::run(Module &M,
// Just conservatively invalidate analyses, this isn't likely to be important.
return PreservedAnalyses::none();
}
-
-namespace {
-struct ForceFunctionAttrsLegacyPass : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
- ForceFunctionAttrsLegacyPass() : ModulePass(ID) {
- initializeForceFunctionAttrsLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- if (!hasForceAttributes())
- return false;
-
- for (Function &F : M.functions())
- forceAttributes(F);
-
- // Conservatively assume we changed something.
- return true;
- }
-};
-}
-
-char ForceFunctionAttrsLegacyPass::ID = 0;
-INITIALIZE_PASS(ForceFunctionAttrsLegacyPass, "forceattrs",
- "Force set function attributes", false, false)
-
-Pass *llvm::createForceFunctionAttrsLegacyPass() {
- return new ForceFunctionAttrsLegacyPass();
-}
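
With the legacy-PM wrapper gone, forcing attributes is reachable only through the new pass manager. A hedged usage sketch, assuming the caller has already done the usual PassBuilder setup and owns M (Module) and MAM (ModuleAnalysisManager):

// Sketch only: new-PM replacement for the removed
// createForceFunctionAttrsLegacyPass() entry point.
ModulePassManager MPM;
MPM.addPass(ForceFunctionAttrsPass());
MPM.run(M, MAM);
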
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 3f61dbe3354e..34299f9dbb23 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -50,8 +50,6 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -154,7 +152,7 @@ static MemoryEffects checkFunctionMemoryAccess(Function &F, bool ThisBody,
// If it's not an identified object, it might be an argument.
if (!isIdentifiedObject(UO))
ME |= MemoryEffects::argMemOnly(MR);
- ME |= MemoryEffects(MemoryEffects::Other, MR);
+ ME |= MemoryEffects(IRMemLocation::Other, MR);
};
// Scan the function body for instructions that may read or write memory.
for (Instruction &I : instructions(F)) {
@@ -181,17 +179,17 @@ static MemoryEffects checkFunctionMemoryAccess(Function &F, bool ThisBody,
if (isa<PseudoProbeInst>(I))
continue;
- ME |= CallME.getWithoutLoc(MemoryEffects::ArgMem);
+ ME |= CallME.getWithoutLoc(IRMemLocation::ArgMem);
// If the call accesses captured memory (currently part of "other") and
// an argument is captured (currently not tracked), then it may also
// access argument memory.
- ModRefInfo OtherMR = CallME.getModRef(MemoryEffects::Other);
+ ModRefInfo OtherMR = CallME.getModRef(IRMemLocation::Other);
ME |= MemoryEffects::argMemOnly(OtherMR);
// Check whether all pointer arguments point to local memory, and
// ignore calls that only access local memory.
- ModRefInfo ArgMR = CallME.getModRef(MemoryEffects::ArgMem);
+ ModRefInfo ArgMR = CallME.getModRef(IRMemLocation::ArgMem);
if (ArgMR != ModRefInfo::NoModRef) {
for (const Use &U : Call->args()) {
const Value *Arg = U;
@@ -640,7 +638,7 @@ determinePointerAccessAttrs(Argument *A,
if (Visited.insert(&UU).second)
Worklist.push_back(&UU);
}
-
+
if (CB.doesNotAccessMemory())
continue;
@@ -723,18 +721,18 @@ static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes,
continue;
// There is nothing to do if an argument is already marked as 'returned'.
- if (llvm::any_of(F->args(),
- [](const Argument &Arg) { return Arg.hasReturnedAttr(); }))
+ if (F->getAttributes().hasAttrSomewhere(Attribute::Returned))
continue;
- auto FindRetArg = [&]() -> Value * {
- Value *RetArg = nullptr;
+ auto FindRetArg = [&]() -> Argument * {
+ Argument *RetArg = nullptr;
for (BasicBlock &BB : *F)
if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
// Note that stripPointerCasts should look through functions with
// returned arguments.
- Value *RetVal = Ret->getReturnValue()->stripPointerCasts();
- if (!isa<Argument>(RetVal) || RetVal->getType() != F->getReturnType())
+ auto *RetVal =
+ dyn_cast<Argument>(Ret->getReturnValue()->stripPointerCasts());
+ if (!RetVal || RetVal->getType() != F->getReturnType())
return nullptr;
if (!RetArg)
@@ -746,9 +744,8 @@ static void addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes,
return RetArg;
};
- if (Value *RetArg = FindRetArg()) {
- auto *A = cast<Argument>(RetArg);
- A->addAttr(Attribute::Returned);
+ if (Argument *RetArg = FindRetArg()) {
+ RetArg->addAttr(Attribute::Returned);
++NumReturned;
Changed.insert(F);
}
@@ -1379,7 +1376,7 @@ static bool InstrBreaksNonConvergent(Instruction &I,
/// Helper for NoUnwind inference predicate InstrBreaksAttribute.
static bool InstrBreaksNonThrowing(Instruction &I, const SCCNodeSet &SCCNodes) {
- if (!I.mayThrow())
+ if (!I.mayThrow(/* IncludePhaseOneUnwind */ true))
return false;
if (const auto *CI = dyn_cast<CallInst>(&I)) {
if (Function *Callee = CI->getCalledFunction()) {
@@ -1410,6 +1407,61 @@ static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
return true;
}
+// Return true if this is an atomic which has an ordering stronger than
+// unordered. Note that this is different than the predicate we use in
+// Attributor. Here we chose to be conservative and consider monotonic
+// operations potentially synchronizing. We generally don't do much with
+// monotonic operations, so this is simply risk reduction.
+static bool isOrderedAtomic(Instruction *I) {
+ if (!I->isAtomic())
+ return false;
+
+ if (auto *FI = dyn_cast<FenceInst>(I))
+ // All legal orderings for fence are stronger than monotonic.
+ return FI->getSyncScopeID() != SyncScope::SingleThread;
+ else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
+ return true;
+ else if (auto *SI = dyn_cast<StoreInst>(I))
+ return !SI->isUnordered();
+ else if (auto *LI = dyn_cast<LoadInst>(I))
+ return !LI->isUnordered();
+ else {
+ llvm_unreachable("unknown atomic instruction?");
+ }
+}
+
+static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
+ // Volatile may synchronize
+ if (I.isVolatile())
+ return true;
+
+ // An ordered atomic may synchronize. (See the comment above on monotonic.)
+ if (isOrderedAtomic(&I))
+ return true;
+
+ auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
+ // Non-call-site cases are covered by the two checks above.
+ return false;
+
+ if (CB->hasFnAttr(Attribute::NoSync))
+ return false;
+
+ // Non volatile memset/memcpy/memmoves are nosync
+ // NOTE: Only intrinsics with volatile flags should be handled here. All
+ // others should be marked in Intrinsics.td.
+ if (auto *MI = dyn_cast<MemIntrinsic>(&I))
+ if (!MI->isVolatile())
+ return false;
+
+ // Speculatively assume in SCC.
+ if (Function *Callee = CB->getCalledFunction())
+ if (SCCNodes.contains(Callee))
+ return false;
+
+ return true;
+}
+
/// Attempt to remove convergent function attribute when possible.
///
/// Returns true if any changes to function attributes were made.
@@ -1441,9 +1493,7 @@ static void inferConvergent(const SCCNodeSet &SCCNodes,
}
/// Infer attributes from all functions in the SCC by scanning every
-/// instruction for compliance to the attribute assumptions. Currently it
-/// does:
-/// - addition of NoUnwind attribute
+/// instruction for compliance to the attribute assumptions.
///
/// Returns true if any changes to function attributes were made.
static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
@@ -1495,6 +1545,22 @@ static void inferAttrsFromFunctionBodies(const SCCNodeSet &SCCNodes,
},
/* RequiresExactDefinition= */ true});
+ AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
+ Attribute::NoSync,
+ // Skip already marked functions.
+ [](const Function &F) { return F.hasNoSync(); },
+ // Instructions that break nosync assumption.
+ [&SCCNodes](Instruction &I) {
+ return InstrBreaksNoSync(I, SCCNodes);
+ },
+ [](Function &F) {
+ LLVM_DEBUG(dbgs()
+ << "Adding nosync attr to fn " << F.getName() << "\n");
+ F.setNoSync();
+ ++NumNoSync;
+ },
+ /* RequiresExactDefinition= */ true});
+
// Perform all the requested attribute inference actions.
AI.run(SCCNodes, Changed);
}
@@ -1622,83 +1688,6 @@ static void addWillReturn(const SCCNodeSet &SCCNodes,
}
}
-// Return true if this is an atomic which has an ordering stronger than
-// unordered. Note that this is different than the predicate we use in
-// Attributor. Here we chose to be conservative and consider monotonic
-// operations potentially synchronizing. We generally don't do much with
-// monotonic operations, so this is simply risk reduction.
-static bool isOrderedAtomic(Instruction *I) {
- if (!I->isAtomic())
- return false;
-
- if (auto *FI = dyn_cast<FenceInst>(I))
- // All legal orderings for fence are stronger than monotonic.
- return FI->getSyncScopeID() != SyncScope::SingleThread;
- else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
- return true;
- else if (auto *SI = dyn_cast<StoreInst>(I))
- return !SI->isUnordered();
- else if (auto *LI = dyn_cast<LoadInst>(I))
- return !LI->isUnordered();
- else {
- llvm_unreachable("unknown atomic instruction?");
- }
-}
-
-static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
- // Volatile may synchronize
- if (I.isVolatile())
- return true;
-
- // An ordered atomic may synchronize. (See comment about on monotonic.)
- if (isOrderedAtomic(&I))
- return true;
-
- auto *CB = dyn_cast<CallBase>(&I);
- if (!CB)
- // Non call site cases covered by the two checks above
- return false;
-
- if (CB->hasFnAttr(Attribute::NoSync))
- return false;
-
- // Non volatile memset/memcpy/memmoves are nosync
- // NOTE: Only intrinsics with volatile flags should be handled here. All
- // others should be marked in Intrinsics.td.
- if (auto *MI = dyn_cast<MemIntrinsic>(&I))
- if (!MI->isVolatile())
- return false;
-
- // Speculatively assume in SCC.
- if (Function *Callee = CB->getCalledFunction())
- if (SCCNodes.contains(Callee))
- return false;
-
- return true;
-}
-
-// Infer the nosync attribute.
-static void addNoSyncAttr(const SCCNodeSet &SCCNodes,
- SmallSet<Function *, 8> &Changed) {
- AttributeInferer AI;
- AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
- Attribute::NoSync,
- // Skip already marked functions.
- [](const Function &F) { return F.hasNoSync(); },
- // Instructions that break nosync assumption.
- [&SCCNodes](Instruction &I) {
- return InstrBreaksNoSync(I, SCCNodes);
- },
- [](Function &F) {
- LLVM_DEBUG(dbgs()
- << "Adding nosync attr to fn " << F.getName() << "\n");
- F.setNoSync();
- ++NumNoSync;
- },
- /* RequiresExactDefinition= */ true});
- AI.run(SCCNodes, Changed);
-}
-
static SCCNodesResult createSCCNodeSet(ArrayRef<Function *> Functions) {
SCCNodesResult Res;
Res.HasUnknownCall = false;
@@ -1756,8 +1745,6 @@ deriveAttrsInPostOrder(ArrayRef<Function *> Functions, AARGetterT &&AARGetter) {
addNoRecurseAttrs(Nodes.SCCNodes, Changed);
}
- addNoSyncAttr(Nodes.SCCNodes, Changed);
-
// Finally, infer the maximal set of attributes from the ones we've inferred
// above. This is handling the cases where one attribute on a signature
// implies another, but for implementation reasons the inference rule for
@@ -1774,6 +1761,13 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
CGSCCAnalysisManager &AM,
LazyCallGraph &CG,
CGSCCUpdateResult &) {
+ // Skip non-recursive functions if requested.
+ if (C.size() == 1 && SkipNonRecursive) {
+ LazyCallGraph::Node &N = *C.begin();
+ if (!N->lookup(N))
+ return PreservedAnalyses::all();
+ }
+
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
@@ -1819,40 +1813,12 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
return PA;
}
-namespace {
-
-struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass {
- // Pass identification, replacement for typeid
- static char ID;
-
- PostOrderFunctionAttrsLegacyPass() : CallGraphSCCPass(ID) {
- initializePostOrderFunctionAttrsLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnSCC(CallGraphSCC &SCC) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<AssumptionCacheTracker>();
- getAAResultsAnalysisUsage(AU);
- CallGraphSCCPass::getAnalysisUsage(AU);
- }
-};
-
-} // end anonymous namespace
-
-char PostOrderFunctionAttrsLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrsLegacyPass, "function-attrs",
- "Deduce function attributes", false, false)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "function-attrs",
- "Deduce function attributes", false, false)
-
-Pass *llvm::createPostOrderFunctionAttrsLegacyPass() {
- return new PostOrderFunctionAttrsLegacyPass();
+void PostOrderFunctionAttrsPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<PostOrderFunctionAttrsPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ if (SkipNonRecursive)
+ OS << "<skip-non-recursive>";
}
template <typename AARGetterT>
@@ -1865,48 +1831,6 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
return !deriveAttrsInPostOrder(Functions, AARGetter).empty();
}
-bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
- if (skipSCC(SCC))
- return false;
- return runImpl(SCC, LegacyAARGetter(*this));
-}
-
-namespace {
-
-struct ReversePostOrderFunctionAttrsLegacyPass : public ModulePass {
- // Pass identification, replacement for typeid
- static char ID;
-
- ReversePostOrderFunctionAttrsLegacyPass() : ModulePass(ID) {
- initializeReversePostOrderFunctionAttrsLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<CallGraphWrapperPass>();
- AU.addPreserved<CallGraphWrapperPass>();
- }
-};
-
-} // end anonymous namespace
-
-char ReversePostOrderFunctionAttrsLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrsLegacyPass,
- "rpo-function-attrs", "Deduce function attributes in RPO",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_END(ReversePostOrderFunctionAttrsLegacyPass,
- "rpo-function-attrs", "Deduce function attributes in RPO",
- false, false)
-
-Pass *llvm::createReversePostOrderFunctionAttrsPass() {
- return new ReversePostOrderFunctionAttrsLegacyPass();
-}
-
static bool addNoRecurseAttrsTopDown(Function &F) {
// We check the preconditions for the function prior to calling this to avoid
// the cost of building up a reversible post-order list. We assert them here
@@ -1939,7 +1863,7 @@ static bool addNoRecurseAttrsTopDown(Function &F) {
return true;
}
-static bool deduceFunctionAttributeInRPO(Module &M, CallGraph &CG) {
+static bool deduceFunctionAttributeInRPO(Module &M, LazyCallGraph &CG) {
// We only have a post-order SCC traversal (because SCCs are inherently
// discovered in post-order), so we accumulate them in a vector and then walk
// it in reverse. This is simpler than using the RPO iterator infrastructure
@@ -1947,17 +1871,18 @@ static bool deduceFunctionAttributeInRPO(Module &M, CallGraph &CG) {
// graph. We can also cheat egregiously because we're primarily interested in
// synthesizing norecurse and so we can only save the singular SCCs as SCCs
// with multiple functions in them will clearly be recursive.
- SmallVector<Function *, 16> Worklist;
- for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
- if (I->size() != 1)
- continue;
- Function *F = I->front()->getFunction();
- if (F && !F->isDeclaration() && !F->doesNotRecurse() &&
- F->hasInternalLinkage())
- Worklist.push_back(F);
+ SmallVector<Function *, 16> Worklist;
+ CG.buildRefSCCs();
+ for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
+ for (LazyCallGraph::SCC &SCC : RC) {
+ if (SCC.size() != 1)
+ continue;
+ Function &F = SCC.begin()->getFunction();
+ if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage())
+ Worklist.push_back(&F);
+ }
}
-
bool Changed = false;
for (auto *F : llvm::reverse(Worklist))
Changed |= addNoRecurseAttrsTopDown(*F);
@@ -1965,23 +1890,14 @@ static bool deduceFunctionAttributeInRPO(Module &M, CallGraph &CG) {
return Changed;
}
-bool ReversePostOrderFunctionAttrsLegacyPass::runOnModule(Module &M) {
- if (skipModule(M))
- return false;
-
- auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
-
- return deduceFunctionAttributeInRPO(M, CG);
-}
-
PreservedAnalyses
ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
- auto &CG = AM.getResult<CallGraphAnalysis>(M);
+ auto &CG = AM.getResult<LazyCallGraphAnalysis>(M);
if (!deduceFunctionAttributeInRPO(M, CG))
return PreservedAnalyses::all();
PreservedAnalyses PA;
- PA.preserve<CallGraphAnalysis>();
+ PA.preserve<LazyCallGraphAnalysis>();
return PA;
}
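
The SkipNonRecursive option added above is surfaced in the textual pipeline by printPipeline as "function-attrs<skip-non-recursive>". A sketch of enabling it through a parsed pipeline, assuming PB, M and MAM come from the caller's usual PassBuilder setup (the cgscc(...) nesting is the conventional wrapper for CGSCC passes, not something introduced by this change):

// Sketch only: run PostOrderFunctionAttrsPass with the new
// skip-non-recursive behavior via a pipeline string.
ModulePassManager MPM;
if (Error Err =
        PB.parsePassPipeline(MPM, "cgscc(function-attrs<skip-non-recursive>)"))
  report_fatal_error(std::move(Err), /*gen_crash_diag=*/false);
MPM.run(M, MAM);
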
diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp
index 7c994657e5c8..f635b14cd2a9 100644
--- a/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -30,9 +30,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IRReader/IRReader.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Linker/IRMover.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -159,39 +157,37 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
return Result;
}
-/// Given a list of possible callee implementation for a call site, select one
-/// that fits the \p Threshold.
-///
-/// FIXME: select "best" instead of first that fits. But what is "best"?
-/// - The smallest: more likely to be inlined.
-/// - The one with the least outgoing edges (already well optimized).
-/// - One from a module already being imported from in order to reduce the
-/// number of source modules parsed/linked.
-/// - One that has PGO data attached.
-/// - [insert you fancy metric here]
-static const GlobalValueSummary *
-selectCallee(const ModuleSummaryIndex &Index,
- ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
- unsigned Threshold, StringRef CallerModulePath,
- FunctionImporter::ImportFailureReason &Reason,
- GlobalValue::GUID GUID) {
- Reason = FunctionImporter::ImportFailureReason::None;
- auto It = llvm::find_if(
+/// Given a list of possible callee implementations for a call site, qualify the
+/// legality of importing each. The return is a range of pairs. Each pair
+/// corresponds to a candidate. The first value is the ImportFailureReason for
+/// that candidate, the second is the candidate.
+static auto qualifyCalleeCandidates(
+ const ModuleSummaryIndex &Index,
+ ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
+ StringRef CallerModulePath) {
+ return llvm::map_range(
CalleeSummaryList,
- [&](const std::unique_ptr<GlobalValueSummary> &SummaryPtr) {
+ [&Index, CalleeSummaryList,
+ CallerModulePath](const std::unique_ptr<GlobalValueSummary> &SummaryPtr)
+ -> std::pair<FunctionImporter::ImportFailureReason,
+ const GlobalValueSummary *> {
auto *GVSummary = SummaryPtr.get();
- if (!Index.isGlobalValueLive(GVSummary)) {
- Reason = FunctionImporter::ImportFailureReason::NotLive;
- return false;
- }
+ if (!Index.isGlobalValueLive(GVSummary))
+ return {FunctionImporter::ImportFailureReason::NotLive, GVSummary};
- if (GlobalValue::isInterposableLinkage(GVSummary->linkage())) {
- Reason = FunctionImporter::ImportFailureReason::InterposableLinkage;
- // There is no point in importing these, we can't inline them
- return false;
- }
+ if (GlobalValue::isInterposableLinkage(GVSummary->linkage()))
+ return {FunctionImporter::ImportFailureReason::InterposableLinkage,
+ GVSummary};
- auto *Summary = cast<FunctionSummary>(GVSummary->getBaseObject());
+ auto *Summary = dyn_cast<FunctionSummary>(GVSummary->getBaseObject());
+
+ // Ignore any callees that aren't actually functions. This could happen
+ // in the case of GUID hash collisions. It could also happen in theory
+ // for SamplePGO profiles collected on old versions of the code after
+ // renaming, since we synthesize edges to any inlined callees appearing
+ // in the profile.
+ if (!Summary)
+ return {FunctionImporter::ImportFailureReason::GlobalVar, GVSummary};
// If this is a local function, make sure we import the copy
// in the caller's module. The only time a local function can
@@ -205,119 +201,174 @@ selectCallee(const ModuleSummaryIndex &Index,
// a local in another module.
if (GlobalValue::isLocalLinkage(Summary->linkage()) &&
CalleeSummaryList.size() > 1 &&
- Summary->modulePath() != CallerModulePath) {
- Reason =
- FunctionImporter::ImportFailureReason::LocalLinkageNotInModule;
- return false;
- }
-
- if ((Summary->instCount() > Threshold) &&
- !Summary->fflags().AlwaysInline && !ForceImportAll) {
- Reason = FunctionImporter::ImportFailureReason::TooLarge;
- return false;
- }
+ Summary->modulePath() != CallerModulePath)
+ return {
+ FunctionImporter::ImportFailureReason::LocalLinkageNotInModule,
+ GVSummary};
// Skip if it isn't legal to import (e.g. may reference unpromotable
// locals).
- if (Summary->notEligibleToImport()) {
- Reason = FunctionImporter::ImportFailureReason::NotEligible;
- return false;
- }
+ if (Summary->notEligibleToImport())
+ return {FunctionImporter::ImportFailureReason::NotEligible,
+ GVSummary};
- // Don't bother importing if we can't inline it anyway.
- if (Summary->fflags().NoInline && !ForceImportAll) {
- Reason = FunctionImporter::ImportFailureReason::NoInline;
- return false;
- }
-
- return true;
+ return {FunctionImporter::ImportFailureReason::None, GVSummary};
});
- if (It == CalleeSummaryList.end())
- return nullptr;
+}
+
+/// Given a list of possible callee implementations for a call site, select one
+/// that fits the \p Threshold. If none are found, the Reason will give the last
+/// reason for the failure (last, in the order of CalleeSummaryList entries).
+///
+/// FIXME: select "best" instead of first that fits. But what is "best"?
+/// - The smallest: more likely to be inlined.
+/// - The one with the least outgoing edges (already well optimized).
+/// - One from a module already being imported from in order to reduce the
+/// number of source modules parsed/linked.
+/// - One that has PGO data attached.
+/// - [insert your fancy metric here]
+static const GlobalValueSummary *
+selectCallee(const ModuleSummaryIndex &Index,
+ ArrayRef<std::unique_ptr<GlobalValueSummary>> CalleeSummaryList,
+ unsigned Threshold, StringRef CallerModulePath,
+ FunctionImporter::ImportFailureReason &Reason) {
+ auto QualifiedCandidates =
+ qualifyCalleeCandidates(Index, CalleeSummaryList, CallerModulePath);
+ for (auto QualifiedValue : QualifiedCandidates) {
+ Reason = QualifiedValue.first;
+ if (Reason != FunctionImporter::ImportFailureReason::None)
+ continue;
+ auto *Summary =
+ cast<FunctionSummary>(QualifiedValue.second->getBaseObject());
+
+ if ((Summary->instCount() > Threshold) && !Summary->fflags().AlwaysInline &&
+ !ForceImportAll) {
+ Reason = FunctionImporter::ImportFailureReason::TooLarge;
+ continue;
+ }
- return cast<GlobalValueSummary>(It->get());
+ // Don't bother importing if we can't inline it anyway.
+ if (Summary->fflags().NoInline && !ForceImportAll) {
+ Reason = FunctionImporter::ImportFailureReason::NoInline;
+ continue;
+ }
+
+ return Summary;
+ }
+ return nullptr;
}
namespace {
-using EdgeInfo =
- std::tuple<const GlobalValueSummary *, unsigned /* Threshold */>;
+using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */>;
} // anonymous namespace
-static bool shouldImportGlobal(const ValueInfo &VI,
- const GVSummaryMapTy &DefinedGVSummaries) {
- const auto &GVS = DefinedGVSummaries.find(VI.getGUID());
- if (GVS == DefinedGVSummaries.end())
- return true;
- // We should not skip import if the module contains a definition with
- // interposable linkage type. This is required for correctness in
- // the situation with two following conditions:
- // * the def with interposable linkage is non-prevailing,
- // * there is a prevailing def available for import and marked read-only.
- // In this case, the non-prevailing def will be converted to a declaration,
- // while the prevailing one becomes internal, thus no definitions will be
- // available for linking. In order to prevent undefined symbol link error,
- // the prevailing definition must be imported.
- // FIXME: Consider adding a check that the suitable prevailing definition
- // exists and marked read-only.
- if (VI.getSummaryList().size() > 1 &&
- GlobalValue::isInterposableLinkage(GVS->second->linkage()))
- return true;
-
- return false;
-}
+/// Import globals referenced by a function or other globals that are being
+/// imported, if importing such global is possible.
+class GlobalsImporter final {
+ const ModuleSummaryIndex &Index;
+ const GVSummaryMapTy &DefinedGVSummaries;
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ IsPrevailing;
+ FunctionImporter::ImportMapTy &ImportList;
+ StringMap<FunctionImporter::ExportSetTy> *const ExportLists;
+
+ bool shouldImportGlobal(const ValueInfo &VI) {
+ const auto &GVS = DefinedGVSummaries.find(VI.getGUID());
+ if (GVS == DefinedGVSummaries.end())
+ return true;
+ // We should not skip import if the module contains a non-prevailing
+ // definition with interposable linkage type. This is required for
+ // correctness in the situation where there is a prevailing def available
+ // for import and marked read-only. In this case, the non-prevailing def
+ // will be converted to a declaration, while the prevailing one becomes
+ // internal, thus no definitions will be available for linking. In order to
+ // prevent undefined symbol link error, the prevailing definition must be
+ // imported.
+ // FIXME: Consider adding a check that the suitable prevailing definition
+ // exists and marked read-only.
+ if (VI.getSummaryList().size() > 1 &&
+ GlobalValue::isInterposableLinkage(GVS->second->linkage()) &&
+ !IsPrevailing(VI.getGUID(), GVS->second))
+ return true;
-static void computeImportForReferencedGlobals(
- const GlobalValueSummary &Summary, const ModuleSummaryIndex &Index,
- const GVSummaryMapTy &DefinedGVSummaries,
- SmallVectorImpl<EdgeInfo> &Worklist,
- FunctionImporter::ImportMapTy &ImportList,
- StringMap<FunctionImporter::ExportSetTy> *ExportLists) {
- for (const auto &VI : Summary.refs()) {
- if (!shouldImportGlobal(VI, DefinedGVSummaries)) {
- LLVM_DEBUG(
- dbgs() << "Ref ignored! Target already in destination module.\n");
- continue;
- }
+ return false;
+ }
- LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n");
-
- // If this is a local variable, make sure we import the copy
- // in the caller's module. The only time a local variable can
- // share an entry in the index is if there is a local with the same name
- // in another module that had the same source file name (in a different
- // directory), where each was compiled in their own directory so there
- // was not distinguishing path.
- auto LocalNotInModule = [&](const GlobalValueSummary *RefSummary) -> bool {
- return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
- RefSummary->modulePath() != Summary.modulePath();
- };
+ void
+ onImportingSummaryImpl(const GlobalValueSummary &Summary,
+ SmallVectorImpl<const GlobalVarSummary *> &Worklist) {
+ for (const auto &VI : Summary.refs()) {
+ if (!shouldImportGlobal(VI)) {
+ LLVM_DEBUG(
+ dbgs() << "Ref ignored! Target already in destination module.\n");
+ continue;
+ }
- for (const auto &RefSummary : VI.getSummaryList())
- if (isa<GlobalVarSummary>(RefSummary.get()) &&
- Index.canImportGlobalVar(RefSummary.get(), /* AnalyzeRefs */ true) &&
- !LocalNotInModule(RefSummary.get())) {
+ LLVM_DEBUG(dbgs() << " ref -> " << VI << "\n");
+
+ // If this is a local variable, make sure we import the copy
+ // in the caller's module. The only time a local variable can
+ // share an entry in the index is if there is a local with the same name
+ // in another module that had the same source file name (in a different
+ // directory), where each was compiled in their own directory so there
+ // was no distinguishing path.
+ auto LocalNotInModule =
+ [&](const GlobalValueSummary *RefSummary) -> bool {
+ return GlobalValue::isLocalLinkage(RefSummary->linkage()) &&
+ RefSummary->modulePath() != Summary.modulePath();
+ };
+
+ for (const auto &RefSummary : VI.getSummaryList()) {
+ const auto *GVS = dyn_cast<GlobalVarSummary>(RefSummary.get());
+ // Functions could be referenced by global vars - e.g. a vtable; but we
+ // don't currently imagine a reason those would be imported here, rather
+ // than as part of the logic deciding which functions to import (i.e.
+ // based on profile information). Should we decide to handle them here,
+ // we can refactor accordingly at that time.
+ if (!GVS || !Index.canImportGlobalVar(GVS, /* AnalyzeRefs */ true) ||
+ LocalNotInModule(GVS))
+ continue;
auto ILI = ImportList[RefSummary->modulePath()].insert(VI.getGUID());
// Only update stat and exports if we haven't already imported this
// variable.
if (!ILI.second)
break;
NumImportedGlobalVarsThinLink++;
- // Any references made by this variable will be marked exported later,
- // in ComputeCrossModuleImport, after import decisions are complete,
- // which is more efficient than adding them here.
+ // Any references made by this variable will be marked exported
+ // later, in ComputeCrossModuleImport, after import decisions are
+ // complete, which is more efficient than adding them here.
if (ExportLists)
(*ExportLists)[RefSummary->modulePath()].insert(VI);
// If variable is not writeonly we attempt to recursively analyze
// its references in order to import referenced constants.
- if (!Index.isWriteOnly(cast<GlobalVarSummary>(RefSummary.get())))
- Worklist.emplace_back(RefSummary.get(), 0);
+ if (!Index.isWriteOnly(GVS))
+ Worklist.emplace_back(GVS);
break;
}
+ }
}
-}
+
+public:
+ GlobalsImporter(
+ const ModuleSummaryIndex &Index, const GVSummaryMapTy &DefinedGVSummaries,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ IsPrevailing,
+ FunctionImporter::ImportMapTy &ImportList,
+ StringMap<FunctionImporter::ExportSetTy> *ExportLists)
+ : Index(Index), DefinedGVSummaries(DefinedGVSummaries),
+ IsPrevailing(IsPrevailing), ImportList(ImportList),
+ ExportLists(ExportLists) {}
+
+ void onImportingSummary(const GlobalValueSummary &Summary) {
+ SmallVector<const GlobalVarSummary *, 128> Worklist;
+ onImportingSummaryImpl(Summary, Worklist);
+ while (!Worklist.empty())
+ onImportingSummaryImpl(*Worklist.pop_back_val(), Worklist);
+ }
+};
static const char *
getFailureName(FunctionImporter::ImportFailureReason Reason) {
@@ -348,12 +399,13 @@ getFailureName(FunctionImporter::ImportFailureReason Reason) {
static void computeImportForFunction(
const FunctionSummary &Summary, const ModuleSummaryIndex &Index,
const unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries,
- SmallVectorImpl<EdgeInfo> &Worklist,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ isPrevailing,
+ SmallVectorImpl<EdgeInfo> &Worklist, GlobalsImporter &GVImporter,
FunctionImporter::ImportMapTy &ImportList,
StringMap<FunctionImporter::ExportSetTy> *ExportLists,
FunctionImporter::ImportThresholdsTy &ImportThresholds) {
- computeImportForReferencedGlobals(Summary, Index, DefinedGVSummaries,
- Worklist, ImportList, ExportLists);
+ GVImporter.onImportingSummary(Summary);
static int ImportCount = 0;
for (const auto &Edge : Summary.calls()) {
ValueInfo VI = Edge.first;
@@ -432,7 +484,7 @@ static void computeImportForFunction(
FunctionImporter::ImportFailureReason Reason;
CalleeSummary = selectCallee(Index, VI.getSummaryList(), NewThreshold,
- Summary.modulePath(), Reason, VI.getGUID());
+ Summary.modulePath(), Reason);
if (!CalleeSummary) {
// Update with new larger threshold if this was a retry (otherwise
// we would have already inserted with NewThreshold above). Also
@@ -519,12 +571,17 @@ static void computeImportForFunction(
/// as well as the list of "exports", i.e. the list of symbols referenced from
/// another module (that may require promotion).
static void ComputeImportForModule(
- const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index,
- StringRef ModName, FunctionImporter::ImportMapTy &ImportList,
+ const GVSummaryMapTy &DefinedGVSummaries,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ isPrevailing,
+ const ModuleSummaryIndex &Index, StringRef ModName,
+ FunctionImporter::ImportMapTy &ImportList,
StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
// Worklist contains the list of function imported in this module, for which
// we will analyse the callees and may import further down the callgraph.
SmallVector<EdgeInfo, 128> Worklist;
+ GlobalsImporter GVI(Index, DefinedGVSummaries, isPrevailing, ImportList,
+ ExportLists);
FunctionImporter::ImportThresholdsTy ImportThresholds;
// Populate the worklist with the import for the functions in the current
@@ -546,8 +603,8 @@ static void ComputeImportForModule(
continue;
LLVM_DEBUG(dbgs() << "Initialize import for " << VI << "\n");
computeImportForFunction(*FuncSummary, Index, ImportInstrLimit,
- DefinedGVSummaries, Worklist, ImportList,
- ExportLists, ImportThresholds);
+ DefinedGVSummaries, isPrevailing, Worklist, GVI,
+ ImportList, ExportLists, ImportThresholds);
}
// Process the newly imported functions and add callees to the worklist.
@@ -558,11 +615,8 @@ static void ComputeImportForModule(
if (auto *FS = dyn_cast<FunctionSummary>(Summary))
computeImportForFunction(*FS, Index, Threshold, DefinedGVSummaries,
- Worklist, ImportList, ExportLists,
- ImportThresholds);
- else
- computeImportForReferencedGlobals(*Summary, Index, DefinedGVSummaries,
- Worklist, ImportList, ExportLists);
+ isPrevailing, Worklist, GVI, ImportList,
+ ExportLists, ImportThresholds);
}
// Print stats about functions considered but rejected for importing
@@ -632,17 +686,23 @@ checkVariableImport(const ModuleSummaryIndex &Index,
// Checks that all GUIDs of read/writeonly vars we see in export lists
  // are also in the import lists. Otherwise we may face linker undefs,
// because readonly and writeonly vars are internalized in their
- // source modules.
- auto IsReadOrWriteOnlyVar = [&](StringRef ModulePath, const ValueInfo &VI) {
+ // source modules. The exception would be if it has a linkage type indicating
+ // that there may have been a copy existing in the importing module (e.g.
+ // linkonce_odr). In that case we cannot accurately do this checking.
+ auto IsReadOrWriteOnlyVarNeedingImporting = [&](StringRef ModulePath,
+ const ValueInfo &VI) {
auto *GVS = dyn_cast_or_null<GlobalVarSummary>(
Index.findSummaryInModule(VI, ModulePath));
- return GVS && (Index.isReadOnly(GVS) || Index.isWriteOnly(GVS));
+ return GVS && (Index.isReadOnly(GVS) || Index.isWriteOnly(GVS)) &&
+ !(GVS->linkage() == GlobalValue::AvailableExternallyLinkage ||
+ GVS->linkage() == GlobalValue::WeakODRLinkage ||
+ GVS->linkage() == GlobalValue::LinkOnceODRLinkage);
};
for (auto &ExportPerModule : ExportLists)
for (auto &VI : ExportPerModule.second)
if (!FlattenedImports.count(VI.getGUID()) &&
- IsReadOrWriteOnlyVar(ExportPerModule.first(), VI))
+ IsReadOrWriteOnlyVarNeedingImporting(ExportPerModule.first(), VI))
return false;
return true;
@@ -653,6 +713,8 @@ checkVariableImport(const ModuleSummaryIndex &Index,
void llvm::ComputeCrossModuleImport(
const ModuleSummaryIndex &Index,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ isPrevailing,
StringMap<FunctionImporter::ImportMapTy> &ImportLists,
StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
// For each module that has function defined, compute the import/export lists.
@@ -660,7 +722,7 @@ void llvm::ComputeCrossModuleImport(
auto &ImportList = ImportLists[DefinedGVSummaries.first()];
LLVM_DEBUG(dbgs() << "Computing import for Module '"
<< DefinedGVSummaries.first() << "'\n");
- ComputeImportForModule(DefinedGVSummaries.second, Index,
+ ComputeImportForModule(DefinedGVSummaries.second, isPrevailing, Index,
DefinedGVSummaries.first(), ImportList,
&ExportLists);
}
@@ -759,7 +821,10 @@ static void dumpImportListForModule(const ModuleSummaryIndex &Index,
/// Compute all the imports for the given module in the Index.
void llvm::ComputeCrossModuleImportForModule(
- StringRef ModulePath, const ModuleSummaryIndex &Index,
+ StringRef ModulePath,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ isPrevailing,
+ const ModuleSummaryIndex &Index,
FunctionImporter::ImportMapTy &ImportList) {
// Collect the list of functions this module defines.
// GUID -> Summary
@@ -768,7 +833,8 @@ void llvm::ComputeCrossModuleImportForModule(
// Compute the import list for this module.
LLVM_DEBUG(dbgs() << "Computing import for Module '" << ModulePath << "'\n");
- ComputeImportForModule(FunctionSummaryMap, Index, ModulePath, ImportList);
+ ComputeImportForModule(FunctionSummaryMap, isPrevailing, Index, ModulePath,
+ ImportList);
#ifndef NDEBUG
dumpImportListForModule(Index, ModulePath, ImportList);
@@ -1373,8 +1439,9 @@ Expected<bool> FunctionImporter::importFunctions(
if (Error Err = Mover.move(std::move(SrcModule),
GlobalsToImport.getArrayRef(), nullptr,
/*IsPerformingImport=*/true))
- report_fatal_error(Twine("Function Import: link error: ") +
- toString(std::move(Err)));
+ return createStringError(errc::invalid_argument,
+ Twine("Function Import: link error: ") +
+ toString(std::move(Err)));
ImportedCount += GlobalsToImport.size();
NumImportedModules++;
@@ -1394,7 +1461,9 @@ Expected<bool> FunctionImporter::importFunctions(
return ImportedCount;
}
-static bool doImportingForModule(Module &M) {
+static bool doImportingForModule(
+ Module &M, function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ isPrevailing) {
if (SummaryFile.empty())
report_fatal_error("error: -function-import requires -summary-file\n");
Expected<std::unique_ptr<ModuleSummaryIndex>> IndexPtrOrErr =
@@ -1415,8 +1484,8 @@ static bool doImportingForModule(Module &M) {
ComputeCrossModuleImportForModuleFromIndex(M.getModuleIdentifier(), *Index,
ImportList);
else
- ComputeCrossModuleImportForModule(M.getModuleIdentifier(), *Index,
- ImportList);
+ ComputeCrossModuleImportForModule(M.getModuleIdentifier(), isPrevailing,
+ *Index, ImportList);
// Conservatively mark all internal values as promoted. This interface is
// only used when doing importing via the function importing pass. The pass
@@ -1434,7 +1503,7 @@ static bool doImportingForModule(Module &M) {
if (renameModuleForThinLTO(M, *Index, /*ClearDSOLocalOnDeclarations=*/false,
/*GlobalsToImport=*/nullptr)) {
errs() << "Error renaming module\n";
- return false;
+ return true;
}
// Perform the import now.
@@ -1449,15 +1518,22 @@ static bool doImportingForModule(Module &M) {
if (!Result) {
logAllUnhandledErrors(Result.takeError(), errs(),
"Error importing module: ");
- return false;
+ return true;
}
- return *Result;
+ return true;
}
PreservedAnalyses FunctionImportPass::run(Module &M,
ModuleAnalysisManager &AM) {
- if (!doImportingForModule(M))
+ // This is only used for testing the function import pass via opt, where we
+ // don't have prevailing information from the LTO context available, so just
+ // conservatively assume everything is prevailing (which is fine for the very
+ // limited use of prevailing checking in this pass).
+ auto isPrevailing = [](GlobalValue::GUID, const GlobalValueSummary *) {
+ return true;
+ };
+ if (!doImportingForModule(M, isPrevailing))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
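
ComputeCrossModuleImport and ComputeCrossModuleImportForModule now require an isPrevailing callback. A sketch of a caller adapting to the new signature with the same conservative predicate the pass itself installs above (Index and M are assumed to be the caller's summary index and module; a real LTO client would consult its symbol resolution instead):

// Sketch only: conservative prevailing-copy predicate, as in
// FunctionImportPass::run above.
auto IsPrevailing = [](GlobalValue::GUID, const GlobalValueSummary *) {
  return true; // treat every summary as the prevailing copy
};
FunctionImporter::ImportMapTy ImportList;
ComputeCrossModuleImportForModule(M.getModuleIdentifier(), IsPrevailing,
                                  *Index, ImportList);
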
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 4a7efb28e853..3d6c501e4596 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -48,11 +48,13 @@
#include "llvm/Transforms/IPO/FunctionSpecialization.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueLattice.h"
#include "llvm/Analysis/ValueLatticeUtils.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -64,42 +66,324 @@ using namespace llvm;
#define DEBUG_TYPE "function-specialization"
-STATISTIC(NumFuncSpecialized, "Number of functions specialized");
+STATISTIC(NumSpecsCreated, "Number of specializations created");
-static cl::opt<bool> ForceFunctionSpecialization(
- "force-function-specialization", cl::init(false), cl::Hidden,
- cl::desc("Force function specialization for every call site with a "
- "constant argument"));
+static cl::opt<bool> ForceSpecialization(
+ "force-specialization", cl::init(false), cl::Hidden, cl::desc(
+ "Force function specialization for every call site with a constant "
+ "argument"));
-static cl::opt<unsigned> MaxClonesThreshold(
- "func-specialization-max-clones", cl::Hidden,
- cl::desc("The maximum number of clones allowed for a single function "
- "specialization"),
- cl::init(3));
+static cl::opt<unsigned> MaxClones(
+ "funcspec-max-clones", cl::init(3), cl::Hidden, cl::desc(
+ "The maximum number of clones allowed for a single function "
+ "specialization"));
-static cl::opt<unsigned> SmallFunctionThreshold(
- "func-specialization-size-threshold", cl::Hidden,
- cl::desc("Don't specialize functions that have less than this theshold "
- "number of instructions"),
- cl::init(100));
+static cl::opt<unsigned> MaxIncomingPhiValues(
+ "funcspec-max-incoming-phi-values", cl::init(4), cl::Hidden, cl::desc(
+ "The maximum number of incoming values a PHI node can have to be "
+ "considered during the specialization bonus estimation"));
-static cl::opt<unsigned>
- AvgLoopIterationCount("func-specialization-avg-iters-cost", cl::Hidden,
- cl::desc("Average loop iteration count cost"),
- cl::init(10));
+static cl::opt<unsigned> MinFunctionSize(
+ "funcspec-min-function-size", cl::init(100), cl::Hidden, cl::desc(
+ "Don't specialize functions that have less than this number of "
+ "instructions"));
-static cl::opt<bool> SpecializeOnAddresses(
- "func-specialization-on-address", cl::init(false), cl::Hidden,
- cl::desc("Enable function specialization on the address of global values"));
+static cl::opt<bool> SpecializeOnAddress(
+ "funcspec-on-address", cl::init(false), cl::Hidden, cl::desc(
+ "Enable function specialization on the address of global values"));
// Disabled by default as it can significantly increase compilation times.
//
// https://llvm-compile-time-tracker.com
// https://github.com/nikic/llvm-compile-time-tracker
-static cl::opt<bool> EnableSpecializationForLiteralConstant(
- "function-specialization-for-literal-constant", cl::init(false), cl::Hidden,
- cl::desc("Enable specialization of functions that take a literal constant "
- "as an argument."));
+static cl::opt<bool> SpecializeLiteralConstant(
+ "funcspec-for-literal-constant", cl::init(false), cl::Hidden, cl::desc(
+ "Enable specialization of functions that take a literal constant as an "
+ "argument"));
+
+// Estimates the instruction cost of all the basic blocks in \p WorkList.
+// The successors of such blocks are added to the list as long as they are
+// executable and they have a unique predecessor. \p WorkList represents
+// the basic blocks of a specialization which become dead once we replace
+// instructions that are known to be constants. The aim here is to estimate
+// the combination of size and latency savings in comparison to the non
+// specialized version of the function.
+static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
+ DenseSet<BasicBlock *> &DeadBlocks,
+ ConstMap &KnownConstants, SCCPSolver &Solver,
+ BlockFrequencyInfo &BFI,
+ TargetTransformInfo &TTI) {
+ Cost Bonus = 0;
+
+ // Accumulate the instruction cost of each basic block weighted by frequency.
+ while (!WorkList.empty()) {
+ BasicBlock *BB = WorkList.pop_back_val();
+
+ uint64_t Weight = BFI.getBlockFreq(BB).getFrequency() /
+ BFI.getEntryFreq();
+ if (!Weight)
+ continue;
+
+ // These blocks are considered dead as far as the InstCostVisitor is
+ // concerned. They haven't been proven dead yet by the Solver, but
+ // may become if we propagate the constant specialization arguments.
+ if (!DeadBlocks.insert(BB).second)
+ continue;
+
+ for (Instruction &I : *BB) {
+ // Disregard SSA copies.
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::ssa_copy)
+ continue;
+ // If it's a known constant we have already accounted for it.
+ if (KnownConstants.contains(&I))
+ continue;
+
+ Bonus += Weight *
+ TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus
+ << " after user " << I << "\n");
+ }
+
+ // Keep adding dead successors to the list as long as they are
+ // executable and they have a unique predecessor.
+ for (BasicBlock *SuccBB : successors(BB))
+ if (Solver.isBlockExecutable(SuccBB) &&
+ SuccBB->getUniquePredecessor() == BB)
+ WorkList.push_back(SuccBB);
+ }
+ return Bonus;
+}
+
+static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
+ if (auto *C = dyn_cast<Constant>(V))
+ return C;
+ if (auto It = KnownConstants.find(V); It != KnownConstants.end())
+ return It->second;
+ return nullptr;
+}
+
+Cost InstCostVisitor::getBonusFromPendingPHIs() {
+ Cost Bonus = 0;
+ while (!PendingPHIs.empty()) {
+ Instruction *Phi = PendingPHIs.pop_back_val();
+ Bonus += getUserBonus(Phi);
+ }
+ return Bonus;
+}
+
+Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
+ // Cache the iterator before visiting.
+ LastVisited = Use ? KnownConstants.insert({Use, C}).first
+ : KnownConstants.end();
+
+ if (auto *I = dyn_cast<SwitchInst>(User))
+ return estimateSwitchInst(*I);
+
+ if (auto *I = dyn_cast<BranchInst>(User))
+ return estimateBranchInst(*I);
+
+ C = visit(*User);
+ if (!C)
+ return 0;
+
+ KnownConstants.insert({User, C});
+
+ uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() /
+ BFI.getEntryFreq();
+ if (!Weight)
+ return 0;
+
+ Cost Bonus = Weight *
+ TTI.getInstructionCost(User, TargetTransformInfo::TCK_SizeAndLatency);
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Bonus " << Bonus
+ << " for user " << *User << "\n");
+
+ for (auto *U : User->users())
+ if (auto *UI = dyn_cast<Instruction>(U))
+ if (UI != User && Solver.isBlockExecutable(UI->getParent()))
+ Bonus += getUserBonus(UI, User, C);
+
+ return Bonus;
+}
+
+Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
+ assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
+
+ if (I.getCondition() != LastVisited->first)
+ return 0;
+
+ auto *C = dyn_cast<ConstantInt>(LastVisited->second);
+ if (!C)
+ return 0;
+
+ BasicBlock *Succ = I.findCaseValue(C)->getCaseSuccessor();
+ // Initialize the worklist with the dead basic blocks. These are the
+ // destination labels which are different from the one corresponding
+ // to \p C. They should be executable and have a unique predecessor.
+ SmallVector<BasicBlock *> WorkList;
+ for (const auto &Case : I.cases()) {
+ BasicBlock *BB = Case.getCaseSuccessor();
+ if (BB == Succ || !Solver.isBlockExecutable(BB) ||
+ BB->getUniquePredecessor() != I.getParent())
+ continue;
+ WorkList.push_back(BB);
+ }
+
+ return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI,
+ TTI);
+}
+
+Cost InstCostVisitor::estimateBranchInst(BranchInst &I) {
+ assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
+
+ if (I.getCondition() != LastVisited->first)
+ return 0;
+
+ BasicBlock *Succ = I.getSuccessor(LastVisited->second->isOneValue());
+ // Initialize the worklist with the dead successor as long as
+ // it is executable and has a unique predecessor.
+ SmallVector<BasicBlock *> WorkList;
+ if (Solver.isBlockExecutable(Succ) &&
+ Succ->getUniquePredecessor() == I.getParent())
+ WorkList.push_back(Succ);
+
+ return estimateBasicBlocks(WorkList, DeadBlocks, KnownConstants, Solver, BFI,
+ TTI);
+}
+
+Constant *InstCostVisitor::visitPHINode(PHINode &I) {
+ if (I.getNumIncomingValues() > MaxIncomingPhiValues)
+ return nullptr;
+
+ bool Inserted = VisitedPHIs.insert(&I).second;
+ Constant *Const = nullptr;
+
+ for (unsigned Idx = 0, E = I.getNumIncomingValues(); Idx != E; ++Idx) {
+ Value *V = I.getIncomingValue(Idx);
+ if (auto *Inst = dyn_cast<Instruction>(V))
+ if (Inst == &I || DeadBlocks.contains(I.getIncomingBlock(Idx)))
+ continue;
+ Constant *C = findConstantFor(V, KnownConstants);
+ if (!C) {
+ if (Inserted)
+ PendingPHIs.push_back(&I);
+ return nullptr;
+ }
+ if (!Const)
+ Const = C;
+ else if (C != Const)
+ return nullptr;
+ }
+ return Const;
+}
+
+Constant *InstCostVisitor::visitFreezeInst(FreezeInst &I) {
+ assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
+
+ if (isGuaranteedNotToBeUndefOrPoison(LastVisited->second))
+ return LastVisited->second;
+ return nullptr;
+}
+
+Constant *InstCostVisitor::visitCallBase(CallBase &I) {
+ Function *F = I.getCalledFunction();
+ if (!F || !canConstantFoldCallTo(&I, F))
+ return nullptr;
+
+ SmallVector<Constant *, 8> Operands;
+ Operands.reserve(I.getNumOperands());
+
+ for (unsigned Idx = 0, E = I.getNumOperands() - 1; Idx != E; ++Idx) {
+ Value *V = I.getOperand(Idx);
+ Constant *C = findConstantFor(V, KnownConstants);
+ if (!C)
+ return nullptr;
+ Operands.push_back(C);
+ }
+
+ auto Ops = ArrayRef(Operands.begin(), Operands.end());
+ return ConstantFoldCall(&I, F, Ops);
+}
+
+Constant *InstCostVisitor::visitLoadInst(LoadInst &I) {
+ assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
+
+ if (isa<ConstantPointerNull>(LastVisited->second))
+ return nullptr;
+ return ConstantFoldLoadFromConstPtr(LastVisited->second, I.getType(), DL);
+}
+
+Constant *InstCostVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
+ SmallVector<Constant *, 8> Operands;
+ Operands.reserve(I.getNumOperands());
+
+ for (unsigned Idx = 0, E = I.getNumOperands(); Idx != E; ++Idx) {
+ Value *V = I.getOperand(Idx);
+ Constant *C = findConstantFor(V, KnownConstants);
+ if (!C)
+ return nullptr;
+ Operands.push_back(C);
+ }
+
+ auto Ops = ArrayRef(Operands.begin(), Operands.end());
+ return ConstantFoldInstOperands(&I, Ops, DL);
+}
+
+Constant *InstCostVisitor::visitSelectInst(SelectInst &I) {
+ assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
+
+ if (I.getCondition() != LastVisited->first)
+ return nullptr;
+
+ Value *V = LastVisited->second->isZeroValue() ? I.getFalseValue()
+ : I.getTrueValue();
+ Constant *C = findConstantFor(V, KnownConstants);
+ return C;
+}
+
+Constant *InstCostVisitor::visitCastInst(CastInst &I) {
+ return ConstantFoldCastOperand(I.getOpcode(), LastVisited->second,
+ I.getType(), DL);
+}
+
+Constant *InstCostVisitor::visitCmpInst(CmpInst &I) {
+ assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
+
+ bool Swap = I.getOperand(1) == LastVisited->first;
+ Value *V = Swap ? I.getOperand(0) : I.getOperand(1);
+ Constant *Other = findConstantFor(V, KnownConstants);
+ if (!Other)
+ return nullptr;
+
+ Constant *Const = LastVisited->second;
+ return Swap ?
+ ConstantFoldCompareInstOperands(I.getPredicate(), Other, Const, DL)
+ : ConstantFoldCompareInstOperands(I.getPredicate(), Const, Other, DL);
+}
+
+Constant *InstCostVisitor::visitUnaryOperator(UnaryOperator &I) {
+ assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
+
+ return ConstantFoldUnaryOpOperand(I.getOpcode(), LastVisited->second, DL);
+}
+
+Constant *InstCostVisitor::visitBinaryOperator(BinaryOperator &I) {
+ assert(LastVisited != KnownConstants.end() && "Invalid iterator!");
+
+ bool Swap = I.getOperand(1) == LastVisited->first;
+ Value *V = Swap ? I.getOperand(0) : I.getOperand(1);
+ Constant *Other = findConstantFor(V, KnownConstants);
+ if (!Other)
+ return nullptr;
+
+ Constant *Const = LastVisited->second;
+ return dyn_cast_or_null<Constant>(Swap ?
+ simplifyBinOp(I.getOpcode(), Other, Const, SimplifyQuery(DL))
+ : simplifyBinOp(I.getOpcode(), Const, Other, SimplifyQuery(DL)));
+}
Constant *FunctionSpecializer::getPromotableAlloca(AllocaInst *Alloca,
CallInst *Call) {
@@ -125,6 +409,10 @@ Constant *FunctionSpecializer::getPromotableAlloca(AllocaInst *Alloca,
// Bail if there is any other unknown usage.
return nullptr;
}
+
+ if (!StoreValue)
+ return nullptr;
+
return getCandidateConstant(StoreValue);
}
@@ -165,49 +453,37 @@ Constant *FunctionSpecializer::getConstantStackValue(CallInst *Call,
// ret void
// }
//
-void FunctionSpecializer::promoteConstantStackValues() {
- // Iterate over the argument tracked functions see if there
- // are any new constant values for the call instruction via
- // stack variables.
- for (Function &F : M) {
- if (!Solver.isArgumentTrackedFunction(&F))
+// See if there are any new constant values for the callers of \p F via
+// stack variables and promote them to global variables.
+void FunctionSpecializer::promoteConstantStackValues(Function *F) {
+ for (User *U : F->users()) {
+
+ auto *Call = dyn_cast<CallInst>(U);
+ if (!Call)
continue;
- for (auto *User : F.users()) {
+ if (!Solver.isBlockExecutable(Call->getParent()))
+ continue;
- auto *Call = dyn_cast<CallInst>(User);
- if (!Call)
- continue;
+ for (const Use &U : Call->args()) {
+ unsigned Idx = Call->getArgOperandNo(&U);
+ Value *ArgOp = Call->getArgOperand(Idx);
+ Type *ArgOpType = ArgOp->getType();
- if (!Solver.isBlockExecutable(Call->getParent()))
+ if (!Call->onlyReadsMemory(Idx) || !ArgOpType->isPointerTy())
continue;
- bool Changed = false;
- for (const Use &U : Call->args()) {
- unsigned Idx = Call->getArgOperandNo(&U);
- Value *ArgOp = Call->getArgOperand(Idx);
- Type *ArgOpType = ArgOp->getType();
-
- if (!Call->onlyReadsMemory(Idx) || !ArgOpType->isPointerTy())
- continue;
-
- auto *ConstVal = getConstantStackValue(Call, ArgOp);
- if (!ConstVal)
- continue;
-
- Value *GV = new GlobalVariable(M, ConstVal->getType(), true,
- GlobalValue::InternalLinkage, ConstVal,
- "funcspec.arg");
- if (ArgOpType != ConstVal->getType())
- GV = ConstantExpr::getBitCast(cast<Constant>(GV), ArgOpType);
+ auto *ConstVal = getConstantStackValue(Call, ArgOp);
+ if (!ConstVal)
+ continue;
- Call->setArgOperand(Idx, GV);
- Changed = true;
- }
+ Value *GV = new GlobalVariable(M, ConstVal->getType(), true,
+ GlobalValue::InternalLinkage, ConstVal,
+ "funcspec.arg");
+ if (ArgOpType != ConstVal->getType())
+ GV = ConstantExpr::getBitCast(cast<Constant>(GV), ArgOpType);
- // Add the changed CallInst to Solver Worklist
- if (Changed)
- Solver.visitCall(*Call);
+ Call->setArgOperand(Idx, GV);
}
}
}
@@ -230,7 +506,7 @@ static void removeSSACopy(Function &F) {
/// Remove any ssa_copy intrinsics that may have been introduced.
void FunctionSpecializer::cleanUpSSA() {
- for (Function *F : SpecializedFuncs)
+ for (Function *F : Specializations)
removeSSACopy(*F);
}
@@ -249,6 +525,16 @@ template <> struct llvm::DenseMapInfo<SpecSig> {
}
};
+FunctionSpecializer::~FunctionSpecializer() {
+ LLVM_DEBUG(
+ if (NumSpecsCreated > 0)
+ dbgs() << "FnSpecialization: Created " << NumSpecsCreated
+ << " specializations in module " << M.getName() << "\n");
+ // Eliminate dead code.
+ removeDeadFunctions();
+ cleanUpSSA();
+}
+
/// Attempt to specialize functions in the module to enable constant
/// propagation across function boundaries.
///
@@ -262,17 +548,37 @@ bool FunctionSpecializer::run() {
if (!isCandidateFunction(&F))
continue;
- auto Cost = getSpecializationCost(&F);
- if (!Cost.isValid()) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: Invalid specialization cost for "
- << F.getName() << "\n");
- continue;
+ auto [It, Inserted] = FunctionMetrics.try_emplace(&F);
+ CodeMetrics &Metrics = It->second;
+ //Analyze the function.
+ if (Inserted) {
+ SmallPtrSet<const Value *, 32> EphValues;
+ CodeMetrics::collectEphemeralValues(&F, &GetAC(F), EphValues);
+ for (BasicBlock &BB : F)
+ Metrics.analyzeBasicBlock(&BB, GetTTI(F), EphValues);
}
+ // If the code metrics reveal that we shouldn't duplicate the function,
+ // or if the code size implies that this function is easy to get inlined,
+ // then we shouldn't specialize it.
+ if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
+ (!ForceSpecialization && !F.hasFnAttribute(Attribute::NoInline) &&
+ Metrics.NumInsts < MinFunctionSize))
+ continue;
+
+ // TODO: For now only consider recursive functions when running multiple
+ // times. This should change if specialization on literal constants gets
+ // enabled.
+ if (!Inserted && !Metrics.isRecursive && !SpecializeLiteralConstant)
+ continue;
+
LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
- << F.getName() << " is " << Cost << "\n");
+ << F.getName() << " is " << Metrics.NumInsts << "\n");
+
+ if (Inserted && Metrics.isRecursive)
+ promoteConstantStackValues(&F);
- if (!findSpecializations(&F, Cost, AllSpecs, SM)) {
+ if (!findSpecializations(&F, Metrics.NumInsts, AllSpecs, SM)) {
LLVM_DEBUG(
dbgs() << "FnSpecialization: No possible specializations found for "
<< F.getName() << "\n");
@@ -292,11 +598,11 @@ bool FunctionSpecializer::run() {
// Choose the most profitable specialisations, which fit in the module
// specialization budget, which is derived from maximum number of
// specializations per specialization candidate function.
- auto CompareGain = [&AllSpecs](unsigned I, unsigned J) {
- return AllSpecs[I].Gain > AllSpecs[J].Gain;
+ auto CompareScore = [&AllSpecs](unsigned I, unsigned J) {
+ return AllSpecs[I].Score > AllSpecs[J].Score;
};
const unsigned NSpecs =
- std::min(NumCandidates * MaxClonesThreshold, unsigned(AllSpecs.size()));
+ std::min(NumCandidates * MaxClones, unsigned(AllSpecs.size()));
SmallVector<unsigned> BestSpecs(NSpecs + 1);
std::iota(BestSpecs.begin(), BestSpecs.begin() + NSpecs, 0);
if (AllSpecs.size() > NSpecs) {
@@ -305,11 +611,11 @@ bool FunctionSpecializer::run() {
<< "FnSpecialization: Specializing the "
<< NSpecs
<< " most profitable candidates.\n");
- std::make_heap(BestSpecs.begin(), BestSpecs.begin() + NSpecs, CompareGain);
+ std::make_heap(BestSpecs.begin(), BestSpecs.begin() + NSpecs, CompareScore);
for (unsigned I = NSpecs, N = AllSpecs.size(); I < N; ++I) {
BestSpecs[NSpecs] = I;
- std::push_heap(BestSpecs.begin(), BestSpecs.end(), CompareGain);
- std::pop_heap(BestSpecs.begin(), BestSpecs.end(), CompareGain);
+ std::push_heap(BestSpecs.begin(), BestSpecs.end(), CompareScore);
+ std::pop_heap(BestSpecs.begin(), BestSpecs.end(), CompareScore);
}
}
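
The loop above renames Gain to Score but keeps the same bounded selection: heapify the first NSpecs indices, then cycle every remaining candidate through one spare slot so only the NSpecs highest-scoring entries survive. A standalone sketch of that top-N idiom (assumed helper, not code from the patch):

    #include <algorithm>
    #include <numeric>
    #include <vector>

    // Returns the indices of the N largest elements of Scores (order unspecified).
    std::vector<unsigned> topN(const std::vector<int> &Scores, unsigned N) {
      N = std::min<unsigned>(N, static_cast<unsigned>(Scores.size()));
      auto Greater = [&](unsigned I, unsigned J) { return Scores[I] > Scores[J]; };
      std::vector<unsigned> Best(N + 1);                       // one spare slot
      std::iota(Best.begin(), Best.begin() + N, 0);            // seed with the first N
      std::make_heap(Best.begin(), Best.begin() + N, Greater); // lowest kept score on top
      for (unsigned I = N, E = Scores.size(); I < E; ++I) {
        Best[N] = I;                                           // candidate in the spare slot
        std::push_heap(Best.begin(), Best.end(), Greater);
        std::pop_heap(Best.begin(), Best.end(), Greater);      // evict the current minimum
      }
      Best.resize(N);
      return Best;
    }
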
@@ -317,7 +623,7 @@ bool FunctionSpecializer::run() {
for (unsigned I = 0; I < NSpecs; ++I) {
const Spec &S = AllSpecs[BestSpecs[I]];
dbgs() << "FnSpecialization: Function " << S.F->getName()
- << " , gain " << S.Gain << "\n";
+ << " , score " << S.Score << "\n";
for (const ArgInfo &Arg : S.Sig.Args)
dbgs() << "FnSpecialization: FormalArg = "
<< Arg.Formal->getNameOrAsOperand()
@@ -353,12 +659,37 @@ bool FunctionSpecializer::run() {
updateCallSites(F, AllSpecs.begin() + Begin, AllSpecs.begin() + End);
}
- promoteConstantStackValues();
- LLVM_DEBUG(if (NbFunctionsSpecialized) dbgs()
- << "FnSpecialization: Specialized " << NbFunctionsSpecialized
- << " functions in module " << M.getName() << "\n");
+ for (Function *F : Clones) {
+ if (F->getReturnType()->isVoidTy())
+ continue;
+ if (F->getReturnType()->isStructTy()) {
+ auto *STy = cast<StructType>(F->getReturnType());
+ if (!Solver.isStructLatticeConstant(F, STy))
+ continue;
+ } else {
+ auto It = Solver.getTrackedRetVals().find(F);
+ assert(It != Solver.getTrackedRetVals().end() &&
+ "Return value ought to be tracked");
+ if (SCCPSolver::isOverdefined(It->second))
+ continue;
+ }
+ for (User *U : F->users()) {
+ if (auto *CS = dyn_cast<CallBase>(U)) {
+ // The user instruction does not call our function.
+ if (CS->getCalledFunction() != F)
+ continue;
+ Solver.resetLatticeValueFor(CS);
+ }
+ }
+ }
+
+ // Rerun the solver to notify the users of the modified callsites.
+ Solver.solveWhileResolvedUndefs();
+
+ for (Function *F : OriginalFuncs)
+ if (FunctionMetrics[F].isRecursive)
+ promoteConstantStackValues(F);
- NumFuncSpecialized += NbFunctionsSpecialized;
return true;
}
@@ -373,24 +704,6 @@ void FunctionSpecializer::removeDeadFunctions() {
FullySpecialized.clear();
}
-// Compute the code metrics for function \p F.
-CodeMetrics &FunctionSpecializer::analyzeFunction(Function *F) {
- auto I = FunctionMetrics.insert({F, CodeMetrics()});
- CodeMetrics &Metrics = I.first->second;
- if (I.second) {
- // The code metrics were not cached.
- SmallPtrSet<const Value *, 32> EphValues;
- CodeMetrics::collectEphemeralValues(F, &(GetAC)(*F), EphValues);
- for (BasicBlock &BB : *F)
- Metrics.analyzeBasicBlock(&BB, (GetTTI)(*F), EphValues);
-
- LLVM_DEBUG(dbgs() << "FnSpecialization: Code size of function "
- << F->getName() << " is " << Metrics.NumInsts
- << " instructions\n");
- }
- return Metrics;
-}
-
/// Clone the function \p F and remove the ssa_copy intrinsics added by
/// the SCCPSolver in the cloned version.
static Function *cloneCandidateFunction(Function *F) {
@@ -400,13 +713,13 @@ static Function *cloneCandidateFunction(Function *F) {
return Clone;
}
-bool FunctionSpecializer::findSpecializations(Function *F, InstructionCost Cost,
+bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
SmallVectorImpl<Spec> &AllSpecs,
SpecMap &SM) {
// A mapping from a specialisation signature to the index of the respective
// entry in the all specialisation array. Used to ensure uniqueness of
// specialisations.
- DenseMap<SpecSig, unsigned> UM;
+ DenseMap<SpecSig, unsigned> UniqueSpecs;
// Get a list of interesting arguments.
SmallVector<Argument *> Args;
@@ -417,7 +730,6 @@ bool FunctionSpecializer::findSpecializations(Function *F, InstructionCost Cost,
if (Args.empty())
return false;
- bool Found = false;
for (User *U : F->users()) {
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
continue;
@@ -454,7 +766,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, InstructionCost Cost,
continue;
// Check if we have encountered the same specialisation already.
- if (auto It = UM.find(S); It != UM.end()) {
+ if (auto It = UniqueSpecs.find(S); It != UniqueSpecs.end()) {
// Existing specialisation. Add the call to the list to rewrite, unless
// it's a recursive call. A specialisation, generated because of a
// recursive call may end up as not the best specialisation for all
@@ -467,42 +779,42 @@ bool FunctionSpecializer::findSpecializations(Function *F, InstructionCost Cost,
AllSpecs[Index].CallSites.push_back(&CS);
} else {
// Calculate the specialisation gain.
- InstructionCost Gain = 0 - Cost;
+ Cost Score = 0;
+ InstCostVisitor Visitor = getInstCostVisitorFor(F);
for (ArgInfo &A : S.Args)
- Gain +=
- getSpecializationBonus(A.Formal, A.Actual, Solver.getLoopInfo(*F));
+ Score += getSpecializationBonus(A.Formal, A.Actual, Visitor);
+ Score += Visitor.getBonusFromPendingPHIs();
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization score = "
+ << Score << "\n");
// Discard unprofitable specialisations.
- if (!ForceFunctionSpecialization && Gain <= 0)
+ if (!ForceSpecialization && Score <= SpecCost)
continue;
// Create a new specialisation entry.
- auto &Spec = AllSpecs.emplace_back(F, S, Gain);
+ auto &Spec = AllSpecs.emplace_back(F, S, Score);
if (CS.getFunction() != F)
Spec.CallSites.push_back(&CS);
const unsigned Index = AllSpecs.size() - 1;
- UM[S] = Index;
+ UniqueSpecs[S] = Index;
if (auto [It, Inserted] = SM.try_emplace(F, Index, Index + 1); !Inserted)
It->second.second = Index + 1;
- Found = true;
}
}
- return Found;
+ return !UniqueSpecs.empty();
}
bool FunctionSpecializer::isCandidateFunction(Function *F) {
- if (F->isDeclaration())
+ if (F->isDeclaration() || F->arg_empty())
return false;
if (F->hasFnAttribute(Attribute::NoDuplicate))
return false;
- if (!Solver.isArgumentTrackedFunction(F))
- return false;
-
// Do not specialize the cloned function again.
- if (SpecializedFuncs.contains(F))
+ if (Specializations.contains(F))
return false;
// If we're optimizing the function for size, we shouldn't specialize it.
@@ -524,86 +836,50 @@ bool FunctionSpecializer::isCandidateFunction(Function *F) {
return true;
}
-Function *FunctionSpecializer::createSpecialization(Function *F, const SpecSig &S) {
+Function *FunctionSpecializer::createSpecialization(Function *F,
+ const SpecSig &S) {
Function *Clone = cloneCandidateFunction(F);
+ // The original function does not necessarily have internal linkage, but the
+ // clone must.
+ Clone->setLinkage(GlobalValue::InternalLinkage);
+
// Initialize the lattice state of the arguments of the function clone,
// marking the argument on which we specialized the function constant
// with the given value.
- Solver.markArgInFuncSpecialization(Clone, S.Args);
-
- Solver.addArgumentTrackedFunction(Clone);
+ Solver.setLatticeValueForSpecializationArguments(Clone, S.Args);
Solver.markBlockExecutable(&Clone->front());
+ Solver.addArgumentTrackedFunction(Clone);
+ Solver.addTrackedFunction(Clone);
// Mark all the specialized functions
- SpecializedFuncs.insert(Clone);
- NbFunctionsSpecialized++;
+ Specializations.insert(Clone);
+ ++NumSpecsCreated;
return Clone;
}
-/// Compute and return the cost of specializing function \p F.
-InstructionCost FunctionSpecializer::getSpecializationCost(Function *F) {
- CodeMetrics &Metrics = analyzeFunction(F);
- // If the code metrics reveal that we shouldn't duplicate the function, we
- // shouldn't specialize it. Set the specialization cost to Invalid.
- // Or if the lines of codes implies that this function is easy to get
- // inlined so that we shouldn't specialize it.
- if (Metrics.notDuplicatable || !Metrics.NumInsts.isValid() ||
- (!ForceFunctionSpecialization &&
- !F->hasFnAttribute(Attribute::NoInline) &&
- Metrics.NumInsts < SmallFunctionThreshold))
- return InstructionCost::getInvalid();
-
- // Otherwise, set the specialization cost to be the cost of all the
- // instructions in the function.
- return Metrics.NumInsts * InlineConstants::getInstrCost();
-}
-
-static InstructionCost getUserBonus(User *U, llvm::TargetTransformInfo &TTI,
- const LoopInfo &LI) {
- auto *I = dyn_cast_or_null<Instruction>(U);
- // If not an instruction we do not know how to evaluate.
- // Keep minimum possible cost for now so that it doesnt affect
- // specialization.
- if (!I)
- return std::numeric_limits<unsigned>::min();
-
- InstructionCost Cost =
- TTI.getInstructionCost(U, TargetTransformInfo::TCK_SizeAndLatency);
-
- // Increase the cost if it is inside the loop.
- unsigned LoopDepth = LI.getLoopDepth(I->getParent());
- Cost *= std::pow((double)AvgLoopIterationCount, LoopDepth);
-
- // Traverse recursively if there are more uses.
- // TODO: Any other instructions to be added here?
- if (I->mayReadFromMemory() || I->isCast())
- for (auto *User : I->users())
- Cost += getUserBonus(User, TTI, LI);
-
- return Cost;
-}
-
/// Compute a bonus for replacing argument \p A with constant \p C.
-InstructionCost
-FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
- const LoopInfo &LI) {
- Function *F = A->getParent();
- auto &TTI = (GetTTI)(*F);
+Cost FunctionSpecializer::getSpecializationBonus(Argument *A, Constant *C,
+ InstCostVisitor &Visitor) {
LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
<< C->getNameOrAsOperand() << "\n");
- InstructionCost TotalCost = 0;
- for (auto *U : A->users()) {
- TotalCost += getUserBonus(U, TTI, LI);
- LLVM_DEBUG(dbgs() << "FnSpecialization: User cost ";
- TotalCost.print(dbgs()); dbgs() << " for: " << *U << "\n");
- }
+ Cost TotalCost = 0;
+ for (auto *U : A->users())
+ if (auto *UI = dyn_cast<Instruction>(U))
+ if (Solver.isBlockExecutable(UI->getParent()))
+ TotalCost += Visitor.getUserBonus(UI, A, C);
+
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated user bonus "
+ << TotalCost << " for argument " << *A << "\n");
// The below heuristic is only concerned with exposing inlining
// opportunities via indirect call promotion. If the argument is not a
// (potentially casted) function pointer, give up.
+ //
+ // TODO: Perhaps we should consider checking such inlining opportunities
+ // while traversing the users of the specialization arguments?
Function *CalledFunction = dyn_cast<Function>(C->stripPointerCasts());
if (!CalledFunction)
return TotalCost;
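
The function-pointer check above exists because specializing on a constant function pointer turns an indirect call into a direct one that the inliner can then see. A source-level illustration with hypothetical names:

    int square(int X) { return X * X; }
    int apply(int (*Fn)(int), int X) { return Fn(X); } // indirect call through Fn
    int user(int X) { return apply(square, X); }

A clone of apply() specialized on Fn == &square replaces the indirect call Fn(X) with the direct call square(X), which is exactly the inlining opportunity this bonus tries to account for.
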
@@ -661,16 +937,9 @@ bool FunctionSpecializer::isArgumentInteresting(Argument *A) {
if (A->user_empty())
return false;
- // For now, don't attempt to specialize functions based on the values of
- // composite types.
- Type *ArgTy = A->getType();
- if (!ArgTy->isSingleValueType())
- return false;
-
- // Specialization of integer and floating point types needs to be explicitly
- // enabled.
- if (!EnableSpecializationForLiteralConstant &&
- (ArgTy->isIntegerTy() || ArgTy->isFloatingPointTy()))
+ Type *Ty = A->getType();
+ if (!Ty->isPointerTy() && (!SpecializeLiteralConstant ||
+ (!Ty->isIntegerTy() && !Ty->isFloatingPointTy() && !Ty->isStructTy())))
return false;
// SCCP solver does not record an argument that will be constructed on
@@ -678,54 +947,46 @@ bool FunctionSpecializer::isArgumentInteresting(Argument *A) {
if (A->hasByValAttr() && !A->getParent()->onlyReadsMemory())
return false;
+ // For non-argument-tracked functions every argument is overdefined.
+ if (!Solver.isArgumentTrackedFunction(A->getParent()))
+ return true;
+
// Check the lattice value and decide if we should attempt to specialize,
// based on this argument. No point in specialization if the lattice value
// is already a constant.
- const ValueLatticeElement &LV = Solver.getLatticeValueFor(A);
- if (LV.isUnknownOrUndef() || LV.isConstant() ||
- (LV.isConstantRange() && LV.getConstantRange().isSingleElement())) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: Nothing to do, parameter "
- << A->getNameOrAsOperand() << " is already constant\n");
- return false;
- }
-
- LLVM_DEBUG(dbgs() << "FnSpecialization: Found interesting parameter "
- << A->getNameOrAsOperand() << "\n");
-
- return true;
+ bool IsOverdefined = Ty->isStructTy()
+ ? any_of(Solver.getStructLatticeValueFor(A), SCCPSolver::isOverdefined)
+ : SCCPSolver::isOverdefined(Solver.getLatticeValueFor(A));
+
+ LLVM_DEBUG(
+ if (IsOverdefined)
+ dbgs() << "FnSpecialization: Found interesting parameter "
+ << A->getNameOrAsOperand() << "\n";
+ else
+ dbgs() << "FnSpecialization: Nothing to do, parameter "
+ << A->getNameOrAsOperand() << " is already constant\n";
+ );
+ return IsOverdefined;
}
-/// Check if the valuy \p V (an actual argument) is a constant or can only
+/// Check if the value \p V (an actual argument) is a constant or can only
/// have a constant value. Return that constant.
Constant *FunctionSpecializer::getCandidateConstant(Value *V) {
if (isa<PoisonValue>(V))
return nullptr;
- // TrackValueOfGlobalVariable only tracks scalar global variables.
- if (auto *GV = dyn_cast<GlobalVariable>(V)) {
- // Check if we want to specialize on the address of non-constant
- // global values.
- if (!GV->isConstant() && !SpecializeOnAddresses)
- return nullptr;
-
- if (!GV->getValueType()->isSingleValueType())
- return nullptr;
- }
-
// Select for possible specialisation values that are constants or
// are deduced to be constants or constant ranges with a single element.
Constant *C = dyn_cast<Constant>(V);
- if (!C) {
- const ValueLatticeElement &LV = Solver.getLatticeValueFor(V);
- if (LV.isConstant())
- C = LV.getConstant();
- else if (LV.isConstantRange() && LV.getConstantRange().isSingleElement()) {
- assert(V->getType()->isIntegerTy() && "Non-integral constant range");
- C = Constant::getIntegerValue(V->getType(),
- *LV.getConstantRange().getSingleElement());
- } else
+ if (!C)
+ C = Solver.getConstantOrNull(V);
+
+ // Don't specialize on (anything derived from) the address of a non-constant
+ // global variable, unless explicitly enabled.
+ if (C && C->getType()->isPointerTy() && !C->isNullValue())
+ if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(C));
+ GV && !(GV->isConstant() || SpecializeOnAddress))
return nullptr;
- }
return C;
}
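
For context on the new guard: the address of a global is itself a Constant even when the global's contents are mutable, so without this check every such call site would look like a specialization candidate. A hypothetical example:

    int Counter;                    // not constant; its value may change
    void bump(int *P) { ++*P; }
    void tick() { bump(&Counter); } // &Counter is a Constant operand

Specializing bump() on P == &Counter cannot fold the loads and stores through P, so it typically brings little benefit; by default the pass rejects it unless the option behind SpecializeOnAddress is enabled.
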
@@ -747,7 +1008,7 @@ void FunctionSpecializer::updateCallSites(Function *F, const Spec *Begin,
// Find the best matching specialisation.
const Spec *BestSpec = nullptr;
for (const Spec &S : make_range(Begin, End)) {
- if (!S.Clone || (BestSpec && S.Gain <= BestSpec->Gain))
+ if (!S.Clone || (BestSpec && S.Score <= BestSpec->Score))
continue;
if (any_of(S.Sig.Args, [CS, this](const ArgInfo &Arg) {
@@ -772,7 +1033,7 @@ void FunctionSpecializer::updateCallSites(Function *F, const Spec *Begin,
// If the function has been completely specialized, the original function
// is no longer needed. Mark it unreachable.
- if (NCallsLeft == 0) {
+ if (NCallsLeft == 0 && Solver.isArgumentTrackedFunction(F)) {
Solver.markFunctionUnreachable(F);
FullySpecialized.insert(F);
}
diff --git a/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index 2f2bb174a8c8..e36d524d7667 100644
--- a/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -21,8 +21,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CtorUtils.h"
@@ -42,47 +40,6 @@ STATISTIC(NumIFuncs, "Number of indirect functions removed");
STATISTIC(NumVariables, "Number of global variables removed");
STATISTIC(NumVFuncs, "Number of virtual functions removed");
-namespace {
- class GlobalDCELegacyPass : public ModulePass {
- public:
- static char ID; // Pass identification, replacement for typeid
- GlobalDCELegacyPass() : ModulePass(ID) {
- initializeGlobalDCELegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- // run - Do the GlobalDCE pass on the specified module, optionally updating
- // the specified callgraph to reflect the changes.
- //
- bool runOnModule(Module &M) override {
- if (skipModule(M))
- return false;
-
- // We need a minimally functional dummy module analysis manager. It needs
- // to at least know about the possibility of proxying a function analysis
- // manager.
- FunctionAnalysisManager DummyFAM;
- ModuleAnalysisManager DummyMAM;
- DummyMAM.registerPass(
- [&] { return FunctionAnalysisManagerModuleProxy(DummyFAM); });
-
- auto PA = Impl.run(M, DummyMAM);
- return !PA.areAllPreserved();
- }
-
- private:
- GlobalDCEPass Impl;
- };
-}
-
-char GlobalDCELegacyPass::ID = 0;
-INITIALIZE_PASS(GlobalDCELegacyPass, "globaldce",
- "Dead Global Elimination", false, false)
-
-// Public interface to the GlobalDCEPass.
-ModulePass *llvm::createGlobalDCEPass() {
- return new GlobalDCELegacyPass();
-}
-
/// Returns true if F is effectively empty.
static bool isEmptyFunction(Function *F) {
// Skip external functions.
@@ -163,12 +120,6 @@ void GlobalDCEPass::ScanVTables(Module &M) {
SmallVector<MDNode *, 2> Types;
LLVM_DEBUG(dbgs() << "Building type info -> vtable map\n");
- auto *LTOPostLinkMD =
- cast_or_null<ConstantAsMetadata>(M.getModuleFlag("LTOPostLink"));
- bool LTOPostLink =
- LTOPostLinkMD &&
- (cast<ConstantInt>(LTOPostLinkMD->getValue())->getZExtValue() != 0);
-
for (GlobalVariable &GV : M.globals()) {
Types.clear();
GV.getMetadata(LLVMContext::MD_type, Types);
@@ -195,7 +146,7 @@ void GlobalDCEPass::ScanVTables(Module &M) {
if (auto GO = dyn_cast<GlobalObject>(&GV)) {
GlobalObject::VCallVisibility TypeVis = GO->getVCallVisibility();
if (TypeVis == GlobalObject::VCallVisibilityTranslationUnit ||
- (LTOPostLink &&
+ (InLTOPostLink &&
TypeVis == GlobalObject::VCallVisibilityLinkageUnit)) {
LLVM_DEBUG(dbgs() << GV.getName() << " is safe for VFE\n");
VFESafeVTables.insert(&GV);
@@ -236,29 +187,36 @@ void GlobalDCEPass::ScanTypeCheckedLoadIntrinsics(Module &M) {
LLVM_DEBUG(dbgs() << "Scanning type.checked.load intrinsics\n");
Function *TypeCheckedLoadFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
-
- if (!TypeCheckedLoadFunc)
- return;
-
- for (auto *U : TypeCheckedLoadFunc->users()) {
- auto CI = dyn_cast<CallInst>(U);
- if (!CI)
- continue;
-
- auto *Offset = dyn_cast<ConstantInt>(CI->getArgOperand(1));
- Value *TypeIdValue = CI->getArgOperand(2);
- auto *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();
-
- if (Offset) {
- ScanVTableLoad(CI->getFunction(), TypeId, Offset->getZExtValue());
- } else {
- // type.checked.load with a non-constant offset, so assume every entry in
- // every matching vtable is used.
- for (const auto &VTableInfo : TypeIdMap[TypeId]) {
- VFESafeVTables.erase(VTableInfo.first);
+ Function *TypeCheckedLoadRelativeFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load_relative));
+
+ auto scan = [&](Function *CheckedLoadFunc) {
+ if (!CheckedLoadFunc)
+ return;
+
+ for (auto *U : CheckedLoadFunc->users()) {
+ auto CI = dyn_cast<CallInst>(U);
+ if (!CI)
+ continue;
+
+ auto *Offset = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ Value *TypeIdValue = CI->getArgOperand(2);
+ auto *TypeId = cast<MetadataAsValue>(TypeIdValue)->getMetadata();
+
+ if (Offset) {
+ ScanVTableLoad(CI->getFunction(), TypeId, Offset->getZExtValue());
+ } else {
+ // type.checked.load with a non-constant offset, so assume every entry
+ // in every matching vtable is used.
+ for (const auto &VTableInfo : TypeIdMap[TypeId]) {
+ VFESafeVTables.erase(VTableInfo.first);
+ }
}
}
- }
+ };
+
+ scan(TypeCheckedLoadFunc);
+ scan(TypeCheckedLoadRelativeFunc);
}
void GlobalDCEPass::AddVirtualFunctionDependencies(Module &M) {
@@ -271,7 +229,7 @@ void GlobalDCEPass::AddVirtualFunctionDependencies(Module &M) {
// Don't attempt VFE in that case.
auto *Val = mdconst::dyn_extract_or_null<ConstantInt>(
M.getModuleFlag("Virtual Function Elim"));
- if (!Val || Val->getZExtValue() == 0)
+ if (!Val || Val->isZero())
return;
ScanVTables(M);
@@ -458,3 +416,11 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &MAM) {
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
+
+void GlobalDCEPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<GlobalDCEPass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ if (InLTOPostLink)
+ OS << "<vfe-linkage-unit-visibility>";
+}
diff --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 0317a8bcb6bc..1ccc523ead8a 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -53,8 +53,6 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -206,8 +204,10 @@ CleanupPointerRootUsers(GlobalVariable *GV,
// chain of computation and the store to the global in Dead[n].second.
SmallVector<std::pair<Instruction *, Instruction *>, 32> Dead;
+ SmallVector<User *> Worklist(GV->users());
// Constants can't be pointers to dynamically allocated memory.
- for (User *U : llvm::make_early_inc_range(GV->users())) {
+ while (!Worklist.empty()) {
+ User *U = Worklist.pop_back_val();
if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
Value *V = SI->getValueOperand();
if (isa<Constant>(V)) {
@@ -235,18 +235,8 @@ CleanupPointerRootUsers(GlobalVariable *GV,
Dead.push_back(std::make_pair(I, MTI));
}
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
- if (CE->use_empty()) {
- CE->destroyConstant();
- Changed = true;
- }
- } else if (Constant *C = dyn_cast<Constant>(U)) {
- if (isSafeToDestroyConstant(C)) {
- C->destroyConstant();
- // This could have invalidated UI, start over from scratch.
- Dead.clear();
- CleanupPointerRootUsers(GV, GetTLI);
- return true;
- }
+ if (isa<GEPOperator>(CE))
+ append_range(Worklist, CE->users());
}
}
@@ -268,6 +258,7 @@ CleanupPointerRootUsers(GlobalVariable *GV,
}
}
+ GV->removeDeadConstantUsers();
return Changed;
}
@@ -335,10 +326,19 @@ static bool CleanupConstantGlobalUsers(GlobalVariable *GV,
return Changed;
}
+/// Part of the global at a specific offset, which is only accessed through
+/// loads and stores with the given type.
+struct GlobalPart {
+ Type *Ty;
+ Constant *Initializer = nullptr;
+ bool IsLoaded = false;
+ bool IsStored = false;
+};
+
/// Look at all uses of the global and determine which (offset, type) pairs it
/// can be split into.
-static bool collectSRATypes(DenseMap<uint64_t, Type *> &Types, GlobalValue *GV,
- const DataLayout &DL) {
+static bool collectSRATypes(DenseMap<uint64_t, GlobalPart> &Parts,
+ GlobalVariable *GV, const DataLayout &DL) {
SmallVector<Use *, 16> Worklist;
SmallPtrSet<Use *, 16> Visited;
auto AppendUses = [&](Value *V) {
@@ -373,14 +373,41 @@ static bool collectSRATypes(DenseMap<uint64_t, Type *> &Types, GlobalValue *GV,
// TODO: We currently require that all accesses at a given offset must
// use the same type. This could be relaxed.
Type *Ty = getLoadStoreType(V);
- auto It = Types.try_emplace(Offset.getZExtValue(), Ty).first;
- if (Ty != It->second)
+ const auto &[It, Inserted] =
+ Parts.try_emplace(Offset.getZExtValue(), GlobalPart{Ty});
+ if (Ty != It->second.Ty)
return false;
+ if (Inserted) {
+ It->second.Initializer =
+ ConstantFoldLoadFromConst(GV->getInitializer(), Ty, Offset, DL);
+ if (!It->second.Initializer) {
+ LLVM_DEBUG(dbgs() << "Global SRA: Failed to evaluate initializer of "
+ << *GV << " with type " << *Ty << " at offset "
+ << Offset.getZExtValue());
+ return false;
+ }
+ }
+
// Scalable types not currently supported.
if (isa<ScalableVectorType>(Ty))
return false;
+ auto IsStored = [](Value *V, Constant *Initializer) {
+ auto *SI = dyn_cast<StoreInst>(V);
+ if (!SI)
+ return false;
+
+ Constant *StoredConst = dyn_cast<Constant>(SI->getOperand(0));
+ if (!StoredConst)
+ return true;
+
+ // Don't consider stores that only write the initializer value.
+ return Initializer != StoredConst;
+ };
+
+ It->second.IsLoaded |= isa<LoadInst>(V);
+ It->second.IsStored |= IsStored(V, It->second.Initializer);
continue;
}
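
To make the GlobalPart bookkeeping above concrete, consider a hypothetical global that qualifies for splitting:

    static struct { int Counter; double Sum; } State = {0, 0.0}; // internal linkage

If every use is a fixed-offset load or store of State.Counter (offset 0, i32) or State.Sum (offset 8 on a typical 64-bit layout, double), collectSRATypes() records two parts, each with an initializer constant-folded from the original one, and SRAGlobal() can later replace State with independent globals named State.0 and State.1.
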
@@ -410,6 +437,7 @@ static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
DIExpression *Expr = GVE->getExpression();
int64_t CurVarOffsetInBytes = 0;
uint64_t CurVarOffsetInBits = 0;
+ uint64_t FragmentEndInBits = FragmentOffsetInBits + FragmentSizeInBits;
// Calculate the offset (Bytes), Continue if unknown.
if (!Expr->extractIfOffset(CurVarOffsetInBytes))
@@ -423,27 +451,50 @@ static void transferSRADebugInfo(GlobalVariable *GV, GlobalVariable *NGV,
CurVarOffsetInBits = CHAR_BIT * (uint64_t)CurVarOffsetInBytes;
// Current var starts after the fragment, ignore.
- if (CurVarOffsetInBits >= (FragmentOffsetInBits + FragmentSizeInBits))
+ if (CurVarOffsetInBits >= FragmentEndInBits)
continue;
uint64_t CurVarSize = Var->getType()->getSizeInBits();
+ uint64_t CurVarEndInBits = CurVarOffsetInBits + CurVarSize;
// Current variable ends before start of fragment, ignore.
- if (CurVarSize != 0 &&
- (CurVarOffsetInBits + CurVarSize) <= FragmentOffsetInBits)
+ if (CurVarSize != 0 && /* CurVarSize is known */
+ CurVarEndInBits <= FragmentOffsetInBits)
continue;
- // Current variable fits in the fragment.
- if (CurVarOffsetInBits == FragmentOffsetInBits &&
- CurVarSize == FragmentSizeInBits)
- Expr = DIExpression::get(Expr->getContext(), {});
- // If the FragmentSize is smaller than the variable,
+ // Current variable fits entirely within the fragment (it is not larger),
+ // so it does not need a fragment expression.
+ if (CurVarSize != 0 && /* CurVarSize is known */
+ CurVarOffsetInBits >= FragmentOffsetInBits &&
+ CurVarEndInBits <= FragmentEndInBits) {
+ uint64_t CurVarOffsetInFragment =
+ (CurVarOffsetInBits - FragmentOffsetInBits) / 8;
+ if (CurVarOffsetInFragment != 0)
+ Expr = DIExpression::get(Expr->getContext(), {dwarf::DW_OP_plus_uconst,
+ CurVarOffsetInFragment});
+ else
+ Expr = DIExpression::get(Expr->getContext(), {});
+ auto *NGVE =
+ DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
+ NGV->addDebugInfo(NGVE);
+ continue;
+ }
+ // Current variable does not fit in a single fragment,
// emit a fragment expression.
- else if (FragmentSizeInBits < VarSize) {
+ if (FragmentSizeInBits < VarSize) {
+ if (CurVarOffsetInBits > FragmentOffsetInBits)
+ continue;
+ uint64_t CurVarFragmentOffsetInBits =
+ FragmentOffsetInBits - CurVarOffsetInBits;
+ uint64_t CurVarFragmentSizeInBits = FragmentSizeInBits;
+ if (CurVarSize != 0 && CurVarEndInBits < FragmentEndInBits)
+ CurVarFragmentSizeInBits -= (FragmentEndInBits - CurVarEndInBits);
+ if (CurVarOffsetInBits)
+ Expr = DIExpression::get(Expr->getContext(), {});
if (auto E = DIExpression::createFragmentExpression(
- Expr, FragmentOffsetInBits, FragmentSizeInBits))
+ Expr, CurVarFragmentOffsetInBits, CurVarFragmentSizeInBits))
Expr = *E;
else
- return;
+ continue;
}
auto *NGVE = DIGlobalVariableExpression::get(GVE->getContext(), Var, Expr);
NGV->addDebugInfo(NGVE);
@@ -459,52 +510,45 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
assert(GV->hasLocalLinkage());
// Collect types to split into.
- DenseMap<uint64_t, Type *> Types;
- if (!collectSRATypes(Types, GV, DL) || Types.empty())
+ DenseMap<uint64_t, GlobalPart> Parts;
+ if (!collectSRATypes(Parts, GV, DL) || Parts.empty())
return nullptr;
// Make sure we don't SRA back to the same type.
- if (Types.size() == 1 && Types.begin()->second == GV->getValueType())
+ if (Parts.size() == 1 && Parts.begin()->second.Ty == GV->getValueType())
return nullptr;
- // Don't perform SRA if we would have to split into many globals.
- if (Types.size() > 16)
+ // Don't perform SRA if we would have to split into many globals. Ignore
+ // parts that are either only loaded or only stored, because we expect them
+ // to be optimized away.
+ unsigned NumParts = count_if(Parts, [](const auto &Pair) {
+ return Pair.second.IsLoaded && Pair.second.IsStored;
+ });
+ if (NumParts > 16)
return nullptr;
// Sort by offset.
- SmallVector<std::pair<uint64_t, Type *>, 16> TypesVector;
- append_range(TypesVector, Types);
+ SmallVector<std::tuple<uint64_t, Type *, Constant *>, 16> TypesVector;
+ for (const auto &Pair : Parts) {
+ TypesVector.push_back(
+ {Pair.first, Pair.second.Ty, Pair.second.Initializer});
+ }
sort(TypesVector, llvm::less_first());
// Check that the types are non-overlapping.
uint64_t Offset = 0;
- for (const auto &Pair : TypesVector) {
+ for (const auto &[OffsetForTy, Ty, _] : TypesVector) {
// Overlaps with previous type.
- if (Pair.first < Offset)
+ if (OffsetForTy < Offset)
return nullptr;
- Offset = Pair.first + DL.getTypeAllocSize(Pair.second);
+ Offset = OffsetForTy + DL.getTypeAllocSize(Ty);
}
// Some accesses go beyond the end of the global, don't bother.
if (Offset > DL.getTypeAllocSize(GV->getValueType()))
return nullptr;
- // Collect initializers for new globals.
- Constant *OrigInit = GV->getInitializer();
- DenseMap<uint64_t, Constant *> Initializers;
- for (const auto &Pair : Types) {
- Constant *NewInit = ConstantFoldLoadFromConst(OrigInit, Pair.second,
- APInt(64, Pair.first), DL);
- if (!NewInit) {
- LLVM_DEBUG(dbgs() << "Global SRA: Failed to evaluate initializer of "
- << *GV << " with type " << *Pair.second << " at offset "
- << Pair.first << "\n");
- return nullptr;
- }
- Initializers.insert({Pair.first, NewInit});
- }
-
LLVM_DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV << "\n");
// Get the alignment of the global, either explicit or target-specific.
@@ -515,26 +559,24 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
// Create replacement globals.
DenseMap<uint64_t, GlobalVariable *> NewGlobals;
unsigned NameSuffix = 0;
- for (auto &Pair : TypesVector) {
- uint64_t Offset = Pair.first;
- Type *Ty = Pair.second;
+ for (auto &[OffsetForTy, Ty, Initializer] : TypesVector) {
GlobalVariable *NGV = new GlobalVariable(
*GV->getParent(), Ty, false, GlobalVariable::InternalLinkage,
- Initializers[Offset], GV->getName() + "." + Twine(NameSuffix++), GV,
+ Initializer, GV->getName() + "." + Twine(NameSuffix++), GV,
GV->getThreadLocalMode(), GV->getAddressSpace());
NGV->copyAttributesFrom(GV);
- NewGlobals.insert({Offset, NGV});
+ NewGlobals.insert({OffsetForTy, NGV});
// Calculate the known alignment of the field. If the original aggregate
// had 256 byte alignment for example, something might depend on that:
// propagate info to each field.
- Align NewAlign = commonAlignment(StartAlignment, Offset);
+ Align NewAlign = commonAlignment(StartAlignment, OffsetForTy);
if (NewAlign > DL.getABITypeAlign(Ty))
NGV->setAlignment(NewAlign);
// Copy over the debug info for the variable.
- transferSRADebugInfo(GV, NGV, Offset * 8, DL.getTypeAllocSizeInBits(Ty),
- VarSize);
+ transferSRADebugInfo(GV, NGV, OffsetForTy * 8,
+ DL.getTypeAllocSizeInBits(Ty), VarSize);
}
// Replace uses of the original global with uses of the new global.
@@ -621,8 +663,9 @@ static bool AllUsesOfValueWillTrapIfNull(const Value *V,
if (II->getCalledOperand() != V) {
return false; // Not calling the ptr
}
- } else if (const BitCastInst *CI = dyn_cast<BitCastInst>(U)) {
- if (!AllUsesOfValueWillTrapIfNull(CI, PHIs)) return false;
+ } else if (const AddrSpaceCastInst *CI = dyn_cast<AddrSpaceCastInst>(U)) {
+ if (!AllUsesOfValueWillTrapIfNull(CI, PHIs))
+ return false;
} else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
if (!AllUsesOfValueWillTrapIfNull(GEPI, PHIs)) return false;
} else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
@@ -735,10 +778,9 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
UI = V->user_begin();
}
}
- } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
- Changed |= OptimizeAwayTrappingUsesOfValue(CI,
- ConstantExpr::getCast(CI->getOpcode(),
- NewV, CI->getType()));
+ } else if (AddrSpaceCastInst *CI = dyn_cast<AddrSpaceCastInst>(I)) {
+ Changed |= OptimizeAwayTrappingUsesOfValue(
+ CI, ConstantExpr::getAddrSpaceCast(NewV, CI->getType()));
if (CI->use_empty()) {
Changed = true;
CI->eraseFromParent();
@@ -803,7 +845,8 @@ static bool OptimizeAwayTrappingUsesOfLoads(
assert((isa<PHINode>(GlobalUser) || isa<SelectInst>(GlobalUser) ||
isa<ConstantExpr>(GlobalUser) || isa<CmpInst>(GlobalUser) ||
isa<BitCastInst>(GlobalUser) ||
- isa<GetElementPtrInst>(GlobalUser)) &&
+ isa<GetElementPtrInst>(GlobalUser) ||
+ isa<AddrSpaceCastInst>(GlobalUser)) &&
"Only expect load and stores!");
}
}
@@ -976,7 +1019,7 @@ OptimizeGlobalAddressOfAllocation(GlobalVariable *GV, CallInst *CI,
cast<StoreInst>(InitBool->user_back())->eraseFromParent();
delete InitBool;
} else
- GV->getParent()->getGlobalList().insert(GV->getIterator(), InitBool);
+ GV->getParent()->insertGlobalVariable(GV->getIterator(), InitBool);
// Now the GV is dead, nuke it and the allocation..
GV->eraseFromParent();
@@ -1103,9 +1146,6 @@ optimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
nullptr /* F */,
GV->getInitializer()->getType()->getPointerAddressSpace())) {
if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
- if (GV->getInitializer()->getType() != SOVC->getType())
- SOVC = ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
-
// Optimize away any trapping uses of the loaded value.
if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, DL, GetTLI))
return true;
@@ -1158,7 +1198,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
GV->getThreadLocalMode(),
GV->getType()->getAddressSpace());
NewGV->copyAttributesFrom(GV);
- GV->getParent()->getGlobalList().insert(GV->getIterator(), NewGV);
+ GV->getParent()->insertGlobalVariable(GV->getIterator(), NewGV);
Constant *InitVal = GV->getInitializer();
assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
@@ -1330,18 +1370,6 @@ static bool isPointerValueDeadOnEntryToFunction(
SmallVector<LoadInst *, 4> Loads;
SmallVector<StoreInst *, 4> Stores;
for (auto *U : GV->users()) {
- if (Operator::getOpcode(U) == Instruction::BitCast) {
- for (auto *UU : U->users()) {
- if (auto *LI = dyn_cast<LoadInst>(UU))
- Loads.push_back(LI);
- else if (auto *SI = dyn_cast<StoreInst>(UU))
- Stores.push_back(SI);
- else
- return false;
- }
- continue;
- }
-
Instruction *I = dyn_cast<Instruction>(U);
if (!I)
return false;
@@ -1391,62 +1419,6 @@ static bool isPointerValueDeadOnEntryToFunction(
return true;
}
-/// C may have non-instruction users. Can all of those users be turned into
-/// instructions?
-static bool allNonInstructionUsersCanBeMadeInstructions(Constant *C) {
- // We don't do this exhaustively. The most common pattern that we really need
- // to care about is a constant GEP or constant bitcast - so just looking
- // through one single ConstantExpr.
- //
- // The set of constants that this function returns true for must be able to be
- // handled by makeAllConstantUsesInstructions.
- for (auto *U : C->users()) {
- if (isa<Instruction>(U))
- continue;
- if (!isa<ConstantExpr>(U))
- // Non instruction, non-constantexpr user; cannot convert this.
- return false;
- for (auto *UU : U->users())
- if (!isa<Instruction>(UU))
- // A constantexpr used by another constant. We don't try and recurse any
- // further but just bail out at this point.
- return false;
- }
-
- return true;
-}
-
-/// C may have non-instruction users, and
-/// allNonInstructionUsersCanBeMadeInstructions has returned true. Convert the
-/// non-instruction users to instructions.
-static void makeAllConstantUsesInstructions(Constant *C) {
- SmallVector<ConstantExpr*,4> Users;
- for (auto *U : C->users()) {
- if (isa<ConstantExpr>(U))
- Users.push_back(cast<ConstantExpr>(U));
- else
- // We should never get here; allNonInstructionUsersCanBeMadeInstructions
- // should not have returned true for C.
- assert(
- isa<Instruction>(U) &&
- "Can't transform non-constantexpr non-instruction to instruction!");
- }
-
- SmallVector<Value*,4> UUsers;
- for (auto *U : Users) {
- UUsers.clear();
- append_range(UUsers, U->users());
- for (auto *UU : UUsers) {
- Instruction *UI = cast<Instruction>(UU);
- Instruction *NewU = U->getAsInstruction(UI);
- UI->replaceUsesOfWith(U, NewU);
- }
- // We've replaced all the uses, so destroy the constant. (destroyConstant
- // will update value handles and metadata.)
- U->destroyConstant();
- }
-}
-
// For a global variable with one store, if the store dominates any loads,
// those loads will always load the stored value (as opposed to the
// initializer), even in the presence of recursion.
@@ -1504,7 +1476,6 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
GV->getValueType()->isSingleValueType() &&
GV->getType()->getAddressSpace() == 0 &&
!GV->isExternallyInitialized() &&
- allNonInstructionUsersCanBeMadeInstructions(GV) &&
GS.AccessingFunction->doesNotRecurse() &&
isPointerValueDeadOnEntryToFunction(GS.AccessingFunction, GV,
LookupDomTree)) {
@@ -1520,8 +1491,6 @@ processInternalGlobal(GlobalVariable *GV, const GlobalStatus &GS,
if (!isa<UndefValue>(GV->getInitializer()))
new StoreInst(GV->getInitializer(), Alloca, &FirstI);
- makeAllConstantUsesInstructions(GV);
-
GV->replaceAllUsesWith(Alloca);
GV->eraseFromParent();
++NumLocalized;
@@ -2142,15 +2111,22 @@ static void setUsedInitializer(GlobalVariable &V,
return;
}
+ // Get address space of pointers in the array of pointers.
+ const Type *UsedArrayType = V.getValueType();
+ const auto *VAT = cast<ArrayType>(UsedArrayType);
+ const auto *VEPT = cast<PointerType>(VAT->getArrayElementType());
+
// Type of pointer to the array of pointers.
- PointerType *Int8PtrTy = Type::getInt8PtrTy(V.getContext(), 0);
+ PointerType *Int8PtrTy =
+ Type::getInt8PtrTy(V.getContext(), VEPT->getAddressSpace());
SmallVector<Constant *, 8> UsedArray;
for (GlobalValue *GV : Init) {
- Constant *Cast
- = ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy);
+ Constant *Cast =
+ ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, Int8PtrTy);
UsedArray.push_back(Cast);
}
+
// Sort to get deterministic order.
array_pod_sort(UsedArray.begin(), UsedArray.end(), compareNames);
ArrayType *ATy = ArrayType::get(Int8PtrTy, UsedArray.size());
@@ -2241,22 +2217,11 @@ static bool hasUseOtherThanLLVMUsed(GlobalAlias &GA, const LLVMUsed &U) {
return !U.usedCount(&GA) && !U.compilerUsedCount(&GA);
}
-static bool hasMoreThanOneUseOtherThanLLVMUsed(GlobalValue &V,
- const LLVMUsed &U) {
- unsigned N = 2;
- assert((!U.usedCount(&V) || !U.compilerUsedCount(&V)) &&
- "We should have removed the duplicated "
- "element from llvm.compiler.used");
- if (U.usedCount(&V) || U.compilerUsedCount(&V))
- ++N;
- return V.hasNUsesOrMore(N);
-}
-
-static bool mayHaveOtherReferences(GlobalAlias &GA, const LLVMUsed &U) {
- if (!GA.hasLocalLinkage())
+static bool mayHaveOtherReferences(GlobalValue &GV, const LLVMUsed &U) {
+ if (!GV.hasLocalLinkage())
return true;
- return U.usedCount(&GA) || U.compilerUsedCount(&GA);
+ return U.usedCount(&GV) || U.compilerUsedCount(&GV);
}
static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
@@ -2270,21 +2235,16 @@ static bool hasUsesToReplace(GlobalAlias &GA, const LLVMUsed &U,
if (!mayHaveOtherReferences(GA, U))
return Ret;
- // If the aliasee has internal linkage, give it the name and linkage
- // of the alias, and delete the alias. This turns:
+ // If the aliasee has internal linkage and no other references (e.g.,
+ // @llvm.used, @llvm.compiler.used), give it the name and linkage of the
+ // alias, and delete the alias. This turns:
// define internal ... @f(...)
// @a = alias ... @f
// into:
// define ... @a(...)
Constant *Aliasee = GA.getAliasee();
GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts());
- if (!Target->hasLocalLinkage())
- return Ret;
-
- // Do not perform the transform if multiple aliases potentially target the
- // aliasee. This check also ensures that it is safe to replace the section
- // and other attributes of the aliasee with those of the alias.
- if (hasMoreThanOneUseOtherThanLLVMUsed(*Target, U))
+ if (mayHaveOtherReferences(*Target, U))
return Ret;
RenameTarget = true;
@@ -2360,7 +2320,7 @@ OptimizeGlobalAliases(Module &M,
continue;
// Delete the alias.
- M.getAliasList().erase(&J);
+ M.eraseAlias(&J);
++NumAliasesRemoved;
Changed = true;
}
@@ -2562,65 +2522,3 @@ PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
PA.preserveSet<CFGAnalyses>();
return PA;
}
-
-namespace {
-
-struct GlobalOptLegacyPass : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
-
- GlobalOptLegacyPass() : ModulePass(ID) {
- initializeGlobalOptLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- if (skipModule(M))
- return false;
-
- auto &DL = M.getDataLayout();
- auto LookupDomTree = [this](Function &F) -> DominatorTree & {
- return this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
- };
- auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
- return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- };
- auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
- return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- };
-
- auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
- return this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
- };
-
- auto ChangedCFGCallback = [&LookupDomTree](Function &F) {
- auto &DT = LookupDomTree(F);
- DT.recalculate(F);
- };
-
- return optimizeGlobalsInModule(M, DL, GetTLI, GetTTI, GetBFI, LookupDomTree,
- ChangedCFGCallback, nullptr);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<BlockFrequencyInfoWrapperPass>();
- }
-};
-
-} // end anonymous namespace
-
-char GlobalOptLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(GlobalOptLegacyPass, "globalopt",
- "Global Variable Optimizer", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(GlobalOptLegacyPass, "globalopt",
- "Global Variable Optimizer", false, false)
-
-ModulePass *llvm::createGlobalOptimizerPass() {
- return new GlobalOptLegacyPass();
-}
diff --git a/llvm/lib/Transforms/IPO/GlobalSplit.cpp b/llvm/lib/Transforms/IPO/GlobalSplit.cpp
index 7d9e6135b2eb..84e9c219f935 100644
--- a/llvm/lib/Transforms/IPO/GlobalSplit.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalSplit.cpp
@@ -29,8 +29,6 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/IPO.h"
#include <cstdint>
@@ -149,8 +147,12 @@ static bool splitGlobals(Module &M) {
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
Function *TypeCheckedLoadFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
+ Function *TypeCheckedLoadRelativeFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load_relative));
if ((!TypeTestFunc || TypeTestFunc->use_empty()) &&
- (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
+ (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) &&
+ (!TypeCheckedLoadRelativeFunc ||
+ TypeCheckedLoadRelativeFunc->use_empty()))
return false;
bool Changed = false;
@@ -159,33 +161,6 @@ static bool splitGlobals(Module &M) {
return Changed;
}
-namespace {
-
-struct GlobalSplit : public ModulePass {
- static char ID;
-
- GlobalSplit() : ModulePass(ID) {
- initializeGlobalSplitPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- if (skipModule(M))
- return false;
-
- return splitGlobals(M);
- }
-};
-
-} // end anonymous namespace
-
-char GlobalSplit::ID = 0;
-
-INITIALIZE_PASS(GlobalSplit, "globalsplit", "Global splitter", false, false)
-
-ModulePass *llvm::createGlobalSplitPass() {
- return new GlobalSplit;
-}
-
PreservedAnalyses GlobalSplitPass::run(Module &M, ModuleAnalysisManager &AM) {
if (!splitGlobals(M))
return PreservedAnalyses::all();
diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
index 95e8ae0fd22f..599ace9ca79f 100644
--- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -46,8 +46,6 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -169,23 +167,6 @@ static bool markFunctionCold(Function &F, bool UpdateEntryCount = false) {
return Changed;
}
-class HotColdSplittingLegacyPass : public ModulePass {
-public:
- static char ID;
- HotColdSplittingLegacyPass() : ModulePass(ID) {
- initializeHotColdSplittingLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<BlockFrequencyInfoWrapperPass>();
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addUsedIfAvailable<AssumptionCacheTracker>();
- }
-
- bool runOnModule(Module &M) override;
-};
-
} // end anonymous namespace
/// Check whether \p F is inherently cold.
@@ -713,32 +694,6 @@ bool HotColdSplitting::run(Module &M) {
return Changed;
}
-bool HotColdSplittingLegacyPass::runOnModule(Module &M) {
- if (skipModule(M))
- return false;
- ProfileSummaryInfo *PSI =
- &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- auto GTTI = [this](Function &F) -> TargetTransformInfo & {
- return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- };
- auto GBFI = [this](Function &F) {
- return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
- };
- std::unique_ptr<OptimizationRemarkEmitter> ORE;
- std::function<OptimizationRemarkEmitter &(Function &)> GetORE =
- [&ORE](Function &F) -> OptimizationRemarkEmitter & {
- ORE.reset(new OptimizationRemarkEmitter(&F));
- return *ORE;
- };
- auto LookupAC = [this](Function &F) -> AssumptionCache * {
- if (auto *ACT = getAnalysisIfAvailable<AssumptionCacheTracker>())
- return ACT->lookupAssumptionCache(F);
- return nullptr;
- };
-
- return HotColdSplitting(PSI, GBFI, GTTI, &GetORE, LookupAC).run(M);
-}
-
PreservedAnalyses
HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
@@ -769,15 +724,3 @@ HotColdSplittingPass::run(Module &M, ModuleAnalysisManager &AM) {
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
-
-char HotColdSplittingLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(HotColdSplittingLegacyPass, "hotcoldsplit",
- "Hot Cold Splitting", false, false)
-INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
-INITIALIZE_PASS_END(HotColdSplittingLegacyPass, "hotcoldsplit",
- "Hot Cold Splitting", false, false)
-
-ModulePass *llvm::createHotColdSplittingPass() {
- return new HotColdSplittingLegacyPass();
-}
diff --git a/llvm/lib/Transforms/IPO/IPO.cpp b/llvm/lib/Transforms/IPO/IPO.cpp
index 4163c448dc8f..5ad1289277a7 100644
--- a/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/llvm/lib/Transforms/IPO/IPO.cpp
@@ -12,9 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm-c/Transforms/IPO.h"
-#include "llvm-c/Initialization.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
@@ -23,104 +20,10 @@
using namespace llvm;
void llvm::initializeIPO(PassRegistry &Registry) {
- initializeAnnotation2MetadataLegacyPass(Registry);
- initializeCalledValuePropagationLegacyPassPass(Registry);
- initializeConstantMergeLegacyPassPass(Registry);
- initializeCrossDSOCFIPass(Registry);
initializeDAEPass(Registry);
initializeDAHPass(Registry);
- initializeForceFunctionAttrsLegacyPassPass(Registry);
- initializeGlobalDCELegacyPassPass(Registry);
- initializeGlobalOptLegacyPassPass(Registry);
- initializeGlobalSplitPass(Registry);
- initializeHotColdSplittingLegacyPassPass(Registry);
- initializeIROutlinerLegacyPassPass(Registry);
initializeAlwaysInlinerLegacyPassPass(Registry);
- initializeSimpleInlinerPass(Registry);
- initializeInferFunctionAttrsLegacyPassPass(Registry);
- initializeInternalizeLegacyPassPass(Registry);
initializeLoopExtractorLegacyPassPass(Registry);
initializeSingleLoopExtractorPass(Registry);
- initializeMergeFunctionsLegacyPassPass(Registry);
- initializePartialInlinerLegacyPassPass(Registry);
- initializeAttributorLegacyPassPass(Registry);
- initializeAttributorCGSCCLegacyPassPass(Registry);
- initializePostOrderFunctionAttrsLegacyPassPass(Registry);
- initializeReversePostOrderFunctionAttrsLegacyPassPass(Registry);
- initializeIPSCCPLegacyPassPass(Registry);
- initializeStripDeadPrototypesLegacyPassPass(Registry);
- initializeStripSymbolsPass(Registry);
- initializeStripDebugDeclarePass(Registry);
- initializeStripDeadDebugInfoPass(Registry);
- initializeStripNonDebugSymbolsPass(Registry);
initializeBarrierNoopPass(Registry);
- initializeEliminateAvailableExternallyLegacyPassPass(Registry);
-}
-
-void LLVMInitializeIPO(LLVMPassRegistryRef R) {
- initializeIPO(*unwrap(R));
-}
-
-void LLVMAddCalledValuePropagationPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createCalledValuePropagationPass());
-}
-
-void LLVMAddConstantMergePass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createConstantMergePass());
-}
-
-void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createDeadArgEliminationPass());
-}
-
-void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createPostOrderFunctionAttrsLegacyPass());
-}
-
-void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createFunctionInliningPass());
-}
-
-void LLVMAddAlwaysInlinerPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(llvm::createAlwaysInlinerLegacyPass());
-}
-
-void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createGlobalDCEPass());
-}
-
-void LLVMAddGlobalOptimizerPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createGlobalOptimizerPass());
-}
-
-void LLVMAddIPSCCPPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createIPSCCPPass());
-}
-
-void LLVMAddMergeFunctionsPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createMergeFunctionsPass());
-}
-
-void LLVMAddInternalizePass(LLVMPassManagerRef PM, unsigned AllButMain) {
- auto PreserveMain = [=](const GlobalValue &GV) {
- return AllButMain && GV.getName() == "main";
- };
- unwrap(PM)->add(createInternalizePass(PreserveMain));
-}
-
-void LLVMAddInternalizePassWithMustPreservePredicate(
- LLVMPassManagerRef PM,
- void *Context,
- LLVMBool (*Pred)(LLVMValueRef, void *)) {
- unwrap(PM)->add(createInternalizePass([=](const GlobalValue &GV) {
- return Pred(wrap(&GV), Context) == 0 ? false : true;
- }));
-}
-
-void LLVMAddStripDeadPrototypesPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createStripDeadPrototypesPass());
-}
-
-void LLVMAddStripSymbolsPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createStripSymbolsPass());
}
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index f5c52e5c7f5d..e258299c6a4c 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -22,8 +22,6 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
#include <optional>
@@ -179,10 +177,8 @@ static void getSortedConstantKeys(std::vector<Value *> &SortedKeys,
stable_sort(SortedKeys, [](const Value *LHS, const Value *RHS) {
assert(LHS && RHS && "Expected non void values.");
- const ConstantInt *LHSC = dyn_cast<ConstantInt>(LHS);
- const ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
- assert(RHSC && "Not a constant integer in return value?");
- assert(LHSC && "Not a constant integer in return value?");
+ const ConstantInt *LHSC = cast<ConstantInt>(LHS);
+ const ConstantInt *RHSC = cast<ConstantInt>(RHS);
return LHSC->getLimitedValue() < RHSC->getLimitedValue();
});
@@ -590,7 +586,7 @@ collectRegionsConstants(OutlinableRegion &Region,
// While this value is a register, it might not have been previously,
// make sure we don't already have a constant mapped to this global value
// number.
- if (GVNToConstant.find(GVN) != GVNToConstant.end())
+ if (GVNToConstant.contains(GVN))
ConstantsTheSame = false;
NotSame.insert(GVN);
@@ -818,7 +814,7 @@ static void mapInputsToGVNs(IRSimilarityCandidate &C,
// replacement.
for (Value *Input : CurrentInputs) {
assert(Input && "Have a nullptr as an input");
- if (OutputMappings.find(Input) != OutputMappings.end())
+ if (OutputMappings.contains(Input))
Input = OutputMappings.find(Input)->second;
assert(C.getGVN(Input) && "Could not find a numbering for the given input");
EndInputNumbers.push_back(*C.getGVN(Input));
@@ -840,7 +836,7 @@ remapExtractedInputs(const ArrayRef<Value *> ArgInputs,
// Get the global value number for each input that will be extracted as an
// argument by the code extractor, remapping if needed for reloaded values.
for (Value *Input : ArgInputs) {
- if (OutputMappings.find(Input) != OutputMappings.end())
+ if (OutputMappings.contains(Input))
Input = OutputMappings.find(Input)->second;
RemappedArgInputs.insert(Input);
}
@@ -1332,7 +1328,7 @@ findExtractedOutputToOverallOutputMapping(Module &M, OutlinableRegion &Region,
unsigned AggArgIdx = 0;
for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
- if (Group.ArgumentTypes[Jdx] != PointerType::getUnqual(Output->getType()))
+ if (!isa<PointerType>(Group.ArgumentTypes[Jdx]))
continue;
if (AggArgsUsed.contains(Jdx))
@@ -1483,8 +1479,7 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
}
// If it is a constant, we simply add it to the argument list as a value.
- if (Region.AggArgToConstant.find(AggArgIdx) !=
- Region.AggArgToConstant.end()) {
+ if (Region.AggArgToConstant.contains(AggArgIdx)) {
Constant *CST = Region.AggArgToConstant.find(AggArgIdx)->second;
LLVM_DEBUG(dbgs() << "Setting argument " << AggArgIdx << " to value "
<< *CST << "\n");
@@ -1818,8 +1813,7 @@ replaceArgumentUses(OutlinableRegion &Region,
for (unsigned ArgIdx = 0; ArgIdx < Region.ExtractedFunction->arg_size();
ArgIdx++) {
- assert(Region.ExtractedArgToAgg.find(ArgIdx) !=
- Region.ExtractedArgToAgg.end() &&
+ assert(Region.ExtractedArgToAgg.contains(ArgIdx) &&
"No mapping from extracted to outlined?");
unsigned AggArgIdx = Region.ExtractedArgToAgg.find(ArgIdx)->second;
Argument *AggArg = Group.OutlinedFunction->getArg(AggArgIdx);
@@ -2700,7 +2694,7 @@ void IROutliner::updateOutputMapping(OutlinableRegion &Region,
if (!OutputIdx)
return;
- if (OutputMappings.find(Outputs[*OutputIdx]) == OutputMappings.end()) {
+ if (!OutputMappings.contains(Outputs[*OutputIdx])) {
LLVM_DEBUG(dbgs() << "Mapping extracted output " << *LI << " to "
<< *Outputs[*OutputIdx] << "\n");
OutputMappings.insert(std::make_pair(LI, Outputs[*OutputIdx]));
@@ -3024,46 +3018,6 @@ bool IROutliner::run(Module &M) {
return doOutline(M) > 0;
}
-// Pass Manager Boilerplate
-namespace {
-class IROutlinerLegacyPass : public ModulePass {
-public:
- static char ID;
- IROutlinerLegacyPass() : ModulePass(ID) {
- initializeIROutlinerLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<IRSimilarityIdentifierWrapperPass>();
- }
-
- bool runOnModule(Module &M) override;
-};
-} // namespace
-
-bool IROutlinerLegacyPass::runOnModule(Module &M) {
- if (skipModule(M))
- return false;
-
- std::unique_ptr<OptimizationRemarkEmitter> ORE;
- auto GORE = [&ORE](Function &F) -> OptimizationRemarkEmitter & {
- ORE.reset(new OptimizationRemarkEmitter(&F));
- return *ORE;
- };
-
- auto GTTI = [this](Function &F) -> TargetTransformInfo & {
- return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- };
-
- auto GIRSI = [this](Module &) -> IRSimilarityIdentifier & {
- return this->getAnalysis<IRSimilarityIdentifierWrapperPass>().getIRSI();
- };
-
- return IROutliner(GTTI, GIRSI, GORE).run(M);
-}
-
PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
@@ -3088,14 +3042,3 @@ PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) {
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
-
-char IROutlinerLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(IRSimilarityIdentifierWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
- false)
-
-ModulePass *llvm::createIROutlinerPass() { return new IROutlinerLegacyPass(); }
diff --git a/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
index 76f8f1a7a482..18d5911d10f1 100644
--- a/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -10,7 +10,6 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -52,38 +51,3 @@ PreservedAnalyses InferFunctionAttrsPass::run(Module &M,
// out all the passes.
return PreservedAnalyses::none();
}
-
-namespace {
-struct InferFunctionAttrsLegacyPass : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
- InferFunctionAttrsLegacyPass() : ModulePass(ID) {
- initializeInferFunctionAttrsLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
-
- bool runOnModule(Module &M) override {
- if (skipModule(M))
- return false;
-
- auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
- return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- };
- return inferAllPrototypeAttributes(M, GetTLI);
- }
-};
-}
-
-char InferFunctionAttrsLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(InferFunctionAttrsLegacyPass, "inferattrs",
- "Infer set function attributes", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(InferFunctionAttrsLegacyPass, "inferattrs",
- "Infer set function attributes", false, false)
-
-Pass *llvm::createInferFunctionAttrsLegacyPass() {
- return new InferFunctionAttrsLegacyPass();
-}
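With the InferFunctionAttrs legacy wrapper gone, the transform is only reachable through the new pass manager. A minimal sketch, not part of the patch, assuming the PassBuilder registration boilerplate from the LLVM documentation and the existing llvm/Transforms/IPO/InferFunctionAttrs.h header:

// Sketch only: run InferFunctionAttrsPass under the new pass manager.
#include "llvm/IR/Module.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"

using namespace llvm;

static void inferAttrsWithNewPM(Module &M) {
  // Standard new-PM analysis-manager setup, as in the PassBuilder docs.
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  PassBuilder PB;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  ModulePassManager MPM;
  MPM.addPass(InferFunctionAttrsPass()); // "inferattrs" in -passes= syntax
  MPM.run(M, MAM);
}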
diff --git a/llvm/lib/Transforms/IPO/InlineSimple.cpp b/llvm/lib/Transforms/IPO/InlineSimple.cpp
deleted file mode 100644
index eba0d6636d6c..000000000000
--- a/llvm/lib/Transforms/IPO/InlineSimple.cpp
+++ /dev/null
@@ -1,118 +0,0 @@
-//===- InlineSimple.cpp - Code to perform simple function inlining --------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements bottom-up inlining of functions into callees.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/Inliner.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "inline"
-
-namespace {
-
-/// Actual inliner pass implementation.
-///
-/// The common implementation of the inlining logic is shared between this
-/// inliner pass and the always inliner pass. The two passes use different cost
-/// analyses to determine when to inline.
-class SimpleInliner : public LegacyInlinerBase {
-
- InlineParams Params;
-
-public:
- SimpleInliner() : LegacyInlinerBase(ID), Params(llvm::getInlineParams()) {
- initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
- }
-
- explicit SimpleInliner(InlineParams Params)
- : LegacyInlinerBase(ID), Params(std::move(Params)) {
- initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
- }
-
- static char ID; // Pass identification, replacement for typeid
-
- InlineCost getInlineCost(CallBase &CB) override {
- Function *Callee = CB.getCalledFunction();
- TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
-
- bool RemarksEnabled = false;
- const auto &BBs = *CB.getCaller();
- if (!BBs.empty()) {
- auto DI = OptimizationRemark(DEBUG_TYPE, "", DebugLoc(), &BBs.front());
- if (DI.isEnabled())
- RemarksEnabled = true;
- }
- OptimizationRemarkEmitter ORE(CB.getCaller());
-
- std::function<AssumptionCache &(Function &)> GetAssumptionCache =
- [&](Function &F) -> AssumptionCache & {
- return ACT->getAssumptionCache(F);
- };
- return llvm::getInlineCost(CB, Params, TTI, GetAssumptionCache, GetTLI,
- /*GetBFI=*/nullptr, PSI,
- RemarksEnabled ? &ORE : nullptr);
- }
-
- bool runOnSCC(CallGraphSCC &SCC) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-
-private:
- TargetTransformInfoWrapperPass *TTIWP;
-
-};
-
-} // end anonymous namespace
-
-char SimpleInliner::ID = 0;
-INITIALIZE_PASS_BEGIN(SimpleInliner, "inline", "Function Integration/Inlining",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(SimpleInliner, "inline", "Function Integration/Inlining",
- false, false)
-
-Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
-
-Pass *llvm::createFunctionInliningPass(int Threshold) {
- return new SimpleInliner(llvm::getInlineParams(Threshold));
-}
-
-Pass *llvm::createFunctionInliningPass(unsigned OptLevel,
- unsigned SizeOptLevel,
- bool DisableInlineHotCallSite) {
- auto Param = llvm::getInlineParams(OptLevel, SizeOptLevel);
- if (DisableInlineHotCallSite)
- Param.HotCallSiteThreshold = 0;
- return new SimpleInliner(Param);
-}
-
-Pass *llvm::createFunctionInliningPass(InlineParams &Params) {
- return new SimpleInliner(Params);
-}
-
-bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) {
- TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
- return LegacyInlinerBase::runOnSCC(SCC);
-}
-
-void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<TargetTransformInfoWrapperPass>();
- LegacyInlinerBase::getAnalysisUsage(AU);
-}
diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp
index 5bcfc38c585b..3e00aebce372 100644
--- a/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -27,7 +27,6 @@
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CGSCCPassManager.h"
-#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LazyCallGraph.h"
@@ -71,20 +70,7 @@ using namespace llvm;
#define DEBUG_TYPE "inline"
STATISTIC(NumInlined, "Number of functions inlined");
-STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
-STATISTIC(NumMergedAllocas, "Number of allocas merged together");
-
-/// Flag to disable manual alloca merging.
-///
-/// Merging of allocas was originally done as a stack-size saving technique
-/// prior to LLVM's code generator having support for stack coloring based on
-/// lifetime markers. It is now in the process of being removed. To experiment
-/// with disabling it and relying fully on lifetime marker based stack
-/// coloring, you can pass this flag to LLVM.
-static cl::opt<bool>
- DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
- cl::init(false), cl::Hidden);
static cl::opt<int> IntraSCCCostMultiplier(
"intra-scc-cost-multiplier", cl::init(2), cl::Hidden,
@@ -108,9 +94,6 @@ static cl::opt<bool>
EnablePostSCCAdvisorPrinting("enable-scc-inline-advisor-printing",
cl::init(false), cl::Hidden);
-namespace llvm {
-extern cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats;
-}
static cl::opt<std::string> CGSCCInlineReplayFile(
"cgscc-inline-replay", cl::init(""), cl::value_desc("filename"),
@@ -163,174 +146,6 @@ static cl::opt<CallSiteFormat::Format> CGSCCInlineReplayFormat(
"<Line Number>:<Column Number>.<Discriminator> (default)")),
cl::desc("How cgscc inline replay file is formatted"), cl::Hidden);
-LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {}
-
-LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
- : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
-
-/// For this class, we declare that we require and preserve the call graph.
-/// If the derived class implements this method, it should
-/// always explicitly call the implementation here.
-void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- getAAResultsAnalysisUsage(AU);
- CallGraphSCCPass::getAnalysisUsage(AU);
-}
-
-using InlinedArrayAllocasTy = DenseMap<ArrayType *, std::vector<AllocaInst *>>;
-
-/// Look at all of the allocas that we inlined through this call site. If we
-/// have already inlined other allocas through other calls into this function,
-/// then we know that they have disjoint lifetimes and that we can merge them.
-///
-/// There are many heuristics possible for merging these allocas, and the
-/// different options have different tradeoffs. One thing that we *really*
-/// don't want to hurt is SRoA: once inlining happens, often allocas are no
-/// longer address taken and so they can be promoted.
-///
-/// Our "solution" for that is to only merge allocas whose outermost type is an
-/// array type. These are usually not promoted because someone is using a
-/// variable index into them. These are also often the most important ones to
-/// merge.
-///
-/// A better solution would be to have real memory lifetime markers in the IR
-/// and not have the inliner do any merging of allocas at all. This would
-/// allow the backend to do proper stack slot coloring of all allocas that
-/// *actually make it to the backend*, which is really what we want.
-///
-/// Because we don't have this information, we do this simple and useful hack.
-static void mergeInlinedArrayAllocas(Function *Caller, InlineFunctionInfo &IFI,
- InlinedArrayAllocasTy &InlinedArrayAllocas,
- int InlineHistory) {
- SmallPtrSet<AllocaInst *, 16> UsedAllocas;
-
- // When processing our SCC, check to see if the call site was inlined from
- // some other call site. For example, if we're processing "A" in this code:
- // A() { B() }
- // B() { x = alloca ... C() }
- // C() { y = alloca ... }
- // Assume that C was not inlined into B initially, and so we're processing A
- // and decide to inline B into A. Doing this makes an alloca available for
- // reuse and makes a callsite (C) available for inlining. When we process
- // the C call site we don't want to do any alloca merging between X and Y
- // because their scopes are not disjoint. We could make this smarter by
- // keeping track of the inline history for each alloca in the
- // InlinedArrayAllocas but this isn't likely to be a significant win.
- if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
- return;
-
- // Loop over all the allocas we have so far and see if they can be merged with
- // a previously inlined alloca. If not, remember that we had it.
- for (unsigned AllocaNo = 0, E = IFI.StaticAllocas.size(); AllocaNo != E;
- ++AllocaNo) {
- AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
-
- // Don't bother trying to merge array allocations (they will usually be
- // canonicalized to be an allocation *of* an array), or allocations whose
- // type is not itself an array (because we're afraid of pessimizing SRoA).
- ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
- if (!ATy || AI->isArrayAllocation())
- continue;
-
- // Get the list of all available allocas for this array type.
- std::vector<AllocaInst *> &AllocasForType = InlinedArrayAllocas[ATy];
-
- // Loop over the allocas in AllocasForType to see if we can reuse one. Note
- // that we have to be careful not to reuse the same "available" alloca for
- // multiple different allocas that we just inlined, we use the 'UsedAllocas'
- // set to keep track of which "available" allocas are being used by this
- // function. Also, AllocasForType can be empty of course!
- bool MergedAwayAlloca = false;
- for (AllocaInst *AvailableAlloca : AllocasForType) {
- Align Align1 = AI->getAlign();
- Align Align2 = AvailableAlloca->getAlign();
-
- // The available alloca has to be in the right function, not in some other
- // function in this SCC.
- if (AvailableAlloca->getParent() != AI->getParent())
- continue;
-
- // If the inlined function already uses this alloca then we can't reuse
- // it.
- if (!UsedAllocas.insert(AvailableAlloca).second)
- continue;
-
- // Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
- // success!
- LLVM_DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI
- << "\n\t\tINTO: " << *AvailableAlloca << '\n');
-
- // Move affected dbg.declare calls immediately after the new alloca to
- // avoid the situation when a dbg.declare precedes its alloca.
- if (auto *L = LocalAsMetadata::getIfExists(AI))
- if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
- for (User *U : MDV->users())
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
- DDI->moveBefore(AvailableAlloca->getNextNode());
-
- AI->replaceAllUsesWith(AvailableAlloca);
-
- if (Align1 > Align2)
- AvailableAlloca->setAlignment(AI->getAlign());
-
- AI->eraseFromParent();
- MergedAwayAlloca = true;
- ++NumMergedAllocas;
- IFI.StaticAllocas[AllocaNo] = nullptr;
- break;
- }
-
- // If we already nuked the alloca, we're done with it.
- if (MergedAwayAlloca)
- continue;
-
- // If we were unable to merge away the alloca either because there are no
- // allocas of the right type available or because we reused them all
- // already, remember that this alloca came from an inlined function and mark
- // it used so we don't reuse it for other allocas from this inline
- // operation.
- AllocasForType.push_back(AI);
- UsedAllocas.insert(AI);
- }
-}
-
-/// If it is possible to inline the specified call site,
-/// do so and update the CallGraph for this operation.
-///
-/// This function also does some basic book-keeping to update the IR. The
-/// InlinedArrayAllocas map keeps track of any allocas that are already
-/// available from other functions inlined into the caller. If we are able to
-/// inline this call site we attempt to reuse already available allocas or add
-/// any new allocas to the set if not possible.
-static InlineResult inlineCallIfPossible(
- CallBase &CB, InlineFunctionInfo &IFI,
- InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory,
- bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter,
- ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
- Function *Callee = CB.getCalledFunction();
- Function *Caller = CB.getCaller();
-
- AAResults &AAR = AARGetter(*Callee);
-
- // Try to inline the function. Get the list of static allocas that were
- // inlined.
- InlineResult IR =
- InlineFunction(CB, IFI,
- /*MergeAttributes=*/true, &AAR, InsertLifetime);
- if (!IR.isSuccess())
- return IR;
-
- if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
- ImportedFunctionsStats.recordInline(*Caller, *Callee);
-
- if (!DisableInlinedAllocaMerging)
- mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory);
-
- return IR; // success
-}
-
/// Return true if the specified inline history ID
/// indicates an inline history that includes the specified function.
static bool inlineHistoryIncludes(
@@ -346,361 +161,6 @@ static bool inlineHistoryIncludes(
return false;
}
-bool LegacyInlinerBase::doInitialization(CallGraph &CG) {
- if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
- ImportedFunctionsStats.setModuleInfo(CG.getModule());
- return false; // No changes to CallGraph.
-}
-
-bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) {
- if (skipSCC(SCC))
- return false;
- return inlineCalls(SCC);
-}
-
-static bool
-inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
- std::function<AssumptionCache &(Function &)> GetAssumptionCache,
- ProfileSummaryInfo *PSI,
- std::function<const TargetLibraryInfo &(Function &)> GetTLI,
- bool InsertLifetime,
- function_ref<InlineCost(CallBase &CB)> GetInlineCost,
- function_ref<AAResults &(Function &)> AARGetter,
- ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
- SmallPtrSet<Function *, 8> SCCFunctions;
- LLVM_DEBUG(dbgs() << "Inliner visiting SCC:");
- for (CallGraphNode *Node : SCC) {
- Function *F = Node->getFunction();
- if (F)
- SCCFunctions.insert(F);
- LLVM_DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
- }
-
- // Scan through and identify all call sites ahead of time so that we only
- // inline call sites in the original functions, not call sites that result
- // from inlining other functions.
- SmallVector<std::pair<CallBase *, int>, 16> CallSites;
-
- // When inlining a callee produces new call sites, we want to keep track of
- // the fact that they were inlined from the callee. This allows us to avoid
- // infinite inlining in some obscure cases. To represent this, we use an
- // index into the InlineHistory vector.
- SmallVector<std::pair<Function *, int>, 8> InlineHistory;
-
- for (CallGraphNode *Node : SCC) {
- Function *F = Node->getFunction();
- if (!F || F->isDeclaration())
- continue;
-
- OptimizationRemarkEmitter ORE(F);
- for (BasicBlock &BB : *F)
- for (Instruction &I : BB) {
- auto *CB = dyn_cast<CallBase>(&I);
- // If this isn't a call, or it is a call to an intrinsic, it can
- // never be inlined.
- if (!CB || isa<IntrinsicInst>(I))
- continue;
-
- // If this is a direct call to an external function, we can never inline
- // it. If it is an indirect call, inlining may resolve it to be a
- // direct call, so we keep it.
- if (Function *Callee = CB->getCalledFunction())
- if (Callee->isDeclaration()) {
- using namespace ore;
-
- setInlineRemark(*CB, "unavailable definition");
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
- << NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", CB->getCaller())
- << " because its definition is unavailable"
- << setIsVerbose();
- });
- continue;
- }
-
- CallSites.push_back(std::make_pair(CB, -1));
- }
- }
-
- LLVM_DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n");
-
- // If there are no calls in this function, exit early.
- if (CallSites.empty())
- return false;
-
- // Now that we have all of the call sites, move the ones to functions in the
- // current SCC to the end of the list.
- unsigned FirstCallInSCC = CallSites.size();
- for (unsigned I = 0; I < FirstCallInSCC; ++I)
- if (Function *F = CallSites[I].first->getCalledFunction())
- if (SCCFunctions.count(F))
- std::swap(CallSites[I--], CallSites[--FirstCallInSCC]);
-
- InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, GetAssumptionCache, PSI);
-
- // Now that we have all of the call sites, loop over them and inline them if
- // it looks profitable to do so.
- bool Changed = false;
- bool LocalChange;
- do {
- LocalChange = false;
- // Iterate over the outer loop because inlining functions can cause indirect
- // calls to become direct calls.
- // CallSites may be modified inside so ranged for loop can not be used.
- for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
- auto &P = CallSites[CSi];
- CallBase &CB = *P.first;
- const int InlineHistoryID = P.second;
-
- Function *Caller = CB.getCaller();
- Function *Callee = CB.getCalledFunction();
-
- // We can only inline direct calls to non-declarations.
- if (!Callee || Callee->isDeclaration())
- continue;
-
- bool IsTriviallyDead = isInstructionTriviallyDead(&CB, &GetTLI(*Caller));
-
- if (!IsTriviallyDead) {
- // If this call site was obtained by inlining another function, verify
- // that the include path for the function did not include the callee
- // itself. If so, we'd be recursively inlining the same function,
- // which would provide the same callsites, which would cause us to
- // infinitely inline.
- if (InlineHistoryID != -1 &&
- inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) {
- setInlineRemark(CB, "recursive");
- continue;
- }
- }
-
- // FIXME for new PM: because of the old PM we currently generate ORE and
- // in turn BFI on demand. With the new PM, the ORE dependency should
- // just become a regular analysis dependency.
- OptimizationRemarkEmitter ORE(Caller);
-
- auto OIC = shouldInline(CB, GetInlineCost, ORE);
- // If the policy determines that we should inline this function,
- // delete the call instead.
- if (!OIC)
- continue;
-
- // If this call site is dead and it is to a readonly function, we should
- // just delete the call instead of trying to inline it, regardless of
- // size. This happens because IPSCCP propagates the result out of the
- // call and then we're left with the dead call.
- if (IsTriviallyDead) {
- LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << CB << "\n");
- // Update the call graph by deleting the edge from Callee to Caller.
- setInlineRemark(CB, "trivially dead");
- CG[Caller]->removeCallEdgeFor(CB);
- CB.eraseFromParent();
- ++NumCallsDeleted;
- } else {
- // Get DebugLoc to report. CB will be invalid after Inliner.
- DebugLoc DLoc = CB.getDebugLoc();
- BasicBlock *Block = CB.getParent();
-
- // Attempt to inline the function.
- using namespace ore;
-
- InlineResult IR = inlineCallIfPossible(
- CB, InlineInfo, InlinedArrayAllocas, InlineHistoryID,
- InsertLifetime, AARGetter, ImportedFunctionsStats);
- if (!IR.isSuccess()) {
- setInlineRemark(CB, std::string(IR.getFailureReason()) + "; " +
- inlineCostStr(*OIC));
- ORE.emit([&]() {
- return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
- Block)
- << NV("Callee", Callee) << " will not be inlined into "
- << NV("Caller", Caller) << ": "
- << NV("Reason", IR.getFailureReason());
- });
- continue;
- }
- ++NumInlined;
-
- emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC);
-
- // If inlining this function gave us any new call sites, throw them
- // onto our worklist to process. They are useful inline candidates.
- if (!InlineInfo.InlinedCalls.empty()) {
- // Create a new inline history entry for this, so that we remember
- // that these new callsites came about due to inlining Callee.
- int NewHistoryID = InlineHistory.size();
- InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
-
-#ifndef NDEBUG
-          // Make sure there are no duplicates in the inline candidates. This could
-          // happen when a callsite is simplified to reuse the return value
- // of another callsite during function cloning, thus the other
- // callsite will be reconsidered here.
- DenseSet<CallBase *> DbgCallSites;
- for (auto &II : CallSites)
- DbgCallSites.insert(II.first);
-#endif
-
- for (Value *Ptr : InlineInfo.InlinedCalls) {
-#ifndef NDEBUG
- assert(DbgCallSites.count(dyn_cast<CallBase>(Ptr)) == 0);
-#endif
- CallSites.push_back(
- std::make_pair(dyn_cast<CallBase>(Ptr), NewHistoryID));
- }
- }
- }
-
- // If we inlined or deleted the last possible call site to the function,
- // delete the function body now.
- if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
- // TODO: Can remove if in SCC now.
- !SCCFunctions.count(Callee) &&
- // The function may be apparently dead, but if there are indirect
- // callgraph references to the node, we cannot delete it yet, this
- // could invalidate the CGSCC iterator.
- CG[Callee]->getNumReferences() == 0) {
- LLVM_DEBUG(dbgs() << " -> Deleting dead function: "
- << Callee->getName() << "\n");
- CallGraphNode *CalleeNode = CG[Callee];
-
- // Remove any call graph edges from the callee to its callees.
- CalleeNode->removeAllCalledFunctions();
-
- // Removing the node for callee from the call graph and delete it.
- delete CG.removeFunctionFromModule(CalleeNode);
- ++NumDeleted;
- }
-
- // Remove this call site from the list. If possible, use
- // swap/pop_back for efficiency, but do not use it if doing so would
- // move a call site to a function in this SCC before the
- // 'FirstCallInSCC' barrier.
- if (SCC.isSingular()) {
- CallSites[CSi] = CallSites.back();
- CallSites.pop_back();
- } else {
- CallSites.erase(CallSites.begin() + CSi);
- }
- --CSi;
-
- Changed = true;
- LocalChange = true;
- }
- } while (LocalChange);
-
- return Changed;
-}
-
-bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
- CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
- ACT = &getAnalysis<AssumptionCacheTracker>();
- PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- GetTLI = [&](Function &F) -> const TargetLibraryInfo & {
- return getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- };
- auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
- return ACT->getAssumptionCache(F);
- };
- return inlineCallsImpl(
- SCC, CG, GetAssumptionCache, PSI, GetTLI, InsertLifetime,
- [&](CallBase &CB) { return getInlineCost(CB); }, LegacyAARGetter(*this),
- ImportedFunctionsStats);
-}
-
-/// Remove now-dead linkonce functions at the end of
-/// processing to avoid breaking the SCC traversal.
-bool LegacyInlinerBase::doFinalization(CallGraph &CG) {
- if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
- ImportedFunctionsStats.dump(InlinerFunctionImportStats ==
- InlinerFunctionImportStatsOpts::Verbose);
- return removeDeadFunctions(CG);
-}
-
-/// Remove dead functions that are not included in DNR (Do Not Remove) list.
-bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG,
- bool AlwaysInlineOnly) {
- SmallVector<CallGraphNode *, 16> FunctionsToRemove;
- SmallVector<Function *, 16> DeadFunctionsInComdats;
-
- auto RemoveCGN = [&](CallGraphNode *CGN) {
- // Remove any call graph edges from the function to its callees.
- CGN->removeAllCalledFunctions();
-
- // Remove any edges from the external node to the function's call graph
-    // node. These edges might have been made irrelevant due to
- // optimization of the program.
- CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN);
-
- // Removing the node for callee from the call graph and delete it.
- FunctionsToRemove.push_back(CGN);
- };
-
- // Scan for all of the functions, looking for ones that should now be removed
- // from the program. Insert the dead ones in the FunctionsToRemove set.
- for (const auto &I : CG) {
- CallGraphNode *CGN = I.second.get();
- Function *F = CGN->getFunction();
- if (!F || F->isDeclaration())
- continue;
-
- // Handle the case when this function is called and we only want to care
- // about always-inline functions. This is a bit of a hack to share code
- // between here and the InlineAlways pass.
- if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline))
- continue;
-
- // If the only remaining users of the function are dead constants, remove
- // them.
- F->removeDeadConstantUsers();
-
- if (!F->isDefTriviallyDead())
- continue;
-
- // It is unsafe to drop a function with discardable linkage from a COMDAT
- // without also dropping the other members of the COMDAT.
- // The inliner doesn't visit non-function entities which are in COMDAT
- // groups so it is unsafe to do so *unless* the linkage is local.
- if (!F->hasLocalLinkage()) {
- if (F->hasComdat()) {
- DeadFunctionsInComdats.push_back(F);
- continue;
- }
- }
-
- RemoveCGN(CGN);
- }
- if (!DeadFunctionsInComdats.empty()) {
- // Filter out the functions whose comdats remain alive.
- filterDeadComdatFunctions(DeadFunctionsInComdats);
- // Remove the rest.
- for (Function *F : DeadFunctionsInComdats)
- RemoveCGN(CG[F]);
- }
-
- if (FunctionsToRemove.empty())
- return false;
-
- // Now that we know which functions to delete, do so. We didn't want to do
- // this inline, because that would invalidate our CallGraph::iterator
- // objects. :(
- //
- // Note that it doesn't matter that we are iterating over a non-stable order
- // here to do this, it doesn't matter which order the functions are deleted
- // in.
- array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
- FunctionsToRemove.erase(
- std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()),
- FunctionsToRemove.end());
- for (CallGraphNode *CGN : FunctionsToRemove) {
- delete CG.removeFunctionFromModule(CGN);
- ++NumDeleted;
- }
- return true;
-}
-
InlineAdvisor &
InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
FunctionAnalysisManager &FAM, Module &M) {
@@ -729,8 +189,7 @@ InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM,
CGSCCInlineReplayFallback,
{CGSCCInlineReplayFormat}},
/*EmitRemarks=*/true,
- InlineContext{LTOPhase,
- InlinePass::ReplayCGSCCInliner});
+ InlineContext{LTOPhase, InlinePass::ReplayCGSCCInliner});
return *OwnedAdvisor;
}
@@ -871,9 +330,12 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
if (InlineHistoryID != -1 &&
inlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory)) {
- LLVM_DEBUG(dbgs() << "Skipping inlining due to history: "
- << F.getName() << " -> " << Callee.getName() << "\n");
+ LLVM_DEBUG(dbgs() << "Skipping inlining due to history: " << F.getName()
+ << " -> " << Callee.getName() << "\n");
setInlineRemark(*CB, "recursive");
+ // Set noinline so that we don't forget this decision across CGSCC
+ // iterations.
+ CB->setIsNoInline();
continue;
}
@@ -911,7 +373,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
// Setup the data structure used to plumb customization into the
// `InlineFunction` routine.
InlineFunctionInfo IFI(
- /*cg=*/nullptr, GetAssumptionCache, PSI,
+ GetAssumptionCache, PSI,
&FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
&FAM.getResult<BlockFrequencyAnalysis>(Callee));
@@ -1193,13 +655,13 @@ void ModuleInlinerWrapperPass::printPipeline(
// on Params and Mode).
if (!MPM.isEmpty()) {
MPM.printPipeline(OS, MapClassName2PassName);
- OS << ",";
+ OS << ',';
}
OS << "cgscc(";
if (MaxDevirtIterations != 0)
OS << "devirt<" << MaxDevirtIterations << ">(";
PM.printPipeline(OS, MapClassName2PassName);
if (MaxDevirtIterations != 0)
- OS << ")";
- OS << ")";
+ OS << ')';
+ OS << ')';
}
diff --git a/llvm/lib/Transforms/IPO/Internalize.cpp b/llvm/lib/Transforms/IPO/Internalize.cpp
index 85b1a8303d33..0b8fde6489f8 100644
--- a/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -19,19 +19,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/Internalize.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSet.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
@@ -183,9 +182,8 @@ void InternalizePass::checkComdat(
Info.External = true;
}
-bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
+bool InternalizePass::internalizeModule(Module &M) {
bool Changed = false;
- CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr;
SmallVector<GlobalValue *, 4> Used;
collectUsedGlobalVariables(M, Used, false);
@@ -242,10 +240,6 @@ bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
continue;
Changed = true;
- if (ExternalNode)
- // Remove a callgraph edge from the external node to this function.
- ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]);
-
++NumFunctions;
LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n");
}
@@ -277,55 +271,8 @@ bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
PreservedAnalyses InternalizePass::run(Module &M, ModuleAnalysisManager &AM) {
- if (!internalizeModule(M, AM.getCachedResult<CallGraphAnalysis>(M)))
+ if (!internalizeModule(M))
return PreservedAnalyses::all();
- PreservedAnalyses PA;
- PA.preserve<CallGraphAnalysis>();
- return PA;
-}
-
-namespace {
-class InternalizeLegacyPass : public ModulePass {
-  // Client-supplied callback to control whether a symbol must be preserved.
- std::function<bool(const GlobalValue &)> MustPreserveGV;
-
-public:
- static char ID; // Pass identification, replacement for typeid
-
- InternalizeLegacyPass() : ModulePass(ID), MustPreserveGV(PreserveAPIList()) {}
-
- InternalizeLegacyPass(std::function<bool(const GlobalValue &)> MustPreserveGV)
- : ModulePass(ID), MustPreserveGV(std::move(MustPreserveGV)) {
- initializeInternalizeLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- if (skipModule(M))
- return false;
-
- CallGraphWrapperPass *CGPass =
- getAnalysisIfAvailable<CallGraphWrapperPass>();
- CallGraph *CG = CGPass ? &CGPass->getCallGraph() : nullptr;
- return internalizeModule(M, MustPreserveGV, CG);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addPreserved<CallGraphWrapperPass>();
- }
-};
-}
-
-char InternalizeLegacyPass::ID = 0;
-INITIALIZE_PASS(InternalizeLegacyPass, "internalize",
- "Internalize Global Symbols", false, false)
-
-ModulePass *llvm::createInternalizePass() {
- return new InternalizeLegacyPass();
-}
-
-ModulePass *llvm::createInternalizePass(
- std::function<bool(const GlobalValue &)> MustPreserveGV) {
- return new InternalizeLegacyPass(std::move(MustPreserveGV));
+ return PreservedAnalyses::none();
}
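Similarly, now that the Internalize legacy pass and its CallGraph bookkeeping are removed, an embedder passes the preservation predicate straight to the new-PM pass. A brief sketch, not part of the patch, assuming the InternalizePass(MustPreserveGV) constructor declared in llvm/Transforms/IPO/Internalize.h:

// Sketch only: schedule InternalizePass with a custom preservation callback.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/IPO/Internalize.h"

using namespace llvm;

static void addInternalize(ModulePassManager &MPM) {
  // Keep symbols the embedder still needs visible; internalize the rest.
  MPM.addPass(InternalizePass(
      [](const GlobalValue &GV) { return GV.getName() == "main"; }));
}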
diff --git a/llvm/lib/Transforms/IPO/LoopExtractor.cpp b/llvm/lib/Transforms/IPO/LoopExtractor.cpp
index ad1927c09803..9a5876f85ba7 100644
--- a/llvm/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/llvm/lib/Transforms/IPO/LoopExtractor.cpp
@@ -283,8 +283,8 @@ void LoopExtractorPass::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
static_cast<PassInfoMixin<LoopExtractorPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
- OS << "<";
+ OS << '<';
if (NumLoops == 1)
OS << "single";
- OS << ">";
+ OS << '>';
}
diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index ddfcace6acf8..9b4b3efd7283 100644
--- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -24,7 +24,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Attributes.h"
@@ -51,12 +51,11 @@
#include "llvm/IR/ModuleSummaryIndexYAML.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ReplaceConstant.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -69,6 +68,7 @@
#include "llvm/Support/TrailingObjects.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -172,7 +172,7 @@ BitSetInfo BitSetBuilder::build() {
BSI.AlignLog2 = 0;
if (Mask != 0)
- BSI.AlignLog2 = countTrailingZeros(Mask);
+ BSI.AlignLog2 = llvm::countr_zero(Mask);
// Build the compressed bitset while normalizing the offsets against the
// computed alignment.
@@ -242,7 +242,7 @@ bool lowertypetests::isJumpTableCanonical(Function *F) {
return false;
auto *CI = mdconst::extract_or_null<ConstantInt>(
F->getParent()->getModuleFlag("CFI Canonical Jump Tables"));
- if (!CI || CI->getZExtValue() != 0)
+ if (!CI || !CI->isZero())
return true;
return F->hasFnAttribute("cfi-canonical-jump-table");
}
@@ -406,6 +406,15 @@ class LowerTypeTestsModule {
Triple::OSType OS;
Triple::ObjectFormatType ObjectFormat;
+ // Determines which kind of Thumb jump table we generate. If arch is
+ // either 'arm' or 'thumb' we need to find this out, because
+ // selectJumpTableArmEncoding may decide to use Thumb in either case.
+ bool CanUseArmJumpTable = false, CanUseThumbBWJumpTable = false;
+
+ // The jump table type we ended up deciding on. (Usually the same as
+ // Arch, except that 'arm' and 'thumb' are often interchangeable.)
+ Triple::ArchType JumpTableArch = Triple::UnknownArch;
+
IntegerType *Int1Ty = Type::getInt1Ty(M.getContext());
IntegerType *Int8Ty = Type::getInt8Ty(M.getContext());
PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
@@ -481,6 +490,8 @@ class LowerTypeTestsModule {
void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
ArrayRef<GlobalTypeMember *> Globals);
+ Triple::ArchType
+ selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions);
unsigned getJumpTableEntrySize();
Type *getJumpTableEntryType();
void createJumpTableEntry(raw_ostream &AsmOS, raw_ostream &ConstraintOS,
@@ -518,7 +529,8 @@ class LowerTypeTestsModule {
void replaceDirectCalls(Value *Old, Value *New);
public:
- LowerTypeTestsModule(Module &M, ModuleSummaryIndex *ExportSummary,
+ LowerTypeTestsModule(Module &M, ModuleAnalysisManager &AM,
+ ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary,
bool DropTypeTests);
@@ -526,7 +538,7 @@ public:
// Lower the module using the action and summary passed as command line
// arguments. For testing purposes only.
- static bool runForTesting(Module &M);
+ static bool runForTesting(Module &M, ModuleAnalysisManager &AM);
};
} // end anonymous namespace
@@ -686,7 +698,7 @@ static bool isKnownTypeIdMember(Metadata *TypeId, const DataLayout &DL,
}
if (auto GEP = dyn_cast<GEPOperator>(V)) {
- APInt APOffset(DL.getPointerSizeInBits(0), 0);
+ APInt APOffset(DL.getIndexSizeInBits(0), 0);
bool Result = GEP->accumulateConstantOffset(DL, APOffset);
if (!Result)
return false;
@@ -1182,31 +1194,36 @@ static const unsigned kX86JumpTableEntrySize = 8;
static const unsigned kX86IBTJumpTableEntrySize = 16;
static const unsigned kARMJumpTableEntrySize = 4;
static const unsigned kARMBTIJumpTableEntrySize = 8;
+static const unsigned kARMv6MJumpTableEntrySize = 16;
static const unsigned kRISCVJumpTableEntrySize = 8;
unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
- switch (Arch) {
- case Triple::x86:
- case Triple::x86_64:
- if (const auto *MD = mdconst::extract_or_null<ConstantInt>(
+ switch (JumpTableArch) {
+ case Triple::x86:
+ case Triple::x86_64:
+ if (const auto *MD = mdconst::extract_or_null<ConstantInt>(
M.getModuleFlag("cf-protection-branch")))
- if (MD->getZExtValue())
- return kX86IBTJumpTableEntrySize;
- return kX86JumpTableEntrySize;
- case Triple::arm:
- case Triple::thumb:
+ if (MD->getZExtValue())
+ return kX86IBTJumpTableEntrySize;
+ return kX86JumpTableEntrySize;
+ case Triple::arm:
+ return kARMJumpTableEntrySize;
+ case Triple::thumb:
+ if (CanUseThumbBWJumpTable)
return kARMJumpTableEntrySize;
- case Triple::aarch64:
- if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
+ else
+ return kARMv6MJumpTableEntrySize;
+ case Triple::aarch64:
+ if (const auto *BTE = mdconst::extract_or_null<ConstantInt>(
M.getModuleFlag("branch-target-enforcement")))
- if (BTE->getZExtValue())
- return kARMBTIJumpTableEntrySize;
- return kARMJumpTableEntrySize;
- case Triple::riscv32:
- case Triple::riscv64:
- return kRISCVJumpTableEntrySize;
- default:
- report_fatal_error("Unsupported architecture for jump tables");
+ if (BTE->getZExtValue())
+ return kARMBTIJumpTableEntrySize;
+ return kARMJumpTableEntrySize;
+ case Triple::riscv32:
+ case Triple::riscv64:
+ return kRISCVJumpTableEntrySize;
+ default:
+ report_fatal_error("Unsupported architecture for jump tables");
}
}
@@ -1223,7 +1240,7 @@ void LowerTypeTestsModule::createJumpTableEntry(
bool Endbr = false;
if (const auto *MD = mdconst::extract_or_null<ConstantInt>(
Dest->getParent()->getModuleFlag("cf-protection-branch")))
- Endbr = MD->getZExtValue() != 0;
+ Endbr = !MD->isZero();
if (Endbr)
AsmOS << (JumpTableArch == Triple::x86 ? "endbr32\n" : "endbr64\n");
AsmOS << "jmp ${" << ArgIndex << ":c}@plt\n";
@@ -1240,7 +1257,32 @@ void LowerTypeTestsModule::createJumpTableEntry(
AsmOS << "bti c\n";
AsmOS << "b $" << ArgIndex << "\n";
} else if (JumpTableArch == Triple::thumb) {
- AsmOS << "b.w $" << ArgIndex << "\n";
+ if (!CanUseThumbBWJumpTable) {
+ // In Armv6-M, this sequence will generate a branch without corrupting
+ // any registers. We use two stack words; in the second, we construct the
+ // address we'll pop into pc, and the first is used to save and restore
+ // r0 which we use as a temporary register.
+ //
+ // To support position-independent use cases, the offset of the target
+ // function is stored as a relative offset (which will expand into an
+ // R_ARM_REL32 relocation in ELF, and presumably the equivalent in other
+ // object file types), and added to pc after we load it. (The alternative
+ // B.W is automatically pc-relative.)
+ //
+ // There are five 16-bit Thumb instructions here, so the .balign 4 adds a
+ // sixth halfword of padding, and then the offset consumes a further 4
+ // bytes, for a total of 16, which is very convenient since entries in
+ // this jump table need to have power-of-two size.
+ AsmOS << "push {r0,r1}\n"
+ << "ldr r0, 1f\n"
+ << "0: add r0, r0, pc\n"
+ << "str r0, [sp, #4]\n"
+ << "pop {r0,pc}\n"
+ << ".balign 4\n"
+ << "1: .word $" << ArgIndex << " - (0b + 4)\n";
+ } else {
+ AsmOS << "b.w $" << ArgIndex << "\n";
+ }
} else if (JumpTableArch == Triple::riscv32 ||
JumpTableArch == Triple::riscv64) {
AsmOS << "tail $" << ArgIndex << "@plt\n";
@@ -1325,11 +1367,27 @@ void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr(
F->getAddressSpace(), "", &M);
replaceCfiUses(F, PlaceholderFn, IsJumpTableCanonical);
- Constant *Target = ConstantExpr::getSelect(
- ConstantExpr::getICmp(CmpInst::ICMP_NE, F,
- Constant::getNullValue(F->getType())),
- JT, Constant::getNullValue(F->getType()));
- PlaceholderFn->replaceAllUsesWith(Target);
+ convertUsersOfConstantsToInstructions(PlaceholderFn);
+ // Don't use range based loop, because use list will be modified.
+ while (!PlaceholderFn->use_empty()) {
+ Use &U = *PlaceholderFn->use_begin();
+ auto *InsertPt = dyn_cast<Instruction>(U.getUser());
+ assert(InsertPt && "Non-instruction users should have been eliminated");
+ auto *PN = dyn_cast<PHINode>(InsertPt);
+ if (PN)
+ InsertPt = PN->getIncomingBlock(U)->getTerminator();
+ IRBuilder Builder(InsertPt);
+ Value *ICmp = Builder.CreateICmp(CmpInst::ICMP_NE, F,
+ Constant::getNullValue(F->getType()));
+ Value *Select = Builder.CreateSelect(ICmp, JT,
+ Constant::getNullValue(F->getType()));
+ // For phi nodes, we need to update the incoming value for all operands
+ // with the same predecessor.
+ if (PN)
+ PN->setIncomingValueForBlock(InsertPt->getParent(), Select);
+ else
+ U.set(Select);
+ }
PlaceholderFn->eraseFromParent();
}
@@ -1352,12 +1410,19 @@ static bool isThumbFunction(Function *F, Triple::ArchType ModuleArch) {
// Each jump table must be either ARM or Thumb as a whole for the bit-test math
// to work. Pick one that matches the majority of members to minimize interop
// veneers inserted by the linker.
-static Triple::ArchType
-selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions,
- Triple::ArchType ModuleArch) {
- if (ModuleArch != Triple::arm && ModuleArch != Triple::thumb)
- return ModuleArch;
+Triple::ArchType LowerTypeTestsModule::selectJumpTableArmEncoding(
+ ArrayRef<GlobalTypeMember *> Functions) {
+ if (Arch != Triple::arm && Arch != Triple::thumb)
+ return Arch;
+
+ if (!CanUseThumbBWJumpTable && CanUseArmJumpTable) {
+ // In architectures that provide Arm and Thumb-1 but not Thumb-2,
+ // we should always prefer the Arm jump table format, because the
+ // Thumb-1 one is larger and slower.
+ return Triple::arm;
+ }
+ // Otherwise, go with majority vote.
unsigned ArmCount = 0, ThumbCount = 0;
for (const auto GTM : Functions) {
if (!GTM->isJumpTableCanonical()) {
@@ -1368,7 +1433,7 @@ selectJumpTableArmEncoding(ArrayRef<GlobalTypeMember *> Functions,
}
Function *F = cast<Function>(GTM->getGlobal());
- ++(isThumbFunction(F, ModuleArch) ? ThumbCount : ArmCount);
+ ++(isThumbFunction(F, Arch) ? ThumbCount : ArmCount);
}
return ArmCount > ThumbCount ? Triple::arm : Triple::thumb;
@@ -1381,8 +1446,6 @@ void LowerTypeTestsModule::createJumpTable(
SmallVector<Value *, 16> AsmArgs;
AsmArgs.reserve(Functions.size() * 2);
- Triple::ArchType JumpTableArch = selectJumpTableArmEncoding(Functions, Arch);
-
for (GlobalTypeMember *GTM : Functions)
createJumpTableEntry(AsmOS, ConstraintOS, JumpTableArch, AsmArgs,
cast<Function>(GTM->getGlobal()));
@@ -1399,9 +1462,11 @@ void LowerTypeTestsModule::createJumpTable(
F->addFnAttr("target-features", "-thumb-mode");
if (JumpTableArch == Triple::thumb) {
F->addFnAttr("target-features", "+thumb-mode");
- // Thumb jump table assembly needs Thumb2. The following attribute is added
- // by Clang for -march=armv7.
- F->addFnAttr("target-cpu", "cortex-a8");
+ if (CanUseThumbBWJumpTable) {
+ // Thumb jump table assembly needs Thumb2. The following attribute is
+ // added by Clang for -march=armv7.
+ F->addFnAttr("target-cpu", "cortex-a8");
+ }
}
// When -mbranch-protection= is used, the inline asm adds a BTI. Suppress BTI
// for the function to avoid double BTI. This is a no-op without
@@ -1521,6 +1586,10 @@ void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
// FIXME: find a better way to represent the jumptable in the IR.
assert(!Functions.empty());
+ // Decide on the jump table encoding, so that we know how big the
+ // entries will be.
+ JumpTableArch = selectJumpTableArmEncoding(Functions);
+
// Build a simple layout based on the regular layout of jump tables.
DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
unsigned EntrySize = getJumpTableEntrySize();
@@ -1706,18 +1775,31 @@ void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
/// Lower all type tests in this module.
LowerTypeTestsModule::LowerTypeTestsModule(
- Module &M, ModuleSummaryIndex *ExportSummary,
+ Module &M, ModuleAnalysisManager &AM, ModuleSummaryIndex *ExportSummary,
const ModuleSummaryIndex *ImportSummary, bool DropTypeTests)
: M(M), ExportSummary(ExportSummary), ImportSummary(ImportSummary),
DropTypeTests(DropTypeTests || ClDropTypeTests) {
assert(!(ExportSummary && ImportSummary));
Triple TargetTriple(M.getTargetTriple());
Arch = TargetTriple.getArch();
+ if (Arch == Triple::arm)
+ CanUseArmJumpTable = true;
+ if (Arch == Triple::arm || Arch == Triple::thumb) {
+ auto &FAM =
+ AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ for (Function &F : M) {
+ auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
+ if (TTI.hasArmWideBranch(false))
+ CanUseArmJumpTable = true;
+ if (TTI.hasArmWideBranch(true))
+ CanUseThumbBWJumpTable = true;
+ }
+ }
OS = TargetTriple.getOS();
ObjectFormat = TargetTriple.getObjectFormat();
}
-bool LowerTypeTestsModule::runForTesting(Module &M) {
+bool LowerTypeTestsModule::runForTesting(Module &M, ModuleAnalysisManager &AM) {
ModuleSummaryIndex Summary(/*HaveGVs=*/false);
// Handle the command-line summary arguments. This code is for testing
@@ -1735,7 +1817,8 @@ bool LowerTypeTestsModule::runForTesting(Module &M) {
bool Changed =
LowerTypeTestsModule(
- M, ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
+ M, AM,
+ ClSummaryAction == PassSummaryAction::Export ? &Summary : nullptr,
ClSummaryAction == PassSummaryAction::Import ? &Summary : nullptr,
/*DropTypeTests*/ false)
.lower();
@@ -2186,9 +2269,9 @@ bool LowerTypeTestsModule::lower() {
unsigned MaxUniqueId = 0;
for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
MI != GlobalClasses.member_end(); ++MI) {
- if (auto *MD = MI->dyn_cast<Metadata *>())
+ if (auto *MD = dyn_cast_if_present<Metadata *>(*MI))
MaxUniqueId = std::max(MaxUniqueId, TypeIdInfo[MD].UniqueId);
- else if (auto *BF = MI->dyn_cast<ICallBranchFunnel *>())
+ else if (auto *BF = dyn_cast_if_present<ICallBranchFunnel *>(*MI))
MaxUniqueId = std::max(MaxUniqueId, BF->UniqueId);
}
Sets.emplace_back(I, MaxUniqueId);
@@ -2204,12 +2287,12 @@ bool LowerTypeTestsModule::lower() {
for (GlobalClassesTy::member_iterator MI =
GlobalClasses.member_begin(S.first);
MI != GlobalClasses.member_end(); ++MI) {
- if (MI->is<Metadata *>())
- TypeIds.push_back(MI->get<Metadata *>());
- else if (MI->is<GlobalTypeMember *>())
- Globals.push_back(MI->get<GlobalTypeMember *>());
+ if (isa<Metadata *>(*MI))
+ TypeIds.push_back(cast<Metadata *>(*MI));
+ else if (isa<GlobalTypeMember *>(*MI))
+ Globals.push_back(cast<GlobalTypeMember *>(*MI));
else
- ICallBranchFunnels.push_back(MI->get<ICallBranchFunnel *>());
+ ICallBranchFunnels.push_back(cast<ICallBranchFunnel *>(*MI));
}
// Order type identifiers by unique ID for determinism. This ordering is
@@ -2298,10 +2381,10 @@ PreservedAnalyses LowerTypeTestsPass::run(Module &M,
ModuleAnalysisManager &AM) {
bool Changed;
if (UseCommandLine)
- Changed = LowerTypeTestsModule::runForTesting(M);
+ Changed = LowerTypeTestsModule::runForTesting(M, AM);
else
Changed =
- LowerTypeTestsModule(M, ExportSummary, ImportSummary, DropTypeTests)
+ LowerTypeTestsModule(M, AM, ExportSummary, ImportSummary, DropTypeTests)
.lower();
if (!Changed)
return PreservedAnalyses::all();
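As a quick cross-check of the Armv6-M jump-table entry introduced above (five 16-bit Thumb instructions, one halfword of .balign padding, and a 4-byte relative offset), a throwaway assertion, not part of the patch:

// Sketch only: the entry layout described in the Thumb-1 inline-asm comment.
constexpr unsigned ThumbHalfwords = 5; // push/ldr/add/str/pop
constexpr unsigned PaddingBytes = 2;   // .balign 4 after five halfwords
constexpr unsigned OffsetBytes = 4;    // "1: .word $target - (0b + 4)"
static_assert(ThumbHalfwords * 2 + PaddingBytes + OffsetBytes == 16,
              "matches kARMv6MJumpTableEntrySize");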
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
new file mode 100644
index 000000000000..f835fb26fcb8
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -0,0 +1,3277 @@
+//==-- MemProfContextDisambiguation.cpp - Disambiguate contexts -------------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements support for context disambiguation of allocation
+// calls for profile guided heap optimization. Specifically, it uses Memprof
+// profiles which indicate context specific allocation behavior (currently
+// distinguishing cold vs hot memory allocations). Cloning is performed to
+// expose the cold allocation call contexts, and the allocation calls are
+// subsequently annotated with an attribute for later transformation.
+//
+// The transformations can be performed either directly on IR (regular LTO), or
+// on a ThinLTO index (and later applied to the IR during the ThinLTO backend).
+// Both types of LTO operate on the same base graph representation, which
+// uses CRTP to support either IR or Index formats.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/MemoryProfileInfo.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <sstream>
+#include <vector>
+using namespace llvm;
+using namespace llvm::memprof;
+
+#define DEBUG_TYPE "memprof-context-disambiguation"
+
+STATISTIC(FunctionClonesAnalysis,
+ "Number of function clones created during whole program analysis");
+STATISTIC(FunctionClonesThinBackend,
+ "Number of function clones created during ThinLTO backend");
+STATISTIC(FunctionsClonedThinBackend,
+ "Number of functions that had clones created during ThinLTO backend");
+STATISTIC(AllocTypeNotCold, "Number of not cold static allocations (possibly "
+ "cloned) during whole program analysis");
+STATISTIC(AllocTypeCold, "Number of cold static allocations (possibly cloned) "
+ "during whole program analysis");
+STATISTIC(AllocTypeNotColdThinBackend,
+ "Number of not cold static allocations (possibly cloned) during "
+ "ThinLTO backend");
+STATISTIC(AllocTypeColdThinBackend, "Number of cold static allocations "
+ "(possibly cloned) during ThinLTO backend");
+STATISTIC(OrigAllocsThinBackend,
+ "Number of original (not cloned) allocations with memprof profiles "
+ "during ThinLTO backend");
+STATISTIC(
+ AllocVersionsThinBackend,
+ "Number of allocation versions (including clones) during ThinLTO backend");
+STATISTIC(MaxAllocVersionsThinBackend,
+ "Maximum number of allocation versions created for an original "
+ "allocation during ThinLTO backend");
+STATISTIC(UnclonableAllocsThinBackend,
+          "Number of unclonable ambiguous allocations during ThinLTO backend");
+
+static cl::opt<std::string> DotFilePathPrefix(
+ "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
+ cl::value_desc("filename"),
+ cl::desc("Specify the path prefix of the MemProf dot files."));
+
+static cl::opt<bool> ExportToDot("memprof-export-to-dot", cl::init(false),
+ cl::Hidden,
+ cl::desc("Export graph to dot files."));
+
+static cl::opt<bool>
+ DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden,
+ cl::desc("Dump CallingContextGraph to stdout after each stage."));
+
+static cl::opt<bool>
+ VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden,
+ cl::desc("Perform verification checks on CallingContextGraph."));
+
+static cl::opt<bool>
+ VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden,
+ cl::desc("Perform frequent verification checks on nodes."));
+
+static cl::opt<std::string> MemProfImportSummary(
+ "memprof-import-summary",
+ cl::desc("Import summary to use for testing the ThinLTO backend via opt"),
+ cl::Hidden);
+
+// Indicate we are linking with an allocator that supports hot/cold operator
+// new interfaces.
+cl::opt<bool> SupportsHotColdNew(
+ "supports-hot-cold-new", cl::init(false), cl::Hidden,
+ cl::desc("Linking with hot/cold operator new interfaces"));
+
+namespace {
+/// CRTP base for graphs built from either IR or ThinLTO summary index.
+///
+/// The graph represents the call contexts in all memprof metadata on allocation
+/// calls, with nodes for the allocations themselves, as well as for the calls
+/// in each context. The graph is initially built from the allocation memprof
+/// metadata (or summary) MIBs. It is then updated to match calls with callsite
+/// metadata onto the nodes, reflecting any inlining performed on those calls.
+///
+/// Each MIB (representing an allocation's call context with allocation
+/// behavior) is assigned a unique context id during the graph build. The edges
+/// and nodes in the graph are decorated with the context ids they carry. This
+/// is used to correctly update the graph when cloning is performed so that we
+/// can uniquify the context for a single (possibly cloned) allocation.
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+class CallsiteContextGraph {
+public:
+ CallsiteContextGraph() = default;
+ CallsiteContextGraph(const CallsiteContextGraph &) = default;
+ CallsiteContextGraph(CallsiteContextGraph &&) = default;
+
+ /// Main entry point to perform analysis and transformations on graph.
+ bool process();
+
+ /// Perform cloning on the graph necessary to uniquely identify the allocation
+ /// behavior of an allocation based on its context.
+ void identifyClones();
+
+ /// Assign callsite clones to functions, cloning functions as needed to
+ /// accommodate the combinations of their callsite clones reached by callers.
+ /// For regular LTO this clones functions and callsites in the IR, but for
+ /// ThinLTO the cloning decisions are noted in the summaries and later applied
+ /// in applyImport.
+ bool assignFunctions();
+
+ void dump() const;
+ void print(raw_ostream &OS) const;
+
+ friend raw_ostream &operator<<(raw_ostream &OS,
+ const CallsiteContextGraph &CCG) {
+ CCG.print(OS);
+ return OS;
+ }
+
+ friend struct GraphTraits<
+ const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
+ friend struct DOTGraphTraits<
+ const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
+
+ void exportToDot(std::string Label) const;
+
+ /// Represents a function clone via FuncTy pointer and clone number pair.
+ struct FuncInfo final
+ : public std::pair<FuncTy *, unsigned /*Clone number*/> {
+ using Base = std::pair<FuncTy *, unsigned>;
+ FuncInfo(const Base &B) : Base(B) {}
+ FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {}
+ explicit operator bool() const { return this->first != nullptr; }
+ FuncTy *func() const { return this->first; }
+ unsigned cloneNo() const { return this->second; }
+ };
+
+ /// Represents a callsite clone via CallTy and clone number pair.
+ struct CallInfo final : public std::pair<CallTy, unsigned /*Clone number*/> {
+ using Base = std::pair<CallTy, unsigned>;
+ CallInfo(const Base &B) : Base(B) {}
+ CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0)
+ : Base(Call, CloneNo) {}
+ explicit operator bool() const { return (bool)this->first; }
+ CallTy call() const { return this->first; }
+ unsigned cloneNo() const { return this->second; }
+ void setCloneNo(unsigned N) { this->second = N; }
+ void print(raw_ostream &OS) const {
+ if (!operator bool()) {
+ assert(!cloneNo());
+ OS << "null Call";
+ return;
+ }
+ call()->print(OS);
+ OS << "\t(clone " << cloneNo() << ")";
+ }
+ void dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+ }
+ friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) {
+ Call.print(OS);
+ return OS;
+ }
+ };
+
+ struct ContextEdge;
+
+ /// Node in the Callsite Context Graph
+ struct ContextNode {
+ // Keep this for now since in the IR case, where we have an Instruction*, it
+ // is not as immediately discoverable. Used for printing richer information
+ // when dumping the graph.
+ bool IsAllocation;
+
+ // Keeps track of when the Call was reset to null because there was
+ // recursion.
+ bool Recursive = false;
+
+ // The corresponding allocation or interior call.
+ CallInfo Call;
+
+ // For alloc nodes this is a unique id assigned when constructed, and for
+ // callsite stack nodes it is the original stack id when the node is
+ // constructed from the memprof MIB metadata on the alloc nodes. Note that
+ // this is only used when matching callsite metadata onto the stack nodes
+ // created when processing the allocation memprof MIBs, and for labeling
+ // nodes in the dot graph. Therefore we don't bother to assign a value for
+ // clones.
+ uint64_t OrigStackOrAllocId = 0;
+
+ // This will be formed by ORing together the AllocationType enum values
+ // for contexts including this node.
+ uint8_t AllocTypes = 0;
+
+ // Edges to all callees in the profiled call stacks.
+ // TODO: Should this be a map (from Callee node) for more efficient lookup?
+ std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
+
+ // Edges to all callers in the profiled call stacks.
+ // TODO: Should this be a map (from Caller node) for more efficient lookup?
+ std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
+
+ // The set of IDs for contexts including this node.
+ DenseSet<uint32_t> ContextIds;
+
+ // List of clones of this ContextNode, initially empty.
+ std::vector<ContextNode *> Clones;
+
+ // If a clone, points to the original uncloned node.
+ ContextNode *CloneOf = nullptr;
+
+ ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {}
+
+ ContextNode(bool IsAllocation, CallInfo C)
+ : IsAllocation(IsAllocation), Call(C) {}
+
+ void addClone(ContextNode *Clone) {
+ if (CloneOf) {
+ CloneOf->Clones.push_back(Clone);
+ Clone->CloneOf = CloneOf;
+ } else {
+ Clones.push_back(Clone);
+ assert(!Clone->CloneOf);
+ Clone->CloneOf = this;
+ }
+ }
+
+ ContextNode *getOrigNode() {
+ if (!CloneOf)
+ return this;
+ return CloneOf;
+ }
+
+ void addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType,
+ unsigned int ContextId);
+
+ ContextEdge *findEdgeFromCallee(const ContextNode *Callee);
+ ContextEdge *findEdgeFromCaller(const ContextNode *Caller);
+ void eraseCalleeEdge(const ContextEdge *Edge);
+ void eraseCallerEdge(const ContextEdge *Edge);
+
+ void setCall(CallInfo C) { Call = C; }
+
+ bool hasCall() const { return (bool)Call.call(); }
+
+ void printCall(raw_ostream &OS) const { Call.print(OS); }
+
+ // True if this node was effectively removed from the graph, in which case
+ // its context id set, caller edges, and callee edges should all be empty.
+ bool isRemoved() const {
+ assert(ContextIds.empty() ==
+ (CalleeEdges.empty() && CallerEdges.empty()));
+ return ContextIds.empty();
+ }
+
+ void dump() const;
+ void print(raw_ostream &OS) const;
+
+ friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) {
+ Node.print(OS);
+ return OS;
+ }
+ };
+
+ /// Edge in the Callsite Context Graph from a ContextNode N to a caller or
+ /// callee.
+ struct ContextEdge {
+ ContextNode *Callee;
+ ContextNode *Caller;
+
+ // This will be formed by ORing together the AllocationType enum values
+ // for contexts including this edge.
+ uint8_t AllocTypes = 0;
+
+ // The set of IDs for contexts including this edge.
+ DenseSet<uint32_t> ContextIds;
+
+ ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType,
+ DenseSet<uint32_t> ContextIds)
+ : Callee(Callee), Caller(Caller), AllocTypes(AllocType),
+ ContextIds(ContextIds) {}
+
+ DenseSet<uint32_t> &getContextIds() { return ContextIds; }
+
+ void dump() const;
+ void print(raw_ostream &OS) const;
+
+ friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) {
+ Edge.print(OS);
+ return OS;
+ }
+ };
+
+ /// Helper to remove callee edges that have allocation type None (due to not
+ /// carrying any context ids) after transformations.
+ void removeNoneTypeCalleeEdges(ContextNode *Node);
+
+protected:
+ /// Get a list of nodes corresponding to the stack ids in the given callsite
+ /// context.
+ template <class NodeT, class IteratorT>
+ std::vector<uint64_t>
+ getStackIdsWithContextNodes(CallStack<NodeT, IteratorT> &CallsiteContext);
+
+ /// Adds nodes for the given allocation and any stack ids on its memprof MIB
+ /// metadata (or summary).
+ ContextNode *addAllocNode(CallInfo Call, const FuncTy *F);
+
+ /// Adds nodes for the given MIB stack ids.
+ template <class NodeT, class IteratorT>
+ void addStackNodesForMIB(ContextNode *AllocNode,
+ CallStack<NodeT, IteratorT> &StackContext,
+ CallStack<NodeT, IteratorT> &CallsiteContext,
+ AllocationType AllocType);
+
+ /// Matches all callsite metadata (or summary) to the nodes created for
+ /// allocation memprof MIB metadata, synthesizing new nodes to reflect any
+ /// inlining performed on those callsite instructions.
+ void updateStackNodes();
+
+ /// Update graph to conservatively handle any callsite stack nodes that target
+ /// multiple different callee target functions.
+ void handleCallsitesWithMultipleTargets();
+
+ /// Save lists of calls with MemProf metadata in each function, for faster
+ /// iteration.
+ std::vector<std::pair<FuncTy *, std::vector<CallInfo>>>
+ FuncToCallsWithMetadata;
+
+ /// Map from callsite node to the enclosing caller function.
+ std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
+
+private:
+ using EdgeIter = typename std::vector<std::shared_ptr<ContextEdge>>::iterator;
+
+ using CallContextInfo = std::tuple<CallTy, std::vector<uint64_t>,
+ const FuncTy *, DenseSet<uint32_t>>;
+
+ /// Assigns the given Node to calls at or inlined into the location with
+ /// the Node's stack id, after post order traversing and processing its
+ /// caller nodes. Uses the call information recorded in the given
+ /// StackIdToMatchingCalls map, and creates new nodes for inlined sequences
+ /// as needed. Called by updateStackNodes which sets up the given
+ /// StackIdToMatchingCalls map.
+ void assignStackNodesPostOrder(
+ ContextNode *Node, DenseSet<const ContextNode *> &Visited,
+ DenseMap<uint64_t, std::vector<CallContextInfo>> &StackIdToMatchingCalls);
+
+ /// Duplicates the given set of context ids, updating the provided
+ /// map from each original id with the newly generated context ids,
+ /// and returning the new duplicated id set.
+ DenseSet<uint32_t> duplicateContextIds(
+ const DenseSet<uint32_t> &StackSequenceContextIds,
+ DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);
+
+ /// Propagates all duplicated context ids across the graph.
+ void propagateDuplicateContextIds(
+ const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);
+
+ /// Connect the NewNode to OrigNode's callees if TowardsCallee is true,
+ /// else to its callers. Also updates OrigNode's edges to remove any context
+ /// ids moved to the newly created edge.
+ void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
+ bool TowardsCallee);
+
+ /// Get the stack id corresponding to the given Id or Index (for IR this will
+ /// return itself, for a summary index this will return the id recorded in the
+ /// index for that stack id index value).
+ uint64_t getStackId(uint64_t IdOrIndex) const {
+ return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex);
+ }
+
+ /// Returns true if the given call targets the given function.
+ bool calleeMatchesFunc(CallTy Call, const FuncTy *Func) {
+ return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(Call, Func);
+ }
+
+ /// Get a list of nodes corresponding to the stack ids in the given
+ /// callsite's context.
+ std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
+ return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall(
+ Call);
+ }
+
+ /// Get the last stack id in the context for callsite.
+ uint64_t getLastStackId(CallTy Call) {
+ return static_cast<DerivedCCG *>(this)->getLastStackId(Call);
+ }
+
+ /// Update the allocation call to record type of allocated memory.
+ void updateAllocationCall(CallInfo &Call, AllocationType AllocType) {
+ AllocType == AllocationType::Cold ? AllocTypeCold++ : AllocTypeNotCold++;
+ static_cast<DerivedCCG *>(this)->updateAllocationCall(Call, AllocType);
+ }
+
+ /// Update non-allocation call to invoke (possibly cloned) function
+ /// CalleeFunc.
+ void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc) {
+ static_cast<DerivedCCG *>(this)->updateCall(CallerCall, CalleeFunc);
+ }
+
+ /// Clone the given function for the given callsite, recording the mapping of
+ /// all of the function's tracked calls to their new versions in the CallMap.
+ /// Assigns new clones to clone number CloneNo.
+ FuncInfo cloneFunctionForCallsite(
+ FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
+ std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
+ return static_cast<DerivedCCG *>(this)->cloneFunctionForCallsite(
+ Func, Call, CallMap, CallsWithMetadataInFunc, CloneNo);
+ }
+
+ /// Gets a label to use in the dot graph for the given call clone in the given
+ /// function.
+ std::string getLabel(const FuncTy *Func, const CallTy Call,
+ unsigned CloneNo) const {
+ return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call, CloneNo);
+ }
+
+ /// Helpers to find the node corresponding to the given call or stackid.
+ ContextNode *getNodeForInst(const CallInfo &C);
+ ContextNode *getNodeForAlloc(const CallInfo &C);
+ ContextNode *getNodeForStackId(uint64_t StackId);
+
+ /// Removes the node information recorded for the given call.
+ void unsetNodeForInst(const CallInfo &C);
+
+ /// Computes the alloc type corresponding to the given context ids, by
+ /// unioning their recorded alloc types.
+ uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds);
+
+ /// Returns the allocation type of the intersection of the contexts of two
+ /// nodes (based on their provided context id sets), optimized for the case
+ /// when Node1Ids is smaller than Node2Ids.
+ uint8_t intersectAllocTypesImpl(const DenseSet<uint32_t> &Node1Ids,
+ const DenseSet<uint32_t> &Node2Ids);
+
+ /// Returns the allocation type of the intersection of the contexts of two
+ /// nodes (based on their provided context id sets).
+ uint8_t intersectAllocTypes(const DenseSet<uint32_t> &Node1Ids,
+ const DenseSet<uint32_t> &Node2Ids);
+
+ /// Create a clone of Edge's callee and move Edge to that new callee node,
+ /// performing the necessary context id and allocation type updates.
+ /// If callee's caller edge iterator is supplied, it is updated when removing
+ /// the edge from that list.
+ ContextNode *
+ moveEdgeToNewCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
+ EdgeIter *CallerEdgeI = nullptr);
+
+ /// Change the callee of Edge to existing callee clone NewCallee, performing
+ /// the necessary context id and allocation type updates.
+ /// If callee's caller edge iterator is supplied, it is updated when removing
+ /// the edge from that list.
+ void moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
+ ContextNode *NewCallee,
+ EdgeIter *CallerEdgeI = nullptr,
+ bool NewClone = false);
+
+ /// Recursively perform cloning on the graph for the given Node and its
+ /// callers, in order to uniquely identify the allocation behavior of an
+ /// allocation given its context.
+ void identifyClones(ContextNode *Node,
+ DenseSet<const ContextNode *> &Visited);
+
+ /// Map from each context ID to the AllocationType assigned to that context.
+ std::map<uint32_t, AllocationType> ContextIdToAllocationType;
+
+ /// Identifies the context node created for a stack id when adding the MIB
+ /// contexts to the graph. This is used to locate the context nodes when
+ /// trying to assign the corresponding callsites with those stack ids to these
+ /// nodes.
+ std::map<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;
+
+ /// Maps to track the calls to their corresponding nodes in the graph.
+ MapVector<CallInfo, ContextNode *> AllocationCallToContextNodeMap;
+ MapVector<CallInfo, ContextNode *> NonAllocationCallToContextNodeMap;
+
+ /// Owner of all ContextNode unique_ptrs.
+ std::vector<std::unique_ptr<ContextNode>> NodeOwner;
+
+ /// Perform sanity checks on graph when requested.
+ void check() const;
+
+ /// Keeps track of the last unique context id assigned.
+ unsigned int LastContextId = 0;
+};
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+using ContextNode =
+ typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode;
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+using ContextEdge =
+ typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge;
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+using FuncInfo =
+ typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::FuncInfo;
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+using CallInfo =
+ typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::CallInfo;
+
+/// CRTP derived class for graphs built from IR (regular LTO).
+class ModuleCallsiteContextGraph
+ : public CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
+ Instruction *> {
+public:
+ ModuleCallsiteContextGraph(
+ Module &M,
+ function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter);
+
+private:
+ friend CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
+ Instruction *>;
+
+ uint64_t getStackId(uint64_t IdOrIndex) const;
+ bool calleeMatchesFunc(Instruction *Call, const Function *Func);
+ uint64_t getLastStackId(Instruction *Call);
+ std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call);
+ void updateAllocationCall(CallInfo &Call, AllocationType AllocType);
+ void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
+ CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
+ Instruction *>::FuncInfo
+ cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
+ std::map<CallInfo, CallInfo> &CallMap,
+ std::vector<CallInfo> &CallsWithMetadataInFunc,
+ unsigned CloneNo);
+ std::string getLabel(const Function *Func, const Instruction *Call,
+ unsigned CloneNo) const;
+
+ const Module &Mod;
+ function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter;
+};
+
+/// Represents a call in the summary index graph, which can either be an
+/// allocation or an interior callsite node in an allocation's context.
+/// Holds a pointer to the corresponding data structure in the index.
+struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> {
+ IndexCall() : PointerUnion() {}
+ IndexCall(std::nullptr_t) : IndexCall() {}
+ IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
+ IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
+ IndexCall(PointerUnion PT) : PointerUnion(PT) {}
+
+ IndexCall *operator->() { return this; }
+
+ PointerUnion<CallsiteInfo *, AllocInfo *> getBase() const { return *this; }
+
+ void print(raw_ostream &OS) const {
+ if (auto *AI = llvm::dyn_cast_if_present<AllocInfo *>(getBase())) {
+ OS << *AI;
+ } else {
+ auto *CI = llvm::dyn_cast_if_present<CallsiteInfo *>(getBase());
+ assert(CI);
+ OS << *CI;
+ }
+ }
+};
+
+/// CRTP derived class for graphs built from summary index (ThinLTO).
+class IndexCallsiteContextGraph
+ : public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
+ IndexCall> {
+public:
+ IndexCallsiteContextGraph(
+ ModuleSummaryIndex &Index,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ isPrevailing);
+
+private:
+ friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
+ IndexCall>;
+
+ uint64_t getStackId(uint64_t IdOrIndex) const;
+ bool calleeMatchesFunc(IndexCall &Call, const FunctionSummary *Func);
+ uint64_t getLastStackId(IndexCall &Call);
+ std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
+ void updateAllocationCall(CallInfo &Call, AllocationType AllocType);
+ void updateCall(CallInfo &CallerCall, FuncInfo CalleeFunc);
+ CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
+ IndexCall>::FuncInfo
+ cloneFunctionForCallsite(FuncInfo &Func, CallInfo &Call,
+ std::map<CallInfo, CallInfo> &CallMap,
+ std::vector<CallInfo> &CallsWithMetadataInFunc,
+ unsigned CloneNo);
+ std::string getLabel(const FunctionSummary *Func, const IndexCall &Call,
+ unsigned CloneNo) const;
+
+ // Saves the mapping from each function summary containing memprof records
+ // back to its VI, for use in checking and debugging.
+ std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
+
+ const ModuleSummaryIndex &Index;
+};
+} // namespace
+
+namespace llvm {
+template <>
+struct DenseMapInfo<typename CallsiteContextGraph<
+ ModuleCallsiteContextGraph, Function, Instruction *>::CallInfo>
+ : public DenseMapInfo<std::pair<Instruction *, unsigned>> {};
+template <>
+struct DenseMapInfo<typename CallsiteContextGraph<
+ IndexCallsiteContextGraph, FunctionSummary, IndexCall>::CallInfo>
+ : public DenseMapInfo<std::pair<IndexCall, unsigned>> {};
+template <>
+struct DenseMapInfo<IndexCall>
+ : public DenseMapInfo<PointerUnion<CallsiteInfo *, AllocInfo *>> {};
+} // end namespace llvm
+
+namespace {
+
+struct FieldSeparator {
+ bool Skip = true;
+ const char *Sep;
+
+ FieldSeparator(const char *Sep = ", ") : Sep(Sep) {}
+};
+
+raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) {
+ if (FS.Skip) {
+ FS.Skip = false;
+ return OS;
+ }
+ return OS << FS.Sep;
+}
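+// Typical use is to stream FS before each item in a loop, e.g. (a sketch):
+//   FieldSeparator FS;
+//   for (auto &Item : Items) OS << FS << Item;
+// which prints the items separated by Sep, with nothing before the first one.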
+
+// Map the uint8_t alloc types (which may contain NotCold|Cold) to the alloc
+// type we should actually use on the corresponding allocation.
+// If we can't clone a node that has NotCold+Cold alloc type, we will fall
+// back to using NotCold. So don't bother cloning to distinguish NotCold+Cold
+// from NotCold.
+AllocationType allocTypeToUse(uint8_t AllocTypes) {
+ assert(AllocTypes != (uint8_t)AllocationType::None);
+ if (AllocTypes ==
+ ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
+ return AllocationType::NotCold;
+ else
+ return (AllocationType)AllocTypes;
+}
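+// For example, allocTypeToUse(Cold) is Cold, while
+// allocTypeToUse(NotCold | Cold) falls back to NotCold as described above.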
+
+// Helper to check if the alloc types for all edges recorded in the
+// InAllocTypes vector match the alloc types for all edges in the Edges
+// vector.
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+bool allocTypesMatch(
+ const std::vector<uint8_t> &InAllocTypes,
+ const std::vector<std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>>
+ &Edges) {
+ return std::equal(
+ InAllocTypes.begin(), InAllocTypes.end(), Edges.begin(),
+ [](const uint8_t &l,
+ const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &r) {
+ // Can share if one of the edges is None type - don't
+ // care about the type along that edge as it doesn't
+ // exist for those context ids.
+ if (l == (uint8_t)AllocationType::None ||
+ r->AllocTypes == (uint8_t)AllocationType::None)
+ return true;
+ return allocTypeToUse(l) == allocTypeToUse(r->AllocTypes);
+ });
+}
+
+} // end anonymous namespace
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
+CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
+ const CallInfo &C) {
+ ContextNode *Node = getNodeForAlloc(C);
+ if (Node)
+ return Node;
+
+ return NonAllocationCallToContextNodeMap.lookup(C);
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
+CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForAlloc(
+ const CallInfo &C) {
+ return AllocationCallToContextNodeMap.lookup(C);
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
+CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForStackId(
+ uint64_t StackId) {
+ auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId);
+ if (StackEntryNode != StackEntryIdToContextNodeMap.end())
+ return StackEntryNode->second;
+ return nullptr;
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::unsetNodeForInst(
+ const CallInfo &C) {
+ AllocationCallToContextNodeMap.erase(C) ||
+ NonAllocationCallToContextNodeMap.erase(C);
+ assert(!AllocationCallToContextNodeMap.count(C) &&
+ !NonAllocationCallToContextNodeMap.count(C));
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
+ addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType,
+ unsigned int ContextId) {
+ for (auto &Edge : CallerEdges) {
+ if (Edge->Caller == Caller) {
+ Edge->AllocTypes |= (uint8_t)AllocType;
+ Edge->getContextIds().insert(ContextId);
+ return;
+ }
+ }
+ std::shared_ptr<ContextEdge> Edge = std::make_shared<ContextEdge>(
+ this, Caller, (uint8_t)AllocType, DenseSet<uint32_t>({ContextId}));
+ CallerEdges.push_back(Edge);
+ Caller->CalleeEdges.push_back(Edge);
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<
+ DerivedCCG, FuncTy, CallTy>::removeNoneTypeCalleeEdges(ContextNode *Node) {
+ for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) {
+ auto Edge = *EI;
+ if (Edge->AllocTypes == (uint8_t)AllocationType::None) {
+ assert(Edge->ContextIds.empty());
+ Edge->Callee->eraseCallerEdge(Edge.get());
+ EI = Node->CalleeEdges.erase(EI);
+ } else
+ ++EI;
+ }
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
+CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
+ findEdgeFromCallee(const ContextNode *Callee) {
+ for (const auto &Edge : CalleeEdges)
+ if (Edge->Callee == Callee)
+ return Edge.get();
+ return nullptr;
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge *
+CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
+ findEdgeFromCaller(const ContextNode *Caller) {
+ for (const auto &Edge : CallerEdges)
+ if (Edge->Caller == Caller)
+ return Edge.get();
+ return nullptr;
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
+ eraseCalleeEdge(const ContextEdge *Edge) {
+ auto EI =
+ std::find_if(CalleeEdges.begin(), CalleeEdges.end(),
+ [Edge](const std::shared_ptr<ContextEdge> &CalleeEdge) {
+ return CalleeEdge.get() == Edge;
+ });
+ assert(EI != CalleeEdges.end());
+ CalleeEdges.erase(EI);
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::
+ eraseCallerEdge(const ContextEdge *Edge) {
+ auto EI =
+ std::find_if(CallerEdges.begin(), CallerEdges.end(),
+ [Edge](const std::shared_ptr<ContextEdge> &CallerEdge) {
+ return CallerEdge.get() == Edge;
+ });
+ assert(EI != CallerEdges.end());
+ CallerEdges.erase(EI);
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::computeAllocType(
+ DenseSet<uint32_t> &ContextIds) {
+ uint8_t BothTypes =
+ (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
+ uint8_t AllocType = (uint8_t)AllocationType::None;
+ for (auto Id : ContextIds) {
+ AllocType |= (uint8_t)ContextIdToAllocationType[Id];
+ // Bail early if alloc type reached both, no further refinement.
+ if (AllocType == BothTypes)
+ return AllocType;
+ }
+ return AllocType;
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+uint8_t
+CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypesImpl(
+ const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) {
+ uint8_t BothTypes =
+ (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold;
+ uint8_t AllocType = (uint8_t)AllocationType::None;
+ for (auto Id : Node1Ids) {
+ if (!Node2Ids.count(Id))
+ continue;
+ AllocType |= (uint8_t)ContextIdToAllocationType[Id];
+ // Bail early if alloc type reached both, no further refinement.
+ if (AllocType == BothTypes)
+ return AllocType;
+ }
+ return AllocType;
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+uint8_t CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::intersectAllocTypes(
+ const DenseSet<uint32_t> &Node1Ids, const DenseSet<uint32_t> &Node2Ids) {
+ if (Node1Ids.size() < Node2Ids.size())
+ return intersectAllocTypesImpl(Node1Ids, Node2Ids);
+ else
+ return intersectAllocTypesImpl(Node2Ids, Node1Ids);
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
+CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addAllocNode(
+ CallInfo Call, const FuncTy *F) {
+ assert(!getNodeForAlloc(Call));
+ NodeOwner.push_back(
+ std::make_unique<ContextNode>(/*IsAllocation=*/true, Call));
+ ContextNode *AllocNode = NodeOwner.back().get();
+ AllocationCallToContextNodeMap[Call] = AllocNode;
+ NodeToCallingFunc[AllocNode] = F;
+ // Use LastContextId as a unique id for MIB allocation nodes.
+ AllocNode->OrigStackOrAllocId = LastContextId;
+ // Alloc type should be updated as we add in the MIBs. We should assert
+ // afterwards that it is not still None.
+ AllocNode->AllocTypes = (uint8_t)AllocationType::None;
+
+ return AllocNode;
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+template <class NodeT, class IteratorT>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::addStackNodesForMIB(
+ ContextNode *AllocNode, CallStack<NodeT, IteratorT> &StackContext,
+ CallStack<NodeT, IteratorT> &CallsiteContext, AllocationType AllocType) {
+ // Treat the hot alloc type as NotCold until the disambiguation for "hot"
+ // is done.
+ if (AllocType == AllocationType::Hot)
+ AllocType = AllocationType::NotCold;
+
+ ContextIdToAllocationType[++LastContextId] = AllocType;
+
+ // Update alloc type and context ids for this MIB.
+ AllocNode->AllocTypes |= (uint8_t)AllocType;
+ AllocNode->ContextIds.insert(LastContextId);
+
+ // Now add or update nodes for each stack id in alloc's context.
+ // Later when processing the stack ids on non-alloc callsites we will adjust
+ // for any inlining in the context.
+ ContextNode *PrevNode = AllocNode;
+ // Look for recursion (direct recursion should have been collapsed by
+ // module summary analysis; here we should just be detecting mutual
+ // recursion). Mark these nodes so we don't try to clone them.
+ SmallSet<uint64_t, 8> StackIdSet;
+ // Skip any on the allocation call (inlining).
+ for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext);
+ ContextIter != StackContext.end(); ++ContextIter) {
+ auto StackId = getStackId(*ContextIter);
+ ContextNode *StackNode = getNodeForStackId(StackId);
+ if (!StackNode) {
+ NodeOwner.push_back(
+ std::make_unique<ContextNode>(/*IsAllocation=*/false));
+ StackNode = NodeOwner.back().get();
+ StackEntryIdToContextNodeMap[StackId] = StackNode;
+ StackNode->OrigStackOrAllocId = StackId;
+ }
+ auto Ins = StackIdSet.insert(StackId);
+ if (!Ins.second)
+ StackNode->Recursive = true;
+ StackNode->ContextIds.insert(LastContextId);
+ StackNode->AllocTypes |= (uint8_t)AllocType;
+ PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId);
+ PrevNode = StackNode;
+ }
+}
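+// For example, for an MIB whose stack ids after the shared prefix are
+// [A, B, C], ordered from the allocation outward, the loop above chains
+// caller edges AllocNode -> A -> B -> C, adding the new context id and the
+// MIB's alloc type to each of those nodes and edges.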
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+DenseSet<uint32_t>
+CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::duplicateContextIds(
+ const DenseSet<uint32_t> &StackSequenceContextIds,
+ DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
+ DenseSet<uint32_t> NewContextIds;
+ for (auto OldId : StackSequenceContextIds) {
+ NewContextIds.insert(++LastContextId);
+ OldToNewContextIds[OldId].insert(LastContextId);
+ assert(ContextIdToAllocationType.count(OldId));
+ // The new context has the same allocation type as original.
+ ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId];
+ }
+ return NewContextIds;
+}
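+// For example, duplicating the id set {3, 7} when LastContextId is 10 yields
+// the new set {11, 12}, records each old id's duplicates in
+// OldToNewContextIds, and gives each new id its original's allocation type.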
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
+ propagateDuplicateContextIds(
+ const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds) {
+ // Build a set of duplicated context ids corresponding to the input id set.
+ auto GetNewIds = [&OldToNewContextIds](const DenseSet<uint32_t> &ContextIds) {
+ DenseSet<uint32_t> NewIds;
+ for (auto Id : ContextIds)
+ if (auto NewId = OldToNewContextIds.find(Id);
+ NewId != OldToNewContextIds.end())
+ NewIds.insert(NewId->second.begin(), NewId->second.end());
+ return NewIds;
+ };
+
+ // Recursively update context ids sets along caller edges.
+ auto UpdateCallers = [&](ContextNode *Node,
+ DenseSet<const ContextEdge *> &Visited,
+ auto &&UpdateCallers) -> void {
+ for (const auto &Edge : Node->CallerEdges) {
+ auto Inserted = Visited.insert(Edge.get());
+ if (!Inserted.second)
+ continue;
+ ContextNode *NextNode = Edge->Caller;
+ DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Edge->getContextIds());
+ // Only need to recursively iterate to NextNode via this caller edge if
+ // it resulted in any added ids to NextNode.
+ if (!NewIdsToAdd.empty()) {
+ Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
+ NextNode->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
+ UpdateCallers(NextNode, Visited, UpdateCallers);
+ }
+ }
+ };
+
+ DenseSet<const ContextEdge *> Visited;
+ for (auto &Entry : AllocationCallToContextNodeMap) {
+ auto *Node = Entry.second;
+ // Update ids on the allocation nodes before calling the recursive
+ // update along caller edges, since this simplifies the logic during
+ // that traversal.
+ DenseSet<uint32_t> NewIdsToAdd = GetNewIds(Node->ContextIds);
+ Node->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end());
+ UpdateCallers(Node, Visited, UpdateCallers);
+ }
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::connectNewNode(
+ ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee) {
+ // Make a copy of the context ids, since this will be adjusted below as they
+ // are moved.
+ DenseSet<uint32_t> RemainingContextIds = NewNode->ContextIds;
+ auto &OrigEdges =
+ TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges;
+ // Increment iterator in loop so that we can remove edges as needed.
+ for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) {
+ auto Edge = *EI;
+ // Remove any matching context ids from Edge and return the set that were
+ // found and removed; these become the new edge's context ids. Also update
+ // the remaining (not found) ids.
+ DenseSet<uint32_t> NewEdgeContextIds, NotFoundContextIds;
+ set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds,
+ NotFoundContextIds);
+ RemainingContextIds.swap(NotFoundContextIds);
+ // If no matching context ids for this edge, skip it.
+ if (NewEdgeContextIds.empty()) {
+ ++EI;
+ continue;
+ }
+ if (TowardsCallee) {
+ auto NewEdge = std::make_shared<ContextEdge>(
+ Edge->Callee, NewNode, computeAllocType(NewEdgeContextIds),
+ NewEdgeContextIds);
+ NewNode->CalleeEdges.push_back(NewEdge);
+ NewEdge->Callee->CallerEdges.push_back(NewEdge);
+ } else {
+ auto NewEdge = std::make_shared<ContextEdge>(
+ NewNode, Edge->Caller, computeAllocType(NewEdgeContextIds),
+ NewEdgeContextIds);
+ NewNode->CallerEdges.push_back(NewEdge);
+ NewEdge->Caller->CalleeEdges.push_back(NewEdge);
+ }
+ // Remove old edge if context ids empty.
+ if (Edge->getContextIds().empty()) {
+ if (TowardsCallee) {
+ Edge->Callee->eraseCallerEdge(Edge.get());
+ EI = OrigNode->CalleeEdges.erase(EI);
+ } else {
+ Edge->Caller->eraseCalleeEdge(Edge.get());
+ EI = OrigNode->CallerEdges.erase(EI);
+ }
+ continue;
+ }
+ ++EI;
+ }
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
+ assignStackNodesPostOrder(ContextNode *Node,
+ DenseSet<const ContextNode *> &Visited,
+ DenseMap<uint64_t, std::vector<CallContextInfo>>
+ &StackIdToMatchingCalls) {
+ auto Inserted = Visited.insert(Node);
+ if (!Inserted.second)
+ return;
+ // Post order traversal. Iterate over a copy since we may add nodes and
+ // therefore new callers during the recursive call, invalidating any
+ // iterator over the original edge vector. We don't need to process these
+ // new nodes as they were already processed on creation.
+ auto CallerEdges = Node->CallerEdges;
+ for (auto &Edge : CallerEdges) {
+ // Skip any that have been removed during the recursion.
+ if (!Edge)
+ continue;
+ assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls);
+ }
+
+ // If this node's stack id is in the map, update the graph to contain new
+ // nodes representing any inlining at interior callsites. Note we move the
+ // associated context ids over to the new nodes.
+
+ // Ignore this node if it is for an allocation or we didn't record any
+ // stack id lists ending at it.
+ if (Node->IsAllocation ||
+ !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId))
+ return;
+
+ auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId];
+ // Handle the simple case first. A single call with a single stack id.
+ // In this case there is no need to create any new context nodes, simply
+ // assign the context node for stack id to this Call.
+ if (Calls.size() == 1) {
+ auto &[Call, Ids, Func, SavedContextIds] = Calls[0];
+ if (Ids.size() == 1) {
+ assert(SavedContextIds.empty());
+ // It should be this Node
+ assert(Node == getNodeForStackId(Ids[0]));
+ if (Node->Recursive)
+ return;
+ Node->setCall(Call);
+ NonAllocationCallToContextNodeMap[Call] = Node;
+ NodeToCallingFunc[Node] = Func;
+ return;
+ }
+ }
+
+ // Find the node for the last stack id, which should be the same
+ // across all calls recorded for this id, and is this node's id.
+ uint64_t LastId = Node->OrigStackOrAllocId;
+ ContextNode *LastNode = getNodeForStackId(LastId);
+ // We should only have kept stack ids that had nodes.
+ assert(LastNode);
+
+ for (unsigned I = 0; I < Calls.size(); I++) {
+ auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
+ // Skip any for which we didn't assign any ids, these don't get a node in
+ // the graph.
+ if (SavedContextIds.empty())
+ continue;
+
+ assert(LastId == Ids.back());
+
+ ContextNode *FirstNode = getNodeForStackId(Ids[0]);
+ assert(FirstNode);
+
+ // Recompute the context ids for this stack id sequence (the
+ // intersection of the context ids of the corresponding nodes).
+ // Start with the ids we saved in the map for this call, which could be
+ // duplicated context ids. We have to recompute as we might have overlap
+ // between the saved context ids for different last nodes, and may have
+ // already removed some of them during the post order traversal.
+ set_intersect(SavedContextIds, FirstNode->ContextIds);
+ ContextNode *PrevNode = nullptr;
+ for (auto Id : Ids) {
+ ContextNode *CurNode = getNodeForStackId(Id);
+ // We should only have kept stack ids that had nodes and weren't
+ // recursive.
+ assert(CurNode);
+ assert(!CurNode->Recursive);
+ if (!PrevNode) {
+ PrevNode = CurNode;
+ continue;
+ }
+ auto *Edge = CurNode->findEdgeFromCallee(PrevNode);
+ if (!Edge) {
+ SavedContextIds.clear();
+ break;
+ }
+ PrevNode = CurNode;
+ set_intersect(SavedContextIds, Edge->getContextIds());
+
+ // If we now have no context ids for the clone, skip this call.
+ if (SavedContextIds.empty())
+ break;
+ }
+ if (SavedContextIds.empty())
+ continue;
+
+ // Create new context node.
+ NodeOwner.push_back(
+ std::make_unique<ContextNode>(/*IsAllocation=*/false, Call));
+ ContextNode *NewNode = NodeOwner.back().get();
+ NodeToCallingFunc[NewNode] = Func;
+ NonAllocationCallToContextNodeMap[Call] = NewNode;
+ NewNode->ContextIds = SavedContextIds;
+ NewNode->AllocTypes = computeAllocType(NewNode->ContextIds);
+
+ // Connect to callees of innermost stack frame in inlined call chain.
+ // This updates context ids for FirstNode's callees to reflect those
+ // moved to NewNode.
+ connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true);
+
+ // Connect to callers of outermost stack frame in inlined call chain.
+ // This updates context ids for LastNode's callers to reflect those
+ // moved to NewNode.
+ connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false);
+
+ // Now we need to remove context ids from edges/nodes between First and
+ // Last Node.
+ PrevNode = nullptr;
+ for (auto Id : Ids) {
+ ContextNode *CurNode = getNodeForStackId(Id);
+ // We should only have kept stack ids that had nodes.
+ assert(CurNode);
+
+ // Remove the context ids moved to NewNode from CurNode, and the
+ // edge from the prior node.
+ set_subtract(CurNode->ContextIds, NewNode->ContextIds);
+ if (PrevNode) {
+ auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
+ assert(PrevEdge);
+ set_subtract(PrevEdge->getContextIds(), NewNode->ContextIds);
+ if (PrevEdge->getContextIds().empty()) {
+ PrevNode->eraseCallerEdge(PrevEdge);
+ CurNode->eraseCalleeEdge(PrevEdge);
+ }
+ }
+ PrevNode = CurNode;
+ }
+ }
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
+ // Map of stack id to all calls with that as the last (outermost caller)
+ // callsite id that has a context node (some might not due to pruning
+ // performed during matching of the allocation profile contexts).
+ // The CallContextInfo contains the Call and a list of its stack ids with
+ // ContextNodes, the function containing Call, and the set of context ids
+ // the analysis will eventually identify for use in any new node created
+ // for that callsite.
+ DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
+ for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
+ for (auto &Call : CallsWithMetadata) {
+ // Ignore allocations, already handled.
+ if (AllocationCallToContextNodeMap.count(Call))
+ continue;
+ auto StackIdsWithContextNodes =
+ getStackIdsWithContextNodesForCall(Call.call());
+ // If there were no nodes created for MIBs on allocs (maybe this was in
+ // the unambiguous part of the MIB stack that was pruned), ignore.
+ if (StackIdsWithContextNodes.empty())
+ continue;
+ // Otherwise, record this Call along with the list of ids for the last
+ // (outermost caller) stack id with a node.
+ StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
+ {Call.call(), StackIdsWithContextNodes, Func, {}});
+ }
+ }
+
+ // First make a pass through all stack ids that correspond to a call,
+ // as identified in the above loop. Compute the context ids corresponding to
+ // each of these calls when they correspond to multiple stack ids due to
+ // inlining. Perform any duplication of context ids required when
+ // there is more than one call with the same stack ids. Their (possibly newly
+ // duplicated) context ids are saved in the StackIdToMatchingCalls map.
+ DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
+ for (auto &It : StackIdToMatchingCalls) {
+ auto &Calls = It.getSecond();
+ // Skip single calls with a single stack id. These don't need a new node.
+ if (Calls.size() == 1) {
+ auto &Ids = std::get<1>(Calls[0]);
+ if (Ids.size() == 1)
+ continue;
+ }
+ // In order to do the best and maximal matching of inlined calls to context
+ // node sequences we will sort the vectors of stack ids in descending order
+ // of length, and within each length, lexicographically by stack id. The
+ // latter is so that we can specially handle calls that have identical stack
+ // id sequences (either due to cloning or artificially because of the MIB
+ // context pruning).
+ std::stable_sort(Calls.begin(), Calls.end(),
+ [](const CallContextInfo &A, const CallContextInfo &B) {
+ auto &IdsA = std::get<1>(A);
+ auto &IdsB = std::get<1>(B);
+ return IdsA.size() > IdsB.size() ||
+ (IdsA.size() == IdsB.size() && IdsA < IdsB);
+ });
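+ // For example, the stack id lists {1, 2, 3}, {4, 5} and {1, 2} would be
+ // ordered {1, 2, 3}, {1, 2}, {4, 5}: longest first, then lexicographically
+ // within the same length.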
+
+ // Find the node for the last stack id, which should be the same
+ // across all calls recorded for this id, and is the id for this
+ // entry in the StackIdToMatchingCalls map.
+ uint64_t LastId = It.getFirst();
+ ContextNode *LastNode = getNodeForStackId(LastId);
+ // We should only have kept stack ids that had nodes.
+ assert(LastNode);
+
+ if (LastNode->Recursive)
+ continue;
+
+ // Initialize the context ids with the last node's. We will subsequently
+ // refine the context ids by computing the intersection along all edges.
+ DenseSet<uint32_t> LastNodeContextIds = LastNode->ContextIds;
+ assert(!LastNodeContextIds.empty());
+
+ for (unsigned I = 0; I < Calls.size(); I++) {
+ auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
+ assert(SavedContextIds.empty());
+ assert(LastId == Ids.back());
+
+ // First compute the context ids for this stack id sequence (the
+ // intersection of the context ids of the corresponding nodes).
+ // Start with the remaining saved ids for the last node.
+ assert(!LastNodeContextIds.empty());
+ DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
+
+ ContextNode *PrevNode = LastNode;
+ ContextNode *CurNode = LastNode;
+ bool Skip = false;
+
+ // Iterate backwards through the stack Ids, starting after the last Id
+ // in the list, which was handled once outside for all Calls.
+ for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) {
+ auto Id = *IdIter;
+ CurNode = getNodeForStackId(Id);
+ // We should only have kept stack ids that had nodes.
+ assert(CurNode);
+
+ if (CurNode->Recursive) {
+ Skip = true;
+ break;
+ }
+
+ auto *Edge = CurNode->findEdgeFromCaller(PrevNode);
+ // If there is no edge then the nodes belong to different MIB contexts,
+ // and we should skip this inlined context sequence. For example, this
+ // particular inlined context may include stack ids A->B, and we may
+ // indeed have nodes for both A and B, but it is possible that they were
+ // never profiled in sequence in a single MIB for any allocation (i.e.
+ // we might have profiled an allocation that involves the callsite A,
+ // but through a different one of its callee callsites, and we might
+ // have profiled an allocation that involves callsite B, but reached
+ // from a different caller callsite).
+ if (!Edge) {
+ Skip = true;
+ break;
+ }
+ PrevNode = CurNode;
+
+ // Update the context ids, which is the intersection of the ids along
+ // all edges in the sequence.
+ set_intersect(StackSequenceContextIds, Edge->getContextIds());
+
+ // If we now have no context ids for the clone, skip this call.
+ if (StackSequenceContextIds.empty()) {
+ Skip = true;
+ break;
+ }
+ }
+ if (Skip)
+ continue;
+
+ // If some of this call's stack ids did not have corresponding nodes (due
+ // to pruning), don't include any context ids for contexts that extend
+ // beyond these nodes. Otherwise we would be matching part of unrelated /
+ // not fully matching stack contexts. To do this, subtract any context ids
+ // found in caller nodes of the last node found above.
+ if (Ids.back() != getLastStackId(Call)) {
+ for (const auto &PE : CurNode->CallerEdges) {
+ set_subtract(StackSequenceContextIds, PE->getContextIds());
+ if (StackSequenceContextIds.empty())
+ break;
+ }
+ // If we now have no context ids for the clone, skip this call.
+ if (StackSequenceContextIds.empty())
+ continue;
+ }
+
+ // Check if the next set of stack ids is the same (since the Calls vector
+ // of tuples is sorted by the stack ids we can just look at the next one).
+ bool DuplicateContextIds = false;
+ if (I + 1 < Calls.size()) {
+ auto NextIds = std::get<1>(Calls[I + 1]);
+ DuplicateContextIds = Ids == NextIds;
+ }
+
+ // If we don't have duplicate context ids, then we can assign all the
+ // context ids computed for the original node sequence to this call.
+ // If there are duplicate calls with the same stack ids then we synthesize
+ // new context ids that are duplicates of the originals. These are
+ // assigned to SavedContextIds, which is a reference into the map entry
+ // for this call, allowing us to access these ids later on.
+ OldToNewContextIds.reserve(OldToNewContextIds.size() +
+ StackSequenceContextIds.size());
+ SavedContextIds =
+ DuplicateContextIds
+ ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds)
+ : StackSequenceContextIds;
+ assert(!SavedContextIds.empty());
+
+ if (!DuplicateContextIds) {
+ // Update saved last node's context ids to remove those that are
+ // assigned to other calls, so that it is ready for the next call at
+ // this stack id.
+ set_subtract(LastNodeContextIds, StackSequenceContextIds);
+ if (LastNodeContextIds.empty())
+ break;
+ }
+ }
+ }
+
+ // Propagate the duplicate context ids over the graph.
+ propagateDuplicateContextIds(OldToNewContextIds);
+
+ if (VerifyCCG)
+ check();
+
+ // Now perform a post-order traversal over the graph, starting with the
+ // allocation nodes, essentially processing nodes from callers to callees.
+ // For any that contains an id in the map, update the graph to contain new
+ // nodes representing any inlining at interior callsites. Note we move the
+ // associated context ids over to the new nodes.
+ DenseSet<const ContextNode *> Visited;
+ for (auto &Entry : AllocationCallToContextNodeMap)
+ assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls);
+}
+
+uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) {
+ CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
+ Call->getMetadata(LLVMContext::MD_callsite));
+ return CallsiteContext.back();
+}
+
+uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
+ assert(isa<CallsiteInfo *>(Call.getBase()));
+ CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
+ CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase()));
+ // Need to convert index into stack id.
+ return Index.getStackIdAtIndex(CallsiteContext.back());
+}
+
+static const std::string MemProfCloneSuffix = ".memprof.";
+
+static std::string getMemProfFuncName(Twine Base, unsigned CloneNo) {
+ // We use CloneNo == 0 to refer to the original version, which doesn't get
+ // renamed with a suffix.
+ if (!CloneNo)
+ return Base.str();
+ return (Base + MemProfCloneSuffix + Twine(CloneNo)).str();
+}
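+// For example, getMemProfFuncName("foo", 0) returns "foo" while
+// getMemProfFuncName("foo", 2) returns "foo.memprof.2".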
+
+std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
+ const Instruction *Call,
+ unsigned CloneNo) const {
+ return (Twine(Call->getFunction()->getName()) + " -> " +
+ cast<CallBase>(Call)->getCalledFunction()->getName())
+ .str();
+}
+
+std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func,
+ const IndexCall &Call,
+ unsigned CloneNo) const {
+ auto VI = FSToVIMap.find(Func);
+ assert(VI != FSToVIMap.end());
+ if (isa<AllocInfo *>(Call.getBase()))
+ return (VI->second.name() + " -> alloc").str();
+ else {
+ auto *Callsite = dyn_cast_if_present<CallsiteInfo *>(Call.getBase());
+ return (VI->second.name() + " -> " +
+ getMemProfFuncName(Callsite->Callee.name(),
+ Callsite->Clones[CloneNo]))
+ .str();
+ }
+}
+
+std::vector<uint64_t>
+ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall(
+ Instruction *Call) {
+ CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
+ Call->getMetadata(LLVMContext::MD_callsite));
+ return getStackIdsWithContextNodes<MDNode, MDNode::op_iterator>(
+ CallsiteContext);
+}
+
+std::vector<uint64_t>
+IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) {
+ assert(isa<CallsiteInfo *>(Call.getBase()));
+ CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
+ CallsiteContext(dyn_cast_if_present<CallsiteInfo *>(Call.getBase()));
+ return getStackIdsWithContextNodes<CallsiteInfo,
+ SmallVector<unsigned>::const_iterator>(
+ CallsiteContext);
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+template <class NodeT, class IteratorT>
+std::vector<uint64_t>
+CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getStackIdsWithContextNodes(
+ CallStack<NodeT, IteratorT> &CallsiteContext) {
+ std::vector<uint64_t> StackIds;
+ for (auto IdOrIndex : CallsiteContext) {
+ auto StackId = getStackId(IdOrIndex);
+ ContextNode *Node = getNodeForStackId(StackId);
+ if (!Node)
+ break;
+ StackIds.push_back(StackId);
+ }
+ return StackIds;
+}
+
+ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
+ Module &M, function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter)
+ : Mod(M), OREGetter(OREGetter) {
+ for (auto &F : M) {
+ std::vector<CallInfo> CallsWithMetadata;
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ if (!isa<CallBase>(I))
+ continue;
+ if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) {
+ CallsWithMetadata.push_back(&I);
+ auto *AllocNode = addAllocNode(&I, &F);
+ auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite);
+ assert(CallsiteMD);
+ CallStack<MDNode, MDNode::op_iterator> CallsiteContext(CallsiteMD);
+ // Add all of the MIBs and their stack nodes.
+ for (auto &MDOp : MemProfMD->operands()) {
+ auto *MIBMD = cast<const MDNode>(MDOp);
+ MDNode *StackNode = getMIBStackNode(MIBMD);
+ assert(StackNode);
+ CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
+ addStackNodesForMIB<MDNode, MDNode::op_iterator>(
+ AllocNode, StackContext, CallsiteContext,
+ getMIBAllocType(MIBMD));
+ }
+ assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
+ // Memprof and callsite metadata on memory allocations no longer
+ // needed.
+ I.setMetadata(LLVMContext::MD_memprof, nullptr);
+ I.setMetadata(LLVMContext::MD_callsite, nullptr);
+ }
+ // For callsite metadata, add to list for this function for later use.
+ else if (I.getMetadata(LLVMContext::MD_callsite))
+ CallsWithMetadata.push_back(&I);
+ }
+ }
+ if (!CallsWithMetadata.empty())
+ FuncToCallsWithMetadata.push_back({&F, CallsWithMetadata});
+ }
+
+ if (DumpCCG) {
+ dbgs() << "CCG before updating call stack chains:\n";
+ dbgs() << *this;
+ }
+
+ if (ExportToDot)
+ exportToDot("prestackupdate");
+
+ updateStackNodes();
+
+ handleCallsitesWithMultipleTargets();
+
+ // Strip off remaining callsite metadata, no longer needed.
+ for (auto &FuncEntry : FuncToCallsWithMetadata)
+ for (auto &Call : FuncEntry.second)
+ Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr);
+}
+
+IndexCallsiteContextGraph::IndexCallsiteContextGraph(
+ ModuleSummaryIndex &Index,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ isPrevailing)
+ : Index(Index) {
+ for (auto &I : Index) {
+ auto VI = Index.getValueInfo(I);
+ for (auto &S : VI.getSummaryList()) {
+ // We should only add the prevailing nodes. Otherwise we may try to clone
+ // in a weak copy that won't be linked (and may be different from the
+ // prevailing version).
+ // We only keep the memprof summary on the prevailing copy now when
+ // building the combined index, as a space optimization; however, don't
+ // rely on this optimization. The linker doesn't resolve local linkage
+ // values, so don't check whether those are prevailing.
+ if (!GlobalValue::isLocalLinkage(S->linkage()) &&
+ !isPrevailing(VI.getGUID(), S.get()))
+ continue;
+ auto *FS = dyn_cast<FunctionSummary>(S.get());
+ if (!FS)
+ continue;
+ std::vector<CallInfo> CallsWithMetadata;
+ if (!FS->allocs().empty()) {
+ for (auto &AN : FS->mutableAllocs()) {
+ // This can happen because of recursion elimination handling that
+ // currently exists in ModuleSummaryAnalysis. Skip these for now.
+ // We still added them to the summary because we need to be able to
+ // correlate properly in applyImport in the backends.
+ if (AN.MIBs.empty())
+ continue;
+ CallsWithMetadata.push_back({&AN});
+ auto *AllocNode = addAllocNode({&AN}, FS);
+ // Pass an empty CallStack to the CallsiteContext (second)
+ // parameter, since for ThinLTO we already collapsed out the inlined
+ // stack ids on the allocation call during ModuleSummaryAnalysis.
+ CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
+ EmptyContext;
+ // Now add all of the MIBs and their stack nodes.
+ for (auto &MIB : AN.MIBs) {
+ CallStack<MIBInfo, SmallVector<unsigned>::const_iterator>
+ StackContext(&MIB);
+ addStackNodesForMIB<MIBInfo, SmallVector<unsigned>::const_iterator>(
+ AllocNode, StackContext, EmptyContext, MIB.AllocType);
+ }
+ assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None);
+ // Initialize version 0 on the summary alloc node to the current alloc
+ // type, unless it has both types, in which case make it the default, so
+ // that if we aren't able to clone, the original version always ends up
+ // with the default allocation behavior.
+ AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes);
+ }
+ }
+ // For callsite metadata, add to list for this function for later use.
+ if (!FS->callsites().empty())
+ for (auto &SN : FS->mutableCallsites())
+ CallsWithMetadata.push_back({&SN});
+
+ if (!CallsWithMetadata.empty())
+ FuncToCallsWithMetadata.push_back({FS, CallsWithMetadata});
+
+ if (!FS->allocs().empty() || !FS->callsites().empty())
+ FSToVIMap[FS] = VI;
+ }
+ }
+
+ if (DumpCCG) {
+ dbgs() << "CCG before updating call stack chains:\n";
+ dbgs() << *this;
+ }
+
+ if (ExportToDot)
+ exportToDot("prestackupdate");
+
+ updateStackNodes();
+
+ handleCallsitesWithMultipleTargets();
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy,
+ CallTy>::handleCallsitesWithMultipleTargets() {
+ // Look for and work around callsites that call multiple functions.
+ // This can happen for indirect calls, which need better handling, and in
+ // rarer cases (e.g. macro expansion).
+ // TODO: To fix this for indirect calls we will want to perform speculative
+ // devirtualization using either the normal PGO info with ICP, or using the
+ // information in the profiled MemProf contexts. We can do this prior to
+ // this transformation for regular LTO, and for ThinLTO we can simulate that
+ // effect in the summary and perform the actual speculative devirtualization
+ // while cloning in the ThinLTO backend.
+ for (auto Entry = NonAllocationCallToContextNodeMap.begin();
+ Entry != NonAllocationCallToContextNodeMap.end();) {
+ auto *Node = Entry->second;
+ assert(Node->Clones.empty());
+ // Check all node callees and see if in the same function.
+ bool Removed = false;
+ auto Call = Node->Call.call();
+ for (auto &Edge : Node->CalleeEdges) {
+ if (!Edge->Callee->hasCall())
+ continue;
+ assert(NodeToCallingFunc.count(Edge->Callee));
+ // Check if the called function matches that of the callee node.
+ if (calleeMatchesFunc(Call, NodeToCallingFunc[Edge->Callee]))
+ continue;
+ // Work around by setting Node to have a null call, so it gets
+ // skipped during cloning. Otherwise assignFunctions will assert
+ // because its data structures are not designed to handle this case.
+ Entry = NonAllocationCallToContextNodeMap.erase(Entry);
+ Node->setCall(CallInfo());
+ Removed = true;
+ break;
+ }
+ if (!Removed)
+ Entry++;
+ }
+}
+
+uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
+ // In the Module (IR) case this is already the Id.
+ return IdOrIndex;
+}
+
+uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
+ // In the Index case this is an index into the stack id list in the summary
+ // index; convert it to an Id.
+ return Index.getStackIdAtIndex(IdOrIndex);
+}
+
+bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call,
+ const Function *Func) {
+ auto *CB = dyn_cast<CallBase>(Call);
+ if (!CB->getCalledOperand())
+ return false;
+ auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts();
+ auto *CalleeFunc = dyn_cast<Function>(CalleeVal);
+ if (CalleeFunc == Func)
+ return true;
+ auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
+ return Alias && Alias->getAliasee() == Func;
+}
+
+bool IndexCallsiteContextGraph::calleeMatchesFunc(IndexCall &Call,
+ const FunctionSummary *Func) {
+ ValueInfo Callee =
+ dyn_cast_if_present<CallsiteInfo *>(Call.getBase())->Callee;
+ // If there is no summary list then this is a call to an externally defined
+ // symbol.
+ AliasSummary *Alias =
+ Callee.getSummaryList().empty()
+ ? nullptr
+ : dyn_cast<AliasSummary>(Callee.getSummaryList()[0].get());
+ assert(FSToVIMap.count(Func));
+ return Callee == FSToVIMap[Func] ||
+ // If callee is an alias, check the aliasee, since only function
+ // summary base objects will contain the stack node summaries and thus
+ // get a context node.
+ (Alias && Alias->getAliaseeVI() == FSToVIMap[Func]);
+}
+
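+// Helper to convert an AllocTypes bitmask into a string, used when printing
+// nodes and edges.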
+static std::string getAllocTypeString(uint8_t AllocTypes) {
+ if (!AllocTypes)
+ return "None";
+ std::string Str;
+ if (AllocTypes & (uint8_t)AllocationType::NotCold)
+ Str += "NotCold";
+ if (AllocTypes & (uint8_t)AllocationType::Cold)
+ Str += "Cold";
+ return Str;
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::dump()
+ const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode::print(
+ raw_ostream &OS) const {
+ OS << "Node " << this << "\n";
+ OS << "\t";
+ printCall(OS);
+ if (Recursive)
+ OS << " (recursive)";
+ OS << "\n";
+ OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n";
+ OS << "\tContextIds:";
+ std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
+ std::sort(SortedIds.begin(), SortedIds.end());
+ for (auto Id : SortedIds)
+ OS << " " << Id;
+ OS << "\n";
+ OS << "\tCalleeEdges:\n";
+ for (auto &Edge : CalleeEdges)
+ OS << "\t\t" << *Edge << "\n";
+ OS << "\tCallerEdges:\n";
+ for (auto &Edge : CallerEdges)
+ OS << "\t\t" << *Edge << "\n";
+ if (!Clones.empty()) {
+ OS << "\tClones: ";
+ FieldSeparator FS;
+ for (auto *Clone : Clones)
+ OS << FS << Clone;
+ OS << "\n";
+ } else if (CloneOf) {
+ OS << "\tClone of " << CloneOf << "\n";
+ }
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::dump()
+ const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextEdge::print(
+ raw_ostream &OS) const {
+ OS << "Edge from Callee " << Callee << " to Caller: " << Caller
+ << " AllocTypes: " << getAllocTypeString(AllocTypes);
+ OS << " ContextIds:";
+ std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
+ std::sort(SortedIds.begin(), SortedIds.end());
+ for (auto Id : SortedIds)
+ OS << " " << Id;
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::dump() const {
+ print(dbgs());
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::print(
+ raw_ostream &OS) const {
+ OS << "Callsite Context Graph:\n";
+ using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
+ for (const auto Node : nodes<GraphType>(this)) {
+ if (Node->isRemoved())
+ continue;
+ Node->print(OS);
+ OS << "\n";
+ }
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+static void checkEdge(
+ const std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>> &Edge) {
+ // Confirm that alloc type is not None and that we have at least one context
+ // id.
+ assert(Edge->AllocTypes != (uint8_t)AllocationType::None);
+ assert(!Edge->ContextIds.empty());
+}
+
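+// Verify that Node's context ids are consistent with the union of the context
+// ids on its caller and callee edges, optionally checking each edge as well.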
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+static void checkNode(const ContextNode<DerivedCCG, FuncTy, CallTy> *Node,
+ bool CheckEdges = true) {
+ if (Node->isRemoved())
+ return;
+ // Node's context ids should be the union of both its callee and caller edge
+ // context ids.
+ if (Node->CallerEdges.size()) {
+ auto EI = Node->CallerEdges.begin();
+ auto &FirstEdge = *EI;
+ EI++;
+ DenseSet<uint32_t> CallerEdgeContextIds(FirstEdge->ContextIds);
+ for (; EI != Node->CallerEdges.end(); EI++) {
+ const auto &Edge = *EI;
+ if (CheckEdges)
+ checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
+ set_union(CallerEdgeContextIds, Edge->ContextIds);
+ }
+ // Node can have more context ids than callers if some contexts terminate at
+ // node and some are longer.
+ assert(Node->ContextIds == CallerEdgeContextIds ||
+ set_is_subset(CallerEdgeContextIds, Node->ContextIds));
+ }
+ if (Node->CalleeEdges.size()) {
+ auto EI = Node->CalleeEdges.begin();
+ auto &FirstEdge = *EI;
+ EI++;
+ DenseSet<uint32_t> CalleeEdgeContextIds(FirstEdge->ContextIds);
+ for (; EI != Node->CalleeEdges.end(); EI++) {
+ const auto &Edge = *EI;
+ if (CheckEdges)
+ checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
+ set_union(CalleeEdgeContextIds, Edge->ContextIds);
+ }
+ assert(Node->ContextIds == CalleeEdgeContextIds);
+ }
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::check() const {
+ using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
+ for (const auto Node : nodes<GraphType>(this)) {
+ checkNode<DerivedCCG, FuncTy, CallTy>(Node, /*CheckEdges=*/false);
+ for (auto &Edge : Node->CallerEdges)
+ checkEdge<DerivedCCG, FuncTy, CallTy>(Edge);
+ }
+}
+
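+// GraphTraits specialization so the context graph can be traversed with the
+// generic graph iterators (used by the print/check routines and WriteGraph).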
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+struct GraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *> {
+ using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
+ using NodeRef = const ContextNode<DerivedCCG, FuncTy, CallTy> *;
+
+ using NodePtrTy = std::unique_ptr<ContextNode<DerivedCCG, FuncTy, CallTy>>;
+ static NodeRef getNode(const NodePtrTy &P) { return P.get(); }
+
+ using nodes_iterator =
+ mapped_iterator<typename std::vector<NodePtrTy>::const_iterator,
+ decltype(&getNode)>;
+
+ static nodes_iterator nodes_begin(GraphType G) {
+ return nodes_iterator(G->NodeOwner.begin(), &getNode);
+ }
+
+ static nodes_iterator nodes_end(GraphType G) {
+ return nodes_iterator(G->NodeOwner.end(), &getNode);
+ }
+
+ static NodeRef getEntryNode(GraphType G) {
+ return G->NodeOwner.begin()->get();
+ }
+
+ using EdgePtrTy = std::shared_ptr<ContextEdge<DerivedCCG, FuncTy, CallTy>>;
+ static const ContextNode<DerivedCCG, FuncTy, CallTy> *
+ GetCallee(const EdgePtrTy &P) {
+ return P->Callee;
+ }
+
+ using ChildIteratorType =
+ mapped_iterator<typename std::vector<std::shared_ptr<ContextEdge<
+ DerivedCCG, FuncTy, CallTy>>>::const_iterator,
+ decltype(&GetCallee)>;
+
+ static ChildIteratorType child_begin(NodeRef N) {
+ return ChildIteratorType(N->CalleeEdges.begin(), &GetCallee);
+ }
+
+ static ChildIteratorType child_end(NodeRef N) {
+ return ChildIteratorType(N->CalleeEdges.end(), &GetCallee);
+ }
+};
+
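+// DOTGraphTraits specialization controlling how nodes and edges are labeled
+// and colored when the context graph is exported to a dot file.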
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+struct DOTGraphTraits<const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>
+ : public DefaultDOTGraphTraits {
+ DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
+
+ using GraphType = const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *;
+ using GTraits = GraphTraits<GraphType>;
+ using NodeRef = typename GTraits::NodeRef;
+ using ChildIteratorType = typename GTraits::ChildIteratorType;
+
+ static std::string getNodeLabel(NodeRef Node, GraphType G) {
+ std::string LabelString =
+ (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") +
+ Twine(Node->OrigStackOrAllocId))
+ .str();
+ LabelString += "\n";
+ if (Node->hasCall()) {
+ auto Func = G->NodeToCallingFunc.find(Node);
+ assert(Func != G->NodeToCallingFunc.end());
+ LabelString +=
+ G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo());
+ } else {
+ LabelString += "null call";
+ if (Node->Recursive)
+ LabelString += " (recursive)";
+ else
+ LabelString += " (external)";
+ }
+ return LabelString;
+ }
+
+ static std::string getNodeAttributes(NodeRef Node, GraphType) {
+ std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " +
+ getContextIds(Node->ContextIds) + "\"")
+ .str();
+ AttributeString +=
+ (Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str();
+ AttributeString += ",style=\"filled\"";
+ if (Node->CloneOf) {
+ AttributeString += ",color=\"blue\"";
+ AttributeString += ",style=\"filled,bold,dashed\"";
+ } else
+ AttributeString += ",style=\"filled\"";
+ return AttributeString;
+ }
+
+ static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter,
+ GraphType) {
+ auto &Edge = *(ChildIter.getCurrent());
+ return (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" +
+ Twine(",fillcolor=\"") + getColor(Edge->AllocTypes) + "\"")
+ .str();
+ }
+
+ // Since the NodeOwner list includes nodes that are no longer connected to
+ // the graph, skip them here.
+ static bool isNodeHidden(NodeRef Node, GraphType) {
+ return Node->isRemoved();
+ }
+
+private:
+ static std::string getContextIds(const DenseSet<uint32_t> &ContextIds) {
+ std::string IdString = "ContextIds:";
+ if (ContextIds.size() < 100) {
+ std::vector<uint32_t> SortedIds(ContextIds.begin(), ContextIds.end());
+ std::sort(SortedIds.begin(), SortedIds.end());
+ for (auto Id : SortedIds)
+ IdString += (" " + Twine(Id)).str();
+ } else {
+ IdString += (" (" + Twine(ContextIds.size()) + " ids)").str();
+ }
+ return IdString;
+ }
+
+ static std::string getColor(uint8_t AllocTypes) {
+ if (AllocTypes == (uint8_t)AllocationType::NotCold)
+ // Color "brown1" actually looks like a lighter red.
+ return "brown1";
+ if (AllocTypes == (uint8_t)AllocationType::Cold)
+ return "cyan";
+ if (AllocTypes ==
+ ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
+ // Lighter purple.
+ return "mediumorchid1";
+ return "gray";
+ }
+
+ static std::string getNodeId(NodeRef Node) {
+ std::stringstream SStream;
+ SStream << std::hex << "N0x" << (unsigned long long)Node;
+ std::string Result = SStream.str();
+ return Result;
+ }
+};
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
+ std::string Label) const {
+ WriteGraph(this, "", false, Label,
+ DotFilePathPrefix + "ccg." + Label + ".dot");
+}
+
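+// Create a new clone of Edge's current callee node and move Edge onto it,
+// delegating the edge and context id updates to moveEdgeToExistingCalleeClone.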
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+typename CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode *
+CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::moveEdgeToNewCalleeClone(
+ const std::shared_ptr<ContextEdge> &Edge, EdgeIter *CallerEdgeI) {
+ ContextNode *Node = Edge->Callee;
+ NodeOwner.push_back(
+ std::make_unique<ContextNode>(Node->IsAllocation, Node->Call));
+ ContextNode *Clone = NodeOwner.back().get();
+ Node->addClone(Clone);
+ assert(NodeToCallingFunc.count(Node));
+ NodeToCallingFunc[Clone] = NodeToCallingFunc[Node];
+ moveEdgeToExistingCalleeClone(Edge, Clone, CallerEdgeI, /*NewClone=*/true);
+ return Clone;
+}
+
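+// Move Edge to call NewCallee (a clone of its current callee's original node),
+// transferring the edge's context ids and alloc types from the old callee and
+// updating the callee edges of both the old and new callee accordingly.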
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::
+ moveEdgeToExistingCalleeClone(const std::shared_ptr<ContextEdge> &Edge,
+ ContextNode *NewCallee, EdgeIter *CallerEdgeI,
+ bool NewClone) {
+ // NewCallee and Edge's current callee must be clones of the same original
+ // node (Edge's current callee may be the original node too).
+ assert(NewCallee->getOrigNode() == Edge->Callee->getOrigNode());
+ auto &EdgeContextIds = Edge->getContextIds();
+ ContextNode *OldCallee = Edge->Callee;
+ if (CallerEdgeI)
+ *CallerEdgeI = OldCallee->CallerEdges.erase(*CallerEdgeI);
+ else
+ OldCallee->eraseCallerEdge(Edge.get());
+ Edge->Callee = NewCallee;
+ NewCallee->CallerEdges.push_back(Edge);
+ // Don't need to update Edge's context ids since we are simply reconnecting
+ // it.
+ set_subtract(OldCallee->ContextIds, EdgeContextIds);
+ NewCallee->ContextIds.insert(EdgeContextIds.begin(), EdgeContextIds.end());
+ NewCallee->AllocTypes |= Edge->AllocTypes;
+ OldCallee->AllocTypes = computeAllocType(OldCallee->ContextIds);
+ // OldCallee alloc type should be None iff its context id set is now empty.
+ assert((OldCallee->AllocTypes == (uint8_t)AllocationType::None) ==
+ OldCallee->ContextIds.empty());
+ // Now walk the old callee node's callee edges and move Edge's context ids
+ // over to the corresponding edge into the clone (which is created here if
+ // this is a newly created clone).
+ for (auto &OldCalleeEdge : OldCallee->CalleeEdges) {
+ // The context ids moving to the new callee are the subset of this edge's
+ // context ids and the context ids on the caller edge being moved.
+ DenseSet<uint32_t> EdgeContextIdsToMove =
+ set_intersection(OldCalleeEdge->getContextIds(), EdgeContextIds);
+ set_subtract(OldCalleeEdge->getContextIds(), EdgeContextIdsToMove);
+ OldCalleeEdge->AllocTypes =
+ computeAllocType(OldCalleeEdge->getContextIds());
+ if (!NewClone) {
+ // Update context ids / alloc type on corresponding edge to NewCallee.
+ // There is a chance this may not exist if we are reusing an existing
+ // clone, specifically during function assignment, where we would have
+ // removed none type edges after creating the clone. If we can't find
+ // a corresponding edge there, fall through to the cloning below.
+ if (auto *NewCalleeEdge =
+ NewCallee->findEdgeFromCallee(OldCalleeEdge->Callee)) {
+ NewCalleeEdge->getContextIds().insert(EdgeContextIdsToMove.begin(),
+ EdgeContextIdsToMove.end());
+ NewCalleeEdge->AllocTypes |= computeAllocType(EdgeContextIdsToMove);
+ continue;
+ }
+ }
+ auto NewEdge = std::make_shared<ContextEdge>(
+ OldCalleeEdge->Callee, NewCallee,
+ computeAllocType(EdgeContextIdsToMove), EdgeContextIdsToMove);
+ NewCallee->CalleeEdges.push_back(NewEdge);
+ NewEdge->Callee->CallerEdges.push_back(NewEdge);
+ }
+ if (VerifyCCG) {
+ checkNode<DerivedCCG, FuncTy, CallTy>(OldCallee, /*CheckEdges=*/false);
+ checkNode<DerivedCCG, FuncTy, CallTy>(NewCallee, /*CheckEdges=*/false);
+ for (const auto &OldCalleeEdge : OldCallee->CalleeEdges)
+ checkNode<DerivedCCG, FuncTy, CallTy>(OldCalleeEdge->Callee,
+ /*CheckEdges=*/false);
+ for (const auto &NewCalleeEdge : NewCallee->CalleeEdges)
+ checkNode<DerivedCCG, FuncTy, CallTy>(NewCalleeEdge->Callee,
+ /*CheckEdges=*/false);
+ }
+}
+
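+// Identify the callsite/allocation node clones needed to disambiguate the
+// allocation types, starting the recursive walk from each allocation node.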
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones() {
+ DenseSet<const ContextNode *> Visited;
+ for (auto &Entry : AllocationCallToContextNodeMap)
+ identifyClones(Entry.second, Visited);
+}
+
+// Helper function to check whether an AllocType is Cold, NotCold, or both.
+bool checkColdOrNotCold(uint8_t AllocType) {
+ return (AllocType == (uint8_t)AllocationType::Cold) ||
+ (AllocType == (uint8_t)AllocationType::NotCold) ||
+ (AllocType ==
+ ((uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold));
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::identifyClones(
+ ContextNode *Node, DenseSet<const ContextNode *> &Visited) {
+ if (VerifyNodes)
+ checkNode<DerivedCCG, FuncTy, CallTy>(Node);
+ assert(!Node->CloneOf);
+
+ // If Node has a null call, then either it wasn't found in the module (regular
+ // LTO) or summary index (ThinLTO), or there were other conditions blocking
+ // cloning (e.g. recursion, calls multiple targets, etc.).
+ // Do this here so that we don't try to recursively clone callers below, which
+ // isn't useful at least for this node.
+ if (!Node->hasCall())
+ return;
+
+#ifndef NDEBUG
+ auto Insert =
+#endif
+ Visited.insert(Node);
+ // We should not have visited this node yet.
+ assert(Insert.second);
+ // The recursive call to identifyClones may delete the current edge from the
+ // CallerEdges vector. Make a copy and iterate on that, which is simpler than
+ // passing in an iterator and having the recursive call erase from it. Other
+ // edges may also get removed during the recursion; these will have null
+ // Callee and Caller pointers (and are deleted later), so we skip them below.
+ {
+ auto CallerEdges = Node->CallerEdges;
+ for (auto &Edge : CallerEdges) {
+ // Skip any that have been removed by an earlier recursive call.
+ if (Edge->Callee == nullptr && Edge->Caller == nullptr) {
+ assert(!std::count(Node->CallerEdges.begin(), Node->CallerEdges.end(),
+ Edge));
+ continue;
+ }
+ // Ignore any caller we previously visited via another edge.
+ if (!Visited.count(Edge->Caller) && !Edge->Caller->CloneOf) {
+ identifyClones(Edge->Caller, Visited);
+ }
+ }
+ }
+
+ // Check if we reached an unambiguous call or have only a single caller.
+ if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
+ return;
+
+ // We need to clone.
+
+ // Try to keep the original version as alloc type NotCold. This will make
+ // cases with indirect calls or any other situation with an unknown call to
+ // the original function get the default behavior. We do this by sorting the
+ // CallerEdges of the Node we will clone by alloc type.
+ //
+ // Give the NotCold edges the lowest sort priority so those edges are at the
+ // end of the caller edges vector, and stay on the original version (since the
+ // below code clones greedily until it finds all remaining edges have the same
+ // type and leaves the remaining ones on the original Node).
+ //
+ // We shouldn't actually have any None type edges, so the sorting priority for
+ // that is arbitrary, and we assert in that case below.
+ const unsigned AllocTypeCloningPriority[] = {/*None*/ 3, /*NotCold*/ 4,
+ /*Cold*/ 1,
+ /*NotColdCold*/ 2};
+ std::stable_sort(Node->CallerEdges.begin(), Node->CallerEdges.end(),
+ [&](const std::shared_ptr<ContextEdge> &A,
+ const std::shared_ptr<ContextEdge> &B) {
+ assert(checkColdOrNotCold(A->AllocTypes) &&
+ checkColdOrNotCold(B->AllocTypes));
+
+ if (A->AllocTypes == B->AllocTypes)
+ // Use the first context id for each edge as a
+ // tie-breaker.
+ return *A->ContextIds.begin() < *B->ContextIds.begin();
+ return AllocTypeCloningPriority[A->AllocTypes] <
+ AllocTypeCloningPriority[B->AllocTypes];
+ });
+
+ assert(Node->AllocTypes != (uint8_t)AllocationType::None);
+
+ // Iterate until we find no more opportunities for disambiguating the alloc
+ // types via cloning. In most cases this loop will terminate once the Node
+ // has a single allocation type, in which case no more cloning is needed.
+ // We need to be able to remove Edge from CallerEdges, so we need to adjust
+ // the iterator inside the loop.
+ for (auto EI = Node->CallerEdges.begin(); EI != Node->CallerEdges.end();) {
+ auto CallerEdge = *EI;
+
+ // See if cloning the prior caller edge left this node with a single alloc
+ // type or a single caller. In that case no more cloning of Node is needed.
+ if (hasSingleAllocType(Node->AllocTypes) || Node->CallerEdges.size() <= 1)
+ break;
+
+ // Compute the node callee edge alloc types corresponding to the context ids
+ // for this caller edge.
+ std::vector<uint8_t> CalleeEdgeAllocTypesForCallerEdge;
+ CalleeEdgeAllocTypesForCallerEdge.reserve(Node->CalleeEdges.size());
+ for (auto &CalleeEdge : Node->CalleeEdges)
+ CalleeEdgeAllocTypesForCallerEdge.push_back(intersectAllocTypes(
+ CalleeEdge->getContextIds(), CallerEdge->getContextIds()));
+
+ // Don't clone if doing so will not disambiguate any alloc types amongst
+ // caller edges (including the callee edges that would be cloned).
+ // Otherwise we will simply move all edges to the clone.
+ //
+ // First check if by cloning we will disambiguate the caller allocation
+ // type from node's allocation type. Query allocTypeToUse so that we don't
+ // bother cloning to distinguish NotCold+Cold from NotCold. Note that
+ // neither of these should be None type.
+ //
+ // Then check if by cloning node at least one of the callee edges will be
+ // disambiguated by splitting out different context ids.
+ assert(CallerEdge->AllocTypes != (uint8_t)AllocationType::None);
+ assert(Node->AllocTypes != (uint8_t)AllocationType::None);
+ if (allocTypeToUse(CallerEdge->AllocTypes) ==
+ allocTypeToUse(Node->AllocTypes) &&
+ allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
+ CalleeEdgeAllocTypesForCallerEdge, Node->CalleeEdges)) {
+ ++EI;
+ continue;
+ }
+
+ // First see if we can use an existing clone. Check each clone and its
+ // callee edges for matching alloc types.
+ ContextNode *Clone = nullptr;
+ for (auto *CurClone : Node->Clones) {
+ if (allocTypeToUse(CurClone->AllocTypes) !=
+ allocTypeToUse(CallerEdge->AllocTypes))
+ continue;
+
+ if (!allocTypesMatch<DerivedCCG, FuncTy, CallTy>(
+ CalleeEdgeAllocTypesForCallerEdge, CurClone->CalleeEdges))
+ continue;
+ Clone = CurClone;
+ break;
+ }
+
+ // The edge iterator is adjusted when we move the CallerEdge to the clone.
+ if (Clone)
+ moveEdgeToExistingCalleeClone(CallerEdge, Clone, &EI);
+ else
+ Clone = moveEdgeToNewCalleeClone(CallerEdge, &EI);
+
+ assert(EI == Node->CallerEdges.end() ||
+ Node->AllocTypes != (uint8_t)AllocationType::None);
+ // Sanity check that no alloc types on clone or its edges are None.
+ assert(Clone->AllocTypes != (uint8_t)AllocationType::None);
+ assert(llvm::none_of(
+ Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
+ return E->AllocTypes == (uint8_t)AllocationType::None;
+ }));
+ }
+
+ // Cloning may have resulted in some cloned callee edges with type None,
+ // because they aren't carrying any contexts. Remove those edges.
+ for (auto *Clone : Node->Clones) {
+ removeNoneTypeCalleeEdges(Clone);
+ if (VerifyNodes)
+ checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
+ }
+ // We should still have some context ids on the original Node.
+ assert(!Node->ContextIds.empty());
+
+ // Remove any callee edges that ended up with alloc type None after creating
+ // clones and updating callee edges.
+ removeNoneTypeCalleeEdges(Node);
+
+ // Sanity check that no alloc types on node or edges are None.
+ assert(Node->AllocTypes != (uint8_t)AllocationType::None);
+ assert(llvm::none_of(Node->CalleeEdges,
+ [&](const std::shared_ptr<ContextEdge> &E) {
+ return E->AllocTypes == (uint8_t)AllocationType::None;
+ }));
+ assert(llvm::none_of(Node->CallerEdges,
+ [&](const std::shared_ptr<ContextEdge> &E) {
+ return E->AllocTypes == (uint8_t)AllocationType::None;
+ }));
+
+ if (VerifyNodes)
+ checkNode<DerivedCCG, FuncTy, CallTy>(Node);
+}
+
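+// For regular LTO, record the allocation type decision by adding a "memprof"
+// attribute to the allocation call in the IR, and emit an optimization remark.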
+void ModuleCallsiteContextGraph::updateAllocationCall(
+ CallInfo &Call, AllocationType AllocType) {
+ std::string AllocTypeString = getAllocTypeAttributeString(AllocType);
+ auto A = llvm::Attribute::get(Call.call()->getFunction()->getContext(),
+ "memprof", AllocTypeString);
+ cast<CallBase>(Call.call())->addFnAttr(A);
+ OREGetter(Call.call()->getFunction())
+ .emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", Call.call())
+ << ore::NV("AllocationCall", Call.call()) << " in clone "
+ << ore::NV("Caller", Call.call()->getFunction())
+ << " marked with memprof allocation attribute "
+ << ore::NV("Attribute", AllocTypeString));
+}
+
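+// For ThinLTO, record the allocation type decision in the summary's Versions
+// array entry for this clone.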
+void IndexCallsiteContextGraph::updateAllocationCall(CallInfo &Call,
+ AllocationType AllocType) {
+ auto *AI = Call.call().dyn_cast<AllocInfo *>();
+ assert(AI);
+ assert(AI->Versions.size() > Call.cloneNo());
+ AI->Versions[Call.cloneNo()] = (uint8_t)AllocType;
+}
+
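+// For regular LTO, update the IR call to invoke the assigned function clone
+// (clone 0 is the original function) and emit an optimization remark.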
+void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
+ FuncInfo CalleeFunc) {
+ if (CalleeFunc.cloneNo() > 0)
+ cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
+ OREGetter(CallerCall.call()->getFunction())
+ .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call())
+ << ore::NV("Call", CallerCall.call()) << " in clone "
+ << ore::NV("Caller", CallerCall.call()->getFunction())
+ << " assigned to call function clone "
+ << ore::NV("Callee", CalleeFunc.func()));
+}
+
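+// For ThinLTO, record in the callsite summary which clone of the callee
+// function this callsite clone should call.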
+void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
+ FuncInfo CalleeFunc) {
+ auto *CI = CallerCall.call().dyn_cast<CallsiteInfo *>();
+ assert(CI &&
+ "Caller cannot be an allocation which should not have profiled calls");
+ assert(CI->Clones.size() > CallerCall.cloneNo());
+ CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
+}
+
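+// For regular LTO, clone the function containing Call, filling CallMap with
+// the mapping from each original call with metadata to its copy in the clone.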
+CallsiteContextGraph<ModuleCallsiteContextGraph, Function,
+ Instruction *>::FuncInfo
+ModuleCallsiteContextGraph::cloneFunctionForCallsite(
+ FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
+ std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
+ // Use existing LLVM facilities for cloning and obtaining Call in the clone.
+ ValueToValueMapTy VMap;
+ auto *NewFunc = CloneFunction(Func.func(), VMap);
+ std::string Name = getMemProfFuncName(Func.func()->getName(), CloneNo);
+ assert(!Func.func()->getParent()->getFunction(Name));
+ NewFunc->setName(Name);
+ for (auto &Inst : CallsWithMetadataInFunc) {
+ // This map always has the initial version in it.
+ assert(Inst.cloneNo() == 0);
+ CallMap[Inst] = {cast<Instruction>(VMap[Inst.call()]), CloneNo};
+ }
+ OREGetter(Func.func())
+ .emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", Func.func())
+ << "created clone " << ore::NV("NewFunction", NewFunc));
+ return {NewFunc, CloneNo};
+}
+
+CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
+ IndexCall>::FuncInfo
+IndexCallsiteContextGraph::cloneFunctionForCallsite(
+ FuncInfo &Func, CallInfo &Call, std::map<CallInfo, CallInfo> &CallMap,
+ std::vector<CallInfo> &CallsWithMetadataInFunc, unsigned CloneNo) {
+ // Check how many clones we have of Call (and therefore function).
+ // The next clone number is the current size of versions array.
+ // Confirm this matches the CloneNo provided by the caller, which is based on
+ // the number of function clones we have.
+ assert(CloneNo ==
+ (Call.call().is<AllocInfo *>()
+ ? Call.call().dyn_cast<AllocInfo *>()->Versions.size()
+ : Call.call().dyn_cast<CallsiteInfo *>()->Clones.size()));
+ // Walk all the instructions in this function. Create a new version for
+ // each (by adding an entry to the Versions/Clones summary array), and copy
+ // over the version being called for the function clone being cloned here.
+ // Additionally, add an entry to the CallMap for the new function clone,
+ // mapping the original call (clone 0, what is in CallsWithMetadataInFunc)
+ // to the new call clone.
+ for (auto &Inst : CallsWithMetadataInFunc) {
+ // This map always has the initial version in it.
+ assert(Inst.cloneNo() == 0);
+ if (auto *AI = Inst.call().dyn_cast<AllocInfo *>()) {
+ assert(AI->Versions.size() == CloneNo);
+ // We assign the allocation type later (in updateAllocationCall); just add
+ // an entry for it here.
+ AI->Versions.push_back(0);
+ } else {
+ auto *CI = Inst.call().dyn_cast<CallsiteInfo *>();
+ assert(CI && CI->Clones.size() == CloneNo);
+ // We assign the clone number later (in updateCall); just add an entry for
+ // it here.
+ CI->Clones.push_back(0);
+ }
+ CallMap[Inst] = {Inst.call(), CloneNo};
+ }
+ return {Func.func(), CloneNo};
+}
+
+// This method assigns cloned callsites to functions, cloning the functions as
+// needed. The assignment is greedy and proceeds roughly as follows:
+//
+// For each function Func:
+// For each call with graph Node having clones:
+// Initialize ClonesWorklist to Node and its clones
+// Initialize NodeCloneCount to 0
+// While ClonesWorklist is not empty:
+// Clone = pop front ClonesWorklist
+// NodeCloneCount++
+// If Func has been cloned less than NodeCloneCount times:
+// If NodeCloneCount is 1:
+// Assign Clone to original Func
+// Continue
+// Create a new function clone
+// If other callers not assigned to call a function clone yet:
+// Assign them to call new function clone
+// Continue
+// Assign any other caller calling the cloned version to new clone
+//
+// For each caller of Clone:
+// If caller is assigned to call a specific function clone:
+// If we cannot assign Clone to that function clone:
+// Create new callsite Clone NewClone
+// Add NewClone to ClonesWorklist
+// Continue
+// Assign Clone to existing caller's called function clone
+// Else:
+// If Clone not already assigned to a function clone:
+// Assign to first function clone without assignment
+// Assign caller to selected function clone
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
+ bool Changed = false;
+
+ // Keep track of the assignment of nodes (callsites) to function clones they
+ // call.
+ DenseMap<ContextNode *, FuncInfo> CallsiteToCalleeFuncCloneMap;
+
+ // Update caller node to call function version CalleeFunc, by recording the
+ // assignment in CallsiteToCalleeFuncCloneMap.
+ auto RecordCalleeFuncOfCallsite = [&](ContextNode *Caller,
+ const FuncInfo &CalleeFunc) {
+ assert(Caller->hasCall());
+ CallsiteToCalleeFuncCloneMap[Caller] = CalleeFunc;
+ };
+
+ // Walk all functions for which we saw calls with memprof metadata, and handle
+ // cloning for each of their calls.
+ for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
+ FuncInfo OrigFunc(Func);
+ // Map from each clone of OrigFunc to a map of remappings of each call of
+ // interest (from original uncloned call to the corresponding cloned call in
+ // that function clone).
+ std::map<FuncInfo, std::map<CallInfo, CallInfo>> FuncClonesToCallMap;
+ for (auto &Call : CallsWithMetadata) {
+ ContextNode *Node = getNodeForInst(Call);
+ // Skip call if we do not have a node for it (all uses of its stack ids
+ // were either on inlined chains or pruned from the MIBs), or if we did
+ // not create any clones for it.
+ if (!Node || Node->Clones.empty())
+ continue;
+ assert(Node->hasCall() &&
+ "Not having a call should have prevented cloning");
+
+ // Track the assignment of function clones to clones of the current
+ // callsite Node being handled.
+ std::map<FuncInfo, ContextNode *> FuncCloneToCurNodeCloneMap;
+
+ // Assign callsite version CallsiteClone to function version FuncClone,
+ // and also assign (possibly cloned) Call to CallsiteClone.
+ auto AssignCallsiteCloneToFuncClone = [&](const FuncInfo &FuncClone,
+ CallInfo &Call,
+ ContextNode *CallsiteClone,
+ bool IsAlloc) {
+ // Record the clone of callsite node assigned to this function clone.
+ FuncCloneToCurNodeCloneMap[FuncClone] = CallsiteClone;
+
+ assert(FuncClonesToCallMap.count(FuncClone));
+ std::map<CallInfo, CallInfo> &CallMap = FuncClonesToCallMap[FuncClone];
+ CallInfo CallClone(Call);
+ if (CallMap.count(Call))
+ CallClone = CallMap[Call];
+ CallsiteClone->setCall(CallClone);
+ };
+
+ // Keep track of the clones of callsite Node that need to be assigned to
+ // function clones. This list may be expanded in the loop body below if we
+ // find additional cloning is required.
+ std::deque<ContextNode *> ClonesWorklist;
+ // Ignore original Node if we moved all of its contexts to clones.
+ if (!Node->ContextIds.empty())
+ ClonesWorklist.push_back(Node);
+ ClonesWorklist.insert(ClonesWorklist.end(), Node->Clones.begin(),
+ Node->Clones.end());
+
+ // Now walk through all of the clones of this callsite Node that we need,
+ // and determine the assignment to a corresponding clone of the current
+ // function (creating new function clones as needed).
+ unsigned NodeCloneCount = 0;
+ while (!ClonesWorklist.empty()) {
+ ContextNode *Clone = ClonesWorklist.front();
+ ClonesWorklist.pop_front();
+ NodeCloneCount++;
+ if (VerifyNodes)
+ checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
+
+ // Need to create a new function clone if we have more callsite clones
+ // than existing function clones, which would have been assigned to an
+ // earlier clone in the list (we assign callsite clones to function
+ // clones greedily).
+ if (FuncClonesToCallMap.size() < NodeCloneCount) {
+ // If this is the first callsite copy, assign to original function.
+ if (NodeCloneCount == 1) {
+ // Since FuncClonesToCallMap is empty in this case, no clones have
+ // been created for this function yet, and no callers should have
+ // been assigned a function clone for this callee node yet.
+ assert(llvm::none_of(
+ Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
+ return CallsiteToCalleeFuncCloneMap.count(E->Caller);
+ }));
+ // Initialize with empty call map, assign Clone to original function
+ // and its callers, and skip to the next clone.
+ FuncClonesToCallMap[OrigFunc] = {};
+ AssignCallsiteCloneToFuncClone(
+ OrigFunc, Call, Clone,
+ AllocationCallToContextNodeMap.count(Call));
+ for (auto &CE : Clone->CallerEdges) {
+ // Ignore any caller that does not have a recorded callsite Call.
+ if (!CE->Caller->hasCall())
+ continue;
+ RecordCalleeFuncOfCallsite(CE->Caller, OrigFunc);
+ }
+ continue;
+ }
+
+ // First locate which copy of OrigFunc to clone again. If a caller
+ // of this callsite clone was already assigned to call a particular
+ // function clone, we need to redirect all of those callers to the
+ // new function clone, and update their other callees within this
+ // function.
+ FuncInfo PreviousAssignedFuncClone;
+ auto EI = llvm::find_if(
+ Clone->CallerEdges, [&](const std::shared_ptr<ContextEdge> &E) {
+ return CallsiteToCalleeFuncCloneMap.count(E->Caller);
+ });
+ bool CallerAssignedToCloneOfFunc = false;
+ if (EI != Clone->CallerEdges.end()) {
+ const std::shared_ptr<ContextEdge> &Edge = *EI;
+ PreviousAssignedFuncClone =
+ CallsiteToCalleeFuncCloneMap[Edge->Caller];
+ CallerAssignedToCloneOfFunc = true;
+ }
+
+ // Clone function and save it along with the CallInfo map created
+ // during cloning in the FuncClonesToCallMap.
+ std::map<CallInfo, CallInfo> NewCallMap;
+ unsigned CloneNo = FuncClonesToCallMap.size();
+ assert(CloneNo > 0 && "Clone 0 is the original function, which "
+ "should already exist in the map");
+ FuncInfo NewFuncClone = cloneFunctionForCallsite(
+ OrigFunc, Call, NewCallMap, CallsWithMetadata, CloneNo);
+ FuncClonesToCallMap.emplace(NewFuncClone, std::move(NewCallMap));
+ FunctionClonesAnalysis++;
+ Changed = true;
+
+ // If no caller callsites were already assigned to a clone of this
+ // function, we can simply assign this clone to the new func clone
+ // and update all callers to it, then skip to the next clone.
+ if (!CallerAssignedToCloneOfFunc) {
+ AssignCallsiteCloneToFuncClone(
+ NewFuncClone, Call, Clone,
+ AllocationCallToContextNodeMap.count(Call));
+ for (auto &CE : Clone->CallerEdges) {
+ // Ignore any caller that does not have a recorded callsite Call.
+ if (!CE->Caller->hasCall())
+ continue;
+ RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);
+ }
+ continue;
+ }
+
+ // We may need to do additional node cloning in this case.
+ // Reset the CallsiteToCalleeFuncCloneMap entry for any callers
+ // that were previously assigned to call PreviousAssignedFuncClone,
+ // to record that they now call NewFuncClone.
+ for (auto CE : Clone->CallerEdges) {
+ // Ignore any caller that does not have a recorded callsite Call.
+ if (!CE->Caller->hasCall())
+ continue;
+
+ if (!CallsiteToCalleeFuncCloneMap.count(CE->Caller) ||
+ // We subsequently fall through to later handling that
+ // will perform any additional cloning required for
+ // callers that were calling other function clones.
+ CallsiteToCalleeFuncCloneMap[CE->Caller] !=
+ PreviousAssignedFuncClone)
+ continue;
+
+ RecordCalleeFuncOfCallsite(CE->Caller, NewFuncClone);
+
+ // If we are cloning a function that was already assigned to some
+ // callers, then essentially we are creating new callsite clones
+ // of the other callsites in that function that are reached by those
+ // callers. Clone the other callees of the current callsite's caller
+ // that were already assigned to PreviousAssignedFuncClone
+ // accordingly. This is important since we subsequently update the
+ // calls from the nodes in the graph and their assignments to callee
+ // functions recorded in CallsiteToCalleeFuncCloneMap.
+ for (auto CalleeEdge : CE->Caller->CalleeEdges) {
+ // Skip any that have been removed on an earlier iteration when
+ // cleaning up newly None type callee edges.
+ if (!CalleeEdge)
+ continue;
+ ContextNode *Callee = CalleeEdge->Callee;
+ // Skip the current callsite; we are looking for other
+ // callsites Caller calls, as well as any that do not have a
+ // recorded callsite Call.
+ if (Callee == Clone || !Callee->hasCall())
+ continue;
+ ContextNode *NewClone = moveEdgeToNewCalleeClone(CalleeEdge);
+ removeNoneTypeCalleeEdges(NewClone);
+ // Moving the edge may have resulted in some none type
+ // callee edges on the original Callee.
+ removeNoneTypeCalleeEdges(Callee);
+ assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
+ // If the Callee node was already assigned to call a specific
+ // function version, make sure its new clone is assigned to call
+ // that same function clone.
+ if (CallsiteToCalleeFuncCloneMap.count(Callee))
+ RecordCalleeFuncOfCallsite(
+ NewClone, CallsiteToCalleeFuncCloneMap[Callee]);
+ // Update NewClone with the new Call clone of this callsite's Call
+ // created for the new function clone created earlier.
+ // Recall that we have already ensured when building the graph
+ // that each caller can only call callsites within the same
+ // function, so we are guaranteed that Callee Call is in the
+ // current OrigFunc.
+ // CallMap is set up as indexed by original Call at clone 0.
+ CallInfo OrigCall(Callee->getOrigNode()->Call);
+ OrigCall.setCloneNo(0);
+ std::map<CallInfo, CallInfo> &CallMap =
+ FuncClonesToCallMap[NewFuncClone];
+ assert(CallMap.count(OrigCall));
+ CallInfo NewCall(CallMap[OrigCall]);
+ assert(NewCall);
+ NewClone->setCall(NewCall);
+ }
+ }
+ // Fall through to handling below to perform the recording of the
+ // function for this callsite clone. This enables handling of cases
+ // where the callers were assigned to different clones of a function.
+ }
+
+ // See if we can use existing function clone. Walk through
+ // all caller edges to see if any have already been assigned to
+ // a clone of this callsite's function. If we can use it, do so. If not,
+ // because that function clone is already assigned to a different clone
+ // of this callsite, then we need to clone again.
+ // Basically, this checking is needed to handle the case where different
+ // caller functions/callsites may need versions of this function
+ // containing different mixes of callsite clones across the different
+ // callsites within the function. If that happens, we need to create
+ // additional function clones to handle the various combinations.
+ //
+ // Keep track of any new clones of this callsite created by the
+ // following loop, as well as any existing clone that we decided to
+ // assign this clone to.
+ std::map<FuncInfo, ContextNode *> FuncCloneToNewCallsiteCloneMap;
+ FuncInfo FuncCloneAssignedToCurCallsiteClone;
+ // We need to be able to remove Edge from CallerEdges, so we need to adjust
+ // the iterator in the loop.
+ for (auto EI = Clone->CallerEdges.begin();
+ EI != Clone->CallerEdges.end();) {
+ auto Edge = *EI;
+ // Ignore any caller that does not have a recorded callsite Call.
+ if (!Edge->Caller->hasCall()) {
+ EI++;
+ continue;
+ }
+ // If this caller is already assigned to call a version of OrigFunc, we need
+ // to ensure we can assign this callsite clone to that function clone.
+ if (CallsiteToCalleeFuncCloneMap.count(Edge->Caller)) {
+ FuncInfo FuncCloneCalledByCaller =
+ CallsiteToCalleeFuncCloneMap[Edge->Caller];
+ // First we need to confirm that this function clone is available
+ // for use by this callsite node clone.
+ //
+ // While FuncCloneToCurNodeCloneMap is built only for this Node and
+ // its callsite clones, one of those callsite clones X could have
+ // been assigned to the same function clone called by Edge's caller
+ // - if Edge's caller calls another callsite within Node's original
+ // function, and that callsite has another caller reaching clone X.
+ // We need to clone Node again in this case.
+ if ((FuncCloneToCurNodeCloneMap.count(FuncCloneCalledByCaller) &&
+ FuncCloneToCurNodeCloneMap[FuncCloneCalledByCaller] !=
+ Clone) ||
+ // Detect when we have multiple callers of this callsite that
+ // have already been assigned to specific, and different, clones
+ // of OrigFunc (due to other unrelated callsites in Func they
+ // reach via call contexts). Is this Clone of callsite Node
+ // assigned to a different clone of OrigFunc? If so, clone Node
+ // again.
+ (FuncCloneAssignedToCurCallsiteClone &&
+ FuncCloneAssignedToCurCallsiteClone !=
+ FuncCloneCalledByCaller)) {
+ // We need to use a different newly created callsite clone, in
+ // order to assign it to another new function clone on a
+ // subsequent iteration over the Clones array (adjusted below).
+ // Note we specifically do not reset the
+ // CallsiteToCalleeFuncCloneMap entry for this caller, so that
+ // when this new clone is processed later we know which version of
+ // the function to copy (so that other callsite clones we have
+ // assigned to that function clone are properly cloned over). See
+ // comments in the function cloning handling earlier.
+
+ // Check if we already have cloned this callsite again while
+ // walking through caller edges, for a caller calling the same
+ // function clone. If so, we can move this edge to that new clone
+ // rather than creating yet another new clone.
+ if (FuncCloneToNewCallsiteCloneMap.count(
+ FuncCloneCalledByCaller)) {
+ ContextNode *NewClone =
+ FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller];
+ moveEdgeToExistingCalleeClone(Edge, NewClone, &EI);
+ // Cleanup any none type edges cloned over.
+ removeNoneTypeCalleeEdges(NewClone);
+ } else {
+ // Create a new callsite clone.
+ ContextNode *NewClone = moveEdgeToNewCalleeClone(Edge, &EI);
+ removeNoneTypeCalleeEdges(NewClone);
+ FuncCloneToNewCallsiteCloneMap[FuncCloneCalledByCaller] =
+ NewClone;
+ // Add to list of clones and process later.
+ ClonesWorklist.push_back(NewClone);
+ assert(EI == Clone->CallerEdges.end() ||
+ Clone->AllocTypes != (uint8_t)AllocationType::None);
+ assert(NewClone->AllocTypes != (uint8_t)AllocationType::None);
+ }
+ // Moving the caller edge may have resulted in some none type
+ // callee edges.
+ removeNoneTypeCalleeEdges(Clone);
+ // We will handle the newly created callsite clone in a subsequent
+ // iteration over this Node's Clones. Continue here since we
+ // already adjusted iterator EI while moving the edge.
+ continue;
+ }
+
+ // Otherwise, we can use the function clone already assigned to this
+ // caller.
+ if (!FuncCloneAssignedToCurCallsiteClone) {
+ FuncCloneAssignedToCurCallsiteClone = FuncCloneCalledByCaller;
+ // Assign Clone to FuncCloneCalledByCaller
+ AssignCallsiteCloneToFuncClone(
+ FuncCloneCalledByCaller, Call, Clone,
+ AllocationCallToContextNodeMap.count(Call));
+ } else
+ // Don't need to do anything - callsite is already calling this
+ // function clone.
+ assert(FuncCloneAssignedToCurCallsiteClone ==
+ FuncCloneCalledByCaller);
+
+ } else {
+ // We have not already assigned this caller to a version of
+ // OrigFunc. Do the assignment now.
+
+ // First check if we have already assigned this callsite clone to a
+ // clone of OrigFunc for another caller during this iteration over
+ // its caller edges.
+ if (!FuncCloneAssignedToCurCallsiteClone) {
+ // Find first function in FuncClonesToCallMap without an assigned
+ // clone of this callsite Node. We should always have one
+ // available at this point due to the earlier cloning when the
+ // FuncClonesToCallMap size was smaller than the clone number.
+ for (auto &CF : FuncClonesToCallMap) {
+ if (!FuncCloneToCurNodeCloneMap.count(CF.first)) {
+ FuncCloneAssignedToCurCallsiteClone = CF.first;
+ break;
+ }
+ }
+ assert(FuncCloneAssignedToCurCallsiteClone);
+ // Assign Clone to FuncCloneAssignedToCurCallsiteClone
+ AssignCallsiteCloneToFuncClone(
+ FuncCloneAssignedToCurCallsiteClone, Call, Clone,
+ AllocationCallToContextNodeMap.count(Call));
+ } else
+ assert(FuncCloneToCurNodeCloneMap
+ [FuncCloneAssignedToCurCallsiteClone] == Clone);
+ // Update callers to record function version called.
+ RecordCalleeFuncOfCallsite(Edge->Caller,
+ FuncCloneAssignedToCurCallsiteClone);
+ }
+
+ EI++;
+ }
+ }
+ if (VerifyCCG) {
+ checkNode<DerivedCCG, FuncTy, CallTy>(Node);
+ for (const auto &PE : Node->CalleeEdges)
+ checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
+ for (const auto &CE : Node->CallerEdges)
+ checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller);
+ for (auto *Clone : Node->Clones) {
+ checkNode<DerivedCCG, FuncTy, CallTy>(Clone);
+ for (const auto &PE : Clone->CalleeEdges)
+ checkNode<DerivedCCG, FuncTy, CallTy>(PE->Callee);
+ for (const auto &CE : Clone->CallerEdges)
+ checkNode<DerivedCCG, FuncTy, CallTy>(CE->Caller);
+ }
+ }
+ }
+ }
+
+ auto UpdateCalls = [&](ContextNode *Node,
+ DenseSet<const ContextNode *> &Visited,
+ auto &&UpdateCalls) {
+ auto Inserted = Visited.insert(Node);
+ if (!Inserted.second)
+ return;
+
+ for (auto *Clone : Node->Clones)
+ UpdateCalls(Clone, Visited, UpdateCalls);
+
+ for (auto &Edge : Node->CallerEdges)
+ UpdateCalls(Edge->Caller, Visited, UpdateCalls);
+
+ // Skip if either no call to update, or if we ended up with no context ids
+ // (we moved all edges onto other clones).
+ if (!Node->hasCall() || Node->ContextIds.empty())
+ return;
+
+ if (Node->IsAllocation) {
+ updateAllocationCall(Node->Call, allocTypeToUse(Node->AllocTypes));
+ return;
+ }
+
+ if (!CallsiteToCalleeFuncCloneMap.count(Node))
+ return;
+
+ auto CalleeFunc = CallsiteToCalleeFuncCloneMap[Node];
+ updateCall(Node->Call, CalleeFunc);
+ };
+
+ // Performs DFS traversal starting from allocation nodes to update calls to
+ // reflect cloning decisions recorded earlier. For regular LTO this will
+ // update the actual calls in the IR to call the appropriate function clone
+ // (and add attributes to allocation calls), whereas for ThinLTO the decisions
+ // are recorded in the summary entries.
+ DenseSet<const ContextNode *> Visited;
+ for (auto &Entry : AllocationCallToContextNodeMap)
+ UpdateCalls(Entry.second, Visited, UpdateCalls);
+
+ return Changed;
+}
+
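+// Create NumClones-1 additional copies of F in the ThinLTO backend (the first
+// "clone" is the original), stripping memprof/callsite metadata from the new
+// copies and cloning any aliases to F, and return the value maps for the new
+// function clones.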
+static SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> createFunctionClones(
+ Function &F, unsigned NumClones, Module &M, OptimizationRemarkEmitter &ORE,
+ std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
+ &FuncToAliasMap) {
+ // The first "clone" is the original copy, we should only call this if we
+ // needed to create new clones.
+ assert(NumClones > 1);
+ SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
+ VMaps.reserve(NumClones - 1);
+ FunctionsClonedThinBackend++;
+ for (unsigned I = 1; I < NumClones; I++) {
+ VMaps.emplace_back(std::make_unique<ValueToValueMapTy>());
+ auto *NewF = CloneFunction(&F, *VMaps.back());
+ FunctionClonesThinBackend++;
+ // Strip memprof and callsite metadata from clone as they are no longer
+ // needed.
+ for (auto &BB : *NewF) {
+ for (auto &Inst : BB) {
+ Inst.setMetadata(LLVMContext::MD_memprof, nullptr);
+ Inst.setMetadata(LLVMContext::MD_callsite, nullptr);
+ }
+ }
+ std::string Name = getMemProfFuncName(F.getName(), I);
+ auto *PrevF = M.getFunction(Name);
+ if (PrevF) {
+ // We might have created this when adjusting a callsite in another
+ // function. It should be a declaration.
+ assert(PrevF->isDeclaration());
+ NewF->takeName(PrevF);
+ PrevF->replaceAllUsesWith(NewF);
+ PrevF->eraseFromParent();
+ } else
+ NewF->setName(Name);
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofClone", &F)
+ << "created clone " << ore::NV("NewFunction", NewF));
+
+ // Now handle aliases to this function, and clone those as well.
+ if (!FuncToAliasMap.count(&F))
+ continue;
+ for (auto *A : FuncToAliasMap[&F]) {
+ std::string Name = getMemProfFuncName(A->getName(), I);
+ auto *PrevA = M.getNamedAlias(Name);
+ auto *NewA = GlobalAlias::create(A->getValueType(),
+ A->getType()->getPointerAddressSpace(),
+ A->getLinkage(), Name, NewF);
+ NewA->copyAttributesFrom(A);
+ if (PrevA) {
+ // We might have created this when adjusting a callsite in another
+ // function. It should be a declaration.
+ assert(PrevA->isDeclaration());
+ NewA->takeName(PrevA);
+ PrevA->replaceAllUsesWith(NewA);
+ PrevA->eraseFromParent();
+ }
+ }
+ }
+ return VMaps;
+}
+
+// Locate the summary for F. This is complicated by the fact that it might
+// have been internalized or promoted.
+static ValueInfo findValueInfoForFunc(const Function &F, const Module &M,
+ const ModuleSummaryIndex *ImportSummary) {
+ // FIXME: Ideally we would retain the original GUID in some fashion on the
+ // function (e.g. as metadata), but for now do our best to locate the
+ // summary without that information.
+ ValueInfo TheFnVI = ImportSummary->getValueInfo(F.getGUID());
+ if (!TheFnVI)
+ // See if the function was internalized, by checking the index directly with
+ // the original name (this avoids the name adjustment done by getGUID() for
+ // internal symbols).
+ TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(F.getName()));
+ if (TheFnVI)
+ return TheFnVI;
+ // Now query with the original name before any promotion was performed.
+ StringRef OrigName =
+ ModuleSummaryIndex::getOriginalNameBeforePromote(F.getName());
+ std::string OrigId = GlobalValue::getGlobalIdentifier(
+ OrigName, GlobalValue::InternalLinkage, M.getSourceFileName());
+ TheFnVI = ImportSummary->getValueInfo(GlobalValue::getGUID(OrigId));
+ if (TheFnVI)
+ return TheFnVI;
+ // Could be a promoted local imported from another module. We need to pass
+ // down more info here to find the original module id. For now, try with
+ // the OrigName which might have been stored in the OidGuidMap in the
+ // index. This would not work if there were same-named locals in multiple
+ // modules, however.
+ auto OrigGUID =
+ ImportSummary->getGUIDFromOriginalID(GlobalValue::getGUID(OrigName));
+ if (OrigGUID)
+ TheFnVI = ImportSummary->getValueInfo(OrigGUID);
+ return TheFnVI;
+}
+
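+// Apply the cloning decisions recorded in the import summary to the module in
+// the ThinLTO backend, creating function clones and updating allocation
+// attributes and call targets as needed.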
+bool MemProfContextDisambiguation::applyImport(Module &M) {
+ assert(ImportSummary);
+ bool Changed = false;
+
+ auto IsMemProfClone = [](const Function &F) {
+ return F.getName().contains(MemProfCloneSuffix);
+ };
+
+ // We also need to clone any aliases that reference cloned functions, because
+ // the modified callsites may invoke via the alias. Keep track of the aliases
+ // for each function.
+ std::map<const Function *, SmallPtrSet<const GlobalAlias *, 1>>
+ FuncToAliasMap;
+ for (auto &A : M.aliases()) {
+ auto *Aliasee = A.getAliaseeObject();
+ if (auto *F = dyn_cast<Function>(Aliasee))
+ FuncToAliasMap[F].insert(&A);
+ }
+
+ for (auto &F : M) {
+ if (F.isDeclaration() || IsMemProfClone(F))
+ continue;
+
+ OptimizationRemarkEmitter ORE(&F);
+
+ SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
+ bool ClonesCreated = false;
+ unsigned NumClonesCreated = 0;
+ auto CloneFuncIfNeeded = [&](unsigned NumClones) {
+ // We should at least have version 0, which is the original copy.
+ assert(NumClones > 0);
+ // If only one copy is needed, use the original.
+ if (NumClones == 1)
+ return;
+ // If we already performed cloning of this function, confirm that the
+ // requested number of clones matches (the thin link should ensure the
+ // number of clones for each constituent callsite is consistent within
+ // each function), before returning.
+ if (ClonesCreated) {
+ assert(NumClonesCreated == NumClones);
+ return;
+ }
+ VMaps = createFunctionClones(F, NumClones, M, ORE, FuncToAliasMap);
+ // The first "clone" is the original copy, which doesn't have a VMap.
+ assert(VMaps.size() == NumClones - 1);
+ Changed = true;
+ ClonesCreated = true;
+ NumClonesCreated = NumClones;
+ };
+
+ // Locate the summary for F.
+ ValueInfo TheFnVI = findValueInfoForFunc(F, M, ImportSummary);
+ // If not found, this could be an imported local (see comment in
+ // findValueInfoForFunc). Skip for now as it will be cloned in its original
+ // module (where it would have been promoted to global scope so should
+ // satisfy any reference in this module).
+ if (!TheFnVI)
+ continue;
+
+ auto *GVSummary =
+ ImportSummary->findSummaryInModule(TheFnVI, M.getModuleIdentifier());
+ if (!GVSummary)
+ // Must have been imported; use the first summary (there might be multiple
+ // if this was a linkonce_odr).
+ GVSummary = TheFnVI.getSummaryList().front().get();
+
+ // If this was an imported alias skip it as we won't have the function
+ // summary, and it should be cloned in the original module.
+ if (isa<AliasSummary>(GVSummary))
+ continue;
+
+ auto *FS = cast<FunctionSummary>(GVSummary->getBaseObject());
+
+ if (FS->allocs().empty() && FS->callsites().empty())
+ continue;
+
+ auto SI = FS->callsites().begin();
+ auto AI = FS->allocs().begin();
+
+ // Assume for now that the instructions are in the exact same order
+ // as when the summary was created, but confirm this is correct by
+ // matching the stack ids.
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ auto *CB = dyn_cast<CallBase>(&I);
+ // Same handling as when creating module summary.
+ if (!mayHaveMemprofSummary(CB))
+ continue;
+
+ CallStack<MDNode, MDNode::op_iterator> CallsiteContext(
+ I.getMetadata(LLVMContext::MD_callsite));
+ auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);
+
+ // Include allocs that were already assigned a memprof function
+ // attribute in the statistics.
+ if (CB->getAttributes().hasFnAttr("memprof")) {
+ assert(!MemProfMD);
+ CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold"
+ ? AllocTypeColdThinBackend++
+ : AllocTypeNotColdThinBackend++;
+ OrigAllocsThinBackend++;
+ AllocVersionsThinBackend++;
+ if (!MaxAllocVersionsThinBackend)
+ MaxAllocVersionsThinBackend = 1;
+ // Remove any remaining callsite metadata; we can skip the rest of the
+ // handling for this instruction, since no cloning is needed.
+ I.setMetadata(LLVMContext::MD_callsite, nullptr);
+ continue;
+ }
+
+ if (MemProfMD) {
+ // Consult the next alloc node.
+ assert(AI != FS->allocs().end());
+ auto &AllocNode = *(AI++);
+
+ // Sanity check that the MIB stack ids match between the summary and
+ // instruction metadata.
+ auto MIBIter = AllocNode.MIBs.begin();
+ for (auto &MDOp : MemProfMD->operands()) {
+ assert(MIBIter != AllocNode.MIBs.end());
+ LLVM_ATTRIBUTE_UNUSED auto StackIdIndexIter =
+ MIBIter->StackIdIndices.begin();
+ auto *MIBMD = cast<const MDNode>(MDOp);
+ MDNode *StackMDNode = getMIBStackNode(MIBMD);
+ assert(StackMDNode);
+ SmallVector<unsigned> StackIdsFromMetadata;
+ CallStack<MDNode, MDNode::op_iterator> StackContext(StackMDNode);
+ for (auto ContextIter =
+ StackContext.beginAfterSharedPrefix(CallsiteContext);
+ ContextIter != StackContext.end(); ++ContextIter) {
+ // If this is a direct recursion, simply skip the duplicate
+ // entries, to be consistent with how the summary ids were
+ // generated during ModuleSummaryAnalysis.
+ if (!StackIdsFromMetadata.empty() &&
+ StackIdsFromMetadata.back() == *ContextIter)
+ continue;
+ assert(StackIdIndexIter != MIBIter->StackIdIndices.end());
+ assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
+ *ContextIter);
+ StackIdIndexIter++;
+ }
+ MIBIter++;
+ }
+
+ // Perform cloning if not yet done.
+ CloneFuncIfNeeded(/*NumClones=*/AllocNode.Versions.size());
+
+ OrigAllocsThinBackend++;
+ AllocVersionsThinBackend += AllocNode.Versions.size();
+ if (MaxAllocVersionsThinBackend < AllocNode.Versions.size())
+ MaxAllocVersionsThinBackend = AllocNode.Versions.size();
+
+ // If there is only one version, that means we didn't end up
+ // considering this function for cloning, and in that case the alloc
+ // will still have type None or will have gotten the default NotCold.
+ // Skip it, but only after calling the clone helper, since that helper
+ // performs sanity checks confirming we haven't yet decided that
+ // cloning is needed.
+ if (AllocNode.Versions.size() == 1) {
+ assert((AllocationType)AllocNode.Versions[0] ==
+ AllocationType::NotCold ||
+ (AllocationType)AllocNode.Versions[0] ==
+ AllocationType::None);
+ UnclonableAllocsThinBackend++;
+ continue;
+ }
+
+ // All versions should have a singular allocation type.
+ assert(llvm::none_of(AllocNode.Versions, [](uint8_t Type) {
+ return Type == ((uint8_t)AllocationType::NotCold |
+ (uint8_t)AllocationType::Cold);
+ }));
+
+ // Update the allocation types per the summary info.
+ for (unsigned J = 0; J < AllocNode.Versions.size(); J++) {
+ // Ignore any that didn't get an assigned allocation type.
+ if (AllocNode.Versions[J] == (uint8_t)AllocationType::None)
+ continue;
+ AllocationType AllocTy = (AllocationType)AllocNode.Versions[J];
+ AllocTy == AllocationType::Cold ? AllocTypeColdThinBackend++
+ : AllocTypeNotColdThinBackend++;
+ std::string AllocTypeString = getAllocTypeAttributeString(AllocTy);
+ auto A = llvm::Attribute::get(F.getContext(), "memprof",
+ AllocTypeString);
+ CallBase *CBClone;
+ // Copy 0 is the original function.
+ if (!J)
+ CBClone = CB;
+ else
+ // Since VMaps are only created for new clones, we index with
+ // clone J-1 (J==0 is the original clone and does not have a VMaps
+ // entry).
+ CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
+ CBClone->addFnAttr(A);
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofAttribute", CBClone)
+ << ore::NV("AllocationCall", CBClone) << " in clone "
+ << ore::NV("Caller", CBClone->getFunction())
+ << " marked with memprof allocation attribute "
+ << ore::NV("Attribute", AllocTypeString));
+ }
+ } else if (!CallsiteContext.empty()) {
+ // Consult the next callsite node.
+ assert(SI != FS->callsites().end());
+ auto &StackNode = *(SI++);
+
+#ifndef NDEBUG
+ // Sanity check that the stack ids match between the summary and
+ // instruction metadata.
+ auto StackIdIndexIter = StackNode.StackIdIndices.begin();
+ for (auto StackId : CallsiteContext) {
+ assert(StackIdIndexIter != StackNode.StackIdIndices.end());
+ assert(ImportSummary->getStackIdAtIndex(*StackIdIndexIter) ==
+ StackId);
+ StackIdIndexIter++;
+ }
+#endif
+
+ // Perform cloning if not yet done.
+ CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size());
+
+ // Should have skipped indirect calls via mayHaveMemprofSummary.
+ assert(CB->getCalledFunction());
+ assert(!IsMemProfClone(*CB->getCalledFunction()));
+
+ // Update the calls per the summary info.
+ // Save orig name since it gets updated in the first iteration
+ // below.
+ auto CalleeOrigName = CB->getCalledFunction()->getName();
+ for (unsigned J = 0; J < StackNode.Clones.size(); J++) {
+ // Do nothing if this version calls the original version of its
+ // callee.
+ if (!StackNode.Clones[J])
+ continue;
+ auto NewF = M.getOrInsertFunction(
+ getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]),
+ CB->getCalledFunction()->getFunctionType());
+ CallBase *CBClone;
+ // Copy 0 is the original function.
+ if (!J)
+ CBClone = CB;
+ else
+ CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
+ CBClone->setCalledFunction(NewF);
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
+ << ore::NV("Call", CBClone) << " in clone "
+ << ore::NV("Caller", CBClone->getFunction())
+ << " assigned to call function clone "
+ << ore::NV("Callee", NewF.getCallee()));
+ }
+ }
+ // Memprof and callsite metadata on memory allocations no longer needed.
+ I.setMetadata(LLVMContext::MD_memprof, nullptr);
+ I.setMetadata(LLVMContext::MD_callsite, nullptr);
+ }
+ }
+ }
+
+ return Changed;
+}
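+// Illustrative sketch of a downstream consumer (hypothetical, not part of the
+// original source): after applyImport, allocation calls carry a string
+// function attribute "memprof" recording the assigned allocation type (the
+// code above checks for the value "cold"), which could be queried as:
+//
+//   bool IsColdAlloc =
+//       CB->getAttributes().hasFnAttr("memprof") &&
+//       CB->getAttributes().getFnAttr("memprof").getValueAsString() == "cold";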
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
+ if (DumpCCG) {
+ dbgs() << "CCG before cloning:\n";
+ dbgs() << *this;
+ }
+ if (ExportToDot)
+ exportToDot("postbuild");
+
+ if (VerifyCCG) {
+ check();
+ }
+
+ identifyClones();
+
+ if (VerifyCCG) {
+ check();
+ }
+
+ if (DumpCCG) {
+ dbgs() << "CCG after cloning:\n";
+ dbgs() << *this;
+ }
+ if (ExportToDot)
+ exportToDot("cloned");
+
+ bool Changed = assignFunctions();
+
+ if (DumpCCG) {
+ dbgs() << "CCG after assigning function clones:\n";
+ dbgs() << *this;
+ }
+ if (ExportToDot)
+ exportToDot("clonefuncassign");
+
+ return Changed;
+}
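+// For reference, process() is instantiated for both concrete graph types used
+// below: ModuleCallsiteContextGraph (built from the IR in processModule) and
+// IndexCallsiteContextGraph (built from the summary index in the index-based
+// run() overload).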
+
+bool MemProfContextDisambiguation::processModule(
+ Module &M,
+ function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter) {
+
+ // If we have an import summary, then the cloning decisions were made during
+ // the thin link on the index. Apply them and return.
+ if (ImportSummary)
+ return applyImport(M);
+
+ // TODO: If/when other types of memprof cloning are enabled beyond just for
+ // hot and cold, we will need to change this to individually control the
+ // AllocationType passed to addStackNodesForMIB during CCG construction.
+ // Note that we specifically check this after applying imports above, so that
+ // the option doesn't need to be passed to distributed ThinLTO backend
+ // clang processes, which won't necessarily have visibility into the linker
+ // dependences. Instead, the information is communicated from the LTO link to
+ // the backends via the combined summary index.
+ if (!SupportsHotColdNew)
+ return false;
+
+ ModuleCallsiteContextGraph CCG(M, OREGetter);
+ return CCG.process();
+}
+
+MemProfContextDisambiguation::MemProfContextDisambiguation(
+ const ModuleSummaryIndex *Summary)
+ : ImportSummary(Summary) {
+ if (ImportSummary) {
+ // The MemProfImportSummary should only be used for testing ThinLTO
+ // distributed backend handling via opt, in which case we don't have a
+ // summary from the pass pipeline.
+ assert(MemProfImportSummary.empty());
+ return;
+ }
+ if (MemProfImportSummary.empty())
+ return;
+
+ auto ReadSummaryFile =
+ errorOrToExpected(MemoryBuffer::getFile(MemProfImportSummary));
+ if (!ReadSummaryFile) {
+ logAllUnhandledErrors(ReadSummaryFile.takeError(), errs(),
+ "Error loading file '" + MemProfImportSummary +
+ "': ");
+ return;
+ }
+ auto ImportSummaryForTestingOrErr = getModuleSummaryIndex(**ReadSummaryFile);
+ if (!ImportSummaryForTestingOrErr) {
+ logAllUnhandledErrors(ImportSummaryForTestingOrErr.takeError(), errs(),
+ "Error parsing file '" + MemProfImportSummary +
+ "': ");
+ return;
+ }
+ ImportSummaryForTesting = std::move(*ImportSummaryForTestingOrErr);
+ ImportSummary = ImportSummaryForTesting.get();
+}
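+// Illustrative instantiation (hypothetical driver code mirroring the
+// constructor above; CombinedIndex is a placeholder ModuleSummaryIndex):
+//
+//   // Regular pipeline / in-process backend: no import summary.
+//   MemProfContextDisambiguation WholeProgram(/*Summary=*/nullptr);
+//
+//   // Distributed ThinLTO backend: replay cloning decisions from the index.
+//   MemProfContextDisambiguation Backend(&CombinedIndex);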
+
+PreservedAnalyses MemProfContextDisambiguation::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto OREGetter = [&](Function *F) -> OptimizationRemarkEmitter & {
+ return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
+ };
+ if (!processModule(M, OREGetter))
+ return PreservedAnalyses::all();
+ return PreservedAnalyses::none();
+}
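+// Illustrative use in a new-PM pipeline (hypothetical driver code):
+//
+//   ModulePassManager MPM;
+//   MPM.addPass(MemProfContextDisambiguation(/*Summary=*/nullptr));
+//   MPM.run(M, MAM);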
+
+void MemProfContextDisambiguation::run(
+ ModuleSummaryIndex &Index,
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ isPrevailing) {
+ // TODO: If/when other types of memprof cloning are enabled beyond just for
+ // hot and cold, we will need to change this to individually control the
+ // AllocationType passed to addStackNodesForMIB during CCG construction.
+ // The index was set from the option, so these should be in sync.
+ assert(Index.withSupportsHotColdNew() == SupportsHotColdNew);
+ if (!SupportsHotColdNew)
+ return;
+
+ IndexCallsiteContextGraph CCG(Index, isPrevailing);
+ CCG.process();
+}
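+// Illustrative thin-link invocation (hypothetical driver code; CombinedIndex
+// and IsPrevailing are placeholders for the link-time combined index and the
+// prevailing-symbol callback):
+//
+//   MemProfContextDisambiguation ContextDisambiguation(/*Summary=*/nullptr);
+//   ContextDisambiguation.run(CombinedIndex, IsPrevailing);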
diff --git a/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index 590f62ca58dd..feda5d6459cb 100644
--- a/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -112,8 +112,6 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -294,34 +292,8 @@ private:
// there is exactly one mapping F -> FN for each FunctionNode FN in FnTree.
DenseMap<AssertingVH<Function>, FnTreeType::iterator> FNodesInTree;
};
-
-class MergeFunctionsLegacyPass : public ModulePass {
-public:
- static char ID;
-
- MergeFunctionsLegacyPass(): ModulePass(ID) {
- initializeMergeFunctionsLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- if (skipModule(M))
- return false;
-
- MergeFunctions MF;
- return MF.runOnModule(M);
- }
-};
-
} // end anonymous namespace
-char MergeFunctionsLegacyPass::ID = 0;
-INITIALIZE_PASS(MergeFunctionsLegacyPass, "mergefunc",
- "Merge Functions", false, false)
-
-ModulePass *llvm::createMergeFunctionsPass() {
- return new MergeFunctionsLegacyPass();
-}
-
PreservedAnalyses MergeFunctionsPass::run(Module &M,
ModuleAnalysisManager &AM) {
MergeFunctions MF;
diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
index ee382657f5e6..5e91ab80d750 100644
--- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp
+++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp
@@ -138,17 +138,12 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
//
// TODO: Here is a huge amount of duplicate code between the module inliner and
// the SCC inliner, which needs some refactoring.
- auto Calls = getInlineOrder(FAM, Params);
+ auto Calls = getInlineOrder(FAM, Params, MAM, M);
assert(Calls != nullptr && "Expected an initialized InlineOrder");
// Populate the initial list of calls in this module.
for (Function &F : M) {
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- // We want to generally process call sites top-down in order for
- // simplifications stemming from replacing the call with the returned value
- // after inlining to be visible to subsequent inlining decisions.
- // FIXME: Using instructions sequence is a really bad way to do this.
- // Instead we should do an actual RPO walk of the function body.
for (Instruction &I : instructions(F))
if (auto *CB = dyn_cast<CallBase>(&I))
if (Function *Callee = CB->getCalledFunction()) {
@@ -213,7 +208,7 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M,
// Setup the data structure used to plumb customization into the
// `InlineFunction` routine.
InlineFunctionInfo IFI(
- /*cg=*/nullptr, GetAssumptionCache, PSI,
+ GetAssumptionCache, PSI,
&FAM.getResult<BlockFrequencyAnalysis>(*(CB->getCaller())),
&FAM.getResult<BlockFrequencyAnalysis>(Callee));
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index bee154dab10f..588f3901e3cb 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -22,8 +22,10 @@
#include "llvm/ADT/EnumeratedArray.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
@@ -36,6 +38,8 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
@@ -44,7 +48,7 @@
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/InitializePasses.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/IPO/Attributor.h"
@@ -188,9 +192,9 @@ struct AAICVTracker;
struct OMPInformationCache : public InformationCache {
OMPInformationCache(Module &M, AnalysisGetter &AG,
BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC,
- KernelSet &Kernels)
+ bool OpenMPPostLink)
: InformationCache(M, AG, Allocator, CGSCC), OMPBuilder(M),
- Kernels(Kernels) {
+ OpenMPPostLink(OpenMPPostLink) {
OMPBuilder.initialize();
initializeRuntimeFunctions(M);
@@ -417,7 +421,7 @@ struct OMPInformationCache : public InformationCache {
// TODO: We directly convert uses into proper calls and unknown uses.
for (Use &U : RFI.Declaration->uses()) {
if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
- if (ModuleSlice.empty() || ModuleSlice.count(UserI->getFunction())) {
+ if (!CGSCC || CGSCC->empty() || CGSCC->contains(UserI->getFunction())) {
RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
++NumUses;
}
@@ -448,6 +452,24 @@ struct OMPInformationCache : public InformationCache {
CI->setCallingConv(Fn->getCallingConv());
}
+ // Helper function to determine if it's legal to create a call to the runtime
+ // functions.
+ bool runtimeFnsAvailable(ArrayRef<RuntimeFunction> Fns) {
+ // We can always emit calls if we haven't yet linked in the runtime.
+ if (!OpenMPPostLink)
+ return true;
+
+ // Once the runtime has already been linked in, we cannot emit calls to
+ // any undefined functions.
+ for (RuntimeFunction Fn : Fns) {
+ RuntimeFunctionInfo &RFI = RFIs[Fn];
+
+ if (RFI.Declaration && RFI.Declaration->isDeclaration())
+ return false;
+ }
+ return true;
+ }
+
/// Helper to initialize all runtime function information for those defined
/// in OpenMPKinds.def.
void initializeRuntimeFunctions(Module &M) {
@@ -518,11 +540,11 @@ struct OMPInformationCache : public InformationCache {
// TODO: We should attach the attributes defined in OMPKinds.def.
}
- /// Collection of known kernels (\see Kernel) in the module.
- KernelSet &Kernels;
-
/// Collection of known OpenMP runtime functions..
DenseSet<const Function *> RTLFunctions;
+
+ /// Indicates if we have already linked in the OpenMP device library.
+ bool OpenMPPostLink = false;
};
template <typename Ty, bool InsertInvalidates = true>
@@ -808,7 +830,7 @@ struct OpenMPOpt {
return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE);
}
- /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
+ /// Run all OpenMP optimizations on the underlying SCC.
bool run(bool IsModulePass) {
if (SCC.empty())
return false;
@@ -816,8 +838,7 @@ struct OpenMPOpt {
bool Changed = false;
LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
- << " functions in a slice with "
- << OMPInfoCache.ModuleSlice.size() << " functions\n");
+ << " functions\n");
if (IsModulePass) {
Changed |= runAttributor(IsModulePass);
@@ -882,7 +903,7 @@ struct OpenMPOpt {
/// Print OpenMP GPU kernels for testing.
void printKernels() const {
for (Function *F : SCC) {
- if (!OMPInfoCache.Kernels.count(F))
+ if (!omp::isKernel(*F))
continue;
auto Remark = [&](OptimizationRemarkAnalysis ORA) {
@@ -1412,7 +1433,10 @@ private:
Changed |= WasSplit;
return WasSplit;
};
- RFI.foreachUse(SCC, SplitMemTransfers);
+ if (OMPInfoCache.runtimeFnsAvailable(
+ {OMPRTL___tgt_target_data_begin_mapper_issue,
+ OMPRTL___tgt_target_data_begin_mapper_wait}))
+ RFI.foreachUse(SCC, SplitMemTransfers);
return Changed;
}
@@ -1681,37 +1705,27 @@ private:
};
if (!ReplVal) {
- for (Use *U : *UV)
+ auto *DT =
+ OMPInfoCache.getAnalysisResultForFunction<DominatorTreeAnalysis>(F);
+ if (!DT)
+ return false;
+ Instruction *IP = nullptr;
+ for (Use *U : *UV) {
if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
+ if (IP)
+ IP = DT->findNearestCommonDominator(IP, CI);
+ else
+ IP = CI;
if (!CanBeMoved(*CI))
continue;
-
- // If the function is a kernel, dedup will move
- // the runtime call right after the kernel init callsite. Otherwise,
- // it will move it to the beginning of the caller function.
- if (isKernel(F)) {
- auto &KernelInitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
- auto *KernelInitUV = KernelInitRFI.getUseVector(F);
-
- if (KernelInitUV->empty())
- continue;
-
- assert(KernelInitUV->size() == 1 &&
- "Expected a single __kmpc_target_init in kernel\n");
-
- CallInst *KernelInitCI =
- getCallIfRegularCall(*KernelInitUV->front(), &KernelInitRFI);
- assert(KernelInitCI &&
- "Expected a call to __kmpc_target_init in kernel\n");
-
- CI->moveAfter(KernelInitCI);
- } else
- CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
- ReplVal = CI;
- break;
+ if (!ReplVal)
+ ReplVal = CI;
}
+ }
if (!ReplVal)
return false;
+ assert(IP && "Expected insertion point!");
+ cast<Instruction>(ReplVal)->moveBefore(IP);
}
// If we use a call as a replacement value we need to make sure the ident is
@@ -1809,9 +1823,6 @@ private:
///
///{{
- /// Check if \p F is a kernel, hence entry point for target offloading.
- bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); }
-
/// Cache to remember the unique kernel for a function.
DenseMap<Function *, std::optional<Kernel>> UniqueKernelMap;
@@ -1920,7 +1931,8 @@ public:
};
Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
- if (!OMPInfoCache.ModuleSlice.empty() && !OMPInfoCache.ModuleSlice.count(&F))
+ if (OMPInfoCache.CGSCC && !OMPInfoCache.CGSCC->empty() &&
+ !OMPInfoCache.CGSCC->contains(&F))
return nullptr;
// Use a scope to keep the lifetime of the CachedKernel short.
@@ -2095,12 +2107,6 @@ struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
- void initialize(Attributor &A) override {
- Function *F = getAnchorScope();
- if (!F || !A.isFunctionIPOAmendable(*F))
- indicatePessimisticFixpoint();
- }
-
/// Returns true if value is assumed to be tracked.
bool isAssumedTracked() const { return getAssumed(); }
@@ -2146,7 +2152,9 @@ struct AAICVTrackerFunction : public AAICVTracker {
: AAICVTracker(IRP, A) {}
// FIXME: come up with better string.
- const std::string getAsStr() const override { return "ICVTrackerFunction"; }
+ const std::string getAsStr(Attributor *) const override {
+ return "ICVTrackerFunction";
+ }
// FIXME: come up with some stats.
void trackStatistics() const override {}
@@ -2242,11 +2250,12 @@ struct AAICVTrackerFunction : public AAICVTracker {
if (CalledFunction->isDeclaration())
return nullptr;
- const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
+ const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>(
*this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED);
- if (ICVTrackingAA.isAssumedTracked()) {
- std::optional<Value *> URV = ICVTrackingAA.getUniqueReplacementValue(ICV);
+ if (ICVTrackingAA->isAssumedTracked()) {
+ std::optional<Value *> URV =
+ ICVTrackingAA->getUniqueReplacementValue(ICV);
if (!URV || (*URV && AA::isValidAtPosition(AA::ValueAndContext(**URV, I),
OMPInfoCache)))
return URV;
@@ -2337,7 +2346,7 @@ struct AAICVTrackerFunctionReturned : AAICVTracker {
: AAICVTracker(IRP, A) {}
// FIXME: come up with better string.
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *) const override {
return "ICVTrackerFunctionReturned";
}
@@ -2362,10 +2371,10 @@ struct AAICVTrackerFunctionReturned : AAICVTracker {
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
- const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
+ const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>(
*this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
- if (!ICVTrackingAA.isAssumedTracked())
+ if (!ICVTrackingAA->isAssumedTracked())
return indicatePessimisticFixpoint();
for (InternalControlVar ICV : TrackableICVs) {
@@ -2374,7 +2383,7 @@ struct AAICVTrackerFunctionReturned : AAICVTracker {
auto CheckReturnInst = [&](Instruction &I) {
std::optional<Value *> NewReplVal =
- ICVTrackingAA.getReplacementValue(ICV, &I, A);
+ ICVTrackingAA->getReplacementValue(ICV, &I, A);
// If we found a second ICV value there is no unique returned value.
if (UniqueICVValue && UniqueICVValue != NewReplVal)
@@ -2407,9 +2416,7 @@ struct AAICVTrackerCallSite : AAICVTracker {
: AAICVTracker(IRP, A) {}
void initialize(Attributor &A) override {
- Function *F = getAnchorScope();
- if (!F || !A.isFunctionIPOAmendable(*F))
- indicatePessimisticFixpoint();
+ assert(getAnchorScope() && "Expected anchor function");
// We only initialize this AA for getters, so we need to know which ICV it
// gets.
@@ -2438,7 +2445,9 @@ struct AAICVTrackerCallSite : AAICVTracker {
}
// FIXME: come up with better string.
- const std::string getAsStr() const override { return "ICVTrackerCallSite"; }
+ const std::string getAsStr(Attributor *) const override {
+ return "ICVTrackerCallSite";
+ }
// FIXME: come up with some stats.
void trackStatistics() const override {}
@@ -2447,15 +2456,15 @@ struct AAICVTrackerCallSite : AAICVTracker {
std::optional<Value *> ReplVal;
ChangeStatus updateImpl(Attributor &A) override {
- const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
+ const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>(
*this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
// We don't have any information, so we assume it changes the ICV.
- if (!ICVTrackingAA.isAssumedTracked())
+ if (!ICVTrackingAA->isAssumedTracked())
return indicatePessimisticFixpoint();
std::optional<Value *> NewReplVal =
- ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A);
+ ICVTrackingAA->getReplacementValue(AssociatedICV, getCtxI(), A);
if (ReplVal == NewReplVal)
return ChangeStatus::UNCHANGED;
@@ -2477,7 +2486,7 @@ struct AAICVTrackerCallSiteReturned : AAICVTracker {
: AAICVTracker(IRP, A) {}
// FIXME: come up with better string.
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *) const override {
return "ICVTrackerCallSiteReturned";
}
@@ -2503,18 +2512,18 @@ struct AAICVTrackerCallSiteReturned : AAICVTracker {
ChangeStatus updateImpl(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
- const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
+ const auto *ICVTrackingAA = A.getAAFor<AAICVTracker>(
*this, IRPosition::returned(*getAssociatedFunction()),
DepClassTy::REQUIRED);
// We don't have any information, so we assume it changes the ICV.
- if (!ICVTrackingAA.isAssumedTracked())
+ if (!ICVTrackingAA->isAssumedTracked())
return indicatePessimisticFixpoint();
for (InternalControlVar ICV : TrackableICVs) {
std::optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
std::optional<Value *> NewReplVal =
- ICVTrackingAA.getUniqueReplacementValue(ICV);
+ ICVTrackingAA->getUniqueReplacementValue(ICV);
if (ReplVal == NewReplVal)
continue;
@@ -2530,26 +2539,28 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
: AAExecutionDomain(IRP, A) {}
- ~AAExecutionDomainFunction() {
- delete RPOT;
- }
+ ~AAExecutionDomainFunction() { delete RPOT; }
void initialize(Attributor &A) override {
- if (getAnchorScope()->isDeclaration()) {
- indicatePessimisticFixpoint();
- return;
- }
- RPOT = new ReversePostOrderTraversal<Function *>(getAnchorScope());
+ Function *F = getAnchorScope();
+ assert(F && "Expected anchor function");
+ RPOT = new ReversePostOrderTraversal<Function *>(F);
}
- const std::string getAsStr() const override {
- unsigned TotalBlocks = 0, InitialThreadBlocks = 0;
+ const std::string getAsStr(Attributor *) const override {
+ unsigned TotalBlocks = 0, InitialThreadBlocks = 0, AlignedBlocks = 0;
for (auto &It : BEDMap) {
+ if (!It.getFirst())
+ continue;
TotalBlocks++;
InitialThreadBlocks += It.getSecond().IsExecutedByInitialThreadOnly;
+ AlignedBlocks += It.getSecond().IsReachedFromAlignedBarrierOnly &&
+ It.getSecond().IsReachingAlignedBarrierOnly;
}
return "[AAExecutionDomain] " + std::to_string(InitialThreadBlocks) + "/" +
- std::to_string(TotalBlocks) + " executed by initial thread only";
+ std::to_string(AlignedBlocks) + " of " +
+ std::to_string(TotalBlocks) +
+ " executed by initial thread / aligned";
}
/// See AbstractAttribute::trackStatistics().
@@ -2572,7 +2583,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
SmallPtrSet<CallBase *, 16> DeletedBarriers;
auto HandleAlignedBarrier = [&](CallBase *CB) {
- const ExecutionDomainTy &ED = CEDMap[CB];
+ const ExecutionDomainTy &ED = CB ? CEDMap[{CB, PRE}] : BEDMap[nullptr];
if (!ED.IsReachedFromAlignedBarrierOnly ||
ED.EncounteredNonLocalSideEffect)
return;
@@ -2596,6 +2607,8 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
CallBase *LastCB = Worklist.pop_back_val();
if (!Visited.insert(LastCB))
continue;
+ if (LastCB->getFunction() != getAnchorScope())
+ continue;
if (!DeletedBarriers.count(LastCB)) {
A.deleteAfterManifest(*LastCB);
continue;
@@ -2603,7 +2616,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
// The final aligned barrier (LastCB) reaching the kernel end was
// removed already. This means we can go one step further and remove
// the barriers encountered last before (LastCB).
- const ExecutionDomainTy &LastED = CEDMap[LastCB];
+ const ExecutionDomainTy &LastED = CEDMap[{LastCB, PRE}];
Worklist.append(LastED.AlignedBarriers.begin(),
LastED.AlignedBarriers.end());
}
@@ -2619,14 +2632,17 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
for (auto *CB : AlignedBarriers)
HandleAlignedBarrier(CB);
- auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
// Handle the "kernel end barrier" for kernels too.
- if (OMPInfoCache.Kernels.count(getAnchorScope()))
+ if (omp::isKernel(*getAnchorScope()))
HandleAlignedBarrier(nullptr);
return Changed;
}
+ bool isNoOpFence(const FenceInst &FI) const override {
+ return getState().isValidState() && !NonNoOpFences.count(&FI);
+ }
+
/// Merge barrier and assumption information from \p PredED into the successor
/// \p ED.
void
@@ -2636,12 +2652,12 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
/// Merge all information from \p PredED into the successor \p ED. If
/// \p InitialEdgeOnly is set, only the initial edge will enter the block
/// represented by \p ED from this predecessor.
- void mergeInPredecessor(Attributor &A, ExecutionDomainTy &ED,
+ bool mergeInPredecessor(Attributor &A, ExecutionDomainTy &ED,
const ExecutionDomainTy &PredED,
bool InitialEdgeOnly = false);
/// Accumulate information for the entry block in \p EntryBBED.
- void handleEntryBB(Attributor &A, ExecutionDomainTy &EntryBBED);
+ bool handleCallees(Attributor &A, ExecutionDomainTy &EntryBBED);
/// See AbstractAttribute::updateImpl.
ChangeStatus updateImpl(Attributor &A) override;
@@ -2651,14 +2667,18 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override {
if (!isValidState())
return false;
+ assert(BB.getParent() == getAnchorScope() && "Block is out of scope!");
return BEDMap.lookup(&BB).IsExecutedByInitialThreadOnly;
}
bool isExecutedInAlignedRegion(Attributor &A,
const Instruction &I) const override {
- if (!isValidState() || isa<CallBase>(I))
+ assert(I.getFunction() == getAnchorScope() &&
+ "Instruction is out of scope!");
+ if (!isValidState())
return false;
+ bool ForwardIsOk = true;
const Instruction *CurI;
// Check forward until a call or the block end is reached.
@@ -2667,15 +2687,18 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
auto *CB = dyn_cast<CallBase>(CurI);
if (!CB)
continue;
- const auto &It = CEDMap.find(CB);
+ if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB)))
+ return true;
+ const auto &It = CEDMap.find({CB, PRE});
if (It == CEDMap.end())
continue;
- if (!It->getSecond().IsReachedFromAlignedBarrierOnly)
- return false;
+ if (!It->getSecond().IsReachingAlignedBarrierOnly)
+ ForwardIsOk = false;
+ break;
} while ((CurI = CurI->getNextNonDebugInstruction()));
- if (!CurI && !BEDMap.lookup(I.getParent()).IsReachedFromAlignedBarrierOnly)
- return false;
+ if (!CurI && !BEDMap.lookup(I.getParent()).IsReachingAlignedBarrierOnly)
+ ForwardIsOk = false;
// Check backward until a call or the block beginning is reached.
CurI = &I;
@@ -2683,33 +2706,30 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
auto *CB = dyn_cast<CallBase>(CurI);
if (!CB)
continue;
- const auto &It = CEDMap.find(CB);
+ if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB)))
+ return true;
+ const auto &It = CEDMap.find({CB, POST});
if (It == CEDMap.end())
continue;
- if (!AA::isNoSyncInst(A, *CB, *this)) {
- if (It->getSecond().IsReachedFromAlignedBarrierOnly)
- break;
- return false;
- }
-
- Function *Callee = CB->getCalledFunction();
- if (!Callee || Callee->isDeclaration())
- return false;
- const auto &EDAA = A.getAAFor<AAExecutionDomain>(
- *this, IRPosition::function(*Callee), DepClassTy::OPTIONAL);
- if (!EDAA.getState().isValidState())
- return false;
- if (!EDAA.getFunctionExecutionDomain().IsReachedFromAlignedBarrierOnly)
- return false;
- break;
+ if (It->getSecond().IsReachedFromAlignedBarrierOnly)
+ break;
+ return false;
} while ((CurI = CurI->getPrevNonDebugInstruction()));
- if (!CurI &&
- !llvm::all_of(
- predecessors(I.getParent()), [&](const BasicBlock *PredBB) {
- return BEDMap.lookup(PredBB).IsReachedFromAlignedBarrierOnly;
- })) {
+ // Delayed decision on the forward pass to allow aligned barrier detection
+ // in the backwards traversal.
+ if (!ForwardIsOk)
return false;
+
+ if (!CurI) {
+ const BasicBlock *BB = I.getParent();
+ if (BB == &BB->getParent()->getEntryBlock())
+ return BEDMap.lookup(nullptr).IsReachedFromAlignedBarrierOnly;
+ if (!llvm::all_of(predecessors(BB), [&](const BasicBlock *PredBB) {
+ return BEDMap.lookup(PredBB).IsReachedFromAlignedBarrierOnly;
+ })) {
+ return false;
+ }
}
// On neither traversal did we find anything but aligned barriers.
@@ -2721,15 +2741,16 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
"No request should be made against an invalid state!");
return BEDMap.lookup(&BB);
}
- ExecutionDomainTy getExecutionDomain(const CallBase &CB) const override {
+ std::pair<ExecutionDomainTy, ExecutionDomainTy>
+ getExecutionDomain(const CallBase &CB) const override {
assert(isValidState() &&
"No request should be made against an invalid state!");
- return CEDMap.lookup(&CB);
+ return {CEDMap.lookup({&CB, PRE}), CEDMap.lookup({&CB, POST})};
}
ExecutionDomainTy getFunctionExecutionDomain() const override {
assert(isValidState() &&
"No request should be made against an invalid state!");
- return BEDMap.lookup(nullptr);
+ return InterProceduralED;
}
///}
@@ -2778,12 +2799,28 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
return false;
};
+ /// Mapping containing information about the function for other AAs.
+ ExecutionDomainTy InterProceduralED;
+
+ enum Direction { PRE = 0, POST = 1 };
/// Mapping containing information per block.
DenseMap<const BasicBlock *, ExecutionDomainTy> BEDMap;
- DenseMap<const CallBase *, ExecutionDomainTy> CEDMap;
+ DenseMap<PointerIntPair<const CallBase *, 1, Direction>, ExecutionDomainTy>
+ CEDMap;
SmallSetVector<CallBase *, 16> AlignedBarriers;
ReversePostOrderTraversal<Function *> *RPOT = nullptr;
+
+ /// Set \p R to \p V and report true if that changed \p R.
+ static bool setAndRecord(bool &R, bool V) {
+ bool Eq = (R == V);
+ R = V;
+ return !Eq;
+ }
+
+ /// Collection of fences known to be non-no-op. All fences not in this set
+ /// can be assumed to be no-ops.
+ SmallPtrSet<const FenceInst *, 8> NonNoOpFences;
};
void AAExecutionDomainFunction::mergeInPredecessorBarriersAndAssumptions(
@@ -2795,62 +2832,82 @@ void AAExecutionDomainFunction::mergeInPredecessorBarriersAndAssumptions(
ED.addAlignedBarrier(A, *AB);
}
-void AAExecutionDomainFunction::mergeInPredecessor(
+bool AAExecutionDomainFunction::mergeInPredecessor(
Attributor &A, ExecutionDomainTy &ED, const ExecutionDomainTy &PredED,
bool InitialEdgeOnly) {
- ED.IsExecutedByInitialThreadOnly =
- InitialEdgeOnly || (PredED.IsExecutedByInitialThreadOnly &&
- ED.IsExecutedByInitialThreadOnly);
-
- ED.IsReachedFromAlignedBarrierOnly = ED.IsReachedFromAlignedBarrierOnly &&
- PredED.IsReachedFromAlignedBarrierOnly;
- ED.EncounteredNonLocalSideEffect =
- ED.EncounteredNonLocalSideEffect | PredED.EncounteredNonLocalSideEffect;
+
+ bool Changed = false;
+ Changed |=
+ setAndRecord(ED.IsExecutedByInitialThreadOnly,
+ InitialEdgeOnly || (PredED.IsExecutedByInitialThreadOnly &&
+ ED.IsExecutedByInitialThreadOnly));
+
+ Changed |= setAndRecord(ED.IsReachedFromAlignedBarrierOnly,
+ ED.IsReachedFromAlignedBarrierOnly &&
+ PredED.IsReachedFromAlignedBarrierOnly);
+ Changed |= setAndRecord(ED.EncounteredNonLocalSideEffect,
+ ED.EncounteredNonLocalSideEffect |
+ PredED.EncounteredNonLocalSideEffect);
+ // Do not track assumptions and barriers as part of Changed.
if (ED.IsReachedFromAlignedBarrierOnly)
mergeInPredecessorBarriersAndAssumptions(A, ED, PredED);
else
ED.clearAssumeInstAndAlignedBarriers();
+ return Changed;
}
-void AAExecutionDomainFunction::handleEntryBB(Attributor &A,
+bool AAExecutionDomainFunction::handleCallees(Attributor &A,
ExecutionDomainTy &EntryBBED) {
- SmallVector<ExecutionDomainTy> PredExecDomains;
+ SmallVector<std::pair<ExecutionDomainTy, ExecutionDomainTy>, 4> CallSiteEDs;
auto PredForCallSite = [&](AbstractCallSite ACS) {
- const auto &EDAA = A.getAAFor<AAExecutionDomain>(
+ const auto *EDAA = A.getAAFor<AAExecutionDomain>(
*this, IRPosition::function(*ACS.getInstruction()->getFunction()),
DepClassTy::OPTIONAL);
- if (!EDAA.getState().isValidState())
+ if (!EDAA || !EDAA->getState().isValidState())
return false;
- PredExecDomains.emplace_back(
- EDAA.getExecutionDomain(*cast<CallBase>(ACS.getInstruction())));
+ CallSiteEDs.emplace_back(
+ EDAA->getExecutionDomain(*cast<CallBase>(ACS.getInstruction())));
return true;
};
+ ExecutionDomainTy ExitED;
bool AllCallSitesKnown;
if (A.checkForAllCallSites(PredForCallSite, *this,
/* RequiresAllCallSites */ true,
AllCallSitesKnown)) {
- for (const auto &PredED : PredExecDomains)
- mergeInPredecessor(A, EntryBBED, PredED);
+ for (const auto &[CSInED, CSOutED] : CallSiteEDs) {
+ mergeInPredecessor(A, EntryBBED, CSInED);
+ ExitED.IsReachingAlignedBarrierOnly &=
+ CSOutED.IsReachingAlignedBarrierOnly;
+ }
} else {
// We could not find all predecessors, so this is either a kernel or a
// function with external linkage (or with some other weird uses).
- auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
- if (OMPInfoCache.Kernels.count(getAnchorScope())) {
+ if (omp::isKernel(*getAnchorScope())) {
EntryBBED.IsExecutedByInitialThreadOnly = false;
EntryBBED.IsReachedFromAlignedBarrierOnly = true;
EntryBBED.EncounteredNonLocalSideEffect = false;
+ ExitED.IsReachingAlignedBarrierOnly = true;
} else {
EntryBBED.IsExecutedByInitialThreadOnly = false;
EntryBBED.IsReachedFromAlignedBarrierOnly = false;
EntryBBED.EncounteredNonLocalSideEffect = true;
+ ExitED.IsReachingAlignedBarrierOnly = false;
}
}
+ bool Changed = false;
auto &FnED = BEDMap[nullptr];
- FnED.IsReachingAlignedBarrierOnly &=
- EntryBBED.IsReachedFromAlignedBarrierOnly;
+ Changed |= setAndRecord(FnED.IsReachedFromAlignedBarrierOnly,
+ FnED.IsReachedFromAlignedBarrierOnly &
+ EntryBBED.IsReachedFromAlignedBarrierOnly);
+ Changed |= setAndRecord(FnED.IsReachingAlignedBarrierOnly,
+ FnED.IsReachingAlignedBarrierOnly &
+ ExitED.IsReachingAlignedBarrierOnly);
+ Changed |= setAndRecord(FnED.IsExecutedByInitialThreadOnly,
+ EntryBBED.IsExecutedByInitialThreadOnly);
+ return Changed;
}
ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
@@ -2860,36 +2917,28 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
// Helper to deal with an aligned barrier encountered during the forward
// traversal. \p CB is the aligned barrier, \p ED is the execution domain when
// it was encountered.
- auto HandleAlignedBarrier = [&](CallBase *CB, ExecutionDomainTy &ED) {
- if (CB)
- Changed |= AlignedBarriers.insert(CB);
+ auto HandleAlignedBarrier = [&](CallBase &CB, ExecutionDomainTy &ED) {
+ Changed |= AlignedBarriers.insert(&CB);
// First, update the barrier ED kept in the separate CEDMap.
- auto &CallED = CEDMap[CB];
- mergeInPredecessor(A, CallED, ED);
+ auto &CallInED = CEDMap[{&CB, PRE}];
+ Changed |= mergeInPredecessor(A, CallInED, ED);
+ CallInED.IsReachingAlignedBarrierOnly = true;
// Next adjust the ED we use for the traversal.
ED.EncounteredNonLocalSideEffect = false;
ED.IsReachedFromAlignedBarrierOnly = true;
// Aligned barrier collection has to come last.
ED.clearAssumeInstAndAlignedBarriers();
- if (CB)
- ED.addAlignedBarrier(A, *CB);
+ ED.addAlignedBarrier(A, CB);
+ auto &CallOutED = CEDMap[{&CB, POST}];
+ Changed |= mergeInPredecessor(A, CallOutED, ED);
};
- auto &LivenessAA =
+ auto *LivenessAA =
A.getAAFor<AAIsDead>(*this, getIRPosition(), DepClassTy::OPTIONAL);
- // Set \p R to \V and report true if that changed \p R.
- auto SetAndRecord = [&](bool &R, bool V) {
- bool Eq = (R == V);
- R = V;
- return !Eq;
- };
-
- auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
-
Function *F = getAnchorScope();
BasicBlock &EntryBB = F->getEntryBlock();
- bool IsKernel = OMPInfoCache.Kernels.count(F);
+ bool IsKernel = omp::isKernel(*F);
SmallVector<Instruction *> SyncInstWorklist;
for (auto &RIt : *RPOT) {
@@ -2899,18 +2948,19 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
// TODO: We use local reasoning since we don't have a divergence analysis
// running as well. We could basically allow uniform branches here.
bool AlignedBarrierLastInBlock = IsEntryBB && IsKernel;
+ bool IsExplicitlyAligned = IsEntryBB && IsKernel;
ExecutionDomainTy ED;
// Propagate "incoming edges" into information about this block.
if (IsEntryBB) {
- handleEntryBB(A, ED);
+ Changed |= handleCallees(A, ED);
} else {
// For live non-entry blocks we only propagate
// information via live edges.
- if (LivenessAA.isAssumedDead(&BB))
+ if (LivenessAA && LivenessAA->isAssumedDead(&BB))
continue;
for (auto *PredBB : predecessors(&BB)) {
- if (LivenessAA.isEdgeDead(PredBB, &BB))
+ if (LivenessAA && LivenessAA->isEdgeDead(PredBB, &BB))
continue;
bool InitialEdgeOnly = isInitialThreadOnlyEdge(
A, dyn_cast<BranchInst>(PredBB->getTerminator()), BB);
@@ -2922,7 +2972,7 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
// information to calls.
for (Instruction &I : BB) {
bool UsedAssumedInformation;
- if (A.isAssumedDead(I, *this, &LivenessAA, UsedAssumedInformation,
+ if (A.isAssumedDead(I, *this, LivenessAA, UsedAssumedInformation,
/* CheckBBLivenessOnly */ false, DepClassTy::OPTIONAL,
/* CheckForDeadStore */ true))
continue;
@@ -2939,6 +2989,33 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
continue;
}
+ if (auto *FI = dyn_cast<FenceInst>(&I)) {
+ if (!ED.EncounteredNonLocalSideEffect) {
+ // An aligned fence without non-local side-effects is a no-op.
+ if (ED.IsReachedFromAlignedBarrierOnly)
+ continue;
+ // A non-aligned fence without non-local side-effects is a no-op
+ // if the ordering only publishes non-local side-effects (or less).
+ switch (FI->getOrdering()) {
+ case AtomicOrdering::NotAtomic:
+ continue;
+ case AtomicOrdering::Unordered:
+ continue;
+ case AtomicOrdering::Monotonic:
+ continue;
+ case AtomicOrdering::Acquire:
+ break;
+ case AtomicOrdering::Release:
+ continue;
+ case AtomicOrdering::AcquireRelease:
+ break;
+ case AtomicOrdering::SequentiallyConsistent:
+ break;
+ };
+ }
+ NonNoOpFences.insert(FI);
+ }
+
auto *CB = dyn_cast<CallBase>(&I);
bool IsNoSync = AA::isNoSyncInst(A, I, *this);
bool IsAlignedBarrier =
@@ -2946,14 +3023,16 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
AANoSync::isAlignedBarrier(*CB, AlignedBarrierLastInBlock);
AlignedBarrierLastInBlock &= IsNoSync;
+ IsExplicitlyAligned &= IsNoSync;
// Next we check for calls. Aligned barriers are handled
// explicitly, everything else is kept for the backward traversal and will
// also affect our state.
if (CB) {
if (IsAlignedBarrier) {
- HandleAlignedBarrier(CB, ED);
+ HandleAlignedBarrier(*CB, ED);
AlignedBarrierLastInBlock = true;
+ IsExplicitlyAligned = true;
continue;
}
@@ -2971,20 +3050,20 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
// Record how we entered the call, then accumulate the effect of the
// call in ED for potential use by the callee.
- auto &CallED = CEDMap[CB];
- mergeInPredecessor(A, CallED, ED);
+ auto &CallInED = CEDMap[{CB, PRE}];
+ Changed |= mergeInPredecessor(A, CallInED, ED);
// If we have a sync-definition we can check if it starts/ends in an
// aligned barrier. If we are unsure we assume any sync breaks
// alignment.
Function *Callee = CB->getCalledFunction();
if (!IsNoSync && Callee && !Callee->isDeclaration()) {
- const auto &EDAA = A.getAAFor<AAExecutionDomain>(
+ const auto *EDAA = A.getAAFor<AAExecutionDomain>(
*this, IRPosition::function(*Callee), DepClassTy::OPTIONAL);
- if (EDAA.getState().isValidState()) {
- const auto &CalleeED = EDAA.getFunctionExecutionDomain();
+ if (EDAA && EDAA->getState().isValidState()) {
+ const auto &CalleeED = EDAA->getFunctionExecutionDomain();
ED.IsReachedFromAlignedBarrierOnly =
- CalleeED.IsReachedFromAlignedBarrierOnly;
+ CalleeED.IsReachedFromAlignedBarrierOnly;
AlignedBarrierLastInBlock = ED.IsReachedFromAlignedBarrierOnly;
if (IsNoSync || !CalleeED.IsReachedFromAlignedBarrierOnly)
ED.EncounteredNonLocalSideEffect |=
@@ -2992,19 +3071,27 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
else
ED.EncounteredNonLocalSideEffect =
CalleeED.EncounteredNonLocalSideEffect;
- if (!CalleeED.IsReachingAlignedBarrierOnly)
+ if (!CalleeED.IsReachingAlignedBarrierOnly) {
+ Changed |=
+ setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false);
SyncInstWorklist.push_back(&I);
+ }
if (CalleeED.IsReachedFromAlignedBarrierOnly)
mergeInPredecessorBarriersAndAssumptions(A, ED, CalleeED);
+ auto &CallOutED = CEDMap[{CB, POST}];
+ Changed |= mergeInPredecessor(A, CallOutED, ED);
continue;
}
}
- ED.IsReachedFromAlignedBarrierOnly =
- IsNoSync && ED.IsReachedFromAlignedBarrierOnly;
+ if (!IsNoSync) {
+ ED.IsReachedFromAlignedBarrierOnly = false;
+ Changed |= setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false);
+ SyncInstWorklist.push_back(&I);
+ }
AlignedBarrierLastInBlock &= ED.IsReachedFromAlignedBarrierOnly;
ED.EncounteredNonLocalSideEffect |= !CB->doesNotAccessMemory();
- if (!IsNoSync)
- SyncInstWorklist.push_back(&I);
+ auto &CallOutED = CEDMap[{CB, POST}];
+ Changed |= mergeInPredecessor(A, CallOutED, ED);
}
if (!I.mayHaveSideEffects() && !I.mayReadFromMemory())
@@ -3013,7 +3100,7 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
// If we have a callee we try to use fine-grained information to
// determine local side-effects.
if (CB) {
- const auto &MemAA = A.getAAFor<AAMemoryLocation>(
+ const auto *MemAA = A.getAAFor<AAMemoryLocation>(
*this, IRPosition::callsite_function(*CB), DepClassTy::OPTIONAL);
auto AccessPred = [&](const Instruction *I, const Value *Ptr,
@@ -3021,13 +3108,14 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
AAMemoryLocation::MemoryLocationsKind) {
return !AA::isPotentiallyAffectedByBarrier(A, {Ptr}, *this, I);
};
- if (MemAA.getState().isValidState() &&
- MemAA.checkForAllAccessesToMemoryKind(
+ if (MemAA && MemAA->getState().isValidState() &&
+ MemAA->checkForAllAccessesToMemoryKind(
AccessPred, AAMemoryLocation::ALL_LOCATIONS))
continue;
}
- if (!I.mayHaveSideEffects() && OMPInfoCache.isOnlyUsedByAssume(I))
+ auto &InfoCache = A.getInfoCache();
+ if (!I.mayHaveSideEffects() && InfoCache.isOnlyUsedByAssume(I))
continue;
if (auto *LI = dyn_cast<LoadInst>(&I))
@@ -3039,18 +3127,28 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
ED.EncounteredNonLocalSideEffect = true;
}
+ bool IsEndAndNotReachingAlignedBarriersOnly = false;
if (!isa<UnreachableInst>(BB.getTerminator()) &&
!BB.getTerminator()->getNumSuccessors()) {
- auto &FnED = BEDMap[nullptr];
- mergeInPredecessor(A, FnED, ED);
+ Changed |= mergeInPredecessor(A, InterProceduralED, ED);
- if (IsKernel)
- HandleAlignedBarrier(nullptr, ED);
+ auto &FnED = BEDMap[nullptr];
+ if (IsKernel && !IsExplicitlyAligned)
+ FnED.IsReachingAlignedBarrierOnly = false;
+ Changed |= mergeInPredecessor(A, FnED, ED);
+
+ if (!FnED.IsReachingAlignedBarrierOnly) {
+ IsEndAndNotReachingAlignedBarriersOnly = true;
+ SyncInstWorklist.push_back(BB.getTerminator());
+ auto &BBED = BEDMap[&BB];
+ Changed |= setAndRecord(BBED.IsReachingAlignedBarrierOnly, false);
+ }
}
ExecutionDomainTy &StoredED = BEDMap[&BB];
- ED.IsReachingAlignedBarrierOnly = StoredED.IsReachingAlignedBarrierOnly;
+ ED.IsReachingAlignedBarrierOnly = StoredED.IsReachingAlignedBarrierOnly &
+ !IsEndAndNotReachingAlignedBarriersOnly;
// Check if we computed anything different as part of the forward
// traversal. We do not take assumptions and aligned barriers into account
@@ -3074,36 +3172,38 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
while (!SyncInstWorklist.empty()) {
Instruction *SyncInst = SyncInstWorklist.pop_back_val();
Instruction *CurInst = SyncInst;
- bool HitAlignedBarrier = false;
+ bool HitAlignedBarrierOrKnownEnd = false;
while ((CurInst = CurInst->getPrevNode())) {
auto *CB = dyn_cast<CallBase>(CurInst);
if (!CB)
continue;
- auto &CallED = CEDMap[CB];
- if (SetAndRecord(CallED.IsReachingAlignedBarrierOnly, false))
- Changed = true;
- HitAlignedBarrier = AlignedBarriers.count(CB);
- if (HitAlignedBarrier)
+ auto &CallOutED = CEDMap[{CB, POST}];
+ Changed |= setAndRecord(CallOutED.IsReachingAlignedBarrierOnly, false);
+ auto &CallInED = CEDMap[{CB, PRE}];
+ HitAlignedBarrierOrKnownEnd =
+ AlignedBarriers.count(CB) || !CallInED.IsReachingAlignedBarrierOnly;
+ if (HitAlignedBarrierOrKnownEnd)
break;
+ Changed |= setAndRecord(CallInED.IsReachingAlignedBarrierOnly, false);
}
- if (HitAlignedBarrier)
+ if (HitAlignedBarrierOrKnownEnd)
continue;
BasicBlock *SyncBB = SyncInst->getParent();
for (auto *PredBB : predecessors(SyncBB)) {
- if (LivenessAA.isEdgeDead(PredBB, SyncBB))
+ if (LivenessAA && LivenessAA->isEdgeDead(PredBB, SyncBB))
continue;
if (!Visited.insert(PredBB))
continue;
- SyncInstWorklist.push_back(PredBB->getTerminator());
auto &PredED = BEDMap[PredBB];
- if (SetAndRecord(PredED.IsReachingAlignedBarrierOnly, false))
+ if (setAndRecord(PredED.IsReachingAlignedBarrierOnly, false)) {
Changed = true;
+ SyncInstWorklist.push_back(PredBB->getTerminator());
+ }
}
if (SyncBB != &EntryBB)
continue;
- auto &FnED = BEDMap[nullptr];
- if (SetAndRecord(FnED.IsReachingAlignedBarrierOnly, false))
- Changed = true;
+ Changed |=
+ setAndRecord(InterProceduralED.IsReachingAlignedBarrierOnly, false);
}
return Changed ? ChangeStatus::CHANGED : ChangeStatus::UNCHANGED;
@@ -3146,7 +3246,7 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A)
: AAHeapToShared(IRP, A) {}
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *) const override {
return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) +
" malloc calls eligible.";
}
@@ -3261,7 +3361,7 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue());
auto *SharedMem = new GlobalVariable(
*M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage,
- UndefValue::get(Int8ArrTy), CB->getName() + "_shared", nullptr,
+ PoisonValue::get(Int8ArrTy), CB->getName() + "_shared", nullptr,
GlobalValue::NotThreadLocal,
static_cast<unsigned>(AddressSpace::Shared));
auto *NewBuffer =
@@ -3270,7 +3370,7 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
auto Remark = [&](OptimizationRemark OR) {
return OR << "Replaced globalized variable with "
<< ore::NV("SharedMemory", AllocSize->getZExtValue())
- << ((AllocSize->getZExtValue() != 1) ? " bytes " : " byte ")
+ << (AllocSize->isOne() ? " byte " : " bytes ")
<< "of shared memory.";
};
A.emitRemark<OptimizationRemark>(CB, "OMP111", Remark);
@@ -3278,7 +3378,7 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
MaybeAlign Alignment = CB->getRetAlign();
assert(Alignment &&
"HeapToShared on allocation without alignment attribute");
- SharedMem->setAlignment(MaybeAlign(Alignment));
+ SharedMem->setAlignment(*Alignment);
A.changeAfterManifest(IRPosition::callsite_returned(*CB), *NewBuffer);
A.deleteAfterManifest(*CB);
@@ -3315,9 +3415,9 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
MallocCalls.remove(CB);
continue;
}
- const auto &ED = A.getAAFor<AAExecutionDomain>(
+ const auto *ED = A.getAAFor<AAExecutionDomain>(
*this, IRPosition::function(*F), DepClassTy::REQUIRED);
- if (!ED.isExecutedByInitialThreadOnly(*CB))
+ if (!ED || !ED->isExecutedByInitialThreadOnly(*CB))
MallocCalls.remove(CB);
}
}
@@ -3346,7 +3446,7 @@ struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
void trackStatistics() const override {}
/// See AbstractAttribute::getAsStr()
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *) const override {
if (!isValidState())
return "<invalid>";
return std::string(SPMDCompatibilityTracker.isAssumed() ? "SPMD"
@@ -3456,22 +3556,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
Attributor::SimplifictionCallbackTy StateMachineSimplifyCB =
[&](const IRPosition &IRP, const AbstractAttribute *AA,
bool &UsedAssumedInformation) -> std::optional<Value *> {
- // IRP represents the "use generic state machine" argument of an
- // __kmpc_target_init call. We will answer this one with the internal
- // state. As long as we are not in an invalid state, we will create a
- // custom state machine so the value should be a `i1 false`. If we are
- // in an invalid state, we won't change the value that is in the IR.
- if (!ReachedKnownParallelRegions.isValidState())
- return nullptr;
- // If we have disabled state machine rewrites, don't make a custom one.
- if (DisableOpenMPOptStateMachineRewrite)
return nullptr;
- if (AA)
- A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
- UsedAssumedInformation = !isAtFixpoint();
- auto *FalseVal =
- ConstantInt::getBool(IRP.getAnchorValue().getContext(), false);
- return FalseVal;
};
Attributor::SimplifictionCallbackTy ModeSimplifyCB =
@@ -3622,10 +3707,11 @@ struct AAKernelInfoFunction : AAKernelInfo {
Function *Kernel = getAnchorScope();
Module &M = *Kernel->getParent();
Type *Int8Ty = Type::getInt8Ty(M.getContext());
- new GlobalVariable(M, Int8Ty, /* isConstant */ true,
- GlobalValue::WeakAnyLinkage,
- ConstantInt::get(Int8Ty, NestedParallelism ? 1 : 0),
- Kernel->getName() + "_nested_parallelism");
+ auto *GV = new GlobalVariable(
+ M, Int8Ty, /* isConstant */ true, GlobalValue::WeakAnyLinkage,
+ ConstantInt::get(Int8Ty, NestedParallelism ? 1 : 0),
+ Kernel->getName() + "_nested_parallelism");
+ GV->setVisibility(GlobalValue::HiddenVisibility);
// If we can we change the execution mode to SPMD-mode otherwise we build a
// custom state machine.
@@ -3914,6 +4000,12 @@ struct AAKernelInfoFunction : AAKernelInfo {
bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) {
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+ // We cannot change to SPMD mode if the runtime functions aren't available.
+ if (!OMPInfoCache.runtimeFnsAvailable(
+ {OMPRTL___kmpc_get_hardware_thread_id_in_block,
+ OMPRTL___kmpc_barrier_simple_spmd}))
+ return false;
+
if (!SPMDCompatibilityTracker.isAssumed()) {
for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
if (!NonCompatibleI)
@@ -3951,7 +4043,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
auto *CB = cast<CallBase>(Kernel->user_back());
Kernel = CB->getCaller();
}
- assert(OMPInfoCache.Kernels.count(Kernel) && "Expected kernel function!");
+ assert(omp::isKernel(*Kernel) && "Expected kernel function!");
// Check if the kernel is already in SPMD mode, if so, return success.
GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
@@ -4021,6 +4113,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
if (!ReachedKnownParallelRegions.isValidState())
return ChangeStatus::UNCHANGED;
+ auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
+ if (!OMPInfoCache.runtimeFnsAvailable(
+ {OMPRTL___kmpc_get_hardware_num_threads_in_block,
+ OMPRTL___kmpc_get_warp_size, OMPRTL___kmpc_barrier_simple_generic,
+ OMPRTL___kmpc_kernel_parallel, OMPRTL___kmpc_kernel_end_parallel}))
+ return ChangeStatus::UNCHANGED;
+
const int InitModeArgNo = 1;
const int InitUseStateMachineArgNo = 2;
@@ -4167,7 +4266,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB);
Module &M = *Kernel->getParent();
- auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
FunctionCallee BlockHwSizeFn =
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_get_hardware_num_threads_in_block);
@@ -4220,10 +4318,7 @@ struct AAKernelInfoFunction : AAKernelInfo {
if (WorkFnAI->getType()->getPointerAddressSpace() !=
(unsigned int)AddressSpace::Generic) {
WorkFnAI = new AddrSpaceCastInst(
- WorkFnAI,
- PointerType::getWithSamePointeeType(
- cast<PointerType>(WorkFnAI->getType()),
- (unsigned int)AddressSpace::Generic),
+ WorkFnAI, PointerType::get(Ctx, (unsigned int)AddressSpace::Generic),
WorkFnAI->getName() + ".generic", StateMachineBeginBB);
WorkFnAI->setDebugLoc(DLoc);
}
@@ -4345,19 +4440,20 @@ struct AAKernelInfoFunction : AAKernelInfo {
if (!I.mayWriteToMemory())
return true;
if (auto *SI = dyn_cast<StoreInst>(&I)) {
- const auto &UnderlyingObjsAA = A.getAAFor<AAUnderlyingObjects>(
+ const auto *UnderlyingObjsAA = A.getAAFor<AAUnderlyingObjects>(
*this, IRPosition::value(*SI->getPointerOperand()),
DepClassTy::OPTIONAL);
- auto &HS = A.getAAFor<AAHeapToStack>(
+ auto *HS = A.getAAFor<AAHeapToStack>(
*this, IRPosition::function(*I.getFunction()),
DepClassTy::OPTIONAL);
- if (UnderlyingObjsAA.forallUnderlyingObjects([&](Value &Obj) {
+ if (UnderlyingObjsAA &&
+ UnderlyingObjsAA->forallUnderlyingObjects([&](Value &Obj) {
if (AA::isAssumedThreadLocalObject(A, Obj, *this))
return true;
// Check for AAHeapToStack moved objects which must not be
// guarded.
auto *CB = dyn_cast<CallBase>(&Obj);
- return CB && HS.isAssumedHeapToStack(*CB);
+ return CB && HS && HS->isAssumedHeapToStack(*CB);
}))
return true;
}
@@ -4392,14 +4488,14 @@ struct AAKernelInfoFunction : AAKernelInfo {
// we cannot fix the internal spmd-zation state either.
int SPMD = 0, Generic = 0;
for (auto *Kernel : ReachingKernelEntries) {
- auto &CBAA = A.getAAFor<AAKernelInfo>(
+ auto *CBAA = A.getAAFor<AAKernelInfo>(
*this, IRPosition::function(*Kernel), DepClassTy::OPTIONAL);
- if (CBAA.SPMDCompatibilityTracker.isValidState() &&
- CBAA.SPMDCompatibilityTracker.isAssumed())
+ if (CBAA && CBAA->SPMDCompatibilityTracker.isValidState() &&
+ CBAA->SPMDCompatibilityTracker.isAssumed())
++SPMD;
else
++Generic;
- if (!CBAA.SPMDCompatibilityTracker.isAtFixpoint())
+ if (!CBAA || !CBAA->SPMDCompatibilityTracker.isAtFixpoint())
UsedAssumedInformationFromReachingKernels = true;
}
if (SPMD != 0 && Generic != 0)
@@ -4413,14 +4509,16 @@ struct AAKernelInfoFunction : AAKernelInfo {
bool AllSPMDStatesWereFixed = true;
auto CheckCallInst = [&](Instruction &I) {
auto &CB = cast<CallBase>(I);
- auto &CBAA = A.getAAFor<AAKernelInfo>(
+ auto *CBAA = A.getAAFor<AAKernelInfo>(
*this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
- getState() ^= CBAA.getState();
- AllSPMDStatesWereFixed &= CBAA.SPMDCompatibilityTracker.isAtFixpoint();
+ if (!CBAA)
+ return false;
+ getState() ^= CBAA->getState();
+ AllSPMDStatesWereFixed &= CBAA->SPMDCompatibilityTracker.isAtFixpoint();
AllParallelRegionStatesWereFixed &=
- CBAA.ReachedKnownParallelRegions.isAtFixpoint();
+ CBAA->ReachedKnownParallelRegions.isAtFixpoint();
AllParallelRegionStatesWereFixed &=
- CBAA.ReachedUnknownParallelRegions.isAtFixpoint();
+ CBAA->ReachedUnknownParallelRegions.isAtFixpoint();
return true;
};
@@ -4460,10 +4558,10 @@ private:
assert(Caller && "Caller is nullptr");
- auto &CAA = A.getOrCreateAAFor<AAKernelInfo>(
+ auto *CAA = A.getOrCreateAAFor<AAKernelInfo>(
IRPosition::function(*Caller), this, DepClassTy::REQUIRED);
- if (CAA.ReachingKernelEntries.isValidState()) {
- ReachingKernelEntries ^= CAA.ReachingKernelEntries;
+ if (CAA && CAA->ReachingKernelEntries.isValidState()) {
+ ReachingKernelEntries ^= CAA->ReachingKernelEntries;
return true;
}
@@ -4491,9 +4589,9 @@ private:
assert(Caller && "Caller is nullptr");
- auto &CAA =
+ auto *CAA =
A.getOrCreateAAFor<AAKernelInfo>(IRPosition::function(*Caller));
- if (CAA.ParallelLevels.isValidState()) {
+ if (CAA && CAA->ParallelLevels.isValidState()) {
// Any function that is called by `__kmpc_parallel_51` will not be
// folded as the parallel level in the function is updated. In order to
      // get it right, all the analysis would depend on the implementation. That
@@ -4504,7 +4602,7 @@ private:
return true;
}
- ParallelLevels ^= CAA.ParallelLevels;
+ ParallelLevels ^= CAA->ParallelLevels;
return true;
}
@@ -4538,11 +4636,11 @@ struct AAKernelInfoCallSite : AAKernelInfo {
CallBase &CB = cast<CallBase>(getAssociatedValue());
Function *Callee = getAssociatedFunction();
- auto &AssumptionAA = A.getAAFor<AAAssumptionInfo>(
+ auto *AssumptionAA = A.getAAFor<AAAssumptionInfo>(
*this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
// Check for SPMD-mode assumptions.
- if (AssumptionAA.hasAssumption("ompx_spmd_amenable")) {
+ if (AssumptionAA && AssumptionAA->hasAssumption("ompx_spmd_amenable")) {
SPMDCompatibilityTracker.indicateOptimisticFixpoint();
indicateOptimisticFixpoint();
}
@@ -4567,8 +4665,9 @@ struct AAKernelInfoCallSite : AAKernelInfo {
// Unknown callees might contain parallel regions, except if they have
// an appropriate assumption attached.
- if (!(AssumptionAA.hasAssumption("omp_no_openmp") ||
- AssumptionAA.hasAssumption("omp_no_parallelism")))
+ if (!AssumptionAA ||
+ !(AssumptionAA->hasAssumption("omp_no_openmp") ||
+ AssumptionAA->hasAssumption("omp_no_parallelism")))
ReachedUnknownParallelRegions.insert(&CB);
// If SPMDCompatibilityTracker is not fixed, we need to give up on the
@@ -4643,11 +4742,11 @@ struct AAKernelInfoCallSite : AAKernelInfo {
CB.getArgOperand(WrapperFunctionArgNo)->stripPointerCasts())) {
ReachedKnownParallelRegions.insert(ParallelRegion);
/// Check nested parallelism
- auto &FnAA = A.getAAFor<AAKernelInfo>(
+ auto *FnAA = A.getAAFor<AAKernelInfo>(
*this, IRPosition::function(*ParallelRegion), DepClassTy::OPTIONAL);
- NestedParallelism |= !FnAA.getState().isValidState() ||
- !FnAA.ReachedKnownParallelRegions.empty() ||
- !FnAA.ReachedUnknownParallelRegions.empty();
+ NestedParallelism |= !FnAA || !FnAA->getState().isValidState() ||
+ !FnAA->ReachedKnownParallelRegions.empty() ||
+ !FnAA->ReachedUnknownParallelRegions.empty();
break;
}
// The condition above should usually get the parallel region function
@@ -4691,10 +4790,12 @@ struct AAKernelInfoCallSite : AAKernelInfo {
// If F is not a runtime function, propagate the AAKernelInfo of the callee.
if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
const IRPosition &FnPos = IRPosition::function(*F);
- auto &FnAA = A.getAAFor<AAKernelInfo>(*this, FnPos, DepClassTy::REQUIRED);
- if (getState() == FnAA.getState())
+ auto *FnAA = A.getAAFor<AAKernelInfo>(*this, FnPos, DepClassTy::REQUIRED);
+ if (!FnAA)
+ return indicatePessimisticFixpoint();
+ if (getState() == FnAA->getState())
return ChangeStatus::UNCHANGED;
- getState() = FnAA.getState();
+ getState() = FnAA->getState();
return ChangeStatus::CHANGED;
}
@@ -4707,9 +4808,9 @@ struct AAKernelInfoCallSite : AAKernelInfo {
CallBase &CB = cast<CallBase>(getAssociatedValue());
- auto &HeapToStackAA = A.getAAFor<AAHeapToStack>(
+ auto *HeapToStackAA = A.getAAFor<AAHeapToStack>(
*this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
- auto &HeapToSharedAA = A.getAAFor<AAHeapToShared>(
+ auto *HeapToSharedAA = A.getAAFor<AAHeapToShared>(
*this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
RuntimeFunction RF = It->getSecond();
@@ -4718,13 +4819,15 @@ struct AAKernelInfoCallSite : AAKernelInfo {
// If neither HeapToStack nor HeapToShared assume the call is removed,
// assume SPMD incompatibility.
case OMPRTL___kmpc_alloc_shared:
- if (!HeapToStackAA.isAssumedHeapToStack(CB) &&
- !HeapToSharedAA.isAssumedHeapToShared(CB))
+ if ((!HeapToStackAA || !HeapToStackAA->isAssumedHeapToStack(CB)) &&
+ (!HeapToSharedAA || !HeapToSharedAA->isAssumedHeapToShared(CB)))
SPMDCompatibilityTracker.insert(&CB);
break;
case OMPRTL___kmpc_free_shared:
- if (!HeapToStackAA.isAssumedHeapToStackRemovedFree(CB) &&
- !HeapToSharedAA.isAssumedHeapToSharedRemovedFree(CB))
+ if ((!HeapToStackAA ||
+ !HeapToStackAA->isAssumedHeapToStackRemovedFree(CB)) &&
+ (!HeapToSharedAA ||
+ !HeapToSharedAA->isAssumedHeapToSharedRemovedFree(CB)))
SPMDCompatibilityTracker.insert(&CB);
break;
default:
@@ -4770,7 +4873,7 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
: AAFoldRuntimeCall(IRP, A) {}
/// See AbstractAttribute::getAsStr()
- const std::string getAsStr() const override {
+ const std::string getAsStr(Attributor *) const override {
if (!isValidState())
return "<invalid>";
@@ -4883,28 +4986,29 @@ private:
unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0;
unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0;
- auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
+ auto *CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
*this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
- if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
+ if (!CallerKernelInfoAA ||
+ !CallerKernelInfoAA->ReachingKernelEntries.isValidState())
return indicatePessimisticFixpoint();
- for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
- auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
+ for (Kernel K : CallerKernelInfoAA->ReachingKernelEntries) {
+ auto *AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
DepClassTy::REQUIRED);
- if (!AA.isValidState()) {
+ if (!AA || !AA->isValidState()) {
SimplifiedValue = nullptr;
return indicatePessimisticFixpoint();
}
- if (AA.SPMDCompatibilityTracker.isAssumed()) {
- if (AA.SPMDCompatibilityTracker.isAtFixpoint())
+ if (AA->SPMDCompatibilityTracker.isAssumed()) {
+ if (AA->SPMDCompatibilityTracker.isAtFixpoint())
++KnownSPMDCount;
else
++AssumedSPMDCount;
} else {
- if (AA.SPMDCompatibilityTracker.isAtFixpoint())
+ if (AA->SPMDCompatibilityTracker.isAtFixpoint())
++KnownNonSPMDCount;
else
++AssumedNonSPMDCount;
@@ -4943,16 +5047,17 @@ private:
ChangeStatus foldParallelLevel(Attributor &A) {
std::optional<Value *> SimplifiedValueBefore = SimplifiedValue;
- auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
+ auto *CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
*this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
- if (!CallerKernelInfoAA.ParallelLevels.isValidState())
+ if (!CallerKernelInfoAA ||
+ !CallerKernelInfoAA->ParallelLevels.isValidState())
return indicatePessimisticFixpoint();
- if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
+ if (!CallerKernelInfoAA->ReachingKernelEntries.isValidState())
return indicatePessimisticFixpoint();
- if (CallerKernelInfoAA.ReachingKernelEntries.empty()) {
+ if (CallerKernelInfoAA->ReachingKernelEntries.empty()) {
assert(!SimplifiedValue &&
"SimplifiedValue should keep none at this point");
return ChangeStatus::UNCHANGED;
@@ -4960,19 +5065,19 @@ private:
unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0;
unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0;
- for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
- auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
+ for (Kernel K : CallerKernelInfoAA->ReachingKernelEntries) {
+ auto *AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
DepClassTy::REQUIRED);
- if (!AA.SPMDCompatibilityTracker.isValidState())
+ if (!AA || !AA->SPMDCompatibilityTracker.isValidState())
return indicatePessimisticFixpoint();
- if (AA.SPMDCompatibilityTracker.isAssumed()) {
- if (AA.SPMDCompatibilityTracker.isAtFixpoint())
+ if (AA->SPMDCompatibilityTracker.isAssumed()) {
+ if (AA->SPMDCompatibilityTracker.isAtFixpoint())
++KnownSPMDCount;
else
++AssumedSPMDCount;
} else {
- if (AA.SPMDCompatibilityTracker.isAtFixpoint())
+ if (AA->SPMDCompatibilityTracker.isAtFixpoint())
++KnownNonSPMDCount;
else
++AssumedNonSPMDCount;
@@ -5005,14 +5110,15 @@ private:
int32_t CurrentAttrValue = -1;
std::optional<Value *> SimplifiedValueBefore = SimplifiedValue;
- auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
+ auto *CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
*this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
- if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
+ if (!CallerKernelInfoAA ||
+ !CallerKernelInfoAA->ReachingKernelEntries.isValidState())
return indicatePessimisticFixpoint();
// Iterate over the kernels that reach this function
- for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
+ for (Kernel K : CallerKernelInfoAA->ReachingKernelEntries) {
int32_t NextAttrVal = K->getFnAttributeAsParsedInteger(Attr, -1);
if (NextAttrVal == -1 ||
@@ -5135,6 +5241,8 @@ void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) {
A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(F));
if (!DisableOpenMPOptDeglobalization)
A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(F));
+ if (F.hasFnAttribute(Attribute::Convergent))
+ A.getOrCreateAAFor<AANonConvergent>(IRPosition::function(F));
for (auto &I : instructions(F)) {
if (auto *LI = dyn_cast<LoadInst>(&I)) {
@@ -5147,6 +5255,10 @@ void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) {
A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*SI));
continue;
}
+ if (auto *FI = dyn_cast<FenceInst>(&I)) {
+ A.getOrCreateAAFor<AAIsDead>(IRPosition::value(*FI));
+ continue;
+ }
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
if (II->getIntrinsicID() == Intrinsic::assume) {
A.getOrCreateAAFor<AAPotentialValues>(
@@ -5304,6 +5416,8 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
});
};
+ bool Changed = false;
+
// Create internal copies of each function if this is a kernel Module. This
  // allows interprocedural passes to see every call edge.
DenseMap<Function *, Function *> InternalizedMap;
@@ -5319,7 +5433,8 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
}
}
- Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
+ Changed |=
+ Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
}
// Look at every function in the Module unless it was internalized.
@@ -5332,7 +5447,7 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
}
if (SCC.empty())
- return PreservedAnalyses::all();
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
AnalysisGetter AG(FAM);
@@ -5343,7 +5458,9 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
BumpPtrAllocator Allocator;
CallGraphUpdater CGUpdater;
- OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ nullptr, Kernels);
+ bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
+ LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
+ OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ nullptr, PostLink);
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
@@ -5356,11 +5473,14 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
AC.OREGetter = OREGetter;
AC.PassName = DEBUG_TYPE;
AC.InitializationCallback = OpenMPOpt::registerAAsForFunction;
+ AC.IPOAmendableCB = [](const Function &F) {
+ return F.hasFnAttribute("kernel");
+ };
Attributor A(Functions, InfoCache, AC);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
- bool Changed = OMPOpt.run(true);
+ Changed |= OMPOpt.run(true);
// Optionally inline device functions for potentially better performance.
if (AlwaysInlineDeviceFunctions && isOpenMPDevice(M))
@@ -5417,9 +5537,11 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
CallGraphUpdater CGUpdater;
CGUpdater.initialize(CG, C, AM, UR);
+ bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
+ LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink;
SetVector<Function *> Functions(SCC.begin(), SCC.end());
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
- /*CGSCC*/ &Functions, Kernels);
+ /*CGSCC*/ &Functions, PostLink);
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
@@ -5447,6 +5569,8 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
return PreservedAnalyses::all();
}
+bool llvm::omp::isKernel(Function &Fn) { return Fn.hasFnAttribute("kernel"); }
+
KernelSet llvm::omp::getDeviceKernels(Module &M) {
// TODO: Create a more cross-platform way of determining device kernels.
NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations");
@@ -5467,6 +5591,7 @@ KernelSet llvm::omp::getDeviceKernels(Module &M) {
if (!KernelFn)
continue;
+ assert(isKernel(*KernelFn) && "Inconsistent kernel function annotation");
++NumOpenMPTargetRegionKernels;
Kernels.insert(KernelFn);
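
The OpenMPOpt hunks above are dominated by one mechanical change: Attributor queries such as getAAFor and getOrCreateAAFor now hand back a pointer that may be null instead of a reference, so every use is preceded by a null check that falls back to the pessimistic result. A minimal sketch of that shape, assuming a placeholder attribute kind AAExample that is not part of this patch:

  // Sketch of the query-then-check pattern used throughout the hunks above.
  // AAExample is a stand-in name; the calls themselves mirror the patch.
  ChangeStatus updateImpl(Attributor &A) {
    const auto *ExampleAA = A.getAAFor<AAExample>(
        *this, IRPosition::function(*getAnchorScope()), DepClassTy::OPTIONAL);
    // A null result means the attribute could not be created or queried;
    // treat it like an invalid state instead of dereferencing blindly.
    if (!ExampleAA || !ExampleAA->getState().isValidState())
      return indicatePessimisticFixpoint();
    getState() ^= ExampleAA->getState();
    return ChangeStatus::UNCHANGED;
  }
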
diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 310e4d4164a5..b88ba2dec24b 100644
--- a/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -41,8 +42,6 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/User.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
@@ -342,52 +341,6 @@ private:
OptimizationRemarkEmitter &ORE) const;
};
-struct PartialInlinerLegacyPass : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
-
- PartialInlinerLegacyPass() : ModulePass(ID) {
- initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
-
- bool runOnModule(Module &M) override {
- if (skipModule(M))
- return false;
-
- AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
- TargetTransformInfoWrapperPass *TTIWP =
- &getAnalysis<TargetTransformInfoWrapperPass>();
- ProfileSummaryInfo &PSI =
- getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
-
- auto GetAssumptionCache = [&ACT](Function &F) -> AssumptionCache & {
- return ACT->getAssumptionCache(F);
- };
-
- auto LookupAssumptionCache = [ACT](Function &F) -> AssumptionCache * {
- return ACT->lookupAssumptionCache(F);
- };
-
- auto GetTTI = [&TTIWP](Function &F) -> TargetTransformInfo & {
- return TTIWP->getTTI(F);
- };
-
- auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
- return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- };
-
- return PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
- GetTLI, PSI)
- .run(M);
- }
-};
-
} // end anonymous namespace
std::unique_ptr<FunctionOutliningMultiRegionInfo>
@@ -1027,7 +980,7 @@ PartialInlinerImpl::FunctionCloner::FunctionCloner(
// Go through all Outline Candidate Regions and update all BasicBlock
// information.
- for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
+ for (const FunctionOutliningMultiRegionInfo::OutlineRegionInfo &RegionInfo :
OI->ORI) {
SmallVector<BasicBlock *, 8> Region;
for (BasicBlock *BB : RegionInfo.Region)
@@ -1226,14 +1179,14 @@ PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
ToExtract.push_back(ClonedOI->NonReturnBlock);
OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
ClonedOI->NonReturnBlock, ClonedFuncTTI);
- for (BasicBlock &BB : *ClonedFunc)
- if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
- ToExtract.push_back(&BB);
+ for (BasicBlock *BB : depth_first(&ClonedFunc->getEntryBlock()))
+ if (!ToBeInlined(BB) && BB != ClonedOI->NonReturnBlock) {
+ ToExtract.push_back(BB);
// FIXME: the code extractor may hoist/sink more code
// into the outlined function which may make the outlining
// overhead (the difference of the outlined function cost
// and OutliningRegionCost) look larger.
- OutlinedRegionCost += computeBBInlineCost(&BB, ClonedFuncTTI);
+ OutlinedRegionCost += computeBBInlineCost(BB, ClonedFuncTTI);
}
// Extract the body of the if.
@@ -1429,7 +1382,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
<< ore::NV("Caller", CB->getCaller());
- InlineFunctionInfo IFI(nullptr, GetAssumptionCache, &PSI);
+ InlineFunctionInfo IFI(GetAssumptionCache, &PSI);
// We can only forward varargs when we outlined a single region, else we
// bail on vararg functions.
if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, true,
@@ -1497,21 +1450,6 @@ bool PartialInlinerImpl::run(Module &M) {
return Changed;
}
-char PartialInlinerLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
- "Partial Inliner", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
- "Partial Inliner", false, false)
-
-ModulePass *llvm::createPartialInliningPass() {
- return new PartialInlinerLegacyPass();
-}
-
PreservedAnalyses PartialInlinerPass::run(Module &M,
ModuleAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
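
For the PartialInlining change above, the relevant difference is that depth_first(&F.getEntryBlock()) walks only blocks reachable from the entry, while iterating the function directly also visits unreachable blocks. A small, self-contained sketch of the reachable-only traversal (the helper name is illustrative, not from the patch):

  #include "llvm/ADT/DepthFirstIterator.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/Function.h"

  using namespace llvm;

  // Collect only the blocks reachable from the entry, in depth-first order.
  static void collectReachableBlocks(Function &F,
                                     SmallVectorImpl<BasicBlock *> &Out) {
    for (BasicBlock *BB : depth_first(&F.getEntryBlock()))
      Out.push_back(BB);
  }
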
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
deleted file mode 100644
index 6b91c8494f39..000000000000
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ /dev/null
@@ -1,517 +0,0 @@
-//===- PassManagerBuilder.cpp - Build Standard Pass -----------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the PassManagerBuilder class, which is used to set up a
-// "standard" optimization sequence suitable for languages like C and C++.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
-#include "llvm-c/Transforms/PassManagerBuilder.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/ScopedNoAliasAA.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Target/CGPassBuilderOption.h"
-#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/Attributor.h"
-#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
-#include "llvm/Transforms/IPO/FunctionAttrs.h"
-#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
-#include "llvm/Transforms/InstCombine/InstCombine.h"
-#include "llvm/Transforms/Instrumentation.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/GVN.h"
-#include "llvm/Transforms/Scalar/LICM.h"
-#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
-#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
-#include "llvm/Transforms/Utils.h"
-#include "llvm/Transforms/Vectorize.h"
-
-using namespace llvm;
-
-PassManagerBuilder::PassManagerBuilder() {
- OptLevel = 2;
- SizeLevel = 0;
- LibraryInfo = nullptr;
- Inliner = nullptr;
- DisableUnrollLoops = false;
- SLPVectorize = false;
- LoopVectorize = true;
- LoopsInterleaved = true;
- LicmMssaOptCap = SetLicmMssaOptCap;
- LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
- DisableGVNLoadPRE = false;
- ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
- VerifyInput = false;
- VerifyOutput = false;
- MergeFunctions = false;
- DivergentTarget = false;
- CallGraphProfile = true;
-}
-
-PassManagerBuilder::~PassManagerBuilder() {
- delete LibraryInfo;
- delete Inliner;
-}
-
-void PassManagerBuilder::addInitialAliasAnalysisPasses(
- legacy::PassManagerBase &PM) const {
- // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
- // BasicAliasAnalysis wins if they disagree. This is intended to help
- // support "obvious" type-punning idioms.
- PM.add(createTypeBasedAAWrapperPass());
- PM.add(createScopedNoAliasAAWrapperPass());
-}
-
-void PassManagerBuilder::populateFunctionPassManager(
- legacy::FunctionPassManager &FPM) {
- // Add LibraryInfo if we have some.
- if (LibraryInfo)
- FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
-
- if (OptLevel == 0) return;
-
- addInitialAliasAnalysisPasses(FPM);
-
- // Lower llvm.expect to metadata before attempting transforms.
- // Compare/branch metadata may alter the behavior of passes like SimplifyCFG.
- FPM.add(createLowerExpectIntrinsicPass());
- FPM.add(createCFGSimplificationPass());
- FPM.add(createSROAPass());
- FPM.add(createEarlyCSEPass());
-}
-
-void PassManagerBuilder::addFunctionSimplificationPasses(
- legacy::PassManagerBase &MPM) {
- // Start of function pass.
- // Break up aggregate allocas, using SSAUpdater.
- assert(OptLevel >= 1 && "Calling function optimizer with no optimization level!");
- MPM.add(createSROAPass());
- MPM.add(createEarlyCSEPass(true /* Enable mem-ssa. */)); // Catch trivial redundancies
-
- if (OptLevel > 1) {
- // Speculative execution if the target has divergent branches; otherwise nop.
- MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass());
-
- MPM.add(createJumpThreadingPass()); // Thread jumps.
- MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals
- }
- MPM.add(
- createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
- true))); // Merge & remove BBs
- // Combine silly seq's
- MPM.add(createInstructionCombiningPass());
- if (SizeLevel == 0)
- MPM.add(createLibCallsShrinkWrapPass());
-
- // TODO: Investigate the cost/benefit of tail call elimination on debugging.
- if (OptLevel > 1)
- MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
- MPM.add(
- createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
- true))); // Merge & remove BBs
- MPM.add(createReassociatePass()); // Reassociate expressions
-
- // Begin the loop pass pipeline.
-
- // The simple loop unswitch pass relies on separate cleanup passes. Schedule
- // them first so when we re-process a loop they run before other loop
- // passes.
- MPM.add(createLoopInstSimplifyPass());
- MPM.add(createLoopSimplifyCFGPass());
-
- // Try to remove as much code from the loop header as possible,
- // to reduce amount of IR that will have to be duplicated. However,
- // do not perform speculative hoisting the first time as LICM
- // will destroy metadata that may not need to be destroyed if run
- // after loop rotation.
- // TODO: Investigate promotion cap for O1.
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
- /*AllowSpeculation=*/false));
- // Rotate Loop - disable header duplication at -Oz
- MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, false));
- // TODO: Investigate promotion cap for O1.
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
- /*AllowSpeculation=*/true));
- MPM.add(createSimpleLoopUnswitchLegacyPass(OptLevel == 3));
- // FIXME: We break the loop pass pipeline here in order to do full
- // simplifycfg. Eventually loop-simplifycfg should be enhanced to replace the
- // need for this.
- MPM.add(createCFGSimplificationPass(
- SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
- MPM.add(createInstructionCombiningPass());
- // We resume loop passes creating a second loop pipeline here.
- MPM.add(createLoopIdiomPass()); // Recognize idioms like memset.
- MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars
- MPM.add(createLoopDeletionPass()); // Delete dead loops
-
- // Unroll small loops and perform peeling.
- MPM.add(createSimpleLoopUnrollPass(OptLevel, DisableUnrollLoops,
- ForgetAllSCEVInLoopUnroll));
- // This ends the loop pass pipelines.
-
- // Break up allocas that may now be splittable after loop unrolling.
- MPM.add(createSROAPass());
-
- if (OptLevel > 1) {
- MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
- MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
- }
- MPM.add(createSCCPPass()); // Constant prop with SCCP
-
- // Delete dead bit computations (instcombine runs after to fold away the dead
- // computations, and then ADCE will run later to exploit any new DCE
- // opportunities that creates).
- MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations
-
- // Run instcombine after redundancy elimination to exploit opportunities
- // opened up by them.
- MPM.add(createInstructionCombiningPass());
- if (OptLevel > 1) {
- MPM.add(createJumpThreadingPass()); // Thread jumps
- MPM.add(createCorrelatedValuePropagationPass());
- }
- MPM.add(createAggressiveDCEPass()); // Delete dead instructions
-
- MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
- // TODO: Investigate if this is too expensive at O1.
- if (OptLevel > 1) {
- MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
- MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
- /*AllowSpeculation=*/true));
- }
-
- // Merge & remove BBs and sink & hoist common instructions.
- MPM.add(createCFGSimplificationPass(
- SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true)));
- // Clean up after everything.
- MPM.add(createInstructionCombiningPass());
-}
-
-/// FIXME: Should LTO cause any differences to this set of passes?
-void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM,
- bool IsFullLTO) {
- PM.add(createLoopVectorizePass(!LoopsInterleaved, !LoopVectorize));
-
- if (IsFullLTO) {
- // The vectorizer may have significantly shortened a loop body; unroll
- // again. Unroll small loops to hide loop backedge latency and saturate any
- // parallel execution resources of an out-of-order processor. We also then
- // need to clean up redundancies and loop invariant code.
- // FIXME: It would be really good to use a loop-integrated instruction
- // combiner for cleanup here so that the unrolling and LICM can be pipelined
- // across the loop nests.
- PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
- ForgetAllSCEVInLoopUnroll));
- PM.add(createWarnMissedTransformationsPass());
- }
-
- if (!IsFullLTO) {
- // Eliminate loads by forwarding stores from the previous iteration to loads
- // of the current iteration.
- PM.add(createLoopLoadEliminationPass());
- }
- // Cleanup after the loop optimization passes.
- PM.add(createInstructionCombiningPass());
-
- // Now that we've formed fast to execute loop structures, we do further
- // optimizations. These are run afterward as they might block doing complex
- // analyses and transforms such as what are needed for loop vectorization.
-
- // Cleanup after loop vectorization, etc. Simplification passes like CVP and
- // GVN, loop transforms, and others have already run, so it's now better to
- // convert to more optimized IR using more aggressive simplify CFG options.
- // The extra sinking transform can create larger basic blocks, so do this
- // before SLP vectorization.
- PM.add(createCFGSimplificationPass(SimplifyCFGOptions()
- .forwardSwitchCondToPhi(true)
- .convertSwitchRangeToICmp(true)
- .convertSwitchToLookupTable(true)
- .needCanonicalLoops(false)
- .hoistCommonInsts(true)
- .sinkCommonInsts(true)));
-
- if (IsFullLTO) {
- PM.add(createSCCPPass()); // Propagate exposed constants
- PM.add(createInstructionCombiningPass()); // Clean up again
- PM.add(createBitTrackingDCEPass());
- }
-
- // Optimize parallel scalar instruction chains into SIMD instructions.
- if (SLPVectorize) {
- PM.add(createSLPVectorizerPass());
- }
-
- // Enhance/cleanup vector code.
- PM.add(createVectorCombinePass());
-
- if (!IsFullLTO) {
- PM.add(createInstructionCombiningPass());
-
- // Unroll small loops
- PM.add(createLoopUnrollPass(OptLevel, DisableUnrollLoops,
- ForgetAllSCEVInLoopUnroll));
-
- if (!DisableUnrollLoops) {
-      // LoopUnroll may generate some redundancy to clean up.
- PM.add(createInstructionCombiningPass());
-
- // Runtime unrolling will introduce runtime check in loop prologue. If the
-      // unrolled loop is an inner loop, then the prologue will be inside the
- // outer loop. LICM pass can help to promote the runtime check out if the
- // checked value is loop invariant.
- PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
- /*AllowSpeculation=*/true));
- }
-
- PM.add(createWarnMissedTransformationsPass());
- }
-
- // After vectorization and unrolling, assume intrinsics may tell us more
- // about pointer alignments.
- PM.add(createAlignmentFromAssumptionsPass());
-
- if (IsFullLTO)
- PM.add(createInstructionCombiningPass());
-}
-
-void PassManagerBuilder::populateModulePassManager(
- legacy::PassManagerBase &MPM) {
- MPM.add(createAnnotation2MetadataLegacyPass());
-
- // Allow forcing function attributes as a debugging and tuning aid.
- MPM.add(createForceFunctionAttrsLegacyPass());
-
- // If all optimizations are disabled, just run the always-inline pass and,
- // if enabled, the function merging pass.
- if (OptLevel == 0) {
- if (Inliner) {
- MPM.add(Inliner);
- Inliner = nullptr;
- }
-
- // FIXME: The BarrierNoopPass is a HACK! The inliner pass above implicitly
- // creates a CGSCC pass manager, but we don't want to add extensions into
- // that pass manager. To prevent this we insert a no-op module pass to reset
- // the pass manager to get the same behavior as EP_OptimizerLast in non-O0
- // builds. The function merging pass is
- if (MergeFunctions)
- MPM.add(createMergeFunctionsPass());
- return;
- }
-
- // Add LibraryInfo if we have some.
- if (LibraryInfo)
- MPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo));
-
- addInitialAliasAnalysisPasses(MPM);
-
- // Infer attributes about declarations if possible.
- MPM.add(createInferFunctionAttrsLegacyPass());
-
- if (OptLevel > 2)
- MPM.add(createCallSiteSplittingPass());
-
- MPM.add(createIPSCCPPass()); // IP SCCP
- MPM.add(createCalledValuePropagationPass());
-
- MPM.add(createGlobalOptimizerPass()); // Optimize out global vars
- // Promote any localized global vars.
- MPM.add(createPromoteMemoryToRegisterPass());
-
- MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
-
- MPM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
- MPM.add(
- createCFGSimplificationPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
- true))); // Clean up after IPCP & DAE
-
- // We add a module alias analysis pass here. In part due to bugs in the
- // analysis infrastructure this "works" in that the analysis stays alive
- // for the entire SCC pass run below.
- MPM.add(createGlobalsAAWrapperPass());
-
- // Start of CallGraph SCC passes.
- bool RunInliner = false;
- if (Inliner) {
- MPM.add(Inliner);
- Inliner = nullptr;
- RunInliner = true;
- }
-
- MPM.add(createPostOrderFunctionAttrsLegacyPass());
-
- addFunctionSimplificationPasses(MPM);
-
- // FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
- // pass manager that we are specifically trying to avoid. To prevent this
- // we must insert a no-op module pass to reset the pass manager.
- MPM.add(createBarrierNoopPass());
-
- if (OptLevel > 1)
- // Remove avail extern fns and globals definitions if we aren't
- // compiling an object file for later LTO. For LTO we want to preserve
- // these so they are eligible for inlining at link-time. Note if they
- // are unreferenced they will be removed by GlobalDCE later, so
- // this only impacts referenced available externally globals.
- // Eventually they will be suppressed during codegen, but eliminating
- // here enables more opportunity for GlobalDCE as it may make
- // globals referenced by available external functions dead
- // and saves running remaining passes on the eliminated functions.
- MPM.add(createEliminateAvailableExternallyPass());
-
- MPM.add(createReversePostOrderFunctionAttrsPass());
-
- // The inliner performs some kind of dead code elimination as it goes,
- // but there are cases that are not really caught by it. We might
- // at some point consider teaching the inliner about them, but it
- // is OK for now to run GlobalOpt + GlobalDCE in tandem as their
-  // benefits generally outweigh the cost, making the whole pipeline
- // faster.
- if (RunInliner) {
- MPM.add(createGlobalOptimizerPass());
- MPM.add(createGlobalDCEPass());
- }
-
- // We add a fresh GlobalsModRef run at this point. This is particularly
- // useful as the above will have inlined, DCE'ed, and function-attr
- // propagated everything. We should at this point have a reasonably minimal
- // and richly annotated call graph. By computing aliasing and mod/ref
- // information for all local globals here, the late loop passes and notably
- // the vectorizer will be able to use them to help recognize vectorizable
- // memory operations.
- //
- // Note that this relies on a bug in the pass manager which preserves
- // a module analysis into a function pass pipeline (and throughout it) so
- // long as the first function pass doesn't invalidate the module analysis.
- // Thus both Float2Int and LoopRotate have to preserve AliasAnalysis for
- // this to work. Fortunately, it is trivial to preserve AliasAnalysis
- // (doing nothing preserves it as it is required to be conservatively
- // correct in the face of IR changes).
- MPM.add(createGlobalsAAWrapperPass());
-
- MPM.add(createFloat2IntPass());
- MPM.add(createLowerConstantIntrinsicsPass());
-
-  // Re-rotate loops in all our loop nests. These may have fallen out of
- // rotated form due to GVN or other transformations, and the vectorizer relies
- // on the rotated form. Disable header duplication at -Oz.
- MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, false));
-
- // Distribute loops to allow partial vectorization. I.e. isolate dependences
- // into separate loop that would otherwise inhibit vectorization. This is
- // currently only performed for loops marked with the metadata
- // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
- MPM.add(createLoopDistributePass());
-
- addVectorPasses(MPM, /* IsFullLTO */ false);
-
- // FIXME: We shouldn't bother with this anymore.
- MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
-
- // GlobalOpt already deletes dead functions and globals, at -O2 try a
- // late pass of GlobalDCE. It is capable of deleting dead cycles.
- if (OptLevel > 1) {
- MPM.add(createGlobalDCEPass()); // Remove dead fns and globals.
- MPM.add(createConstantMergePass()); // Merge dup global constants
- }
-
- if (MergeFunctions)
- MPM.add(createMergeFunctionsPass());
-
- // LoopSink pass sinks instructions hoisted by LICM, which serves as a
- // canonicalization pass that enables other optimizations. As a result,
- // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
- // result too early.
- MPM.add(createLoopSinkPass());
- // Get rid of LCSSA nodes.
- MPM.add(createInstSimplifyLegacyPass());
-
- // This hoists/decomposes div/rem ops. It should run after other sink/hoist
- // passes to avoid re-sinking, but before SimplifyCFG because it can allow
- // flattening of blocks.
- MPM.add(createDivRemPairsPass());
-
- // LoopSink (and other loop passes since the last simplifyCFG) might have
- // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
- MPM.add(createCFGSimplificationPass(
- SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
-}
-
-LLVMPassManagerBuilderRef LLVMPassManagerBuilderCreate() {
- PassManagerBuilder *PMB = new PassManagerBuilder();
- return wrap(PMB);
-}
-
-void LLVMPassManagerBuilderDispose(LLVMPassManagerBuilderRef PMB) {
- PassManagerBuilder *Builder = unwrap(PMB);
- delete Builder;
-}
-
-void
-LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB,
- unsigned OptLevel) {
- PassManagerBuilder *Builder = unwrap(PMB);
- Builder->OptLevel = OptLevel;
-}
-
-void
-LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB,
- unsigned SizeLevel) {
- PassManagerBuilder *Builder = unwrap(PMB);
- Builder->SizeLevel = SizeLevel;
-}
-
-void
-LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB,
- LLVMBool Value) {
- // NOTE: The DisableUnitAtATime switch has been removed.
-}
-
-void
-LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB,
- LLVMBool Value) {
- PassManagerBuilder *Builder = unwrap(PMB);
- Builder->DisableUnrollLoops = Value;
-}
-
-void
-LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB,
- LLVMBool Value) {
- // NOTE: The simplify-libcalls pass has been removed.
-}
-
-void
-LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB,
- unsigned Threshold) {
- PassManagerBuilder *Builder = unwrap(PMB);
- Builder->Inliner = createFunctionInliningPass(Threshold);
-}
-
-void
-LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
- LLVMPassManagerRef PM) {
- PassManagerBuilder *Builder = unwrap(PMB);
- legacy::FunctionPassManager *FPM = unwrap<legacy::FunctionPassManager>(PM);
- Builder->populateFunctionPassManager(*FPM);
-}
-
-void
-LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
- LLVMPassManagerRef PM) {
- PassManagerBuilder *Builder = unwrap(PMB);
- legacy::PassManagerBase *MPM = unwrap(PM);
- Builder->populateModulePassManager(*MPM);
-}
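
PassManagerBuilder.cpp is deleted outright here; the corresponding way to build a standard optimization pipeline is the new pass manager's PassBuilder. A minimal sketch of that setup, assuming an existing Module M (background for the deletion, not code from the patch):

  #include "llvm/IR/Module.h"
  #include "llvm/Passes/PassBuilder.h"

  using namespace llvm;

  static void runDefaultO2Pipeline(Module &M) {
    LoopAnalysisManager LAM;
    FunctionAnalysisManager FAM;
    CGSCCAnalysisManager CGAM;
    ModuleAnalysisManager MAM;

    PassBuilder PB;
    // Register the analyses each manager owns and cross-wire the proxies;
    // PassManagerBuilder used to do the equivalent wiring implicitly.
    PB.registerModuleAnalyses(MAM);
    PB.registerCGSCCAnalyses(CGAM);
    PB.registerFunctionAnalyses(FAM);
    PB.registerLoopAnalyses(LAM);
    PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

    ModulePassManager MPM =
        PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);
    MPM.run(M, MAM);
  }
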
diff --git a/llvm/lib/Transforms/IPO/SCCP.cpp b/llvm/lib/Transforms/IPO/SCCP.cpp
index 5c1582ddfdae..e2e6364df906 100644
--- a/llvm/lib/Transforms/IPO/SCCP.cpp
+++ b/llvm/lib/Transforms/IPO/SCCP.cpp
@@ -13,14 +13,14 @@
#include "llvm/Transforms/IPO/SCCP.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueLattice.h"
#include "llvm/Analysis/ValueLatticeUtils.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/InitializePasses.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/CommandLine.h"
@@ -42,8 +42,8 @@ STATISTIC(NumDeadBlocks , "Number of basic blocks unreachable");
STATISTIC(NumInstReplaced,
"Number of instructions replaced with (simpler) instruction");
-static cl::opt<unsigned> FuncSpecializationMaxIters(
- "func-specialization-max-iters", cl::init(1), cl::Hidden, cl::desc(
+static cl::opt<unsigned> FuncSpecMaxIters(
+ "funcspec-max-iters", cl::init(1), cl::Hidden, cl::desc(
"The maximum number of iterations function specialization is run"));
static void findReturnsToZap(Function &F,
@@ -111,10 +111,12 @@ static bool runIPSCCP(
std::function<const TargetLibraryInfo &(Function &)> GetTLI,
std::function<TargetTransformInfo &(Function &)> GetTTI,
std::function<AssumptionCache &(Function &)> GetAC,
- function_ref<AnalysisResultsForFn(Function &)> getAnalysis,
+ std::function<DominatorTree &(Function &)> GetDT,
+ std::function<BlockFrequencyInfo &(Function &)> GetBFI,
bool IsFuncSpecEnabled) {
SCCPSolver Solver(DL, GetTLI, M.getContext());
- FunctionSpecializer Specializer(Solver, M, FAM, GetTLI, GetTTI, GetAC);
+ FunctionSpecializer Specializer(Solver, M, FAM, GetBFI, GetTLI, GetTTI,
+ GetAC);
// Loop over all functions, marking arguments to those with their addresses
// taken or that are external as overdefined.
@@ -122,7 +124,9 @@ static bool runIPSCCP(
if (F.isDeclaration())
continue;
- Solver.addAnalysis(F, getAnalysis(F));
+ DominatorTree &DT = GetDT(F);
+ AssumptionCache &AC = GetAC(F);
+ Solver.addPredicateInfo(F, DT, AC);
// Determine if we can track the function's return values. If so, add the
// function to the solver's set of return-tracked functions.
@@ -158,7 +162,7 @@ static bool runIPSCCP(
if (IsFuncSpecEnabled) {
unsigned Iters = 0;
- while (Iters++ < FuncSpecializationMaxIters && Specializer.run());
+ while (Iters++ < FuncSpecMaxIters && Specializer.run());
}
// Iterate over all of the instructions in the module, replacing them with
@@ -187,8 +191,8 @@ static bool runIPSCCP(
if (ME == MemoryEffects::unknown())
return AL;
- ME |= MemoryEffects(MemoryEffects::Other,
- ME.getModRef(MemoryEffects::ArgMem));
+ ME |= MemoryEffects(IRMemLocation::Other,
+ ME.getModRef(IRMemLocation::ArgMem));
return AL.addFnAttribute(
F.getContext(),
Attribute::getWithMemoryEffects(F.getContext(), ME));
@@ -223,10 +227,9 @@ static bool runIPSCCP(
BB, InsertedValues, NumInstRemoved, NumInstReplaced);
}
- DomTreeUpdater DTU = IsFuncSpecEnabled && Specializer.isClonedFunction(&F)
- ? DomTreeUpdater(DomTreeUpdater::UpdateStrategy::Lazy)
- : Solver.getDTU(F);
-
+ DominatorTree *DT = FAM->getCachedResult<DominatorTreeAnalysis>(F);
+ PostDominatorTree *PDT = FAM->getCachedResult<PostDominatorTreeAnalysis>(F);
+ DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);
// Change dead blocks to unreachable. We do it after replacing constants
// in all executable blocks, because changeToUnreachable may remove PHI
// nodes in executable blocks we found values for. The function's entry
@@ -292,13 +295,6 @@ static bool runIPSCCP(
if (!CB || CB->getCalledFunction() != F)
continue;
- // Limit to cases where the return value is guaranteed to be neither
- // poison nor undef. Poison will be outside any range and currently
- // values outside of the specified range cause immediate undefined
- // behavior.
- if (!isGuaranteedNotToBeUndefOrPoison(CB, nullptr, CB))
- continue;
-
// Do not touch existing metadata for now.
// TODO: We should be able to take the intersection of the existing
// metadata and the inferred range.
@@ -338,9 +334,14 @@ static bool runIPSCCP(
// Remove the returned attribute for zapped functions and the
// corresponding call sites.
+ // Also remove any attributes that convert an undef return value into
+ // immediate undefined behavior
+ AttributeMask UBImplyingAttributes =
+ AttributeFuncs::getUBImplyingAttributes();
for (Function *F : FuncZappedReturn) {
for (Argument &A : F->args())
F->removeParamAttr(A.getArgNo(), Attribute::Returned);
+ F->removeRetAttrs(UBImplyingAttributes);
for (Use &U : F->uses()) {
CallBase *CB = dyn_cast<CallBase>(U.getUser());
if (!CB) {
@@ -354,6 +355,7 @@ static bool runIPSCCP(
for (Use &Arg : CB->args())
CB->removeParamAttr(CB->getArgOperandNo(&Arg), Attribute::Returned);
+ CB->removeRetAttrs(UBImplyingAttributes);
}
}
@@ -368,9 +370,9 @@ static bool runIPSCCP(
while (!GV->use_empty()) {
StoreInst *SI = cast<StoreInst>(GV->user_back());
SI->eraseFromParent();
- MadeChanges = true;
}
- M.getGlobalList().erase(GV);
+ MadeChanges = true;
+ M.eraseGlobalVariable(GV);
++NumGlobalConst;
}
@@ -389,15 +391,15 @@ PreservedAnalyses IPSCCPPass::run(Module &M, ModuleAnalysisManager &AM) {
auto GetAC = [&FAM](Function &F) -> AssumptionCache & {
return FAM.getResult<AssumptionAnalysis>(F);
};
- auto getAnalysis = [&FAM, this](Function &F) -> AnalysisResultsForFn {
- DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
- return {
- std::make_unique<PredicateInfo>(F, DT, FAM.getResult<AssumptionAnalysis>(F)),
- &DT, FAM.getCachedResult<PostDominatorTreeAnalysis>(F),
- isFuncSpecEnabled() ? &FAM.getResult<LoopAnalysis>(F) : nullptr };
+ auto GetDT = [&FAM](Function &F) -> DominatorTree & {
+ return FAM.getResult<DominatorTreeAnalysis>(F);
};
+ auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
+ return FAM.getResult<BlockFrequencyAnalysis>(F);
+ };
+
- if (!runIPSCCP(M, DL, &FAM, GetTLI, GetTTI, GetAC, getAnalysis,
+ if (!runIPSCCP(M, DL, &FAM, GetTLI, GetTTI, GetAC, GetDT, GetBFI,
isFuncSpecEnabled()))
return PreservedAnalyses::all();
@@ -407,73 +409,3 @@ PreservedAnalyses IPSCCPPass::run(Module &M, ModuleAnalysisManager &AM) {
PA.preserve<FunctionAnalysisManagerModuleProxy>();
return PA;
}
-
-namespace {
-
-//===--------------------------------------------------------------------===//
-//
-/// IPSCCP Class - This class implements interprocedural Sparse Conditional
-/// Constant Propagation.
-///
-class IPSCCPLegacyPass : public ModulePass {
-public:
- static char ID;
-
- IPSCCPLegacyPass() : ModulePass(ID) {
- initializeIPSCCPLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- if (skipModule(M))
- return false;
- const DataLayout &DL = M.getDataLayout();
- auto GetTLI = [this](Function &F) -> const TargetLibraryInfo & {
- return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- };
- auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
- return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- };
- auto GetAC = [this](Function &F) -> AssumptionCache & {
- return this->getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- };
- auto getAnalysis = [this](Function &F) -> AnalysisResultsForFn {
- DominatorTree &DT =
- this->getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
- return {
- std::make_unique<PredicateInfo>(
- F, DT,
- this->getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
- F)),
- nullptr, // We cannot preserve the LI, DT or PDT with the legacy pass
- nullptr, // manager, so set them to nullptr.
- nullptr};
- };
-
- return runIPSCCP(M, DL, nullptr, GetTLI, GetTTI, GetAC, getAnalysis, false);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- }
-};
-
-} // end anonymous namespace
-
-char IPSCCPLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(IPSCCPLegacyPass, "ipsccp",
- "Interprocedural Sparse Conditional Constant Propagation",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(IPSCCPLegacyPass, "ipsccp",
- "Interprocedural Sparse Conditional Constant Propagation",
- false, false)
-
-// createIPSCCPPass - This is the public interface to this file.
-ModulePass *llvm::createIPSCCPPass() { return new IPSCCPLegacyPass(); }
-
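
The IPSCCP hunks above drop the bundled AnalysisResultsForFn in favor of plain per-function getters. A sketch of how such getters are typically built from a FunctionAnalysisManager, mirroring the IPSCCPPass::run hunk (the free-function wrappers are illustrative):

  #include "llvm/Analysis/BlockFrequencyInfo.h"
  #include "llvm/IR/Dominators.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/PassManager.h"

  using namespace llvm;

  // Each getter queries its analysis lazily for the function it is given.
  static auto makeDTGetter(FunctionAnalysisManager &FAM) {
    return [&FAM](Function &F) -> DominatorTree & {
      return FAM.getResult<DominatorTreeAnalysis>(F);
    };
  }

  static auto makeBFIGetter(FunctionAnalysisManager &FAM) {
    return [&FAM](Function &F) -> BlockFrequencyInfo & {
      return FAM.getResult<BlockFrequencyAnalysis>(F);
    };
  }
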
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 93b368fd72a6..a53baecd4776 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -35,9 +35,9 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
-#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineAdvisor.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ReplayInlineAdvisor.h"
@@ -58,8 +58,6 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PseudoProbe.h"
#include "llvm/IR/ValueSymbolTable.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/SampleProf.h"
#include "llvm/ProfileData/SampleProfReader.h"
@@ -67,6 +65,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/ProfiledCallGraph.h"
@@ -129,6 +128,11 @@ static cl::opt<std::string> SampleProfileRemappingFile(
"sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
+static cl::opt<bool> SalvageStaleProfile(
+ "salvage-stale-profile", cl::Hidden, cl::init(false),
+ cl::desc("Salvage stale profile by fuzzy matching and use the remapped "
+ "location for sample profile query."));
+
static cl::opt<bool> ReportProfileStaleness(
"report-profile-staleness", cl::Hidden, cl::init(false),
cl::desc("Compute and report stale profile statistical metrics."));
@@ -138,6 +142,11 @@ static cl::opt<bool> PersistProfileStaleness(
cl::desc("Compute stale profile statistical metrics and write it into the "
"native object file(.llvm_stats section)."));
+static cl::opt<bool> FlattenProfileForMatching(
+ "flatten-profile-for-matching", cl::Hidden, cl::init(true),
+ cl::desc(
+ "Use flattened profile for stale profile detection and matching."));
+
static cl::opt<bool> ProfileSampleAccurate(
"profile-sample-accurate", cl::Hidden, cl::init(false),
cl::desc("If the sample profile is accurate, we will mark all un-sampled "
@@ -173,9 +182,6 @@ static cl::opt<bool>
cl::desc("Process functions in a top-down order "
"defined by the profiled call graph when "
"-sample-profile-top-down-load is on."));
-cl::opt<bool>
- SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
- cl::desc("Sort profiled recursion by edge weights."));
static cl::opt<bool> ProfileSizeInline(
"sample-profile-inline-size", cl::Hidden, cl::init(false),
@@ -191,6 +197,11 @@ static cl::opt<bool> DisableSampleLoaderInlining(
"pass, and merge (or scale) profiles (as configured by "
"--sample-profile-merge-inlinee)."));
+namespace llvm {
+cl::opt<bool>
+ SortProfiledSCC("sort-profiled-scc-member", cl::init(true), cl::Hidden,
+ cl::desc("Sort profiled recursion by edge weights."));
+
cl::opt<int> ProfileInlineGrowthLimit(
"sample-profile-inline-growth-limit", cl::Hidden, cl::init(12),
cl::desc("The size growth ratio limit for proirity-based sample profile "
@@ -214,6 +225,7 @@ cl::opt<int> SampleHotCallSiteThreshold(
cl::opt<int> SampleColdCallSiteThreshold(
"sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45),
cl::desc("Threshold for inlining cold callsites"));
+} // namespace llvm
static cl::opt<unsigned> ProfileICPRelativeHotness(
"sample-profile-icp-relative-hotness", cl::Hidden, cl::init(25),
@@ -307,7 +319,9 @@ static cl::opt<bool> AnnotateSampleProfileInlinePhase(
cl::desc("Annotate LTO phase (prelink / postlink), or main (no LTO) for "
"sample-profile inline pass name."));
+namespace llvm {
extern cl::opt<bool> EnableExtTspBlockPlacement;
+}
namespace {
@@ -428,6 +442,11 @@ class SampleProfileMatcher {
Module &M;
SampleProfileReader &Reader;
const PseudoProbeManager *ProbeManager;
+ SampleProfileMap FlattenedProfiles;
+  // For each function, the matcher generates a map in which each entry maps
+  // a source location in the current build to the corresponding source
+  // location in the profile.
+ StringMap<LocToLocMap> FuncMappings;
  // Profile mismatching statistics.
uint64_t TotalProfiledCallsites = 0;
@@ -442,9 +461,43 @@ class SampleProfileMatcher {
public:
SampleProfileMatcher(Module &M, SampleProfileReader &Reader,
const PseudoProbeManager *ProbeManager)
- : M(M), Reader(Reader), ProbeManager(ProbeManager) {}
- void detectProfileMismatch();
- void detectProfileMismatch(const Function &F, const FunctionSamples &FS);
+ : M(M), Reader(Reader), ProbeManager(ProbeManager) {
+ if (FlattenProfileForMatching) {
+ ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
+ FunctionSamples::ProfileIsCS);
+ }
+ }
+ void runOnModule();
+
+private:
+ FunctionSamples *getFlattenedSamplesFor(const Function &F) {
+ StringRef CanonFName = FunctionSamples::getCanonicalFnName(F);
+ auto It = FlattenedProfiles.find(CanonFName);
+ if (It != FlattenedProfiles.end())
+ return &It->second;
+ return nullptr;
+ }
+ void runOnFunction(const Function &F, const FunctionSamples &FS);
+ void countProfileMismatches(
+ const FunctionSamples &FS,
+ const std::unordered_set<LineLocation, LineLocationHash>
+ &MatchedCallsiteLocs,
+ uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites);
+
+ LocToLocMap &getIRToProfileLocationMap(const Function &F) {
+ auto Ret = FuncMappings.try_emplace(
+ FunctionSamples::getCanonicalFnName(F.getName()), LocToLocMap());
+ return Ret.first->second;
+ }
+ void distributeIRToProfileLocationMap();
+ void distributeIRToProfileLocationMap(FunctionSamples &FS);
+ void populateProfileCallsites(
+ const FunctionSamples &FS,
+ StringMap<std::set<LineLocation>> &CalleeToCallsitesMap);
+ void runStaleProfileMatching(
+ const std::map<LineLocation, StringRef> &IRLocations,
+ StringMap<std::set<LineLocation>> &CalleeToCallsitesMap,
+ LocToLocMap &IRToProfileLocationMap);
};
/// Sample profile pass.
@@ -452,15 +505,16 @@ public:
/// This pass reads profile data from the file specified by
/// -sample-profile-file and annotates every affected function with the
/// profile information found in that file.
-class SampleProfileLoader final
- : public SampleProfileLoaderBaseImpl<BasicBlock> {
+class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
public:
SampleProfileLoader(
StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS,
std::function<AssumptionCache &(Function &)> GetAssumptionCache,
std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
std::function<const TargetLibraryInfo &(Function &)> GetTLI)
- : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)),
+ : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName),
+ std::move(FS)),
GetAC(std::move(GetAssumptionCache)),
GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
LTOPhase(LTOPhase),
@@ -471,13 +525,12 @@ public:
bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
bool runOnModule(Module &M, ModuleAnalysisManager *AM,
- ProfileSummaryInfo *_PSI, CallGraph *CG);
+ ProfileSummaryInfo *_PSI, LazyCallGraph &CG);
protected:
bool runOnFunction(Function &F, ModuleAnalysisManager *AM);
bool emitAnnotations(Function &F);
ErrorOr<uint64_t> getInstWeight(const Instruction &I) override;
- ErrorOr<uint64_t> getProbeWeight(const Instruction &I);
const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
const FunctionSamples *
findFunctionSamples(const Instruction &I) const override;
@@ -512,8 +565,8 @@ protected:
void promoteMergeNotInlinedContextSamples(
MapVector<CallBase *, const FunctionSamples *> NonInlinedCallSites,
const Function &F);
- std::vector<Function *> buildFunctionOrder(Module &M, CallGraph *CG);
- std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(CallGraph &CG);
+ std::vector<Function *> buildFunctionOrder(Module &M, LazyCallGraph &CG);
+ std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(Module &M);
void generateMDProfMetadata(Function &F);
/// Map from function name to Function *. Used to find the function from
@@ -573,9 +626,6 @@ protected:
// External inline advisor used to replay inline decision from remarks.
std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
- // A pseudo probe helper to correlate the imported sample counts.
- std::unique_ptr<PseudoProbeManager> ProbeManager;
-
// A helper to implement the sample profile matching algorithm.
std::unique_ptr<SampleProfileMatcher> MatchingManager;
@@ -586,6 +636,50 @@ private:
};
} // end anonymous namespace
+namespace llvm {
+template <>
+inline bool SampleProfileInference<Function>::isExit(const BasicBlock *BB) {
+ return succ_empty(BB);
+}
+
+template <>
+inline void SampleProfileInference<Function>::findUnlikelyJumps(
+ const std::vector<const BasicBlockT *> &BasicBlocks,
+ BlockEdgeMap &Successors, FlowFunction &Func) {
+ for (auto &Jump : Func.Jumps) {
+ const auto *BB = BasicBlocks[Jump.Source];
+ const auto *Succ = BasicBlocks[Jump.Target];
+ const Instruction *TI = BB->getTerminator();
+ // Check if a block ends with InvokeInst and mark non-taken branch unlikely.
+ // In that case, block Succ should be a landing pad.
+ if (Successors[BB].size() == 2 && Successors[BB].back() == Succ) {
+ if (isa<InvokeInst>(TI)) {
+ Jump.IsUnlikely = true;
+ }
+ }
+ const Instruction *SuccTI = Succ->getTerminator();
+ // Check if the target block contains UnreachableInst and mark it unlikely
+ if (SuccTI->getNumSuccessors() == 0) {
+ if (isa<UnreachableInst>(SuccTI)) {
+ Jump.IsUnlikely = true;
+ }
+ }
+ }
+}
+
+template <>
+void SampleProfileLoaderBaseImpl<Function>::computeDominanceAndLoopInfo(
+ Function &F) {
+ DT.reset(new DominatorTree);
+ DT->recalculate(F);
+
+ PDT.reset(new PostDominatorTree(F));
+
+ LI.reset(new LoopInfo);
+ LI->analyze(*DT);
+}
+} // namespace llvm
+
ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
if (FunctionSamples::ProfileIsProbeBased)
return getProbeWeight(Inst);
@@ -614,68 +708,6 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
return getInstWeightImpl(Inst);
}
-// Here use error_code to represent: 1) The dangling probe. 2) Ignore the weight
-// of non-probe instruction. So if all instructions of the BB give error_code,
-// tell the inference algorithm to infer the BB weight.
-ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
- assert(FunctionSamples::ProfileIsProbeBased &&
- "Profile is not pseudo probe based");
- std::optional<PseudoProbe> Probe = extractProbe(Inst);
- // Ignore the non-probe instruction. If none of the instruction in the BB is
- // probe, we choose to infer the BB's weight.
- if (!Probe)
- return std::error_code();
-
- const FunctionSamples *FS = findFunctionSamples(Inst);
- // If none of the instruction has FunctionSample, we choose to return zero
- // value sample to indicate the BB is cold. This could happen when the
- // instruction is from inlinee and no profile data is found.
- // FIXME: This should not be affected by the source drift issue as 1) if the
- // newly added function is top-level inliner, it won't match the CFG checksum
- // in the function profile or 2) if it's the inlinee, the inlinee should have
- // a profile, otherwise it wouldn't be inlined. For non-probe based profile,
- // we can improve it by adding a switch for profile-sample-block-accurate for
- // block level counts in the future.
- if (!FS)
- return 0;
-
- // For non-CS profile, If a direct call/invoke instruction is inlined in
- // profile (findCalleeFunctionSamples returns non-empty result), but not
- // inlined here, it means that the inlined callsite has no sample, thus the
- // call instruction should have 0 count.
- // For CS profile, the callsite count of previously inlined callees is
- // populated with the entry count of the callees.
- if (!FunctionSamples::ProfileIsCS)
- if (const auto *CB = dyn_cast<CallBase>(&Inst))
- if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
- return 0;
-
- const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
- if (R) {
- uint64_t Samples = R.get() * Probe->Factor;
- bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
- if (FirstMark) {
- ORE->emit([&]() {
- OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
- Remark << "Applied " << ore::NV("NumSamples", Samples);
- Remark << " samples from profile (ProbeId=";
- Remark << ore::NV("ProbeId", Probe->Id);
- Remark << ", Factor=";
- Remark << ore::NV("Factor", Probe->Factor);
- Remark << ", OriginalSamples=";
- Remark << ore::NV("OriginalSamples", R.get());
- Remark << ")";
- return Remark;
- });
- }
- LLVM_DEBUG(dbgs() << " " << Probe->Id << ":" << Inst
- << " - weight: " << R.get() << " - factor: "
- << format("%0.2f", Probe->Factor) << ")\n");
- return Samples;
- }
- return R;
-}
-
/// Get the FunctionSamples for a call instruction.
///
/// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
@@ -1041,8 +1073,8 @@ void SampleProfileLoader::findExternalInlineCandidate(
DenseSet<GlobalValue::GUID> &InlinedGUIDs,
const StringMap<Function *> &SymbolMap, uint64_t Threshold) {
- // If ExternalInlineAdvisor wants to inline an external function
- // make sure it's imported
+ // If ExternalInlineAdvisor (ReplayInlineAdvisor) wants to inline an external
+ // function, make sure it's imported.
if (CB && getExternalInlineAdvisorShouldInline(*CB)) {
// Samples may not exist for replayed function, if so
// just add the direct GUID and move on
@@ -1055,7 +1087,13 @@ void SampleProfileLoader::findExternalInlineCandidate(
Threshold = 0;
}
- assert(Samples && "expect non-null caller profile");
+ // In some rare cases, the call instruction could be changed after being
+ // pushed into the inline candidate queue; this is because earlier inlining
+ // may expose constant propagation that turns an indirect call direct. When
+ // this happens, we may fail to find matching function samples for the
+ // candidate later, even if a match was found when the candidate was enqueued.
+ if (!Samples)
+ return;
// For AutoFDO profile, retrieve candidate profiles by walking over
// the nested inlinee profiles.
@@ -1255,7 +1293,7 @@ bool SampleProfileLoader::tryInlineCandidate(
if (!Cost)
return false;
- InlineFunctionInfo IFI(nullptr, GetAC);
+ InlineFunctionInfo IFI(GetAC);
IFI.UpdateProfile = false;
InlineResult IR = InlineFunction(CB, IFI,
/*MergeAttributes=*/true);
@@ -1784,9 +1822,10 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
if (!ProbeManager->profileIsValid(F, *Samples)) {
LLVM_DEBUG(
dbgs() << "Profile is invalid due to CFG mismatch for Function "
- << F.getName());
+ << F.getName() << "\n");
++NumMismatchedProfile;
- return false;
+ if (!SalvageStaleProfile)
+ return false;
}
++NumMatchedProfile;
} else {
@@ -1813,7 +1852,7 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
}
std::unique_ptr<ProfiledCallGraph>
-SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) {
+SampleProfileLoader::buildProfiledCallGraph(Module &M) {
std::unique_ptr<ProfiledCallGraph> ProfiledCG;
if (FunctionSamples::ProfileIsCS)
ProfiledCG = std::make_unique<ProfiledCallGraph>(*ContextTracker);
@@ -1823,18 +1862,17 @@ SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) {
// Add all functions into the profiled call graph even if they are not in
// the profile. This makes sure functions missing from the profile still
// get a chance to be processed.
- for (auto &Node : CG) {
- const auto *F = Node.first;
- if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile"))
+ for (Function &F : M) {
+ if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
continue;
- ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(*F));
+ ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(F));
}
return ProfiledCG;
}
std::vector<Function *>
-SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
+SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) {
std::vector<Function *> FunctionOrderList;
FunctionOrderList.reserve(M.size());
@@ -1842,7 +1880,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
errs() << "WARNING: -use-profiled-call-graph ignored, should be used "
"together with -sample-profile-top-down-load.\n";
- if (!ProfileTopDownLoad || CG == nullptr) {
+ if (!ProfileTopDownLoad) {
if (ProfileMergeInlinee) {
// Disable ProfileMergeInlinee if profile is not loaded in top down order,
// because the profile for a function may be used for the profile
@@ -1858,8 +1896,6 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
return FunctionOrderList;
}
- assert(&CG->getModule() == &M);
-
if (UseProfiledCallGraph || (FunctionSamples::ProfileIsCS &&
!UseProfiledCallGraph.getNumOccurrences())) {
// Use profiled call edges to augment the top-down order. There are cases
@@ -1910,7 +1946,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
// static call edges are not so important when they don't correspond to a
// context in the profile.
- std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(*CG);
+ std::unique_ptr<ProfiledCallGraph> ProfiledCG = buildProfiledCallGraph(M);
scc_iterator<ProfiledCallGraph *> CGI = scc_begin(ProfiledCG.get());
while (!CGI.isAtEnd()) {
auto Range = *CGI;
@@ -1927,25 +1963,27 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) {
++CGI;
}
} else {
- scc_iterator<CallGraph *> CGI = scc_begin(CG);
- while (!CGI.isAtEnd()) {
- for (CallGraphNode *Node : *CGI) {
- auto *F = Node->getFunction();
- if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile"))
- FunctionOrderList.push_back(F);
+ CG.buildRefSCCs();
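+ // Visit the call graph in post order over RefSCCs (callees before callers);
+ // the reversal below turns this into the top-down order expected when
+ // top-down profile loading is in effect.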
+ for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
+ for (LazyCallGraph::SCC &C : RC) {
+ for (LazyCallGraph::Node &N : C) {
+ Function &F = N.getFunction();
+ if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile"))
+ FunctionOrderList.push_back(&F);
+ }
}
- ++CGI;
}
}
+ std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
+
LLVM_DEBUG({
dbgs() << "Function processing order:\n";
- for (auto F : reverse(FunctionOrderList)) {
+ for (auto F : FunctionOrderList) {
dbgs() << F->getName() << "\n";
}
});
- std::reverse(FunctionOrderList.begin(), FunctionOrderList.end());
return FunctionOrderList;
}
@@ -1954,7 +1992,7 @@ bool SampleProfileLoader::doInitialization(Module &M,
auto &Ctx = M.getContext();
auto ReaderOrErr = SampleProfileReader::create(
- Filename, Ctx, FSDiscriminatorPass::Base, RemappingFilename);
+ Filename, Ctx, *FS, FSDiscriminatorPass::Base, RemappingFilename);
if (std::error_code EC = ReaderOrErr.getError()) {
std::string Msg = "Could not open profile: " + EC.message();
Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
@@ -2016,6 +2054,16 @@ bool SampleProfileLoader::doInitialization(Module &M,
UsePreInlinerDecision = true;
}
+ // Enable stale profile matching by default for probe-based profiles.
+ // The matching currently relies on detecting a checksum mismatch, which is
+ // only available in pseudo-probe mode. Removing the checksum check could
+ // cause regressions in some cases, so further tuning might be needed before
+ // enabling it for all cases.
+ if (Reader->profileIsProbeBased() &&
+ !SalvageStaleProfile.getNumOccurrences()) {
+ SalvageStaleProfile = true;
+ }
+
if (!Reader->profileIsCS()) {
// Non-CS profile should be fine without a function size budget for the
// inliner since the contexts in the profile are either all from inlining
@@ -2046,7 +2094,8 @@ bool SampleProfileLoader::doInitialization(Module &M,
}
}
- if (ReportProfileStaleness || PersistProfileStaleness) {
+ if (ReportProfileStaleness || PersistProfileStaleness ||
+ SalvageStaleProfile) {
MatchingManager =
std::make_unique<SampleProfileMatcher>(M, *Reader, ProbeManager.get());
}
@@ -2054,8 +2103,167 @@ bool SampleProfileLoader::doInitialization(Module &M,
return true;
}
-void SampleProfileMatcher::detectProfileMismatch(const Function &F,
- const FunctionSamples &FS) {
+void SampleProfileMatcher::countProfileMismatches(
+ const FunctionSamples &FS,
+ const std::unordered_set<LineLocation, LineLocationHash>
+ &MatchedCallsiteLocs,
+ uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites) {
+
+ auto isInvalidLineOffset = [](uint32_t LineOffset) {
+ return LineOffset & 0x8000;
+ };
+
+ // Check if there are any callsites in the profile that do not match any IR
+ // callsites; those callsite samples will be discarded.
+ for (auto &I : FS.getBodySamples()) {
+ const LineLocation &Loc = I.first;
+ if (isInvalidLineOffset(Loc.LineOffset))
+ continue;
+
+ uint64_t Count = I.second.getSamples();
+ if (!I.second.getCallTargets().empty()) {
+ TotalCallsiteSamples += Count;
+ FuncProfiledCallsites++;
+ if (!MatchedCallsiteLocs.count(Loc)) {
+ MismatchedCallsiteSamples += Count;
+ FuncMismatchedCallsites++;
+ }
+ }
+ }
+
+ for (auto &I : FS.getCallsiteSamples()) {
+ const LineLocation &Loc = I.first;
+ if (isInvalidLineOffset(Loc.LineOffset))
+ continue;
+
+ uint64_t Count = 0;
+ for (auto &FM : I.second) {
+ Count += FM.second.getHeadSamplesEstimate();
+ }
+ TotalCallsiteSamples += Count;
+ FuncProfiledCallsites++;
+ if (!MatchedCallsiteLocs.count(Loc)) {
+ MismatchedCallsiteSamples += Count;
+ FuncMismatchedCallsites++;
+ }
+ }
+}
+
+// Populate the anchors (direct callee names) from the profile.
+void SampleProfileMatcher::populateProfileCallsites(
+ const FunctionSamples &FS,
+ StringMap<std::set<LineLocation>> &CalleeToCallsitesMap) {
+ for (const auto &I : FS.getBodySamples()) {
+ const auto &Loc = I.first;
+ const auto &CTM = I.second.getCallTargets();
+ // Filter out possible indirect calls; use the direct callee name as the anchor.
+ if (CTM.size() == 1) {
+ StringRef CalleeName = CTM.begin()->first();
+ const auto &Candidates = CalleeToCallsitesMap.try_emplace(
+ CalleeName, std::set<LineLocation>());
+ Candidates.first->second.insert(Loc);
+ }
+ }
+
+ for (const auto &I : FS.getCallsiteSamples()) {
+ const LineLocation &Loc = I.first;
+ const auto &CalleeMap = I.second;
+ // Filter out possible indirect calls; use the direct callee name as the anchor.
+ if (CalleeMap.size() == 1) {
+ StringRef CalleeName = CalleeMap.begin()->first;
+ const auto &Candidates = CalleeToCallsitesMap.try_emplace(
+ CalleeName, std::set<LineLocation>());
+ Candidates.first->second.insert(Loc);
+ }
+ }
+}
+
+// Call-target-name anchor based profile fuzzy matching.
+// Input:
+// For IR locations, the anchor is the callee name of a direct callsite; for
+// profile locations, it's the call target name for BodySamples or the
+// inlinee's profile name for CallsiteSamples.
+// Matching heuristic:
+// First match all the anchors in lexical order, then split the non-anchor
+// locations between two anchors evenly: the first half is matched based on
+// the start anchor, the second half based on the end anchor.
+// For example, given:
+// IR locations: [1, 2(foo), 3, 5, 6(bar), 7]
+// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
+// The matching gives:
+// [1, 2(foo), 3, 5, 6(bar), 7]
+// | | | | | |
+// [1, 2, 3(foo), 4, 7, 8(bar), 9]
+// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
+void SampleProfileMatcher::runStaleProfileMatching(
+ const std::map<LineLocation, StringRef> &IRLocations,
+ StringMap<std::set<LineLocation>> &CalleeToCallsitesMap,
+ LocToLocMap &IRToProfileLocationMap) {
+ assert(IRToProfileLocationMap.empty() &&
+ "Run stale profile matching only once per function");
+
+ auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
+ // Skip the unchanged location mapping to save memory.
+ if (From != To)
+ IRToProfileLocationMap.insert({From, To});
+ };
+
+ // Use function's beginning location as the initial anchor.
+ int32_t LocationDelta = 0;
+ SmallVector<LineLocation> LastMatchedNonAnchors;
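+ // Non-anchor locations matched forwards since the previous anchor; once the
+ // next anchor is found, the second half of this list is re-matched backwards
+ // against it.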
+
+ for (const auto &IR : IRLocations) {
+ const auto &Loc = IR.first;
+ StringRef CalleeName = IR.second;
+ bool IsMatchedAnchor = false;
+ // Match the anchor location in lexical order.
+ if (!CalleeName.empty()) {
+ auto ProfileAnchors = CalleeToCallsitesMap.find(CalleeName);
+ if (ProfileAnchors != CalleeToCallsitesMap.end() &&
+ !ProfileAnchors->second.empty()) {
+ auto CI = ProfileAnchors->second.begin();
+ const auto Candidate = *CI;
+ ProfileAnchors->second.erase(CI);
+ InsertMatching(Loc, Candidate);
+ LLVM_DEBUG(dbgs() << "Callsite with callee:" << CalleeName
+ << " is matched from " << Loc << " to " << Candidate
+ << "\n");
+ LocationDelta = Candidate.LineOffset - Loc.LineOffset;
+
+ // Match backwards for non-anchor locations.
+ // The locations in LastMatchedNonAnchors have been matched forwards
+ // based on the previous anchor; split the list evenly and overwrite the
+ // second half based on the current anchor.
+ for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
+ I < LastMatchedNonAnchors.size(); I++) {
+ const auto &L = LastMatchedNonAnchors[I];
+ uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
+ LineLocation Candidate(CandidateLineOffset, L.Discriminator);
+ InsertMatching(L, Candidate);
+ LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
+ << " to " << Candidate << "\n");
+ }
+
+ IsMatchedAnchor = true;
+ LastMatchedNonAnchors.clear();
+ }
+ }
+
+ // Match forwards for non-anchor locations.
+ if (!IsMatchedAnchor) {
+ uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
+ LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
+ InsertMatching(Loc, Candidate);
+ LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
+ << Candidate << "\n");
+ LastMatchedNonAnchors.emplace_back(Loc);
+ }
+ }
+}
+
+void SampleProfileMatcher::runOnFunction(const Function &F,
+ const FunctionSamples &FS) {
+ bool IsFuncHashMismatch = false;
if (FunctionSamples::ProfileIsProbeBased) {
uint64_t Count = FS.getTotalSamples();
TotalFuncHashSamples += Count;
@@ -2063,16 +2271,24 @@ void SampleProfileMatcher::detectProfileMismatch(const Function &F,
if (!ProbeManager->profileIsValid(F, FS)) {
MismatchedFuncHashSamples += Count;
NumMismatchedFuncHash++;
- return;
+ IsFuncHashMismatch = true;
}
}
std::unordered_set<LineLocation, LineLocationHash> MatchedCallsiteLocs;
+ // The map value is the callee name of a direct callsite, or an empty
+ // StringRef for a non-direct callsite.
+ std::map<LineLocation, StringRef> IRLocations;
- // Go through all the callsites on the IR and flag the callsite if the target
- // name is the same as the one in the profile.
+ // Extract profile matching anchors and profile mismatch metrics in the IR.
for (auto &BB : F) {
for (auto &I : BB) {
+ // TODO: Support line-number based locations (AutoFDO).
+ if (FunctionSamples::ProfileIsProbeBased && isa<PseudoProbeInst>(&I)) {
+ if (std::optional<PseudoProbe> Probe = extractProbe(I))
+ IRLocations.emplace(LineLocation(Probe->Id, 0), StringRef());
+ }
+
if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
continue;
@@ -2084,6 +2300,17 @@ void SampleProfileMatcher::detectProfileMismatch(const Function &F,
if (Function *Callee = CB->getCalledFunction())
CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
+ // Forcibly overwrite the callee name in case a non-call location was
+ // written before.
+ auto R = IRLocations.emplace(IRCallsite, CalleeName);
+ R.first->second = CalleeName;
+ assert((!FunctionSamples::ProfileIsProbeBased || R.second ||
+ R.first->second == CalleeName) &&
+ "Overwrite non-call or different callee name location for "
+ "pseudo probe callsite");
+
+ // Go through all the callsites on the IR and flag the callsite if the
+ // target name is the same as the one in the profile.
const auto CTM = FS.findCallTargetMapAt(IRCallsite);
const auto CallsiteFS = FS.findFunctionSamplesMapAt(IRCallsite);
@@ -2105,55 +2332,54 @@ void SampleProfileMatcher::detectProfileMismatch(const Function &F,
}
}
- auto isInvalidLineOffset = [](uint32_t LineOffset) {
- return LineOffset & 0x8000;
- };
+ // Detect profile mismatch for profile staleness metrics report.
+ if (ReportProfileStaleness || PersistProfileStaleness) {
+ uint64_t FuncMismatchedCallsites = 0;
+ uint64_t FuncProfiledCallsites = 0;
+ countProfileMismatches(FS, MatchedCallsiteLocs, FuncMismatchedCallsites,
+ FuncProfiledCallsites);
+ TotalProfiledCallsites += FuncProfiledCallsites;
+ NumMismatchedCallsites += FuncMismatchedCallsites;
+ LLVM_DEBUG({
+ if (FunctionSamples::ProfileIsProbeBased && !IsFuncHashMismatch &&
+ FuncMismatchedCallsites)
+ dbgs() << "Function checksum is matched but there are "
+ << FuncMismatchedCallsites << "/" << FuncProfiledCallsites
+ << " mismatched callsites.\n";
+ });
+ }
- // Check if there are any callsites in the profile that does not match to any
- // IR callsites, those callsite samples will be discarded.
- for (auto &I : FS.getBodySamples()) {
- const LineLocation &Loc = I.first;
- if (isInvalidLineOffset(Loc.LineOffset))
- continue;
+ if (IsFuncHashMismatch && SalvageStaleProfile) {
+ LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
+ << "\n");
- uint64_t Count = I.second.getSamples();
- if (!I.second.getCallTargets().empty()) {
- TotalCallsiteSamples += Count;
- TotalProfiledCallsites++;
- if (!MatchedCallsiteLocs.count(Loc)) {
- MismatchedCallsiteSamples += Count;
- NumMismatchedCallsites++;
- }
- }
- }
+ StringMap<std::set<LineLocation>> CalleeToCallsitesMap;
+ populateProfileCallsites(FS, CalleeToCallsitesMap);
- for (auto &I : FS.getCallsiteSamples()) {
- const LineLocation &Loc = I.first;
- if (isInvalidLineOffset(Loc.LineOffset))
- continue;
+ // The matching result will be saved to IRToProfileLocationMap; a new map is
+ // created for each function.
+ auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
- uint64_t Count = 0;
- for (auto &FM : I.second) {
- Count += FM.second.getHeadSamplesEstimate();
- }
- TotalCallsiteSamples += Count;
- TotalProfiledCallsites++;
- if (!MatchedCallsiteLocs.count(Loc)) {
- MismatchedCallsiteSamples += Count;
- NumMismatchedCallsites++;
- }
+ runStaleProfileMatching(IRLocations, CalleeToCallsitesMap,
+ IRToProfileLocationMap);
}
}
-void SampleProfileMatcher::detectProfileMismatch() {
+void SampleProfileMatcher::runOnModule() {
for (auto &F : M) {
if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
continue;
- FunctionSamples *FS = Reader.getSamplesFor(F);
+ FunctionSamples *FS = nullptr;
+ if (FlattenProfileForMatching)
+ FS = getFlattenedSamplesFor(F);
+ else
+ FS = Reader.getSamplesFor(F);
if (!FS)
continue;
- detectProfileMismatch(F, *FS);
+ runOnFunction(F, *FS);
}
+ if (SalvageStaleProfile)
+ distributeIRToProfileLocationMap();
if (ReportProfileStaleness) {
if (FunctionSamples::ProfileIsProbeBased) {
@@ -2196,8 +2422,31 @@ void SampleProfileMatcher::detectProfileMismatch() {
}
}
+void SampleProfileMatcher::distributeIRToProfileLocationMap(
+ FunctionSamples &FS) {
+ const auto ProfileMappings = FuncMappings.find(FS.getName());
+ if (ProfileMappings != FuncMappings.end()) {
+ FS.setIRToProfileLocationMap(&(ProfileMappings->second));
+ }
+
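+ // Recurse into inlined callee profiles so that nested FunctionSamples, which
+ // are keyed by the inlinee's name, also pick up their own matching results.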
+ for (auto &Inlinees : FS.getCallsiteSamples()) {
+ for (auto FS : Inlinees.second) {
+ distributeIRToProfileLocationMap(FS.second);
+ }
+ }
+}
+
+// Use a central place to distribute the matching results. Outlined and inlined
+// profiles with the same function name will be set to the same pointer.
+void SampleProfileMatcher::distributeIRToProfileLocationMap() {
+ for (auto &I : Reader.getProfiles()) {
+ distributeIRToProfileLocationMap(I.second);
+ }
+}
+
bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
- ProfileSummaryInfo *_PSI, CallGraph *CG) {
+ ProfileSummaryInfo *_PSI,
+ LazyCallGraph &CG) {
GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
PSI = _PSI;
@@ -2240,8 +2489,10 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
assert(SymbolMap.count(StringRef()) == 0 &&
"No empty StringRef should be added in SymbolMap");
- if (ReportProfileStaleness || PersistProfileStaleness)
- MatchingManager->detectProfileMismatch();
+ if (ReportProfileStaleness || PersistProfileStaleness ||
+ SalvageStaleProfile) {
+ MatchingManager->runOnModule();
+ }
bool retval = false;
for (auto *F : buildFunctionOrder(M, CG)) {
@@ -2327,6 +2578,11 @@ bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM)
return emitAnnotations(F);
return false;
}
+SampleProfileLoaderPass::SampleProfileLoaderPass(
+ std::string File, std::string RemappingFile, ThinOrFullLTOPhase LTOPhase,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS)
+ : ProfileFileName(File), ProfileRemappingFileName(RemappingFile),
+ LTOPhase(LTOPhase), FS(std::move(FS)) {}
PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
ModuleAnalysisManager &AM) {
@@ -2343,18 +2599,21 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
return FAM.getResult<TargetLibraryAnalysis>(F);
};
+ if (!FS)
+ FS = vfs::getRealFileSystem();
+
SampleProfileLoader SampleLoader(
ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
: ProfileRemappingFileName,
- LTOPhase, GetAssumptionCache, GetTTI, GetTLI);
+ LTOPhase, FS, GetAssumptionCache, GetTTI, GetTLI);
if (!SampleLoader.doInitialization(M, &FAM))
return PreservedAnalyses::all();
ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
- CallGraph &CG = AM.getResult<CallGraphAnalysis>(M);
- if (!SampleLoader.runOnModule(M, &AM, PSI, &CG))
+ LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
+ if (!SampleLoader.runOnModule(M, &AM, PSI, CG))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index c4844dbe7f3c..0a42de7224b4 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -13,6 +13,7 @@
#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/EHUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -32,7 +33,7 @@
#include <vector>
using namespace llvm;
-#define DEBUG_TYPE "sample-profile-probe"
+#define DEBUG_TYPE "pseudo-probe"
STATISTIC(ArtificialDbgLine,
"Number of probes that have an artificial debug line");
@@ -55,11 +56,7 @@ static uint64_t getCallStackHash(const DILocation *DIL) {
while (InlinedAt) {
Hash ^= MD5Hash(std::to_string(InlinedAt->getLine()));
Hash ^= MD5Hash(std::to_string(InlinedAt->getColumn()));
- const DISubprogram *SP = InlinedAt->getScope()->getSubprogram();
- // Use linkage name for C++ if possible.
- auto Name = SP->getLinkageName();
- if (Name.empty())
- Name = SP->getName();
+ auto Name = InlinedAt->getSubprogramLinkageName();
Hash ^= MD5Hash(Name);
InlinedAt = InlinedAt->getInlinedAt();
}
@@ -169,47 +166,6 @@ void PseudoProbeVerifier::verifyProbeFactors(
}
}
-PseudoProbeManager::PseudoProbeManager(const Module &M) {
- if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
- for (const auto *Operand : FuncInfo->operands()) {
- const auto *MD = cast<MDNode>(Operand);
- auto GUID =
- mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))->getZExtValue();
- auto Hash =
- mdconst::dyn_extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
- GUIDToProbeDescMap.try_emplace(GUID, PseudoProbeDescriptor(GUID, Hash));
- }
- }
-}
-
-const PseudoProbeDescriptor *
-PseudoProbeManager::getDesc(const Function &F) const {
- auto I = GUIDToProbeDescMap.find(
- Function::getGUID(FunctionSamples::getCanonicalFnName(F)));
- return I == GUIDToProbeDescMap.end() ? nullptr : &I->second;
-}
-
-bool PseudoProbeManager::moduleIsProbed(const Module &M) const {
- return M.getNamedMetadata(PseudoProbeDescMetadataName);
-}
-
-bool PseudoProbeManager::profileIsValid(const Function &F,
- const FunctionSamples &Samples) const {
- const auto *Desc = getDesc(F);
- if (!Desc) {
- LLVM_DEBUG(dbgs() << "Probe descriptor missing for Function " << F.getName()
- << "\n");
- return false;
- } else {
- if (Desc->getFunctionHash() != Samples.getFunctionHash()) {
- LLVM_DEBUG(dbgs() << "Hash mismatch for Function " << F.getName()
- << "\n");
- return false;
- }
- }
- return true;
-}
-
SampleProfileProber::SampleProfileProber(Function &Func,
const std::string &CurModuleUniqueId)
: F(&Func), CurModuleUniqueId(CurModuleUniqueId) {
@@ -253,8 +209,14 @@ void SampleProfileProber::computeCFGHash() {
}
void SampleProfileProber::computeProbeIdForBlocks() {
+ DenseSet<BasicBlock *> KnownColdBlocks;
+ computeEHOnlyBlocks(*F, KnownColdBlocks);
+ // Insert pseudo probes into non-cold blocks only. This reduces IR size as
+ // well as binary size while retaining the profile quality.
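+ // Note that the probe id still advances for cold blocks, so the ids of the
+ // remaining blocks are unaffected by which blocks get skipped.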
for (auto &BB : *F) {
- BlockProbeIds[&BB] = ++LastProbeId;
+ ++LastProbeId;
+ if (!KnownColdBlocks.contains(&BB))
+ BlockProbeIds[&BB] = LastProbeId;
}
}
@@ -283,9 +245,16 @@ uint32_t SampleProfileProber::getCallsiteId(const Instruction *Call) const {
void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
Module *M = F.getParent();
MDBuilder MDB(F.getContext());
- // Compute a GUID without considering the function's linkage type. This is
- // fine since function name is the only key in the profile database.
- uint64_t Guid = Function::getGUID(F.getName());
+ // Since the GUIDs from the probe descriptor and the inline stack are computed
+ // separately, we need to make sure their names are consistent, so here we also
+ // use the name from debug info.
+ StringRef FName = F.getName();
+ if (auto *SP = F.getSubprogram()) {
+ FName = SP->getLinkageName();
+ if (FName.empty())
+ FName = SP->getName();
+ }
+ uint64_t Guid = Function::getGUID(FName);
// Assign an artificial debug line to a probe that doesn't come with a real
// line. A probe not having a debug line will get an incomplete inline
@@ -339,6 +308,14 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
Builder.getInt64(PseudoProbeFullDistributionFactor)};
auto *Probe = Builder.CreateCall(ProbeFn, Args);
AssignDebugLoc(Probe);
+ // Reset the dwarf discriminator if the debug location comes with one. The
+ // discriminator field may be used by FS-AFDO later in the pipeline.
+ if (auto DIL = Probe->getDebugLoc()) {
+ if (DIL->getDiscriminator()) {
+ DIL = DIL->cloneWithDiscriminator(0);
+ Probe->setDebugLoc(DIL);
+ }
+ }
}
// Probe both direct calls and indirect calls. Direct calls are probed so that
@@ -351,12 +328,13 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
? (uint32_t)PseudoProbeType::DirectCall
: (uint32_t)PseudoProbeType::IndirectCall;
AssignDebugLoc(Call);
- // Levarge the 32-bit discriminator field of debug data to store the ID and
- // type of a callsite probe. This gets rid of the dependency on plumbing a
- // customized metadata through the codegen pipeline.
- uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
- Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor);
if (auto DIL = Call->getDebugLoc()) {
+ // Leverage the 32-bit discriminator field of debug data to store the ID
+ // and type of a callsite probe. This gets rid of the dependency on
+ // plumbing customized metadata through the codegen pipeline.
+ uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
+ Index, Type, 0,
+ PseudoProbeDwarfDiscriminator::FullDistributionFactor);
DIL = DIL->cloneWithDiscriminator(V);
Call->setDebugLoc(DIL);
}
@@ -368,28 +346,10 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
// - FunctionHash.
// - FunctionName
auto Hash = getFunctionHash();
- auto *MD = MDB.createPseudoProbeDesc(Guid, Hash, &F);
+ auto *MD = MDB.createPseudoProbeDesc(Guid, Hash, FName);
auto *NMD = M->getNamedMetadata(PseudoProbeDescMetadataName);
assert(NMD && "llvm.pseudo_probe_desc should be pre-created");
NMD->addOperand(MD);
-
- // Preserve a comdat group to hold all probes materialized later. This
- // allows that when the function is considered dead and removed, the
- // materialized probes are disposed too.
- // Imported functions are defined in another module. They do not need
- // the following handling since same care will be taken for them in their
- // original module. The pseudo probes inserted into an imported functions
- // above will naturally not be emitted since the imported function is free
- // from object emission. However they will be emitted together with the
- // inliner functions that the imported function is inlined into. We are not
- // creating a comdat group for an import function since it's useless anyway.
- if (!F.isDeclarationForLinker()) {
- if (TM) {
- auto Triple = TM->getTargetTriple();
- if (Triple.supportsCOMDAT() && TM->getFunctionSections())
- getOrCreateFunctionComdat(F, Triple);
- }
- }
}
PreservedAnalyses SampleProfileProbePass::run(Module &M,
diff --git a/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp b/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
index 0f2412dce1c9..53d5b18dcead 100644
--- a/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
+++ b/llvm/lib/Transforms/IPO/StripDeadPrototypes.cpp
@@ -16,8 +16,6 @@
#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;
@@ -56,30 +54,3 @@ PreservedAnalyses StripDeadPrototypesPass::run(Module &M,
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
-
-namespace {
-
-class StripDeadPrototypesLegacyPass : public ModulePass {
-public:
- static char ID; // Pass identification, replacement for typeid
- StripDeadPrototypesLegacyPass() : ModulePass(ID) {
- initializeStripDeadPrototypesLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
- bool runOnModule(Module &M) override {
- if (skipModule(M))
- return false;
-
- return stripDeadPrototypes(M);
- }
-};
-
-} // end anonymous namespace
-
-char StripDeadPrototypesLegacyPass::ID = 0;
-INITIALIZE_PASS(StripDeadPrototypesLegacyPass, "strip-dead-prototypes",
- "Strip Unused Function Prototypes", false, false)
-
-ModulePass *llvm::createStripDeadPrototypesPass() {
- return new StripDeadPrototypesLegacyPass();
-}
diff --git a/llvm/lib/Transforms/IPO/StripSymbols.cpp b/llvm/lib/Transforms/IPO/StripSymbols.cpp
index 34f8c4316cca..147513452789 100644
--- a/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -30,110 +30,12 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/IR/ValueSymbolTable.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/StripSymbols.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
-namespace {
- class StripSymbols : public ModulePass {
- bool OnlyDebugInfo;
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit StripSymbols(bool ODI = false)
- : ModulePass(ID), OnlyDebugInfo(ODI) {
- initializeStripSymbolsPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
- };
-
- class StripNonDebugSymbols : public ModulePass {
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit StripNonDebugSymbols()
- : ModulePass(ID) {
- initializeStripNonDebugSymbolsPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
- };
-
- class StripDebugDeclare : public ModulePass {
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit StripDebugDeclare()
- : ModulePass(ID) {
- initializeStripDebugDeclarePass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
- };
-
- class StripDeadDebugInfo : public ModulePass {
- public:
- static char ID; // Pass identification, replacement for typeid
- explicit StripDeadDebugInfo()
- : ModulePass(ID) {
- initializeStripDeadDebugInfoPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
- };
-}
-
-char StripSymbols::ID = 0;
-INITIALIZE_PASS(StripSymbols, "strip",
- "Strip all symbols from a module", false, false)
-
-ModulePass *llvm::createStripSymbolsPass(bool OnlyDebugInfo) {
- return new StripSymbols(OnlyDebugInfo);
-}
-
-char StripNonDebugSymbols::ID = 0;
-INITIALIZE_PASS(StripNonDebugSymbols, "strip-nondebug",
- "Strip all symbols, except dbg symbols, from a module",
- false, false)
-
-ModulePass *llvm::createStripNonDebugSymbolsPass() {
- return new StripNonDebugSymbols();
-}
-
-char StripDebugDeclare::ID = 0;
-INITIALIZE_PASS(StripDebugDeclare, "strip-debug-declare",
- "Strip all llvm.dbg.declare intrinsics", false, false)
-
-ModulePass *llvm::createStripDebugDeclarePass() {
- return new StripDebugDeclare();
-}
-
-char StripDeadDebugInfo::ID = 0;
-INITIALIZE_PASS(StripDeadDebugInfo, "strip-dead-debug-info",
- "Strip debug info for unused symbols", false, false)
-
-ModulePass *llvm::createStripDeadDebugInfoPass() {
- return new StripDeadDebugInfo();
-}
-
/// OnlyUsedBy - Return true if V is only used by Usr.
static bool OnlyUsedBy(Value *V, Value *Usr) {
for (User *U : V->users())
@@ -234,24 +136,6 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
return true;
}
-bool StripSymbols::runOnModule(Module &M) {
- if (skipModule(M))
- return false;
-
- bool Changed = false;
- Changed |= StripDebugInfo(M);
- if (!OnlyDebugInfo)
- Changed |= StripSymbolNames(M, false);
- return Changed;
-}
-
-bool StripNonDebugSymbols::runOnModule(Module &M) {
- if (skipModule(M))
- return false;
-
- return StripSymbolNames(M, true);
-}
-
static bool stripDebugDeclareImpl(Module &M) {
Function *Declare = M.getFunction("llvm.dbg.declare");
@@ -290,50 +174,6 @@ static bool stripDebugDeclareImpl(Module &M) {
return true;
}
-bool StripDebugDeclare::runOnModule(Module &M) {
- if (skipModule(M))
- return false;
- return stripDebugDeclareImpl(M);
-}
-
-/// Collects compilation units referenced by functions or lexical scopes.
-/// Accepts any DIScope and uses recursive bottom-up approach to reach either
-/// DISubprogram or DILexicalBlockBase.
-static void
-collectCUsWithScope(const DIScope *Scope, std::set<DICompileUnit *> &LiveCUs,
- SmallPtrSet<const DIScope *, 8> &VisitedScopes) {
- if (!Scope)
- return;
-
- auto InS = VisitedScopes.insert(Scope);
- if (!InS.second)
- return;
-
- if (const auto *SP = dyn_cast<DISubprogram>(Scope)) {
- if (SP->getUnit())
- LiveCUs.insert(SP->getUnit());
- return;
- }
- if (const auto *LB = dyn_cast<DILexicalBlockBase>(Scope)) {
- const DISubprogram *SP = LB->getSubprogram();
- if (SP && SP->getUnit())
- LiveCUs.insert(SP->getUnit());
- return;
- }
-
- collectCUsWithScope(Scope->getScope(), LiveCUs, VisitedScopes);
-}
-
-static void
-collectCUsForInlinedFuncs(const DILocation *Loc,
- std::set<DICompileUnit *> &LiveCUs,
- SmallPtrSet<const DIScope *, 8> &VisitedScopes) {
- if (!Loc || !Loc->getInlinedAt())
- return;
- collectCUsWithScope(Loc->getScope(), LiveCUs, VisitedScopes);
- collectCUsForInlinedFuncs(Loc->getInlinedAt(), LiveCUs, VisitedScopes);
-}
-
static bool stripDeadDebugInfoImpl(Module &M) {
bool Changed = false;
@@ -361,19 +201,15 @@ static bool stripDeadDebugInfoImpl(Module &M) {
}
std::set<DICompileUnit *> LiveCUs;
- SmallPtrSet<const DIScope *, 8> VisitedScopes;
- // Any CU is live if is referenced from a subprogram metadata that is attached
- // to a function defined or inlined in the module.
- for (const Function &Fn : M.functions()) {
- collectCUsWithScope(Fn.getSubprogram(), LiveCUs, VisitedScopes);
- for (const_inst_iterator I = inst_begin(&Fn), E = inst_end(&Fn); I != E;
- ++I) {
- if (!I->getDebugLoc())
- continue;
- const DILocation *DILoc = I->getDebugLoc().get();
- collectCUsForInlinedFuncs(DILoc, LiveCUs, VisitedScopes);
- }
+ DebugInfoFinder LiveCUFinder;
+ for (const Function &F : M.functions()) {
+ if (auto *SP = cast_or_null<DISubprogram>(F.getSubprogram()))
+ LiveCUFinder.processSubprogram(SP);
+ for (const Instruction &I : instructions(F))
+ LiveCUFinder.processInstruction(M, I);
}
+ auto FoundCUs = LiveCUFinder.compile_units();
+ LiveCUs.insert(FoundCUs.begin(), FoundCUs.end());
bool HasDeadCUs = false;
for (DICompileUnit *DIC : F.compile_units()) {
@@ -424,39 +260,34 @@ static bool stripDeadDebugInfoImpl(Module &M) {
return Changed;
}
-/// Remove any debug info for global variables/functions in the given module for
-/// which said global variable/function no longer exists (i.e. is null).
-///
-/// Debugging information is encoded in llvm IR using metadata. This is designed
-/// such a way that debug info for symbols preserved even if symbols are
-/// optimized away by the optimizer. This special pass removes debug info for
-/// such symbols.
-bool StripDeadDebugInfo::runOnModule(Module &M) {
- if (skipModule(M))
- return false;
- return stripDeadDebugInfoImpl(M);
-}
-
PreservedAnalyses StripSymbolsPass::run(Module &M, ModuleAnalysisManager &AM) {
StripDebugInfo(M);
StripSymbolNames(M, false);
- return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
PreservedAnalyses StripNonDebugSymbolsPass::run(Module &M,
ModuleAnalysisManager &AM) {
StripSymbolNames(M, true);
- return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
PreservedAnalyses StripDebugDeclarePass::run(Module &M,
ModuleAnalysisManager &AM) {
stripDebugDeclareImpl(M);
- return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
PreservedAnalyses StripDeadDebugInfoPass::run(Module &M,
ModuleAnalysisManager &AM) {
stripDeadDebugInfoImpl(M);
- return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
index 670097010085..fc1e70b1b3d3 100644
--- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
+++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -18,9 +18,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Object/ModuleSymbolTable.h"
-#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
@@ -148,6 +146,14 @@ void promoteTypeIds(Module &M, StringRef ModuleId) {
}
}
+ if (Function *TypeCheckedLoadRelativeFunc = M.getFunction(
+ Intrinsic::getName(Intrinsic::type_checked_load_relative))) {
+ for (const Use &U : TypeCheckedLoadRelativeFunc->uses()) {
+ auto CI = cast<CallInst>(U.getUser());
+ ExternalizeTypeId(CI, 2);
+ }
+ }
+
for (GlobalObject &GO : M.global_objects()) {
SmallVector<MDNode *, 1> MDs;
GO.getMetadata(LLVMContext::MD_type, MDs);
@@ -196,6 +202,13 @@ void simplifyExternals(Module &M) {
F.eraseFromParent();
}
+ for (GlobalIFunc &I : llvm::make_early_inc_range(M.ifuncs())) {
+ if (I.use_empty())
+ I.eraseFromParent();
+ else
+ assert(I.getResolverFunction() && "ifunc misses its resolver function");
+ }
+
for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
if (GV.isDeclaration() && GV.use_empty()) {
GV.eraseFromParent();
@@ -246,6 +259,16 @@ static void cloneUsedGlobalVariables(const Module &SrcM, Module &DestM,
appendToUsed(DestM, NewUsed);
}
+#ifndef NDEBUG
+static bool enableUnifiedLTO(Module &M) {
+ bool UnifiedLTO = false;
+ if (auto *MD =
+ mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("UnifiedLTO")))
+ UnifiedLTO = MD->getZExtValue();
+ return UnifiedLTO;
+}
+#endif
+
// If it's possible to split M into regular and thin LTO parts, do so and write
// a multi-module bitcode file with the two parts to OS. Otherwise, write only a
// regular LTO bitcode file to OS.
@@ -254,18 +277,20 @@ void splitAndWriteThinLTOBitcode(
function_ref<AAResults &(Function &)> AARGetter, Module &M) {
std::string ModuleId = getUniqueModuleId(&M);
if (ModuleId.empty()) {
+ assert(!enableUnifiedLTO(M));
// We couldn't generate a module ID for this module, write it out as a
// regular LTO module with an index for summary-based dead stripping.
ProfileSummaryInfo PSI(M);
M.addModuleFlag(Module::Error, "ThinLTO", uint32_t(0));
ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, &PSI);
- WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false, &Index);
+ WriteBitcodeToFile(M, OS, /*ShouldPreserveUseListOrder=*/false, &Index,
+ /*UnifiedLTO=*/false);
if (ThinLinkOS)
// We don't have a ThinLTO part, but still write the module to the
// ThinLinkOS if requested so that the expected output file is produced.
WriteBitcodeToFile(M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false,
- &Index);
+ &Index, /*UnifiedLTO=*/false);
return;
}
@@ -503,15 +528,17 @@ bool hasTypeMetadata(Module &M) {
return false;
}
-void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
+bool writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
function_ref<AAResults &(Function &)> AARGetter,
Module &M, const ModuleSummaryIndex *Index) {
std::unique_ptr<ModuleSummaryIndex> NewIndex = nullptr;
// See if this module has any type metadata. If so, we try to split it
// or at least promote type ids to enable WPD.
if (hasTypeMetadata(M)) {
- if (enableSplitLTOUnit(M))
- return splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
+ if (enableSplitLTOUnit(M)) {
+ splitAndWriteThinLTOBitcode(OS, ThinLinkOS, AARGetter, M);
+ return true;
+ }
// Promote type ids as needed for index-based WPD.
std::string ModuleId = getUniqueModuleId(&M);
if (!ModuleId.empty()) {
@@ -544,6 +571,7 @@ void writeThinLTOBitcode(raw_ostream &OS, raw_ostream *ThinLinkOS,
// given OS.
if (ThinLinkOS && Index)
writeThinLinkBitcodeToFile(M, *ThinLinkOS, *Index, ModHash);
+ return false;
}
} // anonymous namespace
@@ -552,10 +580,11 @@ PreservedAnalyses
llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- writeThinLTOBitcode(OS, ThinLinkOS,
- [&FAM](Function &F) -> AAResults & {
- return FAM.getResult<AAManager>(F);
- },
- M, &AM.getResult<ModuleSummaryIndexAnalysis>(M));
- return PreservedAnalyses::all();
+ bool Changed = writeThinLTOBitcode(
+ OS, ThinLinkOS,
+ [&FAM](Function &F) -> AAResults & {
+ return FAM.getResult<AAManager>(F);
+ },
+ M, &AM.getResult<ModuleSummaryIndexAnalysis>(M));
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 487a0a4a97f7..d33258642365 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -58,7 +58,6 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
@@ -84,9 +83,6 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndexYAML.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/PassRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Errc.h"
@@ -94,6 +90,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/GlobPattern.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -259,7 +256,7 @@ wholeprogramdevirt::findLowestOffset(ArrayRef<VirtualCallTarget> Targets,
if (I < B.size())
BitsUsed |= B[I];
if (BitsUsed != 0xff)
- return (MinByte + I) * 8 + countTrailingZeros(uint8_t(~BitsUsed));
+ return (MinByte + I) * 8 + llvm::countr_zero(uint8_t(~BitsUsed));
}
} else {
// Find a free (Size/8) byte region in each member of Used.
@@ -313,9 +310,10 @@ void wholeprogramdevirt::setAfterReturnValues(
}
}
-VirtualCallTarget::VirtualCallTarget(Function *Fn, const TypeMemberInfo *TM)
+VirtualCallTarget::VirtualCallTarget(GlobalValue *Fn, const TypeMemberInfo *TM)
: Fn(Fn), TM(TM),
- IsBigEndian(Fn->getParent()->getDataLayout().isBigEndian()), WasDevirt(false) {}
+ IsBigEndian(Fn->getParent()->getDataLayout().isBigEndian()),
+ WasDevirt(false) {}
namespace {
@@ -379,6 +377,7 @@ namespace {
// conditions
// 1) All summaries are live.
// 2) All function summaries indicate it's unreachable
+// 3) There is no non-function with the same GUID (which is rare)
bool mustBeUnreachableFunction(ValueInfo TheFnVI) {
if ((!TheFnVI) || TheFnVI.getSummaryList().empty()) {
// Returns false if ValueInfo is absent, or the summary list is empty
@@ -391,12 +390,13 @@ bool mustBeUnreachableFunction(ValueInfo TheFnVI) {
// In general either all summaries should be live or all should be dead.
if (!Summary->isLive())
return false;
- if (auto *FS = dyn_cast<FunctionSummary>(Summary.get())) {
+ if (auto *FS = dyn_cast<FunctionSummary>(Summary->getBaseObject())) {
if (!FS->fflags().MustBeUnreachable)
return false;
}
- // Do nothing if a non-function has the same GUID (which is rare).
- // This is correct since non-function summaries are not relevant.
+ // Be conservative if a non-function has the same GUID (which is rare).
+ else
+ return false;
}
// All function summaries are live and all of them agree that the function is
// unreachable.
@@ -567,6 +567,10 @@ struct DevirtModule {
// optimize a call more than once.
SmallPtrSet<CallBase *, 8> OptimizedCalls;
+ // Store calls that had their ptrauth bundle removed. They are to be deleted
+ // at the end of the optimization.
+ SmallVector<CallBase *, 8> CallsWithPtrAuthBundleRemoved;
+
// This map keeps track of the number of "unsafe" uses of a loaded function
// pointer. The key is the associated llvm.type.test intrinsic call generated
// by this pass. An unsafe use is one that calls the loaded function pointer
@@ -761,7 +765,7 @@ PreservedAnalyses WholeProgramDevirtPass::run(Module &M,
return FAM.getResult<DominatorTreeAnalysis>(F);
};
if (UseCommandLine) {
- if (DevirtModule::runForTesting(M, AARGetter, OREGetter, LookupDomTree))
+ if (!DevirtModule::runForTesting(M, AARGetter, OREGetter, LookupDomTree))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
@@ -892,8 +896,7 @@ static Error checkCombinedSummaryForTesting(ModuleSummaryIndex *Summary) {
// DevirtIndex::run, not to DevirtModule::run used by opt/runForTesting.
const auto &ModPaths = Summary->modulePaths();
if (ClSummaryAction != PassSummaryAction::Import &&
- ModPaths.find(ModuleSummaryIndex::getRegularLTOModuleName()) ==
- ModPaths.end())
+ !ModPaths.contains(ModuleSummaryIndex::getRegularLTOModuleName()))
return createStringError(
errc::invalid_argument,
"combined summary should contain Regular LTO module");
@@ -958,7 +961,7 @@ void DevirtModule::buildTypeIdentifierMap(
std::vector<VTableBits> &Bits,
DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap) {
DenseMap<GlobalVariable *, VTableBits *> GVToBits;
- Bits.reserve(M.getGlobalList().size());
+ Bits.reserve(M.global_size());
SmallVector<MDNode *, 2> Types;
for (GlobalVariable &GV : M.globals()) {
Types.clear();
@@ -1003,11 +1006,17 @@ bool DevirtModule::tryFindVirtualCallTargets(
return false;
Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(),
- TM.Offset + ByteOffset, M);
+ TM.Offset + ByteOffset, M, TM.Bits->GV);
if (!Ptr)
return false;
- auto Fn = dyn_cast<Function>(Ptr->stripPointerCasts());
+ auto C = Ptr->stripPointerCasts();
+ // Make sure this is a function or alias to a function.
+ auto Fn = dyn_cast<Function>(C);
+ auto A = dyn_cast<GlobalAlias>(C);
+ if (!Fn && A)
+ Fn = dyn_cast<Function>(A->getAliasee());
+
if (!Fn)
return false;
@@ -1024,7 +1033,11 @@ bool DevirtModule::tryFindVirtualCallTargets(
if (mustBeUnreachableFunction(Fn, ExportSummary))
continue;
- TargetsForSlot.push_back({Fn, &TM});
+ // Save the symbol used in the vtable to use as the devirtualization
+ // target.
+ auto GV = dyn_cast<GlobalValue>(C);
+ assert(GV);
+ TargetsForSlot.push_back({GV, &TM});
}
// Give up if we couldn't find any targets.
@@ -1156,6 +1169,14 @@ void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
// !callees metadata.
CB.setMetadata(LLVMContext::MD_prof, nullptr);
CB.setMetadata(LLVMContext::MD_callees, nullptr);
+ if (CB.getCalledOperand() &&
+ CB.getOperandBundle(LLVMContext::OB_ptrauth)) {
+ auto *NewCS =
+ CallBase::removeOperandBundle(&CB, LLVMContext::OB_ptrauth, &CB);
+ CB.replaceAllUsesWith(NewCS);
+ // Schedule for deletion at the end of pass run.
+ CallsWithPtrAuthBundleRemoved.push_back(&CB);
+ }
}
// This use is no longer unsafe.
@@ -1205,7 +1226,7 @@ bool DevirtModule::trySingleImplDevirt(
WholeProgramDevirtResolution *Res) {
// See if the program contains a single implementation of this virtual
// function.
- Function *TheFn = TargetsForSlot[0].Fn;
+ auto *TheFn = TargetsForSlot[0].Fn;
for (auto &&Target : TargetsForSlot)
if (TheFn != Target.Fn)
return false;
@@ -1379,9 +1400,20 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
IsExported = true;
if (CSInfo.AllCallSitesDevirted)
return;
+
+ std::map<CallBase *, CallBase *> CallBases;
for (auto &&VCallSite : CSInfo.CallSites) {
CallBase &CB = VCallSite.CB;
+ if (CallBases.find(&CB) != CallBases.end()) {
+ // When finding devirtualizable calls, it's possible to find the same
+ // vtable passed to multiple llvm.type.test or llvm.type.checked.load
+ // calls, which can cause duplicate call sites to be recorded in
+ // [Const]CallSites. If we've already found one of these
+ // call instances, just ignore it. It will be replaced later.
+ continue;
+ }
+
// Jump tables are only profitable if the retpoline mitigation is enabled.
Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
if (!FSAttr.isValid() ||
@@ -1428,8 +1460,7 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
AttributeList::get(M.getContext(), Attrs.getFnAttrs(),
Attrs.getRetAttrs(), NewArgAttrs));
- CB.replaceAllUsesWith(NewCS);
- CB.eraseFromParent();
+ CallBases[&CB] = NewCS;
// This use is no longer unsafe.
if (VCallSite.NumUnsafeUses)
@@ -1439,6 +1470,11 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
// retpoline mitigation, which would mean that they are lowered to
// llvm.type.test and therefore require an llvm.type.test resolution for the
// type identifier.
+
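+ // All call sites have been visited; apply the recorded replacements and
+ // erase the original calls.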
+ std::for_each(CallBases.begin(), CallBases.end(), [](auto &CBs) {
+ CBs.first->replaceAllUsesWith(CBs.second);
+ CBs.first->eraseFromParent();
+ });
};
Apply(SlotInfo.CSInfo);
for (auto &P : SlotInfo.ConstCSInfo)
@@ -1451,23 +1487,30 @@ bool DevirtModule::tryEvaluateFunctionsWithArgs(
// Evaluate each function and store the result in each target's RetVal
// field.
for (VirtualCallTarget &Target : TargetsForSlot) {
- if (Target.Fn->arg_size() != Args.size() + 1)
+ // TODO: Skip for now if the vtable symbol was an alias to a function; we
+ // need to evaluate whether it would be correct to analyze the aliasee
+ // function for this optimization.
+ auto Fn = dyn_cast<Function>(Target.Fn);
+ if (!Fn)
+ return false;
+
+ if (Fn->arg_size() != Args.size() + 1)
return false;
Evaluator Eval(M.getDataLayout(), nullptr);
SmallVector<Constant *, 2> EvalArgs;
EvalArgs.push_back(
- Constant::getNullValue(Target.Fn->getFunctionType()->getParamType(0)));
+ Constant::getNullValue(Fn->getFunctionType()->getParamType(0)));
for (unsigned I = 0; I != Args.size(); ++I) {
- auto *ArgTy = dyn_cast<IntegerType>(
- Target.Fn->getFunctionType()->getParamType(I + 1));
+ auto *ArgTy =
+ dyn_cast<IntegerType>(Fn->getFunctionType()->getParamType(I + 1));
if (!ArgTy)
return false;
EvalArgs.push_back(ConstantInt::get(ArgTy, Args[I]));
}
Constant *RetVal;
- if (!Eval.EvaluateFunction(Target.Fn, RetVal, EvalArgs) ||
+ if (!Eval.EvaluateFunction(Fn, RetVal, EvalArgs) ||
!isa<ConstantInt>(RetVal))
return false;
Target.RetVal = cast<ConstantInt>(RetVal)->getZExtValue();
@@ -1675,8 +1718,7 @@ void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
Call.replaceAndErase("virtual-const-prop-1-bit", FnName, RemarksEnabled,
OREGetter, IsBitSet);
} else {
- Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo());
- Value *Val = B.CreateLoad(RetType, ValAddr);
+ Value *Val = B.CreateLoad(RetType, Addr);
NumVirtConstProp++;
Call.replaceAndErase("virtual-const-prop", FnName, RemarksEnabled,
OREGetter, Val);
@@ -1688,8 +1730,14 @@ void DevirtModule::applyVirtualConstProp(CallSiteInfo &CSInfo, StringRef FnName,
bool DevirtModule::tryVirtualConstProp(
MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
WholeProgramDevirtResolution *Res, VTableSlot Slot) {
+ // TODO: Skip for now if the vtable symbol was an alias to a function; we
+ // need to evaluate whether it would be correct to analyze the aliasee
+ // function for this optimization.
+ auto Fn = dyn_cast<Function>(TargetsForSlot[0].Fn);
+ if (!Fn)
+ return false;
// This only works if the function returns an integer.
- auto RetType = dyn_cast<IntegerType>(TargetsForSlot[0].Fn->getReturnType());
+ auto RetType = dyn_cast<IntegerType>(Fn->getReturnType());
if (!RetType)
return false;
unsigned BitWidth = RetType->getBitWidth();
@@ -1707,11 +1755,18 @@ bool DevirtModule::tryVirtualConstProp(
// inline all implementations of the virtual function into each call site,
// rather than using function attributes to perform local optimization.
for (VirtualCallTarget &Target : TargetsForSlot) {
- if (Target.Fn->isDeclaration() ||
- !computeFunctionBodyMemoryAccess(*Target.Fn, AARGetter(*Target.Fn))
+ // TODO: Skip for now if the vtable symbol was an alias to a function,
+ // need to evaluate whether it would be correct to analyze the aliasee
+ // function for this optimization.
+ auto Fn = dyn_cast<Function>(Target.Fn);
+ if (!Fn)
+ return false;
+
+ if (Fn->isDeclaration() ||
+ !computeFunctionBodyMemoryAccess(*Fn, AARGetter(*Fn))
.doesNotAccessMemory() ||
- Target.Fn->arg_empty() || !Target.Fn->arg_begin()->use_empty() ||
- Target.Fn->getReturnType() != RetType)
+ Fn->arg_empty() || !Fn->arg_begin()->use_empty() ||
+ Fn->getReturnType() != RetType)
return false;
}
@@ -1947,9 +2002,23 @@ void DevirtModule::scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc) {
// This helps avoid unnecessary spills.
IRBuilder<> LoadB(
(LoadedPtrs.size() == 1 && !HasNonCallUses) ? LoadedPtrs[0] : CI);
- Value *GEP = LoadB.CreateGEP(Int8Ty, Ptr, Offset);
- Value *GEPPtr = LoadB.CreateBitCast(GEP, PointerType::getUnqual(Int8PtrTy));
- Value *LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEPPtr);
+
+ Value *LoadedValue = nullptr;
+ if (TypeCheckedLoadFunc->getIntrinsicID() ==
+ Intrinsic::type_checked_load_relative) {
+ Value *GEP = LoadB.CreateGEP(Int8Ty, Ptr, Offset);
+ Value *GEPPtr = LoadB.CreateBitCast(GEP, PointerType::getUnqual(Int32Ty));
+ LoadedValue = LoadB.CreateLoad(Int32Ty, GEPPtr);
+ LoadedValue = LoadB.CreateSExt(LoadedValue, IntPtrTy);
+ GEP = LoadB.CreatePtrToInt(GEP, IntPtrTy);
+ LoadedValue = LoadB.CreateAdd(GEP, LoadedValue);
+ LoadedValue = LoadB.CreateIntToPtr(LoadedValue, Int8PtrTy);
+ } else {
+ Value *GEP = LoadB.CreateGEP(Int8Ty, Ptr, Offset);
+ Value *GEPPtr =
+ LoadB.CreateBitCast(GEP, PointerType::getUnqual(Int8PtrTy));
+ LoadedValue = LoadB.CreateLoad(Int8PtrTy, GEPPtr);
+ }
for (Instruction *LoadedPtr : LoadedPtrs) {
LoadedPtr->replaceAllUsesWith(LoadedValue);
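The llvm.type.checked.load.relative branch added above reads a signed 32-bit value stored in the vtable slot and adds it to the slot's own address to recover the target pointer. A small standalone sketch of that address arithmetic, assuming only the C++ standard library; decodeRelativeEntry is an illustrative name, not an LLVM API:

#include <cstdint>
#include <cstring>
#include <iostream>

static uintptr_t decodeRelativeEntry(const unsigned char *VTable,
                                     uint64_t Offset) {
  const unsigned char *Slot = VTable + Offset;        // GEP i8, Ptr, Offset
  int32_t Rel;
  std::memcpy(&Rel, Slot, sizeof(Rel));               // load i32
  return reinterpret_cast<uintptr_t>(Slot) +
         static_cast<int64_t>(Rel);                   // ptrtoint + sext + add
}

int main() {
  alignas(4) unsigned char VTable[8] = {};
  // Pretend the target lives 16 bytes after slot 4.
  int32_t Rel = 16;
  std::memcpy(VTable + 4, &Rel, sizeof(Rel));
  std::cout << decodeRelativeEntry(VTable, 4) -
                   reinterpret_cast<uintptr_t>(VTable + 4)
            << "\n"; // prints 16
}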
@@ -2130,6 +2199,8 @@ bool DevirtModule::run() {
M.getFunction(Intrinsic::getName(Intrinsic::type_test));
Function *TypeCheckedLoadFunc =
M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
+ Function *TypeCheckedLoadRelativeFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load_relative));
Function *AssumeFunc = M.getFunction(Intrinsic::getName(Intrinsic::assume));
// Normally if there are no users of the devirtualization intrinsics in the
@@ -2138,7 +2209,9 @@ bool DevirtModule::run() {
if (!ExportSummary &&
(!TypeTestFunc || TypeTestFunc->use_empty() || !AssumeFunc ||
AssumeFunc->use_empty()) &&
- (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
+ (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()) &&
+ (!TypeCheckedLoadRelativeFunc ||
+ TypeCheckedLoadRelativeFunc->use_empty()))
return false;
// Rebuild type metadata into a map for easy lookup.
@@ -2152,6 +2225,9 @@ bool DevirtModule::run() {
if (TypeCheckedLoadFunc)
scanTypeCheckedLoadUsers(TypeCheckedLoadFunc);
+ if (TypeCheckedLoadRelativeFunc)
+ scanTypeCheckedLoadUsers(TypeCheckedLoadRelativeFunc);
+
if (ImportSummary) {
for (auto &S : CallSlots)
importResolution(S.first, S.second);
@@ -2219,7 +2295,7 @@ bool DevirtModule::run() {
// For each (type, offset) pair:
bool DidVirtualConstProp = false;
- std::map<std::string, Function*> DevirtTargets;
+ std::map<std::string, GlobalValue *> DevirtTargets;
for (auto &S : CallSlots) {
// Search each of the members of the type identifier for the virtual
// function implementation at offset S.first.ByteOffset, and add to
@@ -2274,7 +2350,14 @@ bool DevirtModule::run() {
if (RemarksEnabled) {
// Generate remarks for each devirtualized function.
for (const auto &DT : DevirtTargets) {
- Function *F = DT.second;
+ GlobalValue *GV = DT.second;
+ auto F = dyn_cast<Function>(GV);
+ if (!F) {
+ auto A = dyn_cast<GlobalAlias>(GV);
+ assert(A && isa<Function>(A->getAliasee()));
+ F = dyn_cast<Function>(A->getAliasee());
+ assert(F);
+ }
using namespace ore;
OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
@@ -2299,6 +2382,9 @@ bool DevirtModule::run() {
for (GlobalVariable &GV : M.globals())
GV.eraseMetadata(LLVMContext::MD_vcall_visibility);
+ for (auto *CI : CallsWithPtrAuthBundleRemoved)
+ CI->eraseFromParent();
+
return true;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index b68efc993723..91ca44e0f11e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -797,7 +797,7 @@ static Value *checkForNegativeOperand(BinaryOperator &I,
// LHS = XOR(Y, C1), Y = AND(Z, C2), C1 == (C2 + 1) => LHS == NEG(OR(Z, ~C2))
// ADD(LHS, RHS) == SUB(RHS, OR(Z, ~C2))
if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1))))
- if (C1->countTrailingZeros() == 0)
+ if (C1->countr_zero() == 0)
if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && *C1 == (*C2 + 1)) {
Value *NewOr = Builder.CreateOr(Z, ~(*C2));
return Builder.CreateSub(RHS, NewOr, "sub");
@@ -880,8 +880,15 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
return SelectInst::Create(X, InstCombiner::SubOne(Op1C), Op1);
// ~X + C --> (C-1) - X
- if (match(Op0, m_Not(m_Value(X))))
- return BinaryOperator::CreateSub(InstCombiner::SubOne(Op1C), X);
+ if (match(Op0, m_Not(m_Value(X)))) {
+ // ~X + C has NSW and (C-1) won't overflow => (C-1)-X can have NSW
+ auto *COne = ConstantInt::get(Op1C->getType(), 1);
+ bool WillNotSOV = willNotOverflowSignedSub(Op1C, COne, Add);
+ BinaryOperator *Res =
+ BinaryOperator::CreateSub(ConstantExpr::getSub(Op1C, COne), X);
+ Res->setHasNoSignedWrap(Add.hasNoSignedWrap() && WillNotSOV);
+ return Res;
+ }
// (iN X s>> (N - 1)) + 1 --> zext (X > -1)
const APInt *C;
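The "~X + C --> (C-1) - X" fold above rests on a modular-arithmetic identity; the following throwaway check (editorial, not part of the patch) verifies it exhaustively for 8-bit values:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 0; X < 256; ++X)
    for (unsigned C = 0; C < 256; ++C) {
      uint8_t Lhs = uint8_t(~uint8_t(X)) + uint8_t(C);       // ~X + C
      uint8_t Rhs = uint8_t(uint8_t(C) - 1) - uint8_t(X);    // (C-1) - X
      assert(Lhs == Rhs);
    }
  return 0;
}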
@@ -975,6 +982,16 @@ Instruction *InstCombinerImpl::foldAddWithConstant(BinaryOperator &Add) {
}
}
+ // Fold (add (zext (add X, -1)), 1) -> (zext X) if X is non-zero.
+ // TODO: There's a general form for any constant on the outer add.
+ if (C->isOne()) {
+ if (match(Op0, m_ZExt(m_Add(m_Value(X), m_AllOnes())))) {
+ const SimplifyQuery Q = SQ.getWithInstruction(&Add);
+ if (llvm::isKnownNonZero(X, DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return new ZExtInst(X, Ty);
+ }
+ }
+
return nullptr;
}
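A quick standalone check of the "(add (zext (add X, -1)), 1) -> (zext X)" fold above, for the non-zero-X case it guards on, over all 8-bit values zero-extended to 32 bits:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned X = 1; X < 256; ++X) {                   // X is known non-zero
    uint32_t Folded   = uint32_t(uint8_t(X));            // zext X
    uint32_t Original = uint32_t(uint8_t(X - 1)) + 1;    // (zext (X + -1)) + 1
    assert(Folded == Original);
  }
  return 0;
}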
@@ -1366,6 +1383,9 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
if (Instruction *X = foldNoWrapAdd(I, Builder))
return X;
+ if (Instruction *R = foldBinOpShiftWithShift(I))
+ return R;
+
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
Type *Ty = I.getType();
if (Ty->isIntOrIntVectorTy(1))
@@ -1421,6 +1441,14 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
Value *Sub = Builder.CreateSub(A, B);
return BinaryOperator::CreateAdd(Sub, ConstantExpr::getAdd(C1, C2));
}
+
+ // Canonicalize a constant sub operand as an add operand for better folding:
+ // (C1 - A) + B --> (B - A) + C1
+ if (match(&I, m_c_Add(m_OneUse(m_Sub(m_ImmConstant(C1), m_Value(A))),
+ m_Value(B)))) {
+ Value *Sub = Builder.CreateSub(B, A, "reass.sub");
+ return BinaryOperator::CreateAdd(Sub, C1);
+ }
}
// X % C0 + (( X / C0 ) % C1) * C0 => X % (C0 * C1)
@@ -1439,7 +1467,7 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
// (A & 2^C1) + A => A & (2^C1 - 1) iff bit C1 in A is a sign bit
if (match(&I, m_c_Add(m_And(m_Value(A), m_APInt(C1)), m_Deferred(A))) &&
- C1->isPowerOf2() && (ComputeNumSignBits(A) > C1->countLeadingZeros())) {
+ C1->isPowerOf2() && (ComputeNumSignBits(A) > C1->countl_zero())) {
Constant *NewMask = ConstantInt::get(RHS->getType(), *C1 - 1);
return BinaryOperator::CreateAnd(A, NewMask);
}
@@ -1451,6 +1479,11 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
match(RHS, m_ZExt(m_NUWSub(m_Value(B), m_Specific(A))))))
return new ZExtInst(B, LHS->getType());
+ // zext(A) + sext(A) --> 0 if A is i1
+ if (match(&I, m_c_BinOp(m_ZExt(m_Value(A)), m_SExt(m_Deferred(A)))) &&
+ A->getType()->isIntOrIntVectorTy(1))
+ return replaceInstUsesWith(I, Constant::getNullValue(I.getType()));
+
// A+B --> A|B iff A and B have no bits set in common.
if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT))
return BinaryOperator::CreateOr(LHS, RHS);
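The "zext(A) + sext(A) --> 0 if A is i1" fold above follows from zext producing 0/1 and sext producing 0/-1; a tiny standalone check in plain C++:

#include <cassert>
#include <cstdint>

int main() {
  for (bool A : {false, true}) {
    uint8_t ZExt = A ? 1 : 0;       // zext i1 -> i8
    uint8_t SExt = A ? 0xFF : 0;    // sext i1 -> i8 (all-ones for true)
    assert(uint8_t(ZExt + SExt) == 0);
  }
  return 0;
}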
@@ -1515,7 +1548,7 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
const APInt *NegPow2C;
if (match(&I, m_c_Add(m_OneUse(m_Mul(m_Value(A), m_NegatedPower2(NegPow2C))),
m_Value(B)))) {
- Constant *ShiftAmtC = ConstantInt::get(Ty, NegPow2C->countTrailingZeros());
+ Constant *ShiftAmtC = ConstantInt::get(Ty, NegPow2C->countr_zero());
Value *Shl = Builder.CreateShl(A, ShiftAmtC);
return BinaryOperator::CreateSub(B, Shl);
}
@@ -1536,6 +1569,13 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
if (Instruction *Ashr = foldAddToAshr(I))
return Ashr;
+ // min(A, B) + max(A, B) => A + B.
+ if (match(&I, m_CombineOr(m_c_Add(m_SMax(m_Value(A), m_Value(B)),
+ m_c_SMin(m_Deferred(A), m_Deferred(B))),
+ m_c_Add(m_UMax(m_Value(A), m_Value(B)),
+ m_c_UMin(m_Deferred(A), m_Deferred(B))))))
+ return BinaryOperator::CreateWithCopiedFlags(Instruction::Add, A, B, &I);
+
// TODO(jingyue): Consider willNotOverflowSignedAdd and
// willNotOverflowUnsignedAdd to reduce the number of invocations of
// computeKnownBits.
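A brute-force sanity check (editorial, not part of the patch) of the "min(A, B) + max(A, B) => A + B" fold added above, covering both the signed and unsigned pairings it matches:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (int A = 0; A < 256; ++A)
    for (int B = 0; B < 256; ++B) {
      uint8_t UA = A, UB = B;
      int8_t SA = int8_t(UA), SB = int8_t(UB);
      // min/max pick the same pair of values in some order, so the
      // wrap-around sums agree with A + B (two's-complement assumed).
      assert(uint8_t(std::min(UA, UB) + std::max(UA, UB)) == uint8_t(UA + UB));
      assert(int8_t(std::min(SA, SB) + std::max(SA, SB)) == int8_t(SA + SB));
    }
  return 0;
}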
@@ -1575,6 +1615,12 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
{Builder.CreateOr(A, B)}));
+ if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
+ return Res;
+
+ if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
+ return Res;
+
return Changed ? &I : nullptr;
}
@@ -1786,6 +1832,20 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
}
+ // minimum(X, Y) + maximum(X, Y) => X + Y.
+ if (match(&I,
+ m_c_FAdd(m_Intrinsic<Intrinsic::maximum>(m_Value(X), m_Value(Y)),
+ m_c_Intrinsic<Intrinsic::minimum>(m_Deferred(X),
+ m_Deferred(Y))))) {
+ BinaryOperator *Result = BinaryOperator::CreateFAddFMF(X, Y, &I);
+ // We cannot preserve ninf if the nnan flag is not set.
+ // If X is NaN and Y is Inf then the original program computed NaN + NaN,
+ // while the optimized version computes NaN + Inf, which is poison when the
+ // ninf flag is set.
+ if (!Result->hasNoNaNs())
+ Result->setHasNoInfs(false);
+ return Result;
+ }
+
return nullptr;
}
@@ -1956,8 +2016,17 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
Constant *C2;
// C-(X+C2) --> (C-C2)-X
- if (match(Op1, m_Add(m_Value(X), m_ImmConstant(C2))))
- return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
+ if (match(Op1, m_Add(m_Value(X), m_ImmConstant(C2)))) {
+ // If C-C2 does not overflow, and both C-(X+C2) and (X+C2) have NSW,
+ // then (C-C2)-X can have NSW.
+ bool WillNotSOV = willNotOverflowSignedSub(C, C2, I);
+ BinaryOperator *Res =
+ BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X);
+ auto *OBO1 = cast<OverflowingBinaryOperator>(Op1);
+ Res->setHasNoSignedWrap(I.hasNoSignedWrap() && OBO1->hasNoSignedWrap() &&
+ WillNotSOV);
+ return Res;
+ }
}
auto TryToNarrowDeduceFlags = [this, &I, &Op0, &Op1]() -> Instruction * {
@@ -2325,7 +2394,7 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
const APInt *AddC, *AndC;
if (match(Op0, m_Add(m_Value(X), m_APInt(AddC))) &&
match(Op1, m_And(m_Specific(X), m_APInt(AndC)))) {
- unsigned Cttz = AddC->countTrailingZeros();
+ unsigned Cttz = AddC->countr_zero();
APInt HighMask(APInt::getHighBitsSet(BitWidth, BitWidth - Cttz));
if ((HighMask & *AndC).isZero())
return BinaryOperator::CreateAnd(Op0, ConstantInt::get(Ty, ~(*AndC)));
@@ -2388,6 +2457,21 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
return replaceInstUsesWith(I, Mul);
}
+ // max(X,Y) nsw/nuw - min(X,Y) --> abs(X nsw - Y)
+ if (match(Op0, m_OneUse(m_c_SMax(m_Value(X), m_Value(Y)))) &&
+ match(Op1, m_OneUse(m_c_SMin(m_Specific(X), m_Specific(Y))))) {
+ if (I.hasNoUnsignedWrap() || I.hasNoSignedWrap()) {
+ Value *Sub =
+ Builder.CreateSub(X, Y, "sub", /*HasNUW=*/false, /*HasNSW=*/true);
+ Value *Call =
+ Builder.CreateBinaryIntrinsic(Intrinsic::abs, Sub, Builder.getTrue());
+ return replaceInstUsesWith(I, Call);
+ }
+ }
+
+ if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
+ return Res;
+
return TryToNarrowDeduceFlags();
}
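The "max(X,Y) - min(X,Y) --> abs(X - Y)" fold above assumes the subtraction cannot wrap (nsw/nuw). The following check evaluates the identity in a wider type so that assumption holds by construction:

#include <algorithm>
#include <cassert>
#include <cstdlib>

int main() {
  for (int X = -128; X <= 127; ++X)
    for (int Y = -128; Y <= 127; ++Y)
      assert(std::max(X, Y) - std::min(X, Y) == std::abs(X - Y));
  return 0;
}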
@@ -2567,7 +2651,7 @@ Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) {
// Note that if this fsub was really an fneg, the fadd with -0.0 will get
// killed later. We still limit that particular transform with 'hasOneUse'
// because an fneg is assumed better/cheaper than a generic fsub.
- if (I.hasNoSignedZeros() || CannotBeNegativeZero(Op0, SQ.TLI)) {
+ if (I.hasNoSignedZeros() || cannotBeNegativeZero(Op0, SQ.DL, SQ.TLI)) {
if (match(Op1, m_OneUse(m_FSub(m_Value(X), m_Value(Y))))) {
Value *NewSub = Builder.CreateFSubFMF(Y, X, &I);
return BinaryOperator::CreateFAddFMF(Op0, NewSub, &I);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 97a001b2ed32..8a1fb6b7f17e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -625,7 +625,8 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
return RHS;
}
- if (Mask & BMask_Mixed) {
+ if (Mask & (BMask_Mixed | BMask_NotMixed)) {
+ // Mixed:
// (icmp eq (A & B), C) & (icmp eq (A & D), E)
// We already know that B & C == C && D & E == E.
// If we can prove that (B & D) & (C ^ E) == 0, that is, the bits of
@@ -636,24 +637,50 @@ static Value *foldLogOpOfMaskedICmps(ICmpInst *LHS, ICmpInst *RHS, bool IsAnd,
// We can't simply use C and E because we might actually handle
// (icmp ne (A & B), B) & (icmp eq (A & D), D)
// with B and D, having a single bit set.
+
+ // NotMixed:
+ // (icmp ne (A & B), C) & (icmp ne (A & D), E)
+ // -> (icmp ne (A & (B & D)), (C & E))
+ // Check the intersection (B & D) for inequality.
+ // Assume that (B & D) == B || (B & D) == D, i.e., B/D is a subset of D/B,
+ // and (B & D) & (C ^ E) == 0, i.e., the bits of C and E that are shared by
+ // both B and D do not contradict each other.
+ // Note that we can assume (~B & C) == 0 && (~D & E) == 0; an earlier
+ // transform should have deleted these icmps if that did not hold.
+
const APInt *OldConstC, *OldConstE;
if (!match(C, m_APInt(OldConstC)) || !match(E, m_APInt(OldConstE)))
return nullptr;
- const APInt ConstC = PredL != NewCC ? *ConstB ^ *OldConstC : *OldConstC;
- const APInt ConstE = PredR != NewCC ? *ConstD ^ *OldConstE : *OldConstE;
+ auto FoldBMixed = [&](ICmpInst::Predicate CC, bool IsNot) -> Value * {
+ CC = IsNot ? CmpInst::getInversePredicate(CC) : CC;
+ const APInt ConstC = PredL != CC ? *ConstB ^ *OldConstC : *OldConstC;
+ const APInt ConstE = PredR != CC ? *ConstD ^ *OldConstE : *OldConstE;
- // If there is a conflict, we should actually return a false for the
- // whole construct.
- if (((*ConstB & *ConstD) & (ConstC ^ ConstE)).getBoolValue())
- return ConstantInt::get(LHS->getType(), !IsAnd);
+ if (((*ConstB & *ConstD) & (ConstC ^ ConstE)).getBoolValue())
+ return IsNot ? nullptr : ConstantInt::get(LHS->getType(), !IsAnd);
- Value *NewOr1 = Builder.CreateOr(B, D);
- Value *NewAnd = Builder.CreateAnd(A, NewOr1);
- Constant *NewOr2 = ConstantInt::get(A->getType(), ConstC | ConstE);
- return Builder.CreateICmp(NewCC, NewAnd, NewOr2);
- }
+ if (IsNot && !ConstB->isSubsetOf(*ConstD) && !ConstD->isSubsetOf(*ConstB))
+ return nullptr;
+ APInt BD, CE;
+ if (IsNot) {
+ BD = *ConstB & *ConstD;
+ CE = ConstC & ConstE;
+ } else {
+ BD = *ConstB | *ConstD;
+ CE = ConstC | ConstE;
+ }
+ Value *NewAnd = Builder.CreateAnd(A, BD);
+ Value *CEVal = ConstantInt::get(A->getType(), CE);
+ return Builder.CreateICmp(CC, CEVal, NewAnd);
+ };
+
+ if (Mask & BMask_Mixed)
+ return FoldBMixed(NewCC, false);
+ if (Mask & BMask_NotMixed) // could also be an 'else'
+ return FoldBMixed(NewCC, true);
+ }
return nullptr;
}
@@ -928,6 +955,108 @@ static Value *foldIsPowerOf2(ICmpInst *Cmp0, ICmpInst *Cmp1, bool JoinedByAnd,
return nullptr;
}
+/// Try to fold (icmp(A & B) == 0) & (icmp(A & D) != E) into (icmp A u< D) iff
+/// B is a contiguous set of ones starting from the most significant bit
+/// (negative power of 2), D and E are equal, and D is a contiguous set of ones
+/// starting at the most significant zero bit in B. Parameter B supports masking
+/// using undef/poison in either scalar or vector values.
+static Value *foldNegativePower2AndShiftedMask(
+ Value *A, Value *B, Value *D, Value *E, ICmpInst::Predicate PredL,
+ ICmpInst::Predicate PredR, InstCombiner::BuilderTy &Builder) {
+ assert(ICmpInst::isEquality(PredL) && ICmpInst::isEquality(PredR) &&
+ "Expected equality predicates for masked type of icmps.");
+ if (PredL != ICmpInst::ICMP_EQ || PredR != ICmpInst::ICMP_NE)
+ return nullptr;
+
+ if (!match(B, m_NegatedPower2()) || !match(D, m_ShiftedMask()) ||
+ !match(E, m_ShiftedMask()))
+ return nullptr;
+
+ // Test scalar arguments for conversion. B has been validated earlier to be a
+ // negative power of two and thus is guaranteed to have one or more contiguous
+ // ones starting from the MSB followed by zero or more contiguous zeros. D has
+ // been validated earlier to be a shifted set of one or more contiguous ones.
+ // In order to match, the number of leading ones in B and the number of
+ // leading zeros in D must be equal. The predicate that B be a negative
+ // power of 2 prevents the condition of there
+ // ever being zero leading ones. Thus 0 == 0 cannot occur. The predicate that
+ // D always be a shifted mask prevents the condition of D equaling 0. This
+ // prevents matching the condition where B contains the maximum number of
+ // leading one bits (-1) and D contains the maximum number of leading zero
+ // bits (0).
+ auto isReducible = [](const Value *B, const Value *D, const Value *E) {
+ const APInt *BCst, *DCst, *ECst;
+ return match(B, m_APIntAllowUndef(BCst)) && match(D, m_APInt(DCst)) &&
+ match(E, m_APInt(ECst)) && *DCst == *ECst &&
+ (isa<UndefValue>(B) ||
+ (BCst->countLeadingOnes() == DCst->countLeadingZeros()));
+ };
+
+ // Test vector type arguments for conversion.
+ if (const auto *BVTy = dyn_cast<VectorType>(B->getType())) {
+ const auto *BFVTy = dyn_cast<FixedVectorType>(BVTy);
+ const auto *BConst = dyn_cast<Constant>(B);
+ const auto *DConst = dyn_cast<Constant>(D);
+ const auto *EConst = dyn_cast<Constant>(E);
+
+ if (!BFVTy || !BConst || !DConst || !EConst)
+ return nullptr;
+
+ for (unsigned I = 0; I != BFVTy->getNumElements(); ++I) {
+ const auto *BElt = BConst->getAggregateElement(I);
+ const auto *DElt = DConst->getAggregateElement(I);
+ const auto *EElt = EConst->getAggregateElement(I);
+
+ if (!BElt || !DElt || !EElt)
+ return nullptr;
+ if (!isReducible(BElt, DElt, EElt))
+ return nullptr;
+ }
+ } else {
+ // Test scalar type arguments for conversion.
+ if (!isReducible(B, D, E))
+ return nullptr;
+ }
+ return Builder.CreateICmp(ICmpInst::ICMP_ULT, A, D);
+}
+
+/// Try to fold ((icmp X u< P) & (icmp(X & M) != M)) or ((icmp X s> -1) &
+/// (icmp(X & M) != M)) into (icmp X u< M). Where P is a power of 2, M < P, and
+/// M is a contiguous shifted mask starting at the right most significant zero
+/// bit in P. SGT is supported as when P is the largest representable power of
+/// 2, an earlier optimization converts the expression into (icmp X s> -1).
+/// Parameter P supports masking using undef/poison in either scalar or vector
+/// values.
+static Value *foldPowerOf2AndShiftedMask(ICmpInst *Cmp0, ICmpInst *Cmp1,
+ bool JoinedByAnd,
+ InstCombiner::BuilderTy &Builder) {
+ if (!JoinedByAnd)
+ return nullptr;
+ Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr, *E = nullptr;
+ ICmpInst::Predicate CmpPred0 = Cmp0->getPredicate(),
+ CmpPred1 = Cmp1->getPredicate();
+ // Assuming P is a 2^n, getMaskedTypeForICmpPair will normalize (icmp X u<
+ // 2^n) into (icmp (X & ~(2^n-1)) == 0) and (icmp X s> -1) into (icmp (X &
+ // SignMask) == 0).
+ std::optional<std::pair<unsigned, unsigned>> MaskPair =
+ getMaskedTypeForICmpPair(A, B, C, D, E, Cmp0, Cmp1, CmpPred0, CmpPred1);
+ if (!MaskPair)
+ return nullptr;
+
+ const auto compareBMask = BMask_NotMixed | BMask_NotAllOnes;
+ unsigned CmpMask0 = MaskPair->first;
+ unsigned CmpMask1 = MaskPair->second;
+ if ((CmpMask0 & Mask_AllZeros) && (CmpMask1 == compareBMask)) {
+ if (Value *V = foldNegativePower2AndShiftedMask(A, B, D, E, CmpPred0,
+ CmpPred1, Builder))
+ return V;
+ } else if ((CmpMask0 == compareBMask) && (CmpMask1 & Mask_AllZeros)) {
+ if (Value *V = foldNegativePower2AndShiftedMask(A, D, B, C, CmpPred1,
+ CmpPred0, Builder))
+ return V;
+ }
+ return nullptr;
+}
+
/// Commuted variants are assumed to be handled by calling this function again
/// with the parameters swapped.
static Value *foldUnsignedUnderflowCheck(ICmpInst *ZeroICmp,
@@ -1313,9 +1442,44 @@ Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS,
return Right;
}
+ // Turn at least two fcmps with constants into llvm.is.fpclass.
+ //
+ // If we can represent a combined value test with one class call, we can
+ // potentially eliminate 4-6 instructions. If we can represent a test with a
+ // single fcmp with fneg and fabs, that's likely a better canonical form.
+ if (LHS->hasOneUse() && RHS->hasOneUse()) {
+ auto [ClassValRHS, ClassMaskRHS] =
+ fcmpToClassTest(PredR, *RHS->getFunction(), RHS0, RHS1);
+ if (ClassValRHS) {
+ auto [ClassValLHS, ClassMaskLHS] =
+ fcmpToClassTest(PredL, *LHS->getFunction(), LHS0, LHS1);
+ if (ClassValLHS == ClassValRHS) {
+ unsigned CombinedMask = IsAnd ? (ClassMaskLHS & ClassMaskRHS)
+ : (ClassMaskLHS | ClassMaskRHS);
+ return Builder.CreateIntrinsic(
+ Intrinsic::is_fpclass, {ClassValLHS->getType()},
+ {ClassValLHS, Builder.getInt32(CombinedMask)});
+ }
+ }
+ }
+
return nullptr;
}
+/// Match an fcmp against a special value that performs a test possible by
+/// llvm.is.fpclass.
+static bool matchIsFPClassLikeFCmp(Value *Op, Value *&ClassVal,
+ uint64_t &ClassMask) {
+ auto *FCmp = dyn_cast<FCmpInst>(Op);
+ if (!FCmp || !FCmp->hasOneUse())
+ return false;
+
+ std::tie(ClassVal, ClassMask) =
+ fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
+ FCmp->getOperand(0), FCmp->getOperand(1));
+ return ClassVal != nullptr;
+}
+
/// or (is_fpclass x, mask0), (is_fpclass x, mask1)
/// -> is_fpclass x, (mask0 | mask1)
/// and (is_fpclass x, mask0), (is_fpclass x, mask1)
@@ -1324,13 +1488,25 @@ Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS,
/// -> is_fpclass x, (mask0 ^ mask1)
Instruction *InstCombinerImpl::foldLogicOfIsFPClass(BinaryOperator &BO,
Value *Op0, Value *Op1) {
- Value *ClassVal;
+ Value *ClassVal0 = nullptr;
+ Value *ClassVal1 = nullptr;
uint64_t ClassMask0, ClassMask1;
- if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::is_fpclass>(
- m_Value(ClassVal), m_ConstantInt(ClassMask0)))) &&
+ // For now, restrict to folding one fcmp into one is.fpclass rather than
+ // introducing a new is.fpclass call.
+ //
+ // TODO: Support forming is.fpclass out of 2 separate fcmps when codegen is
+ // better.
+
+ bool IsLHSClass =
+ match(Op0, m_OneUse(m_Intrinsic<Intrinsic::is_fpclass>(
+ m_Value(ClassVal0), m_ConstantInt(ClassMask0))));
+ bool IsRHSClass =
match(Op1, m_OneUse(m_Intrinsic<Intrinsic::is_fpclass>(
- m_Specific(ClassVal), m_ConstantInt(ClassMask1))))) {
+ m_Value(ClassVal1), m_ConstantInt(ClassMask1))));
+ if ((((IsLHSClass || matchIsFPClassLikeFCmp(Op0, ClassVal0, ClassMask0)) &&
+ (IsRHSClass || matchIsFPClassLikeFCmp(Op1, ClassVal1, ClassMask1)))) &&
+ ClassVal0 == ClassVal1) {
unsigned NewClassMask;
switch (BO.getOpcode()) {
case Instruction::And:
@@ -1346,11 +1522,24 @@ Instruction *InstCombinerImpl::foldLogicOfIsFPClass(BinaryOperator &BO,
llvm_unreachable("not a binary logic operator");
}
- // TODO: Also check for special fcmps
- auto *II = cast<IntrinsicInst>(Op0);
- II->setArgOperand(
- 1, ConstantInt::get(II->getArgOperand(1)->getType(), NewClassMask));
- return replaceInstUsesWith(BO, II);
+ if (IsLHSClass) {
+ auto *II = cast<IntrinsicInst>(Op0);
+ II->setArgOperand(
+ 1, ConstantInt::get(II->getArgOperand(1)->getType(), NewClassMask));
+ return replaceInstUsesWith(BO, II);
+ }
+
+ if (IsRHSClass) {
+ auto *II = cast<IntrinsicInst>(Op1);
+ II->setArgOperand(
+ 1, ConstantInt::get(II->getArgOperand(1)->getType(), NewClassMask));
+ return replaceInstUsesWith(BO, II);
+ }
+
+ CallInst *NewClass =
+ Builder.CreateIntrinsic(Intrinsic::is_fpclass, {ClassVal0->getType()},
+ {ClassVal0, Builder.getInt32(NewClassMask)});
+ return replaceInstUsesWith(BO, NewClass);
}
return nullptr;
@@ -1523,6 +1712,39 @@ Instruction *InstCombinerImpl::foldCastedBitwiseLogic(BinaryOperator &I) {
assert(I.isBitwiseLogicOp() && "Unexpected opcode for bitwise logic folding");
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // fold bitwise(A >> BW - 1, zext(icmp)) (BW is the scalar bits of the
+ // type of A)
+ // -> bitwise(zext(A < 0), zext(icmp))
+ // -> zext(bitwise(A < 0, icmp))
+ auto FoldBitwiseICmpZeroWithICmp = [&](Value *Op0,
+ Value *Op1) -> Instruction * {
+ ICmpInst::Predicate Pred;
+ Value *A;
+ bool IsMatched =
+ match(Op0,
+ m_OneUse(m_LShr(
+ m_Value(A),
+ m_SpecificInt(Op0->getType()->getScalarSizeInBits() - 1)))) &&
+ match(Op1, m_OneUse(m_ZExt(m_ICmp(Pred, m_Value(), m_Value()))));
+
+ if (!IsMatched)
+ return nullptr;
+
+ auto *ICmpL =
+ Builder.CreateICmpSLT(A, Constant::getNullValue(A->getType()));
+ auto *ICmpR = cast<ZExtInst>(Op1)->getOperand(0);
+ auto *BitwiseOp = Builder.CreateBinOp(LogicOpc, ICmpL, ICmpR);
+
+ return new ZExtInst(BitwiseOp, Op0->getType());
+ };
+
+ if (auto *Ret = FoldBitwiseICmpZeroWithICmp(Op0, Op1))
+ return Ret;
+
+ if (auto *Ret = FoldBitwiseICmpZeroWithICmp(Op1, Op0))
+ return Ret;
+
CastInst *Cast0 = dyn_cast<CastInst>(Op0);
if (!Cast0)
return nullptr;
@@ -1906,16 +2128,16 @@ static Instruction *canonicalizeLogicFirst(BinaryOperator &I,
return nullptr;
unsigned Width = Ty->getScalarSizeInBits();
- unsigned LastOneMath = Width - C2->countTrailingZeros();
+ unsigned LastOneMath = Width - C2->countr_zero();
switch (OpC) {
case Instruction::And:
- if (C->countLeadingOnes() < LastOneMath)
+ if (C->countl_one() < LastOneMath)
return nullptr;
break;
case Instruction::Xor:
case Instruction::Or:
- if (C->countLeadingZeros() < LastOneMath)
+ if (C->countl_zero() < LastOneMath)
return nullptr;
break;
default:
@@ -1923,7 +2145,51 @@ static Instruction *canonicalizeLogicFirst(BinaryOperator &I,
}
Value *NewBinOp = Builder.CreateBinOp(OpC, X, ConstantInt::get(Ty, *C));
- return BinaryOperator::CreateAdd(NewBinOp, ConstantInt::get(Ty, *C2));
+ return BinaryOperator::CreateWithCopiedFlags(Instruction::Add, NewBinOp,
+ ConstantInt::get(Ty, *C2), Op0);
+}
+
+// binop(shift(ShiftedC1, ShAmt), shift(ShiftedC2, add(ShAmt, AddC))) ->
+// shift(binop(ShiftedC1, shift(ShiftedC2, AddC)), ShAmt)
+// where both shifts are the same and AddC is a valid shift amount.
+Instruction *InstCombinerImpl::foldBinOpOfDisplacedShifts(BinaryOperator &I) {
+ assert((I.isBitwiseLogicOp() || I.getOpcode() == Instruction::Add) &&
+ "Unexpected opcode");
+
+ Value *ShAmt;
+ Constant *ShiftedC1, *ShiftedC2, *AddC;
+ Type *Ty = I.getType();
+ unsigned BitWidth = Ty->getScalarSizeInBits();
+ if (!match(&I,
+ m_c_BinOp(m_Shift(m_ImmConstant(ShiftedC1), m_Value(ShAmt)),
+ m_Shift(m_ImmConstant(ShiftedC2),
+ m_Add(m_Deferred(ShAmt), m_ImmConstant(AddC))))))
+ return nullptr;
+
+ // Make sure the add constant is a valid shift amount.
+ if (!match(AddC,
+ m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(BitWidth, BitWidth))))
+ return nullptr;
+
+ // Avoid constant expressions.
+ auto *Op0Inst = dyn_cast<Instruction>(I.getOperand(0));
+ auto *Op1Inst = dyn_cast<Instruction>(I.getOperand(1));
+ if (!Op0Inst || !Op1Inst)
+ return nullptr;
+
+ // Both shifts must be the same.
+ Instruction::BinaryOps ShiftOp =
+ static_cast<Instruction::BinaryOps>(Op0Inst->getOpcode());
+ if (ShiftOp != Op1Inst->getOpcode())
+ return nullptr;
+
+ // For adds, only left shifts are supported.
+ if (I.getOpcode() == Instruction::Add && ShiftOp != Instruction::Shl)
+ return nullptr;
+
+ Value *NewC = Builder.CreateBinOp(
+ I.getOpcode(), ShiftedC1, Builder.CreateBinOp(ShiftOp, ShiftedC2, AddC));
+ return BinaryOperator::Create(ShiftOp, NewC, ShAmt);
}
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
@@ -1964,6 +2230,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
if (Value *V = SimplifyBSwap(I, Builder))
return replaceInstUsesWith(I, V);
+ if (Instruction *R = foldBinOpShiftWithShift(I))
+ return R;
+
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Value *X, *Y;
@@ -2033,7 +2302,7 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
if (match(Op0, m_Add(m_Value(X), m_APInt(AddC)))) {
// If we add zeros to every bit below a mask, the add has no effect:
// (X + AddC) & LowMaskC --> X & LowMaskC
- unsigned Ctlz = C->countLeadingZeros();
+ unsigned Ctlz = C->countl_zero();
APInt LowMask(APInt::getLowBitsSet(Width, Width - Ctlz));
if ((*AddC & LowMask).isZero())
return BinaryOperator::CreateAnd(X, Op1);
@@ -2150,7 +2419,7 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
const APInt *C3 = C;
Value *X;
if (C3->isPowerOf2()) {
- Constant *Log2C3 = ConstantInt::get(Ty, C3->countTrailingZeros());
+ Constant *Log2C3 = ConstantInt::get(Ty, C3->countr_zero());
if (match(Op0, m_OneUse(m_LShr(m_Shl(m_ImmConstant(C1), m_Value(X)),
m_ImmConstant(C2)))) &&
match(C1, m_Power2())) {
@@ -2407,6 +2676,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
if (Instruction *Folded = foldLogicOfIsFPClass(I, Op0, Op1))
return Folded;
+ if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
+ return Res;
+
return nullptr;
}
@@ -2718,34 +2990,47 @@ Value *InstCombinerImpl::matchSelectFromAndOr(Value *A, Value *C, Value *B,
return nullptr;
}
-// (icmp eq X, 0) | (icmp ult Other, X) -> (icmp ule Other, X-1)
-// (icmp ne X, 0) & (icmp uge Other, X) -> (icmp ugt Other, X-1)
-static Value *foldAndOrOfICmpEqZeroAndICmp(ICmpInst *LHS, ICmpInst *RHS,
- bool IsAnd, bool IsLogical,
- IRBuilderBase &Builder) {
+// (icmp eq X, C) | (icmp ult Other, (X - C)) -> (icmp ule Other, (X - (C + 1)))
+// (icmp ne X, C) & (icmp uge Other, (X - C)) -> (icmp ugt Other, (X - (C + 1)))
+static Value *foldAndOrOfICmpEqConstantAndICmp(ICmpInst *LHS, ICmpInst *RHS,
+ bool IsAnd, bool IsLogical,
+ IRBuilderBase &Builder) {
+ Value *LHS0 = LHS->getOperand(0);
+ Value *RHS0 = RHS->getOperand(0);
+ Value *RHS1 = RHS->getOperand(1);
+
ICmpInst::Predicate LPred =
IsAnd ? LHS->getInversePredicate() : LHS->getPredicate();
ICmpInst::Predicate RPred =
IsAnd ? RHS->getInversePredicate() : RHS->getPredicate();
- Value *LHS0 = LHS->getOperand(0);
- if (LPred != ICmpInst::ICMP_EQ || !match(LHS->getOperand(1), m_Zero()) ||
+
+ const APInt *CInt;
+ if (LPred != ICmpInst::ICMP_EQ ||
+ !match(LHS->getOperand(1), m_APIntAllowUndef(CInt)) ||
!LHS0->getType()->isIntOrIntVectorTy() ||
!(LHS->hasOneUse() || RHS->hasOneUse()))
return nullptr;
+ auto MatchRHSOp = [LHS0, CInt](const Value *RHSOp) {
+ return match(RHSOp,
+ m_Add(m_Specific(LHS0), m_SpecificIntAllowUndef(-*CInt))) ||
+ (CInt->isZero() && RHSOp == LHS0);
+ };
+
Value *Other;
- if (RPred == ICmpInst::ICMP_ULT && RHS->getOperand(1) == LHS0)
- Other = RHS->getOperand(0);
- else if (RPred == ICmpInst::ICMP_UGT && RHS->getOperand(0) == LHS0)
- Other = RHS->getOperand(1);
+ if (RPred == ICmpInst::ICMP_ULT && MatchRHSOp(RHS1))
+ Other = RHS0;
+ else if (RPred == ICmpInst::ICMP_UGT && MatchRHSOp(RHS0))
+ Other = RHS1;
else
return nullptr;
if (IsLogical)
Other = Builder.CreateFreeze(Other);
+
return Builder.CreateICmp(
IsAnd ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE,
- Builder.CreateAdd(LHS0, Constant::getAllOnesValue(LHS0->getType())),
+ Builder.CreateSub(LHS0, ConstantInt::get(LHS0->getType(), *CInt + 1)),
Other);
}
@@ -2792,12 +3077,12 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
return V;
if (Value *V =
- foldAndOrOfICmpEqZeroAndICmp(LHS, RHS, IsAnd, IsLogical, Builder))
+ foldAndOrOfICmpEqConstantAndICmp(LHS, RHS, IsAnd, IsLogical, Builder))
return V;
// We can treat logical like bitwise here, because both operands are used on
// the LHS, and as such poison from both will propagate.
- if (Value *V = foldAndOrOfICmpEqZeroAndICmp(RHS, LHS, IsAnd,
- /*IsLogical*/ false, Builder))
+ if (Value *V = foldAndOrOfICmpEqConstantAndICmp(RHS, LHS, IsAnd,
+ /*IsLogical*/ false, Builder))
return V;
if (Value *V =
@@ -2836,6 +3121,9 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
if (Value *V = foldIsPowerOf2(LHS, RHS, IsAnd, Builder))
return V;
+ if (Value *V = foldPowerOf2AndShiftedMask(LHS, RHS, IsAnd, Builder))
+ return V;
+
// TODO: Verify whether this is safe for logical and/or.
if (!IsLogical) {
if (Value *X = foldUnsignedUnderflowCheck(LHS, RHS, IsAnd, Q, Builder))
@@ -2849,7 +3137,7 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
// (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0)
// (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0)
- // TODO: Remove this when foldLogOpOfMaskedICmps can handle undefs.
+ // TODO: Remove this and below when foldLogOpOfMaskedICmps can handle undefs.
if (!IsLogical && PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
PredL == PredR && match(LHS1, m_ZeroInt()) && match(RHS1, m_ZeroInt()) &&
LHS0->getType() == RHS0->getType()) {
@@ -2858,6 +3146,16 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Constant::getNullValue(NewOr->getType()));
}
+ // (icmp ne A, -1) | (icmp ne B, -1) --> (icmp ne (A&B), -1)
+ // (icmp eq A, -1) & (icmp eq B, -1) --> (icmp eq (A&B), -1)
+ if (!IsLogical && PredL == (IsAnd ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
+ PredL == PredR && match(LHS1, m_AllOnes()) && match(RHS1, m_AllOnes()) &&
+ LHS0->getType() == RHS0->getType()) {
+ Value *NewAnd = Builder.CreateAnd(LHS0, RHS0);
+ return Builder.CreateICmp(PredL, NewAnd,
+ Constant::getAllOnesValue(LHS0->getType()));
+ }
+
// This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
if (!LHSC || !RHSC)
return nullptr;
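A short exhaustive check of the new "(icmp eq A, -1) & (icmp eq B, -1) --> (icmp eq (A&B), -1)" fold and its ne/or counterpart added above, over 8-bit values:

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B) {
      bool BothAllOnes = (uint8_t(A) == 0xFF) && (uint8_t(B) == 0xFF);
      assert(BothAllOnes == (uint8_t(A & B) == 0xFF));
      bool EitherNotAllOnes = (uint8_t(A) != 0xFF) || (uint8_t(B) != 0xFF);
      assert(EitherNotAllOnes == (uint8_t(A & B) != 0xFF));
    }
  return 0;
}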
@@ -2998,6 +3296,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Instruction *Concat = matchOrConcat(I, Builder))
return replaceInstUsesWith(I, Concat);
+ if (Instruction *R = foldBinOpShiftWithShift(I))
+ return R;
+
Value *X, *Y;
const APInt *CV;
if (match(&I, m_c_Or(m_OneUse(m_Xor(m_Value(X), m_APInt(CV))), m_Value(Y))) &&
@@ -3416,6 +3717,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Instruction *Folded = foldLogicOfIsFPClass(I, Op0, Op1))
return Folded;
+ if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
+ return Res;
+
return nullptr;
}
@@ -3715,6 +4019,24 @@ static Instruction *canonicalizeAbs(BinaryOperator &Xor,
return nullptr;
}
+static bool canFreelyInvert(InstCombiner &IC, Value *Op,
+ Instruction *IgnoredUser) {
+ auto *I = dyn_cast<Instruction>(Op);
+ return I && IC.isFreeToInvert(I, /*WillInvertAllUses=*/true) &&
+ InstCombiner::canFreelyInvertAllUsersOf(I, IgnoredUser);
+}
+
+static Value *freelyInvert(InstCombinerImpl &IC, Value *Op,
+ Instruction *IgnoredUser) {
+ auto *I = cast<Instruction>(Op);
+ IC.Builder.SetInsertPoint(&*I->getInsertionPointAfterDef());
+ Value *NotOp = IC.Builder.CreateNot(Op, Op->getName() + ".not");
+ Op->replaceUsesWithIf(NotOp,
+ [NotOp](Use &U) { return U.getUser() != NotOp; });
+ IC.freelyInvertAllUsersOf(NotOp, IgnoredUser);
+ return NotOp;
+}
+
// Transform
// z = ~(x &/| y)
// into:
@@ -3739,28 +4061,11 @@ bool InstCombinerImpl::sinkNotIntoLogicalOp(Instruction &I) {
return false;
// And can the operands be adapted?
- for (Value *Op : {Op0, Op1})
- if (!(InstCombiner::isFreeToInvert(Op, /*WillInvertAllUses=*/true) &&
- (match(Op, m_ImmConstant()) ||
- (isa<Instruction>(Op) &&
- InstCombiner::canFreelyInvertAllUsersOf(cast<Instruction>(Op),
- /*IgnoredUser=*/&I)))))
- return false;
+ if (!canFreelyInvert(*this, Op0, &I) || !canFreelyInvert(*this, Op1, &I))
+ return false;
- for (Value **Op : {&Op0, &Op1}) {
- Value *NotOp;
- if (auto *C = dyn_cast<Constant>(*Op)) {
- NotOp = ConstantExpr::getNot(C);
- } else {
- Builder.SetInsertPoint(
- &*cast<Instruction>(*Op)->getInsertionPointAfterDef());
- NotOp = Builder.CreateNot(*Op, (*Op)->getName() + ".not");
- (*Op)->replaceUsesWithIf(
- NotOp, [NotOp](Use &U) { return U.getUser() != NotOp; });
- freelyInvertAllUsersOf(NotOp, /*IgnoredUser=*/&I);
- }
- *Op = NotOp;
- }
+ Op0 = freelyInvert(*this, Op0, &I);
+ Op1 = freelyInvert(*this, Op1, &I);
Builder.SetInsertPoint(I.getInsertionPointAfterDef());
Value *NewLogicOp;
@@ -3794,20 +4099,11 @@ bool InstCombinerImpl::sinkNotIntoOtherHandOfLogicalOp(Instruction &I) {
Value *NotOp0 = nullptr;
Value *NotOp1 = nullptr;
Value **OpToInvert = nullptr;
- if (match(Op0, m_Not(m_Value(NotOp0))) &&
- InstCombiner::isFreeToInvert(Op1, /*WillInvertAllUses=*/true) &&
- (match(Op1, m_ImmConstant()) ||
- (isa<Instruction>(Op1) &&
- InstCombiner::canFreelyInvertAllUsersOf(cast<Instruction>(Op1),
- /*IgnoredUser=*/&I)))) {
+ if (match(Op0, m_Not(m_Value(NotOp0))) && canFreelyInvert(*this, Op1, &I)) {
Op0 = NotOp0;
OpToInvert = &Op1;
} else if (match(Op1, m_Not(m_Value(NotOp1))) &&
- InstCombiner::isFreeToInvert(Op0, /*WillInvertAllUses=*/true) &&
- (match(Op0, m_ImmConstant()) ||
- (isa<Instruction>(Op0) &&
- InstCombiner::canFreelyInvertAllUsersOf(cast<Instruction>(Op0),
- /*IgnoredUser=*/&I)))) {
+ canFreelyInvert(*this, Op0, &I)) {
Op1 = NotOp1;
OpToInvert = &Op0;
} else
@@ -3817,19 +4113,7 @@ bool InstCombinerImpl::sinkNotIntoOtherHandOfLogicalOp(Instruction &I) {
if (!InstCombiner::canFreelyInvertAllUsersOf(&I, /*IgnoredUser=*/nullptr))
return false;
- if (auto *C = dyn_cast<Constant>(*OpToInvert)) {
- *OpToInvert = ConstantExpr::getNot(C);
- } else {
- Builder.SetInsertPoint(
- &*cast<Instruction>(*OpToInvert)->getInsertionPointAfterDef());
- Value *NotOpToInvert =
- Builder.CreateNot(*OpToInvert, (*OpToInvert)->getName() + ".not");
- (*OpToInvert)->replaceUsesWithIf(NotOpToInvert, [NotOpToInvert](Use &U) {
- return U.getUser() != NotOpToInvert;
- });
- freelyInvertAllUsersOf(NotOpToInvert, /*IgnoredUser=*/&I);
- *OpToInvert = NotOpToInvert;
- }
+ *OpToInvert = freelyInvert(*this, *OpToInvert, &I);
Builder.SetInsertPoint(&*I.getInsertionPointAfterDef());
Value *NewBinOp;
@@ -3896,8 +4180,8 @@ Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
if (match(NotVal, m_AShr(m_Not(m_Value(X)), m_Value(Y))))
return BinaryOperator::CreateAShr(X, Y);
- // Bit-hack form of a signbit test:
- // iN ~X >>s (N-1) --> sext i1 (X > -1) to iN
+ // Bit-hack form of a signbit test for iN type:
+ // ~(X >>s (N - 1)) --> sext i1 (X > -1) to iN
unsigned FullShift = Ty->getScalarSizeInBits() - 1;
if (match(NotVal, m_OneUse(m_AShr(m_Value(X), m_SpecificInt(FullShift))))) {
Value *IsNotNeg = Builder.CreateIsNotNeg(X, "isnotneg");
@@ -4071,6 +4355,9 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
if (Instruction *R = foldNot(I))
return R;
+ if (Instruction *R = foldBinOpShiftWithShift(I))
+ return R;
+
// Fold (X & M) ^ (Y & ~M) -> (X & M) | (Y & ~M)
// This it a special case in haveNoCommonBitsSet, but the computeKnownBits
// calls in there are unnecessary as SimplifyDemandedInstructionBits should
@@ -4280,6 +4567,23 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
}
}
+ // (A & B) ^ (A | C) --> A ? ~B : C -- There are 4 commuted variants.
+ if (I.getType()->isIntOrIntVectorTy(1) &&
+ match(Op0, m_OneUse(m_LogicalAnd(m_Value(A), m_Value(B)))) &&
+ match(Op1, m_OneUse(m_LogicalOr(m_Value(C), m_Value(D))))) {
+ bool NeedFreeze = isa<SelectInst>(Op0) && isa<SelectInst>(Op1) && B == D;
+ if (B == C || B == D)
+ std::swap(A, B);
+ if (A == C)
+ std::swap(C, D);
+ if (A == D) {
+ if (NeedFreeze)
+ A = Builder.CreateFreeze(A);
+ Value *NotB = Builder.CreateNot(B);
+ return SelectInst::Create(A, NotB, C);
+ }
+ }
+
if (auto *LHS = dyn_cast<ICmpInst>(I.getOperand(0)))
if (auto *RHS = dyn_cast<ICmpInst>(I.getOperand(1)))
if (Value *V = foldXorOfICmps(LHS, RHS, I))
@@ -4313,5 +4617,8 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
if (Instruction *Folded = canonicalizeConditionalNegationViaMathToSelect(I))
return Folded;
+ if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
+ return Res;
+
return nullptr;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
index e73667f9c02e..cba282cea72b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
@@ -116,24 +116,10 @@ Instruction *InstCombinerImpl::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
return &RMWI;
}
- AtomicOrdering Ordering = RMWI.getOrdering();
- assert(Ordering != AtomicOrdering::NotAtomic &&
- Ordering != AtomicOrdering::Unordered &&
+ assert(RMWI.getOrdering() != AtomicOrdering::NotAtomic &&
+ RMWI.getOrdering() != AtomicOrdering::Unordered &&
"AtomicRMWs don't make sense with Unordered or NotAtomic");
- // Any atomicrmw xchg with no uses can be converted to a atomic store if the
- // ordering is compatible.
- if (RMWI.getOperation() == AtomicRMWInst::Xchg &&
- RMWI.use_empty()) {
- if (Ordering != AtomicOrdering::Release &&
- Ordering != AtomicOrdering::Monotonic)
- return nullptr;
- new StoreInst(RMWI.getValOperand(), RMWI.getPointerOperand(),
- /*isVolatile*/ false, RMWI.getAlign(), Ordering,
- RMWI.getSyncScopeID(), &RMWI);
- return eraseInstFromFunction(RMWI);
- }
-
if (!isIdempotentRMW(RMWI))
return nullptr;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index fbf1327143a8..d3ec6a7aa667 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -439,9 +440,7 @@ Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
ElementCount VF = WideLoadTy->getElementCount();
- Constant *EC =
- ConstantInt::get(Builder.getInt32Ty(), VF.getKnownMinValue());
- Value *RunTimeVF = VF.isScalable() ? Builder.CreateVScale(EC) : EC;
+ Value *RunTimeVF = Builder.CreateElementCount(Builder.getInt32Ty(), VF);
Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
Value *Extract =
Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
@@ -533,16 +532,15 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
}
- // If the operand is a select with constant arm(s), try to hoist ctlz/cttz.
- if (auto *Sel = dyn_cast<SelectInst>(Op0))
- if (Instruction *R = IC.FoldOpIntoSelect(II, Sel))
- return R;
-
if (IsTZ) {
// cttz(-x) -> cttz(x)
if (match(Op0, m_Neg(m_Value(X))))
return IC.replaceOperand(II, 0, X);
+ // cttz(-x & x) -> cttz(x)
+ if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
+ return IC.replaceOperand(II, 0, X);
+
// cttz(sext(x)) -> cttz(zext(x))
if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
auto *Zext = IC.Builder.CreateZExt(X, II.getType());
@@ -599,8 +597,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
}
// Add range metadata since known bits can't completely reflect what we know.
- // TODO: Handle splat vectors.
- auto *IT = dyn_cast<IntegerType>(Op0->getType());
+ auto *IT = cast<IntegerType>(Op0->getType()->getScalarType());
if (IT && IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
Metadata *LowAndHigh[] = {
ConstantAsMetadata::get(ConstantInt::get(IT, DefiniteZeros)),
@@ -657,11 +654,6 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
}
- // If the operand is a select with constant arm(s), try to hoist ctpop.
- if (auto *Sel = dyn_cast<SelectInst>(Op0))
- if (Instruction *R = IC.FoldOpIntoSelect(II, Sel))
- return R;
-
KnownBits Known(BitWidth);
IC.computeKnownBits(Op0, Known, 0, &II);
@@ -683,12 +675,8 @@ static Instruction *foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC) {
Constant::getNullValue(Ty)),
Ty);
- // FIXME: Try to simplify vectors of integers.
- auto *IT = dyn_cast<IntegerType>(Ty);
- if (!IT)
- return nullptr;
-
// Add range metadata since known bits can't completely reflect what we know.
+ auto *IT = cast<IntegerType>(Ty->getScalarType());
unsigned MinCount = Known.countMinPopulation();
unsigned MaxCount = Known.countMaxPopulation();
if (IT->getBitWidth() != 1 && !II.getMetadata(LLVMContext::MD_range)) {
@@ -830,10 +818,204 @@ InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
return nullptr;
}
+static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
+ Ty = Ty->getScalarType();
+ return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
+}
+
+static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
+ Ty = Ty->getScalarType();
+ return F.getDenormalMode(Ty->getFltSemantics()).inputsAreZero();
+}
+
+/// \returns the compare predicate type if the test performed by
+/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
+/// floating-point environment assumed for \p F for type \p Ty
+static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask,
+ const Function &F, Type *Ty) {
+ switch (static_cast<unsigned>(Mask)) {
+ case fcZero:
+ if (inputDenormalIsIEEE(F, Ty))
+ return FCmpInst::FCMP_OEQ;
+ break;
+ case fcZero | fcSubnormal:
+ if (inputDenormalIsDAZ(F, Ty))
+ return FCmpInst::FCMP_OEQ;
+ break;
+ case fcPositive | fcNegZero:
+ if (inputDenormalIsIEEE(F, Ty))
+ return FCmpInst::FCMP_OGE;
+ break;
+ case fcPositive | fcNegZero | fcNegSubnormal:
+ if (inputDenormalIsDAZ(F, Ty))
+ return FCmpInst::FCMP_OGE;
+ break;
+ case fcPosSubnormal | fcPosNormal | fcPosInf:
+ if (inputDenormalIsIEEE(F, Ty))
+ return FCmpInst::FCMP_OGT;
+ break;
+ case fcNegative | fcPosZero:
+ if (inputDenormalIsIEEE(F, Ty))
+ return FCmpInst::FCMP_OLE;
+ break;
+ case fcNegative | fcPosZero | fcPosSubnormal:
+ if (inputDenormalIsDAZ(F, Ty))
+ return FCmpInst::FCMP_OLE;
+ break;
+ case fcNegSubnormal | fcNegNormal | fcNegInf:
+ if (inputDenormalIsIEEE(F, Ty))
+ return FCmpInst::FCMP_OLT;
+ break;
+ case fcPosNormal | fcPosInf:
+ if (inputDenormalIsDAZ(F, Ty))
+ return FCmpInst::FCMP_OGT;
+ break;
+ case fcNegNormal | fcNegInf:
+ if (inputDenormalIsDAZ(F, Ty))
+ return FCmpInst::FCMP_OLT;
+ break;
+ case ~fcZero & ~fcNan:
+ if (inputDenormalIsIEEE(F, Ty))
+ return FCmpInst::FCMP_ONE;
+ break;
+ case ~(fcZero | fcSubnormal) & ~fcNan:
+ if (inputDenormalIsDAZ(F, Ty))
+ return FCmpInst::FCMP_ONE;
+ break;
+ default:
+ break;
+ }
+
+ return FCmpInst::BAD_FCMP_PREDICATE;
+}
+
+Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
+ Value *Src0 = II.getArgOperand(0);
+ Value *Src1 = II.getArgOperand(1);
+ const ConstantInt *CMask = cast<ConstantInt>(Src1);
+ FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
+ const bool IsUnordered = (Mask & fcNan) == fcNan;
+ const bool IsOrdered = (Mask & fcNan) == fcNone;
+ const FPClassTest OrderedMask = Mask & ~fcNan;
+ const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;
+
+ const bool IsStrict = II.isStrictFP();
+
+ Value *FNegSrc;
+ if (match(Src0, m_FNeg(m_Value(FNegSrc)))) {
+ // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
+
+ II.setArgOperand(1, ConstantInt::get(Src1->getType(), fneg(Mask)));
+ return replaceOperand(II, 0, FNegSrc);
+ }
+
+ Value *FAbsSrc;
+ if (match(Src0, m_FAbs(m_Value(FAbsSrc)))) {
+ II.setArgOperand(1, ConstantInt::get(Src1->getType(), fabs(Mask)));
+ return replaceOperand(II, 0, FAbsSrc);
+ }
+
+ // TODO: is.fpclass(x, fcInf) -> fabs(x) == inf
+
+ if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
+ (IsOrdered || IsUnordered) && !IsStrict) {
+ // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
+ // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
+ // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
+ // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
+ Constant *Inf =
+ ConstantFP::getInfinity(Src0->getType(), OrderedMask == fcNegInf);
+ Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(Src0, Inf)
+ : Builder.CreateFCmpOEQ(Src0, Inf);
+
+ EqInf->takeName(&II);
+ return replaceInstUsesWith(II, EqInf);
+ }
+
+ if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
+ (IsOrdered || IsUnordered) && !IsStrict) {
+ // is.fpclass(x, ~fcPosInf) -> fcmp one x, +inf
+ // is.fpclass(x, ~fcNegInf) -> fcmp one x, -inf
+ // is.fpclass(x, ~fcPosInf|fcNan) -> fcmp une x, +inf
+ // is.fpclass(x, ~fcNegInf|fcNan) -> fcmp une x, -inf
+ Constant *Inf = ConstantFP::getInfinity(Src0->getType(),
+ OrderedInvertedMask == fcNegInf);
+ Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(Src0, Inf)
+ : Builder.CreateFCmpONE(Src0, Inf);
+ NeInf->takeName(&II);
+ return replaceInstUsesWith(II, NeInf);
+ }
+
+ if (Mask == fcNan && !IsStrict) {
+ // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
+ // exceptions.
+ Value *IsNan =
+ Builder.CreateFCmpUNO(Src0, ConstantFP::getZero(Src0->getType()));
+ IsNan->takeName(&II);
+ return replaceInstUsesWith(II, IsNan);
+ }
+
+ if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
+ // Equivalent of !isnan. Replace with standard fcmp.
+ Value *FCmp =
+ Builder.CreateFCmpORD(Src0, ConstantFP::getZero(Src0->getType()));
+ FCmp->takeName(&II);
+ return replaceInstUsesWith(II, FCmp);
+ }
+
+ FCmpInst::Predicate PredType = FCmpInst::BAD_FCMP_PREDICATE;
+
+ // Try to replace with an fcmp with 0
+ //
+ // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
+ // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
+ // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
+ // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
+ //
+ // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
+ // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
+ //
+ // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
+ // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
+ //
+ if (!IsStrict && (IsOrdered || IsUnordered) &&
+ (PredType = fpclassTestIsFCmp0(OrderedMask, *II.getFunction(),
+ Src0->getType())) !=
+ FCmpInst::BAD_FCMP_PREDICATE) {
+ Constant *Zero = ConstantFP::getZero(Src0->getType());
+ // Equivalent of == 0.
+ Value *FCmp = Builder.CreateFCmp(
+ IsUnordered ? FCmpInst::getUnorderedPredicate(PredType) : PredType,
+ Src0, Zero);
+
+ FCmp->takeName(&II);
+ return replaceInstUsesWith(II, FCmp);
+ }
+
+ KnownFPClass Known = computeKnownFPClass(
+ Src0, DL, Mask, 0, &getTargetLibraryInfo(), &AC, &II, &DT);
+
+ // Clear test bits we know must be false from the source value.
+ // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
+ // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
+ if ((Mask & Known.KnownFPClasses) != Mask) {
+ II.setArgOperand(
+ 1, ConstantInt::get(Src1->getType(), Mask & Known.KnownFPClasses));
+ return &II;
+ }
+
+ // If none of the tests which can return false are possible, fold to true.
+ // fp_class (nnan x), ~(qnan|snan) -> true
+ // fp_class (ninf x), ~(ninf|pinf) -> true
+ if (Mask == Known.KnownFPClasses)
+ return replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
+
+ return nullptr;
+}
+
static std::optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
- const DataLayout &DL,
- AssumptionCache *AC,
- DominatorTree *DT) {
+ const DataLayout &DL, AssumptionCache *AC,
+ DominatorTree *DT) {
KnownBits Known = computeKnownBits(Op, DL, 0, AC, CxtI, DT);
if (Known.isNonNegative())
return false;
@@ -848,6 +1030,19 @@ static std::optional<bool> getKnownSign(Value *Op, Instruction *CxtI,
ICmpInst::ICMP_SLT, Op, Constant::getNullValue(Op->getType()), CxtI, DL);
}
+/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
+static bool signBitMustBeTheSame(Value *Op0, Value *Op1, Instruction *CxtI,
+ const DataLayout &DL, AssumptionCache *AC,
+ DominatorTree *DT) {
+ std::optional<bool> Known1 = getKnownSign(Op1, CxtI, DL, AC, DT);
+ if (!Known1)
+ return false;
+ std::optional<bool> Known0 = getKnownSign(Op0, CxtI, DL, AC, DT);
+ if (!Known0)
+ return false;
+ return *Known0 == *Known1;
+}
+
/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
/// can trigger other combines.
static Instruction *moveAddAfterMinMax(IntrinsicInst *II,
@@ -991,7 +1186,8 @@ static Instruction *foldClampRangeOfTwo(IntrinsicInst *II,
/// If this min/max has a constant operand and an operand that is a matching
/// min/max with a constant operand, constant-fold the 2 constant operands.
-static Instruction *reassociateMinMaxWithConstants(IntrinsicInst *II) {
+static Value *reassociateMinMaxWithConstants(IntrinsicInst *II,
+ IRBuilderBase &Builder) {
Intrinsic::ID MinMaxID = II->getIntrinsicID();
auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
if (!LHS || LHS->getIntrinsicID() != MinMaxID)
@@ -1004,12 +1200,10 @@ static Instruction *reassociateMinMaxWithConstants(IntrinsicInst *II) {
// max (max X, C0), C1 --> max X, (max C0, C1) --> max X, NewC
ICmpInst::Predicate Pred = MinMaxIntrinsic::getPredicate(MinMaxID);
- Constant *CondC = ConstantExpr::getICmp(Pred, C0, C1);
- Constant *NewC = ConstantExpr::getSelect(CondC, C0, C1);
-
- Module *Mod = II->getModule();
- Function *MinMax = Intrinsic::getDeclaration(Mod, MinMaxID, II->getType());
- return CallInst::Create(MinMax, {LHS->getArgOperand(0), NewC});
+ Value *CondC = Builder.CreateICmp(Pred, C0, C1);
+ Value *NewC = Builder.CreateSelect(CondC, C0, C1);
+ return Builder.CreateIntrinsic(MinMaxID, II->getType(),
+ {LHS->getArgOperand(0), NewC});
}
/// If this min/max has a matching min/max operand with a constant, try to push
@@ -1149,15 +1343,60 @@ foldShuffledIntrinsicOperands(IntrinsicInst *II,
return new ShuffleVectorInst(NewIntrinsic, Mask);
}
+/// Fold the following cases and accepts bswap and bitreverse intrinsics:
+/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
+/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
+template <Intrinsic::ID IntrID>
+static Instruction *foldBitOrderCrossLogicOp(Value *V,
+ InstCombiner::BuilderTy &Builder) {
+ static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
+ "This helper only supports BSWAP and BITREVERSE intrinsics");
+
+ Value *X, *Y;
+ // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
+ // don't match ConstantExpr that aren't meaningful for this transform.
+ if (match(V, m_OneUse(m_BitwiseLogic(m_Value(X), m_Value(Y)))) &&
+ isa<BinaryOperator>(V)) {
+ Value *OldReorderX, *OldReorderY;
+ BinaryOperator::BinaryOps Op = cast<BinaryOperator>(V)->getOpcode();
+
+ // If both X and Y are bswap/bitreverse, the transform reduces the number
+ // of instructions even if there's multiuse.
+ // If only one operand is bswap/bitreverse, we need to ensure the operand
+ // has only one use.
+ if (match(X, m_Intrinsic<IntrID>(m_Value(OldReorderX))) &&
+ match(Y, m_Intrinsic<IntrID>(m_Value(OldReorderY)))) {
+ return BinaryOperator::Create(Op, OldReorderX, OldReorderY);
+ }
+
+ if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderX))))) {
+ Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, Y);
+ return BinaryOperator::Create(Op, OldReorderX, NewReorder);
+ }
+
+ if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderY))))) {
+ Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, X);
+ return BinaryOperator::Create(Op, NewReorder, OldReorderY);
+ }
+ }
+ return nullptr;
+}
+
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// Don't try to simplify calls without uses. It will not do anything useful,
// but will result in the following folds being skipped.
- if (!CI.use_empty())
- if (Value *V = simplifyCall(&CI, SQ.getWithInstruction(&CI)))
+ if (!CI.use_empty()) {
+ SmallVector<Value *, 4> Args;
+ Args.reserve(CI.arg_size());
+ for (Value *Op : CI.args())
+ Args.push_back(Op);
+ if (Value *V = simplifyCall(&CI, CI.getCalledOperand(), Args,
+ SQ.getWithInstruction(&CI)))
return replaceInstUsesWith(CI, V);
+ }
if (Value *FreedOp = getFreedOperand(&CI, &TLI))
return visitFree(CI, FreedOp);
@@ -1176,7 +1415,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
// not a multiple of element size then behavior is undefined.
if (auto *AMI = dyn_cast<AtomicMemIntrinsic>(II))
if (ConstantInt *NumBytes = dyn_cast<ConstantInt>(AMI->getLength()))
- if (NumBytes->getSExtValue() < 0 ||
+ if (NumBytes->isNegative() ||
(NumBytes->getZExtValue() % AMI->getElementSizeInBytes() != 0)) {
CreateNonTerminatorUnreachable(AMI);
assert(AMI->getType()->isVoidTy() &&
@@ -1267,10 +1506,16 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Intrinsic::ID IID = II->getIntrinsicID();
switch (IID) {
- case Intrinsic::objectsize:
- if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false))
+ case Intrinsic::objectsize: {
+ SmallVector<Instruction *> InsertedInstructions;
+ if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false,
+ &InsertedInstructions)) {
+ for (Instruction *Inserted : InsertedInstructions)
+ Worklist.add(Inserted);
return replaceInstUsesWith(CI, V);
+ }
return nullptr;
+ }
case Intrinsic::abs: {
Value *IIOperand = II->getArgOperand(0);
bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
@@ -1377,6 +1622,46 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
}
+ // (umax X, (xor X, Pow2))
+ // -> (or X, Pow2)
+ // (umin X, (xor X, Pow2))
+ // -> (and X, ~Pow2)
+ // (smax X, (xor X, Pos_Pow2))
+ // -> (or X, Pos_Pow2)
+ // (smin X, (xor X, Pos_Pow2))
+ // -> (and X, ~Pos_Pow2)
+ // (smax X, (xor X, Neg_Pow2))
+ // -> (and X, ~Neg_Pow2)
+ // (smin X, (xor X, Neg_Pow2))
+ // -> (or X, Neg_Pow2)
+ if ((match(I0, m_c_Xor(m_Specific(I1), m_Value(X))) ||
+ match(I1, m_c_Xor(m_Specific(I0), m_Value(X)))) &&
+ isKnownToBeAPowerOfTwo(X, /* OrZero */ true)) {
+ bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
+ bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;
+
+ if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
+ auto KnownSign = getKnownSign(X, II, DL, &AC, &DT);
+ if (KnownSign == std::nullopt) {
+ UseOr = false;
+ UseAndN = false;
+ } else if (*KnownSign /* true is Signed. */) {
+ UseOr ^= true;
+ UseAndN ^= true;
+ Type *Ty = I0->getType();
+ // A negative power of 2 must be IntMin. It's possible to prove that the
+ // value is negative and a power of 2 without actually having its known
+ // bits, so just construct the value by hand.
+ X = Constant::getIntegerValue(
+ Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits()));
+ }
+ }
+ if (UseOr)
+ return BinaryOperator::CreateOr(I0, X);
+ else if (UseAndN)
+ return BinaryOperator::CreateAnd(I0, Builder.CreateNot(X));
+ }
+
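The unsigned rows of the table above come down to a simple bit identity: x and x ^ p differ only in the power-of-two bit p, so the larger of the two is x | p and the smaller is x & ~p. A standalone C++ sketch of the unsigned half (the signed rows additionally depend on the sign of the power of two, as the code above handles):

#include <algorithm>
#include <cassert>
#include <cstdint>

// Flipping a single power-of-two bit either adds or removes that bit, so the
// maximum always has the bit set and the minimum never does.
int main() {
  const uint32_t P = 0x10u; // a power of two
  for (uint32_t X : {0u, 1u, 0x10u, 0x1Fu, 12345u, 0xFFFFFFFFu}) {
    assert(std::max(X, X ^ P) == (X | P));
    assert(std::min(X, X ^ P) == (X & ~P));
  }
  return 0;
}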
// If we can eliminate ~A and Y is free to invert:
// max ~A, Y --> ~(min A, ~Y)
//
@@ -1436,13 +1721,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Instruction *SAdd = matchSAddSubSat(*II))
return SAdd;
- if (match(I1, m_ImmConstant()))
- if (auto *Sel = dyn_cast<SelectInst>(I0))
- if (Instruction *R = FoldOpIntoSelect(*II, Sel))
- return R;
-
- if (Instruction *NewMinMax = reassociateMinMaxWithConstants(II))
- return NewMinMax;
+ if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder))
+ return replaceInstUsesWith(*II, NewMinMax);
if (Instruction *R = reassociateMinMaxWithConstantInOperand(II, Builder))
return R;
@@ -1453,15 +1733,21 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
break;
}
case Intrinsic::bitreverse: {
+ Value *IIOperand = II->getArgOperand(0);
// bitrev (zext i1 X to ?) --> X ? SignBitC : 0
Value *X;
- if (match(II->getArgOperand(0), m_ZExt(m_Value(X))) &&
+ if (match(IIOperand, m_ZExt(m_Value(X))) &&
X->getType()->isIntOrIntVectorTy(1)) {
Type *Ty = II->getType();
APInt SignBit = APInt::getSignMask(Ty->getScalarSizeInBits());
return SelectInst::Create(X, ConstantInt::get(Ty, SignBit),
ConstantInt::getNullValue(Ty));
}
+
+ if (Instruction *crossLogicOpFold =
+ foldBitOrderCrossLogicOp<Intrinsic::bitreverse>(IIOperand, Builder))
+ return crossLogicOpFold;
+
break;
}
case Intrinsic::bswap: {
@@ -1511,6 +1797,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Value *V = Builder.CreateLShr(X, CV);
return new TruncInst(V, IIOperand->getType());
}
+
+ if (Instruction *crossLogicOpFold =
+ foldBitOrderCrossLogicOp<Intrinsic::bswap>(IIOperand, Builder)) {
+ return crossLogicOpFold;
+ }
+
break;
}
case Intrinsic::masked_load:
@@ -1616,6 +1908,10 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty);
return CallInst::Create(Bswap, { Op0 });
}
+ if (Instruction *BitOp =
+ matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
+ /*MatchBitReversals*/ true))
+ return BitOp;
}
// Left or right might be masked.
@@ -1983,7 +2279,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
case Intrinsic::copysign: {
Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
- if (SignBitMustBeZero(Sign, &TLI)) {
+ if (SignBitMustBeZero(Sign, DL, &TLI)) {
// If we know that the sign argument is positive, reduce to FABS:
// copysign Mag, +Sign --> fabs Mag
Value *Fabs = Builder.CreateUnaryIntrinsic(Intrinsic::fabs, Mag, II);
@@ -2079,6 +2375,42 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
break;
}
+ case Intrinsic::ldexp: {
+ // ldexp(ldexp(x, a), b) -> ldexp(x, a + b)
+ //
+ // The danger is if the first ldexp would overflow to infinity or underflow
+ // to zero, but the combined exponent avoids it. We ignore this with
+ // reassoc.
+ //
+ // It's also safe to fold if we know both exponents are >= 0 or <= 0 since
+ // it would just double down on the overflow/underflow which would occur
+ // anyway.
+ //
+ // TODO: Could do better if we had range tracking for the input value
+ // exponent. Also could broaden sign check to cover == 0 case.
+ Value *Src = II->getArgOperand(0);
+ Value *Exp = II->getArgOperand(1);
+ Value *InnerSrc;
+ Value *InnerExp;
+ if (match(Src, m_OneUse(m_Intrinsic<Intrinsic::ldexp>(
+ m_Value(InnerSrc), m_Value(InnerExp)))) &&
+ Exp->getType() == InnerExp->getType()) {
+ FastMathFlags FMF = II->getFastMathFlags();
+ FastMathFlags InnerFlags = cast<FPMathOperator>(Src)->getFastMathFlags();
+
+ if ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
+ signBitMustBeTheSame(Exp, InnerExp, II, DL, &AC, &DT)) {
+ // TODO: Add nsw/nuw probably safe if integer type exceeds exponent
+ // width.
+ Value *NewExp = Builder.CreateAdd(InnerExp, Exp);
+ II->setArgOperand(1, NewExp);
+ II->setFastMathFlags(InnerFlags); // Or the inner flags.
+ return replaceOperand(*II, 0, InnerSrc);
+ }
+ }
+
+ break;
+ }
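Away from the overflow/underflow boundaries the fold is plain exponent addition; a minimal standalone C++ illustration with std::ldexp (the guards above cover the boundary cases via reassoc or matching exponent signs):

#include <cassert>
#include <cmath>

// Scaling by 2^a and then by 2^b is exact when no intermediate overflow or
// underflow occurs, so it equals a single scale by 2^(a+b).
int main() {
  const double X = 1.625;
  const int A = 12, B = -5;
  assert(std::ldexp(std::ldexp(X, A), B) == std::ldexp(X, A + B));
  return 0;
}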
case Intrinsic::ptrauth_auth:
case Intrinsic::ptrauth_resign: {
// (sign|resign) + (auth|resign) can be folded by omitting the middle
@@ -2380,12 +2712,34 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
isValidAssumeForContext(II, LHS, &DT)) {
MDNode *MD = MDNode::get(II->getContext(), std::nullopt);
LHS->setMetadata(LLVMContext::MD_nonnull, MD);
+ LHS->setMetadata(LLVMContext::MD_noundef, MD);
return RemoveConditionFromAssume(II);
// TODO: apply nonnull return attributes to calls and invokes
// TODO: apply range metadata for range check patterns?
}
+ // Separate storage assumptions apply to the underlying allocations, not any
+ // particular pointer within them. When evaluating the hints for AA purposes
+ // we getUnderlyingObject them; by precomputing the answers here we can
+ // avoid having to do so repeatedly there.
+ for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
+ OperandBundleUse OBU = II->getOperandBundleAt(Idx);
+ if (OBU.getTagName() == "separate_storage") {
+ assert(OBU.Inputs.size() == 2);
+ auto MaybeSimplifyHint = [&](const Use &U) {
+ Value *Hint = U.get();
+ // Not having a limit is safe because InstCombine removes unreachable
+ // code.
+ Value *UnderlyingObject = getUnderlyingObject(Hint, /*MaxLookup*/ 0);
+ if (Hint != UnderlyingObject)
+ replaceUse(const_cast<Use &>(U), UnderlyingObject);
+ };
+ MaybeSimplifyHint(OBU.Inputs[0]);
+ MaybeSimplifyHint(OBU.Inputs[1]);
+ }
+ }
+
// Convert nonnull assume like:
// %A = icmp ne i32* %PTR, null
// call void @llvm.assume(i1 %A)
@@ -2479,6 +2833,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Known.isAllOnes() && isAssumeWithEmptyBundle(cast<AssumeInst>(*II)))
return eraseInstFromFunction(*II);
+ // assume(false) is unreachable.
+ if (match(IIOperand, m_CombineOr(m_Zero(), m_Undef()))) {
+ CreateNonTerminatorUnreachable(II);
+ return eraseInstFromFunction(*II);
+ }
+
// Update the cache of affected values for this assumption (we might be
// here because we just simplified the condition).
AC.updateAffectedValues(cast<AssumeInst>(II));
@@ -2545,7 +2905,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
for (i = 0; i != SubVecNumElts; ++i)
WidenMask.push_back(i);
for (; i != VecNumElts; ++i)
- WidenMask.push_back(UndefMaskElem);
+ WidenMask.push_back(PoisonMaskElem);
Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
@@ -2840,7 +3200,7 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
int Sz = Mask.size();
SmallBitVector UsedIndices(Sz);
for (int Idx : Mask) {
- if (Idx == UndefMaskElem || UsedIndices.test(Idx))
+ if (Idx == PoisonMaskElem || UsedIndices.test(Idx))
break;
UsedIndices.set(Idx);
}
@@ -2852,6 +3212,11 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
break;
}
+ case Intrinsic::is_fpclass: {
+ if (Instruction *I = foldIntrinsicIsFPClass(*II))
+ return I;
+ break;
+ }
default: {
// Handle target specific intrinsics
std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
@@ -2861,6 +3226,31 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
}
}
+ // Try to fold intrinsic into select operands. This is legal if:
+ // * The intrinsic is speculatable.
+ // * The select condition is not a vector, or the intrinsic does not
+ // perform cross-lane operations.
+ switch (IID) {
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ case Intrinsic::ctpop:
+ case Intrinsic::umin:
+ case Intrinsic::umax:
+ case Intrinsic::smin:
+ case Intrinsic::smax:
+ case Intrinsic::usub_sat:
+ case Intrinsic::uadd_sat:
+ case Intrinsic::ssub_sat:
+ case Intrinsic::sadd_sat:
+ for (Value *Op : II->args())
+ if (auto *Sel = dyn_cast<SelectInst>(Op))
+ if (Instruction *R = FoldOpIntoSelect(*II, Sel))
+ return R;
+ [[fallthrough]];
+ default:
+ break;
+ }
+
if (Instruction *Shuf = foldShuffledIntrinsicOperands(II, Builder))
return Shuf;
@@ -2907,49 +3297,6 @@ Instruction *InstCombinerImpl::visitCallBrInst(CallBrInst &CBI) {
return visitCallBase(CBI);
}
-/// If this cast does not affect the value passed through the varargs area, we
-/// can eliminate the use of the cast.
-static bool isSafeToEliminateVarargsCast(const CallBase &Call,
- const DataLayout &DL,
- const CastInst *const CI,
- const int ix) {
- if (!CI->isLosslessCast())
- return false;
-
- // If this is a GC intrinsic, avoid munging types. We need types for
- // statepoint reconstruction in SelectionDAG.
- // TODO: This is probably something which should be expanded to all
- // intrinsics since the entire point of intrinsics is that
- // they are understandable by the optimizer.
- if (isa<GCStatepointInst>(Call) || isa<GCRelocateInst>(Call) ||
- isa<GCResultInst>(Call))
- return false;
-
- // Opaque pointers are compatible with any byval types.
- PointerType *SrcTy = cast<PointerType>(CI->getOperand(0)->getType());
- if (SrcTy->isOpaque())
- return true;
-
- // The size of ByVal or InAlloca arguments is derived from the type, so we
- // can't change to a type with a different size. If the size were
- // passed explicitly we could avoid this check.
- if (!Call.isPassPointeeByValueArgument(ix))
- return true;
-
- // The transform currently only handles type replacement for byval, not other
- // type-carrying attributes.
- if (!Call.isByValArgument(ix))
- return false;
-
- Type *SrcElemTy = SrcTy->getNonOpaquePointerElementType();
- Type *DstElemTy = Call.getParamByValType(ix);
- if (!SrcElemTy->isSized() || !DstElemTy->isSized())
- return false;
- if (DL.getTypeAllocSize(SrcElemTy) != DL.getTypeAllocSize(DstElemTy))
- return false;
- return true;
-}
-
Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
if (!CI->getCalledFunction()) return nullptr;
@@ -2965,7 +3312,7 @@ Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
auto InstCombineErase = [this](Instruction *I) {
eraseInstFromFunction(*I);
};
- LibCallSimplifier Simplifier(DL, &TLI, ORE, BFI, PSI, InstCombineRAUW,
+ LibCallSimplifier Simplifier(DL, &TLI, &AC, ORE, BFI, PSI, InstCombineRAUW,
InstCombineErase);
if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
++NumSimplified;
@@ -3198,32 +3545,6 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
if (IntrinsicInst *II = findInitTrampoline(Callee))
return transformCallThroughTrampoline(Call, *II);
- // TODO: Drop this transform once opaque pointer transition is done.
- FunctionType *FTy = Call.getFunctionType();
- if (FTy->isVarArg()) {
- int ix = FTy->getNumParams();
- // See if we can optimize any arguments passed through the varargs area of
- // the call.
- for (auto I = Call.arg_begin() + FTy->getNumParams(), E = Call.arg_end();
- I != E; ++I, ++ix) {
- CastInst *CI = dyn_cast<CastInst>(*I);
- if (CI && isSafeToEliminateVarargsCast(Call, DL, CI, ix)) {
- replaceUse(*I, CI->getOperand(0));
-
- // Update the byval type to match the pointer type.
- // Not necessary for opaque pointers.
- PointerType *NewTy = cast<PointerType>(CI->getOperand(0)->getType());
- if (!NewTy->isOpaque() && Call.isByValArgument(ix)) {
- Call.removeParamAttr(ix, Attribute::ByVal);
- Call.addParamAttr(ix, Attribute::getWithByValType(
- Call.getContext(),
- NewTy->getNonOpaquePointerElementType()));
- }
- Changed = true;
- }
- }
- }
-
if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
InlineAsm *IA = cast<InlineAsm>(Callee);
if (!IA->canThrow()) {
@@ -3381,13 +3702,17 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
}
/// If the callee is a constexpr cast of a function, attempt to move the cast to
-/// the arguments of the call/callbr/invoke.
+/// the arguments of the call/invoke.
+/// CallBrInst is not supported.
bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
auto *Callee =
dyn_cast<Function>(Call.getCalledOperand()->stripPointerCasts());
if (!Callee)
return false;
+ assert(!isa<CallBrInst>(Call) &&
+ "CallBr's don't have a single point after a def to insert at");
+
// If this is a call to a thunk function, don't remove the cast. Thunks are
// used to transparently forward all incoming parameters and outgoing return
// values, so it's important to leave the cast in place.
@@ -3433,7 +3758,7 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
return false; // Attribute not compatible with transformed value.
}
- // If the callbase is an invoke/callbr instruction, and the return value is
+ // If the callbase is an invoke instruction, and the return value is
// used by a PHI node in a successor, we cannot change the return type of
// the call because there is no place to put the cast instruction (without
// breaking the critical edge). Bail out in this case.
@@ -3441,8 +3766,6 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
BasicBlock *PhisNotSupportedBlock = nullptr;
if (auto *II = dyn_cast<InvokeInst>(Caller))
PhisNotSupportedBlock = II->getNormalDest();
- if (auto *CB = dyn_cast<CallBrInst>(Caller))
- PhisNotSupportedBlock = CB->getDefaultDest();
if (PhisNotSupportedBlock)
for (User *U : Caller->users())
if (PHINode *PN = dyn_cast<PHINode>(U))
@@ -3490,24 +3813,6 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
return false; // Cannot transform to or from byval.
-
- // If the parameter is passed as a byval argument, then we have to have a
- // sized type and the sized type has to have the same size as the old type.
- if (ParamTy != ActTy && CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
- PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
- if (!ParamPTy)
- return false;
-
- if (!ParamPTy->isOpaque()) {
- Type *ParamElTy = ParamPTy->getNonOpaquePointerElementType();
- if (!ParamElTy->isSized())
- return false;
-
- Type *CurElTy = Call.getParamByValType(i);
- if (DL.getTypeAllocSize(CurElTy) != DL.getTypeAllocSize(ParamElTy))
- return false;
- }
- }
}
if (Callee->isDeclaration()) {
@@ -3568,16 +3873,8 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
// type. Note that we made sure all incompatible ones are safe to drop.
AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
ParamTy, AttributeFuncs::ASK_SAFE_TO_DROP);
- if (CallerPAL.hasParamAttr(i, Attribute::ByVal) &&
- !ParamTy->isOpaquePointerTy()) {
- AttrBuilder AB(Ctx, CallerPAL.getParamAttrs(i).removeAttributes(
- Ctx, IncompatibleAttrs));
- AB.addByValAttr(ParamTy->getNonOpaquePointerElementType());
- ArgAttrs.push_back(AttributeSet::get(Ctx, AB));
- } else {
- ArgAttrs.push_back(
- CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
- }
+ ArgAttrs.push_back(
+ CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
}
// If the function takes more arguments than the call was taking, add them
@@ -3626,9 +3923,6 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
II->getUnwindDest(), Args, OpBundles);
- } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(Caller)) {
- NewCall = Builder.CreateCallBr(Callee, CBI->getDefaultDest(),
- CBI->getIndirectDests(), Args, OpBundles);
} else {
NewCall = Builder.CreateCall(Callee, Args, OpBundles);
cast<CallInst>(NewCall)->setTailCallKind(
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 3f851a2b2182..5c84f666616d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -25,166 +25,6 @@ using namespace PatternMatch;
#define DEBUG_TYPE "instcombine"
-/// Analyze 'Val', seeing if it is a simple linear expression.
-/// If so, decompose it, returning some value X, such that Val is
-/// X*Scale+Offset.
-///
-static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
- uint64_t &Offset) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
- Offset = CI->getZExtValue();
- Scale = 0;
- return ConstantInt::get(Val->getType(), 0);
- }
-
- if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
- // Cannot look past anything that might overflow.
- // We specifically require nuw because we store the Scale in an unsigned
- // and perform an unsigned divide on it.
- OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
- if (OBI && !OBI->hasNoUnsignedWrap()) {
- Scale = 1;
- Offset = 0;
- return Val;
- }
-
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
- if (I->getOpcode() == Instruction::Shl) {
- // This is a value scaled by '1 << the shift amt'.
- Scale = UINT64_C(1) << RHS->getZExtValue();
- Offset = 0;
- return I->getOperand(0);
- }
-
- if (I->getOpcode() == Instruction::Mul) {
- // This value is scaled by 'RHS'.
- Scale = RHS->getZExtValue();
- Offset = 0;
- return I->getOperand(0);
- }
-
- if (I->getOpcode() == Instruction::Add) {
- // We have X+C. Check to see if we really have (X*C2)+C1,
- // where C1 is divisible by C2.
- unsigned SubScale;
- Value *SubVal =
- decomposeSimpleLinearExpr(I->getOperand(0), SubScale, Offset);
- Offset += RHS->getZExtValue();
- Scale = SubScale;
- return SubVal;
- }
- }
- }
-
- // Otherwise, we can't look past this.
- Scale = 1;
- Offset = 0;
- return Val;
-}
-
-/// If we find a cast of an allocation instruction, try to eliminate the cast by
-/// moving the type information into the alloc.
-Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
- AllocaInst &AI) {
- PointerType *PTy = cast<PointerType>(CI.getType());
- // Opaque pointers don't have an element type we could replace with.
- if (PTy->isOpaque())
- return nullptr;
-
- IRBuilderBase::InsertPointGuard Guard(Builder);
- Builder.SetInsertPoint(&AI);
-
- // Get the type really allocated and the type casted to.
- Type *AllocElTy = AI.getAllocatedType();
- Type *CastElTy = PTy->getNonOpaquePointerElementType();
- if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
-
- // This optimisation does not work for cases where the cast type
- // is scalable and the allocated type is not. This because we need to
- // know how many times the casted type fits into the allocated type.
- // For the opposite case where the allocated type is scalable and the
- // cast type is not this leads to poor code quality due to the
- // introduction of 'vscale' into the calculations. It seems better to
- // bail out for this case too until we've done a proper cost-benefit
- // analysis.
- bool AllocIsScalable = isa<ScalableVectorType>(AllocElTy);
- bool CastIsScalable = isa<ScalableVectorType>(CastElTy);
- if (AllocIsScalable != CastIsScalable) return nullptr;
-
- Align AllocElTyAlign = DL.getABITypeAlign(AllocElTy);
- Align CastElTyAlign = DL.getABITypeAlign(CastElTy);
- if (CastElTyAlign < AllocElTyAlign) return nullptr;
-
- // If the allocation has multiple uses, only promote it if we are strictly
- // increasing the alignment of the resultant allocation. If we keep it the
- // same, we open the door to infinite loops of various kinds.
- if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return nullptr;
-
- // The alloc and cast types should be either both fixed or both scalable.
- uint64_t AllocElTySize = DL.getTypeAllocSize(AllocElTy).getKnownMinValue();
- uint64_t CastElTySize = DL.getTypeAllocSize(CastElTy).getKnownMinValue();
- if (CastElTySize == 0 || AllocElTySize == 0) return nullptr;
-
- // If the allocation has multiple uses, only promote it if we're not
- // shrinking the amount of memory being allocated.
- uint64_t AllocElTyStoreSize =
- DL.getTypeStoreSize(AllocElTy).getKnownMinValue();
- uint64_t CastElTyStoreSize = DL.getTypeStoreSize(CastElTy).getKnownMinValue();
- if (!AI.hasOneUse() && CastElTyStoreSize < AllocElTyStoreSize) return nullptr;
-
- // See if we can satisfy the modulus by pulling a scale out of the array
- // size argument.
- unsigned ArraySizeScale;
- uint64_t ArrayOffset;
- Value *NumElements = // See if the array size is a decomposable linear expr.
- decomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
-
- // If we can now satisfy the modulus, by using a non-1 scale, we really can
- // do the xform.
- if ((AllocElTySize*ArraySizeScale) % CastElTySize != 0 ||
- (AllocElTySize*ArrayOffset ) % CastElTySize != 0) return nullptr;
-
- // We don't currently support arrays of scalable types.
- assert(!AllocIsScalable || (ArrayOffset == 1 && ArraySizeScale == 0));
-
- unsigned Scale = (AllocElTySize*ArraySizeScale)/CastElTySize;
- Value *Amt = nullptr;
- if (Scale == 1) {
- Amt = NumElements;
- } else {
- Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
- // Insert before the alloca, not before the cast.
- Amt = Builder.CreateMul(Amt, NumElements);
- }
-
- if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
- Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
- Offset, true);
- Amt = Builder.CreateAdd(Amt, Off);
- }
-
- AllocaInst *New = Builder.CreateAlloca(CastElTy, AI.getAddressSpace(), Amt);
- New->setAlignment(AI.getAlign());
- New->takeName(&AI);
- New->setUsedWithInAlloca(AI.isUsedWithInAlloca());
- New->setMetadata(LLVMContext::MD_DIAssignID,
- AI.getMetadata(LLVMContext::MD_DIAssignID));
-
- replaceAllDbgUsesWith(AI, *New, *New, DT);
-
- // If the allocation has multiple real uses, insert a cast and change all
- // things that used it to use the new cast. This will also hack on CI, but it
- // will die soon.
- if (!AI.hasOneUse()) {
- // New is the allocation instruction, pointer typed. AI is the original
- // allocation instruction, also pointer typed. Thus, cast to use is BitCast.
- Value *NewCast = Builder.CreateBitCast(New, AI.getType(), "tmpcast");
- replaceInstUsesWith(AI, NewCast);
- eraseInstFromFunction(AI);
- }
- return replaceInstUsesWith(CI, New);
-}
-
/// Given an expression that CanEvaluateTruncated or CanEvaluateSExtd returns
/// true for, actually insert the code to evaluate the expression.
Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
@@ -252,6 +92,20 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
Res = CastInst::Create(
static_cast<Instruction::CastOps>(Opc), I->getOperand(0), Ty);
break;
+ case Instruction::Call:
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unsupported call!");
+ case Intrinsic::vscale: {
+ Function *Fn =
+ Intrinsic::getDeclaration(I->getModule(), Intrinsic::vscale, {Ty});
+ Res = CallInst::Create(Fn->getFunctionType(), Fn);
+ break;
+ }
+ }
+ }
+ break;
default:
// TODO: Can handle more cases here.
llvm_unreachable("Unreachable!");
@@ -294,6 +148,10 @@ Instruction *InstCombinerImpl::commonCastTransforms(CastInst &CI) {
Value *Src = CI.getOperand(0);
Type *Ty = CI.getType();
+ if (auto *SrcC = dyn_cast<Constant>(Src))
+ if (Constant *Res = ConstantFoldCastOperand(CI.getOpcode(), SrcC, Ty, DL))
+ return replaceInstUsesWith(CI, Res);
+
// Try to eliminate a cast of a cast.
if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
@@ -501,16 +359,12 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
// If the integer type can hold the max FP value, it is safe to cast
// directly to that type. Otherwise, we may create poison via overflow
// that did not exist in the original code.
- //
- // The max FP value is pow(2, MaxExponent) * (1 + MaxFraction), so we need
- // at least one more bit than the MaxExponent to hold the max FP value.
Type *InputTy = I->getOperand(0)->getType()->getScalarType();
const fltSemantics &Semantics = InputTy->getFltSemantics();
- uint32_t MinBitWidth = APFloatBase::semanticsMaxExponent(Semantics);
- // Extra sign bit needed.
- if (I->getOpcode() == Instruction::FPToSI)
- ++MinBitWidth;
- return Ty->getScalarSizeInBits() > MinBitWidth;
+ uint32_t MinBitWidth =
+ APFloatBase::semanticsIntSizeInBits(Semantics,
+ I->getOpcode() == Instruction::FPToSI);
+ return Ty->getScalarSizeInBits() >= MinBitWidth;
}
default:
// TODO: Can handle more cases here.
@@ -881,13 +735,12 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
Value *And = Builder.CreateAnd(X, MaskC);
return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
}
- if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_Constant(C)),
+ if (match(Src, m_OneUse(m_c_Or(m_LShr(m_Value(X), m_ImmConstant(C)),
m_Deferred(X))))) {
// trunc (or (lshr X, C), X) to i1 --> icmp ne (and X, C'), 0
Constant *One = ConstantInt::get(SrcTy, APInt(SrcWidth, 1));
Constant *MaskC = ConstantExpr::getShl(One, C);
- MaskC = ConstantExpr::getOr(MaskC, One);
- Value *And = Builder.CreateAnd(X, MaskC);
+ Value *And = Builder.CreateAnd(X, Builder.CreateOr(MaskC, One));
return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
}
}
@@ -904,11 +757,18 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
// removed by the trunc.
if (match(C, m_SpecificInt_ICMP(ICmpInst::ICMP_ULE,
APInt(SrcWidth, MaxShiftAmt)))) {
+ auto GetNewShAmt = [&](unsigned Width) {
+ Constant *MaxAmt = ConstantInt::get(SrcTy, Width - 1, false);
+ Constant *Cmp =
+ ConstantFoldCompareInstOperands(ICmpInst::ICMP_ULT, C, MaxAmt, DL);
+ Constant *ShAmt = ConstantFoldSelectInstruction(Cmp, C, MaxAmt);
+ return ConstantFoldCastOperand(Instruction::Trunc, ShAmt, A->getType(),
+ DL);
+ };
+
// trunc (lshr (sext A), C) --> ashr A, C
if (A->getType() == DestTy) {
- Constant *MaxAmt = ConstantInt::get(SrcTy, DestWidth - 1, false);
- Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
- ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
+ Constant *ShAmt = GetNewShAmt(DestWidth);
ShAmt = Constant::mergeUndefsWith(ShAmt, C);
return IsExact ? BinaryOperator::CreateExactAShr(A, ShAmt)
: BinaryOperator::CreateAShr(A, ShAmt);
@@ -916,9 +776,7 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
// The types are mismatched, so create a cast after shifting:
// trunc (lshr (sext A), C) --> sext/trunc (ashr A, C)
if (Src->hasOneUse()) {
- Constant *MaxAmt = ConstantInt::get(SrcTy, AWidth - 1, false);
- Constant *ShAmt = ConstantExpr::getUMin(C, MaxAmt);
- ShAmt = ConstantExpr::getTrunc(ShAmt, A->getType());
+ Constant *ShAmt = GetNewShAmt(AWidth);
Value *Shift = Builder.CreateAShr(A, ShAmt, "", IsExact);
return CastInst::CreateIntegerCast(Shift, DestTy, true);
}
@@ -998,7 +856,7 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
}
}
- if (match(Src, m_VScale(DL))) {
+ if (match(Src, m_VScale())) {
if (Trunc.getFunction() &&
Trunc.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
Attribute Attr =
@@ -1217,6 +1075,13 @@ static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
return false;
return true;
}
+ case Instruction::Call:
+ // llvm.vscale() can always be executed in a larger type, because the
+ // value is automatically zero-extended.
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() == Intrinsic::vscale)
+ return true;
+ return false;
default:
// TODO: Can handle more cases here.
return false;
@@ -1226,7 +1091,8 @@ static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
// If this zero extend is only used by a truncate, let the truncate be
// eliminated before we try to optimize this zext.
- if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()))
+ if (Zext.hasOneUse() && isa<TruncInst>(Zext.user_back()) &&
+ !isa<Constant>(Zext.getOperand(0)))
return nullptr;
// If one of the common conversion will work, do it.
@@ -1340,7 +1206,7 @@ Instruction *InstCombinerImpl::visitZExt(ZExtInst &Zext) {
return BinaryOperator::CreateAnd(X, ZextC);
}
- if (match(Src, m_VScale(DL))) {
+ if (match(Src, m_VScale())) {
if (Zext.getFunction() &&
Zext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
Attribute Attr =
@@ -1402,7 +1268,7 @@ Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) {
// sext ((x & 2^n) == 0) -> (x >> n) - 1
// sext ((x & 2^n) != 2^n) -> (x >> n) - 1
- unsigned ShiftAmt = KnownZeroMask.countTrailingZeros();
+ unsigned ShiftAmt = KnownZeroMask.countr_zero();
// Perform a right shift to place the desired bit in the LSB.
if (ShiftAmt)
In = Builder.CreateLShr(In,
@@ -1416,7 +1282,7 @@ Instruction *InstCombinerImpl::transformSExtICmp(ICmpInst *Cmp,
} else {
// sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1
// sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1
- unsigned ShiftAmt = KnownZeroMask.countLeadingZeros();
+ unsigned ShiftAmt = KnownZeroMask.countl_zero();
// Perform a left shift to place the desired bit in the MSB.
if (ShiftAmt)
In = Builder.CreateShl(In,
@@ -1611,7 +1477,7 @@ Instruction *InstCombinerImpl::visitSExt(SExtInst &Sext) {
}
}
- if (match(Src, m_VScale(DL))) {
+ if (match(Src, m_VScale())) {
if (Sext.getFunction() &&
Sext.getFunction()->hasFnAttribute(Attribute::VScaleRange)) {
Attribute Attr =
@@ -2687,57 +2553,6 @@ Instruction *InstCombinerImpl::optimizeBitCastFromPhi(CastInst &CI,
return RetVal;
}
-static Instruction *convertBitCastToGEP(BitCastInst &CI, IRBuilderBase &Builder,
- const DataLayout &DL) {
- Value *Src = CI.getOperand(0);
- PointerType *SrcPTy = cast<PointerType>(Src->getType());
- PointerType *DstPTy = cast<PointerType>(CI.getType());
-
- // Bitcasts involving opaque pointers cannot be converted into a GEP.
- if (SrcPTy->isOpaque() || DstPTy->isOpaque())
- return nullptr;
-
- Type *DstElTy = DstPTy->getNonOpaquePointerElementType();
- Type *SrcElTy = SrcPTy->getNonOpaquePointerElementType();
-
- // When the type pointed to is not sized the cast cannot be
- // turned into a gep.
- if (!SrcElTy->isSized())
- return nullptr;
-
- // If the source and destination are pointers, and this cast is equivalent
- // to a getelementptr X, 0, 0, 0... turn it into the appropriate gep.
- // This can enhance SROA and other transforms that want type-safe pointers.
- unsigned NumZeros = 0;
- while (SrcElTy && SrcElTy != DstElTy) {
- SrcElTy = GetElementPtrInst::getTypeAtIndex(SrcElTy, (uint64_t)0);
- ++NumZeros;
- }
-
- // If we found a path from the src to dest, create the getelementptr now.
- if (SrcElTy == DstElTy) {
- SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0));
- GetElementPtrInst *GEP = GetElementPtrInst::Create(
- SrcPTy->getNonOpaquePointerElementType(), Src, Idxs);
-
- // If the source pointer is dereferenceable, then assume it points to an
- // allocated object and apply "inbounds" to the GEP.
- bool CanBeNull, CanBeFreed;
- if (Src->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed)) {
- // In a non-default address space (not 0), a null pointer can not be
- // assumed inbounds, so ignore that case (dereferenceable_or_null).
- // The reason is that 'null' is not treated differently in these address
- // spaces, and we consequently ignore the 'gep inbounds' special case
- // for 'null' which allows 'inbounds' on 'null' if the indices are
- // zeros.
- if (SrcPTy->getAddressSpace() == 0 || !CanBeNull)
- GEP->setIsInBounds();
- }
- return GEP;
- }
- return nullptr;
-}
-
Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
// If the operands are integer typed then apply the integer transforms,
// otherwise just apply the common ones.
@@ -2750,19 +2565,6 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
if (DestTy == Src->getType())
return replaceInstUsesWith(CI, Src);
- if (isa<PointerType>(SrcTy) && isa<PointerType>(DestTy)) {
- // If we are casting a alloca to a pointer to a type of the same
- // size, rewrite the allocation instruction to allocate the "right" type.
- // There is no need to modify malloc calls because it is their bitcast that
- // needs to be cleaned up.
- if (AllocaInst *AI = dyn_cast<AllocaInst>(Src))
- if (Instruction *V = PromoteCastOfAllocation(CI, *AI))
- return V;
-
- if (Instruction *I = convertBitCastToGEP(CI, Builder, DL))
- return I;
- }
-
if (FixedVectorType *DestVTy = dyn_cast<FixedVectorType>(DestTy)) {
// Beware: messing with this target-specific oddity may cause trouble.
if (DestVTy->getNumElements() == 1 && SrcTy->isX86_MMXTy()) {
@@ -2905,23 +2707,5 @@ Instruction *InstCombinerImpl::visitBitCast(BitCastInst &CI) {
}
Instruction *InstCombinerImpl::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
- // If the destination pointer element type is not the same as the source's
- // first do a bitcast to the destination type, and then the addrspacecast.
- // This allows the cast to be exposed to other transforms.
- Value *Src = CI.getOperand(0);
- PointerType *SrcTy = cast<PointerType>(Src->getType()->getScalarType());
- PointerType *DestTy = cast<PointerType>(CI.getType()->getScalarType());
-
- if (!SrcTy->hasSameElementTypeAs(DestTy)) {
- Type *MidTy =
- PointerType::getWithSamePointeeType(DestTy, SrcTy->getAddressSpace());
- // Handle vectors of pointers.
- if (VectorType *VT = dyn_cast<VectorType>(CI.getType()))
- MidTy = VectorType::get(MidTy, VT->getElementCount());
-
- Value *NewBitCast = Builder.CreateBitCast(Src, MidTy);
- return new AddrSpaceCastInst(NewBitCast, CI.getType());
- }
-
return commonPointerCastTransforms(CI);
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 1480a0ff9e2f..656f04370e17 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/CmpInstAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -198,7 +199,11 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
}
// If the element is masked, handle it.
- if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
+ if (AndCst) {
+ Elt = ConstantFoldBinaryOpOperands(Instruction::And, Elt, AndCst, DL);
+ if (!Elt)
+ return nullptr;
+ }
// Find out if the comparison would be true or false for the i'th element.
Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
@@ -276,14 +281,14 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// order the state machines in complexity of the generated code.
Value *Idx = GEP->getOperand(2);
- // If the index is larger than the pointer size of the target, truncate the
- // index down like the GEP would do implicitly. We don't have to do this for
- // an inbounds GEP because the index can't be out of range.
+ // If the index is larger than the pointer offset size of the target, truncate
+ // the index down like the GEP would do implicitly. We don't have to do this
+ // for an inbounds GEP because the index can't be out of range.
if (!GEP->isInBounds()) {
- Type *IntPtrTy = DL.getIntPtrType(GEP->getType());
- unsigned PtrSize = IntPtrTy->getIntegerBitWidth();
- if (Idx->getType()->getPrimitiveSizeInBits().getFixedValue() > PtrSize)
- Idx = Builder.CreateTrunc(Idx, IntPtrTy);
+ Type *PtrIdxTy = DL.getIndexType(GEP->getType());
+ unsigned OffsetSize = PtrIdxTy->getIntegerBitWidth();
+ if (Idx->getType()->getPrimitiveSizeInBits().getFixedValue() > OffsetSize)
+ Idx = Builder.CreateTrunc(Idx, PtrIdxTy);
}
// If inbounds keyword is not present, Idx * ElementSize can overflow.
@@ -295,10 +300,10 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// We need to erase the highest countTrailingZeros(ElementSize) bits of Idx.
unsigned ElementSize =
DL.getTypeAllocSize(Init->getType()->getArrayElementType());
- auto MaskIdx = [&](Value* Idx){
- if (!GEP->isInBounds() && countTrailingZeros(ElementSize) != 0) {
+ auto MaskIdx = [&](Value *Idx) {
+ if (!GEP->isInBounds() && llvm::countr_zero(ElementSize) != 0) {
Value *Mask = ConstantInt::get(Idx->getType(), -1);
- Mask = Builder.CreateLShr(Mask, countTrailingZeros(ElementSize));
+ Mask = Builder.CreateLShr(Mask, llvm::countr_zero(ElementSize));
Idx = Builder.CreateAnd(Idx, Mask);
}
return Idx;
@@ -533,7 +538,8 @@ static void setInsertionPoint(IRBuilder<> &Builder, Value *V,
/// pointer.
static Value *rewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base,
const DataLayout &DL,
- SetVector<Value *> &Explored) {
+ SetVector<Value *> &Explored,
+ InstCombiner &IC) {
// Perform all the substitutions. This is a bit tricky because we can
// have cycles in our use-def chains.
// 1. Create the PHI nodes without any incoming values.
@@ -562,7 +568,7 @@ static Value *rewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base,
// Create all the other instructions.
for (Value *Val : Explored) {
- if (NewInsts.find(Val) != NewInsts.end())
+ if (NewInsts.contains(Val))
continue;
if (auto *CI = dyn_cast<CastInst>(Val)) {
@@ -610,7 +616,7 @@ static Value *rewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base,
for (unsigned I = 0, E = PHI->getNumIncomingValues(); I < E; ++I) {
Value *NewIncoming = PHI->getIncomingValue(I);
- if (NewInsts.find(NewIncoming) != NewInsts.end())
+ if (NewInsts.contains(NewIncoming))
NewIncoming = NewInsts[NewIncoming];
NewPhi->addIncoming(NewIncoming, PHI->getIncomingBlock(I));
@@ -635,7 +641,10 @@ static Value *rewriteGEPAsOffset(Type *ElemTy, Value *Start, Value *Base,
Val->getName() + ".ptr");
NewVal = Builder.CreateBitOrPointerCast(
NewVal, Val->getType(), Val->getName() + ".conv");
- Val->replaceAllUsesWith(NewVal);
+ IC.replaceInstUsesWith(*cast<Instruction>(Val), NewVal);
+ // Add old instruction to worklist for DCE. We don't directly remove it
+ // here because the original compare is one of the users.
+ IC.addToWorklist(cast<Instruction>(Val));
}
return NewInsts[Start];
@@ -688,7 +697,8 @@ getAsConstantIndexedAddress(Type *ElemTy, Value *V, const DataLayout &DL) {
/// between GEPLHS and RHS.
static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond,
- const DataLayout &DL) {
+ const DataLayout &DL,
+ InstCombiner &IC) {
// FIXME: Support vector of pointers.
if (GEPLHS->getType()->isVectorTy())
return nullptr;
@@ -712,7 +722,7 @@ static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
// can't have overflow on either side. We can therefore re-write
// this as:
// OFFSET1 cmp OFFSET2
- Value *NewRHS = rewriteGEPAsOffset(ElemTy, RHS, PtrBase, DL, Nodes);
+ Value *NewRHS = rewriteGEPAsOffset(ElemTy, RHS, PtrBase, DL, Nodes, IC);
// RewriteGEPAsOffset has replaced RHS and all of its uses with a re-written
// GEP having PtrBase as the pointer base, and has returned in NewRHS the
@@ -740,7 +750,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
RHS = RHS->stripPointerCasts();
Value *PtrBase = GEPLHS->getOperand(0);
- if (PtrBase == RHS && GEPLHS->isInBounds()) {
+ if (PtrBase == RHS && (GEPLHS->isInBounds() || ICmpInst::isEquality(Cond))) {
// ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
Value *Offset = EmitGEPOffset(GEPLHS);
return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
@@ -831,7 +841,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// Otherwise, the base pointers are different and the indices are
// different. Try convert this to an indexed compare by looking through
// PHIs/casts.
- return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
+ return transformToIndexedCompare(GEPLHS, RHS, Cond, DL, *this);
}
// If one of the GEPs has all zero indices, recurse.
@@ -883,7 +893,8 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// Only lower this if the icmp is the only user of the GEP or if we expect
// the result to fold to a constant!
- if (GEPsInBounds && (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
+ if ((GEPsInBounds || CmpInst::isEquality(Cond)) &&
+ (isa<ConstantExpr>(GEPLHS) || GEPLHS->hasOneUse()) &&
(isa<ConstantExpr>(GEPRHS) || GEPRHS->hasOneUse())) {
// ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2)
Value *L = EmitGEPOffset(GEPLHS);
@@ -894,13 +905,10 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// Try convert this to an indexed compare by looking through PHIs/casts as a
// last resort.
- return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
+ return transformToIndexedCompare(GEPLHS, RHS, Cond, DL, *this);
}
-Instruction *InstCombinerImpl::foldAllocaCmp(ICmpInst &ICI,
- const AllocaInst *Alloca) {
- assert(ICI.isEquality() && "Cannot fold non-equality comparison.");
-
+bool InstCombinerImpl::foldAllocaCmp(AllocaInst *Alloca) {
// It would be tempting to fold away comparisons between allocas and any
// pointer not based on that alloca (e.g. an argument). However, even
// though such pointers cannot alias, they can still compare equal.
@@ -909,67 +917,72 @@ Instruction *InstCombinerImpl::foldAllocaCmp(ICmpInst &ICI,
// doesn't escape we can argue that it's impossible to guess its value, and we
// can therefore act as if any such guesses are wrong.
//
- // The code below checks that the alloca doesn't escape, and that it's only
- // used in a comparison once (the current instruction). The
- // single-comparison-use condition ensures that we're trivially folding all
- // comparisons against the alloca consistently, and avoids the risk of
- // erroneously folding a comparison of the pointer with itself.
-
- unsigned MaxIter = 32; // Break cycles and bound to constant-time.
+ // However, we need to ensure that this folding is consistent: We can't fold
+ // one comparison to false, and then leave a different comparison against the
+ // same value alone (as it might evaluate to true at runtime, leading to a
+ // contradiction). As such, this code ensures that all comparisons are folded
+ // at the same time, and there are no other escapes.
+
+ struct CmpCaptureTracker : public CaptureTracker {
+ AllocaInst *Alloca;
+ bool Captured = false;
+ /// The value of the map is a bit mask of which icmp operands the alloca is
+ /// used in.
+ SmallMapVector<ICmpInst *, unsigned, 4> ICmps;
+
+ CmpCaptureTracker(AllocaInst *Alloca) : Alloca(Alloca) {}
+
+ void tooManyUses() override { Captured = true; }
+
+ bool captured(const Use *U) override {
+ auto *ICmp = dyn_cast<ICmpInst>(U->getUser());
+ // We need to check that U is based *only* on the alloca, and doesn't
+ // have other contributions from a select/phi operand.
+ // TODO: We could check whether getUnderlyingObjects() reduces to one
+ // object, which would allow looking through phi nodes.
+ if (ICmp && ICmp->isEquality() && getUnderlyingObject(*U) == Alloca) {
+ // Collect equality icmps of the alloca, and don't treat them as
+ // captures.
+ auto Res = ICmps.insert({ICmp, 0});
+ Res.first->second |= 1u << U->getOperandNo();
+ return false;
+ }
- SmallVector<const Use *, 32> Worklist;
- for (const Use &U : Alloca->uses()) {
- if (Worklist.size() >= MaxIter)
- return nullptr;
- Worklist.push_back(&U);
- }
+ Captured = true;
+ return true;
+ }
+ };
- unsigned NumCmps = 0;
- while (!Worklist.empty()) {
- assert(Worklist.size() <= MaxIter);
- const Use *U = Worklist.pop_back_val();
- const Value *V = U->getUser();
- --MaxIter;
+ CmpCaptureTracker Tracker(Alloca);
+ PointerMayBeCaptured(Alloca, &Tracker);
+ if (Tracker.Captured)
+ return false;
- if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V) || isa<PHINode>(V) ||
- isa<SelectInst>(V)) {
- // Track the uses.
- } else if (isa<LoadInst>(V)) {
- // Loading from the pointer doesn't escape it.
- continue;
- } else if (const auto *SI = dyn_cast<StoreInst>(V)) {
- // Storing *to* the pointer is fine, but storing the pointer escapes it.
- if (SI->getValueOperand() == U->get())
- return nullptr;
- continue;
- } else if (isa<ICmpInst>(V)) {
- if (NumCmps++)
- return nullptr; // Found more than one cmp.
- continue;
- } else if (const auto *Intrin = dyn_cast<IntrinsicInst>(V)) {
- switch (Intrin->getIntrinsicID()) {
- // These intrinsics don't escape or compare the pointer. Memset is safe
- // because we don't allow ptrtoint. Memcpy and memmove are safe because
- // we don't allow stores, so src cannot point to V.
- case Intrinsic::lifetime_start: case Intrinsic::lifetime_end:
- case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset:
- continue;
- default:
- return nullptr;
- }
- } else {
- return nullptr;
+ bool Changed = false;
+ for (auto [ICmp, Operands] : Tracker.ICmps) {
+ switch (Operands) {
+ case 1:
+ case 2: {
+ // The alloca is only used in one icmp operand. Assume that the
+ // equality is false.
+ auto *Res = ConstantInt::get(
+ ICmp->getType(), ICmp->getPredicate() == ICmpInst::ICMP_NE);
+ replaceInstUsesWith(*ICmp, Res);
+ eraseInstFromFunction(*ICmp);
+ Changed = true;
+ break;
}
- for (const Use &U : V->uses()) {
- if (Worklist.size() >= MaxIter)
- return nullptr;
- Worklist.push_back(&U);
+ case 3:
+ // Both icmp operands are based on the alloca, so this is comparing
+ // pointer offsets, without leaking any information about the address
+ // of the alloca. Ignore such comparisons.
+ break;
+ default:
+ llvm_unreachable("Cannot happen");
}
}
- auto *Res = ConstantInt::get(ICI.getType(),
- !CmpInst::isTrueWhenEqual(ICI.getPredicate()));
- return replaceInstUsesWith(ICI, Res);
+ return Changed;
}
/// Fold "icmp pred (X+C), X".
@@ -1058,9 +1071,9 @@ Instruction *InstCombinerImpl::foldICmpShrConstConst(ICmpInst &I, Value *A,
int Shift;
if (IsAShr && AP1.isNegative())
- Shift = AP1.countLeadingOnes() - AP2.countLeadingOnes();
+ Shift = AP1.countl_one() - AP2.countl_one();
else
- Shift = AP1.countLeadingZeros() - AP2.countLeadingZeros();
+ Shift = AP1.countl_zero() - AP2.countl_zero();
if (Shift > 0) {
if (IsAShr && AP1 == AP2.ashr(Shift)) {
@@ -1097,7 +1110,7 @@ Instruction *InstCombinerImpl::foldICmpShlConstConst(ICmpInst &I, Value *A,
if (AP2.isZero())
return nullptr;
- unsigned AP2TrailingZeros = AP2.countTrailingZeros();
+ unsigned AP2TrailingZeros = AP2.countr_zero();
if (!AP1 && AP2TrailingZeros != 0)
return getICmp(
@@ -1108,7 +1121,7 @@ Instruction *InstCombinerImpl::foldICmpShlConstConst(ICmpInst &I, Value *A,
return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
// Get the distance between the lowest bits that are set.
- int Shift = AP1.countTrailingZeros() - AP2TrailingZeros;
+ int Shift = AP1.countr_zero() - AP2TrailingZeros;
if (Shift > 0 && AP2.shl(Shift) == AP1)
return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
@@ -1143,7 +1156,7 @@ static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
// If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
if (!CI2->getValue().isPowerOf2())
return nullptr;
- unsigned NewWidth = CI2->getValue().countTrailingZeros();
+ unsigned NewWidth = CI2->getValue().countr_zero();
if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31)
return nullptr;
@@ -1295,6 +1308,48 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
return new ICmpInst(Pred, X, Cmp.getOperand(1));
}
+ // (icmp eq/ne (mul X, Y), 0) -> (icmp eq/ne X, 0) or (icmp eq/ne Y, 0) if we
+ // know the other operand is odd, or is non-zero and the multiply does not
+ // overflow.
+ if (match(Cmp.getOperand(0), m_Mul(m_Value(X), m_Value(Y))) &&
+ ICmpInst::isEquality(Pred)) {
+
+ KnownBits XKnown = computeKnownBits(X, 0, &Cmp);
+ // if X % 2 != 0
+ // (icmp eq/ne Y)
+ if (XKnown.countMaxTrailingZeros() == 0)
+ return new ICmpInst(Pred, Y, Cmp.getOperand(1));
+
+ KnownBits YKnown = computeKnownBits(Y, 0, &Cmp);
+ // if Y % 2 != 0
+ // (icmp eq/ne X)
+ if (YKnown.countMaxTrailingZeros() == 0)
+ return new ICmpInst(Pred, X, Cmp.getOperand(1));
+
+ auto *BO0 = cast<OverflowingBinaryOperator>(Cmp.getOperand(0));
+ if (BO0->hasNoUnsignedWrap() || BO0->hasNoSignedWrap()) {
+ const SimplifyQuery Q = SQ.getWithInstruction(&Cmp);
+ // `isKnownNonZero` does more analysis than just `!KnownBits.One.isZero()`
+ // but to avoid unnecessary work, first just check if this is an obvious case.
+
+ // if X non-zero and NoOverflow(X * Y)
+ // (icmp eq/ne Y)
+ if (!XKnown.One.isZero() || isKnownNonZero(X, DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return new ICmpInst(Pred, Y, Cmp.getOperand(1));
+
+ // if Y non-zero and NoOverflow(X * Y)
+ // (icmp eq/ne X)
+ if (!YKnown.One.isZero() || isKnownNonZero(Y, DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return new ICmpInst(Pred, X, Cmp.getOperand(1));
+ }
+ // Note, we are skipping cases:
+ // if Y % 2 != 0 AND X % 2 != 0
+ // (false/true)
+ // if X non-zero and Y non-zero and NoOverflow(X * Y)
+ // (false/true)
+ // Those can be simplified later as we would have already replaced the (icmp
+ // eq/ne (mul X, Y)) with (icmp eq/ne X/Y) and if X/Y is known non-zero that
+ // will fold to a constant elsewhere.
+ }
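The odd-operand case above relies on an odd factor being invertible modulo 2^n, so the wrapped product is zero exactly when the other operand is zero. A brute-force standalone C++ check over all i8 values (illustrative only):

#include <cassert>
#include <cstdint>

// 0xAB is odd, hence invertible mod 256, so (X * Y) wraps to zero exactly
// when Y itself is zero.
int main() {
  const uint8_t X = 0xAB;
  for (unsigned I = 0; I < 256; ++I) {
    const uint8_t Y = static_cast<uint8_t>(I);
    assert((static_cast<uint8_t>(X * Y) == 0) == (Y == 0));
  }
  return 0;
}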
return nullptr;
}
@@ -1331,17 +1386,18 @@ Instruction *InstCombinerImpl::foldICmpWithConstant(ICmpInst &Cmp) {
if (auto *Phi = dyn_cast<PHINode>(Op0))
if (all_of(Phi->operands(), [](Value *V) { return isa<Constant>(V); })) {
- Type *Ty = Cmp.getType();
- Builder.SetInsertPoint(Phi);
- PHINode *NewPhi =
- Builder.CreatePHI(Ty, Phi->getNumOperands());
- for (BasicBlock *Predecessor : predecessors(Phi->getParent())) {
- auto *Input =
- cast<Constant>(Phi->getIncomingValueForBlock(Predecessor));
- auto *BoolInput = ConstantExpr::getCompare(Pred, Input, C);
- NewPhi->addIncoming(BoolInput, Predecessor);
+ SmallVector<Constant *> Ops;
+ for (Value *V : Phi->incoming_values()) {
+ Constant *Res =
+ ConstantFoldCompareInstOperands(Pred, cast<Constant>(V), C, DL);
+ if (!Res)
+ return nullptr;
+ Ops.push_back(Res);
}
- NewPhi->takeName(&Cmp);
+ Builder.SetInsertPoint(Phi);
+ PHINode *NewPhi = Builder.CreatePHI(Cmp.getType(), Phi->getNumOperands());
+ for (auto [V, Pred] : zip(Ops, Phi->blocks()))
+ NewPhi->addIncoming(V, Pred);
return replaceInstUsesWith(Cmp, NewPhi);
}
@@ -1369,11 +1425,8 @@ Instruction *InstCombinerImpl::foldICmpWithDominatingICmp(ICmpInst &Cmp) {
if (TrueBB == FalseBB)
return nullptr;
- // Try to simplify this compare to T/F based on the dominating condition.
- std::optional<bool> Imp =
- isImpliedCondition(DomCond, &Cmp, DL, TrueBB == CmpBB);
- if (Imp)
- return replaceInstUsesWith(Cmp, ConstantInt::get(Cmp.getType(), *Imp));
+ // We already checked simple implication in InstSimplify, only handle complex
+ // cases here.
CmpInst::Predicate Pred = Cmp.getPredicate();
Value *X = Cmp.getOperand(0), *Y = Cmp.getOperand(1);
@@ -1475,7 +1528,7 @@ Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp,
KnownBits Known = computeKnownBits(X, 0, &Cmp);
// If all the high bits are known, we can do this xform.
- if ((Known.Zero | Known.One).countLeadingOnes() >= SrcBits - DstBits) {
+ if ((Known.Zero | Known.One).countl_one() >= SrcBits - DstBits) {
// Pull in the high bits from known-ones set.
APInt NewRHS = C.zext(SrcBits);
NewRHS |= Known.One & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits);
@@ -1781,17 +1834,12 @@ Instruction *InstCombinerImpl::foldICmpAndConstConst(ICmpInst &Cmp,
++UsesRemoved;
// Compute A & ((1 << B) | 1)
- Value *NewOr = nullptr;
- if (auto *C = dyn_cast<Constant>(B)) {
- if (UsesRemoved >= 1)
- NewOr = ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One);
- } else {
- if (UsesRemoved >= 3)
- NewOr = Builder.CreateOr(Builder.CreateShl(One, B, LShr->getName(),
- /*HasNUW=*/true),
- One, Or->getName());
- }
- if (NewOr) {
+ unsigned RequireUsesRemoved = match(B, m_ImmConstant()) ? 1 : 3;
+ if (UsesRemoved >= RequireUsesRemoved) {
+ Value *NewOr =
+ Builder.CreateOr(Builder.CreateShl(One, B, LShr->getName(),
+ /*HasNUW=*/true),
+ One, Or->getName());
Value *NewAnd = Builder.CreateAnd(A, NewOr, And->getName());
return replaceOperand(Cmp, 0, NewAnd);
}
@@ -1819,6 +1867,15 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
auto NewPred = TrueIfNeg ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
return new ICmpInst(NewPred, X, ConstantInt::getNullValue(X->getType()));
}
+ // (X & -X) < 0 --> X == MinSignedC
+ // (X & -X) > -1 --> X != MinSignedC
+ if (match(And, m_c_And(m_Neg(m_Value(X)), m_Deferred(X)))) {
+ Constant *MinSignedC = ConstantInt::get(
+ X->getType(),
+ APInt::getSignedMinValue(X->getType()->getScalarSizeInBits()));
+ auto NewPred = TrueIfNeg ? CmpInst::ICMP_EQ : CmpInst::ICMP_NE;
+ return new ICmpInst(NewPred, X, MinSignedC);
+ }
}
// TODO: These all require that Y is constant too, so refactor with the above.
@@ -1846,6 +1903,30 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1))));
}
+ // If we are testing the intersection of 2 select-of-nonzero-constants with no
+ // common bits set, it's the same as checking if exactly one select condition
+ // is set:
+ // ((A ? TC : FC) & (B ? TC : FC)) == 0 --> xor A, B
+ // ((A ? TC : FC) & (B ? TC : FC)) != 0 --> not(xor A, B)
+ // TODO: Generalize for non-constant values.
+ // TODO: Handle signed/unsigned predicates.
+ // TODO: Handle other bitwise logic connectors.
+ // TODO: Extend to handle a non-zero compare constant.
+ if (C.isZero() && (Pred == CmpInst::ICMP_EQ || And->hasOneUse())) {
+ assert(Cmp.isEquality() && "Not expecting non-equality predicates");
+ Value *A, *B;
+ const APInt *TC, *FC;
+ if (match(X, m_Select(m_Value(A), m_APInt(TC), m_APInt(FC))) &&
+ match(Y,
+ m_Select(m_Value(B), m_SpecificInt(*TC), m_SpecificInt(*FC))) &&
+ !TC->isZero() && !FC->isZero() && !TC->intersects(*FC)) {
+ Value *R = Builder.CreateXor(A, B);
+ if (Pred == CmpInst::ICMP_NE)
+ R = Builder.CreateNot(R);
+ return replaceInstUsesWith(Cmp, R);
+ }
+ }
+
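The fold above hinges on TC and FC being non-zero and sharing no bits: the AND of the two selects is zero exactly when the two conditions disagree, which is A xor B. A standalone C++ sketch with arbitrarily chosen constants (illustrative only):

#include <cassert>
#include <cstdint>

// With disjoint non-zero constants, the AND is zero iff the selects picked
// different constants, i.e. the conditions differ.
int main() {
  const uint32_t TC = 0x0Fu, FC = 0xF0u; // non-zero, no common bits
  for (bool A : {false, true})
    for (bool B : {false, true}) {
      const uint32_t V = (A ? TC : FC) & (B ? TC : FC);
      assert((V == 0) == (A != B));
    }
  return 0;
}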
// ((zext i1 X) & Y) == 0 --> !((trunc Y) & X)
// ((zext i1 X) & Y) != 0 --> ((trunc Y) & X)
// ((zext i1 X) & Y) == 1 --> ((trunc Y) & X)
@@ -1863,6 +1944,59 @@ Instruction *InstCombinerImpl::foldICmpAndConstant(ICmpInst &Cmp,
return nullptr;
}
+/// Fold icmp eq/ne (or (xor (X1, X2), xor(X3, X4))), 0.
+static Value *foldICmpOrXorChain(ICmpInst &Cmp, BinaryOperator *Or,
+ InstCombiner::BuilderTy &Builder) {
+ // Are we using xors to bitwise check for a pair or pairs of (in)equalities?
+ // Convert to a shorter form that has more potential to be folded even
+ // further.
+ // ((X1 ^ X2) || (X3 ^ X4)) == 0 --> (X1 == X2) && (X3 == X4)
+ // ((X1 ^ X2) || (X3 ^ X4)) != 0 --> (X1 != X2) || (X3 != X4)
+ // ((X1 ^ X2) || (X3 ^ X4) || (X5 ^ X6)) == 0 -->
+ // (X1 == X2) && (X3 == X4) && (X5 == X6)
+ // ((X1 ^ X2) || (X3 ^ X4) || (X5 ^ X6)) != 0 -->
+ // (X1 != X2) || (X3 != X4) || (X5 != X6)
+ // TODO: Implement for sub
+ SmallVector<std::pair<Value *, Value *>, 2> CmpValues;
+ SmallVector<Value *, 16> WorkList(1, Or);
+
+ while (!WorkList.empty()) {
+ auto MatchOrOperatorArgument = [&](Value *OrOperatorArgument) {
+ Value *Lhs, *Rhs;
+
+ if (match(OrOperatorArgument,
+ m_OneUse(m_Xor(m_Value(Lhs), m_Value(Rhs))))) {
+ CmpValues.emplace_back(Lhs, Rhs);
+ } else {
+ WorkList.push_back(OrOperatorArgument);
+ }
+ };
+
+ Value *CurrentValue = WorkList.pop_back_val();
+ Value *OrOperatorLhs, *OrOperatorRhs;
+
+ if (!match(CurrentValue,
+ m_Or(m_Value(OrOperatorLhs), m_Value(OrOperatorRhs)))) {
+ return nullptr;
+ }
+
+ MatchOrOperatorArgument(OrOperatorRhs);
+ MatchOrOperatorArgument(OrOperatorLhs);
+ }
+
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+ auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
+ Value *LhsCmp = Builder.CreateICmp(Pred, CmpValues.rbegin()->first,
+ CmpValues.rbegin()->second);
+
+ for (auto It = CmpValues.rbegin() + 1; It != CmpValues.rend(); ++It) {
+ Value *RhsCmp = Builder.CreateICmp(Pred, It->first, It->second);
+ LhsCmp = Builder.CreateBinOp(BOpc, LhsCmp, RhsCmp);
+ }
+
+ return LhsCmp;
+}
+
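A hand-written before/after sketch (not taken from the patch's tests) showing why the worklist matters: the old code only handled exactly two xor pairs, while the chain walk handles three or more.

  define i1 @src(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) {
    %x1 = xor i32 %a, %b
    %x2 = xor i32 %c, %d
    %x3 = xor i32 %e, %f
    %o1 = or i32 %x1, %x2
    %o2 = or i32 %o1, %x3
    %r = icmp eq i32 %o2, 0
    ret i1 %r
  }
  ; is expected to become roughly:
  define i1 @tgt(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) {
    %c1 = icmp eq i32 %a, %b
    %c2 = icmp eq i32 %c, %d
    %c3 = icmp eq i32 %e, %f
    %t = and i1 %c1, %c2
    %r = and i1 %t, %c3
    ret i1 %r
  }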
/// Fold icmp (or X, Y), C.
Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp,
BinaryOperator *Or,
@@ -1909,6 +2043,30 @@ Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp,
return new ICmpInst(NewPred, X, NewC);
}
+ const APInt *OrC;
+ // icmp(X | OrC, C) --> icmp(X, 0)
+ if (C.isNonNegative() && match(Or, m_Or(m_Value(X), m_APInt(OrC)))) {
+ switch (Pred) {
+ // X | OrC s< C --> X s< 0 iff OrC s>= C s>= 0
+ case ICmpInst::ICMP_SLT:
+ // X | OrC s>= C --> X s>= 0 iff OrC s>= C s>= 0
+ case ICmpInst::ICMP_SGE:
+ if (OrC->sge(C))
+ return new ICmpInst(Pred, X, ConstantInt::getNullValue(X->getType()));
+ break;
+ // X | OrC s<= C --> X s< 0 iff OrC s> C s>= 0
+ case ICmpInst::ICMP_SLE:
+ // X | OrC s> C --> X s>= 0 iff OrC s> C s>= 0
+ case ICmpInst::ICMP_SGT:
+ if (OrC->sgt(C))
+ return new ICmpInst(ICmpInst::getFlippedStrictnessPredicate(Pred), X,
+ ConstantInt::getNullValue(X->getType()));
+ break;
+ default:
+ break;
+ }
+ }
+
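A minimal sketch of the SLT case (constants 7 and 5 are chosen only so that OrC s>= C s>= 0 holds): any set low bits from the or cannot make a non-negative value compare less than C, so only the sign of X matters.

  define i1 @src(i32 %x) {
    %o = or i32 %x, 7
    %r = icmp slt i32 %o, 5
    ret i1 %r
  }
  ; is expected to become roughly:
  define i1 @tgt(i32 %x) {
    %r = icmp slt i32 %x, 0
    ret i1 %r
  }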
if (!Cmp.isEquality() || !C.isZero() || !Or->hasOneUse())
return nullptr;
@@ -1924,18 +2082,8 @@ Instruction *InstCombinerImpl::foldICmpOrConstant(ICmpInst &Cmp,
return BinaryOperator::Create(BOpc, CmpP, CmpQ);
}
- // Are we using xors to bitwise check for a pair of (in)equalities? Convert to
- // a shorter form that has more potential to be folded even further.
- Value *X1, *X2, *X3, *X4;
- if (match(OrOp0, m_OneUse(m_Xor(m_Value(X1), m_Value(X2)))) &&
- match(OrOp1, m_OneUse(m_Xor(m_Value(X3), m_Value(X4))))) {
- // ((X1 ^ X2) || (X3 ^ X4)) == 0 --> (X1 == X2) && (X3 == X4)
- // ((X1 ^ X2) || (X3 ^ X4)) != 0 --> (X1 != X2) || (X3 != X4)
- Value *Cmp12 = Builder.CreateICmp(Pred, X1, X2);
- Value *Cmp34 = Builder.CreateICmp(Pred, X3, X4);
- auto BOpc = Pred == CmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
- return BinaryOperator::Create(BOpc, Cmp12, Cmp34);
- }
+ if (Value *V = foldICmpOrXorChain(Cmp, Or, Builder))
+ return replaceInstUsesWith(Cmp, V);
return nullptr;
}
@@ -1969,21 +2117,29 @@ Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp,
return new ICmpInst(Pred, X, ConstantInt::getNullValue(MulTy));
}
- if (MulC->isZero() || (!Mul->hasNoSignedWrap() && !Mul->hasNoUnsignedWrap()))
+ if (MulC->isZero())
return nullptr;
- // If the multiply does not wrap, try to divide the compare constant by the
- // multiplication factor.
+ // If the multiply does not wrap or the constant is odd, try to divide the
+ // compare constant by the multiplication factor.
if (Cmp.isEquality()) {
- // (mul nsw X, MulC) == C --> X == C /s MulC
+ // (mul nsw X, MulC) eq/ne C --> X eq/ne C /s MulC
if (Mul->hasNoSignedWrap() && C.srem(*MulC).isZero()) {
Constant *NewC = ConstantInt::get(MulTy, C.sdiv(*MulC));
return new ICmpInst(Pred, X, NewC);
}
- // (mul nuw X, MulC) == C --> X == C /u MulC
- if (Mul->hasNoUnsignedWrap() && C.urem(*MulC).isZero()) {
- Constant *NewC = ConstantInt::get(MulTy, C.udiv(*MulC));
- return new ICmpInst(Pred, X, NewC);
+
+ // C % MulC == 0 is a weaker condition than we could use if MulC is odd,
+ // because the transform is correct whenever MulC * N == C, including when
+ // the multiply overflows. E.g., with i8,
+ // (icmp eq (mul X, 5), 101) -> (icmp eq X, 225), but since 101 % 5 != 0, we
+ // miss that case.
+ if (C.urem(*MulC).isZero()) {
+ // (mul nuw X, MulC) eq/ne C --> X eq/ne C /u MulC
+ // (mul X, OddC) eq/ne N * C --> X eq/ne N
+ if ((*MulC & 1).isOne() || Mul->hasNoUnsignedWrap()) {
+ Constant *NewC = ConstantInt::get(MulTy, C.udiv(*MulC));
+ return new ICmpInst(Pred, X, NewC);
+ }
}
}
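An illustrative example of the new odd-multiplier case (@src/@tgt and the constants are arbitrary): no nuw/nsw is needed because multiplication by an odd constant is invertible modulo 2^N.

  define i1 @src(i8 %x) {
    %m = mul i8 %x, 5
    %r = icmp eq i8 %m, 25
    ret i1 %r
  }
  ; is expected to become roughly:
  define i1 @tgt(i8 %x) {
    %r = icmp eq i8 %x, 5
    ret i1 %r
  }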
@@ -1992,27 +2148,32 @@ Instruction *InstCombinerImpl::foldICmpMulConstant(ICmpInst &Cmp,
// (X * MulC) > C --> X > (C / MulC)
// TODO: Assert that Pred is not equal to SGE, SLE, UGE, ULE?
Constant *NewC = nullptr;
- if (Mul->hasNoSignedWrap()) {
+ if (Mul->hasNoSignedWrap() && ICmpInst::isSigned(Pred)) {
// MININT / -1 --> overflow.
if (C.isMinSignedValue() && MulC->isAllOnes())
return nullptr;
if (MulC->isNegative())
Pred = ICmpInst::getSwappedPredicate(Pred);
- if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE)
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGE) {
NewC = ConstantInt::get(
MulTy, APIntOps::RoundingSDiv(C, *MulC, APInt::Rounding::UP));
- if (Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_SGT)
+ } else {
+ assert((Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_SGT) &&
+ "Unexpected predicate");
NewC = ConstantInt::get(
MulTy, APIntOps::RoundingSDiv(C, *MulC, APInt::Rounding::DOWN));
- } else {
- assert(Mul->hasNoUnsignedWrap() && "Expected mul nuw");
- if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE)
+ }
+ } else if (Mul->hasNoUnsignedWrap() && ICmpInst::isUnsigned(Pred)) {
+ if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE) {
NewC = ConstantInt::get(
MulTy, APIntOps::RoundingUDiv(C, *MulC, APInt::Rounding::UP));
- if (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT)
+ } else {
+ assert((Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_UGT) &&
+ "Unexpected predicate");
NewC = ConstantInt::get(
MulTy, APIntOps::RoundingUDiv(C, *MulC, APInt::Rounding::DOWN));
+ }
}
return NewC ? new ICmpInst(Pred, X, NewC) : nullptr;
@@ -2070,6 +2231,32 @@ Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp,
if (Cmp.isEquality() && match(Shl->getOperand(0), m_APInt(ShiftVal)))
return foldICmpShlConstConst(Cmp, Shl->getOperand(1), C, *ShiftVal);
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+ // (icmp pred (shl nuw&nsw X, Y), Csle0)
+ // -> (icmp pred X, Csle0)
+ //
+ // The idea is that nuw/nsw essentially freeze the sign bit for the shift op,
+ // so X's sign bit must be what is used.
+ if (C.sle(0) && Shl->hasNoUnsignedWrap() && Shl->hasNoSignedWrap())
+ return new ICmpInst(Pred, Shl->getOperand(0), Cmp.getOperand(1));
+
+ // (icmp eq/ne (shl nuw|nsw X, Y), 0)
+ // -> (icmp eq/ne X, 0)
+ if (ICmpInst::isEquality(Pred) && C.isZero() &&
+ (Shl->hasNoUnsignedWrap() || Shl->hasNoSignedWrap()))
+ return new ICmpInst(Pred, Shl->getOperand(0), Cmp.getOperand(1));
+
+ // (icmp slt (shl nsw X, Y), 0/1)
+ // -> (icmp slt X, 0/1)
+ // (icmp sgt (shl nsw X, Y), 0/-1)
+ // -> (icmp sgt X, 0/-1)
+ //
+ // NB: sge/sle with a constant will canonicalize to sgt/slt.
+ if (Shl->hasNoSignedWrap() &&
+ (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SLT))
+ if (C.isZero() || (Pred == ICmpInst::ICMP_SGT ? C.isAllOnes() : C.isOne()))
+ return new ICmpInst(Pred, Shl->getOperand(0), Cmp.getOperand(1));
+
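As a rough sketch of the nsw sign-test case (names are illustrative), the "is non-negative" check survives dropping the shift entirely:

  define i1 @src(i32 %x, i32 %y) {
    %s = shl nsw i32 %x, %y
    %r = icmp sgt i32 %s, -1
    ret i1 %r
  }
  ; is expected to become roughly:
  define i1 @tgt(i32 %x, i32 %y) {
    %r = icmp sgt i32 %x, -1
    ret i1 %r
  }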
const APInt *ShiftAmt;
if (!match(Shl->getOperand(1), m_APInt(ShiftAmt)))
return foldICmpShlOne(Cmp, Shl, C);
@@ -2080,7 +2267,6 @@ Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp,
if (ShiftAmt->uge(TypeBits))
return nullptr;
- ICmpInst::Predicate Pred = Cmp.getPredicate();
Value *X = Shl->getOperand(0);
Type *ShType = Shl->getType();
@@ -2107,11 +2293,6 @@ Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp,
APInt ShiftedC = (C - 1).ashr(*ShiftAmt) + 1;
return new ICmpInst(Pred, X, ConstantInt::get(ShType, ShiftedC));
}
- // If this is a signed comparison to 0 and the shift is sign preserving,
- // use the shift LHS operand instead; isSignTest may change 'Pred', so only
- // do that if we're sure to not continue on in this function.
- if (isSignTest(Pred, C))
- return new ICmpInst(Pred, X, Constant::getNullValue(ShType));
}
// NUW guarantees that we are only shifting out zero bits from the high bits,
@@ -2189,7 +2370,7 @@ Instruction *InstCombinerImpl::foldICmpShlConstant(ICmpInst &Cmp,
// free on the target. It has the additional benefit of comparing to a
// smaller constant that may be more target-friendly.
unsigned Amt = ShiftAmt->getLimitedValue(TypeBits - 1);
- if (Shl->hasOneUse() && Amt != 0 && C.countTrailingZeros() >= Amt &&
+ if (Shl->hasOneUse() && Amt != 0 && C.countr_zero() >= Amt &&
DL.isLegalInteger(TypeBits - Amt)) {
Type *TruncTy = IntegerType::get(Cmp.getContext(), TypeBits - Amt);
if (auto *ShVTy = dyn_cast<VectorType>(ShType))
@@ -2237,9 +2418,8 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
assert(ShiftValC->uge(C) && "Expected simplify of compare");
assert((IsUGT || !C.isZero()) && "Expected X u< 0 to simplify");
- unsigned CmpLZ =
- IsUGT ? C.countLeadingZeros() : (C - 1).countLeadingZeros();
- unsigned ShiftLZ = ShiftValC->countLeadingZeros();
+ unsigned CmpLZ = IsUGT ? C.countl_zero() : (C - 1).countl_zero();
+ unsigned ShiftLZ = ShiftValC->countl_zero();
Constant *NewC = ConstantInt::get(Shr->getType(), CmpLZ - ShiftLZ);
auto NewPred = IsUGT ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
return new ICmpInst(NewPred, Shr->getOperand(1), NewC);
@@ -3184,18 +3364,30 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
}
break;
}
- case Instruction::And: {
- const APInt *BOC;
- if (match(BOp1, m_APInt(BOC))) {
- // If we have ((X & C) == C), turn it into ((X & C) != 0).
- if (C == *BOC && C.isPowerOf2())
- return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
- BO, Constant::getNullValue(RHS->getType()));
- }
- break;
- }
case Instruction::UDiv:
- if (C.isZero()) {
+ case Instruction::SDiv:
+ if (BO->isExact()) {
+ // div exact X, Y eq/ne 0 -> X eq/ne 0
+ // div exact X, Y eq/ne 1 -> X eq/ne Y
+ // div exact X, Y eq/ne C ->
+ //    if Y * C never overflows && OneUse:
+ // -> Y * C eq/ne X
+ if (C.isZero())
+ return new ICmpInst(Pred, BOp0, Constant::getNullValue(BO->getType()));
+ else if (C.isOne())
+ return new ICmpInst(Pred, BOp0, BOp1);
+ else if (BO->hasOneUse()) {
+ OverflowResult OR = computeOverflow(
+ Instruction::Mul, BO->getOpcode() == Instruction::SDiv, BOp1,
+ Cmp.getOperand(1), BO);
+ if (OR == OverflowResult::NeverOverflows) {
+ Value *YC =
+ Builder.CreateMul(BOp1, ConstantInt::get(BO->getType(), C));
+ return new ICmpInst(Pred, YC, BOp0);
+ }
+ }
+ }
+ if (BO->getOpcode() == Instruction::UDiv && C.isZero()) {
// (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A)
auto NewPred = isICMP_NE ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
return new ICmpInst(NewPred, BOp1, BOp0);
@@ -3207,6 +3399,44 @@ Instruction *InstCombinerImpl::foldICmpBinOpEqualityWithConstant(
return nullptr;
}
+static Instruction *foldCtpopPow2Test(ICmpInst &I, IntrinsicInst *CtpopLhs,
+ const APInt &CRhs,
+ InstCombiner::BuilderTy &Builder,
+ const SimplifyQuery &Q) {
+ assert(CtpopLhs->getIntrinsicID() == Intrinsic::ctpop &&
+ "Non-ctpop intrin in ctpop fold");
+ if (!CtpopLhs->hasOneUse())
+ return nullptr;
+
+ // Power of 2 test:
+ // isPow2OrZero : ctpop(X) u< 2
+ // isPow2 : ctpop(X) == 1
+ // NotPow2OrZero: ctpop(X) u> 1
+ // NotPow2 : ctpop(X) != 1
+ // If we know any bit of X can be folded to:
+ // IsPow2 : X & (~Bit) == 0
+ // NotPow2 : X & (~Bit) != 0
+ const ICmpInst::Predicate Pred = I.getPredicate();
+ if (((I.isEquality() || Pred == ICmpInst::ICMP_UGT) && CRhs == 1) ||
+ (Pred == ICmpInst::ICMP_ULT && CRhs == 2)) {
+ Value *Op = CtpopLhs->getArgOperand(0);
+ KnownBits OpKnown = computeKnownBits(Op, Q.DL,
+ /*Depth*/ 0, Q.AC, Q.CxtI, Q.DT);
+ // No need to check for count > 1, that should be already constant folded.
+ if (OpKnown.countMinPopulation() == 1) {
+ Value *And = Builder.CreateAnd(
+ Op, Constant::getIntegerValue(Op->getType(), ~(OpKnown.One)));
+ return new ICmpInst(
+ (Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_ULT)
+ ? ICmpInst::ICMP_EQ
+ : ICmpInst::ICMP_NE,
+ And, Constant::getNullValue(Op->getType()));
+ }
+ }
+
+ return nullptr;
+}
+
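A sketch of the intent (hand-written, not from the patch): the "or %x, 8" exists only to give the input one known-set bit, which is what lets the is-power-of-2 test collapse to a mask compare.

  declare i8 @llvm.ctpop.i8(i8)

  define i1 @src(i8 %x) {
    %v = or i8 %x, 8
    %p = call i8 @llvm.ctpop.i8(i8 %v)
    %r = icmp eq i8 %p, 1
    ret i1 %r
  }
  ; is expected to become roughly:
  define i1 @tgt(i8 %x) {
    %v = or i8 %x, 8
    %m = and i8 %v, -9          ; -9 == ~8, clears the known-set bit
    %r = icmp eq i8 %m, 0
    ret i1 %r
  }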
/// Fold an equality icmp with LLVM intrinsic and constant operand.
Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
ICmpInst &Cmp, IntrinsicInst *II, const APInt &C) {
@@ -3227,6 +3457,11 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
return new ICmpInst(Pred, II->getArgOperand(0),
ConstantInt::get(Ty, C.byteSwap()));
+ case Intrinsic::bitreverse:
+ // bitreverse(A) == C -> A == bitreverse(C)
+ return new ICmpInst(Pred, II->getArgOperand(0),
+ ConstantInt::get(Ty, C.reverseBits()));
+
case Intrinsic::ctlz:
case Intrinsic::cttz: {
// ctz(A) == bitwidth(A) -> A == 0 and likewise for !=
@@ -3277,15 +3512,22 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
}
break;
+ case Intrinsic::umax:
case Intrinsic::uadd_sat: {
// uadd.sat(a, b) == 0 -> (a | b) == 0
- if (C.isZero()) {
+ // umax(a, b) == 0 -> (a | b) == 0
+ if (C.isZero() && II->hasOneUse()) {
Value *Or = Builder.CreateOr(II->getArgOperand(0), II->getArgOperand(1));
return new ICmpInst(Pred, Or, Constant::getNullValue(Ty));
}
break;
}
+ case Intrinsic::ssub_sat:
+ // ssub.sat(a, b) == 0 -> a == b
+ if (C.isZero())
+ return new ICmpInst(Pred, II->getArgOperand(0), II->getArgOperand(1));
+ break;
case Intrinsic::usub_sat: {
// usub.sat(a, b) == 0 -> a <= b
if (C.isZero()) {
@@ -3303,7 +3545,9 @@ Instruction *InstCombinerImpl::foldICmpEqIntrinsicWithConstant(
}
/// Fold an icmp with LLVM intrinsics
-static Instruction *foldICmpIntrinsicWithIntrinsic(ICmpInst &Cmp) {
+static Instruction *
+foldICmpIntrinsicWithIntrinsic(ICmpInst &Cmp,
+ InstCombiner::BuilderTy &Builder) {
assert(Cmp.isEquality());
ICmpInst::Predicate Pred = Cmp.getPredicate();
@@ -3321,16 +3565,32 @@ static Instruction *foldICmpIntrinsicWithIntrinsic(ICmpInst &Cmp) {
// original values.
return new ICmpInst(Pred, IIOp0->getOperand(0), IIOp1->getOperand(0));
case Intrinsic::fshl:
- case Intrinsic::fshr:
+ case Intrinsic::fshr: {
// If both operands are rotated by same amount, just compare the
// original values.
if (IIOp0->getOperand(0) != IIOp0->getOperand(1))
break;
if (IIOp1->getOperand(0) != IIOp1->getOperand(1))
break;
- if (IIOp0->getOperand(2) != IIOp1->getOperand(2))
- break;
- return new ICmpInst(Pred, IIOp0->getOperand(0), IIOp1->getOperand(0));
+ if (IIOp0->getOperand(2) == IIOp1->getOperand(2))
+ return new ICmpInst(Pred, IIOp0->getOperand(0), IIOp1->getOperand(0));
+
+ // rotate(X, AmtX) == rotate(Y, AmtY)
+ // -> rotate(X, AmtX - AmtY) == Y
+ // Do this if either both rotates have one use or if only one has one use
+ // and AmtX/AmtY are constants.
+ unsigned OneUses = IIOp0->hasOneUse() + IIOp1->hasOneUse();
+ if (OneUses == 2 ||
+ (OneUses == 1 && match(IIOp0->getOperand(2), m_ImmConstant()) &&
+ match(IIOp1->getOperand(2), m_ImmConstant()))) {
+ Value *SubAmt =
+ Builder.CreateSub(IIOp0->getOperand(2), IIOp1->getOperand(2));
+ Value *CombinedRotate = Builder.CreateIntrinsic(
+ Op0->getType(), IIOp0->getIntrinsicID(),
+ {IIOp0->getOperand(0), IIOp0->getOperand(0), SubAmt});
+ return new ICmpInst(Pred, IIOp1->getOperand(0), CombinedRotate);
+ }
+ } break;
default:
break;
}
@@ -3421,16 +3681,119 @@ Instruction *InstCombinerImpl::foldICmpBinOpWithConstant(ICmpInst &Cmp,
return foldICmpBinOpEqualityWithConstant(Cmp, BO, C);
}
+static Instruction *
+foldICmpUSubSatOrUAddSatWithConstant(ICmpInst::Predicate Pred,
+ SaturatingInst *II, const APInt &C,
+ InstCombiner::BuilderTy &Builder) {
+ // This transform may end up producing more than one instruction for the
+ // intrinsic, so limit it to one user of the intrinsic.
+ if (!II->hasOneUse())
+ return nullptr;
+
+ // Let Y = [add/sub]_sat(X, C) pred C2
+ // SatVal = The saturating value for the operation
+ // WillWrap = Whether or not the operation will underflow / overflow
+ // => Y = (WillWrap ? SatVal : (X binop C)) pred C2
+ // => Y = WillWrap ? (SatVal pred C2) : ((X binop C) pred C2)
+ //
+ // When (SatVal pred C2) is true, then
+ // Y = WillWrap ? true : ((X binop C) pred C2)
+ // => Y = WillWrap || ((X binop C) pred C2)
+ // else
+ // Y = WillWrap ? false : ((X binop C) pred C2)
+ // => Y = !WillWrap ? ((X binop C) pred C2) : false
+ // => Y = !WillWrap && ((X binop C) pred C2)
+ Value *Op0 = II->getOperand(0);
+ Value *Op1 = II->getOperand(1);
+
+ const APInt *COp1;
+ // This transform only works when the intrinsic has an integral constant or
+ // splat vector as the second operand.
+ if (!match(Op1, m_APInt(COp1)))
+ return nullptr;
+
+ APInt SatVal;
+ switch (II->getIntrinsicID()) {
+ default:
+ llvm_unreachable(
+ "This function only works with usub_sat and uadd_sat for now!");
+ case Intrinsic::uadd_sat:
+ SatVal = APInt::getAllOnes(C.getBitWidth());
+ break;
+ case Intrinsic::usub_sat:
+ SatVal = APInt::getZero(C.getBitWidth());
+ break;
+ }
+
+ // Check (SatVal pred C2)
+ bool SatValCheck = ICmpInst::compare(SatVal, C, Pred);
+
+ // !WillWrap.
+ ConstantRange C1 = ConstantRange::makeExactNoWrapRegion(
+ II->getBinaryOp(), *COp1, II->getNoWrapKind());
+
+ // WillWrap.
+ if (SatValCheck)
+ C1 = C1.inverse();
+
+ ConstantRange C2 = ConstantRange::makeExactICmpRegion(Pred, C);
+ if (II->getBinaryOp() == Instruction::Add)
+ C2 = C2.sub(*COp1);
+ else
+ C2 = C2.add(*COp1);
+
+ Instruction::BinaryOps CombiningOp =
+ SatValCheck ? Instruction::BinaryOps::Or : Instruction::BinaryOps::And;
+
+ std::optional<ConstantRange> Combination;
+ if (CombiningOp == Instruction::BinaryOps::Or)
+ Combination = C1.exactUnionWith(C2);
+ else /* CombiningOp == Instruction::BinaryOps::And */
+ Combination = C1.exactIntersectWith(C2);
+
+ if (!Combination)
+ return nullptr;
+
+ CmpInst::Predicate EquivPred;
+ APInt EquivInt;
+ APInt EquivOffset;
+
+ Combination->getEquivalentICmp(EquivPred, EquivInt, EquivOffset);
+
+ return new ICmpInst(
+ EquivPred,
+ Builder.CreateAdd(Op0, ConstantInt::get(Op1->getType(), EquivOffset)),
+ ConstantInt::get(Op1->getType(), EquivInt));
+}
+
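One concrete instance of the range-based rewrite (values chosen for illustration): usub.sat(%x, 10) is zero exactly when %x u<= 10, so the compare collapses to a single unsigned range check.

  declare i8 @llvm.usub.sat.i8(i8, i8)

  define i1 @src(i8 %x) {
    %s = call i8 @llvm.usub.sat.i8(i8 %x, i8 10)
    %r = icmp eq i8 %s, 0
    ret i1 %r
  }
  ; is expected to become roughly:
  define i1 @tgt(i8 %x) {
    %r = icmp ult i8 %x, 11
    ret i1 %r
  }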
/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
IntrinsicInst *II,
const APInt &C) {
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+
+ // Handle folds that apply for any kind of icmp.
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::uadd_sat:
+ case Intrinsic::usub_sat:
+ if (auto *Folded = foldICmpUSubSatOrUAddSatWithConstant(
+ Pred, cast<SaturatingInst>(II), C, Builder))
+ return Folded;
+ break;
+ case Intrinsic::ctpop: {
+ const SimplifyQuery Q = SQ.getWithInstruction(&Cmp);
+ if (Instruction *R = foldCtpopPow2Test(Cmp, II, C, Builder, Q))
+ return R;
+ } break;
+ }
+
if (Cmp.isEquality())
return foldICmpEqIntrinsicWithConstant(Cmp, II, C);
Type *Ty = II->getType();
unsigned BitWidth = C.getBitWidth();
- ICmpInst::Predicate Pred = Cmp.getPredicate();
switch (II->getIntrinsicID()) {
case Intrinsic::ctpop: {
// (ctpop X > BitWidth - 1) --> X == -1
@@ -3484,6 +3847,21 @@ Instruction *InstCombinerImpl::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
}
break;
}
+ case Intrinsic::ssub_sat:
+ // ssub.sat(a, b) spred 0 -> a spred b
+ if (ICmpInst::isSigned(Pred)) {
+ if (C.isZero())
+ return new ICmpInst(Pred, II->getArgOperand(0), II->getArgOperand(1));
+ // X s<= 0 is canonicalized to X s< 1
+ if (Pred == ICmpInst::ICMP_SLT && C.isOne())
+ return new ICmpInst(ICmpInst::ICMP_SLE, II->getArgOperand(0),
+ II->getArgOperand(1));
+ // X s>= 0 is canonicalized to X s> -1
+ if (Pred == ICmpInst::ICMP_SGT && C.isAllOnes())
+ return new ICmpInst(ICmpInst::ICMP_SGE, II->getArgOperand(0),
+ II->getArgOperand(1));
+ }
+ break;
default:
break;
}
@@ -4014,20 +4392,60 @@ Value *InstCombinerImpl::foldMultiplicationOverflowCheck(ICmpInst &I) {
return Res;
}
-static Instruction *foldICmpXNegX(ICmpInst &I) {
+static Instruction *foldICmpXNegX(ICmpInst &I,
+ InstCombiner::BuilderTy &Builder) {
CmpInst::Predicate Pred;
Value *X;
- if (!match(&I, m_c_ICmp(Pred, m_NSWNeg(m_Value(X)), m_Deferred(X))))
- return nullptr;
+ if (match(&I, m_c_ICmp(Pred, m_NSWNeg(m_Value(X)), m_Deferred(X)))) {
+
+ if (ICmpInst::isSigned(Pred))
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ else if (ICmpInst::isUnsigned(Pred))
+ Pred = ICmpInst::getSignedPredicate(Pred);
+ // else for equality-comparisons just keep the predicate.
+
+ return ICmpInst::Create(Instruction::ICmp, Pred, X,
+ Constant::getNullValue(X->getType()), I.getName());
+ }
+
+ // A value is not equal to its negation unless that value is 0 or
+ // MinSignedValue, i.e., a != -a --> (a & MaxSignedVal) != 0
+ if (match(&I, m_c_ICmp(Pred, m_OneUse(m_Neg(m_Value(X))), m_Deferred(X))) &&
+ ICmpInst::isEquality(Pred)) {
+ Type *Ty = X->getType();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ Constant *MaxSignedVal =
+ ConstantInt::get(Ty, APInt::getSignedMaxValue(BitWidth));
+ Value *And = Builder.CreateAnd(X, MaxSignedVal);
+ Constant *Zero = Constant::getNullValue(Ty);
+ return CmpInst::Create(Instruction::ICmp, Pred, And, Zero);
+ }
+
+ return nullptr;
+}
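The new second pattern in IR terms (i32 is just for illustration): a value equals its own negation only when it is 0 or INT_MIN, so the inequality becomes a sign-bit-masked test against zero.

  define i1 @src(i32 %a) {
    %neg = sub i32 0, %a
    %r = icmp ne i32 %a, %neg
    ret i1 %r
  }
  ; is expected to become roughly:
  define i1 @tgt(i32 %a) {
    %m = and i32 %a, 2147483647  ; clear the sign bit
    %r = icmp ne i32 %m, 0
    ret i1 %r
  }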
- if (ICmpInst::isSigned(Pred))
+static Instruction *foldICmpXorXX(ICmpInst &I, const SimplifyQuery &Q,
+ InstCombinerImpl &IC) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *A;
+ // Normalize xor operand as operand 0.
+ CmpInst::Predicate Pred = I.getPredicate();
+ if (match(Op1, m_c_Xor(m_Specific(Op0), m_Value()))) {
+ std::swap(Op0, Op1);
Pred = ICmpInst::getSwappedPredicate(Pred);
- else if (ICmpInst::isUnsigned(Pred))
- Pred = ICmpInst::getSignedPredicate(Pred);
- // else for equality-comparisons just keep the predicate.
+ }
+ if (!match(Op0, m_c_Xor(m_Specific(Op1), m_Value(A))))
+ return nullptr;
- return ICmpInst::Create(Instruction::ICmp, Pred, X,
- Constant::getNullValue(X->getType()), I.getName());
+ // icmp (X ^ Y_NonZero) u>= X --> icmp (X ^ Y_NonZero) u> X
+ // icmp (X ^ Y_NonZero) u<= X --> icmp (X ^ Y_NonZero) u< X
+ // icmp (X ^ Y_NonZero) s>= X --> icmp (X ^ Y_NonZero) s> X
+ // icmp (X ^ Y_NonZero) s<= X --> icmp (X ^ Y_NonZero) s< X
+ CmpInst::Predicate PredOut = CmpInst::getStrictPredicate(Pred);
+ if (PredOut != Pred &&
+ isKnownNonZero(A, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT))
+ return new ICmpInst(PredOut, Op0, Op1);
+
+ return nullptr;
}
/// Try to fold icmp (binop), X or icmp X, (binop).
@@ -4045,7 +4463,7 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
if (!BO0 && !BO1)
return nullptr;
- if (Instruction *NewICmp = foldICmpXNegX(I))
+ if (Instruction *NewICmp = foldICmpXNegX(I, Builder))
return NewICmp;
const CmpInst::Predicate Pred = I.getPredicate();
@@ -4326,17 +4744,41 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
ConstantExpr::getNeg(RHSC));
}
+ if (Instruction * R = foldICmpXorXX(I, Q, *this))
+ return R;
+
{
- // Try to remove shared constant multiplier from equality comparison:
- // X * C == Y * C (with no overflowing/aliasing) --> X == Y
- Value *X, *Y;
- const APInt *C;
- if (match(Op0, m_Mul(m_Value(X), m_APInt(C))) && *C != 0 &&
- match(Op1, m_Mul(m_Value(Y), m_SpecificInt(*C))) && I.isEquality())
- if (!C->countTrailingZeros() ||
- (BO0 && BO1 && BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap()) ||
- (BO0 && BO1 && BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap()))
- return new ICmpInst(Pred, X, Y);
+ // Try to remove shared multiplier from comparison:
+ // X * Z u{lt/le/gt/ge}/eq/ne Y * Z
+ Value *X, *Y, *Z;
+ if (Pred == ICmpInst::getUnsignedPredicate(Pred) &&
+ ((match(Op0, m_Mul(m_Value(X), m_Value(Z))) &&
+ match(Op1, m_c_Mul(m_Specific(Z), m_Value(Y)))) ||
+ (match(Op0, m_Mul(m_Value(Z), m_Value(X))) &&
+ match(Op1, m_c_Mul(m_Specific(Z), m_Value(Y)))))) {
+ bool NonZero;
+ if (ICmpInst::isEquality(Pred)) {
+ KnownBits ZKnown = computeKnownBits(Z, 0, &I);
+ // if Z % 2 != 0
+ // X * Z eq/ne Y * Z -> X eq/ne Y
+ if (ZKnown.countMaxTrailingZeros() == 0)
+ return new ICmpInst(Pred, X, Y);
+ NonZero = !ZKnown.One.isZero() ||
+ isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
+ // if Z != 0 and nsw(X * Z) and nsw(Y * Z)
+ // X * Z eq/ne Y * Z -> X eq/ne Y
+ if (NonZero && BO0 && BO1 && BO0->hasNoSignedWrap() &&
+ BO1->hasNoSignedWrap())
+ return new ICmpInst(Pred, X, Y);
+ } else
+ NonZero = isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
+
+ // If Z != 0 and nuw(X * Z) and nuw(Y * Z)
+ // X * Z u{lt/le/gt/ge}/eq/ne Y * Z -> X u{lt/le/gt/ge}/eq/ne Y
+ if (NonZero && BO0 && BO1 && BO0->hasNoUnsignedWrap() &&
+ BO1->hasNoUnsignedWrap())
+ return new ICmpInst(Pred, X, Y);
+ }
}
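A sketch of the extended fold for a relational predicate (the "or %z, 1" is only there so the shared factor is provably non-zero; names are illustrative):

  define i1 @src(i32 %x, i32 %y, i32 %z) {
    %z1 = or i32 %z, 1           ; known non-zero
    %a = mul nuw i32 %x, %z1
    %b = mul nuw i32 %y, %z1
    %r = icmp ult i32 %a, %b
    ret i1 %r
  }
  ; is expected to become roughly:
  define i1 @tgt(i32 %x, i32 %y, i32 %z) {
    %r = icmp ult i32 %x, %y
    ret i1 %r
  }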
BinaryOperator *SRem = nullptr;
@@ -4405,7 +4847,7 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
!C->isOne()) {
// icmp eq/ne (X * C), (Y * C) --> icmp (X & Mask), (Y & Mask)
// Mask = -1 >> count-trailing-zeros(C).
- if (unsigned TZs = C->countTrailingZeros()) {
+ if (unsigned TZs = C->countr_zero()) {
Constant *Mask = ConstantInt::get(
BO0->getType(),
APInt::getLowBitsSet(C->getBitWidth(), C->getBitWidth() - TZs));
@@ -4569,6 +5011,59 @@ static Instruction *foldICmpWithMinMax(ICmpInst &Cmp) {
return nullptr;
}
+// Canonicalize checking for a power-of-2-or-zero value:
+static Instruction *foldICmpPow2Test(ICmpInst &I,
+ InstCombiner::BuilderTy &Builder) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ const CmpInst::Predicate Pred = I.getPredicate();
+ Value *A = nullptr;
+ bool CheckIs;
+ if (I.isEquality()) {
+ // (A & (A-1)) == 0 --> ctpop(A) < 2 (two commuted variants)
+ // ((A-1) & A) != 0 --> ctpop(A) > 1 (two commuted variants)
+ if (!match(Op0, m_OneUse(m_c_And(m_Add(m_Value(A), m_AllOnes()),
+ m_Deferred(A)))) ||
+ !match(Op1, m_ZeroInt()))
+ A = nullptr;
+
+ // (A & -A) == A --> ctpop(A) < 2 (four commuted variants)
+ // (-A & A) != A --> ctpop(A) > 1 (four commuted variants)
+ if (match(Op0, m_OneUse(m_c_And(m_Neg(m_Specific(Op1)), m_Specific(Op1)))))
+ A = Op1;
+ else if (match(Op1,
+ m_OneUse(m_c_And(m_Neg(m_Specific(Op0)), m_Specific(Op0)))))
+ A = Op0;
+
+ CheckIs = Pred == ICmpInst::ICMP_EQ;
+ } else if (ICmpInst::isUnsigned(Pred)) {
+ // (A ^ (A-1)) u>= A --> ctpop(A) < 2 (two commuted variants)
+ // ((A-1) ^ A) u< A --> ctpop(A) > 1 (two commuted variants)
+
+ if ((Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_ULT) &&
+ match(Op0, m_OneUse(m_c_Xor(m_Add(m_Specific(Op1), m_AllOnes()),
+ m_Specific(Op1))))) {
+ A = Op1;
+ CheckIs = Pred == ICmpInst::ICMP_UGE;
+ } else if ((Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE) &&
+ match(Op1, m_OneUse(m_c_Xor(m_Add(m_Specific(Op0), m_AllOnes()),
+ m_Specific(Op0))))) {
+ A = Op0;
+ CheckIs = Pred == ICmpInst::ICMP_ULE;
+ }
+ }
+
+ if (A) {
+ Type *Ty = A->getType();
+ CallInst *CtPop = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, A);
+ return CheckIs ? new ICmpInst(ICmpInst::ICMP_ULT, CtPop,
+ ConstantInt::get(Ty, 2))
+ : new ICmpInst(ICmpInst::ICMP_UGT, CtPop,
+ ConstantInt::get(Ty, 1));
+ }
+
+ return nullptr;
+}
+
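The newly added unsigned-predicate variant, as a hand-written sketch: (A ^ (A-1)) u>= A holds exactly when A is zero or a power of two, which the canonical form expresses with ctpop.

  declare i32 @llvm.ctpop.i32(i32)

  define i1 @src(i32 %a) {
    %am1 = add i32 %a, -1
    %x = xor i32 %a, %am1
    %r = icmp uge i32 %x, %a
    ret i1 %r
  }
  ; is expected to become roughly:
  define i1 @tgt(i32 %a) {
    %p = call i32 @llvm.ctpop.i32(i32 %a)
    %r = icmp ult i32 %p, 2
    ret i1 %r
  }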
Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
if (!I.isEquality())
return nullptr;
@@ -4604,6 +5099,21 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
}
}
+ // canonicalize:
+ // (icmp eq/ne (and X, C), X)
+ // -> (icmp eq/ne (and X, ~C), 0)
+ {
+ Constant *CMask;
+ A = nullptr;
+ if (match(Op0, m_OneUse(m_And(m_Specific(Op1), m_ImmConstant(CMask)))))
+ A = Op1;
+ else if (match(Op1, m_OneUse(m_And(m_Specific(Op0), m_ImmConstant(CMask)))))
+ A = Op0;
+ if (A)
+ return new ICmpInst(Pred, Builder.CreateAnd(A, Builder.CreateNot(CMask)),
+ Constant::getNullValue(A->getType()));
+ }
+
if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) {
// A == (A^B) -> B == 0
Value *OtherVal = A == Op0 ? B : A;
@@ -4659,22 +5169,36 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
// (B & (Pow2C-1)) != zext A --> A != trunc B
const APInt *MaskC;
if (match(Op0, m_And(m_Value(B), m_LowBitMask(MaskC))) &&
- MaskC->countTrailingOnes() == A->getType()->getScalarSizeInBits())
+ MaskC->countr_one() == A->getType()->getScalarSizeInBits())
return new ICmpInst(Pred, A, Builder.CreateTrunc(B, A->getType()));
+ }
- // Test if 2 values have different or same signbits:
- // (X u>> BitWidth - 1) == zext (Y s> -1) --> (X ^ Y) < 0
- // (X u>> BitWidth - 1) != zext (Y s> -1) --> (X ^ Y) > -1
+ // Test if 2 values have different or same signbits:
+ // (X u>> BitWidth - 1) == zext (Y s> -1) --> (X ^ Y) < 0
+ // (X u>> BitWidth - 1) != zext (Y s> -1) --> (X ^ Y) > -1
+ // (X s>> BitWidth - 1) == sext (Y s> -1) --> (X ^ Y) < 0
+ // (X s>> BitWidth - 1) != sext (Y s> -1) --> (X ^ Y) > -1
+ Instruction *ExtI;
+ if (match(Op1, m_CombineAnd(m_Instruction(ExtI), m_ZExtOrSExt(m_Value(A)))) &&
+ (Op0->hasOneUse() || Op1->hasOneUse())) {
unsigned OpWidth = Op0->getType()->getScalarSizeInBits();
+ Instruction *ShiftI;
Value *X, *Y;
ICmpInst::Predicate Pred2;
- if (match(Op0, m_LShr(m_Value(X), m_SpecificIntAllowUndef(OpWidth - 1))) &&
+ if (match(Op0, m_CombineAnd(m_Instruction(ShiftI),
+ m_Shr(m_Value(X),
+ m_SpecificIntAllowUndef(OpWidth - 1)))) &&
match(A, m_ICmp(Pred2, m_Value(Y), m_AllOnes())) &&
Pred2 == ICmpInst::ICMP_SGT && X->getType() == Y->getType()) {
- Value *Xor = Builder.CreateXor(X, Y, "xor.signbits");
- Value *R = (Pred == ICmpInst::ICMP_EQ) ? Builder.CreateIsNeg(Xor) :
- Builder.CreateIsNotNeg(Xor);
- return replaceInstUsesWith(I, R);
+ unsigned ExtOpc = ExtI->getOpcode();
+ unsigned ShiftOpc = ShiftI->getOpcode();
+ if ((ExtOpc == Instruction::ZExt && ShiftOpc == Instruction::LShr) ||
+ (ExtOpc == Instruction::SExt && ShiftOpc == Instruction::AShr)) {
+ Value *Xor = Builder.CreateXor(X, Y, "xor.signbits");
+ Value *R = (Pred == ICmpInst::ICMP_EQ) ? Builder.CreateIsNeg(Xor)
+ : Builder.CreateIsNotNeg(Xor);
+ return replaceInstUsesWith(I, R);
+ }
}
}
@@ -4737,33 +5261,9 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
}
}
- if (Instruction *ICmp = foldICmpIntrinsicWithIntrinsic(I))
+ if (Instruction *ICmp = foldICmpIntrinsicWithIntrinsic(I, Builder))
return ICmp;
- // Canonicalize checking for a power-of-2-or-zero value:
- // (A & (A-1)) == 0 --> ctpop(A) < 2 (two commuted variants)
- // ((A-1) & A) != 0 --> ctpop(A) > 1 (two commuted variants)
- if (!match(Op0, m_OneUse(m_c_And(m_Add(m_Value(A), m_AllOnes()),
- m_Deferred(A)))) ||
- !match(Op1, m_ZeroInt()))
- A = nullptr;
-
- // (A & -A) == A --> ctpop(A) < 2 (four commuted variants)
- // (-A & A) != A --> ctpop(A) > 1 (four commuted variants)
- if (match(Op0, m_OneUse(m_c_And(m_Neg(m_Specific(Op1)), m_Specific(Op1)))))
- A = Op1;
- else if (match(Op1,
- m_OneUse(m_c_And(m_Neg(m_Specific(Op0)), m_Specific(Op0)))))
- A = Op0;
-
- if (A) {
- Type *Ty = A->getType();
- CallInst *CtPop = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, A);
- return Pred == ICmpInst::ICMP_EQ
- ? new ICmpInst(ICmpInst::ICMP_ULT, CtPop, ConstantInt::get(Ty, 2))
- : new ICmpInst(ICmpInst::ICMP_UGT, CtPop, ConstantInt::get(Ty, 1));
- }
-
// Match icmp eq (trunc (lshr A, BW), (ashr (trunc A), BW-1)), which checks the
// top BW/2 + 1 bits are all the same. Create "A >=s INT_MIN && A <=s INT_MAX",
// which we generate as "icmp ult (add A, 2^(BW-1)), 2^BW" to skip a few steps
@@ -4794,11 +5294,23 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
return new ICmpInst(CmpInst::getInversePredicate(Pred), Op1,
ConstantInt::getNullValue(Op1->getType()));
+ // Canonicalize:
+ // icmp eq/ne X, OneUse(rotate-right(X))
+ // -> icmp eq/ne X, rotate-left(X)
+ // We generally try to convert rotate-right -> rotate-left; this just
+ // canonicalizes another case.
+ CmpInst::Predicate PredUnused = Pred;
+ if (match(&I, m_c_ICmp(PredUnused, m_Value(A),
+ m_OneUse(m_Intrinsic<Intrinsic::fshr>(
+ m_Deferred(A), m_Deferred(A), m_Value(B))))))
+ return new ICmpInst(
+ Pred, A,
+ Builder.CreateIntrinsic(Op0->getType(), Intrinsic::fshl, {A, A, B}));
+
return nullptr;
}
-static Instruction *foldICmpWithTrunc(ICmpInst &ICmp,
- InstCombiner::BuilderTy &Builder) {
+Instruction *InstCombinerImpl::foldICmpWithTrunc(ICmpInst &ICmp) {
ICmpInst::Predicate Pred = ICmp.getPredicate();
Value *Op0 = ICmp.getOperand(0), *Op1 = ICmp.getOperand(1);
@@ -4836,6 +5348,25 @@ static Instruction *foldICmpWithTrunc(ICmpInst &ICmp,
return new ICmpInst(ICmpInst::ICMP_EQ, And, MaskC);
}
+ if (auto *II = dyn_cast<IntrinsicInst>(X)) {
+ if (II->getIntrinsicID() == Intrinsic::cttz ||
+ II->getIntrinsicID() == Intrinsic::ctlz) {
+ unsigned MaxRet = SrcBits;
+ // If the "is_zero_poison" argument is set, then we know at least
+ // one bit is set in the input, so the result is always at least one
+ // less than the full bitwidth of that input.
+ if (match(II->getArgOperand(1), m_One()))
+ MaxRet--;
+
+ // Make sure the destination is wide enough to hold the largest output of
+ // the intrinsic.
+ if (llvm::Log2_32(MaxRet) + 1 <= Op0->getType()->getScalarSizeInBits())
+ if (Instruction *I =
+ foldICmpIntrinsicWithConstant(ICmp, II, C->zext(SrcBits)))
+ return I;
+ }
+ }
+
return nullptr;
}
@@ -4855,10 +5386,19 @@ Instruction *InstCombinerImpl::foldICmpWithZextOrSext(ICmpInst &ICmp) {
bool IsZext0 = isa<ZExtOperator>(ICmp.getOperand(0));
bool IsZext1 = isa<ZExtOperator>(ICmp.getOperand(1));
- // If we have mismatched casts, treat the zext of a non-negative source as
- // a sext to simulate matching casts. Otherwise, we are done.
- // TODO: Can we handle some predicates (equality) without non-negative?
if (IsZext0 != IsZext1) {
+ // If X and Y are both i1
+ // (icmp eq/ne (zext X) (sext Y))
+ // eq -> (icmp eq (or X, Y), 0)
+ // ne -> (icmp ne (or X, Y), 0)
+ if (ICmp.isEquality() && X->getType()->isIntOrIntVectorTy(1) &&
+ Y->getType()->isIntOrIntVectorTy(1))
+ return new ICmpInst(ICmp.getPredicate(), Builder.CreateOr(X, Y),
+ Constant::getNullValue(X->getType()));
+
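In IR terms (i32 chosen arbitrarily as the wide type, @src/@tgt illustrative), the new i1 special case looks roughly like:

  define i1 @src(i1 %a, i1 %b) {
    %za = zext i1 %a to i32
    %sb = sext i1 %b to i32
    %r = icmp eq i32 %za, %sb    ; equal only when %a and %b are both false
    ret i1 %r
  }
  ; is expected to become roughly:
  define i1 @tgt(i1 %a, i1 %b) {
    %or = or i1 %a, %b
    %r = icmp eq i1 %or, false
    ret i1 %r
  }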
+ // If we have mismatched casts, treat the zext of a non-negative source as
+ // a sext to simulate matching casts. Otherwise, we are done.
+ // TODO: Can we handle some predicates (equality) without non-negative?
if ((IsZext0 && isKnownNonNegative(X, DL, 0, &AC, &ICmp, &DT)) ||
(IsZext1 && isKnownNonNegative(Y, DL, 0, &AC, &ICmp, &DT)))
IsSignedExt = true;
@@ -4993,7 +5533,7 @@ Instruction *InstCombinerImpl::foldICmpWithCastOp(ICmpInst &ICmp) {
return new ICmpInst(ICmp.getPredicate(), Op0Src, NewOp1);
}
- if (Instruction *R = foldICmpWithTrunc(ICmp, Builder))
+ if (Instruction *R = foldICmpWithTrunc(ICmp))
return R;
return foldICmpWithZextOrSext(ICmp);
@@ -5153,7 +5693,7 @@ static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
return nullptr;
if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1))) {
const APInt &CVal = CI->getValue();
- if (CVal.getBitWidth() - CVal.countLeadingZeros() > MulWidth)
+ if (CVal.getBitWidth() - CVal.countl_zero() > MulWidth)
return nullptr;
} else {
// In this case we could have the operand of the binary operation
@@ -5334,44 +5874,18 @@ static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth) {
// bits doesn't impact the outcome of the comparison, because any value
// greater than the RHS must differ in a bit higher than these due to carry.
case ICmpInst::ICMP_UGT:
- return APInt::getBitsSetFrom(BitWidth, RHS->countTrailingOnes());
+ return APInt::getBitsSetFrom(BitWidth, RHS->countr_one());
// Similarly, for a ULT comparison, we don't care about the trailing zeros.
// Any value less than the RHS must differ in a higher bit because of carries.
case ICmpInst::ICMP_ULT:
- return APInt::getBitsSetFrom(BitWidth, RHS->countTrailingZeros());
+ return APInt::getBitsSetFrom(BitWidth, RHS->countr_zero());
default:
return APInt::getAllOnes(BitWidth);
}
}
-/// Check if the order of \p Op0 and \p Op1 as operands in an ICmpInst
-/// should be swapped.
-/// The decision is based on how many times these two operands are reused
-/// as subtract operands and their positions in those instructions.
-/// The rationale is that several architectures use the same instruction for
-/// both subtract and cmp. Thus, it is better if the order of those operands
-/// match.
-/// \return true if Op0 and Op1 should be swapped.
-static bool swapMayExposeCSEOpportunities(const Value *Op0, const Value *Op1) {
- // Filter out pointer values as those cannot appear directly in subtract.
- // FIXME: we may want to go through inttoptrs or bitcasts.
- if (Op0->getType()->isPointerTy())
- return false;
- // If a subtract already has the same operands as a compare, swapping would be
- // bad. If a subtract has the same operands as a compare but in reverse order,
- // then swapping is good.
- int GoodToSwap = 0;
- for (const User *U : Op0->users()) {
- if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
- GoodToSwap++;
- else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
- GoodToSwap--;
- }
- return GoodToSwap > 0;
-}
-
/// Check that one use is in the same block as the definition and all
/// other uses are in blocks dominated by a given block.
///
@@ -5638,14 +6152,14 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
const APInt *C1;
if (match(LHS, m_Shl(m_Power2(C1), m_Value(X)))) {
Type *XTy = X->getType();
- unsigned Log2C1 = C1->countTrailingZeros();
+ unsigned Log2C1 = C1->countr_zero();
APInt C2 = Op0KnownZeroInverted;
APInt C2Pow2 = (C2 & ~(*C1 - 1)) + *C1;
if (C2Pow2.isPowerOf2()) {
// iff (C1 is pow2) & ((C2 & ~(C1-1)) + C1) is pow2):
// ((C1 << X) & C2) == 0 -> X >= (Log2(C2+C1) - Log2(C1))
// ((C1 << X) & C2) != 0 -> X < (Log2(C2+C1) - Log2(C1))
- unsigned Log2C2 = C2Pow2.countTrailingZeros();
+ unsigned Log2C2 = C2Pow2.countr_zero();
auto *CmpC = ConstantInt::get(XTy, Log2C2 - Log2C1);
auto NewPred =
Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGE : CmpInst::ICMP_ULT;
@@ -5653,6 +6167,12 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
}
}
}
+
+ // Op0 eq C_Pow2 -> Op0 ne 0 if Op0 is known to be C_Pow2 or zero.
+ if (Op1Known.isConstant() && Op1Known.getConstant().isPowerOf2() &&
+ (Op0Known & Op1Known) == Op0Known)
+ return new ICmpInst(CmpInst::getInversePredicate(Pred), Op0,
+ ConstantInt::getNullValue(Op1->getType()));
break;
}
case ICmpInst::ICMP_ULT: {
@@ -5733,8 +6253,7 @@ Instruction *InstCombinerImpl::foldICmpUsingKnownBits(ICmpInst &I) {
/// If one operand of an icmp is effectively a bool (value range of {0,1}),
/// then try to reduce patterns based on that limit.
-static Instruction *foldICmpUsingBoolRange(ICmpInst &I,
- InstCombiner::BuilderTy &Builder) {
+Instruction *InstCombinerImpl::foldICmpUsingBoolRange(ICmpInst &I) {
Value *X, *Y;
ICmpInst::Predicate Pred;
@@ -5750,6 +6269,60 @@ static Instruction *foldICmpUsingBoolRange(ICmpInst &I,
Y->getType()->isIntOrIntVectorTy(1) && Pred == ICmpInst::ICMP_ULE)
return BinaryOperator::CreateOr(Builder.CreateIsNull(X), Y);
+ const APInt *C;
+ if (match(I.getOperand(0), m_c_Add(m_ZExt(m_Value(X)), m_SExt(m_Value(Y)))) &&
+ match(I.getOperand(1), m_APInt(C)) &&
+ X->getType()->isIntOrIntVectorTy(1) &&
+ Y->getType()->isIntOrIntVectorTy(1)) {
+ unsigned BitWidth = C->getBitWidth();
+ Pred = I.getPredicate();
+ APInt Zero = APInt::getZero(BitWidth);
+ APInt MinusOne = APInt::getAllOnes(BitWidth);
+ APInt One(BitWidth, 1);
+ if ((C->sgt(Zero) && Pred == ICmpInst::ICMP_SGT) ||
+ (C->slt(Zero) && Pred == ICmpInst::ICMP_SLT))
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ if ((C->sgt(One) && Pred == ICmpInst::ICMP_SLT) ||
+ (C->slt(MinusOne) && Pred == ICmpInst::ICMP_SGT))
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+
+ if (I.getOperand(0)->hasOneUse()) {
+ APInt NewC = *C;
+ // canonicalize predicate to eq/ne
+ if ((*C == Zero && Pred == ICmpInst::ICMP_SLT) ||
+ (*C != Zero && *C != MinusOne && Pred == ICmpInst::ICMP_UGT)) {
+ // x s< 0 in [-1, 1] --> x == -1
+ // x u> 1 (or any const != 0, != -1) in [-1, 1] --> x == -1
+ NewC = MinusOne;
+ Pred = ICmpInst::ICMP_EQ;
+ } else if ((*C == MinusOne && Pred == ICmpInst::ICMP_SGT) ||
+ (*C != Zero && *C != One && Pred == ICmpInst::ICMP_ULT)) {
+ // x s> -1 in [-1, 1] --> x != -1
+ // x u< -1 in [-1, 1] --> x != -1
+ Pred = ICmpInst::ICMP_NE;
+ } else if (*C == Zero && Pred == ICmpInst::ICMP_SGT) {
+ // x s> 0 in [-1, 1] --> x == 1
+ NewC = One;
+ Pred = ICmpInst::ICMP_EQ;
+ } else if (*C == One && Pred == ICmpInst::ICMP_SLT) {
+ // x s< 1 in [-1, 1] --> x != 1
+ Pred = ICmpInst::ICMP_NE;
+ }
+
+ if (NewC == MinusOne) {
+ if (Pred == ICmpInst::ICMP_EQ)
+ return BinaryOperator::CreateAnd(Builder.CreateNot(X), Y);
+ if (Pred == ICmpInst::ICMP_NE)
+ return BinaryOperator::CreateOr(X, Builder.CreateNot(Y));
+ } else if (NewC == One) {
+ if (Pred == ICmpInst::ICMP_EQ)
+ return BinaryOperator::CreateAnd(X, Builder.CreateNot(Y));
+ if (Pred == ICmpInst::ICMP_NE)
+ return BinaryOperator::CreateOr(Builder.CreateNot(X), Y);
+ }
+ }
+ }
+
return nullptr;
}
@@ -6162,8 +6735,7 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
/// Orders the operands of the compare so that they are listed from most
/// complex to least complex. This puts constants before unary operators,
/// before binary operators.
- if (Op0Cplxity < Op1Cplxity ||
- (Op0Cplxity == Op1Cplxity && swapMayExposeCSEOpportunities(Op0, Op1))) {
+ if (Op0Cplxity < Op1Cplxity) {
I.swapOperands();
std::swap(Op0, Op1);
Changed = true;
@@ -6205,7 +6777,7 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpWithDominatingICmp(I))
return Res;
- if (Instruction *Res = foldICmpUsingBoolRange(I, Builder))
+ if (Instruction *Res = foldICmpUsingBoolRange(I))
return Res;
if (Instruction *Res = foldICmpUsingKnownBits(I))
@@ -6288,15 +6860,46 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *NI = foldSelectICmp(I.getSwappedPredicate(), SI, Op0, I))
return NI;
+ // In case of a comparison with two select instructions having the same
+ // condition, check whether one of the resulting branches can be simplified.
+ // If so, just compare the other branch and select the appropriate result.
+ // For example:
+ // %tmp1 = select i1 %cmp, i32 %y, i32 %x
+ // %tmp2 = select i1 %cmp, i32 %z, i32 %x
+ // %cmp2 = icmp slt i32 %tmp2, %tmp1
+ // The icmp is always false for the false arms of the selects (%x slt %x),
+ // and the result depends on comparing the true arms when %cmp is true.
+ // Thus, transform this into:
+ // %newcmp = icmp slt i32 %z, %y
+ // %sel = select i1 %cmp, i1 %newcmp, i1 false
+ // The symmetric case, where the true arms simplify instead, is handled too.
+ {
+ Value *Cond, *A, *B, *C, *D;
+ if (match(Op0, m_Select(m_Value(Cond), m_Value(A), m_Value(B))) &&
+ match(Op1, m_Select(m_Specific(Cond), m_Value(C), m_Value(D))) &&
+ (Op0->hasOneUse() || Op1->hasOneUse())) {
+ // Check whether comparison of TrueValues can be simplified
+ if (Value *Res = simplifyICmpInst(Pred, A, C, SQ)) {
+ Value *NewICMP = Builder.CreateICmp(Pred, B, D);
+ return SelectInst::Create(Cond, Res, NewICMP);
+ }
+ // Check whether comparison of FalseValues can be simplified
+ if (Value *Res = simplifyICmpInst(Pred, B, D, SQ)) {
+ Value *NewICMP = Builder.CreateICmp(Pred, A, C);
+ return SelectInst::Create(Cond, NewICMP, Res);
+ }
+ }
+ }
+
// Try to optimize equality comparisons against alloca-based pointers.
if (Op0->getType()->isPointerTy() && I.isEquality()) {
assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?");
if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op0)))
- if (Instruction *New = foldAllocaCmp(I, Alloca))
- return New;
+ if (foldAllocaCmp(Alloca))
+ return nullptr;
if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(Op1)))
- if (Instruction *New = foldAllocaCmp(I, Alloca))
- return New;
+ if (foldAllocaCmp(Alloca))
+ return nullptr;
}
if (Instruction *Res = foldICmpBitCast(I))
@@ -6363,6 +6966,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpEquality(I))
return Res;
+ if (Instruction *Res = foldICmpPow2Test(I, Builder))
+ return Res;
+
if (Instruction *Res = foldICmpOfUAddOv(I))
return Res;
@@ -6717,7 +7323,7 @@ static Instruction *foldFabsWithFcmpZero(FCmpInst &I, InstCombinerImpl &IC) {
Mode.Input == DenormalMode::PositiveZero) {
auto replaceFCmp = [](FCmpInst *I, FCmpInst::Predicate P, Value *X) {
- Constant *Zero = ConstantFP::getNullValue(X->getType());
+ Constant *Zero = ConstantFP::getZero(X->getType());
return new FCmpInst(P, X, Zero, "", I);
};
@@ -6813,7 +7419,7 @@ static Instruction *foldFCmpFNegCommonOp(FCmpInst &I) {
// Replace the negated operand with 0.0:
// fcmp Pred Op0, -Op0 --> fcmp Pred Op0, 0.0
- Constant *Zero = ConstantFP::getNullValue(Op0->getType());
+ Constant *Zero = ConstantFP::getZero(Op0->getType());
return new FCmpInst(Pred, Op0, Zero, "", &I);
}
@@ -6863,11 +7469,13 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
// If we're just checking for a NaN (ORD/UNO) and have a non-NaN operand,
// then canonicalize the operand to 0.0.
if (Pred == CmpInst::FCMP_ORD || Pred == CmpInst::FCMP_UNO) {
- if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0, &TLI))
- return replaceOperand(I, 0, ConstantFP::getNullValue(OpType));
+ if (!match(Op0, m_PosZeroFP()) && isKnownNeverNaN(Op0, DL, &TLI, 0,
+ &AC, &I, &DT))
+ return replaceOperand(I, 0, ConstantFP::getZero(OpType));
- if (!match(Op1, m_PosZeroFP()) && isKnownNeverNaN(Op1, &TLI))
- return replaceOperand(I, 1, ConstantFP::getNullValue(OpType));
+ if (!match(Op1, m_PosZeroFP()) &&
+ isKnownNeverNaN(Op1, DL, &TLI, 0, &AC, &I, &DT))
+ return replaceOperand(I, 1, ConstantFP::getZero(OpType));
}
// fcmp pred (fneg X), (fneg Y) -> fcmp swap(pred) X, Y
@@ -6896,7 +7504,7 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
// The sign of 0.0 is ignored by fcmp, so canonicalize to +0.0:
// fcmp Pred X, -0.0 --> fcmp Pred X, 0.0
if (match(Op1, m_AnyZeroFP()) && !match(Op1, m_PosZeroFP()))
- return replaceOperand(I, 1, ConstantFP::getNullValue(OpType));
+ return replaceOperand(I, 1, ConstantFP::getZero(OpType));
// Ignore signbit of bitcasted int when comparing equality to FP 0.0:
// fcmp oeq/une (bitcast X), 0.0 --> (and X, SignMaskC) ==/!= 0
@@ -6985,11 +7593,11 @@ Instruction *InstCombinerImpl::visitFCmpInst(FCmpInst &I) {
case FCmpInst::FCMP_ONE:
// X is ordered and not equal to an impossible constant --> ordered
return new FCmpInst(FCmpInst::FCMP_ORD, X,
- ConstantFP::getNullValue(X->getType()));
+ ConstantFP::getZero(X->getType()));
case FCmpInst::FCMP_UEQ:
// X is unordered or equal to an impossible constant --> unordered
return new FCmpInst(FCmpInst::FCMP_UNO, X,
- ConstantFP::getNullValue(X->getType()));
+ ConstantFP::getZero(X->getType()));
case FCmpInst::FCMP_UNE:
// X is unordered or not equal to an impossible constant --> true
return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index f4e88b122383..701579e1de48 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -150,7 +150,6 @@ public:
Instruction *visitPHINode(PHINode &PN);
Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
Instruction *visitGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src);
- Instruction *visitGEPOfBitcast(BitCastInst *BCI, GetElementPtrInst &GEP);
Instruction *visitAllocaInst(AllocaInst &AI);
Instruction *visitAllocSite(Instruction &FI);
Instruction *visitFree(CallInst &FI, Value *FreedOp);
@@ -330,8 +329,7 @@ private:
Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN);
Instruction *matchSAddSubSat(IntrinsicInst &MinMax1);
Instruction *foldNot(BinaryOperator &I);
-
- void freelyInvertAllUsersOf(Value *V, Value *IgnoredUser = nullptr);
+ Instruction *foldBinOpOfDisplacedShifts(BinaryOperator &I);
/// Determine if a pair of casts can be replaced by a single cast.
///
@@ -378,6 +376,7 @@ private:
Instruction *foldLShrOverflowBit(BinaryOperator &I);
Instruction *foldExtractOfOverflowIntrinsic(ExtractValueInst &EV);
Instruction *foldIntrinsicWithOverflowCommon(IntrinsicInst *II);
+ Instruction *foldIntrinsicIsFPClass(IntrinsicInst &II);
Instruction *foldFPSignBitOps(BinaryOperator &I);
Instruction *foldFDivConstantDivisor(BinaryOperator &I);
@@ -393,12 +392,12 @@ public:
/// without having to rewrite the CFG from within InstCombine.
void CreateNonTerminatorUnreachable(Instruction *InsertAt) {
auto &Ctx = InsertAt->getContext();
- new StoreInst(ConstantInt::getTrue(Ctx),
- PoisonValue::get(Type::getInt1PtrTy(Ctx)),
- InsertAt);
+ auto *SI = new StoreInst(ConstantInt::getTrue(Ctx),
+ PoisonValue::get(Type::getInt1PtrTy(Ctx)),
+ /*isVolatile*/ false, Align(1));
+ InsertNewInstBefore(SI, *InsertAt);
}
-
/// Combiner aware instruction erasure.
///
/// When dealing with an instruction that has side effects or produces a void
@@ -411,12 +410,11 @@ public:
// Make sure that we reprocess all operands now that we reduced their
// use counts.
- for (Use &Operand : I.operands())
- if (auto *Inst = dyn_cast<Instruction>(Operand))
- Worklist.add(Inst);
-
+ SmallVector<Value *> Ops(I.operands());
Worklist.remove(&I);
I.eraseFromParent();
+ for (Value *Op : Ops)
+ Worklist.handleUseCountDecrement(Op);
MadeIRChange = true;
return nullptr; // Don't do anything with FI
}
@@ -450,6 +448,18 @@ public:
Value *SimplifySelectsFeedingBinaryOp(BinaryOperator &I, Value *LHS,
Value *RHS);
+ // (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
+ // -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
+ // (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
+ // -> (BinOp (logic_shift (BinOp X, Y)), Mask)
+ Instruction *foldBinOpShiftWithShift(BinaryOperator &I);
+
+ /// Tries to simplify binops of select and cast of the select condition.
+ ///
+ /// (Binop (cast C), (select C, T, F))
+ /// -> (select C, C0, C1)
+ Instruction *foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I);
+
/// This tries to simplify binary operations by factorizing out common terms
/// (e. g. "(A*B)+(A*C)" -> "A*(B+C)").
Value *tryFactorizationFolds(BinaryOperator &I);
@@ -549,7 +559,7 @@ public:
ICmpInst::Predicate Cond, Instruction &I);
Instruction *foldSelectICmp(ICmpInst::Predicate Pred, SelectInst *SI,
Value *RHS, const ICmpInst &I);
- Instruction *foldAllocaCmp(ICmpInst &ICI, const AllocaInst *Alloca);
+ bool foldAllocaCmp(AllocaInst *Alloca);
Instruction *foldCmpLoadFromIndexedGlobal(LoadInst *LI,
GetElementPtrInst *GEP,
GlobalVariable *GV, CmpInst &ICI,
@@ -564,6 +574,7 @@ public:
Instruction *foldICmpUsingKnownBits(ICmpInst &Cmp);
Instruction *foldICmpWithDominatingICmp(ICmpInst &Cmp);
Instruction *foldICmpWithConstant(ICmpInst &Cmp);
+ Instruction *foldICmpUsingBoolRange(ICmpInst &I);
Instruction *foldICmpInstWithConstant(ICmpInst &Cmp);
Instruction *foldICmpInstWithConstantNotInt(ICmpInst &Cmp);
Instruction *foldICmpInstWithConstantAllowUndef(ICmpInst &Cmp,
@@ -623,6 +634,7 @@ public:
Instruction *foldICmpEqIntrinsicWithConstant(ICmpInst &ICI, IntrinsicInst *II,
const APInt &C);
Instruction *foldICmpBitCast(ICmpInst &Cmp);
+ Instruction *foldICmpWithTrunc(ICmpInst &Cmp);
// Helpers of visitSelectInst().
Instruction *foldSelectOfBools(SelectInst &SI);
@@ -634,10 +646,11 @@ public:
SelectPatternFlavor SPF2, Value *C);
Instruction *foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
Instruction *foldSelectValueEquivalence(SelectInst &SI, ICmpInst &ICI);
+ bool replaceInInstruction(Value *V, Value *Old, Value *New,
+ unsigned Depth = 0);
Value *insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi,
bool isSigned, bool Inside);
- Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
bool mergeStoreIntoSuccessor(StoreInst &SI);
/// Given an initial instruction, check to see if it is the root of a
@@ -651,10 +664,12 @@ public:
Value *EvaluateInDifferentType(Value *V, Type *Ty, bool isSigned);
- /// Returns a value X such that Val = X * Scale, or null if none.
- ///
- /// If the multiplication is known not to overflow then NoSignedWrap is set.
- Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap);
+ bool tryToSinkInstruction(Instruction *I, BasicBlock *DestBlock);
+
+ bool removeInstructionsBeforeUnreachable(Instruction &I);
+ bool handleUnreachableFrom(Instruction *I);
+ bool handlePotentiallyDeadSuccessors(BasicBlock *BB, BasicBlock *LiveSucc);
+ void freelyInvertAllUsersOf(Value *V, Value *IgnoredUser = nullptr);
};
class Negator final {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 41bc65620ff6..6aa20ee26b9a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -32,7 +32,7 @@ STATISTIC(NumDeadStore, "Number of dead stores eliminated");
STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");
static cl::opt<unsigned> MaxCopiedFromConstantUsers(
- "instcombine-max-copied-from-constant-users", cl::init(128),
+ "instcombine-max-copied-from-constant-users", cl::init(300),
cl::desc("Maximum users to visit in copy from constant transform"),
cl::Hidden);
@@ -219,7 +219,7 @@ static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
// Now that I is pointing to the first non-allocation-inst in the block,
// insert our getelementptr instruction...
//
- Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
+ Type *IdxTy = IC.getDataLayout().getIndexType(AI.getType());
Value *NullIdx = Constant::getNullValue(IdxTy);
Value *Idx[2] = {NullIdx, NullIdx};
Instruction *GEP = GetElementPtrInst::CreateInBounds(
@@ -235,11 +235,12 @@ static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
if (isa<UndefValue>(AI.getArraySize()))
return IC.replaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
- // Ensure that the alloca array size argument has type intptr_t, so that
- // any casting is exposed early.
- Type *IntPtrTy = IC.getDataLayout().getIntPtrType(AI.getType());
- if (AI.getArraySize()->getType() != IntPtrTy) {
- Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), IntPtrTy, false);
+ // Ensure that the alloca array size argument has type equal to the offset
+ // size of the alloca() pointer, which, in the typical case, is intptr_t,
+ // so that any casting is exposed early.
+ Type *PtrIdxTy = IC.getDataLayout().getIndexType(AI.getType());
+ if (AI.getArraySize()->getType() != PtrIdxTy) {
+ Value *V = IC.Builder.CreateIntCast(AI.getArraySize(), PtrIdxTy, false);
return IC.replaceOperand(AI, 0, V);
}
@@ -259,8 +260,8 @@ namespace {
// instruction.
class PointerReplacer {
public:
- PointerReplacer(InstCombinerImpl &IC, Instruction &Root)
- : IC(IC), Root(Root) {}
+ PointerReplacer(InstCombinerImpl &IC, Instruction &Root, unsigned SrcAS)
+ : IC(IC), Root(Root), FromAS(SrcAS) {}
bool collectUsers();
void replacePointer(Value *V);
@@ -273,11 +274,21 @@ private:
return I == &Root || Worklist.contains(I);
}
+ bool isEqualOrValidAddrSpaceCast(const Instruction *I,
+ unsigned FromAS) const {
+ const auto *ASC = dyn_cast<AddrSpaceCastInst>(I);
+ if (!ASC)
+ return false;
+ unsigned ToAS = ASC->getDestAddressSpace();
+ return (FromAS == ToAS) || IC.isValidAddrSpaceCast(FromAS, ToAS);
+ }
+
SmallPtrSet<Instruction *, 32> ValuesToRevisit;
SmallSetVector<Instruction *, 4> Worklist;
MapVector<Value *, Value *> WorkMap;
InstCombinerImpl &IC;
Instruction &Root;
+ unsigned FromAS;
};
} // end anonymous namespace
@@ -341,6 +352,8 @@ bool PointerReplacer::collectUsersRecursive(Instruction &I) {
if (MI->isVolatile())
return false;
Worklist.insert(Inst);
+ } else if (isEqualOrValidAddrSpaceCast(Inst, FromAS)) {
+ Worklist.insert(Inst);
} else if (Inst->isLifetimeStartOrEnd()) {
continue;
} else {
@@ -391,9 +404,8 @@ void PointerReplacer::replace(Instruction *I) {
} else if (auto *BC = dyn_cast<BitCastInst>(I)) {
auto *V = getReplacement(BC->getOperand(0));
assert(V && "Operand not replaced");
- auto *NewT = PointerType::getWithSamePointeeType(
- cast<PointerType>(BC->getType()),
- V->getType()->getPointerAddressSpace());
+ auto *NewT = PointerType::get(BC->getType()->getContext(),
+ V->getType()->getPointerAddressSpace());
auto *NewI = new BitCastInst(V, NewT);
IC.InsertNewInstWith(NewI, *BC);
NewI->takeName(BC);
@@ -426,6 +438,22 @@ void PointerReplacer::replace(Instruction *I) {
IC.eraseInstFromFunction(*MemCpy);
WorkMap[MemCpy] = NewI;
+ } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(I)) {
+ auto *V = getReplacement(ASC->getPointerOperand());
+ assert(V && "Operand not replaced");
+ assert(isEqualOrValidAddrSpaceCast(
+ ASC, V->getType()->getPointerAddressSpace()) &&
+ "Invalid address space cast!");
+ auto *NewV = V;
+ if (V->getType()->getPointerAddressSpace() !=
+ ASC->getType()->getPointerAddressSpace()) {
+ auto *NewI = new AddrSpaceCastInst(V, ASC->getType(), "");
+ NewI->takeName(ASC);
+ IC.InsertNewInstWith(NewI, *ASC);
+ NewV = NewI;
+ }
+ IC.replaceInstUsesWith(*ASC, NewV);
+ IC.eraseInstFromFunction(*ASC);
} else {
llvm_unreachable("should never reach here");
}
@@ -435,7 +463,7 @@ void PointerReplacer::replacePointer(Value *V) {
#ifndef NDEBUG
auto *PT = cast<PointerType>(Root.getType());
auto *NT = cast<PointerType>(V->getType());
- assert(PT != NT && PT->hasSameElementTypeAs(NT) && "Invalid usage");
+ assert(PT != NT && "Invalid usage");
#endif
WorkMap[&Root] = V;
@@ -518,7 +546,7 @@ Instruction *InstCombinerImpl::visitAllocaInst(AllocaInst &AI) {
return NewI;
}
- PointerReplacer PtrReplacer(*this, AI);
+ PointerReplacer PtrReplacer(*this, AI, SrcAddrSpace);
if (PtrReplacer.collectUsers()) {
for (Instruction *Delete : ToDelete)
eraseInstFromFunction(*Delete);
@@ -739,6 +767,11 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
// the knowledge that padding exists for the rest of the pipeline.
const DataLayout &DL = IC.getDataLayout();
auto *SL = DL.getStructLayout(ST);
+
+ // Don't unpack a structure containing a scalable vector.
+ if (SL->getSizeInBits().isScalable())
+ return nullptr;
+
if (SL->hasPadding())
return nullptr;
@@ -979,17 +1012,15 @@ static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
// If we're indexing into an object with a variable index for the memory
// access, but the object has only one element, we can assume that the index
// will always be zero. If we replace the GEP, return it.
-template <typename T>
static Instruction *replaceGEPIdxWithZero(InstCombinerImpl &IC, Value *Ptr,
- T &MemI) {
+ Instruction &MemI) {
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr)) {
unsigned Idx;
if (canReplaceGEPIdxWithZero(IC, GEPI, &MemI, Idx)) {
Instruction *NewGEPI = GEPI->clone();
NewGEPI->setOperand(Idx,
ConstantInt::get(GEPI->getOperand(Idx)->getType(), 0));
- NewGEPI->insertBefore(GEPI);
- MemI.setOperand(MemI.getPointerOperandIndex(), NewGEPI);
+ IC.InsertNewInstBefore(NewGEPI, *GEPI);
return NewGEPI;
}
}
@@ -1024,6 +1055,8 @@ static bool canSimplifyNullLoadOrGEP(LoadInst &LI, Value *Op) {
Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
Value *Op = LI.getOperand(0);
+ if (Value *Res = simplifyLoadInst(&LI, Op, SQ.getWithInstruction(&LI)))
+ return replaceInstUsesWith(LI, Res);
// Try to canonicalize the loaded type.
if (Instruction *Res = combineLoadToOperationType(*this, LI))
@@ -1036,10 +1069,8 @@ Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
LI.setAlignment(KnownAlign);
// Replace GEP indices if possible.
- if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI)) {
- Worklist.push(NewGEPI);
- return &LI;
- }
+ if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Op, LI))
+ return replaceOperand(LI, 0, NewGEPI);
if (Instruction *Res = unpackLoadToAggregate(*this, LI))
return Res;
@@ -1065,13 +1096,7 @@ Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
// load null/undef -> unreachable
// TODO: Consider a target hook for valid address spaces for this xforms.
if (canSimplifyNullLoadOrGEP(LI, Op)) {
- // Insert a new store to null instruction before the load to indicate
- // that this code is not reachable. We do this instead of inserting
- // an unreachable instruction directly because we cannot modify the
- // CFG.
- StoreInst *SI = new StoreInst(PoisonValue::get(LI.getType()),
- Constant::getNullValue(Op->getType()), &LI);
- SI->setDebugLoc(LI.getDebugLoc());
+ CreateNonTerminatorUnreachable(&LI);
return replaceInstUsesWith(LI, PoisonValue::get(LI.getType()));
}
@@ -1261,6 +1286,11 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
// the knowledge that padding exists for the rest of the pipeline.
const DataLayout &DL = IC.getDataLayout();
auto *SL = DL.getStructLayout(ST);
+
+ // Don't unpack a structure containing a scalable vector.
+ if (SL->getSizeInBits().isScalable())
+ return false;
+
if (SL->hasPadding())
return false;
@@ -1443,10 +1473,8 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
return eraseInstFromFunction(SI);
// Replace GEP indices if possible.
- if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI)) {
- Worklist.push(NewGEPI);
- return &SI;
- }
+ if (Instruction *NewGEPI = replaceGEPIdxWithZero(*this, Ptr, SI))
+ return replaceOperand(SI, 1, NewGEPI);
// Don't hack volatile/ordered stores.
// FIXME: Some bits are legal for ordered atomic stores; needs refactoring.
@@ -1530,6 +1558,16 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
return nullptr; // Do not modify these!
}
+ // This is a non-terminator unreachable marker. Don't remove it.
+ if (isa<UndefValue>(Ptr)) {
+ // Remove all instructions after the marker and guaranteed-to-transfer
+ // instructions before the marker.
+ if (handleUnreachableFrom(SI.getNextNode()) ||
+ removeInstructionsBeforeUnreachable(SI))
+ return &SI;
+ return nullptr;
+ }
+
// store undef, Ptr -> noop
// FIXME: This is technically incorrect because it might overwrite a poison
// value. Change to PoisonValue once #52930 is resolved.
@@ -1571,6 +1609,17 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
if (!OtherBr || BBI == OtherBB->begin())
return false;
+ auto OtherStoreIsMergeable = [&](StoreInst *OtherStore) -> bool {
+ if (!OtherStore ||
+ OtherStore->getPointerOperand() != SI.getPointerOperand())
+ return false;
+
+ auto *SIVTy = SI.getValueOperand()->getType();
+ auto *OSVTy = OtherStore->getValueOperand()->getType();
+ return CastInst::isBitOrNoopPointerCastable(OSVTy, SIVTy, DL) &&
+ SI.hasSameSpecialState(OtherStore);
+ };
+
// If the other block ends in an unconditional branch, check for the 'if then
// else' case. There is an instruction before the branch.
StoreInst *OtherStore = nullptr;
@@ -1586,8 +1635,7 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
// If this isn't a store, isn't a store to the same location, or is not the
// right kind of store, bail out.
OtherStore = dyn_cast<StoreInst>(BBI);
- if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
- !SI.isSameOperationAs(OtherStore))
+ if (!OtherStoreIsMergeable(OtherStore))
return false;
} else {
// Otherwise, the other block ended with a conditional branch. If one of the
@@ -1601,12 +1649,10 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
// lives in OtherBB.
for (;; --BBI) {
// Check to see if we find the matching store.
- if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
- if (OtherStore->getOperand(1) != SI.getOperand(1) ||
- !SI.isSameOperationAs(OtherStore))
- return false;
+ OtherStore = dyn_cast<StoreInst>(BBI);
+ if (OtherStoreIsMergeable(OtherStore))
break;
- }
+
// If we find something that may be using or overwriting the stored
// value, or if we run out of instructions, we can't do the transform.
if (BBI->mayReadFromMemory() || BBI->mayThrow() ||
@@ -1624,14 +1670,17 @@ bool InstCombinerImpl::mergeStoreIntoSuccessor(StoreInst &SI) {
}
// Insert a PHI node now if we need it.
- Value *MergedVal = OtherStore->getOperand(0);
+ Value *MergedVal = OtherStore->getValueOperand();
// The debug locations of the original instructions might differ. Merge them.
DebugLoc MergedLoc = DILocation::getMergedLocation(SI.getDebugLoc(),
OtherStore->getDebugLoc());
- if (MergedVal != SI.getOperand(0)) {
- PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
- PN->addIncoming(SI.getOperand(0), SI.getParent());
- PN->addIncoming(OtherStore->getOperand(0), OtherBB);
+ if (MergedVal != SI.getValueOperand()) {
+ PHINode *PN =
+ PHINode::Create(SI.getValueOperand()->getType(), 2, "storemerge");
+ PN->addIncoming(SI.getValueOperand(), SI.getParent());
+ Builder.SetInsertPoint(OtherStore);
+ PN->addIncoming(Builder.CreateBitOrPointerCast(MergedVal, PN->getType()),
+ OtherBB);
MergedVal = InsertNewInstBefore(PN, DestBB->front());
PN->setDebugLoc(MergedLoc);
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 97f129e200de..50458e2773e6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -185,6 +185,9 @@ static Value *foldMulShl1(BinaryOperator &Mul, bool CommuteOperands,
return nullptr;
}
+static Value *takeLog2(IRBuilderBase &Builder, Value *Op, unsigned Depth,
+ bool AssumeNonZero, bool DoFold);
+
Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (Value *V =
@@ -270,7 +273,7 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
if (match(Op0, m_ZExtOrSExt(m_Value(X))) &&
match(Op1, m_APIntAllowUndef(NegPow2C))) {
unsigned SrcWidth = X->getType()->getScalarSizeInBits();
- unsigned ShiftAmt = NegPow2C->countTrailingZeros();
+ unsigned ShiftAmt = NegPow2C->countr_zero();
if (ShiftAmt >= BitWidth - SrcWidth) {
Value *N = Builder.CreateNeg(X, X->getName() + ".neg");
Value *Z = Builder.CreateZExt(N, Ty, N->getName() + ".z");
@@ -471,6 +474,40 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
if (Instruction *Ext = narrowMathIfNoOverflow(I))
return Ext;
+ if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
+ return Res;
+
+ // min(X, Y) * max(X, Y) => X * Y.
+ if (match(&I, m_CombineOr(m_c_Mul(m_SMax(m_Value(X), m_Value(Y)),
+ m_c_SMin(m_Deferred(X), m_Deferred(Y))),
+ m_c_Mul(m_UMax(m_Value(X), m_Value(Y)),
+ m_c_UMin(m_Deferred(X), m_Deferred(Y))))))
+ return BinaryOperator::CreateWithCopiedFlags(Instruction::Mul, X, Y, &I);
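The identity this hunk relies on can be sanity-checked with a small standalone C++ program; std::min and std::max stand in for the smin/umin/smax/umax intrinsics matched above, and the loop bounds are arbitrary.

    #include <algorithm>
    #include <cassert>

    // min(x, y) and max(x, y) are just x and y in some order, so the
    // products agree.
    int main() {
      for (unsigned x = 0; x < 50; ++x)
        for (unsigned y = 0; y < 50; ++y)
          assert(std::min(x, y) * std::max(x, y) == x * y);
      return 0;
    }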
+
+ // (mul Op0 Op1):
+ // if Log2(Op0) folds away ->
+ // (shl Op1, Log2(Op0))
+ // if Log2(Op1) folds away ->
+ // (shl Op0, Log2(Op1))
+ if (takeLog2(Builder, Op0, /*Depth*/ 0, /*AssumeNonZero*/ false,
+ /*DoFold*/ false)) {
+ Value *Res = takeLog2(Builder, Op0, /*Depth*/ 0, /*AssumeNonZero*/ false,
+ /*DoFold*/ true);
+ BinaryOperator *Shl = BinaryOperator::CreateShl(Op1, Res);
+ // We can only propagate the nuw flag.
+ Shl->setHasNoUnsignedWrap(HasNUW);
+ return Shl;
+ }
+ if (takeLog2(Builder, Op1, /*Depth*/ 0, /*AssumeNonZero*/ false,
+ /*DoFold*/ false)) {
+ Value *Res = takeLog2(Builder, Op1, /*Depth*/ 0, /*AssumeNonZero*/ false,
+ /*DoFold*/ true);
+ BinaryOperator *Shl = BinaryOperator::CreateShl(Op0, Res);
+ // We can only propagate the nuw flag.
+ Shl->setHasNoUnsignedWrap(HasNUW);
+ return Shl;
+ }
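A quick numeric sketch of the identity behind the mul-to-shl rewrite, under the assumption that one factor is expressible as 1 << k (its log2 "folds away"); the constants and step are arbitrary, and wrap-around matches because unsigned arithmetic is modular.

    #include <cassert>
    #include <cstdint>

    // X * (1 << k) == X << k in 32-bit modular arithmetic.
    int main() {
      for (uint32_t x = 0; x < 1000; x += 7)
        for (uint32_t k = 0; k < 32; ++k)
          assert(x * (uint32_t{1} << k) == x << k);
      return 0;
    }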
+
bool Changed = false;
if (!HasNSW && willNotOverflowSignedMul(Op0, Op1, I)) {
Changed = true;
@@ -765,6 +802,20 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
I.hasNoSignedZeros() && match(Start, m_Zero()))
return replaceInstUsesWith(I, Start);
+ // minimum(X, Y) * maximum(X, Y) => X * Y.
+ if (match(&I,
+ m_c_FMul(m_Intrinsic<Intrinsic::maximum>(m_Value(X), m_Value(Y)),
+ m_c_Intrinsic<Intrinsic::minimum>(m_Deferred(X),
+ m_Deferred(Y))))) {
+ BinaryOperator *Result = BinaryOperator::CreateFMulFMF(X, Y, &I);
+ // We cannot preserve ninf if nnan flag is not set.
+ // If X is NaN and Y is Inf then in original program we had NaN * NaN,
+ // while in optimized version NaN * Inf and this is a poison with ninf flag.
+ if (!Result->hasNoNaNs())
+ Result->setHasNoInfs(false);
+ return Result;
+ }
+
return nullptr;
}
@@ -976,9 +1027,9 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
ConstantInt::get(Ty, Product));
}
+ APInt Quotient(C2->getBitWidth(), /*val=*/0ULL, IsSigned);
if ((IsSigned && match(Op0, m_NSWMul(m_Value(X), m_APInt(C1)))) ||
(!IsSigned && match(Op0, m_NUWMul(m_Value(X), m_APInt(C1))))) {
- APInt Quotient(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
// (X * C1) / C2 -> X / (C2 / C1) if C2 is a multiple of C1.
if (isMultiple(*C2, *C1, Quotient, IsSigned)) {
@@ -1003,7 +1054,6 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
C1->ult(C1->getBitWidth() - 1)) ||
(!IsSigned && match(Op0, m_NUWShl(m_Value(X), m_APInt(C1))) &&
C1->ult(C1->getBitWidth()))) {
- APInt Quotient(C1->getBitWidth(), /*val=*/0ULL, IsSigned);
APInt C1Shifted = APInt::getOneBitSet(
C1->getBitWidth(), static_cast<unsigned>(C1->getZExtValue()));
@@ -1026,6 +1076,23 @@ Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
}
}
+ // Distribute div over add to eliminate a matching div/mul pair:
+ // ((X * C2) + C1) / C2 --> X + C1/C2
+ // We need a multiple of the divisor for a signed add constant, but
+ // unsigned is fine with any constant pair.
+ if (IsSigned &&
+ match(Op0, m_NSWAdd(m_NSWMul(m_Value(X), m_SpecificInt(*C2)),
+ m_APInt(C1))) &&
+ isMultiple(*C1, *C2, Quotient, IsSigned)) {
+ return BinaryOperator::CreateNSWAdd(X, ConstantInt::get(Ty, Quotient));
+ }
+ if (!IsSigned &&
+ match(Op0, m_NUWAdd(m_NUWMul(m_Value(X), m_SpecificInt(*C2)),
+ m_APInt(C1)))) {
+ return BinaryOperator::CreateNUWAdd(X,
+ ConstantInt::get(Ty, C1->udiv(*C2)));
+ }
+
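The distribution step can also be checked numerically; the divisor 12 and the ranges below are arbitrary, and the signed case assumes the added constant is a multiple of the divisor, mirroring the isMultiple check in the code.

    #include <cassert>

    // ((x * c2) + c1) / c2 == x + c1 / c2 when c1 is a multiple of c2 and
    // nothing overflows: x*c2 + c1 is exactly (x + c1/c2) * c2.
    int main() {
      const long long c2 = 12;
      for (long long x = -40; x <= 40; ++x)
        for (long long c1 = -120; c1 <= 120; c1 += c2)  // c1 is a multiple of c2
          assert(((x * c2) + c1) / c2 == x + c1 / c2);
      return 0;
    }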
if (!C2->isZero()) // avoid X udiv 0
if (Instruction *FoldedDiv = foldBinOpIntoSelectOrPhi(I))
return FoldedDiv;
@@ -1121,7 +1188,7 @@ static const unsigned MaxDepth = 6;
// actual instructions, otherwise return a non-null dummy value. Return nullptr
// on failure.
static Value *takeLog2(IRBuilderBase &Builder, Value *Op, unsigned Depth,
- bool DoFold) {
+ bool AssumeNonZero, bool DoFold) {
auto IfFold = [DoFold](function_ref<Value *()> Fn) {
if (!DoFold)
return reinterpret_cast<Value *>(-1);
@@ -1147,14 +1214,18 @@ static Value *takeLog2(IRBuilderBase &Builder, Value *Op, unsigned Depth,
// FIXME: Require one use?
Value *X, *Y;
if (match(Op, m_ZExt(m_Value(X))))
- if (Value *LogX = takeLog2(Builder, X, Depth, DoFold))
+ if (Value *LogX = takeLog2(Builder, X, Depth, AssumeNonZero, DoFold))
return IfFold([&]() { return Builder.CreateZExt(LogX, Op->getType()); });
// log2(X << Y) -> log2(X) + Y
// FIXME: Require one use unless X is 1?
- if (match(Op, m_Shl(m_Value(X), m_Value(Y))))
- if (Value *LogX = takeLog2(Builder, X, Depth, DoFold))
- return IfFold([&]() { return Builder.CreateAdd(LogX, Y); });
+ if (match(Op, m_Shl(m_Value(X), m_Value(Y)))) {
+ auto *BO = cast<OverflowingBinaryOperator>(Op);
+ // nuw will be set if the `shl` is trivially non-zero.
+ if (AssumeNonZero || BO->hasNoUnsignedWrap() || BO->hasNoSignedWrap())
+ if (Value *LogX = takeLog2(Builder, X, Depth, AssumeNonZero, DoFold))
+ return IfFold([&]() { return Builder.CreateAdd(LogX, Y); });
+ }
// log2(Cond ? X : Y) -> Cond ? log2(X) : log2(Y)
// FIXME: missed optimization: if one of the hands of select is/contains
@@ -1162,8 +1233,10 @@ static Value *takeLog2(IRBuilderBase &Builder, Value *Op, unsigned Depth,
// FIXME: can both hands contain undef?
// FIXME: Require one use?
if (SelectInst *SI = dyn_cast<SelectInst>(Op))
- if (Value *LogX = takeLog2(Builder, SI->getOperand(1), Depth, DoFold))
- if (Value *LogY = takeLog2(Builder, SI->getOperand(2), Depth, DoFold))
+ if (Value *LogX = takeLog2(Builder, SI->getOperand(1), Depth,
+ AssumeNonZero, DoFold))
+ if (Value *LogY = takeLog2(Builder, SI->getOperand(2), Depth,
+ AssumeNonZero, DoFold))
return IfFold([&]() {
return Builder.CreateSelect(SI->getOperand(0), LogX, LogY);
});
@@ -1171,13 +1244,18 @@ static Value *takeLog2(IRBuilderBase &Builder, Value *Op, unsigned Depth,
// log2(umin(X, Y)) -> umin(log2(X), log2(Y))
// log2(umax(X, Y)) -> umax(log2(X), log2(Y))
auto *MinMax = dyn_cast<MinMaxIntrinsic>(Op);
- if (MinMax && MinMax->hasOneUse() && !MinMax->isSigned())
- if (Value *LogX = takeLog2(Builder, MinMax->getLHS(), Depth, DoFold))
- if (Value *LogY = takeLog2(Builder, MinMax->getRHS(), Depth, DoFold))
+ if (MinMax && MinMax->hasOneUse() && !MinMax->isSigned()) {
+ // Use AssumeNonZero as false here. Otherwise we can hit a case where
+ // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
+ if (Value *LogX = takeLog2(Builder, MinMax->getLHS(), Depth,
+ /*AssumeNonZero*/ false, DoFold))
+ if (Value *LogY = takeLog2(Builder, MinMax->getRHS(), Depth,
+ /*AssumeNonZero*/ false, DoFold))
return IfFold([&]() {
- return Builder.CreateBinaryIntrinsic(
- MinMax->getIntrinsicID(), LogX, LogY);
+ return Builder.CreateBinaryIntrinsic(MinMax->getIntrinsicID(), LogX,
+ LogY);
});
+ }
return nullptr;
}
@@ -1297,8 +1375,10 @@ Instruction *InstCombinerImpl::visitUDiv(BinaryOperator &I) {
}
// Op1 udiv Op2 -> Op1 lshr log2(Op2), if log2() folds away.
- if (takeLog2(Builder, Op1, /*Depth*/0, /*DoFold*/false)) {
- Value *Res = takeLog2(Builder, Op1, /*Depth*/0, /*DoFold*/true);
+ if (takeLog2(Builder, Op1, /*Depth*/ 0, /*AssumeNonZero*/ true,
+ /*DoFold*/ false)) {
+ Value *Res = takeLog2(Builder, Op1, /*Depth*/ 0,
+ /*AssumeNonZero*/ true, /*DoFold*/ true);
return replaceInstUsesWith(
I, Builder.CreateLShr(Op0, Res, I.getName(), I.isExact()));
}
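A minimal check of the udiv-to-lshr equivalence for power-of-two divisors; the sampled values are arbitrary.

    #include <cassert>
    #include <cstdint>

    // Dividing an unsigned value by 1 << k is the same as shifting right by k.
    int main() {
      for (uint32_t x = 0; x < 4096; x += 5)
        for (uint32_t k = 0; k < 32; ++k)
          assert(x / (uint32_t{1} << k) == x >> k);
      return 0;
    }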
@@ -1359,7 +1439,8 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
// (sext X) sdiv C --> sext (X sdiv C)
Value *Op0Src;
if (match(Op0, m_OneUse(m_SExt(m_Value(Op0Src)))) &&
- Op0Src->getType()->getScalarSizeInBits() >= Op1C->getMinSignedBits()) {
+ Op0Src->getType()->getScalarSizeInBits() >=
+ Op1C->getSignificantBits()) {
// In the general case, we need to make sure that the dividend is not the
// minimum signed value because dividing that by -1 is UB. But here, we
@@ -1402,7 +1483,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) {
KnownBits KnownDividend = computeKnownBits(Op0, 0, &I);
if (!I.isExact() &&
(match(Op1, m_Power2(Op1C)) || match(Op1, m_NegatedPower2(Op1C))) &&
- KnownDividend.countMinTrailingZeros() >= Op1C->countTrailingZeros()) {
+ KnownDividend.countMinTrailingZeros() >= Op1C->countr_zero()) {
I.setIsExact();
return &I;
}
@@ -1681,6 +1762,111 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
return nullptr;
}
+// Variety of transform for:
+// (urem/srem (mul X, Y), (mul X, Z))
+// (urem/srem (shl X, Y), (shl X, Z))
+// (urem/srem (shl Y, X), (shl Z, X))
+// NB: The shift cases are really just extensions of the mul case. We treat
+// shift as Val * (1 << Amt).
+static Instruction *simplifyIRemMulShl(BinaryOperator &I,
+ InstCombinerImpl &IC) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1), *X = nullptr;
+ APInt Y, Z;
+ bool ShiftByX = false;
+
+ // If V is not nullptr, it will be matched using m_Specific.
+ auto MatchShiftOrMulXC = [](Value *Op, Value *&V, APInt &C) -> bool {
+ const APInt *Tmp = nullptr;
+ if ((!V && match(Op, m_Mul(m_Value(V), m_APInt(Tmp)))) ||
+ (V && match(Op, m_Mul(m_Specific(V), m_APInt(Tmp)))))
+ C = *Tmp;
+ else if ((!V && match(Op, m_Shl(m_Value(V), m_APInt(Tmp)))) ||
+ (V && match(Op, m_Shl(m_Specific(V), m_APInt(Tmp)))))
+ C = APInt(Tmp->getBitWidth(), 1) << *Tmp;
+ if (Tmp != nullptr)
+ return true;
+
+ // Reset `V` so we don't start with a specific value on the next match attempt.
+ V = nullptr;
+ return false;
+ };
+
+ auto MatchShiftCX = [](Value *Op, APInt &C, Value *&V) -> bool {
+ const APInt *Tmp = nullptr;
+ if ((!V && match(Op, m_Shl(m_APInt(Tmp), m_Value(V)))) ||
+ (V && match(Op, m_Shl(m_APInt(Tmp), m_Specific(V))))) {
+ C = *Tmp;
+ return true;
+ }
+
+ // Reset `V` so we don't start with a specific value on the next match attempt.
+ V = nullptr;
+ return false;
+ };
+
+ if (MatchShiftOrMulXC(Op0, X, Y) && MatchShiftOrMulXC(Op1, X, Z)) {
+ // pass
+ } else if (MatchShiftCX(Op0, Y, X) && MatchShiftCX(Op1, Z, X)) {
+ ShiftByX = true;
+ } else {
+ return nullptr;
+ }
+
+ bool IsSRem = I.getOpcode() == Instruction::SRem;
+
+ OverflowingBinaryOperator *BO0 = cast<OverflowingBinaryOperator>(Op0);
+ // TODO: We may be able to deduce more about nsw/nuw of BO0/BO1 based on Y >=
+ // Z or Z >= Y.
+ bool BO0HasNSW = BO0->hasNoSignedWrap();
+ bool BO0HasNUW = BO0->hasNoUnsignedWrap();
+ bool BO0NoWrap = IsSRem ? BO0HasNSW : BO0HasNUW;
+
+ APInt RemYZ = IsSRem ? Y.srem(Z) : Y.urem(Z);
+ // (rem (mul nuw/nsw X, Y), (mul X, Z))
+ // if (rem Y, Z) == 0
+ // -> 0
+ if (RemYZ.isZero() && BO0NoWrap)
+ return IC.replaceInstUsesWith(I, ConstantInt::getNullValue(I.getType()));
+
+ // Helper function to emit either (RemSimplificationC << X) or
+ // (RemSimplificationC * X) depending on whether we matched Op0/Op1 as
+ // (shl V, X) or (mul V, X) respectively.
+ auto CreateMulOrShift =
+ [&](const APInt &RemSimplificationC) -> BinaryOperator * {
+ Value *RemSimplification =
+ ConstantInt::get(I.getType(), RemSimplificationC);
+ return ShiftByX ? BinaryOperator::CreateShl(RemSimplification, X)
+ : BinaryOperator::CreateMul(X, RemSimplification);
+ };
+
+ OverflowingBinaryOperator *BO1 = cast<OverflowingBinaryOperator>(Op1);
+ bool BO1HasNSW = BO1->hasNoSignedWrap();
+ bool BO1HasNUW = BO1->hasNoUnsignedWrap();
+ bool BO1NoWrap = IsSRem ? BO1HasNSW : BO1HasNUW;
+ // (rem (mul X, Y), (mul nuw/nsw X, Z))
+ // if (rem Y, Z) == Y
+ // -> (mul nuw/nsw X, Y)
+ if (RemYZ == Y && BO1NoWrap) {
+ BinaryOperator *BO = CreateMulOrShift(Y);
+ // Copy any overflow flags from Op0.
+ BO->setHasNoSignedWrap(IsSRem || BO0HasNSW);
+ BO->setHasNoUnsignedWrap(!IsSRem || BO0HasNUW);
+ return BO;
+ }
+
+ // (rem (mul nuw/nsw X, Y), (mul {nsw} X, Z))
+ // if Y >= Z
+ // -> (mul {nuw} nsw X, (rem Y, Z))
+ if (Y.uge(Z) && (IsSRem ? (BO0HasNSW && BO1HasNSW) : BO0HasNUW)) {
+ BinaryOperator *BO = CreateMulOrShift(RemYZ);
+ BO->setHasNoSignedWrap();
+ BO->setHasNoUnsignedWrap(BO0HasNUW);
+ return BO;
+ }
+
+ return nullptr;
+}
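The algebra behind simplifyIRemMulShl, checked for small non-wrapping values; the bounds are kept small enough that no overflow occurs, which is what the nuw/nsw requirements guarantee at the IR level.

    #include <cassert>
    #include <cstdint>

    // (x*y) % (x*z) == x * (y % z) when nothing wraps: writing
    // y = q*z + (y % z) gives x*y = q*(x*z) + x*(y % z), with the last
    // term strictly below x*z.
    int main() {
      for (uint64_t x = 1; x <= 20; ++x)
        for (uint64_t y = 0; y <= 30; ++y)
          for (uint64_t z = 1; z <= 30; ++z)
            assert((x * y) % (x * z) == x * (y % z));
      return 0;
    }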
+
/// This function implements the transforms common to both integer remainder
/// instructions (urem and srem). It is called by the visitors to those integer
/// remainder instructions.
@@ -1733,6 +1919,9 @@ Instruction *InstCombinerImpl::commonIRemTransforms(BinaryOperator &I) {
}
}
+ if (Instruction *R = simplifyIRemMulShl(I, *this))
+ return R;
+
return nullptr;
}
@@ -1782,8 +1971,21 @@ Instruction *InstCombinerImpl::visitURem(BinaryOperator &I) {
// urem Op0, (sext i1 X) --> (Op0 == -1) ? 0 : Op0
Value *X;
if (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1)) {
- Value *Cmp = Builder.CreateICmpEQ(Op0, ConstantInt::getAllOnesValue(Ty));
- return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), Op0);
+ Value *FrozenOp0 = Builder.CreateFreeze(Op0, Op0->getName() + ".frozen");
+ Value *Cmp =
+ Builder.CreateICmpEQ(FrozenOp0, ConstantInt::getAllOnesValue(Ty));
+ return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), FrozenOp0);
+ }
+
+ // For "(X + 1) % Op1" and if (X u< Op1) => (X + 1) == Op1 ? 0 : X + 1 .
+ if (match(Op0, m_Add(m_Value(X), m_One()))) {
+ Value *Val =
+ simplifyICmpInst(ICmpInst::ICMP_ULT, X, Op1, SQ.getWithInstruction(&I));
+ if (Val && match(Val, m_One())) {
+ Value *FrozenOp0 = Builder.CreateFreeze(Op0, Op0->getName() + ".frozen");
+ Value *Cmp = Builder.CreateICmpEQ(FrozenOp0, Op1);
+ return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), FrozenOp0);
+ }
}
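The new urem fold can be checked directly; the modulus range and step are arbitrary, and the precondition X u< M corresponds to the simplifyICmpInst query above.

    #include <cassert>
    #include <cstdint>

    // With x < m known, x + 1 is at most m, so the remainder is 0 exactly
    // when x + 1 == m and is x + 1 otherwise.
    int main() {
      for (uint32_t m = 1; m <= 64; ++m)
        for (uint32_t x = 0; x < m; ++x)  // x u< m
          assert((x + 1) % m == ((x + 1 == m) ? 0u : x + 1));
      return 0;
    }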
return nullptr;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 7f59729f0085..2f6aa85062a5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -316,7 +316,7 @@ Instruction *InstCombinerImpl::foldPHIArgIntToPtrToPHI(PHINode &PN) {
for (unsigned OpNum = 0; OpNum != PN.getNumIncomingValues(); ++OpNum) {
if (auto *NewOp =
simplifyIntToPtrRoundTripCast(PN.getIncomingValue(OpNum))) {
- PN.setIncomingValue(OpNum, NewOp);
+ replaceOperand(PN, OpNum, NewOp);
OperandWithRoundTripCast = true;
}
}
@@ -745,6 +745,7 @@ Instruction *InstCombinerImpl::foldPHIArgLoadIntoPHI(PHINode &PN) {
LLVMContext::MD_dereferenceable,
LLVMContext::MD_dereferenceable_or_null,
LLVMContext::MD_access_group,
+ LLVMContext::MD_noundef,
};
for (unsigned ID : KnownIDs)
@@ -1388,11 +1389,10 @@ Instruction *InstCombinerImpl::visitPHINode(PHINode &PN) {
// If all PHI operands are the same operation, pull them through the PHI,
// reducing code size.
- if (isa<Instruction>(PN.getIncomingValue(0)) &&
- isa<Instruction>(PN.getIncomingValue(1)) &&
- cast<Instruction>(PN.getIncomingValue(0))->getOpcode() ==
- cast<Instruction>(PN.getIncomingValue(1))->getOpcode() &&
- PN.getIncomingValue(0)->hasOneUser())
+ auto *Inst0 = dyn_cast<Instruction>(PN.getIncomingValue(0));
+ auto *Inst1 = dyn_cast<Instruction>(PN.getIncomingValue(1));
+ if (Inst0 && Inst1 && Inst0->getOpcode() == Inst1->getOpcode() &&
+ Inst0->hasOneUser())
if (Instruction *Result = foldPHIArgOpIntoPHI(PN))
return Result;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index e7d8208f94fd..661c50062223 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -98,7 +98,8 @@ static Instruction *foldSelectBinOpIdentity(SelectInst &Sel,
// +0.0 compares equal to -0.0, and so it does not behave as required for this
// transform. Bail out if we can not exclude that possibility.
if (isa<FPMathOperator>(BO))
- if (!BO->hasNoSignedZeros() && !CannotBeNegativeZero(Y, &TLI))
+ if (!BO->hasNoSignedZeros() &&
+ !cannotBeNegativeZero(Y, IC.getDataLayout(), &TLI))
return nullptr;
// BO = binop Y, X
@@ -386,6 +387,32 @@ Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI,
return CallInst::Create(TII->getCalledFunction(), {NewSel, MatchOp});
}
}
+
+ // select c, (ldexp v, e0), (ldexp v, e1) -> ldexp v, (select c, e0, e1)
+ // select c, (ldexp v0, e), (ldexp v1, e) -> ldexp (select c, v0, v1), e
+ //
+ // select c, (ldexp v0, e0), (ldexp v1, e1) ->
+ // ldexp (select c, v0, v1), (select c, e0, e1)
+ if (TII->getIntrinsicID() == Intrinsic::ldexp) {
+ Value *LdexpVal0 = TII->getArgOperand(0);
+ Value *LdexpExp0 = TII->getArgOperand(1);
+ Value *LdexpVal1 = FII->getArgOperand(0);
+ Value *LdexpExp1 = FII->getArgOperand(1);
+ if (LdexpExp0->getType() == LdexpExp1->getType()) {
+ FPMathOperator *SelectFPOp = cast<FPMathOperator>(&SI);
+ FastMathFlags FMF = cast<FPMathOperator>(TII)->getFastMathFlags();
+ FMF &= cast<FPMathOperator>(FII)->getFastMathFlags();
+ FMF |= SelectFPOp->getFastMathFlags();
+
+ Value *SelectVal = Builder.CreateSelect(Cond, LdexpVal0, LdexpVal1);
+ Value *SelectExp = Builder.CreateSelect(Cond, LdexpExp0, LdexpExp1);
+
+ CallInst *NewLdexp = Builder.CreateIntrinsic(
+ TII->getType(), Intrinsic::ldexp, {SelectVal, SelectExp});
+ NewLdexp->setFastMathFlags(FMF);
+ return replaceInstUsesWith(SI, NewLdexp);
+ }
+ }
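A small illustration that selecting between two ldexp results matches a single ldexp of the selected operands; std::ldexp stands in for llvm.ldexp here, and the sample values are arbitrary.

    #include <cassert>
    #include <cmath>

    // select c, ldexp(v0, e0), ldexp(v1, e1)
    //   == ldexp(select c, v0, v1, select c, e0, e1)
    int main() {
      const double v0 = 1.5, v1 = -2.25;
      const int e0 = 3, e1 = -4;
      for (bool c : {false, true}) {
        double lhs = c ? std::ldexp(v0, e0) : std::ldexp(v1, e1);
        double rhs = std::ldexp(c ? v0 : v1, c ? e0 : e1);
        assert(lhs == rhs);
      }
      return 0;
    }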
}
// icmp with a common operand also can have the common operand
@@ -429,6 +456,21 @@ Instruction *InstCombinerImpl::foldSelectOpOp(SelectInst &SI, Instruction *TI,
!OtherOpF->getType()->isVectorTy()))
return nullptr;
+ // If we are sinking div/rem after a select, we may need to freeze the
+ // condition because div/rem may induce immediate UB with a poison operand.
+ // For example, the following transform is not safe if Cond can ever be poison
+ // because we can replace poison with zero and then we have div-by-zero that
+ // didn't exist in the original code:
+ // Cond ? x/y : x/z --> x / (Cond ? y : z)
+ auto *BO = dyn_cast<BinaryOperator>(TI);
+ if (BO && BO->isIntDivRem() && !isGuaranteedNotToBePoison(Cond)) {
+ // A udiv/urem with a common divisor is safe because UB can only occur with
+ // div-by-zero, and that would be present in the original code.
+ if (BO->getOpcode() == Instruction::SDiv ||
+ BO->getOpcode() == Instruction::SRem || MatchIsOpZero)
+ Cond = Builder.CreateFreeze(Cond);
+ }
+
// If we reach here, they do have operations in common.
Value *NewSI = Builder.CreateSelect(Cond, OtherOpT, OtherOpF,
SI.getName() + ".v", &SI);
@@ -461,7 +503,7 @@ static bool isSelect01(const APInt &C1I, const APInt &C2I) {
/// optimization.
Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
Value *FalseVal) {
- // See the comment above GetSelectFoldableOperands for a description of the
+ // See the comment above getSelectFoldableOperands for a description of the
// transformation we are doing here.
auto TryFoldSelectIntoOp = [&](SelectInst &SI, Value *TrueVal,
Value *FalseVal,
@@ -496,7 +538,7 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
if (!isa<Constant>(OOp) ||
(OOpIsAPInt && isSelect01(C->getUniqueInteger(), *OOpC))) {
Value *NewSel = Builder.CreateSelect(SI.getCondition(), Swapped ? C : OOp,
- Swapped ? OOp : C);
+ Swapped ? OOp : C, "", &SI);
if (isa<FPMathOperator>(&SI))
cast<Instruction>(NewSel)->setFastMathFlags(FMF);
NewSel->takeName(TVI);
@@ -569,6 +611,44 @@ static Instruction *foldSelectICmpAndAnd(Type *SelType, const ICmpInst *Cmp,
}
/// We want to turn:
+/// (select (icmp eq (and X, C1), 0), 0, (shl [nsw/nuw] X, C2));
+/// iff C1 is a mask and the number of its leading zeros is equal to C2
+/// into:
+/// shl X, C2
+static Value *foldSelectICmpAndZeroShl(const ICmpInst *Cmp, Value *TVal,
+ Value *FVal,
+ InstCombiner::BuilderTy &Builder) {
+ ICmpInst::Predicate Pred;
+ Value *AndVal;
+ if (!match(Cmp, m_ICmp(Pred, m_Value(AndVal), m_Zero())))
+ return nullptr;
+
+ if (Pred == ICmpInst::ICMP_NE) {
+ Pred = ICmpInst::ICMP_EQ;
+ std::swap(TVal, FVal);
+ }
+
+ Value *X;
+ const APInt *C2, *C1;
+ if (Pred != ICmpInst::ICMP_EQ ||
+ !match(AndVal, m_And(m_Value(X), m_APInt(C1))) ||
+ !match(TVal, m_Zero()) || !match(FVal, m_Shl(m_Specific(X), m_APInt(C2))))
+ return nullptr;
+
+ if (!C1->isMask() ||
+ C1->countLeadingZeros() != static_cast<unsigned>(C2->getZExtValue()))
+ return nullptr;
+
+ auto *FI = dyn_cast<Instruction>(FVal);
+ if (!FI)
+ return nullptr;
+
+ FI->setHasNoSignedWrap(false);
+ FI->setHasNoUnsignedWrap(false);
+ return FVal;
+}
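The select elimination can be checked numerically for one arbitrary choice of C2 and the corresponding low-bit mask C1: whenever (x & C1) is zero, only the top C2 bits of x can be set, so x << C2 is zero anyway.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t c2 = 5;
      const uint32_t c1 = 0xFFFFFFFFu >> c2;  // mask with c2 leading zeros
      for (uint32_t hi = 0; hi < 32; ++hi)    // top-bit-only contributions
        for (uint32_t lo = 0; lo < 1024; lo += 7) {
          uint32_t x = (hi << 27) | lo;
          uint32_t with_select = ((x & c1) == 0) ? 0u : (x << c2);
          assert(with_select == (x << c2));
        }
      return 0;
    }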
+
+/// We want to turn:
/// (select (icmp sgt x, C), lshr (X, Y), ashr (X, Y)); iff C s>= -1
/// (select (icmp slt x, C), ashr (X, Y), lshr (X, Y)); iff C s>= 0
/// into:
@@ -935,10 +1015,53 @@ static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
return nullptr;
}
+/// Try to match patterns with select and subtract as absolute difference.
+static Value *foldAbsDiff(ICmpInst *Cmp, Value *TVal, Value *FVal,
+ InstCombiner::BuilderTy &Builder) {
+ auto *TI = dyn_cast<Instruction>(TVal);
+ auto *FI = dyn_cast<Instruction>(FVal);
+ if (!TI || !FI)
+ return nullptr;
+
+ // Normalize predicate to gt/lt rather than ge/le.
+ ICmpInst::Predicate Pred = Cmp->getStrictPredicate();
+ Value *A = Cmp->getOperand(0);
+ Value *B = Cmp->getOperand(1);
+
+ // Normalize "A - B" as the true value of the select.
+ if (match(FI, m_Sub(m_Specific(A), m_Specific(B)))) {
+ std::swap(FI, TI);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ // With any pair of no-wrap subtracts:
+ // (A > B) ? (A - B) : (B - A) --> abs(A - B)
+ if (Pred == CmpInst::ICMP_SGT &&
+ match(TI, m_Sub(m_Specific(A), m_Specific(B))) &&
+ match(FI, m_Sub(m_Specific(B), m_Specific(A))) &&
+ (TI->hasNoSignedWrap() || TI->hasNoUnsignedWrap()) &&
+ (FI->hasNoSignedWrap() || FI->hasNoUnsignedWrap())) {
+ // The remaining subtract is not "nuw" any more.
+ // If there's one use of the subtract (no other use than the use we are
+ // about to replace), then we know that the sub is "nsw" in this context
+ // even if it was only "nuw" before. If there's another use, then we can't
+ // add "nsw" to the existing instruction because it may not be safe in the
+ // other user's context.
+ TI->setHasNoUnsignedWrap(false);
+ if (!TI->hasNoSignedWrap())
+ TI->setHasNoSignedWrap(TI->hasOneUse());
+ return Builder.CreateBinaryIntrinsic(Intrinsic::abs, TI, Builder.getTrue());
+ }
+
+ return nullptr;
+}
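A check of the absolute-difference pattern on a range where the subtractions cannot overflow, which is what the nsw/nuw conditions above establish at the IR level.

    #include <cassert>
    #include <cstdlib>

    // (a > b ? a - b : b - a) is |a - b| when the subtractions do not wrap.
    int main() {
      for (int a = -100; a <= 100; ++a)
        for (int b = -100; b <= 100; ++b)
          assert((a > b ? a - b : b - a) == std::abs(a - b));
      return 0;
    }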
+
/// Fold the following code sequence:
/// \code
/// int a = ctlz(x & -x);
// x ? 31 - a : a;
+// // or
+// x ? 31 - a : 32;
/// \code
///
/// into:
@@ -953,15 +1076,19 @@ static Instruction *foldSelectCtlzToCttz(ICmpInst *ICI, Value *TrueVal,
if (ICI->getPredicate() == ICmpInst::ICMP_NE)
std::swap(TrueVal, FalseVal);
+ Value *Ctlz;
if (!match(FalseVal,
- m_Xor(m_Deferred(TrueVal), m_SpecificInt(BitWidth - 1))))
+ m_Xor(m_Value(Ctlz), m_SpecificInt(BitWidth - 1))))
return nullptr;
- if (!match(TrueVal, m_Intrinsic<Intrinsic::ctlz>()))
+ if (!match(Ctlz, m_Intrinsic<Intrinsic::ctlz>()))
+ return nullptr;
+
+ if (TrueVal != Ctlz && !match(TrueVal, m_SpecificInt(BitWidth)))
return nullptr;
Value *X = ICI->getOperand(0);
- auto *II = cast<IntrinsicInst>(TrueVal);
+ auto *II = cast<IntrinsicInst>(Ctlz);
if (!match(II->getOperand(0), m_c_And(m_Specific(X), m_Neg(m_Specific(X)))))
return nullptr;
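The ctlz-of-lowest-set-bit identity underlying this fold, checked with the C++20 <bit> helpers as stand-ins for the intrinsics; the sampling step is arbitrary.

    #include <bit>
    #include <cassert>
    #include <cstdint>

    // x & -x isolates the lowest set bit, so for nonzero x,
    // 31 - countl_zero(x & -x) == countr_zero(x).
    int main() {
      for (uint32_t x = 1; x < (1u << 20); x += 7)
        assert(31 - std::countl_zero(x & (0u - x)) == std::countr_zero(x));
      return 0;
    }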
@@ -1038,99 +1165,6 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
return nullptr;
}
-/// Return true if we find and adjust an icmp+select pattern where the compare
-/// is with a constant that can be incremented or decremented to match the
-/// minimum or maximum idiom.
-static bool adjustMinMax(SelectInst &Sel, ICmpInst &Cmp) {
- ICmpInst::Predicate Pred = Cmp.getPredicate();
- Value *CmpLHS = Cmp.getOperand(0);
- Value *CmpRHS = Cmp.getOperand(1);
- Value *TrueVal = Sel.getTrueValue();
- Value *FalseVal = Sel.getFalseValue();
-
- // We may move or edit the compare, so make sure the select is the only user.
- const APInt *CmpC;
- if (!Cmp.hasOneUse() || !match(CmpRHS, m_APInt(CmpC)))
- return false;
-
- // These transforms only work for selects of integers or vector selects of
- // integer vectors.
- Type *SelTy = Sel.getType();
- auto *SelEltTy = dyn_cast<IntegerType>(SelTy->getScalarType());
- if (!SelEltTy || SelTy->isVectorTy() != Cmp.getType()->isVectorTy())
- return false;
-
- Constant *AdjustedRHS;
- if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SGT)
- AdjustedRHS = ConstantInt::get(CmpRHS->getType(), *CmpC + 1);
- else if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT)
- AdjustedRHS = ConstantInt::get(CmpRHS->getType(), *CmpC - 1);
- else
- return false;
-
- // X > C ? X : C+1 --> X < C+1 ? C+1 : X
- // X < C ? X : C-1 --> X > C-1 ? C-1 : X
- if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
- (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
- ; // Nothing to do here. Values match without any sign/zero extension.
- }
- // Types do not match. Instead of calculating this with mixed types, promote
- // all to the larger type. This enables scalar evolution to analyze this
- // expression.
- else if (CmpRHS->getType()->getScalarSizeInBits() < SelEltTy->getBitWidth()) {
- Constant *SextRHS = ConstantExpr::getSExt(AdjustedRHS, SelTy);
-
- // X = sext x; x >s c ? X : C+1 --> X = sext x; X <s C+1 ? C+1 : X
- // X = sext x; x <s c ? X : C-1 --> X = sext x; X >s C-1 ? C-1 : X
- // X = sext x; x >u c ? X : C+1 --> X = sext x; X <u C+1 ? C+1 : X
- // X = sext x; x <u c ? X : C-1 --> X = sext x; X >u C-1 ? C-1 : X
- if (match(TrueVal, m_SExt(m_Specific(CmpLHS))) && SextRHS == FalseVal) {
- CmpLHS = TrueVal;
- AdjustedRHS = SextRHS;
- } else if (match(FalseVal, m_SExt(m_Specific(CmpLHS))) &&
- SextRHS == TrueVal) {
- CmpLHS = FalseVal;
- AdjustedRHS = SextRHS;
- } else if (Cmp.isUnsigned()) {
- Constant *ZextRHS = ConstantExpr::getZExt(AdjustedRHS, SelTy);
- // X = zext x; x >u c ? X : C+1 --> X = zext x; X <u C+1 ? C+1 : X
- // X = zext x; x <u c ? X : C-1 --> X = zext x; X >u C-1 ? C-1 : X
- // zext + signed compare cannot be changed:
- // 0xff <s 0x00, but 0x00ff >s 0x0000
- if (match(TrueVal, m_ZExt(m_Specific(CmpLHS))) && ZextRHS == FalseVal) {
- CmpLHS = TrueVal;
- AdjustedRHS = ZextRHS;
- } else if (match(FalseVal, m_ZExt(m_Specific(CmpLHS))) &&
- ZextRHS == TrueVal) {
- CmpLHS = FalseVal;
- AdjustedRHS = ZextRHS;
- } else {
- return false;
- }
- } else {
- return false;
- }
- } else {
- return false;
- }
-
- Pred = ICmpInst::getSwappedPredicate(Pred);
- CmpRHS = AdjustedRHS;
- std::swap(FalseVal, TrueVal);
- Cmp.setPredicate(Pred);
- Cmp.setOperand(0, CmpLHS);
- Cmp.setOperand(1, CmpRHS);
- Sel.setOperand(1, TrueVal);
- Sel.setOperand(2, FalseVal);
- Sel.swapProfMetadata();
-
- // Move the compare instruction right before the select instruction. Otherwise
- // the sext/zext value may be defined after the compare instruction uses it.
- Cmp.moveBefore(&Sel);
-
- return true;
-}
-
static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp,
InstCombinerImpl &IC) {
Value *LHS, *RHS;
@@ -1182,8 +1216,8 @@ static Instruction *canonicalizeSPF(SelectInst &Sel, ICmpInst &Cmp,
return nullptr;
}
-static bool replaceInInstruction(Value *V, Value *Old, Value *New,
- InstCombiner &IC, unsigned Depth = 0) {
+bool InstCombinerImpl::replaceInInstruction(Value *V, Value *Old, Value *New,
+ unsigned Depth) {
// Conservatively limit replacement to two instructions upwards.
if (Depth == 2)
return false;
@@ -1195,10 +1229,11 @@ static bool replaceInInstruction(Value *V, Value *Old, Value *New,
bool Changed = false;
for (Use &U : I->operands()) {
if (U == Old) {
- IC.replaceUse(U, New);
+ replaceUse(U, New);
+ Worklist.add(I);
Changed = true;
} else {
- Changed |= replaceInInstruction(U, Old, New, IC, Depth + 1);
+ Changed |= replaceInInstruction(U, Old, New, Depth + 1);
}
}
return Changed;
@@ -1254,7 +1289,7 @@ Instruction *InstCombinerImpl::foldSelectValueEquivalence(SelectInst &Sel,
// FIXME: Support vectors.
if (match(CmpRHS, m_ImmConstant()) && !match(CmpLHS, m_ImmConstant()) &&
!Cmp.getType()->isVectorTy())
- if (replaceInInstruction(TrueVal, CmpLHS, CmpRHS, *this))
+ if (replaceInInstruction(TrueVal, CmpLHS, CmpRHS))
return &Sel;
}
if (TrueVal != CmpRHS &&
@@ -1593,13 +1628,32 @@ static Instruction *foldSelectZeroOrOnes(ICmpInst *Cmp, Value *TVal,
return nullptr;
}
-static Value *foldSelectInstWithICmpConst(SelectInst &SI, ICmpInst *ICI) {
+static Value *foldSelectInstWithICmpConst(SelectInst &SI, ICmpInst *ICI,
+ InstCombiner::BuilderTy &Builder) {
const APInt *CmpC;
Value *V;
CmpInst::Predicate Pred;
if (!match(ICI, m_ICmp(Pred, m_Value(V), m_APInt(CmpC))))
return nullptr;
+ // Match clamp away from min/max value as a max/min operation.
+ Value *TVal = SI.getTrueValue();
+ Value *FVal = SI.getFalseValue();
+ if (Pred == ICmpInst::ICMP_EQ && V == FVal) {
+ // (V == UMIN) ? UMIN+1 : V --> umax(V, UMIN+1)
+ if (CmpC->isMinValue() && match(TVal, m_SpecificInt(*CmpC + 1)))
+ return Builder.CreateBinaryIntrinsic(Intrinsic::umax, V, TVal);
+ // (V == UMAX) ? UMAX-1 : V --> umin(V, UMAX-1)
+ if (CmpC->isMaxValue() && match(TVal, m_SpecificInt(*CmpC - 1)))
+ return Builder.CreateBinaryIntrinsic(Intrinsic::umin, V, TVal);
+ // (V == SMIN) ? SMIN+1 : V --> smax(V, SMIN+1)
+ if (CmpC->isMinSignedValue() && match(TVal, m_SpecificInt(*CmpC + 1)))
+ return Builder.CreateBinaryIntrinsic(Intrinsic::smax, V, TVal);
+ // (V == SMAX) ? SMAX-1 : V --> smin(V, SMAX-1)
+ if (CmpC->isMaxSignedValue() && match(TVal, m_SpecificInt(*CmpC - 1)))
+ return Builder.CreateBinaryIntrinsic(Intrinsic::smin, V, TVal);
+ }
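The clamp-away-from-the-extreme rewrite for the unsigned-minimum case, checked with std::max as a stand-in for umax; the other three cases in this hunk are symmetric.

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // (v == 0) ? 1 : v  is the same as  max(v, 1)  for unsigned v.
    int main() {
      for (uint32_t v = 0; v < 1000; ++v)
        assert(((v == 0u) ? 1u : v) == std::max<uint32_t>(v, 1));
      return 0;
    }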
+
BinaryOperator *BO;
const APInt *C;
CmpInst::Predicate CPred;
@@ -1632,7 +1686,7 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
if (Instruction *NewSPF = canonicalizeSPF(SI, *ICI, *this))
return NewSPF;
- if (Value *V = foldSelectInstWithICmpConst(SI, ICI))
+ if (Value *V = foldSelectInstWithICmpConst(SI, ICI, Builder))
return replaceInstUsesWith(SI, V);
if (Value *V = canonicalizeClampLike(SI, *ICI, Builder))
@@ -1642,18 +1696,17 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
tryToReuseConstantFromSelectInComparison(SI, *ICI, *this))
return NewSel;
- bool Changed = adjustMinMax(SI, *ICI);
-
if (Value *V = foldSelectICmpAnd(SI, ICI, Builder))
return replaceInstUsesWith(SI, V);
// NOTE: if we wanted to, this is where to detect integer MIN/MAX
+ bool Changed = false;
Value *TrueVal = SI.getTrueValue();
Value *FalseVal = SI.getFalseValue();
ICmpInst::Predicate Pred = ICI->getPredicate();
Value *CmpLHS = ICI->getOperand(0);
Value *CmpRHS = ICI->getOperand(1);
- if (CmpRHS != CmpLHS && isa<Constant>(CmpRHS)) {
+ if (CmpRHS != CmpLHS && isa<Constant>(CmpRHS) && !isa<Constant>(CmpLHS)) {
if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) {
// Transform (X == C) ? X : Y -> (X == C) ? C : Y
SI.setOperand(1, CmpRHS);
@@ -1683,7 +1736,7 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
// FIXME: This code is nearly duplicated in InstSimplify. Using/refactoring
// decomposeBitTestICmp() might help.
- {
+ if (TrueVal->getType()->isIntOrIntVectorTy()) {
unsigned BitWidth =
DL.getTypeSizeInBits(TrueVal->getType()->getScalarType());
APInt MinSignedValue = APInt::getSignedMinValue(BitWidth);
@@ -1735,6 +1788,9 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
foldSelectICmpAndAnd(SI.getType(), ICI, TrueVal, FalseVal, Builder))
return V;
+ if (Value *V = foldSelectICmpAndZeroShl(ICI, TrueVal, FalseVal, Builder))
+ return replaceInstUsesWith(SI, V);
+
if (Instruction *V = foldSelectCtlzToCttz(ICI, TrueVal, FalseVal, Builder))
return V;
@@ -1756,6 +1812,9 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
if (Value *V = canonicalizeSaturatedAdd(ICI, TrueVal, FalseVal, Builder))
return replaceInstUsesWith(SI, V);
+ if (Value *V = foldAbsDiff(ICI, TrueVal, FalseVal, Builder))
+ return replaceInstUsesWith(SI, V);
+
return Changed ? &SI : nullptr;
}
@@ -2418,7 +2477,7 @@ Instruction *InstCombinerImpl::foldVectorSelect(SelectInst &Sel) {
// in the case of a shuffle with no undefined mask elements.
ArrayRef<int> Mask;
if (match(TVal, m_OneUse(m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask)))) &&
- !is_contained(Mask, UndefMaskElem) &&
+ !is_contained(Mask, PoisonMaskElem) &&
cast<ShuffleVectorInst>(TVal)->isSelect()) {
if (X == FVal) {
// select Cond, (shuf_sel X, Y), X --> shuf_sel X, (select Cond, Y, X)
@@ -2432,7 +2491,7 @@ Instruction *InstCombinerImpl::foldVectorSelect(SelectInst &Sel) {
}
}
if (match(FVal, m_OneUse(m_Shuffle(m_Value(X), m_Value(Y), m_Mask(Mask)))) &&
- !is_contained(Mask, UndefMaskElem) &&
+ !is_contained(Mask, PoisonMaskElem) &&
cast<ShuffleVectorInst>(FVal)->isSelect()) {
if (X == TVal) {
// select Cond, X, (shuf_sel X, Y) --> shuf_sel X, (select Cond, X, Y)
@@ -2965,6 +3024,14 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
if (match(CondVal, m_Select(m_Value(A), m_Value(B), m_Zero())) &&
match(TrueVal, m_Specific(B)) && match(FalseVal, m_Zero()))
return replaceOperand(SI, 0, A);
+ // select a, (select ~a, true, b), false -> select a, b, false
+ if (match(TrueVal, m_c_LogicalOr(m_Not(m_Specific(CondVal)), m_Value(B))) &&
+ match(FalseVal, m_Zero()))
+ return replaceOperand(SI, 1, B);
+ // select a, true, (select ~a, b, false) -> select a, true, b
+ if (match(FalseVal, m_c_LogicalAnd(m_Not(m_Specific(CondVal)), m_Value(B))) &&
+ match(TrueVal, m_One()))
+ return replaceOperand(SI, 2, B);
// ~(A & B) & (A | B) --> A ^ B
if (match(&SI, m_c_LogicalAnd(m_Not(m_LogicalAnd(m_Value(A), m_Value(B))),
@@ -3077,6 +3144,134 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) {
return nullptr;
}
+// Return true if we can safely remove the select instruction for std::bit_ceil
+// pattern.
+static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0,
+ const APInt *Cond1, Value *CtlzOp,
+ unsigned BitWidth) {
+ // The challenge in recognizing std::bit_ceil(X) is that the operand is used
+ // for the CTLZ proper and select condition, each possibly with some
+ // operation like add and sub.
+ //
+ // Our aim is to make sure that -ctlz & (BitWidth - 1) == 0 even when the
+ // select instruction would select 1, which allows us to get rid of the select
+ // instruction.
+ //
+ // To see if we can do so, we do some symbolic execution with ConstantRange.
+ // Specifically, we compute the range of values that Cond0 could take when
+ // Cond == false. Then we successively transform the range until we obtain
+ // the range of values that CtlzOp could take.
+ //
+ // Conceptually, we follow the def-use chain backward from Cond0 while
+ // transforming the range for Cond0 until we meet the common ancestor of Cond0
+ // and CtlzOp. Then we follow the def-use chain forward until we obtain the
+ // range for CtlzOp. That said, we only follow at most one ancestor from
+ // Cond0. Likewise, we only follow at most one ancestor from CtlzOp.
+
+ ConstantRange CR = ConstantRange::makeExactICmpRegion(
+ CmpInst::getInversePredicate(Pred), *Cond1);
+
+ // Match the operation that's used to compute CtlzOp from CommonAncestor. If
+ // CtlzOp == CommonAncestor, return true as no operation is needed. If a
+ // match is found, execute the operation on CR, update CR, and return true.
+ // Otherwise, return false.
+ auto MatchForward = [&](Value *CommonAncestor) {
+ const APInt *C = nullptr;
+ if (CtlzOp == CommonAncestor)
+ return true;
+ if (match(CtlzOp, m_Add(m_Specific(CommonAncestor), m_APInt(C)))) {
+ CR = CR.add(*C);
+ return true;
+ }
+ if (match(CtlzOp, m_Sub(m_APInt(C), m_Specific(CommonAncestor)))) {
+ CR = ConstantRange(*C).sub(CR);
+ return true;
+ }
+ if (match(CtlzOp, m_Not(m_Specific(CommonAncestor)))) {
+ CR = CR.binaryNot();
+ return true;
+ }
+ return false;
+ };
+
+ const APInt *C = nullptr;
+ Value *CommonAncestor;
+ if (MatchForward(Cond0)) {
+ // Cond0 is either CtlzOp or CtlzOp's parent. CR has been updated.
+ } else if (match(Cond0, m_Add(m_Value(CommonAncestor), m_APInt(C)))) {
+ CR = CR.sub(*C);
+ if (!MatchForward(CommonAncestor))
+ return false;
+ // Cond0's parent is either CtlzOp or CtlzOp's parent. CR has been updated.
+ } else {
+ return false;
+ }
+
+ // Return true if all the values in the range are either 0 or negative (if
+ // treated as signed). We do so by evaluating:
+ //
+ // CR - 1 u>= (1 << (BitWidth - 1)) - 1.
+ APInt IntMax = APInt::getSignMask(BitWidth) - 1;
+ CR = CR.sub(APInt(BitWidth, 1));
+ return CR.icmp(ICmpInst::ICMP_UGE, IntMax);
+}
+
+// Transform the std::bit_ceil(X) pattern like:
+//
+// %dec = add i32 %x, -1
+// %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
+// %sub = sub i32 32, %ctlz
+// %shl = shl i32 1, %sub
+// %ugt = icmp ugt i32 %x, 1
+// %sel = select i1 %ugt, i32 %shl, i32 1
+//
+// into:
+//
+// %dec = add i32 %x, -1
+// %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
+// %neg = sub i32 0, %ctlz
+// %masked = and i32 %neg, 31
+// %shl = shl i32 1, %masked
+//
+// Note that the select is optimized away while the shift count is masked with
+// 31. We handle some variations of the input operand like std::bit_ceil(X +
+// 1).
+static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder) {
+ Type *SelType = SI.getType();
+ unsigned BitWidth = SelType->getScalarSizeInBits();
+
+ Value *FalseVal = SI.getFalseValue();
+ Value *TrueVal = SI.getTrueValue();
+ ICmpInst::Predicate Pred;
+ const APInt *Cond1;
+ Value *Cond0, *Ctlz, *CtlzOp;
+ if (!match(SI.getCondition(), m_ICmp(Pred, m_Value(Cond0), m_APInt(Cond1))))
+ return nullptr;
+
+ if (match(TrueVal, m_One())) {
+ std::swap(FalseVal, TrueVal);
+ Pred = CmpInst::getInversePredicate(Pred);
+ }
+
+ if (!match(FalseVal, m_One()) ||
+ !match(TrueVal,
+ m_OneUse(m_Shl(m_One(), m_OneUse(m_Sub(m_SpecificInt(BitWidth),
+ m_Value(Ctlz)))))) ||
+ !match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Zero())) ||
+ !isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth))
+ return nullptr;
+
+ // Build 1 << (-CTLZ & (BitWidth-1)). The negation likely corresponds to a
+ // single hardware instruction as opposed to BitWidth - CTLZ, where BitWidth
+ // is an integer constant. Masking with BitWidth-1 comes free on some
+ // hardware as part of the shift instruction.
+ Value *Neg = Builder.CreateNeg(Ctlz);
+ Value *Masked =
+ Builder.CreateAnd(Neg, ConstantInt::get(SelType, BitWidth - 1));
+ return BinaryOperator::Create(Instruction::Shl, ConstantInt::get(SelType, 1),
+ Masked);
+}
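A standalone C++20 sketch of the rewritten computation, comparing the masked-shift form against std::bit_ceil for a few representative inputs, including 0 and 1 where the original select picks the constant 1; the test values are arbitrary but stay in the range where bit_ceil is representable in 32 bits.

    #include <bit>
    #include <cassert>
    #include <cstdint>

    // Masked-shift form produced by the fold:
    //   bit_ceil(x) == 1 << (-countl_zero(x - 1) & 31)
    static uint32_t bit_ceil_masked(uint32_t x) {
      unsigned ctlz = std::countl_zero(static_cast<uint32_t>(x - 1));
      return uint32_t{1} << ((0u - ctlz) & 31u);
    }

    int main() {
      for (uint32_t x : {0u, 1u, 2u, 3u, 5u, 1000u, (1u << 31) - 1, 1u << 31})
        assert(bit_ceil_masked(x) == std::bit_ceil(x));
      return 0;
    }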
+
Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
Value *CondVal = SI.getCondition();
Value *TrueVal = SI.getTrueValue();
@@ -3253,6 +3448,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
std::swap(NewT, NewF);
Value *NewSI =
Builder.CreateSelect(CondVal, NewT, NewF, SI.getName() + ".idx", &SI);
+ if (Gep->isInBounds())
+ return GetElementPtrInst::CreateInBounds(ElementType, Ptr, {NewSI});
return GetElementPtrInst::Create(ElementType, Ptr, {NewSI});
};
if (auto *TrueGep = dyn_cast<GetElementPtrInst>(TrueVal))
@@ -3364,25 +3561,14 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
}
}
- auto canMergeSelectThroughBinop = [](BinaryOperator *BO) {
- // The select might be preventing a division by 0.
- switch (BO->getOpcode()) {
- default:
- return true;
- case Instruction::SRem:
- case Instruction::URem:
- case Instruction::SDiv:
- case Instruction::UDiv:
- return false;
- }
- };
-
// Try to simplify a binop sandwiched between 2 selects with the same
- // condition.
+ // condition. This is not valid for div/rem because the select might be
+ // preventing a division-by-zero.
+ // TODO: A div/rem restriction is conservative; use something like
+ // isSafeToSpeculativelyExecute().
// select(C, binop(select(C, X, Y), W), Z) -> select(C, binop(X, W), Z)
BinaryOperator *TrueBO;
- if (match(TrueVal, m_OneUse(m_BinOp(TrueBO))) &&
- canMergeSelectThroughBinop(TrueBO)) {
+ if (match(TrueVal, m_OneUse(m_BinOp(TrueBO))) && !TrueBO->isIntDivRem()) {
if (auto *TrueBOSI = dyn_cast<SelectInst>(TrueBO->getOperand(0))) {
if (TrueBOSI->getCondition() == CondVal) {
replaceOperand(*TrueBO, 0, TrueBOSI->getTrueValue());
@@ -3401,8 +3587,7 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
// select(C, Z, binop(select(C, X, Y), W)) -> select(C, Z, binop(Y, W))
BinaryOperator *FalseBO;
- if (match(FalseVal, m_OneUse(m_BinOp(FalseBO))) &&
- canMergeSelectThroughBinop(FalseBO)) {
+ if (match(FalseVal, m_OneUse(m_BinOp(FalseBO))) && !FalseBO->isIntDivRem()) {
if (auto *FalseBOSI = dyn_cast<SelectInst>(FalseBO->getOperand(0))) {
if (FalseBOSI->getCondition() == CondVal) {
replaceOperand(*FalseBO, 0, FalseBOSI->getFalseValue());
@@ -3516,5 +3701,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
if (sinkNotIntoOtherHandOfLogicalOp(SI))
return &SI;
+ if (Instruction *I = foldBitCeil(SI, Builder))
+ return I;
+
return nullptr;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index ec505381cc86..89dad455f015 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -322,15 +322,20 @@ dropRedundantMaskingOfLeftShiftInput(BinaryOperator *OuterShift,
return BinaryOperator::Create(Instruction::And, NewShift, NewMask);
}
-/// If we have a shift-by-constant of a bitwise logic op that itself has a
-/// shift-by-constant operand with identical opcode, we may be able to convert
-/// that into 2 independent shifts followed by the logic op. This eliminates a
-/// a use of an intermediate value (reduces dependency chain).
-static Instruction *foldShiftOfShiftedLogic(BinaryOperator &I,
+/// If we have a shift-by-constant of a bin op (bitwise logic op or add/sub w/
+/// shl) that itself has a shift-by-constant operand with identical opcode, we
+/// may be able to convert that into 2 independent shifts followed by the logic
+/// op. This eliminates a use of an intermediate value (reduces dependency
+/// chain).
+static Instruction *foldShiftOfShiftedBinOp(BinaryOperator &I,
InstCombiner::BuilderTy &Builder) {
assert(I.isShift() && "Expected a shift as input");
- auto *LogicInst = dyn_cast<BinaryOperator>(I.getOperand(0));
- if (!LogicInst || !LogicInst->isBitwiseLogicOp() || !LogicInst->hasOneUse())
+ auto *BinInst = dyn_cast<BinaryOperator>(I.getOperand(0));
+ if (!BinInst ||
+ (!BinInst->isBitwiseLogicOp() &&
+ BinInst->getOpcode() != Instruction::Add &&
+ BinInst->getOpcode() != Instruction::Sub) ||
+ !BinInst->hasOneUse())
return nullptr;
Constant *C0, *C1;
@@ -338,6 +343,12 @@ static Instruction *foldShiftOfShiftedLogic(BinaryOperator &I,
return nullptr;
Instruction::BinaryOps ShiftOpcode = I.getOpcode();
+ // Transform for add/sub only works with shl.
+ if ((BinInst->getOpcode() == Instruction::Add ||
+ BinInst->getOpcode() == Instruction::Sub) &&
+ ShiftOpcode != Instruction::Shl)
+ return nullptr;
+
Type *Ty = I.getType();
// Find a matching one-use shift by constant. The fold is not valid if the sum
@@ -352,19 +363,25 @@ static Instruction *foldShiftOfShiftedLogic(BinaryOperator &I,
m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, Threshold));
};
- // Logic ops are commutative, so check each operand for a match.
- if (matchFirstShift(LogicInst->getOperand(0)))
- Y = LogicInst->getOperand(1);
- else if (matchFirstShift(LogicInst->getOperand(1)))
- Y = LogicInst->getOperand(0);
- else
+ // Logic ops and Add are commutative, so check each operand for a match. Sub
+ // is not, so we cannot reorder if we match operand(1) and need to keep the
+ // operands in their original positions.
+ bool FirstShiftIsOp1 = false;
+ if (matchFirstShift(BinInst->getOperand(0)))
+ Y = BinInst->getOperand(1);
+ else if (matchFirstShift(BinInst->getOperand(1))) {
+ Y = BinInst->getOperand(0);
+ FirstShiftIsOp1 = BinInst->getOpcode() == Instruction::Sub;
+ } else
return nullptr;
- // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
+ // shift (binop (shift X, C0), Y), C1 -> binop (shift X, C0+C1), (shift Y, C1)
Constant *ShiftSumC = ConstantExpr::getAdd(C0, C1);
Value *NewShift1 = Builder.CreateBinOp(ShiftOpcode, X, ShiftSumC);
Value *NewShift2 = Builder.CreateBinOp(ShiftOpcode, Y, C1);
- return BinaryOperator::Create(LogicInst->getOpcode(), NewShift1, NewShift2);
+ Value *Op1 = FirstShiftIsOp1 ? NewShift2 : NewShift1;
+ Value *Op2 = FirstShiftIsOp1 ? NewShift1 : NewShift2;
+ return BinaryOperator::Create(BinInst->getOpcode(), Op1, Op2);
}
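The distributivity fact that makes the extended fold valid for add and, with operand order preserved, sub; the shift amounts below are arbitrary (their sum stays below the bit width), and wrap-around matches because unsigned arithmetic is modular.

    #include <cassert>
    #include <cstdint>

    // A left shift distributes over add/sub modulo 2^32, so the two shifts
    // can be made independent.
    int main() {
      const uint32_t c0 = 3, c1 = 4;  // c0 + c1 < 32
      for (uint32_t x = 0; x < 300; ++x)
        for (uint32_t y = 0; y < 300; y += 7) {
          assert((((x << c0) + y) << c1) == ((x << (c0 + c1)) + (y << c1)));
          assert((((x << c0) - y) << c1) == ((x << (c0 + c1)) - (y << c1)));
        }
      return 0;
    }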
Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) {
@@ -463,9 +480,12 @@ Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) {
return replaceOperand(I, 1, Rem);
}
- if (Instruction *Logic = foldShiftOfShiftedLogic(I, Builder))
+ if (Instruction *Logic = foldShiftOfShiftedBinOp(I, Builder))
return Logic;
+ // A shift amount of the form (Y | (BitWidth - 1)) is either exactly
+ // BitWidth - 1 or an over-shift (poison), so treat it as BitWidth - 1.
+ if (match(Op1, m_Or(m_Value(), m_SpecificInt(BitWidth - 1))))
+ return replaceOperand(I, 1, ConstantInt::get(Ty, BitWidth - 1));
+
return nullptr;
}
@@ -570,8 +590,7 @@ static bool canEvaluateShifted(Value *V, unsigned NumBits, bool IsLeftShift,
const APInt *MulConst;
// We can fold (shr (mul X, -(1 << C)), C) -> (and (neg X), C`)
return !IsLeftShift && match(I->getOperand(1), m_APInt(MulConst)) &&
- MulConst->isNegatedPowerOf2() &&
- MulConst->countTrailingZeros() == NumBits;
+ MulConst->isNegatedPowerOf2() && MulConst->countr_zero() == NumBits;
}
}
}
@@ -900,8 +919,10 @@ Instruction *InstCombinerImpl::foldLShrOverflowBit(BinaryOperator &I) {
// Replace the uses of the original add with a zext of the
// NarrowAdd's result. Note that all users at this stage are known to
// be ShAmt-sized truncs, or the lshr itself.
- if (!Add->hasOneUse())
+ if (!Add->hasOneUse()) {
replaceInstUsesWith(*AddInst, Builder.CreateZExt(NarrowAdd, Ty));
+ eraseInstFromFunction(*AddInst);
+ }
// Replace the LShr with a zext of the overflow check.
return new ZExtInst(Overflow, Ty);
@@ -1133,6 +1154,14 @@ Instruction *InstCombinerImpl::visitShl(BinaryOperator &I) {
return BinaryOperator::CreateLShr(
ConstantInt::get(Ty, APInt::getSignMask(BitWidth)), X);
+ // Canonicalize "extract lowest set bit" using cttz to and-with-negate:
+ // 1 << (cttz X) --> -X & X
+ if (match(Op1,
+ m_OneUse(m_Intrinsic<Intrinsic::cttz>(m_Value(X), m_Value())))) {
+ Value *NegX = Builder.CreateNeg(X, "neg");
+ return BinaryOperator::CreateAnd(NegX, X);
+ }
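The canonicalized form and the original compute the same value for any nonzero x; a check with std::countr_zero standing in for llvm.cttz, with an arbitrary sampling step.

    #include <bit>
    #include <cassert>
    #include <cstdint>

    // 1 << cttz(x) reproduces the lowest set bit, which is also x & -x.
    int main() {
      for (uint32_t x = 1; x < 100000; x += 3)
        assert((uint32_t{1} << std::countr_zero(x)) == (x & (0u - x)));
      return 0;
    }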
+
// The only way to shift out the 1 is with an over-shift, so that would
// be poison with or without "nuw". Undef is excluded because (undef << X)
// is not undef (it is zero).
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 77d675422966..00eece9534b0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -168,7 +168,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If the high-bits of an ADD/SUB/MUL are not demanded, then we do not care
// about the high bits of the operands.
auto simplifyOperandsBasedOnUnusedHighBits = [&](APInt &DemandedFromOps) {
- unsigned NLZ = DemandedMask.countLeadingZeros();
+ unsigned NLZ = DemandedMask.countl_zero();
// Right fill the mask of bits for the operands to demand the most
// significant bit and all those below it.
DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
@@ -195,7 +195,8 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
- Known = LHSKnown & RHSKnown;
+ Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
+ Depth, DL, &AC, CxtI, &DT);
// If the client is only demanding bits that we know, return the known
// constant.
@@ -224,7 +225,8 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
- Known = LHSKnown | RHSKnown;
+ Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
+ Depth, DL, &AC, CxtI, &DT);
// If the client is only demanding bits that we know, return the known
// constant.
@@ -262,7 +264,8 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
- Known = LHSKnown ^ RHSKnown;
+ Known = analyzeKnownBitsFromAndXorOr(cast<Operator>(I), LHSKnown, RHSKnown,
+ Depth, DL, &AC, CxtI, &DT);
// If the client is only demanding bits that we know, return the known
// constant.
@@ -381,7 +384,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return I;
// Only known if known in both the LHS and RHS.
- Known = KnownBits::commonBits(LHSKnown, RHSKnown);
+ Known = LHSKnown.intersectWith(RHSKnown);
break;
}
case Instruction::Trunc: {
@@ -393,7 +396,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// The shift amount must be valid (not poison) in the narrow type, and
// it must not be greater than the high bits demanded of the result.
if (C->ult(VTy->getScalarSizeInBits()) &&
- C->ule(DemandedMask.countLeadingZeros())) {
+ C->ule(DemandedMask.countl_zero())) {
// trunc (lshr X, C) --> lshr (trunc X), C
IRBuilderBase::InsertPointGuard Guard(Builder);
Builder.SetInsertPoint(I);
@@ -508,7 +511,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// Right fill the mask of bits for the operands to demand the most
// significant bit and all those below it.
- unsigned NLZ = DemandedMask.countLeadingZeros();
+ unsigned NLZ = DemandedMask.countl_zero();
APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
if (ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1))
@@ -517,7 +520,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If low order bits are not demanded and known to be zero in one operand,
// then we don't need to demand them from the other operand, since they
// can't cause overflow into any bits that are demanded in the result.
- unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countTrailingOnes();
+ unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one();
APInt DemandedFromLHS = DemandedFromOps;
DemandedFromLHS.clearLowBits(NTZ);
if (ShrinkDemandedConstant(I, 0, DemandedFromLHS) ||
@@ -539,7 +542,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
case Instruction::Sub: {
// Right fill the mask of bits for the operands to demand the most
// significant bit and all those below it.
- unsigned NLZ = DemandedMask.countLeadingZeros();
+ unsigned NLZ = DemandedMask.countl_zero();
APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
if (ShrinkDemandedConstant(I, 1, DemandedFromOps) ||
SimplifyDemandedBits(I, 1, DemandedFromOps, RHSKnown, Depth + 1))
@@ -548,7 +551,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If low order bits are not demanded and are known to be zero in RHS,
// then we don't need to demand them from LHS, since they can't cause a
// borrow from any bits that are demanded in the result.
- unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countTrailingOnes();
+ unsigned NTZ = (~DemandedMask & RHSKnown.Zero).countr_one();
APInt DemandedFromLHS = DemandedFromOps;
DemandedFromLHS.clearLowBits(NTZ);
if (ShrinkDemandedConstant(I, 0, DemandedFromLHS) ||
@@ -578,10 +581,9 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1.
// If we demand exactly one bit N and we have "X * (C' << N)" where C' is
// odd (has LSB set), then the left-shifted low bit of X is the answer.
- unsigned CTZ = DemandedMask.countTrailingZeros();
+ unsigned CTZ = DemandedMask.countr_zero();
const APInt *C;
- if (match(I->getOperand(1), m_APInt(C)) &&
- C->countTrailingZeros() == CTZ) {
+ if (match(I->getOperand(1), m_APInt(C)) && C->countr_zero() == CTZ) {
Constant *ShiftC = ConstantInt::get(VTy, CTZ);
Instruction *Shl = BinaryOperator::CreateShl(I->getOperand(0), ShiftC);
return InsertNewInstWith(Shl, *I);
@@ -619,7 +621,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1);
Value *X;
Constant *C;
- if (DemandedMask.countTrailingZeros() >= ShiftAmt &&
+ if (DemandedMask.countr_zero() >= ShiftAmt &&
match(I->getOperand(0), m_LShr(m_ImmConstant(C), m_Value(X)))) {
Constant *LeftShiftAmtC = ConstantInt::get(VTy, ShiftAmt);
Constant *NewC = ConstantExpr::getShl(C, LeftShiftAmtC);
@@ -642,29 +644,15 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return I;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- bool SignBitZero = Known.Zero.isSignBitSet();
- bool SignBitOne = Known.One.isSignBitSet();
- Known.Zero <<= ShiftAmt;
- Known.One <<= ShiftAmt;
- // low bits known zero.
- if (ShiftAmt)
- Known.Zero.setLowBits(ShiftAmt);
-
- // If this shift has "nsw" keyword, then the result is either a poison
- // value or has the same sign bit as the first operand.
- if (IOp->hasNoSignedWrap()) {
- if (SignBitZero)
- Known.Zero.setSignBit();
- else if (SignBitOne)
- Known.One.setSignBit();
- if (Known.hasConflict())
- return UndefValue::get(VTy);
- }
+ Known = KnownBits::shl(Known,
+ KnownBits::makeConstant(APInt(BitWidth, ShiftAmt)),
+ /* NUW */ IOp->hasNoUnsignedWrap(),
+ /* NSW */ IOp->hasNoSignedWrap());
} else {
// This is a variable shift, so we can't shift the demand mask by a known
// amount. But if we are not demanding high bits, then we are not
// demanding those bits from the pre-shifted operand either.
- if (unsigned CTLZ = DemandedMask.countLeadingZeros()) {
+ if (unsigned CTLZ = DemandedMask.countl_zero()) {
APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ));
if (SimplifyDemandedBits(I, 0, DemandedFromOp, Known, Depth + 1)) {
// We can't guarantee that nsw/nuw hold after simplifying the operand.
@@ -683,11 +671,10 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If we are just demanding the shifted sign bit and below, then this can
// be treated as an ASHR in disguise.
- if (DemandedMask.countLeadingZeros() >= ShiftAmt) {
+ if (DemandedMask.countl_zero() >= ShiftAmt) {
// If we only want bits that already match the signbit then we don't
// need to shift.
- unsigned NumHiDemandedBits =
- BitWidth - DemandedMask.countTrailingZeros();
+ unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero();
unsigned SignBits =
ComputeNumSignBits(I->getOperand(0), Depth + 1, CxtI);
if (SignBits >= NumHiDemandedBits)
@@ -734,7 +721,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// If we only want bits that already match the signbit then we don't need
// to shift.
- unsigned NumHiDemandedBits = BitWidth - DemandedMask.countTrailingZeros();
+ unsigned NumHiDemandedBits = BitWidth - DemandedMask.countr_zero();
if (SignBits >= NumHiDemandedBits)
return I->getOperand(0);
@@ -757,7 +744,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
APInt DemandedMaskIn(DemandedMask.shl(ShiftAmt));
// If any of the high bits are demanded, we should set the sign bit as
// demanded.
- if (DemandedMask.countLeadingZeros() <= ShiftAmt)
+ if (DemandedMask.countl_zero() <= ShiftAmt)
DemandedMaskIn.setSignBit();
// If the shift is exact, then it does demand the low bits (and knows that
@@ -797,7 +784,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
const APInt *SA;
if (match(I->getOperand(1), m_APInt(SA))) {
// TODO: Take the demanded mask of the result into account.
- unsigned RHSTrailingZeros = SA->countTrailingZeros();
+ unsigned RHSTrailingZeros = SA->countr_zero();
APInt DemandedMaskIn =
APInt::getHighBitsSet(BitWidth, BitWidth - RHSTrailingZeros);
if (SimplifyDemandedBits(I, 0, DemandedMaskIn, LHSKnown, Depth + 1)) {
@@ -807,9 +794,8 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
return I;
}
- // Increase high zero bits from the input.
- Known.Zero.setHighBits(std::min(
- BitWidth, LHSKnown.Zero.countLeadingOnes() + RHSTrailingZeros));
+ Known = KnownBits::udiv(LHSKnown, KnownBits::makeConstant(*SA),
+ cast<BinaryOperator>(I)->isExact());
} else {
computeKnownBits(I, Known, Depth, CxtI);
}
@@ -851,25 +837,16 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
}
- // The sign bit is the LHS's sign bit, except when the result of the
- // remainder is zero.
- if (DemandedMask.isSignBitSet()) {
- computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
- // If it's known zero, our sign bit is also zero.
- if (LHSKnown.isNonNegative())
- Known.makeNonNegative();
- }
+ computeKnownBits(I, Known, Depth, CxtI);
break;
}
case Instruction::URem: {
- KnownBits Known2(BitWidth);
APInt AllOnes = APInt::getAllOnes(BitWidth);
- if (SimplifyDemandedBits(I, 0, AllOnes, Known2, Depth + 1) ||
- SimplifyDemandedBits(I, 1, AllOnes, Known2, Depth + 1))
+ if (SimplifyDemandedBits(I, 0, AllOnes, LHSKnown, Depth + 1) ||
+ SimplifyDemandedBits(I, 1, AllOnes, RHSKnown, Depth + 1))
return I;
- unsigned Leaders = Known2.countMinLeadingZeros();
- Known.Zero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask;
+ Known = KnownBits::urem(LHSKnown, RHSKnown);
break;
}
case Instruction::Call: {
@@ -897,8 +874,8 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
case Intrinsic::bswap: {
// If the only bits demanded come from one byte of the bswap result,
// just shift the input byte into position to eliminate the bswap.
- unsigned NLZ = DemandedMask.countLeadingZeros();
- unsigned NTZ = DemandedMask.countTrailingZeros();
+ unsigned NLZ = DemandedMask.countl_zero();
+ unsigned NTZ = DemandedMask.countr_zero();
// Round NTZ down to the next byte. If we have 11 trailing zeros, then
// we need all the bits down to bit 8. Likewise, round NLZ. If we
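The bswap case above works because demanding a single byte of a byte-swapped value is the same as demanding the mirrored byte of the original value, so a plain shift can replace the swap. A self-contained C++ check of that equivalence for one byte position (bswap32 is a local helper written here for the illustration, not an API from the patch):

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0xff00u) | ((V << 8) & 0xff0000u) | (V << 24);
}

int main() {
  uint32_t X = 0xa1b2c3d4u;
  // Demanding only byte 2 (bits 16..23) of bswap(X) reads the same bits as
  // byte 1 (bits 8..15) of X, so the swap can be dropped in favor of a shift.
  uint32_t FromBswap = (bswap32(X) >> 16) & 0xffu;
  uint32_t FromShift = (X >> 8) & 0xffu;
  assert(FromBswap == FromShift);
  return 0;
}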
@@ -935,9 +912,28 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
APInt DemandedMaskLHS(DemandedMask.lshr(ShiftAmt));
APInt DemandedMaskRHS(DemandedMask.shl(BitWidth - ShiftAmt));
- if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown, Depth + 1) ||
- SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1))
- return I;
+ if (I->getOperand(0) != I->getOperand(1)) {
+ if (SimplifyDemandedBits(I, 0, DemandedMaskLHS, LHSKnown,
+ Depth + 1) ||
+ SimplifyDemandedBits(I, 1, DemandedMaskRHS, RHSKnown, Depth + 1))
+ return I;
+ } else { // fshl is a rotate
+ // Avoid converting rotate into funnel shift.
+ // Only simplify if one operand is constant.
+ LHSKnown = computeKnownBits(I->getOperand(0), Depth + 1, I);
+ if (DemandedMaskLHS.isSubsetOf(LHSKnown.Zero | LHSKnown.One) &&
+ !match(I->getOperand(0), m_SpecificInt(LHSKnown.One))) {
+ replaceOperand(*I, 0, Constant::getIntegerValue(VTy, LHSKnown.One));
+ return I;
+ }
+
+ RHSKnown = computeKnownBits(I->getOperand(1), Depth + 1, I);
+ if (DemandedMaskRHS.isSubsetOf(RHSKnown.Zero | RHSKnown.One) &&
+ !match(I->getOperand(1), m_SpecificInt(RHSKnown.One))) {
+ replaceOperand(*I, 1, Constant::getIntegerValue(VTy, RHSKnown.One));
+ return I;
+ }
+ }
Known.Zero = LHSKnown.Zero.shl(ShiftAmt) |
RHSKnown.Zero.lshr(BitWidth - ShiftAmt);
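For the fshl handling above: a funnel shift takes the high bits of the concatenation X:Y shifted left by the amount, and when both inputs are the same value it is simply a rotate, which is why the code is careful not to break that form. A short C++20 sketch of those semantics for a shift amount strictly between 0 and the bit width (std::rotl from <bit>; illustration only):

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xdeadbeefu, Y = 0x12345678u;
  unsigned S = 12, BW = 32;
  uint32_t Fshl = (X << S) | (Y >> (BW - S)); // fshl(X, Y, S) for 0 < S < BW
  assert(Fshl == 0xdbeef123u);
  // With equal operands the funnel shift degenerates into a rotate.
  assert(((X << S) | (X >> (BW - S))) == std::rotl(X, (int)S));
  return 0;
}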
@@ -951,7 +947,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// The lowest non-zero bit of DemandMask is higher than the highest
// non-zero bit of C.
const APInt *C;
- unsigned CTZ = DemandedMask.countTrailingZeros();
+ unsigned CTZ = DemandedMask.countr_zero();
if (match(II->getArgOperand(1), m_APInt(C)) &&
CTZ >= C->getActiveBits())
return II->getArgOperand(0);
@@ -963,9 +959,9 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// non-one bit of C.
// This comes from using DeMorgans on the above umax example.
const APInt *C;
- unsigned CTZ = DemandedMask.countTrailingZeros();
+ unsigned CTZ = DemandedMask.countr_zero();
if (match(II->getArgOperand(1), m_APInt(C)) &&
- CTZ >= C->getBitWidth() - C->countLeadingOnes())
+ CTZ >= C->getBitWidth() - C->countl_one())
return II->getArgOperand(0);
break;
}
@@ -1014,6 +1010,7 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
Known = LHSKnown & RHSKnown;
+ computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI));
// If the client is only demanding bits that we know, return the known
// constant.
@@ -1033,6 +1030,7 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
Known = LHSKnown | RHSKnown;
+ computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI));
// If the client is only demanding bits that we know, return the known
// constant.
@@ -1054,6 +1052,7 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
computeKnownBits(I->getOperand(1), RHSKnown, Depth + 1, CxtI);
computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
Known = LHSKnown ^ RHSKnown;
+ computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI));
// If the client is only demanding bits that we know, return the known
// constant.
@@ -1071,7 +1070,7 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
break;
}
case Instruction::Add: {
- unsigned NLZ = DemandedMask.countLeadingZeros();
+ unsigned NLZ = DemandedMask.countl_zero();
APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
// If an operand adds zeros to every bit below the highest demanded bit,
@@ -1084,10 +1083,13 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
if (DemandedFromOps.isSubsetOf(LHSKnown.Zero))
return I->getOperand(1);
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ Known = KnownBits::computeForAddSub(/*Add*/ true, NSW, LHSKnown, RHSKnown);
+ computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI));
break;
}
case Instruction::Sub: {
- unsigned NLZ = DemandedMask.countLeadingZeros();
+ unsigned NLZ = DemandedMask.countl_zero();
APInt DemandedFromOps = APInt::getLowBitsSet(BitWidth, BitWidth - NLZ);
// If an operand subtracts zeros from every bit below the highest demanded
@@ -1096,6 +1098,10 @@ Value *InstCombinerImpl::SimplifyMultipleUseDemandedBits(
if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
return I->getOperand(0);
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);
+ Known = KnownBits::computeForAddSub(/*Add*/ false, NSW, LHSKnown, RHSKnown);
+ computeKnownBitsFromAssume(I, Known, Depth, SQ.getWithInstruction(CxtI));
break;
}
case Instruction::AShr: {
@@ -1541,7 +1547,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// Found constant vector with single element - convert to insertelement.
if (Op && Value) {
Instruction *New = InsertElementInst::Create(
- Op, Value, ConstantInt::get(Type::getInt32Ty(I->getContext()), Idx),
+ Op, Value, ConstantInt::get(Type::getInt64Ty(I->getContext()), Idx),
Shuffle->getName());
InsertNewInstWith(New, *Shuffle);
return New;
@@ -1552,7 +1558,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
SmallVector<int, 16> Elts;
for (unsigned i = 0; i < VWidth; ++i) {
if (UndefElts[i])
- Elts.push_back(UndefMaskElem);
+ Elts.push_back(PoisonMaskElem);
else
Elts.push_back(Shuffle->getMaskValue(i));
}
@@ -1653,7 +1659,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// corresponding input elements are undef.
for (unsigned OutIdx = 0; OutIdx != VWidth; ++OutIdx) {
APInt SubUndef = UndefElts2.lshr(OutIdx * Ratio).zextOrTrunc(Ratio);
- if (SubUndef.countPopulation() == Ratio)
+ if (SubUndef.popcount() == Ratio)
UndefElts.setBit(OutIdx);
}
} else {
@@ -1712,6 +1718,54 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// UB/poison potential, but that should be refined.
BinaryOperator *BO;
if (match(I, m_BinOp(BO)) && !BO->isIntDivRem() && !BO->isShift()) {
+ Value *X = BO->getOperand(0);
+ Value *Y = BO->getOperand(1);
+
+ // Look for an equivalent binop except that one operand has been shuffled.
+ // If the demand for this binop only includes elements that are the same as
+ // the other binop, then we may be able to replace this binop with a use of
+ // the earlier one.
+ //
+ // Example:
+ // %other_bo = bo (shuf X, {0}), Y
+ // %this_extracted_bo = extelt (bo X, Y), 0
+ // -->
+ // %other_bo = bo (shuf X, {0}), Y
+ // %this_extracted_bo = extelt %other_bo, 0
+ //
+ // TODO: Handle demand of an arbitrary single element or more than one
+ // element instead of just element 0.
+ // TODO: Unlike general demanded elements transforms, this should be safe
+ // for any (div/rem/shift) opcode too.
+ if (DemandedElts == 1 && !X->hasOneUse() && !Y->hasOneUse() &&
+      BO->hasOneUse()) {
+
+ auto findShufBO = [&](bool MatchShufAsOp0) -> User * {
+ // Try to use shuffle-of-operand in place of an operand:
+ // bo X, Y --> bo (shuf X), Y
+ // bo X, Y --> bo X, (shuf Y)
+ BinaryOperator::BinaryOps Opcode = BO->getOpcode();
+ Value *ShufOp = MatchShufAsOp0 ? X : Y;
+ Value *OtherOp = MatchShufAsOp0 ? Y : X;
+ for (User *U : OtherOp->users()) {
+ auto Shuf = m_Shuffle(m_Specific(ShufOp), m_Value(), m_ZeroMask());
+ if (BO->isCommutative()
+ ? match(U, m_c_BinOp(Opcode, Shuf, m_Specific(OtherOp)))
+ : MatchShufAsOp0
+ ? match(U, m_BinOp(Opcode, Shuf, m_Specific(OtherOp)))
+ : match(U, m_BinOp(Opcode, m_Specific(OtherOp), Shuf)))
+ if (DT.dominates(U, I))
+ return U;
+ }
+ return nullptr;
+ };
+
+ if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ true))
+ return ShufBO;
+ if (User *ShufBO = findShufBO(/* MatchShufAsOp0 */ false))
+ return ShufBO;
+ }
+
simplifyAndSetOp(I, 0, DemandedElts, UndefElts);
simplifyAndSetOp(I, 1, DemandedElts, UndefElts2);
@@ -1723,7 +1777,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// If we've proven all of the lanes undef, return an undef value.
// TODO: Intersect w/demanded lanes
if (UndefElts.isAllOnes())
- return UndefValue::get(I->getType());;
+ return UndefValue::get(I->getType());
return MadeChange ? I : nullptr;
}
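Much of the churn in this file is a mechanical rename of the APInt bit-counting helpers (countLeadingZeros, countTrailingZeros, countPopulation) to the C++20 <bit>-style names (countl_zero, countr_zero, popcount). A quick standalone C++ reminder of what these primitives return, shown with the standard-library analogues rather than APInt (illustration only):

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  uint32_t DemandedMask = 0x00ffff00u;
  assert(std::countl_zero(DemandedMask) == 8);  // leading zeros  (old countLeadingZeros)
  assert(std::countr_zero(DemandedMask) == 8);  // trailing zeros (old countTrailingZeros)
  assert(std::popcount(DemandedMask) == 16);    // set bits       (old countPopulation)
  return 0;
}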
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 61e62adbe327..4a5ffef2b08e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -171,8 +171,11 @@ Instruction *InstCombinerImpl::scalarizePHI(ExtractElementInst &EI,
}
}
- for (auto *E : Extracts)
+ for (auto *E : Extracts) {
replaceInstUsesWith(*E, scalarPHI);
+ // Add old extract to worklist for DCE.
+ addToWorklist(E);
+ }
return &EI;
}
@@ -384,7 +387,7 @@ static APInt findDemandedEltsByAllUsers(Value *V) {
/// return it with the canonical type if it isn't already canonical. We
/// arbitrarily pick 64 bit as our canonical type. The actual bitwidth doesn't
/// matter, we just want a consistent type to simplify CSE.
-ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
+static ConstantInt *getPreferredVectorIndex(ConstantInt *IndexC) {
const unsigned IndexBW = IndexC->getType()->getBitWidth();
if (IndexBW == 64 || IndexC->getValue().getActiveBits() > 64)
return nullptr;
@@ -543,16 +546,16 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
->getNumElements();
if (SrcIdx < 0)
- return replaceInstUsesWith(EI, UndefValue::get(EI.getType()));
+ return replaceInstUsesWith(EI, PoisonValue::get(EI.getType()));
if (SrcIdx < (int)LHSWidth)
Src = SVI->getOperand(0);
else {
SrcIdx -= LHSWidth;
Src = SVI->getOperand(1);
}
- Type *Int32Ty = Type::getInt32Ty(EI.getContext());
+ Type *Int64Ty = Type::getInt64Ty(EI.getContext());
return ExtractElementInst::Create(
- Src, ConstantInt::get(Int32Ty, SrcIdx, false));
+ Src, ConstantInt::get(Int64Ty, SrcIdx, false));
}
} else if (auto *CI = dyn_cast<CastInst>(I)) {
// Canonicalize extractelement(cast) -> cast(extractelement).
@@ -594,6 +597,7 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
SrcVec, DemandedElts, UndefElts, 0 /* Depth */,
true /* AllowMultipleUsers */)) {
if (V != SrcVec) {
+ Worklist.addValue(SrcVec);
SrcVec->replaceAllUsesWith(V);
return &EI;
}
@@ -640,11 +644,11 @@ static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
return false;
unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
- if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
+ if (isa<PoisonValue>(ScalarOp)) { // inserting poison into vector.
// We can handle this if the vector we are inserting into is
// transitively ok.
if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
- // If so, update the mask to reflect the inserted undef.
+ // If so, update the mask to reflect the inserted poison.
Mask[InsertedIdx] = -1;
return true;
}
@@ -680,7 +684,7 @@ static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
/// If we have insertion into a vector that is wider than the vector that we
/// are extracting from, try to widen the source vector to allow a single
/// shufflevector to replace one or more insert/extract pairs.
-static void replaceExtractElements(InsertElementInst *InsElt,
+static bool replaceExtractElements(InsertElementInst *InsElt,
ExtractElementInst *ExtElt,
InstCombinerImpl &IC) {
auto *InsVecType = cast<FixedVectorType>(InsElt->getType());
@@ -691,7 +695,7 @@ static void replaceExtractElements(InsertElementInst *InsElt,
// The inserted-to vector must be wider than the extracted-from vector.
if (InsVecType->getElementType() != ExtVecType->getElementType() ||
NumExtElts >= NumInsElts)
- return;
+ return false;
// Create a shuffle mask to widen the extended-from vector using poison
// values. The mask selects all of the values of the original vector followed
@@ -719,7 +723,7 @@ static void replaceExtractElements(InsertElementInst *InsElt,
// that will delete our widening shuffle. This would trigger another attempt
// here to create that shuffle, and we spin forever.
if (InsertionBlock != InsElt->getParent())
- return;
+ return false;
// TODO: This restriction matches the check in visitInsertElementInst() and
// prevents an infinite loop caused by not turning the extract/insert pair
@@ -727,7 +731,7 @@ static void replaceExtractElements(InsertElementInst *InsElt,
// folds for shufflevectors because we're afraid to generate shuffle masks
// that the backend can't handle.
if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
- return;
+ return false;
auto *WideVec = new ShuffleVectorInst(ExtVecOp, ExtendMask);
@@ -747,9 +751,14 @@ static void replaceExtractElements(InsertElementInst *InsElt,
if (!OldExt || OldExt->getParent() != WideVec->getParent())
continue;
auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
- NewExt->insertAfter(OldExt);
+ IC.InsertNewInstWith(NewExt, *OldExt);
IC.replaceInstUsesWith(*OldExt, NewExt);
+ // Add the old extracts to the worklist for DCE. We can't remove the
+ // extracts directly, because they may still be used by the calling code.
+ IC.addToWorklist(OldExt);
}
+
+ return true;
}
/// We are building a shuffle to create V, which is a sequence of insertelement,
@@ -764,7 +773,7 @@ using ShuffleOps = std::pair<Value *, Value *>;
static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
Value *PermittedRHS,
- InstCombinerImpl &IC) {
+ InstCombinerImpl &IC, bool &Rerun) {
assert(V->getType()->isVectorTy() && "Invalid shuffle!");
unsigned NumElts = cast<FixedVectorType>(V->getType())->getNumElements();
@@ -795,13 +804,14 @@ static ShuffleOps collectShuffleElements(Value *V, SmallVectorImpl<int> &Mask,
// otherwise we'd end up with a shuffle of three inputs.
if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) {
Value *RHS = EI->getOperand(0);
- ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC);
+ ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS, IC, Rerun);
assert(LR.second == nullptr || LR.second == RHS);
if (LR.first->getType() != RHS->getType()) {
// Although we are giving up for now, see if we can create extracts
// that match the inserts for another round of combining.
- replaceExtractElements(IEI, EI, IC);
+ if (replaceExtractElements(IEI, EI, IC))
+ Rerun = true;
// We tried our best, but we can't find anything compatible with RHS
// further up the chain. Return a trivial shuffle.
@@ -1129,6 +1139,11 @@ Instruction *InstCombinerImpl::foldAggregateConstructionIntoAggregateReuse(
/// It should be transformed to:
/// %0 = insertvalue { i8, i32 } undef, i8 %y, 0
Instruction *InstCombinerImpl::visitInsertValueInst(InsertValueInst &I) {
+ if (Value *V = simplifyInsertValueInst(
+ I.getAggregateOperand(), I.getInsertedValueOperand(), I.getIndices(),
+ SQ.getWithInstruction(&I)))
+ return replaceInstUsesWith(I, V);
+
bool IsRedundant = false;
ArrayRef<unsigned int> FirstIndices = I.getIndices();
@@ -1235,22 +1250,22 @@ static Instruction *foldInsSequenceIntoSplat(InsertElementInst &InsElt) {
if (FirstIE == &InsElt)
return nullptr;
- // If we are not inserting into an undef vector, make sure we've seen an
+ // If we are not inserting into a poison vector, make sure we've seen an
// insert into every element.
// TODO: If the base vector is not undef, it might be better to create a splat
// and then a select-shuffle (blend) with the base vector.
- if (!match(FirstIE->getOperand(0), m_Undef()))
+ if (!match(FirstIE->getOperand(0), m_Poison()))
if (!ElementPresent.all())
return nullptr;
// Create the insert + shuffle.
- Type *Int32Ty = Type::getInt32Ty(InsElt.getContext());
+ Type *Int64Ty = Type::getInt64Ty(InsElt.getContext());
PoisonValue *PoisonVec = PoisonValue::get(VecTy);
- Constant *Zero = ConstantInt::get(Int32Ty, 0);
+ Constant *Zero = ConstantInt::get(Int64Ty, 0);
if (!cast<ConstantInt>(FirstIE->getOperand(2))->isZero())
FirstIE = InsertElementInst::Create(PoisonVec, SplatVal, Zero, "", &InsElt);
- // Splat from element 0, but replace absent elements with undef in the mask.
+ // Splat from element 0, but replace absent elements with poison in the mask.
SmallVector<int, 16> Mask(NumElements, 0);
for (unsigned i = 0; i != NumElements; ++i)
if (!ElementPresent[i])
@@ -1339,7 +1354,7 @@ static Instruction *foldInsEltIntoIdentityShuffle(InsertElementInst &InsElt) {
// (demanded elements analysis may unset it later).
return nullptr;
} else {
- assert(OldMask[i] == UndefMaskElem &&
+ assert(OldMask[i] == PoisonMaskElem &&
"Unexpected shuffle mask element for identity shuffle");
NewMask[i] = IdxC;
}
@@ -1465,10 +1480,10 @@ static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
}
++ValI;
}
- // Remaining values are filled with 'undef' values.
+ // Remaining values are filled with 'poison' values.
for (unsigned I = 0; I < NumElts; ++I) {
if (!Values[I]) {
- Values[I] = UndefValue::get(InsElt.getType()->getElementType());
+ Values[I] = PoisonValue::get(InsElt.getType()->getElementType());
Mask[I] = I;
}
}
@@ -1676,16 +1691,22 @@ Instruction *InstCombinerImpl::visitInsertElementInst(InsertElementInst &IE) {
// Try to form a shuffle from a chain of extract-insert ops.
if (isShuffleRootCandidate(IE)) {
- SmallVector<int, 16> Mask;
- ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr, *this);
-
- // The proposed shuffle may be trivial, in which case we shouldn't
- // perform the combine.
- if (LR.first != &IE && LR.second != &IE) {
- // We now have a shuffle of LHS, RHS, Mask.
- if (LR.second == nullptr)
- LR.second = UndefValue::get(LR.first->getType());
- return new ShuffleVectorInst(LR.first, LR.second, Mask);
+ bool Rerun = true;
+ while (Rerun) {
+ Rerun = false;
+
+ SmallVector<int, 16> Mask;
+ ShuffleOps LR =
+ collectShuffleElements(&IE, Mask, nullptr, *this, Rerun);
+
+ // The proposed shuffle may be trivial, in which case we shouldn't
+ // perform the combine.
+ if (LR.first != &IE && LR.second != &IE) {
+ // We now have a shuffle of LHS, RHS, Mask.
+ if (LR.second == nullptr)
+ LR.second = PoisonValue::get(LR.first->getType());
+ return new ShuffleVectorInst(LR.first, LR.second, Mask);
+ }
}
}
}
@@ -1815,9 +1836,9 @@ static bool canEvaluateShuffled(Value *V, ArrayRef<int> Mask,
/// Rebuild a new instruction just like 'I' but with the new operands given.
/// In the event of type mismatch, the type of the operands is correct.
-static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
- // We don't want to use the IRBuilder here because we want the replacement
- // instructions to appear next to 'I', not the builder's insertion point.
+static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps,
+ IRBuilderBase &Builder) {
+ Builder.SetInsertPoint(I);
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::FAdd:
@@ -1839,28 +1860,29 @@ static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
case Instruction::Xor: {
BinaryOperator *BO = cast<BinaryOperator>(I);
assert(NewOps.size() == 2 && "binary operator with #ops != 2");
- BinaryOperator *New =
- BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
- NewOps[0], NewOps[1], "", BO);
- if (isa<OverflowingBinaryOperator>(BO)) {
- New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
- New->setHasNoSignedWrap(BO->hasNoSignedWrap());
- }
- if (isa<PossiblyExactOperator>(BO)) {
- New->setIsExact(BO->isExact());
+ Value *New = Builder.CreateBinOp(cast<BinaryOperator>(I)->getOpcode(),
+ NewOps[0], NewOps[1]);
+ if (auto *NewI = dyn_cast<Instruction>(New)) {
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ NewI->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
+ NewI->setHasNoSignedWrap(BO->hasNoSignedWrap());
+ }
+ if (isa<PossiblyExactOperator>(BO)) {
+ NewI->setIsExact(BO->isExact());
+ }
+ if (isa<FPMathOperator>(BO))
+ NewI->copyFastMathFlags(I);
}
- if (isa<FPMathOperator>(BO))
- New->copyFastMathFlags(I);
return New;
}
case Instruction::ICmp:
assert(NewOps.size() == 2 && "icmp with #ops != 2");
- return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
- NewOps[0], NewOps[1]);
+ return Builder.CreateICmp(cast<ICmpInst>(I)->getPredicate(), NewOps[0],
+ NewOps[1]);
case Instruction::FCmp:
assert(NewOps.size() == 2 && "fcmp with #ops != 2");
- return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
- NewOps[0], NewOps[1]);
+ return Builder.CreateFCmp(cast<FCmpInst>(I)->getPredicate(), NewOps[0],
+ NewOps[1]);
case Instruction::Trunc:
case Instruction::ZExt:
case Instruction::SExt:
@@ -1876,27 +1898,26 @@ static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) {
I->getType()->getScalarType(),
cast<VectorType>(NewOps[0]->getType())->getElementCount());
assert(NewOps.size() == 1 && "cast with #ops != 1");
- return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
- "", I);
+ return Builder.CreateCast(cast<CastInst>(I)->getOpcode(), NewOps[0],
+ DestTy);
}
case Instruction::GetElementPtr: {
Value *Ptr = NewOps[0];
ArrayRef<Value*> Idx = NewOps.slice(1);
- GetElementPtrInst *GEP = GetElementPtrInst::Create(
- cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I);
- GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
- return GEP;
+ return Builder.CreateGEP(cast<GEPOperator>(I)->getSourceElementType(),
+ Ptr, Idx, "",
+ cast<GEPOperator>(I)->isInBounds());
}
}
llvm_unreachable("failed to rebuild vector instructions");
}
-static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
+static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask,
+ IRBuilderBase &Builder) {
// Mask.size() does not need to be equal to the number of vector elements.
assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
Type *EltTy = V->getType()->getScalarType();
- Type *I32Ty = IntegerType::getInt32Ty(V->getContext());
if (match(V, m_Undef()))
return UndefValue::get(FixedVectorType::get(EltTy, Mask.size()));
@@ -1950,15 +1971,14 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
// as well. E.g. GetElementPtr may have scalar operands even if the
// return value is a vector, so we need to examine the operand type.
if (I->getOperand(i)->getType()->isVectorTy())
- V = evaluateInDifferentElementOrder(I->getOperand(i), Mask);
+ V = evaluateInDifferentElementOrder(I->getOperand(i), Mask, Builder);
else
V = I->getOperand(i);
NewOps.push_back(V);
NeedsRebuild |= (V != I->getOperand(i));
}
- if (NeedsRebuild) {
- return buildNew(I, NewOps);
- }
+ if (NeedsRebuild)
+ return buildNew(I, NewOps, Builder);
return I;
}
case Instruction::InsertElement: {
@@ -1979,11 +1999,12 @@ static Value *evaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
// If element is not in Mask, no need to handle the operand 1 (element to
// be inserted). Just evaluate values in operand 0 according to Mask.
if (!Found)
- return evaluateInDifferentElementOrder(I->getOperand(0), Mask);
+ return evaluateInDifferentElementOrder(I->getOperand(0), Mask, Builder);
- Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask);
- return InsertElementInst::Create(V, I->getOperand(1),
- ConstantInt::get(I32Ty, Index), "", I);
+ Value *V = evaluateInDifferentElementOrder(I->getOperand(0), Mask,
+ Builder);
+ Builder.SetInsertPoint(I);
+ return Builder.CreateInsertElement(V, I->getOperand(1), Index);
}
}
llvm_unreachable("failed to reorder elements of vector instruction!");
@@ -2140,7 +2161,7 @@ static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
ConstantExpr::getShuffleVector(IdC, C, Mask);
bool MightCreatePoisonOrUB =
- is_contained(Mask, UndefMaskElem) &&
+ is_contained(Mask, PoisonMaskElem) &&
(Instruction::isIntDivRem(BOpcode) || Instruction::isShift(BOpcode));
if (MightCreatePoisonOrUB)
NewC = InstCombiner::getSafeVectorConstantForBinop(BOpcode, NewC, true);
@@ -2154,7 +2175,7 @@ static Instruction *foldSelectShuffleWith1Binop(ShuffleVectorInst &Shuf) {
// An undef shuffle mask element may propagate as an undef constant element in
// the new binop. That would produce poison where the original code might not.
// If we already made a safe constant, then there's no danger.
- if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
+ if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
NewBO->dropPoisonGeneratingFlags();
return NewBO;
}
@@ -2178,8 +2199,7 @@ static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
// Insert into element 0 of an undef vector.
UndefValue *UndefVec = UndefValue::get(Shuf.getType());
- Constant *Zero = Builder.getInt32(0);
- Value *NewIns = Builder.CreateInsertElement(UndefVec, X, Zero);
+ Value *NewIns = Builder.CreateInsertElement(UndefVec, X, (uint64_t)0);
// Splat from element 0. Any mask element that is undefined remains undefined.
// For example:
@@ -2189,7 +2209,7 @@ static Instruction *canonicalizeInsertSplat(ShuffleVectorInst &Shuf,
cast<FixedVectorType>(Shuf.getType())->getNumElements();
SmallVector<int, 16> NewMask(NumMaskElts, 0);
for (unsigned i = 0; i != NumMaskElts; ++i)
- if (Mask[i] == UndefMaskElem)
+ if (Mask[i] == PoisonMaskElem)
NewMask[i] = Mask[i];
return new ShuffleVectorInst(NewIns, NewMask);
@@ -2274,7 +2294,7 @@ Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
// mask element, the result is undefined, but it is not poison or undefined
// behavior. That is not necessarily true for div/rem/shift.
bool MightCreatePoisonOrUB =
- is_contained(Mask, UndefMaskElem) &&
+ is_contained(Mask, PoisonMaskElem) &&
(Instruction::isIntDivRem(BOpc) || Instruction::isShift(BOpc));
if (MightCreatePoisonOrUB)
NewC = InstCombiner::getSafeVectorConstantForBinop(BOpc, NewC,
@@ -2325,7 +2345,7 @@ Instruction *InstCombinerImpl::foldSelectShuffle(ShuffleVectorInst &Shuf) {
NewI->andIRFlags(B1);
if (DropNSW)
NewI->setHasNoSignedWrap(false);
- if (is_contained(Mask, UndefMaskElem) && !MightCreatePoisonOrUB)
+ if (is_contained(Mask, PoisonMaskElem) && !MightCreatePoisonOrUB)
NewI->dropPoisonGeneratingFlags();
}
return replaceInstUsesWith(Shuf, NewBO);
@@ -2361,7 +2381,7 @@ static Instruction *foldTruncShuffle(ShuffleVectorInst &Shuf,
SrcType->getScalarSizeInBits() / DestType->getScalarSizeInBits();
ArrayRef<int> Mask = Shuf.getShuffleMask();
for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
- if (Mask[i] == UndefMaskElem)
+ if (Mask[i] == PoisonMaskElem)
continue;
uint64_t LSBIndex = IsBigEndian ? (i + 1) * TruncRatio - 1 : i * TruncRatio;
assert(LSBIndex <= INT32_MAX && "Overflowed 32-bits");
@@ -2407,37 +2427,51 @@ static Instruction *narrowVectorSelect(ShuffleVectorInst &Shuf,
return SelectInst::Create(NarrowCond, NarrowX, NarrowY);
}
-/// Canonicalize FP negate after shuffle.
-static Instruction *foldFNegShuffle(ShuffleVectorInst &Shuf,
- InstCombiner::BuilderTy &Builder) {
- Instruction *FNeg0;
+/// Canonicalize FP negate/abs after shuffle.
+static Instruction *foldShuffleOfUnaryOps(ShuffleVectorInst &Shuf,
+ InstCombiner::BuilderTy &Builder) {
+ auto *S0 = dyn_cast<Instruction>(Shuf.getOperand(0));
Value *X;
- if (!match(Shuf.getOperand(0), m_CombineAnd(m_Instruction(FNeg0),
- m_FNeg(m_Value(X)))))
+ if (!S0 || !match(S0, m_CombineOr(m_FNeg(m_Value(X)), m_FAbs(m_Value(X)))))
return nullptr;
- // shuffle (fneg X), Mask --> fneg (shuffle X, Mask)
- if (FNeg0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
+ bool IsFNeg = S0->getOpcode() == Instruction::FNeg;
+
+ // Match 1-input (unary) shuffle.
+ // shuffle (fneg/fabs X), Mask --> fneg/fabs (shuffle X, Mask)
+ if (S0->hasOneUse() && match(Shuf.getOperand(1), m_Undef())) {
Value *NewShuf = Builder.CreateShuffleVector(X, Shuf.getShuffleMask());
- return UnaryOperator::CreateFNegFMF(NewShuf, FNeg0);
+ if (IsFNeg)
+ return UnaryOperator::CreateFNegFMF(NewShuf, S0);
+
+ Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
+ Intrinsic::fabs, Shuf.getType());
+ CallInst *NewF = CallInst::Create(FAbs, {NewShuf});
+ NewF->setFastMathFlags(S0->getFastMathFlags());
+ return NewF;
}
- Instruction *FNeg1;
+ // Match 2-input (binary) shuffle.
+ auto *S1 = dyn_cast<Instruction>(Shuf.getOperand(1));
Value *Y;
- if (!match(Shuf.getOperand(1), m_CombineAnd(m_Instruction(FNeg1),
- m_FNeg(m_Value(Y)))))
+ if (!S1 || !match(S1, m_CombineOr(m_FNeg(m_Value(Y)), m_FAbs(m_Value(Y)))) ||
+ S0->getOpcode() != S1->getOpcode() ||
+ (!S0->hasOneUse() && !S1->hasOneUse()))
return nullptr;
- // shuffle (fneg X), (fneg Y), Mask --> fneg (shuffle X, Y, Mask)
- if (FNeg0->hasOneUse() || FNeg1->hasOneUse()) {
- Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
- Instruction *NewFNeg = UnaryOperator::CreateFNeg(NewShuf);
- NewFNeg->copyIRFlags(FNeg0);
- NewFNeg->andIRFlags(FNeg1);
- return NewFNeg;
+ // shuf (fneg/fabs X), (fneg/fabs Y), Mask --> fneg/fabs (shuf X, Y, Mask)
+ Value *NewShuf = Builder.CreateShuffleVector(X, Y, Shuf.getShuffleMask());
+ Instruction *NewF;
+ if (IsFNeg) {
+ NewF = UnaryOperator::CreateFNeg(NewShuf);
+ } else {
+ Function *FAbs = Intrinsic::getDeclaration(Shuf.getModule(),
+ Intrinsic::fabs, Shuf.getType());
+ NewF = CallInst::Create(FAbs, {NewShuf});
}
-
- return nullptr;
+ NewF->copyIRFlags(S0);
+ NewF->andIRFlags(S1);
+ return NewF;
}
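The rewrite above is valid because an element-wise fneg or fabs commutes with a pure lane permutation: applying the unary op before or after the shuffle yields the same lanes. A scalar-loop C++ sketch of that commutation for fabs (illustration of the reasoning, not the IR transform itself):

#include <array>
#include <cassert>
#include <cmath>

int main() {
  std::array<double, 4> X{1.5, -2.0, 0.25, -8.0};
  std::array<int, 4> Mask{3, 0, 2, 1};            // a lane permutation
  std::array<double, 4> AbsX;
  for (int i = 0; i < 4; ++i)
    AbsX[i] = std::fabs(X[i]);                    // fabs X
  for (int i = 0; i < 4; ++i) {
    double ShufOfAbs = AbsX[Mask[i]];             // shuffle (fabs X), Mask
    double AbsOfShuf = std::fabs(X[Mask[i]]);     // fabs (shuffle X, Mask)
    assert(ShufOfAbs == AbsOfShuf);
  }
  return 0;
}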
/// Canonicalize casts after shuffle.
@@ -2533,7 +2567,7 @@ static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
for (unsigned i = 0; i != NumElts; ++i) {
int ExtractMaskElt = Shuf.getMaskValue(i);
int MaskElt = Mask[i];
- NewMask[i] = ExtractMaskElt == UndefMaskElem ? ExtractMaskElt : MaskElt;
+ NewMask[i] = ExtractMaskElt == PoisonMaskElem ? ExtractMaskElt : MaskElt;
}
return new ShuffleVectorInst(X, Y, NewMask);
}
@@ -2699,7 +2733,8 @@ static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
// splatting the first element of the result of the BinOp
Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
if (!match(SVI.getOperand(1), m_Undef()) ||
- !match(SVI.getShuffleMask(), m_ZeroMask()))
+ !match(SVI.getShuffleMask(), m_ZeroMask()) ||
+ !SVI.getOperand(0)->hasOneUse())
return nullptr;
Value *Op0 = SVI.getOperand(0);
@@ -2759,7 +2794,6 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
}
ArrayRef<int> Mask = SVI.getShuffleMask();
- Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
// Peek through a bitcasted shuffle operand by scaling the mask. If the
// simulated shuffle can simplify, then this shuffle is unnecessary:
@@ -2815,7 +2849,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (Instruction *I = narrowVectorSelect(SVI, Builder))
return I;
- if (Instruction *I = foldFNegShuffle(SVI, Builder))
+ if (Instruction *I = foldShuffleOfUnaryOps(SVI, Builder))
return I;
if (Instruction *I = foldCastShuffle(SVI, Builder))
@@ -2840,7 +2874,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
return I;
if (match(RHS, m_Undef()) && canEvaluateShuffled(LHS, Mask)) {
- Value *V = evaluateInDifferentElementOrder(LHS, Mask);
+ Value *V = evaluateInDifferentElementOrder(LHS, Mask, Builder);
return replaceInstUsesWith(SVI, V);
}
@@ -2916,15 +2950,15 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth;
assert(SrcElemsPerTgtElem);
BegIdx /= SrcElemsPerTgtElem;
- bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end();
+ bool BCAlreadyExists = NewBCs.contains(CastSrcTy);
auto *NewBC =
BCAlreadyExists
? NewBCs[CastSrcTy]
: Builder.CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc");
if (!BCAlreadyExists)
NewBCs[CastSrcTy] = NewBC;
- auto *Ext = Builder.CreateExtractElement(
- NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract");
+ auto *Ext = Builder.CreateExtractElement(NewBC, BegIdx,
+ SVI.getName() + ".extract");
// The shufflevector isn't being replaced: the bitcast that used it
// is. InstCombine will visit the newly-created instructions.
replaceInstUsesWith(*BC, Ext);
@@ -3042,7 +3076,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
for (unsigned i = 0; i < VWidth; ++i) {
int eltMask;
if (Mask[i] < 0) {
- // This element is an undef value.
+ // This element is a poison value.
eltMask = -1;
} else if (Mask[i] < (int)LHSWidth) {
// This element is from left hand side vector operand.
@@ -3051,27 +3085,27 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// new mask value for the element.
if (newLHS != LHS) {
eltMask = LHSMask[Mask[i]];
- // If the value selected is an undef value, explicitly specify it
+        // If the value selected is a poison value, explicitly specify it
// with a -1 mask value.
- if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
+ if (eltMask >= (int)LHSOp0Width && isa<PoisonValue>(LHSOp1))
eltMask = -1;
} else
eltMask = Mask[i];
} else {
// This element is from right hand side vector operand
//
- // If the value selected is an undef value, explicitly specify it
+ // If the value selected is a poison value, explicitly specify it
// with a -1 mask value. (case 1)
- if (match(RHS, m_Undef()))
+ if (match(RHS, m_Poison()))
eltMask = -1;
// If RHS is going to be replaced (case 3 or 4), calculate the
// new mask value for the element.
else if (newRHS != RHS) {
eltMask = RHSMask[Mask[i]-LHSWidth];
- // If the value selected is an undef value, explicitly specify it
+        // If the value selected is a poison value, explicitly specify it
// with a -1 mask value.
if (eltMask >= (int)RHSOp0Width) {
- assert(match(RHSShuffle->getOperand(1), m_Undef()) &&
+ assert(match(RHSShuffle->getOperand(1), m_Poison()) &&
"should have been check above");
eltMask = -1;
}
@@ -3102,7 +3136,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
// or is a splat, do the replacement.
if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
if (!newRHS)
- newRHS = UndefValue::get(newLHS->getType());
+ newRHS = PoisonValue::get(newLHS->getType());
return new ShuffleVectorInst(newLHS, newRHS, newMask);
}
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index fb6f4f96ea48..afd6e034f46d 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -33,8 +33,6 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
-#include "llvm-c/Initialization.h"
-#include "llvm-c/Transforms/InstCombine.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -47,7 +45,6 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
@@ -70,6 +67,7 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IRBuilder.h"
@@ -78,7 +76,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
@@ -117,6 +114,11 @@ using namespace llvm::PatternMatch;
STATISTIC(NumWorklistIterations,
"Number of instruction combining iterations performed");
+STATISTIC(NumOneIteration, "Number of functions with one iteration");
+STATISTIC(NumTwoIterations, "Number of functions with two iterations");
+STATISTIC(NumThreeIterations, "Number of functions with three iterations");
+STATISTIC(NumFourOrMoreIterations,
+ "Number of functions with four or more iterations");
STATISTIC(NumCombined , "Number of insts combined");
STATISTIC(NumConstProp, "Number of constant folds");
@@ -129,7 +131,6 @@ DEBUG_COUNTER(VisitCounter, "instcombine-visit",
"Controls which instructions are visited");
// FIXME: these limits eventually should be as low as 2.
-static constexpr unsigned InstCombineDefaultMaxIterations = 1000;
#ifndef NDEBUG
static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100;
#else
@@ -144,11 +145,6 @@ static cl::opt<unsigned> MaxSinkNumUsers(
"instcombine-max-sink-users", cl::init(32),
cl::desc("Maximum number of undroppable users for instruction sinking"));
-static cl::opt<unsigned> LimitMaxIterations(
- "instcombine-max-iterations",
- cl::desc("Limit the maximum number of instruction combining iterations"),
- cl::init(InstCombineDefaultMaxIterations));
-
static cl::opt<unsigned> InfiniteLoopDetectionThreshold(
"instcombine-infinite-loop-threshold",
cl::desc("Number of instruction combining iterations considered an "
@@ -203,6 +199,10 @@ std::optional<Value *> InstCombiner::targetSimplifyDemandedVectorEltsIntrinsic(
return std::nullopt;
}
+bool InstCombiner::isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
+ return TTI.isValidAddrSpaceCast(FromAS, ToAS);
+}
+
Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
return llvm::emitGEPOffset(&Builder, DL, GEP);
}
@@ -360,13 +360,17 @@ static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1,
// (op (cast (op X, C2)), C1) --> (op (cast X), FoldedC)
Type *DestTy = C1->getType();
Constant *CastC2 = ConstantExpr::getCast(CastOpcode, C2, DestTy);
- Constant *FoldedC = ConstantExpr::get(AssocOpcode, C1, CastC2);
+ Constant *FoldedC =
+ ConstantFoldBinaryOpOperands(AssocOpcode, C1, CastC2, IC.getDataLayout());
+ if (!FoldedC)
+ return false;
+
IC.replaceOperand(*Cast, 0, BinOp2->getOperand(0));
IC.replaceOperand(*BinOp1, 1, FoldedC);
return true;
}
-// Simplifies IntToPtr/PtrToInt RoundTrip Cast To BitCast.
+// Simplifies IntToPtr/PtrToInt RoundTrip Cast.
// inttoptr ( ptrtoint (x) ) --> x
Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
auto *IntToPtr = dyn_cast<IntToPtrInst>(Val);
@@ -378,10 +382,8 @@ Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
CastTy->getPointerAddressSpace() ==
PtrToInt->getSrcTy()->getPointerAddressSpace() &&
DL.getTypeSizeInBits(PtrToInt->getSrcTy()) ==
- DL.getTypeSizeInBits(PtrToInt->getDestTy())) {
- return CastInst::CreateBitOrPointerCast(PtrToInt->getOperand(0), CastTy,
- "", PtrToInt);
- }
+ DL.getTypeSizeInBits(PtrToInt->getDestTy()))
+ return PtrToInt->getOperand(0);
}
return nullptr;
}
@@ -732,6 +734,207 @@ static Value *tryFactorization(BinaryOperator &I, const SimplifyQuery &SQ,
return RetVal;
}
+// (Binop1 (Binop2 (logic_shift X, C), C1), (logic_shift Y, C))
+// IFF
+// 1) the logic_shifts match
+//    2) either BinOp1 is `and`, or the binops distribute over the shift
+//       (i.e. no `add` paired with `lshr`) and either BinOp2 is `and` or
+//       (logic_shift (inv_logic_shift C1, C), C) == C1
+//
+// -> (logic_shift (Binop1 (Binop2 X, inv_logic_shift(C1, C)), Y), C)
+//
+// (Binop1 (Binop2 (logic_shift X, Amt), Mask), (logic_shift Y, Amt))
+// IFF
+// 1) the logic_shifts match
+// 2) BinOp1 == BinOp2 (if BinOp == `add`, then also requires `shl`).
+//
+// -> (BinOp (logic_shift (BinOp X, Y), Amt), Mask)
+Instruction *InstCombinerImpl::foldBinOpShiftWithShift(BinaryOperator &I) {
+ auto IsValidBinOpc = [](unsigned Opc) {
+ switch (Opc) {
+ default:
+ return false;
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Add:
+ // Skip Sub as we only match constant masks which will canonicalize to use
+ // add.
+ return true;
+ }
+ };
+
+ // Check if we can distribute binop arbitrarily. `add` + `lshr` has extra
+ // constraints.
+ auto IsCompletelyDistributable = [](unsigned BinOpc1, unsigned BinOpc2,
+ unsigned ShOpc) {
+ return (BinOpc1 != Instruction::Add && BinOpc2 != Instruction::Add) ||
+ ShOpc == Instruction::Shl;
+ };
+
+ auto GetInvShift = [](unsigned ShOpc) {
+ return ShOpc == Instruction::LShr ? Instruction::Shl : Instruction::LShr;
+ };
+
+ auto CanDistributeBinops = [&](unsigned BinOpc1, unsigned BinOpc2,
+ unsigned ShOpc, Constant *CMask,
+ Constant *CShift) {
+    // If BinOp1 is `and`, we don't need to check the mask.
+ if (BinOpc1 == Instruction::And)
+ return true;
+
+    // For all other possible transforms we need a completely distributable
+    // binop/shift pair (anything but `add` + `lshr`).
+ if (!IsCompletelyDistributable(BinOpc1, BinOpc2, ShOpc))
+ return false;
+
+ // If BinOp2 is `and`, any mask works (this only really helps for non-splat
+ // vecs, otherwise the mask will be simplified and the following check will
+ // handle it).
+ if (BinOpc2 == Instruction::And)
+ return true;
+
+ // Otherwise, need mask that meets the below requirement.
+ // (logic_shift (inv_logic_shift Mask, ShAmt), ShAmt) == Mask
+ return ConstantExpr::get(
+ ShOpc, ConstantExpr::get(GetInvShift(ShOpc), CMask, CShift),
+ CShift) == CMask;
+ };
+
+ auto MatchBinOp = [&](unsigned ShOpnum) -> Instruction * {
+ Constant *CMask, *CShift;
+ Value *X, *Y, *ShiftedX, *Mask, *Shift;
+ if (!match(I.getOperand(ShOpnum),
+ m_OneUse(m_LogicalShift(m_Value(Y), m_Value(Shift)))))
+ return nullptr;
+ if (!match(I.getOperand(1 - ShOpnum),
+ m_BinOp(m_Value(ShiftedX), m_Value(Mask))))
+ return nullptr;
+
+ if (!match(ShiftedX,
+ m_OneUse(m_LogicalShift(m_Value(X), m_Specific(Shift)))))
+ return nullptr;
+
+ // Make sure we are matching instruction shifts and not ConstantExpr
+ auto *IY = dyn_cast<Instruction>(I.getOperand(ShOpnum));
+ auto *IX = dyn_cast<Instruction>(ShiftedX);
+ if (!IY || !IX)
+ return nullptr;
+
+ // LHS and RHS need same shift opcode
+ unsigned ShOpc = IY->getOpcode();
+ if (ShOpc != IX->getOpcode())
+ return nullptr;
+
+ // Make sure binop is real instruction and not ConstantExpr
+ auto *BO2 = dyn_cast<Instruction>(I.getOperand(1 - ShOpnum));
+ if (!BO2)
+ return nullptr;
+
+ unsigned BinOpc = BO2->getOpcode();
+ // Make sure we have valid binops.
+ if (!IsValidBinOpc(I.getOpcode()) || !IsValidBinOpc(BinOpc))
+ return nullptr;
+
+    // If BinOp1 == BinOp2 and it's bitwise, or it's shl with add, then just
+    // distribute to drop the shift regardless of the constants.
+ if (BinOpc == I.getOpcode() &&
+ IsCompletelyDistributable(I.getOpcode(), BinOpc, ShOpc)) {
+ Value *NewBinOp2 = Builder.CreateBinOp(I.getOpcode(), X, Y);
+ Value *NewBinOp1 = Builder.CreateBinOp(
+ static_cast<Instruction::BinaryOps>(ShOpc), NewBinOp2, Shift);
+ return BinaryOperator::Create(I.getOpcode(), NewBinOp1, Mask);
+ }
+
+ // Otherwise we can only distribute by constant shifting the mask, so
+ // ensure we have constants.
+ if (!match(Shift, m_ImmConstant(CShift)))
+ return nullptr;
+ if (!match(Mask, m_ImmConstant(CMask)))
+ return nullptr;
+
+ // Check if we can distribute the binops.
+ if (!CanDistributeBinops(I.getOpcode(), BinOpc, ShOpc, CMask, CShift))
+ return nullptr;
+
+ Constant *NewCMask = ConstantExpr::get(GetInvShift(ShOpc), CMask, CShift);
+ Value *NewBinOp2 = Builder.CreateBinOp(
+ static_cast<Instruction::BinaryOps>(BinOpc), X, NewCMask);
+ Value *NewBinOp1 = Builder.CreateBinOp(I.getOpcode(), Y, NewBinOp2);
+ return BinaryOperator::Create(static_cast<Instruction::BinaryOps>(ShOpc),
+ NewBinOp1, CShift);
+ };
+
+ if (Instruction *R = MatchBinOp(0))
+ return R;
+ return MatchBinOp(1);
+}
+
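One concrete instance of the distribution implemented above: Binop1 = or, Binop2 = and, a shl by 4, and mask 0xF0, which satisfies the inverse-shift requirement because shl(lshr(0xF0, 4), 4) == 0xF0. A standalone C++ check of the resulting identity (illustration only, not code from the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0x12345678u, 0xffffffffu, 0u})
    for (uint32_t Y : {0x0fedcba9u, 0x1u, 0u}) {
      // (or (and (shl X, 4), 0xF0), (shl Y, 4))
      uint32_t Original = ((X << 4) & 0xF0u) | (Y << 4);
      // (shl (or (and X, lshr(0xF0, 4)), Y), 4)
      uint32_t Rewritten = ((X & 0x0Fu) | Y) << 4;
      assert(Original == Rewritten);
    }
  return 0;
}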
+// (Binop (zext C), (select C, T, F))
+// -> (select C, (binop 1, T), (binop 0, F))
+//
+// (Binop (sext C), (select C, T, F))
+// -> (select C, (binop -1, T), (binop 0, F))
+//
+// Attempt to simplify binary operations into a select with folded args, when
+// one operand of the binop is a select instruction and the other operand is a
+// zext/sext of the select condition (or its logical negation).
+Instruction *
+InstCombinerImpl::foldBinOpOfSelectAndCastOfSelectCondition(BinaryOperator &I) {
+ // TODO: this simplification may be extended to any speculatable instruction,
+ // not just binops, and would possibly be handled better in FoldOpIntoSelect.
+ Instruction::BinaryOps Opc = I.getOpcode();
+ Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+ Value *A, *CondVal, *TrueVal, *FalseVal;
+ Value *CastOp;
+
+ auto MatchSelectAndCast = [&](Value *CastOp, Value *SelectOp) {
+ return match(CastOp, m_ZExtOrSExt(m_Value(A))) &&
+ A->getType()->getScalarSizeInBits() == 1 &&
+ match(SelectOp, m_Select(m_Value(CondVal), m_Value(TrueVal),
+ m_Value(FalseVal)));
+ };
+
+  // Make sure one side of the binop is a select instruction, and the other is
+  // a zero/sign extension operating on an i1.
+ if (MatchSelectAndCast(LHS, RHS))
+ CastOp = LHS;
+ else if (MatchSelectAndCast(RHS, LHS))
+ CastOp = RHS;
+ else
+ return nullptr;
+
+ auto NewFoldedConst = [&](bool IsTrueArm, Value *V) {
+ bool IsCastOpRHS = (CastOp == RHS);
+ bool IsZExt = isa<ZExtInst>(CastOp);
+ Constant *C;
+
+ if (IsTrueArm) {
+ C = Constant::getNullValue(V->getType());
+ } else if (IsZExt) {
+ unsigned BitWidth = V->getType()->getScalarSizeInBits();
+ C = Constant::getIntegerValue(V->getType(), APInt(BitWidth, 1));
+ } else {
+ C = Constant::getAllOnesValue(V->getType());
+ }
+
+ return IsCastOpRHS ? Builder.CreateBinOp(Opc, V, C)
+ : Builder.CreateBinOp(Opc, C, V);
+ };
+
+  // If the value used in the zext/sext is the select condition, or the
+  // negation of the select condition, the binop can be simplified.
+ if (CondVal == A)
+ return SelectInst::Create(CondVal, NewFoldedConst(false, TrueVal),
+ NewFoldedConst(true, FalseVal));
+
+ if (match(A, m_Not(m_Specific(CondVal))))
+ return SelectInst::Create(CondVal, NewFoldedConst(true, TrueVal),
+ NewFoldedConst(false, FalseVal));
+
+ return nullptr;
+}
+
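A scalar C++ sanity check of the select/cast fold above for the zext + add case: folding the extension's contribution (1 in the true arm, 0 in the false arm) into the select operands leaves the result unchanged. Sketch of the reasoning only, not the IR transform:

#include <cassert>

int main() {
  for (bool C : {false, true})
    for (int T : {7, -3})
      for (int F : {0, 42}) {
        int Original = static_cast<int>(C) + (C ? T : F); // add (zext i1 C), (select C, T, F)
        int Folded = C ? (T + 1) : (F + 0);               // select C, (add 1, T), (add 0, F)
        assert(Original == Folded);
      }
  return 0;
}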
Value *InstCombinerImpl::tryFactorizationFolds(BinaryOperator &I) {
Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
@@ -948,6 +1151,7 @@ Value *InstCombinerImpl::SimplifySelectsFeedingBinaryOp(BinaryOperator &I,
/// Freely adapt every user of V as-if V was changed to !V.
/// WARNING: only if canFreelyInvertAllUsersOf() said this can be done.
void InstCombinerImpl::freelyInvertAllUsersOf(Value *I, Value *IgnoredUser) {
+ assert(!isa<Constant>(I) && "Shouldn't invert users of constant");
for (User *U : make_early_inc_range(I->users())) {
if (U == IgnoredUser)
continue; // Don't consider this user.
@@ -1033,63 +1237,39 @@ Instruction *InstCombinerImpl::foldBinopOfSextBoolToSelect(BinaryOperator &BO) {
return SelectInst::Create(X, TVal, FVal);
}
-static Constant *constantFoldOperationIntoSelectOperand(
- Instruction &I, SelectInst *SI, Value *SO) {
- auto *ConstSO = dyn_cast<Constant>(SO);
- if (!ConstSO)
- return nullptr;
-
+static Constant *constantFoldOperationIntoSelectOperand(Instruction &I,
+ SelectInst *SI,
+ bool IsTrueArm) {
SmallVector<Constant *> ConstOps;
for (Value *Op : I.operands()) {
- if (Op == SI)
- ConstOps.push_back(ConstSO);
- else if (auto *C = dyn_cast<Constant>(Op))
- ConstOps.push_back(C);
- else
- llvm_unreachable("Operands should be select or constant");
- }
- return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout());
-}
+ CmpInst::Predicate Pred;
+ Constant *C = nullptr;
+ if (Op == SI) {
+ C = dyn_cast<Constant>(IsTrueArm ? SI->getTrueValue()
+ : SI->getFalseValue());
+ } else if (match(SI->getCondition(),
+ m_ICmp(Pred, m_Specific(Op), m_Constant(C))) &&
+ Pred == (IsTrueArm ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE) &&
+ isGuaranteedNotToBeUndefOrPoison(C)) {
+ // Pass
+ } else {
+ C = dyn_cast<Constant>(Op);
+ }
+ if (C == nullptr)
+ return nullptr;
-static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
- InstCombiner::BuilderTy &Builder) {
- if (auto *Cast = dyn_cast<CastInst>(&I))
- return Builder.CreateCast(Cast->getOpcode(), SO, I.getType());
-
- if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
- assert(canConstantFoldCallTo(II, cast<Function>(II->getCalledOperand())) &&
- "Expected constant-foldable intrinsic");
- Intrinsic::ID IID = II->getIntrinsicID();
- if (II->arg_size() == 1)
- return Builder.CreateUnaryIntrinsic(IID, SO);
-
- // This works for real binary ops like min/max (where we always expect the
- // constant operand to be canonicalized as op1) and unary ops with a bonus
- // constant argument like ctlz/cttz.
- // TODO: Handle non-commutative binary intrinsics as below for binops.
- assert(II->arg_size() == 2 && "Expected binary intrinsic");
- assert(isa<Constant>(II->getArgOperand(1)) && "Expected constant operand");
- return Builder.CreateBinaryIntrinsic(IID, SO, II->getArgOperand(1));
+ ConstOps.push_back(C);
}
- if (auto *EI = dyn_cast<ExtractElementInst>(&I))
- return Builder.CreateExtractElement(SO, EI->getIndexOperand());
-
- assert(I.isBinaryOp() && "Unexpected opcode for select folding");
-
- // Figure out if the constant is the left or the right argument.
- bool ConstIsRHS = isa<Constant>(I.getOperand(1));
- Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
-
- Value *Op0 = SO, *Op1 = ConstOperand;
- if (!ConstIsRHS)
- std::swap(Op0, Op1);
+ return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout());
+}
- Value *NewBO = Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), Op0,
- Op1, SO->getName() + ".op");
- if (auto *NewBOI = dyn_cast<Instruction>(NewBO))
- NewBOI->copyIRFlags(&I);
- return NewBO;
+static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI,
+ Value *NewOp, InstCombiner &IC) {
+ Instruction *Clone = I.clone();
+ Clone->replaceUsesOfWith(SI, NewOp);
+ IC.InsertNewInstBefore(Clone, *SI);
+ return Clone;
}
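
An illustrative sketch of the new IsTrueArm logic (hypothetical names, and assuming the surrounding one-use checks in FoldOpIntoSelect are satisfied): when the select condition is an equality compare against a constant, that constant can stand in for the compared operand in the arm where the compare is known true:

    %cmp = icmp eq i32 %x, 7
    %sel = select i1 %cmp, i32 10, i32 20
    %r   = add i32 %sel, %x
  ==>
    %fv  = add i32 20, %x                    ; false arm does not constant fold, so it is cloned
    %r   = select i1 %cmp, i32 17, i32 %fv   ; true arm folds to 10 + 7
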
Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
@@ -1122,56 +1302,17 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI,
return nullptr;
}
- // Test if a CmpInst instruction is used exclusively by a select as
- // part of a minimum or maximum operation. If so, refrain from doing
- // any other folding. This helps out other analyses which understand
- // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
- // and CodeGen. And in this case, at least one of the comparison
- // operands has at least one user besides the compare (the select),
- // which would often largely negate the benefit of folding anyway.
- if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
- if (CI->hasOneUse()) {
- Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
-
- // FIXME: This is a hack to avoid infinite looping with min/max patterns.
- // We have to ensure that vector constants that only differ with
- // undef elements are treated as equivalent.
- auto areLooselyEqual = [](Value *A, Value *B) {
- if (A == B)
- return true;
-
- // Test for vector constants.
- Constant *ConstA, *ConstB;
- if (!match(A, m_Constant(ConstA)) || !match(B, m_Constant(ConstB)))
- return false;
-
- // TODO: Deal with FP constants?
- if (!A->getType()->isIntOrIntVectorTy() || A->getType() != B->getType())
- return false;
-
- // Compare for equality including undefs as equal.
- auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
- const APInt *C;
- return match(Cmp, m_APIntAllowUndef(C)) && C->isOne();
- };
-
- if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
- (areLooselyEqual(FV, Op0) && areLooselyEqual(TV, Op1)))
- return nullptr;
- }
- }
-
// Make sure that one of the select arms constant folds successfully.
- Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, TV);
- Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, FV);
+ Value *NewTV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ true);
+ Value *NewFV = constantFoldOperationIntoSelectOperand(Op, SI, /*IsTrueArm*/ false);
if (!NewTV && !NewFV)
return nullptr;
// Create an instruction for the arm that did not fold.
if (!NewTV)
- NewTV = foldOperationIntoSelectOperand(Op, TV, Builder);
+ NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this);
if (!NewFV)
- NewFV = foldOperationIntoSelectOperand(Op, FV, Builder);
+ NewFV = foldOperationIntoSelectOperand(Op, SI, FV, *this);
return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
}
@@ -1263,6 +1404,7 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues());
InsertNewInstBefore(NewPN, *PN);
NewPN->takeName(PN);
+ NewPN->setDebugLoc(PN->getDebugLoc());
// If we are going to have to insert a new computation, do so right before the
// predecessor's terminator.
@@ -1291,6 +1433,10 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
replaceInstUsesWith(*User, NewPN);
eraseInstFromFunction(*User);
}
+
+ replaceAllDbgUsesWith(const_cast<PHINode &>(*PN),
+ const_cast<PHINode &>(*NewPN),
+ const_cast<PHINode &>(*PN), DT);
return replaceInstUsesWith(I, NewPN);
}
@@ -1301,7 +1447,7 @@ Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
- Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
+ Phi0->getNumOperands() != Phi1->getNumOperands())
return nullptr;
// TODO: Remove the restriction for binop being in the same block as the phis.
@@ -1309,6 +1455,51 @@ Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
BO.getParent() != Phi1->getParent())
return nullptr;
+  // Fold if, for every pair of incoming values coming from the same
+  // predecessor block, one of the two values is the identity constant of the
+  // binary operator; the result is then simply the other incoming value.
+ // For example:
+ // %phi0 = phi i32 [0, %bb0], [%i, %bb1]
+ // %phi1 = phi i32 [%j, %bb0], [0, %bb1]
+ // %add = add i32 %phi0, %phi1
+ // ==>
+ // %add = phi i32 [%j, %bb0], [%i, %bb1]
+ Constant *C = ConstantExpr::getBinOpIdentity(BO.getOpcode(), BO.getType(),
+ /*AllowRHSConstant*/ false);
+ if (C) {
+ SmallVector<Value *, 4> NewIncomingValues;
+ auto CanFoldIncomingValuePair = [&](std::tuple<Use &, Use &> T) {
+ auto &Phi0Use = std::get<0>(T);
+ auto &Phi1Use = std::get<1>(T);
+ if (Phi0->getIncomingBlock(Phi0Use) != Phi1->getIncomingBlock(Phi1Use))
+ return false;
+ Value *Phi0UseV = Phi0Use.get();
+ Value *Phi1UseV = Phi1Use.get();
+ if (Phi0UseV == C)
+ NewIncomingValues.push_back(Phi1UseV);
+ else if (Phi1UseV == C)
+ NewIncomingValues.push_back(Phi0UseV);
+ else
+ return false;
+ return true;
+ };
+
+ if (all_of(zip(Phi0->operands(), Phi1->operands()),
+ CanFoldIncomingValuePair)) {
+ PHINode *NewPhi =
+ PHINode::Create(Phi0->getType(), Phi0->getNumOperands());
+ assert(NewIncomingValues.size() == Phi0->getNumOperands() &&
+ "The number of collected incoming values should equal the number "
+ "of the original PHINode operands!");
+ for (unsigned I = 0; I < Phi0->getNumOperands(); I++)
+ NewPhi->addIncoming(NewIncomingValues[I], Phi0->getIncomingBlock(I));
+ return NewPhi;
+ }
+ }
+
+ if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
+ return nullptr;
+
// Match a pair of incoming constants for one of the predecessor blocks.
BasicBlock *ConstBB, *OtherBB;
Constant *C0, *C1;
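
As a second sketch of the identity-constant case above, this time with the multiplicative identity (names hypothetical):

    %phi0 = phi i32 [1, %bb0], [%i, %bb1]
    %phi1 = phi i32 [%j, %bb0], [1, %bb1]
    %mul  = mul i32 %phi0, %phi1
  ==>
    %mul  = phi i32 [%j, %bb0], [%i, %bb1]
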
@@ -1374,28 +1565,6 @@ Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
return nullptr;
}
-/// Given a pointer type and a constant offset, determine whether or not there
-/// is a sequence of GEP indices into the pointed type that will land us at the
-/// specified offset. If so, fill them into NewIndices and return the resultant
-/// element type, otherwise return null.
-static Type *findElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
- SmallVectorImpl<Value *> &NewIndices,
- const DataLayout &DL) {
- // Only used by visitGEPOfBitcast(), which is skipped for opaque pointers.
- Type *Ty = PtrTy->getNonOpaquePointerElementType();
- if (!Ty->isSized())
- return nullptr;
-
- APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), IntOffset);
- SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(Ty, Offset);
- if (!Offset.isZero())
- return nullptr;
-
- for (const APInt &Index : Indices)
- NewIndices.push_back(ConstantInt::get(PtrTy->getContext(), Index));
- return Ty;
-}
-
static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
// If this GEP has only 0 indices, it is the same pointer as
// Src. If Src is not a trivial GEP too, don't combine
@@ -1406,248 +1575,6 @@ static bool shouldMergeGEPs(GEPOperator &GEP, GEPOperator &Src) {
return true;
}
-/// Return a value X such that Val = X * Scale, or null if none.
-/// If the multiplication is known not to overflow, then NoSignedWrap is set.
-Value *InstCombinerImpl::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
- assert(isa<IntegerType>(Val->getType()) && "Can only descale integers!");
- assert(cast<IntegerType>(Val->getType())->getBitWidth() ==
- Scale.getBitWidth() && "Scale not compatible with value!");
-
- // If Val is zero or Scale is one then Val = Val * Scale.
- if (match(Val, m_Zero()) || Scale == 1) {
- NoSignedWrap = true;
- return Val;
- }
-
- // If Scale is zero then it does not divide Val.
- if (Scale.isMinValue())
- return nullptr;
-
- // Look through chains of multiplications, searching for a constant that is
- // divisible by Scale. For example, descaling X*(Y*(Z*4)) by a factor of 4
- // will find the constant factor 4 and produce X*(Y*Z). Descaling X*(Y*8) by
- // a factor of 4 will produce X*(Y*2). The principle of operation is to bore
- // down from Val:
- //
- // Val = M1 * X || Analysis starts here and works down
- // M1 = M2 * Y || Doesn't descend into terms with more
- // M2 = Z * 4 \/ than one use
- //
- // Then to modify a term at the bottom:
- //
- // Val = M1 * X
- // M1 = Z * Y || Replaced M2 with Z
- //
- // Then to work back up correcting nsw flags.
-
- // Op - the term we are currently analyzing. Starts at Val then drills down.
- // Replaced with its descaled value before exiting from the drill down loop.
- Value *Op = Val;
-
- // Parent - initially null, but after drilling down notes where Op came from.
- // In the example above, Parent is (Val, 0) when Op is M1, because M1 is the
- // 0'th operand of Val.
- std::pair<Instruction *, unsigned> Parent;
-
- // Set if the transform requires a descaling at deeper levels that doesn't
- // overflow.
- bool RequireNoSignedWrap = false;
-
- // Log base 2 of the scale. Negative if not a power of 2.
- int32_t logScale = Scale.exactLogBase2();
-
- for (;; Op = Parent.first->getOperand(Parent.second)) { // Drill down
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
- // If Op is a constant divisible by Scale then descale to the quotient.
- APInt Quotient(Scale), Remainder(Scale); // Init ensures right bitwidth.
- APInt::sdivrem(CI->getValue(), Scale, Quotient, Remainder);
- if (!Remainder.isMinValue())
- // Not divisible by Scale.
- return nullptr;
- // Replace with the quotient in the parent.
- Op = ConstantInt::get(CI->getType(), Quotient);
- NoSignedWrap = true;
- break;
- }
-
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Op)) {
- if (BO->getOpcode() == Instruction::Mul) {
- // Multiplication.
- NoSignedWrap = BO->hasNoSignedWrap();
- if (RequireNoSignedWrap && !NoSignedWrap)
- return nullptr;
-
- // There are three cases for multiplication: multiplication by exactly
- // the scale, multiplication by a constant different to the scale, and
- // multiplication by something else.
- Value *LHS = BO->getOperand(0);
- Value *RHS = BO->getOperand(1);
-
- if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- // Multiplication by a constant.
- if (CI->getValue() == Scale) {
- // Multiplication by exactly the scale, replace the multiplication
- // by its left-hand side in the parent.
- Op = LHS;
- break;
- }
-
- // Otherwise drill down into the constant.
- if (!Op->hasOneUse())
- return nullptr;
-
- Parent = std::make_pair(BO, 1);
- continue;
- }
-
- // Multiplication by something else. Drill down into the left-hand side
- // since that's where the reassociate pass puts the good stuff.
- if (!Op->hasOneUse())
- return nullptr;
-
- Parent = std::make_pair(BO, 0);
- continue;
- }
-
- if (logScale > 0 && BO->getOpcode() == Instruction::Shl &&
- isa<ConstantInt>(BO->getOperand(1))) {
- // Multiplication by a power of 2.
- NoSignedWrap = BO->hasNoSignedWrap();
- if (RequireNoSignedWrap && !NoSignedWrap)
- return nullptr;
-
- Value *LHS = BO->getOperand(0);
- int32_t Amt = cast<ConstantInt>(BO->getOperand(1))->
- getLimitedValue(Scale.getBitWidth());
- // Op = LHS << Amt.
-
- if (Amt == logScale) {
- // Multiplication by exactly the scale, replace the multiplication
- // by its left-hand side in the parent.
- Op = LHS;
- break;
- }
- if (Amt < logScale || !Op->hasOneUse())
- return nullptr;
-
- // Multiplication by more than the scale. Reduce the multiplying amount
- // by the scale in the parent.
- Parent = std::make_pair(BO, 1);
- Op = ConstantInt::get(BO->getType(), Amt - logScale);
- break;
- }
- }
-
- if (!Op->hasOneUse())
- return nullptr;
-
- if (CastInst *Cast = dyn_cast<CastInst>(Op)) {
- if (Cast->getOpcode() == Instruction::SExt) {
- // Op is sign-extended from a smaller type, descale in the smaller type.
- unsigned SmallSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
- APInt SmallScale = Scale.trunc(SmallSize);
- // Suppose Op = sext X, and we descale X as Y * SmallScale. We want to
- // descale Op as (sext Y) * Scale. In order to have
- // sext (Y * SmallScale) = (sext Y) * Scale
- // some conditions need to hold however: SmallScale must sign-extend to
- // Scale and the multiplication Y * SmallScale should not overflow.
- if (SmallScale.sext(Scale.getBitWidth()) != Scale)
- // SmallScale does not sign-extend to Scale.
- return nullptr;
- assert(SmallScale.exactLogBase2() == logScale);
- // Require that Y * SmallScale must not overflow.
- RequireNoSignedWrap = true;
-
- // Drill down through the cast.
- Parent = std::make_pair(Cast, 0);
- Scale = SmallScale;
- continue;
- }
-
- if (Cast->getOpcode() == Instruction::Trunc) {
- // Op is truncated from a larger type, descale in the larger type.
- // Suppose Op = trunc X, and we descale X as Y * sext Scale. Then
- // trunc (Y * sext Scale) = (trunc Y) * Scale
- // always holds. However (trunc Y) * Scale may overflow even if
- // trunc (Y * sext Scale) does not, so nsw flags need to be cleared
- // from this point up in the expression (see later).
- if (RequireNoSignedWrap)
- return nullptr;
-
- // Drill down through the cast.
- unsigned LargeSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
- Parent = std::make_pair(Cast, 0);
- Scale = Scale.sext(LargeSize);
- if (logScale + 1 == (int32_t)Cast->getType()->getPrimitiveSizeInBits())
- logScale = -1;
- assert(Scale.exactLogBase2() == logScale);
- continue;
- }
- }
-
- // Unsupported expression, bail out.
- return nullptr;
- }
-
- // If Op is zero then Val = Op * Scale.
- if (match(Op, m_Zero())) {
- NoSignedWrap = true;
- return Op;
- }
-
- // We know that we can successfully descale, so from here on we can safely
- // modify the IR. Op holds the descaled version of the deepest term in the
- // expression. NoSignedWrap is 'true' if multiplying Op by Scale is known
- // not to overflow.
-
- if (!Parent.first)
- // The expression only had one term.
- return Op;
-
- // Rewrite the parent using the descaled version of its operand.
- assert(Parent.first->hasOneUse() && "Drilled down when more than one use!");
- assert(Op != Parent.first->getOperand(Parent.second) &&
- "Descaling was a no-op?");
- replaceOperand(*Parent.first, Parent.second, Op);
- Worklist.push(Parent.first);
-
- // Now work back up the expression correcting nsw flags. The logic is based
- // on the following observation: if X * Y is known not to overflow as a signed
- // multiplication, and Y is replaced by a value Z with smaller absolute value,
- // then X * Z will not overflow as a signed multiplication either. As we work
- // our way up, having NoSignedWrap 'true' means that the descaled value at the
- // current level has strictly smaller absolute value than the original.
- Instruction *Ancestor = Parent.first;
- do {
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Ancestor)) {
- // If the multiplication wasn't nsw then we can't say anything about the
- // value of the descaled multiplication, and we have to clear nsw flags
- // from this point on up.
- bool OpNoSignedWrap = BO->hasNoSignedWrap();
- NoSignedWrap &= OpNoSignedWrap;
- if (NoSignedWrap != OpNoSignedWrap) {
- BO->setHasNoSignedWrap(NoSignedWrap);
- Worklist.push(Ancestor);
- }
- } else if (Ancestor->getOpcode() == Instruction::Trunc) {
- // The fact that the descaled input to the trunc has smaller absolute
- // value than the original input doesn't tell us anything useful about
- // the absolute values of the truncations.
- NoSignedWrap = false;
- }
- assert((Ancestor->getOpcode() != Instruction::SExt || NoSignedWrap) &&
- "Failed to keep proper track of nsw flags while drilling down?");
-
- if (Ancestor == Val)
- // Got to the top, all done!
- return Val;
-
- // Move up one level in the expression.
- assert(Ancestor->hasOneUse() && "Drilled down when more than one use!");
- Ancestor = Ancestor->user_back();
- } while (true);
-}
-
Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
if (!isa<VectorType>(Inst.getType()))
return nullptr;
@@ -1748,9 +1675,9 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
// TODO: Allow arbitrary shuffles by shuffling after binop?
// That might be legal, but we have to deal with poison.
if (LShuf->isSelect() &&
- !is_contained(LShuf->getShuffleMask(), UndefMaskElem) &&
+ !is_contained(LShuf->getShuffleMask(), PoisonMaskElem) &&
RShuf->isSelect() &&
- !is_contained(RShuf->getShuffleMask(), UndefMaskElem)) {
+ !is_contained(RShuf->getShuffleMask(), PoisonMaskElem)) {
// Example:
// LHS = shuffle V1, V2, <0, 5, 6, 3>
// RHS = shuffle V2, V1, <0, 5, 6, 3>
@@ -1991,50 +1918,9 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
return nullptr;
- if (Src->getResultElementType() == GEP.getSourceElementType() &&
- Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
- Src->hasOneUse()) {
- Value *GO1 = GEP.getOperand(1);
- Value *SO1 = Src->getOperand(1);
-
- if (LI) {
- // Try to reassociate loop invariant GEP chains to enable LICM.
- if (Loop *L = LI->getLoopFor(GEP.getParent())) {
- // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is
- // invariant: this breaks the dependence between GEPs and allows LICM
- // to hoist the invariant part out of the loop.
- if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) {
- // The swapped GEPs are inbounds if both original GEPs are inbounds
- // and the sign of the offsets is the same. For simplicity, only
- // handle both offsets being non-negative.
- bool IsInBounds = Src->isInBounds() && GEP.isInBounds() &&
- isKnownNonNegative(SO1, DL, 0, &AC, &GEP, &DT) &&
- isKnownNonNegative(GO1, DL, 0, &AC, &GEP, &DT);
- // Put NewSrc at same location as %src.
- Builder.SetInsertPoint(cast<Instruction>(Src));
- Value *NewSrc = Builder.CreateGEP(GEP.getSourceElementType(),
- Src->getPointerOperand(), GO1,
- Src->getName(), IsInBounds);
- GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
- GEP.getSourceElementType(), NewSrc, {SO1});
- NewGEP->setIsInBounds(IsInBounds);
- return NewGEP;
- }
- }
- }
- }
-
- // Note that if our source is a gep chain itself then we wait for that
- // chain to be resolved before we perform this transformation. This
- // avoids us creating a TON of code in some cases.
- if (auto *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0)))
- if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
- return nullptr; // Wait until our source is folded to completion.
-
// For constant GEPs, use a more general offset-based folding approach.
- // Only do this for opaque pointers, as the result element type may change.
Type *PtrTy = Src->getType()->getScalarType();
- if (PtrTy->isOpaquePointerTy() && GEP.hasAllConstantIndices() &&
+ if (GEP.hasAllConstantIndices() &&
(Src->hasOneUse() || Src->hasAllConstantIndices())) {
// Split Src into a variable part and a constant suffix.
gep_type_iterator GTI = gep_type_begin(*Src);
@@ -2077,13 +1963,11 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
// If both GEP are constant-indexed, and cannot be merged in either way,
// convert them to a GEP of i8.
if (Src->hasAllConstantIndices())
- return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
- ? GetElementPtrInst::CreateInBounds(
- Builder.getInt8Ty(), Src->getOperand(0),
- Builder.getInt(OffsetOld), GEP.getName())
- : GetElementPtrInst::Create(
- Builder.getInt8Ty(), Src->getOperand(0),
- Builder.getInt(OffsetOld), GEP.getName());
+ return replaceInstUsesWith(
+ GEP, Builder.CreateGEP(
+ Builder.getInt8Ty(), Src->getOperand(0),
+ Builder.getInt(OffsetOld), "",
+ isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
return nullptr;
}
@@ -2100,13 +1984,9 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
IsInBounds &= Idx.isNonNegative() == ConstIndices[0].isNonNegative();
}
- return IsInBounds
- ? GetElementPtrInst::CreateInBounds(Src->getSourceElementType(),
- Src->getOperand(0), Indices,
- GEP.getName())
- : GetElementPtrInst::Create(Src->getSourceElementType(),
- Src->getOperand(0), Indices,
- GEP.getName());
+ return replaceInstUsesWith(
+ GEP, Builder.CreateGEP(Src->getSourceElementType(), Src->getOperand(0),
+ Indices, "", IsInBounds));
}
if (Src->getResultElementType() != GEP.getSourceElementType())
@@ -2160,118 +2040,10 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
}
if (!Indices.empty())
- return isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))
- ? GetElementPtrInst::CreateInBounds(
- Src->getSourceElementType(), Src->getOperand(0), Indices,
- GEP.getName())
- : GetElementPtrInst::Create(Src->getSourceElementType(),
- Src->getOperand(0), Indices,
- GEP.getName());
-
- return nullptr;
-}
-
-// Note that we may have also stripped an address space cast in between.
-Instruction *InstCombinerImpl::visitGEPOfBitcast(BitCastInst *BCI,
- GetElementPtrInst &GEP) {
- // With opaque pointers, there is no pointer element type we can use to
- // adjust the GEP type.
- PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
- if (SrcType->isOpaque())
- return nullptr;
-
- Type *GEPEltType = GEP.getSourceElementType();
- Type *SrcEltType = SrcType->getNonOpaquePointerElementType();
- Value *SrcOp = BCI->getOperand(0);
-
- // GEP directly using the source operand if this GEP is accessing an element
- // of a bitcasted pointer to vector or array of the same dimensions:
- // gep (bitcast <c x ty>* X to [c x ty]*), Y, Z --> gep X, Y, Z
- // gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z
- auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy,
- const DataLayout &DL) {
- auto *VecVTy = cast<FixedVectorType>(VecTy);
- return ArrTy->getArrayElementType() == VecVTy->getElementType() &&
- ArrTy->getArrayNumElements() == VecVTy->getNumElements() &&
- DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy);
- };
- if (GEP.getNumOperands() == 3 &&
- ((GEPEltType->isArrayTy() && isa<FixedVectorType>(SrcEltType) &&
- areMatchingArrayAndVecTypes(GEPEltType, SrcEltType, DL)) ||
- (isa<FixedVectorType>(GEPEltType) && SrcEltType->isArrayTy() &&
- areMatchingArrayAndVecTypes(SrcEltType, GEPEltType, DL)))) {
-
- // Create a new GEP here, as using `setOperand()` followed by
- // `setSourceElementType()` won't actually update the type of the
- // existing GEP Value. Causing issues if this Value is accessed when
- // constructing an AddrSpaceCastInst
- SmallVector<Value *, 8> Indices(GEP.indices());
- Value *NGEP =
- Builder.CreateGEP(SrcEltType, SrcOp, Indices, "", GEP.isInBounds());
- NGEP->takeName(&GEP);
-
- // Preserve GEP address space to satisfy users
- if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
- return new AddrSpaceCastInst(NGEP, GEP.getType());
-
- return replaceInstUsesWith(GEP, NGEP);
- }
-
- // See if we can simplify:
- // X = bitcast A* to B*
- // Y = gep X, <...constant indices...>
- // into a gep of the original struct. This is important for SROA and alias
- // analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
- unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEP.getType());
- APInt Offset(OffsetBits, 0);
-
- // If the bitcast argument is an allocation, The bitcast is for convertion
- // to actual type of allocation. Removing such bitcasts, results in having
- // GEPs with i8* base and pure byte offsets. That means GEP is not aware of
- // struct or array hierarchy.
- // By avoiding such GEPs, phi translation and MemoryDependencyAnalysis have
- // a better chance to succeed.
- if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset) &&
- !isAllocationFn(SrcOp, &TLI)) {
- // If this GEP instruction doesn't move the pointer, just replace the GEP
- // with a bitcast of the real input to the dest type.
- if (!Offset) {
- // If the bitcast is of an allocation, and the allocation will be
- // converted to match the type of the cast, don't touch this.
- if (isa<AllocaInst>(SrcOp)) {
- // See if the bitcast simplifies, if so, don't nuke this GEP yet.
- if (Instruction *I = visitBitCast(*BCI)) {
- if (I != BCI) {
- I->takeName(BCI);
- I->insertInto(BCI->getParent(), BCI->getIterator());
- replaceInstUsesWith(*BCI, I);
- }
- return &GEP;
- }
- }
-
- if (SrcType->getPointerAddressSpace() != GEP.getAddressSpace())
- return new AddrSpaceCastInst(SrcOp, GEP.getType());
- return new BitCastInst(SrcOp, GEP.getType());
- }
-
- // Otherwise, if the offset is non-zero, we need to find out if there is a
- // field at Offset in 'A's type. If so, we can pull the cast through the
- // GEP.
- SmallVector<Value *, 8> NewIndices;
- if (findElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices, DL)) {
- Value *NGEP = Builder.CreateGEP(SrcEltType, SrcOp, NewIndices, "",
- GEP.isInBounds());
-
- if (NGEP->getType() == GEP.getType())
- return replaceInstUsesWith(GEP, NGEP);
- NGEP->takeName(&GEP);
-
- if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
- return new AddrSpaceCastInst(NGEP, GEP.getType());
- return new BitCastInst(NGEP, GEP.getType());
- }
- }
+ return replaceInstUsesWith(
+ GEP, Builder.CreateGEP(
+ Src->getSourceElementType(), Src->getOperand(0), Indices, "",
+ isMergedGEPInBounds(*Src, *cast<GEPOperator>(&GEP))));
return nullptr;
}
@@ -2497,192 +2269,6 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (GEPType->isVectorTy())
return nullptr;
- // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
- Value *StrippedPtr = PtrOp->stripPointerCasts();
- PointerType *StrippedPtrTy = cast<PointerType>(StrippedPtr->getType());
-
- // TODO: The basic approach of these folds is not compatible with opaque
- // pointers, because we can't use bitcasts as a hint for a desirable GEP
- // type. Instead, we should perform canonicalization directly on the GEP
- // type. For now, skip these.
- if (StrippedPtr != PtrOp && !StrippedPtrTy->isOpaque()) {
- bool HasZeroPointerIndex = false;
- Type *StrippedPtrEltTy = StrippedPtrTy->getNonOpaquePointerElementType();
-
- if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
- HasZeroPointerIndex = C->isZero();
-
- // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
- // into : GEP [10 x i8]* X, i32 0, ...
- //
- // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ...
- // into : GEP i8* X, ...
- //
- // This occurs when the program declares an array extern like "int X[];"
- if (HasZeroPointerIndex) {
- if (auto *CATy = dyn_cast<ArrayType>(GEPEltType)) {
- // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
- if (CATy->getElementType() == StrippedPtrEltTy) {
- // -> GEP i8* X, ...
- SmallVector<Value *, 8> Idx(drop_begin(GEP.indices()));
- GetElementPtrInst *Res = GetElementPtrInst::Create(
- StrippedPtrEltTy, StrippedPtr, Idx, GEP.getName());
- Res->setIsInBounds(GEP.isInBounds());
- if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
- return Res;
- // Insert Res, and create an addrspacecast.
- // e.g.,
- // GEP (addrspacecast i8 addrspace(1)* X to [0 x i8]*), i32 0, ...
- // ->
- // %0 = GEP i8 addrspace(1)* X, ...
- // addrspacecast i8 addrspace(1)* %0 to i8*
- return new AddrSpaceCastInst(Builder.Insert(Res), GEPType);
- }
-
- if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrEltTy)) {
- // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
- if (CATy->getElementType() == XATy->getElementType()) {
- // -> GEP [10 x i8]* X, i32 0, ...
- // At this point, we know that the cast source type is a pointer
- // to an array of the same type as the destination pointer
- // array. Because the array type is never stepped over (there
- // is a leading zero) we can fold the cast into this GEP.
- if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace()) {
- GEP.setSourceElementType(XATy);
- return replaceOperand(GEP, 0, StrippedPtr);
- }
- // Cannot replace the base pointer directly because StrippedPtr's
- // address space is different. Instead, create a new GEP followed by
- // an addrspacecast.
- // e.g.,
- // GEP (addrspacecast [10 x i8] addrspace(1)* X to [0 x i8]*),
- // i32 0, ...
- // ->
- // %0 = GEP [10 x i8] addrspace(1)* X, ...
- // addrspacecast i8 addrspace(1)* %0 to i8*
- SmallVector<Value *, 8> Idx(GEP.indices());
- Value *NewGEP =
- Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
- GEP.getName(), GEP.isInBounds());
- return new AddrSpaceCastInst(NewGEP, GEPType);
- }
- }
- }
- } else if (GEP.getNumOperands() == 2 && !IsGEPSrcEleScalable) {
- // Skip if GEP source element type is scalable. The type alloc size is
- // unknown at compile-time.
- // Transform things like: %t = getelementptr i32*
- // bitcast ([2 x i32]* %str to i32*), i32 %V into: %t1 = getelementptr [2
- // x i32]* %str, i32 0, i32 %V; bitcast
- if (StrippedPtrEltTy->isArrayTy() &&
- DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
- DL.getTypeAllocSize(GEPEltType)) {
- Type *IdxType = DL.getIndexType(GEPType);
- Value *Idx[2] = {Constant::getNullValue(IdxType), GEP.getOperand(1)};
- Value *NewGEP = Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
- GEP.getName(), GEP.isInBounds());
-
- // V and GEP are both pointer types --> BitCast
- return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType);
- }
-
- // Transform things like:
- // %V = mul i64 %N, 4
- // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
- // into: %t1 = getelementptr i32* %arr, i32 %N; bitcast
- if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
- // Check that changing the type amounts to dividing the index by a scale
- // factor.
- uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
- uint64_t SrcSize =
- DL.getTypeAllocSize(StrippedPtrEltTy).getFixedValue();
- if (ResSize && SrcSize % ResSize == 0) {
- Value *Idx = GEP.getOperand(1);
- unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
- uint64_t Scale = SrcSize / ResSize;
-
- // Earlier transforms ensure that the index has the right type
- // according to Data Layout, which considerably simplifies the
- // logic by eliminating implicit casts.
- assert(Idx->getType() == DL.getIndexType(GEPType) &&
- "Index type does not match the Data Layout preferences");
-
- bool NSW;
- if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
- // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
- // If the multiplication NewIdx * Scale may overflow then the new
- // GEP may not be "inbounds".
- Value *NewGEP =
- Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx,
- GEP.getName(), GEP.isInBounds() && NSW);
-
- // The NewGEP must be pointer typed, so must the old one -> BitCast
- return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
- GEPType);
- }
- }
- }
-
- // Similarly, transform things like:
- // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
- // (where tmp = 8*tmp2) into:
- // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
- if (GEPEltType->isSized() && StrippedPtrEltTy->isSized() &&
- StrippedPtrEltTy->isArrayTy()) {
- // Check that changing to the array element type amounts to dividing the
- // index by a scale factor.
- uint64_t ResSize = DL.getTypeAllocSize(GEPEltType).getFixedValue();
- uint64_t ArrayEltSize =
- DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType())
- .getFixedValue();
- if (ResSize && ArrayEltSize % ResSize == 0) {
- Value *Idx = GEP.getOperand(1);
- unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
- uint64_t Scale = ArrayEltSize / ResSize;
-
- // Earlier transforms ensure that the index has the right type
- // according to the Data Layout, which considerably simplifies
- // the logic by eliminating implicit casts.
- assert(Idx->getType() == DL.getIndexType(GEPType) &&
- "Index type does not match the Data Layout preferences");
-
- bool NSW;
- if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
- // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
- // If the multiplication NewIdx * Scale may overflow then the new
- // GEP may not be "inbounds".
- Type *IndTy = DL.getIndexType(GEPType);
- Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx};
-
- Value *NewGEP =
- Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off,
- GEP.getName(), GEP.isInBounds() && NSW);
- // The NewGEP must be pointer typed, so must the old one -> BitCast
- return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
- GEPType);
- }
- }
- }
- }
- }
-
- // addrspacecast between types is canonicalized as a bitcast, then an
- // addrspacecast. To take advantage of the below bitcast + struct GEP, look
- // through the addrspacecast.
- Value *ASCStrippedPtrOp = PtrOp;
- if (auto *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) {
- // X = bitcast A addrspace(1)* to B addrspace(1)*
- // Y = addrspacecast A addrspace(1)* to B addrspace(2)*
- // Z = gep Y, <...constant indices...>
- // Into an addrspacecasted GEP of the struct.
- if (auto *BC = dyn_cast<BitCastInst>(ASC->getOperand(0)))
- ASCStrippedPtrOp = BC;
- }
-
- if (auto *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp))
- if (Instruction *I = visitGEPOfBitcast(BCI, GEP))
- return I;
-
if (!GEP.isInBounds()) {
unsigned IdxWidth =
DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
@@ -2690,12 +2276,13 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Value *UnderlyingPtrOp =
PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
BasePtrOffset);
- if (auto *AI = dyn_cast<AllocaInst>(UnderlyingPtrOp)) {
+ bool CanBeNull, CanBeFreed;
+ uint64_t DerefBytes = UnderlyingPtrOp->getPointerDereferenceableBytes(
+ DL, CanBeNull, CanBeFreed);
+ if (!CanBeNull && !CanBeFreed && DerefBytes != 0) {
if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
BasePtrOffset.isNonNegative()) {
- APInt AllocSize(
- IdxWidth,
- DL.getTypeAllocSize(AI->getAllocatedType()).getKnownMinValue());
+ APInt AllocSize(IdxWidth, DerefBytes);
if (BasePtrOffset.ule(AllocSize)) {
return GetElementPtrInst::CreateInBounds(
GEP.getSourceElementType(), PtrOp, Indices, GEP.getName());
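
For illustration (hypothetical names), using dereferenceable bytes instead of requiring an alloca keeps the old behaviour for allocas while also covering other pointers whose dereferenceable size is known:

    %a = alloca [4 x i32]
    %g = getelementptr i32, ptr %a, i64 2        ; constant offset 8, within the 16
                                                 ; dereferenceable bytes of %a
  ==>
    %g = getelementptr inbounds i32, ptr %a, i64 2
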
@@ -2881,8 +2468,11 @@ Instruction *InstCombinerImpl::visitAllocSite(Instruction &MI) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
if (II->getIntrinsicID() == Intrinsic::objectsize) {
- Value *Result =
- lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/true);
+ SmallVector<Instruction *> InsertedInstructions;
+ Value *Result = lowerObjectSizeCall(
+ II, DL, &TLI, AA, /*MustSucceed=*/true, &InsertedInstructions);
+ for (Instruction *Inserted : InsertedInstructions)
+ Worklist.add(Inserted);
replaceInstUsesWith(*I, Result);
eraseInstFromFunction(*I);
Users[i] = nullptr; // Skip examining in the next loop.
@@ -3089,50 +2679,27 @@ Instruction *InstCombinerImpl::visitFree(CallInst &FI, Value *Op) {
return nullptr;
}
-static bool isMustTailCall(Value *V) {
- if (auto *CI = dyn_cast<CallInst>(V))
- return CI->isMustTailCall();
- return false;
-}
-
Instruction *InstCombinerImpl::visitReturnInst(ReturnInst &RI) {
- if (RI.getNumOperands() == 0) // ret void
- return nullptr;
-
- Value *ResultOp = RI.getOperand(0);
- Type *VTy = ResultOp->getType();
- if (!VTy->isIntegerTy() || isa<Constant>(ResultOp))
- return nullptr;
-
- // Don't replace result of musttail calls.
- if (isMustTailCall(ResultOp))
- return nullptr;
-
- // There might be assume intrinsics dominating this return that completely
- // determine the value. If so, constant fold it.
- KnownBits Known = computeKnownBits(ResultOp, 0, &RI);
- if (Known.isConstant())
- return replaceOperand(RI, 0,
- Constant::getIntegerValue(VTy, Known.getConstant()));
-
+ // Nothing for now.
return nullptr;
}
// WARNING: keep in sync with SimplifyCFGOpt::simplifyUnreachable()!
-Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
+bool InstCombinerImpl::removeInstructionsBeforeUnreachable(Instruction &I) {
// Try to remove the previous instruction if it must lead to unreachable.
// This includes instructions like stores and "llvm.assume" that may not get
// removed by simple dead code elimination.
+ bool Changed = false;
while (Instruction *Prev = I.getPrevNonDebugInstruction()) {
// While we theoretically can erase EH, that would result in a block that
// used to start with an EH no longer starting with EH, which is invalid.
// To make it valid, we'd need to fixup predecessors to no longer refer to
// this block, but that changes CFG, which is not allowed in InstCombine.
if (Prev->isEHPad())
- return nullptr; // Can not drop any more instructions. We're done here.
+ break; // Can not drop any more instructions. We're done here.
if (!isGuaranteedToTransferExecutionToSuccessor(Prev))
- return nullptr; // Can not drop any more instructions. We're done here.
+ break; // Can not drop any more instructions. We're done here.
// Otherwise, this instruction can be freely erased,
// even if it is not side-effect free.
@@ -3140,9 +2707,13 @@ Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
// another unreachable block), so convert those to poison.
replaceInstUsesWith(*Prev, PoisonValue::get(Prev->getType()));
eraseInstFromFunction(*Prev);
+ Changed = true;
}
- assert(I.getParent()->sizeWithoutDebug() == 1 && "The block is now empty.");
- // FIXME: recurse into unconditional predecessors?
+ return Changed;
+}
+
+Instruction *InstCombinerImpl::visitUnreachableInst(UnreachableInst &I) {
+ removeInstructionsBeforeUnreachable(I);
return nullptr;
}
@@ -3175,6 +2746,57 @@ Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
return nullptr;
}
+// Under the assumption that I is unreachable, remove it and the instructions
+// that follow it in its block (everything up to, but not including, the block
+// terminator).
+bool InstCombinerImpl::handleUnreachableFrom(Instruction *I) {
+ bool Changed = false;
+ BasicBlock *BB = I->getParent();
+ for (Instruction &Inst : make_early_inc_range(
+ make_range(std::next(BB->getTerminator()->getReverseIterator()),
+ std::next(I->getReverseIterator())))) {
+ if (!Inst.use_empty() && !Inst.getType()->isTokenTy()) {
+ replaceInstUsesWith(Inst, PoisonValue::get(Inst.getType()));
+ Changed = true;
+ }
+ if (Inst.isEHPad() || Inst.getType()->isTokenTy())
+ continue;
+ eraseInstFromFunction(Inst);
+ Changed = true;
+ }
+
+ // Replace phi node operands in successor blocks with poison.
+ for (BasicBlock *Succ : successors(BB))
+ for (PHINode &PN : Succ->phis())
+ for (Use &U : PN.incoming_values())
+ if (PN.getIncomingBlock(U) == BB && !isa<PoisonValue>(U)) {
+ replaceUse(U, PoisonValue::get(PN.getType()));
+ addToWorklist(&PN);
+ Changed = true;
+ }
+
+ // TODO: Successor blocks may also be dead.
+ return Changed;
+}
+
+bool InstCombinerImpl::handlePotentiallyDeadSuccessors(BasicBlock *BB,
+ BasicBlock *LiveSucc) {
+ bool Changed = false;
+ for (BasicBlock *Succ : successors(BB)) {
+ // The live successor isn't dead.
+ if (Succ == LiveSucc)
+ continue;
+
+ if (!all_of(predecessors(Succ), [&](BasicBlock *Pred) {
+ return DT.dominates(BasicBlockEdge(BB, Succ),
+ BasicBlockEdge(Pred, Succ));
+ }))
+ continue;
+
+ Changed |= handleUnreachableFrom(&Succ->front());
+ }
+ return Changed;
+}
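
A small sketch of the effect on a successor that is reachable only through an edge from a branch on undef (hypothetical names); the non-terminator instructions are erased and any remaining uses would be replaced with poison:

    bb:
      br i1 undef, label %dead, label %other
    dead:                                ; %bb is the sole predecessor
      %v = add i32 %x, 1
      store i32 %v, ptr %p
      br label %exit
  ==>
    dead:
      br label %exit
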
+
Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
if (BI.isUnconditional())
return visitUnconditionalBranchInst(BI);
@@ -3218,6 +2840,14 @@ Instruction *InstCombinerImpl::visitBranchInst(BranchInst &BI) {
return &BI;
}
+ if (isa<UndefValue>(Cond) &&
+ handlePotentiallyDeadSuccessors(BI.getParent(), /*LiveSucc*/ nullptr))
+ return &BI;
+ if (auto *CI = dyn_cast<ConstantInt>(Cond))
+ if (handlePotentiallyDeadSuccessors(BI.getParent(),
+ BI.getSuccessor(!CI->getZExtValue())))
+ return &BI;
+
return nullptr;
}
@@ -3236,6 +2866,14 @@ Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
return replaceOperand(SI, 0, Op0);
}
+ if (isa<UndefValue>(Cond) &&
+ handlePotentiallyDeadSuccessors(SI.getParent(), /*LiveSucc*/ nullptr))
+ return &SI;
+ if (auto *CI = dyn_cast<ConstantInt>(Cond))
+ if (handlePotentiallyDeadSuccessors(
+ SI.getParent(), SI.findCaseValue(CI)->getCaseSuccessor()))
+ return &SI;
+
KnownBits Known = computeKnownBits(Cond, 0, &SI);
unsigned LeadingKnownZeros = Known.countMinLeadingZeros();
unsigned LeadingKnownOnes = Known.countMinLeadingOnes();
@@ -3243,10 +2881,10 @@ Instruction *InstCombinerImpl::visitSwitchInst(SwitchInst &SI) {
// Compute the number of leading bits we can ignore.
// TODO: A better way to determine this would use ComputeNumSignBits().
for (const auto &C : SI.cases()) {
- LeadingKnownZeros = std::min(
- LeadingKnownZeros, C.getCaseValue()->getValue().countLeadingZeros());
- LeadingKnownOnes = std::min(
- LeadingKnownOnes, C.getCaseValue()->getValue().countLeadingOnes());
+ LeadingKnownZeros =
+ std::min(LeadingKnownZeros, C.getCaseValue()->getValue().countl_zero());
+ LeadingKnownOnes =
+ std::min(LeadingKnownOnes, C.getCaseValue()->getValue().countl_one());
}
unsigned NewWidth = Known.getBitWidth() - std::max(LeadingKnownZeros, LeadingKnownOnes);
@@ -3412,6 +3050,11 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
return R;
if (LoadInst *L = dyn_cast<LoadInst>(Agg)) {
+    // Bail out if the aggregate contains a scalable vector type.
+ if (auto *STy = dyn_cast<StructType>(Agg->getType());
+ STy && STy->containsScalableVectorType())
+ return nullptr;
+
// If the (non-volatile) load only has one use, we can rewrite this to a
// load from a GEP. This reduces the size of the load. If a load is used
// only by extractvalue instructions then this either must have been
@@ -3965,6 +3608,17 @@ bool InstCombinerImpl::freezeOtherUses(FreezeInst &FI) {
return Changed;
}
+// Check if any direct or bitcast user of this value is a shuffle instruction.
+static bool isUsedWithinShuffleVector(Value *V) {
+ for (auto *U : V->users()) {
+ if (isa<ShuffleVectorInst>(U))
+ return true;
+ else if (match(U, m_BitCast(m_Specific(V))) && isUsedWithinShuffleVector(U))
+ return true;
+ }
+ return false;
+}
+
Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
Value *Op0 = I.getOperand(0);
@@ -4014,8 +3668,14 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
return BestValue;
};
- if (match(Op0, m_Undef()))
+ if (match(Op0, m_Undef())) {
+ // Don't fold freeze(undef/poison) if it's used as a vector operand in
+ // a shuffle. This may improve codegen for shuffles that allow
+ // unspecified inputs.
+ if (isUsedWithinShuffleVector(&I))
+ return nullptr;
return replaceInstUsesWith(I, getUndefReplacement(I.getType()));
+ }
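
The intent, roughly: in a case like the following sketch (hypothetical names), keeping the freeze leaves lanes 2 and 3 of %s unconstrained for the backend, whereas folding %f to a concrete constant would pin them to that constant:

    %f = freeze <4 x i32> poison
    %s = shufflevector <4 x i32> %v, <4 x i32> %f,
                       <4 x i32> <i32 0, i32 1, i32 4, i32 5>
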
Constant *C;
if (match(Op0, m_Constant(C)) && C->containsUndefOrPoisonElement()) {
@@ -4078,8 +3738,8 @@ static bool SoleWriteToDeadLocal(Instruction *I, TargetLibraryInfo &TLI) {
/// beginning of DestBlock, which can only happen if it's safe to move the
/// instruction past all of the instructions between it and the end of its
/// block.
-static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock,
- TargetLibraryInfo &TLI) {
+bool InstCombinerImpl::tryToSinkInstruction(Instruction *I,
+ BasicBlock *DestBlock) {
BasicBlock *SrcBlock = I->getParent();
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
@@ -4126,10 +3786,13 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock,
return false;
}
- I->dropDroppableUses([DestBlock](const Use *U) {
- if (auto *I = dyn_cast<Instruction>(U->getUser()))
- return I->getParent() != DestBlock;
- return true;
+ I->dropDroppableUses([&](const Use *U) {
+ auto *I = dyn_cast<Instruction>(U->getUser());
+ if (I && I->getParent() != DestBlock) {
+ Worklist.add(I);
+ return true;
+ }
+ return false;
});
/// FIXME: We could remove droppable uses that are not dominated by
/// the new position.
@@ -4227,23 +3890,6 @@ bool InstCombinerImpl::run() {
if (!DebugCounter::shouldExecute(VisitCounter))
continue;
- // Instruction isn't dead, see if we can constant propagate it.
- if (!I->use_empty() &&
- (I->getNumOperands() == 0 || isa<Constant>(I->getOperand(0)))) {
- if (Constant *C = ConstantFoldInstruction(I, DL, &TLI)) {
- LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I
- << '\n');
-
- // Add operands to the worklist.
- replaceInstUsesWith(*I, C);
- ++NumConstProp;
- if (isInstructionTriviallyDead(I, &TLI))
- eraseInstFromFunction(*I);
- MadeIRChange = true;
- continue;
- }
- }
-
// See if we can trivially sink this instruction to its user if we can
// prove that the successor is not executed more frequently than our block.
// Return the UserBlock if successful.
@@ -4319,7 +3965,7 @@ bool InstCombinerImpl::run() {
if (OptBB) {
auto *UserParent = *OptBB;
// Okay, the CFG is simple enough, try to sink this instruction.
- if (TryToSinkInstruction(I, UserParent, TLI)) {
+ if (tryToSinkInstruction(I, UserParent)) {
LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
MadeIRChange = true;
// We'll add uses of the sunk instruction below, but since
@@ -4520,15 +4166,21 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
// Recursively visit successors. If this is a branch or switch on a
// constant, only visit the reachable successor.
Instruction *TI = BB->getTerminator();
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
- if (BI->isConditional() && isa<ConstantInt>(BI->getCondition())) {
- bool CondVal = cast<ConstantInt>(BI->getCondition())->getZExtValue();
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI); BI && BI->isConditional()) {
+ if (isa<UndefValue>(BI->getCondition()))
+ // Branch on undef is UB.
+ continue;
+ if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {
+ bool CondVal = Cond->getZExtValue();
BasicBlock *ReachableBB = BI->getSuccessor(!CondVal);
Worklist.push_back(ReachableBB);
continue;
}
} else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
- if (ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
+ if (isa<UndefValue>(SI->getCondition()))
+ // Switch on undef is UB.
+ continue;
+ if (auto *Cond = dyn_cast<ConstantInt>(SI->getCondition())) {
Worklist.push_back(SI->findCaseValue(Cond)->getCaseSuccessor());
continue;
}
@@ -4584,7 +4236,6 @@ static bool combineInstructionsOverFunction(
DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {
auto &DL = F.getParent()->getDataLayout();
- MaxIterations = std::min(MaxIterations, LimitMaxIterations.getValue());
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
@@ -4601,13 +4252,6 @@ static bool combineInstructionsOverFunction(
bool MadeIRChange = false;
if (ShouldLowerDbgDeclare)
MadeIRChange = LowerDbgDeclare(F);
- // LowerDbgDeclare calls RemoveRedundantDbgInstrs, but LowerDbgDeclare will
- // almost never return true when running an assignment tracking build. Take
- // this opportunity to do some clean up for assignment tracking builds too.
- if (!MadeIRChange && isAssignmentTrackingEnabled(*F.getParent())) {
- for (auto &BB : F)
- RemoveRedundantDbgInstrs(&BB);
- }
// Iterate while there is work to do.
unsigned Iteration = 0;
@@ -4643,13 +4287,29 @@ static bool combineInstructionsOverFunction(
MadeIRChange = true;
}
+ if (Iteration == 1)
+ ++NumOneIteration;
+ else if (Iteration == 2)
+ ++NumTwoIterations;
+ else if (Iteration == 3)
+ ++NumThreeIterations;
+ else
+ ++NumFourOrMoreIterations;
+
return MadeIRChange;
}
-InstCombinePass::InstCombinePass() : MaxIterations(LimitMaxIterations) {}
+InstCombinePass::InstCombinePass(InstCombineOptions Opts) : Options(Opts) {}
-InstCombinePass::InstCombinePass(unsigned MaxIterations)
- : MaxIterations(MaxIterations) {}
+void InstCombinePass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<InstCombinePass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << '<';
+ OS << "max-iterations=" << Options.MaxIterations << ";";
+ OS << (Options.UseLoopInfo ? "" : "no-") << "use-loop-info";
+ OS << '>';
+}
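
As a usage sketch, and assuming the corresponding PassBuilder parameter parsing (not shown in this diff) accepts the same spelling that printPipeline emits, the options could be set from an opt pipeline string along the lines of:

    opt -passes='instcombine<max-iterations=2;no-use-loop-info>' -S in.ll
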
PreservedAnalyses InstCombinePass::run(Function &F,
FunctionAnalysisManager &AM) {
@@ -4659,7 +4319,11 @@ PreservedAnalyses InstCombinePass::run(Function &F,
auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ // TODO: Only use LoopInfo when the option is set. This requires that the
+ // callers in the pass pipeline explicitly set the option.
auto *LI = AM.getCachedResult<LoopAnalysis>(F);
+ if (!LI && Options.UseLoopInfo)
+ LI = &AM.getResult<LoopAnalysis>(F);
auto *AA = &AM.getResult<AAManager>(F);
auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
@@ -4669,7 +4333,7 @@ PreservedAnalyses InstCombinePass::run(Function &F,
&AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
- BFI, PSI, MaxIterations, LI))
+ BFI, PSI, Options.MaxIterations, LI))
// No changes, all analyses are preserved.
return PreservedAnalyses::all();
@@ -4718,18 +4382,13 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
nullptr;
return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE,
- BFI, PSI, MaxIterations, LI);
+ BFI, PSI,
+ InstCombineDefaultMaxIterations, LI);
}
char InstructionCombiningPass::ID = 0;
-InstructionCombiningPass::InstructionCombiningPass()
- : FunctionPass(ID), MaxIterations(InstCombineDefaultMaxIterations) {
- initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
-}
-
-InstructionCombiningPass::InstructionCombiningPass(unsigned MaxIterations)
- : FunctionPass(ID), MaxIterations(MaxIterations) {
+InstructionCombiningPass::InstructionCombiningPass() : FunctionPass(ID) {
initializeInstructionCombiningPassPass(*PassRegistry::getPassRegistry());
}
@@ -4752,18 +4411,6 @@ void llvm::initializeInstCombine(PassRegistry &Registry) {
initializeInstructionCombiningPassPass(Registry);
}
-void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
- initializeInstructionCombiningPassPass(*unwrap(R));
-}
-
FunctionPass *llvm::createInstructionCombiningPass() {
return new InstructionCombiningPass();
}
-
-FunctionPass *llvm::createInstructionCombiningPass(unsigned MaxIterations) {
- return new InstructionCombiningPass(MaxIterations);
-}
-
-void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createInstructionCombiningPass());
-}
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 599eeeabc143..bde5fba20f3b 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -24,7 +24,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/MemoryBuiltins.h"
@@ -70,6 +69,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerOptions.h"
@@ -492,7 +492,7 @@ static ShadowMapping getShadowMapping(const Triple &TargetTriple, int LongSize,
bool IsMIPS64 = TargetTriple.isMIPS64();
bool IsArmOrThumb = TargetTriple.isARM() || TargetTriple.isThumb();
bool IsAArch64 = TargetTriple.getArch() == Triple::aarch64;
- bool IsLoongArch64 = TargetTriple.getArch() == Triple::loongarch64;
+ bool IsLoongArch64 = TargetTriple.isLoongArch64();
bool IsRISCV64 = TargetTriple.getArch() == Triple::riscv64;
bool IsWindows = TargetTriple.isOSWindows();
bool IsFuchsia = TargetTriple.isOSFuchsia();
@@ -656,6 +656,7 @@ struct AddressSanitizer {
: UseAfterReturn),
SSGI(SSGI) {
C = &(M.getContext());
+ DL = &M.getDataLayout();
LongSize = M.getDataLayout().getPointerSizeInBits();
IntptrTy = Type::getIntNTy(*C, LongSize);
Int8PtrTy = Type::getInt8PtrTy(*C);
@@ -667,17 +668,8 @@ struct AddressSanitizer {
assert(this->UseAfterReturn != AsanDetectStackUseAfterReturnMode::Invalid);
}
- uint64_t getAllocaSizeInBytes(const AllocaInst &AI) const {
- uint64_t ArraySize = 1;
- if (AI.isArrayAllocation()) {
- const ConstantInt *CI = dyn_cast<ConstantInt>(AI.getArraySize());
- assert(CI && "non-constant array size");
- ArraySize = CI->getZExtValue();
- }
- Type *Ty = AI.getAllocatedType();
- uint64_t SizeInBytes =
- AI.getModule()->getDataLayout().getTypeAllocSize(Ty);
- return SizeInBytes * ArraySize;
+ TypeSize getAllocaSizeInBytes(const AllocaInst &AI) const {
+ return *AI.getAllocationSize(AI.getModule()->getDataLayout());
}
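
For context on the TypeSize return type: the allocation size of a scalable alloca is only known as a multiple of vscale, e.g. (hypothetical snippet):

    %a = alloca <vscale x 4 x i32>       ; vscale x 16 bytes

Such allocas are skipped by the stack poisoner (see the visitAllocaInst change further down) rather than being assigned a fixed byte size.
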
/// Check if we want (and can) handle this alloca.
@@ -692,19 +684,27 @@ struct AddressSanitizer {
const DataLayout &DL);
void instrumentPointerComparisonOrSubtraction(Instruction *I);
void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,
- Value *Addr, uint32_t TypeSize, bool IsWrite,
+ Value *Addr, MaybeAlign Alignment,
+ uint32_t TypeStoreSize, bool IsWrite,
Value *SizeArgument, bool UseCalls, uint32_t Exp);
Instruction *instrumentAMDGPUAddress(Instruction *OrigIns,
Instruction *InsertBefore, Value *Addr,
- uint32_t TypeSize, bool IsWrite,
+ uint32_t TypeStoreSize, bool IsWrite,
Value *SizeArgument);
void instrumentUnusualSizeOrAlignment(Instruction *I,
Instruction *InsertBefore, Value *Addr,
- uint32_t TypeSize, bool IsWrite,
+ TypeSize TypeStoreSize, bool IsWrite,
Value *SizeArgument, bool UseCalls,
uint32_t Exp);
+ void instrumentMaskedLoadOrStore(AddressSanitizer *Pass, const DataLayout &DL,
+ Type *IntptrTy, Value *Mask, Value *EVL,
+ Value *Stride, Instruction *I, Value *Addr,
+ MaybeAlign Alignment, unsigned Granularity,
+ Type *OpType, bool IsWrite,
+ Value *SizeArgument, bool UseCalls,
+ uint32_t Exp);
Value *createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
- Value *ShadowValue, uint32_t TypeSize);
+ Value *ShadowValue, uint32_t TypeStoreSize);
Instruction *generateCrashCode(Instruction *InsertBefore, Value *Addr,
bool IsWrite, size_t AccessSizeIndex,
Value *SizeArgument, uint32_t Exp);
@@ -724,7 +724,7 @@ private:
bool LooksLikeCodeInBug11395(Instruction *I);
bool GlobalIsLinkerInitialized(GlobalVariable *G);
bool isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis, Value *Addr,
- uint64_t TypeSize) const;
+ TypeSize TypeStoreSize) const;
/// Helper to cleanup per-function state.
struct FunctionStateRAII {
@@ -743,6 +743,7 @@ private:
};
LLVMContext *C;
+ const DataLayout *DL;
Triple TargetTriple;
int LongSize;
bool CompileKernel;
@@ -1040,7 +1041,9 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
/// Collect Alloca instructions we want (and can) handle.
void visitAllocaInst(AllocaInst &AI) {
- if (!ASan.isInterestingAlloca(AI)) {
+ // FIXME: Handle scalable vectors instead of ignoring them.
+ if (!ASan.isInterestingAlloca(AI) ||
+ isa<ScalableVectorType>(AI.getAllocatedType())) {
if (AI.isStaticAlloca()) {
// Skip over allocas that are present *before* the first instrumented
// alloca, we don't want to move those around.
@@ -1133,10 +1136,10 @@ void AddressSanitizerPass::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
static_cast<PassInfoMixin<AddressSanitizerPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
- OS << "<";
+ OS << '<';
if (Options.CompileKernel)
OS << "kernel";
- OS << ">";
+ OS << '>';
}
AddressSanitizerPass::AddressSanitizerPass(
@@ -1176,8 +1179,8 @@ PreservedAnalyses AddressSanitizerPass::run(Module &M,
return PA;
}
-static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
- size_t Res = countTrailingZeros(TypeSize / 8);
+static size_t TypeStoreSizeToSizeIndex(uint32_t TypeSize) {
+ size_t Res = llvm::countr_zero(TypeSize / 8);
assert(Res < kNumberOfAccessSizes);
return Res;
}
@@ -1227,7 +1230,7 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
// Instrument memset/memmove/memcpy
void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
- IRBuilder<> IRB(MI);
+ InstrumentationIRBuilder IRB(MI);
if (isa<MemTransferInst>(MI)) {
IRB.CreateCall(
isa<MemMoveInst>(MI) ? AsanMemmove : AsanMemcpy,
@@ -1254,7 +1257,7 @@ bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
bool IsInteresting =
(AI.getAllocatedType()->isSized() &&
// alloca() may be called with 0 size, ignore it.
- ((!AI.isStaticAlloca()) || getAllocaSizeInBytes(AI) > 0) &&
+ ((!AI.isStaticAlloca()) || !getAllocaSizeInBytes(AI).isZero()) &&
// We are only interested in allocas not promotable to registers.
// Promotable allocas are common under -O0.
(!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) &&
@@ -1326,9 +1329,12 @@ void AddressSanitizer::getInterestingMemoryOperands(
XCHG->getCompareOperand()->getType(),
std::nullopt);
} else if (auto CI = dyn_cast<CallInst>(I)) {
- if (CI->getIntrinsicID() == Intrinsic::masked_load ||
- CI->getIntrinsicID() == Intrinsic::masked_store) {
- bool IsWrite = CI->getIntrinsicID() == Intrinsic::masked_store;
+ switch (CI->getIntrinsicID()) {
+ case Intrinsic::masked_load:
+ case Intrinsic::masked_store:
+ case Intrinsic::masked_gather:
+ case Intrinsic::masked_scatter: {
+ bool IsWrite = CI->getType()->isVoidTy();
// Masked store has an initial operand for the value.
unsigned OpOffset = IsWrite ? 1 : 0;
if (IsWrite ? !ClInstrumentWrites : !ClInstrumentReads)
@@ -1344,7 +1350,76 @@ void AddressSanitizer::getInterestingMemoryOperands(
Alignment = Op->getMaybeAlignValue();
Value *Mask = CI->getOperand(2 + OpOffset);
Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
- } else {
+ break;
+ }
+ case Intrinsic::masked_expandload:
+ case Intrinsic::masked_compressstore: {
+ bool IsWrite = CI->getIntrinsicID() == Intrinsic::masked_compressstore;
+ unsigned OpOffset = IsWrite ? 1 : 0;
+ if (IsWrite ? !ClInstrumentWrites : !ClInstrumentReads)
+ return;
+ auto BasePtr = CI->getOperand(OpOffset);
+ if (ignoreAccess(I, BasePtr))
+ return;
+ MaybeAlign Alignment = BasePtr->getPointerAlignment(*DL);
+ Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
+
+ IRBuilder IB(I);
+ Value *Mask = CI->getOperand(1 + OpOffset);
+ // Use the popcount of Mask as the effective vector length.
+ Type *ExtTy = VectorType::get(IntptrTy, cast<VectorType>(Ty));
+ Value *ExtMask = IB.CreateZExt(Mask, ExtTy);
+ Value *EVL = IB.CreateAddReduce(ExtMask);
+ Value *TrueMask = ConstantInt::get(Mask->getType(), 1);
+ Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, TrueMask,
+ EVL);
+ break;
+ }
+ case Intrinsic::vp_load:
+ case Intrinsic::vp_store:
+ case Intrinsic::experimental_vp_strided_load:
+ case Intrinsic::experimental_vp_strided_store: {
+ auto *VPI = cast<VPIntrinsic>(CI);
+ unsigned IID = CI->getIntrinsicID();
+ bool IsWrite = CI->getType()->isVoidTy();
+ if (IsWrite ? !ClInstrumentWrites : !ClInstrumentReads)
+ return;
+ unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
+ Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
+ MaybeAlign Alignment = VPI->getOperand(PtrOpNo)->getPointerAlignment(*DL);
+ Value *Stride = nullptr;
+ if (IID == Intrinsic::experimental_vp_strided_store ||
+ IID == Intrinsic::experimental_vp_strided_load) {
+ Stride = VPI->getOperand(PtrOpNo + 1);
+ // Use the pointer alignment as the element alignment if the stride is a
+      // multiple of the pointer alignment. Otherwise, the element alignment
+ // should be Align(1).
+ unsigned PointerAlign = Alignment.valueOrOne().value();
+ if (!isa<ConstantInt>(Stride) ||
+ cast<ConstantInt>(Stride)->getZExtValue() % PointerAlign != 0)
+ Alignment = Align(1);
+ }
+ Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
+ VPI->getMaskParam(), VPI->getVectorLengthParam(),
+ Stride);
+ break;
+ }
+ case Intrinsic::vp_gather:
+ case Intrinsic::vp_scatter: {
+ auto *VPI = cast<VPIntrinsic>(CI);
+ unsigned IID = CI->getIntrinsicID();
+ bool IsWrite = IID == Intrinsic::vp_scatter;
+ if (IsWrite ? !ClInstrumentWrites : !ClInstrumentReads)
+ return;
+ unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
+ Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
+ MaybeAlign Alignment = VPI->getPointerAlignment();
+ Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
+ VPI->getMaskParam(),
+ VPI->getVectorLengthParam());
+ break;
+ }
+ default:
for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
if (!ClInstrumentByval || !CI->isByValArgument(ArgNo) ||
ignoreAccess(I, CI->getArgOperand(ArgNo)))
@@ -1416,57 +1491,94 @@ void AddressSanitizer::instrumentPointerComparisonOrSubtraction(
static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I,
Instruction *InsertBefore, Value *Addr,
MaybeAlign Alignment, unsigned Granularity,
- uint32_t TypeSize, bool IsWrite,
+ TypeSize TypeStoreSize, bool IsWrite,
Value *SizeArgument, bool UseCalls,
uint32_t Exp) {
// Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check
// if the data is properly aligned.
- if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 ||
- TypeSize == 128) &&
- (!Alignment || *Alignment >= Granularity || *Alignment >= TypeSize / 8))
- return Pass->instrumentAddress(I, InsertBefore, Addr, TypeSize, IsWrite,
- nullptr, UseCalls, Exp);
- Pass->instrumentUnusualSizeOrAlignment(I, InsertBefore, Addr, TypeSize,
+ if (!TypeStoreSize.isScalable()) {
+ const auto FixedSize = TypeStoreSize.getFixedValue();
+ switch (FixedSize) {
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ if (!Alignment || *Alignment >= Granularity ||
+ *Alignment >= FixedSize / 8)
+ return Pass->instrumentAddress(I, InsertBefore, Addr, Alignment,
+ FixedSize, IsWrite, nullptr, UseCalls,
+ Exp);
+ }
+ }
+ Pass->instrumentUnusualSizeOrAlignment(I, InsertBefore, Addr, TypeStoreSize,
IsWrite, nullptr, UseCalls, Exp);
}
-static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
- const DataLayout &DL, Type *IntptrTy,
- Value *Mask, Instruction *I,
- Value *Addr, MaybeAlign Alignment,
- unsigned Granularity, Type *OpType,
- bool IsWrite, Value *SizeArgument,
- bool UseCalls, uint32_t Exp) {
- auto *VTy = cast<FixedVectorType>(OpType);
- uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
- unsigned Num = VTy->getNumElements();
+void AddressSanitizer::instrumentMaskedLoadOrStore(
+ AddressSanitizer *Pass, const DataLayout &DL, Type *IntptrTy, Value *Mask,
+ Value *EVL, Value *Stride, Instruction *I, Value *Addr,
+ MaybeAlign Alignment, unsigned Granularity, Type *OpType, bool IsWrite,
+ Value *SizeArgument, bool UseCalls, uint32_t Exp) {
+ auto *VTy = cast<VectorType>(OpType);
+ TypeSize ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
auto Zero = ConstantInt::get(IntptrTy, 0);
- for (unsigned Idx = 0; Idx < Num; ++Idx) {
- Value *InstrumentedAddress = nullptr;
- Instruction *InsertBefore = I;
- if (auto *Vector = dyn_cast<ConstantVector>(Mask)) {
- // dyn_cast as we might get UndefValue
- if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {
- if (Masked->isZero())
- // Mask is constant false, so no instrumentation needed.
- continue;
- // If we have a true or undef value, fall through to doInstrumentAddress
- // with InsertBefore == I
- }
+
+ IRBuilder IB(I);
+ Instruction *LoopInsertBefore = I;
+ if (EVL) {
+  // The end argument of SplitBlockAndInsertForEachLane is assumed to be
+  // bigger than zero, so we should check whether EVL is zero here.
+ Type *EVLType = EVL->getType();
+ Value *IsEVLZero = IB.CreateICmpNE(EVL, ConstantInt::get(EVLType, 0));
+ LoopInsertBefore = SplitBlockAndInsertIfThen(IsEVLZero, I, false);
+ IB.SetInsertPoint(LoopInsertBefore);
+ // Cast EVL to IntptrTy.
+ EVL = IB.CreateZExtOrTrunc(EVL, IntptrTy);
+    // To avoid undefined behavior when extracting with an out-of-range index,
+    // use the minimum of EVL and the element count as the trip count.
+ Value *EC = IB.CreateElementCount(IntptrTy, VTy->getElementCount());
+ EVL = IB.CreateBinaryIntrinsic(Intrinsic::umin, EVL, EC);
+ } else {
+ EVL = IB.CreateElementCount(IntptrTy, VTy->getElementCount());
+ }
+
+ // Cast Stride to IntptrTy.
+ if (Stride)
+ Stride = IB.CreateZExtOrTrunc(Stride, IntptrTy);
+
+ SplitBlockAndInsertForEachLane(EVL, LoopInsertBefore,
+ [&](IRBuilderBase &IRB, Value *Index) {
+ Value *MaskElem = IRB.CreateExtractElement(Mask, Index);
+ if (auto *MaskElemC = dyn_cast<ConstantInt>(MaskElem)) {
+ if (MaskElemC->isZero())
+ // No check
+ return;
+ // Unconditional check
} else {
- IRBuilder<> IRB(I);
- Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);
- Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);
- InsertBefore = ThenTerm;
+ // Conditional check
+ Instruction *ThenTerm = SplitBlockAndInsertIfThen(
+ MaskElem, &*IRB.GetInsertPoint(), false);
+ IRB.SetInsertPoint(ThenTerm);
}
- IRBuilder<> IRB(InsertBefore);
- InstrumentedAddress =
- IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
- doInstrumentAddress(Pass, I, InsertBefore, InstrumentedAddress, Alignment,
- Granularity, ElemTypeSize, IsWrite, SizeArgument,
- UseCalls, Exp);
- }
+ Value *InstrumentedAddress;
+ if (isa<VectorType>(Addr->getType())) {
+ assert(
+ cast<VectorType>(Addr->getType())->getElementType()->isPointerTy() &&
+ "Expected vector of pointer.");
+ InstrumentedAddress = IRB.CreateExtractElement(Addr, Index);
+ } else if (Stride) {
+ Index = IRB.CreateMul(Index, Stride);
+ Addr = IRB.CreateBitCast(Addr, Type::getInt8PtrTy(*C));
+ InstrumentedAddress = IRB.CreateGEP(Type::getInt8Ty(*C), Addr, {Index});
+ } else {
+ InstrumentedAddress = IRB.CreateGEP(VTy, Addr, {Zero, Index});
+ }
+ doInstrumentAddress(Pass, I, &*IRB.GetInsertPoint(),
+ InstrumentedAddress, Alignment, Granularity,
+ ElemTypeSize, IsWrite, SizeArgument, UseCalls, Exp);
+ });
}
void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
@@ -1492,7 +1604,7 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
// dynamically initialized global is always valid.
GlobalVariable *G = dyn_cast<GlobalVariable>(getUnderlyingObject(Addr));
if (G && (!ClInitializers || GlobalIsLinkerInitialized(G)) &&
- isSafeAccess(ObjSizeVis, Addr, O.TypeSize)) {
+ isSafeAccess(ObjSizeVis, Addr, O.TypeStoreSize)) {
NumOptimizedAccessesToGlobalVar++;
return;
}
@@ -1501,7 +1613,7 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
if (ClOpt && ClOptStack) {
// A direct inbounds access to a stack variable is always valid.
if (isa<AllocaInst>(getUnderlyingObject(Addr)) &&
- isSafeAccess(ObjSizeVis, Addr, O.TypeSize)) {
+ isSafeAccess(ObjSizeVis, Addr, O.TypeStoreSize)) {
NumOptimizedAccessesToStackVar++;
return;
}
@@ -1514,12 +1626,13 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
unsigned Granularity = 1 << Mapping.Scale;
if (O.MaybeMask) {
- instrumentMaskedLoadOrStore(this, DL, IntptrTy, O.MaybeMask, O.getInsn(),
- Addr, O.Alignment, Granularity, O.OpType,
- O.IsWrite, nullptr, UseCalls, Exp);
+ instrumentMaskedLoadOrStore(this, DL, IntptrTy, O.MaybeMask, O.MaybeEVL,
+ O.MaybeStride, O.getInsn(), Addr, O.Alignment,
+ Granularity, O.OpType, O.IsWrite, nullptr,
+ UseCalls, Exp);
} else {
doInstrumentAddress(this, O.getInsn(), O.getInsn(), Addr, O.Alignment,
- Granularity, O.TypeSize, O.IsWrite, nullptr, UseCalls,
+ Granularity, O.TypeStoreSize, O.IsWrite, nullptr, UseCalls,
Exp);
}
}
@@ -1529,7 +1642,7 @@ Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore,
size_t AccessSizeIndex,
Value *SizeArgument,
uint32_t Exp) {
- IRBuilder<> IRB(InsertBefore);
+ InstrumentationIRBuilder IRB(InsertBefore);
Value *ExpVal = Exp == 0 ? nullptr : ConstantInt::get(IRB.getInt32Ty(), Exp);
CallInst *Call = nullptr;
if (SizeArgument) {
@@ -1554,15 +1667,15 @@ Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore,
Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
Value *ShadowValue,
- uint32_t TypeSize) {
+ uint32_t TypeStoreSize) {
size_t Granularity = static_cast<size_t>(1) << Mapping.Scale;
// Addr & (Granularity - 1)
Value *LastAccessedByte =
IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
// (Addr & (Granularity - 1)) + size - 1
- if (TypeSize / 8 > 1)
+ if (TypeStoreSize / 8 > 1)
LastAccessedByte = IRB.CreateAdd(
- LastAccessedByte, ConstantInt::get(IntptrTy, TypeSize / 8 - 1));
+ LastAccessedByte, ConstantInt::get(IntptrTy, TypeStoreSize / 8 - 1));
// (uint8_t) ((Addr & (Granularity-1)) + size - 1)
LastAccessedByte =
IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false);
@@ -1572,7 +1685,7 @@ Value *AddressSanitizer::createSlowPathCmp(IRBuilder<> &IRB, Value *AddrLong,
Instruction *AddressSanitizer::instrumentAMDGPUAddress(
Instruction *OrigIns, Instruction *InsertBefore, Value *Addr,
- uint32_t TypeSize, bool IsWrite, Value *SizeArgument) {
+ uint32_t TypeStoreSize, bool IsWrite, Value *SizeArgument) {
// Do not instrument unsupported addrspaces.
if (isUnsupportedAMDGPUAddrspace(Addr))
return nullptr;
@@ -1595,18 +1708,19 @@ Instruction *AddressSanitizer::instrumentAMDGPUAddress(
void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Instruction *InsertBefore, Value *Addr,
- uint32_t TypeSize, bool IsWrite,
+ MaybeAlign Alignment,
+ uint32_t TypeStoreSize, bool IsWrite,
Value *SizeArgument, bool UseCalls,
uint32_t Exp) {
if (TargetTriple.isAMDGPU()) {
InsertBefore = instrumentAMDGPUAddress(OrigIns, InsertBefore, Addr,
- TypeSize, IsWrite, SizeArgument);
+ TypeStoreSize, IsWrite, SizeArgument);
if (!InsertBefore)
return;
}
- IRBuilder<> IRB(InsertBefore);
- size_t AccessSizeIndex = TypeSizeToSizeIndex(TypeSize);
+ InstrumentationIRBuilder IRB(InsertBefore);
+ size_t AccessSizeIndex = TypeStoreSizeToSizeIndex(TypeStoreSize);
const ASanAccessInfo AccessInfo(IsWrite, CompileKernel, AccessSizeIndex);
if (UseCalls && ClOptimizeCallbacks) {
@@ -1631,17 +1745,19 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
}
Type *ShadowTy =
- IntegerType::get(*C, std::max(8U, TypeSize >> Mapping.Scale));
+ IntegerType::get(*C, std::max(8U, TypeStoreSize >> Mapping.Scale));
Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
Value *ShadowPtr = memToShadow(AddrLong, IRB);
- Value *ShadowValue =
- IRB.CreateLoad(ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
+ const uint64_t ShadowAlign =
+ std::max<uint64_t>(Alignment.valueOrOne().value() >> Mapping.Scale, 1);
+ Value *ShadowValue = IRB.CreateAlignedLoad(
+ ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy), Align(ShadowAlign));
Value *Cmp = IRB.CreateIsNotNull(ShadowValue);
size_t Granularity = 1ULL << Mapping.Scale;
Instruction *CrashTerm = nullptr;
- if (ClAlwaysSlowPath || (TypeSize < 8 * Granularity)) {
+ if (ClAlwaysSlowPath || (TypeStoreSize < 8 * Granularity)) {
// We use branch weights for the slow path check, to indicate that the slow
// path is rarely taken. This seems to be the case for SPEC benchmarks.
Instruction *CheckTerm = SplitBlockAndInsertIfThen(
@@ -1649,7 +1765,7 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
assert(cast<BranchInst>(CheckTerm)->isUnconditional());
BasicBlock *NextBB = CheckTerm->getSuccessor(0);
IRB.SetInsertPoint(CheckTerm);
- Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeSize);
+ Value *Cmp2 = createSlowPathCmp(IRB, AddrLong, ShadowValue, TypeStoreSize);
if (Recover) {
CrashTerm = SplitBlockAndInsertIfThen(Cmp2, CheckTerm, false);
} else {
@@ -1665,7 +1781,8 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
Instruction *Crash = generateCrashCode(CrashTerm, AddrLong, IsWrite,
AccessSizeIndex, SizeArgument, Exp);
- Crash->setDebugLoc(OrigIns->getDebugLoc());
+ if (OrigIns->getDebugLoc())
+ Crash->setDebugLoc(OrigIns->getDebugLoc());
}
// Instrument unusual size or unusual alignment.
@@ -1673,10 +1790,12 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns,
// and the last bytes. We call __asan_report_*_n(addr, real_size) to be able
// to report the actual access size.
void AddressSanitizer::instrumentUnusualSizeOrAlignment(
- Instruction *I, Instruction *InsertBefore, Value *Addr, uint32_t TypeSize,
+ Instruction *I, Instruction *InsertBefore, Value *Addr, TypeSize TypeStoreSize,
bool IsWrite, Value *SizeArgument, bool UseCalls, uint32_t Exp) {
- IRBuilder<> IRB(InsertBefore);
- Value *Size = ConstantInt::get(IntptrTy, TypeSize / 8);
+ InstrumentationIRBuilder IRB(InsertBefore);
+ Value *NumBits = IRB.CreateTypeSize(IntptrTy, TypeStoreSize);
+ Value *Size = IRB.CreateLShr(NumBits, ConstantInt::get(IntptrTy, 3));
+
Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);
if (UseCalls) {
if (Exp == 0)
@@ -1686,11 +1805,13 @@ void AddressSanitizer::instrumentUnusualSizeOrAlignment(
IRB.CreateCall(AsanMemoryAccessCallbackSized[IsWrite][1],
{AddrLong, Size, ConstantInt::get(IRB.getInt32Ty(), Exp)});
} else {
+ Value *SizeMinusOne = IRB.CreateSub(Size, ConstantInt::get(IntptrTy, 1));
Value *LastByte = IRB.CreateIntToPtr(
- IRB.CreateAdd(AddrLong, ConstantInt::get(IntptrTy, TypeSize / 8 - 1)),
+ IRB.CreateAdd(AddrLong, SizeMinusOne),
Addr->getType());
- instrumentAddress(I, InsertBefore, Addr, 8, IsWrite, Size, false, Exp);
- instrumentAddress(I, InsertBefore, LastByte, 8, IsWrite, Size, false, Exp);
+ instrumentAddress(I, InsertBefore, Addr, {}, 8, IsWrite, Size, false, Exp);
+ instrumentAddress(I, InsertBefore, LastByte, {}, 8, IsWrite, Size, false,
+ Exp);
}
}
@@ -2306,7 +2427,7 @@ bool ModuleAddressSanitizer::InstrumentGlobals(IRBuilder<> &IRB, Module &M,
G->getThreadLocalMode(), G->getAddressSpace());
NewGlobal->copyAttributesFrom(G);
NewGlobal->setComdat(G->getComdat());
- NewGlobal->setAlignment(MaybeAlign(getMinRedzoneSizeForGlobal()));
+ NewGlobal->setAlignment(Align(getMinRedzoneSizeForGlobal()));
// Don't fold globals with redzones. ODR violation detector and redzone
// poisoning implicitly creates a dependence on the global's address, so it
// is no longer valid for it to be marked unnamed_addr.
@@ -3485,7 +3606,11 @@ void FunctionStackPoisoner::handleDynamicAllocaCall(AllocaInst *AI) {
// base object. For example, it is a field access or an array access with
// constant inbounds index.
bool AddressSanitizer::isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis,
- Value *Addr, uint64_t TypeSize) const {
+ Value *Addr, TypeSize TypeStoreSize) const {
+ if (TypeStoreSize.isScalable())
+ // TODO: We can use vscale_range to convert a scalable value to an
+ // upper bound on the access size.
+ return false;
SizeOffsetType SizeOffset = ObjSizeVis.compute(Addr);
if (!ObjSizeVis.bothKnown(SizeOffset)) return false;
uint64_t Size = SizeOffset.first.getZExtValue();
@@ -3495,5 +3620,5 @@ bool AddressSanitizer::isSafeAccess(ObjectSizeOffsetVisitor &ObjSizeVis,
// . Size >= Offset (unsigned)
// . Size - Offset >= NeededSize (unsigned)
return Offset >= 0 && Size >= uint64_t(Offset) &&
- Size - uint64_t(Offset) >= TypeSize / 8;
+ Size - uint64_t(Offset) >= TypeStoreSize / 8;
}
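
The isSafeAccess hunk above bails out on scalable sizes and otherwise keeps a plain byte-containment check: the offset must be non-negative, must lie inside the object, and must leave at least TypeStoreSize / 8 bytes for the access. A minimal standalone sketch of that arithmetic with ordinary integers (hypothetical function name, not part of the patch):

#include <cstdint>

// NeededSize bytes accessed at signed Offset into an object of Size bytes.
// Mirrors the three conditions listed above isSafeAccess; illustrative only,
// not the pass's implementation.
static bool accessProvablyInBounds(int64_t Offset, uint64_t Size,
                                   uint64_t NeededSize) {
  if (Offset < 0)
    return false; // access begins before the object
  if (Size < static_cast<uint64_t>(Offset))
    return false; // access begins past the end of the object
  return Size - static_cast<uint64_t>(Offset) >= NeededSize; // enough room left
}
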
diff --git a/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp b/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp
new file mode 100644
index 000000000000..0e49984c6ee3
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp
@@ -0,0 +1,368 @@
+//===-- BlockCoverageInference.cpp - Minimal Execution Coverage -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Our algorithm works by first identifying a subset of nodes that must always
+// be instrumented. We call these nodes ambiguous because knowing the coverage
+// of all remaining nodes is not enough to infer their coverage status.
+//
+// In general, a node v is ambiguous if there exist two entry-to-terminal paths
+// P_1 and P_2 such that:
+// 1. v not in P_1 but P_1 visits a predecessor of v, and
+// 2. v not in P_2 but P_2 visits a successor of v.
+//
+// If a node v is not ambiguous, then if condition 1 fails, we can infer v’s
+// coverage from the coverage of its predecessors, or if condition 2 fails, we
+// can infer v’s coverage from the coverage of its successors.
+//
+// Sadly, there are example CFGs where it is not possible to infer all nodes
+// from the ambiguous nodes alone. Our algorithm selects a minimum number of
+// extra nodes to add to the ambiguous nodes to form a valid instrumentation S.
+//
+// Details on this algorithm can be found in https://arxiv.org/abs/2208.13907
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CRC.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pgo-block-coverage"
+
+STATISTIC(NumFunctions, "Number of total functions that BCI has processed");
+STATISTIC(NumIneligibleFunctions,
+                   "Number of functions on which BCI cannot run");
+STATISTIC(NumBlocks, "Number of total basic blocks that BCI has processed");
+STATISTIC(NumInstrumentedBlocks,
+ "Number of basic blocks instrumented for coverage");
+
+BlockCoverageInference::BlockCoverageInference(const Function &F,
+ bool ForceInstrumentEntry)
+ : F(F), ForceInstrumentEntry(ForceInstrumentEntry) {
+ findDependencies();
+ assert(!ForceInstrumentEntry || shouldInstrumentBlock(F.getEntryBlock()));
+
+ ++NumFunctions;
+ for (auto &BB : F) {
+ ++NumBlocks;
+ if (shouldInstrumentBlock(BB))
+ ++NumInstrumentedBlocks;
+ }
+}
+
+BlockCoverageInference::BlockSet
+BlockCoverageInference::getDependencies(const BasicBlock &BB) const {
+ assert(BB.getParent() == &F);
+ BlockSet Dependencies;
+ auto It = PredecessorDependencies.find(&BB);
+ if (It != PredecessorDependencies.end())
+ Dependencies.set_union(It->second);
+ It = SuccessorDependencies.find(&BB);
+ if (It != SuccessorDependencies.end())
+ Dependencies.set_union(It->second);
+ return Dependencies;
+}
+
+uint64_t BlockCoverageInference::getInstrumentedBlocksHash() const {
+ JamCRC JC;
+ uint64_t Index = 0;
+ for (auto &BB : F) {
+ if (shouldInstrumentBlock(BB)) {
+ uint8_t Data[8];
+ support::endian::write64le(Data, Index);
+ JC.update(Data);
+ }
+ Index++;
+ }
+ return JC.getCRC();
+}
+
+bool BlockCoverageInference::shouldInstrumentBlock(const BasicBlock &BB) const {
+ assert(BB.getParent() == &F);
+ auto It = PredecessorDependencies.find(&BB);
+ if (It != PredecessorDependencies.end() && It->second.size())
+ return false;
+ It = SuccessorDependencies.find(&BB);
+ if (It != SuccessorDependencies.end() && It->second.size())
+ return false;
+ return true;
+}
+
+void BlockCoverageInference::findDependencies() {
+ assert(PredecessorDependencies.empty() && SuccessorDependencies.empty());
+ // Empirical analysis shows that this algorithm finishes within 5 seconds for
+ // functions with fewer than 1.5K blocks.
+ if (F.hasFnAttribute(Attribute::NoReturn) || F.size() > 1500) {
+ ++NumIneligibleFunctions;
+ return;
+ }
+
+ SmallVector<const BasicBlock *, 4> TerminalBlocks;
+ for (auto &BB : F)
+ if (succ_empty(&BB))
+ TerminalBlocks.push_back(&BB);
+
+ // Traverse the CFG backwards from the terminal blocks to make sure every
+ // block can reach some terminal block. Otherwise this algorithm will not work
+ // and we must fall back to instrumenting every block.
+ df_iterator_default_set<const BasicBlock *> Visited;
+ for (auto *BB : TerminalBlocks)
+ for (auto *N : inverse_depth_first_ext(BB, Visited))
+ (void)N;
+ if (F.size() != Visited.size()) {
+ ++NumIneligibleFunctions;
+ return;
+ }
+
+ // The current implementation for computing `PredecessorDependencies` and
+ // `SuccessorDependencies` runs in quadratic time with respect to the number
+ // of basic blocks. While we do have a more complicated linear time algorithm
+  // in https://arxiv.org/abs/2208.13907, we do not know if it will give a
+ // significant speedup in practice given that most functions tend to be
+ // relatively small in size for intended use cases.
+ auto &EntryBlock = F.getEntryBlock();
+ for (auto &BB : F) {
+ // The set of blocks that are reachable while avoiding BB.
+ BlockSet ReachableFromEntry, ReachableFromTerminal;
+ getReachableAvoiding(EntryBlock, BB, /*IsForward=*/true,
+ ReachableFromEntry);
+ for (auto *TerminalBlock : TerminalBlocks)
+ getReachableAvoiding(*TerminalBlock, BB, /*IsForward=*/false,
+ ReachableFromTerminal);
+
+ auto Preds = predecessors(&BB);
+ bool HasSuperReachablePred = llvm::any_of(Preds, [&](auto *Pred) {
+ return ReachableFromEntry.count(Pred) &&
+ ReachableFromTerminal.count(Pred);
+ });
+ if (!HasSuperReachablePred)
+ for (auto *Pred : Preds)
+ if (ReachableFromEntry.count(Pred))
+ PredecessorDependencies[&BB].insert(Pred);
+
+ auto Succs = successors(&BB);
+ bool HasSuperReachableSucc = llvm::any_of(Succs, [&](auto *Succ) {
+ return ReachableFromEntry.count(Succ) &&
+ ReachableFromTerminal.count(Succ);
+ });
+ if (!HasSuperReachableSucc)
+ for (auto *Succ : Succs)
+ if (ReachableFromTerminal.count(Succ))
+ SuccessorDependencies[&BB].insert(Succ);
+ }
+
+ if (ForceInstrumentEntry) {
+ // Force the entry block to be instrumented by clearing the blocks it can
+ // infer coverage from.
+ PredecessorDependencies[&EntryBlock].clear();
+ SuccessorDependencies[&EntryBlock].clear();
+ }
+
+ // Construct a graph where blocks are connected if there is a mutual
+ // dependency between them. This graph has a special property that it contains
+ // only paths.
+ DenseMap<const BasicBlock *, BlockSet> AdjacencyList;
+ for (auto &BB : F) {
+ for (auto *Succ : successors(&BB)) {
+ if (SuccessorDependencies[&BB].count(Succ) &&
+ PredecessorDependencies[Succ].count(&BB)) {
+ AdjacencyList[&BB].insert(Succ);
+ AdjacencyList[Succ].insert(&BB);
+ }
+ }
+ }
+
+ // Given a path with at least one node, return the next node on the path.
+ auto getNextOnPath = [&](BlockSet &Path) -> const BasicBlock * {
+ assert(Path.size());
+ auto &Neighbors = AdjacencyList[Path.back()];
+ if (Path.size() == 1) {
+ // This is the first node on the path, return its neighbor.
+ assert(Neighbors.size() == 1);
+ return Neighbors.front();
+ } else if (Neighbors.size() == 2) {
+ // This is the middle of the path, find the neighbor that is not on the
+ // path already.
+ assert(Path.size() >= 2);
+ return Path.count(Neighbors[0]) ? Neighbors[1] : Neighbors[0];
+ }
+ // This is the end of the path.
+ assert(Neighbors.size() == 1);
+ return nullptr;
+ };
+
+ // Remove all cycles in the inferencing graph.
+ for (auto &BB : F) {
+ if (AdjacencyList[&BB].size() == 1) {
+ // We found the head of some path.
+ BlockSet Path;
+ Path.insert(&BB);
+ while (const BasicBlock *Next = getNextOnPath(Path))
+ Path.insert(Next);
+ LLVM_DEBUG(dbgs() << "Found path: " << getBlockNames(Path) << "\n");
+
+ // Remove these nodes from the graph so we don't discover this path again.
+ for (auto *BB : Path)
+ AdjacencyList[BB].clear();
+
+ // Finally, remove the cycles.
+ if (PredecessorDependencies[Path.front()].size()) {
+ for (auto *BB : Path)
+ if (BB != Path.back())
+ SuccessorDependencies[BB].clear();
+ } else {
+ for (auto *BB : Path)
+ if (BB != Path.front())
+ PredecessorDependencies[BB].clear();
+ }
+ }
+ }
+ LLVM_DEBUG(dump(dbgs()));
+}
+
+void BlockCoverageInference::getReachableAvoiding(const BasicBlock &Start,
+ const BasicBlock &Avoid,
+ bool IsForward,
+ BlockSet &Reachable) const {
+ df_iterator_default_set<const BasicBlock *> Visited;
+ Visited.insert(&Avoid);
+ if (IsForward) {
+ auto Range = depth_first_ext(&Start, Visited);
+ Reachable.insert(Range.begin(), Range.end());
+ } else {
+ auto Range = inverse_depth_first_ext(&Start, Visited);
+ Reachable.insert(Range.begin(), Range.end());
+ }
+}
+
+namespace llvm {
+class DotFuncBCIInfo {
+private:
+ const BlockCoverageInference *BCI;
+ const DenseMap<const BasicBlock *, bool> *Coverage;
+
+public:
+ DotFuncBCIInfo(const BlockCoverageInference *BCI,
+ const DenseMap<const BasicBlock *, bool> *Coverage)
+ : BCI(BCI), Coverage(Coverage) {}
+
+ const Function &getFunction() { return BCI->F; }
+
+ bool isInstrumented(const BasicBlock *BB) const {
+ return BCI->shouldInstrumentBlock(*BB);
+ }
+
+ bool isCovered(const BasicBlock *BB) const {
+ return Coverage && Coverage->lookup(BB);
+ }
+
+ bool isDependent(const BasicBlock *Src, const BasicBlock *Dest) const {
+ return BCI->getDependencies(*Src).count(Dest);
+ }
+};
+
+template <>
+struct GraphTraits<DotFuncBCIInfo *> : public GraphTraits<const BasicBlock *> {
+ static NodeRef getEntryNode(DotFuncBCIInfo *Info) {
+ return &(Info->getFunction().getEntryBlock());
+ }
+
+ // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
+ using nodes_iterator = pointer_iterator<Function::const_iterator>;
+
+ static nodes_iterator nodes_begin(DotFuncBCIInfo *Info) {
+ return nodes_iterator(Info->getFunction().begin());
+ }
+
+ static nodes_iterator nodes_end(DotFuncBCIInfo *Info) {
+ return nodes_iterator(Info->getFunction().end());
+ }
+
+ static size_t size(DotFuncBCIInfo *Info) {
+ return Info->getFunction().size();
+ }
+};
+
+template <>
+struct DOTGraphTraits<DotFuncBCIInfo *> : public DefaultDOTGraphTraits {
+
+ DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
+
+ static std::string getGraphName(DotFuncBCIInfo *Info) {
+ return "BCI CFG for " + Info->getFunction().getName().str();
+ }
+
+ std::string getNodeLabel(const BasicBlock *Node, DotFuncBCIInfo *Info) {
+ return Node->getName().str();
+ }
+
+ std::string getEdgeAttributes(const BasicBlock *Src, const_succ_iterator I,
+ DotFuncBCIInfo *Info) {
+ const BasicBlock *Dest = *I;
+ if (Info->isDependent(Src, Dest))
+ return "color=red";
+ if (Info->isDependent(Dest, Src))
+ return "color=blue";
+ return "";
+ }
+
+ std::string getNodeAttributes(const BasicBlock *Node, DotFuncBCIInfo *Info) {
+ std::string Result;
+ if (Info->isInstrumented(Node))
+ Result += "style=filled,fillcolor=gray";
+ if (Info->isCovered(Node))
+ Result += std::string(Result.empty() ? "" : ",") + "color=red";
+ return Result;
+ }
+};
+
+} // namespace llvm
+
+void BlockCoverageInference::viewBlockCoverageGraph(
+ const DenseMap<const BasicBlock *, bool> *Coverage) const {
+ DotFuncBCIInfo Info(this, Coverage);
+ WriteGraph(&Info, "BCI", false,
+ "Block Coverage Inference for " + F.getName());
+}
+
+void BlockCoverageInference::dump(raw_ostream &OS) const {
+ OS << "Minimal block coverage for function \'" << F.getName()
+ << "\' (Instrumented=*)\n";
+ for (auto &BB : F) {
+ OS << (shouldInstrumentBlock(BB) ? "* " : " ") << BB.getName() << "\n";
+ auto It = PredecessorDependencies.find(&BB);
+ if (It != PredecessorDependencies.end() && It->second.size())
+ OS << " PredDeps = " << getBlockNames(It->second) << "\n";
+ It = SuccessorDependencies.find(&BB);
+ if (It != SuccessorDependencies.end() && It->second.size())
+ OS << " SuccDeps = " << getBlockNames(It->second) << "\n";
+ }
+ OS << " Instrumented Blocks Hash = 0x"
+ << Twine::utohexstr(getInstrumentedBlocksHash()) << "\n";
+}
+
+std::string
+BlockCoverageInference::getBlockNames(ArrayRef<const BasicBlock *> BBs) {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ OS << "[";
+ if (!BBs.empty()) {
+ OS << BBs.front()->getName();
+ BBs = BBs.drop_front();
+ }
+ for (auto *BB : BBs)
+ OS << ", " << BB->getName();
+ OS << "]";
+ return OS.str();
+}
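
The new BlockCoverageInference.cpp above only chooses which blocks to instrument and records, for every other block, the predecessor or successor set its coverage can be derived from; the code that folds measured coverage back through those sets lives in the consumer and is not shown in this diff. A toy sketch under the assumption that a non-instrumented block counts as covered once any block in its dependency set is covered, iterated to a fixed point (names and the exact rule are assumptions, not code from this commit):

#include <map>
#include <set>
#include <string>

using Block = std::string;

// Toy propagation over the dependency sets: start from the measured coverage
// of the instrumented blocks and repeatedly mark a block covered once any
// block it depends on is known to be covered.
static std::map<Block, bool>
inferCoverage(std::map<Block, bool> Covered,
              const std::map<Block, std::set<Block>> &Deps) {
  bool Changed = true;
  while (Changed) {
    Changed = false;
    for (const auto &[BB, DepSet] : Deps) {
      if (Covered[BB])
        continue;
      for (const Block &Dep : DepSet) {
        if (Covered[Dep]) {
          Covered[BB] = true;
          Changed = true;
          break;
        }
      }
    }
  }
  return Covered;
}
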
diff --git a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
index 8b1d39ad412f..709095184af5 100644
--- a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
+++ b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp
@@ -23,8 +23,6 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -56,7 +54,7 @@ static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal,
const DataLayout &DL, TargetLibraryInfo &TLI,
ObjectSizeOffsetEvaluator &ObjSizeEval,
BuilderTy &IRB, ScalarEvolution &SE) {
- uint64_t NeededSize = DL.getTypeStoreSize(InstVal->getType());
+ TypeSize NeededSize = DL.getTypeStoreSize(InstVal->getType());
LLVM_DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
<< " bytes\n");
@@ -71,8 +69,8 @@ static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal,
Value *Offset = SizeOffset.second;
ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
- Type *IntTy = DL.getIntPtrType(Ptr->getType());
- Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize);
+ Type *IndexTy = DL.getIndexType(Ptr->getType());
+ Value *NeededSizeVal = IRB.CreateTypeSize(IndexTy, NeededSize);
auto SizeRange = SE.getUnsignedRange(SE.getSCEV(Size));
auto OffsetRange = SE.getUnsignedRange(SE.getSCEV(Offset));
@@ -97,7 +95,7 @@ static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal,
Value *Or = IRB.CreateOr(Cmp2, Cmp3);
if ((!SizeCI || SizeCI->getValue().slt(0)) &&
!SizeRange.getSignedMin().isNonNegative()) {
- Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IntTy, 0));
+ Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IndexTy, 0));
Or = IRB.CreateOr(Cmp1, Or);
}
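
The BoundsChecking hunks above move the offset arithmetic to the pointer's index type and assemble the guard from three comparisons; logically the check fires when the signed offset starts before the object, starts past it, or leaves fewer bytes than the access needs. A standalone restatement of that predicate, assuming plain integers and a hypothetical name:

#include <cstdint>

// True when the bounds check should trap: NeededSize bytes at signed Offset
// into an object of ObjSize bytes. Reconstruction for illustration only.
static bool boundsCheckFires(int64_t Offset, uint64_t ObjSize,
                             uint64_t NeededSize) {
  if (Offset < 0)
    return true; // the Offset < 0 case guarded by Cmp1 in the hunk
  uint64_t UOff = static_cast<uint64_t>(Offset);
  return UOff > ObjSize || ObjSize - UOff < NeededSize;
}
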
diff --git a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
index 1c630e9ee424..d53e12ad1ff5 100644
--- a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+++ b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -15,7 +15,6 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/InitializePasses.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Transforms/Instrumentation.h"
#include <optional>
@@ -46,8 +45,7 @@ addModuleFlags(Module &M,
}
static bool runCGProfilePass(
- Module &M, function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
- function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LazyBFI) {
+ Module &M, FunctionAnalysisManager &FAM) {
MapVector<std::pair<Function *, Function *>, uint64_t> Counts;
InstrProfSymtab Symtab;
auto UpdateCounts = [&](TargetTransformInfo &TTI, Function *F,
@@ -64,15 +62,13 @@ static bool runCGProfilePass(
(void)(bool) Symtab.create(M);
for (auto &F : M) {
// Avoid extra cost of running passes for BFI when the function doesn't have
- // entry count. Since LazyBlockFrequencyInfoPass only exists in LPM, check
- // if using LazyBlockFrequencyInfoPass.
- // TODO: Remove LazyBFI when LazyBlockFrequencyInfoPass is available in NPM.
- if (F.isDeclaration() || (LazyBFI && !F.getEntryCount()))
+    // an entry count.
+ if (F.isDeclaration() || !F.getEntryCount())
continue;
- auto &BFI = GetBFI(F);
+ auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
if (BFI.getEntryFreq() == 0)
continue;
- TargetTransformInfo &TTI = GetTTI(F);
+ TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
for (auto &BB : F) {
std::optional<uint64_t> BBCount = BFI.getBlockProfileCount(&BB);
if (!BBCount)
@@ -105,14 +101,7 @@ static bool runCGProfilePass(
PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
FunctionAnalysisManager &FAM =
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
- return FAM.getResult<BlockFrequencyAnalysis>(F);
- };
- auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
- return FAM.getResult<TargetIRAnalysis>(F);
- };
-
- runCGProfilePass(M, GetBFI, GetTTI, false);
+ runCGProfilePass(M, FAM);
return PreservedAnalyses::all();
}
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index a072ba278fce..3e3be536defc 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -30,7 +30,6 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ProfDataUtils.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -1888,8 +1887,7 @@ void CHR::fixupBranch(Region *R, CHRScope *Scope,
assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
"Must be truthy or falsy");
auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
- assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
- "Must be in the bias map");
+ assert(BranchBiasMap.contains(R) && "Must be in the bias map");
BranchProbability Bias = BranchBiasMap[R];
assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
// Take the min.
@@ -1931,8 +1929,7 @@ void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
assert((IsTrueBiased ||
Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
- assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
- "Must be in the bias map");
+ assert(SelectBiasMap.contains(SI) && "Must be in the bias map");
BranchProbability Bias = SelectBiasMap[SI];
assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
// Take the min.
@@ -1962,11 +1959,8 @@ void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond);
}
- // Select conditions can be poison, while branching on poison is immediate
- // undefined behavior. As such, we need to freeze potentially poisonous
- // conditions derived from selects.
- if (isa<SelectInst>(BranchOrSelect) &&
- !isGuaranteedNotToBeUndefOrPoison(Cond))
+ // Freeze potentially poisonous conditions.
+ if (!isGuaranteedNotToBeUndefOrPoison(Cond))
Cond = IRB.CreateFreeze(Cond);
// Use logical and to avoid propagating poison from later conditions.
@@ -2080,10 +2074,14 @@ ControlHeightReductionPass::ControlHeightReductionPass() {
PreservedAnalyses ControlHeightReductionPass::run(
Function &F,
FunctionAnalysisManager &FAM) {
+ auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+ auto PPSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ // If there is no profile summary, we should not do CHR.
+ if (!PPSI || !PPSI->hasProfileSummary())
+ return PreservedAnalyses::all();
+ auto &PSI = *PPSI;
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
- auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
- auto &PSI = *MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index e9614b48fde7..8caee5bed8ed 100644
--- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -67,12 +67,13 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
@@ -96,14 +97,13 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SpecialCaseList.h"
#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -305,6 +305,14 @@ const MemoryMapParams Linux_X86_64_MemoryMapParams = {
};
// NOLINTEND(readability-identifier-naming)
+// loongarch64 Linux
+const MemoryMapParams Linux_LoongArch64_MemoryMapParams = {
+ 0, // AndMask (not used)
+ 0x500000000000, // XorMask
+ 0, // ShadowBase (not used)
+ 0x100000000000, // OriginBase
+};
+
namespace {
class DFSanABIList {
@@ -1128,6 +1136,9 @@ bool DataFlowSanitizer::initializeModule(Module &M) {
case Triple::x86_64:
MapParams = &Linux_X86_64_MemoryMapParams;
break;
+ case Triple::loongarch64:
+ MapParams = &Linux_LoongArch64_MemoryMapParams;
+ break;
default:
report_fatal_error("unsupported architecture");
}
@@ -1256,7 +1267,7 @@ void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) {
size_t Pos = Asm.find(SearchStr);
if (Pos != std::string::npos) {
Asm.replace(Pos, SearchStr.size(), ".symver " + GVName + Suffix + ",");
- Pos = Asm.find("@");
+ Pos = Asm.find('@');
if (Pos == std::string::npos)
report_fatal_error(Twine("unsupported .symver: ", Asm));
@@ -2156,9 +2167,8 @@ std::pair<Value *, Value *> DFSanFunction::loadShadowFast(
ShadowSize == 4 ? Type::getInt32Ty(*DFS.Ctx) : Type::getInt64Ty(*DFS.Ctx);
IRBuilder<> IRB(Pos);
- Value *WideAddr = IRB.CreateBitCast(ShadowAddr, WideShadowTy->getPointerTo());
Value *CombinedWideShadow =
- IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);
+ IRB.CreateAlignedLoad(WideShadowTy, ShadowAddr, ShadowAlign);
unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth();
const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits;
@@ -2195,10 +2205,10 @@ std::pair<Value *, Value *> DFSanFunction::loadShadowFast(
// shadow).
for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size;
ByteOfs += BytesPerWideShadow) {
- WideAddr = IRB.CreateGEP(WideShadowTy, WideAddr,
- ConstantInt::get(DFS.IntptrTy, 1));
+ ShadowAddr = IRB.CreateGEP(WideShadowTy, ShadowAddr,
+ ConstantInt::get(DFS.IntptrTy, 1));
Value *NextWideShadow =
- IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);
+ IRB.CreateAlignedLoad(WideShadowTy, ShadowAddr, ShadowAlign);
CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow);
if (ShouldTrackOrigins) {
Value *NextOrigin = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr);
@@ -2526,8 +2536,9 @@ void DFSanFunction::storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size,
ConstantInt::get(DFS.IntptrTy, Size), Origin});
} else {
Value *Cmp = convertToBool(CollapsedShadow, IRB, "_dfscmp");
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
Instruction *CheckTerm = SplitBlockAndInsertIfThen(
- Cmp, &*IRB.GetInsertPoint(), false, DFS.OriginStoreWeights, &DT);
+ Cmp, &*IRB.GetInsertPoint(), false, DFS.OriginStoreWeights, &DTU);
IRBuilder<> IRBNew(CheckTerm);
paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), StoreOriginAddr, Size,
OriginAlignment);
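
For context on the Linux_LoongArch64_MemoryMapParams added above: with AndMask and ShadowBase both zero, only the XorMask and OriginBase matter. The mapping code itself sits elsewhere in DataFlowSanitizer.cpp and is not visible in this diff, so the sketch below is an assumption about how such constants are typically combined, not code from the commit:

#include <cstdint>

// Assumed application-to-shadow mapping for a XorMask-style layout:
// shadow = addr ^ XorMask, origin = shadow + OriginBase.
static constexpr uint64_t kXorMask = 0x500000000000ULL;
static constexpr uint64_t kOriginBase = 0x100000000000ULL;

static uint64_t appToShadow(uint64_t Addr) { return Addr ^ kXorMask; }
static uint64_t appToOrigin(uint64_t Addr) {
  return appToShadow(Addr) + kOriginBase;
}
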
diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 9f3ca8b02fd9..21f0b1a92293 100644
--- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -13,7 +13,6 @@
//
//===----------------------------------------------------------------------===//
-#include "CFGMST.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
@@ -21,10 +20,10 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
@@ -38,6 +37,7 @@
#include "llvm/Support/Regex.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/CFGMST.h"
#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
@@ -919,15 +919,21 @@ bool GCOVProfiler::emitProfileNotes(
IRBuilder<> Builder(E.Place, E.Place->getFirstInsertionPt());
Value *V = Builder.CreateConstInBoundsGEP2_64(
Counters->getValueType(), Counters, 0, I);
+ // Disable sanitizers to decrease size bloat. We don't expect
+ // sanitizers to catch interesting issues.
+ Instruction *Inst;
if (Options.Atomic) {
- Builder.CreateAtomicRMW(AtomicRMWInst::Add, V, Builder.getInt64(1),
- MaybeAlign(), AtomicOrdering::Monotonic);
+ Inst = Builder.CreateAtomicRMW(AtomicRMWInst::Add, V,
+ Builder.getInt64(1), MaybeAlign(),
+ AtomicOrdering::Monotonic);
} else {
- Value *Count =
+ LoadInst *OldCount =
Builder.CreateLoad(Builder.getInt64Ty(), V, "gcov_ctr");
- Count = Builder.CreateAdd(Count, Builder.getInt64(1));
- Builder.CreateStore(Count, V);
+ OldCount->setNoSanitizeMetadata();
+ Value *NewCount = Builder.CreateAdd(OldCount, Builder.getInt64(1));
+ Inst = Builder.CreateStore(NewCount, V);
}
+ Inst->setNoSanitizeMetadata();
}
}
}
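
The GCOVProfiler hunk above emits one of two counter-update shapes — an atomic monotonic read-modify-write when Options.Atomic is set, otherwise a plain load/add/store — and now marks the generated instructions no-sanitize. At the source level the two shapes correspond roughly to the following (illustrative C++ only; the pass emits IR directly):

#include <atomic>
#include <cstdint>

static std::atomic<uint64_t> AtomicCounter{0};
static uint64_t PlainCounter = 0;

// Options.Atomic: one monotonic (relaxed) read-modify-write.
static void bumpAtomic() {
  AtomicCounter.fetch_add(1, std::memory_order_relaxed);
}

// Default: a cheaper, intentionally non-atomic load + add + store.
static void bumpPlain() { PlainCounter = PlainCounter + 1; }
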
diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 34c61f83ad30..28db47a19092 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -1,4 +1,4 @@
-//===- HWAddressSanitizer.cpp - detector of uninitialized reads -------===//
+//===- HWAddressSanitizer.cpp - memory access error detector --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -17,7 +17,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
@@ -50,6 +49,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/MemoryTaggingSupport.h"
@@ -136,14 +136,6 @@ static cl::opt<bool>
cl::desc("detect use after scope within function"),
cl::Hidden, cl::init(false));
-static cl::opt<bool> ClUARRetagToZero(
- "hwasan-uar-retag-to-zero",
- cl::desc("Clear alloca tags before returning from the function to allow "
- "non-instrumented and instrumented function calls mix. When set "
- "to false, allocas are retagged before returning from the "
- "function to detect use after return."),
- cl::Hidden, cl::init(true));
-
static cl::opt<bool> ClGenerateTagsWithCalls(
"hwasan-generate-tags-with-calls",
cl::desc("generate new tags with runtime library calls"), cl::Hidden,
@@ -247,7 +239,9 @@ bool shouldInstrumentStack(const Triple &TargetTriple) {
}
bool shouldInstrumentWithCalls(const Triple &TargetTriple) {
- return ClInstrumentWithCalls || TargetTriple.getArch() == Triple::x86_64;
+ return ClInstrumentWithCalls.getNumOccurrences()
+ ? ClInstrumentWithCalls
+ : TargetTriple.getArch() == Triple::x86_64;
}
bool mightUseStackSafetyAnalysis(bool DisableOptimization) {
@@ -282,7 +276,7 @@ public:
void setSSI(const StackSafetyGlobalInfo *S) { SSI = S; }
- bool sanitizeFunction(Function &F, FunctionAnalysisManager &FAM);
+ void sanitizeFunction(Function &F, FunctionAnalysisManager &FAM);
void initializeModule();
void createHwasanCtorComdat();
@@ -313,16 +307,15 @@ public:
void tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag, size_t Size);
Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
- bool instrumentStack(memtag::StackInfo &Info, Value *StackTag,
+ bool instrumentStack(memtag::StackInfo &Info, Value *StackTag, Value *UARTag,
const DominatorTree &DT, const PostDominatorTree &PDT,
const LoopInfo &LI);
Value *readRegister(IRBuilder<> &IRB, StringRef Name);
bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
Value *getNextTagWithCall(IRBuilder<> &IRB);
Value *getStackBaseTag(IRBuilder<> &IRB);
- Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, AllocaInst *AI,
- unsigned AllocaNo);
- Value *getUARTag(IRBuilder<> &IRB, Value *StackTag);
+ Value *getAllocaTag(IRBuilder<> &IRB, Value *StackTag, unsigned AllocaNo);
+ Value *getUARTag(IRBuilder<> &IRB);
Value *getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty);
Value *applyTagMask(IRBuilder<> &IRB, Value *OldTag);
@@ -344,8 +337,6 @@ private:
Module &M;
const StackSafetyGlobalInfo *SSI;
Triple TargetTriple;
- FunctionCallee HWAsanMemmove, HWAsanMemcpy, HWAsanMemset;
- FunctionCallee HWAsanHandleVfork;
/// This struct defines the shadow mapping using the rule:
/// shadow = (mem >> Scale) + Offset.
@@ -387,6 +378,7 @@ private:
bool InstrumentStack;
bool DetectUseAfterScope;
bool UsePageAliases;
+ bool UseMatchAllCallback;
std::optional<uint8_t> MatchAllTag;
@@ -398,6 +390,9 @@ private:
FunctionCallee HwasanMemoryAccessCallback[2][kNumberOfAccessSizes];
FunctionCallee HwasanMemoryAccessCallbackSized[2];
+ FunctionCallee HwasanMemmove, HwasanMemcpy, HwasanMemset;
+ FunctionCallee HwasanHandleVfork;
+
FunctionCallee HwasanTagMemoryFunc;
FunctionCallee HwasanGenerateTagFunc;
FunctionCallee HwasanRecordFrameRecordFunc;
@@ -420,12 +415,9 @@ PreservedAnalyses HWAddressSanitizerPass::run(Module &M,
SSI = &MAM.getResult<StackSafetyGlobalAnalysis>(M);
HWAddressSanitizer HWASan(M, Options.CompileKernel, Options.Recover, SSI);
- bool Modified = false;
auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
for (Function &F : M)
- Modified |= HWASan.sanitizeFunction(F, FAM);
- if (!Modified)
- return PreservedAnalyses::all();
+ HWASan.sanitizeFunction(F, FAM);
PreservedAnalyses PA = PreservedAnalyses::none();
// GlobalsAA is considered stateless and does not get invalidated unless
@@ -438,12 +430,12 @@ void HWAddressSanitizerPass::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
static_cast<PassInfoMixin<HWAddressSanitizerPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
- OS << "<";
+ OS << '<';
if (Options.CompileKernel)
OS << "kernel;";
if (Options.Recover)
OS << "recover";
- OS << ">";
+ OS << '>';
}
void HWAddressSanitizer::createHwasanCtorComdat() {
@@ -594,6 +586,7 @@ void HWAddressSanitizer::initializeModule() {
} else if (CompileKernel) {
MatchAllTag = 0xFF;
}
+ UseMatchAllCallback = !CompileKernel && MatchAllTag.has_value();
// If we don't have personality function support, fall back to landing pads.
InstrumentLandingPads = ClInstrumentLandingPads.getNumOccurrences()
@@ -631,51 +624,73 @@ void HWAddressSanitizer::initializeModule() {
void HWAddressSanitizer::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
+ const std::string MatchAllStr = UseMatchAllCallback ? "_match_all" : "";
+ FunctionType *HwasanMemoryAccessCallbackSizedFnTy,
+ *HwasanMemoryAccessCallbackFnTy, *HwasanMemTransferFnTy,
+ *HwasanMemsetFnTy;
+ if (UseMatchAllCallback) {
+ HwasanMemoryAccessCallbackSizedFnTy =
+ FunctionType::get(VoidTy, {IntptrTy, IntptrTy, Int8Ty}, false);
+ HwasanMemoryAccessCallbackFnTy =
+ FunctionType::get(VoidTy, {IntptrTy, Int8Ty}, false);
+ HwasanMemTransferFnTy = FunctionType::get(
+ Int8PtrTy, {Int8PtrTy, Int8PtrTy, IntptrTy, Int8Ty}, false);
+ HwasanMemsetFnTy = FunctionType::get(
+ Int8PtrTy, {Int8PtrTy, Int32Ty, IntptrTy, Int8Ty}, false);
+ } else {
+ HwasanMemoryAccessCallbackSizedFnTy =
+ FunctionType::get(VoidTy, {IntptrTy, IntptrTy}, false);
+ HwasanMemoryAccessCallbackFnTy =
+ FunctionType::get(VoidTy, {IntptrTy}, false);
+ HwasanMemTransferFnTy =
+ FunctionType::get(Int8PtrTy, {Int8PtrTy, Int8PtrTy, IntptrTy}, false);
+ HwasanMemsetFnTy =
+ FunctionType::get(Int8PtrTy, {Int8PtrTy, Int32Ty, IntptrTy}, false);
+ }
+
for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {
const std::string TypeStr = AccessIsWrite ? "store" : "load";
const std::string EndingStr = Recover ? "_noabort" : "";
HwasanMemoryAccessCallbackSized[AccessIsWrite] = M.getOrInsertFunction(
- ClMemoryAccessCallbackPrefix + TypeStr + "N" + EndingStr,
- FunctionType::get(IRB.getVoidTy(), {IntptrTy, IntptrTy}, false));
+ ClMemoryAccessCallbackPrefix + TypeStr + "N" + MatchAllStr + EndingStr,
+ HwasanMemoryAccessCallbackSizedFnTy);
for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
AccessSizeIndex++) {
HwasanMemoryAccessCallback[AccessIsWrite][AccessSizeIndex] =
- M.getOrInsertFunction(
- ClMemoryAccessCallbackPrefix + TypeStr +
- itostr(1ULL << AccessSizeIndex) + EndingStr,
- FunctionType::get(IRB.getVoidTy(), {IntptrTy}, false));
+ M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + TypeStr +
+ itostr(1ULL << AccessSizeIndex) +
+ MatchAllStr + EndingStr,
+ HwasanMemoryAccessCallbackFnTy);
}
}
- HwasanTagMemoryFunc = M.getOrInsertFunction(
- "__hwasan_tag_memory", IRB.getVoidTy(), Int8PtrTy, Int8Ty, IntptrTy);
+ const std::string MemIntrinCallbackPrefix =
+ (CompileKernel && !ClKasanMemIntrinCallbackPrefix)
+ ? std::string("")
+ : ClMemoryAccessCallbackPrefix;
+
+ HwasanMemmove = M.getOrInsertFunction(
+ MemIntrinCallbackPrefix + "memmove" + MatchAllStr, HwasanMemTransferFnTy);
+ HwasanMemcpy = M.getOrInsertFunction(
+ MemIntrinCallbackPrefix + "memcpy" + MatchAllStr, HwasanMemTransferFnTy);
+ HwasanMemset = M.getOrInsertFunction(
+ MemIntrinCallbackPrefix + "memset" + MatchAllStr, HwasanMemsetFnTy);
+
+ HwasanTagMemoryFunc = M.getOrInsertFunction("__hwasan_tag_memory", VoidTy,
+ Int8PtrTy, Int8Ty, IntptrTy);
HwasanGenerateTagFunc =
M.getOrInsertFunction("__hwasan_generate_tag", Int8Ty);
- HwasanRecordFrameRecordFunc = M.getOrInsertFunction(
- "__hwasan_add_frame_record", IRB.getVoidTy(), Int64Ty);
+ HwasanRecordFrameRecordFunc =
+ M.getOrInsertFunction("__hwasan_add_frame_record", VoidTy, Int64Ty);
- ShadowGlobal = M.getOrInsertGlobal("__hwasan_shadow",
- ArrayType::get(IRB.getInt8Ty(), 0));
+ ShadowGlobal =
+ M.getOrInsertGlobal("__hwasan_shadow", ArrayType::get(Int8Ty, 0));
- const std::string MemIntrinCallbackPrefix =
- (CompileKernel && !ClKasanMemIntrinCallbackPrefix)
- ? std::string("")
- : ClMemoryAccessCallbackPrefix;
- HWAsanMemmove = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memmove",
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy);
- HWAsanMemcpy = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memcpy",
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt8PtrTy(), IntptrTy);
- HWAsanMemset = M.getOrInsertFunction(MemIntrinCallbackPrefix + "memset",
- IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
- IRB.getInt32Ty(), IntptrTy);
-
- HWAsanHandleVfork =
- M.getOrInsertFunction("__hwasan_handle_vfork", IRB.getVoidTy(), IntptrTy);
+ HwasanHandleVfork =
+ M.getOrInsertFunction("__hwasan_handle_vfork", VoidTy, IntptrTy);
}
Value *HWAddressSanitizer::getOpaqueNoopCast(IRBuilder<> &IRB, Value *Val) {
@@ -788,7 +803,7 @@ static unsigned getPointerOperandIndex(Instruction *I) {
}
static size_t TypeSizeToSizeIndex(uint32_t TypeSize) {
- size_t Res = countTrailingZeros(TypeSize / 8);
+ size_t Res = llvm::countr_zero(TypeSize / 8);
assert(Res < kNumberOfAccessSizes);
return Res;
}
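
For context on the countr_zero swap above: the callback index is just log2 of the access size in bytes, and the old countTrailingZeros helper computed the same thing. A minimal standalone sketch of the mapping, using C++20 <bit> instead of LLVM's MathExtras (the constant and helper names here are illustrative, not the pass's own):

#include <bit>
#include <cassert>
#include <cstdint>
#include <cstdio>

// HWASan keeps dedicated callbacks for 1/2/4/8/16-byte accesses, i.e. five
// access-size buckets; everything else goes through the sized callback.
constexpr size_t kNumberOfAccessSizes = 5;

static size_t typeSizeToSizeIndex(uint32_t TypeSizeInBits) {
  size_t Res = std::countr_zero(TypeSizeInBits / 8); // log2 of the byte size
  assert(Res < kNumberOfAccessSizes);
  return Res;
}

int main() {
  for (uint32_t Bits : {8u, 16u, 32u, 64u, 128u})
    std::printf("%3u-bit access -> index %zu\n", Bits,
                typeSizeToSizeIndex(Bits));
}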
@@ -847,8 +862,8 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
IRBuilder<> IRB(InsertBefore);
Value *PtrLong = IRB.CreatePointerCast(Ptr, IntptrTy);
- Value *PtrTag = IRB.CreateTrunc(IRB.CreateLShr(PtrLong, PointerTagShift),
- IRB.getInt8Ty());
+ Value *PtrTag =
+ IRB.CreateTrunc(IRB.CreateLShr(PtrLong, PointerTagShift), Int8Ty);
Value *AddrLong = untagPointer(IRB, PtrLong);
Value *Shadow = memToShadow(AddrLong, IRB);
Value *MemTag = IRB.CreateLoad(Int8Ty, Shadow);
@@ -897,7 +912,7 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
case Triple::x86_64:
// The signal handler will find the data address in rdi.
Asm = InlineAsm::get(
- FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
+ FunctionType::get(VoidTy, {PtrLong->getType()}, false),
"int3\nnopl " +
itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)) +
"(%rax)",
@@ -908,7 +923,7 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
case Triple::aarch64_be:
// The signal handler will find the data address in x0.
Asm = InlineAsm::get(
- FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
+ FunctionType::get(VoidTy, {PtrLong->getType()}, false),
"brk #" + itostr(0x900 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
"{x0}",
/*hasSideEffects=*/true);
@@ -916,7 +931,7 @@ void HWAddressSanitizer::instrumentMemAccessInline(Value *Ptr, bool IsWrite,
case Triple::riscv64:
// The signal handler will find the data address in x10.
Asm = InlineAsm::get(
- FunctionType::get(IRB.getVoidTy(), {PtrLong->getType()}, false),
+ FunctionType::get(VoidTy, {PtrLong->getType()}, false),
"ebreak\naddiw x0, x11, " +
itostr(0x40 + (AccessInfo & HWASanAccessInfo::RuntimeMask)),
"{x10}",
@@ -943,17 +958,35 @@ bool HWAddressSanitizer::ignoreMemIntrinsic(MemIntrinsic *MI) {
void HWAddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
IRBuilder<> IRB(MI);
if (isa<MemTransferInst>(MI)) {
- IRB.CreateCall(
- isa<MemMoveInst>(MI) ? HWAsanMemmove : HWAsanMemcpy,
- {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
- IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+ if (UseMatchAllCallback) {
+ IRB.CreateCall(
+ isa<MemMoveInst>(MI) ? HwasanMemmove : HwasanMemcpy,
+ {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false),
+ ConstantInt::get(Int8Ty, *MatchAllTag)});
+ } else {
+ IRB.CreateCall(
+ isa<MemMoveInst>(MI) ? HwasanMemmove : HwasanMemcpy,
+ {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreatePointerCast(MI->getOperand(1), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+ }
} else if (isa<MemSetInst>(MI)) {
- IRB.CreateCall(
- HWAsanMemset,
- {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
- IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
- IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+ if (UseMatchAllCallback) {
+ IRB.CreateCall(
+ HwasanMemset,
+ {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false),
+ ConstantInt::get(Int8Ty, *MatchAllTag)});
+ } else {
+ IRB.CreateCall(
+ HwasanMemset,
+ {IRB.CreatePointerCast(MI->getOperand(0), IRB.getInt8PtrTy()),
+ IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),
+ IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});
+ }
}
MI->eraseFromParent();
}
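
The two branches above differ only in whether a trailing i8 match-all tag is appended to the runtime call; the intent is that the runtime skips reporting whenever the pointer's tag equals that value (0xFF is the kernel default set earlier in this patch). A standalone model of that semantic, with illustrative names rather than the actual compiler-rt entry points:

#include <cstdint>
#include <cstdio>

constexpr unsigned kPointerTagShift = 56; // top-byte tag, as on AArch64 TBI

static uint8_t pointerTag(uint64_t Addr) {
  return static_cast<uint8_t>(Addr >> kPointerTagShift);
}

// Models the effect of the *_match_all callbacks: accesses through a pointer
// carrying the match-all tag are never flagged; everything else is checked
// against the shadow tag as usual (shadow lookup elided here).
static bool shouldReport(uint64_t Addr, uint8_t ShadowTag, uint8_t MatchAllTag) {
  uint8_t PtrTag = pointerTag(Addr);
  if (PtrTag == MatchAllTag)
    return false;
  return PtrTag != ShadowTag;
}

int main() {
  uint64_t P = (0x2AULL << kPointerTagShift) | 0x1000;
  uint64_t Q = (0xFFULL << kPointerTagShift) | 0x1000;
  std::printf("mismatched tag reported: %d\n", shouldReport(P, 0x13, 0xFF));
  std::printf("match-all tag reported:  %d\n", shouldReport(Q, 0x13, 0xFF));
}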
@@ -967,23 +1000,40 @@ bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) {
return false; // FIXME
IRBuilder<> IRB(O.getInsn());
- if (isPowerOf2_64(O.TypeSize) &&
- (O.TypeSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) &&
+ if (!O.TypeStoreSize.isScalable() && isPowerOf2_64(O.TypeStoreSize) &&
+ (O.TypeStoreSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) &&
(!O.Alignment || *O.Alignment >= Mapping.getObjectAlignment() ||
- *O.Alignment >= O.TypeSize / 8)) {
- size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeSize);
+ *O.Alignment >= O.TypeStoreSize / 8)) {
+ size_t AccessSizeIndex = TypeSizeToSizeIndex(O.TypeStoreSize);
if (InstrumentWithCalls) {
- IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
- IRB.CreatePointerCast(Addr, IntptrTy));
+ if (UseMatchAllCallback) {
+ IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
+ {IRB.CreatePointerCast(Addr, IntptrTy),
+ ConstantInt::get(Int8Ty, *MatchAllTag)});
+ } else {
+ IRB.CreateCall(HwasanMemoryAccessCallback[O.IsWrite][AccessSizeIndex],
+ IRB.CreatePointerCast(Addr, IntptrTy));
+ }
} else if (OutlinedChecks) {
instrumentMemAccessOutline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
} else {
instrumentMemAccessInline(Addr, O.IsWrite, AccessSizeIndex, O.getInsn());
}
} else {
- IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite],
- {IRB.CreatePointerCast(Addr, IntptrTy),
- ConstantInt::get(IntptrTy, O.TypeSize / 8)});
+ if (UseMatchAllCallback) {
+ IRB.CreateCall(
+ HwasanMemoryAccessCallbackSized[O.IsWrite],
+ {IRB.CreatePointerCast(Addr, IntptrTy),
+ IRB.CreateUDiv(IRB.CreateTypeSize(IntptrTy, O.TypeStoreSize),
+ ConstantInt::get(IntptrTy, 8)),
+ ConstantInt::get(Int8Ty, *MatchAllTag)});
+ } else {
+ IRB.CreateCall(
+ HwasanMemoryAccessCallbackSized[O.IsWrite],
+ {IRB.CreatePointerCast(Addr, IntptrTy),
+ IRB.CreateUDiv(IRB.CreateTypeSize(IntptrTy, O.TypeStoreSize),
+ ConstantInt::get(IntptrTy, 8))});
+ }
}
untagPointerOperand(O.getInsn(), Addr);
@@ -996,14 +1046,15 @@ void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
if (!UseShortGranules)
Size = AlignedSize;
- Value *JustTag = IRB.CreateTrunc(Tag, IRB.getInt8Ty());
+ Tag = IRB.CreateTrunc(Tag, Int8Ty);
if (InstrumentWithCalls) {
IRB.CreateCall(HwasanTagMemoryFunc,
- {IRB.CreatePointerCast(AI, Int8PtrTy), JustTag,
+ {IRB.CreatePointerCast(AI, Int8PtrTy), Tag,
ConstantInt::get(IntptrTy, AlignedSize)});
} else {
size_t ShadowSize = Size >> Mapping.Scale;
- Value *ShadowPtr = memToShadow(IRB.CreatePointerCast(AI, IntptrTy), IRB);
+ Value *AddrLong = untagPointer(IRB, IRB.CreatePointerCast(AI, IntptrTy));
+ Value *ShadowPtr = memToShadow(AddrLong, IRB);
// If this memset is not inlined, it will be intercepted in the hwasan
// runtime library. That's OK, because the interceptor skips the checks if
// the address is in the shadow region.
@@ -1011,14 +1062,14 @@ void HWAddressSanitizer::tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag,
// llvm.memset right here into either a sequence of stores, or a call to
// hwasan_tag_memory.
if (ShadowSize)
- IRB.CreateMemSet(ShadowPtr, JustTag, ShadowSize, Align(1));
+ IRB.CreateMemSet(ShadowPtr, Tag, ShadowSize, Align(1));
if (Size != AlignedSize) {
const uint8_t SizeRemainder = Size % Mapping.getObjectAlignment().value();
IRB.CreateStore(ConstantInt::get(Int8Ty, SizeRemainder),
IRB.CreateConstGEP1_32(Int8Ty, ShadowPtr, ShadowSize));
- IRB.CreateStore(JustTag, IRB.CreateConstGEP1_32(
- Int8Ty, IRB.CreateBitCast(AI, Int8PtrTy),
- AlignedSize - 1));
+ IRB.CreateStore(Tag, IRB.CreateConstGEP1_32(
+ Int8Ty, IRB.CreatePointerCast(AI, Int8PtrTy),
+ AlignedSize - 1));
}
}
}
@@ -1037,21 +1088,18 @@ unsigned HWAddressSanitizer::retagMask(unsigned AllocaNo) {
// mask allocated (temporally) nearby. The program that generated this list
// can be found at:
// https://github.com/google/sanitizers/blob/master/hwaddress-sanitizer/sort_masks.py
- static unsigned FastMasks[] = {0, 128, 64, 192, 32, 96, 224, 112, 240,
- 48, 16, 120, 248, 56, 24, 8, 124, 252,
- 60, 28, 12, 4, 126, 254, 62, 30, 14,
- 6, 2, 127, 63, 31, 15, 7, 3, 1};
+ static const unsigned FastMasks[] = {
+ 0, 128, 64, 192, 32, 96, 224, 112, 240, 48, 16, 120,
+ 248, 56, 24, 8, 124, 252, 60, 28, 12, 4, 126, 254,
+ 62, 30, 14, 6, 2, 127, 63, 31, 15, 7, 3, 1};
return FastMasks[AllocaNo % std::size(FastMasks)];
}
Value *HWAddressSanitizer::applyTagMask(IRBuilder<> &IRB, Value *OldTag) {
- if (TargetTriple.getArch() == Triple::x86_64) {
- Constant *TagMask = ConstantInt::get(IntptrTy, TagMaskByte);
- Value *NewTag = IRB.CreateAnd(OldTag, TagMask);
- return NewTag;
- }
- // aarch64 uses 8-bit tags, so no mask is needed.
- return OldTag;
+ if (TagMaskByte == 0xFF)
+ return OldTag; // No need to clear the tag byte.
+ return IRB.CreateAnd(OldTag,
+ ConstantInt::get(OldTag->getType(), TagMaskByte));
}
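
applyTagMask now keys purely off TagMaskByte instead of the target triple: with a full 8-bit tag space (0xFF) the value passes through, otherwise the unusable bits are cleared. A tiny standalone sketch of the same arithmetic, assuming nothing beyond the mask semantics shown in the diff:

#include <cstdint>
#include <cstdio>

// Mirrors applyTagMask: keep only the tag bits the target can represent.
static uint64_t applyTagMask(uint64_t OldTag, uint8_t TagMaskByte) {
  if (TagMaskByte == 0xFF)
    return OldTag;              // full 8-bit tags, nothing to clear
  return OldTag & TagMaskByte;  // drop bits outside the usable tag range
}

int main() {
  std::printf("0x9c, mask 0xff -> 0x%02llx\n",
              (unsigned long long)applyTagMask(0x9C, 0xFF));
  std::printf("0x9c, mask 0x0f -> 0x%02llx\n",
              (unsigned long long)applyTagMask(0x9C, 0x0F));
}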
Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
@@ -1060,7 +1108,7 @@ Value *HWAddressSanitizer::getNextTagWithCall(IRBuilder<> &IRB) {
Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
if (ClGenerateTagsWithCalls)
- return getNextTagWithCall(IRB);
+ return nullptr;
if (StackBaseTag)
return StackBaseTag;
// Extract some entropy from the stack pointer for the tags.
@@ -1075,19 +1123,20 @@ Value *HWAddressSanitizer::getStackBaseTag(IRBuilder<> &IRB) {
}
Value *HWAddressSanitizer::getAllocaTag(IRBuilder<> &IRB, Value *StackTag,
- AllocaInst *AI, unsigned AllocaNo) {
+ unsigned AllocaNo) {
if (ClGenerateTagsWithCalls)
return getNextTagWithCall(IRB);
- return IRB.CreateXor(StackTag,
- ConstantInt::get(IntptrTy, retagMask(AllocaNo)));
+ return IRB.CreateXor(
+ StackTag, ConstantInt::get(StackTag->getType(), retagMask(AllocaNo)));
}
-Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB, Value *StackTag) {
- if (ClUARRetagToZero)
- return ConstantInt::get(IntptrTy, 0);
- if (ClGenerateTagsWithCalls)
- return getNextTagWithCall(IRB);
- return IRB.CreateXor(StackTag, ConstantInt::get(IntptrTy, TagMaskByte));
+Value *HWAddressSanitizer::getUARTag(IRBuilder<> &IRB) {
+ Value *StackPointerLong = getSP(IRB);
+ Value *UARTag =
+ applyTagMask(IRB, IRB.CreateLShr(StackPointerLong, PointerTagShift));
+
+ UARTag->setName("hwasan.uar.tag");
+ return UARTag;
}
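
getUARTag no longer XORs the stack base tag; it derives one use-after-return tag per function from the high bits of the stack pointer and reuses it for every dead alloca. A standalone model of that derivation, assuming the usual top-byte tag at bit 56 (the shift value is not spelled out in this hunk):

#include <cstdint>
#include <cstdio>

constexpr unsigned kPointerTagShift = 56;

// Models the new getUARTag(): shift the stack pointer down to its tag byte,
// then clear any bits the target cannot represent (applyTagMask).
static uint8_t uarTag(uint64_t StackPointer, uint8_t TagMaskByte) {
  uint64_t Tag = StackPointer >> kPointerTagShift;
  if (TagMaskByte != 0xFF)
    Tag &= TagMaskByte;
  return static_cast<uint8_t>(Tag);
}

int main() {
  uint64_t SP = (0x5DULL << kPointerTagShift) | 0x7FFDEADBEEF0ULL;
  std::printf("hwasan.uar.tag = 0x%02x\n", (unsigned)uarTag(SP, 0xFF));
}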
// Add a tag to an address.
@@ -1117,12 +1166,12 @@ Value *HWAddressSanitizer::untagPointer(IRBuilder<> &IRB, Value *PtrLong) {
// Kernel addresses have 0xFF in the most significant byte.
UntaggedPtrLong =
IRB.CreateOr(PtrLong, ConstantInt::get(PtrLong->getType(),
- 0xFFULL << PointerTagShift));
+ TagMaskByte << PointerTagShift));
} else {
// Userspace addresses have 0x00.
- UntaggedPtrLong =
- IRB.CreateAnd(PtrLong, ConstantInt::get(PtrLong->getType(),
- ~(0xFFULL << PointerTagShift)));
+ UntaggedPtrLong = IRB.CreateAnd(
+ PtrLong, ConstantInt::get(PtrLong->getType(),
+ ~(TagMaskByte << PointerTagShift)));
}
return UntaggedPtrLong;
}
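
untagPointer now uses TagMaskByte rather than a hard-coded 0xFF when neutralizing the tag byte: kernel pointers are OR-ed back to all-ones in the tag position, userspace pointers are AND-ed to zero there. A standalone sketch of both paths (the shift and mask constants are illustrative):

#include <cstdint>
#include <cstdio>

constexpr unsigned kPointerTagShift = 56;
constexpr uint64_t kTagMaskByte = 0xFF;

// Kernel addresses keep 0xFF in the tag byte, so untagging ORs the mask in.
static uint64_t untagKernel(uint64_t PtrLong) {
  return PtrLong | (kTagMaskByte << kPointerTagShift);
}

// Userspace addresses keep 0x00 there, so untagging clears the mask bits.
static uint64_t untagUser(uint64_t PtrLong) {
  return PtrLong & ~(kTagMaskByte << kPointerTagShift);
}

int main() {
  uint64_t Tagged = (0x2AULL << kPointerTagShift) | 0x1000;
  std::printf("kernel untag: 0x%016llx\n", (unsigned long long)untagKernel(Tagged));
  std::printf("user   untag: 0x%016llx\n", (unsigned long long)untagUser(Tagged));
}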
@@ -1135,8 +1184,7 @@ Value *HWAddressSanitizer::getHwasanThreadSlotPtr(IRBuilder<> &IRB, Type *Ty) {
Function *ThreadPointerFunc =
Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
Value *SlotPtr = IRB.CreatePointerCast(
- IRB.CreateConstGEP1_32(IRB.getInt8Ty(),
- IRB.CreateCall(ThreadPointerFunc), 0x30),
+ IRB.CreateConstGEP1_32(Int8Ty, IRB.CreateCall(ThreadPointerFunc), 0x30),
Ty->getPointerTo(0));
return SlotPtr;
}
@@ -1162,8 +1210,7 @@ Value *HWAddressSanitizer::getSP(IRBuilder<> &IRB) {
M, Intrinsic::frameaddress,
IRB.getInt8PtrTy(M->getDataLayout().getAllocaAddrSpace()));
CachedSP = IRB.CreatePtrToInt(
- IRB.CreateCall(GetStackPointerFn,
- {Constant::getNullValue(IRB.getInt32Ty())}),
+ IRB.CreateCall(GetStackPointerFn, {Constant::getNullValue(Int32Ty)}),
IntptrTy);
}
return CachedSP;
@@ -1280,7 +1327,7 @@ bool HWAddressSanitizer::instrumentLandingPads(
for (auto *LP : LandingPadVec) {
IRBuilder<> IRB(LP->getNextNode());
IRB.CreateCall(
- HWAsanHandleVfork,
+ HwasanHandleVfork,
{readRegister(IRB, (TargetTriple.getArch() == Triple::x86_64) ? "rsp"
: "sp")});
}
@@ -1293,7 +1340,7 @@ static bool isLifetimeIntrinsic(Value *V) {
}
bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
- Value *StackTag,
+ Value *StackTag, Value *UARTag,
const DominatorTree &DT,
const PostDominatorTree &PDT,
const LoopInfo &LI) {
@@ -1311,9 +1358,10 @@ bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
IRBuilder<> IRB(AI->getNextNode());
// Replace uses of the alloca with tagged address.
- Value *Tag = getAllocaTag(IRB, StackTag, AI, N);
+ Value *Tag = getAllocaTag(IRB, StackTag, N);
Value *AILong = IRB.CreatePointerCast(AI, IntptrTy);
- Value *Replacement = tagPointer(IRB, AI->getType(), AILong, Tag);
+ Value *AINoTagLong = untagPointer(IRB, AILong);
+ Value *Replacement = tagPointer(IRB, AI->getType(), AINoTagLong, Tag);
std::string Name =
AI->hasName() ? AI->getName().str() : "alloca." + itostr(N);
Replacement->setName(Name + ".hwasan");
@@ -1340,7 +1388,7 @@ bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
llvm::for_each(Info.LifetimeStart, HandleLifetime);
llvm::for_each(Info.LifetimeEnd, HandleLifetime);
- AI->replaceUsesWithIf(Replacement, [AICast, AILong](Use &U) {
+ AI->replaceUsesWithIf(Replacement, [AICast, AILong](const Use &U) {
auto *User = U.getUser();
return User != AILong && User != AICast && !isLifetimeIntrinsic(User);
});
@@ -1359,9 +1407,8 @@ bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
auto TagEnd = [&](Instruction *Node) {
IRB.SetInsertPoint(Node);
- Value *UARTag = getUARTag(IRB, StackTag);
// When untagging, use the `AlignedSize` because we need to set the tags
- // for the entire alloca to zero. If we used `Size` here, we would
+ // for the entire alloca to the UAR tag. If we used `Size` here, we would
// keep the last granule tagged, and store zero in the last byte of the
// last granule, due to how short granules are implemented.
tagAlloca(IRB, AI, UARTag, AlignedSize);
@@ -1402,13 +1449,13 @@ bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo,
return true;
}
-bool HWAddressSanitizer::sanitizeFunction(Function &F,
+void HWAddressSanitizer::sanitizeFunction(Function &F,
FunctionAnalysisManager &FAM) {
if (&F == HwasanCtorFunction)
- return false;
+ return;
if (!F.hasFnAttribute(Attribute::SanitizeHWAddress))
- return false;
+ return;
LLVM_DEBUG(dbgs() << "Function: " << F.getName() << "\n");
@@ -1436,22 +1483,19 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F,
initializeCallbacks(*F.getParent());
- bool Changed = false;
-
if (!LandingPadVec.empty())
- Changed |= instrumentLandingPads(LandingPadVec);
+ instrumentLandingPads(LandingPadVec);
if (SInfo.AllocasToInstrument.empty() && F.hasPersonalityFn() &&
F.getPersonalityFn()->getName() == kHwasanPersonalityThunkName) {
// __hwasan_personality_thunk is a no-op for functions without an
// instrumented stack, so we can drop it.
F.setPersonalityFn(nullptr);
- Changed = true;
}
if (SInfo.AllocasToInstrument.empty() && OperandsToInstrument.empty() &&
IntrinToInstrument.empty())
- return Changed;
+ return;
assert(!ShadowBase);
@@ -1466,9 +1510,9 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F,
const DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
const PostDominatorTree &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
const LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
- Value *StackTag =
- ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
- instrumentStack(SInfo, StackTag, DT, PDT, LI);
+ Value *StackTag = getStackBaseTag(EntryIRB);
+ Value *UARTag = getUARTag(EntryIRB);
+ instrumentStack(SInfo, StackTag, UARTag, DT, PDT, LI);
}
// If we split the entry block, move any allocas that were originally in the
@@ -1495,8 +1539,6 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F,
ShadowBase = nullptr;
StackBaseTag = nullptr;
CachedSP = nullptr;
-
- return true;
}
void HWAddressSanitizer::instrumentGlobal(GlobalVariable *GV, uint8_t Tag) {
@@ -1605,11 +1647,14 @@ void HWAddressSanitizer::instrumentGlobals() {
Hasher.final(Hash);
uint8_t Tag = Hash[0];
+ assert(TagMaskByte >= 16);
+
for (GlobalVariable *GV : Globals) {
- Tag &= TagMaskByte;
- // Skip tag 0 in order to avoid collisions with untagged memory.
- if (Tag == 0)
- Tag = 1;
+ // Don't allow globals to be tagged with something that looks like a
+ // short-granule tag, otherwise we lose inter-granule overflow detection, as
+ // the fast path shadow-vs-address check succeeds.
+ if (Tag < 16 || Tag > TagMaskByte)
+ Tag = 16;
instrumentGlobal(GV, Tag++);
}
}
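
The global-tagging loop now clamps tags into [16, TagMaskByte], so a global can never carry a value the fast path would interpret as a short-granule size (1-15) and never exceed the representable tag range. A standalone sketch of the clamping; the assertion mirrors the one added above:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Mirrors the new tag selection for globals: values below 16 look like
// short-granule sizes in shadow, so they are remapped to 16; values above
// TagMaskByte are not representable and also wrap back to 16.
static uint8_t nextGlobalTag(uint8_t Candidate, uint8_t TagMaskByte) {
  assert(TagMaskByte >= 16);
  if (Candidate < 16 || Candidate > TagMaskByte)
    return 16;
  return Candidate;
}

int main() {
  uint8_t Tag = 14; // e.g. the low byte of the module hash
  for (int I = 0; I < 4; ++I) {
    Tag = nextGlobalTag(Tag, 0xFF);
    std::printf("global %d gets tag 0x%02x\n", I, (unsigned)Tag);
    ++Tag; // the pass post-increments after each global
  }
}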
diff --git a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index b66e761d53b0..5c9799235017 100644
--- a/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -104,25 +104,24 @@ static cl::opt<bool>
namespace {
-// The class for main data structure to promote indirect calls to conditional
-// direct calls.
-class ICallPromotionFunc {
+// Promote indirect calls to conditional direct calls, keeping track of
+// thresholds.
+class IndirectCallPromoter {
private:
Function &F;
- Module *M;
// Symtab that maps indirect call profile values to function names and
// defines.
- InstrProfSymtab *Symtab;
+ InstrProfSymtab *const Symtab;
- bool SamplePGO;
+ const bool SamplePGO;
OptimizationRemarkEmitter &ORE;
// A struct that records the direct target and its call count.
struct PromotionCandidate {
- Function *TargetFunction;
- uint64_t Count;
+ Function *const TargetFunction;
+ const uint64_t Count;
PromotionCandidate(Function *F, uint64_t C) : TargetFunction(F), Count(C) {}
};
@@ -143,11 +142,11 @@ private:
uint64_t &TotalCount);
public:
- ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab,
- bool SamplePGO, OptimizationRemarkEmitter &ORE)
- : F(Func), M(Modu), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {}
- ICallPromotionFunc(const ICallPromotionFunc &) = delete;
- ICallPromotionFunc &operator=(const ICallPromotionFunc &) = delete;
+ IndirectCallPromoter(Function &Func, InstrProfSymtab *Symtab, bool SamplePGO,
+ OptimizationRemarkEmitter &ORE)
+ : F(Func), Symtab(Symtab), SamplePGO(SamplePGO), ORE(ORE) {}
+ IndirectCallPromoter(const IndirectCallPromoter &) = delete;
+ IndirectCallPromoter &operator=(const IndirectCallPromoter &) = delete;
bool processFunction(ProfileSummaryInfo *PSI);
};
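
As background for the renamed IndirectCallPromoter: the transformation it drives replaces a hot indirect call with a compare against the profiled target and a conditional direct call, keeping the original indirect call as the fallback. A standalone, hand-written sketch of the resulting control flow (the real pass emits this in IR via pgo::promoteIndirectCall):

#include <cstdio>

static int hotTarget(int X) { return X + 1; }
static int coldTarget(int X) { return X * 2; }

// Before promotion: a plain indirect call through Fn.
// After promotion: compare Fn against the profiled hot target and call it
// directly on the fast path, falling back to the indirect call otherwise.
static int promotedCallSite(int (*Fn)(int), int Arg) {
  if (Fn == &hotTarget)
    return hotTarget(Arg); // direct call: inlinable, branch-predicted
  return Fn(Arg);          // original indirect call for everything else
}

int main() {
  std::printf("%d %d\n", promotedCallSite(hotTarget, 41),
              promotedCallSite(coldTarget, 21));
}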
@@ -156,8 +155,8 @@ public:
// Indirect-call promotion heuristic. The direct targets are sorted based on
// the count. Stop at the first target that is not promoted.
-std::vector<ICallPromotionFunc::PromotionCandidate>
-ICallPromotionFunc::getPromotionCandidatesForCallSite(
+std::vector<IndirectCallPromoter::PromotionCandidate>
+IndirectCallPromoter::getPromotionCandidatesForCallSite(
const CallBase &CB, const ArrayRef<InstrProfValueData> &ValueDataRef,
uint64_t TotalCount, uint32_t NumCandidates) {
std::vector<PromotionCandidate> Ret;
@@ -276,7 +275,7 @@ CallBase &llvm::pgo::promoteIndirectCall(CallBase &CB, Function *DirectCallee,
}
// Promote indirect-call to conditional direct-call for one callsite.
-uint32_t ICallPromotionFunc::tryToPromote(
+uint32_t IndirectCallPromoter::tryToPromote(
CallBase &CB, const std::vector<PromotionCandidate> &Candidates,
uint64_t &TotalCount) {
uint32_t NumPromoted = 0;
@@ -295,7 +294,7 @@ uint32_t ICallPromotionFunc::tryToPromote(
// Traverse all the indirect-call callsites and get the value profile
// annotation to perform indirect-call promotion.
-bool ICallPromotionFunc::processFunction(ProfileSummaryInfo *PSI) {
+bool IndirectCallPromoter::processFunction(ProfileSummaryInfo *PSI) {
bool Changed = false;
ICallPromotionAnalysis ICallAnalysis;
for (auto *CB : findIndirectCalls(F)) {
@@ -319,16 +318,15 @@ bool ICallPromotionFunc::processFunction(ProfileSummaryInfo *PSI) {
if (TotalCount == 0 || NumPromoted == NumVals)
continue;
// Otherwise we need to update the annotation with the un-promoted records.
- annotateValueSite(*M, *CB, ICallProfDataRef.slice(NumPromoted), TotalCount,
- IPVK_IndirectCallTarget, NumCandidates);
+ annotateValueSite(*F.getParent(), *CB, ICallProfDataRef.slice(NumPromoted),
+ TotalCount, IPVK_IndirectCallTarget, NumCandidates);
}
return Changed;
}
// A wrapper function that does the actual work.
-static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI,
- bool InLTO, bool SamplePGO,
- ModuleAnalysisManager *AM = nullptr) {
+static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI, bool InLTO,
+ bool SamplePGO, ModuleAnalysisManager &MAM) {
if (DisableICP)
return false;
InstrProfSymtab Symtab;
@@ -342,19 +340,12 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI,
if (F.isDeclaration() || F.hasOptNone())
continue;
- std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
- OptimizationRemarkEmitter *ORE;
- if (AM) {
- auto &FAM =
- AM->getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- } else {
- OwnedORE = std::make_unique<OptimizationRemarkEmitter>(&F);
- ORE = OwnedORE.get();
- }
+ auto &FAM =
+ MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
- ICallPromotionFunc ICallPromotion(F, &M, &Symtab, SamplePGO, *ORE);
- bool FuncChanged = ICallPromotion.processFunction(PSI);
+ IndirectCallPromoter CallPromoter(F, &Symtab, SamplePGO, ORE);
+ bool FuncChanged = CallPromoter.processFunction(PSI);
if (ICPDUMPAFTER && FuncChanged) {
LLVM_DEBUG(dbgs() << "\n== IR Dump After =="; F.print(dbgs()));
LLVM_DEBUG(dbgs() << "\n");
@@ -369,11 +360,11 @@ static bool promoteIndirectCalls(Module &M, ProfileSummaryInfo *PSI,
}
PreservedAnalyses PGOIndirectCallPromotion::run(Module &M,
- ModuleAnalysisManager &AM) {
- ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
+ ModuleAnalysisManager &MAM) {
+ ProfileSummaryInfo *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
if (!promoteIndirectCalls(M, PSI, InLTO | ICPLTOMode,
- SamplePGO | ICPSamplePGOMode, &AM))
+ SamplePGO | ICPSamplePGOMode, MAM))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
diff --git a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
index d7561c193aa3..6882dd83f429 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrOrderFile.cpp
@@ -15,9 +15,6 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/PassRegistry.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index c0409206216e..a7b1953ce81c 100644
--- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -16,7 +16,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
@@ -47,6 +46,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include <algorithm>
@@ -421,6 +421,9 @@ bool InstrProfiling::lowerIntrinsics(Function *F) {
} else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(&Instr)) {
lowerIncrement(IPI);
MadeChange = true;
+ } else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(&Instr)) {
+ lowerTimestamp(IPC);
+ MadeChange = true;
} else if (auto *IPC = dyn_cast<InstrProfCoverInst>(&Instr)) {
lowerCover(IPC);
MadeChange = true;
@@ -510,6 +513,7 @@ static bool containsProfilingIntrinsics(Module &M) {
return containsIntrinsic(llvm::Intrinsic::instrprof_cover) ||
containsIntrinsic(llvm::Intrinsic::instrprof_increment) ||
containsIntrinsic(llvm::Intrinsic::instrprof_increment_step) ||
+ containsIntrinsic(llvm::Intrinsic::instrprof_timestamp) ||
containsIntrinsic(llvm::Intrinsic::instrprof_value_profile);
}
@@ -540,18 +544,19 @@ bool InstrProfiling::run(
// the instrumented function. This is counting the number of instrumented
// target value sites to enter it as field in the profile data variable.
for (Function &F : M) {
- InstrProfIncrementInst *FirstProfIncInst = nullptr;
+ InstrProfInstBase *FirstProfInst = nullptr;
for (BasicBlock &BB : F)
for (auto I = BB.begin(), E = BB.end(); I != E; I++)
if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))
computeNumValueSiteCounts(Ind);
- else if (FirstProfIncInst == nullptr)
- FirstProfIncInst = dyn_cast<InstrProfIncrementInst>(I);
+ else if (FirstProfInst == nullptr &&
+ (isa<InstrProfIncrementInst>(I) || isa<InstrProfCoverInst>(I)))
+ FirstProfInst = dyn_cast<InstrProfInstBase>(I);
// Value profiling intrinsic lowering requires per-function profile data
// variable to be created first.
- if (FirstProfIncInst != nullptr)
- static_cast<void>(getOrCreateRegionCounters(FirstProfIncInst));
+ if (FirstProfInst != nullptr)
+ static_cast<void>(getOrCreateRegionCounters(FirstProfInst));
}
for (Function &F : M)
@@ -669,6 +674,9 @@ Value *InstrProfiling::getCounterAddress(InstrProfInstBase *I) {
auto *Counters = getOrCreateRegionCounters(I);
IRBuilder<> Builder(I);
+ if (isa<InstrProfTimestampInst>(I))
+ Counters->setAlignment(Align(8));
+
auto *Addr = Builder.CreateConstInBoundsGEP2_32(
Counters->getValueType(), Counters, 0, I->getIndex()->getZExtValue());
@@ -710,6 +718,21 @@ void InstrProfiling::lowerCover(InstrProfCoverInst *CoverInstruction) {
CoverInstruction->eraseFromParent();
}
+void InstrProfiling::lowerTimestamp(
+ InstrProfTimestampInst *TimestampInstruction) {
+ assert(TimestampInstruction->getIndex()->isZeroValue() &&
+ "timestamp probes are always the first probe for a function");
+ auto &Ctx = M->getContext();
+ auto *TimestampAddr = getCounterAddress(TimestampInstruction);
+ IRBuilder<> Builder(TimestampInstruction);
+ auto *CalleeTy =
+ FunctionType::get(Type::getVoidTy(Ctx), TimestampAddr->getType(), false);
+ auto Callee = M->getOrInsertFunction(
+ INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SET_TIMESTAMP), CalleeTy);
+ Builder.CreateCall(Callee, {TimestampAddr});
+ TimestampInstruction->eraseFromParent();
+}
+
void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
auto *Addr = getCounterAddress(Inc);
@@ -823,6 +846,72 @@ static inline bool shouldRecordFunctionAddr(Function *F) {
return F->hasAddressTaken() || F->hasLinkOnceLinkage();
}
+static inline bool shouldUsePublicSymbol(Function *Fn) {
+ // It isn't legal to make an alias of this function at all
+ if (Fn->isDeclarationForLinker())
+ return true;
+
+ // Symbols with local linkage can just use the symbol directly without
+ // introducing relocations
+ if (Fn->hasLocalLinkage())
+ return true;
+
+ // PGO + ThinLTO + CFI cause duplicate symbols to be introduced due to some
+ // unfavorable interaction between the new alias and the alias renaming done
+ // in LowerTypeTests under ThinLTO. This affects comdat functions that would
+ // normally be deduplicated: the renaming scheme creates a unique name for
+ // each alias, which defeats that deduplication and results in duplicated
+ // symbols. In the future, we should update the CFI-related passes to migrate
+ // these aliases to the same module where the jump table they refer to is
+ // defined.
+ if (Fn->hasMetadata(LLVMContext::MD_type))
+ return true;
+
+ // For comdat functions, an alias would need the same linkage as the original
+ // function and hidden visibility. There is no point in adding an alias with
+ // identical linkage and visibility to avoid introducing symbolic relocations.
+ if (Fn->hasComdat() &&
+ (Fn->getVisibility() == GlobalValue::VisibilityTypes::HiddenVisibility))
+ return true;
+
+ // It's OK to use an alias.
+ return false;
+}
+
+static inline Constant *getFuncAddrForProfData(Function *Fn) {
+ auto *Int8PtrTy = Type::getInt8PtrTy(Fn->getContext());
+ // Store a nullptr in __llvm_profd, if we shouldn't use a real address
+ if (!shouldRecordFunctionAddr(Fn))
+ return ConstantPointerNull::get(Int8PtrTy);
+
+ // If we can't use an alias, we must use the public symbol, even though this
+ // may require a symbolic relocation.
+ if (shouldUsePublicSymbol(Fn))
+ return ConstantExpr::getBitCast(Fn, Int8PtrTy);
+
+ // When possible use a private alias to avoid symbolic relocations.
+ auto *GA = GlobalAlias::create(GlobalValue::LinkageTypes::PrivateLinkage,
+ Fn->getName() + ".local", Fn);
+
+ // When the instrumented function is a COMDAT function, we cannot use a
+ // private alias. If we did, we would create reference to a local label in
+ // this function's section. If this version of the function isn't selected by
+ // the linker, then the metadata would introduce a reference to a discarded
+ // section. So, for COMDAT functions, we need to adjust the linkage of the
+ // alias. Using hidden visibility avoids a dynamic relocation and an entry in
+ // the dynamic symbol table.
+ //
+ // Note that this handles COMDAT functions with visibility other than Hidden,
+ // since that case is covered in shouldUsePublicSymbol()
+ if (Fn->hasComdat()) {
+ GA->setLinkage(Fn->getLinkage());
+ GA->setVisibility(GlobalValue::VisibilityTypes::HiddenVisibility);
+ }
+
+ // appendToCompilerUsed(*Fn->getParent(), {GA});
+
+ return ConstantExpr::getBitCast(GA, Int8PtrTy);
+}
+
static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {
// Don't do this for Darwin. compiler-rt uses linker magic.
if (TT.isOSDarwin())
@@ -1014,9 +1103,7 @@ InstrProfiling::getOrCreateRegionCounters(InstrProfInstBase *Inc) {
};
auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes));
- Constant *FunctionAddr = shouldRecordFunctionAddr(Fn)
- ? ConstantExpr::getBitCast(Fn, Int8PtrTy)
- : ConstantPointerNull::get(Int8PtrTy);
+ Constant *FunctionAddr = getFuncAddrForProfData(Fn);
Constant *Int16ArrayVals[IPVK_Last + 1];
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
@@ -1116,6 +1203,7 @@ void InstrProfiling::emitVNodes() {
Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());
VNodesVar->setSection(
getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));
+ VNodesVar->setAlignment(M->getDataLayout().getABITypeAlign(VNodesTy));
// VNodesVar is used by runtime but not referenced via relocation by other
// sections. Conservatively make it linker retained.
UsedVars.push_back(VNodesVar);
diff --git a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
index ab72650ae801..806afc8fcdf7 100644
--- a/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/Instrumentation.cpp
@@ -12,12 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Instrumentation.h"
-#include "llvm-c/Initialization.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/PassRegistry.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/Instrumentation/KCFI.cpp b/llvm/lib/Transforms/Instrumentation/KCFI.cpp
index 7978c766f0f0..b1a26880c701 100644
--- a/llvm/lib/Transforms/Instrumentation/KCFI.cpp
+++ b/llvm/lib/Transforms/Instrumentation/KCFI.cpp
@@ -24,10 +24,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
@@ -76,6 +73,7 @@ PreservedAnalyses KCFIPass::run(Function &F, FunctionAnalysisManager &AM) {
IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
MDNode *VeryUnlikelyWeights =
MDBuilder(Ctx).createBranchWeights(1, (1U << 20) - 1);
+ Triple T(M.getTargetTriple());
for (CallInst *CI : KCFICalls) {
// Get the expected hash value.
@@ -96,14 +94,24 @@ PreservedAnalyses KCFIPass::run(Function &F, FunctionAnalysisManager &AM) {
// Emit a check and trap if the target hash doesn't match.
IRBuilder<> Builder(Call);
- Value *HashPtr = Builder.CreateConstInBoundsGEP1_32(
- Int32Ty, Call->getCalledOperand(), -1);
+ Value *FuncPtr = Call->getCalledOperand();
+ // ARM uses the least significant bit of the function pointer to select
+ // between ARM and Thumb modes for the callee. Instructions are always
+ // at least 16-bit aligned, so clear the LSB before we compute the hash
+ // location.
+ if (T.isARM() || T.isThumb()) {
+ FuncPtr = Builder.CreateIntToPtr(
+ Builder.CreateAnd(Builder.CreatePtrToInt(FuncPtr, Int32Ty),
+ ConstantInt::get(Int32Ty, -2)),
+ FuncPtr->getType());
+ }
+ Value *HashPtr = Builder.CreateConstInBoundsGEP1_32(Int32Ty, FuncPtr, -1);
Value *Test = Builder.CreateICmpNE(Builder.CreateLoad(Int32Ty, HashPtr),
ConstantInt::get(Int32Ty, ExpectedHash));
Instruction *ThenTerm =
SplitBlockAndInsertIfThen(Test, Call, false, VeryUnlikelyWeights);
Builder.SetInsertPoint(ThenTerm);
- Builder.CreateCall(Intrinsic::getDeclaration(&M, Intrinsic::trap));
+ Builder.CreateCall(Intrinsic::getDeclaration(&M, Intrinsic::debugtrap));
++NumKCFIChecks;
}
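
The ARM/Thumb handling above exists because the LSB of a function pointer selects the instruction set on those targets, so the expected KCFI hash lives at (ptr & ~1) - 4 rather than ptr - 4. A standalone model of that address computation, assuming a 4-byte hash word placed immediately before the function as the GEP of -1 on i32 implies:

#include <cstdint>
#include <cstdio>

// Models where the KCFI check loads the expected-hash word from.
// On ARM/Thumb the low bit of the callee pointer is a mode bit, not part of
// the address, so it is cleared before stepping back one 32-bit word.
static uint64_t kcfiHashAddress(uint64_t FuncPtr, bool IsArmOrThumb) {
  if (IsArmOrThumb)
    FuncPtr &= ~uint64_t(1); // strip the Thumb/ARM mode bit
  return FuncPtr - sizeof(uint32_t);
}

int main() {
  uint64_t ThumbCallee = 0x8001; // bit 0 set: Thumb-mode entry point
  std::printf("x86-64 hash at 0x%llx\n",
              (unsigned long long)kcfiHashAddress(0x401000, false));
  std::printf("thumb  hash at 0x%llx\n",
              (unsigned long long)kcfiHashAddress(ThumbCallee, true));
}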
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 2a1601fab45f..789ed005d03d 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -18,10 +18,12 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
@@ -30,18 +32,30 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/HashBuilder.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <map>
+#include <set>
using namespace llvm;
+using namespace llvm::memprof;
#define DEBUG_TYPE "memprof"
+namespace llvm {
+extern cl::opt<bool> PGOWarnMissing;
+extern cl::opt<bool> NoPGOWarnMismatch;
+extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
+} // namespace llvm
+
constexpr int LLVM_MEM_PROFILER_VERSION = 1;
// Size of memory mapped to a single shadow location.
@@ -130,6 +144,7 @@ STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");
+STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
namespace {
@@ -603,3 +618,297 @@ bool MemProfiler::instrumentFunction(Function &F) {
return FunctionModified;
}
+
+static void addCallsiteMetadata(Instruction &I,
+ std::vector<uint64_t> &InlinedCallStack,
+ LLVMContext &Ctx) {
+ I.setMetadata(LLVMContext::MD_callsite,
+ buildCallstackMetadata(InlinedCallStack, Ctx));
+}
+
+static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
+ uint32_t Column) {
+ llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little>
+ HashBuilder;
+ HashBuilder.add(Function, LineOffset, Column);
+ llvm::BLAKE3Result<8> Hash = HashBuilder.final();
+ uint64_t Id;
+ std::memcpy(&Id, Hash.data(), sizeof(Hash));
+ return Id;
+}
+
+static uint64_t computeStackId(const memprof::Frame &Frame) {
+ return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
+}
+
+static void addCallStack(CallStackTrie &AllocTrie,
+ const AllocationInfo *AllocInfo) {
+ SmallVector<uint64_t> StackIds;
+ for (const auto &StackFrame : AllocInfo->CallStack)
+ StackIds.push_back(computeStackId(StackFrame));
+ auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
+ AllocInfo->Info.getAllocCount(),
+ AllocInfo->Info.getTotalLifetime());
+ AllocTrie.addCallStack(AllocType, StackIds);
+}
+
+// Helper to compare the InlinedCallStack computed from an instruction's debug
+// info to a list of Frames from profile data (either the allocation data or a
+// callsite). For callsites, the StartIndex to use in the Frame array may be
+// non-zero.
+static bool
+stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
+ ArrayRef<uint64_t> InlinedCallStack,
+ unsigned StartIndex = 0) {
+ auto StackFrame = ProfileCallStack.begin() + StartIndex;
+ auto InlCallStackIter = InlinedCallStack.begin();
+ for (; StackFrame != ProfileCallStack.end() &&
+ InlCallStackIter != InlinedCallStack.end();
+ ++StackFrame, ++InlCallStackIter) {
+ uint64_t StackId = computeStackId(*StackFrame);
+ if (StackId != *InlCallStackIter)
+ return false;
+ }
+ // Return true if we found and matched all stack ids from the call
+ // instruction.
+ return InlCallStackIter == InlinedCallStack.end();
+}
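
stackFrameIncludesInlinedCallStack is a prefix match: every stack id recovered from the instruction's inlined-at chain must appear, in order, at the front of the profiled frame list (optionally starting at StartIndex for callsites). A standalone sketch of the same matching over plain integer ids:

#include <cstdint>
#include <cstdio>
#include <vector>

// Returns true when every id in InlinedCallStack matches the corresponding
// entry of ProfileStack, starting at StartIndex; the profile stack may be
// longer (outer frames beyond the current function are ignored).
static bool profileIncludesInlinedStack(
    const std::vector<uint64_t> &ProfileStack,
    const std::vector<uint64_t> &InlinedCallStack, unsigned StartIndex = 0) {
  auto PI = ProfileStack.begin() + StartIndex;
  auto II = InlinedCallStack.begin();
  for (; PI != ProfileStack.end() && II != InlinedCallStack.end(); ++PI, ++II)
    if (*PI != *II)
      return false;
  return II == InlinedCallStack.end();
}

int main() {
  std::vector<uint64_t> Profile = {11, 22, 33, 44}; // leaf ... outermost
  std::printf("%d %d\n", profileIncludesInlinedStack(Profile, {11, 22}),
              profileIncludesInlinedStack(Profile, {11, 99}));
}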
+
+static void readMemprof(Module &M, Function &F,
+ IndexedInstrProfReader *MemProfReader,
+ const TargetLibraryInfo &TLI) {
+ auto &Ctx = M.getContext();
+
+ auto FuncName = getPGOFuncName(F);
+ auto FuncGUID = Function::getGUID(FuncName);
+ Expected<memprof::MemProfRecord> MemProfResult =
+ MemProfReader->getMemProfRecord(FuncGUID);
+ if (Error E = MemProfResult.takeError()) {
+ handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
+ auto Err = IPE.get();
+ bool SkipWarning = false;
+ LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
+ << ": ");
+ if (Err == instrprof_error::unknown_function) {
+ NumOfMemProfMissing++;
+ SkipWarning = !PGOWarnMissing;
+ LLVM_DEBUG(dbgs() << "unknown function");
+ } else if (Err == instrprof_error::hash_mismatch) {
+ SkipWarning =
+ NoPGOWarnMismatch ||
+ (NoPGOWarnMismatchComdatWeak &&
+ (F.hasComdat() ||
+ F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
+ LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
+ }
+
+ if (SkipWarning)
+ return;
+
+ std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
+ Twine(" Hash = ") + std::to_string(FuncGUID))
+ .str();
+
+ Ctx.diagnose(
+ DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
+ });
+ return;
+ }
+
+ // Build maps of the location hash to all profile data with that leaf location
+ // (allocation info and the callsites).
+ std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
+ // For the callsites we need to record the index of the associated frame in
+ // the frame array (see comments below where the map entries are added).
+ std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
+ LocHashToCallSites;
+ const auto MemProfRec = std::move(MemProfResult.get());
+ for (auto &AI : MemProfRec.AllocSites) {
+ // Associate the allocation info with the leaf frame. The later matching
+ // code will match any inlined call sequences in the IR with a longer prefix
+ // of call stack frames.
+ uint64_t StackId = computeStackId(AI.CallStack[0]);
+ LocHashToAllocInfo[StackId].insert(&AI);
+ }
+ for (auto &CS : MemProfRec.CallSites) {
+ // Need to record all frames from leaf up to and including this function,
+ // as any of these may or may not have been inlined at this point.
+ unsigned Idx = 0;
+ for (auto &StackFrame : CS) {
+ uint64_t StackId = computeStackId(StackFrame);
+ LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
+ // Once we find this function, we can stop recording.
+ if (StackFrame.Function == FuncGUID)
+ break;
+ }
+ assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
+ }
+
+ auto GetOffset = [](const DILocation *DIL) {
+ return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
+ 0xffff;
+ };
+
+ // Now walk the instructions, looking up the associated profile data using
+ // debug locations.
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ if (I.isDebugOrPseudoInst())
+ continue;
+ // We are only interested in calls (allocation or interior call stack
+ // context calls).
+ auto *CI = dyn_cast<CallBase>(&I);
+ if (!CI)
+ continue;
+ auto *CalledFunction = CI->getCalledFunction();
+ if (CalledFunction && CalledFunction->isIntrinsic())
+ continue;
+ // List of call stack ids computed from the location hashes on debug
+ // locations (leaf to inlined at root).
+ std::vector<uint64_t> InlinedCallStack;
+ // Was the leaf location found in one of the profile maps?
+ bool LeafFound = false;
+ // If leaf was found in a map, iterators pointing to its location in both
+ // of the maps. It might exist in neither, one, or both (the latter case
+ // can happen because we don't currently have discriminators to
+ // distinguish the case when a single line/col maps to both an allocation
+ // and another callsite).
+ std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
+ AllocInfoIter;
+ std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *,
+ unsigned>>>::iterator CallSitesIter;
+ for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
+ DIL = DIL->getInlinedAt()) {
+ // Use C++ linkage name if possible. Need to compile with
+ // -fdebug-info-for-profiling to get linkage name.
+ StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
+ if (Name.empty())
+ Name = DIL->getScope()->getSubprogram()->getName();
+ auto CalleeGUID = Function::getGUID(Name);
+ auto StackId =
+ computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn());
+ // LeafFound will only be false on the first iteration, since we either
+ // set it true or break out of the loop below.
+ if (!LeafFound) {
+ AllocInfoIter = LocHashToAllocInfo.find(StackId);
+ CallSitesIter = LocHashToCallSites.find(StackId);
+ // Check if the leaf is in one of the maps. If not, no need to look
+ // further at this call.
+ if (AllocInfoIter == LocHashToAllocInfo.end() &&
+ CallSitesIter == LocHashToCallSites.end())
+ break;
+ LeafFound = true;
+ }
+ InlinedCallStack.push_back(StackId);
+ }
+ // If leaf not in either of the maps, skip inst.
+ if (!LeafFound)
+ continue;
+
+ // First add !memprof metadata from allocation info, if we found the
+ // instruction's leaf location in that map, and if the rest of the
+ // instruction's locations match the prefix Frame locations on an
+ // allocation context with the same leaf.
+ if (AllocInfoIter != LocHashToAllocInfo.end()) {
+ // Only consider allocations via new, to reduce unnecessary metadata,
+ // since those are the only allocations that will be targeted initially.
+ if (!isNewLikeFn(CI, &TLI))
+ continue;
+ // We may match this instruction's location list to multiple MIB
+ // contexts. Add them to a Trie specialized for trimming the contexts to
+ // the minimal needed to disambiguate contexts with unique behavior.
+ CallStackTrie AllocTrie;
+ for (auto *AllocInfo : AllocInfoIter->second) {
+ // Check the full inlined call stack against this one.
+ // If we found and thus matched all frames on the call, include
+ // this MIB.
+ if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
+ InlinedCallStack))
+ addCallStack(AllocTrie, AllocInfo);
+ }
+ // We might not have matched any to the full inlined call stack.
+ // But if we did, create and attach metadata, or a function attribute if
+ // all contexts have identical profiled behavior.
+ if (!AllocTrie.empty()) {
+ // MemprofMDAttached will be false if a function attribute was
+ // attached.
+ bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
+ assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
+ if (MemprofMDAttached) {
+ // Add callsite metadata for the instruction's location list so that
+ // it is simpler later on to identify which parts of the MIB contexts
+ // are from this particular instruction (including during inlining,
+ // when the callsite metadata will be updated appropriately).
+ // FIXME: can this be changed to strip out the matching stack
+ // context ids from the MIB contexts and not add any callsite
+ // metadata here to save space?
+ addCallsiteMetadata(I, InlinedCallStack, Ctx);
+ }
+ }
+ continue;
+ }
+
+ // Otherwise, add callsite metadata. If we reach here then we found the
+ // instruction's leaf location in the callsites map and not the allocation
+ // map.
+ assert(CallSitesIter != LocHashToCallSites.end());
+ for (auto CallStackIdx : CallSitesIter->second) {
+ // If we found and thus matched all frames on the call, create and
+ // attach call stack metadata.
+ if (stackFrameIncludesInlinedCallStack(
+ *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
+ addCallsiteMetadata(I, InlinedCallStack, Ctx);
+ // Only need to find one with a matching call stack and add a single
+ // callsite metadata.
+ break;
+ }
+ }
+ }
+ }
+}
+
+MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,
+ IntrusiveRefCntPtr<vfs::FileSystem> FS)
+ : MemoryProfileFileName(MemoryProfileFile), FS(FS) {
+ if (!FS)
+ this->FS = vfs::getRealFileSystem();
+}
+
+PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {
+ LLVM_DEBUG(dbgs() << "Read in memory profile:");
+ auto &Ctx = M.getContext();
+ auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);
+ if (Error E = ReaderOrErr.takeError()) {
+ handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
+ Ctx.diagnose(
+ DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));
+ });
+ return PreservedAnalyses::all();
+ }
+
+ std::unique_ptr<IndexedInstrProfReader> MemProfReader =
+ std::move(ReaderOrErr.get());
+ if (!MemProfReader) {
+ Ctx.diagnose(DiagnosticInfoPGOProfile(
+ MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));
+ return PreservedAnalyses::all();
+ }
+
+ if (!MemProfReader->hasMemoryProfile()) {
+ Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),
+ "Not a memory profile"));
+ return PreservedAnalyses::all();
+ }
+
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+ for (auto &F : M) {
+ if (F.isDeclaration())
+ continue;
+
+ const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+ readMemprof(M, F, MemProfReader.get(), TLI);
+ }
+
+ return PreservedAnalyses::none();
+}
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index fe8b8ce0dc86..83d90049abc3 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -122,6 +122,10 @@
/// Arbitrary sized accesses are handled with:
/// __msan_metadata_ptr_for_load_n(ptr, size)
/// __msan_metadata_ptr_for_store_n(ptr, size);
+/// Note that the sanitizer code has to deal with how shadow/origin pairs
+/// returned by these functions are represented in different ABIs. In
+/// the X86_64 ABI they are returned in RDX:RAX, and in the SystemZ ABI they
+/// are written to memory pointed to by a hidden parameter.
/// - TLS variables are stored in a single per-task struct. A call to a
/// function __msan_get_context_state() returning a pointer to that struct
/// is inserted into every instrumented function before the entry block;
@@ -135,7 +139,7 @@
/// Also, KMSAN currently ignores uninitialized memory passed into inline asm
/// calls, making sure we're on the safe side wrt. possible false positives.
///
-/// KernelMemorySanitizer only supports X86_64 at the moment.
+/// KernelMemorySanitizer only supports X86_64 and SystemZ at the moment.
///
//
// FIXME: This sanitizer does not yet handle scalable vectors
@@ -152,11 +156,11 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Argument.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
@@ -190,6 +194,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -434,6 +439,14 @@ static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
0x0200000000000, // OriginBase
};
+// loongarch64 Linux
+static const MemoryMapParams Linux_LoongArch64_MemoryMapParams = {
+ 0, // AndMask (not used)
+ 0x500000000000, // XorMask
+ 0, // ShadowBase (not used)
+ 0x100000000000, // OriginBase
+};
+
// aarch64 FreeBSD
static const MemoryMapParams FreeBSD_AArch64_MemoryMapParams = {
0x1800000000000, // AndMask
@@ -491,6 +504,11 @@ static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
&Linux_AArch64_MemoryMapParams,
};
+static const PlatformMemoryMapParams Linux_LoongArch_MemoryMapParams = {
+ nullptr,
+ &Linux_LoongArch64_MemoryMapParams,
+};
+
static const PlatformMemoryMapParams FreeBSD_ARM_MemoryMapParams = {
nullptr,
&FreeBSD_AArch64_MemoryMapParams,
@@ -543,6 +561,10 @@ private:
void createKernelApi(Module &M, const TargetLibraryInfo &TLI);
void createUserspaceApi(Module &M, const TargetLibraryInfo &TLI);
+ template <typename... ArgsTy>
+ FunctionCallee getOrInsertMsanMetadataFunction(Module &M, StringRef Name,
+ ArgsTy... Args);
+
/// True if we're compiling the Linux kernel.
bool CompileKernel;
/// Track origins (allocation points) of uninitialized values.
@@ -550,6 +572,7 @@ private:
bool Recover;
bool EagerChecks;
+ Triple TargetTriple;
LLVMContext *C;
Type *IntptrTy;
Type *OriginTy;
@@ -620,13 +643,18 @@ private:
/// Functions for poisoning/unpoisoning local variables
FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;
- /// Each of the MsanMetadataPtrXxx functions returns a pair of shadow/origin
- /// pointers.
+ /// Pair of shadow/origin pointers.
+ Type *MsanMetadata;
+
+ /// Each of the MsanMetadataPtrXxx functions returns a MsanMetadata.
FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
FunctionCallee MsanMetadataPtrForLoad_1_8[4];
FunctionCallee MsanMetadataPtrForStore_1_8[4];
FunctionCallee MsanInstrumentAsmStoreFn;
+ /// Storage for return values of the MsanMetadataPtrXxx functions.
+ Value *MsanMetadataAlloca;
+
/// Helper to choose between different MsanMetadataPtrXxx().
FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);
@@ -706,7 +734,7 @@ void MemorySanitizerPass::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
static_cast<PassInfoMixin<MemorySanitizerPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
- OS << "<";
+ OS << '<';
if (Options.Recover)
OS << "recover;";
if (Options.Kernel)
@@ -714,7 +742,7 @@ void MemorySanitizerPass::printPipeline(
if (Options.EagerChecks)
OS << "eager-checks;";
OS << "track-origins=" << Options.TrackOrigins;
- OS << ">";
+ OS << '>';
}
/// Create a non-const global initialized with the given string.
@@ -729,6 +757,21 @@ static GlobalVariable *createPrivateConstGlobalForString(Module &M,
GlobalValue::PrivateLinkage, StrConst, "");
}
+template <typename... ArgsTy>
+FunctionCallee
+MemorySanitizer::getOrInsertMsanMetadataFunction(Module &M, StringRef Name,
+ ArgsTy... Args) {
+ if (TargetTriple.getArch() == Triple::systemz) {
+ // SystemZ ABI: shadow/origin pair is returned via a hidden parameter.
+ return M.getOrInsertFunction(Name, Type::getVoidTy(*C),
+ PointerType::get(MsanMetadata, 0),
+ std::forward<ArgsTy>(Args)...);
+ }
+
+ return M.getOrInsertFunction(Name, MsanMetadata,
+ std::forward<ArgsTy>(Args)...);
+}
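
The helper above papers over an ABI difference: on most targets the shadow/origin pair is returned by value (RDX:RAX on x86-64), while on SystemZ it is written through a hidden pointer parameter. A standalone illustration of the two calling shapes the instrumentation must be prepared to emit (plain C++ stand-ins, not the real __msan_metadata_ptr_* declarations):

#include <cstdint>
#include <cstdio>

struct MsanMetadata {
  uint8_t *Shadow;
  uint32_t *Origin;
};

static uint8_t ShadowByte;
static uint32_t OriginWord;

// x86-64 style: the pair comes back by value (in RDX:RAX at the ABI level).
static MsanMetadata metadataByValue(void *Addr) {
  (void)Addr; // the real runtime maps Addr into shadow/origin here
  return {&ShadowByte, &OriginWord};
}

// SystemZ style: the callee fills a hidden out-parameter, which is why the
// pass allocates MsanMetadataAlloca and passes its address to the callback.
static void metadataByOutParam(MsanMetadata *Out, void *Addr) {
  (void)Addr;
  Out->Shadow = &ShadowByte;
  Out->Origin = &OriginWord;
}

int main() {
  int Dummy;
  MsanMetadata A = metadataByValue(&Dummy);
  MsanMetadata B;
  metadataByOutParam(&B, &Dummy);
  std::printf("%p %p\n", (void *)A.Shadow, (void *)B.Shadow);
}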
+
/// Create KMSAN API callbacks.
void MemorySanitizer::createKernelApi(Module &M, const TargetLibraryInfo &TLI) {
IRBuilder<> IRB(*C);
@@ -758,25 +801,25 @@ void MemorySanitizer::createKernelApi(Module &M, const TargetLibraryInfo &TLI) {
MsanGetContextStateFn = M.getOrInsertFunction(
"__msan_get_context_state", PointerType::get(MsanContextStateTy, 0));
- Type *RetTy = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
- PointerType::get(IRB.getInt32Ty(), 0));
+ MsanMetadata = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
+ PointerType::get(IRB.getInt32Ty(), 0));
for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) {
std::string name_load =
"__msan_metadata_ptr_for_load_" + std::to_string(size);
std::string name_store =
"__msan_metadata_ptr_for_store_" + std::to_string(size);
- MsanMetadataPtrForLoad_1_8[ind] = M.getOrInsertFunction(
- name_load, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
- MsanMetadataPtrForStore_1_8[ind] = M.getOrInsertFunction(
- name_store, RetTy, PointerType::get(IRB.getInt8Ty(), 0));
+ MsanMetadataPtrForLoad_1_8[ind] = getOrInsertMsanMetadataFunction(
+ M, name_load, PointerType::get(IRB.getInt8Ty(), 0));
+ MsanMetadataPtrForStore_1_8[ind] = getOrInsertMsanMetadataFunction(
+ M, name_store, PointerType::get(IRB.getInt8Ty(), 0));
}
- MsanMetadataPtrForLoadN = M.getOrInsertFunction(
- "__msan_metadata_ptr_for_load_n", RetTy,
- PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
- MsanMetadataPtrForStoreN = M.getOrInsertFunction(
- "__msan_metadata_ptr_for_store_n", RetTy,
+ MsanMetadataPtrForLoadN = getOrInsertMsanMetadataFunction(
+ M, "__msan_metadata_ptr_for_load_n", PointerType::get(IRB.getInt8Ty(), 0),
+ IRB.getInt64Ty());
+ MsanMetadataPtrForStoreN = getOrInsertMsanMetadataFunction(
+ M, "__msan_metadata_ptr_for_store_n",
PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
// Functions for poisoning and unpoisoning memory.
@@ -927,6 +970,8 @@ FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
void MemorySanitizer::initializeModule(Module &M) {
auto &DL = M.getDataLayout();
+ TargetTriple = Triple(M.getTargetTriple());
+
bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0;
bool OriginPassed = ClOriginBase.getNumOccurrences() > 0;
// Check the overrides first
@@ -937,7 +982,6 @@ void MemorySanitizer::initializeModule(Module &M) {
CustomMapParams.OriginBase = ClOriginBase;
MapParams = &CustomMapParams;
} else {
- Triple TargetTriple(M.getTargetTriple());
switch (TargetTriple.getOS()) {
case Triple::FreeBSD:
switch (TargetTriple.getArch()) {
@@ -986,6 +1030,9 @@ void MemorySanitizer::initializeModule(Module &M) {
case Triple::aarch64_be:
MapParams = Linux_ARM_MemoryMapParams.bits64;
break;
+ case Triple::loongarch64:
+ MapParams = Linux_LoongArch_MemoryMapParams.bits64;
+ break;
default:
report_fatal_error("unsupported architecture");
}
@@ -1056,10 +1103,14 @@ struct MemorySanitizerVisitor;
static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
MemorySanitizerVisitor &Visitor);
-static unsigned TypeSizeToSizeIndex(unsigned TypeSize) {
- if (TypeSize <= 8)
+static unsigned TypeSizeToSizeIndex(TypeSize TS) {
+ if (TS.isScalable())
+ // Scalable types unconditionally take slowpaths.
+ return kNumberOfAccessSizes;
+ unsigned TypeSizeFixed = TS.getFixedValue();
+ if (TypeSizeFixed <= 8)
return 0;
- return Log2_32_Ceil((TypeSize + 7) / 8);
+ return Log2_32_Ceil((TypeSizeFixed + 7) / 8);
}
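
With the TypeSize-based signature, any scalable size is pushed straight to the slow path by returning kNumberOfAccessSizes, which then fails the SizeIndex < kNumberOfAccessSizes test at the call sites. A standalone sketch of the selection logic, with a plain bool standing in for TypeSize::isScalable and kNumberOfAccessSizes shown as 4 to match the 1/2/4/8-byte callbacks:

#include <cstdint>
#include <cstdio>

constexpr unsigned kNumberOfAccessSizes = 4;

// Mirrors the updated TypeSizeToSizeIndex: scalable sizes get an
// out-of-range index so callers fall back to the generic path.
static unsigned typeSizeToSizeIndex(uint64_t SizeInBits, bool IsScalable) {
  if (IsScalable)
    return kNumberOfAccessSizes;
  if (SizeInBits <= 8)
    return 0;
  unsigned Bytes = (SizeInBits + 7) / 8;
  unsigned Index = 0;
  while ((1u << Index) < Bytes) // ceil(log2(bytes)), like Log2_32_Ceil
    ++Index;
  return Index;
}

int main() {
  std::printf("64-bit fixed    -> %u\n", typeSizeToSizeIndex(64, false));
  std::printf("scalable vector -> %u\n", typeSizeToSizeIndex(128, true));
}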
namespace {
@@ -1178,13 +1229,30 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// Fill memory range with the given origin value.
void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
- unsigned Size, Align Alignment) {
+ TypeSize TS, Align Alignment) {
const DataLayout &DL = F.getParent()->getDataLayout();
const Align IntptrAlignment = DL.getABITypeAlign(MS.IntptrTy);
unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
assert(IntptrAlignment >= kMinOriginAlignment);
assert(IntptrSize >= kOriginSize);
+ // Note: The loop-based form also works for fixed-length vectors; however,
+ // we prefer to unroll and specialize the alignment handling for them below.
+ if (TS.isScalable()) {
+ Value *Size = IRB.CreateTypeSize(IRB.getInt32Ty(), TS);
+ Value *RoundUp = IRB.CreateAdd(Size, IRB.getInt32(kOriginSize - 1));
+ Value *End = IRB.CreateUDiv(RoundUp, IRB.getInt32(kOriginSize));
+ auto [InsertPt, Index] =
+ SplitBlockAndInsertSimpleForLoop(End, &*IRB.GetInsertPoint());
+ IRB.SetInsertPoint(InsertPt);
+
+ Value *GEP = IRB.CreateGEP(MS.OriginTy, OriginPtr, Index);
+ IRB.CreateAlignedStore(Origin, GEP, kMinOriginAlignment);
+ return;
+ }
+
+ unsigned Size = TS.getFixedValue();
+
unsigned Ofs = 0;
Align CurrentAlignment = Alignment;
if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
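
For reference, an editorial model of what the scalable branch above generates at run time: the origin value is stored once per kOriginSize-byte chunk, with the chunk count computed by the same round-up division the emitted IR uses. The value 4 for kOriginSize is an assumption here, matching the i32 origin type used by the stores.

#include <cstdint>

constexpr unsigned kSketchOriginSize = 4; // assumed: one 32-bit origin id per 4 shadow bytes

void paintOriginModel(uint32_t Origin, uint32_t *OriginPtr, unsigned SizeInBytes) {
  unsigned End = (SizeInBytes + kSketchOriginSize - 1) / kSketchOriginSize; // RoundUp / kOriginSize
  for (unsigned Index = 0; Index < End; ++Index) // loop created by SplitBlockAndInsertSimpleForLoop
    OriginPtr[Index] = Origin;                   // one aligned origin store per chunk
}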
@@ -1212,7 +1280,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *OriginPtr, Align Alignment) {
const DataLayout &DL = F.getParent()->getDataLayout();
const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
- unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
+ TypeSize StoreSize = DL.getTypeStoreSize(Shadow->getType());
Value *ConvertedShadow = convertShadowToScalar(Shadow, IRB);
if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
if (!ClCheckConstantShadow || ConstantShadow->isZeroValue()) {
@@ -1229,7 +1297,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Fallback to runtime check, which still can be optimized out later.
}
- unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
+ TypeSize TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
if (instrumentWithCalls(ConvertedShadow) &&
SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
@@ -1325,7 +1393,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
void materializeOneCheck(IRBuilder<> &IRB, Value *ConvertedShadow,
Value *Origin) {
const DataLayout &DL = F.getParent()->getDataLayout();
- unsigned TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
+ TypeSize TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
if (instrumentWithCalls(ConvertedShadow) &&
SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
@@ -1443,6 +1511,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
MS.RetvalOriginTLS =
IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
{Zero, IRB.getInt32(6)}, "retval_origin");
+ if (MS.TargetTriple.getArch() == Triple::systemz)
+ MS.MsanMetadataAlloca = IRB.CreateAlloca(MS.MsanMetadata, 0u);
}
/// Add MemorySanitizer instrumentation to a function.
@@ -1505,8 +1575,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
const DataLayout &DL = F.getParent()->getDataLayout();
if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
- return FixedVectorType::get(IntegerType::get(*MS.C, EltSize),
- cast<FixedVectorType>(VT)->getNumElements());
+ return VectorType::get(IntegerType::get(*MS.C, EltSize),
+ VT->getElementCount());
}
if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) {
return ArrayType::get(getShadowTy(AT->getElementType()),
@@ -1524,14 +1594,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return IntegerType::get(*MS.C, TypeSize);
}
- /// Flatten a vector type.
- Type *getShadowTyNoVec(Type *ty) {
- if (VectorType *vt = dyn_cast<VectorType>(ty))
- return IntegerType::get(*MS.C,
- vt->getPrimitiveSizeInBits().getFixedValue());
- return ty;
- }
-
/// Extract combined shadow of struct elements as a bool
Value *collapseStructShadow(StructType *Struct, Value *Shadow,
IRBuilder<> &IRB) {
@@ -1541,8 +1603,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
for (unsigned Idx = 0; Idx < Struct->getNumElements(); Idx++) {
// Combine by ORing together each element's bool shadow
Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
- Value *ShadowInner = convertShadowToScalar(ShadowItem, IRB);
- Value *ShadowBool = convertToBool(ShadowInner, IRB);
+ Value *ShadowBool = convertToBool(ShadowItem, IRB);
if (Aggregator != FalseVal)
Aggregator = IRB.CreateOr(Aggregator, ShadowBool);
@@ -1578,11 +1639,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return collapseStructShadow(Struct, V, IRB);
if (ArrayType *Array = dyn_cast<ArrayType>(V->getType()))
return collapseArrayShadow(Array, V, IRB);
- Type *Ty = V->getType();
- Type *NoVecTy = getShadowTyNoVec(Ty);
- if (Ty == NoVecTy)
- return V;
- return IRB.CreateBitCast(V, NoVecTy);
+ if (isa<VectorType>(V->getType())) {
+ if (isa<ScalableVectorType>(V->getType()))
+ return convertShadowToScalar(IRB.CreateOrReduce(V), IRB);
+ unsigned BitWidth =
+ V->getType()->getPrimitiveSizeInBits().getFixedValue();
+ return IRB.CreateBitCast(V, IntegerType::get(*MS.C, BitWidth));
+ }
+ return V;
}
// Convert a scalar value to an i1 by comparing with 0
@@ -1597,28 +1661,28 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
Type *ptrToIntPtrType(Type *PtrTy) const {
- if (FixedVectorType *VectTy = dyn_cast<FixedVectorType>(PtrTy)) {
- return FixedVectorType::get(ptrToIntPtrType(VectTy->getElementType()),
- VectTy->getNumElements());
+ if (VectorType *VectTy = dyn_cast<VectorType>(PtrTy)) {
+ return VectorType::get(ptrToIntPtrType(VectTy->getElementType()),
+ VectTy->getElementCount());
}
assert(PtrTy->isIntOrPtrTy());
return MS.IntptrTy;
}
Type *getPtrToShadowPtrType(Type *IntPtrTy, Type *ShadowTy) const {
- if (FixedVectorType *VectTy = dyn_cast<FixedVectorType>(IntPtrTy)) {
- return FixedVectorType::get(
+ if (VectorType *VectTy = dyn_cast<VectorType>(IntPtrTy)) {
+ return VectorType::get(
getPtrToShadowPtrType(VectTy->getElementType(), ShadowTy),
- VectTy->getNumElements());
+ VectTy->getElementCount());
}
assert(IntPtrTy == MS.IntptrTy);
return ShadowTy->getPointerTo();
}
Constant *constToIntPtr(Type *IntPtrTy, uint64_t C) const {
- if (FixedVectorType *VectTy = dyn_cast<FixedVectorType>(IntPtrTy)) {
- return ConstantDataVector::getSplat(
- VectTy->getNumElements(), constToIntPtr(VectTy->getElementType(), C));
+ if (VectorType *VectTy = dyn_cast<VectorType>(IntPtrTy)) {
+ return ConstantVector::getSplat(
+ VectTy->getElementCount(), constToIntPtr(VectTy->getElementType(), C));
}
assert(IntPtrTy == MS.IntptrTy);
return ConstantInt::get(MS.IntptrTy, C);
@@ -1681,24 +1745,37 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return std::make_pair(ShadowPtr, OriginPtr);
}
+ template <typename... ArgsTy>
+ Value *createMetadataCall(IRBuilder<> &IRB, FunctionCallee Callee,
+ ArgsTy... Args) {
+ if (MS.TargetTriple.getArch() == Triple::systemz) {
+ IRB.CreateCall(Callee,
+ {MS.MsanMetadataAlloca, std::forward<ArgsTy>(Args)...});
+ return IRB.CreateLoad(MS.MsanMetadata, MS.MsanMetadataAlloca);
+ }
+
+ return IRB.CreateCall(Callee, {std::forward<ArgsTy>(Args)...});
+ }
+
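
The helper above hides two callback conventions; the sketch below (editorial, with made-up stand-in types) contrasts them: most targets return the {shadow, origin} pair directly, while on SystemZ the pair is written into a caller-provided stack slot (MsanMetadataAlloca) and reloaded.

#include <cstdint>

struct MetadataPairModel { void *Shadow; uint32_t *Origin; }; // stand-in for MsanMetadata

// Default convention: the runtime callback returns the pair directly.
MetadataPairModel callDirect(MetadataPairModel (*Getter)(void *), void *Addr) {
  return Getter(Addr);
}

// SystemZ convention: the caller passes a slot, the callee fills it, and the
// caller reloads it (IRB.CreateLoad(MS.MsanMetadata, MS.MsanMetadataAlloca)).
MetadataPairModel callViaSlot(void (*Getter)(MetadataPairModel *, void *), void *Addr) {
  MetadataPairModel Slot;
  Getter(&Slot, Addr);
  return Slot;
}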
std::pair<Value *, Value *> getShadowOriginPtrKernelNoVec(Value *Addr,
IRBuilder<> &IRB,
Type *ShadowTy,
bool isStore) {
Value *ShadowOriginPtrs;
const DataLayout &DL = F.getParent()->getDataLayout();
- int Size = DL.getTypeStoreSize(ShadowTy);
+ TypeSize Size = DL.getTypeStoreSize(ShadowTy);
FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
Value *AddrCast =
IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0));
if (Getter) {
- ShadowOriginPtrs = IRB.CreateCall(Getter, AddrCast);
+ ShadowOriginPtrs = createMetadataCall(IRB, Getter, AddrCast);
} else {
Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
- ShadowOriginPtrs = IRB.CreateCall(isStore ? MS.MsanMetadataPtrForStoreN
- : MS.MsanMetadataPtrForLoadN,
- {AddrCast, SizeVal});
+ ShadowOriginPtrs = createMetadataCall(
+ IRB,
+ isStore ? MS.MsanMetadataPtrForStoreN : MS.MsanMetadataPtrForLoadN,
+ AddrCast, SizeVal);
}
Value *ShadowPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 0);
ShadowPtr = IRB.CreatePointerCast(ShadowPtr, PointerType::get(ShadowTy, 0));
@@ -1714,14 +1791,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
IRBuilder<> &IRB,
Type *ShadowTy,
bool isStore) {
- FixedVectorType *VectTy = dyn_cast<FixedVectorType>(Addr->getType());
+ VectorType *VectTy = dyn_cast<VectorType>(Addr->getType());
if (!VectTy) {
assert(Addr->getType()->isPointerTy());
return getShadowOriginPtrKernelNoVec(Addr, IRB, ShadowTy, isStore);
}
// TODO: Support callbacks with vectors of addresses.
- unsigned NumElements = VectTy->getNumElements();
+ unsigned NumElements = cast<FixedVectorType>(VectTy)->getNumElements();
Value *ShadowPtrs = ConstantInt::getNullValue(
FixedVectorType::get(ShadowTy->getPointerTo(), NumElements));
Value *OriginPtrs = nullptr;
@@ -2367,9 +2444,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin);
// No point in adding something that might result in 0 origin value.
if (!ConstOrigin || !ConstOrigin->isNullValue()) {
- Value *FlatShadow = MSV->convertShadowToScalar(OpShadow, IRB);
- Value *Cond =
- IRB.CreateICmpNE(FlatShadow, MSV->getCleanShadow(FlatShadow));
+ Value *Cond = MSV->convertToBool(OpShadow, IRB);
Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
}
}
@@ -2434,8 +2509,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
return IRB.CreateIntCast(V, dstTy, Signed);
if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
- cast<FixedVectorType>(dstTy)->getNumElements() ==
- cast<FixedVectorType>(srcTy)->getNumElements())
+ cast<VectorType>(dstTy)->getElementCount() ==
+ cast<VectorType>(srcTy)->getElementCount())
return IRB.CreateIntCast(V, dstTy, Signed);
Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
Value *V2 =
@@ -2487,7 +2562,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (ConstantInt *Elt =
dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) {
const APInt &V = Elt->getValue();
- APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
+ APInt V2 = APInt(V.getBitWidth(), 1) << V.countr_zero();
Elements.push_back(ConstantInt::get(EltTy, V2));
} else {
Elements.push_back(ConstantInt::get(EltTy, 1));
@@ -2497,7 +2572,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
} else {
if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) {
const APInt &V = Elt->getValue();
- APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros();
+ APInt V2 = APInt(V.getBitWidth(), 1) << V.countr_zero();
ShadowMul = ConstantInt::get(Ty, V2);
} else {
ShadowMul = ConstantInt::get(Ty, 1);
@@ -3356,7 +3431,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
Type *ShadowTy = getShadowTy(&I);
- Type *ElementShadowTy = cast<FixedVectorType>(ShadowTy)->getElementType();
+ Type *ElementShadowTy = cast<VectorType>(ShadowTy)->getElementType();
auto [ShadowPtr, OriginPtr] =
getShadowOriginPtr(Ptr, IRB, ElementShadowTy, {}, /*isStore*/ false);
@@ -3382,7 +3457,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Shadow = getShadow(Values);
Type *ElementShadowTy =
- getShadowTy(cast<FixedVectorType>(Values->getType())->getElementType());
+ getShadowTy(cast<VectorType>(Values->getType())->getElementType());
auto [ShadowPtr, OriginPtrs] =
getShadowOriginPtr(Ptr, IRB, ElementShadowTy, {}, /*isStore*/ true);
@@ -3415,7 +3490,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
Type *ShadowTy = getShadowTy(&I);
- Type *ElementShadowTy = cast<FixedVectorType>(ShadowTy)->getElementType();
+ Type *ElementShadowTy = cast<VectorType>(ShadowTy)->getElementType();
auto [ShadowPtrs, OriginPtrs] = getShadowOriginPtr(
Ptrs, IRB, ElementShadowTy, Alignment, /*isStore*/ false);
@@ -3448,7 +3523,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Shadow = getShadow(Values);
Type *ElementShadowTy =
- getShadowTy(cast<FixedVectorType>(Values->getType())->getElementType());
+ getShadowTy(cast<VectorType>(Values->getType())->getElementType());
auto [ShadowPtrs, OriginPtrs] = getShadowOriginPtr(
Ptrs, IRB, ElementShadowTy, Alignment, /*isStore*/ true);
@@ -3520,8 +3595,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *MaskedPassThruShadow = IRB.CreateAnd(
getShadow(PassThru), IRB.CreateSExt(IRB.CreateNeg(Mask), ShadowTy));
- Value *ConvertedShadow = convertShadowToScalar(MaskedPassThruShadow, IRB);
- Value *NotNull = convertToBool(ConvertedShadow, IRB, "_mscmp");
+ Value *NotNull = convertToBool(MaskedPassThruShadow, IRB, "_mscmp");
Value *PtrOrigin = IRB.CreateLoad(MS.OriginTy, OriginPtr);
Value *Origin = IRB.CreateSelect(NotNull, getOrigin(PassThru), PtrOrigin);
@@ -3645,11 +3719,21 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, getOrigin(&I, 0));
}
+ void handleIsFpClass(IntrinsicInst &I) {
+ IRBuilder<> IRB(&I);
+ Value *Shadow = getShadow(&I, 0);
+ setShadow(&I, IRB.CreateICmpNE(Shadow, getCleanShadow(Shadow)));
+ setOrigin(&I, getOrigin(&I, 0));
+ }
+
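
A one-line editorial model of the shadow rule installed for llvm.is.fpclass: the boolean result's shadow is set exactly when the operand's shadow is nonzero (lane-wise for vectors), and the operand's origin is forwarded.

#include <cstdint>
#include <utility>

std::pair<bool, uint32_t> isFpClassShadowModel(uint64_t OperandShadow, uint32_t OperandOrigin) {
  bool ResultShadow = (OperandShadow != 0); // icmp ne Shadow, clean shadow
  return {ResultShadow, OperandOrigin};     // origin passes through unchanged
}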
void visitIntrinsicInst(IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
case Intrinsic::abs:
handleAbsIntrinsic(I);
break;
+ case Intrinsic::is_fpclass:
+ handleIsFpClass(I);
+ break;
case Intrinsic::lifetime_start:
handleLifetimeStart(I);
break;
@@ -4391,11 +4475,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Origins are always i32, so any vector conditions must be flattened.
// FIXME: consider tracking vector origins for app vectors?
if (B->getType()->isVectorTy()) {
- Type *FlatTy = getShadowTyNoVec(B->getType());
- B = IRB.CreateICmpNE(IRB.CreateBitCast(B, FlatTy),
- ConstantInt::getNullValue(FlatTy));
- Sb = IRB.CreateICmpNE(IRB.CreateBitCast(Sb, FlatTy),
- ConstantInt::getNullValue(FlatTy));
+ B = convertToBool(B, IRB);
+ Sb = convertToBool(Sb, IRB);
}
// a = select b, c, d
// Oa = Sb ? Ob : (b ? Oc : Od)
@@ -4490,9 +4571,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
if (!ElemTy->isSized())
return;
- int Size = DL.getTypeStoreSize(ElemTy);
Value *Ptr = IRB.CreatePointerCast(Operand, IRB.getInt8PtrTy());
- Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
+ Value *SizeVal =
+ IRB.CreateTypeSize(MS.IntptrTy, DL.getTypeStoreSize(ElemTy));
IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Ptr, SizeVal});
}
@@ -4600,8 +4681,8 @@ struct VarArgAMD64Helper : public VarArgHelper {
Function &F;
MemorySanitizer &MS;
MemorySanitizerVisitor &MSV;
- Value *VAArgTLSCopy = nullptr;
- Value *VAArgTLSOriginCopy = nullptr;
+ AllocaInst *VAArgTLSCopy = nullptr;
+ AllocaInst *VAArgTLSOriginCopy = nullptr;
Value *VAArgOverflowSize = nullptr;
SmallVector<CallInst *, 16> VAStartInstrumentationList;
@@ -4721,7 +4802,7 @@ struct VarArgAMD64Helper : public VarArgHelper {
IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment);
if (MS.TrackOrigins) {
Value *Origin = MSV.getOrigin(A);
- unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
+ TypeSize StoreSize = DL.getTypeStoreSize(Shadow->getType());
MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
std::max(kShadowTLSAlignment, kMinOriginAlignment));
}
@@ -4797,11 +4878,20 @@ struct VarArgAMD64Helper : public VarArgHelper {
Value *CopySize = IRB.CreateAdd(
ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset), VAArgOverflowSize);
VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
- IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
+ VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
+ IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
+ CopySize, kShadowTLSAlignment, false);
+
+ Value *SrcSize = IRB.CreateBinaryIntrinsic(
+ Intrinsic::umin, CopySize,
+ ConstantInt::get(MS.IntptrTy, kParamTLSSize));
+ IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
+ kShadowTLSAlignment, SrcSize);
if (MS.TrackOrigins) {
VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
- IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS,
- Align(8), CopySize);
+ VAArgTLSOriginCopy->setAlignment(kShadowTLSAlignment);
+ IRB.CreateMemCpy(VAArgTLSOriginCopy, kShadowTLSAlignment,
+ MS.VAArgOriginTLS, kShadowTLSAlignment, SrcSize);
}
}
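
The same three-step pattern (aligned alloca, zero fill, clamped copy) is repeated for the MIPS64, AArch64, PowerPC64 and SystemZ helpers below. This editorial sketch restates it on plain memory, with ParamTLSSize standing for the fixed size of the va_arg TLS area (MS.VAArgTLS) that the umin clamp protects against overreading.

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <vector>

void backupVaArgTLSModel(std::vector<uint8_t> &Copy, const uint8_t *VAArgTLS,
                         uint64_t CopySize, uint64_t ParamTLSSize) {
  Copy.assign(CopySize, 0);                            // CreateMemSet: pre-zero the whole backup
  uint64_t SrcSize = std::min(CopySize, ParamTLSSize); // umin clamp: never read past the TLS area
  std::memcpy(Copy.data(), VAArgTLS, SrcSize);         // copy only the bytes the TLS area holds
}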
@@ -4859,7 +4949,7 @@ struct VarArgMIPS64Helper : public VarArgHelper {
Function &F;
MemorySanitizer &MS;
MemorySanitizerVisitor &MSV;
- Value *VAArgTLSCopy = nullptr;
+ AllocaInst *VAArgTLSCopy = nullptr;
Value *VAArgSize = nullptr;
SmallVector<CallInst *, 16> VAStartInstrumentationList;
@@ -4944,7 +5034,15 @@ struct VarArgMIPS64Helper : public VarArgHelper {
// If there is a va_start in this function, make a backup copy of
// va_arg_tls somewhere in the function entry block.
VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
- IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
+ VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
+ IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
+ CopySize, kShadowTLSAlignment, false);
+
+ Value *SrcSize = IRB.CreateBinaryIntrinsic(
+ Intrinsic::umin, CopySize,
+ ConstantInt::get(MS.IntptrTy, kParamTLSSize));
+ IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
+ kShadowTLSAlignment, SrcSize);
}
// Instrument va_start.
@@ -4986,7 +5084,7 @@ struct VarArgAArch64Helper : public VarArgHelper {
Function &F;
MemorySanitizer &MS;
MemorySanitizerVisitor &MSV;
- Value *VAArgTLSCopy = nullptr;
+ AllocaInst *VAArgTLSCopy = nullptr;
Value *VAArgOverflowSize = nullptr;
SmallVector<CallInst *, 16> VAStartInstrumentationList;
@@ -5130,7 +5228,15 @@ struct VarArgAArch64Helper : public VarArgHelper {
Value *CopySize = IRB.CreateAdd(
ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset), VAArgOverflowSize);
VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
- IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
+ VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
+ IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
+ CopySize, kShadowTLSAlignment, false);
+
+ Value *SrcSize = IRB.CreateBinaryIntrinsic(
+ Intrinsic::umin, CopySize,
+ ConstantInt::get(MS.IntptrTy, kParamTLSSize));
+ IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
+ kShadowTLSAlignment, SrcSize);
}
Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
@@ -5230,7 +5336,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
Function &F;
MemorySanitizer &MS;
MemorySanitizerVisitor &MSV;
- Value *VAArgTLSCopy = nullptr;
+ AllocaInst *VAArgTLSCopy = nullptr;
Value *VAArgSize = nullptr;
SmallVector<CallInst *, 16> VAStartInstrumentationList;
@@ -5373,8 +5479,17 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
if (!VAStartInstrumentationList.empty()) {
// If there is a va_start in this function, make a backup copy of
// va_arg_tls somewhere in the function entry block.
+
VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
- IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
+ VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
+ IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
+ CopySize, kShadowTLSAlignment, false);
+
+ Value *SrcSize = IRB.CreateBinaryIntrinsic(
+ Intrinsic::umin, CopySize,
+ ConstantInt::get(MS.IntptrTy, kParamTLSSize));
+ IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
+ kShadowTLSAlignment, SrcSize);
}
// Instrument va_start.
@@ -5416,8 +5531,9 @@ struct VarArgSystemZHelper : public VarArgHelper {
Function &F;
MemorySanitizer &MS;
MemorySanitizerVisitor &MSV;
- Value *VAArgTLSCopy = nullptr;
- Value *VAArgTLSOriginCopy = nullptr;
+ bool IsSoftFloatABI;
+ AllocaInst *VAArgTLSCopy = nullptr;
+ AllocaInst *VAArgTLSOriginCopy = nullptr;
Value *VAArgOverflowSize = nullptr;
SmallVector<CallInst *, 16> VAStartInstrumentationList;
@@ -5434,9 +5550,10 @@ struct VarArgSystemZHelper : public VarArgHelper {
VarArgSystemZHelper(Function &F, MemorySanitizer &MS,
MemorySanitizerVisitor &MSV)
- : F(F), MS(MS), MSV(MSV) {}
+ : F(F), MS(MS), MSV(MSV),
+ IsSoftFloatABI(F.getFnAttribute("use-soft-float").getValueAsBool()) {}
- ArgKind classifyArgument(Type *T, bool IsSoftFloatABI) {
+ ArgKind classifyArgument(Type *T) {
// T is a SystemZABIInfo::classifyArgumentType() output, and there are
// only a few possibilities of what it can be. In particular, enums, single
// element structs and large types have already been taken care of.
@@ -5474,9 +5591,6 @@ struct VarArgSystemZHelper : public VarArgHelper {
}
void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
- bool IsSoftFloatABI = CB.getCalledFunction()
- ->getFnAttribute("use-soft-float")
- .getValueAsBool();
unsigned GpOffset = SystemZGpOffset;
unsigned FpOffset = SystemZFpOffset;
unsigned VrIndex = 0;
@@ -5487,7 +5601,7 @@ struct VarArgSystemZHelper : public VarArgHelper {
// SystemZABIInfo does not produce ByVal parameters.
assert(!CB.paramHasAttr(ArgNo, Attribute::ByVal));
Type *T = A->getType();
- ArgKind AK = classifyArgument(T, IsSoftFloatABI);
+ ArgKind AK = classifyArgument(T);
if (AK == ArgKind::Indirect) {
T = PointerType::get(T, 0);
AK = ArgKind::GeneralPurpose;
@@ -5587,7 +5701,7 @@ struct VarArgSystemZHelper : public VarArgHelper {
IRB.CreateStore(Shadow, ShadowBase);
if (MS.TrackOrigins) {
Value *Origin = MSV.getOrigin(A);
- unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
+ TypeSize StoreSize = DL.getTypeStoreSize(Shadow->getType());
MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
kMinOriginAlignment);
}
@@ -5642,11 +5756,15 @@ struct VarArgSystemZHelper : public VarArgHelper {
MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), Alignment,
/*isStore*/ true);
// TODO(iii): copy only fragments filled by visitCallBase()
+ // TODO(iii): support packed-stack && !use-soft-float
+ // For use-soft-float functions, it is enough to copy just the GPRs.
+ unsigned RegSaveAreaSize =
+ IsSoftFloatABI ? SystemZGpEndOffset : SystemZRegSaveAreaSize;
IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
- SystemZRegSaveAreaSize);
+ RegSaveAreaSize);
if (MS.TrackOrigins)
IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
- Alignment, SystemZRegSaveAreaSize);
+ Alignment, RegSaveAreaSize);
}
void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) {
@@ -5688,11 +5806,20 @@ struct VarArgSystemZHelper : public VarArgHelper {
IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, SystemZOverflowOffset),
VAArgOverflowSize);
VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
- IRB.CreateMemCpy(VAArgTLSCopy, Align(8), MS.VAArgTLS, Align(8), CopySize);
+ VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
+ IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
+ CopySize, kShadowTLSAlignment, false);
+
+ Value *SrcSize = IRB.CreateBinaryIntrinsic(
+ Intrinsic::umin, CopySize,
+ ConstantInt::get(MS.IntptrTy, kParamTLSSize));
+ IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
+ kShadowTLSAlignment, SrcSize);
if (MS.TrackOrigins) {
VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
- IRB.CreateMemCpy(VAArgTLSOriginCopy, Align(8), MS.VAArgOriginTLS,
- Align(8), CopySize);
+ VAArgTLSOriginCopy->setAlignment(kShadowTLSAlignment);
+ IRB.CreateMemCpy(VAArgTLSOriginCopy, kShadowTLSAlignment,
+ MS.VAArgOriginTLS, kShadowTLSAlignment, SrcSize);
}
}
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 4d4eb6f8ce80..3c8f25d73c62 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -48,7 +48,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
-#include "CFGMST.h"
#include "ValueProfileCollector.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
@@ -56,17 +55,13 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -78,6 +73,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
@@ -99,7 +95,6 @@
#include "llvm/IR/Value.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
-#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/Casting.h"
@@ -109,27 +104,27 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
-#include "llvm/Support/HashBuilder.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
+#include "llvm/Transforms/Instrumentation/CFGMST.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/MisExpect.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
-#include <map>
#include <memory>
#include <numeric>
#include <optional>
-#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
using namespace llvm;
-using namespace llvm::memprof;
using ProfileCount = Function::ProfileCount;
using VPCandidateInfo = ValueProfileCollector::CandidateInfo;
@@ -144,7 +139,6 @@ STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
-STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
STATISTIC(NumOfCSPGOSelectInsts,
@@ -159,6 +153,7 @@ STATISTIC(NumOfCSPGOFunc,
STATISTIC(NumOfCSPGOMismatch,
"Number of functions having mismatch profile in CSPGO.");
STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
+STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
// Command line option to specify the file to read profile from. This is
// mainly used for testing.
@@ -200,31 +195,31 @@ static cl::opt<bool> DoComdatRenaming(
cl::desc("Append function hash to the name of COMDAT function to avoid "
"function hash mismatch due to the preinliner"));
+namespace llvm {
// Command line option to enable/disable the warning about missing profile
// information.
-static cl::opt<bool>
- PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden,
- cl::desc("Use this option to turn on/off "
- "warnings about missing profile data for "
- "functions."));
+cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
+ cl::Hidden,
+ cl::desc("Use this option to turn on/off "
+ "warnings about missing profile data for "
+ "functions."));
-namespace llvm {
// Command line option to enable/disable the warning about a hash mismatch in
// the profile data.
cl::opt<bool>
NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
cl::desc("Use this option to turn off/on "
"warnings about profile cfg mismatch."));
-} // namespace llvm
// Command line option to enable/disable the warning about a hash mismatch in
// the profile data for Comdat functions, which often turns out to be false
// positive due to the pre-instrumentation inline.
-static cl::opt<bool> NoPGOWarnMismatchComdatWeak(
+cl::opt<bool> NoPGOWarnMismatchComdatWeak(
"no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
cl::desc("The option is used to turn on/off "
"warnings about hash mismatch for comdat "
"or weak functions."));
+} // namespace llvm
// Command line option to enable/disable select instruction instrumentation.
static cl::opt<bool>
@@ -268,6 +263,19 @@ static cl::opt<bool> PGOFunctionEntryCoverage(
cl::desc(
"Use this option to enable function entry coverage instrumentation."));
+static cl::opt<bool> PGOBlockCoverage(
+ "pgo-block-coverage",
+ cl::desc("Use this option to enable basic block coverage instrumentation"));
+
+static cl::opt<bool>
+ PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
+ cl::desc("Create a dot file of CFGs with block "
+ "coverage inference information"));
+
+static cl::opt<bool> PGOTemporalInstrumentation(
+ "pgo-temporal-instrumentation",
+ cl::desc("Use this option to enable temporal instrumentation"));
+
static cl::opt<bool>
PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
cl::desc("Fix function entry count in profile use."));
@@ -305,10 +313,6 @@ static cl::opt<unsigned> PGOFunctionSizeThreshold(
"pgo-function-size-threshold", cl::Hidden,
cl::desc("Do not instrument functions smaller than this threshold."));
-static cl::opt<bool> MatchMemProf(
- "pgo-match-memprof", cl::init(true), cl::Hidden,
- cl::desc("Perform matching and annotation of memprof profiles."));
-
static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold(
"pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
cl::desc("Do not instrument functions with the number of critical edges "
@@ -344,7 +348,7 @@ static std::string getBranchCondString(Instruction *TI) {
std::string result;
raw_string_ostream OS(result);
- OS << CmpInst::getPredicateName(CI->getPredicate()) << "_";
+ OS << CI->getPredicate() << "_";
CI->getOperand(0)->getType()->print(OS, true);
Value *RHS = CI->getOperand(1);
@@ -383,6 +387,10 @@ static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) {
if (PGOFunctionEntryCoverage)
ProfileVersion |=
VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
+ if (PGOBlockCoverage)
+ ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
+ if (PGOTemporalInstrumentation)
+ ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
auto IRLevelVersionVariable = new GlobalVariable(
M, IntTy64, true, GlobalValue::WeakAnyLinkage,
Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
@@ -415,35 +423,37 @@ struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
GlobalVariable *FuncNameVar = nullptr;
uint64_t FuncHash = 0;
PGOUseFunc *UseFunc = nullptr;
+ bool HasSingleByteCoverage;
- SelectInstVisitor(Function &Func) : F(Func) {}
+ SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
+ : F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
- void countSelects(Function &Func) {
+ void countSelects() {
NSIs = 0;
Mode = VM_counting;
- visit(Func);
+ visit(F);
}
// Visit the IR stream and instrument all select instructions. \p
// Ind is a pointer to the counter index variable; \p TotalNC
// is the total number of counters; \p FNV is the pointer to the
// PGO function name var; \p FHash is the function hash.
- void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC,
- GlobalVariable *FNV, uint64_t FHash) {
+ void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV,
+ uint64_t FHash) {
Mode = VM_instrument;
CurCtrIdx = Ind;
TotalNumCtrs = TotalNC;
FuncHash = FHash;
FuncNameVar = FNV;
- visit(Func);
+ visit(F);
}
// Visit the IR stream and annotate all select instructions.
- void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) {
+ void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
Mode = VM_annotate;
UseFunc = UF;
CurCtrIdx = Ind;
- visit(Func);
+ visit(F);
}
void instrumentOneSelectInst(SelectInst &SI);
@@ -457,52 +467,41 @@ struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
unsigned getNumOfSelectInsts() const { return NSIs; }
};
-} // end anonymous namespace
-
-namespace {
-
-/// An MST based instrumentation for PGO
-///
-/// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO
-/// in the function level.
+/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
+/// based instrumentation.
+/// Note that the CFG can be a multi-graph. So there might be multiple edges
+/// with the same SrcBB and DestBB.
struct PGOEdge {
- // This class implements the CFG edges. Note the CFG can be a multi-graph.
- // So there might be multiple edges with same SrcBB and DestBB.
- const BasicBlock *SrcBB;
- const BasicBlock *DestBB;
+ BasicBlock *SrcBB;
+ BasicBlock *DestBB;
uint64_t Weight;
bool InMST = false;
bool Removed = false;
bool IsCritical = false;
- PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
+ PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
: SrcBB(Src), DestBB(Dest), Weight(W) {}
- // Return the information string of an edge.
+ /// Return the information string of an edge.
std::string infoString() const {
return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
- (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str();
+ (IsCritical ? "c" : " ") + " W=" + Twine(Weight))
+ .str();
}
};
-// This class stores the auxiliary information for each BB.
-struct BBInfo {
- BBInfo *Group;
+/// This class stores the auxiliary information for each BB in the MST.
+struct PGOBBInfo {
+ PGOBBInfo *Group;
uint32_t Index;
uint32_t Rank = 0;
- BBInfo(unsigned IX) : Group(this), Index(IX) {}
+ PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}
- // Return the information string of this object.
+ /// Return the information string of this object.
std::string infoString() const {
return (Twine("Index=") + Twine(Index)).str();
}
-
- // Empty function -- only applicable to UseBBInfo.
- void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
-
- // Empty function -- only applicable to UseBBInfo.
- void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
};
// This class implements the CFG edges. Note the CFG can be a multi-graph.
@@ -534,6 +533,16 @@ public:
// The Minimum Spanning Tree of function CFG.
CFGMST<Edge, BBInfo> MST;
+ const std::optional<BlockCoverageInference> BCI;
+
+ static std::optional<BlockCoverageInference>
+ constructBCI(Function &Func, bool HasSingleByteCoverage,
+ bool InstrumentFuncEntry) {
+ if (HasSingleByteCoverage)
+ return BlockCoverageInference(Func, InstrumentFuncEntry);
+ return {};
+ }
+
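
The static constructBCI factory exists so that the const std::optional member can be engaged conditionally from the constructor's init list; an editorial sketch of the same idiom with placeholder types:

#include <optional>

struct InferenceModel {            // stand-in for BlockCoverageInference
  explicit InferenceModel(bool InstrumentEntry) : InstrumentEntry(InstrumentEntry) {}
  bool InstrumentEntry;
};

struct InstrumenterModel {
  const std::optional<InferenceModel> BCI;

  static std::optional<InferenceModel> construct(bool HasSingleByteCoverage,
                                                 bool InstrumentFuncEntry) {
    if (HasSingleByteCoverage)
      return InferenceModel(InstrumentFuncEntry); // engaged only in coverage mode
    return std::nullopt;
  }

  InstrumenterModel(bool HasSingleByteCoverage, bool InstrumentFuncEntry)
      : BCI(construct(HasSingleByteCoverage, InstrumentFuncEntry)) {}
};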
// Collect all the BBs that will be instrumented, and store them in
// InstrumentBBs.
void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
@@ -549,9 +558,9 @@ public:
BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
// Dump edges and BB information.
- void dumpInfo(std::string Str = "") const {
- MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
- Twine(FunctionHash) + "\t" + Str);
+ void dumpInfo(StringRef Str = "") const {
+ MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
+ " Hash: " + Twine(FunctionHash) + "\t" + Str);
}
FuncPGOInstrumentation(
@@ -559,12 +568,16 @@ public:
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
- bool InstrumentFuncEntry = true)
+ bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
: F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
- TLI(TLI), ValueSites(IPVK_Last + 1), SIVisitor(Func),
- MST(F, InstrumentFuncEntry, BPI, BFI) {
+ TLI(TLI), ValueSites(IPVK_Last + 1),
+ SIVisitor(Func, HasSingleByteCoverage),
+ MST(F, InstrumentFuncEntry, BPI, BFI),
+ BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
+ if (BCI && PGOViewBlockCoverageGraph)
+ BCI->viewBlockCoverageGraph();
// This should be done before CFG hash computation.
- SIVisitor.countSelects(Func);
+ SIVisitor.countSelects();
ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
if (!IsCS) {
NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
@@ -637,7 +650,11 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
- updateJCH((uint64_t)MST.AllEdges.size());
+ if (BCI) {
+ updateJCH(BCI->getInstrumentedBlocksHash());
+ } else {
+ updateJCH((uint64_t)MST.AllEdges.size());
+ }
// Hash format for context sensitive profile. Reserve 4 bits for other
// information.
@@ -725,11 +742,18 @@ void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
}
}
-// Collect all the BBs that will be instruments and return them in
-// InstrumentBBs and setup InEdges/OutEdge for UseBBInfo.
+/// Collect all the BBs that will be instrumented and add them to
+/// `InstrumentBBs`.
template <class Edge, class BBInfo>
void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
std::vector<BasicBlock *> &InstrumentBBs) {
+ if (BCI) {
+ for (auto &BB : F)
+ if (BCI->shouldInstrumentBlock(BB))
+ InstrumentBBs.push_back(&BB);
+ return;
+ }
+
// Use a worklist as we will update the vector during the iteration.
std::vector<Edge *> EdgeList;
EdgeList.reserve(MST.AllEdges.size());
@@ -741,18 +765,6 @@ void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
if (InstrBB)
InstrumentBBs.push_back(InstrBB);
}
-
- // Set up InEdges/OutEdges for all BBs.
- for (auto &E : MST.AllEdges) {
- if (E->Removed)
- continue;
- const BasicBlock *SrcBB = E->SrcBB;
- const BasicBlock *DestBB = E->DestBB;
- BBInfo &SrcInfo = getBBInfo(SrcBB);
- BBInfo &DestInfo = getBBInfo(DestBB);
- SrcInfo.addOutEdge(E.get());
- DestInfo.addInEdge(E.get());
- }
}
// Given a CFG E to be instrumented, find which BB to place the instrumented
@@ -762,8 +774,8 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
if (E->InMST || E->Removed)
return nullptr;
- BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
- BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
+ BasicBlock *SrcBB = E->SrcBB;
+ BasicBlock *DestBB = E->DestBB;
// For a fake edge, instrument the real BB.
if (SrcBB == nullptr)
return DestBB;
@@ -852,12 +864,15 @@ static void instrumentOneFunc(
BlockFrequencyInfo *BFI,
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
bool IsCS) {
- // Split indirectbr critical edges here before computing the MST rather than
- // later in getInstrBB() to avoid invalidating it.
- SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
+ if (!PGOBlockCoverage) {
+ // Split indirectbr critical edges here before computing the MST rather than
+ // later in getInstrBB() to avoid invalidating it.
+ SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
+ }
- FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
- F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
+ FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
+ F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry,
+ PGOBlockCoverage);
Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
auto Name = ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy);
@@ -880,6 +895,18 @@ static void instrumentOneFunc(
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
uint32_t I = 0;
+ if (PGOTemporalInstrumentation) {
+ NumCounters += PGOBlockCoverage ? 8 : 1;
+ auto &EntryBB = F.getEntryBlock();
+ IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
+ // llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
+ // i32 <index>)
+ Builder.CreateCall(
+ Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp),
+ {Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});
+ I += PGOBlockCoverage ? 8 : 1;
+ }
+
for (auto *InstrBB : InstrumentBBs) {
IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
assert(Builder.GetInsertPoint() != InstrBB->end() &&
@@ -887,12 +914,14 @@ static void instrumentOneFunc(
// llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
// i32 <index>)
Builder.CreateCall(
- Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
+ Intrinsic::getDeclaration(M, PGOBlockCoverage
+ ? Intrinsic::instrprof_cover
+ : Intrinsic::instrprof_increment),
{Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
}
// Now instrument select instructions:
- FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar,
+ FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar,
FuncInfo.FunctionHash);
assert(I == NumCounters);
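
An editorial recap of the counter-index layout the hunk above produces, so the final assert is easy to follow: the optional timestamp slot(s) come first (8 under block coverage, 1 otherwise), then one counter per instrumented block, then one per select instruction.

unsigned countersNeededModel(bool Temporal, bool BlockCoverage,
                             unsigned NumInstrumentedBBs, unsigned NumSelects) {
  unsigned I = 0;
  if (Temporal)
    I += BlockCoverage ? 8 : 1;  // llvm.instrprof.timestamp slot(s)
  I += NumInstrumentedBBs;       // instrprof_cover or instrprof_increment per block
  I += NumSelects;               // emitted later by SIVisitor.instrumentSelects
  return I;                      // equals NumCounters, hence assert(I == NumCounters)
}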
@@ -947,12 +976,11 @@ namespace {
// This class represents a CFG edge in profile use compilation.
struct PGOUseEdge : public PGOEdge {
+ using PGOEdge::PGOEdge;
+
bool CountValid = false;
uint64_t CountValue = 0;
- PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
- : PGOEdge(Src, Dest, W) {}
-
// Set edge count value
void setEdgeCount(uint64_t Value) {
CountValue = Value;
@@ -971,7 +999,7 @@ struct PGOUseEdge : public PGOEdge {
using DirectEdges = SmallVector<PGOUseEdge *, 2>;
// This class stores the auxiliary information for each BB.
-struct UseBBInfo : public BBInfo {
+struct PGOUseBBInfo : public PGOBBInfo {
uint64_t CountValue = 0;
bool CountValid;
int32_t UnknownCountInEdge = 0;
@@ -979,10 +1007,7 @@ struct UseBBInfo : public BBInfo {
DirectEdges InEdges;
DirectEdges OutEdges;
- UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {}
-
- UseBBInfo(unsigned IX, uint64_t C)
- : BBInfo(IX), CountValue(C), CountValid(true) {}
+ PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX), CountValid(false) {}
// Set the profile count value for this BB.
void setBBInfoCount(uint64_t Value) {
@@ -993,8 +1018,9 @@ struct UseBBInfo : public BBInfo {
// Return the information string of this object.
std::string infoString() const {
if (!CountValid)
- return BBInfo::infoString();
- return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
+ return PGOBBInfo::infoString();
+ return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(CountValue))
+ .str();
}
// Add an OutEdge and update the edge count.
@@ -1030,22 +1056,25 @@ public:
PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
- ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry)
+ ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
+ bool HasSingleByteCoverage)
: F(Func), M(Modu), BFI(BFIin), PSI(PSI),
FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
- InstrumentFuncEntry),
+ InstrumentFuncEntry, HasSingleByteCoverage),
FreqAttr(FFA_Normal), IsCS(IsCS) {}
+ void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
+
// Read counts for the instrumented BB from profile.
bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
InstrProfRecord::CountPseudoKind &PseudoKind);
- // Read memprof data for the instrumented function from profile.
- bool readMemprof(IndexedInstrProfReader *PGOReader);
-
// Populate the counts for all BBs.
void populateCounters();
+ // Set block coverage based on profile coverage values.
+ void populateCoverage(IndexedInstrProfReader *PGOReader);
+
// Set the branch weights based on the count values.
void setBranchWeights();
@@ -1071,22 +1100,21 @@ public:
InstrProfRecord &getProfileRecord() { return ProfileRecord; }
// Return the auxiliary BB information.
- UseBBInfo &getBBInfo(const BasicBlock *BB) const {
+ PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
return FuncInfo.getBBInfo(BB);
}
// Return the auxiliary BB information if available.
- UseBBInfo *findBBInfo(const BasicBlock *BB) const {
+ PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
return FuncInfo.findBBInfo(BB);
}
Function &getFunc() const { return F; }
- void dumpInfo(std::string Str = "") const {
- FuncInfo.dumpInfo(Str);
- }
+ void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
+
private:
Function &F;
Module *M;
@@ -1094,7 +1122,7 @@ private:
ProfileSummaryInfo *PSI;
// This member stores the shared information with class PGOGenFunc.
- FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
+ FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
// The maximum count value in the profile. This is only used in PGO use
// compilation.
@@ -1122,9 +1150,6 @@ private:
// one unknown edge.
void setEdgeCount(DirectEdges &Edges, uint64_t Value);
- // Return FuncName string;
- std::string getFuncName() const { return FuncInfo.FuncName; }
-
// Set the hot/cold inline hints based on the count values.
// FIXME: This function should be removed once the functionality in
// the inliner is implemented.
@@ -1138,6 +1163,24 @@ private:
} // end anonymous namespace
+/// Set up InEdges/OutEdges for all BBs in the MST.
+static void
+setupBBInfoEdges(FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
+ // This is not required when there is block coverage inference.
+ if (FuncInfo.BCI)
+ return;
+ for (auto &E : FuncInfo.MST.AllEdges) {
+ if (E->Removed)
+ continue;
+ const BasicBlock *SrcBB = E->SrcBB;
+ const BasicBlock *DestBB = E->DestBB;
+ PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
+ PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
+ SrcInfo.addOutEdge(E.get());
+ DestInfo.addInEdge(E.get());
+ }
+}
+
// Visit all the edges and assign the count value for the instrumented
// edges and the BB. Return false on error.
bool PGOUseFunc::setInstrumentedCounts(
@@ -1145,6 +1188,9 @@ bool PGOUseFunc::setInstrumentedCounts(
std::vector<BasicBlock *> InstrumentBBs;
FuncInfo.getInstrumentBBs(InstrumentBBs);
+
+ setupBBInfoEdges(FuncInfo);
+
unsigned NumCounters =
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
// The number of counters here should match the number of counters
@@ -1158,7 +1204,7 @@ bool PGOUseFunc::setInstrumentedCounts(
uint32_t I = 0;
for (BasicBlock *InstrBB : InstrumentBBs) {
uint64_t CountValue = CountFromProfile[I++];
- UseBBInfo &Info = getBBInfo(InstrBB);
+ PGOUseBBInfo &Info = getBBInfo(InstrBB);
// If we reach here, we know that we have some nonzero count
// values in this function. The entry count should not be 0.
// Fix it if necessary.
@@ -1183,7 +1229,7 @@ bool PGOUseFunc::setInstrumentedCounts(
if (E->Removed || E->InMST)
continue;
const BasicBlock *SrcBB = E->SrcBB;
- UseBBInfo &SrcInfo = getBBInfo(SrcBB);
+ PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
// If only one out-edge, the edge profile count should be the same as BB
// profile count.
@@ -1191,7 +1237,7 @@ bool PGOUseFunc::setInstrumentedCounts(
setEdgeCount(E.get(), SrcInfo.CountValue);
else {
const BasicBlock *DestBB = E->DestBB;
- UseBBInfo &DestInfo = getBBInfo(DestBB);
+ PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
// If only one in-edge, the edge profile count should be the same as BB
// profile count.
if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
@@ -1222,8 +1268,7 @@ void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
}
// Emit function metadata indicating PGO profile mismatch.
-static void annotateFunctionWithHashMismatch(Function &F,
- LLVMContext &ctx) {
+static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx) {
const char MetadataName[] = "instr_prof_hash_mismatch";
SmallVector<Metadata *, 2> Names;
// If this metadata already exists, ignore.
@@ -1231,7 +1276,7 @@ static void annotateFunctionWithHashMismatch(Function &F,
if (Existing) {
MDTuple *Tuple = cast<MDTuple>(Existing);
for (const auto &N : Tuple->operands()) {
- if (cast<MDString>(N.get())->getString() == MetadataName)
+ if (N.equalsStr(MetadataName))
return;
Names.push_back(N.get());
}
@@ -1243,255 +1288,44 @@ static void annotateFunctionWithHashMismatch(Function &F,
F.setMetadata(LLVMContext::MD_annotation, MD);
}
-static void addCallsiteMetadata(Instruction &I,
- std::vector<uint64_t> &InlinedCallStack,
- LLVMContext &Ctx) {
- I.setMetadata(LLVMContext::MD_callsite,
- buildCallstackMetadata(InlinedCallStack, Ctx));
-}
-
-static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
- uint32_t Column) {
- llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little>
- HashBuilder;
- HashBuilder.add(Function, LineOffset, Column);
- llvm::BLAKE3Result<8> Hash = HashBuilder.final();
- uint64_t Id;
- std::memcpy(&Id, Hash.data(), sizeof(Hash));
- return Id;
-}
-
-static uint64_t computeStackId(const memprof::Frame &Frame) {
- return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
-}
-
-static void addCallStack(CallStackTrie &AllocTrie,
- const AllocationInfo *AllocInfo) {
- SmallVector<uint64_t> StackIds;
- for (auto StackFrame : AllocInfo->CallStack)
- StackIds.push_back(computeStackId(StackFrame));
- auto AllocType = getAllocType(AllocInfo->Info.getMaxAccessCount(),
- AllocInfo->Info.getMinSize(),
- AllocInfo->Info.getMinLifetime());
- AllocTrie.addCallStack(AllocType, StackIds);
-}
-
-// Helper to compare the InlinedCallStack computed from an instruction's debug
-// info to a list of Frames from profile data (either the allocation data or a
-// callsite). For callsites, the StartIndex to use in the Frame array may be
-// non-zero.
-static bool
-stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
- ArrayRef<uint64_t> InlinedCallStack,
- unsigned StartIndex = 0) {
- auto StackFrame = ProfileCallStack.begin() + StartIndex;
- auto InlCallStackIter = InlinedCallStack.begin();
- for (; StackFrame != ProfileCallStack.end() &&
- InlCallStackIter != InlinedCallStack.end();
- ++StackFrame, ++InlCallStackIter) {
- uint64_t StackId = computeStackId(*StackFrame);
- if (StackId != *InlCallStackIter)
- return false;
- }
- // Return true if we found and matched all stack ids from the call
- // instruction.
- return InlCallStackIter == InlinedCallStack.end();
-}
-
-bool PGOUseFunc::readMemprof(IndexedInstrProfReader *PGOReader) {
- if (!MatchMemProf)
- return true;
-
- auto &Ctx = M->getContext();
-
- auto FuncGUID = Function::getGUID(FuncInfo.FuncName);
- Expected<memprof::MemProfRecord> MemProfResult =
- PGOReader->getMemProfRecord(FuncGUID);
- if (Error E = MemProfResult.takeError()) {
- handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
- auto Err = IPE.get();
- bool SkipWarning = false;
- LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
- << FuncInfo.FuncName << ": ");
- if (Err == instrprof_error::unknown_function) {
- NumOfMemProfMissing++;
- SkipWarning = !PGOWarnMissing;
- LLVM_DEBUG(dbgs() << "unknown function");
- } else if (Err == instrprof_error::hash_mismatch) {
- SkipWarning =
- NoPGOWarnMismatch ||
- (NoPGOWarnMismatchComdatWeak &&
- (F.hasComdat() ||
- F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
- LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
- }
-
- if (SkipWarning)
- return;
-
- std::string Msg =
- (IPE.message() + Twine(" ") + F.getName().str() + Twine(" Hash = ") +
- std::to_string(FuncInfo.FunctionHash))
- .str();
-
- Ctx.diagnose(
- DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
- });
- return false;
- }
-
- // Build maps of the location hash to all profile data with that leaf location
- // (allocation info and the callsites).
- std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
- // For the callsites we need to record the index of the associated frame in
- // the frame array (see comments below where the map entries are added).
- std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
- LocHashToCallSites;
- const auto MemProfRec = std::move(MemProfResult.get());
- for (auto &AI : MemProfRec.AllocSites) {
- // Associate the allocation info with the leaf frame. The later matching
- // code will match any inlined call sequences in the IR with a longer prefix
- // of call stack frames.
- uint64_t StackId = computeStackId(AI.CallStack[0]);
- LocHashToAllocInfo[StackId].insert(&AI);
- }
- for (auto &CS : MemProfRec.CallSites) {
- // Need to record all frames from leaf up to and including this function,
- // as any of these may or may not have been inlined at this point.
- unsigned Idx = 0;
- for (auto &StackFrame : CS) {
- uint64_t StackId = computeStackId(StackFrame);
- LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
- // Once we find this function, we can stop recording.
- if (StackFrame.Function == FuncGUID)
- break;
+void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
+ handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
+ auto &Ctx = M->getContext();
+ auto Err = IPE.get();
+ bool SkipWarning = false;
+ LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
+ << FuncInfo.FuncName << ": ");
+ if (Err == instrprof_error::unknown_function) {
+ IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
+ SkipWarning = !PGOWarnMissing;
+ LLVM_DEBUG(dbgs() << "unknown function");
+ } else if (Err == instrprof_error::hash_mismatch ||
+ Err == instrprof_error::malformed) {
+ IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
+ SkipWarning =
+ NoPGOWarnMismatch ||
+ (NoPGOWarnMismatchComdatWeak &&
+ (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
+ F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
+ LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
+ << " skip=" << SkipWarning << ")");
+ // Emit function metadata indicating PGO profile mismatch.
+ annotateFunctionWithHashMismatch(F, M->getContext());
}
- assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
- }
-
- auto GetOffset = [](const DILocation *DIL) {
- return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
- 0xffff;
- };
-
- // Now walk the instructions, looking up the associated profile data using
- // dbug locations.
- for (auto &BB : F) {
- for (auto &I : BB) {
- if (I.isDebugOrPseudoInst())
- continue;
- // We are only interested in calls (allocation or interior call stack
- // context calls).
- auto *CI = dyn_cast<CallBase>(&I);
- if (!CI)
- continue;
- auto *CalledFunction = CI->getCalledFunction();
- if (CalledFunction && CalledFunction->isIntrinsic())
- continue;
- // List of call stack ids computed from the location hashes on debug
- // locations (leaf to inlined at root).
- std::vector<uint64_t> InlinedCallStack;
- // Was the leaf location found in one of the profile maps?
- bool LeafFound = false;
- // If leaf was found in a map, iterators pointing to its location in both
- // of the maps. It might exist in neither, one, or both (the latter case
- // can happen because we don't currently have discriminators to
- // distinguish the case when a single line/col maps to both an allocation
- // and another callsite).
- std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
- AllocInfoIter;
- std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *,
- unsigned>>>::iterator CallSitesIter;
- for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
- DIL = DIL->getInlinedAt()) {
- // Use C++ linkage name if possible. Need to compile with
- // -fdebug-info-for-profiling to get linkage name.
- StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
- if (Name.empty())
- Name = DIL->getScope()->getSubprogram()->getName();
- auto CalleeGUID = Function::getGUID(Name);
- auto StackId =
- computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn());
- // LeafFound will only be false on the first iteration, since we either
- // set it true or break out of the loop below.
- if (!LeafFound) {
- AllocInfoIter = LocHashToAllocInfo.find(StackId);
- CallSitesIter = LocHashToCallSites.find(StackId);
- // Check if the leaf is in one of the maps. If not, no need to look
- // further at this call.
- if (AllocInfoIter == LocHashToAllocInfo.end() &&
- CallSitesIter == LocHashToCallSites.end())
- break;
- LeafFound = true;
- }
- InlinedCallStack.push_back(StackId);
- }
- // If leaf not in either of the maps, skip inst.
- if (!LeafFound)
- continue;
- // First add !memprof metadata from allocation info, if we found the
- // instruction's leaf location in that map, and if the rest of the
- // instruction's locations match the prefix Frame locations on an
- // allocation context with the same leaf.
- if (AllocInfoIter != LocHashToAllocInfo.end()) {
- // Only consider allocations via new, to reduce unnecessary metadata,
- // since those are the only allocations that will be targeted initially.
- if (!isNewLikeFn(CI, &FuncInfo.TLI))
- continue;
- // We may match this instruction's location list to multiple MIB
- // contexts. Add them to a Trie specialized for trimming the contexts to
- // the minimal needed to disambiguate contexts with unique behavior.
- CallStackTrie AllocTrie;
- for (auto *AllocInfo : AllocInfoIter->second) {
- // Check the full inlined call stack against this one.
- // If we found and thus matched all frames on the call, include
- // this MIB.
- if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
- InlinedCallStack))
- addCallStack(AllocTrie, AllocInfo);
- }
- // We might not have matched any to the full inlined call stack.
- // But if we did, create and attach metadata, or a function attribute if
- // all contexts have identical profiled behavior.
- if (!AllocTrie.empty()) {
- // MemprofMDAttached will be false if a function attribute was
- // attached.
- bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
- assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
- if (MemprofMDAttached) {
- // Add callsite metadata for the instruction's location list so that
- // it is simpler later on to identify which part of the MIB contexts
- // are from this particular instruction (including during inlining,
- // when the callsite metadata will be updated appropriately).
- // FIXME: can this be changed to strip out the matching stack
- // context ids from the MIB contexts and not add any callsite
- // metadata here to save space?
- addCallsiteMetadata(I, InlinedCallStack, Ctx);
- }
- }
- continue;
- }
+ LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
+ if (SkipWarning)
+ return;
- // Otherwise, add callsite metadata. If we reach here then we found the
- // instruction's leaf location in the callsites map and not the allocation
- // map.
- assert(CallSitesIter != LocHashToCallSites.end());
- for (auto CallStackIdx : CallSitesIter->second) {
- // If we found and thus matched all frames on the call, create and
- // attach call stack metadata.
- if (stackFrameIncludesInlinedCallStack(
- *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
- addCallsiteMetadata(I, InlinedCallStack, Ctx);
- // Only need to find one with a matching call stack and add a single
- // callsite metadata.
- break;
- }
- }
- }
- }
+ std::string Msg =
+ IPE.message() + std::string(" ") + F.getName().str() +
+ std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
+ std::string(" up to ") + std::to_string(MismatchedFuncSum) +
+ std::string(" count discarded");
- return true;
+ Ctx.diagnose(
+ DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
+ });
}
// Read the profile from ProfileFileName and assign the value to the
@@ -1504,42 +1338,7 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum);
if (Error E = Result.takeError()) {
- handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
- auto Err = IPE.get();
- bool SkipWarning = false;
- LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
- << FuncInfo.FuncName << ": ");
- if (Err == instrprof_error::unknown_function) {
- IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
- SkipWarning = !PGOWarnMissing;
- LLVM_DEBUG(dbgs() << "unknown function");
- } else if (Err == instrprof_error::hash_mismatch ||
- Err == instrprof_error::malformed) {
- IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
- SkipWarning =
- NoPGOWarnMismatch ||
- (NoPGOWarnMismatchComdatWeak &&
- (F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
- F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
- LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
- << " skip=" << SkipWarning << ")");
- // Emit function metadata indicating PGO profile mismatch.
- annotateFunctionWithHashMismatch(F, M->getContext());
- }
-
- LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
- if (SkipWarning)
- return;
-
- std::string Msg =
- IPE.message() + std::string(" ") + F.getName().str() +
- std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
- std::string(" up to ") + std::to_string(MismatchedFuncSum) +
- std::string(" count discarded");
-
- Ctx.diagnose(
- DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
- });
+ handleInstrProfError(std::move(E), MismatchedFuncSum);
return false;
}
ProfileRecord = std::move(Result.get());
@@ -1569,8 +1368,9 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
dbgs() << "Inconsistent number of counts, skipping this function");
Ctx.diagnose(DiagnosticInfoPGOProfile(
M->getName().data(),
- Twine("Inconsistent number of counts in ") + F.getName().str()
- + Twine(": the profile may be stale or there is a function name collision."),
+ Twine("Inconsistent number of counts in ") + F.getName().str() +
+ Twine(": the profile may be stale or there is a function name "
+ "collision."),
DS_Warning));
return false;
}
@@ -1578,6 +1378,113 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
return true;
}
+void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
+ uint64_t MismatchedFuncSum = 0;
+ Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
+ FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum);
+ if (auto Err = Result.takeError()) {
+ handleInstrProfError(std::move(Err), MismatchedFuncSum);
+ return;
+ }
+
+ std::vector<uint64_t> &CountsFromProfile = Result.get().Counts;
+ DenseMap<const BasicBlock *, bool> Coverage;
+ unsigned Index = 0;
+ for (auto &BB : F)
+ if (FuncInfo.BCI->shouldInstrumentBlock(BB))
+ Coverage[&BB] = (CountsFromProfile[Index++] != 0);
+ assert(Index == CountsFromProfile.size());
+
+ // For each B in InverseDependencies[A], if A is covered then B is covered.
+ DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>>
+ InverseDependencies;
+ for (auto &BB : F) {
+ for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
+ // If Dep is covered then BB is covered.
+ InverseDependencies[Dep].insert(&BB);
+ }
+ }
+
+ // Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
+ std::stack<const BasicBlock *> CoveredBlocksToProcess;
+ for (auto &[BB, IsCovered] : Coverage)
+ if (IsCovered)
+ CoveredBlocksToProcess.push(BB);
+
+ while (!CoveredBlocksToProcess.empty()) {
+ auto *CoveredBlock = CoveredBlocksToProcess.top();
+ assert(Coverage[CoveredBlock]);
+ CoveredBlocksToProcess.pop();
+ for (auto *BB : InverseDependencies[CoveredBlock]) {
+ // If CoveredBlock is covered then BB is covered.
+ if (Coverage[BB])
+ continue;
+ Coverage[BB] = true;
+ CoveredBlocksToProcess.push(BB);
+ }
+ }
+
+ // Annotate block coverage.
+ MDBuilder MDB(F.getContext());
+ // We set the entry count to 10000 if the entry block is covered so that BFI
+ // can propagate a fraction of this count to the other covered blocks.
+ F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
+ for (auto &BB : F) {
+ // For a block A and its successor B, we set the edge weight as follows:
+ // If A is covered and B is covered, set weight=1.
+ // If A is covered and B is uncovered, set weight=0.
+ // If A is uncovered, set weight=1.
+ // This setup will allow BFI to give nonzero profile counts to only covered
+ // blocks.
+ SmallVector<unsigned, 4> Weights;
+ for (auto *Succ : successors(&BB))
+ Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
+ if (Weights.size() >= 2)
+ BB.getTerminator()->setMetadata(LLVMContext::MD_prof,
+ MDB.createBranchWeights(Weights));
+ }
+
+ unsigned NumCorruptCoverage = 0;
+ DominatorTree DT(F);
+ LoopInfo LI(DT);
+ BranchProbabilityInfo BPI(F, LI);
+ BlockFrequencyInfo BFI(F, BPI, LI);
+ auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
+ if (auto C = BFI.getBlockProfileCount(&BB))
+ return C == 0;
+ return {};
+ };
+ LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
+ for (auto &BB : F) {
+ LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
+ << (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
+ << "\n");
+ // In some cases it is possible to find a covered block that has no covered
+ // successors, e.g., when a block calls a function that may call exit(). In
+ // those cases, BFI could find its successor to be covered while BCI could
+ // find its successor to be dead.
+ if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) {
+ LLVM_DEBUG(
+ dbgs() << "Found inconsistent block covearge for " << BB.getName()
+ << ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI="
+ << (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
+ ++NumCorruptCoverage;
+ }
+ if (Coverage[&BB])
+ ++NumCoveredBlocks;
+ }
+ if (PGOVerifyBFI && NumCorruptCoverage) {
+ auto &Ctx = M->getContext();
+ Ctx.diagnose(DiagnosticInfoPGOProfile(
+ M->getName().data(),
+ Twine("Found inconsistent block coverage for function ") + F.getName() +
+ " in " + Twine(NumCorruptCoverage) + " blocks.",
+ DS_Warning));
+ }
+ if (PGOViewBlockCoverageGraph)
+ FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
+}
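A minimal standalone sketch of the flood-fill inference above, assuming plain integer block ids and std::map/std::set in place of the LLVM BasicBlock and BlockCoverageInference types; it shows only the propagation rule (if A is covered and B is in InverseDeps[A], then B is covered):

#include <map>
#include <set>
#include <stack>

using BlockId = unsigned;

// Seed the worklist with blocks whose counters were nonzero, then propagate
// coverage along the inverse dependencies until a fixed point is reached.
std::map<BlockId, bool>
inferCoverage(std::map<BlockId, bool> Coverage,
              const std::map<BlockId, std::set<BlockId>> &InverseDeps) {
  std::stack<BlockId> Worklist;
  for (const auto &[BB, IsCovered] : Coverage)
    if (IsCovered)
      Worklist.push(BB);
  while (!Worklist.empty()) {
    BlockId Covered = Worklist.top();
    Worklist.pop();
    auto It = InverseDeps.find(Covered);
    if (It == InverseDeps.end())
      continue;
    for (BlockId BB : It->second) {
      if (!Coverage[BB]) {
        Coverage[BB] = true; // newly inferred as covered
        Worklist.push(BB);
      }
    }
  }
  return Coverage;
}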
+
// Populate the counters from instrumented BBs to all BBs.
// In the end of this operation, all BBs should have a valid count value.
void PGOUseFunc::populateCounters() {
@@ -1590,7 +1497,7 @@ void PGOUseFunc::populateCounters() {
// For efficient traversal, it's better to start from the end as most
// of the instrumented edges are at the end.
for (auto &BB : reverse(F)) {
- UseBBInfo *Count = findBBInfo(&BB);
+ PGOUseBBInfo *Count = findBBInfo(&BB);
if (Count == nullptr)
continue;
if (!Count->CountValid) {
@@ -1629,7 +1536,7 @@ void PGOUseFunc::populateCounters() {
}
LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
- (void) NumPasses;
+ (void)NumPasses;
#ifndef NDEBUG
// Assert every BB has a valid counter.
for (auto &BB : F) {
@@ -1655,7 +1562,7 @@ void PGOUseFunc::populateCounters() {
markFunctionAttributes(FuncEntryCount, FuncMaxCount);
// Now annotate select instructions
- FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition);
+ FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
assert(CountPosition == ProfileCountSize);
LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
@@ -1679,7 +1586,7 @@ void PGOUseFunc::setBranchWeights() {
continue;
// We have a non-zero Branch BB.
- const UseBBInfo &BBCountInfo = getBBInfo(&BB);
+ const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
unsigned Size = BBCountInfo.OutEdges.size();
SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
uint64_t MaxCount = 0;
@@ -1704,11 +1611,11 @@ void PGOUseFunc::setBranchWeights() {
// when there is no exit block and the code exits via a noreturn function.
auto &Ctx = M->getContext();
Ctx.diagnose(DiagnosticInfoPGOProfile(
- M->getName().data(),
- Twine("Profile in ") + F.getName().str() +
- Twine(" partially ignored") +
- Twine(", possibly due to the lack of a return path."),
- DS_Warning));
+ M->getName().data(),
+ Twine("Profile in ") + F.getName().str() +
+ Twine(" partially ignored") +
+ Twine(", possibly due to the lack of a return path."),
+ DS_Warning));
}
}
}
@@ -1730,15 +1637,13 @@ void PGOUseFunc::annotateIrrLoopHeaderWeights() {
// duplication.
if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
Instruction *TI = BB.getTerminator();
- const UseBBInfo &BBCountInfo = getBBInfo(&BB);
+ const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
}
}
}
void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
- if (PGOFunctionEntryCoverage)
- return;
Module *M = F.getParent();
IRBuilder<> Builder(&SI);
Type *Int64Ty = Builder.getInt64Ty();
@@ -1771,7 +1676,7 @@ void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
}
void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
- if (!PGOInstrSelect)
+ if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
return;
// FIXME: do not handle this yet.
if (SI.getCondition()->getType()->isVectorTy())
@@ -1815,8 +1720,8 @@ void PGOUseFunc::annotateValueSites(uint32_t Kind) {
Ctx.diagnose(DiagnosticInfoPGOProfile(
M->getName().data(),
Twine("Inconsistent number of value sites for ") +
- Twine(ValueProfKindDescr[Kind]) +
- Twine(" profiling in \"") + F.getName().str() +
+ Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
+ F.getName().str() +
Twine("\", possibly due to the use of a stale profile."),
DS_Warning));
return;
@@ -1907,17 +1812,20 @@ static bool InstrumentAllFunctions(
}
PreservedAnalyses
-PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
+PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &MAM) {
createProfileFileNameVar(M, CSInstrName);
// The variable in a comdat may be discarded by LTO. Ensure the declaration
// will be retained.
appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true));
- return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<FunctionAnalysisManagerModuleProxy>();
+ PA.preserveSet<AllAnalysesOn<Function>>();
+ return PA;
}
PreservedAnalyses PGOInstrumentationGen::run(Module &M,
- ModuleAnalysisManager &AM) {
- auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ ModuleAnalysisManager &MAM) {
+ auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
return FAM.getResult<TargetLibraryAnalysis>(F);
};
@@ -1991,7 +1899,7 @@ static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
BlockFrequencyInfo NBFI(F, NBPI, LI);
// bool PrintFunc = false;
bool HotBBOnly = PGOVerifyHotBFI;
- std::string Msg;
+ StringRef Msg;
OptimizationRemarkEmitter ORE(&F);
unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
@@ -2059,6 +1967,7 @@ static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
static bool annotateAllFunctions(
Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
+ vfs::FileSystem &FS,
function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
@@ -2066,8 +1975,8 @@ static bool annotateAllFunctions(
LLVM_DEBUG(dbgs() << "Read in profile counters: ");
auto &Ctx = M.getContext();
// Read the counter array from file.
- auto ReaderOrErr =
- IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName);
+ auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
+ ProfileRemappingFileName);
if (Error E = ReaderOrErr.takeError()) {
handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
Ctx.diagnose(
@@ -2087,17 +1996,11 @@ static bool annotateAllFunctions(
return false;
// TODO: might need to change the warning once the clang option is finalized.
- if (!PGOReader->isIRLevelProfile() && !PGOReader->hasMemoryProfile()) {
+ if (!PGOReader->isIRLevelProfile()) {
Ctx.diagnose(DiagnosticInfoPGOProfile(
ProfileFileName.data(), "Not an IR level instrumentation profile"));
return false;
}
- if (PGOReader->hasSingleByteCoverage()) {
- Ctx.diagnose(DiagnosticInfoPGOProfile(
- ProfileFileName.data(),
- "Cannot use coverage profiles for optimization"));
- return false;
- }
if (PGOReader->functionEntryOnly()) {
Ctx.diagnose(DiagnosticInfoPGOProfile(
ProfileFileName.data(),
@@ -2123,25 +2026,25 @@ static bool annotateAllFunctions(
bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
if (PGOInstrumentEntry.getNumOccurrences() > 0)
InstrumentFuncEntry = PGOInstrumentEntry;
+ bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
for (auto &F : M) {
if (skipPGO(F))
continue;
auto &TLI = LookupTLI(F);
auto *BPI = LookupBPI(F);
auto *BFI = LookupBFI(F);
- // Split indirectbr critical edges here before computing the MST rather than
- // later in getInstrBB() to avoid invalidating it.
- SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
+ if (!HasSingleByteCoverage) {
+ // Split indirectbr critical edges here before computing the MST rather
+ // than later in getInstrBB() to avoid invalidating it.
+ SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
+ BFI);
+ }
PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
- InstrumentFuncEntry);
- // Read and match memprof first since we do this via debug info and can
- // match even if there is an IR mismatch detected for regular PGO below.
- if (PGOReader->hasMemoryProfile())
- Func.readMemprof(PGOReader.get());
-
- if (!PGOReader->isIRLevelProfile())
+ InstrumentFuncEntry, HasSingleByteCoverage);
+ if (HasSingleByteCoverage) {
+ Func.populateCoverage(PGOReader.get());
continue;
-
+ }
// When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
// it means the profile for the function is unrepresentative and this
// function is actually hot / warm. We will reset the function hot / cold
@@ -2249,21 +2152,24 @@ static bool annotateAllFunctions(
return true;
}
-PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename,
- std::string RemappingFilename,
- bool IsCS)
+PGOInstrumentationUse::PGOInstrumentationUse(
+ std::string Filename, std::string RemappingFilename, bool IsCS,
+ IntrusiveRefCntPtr<vfs::FileSystem> VFS)
: ProfileFileName(std::move(Filename)),
- ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) {
+ ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
+ FS(std::move(VFS)) {
if (!PGOTestProfileFile.empty())
ProfileFileName = PGOTestProfileFile;
if (!PGOTestProfileRemappingFile.empty())
ProfileRemappingFileName = PGOTestProfileRemappingFile;
+ if (!FS)
+ FS = vfs::getRealFileSystem();
}
PreservedAnalyses PGOInstrumentationUse::run(Module &M,
- ModuleAnalysisManager &AM) {
+ ModuleAnalysisManager &MAM) {
- auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
return FAM.getResult<TargetLibraryAnalysis>(F);
};
@@ -2274,9 +2180,9 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
return &FAM.getResult<BlockFrequencyAnalysis>(F);
};
- auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
+ auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
- if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
+ if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
return PreservedAnalyses::all();
@@ -2285,7 +2191,7 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
static std::string getSimpleNodeName(const BasicBlock *Node) {
if (!Node->getName().empty())
- return std::string(Node->getName());
+ return Node->getName().str();
std::string SimpleNodeName;
raw_string_ostream OS(SimpleNodeName);
@@ -2294,8 +2200,7 @@ static std::string getSimpleNodeName(const BasicBlock *Node) {
}
void llvm::setProfMetadata(Module *M, Instruction *TI,
- ArrayRef<uint64_t> EdgeCounts,
- uint64_t MaxCount) {
+ ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
MDBuilder MDB(M->getContext());
assert(MaxCount > 0 && "Bad max count");
uint64_t Scale = calculateCountScale(MaxCount);
@@ -2384,7 +2289,7 @@ template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
raw_string_ostream OS(Result);
OS << getSimpleNodeName(Node) << ":\\l";
- UseBBInfo *BI = Graph->findBBInfo(Node);
+ PGOUseBBInfo *BI = Graph->findBBInfo(Node);
OS << "Count : ";
if (BI && BI->CountValid)
OS << BI->CountValue << "\\l";
diff --git a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
index 35db8483fc91..2906fe190984 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
@@ -317,7 +317,7 @@ bool MemOPSizeOpt::perform(MemOp MO) {
}
if (!SeenSizeId.insert(V).second) {
- errs() << "Invalid Profile Data in Function " << Func.getName()
+ errs() << "warning: Invalid Profile Data in Function " << Func.getName()
<< ": Two identical values in MemOp value counts.\n";
return false;
}
diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
index 142b9c38e5fc..d83a3a991c89 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
@@ -15,8 +15,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
@@ -31,15 +32,19 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Support/SpecialCaseList.h"
+#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <array>
#include <cstdint>
+#include <memory>
using namespace llvm;
@@ -49,7 +54,7 @@ namespace {
//===--- Constants --------------------------------------------------------===//
-constexpr uint32_t kVersionBase = 1; // occupies lower 16 bits
+constexpr uint32_t kVersionBase = 2; // occupies lower 16 bits
constexpr uint32_t kVersionPtrSizeRel = (1u << 16); // offsets are pointer-sized
constexpr int kCtorDtorPriority = 2;
@@ -59,7 +64,6 @@ class MetadataInfo {
public:
const StringRef FunctionPrefix;
const StringRef SectionSuffix;
- const uint32_t FeatureMask;
static const MetadataInfo Covered;
static const MetadataInfo Atomics;
@@ -67,16 +71,13 @@ public:
private:
// Forbid construction elsewhere.
explicit constexpr MetadataInfo(StringRef FunctionPrefix,
- StringRef SectionSuffix, uint32_t Feature)
- : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix),
- FeatureMask(Feature) {}
+ StringRef SectionSuffix)
+ : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix) {}
};
-const MetadataInfo MetadataInfo::Covered{"__sanitizer_metadata_covered",
- kSanitizerBinaryMetadataCoveredSection,
- kSanitizerBinaryMetadataNone};
-const MetadataInfo MetadataInfo::Atomics{"__sanitizer_metadata_atomics",
- kSanitizerBinaryMetadataAtomicsSection,
- kSanitizerBinaryMetadataAtomics};
+const MetadataInfo MetadataInfo::Covered{
+ "__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection};
+const MetadataInfo MetadataInfo::Atomics{
+ "__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection};
// The only instances of MetadataInfo are the constants above, so a set of
// them may simply store pointers to them. To deterministically generate code,
@@ -89,6 +90,11 @@ cl::opt<bool> ClWeakCallbacks(
"sanitizer-metadata-weak-callbacks",
cl::desc("Declare callbacks extern weak, and only call if non-null."),
cl::Hidden, cl::init(true));
+cl::opt<bool>
+ ClNoSanitize("sanitizer-metadata-nosanitize-attr",
+ cl::desc("Mark some metadata features uncovered in functions "
+ "with associated no_sanitize attributes."),
+ cl::Hidden, cl::init(true));
cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered",
cl::desc("Emit PCs for covered functions."),
@@ -120,24 +126,20 @@ transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) {
class SanitizerBinaryMetadata {
public:
- SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts)
+ SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts,
+ std::unique_ptr<SpecialCaseList> Ignorelist)
: Mod(M), Options(transformOptionsFromCl(std::move(Opts))),
- TargetTriple(M.getTargetTriple()), IRB(M.getContext()) {
+ Ignorelist(std::move(Ignorelist)), TargetTriple(M.getTargetTriple()),
+ IRB(M.getContext()) {
// FIXME: Make it work with other formats.
assert(TargetTriple.isOSBinFormatELF() && "ELF only");
+ assert(!(TargetTriple.isNVPTX() || TargetTriple.isAMDGPU()) &&
+ "Device targets are not supported");
}
bool run();
private:
- // Return enabled feature mask of per-instruction metadata.
- uint32_t getEnabledPerInstructionFeature() const {
- uint32_t FeatureMask = 0;
- if (Options.Atomics)
- FeatureMask |= MetadataInfo::Atomics.FeatureMask;
- return FeatureMask;
- }
-
uint32_t getVersion() const {
uint32_t Version = kVersionBase;
const auto CM = Mod.getCodeModel();
@@ -156,7 +158,7 @@ private:
// to determine if a memory operation is atomic or not in modules compiled
// with SanitizerBinaryMetadata.
bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,
- uint32_t &FeatureMask);
+ uint64_t &FeatureMask);
// Get start/end section marker pointer.
GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
@@ -170,10 +172,16 @@ private:
// Returns the section end marker name.
Twine getSectionEnd(StringRef SectionSuffix);
+ // Returns true if the access to the address should be considered "atomic".
+ bool pretendAtomicAccess(const Value *Addr);
+
Module &Mod;
const SanitizerBinaryMetadataOptions Options;
+ std::unique_ptr<SpecialCaseList> Ignorelist;
const Triple TargetTriple;
IRBuilder<> IRB;
+ BumpPtrAllocator Alloc;
+ UniqueStringSaver StringPool{Alloc};
};
bool SanitizerBinaryMetadata::run() {
@@ -218,17 +226,23 @@ bool SanitizerBinaryMetadata::run() {
(MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs,
/*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
.first;
- Constant *CtorData = nullptr;
- Constant *DtorData = nullptr;
+ Constant *CtorComdatKey = nullptr;
+ Constant *DtorComdatKey = nullptr;
if (TargetTriple.supportsCOMDAT()) {
- // Use COMDAT to deduplicate constructor/destructor function.
+ // Use COMDAT to deduplicate constructor/destructor function. The COMDAT
+ // key needs to be a non-local linkage.
Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName()));
Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName()));
- CtorData = Ctor;
- DtorData = Dtor;
+ Ctor->setLinkage(GlobalValue::ExternalLinkage);
+ Dtor->setLinkage(GlobalValue::ExternalLinkage);
+ // DSOs should _not_ call another constructor/destructor!
+ Ctor->setVisibility(GlobalValue::HiddenVisibility);
+ Dtor->setVisibility(GlobalValue::HiddenVisibility);
+ CtorComdatKey = Ctor;
+ DtorComdatKey = Dtor;
}
- appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorData);
- appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorData);
+ appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorComdatKey);
+ appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorComdatKey);
}
return true;
@@ -239,6 +253,8 @@ void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
return;
if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
return;
+ if (Ignorelist && Ignorelist->inSection("metadata", "fun", F.getName()))
+ return;
// Don't touch available_externally functions, their actual body is elsewhere.
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
return;
@@ -247,18 +263,18 @@ void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
// The metadata features enabled for this function, stored along covered
// metadata (if enabled).
- uint32_t FeatureMask = getEnabledPerInstructionFeature();
+ uint64_t FeatureMask = 0;
// Don't emit unnecessary covered metadata for all functions to save space.
bool RequiresCovered = false;
- // We can only understand if we need to set UAR feature after looking
- // at the instructions. So we need to check instructions even if FeatureMask
- // is empty.
- if (FeatureMask || Options.UAR) {
+
+ if (Options.Atomics || Options.UAR) {
for (BasicBlock &BB : F)
for (Instruction &I : BB)
RequiresCovered |= runOn(I, MIS, MDB, FeatureMask);
}
+ if (ClNoSanitize && F.hasFnAttribute("no_sanitize_thread"))
+ FeatureMask &= ~kSanitizerBinaryMetadataAtomics;
if (F.isVarArg())
FeatureMask &= ~kSanitizerBinaryMetadataUAR;
if (FeatureMask & kSanitizerBinaryMetadataUAR) {
@@ -274,9 +290,8 @@ void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
const auto *MI = &MetadataInfo::Covered;
MIS.insert(MI);
const StringRef Section = getSectionName(MI->SectionSuffix);
- // The feature mask will be placed after the size (32 bit) of the function,
- // so in total one covered entry will use `sizeof(void*) + 4 + 4`.
- Constant *CFM = IRB.getInt32(FeatureMask);
+ // The feature mask will be placed after the function size.
+ Constant *CFM = IRB.getInt64(FeatureMask);
F.setMetadata(LLVMContext::MD_pcsections,
MDB.createPCSections({{Section, {CFM}}}));
}
@@ -338,23 +353,80 @@ bool useAfterReturnUnsafe(Instruction &I) {
return false;
}
+bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value *Addr) {
+ if (!Addr)
+ return false;
+
+ Addr = Addr->stripInBoundsOffsets();
+ auto *GV = dyn_cast<GlobalVariable>(Addr);
+ if (!GV)
+ return false;
+
+ // Some compiler-generated accesses are known to be racy; to avoid false
+ // positives in data-race analysis, pretend they're atomic.
+ if (GV->hasSection()) {
+ const auto OF = Triple(Mod.getTargetTriple()).getObjectFormat();
+ const auto ProfSec =
+ getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false);
+ if (GV->getSection().endswith(ProfSec))
+ return true;
+ }
+ if (GV->getName().startswith("__llvm_gcov") ||
+ GV->getName().startswith("__llvm_gcda"))
+ return true;
+
+ return false;
+}
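In short, the check above reports deliberately racy compiler-generated counters as atomic so data-race tooling does not flag them. A rough standalone sketch of that section/name test, assuming the ELF counter section name "__llvm_prf_cnts" and the helper lambdas (both are illustrative, not the LLVM API used above):

#include <string_view>

// Profile counter sections and gcov/gcda globals are updated racily on
// purpose; pretend accesses to them are atomic.
bool looksLikeRacyCounter(std::string_view Section, std::string_view Name) {
  auto StartsWith = [](std::string_view S, std::string_view P) {
    return S.substr(0, P.size()) == P;
  };
  auto EndsWith = [](std::string_view S, std::string_view Suf) {
    return S.size() >= Suf.size() &&
           S.substr(S.size() - Suf.size()) == Suf;
  };
  if (EndsWith(Section, "__llvm_prf_cnts")) // assumed instrprof counter section
    return true;
  return StartsWith(Name, "__llvm_gcov") || StartsWith(Name, "__llvm_gcda");
}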
+
+// Returns true if the memory at `Addr` may be shared with other threads.
+bool maybeSharedMutable(const Value *Addr) {
+ // By default assume memory may be shared.
+ if (!Addr)
+ return true;
+
+ if (isa<AllocaInst>(getUnderlyingObject(Addr)) &&
+ !PointerMayBeCaptured(Addr, true, true))
+ return false; // Object is on stack but does not escape.
+
+ Addr = Addr->stripInBoundsOffsets();
+ if (auto *GV = dyn_cast<GlobalVariable>(Addr)) {
+ if (GV->isConstant())
+ return false; // Shared, but not mutable.
+ }
+
+ return true;
+}
+
bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
- MDBuilder &MDB, uint32_t &FeatureMask) {
+ MDBuilder &MDB, uint64_t &FeatureMask) {
SmallVector<const MetadataInfo *, 1> InstMetadata;
bool RequiresCovered = false;
+ // Only call if at least 1 type of metadata is requested.
+ assert(Options.UAR || Options.Atomics);
+
if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) {
if (useAfterReturnUnsafe(I))
FeatureMask |= kSanitizerBinaryMetadataUAR;
}
- if (Options.Atomics && I.mayReadOrWriteMemory()) {
- auto SSID = getAtomicSyncScopeID(&I);
- if (SSID.has_value() && *SSID != SyncScope::SingleThread) {
- NumMetadataAtomics++;
- InstMetadata.push_back(&MetadataInfo::Atomics);
+ if (Options.Atomics) {
+ const Value *Addr = nullptr;
+ if (auto *SI = dyn_cast<StoreInst>(&I))
+ Addr = SI->getPointerOperand();
+ else if (auto *LI = dyn_cast<LoadInst>(&I))
+ Addr = LI->getPointerOperand();
+
+ if (I.mayReadOrWriteMemory() && maybeSharedMutable(Addr)) {
+ auto SSID = getAtomicSyncScopeID(&I);
+ if ((SSID.has_value() && *SSID != SyncScope::SingleThread) ||
+ pretendAtomicAccess(Addr)) {
+ NumMetadataAtomics++;
+ InstMetadata.push_back(&MetadataInfo::Atomics);
+ }
+ FeatureMask |= kSanitizerBinaryMetadataAtomics;
+ RequiresCovered = true;
}
- RequiresCovered = true;
}
// Attach MD_pcsections to instruction.
@@ -381,8 +453,9 @@ SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) {
}
StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
- // FIXME: Other TargetTriple (req. string pool)
- return SectionSuffix;
+ // FIXME: Other TargetTriples.
+ // Request ULEB128 encoding for all integer constants.
+ return StringPool.save(SectionSuffix + "!C");
}
Twine SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {
@@ -396,12 +469,20 @@ Twine SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {
} // namespace
SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(
- SanitizerBinaryMetadataOptions Opts)
- : Options(std::move(Opts)) {}
+ SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles)
+ : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {}
PreservedAnalyses
SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) {
- SanitizerBinaryMetadata Pass(M, Options);
+ std::unique_ptr<SpecialCaseList> Ignorelist;
+ if (!IgnorelistFiles.empty()) {
+ Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles,
+ *vfs::getRealFileSystem());
+ if (Ignorelist->inSection("metadata", "src", M.getSourceFileName()))
+ return PreservedAnalyses::all();
+ }
+
+ SanitizerBinaryMetadata Pass(M, Options, std::move(Ignorelist));
if (Pass.run())
return PreservedAnalyses::none();
return PreservedAnalyses::all();
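The ignorelist consulted here uses the sanitizer special-case-list format: entries under a [metadata] section, matched against the module's source file name via src: and against function names via fun:. A small illustrative file (the globs below are made up):

[metadata]
src:*/third_party/*
fun:*no_sanitize_metadata*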
diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 23a88c3cfba2..f22918141f6e 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -13,13 +13,12 @@
#include "llvm/Transforms/Instrumentation/SanitizerCoverage.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
@@ -28,11 +27,10 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/SpecialCaseList.h"
#include "llvm/Support/VirtualFileSystem.h"
-#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -250,10 +248,6 @@ private:
std::pair<Value *, Value *> CreateSecStartEnd(Module &M, const char *Section,
Type *Ty);
- void SetNoSanitizeMetadata(Instruction *I) {
- I->setMetadata(LLVMContext::MD_nosanitize, MDNode::get(*C, std::nullopt));
- }
-
std::string getSectionName(const std::string &Section) const;
std::string getSectionStart(const std::string &Section) const;
std::string getSectionEnd(const std::string &Section) const;
@@ -809,7 +803,7 @@ void ModuleSanitizerCoverage::InjectCoverageForIndirectCalls(
assert(Options.TracePC || Options.TracePCGuard ||
Options.Inline8bitCounters || Options.InlineBoolFlag);
for (auto *I : IndirCalls) {
- IRBuilder<> IRB(I);
+ InstrumentationIRBuilder IRB(I);
CallBase &CB = cast<CallBase>(*I);
Value *Callee = CB.getCalledOperand();
if (isa<InlineAsm>(Callee))
@@ -826,7 +820,7 @@ void ModuleSanitizerCoverage::InjectTraceForSwitch(
Function &, ArrayRef<Instruction *> SwitchTraceTargets) {
for (auto *I : SwitchTraceTargets) {
if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
- IRBuilder<> IRB(I);
+ InstrumentationIRBuilder IRB(I);
SmallVector<Constant *, 16> Initializers;
Value *Cond = SI->getCondition();
if (Cond->getType()->getScalarSizeInBits() >
@@ -864,7 +858,7 @@ void ModuleSanitizerCoverage::InjectTraceForSwitch(
void ModuleSanitizerCoverage::InjectTraceForDiv(
Function &, ArrayRef<BinaryOperator *> DivTraceTargets) {
for (auto *BO : DivTraceTargets) {
- IRBuilder<> IRB(BO);
+ InstrumentationIRBuilder IRB(BO);
Value *A1 = BO->getOperand(1);
if (isa<ConstantInt>(A1)) continue;
if (!A1->getType()->isIntegerTy())
@@ -882,7 +876,7 @@ void ModuleSanitizerCoverage::InjectTraceForDiv(
void ModuleSanitizerCoverage::InjectTraceForGep(
Function &, ArrayRef<GetElementPtrInst *> GepTraceTargets) {
for (auto *GEP : GepTraceTargets) {
- IRBuilder<> IRB(GEP);
+ InstrumentationIRBuilder IRB(GEP);
for (Use &Idx : GEP->indices())
if (!isa<ConstantInt>(Idx) && Idx->getType()->isIntegerTy())
IRB.CreateCall(SanCovTraceGepFunction,
@@ -904,7 +898,7 @@ void ModuleSanitizerCoverage::InjectTraceForLoadsAndStores(
Type *PointerType[5] = {Int8PtrTy, Int16PtrTy, Int32PtrTy, Int64PtrTy,
Int128PtrTy};
for (auto *LI : Loads) {
- IRBuilder<> IRB(LI);
+ InstrumentationIRBuilder IRB(LI);
auto Ptr = LI->getPointerOperand();
int Idx = CallbackIdx(LI->getType());
if (Idx < 0)
@@ -913,7 +907,7 @@ void ModuleSanitizerCoverage::InjectTraceForLoadsAndStores(
IRB.CreatePointerCast(Ptr, PointerType[Idx]));
}
for (auto *SI : Stores) {
- IRBuilder<> IRB(SI);
+ InstrumentationIRBuilder IRB(SI);
auto Ptr = SI->getPointerOperand();
int Idx = CallbackIdx(SI->getValueOperand()->getType());
if (Idx < 0)
@@ -927,7 +921,7 @@ void ModuleSanitizerCoverage::InjectTraceForCmp(
Function &, ArrayRef<Instruction *> CmpTraceTargets) {
for (auto *I : CmpTraceTargets) {
if (ICmpInst *ICMP = dyn_cast<ICmpInst>(I)) {
- IRBuilder<> IRB(ICMP);
+ InstrumentationIRBuilder IRB(ICMP);
Value *A0 = ICMP->getOperand(0);
Value *A1 = ICMP->getOperand(1);
if (!A0->getType()->isIntegerTy())
@@ -994,8 +988,8 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
auto Load = IRB.CreateLoad(Int8Ty, CounterPtr);
auto Inc = IRB.CreateAdd(Load, ConstantInt::get(Int8Ty, 1));
auto Store = IRB.CreateStore(Inc, CounterPtr);
- SetNoSanitizeMetadata(Load);
- SetNoSanitizeMetadata(Store);
+ Load->setNoSanitizeMetadata();
+ Store->setNoSanitizeMetadata();
}
if (Options.InlineBoolFlag) {
auto FlagPtr = IRB.CreateGEP(
@@ -1006,8 +1000,8 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
SplitBlockAndInsertIfThen(IRB.CreateIsNull(Load), &*IP, false);
IRBuilder<> ThenIRB(ThenTerm);
auto Store = ThenIRB.CreateStore(ConstantInt::getTrue(Int1Ty), FlagPtr);
- SetNoSanitizeMetadata(Load);
- SetNoSanitizeMetadata(Store);
+ Load->setNoSanitizeMetadata();
+ Store->setNoSanitizeMetadata();
}
if (Options.StackDepth && IsEntryBB && !IsLeafFunc) {
// Check stack depth. If it's the deepest so far, record it.
@@ -1023,8 +1017,8 @@ void ModuleSanitizerCoverage::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
auto ThenTerm = SplitBlockAndInsertIfThen(IsStackLower, &*IP, false);
IRBuilder<> ThenIRB(ThenTerm);
auto Store = ThenIRB.CreateStore(FrameAddrInt, SanCovLowestStack);
- SetNoSanitizeMetadata(LowestStack);
- SetNoSanitizeMetadata(Store);
+ LowestStack->setNoSanitizeMetadata();
+ Store->setNoSanitizeMetadata();
}
}
diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index a127e81ce643..ce35eefb63fa 100644
--- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -689,7 +689,7 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
// replaced back with intrinsics. If that becomes wrong at some point,
// we will need to call e.g. __tsan_memset to avoid the intrinsics.
bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) {
- IRBuilder<> IRB(I);
+ InstrumentationIRBuilder IRB(I);
if (MemSetInst *M = dyn_cast<MemSetInst>(I)) {
IRB.CreateCall(
MemsetFn,
@@ -813,8 +813,6 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
int ThreadSanitizer::getMemoryAccessFuncIndex(Type *OrigTy, Value *Addr,
const DataLayout &DL) {
assert(OrigTy->isSized());
- assert(
- cast<PointerType>(Addr->getType())->isOpaqueOrPointeeTypeMatches(OrigTy));
uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
if (TypeSize != 8 && TypeSize != 16 &&
TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
@@ -822,7 +820,7 @@ int ThreadSanitizer::getMemoryAccessFuncIndex(Type *OrigTy, Value *Addr,
// Ignore all unusual sizes.
return -1;
}
- size_t Idx = countTrailingZeros(TypeSize / 8);
+ size_t Idx = llvm::countr_zero(TypeSize / 8);
assert(Idx < kNumberOfAccessSizes);
return Idx;
}
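The replacement above computes the same value as before: the byte size of the access (already filtered to a power of two) maps to its log2, i.e. 1, 2, 4, 8, 16 bytes give indices 0 through 4. A quick standalone check using C++20's std::countr_zero as a stand-in for llvm::countr_zero:

#include <bit>
#include <cstdio>

int main() {
  for (unsigned Bytes : {1u, 2u, 4u, 8u, 16u})
    std::printf("%u byte(s) -> index %d\n", Bytes, std::countr_zero(Bytes));
  return 0;
}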
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARC.h b/llvm/lib/Transforms/ObjCARC/ObjCARC.h
index d4570ff908f1..9e68bd574851 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARC.h
@@ -22,9 +22,9 @@
#ifndef LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARC_H
#define LLVM_LIB_TRANSFORMS_OBJCARC_OBJCARC_H
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/Analysis/ObjCARCUtil.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/Transforms/Utils/Local.h"
namespace llvm {
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index ab90ef090ae0..c397ab63f388 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -31,9 +31,9 @@
#include "ProvenanceAnalysis.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Operator.h"
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index a374958f9707..adf86526ebf1 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -36,7 +36,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/ObjCARCAliasAnalysis.h"
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/Analysis/ObjCARCInstKind.h"
@@ -46,6 +45,7 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstIterator.h"
@@ -933,8 +933,8 @@ void ObjCARCOpt::OptimizeIndividualCallImpl(Function &F, Instruction *Inst,
if (IsNullOrUndef(CI->getArgOperand(0))) {
Changed = true;
new StoreInst(ConstantInt::getTrue(CI->getContext()),
- UndefValue::get(Type::getInt1PtrTy(CI->getContext())), CI);
- Value *NewValue = UndefValue::get(CI->getType());
+ PoisonValue::get(Type::getInt1PtrTy(CI->getContext())), CI);
+ Value *NewValue = PoisonValue::get(CI->getType());
LLVM_DEBUG(
dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
"\nOld = "
@@ -952,9 +952,9 @@ void ObjCARCOpt::OptimizeIndividualCallImpl(Function &F, Instruction *Inst,
IsNullOrUndef(CI->getArgOperand(1))) {
Changed = true;
new StoreInst(ConstantInt::getTrue(CI->getContext()),
- UndefValue::get(Type::getInt1PtrTy(CI->getContext())), CI);
+ PoisonValue::get(Type::getInt1PtrTy(CI->getContext())), CI);
- Value *NewValue = UndefValue::get(CI->getType());
+ Value *NewValue = PoisonValue::get(CI->getType());
LLVM_DEBUG(
dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
"\nOld = "
diff --git a/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp b/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
index 2fa25a79ae9d..23855231c5b9 100644
--- a/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp
@@ -42,40 +42,21 @@ bool ProvenanceAnalysis::relatedSelect(const SelectInst *A,
const Value *B) {
// If the values are Selects with the same condition, we can do a more precise
// check: just check for relations between the values on corresponding arms.
- if (const SelectInst *SB = dyn_cast<SelectInst>(B)) {
+ if (const SelectInst *SB = dyn_cast<SelectInst>(B))
if (A->getCondition() == SB->getCondition())
return related(A->getTrueValue(), SB->getTrueValue()) ||
related(A->getFalseValue(), SB->getFalseValue());
- // Check both arms of B individually. Return false if neither arm is related
- // to A.
- if (!(related(SB->getTrueValue(), A) || related(SB->getFalseValue(), A)))
- return false;
- }
-
// Check both arms of the Select node individually.
return related(A->getTrueValue(), B) || related(A->getFalseValue(), B);
}
bool ProvenanceAnalysis::relatedPHI(const PHINode *A,
const Value *B) {
-
- auto comparePHISources = [this](const PHINode *PNA, const Value *B) -> bool {
- // Check each unique source of the PHI node against B.
- SmallPtrSet<const Value *, 4> UniqueSrc;
- for (Value *PV1 : PNA->incoming_values()) {
- if (UniqueSrc.insert(PV1).second && related(PV1, B))
- return true;
- }
-
- // All of the arms checked out.
- return false;
- };
-
- if (const PHINode *PNB = dyn_cast<PHINode>(B)) {
- // If the values are PHIs in the same block, we can do a more precise as
- // well as efficient check: just check for relations between the values on
- // corresponding edges.
+ // If the values are PHIs in the same block, we can do a more precise as well
+ // as efficient check: just check for relations between the values on
+ // corresponding edges.
+ if (const PHINode *PNB = dyn_cast<PHINode>(B))
if (PNB->getParent() == A->getParent()) {
for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i)
if (related(A->getIncomingValue(i),
@@ -84,11 +65,15 @@ bool ProvenanceAnalysis::relatedPHI(const PHINode *A,
return false;
}
- if (!comparePHISources(PNB, A))
- return false;
+ // Check each unique source of the PHI node against B.
+ SmallPtrSet<const Value *, 4> UniqueSrc;
+ for (Value *PV1 : A->incoming_values()) {
+ if (UniqueSrc.insert(PV1).second && related(PV1, B))
+ return true;
}
- return comparePHISources(A, B);
+ // All of the arms checked out.
+ return false;
}
/// Test if the value of P, or any value covered by its provenance, is ever
@@ -140,19 +125,22 @@ bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) {
bool BIsIdentified = IsObjCIdentifiedObject(B);
// An ObjC-Identified object can't alias a load if it is never locally stored.
-
- // Check for an obvious escape.
- if ((AIsIdentified && isa<LoadInst>(B) && !IsStoredObjCPointer(A)) ||
- (BIsIdentified && isa<LoadInst>(A) && !IsStoredObjCPointer(B)))
- return false;
-
- if ((AIsIdentified && isa<LoadInst>(B)) ||
- (BIsIdentified && isa<LoadInst>(A)))
- return true;
-
- // Both pointers are identified and escapes aren't an evident problem.
- if (AIsIdentified && BIsIdentified && !isa<LoadInst>(A) && !isa<LoadInst>(B))
- return false;
+ if (AIsIdentified) {
+ // Check for an obvious escape.
+ if (isa<LoadInst>(B))
+ return IsStoredObjCPointer(A);
+ if (BIsIdentified) {
+ // Check for an obvious escape.
+ if (isa<LoadInst>(A))
+ return IsStoredObjCPointer(B);
+ // Both pointers are identified and escapes aren't an evident problem.
+ return false;
+ }
+ } else if (BIsIdentified) {
+ // Check for an obvious escape.
+ if (isa<LoadInst>(A))
+ return IsStoredObjCPointer(B);
+ }
// Special handling for PHI and Select.
if (const PHINode *PN = dyn_cast<PHINode>(A))
@@ -179,15 +167,12 @@ bool ProvenanceAnalysis::related(const Value *A, const Value *B) {
// Begin by inserting a conservative value into the map. If the insertion
// fails, we have the answer already. If it succeeds, leave it there until we
// compute the real answer to guard against recursive queries.
- if (A > B) std::swap(A, B);
std::pair<CachedResultsTy::iterator, bool> Pair =
CachedResults.insert(std::make_pair(ValuePairTy(A, B), true));
if (!Pair.second)
return Pair.first->second;
bool Result = relatedCheck(A, B);
- assert(relatedCheck(B, A) == Result &&
- "relatedCheck result depending on order of parameters!");
CachedResults[ValuePairTy(A, B)] = Result;
return Result;
}
diff --git a/llvm/lib/Transforms/Scalar/ADCE.cpp b/llvm/lib/Transforms/Scalar/ADCE.cpp
index 253293582945..24354211341f 100644
--- a/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -26,6 +26,7 @@
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -42,14 +43,11 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstddef>
@@ -113,6 +111,12 @@ struct BlockInfoType {
bool terminatorIsLive() const { return TerminatorLiveInfo->Live; }
};
+struct ADCEChanged {
+ bool ChangedAnything = false;
+ bool ChangedNonDebugInstr = false;
+ bool ChangedControlFlow = false;
+};
+
class AggressiveDeadCodeElimination {
Function &F;
@@ -179,7 +183,7 @@ class AggressiveDeadCodeElimination {
/// Remove instructions not marked live, return if any instruction was
/// removed.
- bool removeDeadInstructions();
+ ADCEChanged removeDeadInstructions();
/// Identify connected sections of the control flow graph which have
/// dead terminators and rewrite the control flow graph to remove them.
@@ -197,12 +201,12 @@ public:
PostDominatorTree &PDT)
: F(F), DT(DT), PDT(PDT) {}
- bool performDeadCodeElimination();
+ ADCEChanged performDeadCodeElimination();
};
} // end anonymous namespace
-bool AggressiveDeadCodeElimination::performDeadCodeElimination() {
+ADCEChanged AggressiveDeadCodeElimination::performDeadCodeElimination() {
initialize();
markLiveInstructions();
return removeDeadInstructions();
@@ -504,9 +508,10 @@ void AggressiveDeadCodeElimination::markLiveBranchesFromControlDependences() {
// Routines to update the CFG and SSA information before removing dead code.
//
//===----------------------------------------------------------------------===//
-bool AggressiveDeadCodeElimination::removeDeadInstructions() {
+ADCEChanged AggressiveDeadCodeElimination::removeDeadInstructions() {
+ ADCEChanged Changed;
// Updates control and dataflow around dead blocks
- bool RegionsUpdated = updateDeadRegions();
+ Changed.ChangedControlFlow = updateDeadRegions();
LLVM_DEBUG({
for (Instruction &I : instructions(F)) {
@@ -554,6 +559,8 @@ bool AggressiveDeadCodeElimination::removeDeadInstructions() {
continue;
// Fallthrough and drop the intrinsic.
+ } else {
+ Changed.ChangedNonDebugInstr = true;
}
// Prepare to delete.
@@ -569,7 +576,9 @@ bool AggressiveDeadCodeElimination::removeDeadInstructions() {
I->eraseFromParent();
}
- return !Worklist.empty() || RegionsUpdated;
+ Changed.ChangedAnything = Changed.ChangedControlFlow || !Worklist.empty();
+
+ return Changed;
}
// A dead region is the set of dead blocks with a common live post-dominator.
@@ -699,62 +708,25 @@ PreservedAnalyses ADCEPass::run(Function &F, FunctionAnalysisManager &FAM) {
// to update analysis if it is already available.
auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
auto &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
- if (!AggressiveDeadCodeElimination(F, DT, PDT).performDeadCodeElimination())
+ ADCEChanged Changed =
+ AggressiveDeadCodeElimination(F, DT, PDT).performDeadCodeElimination();
+ if (!Changed.ChangedAnything)
return PreservedAnalyses::all();
PreservedAnalyses PA;
- // TODO: We could track if we have actually done CFG changes.
- if (!RemoveControlFlowFlag)
+ if (!Changed.ChangedControlFlow) {
PA.preserveSet<CFGAnalyses>();
- else {
- PA.preserve<DominatorTreeAnalysis>();
- PA.preserve<PostDominatorTreeAnalysis>();
- }
- return PA;
-}
-
-namespace {
-
-struct ADCELegacyPass : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
-
- ADCELegacyPass() : FunctionPass(ID) {
- initializeADCELegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
-
- // ADCE does not need DominatorTree, but require DominatorTree here
- // to update analysis if it is already available.
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- return AggressiveDeadCodeElimination(F, DT, PDT)
- .performDeadCodeElimination();
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<PostDominatorTreeWrapperPass>();
- if (!RemoveControlFlowFlag)
- AU.setPreservesCFG();
- else {
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<PostDominatorTreeWrapperPass>();
+ if (!Changed.ChangedNonDebugInstr) {
+ // Only removing debug instructions does not affect MemorySSA.
+ //
+ // Therefore we preserve MemorySSA when only removing debug instructions
+ // since otherwise later passes may behave differently which then makes
+ // the presence of debug info affect code generation.
+ PA.preserve<MemorySSAAnalysis>();
}
- AU.addPreserved<GlobalsAAWrapperPass>();
}
-};
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<PostDominatorTreeAnalysis>();
-} // end anonymous namespace
-
-char ADCELegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(ADCELegacyPass, "adce",
- "Aggressive Dead Code Elimination", false, false)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_END(ADCELegacyPass, "adce", "Aggressive Dead Code Elimination",
- false, false)
-
-FunctionPass *llvm::createAggressiveDCEPass() { return new ADCELegacyPass(); }
+ return PA;
+}
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index f419f7bd769f..b259c76fc3a5 100644
--- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -28,13 +28,10 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
-#define AA_NAME "alignment-from-assumptions"
-#define DEBUG_TYPE AA_NAME
+#define DEBUG_TYPE "alignment-from-assumptions"
using namespace llvm;
STATISTIC(NumLoadAlignChanged,
@@ -44,46 +41,6 @@ STATISTIC(NumStoreAlignChanged,
STATISTIC(NumMemIntAlignChanged,
"Number of memory intrinsics changed by alignment assumptions");
-namespace {
-struct AlignmentFromAssumptions : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- AlignmentFromAssumptions() : FunctionPass(ID) {
- initializeAlignmentFromAssumptionsPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
-
- AU.setPreservesCFG();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- }
-
- AlignmentFromAssumptionsPass Impl;
-};
-}
-
-char AlignmentFromAssumptions::ID = 0;
-static const char aip_name[] = "Alignment from assumptions";
-INITIALIZE_PASS_BEGIN(AlignmentFromAssumptions, AA_NAME,
- aip_name, false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(AlignmentFromAssumptions, AA_NAME,
- aip_name, false, false)
-
-FunctionPass *llvm::createAlignmentFromAssumptionsPass() {
- return new AlignmentFromAssumptions();
-}
-
// Given an expression for the (constant) alignment, AlignSCEV, and an
// expression for the displacement between a pointer and the aligned address,
// DiffSCEV, compute the alignment of the displaced pointer if it can be reduced
@@ -317,17 +274,6 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall,
return true;
}
-bool AlignmentFromAssumptions::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
-
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
- return Impl.runImpl(F, AC, SE, DT);
-}
-
bool AlignmentFromAssumptionsPass::runImpl(Function &F, AssumptionCache &AC,
ScalarEvolution *SE_,
DominatorTree *DT_) {
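
With the AlignmentFromAssumptions legacy wrapper deleted, the analyses it used to pull from wrapper passes come from the FunctionAnalysisManager instead. A rough sketch of that plumbing follows, assuming the usual new-PM run() signature; the struct name is a stand-in, not the real pass.

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManager.h"

// Hypothetical stand-in that shows only the analysis plumbing.
struct AlignFromAssumeSketch : llvm::PassInfoMixin<AlignFromAssumeSketch> {
  llvm::PreservedAnalyses run(llvm::Function &F,
                              llvm::FunctionAnalysisManager &AM) {
    auto &AC = AM.getResult<llvm::AssumptionAnalysis>(F);
    auto &SE = AM.getResult<llvm::ScalarEvolutionAnalysis>(F);
    auto &DT = AM.getResult<llvm::DominatorTreeAnalysis>(F);
    (void)AC; (void)SE; (void)DT; // the real pass forwards these to runImpl()
    return llvm::PreservedAnalyses::all();
  }
};
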
diff --git a/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp b/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp
index 79f7e253d45b..b182f46cc515 100644
--- a/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp
+++ b/llvm/lib/Transforms/Scalar/AnnotationRemarks.cpp
@@ -16,7 +16,6 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/MemoryOpRemark.h"
using namespace llvm;
@@ -58,7 +57,12 @@ static void runImpl(Function &F, const TargetLibraryInfo &TLI) {
for (const MDOperand &Op :
I.getMetadata(LLVMContext::MD_annotation)->operands()) {
- auto Iter = Mapping.insert({cast<MDString>(Op.get())->getString(), 0});
+ StringRef AnnotationStr =
+ isa<MDString>(Op.get())
+ ? cast<MDString>(Op.get())->getString()
+ : cast<MDString>(cast<MDTuple>(Op.get())->getOperand(0).get())
+ ->getString();
+ auto Iter = Mapping.insert({AnnotationStr, 0});
Iter.first->second++;
}
}
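
The AnnotationRemarks hunk starts accepting !annotation operands that are either a bare MDString or an MDTuple whose first element is the string. A small helper, written here only to illustrate the two forms (the function name is made up), could read both:

#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"

// Returns the annotation text for one !annotation operand.
static llvm::StringRef annotationText(const llvm::MDOperand &Op) {
  if (auto *Str = llvm::dyn_cast<llvm::MDString>(Op.get()))
    return Str->getString(); // plain string form
  // Tuple form: the annotation string is the first tuple element.
  auto *Tup = llvm::cast<llvm::MDTuple>(Op.get());
  return llvm::cast<llvm::MDString>(Tup->getOperand(0).get())->getString();
}
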
diff --git a/llvm/lib/Transforms/Scalar/BDCE.cpp b/llvm/lib/Transforms/Scalar/BDCE.cpp
index 187927b3dede..1fa2c75b0f42 100644
--- a/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -23,11 +23,8 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -116,7 +113,7 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
const uint32_t SrcBitSize = SE->getSrcTy()->getScalarSizeInBits();
auto *const DstTy = SE->getDestTy();
const uint32_t DestBitSize = DstTy->getScalarSizeInBits();
- if (Demanded.countLeadingZeros() >= (DestBitSize - SrcBitSize)) {
+ if (Demanded.countl_zero() >= (DestBitSize - SrcBitSize)) {
clearAssumptionsOfUsers(SE, DB);
IRBuilder<> Builder(SE);
I.replaceAllUsesWith(
@@ -173,34 +170,3 @@ PreservedAnalyses BDCEPass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserveSet<CFGAnalyses>();
return PA;
}
-
-namespace {
-struct BDCELegacyPass : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- BDCELegacyPass() : FunctionPass(ID) {
- initializeBDCELegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
- auto &DB = getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
- return bitTrackingDCE(F, DB);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<DemandedBitsWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
-};
-}
-
-char BDCELegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(BDCELegacyPass, "bdce",
- "Bit-Tracking Dead Code Elimination", false, false)
-INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
-INITIALIZE_PASS_END(BDCELegacyPass, "bdce",
- "Bit-Tracking Dead Code Elimination", false, false)
-
-FunctionPass *llvm::createBitTrackingDCEPass() { return new BDCELegacyPass(); }
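
The one functional-looking change in the BDCE hunk is the rename of APInt::countLeadingZeros() to countl_zero(), matching the C++20 <bit> naming; the count itself is unchanged. A tiny self-contained check of the new spelling, illustrative only:

#include "llvm/ADT/APInt.h"
#include <cassert>

static void countlZeroExample() {
  llvm::APInt Demanded(32, 0x0000ffffu);
  // Only the low 16 bits are set, so 16 leading bits are zero.
  assert(Demanded.countl_zero() == 16);
}
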
diff --git a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
index 6665a927826d..aeb7c5d461f0 100644
--- a/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
+++ b/llvm/lib/Transforms/Scalar/CallSiteSplitting.cpp
@@ -535,45 +535,6 @@ static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI,
return Changed;
}
-namespace {
-struct CallSiteSplittingLegacyPass : public FunctionPass {
- static char ID;
- CallSiteSplittingLegacyPass() : FunctionPass(ID) {
- initializeCallSiteSplittingLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- FunctionPass::getAnalysisUsage(AU);
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
-
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return doCallSiteSplitting(F, TLI, TTI, DT);
- }
-};
-} // namespace
-
-char CallSiteSplittingLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(CallSiteSplittingLegacyPass, "callsite-splitting",
- "Call-site splitting", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(CallSiteSplittingLegacyPass, "callsite-splitting",
- "Call-site splitting", false, false)
-FunctionPass *llvm::createCallSiteSplittingPass() {
- return new CallSiteSplittingLegacyPass();
-}
-
PreservedAnalyses CallSiteSplittingPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
diff --git a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 8858545bbc5d..611e64bd0976 100644
--- a/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -155,16 +155,19 @@ bool ConstantHoistingLegacyPass::runOnFunction(Function &Fn) {
Fn.getEntryBlock(),
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
- if (MadeChange) {
- LLVM_DEBUG(dbgs() << "********** Function after Constant Hoisting: "
- << Fn.getName() << '\n');
- LLVM_DEBUG(dbgs() << Fn);
- }
LLVM_DEBUG(dbgs() << "********** End Constant Hoisting **********\n");
return MadeChange;
}
+void ConstantHoistingPass::collectMatInsertPts(
+ const RebasedConstantListType &RebasedConstants,
+ SmallVectorImpl<Instruction *> &MatInsertPts) const {
+ for (const RebasedConstantInfo &RCI : RebasedConstants)
+ for (const ConstantUser &U : RCI.Uses)
+ MatInsertPts.emplace_back(findMatInsertPt(U.Inst, U.OpndIdx));
+}
+
/// Find the constant materialization insertion point.
Instruction *ConstantHoistingPass::findMatInsertPt(Instruction *Inst,
unsigned Idx) const {
@@ -312,14 +315,15 @@ static void findBestInsertionSet(DominatorTree &DT, BlockFrequencyInfo &BFI,
/// Find an insertion point that dominates all uses.
SetVector<Instruction *> ConstantHoistingPass::findConstantInsertionPoint(
- const ConstantInfo &ConstInfo) const {
+ const ConstantInfo &ConstInfo,
+ const ArrayRef<Instruction *> MatInsertPts) const {
assert(!ConstInfo.RebasedConstants.empty() && "Invalid constant info entry.");
// Collect all basic blocks.
SetVector<BasicBlock *> BBs;
SetVector<Instruction *> InsertPts;
- for (auto const &RCI : ConstInfo.RebasedConstants)
- for (auto const &U : RCI.Uses)
- BBs.insert(findMatInsertPt(U.Inst, U.OpndIdx)->getParent());
+
+ for (Instruction *MatInsertPt : MatInsertPts)
+ BBs.insert(MatInsertPt->getParent());
if (BBs.count(Entry)) {
InsertPts.insert(&Entry->front());
@@ -328,12 +332,8 @@ SetVector<Instruction *> ConstantHoistingPass::findConstantInsertionPoint(
if (BFI) {
findBestInsertionSet(*DT, *BFI, Entry, BBs);
- for (auto *BB : BBs) {
- BasicBlock::iterator InsertPt = BB->begin();
- for (; isa<PHINode>(InsertPt) || InsertPt->isEHPad(); ++InsertPt)
- ;
- InsertPts.insert(&*InsertPt);
- }
+ for (BasicBlock *BB : BBs)
+ InsertPts.insert(&*BB->getFirstInsertionPt());
return InsertPts;
}
@@ -410,8 +410,8 @@ void ConstantHoistingPass::collectConstantCandidates(
// Get offset from the base GV.
PointerType *GVPtrTy = cast<PointerType>(BaseGV->getType());
- IntegerType *PtrIntTy = DL->getIntPtrType(*Ctx, GVPtrTy->getAddressSpace());
- APInt Offset(DL->getTypeSizeInBits(PtrIntTy), /*val*/0, /*isSigned*/true);
+ IntegerType *OffsetTy = DL->getIndexType(*Ctx, GVPtrTy->getAddressSpace());
+ APInt Offset(DL->getTypeSizeInBits(OffsetTy), /*val*/ 0, /*isSigned*/ true);
auto *GEPO = cast<GEPOperator>(ConstExpr);
// TODO: If we have a mix of inbounds and non-inbounds GEPs, then basing a
@@ -432,7 +432,7 @@ void ConstantHoistingPass::collectConstantCandidates(
// to be cheaper than compute it by <Base + Offset>, which can be lowered to
// an ADD instruction or folded into Load/Store instruction.
InstructionCost Cost =
- TTI->getIntImmCostInst(Instruction::Add, 1, Offset, PtrIntTy,
+ TTI->getIntImmCostInst(Instruction::Add, 1, Offset, OffsetTy,
TargetTransformInfo::TCK_SizeAndLatency, Inst);
ConstCandVecType &ExprCandVec = ConstGEPCandMap[BaseGV];
ConstCandMapType::iterator Itr;
@@ -751,45 +751,41 @@ static bool updateOperand(Instruction *Inst, unsigned Idx, Instruction *Mat) {
/// Emit materialization code for all rebased constants and update their
/// users.
void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
- Constant *Offset,
- Type *Ty,
- const ConstantUser &ConstUser) {
+ UserAdjustment *Adj) {
Instruction *Mat = Base;
// The same offset can be dereferenced to different types in nested struct.
- if (!Offset && Ty && Ty != Base->getType())
- Offset = ConstantInt::get(Type::getInt32Ty(*Ctx), 0);
+ if (!Adj->Offset && Adj->Ty && Adj->Ty != Base->getType())
+ Adj->Offset = ConstantInt::get(Type::getInt32Ty(*Ctx), 0);
- if (Offset) {
- Instruction *InsertionPt = findMatInsertPt(ConstUser.Inst,
- ConstUser.OpndIdx);
- if (Ty) {
+ if (Adj->Offset) {
+ if (Adj->Ty) {
// Constant being rebased is a ConstantExpr.
- PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx,
- cast<PointerType>(Ty)->getAddressSpace());
- Base = new BitCastInst(Base, Int8PtrTy, "base_bitcast", InsertionPt);
- Mat = GetElementPtrInst::Create(Type::getInt8Ty(*Ctx), Base,
- Offset, "mat_gep", InsertionPt);
- Mat = new BitCastInst(Mat, Ty, "mat_bitcast", InsertionPt);
+ PointerType *Int8PtrTy = Type::getInt8PtrTy(
+ *Ctx, cast<PointerType>(Adj->Ty)->getAddressSpace());
+ Base = new BitCastInst(Base, Int8PtrTy, "base_bitcast", Adj->MatInsertPt);
+ Mat = GetElementPtrInst::Create(Type::getInt8Ty(*Ctx), Base, Adj->Offset,
+ "mat_gep", Adj->MatInsertPt);
+ Mat = new BitCastInst(Mat, Adj->Ty, "mat_bitcast", Adj->MatInsertPt);
} else
// Constant being rebased is a ConstantInt.
- Mat = BinaryOperator::Create(Instruction::Add, Base, Offset,
- "const_mat", InsertionPt);
+ Mat = BinaryOperator::Create(Instruction::Add, Base, Adj->Offset,
+ "const_mat", Adj->MatInsertPt);
LLVM_DEBUG(dbgs() << "Materialize constant (" << *Base->getOperand(0)
- << " + " << *Offset << ") in BB "
+ << " + " << *Adj->Offset << ") in BB "
<< Mat->getParent()->getName() << '\n'
<< *Mat << '\n');
- Mat->setDebugLoc(ConstUser.Inst->getDebugLoc());
+ Mat->setDebugLoc(Adj->User.Inst->getDebugLoc());
}
- Value *Opnd = ConstUser.Inst->getOperand(ConstUser.OpndIdx);
+ Value *Opnd = Adj->User.Inst->getOperand(Adj->User.OpndIdx);
// Visit constant integer.
if (isa<ConstantInt>(Opnd)) {
- LLVM_DEBUG(dbgs() << "Update: " << *ConstUser.Inst << '\n');
- if (!updateOperand(ConstUser.Inst, ConstUser.OpndIdx, Mat) && Offset)
+ LLVM_DEBUG(dbgs() << "Update: " << *Adj->User.Inst << '\n');
+ if (!updateOperand(Adj->User.Inst, Adj->User.OpndIdx, Mat) && Adj->Offset)
Mat->eraseFromParent();
- LLVM_DEBUG(dbgs() << "To : " << *ConstUser.Inst << '\n');
+ LLVM_DEBUG(dbgs() << "To : " << *Adj->User.Inst << '\n');
return;
}
@@ -809,9 +805,9 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
<< "To : " << *ClonedCastInst << '\n');
}
- LLVM_DEBUG(dbgs() << "Update: " << *ConstUser.Inst << '\n');
- updateOperand(ConstUser.Inst, ConstUser.OpndIdx, ClonedCastInst);
- LLVM_DEBUG(dbgs() << "To : " << *ConstUser.Inst << '\n');
+ LLVM_DEBUG(dbgs() << "Update: " << *Adj->User.Inst << '\n');
+ updateOperand(Adj->User.Inst, Adj->User.OpndIdx, ClonedCastInst);
+ LLVM_DEBUG(dbgs() << "To : " << *Adj->User.Inst << '\n');
return;
}
@@ -819,28 +815,27 @@ void ConstantHoistingPass::emitBaseConstants(Instruction *Base,
if (auto ConstExpr = dyn_cast<ConstantExpr>(Opnd)) {
if (isa<GEPOperator>(ConstExpr)) {
// Operand is a ConstantGEP, replace it.
- updateOperand(ConstUser.Inst, ConstUser.OpndIdx, Mat);
+ updateOperand(Adj->User.Inst, Adj->User.OpndIdx, Mat);
return;
}
// Aside from constant GEPs, only constant cast expressions are collected.
assert(ConstExpr->isCast() && "ConstExpr should be a cast");
- Instruction *ConstExprInst = ConstExpr->getAsInstruction(
- findMatInsertPt(ConstUser.Inst, ConstUser.OpndIdx));
+ Instruction *ConstExprInst = ConstExpr->getAsInstruction(Adj->MatInsertPt);
ConstExprInst->setOperand(0, Mat);
// Use the same debug location as the instruction we are about to update.
- ConstExprInst->setDebugLoc(ConstUser.Inst->getDebugLoc());
+ ConstExprInst->setDebugLoc(Adj->User.Inst->getDebugLoc());
LLVM_DEBUG(dbgs() << "Create instruction: " << *ConstExprInst << '\n'
<< "From : " << *ConstExpr << '\n');
- LLVM_DEBUG(dbgs() << "Update: " << *ConstUser.Inst << '\n');
- if (!updateOperand(ConstUser.Inst, ConstUser.OpndIdx, ConstExprInst)) {
+ LLVM_DEBUG(dbgs() << "Update: " << *Adj->User.Inst << '\n');
+ if (!updateOperand(Adj->User.Inst, Adj->User.OpndIdx, ConstExprInst)) {
ConstExprInst->eraseFromParent();
- if (Offset)
+ if (Adj->Offset)
Mat->eraseFromParent();
}
- LLVM_DEBUG(dbgs() << "To : " << *ConstUser.Inst << '\n');
+ LLVM_DEBUG(dbgs() << "To : " << *Adj->User.Inst << '\n');
return;
}
}
@@ -851,8 +846,11 @@ bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) {
bool MadeChange = false;
SmallVectorImpl<consthoist::ConstantInfo> &ConstInfoVec =
BaseGV ? ConstGEPInfoMap[BaseGV] : ConstIntInfoVec;
- for (auto const &ConstInfo : ConstInfoVec) {
- SetVector<Instruction *> IPSet = findConstantInsertionPoint(ConstInfo);
+ for (const consthoist::ConstantInfo &ConstInfo : ConstInfoVec) {
+ SmallVector<Instruction *, 4> MatInsertPts;
+ collectMatInsertPts(ConstInfo.RebasedConstants, MatInsertPts);
+ SetVector<Instruction *> IPSet =
+ findConstantInsertionPoint(ConstInfo, MatInsertPts);
// We can have an empty set if the function contains unreachable blocks.
if (IPSet.empty())
continue;
@@ -862,22 +860,21 @@ bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) {
unsigned NotRebasedNum = 0;
for (Instruction *IP : IPSet) {
// First, collect constants depending on this IP of the base.
- unsigned Uses = 0;
- using RebasedUse = std::tuple<Constant *, Type *, ConstantUser>;
- SmallVector<RebasedUse, 4> ToBeRebased;
+ UsesNum = 0;
+ SmallVector<UserAdjustment, 4> ToBeRebased;
+ unsigned MatCtr = 0;
for (auto const &RCI : ConstInfo.RebasedConstants) {
+ UsesNum += RCI.Uses.size();
for (auto const &U : RCI.Uses) {
- Uses++;
- BasicBlock *OrigMatInsertBB =
- findMatInsertPt(U.Inst, U.OpndIdx)->getParent();
+ Instruction *MatInsertPt = MatInsertPts[MatCtr++];
+ BasicBlock *OrigMatInsertBB = MatInsertPt->getParent();
// If Base constant is to be inserted in multiple places,
// generate rebase for U using the Base dominating U.
if (IPSet.size() == 1 ||
DT->dominates(IP->getParent(), OrigMatInsertBB))
- ToBeRebased.push_back(RebasedUse(RCI.Offset, RCI.Ty, U));
+ ToBeRebased.emplace_back(RCI.Offset, RCI.Ty, MatInsertPt, U);
}
}
- UsesNum = Uses;
// If only a few constants depend on this IP of the base, skip rebasing,
// assuming the base and the rebased have the same materialization cost.
@@ -905,15 +902,12 @@ bool ConstantHoistingPass::emitBaseConstants(GlobalVariable *BaseGV) {
<< *Base << '\n');
// Emit materialization code for rebased constants depending on this IP.
- for (auto const &R : ToBeRebased) {
- Constant *Off = std::get<0>(R);
- Type *Ty = std::get<1>(R);
- ConstantUser U = std::get<2>(R);
- emitBaseConstants(Base, Off, Ty, U);
+ for (UserAdjustment &R : ToBeRebased) {
+ emitBaseConstants(Base, &R);
ReBasesNum++;
// Use the same debug location as the last user of the constant.
Base->setDebugLoc(DILocation::getMergedLocation(
- Base->getDebugLoc(), U.Inst->getDebugLoc()));
+ Base->getDebugLoc(), R.User.Inst->getDebugLoc()));
}
assert(!Base->use_empty() && "The use list is empty!?");
assert(isa<Instruction>(Base->user_back()) &&
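
Two of the ConstantHoisting changes above are mechanical cleanups: the (Offset, Ty, ConstantUser) tuple becomes a named UserAdjustment struct, and the hand-written loop that skipped PHIs and EH pads is replaced by BasicBlock::getFirstInsertionPt(), which does the same skipping. A sketch of the latter, assuming a block where insertion is possible:

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

// Equivalent to walking past isa<PHINode>() / isEHPad() instructions by hand.
static llvm::Instruction *firstRealInsertPoint(llvm::BasicBlock *BB) {
  return &*BB->getFirstInsertionPt();
}
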
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 12fcb6aa9846..15628d32280d 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ConstraintSystem.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
@@ -26,13 +27,18 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
+#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cmath>
+#include <optional>
#include <string>
using namespace llvm;
@@ -48,6 +54,10 @@ static cl::opt<unsigned>
MaxRows("constraint-elimination-max-rows", cl::init(500), cl::Hidden,
cl::desc("Maximum number of rows to keep in constraint system"));
+static cl::opt<bool> DumpReproducers(
+ "constraint-elimination-dump-reproducers", cl::init(false), cl::Hidden,
+ cl::desc("Dump IR to reproduce successful transformations."));
+
static int64_t MaxConstraintValue = std::numeric_limits<int64_t>::max();
static int64_t MinSignedConstraintValue = std::numeric_limits<int64_t>::min();
@@ -65,7 +75,86 @@ static int64_t addWithOverflow(int64_t A, int64_t B) {
return Result;
}
+static Instruction *getContextInstForUse(Use &U) {
+ Instruction *UserI = cast<Instruction>(U.getUser());
+ if (auto *Phi = dyn_cast<PHINode>(UserI))
+ UserI = Phi->getIncomingBlock(U)->getTerminator();
+ return UserI;
+}
+
namespace {
+/// Represents either
+/// * a condition that holds on entry to a block (=conditional fact)
+/// * an assume (=assume fact)
+/// * a use of a compare instruction to simplify.
+/// It also tracks the Dominator DFS in and out numbers for each entry.
+struct FactOrCheck {
+ union {
+ Instruction *Inst;
+ Use *U;
+ };
+ unsigned NumIn;
+ unsigned NumOut;
+ bool HasInst;
+ bool Not;
+
+ FactOrCheck(DomTreeNode *DTN, Instruction *Inst, bool Not)
+ : Inst(Inst), NumIn(DTN->getDFSNumIn()), NumOut(DTN->getDFSNumOut()),
+ HasInst(true), Not(Not) {}
+
+ FactOrCheck(DomTreeNode *DTN, Use *U)
+ : U(U), NumIn(DTN->getDFSNumIn()), NumOut(DTN->getDFSNumOut()),
+ HasInst(false), Not(false) {}
+
+ static FactOrCheck getFact(DomTreeNode *DTN, Instruction *Inst,
+ bool Not = false) {
+ return FactOrCheck(DTN, Inst, Not);
+ }
+
+ static FactOrCheck getCheck(DomTreeNode *DTN, Use *U) {
+ return FactOrCheck(DTN, U);
+ }
+
+ static FactOrCheck getCheck(DomTreeNode *DTN, CallInst *CI) {
+ return FactOrCheck(DTN, CI, false);
+ }
+
+ bool isCheck() const {
+ return !HasInst ||
+ match(Inst, m_Intrinsic<Intrinsic::ssub_with_overflow>());
+ }
+
+ Instruction *getContextInst() const {
+ if (HasInst)
+ return Inst;
+ return getContextInstForUse(*U);
+ }
+ Instruction *getInstructionToSimplify() const {
+ assert(isCheck());
+ if (HasInst)
+ return Inst;
+ // The use may have been simplified to a constant already.
+ return dyn_cast<Instruction>(*U);
+ }
+ bool isConditionFact() const { return !isCheck() && isa<CmpInst>(Inst); }
+};
+
+/// Keep state required to build worklist.
+struct State {
+ DominatorTree &DT;
+ SmallVector<FactOrCheck, 64> WorkList;
+
+ State(DominatorTree &DT) : DT(DT) {}
+
+ /// Process block \p BB and add known facts to work-list.
+ void addInfoFor(BasicBlock &BB);
+
+ /// Returns true if we can add a known condition from BB to its successor
+ /// block Succ.
+ bool canAddSuccessor(BasicBlock &BB, BasicBlock *Succ) const {
+ return DT.dominates(BasicBlockEdge(&BB, Succ), Succ);
+ }
+};
class ConstraintInfo;
@@ -100,12 +189,13 @@ struct ConstraintTy {
SmallVector<SmallVector<int64_t, 8>> ExtraInfo;
bool IsSigned = false;
- bool IsEq = false;
ConstraintTy() = default;
- ConstraintTy(SmallVector<int64_t, 8> Coefficients, bool IsSigned)
- : Coefficients(Coefficients), IsSigned(IsSigned) {}
+ ConstraintTy(SmallVector<int64_t, 8> Coefficients, bool IsSigned, bool IsEq,
+ bool IsNe)
+ : Coefficients(Coefficients), IsSigned(IsSigned), IsEq(IsEq), IsNe(IsNe) {
+ }
unsigned size() const { return Coefficients.size(); }
@@ -114,6 +204,21 @@ struct ConstraintTy {
/// Returns true if all preconditions for this list of constraints are
/// satisfied given \p CS and the corresponding \p Value2Index mapping.
bool isValid(const ConstraintInfo &Info) const;
+
+ bool isEq() const { return IsEq; }
+
+ bool isNe() const { return IsNe; }
+
+ /// Check if the current constraint is implied by the given ConstraintSystem.
+ ///
+ /// \return true or false if the constraint is proven to be respectively true,
+ /// or false. When the constraint cannot be proven to be either true or false,
+ /// std::nullopt is returned.
+ std::optional<bool> isImpliedBy(const ConstraintSystem &CS) const;
+
+private:
+ bool IsEq = false;
+ bool IsNe = false;
};
/// Wrapper encapsulating separate constraint systems and corresponding value
@@ -123,8 +228,6 @@ struct ConstraintTy {
/// based on signed-ness, certain conditions can be transferred between the two
/// systems.
class ConstraintInfo {
- DenseMap<Value *, unsigned> UnsignedValue2Index;
- DenseMap<Value *, unsigned> SignedValue2Index;
ConstraintSystem UnsignedCS;
ConstraintSystem SignedCS;
@@ -132,13 +235,14 @@ class ConstraintInfo {
const DataLayout &DL;
public:
- ConstraintInfo(const DataLayout &DL) : DL(DL) {}
+ ConstraintInfo(const DataLayout &DL, ArrayRef<Value *> FunctionArgs)
+ : UnsignedCS(FunctionArgs), SignedCS(FunctionArgs), DL(DL) {}
DenseMap<Value *, unsigned> &getValue2Index(bool Signed) {
- return Signed ? SignedValue2Index : UnsignedValue2Index;
+ return Signed ? SignedCS.getValue2Index() : UnsignedCS.getValue2Index();
}
const DenseMap<Value *, unsigned> &getValue2Index(bool Signed) const {
- return Signed ? SignedValue2Index : UnsignedValue2Index;
+ return Signed ? SignedCS.getValue2Index() : UnsignedCS.getValue2Index();
}
ConstraintSystem &getCS(bool Signed) {
@@ -235,9 +339,8 @@ static bool canUseSExt(ConstantInt *CI) {
}
static Decomposition
-decomposeGEP(GetElementPtrInst &GEP,
- SmallVectorImpl<PreconditionTy> &Preconditions, bool IsSigned,
- const DataLayout &DL) {
+decomposeGEP(GEPOperator &GEP, SmallVectorImpl<PreconditionTy> &Preconditions,
+ bool IsSigned, const DataLayout &DL) {
// Do not reason about pointers where the index size is larger than 64 bits,
// as the coefficients used to encode constraints are 64 bit integers.
if (DL.getIndexTypeSizeInBits(GEP.getPointerOperand()->getType()) > 64)
@@ -257,7 +360,7 @@ decomposeGEP(GetElementPtrInst &GEP,
// Handle the (gep (gep ....), C) case by incrementing the constant
// coefficient of the inner GEP, if C is a constant.
- auto *InnerGEP = dyn_cast<GetElementPtrInst>(GEP.getPointerOperand());
+ auto *InnerGEP = dyn_cast<GEPOperator>(GEP.getPointerOperand());
if (VariableOffsets.empty() && InnerGEP && InnerGEP->getNumOperands() == 2) {
auto Result = decompose(InnerGEP, Preconditions, IsSigned, DL);
Result.add(ConstantOffset.getSExtValue());
@@ -320,6 +423,13 @@ static Decomposition decompose(Value *V,
if (match(V, m_NSWAdd(m_Value(Op0), m_Value(Op1))))
return MergeResults(Op0, Op1, IsSigned);
+ ConstantInt *CI;
+ if (match(V, m_NSWMul(m_Value(Op0), m_ConstantInt(CI)))) {
+ auto Result = decompose(Op0, Preconditions, IsSigned, DL);
+ Result.mul(CI->getSExtValue());
+ return Result;
+ }
+
return V;
}
@@ -329,7 +439,7 @@ static Decomposition decompose(Value *V,
return int64_t(CI->getZExtValue());
}
- if (auto *GEP = dyn_cast<GetElementPtrInst>(V))
+ if (auto *GEP = dyn_cast<GEPOperator>(V))
return decomposeGEP(*GEP, Preconditions, IsSigned, DL);
Value *Op0;
@@ -363,10 +473,17 @@ static Decomposition decompose(Value *V,
return MergeResults(Op0, CI, true);
}
+ // Decompose an "or" as an add if there are no common bits between the operands.
+ if (match(V, m_Or(m_Value(Op0), m_ConstantInt(CI))) &&
+ haveNoCommonBitsSet(Op0, CI, DL)) {
+ return MergeResults(Op0, CI, IsSigned);
+ }
+
if (match(V, m_NUWShl(m_Value(Op1), m_ConstantInt(CI))) && canUseSExt(CI)) {
- int64_t Mult = int64_t(std::pow(int64_t(2), CI->getSExtValue()));
+ if (CI->getSExtValue() < 0 || CI->getSExtValue() >= 64)
+ return {V, IsKnownNonNegative};
auto Result = decompose(Op1, Preconditions, IsSigned, DL);
- Result.mul(Mult);
+ Result.mul(int64_t{1} << CI->getSExtValue());
return Result;
}
@@ -390,6 +507,8 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
SmallVectorImpl<Value *> &NewVariables) const {
assert(NewVariables.empty() && "NewVariables must be empty when passed in");
bool IsEq = false;
+ bool IsNe = false;
+
// Try to convert Pred to one of ULE/SLT/SLE/SLT.
switch (Pred) {
case CmpInst::ICMP_UGT:
@@ -409,10 +528,13 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
}
break;
case CmpInst::ICMP_NE:
- if (!match(Op1, m_Zero()))
- return {};
- Pred = CmpInst::getSwappedPredicate(CmpInst::ICMP_UGT);
- std::swap(Op0, Op1);
+ if (match(Op1, m_Zero())) {
+ Pred = CmpInst::getSwappedPredicate(CmpInst::ICMP_UGT);
+ std::swap(Op0, Op1);
+ } else {
+ IsNe = true;
+ Pred = CmpInst::ICMP_ULE;
+ }
break;
default:
break;
@@ -459,11 +581,10 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
// subtracting all coefficients from B.
ConstraintTy Res(
SmallVector<int64_t, 8>(Value2Index.size() + NewVariables.size() + 1, 0),
- IsSigned);
+ IsSigned, IsEq, IsNe);
// Collect variables that are known to be positive in all uses in the
// constraint.
DenseMap<Value *, bool> KnownNonNegativeVariables;
- Res.IsEq = IsEq;
auto &R = Res.Coefficients;
for (const auto &KV : VariablesA) {
R[GetOrAddIndex(KV.Variable)] += KV.Coefficient;
@@ -473,7 +594,9 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
}
for (const auto &KV : VariablesB) {
- R[GetOrAddIndex(KV.Variable)] -= KV.Coefficient;
+ if (SubOverflow(R[GetOrAddIndex(KV.Variable)], KV.Coefficient,
+ R[GetOrAddIndex(KV.Variable)]))
+ return {};
auto I =
KnownNonNegativeVariables.insert({KV.Variable, KV.IsKnownNonNegative});
I.first->second &= KV.IsKnownNonNegative;
@@ -501,8 +624,8 @@ ConstraintInfo::getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
// Add extra constraints for variables that are known positive.
for (auto &KV : KnownNonNegativeVariables) {
- if (!KV.second || (Value2Index.find(KV.first) == Value2Index.end() &&
- NewIndexMap.find(KV.first) == NewIndexMap.end()))
+ if (!KV.second ||
+ (!Value2Index.contains(KV.first) && !NewIndexMap.contains(KV.first)))
continue;
SmallVector<int64_t, 8> C(Value2Index.size() + NewVariables.size() + 1, 0);
C[GetOrAddIndex(KV.first)] = -1;
@@ -524,7 +647,7 @@ ConstraintTy ConstraintInfo::getConstraintForSolving(CmpInst::Predicate Pred,
SmallVector<Value *> NewVariables;
ConstraintTy R = getConstraint(Pred, Op0, Op1, NewVariables);
- if (R.IsEq || !NewVariables.empty())
+ if (!NewVariables.empty())
return {};
return R;
}
@@ -536,10 +659,54 @@ bool ConstraintTy::isValid(const ConstraintInfo &Info) const {
});
}
+std::optional<bool>
+ConstraintTy::isImpliedBy(const ConstraintSystem &CS) const {
+ bool IsConditionImplied = CS.isConditionImplied(Coefficients);
+
+ if (IsEq || IsNe) {
+ auto NegatedOrEqual = ConstraintSystem::negateOrEqual(Coefficients);
+ bool IsNegatedOrEqualImplied =
+ !NegatedOrEqual.empty() && CS.isConditionImplied(NegatedOrEqual);
+
+ // In order to check that `%a == %b` is true (equality), both conditions `%a
+ // >= %b` and `%a <= %b` must hold true. When checking for equality (`IsEq`
+ // is true), we return true if they both hold, false in the other cases.
+ if (IsConditionImplied && IsNegatedOrEqualImplied)
+ return IsEq;
+
+ auto Negated = ConstraintSystem::negate(Coefficients);
+ bool IsNegatedImplied = !Negated.empty() && CS.isConditionImplied(Negated);
+
+ auto StrictLessThan = ConstraintSystem::toStrictLessThan(Coefficients);
+ bool IsStrictLessThanImplied =
+ !StrictLessThan.empty() && CS.isConditionImplied(StrictLessThan);
+
+ // In order to check that `%a != %b` is true (non-equality), either
+ // condition `%a > %b` or `%a < %b` must hold true. When checking for
+ // non-equality (`IsNe` is true), we return true if one of the two holds,
+ // false in the other cases.
+ if (IsNegatedImplied || IsStrictLessThanImplied)
+ return IsNe;
+
+ return std::nullopt;
+ }
+
+ if (IsConditionImplied)
+ return true;
+
+ auto Negated = ConstraintSystem::negate(Coefficients);
+ auto IsNegatedImplied = !Negated.empty() && CS.isConditionImplied(Negated);
+ if (IsNegatedImplied)
+ return false;
+
+ // Neither the condition nor its negation is implied; nothing was proven.
+ return std::nullopt;
+}
+
bool ConstraintInfo::doesHold(CmpInst::Predicate Pred, Value *A,
Value *B) const {
auto R = getConstraintForSolving(Pred, A, B);
- return R.Preconditions.empty() && !R.empty() &&
+ return R.isValid(*this) &&
getCS(R.IsSigned).isConditionImplied(R.Coefficients);
}
@@ -568,11 +735,15 @@ void ConstraintInfo::transferToOtherSystem(
if (doesHold(CmpInst::ICMP_SGE, A, ConstantInt::get(B->getType(), 0)))
addFact(CmpInst::ICMP_ULT, A, B, NumIn, NumOut, DFSInStack);
break;
- case CmpInst::ICMP_SGT:
+ case CmpInst::ICMP_SGT: {
if (doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), -1)))
addFact(CmpInst::ICMP_UGE, A, ConstantInt::get(B->getType(), 0), NumIn,
NumOut, DFSInStack);
+ if (doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0)))
+ addFact(CmpInst::ICMP_UGT, A, B, NumIn, NumOut, DFSInStack);
+
break;
+ }
case CmpInst::ICMP_SGE:
if (doesHold(CmpInst::ICMP_SGE, B, ConstantInt::get(B->getType(), 0))) {
addFact(CmpInst::ICMP_UGE, A, B, NumIn, NumOut, DFSInStack);
@@ -581,77 +752,13 @@ void ConstraintInfo::transferToOtherSystem(
}
}
-namespace {
-/// Represents either
-/// * a condition that holds on entry to a block (=conditional fact)
-/// * an assume (=assume fact)
-/// * an instruction to simplify.
-/// It also tracks the Dominator DFS in and out numbers for each entry.
-struct FactOrCheck {
- Instruction *Inst;
- unsigned NumIn;
- unsigned NumOut;
- bool IsCheck;
- bool Not;
-
- FactOrCheck(DomTreeNode *DTN, Instruction *Inst, bool IsCheck, bool Not)
- : Inst(Inst), NumIn(DTN->getDFSNumIn()), NumOut(DTN->getDFSNumOut()),
- IsCheck(IsCheck), Not(Not) {}
-
- static FactOrCheck getFact(DomTreeNode *DTN, Instruction *Inst,
- bool Not = false) {
- return FactOrCheck(DTN, Inst, false, Not);
- }
-
- static FactOrCheck getCheck(DomTreeNode *DTN, Instruction *Inst) {
- return FactOrCheck(DTN, Inst, true, false);
- }
-
- bool isAssumeFact() const {
- if (!IsCheck && isa<IntrinsicInst>(Inst)) {
- assert(match(Inst, m_Intrinsic<Intrinsic::assume>()));
- return true;
- }
- return false;
- }
-
- bool isConditionFact() const { return !IsCheck && isa<CmpInst>(Inst); }
-};
-
-/// Keep state required to build worklist.
-struct State {
- DominatorTree &DT;
- SmallVector<FactOrCheck, 64> WorkList;
-
- State(DominatorTree &DT) : DT(DT) {}
-
- /// Process block \p BB and add known facts to work-list.
- void addInfoFor(BasicBlock &BB);
-
- /// Returns true if we can add a known condition from BB to its successor
- /// block Succ.
- bool canAddSuccessor(BasicBlock &BB, BasicBlock *Succ) const {
- return DT.dominates(BasicBlockEdge(&BB, Succ), Succ);
- }
-};
-
-} // namespace
-
#ifndef NDEBUG
-static void dumpWithNames(const ConstraintSystem &CS,
- DenseMap<Value *, unsigned> &Value2Index) {
- SmallVector<std::string> Names(Value2Index.size(), "");
- for (auto &KV : Value2Index) {
- Names[KV.second - 1] = std::string("%") + KV.first->getName().str();
- }
- CS.dump(Names);
-}
-static void dumpWithNames(ArrayRef<int64_t> C,
- DenseMap<Value *, unsigned> &Value2Index) {
- ConstraintSystem CS;
+static void dumpConstraint(ArrayRef<int64_t> C,
+ const DenseMap<Value *, unsigned> &Value2Index) {
+ ConstraintSystem CS(Value2Index);
CS.addVariableRowFill(C);
- dumpWithNames(CS, Value2Index);
+ CS.dump();
}
#endif
@@ -661,12 +768,24 @@ void State::addInfoFor(BasicBlock &BB) {
// Queue conditions and assumes.
for (Instruction &I : BB) {
if (auto Cmp = dyn_cast<ICmpInst>(&I)) {
- WorkList.push_back(FactOrCheck::getCheck(DT.getNode(&BB), Cmp));
+ for (Use &U : Cmp->uses()) {
+ auto *UserI = getContextInstForUse(U);
+ auto *DTN = DT.getNode(UserI->getParent());
+ if (!DTN)
+ continue;
+ WorkList.push_back(FactOrCheck::getCheck(DTN, &U));
+ }
continue;
}
if (match(&I, m_Intrinsic<Intrinsic::ssub_with_overflow>())) {
- WorkList.push_back(FactOrCheck::getCheck(DT.getNode(&BB), &I));
+ WorkList.push_back(
+ FactOrCheck::getCheck(DT.getNode(&BB), cast<CallInst>(&I)));
+ continue;
+ }
+
+ if (isa<MinMaxIntrinsic>(&I)) {
+ WorkList.push_back(FactOrCheck::getFact(DT.getNode(&BB), &I));
continue;
}
@@ -748,7 +867,160 @@ void State::addInfoFor(BasicBlock &BB) {
FactOrCheck::getFact(DT.getNode(Br->getSuccessor(1)), CmpI, true));
}
-static bool checkAndReplaceCondition(CmpInst *Cmp, ConstraintInfo &Info) {
+namespace {
+/// Helper to keep track of a condition and if it should be treated as negated
+/// for reproducer construction.
+/// Pred == Predicate::BAD_ICMP_PREDICATE indicates that this entry is a
+/// placeholder to keep the ReproducerCondStack in sync with DFSInStack.
+struct ReproducerEntry {
+ ICmpInst::Predicate Pred;
+ Value *LHS;
+ Value *RHS;
+
+ ReproducerEntry(ICmpInst::Predicate Pred, Value *LHS, Value *RHS)
+ : Pred(Pred), LHS(LHS), RHS(RHS) {}
+};
+} // namespace
+
+/// Helper function to generate a reproducer function for simplifying \p Cond.
+/// The reproducer function contains a series of @llvm.assume calls, one for
+/// each condition in \p Stack. For each condition, the operand instructions are
+/// cloned until we reach operands that have an entry in \p Value2Index. Those
+/// will then be added as function arguments. \p DT is used to order cloned
+/// instructions. The reproducer function will get added to \p M, if it is
+/// non-null. Otherwise no reproducer function is generated.
+static void generateReproducer(CmpInst *Cond, Module *M,
+ ArrayRef<ReproducerEntry> Stack,
+ ConstraintInfo &Info, DominatorTree &DT) {
+ if (!M)
+ return;
+
+ LLVMContext &Ctx = Cond->getContext();
+
+ LLVM_DEBUG(dbgs() << "Creating reproducer for " << *Cond << "\n");
+
+ ValueToValueMapTy Old2New;
+ SmallVector<Value *> Args;
+ SmallPtrSet<Value *, 8> Seen;
+ // Traverse Cond and its operands recursively until we reach a value that's in
+ // Value2Index or not an instruction, or not an operation that
+ // ConstraintElimination can decompose. Such values are considered
+ // external inputs to the reproducer; they are collected and added as function
+ // arguments later.
+ auto CollectArguments = [&](ArrayRef<Value *> Ops, bool IsSigned) {
+ auto &Value2Index = Info.getValue2Index(IsSigned);
+ SmallVector<Value *, 4> WorkList(Ops);
+ while (!WorkList.empty()) {
+ Value *V = WorkList.pop_back_val();
+ if (!Seen.insert(V).second)
+ continue;
+ if (Old2New.find(V) != Old2New.end())
+ continue;
+ if (isa<Constant>(V))
+ continue;
+
+ auto *I = dyn_cast<Instruction>(V);
+ if (Value2Index.contains(V) || !I ||
+ !isa<CmpInst, BinaryOperator, GEPOperator, CastInst>(V)) {
+ Old2New[V] = V;
+ Args.push_back(V);
+ LLVM_DEBUG(dbgs() << " found external input " << *V << "\n");
+ } else {
+ append_range(WorkList, I->operands());
+ }
+ }
+ };
+
+ for (auto &Entry : Stack)
+ if (Entry.Pred != ICmpInst::BAD_ICMP_PREDICATE)
+ CollectArguments({Entry.LHS, Entry.RHS}, ICmpInst::isSigned(Entry.Pred));
+ CollectArguments(Cond, ICmpInst::isSigned(Cond->getPredicate()));
+
+ SmallVector<Type *> ParamTys;
+ for (auto *P : Args)
+ ParamTys.push_back(P->getType());
+
+ FunctionType *FTy = FunctionType::get(Cond->getType(), ParamTys,
+ /*isVarArg=*/false);
+ Function *F = Function::Create(FTy, Function::ExternalLinkage,
+ Cond->getModule()->getName() +
+ Cond->getFunction()->getName() + "repro",
+ M);
+ // Add arguments to the reproducer function for each external value collected.
+ for (unsigned I = 0; I < Args.size(); ++I) {
+ F->getArg(I)->setName(Args[I]->getName());
+ Old2New[Args[I]] = F->getArg(I);
+ }
+
+ BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
+ IRBuilder<> Builder(Entry);
+ Builder.CreateRet(Builder.getTrue());
+ Builder.SetInsertPoint(Entry->getTerminator());
+
+ // Clone instructions in \p Ops and their operands recursively until reaching
+ // a value in Value2Index (external input to the reproducer). Update Old2New
+ // mapping for the original and cloned instructions. Sort instructions to
+ // clone by dominance, then insert the cloned instructions in the function.
+ auto CloneInstructions = [&](ArrayRef<Value *> Ops, bool IsSigned) {
+ SmallVector<Value *, 4> WorkList(Ops);
+ SmallVector<Instruction *> ToClone;
+ auto &Value2Index = Info.getValue2Index(IsSigned);
+ while (!WorkList.empty()) {
+ Value *V = WorkList.pop_back_val();
+ if (Old2New.find(V) != Old2New.end())
+ continue;
+
+ auto *I = dyn_cast<Instruction>(V);
+ if (!Value2Index.contains(V) && I) {
+ Old2New[V] = nullptr;
+ ToClone.push_back(I);
+ append_range(WorkList, I->operands());
+ }
+ }
+
+ sort(ToClone,
+ [&DT](Instruction *A, Instruction *B) { return DT.dominates(A, B); });
+ for (Instruction *I : ToClone) {
+ Instruction *Cloned = I->clone();
+ Old2New[I] = Cloned;
+ Old2New[I]->setName(I->getName());
+ Cloned->insertBefore(&*Builder.GetInsertPoint());
+ Cloned->dropUnknownNonDebugMetadata();
+ Cloned->setDebugLoc({});
+ }
+ };
+
+ // Materialize the assumptions for the reproducer using the entries in Stack.
+ // That is, first clone the operands of the condition recursively until we
+ // reach an external input to the reproducer and add them to the reproducer
+ // function. Then add an ICmp for the condition (with the inverse predicate if
+ // the entry is negated) and an assume using the ICmp.
+ for (auto &Entry : Stack) {
+ if (Entry.Pred == ICmpInst::BAD_ICMP_PREDICATE)
+ continue;
+
+ LLVM_DEBUG(
+ dbgs() << " Materializing assumption icmp " << Entry.Pred << ' ';
+ Entry.LHS->printAsOperand(dbgs(), /*PrintType=*/true); dbgs() << ", ";
+ Entry.RHS->printAsOperand(dbgs(), /*PrintType=*/false); dbgs() << "\n");
+ CloneInstructions({Entry.LHS, Entry.RHS}, CmpInst::isSigned(Entry.Pred));
+
+ auto *Cmp = Builder.CreateICmp(Entry.Pred, Entry.LHS, Entry.RHS);
+ Builder.CreateAssumption(Cmp);
+ }
+
+ // Finally, clone the condition to reproduce and remap instruction operands in
+ // the reproducer using Old2New.
+ CloneInstructions(Cond, CmpInst::isSigned(Cond->getPredicate()));
+ Entry->getTerminator()->setOperand(0, Cond);
+ remapInstructionsInBlocks({Entry}, Old2New);
+
+ assert(!verifyFunction(*F, &dbgs()));
+}
+
+static std::optional<bool> checkCondition(CmpInst *Cmp, ConstraintInfo &Info,
+ unsigned NumIn, unsigned NumOut,
+ Instruction *ContextInst) {
LLVM_DEBUG(dbgs() << "Checking " << *Cmp << "\n");
CmpInst::Predicate Pred = Cmp->getPredicate();
@@ -758,7 +1030,7 @@ static bool checkAndReplaceCondition(CmpInst *Cmp, ConstraintInfo &Info) {
auto R = Info.getConstraintForSolving(Pred, A, B);
if (R.empty() || !R.isValid(Info)){
LLVM_DEBUG(dbgs() << " failed to decompose condition\n");
- return false;
+ return std::nullopt;
}
auto &CSToUse = Info.getCS(R.IsSigned);
@@ -773,39 +1045,107 @@ static bool checkAndReplaceCondition(CmpInst *Cmp, ConstraintInfo &Info) {
CSToUse.popLastConstraint();
});
- bool Changed = false;
- if (CSToUse.isConditionImplied(R.Coefficients)) {
+ if (auto ImpliedCondition = R.isImpliedBy(CSToUse)) {
if (!DebugCounter::shouldExecute(EliminatedCounter))
- return false;
+ return std::nullopt;
LLVM_DEBUG({
- dbgs() << "Condition " << *Cmp << " implied by dominating constraints\n";
- dumpWithNames(CSToUse, Info.getValue2Index(R.IsSigned));
+ if (*ImpliedCondition) {
+ dbgs() << "Condition " << *Cmp;
+ } else {
+ auto InversePred = Cmp->getInversePredicate();
+ dbgs() << "Condition " << CmpInst::getPredicateName(InversePred) << " "
+ << *A << ", " << *B;
+ }
+ dbgs() << " implied by dominating constraints\n";
+ CSToUse.dump();
});
- Constant *TrueC =
- ConstantInt::getTrue(CmpInst::makeCmpResultType(Cmp->getType()));
- Cmp->replaceUsesWithIf(TrueC, [](Use &U) {
+ return ImpliedCondition;
+ }
+
+ return std::nullopt;
+}
+
+static bool checkAndReplaceCondition(
+ CmpInst *Cmp, ConstraintInfo &Info, unsigned NumIn, unsigned NumOut,
+ Instruction *ContextInst, Module *ReproducerModule,
+ ArrayRef<ReproducerEntry> ReproducerCondStack, DominatorTree &DT) {
+ auto ReplaceCmpWithConstant = [&](CmpInst *Cmp, bool IsTrue) {
+ generateReproducer(Cmp, ReproducerModule, ReproducerCondStack, Info, DT);
+ Constant *ConstantC = ConstantInt::getBool(
+ CmpInst::makeCmpResultType(Cmp->getType()), IsTrue);
+ Cmp->replaceUsesWithIf(ConstantC, [&DT, NumIn, NumOut,
+ ContextInst](Use &U) {
+ auto *UserI = getContextInstForUse(U);
+ auto *DTN = DT.getNode(UserI->getParent());
+ if (!DTN || DTN->getDFSNumIn() < NumIn || DTN->getDFSNumOut() > NumOut)
+ return false;
+ if (UserI->getParent() == ContextInst->getParent() &&
+ UserI->comesBefore(ContextInst))
+ return false;
+
// Conditions in an assume trivially simplify to true. Skip uses
// in assume calls to not destroy the available information.
auto *II = dyn_cast<IntrinsicInst>(U.getUser());
return !II || II->getIntrinsicID() != Intrinsic::assume;
});
NumCondsRemoved++;
+ return true;
+ };
+
+ if (auto ImpliedCondition =
+ checkCondition(Cmp, Info, NumIn, NumOut, ContextInst))
+ return ReplaceCmpWithConstant(Cmp, *ImpliedCondition);
+ return false;
+}
+
+static void
+removeEntryFromStack(const StackEntry &E, ConstraintInfo &Info,
+ Module *ReproducerModule,
+ SmallVectorImpl<ReproducerEntry> &ReproducerCondStack,
+ SmallVectorImpl<StackEntry> &DFSInStack) {
+ Info.popLastConstraint(E.IsSigned);
+ // Remove variables in the system that went out of scope.
+ auto &Mapping = Info.getValue2Index(E.IsSigned);
+ for (Value *V : E.ValuesToRelease)
+ Mapping.erase(V);
+ Info.popLastNVariables(E.IsSigned, E.ValuesToRelease.size());
+ DFSInStack.pop_back();
+ if (ReproducerModule)
+ ReproducerCondStack.pop_back();
+}
+
+/// Check if the first condition for an AND implies the second.
+static bool checkAndSecondOpImpliedByFirst(
+ FactOrCheck &CB, ConstraintInfo &Info, Module *ReproducerModule,
+ SmallVectorImpl<ReproducerEntry> &ReproducerCondStack,
+ SmallVectorImpl<StackEntry> &DFSInStack) {
+ CmpInst::Predicate Pred;
+ Value *A, *B;
+ Instruction *And = CB.getContextInst();
+ if (!match(And->getOperand(0), m_ICmp(Pred, m_Value(A), m_Value(B))))
+ return false;
+
+ // Optimistically add fact from first condition.
+ unsigned OldSize = DFSInStack.size();
+ Info.addFact(Pred, A, B, CB.NumIn, CB.NumOut, DFSInStack);
+ if (OldSize == DFSInStack.size())
+ return false;
+
+ bool Changed = false;
+ // Check if the second condition can be simplified now.
+ if (auto ImpliedCondition =
+ checkCondition(cast<ICmpInst>(And->getOperand(1)), Info, CB.NumIn,
+ CB.NumOut, CB.getContextInst())) {
+ And->setOperand(1, ConstantInt::getBool(And->getType(), *ImpliedCondition));
Changed = true;
}
- if (CSToUse.isConditionImplied(ConstraintSystem::negate(R.Coefficients))) {
- if (!DebugCounter::shouldExecute(EliminatedCounter))
- return false;
- LLVM_DEBUG({
- dbgs() << "Condition !" << *Cmp << " implied by dominating constraints\n";
- dumpWithNames(CSToUse, Info.getValue2Index(R.IsSigned));
- });
- Constant *FalseC =
- ConstantInt::getFalse(CmpInst::makeCmpResultType(Cmp->getType()));
- Cmp->replaceAllUsesWith(FalseC);
- NumCondsRemoved++;
- Changed = true;
+ // Remove entries again.
+ while (OldSize < DFSInStack.size()) {
+ StackEntry E = DFSInStack.back();
+ removeEntryFromStack(E, Info, ReproducerModule, ReproducerCondStack,
+ DFSInStack);
}
return Changed;
}
@@ -817,10 +1157,12 @@ void ConstraintInfo::addFact(CmpInst::Predicate Pred, Value *A, Value *B,
// hold.
SmallVector<Value *> NewVariables;
auto R = getConstraint(Pred, A, B, NewVariables);
- if (!R.isValid(*this))
+
+ // TODO: Support non-equality for facts as well.
+ if (!R.isValid(*this) || R.isNe())
return;
- LLVM_DEBUG(dbgs() << "Adding '" << CmpInst::getPredicateName(Pred) << " ";
+ LLVM_DEBUG(dbgs() << "Adding '" << Pred << " ";
A->printAsOperand(dbgs(), false); dbgs() << ", ";
B->printAsOperand(dbgs(), false); dbgs() << "'\n");
bool Added = false;
@@ -842,14 +1184,14 @@ void ConstraintInfo::addFact(CmpInst::Predicate Pred, Value *A, Value *B,
LLVM_DEBUG({
dbgs() << " constraint: ";
- dumpWithNames(R.Coefficients, getValue2Index(R.IsSigned));
+ dumpConstraint(R.Coefficients, getValue2Index(R.IsSigned));
dbgs() << "\n";
});
DFSInStack.emplace_back(NumIn, NumOut, R.IsSigned,
std::move(ValuesToRelease));
- if (R.IsEq) {
+ if (R.isEq()) {
// Also add the inverted constraint for equality constraints.
for (auto &Coeff : R.Coefficients)
Coeff *= -1;
@@ -921,12 +1263,17 @@ tryToSimplifyOverflowMath(IntrinsicInst *II, ConstraintInfo &Info,
return Changed;
}
-static bool eliminateConstraints(Function &F, DominatorTree &DT) {
+static bool eliminateConstraints(Function &F, DominatorTree &DT,
+ OptimizationRemarkEmitter &ORE) {
bool Changed = false;
DT.updateDFSNumbers();
-
- ConstraintInfo Info(F.getParent()->getDataLayout());
+ SmallVector<Value *> FunctionArgs;
+ for (Value &Arg : F.args())
+ FunctionArgs.push_back(&Arg);
+ ConstraintInfo Info(F.getParent()->getDataLayout(), FunctionArgs);
State S(DT);
+ std::unique_ptr<Module> ReproducerModule(
+ DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr);
// First, collect conditions implied by branches and blocks with their
// Dominator DFS in and out numbers.
@@ -961,7 +1308,9 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
return true;
if (B.isConditionFact())
return false;
- return A.Inst->comesBefore(B.Inst);
+ auto *InstA = A.getContextInst();
+ auto *InstB = B.getContextInst();
+ return InstA->comesBefore(InstB);
}
return A.NumIn < B.NumIn;
});
@@ -970,6 +1319,7 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
// Finally, process ordered worklist and eliminate implied conditions.
SmallVector<StackEntry, 16> DFSInStack;
+ SmallVector<ReproducerEntry> ReproducerCondStack;
for (FactOrCheck &CB : S.WorkList) {
// First, pop entries from the stack that are out-of-scope for CB. Remove
// the corresponding entry from the constraint system.
@@ -983,61 +1333,96 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
break;
LLVM_DEBUG({
dbgs() << "Removing ";
- dumpWithNames(Info.getCS(E.IsSigned).getLastConstraint(),
- Info.getValue2Index(E.IsSigned));
+ dumpConstraint(Info.getCS(E.IsSigned).getLastConstraint(),
+ Info.getValue2Index(E.IsSigned));
dbgs() << "\n";
});
-
- Info.popLastConstraint(E.IsSigned);
- // Remove variables in the system that went out of scope.
- auto &Mapping = Info.getValue2Index(E.IsSigned);
- for (Value *V : E.ValuesToRelease)
- Mapping.erase(V);
- Info.popLastNVariables(E.IsSigned, E.ValuesToRelease.size());
- DFSInStack.pop_back();
+ removeEntryFromStack(E, Info, ReproducerModule.get(), ReproducerCondStack,
+ DFSInStack);
}
- LLVM_DEBUG({
- dbgs() << "Processing ";
- if (CB.IsCheck)
- dbgs() << "condition to simplify: " << *CB.Inst;
- else
- dbgs() << "fact to add to the system: " << *CB.Inst;
- dbgs() << "\n";
- });
+ LLVM_DEBUG(dbgs() << "Processing ");
// For a block, check if any CmpInsts become known based on the current set
// of constraints.
- if (CB.IsCheck) {
- if (auto *II = dyn_cast<WithOverflowInst>(CB.Inst)) {
+ if (CB.isCheck()) {
+ Instruction *Inst = CB.getInstructionToSimplify();
+ if (!Inst)
+ continue;
+ LLVM_DEBUG(dbgs() << "condition to simplify: " << *Inst << "\n");
+ if (auto *II = dyn_cast<WithOverflowInst>(Inst)) {
Changed |= tryToSimplifyOverflowMath(II, Info, ToRemove);
- } else if (auto *Cmp = dyn_cast<ICmpInst>(CB.Inst)) {
- Changed |= checkAndReplaceCondition(Cmp, Info);
+ } else if (auto *Cmp = dyn_cast<ICmpInst>(Inst)) {
+ bool Simplified = checkAndReplaceCondition(
+ Cmp, Info, CB.NumIn, CB.NumOut, CB.getContextInst(),
+ ReproducerModule.get(), ReproducerCondStack, S.DT);
+ if (!Simplified && match(CB.getContextInst(),
+ m_LogicalAnd(m_Value(), m_Specific(Inst)))) {
+ Simplified =
+ checkAndSecondOpImpliedByFirst(CB, Info, ReproducerModule.get(),
+ ReproducerCondStack, DFSInStack);
+ }
+ Changed |= Simplified;
}
continue;
}
- ICmpInst::Predicate Pred;
- Value *A, *B;
- Value *Cmp = CB.Inst;
- match(Cmp, m_Intrinsic<Intrinsic::assume>(m_Value(Cmp)));
- if (match(Cmp, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
+ LLVM_DEBUG(dbgs() << "fact to add to the system: " << *CB.Inst << "\n");
+ auto AddFact = [&](CmpInst::Predicate Pred, Value *A, Value *B) {
if (Info.getCS(CmpInst::isSigned(Pred)).size() > MaxRows) {
LLVM_DEBUG(
dbgs()
<< "Skip adding constraint because system has too many rows.\n");
- continue;
+ return;
+ }
+
+ Info.addFact(Pred, A, B, CB.NumIn, CB.NumOut, DFSInStack);
+ if (ReproducerModule && DFSInStack.size() > ReproducerCondStack.size())
+ ReproducerCondStack.emplace_back(Pred, A, B);
+
+ Info.transferToOtherSystem(Pred, A, B, CB.NumIn, CB.NumOut, DFSInStack);
+ if (ReproducerModule && DFSInStack.size() > ReproducerCondStack.size()) {
+ // Add dummy entries to ReproducerCondStack to keep it in sync with
+ // DFSInStack.
+ for (unsigned I = 0,
+ E = (DFSInStack.size() - ReproducerCondStack.size());
+ I < E; ++I) {
+ ReproducerCondStack.emplace_back(ICmpInst::BAD_ICMP_PREDICATE,
+ nullptr, nullptr);
+ }
}
+ };
+ ICmpInst::Predicate Pred;
+ if (auto *MinMax = dyn_cast<MinMaxIntrinsic>(CB.Inst)) {
+ Pred = ICmpInst::getNonStrictPredicate(MinMax->getPredicate());
+ AddFact(Pred, MinMax, MinMax->getLHS());
+ AddFact(Pred, MinMax, MinMax->getRHS());
+ continue;
+ }
+
+ Value *A, *B;
+ Value *Cmp = CB.Inst;
+ match(Cmp, m_Intrinsic<Intrinsic::assume>(m_Value(Cmp)));
+ if (match(Cmp, m_ICmp(Pred, m_Value(A), m_Value(B)))) {
// Use the inverse predicate if required.
if (CB.Not)
Pred = CmpInst::getInversePredicate(Pred);
- Info.addFact(Pred, A, B, CB.NumIn, CB.NumOut, DFSInStack);
- Info.transferToOtherSystem(Pred, A, B, CB.NumIn, CB.NumOut, DFSInStack);
+ AddFact(Pred, A, B);
}
}
+ if (ReproducerModule && !ReproducerModule->functions().empty()) {
+ std::string S;
+ raw_string_ostream StringS(S);
+ ReproducerModule->print(StringS, nullptr);
+ StringS.flush();
+ OptimizationRemark Rem(DEBUG_TYPE, "Reproducer", &F);
+ Rem << ore::NV("module") << S;
+ ORE.emit(Rem);
+ }
+
#ifndef NDEBUG
unsigned SignedEntries =
count_if(DFSInStack, [](const StackEntry &E) { return E.IsSigned; });
@@ -1055,7 +1440,8 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
PreservedAnalyses ConstraintEliminationPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- if (!eliminateConstraints(F, DT))
+ auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ if (!eliminateConstraints(F, DT, ORE))
return PreservedAnalyses::all();
PreservedAnalyses PA;
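
The new -constraint-elimination-dump-reproducers path above prints the generated reproducer module into an optimization remark. The emission pattern, reduced to its core and with an illustrative pass-name string (the real code uses its DEBUG_TYPE), looks roughly like this:

#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

static void emitModuleAsRemark(llvm::Module &M, llvm::Function &F,
                               llvm::OptimizationRemarkEmitter &ORE) {
  std::string Buf;
  llvm::raw_string_ostream OS(Buf);
  M.print(OS, /*AAW=*/nullptr); // serialize the reproducer module to text
  OS.flush();
  llvm::OptimizationRemark R("constraint-elimination", "Reproducer", &F);
  R << llvm::ore::NV("Module", Buf); // attach the IR text to the remark
  ORE.emit(R);
}
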
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 90b4b521e7de..48b27a1ea0a2 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -36,11 +36,8 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <optional>
@@ -97,60 +94,33 @@ STATISTIC(NumMinMax, "Number of llvm.[us]{min,max} intrinsics removed");
STATISTIC(NumUDivURemsNarrowedExpanded,
"Number of bound udiv's/urem's expanded");
-namespace {
-
- class CorrelatedValuePropagation : public FunctionPass {
- public:
- static char ID;
-
- CorrelatedValuePropagation(): FunctionPass(ID) {
- initializeCorrelatedValuePropagationPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LazyValueInfoWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<LazyValueInfoWrapperPass>();
- }
- };
-
-} // end anonymous namespace
-
-char CorrelatedValuePropagation::ID = 0;
-
-INITIALIZE_PASS_BEGIN(CorrelatedValuePropagation, "correlated-propagation",
- "Value Propagation", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
-INITIALIZE_PASS_END(CorrelatedValuePropagation, "correlated-propagation",
- "Value Propagation", false, false)
-
-// Public interface to the Value Propagation pass
-Pass *llvm::createCorrelatedValuePropagationPass() {
- return new CorrelatedValuePropagation();
-}
-
static bool processSelect(SelectInst *S, LazyValueInfo *LVI) {
- if (S->getType()->isVectorTy()) return false;
- if (isa<Constant>(S->getCondition())) return false;
-
- Constant *C = LVI->getConstant(S->getCondition(), S);
- if (!C) return false;
+ if (S->getType()->isVectorTy() || isa<Constant>(S->getCondition()))
+ return false;
- ConstantInt *CI = dyn_cast<ConstantInt>(C);
- if (!CI) return false;
+ bool Changed = false;
+ for (Use &U : make_early_inc_range(S->uses())) {
+ auto *I = cast<Instruction>(U.getUser());
+ Constant *C;
+ if (auto *PN = dyn_cast<PHINode>(I))
+ C = LVI->getConstantOnEdge(S->getCondition(), PN->getIncomingBlock(U),
+ I->getParent(), I);
+ else
+ C = LVI->getConstant(S->getCondition(), I);
+
+ auto *CI = dyn_cast_or_null<ConstantInt>(C);
+ if (!CI)
+ continue;
- Value *ReplaceWith = CI->isOne() ? S->getTrueValue() : S->getFalseValue();
- S->replaceAllUsesWith(ReplaceWith);
- S->eraseFromParent();
+ U.set(CI->isOne() ? S->getTrueValue() : S->getFalseValue());
+ Changed = true;
+ ++NumSelects;
+ }
- ++NumSelects;
+ if (Changed && S->use_empty())
+ S->eraseFromParent();
- return true;
+ return Changed;
}
/// Try to simplify a phi with constant incoming values that match the edge
@@ -698,7 +668,7 @@ enum class Domain { NonNegative, NonPositive, Unknown };
static Domain getDomain(const ConstantRange &CR) {
if (CR.isAllNonNegative())
return Domain::NonNegative;
- if (CR.icmp(ICmpInst::ICMP_SLE, APInt::getNullValue(CR.getBitWidth())))
+ if (CR.icmp(ICmpInst::ICMP_SLE, APInt::getZero(CR.getBitWidth())))
return Domain::NonPositive;
return Domain::Unknown;
}
@@ -717,7 +687,6 @@ static bool narrowSDivOrSRem(BinaryOperator *Instr, const ConstantRange &LCR,
// What is the smallest bit width that can accommodate the entire value ranges
// of both of the operands?
- std::array<std::optional<ConstantRange>, 2> CRs;
unsigned MinSignedBits =
std::max(LCR.getMinSignedBits(), RCR.getMinSignedBits());
@@ -804,10 +773,18 @@ static bool expandUDivOrURem(BinaryOperator *Instr, const ConstantRange &XCR,
IRBuilder<> B(Instr);
Value *ExpandedOp;
- if (IsRem) {
+ if (XCR.icmp(ICmpInst::ICMP_UGE, YCR)) {
+ // If X is between Y and 2*Y the result is known.
+ if (IsRem)
+ ExpandedOp = B.CreateNUWSub(X, Y);
+ else
+ ExpandedOp = ConstantInt::get(Instr->getType(), 1);
+ } else if (IsRem) {
// NOTE: this transformation introduces two uses of X,
// but it may be undef so we must freeze it first.
- Value *FrozenX = B.CreateFreeze(X, X->getName() + ".frozen");
+ Value *FrozenX = X;
+ if (!isGuaranteedNotToBeUndefOrPoison(X))
+ FrozenX = B.CreateFreeze(X, X->getName() + ".frozen");
auto *AdjX = B.CreateNUWSub(FrozenX, Y, Instr->getName() + ".urem");
auto *Cmp =
B.CreateICmp(ICmpInst::ICMP_ULT, FrozenX, Y, Instr->getName() + ".cmp");
@@ -1008,7 +985,8 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
if (SDI->getType()->isVectorTy())
return false;
- ConstantRange LRange = LVI->getConstantRangeAtUse(SDI->getOperandUse(0));
+ ConstantRange LRange =
+ LVI->getConstantRangeAtUse(SDI->getOperandUse(0), /*UndefAllowed*/ false);
unsigned OrigWidth = SDI->getType()->getIntegerBitWidth();
ConstantRange NegOneOrZero =
ConstantRange(APInt(OrigWidth, (uint64_t)-1, true), APInt(OrigWidth, 1));
@@ -1040,7 +1018,8 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) {
return false;
const Use &Base = SDI->getOperandUse(0);
- if (!LVI->getConstantRangeAtUse(Base).isAllNonNegative())
+ if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false)
+ .isAllNonNegative())
return false;
++NumSExt;
@@ -1222,16 +1201,6 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
return FnChanged;
}
-bool CorrelatedValuePropagation::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
-
- LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
- DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
-
- return runImpl(F, LVI, DT, getBestSimplifyQuery(*this, F));
-}
-
PreservedAnalyses
CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {
LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F);
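
As a standalone illustration (not code from the patch itself) of the arithmetic the expandUDivOrURem hunk above leans on: once the value ranges prove Y <= X < 2*Y, the division and remainder are fully determined, so the pass can emit a constant 1 and a 'sub nuw' instead of udiv/urem. The helper below is a hypothetical sketch in plain C++:

    #include <cassert>

    // Precondition mirrors what the value ranges establish before the
    // expansion fires: X is in [Y, 2*Y).
    static void checkNarrowRangeDivRem(unsigned X, unsigned Y) {
      assert(Y != 0 && X >= Y && X < 2 * Y);
      assert(X / Y == 1);      // udiv X, Y  ->  constant 1
      assert(X % Y == X - Y);  // urem X, Y  ->  sub nuw X, Y
    }

    int main() {
      checkNarrowRangeDivRem(7, 5);
      checkNarrowRangeDivRem(9, 9);
      return 0;
    }
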
diff --git a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
index 658d0fcb53fa..f2efe60bdf88 100644
--- a/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/DFAJumpThreading.cpp
@@ -70,11 +70,8 @@
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/SSAUpdaterBulk.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -168,51 +165,8 @@ private:
OptimizationRemarkEmitter *ORE;
};
-class DFAJumpThreadingLegacyPass : public FunctionPass {
-public:
- static char ID; // Pass identification
- DFAJumpThreadingLegacyPass() : FunctionPass(ID) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
-
- AssumptionCache *AC =
- &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TargetTransformInfo *TTI =
- &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- OptimizationRemarkEmitter *ORE =
- &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
-
- return DFAJumpThreading(AC, DT, TTI, ORE).run(F);
- }
-};
} // end anonymous namespace
-char DFAJumpThreadingLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(DFAJumpThreadingLegacyPass, "dfa-jump-threading",
- "DFA Jump Threading", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_END(DFAJumpThreadingLegacyPass, "dfa-jump-threading",
- "DFA Jump Threading", false, false)
-
-// Public interface to the DFA Jump Threading pass
-FunctionPass *llvm::createDFAJumpThreadingPass() {
- return new DFAJumpThreadingLegacyPass();
-}
-
namespace {
/// Create a new basic block and sink \p SIToSink into it.
@@ -625,7 +579,7 @@ private:
continue;
PathsType SuccPaths = paths(Succ, Visited, PathDepth + 1);
- for (PathType Path : SuccPaths) {
+ for (const PathType &Path : SuccPaths) {
PathType NewPath(Path);
NewPath.push_front(BB);
Res.push_back(NewPath);
@@ -978,7 +932,7 @@ private:
SSAUpdaterBulk SSAUpdate;
SmallVector<Use *, 16> UsesToRename;
- for (auto KV : NewDefs) {
+ for (const auto &KV : NewDefs) {
Instruction *I = KV.first;
BasicBlock *BB = I->getParent();
std::vector<Instruction *> Cloned = KV.second;
diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 9c0b4d673145..d3fbe49439a8 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -69,15 +69,12 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DebugCounter.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -462,10 +459,10 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
"Should not hit the entry block because SI must be dominated by LI");
for (BasicBlock *Pred : predecessors(B)) {
PHITransAddr PredAddr = Addr;
- if (PredAddr.NeedsPHITranslationFromBlock(B)) {
- if (!PredAddr.IsPotentiallyPHITranslatable())
+ if (PredAddr.needsPHITranslationFromBlock(B)) {
+ if (!PredAddr.isPotentiallyPHITranslatable())
return false;
- if (PredAddr.PHITranslateValue(B, Pred, DT, false))
+ if (!PredAddr.translateValue(B, Pred, DT, false))
return false;
}
Value *TranslatedPtr = PredAddr.getAddr();
@@ -485,41 +482,75 @@ memoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI,
return true;
}
-static void shortenAssignment(Instruction *Inst, uint64_t OldOffsetInBits,
- uint64_t OldSizeInBits, uint64_t NewSizeInBits,
- bool IsOverwriteEnd) {
- DIExpression::FragmentInfo DeadFragment;
- DeadFragment.SizeInBits = OldSizeInBits - NewSizeInBits;
- DeadFragment.OffsetInBits =
+static void shortenAssignment(Instruction *Inst, Value *OriginalDest,
+ uint64_t OldOffsetInBits, uint64_t OldSizeInBits,
+ uint64_t NewSizeInBits, bool IsOverwriteEnd) {
+ const DataLayout &DL = Inst->getModule()->getDataLayout();
+ uint64_t DeadSliceSizeInBits = OldSizeInBits - NewSizeInBits;
+ uint64_t DeadSliceOffsetInBits =
OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0);
-
- auto CreateDeadFragExpr = [Inst, DeadFragment]() {
- // FIXME: This should be using the DIExpression in the Alloca's dbg.assign
- // for the variable, since that could also contain a fragment?
- return *DIExpression::createFragmentExpression(
- DIExpression::get(Inst->getContext(), std::nullopt),
+ auto SetDeadFragExpr = [](DbgAssignIntrinsic *DAI,
+ DIExpression::FragmentInfo DeadFragment) {
+ // createFragmentExpression expects an offset relative to the existing
+ // fragment offset if there is one.
+ uint64_t RelativeOffset = DeadFragment.OffsetInBits -
+ DAI->getExpression()
+ ->getFragmentInfo()
+ .value_or(DIExpression::FragmentInfo(0, 0))
+ .OffsetInBits;
+ if (auto NewExpr = DIExpression::createFragmentExpression(
+ DAI->getExpression(), RelativeOffset, DeadFragment.SizeInBits)) {
+ DAI->setExpression(*NewExpr);
+ return;
+ }
+ // Failed to create a fragment expression for this so discard the value,
+ // making this a kill location.
+ auto *Expr = *DIExpression::createFragmentExpression(
+ DIExpression::get(DAI->getContext(), std::nullopt),
DeadFragment.OffsetInBits, DeadFragment.SizeInBits);
+ DAI->setExpression(Expr);
+ DAI->setKillLocation();
};
// A DIAssignID to use so that the inserted dbg.assign intrinsics do not
// link to any instructions. Created in the loop below (once).
DIAssignID *LinkToNothing = nullptr;
+ LLVMContext &Ctx = Inst->getContext();
+ auto GetDeadLink = [&Ctx, &LinkToNothing]() {
+ if (!LinkToNothing)
+ LinkToNothing = DIAssignID::getDistinct(Ctx);
+ return LinkToNothing;
+ };
// Insert an unlinked dbg.assign intrinsic for the dead fragment after each
- // overlapping dbg.assign intrinsic.
- for (auto *DAI : at::getAssignmentMarkers(Inst)) {
- if (auto FragInfo = DAI->getExpression()->getFragmentInfo()) {
- if (!DIExpression::fragmentsOverlap(*FragInfo, DeadFragment))
- continue;
+ // overlapping dbg.assign intrinsic. The loop invalidates the iterators
+ // returned by getAssignmentMarkers so save a copy of the markers to iterate
+ // over.
+ auto LinkedRange = at::getAssignmentMarkers(Inst);
+ SmallVector<DbgAssignIntrinsic *> Linked(LinkedRange.begin(),
+ LinkedRange.end());
+ for (auto *DAI : Linked) {
+ std::optional<DIExpression::FragmentInfo> NewFragment;
+ if (!at::calculateFragmentIntersect(DL, OriginalDest, DeadSliceOffsetInBits,
+ DeadSliceSizeInBits, DAI,
+ NewFragment) ||
+ !NewFragment) {
+ // We couldn't calculate the intersecting fragment for some reason. Be
+ // cautious and unlink the whole assignment from the store.
+ DAI->setKillAddress();
+ DAI->setAssignId(GetDeadLink());
+ continue;
}
+ // No intersect.
+ if (NewFragment->SizeInBits == 0)
+ continue;
// Fragments overlap: insert a new dbg.assign for this dead part.
auto *NewAssign = cast<DbgAssignIntrinsic>(DAI->clone());
NewAssign->insertAfter(DAI);
- if (!LinkToNothing)
- LinkToNothing = DIAssignID::getDistinct(Inst->getContext());
- NewAssign->setAssignId(LinkToNothing);
- NewAssign->setExpression(CreateDeadFragExpr());
+ NewAssign->setAssignId(GetDeadLink());
+ if (NewFragment)
+ SetDeadFragExpr(NewAssign, *NewFragment);
NewAssign->setKillAddress();
}
}
@@ -596,8 +627,8 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
DeadIntrinsic->setLength(TrimmedLength);
DeadIntrinsic->setDestAlignment(PrefAlign);
+ Value *OrigDest = DeadIntrinsic->getRawDest();
if (!IsOverwriteEnd) {
- Value *OrigDest = DeadIntrinsic->getRawDest();
Type *Int8PtrTy =
Type::getInt8PtrTy(DeadIntrinsic->getContext(),
OrigDest->getType()->getPointerAddressSpace());
@@ -616,7 +647,7 @@ static bool tryToShorten(Instruction *DeadI, int64_t &DeadStart,
}
// Update attached dbg.assign intrinsics. Assume 8-bit byte.
- shortenAssignment(DeadI, DeadStart * 8, DeadSize * 8, NewSize * 8,
+ shortenAssignment(DeadI, OrigDest, DeadStart * 8, DeadSize * 8, NewSize * 8,
IsOverwriteEnd);
// Finally update start and size of dead access.
@@ -730,7 +761,7 @@ tryToMergePartialOverlappingStores(StoreInst *KillingI, StoreInst *DeadI,
}
namespace {
-// Returns true if \p I is an intrisnic that does not read or write memory.
+// Returns true if \p I is an intrinsic that does not read or write memory.
bool isNoopIntrinsic(Instruction *I) {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
@@ -740,7 +771,6 @@ bool isNoopIntrinsic(Instruction *I) {
case Intrinsic::launder_invariant_group:
case Intrinsic::assume:
return true;
- case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare:
case Intrinsic::dbg_label:
case Intrinsic::dbg_value:
@@ -2039,7 +2069,6 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
const LoopInfo &LI) {
bool MadeChange = false;
- MSSA.ensureOptimizedUses();
DSEState State(F, AA, MSSA, DT, PDT, AC, TLI, LI);
// For each store:
for (unsigned I = 0; I < State.MemDefs.size(); I++) {
@@ -2241,79 +2270,3 @@ PreservedAnalyses DSEPass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserve<LoopAnalysis>();
return PA;
}
-
-namespace {
-
-/// A legacy pass for the legacy pass manager that wraps \c DSEPass.
-class DSELegacyPass : public FunctionPass {
-public:
- static char ID; // Pass identification, replacement for typeid
-
- DSELegacyPass() : FunctionPass(ID) {
- initializeDSELegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
-
- AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
- DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- const TargetLibraryInfo &TLI =
- getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- MemorySSA &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
- PostDominatorTree &PDT =
- getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- AssumptionCache &AC =
- getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-
- bool Changed = eliminateDeadStores(F, AA, MSSA, DT, PDT, AC, TLI, LI);
-
-#ifdef LLVM_ENABLE_STATS
- if (AreStatisticsEnabled())
- for (auto &I : instructions(F))
- NumRemainingStores += isa<StoreInst>(&I);
-#endif
-
- return Changed;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTreeWrapperPass>();
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<PostDominatorTreeWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequired<AssumptionCacheTracker>();
- }
-};
-
-} // end anonymous namespace
-
-char DSELegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(DSELegacyPass, "dse", "Dead Store Elimination", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_END(DSELegacyPass, "dse", "Dead Store Elimination", false,
- false)
-
-FunctionPass *llvm::createDeadStoreEliminationPass() {
- return new DSELegacyPass();
-}
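
For the shortenAssignment change above, the dead-fragment bookkeeping boils down to two numbers: the size of the slice that is no longer stored and its offset within the original store. A minimal sketch of that arithmetic with plain integers (names are illustrative; the DIExpression and dbg.assign plumbing is omitted):

    #include <cassert>
    #include <cstdint>

    struct DeadSlice {
      uint64_t OffsetInBits;
      uint64_t SizeInBits;
    };

    // Mirrors the first few lines of shortenAssignment: the trimmed-away part
    // of the original store, measured in bits.
    static DeadSlice computeDeadSlice(uint64_t OldOffsetInBits,
                                      uint64_t OldSizeInBits,
                                      uint64_t NewSizeInBits,
                                      bool IsOverwriteEnd) {
      return {OldOffsetInBits + (IsOverwriteEnd ? NewSizeInBits : 0),
              OldSizeInBits - NewSizeInBits};
    }

    int main() {
      // A 128-bit store shortened to its first 64 bits: the dead slice is the
      // trailing 64 bits.
      DeadSlice S = computeDeadSlice(0, 128, 64, /*IsOverwriteEnd=*/true);
      assert(S.OffsetInBits == 64 && S.SizeInBits == 64);
      return 0;
    }
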
diff --git a/llvm/lib/Transforms/Scalar/DivRemPairs.cpp b/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
index 303951643a0b..57d3f312186e 100644
--- a/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
+++ b/llvm/lib/Transforms/Scalar/DivRemPairs.cpp
@@ -21,10 +21,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/DebugCounter.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include <optional>
@@ -371,6 +368,10 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
Mul->insertAfter(RemInst);
Sub->insertAfter(Mul);
+ // If DivInst has the exact flag, remove it. Otherwise this optimization
+ // may replace a well-defined value 'X % Y' with poison.
+ DivInst->dropPoisonGeneratingFlags();
+
// If X can be undef, X should be frozen first.
// For example, let's assume that Y = 1 & X = undef:
// %div = sdiv undef, 1 // %div = undef
@@ -413,44 +414,6 @@ static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI,
// Pass manager boilerplate below here.
-namespace {
-struct DivRemPairsLegacyPass : public FunctionPass {
- static char ID;
- DivRemPairsLegacyPass() : FunctionPass(ID) {
- initializeDivRemPairsLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.setPreservesCFG();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- FunctionPass::getAnalysisUsage(AU);
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
- auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return optimizeDivRem(F, TTI, DT);
- }
-};
-} // namespace
-
-char DivRemPairsLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs",
- "Hoist/decompose integer division and remainder", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(DivRemPairsLegacyPass, "div-rem-pairs",
- "Hoist/decompose integer division and remainder", false,
- false)
-FunctionPass *llvm::createDivRemPairsPass() {
- return new DivRemPairsLegacyPass();
-}
-
PreservedAnalyses DivRemPairsPass::run(Function &F,
FunctionAnalysisManager &FAM) {
TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
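
The DivRemPairs hunk above recomposes the remainder as X - (X / Y) * Y and first strips poison-generating flags from the division, since an 'exact' sdiv/udiv is poison whenever the remainder is nonzero. A small standalone check of the identity itself, in ordinary C++ (illustrative only):

    #include <cassert>

    // X % Y == X - (X / Y) * Y holds for C++'s truncating division, matching
    // the sdiv/srem (and udiv/urem) semantics the rewrite relies on.
    static void checkDivRemIdentity(int X, int Y) {
      assert(Y != 0);
      assert(X % Y == X - (X / Y) * Y);
    }

    int main() {
      checkDivRemIdentity(7, 2);   // remainder 1: an 'exact' div would be poison
      checkDivRemIdentity(-9, 4);  // truncation toward zero: -9 / 4 == -2
      return 0;
    }
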
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 26821c7ee81e..67e8e82e408f 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -218,6 +218,19 @@ static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
return true;
}
+static unsigned hashCallInst(CallInst *CI) {
+ // Don't CSE convergent calls in different basic blocks, because they
+ // implicitly depend on the set of threads that is currently executing.
+ if (CI->isConvergent()) {
+ return hash_combine(
+ CI->getOpcode(), CI->getParent(),
+ hash_combine_range(CI->value_op_begin(), CI->value_op_end()));
+ }
+ return hash_combine(
+ CI->getOpcode(),
+ hash_combine_range(CI->value_op_begin(), CI->value_op_end()));
+}
+
static unsigned getHashValueImpl(SimpleValue Val) {
Instruction *Inst = Val.Inst;
// Hash in all of the operands as pointers.
@@ -318,6 +331,11 @@ static unsigned getHashValueImpl(SimpleValue Val) {
return hash_combine(GCR->getOpcode(), GCR->getOperand(0),
GCR->getBasePtr(), GCR->getDerivedPtr());
+ // Don't CSE convergent calls in different basic blocks, because they
+ // implicitly depend on the set of threads that is currently executing.
+ if (CallInst *CI = dyn_cast<CallInst>(Inst))
+ return hashCallInst(CI);
+
// Mix in the opcode.
return hash_combine(
Inst->getOpcode(),
@@ -344,8 +362,16 @@ static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
if (LHSI->getOpcode() != RHSI->getOpcode())
return false;
- if (LHSI->isIdenticalToWhenDefined(RHSI))
+ if (LHSI->isIdenticalToWhenDefined(RHSI)) {
+ // Convergent calls implicitly depend on the set of threads that is
+ // currently executing, so conservatively return false if they are in
+ // different basic blocks.
+ if (CallInst *CI = dyn_cast<CallInst>(LHSI);
+ CI && CI->isConvergent() && LHSI->getParent() != RHSI->getParent())
+ return false;
+
return true;
+ }
// If we're not strictly identical, we still might be a commutable instruction
if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
@@ -508,15 +534,21 @@ unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
Instruction *Inst = Val.Inst;
// Hash all of the operands as pointers and mix in the opcode.
- return hash_combine(
- Inst->getOpcode(),
- hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
+ return hashCallInst(cast<CallInst>(Inst));
}
bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
- Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
if (LHS.isSentinel() || RHS.isSentinel())
- return LHSI == RHSI;
+ return LHS.Inst == RHS.Inst;
+
+ CallInst *LHSI = cast<CallInst>(LHS.Inst);
+ CallInst *RHSI = cast<CallInst>(RHS.Inst);
+
+ // Convergent calls implicitly depend on the set of threads that is
+ // currently executing, so conservatively return false if they are in
+ // different basic blocks.
+ if (LHSI->isConvergent() && LHSI->getParent() != RHSI->getParent())
+ return false;
return LHSI->isIdenticalTo(RHSI);
}
@@ -578,12 +610,13 @@ public:
unsigned Generation = 0;
int MatchingId = -1;
bool IsAtomic = false;
+ bool IsLoad = false;
LoadValue() = default;
LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
- bool IsAtomic)
+ bool IsAtomic, bool IsLoad)
: DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
- IsAtomic(IsAtomic) {}
+ IsAtomic(IsAtomic), IsLoad(IsLoad) {}
};
using LoadMapAllocator =
@@ -802,17 +835,7 @@ private:
Type *getValueType() const {
// TODO: handle target-specific intrinsics.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::masked_load:
- return II->getType();
- case Intrinsic::masked_store:
- return II->getArgOperand(0)->getType();
- default:
- return nullptr;
- }
- }
- return getLoadStoreType(Inst);
+ return Inst->getAccessType();
}
bool mayReadFromMemory() const {
@@ -1476,6 +1499,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
continue;
}
+ if (InVal.IsLoad)
+ if (auto *I = dyn_cast<Instruction>(Op))
+ combineMetadataForCSE(I, &Inst, false);
if (!Inst.use_empty())
Inst.replaceAllUsesWith(Op);
salvageKnowledge(&Inst, &AC);
@@ -1490,7 +1516,8 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
AvailableLoads.insert(MemInst.getPointerOperand(),
LoadValue(&Inst, CurrentGeneration,
MemInst.getMatchingId(),
- MemInst.isAtomic()));
+ MemInst.isAtomic(),
+ MemInst.isLoad()));
LastStore = nullptr;
continue;
}
@@ -1614,7 +1641,8 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
AvailableLoads.insert(MemInst.getPointerOperand(),
LoadValue(&Inst, CurrentGeneration,
MemInst.getMatchingId(),
- MemInst.isAtomic()));
+ MemInst.isAtomic(),
+ MemInst.isLoad()));
// Remember that this was the last unordered store we saw for DSE. We
// don't yet handle DSE on ordered or volatile stores since we don't
@@ -1710,10 +1738,10 @@ void EarlyCSEPass::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
static_cast<PassInfoMixin<EarlyCSEPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
- OS << "<";
+ OS << '<';
if (UseMemorySSA)
OS << "memssa";
- OS << ">";
+ OS << '>';
}
namespace {
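
The EarlyCSE hunks above make both the hash and the equality of a call sensitive to its parent block when the call is convergent, so two otherwise identical convergent calls in different blocks can never be merged. A library-free sketch of that keying idea (the types and hashing below are illustrative stand-ins, not the pass's real DenseMap machinery):

    #include <cassert>
    #include <cstddef>
    #include <functional>
    #include <string>

    struct Block {};
    struct Call {
      std::string Callee;
      bool IsConvergent;
      const Block *Parent;
    };

    // Convergent calls fold their parent block into the hash, mirroring
    // hashCallInst in the hunk above.
    static std::size_t cseHash(const Call &C) {
      std::size_t H = std::hash<std::string>{}(C.Callee);
      if (C.IsConvergent)
        H ^= std::hash<const void *>{}(C.Parent) + 0x9e3779b9 + (H << 6);
      return H;
    }

    // Equality conservatively refuses to merge convergent calls across blocks.
    static bool cseEqual(const Call &A, const Call &B) {
      if (A.Callee != B.Callee)
        return false;
      if ((A.IsConvergent || B.IsConvergent) && A.Parent != B.Parent)
        return false;
      return true;
    }

    int main() {
      Block B1, B2;
      Call C1{"foo", true, &B1}, C2{"foo", true, &B2};
      assert(!cseEqual(C1, C2));
      (void)cseHash(C1);
      return 0;
    }
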
diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp
index f66d1b914b0b..ccca8bcc1a56 100644
--- a/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -20,12 +20,9 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include <deque>
#define DEBUG_TYPE "float2int"
@@ -49,35 +46,6 @@ MaxIntegerBW("float2int-max-integer-bw", cl::init(64), cl::Hidden,
cl::desc("Max integer bitwidth to consider in float2int"
"(default=64)"));
-namespace {
- struct Float2IntLegacyPass : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- Float2IntLegacyPass() : FunctionPass(ID) {
- initializeFloat2IntLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
-
- const DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return Impl.runImpl(F, DT);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
-
- private:
- Float2IntPass Impl;
- };
-}
-
-char Float2IntLegacyPass::ID = 0;
-INITIALIZE_PASS(Float2IntLegacyPass, "float2int", "Float to int", false, false)
-
// Given a FCmp predicate, return a matching ICmp predicate if one
// exists, otherwise return BAD_ICMP_PREDICATE.
static CmpInst::Predicate mapFCmpPred(CmpInst::Predicate P) {
@@ -187,7 +155,7 @@ void Float2IntPass::walkBackwards() {
Instruction *I = Worklist.back();
Worklist.pop_back();
- if (SeenInsts.find(I) != SeenInsts.end())
+ if (SeenInsts.contains(I))
// Seen already.
continue;
@@ -371,7 +339,7 @@ bool Float2IntPass::validateAndTransform() {
ConvertedToTy = I->getType();
for (User *U : I->users()) {
Instruction *UI = dyn_cast<Instruction>(U);
- if (!UI || SeenInsts.find(UI) == SeenInsts.end()) {
+ if (!UI || !SeenInsts.contains(UI)) {
LLVM_DEBUG(dbgs() << "F2I: Failing because of " << *U << "\n");
Fail = true;
break;
@@ -391,8 +359,9 @@ bool Float2IntPass::validateAndTransform() {
// The number of bits required is the maximum of the upper and
// lower limits, plus one so it can be signed.
- unsigned MinBW = std::max(R.getLower().getMinSignedBits(),
- R.getUpper().getMinSignedBits()) + 1;
+ unsigned MinBW = std::max(R.getLower().getSignificantBits(),
+ R.getUpper().getSignificantBits()) +
+ 1;
LLVM_DEBUG(dbgs() << "F2I: MinBitwidth=" << MinBW << ", R: " << R << "\n");
// If we've run off the realms of the exactly representable integers,
@@ -427,7 +396,7 @@ bool Float2IntPass::validateAndTransform() {
}
Value *Float2IntPass::convert(Instruction *I, Type *ToTy) {
- if (ConvertedInsts.find(I) != ConvertedInsts.end())
+ if (ConvertedInsts.contains(I))
// Already converted this instruction.
return ConvertedInsts[I];
@@ -528,9 +497,6 @@ bool Float2IntPass::runImpl(Function &F, const DominatorTree &DT) {
return Modified;
}
-namespace llvm {
-FunctionPass *createFloat2IntPass() { return new Float2IntLegacyPass(); }
-
PreservedAnalyses Float2IntPass::run(Function &F, FunctionAnalysisManager &AM) {
const DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
if (!runImpl(F, DT))
@@ -540,4 +506,3 @@ PreservedAnalyses Float2IntPass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserveSet<CFGAnalyses>();
return PA;
}
-} // End namespace llvm
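
For the Float2Int MinBW computation above, a small worked example may help (it assumes the LLVM ADT headers; APInt::getSignificantBits is the same call the hunk itself uses): for the range [-5, 10], the lower bound needs 4 significant signed bits (0b1011) and the upper needs 5 (0b01010), and the pass adds one more "so it can be signed", giving 6.

    #include "llvm/ADT/APInt.h"
    #include <algorithm>
    #include <cassert>

    int main() {
      llvm::APInt Lower(32, -5, /*isSigned=*/true);
      llvm::APInt Upper(32, 10);
      // Same formula as validateAndTransform in the hunk above.
      unsigned MinBW = std::max(Lower.getSignificantBits(),
                                Upper.getSignificantBits()) + 1;
      assert(MinBW == 6);
      (void)MinBW;
      return 0;
    }
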
diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 6158894e3437..03e8a2507b45 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -94,6 +94,8 @@ STATISTIC(NumGVNSimpl, "Number of instructions simplified");
STATISTIC(NumGVNEqProp, "Number of equalities propagated");
STATISTIC(NumPRELoad, "Number of loads PRE'd");
STATISTIC(NumPRELoopLoad, "Number of loop loads PRE'd");
+STATISTIC(NumPRELoadMoved2CEPred,
+ "Number of loads moved to predecessor of a critical edge in PRE");
STATISTIC(IsValueFullyAvailableInBlockNumSpeculationsMax,
"Number of blocks speculated as available in "
@@ -127,6 +129,11 @@ static cl::opt<uint32_t> MaxNumVisitedInsts(
cl::desc("Max number of visited instructions when trying to find "
"dominating value of select dependency (default = 100)"));
+static cl::opt<uint32_t> MaxNumInsnsPerBlock(
+ "gvn-max-num-insns", cl::Hidden, cl::init(100),
+ cl::desc("Max number of instructions to scan in each basic block in GVN "
+ "(default = 100)"));
+
struct llvm::GVNPass::Expression {
uint32_t opcode;
bool commutative = false;
@@ -416,10 +423,9 @@ GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) {
unsigned BitWidth = DL.getIndexTypeSizeInBits(PtrTy);
MapVector<Value *, APInt> VariableOffsets;
APInt ConstantOffset(BitWidth, 0);
- if (PtrTy->isOpaquePointerTy() &&
- GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) {
- // For opaque pointers, convert into offset representation, to recognize
- // equivalent address calculations that use different type encoding.
+ if (GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) {
+ // Convert into offset representation, to recognize equivalent address
+ // calculations that use different type encoding.
LLVMContext &Context = GEP->getContext();
E.opcode = GEP->getOpcode();
E.type = nullptr;
@@ -432,8 +438,8 @@ GVNPass::Expression GVNPass::ValueTable::createGEPExpr(GetElementPtrInst *GEP) {
E.varargs.push_back(
lookupOrAdd(ConstantInt::get(Context, ConstantOffset)));
} else {
- // If converting to offset representation fails (for typed pointers and
- // scalable vectors), fall back to type-based implementation:
+ // If converting to offset representation fails (for scalable vectors),
+ // fall back to type-based implementation:
E.opcode = GEP->getOpcode();
E.type = GEP->getSourceElementType();
for (Use &Op : GEP->operands())
@@ -461,28 +467,34 @@ void GVNPass::ValueTable::add(Value *V, uint32_t num) {
}
uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
- if (AA->doesNotAccessMemory(C) &&
- // FIXME: Currently the calls which may access the thread id may
- // be considered as not accessing the memory. But this is
- // problematic for coroutines, since coroutines may resume in a
- // different thread. So we disable the optimization here for the
- // correctness. However, it may block many other correct
- // optimizations. Revert this one when we detect the memory
- // accessing kind more precisely.
- !C->getFunction()->isPresplitCoroutine()) {
+ // FIXME: Currently the calls which may access the thread id may
+ // be considered as not accessing the memory. But this is
+ // problematic for coroutines, since coroutines may resume in a
+ // different thread. So we disable the optimization here for the
+ // correctness. However, it may block many other correct
+ // optimizations. Revert this one when we detect the memory
+ // accessing kind more precisely.
+ if (C->getFunction()->isPresplitCoroutine()) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ // Do not combine convergent calls since they implicitly depend on the set of
+ // threads that is currently executing, and they might be in different basic
+ // blocks.
+ if (C->isConvergent()) {
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
+ }
+
+ if (AA->doesNotAccessMemory(C)) {
Expression exp = createExpr(C);
uint32_t e = assignExpNewValueNum(exp).first;
valueNumbering[C] = e;
return e;
- } else if (MD && AA->onlyReadsMemory(C) &&
- // FIXME: Currently the calls which may access the thread id may
- // be considered as not accessing the memory. But this is
- // problematic for coroutines, since coroutines may resume in a
- // different thread. So we disable the optimization here for the
- // correctness. However, it may block many other correct
- // optimizations. Revert this one when we detect the memory
- // accessing kind more precisely.
- !C->getFunction()->isPresplitCoroutine()) {
+ }
+
+ if (MD && AA->onlyReadsMemory(C)) {
Expression exp = createExpr(C);
auto ValNum = assignExpNewValueNum(exp);
if (ValNum.second) {
@@ -572,10 +584,10 @@ uint32_t GVNPass::ValueTable::lookupOrAddCall(CallInst *C) {
uint32_t v = lookupOrAdd(cdep);
valueNumbering[C] = v;
return v;
- } else {
- valueNumbering[C] = nextValueNumber;
- return nextValueNumber++;
}
+
+ valueNumbering[C] = nextValueNumber;
+ return nextValueNumber++;
}
/// Returns true if a value number exists for the specified value.
@@ -708,10 +720,8 @@ void GVNPass::ValueTable::erase(Value *V) {
/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void GVNPass::ValueTable::verifyRemoved(const Value *V) const {
- for (DenseMap<Value*, uint32_t>::const_iterator
- I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
- assert(I->first != V && "Inst still occurs in value numbering map!");
- }
+ assert(!valueNumbering.contains(V) &&
+ "Inst still occurs in value numbering map!");
}
//===----------------------------------------------------------------------===//
@@ -772,7 +782,7 @@ void GVNPass::printPipeline(
static_cast<PassInfoMixin<GVNPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
- OS << "<";
+ OS << '<';
if (Options.AllowPRE != std::nullopt)
OS << (*Options.AllowPRE ? "" : "no-") << "pre;";
if (Options.AllowLoadPRE != std::nullopt)
@@ -782,7 +792,7 @@ void GVNPass::printPipeline(
<< "split-backedge-load-pre;";
if (Options.AllowMemDep != std::nullopt)
OS << (*Options.AllowMemDep ? "" : "no-") << "memdep";
- OS << ">";
+ OS << '>';
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -930,6 +940,18 @@ static bool IsValueFullyAvailableInBlock(
return !UnavailableBB;
}
+/// If the specified OldValue exists in ValuesPerBlock, replace its value with
+/// NewValue.
+static void replaceValuesPerBlockEntry(
+ SmallVectorImpl<AvailableValueInBlock> &ValuesPerBlock, Value *OldValue,
+ Value *NewValue) {
+ for (AvailableValueInBlock &V : ValuesPerBlock) {
+ if ((V.AV.isSimpleValue() && V.AV.getSimpleValue() == OldValue) ||
+ (V.AV.isCoercedLoadValue() && V.AV.getCoercedLoadValue() == OldValue))
+ V = AvailableValueInBlock::get(V.BB, NewValue);
+ }
+}
+
/// Given a set of loads specified by ValuesPerBlock,
/// construct SSA form, allowing us to eliminate Load. This returns the value
/// that should be used at Load's definition site.
@@ -986,7 +1008,7 @@ Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
if (isSimpleValue()) {
Res = getSimpleValue();
if (Res->getType() != LoadTy) {
- Res = getStoreValueForLoad(Res, Offset, LoadTy, InsertPt, DL);
+ Res = getValueForLoad(Res, Offset, LoadTy, InsertPt, DL);
LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset
<< " " << *getSimpleValue() << '\n'
@@ -997,14 +1019,23 @@ Value *AvailableValue::MaterializeAdjustedValue(LoadInst *Load,
LoadInst *CoercedLoad = getCoercedLoadValue();
if (CoercedLoad->getType() == LoadTy && Offset == 0) {
Res = CoercedLoad;
+ combineMetadataForCSE(CoercedLoad, Load, false);
} else {
- Res = getLoadValueForLoad(CoercedLoad, Offset, LoadTy, InsertPt, DL);
- // We would like to use gvn.markInstructionForDeletion here, but we can't
- // because the load is already memoized into the leader map table that GVN
- // tracks. It is potentially possible to remove the load from the table,
- // but then there all of the operations based on it would need to be
- // rehashed. Just leave the dead load around.
- gvn.getMemDep().removeInstruction(CoercedLoad);
+ Res = getValueForLoad(CoercedLoad, Offset, LoadTy, InsertPt, DL);
+ // We are adding a new user for this load, for which the original
+ // metadata may not hold. Additionally, the new load may have a different
+ // size and type, so their metadata cannot be combined in any
+ // straightforward way.
+ // Drop all metadata that is not known to cause immediate UB on violation,
+ // unless the load has !noundef, in which case all metadata violations
+ // will be promoted to UB.
+ // TODO: We can combine noalias/alias.scope metadata here, because it is
+ // independent of the load type.
+ if (!CoercedLoad->hasMetadata(LLVMContext::MD_noundef))
+ CoercedLoad->dropUnknownNonDebugMetadata(
+ {LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null,
+ LLVMContext::MD_invariant_load, LLVMContext::MD_invariant_group});
LLVM_DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset
<< " " << *getCoercedLoadValue() << '\n'
<< *Res << '\n'
@@ -1314,9 +1345,67 @@ void GVNPass::AnalyzeLoadAvailability(LoadInst *Load, LoadDepVect &Deps,
"post condition violation");
}
+/// Given the following code, v1 is partially available on some edges, but not
+/// available on the edge from PredBB. This function tries to find if there is
+/// another identical load in the other successor of PredBB.
+///
+/// v0 = load %addr
+/// br %LoadBB
+///
+/// LoadBB:
+/// v1 = load %addr
+/// ...
+///
+/// PredBB:
+/// ...
+/// br %cond, label %LoadBB, label %SuccBB
+///
+/// SuccBB:
+/// v2 = load %addr
+/// ...
+///
+LoadInst *GVNPass::findLoadToHoistIntoPred(BasicBlock *Pred, BasicBlock *LoadBB,
+ LoadInst *Load) {
+ // For simplicity we handle a Pred has 2 successors only.
+ auto *Term = Pred->getTerminator();
+ if (Term->getNumSuccessors() != 2 || Term->isExceptionalTerminator())
+ return nullptr;
+ auto *SuccBB = Term->getSuccessor(0);
+ if (SuccBB == LoadBB)
+ SuccBB = Term->getSuccessor(1);
+ if (!SuccBB->getSinglePredecessor())
+ return nullptr;
+
+ unsigned int NumInsts = MaxNumInsnsPerBlock;
+ for (Instruction &Inst : *SuccBB) {
+ if (Inst.isDebugOrPseudoInst())
+ continue;
+ if (--NumInsts == 0)
+ return nullptr;
+
+ if (!Inst.isIdenticalTo(Load))
+ continue;
+
+ MemDepResult Dep = MD->getDependency(&Inst);
+ // If an identical load doesn't depend on any local instructions, it can
+ // be safely moved to PredBB.
+ // Also check for the implicit control flow instructions. See the comments
+ // in PerformLoadPRE for details.
+ if (Dep.isNonLocal() && !ICF->isDominatedByICFIFromSameBlock(&Inst))
+ return cast<LoadInst>(&Inst);
+
+ // Otherwise something in the same BB clobbers the memory, so we can't
+ // move this load and the later load to PredBB.
+ return nullptr;
+ }
+
+ return nullptr;
+}
+
void GVNPass::eliminatePartiallyRedundantLoad(
LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
- MapVector<BasicBlock *, Value *> &AvailableLoads) {
+ MapVector<BasicBlock *, Value *> &AvailableLoads,
+ MapVector<BasicBlock *, LoadInst *> *CriticalEdgePredAndLoad) {
for (const auto &AvailableLoad : AvailableLoads) {
BasicBlock *UnavailableBlock = AvailableLoad.first;
Value *LoadPtr = AvailableLoad.second;
@@ -1370,10 +1459,29 @@ void GVNPass::eliminatePartiallyRedundantLoad(
AvailableValueInBlock::get(UnavailableBlock, NewLoad));
MD->invalidateCachedPointerInfo(LoadPtr);
LLVM_DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
+
+ // For each PredBB in CriticalEdgePredAndLoad we need to replace the uses of
+ // the old load instruction with the newly created load instruction.
+ if (CriticalEdgePredAndLoad) {
+ auto I = CriticalEdgePredAndLoad->find(UnavailableBlock);
+ if (I != CriticalEdgePredAndLoad->end()) {
+ ++NumPRELoadMoved2CEPred;
+ ICF->insertInstructionTo(NewLoad, UnavailableBlock);
+ LoadInst *OldLoad = I->second;
+ combineMetadataForCSE(NewLoad, OldLoad, false);
+ OldLoad->replaceAllUsesWith(NewLoad);
+ replaceValuesPerBlockEntry(ValuesPerBlock, OldLoad, NewLoad);
+ if (uint32_t ValNo = VN.lookup(OldLoad, false))
+ removeFromLeaderTable(ValNo, OldLoad, OldLoad->getParent());
+ VN.erase(OldLoad);
+ removeInstruction(OldLoad);
+ }
+ }
}
// Perform PHI construction.
Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this);
+ // ConstructSSAForLoadSet is responsible for combining metadata.
Load->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(Load);
@@ -1456,7 +1564,12 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
for (BasicBlock *UnavailableBB : UnavailableBlocks)
FullyAvailableBlocks[UnavailableBB] = AvailabilityState::Unavailable;
- SmallVector<BasicBlock *, 4> CriticalEdgePred;
+ // The edge from Pred to LoadBB is a critical edge that will be split.
+ SmallVector<BasicBlock *, 4> CriticalEdgePredSplit;
+ // The edge from Pred to LoadBB is a critical edge, and another successor of
+ // Pred contains a load that can be moved into Pred. This data structure maps
+ // each such Pred to the movable load.
+ MapVector<BasicBlock *, LoadInst *> CriticalEdgePredAndLoad;
for (BasicBlock *Pred : predecessors(LoadBB)) {
// If any predecessor block is an EH pad that does not allow non-PHI
// instructions before the terminator, we can't PRE the load.
@@ -1496,7 +1609,10 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
return false;
}
- CriticalEdgePred.push_back(Pred);
+ if (LoadInst *LI = findLoadToHoistIntoPred(Pred, LoadBB, Load))
+ CriticalEdgePredAndLoad[Pred] = LI;
+ else
+ CriticalEdgePredSplit.push_back(Pred);
} else {
// Only add the predecessors that will not be split for now.
PredLoads[Pred] = nullptr;
@@ -1504,31 +1620,38 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
}
// Decide whether PRE is profitable for this load.
- unsigned NumUnavailablePreds = PredLoads.size() + CriticalEdgePred.size();
+ unsigned NumInsertPreds = PredLoads.size() + CriticalEdgePredSplit.size();
+ unsigned NumUnavailablePreds = NumInsertPreds +
+ CriticalEdgePredAndLoad.size();
assert(NumUnavailablePreds != 0 &&
"Fully available value should already be eliminated!");
+ (void)NumUnavailablePreds;
- // If this load is unavailable in multiple predecessors, reject it.
+ // If we need to insert a new load in multiple predecessors, reject it.
// FIXME: If we could restructure the CFG, we could make a common pred with
// all the preds that don't have an available Load and insert a new load into
// that one block.
- if (NumUnavailablePreds != 1)
+ if (NumInsertPreds > 1)
return false;
// Now we know where we will insert load. We must ensure that it is safe
// to speculatively execute the load at that points.
if (MustEnsureSafetyOfSpeculativeExecution) {
- if (CriticalEdgePred.size())
+ if (CriticalEdgePredSplit.size())
if (!isSafeToSpeculativelyExecute(Load, LoadBB->getFirstNonPHI(), AC, DT))
return false;
for (auto &PL : PredLoads)
if (!isSafeToSpeculativelyExecute(Load, PL.first->getTerminator(), AC,
DT))
return false;
+ for (auto &CEP : CriticalEdgePredAndLoad)
+ if (!isSafeToSpeculativelyExecute(Load, CEP.first->getTerminator(), AC,
+ DT))
+ return false;
}
// Split critical edges, and update the unavailable predecessors accordingly.
- for (BasicBlock *OrigPred : CriticalEdgePred) {
+ for (BasicBlock *OrigPred : CriticalEdgePredSplit) {
BasicBlock *NewPred = splitCriticalEdges(OrigPred, LoadBB);
assert(!PredLoads.count(OrigPred) && "Split edges shouldn't be in map!");
PredLoads[NewPred] = nullptr;
@@ -1536,6 +1659,9 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
<< LoadBB->getName() << '\n');
}
+ for (auto &CEP : CriticalEdgePredAndLoad)
+ PredLoads[CEP.first] = nullptr;
+
// Check if the load can safely be moved to all the unavailable predecessors.
bool CanDoPRE = true;
const DataLayout &DL = Load->getModule()->getDataLayout();
@@ -1555,8 +1681,8 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
BasicBlock *Cur = Load->getParent();
while (Cur != LoadBB) {
PHITransAddr Address(LoadPtr, DL, AC);
- LoadPtr = Address.PHITranslateWithInsertion(
- Cur, Cur->getSinglePredecessor(), *DT, NewInsts);
+ LoadPtr = Address.translateWithInsertion(Cur, Cur->getSinglePredecessor(),
+ *DT, NewInsts);
if (!LoadPtr) {
CanDoPRE = false;
break;
@@ -1566,8 +1692,8 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
if (LoadPtr) {
PHITransAddr Address(LoadPtr, DL, AC);
- LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred, *DT,
- NewInsts);
+ LoadPtr = Address.translateWithInsertion(LoadBB, UnavailablePred, *DT,
+ NewInsts);
}
// If we couldn't find or insert a computation of this phi translated value,
// we fail PRE.
@@ -1592,7 +1718,7 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
}
// HINT: Don't revert the edge-splitting as following transformation may
// also need to split these critical edges.
- return !CriticalEdgePred.empty();
+ return !CriticalEdgePredSplit.empty();
}
// Okay, we can eliminate this load by inserting a reload in the predecessor
@@ -1617,7 +1743,8 @@ bool GVNPass::PerformLoadPRE(LoadInst *Load, AvailValInBlkVect &ValuesPerBlock,
VN.lookupOrAdd(I);
}
- eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, PredLoads);
+ eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, PredLoads,
+ &CriticalEdgePredAndLoad);
++NumPRELoad;
return true;
}
@@ -1696,7 +1823,8 @@ bool GVNPass::performLoopLoadPRE(LoadInst *Load,
AvailableLoads[Preheader] = LoadPtr;
LLVM_DEBUG(dbgs() << "GVN REMOVING PRE LOOP LOAD: " << *Load << '\n');
- eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads);
+ eliminatePartiallyRedundantLoad(Load, ValuesPerBlock, AvailableLoads,
+ /*CriticalEdgePredAndLoad*/ nullptr);
++NumPRELoopLoad;
return true;
}
@@ -1772,6 +1900,7 @@ bool GVNPass::processNonLocalLoad(LoadInst *Load) {
// Perform PHI construction.
Value *V = ConstructSSAForLoadSet(Load, ValuesPerBlock, *this);
+ // ConstructSSAForLoadSet is responsible for combining metadata.
Load->replaceAllUsesWith(V);
if (isa<PHINode>(V))
@@ -1823,7 +1952,7 @@ static bool impliesEquivalanceIfTrue(CmpInst* Cmp) {
if (isa<ConstantFP>(LHS) && !cast<ConstantFP>(LHS)->isZero())
return true;
if (isa<ConstantFP>(RHS) && !cast<ConstantFP>(RHS)->isZero())
- return true;;
+ return true;
// TODO: Handle vector floating point constants
}
return false;
@@ -1849,7 +1978,7 @@ static bool impliesEquivalanceIfFalse(CmpInst* Cmp) {
if (isa<ConstantFP>(LHS) && !cast<ConstantFP>(LHS)->isZero())
return true;
if (isa<ConstantFP>(RHS) && !cast<ConstantFP>(RHS)->isZero())
- return true;;
+ return true;
// TODO: Handle vector floating point constants
}
return false;
@@ -1907,10 +2036,14 @@ bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
MSSAU->insertDef(cast<MemoryDef>(NewDef), /*RenameUses=*/false);
}
}
- if (isAssumeWithEmptyBundle(*IntrinsicI))
+ if (isAssumeWithEmptyBundle(*IntrinsicI)) {
markInstructionForDeletion(IntrinsicI);
+ return true;
+ }
return false;
- } else if (isa<Constant>(V)) {
+ }
+
+ if (isa<Constant>(V)) {
// If it's not false, and constant, it must evaluate to true. This means our
// assume is assume(true), and thus, pointless, and we don't want to do
// anything more here.
@@ -2043,8 +2176,8 @@ bool GVNPass::processLoad(LoadInst *L) {
Value *AvailableValue = AV->MaterializeAdjustedValue(L, L, *this);
- // Replace the load!
- patchAndReplaceAllUsesWith(L, AvailableValue);
+ // MaterializeAdjustedValue is responsible for combining metadata.
+ L->replaceAllUsesWith(AvailableValue);
markInstructionForDeletion(L);
if (MSSAU)
MSSAU->removeMemoryAccess(L);
@@ -2543,7 +2676,9 @@ bool GVNPass::processInstruction(Instruction *I) {
// Failure, just remember this instance for future use.
addToLeaderTable(Num, I, I->getParent());
return false;
- } else if (Repl == I) {
+ }
+
+ if (Repl == I) {
// If I was the result of a shortcut PRE, it might already be in the table
// and the best replacement for itself. Nothing to do.
return false;
@@ -2669,12 +2804,7 @@ bool GVNPass::processBlock(BasicBlock *BB) {
LLVM_DEBUG(dbgs() << "GVN removed: " << *I << '\n');
salvageKnowledge(I, AC);
salvageDebugInfo(*I);
- if (MD) MD->removeInstruction(I);
- if (MSSAU)
- MSSAU->removeMemoryAccess(I);
- LLVM_DEBUG(verifyRemoved(I));
- ICF->removeInstruction(I);
- I->eraseFromParent();
+ removeInstruction(I);
}
InstrsToErase.clear();
@@ -2765,9 +2895,6 @@ bool GVNPass::performScalarPRE(Instruction *CurInst) {
// We don't currently value number ANY inline asm calls.
if (CallB->isInlineAsm())
return false;
- // Don't do PRE on convergent calls.
- if (CallB->isConvergent())
- return false;
}
uint32_t ValNo = VN.lookup(CurInst);
@@ -2855,7 +2982,9 @@ bool GVNPass::performScalarPRE(Instruction *CurInst) {
PREInstr = CurInst->clone();
if (!performScalarPREInsertion(PREInstr, PREPred, CurrentBlock, ValNo)) {
// If we failed insertion, make sure we remove the instruction.
- LLVM_DEBUG(verifyRemoved(PREInstr));
+#ifndef NDEBUG
+ verifyRemoved(PREInstr);
+#endif
PREInstr->deleteValue();
return false;
}
@@ -2894,15 +3023,7 @@ bool GVNPass::performScalarPRE(Instruction *CurInst) {
removeFromLeaderTable(ValNo, CurInst, CurrentBlock);
LLVM_DEBUG(dbgs() << "GVN PRE removed: " << *CurInst << '\n');
- if (MD)
- MD->removeInstruction(CurInst);
- if (MSSAU)
- MSSAU->removeMemoryAccess(CurInst);
- LLVM_DEBUG(verifyRemoved(CurInst));
- // FIXME: Intended to be markInstructionForDeletion(CurInst), but it causes
- // some assertion failures.
- ICF->removeInstruction(CurInst);
- CurInst->eraseFromParent();
+ removeInstruction(CurInst);
++NumGVNInstr;
return true;
@@ -2998,6 +3119,17 @@ void GVNPass::cleanupGlobalSets() {
InvalidBlockRPONumbers = true;
}
+void GVNPass::removeInstruction(Instruction *I) {
+ if (MD) MD->removeInstruction(I);
+ if (MSSAU)
+ MSSAU->removeMemoryAccess(I);
+#ifndef NDEBUG
+ verifyRemoved(I);
+#endif
+ ICF->removeInstruction(I);
+ I->eraseFromParent();
+}
+
/// Verify that the specified instruction does not occur in our
/// internal data structures.
void GVNPass::verifyRemoved(const Instruction *Inst) const {
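
One side note on the createGEPExpr change earlier in this file's diff: expressing a GEP as a base plus a byte offset (via collectOffset) lets GVN assign the same value number to address computations that use different element types. A conceptual, standalone sketch of why that canonical form compares equal (the struct and names here are illustrative, not GVN's Expression type):

    #include <cassert>
    #include <cstdint>

    struct CanonicalGEP {
      const void *Base;
      int64_t ByteOffset;
      bool operator==(const CanonicalGEP &O) const {
        return Base == O.Base && ByteOffset == O.ByteOffset;
      }
    };

    int main() {
      int Buffer[4] = {};
      // 'gep i32, %p, 1' and 'gep i8, %p, 4' address the same byte.
      CanonicalGEP AsI32{Buffer, 1 * int64_t(sizeof(int32_t))};
      CanonicalGEP AsI8{Buffer, 4 * int64_t(sizeof(int8_t))};
      assert(AsI32 == AsI8);
      return 0;
    }
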
diff --git a/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index bbff497b7d92..b564f00eb9d1 100644
--- a/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -62,13 +62,10 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
@@ -519,39 +516,6 @@ private:
std::pair<unsigned, unsigned> hoistExpressions(Function &F);
};
-class GVNHoistLegacyPass : public FunctionPass {
-public:
- static char ID;
-
- GVNHoistLegacyPass() : FunctionPass(ID) {
- initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
- auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
- auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
-
- GVNHoist G(&DT, &PDT, &AA, &MD, &MSSA);
- return G.run(F);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTreeWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<MemoryDependenceWrapperPass>();
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
-};
-
bool GVNHoist::run(Function &F) {
NumFuncArgs = F.arg_size();
VN.setDomTree(DT);
@@ -808,15 +772,20 @@ bool GVNHoist::valueAnticipable(CHIArgs C, Instruction *TI) const {
void GVNHoist::checkSafety(CHIArgs C, BasicBlock *BB, GVNHoist::InsKind K,
SmallVectorImpl<CHIArg> &Safe) {
int NumBBsOnAllPaths = MaxNumberOfBBSInPath;
+ const Instruction *T = BB->getTerminator();
for (auto CHI : C) {
Instruction *Insn = CHI.I;
if (!Insn) // No instruction was inserted in this CHI.
continue;
+ // If the Terminator is some kind of "exotic terminator" that produces a
+ // value (such as InvokeInst, CallBrInst, or CatchSwitchInst) which the CHI
+ // uses, it is not safe to hoist the use above the def.
+ if (!T->use_empty() && is_contained(Insn->operands(), cast<const Value>(T)))
+ continue;
if (K == InsKind::Scalar) {
if (safeToHoistScalar(BB, Insn->getParent(), NumBBsOnAllPaths))
Safe.push_back(CHI);
} else {
- auto *T = BB->getTerminator();
if (MemoryUseOrDef *UD = MSSA->getMemoryAccess(Insn))
if (safeToHoistLdSt(T, Insn, UD, K, NumBBsOnAllPaths))
Safe.push_back(CHI);
@@ -1251,17 +1220,3 @@ PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserve<MemorySSAAnalysis>();
return PA;
}
-
-char GVNHoistLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(GVNHoistLegacyPass, "gvn-hoist",
- "Early GVN Hoisting of Expressions", false, false)
-INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(GVNHoistLegacyPass, "gvn-hoist",
- "Early GVN Hoisting of Expressions", false, false)
-
-FunctionPass *llvm::createGVNHoistPass() { return new GVNHoistLegacyPass(); }
diff --git a/llvm/lib/Transforms/Scalar/GVNSink.cpp b/llvm/lib/Transforms/Scalar/GVNSink.cpp
index 5fb8a77051fb..26a6978656e6 100644
--- a/llvm/lib/Transforms/Scalar/GVNSink.cpp
+++ b/llvm/lib/Transforms/Scalar/GVNSink.cpp
@@ -54,8 +54,6 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/AtomicOrdering.h"
@@ -63,7 +61,6 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -154,7 +151,7 @@ public:
void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
for (auto II = Insts.begin(); II != Insts.end();) {
- if (!llvm::is_contained(Blocks, (*II)->getParent())) {
+ if (!Blocks.contains((*II)->getParent())) {
ActiveBlocks.remove((*II)->getParent());
II = Insts.erase(II);
} else {
@@ -272,7 +269,7 @@ public:
auto VI = Values.begin();
while (BI != Blocks.end()) {
assert(VI != Values.end());
- if (!llvm::is_contained(NewBlocks, *BI)) {
+ if (!NewBlocks.contains(*BI)) {
BI = Blocks.erase(BI);
VI = Values.erase(VI);
} else {
@@ -886,29 +883,6 @@ void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
NumRemoved += Insts.size() - 1;
}
-////////////////////////////////////////////////////////////////////////////////
-// Pass machinery / boilerplate
-
-class GVNSinkLegacyPass : public FunctionPass {
-public:
- static char ID;
-
- GVNSinkLegacyPass() : FunctionPass(ID) {
- initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
- GVNSink G;
- return G.run(F);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
-};
-
} // end anonymous namespace
PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
@@ -917,14 +891,3 @@ PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
-
-char GVNSinkLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
- "Early GVN sinking of Expressions", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
- "Early GVN sinking of Expressions", false, false)
-
-FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }
diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index abe0babc3f12..62b40a23e38c 100644
--- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -69,6 +69,7 @@ using namespace llvm;
STATISTIC(GuardsEliminated, "Number of eliminated guards");
STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches");
+STATISTIC(FreezeAdded, "Number of freeze instructions introduced");
static cl::opt<bool>
WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden,
@@ -113,6 +114,23 @@ static void eliminateGuard(Instruction *GuardInst, MemorySSAUpdater *MSSAU) {
++GuardsEliminated;
}
+/// Find a point at which the widened condition of \p Guard should be inserted.
+/// When it is represented as intrinsic call, we can do it right before the call
+/// instruction. However, when we are dealing with widenable branch, we must
+/// account for the following situation: widening should not turn a
+/// loop-invariant condition into a loop-variant. It means that if
+/// widenable.condition() call is invariant (w.r.t. any loop), the new wide
+/// condition should stay invariant. Otherwise there can be a miscompile, like
+/// the one described at https://github.com/llvm/llvm-project/issues/60234. The
+/// safest way to do it is to expand the new condition at WC's block.
+static Instruction *findInsertionPointForWideCondition(Instruction *Guard) {
+ Value *Condition, *WC;
+ BasicBlock *IfTrue, *IfFalse;
+ if (parseWidenableBranch(Guard, Condition, WC, IfTrue, IfFalse))
+ return cast<Instruction>(WC);
+ return Guard;
+}
+
class GuardWideningImpl {
DominatorTree &DT;
PostDominatorTree *PDT;
@@ -170,16 +188,16 @@ class GuardWideningImpl {
bool InvertCond);
/// Helper to check if \p V can be hoisted to \p InsertPos.
- bool isAvailableAt(const Value *V, const Instruction *InsertPos) const {
+ bool canBeHoistedTo(const Value *V, const Instruction *InsertPos) const {
SmallPtrSet<const Instruction *, 8> Visited;
- return isAvailableAt(V, InsertPos, Visited);
+ return canBeHoistedTo(V, InsertPos, Visited);
}
- bool isAvailableAt(const Value *V, const Instruction *InsertPos,
- SmallPtrSetImpl<const Instruction *> &Visited) const;
+ bool canBeHoistedTo(const Value *V, const Instruction *InsertPos,
+ SmallPtrSetImpl<const Instruction *> &Visited) const;
/// Helper to hoist \p V to \p InsertPos. Guaranteed to succeed if \c
- /// isAvailableAt returned true.
+ /// canBeHoistedTo returned true.
void makeAvailableAt(Value *V, Instruction *InsertPos) const;
/// Common helper used by \c widenGuard and \c isWideningCondProfitable. Try
@@ -192,6 +210,10 @@ class GuardWideningImpl {
bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt,
Value *&Result, bool InvertCondition);
+ /// Adds a freeze to Orig and pushes it as far as possible, very aggressively.
+ /// Also replaces all uses of the frozen instruction with the frozen version.
+ Value *freezeAndPush(Value *Orig, Instruction *InsertPt);
+
/// Represents a range check of the form \c Base + \c Offset u< \c Length,
/// with the constraint that \c Length is not negative. \c CheckInst is the
/// pre-existing instruction in the IR that computes the result of this range
@@ -263,8 +285,8 @@ class GuardWideningImpl {
void widenGuard(Instruction *ToWiden, Value *NewCondition,
bool InvertCondition) {
Value *Result;
-
- widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result,
+ Instruction *InsertPt = findInsertionPointForWideCondition(ToWiden);
+ widenCondCommon(getCondition(ToWiden), NewCondition, InsertPt, Result,
InvertCondition);
if (isGuardAsWidenableBranch(ToWiden)) {
setWidenableBranchCond(cast<BranchInst>(ToWiden), Result);
@@ -422,7 +444,10 @@ GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr,
HoistingOutOfLoop = true;
}
- if (!isAvailableAt(getCondition(DominatedInstr), DominatingGuard))
+ auto *WideningPoint = findInsertionPointForWideCondition(DominatingGuard);
+ if (!canBeHoistedTo(getCondition(DominatedInstr), WideningPoint))
+ return WS_IllegalOrNegative;
+ if (!canBeHoistedTo(getCondition(DominatingGuard), WideningPoint))
return WS_IllegalOrNegative;
// If the guard was conditional executed, it may never be reached
@@ -440,30 +465,70 @@ GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr,
if (HoistingOutOfLoop)
return WS_Positive;
- // Returns true if we might be hoisting above explicit control flow. Note
- // that this completely ignores implicit control flow (guards, calls which
- // throw, etc...). That choice appears arbitrary.
- auto MaybeHoistingOutOfIf = [&]() {
- auto *DominatingBlock = DominatingGuard->getParent();
- auto *DominatedBlock = DominatedInstr->getParent();
- if (isGuardAsWidenableBranch(DominatingGuard))
- DominatingBlock = cast<BranchInst>(DominatingGuard)->getSuccessor(0);
+ // For a given basic block \p BB, return the successor that is guaranteed or
+ // highly likely to be taken.
+ auto GetLikelySuccessor = [](const BasicBlock *BB) -> const BasicBlock * {
+ if (auto *UniqueSucc = BB->getUniqueSuccessor())
+ return UniqueSucc;
+ auto *Term = BB->getTerminator();
+ Value *Cond = nullptr;
+ const BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
+ using namespace PatternMatch;
+ if (!match(Term, m_Br(m_Value(Cond), m_BasicBlock(IfTrue),
+ m_BasicBlock(IfFalse))))
+ return nullptr;
+ // For constant conditions, only one dynamic successor is possible.
+ if (auto *ConstCond = dyn_cast<ConstantInt>(Cond))
+ return ConstCond->isAllOnesValue() ? IfTrue : IfFalse;
+ // If one of the successors ends with a deopt, the other one is the likely one.
+ if (IfFalse->getPostdominatingDeoptimizeCall())
+ return IfTrue;
+ if (IfTrue->getPostdominatingDeoptimizeCall())
+ return IfFalse;
+ // TODO: Use branch frequency metadata to allow hoisting through non-deopt
+ // branches?
+ return nullptr;
+ };
+
+ // Returns true if we might be hoisting above explicit control flow into a
+ // considerably hotter block. Note that this completely ignores implicit
+ // control flow (guards, calls that throw, etc.). That choice appears
+ // arbitrary (we assume that implicit control flow exits are all rare).
+ auto MaybeHoistingToHotterBlock = [&]() {
+ const auto *DominatingBlock = DominatingGuard->getParent();
+ const auto *DominatedBlock = DominatedInstr->getParent();
+
+ // Descend as low as we can, always taking the likely successor.
+ assert(DT.isReachableFromEntry(DominatingBlock) && "Unreached code");
+ assert(DT.isReachableFromEntry(DominatedBlock) && "Unreached code");
+ assert(DT.dominates(DominatingBlock, DominatedBlock) && "No dominance");
+ while (DominatedBlock != DominatingBlock) {
+ auto *LikelySucc = GetLikelySuccessor(DominatingBlock);
+ // No likely successor?
+ if (!LikelySucc)
+ break;
+ // Only go down the dominator tree.
+ if (!DT.properlyDominates(DominatingBlock, LikelySucc))
+ break;
+ DominatingBlock = LikelySucc;
+ }
- // Same Block?
+ // Found?
if (DominatedBlock == DominatingBlock)
return false;
- // Obvious successor (common loop header/preheader case)
- if (DominatedBlock == DominatingBlock->getUniqueSuccessor())
- return false;
+ // We followed the likely successor chain and went past the dominated
+ // block. It means that the dominated guard is in dead/very cold code.
+ if (!DT.dominates(DominatingBlock, DominatedBlock))
+ return true;
// TODO: diamond, triangle cases
if (!PDT) return true;
return !PDT->dominates(DominatedBlock, DominatingBlock);
};
- return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral;
+ return MaybeHoistingToHotterBlock() ? WS_IllegalOrNegative : WS_Neutral;
}
-bool GuardWideningImpl::isAvailableAt(
+bool GuardWideningImpl::canBeHoistedTo(
const Value *V, const Instruction *Loc,
SmallPtrSetImpl<const Instruction *> &Visited) const {
auto *Inst = dyn_cast<Instruction>(V);
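A hedged sketch of the likely-successor walk used by MaybeHoistingToHotterBlock above (illustrative IR; the deoptimize call is assumed to post-dominate its block): a branch whose other arm ends in a deopt is treated as effectively straight-line, so the walk continues through its hot successor.

  declare i32 @llvm.experimental.deoptimize.i32(...)

  check:
    br i1 %c, label %hot, label %cold
  cold:
    %dv = call i32 @llvm.experimental.deoptimize.i32() [ "deopt"() ]
    ret i32 %dv
  hot:                                 ; GetLikelySuccessor(check) == %hot
    ...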
@@ -482,7 +547,7 @@ bool GuardWideningImpl::isAvailableAt(
assert(DT.isReachableFromEntry(Inst->getParent()) &&
"We did a DFS from the block entry!");
return all_of(Inst->operands(),
- [&](Value *Op) { return isAvailableAt(Op, Loc, Visited); });
+ [&](Value *Op) { return canBeHoistedTo(Op, Loc, Visited); });
}
void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
@@ -491,14 +556,115 @@ void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
return;
assert(isSafeToSpeculativelyExecute(Inst, Loc, &AC, &DT) &&
- !Inst->mayReadFromMemory() && "Should've checked with isAvailableAt!");
+ !Inst->mayReadFromMemory() &&
+ "Should've checked with canBeHoistedTo!");
for (Value *Op : Inst->operands())
makeAvailableAt(Op, Loc);
Inst->moveBefore(Loc);
- // If we moved instruction before guard we must clean poison generating flags.
- Inst->dropPoisonGeneratingFlags();
+}
+
+// Return the instruction before which we can insert a freeze for the value V,
+// as close to its def as possible. If there is no place to add the freeze,
+// return nullptr.
+static Instruction *getFreezeInsertPt(Value *V, const DominatorTree &DT) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return &*DT.getRoot()->getFirstNonPHIOrDbgOrAlloca();
+
+ auto *Res = I->getInsertionPointAfterDef();
+ // If there is no place to add the freeze, return nullptr.
+ if (!Res || !DT.dominates(I, Res))
+ return nullptr;
+
+ // If there is a user dominated by the original I, then it should be dominated
+ // by the Freeze instruction as well.
+ if (any_of(I->users(), [&](User *U) {
+ Instruction *User = cast<Instruction>(U);
+ return Res != User && DT.dominates(I, User) && !DT.dominates(Res, User);
+ }))
+ return nullptr;
+ return Res;
+}
+
+Value *GuardWideningImpl::freezeAndPush(Value *Orig, Instruction *InsertPt) {
+ if (isGuaranteedNotToBePoison(Orig, nullptr, InsertPt, &DT))
+ return Orig;
+ Instruction *InsertPtAtDef = getFreezeInsertPt(Orig, DT);
+ if (!InsertPtAtDef)
+ return new FreezeInst(Orig, "gw.freeze", InsertPt);
+ if (isa<Constant>(Orig) || isa<GlobalValue>(Orig))
+ return new FreezeInst(Orig, "gw.freeze", InsertPtAtDef);
+
+ SmallSet<Value *, 16> Visited;
+ SmallVector<Value *, 16> Worklist;
+ SmallSet<Instruction *, 16> DropPoisonFlags;
+ SmallVector<Value *, 16> NeedFreeze;
+ DenseMap<Value *, FreezeInst *> CacheOfFreezes;
+
+ // These data structures are slightly overloaded: Visited also contains a
+ // constant/GV once we have seen it; in that case CacheOfFreezes holds the
+ // corresponding freeze if one is required.
+ auto handleConstantOrGlobal = [&](Use &U) {
+ Value *Def = U.get();
+ if (!isa<Constant>(Def) && !isa<GlobalValue>(Def))
+ return false;
+
+ if (Visited.insert(Def).second) {
+ if (isGuaranteedNotToBePoison(Def, nullptr, InsertPt, &DT))
+ return true;
+ CacheOfFreezes[Def] = new FreezeInst(Def, Def->getName() + ".gw.fr",
+ getFreezeInsertPt(Def, DT));
+ }
+
+ if (CacheOfFreezes.count(Def))
+ U.set(CacheOfFreezes[Def]);
+ return true;
+ };
+
+ Worklist.push_back(Orig);
+ while (!Worklist.empty()) {
+ Value *V = Worklist.pop_back_val();
+ if (!Visited.insert(V).second)
+ continue;
+
+ if (isGuaranteedNotToBePoison(V, nullptr, InsertPt, &DT))
+ continue;
+
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I || canCreateUndefOrPoison(cast<Operator>(I),
+ /*ConsiderFlagsAndMetadata*/ false)) {
+ NeedFreeze.push_back(V);
+ continue;
+ }
+ // Check all operands. If we cannot insert a freeze for any of them,
+ // stop here. Otherwise, keep iterating.
+ if (any_of(I->operands(), [&](Value *Op) {
+ return isa<Instruction>(Op) && !getFreezeInsertPt(Op, DT);
+ })) {
+ NeedFreeze.push_back(I);
+ continue;
+ }
+ DropPoisonFlags.insert(I);
+ for (Use &U : I->operands())
+ if (!handleConstantOrGlobal(U))
+ Worklist.push_back(U.get());
+ }
+ for (Instruction *I : DropPoisonFlags)
+ I->dropPoisonGeneratingFlagsAndMetadata();
+
+ Value *Result = Orig;
+ for (Value *V : NeedFreeze) {
+ auto *FreezeInsertPt = getFreezeInsertPt(V, DT);
+ FreezeInst *FI = new FreezeInst(V, V->getName() + ".gw.fr", FreezeInsertPt);
+ ++FreezeAdded;
+ if (V == Orig)
+ Result = FI;
+ V->replaceUsesWithIf(
+ FI, [&](const Use &U) -> bool { return U.getUser() != FI; });
+ }
+
+ return Result;
}
bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
@@ -532,6 +698,8 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
if (InsertPt) {
ConstantInt *NewRHS =
ConstantInt::get(Cond0->getContext(), NewRHSAP);
+ assert(canBeHoistedTo(LHS, InsertPt) && "must be");
+ makeAvailableAt(LHS, InsertPt);
Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk");
}
return true;
@@ -558,6 +726,7 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
}
assert(Result && "Failed to find result value");
Result->setName("wide.chk");
+ Result = freezeAndPush(Result, InsertPt);
}
return true;
}
@@ -570,6 +739,7 @@ bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
makeAvailableAt(Cond1, InsertPt);
if (InvertCondition)
Cond1 = BinaryOperator::CreateNot(Cond1, "inverted", InsertPt);
+ Cond1 = freezeAndPush(Cond1, InsertPt);
Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt);
}
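To make the combined effect of the new freeze logic concrete, here is a hedged before/after sketch (value names other than the .gw.fr suffix and wide.chk are assumptions): when the dominated guard's condition may be poison, freezeAndPush freezes it as close to the def as possible, drops poison-generating flags on the instructions it pushed through, and only then are the conditions and-ed together.

  ; before widening: %c1 may be poison because of the nsw flag
  %x.inc = add nsw i32 %x, 1
  %c1 = icmp ult i32 %x.inc, 100

  ; after widening into the dominating guard
  %x.gw.fr = freeze i32 %x
  %x.inc = add i32 %x.gw.fr, 1          ; nsw dropped while pushing the freeze
  %c1 = icmp ult i32 %x.inc, 100
  %wide.chk = and i1 %c0, %c1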
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index c834e51b5f29..40475d9563b2 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -64,15 +64,12 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -93,15 +90,6 @@ STATISTIC(NumLFTR , "Number of loop exit tests replaced");
STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
-// Trip count verification can be enabled by default under NDEBUG if we
-// implement a strong expression equivalence checker in SCEV. Until then, we
-// use the verify-indvars flag, which may assert in some cases.
-static cl::opt<bool> VerifyIndvars(
- "verify-indvars", cl::Hidden,
- cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
- "effect in release builds. (Note: this adds additional SCEV "
- "queries potentially changing the analysis result)"));
-
static cl::opt<ReplaceExitVal> ReplaceExitValue(
"replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
@@ -416,8 +404,8 @@ bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
PHIs.push_back(&PN);
bool Changed = false;
- for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
- if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
+ for (WeakTrackingVH &PHI : PHIs)
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHI))
Changed |= handleFloatingPointIV(L, PN);
// If the loop previously had floating-point IV, ScalarEvolution
@@ -759,50 +747,6 @@ static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
return Phi != getLoopPhiForCounter(IncV, L);
}
-/// Return true if undefined behavior would provable be executed on the path to
-/// OnPathTo if Root produced a posion result. Note that this doesn't say
-/// anything about whether OnPathTo is actually executed or whether Root is
-/// actually poison. This can be used to assess whether a new use of Root can
-/// be added at a location which is control equivalent with OnPathTo (such as
-/// immediately before it) without introducing UB which didn't previously
-/// exist. Note that a false result conveys no information.
-static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
- Instruction *OnPathTo,
- DominatorTree *DT) {
- // Basic approach is to assume Root is poison, propagate poison forward
- // through all users we can easily track, and then check whether any of those
- // users are provable UB and must execute before out exiting block might
- // exit.
-
- // The set of all recursive users we've visited (which are assumed to all be
- // poison because of said visit)
- SmallSet<const Value *, 16> KnownPoison;
- SmallVector<const Instruction*, 16> Worklist;
- Worklist.push_back(Root);
- while (!Worklist.empty()) {
- const Instruction *I = Worklist.pop_back_val();
-
- // If we know this must trigger UB on a path leading our target.
- if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
- return true;
-
- // If we can't analyze propagation through this instruction, just skip it
- // and transitive users. Safe as false is a conservative result.
- if (I != Root && !any_of(I->operands(), [&KnownPoison](const Use &U) {
- return KnownPoison.contains(U) && propagatesPoison(U);
- }))
- continue;
-
- if (KnownPoison.insert(I).second)
- for (const User *User : I->users())
- Worklist.push_back(cast<Instruction>(User));
- }
-
- // Might be non-UB, or might have a path we couldn't prove must execute on
- // way to exiting bb.
- return false;
-}
-
/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
@@ -845,20 +789,6 @@ static bool hasConcreteDef(Value *V) {
return hasConcreteDefImpl(V, Visited, 0);
}
-/// Return true if this IV has any uses other than the (soon to be rewritten)
-/// loop exit test.
-static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
- int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
- Value *IncV = Phi->getIncomingValue(LatchIdx);
-
- for (User *U : Phi->users())
- if (U != Cond && U != IncV) return false;
-
- for (User *U : IncV->users())
- if (U != Cond && U != Phi) return false;
- return true;
-}
-
/// Return true if the given phi is a "counter" in L. A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1. Note that L must have exactly one latch.
@@ -910,10 +840,6 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
if (!isLoopCounter(Phi, L, SE))
continue;
- // Avoid comparing an integer IV against a pointer Limit.
- if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
- continue;
-
const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
// AR may be a pointer type, while BECount is an integer type.
@@ -949,9 +875,9 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
const SCEV *Init = AR->getStart();
- if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
+ if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) {
// Don't force a live loop counter if another IV can be used.
- if (AlmostDeadIV(Phi, LatchBlock, Cond))
+ if (isAlmostDeadIV(Phi, LatchBlock, Cond))
continue;
// Prefer to count-from-zero. This is a more "canonical" counter form. It
@@ -979,78 +905,29 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
const SCEV *ExitCount, bool UsePostInc, Loop *L,
SCEVExpander &Rewriter, ScalarEvolution *SE) {
assert(isLoopCounter(IndVar, L, SE));
+ assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
- const SCEV *IVInit = AR->getStart();
assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
- // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
- // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
- // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
- // the existing GEPs whenever possible.
- if (IndVar->getType()->isPointerTy() &&
- !ExitCount->getType()->isPointerTy()) {
- // IVOffset will be the new GEP offset that is interpreted by GEP as a
- // signed value. ExitCount on the other hand represents the loop trip count,
- // which is an unsigned value. FindLoopCounter only allows induction
- // variables that have a positive unit stride of one. This means we don't
- // have to handle the case of negative offsets (yet) and just need to zero
- // extend ExitCount.
- Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
- const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
- if (UsePostInc)
- IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
-
- // Expand the code for the iteration count.
- assert(SE->isLoopInvariant(IVOffset, L) &&
- "Computed iteration count is not loop invariant!");
-
- const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
- BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
- return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
- } else {
- // In any other case, convert both IVInit and ExitCount to integers before
- // comparing. This may result in SCEV expansion of pointers, but in practice
- // SCEV will fold the pointer arithmetic away as such:
- // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
- //
- // Valid Cases: (1) both integers is most common; (2) both may be pointers
- // for simple memset-style loops.
- //
- // IVInit integer and ExitCount pointer would only occur if a canonical IV
- // were generated on top of case #2, which is not expected.
-
- // For unit stride, IVCount = Start + ExitCount with 2's complement
- // overflow.
-
- // For integer IVs, truncate the IV before computing IVInit + BECount,
- // unless we know apriori that the limit must be a constant when evaluated
- // in the bitwidth of the IV. We prefer (potentially) keeping a truncate
- // of the IV in the loop over a (potentially) expensive expansion of the
- // widened exit count add(zext(add)) expression.
- if (SE->getTypeSizeInBits(IVInit->getType())
- > SE->getTypeSizeInBits(ExitCount->getType())) {
- if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
- ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
- else
- IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
- }
-
- const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
-
- if (UsePostInc)
- IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
-
- // Expand the code for the iteration count.
- assert(SE->isLoopInvariant(IVLimit, L) &&
- "Computed iteration count is not loop invariant!");
- // Ensure that we generate the same type as IndVar, or a smaller integer
- // type. In the presence of null pointer values, we have an integer type
- // SCEV expression (IVInit) for a pointer type IV value (IndVar).
- Type *LimitTy = ExitCount->getType()->isPointerTy() ?
- IndVar->getType() : ExitCount->getType();
- BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
- return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
+ // For integer IVs, truncate the IV before computing the limit unless we
+ // know a priori that the limit must be a constant when evaluated in the
+ // bitwidth of the IV. We prefer (potentially) keeping a truncate of the
+ // IV in the loop over a (potentially) expensive expansion of the widened
+ // exit count add(zext(add)) expression.
+ if (IndVar->getType()->isIntegerTy() &&
+ SE->getTypeSizeInBits(AR->getType()) >
+ SE->getTypeSizeInBits(ExitCount->getType())) {
+ const SCEV *IVInit = AR->getStart();
+ if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
+ AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
}
+
+ const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
+ const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
+ assert(SE->isLoopInvariant(IVLimit, L) &&
+ "Computed iteration count is not loop invariant!");
+ return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
+ ExitingBB->getTerminator());
}
/// This method rewrites the exit condition of the loop to be a canonical !=
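A small worked sketch of the simplified limit computation above (hypothetical names and values): for a unit-stride counter {%start,+,1} with integer exit count %btc, evaluateAtIteration yields %start + %btc, and the post-increment recurrence yields %start + %btc + 1; SCEVExpander then materializes that limit at the exiting terminator for the canonical != exit test.

  ; post-increment case, as SCEVExpander might emit it (names illustrative):
  %tmp = add i32 %start, %btc
  %lftr.limit = add i32 %tmp, 1
  %exitcond = icmp ne i32 %iv.next, %lftr.limit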
@@ -1148,8 +1025,7 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
// a truncate within it.
bool Extended = false;
const SCEV *IV = SE->getSCEV(CmpIndVar);
- const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
- ExitCnt->getType());
+ const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
const SCEV *ZExtTrunc =
SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
@@ -1359,14 +1235,16 @@ createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
const ScalarEvolution::LoopInvariantPredicate &LIP,
SCEVExpander &Rewriter) {
ICmpInst::Predicate InvariantPred = LIP.Pred;
- BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
- Rewriter.setInsertPoint(BI);
+ BasicBlock *Preheader = L->getLoopPreheader();
+ assert(Preheader && "Preheader doesn't exist");
+ Rewriter.setInsertPoint(Preheader->getTerminator());
auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
if (ExitIfTrue)
InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
- IRBuilder<> Builder(BI);
+ IRBuilder<> Builder(Preheader->getTerminator());
+ BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
BI->getCondition()->getName());
}
@@ -1519,7 +1397,6 @@ static bool optimizeLoopExitWithUnknownExitCount(
auto *NewCond = *Replaced;
if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
NCI->setName(OldCond->getName() + ".first_iter");
- NCI->moveBefore(cast<Instruction>(OldCond));
}
LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
<< " with " << *NewCond << "\n");
@@ -2022,16 +1899,6 @@ bool IndVarSimplify::run(Loop *L) {
if (!L->isLoopSimplifyForm())
return false;
-#ifndef NDEBUG
- // Used below for a consistency check only
- // Note: Since the result returned by ScalarEvolution may depend on the order
- // in which previous results are added to its cache, the call to
- // getBackedgeTakenCount() may change following SCEV queries.
- const SCEV *BackedgeTakenCount;
- if (VerifyIndvars)
- BackedgeTakenCount = SE->getBackedgeTakenCount(L);
-#endif
-
bool Changed = false;
// If there are any floating-point recurrences, attempt to
// transform them to use integer recurrences.
@@ -2180,27 +2047,8 @@ bool IndVarSimplify::run(Loop *L) {
// Check a post-condition.
assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
"Indvars did not preserve LCSSA!");
-
- // Verify that LFTR, and any other change have not interfered with SCEV's
- // ability to compute trip count. We may have *changed* the exit count, but
- // only by reducing it.
-#ifndef NDEBUG
- if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
- SE->forgetLoop(L);
- const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
- if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
- SE->getTypeSizeInBits(NewBECount->getType()))
- NewBECount = SE->getTruncateOrNoop(NewBECount,
- BackedgeTakenCount->getType());
- else
- BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
- NewBECount->getType());
- assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
- NewBECount) && "indvars must preserve SCEV");
- }
if (VerifyMemorySSA && MSSAU)
MSSAU->getMemorySSA()->verifyMemorySSA();
-#endif
return Changed;
}
@@ -2222,54 +2070,3 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
PA.preserve<MemorySSAAnalysis>();
return PA;
}
-
-namespace {
-
-struct IndVarSimplifyLegacyPass : public LoopPass {
- static char ID; // Pass identification, replacement for typeid
-
- IndVarSimplifyLegacyPass() : LoopPass(ID) {
- initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (skipLoop(L))
- return false;
-
- auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
- auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
- auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
- MemorySSA *MSSA = nullptr;
- if (MSSAAnalysis)
- MSSA = &MSSAAnalysis->getMSSA();
-
- IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
- return IVS.run(L);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addPreserved<MemorySSAWrapperPass>();
- getLoopAnalysisUsage(AU);
- }
-};
-
-} // end anonymous namespace
-
-char IndVarSimplifyLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
- "Induction Variable Simplification", false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopPass)
-INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
- "Induction Variable Simplification", false, false)
-
-Pass *llvm::createIndVarSimplifyPass() {
- return new IndVarSimplifyLegacyPass();
-}
diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index 52a4bc8a9f24..b52589baeee7 100644
--- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -72,8 +72,6 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
@@ -81,7 +79,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -121,6 +119,16 @@ static cl::opt<bool> AllowNarrowLatchCondition(
cl::desc("If set to true, IRCE may eliminate wide range checks in loops "
"with narrow latch condition."));
+static cl::opt<unsigned> MaxTypeSizeForOverflowCheck(
+ "irce-max-type-size-for-overflow-check", cl::Hidden, cl::init(32),
+ cl::desc(
+ "Maximum size of range check type for which can be produced runtime "
+ "overflow check of its limit's computation"));
+
+static cl::opt<bool>
+ PrintScaledBoundaryRangeChecks("irce-print-scaled-boundary-range-checks",
+ cl::Hidden, cl::init(false));
+
static const char *ClonedLoopTag = "irce.loop.clone";
#define DEBUG_TYPE "irce"
@@ -145,14 +153,23 @@ class InductiveRangeCheck {
Use *CheckUse = nullptr;
static bool parseRangeCheckICmp(Loop *L, ICmpInst *ICI, ScalarEvolution &SE,
- Value *&Index, Value *&Length,
- bool &IsSigned);
+ const SCEVAddRecExpr *&Index,
+ const SCEV *&End);
static void
extractRangeChecksFromCond(Loop *L, ScalarEvolution &SE, Use &ConditionUse,
SmallVectorImpl<InductiveRangeCheck> &Checks,
SmallPtrSetImpl<Value *> &Visited);
+ static bool parseIvAgaisntLimit(Loop *L, Value *LHS, Value *RHS,
+ ICmpInst::Predicate Pred, ScalarEvolution &SE,
+ const SCEVAddRecExpr *&Index,
+ const SCEV *&End);
+
+ static bool reassociateSubLHS(Loop *L, Value *VariantLHS, Value *InvariantRHS,
+ ICmpInst::Predicate Pred, ScalarEvolution &SE,
+ const SCEVAddRecExpr *&Index, const SCEV *&End);
+
public:
const SCEV *getBegin() const { return Begin; }
const SCEV *getStep() const { return Step; }
@@ -219,10 +236,9 @@ public:
///
/// NB! There may be conditions feeding into \p BI that aren't inductive range
/// checks, and hence don't end up in \p Checks.
- static void
- extractRangeChecksFromBranch(BranchInst *BI, Loop *L, ScalarEvolution &SE,
- BranchProbabilityInfo *BPI,
- SmallVectorImpl<InductiveRangeCheck> &Checks);
+ static void extractRangeChecksFromBranch(
+ BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
+ SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed);
};
struct LoopStructure;
@@ -250,48 +266,16 @@ public:
bool run(Loop *L, function_ref<void(Loop *, bool)> LPMAddNewLoop);
};
-class IRCELegacyPass : public FunctionPass {
-public:
- static char ID;
-
- IRCELegacyPass() : FunctionPass(ID) {
- initializeIRCELegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- }
-
- bool runOnFunction(Function &F) override;
-};
-
} // end anonymous namespace
-char IRCELegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(IRCELegacyPass, "irce",
- "Inductive range check elimination", false, false)
-INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(IRCELegacyPass, "irce", "Inductive range check elimination",
- false, false)
-
/// Parse a single ICmp instruction, `ICI`, into a range check. If `ICI` cannot
-/// be interpreted as a range check, return false and set `Index` and `Length`
-/// to `nullptr`. Otherwise set `Index` to the value being range checked, and
-/// set `Length` to the upper limit `Index` is being range checked.
-bool
-InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
- ScalarEvolution &SE, Value *&Index,
- Value *&Length, bool &IsSigned) {
+/// be interpreted as a range check, return false. Otherwise set `Index` to the
+/// SCEV being range checked, and set `End` to the upper or lower limit against
+/// which `Index` is being range checked.
+bool InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
+ ScalarEvolution &SE,
+ const SCEVAddRecExpr *&Index,
+ const SCEV *&End) {
auto IsLoopInvariant = [&SE, L](Value *V) {
return SE.isLoopInvariant(SE.getSCEV(V), L);
};
@@ -300,47 +284,79 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
Value *LHS = ICI->getOperand(0);
Value *RHS = ICI->getOperand(1);
+ // Canonicalize to the `Index Pred Invariant` comparison
+ if (IsLoopInvariant(LHS)) {
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ } else if (!IsLoopInvariant(RHS))
+ // Both LHS and RHS are loop variant
+ return false;
+
+ if (parseIvAgaisntLimit(L, LHS, RHS, Pred, SE, Index, End))
+ return true;
+
+ if (reassociateSubLHS(L, LHS, RHS, Pred, SE, Index, End))
+ return true;
+
+ // TODO: support ReassociateAddLHS
+ return false;
+}
+
+// Try to parse range check in the form of "IV vs Limit"
+bool InductiveRangeCheck::parseIvAgaisntLimit(Loop *L, Value *LHS, Value *RHS,
+ ICmpInst::Predicate Pred,
+ ScalarEvolution &SE,
+ const SCEVAddRecExpr *&Index,
+ const SCEV *&End) {
+
+ auto SIntMaxSCEV = [&](Type *T) {
+ unsigned BitWidth = cast<IntegerType>(T)->getBitWidth();
+ return SE.getConstant(APInt::getSignedMaxValue(BitWidth));
+ };
+
+ const auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(LHS));
+ if (!AddRec)
+ return false;
+
+ // We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L".
+ // We can potentially do much better here.
+ // If we want to adjust the upper bound for unsigned range checks the way we
+ // do for signed ones, we will need to pick the unsigned max instead.
switch (Pred) {
default:
return false;
- case ICmpInst::ICMP_SLE:
- std::swap(LHS, RHS);
- [[fallthrough]];
case ICmpInst::ICMP_SGE:
- IsSigned = true;
if (match(RHS, m_ConstantInt<0>())) {
- Index = LHS;
- return true; // Lower.
+ Index = AddRec;
+ End = SIntMaxSCEV(Index->getType());
+ return true;
}
return false;
- case ICmpInst::ICMP_SLT:
- std::swap(LHS, RHS);
- [[fallthrough]];
case ICmpInst::ICMP_SGT:
- IsSigned = true;
if (match(RHS, m_ConstantInt<-1>())) {
- Index = LHS;
- return true; // Lower.
- }
-
- if (IsLoopInvariant(LHS)) {
- Index = RHS;
- Length = LHS;
- return true; // Upper.
+ Index = AddRec;
+ End = SIntMaxSCEV(Index->getType());
+ return true;
}
return false;
+ case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_ULT:
- std::swap(LHS, RHS);
- [[fallthrough]];
- case ICmpInst::ICMP_UGT:
- IsSigned = false;
- if (IsLoopInvariant(LHS)) {
- Index = RHS;
- Length = LHS;
- return true; // Both lower and upper.
+ Index = AddRec;
+ End = SE.getSCEV(RHS);
+ return true;
+
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_ULE:
+ const SCEV *One = SE.getOne(RHS->getType());
+ const SCEV *RHSS = SE.getSCEV(RHS);
+ bool Signed = Pred == ICmpInst::ICMP_SLE;
+ if (SE.willNotOverflow(Instruction::BinaryOps::Add, Signed, RHSS, One)) {
+ Index = AddRec;
+ End = SE.getAddExpr(RHSS, One);
+ return true;
}
return false;
}
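For reference, a hedged example of comparisons the parser above recognizes (illustrative IR): an "IV vs Limit" upper-bound check, and a lower-bound check that is strengthened to "0 <= IV < SINT_MAX" as the comment notes.

  %iv = phi i32 [ 0, %preheader ], [ %iv.next, %backedge ]
  %rc.upper = icmp slt i32 %iv, %len     ; Index = {0,+,1}, End = %len
  %rc.lower = icmp sge i32 %iv, 0        ; Index = {0,+,1}, End = SINT_MAX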
@@ -348,6 +364,126 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
llvm_unreachable("default clause returns!");
}
+// Try to parse range check in the form of "IV - Offset vs Limit" or "Offset -
+// IV vs Limit"
+bool InductiveRangeCheck::reassociateSubLHS(
+ Loop *L, Value *VariantLHS, Value *InvariantRHS, ICmpInst::Predicate Pred,
+ ScalarEvolution &SE, const SCEVAddRecExpr *&Index, const SCEV *&End) {
+ Value *LHS, *RHS;
+ if (!match(VariantLHS, m_Sub(m_Value(LHS), m_Value(RHS))))
+ return false;
+
+ const SCEV *IV = SE.getSCEV(LHS);
+ const SCEV *Offset = SE.getSCEV(RHS);
+ const SCEV *Limit = SE.getSCEV(InvariantRHS);
+
+ bool OffsetSubtracted = false;
+ if (SE.isLoopInvariant(IV, L))
+ // "Offset - IV vs Limit"
+ std::swap(IV, Offset);
+ else if (SE.isLoopInvariant(Offset, L))
+ // "IV - Offset vs Limit"
+ OffsetSubtracted = true;
+ else
+ return false;
+
+ const auto *AddRec = dyn_cast<SCEVAddRecExpr>(IV);
+ if (!AddRec)
+ return false;
+
+ // In order to turn "IV - Offset < Limit" into "IV < Limit + Offset", we need
+ // to be able to freely move values from the left side of the inequality to
+ // the right side (just as in ordinary linear arithmetic). Overflows make
+ // things much more
+ // complicated, so we want to avoid this.
+ //
+ // Let's prove that the initial subtraction doesn't overflow with all IV's
+ // values from the safe range constructed for that check.
+ //
+ // [Case 1] IV - Offset < Limit
+ // It doesn't overflow if:
+ // SINT_MIN <= IV - Offset <= SINT_MAX
+ // In terms of scaled SINT we need to prove:
+ // SINT_MIN + Offset <= IV <= SINT_MAX + Offset
+ // Safe range will be constructed:
+ // 0 <= IV < Limit + Offset
+ // It means that 'IV - Offset' doesn't underflow, because:
+ // SINT_MIN + Offset < 0 <= IV
+ // and doesn't overflow:
+ // IV < Limit + Offset <= SINT_MAX + Offset
+ //
+ // [Case 2] Offset - IV > Limit
+ // It doesn't overflow if:
+ // SINT_MIN <= Offset - IV <= SINT_MAX
+ // In terms of scaled SINT we need to prove:
+ // -SINT_MIN >= IV - Offset >= -SINT_MAX
+ // Offset - SINT_MIN >= IV >= Offset - SINT_MAX
+ // Safe range will be constructed:
+ // 0 <= IV < Offset - Limit
+ // It means that 'Offset - IV' doesn't underflow, because
+ // Offset - SINT_MAX < 0 <= IV
+ // and doesn't overflow:
+ // IV < Offset - Limit <= Offset - SINT_MIN
+ //
+ // For the computed upper boundary of the IV's range (Offset +/- Limit) we
+ // don't know exactly whether it overflows or not. So if we can't prove this
+ // fact at compile time, we scale the boundary computations to a wider type
+ // with the intention of adding a runtime overflow check.
+
+ auto getExprScaledIfOverflow = [&](Instruction::BinaryOps BinOp,
+ const SCEV *LHS,
+ const SCEV *RHS) -> const SCEV * {
+ const SCEV *(ScalarEvolution::*Operation)(const SCEV *, const SCEV *,
+ SCEV::NoWrapFlags, unsigned);
+ switch (BinOp) {
+ default:
+ llvm_unreachable("Unsupported binary op");
+ case Instruction::Add:
+ Operation = &ScalarEvolution::getAddExpr;
+ break;
+ case Instruction::Sub:
+ Operation = &ScalarEvolution::getMinusSCEV;
+ break;
+ }
+
+ if (SE.willNotOverflow(BinOp, ICmpInst::isSigned(Pred), LHS, RHS,
+ cast<Instruction>(VariantLHS)))
+ return (SE.*Operation)(LHS, RHS, SCEV::FlagAnyWrap, 0);
+
+ // We couldn't prove that the expression does not overflow.
+ // Then scale it to a wider type to check for overflow at runtime.
+ auto *Ty = cast<IntegerType>(LHS->getType());
+ if (Ty->getBitWidth() > MaxTypeSizeForOverflowCheck)
+ return nullptr;
+
+ auto WideTy = IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2);
+ return (SE.*Operation)(SE.getSignExtendExpr(LHS, WideTy),
+ SE.getSignExtendExpr(RHS, WideTy), SCEV::FlagAnyWrap,
+ 0);
+ };
+
+ if (OffsetSubtracted)
+ // "IV - Offset < Limit" -> "IV" < Offset + Limit
+ Limit = getExprScaledIfOverflow(Instruction::BinaryOps::Add, Offset, Limit);
+ else {
+ // "Offset - IV > Limit" -> "IV" < Offset - Limit
+ Limit = getExprScaledIfOverflow(Instruction::BinaryOps::Sub, Offset, Limit);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE) {
+ // "Expr <= Limit" -> "Expr < Limit + 1"
+ if (Pred == ICmpInst::ICMP_SLE && Limit)
+ Limit = getExprScaledIfOverflow(Instruction::BinaryOps::Add, Limit,
+ SE.getOne(Limit->getType()));
+ if (Limit) {
+ Index = AddRec;
+ End = Limit;
+ return true;
+ }
+ }
+ return false;
+}
+
void InductiveRangeCheck::extractRangeChecksFromCond(
Loop *L, ScalarEvolution &SE, Use &ConditionUse,
SmallVectorImpl<InductiveRangeCheck> &Checks,
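A hedged sketch of the reassociation handled by reassociateSubLHS above (names are illustrative): a check on "IV - Offset" is conceptually turned into a check of the IV against "Offset + Limit", and the boundary computation is widened to a double-width type when overflow of that addition cannot be ruled out (bounded by -irce-max-type-size-for-overflow-check).

  ; original check, with %offset and %limit loop-invariant:
  %diff = sub i32 %iv, %offset
  %rc = icmp slt i32 %diff, %limit
  ; parsed as Index = the IV's add recurrence and
  ;   End = %offset + %limit                    (i32, if provably no overflow)
  ;   End = sext(%offset) + sext(%limit) to i64 otherwise, checked at runtime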
@@ -369,32 +505,17 @@ void InductiveRangeCheck::extractRangeChecksFromCond(
if (!ICI)
return;
- Value *Length = nullptr, *Index;
- bool IsSigned;
- if (!parseRangeCheckICmp(L, ICI, SE, Index, Length, IsSigned))
+ const SCEV *End = nullptr;
+ const SCEVAddRecExpr *IndexAddRec = nullptr;
+ if (!parseRangeCheckICmp(L, ICI, SE, IndexAddRec, End))
return;
- const auto *IndexAddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Index));
- bool IsAffineIndex =
- IndexAddRec && (IndexAddRec->getLoop() == L) && IndexAddRec->isAffine();
+ assert(IndexAddRec && "IndexAddRec was not computed");
+ assert(End && "End was not computed");
- if (!IsAffineIndex)
+ if ((IndexAddRec->getLoop() != L) || !IndexAddRec->isAffine())
return;
- const SCEV *End = nullptr;
- // We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L".
- // We can potentially do much better here.
- if (Length)
- End = SE.getSCEV(Length);
- else {
- // So far we can only reach this point for Signed range check. This may
- // change in future. In this case we will need to pick Unsigned max for the
- // unsigned range check.
- unsigned BitWidth = cast<IntegerType>(IndexAddRec->getType())->getBitWidth();
- const SCEV *SIntMax = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
- End = SIntMax;
- }
-
InductiveRangeCheck IRC;
IRC.End = End;
IRC.Begin = IndexAddRec->getStart();
@@ -405,16 +526,29 @@ void InductiveRangeCheck::extractRangeChecksFromCond(
void InductiveRangeCheck::extractRangeChecksFromBranch(
BranchInst *BI, Loop *L, ScalarEvolution &SE, BranchProbabilityInfo *BPI,
- SmallVectorImpl<InductiveRangeCheck> &Checks) {
+ SmallVectorImpl<InductiveRangeCheck> &Checks, bool &Changed) {
if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch())
return;
+ unsigned IndexLoopSucc = L->contains(BI->getSuccessor(0)) ? 0 : 1;
+ assert(L->contains(BI->getSuccessor(IndexLoopSucc)) &&
+ "No edges coming to loop?");
BranchProbability LikelyTaken(15, 16);
if (!SkipProfitabilityChecks && BPI &&
- BPI->getEdgeProbability(BI->getParent(), (unsigned)0) < LikelyTaken)
+ BPI->getEdgeProbability(BI->getParent(), IndexLoopSucc) < LikelyTaken)
return;
+ // IRCE expects the branch's true edge to lead into the loop. Invert the
+ // branch for the opposite case.
+ if (IndexLoopSucc != 0) {
+ IRBuilder<> Builder(BI);
+ InvertBranch(BI, Builder);
+ if (BPI)
+ BPI->swapSuccEdgesProbabilities(BI->getParent());
+ Changed = true;
+ }
+
SmallPtrSet<Value *, 8> Visited;
InductiveRangeCheck::extractRangeChecksFromCond(L, SE, BI->getOperandUse(0),
Checks, Visited);
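A hedged sketch of the new branch inversion (illustrative IR; InvertBranch is assumed to negate the condition and swap the successors, with edge probabilities swapped alongside): IRCE wants the true edge to stay in the loop, so a check that exits on true is flipped first.

  ; before: the true edge leaves the loop
  br i1 %oob, label %exit, label %in.loop
  ; after InvertBranch + swapSuccEdgesProbabilities:
  %oob.not = xor i1 %oob, true
  br i1 %oob.not, label %in.loop, label %exit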
@@ -622,7 +756,7 @@ class LoopConstrainer {
// Information about the original loop we started out with.
Loop &OriginalLoop;
- const SCEV *LatchTakenCount = nullptr;
+ const IntegerType *ExitCountTy = nullptr;
BasicBlock *OriginalPreheader = nullptr;
// The preheader of the main loop. This may or may not be different from
@@ -671,8 +805,7 @@ static bool isSafeDecreasingBound(const SCEV *Start,
LLVM_DEBUG(dbgs() << "irce: Start: " << *Start << "\n");
LLVM_DEBUG(dbgs() << "irce: Step: " << *Step << "\n");
LLVM_DEBUG(dbgs() << "irce: BoundSCEV: " << *BoundSCEV << "\n");
- LLVM_DEBUG(dbgs() << "irce: Pred: " << ICmpInst::getPredicateName(Pred)
- << "\n");
+ LLVM_DEBUG(dbgs() << "irce: Pred: " << Pred << "\n");
LLVM_DEBUG(dbgs() << "irce: LatchExitBrIdx: " << LatchBrExitIdx << "\n");
bool IsSigned = ICmpInst::isSigned(Pred);
@@ -719,8 +852,7 @@ static bool isSafeIncreasingBound(const SCEV *Start,
LLVM_DEBUG(dbgs() << "irce: Start: " << *Start << "\n");
LLVM_DEBUG(dbgs() << "irce: Step: " << *Step << "\n");
LLVM_DEBUG(dbgs() << "irce: BoundSCEV: " << *BoundSCEV << "\n");
- LLVM_DEBUG(dbgs() << "irce: Pred: " << ICmpInst::getPredicateName(Pred)
- << "\n");
+ LLVM_DEBUG(dbgs() << "irce: Pred: " << Pred << "\n");
LLVM_DEBUG(dbgs() << "irce: LatchExitBrIdx: " << LatchBrExitIdx << "\n");
bool IsSigned = ICmpInst::isSigned(Pred);
@@ -746,6 +878,19 @@ static bool isSafeIncreasingBound(const SCEV *Start,
SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit));
}
+/// Returns an estimate of the maximum latch taken count of the loop, using the
+/// narrowest available type. If the latch block has such an estimate, it is
+/// returned. Otherwise, we use the maximum exit count of the whole loop (which
+/// is potentially of a wider type than the latch check itself); this is still
+/// better than no estimate.
+static const SCEV *getNarrowestLatchMaxTakenCountEstimate(ScalarEvolution &SE,
+ const Loop &L) {
+ const SCEV *FromBlock =
+ SE.getExitCount(&L, L.getLoopLatch(), ScalarEvolution::SymbolicMaximum);
+ if (isa<SCEVCouldNotCompute>(FromBlock))
+ return SE.getSymbolicMaxBackedgeTakenCount(&L);
+ return FromBlock;
+}
+
std::optional<LoopStructure>
LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L,
const char *&FailureReason) {
@@ -788,11 +933,14 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L,
return std::nullopt;
}
- const SCEV *LatchCount = SE.getExitCount(&L, Latch);
- if (isa<SCEVCouldNotCompute>(LatchCount)) {
+ const SCEV *MaxBETakenCount = getNarrowestLatchMaxTakenCountEstimate(SE, L);
+ if (isa<SCEVCouldNotCompute>(MaxBETakenCount)) {
FailureReason = "could not compute latch count";
return std::nullopt;
}
+ assert(SE.getLoopDisposition(MaxBETakenCount, &L) ==
+ ScalarEvolution::LoopInvariant &&
+ "loop variant exit count doesn't make sense!");
ICmpInst::Predicate Pred = ICI->getPredicate();
Value *LeftValue = ICI->getOperand(0);
@@ -1017,10 +1165,6 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, Loop &L,
}
BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx);
- assert(SE.getLoopDisposition(LatchCount, &L) ==
- ScalarEvolution::LoopInvariant &&
- "loop variant exit count doesn't make sense!");
-
assert(!L.contains(LatchExit) && "expected an exit block!");
const DataLayout &DL = Preheader->getModule()->getDataLayout();
SCEVExpander Expander(SE, DL, "irce");
@@ -1062,14 +1206,11 @@ static const SCEV *NoopOrExtend(const SCEV *S, Type *Ty, ScalarEvolution &SE,
std::optional<LoopConstrainer::SubRanges>
LoopConstrainer::calculateSubRanges(bool IsSignedPredicate) const {
- IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType());
-
auto *RTy = cast<IntegerType>(Range.getType());
-
// We only support wide range checks and narrow latches.
- if (!AllowNarrowLatchCondition && RTy != Ty)
+ if (!AllowNarrowLatchCondition && RTy != ExitCountTy)
return std::nullopt;
- if (RTy->getBitWidth() < Ty->getBitWidth())
+ if (RTy->getBitWidth() < ExitCountTy->getBitWidth())
return std::nullopt;
LoopConstrainer::SubRanges Result;
@@ -1403,10 +1544,12 @@ Loop *LoopConstrainer::createClonedLoopStructure(Loop *Original, Loop *Parent,
bool LoopConstrainer::run() {
BasicBlock *Preheader = nullptr;
- LatchTakenCount = SE.getExitCount(&OriginalLoop, MainLoopStructure.Latch);
+ const SCEV *MaxBETakenCount =
+ getNarrowestLatchMaxTakenCountEstimate(SE, OriginalLoop);
Preheader = OriginalLoop.getLoopPreheader();
- assert(!isa<SCEVCouldNotCompute>(LatchTakenCount) && Preheader != nullptr &&
+ assert(!isa<SCEVCouldNotCompute>(MaxBETakenCount) && Preheader != nullptr &&
"preconditions!");
+ ExitCountTy = cast<IntegerType>(MaxBETakenCount->getType());
OriginalPreheader = Preheader;
MainLoopPreheader = Preheader;
@@ -1574,6 +1717,27 @@ bool LoopConstrainer::run() {
CanonicalizeLoop(PostL, false);
CanonicalizeLoop(&OriginalLoop, true);
+ /// At this point:
+ /// - We've broken a "main loop" out of the loop in a way that the "main loop"
+ /// runs with the induction variable in a subset of [Begin, End).
+ /// - There is no overflow when computing the "main loop" exit limit.
+ /// - The max latch taken count of the loop is bounded.
+ /// Together this guarantees that the induction variable will not overflow
+ /// while iterating in the "main loop".
+ if (auto BO = dyn_cast<BinaryOperator>(MainLoopStructure.IndVarBase))
+ if (IsSignedPredicate)
+ BO->setHasNoSignedWrap(true);
+ /// TODO: support unsigned predicate.
+ /// To add NUW flag we need to prove that both operands of BO are
+ /// non-negative. E.g.:
+ /// ...
+ /// %iv.next = add nsw i32 %iv, -1
+ /// %cmp = icmp ult i32 %iv.next, %n
+ /// br i1 %cmp, label %loopexit, label %loop
+ ///
+ /// -1 is UINT_MAX when interpreted as unsigned. Adding anything but zero to
+ /// it will overflow, therefore the NUW flag is not legal here.
+
return true;
}
@@ -1588,11 +1752,13 @@ InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE,
// if latch check is more narrow.
auto *IVType = dyn_cast<IntegerType>(IndVar->getType());
auto *RCType = dyn_cast<IntegerType>(getBegin()->getType());
+ auto *EndType = dyn_cast<IntegerType>(getEnd()->getType());
// Do not work with pointer types.
if (!IVType || !RCType)
return std::nullopt;
if (IVType->getBitWidth() > RCType->getBitWidth())
return std::nullopt;
+
// IndVar is of the form "A + B * I" (where "I" is the canonical induction
// variable, that may or may not exist as a real llvm::Value in the loop) and
// this inductive range check is a range check on the "C + D * I" ("C" is
@@ -1631,6 +1797,7 @@ InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE,
assert(!D->getValue()->isZero() && "Recurrence with zero step?");
unsigned BitWidth = RCType->getBitWidth();
const SCEV *SIntMax = SE.getConstant(APInt::getSignedMaxValue(BitWidth));
+ const SCEV *SIntMin = SE.getConstant(APInt::getSignedMinValue(BitWidth));
// Subtract Y from X so that it does not go through border of the IV
// iteration space. Mathematically, it is equivalent to:
@@ -1682,6 +1849,7 @@ InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE,
// This function returns SCEV equal to 1 if X is non-negative, 0 otherwise.
auto SCEVCheckNonNegative = [&](const SCEV *X) {
const Loop *L = IndVar->getLoop();
+ const SCEV *Zero = SE.getZero(X->getType());
const SCEV *One = SE.getOne(X->getType());
// Can we trivially prove that X is a non-negative or negative value?
if (isKnownNonNegativeInLoop(X, L, SE))
@@ -1693,6 +1861,25 @@ InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE,
const SCEV *NegOne = SE.getNegativeSCEV(One);
return SE.getAddExpr(SE.getSMaxExpr(SE.getSMinExpr(X, Zero), NegOne), One);
};
+
+ // This function returns a SCEV equal to 1 if X will not overflow in terms of
+ // the range check type, and 0 otherwise.
+ auto SCEVCheckWillNotOverflow = [&](const SCEV *X) {
+ // X doesn't overflow if SINT_MAX >= X.
+ // Then if (SINT_MAX - X) >= 0, X doesn't overflow
+ const SCEV *SIntMaxExt = SE.getSignExtendExpr(SIntMax, X->getType());
+ const SCEV *OverflowCheck =
+ SCEVCheckNonNegative(SE.getMinusSCEV(SIntMaxExt, X));
+
+ // X doesn't underflow if X >= SINT_MIN.
+ // Then if (X - SINT_MIN) >= 0, X doesn't underflow
+ const SCEV *SIntMinExt = SE.getSignExtendExpr(SIntMin, X->getType());
+ const SCEV *UnderflowCheck =
+ SCEVCheckNonNegative(SE.getMinusSCEV(X, SIntMinExt));
+
+ return SE.getMulExpr(OverflowCheck, UnderflowCheck);
+ };
+
// FIXME: Current implementation of ClampedSubtract implicitly assumes that
// X is non-negative (in sense of a signed value). We need to re-implement
// this function in a way that it will correctly handle negative X as well.
@@ -1702,10 +1889,35 @@ InductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE,
// Note that this may pessimize elimination of unsigned range checks against
// negative values.
const SCEV *REnd = getEnd();
- const SCEV *EndIsNonNegative = SCEVCheckNonNegative(REnd);
+ const SCEV *EndWillNotOverflow = SE.getOne(RCType);
+
+ auto PrintRangeCheck = [&](raw_ostream &OS) {
+ auto L = IndVar->getLoop();
+ OS << "irce: in function ";
+ OS << L->getHeader()->getParent()->getName();
+ OS << ", in ";
+ L->print(OS);
+ OS << "there is range check with scaled boundary:\n";
+ print(OS);
+ };
+
+ if (EndType->getBitWidth() > RCType->getBitWidth()) {
+ assert(EndType->getBitWidth() == RCType->getBitWidth() * 2);
+ if (PrintScaledBoundaryRangeChecks)
+ PrintRangeCheck(errs());
+ // End is computed in the extended type but will be truncated to the narrower
+ // type of the range check. Therefore we need a check that the result will not
+ // overflow in terms of the narrow type.
+ EndWillNotOverflow =
+ SE.getTruncateExpr(SCEVCheckWillNotOverflow(REnd), RCType);
+ REnd = SE.getTruncateExpr(REnd, RCType);
+ }
+
+ const SCEV *RuntimeChecks =
+ SE.getMulExpr(SCEVCheckNonNegative(REnd), EndWillNotOverflow);
+ const SCEV *Begin = SE.getMulExpr(ClampedSubtract(Zero, M), RuntimeChecks);
+ const SCEV *End = SE.getMulExpr(ClampedSubtract(REnd, M), RuntimeChecks);
- const SCEV *Begin = SE.getMulExpr(ClampedSubtract(Zero, M), EndIsNonNegative);
- const SCEV *End = SE.getMulExpr(ClampedSubtract(REnd, M), EndIsNonNegative);
return InductiveRangeCheck::Range(Begin, End);
}
@@ -1825,39 +2037,6 @@ PreservedAnalyses IRCEPass::run(Function &F, FunctionAnalysisManager &AM) {
return getLoopPassPreservedAnalyses();
}
-bool IRCELegacyPass::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
-
- ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- BranchProbabilityInfo &BPI =
- getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- InductiveRangeCheckElimination IRCE(SE, &BPI, DT, LI);
-
- bool Changed = false;
-
- for (const auto &L : LI) {
- Changed |= simplifyLoop(L, &DT, &LI, &SE, nullptr, nullptr,
- /*PreserveLCSSA=*/false);
- Changed |= formLCSSARecursively(*L, DT, &LI, &SE);
- }
-
- SmallPriorityWorklist<Loop *, 4> Worklist;
- appendLoopsToWorklist(LI, Worklist);
- auto LPMAddNewLoop = [&](Loop *NL, bool IsSubloop) {
- if (!IsSubloop)
- appendLoopsToWorklist(*NL, Worklist);
- };
-
- while (!Worklist.empty()) {
- Loop *L = Worklist.pop_back_val();
- Changed |= IRCE.run(L, LPMAddNewLoop);
- }
- return Changed;
-}
-
bool
InductiveRangeCheckElimination::isProfitableToTransform(const Loop &L,
LoopStructure &LS) {
@@ -1904,14 +2083,15 @@ bool InductiveRangeCheckElimination::run(
LLVMContext &Context = Preheader->getContext();
SmallVector<InductiveRangeCheck, 16> RangeChecks;
+ bool Changed = false;
for (auto *BBI : L->getBlocks())
if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator()))
InductiveRangeCheck::extractRangeChecksFromBranch(TBI, L, SE, BPI,
- RangeChecks);
+ RangeChecks, Changed);
if (RangeChecks.empty())
- return false;
+ return Changed;
auto PrintRecognizedRangeChecks = [&](raw_ostream &OS) {
OS << "irce: looking at loop "; L->print(OS);
@@ -1932,16 +2112,15 @@ bool InductiveRangeCheckElimination::run(
if (!MaybeLoopStructure) {
LLVM_DEBUG(dbgs() << "irce: could not parse loop structure: "
<< FailureReason << "\n";);
- return false;
+ return Changed;
}
LoopStructure LS = *MaybeLoopStructure;
if (!isProfitableToTransform(*L, LS))
- return false;
+ return Changed;
const SCEVAddRecExpr *IndVar =
cast<SCEVAddRecExpr>(SE.getMinusSCEV(SE.getSCEV(LS.IndVarBase), SE.getSCEV(LS.IndVarStep)));
std::optional<InductiveRangeCheck::Range> SafeIterRange;
- Instruction *ExprInsertPt = Preheader->getTerminator();
SmallVector<InductiveRangeCheck, 4> RangeChecksToEliminate;
// Basing on the type of latch predicate, we interpret the IV iteration range
@@ -1951,7 +2130,6 @@ bool InductiveRangeCheckElimination::run(
auto IntersectRange =
LS.IsSignedPredicate ? IntersectSignedRange : IntersectUnsignedRange;
- IRBuilder<> B(ExprInsertPt);
for (InductiveRangeCheck &IRC : RangeChecks) {
auto Result = IRC.computeSafeIterationSpace(SE, IndVar,
LS.IsSignedPredicate);
@@ -1967,12 +2145,13 @@ bool InductiveRangeCheckElimination::run(
}
if (!SafeIterRange)
- return false;
+ return Changed;
LoopConstrainer LC(*L, LI, LPMAddNewLoop, LS, SE, DT, *SafeIterRange);
- bool Changed = LC.run();
- if (Changed) {
+ if (LC.run()) {
+ Changed = true;
+
auto PrintConstrainedLoopInfo = [L]() {
dbgs() << "irce: in function ";
dbgs() << L->getHeader()->getParent()->getName() << ": ";
@@ -1997,7 +2176,3 @@ bool InductiveRangeCheckElimination::run(
return Changed;
}
-
-Pass *llvm::createInductiveRangeCheckEliminationPass() {
- return new IRCELegacyPass();
-}
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 114738a35fd1..c2b5a12fd63f 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -76,14 +76,14 @@
// Second, IR rewriting in Step 2 also needs to be circular. For example,
// converting %y to addrspace(3) requires the compiler to know the converted
// %y2, but converting %y2 needs the converted %y. To address this complication,
-// we break these cycles using "undef" placeholders. When converting an
+// we break these cycles using "poison" placeholders. When converting an
// instruction `I` to a new address space, if its operand `Op` is not converted
-// yet, we let `I` temporarily use `undef` and fix all the uses of undef later.
+// yet, we let `I` temporarily use `poison` and fix all the uses later.
// For instance, our algorithm first converts %y to
-// %y' = phi float addrspace(3)* [ %input, undef ]
+// %y' = phi float addrspace(3)* [ %input, poison ]
// Then, it converts %y2 to
// %y2' = getelementptr %y', 1
-// Finally, it fixes the undef in %y' so that
+// Finally, it fixes the poison in %y' so that
// %y' = phi float addrspace(3)* [ %input, %y2' ]
//
//===----------------------------------------------------------------------===//
@@ -206,7 +206,7 @@ class InferAddressSpacesImpl {
Instruction *I, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
const PredicatedAddrSpaceMapTy &PredicatedAS,
- SmallVectorImpl<const Use *> *UndefUsesToFix) const;
+ SmallVectorImpl<const Use *> *PoisonUsesToFix) const;
// Changes the flat address expressions in function F to point to specific
// address spaces if InferredAddrSpace says so. Postorder is the postorder of
@@ -233,7 +233,7 @@ class InferAddressSpacesImpl {
Value *V, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
const PredicatedAddrSpaceMapTy &PredicatedAS,
- SmallVectorImpl<const Use *> *UndefUsesToFix) const;
+ SmallVectorImpl<const Use *> *PoisonUsesToFix) const;
unsigned joinAddressSpaces(unsigned AS1, unsigned AS2) const;
unsigned getPredicatedAddrSpace(const Value &V, Value *Opnd) const;
@@ -256,6 +256,12 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(InferAddressSpaces, DEBUG_TYPE, "Infer address spaces",
false, false)
+static Type *getPtrOrVecOfPtrsWithNewAS(Type *Ty, unsigned NewAddrSpace) {
+ assert(Ty->isPtrOrPtrVectorTy());
+ PointerType *NPT = PointerType::get(Ty->getContext(), NewAddrSpace);
+ return Ty->getWithNewType(NPT);
+}
+
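// Illustrative use of the helper (values are hypothetical, not part of this
// patch): for Ty = <4 x ptr> in the flat address space,
//
//   Type *NewTy = getPtrOrVecOfPtrsWithNewAS(Ty, /*NewAddrSpace=*/3);
//
// yields <4 x ptr addrspace(3)>, while a scalar ptr maps to ptr addrspace(3).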
// Check whether this is a no-op pointer bitcast expressed as a pair of
// `ptrtoint`/`inttoptr`, which arises because there is no no-op pointer
// bitcast between different address spaces.
@@ -301,14 +307,14 @@ static bool isAddressExpression(const Value &V, const DataLayout &DL,
switch (Op->getOpcode()) {
case Instruction::PHI:
- assert(Op->getType()->isPointerTy());
+ assert(Op->getType()->isPtrOrPtrVectorTy());
return true;
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
case Instruction::GetElementPtr:
return true;
case Instruction::Select:
- return Op->getType()->isPointerTy();
+ return Op->getType()->isPtrOrPtrVectorTy();
case Instruction::Call: {
const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&V);
return II && II->getIntrinsicID() == Intrinsic::ptrmask;
@@ -373,6 +379,24 @@ bool InferAddressSpacesImpl::rewriteIntrinsicOperands(IntrinsicInst *II,
case Intrinsic::ptrmask:
// This is handled as an address expression, not as a use memory operation.
return false;
+ case Intrinsic::masked_gather: {
+ Type *RetTy = II->getType();
+ Type *NewPtrTy = NewV->getType();
+ Function *NewDecl =
+ Intrinsic::getDeclaration(M, II->getIntrinsicID(), {RetTy, NewPtrTy});
+ II->setArgOperand(0, NewV);
+ II->setCalledFunction(NewDecl);
+ return true;
+ }
+ case Intrinsic::masked_scatter: {
+ Type *ValueTy = II->getOperand(0)->getType();
+ Type *NewPtrTy = NewV->getType();
+ Function *NewDecl =
+ Intrinsic::getDeclaration(M, II->getIntrinsicID(), {ValueTy, NewPtrTy});
+ II->setArgOperand(1, NewV);
+ II->setCalledFunction(NewDecl);
+ return true;
+ }
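// As an illustration (the IR below is hypothetical, not taken from a test),
// a gather whose pointer vector is inferred to live in addrspace(3), e.g.
//   %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %p, ...)
// is remangled to
//   %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p3(
//            <2 x ptr addrspace(3)> %p.new, ...)
// by substituting NewV and the matching intrinsic declaration; masked_scatter
// is handled the same way on its pointer operand (operand 1).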
default: {
Value *Rewrite = TTI->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
if (!Rewrite)
@@ -394,6 +418,14 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
PostorderStack, Visited);
break;
+ case Intrinsic::masked_gather:
+ appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(0),
+ PostorderStack, Visited);
+ break;
+ case Intrinsic::masked_scatter:
+ appendsFlatAddressExpressionToPostorderStack(II->getArgOperand(1),
+ PostorderStack, Visited);
+ break;
default:
SmallVector<int, 2> OpIndexes;
if (TTI->collectFlatAddressOperands(OpIndexes, IID)) {
@@ -412,7 +444,7 @@ void InferAddressSpacesImpl::collectRewritableIntrinsicOperands(
void InferAddressSpacesImpl::appendsFlatAddressExpressionToPostorderStack(
Value *V, PostorderStackTy &PostorderStack,
DenseSet<Value *> &Visited) const {
- assert(V->getType()->isPointerTy());
+ assert(V->getType()->isPtrOrPtrVectorTy());
// Generic addressing expressions may be hidden in nested constant
// expressions.
@@ -460,8 +492,7 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
// addressing calculations may also be faster.
for (Instruction &I : instructions(F)) {
if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
- if (!GEP->getType()->isVectorTy())
- PushPtrOperand(GEP->getPointerOperand());
+ PushPtrOperand(GEP->getPointerOperand());
} else if (auto *LI = dyn_cast<LoadInst>(&I))
PushPtrOperand(LI->getPointerOperand());
else if (auto *SI = dyn_cast<StoreInst>(&I))
@@ -480,14 +511,12 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
} else if (auto *II = dyn_cast<IntrinsicInst>(&I))
collectRewritableIntrinsicOperands(II, PostorderStack, Visited);
else if (ICmpInst *Cmp = dyn_cast<ICmpInst>(&I)) {
- // FIXME: Handle vectors of pointers
- if (Cmp->getOperand(0)->getType()->isPointerTy()) {
+ if (Cmp->getOperand(0)->getType()->isPtrOrPtrVectorTy()) {
PushPtrOperand(Cmp->getOperand(0));
PushPtrOperand(Cmp->getOperand(1));
}
} else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
- if (!ASC->getType()->isVectorTy())
- PushPtrOperand(ASC->getPointerOperand());
+ PushPtrOperand(ASC->getPointerOperand());
} else if (auto *I2P = dyn_cast<IntToPtrInst>(&I)) {
if (isNoopPtrIntCastPair(cast<Operator>(I2P), *DL, TTI))
PushPtrOperand(
@@ -521,16 +550,15 @@ InferAddressSpacesImpl::collectFlatAddressExpressions(Function &F) const {
// A helper function for cloneInstructionWithNewAddressSpace. Returns the clone
// of OperandUse.get() in the new address space. If the clone is not ready yet,
-// returns an undef in the new address space as a placeholder.
-static Value *operandWithNewAddressSpaceOrCreateUndef(
+// returns poison in the new address space as a placeholder.
+static Value *operandWithNewAddressSpaceOrCreatePoison(
const Use &OperandUse, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
const PredicatedAddrSpaceMapTy &PredicatedAS,
- SmallVectorImpl<const Use *> *UndefUsesToFix) {
+ SmallVectorImpl<const Use *> *PoisonUsesToFix) {
Value *Operand = OperandUse.get();
- Type *NewPtrTy = PointerType::getWithSamePointeeType(
- cast<PointerType>(Operand->getType()), NewAddrSpace);
+ Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Operand->getType(), NewAddrSpace);
if (Constant *C = dyn_cast<Constant>(Operand))
return ConstantExpr::getAddrSpaceCast(C, NewPtrTy);
@@ -543,23 +571,22 @@ static Value *operandWithNewAddressSpaceOrCreateUndef(
if (I != PredicatedAS.end()) {
// Insert an addrspacecast on that operand before the user.
unsigned NewAS = I->second;
- Type *NewPtrTy = PointerType::getWithSamePointeeType(
- cast<PointerType>(Operand->getType()), NewAS);
+ Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(Operand->getType(), NewAS);
auto *NewI = new AddrSpaceCastInst(Operand, NewPtrTy);
NewI->insertBefore(Inst);
NewI->setDebugLoc(Inst->getDebugLoc());
return NewI;
}
- UndefUsesToFix->push_back(&OperandUse);
- return UndefValue::get(NewPtrTy);
+ PoisonUsesToFix->push_back(&OperandUse);
+ return PoisonValue::get(NewPtrTy);
}
// Returns a clone of `I` with its operands converted to those specified in
// ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an
// operand whose address space needs to be modified might not exist in
-// ValueWithNewAddrSpace. In that case, uses undef as a placeholder operand and
-// adds that operand use to UndefUsesToFix so that caller can fix them later.
+// ValueWithNewAddrSpace. In that case, uses poison as a placeholder operand and
+// adds that operand use to PoisonUsesToFix so that caller can fix them later.
//
// Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
// from a pointer whose type already matches. Therefore, this function returns a
@@ -571,9 +598,8 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
Instruction *I, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
const PredicatedAddrSpaceMapTy &PredicatedAS,
- SmallVectorImpl<const Use *> *UndefUsesToFix) const {
- Type *NewPtrType = PointerType::getWithSamePointeeType(
- cast<PointerType>(I->getType()), NewAddrSpace);
+ SmallVectorImpl<const Use *> *PoisonUsesToFix) const {
+ Type *NewPtrType = getPtrOrVecOfPtrsWithNewAS(I->getType(), NewAddrSpace);
if (I->getOpcode() == Instruction::AddrSpaceCast) {
Value *Src = I->getOperand(0);
@@ -590,9 +616,9 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
// Technically the intrinsic ID is a pointer typed argument, so specially
// handle calls early.
assert(II->getIntrinsicID() == Intrinsic::ptrmask);
- Value *NewPtr = operandWithNewAddressSpaceOrCreateUndef(
+ Value *NewPtr = operandWithNewAddressSpaceOrCreatePoison(
II->getArgOperandUse(0), NewAddrSpace, ValueWithNewAddrSpace,
- PredicatedAS, UndefUsesToFix);
+ PredicatedAS, PoisonUsesToFix);
Value *Rewrite =
TTI->rewriteIntrinsicWithAddressSpace(II, II->getArgOperand(0), NewPtr);
if (Rewrite) {
@@ -607,8 +633,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
if (AS != UninitializedAddressSpace) {
// For the assumed address space, insert an `addrspacecast` to make that
// explicit.
- Type *NewPtrTy = PointerType::getWithSamePointeeType(
- cast<PointerType>(I->getType()), AS);
+ Type *NewPtrTy = getPtrOrVecOfPtrsWithNewAS(I->getType(), AS);
auto *NewI = new AddrSpaceCastInst(I, NewPtrTy);
NewI->insertAfter(I);
return NewI;
@@ -617,19 +642,19 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
// Computes the converted pointer operands.
SmallVector<Value *, 4> NewPointerOperands;
for (const Use &OperandUse : I->operands()) {
- if (!OperandUse.get()->getType()->isPointerTy())
+ if (!OperandUse.get()->getType()->isPtrOrPtrVectorTy())
NewPointerOperands.push_back(nullptr);
else
- NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreateUndef(
+ NewPointerOperands.push_back(operandWithNewAddressSpaceOrCreatePoison(
OperandUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS,
- UndefUsesToFix));
+ PoisonUsesToFix));
}
switch (I->getOpcode()) {
case Instruction::BitCast:
return new BitCastInst(NewPointerOperands[0], NewPtrType);
case Instruction::PHI: {
- assert(I->getType()->isPointerTy());
+ assert(I->getType()->isPtrOrPtrVectorTy());
PHINode *PHI = cast<PHINode>(I);
PHINode *NewPHI = PHINode::Create(NewPtrType, PHI->getNumIncomingValues());
for (unsigned Index = 0; Index < PHI->getNumIncomingValues(); ++Index) {
@@ -648,7 +673,7 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
return NewGEP;
}
case Instruction::Select:
- assert(I->getType()->isPointerTy());
+ assert(I->getType()->isPtrOrPtrVectorTy());
return SelectInst::Create(I->getOperand(0), NewPointerOperands[1],
NewPointerOperands[2], "", nullptr, I);
case Instruction::IntToPtr: {
@@ -674,10 +699,10 @@ static Value *cloneConstantExprWithNewAddressSpace(
ConstantExpr *CE, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace, const DataLayout *DL,
const TargetTransformInfo *TTI) {
- Type *TargetType = CE->getType()->isPointerTy()
- ? PointerType::getWithSamePointeeType(
- cast<PointerType>(CE->getType()), NewAddrSpace)
- : CE->getType();
+ Type *TargetType =
+ CE->getType()->isPtrOrPtrVectorTy()
+ ? getPtrOrVecOfPtrsWithNewAS(CE->getType(), NewAddrSpace)
+ : CE->getType();
if (CE->getOpcode() == Instruction::AddrSpaceCast) {
// Because CE is flat, the source address space must be specific.
@@ -694,18 +719,6 @@ static Value *cloneConstantExprWithNewAddressSpace(
return ConstantExpr::getAddrSpaceCast(CE, TargetType);
}
- if (CE->getOpcode() == Instruction::Select) {
- Constant *Src0 = CE->getOperand(1);
- Constant *Src1 = CE->getOperand(2);
- if (Src0->getType()->getPointerAddressSpace() ==
- Src1->getType()->getPointerAddressSpace()) {
-
- return ConstantExpr::getSelect(
- CE->getOperand(0), ConstantExpr::getAddrSpaceCast(Src0, TargetType),
- ConstantExpr::getAddrSpaceCast(Src1, TargetType));
- }
- }
-
if (CE->getOpcode() == Instruction::IntToPtr) {
assert(isNoopPtrIntCastPair(cast<Operator>(CE), *DL, TTI));
Constant *Src = cast<ConstantExpr>(CE->getOperand(0))->getOperand(0);
@@ -758,19 +771,19 @@ static Value *cloneConstantExprWithNewAddressSpace(
// ValueWithNewAddrSpace. This function is called on every flat address
// expression whose address space needs to be modified, in postorder.
//
-// See cloneInstructionWithNewAddressSpace for the meaning of UndefUsesToFix.
+// See cloneInstructionWithNewAddressSpace for the meaning of PoisonUsesToFix.
Value *InferAddressSpacesImpl::cloneValueWithNewAddressSpace(
Value *V, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
const PredicatedAddrSpaceMapTy &PredicatedAS,
- SmallVectorImpl<const Use *> *UndefUsesToFix) const {
+ SmallVectorImpl<const Use *> *PoisonUsesToFix) const {
// All values in Postorder are flat address expressions.
assert(V->getType()->getPointerAddressSpace() == FlatAddrSpace &&
isAddressExpression(*V, *DL, TTI));
if (Instruction *I = dyn_cast<Instruction>(V)) {
Value *NewV = cloneInstructionWithNewAddressSpace(
- I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, UndefUsesToFix);
+ I, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, PoisonUsesToFix);
if (Instruction *NewI = dyn_cast_or_null<Instruction>(NewV)) {
if (NewI->getParent() == nullptr) {
NewI->insertBefore(I);
@@ -1114,7 +1127,7 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
// operands are converted, the clone is naturally in the new address space by
// construction.
ValueToValueMapTy ValueWithNewAddrSpace;
- SmallVector<const Use *, 32> UndefUsesToFix;
+ SmallVector<const Use *, 32> PoisonUsesToFix;
for (Value* V : Postorder) {
unsigned NewAddrSpace = InferredAddrSpace.lookup(V);
@@ -1126,7 +1139,7 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
if (V->getType()->getPointerAddressSpace() != NewAddrSpace) {
Value *New =
cloneValueWithNewAddressSpace(V, NewAddrSpace, ValueWithNewAddrSpace,
- PredicatedAS, &UndefUsesToFix);
+ PredicatedAS, &PoisonUsesToFix);
if (New)
ValueWithNewAddrSpace[V] = New;
}
@@ -1135,16 +1148,16 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
if (ValueWithNewAddrSpace.empty())
return false;
- // Fixes all the undef uses generated by cloneInstructionWithNewAddressSpace.
- for (const Use *UndefUse : UndefUsesToFix) {
- User *V = UndefUse->getUser();
+ // Fixes all the poison uses generated by cloneInstructionWithNewAddressSpace.
+ for (const Use *PoisonUse : PoisonUsesToFix) {
+ User *V = PoisonUse->getUser();
User *NewV = cast_or_null<User>(ValueWithNewAddrSpace.lookup(V));
if (!NewV)
continue;
- unsigned OperandNo = UndefUse->getOperandNo();
- assert(isa<UndefValue>(NewV->getOperand(OperandNo)));
- NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get()));
+ unsigned OperandNo = PoisonUse->getOperandNo();
+ assert(isa<PoisonValue>(NewV->getOperand(OperandNo)));
+ NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(PoisonUse->get()));
}
SmallVector<Instruction *, 16> DeadInstructions;
@@ -1238,20 +1251,6 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) {
unsigned NewAS = NewV->getType()->getPointerAddressSpace();
if (ASC->getDestAddressSpace() == NewAS) {
- if (!cast<PointerType>(ASC->getType())
- ->hasSameElementTypeAs(
- cast<PointerType>(NewV->getType()))) {
- BasicBlock::iterator InsertPos;
- if (Instruction *NewVInst = dyn_cast<Instruction>(NewV))
- InsertPos = std::next(NewVInst->getIterator());
- else if (Instruction *VInst = dyn_cast<Instruction>(V))
- InsertPos = std::next(VInst->getIterator());
- else
- InsertPos = ASC->getIterator();
-
- NewV = CastInst::Create(Instruction::BitCast, NewV,
- ASC->getType(), "", &*InsertPos);
- }
ASC->replaceAllUsesWith(NewV);
DeadInstructions.push_back(ASC);
continue;
diff --git a/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp b/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
index 4644905adba3..ee9452ce1c7d 100644
--- a/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
+++ b/llvm/lib/Transforms/Scalar/InstSimplifyPass.cpp
@@ -11,7 +11,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -26,8 +25,7 @@ using namespace llvm;
STATISTIC(NumSimplified, "Number of redundant instructions removed");
-static bool runImpl(Function &F, const SimplifyQuery &SQ,
- OptimizationRemarkEmitter *ORE) {
+static bool runImpl(Function &F, const SimplifyQuery &SQ) {
SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
@@ -51,7 +49,7 @@ static bool runImpl(Function &F, const SimplifyQuery &SQ,
DeadInstsInBB.push_back(&I);
Changed = true;
} else if (!I.use_empty()) {
- if (Value *V = simplifyInstruction(&I, SQ, ORE)) {
+ if (Value *V = simplifyInstruction(&I, SQ)) {
// Mark all uses for resimplification next time round the loop.
for (User *U : I.users())
Next->insert(cast<Instruction>(U));
@@ -88,7 +86,6 @@ struct InstSimplifyLegacyPass : public FunctionPass {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
/// Remove instructions that simplify.
@@ -102,11 +99,9 @@ struct InstSimplifyLegacyPass : public FunctionPass {
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
AssumptionCache *AC =
&getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- OptimizationRemarkEmitter *ORE =
- &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
const DataLayout &DL = F.getParent()->getDataLayout();
const SimplifyQuery SQ(DL, TLI, DT, AC);
- return runImpl(F, SQ, ORE);
+ return runImpl(F, SQ);
}
};
} // namespace
@@ -117,7 +112,6 @@ INITIALIZE_PASS_BEGIN(InstSimplifyLegacyPass, "instsimplify",
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(InstSimplifyLegacyPass, "instsimplify",
"Remove redundant instructions", false, false)
@@ -131,10 +125,9 @@ PreservedAnalyses InstSimplifyPass::run(Function &F,
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
- auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
const DataLayout &DL = F.getParent()->getDataLayout();
const SimplifyQuery SQ(DL, &TLI, &DT, &AC);
- bool Changed = runImpl(F, SQ, &ORE);
+ bool Changed = runImpl(F, SQ);
if (!Changed)
return PreservedAnalyses::all();
diff --git a/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index f41eaed2e3e7..24390f1b54f6 100644
--- a/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -23,7 +23,6 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -31,6 +30,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -40,6 +40,7 @@
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
@@ -57,15 +58,12 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -114,68 +112,6 @@ static cl::opt<bool> ThreadAcrossLoopHeaders(
cl::desc("Allow JumpThreading to thread across loop headers, for testing"),
cl::init(false), cl::Hidden);
-
-namespace {
-
- /// This pass performs 'jump threading', which looks at blocks that have
- /// multiple predecessors and multiple successors. If one or more of the
- /// predecessors of the block can be proven to always jump to one of the
- /// successors, we forward the edge from the predecessor to the successor by
- /// duplicating the contents of this block.
- ///
- /// An example of when this can occur is code like this:
- ///
- /// if () { ...
- /// X = 4;
- /// }
- /// if (X < 3) {
- ///
- /// In this case, the unconditional branch at the end of the first if can be
- /// revectored to the false side of the second if.
- class JumpThreading : public FunctionPass {
- JumpThreadingPass Impl;
-
- public:
- static char ID; // Pass identification
-
- JumpThreading(int T = -1) : FunctionPass(ID), Impl(T) {
- initializeJumpThreadingPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<LazyValueInfoWrapperPass>();
- AU.addPreserved<LazyValueInfoWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- }
-
- void releaseMemory() override { Impl.releaseMemory(); }
- };
-
-} // end anonymous namespace
-
-char JumpThreading::ID = 0;
-
-INITIALIZE_PASS_BEGIN(JumpThreading, "jump-threading",
- "Jump Threading", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LazyValueInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_END(JumpThreading, "jump-threading",
- "Jump Threading", false, false)
-
-// Public interface to the Jump Threading pass
-FunctionPass *llvm::createJumpThreadingPass(int Threshold) {
- return new JumpThreading(Threshold);
-}
-
JumpThreadingPass::JumpThreadingPass(int T) {
DefaultBBDupThreshold = (T == -1) ? BBDuplicateThreshold : unsigned(T);
}
@@ -306,102 +242,81 @@ static void updatePredecessorProfileMetadata(PHINode *PN, BasicBlock *BB) {
}
}
-/// runOnFunction - Toplevel algorithm.
-bool JumpThreading::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
- auto TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- // Jump Threading has no sense for the targets with divergent CF
- if (TTI->hasBranchDivergence())
- return false;
- auto TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI();
- auto AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- DomTreeUpdater DTU(*DT, DomTreeUpdater::UpdateStrategy::Lazy);
- std::unique_ptr<BlockFrequencyInfo> BFI;
- std::unique_ptr<BranchProbabilityInfo> BPI;
- if (F.hasProfileData()) {
- LoopInfo LI{*DT};
- BPI.reset(new BranchProbabilityInfo(F, LI, TLI));
- BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
- }
-
- bool Changed = Impl.runImpl(F, TLI, TTI, LVI, AA, &DTU, F.hasProfileData(),
- std::move(BFI), std::move(BPI));
- if (PrintLVIAfterJumpThreading) {
- dbgs() << "LVI for function '" << F.getName() << "':\n";
- LVI->printLVI(F, DTU.getDomTree(), dbgs());
- }
- return Changed;
-}
-
PreservedAnalyses JumpThreadingPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
// Jump Threading has no sense for the targets with divergent CF
- if (TTI.hasBranchDivergence())
+ if (TTI.hasBranchDivergence(&F))
return PreservedAnalyses::all();
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
- auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LVI = AM.getResult<LazyValueAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
-
- std::unique_ptr<BlockFrequencyInfo> BFI;
- std::unique_ptr<BranchProbabilityInfo> BPI;
- if (F.hasProfileData()) {
- LoopInfo LI{DT};
- BPI.reset(new BranchProbabilityInfo(F, LI, &TLI));
- BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
- }
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- bool Changed = runImpl(F, &TLI, &TTI, &LVI, &AA, &DTU, F.hasProfileData(),
- std::move(BFI), std::move(BPI));
+ bool Changed =
+ runImpl(F, &AM, &TLI, &TTI, &LVI, &AA,
+ std::make_unique<DomTreeUpdater>(
+ &DT, nullptr, DomTreeUpdater::UpdateStrategy::Lazy),
+ std::nullopt, std::nullopt);
if (PrintLVIAfterJumpThreading) {
dbgs() << "LVI for function '" << F.getName() << "':\n";
- LVI.printLVI(F, DTU.getDomTree(), dbgs());
+ LVI.printLVI(F, getDomTreeUpdater()->getDomTree(), dbgs());
}
if (!Changed)
return PreservedAnalyses::all();
- PreservedAnalyses PA;
- PA.preserve<DominatorTreeAnalysis>();
- PA.preserve<LazyValueAnalysis>();
- return PA;
+
+
+ getDomTreeUpdater()->flush();
+
+#if defined(EXPENSIVE_CHECKS)
+ assert(getDomTreeUpdater()->getDomTree().verify(
+ DominatorTree::VerificationLevel::Full) &&
+ "DT broken after JumpThreading");
+ assert((!getDomTreeUpdater()->hasPostDomTree() ||
+ getDomTreeUpdater()->getPostDomTree().verify(
+ PostDominatorTree::VerificationLevel::Full)) &&
+ "PDT broken after JumpThreading");
+#else
+ assert(getDomTreeUpdater()->getDomTree().verify(
+ DominatorTree::VerificationLevel::Fast) &&
+ "DT broken after JumpThreading");
+ assert((!getDomTreeUpdater()->hasPostDomTree() ||
+ getDomTreeUpdater()->getPostDomTree().verify(
+ PostDominatorTree::VerificationLevel::Fast)) &&
+ "PDT broken after JumpThreading");
+#endif
+
+ return getPreservedAnalysis();
}
-bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
+bool JumpThreadingPass::runImpl(Function &F_, FunctionAnalysisManager *FAM_,
+ TargetLibraryInfo *TLI_,
TargetTransformInfo *TTI_, LazyValueInfo *LVI_,
- AliasAnalysis *AA_, DomTreeUpdater *DTU_,
- bool HasProfileData_,
- std::unique_ptr<BlockFrequencyInfo> BFI_,
- std::unique_ptr<BranchProbabilityInfo> BPI_) {
- LLVM_DEBUG(dbgs() << "Jump threading on function '" << F.getName() << "'\n");
+ AliasAnalysis *AA_,
+ std::unique_ptr<DomTreeUpdater> DTU_,
+ std::optional<BlockFrequencyInfo *> BFI_,
+ std::optional<BranchProbabilityInfo *> BPI_) {
+ LLVM_DEBUG(dbgs() << "Jump threading on function '" << F_.getName() << "'\n");
+ F = &F_;
+ FAM = FAM_;
TLI = TLI_;
TTI = TTI_;
LVI = LVI_;
AA = AA_;
- DTU = DTU_;
- BFI.reset();
- BPI.reset();
- // When profile data is available, we need to update edge weights after
- // successful jump threading, which requires both BPI and BFI being available.
- HasProfileData = HasProfileData_;
- auto *GuardDecl = F.getParent()->getFunction(
+ DTU = std::move(DTU_);
+ BFI = BFI_;
+ BPI = BPI_;
+ auto *GuardDecl = F->getParent()->getFunction(
Intrinsic::getName(Intrinsic::experimental_guard));
HasGuards = GuardDecl && !GuardDecl->use_empty();
- if (HasProfileData) {
- BPI = std::move(BPI_);
- BFI = std::move(BFI_);
- }
// Reduce the number of instructions duplicated when optimizing strictly for
// size.
if (BBDuplicateThreshold.getNumOccurrences())
BBDupThreshold = BBDuplicateThreshold;
- else if (F.hasFnAttribute(Attribute::MinSize))
+ else if (F->hasFnAttribute(Attribute::MinSize))
BBDupThreshold = 3;
else
BBDupThreshold = DefaultBBDupThreshold;
@@ -412,22 +327,22 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
assert(DTU && "DTU isn't passed into JumpThreading before using it.");
assert(DTU->hasDomTree() && "JumpThreading relies on DomTree to proceed.");
DominatorTree &DT = DTU->getDomTree();
- for (auto &BB : F)
+ for (auto &BB : *F)
if (!DT.isReachableFromEntry(&BB))
Unreachable.insert(&BB);
if (!ThreadAcrossLoopHeaders)
- findLoopHeaders(F);
+ findLoopHeaders(*F);
bool EverChanged = false;
bool Changed;
do {
Changed = false;
- for (auto &BB : F) {
+ for (auto &BB : *F) {
if (Unreachable.count(&BB))
continue;
while (processBlock(&BB)) // Thread all of the branches we can over BB.
- Changed = true;
+ Changed = ChangedSinceLastAnalysisUpdate = true;
// Jump threading may have introduced redundant debug values into BB
// which should be removed.
@@ -437,7 +352,7 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// Stop processing BB if it's the entry or is now deleted. The following
// routines attempt to eliminate BB, and locating a suitable replacement
// for the entry is non-trivial.
- if (&BB == &F.getEntryBlock() || DTU->isBBPendingDeletion(&BB))
+ if (&BB == &F->getEntryBlock() || DTU->isBBPendingDeletion(&BB))
continue;
if (pred_empty(&BB)) {
@@ -448,8 +363,8 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
<< '\n');
LoopHeaders.erase(&BB);
LVI->eraseBlock(&BB);
- DeleteDeadBlock(&BB, DTU);
- Changed = true;
+ DeleteDeadBlock(&BB, DTU.get());
+ Changed = ChangedSinceLastAnalysisUpdate = true;
continue;
}
@@ -464,12 +379,12 @@ bool JumpThreadingPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
// Don't alter Loop headers and latches to ensure another pass can
// detect and transform nested loops later.
!LoopHeaders.count(&BB) && !LoopHeaders.count(Succ) &&
- TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU)) {
+ TryToSimplifyUncondBranchFromEmptyBlock(&BB, DTU.get())) {
RemoveRedundantDbgInstrs(Succ);
// BB is valid for cleanup here because we passed in DTU. F remains
// BB's parent until a DTU->getDomTree() event.
LVI->eraseBlock(&BB);
- Changed = true;
+ Changed = ChangedSinceLastAnalysisUpdate = true;
}
}
}
@@ -1140,8 +1055,8 @@ bool JumpThreadingPass::processBlock(BasicBlock *BB) {
<< "' folding terminator: " << *BB->getTerminator()
<< '\n');
++NumFolds;
- ConstantFoldTerminator(BB, true, nullptr, DTU);
- if (HasProfileData)
+ ConstantFoldTerminator(BB, true, nullptr, DTU.get());
+ if (auto *BPI = getBPI())
BPI->eraseBlock(BB);
return true;
}
@@ -1296,7 +1211,7 @@ bool JumpThreadingPass::processImpliedCondition(BasicBlock *BB) {
FICond->eraseFromParent();
DTU->applyUpdatesPermissive({{DominatorTree::Delete, BB, RemoveSucc}});
- if (HasProfileData)
+ if (auto *BPI = getBPI())
BPI->eraseBlock(BB);
return true;
}
@@ -1740,7 +1655,7 @@ bool JumpThreadingPass::processThreadableEdges(Value *Cond, BasicBlock *BB,
++NumFolds;
Term->eraseFromParent();
DTU->applyUpdatesPermissive(Updates);
- if (HasProfileData)
+ if (auto *BPI = getBPI())
BPI->eraseBlock(BB);
// If the condition is now dead due to the removal of the old terminator,
@@ -1993,7 +1908,7 @@ bool JumpThreadingPass::maybeMergeBasicBlockIntoOnlyPred(BasicBlock *BB) {
LoopHeaders.insert(BB);
LVI->eraseBlock(SinglePred);
- MergeBasicBlockIntoOnlyPred(BB, DTU);
+ MergeBasicBlockIntoOnlyPred(BB, DTU.get());
// Now that BB is merged into SinglePred (i.e. SinglePred code followed by
// BB code within one basic block `BB`), we need to invalidate the LVI
@@ -2038,6 +1953,7 @@ void JumpThreadingPass::updateSSA(
// PHI insertion, of which we are prepared to do, clean these up now.
SSAUpdater SSAUpdate;
SmallVector<Use *, 16> UsesToRename;
+ SmallVector<DbgValueInst *, 4> DbgValues;
for (Instruction &I : *BB) {
// Scan all uses of this instruction to see if it is used outside of its
@@ -2053,8 +1969,16 @@ void JumpThreadingPass::updateSSA(
UsesToRename.push_back(&U);
}
+ // Find debug values outside of the block
+ findDbgValues(DbgValues, &I);
+ DbgValues.erase(remove_if(DbgValues,
+ [&](const DbgValueInst *DbgVal) {
+ return DbgVal->getParent() == BB;
+ }),
+ DbgValues.end());
+
// If there are no uses outside the block, we're done with this instruction.
- if (UsesToRename.empty())
+ if (UsesToRename.empty() && DbgValues.empty())
continue;
LLVM_DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
@@ -2067,6 +1991,11 @@ void JumpThreadingPass::updateSSA(
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
+ if (!DbgValues.empty()) {
+ SSAUpdate.UpdateDebugValues(&I, DbgValues);
+ DbgValues.clear();
+ }
+
LLVM_DEBUG(dbgs() << "\n");
}
}
@@ -2298,6 +2227,11 @@ void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
LLVM_DEBUG(dbgs() << " Threading through '" << PredBB->getName() << "' and '"
<< BB->getName() << "'\n");
+ // Build BPI/BFI before any changes are made to IR.
+ bool HasProfile = doesBlockHaveProfileData(BB);
+ auto *BFI = getOrCreateBFI(HasProfile);
+ auto *BPI = getOrCreateBPI(BFI != nullptr);
+
BranchInst *CondBr = cast<BranchInst>(BB->getTerminator());
BranchInst *PredBBBranch = cast<BranchInst>(PredBB->getTerminator());
@@ -2307,7 +2241,8 @@ void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
NewBB->moveAfter(PredBB);
// Set the block frequency of NewBB.
- if (HasProfileData) {
+ if (BFI) {
+ assert(BPI && "Expected BPI to exist along with BFI");
auto NewBBFreq = BFI->getBlockFreq(PredPredBB) *
BPI->getEdgeProbability(PredPredBB, PredBB);
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
@@ -2320,7 +2255,7 @@ void JumpThreadingPass::threadThroughTwoBasicBlocks(BasicBlock *PredPredBB,
cloneInstructions(PredBB->begin(), PredBB->end(), NewBB, PredPredBB);
// Copy the edge probabilities from PredBB to NewBB.
- if (HasProfileData)
+ if (BPI)
BPI->copyEdgeProbabilities(PredBB, NewBB);
// Update the terminator of PredPredBB to jump to NewBB instead of PredBB.
@@ -2404,6 +2339,11 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB,
assert(!LoopHeaders.count(BB) && !LoopHeaders.count(SuccBB) &&
"Don't thread across loop headers");
+ // Build BPI/BFI before any changes are made to IR.
+ bool HasProfile = doesBlockHaveProfileData(BB);
+ auto *BFI = getOrCreateBFI(HasProfile);
+ auto *BPI = getOrCreateBPI(BFI != nullptr);
+
// And finally, do it! Start by factoring the predecessors if needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
@@ -2427,7 +2367,8 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB,
NewBB->moveAfter(PredBB);
// Set the block frequency of NewBB.
- if (HasProfileData) {
+ if (BFI) {
+ assert(BPI && "Expected BPI to exist along with BFI");
auto NewBBFreq =
BFI->getBlockFreq(PredBB) * BPI->getEdgeProbability(PredBB, BB);
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
@@ -2469,7 +2410,7 @@ void JumpThreadingPass::threadEdge(BasicBlock *BB,
SimplifyInstructionsInBlock(NewBB, TLI);
// Update the edge weight from BB to SuccBB, which should be less than before.
- updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB);
+ updateBlockFreqAndEdgeWeight(PredBB, BB, NewBB, SuccBB, BFI, BPI, HasProfile);
// Threaded an edge!
++NumThreads;
@@ -2486,10 +2427,13 @@ BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
// Collect the frequencies of all predecessors of BB, which will be used to
// update the edge weight of the result of splitting predecessors.
DenseMap<BasicBlock *, BlockFrequency> FreqMap;
- if (HasProfileData)
+ auto *BFI = getBFI();
+ if (BFI) {
+ auto *BPI = getOrCreateBPI(true);
for (auto *Pred : Preds)
FreqMap.insert(std::make_pair(
Pred, BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, BB)));
+ }
// In the case when BB is a LandingPad block we create 2 new predecessors
// instead of just one.
@@ -2508,10 +2452,10 @@ BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
for (auto *Pred : predecessors(NewBB)) {
Updates.push_back({DominatorTree::Delete, Pred, BB});
Updates.push_back({DominatorTree::Insert, Pred, NewBB});
- if (HasProfileData) // Update frequencies between Pred -> NewBB.
+ if (BFI) // Update frequencies between Pred -> NewBB.
NewBBFreq += FreqMap.lookup(Pred);
}
- if (HasProfileData) // Apply the summed frequency to NewBB.
+ if (BFI) // Apply the summed frequency to NewBB.
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
}
@@ -2521,7 +2465,9 @@ BasicBlock *JumpThreadingPass::splitBlockPreds(BasicBlock *BB,
bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
const Instruction *TI = BB->getTerminator();
- assert(TI->getNumSuccessors() > 1 && "not a split");
+ if (!TI || TI->getNumSuccessors() < 2)
+ return false;
+
return hasValidBranchWeightMD(*TI);
}
@@ -2531,11 +2477,18 @@ bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
BasicBlock *BB,
BasicBlock *NewBB,
- BasicBlock *SuccBB) {
- if (!HasProfileData)
+ BasicBlock *SuccBB,
+ BlockFrequencyInfo *BFI,
+ BranchProbabilityInfo *BPI,
+ bool HasProfile) {
+ assert(((BFI && BPI) || (!BFI && !BPI)) &&
+ "Both BFI & BPI should either be set or unset");
+
+ if (!BFI) {
+ assert(!HasProfile &&
+ "It's expected to have BFI/BPI when profile info exists");
return;
-
- assert(BFI && BPI && "BFI & BPI should have been created here");
+ }
// As the edge from PredBB to BB is deleted, we have to update the block
// frequency of BB.
@@ -2608,7 +2561,7 @@ void JumpThreadingPass::updateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
// FIXME this locally as well so that BPI and BFI are consistent as well. We
// shouldn't make edges extremely likely or unlikely based solely on static
// estimation.
- if (BBSuccProbs.size() >= 2 && doesBlockHaveProfileData(BB)) {
+ if (BBSuccProbs.size() >= 2 && HasProfile) {
SmallVector<uint32_t, 4> Weights;
for (auto Prob : BBSuccProbs)
Weights.push_back(Prob.getNumerator());
@@ -2690,6 +2643,7 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
// mapping and using it to remap operands in the cloned instructions.
for (; BI != BB->end(); ++BI) {
Instruction *New = BI->clone();
+ New->insertInto(PredBB, OldPredBranch->getIterator());
// Remap operands to patch up intra-block references.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
@@ -2707,7 +2661,7 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
{BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
ValueMapping[&*BI] = IV;
if (!New->mayHaveSideEffects()) {
- New->deleteValue();
+ New->eraseFromParent();
New = nullptr;
}
} else {
@@ -2716,7 +2670,6 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
if (New) {
// Otherwise, insert the new instruction into the block.
New->setName(BI->getName());
- New->insertInto(PredBB, OldPredBranch->getIterator());
// Update Dominance from simplified New instruction operands.
for (unsigned i = 0, e = New->getNumOperands(); i != e; ++i)
if (BasicBlock *SuccBB = dyn_cast<BasicBlock>(New->getOperand(i)))
@@ -2740,7 +2693,7 @@ bool JumpThreadingPass::duplicateCondBranchOnPHIIntoPred(
// Remove the unconditional branch at the end of the PredBB block.
OldPredBranch->eraseFromParent();
- if (HasProfileData)
+ if (auto *BPI = getBPI())
BPI->copyEdgeProbabilities(BB, PredBB);
DTU->applyUpdatesPermissive(Updates);
@@ -2777,21 +2730,30 @@ void JumpThreadingPass::unfoldSelectInstr(BasicBlock *Pred, BasicBlock *BB,
BI->copyMetadata(*SI, {LLVMContext::MD_prof});
SIUse->setIncomingValue(Idx, SI->getFalseValue());
SIUse->addIncoming(SI->getTrueValue(), NewBB);
- // Set the block frequency of NewBB.
- if (HasProfileData) {
- uint64_t TrueWeight, FalseWeight;
- if (extractBranchWeights(*SI, TrueWeight, FalseWeight) &&
- (TrueWeight + FalseWeight) != 0) {
- SmallVector<BranchProbability, 2> BP;
- BP.emplace_back(BranchProbability::getBranchProbability(
- TrueWeight, TrueWeight + FalseWeight));
- BP.emplace_back(BranchProbability::getBranchProbability(
- FalseWeight, TrueWeight + FalseWeight));
+
+ uint64_t TrueWeight = 1;
+ uint64_t FalseWeight = 1;
+ // Copy probabilities from 'SI' to created conditional branch in 'Pred'.
+ if (extractBranchWeights(*SI, TrueWeight, FalseWeight) &&
+ (TrueWeight + FalseWeight) != 0) {
+ SmallVector<BranchProbability, 2> BP;
+ BP.emplace_back(BranchProbability::getBranchProbability(
+ TrueWeight, TrueWeight + FalseWeight));
+ BP.emplace_back(BranchProbability::getBranchProbability(
+ FalseWeight, TrueWeight + FalseWeight));
+ // Update BPI if exists.
+ if (auto *BPI = getBPI())
BPI->setEdgeProbability(Pred, BP);
+ }
+ // Set the block frequency of NewBB.
+ if (auto *BFI = getBFI()) {
+ if ((TrueWeight + FalseWeight) == 0) {
+ TrueWeight = 1;
+ FalseWeight = 1;
}
-
- auto NewBBFreq =
- BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, NewBB);
+ BranchProbability PredToNewBBProb = BranchProbability::getBranchProbability(
+ TrueWeight, TrueWeight + FalseWeight);
+ auto NewBBFreq = BFI->getBlockFreq(Pred) * PredToNewBBProb;
BFI->setBlockFreq(NewBB, NewBBFreq.getFrequency());
}
@@ -3112,3 +3074,93 @@ bool JumpThreadingPass::threadGuard(BasicBlock *BB, IntrinsicInst *Guard,
}
return true;
}
+
+PreservedAnalyses JumpThreadingPass::getPreservedAnalysis() const {
+ PreservedAnalyses PA;
+ PA.preserve<LazyValueAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+
+ // TODO: We would like to preserve BPI/BFI. Enable once all paths update them.
+ // TODO: Would be nice to verify BPI/BFI consistency as well.
+ return PA;
+}
+
+template <typename AnalysisT>
+typename AnalysisT::Result *JumpThreadingPass::runExternalAnalysis() {
+ assert(FAM && "Can't run external analysis without FunctionAnalysisManager");
+
+ // If there were no changes since the last call to 'runExternalAnalysis',
+ // all analyses are either up to date or explicitly invalidated. Just go
+ // ahead and run the "external" analysis.
+ if (!ChangedSinceLastAnalysisUpdate) {
+ assert(!DTU->hasPendingUpdates() &&
+ "Lost update of 'ChangedSinceLastAnalysisUpdate'?");
+ // Run the "external" analysis.
+ return &FAM->getResult<AnalysisT>(*F);
+ }
+ ChangedSinceLastAnalysisUpdate = false;
+
+ auto PA = getPreservedAnalysis();
+ // TODO: This shouldn't be needed once 'getPreservedAnalysis' reports BPI/BFI
+ // as preserved.
+ PA.preserve<BranchProbabilityAnalysis>();
+ PA.preserve<BlockFrequencyAnalysis>();
+ // Report everything except explicitly preserved as invalid.
+ FAM->invalidate(*F, PA);
+ // Update DT/PDT.
+ DTU->flush();
+ // Make sure DT/PDT are valid before running "external" analysis.
+ assert(DTU->getDomTree().verify(DominatorTree::VerificationLevel::Fast));
+ assert((!DTU->hasPostDomTree() ||
+ DTU->getPostDomTree().verify(
+ PostDominatorTree::VerificationLevel::Fast)));
+ // Run the "external" analysis.
+ auto *Result = &FAM->getResult<AnalysisT>(*F);
+ // Update analysis JumpThreading depends on and not explicitly preserved.
+ TTI = &FAM->getResult<TargetIRAnalysis>(*F);
+ TLI = &FAM->getResult<TargetLibraryAnalysis>(*F);
+ AA = &FAM->getResult<AAManager>(*F);
+
+ return Result;
+}
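// Typical use (see getOrCreateBPI/getOrCreateBFI below), e.g.
//   BPI = runExternalAnalysis<BranchProbabilityAnalysis>();
// flushes any pending DT/PDT updates, invalidates everything that is not
// explicitly preserved, and then recomputes the requested analysis on the
// updated IR, so the result is consistent with the transformed function.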
+
+BranchProbabilityInfo *JumpThreadingPass::getBPI() {
+ if (!BPI) {
+ assert(FAM && "Can't create BPI without FunctionAnalysisManager");
+ BPI = FAM->getCachedResult<BranchProbabilityAnalysis>(*F);
+ }
+ return *BPI;
+}
+
+BlockFrequencyInfo *JumpThreadingPass::getBFI() {
+ if (!BFI) {
+ assert(FAM && "Can't create BFI without FunctionAnalysisManager");
+ BFI = FAM->getCachedResult<BlockFrequencyAnalysis>(*F);
+ }
+ return *BFI;
+}
+
+// Important note on the validity of BPI/BFI: JumpThreading tries to preserve
+// BPI/BFI as it goes, so if a cached instance exists it will be updated.
+// Otherwise, a new instance of BPI/BFI is created (up to date by definition).
+BranchProbabilityInfo *JumpThreadingPass::getOrCreateBPI(bool Force) {
+ auto *Res = getBPI();
+ if (Res)
+ return Res;
+
+ if (Force)
+ BPI = runExternalAnalysis<BranchProbabilityAnalysis>();
+
+ return *BPI;
+}
+
+BlockFrequencyInfo *JumpThreadingPass::getOrCreateBFI(bool Force) {
+ auto *Res = getBFI();
+ if (Res)
+ return Res;
+
+ if (Force)
+ BFI = runExternalAnalysis<BlockFrequencyAnalysis>();
+
+ return *BFI;
+}
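// A sketch of how the lazy accessors are meant to be used, mirroring
// threadEdge/threadThroughTwoBasicBlocks above:
//   bool HasProfile = doesBlockHaveProfileData(BB);
//   auto *BFI = getOrCreateBFI(HasProfile);      // built only when needed
//   auto *BPI = getOrCreateBPI(BFI != nullptr);  // BPI accompanies BFI
// so the profile analyses are computed on demand and only for blocks that
// actually carry branch-weight metadata.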
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 2865dece8723..f8fab03f151d 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -44,7 +44,6 @@
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/Loads.h"
@@ -68,6 +67,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
@@ -102,6 +102,12 @@ STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
STATISTIC(NumPromotionCandidates, "Number of promotion candidates");
STATISTIC(NumLoadPromoted, "Number of load-only promotions");
STATISTIC(NumLoadStorePromoted, "Number of load and store promotions");
+STATISTIC(NumMinMaxHoisted,
+ "Number of min/max expressions hoisted out of the loop");
+STATISTIC(NumGEPsHoisted,
+ "Number of geps reassociated and hoisted out of the loop");
+STATISTIC(NumAddSubHoisted, "Number of add/subtract expressions reassociated "
+ "and hoisted out of the loop");
/// Memory promotion is enabled by default.
static cl::opt<bool>
@@ -145,10 +151,10 @@ cl::opt<unsigned> llvm::SetLicmMssaNoAccForPromotionCap(
"enable memory promotion."));
static bool inSubLoop(BasicBlock *BB, Loop *CurLoop, LoopInfo *LI);
-static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo,
- TargetTransformInfo *TTI, bool &FreeInLoop,
- bool LoopNestMode);
+static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
+ const LoopSafetyInfo *SafetyInfo,
+ TargetTransformInfo *TTI,
+ bool &FoldableInLoop, bool LoopNestMode);
static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
BasicBlock *Dest, ICFLoopSafetyInfo *SafetyInfo,
MemorySSAUpdater &MSSAU, ScalarEvolution *SE,
@@ -163,9 +169,15 @@ static bool isSafeToExecuteUnconditionally(
AssumptionCache *AC, bool AllowSpeculation);
static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
Loop *CurLoop, Instruction &I,
- SinkAndHoistLICMFlags &Flags);
+ SinkAndHoistLICMFlags &Flags,
+ bool InvariantGroup);
static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA,
MemoryUse &MU);
+/// Aggregates various functions for hoisting computations out of loop.
+static bool hoistArithmetics(Instruction &I, Loop &L,
+ ICFLoopSafetyInfo &SafetyInfo,
+ MemorySSAUpdater &MSSAU, AssumptionCache *AC,
+ DominatorTree *DT);
static Instruction *cloneInstructionInExitBlock(
Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU);
@@ -280,9 +292,6 @@ PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM,
return PreservedAnalyses::all();
auto PA = getLoopPassPreservedAnalyses();
-
- PA.preserve<DominatorTreeAnalysis>();
- PA.preserve<LoopAnalysis>();
PA.preserve<MemorySSAAnalysis>();
return PA;
@@ -293,9 +302,9 @@ void LICMPass::printPipeline(
static_cast<PassInfoMixin<LICMPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
- OS << "<";
+ OS << '<';
OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
- OS << ">";
+ OS << '>';
}
PreservedAnalyses LNICMPass::run(LoopNest &LN, LoopAnalysisManager &AM,
@@ -334,9 +343,9 @@ void LNICMPass::printPipeline(
static_cast<PassInfoMixin<LNICMPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
- OS << "<";
+ OS << '<';
OS << (Opts.AllowSpeculation ? "" : "no-") << "allowspeculation";
- OS << ">";
+ OS << '>';
}
char LegacyLICMPass::ID = 0;
@@ -351,32 +360,21 @@ INITIALIZE_PASS_END(LegacyLICMPass, "licm", "Loop Invariant Code Motion", false,
false)
Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
-Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
- unsigned LicmMssaNoAccForPromotionCap,
- bool LicmAllowSpeculation) {
- return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
- LicmAllowSpeculation);
-}
-llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L,
- MemorySSA *MSSA)
+llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop &L,
+ MemorySSA &MSSA)
: SinkAndHoistLICMFlags(SetLicmMssaOptCap, SetLicmMssaNoAccForPromotionCap,
IsSink, L, MSSA) {}
llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(
unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
- Loop *L, MemorySSA *MSSA)
+ Loop &L, MemorySSA &MSSA)
: LicmMssaOptCap(LicmMssaOptCap),
LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
IsSink(IsSink) {
- assert(((L != nullptr) == (MSSA != nullptr)) &&
- "Unexpected values for SinkAndHoistLICMFlags");
- if (!MSSA)
- return;
-
unsigned AccessCapCount = 0;
- for (auto *BB : L->getBlocks())
- if (const auto *Accesses = MSSA->getBlockAccesses(BB))
+ for (auto *BB : L.getBlocks())
+ if (const auto *Accesses = MSSA.getBlockAccesses(BB))
for (const auto &MA : *Accesses) {
(void)MA;
++AccessCapCount;
@@ -400,7 +398,6 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
bool Changed = false;
assert(L->isLCSSAForm(*DT) && "Loop is not in LCSSA form.");
- MSSA->ensureOptimizedUses();
// If this loop has metadata indicating that LICM is not to be performed then
// just exit.
@@ -426,7 +423,7 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AAResults *AA, LoopInfo *LI,
MemorySSAUpdater MSSAU(MSSA);
SinkAndHoistLICMFlags Flags(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
- /*IsSink=*/true, L, MSSA);
+ /*IsSink=*/true, *L, *MSSA);
// Get the preheader block to move instructions into...
BasicBlock *Preheader = L->getLoopPreheader();
@@ -581,14 +578,15 @@ bool llvm::sinkRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
// outside of the loop. In this case, it doesn't even matter if the
// operands of the instruction are loop invariant.
//
- bool FreeInLoop = false;
+ bool FoldableInLoop = false;
bool LoopNestMode = OutermostLoop != nullptr;
if (!I.mayHaveSideEffects() &&
- isNotUsedOrFreeInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
- SafetyInfo, TTI, FreeInLoop, LoopNestMode) &&
+ isNotUsedOrFoldableInLoop(I, LoopNestMode ? OutermostLoop : CurLoop,
+ SafetyInfo, TTI, FoldableInLoop,
+ LoopNestMode) &&
canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE)) {
if (sink(I, LI, DT, CurLoop, SafetyInfo, MSSAU, ORE)) {
- if (!FreeInLoop) {
+ if (!FoldableInLoop) {
++II;
salvageDebugInfo(I);
eraseInstruction(I, *SafetyInfo, MSSAU);
@@ -881,6 +879,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
LoopBlocksRPO Worklist(CurLoop);
Worklist.perform(LI);
bool Changed = false;
+ BasicBlock *Preheader = CurLoop->getLoopPreheader();
for (BasicBlock *BB : Worklist) {
// Only need to process the contents of this block if it is not part of a
// subloop (which would already have been processed).
@@ -888,21 +887,6 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
continue;
for (Instruction &I : llvm::make_early_inc_range(*BB)) {
- // Try constant folding this instruction. If all the operands are
- // constants, it is technically hoistable, but it would be better to
- // just fold it.
- if (Constant *C = ConstantFoldInstruction(
- &I, I.getModule()->getDataLayout(), TLI)) {
- LLVM_DEBUG(dbgs() << "LICM folding inst: " << I << " --> " << *C
- << '\n');
- // FIXME MSSA: Such replacements may make accesses unoptimized (D51960).
- I.replaceAllUsesWith(C);
- if (isInstructionTriviallyDead(&I, TLI))
- eraseInstruction(I, *SafetyInfo, MSSAU);
- Changed = true;
- continue;
- }
-
// Try hoisting the instruction out to the preheader. We can only do
// this if all of the operands of the instruction are loop invariant and
// if it is safe to hoist the instruction. We also check block frequency
@@ -914,8 +898,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
canSinkOrHoistInst(I, AA, DT, CurLoop, MSSAU, true, Flags, ORE) &&
isSafeToExecuteUnconditionally(
I, DT, TLI, CurLoop, SafetyInfo, ORE,
- CurLoop->getLoopPreheader()->getTerminator(), AC,
- AllowSpeculation)) {
+ Preheader->getTerminator(), AC, AllowSpeculation)) {
hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
MSSAU, SE, ORE);
HoistedInstructions.push_back(&I);
@@ -983,6 +966,13 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
}
}
+ // Try to reassociate instructions so that part of computations can be
+ // done out of loop.
+ if (hoistArithmetics(I, *CurLoop, *SafetyInfo, MSSAU, AC, DT)) {
+ Changed = true;
+ continue;
+ }
+
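// For instance (illustrative IR, not from a test), with loop-invariant
// %inv1 and %inv2 the in-loop computation
//   %t = add i64 %iv, %inv1
//   %u = add i64 %t, %inv2
// can be reassociated so that %inv = add i64 %inv1, %inv2 is materialized
// once in the preheader and only %u = add i64 %iv, %inv remains in the loop;
// hoistArithmetics covers this kind of add/sub, min/max and GEP reassociation.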
// Remember possibly hoistable branches so we can actually hoist them
// later if needed.
if (BranchInst *BI = dyn_cast<BranchInst>(&I))
@@ -1147,6 +1137,20 @@ bool isOnlyMemoryAccess(const Instruction *I, const Loop *L,
}
}
+static MemoryAccess *getClobberingMemoryAccess(MemorySSA &MSSA,
+ BatchAAResults &BAA,
+ SinkAndHoistLICMFlags &Flags,
+ MemoryUseOrDef *MA) {
+ // See declaration of SetLicmMssaOptCap for usage details.
+ if (Flags.tooManyClobberingCalls())
+ return MA->getDefiningAccess();
+
+ MemoryAccess *Source =
+ MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(MA, BAA);
+ Flags.incrementClobberingCalls();
+ return Source;
+}
+
bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
Loop *CurLoop, MemorySSAUpdater &MSSAU,
bool TargetExecutesOncePerLoop,
@@ -1176,8 +1180,12 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (isLoadInvariantInLoop(LI, DT, CurLoop))
return true;
+ auto MU = cast<MemoryUse>(MSSA->getMemoryAccess(LI));
+
+ bool InvariantGroup = LI->hasMetadata(LLVMContext::MD_invariant_group);
+
bool Invalidated = pointerInvalidatedByLoop(
- MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(LI)), CurLoop, I, Flags);
+ MSSA, MU, CurLoop, I, Flags, InvariantGroup);
// Check loop-invariant address because this may also be a sinkable load
// whose address is not necessarily loop-invariant.
if (ORE && Invalidated && CurLoop->isLoopInvariant(LI->getPointerOperand()))
@@ -1210,12 +1218,17 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// Assumes don't actually alias anything or throw
return true;
- if (match(CI, m_Intrinsic<Intrinsic::experimental_widenable_condition>()))
- // Widenable conditions don't actually alias anything or throw
- return true;
-
// Handle simple cases by querying alias analysis.
MemoryEffects Behavior = AA->getMemoryEffects(CI);
+
+ // FIXME: we do not model the semantics of thread-locals well, so the
+ // addresses of thread-locals appear to be constants in coroutines. For now,
+ // do not treat onlyReadsMemory calls in coroutines as constant, since a
+ // thread-local access can be hidden inside such a call. Remove this check
+ // once thread-local semantics are handled properly.
+ if (Behavior.onlyReadsMemory() && CI->getFunction()->isPresplitCoroutine())
+ return false;
+
if (Behavior.doesNotAccessMemory())
return true;
if (Behavior.onlyReadsMemory()) {
@@ -1228,7 +1241,7 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
if (Op->getType()->isPointerTy() &&
pointerInvalidatedByLoop(
MSSA, cast<MemoryUse>(MSSA->getMemoryAccess(CI)), CurLoop, I,
- Flags))
+ Flags, /*InvariantGroup=*/false))
return false;
return true;
}
@@ -1258,21 +1271,30 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// arbitrary number of reads in the loop.
if (isOnlyMemoryAccess(SI, CurLoop, MSSAU))
return true;
- // If there are more accesses than the Promotion cap or no "quota" to
- // check clobber, then give up as we're not walking a list that long.
- if (Flags.tooManyMemoryAccesses() || Flags.tooManyClobberingCalls())
+ // If there are more accesses than the Promotion cap, then give up as we're
+ // not walking a list that long.
+ if (Flags.tooManyMemoryAccesses())
+ return false;
+
+ auto *SIMD = MSSA->getMemoryAccess(SI);
+ BatchAAResults BAA(*AA);
+ auto *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, SIMD);
+ // Make sure there are no clobbers inside the loop.
+ if (!MSSA->isLiveOnEntryDef(Source) &&
+ CurLoop->contains(Source->getBlock()))
return false;
+
// If there are interfering Uses (i.e. their defining access is in the
// loop), or ordered loads (stored as Defs!), don't move this store.
// Could do better here, but this is conservatively correct.
// TODO: Cache set of Uses on the first walk in runOnLoop, update when
// moving accesses. Can also extend to dominating uses.
- auto *SIMD = MSSA->getMemoryAccess(SI);
for (auto *BB : CurLoop->getBlocks())
if (auto *Accesses = MSSA->getBlockAccesses(BB)) {
for (const auto &MA : *Accesses)
if (const auto *MU = dyn_cast<MemoryUse>(&MA)) {
- auto *MD = MU->getDefiningAccess();
+ auto *MD = getClobberingMemoryAccess(*MSSA, BAA, Flags,
+ const_cast<MemoryUse *>(MU));
if (!MSSA->isLiveOnEntryDef(MD) &&
CurLoop->contains(MD->getBlock()))
return false;
@@ -1293,17 +1315,13 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// Check if the call may read from the memory location written
// to by SI. Check CI's attributes and arguments; the number of
// such checks performed is limited above by NoOfMemAccTooLarge.
- ModRefInfo MRI = AA->getModRefInfo(CI, MemoryLocation::get(SI));
+ ModRefInfo MRI = BAA.getModRefInfo(CI, MemoryLocation::get(SI));
if (isModOrRefSet(MRI))
return false;
}
}
}
- auto *Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(SI);
- Flags.incrementClobberingCalls();
- // If there are no clobbering Defs in the loop, store is safe to hoist.
- return MSSA->isLiveOnEntryDef(Source) ||
- !CurLoop->contains(Source->getBlock());
+ return true;
}
assert(!I.mayReadOrWriteMemory() && "unhandled aliasing");
@@ -1326,13 +1344,12 @@ static bool isTriviallyReplaceablePHI(const PHINode &PN, const Instruction &I) {
return true;
}
-/// Return true if the instruction is free in the loop.
-static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
+/// Return true if the instruction is foldable in the loop.
+static bool isFoldableInLoop(const Instruction &I, const Loop *CurLoop,
const TargetTransformInfo *TTI) {
- InstructionCost CostI =
- TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
-
if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+ InstructionCost CostI =
+ TTI->getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
if (CostI != TargetTransformInfo::TCC_Free)
return false;
// For a GEP, we cannot simply use getInstructionCost because currently
@@ -1349,7 +1366,7 @@ static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
return true;
}
- return CostI == TargetTransformInfo::TCC_Free;
+ return false;
}
/// Return true if the only users of this instruction are outside of
@@ -1358,12 +1375,12 @@ static bool isFreeInLoop(const Instruction &I, const Loop *CurLoop,
///
/// We also return true if the instruction could be folded away in lowering.
/// (e.g., a GEP can be folded into a load as an addressing mode in the loop).
-static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
- const LoopSafetyInfo *SafetyInfo,
- TargetTransformInfo *TTI, bool &FreeInLoop,
- bool LoopNestMode) {
+static bool isNotUsedOrFoldableInLoop(const Instruction &I, const Loop *CurLoop,
+ const LoopSafetyInfo *SafetyInfo,
+ TargetTransformInfo *TTI,
+ bool &FoldableInLoop, bool LoopNestMode) {
const auto &BlockColors = SafetyInfo->getBlockColors();
- bool IsFree = isFreeInLoop(I, CurLoop, TTI);
+ bool IsFoldable = isFoldableInLoop(I, CurLoop, TTI);
for (const User *U : I.users()) {
const Instruction *UI = cast<Instruction>(U);
if (const PHINode *PN = dyn_cast<PHINode>(UI)) {
@@ -1390,8 +1407,8 @@ static bool isNotUsedOrFreeInLoop(const Instruction &I, const Loop *CurLoop,
}
if (CurLoop->contains(UI)) {
- if (IsFree) {
- FreeInLoop = true;
+ if (IsFoldable) {
+ FoldableInLoop = true;
continue;
}
return false;
@@ -1490,7 +1507,7 @@ static void moveInstructionBefore(Instruction &I, Instruction &Dest,
MSSAU.getMemorySSA()->getMemoryAccess(&I)))
MSSAU.moveToPlace(OldMemAcc, Dest.getParent(), MemorySSA::BeforeTerminator);
if (SE)
- SE->forgetValue(&I);
+ SE->forgetBlockAndLoopDispositions(&I);
}
static Instruction *sinkThroughTriviallyReplaceablePHI(
@@ -1695,6 +1712,8 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
// The PHI must be trivially replaceable.
Instruction *New = sinkThroughTriviallyReplaceablePHI(
PN, &I, LI, SunkCopies, SafetyInfo, CurLoop, MSSAU);
+ // As we sink the instruction out of the BB, drop its debug location.
+ New->dropLocation();
PN->replaceAllUsesWith(New);
eraseInstruction(*PN, *SafetyInfo, MSSAU);
Changed = true;
@@ -1729,7 +1748,7 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
// time in isGuaranteedToExecute if we don't actually have anything to
// drop. It is a compile time optimization, not required for correctness.
!SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop))
- I.dropUndefImplyingAttrsAndUnknownMetadata();
+ I.dropUBImplyingAttrsAndMetadata();
if (isa<PHINode>(I))
// Move the new node to the end of the phi list in the destination block.
@@ -1915,6 +1934,8 @@ bool isNotVisibleOnUnwindInLoop(const Value *Object, const Loop *L,
isNotCapturedBeforeOrInLoop(Object, L, DT);
}
+// We don't consider globals as writable: While the physical memory is writable,
+// we may not have provenance to perform the write.
bool isWritableObject(const Value *Object) {
// TODO: Alloca might not be writable after its lifetime ends.
// See https://github.com/llvm/llvm-project/issues/51838.
@@ -1925,9 +1946,6 @@ bool isWritableObject(const Value *Object) {
if (auto *A = dyn_cast<Argument>(Object))
return A->hasByValAttr();
- if (auto *G = dyn_cast<GlobalVariable>(Object))
- return !G->isConstant();
-
// TODO: Noalias has nothing to do with writability, this should check for
// an allocator function.
return isNoAliasCall(Object);
@@ -2203,7 +2221,7 @@ bool llvm::promoteLoopAccessesToScalars(
});
// Look at all the loop uses, and try to merge their locations.
- std::vector<const DILocation *> LoopUsesLocs;
+ std::vector<DILocation *> LoopUsesLocs;
for (auto *U : LoopUses)
LoopUsesLocs.push_back(U->getDebugLoc().get());
auto DL = DebugLoc(DILocation::getMergedLocations(LoopUsesLocs));
@@ -2330,19 +2348,24 @@ collectPromotionCandidates(MemorySSA *MSSA, AliasAnalysis *AA, Loop *L) {
static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
Loop *CurLoop, Instruction &I,
- SinkAndHoistLICMFlags &Flags) {
+ SinkAndHoistLICMFlags &Flags,
+ bool InvariantGroup) {
// For hoisting, use the walker to determine safety
if (!Flags.getIsSink()) {
- MemoryAccess *Source;
- // See declaration of SetLicmMssaOptCap for usage details.
- if (Flags.tooManyClobberingCalls())
- Source = MU->getDefiningAccess();
- else {
- Source = MSSA->getSkipSelfWalker()->getClobberingMemoryAccess(MU);
- Flags.incrementClobberingCalls();
- }
+ // If hoisting an invariant group load, we only need to check that there
+ // is no store to the loaded pointer between the start of the loop and
+ // the load (since all loaded values must be the same).
+ //
+ // This holds in either of two cases:
+ //   1) the clobbering memory access lies outside the loop, or
+ //   2) the clobbering access is the MemoryPhi at the loop header, i.e.
+ //      the earliest access the loaded memory could have inside the loop.
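+ // As a rough illustrative sketch (hypothetical IR): a load such as
+ //   %v = load i32, ptr %p, !invariant.group !0
+ // whose clobbering access is the MemoryPhi in the loop header can still be
+ // hoisted, because the invariant-group guarantee means every iteration
+ // observes the same value.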
+
+ BatchAAResults BAA(MSSA->getAA());
+ MemoryAccess *Source = getClobberingMemoryAccess(*MSSA, BAA, Flags, MU);
return !MSSA->isLiveOnEntryDef(Source) &&
- CurLoop->contains(Source->getBlock());
+ CurLoop->contains(Source->getBlock()) &&
+ !(InvariantGroup && Source->getBlock() == CurLoop->getHeader() && isa<MemoryPhi>(Source));
}
// For sinking, we'd need to check all Defs below this use. The getClobbering
@@ -2383,6 +2406,304 @@ bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) {
return false;
}
+/// Try to simplify things like (A < INV_1 AND A < INV_2) into
+/// (A < min(INV_1, INV_2)), if INV_1 and INV_2 are both loop invariants whose
+/// minimum can be computed outside of the loop, and A is not loop-invariant.
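+///
+/// As a rough illustrative sketch (hypothetical IR, names invented here):
+///   %c1 = icmp slt i32 %a, %inv1
+///   %c2 = icmp slt i32 %a, %inv2
+///   %r  = and i1 %c1, %c2
+/// becomes, with the min hoisted into the preheader,
+///   %min = call i32 @llvm.smin.i32(i32 %inv1, i32 %inv2)
+///   %r   = icmp slt i32 %a, %min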
+static bool hoistMinMax(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
+ MemorySSAUpdater &MSSAU) {
+ bool Inverse = false;
+ using namespace PatternMatch;
+ Value *Cond1, *Cond2;
+ if (match(&I, m_LogicalOr(m_Value(Cond1), m_Value(Cond2)))) {
+ Inverse = true;
+ } else if (match(&I, m_LogicalAnd(m_Value(Cond1), m_Value(Cond2)))) {
+ // Do nothing
+ } else
+ return false;
+
+ auto MatchICmpAgainstInvariant = [&](Value *C, ICmpInst::Predicate &P,
+ Value *&LHS, Value *&RHS) {
+ if (!match(C, m_OneUse(m_ICmp(P, m_Value(LHS), m_Value(RHS)))))
+ return false;
+ if (!LHS->getType()->isIntegerTy())
+ return false;
+ if (!ICmpInst::isRelational(P))
+ return false;
+ if (L.isLoopInvariant(LHS)) {
+ std::swap(LHS, RHS);
+ P = ICmpInst::getSwappedPredicate(P);
+ }
+ if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
+ return false;
+ if (Inverse)
+ P = ICmpInst::getInversePredicate(P);
+ return true;
+ };
+ ICmpInst::Predicate P1, P2;
+ Value *LHS1, *LHS2, *RHS1, *RHS2;
+ if (!MatchICmpAgainstInvariant(Cond1, P1, LHS1, RHS1) ||
+ !MatchICmpAgainstInvariant(Cond2, P2, LHS2, RHS2))
+ return false;
+ if (P1 != P2 || LHS1 != LHS2)
+ return false;
+
+ // Everything is fine, we can do the transform.
+ bool UseMin = ICmpInst::isLT(P1) || ICmpInst::isLE(P1);
+ assert(
+ (UseMin || ICmpInst::isGT(P1) || ICmpInst::isGE(P1)) &&
+ "Relational predicate is either less (or equal) or greater (or equal)!");
+ Intrinsic::ID id = ICmpInst::isSigned(P1)
+ ? (UseMin ? Intrinsic::smin : Intrinsic::smax)
+ : (UseMin ? Intrinsic::umin : Intrinsic::umax);
+ auto *Preheader = L.getLoopPreheader();
+ assert(Preheader && "Loop is not in simplify form?");
+ IRBuilder<> Builder(Preheader->getTerminator());
+ // We are about to create a new guaranteed use for RHS2, which might not have
+ // existed before (if it was a non-taken input of a logical and/or
+ // instruction). If it was poison, we need to freeze it. Note that no new
+ // uses of LHS and RHS1 are introduced, so they don't need this.
+ if (isa<SelectInst>(I))
+ RHS2 = Builder.CreateFreeze(RHS2, RHS2->getName() + ".fr");
+ Value *NewRHS = Builder.CreateBinaryIntrinsic(
+ id, RHS1, RHS2, nullptr, StringRef("invariant.") +
+ (ICmpInst::isSigned(P1) ? "s" : "u") +
+ (UseMin ? "min" : "max"));
+ Builder.SetInsertPoint(&I);
+ ICmpInst::Predicate P = P1;
+ if (Inverse)
+ P = ICmpInst::getInversePredicate(P);
+ Value *NewCond = Builder.CreateICmp(P, LHS1, NewRHS);
+ NewCond->takeName(&I);
+ I.replaceAllUsesWith(NewCond);
+ eraseInstruction(I, SafetyInfo, MSSAU);
+ eraseInstruction(*cast<Instruction>(Cond1), SafetyInfo, MSSAU);
+ eraseInstruction(*cast<Instruction>(Cond2), SafetyInfo, MSSAU);
+ return true;
+}
+
+/// Reassociate gep (gep ptr, idx1), idx2 to gep (gep ptr, idx2), idx1 if
+/// this allows hoisting the inner GEP.
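+///
+/// As a rough illustrative sketch (hypothetical IR, assuming %ptr and %inv are
+/// loop-invariant while %var is not):
+///   %src = getelementptr i32, ptr %ptr, i64 %var
+///   %gep = getelementptr i32, ptr %src, i64 %inv
+/// becomes
+///   %invariant.gep = getelementptr i32, ptr %ptr, i64 %inv   ; hoisted
+///   %gep = getelementptr i32, ptr %invariant.gep, i64 %var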
+static bool hoistGEP(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
+ MemorySSAUpdater &MSSAU, AssumptionCache *AC,
+ DominatorTree *DT) {
+ auto *GEP = dyn_cast<GetElementPtrInst>(&I);
+ if (!GEP)
+ return false;
+
+ auto *Src = dyn_cast<GetElementPtrInst>(GEP->getPointerOperand());
+ if (!Src || !Src->hasOneUse() || !L.contains(Src))
+ return false;
+
+ Value *SrcPtr = Src->getPointerOperand();
+ auto LoopInvariant = [&](Value *V) { return L.isLoopInvariant(V); };
+ if (!L.isLoopInvariant(SrcPtr) || !all_of(GEP->indices(), LoopInvariant))
+ return false;
+
+ // This can only happen if !AllowSpeculation, otherwise this would already be
+ // handled.
+ // FIXME: Should we respect AllowSpeculation in these reassociation folds?
+ // The flag exists to prevent metadata dropping, which is not relevant here.
+ if (all_of(Src->indices(), LoopInvariant))
+ return false;
+
+ // The swapped GEPs are inbounds if both original GEPs are inbounds
+ // and the sign of the offsets is the same. For simplicity, only
+ // handle both offsets being non-negative.
+ const DataLayout &DL = GEP->getModule()->getDataLayout();
+ auto NonNegative = [&](Value *V) {
+ return isKnownNonNegative(V, DL, 0, AC, GEP, DT);
+ };
+ bool IsInBounds = Src->isInBounds() && GEP->isInBounds() &&
+ all_of(Src->indices(), NonNegative) &&
+ all_of(GEP->indices(), NonNegative);
+
+ BasicBlock *Preheader = L.getLoopPreheader();
+ IRBuilder<> Builder(Preheader->getTerminator());
+ Value *NewSrc = Builder.CreateGEP(GEP->getSourceElementType(), SrcPtr,
+ SmallVector<Value *>(GEP->indices()),
+ "invariant.gep", IsInBounds);
+ Builder.SetInsertPoint(GEP);
+ Value *NewGEP = Builder.CreateGEP(Src->getSourceElementType(), NewSrc,
+ SmallVector<Value *>(Src->indices()), "gep",
+ IsInBounds);
+ GEP->replaceAllUsesWith(NewGEP);
+ eraseInstruction(*GEP, SafetyInfo, MSSAU);
+ eraseInstruction(*Src, SafetyInfo, MSSAU);
+ return true;
+}
+
+/// Try to turn things like "LV + C1 < C2" into "LV < C2 - C1". Here
+/// C1 and C2 are loop invariants and LV is a loop-variant.
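+///
+/// As a rough illustrative sketch (hypothetical IR, %c1 and %c2 invariant):
+///   %sum = add nsw i32 %lv, %c1
+///   %cmp = icmp slt i32 %sum, %c2
+/// becomes, with %inv = sub nsw i32 %c2, %c1 hoisted into the preheader,
+///   %cmp = icmp slt i32 %lv, %inv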
+static bool hoistAdd(ICmpInst::Predicate Pred, Value *VariantLHS,
+ Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
+ ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
+ AssumptionCache *AC, DominatorTree *DT) {
+ assert(ICmpInst::isSigned(Pred) && "Not supported yet!");
+ assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
+ assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
+
+ // Try to represent VariantLHS as sum of invariant and variant operands.
+ using namespace PatternMatch;
+ Value *VariantOp, *InvariantOp;
+ if (!match(VariantLHS, m_NSWAdd(m_Value(VariantOp), m_Value(InvariantOp))))
+ return false;
+
+ // LHS itself is loop-variant; try to represent it in the form
+ // "VariantOp + InvariantOp". If that is possible, we can reassociate.
+ if (L.isLoopInvariant(VariantOp))
+ std::swap(VariantOp, InvariantOp);
+ if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
+ return false;
+
+ // In order to turn "LV + C1 < C2" into "LV < C2 - C1", we need to be able to
+ // freely move values from the left side of the inequality to the right side
+ // (just as in ordinary linear arithmetic). Overflow makes things much more
+ // complicated, so we want to avoid it.
+ auto &DL = L.getHeader()->getModule()->getDataLayout();
+ bool ProvedNoOverflowAfterReassociate =
+ computeOverflowForSignedSub(InvariantRHS, InvariantOp, DL, AC, &ICmp,
+ DT) == llvm::OverflowResult::NeverOverflows;
+ if (!ProvedNoOverflowAfterReassociate)
+ return false;
+ auto *Preheader = L.getLoopPreheader();
+ assert(Preheader && "Loop is not in simplify form?");
+ IRBuilder<> Builder(Preheader->getTerminator());
+ Value *NewCmpOp = Builder.CreateSub(InvariantRHS, InvariantOp, "invariant.op",
+ /*HasNUW*/ false, /*HasNSW*/ true);
+ ICmp.setPredicate(Pred);
+ ICmp.setOperand(0, VariantOp);
+ ICmp.setOperand(1, NewCmpOp);
+ eraseInstruction(cast<Instruction>(*VariantLHS), SafetyInfo, MSSAU);
+ return true;
+}
+
+/// Try to reassociate and hoist the following two patterns:
+/// LV - C1 < C2 --> LV < C1 + C2,
+/// C1 - LV < C2 --> LV > C1 - C2.
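+///
+/// As a rough illustrative sketch of the first pattern (hypothetical IR, %c1
+/// and %c2 invariant):
+///   %diff = sub nsw i32 %lv, %c1
+///   %cmp  = icmp slt i32 %diff, %c2
+/// becomes, with %inv = add nsw i32 %c1, %c2 hoisted into the preheader,
+///   %cmp = icmp slt i32 %lv, %inv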
+static bool hoistSub(ICmpInst::Predicate Pred, Value *VariantLHS,
+ Value *InvariantRHS, ICmpInst &ICmp, Loop &L,
+ ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU,
+ AssumptionCache *AC, DominatorTree *DT) {
+ assert(ICmpInst::isSigned(Pred) && "Not supported yet!");
+ assert(!L.isLoopInvariant(VariantLHS) && "Precondition.");
+ assert(L.isLoopInvariant(InvariantRHS) && "Precondition.");
+
+ // Try to represent VariantLHS as a difference of invariant and variant
+ // operands.
+ using namespace PatternMatch;
+ Value *VariantOp, *InvariantOp;
+ if (!match(VariantLHS, m_NSWSub(m_Value(VariantOp), m_Value(InvariantOp))))
+ return false;
+
+ bool VariantSubtracted = false;
+ // LHS itself is loop-variant; try to represent it in the form
+ // "VariantOp - InvariantOp" (or "InvariantOp - VariantOp"). If that is
+ // possible, we can reassociate. If the variant operand is the one being
+ // subtracted, we use a slightly different scheme.
+ if (L.isLoopInvariant(VariantOp)) {
+ std::swap(VariantOp, InvariantOp);
+ VariantSubtracted = true;
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+ if (L.isLoopInvariant(VariantOp) || !L.isLoopInvariant(InvariantOp))
+ return false;
+
+ // In order to turn "LV - C1 < C2" into "LV < C2 + C1", we need to be able to
+ // freely move values from the left side of the inequality to the right side
+ // (just as in ordinary linear arithmetic). Overflow makes things much more
+ // complicated, so we want to avoid it. Likewise, for "C1 - LV < C2" we need
+ // to prove that "C1 - C2" does not overflow.
+ auto &DL = L.getHeader()->getModule()->getDataLayout();
+ if (VariantSubtracted) {
+ // C1 - LV < C2 --> LV > C1 - C2
+ if (computeOverflowForSignedSub(InvariantOp, InvariantRHS, DL, AC, &ICmp,
+ DT) != llvm::OverflowResult::NeverOverflows)
+ return false;
+ } else {
+ // LV - C1 < C2 --> LV < C1 + C2
+ if (computeOverflowForSignedAdd(InvariantOp, InvariantRHS, DL, AC, &ICmp,
+ DT) != llvm::OverflowResult::NeverOverflows)
+ return false;
+ }
+ auto *Preheader = L.getLoopPreheader();
+ assert(Preheader && "Loop is not in simplify form?");
+ IRBuilder<> Builder(Preheader->getTerminator());
+ Value *NewCmpOp =
+ VariantSubtracted
+ ? Builder.CreateSub(InvariantOp, InvariantRHS, "invariant.op",
+ /*HasNUW*/ false, /*HasNSW*/ true)
+ : Builder.CreateAdd(InvariantOp, InvariantRHS, "invariant.op",
+ /*HasNUW*/ false, /*HasNSW*/ true);
+ ICmp.setPredicate(Pred);
+ ICmp.setOperand(0, VariantOp);
+ ICmp.setOperand(1, NewCmpOp);
+ eraseInstruction(cast<Instruction>(*VariantLHS), SafetyInfo, MSSAU);
+ return true;
+}
+
+/// Reassociate and hoist add/sub expressions.
+static bool hoistAddSub(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
+ MemorySSAUpdater &MSSAU, AssumptionCache *AC,
+ DominatorTree *DT) {
+ using namespace PatternMatch;
+ ICmpInst::Predicate Pred;
+ Value *LHS, *RHS;
+ if (!match(&I, m_ICmp(Pred, m_Value(LHS), m_Value(RHS))))
+ return false;
+
+ // TODO: Support unsigned predicates?
+ if (!ICmpInst::isSigned(Pred))
+ return false;
+
+ // Put variant operand to LHS position.
+ if (L.isLoopInvariant(LHS)) {
+ std::swap(LHS, RHS);
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ }
+ // We want to delete the initial operation after reassociation, so only do it
+ // if it has no other uses.
+ if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS) || !LHS->hasOneUse())
+ return false;
+
+ // TODO: We could go with smarter context, taking common dominator of all I's
+ // users instead of I itself.
+ if (hoistAdd(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
+ return true;
+
+ if (hoistSub(Pred, LHS, RHS, cast<ICmpInst>(I), L, SafetyInfo, MSSAU, AC, DT))
+ return true;
+
+ return false;
+}
+
+static bool hoistArithmetics(Instruction &I, Loop &L,
+ ICFLoopSafetyInfo &SafetyInfo,
+ MemorySSAUpdater &MSSAU, AssumptionCache *AC,
+ DominatorTree *DT) {
+ // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them
+ // into (x < min(INV1, INV2)), and hoisting the invariant part of this
+ // expression out of the loop.
+ if (hoistMinMax(I, L, SafetyInfo, MSSAU)) {
+ ++NumHoisted;
+ ++NumMinMaxHoisted;
+ return true;
+ }
+
+ // Try to hoist GEPs by reassociation.
+ if (hoistGEP(I, L, SafetyInfo, MSSAU, AC, DT)) {
+ ++NumHoisted;
+ ++NumGEPsHoisted;
+ return true;
+ }
+
+ // Try to hoist add/sub's by reassociation.
+ if (hoistAddSub(I, L, SafetyInfo, MSSAU, AC, DT)) {
+ ++NumHoisted;
+ ++NumAddSubHoisted;
+ return true;
+ }
+
+ return false;
+}
+
/// Little predicate that returns true if the specified basic block is in
/// a subloop of the current one, not the current one itself.
///
diff --git a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index 7e4dbace043a..c041e3621a16 100644
--- a/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -26,8 +26,6 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -73,7 +71,7 @@ static bool isLoopDead(Loop *L, ScalarEvolution &SE,
// of the loop.
bool AllEntriesInvariant = true;
bool AllOutgoingValuesSame = true;
- if (!L->hasNoExitBlocks()) {
+ if (ExitBlock) {
for (PHINode &P : ExitBlock->phis()) {
Value *incoming = P.getIncomingValueForBlock(ExitingBlocks[0]);
@@ -488,6 +486,14 @@ static LoopDeletionResult deleteLoopIfDead(Loop *L, DominatorTree &DT,
LLVM_DEBUG(dbgs() << "Deletion requires at most one exit block.\n");
return LoopDeletionResult::Unmodified;
}
+
+ // We can't directly branch to an EH pad. Don't bother handling this edge
+ // case.
+ if (ExitBlock && ExitBlock->isEHPad()) {
+ LLVM_DEBUG(dbgs() << "Cannot delete loop exiting to EH pad.\n");
+ return LoopDeletionResult::Unmodified;
+ }
+
// Finally, we have to check that the loop really is dead.
bool Changed = false;
if (!isLoopDead(L, SE, ExitingBlocks, ExitBlock, Changed, Preheader, LI)) {
@@ -539,62 +545,3 @@ PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM,
PA.preserve<MemorySSAAnalysis>();
return PA;
}
-
-namespace {
-class LoopDeletionLegacyPass : public LoopPass {
-public:
- static char ID; // Pass ID, replacement for typeid
- LoopDeletionLegacyPass() : LoopPass(ID) {
- initializeLoopDeletionLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- // Possibly eliminate loop L if it is dead.
- bool runOnLoop(Loop *L, LPPassManager &) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<MemorySSAWrapperPass>();
- getLoopAnalysisUsage(AU);
- }
-};
-}
-
-char LoopDeletionLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopDeletionLegacyPass, "loop-deletion",
- "Delete dead loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopPass)
-INITIALIZE_PASS_END(LoopDeletionLegacyPass, "loop-deletion",
- "Delete dead loops", false, false)
-
-Pass *llvm::createLoopDeletionPass() { return new LoopDeletionLegacyPass(); }
-
-bool LoopDeletionLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipLoop(L))
- return false;
- DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
- MemorySSA *MSSA = nullptr;
- if (MSSAAnalysis)
- MSSA = &MSSAAnalysis->getMSSA();
- // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
- // pass. Function analyses need to be preserved across loop transformations
- // but ORE cannot be preserved (see comment before the pass definition).
- OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
-
- LLVM_DEBUG(dbgs() << "Analyzing Loop for deletion: ");
- LLVM_DEBUG(L->dump());
-
- LoopDeletionResult Result = deleteLoopIfDead(L, DT, SE, LI, MSSA, ORE);
-
- // If we can prove the backedge isn't taken, just break it and be done. This
- // leaves the loop structure in place which means it can handle dispatching
- // to the right exit based on whatever loop invariant structure remains.
- if (Result != LoopDeletionResult::Deleted)
- Result = merge(Result, breakBackedgeIfNotTaken(L, DT, SE, LI, MSSA, ORE));
-
- if (Result == LoopDeletionResult::Deleted)
- LPM.markLoopAsDeleted(*L);
-
- return Result != LoopDeletionResult::Unmodified;
-}
diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 7b52b7dca85f..27196e46ca56 100644
--- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -52,13 +52,10 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -995,45 +992,6 @@ static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
return Changed;
}
-namespace {
-
-/// The pass class.
-class LoopDistributeLegacy : public FunctionPass {
-public:
- static char ID;
-
- LoopDistributeLegacy() : FunctionPass(ID) {
- // The default is set by the caller.
- initializeLoopDistributeLegacyPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
-
- auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
- auto &LAIs = getAnalysis<LoopAccessLegacyAnalysis>().getLAIs();
-
- return runImpl(F, LI, DT, SE, ORE, LAIs);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequired<LoopAccessLegacyAnalysis>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
-};
-
-} // end anonymous namespace
-
PreservedAnalyses LoopDistributePass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &LI = AM.getResult<LoopAnalysis>(F);
@@ -1050,18 +1008,3 @@ PreservedAnalyses LoopDistributePass::run(Function &F,
PA.preserve<DominatorTreeAnalysis>();
return PA;
}
-
-char LoopDistributeLegacy::ID;
-
-static const char ldist_name[] = "Loop Distribution";
-
-INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
- false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
-
-FunctionPass *llvm::createLoopDistributePass() { return new LoopDistributeLegacy(); }
diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index 7d9ce8d35e0b..edc8a4956dd1 100644
--- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -65,11 +65,8 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -318,12 +315,12 @@ static bool verifyTripCount(Value *RHS, Loop *L,
return false;
}
- // The Extend=false flag is used for getTripCountFromExitCount as we want
- // to verify and match it with the pattern matched tripcount. Please note
- // that overflow checks are performed in checkOverflow, but are first tried
- // to avoid by widening the IV.
+ // Evaluating in the trip count's type cannot overflow here, as the overflow
+ // checks are performed in checkOverflow, though we first try to avoid them
+ // by widening the IV.
const SCEV *SCEVTripCount =
- SE->getTripCountFromExitCount(BackedgeTakenCount, /*Extend=*/false);
+ SE->getTripCountFromExitCount(BackedgeTakenCount,
+ BackedgeTakenCount->getType(), L);
const SCEV *SCEVRHS = SE->getSCEV(RHS);
if (SCEVRHS == SCEVTripCount)
@@ -336,7 +333,8 @@ static bool verifyTripCount(Value *RHS, Loop *L,
// Find the extended backedge taken count and extended trip count using
// SCEV. One of these should now match the RHS of the compare.
BackedgeTCExt = SE->getZeroExtendExpr(BackedgeTakenCount, RHS->getType());
- SCEVTripCountExt = SE->getTripCountFromExitCount(BackedgeTCExt, false);
+ SCEVTripCountExt = SE->getTripCountFromExitCount(BackedgeTCExt,
+ RHS->getType(), L);
if (SCEVRHS != BackedgeTCExt && SCEVRHS != SCEVTripCountExt) {
LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
return false;
@@ -918,20 +916,6 @@ static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI, U, MSSAU);
}
-bool Flatten(LoopNest &LN, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE,
- AssumptionCache *AC, TargetTransformInfo *TTI, LPMUpdater *U,
- MemorySSAUpdater *MSSAU) {
- bool Changed = false;
- for (Loop *InnerLoop : LN.getLoops()) {
- auto *OuterLoop = InnerLoop->getParentLoop();
- if (!OuterLoop)
- continue;
- FlattenInfo FI(OuterLoop, InnerLoop);
- Changed |= FlattenLoopPair(FI, DT, LI, SE, AC, TTI, U, MSSAU);
- }
- return Changed;
-}
-
PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM,
LoopStandardAnalysisResults &AR,
LPMUpdater &U) {
@@ -949,8 +933,14 @@ PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM,
// in simplified form, and also needs LCSSA. Running
// this pass will simplify all loops that contain inner loops,
// regardless of whether anything ends up being flattened.
- Changed |= Flatten(LN, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI, &U,
- MSSAU ? &*MSSAU : nullptr);
+ for (Loop *InnerLoop : LN.getLoops()) {
+ auto *OuterLoop = InnerLoop->getParentLoop();
+ if (!OuterLoop)
+ continue;
+ FlattenInfo FI(OuterLoop, InnerLoop);
+ Changed |= FlattenLoopPair(FI, &AR.DT, &AR.LI, &AR.SE, &AR.AC, &AR.TTI, &U,
+ MSSAU ? &*MSSAU : nullptr);
+ }
if (!Changed)
return PreservedAnalyses::all();
@@ -963,60 +953,3 @@ PreservedAnalyses LoopFlattenPass::run(LoopNest &LN, LoopAnalysisManager &LAM,
PA.preserve<MemorySSAAnalysis>();
return PA;
}
-
-namespace {
-class LoopFlattenLegacyPass : public FunctionPass {
-public:
- static char ID; // Pass ID, replacement for typeid
- LoopFlattenLegacyPass() : FunctionPass(ID) {
- initializeLoopFlattenLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- // Possibly flatten loop L into its child.
- bool runOnFunction(Function &F) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- getLoopAnalysisUsage(AU);
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addPreserved<TargetTransformInfoWrapperPass>();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addPreserved<AssumptionCacheTracker>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
-};
-} // namespace
-
-char LoopFlattenLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_END(LoopFlattenLegacyPass, "loop-flatten", "Flattens loops",
- false, false)
-
-FunctionPass *llvm::createLoopFlattenPass() {
- return new LoopFlattenLegacyPass();
-}
-
-bool LoopFlattenLegacyPass::runOnFunction(Function &F) {
- ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- auto &TTIP = getAnalysis<TargetTransformInfoWrapperPass>();
- auto *TTI = &TTIP.getTTI(F);
- auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto *MSSA = getAnalysisIfAvailable<MemorySSAWrapperPass>();
-
- std::optional<MemorySSAUpdater> MSSAU;
- if (MSSA)
- MSSAU = MemorySSAUpdater(&MSSA->getMSSA());
-
- bool Changed = false;
- for (Loop *L : *LI) {
- auto LN = LoopNest::getLoopNest(*L, *SE);
- Changed |=
- Flatten(*LN, DT, LI, SE, AC, TTI, nullptr, MSSAU ? &*MSSAU : nullptr);
- }
- return Changed;
-}
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 0eecec373736..d35b562be0aa 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -57,12 +57,9 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Verifier.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeMoverUtils.h"
@@ -2061,51 +2058,6 @@ private:
return FC0.L;
}
};
-
-struct LoopFuseLegacy : public FunctionPass {
-
- static char ID;
-
- LoopFuseLegacy() : FunctionPass(ID) {
- initializeLoopFuseLegacyPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<PostDominatorTreeWrapperPass>();
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- AU.addRequired<DependenceAnalysisWrapperPass>();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
-
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<PostDominatorTreeWrapperPass>();
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
-
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
- auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
- auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- const TargetTransformInfo &TTI =
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- const DataLayout &DL = F.getParent()->getDataLayout();
-
- LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI);
- return LF.fuseLoops(F);
- }
-};
} // namespace
PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) {
@@ -2142,19 +2094,3 @@ PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserve<LoopAnalysis>();
return PA;
}
-
-char LoopFuseLegacy::ID = 0;
-
-INITIALIZE_PASS_BEGIN(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false, false)
-
-FunctionPass *llvm::createLoopFusePass() { return new LoopFuseLegacy(); }
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 035cbdf595a8..8572a442e784 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -84,14 +84,11 @@
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstructionCost.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -254,62 +251,8 @@ private:
/// @}
};
-
-class LoopIdiomRecognizeLegacyPass : public LoopPass {
-public:
- static char ID;
-
- explicit LoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
- initializeLoopIdiomRecognizeLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (DisableLIRP::All)
- return false;
-
- if (skipLoop(L))
- return false;
-
- AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
- *L->getHeader()->getParent());
- const TargetTransformInfo *TTI =
- &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *L->getHeader()->getParent());
- const DataLayout *DL = &L->getHeader()->getModule()->getDataLayout();
- auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
- MemorySSA *MSSA = nullptr;
- if (MSSAAnalysis)
- MSSA = &MSSAAnalysis->getMSSA();
-
- // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
- // pass. Function analyses need to be preserved across loop transformations
- // but ORE cannot be preserved (see comment before the pass definition).
- OptimizationRemarkEmitter ORE(L->getHeader()->getParent());
-
- LoopIdiomRecognize LIR(AA, DT, LI, SE, TLI, TTI, MSSA, DL, ORE);
- return LIR.runOnLoop(L);
- }
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG.
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- getLoopAnalysisUsage(AU);
- }
-};
-
} // end anonymous namespace
-char LoopIdiomRecognizeLegacyPass::ID = 0;
-
PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
@@ -334,16 +277,6 @@ PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L, LoopAnalysisManager &AM,
return PA;
}
-INITIALIZE_PASS_BEGIN(LoopIdiomRecognizeLegacyPass, "loop-idiom",
- "Recognize loop idioms", false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(LoopIdiomRecognizeLegacyPass, "loop-idiom",
- "Recognize loop idioms", false, false)
-
-Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognizeLegacyPass(); }
-
static void deleteDeadInstruction(Instruction *I) {
I->replaceAllUsesWith(PoisonValue::get(I->getType()));
I->eraseFromParent();
@@ -1050,33 +983,6 @@ static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
return SE->getMinusSCEV(Start, Index);
}
-/// Compute trip count from the backedge taken count.
-static const SCEV *getTripCount(const SCEV *BECount, Type *IntPtr,
- Loop *CurLoop, const DataLayout *DL,
- ScalarEvolution *SE) {
- const SCEV *TripCountS = nullptr;
- // The # stored bytes is (BECount+1). Expand the trip count out to
- // pointer size if it isn't already.
- //
- // If we're going to need to zero extend the BE count, check if we can add
- // one to it prior to zero extending without overflow. Provided this is safe,
- // it allows better simplification of the +1.
- if (DL->getTypeSizeInBits(BECount->getType()) <
- DL->getTypeSizeInBits(IntPtr) &&
- SE->isLoopEntryGuardedByCond(
- CurLoop, ICmpInst::ICMP_NE, BECount,
- SE->getNegativeSCEV(SE->getOne(BECount->getType())))) {
- TripCountS = SE->getZeroExtendExpr(
- SE->getAddExpr(BECount, SE->getOne(BECount->getType()), SCEV::FlagNUW),
- IntPtr);
- } else {
- TripCountS = SE->getAddExpr(SE->getTruncateOrZeroExtend(BECount, IntPtr),
- SE->getOne(IntPtr), SCEV::FlagNUW);
- }
-
- return TripCountS;
-}
-
/// Compute the number of bytes as a SCEV from the backedge taken count.
///
/// This also maps the SCEV into the provided type and tries to handle the
@@ -1084,8 +990,8 @@ static const SCEV *getTripCount(const SCEV *BECount, Type *IntPtr,
static const SCEV *getNumBytes(const SCEV *BECount, Type *IntPtr,
const SCEV *StoreSizeSCEV, Loop *CurLoop,
const DataLayout *DL, ScalarEvolution *SE) {
- const SCEV *TripCountSCEV = getTripCount(BECount, IntPtr, CurLoop, DL, SE);
-
+ const SCEV *TripCountSCEV =
+ SE->getTripCountFromExitCount(BECount, IntPtr, CurLoop);
return SE->getMulExpr(TripCountSCEV,
SE->getTruncateOrZeroExtend(StoreSizeSCEV, IntPtr),
SCEV::FlagNUW);
@@ -1168,20 +1074,24 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Value *NumBytes =
Expander.expandCodeFor(NumBytesS, IntIdxTy, Preheader->getTerminator());
+ if (!SplatValue && !isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16))
+ return Changed;
+
+ AAMDNodes AATags = TheStore->getAAMetadata();
+ for (Instruction *Store : Stores)
+ AATags = AATags.merge(Store->getAAMetadata());
+ if (auto CI = dyn_cast<ConstantInt>(NumBytes))
+ AATags = AATags.extendTo(CI->getZExtValue());
+ else
+ AATags = AATags.extendTo(-1);
+
CallInst *NewCall;
if (SplatValue) {
- AAMDNodes AATags = TheStore->getAAMetadata();
- for (Instruction *Store : Stores)
- AATags = AATags.merge(Store->getAAMetadata());
- if (auto CI = dyn_cast<ConstantInt>(NumBytes))
- AATags = AATags.extendTo(CI->getZExtValue());
- else
- AATags = AATags.extendTo(-1);
-
NewCall = Builder.CreateMemSet(
BasePtr, SplatValue, NumBytes, MaybeAlign(StoreAlignment),
/*isVolatile=*/false, AATags.TBAA, AATags.Scope, AATags.NoAlias);
- } else if (isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16)) {
+ } else {
+ assert(isLibFuncEmittable(M, TLI, LibFunc_memset_pattern16));
// Everything is emitted in default address space
Type *Int8PtrTy = DestInt8PtrTy;
@@ -1199,8 +1109,17 @@ bool LoopIdiomRecognize::processLoopStridedStore(
GV->setAlignment(Align(16));
Value *PatternPtr = ConstantExpr::getBitCast(GV, Int8PtrTy);
NewCall = Builder.CreateCall(MSP, {BasePtr, PatternPtr, NumBytes});
- } else
- return Changed;
+
+ // Set the TBAA info if present.
+ if (AATags.TBAA)
+ NewCall->setMetadata(LLVMContext::MD_tbaa, AATags.TBAA);
+
+ if (AATags.Scope)
+ NewCall->setMetadata(LLVMContext::MD_alias_scope, AATags.Scope);
+
+ if (AATags.NoAlias)
+ NewCall->setMetadata(LLVMContext::MD_noalias, AATags.NoAlias);
+ }
NewCall->setDebugLoc(TheStore->getDebugLoc());
@@ -2471,7 +2390,7 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
// intrinsic/shift we'll use are not cheap. Note that we are okay with *just*
// making the loop countable, even if nothing else changes.
IntrinsicCostAttributes Attrs(
- IntrID, Ty, {UndefValue::get(Ty), /*is_zero_undef=*/Builder.getTrue()});
+ IntrID, Ty, {PoisonValue::get(Ty), /*is_zero_poison=*/Builder.getTrue()});
InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
if (Cost > TargetTransformInfo::TCC_Basic) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
@@ -2487,6 +2406,24 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
// Ok, transform appears worthwhile.
MadeChange = true;
+ if (!isGuaranteedNotToBeUndefOrPoison(BitPos)) {
+ // BitMask may be computed from BitPos; freeze BitPos so that we can safely
+ // increase its use count.
+ Instruction *InsertPt = nullptr;
+ if (auto *BitPosI = dyn_cast<Instruction>(BitPos))
+ InsertPt = BitPosI->getInsertionPointAfterDef();
+ else
+ InsertPt = &*DT->getRoot()->getFirstNonPHIOrDbgOrAlloca();
+ if (!InsertPt)
+ return false;
+ FreezeInst *BitPosFrozen =
+ new FreezeInst(BitPos, BitPos->getName() + ".fr", InsertPt);
+ BitPos->replaceUsesWithIf(BitPosFrozen, [BitPosFrozen](Use &U) {
+ return U.getUser() != BitPosFrozen;
+ });
+ BitPos = BitPosFrozen;
+ }
+
// Step 1: Compute the loop trip count.
Value *LowBitMask = Builder.CreateAdd(BitMask, Constant::getAllOnesValue(Ty),
@@ -2495,7 +2432,7 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
Builder.CreateOr(LowBitMask, BitMask, BitPos->getName() + ".mask");
Value *XMasked = Builder.CreateAnd(X, Mask, X->getName() + ".masked");
CallInst *XMaskedNumLeadingZeros = Builder.CreateIntrinsic(
- IntrID, Ty, {XMasked, /*is_zero_undef=*/Builder.getTrue()},
+ IntrID, Ty, {XMasked, /*is_zero_poison=*/Builder.getTrue()},
/*FMFSource=*/nullptr, XMasked->getName() + ".numleadingzeros");
Value *XMaskedNumActiveBits = Builder.CreateSub(
ConstantInt::get(Ty, Ty->getScalarSizeInBits()), XMaskedNumLeadingZeros,
@@ -2825,7 +2762,7 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
// intrinsic we'll use are not cheap. Note that we are okay with *just*
// making the loop countable, even if nothing else changes.
IntrinsicCostAttributes Attrs(
- IntrID, Ty, {UndefValue::get(Ty), /*is_zero_undef=*/Builder.getFalse()});
+ IntrID, Ty, {PoisonValue::get(Ty), /*is_zero_poison=*/Builder.getFalse()});
InstructionCost Cost = TTI->getIntrinsicInstrCost(Attrs, CostKind);
if (Cost > TargetTransformInfo::TCC_Basic) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE
@@ -2843,7 +2780,7 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
// Step 1: Compute the loop's final IV value / trip count.
CallInst *ValNumLeadingZeros = Builder.CreateIntrinsic(
- IntrID, Ty, {Val, /*is_zero_undef=*/Builder.getFalse()},
+ IntrID, Ty, {Val, /*is_zero_poison=*/Builder.getFalse()},
/*FMFSource=*/nullptr, Val->getName() + ".numleadingzeros");
Value *ValNumActiveBits = Builder.CreateSub(
ConstantInt::get(Ty, Ty->getScalarSizeInBits()), ValNumLeadingZeros,
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 0a7c62113c7f..91286ebcea33 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -30,20 +30,16 @@
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -187,8 +183,7 @@ static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
// if the direction matrix, after the same permutation is applied to its
// columns, has no ">" direction as the leftmost non-"=" direction in any row.
static bool isLexicographicallyPositive(std::vector<char> &DV) {
- for (unsigned Level = 0; Level < DV.size(); ++Level) {
- unsigned char Direction = DV[Level];
+ for (unsigned char Direction : DV) {
if (Direction == '<')
return true;
if (Direction == '>' || Direction == '*')
@@ -736,7 +731,6 @@ bool LoopInterchangeLegality::findInductionAndReductions(
if (!L->getLoopLatch() || !L->getLoopPredecessor())
return false;
for (PHINode &PHI : L->getHeader()->phis()) {
- RecurrenceDescriptor RD;
InductionDescriptor ID;
if (InductionDescriptor::isInductionPHI(&PHI, L, SE, ID))
Inductions.push_back(&PHI);
@@ -1105,8 +1099,7 @@ LoopInterchangeProfitability::isProfitablePerLoopCacheAnalysis(
// This is the new cost model returned from loop cache analysis.
// A smaller index means the loop should be placed an outer loop, and vice
// versa.
- if (CostMap.find(InnerLoop) != CostMap.end() &&
- CostMap.find(OuterLoop) != CostMap.end()) {
+ if (CostMap.contains(InnerLoop) && CostMap.contains(OuterLoop)) {
unsigned InnerIndex = 0, OuterIndex = 0;
InnerIndex = CostMap.find(InnerLoop)->second;
OuterIndex = CostMap.find(OuterLoop)->second;
@@ -1692,12 +1685,11 @@ bool LoopInterchangeTransform::adjustLoopBranches() {
// latch. In that case, we need to create LCSSA phis for them, because after
// interchanging they will be defined in the new inner loop and used in the
// new outer loop.
- IRBuilder<> Builder(OuterLoopHeader->getContext());
SmallVector<Instruction *, 4> MayNeedLCSSAPhis;
for (Instruction &I :
make_range(OuterLoopHeader->begin(), std::prev(OuterLoopHeader->end())))
MayNeedLCSSAPhis.push_back(&I);
- formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE, Builder);
+ formLCSSAForInstructions(MayNeedLCSSAPhis, *DT, *LI, SE);
return true;
}
@@ -1716,52 +1708,6 @@ bool LoopInterchangeTransform::adjustLoopLinks() {
return Changed;
}
-namespace {
-/// Main LoopInterchange Pass.
-struct LoopInterchangeLegacyPass : public LoopPass {
- static char ID;
-
- LoopInterchangeLegacyPass() : LoopPass(ID) {
- initializeLoopInterchangeLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DependenceAnalysisWrapperPass>();
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
-
- getLoopAnalysisUsage(AU);
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (skipLoop(L))
- return false;
-
- auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *DI = &getAnalysis<DependenceAnalysisWrapperPass>().getDI();
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
- std::unique_ptr<CacheCost> CC = nullptr;
- return LoopInterchange(SE, LI, DI, DT, CC, ORE).run(L);
- }
-};
-} // namespace
-
-char LoopInterchangeLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(LoopInterchangeLegacyPass, "loop-interchange",
- "Interchanges loops for cache reuse", false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopPass)
-INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-
-INITIALIZE_PASS_END(LoopInterchangeLegacyPass, "loop-interchange",
- "Interchanges loops for cache reuse", false, false)
-
-Pass *llvm::createLoopInterchangePass() {
- return new LoopInterchangeLegacyPass();
-}
-
PreservedAnalyses LoopInterchangePass::run(LoopNest &LN,
LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
diff --git a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index b615a0a0a9c0..179ccde8d035 100644
--- a/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -46,13 +46,10 @@
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
@@ -91,8 +88,9 @@ struct StoreToLoadForwardingCandidate {
StoreToLoadForwardingCandidate(LoadInst *Load, StoreInst *Store)
: Load(Load), Store(Store) {}
- /// Return true if the dependence from the store to the load has a
- /// distance of one. E.g. A[i+1] = A[i]
+ /// Return true if the dependence from the store to the load has an
+ /// absolute distance of one.
+ /// E.g. A[i+1] = A[i] (or A[i-1] = A[i] for a descending loop).
bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
Loop *L) const {
Value *LoadPtr = Load->getPointerOperand();
@@ -106,11 +104,19 @@ struct StoreToLoadForwardingCandidate {
DL.getTypeSizeInBits(getLoadStoreType(Store)) &&
"Should be a known dependence");
- // Currently we only support accesses with unit stride. FIXME: we should be
- // able to handle non unit stirde as well as long as the stride is equal to
- // the dependence distance.
- if (getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0) != 1 ||
- getPtrStride(PSE, LoadType, StorePtr, L).value_or(0) != 1)
+ int64_t StrideLoad = getPtrStride(PSE, LoadType, LoadPtr, L).value_or(0);
+ int64_t StrideStore = getPtrStride(PSE, LoadType, StorePtr, L).value_or(0);
+ if (!StrideLoad || !StrideStore || StrideLoad != StrideStore)
+ return false;
+
+ // TODO: This check for stride values other than 1 and -1 can be eliminated.
+ // However, doing so may cause the LoopAccessAnalysis to overcompensate,
+ // generating numerous non-wrap runtime checks that may undermine the
+ // benefits of load elimination. To safely implement support for non-unit
+ // strides, we would need to ensure either that the processed case does not
+ // require these additional checks, or improve the LAA to handle them more
+ // efficiently, or potentially both.
+ if (std::abs(StrideLoad) != 1)
return false;
unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
@@ -123,7 +129,7 @@ struct StoreToLoadForwardingCandidate {
auto *Dist = cast<SCEVConstant>(
PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
const APInt &Val = Dist->getAPInt();
- return Val == TypeByteSize;
+ return Val == TypeByteSize * StrideLoad;
}
Value *getLoadPtr() const { return Load->getPointerOperand(); }
@@ -658,70 +664,6 @@ static bool eliminateLoadsAcrossLoops(Function &F, LoopInfo &LI,
return Changed;
}
-namespace {
-
-/// The pass. Most of the work is delegated to the per-loop
-/// LoadEliminationForLoop class.
-class LoopLoadElimination : public FunctionPass {
-public:
- static char ID;
-
- LoopLoadElimination() : FunctionPass(ID) {
- initializeLoopLoadEliminationPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
-
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto &LAIs = getAnalysis<LoopAccessLegacyAnalysis>().getLAIs();
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- auto *BFI = (PSI && PSI->hasProfileSummary()) ?
- &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
- nullptr;
- auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
-
- // Process each loop nest in the function.
- return eliminateLoadsAcrossLoops(F, LI, DT, BFI, PSI, SE, /*AC*/ nullptr,
- LAIs);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequired<LoopAccessLegacyAnalysis>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
- }
-};
-
-} // end anonymous namespace
-
-char LoopLoadElimination::ID;
-
-static const char LLE_name[] = "Loop Load Elimination";
-
-INITIALIZE_PASS_BEGIN(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
-INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
-
-FunctionPass *llvm::createLoopLoadEliminationPass() {
- return new LoopLoadElimination();
-}
-
PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &LI = AM.getResult<LoopAnalysis>(F);
@@ -744,5 +686,7 @@ PreservedAnalyses LoopLoadEliminationPass::run(Function &F,
return PreservedAnalyses::all();
PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LoopAnalysis>();
return PA;
}
diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
index c98b94b56e48..2c8a3351281b 100644
--- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp
@@ -59,7 +59,7 @@ void PassManager<Loop, LoopAnalysisManager, LoopStandardAnalysisResults &,
P->printPipeline(OS, MapClassName2PassName);
}
if (Idx + 1 < Size)
- OS << ",";
+ OS << ',';
}
}
@@ -193,7 +193,7 @@ void FunctionToLoopPassAdaptor::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
OS << (UseMemorySSA ? "loop-mssa(" : "loop(");
Pass->printPipeline(OS, MapClassName2PassName);
- OS << ")";
+ OS << ')';
}
PreservedAnalyses FunctionToLoopPassAdaptor::run(Function &F,
FunctionAnalysisManager &AM) {
diff --git a/llvm/lib/Transforms/Scalar/LoopPredication.cpp b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
index 49c0fff84d81..12852ae5c460 100644
--- a/llvm/lib/Transforms/Scalar/LoopPredication.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopPredication.cpp
@@ -623,7 +623,8 @@ std::optional<Value *> LoopPredication::widenICmpRangeCheckIncrementingLoop(
auto *FirstIterationCheck = expandCheck(Expander, Guard, RangeCheck.Pred,
GuardStart, GuardLimit);
IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));
- return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
+ return Builder.CreateFreeze(
+ Builder.CreateAnd(FirstIterationCheck, LimitCheck));
}
std::optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(
@@ -671,7 +672,8 @@ std::optional<Value *> LoopPredication::widenICmpRangeCheckDecrementingLoop(
auto *LimitCheck = expandCheck(Expander, Guard, LimitCheckPred, LatchLimit,
SE->getOne(Ty));
IRBuilder<> Builder(findInsertPt(Guard, {FirstIterationCheck, LimitCheck}));
- return Builder.CreateAnd(FirstIterationCheck, LimitCheck);
+ return Builder.CreateFreeze(
+ Builder.CreateAnd(FirstIterationCheck, LimitCheck));
}
static void normalizePredicate(ScalarEvolution *SE, Loop *L,
@@ -863,7 +865,19 @@ bool LoopPredication::widenWidenableBranchGuardConditions(
BI->setCondition(AllChecks);
if (InsertAssumesOfPredicatedGuardsConditions) {
Builder.SetInsertPoint(IfTrueBB, IfTrueBB->getFirstInsertionPt());
- Builder.CreateAssumption(Cond);
+ // If this block has other predecessors, we might not be able to use Cond.
+ // In this case, create a Phi where every other input is `true` and the
+ // input from the guard block is Cond.
+ Value *AssumeCond = Cond;
+ if (!IfTrueBB->getUniquePredecessor()) {
+ auto *GuardBB = BI->getParent();
+ auto *PN = Builder.CreatePHI(Cond->getType(), pred_size(IfTrueBB),
+ "assume.cond");
+ for (auto *Pred : predecessors(IfTrueBB))
+ PN->addIncoming(Pred == GuardBB ? Cond : Builder.getTrue(), Pred);
+ AssumeCond = PN;
+ }
+ Builder.CreateAssumption(AssumeCond);
}
RecursivelyDeleteTriviallyDeadInstructions(OldCond, nullptr /* TLI */, MSSAU);
assert(isGuardAsWidenableBranch(BI) &&
@@ -1161,6 +1175,11 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
if (ChangedLoop)
SE->forgetLoop(L);
+ // The insertion point for the widening should be at the widenable call, not
+ // at the WidenableBR. If we do this at the WidenableBR, we can incorrectly
+ // change a loop-invariant condition to a loop-varying one.
+ auto *IP = cast<Instruction>(WidenableBR->getCondition());
+
// The use of umin(all analyzeable exits) instead of latch is subtle, but
// important for profitability. We may have a loop which hasn't been fully
// canonicalized just yet. If the exit we chose to widen is provably never
@@ -1170,21 +1189,9 @@ bool LoopPredication::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
const SCEV *MinEC = getMinAnalyzeableBackedgeTakenCount(*SE, *DT, L);
if (isa<SCEVCouldNotCompute>(MinEC) || MinEC->getType()->isPointerTy() ||
!SE->isLoopInvariant(MinEC, L) ||
- !Rewriter.isSafeToExpandAt(MinEC, WidenableBR))
+ !Rewriter.isSafeToExpandAt(MinEC, IP))
return ChangedLoop;
- // Subtlety: We need to avoid inserting additional uses of the WC. We know
- // that it can only have one transitive use at the moment, and thus moving
- // that use to just before the branch and inserting code before it and then
- // modifying the operand is legal.
- auto *IP = cast<Instruction>(WidenableBR->getCondition());
- // Here we unconditionally modify the IR, so after this point we should return
- // only `true`!
- IP->moveBefore(WidenableBR);
- if (MSSAU)
- if (auto *MUD = MSSAU->getMemorySSA()->getMemoryAccess(IP))
- MSSAU->moveToPlace(MUD, WidenableBR->getParent(),
- MemorySSA::BeforeTerminator);
Rewriter.setInsertPoint(IP);
IRBuilder<> B(IP);
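
For context on the two CreateFreeze hunks above: branching on poison is immediate UB, and folding the first-iteration check and the limit check into a plain `and` can make the widened guard branch on poison in cases where the original control flow never evaluated the second check. Freezing the combined value pins any poison to an arbitrary but defined bit. A minimal sketch, assuming Builder is already positioned and A/B are the two i1 checks:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    static Value *combineChecksPoisonSafe(IRBuilder<> &Builder, Value *A,
                                          Value *B) {
      Value *Both = Builder.CreateAnd(A, B); // poison if either input is poison
      return Builder.CreateFreeze(Both);     // now safe to branch on or assume
    }
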
diff --git a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index a0b3189c7e09..7f62526a4f6d 100644
--- a/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -39,13 +39,10 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopReroll.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -157,22 +154,6 @@ namespace {
IL_End
};
- class LoopRerollLegacyPass : public LoopPass {
- public:
- static char ID; // Pass ID, replacement for typeid
-
- LoopRerollLegacyPass() : LoopPass(ID) {
- initializeLoopRerollLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- getLoopAnalysisUsage(AU);
- }
- };
-
class LoopReroll {
public:
LoopReroll(AliasAnalysis *AA, LoopInfo *LI, ScalarEvolution *SE,
@@ -490,17 +471,6 @@ namespace {
} // end anonymous namespace
-char LoopRerollLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(LoopRerollLegacyPass, "loop-reroll", "Reroll loops",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(LoopRerollLegacyPass, "loop-reroll", "Reroll loops", false,
- false)
-
-Pass *llvm::createLoopRerollPass() { return new LoopRerollLegacyPass; }
-
// Returns true if the provided instruction is used outside the given loop.
// This operates like Instruction::isUsedOutsideOfBlock, but considers PHIs in
// non-loop blocks to be outside the loop.
@@ -1700,21 +1670,6 @@ bool LoopReroll::runOnLoop(Loop *L) {
return Changed;
}
-bool LoopRerollLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipLoop(L))
- return false;
-
- auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
- *L->getHeader()->getParent());
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-
- return LoopReroll(AA, LI, SE, TLI, DT, PreserveLCSSA).runOnLoop(L);
-}
-
PreservedAnalyses LoopRerollPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &U) {
diff --git a/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index ba735adc5b27..eee855058706 100644
--- a/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -43,6 +43,21 @@ LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication, bool PrepareForLTO)
: EnableHeaderDuplication(EnableHeaderDuplication),
PrepareForLTO(PrepareForLTO) {}
+void LoopRotatePass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<LoopRotatePass> *>(this)->printPipeline(
+ OS, MapClassName2PassName);
+ OS << "<";
+ if (!EnableHeaderDuplication)
+ OS << "no-";
+ OS << "header-duplication;";
+
+ if (!PrepareForLTO)
+ OS << "no-";
+ OS << "prepare-for-lto";
+ OS << ">";
+}
+
PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &) {
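
The printPipeline override added above serializes the two constructor flags so a textual pipeline description can round-trip. A sketch of the strings it should produce, assuming the pass prints under the name loop-rotate (for instance in opt's -print-pipeline-passes output): with header duplication enabled and LTO preparation disabled it prints

    loop-rotate<header-duplication;no-prepare-for-lto>

and with both flags flipped

    loop-rotate<no-header-duplication;prepare-for-lto>
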
diff --git a/llvm/lib/Transforms/Scalar/LoopSink.cpp b/llvm/lib/Transforms/Scalar/LoopSink.cpp
index 21025b0bdb33..597c159682c5 100644
--- a/llvm/lib/Transforms/Scalar/LoopSink.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -177,13 +177,27 @@ static bool sinkInstruction(
SmallPtrSet<BasicBlock *, 2> BBs;
for (auto &U : I.uses()) {
Instruction *UI = cast<Instruction>(U.getUser());
- // We cannot sink I to PHI-uses.
- if (isa<PHINode>(UI))
- return false;
+
// We cannot sink I if it has uses outside of the loop.
if (!L.contains(LI.getLoopFor(UI->getParent())))
return false;
- BBs.insert(UI->getParent());
+
+ if (!isa<PHINode>(UI)) {
+ BBs.insert(UI->getParent());
+ continue;
+ }
+
+ // We cannot sink I to PHI-uses; instead, look through the PHI to find the
+ // incoming block of the value being used.
+ PHINode *PN = dyn_cast<PHINode>(UI);
+ BasicBlock *PhiBB = PN->getIncomingBlock(U);
+
+ // If the value's incoming block is the loop preheader, there is no place
+ // to sink to; bail out.
+ if (L.getLoopPreheader() == PhiBB)
+ return false;
+
+ BBs.insert(PhiBB);
}
// findBBsToSinkInto is O(BBs.size() * ColdLoopBBs.size()). We cap the max
@@ -238,9 +252,11 @@ static bool sinkInstruction(
}
}
- // Replaces uses of I with IC in N
+ // Replaces uses of I with IC in N, except PHI-uses, which are taken care
+ // of by the defs in the PHI's incoming blocks.
I.replaceUsesWithIf(IC, [N](Use &U) {
- return cast<Instruction>(U.getUser())->getParent() == N;
+ Instruction *UIToReplace = cast<Instruction>(U.getUser());
+ return UIToReplace->getParent() == N && !isa<PHINode>(UIToReplace);
});
// Replaces uses of I with IC in blocks dominated by N
replaceDominatedUsesWith(&I, IC, DT, N);
@@ -283,7 +299,7 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
return false;
MemorySSAUpdater MSSAU(&MSSA);
- SinkAndHoistLICMFlags LICMFlags(/*IsSink=*/true, &L, &MSSA);
+ SinkAndHoistLICMFlags LICMFlags(/*IsSink=*/true, L, MSSA);
bool Changed = false;
@@ -323,6 +339,11 @@ static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
}
PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
+ // Enable LoopSink only when runtime profile is available.
+ // With static profile, the sinking decision may be sub-optimal.
+ if (!F.hasProfileData())
+ return PreservedAnalyses::all();
+
LoopInfo &LI = FAM.getResult<LoopAnalysis>(F);
// Nothing to do if there are no loops.
if (LI.empty())
@@ -348,11 +369,6 @@ PreservedAnalyses LoopSinkPass::run(Function &F, FunctionAnalysisManager &FAM) {
if (!Preheader)
continue;
- // Enable LoopSink only when runtime profile is available.
- // With static profile, the sinking decision may be sub-optimal.
- if (!Preheader->getParent()->hasProfileData())
- continue;
-
// Note that we don't pass SCEV here because it is only used to invalidate
// loops in SCEV and we don't preserve (or request) SCEV at all making that
// unnecessary.
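
To make the reworked use-walk above concrete: the block a use pins the sink to is now the user's own block for ordinary uses, but the incoming block of the edge for PHI uses. A hedged sketch of that lookup in isolation (hypothetical helper, mirroring the logic above):

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static BasicBlock *sinkTargetForUse(Use &U) {
      auto *UI = cast<Instruction>(U.getUser());
      if (auto *PN = dyn_cast<PHINode>(UI))
        return PN->getIncomingBlock(U); // block this edge needs the value in
      return UI->getParent();           // ordinary use: the user's block
    }
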
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 4c89f947d7fc..a4369b83e732 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -799,7 +799,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
/// value, and mutate S to point to a new SCEV with that value excluded.
static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
- if (C->getAPInt().getMinSignedBits() <= 64) {
+ if (C->getAPInt().getSignificantBits() <= 64) {
S = SE.getConstant(C->getType(), 0);
return C->getValue()->getSExtValue();
}
@@ -896,9 +896,14 @@ static bool isAddressUse(const TargetTransformInfo &TTI,
/// Return the type of the memory being accessed.
static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
Instruction *Inst, Value *OperandVal) {
- MemAccessTy AccessTy(Inst->getType(), MemAccessTy::UnknownAddressSpace);
+ MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext());
+
+ // First get the type of memory being accessed.
+ if (Type *Ty = Inst->getAccessType())
+ AccessTy.MemTy = Ty;
+
+ // Then get the pointer address space.
if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- AccessTy.MemTy = SI->getOperand(0)->getType();
AccessTy.AddrSpace = SI->getPointerAddressSpace();
} else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
AccessTy.AddrSpace = LI->getPointerAddressSpace();
@@ -923,7 +928,6 @@ static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
II->getArgOperand(0)->getType()->getPointerAddressSpace();
break;
case Intrinsic::masked_store:
- AccessTy.MemTy = II->getOperand(0)->getType();
AccessTy.AddrSpace =
II->getArgOperand(1)->getType()->getPointerAddressSpace();
break;
@@ -976,6 +980,7 @@ static bool isHighCostExpansion(const SCEV *S,
switch (S->getSCEVType()) {
case scUnknown:
case scConstant:
+ case scVScale:
return false;
case scTruncate:
return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
@@ -1414,7 +1419,7 @@ void Cost::RateFormula(const Formula &F,
C.ImmCost += 64; // Handle symbolic values conservatively.
// TODO: This should probably be the pointer size.
else if (Offset != 0)
- C.ImmCost += APInt(64, Offset, true).getMinSignedBits();
+ C.ImmCost += APInt(64, Offset, true).getSignificantBits();
// Check with target if this offset with this instruction is
// specifically not supported.
@@ -2498,7 +2503,7 @@ LSRInstance::OptimizeLoopTermCond() {
if (C->isOne() || C->isMinusOne())
goto decline_post_inc;
// Avoid weird situations.
- if (C->getValue().getMinSignedBits() >= 64 ||
+ if (C->getValue().getSignificantBits() >= 64 ||
C->getValue().isMinSignedValue())
goto decline_post_inc;
// Check for possible scaled-address reuse.
@@ -2508,13 +2513,13 @@ LSRInstance::OptimizeLoopTermCond() {
int64_t Scale = C->getSExtValue();
if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
/*BaseOffset=*/0,
- /*HasBaseReg=*/false, Scale,
+ /*HasBaseReg=*/true, Scale,
AccessTy.AddrSpace))
goto decline_post_inc;
Scale = -Scale;
if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
/*BaseOffset=*/0,
- /*HasBaseReg=*/false, Scale,
+ /*HasBaseReg=*/true, Scale,
AccessTy.AddrSpace))
goto decline_post_inc;
}
@@ -2660,8 +2665,7 @@ LSRUse *
LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
const LSRUse &OrigLU) {
// Search all uses for the formula. This could be more clever.
- for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
- LSRUse &LU = Uses[LUIdx];
+ for (LSRUse &LU : Uses) {
// Check whether this use is close enough to OrigLU, to see whether it's
// worthwhile looking through its formulae.
// Ignore ICmpZero uses because they may contain formulae generated by
@@ -2703,6 +2707,8 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
SmallVector<const SCEV *, 4> Worklist;
for (const IVStrideUse &U : IU) {
const SCEV *Expr = IU.getExpr(U);
+ if (!Expr)
+ continue;
// Collect interesting types.
Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
@@ -2740,13 +2746,13 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
SE, true))) {
- if (Factor->getAPInt().getMinSignedBits() <= 64 && !Factor->isZero())
+ if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
Factors.insert(Factor->getAPInt().getSExtValue());
} else if (const SCEVConstant *Factor =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
NewStride,
SE, true))) {
- if (Factor->getAPInt().getMinSignedBits() <= 64 && !Factor->isZero())
+ if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
Factors.insert(Factor->getAPInt().getSExtValue());
}
}
@@ -2812,9 +2818,10 @@ static bool isCompatibleIVType(Value *LVal, Value *RVal) {
/// SCEVUnknown, we simply return the rightmost SCEV operand.
static const SCEV *getExprBase(const SCEV *S) {
switch (S->getSCEVType()) {
- default: // uncluding scUnknown.
+ default: // including scUnknown.
return S;
case scConstant:
+ case scVScale:
return nullptr;
case scTruncate:
return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
@@ -3175,7 +3182,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
if (!IncConst || !isAddressUse(TTI, UserInst, Operand))
return false;
- if (IncConst->getAPInt().getMinSignedBits() > 64)
+ if (IncConst->getAPInt().getSignificantBits() > 64)
return false;
MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);
@@ -3320,6 +3327,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
}
const SCEV *S = IU.getExpr(U);
+ if (!S)
+ continue;
PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();
// Equality (== and !=) ICmps are special. We can rewrite (i == N) as
@@ -3352,6 +3361,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
+ if (!N)
+ continue;
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
} else if (L->isLoopInvariant(NV) &&
@@ -3366,6 +3377,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// SCEV can't compute the difference of two unknown pointers.
N = SE.getUnknown(NV);
N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
+ if (!N)
+ continue;
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
assert(!isa<SCEVCouldNotCompute>(S));
@@ -3494,8 +3507,8 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
// Look for instructions defined outside the loop.
if (L->contains(Inst)) continue;
- } else if (isa<UndefValue>(V))
- // Undef doesn't have a live range, so it doesn't matter.
+ } else if (isa<Constant>(V))
+ // Constants can be re-materialized.
continue;
for (const Use &U : V->uses()) {
const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
@@ -4137,6 +4150,29 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
}
}
+/// Extend/Truncate \p Expr to \p ToTy considering post-inc uses in \p Loops.
+/// For all PostIncLoopSets in \p Loops, first de-normalize \p Expr, then
+/// perform the extension/truncate and normalize again, as the normalized form
+/// can result in folds that are not valid in the post-inc use contexts. The
+/// expressions for all PostIncLoopSets must match, otherwise return nullptr.
+static const SCEV *
+getAnyExtendConsideringPostIncUses(ArrayRef<PostIncLoopSet> Loops,
+ const SCEV *Expr, Type *ToTy,
+ ScalarEvolution &SE) {
+ const SCEV *Result = nullptr;
+ for (auto &L : Loops) {
+ auto *DenormExpr = denormalizeForPostIncUse(Expr, L, SE);
+ const SCEV *NewDenormExpr = SE.getAnyExtendExpr(DenormExpr, ToTy);
+ const SCEV *New = normalizeForPostIncUse(NewDenormExpr, L, SE);
+ if (!New || (Result && New != Result))
+ return nullptr;
+ Result = New;
+ }
+
+ assert(Result && "failed to create expression");
+ return Result;
+}
+
/// Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
// Don't bother truncating symbolic values.
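
As a usage note for the helper just added: the caller gathers every fixup's post-increment loop set once, then runs each register through the helper and gives up if the per-loop-set results disagree. A sketch of the call pattern, with LU, Reg, Ty and SE assumed to be in scope as in GenerateTruncates below:

    SmallVector<PostIncLoopSet> Loops;
    for (const LSRFixup &LF : LU.Fixups)
      Loops.push_back(LF.PostIncLoops);
    const SCEV *Ext = getAnyExtendConsideringPostIncUses(Loops, Reg, Ty, SE);
    if (!Ext || Ext->isZero())
      return; // normalizations disagreed across fixups, or the value degenerated
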
@@ -4156,6 +4192,10 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
[](const SCEV *S) { return S->getType()->isPointerTy(); }))
return;
+ SmallVector<PostIncLoopSet> Loops;
+ for (auto &LF : LU.Fixups)
+ Loops.push_back(LF.PostIncLoops);
+
for (Type *SrcTy : Types) {
if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
Formula F = Base;
@@ -4165,15 +4205,17 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
// initial node (maybe due to depth limitations), but it can do them while
// taking ext.
if (F.ScaledReg) {
- const SCEV *NewScaledReg = SE.getAnyExtendExpr(F.ScaledReg, SrcTy);
- if (NewScaledReg->isZero())
- continue;
+ const SCEV *NewScaledReg =
+ getAnyExtendConsideringPostIncUses(Loops, F.ScaledReg, SrcTy, SE);
+ if (!NewScaledReg || NewScaledReg->isZero())
+ continue;
F.ScaledReg = NewScaledReg;
}
bool HasZeroBaseReg = false;
for (const SCEV *&BaseReg : F.BaseRegs) {
- const SCEV *NewBaseReg = SE.getAnyExtendExpr(BaseReg, SrcTy);
- if (NewBaseReg->isZero()) {
+ const SCEV *NewBaseReg =
+ getAnyExtendConsideringPostIncUses(Loops, BaseReg, SrcTy, SE);
+ if (!NewBaseReg || NewBaseReg->isZero()) {
HasZeroBaseReg = true;
break;
}
@@ -4379,8 +4421,8 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
if ((C->getAPInt() + NewF.BaseOffset)
.abs()
.slt(std::abs(NewF.BaseOffset)) &&
- (C->getAPInt() + NewF.BaseOffset).countTrailingZeros() >=
- countTrailingZeros<uint64_t>(NewF.BaseOffset))
+ (C->getAPInt() + NewF.BaseOffset).countr_zero() >=
+ (unsigned)llvm::countr_zero<uint64_t>(NewF.BaseOffset))
goto skip_formula;
// Ok, looks good.
@@ -4982,6 +5024,32 @@ void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
+// Check if Best and Reg are SCEVs separated by a constant amount C, and if so
+// would the addressing offset +C be legal where the negative offset -C is
+// not.
+static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI,
+ ScalarEvolution &SE, const SCEV *Best,
+ const SCEV *Reg,
+ MemAccessTy AccessType) {
+ if (Best->getType() != Reg->getType() ||
+ (isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) &&
+ cast<SCEVAddRecExpr>(Best)->getLoop() !=
+ cast<SCEVAddRecExpr>(Reg)->getLoop()))
+ return false;
+ const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Best, Reg));
+ if (!Diff)
+ return false;
+
+ return TTI.isLegalAddressingMode(
+ AccessType.MemTy, /*BaseGV=*/nullptr,
+ /*BaseOffset=*/Diff->getAPInt().getSExtValue(),
+ /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace) &&
+ !TTI.isLegalAddressingMode(
+ AccessType.MemTy, /*BaseGV=*/nullptr,
+ /*BaseOffset=*/-Diff->getAPInt().getSExtValue(),
+ /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace);
+}
+
/// Pick a register which seems likely to be profitable, and then in any use
/// which has any reference to that register, delete all formulae which do not
/// reference that register.
@@ -5010,6 +5078,19 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
Best = Reg;
BestNum = Count;
}
+
+ // If the scores are the same, but the Reg is simpler for the target
+ // (for example {x,+,1} as opposed to {x+C,+,1}, where the target can
+ // handle +C but not -C), opt for the simpler formula.
+ if (Count == BestNum) {
+ int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
+ if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&
+ IsSimplerBaseSCEVForTarget(TTI, SE, Best, Reg,
+ Uses[LUIdx].AccessTy)) {
+ Best = Reg;
+ BestNum = Count;
+ }
+ }
}
}
assert(Best && "Failed to find best LSRUse candidate");
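
A concrete instance of the tie-break above (the offset 16 is only illustrative): if Best is {x+16,+,1} and Reg is {x,+,1}, the constant difference is +16, and on a target whose addressing mode folds [base + 16] but not [base - 16] the helper fires and the simpler {x,+,1} wins, so the other formulas fold as positive offsets. The asymmetry test it performs, restated with TTI and AccessTy assumed in scope:

    bool PreferSimplerBase =
        TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
                                  /*BaseOffset=*/16, /*HasBaseReg=*/true,
                                  /*Scale=*/0, AccessTy.AddrSpace) &&
        !TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
                                   /*BaseOffset=*/-16, /*HasBaseReg=*/true,
                                   /*Scale=*/0, AccessTy.AddrSpace);
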
@@ -5497,6 +5578,13 @@ void LSRInstance::RewriteForPHI(
PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
DenseMap<BasicBlock *, Value *> Inserted;
+
+ // Inserting instructions in the loop and using them as a PHI's input could
+ // break LCSSA if the PHI's parent block is not a loop exit (i.e. the
+ // corresponding incoming block is not loop exiting). So collect all such
+ // instructions to form LCSSA for them later.
+ SmallVector<Instruction *, 4> InsertedNonLCSSAInsts;
+
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
bool needUpdateFixups = false;
@@ -5562,6 +5650,13 @@ void LSRInstance::RewriteForPHI(
FullV, LF.OperandValToReplace->getType(),
"tmp", BB->getTerminator());
+ // If the incoming block for this value is not in the loop, it means the
+ // current PHI is not in a loop exit, so we must create an LCSSA PHI for
+ // the inserted value.
+ if (auto *I = dyn_cast<Instruction>(FullV))
+ if (L->contains(I) && !L->contains(BB))
+ InsertedNonLCSSAInsts.push_back(I);
+
PN->setIncomingValue(i, FullV);
Pair.first->second = FullV;
}
@@ -5604,6 +5699,8 @@ void LSRInstance::RewriteForPHI(
}
}
}
+
+ formLCSSAForInstructions(InsertedNonLCSSAInsts, DT, LI, &SE);
}
/// Emit instructions for the leading candidate expression for this LSRUse (this
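
One more note on the RewriteForPHI changes above: the expanded value is materialized inside the loop, while the PHI being patched can live in a block that is not a dedicated loop exit, so the new def-use edge would cross the loop boundary without an LCSSA phi. Collecting those defs and running formLCSSAForInstructions afterwards restores the invariant. A minimal sketch of that fix-up step, assuming the worklist was filled while rewriting:

    #include "llvm/Transforms/Utils/LoopUtils.h"
    using namespace llvm;

    static void restoreLCSSA(SmallVectorImpl<Instruction *> &NewLoopDefs,
                             const DominatorTree &DT, const LoopInfo &LI,
                             ScalarEvolution &SE) {
      if (!NewLoopDefs.empty())
        formLCSSAForInstructions(NewLoopDefs, DT, LI, &SE);
    }
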
@@ -5643,6 +5740,36 @@ void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
DeadInsts.emplace_back(OperandIsInstr);
}
+// Try to hoist the IVInc to the loop header if all IVInc users are in
+// the loop header. This helps the backend generate post-index load/store
+// when the latch block is different from the loop header block.
+static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup,
+ const LSRUse &LU, Instruction *IVIncInsertPos,
+ Loop *L) {
+ if (LU.Kind != LSRUse::Address)
+ return false;
+
+ // For now this code does the conservative optimization and only handles
+ // the header block. Later we can hoist the IVInc to the block that
+ // post-dominates all users.
+ BasicBlock *LHeader = L->getHeader();
+ if (IVIncInsertPos->getParent() == LHeader)
+ return false;
+
+ if (!Fixup.OperandValToReplace ||
+ any_of(Fixup.OperandValToReplace->users(), [&LHeader](User *U) {
+ Instruction *UI = cast<Instruction>(U);
+ return UI->getParent() != LHeader;
+ }))
+ return false;
+
+ Instruction *I = Fixup.UserInst;
+ Type *Ty = I->getType();
+ return Ty->isIntegerTy() &&
+ ((isa<LoadInst>(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) ||
+ (isa<StoreInst>(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty)));
+}
+
/// Rewrite all the fixup locations with new values, following the chosen
/// solution.
void LSRInstance::ImplementSolution(
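
Why the hoist gated by canHoistIVInc pays off: instruction selection can typically only fold the IV increment into a post-indexed load or store when the increment sits in the same block as the memory access, and the helper additionally asks the target whether such an access exists for the value type. A hedged sketch of that legality query on its own:

    #include "llvm/Analysis/TargetTransformInfo.h"
    using namespace llvm;

    static bool targetCanPostIncrement(const TargetTransformInfo &TTI, Type *Ty,
                                       bool IsLoad) {
      return IsLoad
                 ? TTI.isIndexedLoadLegal(TargetTransformInfo::MIM_PostInc, Ty)
                 : TTI.isIndexedStoreLegal(TargetTransformInfo::MIM_PostInc, Ty);
    }
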
@@ -5651,8 +5778,6 @@ void LSRInstance::ImplementSolution(
// we can remove them after we are done working.
SmallVector<WeakTrackingVH, 16> DeadInsts;
- Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
-
// Mark phi nodes that terminate chains so the expander tries to reuse them.
for (const IVChain &Chain : IVChainVec) {
if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
@@ -5662,6 +5787,11 @@ void LSRInstance::ImplementSolution(
// Expand the new value definitions and update the users.
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
+ Instruction *InsertPos =
+ canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L)
+ ? L->getHeader()->getTerminator()
+ : IVIncInsertPos;
+ Rewriter.setIVIncInsertPos(L, InsertPos);
Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);
Changed = true;
}
@@ -5994,7 +6124,7 @@ struct SCEVDbgValueBuilder {
}
bool pushConst(const SCEVConstant *C) {
- if (C->getAPInt().getMinSignedBits() > 64)
+ if (C->getAPInt().getSignificantBits() > 64)
return false;
Expr.push_back(llvm::dwarf::DW_OP_consts);
Expr.push_back(C->getAPInt().getSExtValue());
@@ -6083,7 +6213,7 @@ struct SCEVDbgValueBuilder {
/// SCEV constant value is an identity function.
bool isIdentityFunction(uint64_t Op, const SCEV *S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
- if (C->getAPInt().getMinSignedBits() > 64)
+ if (C->getAPInt().getSignificantBits() > 64)
return false;
int64_t I = C->getAPInt().getSExtValue();
switch (Op) {
@@ -6338,13 +6468,13 @@ static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec,
}
}
-/// Cached location ops may be erased during LSR, in which case an undef is
+/// Cached location ops may be erased during LSR, in which case a poison is
/// required when restoring from the cache. The type of that location is no
-/// longer available, so just use int8. The undef will be replaced by one or
+/// longer available, so just use int8. The poison will be replaced by one or
/// more locations later when a SCEVDbgValueBuilder selects alternative
/// locations to use for the salvage.
-static Value *getValueOrUndef(WeakVH &VH, LLVMContext &C) {
- return (VH) ? VH : UndefValue::get(llvm::Type::getInt8Ty(C));
+static Value *getValueOrPoison(WeakVH &VH, LLVMContext &C) {
+ return (VH) ? VH : PoisonValue::get(llvm::Type::getInt8Ty(C));
}
/// Restore the DVI's pre-LSR arguments. Substitute undef for any erased values.
@@ -6363,12 +6493,12 @@ static void restorePreTransformState(DVIRecoveryRec &DVIRec) {
// this case was not present before, so force the location back to a single
// uncontained Value.
Value *CachedValue =
- getValueOrUndef(DVIRec.LocationOps[0], DVIRec.DVI->getContext());
+ getValueOrPoison(DVIRec.LocationOps[0], DVIRec.DVI->getContext());
DVIRec.DVI->setRawLocation(ValueAsMetadata::get(CachedValue));
} else {
SmallVector<ValueAsMetadata *, 3> MetadataLocs;
for (WeakVH VH : DVIRec.LocationOps) {
- Value *CachedValue = getValueOrUndef(VH, DVIRec.DVI->getContext());
+ Value *CachedValue = getValueOrPoison(VH, DVIRec.DVI->getContext());
MetadataLocs.push_back(ValueAsMetadata::get(CachedValue));
}
auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
@@ -6431,7 +6561,7 @@ static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
// less DWARF ops than an iteration count-based expression.
if (std::optional<APInt> Offset =
SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) {
- if (Offset->getMinSignedBits() <= 64)
+ if (Offset->getSignificantBits() <= 64)
SalvageExpr->createOffsetExpr(Offset->getSExtValue(), LSRInductionVar);
} else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
SE))
@@ -6607,7 +6737,7 @@ static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
return nullptr;
}
-static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *>>
+static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *, bool>>
canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
const LoopInfo &LI) {
if (!L->isInnermost()) {
@@ -6626,16 +6756,13 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
}
BasicBlock *LoopLatch = L->getLoopLatch();
-
- // TODO: Can we do something for greater than and less than?
- // Terminating condition is foldable when it is an eq/ne icmp
- BranchInst *BI = cast<BranchInst>(LoopLatch->getTerminator());
- if (BI->isUnconditional())
+ BranchInst *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
+ if (!BI || BI->isUnconditional())
return std::nullopt;
- Value *TermCond = BI->getCondition();
- if (!isa<ICmpInst>(TermCond) || !cast<ICmpInst>(TermCond)->isEquality()) {
- LLVM_DEBUG(dbgs() << "Cannot fold on branching condition that is not an "
- "ICmpInst::eq / ICmpInst::ne\n");
+ auto *TermCond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!TermCond) {
+ LLVM_DEBUG(
+ dbgs() << "Cannot fold on branching condition that is not an ICmpInst");
return std::nullopt;
}
if (!TermCond->hasOneUse()) {
@@ -6645,89 +6772,42 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
return std::nullopt;
}
- // For `IsToFold`, a primary IV can be replaced by other affine AddRec when it
- // is only used by the terminating condition. To check for this, we may need
- // to traverse through a chain of use-def until we can examine the final
- // usage.
- // *----------------------*
- // *---->| LoopHeader: |
- // | | PrimaryIV = phi ... |
- // | *----------------------*
- // | |
- // | |
- // | chain of
- // | single use
- // used by |
- // phi |
- // | Value
- // | / \
- // | chain of chain of
- // | single use single use
- // | / \
- // | / \
- // *- Value Value --> used by terminating condition
- auto IsToFold = [&](PHINode &PN) -> bool {
- Value *V = &PN;
-
- while (V->getNumUses() == 1)
- V = *V->user_begin();
-
- if (V->getNumUses() != 2)
- return false;
+ BinaryOperator *LHS = dyn_cast<BinaryOperator>(TermCond->getOperand(0));
+ Value *RHS = TermCond->getOperand(1);
+ if (!LHS || !L->isLoopInvariant(RHS))
+ // We could pattern match the inverse form of the icmp, but that is
+ // non-canonical, and this pass is running *very* late in the pipeline.
+ return std::nullopt;
- Value *VToPN = nullptr;
- Value *VToTermCond = nullptr;
- for (User *U : V->users()) {
- while (U->getNumUses() == 1) {
- if (isa<PHINode>(U))
- VToPN = U;
- if (U == TermCond)
- VToTermCond = U;
- U = *U->user_begin();
- }
- }
- return VToPN && VToTermCond;
- };
+ // Find the IV used by the current exit condition.
+ PHINode *ToFold;
+ Value *ToFoldStart, *ToFoldStep;
+ if (!matchSimpleRecurrence(LHS, ToFold, ToFoldStart, ToFoldStep))
+ return std::nullopt;
- // If this is an IV which we could replace the terminating condition, return
- // the final value of the alternative IV on the last iteration.
- auto getAlternateIVEnd = [&](PHINode &PN) -> const SCEV * {
- // FIXME: This does not properly account for overflow.
- const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
- const SCEV *BECount = SE.getBackedgeTakenCount(L);
- const SCEV *TermValueS = SE.getAddExpr(
- AddRec->getOperand(0),
- SE.getTruncateOrZeroExtend(
- SE.getMulExpr(
- AddRec->getOperand(1),
- SE.getTruncateOrZeroExtend(
- SE.getAddExpr(BECount, SE.getOne(BECount->getType())),
- AddRec->getOperand(1)->getType())),
- AddRec->getOperand(0)->getType()));
- const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
- if (!Expander.isSafeToExpand(TermValueS)) {
- LLVM_DEBUG(
- dbgs() << "Is not safe to expand terminating value for phi node" << PN
- << "\n");
- return nullptr;
- }
- return TermValueS;
- };
+ // If that IV isn't dead after we rewrite the exit condition in terms of
+ // another IV, there's no point in doing the transform.
+ if (!isAlmostDeadIV(ToFold, LoopLatch, TermCond))
+ return std::nullopt;
+
+ const SCEV *BECount = SE.getBackedgeTakenCount(L);
+ const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+ SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
- PHINode *ToFold = nullptr;
PHINode *ToHelpFold = nullptr;
const SCEV *TermValueS = nullptr;
-
+ bool MustDropPoison = false;
for (PHINode &PN : L->getHeader()->phis()) {
+ if (ToFold == &PN)
+ continue;
+
if (!SE.isSCEVable(PN.getType())) {
LLVM_DEBUG(dbgs() << "IV of phi '" << PN
<< "' is not SCEV-able, not qualified for the "
"terminating condition folding.\n");
continue;
}
- const SCEV *S = SE.getSCEV(&PN);
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S);
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
// Only speculate on affine AddRec
if (!AddRec || !AddRec->isAffine()) {
LLVM_DEBUG(dbgs() << "SCEV of phi '" << PN
@@ -6736,12 +6816,63 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
continue;
}
- if (IsToFold(PN))
- ToFold = &PN;
- else if (auto P = getAlternateIVEnd(PN)) {
- ToHelpFold = &PN;
- TermValueS = P;
+ // Check that we can compute the value of AddRec on the exiting iteration
+ // without soundness problems. evaluateAtIteration internally needs
+ // to multiply the stride by the iteration number - which may wrap around.
+ // The issue here is subtle because computing the result accounting for
+ // wrap is insufficient. In order to use the result in an exit test, we
+ // must also know that AddRec doesn't take the same value on any previous
+ // iteration. The simplest case to consider is a candidate IV which is
+ // narrower than the trip count (and thus original IV), but this can
+ // also happen due to non-unit strides on the candidate IVs.
+ if (!AddRec->hasNoSelfWrap())
+ continue;
+
+ const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE);
+ const SCEV *TermValueSLocal = PostInc->evaluateAtIteration(BECount, SE);
+ if (!Expander.isSafeToExpand(TermValueSLocal)) {
+ LLVM_DEBUG(
+ dbgs() << "Is not safe to expand terminating value for phi node" << PN
+ << "\n");
+ continue;
}
+
+ // The candidate IV may have been otherwise dead and poison from the
+ // very first iteration. If we can't disprove that, we can't use the IV.
+ if (!mustExecuteUBIfPoisonOnPathTo(&PN, LoopLatch->getTerminator(), &DT)) {
+ LLVM_DEBUG(dbgs() << "Can not prove poison safety for IV "
+ << PN << "\n");
+ continue;
+ }
+
+ // The candidate IV may become poison on the last iteration. If this
+ // value is not branched on, this is a well defined program. We're
+ // about to add a new use to this IV, and we have to ensure we don't
+ // insert UB which didn't previously exist.
+ bool MustDropPoisonLocal = false;
+ Instruction *PostIncV =
+ cast<Instruction>(PN.getIncomingValueForBlock(LoopLatch));
+ if (!mustExecuteUBIfPoisonOnPathTo(PostIncV, LoopLatch->getTerminator(),
+ &DT)) {
+ LLVM_DEBUG(dbgs() << "Can not prove poison safety to insert use"
+ << PN << "\n");
+
+ // If this is a complex recurrence with multiple instructions computing
+ // the backedge value, we might need to strip poison flags from all of
+ // them.
+ if (PostIncV->getOperand(0) != &PN)
+ continue;
+
+ // In order to perform the transform, we need to drop the poison generating
+ // flags on this instruction (if any).
+ MustDropPoisonLocal = PostIncV->hasPoisonGeneratingFlags();
+ }
+
+ // We pick the last legal alternate IV. We could explore choosing an optimal
+ // alternate IV if we had a decent heuristic to do so.
+ ToHelpFold = &PN;
+ TermValueS = TermValueSLocal;
+ MustDropPoison = MustDropPoisonLocal;
}
LLVM_DEBUG(if (ToFold && !ToHelpFold) dbgs()
@@ -6757,7 +6888,7 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
if (!ToFold || !ToHelpFold)
return std::nullopt;
- return std::make_tuple(ToFold, ToHelpFold, TermValueS);
+ return std::make_tuple(ToFold, ToHelpFold, TermValueS, MustDropPoison);
}
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
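
Pulling the rewritten canFoldTermCondOfLoop together: it identifies the IV feeding the latch compare via matchSimpleRecurrence, requires that IV to be otherwise dead, and then retargets the exit test at another affine IV whose value on the exiting iteration is precomputed from the backedge-taken count (and expanded in the preheader by the caller). Sketched with made-up IR names, not taken from the patch:

    // before:  %c  = icmp ne i32 %i.next, %n         ; %i is an otherwise dead counter
    //          br i1 %c, label %header, label %exit
    //
    // after:   %tc = icmp eq ptr %p.next, %p.final   ; %p.final expanded in the preheader
    //          br i1 %tc, label %exit, label %header ; successors swapped: equal => exit
    //
    // leaving %i / %i.next trivially dead for later cleanup.
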
@@ -6820,7 +6951,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
if (AllowTerminatingConditionFoldingAfterLSR) {
if (auto Opt = canFoldTermCondOfLoop(L, SE, DT, LI)) {
- auto [ToFold, ToHelpFold, TermValueS] = *Opt;
+ auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt;
Changed = true;
NumTermFold++;
@@ -6838,6 +6969,10 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
(void)StartValue;
Value *LoopValue = ToHelpFold->getIncomingValueForBlock(LoopLatch);
+ // See comment in canFoldTermCondOfLoop on why this is sufficient.
+ if (MustDrop)
+ cast<Instruction>(LoopValue)->dropPoisonGeneratingFlags();
+
// SCEVExpander for both use in preheader and latch
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
@@ -6859,11 +6994,12 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
BranchInst *BI = cast<BranchInst>(LoopLatch->getTerminator());
ICmpInst *OldTermCond = cast<ICmpInst>(BI->getCondition());
IRBuilder<> LatchBuilder(LoopLatch->getTerminator());
- // FIXME: We are adding a use of an IV here without account for poison safety.
- // This is incorrect.
- Value *NewTermCond = LatchBuilder.CreateICmp(
- OldTermCond->getPredicate(), LoopValue, TermValue,
- "lsr_fold_term_cond.replaced_term_cond");
+ Value *NewTermCond =
+ LatchBuilder.CreateICmp(CmpInst::ICMP_EQ, LoopValue, TermValue,
+ "lsr_fold_term_cond.replaced_term_cond");
+ // Swap successors to exit the loop body if the IV equals the new TermValue.
+ if (BI->getSuccessor(0) == L->getHeader())
+ BI->swapSuccessors();
LLVM_DEBUG(dbgs() << "Old term-cond:\n"
<< *OldTermCond << "\n"
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
index 0ae26b494c5a..9c6e4ebf62a9 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollAndJamPass.cpp
@@ -32,15 +32,11 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/PassRegistry.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
@@ -460,76 +456,6 @@ static bool tryToUnrollAndJamLoop(LoopNest &LN, DominatorTree &DT, LoopInfo &LI,
return DidSomething;
}
-namespace {
-
-class LoopUnrollAndJam : public LoopPass {
-public:
- static char ID; // Pass ID, replacement for typeid
- unsigned OptLevel;
-
- LoopUnrollAndJam(int OptLevel = 2) : LoopPass(ID), OptLevel(OptLevel) {
- initializeLoopUnrollAndJamPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (skipLoop(L))
- return false;
-
- auto *F = L->getHeader()->getParent();
- auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto &DI = getAnalysis<DependenceAnalysisWrapperPass>().getDI();
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
- auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F);
-
- LoopUnrollResult Result =
- tryToUnrollAndJamLoop(L, DT, LI, SE, TTI, AC, DI, ORE, OptLevel);
-
- if (Result == LoopUnrollResult::FullyUnrolled)
- LPM.markLoopAsDeleted(*L);
-
- return Result != LoopUnrollResult::Unmodified;
- }
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG...
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DependenceAnalysisWrapperPass>();
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- getLoopAnalysisUsage(AU);
- }
-};
-
-} // end anonymous namespace
-
-char LoopUnrollAndJam::ID = 0;
-
-INITIALIZE_PASS_BEGIN(LoopUnrollAndJam, "loop-unroll-and-jam",
- "Unroll and Jam loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_END(LoopUnrollAndJam, "loop-unroll-and-jam",
- "Unroll and Jam loops", false, false)
-
-Pass *llvm::createLoopUnrollAndJamPass(int OptLevel) {
- return new LoopUnrollAndJam(OptLevel);
-}
-
PreservedAnalyses LoopUnrollAndJamPass::run(LoopNest &LN,
LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 1a6065cb3f1a..335b489d3cb2 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -1124,7 +1124,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
const TargetTransformInfo &TTI, AssumptionCache &AC,
OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, bool PreserveLCSSA, int OptLevel,
- bool OnlyWhenForced, bool ForgetAllSCEV,
+ bool OnlyFullUnroll, bool OnlyWhenForced, bool ForgetAllSCEV,
std::optional<unsigned> ProvidedCount,
std::optional<unsigned> ProvidedThreshold,
std::optional<bool> ProvidedAllowPartial,
@@ -1133,6 +1133,7 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
std::optional<bool> ProvidedAllowPeeling,
std::optional<bool> ProvidedAllowProfileBasedPeeling,
std::optional<unsigned> ProvidedFullUnrollMaxCount) {
+
LLVM_DEBUG(dbgs() << "Loop Unroll: F["
<< L->getHeader()->getParent()->getName() << "] Loop %"
<< L->getHeader()->getName() << "\n");
@@ -1304,6 +1305,13 @@ tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution &SE,
return LoopUnrollResult::Unmodified;
}
+ // Do not attempt partial/runtime unrolling in FullLoopUnrolling
+ if (OnlyFullUnroll && !(UP.Count >= MaxTripCount)) {
+ LLVM_DEBUG(
+ dbgs() << "Not attempting partial/runtime unroll in FullLoopUnroll.\n");
+ return LoopUnrollResult::Unmodified;
+ }
+
// At this point, UP.Runtime indicates that run-time unrolling is allowed.
// However, we only want to actually perform it if we don't know the trip
// count and the unroll count doesn't divide the known trip multiple.
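
Restating the new gate with numbers: if the cost model settles on UP.Count = 4 but the known trip count is 16, a caller that passed OnlyFullUnroll = true (LoopFullUnrollPass) now returns Unmodified instead of partially unrolling, while the later LoopUnrollPass run, which passes OnlyFullUnroll = false, may still unroll partially or at runtime. A hedged restatement of the check:

    static bool skipInFullUnrollOnlyMode(bool OnlyFullUnroll, unsigned Count,
                                         unsigned MaxTripCount) {
      // e.g. Count = 4, MaxTripCount = 16 -> skip partial/runtime unrolling here
      return OnlyFullUnroll && !(Count >= MaxTripCount);
    }
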
@@ -1420,10 +1428,10 @@ public:
LoopUnrollResult Result = tryToUnrollLoop(
L, DT, LI, SE, TTI, AC, ORE, nullptr, nullptr, PreserveLCSSA, OptLevel,
- OnlyWhenForced, ForgetAllSCEV, ProvidedCount, ProvidedThreshold,
- ProvidedAllowPartial, ProvidedRuntime, ProvidedUpperBound,
- ProvidedAllowPeeling, ProvidedAllowProfileBasedPeeling,
- ProvidedFullUnrollMaxCount);
+ /*OnlyFullUnroll*/ false, OnlyWhenForced, ForgetAllSCEV, ProvidedCount,
+ ProvidedThreshold, ProvidedAllowPartial, ProvidedRuntime,
+ ProvidedUpperBound, ProvidedAllowPeeling,
+ ProvidedAllowProfileBasedPeeling, ProvidedFullUnrollMaxCount);
if (Result == LoopUnrollResult::FullyUnrolled)
LPM.markLoopAsDeleted(*L);
@@ -1469,12 +1477,6 @@ Pass *llvm::createLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
AllowPeeling == -1 ? std::nullopt : std::optional<bool>(AllowPeeling));
}
-Pass *llvm::createSimpleLoopUnrollPass(int OptLevel, bool OnlyWhenForced,
- bool ForgetAllSCEV) {
- return createLoopUnrollPass(OptLevel, OnlyWhenForced, ForgetAllSCEV, -1, -1,
- 0, 0, 0, 1);
-}
-
PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &Updater) {
@@ -1497,8 +1499,8 @@ PreservedAnalyses LoopFullUnrollPass::run(Loop &L, LoopAnalysisManager &AM,
bool Changed =
tryToUnrollLoop(&L, AR.DT, &AR.LI, AR.SE, AR.TTI, AR.AC, ORE,
/*BFI*/ nullptr, /*PSI*/ nullptr,
- /*PreserveLCSSA*/ true, OptLevel, OnlyWhenForced,
- ForgetSCEV, /*Count*/ std::nullopt,
+ /*PreserveLCSSA*/ true, OptLevel, /*OnlyFullUnroll*/ true,
+ OnlyWhenForced, ForgetSCEV, /*Count*/ std::nullopt,
/*Threshold*/ std::nullopt, /*AllowPartial*/ false,
/*Runtime*/ false, /*UpperBound*/ false,
/*AllowPeeling*/ true,
@@ -1623,8 +1625,9 @@ PreservedAnalyses LoopUnrollPass::run(Function &F,
// flavors of unrolling during construction time (by setting UnrollOpts).
LoopUnrollResult Result = tryToUnrollLoop(
&L, DT, &LI, SE, TTI, AC, ORE, BFI, PSI,
- /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, UnrollOpts.OnlyWhenForced,
- UnrollOpts.ForgetSCEV, /*Count*/ std::nullopt,
+ /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*OnlyFullUnroll*/ false,
+ UnrollOpts.OnlyWhenForced, UnrollOpts.ForgetSCEV,
+ /*Count*/ std::nullopt,
/*Threshold*/ std::nullopt, UnrollOpts.AllowPartial,
UnrollOpts.AllowRuntime, UnrollOpts.AllowUpperBound, LocalAllowPeeling,
UnrollOpts.AllowProfileBasedPeeling, UnrollOpts.FullUnrollMaxCount);
@@ -1651,7 +1654,7 @@ void LoopUnrollPass::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
static_cast<PassInfoMixin<LoopUnrollPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
- OS << "<";
+ OS << '<';
if (UnrollOpts.AllowPartial != std::nullopt)
OS << (*UnrollOpts.AllowPartial ? "" : "no-") << "partial;";
if (UnrollOpts.AllowPeeling != std::nullopt)
@@ -1664,7 +1667,7 @@ void LoopUnrollPass::printPipeline(
OS << (*UnrollOpts.AllowProfileBasedPeeling ? "" : "no-")
<< "profile-peeling;";
if (UnrollOpts.FullUnrollMaxCount != std::nullopt)
- OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ";";
- OS << "O" << UnrollOpts.OptLevel;
- OS << ">";
+ OS << "full-unroll-max=" << UnrollOpts.FullUnrollMaxCount << ';';
+ OS << 'O' << UnrollOpts.OptLevel;
+ OS << '>';
}
diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 848be25a2fe0..13e06c79d0d7 100644
--- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -77,13 +77,10 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
@@ -113,33 +110,6 @@ static cl::opt<unsigned> LVLoopDepthThreshold(
namespace {
-struct LoopVersioningLICMLegacyPass : public LoopPass {
- static char ID;
-
- LoopVersioningLICMLegacyPass() : LoopPass(ID) {
- initializeLoopVersioningLICMLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnLoop(Loop *L, LPPassManager &LPM) override;
-
- StringRef getPassName() const override { return "Loop Versioning for LICM"; }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequiredID(LCSSAID);
- AU.addRequired<LoopAccessLegacyAnalysis>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- }
-};
-
struct LoopVersioningLICM {
// We don't explicitly pass in LoopAccessInfo to the constructor since the
// loop versioning might return early due to instructions that are not safe
@@ -563,21 +533,6 @@ void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) {
}
}
-bool LoopVersioningLICMLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
- if (skipLoop(L))
- return false;
-
- AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- OptimizationRemarkEmitter *ORE =
- &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
- LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &LAIs = getAnalysis<LoopAccessLegacyAnalysis>().getLAIs();
-
- return LoopVersioningLICM(AA, SE, ORE, LAIs, LI, L).run(DT);
-}
-
bool LoopVersioningLICM::run(DominatorTree *DT) {
// Do not do the transformation if disabled by metadata.
if (hasLICMVersioningTransformation(CurLoop) & TM_Disable)
@@ -611,26 +566,6 @@ bool LoopVersioningLICM::run(DominatorTree *DT) {
return Changed;
}
-char LoopVersioningLICMLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(LoopVersioningLICMLegacyPass, "loop-versioning-licm",
- "Loop Versioning For LICM", false, false)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_END(LoopVersioningLICMLegacyPass, "loop-versioning-licm",
- "Loop Versioning For LICM", false, false)
-
-Pass *llvm::createLoopVersioningLICMPass() {
- return new LoopVersioningLICMLegacyPass();
-}
-
namespace llvm {
PreservedAnalyses LoopVersioningLICMPass::run(Loop &L, LoopAnalysisManager &AM,
diff --git a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
index ef22b0401b1b..b167120a906d 100644
--- a/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerConstantIntrinsics.cpp
@@ -29,6 +29,7 @@
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <optional>
@@ -136,10 +137,12 @@ static bool lowerConstantIntrinsics(Function &F, const TargetLibraryInfo &TLI,
continue;
case Intrinsic::is_constant:
NewValue = lowerIsConstantIntrinsic(II);
+ LLVM_DEBUG(dbgs() << "Folding " << *II << " to " << *NewValue << "\n");
IsConstantIntrinsicsHandled++;
break;
case Intrinsic::objectsize:
NewValue = lowerObjectSizeCall(II, DL, &TLI, true);
+ LLVM_DEBUG(dbgs() << "Folding " << *II << " to " << *NewValue << "\n");
ObjectSizeIntrinsicsHandled++;
break;
}
diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
index 17594b98c5bc..f46ea6a20afa 100644
--- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
+++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp
@@ -72,6 +72,11 @@ static cl::opt<bool> AllowContractEnabled(
cl::desc("Allow the use of FMAs if available and profitable. This may "
"result in different results, due to less rounding error."));
+static cl::opt<bool>
+ VerifyShapeInfo("verify-matrix-shapes", cl::Hidden,
+ cl::desc("Enable/disable matrix shape verification."),
+ cl::init(false));
+
enum class MatrixLayoutTy { ColumnMajor, RowMajor };
static cl::opt<MatrixLayoutTy> MatrixLayout(
@@ -267,7 +272,7 @@ class LowerMatrixIntrinsics {
unsigned D = isColumnMajor() ? NumColumns : NumRows;
for (unsigned J = 0; J < D; ++J)
- addVector(UndefValue::get(FixedVectorType::get(
+ addVector(PoisonValue::get(FixedVectorType::get(
EltTy, isColumnMajor() ? NumRows : NumColumns)));
}
@@ -535,6 +540,15 @@ public:
auto SIter = ShapeMap.find(V);
if (SIter != ShapeMap.end()) {
+ if (VerifyShapeInfo && (SIter->second.NumRows != Shape.NumRows ||
+ SIter->second.NumColumns != Shape.NumColumns)) {
+ errs() << "Conflicting shapes (" << SIter->second.NumRows << "x"
+ << SIter->second.NumColumns << " vs " << Shape.NumRows << "x"
+ << Shape.NumColumns << ") for " << *V << "\n";
+ report_fatal_error(
+ "Matrix shape verification failed, compilation aborted!");
+ }
+
LLVM_DEBUG(dbgs() << " not overriding existing shape: "
<< SIter->second.NumRows << " "
<< SIter->second.NumColumns << " for " << *V << "\n");
@@ -838,10 +852,13 @@ public:
auto NewInst = distributeTransposes(
TAMA, {R, C}, TAMB, {R, C}, Builder,
[&](Value *T0, ShapeInfo Shape0, Value *T1, ShapeInfo Shape1) {
- auto *FAdd =
- cast<Instruction>(LocalBuilder.CreateFAdd(T0, T1, "mfadd"));
- setShapeInfo(FAdd, Shape0);
- return FAdd;
+ bool IsFP = I.getType()->isFPOrFPVectorTy();
+ auto *Add = IsFP ? LocalBuilder.CreateFAdd(T0, T1, "madd")
+ : LocalBuilder.CreateAdd(T0, T1, "madd");
+
+ auto *Result = cast<Instruction>(Add);
+ setShapeInfo(Result, Shape0);
+ return Result;
});
updateShapeAndReplaceAllUsesWith(I, NewInst);
eraseFromParentAndMove(&I, II, BB);
@@ -978,13 +995,18 @@ public:
MatrixInsts.push_back(&I);
}
- // Second, try to fuse candidates.
+ // Second, try to lower any dot products
SmallPtrSet<Instruction *, 16> FusedInsts;
for (CallInst *CI : MaybeFusableInsts)
+ lowerDotProduct(CI, FusedInsts, getFastMathFlags(CI));
+
+ // Third, try to fuse candidates.
+ for (CallInst *CI : MaybeFusableInsts)
LowerMatrixMultiplyFused(CI, FusedInsts);
+
Changed = !FusedInsts.empty();
- // Third, lower remaining instructions with shape information.
+ // Fourth, lower remaining instructions with shape information.
for (Instruction *Inst : MatrixInsts) {
if (FusedInsts.count(Inst))
continue;
@@ -1311,6 +1333,165 @@ public:
}
}
+ /// Special case for MatMul lowering. Prevents scalar loads of row-major
+ /// vectors. Lowers to a vector reduction add instead of sequential adds if
+ /// reassociation is enabled.
+ void lowerDotProduct(CallInst *MatMul,
+ SmallPtrSet<Instruction *, 16> &FusedInsts,
+ FastMathFlags FMF) {
+ if (FusedInsts.contains(MatMul) ||
+ MatrixLayout != MatrixLayoutTy::ColumnMajor)
+ return;
+ ShapeInfo LShape(MatMul->getArgOperand(2), MatMul->getArgOperand(3));
+ ShapeInfo RShape(MatMul->getArgOperand(3), MatMul->getArgOperand(4));
+
+ if (LShape.NumRows != 1 || RShape.NumColumns != 1) // not a dot product
+ return;
+
+ Value *LHS = MatMul->getArgOperand(0);
+ Value *RHS = MatMul->getArgOperand(1);
+
+ Type *ElementType = cast<VectorType>(LHS->getType())->getElementType();
+ bool IsIntVec = ElementType->isIntegerTy();
+
+ // Floating point reductions require reassociation.
+ if (!IsIntVec && !FMF.allowReassoc())
+ return;
+
+ auto CanBeFlattened = [this](Value *Op) {
+ if (match(Op, m_BinOp()) && ShapeMap.find(Op) != ShapeMap.end())
+ return true;
+ return match(
+ Op, m_OneUse(m_CombineOr(
+ m_Load(m_Value()),
+ m_CombineOr(m_Intrinsic<Intrinsic::matrix_transpose>(),
+ m_Intrinsic<Intrinsic::matrix_column_major_load>(
+ m_Value(), m_SpecificInt(1))))));
+ };
+ // Returns the cost benefit of using \p Op with the dot product lowering. If
+ // the returned cost is < 0, the argument is cheaper to use in the
+ // dot-product lowering.
+ auto GetCostForArg = [this, &CanBeFlattened](Value *Op, unsigned N) {
+ if (!isa<Instruction>(Op))
+ return InstructionCost(0);
+
+ FixedVectorType *VecTy = cast<FixedVectorType>(Op->getType());
+ Type *EltTy = VecTy->getElementType();
+
+ if (!CanBeFlattened(Op)) {
+ InstructionCost EmbedCost(0);
+ // Roughly estimate the cost for embedding the columns into a vector.
+ for (unsigned I = 1; I < N; ++I)
+ EmbedCost -=
+ TTI.getShuffleCost(TTI::SK_Splice, FixedVectorType::get(EltTy, 1),
+ std::nullopt, TTI::TCK_RecipThroughput);
+ return EmbedCost;
+ }
+
+ if (match(Op, m_BinOp()) && ShapeMap.find(Op) != ShapeMap.end()) {
+ InstructionCost OriginalCost =
+ TTI.getArithmeticInstrCost(cast<Instruction>(Op)->getOpcode(),
+ EltTy) *
+ N;
+ InstructionCost NewCost = TTI.getArithmeticInstrCost(
+ cast<Instruction>(Op)->getOpcode(), VecTy);
+ return NewCost - OriginalCost;
+ }
+
+ if (match(Op, m_Intrinsic<Intrinsic::matrix_transpose>())) {
+ // The transpose can be skipped for the dot product lowering; roughly
+ // estimate the savings as the cost of embedding the columns in a
+ // vector.
+ InstructionCost EmbedCost(0);
+ for (unsigned I = 1; I < N; ++I)
+ EmbedCost +=
+ TTI.getShuffleCost(TTI::SK_Splice, FixedVectorType::get(EltTy, 1),
+ std::nullopt, TTI::TCK_RecipThroughput);
+ return EmbedCost;
+ }
+
+ // Costs for loads.
+ if (N == 1)
+ return InstructionCost(0);
+
+ return TTI.getMemoryOpCost(Instruction::Load, VecTy, Align(1), 0) -
+ N * TTI.getMemoryOpCost(Instruction::Load, EltTy, Align(1), 0);
+ };
+ auto LHSCost = GetCostForArg(LHS, LShape.NumColumns);
+
+ // We compare the costs of a vector.reduce.add to sequential add.
+ int AddOpCode = IsIntVec ? Instruction::Add : Instruction::FAdd;
+ int MulOpCode = IsIntVec ? Instruction::Mul : Instruction::FMul;
+ InstructionCost ReductionCost =
+ TTI.getArithmeticReductionCost(
+ AddOpCode, cast<VectorType>(LHS->getType()),
+ IsIntVec ? std::nullopt : std::optional(FMF)) +
+ TTI.getArithmeticInstrCost(MulOpCode, LHS->getType());
+ InstructionCost SequentialAddCost =
+ TTI.getArithmeticInstrCost(AddOpCode, ElementType) *
+ (LShape.NumColumns - 1) +
+ TTI.getArithmeticInstrCost(MulOpCode, ElementType) *
+ (LShape.NumColumns);
+ if ((LHSCost + ReductionCost - SequentialAddCost) > InstructionCost(0))
+ return;
+
+ FusedInsts.insert(MatMul);
+ IRBuilder<> Builder(MatMul);
+ auto FlattenArg = [&Builder, &FusedInsts, &CanBeFlattened,
+ this](Value *Op) -> Value * {
+ // Matmul must be the only user of loads because we don't use LowerLoad
+ // for row vectors (LowerLoad results in scalar loads and shufflevectors
+ // instead of a single vector load).
+ if (!CanBeFlattened(Op))
+ return Op;
+
+ if (match(Op, m_BinOp()) && ShapeMap.find(Op) != ShapeMap.end()) {
+ ShapeMap[Op] = ShapeMap[Op].t();
+ return Op;
+ }
+
+ FusedInsts.insert(cast<Instruction>(Op));
+ // If the vector uses the matrix column-major load intrinsic, lower it to a LoadInst.
+ Value *Arg;
+ if (match(Op, m_Intrinsic<Intrinsic::matrix_column_major_load>(
+ m_Value(Arg)))) {
+ auto *NewLoad = Builder.CreateLoad(Op->getType(), Arg);
+ Op->replaceAllUsesWith(NewLoad);
+ cast<Instruction>(Op)->eraseFromParent();
+ return NewLoad;
+ } else if (match(Op, m_Intrinsic<Intrinsic::matrix_transpose>(
+ m_Value(Arg)))) {
+ ToRemove.push_back(cast<Instruction>(Op));
+ return Arg;
+ }
+
+ return Op;
+ };
+ LHS = FlattenArg(LHS);
+
+ // Insert the mul/fmul and the llvm.vector.reduce.add/fadd reduction.
+ Value *Mul =
+ IsIntVec ? Builder.CreateMul(LHS, RHS) : Builder.CreateFMul(LHS, RHS);
+
+ Value *Result;
+ if (IsIntVec)
+ Result = Builder.CreateAddReduce(Mul);
+ else {
+ Result = Builder.CreateFAddReduce(
+ ConstantFP::get(cast<VectorType>(LHS->getType())->getElementType(),
+ 0.0),
+ Mul);
+ cast<Instruction>(Result)->setFastMathFlags(FMF);
+ }
+
+ // Pack the scalar back into a matrix and then replace the matmul instruction.
+ Result = Builder.CreateInsertElement(PoisonValue::get(MatMul->getType()),
+ Result, uint64_t(0));
+ MatMul->replaceAllUsesWith(Result);
+ FusedInsts.insert(MatMul);
+ ToRemove.push_back(MatMul);
+ }
+
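[Editorial note, not part of the patch: the helper name below is ours; it is a minimal sketch of the floating-point emission path lowerDotProduct takes once both operands are flattened to plain <N x float> values and reassociation is allowed, using only IRBuilder APIs already used in this file.]

    static Value *emitFPDotProduct(IRBuilder<> &Builder, Value *LHS, Value *RHS,
                                   FastMathFlags FMF) {
      // Elementwise multiply, then one llvm.vector.reduce.fadd seeded with 0.0,
      // mirroring what lowerDotProduct emits above.
      Value *Mul = Builder.CreateFMul(LHS, RHS);
      Type *EltTy = cast<VectorType>(LHS->getType())->getElementType();
      Value *Red = Builder.CreateFAddReduce(ConstantFP::get(EltTy, 0.0), Mul);
      cast<Instruction>(Red)->setFastMathFlags(FMF); // needs the reassoc flag
      return Red;
    }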
/// Compute \p Result += \p A * \p B for input matrices with left-associating
/// addition.
///
@@ -1469,15 +1650,14 @@ public:
auto *ArrayTy = ArrayType::get(VT->getElementType(), VT->getNumElements());
AllocaInst *Alloca =
Builder.CreateAlloca(ArrayTy, Load->getPointerAddressSpace());
- Value *BC = Builder.CreateBitCast(Alloca, VT->getPointerTo());
- Builder.CreateMemCpy(BC, Alloca->getAlign(), Load->getPointerOperand(),
+ Builder.CreateMemCpy(Alloca, Alloca->getAlign(), Load->getPointerOperand(),
Load->getAlign(), LoadLoc.Size.getValue());
Builder.SetInsertPoint(Fusion, Fusion->begin());
PHINode *PHI = Builder.CreatePHI(Load->getPointerOperandType(), 3);
PHI->addIncoming(Load->getPointerOperand(), Check0);
PHI->addIncoming(Load->getPointerOperand(), Check1);
- PHI->addIncoming(BC, Copy);
+ PHI->addIncoming(Alloca, Copy);
// Adjust DT.
DTUpdates.push_back({DT->Insert, Check0, Check1});
@@ -2397,99 +2577,8 @@ void LowerMatrixIntrinsicsPass::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
static_cast<PassInfoMixin<LowerMatrixIntrinsicsPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
- OS << "<";
+ OS << '<';
if (Minimal)
OS << "minimal";
- OS << ">";
-}
-
-namespace {
-
-class LowerMatrixIntrinsicsLegacyPass : public FunctionPass {
-public:
- static char ID;
-
- LowerMatrixIntrinsicsLegacyPass() : FunctionPass(ID) {
- initializeLowerMatrixIntrinsicsLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
- auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- LowerMatrixIntrinsics LMT(F, TTI, &AA, &DT, &LI, &ORE);
- bool C = LMT.Visit();
- return C;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- }
-};
-} // namespace
-
-static const char pass_name[] = "Lower the matrix intrinsics";
-char LowerMatrixIntrinsicsLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LowerMatrixIntrinsicsLegacyPass, DEBUG_TYPE, pass_name,
- false, false)
-INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(LowerMatrixIntrinsicsLegacyPass, DEBUG_TYPE, pass_name,
- false, false)
-
-Pass *llvm::createLowerMatrixIntrinsicsPass() {
- return new LowerMatrixIntrinsicsLegacyPass();
-}
-
-namespace {
-
-/// A lightweight version of the matrix lowering pass that only requires TTI.
-/// Advanced features that require DT, AA or ORE like tiling are disabled. This
-/// is used to lower matrix intrinsics if the main lowering pass is not run, for
-/// example with -O0.
-class LowerMatrixIntrinsicsMinimalLegacyPass : public FunctionPass {
-public:
- static char ID;
-
- LowerMatrixIntrinsicsMinimalLegacyPass() : FunctionPass(ID) {
- initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- LowerMatrixIntrinsics LMT(F, TTI, nullptr, nullptr, nullptr, nullptr);
- bool C = LMT.Visit();
- return C;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.setPreservesCFG();
- }
-};
-} // namespace
-
-static const char pass_name_minimal[] = "Lower the matrix intrinsics (minimal)";
-char LowerMatrixIntrinsicsMinimalLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LowerMatrixIntrinsicsMinimalLegacyPass,
- "lower-matrix-intrinsics-minimal", pass_name_minimal,
- false, false)
-INITIALIZE_PASS_END(LowerMatrixIntrinsicsMinimalLegacyPass,
- "lower-matrix-intrinsics-minimal", pass_name_minimal, false,
- false)
-
-Pass *llvm::createLowerMatrixIntrinsicsMinimalPass() {
- return new LowerMatrixIntrinsicsMinimalLegacyPass();
+ OS << '>';
}
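[Editorial note: with the legacy LowerMatrixIntrinsicsLegacyPass wrappers deleted above, the lowering is reachable only through the new pass manager. A hedged usage sketch follows; the helper name is ours, and it assumes the LowerMatrixIntrinsicsPass(bool Minimal) constructor that the printPipeline change above reflects.]

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
    using namespace llvm;

    // Add the matrix lowering to a function pipeline; the minimal variant is
    // the lightweight form used when the full pipeline does not run (e.g. -O0).
    static void addMatrixLowering(FunctionPassManager &FPM, bool Minimal) {
      FPM.addPass(LowerMatrixIntrinsicsPass(Minimal));
    }

On an opt command line this appears to correspond to -passes='lower-matrix-intrinsics', with the minimal variant spelled via the <minimal> pass option shown in the printPipeline output kept in this file.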
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 64846484f936..68642a01b37c 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -46,13 +46,10 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
@@ -72,6 +69,7 @@ STATISTIC(NumMemSetInfer, "Number of memsets inferred");
STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
+STATISTIC(NumStackMove, "Number of stack-move optimizations performed");
namespace {
@@ -255,54 +253,6 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
// MemCpyOptLegacyPass Pass
//===----------------------------------------------------------------------===//
-namespace {
-
-class MemCpyOptLegacyPass : public FunctionPass {
- MemCpyOptPass Impl;
-
-public:
- static char ID; // Pass identification, replacement for typeid
-
- MemCpyOptLegacyPass() : FunctionPass(ID) {
- initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
-private:
- // This transformation requires dominator postdominator info
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addPreserved<MemorySSAWrapperPass>();
- }
-};
-
-} // end anonymous namespace
-
-char MemCpyOptLegacyPass::ID = 0;
-
-/// The public interface to this file...
-FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
-
-INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
-INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
- false, false)
-
// Check that V is either not accessible by the caller, or unwinding cannot
// occur between Start and End.
static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
@@ -463,7 +413,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
// Check to see if this store is to a constant offset from the start ptr.
std::optional<int64_t> Offset =
- isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL);
+ NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr, DL);
if (!Offset)
break;
@@ -477,7 +427,7 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
// Check to see if this store is to a constant offset from the start ptr.
std::optional<int64_t> Offset =
- isPointerOffset(StartPtr, MSI->getDest(), DL);
+ MSI->getDest()->getPointerOffsetFrom(StartPtr, DL);
if (!Offset)
break;
@@ -781,6 +731,23 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
return true;
}
+ // If this is a load-store pair from a stack slot to a stack slot, we
+ // might be able to perform the stack-move optimization just as we do for
+ // memcpys from an alloca to an alloca.
+ if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) {
+ if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) {
+ if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
+ DL.getTypeStoreSize(T), BAA)) {
+ // Avoid invalidating the iterator.
+ BBI = SI->getNextNonDebugInstruction()->getIterator();
+ eraseInstruction(SI);
+ eraseInstruction(LI);
+ ++NumMemCpyInstr;
+ return true;
+ }
+ }
+ }
+
return false;
}
@@ -1200,8 +1167,14 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
// still want to eliminate the intermediate value, but we have to generate a
// memmove instead of memcpy.
bool UseMemMove = false;
- if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep))))
+ if (isModSet(BAA.getModRefInfo(M, MemoryLocation::getForSource(MDep)))) {
+ // Don't convert llvm.memcpy.inline into memmove because memmove can be
+ // lowered as a call, and that is not allowed for llvm.memcpy.inline (and
+ // there is no inline version of llvm.memmove).
+ if (isa<MemCpyInlineInst>(M))
+ return false;
UseMemMove = true;
+ }
// If all checks passed, then we can transform M.
LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
@@ -1246,13 +1219,18 @@ bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
/// In other words, transform:
/// \code
/// memset(dst, c, dst_size);
+/// ...
/// memcpy(dst, src, src_size);
/// \endcode
/// into:
/// \code
-/// memcpy(dst, src, src_size);
+/// ...
/// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
+/// memcpy(dst, src, src_size);
/// \endcode
+///
+/// The memset is sunk to just before the memcpy to ensure that src_size is
+/// present when emitting the simplified memset.
bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
MemSetInst *MemSet,
BatchAAResults &BAA) {
@@ -1300,6 +1278,15 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
IRBuilder<> Builder(MemCpy);
+ // Preserve the debug location of the old memset for the code emitted here
+ // related to the new memset. This is correct according to the rules in
+ // https://llvm.org/docs/HowToUpdateDebugInfo.html about "when to preserve an
+ // instruction location", given that we move the memset within the basic
+ // block.
+ assert(MemSet->getParent() == MemCpy->getParent() &&
+ "Preserving debug location based on moving memset within BB.");
+ Builder.SetCurrentDebugLocation(MemSet->getDebugLoc());
+
// If the sizes have different types, zext the smaller one.
if (DestSize->getType() != SrcSize->getType()) {
if (DestSize->getType()->getIntegerBitWidth() >
@@ -1323,9 +1310,8 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
"MemCpy must be a MemoryDef");
- // The new memset is inserted after the memcpy, but it is known that its
- // defining access is the memset about to be removed which immediately
- // precedes the memcpy.
+ // The new memset is inserted before the memcpy, and it is known that the
+ // memcpy's defining access is the memset about to be removed.
auto *LastDef =
cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
auto *NewAccess = MSSAU->createMemoryAccessBefore(
@@ -1440,6 +1426,217 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
return true;
}
+// Attempts to optimize the pattern whereby memory is copied from an alloca to
+// another alloca, where the two allocas don't have conflicting mod/ref. If
+// successful, the two allocas can be merged into one and the transfer can be
+// deleted. This pattern is generated frequently in Rust, due to the ubiquity of
+// move operations in that language.
+//
+// Once we determine that the optimization is safe to perform, we replace all
+// uses of the destination alloca with the source alloca. We also "shrink wrap"
+// the lifetime markers of the single merged alloca to before the first use
+// and after the last use. Note that the "shrink wrapping" procedure is a safe
+// transformation only because we restrict the scope of this optimization to
+// allocas that aren't captured.
+bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
+ AllocaInst *DestAlloca,
+ AllocaInst *SrcAlloca, uint64_t Size,
+ BatchAAResults &BAA) {
+ LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n"
+ << *Store << "\n");
+
+ // Make sure the two allocas are in the same address space.
+ if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) {
+ LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n");
+ return false;
+ }
+
+ // 1. Check that the copy is full. Calculate the static size of the allocas
+ // to be merged; bail out if we can't.
+ const DataLayout &DL = DestAlloca->getModule()->getDataLayout();
+ std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL);
+ if (!SrcSize || SrcSize->isScalable() || Size != SrcSize->getFixedValue()) {
+ LLVM_DEBUG(dbgs() << "Stack Move: Source alloca size mismatch\n");
+ return false;
+ }
+ std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL);
+ if (!DestSize || DestSize->isScalable() ||
+ Size != DestSize->getFixedValue()) {
+ LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n");
+ return false;
+ }
+
+ // 2-1. Check that src and dest are static allocas, which are not affected by
+ // stacksave/stackrestore.
+ if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca() ||
+ SrcAlloca->getParent() != Load->getParent() ||
+ SrcAlloca->getParent() != Store->getParent())
+ return false;
+
+ // 2-2. Check that src and dest are never-captured, unescaped allocas. Also
+ // collect the lifetime markers, the first/last users (to shrink wrap the
+ // lifetimes), and the instructions carrying noalias metadata (so it can be
+ // dropped).
+
+ SmallVector<Instruction *, 4> LifetimeMarkers;
+ Instruction *FirstUser = nullptr, *LastUser = nullptr;
+ SmallSet<Instruction *, 4> NoAliasInstrs;
+
+ // Recursively track the uses and check whether any modifying aliases exist.
+ auto IsDereferenceableOrNull = [](Value *V, const DataLayout &DL) -> bool {
+ bool CanBeNull, CanBeFreed;
+ return V->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed);
+ };
+
+ auto CaptureTrackingWithModRef =
+ [&](Instruction *AI,
+ function_ref<bool(Instruction *)> ModRefCallback) -> bool {
+ SmallVector<Instruction *, 8> Worklist;
+ Worklist.push_back(AI);
+ unsigned MaxUsesToExplore = getDefaultMaxUsesToExploreForCaptureTracking();
+ Worklist.reserve(MaxUsesToExplore);
+ SmallSet<const Use *, 20> Visited;
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.back();
+ Worklist.pop_back();
+ for (const Use &U : I->uses()) {
+ if (Visited.size() >= MaxUsesToExplore) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Stack Move: Exceeded max uses to see ModRef, bailing\n");
+ return false;
+ }
+ if (!Visited.insert(&U).second)
+ continue;
+ switch (DetermineUseCaptureKind(U, IsDereferenceableOrNull)) {
+ case UseCaptureKind::MAY_CAPTURE:
+ return false;
+ case UseCaptureKind::PASSTHROUGH:
+ // Instructions cannot have non-instruction users.
+ Worklist.push_back(cast<Instruction>(U.getUser()));
+ continue;
+ case UseCaptureKind::NO_CAPTURE: {
+ auto *UI = cast<Instruction>(U.getUser());
+ if (DestAlloca->getParent() != UI->getParent())
+ return false;
+ if (!FirstUser || UI->comesBefore(FirstUser))
+ FirstUser = UI;
+ if (!LastUser || LastUser->comesBefore(UI))
+ LastUser = UI;
+ if (UI->isLifetimeStartOrEnd()) {
+ // We note the locations of these intrinsic calls so that we can
+ // delete them later if the optimization succeeds. This is safe
+ // since both llvm.lifetime.start and llvm.lifetime.end intrinsics
+ // conceptually fill all the bytes of the alloca with an undefined
+ // value.
+ int64_t Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
+ if (Size < 0 || Size == DestSize) {
+ LifetimeMarkers.push_back(UI);
+ continue;
+ }
+ }
+ if (UI->hasMetadata(LLVMContext::MD_noalias))
+ NoAliasInstrs.insert(UI);
+ if (!ModRefCallback(UI))
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+ };
+
+ // 3. Check that dest has no Mod/Ref, except full size lifetime intrinsics,
+ // from the alloca to the Store.
+ ModRefInfo DestModRef = ModRefInfo::NoModRef;
+ MemoryLocation DestLoc(DestAlloca, LocationSize::precise(Size));
+ auto DestModRefCallback = [&](Instruction *UI) -> bool {
+ // We don't care about the store itself.
+ if (UI == Store)
+ return true;
+ ModRefInfo Res = BAA.getModRefInfo(UI, DestLoc);
+ // FIXME: For multi-BB cases, we need to see reachability from it to
+ // store.
+ // Bailout if Dest may have any ModRef before Store.
+ if (UI->comesBefore(Store) && isModOrRefSet(Res))
+ return false;
+ DestModRef |= BAA.getModRefInfo(UI, DestLoc);
+
+ return true;
+ };
+
+ if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
+ return false;
+
+ // 4. Check that, from after the Load to the end of the BB,
+ // 4-1. if the dest has any Mod, src has no Ref, and
+ // 4-2. if the dest has any Ref, src has no Mod except full-sized lifetimes.
+ MemoryLocation SrcLoc(SrcAlloca, LocationSize::precise(Size));
+
+ auto SrcModRefCallback = [&](Instruction *UI) -> bool {
+ // Any ModRef before Load doesn't matter, also Load and Store can be
+ // ignored.
+ if (UI->comesBefore(Load) || UI == Load || UI == Store)
+ return true;
+ ModRefInfo Res = BAA.getModRefInfo(UI, SrcLoc);
+ if ((isModSet(DestModRef) && isRefSet(Res)) ||
+ (isRefSet(DestModRef) && isModSet(Res)))
+ return false;
+
+ return true;
+ };
+
+ if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
+ return false;
+
+ // We can do the transformation. First, align the allocas appropriately.
+ SrcAlloca->setAlignment(
+ std::max(SrcAlloca->getAlign(), DestAlloca->getAlign()));
+
+ // Merge the two allocas.
+ DestAlloca->replaceAllUsesWith(SrcAlloca);
+ eraseInstruction(DestAlloca);
+
+ // Drop metadata on the source alloca.
+ SrcAlloca->dropUnknownNonDebugMetadata();
+
+ // "Shrink wrap" the lifetimes if the original lifetime intrinsics exist.
+ if (!LifetimeMarkers.empty()) {
+ LLVMContext &C = SrcAlloca->getContext();
+ IRBuilder<> Builder(C);
+
+ ConstantInt *AllocaSize = ConstantInt::get(Type::getInt64Ty(C), Size);
+ // Create a new lifetime start marker before the first user of the merged
+ // alloca.
+ Builder.SetInsertPoint(FirstUser->getParent(), FirstUser->getIterator());
+ Builder.CreateLifetimeStart(SrcAlloca, AllocaSize);
+
+ // Create a new lifetime end marker after the last user of the merged
+ // alloca.
+ // FIXME: If the last user is the terminator of the BB, we could insert the
+ // lifetime.end marker in the immediate post-dominator, but currently we do
+ // nothing.
+ if (!LastUser->isTerminator()) {
+ Builder.SetInsertPoint(LastUser->getParent(), ++LastUser->getIterator());
+ Builder.CreateLifetimeEnd(SrcAlloca, AllocaSize);
+ }
+
+ // Remove all other lifetime markers.
+ for (Instruction *I : LifetimeMarkers)
+ eraseInstruction(I);
+ }
+
+ // As this transformation can cause memory accesses that didn't previously
+ // alias to begin to alias one another, we remove !noalias metadata from any
+ // uses of either alloca. This is conservative, but more precision doesn't
+ // seem worthwhile right now.
+ for (Instruction *I : NoAliasInstrs)
+ I->setMetadata(LLVMContext::MD_noalias, nullptr);
+
+ LLVM_DEBUG(dbgs() << "Stack Move: Performed stack-move optimization\n");
+ NumStackMove++;
+ return true;
+}
+
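[Editorial note: a rough source-level illustration of the pattern performStackMoveOptzn targets; the struct and function names are invented, and whether a frontend actually emits the copy as a full-size memcpy between two static allocas depends on the frontend and optimization level.]

    struct Blob { char Bytes[64]; };
    Blob make();

    Blob moveLike() {
      Blob Src = make();  // alloca %Src
      Blob Dst = Src;     // commonly lowered to memcpy(%Dst <- %Src, 64)
      return Dst;         // if neither alloca escapes, %Dst can be merged into %Src
    }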
/// Perform simplification of memcpy's. If we have memcpy A
/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
/// B to be a memcpy from X to Z (or potentially a memmove, depending on
@@ -1484,8 +1681,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);
// Try to turn a partially redundant memset + memcpy into
- // memcpy + smaller memset. We don't need the memcpy size for this.
- // The memcpy most post-dom the memset, so limit this to the same basic
+ // smaller memset + memcpy. We don't need the memcpy size for this.
+ // The memcpy must post-dom the memset, so limit this to the same basic
// block. A non-local generalization is likely not worthwhile.
if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
@@ -1496,13 +1693,14 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
AnyClobber, MemoryLocation::getForSource(M), BAA);
- // There are four possible optimizations we can do for memcpy:
+ // There are five possible optimizations we can do for memcpy:
// a) memcpy-memcpy xform which exposes redundance for DSE.
// b) call-memcpy xform for return slot optimization.
// c) memcpy from freshly alloca'd space or space that has just started
// its lifetime copies undefined data, and we can therefore eliminate
// the memcpy in favor of the data that was already at the destination.
// d) memcpy from a just-memset'd source can be turned into memset.
+ // e) elimination of memcpy via stack-move optimization.
if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
if (Instruction *MI = MD->getMemoryInst()) {
if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
@@ -1521,7 +1719,8 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
}
}
if (auto *MDep = dyn_cast<MemCpyInst>(MI))
- return processMemCpyMemCpyDependence(M, MDep, BAA);
+ if (processMemCpyMemCpyDependence(M, MDep, BAA))
+ return true;
if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
@@ -1540,6 +1739,27 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
}
}
+ // If the transfer is from a stack slot to a stack slot, then we may be able
+ // to perform the stack-move optimization. See the comments in
+ // performStackMoveOptzn() for more details.
+ auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest());
+ if (!DestAlloca)
+ return false;
+ auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource());
+ if (!SrcAlloca)
+ return false;
+ ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength());
+ if (Len == nullptr)
+ return false;
+ if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca, Len->getZExtValue(),
+ BAA)) {
+ // Avoid invalidating the iterator.
+ BBI = M->getNextNonDebugInstruction()->getIterator();
+ eraseInstruction(M);
+ ++NumMemCpyInstr;
+ return true;
+ }
+
return false;
}
@@ -1623,24 +1843,110 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
// foo(*a)
// It would be invalid to transform the second memcpy into foo(*b).
if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
- MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
+ MSSA->getMemoryAccess(MDep), CallAccess))
return false;
- Value *TmpCast = MDep->getSource();
- if (MDep->getSource()->getType() != ByValArg->getType()) {
- BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
- "tmpcast", &CB);
- // Set the tmpcast's DebugLoc to MDep's
- TmpBitCast->setDebugLoc(MDep->getDebugLoc());
- TmpCast = TmpBitCast;
- }
-
LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
<< " " << *MDep << "\n"
<< " " << CB << "\n");
// Otherwise we're good! Update the byval argument.
- CB.setArgOperand(ArgNo, TmpCast);
+ CB.setArgOperand(ArgNo, MDep->getSource());
+ ++NumMemCpyInstr;
+ return true;
+}
+
+/// This is called on memcpy dest pointer arguments attributed as immutable
+/// during call. Try to use memcpy source directly if all of the following
+/// conditions are satisfied.
+/// 1. The memcpy dst is neither modified during the call nor captured by the
+/// call. (if readonly, noalias, nocapture attributes on call-site.)
+/// 2. The memcpy dst is an alloca with known alignment & size.
+/// 2-1. The memcpy length == the alloca size, which ensures that the new
+/// pointer is dereferenceable for the required range.
+/// 2-2. The src pointer has alignment >= the alloca alignment or can be
+/// enforced so.
+/// 3. The memcpy dst and src are not modified between the memcpy and the call.
+/// (if MSSA clobber check is safe.)
+/// 4. The memcpy src is not modified during the call. (ModRef check shows no
+/// Mod.)
+bool MemCpyOptPass::processImmutArgument(CallBase &CB, unsigned ArgNo) {
+ // 1. Ensure passed argument is immutable during call.
+ if (!(CB.paramHasAttr(ArgNo, Attribute::NoAlias) &&
+ CB.paramHasAttr(ArgNo, Attribute::NoCapture)))
+ return false;
+ const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
+ Value *ImmutArg = CB.getArgOperand(ArgNo);
+
+ // 2. Check that the arg is an alloca.
+ // TODO: Even if the arg traces back through branches, we can remove the
+ // memcpy if all the alloca alignments can be enforced to the source alignment.
+ auto *AI = dyn_cast<AllocaInst>(ImmutArg->stripPointerCasts());
+ if (!AI)
+ return false;
+
+ std::optional<TypeSize> AllocaSize = AI->getAllocationSize(DL);
+ // Can't handle unknown size alloca.
+ // (e.g. Variable Length Array, Scalable Vector)
+ if (!AllocaSize || AllocaSize->isScalable())
+ return false;
+ MemoryLocation Loc(ImmutArg, LocationSize::precise(*AllocaSize));
+ MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
+ if (!CallAccess)
+ return false;
+
+ MemCpyInst *MDep = nullptr;
+ BatchAAResults BAA(*AA);
+ MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
+ CallAccess->getDefiningAccess(), Loc, BAA);
+ if (auto *MD = dyn_cast<MemoryDef>(Clobber))
+ MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
+
+ // If the immut argument isn't fed by a memcpy, ignore it. If it is fed by
+ // a memcpy, check that the arg equals the memcpy dest.
+ if (!MDep || MDep->isVolatile() || AI != MDep->getDest())
+ return false;
+
+ // The address space of the memcpy source must match that of the immut argument.
+ if (MDep->getSource()->getType()->getPointerAddressSpace() !=
+ ImmutArg->getType()->getPointerAddressSpace())
+ return false;
+
+ // 2-1. The length of the memcpy must be equal to the size of the alloca.
+ auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
+ if (!MDepLen || AllocaSize != MDepLen->getValue())
+ return false;
+
+ // 2-2. The memcpy source alignment must be greater than or equal to the
+ // alloca's alignment. If not, we check whether the source of the memcpy can
+ // be forced to the alignment we need; if that fails, we bail out.
+ Align MemDepAlign = MDep->getSourceAlign().valueOrOne();
+ Align AllocaAlign = AI->getAlign();
+ if (MemDepAlign < AllocaAlign &&
+ getOrEnforceKnownAlignment(MDep->getSource(), AllocaAlign, DL, &CB, AC,
+ DT) < AllocaAlign)
+ return false;
+
+ // 3. Verify that the source doesn't change in between the memcpy and
+ // the call.
+ // memcpy(a <- b)
+ // *b = 42;
+ // foo(*a)
+ // It would be invalid to transform the second memcpy into foo(*b).
+ if (writtenBetween(MSSA, BAA, MemoryLocation::getForSource(MDep),
+ MSSA->getMemoryAccess(MDep), CallAccess))
+ return false;
+
+ // 4. The memcpy src must not be modified during the call.
+ if (isModSet(AA->getModRefInfo(&CB, MemoryLocation::getForSource(MDep))))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to Immut src:\n"
+ << " " << *MDep << "\n"
+ << " " << CB << "\n");
+
+ // Otherwise we're good! Update the immut argument.
+ CB.setArgOperand(ArgNo, MDep->getSource());
++NumMemCpyInstr;
return true;
}
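[Editorial note: in the same pseudo-C style as the doc comments above, the rewrite processImmutArgument performs is roughly the following; names are illustrative, and the now-bypassed memcpy is left for later cleanup rather than deleted here.]

    tmp = alloca(n);
    memcpy(tmp, src, n);   // n == allocation size of tmp, src sufficiently aligned
    foo(tmp);              // tmp is noalias + nocapture + readonly for this call
  becomes
    tmp = alloca(n);
    memcpy(tmp, src, n);   // typically dead afterwards
    foo(src);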
@@ -1673,9 +1979,12 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) {
else if (auto *M = dyn_cast<MemMoveInst>(I))
RepeatInstruction = processMemMove(M);
else if (auto *CB = dyn_cast<CallBase>(I)) {
- for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
+ for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) {
if (CB->isByValArgument(i))
MadeChange |= processByValArgument(*CB, i);
+ else if (CB->onlyReadsMemory(i))
+ MadeChange |= processImmutArgument(*CB, i);
+ }
}
// Reprocess the instruction if desired.
@@ -1730,17 +2039,3 @@ bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
return MadeChange;
}
-
-/// This is the main transformation entry point for a function.
-bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
-
- auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
-
- return Impl.runImpl(F, TLI, AA, AC, DT, MSSA);
-}
diff --git a/llvm/lib/Transforms/Scalar/MergeICmps.cpp b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
index bcedb05890af..311a6435ba7c 100644
--- a/llvm/lib/Transforms/Scalar/MergeICmps.cpp
+++ b/llvm/lib/Transforms/Scalar/MergeICmps.cpp
@@ -42,6 +42,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/MergeICmps.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/Loads.h"
@@ -49,6 +50,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -157,7 +159,7 @@ BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) {
return {};
}
- APInt Offset = APInt(DL.getPointerTypeSizeInBits(Addr->getType()), 0);
+ APInt Offset = APInt(DL.getIndexTypeSizeInBits(Addr->getType()), 0);
Value *Base = Addr;
auto *GEP = dyn_cast<GetElementPtrInst>(Addr);
if (GEP) {
@@ -639,10 +641,11 @@ static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
if (Comparisons.size() == 1) {
LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n");
- Value *const LhsLoad =
- Builder.CreateLoad(FirstCmp.Lhs().LoadI->getType(), Lhs);
- Value *const RhsLoad =
- Builder.CreateLoad(FirstCmp.Rhs().LoadI->getType(), Rhs);
+ // Clone the loads to keep their metadata.
+ Instruction *const LhsLoad = Builder.Insert(FirstCmp.Lhs().LoadI->clone());
+ Instruction *const RhsLoad = Builder.Insert(FirstCmp.Rhs().LoadI->clone());
+ LhsLoad->replaceUsesOfWith(LhsLoad->getOperand(0), Lhs);
+ RhsLoad->replaceUsesOfWith(RhsLoad->getOperand(0), Rhs);
// There are no blocks to merge, just do the comparison.
IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad);
} else {
diff --git a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 62e75d98448c..6c5453831ade 100644
--- a/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -78,6 +78,7 @@
#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
@@ -191,11 +192,16 @@ StoreInst *MergedLoadStoreMotion::canSinkFromBlock(BasicBlock *BB1,
MemoryLocation Loc0 = MemoryLocation::get(Store0);
MemoryLocation Loc1 = MemoryLocation::get(Store1);
- if (AA->isMustAlias(Loc0, Loc1) && Store0->isSameOperationAs(Store1) &&
+
+ if (AA->isMustAlias(Loc0, Loc1) &&
!isStoreSinkBarrierInRange(*Store1->getNextNode(), BB1->back(), Loc1) &&
- !isStoreSinkBarrierInRange(*Store0->getNextNode(), BB0->back(), Loc0)) {
+ !isStoreSinkBarrierInRange(*Store0->getNextNode(), BB0->back(), Loc0) &&
+ Store0->hasSameSpecialState(Store1) &&
+ CastInst::isBitOrNoopPointerCastable(
+ Store0->getValueOperand()->getType(),
+ Store1->getValueOperand()->getType(),
+ Store0->getModule()->getDataLayout()))
return Store1;
- }
}
return nullptr;
}
@@ -254,6 +260,13 @@ void MergedLoadStoreMotion::sinkStoresAndGEPs(BasicBlock *BB, StoreInst *S0,
S0->applyMergedLocation(S0->getDebugLoc(), S1->getDebugLoc());
S0->mergeDIAssignID(S1);
+ // Insert a bitcast for stores of conflicting types (or just use the original
+ // value if the types match).
+ IRBuilder<> Builder(S0);
+ auto Cast = Builder.CreateBitOrPointerCast(S0->getValueOperand(),
+ S1->getValueOperand()->getType());
+ S0->setOperand(0, Cast);
+
// Create the new store to be inserted at the join point.
StoreInst *SNew = cast<StoreInst>(S0->clone());
SNew->insertBefore(&*InsertPt);
@@ -428,7 +441,7 @@ void MergedLoadStoreMotionPass::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
static_cast<PassInfoMixin<MergedLoadStoreMotionPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
- OS << "<";
+ OS << '<';
OS << (Options.SplitFooterBB ? "" : "no-") << "split-footer-bb";
- OS << ">";
+ OS << '>';
}
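[Editorial note: a minimal sketch of the two LLVM C++ APIs the relaxed store-sinking check above leans on; the helper name is ours and the snippet assumes the surrounding file's headers (IRBuilder, Instructions, DataLayout).]

    // Returns a value of DstTy equivalent to V, inserting a bitcast (or a noop
    // pointer cast) only when the types differ; returns nullptr when the stored
    // values cannot legally be coerced and the stores must stay separate.
    static Value *coerceStoredValue(IRBuilder<> &B, Value *V, Type *DstTy,
                                    const DataLayout &DL) {
      if (!CastInst::isBitOrNoopPointerCastable(V->getType(), DstTy, DL))
        return nullptr;
      return B.CreateBitOrPointerCast(V, DstTy);
    }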
diff --git a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
index 19bee4fa3879..9c3e9a2fd018 100644
--- a/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -351,9 +351,9 @@ Instruction *NaryReassociatePass::tryReassociateGEP(GetElementPtrInst *GEP) {
bool NaryReassociatePass::requiresSignExtension(Value *Index,
GetElementPtrInst *GEP) {
- unsigned PointerSizeInBits =
- DL->getPointerSizeInBits(GEP->getType()->getPointerAddressSpace());
- return cast<IntegerType>(Index->getType())->getBitWidth() < PointerSizeInBits;
+ unsigned IndexSizeInBits =
+ DL->getIndexSizeInBits(GEP->getType()->getPointerAddressSpace());
+ return cast<IntegerType>(Index->getType())->getBitWidth() < IndexSizeInBits;
}
GetElementPtrInst *
@@ -449,12 +449,12 @@ NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
return nullptr;
// NewGEP = &Candidate[RHS * (sizeof(IndexedType) / sizeof(Candidate[0])));
- Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
- if (RHS->getType() != IntPtrTy)
- RHS = Builder.CreateSExtOrTrunc(RHS, IntPtrTy);
+ Type *PtrIdxTy = DL->getIndexType(GEP->getType());
+ if (RHS->getType() != PtrIdxTy)
+ RHS = Builder.CreateSExtOrTrunc(RHS, PtrIdxTy);
if (IndexedSize != ElementSize) {
RHS = Builder.CreateMul(
- RHS, ConstantInt::get(IntPtrTy, IndexedSize / ElementSize));
+ RHS, ConstantInt::get(PtrIdxTy, IndexedSize / ElementSize));
}
GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(
Builder.CreateGEP(GEP->getResultElementType(), Candidate, RHS));
diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index d3dba0c5f1d5..1af40e2c4e62 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -93,8 +93,6 @@
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/Casting.h"
@@ -104,7 +102,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVNExpression.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -1277,10 +1274,17 @@ const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) const {
const CallExpression *
NewGVN::createCallExpression(CallInst *CI, const MemoryAccess *MA) const {
// FIXME: Add operand bundles for calls.
- // FIXME: Allow commutative matching for intrinsics.
auto *E =
new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, MA);
setBasicExpressionInfo(CI, E);
+ if (CI->isCommutative()) {
+ // Ensure that commutative intrinsics that only differ by a permutation
+ // of their operands get the same value number by sorting the operand value
+ // numbers.
+ assert(CI->getNumOperands() >= 2 && "Unsupported commutative intrinsic!");
+ if (shouldSwapOperands(E->getOperand(0), E->getOperand(1)))
+ E->swapOperands(0, 1);
+ }
return E;
}
@@ -1453,8 +1457,7 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
if (Offset >= 0) {
if (auto *C = dyn_cast<Constant>(
lookupOperandLeader(DepSI->getValueOperand()))) {
- if (Constant *Res =
- getConstantStoreValueForLoad(C, Offset, LoadType, DL)) {
+ if (Constant *Res = getConstantValueForLoad(C, Offset, LoadType, DL)) {
LLVM_DEBUG(dbgs() << "Coercing load from store " << *DepSI
<< " to constant " << *Res << "\n");
return createConstantExpression(Res);
@@ -1470,7 +1473,7 @@ NewGVN::performSymbolicLoadCoercion(Type *LoadType, Value *LoadPtr,
// We can coerce a constant load into a load.
if (auto *C = dyn_cast<Constant>(lookupOperandLeader(DepLI)))
if (auto *PossibleConstant =
- getConstantLoadValueForLoad(C, Offset, LoadType, DL)) {
+ getConstantValueForLoad(C, Offset, LoadType, DL)) {
LLVM_DEBUG(dbgs() << "Coercing load from load " << *LI
<< " to constant " << *PossibleConstant << "\n");
return createConstantExpression(PossibleConstant);
@@ -1617,6 +1620,12 @@ NewGVN::ExprResult NewGVN::performSymbolicCallEvaluation(Instruction *I) const {
if (CI->getFunction()->isPresplitCoroutine())
return ExprResult::none();
+ // Do not combine convergent calls since they implicitly depend on the set of
+ // threads that is currently executing, and they might be in different basic
+ // blocks.
+ if (CI->isConvergent())
+ return ExprResult::none();
+
if (AA->doesNotAccessMemory(CI)) {
return ExprResult::some(
createCallExpression(CI, TOPClass->getMemoryLeader()));
@@ -1992,6 +2001,7 @@ NewGVN::performSymbolicEvaluation(Value *V,
break;
case Instruction::BitCast:
case Instruction::AddrSpaceCast:
+ case Instruction::Freeze:
return createExpression(I);
break;
case Instruction::ICmp:
@@ -2739,10 +2749,10 @@ NewGVN::makePossiblePHIOfOps(Instruction *I,
return nullptr;
}
// No point in doing this for one-operand phis.
- if (OpPHI->getNumOperands() == 1) {
- OpPHI = nullptr;
- continue;
- }
+ // Since all PHIs for operands must be in the same block, they must have the
+ // same number of operands, so we can just abort.
+ if (OpPHI->getNumOperands() == 1)
+ return nullptr;
}
if (!OpPHI)
@@ -3712,9 +3722,10 @@ void NewGVN::deleteInstructionsInBlock(BasicBlock *BB) {
}
// Now insert something that simplifycfg will turn into an unreachable.
Type *Int8Ty = Type::getInt8Ty(BB->getContext());
- new StoreInst(PoisonValue::get(Int8Ty),
- Constant::getNullValue(Int8Ty->getPointerTo()),
- BB->getTerminator());
+ new StoreInst(
+ PoisonValue::get(Int8Ty),
+ Constant::getNullValue(PointerType::getUnqual(BB->getContext())),
+ BB->getTerminator());
}
void NewGVN::markInstructionForDeletion(Instruction *I) {
@@ -4208,61 +4219,6 @@ bool NewGVN::shouldSwapOperandsForIntrinsic(const Value *A, const Value *B,
return false;
}
-namespace {
-
-class NewGVNLegacyPass : public FunctionPass {
-public:
- // Pass identification, replacement for typeid.
- static char ID;
-
- NewGVNLegacyPass() : FunctionPass(ID) {
- initializeNewGVNLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-
-private:
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addRequired<MemorySSAWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
-};
-
-} // end anonymous namespace
-
-bool NewGVNLegacyPass::runOnFunction(Function &F) {
- if (skipFunction(F))
- return false;
- return NewGVN(F, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
- &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F),
- &getAnalysis<AAResultsWrapperPass>().getAAResults(),
- &getAnalysis<MemorySSAWrapperPass>().getMSSA(),
- F.getParent()->getDataLayout())
- .runGVN();
-}
-
-char NewGVNLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(NewGVNLegacyPass, "newgvn", "Global Value Numbering",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_END(NewGVNLegacyPass, "newgvn", "Global Value Numbering", false,
- false)
-
-// createGVNPass - The public interface to this file.
-FunctionPass *llvm::createNewGVNPass() { return new NewGVNLegacyPass(); }
-
PreservedAnalyses NewGVNPass::run(Function &F, AnalysisManager<Function> &AM) {
// Apparently the order in which we get these results matter for
// the old GVN (see Chandler's comment in GVN.cpp). I'll keep
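[Editorial note on the commutative-call change in NewGVN::createCallExpression above; the intrinsic below is our example and assumes it is one of those IntrinsicInst::isCommutative reports as commutative, such as the integer min/max family.]

    // %a = umin(%x, %y) and %b = umin(%y, %x) used to get distinct
    // CallExpressions; with the operand value numbers sorted, both canonicalize
    // to the same expression, so %b becomes congruent to %a and can be replaced.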
diff --git a/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp b/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
index e1cc3fc71c3e..0266eb1a9f50 100644
--- a/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/llvm/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -47,6 +47,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/PlaceSafepoints.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -67,7 +68,9 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
-#define DEBUG_TYPE "safepoint-placement"
+using namespace llvm;
+
+#define DEBUG_TYPE "place-safepoints"
STATISTIC(NumEntrySafepoints, "Number of entry safepoints inserted");
STATISTIC(NumBackedgeSafepoints, "Number of backedge safepoints inserted");
@@ -77,8 +80,6 @@ STATISTIC(CallInLoop,
STATISTIC(FiniteExecution,
"Number of loops without safepoints finite execution");
-using namespace llvm;
-
// Ignore opportunities to avoid placing safepoints on backedges, useful for
// validation
static cl::opt<bool> AllBackedges("spp-all-backedges", cl::Hidden,
@@ -97,10 +98,10 @@ static cl::opt<bool> SplitBackedge("spp-split-backedge", cl::Hidden,
cl::init(false));
namespace {
-
/// An analysis pass whose purpose is to identify each of the backedges in
/// the function which require a safepoint poll to be inserted.
-struct PlaceBackedgeSafepointsImpl : public FunctionPass {
+class PlaceBackedgeSafepointsLegacyPass : public FunctionPass {
+public:
static char ID;
/// The output of the pass - gives a list of each backedge (described by
@@ -111,17 +112,14 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
/// the call-dependent placement opts.
bool CallSafepointsEnabled;
- ScalarEvolution *SE = nullptr;
- DominatorTree *DT = nullptr;
- LoopInfo *LI = nullptr;
- TargetLibraryInfo *TLI = nullptr;
-
- PlaceBackedgeSafepointsImpl(bool CallSafepoints = false)
+ PlaceBackedgeSafepointsLegacyPass(bool CallSafepoints = false)
: FunctionPass(ID), CallSafepointsEnabled(CallSafepoints) {
- initializePlaceBackedgeSafepointsImplPass(*PassRegistry::getPassRegistry());
+ initializePlaceBackedgeSafepointsLegacyPassPass(
+ *PassRegistry::getPassRegistry());
}
bool runOnLoop(Loop *);
+
void runOnLoopAndSubLoops(Loop *L) {
// Visit all the subloops
for (Loop *I : *L)
@@ -149,39 +147,245 @@ struct PlaceBackedgeSafepointsImpl : public FunctionPass {
// analysis are preserved.
AU.setPreservesAll();
}
+
+private:
+ ScalarEvolution *SE = nullptr;
+ DominatorTree *DT = nullptr;
+ LoopInfo *LI = nullptr;
+ TargetLibraryInfo *TLI = nullptr;
};
-}
+} // namespace
static cl::opt<bool> NoEntry("spp-no-entry", cl::Hidden, cl::init(false));
static cl::opt<bool> NoCall("spp-no-call", cl::Hidden, cl::init(false));
static cl::opt<bool> NoBackedge("spp-no-backedge", cl::Hidden, cl::init(false));
-namespace {
-struct PlaceSafepoints : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
+char PlaceBackedgeSafepointsLegacyPass::ID = 0;
- PlaceSafepoints() : FunctionPass(ID) {
- initializePlaceSafepointsPass(*PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override;
+INITIALIZE_PASS_BEGIN(PlaceBackedgeSafepointsLegacyPass,
+ "place-backedge-safepoints-impl",
+ "Place Backedge Safepoints", false, false)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(PlaceBackedgeSafepointsLegacyPass,
+ "place-backedge-safepoints-impl",
+ "Place Backedge Safepoints", false, false)
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- // We modify the graph wholesale (inlining, block insertion, etc). We
- // preserve nothing at the moment. We could potentially preserve dom tree
- // if that was worth doing
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
-};
-}
+static bool containsUnconditionalCallSafepoint(Loop *L, BasicBlock *Header,
+ BasicBlock *Pred,
+ DominatorTree &DT,
+ const TargetLibraryInfo &TLI);
+
+static bool mustBeFiniteCountedLoop(Loop *L, ScalarEvolution *SE,
+ BasicBlock *Pred);
+
+static Instruction *findLocationForEntrySafepoint(Function &F,
+ DominatorTree &DT);
+
+static bool isGCSafepointPoll(Function &F);
+static bool shouldRewriteFunction(Function &F);
+static bool enableEntrySafepoints(Function &F);
+static bool enableBackedgeSafepoints(Function &F);
+static bool enableCallSafepoints(Function &F);
-// Insert a safepoint poll immediately before the given instruction. Does
-// not handle the parsability of state at the runtime call, that's the
-// callers job.
static void
InsertSafepointPoll(Instruction *InsertBefore,
std::vector<CallBase *> &ParsePointsNeeded /*rval*/,
const TargetLibraryInfo &TLI);
+bool PlaceBackedgeSafepointsLegacyPass::runOnLoop(Loop *L) {
+ // Loop through all loop latches (branches controlling backedges). We need
+ // to place a safepoint on every backedge (potentially).
+ // Note: In common usage, there will be only one edge due to LoopSimplify
+ // having run sometime earlier in the pipeline, but this code must be correct
+ // w.r.t. loops with multiple backedges.
+ BasicBlock *Header = L->getHeader();
+ SmallVector<BasicBlock *, 16> LoopLatches;
+ L->getLoopLatches(LoopLatches);
+ for (BasicBlock *Pred : LoopLatches) {
+ assert(L->contains(Pred));
+
+ // Make a policy decision about whether this loop needs a safepoint or
+ // not. Note that this is about unburdening the optimizer in loops, not
+ // avoiding the runtime cost of the actual safepoint.
+ if (!AllBackedges) {
+ if (mustBeFiniteCountedLoop(L, SE, Pred)) {
+ LLVM_DEBUG(dbgs() << "skipping safepoint placement in finite loop\n");
+ FiniteExecution++;
+ continue;
+ }
+ if (CallSafepointsEnabled &&
+ containsUnconditionalCallSafepoint(L, Header, Pred, *DT, *TLI)) {
+ // Note: This is only semantically legal since we won't do any further
+ // IPO or inlining before the actual call insertion. If we hadn't, we
+ // might later lose this call safepoint.
+ LLVM_DEBUG(
+ dbgs()
+ << "skipping safepoint placement due to unconditional call\n");
+ CallInLoop++;
+ continue;
+ }
+ }
+
+ // TODO: We can create an inner loop which runs a finite number of
+ // iterations with an outer loop which contains a safepoint. This would
+ // not help runtime performance that much, but it might help our ability to
+ // optimize the inner loop.
+
+ // Safepoint insertion would involve creating a new basic block (as the
+ // target of the current backedge) which does the safepoint (of all live
+ // variables) and branches to the true header
+ Instruction *Term = Pred->getTerminator();
+
+ LLVM_DEBUG(dbgs() << "[LSP] terminator instruction: " << *Term);
+
+ PollLocations.push_back(Term);
+ }
+
+ return false;
+}
+
+bool PlaceSafepointsPass::runImpl(Function &F, const TargetLibraryInfo &TLI) {
+ if (F.isDeclaration() || F.empty()) {
+ // This is a declaration, nothing to do. Must exit early to avoid crash in
+ // dom tree calculation
+ return false;
+ }
+
+ if (isGCSafepointPoll(F)) {
+ // Given we're inlining this inside of safepoint poll insertion, this
+ // doesn't make any sense. Note that we do make any contained calls
+ // parseable after we inline a poll.
+ return false;
+ }
+
+ if (!shouldRewriteFunction(F))
+ return false;
+
+ bool Modified = false;
+
+ // In various bits below, we rely on the fact that uses are reachable from
+ // defs. When there are basic blocks unreachable from the entry, dominance
+ // and reachability queries return nonsensical results. Thus, we preprocess
+ // the function to ensure these properties hold.
+ Modified |= removeUnreachableBlocks(F);
+
+ // STEP 1 - Insert the safepoint polling locations. We do not need to
+ // actually insert parse points yet. That will be done for all polls and
+ // calls in a single pass.
+
+ DominatorTree DT;
+ DT.recalculate(F);
+
+ SmallVector<Instruction *, 16> PollsNeeded;
+ std::vector<CallBase *> ParsePointNeeded;
+
+ if (enableBackedgeSafepoints(F)) {
+ // Construct a pass manager to run the LoopPass backedge logic. We
+ // need the pass manager to handle scheduling all the loop passes
+ // appropriately. Doing this by hand is painful and just not worth messing
+ // with for the moment.
+ legacy::FunctionPassManager FPM(F.getParent());
+ bool CanAssumeCallSafepoints = enableCallSafepoints(F);
+ auto *PBS = new PlaceBackedgeSafepointsLegacyPass(CanAssumeCallSafepoints);
+ FPM.add(PBS);
+ FPM.run(F);
+
+ // We preserve dominance information when inserting the poll; otherwise
+ // we'd have to recalculate it on every insert.
+ DT.recalculate(F);
+
+ auto &PollLocations = PBS->PollLocations;
+
+ auto OrderByBBName = [](Instruction *a, Instruction *b) {
+ return a->getParent()->getName() < b->getParent()->getName();
+ };
+ // We need the order of the list to be stable so that naming ends up stable
+ // when we split edges. This makes test cases much easier to write.
+ llvm::sort(PollLocations, OrderByBBName);
+
+ // We can sometimes end up with duplicate poll locations. This happens if
+ // a single loop is visited more than once. The fact that this happens seems
+ // wrong, but it does happen for the split-backedge.ll test case.
+ PollLocations.erase(std::unique(PollLocations.begin(), PollLocations.end()),
+ PollLocations.end());
+
+ // Insert a poll at each point the analysis pass identified
+ // The poll location must be the terminator of a loop latch block.
+ for (Instruction *Term : PollLocations) {
+ // We are inserting a poll, the function is modified
+ Modified = true;
+
+ if (SplitBackedge) {
+ // Split the backedge of the loop and insert the poll within that new
+ // basic block. This creates a loop with two latches per original
+ // latch (which is non-ideal), but this appears to be easier to
+ // optimize in practice than inserting the poll immediately before the
+ // latch test.
+
+ // Since this is a latch, at least one of the successors must dominate
+ // it. It's possible that we have a) duplicate edges to the same header
+ // and b) edges to distinct loop headers. We need to insert polls on
+ // each.
+ SetVector<BasicBlock *> Headers;
+ for (unsigned i = 0; i < Term->getNumSuccessors(); i++) {
+ BasicBlock *Succ = Term->getSuccessor(i);
+ if (DT.dominates(Succ, Term->getParent())) {
+ Headers.insert(Succ);
+ }
+ }
+ assert(!Headers.empty() && "poll location is not a loop latch?");
+
+ // The split loop structure here is so that we only need to recalculate
+ // the dominator tree once. Alternatively, we could just keep it up to
+ // date and use a more natural merged loop.
+ SetVector<BasicBlock *> SplitBackedges;
+ for (BasicBlock *Header : Headers) {
+ BasicBlock *NewBB = SplitEdge(Term->getParent(), Header, &DT);
+ PollsNeeded.push_back(NewBB->getTerminator());
+ NumBackedgeSafepoints++;
+ }
+ } else {
+ // Split the latch block itself, right before the terminator.
+ PollsNeeded.push_back(Term);
+ NumBackedgeSafepoints++;
+ }
+ }
+ }
+
+ if (enableEntrySafepoints(F)) {
+ if (Instruction *Location = findLocationForEntrySafepoint(F, DT)) {
+ PollsNeeded.push_back(Location);
+ Modified = true;
+ NumEntrySafepoints++;
+ }
+ // TODO: else we should assert that there was, in fact, a policy choice to
+ // not insert an entry safepoint poll.
+ }
+
+ // Now that we've identified all the needed safepoint poll locations, insert
+ // the safepoint polls themselves.
+ for (Instruction *PollLocation : PollsNeeded) {
+ std::vector<CallBase *> RuntimeCalls;
+ InsertSafepointPoll(PollLocation, RuntimeCalls, TLI);
+ llvm::append_range(ParsePointNeeded, RuntimeCalls);
+ }
+
+ return Modified;
+}
+
+PreservedAnalyses PlaceSafepointsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+
+ if (!runImpl(F, TLI))
+ return PreservedAnalyses::all();
+
+ // TODO: can we preserve more?
+ return PreservedAnalyses::none();
+}
+
static bool needsStatepoint(CallBase *Call, const TargetLibraryInfo &TLI) {
if (callsGCLeafFunction(Call, TLI))
return false;
@@ -306,58 +510,6 @@ static void scanInlinedCode(Instruction *Start, Instruction *End,
}
}
-bool PlaceBackedgeSafepointsImpl::runOnLoop(Loop *L) {
- // Loop through all loop latches (branches controlling backedges). We need
- // to place a safepoint on every backedge (potentially).
- // Note: In common usage, there will be only one edge due to LoopSimplify
- // having run sometime earlier in the pipeline, but this code must be correct
- // w.r.t. loops with multiple backedges.
- BasicBlock *Header = L->getHeader();
- SmallVector<BasicBlock*, 16> LoopLatches;
- L->getLoopLatches(LoopLatches);
- for (BasicBlock *Pred : LoopLatches) {
- assert(L->contains(Pred));
-
- // Make a policy decision about whether this loop needs a safepoint or
- // not. Note that this is about unburdening the optimizer in loops, not
- // avoiding the runtime cost of the actual safepoint.
- if (!AllBackedges) {
- if (mustBeFiniteCountedLoop(L, SE, Pred)) {
- LLVM_DEBUG(dbgs() << "skipping safepoint placement in finite loop\n");
- FiniteExecution++;
- continue;
- }
- if (CallSafepointsEnabled &&
- containsUnconditionalCallSafepoint(L, Header, Pred, *DT, *TLI)) {
- // Note: This is only semantically legal since we won't do any further
- // IPO or inlining before the actual call insertion.. If we hadn't, we
- // might latter loose this call safepoint.
- LLVM_DEBUG(
- dbgs()
- << "skipping safepoint placement due to unconditional call\n");
- CallInLoop++;
- continue;
- }
- }
-
- // TODO: We can create an inner loop which runs a finite number of
- // iterations with an outer loop which contains a safepoint. This would
- // not help runtime performance that much, but it might help our ability to
- // optimize the inner loop.
-
- // Safepoint insertion would involve creating a new basic block (as the
- // target of the current backedge) which does the safepoint (of all live
- // variables) and branches to the true header
- Instruction *Term = Pred->getTerminator();
-
- LLVM_DEBUG(dbgs() << "[LSP] terminator instruction: " << *Term);
-
- PollLocations.push_back(Term);
- }
-
- return false;
-}
-
/// Returns true if an entry safepoint is not required before this callsite in
/// the caller function.
static bool doesNotRequireEntrySafepointBefore(CallBase *Call) {
@@ -463,161 +615,9 @@ static bool enableEntrySafepoints(Function &F) { return !NoEntry; }
static bool enableBackedgeSafepoints(Function &F) { return !NoBackedge; }
static bool enableCallSafepoints(Function &F) { return !NoCall; }
-bool PlaceSafepoints::runOnFunction(Function &F) {
- if (F.isDeclaration() || F.empty()) {
- // This is a declaration, nothing to do. Must exit early to avoid crash in
- // dom tree calculation
- return false;
- }
-
- if (isGCSafepointPoll(F)) {
- // Given we're inlining this inside of safepoint poll insertion, this
- // doesn't make any sense. Note that we do make any contained calls
- // parseable after we inline a poll.
- return false;
- }
-
- if (!shouldRewriteFunction(F))
- return false;
-
- const TargetLibraryInfo &TLI =
- getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
-
- bool Modified = false;
-
- // In various bits below, we rely on the fact that uses are reachable from
- // defs. When there are basic blocks unreachable from the entry, dominance
- // and reachablity queries return non-sensical results. Thus, we preprocess
- // the function to ensure these properties hold.
- Modified |= removeUnreachableBlocks(F);
-
- // STEP 1 - Insert the safepoint polling locations. We do not need to
- // actually insert parse points yet. That will be done for all polls and
- // calls in a single pass.
-
- DominatorTree DT;
- DT.recalculate(F);
-
- SmallVector<Instruction *, 16> PollsNeeded;
- std::vector<CallBase *> ParsePointNeeded;
-
- if (enableBackedgeSafepoints(F)) {
- // Construct a pass manager to run the LoopPass backedge logic. We
- // need the pass manager to handle scheduling all the loop passes
- // appropriately. Doing this by hand is painful and just not worth messing
- // with for the moment.
- legacy::FunctionPassManager FPM(F.getParent());
- bool CanAssumeCallSafepoints = enableCallSafepoints(F);
- auto *PBS = new PlaceBackedgeSafepointsImpl(CanAssumeCallSafepoints);
- FPM.add(PBS);
- FPM.run(F);
-
- // We preserve dominance information when inserting the poll, otherwise
- // we'd have to recalculate this on every insert
- DT.recalculate(F);
-
- auto &PollLocations = PBS->PollLocations;
-
- auto OrderByBBName = [](Instruction *a, Instruction *b) {
- return a->getParent()->getName() < b->getParent()->getName();
- };
- // We need the order of list to be stable so that naming ends up stable
- // when we split edges. This makes test cases much easier to write.
- llvm::sort(PollLocations, OrderByBBName);
-
- // We can sometimes end up with duplicate poll locations. This happens if
- // a single loop is visited more than once. The fact this happens seems
- // wrong, but it does happen for the split-backedge.ll test case.
- PollLocations.erase(std::unique(PollLocations.begin(),
- PollLocations.end()),
- PollLocations.end());
-
- // Insert a poll at each point the analysis pass identified
- // The poll location must be the terminator of a loop latch block.
- for (Instruction *Term : PollLocations) {
- // We are inserting a poll, the function is modified
- Modified = true;
-
- if (SplitBackedge) {
- // Split the backedge of the loop and insert the poll within that new
- // basic block. This creates a loop with two latches per original
- // latch (which is non-ideal), but this appears to be easier to
- // optimize in practice than inserting the poll immediately before the
- // latch test.
-
- // Since this is a latch, at least one of the successors must dominate
- // it. Its possible that we have a) duplicate edges to the same header
- // and b) edges to distinct loop headers. We need to insert pools on
- // each.
- SetVector<BasicBlock *> Headers;
- for (unsigned i = 0; i < Term->getNumSuccessors(); i++) {
- BasicBlock *Succ = Term->getSuccessor(i);
- if (DT.dominates(Succ, Term->getParent())) {
- Headers.insert(Succ);
- }
- }
- assert(!Headers.empty() && "poll location is not a loop latch?");
-
- // The split loop structure here is so that we only need to recalculate
- // the dominator tree once. Alternatively, we could just keep it up to
- // date and use a more natural merged loop.
- SetVector<BasicBlock *> SplitBackedges;
- for (BasicBlock *Header : Headers) {
- BasicBlock *NewBB = SplitEdge(Term->getParent(), Header, &DT);
- PollsNeeded.push_back(NewBB->getTerminator());
- NumBackedgeSafepoints++;
- }
- } else {
- // Split the latch block itself, right before the terminator.
- PollsNeeded.push_back(Term);
- NumBackedgeSafepoints++;
- }
- }
- }
-
- if (enableEntrySafepoints(F)) {
- if (Instruction *Location = findLocationForEntrySafepoint(F, DT)) {
- PollsNeeded.push_back(Location);
- Modified = true;
- NumEntrySafepoints++;
- }
- // TODO: else we should assert that there was, in fact, a policy choice to
- // not insert a entry safepoint poll.
- }
-
- // Now that we've identified all the needed safepoint poll locations, insert
- // safepoint polls themselves.
- for (Instruction *PollLocation : PollsNeeded) {
- std::vector<CallBase *> RuntimeCalls;
- InsertSafepointPoll(PollLocation, RuntimeCalls, TLI);
- llvm::append_range(ParsePointNeeded, RuntimeCalls);
- }
-
- return Modified;
-}
-
-char PlaceBackedgeSafepointsImpl::ID = 0;
-char PlaceSafepoints::ID = 0;
-
-FunctionPass *llvm::createPlaceSafepointsPass() {
- return new PlaceSafepoints();
-}
-
-INITIALIZE_PASS_BEGIN(PlaceBackedgeSafepointsImpl,
- "place-backedge-safepoints-impl",
- "Place Backedge Safepoints", false, false)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_END(PlaceBackedgeSafepointsImpl,
- "place-backedge-safepoints-impl",
- "Place Backedge Safepoints", false, false)
-
-INITIALIZE_PASS_BEGIN(PlaceSafepoints, "place-safepoints", "Place Safepoints",
- false, false)
-INITIALIZE_PASS_END(PlaceSafepoints, "place-safepoints", "Place Safepoints",
- false, false)
-
+// Insert a safepoint poll immediately before the given instruction. This does
+// not handle making the state at the runtime call parsable; that is the
+// caller's job.
static void
InsertSafepointPoll(Instruction *InsertBefore,
std::vector<CallBase *> &ParsePointsNeeded /*rval*/,
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 21628b61edd6..40c84e249523 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -52,6 +52,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
@@ -70,6 +71,12 @@ STATISTIC(NumChanged, "Number of insts reassociated");
STATISTIC(NumAnnihil, "Number of expr tree annihilated");
STATISTIC(NumFactor , "Number of multiplies factored");
+static cl::opt<bool>
+ UseCSELocalOpt(DEBUG_TYPE "-use-cse-local",
+ cl::desc("Only reorder expressions within a basic block "
+ "when exposing CSE opportunities"),
+ cl::init(true), cl::Hidden);
+
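Since the option is a hidden cl::opt keyed off DEBUG_TYPE, it can be toggled for experiments from the opt command line, e.g. opt -passes=reassociate -reassociate-use-cse-local=false in.ll -S (an illustrative invocation that assumes DEBUG_TYPE expands to "reassociate" here; it is not part of the patch).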
#ifndef NDEBUG
/// Print out the expression identified in the Ops list.
static void PrintOps(Instruction *I, const SmallVectorImpl<ValueEntry> &Ops) {
@@ -620,8 +627,7 @@ static bool LinearizeExprTree(Instruction *I,
// The leaves, repeated according to their weights, represent the linearized
// form of the expression.
- for (unsigned i = 0, e = LeafOrder.size(); i != e; ++i) {
- Value *V = LeafOrder[i];
+ for (Value *V : LeafOrder) {
LeafMap::iterator It = Leaves.find(V);
if (It == Leaves.end())
// Node initially thought to be a leaf wasn't.
@@ -683,10 +689,12 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
NotRewritable.insert(Ops[i].Op);
- // ExpressionChanged - Non-null if the rewritten expression differs from the
- // original in some non-trivial way, requiring the clearing of optional flags.
- // Flags are cleared from the operator in ExpressionChanged up to I inclusive.
- BinaryOperator *ExpressionChanged = nullptr;
+ // ExpressionChangedStart - Non-null if the rewritten expression differs from
+ // the original in some non-trivial way, requiring the clearing of optional
+ // flags. Flags are cleared from the operator in ExpressionChangedStart up to
+ // ExpressionChangedEnd inclusive.
+ BinaryOperator *ExpressionChangedStart = nullptr,
+ *ExpressionChangedEnd = nullptr;
for (unsigned i = 0; ; ++i) {
// The last operation (which comes earliest in the IR) is special as both
// operands will come from Ops, rather than just one with the other being
@@ -728,7 +736,9 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
}
LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
- ExpressionChanged = Op;
+ ExpressionChangedStart = Op;
+ if (!ExpressionChangedEnd)
+ ExpressionChangedEnd = Op;
MadeChange = true;
++NumChanged;
@@ -750,7 +760,9 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
if (BO && !NotRewritable.count(BO))
NodesToRewrite.push_back(BO);
Op->setOperand(1, NewRHS);
- ExpressionChanged = Op;
+ ExpressionChangedStart = Op;
+ if (!ExpressionChangedEnd)
+ ExpressionChangedEnd = Op;
}
LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
MadeChange = true;
@@ -787,7 +799,9 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
LLVM_DEBUG(dbgs() << "RA: " << *Op << '\n');
Op->setOperand(0, NewOp);
LLVM_DEBUG(dbgs() << "TO: " << *Op << '\n');
- ExpressionChanged = Op;
+ ExpressionChangedStart = Op;
+ if (!ExpressionChangedEnd)
+ ExpressionChangedEnd = Op;
MadeChange = true;
++NumChanged;
Op = NewOp;
@@ -797,27 +811,36 @@ void ReassociatePass::RewriteExprTree(BinaryOperator *I,
// starting from the operator specified in ExpressionChanged, and compactify
// the operators to just before the expression root to guarantee that the
// expression tree is dominated by all of Ops.
- if (ExpressionChanged)
+ if (ExpressionChangedStart) {
+ bool ClearFlags = true;
do {
// Preserve FastMathFlags.
- if (isa<FPMathOperator>(I)) {
- FastMathFlags Flags = I->getFastMathFlags();
- ExpressionChanged->clearSubclassOptionalData();
- ExpressionChanged->setFastMathFlags(Flags);
- } else
- ExpressionChanged->clearSubclassOptionalData();
-
- if (ExpressionChanged == I)
+ if (ClearFlags) {
+ if (isa<FPMathOperator>(I)) {
+ FastMathFlags Flags = I->getFastMathFlags();
+ ExpressionChangedStart->clearSubclassOptionalData();
+ ExpressionChangedStart->setFastMathFlags(Flags);
+ } else
+ ExpressionChangedStart->clearSubclassOptionalData();
+ }
+
+ if (ExpressionChangedStart == ExpressionChangedEnd)
+ ClearFlags = false;
+ if (ExpressionChangedStart == I)
break;
// Discard any debug info related to the expressions that has changed (we
- // can leave debug infor related to the root, since the result of the
- // expression tree should be the same even after reassociation).
- replaceDbgUsesWithUndef(ExpressionChanged);
-
- ExpressionChanged->moveBefore(I);
- ExpressionChanged = cast<BinaryOperator>(*ExpressionChanged->user_begin());
+ // can leave debug info related to the root and any operation that didn't
+ // change, since the result of the expression tree should be the same
+ // even after reassociation).
+ if (ClearFlags)
+ replaceDbgUsesWithUndef(ExpressionChangedStart);
+
+ ExpressionChangedStart->moveBefore(I);
+ ExpressionChangedStart =
+ cast<BinaryOperator>(*ExpressionChangedStart->user_begin());
} while (true);
+ }
// Throw away any left over nodes from the original expression.
for (unsigned i = 0, e = NodesToRewrite.size(); i != e; ++i)
@@ -1507,8 +1530,7 @@ Value *ReassociatePass::OptimizeXor(Instruction *I,
// Step 4: Reassemble the Ops
if (Changed) {
Ops.clear();
- for (unsigned int i = 0, e = Opnds.size(); i < e; i++) {
- XorOpnd &O = Opnds[i];
+ for (const XorOpnd &O : Opnds) {
if (O.isInvalid())
continue;
ValueEntry VE(getRank(O.getValue()), O.getValue());
@@ -1644,8 +1666,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,
// Add one to FactorOccurrences for each unique factor in this op.
SmallPtrSet<Value*, 8> Duplicates;
- for (unsigned i = 0, e = Factors.size(); i != e; ++i) {
- Value *Factor = Factors[i];
+ for (Value *Factor : Factors) {
if (!Duplicates.insert(Factor).second)
continue;
@@ -2048,7 +2069,7 @@ void ReassociatePass::EraseInst(Instruction *I) {
// blocks because it's a waste of time and also because it can
// lead to infinite loop due to LLVM's non-standard definition
// of dominance.
- if (ValueRankMap.find(Op) != ValueRankMap.end())
+ if (ValueRankMap.contains(Op))
RedoInsts.insert(Op);
}
@@ -2410,8 +2431,67 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
unsigned BestRank = 0;
std::pair<unsigned, unsigned> BestPair;
unsigned Idx = I->getOpcode() - Instruction::BinaryOpsBegin;
- for (unsigned i = 0; i < Ops.size() - 1; ++i)
- for (unsigned j = i + 1; j < Ops.size(); ++j) {
+ unsigned LimitIdx = 0;
+  // With the CSE-driven heuristic, we are about to place two values at the
+  // beginning of the expression even though they may be defined very late in
+  // the CFG. When using the CSE-local heuristic, we avoid creating dependences
+  // between completely unrelated parts of the CFG by limiting the expression
+  // reordering to the values that live in the first seen basic block.
+  // The main idea is that we want to avoid forming expressions that would
+  // become loop-dependent.
+ if (UseCSELocalOpt) {
+ const BasicBlock *FirstSeenBB = nullptr;
+ int StartIdx = Ops.size() - 1;
+ // Skip the first value of the expression since we need at least two
+ // values to materialize an expression. I.e., even if this value is
+ // anchored in a different basic block, the actual first sub expression
+ // will be anchored on the second value.
+ for (int i = StartIdx - 1; i != -1; --i) {
+ const Value *Val = Ops[i].Op;
+ const auto *CurrLeafInstr = dyn_cast<Instruction>(Val);
+ const BasicBlock *SeenBB = nullptr;
+ if (!CurrLeafInstr) {
+        // The value is free of any CFG dependencies.
+        // Treat it as if it lived in the entry block.
+        //
+        // We do this to make sure all the values falling on this path are
+        // seen through the same anchor point. The rationale is that these
+        // values can be combined to form a sub-expression free of any CFG
+        // dependencies, so we want them to stay together.
+        // We could be cleverer and postpone the anchor down to the first
+        // anchored value, but that is likely complicated to get right.
+        // E.g., we wouldn't want to do that if it meant being stuck in a
+        // loop.
+        //
+        // For instance, we wouldn't want to change:
+        // res = arg1 op arg2 op arg3 op ... op loop_val1 op loop_val2 ...
+        // into
+        // res = loop_val1 op arg1 op arg2 op arg3 op ... op loop_val2 ...
+        // because all the sub-expressions involving arg2..N would be stuck
+        // between two loop-dependent values.
+ SeenBB = &I->getParent()->getParent()->getEntryBlock();
+ } else {
+ SeenBB = CurrLeafInstr->getParent();
+ }
+
+ if (!FirstSeenBB) {
+ FirstSeenBB = SeenBB;
+ continue;
+ }
+ if (FirstSeenBB != SeenBB) {
+        // The i-th value is in a different basic block.
+        // Rewind the index once to point to the last value in the same basic
+        // block.
+ LimitIdx = i + 1;
+ LLVM_DEBUG(dbgs() << "CSE reordering: Consider values between ["
+ << LimitIdx << ", " << StartIdx << "]\n");
+ break;
+ }
+ }
+ }
+ for (unsigned i = Ops.size() - 1; i > LimitIdx; --i) {
+    // We must use a signed type so j can go below zero when LimitIdx is 0.
+ for (int j = i - 1; j >= (int)LimitIdx; --j) {
unsigned Score = 0;
Value *Op0 = Ops[i].Op;
Value *Op1 = Ops[j].Op;
@@ -2429,12 +2509,26 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
}
unsigned MaxRank = std::max(Ops[i].Rank, Ops[j].Rank);
+
+      // By construction, the operands are sorted in reverse topological
+      // order.
+      // So we tend to form (sub-)expressions with values that are close to
+      // each other.
+      //
+      // Now, to expose more CSE opportunities, we want to make the pair of
+      // operands that occurs most often (as statically computed in
+      // BuildPairMap) the first sub-expression.
+      //
+      // If two pairs occur equally often, we pick the one with the
+      // lowest rank, meaning the one with both operands appearing first in
+      // the topological order.
if (Score > Max || (Score == Max && MaxRank < BestRank)) {
- BestPair = {i, j};
+ BestPair = {j, i};
Max = Score;
BestRank = MaxRank;
}
}
+ }
if (Max > 1) {
auto Op0 = Ops[BestPair.first];
auto Op1 = Ops[BestPair.second];
@@ -2444,6 +2538,8 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
Ops.push_back(Op1);
}
}
+ LLVM_DEBUG(dbgs() << "RAOut after CSE reorder:\t"; PrintOps(I, Ops);
+ dbgs() << '\n');
// Now that we ordered and optimized the expressions, splat them back into
// the expression tree, removing any unneeded nodes.
RewriteExprTree(I, Ops);
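To make the CSE-local windowing above concrete, here is a small self-contained sketch (illustrative only; plain strings stand in for LLVM's Value and BasicBlock, and the names ToyLeaf and computeLimitIdx are invented for this example) of how LimitIdx narrows the pair search:

    // Ops mirrors ReassociatePass: sorted by descending rank, so Ops.back()
    // is the earliest-defined (lowest-rank) value and the pair search is
    // restricted to indices in [LimitIdx, Ops.size() - 1].
    #include <string>
    #include <vector>

    struct ToyLeaf {
      std::string Name;
      std::string DefBB; // "entry" stands in for CFG-independent values
    };

    static unsigned computeLimitIdx(const std::vector<ToyLeaf> &Ops) {
      unsigned LimitIdx = 0;
      const std::string *FirstSeenBB = nullptr;
      int StartIdx = static_cast<int>(Ops.size()) - 1;
      // Skip Ops[StartIdx]: the first sub-expression is anchored on the value
      // just below it, exactly as in ReassociatePass::ReassociateExpression.
      for (int i = StartIdx - 1; i != -1; --i) {
        const std::string &SeenBB = Ops[i].DefBB;
        if (!FirstSeenBB) {
          FirstSeenBB = &SeenBB;
          continue;
        }
        if (*FirstSeenBB != SeenBB) {
          LimitIdx = i + 1; // last index whose value is in the same block
          break;
        }
      }
      return LimitIdx;
    }

    // Example: {loop_val@loop, arg3@entry, arg2@entry, arg1@entry} yields
    // LimitIdx == 1, so only pairs among {arg3, arg2, arg1} are considered
    // and loop_val is never pulled next to the entry-block arguments.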
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index bcb012b79c2e..908bda5709a0 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Argument.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
@@ -36,6 +37,7 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GCStrategy.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
@@ -125,6 +127,9 @@ static cl::opt<bool> RematDerivedAtUses("rs4gc-remat-derived-at-uses",
/// constant physical memory: llvm.invariant.start.
static void stripNonValidData(Module &M);
+// Find the GC strategy for a function, or null if it doesn't have one.
+static std::unique_ptr<GCStrategy> findGCStrategy(Function &F);
+
static bool shouldRewriteStatepointsIn(Function &F);
PreservedAnalyses RewriteStatepointsForGC::run(Module &M,
@@ -162,76 +167,6 @@ PreservedAnalyses RewriteStatepointsForGC::run(Module &M,
namespace {
-class RewriteStatepointsForGCLegacyPass : public ModulePass {
- RewriteStatepointsForGC Impl;
-
-public:
- static char ID; // Pass identification, replacement for typeid
-
- RewriteStatepointsForGCLegacyPass() : ModulePass(ID), Impl() {
- initializeRewriteStatepointsForGCLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnModule(Module &M) override {
- bool Changed = false;
- for (Function &F : M) {
- // Nothing to do for declarations.
- if (F.isDeclaration() || F.empty())
- continue;
-
- // Policy choice says not to rewrite - the most common reason is that
- // we're compiling code without a GCStrategy.
- if (!shouldRewriteStatepointsIn(F))
- continue;
-
- TargetTransformInfo &TTI =
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- const TargetLibraryInfo &TLI =
- getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- auto &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
-
- Changed |= Impl.runOnFunction(F, DT, TTI, TLI);
- }
-
- if (!Changed)
- return false;
-
- // stripNonValidData asserts that shouldRewriteStatepointsIn
- // returns true for at least one function in the module. Since at least
- // one function changed, we know that the precondition is satisfied.
- stripNonValidData(M);
- return true;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- // We add and rewrite a bunch of instructions, but don't really do much
- // else. We could in theory preserve a lot more analyses here.
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- }
-};
-
-} // end anonymous namespace
-
-char RewriteStatepointsForGCLegacyPass::ID = 0;
-
-ModulePass *llvm::createRewriteStatepointsForGCLegacyPass() {
- return new RewriteStatepointsForGCLegacyPass();
-}
-
-INITIALIZE_PASS_BEGIN(RewriteStatepointsForGCLegacyPass,
- "rewrite-statepoints-for-gc",
- "Make relocations explicit at statepoints", false, false)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(RewriteStatepointsForGCLegacyPass,
- "rewrite-statepoints-for-gc",
- "Make relocations explicit at statepoints", false, false)
-
-namespace {
-
struct GCPtrLivenessData {
/// Values defined in this block.
MapVector<BasicBlock *, SetVector<Value *>> KillSet;
@@ -311,37 +246,35 @@ static ArrayRef<Use> GetDeoptBundleOperands(const CallBase *Call) {
/// Compute the live-in set for every basic block in the function
static void computeLiveInValues(DominatorTree &DT, Function &F,
- GCPtrLivenessData &Data);
+ GCPtrLivenessData &Data, GCStrategy *GC);
/// Given results from the dataflow liveness computation, find the set of live
/// Values at a particular instruction.
static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
- StatepointLiveSetTy &out);
+ StatepointLiveSetTy &out, GCStrategy *GC);
-// TODO: Once we can get to the GCStrategy, this becomes
-// std::optional<bool> isGCManagedPointer(const Type *Ty) const override {
+static bool isGCPointerType(Type *T, GCStrategy *GC) {
+ assert(GC && "GC Strategy for isGCPointerType cannot be null");
-static bool isGCPointerType(Type *T) {
- if (auto *PT = dyn_cast<PointerType>(T))
- // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
- // GC managed heap. We know that a pointer into this heap needs to be
- // updated and that no other pointer does.
- return PT->getAddressSpace() == 1;
- return false;
+ if (!isa<PointerType>(T))
+ return false;
+
+ // conservative - same as StatepointLowering
+ return GC->isGCManagedPointer(T).value_or(true);
}
// Return true if this type is one which a) is a gc pointer or contains a GC
// pointer and b) is of a type this code expects to encounter as a live value.
// (The insertion code will assert that a type which matches (a) and not (b)
// is not encountered.)
-static bool isHandledGCPointerType(Type *T) {
+static bool isHandledGCPointerType(Type *T, GCStrategy *GC) {
// We fully support gc pointers
- if (isGCPointerType(T))
+ if (isGCPointerType(T, GC))
return true;
// We partially support vectors of gc pointers. The code will assert if it
// can't handle something.
if (auto VT = dyn_cast<VectorType>(T))
- if (isGCPointerType(VT->getElementType()))
+ if (isGCPointerType(VT->getElementType(), GC))
return true;
return false;
}
@@ -349,23 +282,24 @@ static bool isHandledGCPointerType(Type *T) {
#ifndef NDEBUG
/// Returns true if this type contains a gc pointer whether we know how to
/// handle that type or not.
-static bool containsGCPtrType(Type *Ty) {
- if (isGCPointerType(Ty))
+static bool containsGCPtrType(Type *Ty, GCStrategy *GC) {
+ if (isGCPointerType(Ty, GC))
return true;
if (VectorType *VT = dyn_cast<VectorType>(Ty))
- return isGCPointerType(VT->getScalarType());
+ return isGCPointerType(VT->getScalarType(), GC);
if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
- return containsGCPtrType(AT->getElementType());
+ return containsGCPtrType(AT->getElementType(), GC);
if (StructType *ST = dyn_cast<StructType>(Ty))
- return llvm::any_of(ST->elements(), containsGCPtrType);
+ return llvm::any_of(ST->elements(),
+ [GC](Type *Ty) { return containsGCPtrType(Ty, GC); });
return false;
}
// Returns true if this is a type which a) is a gc pointer or contains a GC
// pointer and b) is of a type which the code doesn't expect (i.e. first class
// aggregates). Used to trip assertions.
-static bool isUnhandledGCPointerType(Type *Ty) {
- return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty);
+static bool isUnhandledGCPointerType(Type *Ty, GCStrategy *GC) {
+ return containsGCPtrType(Ty, GC) && !isHandledGCPointerType(Ty, GC);
}
#endif
@@ -382,9 +316,9 @@ static std::string suffixed_name_or(Value *V, StringRef Suffix,
// live. Values used by that instruction are considered live.
static void analyzeParsePointLiveness(
DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, CallBase *Call,
- PartiallyConstructedSafepointRecord &Result) {
+ PartiallyConstructedSafepointRecord &Result, GCStrategy *GC) {
StatepointLiveSetTy LiveSet;
- findLiveSetAtInst(Call, OriginalLivenessData, LiveSet);
+ findLiveSetAtInst(Call, OriginalLivenessData, LiveSet, GC);
if (PrintLiveSet) {
dbgs() << "Live Variables:\n";
@@ -692,7 +626,7 @@ static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache,
/// Returns the base defining value for this value.
static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache,
IsKnownBaseMapTy &KnownBases) {
- if (Cache.find(I) == Cache.end()) {
+ if (!Cache.contains(I)) {
auto *BDV = findBaseDefiningValue(I, Cache, KnownBases);
Cache[I] = BDV;
LLVM_DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> "
@@ -700,7 +634,7 @@ static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache,
<< KnownBases[I] << "\n");
}
assert(Cache[I] != nullptr);
- assert(KnownBases.find(Cache[I]) != KnownBases.end() &&
+ assert(KnownBases.contains(Cache[I]) &&
"Cached value must be present in known bases map");
return Cache[I];
}
@@ -1289,9 +1223,9 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
if (!BdvSV->isZeroEltSplat())
UpdateOperand(1); // vector operand
else {
- // Never read, so just use undef
+ // Never read, so just use poison
Value *InVal = BdvSV->getOperand(1);
- BaseSV->setOperand(1, UndefValue::get(InVal->getType()));
+ BaseSV->setOperand(1, PoisonValue::get(InVal->getType()));
}
}
}
@@ -1385,20 +1319,21 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
CallBase *Call,
PartiallyConstructedSafepointRecord &result,
- PointerToBaseTy &PointerToBase);
+ PointerToBaseTy &PointerToBase,
+ GCStrategy *GC);
static void recomputeLiveInValues(
Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
MutableArrayRef<struct PartiallyConstructedSafepointRecord> records,
- PointerToBaseTy &PointerToBase) {
+ PointerToBaseTy &PointerToBase, GCStrategy *GC) {
// TODO-PERF: reuse the original liveness, then simply run the dataflow
// again. The old values are still live and will help it stabilize quickly.
GCPtrLivenessData RevisedLivenessData;
- computeLiveInValues(DT, F, RevisedLivenessData);
+ computeLiveInValues(DT, F, RevisedLivenessData, GC);
for (size_t i = 0; i < records.size(); i++) {
struct PartiallyConstructedSafepointRecord &info = records[i];
- recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info,
- PointerToBase);
+ recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info, PointerToBase,
+ GC);
}
}
@@ -1522,7 +1457,7 @@ static AttributeList legalizeCallAttributes(LLVMContext &Ctx,
static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
ArrayRef<Value *> BasePtrs,
Instruction *StatepointToken,
- IRBuilder<> &Builder) {
+ IRBuilder<> &Builder, GCStrategy *GC) {
if (LiveVariables.empty())
return;
@@ -1542,8 +1477,8 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
// towards a single unified pointer type anyways, we can just cast everything
// to an i8* of the right address space. A bitcast is added later to convert
// gc_relocate to the actual value's type.
- auto getGCRelocateDecl = [&] (Type *Ty) {
- assert(isHandledGCPointerType(Ty));
+ auto getGCRelocateDecl = [&](Type *Ty) {
+ assert(isHandledGCPointerType(Ty, GC));
auto AS = Ty->getScalarType()->getPointerAddressSpace();
Type *NewTy = Type::getInt8PtrTy(M->getContext(), AS);
if (auto *VT = dyn_cast<VectorType>(Ty))
@@ -1668,7 +1603,8 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
const SmallVectorImpl<Value *> &LiveVariables,
PartiallyConstructedSafepointRecord &Result,
std::vector<DeferredReplacement> &Replacements,
- const PointerToBaseTy &PointerToBase) {
+ const PointerToBaseTy &PointerToBase,
+ GCStrategy *GC) {
assert(BasePtrs.size() == LiveVariables.size());
// Then go ahead and use the builder do actually do the inserts. We insert
@@ -1901,7 +1837,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst();
Result.UnwindToken = ExceptionalToken;
- CreateGCRelocates(LiveVariables, BasePtrs, ExceptionalToken, Builder);
+ CreateGCRelocates(LiveVariables, BasePtrs, ExceptionalToken, Builder, GC);
// Generate gc relocates and returns for normal block
BasicBlock *NormalDest = II->getNormalDest();
@@ -1947,7 +1883,7 @@ makeStatepointExplicitImpl(CallBase *Call, /* to replace */
Result.StatepointToken = Token;
// Second, create a gc.relocate for every live variable
- CreateGCRelocates(LiveVariables, BasePtrs, Token, Builder);
+ CreateGCRelocates(LiveVariables, BasePtrs, Token, Builder, GC);
}
// Replace an existing gc.statepoint with a new one and a set of gc.relocates
@@ -1959,7 +1895,7 @@ static void
makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
PartiallyConstructedSafepointRecord &Result,
std::vector<DeferredReplacement> &Replacements,
- const PointerToBaseTy &PointerToBase) {
+ const PointerToBaseTy &PointerToBase, GCStrategy *GC) {
const auto &LiveSet = Result.LiveSet;
// Convert to vector for efficient cross referencing.
@@ -1976,7 +1912,7 @@ makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
// Do the actual rewriting and delete the old statepoint
makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements,
- PointerToBase);
+ PointerToBase, GC);
}
// Helper function for the relocationViaAlloca.
@@ -2277,12 +2213,13 @@ static void insertUseHolderAfter(CallBase *Call, const ArrayRef<Value *> Values,
static void findLiveReferences(
Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
- MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
+ MutableArrayRef<struct PartiallyConstructedSafepointRecord> records,
+ GCStrategy *GC) {
GCPtrLivenessData OriginalLivenessData;
- computeLiveInValues(DT, F, OriginalLivenessData);
+ computeLiveInValues(DT, F, OriginalLivenessData, GC);
for (size_t i = 0; i < records.size(); i++) {
struct PartiallyConstructedSafepointRecord &info = records[i];
- analyzeParsePointLiveness(DT, OriginalLivenessData, toUpdate[i], info);
+ analyzeParsePointLiveness(DT, OriginalLivenessData, toUpdate[i], info, GC);
}
}
@@ -2684,6 +2621,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
SmallVectorImpl<CallBase *> &ToUpdate,
DefiningValueMapTy &DVCache,
IsKnownBaseMapTy &KnownBases) {
+ std::unique_ptr<GCStrategy> GC = findGCStrategy(F);
+
#ifndef NDEBUG
// Validate the input
std::set<CallBase *> Uniqued;
@@ -2718,9 +2657,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
SmallVector<Value *, 64> DeoptValues;
for (Value *Arg : GetDeoptBundleOperands(Call)) {
- assert(!isUnhandledGCPointerType(Arg->getType()) &&
+ assert(!isUnhandledGCPointerType(Arg->getType(), GC.get()) &&
"support for FCA unimplemented");
- if (isHandledGCPointerType(Arg->getType()))
+ if (isHandledGCPointerType(Arg->getType(), GC.get()))
DeoptValues.push_back(Arg);
}
@@ -2731,7 +2670,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// A) Identify all gc pointers which are statically live at the given call
// site.
- findLiveReferences(F, DT, ToUpdate, Records);
+ findLiveReferences(F, DT, ToUpdate, Records, GC.get());
/// Global mapping from live pointers to a base-defining-value.
PointerToBaseTy PointerToBase;
@@ -2782,7 +2721,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// By selecting base pointers, we've effectively inserted new uses. Thus, we
// need to rerun liveness. We may *also* have inserted new defs, but that's
// not the key issue.
- recomputeLiveInValues(F, DT, ToUpdate, Records, PointerToBase);
+ recomputeLiveInValues(F, DT, ToUpdate, Records, PointerToBase, GC.get());
if (PrintBasePointers) {
errs() << "Base Pairs: (w/Relocation)\n";
@@ -2842,7 +2781,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// the old statepoint calls as we go.)
for (size_t i = 0; i < Records.size(); i++)
makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements,
- PointerToBase);
+ PointerToBase, GC.get());
ToUpdate.clear(); // prevent accident use of invalid calls.
@@ -2866,9 +2805,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// Do all the fixups of the original live variables to their relocated selves
SmallVector<Value *, 128> Live;
- for (size_t i = 0; i < Records.size(); i++) {
- PartiallyConstructedSafepointRecord &Info = Records[i];
-
+ for (const PartiallyConstructedSafepointRecord &Info : Records) {
// We can't simply save the live set from the original insertion. One of
// the live values might be the result of a call which needs a safepoint.
// That Value* no longer exists and we need to use the new gc_result.
@@ -2899,7 +2836,7 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
#ifndef NDEBUG
// Validation check
for (auto *Ptr : Live)
- assert(isHandledGCPointerType(Ptr->getType()) &&
+ assert(isHandledGCPointerType(Ptr->getType(), GC.get()) &&
"must be a gc pointer type");
#endif
@@ -3019,25 +2956,33 @@ static void stripNonValidDataFromBody(Function &F) {
}
}
- // Delete the invariant.start instructions and RAUW undef.
+ // Delete the invariant.start instructions and RAUW poison.
for (auto *II : InvariantStartInstructions) {
- II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ II->replaceAllUsesWith(PoisonValue::get(II->getType()));
II->eraseFromParent();
}
}
+/// Looks up the GC strategy for a given function, returning null if the
+/// function doesn't have a GC tag. A fresh strategy object is built each call.
+static std::unique_ptr<GCStrategy> findGCStrategy(Function &F) {
+ if (!F.hasGC())
+ return nullptr;
+
+ return getGCStrategy(F.getGC());
+}
+
/// Returns true if this function should be rewritten by this pass. The main
/// point of this function is as an extension point for custom logic.
static bool shouldRewriteStatepointsIn(Function &F) {
- // TODO: This should check the GCStrategy
- if (F.hasGC()) {
- const auto &FunctionGCName = F.getGC();
- const StringRef StatepointExampleName("statepoint-example");
- const StringRef CoreCLRName("coreclr");
- return (StatepointExampleName == FunctionGCName) ||
- (CoreCLRName == FunctionGCName);
- } else
+ if (!F.hasGC())
return false;
+
+ std::unique_ptr<GCStrategy> Strategy = findGCStrategy(F);
+
+ assert(Strategy && "GC strategy is required by function, but was not found");
+
+ return Strategy->useRS4GC();
}
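For context, below is a minimal sketch of a GC strategy that opts into this path. The class name and registry string are hypothetical; UseStatepoints, UseRS4GC, isGCManagedPointer and GCRegistry are used as declared in llvm/IR/GCStrategy.h, but treat the details as an assumption rather than as part of this change.

    // Illustrative only: a statepoint-based GCStrategy whose functions will be
    // rewritten by RewriteStatepointsForGC because useRS4GC() returns true.
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/GCStrategy.h"

    namespace {
    class ExampleStatepointGC : public llvm::GCStrategy {
    public:
      ExampleStatepointGC() {
        UseStatepoints = true; // lower safepoints via gc.statepoint
        UseRS4GC = true;       // request the RewriteStatepointsForGC rewrite
      }
      // Mirror the addrspace(1) convention used by statepoint-example.
      std::optional<bool>
      isGCManagedPointer(const llvm::Type *Ty) const override {
        if (auto *PT = llvm::dyn_cast<llvm::PointerType>(Ty))
          return PT->getAddressSpace() == 1;
        return std::nullopt;
      }
    };
    } // namespace

    // Functions declared with `gc "example-statepoint-gc"` now satisfy
    // shouldRewriteStatepointsIn() above via Strategy->useRS4GC().
    static llvm::GCRegistry::Add<ExampleStatepointGC>
        X("example-statepoint-gc", "hypothetical statepoint-based collector");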
static void stripNonValidData(Module &M) {
@@ -3216,7 +3161,7 @@ bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
/// the live-out set of the basic block
static void computeLiveInValues(BasicBlock::reverse_iterator Begin,
BasicBlock::reverse_iterator End,
- SetVector<Value *> &LiveTmp) {
+ SetVector<Value *> &LiveTmp, GCStrategy *GC) {
for (auto &I : make_range(Begin, End)) {
// KILL/Def - Remove this definition from LiveIn
LiveTmp.remove(&I);
@@ -3228,9 +3173,9 @@ static void computeLiveInValues(BasicBlock::reverse_iterator Begin,
// USE - Add to the LiveIn set for this instruction
for (Value *V : I.operands()) {
- assert(!isUnhandledGCPointerType(V->getType()) &&
+ assert(!isUnhandledGCPointerType(V->getType(), GC) &&
"support for FCA unimplemented");
- if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
+ if (isHandledGCPointerType(V->getType(), GC) && !isa<Constant>(V)) {
// The choice to exclude all things constant here is slightly subtle.
// There are two independent reasons:
// - We assume that things which are constant (from LLVM's definition)
@@ -3247,7 +3192,8 @@ static void computeLiveInValues(BasicBlock::reverse_iterator Begin,
}
}
-static void computeLiveOutSeed(BasicBlock *BB, SetVector<Value *> &LiveTmp) {
+static void computeLiveOutSeed(BasicBlock *BB, SetVector<Value *> &LiveTmp,
+ GCStrategy *GC) {
for (BasicBlock *Succ : successors(BB)) {
for (auto &I : *Succ) {
PHINode *PN = dyn_cast<PHINode>(&I);
@@ -3255,18 +3201,18 @@ static void computeLiveOutSeed(BasicBlock *BB, SetVector<Value *> &LiveTmp) {
break;
Value *V = PN->getIncomingValueForBlock(BB);
- assert(!isUnhandledGCPointerType(V->getType()) &&
+ assert(!isUnhandledGCPointerType(V->getType(), GC) &&
"support for FCA unimplemented");
- if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V))
+ if (isHandledGCPointerType(V->getType(), GC) && !isa<Constant>(V))
LiveTmp.insert(V);
}
}
}
-static SetVector<Value *> computeKillSet(BasicBlock *BB) {
+static SetVector<Value *> computeKillSet(BasicBlock *BB, GCStrategy *GC) {
SetVector<Value *> KillSet;
for (Instruction &I : *BB)
- if (isHandledGCPointerType(I.getType()))
+ if (isHandledGCPointerType(I.getType(), GC))
KillSet.insert(&I);
return KillSet;
}
@@ -3301,14 +3247,14 @@ static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
#endif
static void computeLiveInValues(DominatorTree &DT, Function &F,
- GCPtrLivenessData &Data) {
+ GCPtrLivenessData &Data, GCStrategy *GC) {
SmallSetVector<BasicBlock *, 32> Worklist;
// Seed the liveness for each individual block
for (BasicBlock &BB : F) {
- Data.KillSet[&BB] = computeKillSet(&BB);
+ Data.KillSet[&BB] = computeKillSet(&BB, GC);
Data.LiveSet[&BB].clear();
- computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]);
+ computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB], GC);
#ifndef NDEBUG
for (Value *Kill : Data.KillSet[&BB])
@@ -3316,7 +3262,7 @@ static void computeLiveInValues(DominatorTree &DT, Function &F,
#endif
Data.LiveOut[&BB] = SetVector<Value *>();
- computeLiveOutSeed(&BB, Data.LiveOut[&BB]);
+ computeLiveOutSeed(&BB, Data.LiveOut[&BB], GC);
Data.LiveIn[&BB] = Data.LiveSet[&BB];
Data.LiveIn[&BB].set_union(Data.LiveOut[&BB]);
Data.LiveIn[&BB].set_subtract(Data.KillSet[&BB]);
@@ -3368,7 +3314,7 @@ static void computeLiveInValues(DominatorTree &DT, Function &F,
}
static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
- StatepointLiveSetTy &Out) {
+ StatepointLiveSetTy &Out, GCStrategy *GC) {
BasicBlock *BB = Inst->getParent();
// Note: The copy is intentional and required
@@ -3379,8 +3325,8 @@ static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
// call result is not live (normal), nor are it's arguments
// (unless they're used again later). This adjustment is
// specifically what we need to relocate
- computeLiveInValues(BB->rbegin(), ++Inst->getIterator().getReverse(),
- LiveOut);
+ computeLiveInValues(BB->rbegin(), ++Inst->getIterator().getReverse(), LiveOut,
+ GC);
LiveOut.remove(Inst);
Out.insert(LiveOut.begin(), LiveOut.end());
}
@@ -3388,9 +3334,10 @@ static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
CallBase *Call,
PartiallyConstructedSafepointRecord &Info,
- PointerToBaseTy &PointerToBase) {
+ PointerToBaseTy &PointerToBase,
+ GCStrategy *GC) {
StatepointLiveSetTy Updated;
- findLiveSetAtInst(Call, RevisedLivenessData, Updated);
+ findLiveSetAtInst(Call, RevisedLivenessData, Updated, GC);
// We may have base pointers which are now live that weren't before. We need
// to update the PointerToBase structure to reflect this.
diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp
index 7b396c6ee074..fcdc503c54a4 100644
--- a/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -41,7 +41,6 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
@@ -136,54 +135,3 @@ PreservedAnalyses SCCPPass::run(Function &F, FunctionAnalysisManager &AM) {
PA.preserve<DominatorTreeAnalysis>();
return PA;
}
-
-namespace {
-
-//===--------------------------------------------------------------------===//
-//
-/// SCCP Class - This class uses the SCCPSolver to implement a per-function
-/// Sparse Conditional Constant Propagator.
-///
-class SCCPLegacyPass : public FunctionPass {
-public:
- // Pass identification, replacement for typeid
- static char ID;
-
- SCCPLegacyPass() : FunctionPass(ID) {
- initializeSCCPLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- }
-
- // runOnFunction - Run the Sparse Conditional Constant Propagation
- // algorithm, and return true if the function was modified.
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
- const DataLayout &DL = F.getParent()->getDataLayout();
- const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- DomTreeUpdater DTU(DTWP ? &DTWP->getDomTree() : nullptr,
- DomTreeUpdater::UpdateStrategy::Lazy);
- return runSCCP(F, DL, TLI, DTU);
- }
-};
-
-} // end anonymous namespace
-
-char SCCPLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(SCCPLegacyPass, "sccp",
- "Sparse Conditional Constant Propagation", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(SCCPLegacyPass, "sccp",
- "Sparse Conditional Constant Propagation", false, false)
-
-// createSCCPPass - This is the public interface to this file.
-FunctionPass *llvm::createSCCPPass() { return new SCCPLegacyPass(); }
-
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 8339981e1bdc..983a75e1d708 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -118,13 +118,79 @@ STATISTIC(NumVectorized, "Number of vectorized aggregates");
/// GEPs.
static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false),
cl::Hidden);
+/// Disable running mem2reg during SROA in order to test or debug SROA.
+static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false),
+ cl::Hidden);
namespace {
+
+/// Calculate the fragment of a variable to use when slicing a store
+/// based on the slice dimensions, existing fragment, and base storage
+/// fragment.
+/// Results:
+/// UseFrag - Use Target as the new fragment.
+/// UseNoFrag - The new slice already covers the whole variable.
+/// Skip - The new alloca slice doesn't include this variable.
+/// FIXME: Can we use calculateFragmentIntersect instead?
+enum FragCalcResult { UseFrag, UseNoFrag, Skip };
+static FragCalcResult
+calculateFragment(DILocalVariable *Variable,
+ uint64_t NewStorageSliceOffsetInBits,
+ uint64_t NewStorageSliceSizeInBits,
+ std::optional<DIExpression::FragmentInfo> StorageFragment,
+ std::optional<DIExpression::FragmentInfo> CurrentFragment,
+ DIExpression::FragmentInfo &Target) {
+ // If the base storage describes part of the variable apply the offset and
+ // the size constraint.
+ if (StorageFragment) {
+ Target.SizeInBits =
+ std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
+ Target.OffsetInBits =
+ NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
+ } else {
+ Target.SizeInBits = NewStorageSliceSizeInBits;
+ Target.OffsetInBits = NewStorageSliceOffsetInBits;
+ }
+
+ // If this slice extracts the entirety of an independent variable from a
+ // larger alloca, do not produce a fragment expression, as the variable is
+ // not fragmented.
+ if (!CurrentFragment) {
+ if (auto Size = Variable->getSizeInBits()) {
+ // Treat the current fragment as covering the whole variable.
+ CurrentFragment = DIExpression::FragmentInfo(*Size, 0);
+ if (Target == CurrentFragment)
+ return UseNoFrag;
+ }
+ }
+
+ // No additional work to do if there isn't a fragment already, or there is
+ // but it already exactly describes the new assignment.
+ if (!CurrentFragment || *CurrentFragment == Target)
+ return UseFrag;
+
+ // Reject the target fragment if it doesn't fit wholly within the current
+ // fragment. TODO: We could instead chop up the target to fit in the case of
+ // a partial overlap.
+ if (Target.startInBits() < CurrentFragment->startInBits() ||
+ Target.endInBits() > CurrentFragment->endInBits())
+ return Skip;
+
+ // Target fits within the current fragment, return it.
+ return UseFrag;
+}
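A worked example may help (the numbers are chosen for illustration and are not from the patch): for a 128-bit variable whose dbg.assign carries no fragment, a store slice covering bits [64, 96) gives Target = {OffsetInBits: 64, SizeInBits: 32}; the synthesized whole-variable CurrentFragment {OffsetInBits: 0, SizeInBits: 128} differs from Target but wholly contains it, so the result is UseFrag. If the slice instead covered all 128 bits, Target would equal the whole-variable fragment and the result would be UseNoFrag; a Target that lands outside an existing fragment of the dbg.assign yields Skip.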
+
+static DebugVariable getAggregateVariable(DbgVariableIntrinsic *DVI) {
+ return DebugVariable(DVI->getVariable(), std::nullopt,
+ DVI->getDebugLoc().getInlinedAt());
+}
+
/// Find linked dbg.assign and generate a new one with the correct
/// FragmentInfo. Link Inst to the new dbg.assign. If Value is nullptr the
/// value component is copied from the old dbg.assign to the new.
/// \param OldAlloca Alloca for the variable before splitting.
-/// \param RelativeOffsetInBits Offset into \p OldAlloca relative to the
-/// offset prior to splitting (change in offset).
+/// \param IsSplit True if the store (not necessarily alloca)
+/// is being split.
+/// \param OldAllocaOffsetInBits Offset of the slice taken from OldAlloca.
/// \param SliceSizeInBits New number of bits being written to.
/// \param OldInst Instruction that is being split.
/// \param Inst New instruction performing this part of the
@@ -132,8 +198,8 @@ namespace {
/// \param Dest Store destination.
/// \param Value Stored value.
/// \param DL Datalayout.
-static void migrateDebugInfo(AllocaInst *OldAlloca,
- uint64_t RelativeOffsetInBits,
+static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
+ uint64_t OldAllocaOffsetInBits,
uint64_t SliceSizeInBits, Instruction *OldInst,
Instruction *Inst, Value *Dest, Value *Value,
const DataLayout &DL) {
@@ -144,7 +210,9 @@ static void migrateDebugInfo(AllocaInst *OldAlloca,
LLVM_DEBUG(dbgs() << " migrateDebugInfo\n");
LLVM_DEBUG(dbgs() << " OldAlloca: " << *OldAlloca << "\n");
- LLVM_DEBUG(dbgs() << " RelativeOffset: " << RelativeOffsetInBits << "\n");
+ LLVM_DEBUG(dbgs() << " IsSplit: " << IsSplit << "\n");
+ LLVM_DEBUG(dbgs() << " OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
+ << "\n");
LLVM_DEBUG(dbgs() << " SliceSizeInBits: " << SliceSizeInBits << "\n");
LLVM_DEBUG(dbgs() << " OldInst: " << *OldInst << "\n");
LLVM_DEBUG(dbgs() << " Inst: " << *Inst << "\n");
@@ -152,44 +220,66 @@ static void migrateDebugInfo(AllocaInst *OldAlloca,
if (Value)
LLVM_DEBUG(dbgs() << " Value: " << *Value << "\n");
+ /// Map of aggregate variables to their fragment associated with OldAlloca.
+ DenseMap<DebugVariable, std::optional<DIExpression::FragmentInfo>>
+ BaseFragments;
+ for (auto *DAI : at::getAssignmentMarkers(OldAlloca))
+ BaseFragments[getAggregateVariable(DAI)] =
+ DAI->getExpression()->getFragmentInfo();
+
// The new inst needs a DIAssignID unique metadata tag (if OldInst has
// one). It shouldn't already have one: assert this assumption.
assert(!Inst->getMetadata(LLVMContext::MD_DIAssignID));
DIAssignID *NewID = nullptr;
auto &Ctx = Inst->getContext();
DIBuilder DIB(*OldInst->getModule(), /*AllowUnresolved*/ false);
- uint64_t AllocaSizeInBits = *OldAlloca->getAllocationSizeInBits(DL);
assert(OldAlloca->isStaticAlloca());
for (DbgAssignIntrinsic *DbgAssign : MarkerRange) {
LLVM_DEBUG(dbgs() << " existing dbg.assign is: " << *DbgAssign
<< "\n");
auto *Expr = DbgAssign->getExpression();
+ bool SetKillLocation = false;
- // Check if the dbg.assign already describes a fragment.
- auto GetCurrentFragSize = [AllocaSizeInBits, DbgAssign,
- Expr]() -> uint64_t {
- if (auto FI = Expr->getFragmentInfo())
- return FI->SizeInBits;
- if (auto VarSize = DbgAssign->getVariable()->getSizeInBits())
- return *VarSize;
- // The variable type has an unspecified size. This can happen in the
- // case of DW_TAG_unspecified_type types, e.g. std::nullptr_t. Because
- // there is no fragment and we do not know the size of the variable type,
- // we'll guess by looking at the alloca.
- return AllocaSizeInBits;
- };
- uint64_t CurrentFragSize = GetCurrentFragSize();
- bool MakeNewFragment = CurrentFragSize != SliceSizeInBits;
- assert(MakeNewFragment || RelativeOffsetInBits == 0);
-
- assert(SliceSizeInBits <= AllocaSizeInBits);
- if (MakeNewFragment) {
- assert(RelativeOffsetInBits + SliceSizeInBits <= CurrentFragSize);
- auto E = DIExpression::createFragmentExpression(
- Expr, RelativeOffsetInBits, SliceSizeInBits);
- assert(E && "Failed to create fragment expr!");
- Expr = *E;
+ if (IsSplit) {
+ std::optional<DIExpression::FragmentInfo> BaseFragment;
+ {
+ auto R = BaseFragments.find(getAggregateVariable(DbgAssign));
+ if (R == BaseFragments.end())
+ continue;
+ BaseFragment = R->second;
+ }
+ std::optional<DIExpression::FragmentInfo> CurrentFragment =
+ Expr->getFragmentInfo();
+ DIExpression::FragmentInfo NewFragment;
+ FragCalcResult Result = calculateFragment(
+ DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
+ BaseFragment, CurrentFragment, NewFragment);
+
+ if (Result == Skip)
+ continue;
+ if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
+ if (CurrentFragment) {
+          // Rewrite NewFragment to be relative to the existing one (this is
+          // what createFragmentExpression wants). calculateFragment has
+          // already resolved the size for us. FIXME: Should it return the
+          // relative fragment too?
+ NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;
+ }
+ // Add the new fragment info to the existing expression if possible.
+ if (auto E = DIExpression::createFragmentExpression(
+ Expr, NewFragment.OffsetInBits, NewFragment.SizeInBits)) {
+ Expr = *E;
+ } else {
+ // Otherwise, add the new fragment info to an empty expression and
+ // discard the value component of this dbg.assign as the value cannot
+ // be computed with the new fragment.
+ Expr = *DIExpression::createFragmentExpression(
+ DIExpression::get(Expr->getContext(), std::nullopt),
+ NewFragment.OffsetInBits, NewFragment.SizeInBits);
+ SetKillLocation = true;
+ }
+ }
}
// If we haven't created a DIAssignID ID do that now and attach it to Inst.
@@ -198,11 +288,27 @@ static void migrateDebugInfo(AllocaInst *OldAlloca,
Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);
}
- Value = Value ? Value : DbgAssign->getValue();
+ ::Value *NewValue = Value ? Value : DbgAssign->getValue();
auto *NewAssign = DIB.insertDbgAssign(
- Inst, Value, DbgAssign->getVariable(), Expr, Dest,
+ Inst, NewValue, DbgAssign->getVariable(), Expr, Dest,
DIExpression::get(Ctx, std::nullopt), DbgAssign->getDebugLoc());
+ // If we've updated the value but the original dbg.assign has an arglist
+ // then kill it now - we can't use the requested new value.
+ // We can't replace the DIArgList with the new value as it'd leave
+ // the DIExpression in an invalid state (DW_OP_LLVM_arg operands without
+ // an arglist). And we can't keep the DIArgList in case the linked store
+ // is being split - in which case the DIArgList + expression may no longer
+ // be computing the correct value.
+ // This should be a very rare situation as it requires the value being
+ // stored to differ from the dbg.assign (i.e., the value has been
+ // represented differently in the debug intrinsic for some reason).
+ SetKillLocation |=
+ Value && (DbgAssign->hasArgList() ||
+ !DbgAssign->getExpression()->isSingleLocationExpression());
+ if (SetKillLocation)
+ NewAssign->setKillLocation();
+
// We could use more precision here at the cost of some additional (code)
// complexity - if the original dbg.assign was adjacent to its store, we
// could position this new dbg.assign adjacent to its store rather than the
@@ -888,11 +994,12 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&LI);
- if (isa<ScalableVectorType>(LI.getType()))
+ TypeSize Size = DL.getTypeStoreSize(LI.getType());
+ if (Size.isScalable())
return PI.setAborted(&LI);
- uint64_t Size = DL.getTypeStoreSize(LI.getType()).getFixedValue();
- return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
+ return handleLoadOrStore(LI.getType(), LI, Offset, Size.getFixedValue(),
+ LI.isVolatile());
}
void visitStoreInst(StoreInst &SI) {
@@ -902,10 +1009,11 @@ private:
if (!IsOffsetKnown)
return PI.setAborted(&SI);
- if (isa<ScalableVectorType>(ValOp->getType()))
+ TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
+ if (StoreSize.isScalable())
return PI.setAborted(&SI);
- uint64_t Size = DL.getTypeStoreSize(ValOp->getType()).getFixedValue();
+ uint64_t Size = StoreSize.getFixedValue();
// If this memory access can be shown to *statically* extend outside the
// bounds of the allocation, it's behavior is undefined, so simply
@@ -1520,12 +1628,6 @@ static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI,
IRB.SetInsertPoint(&LI);
- if (auto *TypedPtrTy = LI.getPointerOperandType();
- !TypedPtrTy->isOpaquePointerTy() && SI.getType() != TypedPtrTy) {
- TV = IRB.CreateBitOrPointerCast(TV, TypedPtrTy, "");
- FV = IRB.CreateBitOrPointerCast(FV, TypedPtrTy, "");
- }
-
LoadInst *TL =
IRB.CreateAlignedLoad(LI.getType(), TV, LI.getAlign(),
LI.getName() + ".sroa.speculate.load.true");
@@ -1581,22 +1683,19 @@ static void rewriteMemOpOfSelect(SelectInst &SI, T &I,
bool IsThen = SuccBB == HeadBI->getSuccessor(0);
int SuccIdx = IsThen ? 0 : 1;
auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
+ auto &CondMemOp = cast<T>(*I.clone());
if (NewMemOpBB != Head) {
NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
if (isa<LoadInst>(I))
++NumLoadsPredicated;
else
++NumStoresPredicated;
- } else
+ } else {
+ CondMemOp.dropUBImplyingAttrsAndMetadata();
++NumLoadsSpeculated;
- auto &CondMemOp = cast<T>(*I.clone());
+ }
CondMemOp.insertBefore(NewMemOpBB->getTerminator());
Value *Ptr = SI.getOperand(1 + SuccIdx);
- if (auto *PtrTy = Ptr->getType();
- !PtrTy->isOpaquePointerTy() &&
- PtrTy != CondMemOp.getPointerOperandType())
- Ptr = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
- Ptr, CondMemOp.getPointerOperandType(), "", &CondMemOp);
CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
if (isa<LoadInst>(I)) {
CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");
@@ -1654,238 +1753,16 @@ static bool rewriteSelectInstMemOps(SelectInst &SI,
return CFGChanged;
}
-/// Build a GEP out of a base pointer and indices.
-///
-/// This will return the BasePtr if that is valid, or build a new GEP
-/// instruction using the IRBuilder if GEP-ing is needed.
-static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
- SmallVectorImpl<Value *> &Indices,
- const Twine &NamePrefix) {
- if (Indices.empty())
- return BasePtr;
-
- // A single zero index is a no-op, so check for this and avoid building a GEP
- // in that case.
- if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
- return BasePtr;
-
- // buildGEP() is only called for non-opaque pointers.
- return IRB.CreateInBoundsGEP(
- BasePtr->getType()->getNonOpaquePointerElementType(), BasePtr, Indices,
- NamePrefix + "sroa_idx");
-}
-
-/// Get a natural GEP off of the BasePtr walking through Ty toward
-/// TargetTy without changing the offset of the pointer.
-///
-/// This routine assumes we've already established a properly offset GEP with
-/// Indices, and arrived at the Ty type. The goal is to continue to GEP with
-/// zero-indices down through type layers until we find one the same as
-/// TargetTy. If we can't find one with the same type, we at least try to use
-/// one with the same size. If none of that works, we just produce the GEP as
-/// indicated by Indices to have the correct offset.
-static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
- Value *BasePtr, Type *Ty, Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &NamePrefix) {
- if (Ty == TargetTy)
- return buildGEP(IRB, BasePtr, Indices, NamePrefix);
-
- // Offset size to use for the indices.
- unsigned OffsetSize = DL.getIndexTypeSizeInBits(BasePtr->getType());
-
- // See if we can descend into a struct and locate a field with the correct
- // type.
- unsigned NumLayers = 0;
- Type *ElementTy = Ty;
- do {
- if (ElementTy->isPointerTy())
- break;
-
- if (ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) {
- ElementTy = ArrayTy->getElementType();
- Indices.push_back(IRB.getIntN(OffsetSize, 0));
- } else if (VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) {
- ElementTy = VectorTy->getElementType();
- Indices.push_back(IRB.getInt32(0));
- } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
- if (STy->element_begin() == STy->element_end())
- break; // Nothing left to descend into.
- ElementTy = *STy->element_begin();
- Indices.push_back(IRB.getInt32(0));
- } else {
- break;
- }
- ++NumLayers;
- } while (ElementTy != TargetTy);
- if (ElementTy != TargetTy)
- Indices.erase(Indices.end() - NumLayers, Indices.end());
-
- return buildGEP(IRB, BasePtr, Indices, NamePrefix);
-}
-
-/// Get a natural GEP from a base pointer to a particular offset and
-/// resulting in a particular type.
-///
-/// The goal is to produce a "natural" looking GEP that works with the existing
-/// composite types to arrive at the appropriate offset and element type for
-/// a pointer. TargetTy is the element type the returned GEP should point-to if
-/// possible. We recurse by decreasing Offset, adding the appropriate index to
-/// Indices, and setting Ty to the result subtype.
-///
-/// If no natural GEP can be constructed, this function returns null.
-static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
- Value *Ptr, APInt Offset, Type *TargetTy,
- SmallVectorImpl<Value *> &Indices,
- const Twine &NamePrefix) {
- PointerType *Ty = cast<PointerType>(Ptr->getType());
-
- // Don't consider any GEPs through an i8* as natural unless the TargetTy is
- // an i8.
- if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8))
- return nullptr;
-
- Type *ElementTy = Ty->getNonOpaquePointerElementType();
- if (!ElementTy->isSized())
- return nullptr; // We can't GEP through an unsized element.
-
- SmallVector<APInt> IntIndices = DL.getGEPIndicesForOffset(ElementTy, Offset);
- if (Offset != 0)
- return nullptr;
-
- for (const APInt &Index : IntIndices)
- Indices.push_back(IRB.getInt(Index));
- return getNaturalGEPWithType(IRB, DL, Ptr, ElementTy, TargetTy, Indices,
- NamePrefix);
-}
-
/// Compute an adjusted pointer from Ptr by Offset bytes where the
/// resulting pointer has PointerTy.
-///
-/// This tries very hard to compute a "natural" GEP which arrives at the offset
-/// and produces the pointer type desired. Where it cannot, it will try to use
-/// the natural GEP to arrive at the offset and bitcast to the type. Where that
-/// fails, it will try to use an existing i8* and GEP to the byte offset and
-/// bitcast to the type.
-///
-/// The strategy for finding the more natural GEPs is to peel off layers of the
-/// pointer, walking back through bit casts and GEPs, searching for a base
-/// pointer from which we can compute a natural GEP with the desired
-/// properties. The algorithm tries to fold as many constant indices into
-/// a single GEP as possible, thus making each GEP more independent of the
-/// surrounding code.
static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
APInt Offset, Type *PointerTy,
const Twine &NamePrefix) {
- // Create i8 GEP for opaque pointers.
- if (Ptr->getType()->isOpaquePointerTy()) {
- if (Offset != 0)
- Ptr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(Offset),
- NamePrefix + "sroa_idx");
- return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
- NamePrefix + "sroa_cast");
- }
-
- // Even though we don't look through PHI nodes, we could be called on an
- // instruction in an unreachable block, which may be on a cycle.
- SmallPtrSet<Value *, 4> Visited;
- Visited.insert(Ptr);
- SmallVector<Value *, 4> Indices;
-
- // We may end up computing an offset pointer that has the wrong type. If we
- // never are able to compute one directly that has the correct type, we'll
- // fall back to it, so keep it and the base it was computed from around here.
- Value *OffsetPtr = nullptr;
- Value *OffsetBasePtr;
-
- // Remember any i8 pointer we come across to re-use if we need to do a raw
- // byte offset.
- Value *Int8Ptr = nullptr;
- APInt Int8PtrOffset(Offset.getBitWidth(), 0);
-
- PointerType *TargetPtrTy = cast<PointerType>(PointerTy);
- Type *TargetTy = TargetPtrTy->getNonOpaquePointerElementType();
-
-  // As an `addrspacecast` may be involved, `Ptr` (the storage pointer) may
-  // have a different address space from the expected `PointerTy` (the pointer
-  // to be used). Adjust the pointer type based on the original storage pointer.
- auto AS = cast<PointerType>(Ptr->getType())->getAddressSpace();
- PointerTy = TargetTy->getPointerTo(AS);
-
- do {
- // First fold any existing GEPs into the offset.
- while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
- APInt GEPOffset(Offset.getBitWidth(), 0);
- if (!GEP->accumulateConstantOffset(DL, GEPOffset))
- break;
- Offset += GEPOffset;
- Ptr = GEP->getPointerOperand();
- if (!Visited.insert(Ptr).second)
- break;
- }
-
- // See if we can perform a natural GEP here.
- Indices.clear();
- if (Value *P = getNaturalGEPWithOffset(IRB, DL, Ptr, Offset, TargetTy,
- Indices, NamePrefix)) {
- // If we have a new natural pointer at the offset, clear out any old
- // offset pointer we computed. Unless it is the base pointer or
- // a non-instruction, we built a GEP we don't need. Zap it.
- if (OffsetPtr && OffsetPtr != OffsetBasePtr)
- if (Instruction *I = dyn_cast<Instruction>(OffsetPtr)) {
- assert(I->use_empty() && "Built a GEP with uses somehow!");
- I->eraseFromParent();
- }
- OffsetPtr = P;
- OffsetBasePtr = Ptr;
- // If we also found a pointer of the right type, we're done.
- if (P->getType() == PointerTy)
- break;
- }
-
- // Stash this pointer if we've found an i8*.
- if (Ptr->getType()->isIntegerTy(8)) {
- Int8Ptr = Ptr;
- Int8PtrOffset = Offset;
- }
-
- // Peel off a layer of the pointer and update the offset appropriately.
- if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
- Ptr = cast<Operator>(Ptr)->getOperand(0);
- } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
- if (GA->isInterposable())
- break;
- Ptr = GA->getAliasee();
- } else {
- break;
- }
- assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");
- } while (Visited.insert(Ptr).second);
-
- if (!OffsetPtr) {
- if (!Int8Ptr) {
- Int8Ptr = IRB.CreateBitCast(
- Ptr, IRB.getInt8PtrTy(PointerTy->getPointerAddressSpace()),
- NamePrefix + "sroa_raw_cast");
- Int8PtrOffset = Offset;
- }
-
- OffsetPtr = Int8PtrOffset == 0
- ? Int8Ptr
- : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr,
- IRB.getInt(Int8PtrOffset),
- NamePrefix + "sroa_raw_idx");
- }
- Ptr = OffsetPtr;
-
- // On the off chance we were targeting i8*, guard the bitcast here.
- if (cast<PointerType>(Ptr->getType()) != TargetPtrTy) {
- Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr,
- TargetPtrTy,
- NamePrefix + "sroa_cast");
- }
-
- return Ptr;
+ if (Offset != 0)
+ Ptr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Ptr, IRB.getInt(Offset),
+ NamePrefix + "sroa_idx");
+ return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
+ NamePrefix + "sroa_cast");
}
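// With opaque pointers, the "natural GEP" machinery removed above collapses to
// a byte-offset GEP plus an optional address-space cast. A minimal sketch of
// that reduction, assuming an IRBuilderTy `B`; the helper name and its
// arguments are illustrative only, not part of this patch:
//
//   static Value *adjustPtrSketch(IRBuilderTy &B, Value *Ptr, uint64_t Off,
//                                 Type *DstPtrTy, const Twine &Prefix) {
//     if (Off != 0)
//       Ptr = B.CreateConstInBoundsGEP1_64(B.getInt8Ty(), Ptr, Off,
//                                          Prefix + "sroa_idx");
//     // Only an address-space difference can remain once pointee types are gone.
//     return B.CreatePointerBitCastOrAddrSpaceCast(Ptr, DstPtrTy,
//                                                  Prefix + "sroa_cast");
//   }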
/// Compute the adjusted alignment for a load or store from an offset.
@@ -2126,6 +2003,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
// Collect the candidate types for vector-based promotion. Also track whether
// we have different element types.
SmallVector<VectorType *, 4> CandidateTys;
+ SetVector<Type *> LoadStoreTys;
Type *CommonEltTy = nullptr;
VectorType *CommonVecPtrTy = nullptr;
bool HaveVecPtrTy = false;
@@ -2159,15 +2037,40 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
}
}
};
- // Consider any loads or stores that are the exact size of the slice.
- for (const Slice &S : P)
- if (S.beginOffset() == P.beginOffset() &&
- S.endOffset() == P.endOffset()) {
- if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
- CheckCandidateType(LI->getType());
- else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
- CheckCandidateType(SI->getValueOperand()->getType());
+ // Put load and store types into a set for de-duplication.
+ for (const Slice &S : P) {
+ Type *Ty;
+ if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
+ Ty = LI->getType();
+ else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
+ Ty = SI->getValueOperand()->getType();
+ else
+ continue;
+ LoadStoreTys.insert(Ty);
+ // Consider any loads or stores that are the exact size of the slice.
+ if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
+ CheckCandidateType(Ty);
+ }
+ // Consider additional vector types where the element type size is a
+ // multiple of load/store element size.
+ for (Type *Ty : LoadStoreTys) {
+ if (!VectorType::isValidElementType(Ty))
+ continue;
+ unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
+ // Make a copy of CandidateTys and iterate through it, because we might
+ // append to CandidateTys in the loop.
+ SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
+ for (VectorType *&VTy : CandidateTysCopy) {
+ unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
+ unsigned ElementSize =
+ DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
+ if (TypeSize != VectorSize && TypeSize != ElementSize &&
+ VectorSize % TypeSize == 0) {
+ VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
+ CheckCandidateType(NewVTy);
+ }
}
+ }
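  // A hedged worked example of the widening above: if the slice is stored as
  // <4 x i64> (VectorSize == 256) and also loaded as i32 (TypeSize == 32),
  // then 256 % 32 == 0 and 32 differs from both the vector and the i64
  // element size, so <8 x i32> is appended as an additional candidate type.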
// If we didn't find a vector type, nothing to do here.
if (CandidateTys.empty())
@@ -2195,7 +2098,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
// Rank the remaining candidate vector types. This is easy because we know
// they're all integer vectors. We sort by ascending number of elements.
- auto RankVectorTypes = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
+ auto RankVectorTypesComp = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
(void)DL;
assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
@@ -2207,10 +2110,22 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
return cast<FixedVectorType>(RHSTy)->getNumElements() <
cast<FixedVectorType>(LHSTy)->getNumElements();
};
- llvm::sort(CandidateTys, RankVectorTypes);
- CandidateTys.erase(
- std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes),
- CandidateTys.end());
+ auto RankVectorTypesEq = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
+ (void)DL;
+ assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
+ DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
+ "Cannot have vector types of different sizes!");
+ assert(RHSTy->getElementType()->isIntegerTy() &&
+ "All non-integer types eliminated!");
+ assert(LHSTy->getElementType()->isIntegerTy() &&
+ "All non-integer types eliminated!");
+ return cast<FixedVectorType>(RHSTy)->getNumElements() ==
+ cast<FixedVectorType>(LHSTy)->getNumElements();
+ };
+ llvm::sort(CandidateTys, RankVectorTypesComp);
+ CandidateTys.erase(std::unique(CandidateTys.begin(), CandidateTys.end(),
+ RankVectorTypesEq),
+ CandidateTys.end());
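  // Note that llvm::sort needs a strict-weak "less than" while std::unique
  // needs an equality test; reusing the old RankVectorTypes comparator in
  // std::unique would have dropped every candidate with more elements than
  // the smallest one rather than just the duplicates. The usual pattern, with
  // illustrative names:
  //   llvm::sort(Vec, Less);
  //   Vec.erase(std::unique(Vec.begin(), Vec.end(), Equal), Vec.end());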
} else {
// The only way to have the same element type in every vector type is to
// have the same vector type. Check that and remove all but one.
@@ -2554,7 +2469,6 @@ class llvm::sroa::AllocaSliceRewriter
// original alloca.
uint64_t NewBeginOffset = 0, NewEndOffset = 0;
- uint64_t RelativeOffset = 0;
uint64_t SliceSize = 0;
bool IsSplittable = false;
bool IsSplit = false;
@@ -2628,14 +2542,13 @@ public:
NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
- RelativeOffset = NewBeginOffset - BeginOffset;
SliceSize = NewEndOffset - NewBeginOffset;
LLVM_DEBUG(dbgs() << " Begin:(" << BeginOffset << ", " << EndOffset
<< ") NewBegin:(" << NewBeginOffset << ", "
<< NewEndOffset << ") NewAllocaBegin:("
<< NewAllocaBeginOffset << ", " << NewAllocaEndOffset
<< ")\n");
- assert(IsSplit || RelativeOffset == 0);
+ assert(IsSplit || NewBeginOffset == BeginOffset);
OldUse = I->getUse();
OldPtr = cast<Instruction>(OldUse->get());
@@ -2898,8 +2811,8 @@ private:
Pass.DeadInsts.push_back(&SI);
// NOTE: Careful to use OrigV rather than V.
- migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &SI, Store,
- Store->getPointerOperand(), OrigV, DL);
+ migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
+ Store, Store->getPointerOperand(), OrigV, DL);
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
}
@@ -2923,8 +2836,9 @@ private:
if (AATags)
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
- migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &SI, Store,
- Store->getPointerOperand(), Store->getValueOperand(), DL);
+ migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
+ Store, Store->getPointerOperand(),
+ Store->getValueOperand(), DL);
Pass.DeadInsts.push_back(&SI);
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
@@ -3002,8 +2916,9 @@ private:
if (NewSI->isAtomic())
NewSI->setAlignment(SI.getAlign());
- migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &SI, NewSI,
- NewSI->getPointerOperand(), NewSI->getValueOperand(), DL);
+ migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
+ NewSI, NewSI->getPointerOperand(),
+ NewSI->getValueOperand(), DL);
Pass.DeadInsts.push_back(&SI);
deleteIfTriviallyDead(OldOp);
@@ -3103,8 +3018,8 @@ private:
if (AATags)
New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
- migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &II, New,
- New->getRawDest(), nullptr, DL);
+ migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
+ New, New->getRawDest(), nullptr, DL);
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return false;
@@ -3179,8 +3094,8 @@ private:
if (AATags)
New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
- migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &II, New,
- New->getPointerOperand(), V, DL);
+ migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
+ New, New->getPointerOperand(), V, DL);
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return !II.isVolatile();
@@ -3308,8 +3223,16 @@ private:
if (AATags)
New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
- migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &II, New,
- DestPtr, nullptr, DL);
+ APInt Offset(DL.getIndexTypeSizeInBits(DestPtr->getType()), 0);
+ if (IsDest) {
+ migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8,
+ &II, New, DestPtr, nullptr, DL);
+ } else if (AllocaInst *Base = dyn_cast<AllocaInst>(
+ DestPtr->stripAndAccumulateConstantOffsets(
+ DL, Offset, /*AllowNonInbounds*/ true))) {
+ migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8,
+ SliceSize * 8, &II, New, DestPtr, nullptr, DL);
+ }
LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
return false;
}
@@ -3397,8 +3320,18 @@ private:
if (AATags)
Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
- migrateDebugInfo(&OldAI, RelativeOffset * 8, SliceSize * 8, &II, Store,
- DstPtr, Src, DL);
+ APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0);
+ if (IsDest) {
+
+ migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
+ Store, DstPtr, Src, DL);
+ } else if (AllocaInst *Base = dyn_cast<AllocaInst>(
+ DstPtr->stripAndAccumulateConstantOffsets(
+ DL, Offset, /*AllowNonInbounds*/ true))) {
+ migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8, SliceSize * 8,
+ &II, Store, DstPtr, Src, DL);
+ }
+
LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
return !II.isVolatile();
}
@@ -3760,23 +3693,22 @@ private:
APInt Offset(
DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
- if (AATags &&
- GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
+ GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset);
+ if (AATags)
Store->setAAMetadata(AATags.shift(Offset.getZExtValue()));
// migrateDebugInfo requires the base Alloca. Walk to it from this gep.
// If we cannot (because there's an intervening non-const or unbounded
// gep) then we wouldn't expect to see dbg.assign intrinsics linked to
// this instruction.
- APInt OffsetInBytes(DL.getTypeSizeInBits(Ptr->getType()), false);
- Value *Base = InBoundsGEP->stripAndAccumulateInBoundsConstantOffsets(
- DL, OffsetInBytes);
+ Value *Base = AggStore->getPointerOperand()->stripInBoundsOffsets();
if (auto *OldAI = dyn_cast<AllocaInst>(Base)) {
uint64_t SizeInBits =
DL.getTypeSizeInBits(Store->getValueOperand()->getType());
- migrateDebugInfo(OldAI, OffsetInBytes.getZExtValue() * 8, SizeInBits,
- AggStore, Store, Store->getPointerOperand(),
- Store->getValueOperand(), DL);
+ migrateDebugInfo(OldAI, /*IsSplit*/ true, Offset.getZExtValue() * 8,
+ SizeInBits, AggStore, Store,
+ Store->getPointerOperand(), Store->getValueOperand(),
+ DL);
} else {
assert(at::getAssignmentMarkers(Store).empty() &&
"AT: unexpected debug.assign linked to store through "
@@ -3799,6 +3731,9 @@ private:
getAdjustedAlignment(&SI, 0), DL, IRB);
Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
Visited.erase(&SI);
+ // The stores replacing SI each have markers describing fragments of the
+ // assignment so delete the assignment markers linked to SI.
+ at::deleteAssignmentMarkers(&SI);
SI.eraseFromParent();
return true;
}
@@ -4029,6 +3964,10 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
return nullptr;
const StructLayout *SL = DL.getStructLayout(STy);
+
+ if (SL->getSizeInBits().isScalable())
+ return nullptr;
+
if (Offset >= SL->getSizeInBytes())
return nullptr;
uint64_t EndOffset = Offset + Size;
@@ -4869,11 +4808,13 @@ bool SROAPass::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
// Migrate debug information from the old alloca to the new alloca(s)
// and the individual partitions.
- TinyPtrVector<DbgVariableIntrinsic *> DbgDeclares = FindDbgAddrUses(&AI);
+ TinyPtrVector<DbgVariableIntrinsic *> DbgVariables;
+ for (auto *DbgDeclare : FindDbgDeclareUses(&AI))
+ DbgVariables.push_back(DbgDeclare);
for (auto *DbgAssign : at::getAssignmentMarkers(&AI))
- DbgDeclares.push_back(DbgAssign);
- for (DbgVariableIntrinsic *DbgDeclare : DbgDeclares) {
- auto *Expr = DbgDeclare->getExpression();
+ DbgVariables.push_back(DbgAssign);
+ for (DbgVariableIntrinsic *DbgVariable : DbgVariables) {
+ auto *Expr = DbgVariable->getExpression();
DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
uint64_t AllocaSize =
DL.getTypeSizeInBits(AI.getAllocatedType()).getFixedValue();
@@ -4905,7 +4846,7 @@ bool SROAPass::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
}
// The alloca may be larger than the variable.
- auto VarSize = DbgDeclare->getVariable()->getSizeInBits();
+ auto VarSize = DbgVariable->getVariable()->getSizeInBits();
if (VarSize) {
if (Size > *VarSize)
Size = *VarSize;
@@ -4925,18 +4866,18 @@ bool SROAPass::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
// Remove any existing intrinsics on the new alloca describing
// the variable fragment.
- for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(Fragment.Alloca)) {
+ for (DbgDeclareInst *OldDII : FindDbgDeclareUses(Fragment.Alloca)) {
auto SameVariableFragment = [](const DbgVariableIntrinsic *LHS,
const DbgVariableIntrinsic *RHS) {
return LHS->getVariable() == RHS->getVariable() &&
LHS->getDebugLoc()->getInlinedAt() ==
RHS->getDebugLoc()->getInlinedAt();
};
- if (SameVariableFragment(OldDII, DbgDeclare))
+ if (SameVariableFragment(OldDII, DbgVariable))
OldDII->eraseFromParent();
}
- if (auto *DbgAssign = dyn_cast<DbgAssignIntrinsic>(DbgDeclare)) {
+ if (auto *DbgAssign = dyn_cast<DbgAssignIntrinsic>(DbgVariable)) {
if (!Fragment.Alloca->hasMetadata(LLVMContext::MD_DIAssignID)) {
Fragment.Alloca->setMetadata(
LLVMContext::MD_DIAssignID,
@@ -4950,8 +4891,8 @@ bool SROAPass::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
LLVM_DEBUG(dbgs() << "Created new assign intrinsic: " << *NewAssign
<< "\n");
} else {
- DIB.insertDeclare(Fragment.Alloca, DbgDeclare->getVariable(),
- FragmentExpr, DbgDeclare->getDebugLoc(), &AI);
+ DIB.insertDeclare(Fragment.Alloca, DbgVariable->getVariable(),
+ FragmentExpr, DbgVariable->getDebugLoc(), &AI);
}
}
}
@@ -4996,8 +4937,9 @@ SROAPass::runOnAlloca(AllocaInst &AI) {
// Skip alloca forms that this analysis can't handle.
auto *AT = AI.getAllocatedType();
- if (AI.isArrayAllocation() || !AT->isSized() || isa<ScalableVectorType>(AT) ||
- DL.getTypeAllocSize(AT).getFixedValue() == 0)
+ TypeSize Size = DL.getTypeAllocSize(AT);
+ if (AI.isArrayAllocation() || !AT->isSized() || Size.isScalable() ||
+ Size.getFixedValue() == 0)
return {Changed, CFGChanged};
// First, split any FCA loads and stores touching this alloca to promote
@@ -5074,7 +5016,7 @@ bool SROAPass::deleteDeadInstructions(
// not be able to find it.
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
DeletedAllocas.insert(AI);
- for (DbgVariableIntrinsic *OldDII : FindDbgAddrUses(AI))
+ for (DbgDeclareInst *OldDII : FindDbgDeclareUses(AI))
OldDII->eraseFromParent();
}
@@ -5107,8 +5049,13 @@ bool SROAPass::promoteAllocas(Function &F) {
NumPromoted += PromotableAllocas.size();
- LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
- PromoteMemToReg(PromotableAllocas, DTU->getDomTree(), AC);
+ if (SROASkipMem2Reg) {
+ LLVM_DEBUG(dbgs() << "Not promoting allocas with mem2reg!\n");
+ } else {
+ LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
+ PromoteMemToReg(PromotableAllocas, DTU->getDomTree(), AC);
+ }
+
PromotableAllocas.clear();
return true;
}
@@ -5120,16 +5067,16 @@ PreservedAnalyses SROAPass::runImpl(Function &F, DomTreeUpdater &RunDTU,
DTU = &RunDTU;
AC = &RunAC;
+ const DataLayout &DL = F.getParent()->getDataLayout();
BasicBlock &EntryBB = F.getEntryBlock();
for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
I != E; ++I) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
- if (isa<ScalableVectorType>(AI->getAllocatedType())) {
- if (isAllocaPromotable(AI))
- PromotableAllocas.push_back(AI);
- } else {
+ if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() &&
+ isAllocaPromotable(AI))
+ PromotableAllocas.push_back(AI);
+ else
Worklist.insert(AI);
- }
}
}
@@ -5172,6 +5119,11 @@ PreservedAnalyses SROAPass::runImpl(Function &F, DomTreeUpdater &RunDTU,
if (!Changed)
return PreservedAnalyses::all();
+ if (isAssignmentTrackingEnabled(*F.getParent())) {
+ for (auto &BB : F)
+ RemoveRedundantDbgInstrs(&BB);
+ }
+
PreservedAnalyses PA;
if (!CFGChanged)
PA.preserveSet<CFGAnalyses>();
@@ -5186,8 +5138,9 @@ PreservedAnalyses SROAPass::runImpl(Function &F, DominatorTree &RunDT,
}
PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
- return runImpl(F, AM.getResult<DominatorTreeAnalysis>(F),
- AM.getResult<AssumptionAnalysis>(F));
+ DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
+ return runImpl(F, DT, AC);
}
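// A minimal, hypothetical sketch of driving this pass through the new pass
// manager; the setup (analysis registration, SROAOptions choice) is
// illustrative and not taken from this patch:
//
//   PassBuilder PB;
//   FunctionAnalysisManager FAM;
//   PB.registerFunctionAnalyses(FAM);
//   FunctionPassManager FPM;
//   FPM.addPass(SROAPass(SROAOptions::ModifyCFG));
//   PreservedAnalyses PA = FPM.run(F, FAM);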
void SROAPass::printPipeline(
diff --git a/llvm/lib/Transforms/Scalar/Scalar.cpp b/llvm/lib/Transforms/Scalar/Scalar.cpp
index 8aee8d140a29..37b032e4d7c7 100644
--- a/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -12,76 +12,38 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
-#include "llvm-c/Initialization.h"
-#include "llvm-c/Transforms/Scalar.h"
-#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/ScopedNoAliasAA.h"
-#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/IR/Verifier.h"
#include "llvm/InitializePasses.h"
-#include "llvm/Transforms/Scalar/GVN.h"
-#include "llvm/Transforms/Scalar/Scalarizer.h"
-#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h"
using namespace llvm;
/// initializeScalarOptsPasses - Initialize all passes linked into the
/// ScalarOpts library.
void llvm::initializeScalarOpts(PassRegistry &Registry) {
- initializeADCELegacyPassPass(Registry);
- initializeBDCELegacyPassPass(Registry);
- initializeAlignmentFromAssumptionsPass(Registry);
- initializeCallSiteSplittingLegacyPassPass(Registry);
initializeConstantHoistingLegacyPassPass(Registry);
- initializeCorrelatedValuePropagationPass(Registry);
initializeDCELegacyPassPass(Registry);
- initializeDivRemPairsLegacyPassPass(Registry);
initializeScalarizerLegacyPassPass(Registry);
- initializeDSELegacyPassPass(Registry);
initializeGuardWideningLegacyPassPass(Registry);
initializeLoopGuardWideningLegacyPassPass(Registry);
initializeGVNLegacyPassPass(Registry);
- initializeNewGVNLegacyPassPass(Registry);
initializeEarlyCSELegacyPassPass(Registry);
initializeEarlyCSEMemSSALegacyPassPass(Registry);
initializeMakeGuardsExplicitLegacyPassPass(Registry);
- initializeGVNHoistLegacyPassPass(Registry);
- initializeGVNSinkLegacyPassPass(Registry);
initializeFlattenCFGLegacyPassPass(Registry);
- initializeIRCELegacyPassPass(Registry);
- initializeIndVarSimplifyLegacyPassPass(Registry);
initializeInferAddressSpacesPass(Registry);
initializeInstSimplifyLegacyPassPass(Registry);
- initializeJumpThreadingPass(Registry);
- initializeDFAJumpThreadingLegacyPassPass(Registry);
initializeLegacyLICMPassPass(Registry);
initializeLegacyLoopSinkPassPass(Registry);
- initializeLoopFuseLegacyPass(Registry);
initializeLoopDataPrefetchLegacyPassPass(Registry);
- initializeLoopDeletionLegacyPassPass(Registry);
- initializeLoopAccessLegacyAnalysisPass(Registry);
initializeLoopInstSimplifyLegacyPassPass(Registry);
- initializeLoopInterchangeLegacyPassPass(Registry);
- initializeLoopFlattenLegacyPassPass(Registry);
initializeLoopPredicationLegacyPassPass(Registry);
initializeLoopRotateLegacyPassPass(Registry);
initializeLoopStrengthReducePass(Registry);
- initializeLoopRerollLegacyPassPass(Registry);
initializeLoopUnrollPass(Registry);
- initializeLoopUnrollAndJamPass(Registry);
- initializeWarnMissedTransformationsLegacyPass(Registry);
- initializeLoopVersioningLICMLegacyPassPass(Registry);
- initializeLoopIdiomRecognizeLegacyPassPass(Registry);
initializeLowerAtomicLegacyPassPass(Registry);
initializeLowerConstantIntrinsicsPass(Registry);
initializeLowerExpectIntrinsicPass(Registry);
initializeLowerGuardIntrinsicLegacyPassPass(Registry);
- initializeLowerMatrixIntrinsicsLegacyPassPass(Registry);
- initializeLowerMatrixIntrinsicsMinimalLegacyPassPass(Registry);
initializeLowerWidenableConditionLegacyPassPass(Registry);
- initializeMemCpyOptLegacyPassPass(Registry);
initializeMergeICmpsLegacyPassPass(Registry);
initializeMergedLoadStoreMotionLegacyPassPass(Registry);
initializeNaryReassociateLegacyPassPass(Registry);
@@ -89,9 +51,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeReassociateLegacyPassPass(Registry);
initializeRedundantDbgInstEliminationPass(Registry);
initializeRegToMemLegacyPass(Registry);
- initializeRewriteStatepointsForGCLegacyPassPass(Registry);
initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
- initializeSCCPLegacyPassPass(Registry);
initializeSROALegacyPassPass(Registry);
initializeCFGSimplifyPassPass(Registry);
initializeStructurizeCFGLegacyPassPass(Registry);
@@ -102,196 +62,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeSeparateConstOffsetFromGEPLegacyPassPass(Registry);
initializeSpeculativeExecutionLegacyPassPass(Registry);
initializeStraightLineStrengthReduceLegacyPassPass(Registry);
- initializePlaceBackedgeSafepointsImplPass(Registry);
- initializePlaceSafepointsPass(Registry);
- initializeFloat2IntLegacyPassPass(Registry);
- initializeLoopDistributeLegacyPass(Registry);
- initializeLoopLoadEliminationPass(Registry);
+ initializePlaceBackedgeSafepointsLegacyPassPass(Registry);
initializeLoopSimplifyCFGLegacyPassPass(Registry);
- initializeLoopVersioningLegacyPassPass(Registry);
-}
-
-void LLVMAddLoopSimplifyCFGPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLoopSimplifyCFGPass());
-}
-
-void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) {
- initializeScalarOpts(*unwrap(R));
-}
-
-void LLVMAddAggressiveDCEPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createAggressiveDCEPass());
-}
-
-void LLVMAddDCEPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createDeadCodeEliminationPass());
-}
-
-void LLVMAddBitTrackingDCEPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createBitTrackingDCEPass());
-}
-
-void LLVMAddAlignmentFromAssumptionsPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createAlignmentFromAssumptionsPass());
-}
-
-void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createCFGSimplificationPass());
-}
-
-void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createDeadStoreEliminationPass());
-}
-
-void LLVMAddScalarizerPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createScalarizerPass());
-}
-
-void LLVMAddGVNPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createGVNPass());
-}
-
-void LLVMAddNewGVNPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createNewGVNPass());
-}
-
-void LLVMAddMergedLoadStoreMotionPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createMergedLoadStoreMotionPass());
-}
-
-void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createIndVarSimplifyPass());
-}
-
-void LLVMAddInstructionSimplifyPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createInstSimplifyLegacyPass());
-}
-
-void LLVMAddJumpThreadingPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createJumpThreadingPass());
-}
-
-void LLVMAddLoopSinkPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLoopSinkPass());
-}
-
-void LLVMAddLICMPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLICMPass());
-}
-
-void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLoopDeletionPass());
-}
-
-void LLVMAddLoopFlattenPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLoopFlattenPass());
-}
-
-void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLoopIdiomPass());
-}
-
-void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLoopRotatePass());
-}
-
-void LLVMAddLoopRerollPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLoopRerollPass());
-}
-
-void LLVMAddLoopUnrollPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLoopUnrollPass());
-}
-
-void LLVMAddLoopUnrollAndJamPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLoopUnrollAndJamPass());
-}
-
-void LLVMAddLowerAtomicPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLowerAtomicPass());
-}
-
-void LLVMAddMemCpyOptPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createMemCpyOptPass());
-}
-
-void LLVMAddPartiallyInlineLibCallsPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createPartiallyInlineLibCallsPass());
-}
-
-void LLVMAddReassociatePass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createReassociatePass());
-}
-
-void LLVMAddSCCPPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createSCCPPass());
-}
-
-void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createSROAPass());
-}
-
-void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createSROAPass());
-}
-
-void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM,
- int Threshold) {
- unwrap(PM)->add(createSROAPass());
-}
-
-void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM) {
- // NOTE: The simplify-libcalls pass has been removed.
-}
-
-void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createTailCallEliminationPass());
-}
-
-void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createDemoteRegisterToMemoryPass());
-}
-
-void LLVMAddVerifierPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createVerifierPass());
-}
-
-void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createCorrelatedValuePropagationPass());
-}
-
-void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createEarlyCSEPass(false/*=UseMemorySSA*/));
-}
-
-void LLVMAddEarlyCSEMemSSAPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createEarlyCSEPass(true/*=UseMemorySSA*/));
-}
-
-void LLVMAddGVNHoistLegacyPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createGVNHoistPass());
-}
-
-void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createTypeBasedAAWrapperPass());
-}
-
-void LLVMAddScopedNoAliasAAPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createScopedNoAliasAAWrapperPass());
-}
-
-void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createBasicAAWrapperPass());
-}
-
-void LLVMAddLowerConstantIntrinsicsPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLowerConstantIntrinsicsPass());
-}
-
-void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLowerExpectIntrinsicPass());
-}
-
-void LLVMAddUnifyFunctionExitNodesPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createUnifyFunctionExitNodesPass());
}
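// One replacement for the removed C bindings is the opaque PassBuilder C API.
// A hedged sketch (module creation, target-machine handling and error
// reporting simplified; the pipeline string is only an example):
//
//   LLVMPassBuilderOptionsRef Opts = LLVMCreatePassBuilderOptions();
//   LLVMErrorRef Err = LLVMRunPasses(Mod, "sroa,early-cse,gvn",
//                                    /*TM=*/NULL, Opts);
//   if (Err) {
//     char *Msg = LLVMGetErrorMessage(Err);
//     fprintf(stderr, "pass pipeline failed: %s\n", Msg);
//     LLVMDisposeErrorMessage(Msg);
//   }
//   LLVMDisposePassBuilderOptions(Opts);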
diff --git a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
index 1c8e4e3512dc..c01d03f64472 100644
--- a/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
+++ b/llvm/lib/Transforms/Scalar/ScalarizeMaskedMemIntrin.cpp
@@ -125,7 +125,7 @@ static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth,
// br label %else
//
// else: ; preds = %0, %cond.load
-// %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ undef, %0 ]
+// %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ poison, %0 ]
// %6 = extractelement <16 x i1> %mask, i32 1
// br i1 %6, label %cond.load1, label %else2
//
@@ -170,10 +170,6 @@ static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI,
// Adjust alignment for the scalar instruction.
const Align AdjustedAlignVal =
commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
- // Bitcast %addr from i8* to EltTy*
- Type *NewPtrType =
- EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
- Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
// The result vector
@@ -183,7 +179,7 @@ static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI,
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
- Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
+ Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
VResult = Builder.CreateInsertElement(VResult, Load, Idx);
}
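// With opaque pointers there is no leading bitcast of %addr any more; each
// element address is a plain typed GEP on the incoming pointer. A hedged IR
// illustration for element 1 of a <16 x i32> masked load:
//   %Gep1  = getelementptr inbounds i32, ptr %addr, i32 1
//   %Load1 = load i32, ptr %Gep1, align 4
//   %Res1  = insertelement <16 x i32> %Res0, i32 %Load1, i64 1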
@@ -232,7 +228,7 @@ static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI,
CondBlock->setName("cond.load");
Builder.SetInsertPoint(CondBlock->getTerminator());
- Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
+ Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
@@ -309,10 +305,6 @@ static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI,
// Adjust alignment for the scalar instruction.
const Align AdjustedAlignVal =
commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
- // Bitcast %addr from i8* to EltTy*
- Type *NewPtrType =
- EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
- Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
if (isConstantIntVector(Mask)) {
@@ -320,7 +312,7 @@ static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI,
if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
Value *OneElt = Builder.CreateExtractElement(Src, Idx);
- Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
+ Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
}
CI->eraseFromParent();
@@ -367,7 +359,7 @@ static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI,
Builder.SetInsertPoint(CondBlock->getTerminator());
Value *OneElt = Builder.CreateExtractElement(Src, Idx);
- Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
+ Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
// Create "else" block, fill it in the next iteration
@@ -394,11 +386,11 @@ static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI,
// cond.load:
// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
// %Load0 = load i32, i32* %Ptr0, align 4
-// %Res0 = insertelement <16 x i32> undef, i32 %Load0, i32 0
+// %Res0 = insertelement <16 x i32> poison, i32 %Load0, i32 0
// br label %else
//
// else:
-// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [undef, %0]
+// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [poison, %0]
// %Mask1 = extractelement <16 x i1> %Mask, i32 1
// br i1 %Mask1, label %cond.load1, label %else2
//
@@ -653,16 +645,16 @@ static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI,
Value *VResult = PassThru;
// Shorten the way if the mask is a vector of constants.
- // Create a build_vector pattern, with loads/undefs as necessary and then
+ // Create a build_vector pattern, with loads/poisons as necessary and then
// shuffle blend with the pass through value.
if (isConstantIntVector(Mask)) {
unsigned MemIndex = 0;
VResult = PoisonValue::get(VecType);
- SmallVector<int, 16> ShuffleMask(VectorWidth, UndefMaskElem);
+ SmallVector<int, 16> ShuffleMask(VectorWidth, PoisonMaskElem);
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
Value *InsertElt;
if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) {
- InsertElt = UndefValue::get(EltTy);
+ InsertElt = PoisonValue::get(EltTy);
ShuffleMask[Idx] = Idx + VectorWidth;
} else {
Value *NewPtr =
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 4aab88b74f10..86b55dfd304a 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -6,8 +6,9 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass converts vector operations into scalar operations, in order
-// to expose optimization opportunities on the individual scalar operations.
+// This pass converts vector operations into scalar operations (or, optionally,
+// operations on smaller vector widths), in order to expose optimization
+// opportunities on the individual scalar operations.
// It is mainly intended for targets that do not have vector units, but it
// may also be useful for revectorizing code to different vector widths.
//
@@ -62,6 +63,16 @@ static cl::opt<bool> ClScalarizeLoadStore(
"scalarize-load-store", cl::init(false), cl::Hidden,
cl::desc("Allow the scalarizer pass to scalarize loads and store"));
+// Split vectors larger than this size into fragments, where each fragment is
+// either a vector no larger than this size or a scalar.
+//
+// Instructions with operands or results of different sizes that would be split
+// into a different number of fragments are currently left as-is.
+static cl::opt<unsigned> ClScalarizeMinBits(
+ "scalarize-min-bits", cl::init(0), cl::Hidden,
+ cl::desc("Instruct the scalarizer pass to attempt to keep values of a "
+ "minimum number of bits"));
+
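+// A hedged usage example of the new knob; the flag names come from the
+// definitions above, the rest of the command line is illustrative:
+//   opt -passes=scalarizer -scalarize-min-bits=32 -S in.ll -o out.ll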
namespace {
BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
@@ -88,6 +99,29 @@ using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;
// along with a pointer to their scattered forms.
using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>;
+struct VectorSplit {
+ // The type of the vector.
+ FixedVectorType *VecTy = nullptr;
+
+ // The number of elements packed in a fragment (other than the remainder).
+ unsigned NumPacked = 0;
+
+ // The number of fragments (scalars or smaller vectors) into which the vector
+ // shall be split.
+ unsigned NumFragments = 0;
+
+ // The type of each complete fragment.
+ Type *SplitTy = nullptr;
+
+ // The type of the remainder (last) fragment; null if all fragments are
+ // complete.
+ Type *RemainderTy = nullptr;
+
+ Type *getFragmentType(unsigned I) const {
+ return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy;
+ }
+};
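+// A hedged worked example of the fields above: splitting <7 x i16> with
+// scalarize-min-bits=32 gives NumPacked = 2, NumFragments = 4,
+// SplitTy = <2 x i16> and RemainderTy = i16, so getFragmentType(3) returns
+// i16 while fragments 0..2 are <2 x i16>.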
+
// Provides a very limited vector-like interface for lazily accessing one
// component of a scattered vector or vector pointer.
class Scatterer {
@@ -97,23 +131,23 @@ public:
// Scatter V into Size components. If new instructions are needed,
// insert them before BBI in BB. If Cache is nonnull, use it to cache
// the results.
- Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, Type *PtrElemTy,
- ValueVector *cachePtr = nullptr);
+ Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
+ const VectorSplit &VS, ValueVector *cachePtr = nullptr);
// Return component I, creating a new Value for it if necessary.
Value *operator[](unsigned I);
// Return the number of components.
- unsigned size() const { return Size; }
+ unsigned size() const { return VS.NumFragments; }
private:
BasicBlock *BB;
BasicBlock::iterator BBI;
Value *V;
- Type *PtrElemTy;
+ VectorSplit VS;
+ bool IsPointer;
ValueVector *CachePtr;
ValueVector Tmp;
- unsigned Size;
};
// FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
@@ -171,24 +205,74 @@ struct BinarySplitter {
struct VectorLayout {
VectorLayout() = default;
- // Return the alignment of element I.
- Align getElemAlign(unsigned I) {
- return commonAlignment(VecAlign, I * ElemSize);
+ // Return the alignment of fragment Frag.
+ Align getFragmentAlign(unsigned Frag) {
+ return commonAlignment(VecAlign, Frag * SplitSize);
}
- // The type of the vector.
- FixedVectorType *VecTy = nullptr;
-
- // The type of each element.
- Type *ElemTy = nullptr;
+ // The split of the underlying vector type.
+ VectorSplit VS;
// The alignment of the vector.
Align VecAlign;
- // The size of each element.
- uint64_t ElemSize = 0;
+ // The size of each (non-remainder) fragment in bytes.
+ uint64_t SplitSize = 0;
};
+/// Concatenate the given fragments to a single vector value of the type
+/// described in @p VS.
+static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
+ const VectorSplit &VS, Twine Name) {
+ unsigned NumElements = VS.VecTy->getNumElements();
+ SmallVector<int> ExtendMask;
+ SmallVector<int> InsertMask;
+
+ if (VS.NumPacked > 1) {
+ // Prepare the shufflevector masks once and re-use them for all
+ // fragments.
+ ExtendMask.resize(NumElements, -1);
+ for (unsigned I = 0; I < VS.NumPacked; ++I)
+ ExtendMask[I] = I;
+
+ InsertMask.resize(NumElements);
+ for (unsigned I = 0; I < NumElements; ++I)
+ InsertMask[I] = I;
+ }
+
+ Value *Res = PoisonValue::get(VS.VecTy);
+ for (unsigned I = 0; I < VS.NumFragments; ++I) {
+ Value *Fragment = Fragments[I];
+
+ unsigned NumPacked = VS.NumPacked;
+ if (I == VS.NumFragments - 1 && VS.RemainderTy) {
+ if (auto *RemVecTy = dyn_cast<FixedVectorType>(VS.RemainderTy))
+ NumPacked = RemVecTy->getNumElements();
+ else
+ NumPacked = 1;
+ }
+
+ if (NumPacked == 1) {
+ Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
+ Name + ".upto" + Twine(I));
+ } else {
+ Fragment = Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);
+ if (I == 0) {
+ Res = Fragment;
+ } else {
+ for (unsigned J = 0; J < NumPacked; ++J)
+ InsertMask[I * VS.NumPacked + J] = NumElements + J;
+ Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
+ Name + ".upto" + Twine(I));
+ for (unsigned J = 0; J < NumPacked; ++J)
+ InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
+ }
+ }
+ }
+
+ return Res;
+}
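+// A hedged IR illustration of the reassembly above for a <5 x i16> built from
+// fragments %f0, %f1 : <2 x i16> and a remainder %f2 : i16:
+//   %w0  = shufflevector <2 x i16> %f0, <2 x i16> %f0,
+//            <5 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison>
+//   %w1  = shufflevector <2 x i16> %f1, <2 x i16> %f1,
+//            <5 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison>
+//   %r01 = shufflevector <5 x i16> %w0, <5 x i16> %w1,
+//            <5 x i32> <i32 0, i32 1, i32 5, i32 6, i32 4>
+//   %res = insertelement <5 x i16> %r01, i16 %f2, i64 4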
+
template <typename T>
T getWithDefaultOverride(const cl::opt<T> &ClOption,
const std::optional<T> &DefaultOverride) {
@@ -205,8 +289,9 @@ public:
getWithDefaultOverride(ClScalarizeVariableInsertExtract,
Options.ScalarizeVariableInsertExtract)),
ScalarizeLoadStore(getWithDefaultOverride(ClScalarizeLoadStore,
- Options.ScalarizeLoadStore)) {
- }
+ Options.ScalarizeLoadStore)),
+ ScalarizeMinBits(getWithDefaultOverride(ClScalarizeMinBits,
+ Options.ScalarizeMinBits)) {}
bool visit(Function &F);
@@ -228,13 +313,15 @@ public:
bool visitLoadInst(LoadInst &LI);
bool visitStoreInst(StoreInst &SI);
bool visitCallInst(CallInst &ICI);
+ bool visitFreezeInst(FreezeInst &FI);
private:
- Scatterer scatter(Instruction *Point, Value *V, Type *PtrElemTy = nullptr);
- void gather(Instruction *Op, const ValueVector &CV);
+ Scatterer scatter(Instruction *Point, Value *V, const VectorSplit &VS);
+ void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS);
void replaceUses(Instruction *Op, Value *CV);
bool canTransferMetadata(unsigned Kind);
void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
+ std::optional<VectorSplit> getVectorSplit(Type *Ty);
std::optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment,
const DataLayout &DL);
bool finish();
@@ -256,6 +343,7 @@ private:
const bool ScalarizeVariableInsertExtract;
const bool ScalarizeLoadStore;
+ const unsigned ScalarizeMinBits;
};
class ScalarizerLegacyPass : public FunctionPass {
@@ -284,42 +372,47 @@ INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
"Scalarize vector operations", false, false)
Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
- Type *PtrElemTy, ValueVector *cachePtr)
- : BB(bb), BBI(bbi), V(v), PtrElemTy(PtrElemTy), CachePtr(cachePtr) {
- Type *Ty = V->getType();
- if (Ty->isPointerTy()) {
- assert(cast<PointerType>(Ty)->isOpaqueOrPointeeTypeMatches(PtrElemTy) &&
- "Pointer element type mismatch");
- Ty = PtrElemTy;
+ const VectorSplit &VS, ValueVector *cachePtr)
+ : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
+ IsPointer = V->getType()->isPointerTy();
+ if (!CachePtr) {
+ Tmp.resize(VS.NumFragments, nullptr);
+ } else {
+ assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() ||
+ IsPointer) &&
+ "Inconsistent vector sizes");
+ if (VS.NumFragments > CachePtr->size())
+ CachePtr->resize(VS.NumFragments, nullptr);
}
- Size = cast<FixedVectorType>(Ty)->getNumElements();
- if (!CachePtr)
- Tmp.resize(Size, nullptr);
- else if (CachePtr->empty())
- CachePtr->resize(Size, nullptr);
- else
- assert(Size == CachePtr->size() && "Inconsistent vector sizes");
}
-// Return component I, creating a new Value for it if necessary.
-Value *Scatterer::operator[](unsigned I) {
- ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
+// Return fragment Frag, creating a new Value for it if necessary.
+Value *Scatterer::operator[](unsigned Frag) {
+ ValueVector &CV = CachePtr ? *CachePtr : Tmp;
// Try to reuse a previous value.
- if (CV[I])
- return CV[I];
+ if (CV[Frag])
+ return CV[Frag];
IRBuilder<> Builder(BB, BBI);
- if (PtrElemTy) {
- Type *VectorElemTy = cast<VectorType>(PtrElemTy)->getElementType();
- if (!CV[0]) {
- Type *NewPtrTy = PointerType::get(
- VectorElemTy, V->getType()->getPointerAddressSpace());
- CV[0] = Builder.CreateBitCast(V, NewPtrTy, V->getName() + ".i0");
- }
- if (I != 0)
- CV[I] = Builder.CreateConstGEP1_32(VectorElemTy, CV[0], I,
- V->getName() + ".i" + Twine(I));
+ if (IsPointer) {
+ if (Frag == 0)
+ CV[Frag] = V;
+ else
+ CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, V, Frag,
+ V->getName() + ".i" + Twine(Frag));
+ return CV[Frag];
+ }
+
+ Type *FragmentTy = VS.getFragmentType(Frag);
+
+ if (auto *VecTy = dyn_cast<FixedVectorType>(FragmentTy)) {
+ SmallVector<int> Mask;
+ for (unsigned J = 0; J < VecTy->getNumElements(); ++J)
+ Mask.push_back(Frag * VS.NumPacked + J);
+ CV[Frag] =
+ Builder.CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask,
+ V->getName() + ".i" + Twine(Frag));
} else {
- // Search through a chain of InsertElementInsts looking for element I.
+ // Search through a chain of InsertElementInsts looking for element Frag.
// Record other elements in the cache. The new V is still suitable
// for all uncached indices.
while (true) {
@@ -331,20 +424,23 @@ Value *Scatterer::operator[](unsigned I) {
break;
unsigned J = Idx->getZExtValue();
V = Insert->getOperand(0);
- if (I == J) {
- CV[J] = Insert->getOperand(1);
- return CV[J];
- } else if (!CV[J]) {
+ if (Frag * VS.NumPacked == J) {
+ CV[Frag] = Insert->getOperand(1);
+ return CV[Frag];
+ }
+
+ if (VS.NumPacked == 1 && !CV[J]) {
// Only cache the first entry we find for each index we're not actively
// searching for. This prevents us from going too far up the chain and
// caching incorrect entries.
CV[J] = Insert->getOperand(1);
}
}
- CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
- V->getName() + ".i" + Twine(I));
+ CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked,
+ V->getName() + ".i" + Twine(Frag));
}
- return CV[I];
+
+ return CV[Frag];
}
bool ScalarizerLegacyPass::runOnFunction(Function &F) {
@@ -386,13 +482,13 @@ bool ScalarizerVisitor::visit(Function &F) {
// Return a scattered form of V that can be accessed by Point. V must be a
// vector or a pointer to a vector.
Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
- Type *PtrElemTy) {
+ const VectorSplit &VS) {
if (Argument *VArg = dyn_cast<Argument>(V)) {
// Put the scattered form of arguments in the entry block,
// so that it can be used everywhere.
Function *F = VArg->getParent();
BasicBlock *BB = &F->getEntryBlock();
- return Scatterer(BB, BB->begin(), V, PtrElemTy, &Scattered[{V, PtrElemTy}]);
+ return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]);
}
if (Instruction *VOp = dyn_cast<Instruction>(V)) {
// When scalarizing PHI nodes we might try to examine/rewrite InsertElement
@@ -403,29 +499,30 @@ Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
// need to analyse them further.
if (!DT->isReachableFromEntry(VOp->getParent()))
return Scatterer(Point->getParent(), Point->getIterator(),
- PoisonValue::get(V->getType()), PtrElemTy);
+ PoisonValue::get(V->getType()), VS);
// Put the scattered form of an instruction directly after the
// instruction, skipping over PHI nodes and debug intrinsics.
BasicBlock *BB = VOp->getParent();
return Scatterer(
- BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V,
- PtrElemTy, &Scattered[{V, PtrElemTy}]);
+ BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V, VS,
+ &Scattered[{V, VS.SplitTy}]);
}
// In the fallback case, just put the scattered before Point and
// keep the result local to Point.
- return Scatterer(Point->getParent(), Point->getIterator(), V, PtrElemTy);
+ return Scatterer(Point->getParent(), Point->getIterator(), V, VS);
}
// Replace Op with the gathered form of the components in CV. Defer the
// deletion of Op and creation of the gathered form to the end of the pass,
// so that we can avoid creating the gathered form if all uses of Op are
// replaced with uses of CV.
-void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV) {
+void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV,
+ const VectorSplit &VS) {
transferMetadataAndIRFlags(Op, CV);
// If we already have a scattered form of Op (created from ExtractElements
// of Op itself), replace them with the new form.
- ValueVector &SV = Scattered[{Op, nullptr}];
+ ValueVector &SV = Scattered[{Op, VS.SplitTy}];
if (!SV.empty()) {
for (unsigned I = 0, E = SV.size(); I != E; ++I) {
Value *V = SV[I];
@@ -483,23 +580,57 @@ void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
}
}
+// Determine how Ty is split, if at all.
+std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) {
+ VectorSplit Split;
+ Split.VecTy = dyn_cast<FixedVectorType>(Ty);
+ if (!Split.VecTy)
+ return {};
+
+ unsigned NumElems = Split.VecTy->getNumElements();
+ Type *ElemTy = Split.VecTy->getElementType();
+
+ if (NumElems == 1 || ElemTy->isPointerTy() ||
+ 2 * ElemTy->getScalarSizeInBits() > ScalarizeMinBits) {
+ Split.NumPacked = 1;
+ Split.NumFragments = NumElems;
+ Split.SplitTy = ElemTy;
+ } else {
+ Split.NumPacked = ScalarizeMinBits / ElemTy->getScalarSizeInBits();
+ if (Split.NumPacked >= NumElems)
+ return {};
+
+ Split.NumFragments = divideCeil(NumElems, Split.NumPacked);
+ Split.SplitTy = FixedVectorType::get(ElemTy, Split.NumPacked);
+
+ unsigned RemainderElems = NumElems % Split.NumPacked;
+ if (RemainderElems > 1)
+ Split.RemainderTy = FixedVectorType::get(ElemTy, RemainderElems);
+ else if (RemainderElems == 1)
+ Split.RemainderTy = ElemTy;
+ }
+
+ return Split;
+}
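+// Note the 2 * ElemTy->getScalarSizeInBits() > ScalarizeMinBits guard above:
+// an element wider than half the budget cannot be packed in pairs, so it is
+// still split one scalar per fragment. For example, with
+// scalarize-min-bits=32 an i32 element (2 * 32 > 32) keeps NumPacked == 1,
+// while i8 elements are packed four to a fragment (32 / 8).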
+
// Try to fill in Layout from Ty, returning true on success. Alignment is
// the alignment of the vector, or std::nullopt if the ABI default should be
// used.
std::optional<VectorLayout>
ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment,
const DataLayout &DL) {
+ std::optional<VectorSplit> VS = getVectorSplit(Ty);
+ if (!VS)
+ return {};
+
VectorLayout Layout;
- // Make sure we're dealing with a vector.
- Layout.VecTy = dyn_cast<FixedVectorType>(Ty);
- if (!Layout.VecTy)
- return std::nullopt;
- // Check that we're dealing with full-byte elements.
- Layout.ElemTy = Layout.VecTy->getElementType();
- if (!DL.typeSizeEqualsStoreSize(Layout.ElemTy))
- return std::nullopt;
+ Layout.VS = *VS;
+ // Check that we're dealing with full-byte fragments.
+ if (!DL.typeSizeEqualsStoreSize(VS->SplitTy) ||
+ (VS->RemainderTy && !DL.typeSizeEqualsStoreSize(VS->RemainderTy)))
+ return {};
Layout.VecAlign = Alignment;
- Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy);
+ Layout.SplitSize = DL.getTypeStoreSize(VS->SplitTy);
return Layout;
}
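// A hedged example of the full-byte check above: <8 x i1> splits into i1
// fragments whose type size (1 bit) is smaller than their store size (1 byte),
// so typeSizeEqualsStoreSize fails, no layout is returned, and the load or
// store is left unscalarized.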
@@ -507,19 +638,27 @@ ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment,
// to create an instruction like I with operand X and name Name.
template<typename Splitter>
bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
- auto *VT = dyn_cast<FixedVectorType>(I.getType());
- if (!VT)
+ std::optional<VectorSplit> VS = getVectorSplit(I.getType());
+ if (!VS)
return false;
- unsigned NumElems = VT->getNumElements();
+ std::optional<VectorSplit> OpVS;
+ if (I.getOperand(0)->getType() == I.getType()) {
+ OpVS = VS;
+ } else {
+ OpVS = getVectorSplit(I.getOperand(0)->getType());
+ if (!OpVS || VS->NumPacked != OpVS->NumPacked)
+ return false;
+ }
+
IRBuilder<> Builder(&I);
- Scatterer Op = scatter(&I, I.getOperand(0));
- assert(Op.size() == NumElems && "Mismatched unary operation");
+ Scatterer Op = scatter(&I, I.getOperand(0), *OpVS);
+ assert(Op.size() == VS->NumFragments && "Mismatched unary operation");
ValueVector Res;
- Res.resize(NumElems);
- for (unsigned Elem = 0; Elem < NumElems; ++Elem)
- Res[Elem] = Split(Builder, Op[Elem], I.getName() + ".i" + Twine(Elem));
- gather(&I, Res);
+ Res.resize(VS->NumFragments);
+ for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag)
+ Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag));
+ gather(&I, Res, *VS);
return true;
}
@@ -527,24 +666,32 @@ bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
// to create an instruction like I with operands X and Y and name Name.
template<typename Splitter>
bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
- auto *VT = dyn_cast<FixedVectorType>(I.getType());
- if (!VT)
+ std::optional<VectorSplit> VS = getVectorSplit(I.getType());
+ if (!VS)
return false;
- unsigned NumElems = VT->getNumElements();
+ std::optional<VectorSplit> OpVS;
+ if (I.getOperand(0)->getType() == I.getType()) {
+ OpVS = VS;
+ } else {
+ OpVS = getVectorSplit(I.getOperand(0)->getType());
+ if (!OpVS || VS->NumPacked != OpVS->NumPacked)
+ return false;
+ }
+
IRBuilder<> Builder(&I);
- Scatterer VOp0 = scatter(&I, I.getOperand(0));
- Scatterer VOp1 = scatter(&I, I.getOperand(1));
- assert(VOp0.size() == NumElems && "Mismatched binary operation");
- assert(VOp1.size() == NumElems && "Mismatched binary operation");
+ Scatterer VOp0 = scatter(&I, I.getOperand(0), *OpVS);
+ Scatterer VOp1 = scatter(&I, I.getOperand(1), *OpVS);
+ assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation");
+ assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation");
ValueVector Res;
- Res.resize(NumElems);
- for (unsigned Elem = 0; Elem < NumElems; ++Elem) {
- Value *Op0 = VOp0[Elem];
- Value *Op1 = VOp1[Elem];
- Res[Elem] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Elem));
+ Res.resize(VS->NumFragments);
+ for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) {
+ Value *Op0 = VOp0[Frag];
+ Value *Op1 = VOp1[Frag];
+ Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag));
}
- gather(&I, Res);
+ gather(&I, Res, *VS);
return true;
}
@@ -552,18 +699,11 @@ static bool isTriviallyScalariable(Intrinsic::ID ID) {
return isTriviallyVectorizable(ID);
}
-// All of the current scalarizable intrinsics only have one mangled type.
-static Function *getScalarIntrinsicDeclaration(Module *M,
- Intrinsic::ID ID,
- ArrayRef<Type*> Tys) {
- return Intrinsic::getDeclaration(M, ID, Tys);
-}
-
/// If a call to a vector typed intrinsic function, split into a scalar call per
/// element if possible for the intrinsic.
bool ScalarizerVisitor::splitCall(CallInst &CI) {
- auto *VT = dyn_cast<FixedVectorType>(CI.getType());
- if (!VT)
+ std::optional<VectorSplit> VS = getVectorSplit(CI.getType());
+ if (!VS)
return false;
Function *F = CI.getCalledFunction();
@@ -574,26 +714,41 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
if (ID == Intrinsic::not_intrinsic || !isTriviallyScalariable(ID))
return false;
- unsigned NumElems = VT->getNumElements();
+ // unsigned NumElems = VT->getNumElements();
unsigned NumArgs = CI.arg_size();
ValueVector ScalarOperands(NumArgs);
SmallVector<Scatterer, 8> Scattered(NumArgs);
-
- Scattered.resize(NumArgs);
+ SmallVector<int> OverloadIdx(NumArgs, -1);
SmallVector<llvm::Type *, 3> Tys;
- Tys.push_back(VT->getScalarType());
+ // Add return type if intrinsic is overloaded on it.
+ if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1))
+ Tys.push_back(VS->SplitTy);
// Assumes that any vector operand type has the same number of elements as the
// return vector type, which is true for all current intrinsics.
for (unsigned I = 0; I != NumArgs; ++I) {
Value *OpI = CI.getOperand(I);
- if (OpI->getType()->isVectorTy()) {
- Scattered[I] = scatter(&CI, OpI);
- assert(Scattered[I].size() == NumElems && "mismatched call operands");
- if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
- Tys.push_back(OpI->getType()->getScalarType());
+ if (auto *OpVecTy = dyn_cast<FixedVectorType>(OpI->getType())) {
+ assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements());
+ std::optional<VectorSplit> OpVS = getVectorSplit(OpI->getType());
+ if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
+ // The natural split of the operand doesn't match the result. This could
+ // happen if the vector element sizes are different and the ScalarizeMinBits
+ // option is used.
+ //
+ // We could in principle handle this case as well, at the cost of
+ // complicating the scattering machinery to support multiple scattering
+ // granularities for a single value.
+ return false;
+ }
+
+ Scattered[I] = scatter(&CI, OpI, *OpVS);
+ if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I)) {
+ OverloadIdx[I] = Tys.size();
+ Tys.push_back(OpVS->SplitTy);
+ }
} else {
ScalarOperands[I] = OpI;
if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I))
@@ -601,49 +756,67 @@ bool ScalarizerVisitor::splitCall(CallInst &CI) {
}
}
- ValueVector Res(NumElems);
+ ValueVector Res(VS->NumFragments);
ValueVector ScalarCallOps(NumArgs);
- Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, Tys);
+ Function *NewIntrin = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
IRBuilder<> Builder(&CI);
// Perform actual scalarization, taking care to preserve any scalar operands.
- for (unsigned Elem = 0; Elem < NumElems; ++Elem) {
+ for (unsigned I = 0; I < VS->NumFragments; ++I) {
+ bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy;
ScalarCallOps.clear();
+ if (IsRemainder)
+ Tys[0] = VS->RemainderTy;
+
for (unsigned J = 0; J != NumArgs; ++J) {
- if (isVectorIntrinsicWithScalarOpAtArg(ID, J))
+ if (isVectorIntrinsicWithScalarOpAtArg(ID, J)) {
ScalarCallOps.push_back(ScalarOperands[J]);
- else
- ScalarCallOps.push_back(Scattered[J][Elem]);
+ } else {
+ ScalarCallOps.push_back(Scattered[J][I]);
+ if (IsRemainder && OverloadIdx[J] >= 0)
+ Tys[OverloadIdx[J]] = Scattered[J][I]->getType();
+ }
}
- Res[Elem] = Builder.CreateCall(NewIntrin, ScalarCallOps,
- CI.getName() + ".i" + Twine(Elem));
+ if (IsRemainder)
+ NewIntrin = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
+
+ Res[I] = Builder.CreateCall(NewIntrin, ScalarCallOps,
+ CI.getName() + ".i" + Twine(I));
}
- gather(&CI, Res);
+ gather(&CI, Res, *VS);
return true;
}
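A hypothetical worked example of the remainder handling in splitCall above, assuming the same packing rule as before and assuming a one-element remainder is represented as a plain scalar rather than a <1 x ...> vector (this example is not taken from the patch or its tests):

#include <iostream>
#include <string>
#include <vector>

// For a <7 x float> result split two elements at a time, print the overload
// type each scalarized call would be declared on; the last call switches to
// the remainder type, as the Tys/OverloadIdx bookkeeping above does.
int main() {
  unsigned NumElems = 7, NumPacked = 2;
  unsigned NumFragments = (NumElems + NumPacked - 1) / NumPacked;
  unsigned RemainderElts = NumElems % NumPacked;
  std::vector<std::string> FragTys;
  for (unsigned I = 0; I < NumFragments; ++I) {
    bool IsRemainder = (I == NumFragments - 1) && RemainderElts != 0;
    unsigned Elts = IsRemainder ? RemainderElts : NumPacked;
    FragTys.push_back(Elts == 1 ? "float"
                                : "<" + std::to_string(Elts) + " x float>");
  }
  for (unsigned I = 0; I < NumFragments; ++I)
    std::cout << "call " << I << " overloaded on " << FragTys[I] << "\n";
}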
bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
- auto *VT = dyn_cast<FixedVectorType>(SI.getType());
- if (!VT)
+ std::optional<VectorSplit> VS = getVectorSplit(SI.getType());
+ if (!VS)
return false;
- unsigned NumElems = VT->getNumElements();
+ std::optional<VectorSplit> CondVS;
+ if (isa<FixedVectorType>(SI.getCondition()->getType())) {
+ CondVS = getVectorSplit(SI.getCondition()->getType());
+ if (!CondVS || CondVS->NumPacked != VS->NumPacked) {
+ // This happens when ScalarizeMinBits is used.
+ return false;
+ }
+ }
+
IRBuilder<> Builder(&SI);
- Scatterer VOp1 = scatter(&SI, SI.getOperand(1));
- Scatterer VOp2 = scatter(&SI, SI.getOperand(2));
- assert(VOp1.size() == NumElems && "Mismatched select");
- assert(VOp2.size() == NumElems && "Mismatched select");
+ Scatterer VOp1 = scatter(&SI, SI.getOperand(1), *VS);
+ Scatterer VOp2 = scatter(&SI, SI.getOperand(2), *VS);
+ assert(VOp1.size() == VS->NumFragments && "Mismatched select");
+ assert(VOp2.size() == VS->NumFragments && "Mismatched select");
ValueVector Res;
- Res.resize(NumElems);
+ Res.resize(VS->NumFragments);
- if (SI.getOperand(0)->getType()->isVectorTy()) {
- Scatterer VOp0 = scatter(&SI, SI.getOperand(0));
- assert(VOp0.size() == NumElems && "Mismatched select");
- for (unsigned I = 0; I < NumElems; ++I) {
+ if (CondVS) {
+ Scatterer VOp0 = scatter(&SI, SI.getOperand(0), *CondVS);
+ assert(VOp0.size() == CondVS->NumFragments && "Mismatched select");
+ for (unsigned I = 0; I < VS->NumFragments; ++I) {
Value *Op0 = VOp0[I];
Value *Op1 = VOp1[I];
Value *Op2 = VOp2[I];
@@ -652,14 +825,14 @@ bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
}
} else {
Value *Op0 = SI.getOperand(0);
- for (unsigned I = 0; I < NumElems; ++I) {
+ for (unsigned I = 0; I < VS->NumFragments; ++I) {
Value *Op1 = VOp1[I];
Value *Op2 = VOp2[I];
Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
SI.getName() + ".i" + Twine(I));
}
}
- gather(&SI, Res);
+ gather(&SI, Res, *VS);
return true;
}
@@ -680,146 +853,194 @@ bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
}
bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
- auto *VT = dyn_cast<FixedVectorType>(GEPI.getType());
- if (!VT)
+ std::optional<VectorSplit> VS = getVectorSplit(GEPI.getType());
+ if (!VS)
return false;
IRBuilder<> Builder(&GEPI);
- unsigned NumElems = VT->getNumElements();
unsigned NumIndices = GEPI.getNumIndices();
- // The base pointer might be scalar even if it's a vector GEP. In those cases,
- // splat the pointer into a vector value, and scatter that vector.
- Value *Op0 = GEPI.getOperand(0);
- if (!Op0->getType()->isVectorTy())
- Op0 = Builder.CreateVectorSplat(NumElems, Op0);
- Scatterer Base = scatter(&GEPI, Op0);
-
- SmallVector<Scatterer, 8> Ops;
- Ops.resize(NumIndices);
- for (unsigned I = 0; I < NumIndices; ++I) {
- Value *Op = GEPI.getOperand(I + 1);
-
- // The indices might be scalars even if it's a vector GEP. In those cases,
- // splat the scalar into a vector value, and scatter that vector.
- if (!Op->getType()->isVectorTy())
- Op = Builder.CreateVectorSplat(NumElems, Op);
-
- Ops[I] = scatter(&GEPI, Op);
+ // The base pointer and indices might be scalar even if it's a vector GEP.
+ SmallVector<Value *, 8> ScalarOps{1 + NumIndices};
+ SmallVector<Scatterer, 8> ScatterOps{1 + NumIndices};
+
+ for (unsigned I = 0; I < 1 + NumIndices; ++I) {
+ if (auto *VecTy =
+ dyn_cast<FixedVectorType>(GEPI.getOperand(I)->getType())) {
+ std::optional<VectorSplit> OpVS = getVectorSplit(VecTy);
+ if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
+ // This can happen when ScalarizeMinBits is used.
+ return false;
+ }
+ ScatterOps[I] = scatter(&GEPI, GEPI.getOperand(I), *OpVS);
+ } else {
+ ScalarOps[I] = GEPI.getOperand(I);
+ }
}
ValueVector Res;
- Res.resize(NumElems);
- for (unsigned I = 0; I < NumElems; ++I) {
- SmallVector<Value *, 8> Indices;
- Indices.resize(NumIndices);
- for (unsigned J = 0; J < NumIndices; ++J)
- Indices[J] = Ops[J][I];
- Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices,
+ Res.resize(VS->NumFragments);
+ for (unsigned I = 0; I < VS->NumFragments; ++I) {
+ SmallVector<Value *, 8> SplitOps;
+ SplitOps.resize(1 + NumIndices);
+ for (unsigned J = 0; J < 1 + NumIndices; ++J) {
+ if (ScalarOps[J])
+ SplitOps[J] = ScalarOps[J];
+ else
+ SplitOps[J] = ScatterOps[J][I];
+ }
+ Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), SplitOps[0],
+ ArrayRef(SplitOps).drop_front(),
GEPI.getName() + ".i" + Twine(I));
if (GEPI.isInBounds())
if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
NewGEPI->setIsInBounds();
}
- gather(&GEPI, Res);
+ gather(&GEPI, Res, *VS);
return true;
}
bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
- auto *VT = dyn_cast<FixedVectorType>(CI.getDestTy());
- if (!VT)
+ std::optional<VectorSplit> DestVS = getVectorSplit(CI.getDestTy());
+ if (!DestVS)
+ return false;
+
+ std::optional<VectorSplit> SrcVS = getVectorSplit(CI.getSrcTy());
+ if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked)
return false;
- unsigned NumElems = VT->getNumElements();
IRBuilder<> Builder(&CI);
- Scatterer Op0 = scatter(&CI, CI.getOperand(0));
- assert(Op0.size() == NumElems && "Mismatched cast");
+ Scatterer Op0 = scatter(&CI, CI.getOperand(0), *SrcVS);
+ assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast");
ValueVector Res;
- Res.resize(NumElems);
- for (unsigned I = 0; I < NumElems; ++I)
- Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(),
- CI.getName() + ".i" + Twine(I));
- gather(&CI, Res);
+ Res.resize(DestVS->NumFragments);
+ for (unsigned I = 0; I < DestVS->NumFragments; ++I)
+ Res[I] =
+ Builder.CreateCast(CI.getOpcode(), Op0[I], DestVS->getFragmentType(I),
+ CI.getName() + ".i" + Twine(I));
+ gather(&CI, Res, *DestVS);
return true;
}
bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
- auto *DstVT = dyn_cast<FixedVectorType>(BCI.getDestTy());
- auto *SrcVT = dyn_cast<FixedVectorType>(BCI.getSrcTy());
- if (!DstVT || !SrcVT)
+ std::optional<VectorSplit> DstVS = getVectorSplit(BCI.getDestTy());
+ std::optional<VectorSplit> SrcVS = getVectorSplit(BCI.getSrcTy());
+ if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy)
return false;
- unsigned DstNumElems = DstVT->getNumElements();
- unsigned SrcNumElems = SrcVT->getNumElements();
+ const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy();
+
+ // Vectors of pointers are always fully scalarized.
+ assert(!isPointerTy || (DstVS->NumPacked == 1 && SrcVS->NumPacked == 1));
+
IRBuilder<> Builder(&BCI);
- Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
+ Scatterer Op0 = scatter(&BCI, BCI.getOperand(0), *SrcVS);
ValueVector Res;
- Res.resize(DstNumElems);
+ Res.resize(DstVS->NumFragments);
+
+ unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits();
+ unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits();
- if (DstNumElems == SrcNumElems) {
- for (unsigned I = 0; I < DstNumElems; ++I)
- Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
+ if (isPointerTy || DstSplitBits == SrcSplitBits) {
+ assert(DstVS->NumFragments == SrcVS->NumFragments);
+ for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
+ Res[I] = Builder.CreateBitCast(Op0[I], DstVS->getFragmentType(I),
BCI.getName() + ".i" + Twine(I));
- } else if (DstNumElems > SrcNumElems) {
- // <M x t1> -> <N*M x t2>. Convert each t1 to <N x t2> and copy the
- // individual elements to the destination.
- unsigned FanOut = DstNumElems / SrcNumElems;
- auto *MidTy = FixedVectorType::get(DstVT->getElementType(), FanOut);
+ }
+ } else if (SrcSplitBits % DstSplitBits == 0) {
+ // Convert each source fragment to the same-sized destination vector and
+ // then scatter the result to the destination.
+ VectorSplit MidVS;
+ MidVS.NumPacked = DstVS->NumPacked;
+ MidVS.NumFragments = SrcSplitBits / DstSplitBits;
+ MidVS.VecTy = FixedVectorType::get(DstVS->VecTy->getElementType(),
+ MidVS.NumPacked * MidVS.NumFragments);
+ MidVS.SplitTy = DstVS->SplitTy;
+
unsigned ResI = 0;
- for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
- Value *V = Op0[Op0I];
- Instruction *VI;
+ for (unsigned I = 0; I < SrcVS->NumFragments; ++I) {
+ Value *V = Op0[I];
+
// Look through any existing bitcasts before converting to <N x t2>.
// In the best case, the resulting conversion might be a no-op.
+ Instruction *VI;
while ((VI = dyn_cast<Instruction>(V)) &&
VI->getOpcode() == Instruction::BitCast)
V = VI->getOperand(0);
- V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
- Scatterer Mid = scatter(&BCI, V);
- for (unsigned MidI = 0; MidI < FanOut; ++MidI)
- Res[ResI++] = Mid[MidI];
+
+ V = Builder.CreateBitCast(V, MidVS.VecTy, V->getName() + ".cast");
+
+ Scatterer Mid = scatter(&BCI, V, MidVS);
+ for (unsigned J = 0; J < MidVS.NumFragments; ++J)
+ Res[ResI++] = Mid[J];
}
- } else {
- // <N*M x t1> -> <M x t2>. Convert each group of <N x t1> into a t2.
- unsigned FanIn = SrcNumElems / DstNumElems;
- auto *MidTy = FixedVectorType::get(SrcVT->getElementType(), FanIn);
- unsigned Op0I = 0;
- for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
- Value *V = PoisonValue::get(MidTy);
- for (unsigned MidI = 0; MidI < FanIn; ++MidI)
- V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
- BCI.getName() + ".i" + Twine(ResI)
- + ".upto" + Twine(MidI));
- Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
- BCI.getName() + ".i" + Twine(ResI));
+ } else if (DstSplitBits % SrcSplitBits == 0) {
+ // Gather enough source fragments to make up a destination fragment and
+ // then convert to the destination type.
+ VectorSplit MidVS;
+ MidVS.NumFragments = DstSplitBits / SrcSplitBits;
+ MidVS.NumPacked = SrcVS->NumPacked;
+ MidVS.VecTy = FixedVectorType::get(SrcVS->VecTy->getElementType(),
+ MidVS.NumPacked * MidVS.NumFragments);
+ MidVS.SplitTy = SrcVS->SplitTy;
+
+ unsigned SrcI = 0;
+ SmallVector<Value *, 8> ConcatOps;
+ ConcatOps.resize(MidVS.NumFragments);
+ for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
+ for (unsigned J = 0; J < MidVS.NumFragments; ++J)
+ ConcatOps[J] = Op0[SrcI++];
+ Value *V = concatenate(Builder, ConcatOps, MidVS,
+ BCI.getName() + ".i" + Twine(I));
+ Res[I] = Builder.CreateBitCast(V, DstVS->getFragmentType(I),
+ BCI.getName() + ".i" + Twine(I));
}
+ } else {
+ return false;
}
- gather(&BCI, Res);
+
+ gather(&BCI, Res, *DstVS);
return true;
}
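The two non-trivial bitcast paths above hinge on the ratio of the fragment bit widths. A sketch of the middle-vector shape computation for the fan-out case, mirroring MidVS; it assumes the widths divide evenly, as the code requires, and is illustrative only:

#include <iostream>

struct MidShape {
  unsigned NumPacked;    // elements per destination fragment
  unsigned NumFragments; // destination fragments produced per source fragment
};

static MidShape fanOutShape(unsigned SrcSplitBits, unsigned DstSplitBits,
                            unsigned DstNumPacked) {
  MidShape M;
  M.NumPacked = DstNumPacked;
  M.NumFragments = SrcSplitBits / DstSplitBits; // assumed to divide evenly
  return M;
}

int main() {
  // e.g. i64 source fragments (64 bits) re-split as <2 x i16> destination
  // fragments (32 bits): each source fragment is bitcast to a 4 x i16 middle
  // vector and then yields two destination fragments.
  MidShape M = fanOutShape(64, 32, 2);
  std::cout << M.NumFragments << " destination fragments per source fragment, "
            << M.NumPacked << " elements each\n";
}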
bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
- auto *VT = dyn_cast<FixedVectorType>(IEI.getType());
- if (!VT)
+ std::optional<VectorSplit> VS = getVectorSplit(IEI.getType());
+ if (!VS)
return false;
- unsigned NumElems = VT->getNumElements();
IRBuilder<> Builder(&IEI);
- Scatterer Op0 = scatter(&IEI, IEI.getOperand(0));
+ Scatterer Op0 = scatter(&IEI, IEI.getOperand(0), *VS);
Value *NewElt = IEI.getOperand(1);
Value *InsIdx = IEI.getOperand(2);
ValueVector Res;
- Res.resize(NumElems);
+ Res.resize(VS->NumFragments);
if (auto *CI = dyn_cast<ConstantInt>(InsIdx)) {
- for (unsigned I = 0; I < NumElems; ++I)
- Res[I] = CI->getValue().getZExtValue() == I ? NewElt : Op0[I];
+ unsigned Idx = CI->getZExtValue();
+ unsigned Fragment = Idx / VS->NumPacked;
+ for (unsigned I = 0; I < VS->NumFragments; ++I) {
+ if (I == Fragment) {
+ bool IsPacked = VS->NumPacked > 1;
+ if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
+ !VS->RemainderTy->isVectorTy())
+ IsPacked = false;
+ if (IsPacked) {
+ Res[I] =
+ Builder.CreateInsertElement(Op0[I], NewElt, Idx % VS->NumPacked);
+ } else {
+ Res[I] = NewElt;
+ }
+ } else {
+ Res[I] = Op0[I];
+ }
+ }
} else {
- if (!ScalarizeVariableInsertExtract)
+ // Never split a variable insertelement that isn't fully scalarized.
+ if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
return false;
- for (unsigned I = 0; I < NumElems; ++I) {
+ for (unsigned I = 0; I < VS->NumFragments; ++I) {
Value *ShouldReplace =
Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
InsIdx->getName() + ".is." + Twine(I));
@@ -829,31 +1050,39 @@ bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
}
}
- gather(&IEI, Res);
+ gather(&IEI, Res, *VS);
return true;
}
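Both the insertelement path above and the extractelement path below map a constant element index onto a fragment and a lane within it. A minimal sketch of that mapping (not part of the patch):

#include <iostream>

// Map a flat element index to (fragment, lane) given NumPacked elements per
// fragment, as the constant-index insert/extract paths do.
int main() {
  unsigned NumPacked = 2;
  for (unsigned Idx = 0; Idx < 7; ++Idx) {
    unsigned Fragment = Idx / NumPacked;
    unsigned Lane = Idx % NumPacked;
    std::cout << "element " << Idx << " -> fragment " << Fragment
              << ", lane " << Lane << "\n";
  }
}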
bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
- auto *VT = dyn_cast<FixedVectorType>(EEI.getOperand(0)->getType());
- if (!VT)
+ std::optional<VectorSplit> VS = getVectorSplit(EEI.getOperand(0)->getType());
+ if (!VS)
return false;
- unsigned NumSrcElems = VT->getNumElements();
IRBuilder<> Builder(&EEI);
- Scatterer Op0 = scatter(&EEI, EEI.getOperand(0));
+ Scatterer Op0 = scatter(&EEI, EEI.getOperand(0), *VS);
Value *ExtIdx = EEI.getOperand(1);
if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
- Value *Res = Op0[CI->getValue().getZExtValue()];
+ unsigned Idx = CI->getZExtValue();
+ unsigned Fragment = Idx / VS->NumPacked;
+ Value *Res = Op0[Fragment];
+ bool IsPacked = VS->NumPacked > 1;
+ if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
+ !VS->RemainderTy->isVectorTy())
+ IsPacked = false;
+ if (IsPacked)
+ Res = Builder.CreateExtractElement(Res, Idx % VS->NumPacked);
replaceUses(&EEI, Res);
return true;
}
- if (!ScalarizeVariableInsertExtract)
+ // Never split a variable extractelement that isn't fully scalarized.
+ if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
return false;
- Value *Res = PoisonValue::get(VT->getElementType());
- for (unsigned I = 0; I < NumSrcElems; ++I) {
+ Value *Res = PoisonValue::get(VS->VecTy->getElementType());
+ for (unsigned I = 0; I < VS->NumFragments; ++I) {
Value *ShouldExtract =
Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I),
ExtIdx->getName() + ".is." + Twine(I));
@@ -866,51 +1095,52 @@ bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
}
bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
- auto *VT = dyn_cast<FixedVectorType>(SVI.getType());
- if (!VT)
+ std::optional<VectorSplit> VS = getVectorSplit(SVI.getType());
+ std::optional<VectorSplit> VSOp =
+ getVectorSplit(SVI.getOperand(0)->getType());
+ if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1)
return false;
- unsigned NumElems = VT->getNumElements();
- Scatterer Op0 = scatter(&SVI, SVI.getOperand(0));
- Scatterer Op1 = scatter(&SVI, SVI.getOperand(1));
+ Scatterer Op0 = scatter(&SVI, SVI.getOperand(0), *VSOp);
+ Scatterer Op1 = scatter(&SVI, SVI.getOperand(1), *VSOp);
ValueVector Res;
- Res.resize(NumElems);
+ Res.resize(VS->NumFragments);
- for (unsigned I = 0; I < NumElems; ++I) {
+ for (unsigned I = 0; I < VS->NumFragments; ++I) {
int Selector = SVI.getMaskValue(I);
if (Selector < 0)
- Res[I] = UndefValue::get(VT->getElementType());
+ Res[I] = PoisonValue::get(VS->VecTy->getElementType());
else if (unsigned(Selector) < Op0.size())
Res[I] = Op0[Selector];
else
Res[I] = Op1[Selector - Op0.size()];
}
- gather(&SVI, Res);
+ gather(&SVI, Res, *VS);
return true;
}
bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
- auto *VT = dyn_cast<FixedVectorType>(PHI.getType());
- if (!VT)
+ std::optional<VectorSplit> VS = getVectorSplit(PHI.getType());
+ if (!VS)
return false;
- unsigned NumElems = cast<FixedVectorType>(VT)->getNumElements();
IRBuilder<> Builder(&PHI);
ValueVector Res;
- Res.resize(NumElems);
+ Res.resize(VS->NumFragments);
unsigned NumOps = PHI.getNumOperands();
- for (unsigned I = 0; I < NumElems; ++I)
- Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps,
+ for (unsigned I = 0; I < VS->NumFragments; ++I) {
+ Res[I] = Builder.CreatePHI(VS->getFragmentType(I), NumOps,
PHI.getName() + ".i" + Twine(I));
+ }
for (unsigned I = 0; I < NumOps; ++I) {
- Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I));
+ Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I), *VS);
BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
- for (unsigned J = 0; J < NumElems; ++J)
+ for (unsigned J = 0; J < VS->NumFragments; ++J)
cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
}
- gather(&PHI, Res);
+ gather(&PHI, Res, *VS);
return true;
}
@@ -925,17 +1155,17 @@ bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
if (!Layout)
return false;
- unsigned NumElems = cast<FixedVectorType>(Layout->VecTy)->getNumElements();
IRBuilder<> Builder(&LI);
- Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), LI.getType());
+ Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), Layout->VS);
ValueVector Res;
- Res.resize(NumElems);
+ Res.resize(Layout->VS.NumFragments);
- for (unsigned I = 0; I < NumElems; ++I)
- Res[I] = Builder.CreateAlignedLoad(Layout->VecTy->getElementType(), Ptr[I],
- Align(Layout->getElemAlign(I)),
+ for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
+ Res[I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(I), Ptr[I],
+ Align(Layout->getFragmentAlign(I)),
LI.getName() + ".i" + Twine(I));
- gather(&LI, Res);
+ }
+ gather(&LI, Res, Layout->VS);
return true;
}
@@ -951,17 +1181,17 @@ bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
if (!Layout)
return false;
- unsigned NumElems = cast<FixedVectorType>(Layout->VecTy)->getNumElements();
IRBuilder<> Builder(&SI);
- Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), FullValue->getType());
- Scatterer VVal = scatter(&SI, FullValue);
+ Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), Layout->VS);
+ Scatterer VVal = scatter(&SI, FullValue, Layout->VS);
ValueVector Stores;
- Stores.resize(NumElems);
- for (unsigned I = 0; I < NumElems; ++I) {
+ Stores.resize(Layout->VS.NumFragments);
+ for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
Value *Val = VVal[I];
Value *Ptr = VPtr[I];
- Stores[I] = Builder.CreateAlignedStore(Val, Ptr, Layout->getElemAlign(I));
+ Stores[I] =
+ Builder.CreateAlignedStore(Val, Ptr, Layout->getFragmentAlign(I));
}
transferMetadataAndIRFlags(&SI, Stores);
return true;
@@ -971,6 +1201,12 @@ bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
return splitCall(CI);
}
+bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) {
+ return splitUnary(FI, [](IRBuilder<> &Builder, Value *Op, const Twine &Name) {
+ return Builder.CreateFreeze(Op, Name);
+ });
+}
+
// Delete the instructions that we scalarized. If a full vector result
// is still needed, recreate it using InsertElements.
bool ScalarizerVisitor::finish() {
@@ -983,17 +1219,19 @@ bool ScalarizerVisitor::finish() {
ValueVector &CV = *GMI.second;
if (!Op->use_empty()) {
// The value is still needed, so recreate it using a series of
- // InsertElements.
- Value *Res = PoisonValue::get(Op->getType());
+ // insertelements and/or shufflevectors.
+ Value *Res;
if (auto *Ty = dyn_cast<FixedVectorType>(Op->getType())) {
BasicBlock *BB = Op->getParent();
- unsigned Count = Ty->getNumElements();
IRBuilder<> Builder(Op);
if (isa<PHINode>(Op))
Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
- for (unsigned I = 0; I < Count; ++I)
- Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
- Op->getName() + ".upto" + Twine(I));
+
+ VectorSplit VS = *getVectorSplit(Ty);
+ assert(VS.NumFragments == CV.size());
+
+ Res = concatenate(Builder, CV, VS, Op->getName());
+
Res->takeName(Op);
} else {
assert(CV.size() == 1 && Op->getType() == CV[0]->getType());
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 4fb90bcea4f0..89d0b7c33e0d 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -162,7 +162,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -355,7 +354,6 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesCFG();
@@ -374,14 +372,23 @@ private:
class SeparateConstOffsetFromGEP {
public:
SeparateConstOffsetFromGEP(
- DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI,
- TargetLibraryInfo *TLI,
+ DominatorTree *DT, LoopInfo *LI, TargetLibraryInfo *TLI,
function_ref<TargetTransformInfo &(Function &)> GetTTI, bool LowerGEP)
- : DT(DT), SE(SE), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}
+ : DT(DT), LI(LI), TLI(TLI), GetTTI(GetTTI), LowerGEP(LowerGEP) {}
bool run(Function &F);
private:
+ /// Track the operands of an add or sub.
+ using ExprKey = std::pair<Value *, Value *>;
+
+ /// Create a pair for use as a map key for a commutable operation.
+ static ExprKey createNormalizedCommutablePair(Value *A, Value *B) {
+ if (A < B)
+ return {A, B};
+ return {B, A};
+ }
+
/// Tries to split the given GEP into a variadic base and a constant offset,
/// and returns true if the splitting succeeds.
bool splitGEP(GetElementPtrInst *GEP);
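With ScalarEvolution gone, add/sub reuse is keyed by the operand pair itself; ordering the two pointers makes the key insensitive to operand order for commutative adds. A standalone sketch of that idea, with std::map standing in for DenseMap and a dummy Value type (not the patch's code):

#include <cassert>
#include <map>
#include <utility>
#include <vector>

struct Value {}; // stand-in for llvm::Value

using ExprKey = std::pair<Value *, Value *>;

// Order the pair so that (A, B) and (B, A) produce the same key, mirroring
// createNormalizedCommutablePair.
static ExprKey makeKey(Value *A, Value *B) {
  return A < B ? ExprKey{A, B} : ExprKey{B, A};
}

int main() {
  Value X, Y;
  std::map<ExprKey, std::vector<int>> DominatingAdds; // std::map as a stand-in
  DominatingAdds[makeKey(&X, &Y)].push_back(1); // record "X + Y"
  // A later "Y + X" looks up the same bucket:
  assert(DominatingAdds.count(makeKey(&Y, &X)) == 1);
}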
@@ -428,7 +435,7 @@ private:
/// Returns true if the module changes.
///
/// Verified in @i32_add in split-gep.ll
- bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
+ bool canonicalizeArrayIndicesToIndexSize(GetElementPtrInst *GEP);
/// Optimize sext(a)+sext(b) to sext(a+b) when a+b can't sign overflow.
/// SeparateConstOffsetFromGEP distributes a sext to leaves before extracting
@@ -446,8 +453,8 @@ private:
/// Find the closest dominator of <Dominatee> that is equivalent to <Key>.
Instruction *findClosestMatchingDominator(
- const SCEV *Key, Instruction *Dominatee,
- DenseMap<const SCEV *, SmallVector<Instruction *, 2>> &DominatingExprs);
+ ExprKey Key, Instruction *Dominatee,
+ DenseMap<ExprKey, SmallVector<Instruction *, 2>> &DominatingExprs);
/// Verify F is free of dead code.
void verifyNoDeadCode(Function &F);
@@ -463,7 +470,6 @@ private:
const DataLayout *DL = nullptr;
DominatorTree *DT = nullptr;
- ScalarEvolution *SE;
LoopInfo *LI;
TargetLibraryInfo *TLI;
// Retrieved lazily since not always used.
@@ -473,8 +479,8 @@ private:
/// multiple GEPs with a single index.
bool LowerGEP;
- DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingAdds;
- DenseMap<const SCEV *, SmallVector<Instruction *, 2>> DominatingSubs;
+ DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingAdds;
+ DenseMap<ExprKey, SmallVector<Instruction *, 2>> DominatingSubs;
};
} // end anonymous namespace
@@ -521,6 +527,12 @@ bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
!haveNoCommonBitsSet(LHS, RHS, DL, nullptr, BO, DT))
return false;
+ // FIXME: We don't currently support constants on the RHS of subs when we are
+ // zero-extended, because we need a way to zero-extend them before they are
+ // negated.
+ if (ZeroExtended && !SignExtended && BO->getOpcode() == Instruction::Sub)
+ return false;
+
// In addition, tracing into BO requires that its surrounding s/zext (if
// any) is distributable to both operands.
//
@@ -791,17 +803,17 @@ int64_t ConstantOffsetExtractor::Find(Value *Idx, GetElementPtrInst *GEP,
.getSExtValue();
}
-bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
+bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToIndexSize(
GetElementPtrInst *GEP) {
bool Changed = false;
- Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ Type *PtrIdxTy = DL->getIndexType(GEP->getType());
gep_type_iterator GTI = gep_type_begin(*GEP);
for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
I != E; ++I, ++GTI) {
// Skip struct member indices which must be i32.
if (GTI.isSequential()) {
- if ((*I)->getType() != IntPtrTy) {
- *I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP);
+ if ((*I)->getType() != PtrIdxTy) {
+ *I = CastInst::CreateIntegerCast(*I, PtrIdxTy, true, "idxprom", GEP);
Changed = true;
}
}
@@ -849,10 +861,8 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
GetElementPtrInst *Variadic, int64_t AccumulativeByteOffset) {
IRBuilder<> Builder(Variadic);
- Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
+ Type *PtrIndexTy = DL->getIndexType(Variadic->getType());
- Type *I8PtrTy =
- Builder.getInt8PtrTy(Variadic->getType()->getPointerAddressSpace());
Value *ResultPtr = Variadic->getOperand(0);
Loop *L = LI->getLoopFor(Variadic->getParent());
// Check if the base is not loop invariant or used more than once.
@@ -861,9 +871,6 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
!hasMoreThanOneUseInLoop(ResultPtr, L);
Value *FirstResult = nullptr;
- if (ResultPtr->getType() != I8PtrTy)
- ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
-
gep_type_iterator GTI = gep_type_begin(*Variadic);
// Create an ugly GEP for each sequential index. We don't create GEPs for
// structure indices, as they are accumulated in the constant offset index.
@@ -875,15 +882,16 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
if (CI->isZero())
continue;
- APInt ElementSize = APInt(IntPtrTy->getIntegerBitWidth(),
+ APInt ElementSize = APInt(PtrIndexTy->getIntegerBitWidth(),
DL->getTypeAllocSize(GTI.getIndexedType()));
// Scale the index by element size.
if (ElementSize != 1) {
if (ElementSize.isPowerOf2()) {
Idx = Builder.CreateShl(
- Idx, ConstantInt::get(IntPtrTy, ElementSize.logBase2()));
+ Idx, ConstantInt::get(PtrIndexTy, ElementSize.logBase2()));
} else {
- Idx = Builder.CreateMul(Idx, ConstantInt::get(IntPtrTy, ElementSize));
+ Idx =
+ Builder.CreateMul(Idx, ConstantInt::get(PtrIndexTy, ElementSize));
}
}
// Create an ugly GEP with a single index for each index.
@@ -896,7 +904,7 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
// Create a GEP with the constant offset index.
if (AccumulativeByteOffset != 0) {
- Value *Offset = ConstantInt::get(IntPtrTy, AccumulativeByteOffset);
+ Value *Offset = ConstantInt::get(PtrIndexTy, AccumulativeByteOffset);
ResultPtr =
Builder.CreateGEP(Builder.getInt8Ty(), ResultPtr, Offset, "uglygep");
} else
@@ -910,9 +918,6 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
if (isSwapCandidate && isLegalToSwapOperand(FirstGEP, SecondGEP, L))
swapGEPOperand(FirstGEP, SecondGEP);
- if (ResultPtr->getType() != Variadic->getType())
- ResultPtr = Builder.CreateBitCast(ResultPtr, Variadic->getType());
-
Variadic->replaceAllUsesWith(ResultPtr);
Variadic->eraseFromParent();
}
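Each sequential index above contributes index * alloc-size-of-element bytes to the i8 GEP chain, emitted as a shift when the size is a power of two. A purely illustrative sketch of that scaling in plain integers:

#include <cstdint>
#include <iostream>

// Scale a GEP index by the element's alloc size, using a shift when the size
// is a power of two, mirroring what lowerToSingleIndexGEPs emits.
static int64_t scaleIndex(int64_t Idx, uint64_t ElementSize) {
  if (ElementSize != 0 && (ElementSize & (ElementSize - 1)) == 0) {
    unsigned Log2 = 0;
    while ((uint64_t{1} << Log2) != ElementSize)
      ++Log2;
    // Shift in unsigned arithmetic; equivalent to Idx * ElementSize.
    return static_cast<int64_t>(static_cast<uint64_t>(Idx) << Log2);
  }
  return Idx * static_cast<int64_t>(ElementSize);
}

int main() {
  std::cout << scaleIndex(3, 8) << "\n";  // 24, emitted as a shl by 3
  std::cout << scaleIndex(5, 12) << "\n"; // 60, emitted as a mul
}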
@@ -922,6 +927,9 @@ SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
int64_t AccumulativeByteOffset) {
IRBuilder<> Builder(Variadic);
Type *IntPtrTy = DL->getIntPtrType(Variadic->getType());
+ assert(IntPtrTy == DL->getIndexType(Variadic->getType()) &&
+ "Pointer type must match index type for arithmetic-based lowering of "
+ "split GEPs");
Value *ResultPtr = Builder.CreatePtrToInt(Variadic->getOperand(0), IntPtrTy);
gep_type_iterator GTI = gep_type_begin(*Variadic);
@@ -973,7 +981,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
if (GEP->hasAllConstantIndices())
return false;
- bool Changed = canonicalizeArrayIndicesToPointerSize(GEP);
+ bool Changed = canonicalizeArrayIndicesToIndexSize(GEP);
bool NeedsExtraction;
int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
@@ -1057,7 +1065,15 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
if (LowerGEP) {
// As currently BasicAA does not analyze ptrtoint/inttoptr, do not lower to
// arithmetic operations if the target uses alias analysis in codegen.
- if (TTI.useAA())
+ // Additionally, pointers that aren't integral (and so can't be safely
+ // converted to integers) or those whose offset size is different from their
+ // pointer size (which means that integer arithmetic on them could clobber
+ // the extra non-offset bits) can't be lowered in this way.
+ unsigned AddrSpace = GEP->getPointerAddressSpace();
+ bool PointerHasExtraData = DL->getPointerSizeInBits(AddrSpace) !=
+ DL->getIndexSizeInBits(AddrSpace);
+ if (TTI.useAA() || DL->isNonIntegralAddressSpace(AddrSpace) ||
+ PointerHasExtraData)
lowerToSingleIndexGEPs(GEP, AccumulativeByteOffset);
else
lowerToArithmetics(GEP, AccumulativeByteOffset);
@@ -1104,13 +1120,13 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// used with unsigned integers later.
int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
DL->getTypeAllocSize(GEP->getResultElementType()));
- Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
+ Type *PtrIdxTy = DL->getIndexType(GEP->getType());
if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
// Very likely. As long as %gep is naturally aligned, the byte offset we
// extracted should be a multiple of sizeof(*%gep).
int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
- ConstantInt::get(IntPtrTy, Index, true),
+ ConstantInt::get(PtrIdxTy, Index, true),
GEP->getName(), GEP);
NewGEP->copyMetadata(*GEP);
// Inherit the inbounds attribute of the original GEP.
@@ -1131,16 +1147,11 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
//
// Emit an uglygep in this case.
IRBuilder<> Builder(GEP);
- Type *I8PtrTy =
- Builder.getInt8Ty()->getPointerTo(GEP->getPointerAddressSpace());
-
NewGEP = cast<Instruction>(Builder.CreateGEP(
- Builder.getInt8Ty(), Builder.CreateBitCast(NewGEP, I8PtrTy),
- {ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true)}, "uglygep",
+ Builder.getInt8Ty(), NewGEP,
+ {ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true)}, "uglygep",
GEPWasInBounds));
-
NewGEP->copyMetadata(*GEP);
- NewGEP = cast<Instruction>(Builder.CreateBitCast(NewGEP, GEP->getType()));
}
GEP->replaceAllUsesWith(NewGEP);
@@ -1153,13 +1164,12 @@ bool SeparateConstOffsetFromGEPLegacyPass::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
};
- SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
+ SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
return Impl.run(F);
}
@@ -1189,8 +1199,8 @@ bool SeparateConstOffsetFromGEP::run(Function &F) {
}
Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
- const SCEV *Key, Instruction *Dominatee,
- DenseMap<const SCEV *, SmallVector<Instruction *, 2>> &DominatingExprs) {
+ ExprKey Key, Instruction *Dominatee,
+ DenseMap<ExprKey, SmallVector<Instruction *, 2>> &DominatingExprs) {
auto Pos = DominatingExprs.find(Key);
if (Pos == DominatingExprs.end())
return nullptr;
@@ -1210,7 +1220,7 @@ Instruction *SeparateConstOffsetFromGEP::findClosestMatchingDominator(
}
bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
- if (!SE->isSCEVable(I->getType()))
+ if (!I->getType()->isIntOrIntVectorTy())
return false;
// Dom: LHS+RHS
@@ -1220,8 +1230,7 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
Value *LHS = nullptr, *RHS = nullptr;
if (match(I, m_Add(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
if (LHS->getType() == RHS->getType()) {
- const SCEV *Key =
- SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+ ExprKey Key = createNormalizedCommutablePair(LHS, RHS);
if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingAdds)) {
Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
NewSExt->takeName(I);
@@ -1232,9 +1241,8 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
}
} else if (match(I, m_Sub(m_SExt(m_Value(LHS)), m_SExt(m_Value(RHS))))) {
if (LHS->getType() == RHS->getType()) {
- const SCEV *Key =
- SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
- if (auto *Dom = findClosestMatchingDominator(Key, I, DominatingSubs)) {
+ if (auto *Dom =
+ findClosestMatchingDominator({LHS, RHS}, I, DominatingSubs)) {
Instruction *NewSExt = new SExtInst(Dom, I->getType(), "", I);
NewSExt->takeName(I);
I->replaceAllUsesWith(NewSExt);
@@ -1247,16 +1255,12 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
// Add I to DominatingExprs if it's an add/sub that can't sign overflow.
if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS)))) {
if (programUndefinedIfPoison(I)) {
- const SCEV *Key =
- SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
+ ExprKey Key = createNormalizedCommutablePair(LHS, RHS);
DominatingAdds[Key].push_back(I);
}
} else if (match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) {
- if (programUndefinedIfPoison(I)) {
- const SCEV *Key =
- SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));
- DominatingSubs[Key].push_back(I);
- }
+ if (programUndefinedIfPoison(I))
+ DominatingSubs[{LHS, RHS}].push_back(I);
}
return false;
}
@@ -1376,16 +1380,25 @@ void SeparateConstOffsetFromGEP::swapGEPOperand(GetElementPtrInst *First,
First->setIsInBounds(true);
}
+void SeparateConstOffsetFromGEPPass::printPipeline(
+ raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
+ static_cast<PassInfoMixin<SeparateConstOffsetFromGEPPass> *>(this)
+ ->printPipeline(OS, MapClassName2PassName);
+ OS << '<';
+ if (LowerGEP)
+ OS << "lower-gep";
+ OS << '>';
+}
+
PreservedAnalyses
SeparateConstOffsetFromGEPPass::run(Function &F, FunctionAnalysisManager &AM) {
auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
- auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
auto *LI = &AM.getResult<LoopAnalysis>(F);
auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
auto GetTTI = [&AM](Function &F) -> TargetTransformInfo & {
return AM.getResult<TargetIRAnalysis>(F);
};
- SeparateConstOffsetFromGEP Impl(DT, SE, LI, TLI, GetTTI, LowerGEP);
+ SeparateConstOffsetFromGEP Impl(DT, LI, TLI, GetTTI, LowerGEP);
if (!Impl.run(F))
return PreservedAnalyses::all();
PreservedAnalyses PA;
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 7e08120f923d..ad7d34b61470 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -42,6 +43,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
@@ -73,11 +75,14 @@ using namespace llvm::PatternMatch;
STATISTIC(NumBranches, "Number of branches unswitched");
STATISTIC(NumSwitches, "Number of switches unswitched");
+STATISTIC(NumSelects, "Number of selects turned into branches for unswitching");
STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
STATISTIC(NumTrivial, "Number of unswitches that are trivial");
STATISTIC(
NumCostMultiplierSkipped,
"Number of unswitch candidates that had their cost multiplier skipped");
+STATISTIC(NumInvariantConditionsInjected,
+ "Number of invariant conditions injected and unswitched");
static cl::opt<bool> EnableNonTrivialUnswitch(
"enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
@@ -118,15 +123,53 @@ static cl::opt<bool> FreezeLoopUnswitchCond(
cl::desc("If enabled, the freeze instruction will be added to condition "
"of loop unswitch to prevent miscompilation."));
+static cl::opt<bool> InjectInvariantConditions(
+ "simple-loop-unswitch-inject-invariant-conditions", cl::Hidden,
+ cl::desc("Whether we should inject new invariants and unswitch them to "
+ "eliminate some existing (non-invariant) conditions."),
+ cl::init(true));
+
+static cl::opt<unsigned> InjectInvariantConditionHotnesThreshold(
+ "simple-loop-unswitch-inject-invariant-condition-hotness-threshold",
+ cl::Hidden, cl::desc("Only try to inject loop invariant conditions and "
+ "unswitch on them to eliminate branches that are "
+ "not-taken 1/<this option> times or less."),
+ cl::init(16));
+
namespace {
+struct CompareDesc {
+ BranchInst *Term;
+ Value *Invariant;
+ BasicBlock *InLoopSucc;
+
+ CompareDesc(BranchInst *Term, Value *Invariant, BasicBlock *InLoopSucc)
+ : Term(Term), Invariant(Invariant), InLoopSucc(InLoopSucc) {}
+};
+
+struct InjectedInvariant {
+ ICmpInst::Predicate Pred;
+ Value *LHS;
+ Value *RHS;
+ BasicBlock *InLoopSucc;
+
+ InjectedInvariant(ICmpInst::Predicate Pred, Value *LHS, Value *RHS,
+ BasicBlock *InLoopSucc)
+ : Pred(Pred), LHS(LHS), RHS(RHS), InLoopSucc(InLoopSucc) {}
+};
+
struct NonTrivialUnswitchCandidate {
Instruction *TI = nullptr;
TinyPtrVector<Value *> Invariants;
std::optional<InstructionCost> Cost;
+ std::optional<InjectedInvariant> PendingInjection;
NonTrivialUnswitchCandidate(
Instruction *TI, ArrayRef<Value *> Invariants,
- std::optional<InstructionCost> Cost = std::nullopt)
- : TI(TI), Invariants(Invariants), Cost(Cost){};
+ std::optional<InstructionCost> Cost = std::nullopt,
+ std::optional<InjectedInvariant> PendingInjection = std::nullopt)
+ : TI(TI), Invariants(Invariants), Cost(Cost),
+ PendingInjection(PendingInjection) {};
+
+ bool hasPendingInjection() const { return PendingInjection.has_value(); }
};
} // end anonymous namespace.
@@ -434,10 +477,10 @@ static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
// Return the top-most loop containing ExitBB and having ExitBB as exiting block
// or the loop containing ExitBB, if there is no parent loop containing ExitBB
// as exiting block.
-static const Loop *getTopMostExitingLoop(const BasicBlock *ExitBB,
- const LoopInfo &LI) {
- const Loop *TopMost = LI.getLoopFor(ExitBB);
- const Loop *Current = TopMost;
+static Loop *getTopMostExitingLoop(const BasicBlock *ExitBB,
+ const LoopInfo &LI) {
+ Loop *TopMost = LI.getLoopFor(ExitBB);
+ Loop *Current = TopMost;
while (Current) {
if (Current->isLoopExiting(ExitBB))
TopMost = Current;
@@ -750,15 +793,32 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
Loop *OuterL = &L;
if (DefaultExitBB) {
- // Clear out the default destination temporarily to allow accurate
- // predecessor lists to be examined below.
- SI.setDefaultDest(nullptr);
// Check the loop containing this exit.
- Loop *ExitL = LI.getLoopFor(DefaultExitBB);
+ Loop *ExitL = getTopMostExitingLoop(DefaultExitBB, LI);
+ if (!ExitL || ExitL->contains(OuterL))
+ OuterL = ExitL;
+ }
+ for (unsigned Index : ExitCaseIndices) {
+ auto CaseI = SI.case_begin() + Index;
+ // Compute the outer loop from this exit.
+ Loop *ExitL = getTopMostExitingLoop(CaseI->getCaseSuccessor(), LI);
if (!ExitL || ExitL->contains(OuterL))
OuterL = ExitL;
}
+ if (SE) {
+ if (OuterL)
+ SE->forgetLoop(OuterL);
+ else
+ SE->forgetTopmostLoop(&L);
+ }
+
+ if (DefaultExitBB) {
+ // Clear out the default destination temporarily to allow accurate
+ // predecessor lists to be examined below.
+ SI.setDefaultDest(nullptr);
+ }
+
// Store the exit cases into a separate data structure and remove them from
// the switch.
SmallVector<std::tuple<ConstantInt *, BasicBlock *,
@@ -770,10 +830,6 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
// and don't disrupt the earlier indices.
for (unsigned Index : reverse(ExitCaseIndices)) {
auto CaseI = SI.case_begin() + Index;
- // Compute the outer loop from this exit.
- Loop *ExitL = LI.getLoopFor(CaseI->getCaseSuccessor());
- if (!ExitL || ExitL->contains(OuterL))
- OuterL = ExitL;
// Save the value of this case.
auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
@@ -781,13 +837,6 @@ static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
SIW.removeCase(CaseI);
}
- if (SE) {
- if (OuterL)
- SE->forgetLoop(OuterL);
- else
- SE->forgetTopmostLoop(&L);
- }
-
// Check if after this all of the remaining cases point at the same
// successor.
BasicBlock *CommonSuccBB = nullptr;
@@ -2079,7 +2128,7 @@ static void unswitchNontrivialInvariants(
AssumptionCache &AC,
function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
- function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
+ function_ref<void(Loop &, StringRef)> DestroyLoopCB, bool InsertFreeze) {
auto *ParentBB = TI.getParent();
BranchInst *BI = dyn_cast<BranchInst>(&TI);
SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
@@ -2160,7 +2209,9 @@ static void unswitchNontrivialInvariants(
SmallVector<BasicBlock *, 4> ExitBlocks;
L.getUniqueExitBlocks(ExitBlocks);
for (auto *ExitBB : ExitBlocks) {
- Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
+ // ExitBB can be an exit block for several levels in the loop nest. Make
+ // sure we find the topmost one.
+ Loop *NewOuterExitL = getTopMostExitingLoop(ExitBB, LI);
if (!NewOuterExitL) {
// We exited the entire nest with this block, so we're done.
OuterExitL = nullptr;
@@ -2181,25 +2232,6 @@ static void unswitchNontrivialInvariants(
SE->forgetBlockAndLoopDispositions();
}
- bool InsertFreeze = false;
- if (FreezeLoopUnswitchCond) {
- ICFLoopSafetyInfo SafetyInfo;
- SafetyInfo.computeLoopSafetyInfo(&L);
- InsertFreeze = !SafetyInfo.isGuaranteedToExecute(TI, &DT, &L);
- }
-
- // Perform the isGuaranteedNotToBeUndefOrPoison() query before the transform,
- // otherwise the branch instruction will have been moved outside the loop
- // already, and may imply that a poison condition is always UB.
- Value *FullUnswitchCond = nullptr;
- if (FullUnswitch) {
- FullUnswitchCond =
- BI ? skipTrivialSelect(BI->getCondition()) : SI->getCondition();
- if (InsertFreeze)
- InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
- FullUnswitchCond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
- }
-
// If the edge from this terminator to a successor dominates that successor,
// store a map from each block in its dominator subtree to it. This lets us
// tell when cloning for a particular successor if a block is dominated by
@@ -2274,10 +2306,11 @@ static void unswitchNontrivialInvariants(
BasicBlock *ClonedPH = ClonedPHs.begin()->second;
BI->setSuccessor(ClonedSucc, ClonedPH);
BI->setSuccessor(1 - ClonedSucc, LoopPH);
+ Value *Cond = skipTrivialSelect(BI->getCondition());
if (InsertFreeze)
- FullUnswitchCond = new FreezeInst(
- FullUnswitchCond, FullUnswitchCond->getName() + ".fr", BI);
- BI->setCondition(FullUnswitchCond);
+ Cond = new FreezeInst(
+ Cond, Cond->getName() + ".fr", BI);
+ BI->setCondition(Cond);
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
} else {
assert(SI && "Must either be a branch or switch!");
@@ -2294,7 +2327,7 @@ static void unswitchNontrivialInvariants(
if (InsertFreeze)
SI->setCondition(new FreezeInst(
- FullUnswitchCond, FullUnswitchCond->getName() + ".fr", SI));
+ SI->getCondition(), SI->getCondition()->getName() + ".fr", SI));
// We need to use the set to populate domtree updates as even when there
// are multiple cases pointing at the same successor we only want to
@@ -2593,6 +2626,57 @@ static InstructionCost computeDomSubtreeCost(
return Cost;
}
+/// Turns a select instruction into an implicit control flow branch,
+/// making the following replacement:
+///
+/// head:
+/// --code before select--
+/// select %cond, %trueval, %falseval
+/// --code after select--
+///
+/// into
+///
+/// head:
+/// --code before select--
+/// br i1 %cond, label %then, label %tail
+///
+/// then:
+/// br %tail
+///
+/// tail:
+/// phi [ %trueval, %then ], [ %falseval, %head]
+/// --code after select--
+///
+/// It also makes all relevant DT and LI updates so that all structures are in
+/// a valid state after this transform.
+static BranchInst *turnSelectIntoBranch(SelectInst *SI, DominatorTree &DT,
+ LoopInfo &LI, MemorySSAUpdater *MSSAU,
+ AssumptionCache *AC) {
+ LLVM_DEBUG(dbgs() << "Turning " << *SI << " into a branch.\n");
+ BasicBlock *HeadBB = SI->getParent();
+
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ SplitBlockAndInsertIfThen(SI->getCondition(), SI, false,
+ SI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
+ auto *CondBr = cast<BranchInst>(HeadBB->getTerminator());
+ BasicBlock *ThenBB = CondBr->getSuccessor(0),
+ *TailBB = CondBr->getSuccessor(1);
+ if (MSSAU)
+ MSSAU->moveAllAfterSpliceBlocks(HeadBB, TailBB, SI);
+
+ PHINode *Phi = PHINode::Create(SI->getType(), 2, "unswitched.select", SI);
+ Phi->addIncoming(SI->getTrueValue(), ThenBB);
+ Phi->addIncoming(SI->getFalseValue(), HeadBB);
+ SI->replaceAllUsesWith(Phi);
+ SI->eraseFromParent();
+
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+
+ ++NumSelects;
+ return CondBr;
+}
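At the source level, the rewrite above corresponds to replacing a conditional move with explicit control flow whose join point plays the role of the phi. A sketch of the before/after semantics in plain C++ (not IR, and not the patch's code):

#include <iostream>

// Before: the value is computed as a select (conditional move).
static int withSelect(bool Cond, int TrueVal, int FalseVal) {
  return Cond ? TrueVal : FalseVal;
}

// After: the same value is produced by explicit control flow; the variable
// assigned on both paths plays the role of the "unswitched.select" phi in the
// tail block.
static int withBranch(bool Cond, int TrueVal, int FalseVal) {
  int Unswitched;
  if (Cond)
    Unswitched = TrueVal;  // then block
  else
    Unswitched = FalseVal; // fall-through from head
  return Unswitched;       // tail block
}

int main() {
  std::cout << withSelect(true, 1, 2) << " " << withBranch(true, 1, 2) << "\n";
}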
+
/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
/// making the following replacement:
///
@@ -2624,15 +2708,10 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
- // Remove all CheckBB's successors from DomTree. A block can be seen among
- // successors more than once, but for DomTree it should be added only once.
- SmallPtrSet<BasicBlock *, 4> Successors;
- for (auto *Succ : successors(CheckBB))
- if (Successors.insert(Succ).second)
- DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
-
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
Instruction *DeoptBlockTerm =
- SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
+ SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true,
+ GI->getMetadata(LLVMContext::MD_prof), &DTU, &LI);
BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
// SplitBlockAndInsertIfThen inserts control flow that branches to
// DeoptBlockTerm if the condition is true. We want the opposite.
@@ -2649,20 +2728,6 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
GI->moveBefore(DeoptBlockTerm);
GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));
- // Add new successors of CheckBB into DomTree.
- for (auto *Succ : successors(CheckBB))
- DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
-
- // Now the blocks that used to be CheckBB's successors are GuardedBlock's
- // successors.
- for (auto *Succ : Successors)
- DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
-
- // Make proper changes to DT.
- DT.applyUpdates(DTUpdates);
- // Inform LI of a new loop block.
- L.addBasicBlockToLoop(GuardedBlock, LI);
-
if (MSSAU) {
MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
@@ -2670,6 +2735,8 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
MSSAU->getMemorySSA()->verifyMemorySSA();
}
+ if (VerifyLoopInfo)
+ LI.verify(DT);
++NumGuards;
return CheckBI;
}
@@ -2700,9 +2767,10 @@ static int CalculateUnswitchCostMultiplier(
const BasicBlock *CondBlock = TI.getParent();
if (DT.dominates(CondBlock, Latch) &&
(isGuard(&TI) ||
- llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
- return L.contains(SuccBB);
- }) <= 1)) {
+ (TI.isTerminator() &&
+ llvm::count_if(successors(&TI), [&L](const BasicBlock *SuccBB) {
+ return L.contains(SuccBB);
+ }) <= 1))) {
NumCostMultiplierSkipped++;
return 1;
}
@@ -2711,12 +2779,17 @@ static int CalculateUnswitchCostMultiplier(
int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
: std::distance(LI.begin(), LI.end()));
// Count amount of clones that all the candidates might cause during
- // unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
+ // unswitching. Branch/guard/select counts as 1, switch counts as log2 of its
+ // cases.
int UnswitchedClones = 0;
- for (auto Candidate : UnswitchCandidates) {
+ for (const auto &Candidate : UnswitchCandidates) {
const Instruction *CI = Candidate.TI;
const BasicBlock *CondBlock = CI->getParent();
bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
+ if (isa<SelectInst>(CI)) {
+ UnswitchedClones++;
+ continue;
+ }
if (isGuard(CI)) {
if (!SkipExitingSuccessors)
UnswitchedClones++;
@@ -2766,6 +2839,24 @@ static bool collectUnswitchCandidates(
const Loop &L, const LoopInfo &LI, AAResults &AA,
const MemorySSAUpdater *MSSAU) {
assert(UnswitchCandidates.empty() && "Should be!");
+
+ auto AddUnswitchCandidatesForInst = [&](Instruction *I, Value *Cond) {
+ Cond = skipTrivialSelect(Cond);
+ if (isa<Constant>(Cond))
+ return;
+ if (L.isLoopInvariant(Cond)) {
+ UnswitchCandidates.push_back({I, {Cond}});
+ return;
+ }
+ if (match(Cond, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
+ TinyPtrVector<Value *> Invariants =
+ collectHomogenousInstGraphLoopInvariants(
+ L, *static_cast<Instruction *>(Cond), LI);
+ if (!Invariants.empty())
+ UnswitchCandidates.push_back({I, std::move(Invariants)});
+ }
+ };
+
// Whether or not we should also collect guards in the loop.
bool CollectGuards = false;
if (UnswitchGuards) {
@@ -2779,15 +2870,20 @@ static bool collectUnswitchCandidates(
if (LI.getLoopFor(BB) != &L)
continue;
- if (CollectGuards)
- for (auto &I : *BB)
- if (isGuard(&I)) {
- auto *Cond =
- skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
- // TODO: Support AND, OR conditions and partial unswitching.
- if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
- UnswitchCandidates.push_back({&I, {Cond}});
- }
+ for (auto &I : *BB) {
+ if (auto *SI = dyn_cast<SelectInst>(&I)) {
+ auto *Cond = SI->getCondition();
+ // Do not unswitch vector selects and logical and/or selects
+ if (Cond->getType()->isIntegerTy(1) && !SI->getType()->isIntegerTy(1))
+ AddUnswitchCandidatesForInst(SI, Cond);
+ } else if (CollectGuards && isGuard(&I)) {
+ auto *Cond =
+ skipTrivialSelect(cast<IntrinsicInst>(&I)->getArgOperand(0));
+ // TODO: Support AND, OR conditions and partial unswitching.
+ if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
+ UnswitchCandidates.push_back({&I, {Cond}});
+ }
+ }
if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
// We can only consider fully loop-invariant switch conditions as we need
@@ -2799,29 +2895,11 @@ static bool collectUnswitchCandidates(
}
auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
- if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
+ if (!BI || !BI->isConditional() ||
BI->getSuccessor(0) == BI->getSuccessor(1))
continue;
- Value *Cond = skipTrivialSelect(BI->getCondition());
- if (isa<Constant>(Cond))
- continue;
-
- if (L.isLoopInvariant(Cond)) {
- UnswitchCandidates.push_back({BI, {Cond}});
- continue;
- }
-
- Instruction &CondI = *cast<Instruction>(Cond);
- if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
- TinyPtrVector<Value *> Invariants =
- collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
- if (Invariants.empty())
- continue;
-
- UnswitchCandidates.push_back({BI, std::move(Invariants)});
- continue;
- }
+ AddUnswitchCandidatesForInst(BI, BI->getCondition());
}
if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
@@ -2844,6 +2922,303 @@ static bool collectUnswitchCandidates(
return !UnswitchCandidates.empty();
}
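The new select handling above deliberately skips i1-valued selects (those are the logical and/or patterns handled through the branch path) and non-scalar-i1 conditions (vector selects). A tiny sketch of that filter, with types modeled as flags for illustration only:

#include <cassert>

struct TypeInfo {
  bool IsScalarI1;
};

// Mirror of the candidate filter: the condition must be a single i1 and the
// select's result must not itself be i1.
static bool isUnswitchableSelect(const TypeInfo &CondTy,
                                 const TypeInfo &ResultTy) {
  return CondTy.IsScalarI1 && !ResultTy.IsScalarI1;
}

int main() {
  assert(isUnswitchableSelect({true}, {false}));   // i1 cond, i32 result: yes
  assert(!isUnswitchableSelect({true}, {true}));   // logical and/or select: no
  assert(!isUnswitchableSelect({false}, {false})); // vector condition: no
}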
+/// Tries to canonicalize condition described by:
+///
+/// br (LHS pred RHS), label IfTrue, label IfFalse
+///
+/// into an equivalent form where `Pred` is a predicate we support for injected
+/// invariants (so far limited to ult), the LHS in canonicalized form is
+/// non-invariant, and the RHS is loop-invariant.
+static void canonicalizeForInvariantConditionInjection(
+ ICmpInst::Predicate &Pred, Value *&LHS, Value *&RHS, BasicBlock *&IfTrue,
+ BasicBlock *&IfFalse, const Loop &L) {
+ if (!L.contains(IfTrue)) {
+ Pred = ICmpInst::getInversePredicate(Pred);
+ std::swap(IfTrue, IfFalse);
+ }
+
+ // Move loop-invariant argument to RHS position.
+ if (L.isLoopInvariant(LHS)) {
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ std::swap(LHS, RHS);
+ }
+
+ if (Pred == ICmpInst::ICMP_SGE && match(RHS, m_Zero())) {
+ // Turn "x >=s 0" into "x <u UMIN_INT"
+ Pred = ICmpInst::ICMP_ULT;
+ RHS = ConstantInt::get(
+ RHS->getContext(),
+ APInt::getSignedMinValue(RHS->getType()->getIntegerBitWidth()));
+ }
+}
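The last canonicalization above rewrites a signed non-negativity test as an unsigned compare against the signed minimum value. An exhaustive check over 8-bit values illustrating the equivalence (a standalone sketch, not part of the patch):

#include <cassert>
#include <cstdint>

// "x >=s 0" holds exactly when the unsigned reinterpretation of x is below
// the signed minimum value (0x80 for 8 bits). Checked exhaustively for i8.
int main() {
  const uint8_t SignedMin = 0x80;
  for (int V = -128; V <= 127; ++V) {
    bool SGE0 = V >= 0;
    bool ULTSMin = static_cast<uint8_t>(V) < SignedMin;
    assert(SGE0 == ULTSMin);
  }
}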
+
+/// Returns true if the predicate described by ( \p Pred, \p LHS, \p RHS ) with
+/// successors ( \p IfTrue, \p IfFalse ) can be optimized by injecting a
+/// loop-invariant condition.
+static bool shouldTryInjectInvariantCondition(
+ const ICmpInst::Predicate Pred, const Value *LHS, const Value *RHS,
+ const BasicBlock *IfTrue, const BasicBlock *IfFalse, const Loop &L) {
+ if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
+ return false;
+ // TODO: Support other predicates.
+ if (Pred != ICmpInst::ICMP_ULT)
+ return false;
+ // TODO: Support non-loop-exiting branches?
+ if (!L.contains(IfTrue) || L.contains(IfFalse))
+ return false;
+ // FIXME: For some reason this causes problems with MSSA updates, need to
+ // investigate why. So far, just don't unswitch latch.
+ if (L.getHeader() == IfTrue)
+ return false;
+ return true;
+}
+
+/// Returns true if metadata on \p BI allows us to optimize branching into \p
+/// TakenSucc via injection of invariant conditions. Branching into \p
+/// TakenSucc must be likely enough, and the branch must not have been
+/// unswitched this way before; both facts are derived from the metadata.
+bool shouldTryInjectBasingOnMetadata(const BranchInst *BI,
+ const BasicBlock *TakenSucc) {
+ // Skip branches that have already been unswitched this way. After successful
+ // unswitching of an injected condition, we will still have a copy of this
+ // loop which looks exactly the same as the original one. To prevent a second
+ // attempt at unswitching it in the same pass, mark this branch as "nothing to
+ // do here".
+ if (BI->hasMetadata("llvm.invariant.condition.injection.disabled"))
+ return false;
+ SmallVector<uint32_t> Weights;
+ if (!extractBranchWeights(*BI, Weights))
+ return false;
+ unsigned T = InjectInvariantConditionHotnesThreshold;
+ BranchProbability LikelyTaken(T - 1, T);
+
+ assert(Weights.size() == 2 && "Unexpected profile data!");
+ size_t Idx = BI->getSuccessor(0) == TakenSucc ? 0 : 1;
+ auto Num = Weights[Idx];
+ auto Denom = Weights[0] + Weights[1];
+ // Degenerate or overflowed metadata.
+ if (Denom == 0 || Num > Denom)
+ return false;
+ BranchProbability ActualTaken(Num, Denom);
+ if (LikelyTaken > ActualTaken)
+ return false;
+ return true;
+}
+
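The metadata check above boils down to comparing the profiled probability of the taken successor against a (T - 1)/T threshold. A minimal sketch of that arithmetic in plain C++; the threshold value 16 and the branch weights are assumed example numbers, not necessarily the pass defaults:

  #include <cstdint>
  #include <iostream>

  // Returns true when Num/Denom >= (T - 1)/T, i.e. the taken successor is at
  // least as likely as the threshold. Cross-multiplying avoids floating point.
  static bool likelyEnough(uint64_t Num, uint64_t Denom, uint64_t T) {
    if (Denom == 0 || Num > Denom)
      return false; // degenerate or overflowed weights, as in the pass
    return Num * T >= (T - 1) * Denom;
  }

  int main() {
    // Branch weights {60, 4}: the taken successor has probability 60/64.
    std::cout << likelyEnough(60, 64, 16) << '\n'; // 1: 60/64 == 15/16
    std::cout << likelyEnough(50, 64, 16) << '\n'; // 0: 50/64 <  15/16
    return 0;
  }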
+/// Materialize pending invariant condition of the given candidate into IR. The
+/// injected loop-invariant condition implies the original loop-variant branch
+/// condition, so the materialization turns
+///
+/// loop_block:
+/// ...
+/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
+///
+/// into
+///
+/// preheader:
+/// %invariant_cond = LHS pred RHS
+/// ...
+/// loop_block:
+/// br i1 %invariant_cond, label InLoopSucc, label OriginalCheck
+/// OriginalCheck:
+/// br i1 %variant_cond, label InLoopSucc, label OutOfLoopSucc
+/// ...
+static NonTrivialUnswitchCandidate
+injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
+ DominatorTree &DT, LoopInfo &LI,
+ AssumptionCache &AC, MemorySSAUpdater *MSSAU) {
+ assert(Candidate.hasPendingInjection() && "Nothing to inject!");
+ BasicBlock *Preheader = L.getLoopPreheader();
+ assert(Preheader && "Loop is not in simplified form?");
+ assert(LI.getLoopFor(Candidate.TI->getParent()) == &L &&
+ "Unswitching branch of inner loop!");
+
+ auto Pred = Candidate.PendingInjection->Pred;
+ auto *LHS = Candidate.PendingInjection->LHS;
+ auto *RHS = Candidate.PendingInjection->RHS;
+ auto *InLoopSucc = Candidate.PendingInjection->InLoopSucc;
+ auto *TI = cast<BranchInst>(Candidate.TI);
+ auto *BB = Candidate.TI->getParent();
+ auto *OutOfLoopSucc = InLoopSucc == TI->getSuccessor(0) ? TI->getSuccessor(1)
+ : TI->getSuccessor(0);
+ // FIXME: Remove this once limitation on successors is lifted.
+ assert(L.contains(InLoopSucc) && "Not supported yet!");
+ assert(!L.contains(OutOfLoopSucc) && "Not supported yet!");
+ auto &Ctx = BB->getContext();
+
+ IRBuilder<> Builder(Preheader->getTerminator());
+ assert(ICmpInst::isUnsigned(Pred) && "Not supported yet!");
+ if (LHS->getType() != RHS->getType()) {
+ if (LHS->getType()->getIntegerBitWidth() <
+ RHS->getType()->getIntegerBitWidth())
+ LHS = Builder.CreateZExt(LHS, RHS->getType(), LHS->getName() + ".wide");
+ else
+ RHS = Builder.CreateZExt(RHS, LHS->getType(), RHS->getName() + ".wide");
+ }
+ // Do not use builder here: CreateICmp may simplify this into a constant and
+ // unswitching will break. Better optimize it away later.
+ auto *InjectedCond =
+ ICmpInst::Create(Instruction::ICmp, Pred, LHS, RHS, "injected.cond",
+ Preheader->getTerminator());
+ auto *OldCond = TI->getCondition();
+
+ BasicBlock *CheckBlock = BasicBlock::Create(Ctx, BB->getName() + ".check",
+ BB->getParent(), InLoopSucc);
+ Builder.SetInsertPoint(TI);
+ auto *InvariantBr =
+ Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
+
+ Builder.SetInsertPoint(CheckBlock);
+ auto *NewTerm = Builder.CreateCondBr(OldCond, InLoopSucc, OutOfLoopSucc);
+
+ TI->eraseFromParent();
+ // Prevent infinite unswitching.
+ NewTerm->setMetadata("llvm.invariant.condition.injection.disabled",
+ MDNode::get(BB->getContext(), {}));
+
+ // Fixup phis.
+ for (auto &I : *InLoopSucc) {
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
+ break;
+ auto *Inc = PN->getIncomingValueForBlock(BB);
+ PN->addIncoming(Inc, CheckBlock);
+ }
+ OutOfLoopSucc->replacePhiUsesWith(BB, CheckBlock);
+
+ SmallVector<DominatorTree::UpdateType, 4> DTUpdates = {
+ { DominatorTree::Insert, BB, CheckBlock },
+ { DominatorTree::Insert, CheckBlock, InLoopSucc },
+ { DominatorTree::Insert, CheckBlock, OutOfLoopSucc },
+ { DominatorTree::Delete, BB, OutOfLoopSucc }
+ };
+
+ DT.applyUpdates(DTUpdates);
+ if (MSSAU)
+ MSSAU->applyUpdates(DTUpdates, DT);
+ L.addBasicBlockToLoop(CheckBlock, LI);
+
+#ifndef NDEBUG
+ DT.verify();
+ LI.verify(DT);
+ if (MSSAU && VerifyMemorySSA)
+ MSSAU->getMemorySSA()->verifyMemorySSA();
+#endif
+
+ // TODO: In fact, cost of unswitching a new invariant candidate is *slightly*
+ // higher because we have just inserted a new block. Need to think how to
+ // adjust the cost of injected candidates when it was first computed.
+ LLVM_DEBUG(dbgs() << "Injected a new loop-invariant branch " << *InvariantBr
+ << " and considering it for unswitching.");
+ ++NumInvariantConditionsInjected;
+ return NonTrivialUnswitchCandidate(InvariantBr, { InjectedCond },
+ Candidate.Cost);
+}
+
+/// Given chain of loop branch conditions looking like:
+/// br (Variant < Invariant1)
+/// br (Variant < Invariant2)
+/// br (Variant < Invariant3)
+/// ...
+/// collect set of invariant conditions on which we want to unswitch, which
+/// look like:
+/// Invariant1 <= Invariant2
+/// Invariant2 <= Invariant3
+/// ...
+/// Though they might not immediately exist in the IR, we can still inject them.
+static bool insertCandidatesWithPendingInjections(
+ SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates, Loop &L,
+ ICmpInst::Predicate Pred, ArrayRef<CompareDesc> Compares,
+ const DominatorTree &DT) {
+
+ assert(ICmpInst::isRelational(Pred));
+ assert(ICmpInst::isStrictPredicate(Pred));
+ if (Compares.size() < 2)
+ return false;
+ ICmpInst::Predicate NonStrictPred = ICmpInst::getNonStrictPredicate(Pred);
+ for (auto Prev = Compares.begin(), Next = Compares.begin() + 1;
+ Next != Compares.end(); ++Prev, ++Next) {
+ Value *LHS = Next->Invariant;
+ Value *RHS = Prev->Invariant;
+ BasicBlock *InLoopSucc = Prev->InLoopSucc;
+ InjectedInvariant ToInject(NonStrictPred, LHS, RHS, InLoopSucc);
+ NonTrivialUnswitchCandidate Candidate(Prev->Term, { LHS, RHS },
+ std::nullopt, std::move(ToInject));
+ UnswitchCandidates.push_back(std::move(Candidate));
+ }
+ return true;
+}
+
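The injected "Invariant1 <= Invariant2" conditions above are justified by unsigned transitivity: "x <u C1" together with "C1 <=u C2" implies "x <u C2". A small self-contained check of that implication, exhaustive over 8-bit values so it runs instantly:

  #include <cassert>

  int main() {
    // Exhaustively verify over i8 values:
    //   (x <u C1) && (C1 <=u C2)  implies  (x <u C2)
    for (unsigned X = 0; X < 256; ++X)
      for (unsigned C1 = 0; C1 < 256; ++C1)
        for (unsigned C2 = 0; C2 < 256; ++C2)
          if (X < C1 && C1 <= C2)
            assert(X < C2);
    return 0;
  }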
+/// Collect unswitch candidates by invariant conditions that are not immediately
+/// present in the loop but can be injected into the code if we decide it is
+/// profitable.
+/// An example of such conditions is the following:
+///
+/// for (...) {
+/// x = load ...
+/// if (! x <u C1) break;
+/// if (! x <u C2) break;
+/// <do something>
+/// }
+///
+/// We can unswitch on the condition "C1 <=u C2". If that is true, then "x <u
+/// C1 <= C2" automatically implies "x <u C2", so we can get rid of one of the
+/// loop-variant checks in the unswitched loop version.
+static bool collectUnswitchCandidatesWithInjections(
+ SmallVectorImpl<NonTrivialUnswitchCandidate> &UnswitchCandidates,
+ IVConditionInfo &PartialIVInfo, Instruction *&PartialIVCondBranch, Loop &L,
+ const DominatorTree &DT, const LoopInfo &LI, AAResults &AA,
+ const MemorySSAUpdater *MSSAU) {
+ if (!InjectInvariantConditions)
+ return false;
+
+ if (!DT.isReachableFromEntry(L.getHeader()))
+ return false;
+ auto *Latch = L.getLoopLatch();
+ // Need to have a single latch and a preheader.
+ if (!Latch)
+ return false;
+ assert(L.getLoopPreheader() && "Must have a preheader!");
+
+ DenseMap<Value *, SmallVector<CompareDesc, 4> > CandidatesULT;
+ // Traverse the conditions that dominate latch (and therefore dominate each
+ // other).
+ for (auto *DTN = DT.getNode(Latch); L.contains(DTN->getBlock());
+ DTN = DTN->getIDom()) {
+ ICmpInst::Predicate Pred;
+ Value *LHS = nullptr, *RHS = nullptr;
+ BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
+ auto *BB = DTN->getBlock();
+ // Ignore inner loops.
+ if (LI.getLoopFor(BB) != &L)
+ continue;
+ auto *Term = BB->getTerminator();
+ if (!match(Term, m_Br(m_ICmp(Pred, m_Value(LHS), m_Value(RHS)),
+ m_BasicBlock(IfTrue), m_BasicBlock(IfFalse))))
+ continue;
+ if (!LHS->getType()->isIntegerTy())
+ continue;
+ canonicalizeForInvariantConditionInjection(Pred, LHS, RHS, IfTrue, IfFalse,
+ L);
+ if (!shouldTryInjectInvariantCondition(Pred, LHS, RHS, IfTrue, IfFalse, L))
+ continue;
+ if (!shouldTryInjectBasingOnMetadata(cast<BranchInst>(Term), IfTrue))
+ continue;
+ // Strip ZEXT for unsigned predicate.
+ // TODO: once signed predicates are supported, also strip SEXT.
+ CompareDesc Desc(cast<BranchInst>(Term), RHS, IfTrue);
+ while (auto *Zext = dyn_cast<ZExtInst>(LHS))
+ LHS = Zext->getOperand(0);
+ CandidatesULT[LHS].push_back(Desc);
+ }
+
+ bool Found = false;
+ for (auto &It : CandidatesULT)
+ Found |= insertCandidatesWithPendingInjections(
+ UnswitchCandidates, L, ICmpInst::ICMP_ULT, It.second, DT);
+ return Found;
+}
+
static bool isSafeForNoNTrivialUnswitching(Loop &L, LoopInfo &LI) {
if (!L.isSafeToClone())
return false;
@@ -2943,6 +3318,10 @@ static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
// cost for that terminator.
auto ComputeUnswitchedCost = [&](Instruction &TI,
bool FullUnswitch) -> InstructionCost {
+ // Unswitching selects unswitches the entire loop.
+ if (isa<SelectInst>(TI))
+ return LoopCost;
+
BasicBlock &BB = *TI.getParent();
SmallPtrSet<BasicBlock *, 4> Visited;
@@ -3003,10 +3382,11 @@ static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
Instruction &TI = *Candidate.TI;
ArrayRef<Value *> Invariants = Candidate.Invariants;
BranchInst *BI = dyn_cast<BranchInst>(&TI);
- InstructionCost CandidateCost = ComputeUnswitchedCost(
- TI, /*FullUnswitch*/ !BI ||
- (Invariants.size() == 1 &&
- Invariants[0] == skipTrivialSelect(BI->getCondition())));
+ bool FullUnswitch =
+ !BI || Candidate.hasPendingInjection() ||
+ (Invariants.size() == 1 &&
+ Invariants[0] == skipTrivialSelect(BI->getCondition()));
+ InstructionCost CandidateCost = ComputeUnswitchedCost(TI, FullUnswitch);
// Calculate cost multiplier which is a tool to limit potentially
// exponential behavior of loop-unswitch.
if (EnableUnswitchCostMultiplier) {
@@ -3033,6 +3413,32 @@ static NonTrivialUnswitchCandidate findBestNonTrivialUnswitchCandidate(
return *Best;
}
+// Insert a freeze on an unswitched branch if all of the following hold:
+// 1. The freeze-loop-unswitch-cond option is true.
+// 2. The branch may not execute in the loop pre-transformation. If a branch
+// that may not execute could cause UB, it would always cause UB once hoisted
+// outside of the loop. Insert a freeze to prevent this case.
+// 3. The branch condition may be poison or undef.
+static bool shouldInsertFreeze(Loop &L, Instruction &TI, DominatorTree &DT,
+ AssumptionCache &AC) {
+ assert(isa<BranchInst>(TI) || isa<SwitchInst>(TI));
+ if (!FreezeLoopUnswitchCond)
+ return false;
+
+ ICFLoopSafetyInfo SafetyInfo;
+ SafetyInfo.computeLoopSafetyInfo(&L);
+ if (SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
+ return false;
+
+ Value *Cond;
+ if (BranchInst *BI = dyn_cast<BranchInst>(&TI))
+ Cond = skipTrivialSelect(BI->getCondition());
+ else
+ Cond = skipTrivialSelect(cast<SwitchInst>(&TI)->getCondition());
+ return !isGuaranteedNotToBeUndefOrPoison(
+ Cond, &AC, L.getLoopPreheader()->getTerminator(), &DT);
+}
+
static bool unswitchBestCondition(
Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
AAResults &AA, TargetTransformInfo &TTI,
@@ -3044,9 +3450,13 @@ static bool unswitchBestCondition(
SmallVector<NonTrivialUnswitchCandidate, 4> UnswitchCandidates;
IVConditionInfo PartialIVInfo;
Instruction *PartialIVCondBranch = nullptr;
+ collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
+ PartialIVCondBranch, L, LI, AA, MSSAU);
+ collectUnswitchCandidatesWithInjections(UnswitchCandidates, PartialIVInfo,
+ PartialIVCondBranch, L, DT, LI, AA,
+ MSSAU);
// If we didn't find any candidates, we're done.
- if (!collectUnswitchCandidates(UnswitchCandidates, PartialIVInfo,
- PartialIVCondBranch, L, LI, AA, MSSAU))
+ if (UnswitchCandidates.empty())
return false;
LLVM_DEBUG(
@@ -3065,18 +3475,36 @@ static bool unswitchBestCondition(
return false;
}
+ if (Best.hasPendingInjection())
+ Best = injectPendingInvariantConditions(Best, L, DT, LI, AC, MSSAU);
+ assert(!Best.hasPendingInjection() &&
+ "All injections should have been done by now!");
+
if (Best.TI != PartialIVCondBranch)
PartialIVInfo.InstToDuplicate.clear();
- // If the best candidate is a guard, turn it into a branch.
- if (isGuard(Best.TI))
- Best.TI =
- turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
+ bool InsertFreeze;
+ if (auto *SI = dyn_cast<SelectInst>(Best.TI)) {
+ // If the best candidate is a select, turn it into a branch. Select
+ // instructions with a poison conditional do not propagate poison, but
+ // branching on poison causes UB. Insert a freeze on the select
+ // conditional to prevent UB after turning the select into a branch.
+ InsertFreeze = !isGuaranteedNotToBeUndefOrPoison(
+ SI->getCondition(), &AC, L.getLoopPreheader()->getTerminator(), &DT);
+ Best.TI = turnSelectIntoBranch(SI, DT, LI, MSSAU, &AC);
+ } else {
+ // If the best candidate is a guard, turn it into a branch.
+ if (isGuard(Best.TI))
+ Best.TI =
+ turnGuardIntoBranch(cast<IntrinsicInst>(Best.TI), L, DT, LI, MSSAU);
+ InsertFreeze = shouldInsertFreeze(L, *Best.TI, DT, AC);
+ }
LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = " << Best.Cost
<< ") terminator: " << *Best.TI << "\n");
unswitchNontrivialInvariants(L, *Best.TI, Best.Invariants, PartialIVInfo, DT,
- LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB);
+ LI, AC, UnswitchCB, SE, MSSAU, DestroyLoopCB,
+ InsertFreeze);
return true;
}
@@ -3124,6 +3552,8 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
return true;
}
+ const Function *F = L.getHeader()->getParent();
+
// Check whether we should continue with non-trivial conditions.
// EnableNonTrivialUnswitch: Global variable that forces non-trivial
// unswitching for testing and debugging.
@@ -3136,18 +3566,41 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
// branches even on targets that have divergence.
// https://bugs.llvm.org/show_bug.cgi?id=48819
bool ContinueWithNonTrivial =
- EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence());
+ EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence(F));
if (!ContinueWithNonTrivial)
return false;
// Skip non-trivial unswitching for optsize functions.
- if (L.getHeader()->getParent()->hasOptSize())
+ if (F->hasOptSize())
return false;
- // Skip cold loops, as unswitching them brings little benefit
- // but increases the code size
- if (PSI && PSI->hasProfileSummary() && BFI &&
- PSI->isFunctionColdInCallGraph(L.getHeader()->getParent(), *BFI)) {
+ // Returns true if Loop L's loop nest is cold, i.e. if the headers of L,
+ // of the loops L is nested in, and of the loops nested in L are all cold.
+ auto IsLoopNestCold = [&](const Loop *L) {
+ // Check L and all of its parent loops.
+ auto *Parent = L;
+ while (Parent) {
+ if (!PSI->isColdBlock(Parent->getHeader(), BFI))
+ return false;
+ Parent = Parent->getParentLoop();
+ }
+ // Next check all loops nested within L.
+ SmallVector<const Loop *, 4> Worklist;
+ Worklist.insert(Worklist.end(), L->getSubLoops().begin(),
+ L->getSubLoops().end());
+ while (!Worklist.empty()) {
+ auto *CurLoop = Worklist.pop_back_val();
+ if (!PSI->isColdBlock(CurLoop->getHeader(), BFI))
+ return false;
+ Worklist.insert(Worklist.end(), CurLoop->getSubLoops().begin(),
+ CurLoop->getSubLoops().end());
+ }
+ return true;
+ };
+
+ // Skip cold loops in cold loop nests, as unswitching them brings little
+ // benefit but increases the code size
+ if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) {
LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n");
return false;
}
@@ -3249,10 +3702,10 @@ void SimpleLoopUnswitchPass::printPipeline(
static_cast<PassInfoMixin<SimpleLoopUnswitchPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
- OS << "<";
+ OS << '<';
OS << (NonTrivial ? "" : "no-") << "nontrivial;";
OS << (Trivial ? "" : "no-") << "trivial";
- OS << ">";
+ OS << '>';
}
namespace {
diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index e014f5d1eb04..7017f6adf3a2 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -121,7 +121,7 @@ performBlockTailMerging(Function &F, ArrayRef<BasicBlock *> BBs,
// Now, go through each block (with the current terminator type)
// we've recorded, and rewrite it to branch to the new common block.
- const DILocation *CommonDebugLoc = nullptr;
+ DILocation *CommonDebugLoc = nullptr;
for (BasicBlock *BB : BBs) {
auto *Term = BB->getTerminator();
assert(Term->getOpcode() == CanonicalTerm->getOpcode() &&
@@ -228,8 +228,8 @@ static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 32> Edges;
FindFunctionBackedges(F, Edges);
SmallPtrSet<BasicBlock *, 16> UniqueLoopHeaders;
- for (unsigned i = 0, e = Edges.size(); i != e; ++i)
- UniqueLoopHeaders.insert(const_cast<BasicBlock *>(Edges[i].second));
+ for (const auto &Edge : Edges)
+ UniqueLoopHeaders.insert(const_cast<BasicBlock *>(Edge.second));
SmallVector<WeakVH, 16> LoopHeaders(UniqueLoopHeaders.begin(),
UniqueLoopHeaders.end());
@@ -338,8 +338,8 @@ void SimplifyCFGPass::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
static_cast<PassInfoMixin<SimplifyCFGPass> *>(this)->printPipeline(
OS, MapClassName2PassName);
- OS << "<";
- OS << "bonus-inst-threshold=" << Options.BonusInstThreshold << ";";
+ OS << '<';
+ OS << "bonus-inst-threshold=" << Options.BonusInstThreshold << ';';
OS << (Options.ForwardSwitchCondToPhi ? "" : "no-") << "forward-switch-cond;";
OS << (Options.ConvertSwitchRangeToICmp ? "" : "no-")
<< "switch-range-to-icmp;";
@@ -347,8 +347,10 @@ void SimplifyCFGPass::printPipeline(
<< "switch-to-lookup;";
OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;";
OS << (Options.HoistCommonInsts ? "" : "no-") << "hoist-common-insts;";
- OS << (Options.SinkCommonInsts ? "" : "no-") << "sink-common-insts";
- OS << ">";
+ OS << (Options.SinkCommonInsts ? "" : "no-") << "sink-common-insts;";
+ OS << (Options.SpeculateBlocks ? "" : "no-") << "speculate-blocks;";
+ OS << (Options.SimplifyCondBranch ? "" : "no-") << "simplify-cond-branch";
+ OS << '>';
}
PreservedAnalyses SimplifyCFGPass::run(Function &F,
@@ -358,11 +360,6 @@ PreservedAnalyses SimplifyCFGPass::run(Function &F,
DominatorTree *DT = nullptr;
if (RequireAndPreserveDomTree)
DT = &AM.getResult<DominatorTreeAnalysis>(F);
- if (F.hasFnAttribute(Attribute::OptForFuzzing)) {
- Options.setSimplifyCondBranch(false).setFoldTwoEntryPHINode(false);
- } else {
- Options.setSimplifyCondBranch(true).setFoldTwoEntryPHINode(true);
- }
if (!simplifyFunctionCFG(F, TTI, DT, Options))
return PreservedAnalyses::all();
PreservedAnalyses PA;
@@ -395,13 +392,6 @@ struct CFGSimplifyPass : public FunctionPass {
DominatorTree *DT = nullptr;
if (RequireAndPreserveDomTree)
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- if (F.hasFnAttribute(Attribute::OptForFuzzing)) {
- Options.setSimplifyCondBranch(false)
- .setFoldTwoEntryPHINode(false);
- } else {
- Options.setSimplifyCondBranch(true)
- .setFoldTwoEntryPHINode(true);
- }
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
return simplifyFunctionCFG(F, TTI, DT, Options);
diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index 65f8d760ede3..e866fe681127 100644
--- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -152,7 +152,7 @@ bool SpeculativeExecutionLegacyPass::runOnFunction(Function &F) {
namespace llvm {
bool SpeculativeExecutionPass::runImpl(Function &F, TargetTransformInfo *TTI) {
- if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence()) {
+ if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence(&F)) {
LLVM_DEBUG(dbgs() << "Not running SpeculativeExecution because "
"TTI->hasBranchDivergence() is false.\n");
return false;
diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 70df0cec0dca..fdb41cb415df 100644
--- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -484,9 +484,9 @@ void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
// = B + (sext(Idx) * sext(S)) * ElementSize
// = B + (sext(Idx) * ElementSize) * sext(S)
// Casting to IntegerType is safe because we skipped vector GEPs.
- IntegerType *IntPtrTy = cast<IntegerType>(DL->getIntPtrType(I->getType()));
+ IntegerType *PtrIdxTy = cast<IntegerType>(DL->getIndexType(I->getType()));
ConstantInt *ScaledIdx = ConstantInt::get(
- IntPtrTy, Idx->getSExtValue() * (int64_t)ElementSize, true);
+ PtrIdxTy, Idx->getSExtValue() * (int64_t)ElementSize, true);
allocateCandidatesAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I);
}
@@ -549,18 +549,18 @@ void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
Value *ArrayIdx = GEP->getOperand(I);
uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
if (ArrayIdx->getType()->getIntegerBitWidth() <=
- DL->getPointerSizeInBits(GEP->getAddressSpace())) {
- // Skip factoring if ArrayIdx is wider than the pointer size, because
- // ArrayIdx is implicitly truncated to the pointer size.
+ DL->getIndexSizeInBits(GEP->getAddressSpace())) {
+ // Skip factoring if ArrayIdx is wider than the index size, because
+ // ArrayIdx is implicitly truncated to the index size.
factorArrayIndex(ArrayIdx, BaseExpr, ElementSize, GEP);
}
// When ArrayIdx is the sext of a value, we try to factor that value as
// well. Handling this case is important because array indices are
- // typically sign-extended to the pointer size.
+ // typically sign-extended to the pointer index size.
Value *TruncatedArrayIdx = nullptr;
if (match(ArrayIdx, m_SExt(m_Value(TruncatedArrayIdx))) &&
TruncatedArrayIdx->getType()->getIntegerBitWidth() <=
- DL->getPointerSizeInBits(GEP->getAddressSpace())) {
+ DL->getIndexSizeInBits(GEP->getAddressSpace())) {
// Skip factoring if TruncatedArrayIdx is wider than the pointer size,
// because TruncatedArrayIdx is implicitly truncated to the pointer size.
factorArrayIndex(TruncatedArrayIdx, BaseExpr, ElementSize, GEP);
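The "implicitly truncated to the index size" remark above can be seen with plain integer arithmetic. A minimal sketch assuming, for the example, a target whose pointer index type is 32 bits wide:

  #include <cstdint>
  #include <iostream>

  int main() {
    // With a 32-bit pointer index type, a 64-bit array index contributes only
    // its low 32 bits to the address computation, so factoring a wider index
    // would reason about bits that the GEP never uses.
    uint64_t WideIdx = 0x100000004ULL; // bit 32 set
    uint32_t Truncated = (uint32_t)WideIdx;
    std::cout << Truncated << '\n'; // prints 4
    return 0;
  }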
@@ -675,24 +675,24 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
}
case Candidate::GEP:
{
- Type *IntPtrTy = DL->getIntPtrType(C.Ins->getType());
- bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds();
- if (BumpWithUglyGEP) {
- // C = (char *)Basis + Bump
- unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();
- Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS);
- Reduced = Builder.CreateBitCast(Basis.Ins, CharTy);
- Reduced =
- Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump, "", InBounds);
- Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType());
- } else {
- // C = gep Basis, Bump
- // Canonicalize bump to pointer size.
- Bump = Builder.CreateSExtOrTrunc(Bump, IntPtrTy);
- Reduced = Builder.CreateGEP(
- cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(),
- Basis.Ins, Bump, "", InBounds);
- }
+ Type *OffsetTy = DL->getIndexType(C.Ins->getType());
+ bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds();
+ if (BumpWithUglyGEP) {
+ // C = (char *)Basis + Bump
+ unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();
+ Type *CharTy = Type::getInt8PtrTy(Basis.Ins->getContext(), AS);
+ Reduced = Builder.CreateBitCast(Basis.Ins, CharTy);
+ Reduced =
+ Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump, "", InBounds);
+ Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType());
+ } else {
+ // C = gep Basis, Bump
+ // Canonicalize bump to the index size.
+ Bump = Builder.CreateSExtOrTrunc(Bump, OffsetTy);
+ Reduced = Builder.CreateGEP(
+ cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(), Basis.Ins,
+ Bump, "", InBounds);
+ }
break;
}
default:
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 81d151c2904e..fac5695c7bea 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -15,10 +15,10 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/RegionPass.h"
+#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -239,12 +239,12 @@ class StructurizeCFG {
Type *Boolean;
ConstantInt *BoolTrue;
ConstantInt *BoolFalse;
- UndefValue *BoolUndef;
+ Value *BoolPoison;
Function *Func;
Region *ParentRegion;
- LegacyDivergenceAnalysis *DA = nullptr;
+ UniformityInfo *UA = nullptr;
DominatorTree *DT;
SmallVector<RegionNode *, 8> Order;
@@ -319,7 +319,7 @@ class StructurizeCFG {
public:
void init(Region *R);
bool run(Region *R, DominatorTree *DT);
- bool makeUniformRegion(Region *R, LegacyDivergenceAnalysis *DA);
+ bool makeUniformRegion(Region *R, UniformityInfo &UA);
};
class StructurizeCFGLegacyPass : public RegionPass {
@@ -339,8 +339,9 @@ public:
StructurizeCFG SCFG;
SCFG.init(R);
if (SkipUniformRegions) {
- LegacyDivergenceAnalysis *DA = &getAnalysis<LegacyDivergenceAnalysis>();
- if (SCFG.makeUniformRegion(R, DA))
+ UniformityInfo &UA =
+ getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
+ if (SCFG.makeUniformRegion(R, UA))
return false;
}
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -351,7 +352,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override {
if (SkipUniformRegions)
- AU.addRequired<LegacyDivergenceAnalysis>();
+ AU.addRequired<UniformityInfoWrapperPass>();
AU.addRequiredID(LowerSwitchID);
AU.addRequired<DominatorTreeWrapperPass>();
@@ -366,7 +367,7 @@ char StructurizeCFGLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
"Structurize the CFG", false, false)
-INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
@@ -798,8 +799,6 @@ void StructurizeCFG::killTerminator(BasicBlock *BB) {
for (BasicBlock *Succ : successors(BB))
delPhiValues(BB, Succ);
- if (DA)
- DA->removeValue(Term);
Term->eraseFromParent();
}
@@ -957,7 +956,7 @@ void StructurizeCFG::wireFlow(bool ExitUseAllowed,
BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
// let it point to entry and next block
- BranchInst *Br = BranchInst::Create(Entry, Next, BoolUndef, Flow);
+ BranchInst *Br = BranchInst::Create(Entry, Next, BoolPoison, Flow);
Br->setDebugLoc(TermDL[Flow]);
Conditions.push_back(Br);
addPhiValues(Flow, Entry);
@@ -998,7 +997,7 @@ void StructurizeCFG::handleLoops(bool ExitUseAllowed,
// Create an extra loop end node
LoopEnd = needPrefix(false);
BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
- BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolUndef, LoopEnd);
+ BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolPoison, LoopEnd);
Br->setDebugLoc(TermDL[LoopEnd]);
LoopConds.push_back(Br);
addPhiValues(LoopEnd, LoopStart);
@@ -1064,7 +1063,7 @@ void StructurizeCFG::rebuildSSA() {
}
static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
- const LegacyDivergenceAnalysis &DA) {
+ const UniformityInfo &UA) {
// Bool for if all sub-regions are uniform.
bool SubRegionsAreUniform = true;
// Count of how many direct children are conditional.
@@ -1076,7 +1075,7 @@ static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
if (!Br || !Br->isConditional())
continue;
- if (!DA.isUniform(Br))
+ if (!UA.isUniform(Br))
return false;
// One of our direct children is conditional.
@@ -1086,7 +1085,7 @@ static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
<< " has uniform terminator\n");
} else {
// Explicitly refuse to treat regions as uniform if they have non-uniform
- // subregions. We cannot rely on DivergenceAnalysis for branches in
+ // subregions. We cannot rely on UniformityAnalysis for branches in
// subregions because those branches may have been removed and re-created,
// so we look for our metadata instead.
//
@@ -1126,17 +1125,17 @@ void StructurizeCFG::init(Region *R) {
Boolean = Type::getInt1Ty(Context);
BoolTrue = ConstantInt::getTrue(Context);
BoolFalse = ConstantInt::getFalse(Context);
- BoolUndef = UndefValue::get(Boolean);
+ BoolPoison = PoisonValue::get(Boolean);
- this->DA = nullptr;
+ this->UA = nullptr;
}
-bool StructurizeCFG::makeUniformRegion(Region *R,
- LegacyDivergenceAnalysis *DA) {
+bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
if (R->isTopLevelRegion())
return false;
- this->DA = DA;
+ this->UA = &UA;
+
// TODO: We could probably be smarter here with how we handle sub-regions.
// We currently rely on the fact that metadata is set by earlier invocations
// of the pass on sub-regions, and that this metadata doesn't get lost --
@@ -1144,7 +1143,7 @@ bool StructurizeCFG::makeUniformRegion(Region *R,
unsigned UniformMDKindID =
R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
- if (hasOnlyUniformBranches(R, UniformMDKindID, *DA)) {
+ if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) {
LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
<< '\n');
diff --git a/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp b/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
index 9e08954ef643..e53019768e88 100644
--- a/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
+++ b/llvm/lib/Transforms/Scalar/WarnMissedTransforms.cpp
@@ -13,7 +13,6 @@
#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
@@ -104,47 +103,3 @@ WarnMissedTransformationsPass::run(Function &F, FunctionAnalysisManager &AM) {
return PreservedAnalyses::all();
}
-
-// Legacy pass manager boilerplate
-namespace {
-class WarnMissedTransformationsLegacy : public FunctionPass {
-public:
- static char ID;
-
- explicit WarnMissedTransformationsLegacy() : FunctionPass(ID) {
- initializeWarnMissedTransformationsLegacyPass(
- *PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
-
- auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-
- warnAboutLeftoverTransformations(&F, &LI, &ORE);
- return false;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
-
- AU.setPreservesAll();
- }
-};
-} // end anonymous namespace
-
-char WarnMissedTransformationsLegacy::ID = 0;
-
-INITIALIZE_PASS_BEGIN(WarnMissedTransformationsLegacy, "transform-warning",
- "Warn about non-applied transformations", false, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_END(WarnMissedTransformationsLegacy, "transform-warning",
- "Warn about non-applied transformations", false, false)
-
-Pass *llvm::createWarnMissedTransformationsPass() {
- return new WarnMissedTransformationsLegacy();
-}
diff --git a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
index 24972db404be..2195406c144c 100644
--- a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
+++ b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -16,7 +16,11 @@
#include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h"
#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/MathExtras.h"
using namespace llvm;
@@ -179,11 +183,7 @@ static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
// Scan the format string to locate all specifiers, and mark the ones that
// specify a string, i.e., the "%s" specifier with optional '*' characters.
-static void locateCStrings(SparseBitVector<8> &BV, Value *Fmt) {
- StringRef Str;
- if (!getConstantStringInfo(Fmt, Str) || Str.empty())
- return;
-
+static void locateCStrings(SparseBitVector<8> &BV, StringRef Str) {
static const char ConvSpecifiers[] = "diouxXfFeEgGaAcspn";
size_t SpecPos = 0;
// Skip the first argument, the format string.
@@ -207,14 +207,320 @@ static void locateCStrings(SparseBitVector<8> &BV, Value *Fmt) {
}
}
-Value *llvm::emitAMDGPUPrintfCall(IRBuilder<> &Builder,
- ArrayRef<Value *> Args) {
+// Helper struct to package the string-related data.
+struct StringData {
+ StringRef Str;
+ Value *RealSize = nullptr;
+ Value *AlignedSize = nullptr;
+ bool IsConst = true;
+
+ StringData(StringRef ST, Value *RS, Value *AS, bool IC)
+ : Str(ST), RealSize(RS), AlignedSize(AS), IsConst(IC) {}
+};
+
+// Calculates the frame size required for the current printf expansion and
+// allocates space in the printf buffer. The printf frame contains the
+// following: [ ControlDWord, format string/hash, arguments (each aligned to 8
+// bytes) ]
+static Value *callBufferedPrintfStart(
+ IRBuilder<> &Builder, ArrayRef<Value *> Args, Value *Fmt,
+ bool isConstFmtStr, SparseBitVector<8> &SpecIsCString,
+ SmallVectorImpl<StringData> &StringContents, Value *&ArgSize) {
+ Module *M = Builder.GetInsertBlock()->getModule();
+ Value *NonConstStrLen = nullptr;
+ Value *LenWithNull = nullptr;
+ Value *LenWithNullAligned = nullptr;
+ Value *TempAdd = nullptr;
+
+ // First 4 bytes to be reserved for control dword
+ size_t BufSize = 4;
+ if (isConstFmtStr)
+ // First 8 bytes of MD5 hash
+ BufSize += 8;
+ else {
+ LenWithNull = getStrlenWithNull(Builder, Fmt);
+
+ // Align the computed length to next 8 byte boundary
+ TempAdd = Builder.CreateAdd(LenWithNull,
+ ConstantInt::get(LenWithNull->getType(), 7U));
+ NonConstStrLen = Builder.CreateAnd(
+ TempAdd, ConstantInt::get(LenWithNull->getType(), ~7U));
+
+ StringContents.push_back(
+ StringData(StringRef(), LenWithNull, NonConstStrLen, false));
+ }
+
+ for (size_t i = 1; i < Args.size(); i++) {
+ if (SpecIsCString.test(i)) {
+ StringRef ArgStr;
+ if (getConstantStringInfo(Args[i], ArgStr)) {
+ auto alignedLen = alignTo(ArgStr.size() + 1, 8);
+ StringContents.push_back(StringData(
+ ArgStr,
+ /*RealSize*/ nullptr, /*AlignedSize*/ nullptr, /*IsConst*/ true));
+ BufSize += alignedLen;
+ } else {
+ LenWithNull = getStrlenWithNull(Builder, Args[i]);
+
+ // Align the computed length to next 8 byte boundary
+ TempAdd = Builder.CreateAdd(
+ LenWithNull, ConstantInt::get(LenWithNull->getType(), 7U));
+ LenWithNullAligned = Builder.CreateAnd(
+ TempAdd, ConstantInt::get(LenWithNull->getType(), ~7U));
+
+ if (NonConstStrLen) {
+ auto Val = Builder.CreateAdd(LenWithNullAligned, NonConstStrLen,
+ "cumulativeAdd");
+ NonConstStrLen = Val;
+ } else
+ NonConstStrLen = LenWithNullAligned;
+
+ StringContents.push_back(
+ StringData(StringRef(), LenWithNull, LenWithNullAligned, false));
+ }
+ } else {
+ int AllocSize = M->getDataLayout().getTypeAllocSize(Args[i]->getType());
+ // We end up expanding non-string arguments to 8 bytes
+ // (args smaller than 8 bytes).
+ BufSize += std::max(AllocSize, 8);
+ }
+ }
+
+ // calculate final size value to be passed to printf_alloc
+ Value *SizeToReserve = ConstantInt::get(Builder.getInt64Ty(), BufSize, false);
+ SmallVector<Value *, 1> Alloc_args;
+ if (NonConstStrLen)
+ SizeToReserve = Builder.CreateAdd(NonConstStrLen, SizeToReserve);
+
+ ArgSize = Builder.CreateTrunc(SizeToReserve, Builder.getInt32Ty());
+ Alloc_args.push_back(ArgSize);
+
+ // call the printf_alloc function
+ AttributeList Attr = AttributeList::get(
+ Builder.getContext(), AttributeList::FunctionIndex, Attribute::NoUnwind);
+
+ Type *Tys_alloc[1] = {Builder.getInt32Ty()};
+ Type *I8Ptr =
+ Builder.getInt8PtrTy(M->getDataLayout().getDefaultGlobalsAddressSpace());
+ FunctionType *FTy_alloc = FunctionType::get(I8Ptr, Tys_alloc, false);
+ auto PrintfAllocFn =
+ M->getOrInsertFunction(StringRef("__printf_alloc"), FTy_alloc, Attr);
+
+ return Builder.CreateCall(PrintfAllocFn, Alloc_args, "printf_alloc_fn");
+}
+
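A standalone sketch of the frame-size bookkeeping performed above: 4 bytes for the control DWord, 8 bytes for the hash of a constant format string, at least 8 bytes per non-string argument, and string lengths rounded up with the same (len + 7) & ~7 pattern. The concrete argument mix is an assumed example:

  #include <cstdint>
  #include <iostream>

  // Round a byte count up to the next multiple of 8, mirroring the
  // CreateAdd/CreateAnd sequence applied to the strlen result above.
  static uint64_t alignTo8(uint64_t Len) { return (Len + 7) & ~7ULL; }

  int main() {
    bool IsConstFmtStr = true;
    uint64_t BufSize = 4;           // control DWord
    if (IsConstFmtStr)
      BufSize += 8;                 // low 64 bits of the MD5 hash

    // Example arguments: a constant 5-character string (plus NUL) and an i32.
    BufSize += alignTo8(5 + 1);     // 8 bytes after alignment
    BufSize += 8;                   // non-string args occupy at least 8 bytes

    std::cout << "frame size = " << BufSize << '\n'; // 28
    return 0;
  }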
+// Prepare constant string argument to push onto the buffer
+static void processConstantStringArg(StringData *SD, IRBuilder<> &Builder,
+ SmallVectorImpl<Value *> &WhatToStore) {
+ std::string Str(SD->Str.str() + '\0');
+
+ DataExtractor Extractor(Str, /*IsLittleEndian=*/true, 8);
+ DataExtractor::Cursor Offset(0);
+ while (Offset && Offset.tell() < Str.size()) {
+ const uint64_t ReadSize = 4;
+ uint64_t ReadNow = std::min(ReadSize, Str.size() - Offset.tell());
+ uint64_t ReadBytes = 0;
+ switch (ReadNow) {
+ default:
+ llvm_unreachable("min(4, X) > 4?");
+ case 1:
+ ReadBytes = Extractor.getU8(Offset);
+ break;
+ case 2:
+ ReadBytes = Extractor.getU16(Offset);
+ break;
+ case 3:
+ ReadBytes = Extractor.getU24(Offset);
+ break;
+ case 4:
+ ReadBytes = Extractor.getU32(Offset);
+ break;
+ }
+ cantFail(Offset.takeError(), "failed to read bytes from constant array");
+
+ APInt IntVal(8 * ReadSize, ReadBytes);
+
+ // TODO: Should not bother aligning up.
+ if (ReadNow < ReadSize)
+ IntVal = IntVal.zext(8 * ReadSize);
+
+ Type *IntTy = Type::getIntNTy(Builder.getContext(), IntVal.getBitWidth());
+ WhatToStore.push_back(ConstantInt::get(IntTy, IntVal));
+ }
+ // Additional padding for 8 byte alignment
+ int Rem = (Str.size() % 8);
+ if (Rem > 0 && Rem <= 4)
+ WhatToStore.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
+}
+
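A minimal model of the constant-string packing above, in plain C++: the NUL-terminated bytes are consumed four at a time as little-endian 32-bit words, a short tail is zero-extended, and one extra zero word is appended when the total length is not already a multiple of 8. Note the memcpy read is only little-endian on little-endian hosts, unlike DataExtractor, which is explicit about byte order:

  #include <algorithm>
  #include <cstdint>
  #include <cstring>
  #include <iostream>
  #include <string>
  #include <vector>

  int main() {
    std::string Str = "hi!"; // constant string argument
    Str.push_back('\0');     // include the terminating NUL

    std::vector<uint32_t> Words;
    for (size_t Off = 0; Off < Str.size(); Off += 4) {
      uint32_t W = 0; // a short tail is zero-extended to a full word
      size_t N = std::min<size_t>(4, Str.size() - Off);
      std::memcpy(&W, Str.data() + Off, N);
      Words.push_back(W);
    }
    // Pad to an 8-byte boundary with one extra zero word if needed.
    size_t Rem = Str.size() % 8;
    if (Rem > 0 && Rem <= 4)
      Words.push_back(0);

    for (uint32_t W : Words)
      std::cout << std::hex << W << '\n'; // 216968, then 0
    return 0;
  }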
+static Value *processNonStringArg(Value *Arg, IRBuilder<> &Builder) {
+ const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
+ auto Ty = Arg->getType();
+
+ if (auto IntTy = dyn_cast<IntegerType>(Ty)) {
+ if (IntTy->getBitWidth() < 64) {
+ return Builder.CreateZExt(Arg, Builder.getInt64Ty());
+ }
+ }
+
+ if (Ty->isFloatingPointTy()) {
+ if (DL.getTypeAllocSize(Ty) < 8) {
+ return Builder.CreateFPExt(Arg, Builder.getDoubleTy());
+ }
+ }
+
+ return Arg;
+}
+
+static void
+callBufferedPrintfArgPush(IRBuilder<> &Builder, ArrayRef<Value *> Args,
+ Value *PtrToStore, SparseBitVector<8> &SpecIsCString,
+ SmallVectorImpl<StringData> &StringContents,
+ bool IsConstFmtStr) {
+ Module *M = Builder.GetInsertBlock()->getModule();
+ const DataLayout &DL = M->getDataLayout();
+ auto StrIt = StringContents.begin();
+ size_t i = IsConstFmtStr ? 1 : 0;
+ for (; i < Args.size(); i++) {
+ SmallVector<Value *, 32> WhatToStore;
+ if ((i == 0) || SpecIsCString.test(i)) {
+ if (StrIt->IsConst) {
+ processConstantStringArg(StrIt, Builder, WhatToStore);
+ StrIt++;
+ } else {
+ // This copies the contents of the string; the next offset, however, is
+ // at the aligned length, and the extra space that might be created due
+ // to alignment padding is not populated with any specific value here.
+ // This is safe as long as the runtime stays in sync with the offsets.
+ Builder.CreateMemCpy(PtrToStore, /*DstAlign*/ Align(1), Args[i],
+ /*SrcAlign*/ Args[i]->getPointerAlignment(DL),
+ StrIt->RealSize);
+
+ PtrToStore =
+ Builder.CreateInBoundsGEP(Builder.getInt8Ty(), PtrToStore,
+ {StrIt->AlignedSize}, "PrintBuffNextPtr");
+ LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:"
+ << *PtrToStore << '\n');
+
+ // done with current argument, move to next
+ StrIt++;
+ continue;
+ }
+ } else {
+ WhatToStore.push_back(processNonStringArg(Args[i], Builder));
+ }
+
+ for (unsigned I = 0, E = WhatToStore.size(); I != E; ++I) {
+ Value *toStore = WhatToStore[I];
+
+ StoreInst *StBuff = Builder.CreateStore(toStore, PtrToStore);
+ LLVM_DEBUG(dbgs() << "inserting store to printf buffer:" << *StBuff
+ << '\n');
+ (void)StBuff;
+ PtrToStore = Builder.CreateConstInBoundsGEP1_32(
+ Builder.getInt8Ty(), PtrToStore,
+ M->getDataLayout().getTypeAllocSize(toStore->getType()),
+ "PrintBuffNextPtr");
+ LLVM_DEBUG(dbgs() << "inserting gep to the printf buffer:" << *PtrToStore
+ << '\n');
+ }
+ }
+}
+
+Value *llvm::emitAMDGPUPrintfCall(IRBuilder<> &Builder, ArrayRef<Value *> Args,
+ bool IsBuffered) {
auto NumOps = Args.size();
assert(NumOps >= 1);
auto Fmt = Args[0];
SparseBitVector<8> SpecIsCString;
- locateCStrings(SpecIsCString, Fmt);
+ StringRef FmtStr;
+
+ if (getConstantStringInfo(Fmt, FmtStr))
+ locateCStrings(SpecIsCString, FmtStr);
+
+ if (IsBuffered) {
+ SmallVector<StringData, 8> StringContents;
+ Module *M = Builder.GetInsertBlock()->getModule();
+ LLVMContext &Ctx = Builder.getContext();
+ auto Int8Ty = Builder.getInt8Ty();
+ auto Int32Ty = Builder.getInt32Ty();
+ bool IsConstFmtStr = !FmtStr.empty();
+
+ Value *ArgSize = nullptr;
+ Value *Ptr =
+ callBufferedPrintfStart(Builder, Args, Fmt, IsConstFmtStr,
+ SpecIsCString, StringContents, ArgSize);
+
+ // The buffered version still follows the OpenCL printf standard for the
+ // printf return value, i.e. 0 on success, -1 on failure.
+ ConstantPointerNull *zeroIntPtr =
+ ConstantPointerNull::get(cast<PointerType>(Ptr->getType()));
+
+ auto *Cmp = cast<ICmpInst>(Builder.CreateICmpNE(Ptr, zeroIntPtr, ""));
+
+ BasicBlock *End = BasicBlock::Create(Ctx, "end.block",
+ Builder.GetInsertBlock()->getParent());
+ BasicBlock *ArgPush = BasicBlock::Create(
+ Ctx, "argpush.block", Builder.GetInsertBlock()->getParent());
+
+ BranchInst::Create(ArgPush, End, Cmp, Builder.GetInsertBlock());
+ Builder.SetInsertPoint(ArgPush);
+
+ // Create the control DWord and store it as the first entry. Format:
+ //   Bit 0 (LSB) -> stream (1 if stderr, 0 if stdout; printf always outputs
+ //                  to stdout)
+ //   Bit 1       -> constant format string (1 if constant)
+ //   Bits 2-31   -> size of the printf data frame
+ auto ConstantTwo = Builder.getInt32(2);
+ auto ControlDWord = Builder.CreateShl(ArgSize, ConstantTwo);
+ if (IsConstFmtStr)
+ ControlDWord = Builder.CreateOr(ControlDWord, ConstantTwo);
+
+ Builder.CreateStore(ControlDWord, Ptr);
+
+ Ptr = Builder.CreateConstInBoundsGEP1_32(Int8Ty, Ptr, 4);
+
+ // Create an MD5 hash for the constant format string and push its low 64
+ // bits onto the buffer and into the metadata.
+ NamedMDNode *metaD = M->getOrInsertNamedMetadata("llvm.printf.fmts");
+ if (IsConstFmtStr) {
+ MD5 Hasher;
+ MD5::MD5Result Hash;
+ Hasher.update(FmtStr);
+ Hasher.final(Hash);
+
+ // Try sticking to the llvm.printf.fmts format, although we are not going
+ // to use the ID and argument size fields while printing.
+ std::string MetadataStr =
+ "0:0:" + llvm::utohexstr(Hash.low(), /*LowerCase=*/true) + "," +
+ FmtStr.str();
+ MDString *fmtStrArray = MDString::get(Ctx, MetadataStr);
+ MDNode *myMD = MDNode::get(Ctx, fmtStrArray);
+ metaD->addOperand(myMD);
+
+ Builder.CreateStore(Builder.getInt64(Hash.low()), Ptr);
+ Ptr = Builder.CreateConstInBoundsGEP1_32(Int8Ty, Ptr, 8);
+ } else {
+ // Include a dummy metadata instance when only non-constant format
+ // strings are used. This might be an absurd use case, but it needs to
+ // be handled for completeness.
+ if (metaD->getNumOperands() == 0) {
+ MDString *fmtStrArray =
+ MDString::get(Ctx, "0:0:ffffffff,\"Non const format string\"");
+ MDNode *myMD = MDNode::get(Ctx, fmtStrArray);
+ metaD->addOperand(myMD);
+ }
+ }
+
+ // Push the printf arguments onto the buffer.
+ callBufferedPrintfArgPush(Builder, Args, Ptr, SpecIsCString, StringContents,
+ IsConstFmtStr);
+
+ // End block, returns -1 on failure
+ BranchInst::Create(End, ArgPush);
+ Builder.SetInsertPoint(End);
+ return Builder.CreateSExt(Builder.CreateNot(Cmp), Int32Ty, "printf_result");
+ }
auto Desc = callPrintfBegin(Builder, Builder.getIntN(64, 0));
Desc = appendString(Builder, Desc, Fmt, NumOps == 1);
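To make the control-DWord layout described in the comments above concrete, here is a tiny packing/unpacking sketch; the field meanings are taken from those comments, and the frame size is an assumed example value:

  #include <cstdint>
  #include <iostream>

  int main() {
    // Bit 0 (LSB): stream (always 0, i.e. stdout, for printf)
    // Bit 1:       1 if the format string is a compile-time constant
    // Bits 2-31:   size of the printf data frame in bytes
    uint32_t FrameSize = 28;  // assumed example size
    bool IsConstFmt = true;

    uint32_t Control = (FrameSize << 2) | (IsConstFmt ? 2u : 0u);

    std::cout << std::hex << "control = 0x" << Control << '\n'; // 0x72
    std::cout << std::dec << "size = " << (Control >> 2)
              << ", const fmt = " << ((Control >> 1) & 1)
              << ", stream = " << (Control & 1) << '\n';
    return 0;
  }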
diff --git a/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
index 56acdcc0bc3c..7d127400651e 100644
--- a/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -85,33 +85,6 @@ static cl::opt<bool> NoDiscriminators(
"no-discriminators", cl::init(false),
cl::desc("Disable generation of discriminator information."));
-namespace {
-
-// The legacy pass of AddDiscriminators.
-struct AddDiscriminatorsLegacyPass : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
-
- AddDiscriminatorsLegacyPass() : FunctionPass(ID) {
- initializeAddDiscriminatorsLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override;
-};
-
-} // end anonymous namespace
-
-char AddDiscriminatorsLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(AddDiscriminatorsLegacyPass, "add-discriminators",
- "Add DWARF path discriminators", false, false)
-INITIALIZE_PASS_END(AddDiscriminatorsLegacyPass, "add-discriminators",
- "Add DWARF path discriminators", false, false)
-
-// Create the legacy AddDiscriminatorsPass.
-FunctionPass *llvm::createAddDiscriminatorsPass() {
- return new AddDiscriminatorsLegacyPass();
-}
-
static bool shouldHaveDiscriminator(const Instruction *I) {
return !isa<IntrinsicInst>(I) || isa<MemIntrinsic>(I);
}
@@ -269,10 +242,6 @@ static bool addDiscriminators(Function &F) {
return Changed;
}
-bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) {
- return addDiscriminators(F);
-}
-
PreservedAnalyses AddDiscriminatorsPass::run(Function &F,
FunctionAnalysisManager &AM) {
if (!addDiscriminators(F))
diff --git a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
index d17c399ba798..45cf98e65a5a 100644
--- a/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
+++ b/llvm/lib/Transforms/Utils/AssumeBundleBuilder.cpp
@@ -290,17 +290,20 @@ AssumeInst *llvm::buildAssumeFromInst(Instruction *I) {
return Builder.build();
}
-void llvm::salvageKnowledge(Instruction *I, AssumptionCache *AC,
+bool llvm::salvageKnowledge(Instruction *I, AssumptionCache *AC,
DominatorTree *DT) {
if (!EnableKnowledgeRetention || I->isTerminator())
- return;
+ return false;
+ bool Changed = false;
AssumeBuilderState Builder(I->getModule(), I, AC, DT);
Builder.addInstruction(I);
if (auto *Intr = Builder.build()) {
Intr->insertBefore(I);
+ Changed = true;
if (AC)
AC->registerAssumption(Intr);
}
+ return Changed;
}
AssumeInst *
@@ -563,57 +566,26 @@ PreservedAnalyses AssumeSimplifyPass::run(Function &F,
FunctionAnalysisManager &AM) {
if (!EnableKnowledgeRetention)
return PreservedAnalyses::all();
- simplifyAssumes(F, &AM.getResult<AssumptionAnalysis>(F),
- AM.getCachedResult<DominatorTreeAnalysis>(F));
- return PreservedAnalyses::all();
-}
-
-namespace {
-class AssumeSimplifyPassLegacyPass : public FunctionPass {
-public:
- static char ID;
-
- AssumeSimplifyPassLegacyPass() : FunctionPass(ID) {
- initializeAssumeSimplifyPassLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
- bool runOnFunction(Function &F) override {
- if (skipFunction(F) || !EnableKnowledgeRetention)
- return false;
- AssumptionCache &AC =
- getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- return simplifyAssumes(F, &AC, DTWP ? &DTWP->getDomTree() : nullptr);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
-
- AU.setPreservesAll();
- }
-};
-} // namespace
-
-char AssumeSimplifyPassLegacyPass::ID = 0;
-
-INITIALIZE_PASS_BEGIN(AssumeSimplifyPassLegacyPass, "assume-simplify",
- "Assume Simplify", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_END(AssumeSimplifyPassLegacyPass, "assume-simplify",
- "Assume Simplify", false, false)
-
-FunctionPass *llvm::createAssumeSimplifyPass() {
- return new AssumeSimplifyPassLegacyPass();
+ if (!simplifyAssumes(F, &AM.getResult<AssumptionAnalysis>(F),
+ AM.getCachedResult<DominatorTreeAnalysis>(F)))
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
PreservedAnalyses AssumeBuilderPass::run(Function &F,
FunctionAnalysisManager &AM) {
AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
DominatorTree* DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ bool Changed = false;
for (Instruction &I : instructions(F))
- salvageKnowledge(&I, AC, DT);
- return PreservedAnalyses::all();
+ Changed |= salvageKnowledge(&I, AC, DT);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
namespace {
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 58a226fc601c..f06ea89cc61d 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -32,6 +32,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
@@ -379,8 +380,8 @@ bool llvm::MergeBlockSuccessorsIntoGivenBlocks(
///
/// Possible improvements:
/// - Check fully overlapping fragments and not only identical fragments.
-/// - Support dbg.addr, dbg.declare. dbg.label, and possibly other meta
-/// instructions being part of the sequence of consecutive instructions.
+/// - Support dbg.declare, dbg.label, and possibly other meta instructions being
+/// part of the sequence of consecutive instructions.
static bool removeRedundantDbgInstrsUsingBackwardScan(BasicBlock *BB) {
SmallVector<DbgValueInst *, 8> ToBeRemoved;
SmallDenseSet<DebugVariable> VariableSet;
@@ -599,8 +600,8 @@ bool llvm::IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB) {
unsigned Depth = 0;
while (BB && Depth++ < MaxDeoptOrUnreachableSuccessorCheckDepth &&
VisitedBlocks.insert(BB).second) {
- if (BB->getTerminatingDeoptimizeCall() ||
- isa<UnreachableInst>(BB->getTerminator()))
+ if (isa<UnreachableInst>(BB->getTerminator()) ||
+ BB->getTerminatingDeoptimizeCall())
return true;
BB = BB->getUniqueSuccessor();
}
@@ -1470,133 +1471,198 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
return cast<ReturnInst>(NewRet);
}
-static Instruction *
-SplitBlockAndInsertIfThenImpl(Value *Cond, Instruction *SplitBefore,
- bool Unreachable, MDNode *BranchWeights,
- DomTreeUpdater *DTU, DominatorTree *DT,
- LoopInfo *LI, BasicBlock *ThenBlock) {
- SmallVector<DominatorTree::UpdateType, 8> Updates;
- BasicBlock *Head = SplitBefore->getParent();
- BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
- if (DTU) {
- SmallPtrSet<BasicBlock *, 8> UniqueSuccessorsOfHead;
- Updates.push_back({DominatorTree::Insert, Head, Tail});
- Updates.reserve(Updates.size() + 2 * succ_size(Tail));
- for (BasicBlock *SuccessorOfHead : successors(Tail))
- if (UniqueSuccessorsOfHead.insert(SuccessorOfHead).second) {
- Updates.push_back({DominatorTree::Insert, Tail, SuccessorOfHead});
- Updates.push_back({DominatorTree::Delete, Head, SuccessorOfHead});
- }
- }
- Instruction *HeadOldTerm = Head->getTerminator();
- LLVMContext &C = Head->getContext();
- Instruction *CheckTerm;
- bool CreateThenBlock = (ThenBlock == nullptr);
- if (CreateThenBlock) {
- ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
- if (Unreachable)
- CheckTerm = new UnreachableInst(C, ThenBlock);
- else {
- CheckTerm = BranchInst::Create(Tail, ThenBlock);
- if (DTU)
- Updates.push_back({DominatorTree::Insert, ThenBlock, Tail});
- }
- CheckTerm->setDebugLoc(SplitBefore->getDebugLoc());
- } else
- CheckTerm = ThenBlock->getTerminator();
- BranchInst *HeadNewTerm =
- BranchInst::Create(/*ifTrue*/ ThenBlock, /*ifFalse*/ Tail, Cond);
- if (DTU)
- Updates.push_back({DominatorTree::Insert, Head, ThenBlock});
- HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
- ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
-
- if (DTU)
- DTU->applyUpdates(Updates);
- else if (DT) {
- if (DomTreeNode *OldNode = DT->getNode(Head)) {
- std::vector<DomTreeNode *> Children(OldNode->begin(), OldNode->end());
-
- DomTreeNode *NewNode = DT->addNewBlock(Tail, Head);
- for (DomTreeNode *Child : Children)
- DT->changeImmediateDominator(Child, NewNode);
-
- // Head dominates ThenBlock.
- if (CreateThenBlock)
- DT->addNewBlock(ThenBlock, Head);
- else
- DT->changeImmediateDominator(ThenBlock, Head);
- }
- }
-
- if (LI) {
- if (Loop *L = LI->getLoopFor(Head)) {
- L->addBasicBlockToLoop(ThenBlock, *LI);
- L->addBasicBlockToLoop(Tail, *LI);
- }
- }
-
- return CheckTerm;
-}
-
Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
Instruction *SplitBefore,
bool Unreachable,
MDNode *BranchWeights,
- DominatorTree *DT, LoopInfo *LI,
+ DomTreeUpdater *DTU, LoopInfo *LI,
BasicBlock *ThenBlock) {
- return SplitBlockAndInsertIfThenImpl(Cond, SplitBefore, Unreachable,
- BranchWeights,
- /*DTU=*/nullptr, DT, LI, ThenBlock);
+ SplitBlockAndInsertIfThenElse(
+ Cond, SplitBefore, &ThenBlock, /* ElseBlock */ nullptr,
+ /* UnreachableThen */ Unreachable,
+ /* UnreachableElse */ false, BranchWeights, DTU, LI);
+ return ThenBlock->getTerminator();
}
-Instruction *llvm::SplitBlockAndInsertIfThen(Value *Cond,
+
+Instruction *llvm::SplitBlockAndInsertIfElse(Value *Cond,
Instruction *SplitBefore,
bool Unreachable,
MDNode *BranchWeights,
DomTreeUpdater *DTU, LoopInfo *LI,
- BasicBlock *ThenBlock) {
- return SplitBlockAndInsertIfThenImpl(Cond, SplitBefore, Unreachable,
- BranchWeights, DTU, /*DT=*/nullptr, LI,
- ThenBlock);
+ BasicBlock *ElseBlock) {
+ SplitBlockAndInsertIfThenElse(
+ Cond, SplitBefore, /* ThenBlock */ nullptr, &ElseBlock,
+ /* UnreachableThen */ false,
+ /* UnreachableElse */ Unreachable, BranchWeights, DTU, LI);
+ return ElseBlock->getTerminator();
}
void llvm::SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
Instruction **ThenTerm,
Instruction **ElseTerm,
MDNode *BranchWeights,
- DomTreeUpdater *DTU) {
- BasicBlock *Head = SplitBefore->getParent();
+ DomTreeUpdater *DTU, LoopInfo *LI) {
+ BasicBlock *ThenBlock = nullptr;
+ BasicBlock *ElseBlock = nullptr;
+ SplitBlockAndInsertIfThenElse(
+ Cond, SplitBefore, &ThenBlock, &ElseBlock, /* UnreachableThen */ false,
+ /* UnreachableElse */ false, BranchWeights, DTU, LI);
+
+ *ThenTerm = ThenBlock->getTerminator();
+ *ElseTerm = ElseBlock->getTerminator();
+}
+
+void llvm::SplitBlockAndInsertIfThenElse(
+ Value *Cond, Instruction *SplitBefore, BasicBlock **ThenBlock,
+ BasicBlock **ElseBlock, bool UnreachableThen, bool UnreachableElse,
+ MDNode *BranchWeights, DomTreeUpdater *DTU, LoopInfo *LI) {
+ assert((ThenBlock || ElseBlock) &&
+ "At least one branch block must be created");
+ assert((!UnreachableThen || !UnreachableElse) &&
+ "Split block tail must be reachable");
+ SmallVector<DominatorTree::UpdateType, 8> Updates;
SmallPtrSet<BasicBlock *, 8> UniqueOrigSuccessors;
- if (DTU)
+ BasicBlock *Head = SplitBefore->getParent();
+ if (DTU) {
UniqueOrigSuccessors.insert(succ_begin(Head), succ_end(Head));
+ Updates.reserve(4 + 2 * UniqueOrigSuccessors.size());
+ }
+ LLVMContext &C = Head->getContext();
BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator());
+ BasicBlock *TrueBlock = Tail;
+ BasicBlock *FalseBlock = Tail;
+ bool ThenToTailEdge = false;
+ bool ElseToTailEdge = false;
+
+ // Encapsulate the logic around creation/insertion/etc of a new block.
+ auto handleBlock = [&](BasicBlock **PBB, bool Unreachable, BasicBlock *&BB,
+ bool &ToTailEdge) {
+ if (PBB == nullptr)
+ return; // Do not create/insert a block.
+
+ if (*PBB)
+ BB = *PBB; // Caller supplied block, use it.
+ else {
+ // Create a new block.
+ BB = BasicBlock::Create(C, "", Head->getParent(), Tail);
+ if (Unreachable)
+ (void)new UnreachableInst(C, BB);
+ else {
+ (void)BranchInst::Create(Tail, BB);
+ ToTailEdge = true;
+ }
+ BB->getTerminator()->setDebugLoc(SplitBefore->getDebugLoc());
+ // Pass the new block back to the caller.
+ *PBB = BB;
+ }
+ };
+
+ handleBlock(ThenBlock, UnreachableThen, TrueBlock, ThenToTailEdge);
+ handleBlock(ElseBlock, UnreachableElse, FalseBlock, ElseToTailEdge);
+
Instruction *HeadOldTerm = Head->getTerminator();
- LLVMContext &C = Head->getContext();
- BasicBlock *ThenBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
- BasicBlock *ElseBlock = BasicBlock::Create(C, "", Head->getParent(), Tail);
- *ThenTerm = BranchInst::Create(Tail, ThenBlock);
- (*ThenTerm)->setDebugLoc(SplitBefore->getDebugLoc());
- *ElseTerm = BranchInst::Create(Tail, ElseBlock);
- (*ElseTerm)->setDebugLoc(SplitBefore->getDebugLoc());
BranchInst *HeadNewTerm =
- BranchInst::Create(/*ifTrue*/ThenBlock, /*ifFalse*/ElseBlock, Cond);
+ BranchInst::Create(/*ifTrue*/ TrueBlock, /*ifFalse*/ FalseBlock, Cond);
HeadNewTerm->setMetadata(LLVMContext::MD_prof, BranchWeights);
ReplaceInstWithInst(HeadOldTerm, HeadNewTerm);
+
if (DTU) {
- SmallVector<DominatorTree::UpdateType, 8> Updates;
- Updates.reserve(4 + 2 * UniqueOrigSuccessors.size());
- for (BasicBlock *Succ : successors(Head)) {
- Updates.push_back({DominatorTree::Insert, Head, Succ});
- Updates.push_back({DominatorTree::Insert, Succ, Tail});
- }
+ Updates.emplace_back(DominatorTree::Insert, Head, TrueBlock);
+ Updates.emplace_back(DominatorTree::Insert, Head, FalseBlock);
+ if (ThenToTailEdge)
+ Updates.emplace_back(DominatorTree::Insert, TrueBlock, Tail);
+ if (ElseToTailEdge)
+ Updates.emplace_back(DominatorTree::Insert, FalseBlock, Tail);
for (BasicBlock *UniqueOrigSuccessor : UniqueOrigSuccessors)
- Updates.push_back({DominatorTree::Insert, Tail, UniqueOrigSuccessor});
+ Updates.emplace_back(DominatorTree::Insert, Tail, UniqueOrigSuccessor);
for (BasicBlock *UniqueOrigSuccessor : UniqueOrigSuccessors)
- Updates.push_back({DominatorTree::Delete, Head, UniqueOrigSuccessor});
+ Updates.emplace_back(DominatorTree::Delete, Head, UniqueOrigSuccessor);
DTU->applyUpdates(Updates);
}
+
+ if (LI) {
+ if (Loop *L = LI->getLoopFor(Head); L) {
+ if (ThenToTailEdge)
+ L->addBasicBlockToLoop(TrueBlock, *LI);
+ if (ElseToTailEdge)
+ L->addBasicBlockToLoop(FalseBlock, *LI);
+ L->addBasicBlockToLoop(Tail, *LI);
+ }
+ }
+}
+
+std::pair<Instruction*, Value*>
+llvm::SplitBlockAndInsertSimpleForLoop(Value *End, Instruction *SplitBefore) {
+ BasicBlock *LoopPred = SplitBefore->getParent();
+ BasicBlock *LoopBody = SplitBlock(SplitBefore->getParent(), SplitBefore);
+ BasicBlock *LoopExit = SplitBlock(SplitBefore->getParent(), SplitBefore);
+
+ auto *Ty = End->getType();
+ auto &DL = SplitBefore->getModule()->getDataLayout();
+ const unsigned Bitwidth = DL.getTypeSizeInBits(Ty);
+
+ IRBuilder<> Builder(LoopBody->getTerminator());
+ auto *IV = Builder.CreatePHI(Ty, 2, "iv");
+ auto *IVNext =
+ Builder.CreateAdd(IV, ConstantInt::get(Ty, 1), IV->getName() + ".next",
+ /*HasNUW=*/true, /*HasNSW=*/Bitwidth != 2);
+ auto *IVCheck = Builder.CreateICmpEQ(IVNext, End,
+ IV->getName() + ".check");
+ Builder.CreateCondBr(IVCheck, LoopExit, LoopBody);
+ LoopBody->getTerminator()->eraseFromParent();
+
+ // Populate the IV PHI.
+ IV->addIncoming(ConstantInt::get(Ty, 0), LoopPred);
+ IV->addIncoming(IVNext, LoopBody);
+
+ return std::make_pair(LoopBody->getFirstNonPHI(), IV);
+}
+
+void llvm::SplitBlockAndInsertForEachLane(ElementCount EC,
+ Type *IndexTy, Instruction *InsertBefore,
+ std::function<void(IRBuilderBase&, Value*)> Func) {
+
+ IRBuilder<> IRB(InsertBefore);
+
+ if (EC.isScalable()) {
+ Value *NumElements = IRB.CreateElementCount(IndexTy, EC);
+
+ auto [BodyIP, Index] =
+ SplitBlockAndInsertSimpleForLoop(NumElements, InsertBefore);
+
+ IRB.SetInsertPoint(BodyIP);
+ Func(IRB, Index);
+ return;
+ }
+
+ unsigned Num = EC.getFixedValue();
+ for (unsigned Idx = 0; Idx < Num; ++Idx) {
+ IRB.SetInsertPoint(InsertBefore);
+ Func(IRB, ConstantInt::get(IndexTy, Idx));
+ }
+}
+
+void llvm::SplitBlockAndInsertForEachLane(
+ Value *EVL, Instruction *InsertBefore,
+ std::function<void(IRBuilderBase &, Value *)> Func) {
+
+ IRBuilder<> IRB(InsertBefore);
+ Type *Ty = EVL->getType();
+
+ if (!isa<ConstantInt>(EVL)) {
+ auto [BodyIP, Index] = SplitBlockAndInsertSimpleForLoop(EVL, InsertBefore);
+ IRB.SetInsertPoint(BodyIP);
+ Func(IRB, Index);
+ return;
+ }
+
+ unsigned Num = cast<ConstantInt>(EVL)->getZExtValue();
+ for (unsigned Idx = 0; Idx < Num; ++Idx) {
+ IRB.SetInsertPoint(InsertBefore);
+ Func(IRB, ConstantInt::get(Ty, Idx));
+ }
}
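
The SplitBlockAndInsertForEachLane overloads above unroll the body callback when the element count (or EVL) is a compile-time constant and otherwise fall back to SplitBlockAndInsertSimpleForLoop. A minimal sketch of the fixed-width path, assuming the usual BasicBlockUtils/IRBuilder headers; the helper name expandPerLane and its body are illustrative, not part of the patch:

  // Sketch only: visit every lane of a fixed-width <4 x i32> value in place.
  static void expandPerLane(llvm::Value *Vec, llvm::Instruction *InsertBefore) {
    using namespace llvm;
    LLVMContext &Ctx = InsertBefore->getContext();
    SplitBlockAndInsertForEachLane(
        ElementCount::getFixed(4), Type::getInt64Ty(Ctx), InsertBefore,
        [&](IRBuilderBase &B, Value *Idx) {
          // Per-lane work goes here; extracting the lane is a placeholder.
          B.CreateExtractElement(Vec, Idx, "lane");
        });
  }

For a scalable ElementCount the same call would instead emit the counted loop produced by SplitBlockAndInsertSimpleForLoop and invoke the callback once inside the loop body.
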
BranchInst *llvm::GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
@@ -1997,3 +2063,17 @@ BasicBlock *llvm::CreateControlFlowHub(
return FirstGuardBlock;
}
+
+void llvm::InvertBranch(BranchInst *PBI, IRBuilderBase &Builder) {
+ Value *NewCond = PBI->getCondition();
+ // If this is a "cmp" instruction, only used for branching (and nowhere
+ // else), then we can simply invert the predicate.
+ if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
+ CmpInst *CI = cast<CmpInst>(NewCond);
+ CI->setPredicate(CI->getInversePredicate());
+ } else
+ NewCond = Builder.CreateNot(NewCond, NewCond->getName() + ".not");
+
+ PBI->setCondition(NewCond);
+ PBI->swapSuccessors();
+}
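
SplitBlockAndInsertIfThen/IfElse now funnel into the extended SplitBlockAndInsertIfThenElse, which can create just one of the two branch blocks and keeps the dominator tree and LoopInfo consistent through DomTreeUpdater. A hedged sketch of the common "guard some code with a condition" pattern; guardWithCond is illustrative and not part of the patch:

  // Sketch only: create "if (Cond) { ... }" before InsertPt, updating DT/LI.
  static llvm::Instruction *guardWithCond(llvm::Value *Cond,
                                          llvm::Instruction *InsertPt,
                                          llvm::DominatorTree *DT,
                                          llvm::LoopInfo *LI) {
    using namespace llvm;
    DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
    // Anything inserted before the returned terminator executes only when
    // Cond is true; control then falls through to the code at InsertPt.
    return SplitBlockAndInsertIfThen(Cond, InsertPt, /*Unreachable=*/false,
                                     /*BranchWeights=*/nullptr, &DTU, LI);
  }
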
diff --git a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index 1e21a2f85446..5de8ff84de77 100644
--- a/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -478,6 +478,8 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
case LibFunc_modfl:
Changed |= setDoesNotThrow(F);
Changed |= setWillReturn(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setOnlyWritesMemory(F);
Changed |= setDoesNotCapture(F, 1);
break;
case LibFunc_memcpy:
@@ -725,6 +727,8 @@ bool llvm::inferNonMandatoryLibFuncAttrs(Function &F,
case LibFunc_frexpl:
Changed |= setDoesNotThrow(F);
Changed |= setWillReturn(F);
+ Changed |= setOnlyAccessesArgMemory(F);
+ Changed |= setOnlyWritesMemory(F);
Changed |= setDoesNotCapture(F, 1);
break;
case LibFunc_fstatvfs:
@@ -1937,3 +1941,87 @@ Value *llvm::emitCalloc(Value *Num, Value *Size, IRBuilderBase &B,
return CI;
}
+
+Value *llvm::emitHotColdNew(Value *Num, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI, LibFunc NewFunc,
+ uint8_t HotCold) {
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, NewFunc))
+ return nullptr;
+
+ StringRef Name = TLI->getName(NewFunc);
+ FunctionCallee Func = M->getOrInsertFunction(Name, B.getInt8PtrTy(),
+ Num->getType(), B.getInt8Ty());
+ inferNonMandatoryLibFuncAttrs(M, Name, *TLI);
+ CallInst *CI = B.CreateCall(Func, {Num, B.getInt8(HotCold)}, Name);
+
+ if (const Function *F =
+ dyn_cast<Function>(Func.getCallee()->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+Value *llvm::emitHotColdNewNoThrow(Value *Num, Value *NoThrow, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI,
+ LibFunc NewFunc, uint8_t HotCold) {
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, NewFunc))
+ return nullptr;
+
+ StringRef Name = TLI->getName(NewFunc);
+ FunctionCallee Func =
+ M->getOrInsertFunction(Name, B.getInt8PtrTy(), Num->getType(),
+ NoThrow->getType(), B.getInt8Ty());
+ inferNonMandatoryLibFuncAttrs(M, Name, *TLI);
+ CallInst *CI = B.CreateCall(Func, {Num, NoThrow, B.getInt8(HotCold)}, Name);
+
+ if (const Function *F =
+ dyn_cast<Function>(Func.getCallee()->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+Value *llvm::emitHotColdNewAligned(Value *Num, Value *Align, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI,
+ LibFunc NewFunc, uint8_t HotCold) {
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, NewFunc))
+ return nullptr;
+
+ StringRef Name = TLI->getName(NewFunc);
+ FunctionCallee Func = M->getOrInsertFunction(
+ Name, B.getInt8PtrTy(), Num->getType(), Align->getType(), B.getInt8Ty());
+ inferNonMandatoryLibFuncAttrs(M, Name, *TLI);
+ CallInst *CI = B.CreateCall(Func, {Num, Align, B.getInt8(HotCold)}, Name);
+
+ if (const Function *F =
+ dyn_cast<Function>(Func.getCallee()->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
+Value *llvm::emitHotColdNewAlignedNoThrow(Value *Num, Value *Align,
+ Value *NoThrow, IRBuilderBase &B,
+ const TargetLibraryInfo *TLI,
+ LibFunc NewFunc, uint8_t HotCold) {
+ Module *M = B.GetInsertBlock()->getModule();
+ if (!isLibFuncEmittable(M, TLI, NewFunc))
+ return nullptr;
+
+ StringRef Name = TLI->getName(NewFunc);
+ FunctionCallee Func = M->getOrInsertFunction(
+ Name, B.getInt8PtrTy(), Num->getType(), Align->getType(),
+ NoThrow->getType(), B.getInt8Ty());
+ inferNonMandatoryLibFuncAttrs(M, Name, *TLI);
+ CallInst *CI =
+ B.CreateCall(Func, {Num, Align, NoThrow, B.getInt8(HotCold)}, Name);
+
+ if (const Function *F =
+ dyn_cast<Function>(Func.getCallee()->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
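
The emitHotColdNew* helpers above all follow the same shape: look the hot/cold operator-new variant up through TargetLibraryInfo, append an extra i8 hot/cold hint argument, and copy the callee's calling convention. A hedged sketch of how a pass might use the size-only variant; rewriteToHotColdNew, the hint value, and the surrounding assumptions are illustrative:

  // Sketch only: retarget a call to plain "operator new(size_t)" at the
  // hot/cold variant named by HotColdFunc (assumed to be one of the
  // __hot_cold_t LibFunc entries), forwarding the size and a hint byte.
  static bool rewriteToHotColdNew(llvm::CallBase &CB, llvm::LibFunc HotColdFunc,
                                  uint8_t Hint,
                                  const llvm::TargetLibraryInfo *TLI) {
    llvm::IRBuilder<> B(&CB);
    llvm::Value *NewCall =
        llvm::emitHotColdNew(CB.getArgOperand(0), B, TLI, HotColdFunc, Hint);
    if (!NewCall)
      return false; // The runtime does not provide this overload.
    CB.replaceAllUsesWith(NewCall);
    CB.eraseFromParent();
    return true;
  }
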
diff --git a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
index 930a0bcbfac5..73a50b793e6d 100644
--- a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -202,7 +202,7 @@ bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
ConstantInt *C = dyn_cast<ConstantInt>(Op1);
if (!C && isa<BitCastInst>(Op1))
C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
- return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
+ return C && C->getValue().getSignificantBits() > BypassType->getBitWidth();
}
case Instruction::PHI:
// Stop IR traversal in case of a crazy input code. This limits recursion
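
The getMinSignedBits() call here is replaced by getSignificantBits(), the renamed APInt accessor with identical semantics: the smallest bit width that can still represent the value as a signed integer. A small illustration, with values chosen only for the example:

  llvm::APInt AllOnes(32, -1, /*isSigned=*/true); // 0xFFFFFFFF, i.e. -1
  assert(AllOnes.getSignificantBits() == 1);      // the sign bit alone suffices
  llvm::APInt Small(32, 255);                     // 0x000000FF
  assert(Small.getSignificantBits() == 9);        // 8 value bits + sign bit
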
diff --git a/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp b/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp
index d0b89ba2606e..d0b9884aa909 100644
--- a/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp
+++ b/llvm/lib/Transforms/Utils/CallGraphUpdater.cpp
@@ -120,6 +120,8 @@ void CallGraphUpdater::removeFunction(Function &DeadFn) {
DeadCGN->removeAllCalledFunctions();
CGSCC->DeleteNode(DeadCGN);
}
+ if (FAM)
+ FAM->clear(DeadFn, DeadFn.getName());
}
void CallGraphUpdater::replaceFunctionWith(Function &OldFn, Function &NewFn) {
diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index 4a82f9606d3f..b488e3bb0cbd 100644
--- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
diff --git a/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp b/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
index 4d622679dbdb..c24b6ed70405 100644
--- a/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
+++ b/llvm/lib/Transforms/Utils/CanonicalizeAliases.cpp
@@ -31,8 +31,6 @@
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/IR/Constants.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 87822ee85c2b..d55208602b71 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -470,9 +470,8 @@ void PruningFunctionCloner::CloneBlock(
// Nope, clone it now.
BasicBlock *NewBB;
- BBEntry = NewBB = BasicBlock::Create(BB->getContext());
- if (BB->hasName())
- NewBB->setName(BB->getName() + NameSuffix);
+ Twine NewName(BB->hasName() ? Twine(BB->getName()) + NameSuffix : "");
+ BBEntry = NewBB = BasicBlock::Create(BB->getContext(), NewName, NewFunc);
// It is only legal to clone a function if a block address within that
// function is never referenced outside of the function. Given that, we
@@ -498,6 +497,7 @@ void PruningFunctionCloner::CloneBlock(
++II) {
Instruction *NewInst = cloneInstruction(II);
+ NewInst->insertInto(NewBB, NewBB->end());
if (HostFuncIsStrictFP) {
// All function calls in the inlined function must get 'strictfp'
@@ -526,7 +526,7 @@ void PruningFunctionCloner::CloneBlock(
if (!NewInst->mayHaveSideEffects()) {
VMap[&*II] = V;
- NewInst->deleteValue();
+ NewInst->eraseFromParent();
continue;
}
}
@@ -535,7 +535,6 @@ void PruningFunctionCloner::CloneBlock(
if (II->hasName())
NewInst->setName(II->getName() + NameSuffix);
VMap[&*II] = NewInst; // Add instruction map to value.
- NewInst->insertInto(NewBB, NewBB->end());
if (isa<CallInst>(II) && !II->isDebugOrPseudoInst()) {
hasCalls = true;
hasMemProfMetadata |= II->hasMetadata(LLVMContext::MD_memprof);
@@ -683,8 +682,8 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
if (!NewBB)
continue; // Dead block.
- // Add the new block to the new function.
- NewFunc->insert(NewFunc->end(), NewBB);
+ // Move the new block to preserve the order in the original function.
+ NewBB->moveBefore(NewFunc->end());
// Handle PHI nodes specially, as we have to remove references to dead
// blocks.
@@ -937,8 +936,8 @@ void llvm::CloneAndPruneFunctionInto(
}
/// Remaps instructions in \p Blocks using the mapping in \p VMap.
-void llvm::remapInstructionsInBlocks(
- const SmallVectorImpl<BasicBlock *> &Blocks, ValueToValueMapTy &VMap) {
+void llvm::remapInstructionsInBlocks(ArrayRef<BasicBlock *> Blocks,
+ ValueToValueMapTy &VMap) {
// Rewrite the code to refer to itself.
for (auto *BB : Blocks)
for (auto &Inst : *BB)
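
remapInstructionsInBlocks now accepts any ArrayRef of blocks rather than requiring SmallVectorImpl storage. A hedged sketch of a typical call site; cloneAndRemap is illustrative and assumes VMap already maps the values live into OldBB:

  // Sketch only: clone one block into F and rewrite its operands via VMap.
  static llvm::BasicBlock *cloneAndRemap(llvm::BasicBlock *OldBB,
                                         llvm::Function &F,
                                         llvm::ValueToValueMapTy &VMap) {
    llvm::BasicBlock *NewBB = llvm::CloneBasicBlock(OldBB, VMap, ".clone", &F);
    VMap[OldBB] = NewBB;
    // Braced lists, SmallVector, std::vector, etc. all convert to ArrayRef.
    llvm::remapInstructionsInBlocks({NewBB}, VMap);
    return NewBB;
  }
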
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index c1fe10504e45..c390af351a69 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -918,6 +918,7 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
case Attribute::AllocKind:
case Attribute::PresplitCoroutine:
case Attribute::Memory:
+ case Attribute::NoFPClass:
continue;
// Those attributes should be safe to propagate to the extracted function.
case Attribute::AlwaysInline:
@@ -1091,32 +1092,20 @@ static void insertLifetimeMarkersSurroundingCall(
Module *M, ArrayRef<Value *> LifetimesStart, ArrayRef<Value *> LifetimesEnd,
CallInst *TheCall) {
LLVMContext &Ctx = M->getContext();
- auto Int8PtrTy = Type::getInt8PtrTy(Ctx);
auto NegativeOne = ConstantInt::getSigned(Type::getInt64Ty(Ctx), -1);
Instruction *Term = TheCall->getParent()->getTerminator();
- // The memory argument to a lifetime marker must be a i8*. Cache any bitcasts
- // needed to satisfy this requirement so they may be reused.
- DenseMap<Value *, Value *> Bitcasts;
-
// Emit lifetime markers for the pointers given in \p Objects. Insert the
// markers before the call if \p InsertBefore, and after the call otherwise.
- auto insertMarkers = [&](Function *MarkerFunc, ArrayRef<Value *> Objects,
+ auto insertMarkers = [&](Intrinsic::ID MarkerFunc, ArrayRef<Value *> Objects,
bool InsertBefore) {
for (Value *Mem : Objects) {
assert((!isa<Instruction>(Mem) || cast<Instruction>(Mem)->getFunction() ==
TheCall->getFunction()) &&
"Input memory not defined in original function");
- Value *&MemAsI8Ptr = Bitcasts[Mem];
- if (!MemAsI8Ptr) {
- if (Mem->getType() == Int8PtrTy)
- MemAsI8Ptr = Mem;
- else
- MemAsI8Ptr =
- CastInst::CreatePointerCast(Mem, Int8PtrTy, "lt.cast", TheCall);
- }
- auto Marker = CallInst::Create(MarkerFunc, {NegativeOne, MemAsI8Ptr});
+ Function *Func = Intrinsic::getDeclaration(M, MarkerFunc, Mem->getType());
+ auto Marker = CallInst::Create(Func, {NegativeOne, Mem});
if (InsertBefore)
Marker->insertBefore(TheCall);
else
@@ -1125,15 +1114,13 @@ static void insertLifetimeMarkersSurroundingCall(
};
if (!LifetimesStart.empty()) {
- auto StartFn = llvm::Intrinsic::getDeclaration(
- M, llvm::Intrinsic::lifetime_start, Int8PtrTy);
- insertMarkers(StartFn, LifetimesStart, /*InsertBefore=*/true);
+ insertMarkers(Intrinsic::lifetime_start, LifetimesStart,
+ /*InsertBefore=*/true);
}
if (!LifetimesEnd.empty()) {
- auto EndFn = llvm::Intrinsic::getDeclaration(
- M, llvm::Intrinsic::lifetime_end, Int8PtrTy);
- insertMarkers(EndFn, LifetimesEnd, /*InsertBefore=*/false);
+ insertMarkers(Intrinsic::lifetime_end, LifetimesEnd,
+ /*InsertBefore=*/false);
}
}
@@ -1663,14 +1650,14 @@ CodeExtractor::extractCodeRegion(const CodeExtractorAnalysisCache &CEAC,
}
}
- // Remove CondGuardInsts that will be moved to the new function from the old
- // function's assumption cache.
+ // Remove @llvm.assume calls that will be moved to the new function from the
+ // old function's assumption cache.
for (BasicBlock *Block : Blocks) {
for (Instruction &I : llvm::make_early_inc_range(*Block)) {
- if (auto *CI = dyn_cast<CondGuardInst>(&I)) {
+ if (auto *AI = dyn_cast<AssumeInst>(&I)) {
if (AC)
- AC->unregisterAssumption(CI);
- CI->eraseFromParent();
+ AC->unregisterAssumption(AI);
+ AI->eraseFromParent();
}
}
}
@@ -1864,7 +1851,7 @@ bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc,
const Function &NewFunc,
AssumptionCache *AC) {
for (auto AssumeVH : AC->assumptions()) {
- auto *I = dyn_cast_or_null<CondGuardInst>(AssumeVH);
+ auto *I = dyn_cast_or_null<CallInst>(AssumeVH);
if (!I)
continue;
@@ -1876,7 +1863,7 @@ bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc,
// that were previously in the old function, but that have now been moved
// to the new function.
for (auto AffectedValVH : AC->assumptionsFor(I->getOperand(0))) {
- auto *AffectedCI = dyn_cast_or_null<CondGuardInst>(AffectedValVH);
+ auto *AffectedCI = dyn_cast_or_null<CallInst>(AffectedValVH);
if (!AffectedCI)
continue;
if (AffectedCI->getFunction() != &OldFunc)
diff --git a/llvm/lib/Transforms/Utils/CodeLayout.cpp b/llvm/lib/Transforms/Utils/CodeLayout.cpp
index 9eb3aff3ffe8..ac74a1c116cc 100644
--- a/llvm/lib/Transforms/Utils/CodeLayout.cpp
+++ b/llvm/lib/Transforms/Utils/CodeLayout.cpp
@@ -6,7 +6,8 @@
//
//===----------------------------------------------------------------------===//
//
-// ExtTSP - layout of basic blocks with i-cache optimization.
+// The file implements "cache-aware" layout algorithms for basic blocks and
+// functions in a binary.
//
// The algorithm tries to find a layout of nodes (basic blocks) of a given CFG
// optimizing jump locality and thus processor I-cache utilization. This is
@@ -41,12 +42,14 @@
#include "llvm/Transforms/Utils/CodeLayout.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include <cmath>
using namespace llvm;
#define DEBUG_TYPE "code-layout"
+namespace llvm {
cl::opt<bool> EnableExtTspBlockPlacement(
"enable-ext-tsp-block-placement", cl::Hidden, cl::init(false),
cl::desc("Enable machine block placement based on the ext-tsp model, "
@@ -56,6 +59,7 @@ cl::opt<bool> ApplyExtTspWithoutProfile(
"ext-tsp-apply-without-profile",
cl::desc("Whether to apply ext-tsp placement for instances w/o profile"),
cl::init(true), cl::Hidden);
+} // namespace llvm
// Algorithm-specific params. The values are tuned for the best performance
// of large-scale front-end bound binaries.
@@ -69,11 +73,11 @@ static cl::opt<double> ForwardWeightUncond(
static cl::opt<double> BackwardWeightCond(
"ext-tsp-backward-weight-cond", cl::ReallyHidden, cl::init(0.1),
- cl::desc("The weight of conditonal backward jumps for ExtTSP value"));
+ cl::desc("The weight of conditional backward jumps for ExtTSP value"));
static cl::opt<double> BackwardWeightUncond(
"ext-tsp-backward-weight-uncond", cl::ReallyHidden, cl::init(0.1),
- cl::desc("The weight of unconditonal backward jumps for ExtTSP value"));
+ cl::desc("The weight of unconditional backward jumps for ExtTSP value"));
static cl::opt<double> FallthroughWeightCond(
"ext-tsp-fallthrough-weight-cond", cl::ReallyHidden, cl::init(1.0),
@@ -149,29 +153,30 @@ double extTSPScore(uint64_t SrcAddr, uint64_t SrcSize, uint64_t DstAddr,
/// A type of merging two chains, X and Y. The former chain is split into
/// X1 and X2 and then concatenated with Y in the order specified by the type.
-enum class MergeTypeTy : int { X_Y, X1_Y_X2, Y_X2_X1, X2_X1_Y };
+enum class MergeTypeT : int { X_Y, Y_X, X1_Y_X2, Y_X2_X1, X2_X1_Y };
/// The gain of merging two chains, that is, the Ext-TSP score of the merge
-/// together with the corresponfiding merge 'type' and 'offset'.
-class MergeGainTy {
-public:
- explicit MergeGainTy() = default;
- explicit MergeGainTy(double Score, size_t MergeOffset, MergeTypeTy MergeType)
+/// together with the corresponding merge 'type' and 'offset'.
+struct MergeGainT {
+ explicit MergeGainT() = default;
+ explicit MergeGainT(double Score, size_t MergeOffset, MergeTypeT MergeType)
: Score(Score), MergeOffset(MergeOffset), MergeType(MergeType) {}
double score() const { return Score; }
size_t mergeOffset() const { return MergeOffset; }
- MergeTypeTy mergeType() const { return MergeType; }
+ MergeTypeT mergeType() const { return MergeType; }
+
+ void setMergeType(MergeTypeT Ty) { MergeType = Ty; }
// Returns 'true' iff Other is preferred over this.
- bool operator<(const MergeGainTy &Other) const {
+ bool operator<(const MergeGainT &Other) const {
return (Other.Score > EPS && Other.Score > Score + EPS);
}
// Update the current gain if Other is preferred over this.
- void updateIfLessThan(const MergeGainTy &Other) {
+ void updateIfLessThan(const MergeGainT &Other) {
if (*this < Other)
*this = Other;
}
@@ -179,106 +184,102 @@ public:
private:
double Score{-1.0};
size_t MergeOffset{0};
- MergeTypeTy MergeType{MergeTypeTy::X_Y};
+ MergeTypeT MergeType{MergeTypeT::X_Y};
};
-class Jump;
-class Chain;
-class ChainEdge;
+struct JumpT;
+struct ChainT;
+struct ChainEdge;
-/// A node in the graph, typically corresponding to a basic block in CFG.
-class Block {
-public:
- Block(const Block &) = delete;
- Block(Block &&) = default;
- Block &operator=(const Block &) = delete;
- Block &operator=(Block &&) = default;
+/// A node in the graph, typically corresponding to a basic block in the CFG or
+/// a function in the call graph.
+struct NodeT {
+ NodeT(const NodeT &) = delete;
+ NodeT(NodeT &&) = default;
+ NodeT &operator=(const NodeT &) = delete;
+ NodeT &operator=(NodeT &&) = default;
+
+ explicit NodeT(size_t Index, uint64_t Size, uint64_t EC)
+ : Index(Index), Size(Size), ExecutionCount(EC) {}
+
+ bool isEntry() const { return Index == 0; }
+
+ // The total execution count of outgoing jumps.
+ uint64_t outCount() const;
+
+ // The total execution count of incoming jumps.
+ uint64_t inCount() const;
- // The original index of the block in CFG.
+ // The original index of the node in graph.
size_t Index{0};
- // The index of the block in the current chain.
+ // The index of the node in the current chain.
size_t CurIndex{0};
- // Size of the block in the binary.
+ // The size of the node in the binary.
uint64_t Size{0};
- // Execution count of the block in the profile data.
+ // The execution count of the node in the profile data.
uint64_t ExecutionCount{0};
- // Current chain of the node.
- Chain *CurChain{nullptr};
- // An offset of the block in the current chain.
+ // The current chain of the node.
+ ChainT *CurChain{nullptr};
+ // The offset of the node in the current chain.
mutable uint64_t EstimatedAddr{0};
- // Forced successor of the block in CFG.
- Block *ForcedSucc{nullptr};
- // Forced predecessor of the block in CFG.
- Block *ForcedPred{nullptr};
- // Outgoing jumps from the block.
- std::vector<Jump *> OutJumps;
- // Incoming jumps to the block.
- std::vector<Jump *> InJumps;
-
-public:
- explicit Block(size_t Index, uint64_t Size, uint64_t EC)
- : Index(Index), Size(Size), ExecutionCount(EC) {}
- bool isEntry() const { return Index == 0; }
+ // Forced successor of the node in the graph.
+ NodeT *ForcedSucc{nullptr};
+ // Forced predecessor of the node in the graph.
+ NodeT *ForcedPred{nullptr};
+ // Outgoing jumps from the node.
+ std::vector<JumpT *> OutJumps;
+ // Incoming jumps to the node.
+ std::vector<JumpT *> InJumps;
};
-/// An arc in the graph, typically corresponding to a jump between two blocks.
-class Jump {
-public:
- Jump(const Jump &) = delete;
- Jump(Jump &&) = default;
- Jump &operator=(const Jump &) = delete;
- Jump &operator=(Jump &&) = default;
-
- // Source block of the jump.
- Block *Source;
- // Target block of the jump.
- Block *Target;
+/// An arc in the graph, typically corresponding to a jump between two nodes.
+struct JumpT {
+ JumpT(const JumpT &) = delete;
+ JumpT(JumpT &&) = default;
+ JumpT &operator=(const JumpT &) = delete;
+ JumpT &operator=(JumpT &&) = default;
+
+ explicit JumpT(NodeT *Source, NodeT *Target, uint64_t ExecutionCount)
+ : Source(Source), Target(Target), ExecutionCount(ExecutionCount) {}
+
+ // Source node of the jump.
+ NodeT *Source;
+ // Target node of the jump.
+ NodeT *Target;
// Execution count of the arc in the profile data.
uint64_t ExecutionCount{0};
// Whether the jump corresponds to a conditional branch.
bool IsConditional{false};
-
-public:
- explicit Jump(Block *Source, Block *Target, uint64_t ExecutionCount)
- : Source(Source), Target(Target), ExecutionCount(ExecutionCount) {}
+ // The offset of the jump from the source node.
+ uint64_t Offset{0};
};
-/// A chain (ordered sequence) of blocks.
-class Chain {
-public:
- Chain(const Chain &) = delete;
- Chain(Chain &&) = default;
- Chain &operator=(const Chain &) = delete;
- Chain &operator=(Chain &&) = default;
+/// A chain (ordered sequence) of nodes in the graph.
+struct ChainT {
+ ChainT(const ChainT &) = delete;
+ ChainT(ChainT &&) = default;
+ ChainT &operator=(const ChainT &) = delete;
+ ChainT &operator=(ChainT &&) = default;
+
+ explicit ChainT(uint64_t Id, NodeT *Node)
+ : Id(Id), ExecutionCount(Node->ExecutionCount), Size(Node->Size),
+ Nodes(1, Node) {}
- explicit Chain(uint64_t Id, Block *Block)
- : Id(Id), Score(0), Blocks(1, Block) {}
+ size_t numBlocks() const { return Nodes.size(); }
- uint64_t id() const { return Id; }
+ double density() const { return static_cast<double>(ExecutionCount) / Size; }
- bool isEntry() const { return Blocks[0]->Index == 0; }
+ bool isEntry() const { return Nodes[0]->Index == 0; }
bool isCold() const {
- for (auto *Block : Blocks) {
- if (Block->ExecutionCount > 0)
+ for (NodeT *Node : Nodes) {
+ if (Node->ExecutionCount > 0)
return false;
}
return true;
}
- double score() const { return Score; }
-
- void setScore(double NewScore) { Score = NewScore; }
-
- const std::vector<Block *> &blocks() const { return Blocks; }
-
- size_t numBlocks() const { return Blocks.size(); }
-
- const std::vector<std::pair<Chain *, ChainEdge *>> &edges() const {
- return Edges;
- }
-
- ChainEdge *getEdge(Chain *Other) const {
+ ChainEdge *getEdge(ChainT *Other) const {
for (auto It : Edges) {
if (It.first == Other)
return It.second;
@@ -286,7 +287,7 @@ public:
return nullptr;
}
- void removeEdge(Chain *Other) {
+ void removeEdge(ChainT *Other) {
auto It = Edges.begin();
while (It != Edges.end()) {
if (It->first == Other) {
@@ -297,63 +298,68 @@ public:
}
}
- void addEdge(Chain *Other, ChainEdge *Edge) {
+ void addEdge(ChainT *Other, ChainEdge *Edge) {
Edges.push_back(std::make_pair(Other, Edge));
}
- void merge(Chain *Other, const std::vector<Block *> &MergedBlocks) {
- Blocks = MergedBlocks;
- // Update the block's chains
- for (size_t Idx = 0; Idx < Blocks.size(); Idx++) {
- Blocks[Idx]->CurChain = this;
- Blocks[Idx]->CurIndex = Idx;
+ void merge(ChainT *Other, const std::vector<NodeT *> &MergedBlocks) {
+ Nodes = MergedBlocks;
+ // Update the chain's data
+ ExecutionCount += Other->ExecutionCount;
+ Size += Other->Size;
+ Id = Nodes[0]->Index;
+ // Update the node's data
+ for (size_t Idx = 0; Idx < Nodes.size(); Idx++) {
+ Nodes[Idx]->CurChain = this;
+ Nodes[Idx]->CurIndex = Idx;
}
}
- void mergeEdges(Chain *Other);
+ void mergeEdges(ChainT *Other);
void clear() {
- Blocks.clear();
- Blocks.shrink_to_fit();
+ Nodes.clear();
+ Nodes.shrink_to_fit();
Edges.clear();
Edges.shrink_to_fit();
}
-private:
// Unique chain identifier.
uint64_t Id;
// Cached ext-tsp score for the chain.
- double Score;
- // Blocks of the chain.
- std::vector<Block *> Blocks;
+ double Score{0};
+ // The total execution count of the chain.
+ uint64_t ExecutionCount{0};
+ // The total size of the chain.
+ uint64_t Size{0};
+ // Nodes of the chain.
+ std::vector<NodeT *> Nodes;
// Adjacent chains and corresponding edges (lists of jumps).
- std::vector<std::pair<Chain *, ChainEdge *>> Edges;
+ std::vector<std::pair<ChainT *, ChainEdge *>> Edges;
};
-/// An edge in CFG representing jumps between two chains.
-/// When blocks are merged into chains, the edges are combined too so that
+/// An edge in the graph representing jumps between two chains.
+/// When nodes are merged into chains, the edges are combined too so that
/// there is always at most one edge between a pair of chains
-class ChainEdge {
-public:
+struct ChainEdge {
ChainEdge(const ChainEdge &) = delete;
ChainEdge(ChainEdge &&) = default;
ChainEdge &operator=(const ChainEdge &) = delete;
- ChainEdge &operator=(ChainEdge &&) = default;
+ ChainEdge &operator=(ChainEdge &&) = delete;
- explicit ChainEdge(Jump *Jump)
+ explicit ChainEdge(JumpT *Jump)
: SrcChain(Jump->Source->CurChain), DstChain(Jump->Target->CurChain),
Jumps(1, Jump) {}
- const std::vector<Jump *> &jumps() const { return Jumps; }
+ ChainT *srcChain() const { return SrcChain; }
- void changeEndpoint(Chain *From, Chain *To) {
- if (From == SrcChain)
- SrcChain = To;
- if (From == DstChain)
- DstChain = To;
- }
+ ChainT *dstChain() const { return DstChain; }
+
+ bool isSelfEdge() const { return SrcChain == DstChain; }
- void appendJump(Jump *Jump) { Jumps.push_back(Jump); }
+ const std::vector<JumpT *> &jumps() const { return Jumps; }
+
+ void appendJump(JumpT *Jump) { Jumps.push_back(Jump); }
void moveJumps(ChainEdge *Other) {
Jumps.insert(Jumps.end(), Other->Jumps.begin(), Other->Jumps.end());
@@ -361,15 +367,22 @@ public:
Other->Jumps.shrink_to_fit();
}
- bool hasCachedMergeGain(Chain *Src, Chain *Dst) const {
+ void changeEndpoint(ChainT *From, ChainT *To) {
+ if (From == SrcChain)
+ SrcChain = To;
+ if (From == DstChain)
+ DstChain = To;
+ }
+
+ bool hasCachedMergeGain(ChainT *Src, ChainT *Dst) const {
return Src == SrcChain ? CacheValidForward : CacheValidBackward;
}
- MergeGainTy getCachedMergeGain(Chain *Src, Chain *Dst) const {
+ MergeGainT getCachedMergeGain(ChainT *Src, ChainT *Dst) const {
return Src == SrcChain ? CachedGainForward : CachedGainBackward;
}
- void setCachedMergeGain(Chain *Src, Chain *Dst, MergeGainTy MergeGain) {
+ void setCachedMergeGain(ChainT *Src, ChainT *Dst, MergeGainT MergeGain) {
if (Src == SrcChain) {
CachedGainForward = MergeGain;
CacheValidForward = true;
@@ -384,31 +397,55 @@ public:
CacheValidBackward = false;
}
+ void setMergeGain(MergeGainT Gain) { CachedGain = Gain; }
+
+ MergeGainT getMergeGain() const { return CachedGain; }
+
+ double gain() const { return CachedGain.score(); }
+
private:
// Source chain.
- Chain *SrcChain{nullptr};
+ ChainT *SrcChain{nullptr};
// Destination chain.
- Chain *DstChain{nullptr};
- // Original jumps in the binary with correspinding execution counts.
- std::vector<Jump *> Jumps;
- // Cached ext-tsp value for merging the pair of chains.
- // Since the gain of merging (Src, Dst) and (Dst, Src) might be different,
- // we store both values here.
- MergeGainTy CachedGainForward;
- MergeGainTy CachedGainBackward;
+ ChainT *DstChain{nullptr};
+ // Original jumps in the binary with corresponding execution counts.
+ std::vector<JumpT *> Jumps;
+ // Cached gain value for merging the pair of chains.
+ MergeGainT CachedGain;
+
+  // Cached gain values for merging the pair of chains. Since the gain of
+  // merging (Src, Dst) and (Dst, Src) might be different, we store both values
+  // here, together with flags indicating whether each cached value is valid.
+  MergeGainT CachedGainForward;
+  MergeGainT CachedGainBackward;
// Whether the cached value must be recomputed.
bool CacheValidForward{false};
bool CacheValidBackward{false};
};
-void Chain::mergeEdges(Chain *Other) {
- assert(this != Other && "cannot merge a chain with itself");
+uint64_t NodeT::outCount() const {
+ uint64_t Count = 0;
+ for (JumpT *Jump : OutJumps) {
+ Count += Jump->ExecutionCount;
+ }
+ return Count;
+}
+uint64_t NodeT::inCount() const {
+ uint64_t Count = 0;
+ for (JumpT *Jump : InJumps) {
+ Count += Jump->ExecutionCount;
+ }
+ return Count;
+}
+
+void ChainT::mergeEdges(ChainT *Other) {
// Update edges adjacent to chain Other
for (auto EdgeIt : Other->Edges) {
- Chain *DstChain = EdgeIt.first;
+ ChainT *DstChain = EdgeIt.first;
ChainEdge *DstEdge = EdgeIt.second;
- Chain *TargetChain = DstChain == Other ? this : DstChain;
+ ChainT *TargetChain = DstChain == Other ? this : DstChain;
ChainEdge *CurEdge = getEdge(TargetChain);
if (CurEdge == nullptr) {
DstEdge->changeEndpoint(Other, this);
@@ -426,15 +463,14 @@ void Chain::mergeEdges(Chain *Other) {
}
}
-using BlockIter = std::vector<Block *>::const_iterator;
+using NodeIter = std::vector<NodeT *>::const_iterator;
-/// A wrapper around three chains of blocks; it is used to avoid extra
+/// A wrapper around three chains of nodes; it is used to avoid extra
/// instantiation of the vectors.
-class MergedChain {
-public:
- MergedChain(BlockIter Begin1, BlockIter End1, BlockIter Begin2 = BlockIter(),
- BlockIter End2 = BlockIter(), BlockIter Begin3 = BlockIter(),
- BlockIter End3 = BlockIter())
+struct MergedChain {
+ MergedChain(NodeIter Begin1, NodeIter End1, NodeIter Begin2 = NodeIter(),
+ NodeIter End2 = NodeIter(), NodeIter Begin3 = NodeIter(),
+ NodeIter End3 = NodeIter())
: Begin1(Begin1), End1(End1), Begin2(Begin2), End2(End2), Begin3(Begin3),
End3(End3) {}
@@ -447,8 +483,8 @@ public:
Func(*It);
}
- std::vector<Block *> getBlocks() const {
- std::vector<Block *> Result;
+ std::vector<NodeT *> getNodes() const {
+ std::vector<NodeT *> Result;
Result.reserve(std::distance(Begin1, End1) + std::distance(Begin2, End2) +
std::distance(Begin3, End3));
Result.insert(Result.end(), Begin1, End1);
@@ -457,42 +493,71 @@ public:
return Result;
}
- const Block *getFirstBlock() const { return *Begin1; }
+ const NodeT *getFirstNode() const { return *Begin1; }
private:
- BlockIter Begin1;
- BlockIter End1;
- BlockIter Begin2;
- BlockIter End2;
- BlockIter Begin3;
- BlockIter End3;
+ NodeIter Begin1;
+ NodeIter End1;
+ NodeIter Begin2;
+ NodeIter End2;
+ NodeIter Begin3;
+ NodeIter End3;
};
+/// Merge two chains of nodes respecting a given 'type' and 'offset'.
+///
+/// For MergeTypeT::X_Y and MergeTypeT::Y_X, the result is a concatenation of
+/// the two chains. Otherwise, the first chain is cut into two sub-chains at
+/// the offset, and the pieces are merged using all possible ways of
+/// concatenating three chains.
+MergedChain mergeNodes(const std::vector<NodeT *> &X,
+ const std::vector<NodeT *> &Y, size_t MergeOffset,
+ MergeTypeT MergeType) {
+ // Split the first chain, X, into X1 and X2
+ NodeIter BeginX1 = X.begin();
+ NodeIter EndX1 = X.begin() + MergeOffset;
+ NodeIter BeginX2 = X.begin() + MergeOffset;
+ NodeIter EndX2 = X.end();
+ NodeIter BeginY = Y.begin();
+ NodeIter EndY = Y.end();
+
+ // Construct a new chain from the three existing ones
+ switch (MergeType) {
+ case MergeTypeT::X_Y:
+ return MergedChain(BeginX1, EndX2, BeginY, EndY);
+ case MergeTypeT::Y_X:
+ return MergedChain(BeginY, EndY, BeginX1, EndX2);
+ case MergeTypeT::X1_Y_X2:
+ return MergedChain(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
+ case MergeTypeT::Y_X2_X1:
+ return MergedChain(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
+ case MergeTypeT::X2_X1_Y:
+ return MergedChain(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
+ }
+ llvm_unreachable("unexpected chain merge type");
+}
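
For reference, with X = {x1, x2, x3}, Y = {y1, y2} and MergeOffset = 1 (so X1 = {x1}, X2 = {x2, x3}), the merge types above produce the following node orders; this is a worked example, not code from the patch:

  //   X_Y     -> x1 x2 x3 y1 y2   (plain concatenation; the offset is unused)
  //   Y_X     -> y1 y2 x1 x2 x3   (plain concatenation; the offset is unused)
  //   X1_Y_X2 -> x1 y1 y2 x2 x3
  //   Y_X2_X1 -> y1 y2 x2 x3 x1
  //   X2_X1_Y -> x2 x3 x1 y1 y2
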
+
/// The implementation of the ExtTSP algorithm.
class ExtTSPImpl {
- using EdgeT = std::pair<uint64_t, uint64_t>;
- using EdgeCountMap = std::vector<std::pair<EdgeT, uint64_t>>;
-
public:
- ExtTSPImpl(size_t NumNodes, const std::vector<uint64_t> &NodeSizes,
+ ExtTSPImpl(const std::vector<uint64_t> &NodeSizes,
const std::vector<uint64_t> &NodeCounts,
- const EdgeCountMap &EdgeCounts)
- : NumNodes(NumNodes) {
+ const std::vector<EdgeCountT> &EdgeCounts)
+ : NumNodes(NodeSizes.size()) {
initialize(NodeSizes, NodeCounts, EdgeCounts);
}
- /// Run the algorithm and return an optimized ordering of blocks.
+ /// Run the algorithm and return an optimized ordering of nodes.
void run(std::vector<uint64_t> &Result) {
- // Pass 1: Merge blocks with their mutually forced successors
+ // Pass 1: Merge nodes with their mutually forced successors
mergeForcedPairs();
// Pass 2: Merge pairs of chains while improving the ExtTSP objective
mergeChainPairs();
- // Pass 3: Merge cold blocks to reduce code size
+ // Pass 3: Merge cold nodes to reduce code size
mergeColdChains();
- // Collect blocks from all chains
+ // Collect nodes from all chains
concatChains(Result);
}
@@ -500,26 +565,26 @@ private:
/// Initialize the algorithm's data structures.
void initialize(const std::vector<uint64_t> &NodeSizes,
const std::vector<uint64_t> &NodeCounts,
- const EdgeCountMap &EdgeCounts) {
- // Initialize blocks
- AllBlocks.reserve(NumNodes);
- for (uint64_t Node = 0; Node < NumNodes; Node++) {
- uint64_t Size = std::max<uint64_t>(NodeSizes[Node], 1ULL);
- uint64_t ExecutionCount = NodeCounts[Node];
- // The execution count of the entry block is set to at least 1
- if (Node == 0 && ExecutionCount == 0)
+ const std::vector<EdgeCountT> &EdgeCounts) {
+ // Initialize nodes
+ AllNodes.reserve(NumNodes);
+ for (uint64_t Idx = 0; Idx < NumNodes; Idx++) {
+ uint64_t Size = std::max<uint64_t>(NodeSizes[Idx], 1ULL);
+ uint64_t ExecutionCount = NodeCounts[Idx];
+ // The execution count of the entry node is set to at least one
+ if (Idx == 0 && ExecutionCount == 0)
ExecutionCount = 1;
- AllBlocks.emplace_back(Node, Size, ExecutionCount);
+ AllNodes.emplace_back(Idx, Size, ExecutionCount);
}
- // Initialize jumps between blocks
+ // Initialize jumps between nodes
SuccNodes.resize(NumNodes);
PredNodes.resize(NumNodes);
std::vector<uint64_t> OutDegree(NumNodes, 0);
AllJumps.reserve(EdgeCounts.size());
for (auto It : EdgeCounts) {
- auto Pred = It.first.first;
- auto Succ = It.first.second;
+ uint64_t Pred = It.first.first;
+ uint64_t Succ = It.first.second;
OutDegree[Pred]++;
// Ignore self-edges
if (Pred == Succ)
@@ -527,16 +592,16 @@ private:
SuccNodes[Pred].push_back(Succ);
PredNodes[Succ].push_back(Pred);
- auto ExecutionCount = It.second;
+ uint64_t ExecutionCount = It.second;
if (ExecutionCount > 0) {
- auto &Block = AllBlocks[Pred];
- auto &SuccBlock = AllBlocks[Succ];
- AllJumps.emplace_back(&Block, &SuccBlock, ExecutionCount);
- SuccBlock.InJumps.push_back(&AllJumps.back());
- Block.OutJumps.push_back(&AllJumps.back());
+ NodeT &PredNode = AllNodes[Pred];
+ NodeT &SuccNode = AllNodes[Succ];
+ AllJumps.emplace_back(&PredNode, &SuccNode, ExecutionCount);
+ SuccNode.InJumps.push_back(&AllJumps.back());
+ PredNode.OutJumps.push_back(&AllJumps.back());
}
}
- for (auto &Jump : AllJumps) {
+ for (JumpT &Jump : AllJumps) {
assert(OutDegree[Jump.Source->Index] > 0);
Jump.IsConditional = OutDegree[Jump.Source->Index] > 1;
}
@@ -544,78 +609,78 @@ private:
// Initialize chains
AllChains.reserve(NumNodes);
HotChains.reserve(NumNodes);
- for (Block &Block : AllBlocks) {
- AllChains.emplace_back(Block.Index, &Block);
- Block.CurChain = &AllChains.back();
- if (Block.ExecutionCount > 0) {
+ for (NodeT &Node : AllNodes) {
+ AllChains.emplace_back(Node.Index, &Node);
+ Node.CurChain = &AllChains.back();
+ if (Node.ExecutionCount > 0) {
HotChains.push_back(&AllChains.back());
}
}
// Initialize chain edges
AllEdges.reserve(AllJumps.size());
- for (Block &Block : AllBlocks) {
- for (auto &Jump : Block.OutJumps) {
- auto SuccBlock = Jump->Target;
- ChainEdge *CurEdge = Block.CurChain->getEdge(SuccBlock->CurChain);
+ for (NodeT &PredNode : AllNodes) {
+ for (JumpT *Jump : PredNode.OutJumps) {
+ NodeT *SuccNode = Jump->Target;
+ ChainEdge *CurEdge = PredNode.CurChain->getEdge(SuccNode->CurChain);
// this edge is already present in the graph
if (CurEdge != nullptr) {
- assert(SuccBlock->CurChain->getEdge(Block.CurChain) != nullptr);
+ assert(SuccNode->CurChain->getEdge(PredNode.CurChain) != nullptr);
CurEdge->appendJump(Jump);
continue;
}
// this is a new edge
AllEdges.emplace_back(Jump);
- Block.CurChain->addEdge(SuccBlock->CurChain, &AllEdges.back());
- SuccBlock->CurChain->addEdge(Block.CurChain, &AllEdges.back());
+ PredNode.CurChain->addEdge(SuccNode->CurChain, &AllEdges.back());
+ SuccNode->CurChain->addEdge(PredNode.CurChain, &AllEdges.back());
}
}
}
- /// For a pair of blocks, A and B, block B is the forced successor of A,
+ /// For a pair of nodes, A and B, node B is the forced successor of A,
/// if (i) all jumps (based on profile) from A goes to B and (ii) all jumps
- /// to B are from A. Such blocks should be adjacent in the optimal ordering;
- /// the method finds and merges such pairs of blocks.
+ /// to B are from A. Such nodes should be adjacent in the optimal ordering;
+ /// the method finds and merges such pairs of nodes.
void mergeForcedPairs() {
// Find fallthroughs based on edge weights
- for (auto &Block : AllBlocks) {
- if (SuccNodes[Block.Index].size() == 1 &&
- PredNodes[SuccNodes[Block.Index][0]].size() == 1 &&
- SuccNodes[Block.Index][0] != 0) {
- size_t SuccIndex = SuccNodes[Block.Index][0];
- Block.ForcedSucc = &AllBlocks[SuccIndex];
- AllBlocks[SuccIndex].ForcedPred = &Block;
+ for (NodeT &Node : AllNodes) {
+ if (SuccNodes[Node.Index].size() == 1 &&
+ PredNodes[SuccNodes[Node.Index][0]].size() == 1 &&
+ SuccNodes[Node.Index][0] != 0) {
+ size_t SuccIndex = SuccNodes[Node.Index][0];
+ Node.ForcedSucc = &AllNodes[SuccIndex];
+ AllNodes[SuccIndex].ForcedPred = &Node;
}
}
// There might be 'cycles' in the forced dependencies, since profile
// data isn't 100% accurate. Typically this is observed in loops, when the
// loop edges are the hottest successors for the basic blocks of the loop.
- // Break the cycles by choosing the block with the smallest index as the
+ // Break the cycles by choosing the node with the smallest index as the
// head. This helps to keep the original order of the loops, which likely
// have already been rotated in the optimized manner.
- for (auto &Block : AllBlocks) {
- if (Block.ForcedSucc == nullptr || Block.ForcedPred == nullptr)
+ for (NodeT &Node : AllNodes) {
+ if (Node.ForcedSucc == nullptr || Node.ForcedPred == nullptr)
continue;
- auto SuccBlock = Block.ForcedSucc;
- while (SuccBlock != nullptr && SuccBlock != &Block) {
- SuccBlock = SuccBlock->ForcedSucc;
+ NodeT *SuccNode = Node.ForcedSucc;
+ while (SuccNode != nullptr && SuccNode != &Node) {
+ SuccNode = SuccNode->ForcedSucc;
}
- if (SuccBlock == nullptr)
+ if (SuccNode == nullptr)
continue;
// Break the cycle
- AllBlocks[Block.ForcedPred->Index].ForcedSucc = nullptr;
- Block.ForcedPred = nullptr;
+ AllNodes[Node.ForcedPred->Index].ForcedSucc = nullptr;
+ Node.ForcedPred = nullptr;
}
- // Merge blocks with their fallthrough successors
- for (auto &Block : AllBlocks) {
- if (Block.ForcedPred == nullptr && Block.ForcedSucc != nullptr) {
- auto CurBlock = &Block;
+ // Merge nodes with their fallthrough successors
+ for (NodeT &Node : AllNodes) {
+ if (Node.ForcedPred == nullptr && Node.ForcedSucc != nullptr) {
+ const NodeT *CurBlock = &Node;
while (CurBlock->ForcedSucc != nullptr) {
- const auto NextBlock = CurBlock->ForcedSucc;
- mergeChains(Block.CurChain, NextBlock->CurChain, 0, MergeTypeTy::X_Y);
+ const NodeT *NextBlock = CurBlock->ForcedSucc;
+ mergeChains(Node.CurChain, NextBlock->CurChain, 0, MergeTypeT::X_Y);
CurBlock = NextBlock;
}
}
@@ -625,23 +690,23 @@ private:
/// Merge pairs of chains while improving the ExtTSP objective.
void mergeChainPairs() {
/// Deterministically compare pairs of chains
- auto compareChainPairs = [](const Chain *A1, const Chain *B1,
- const Chain *A2, const Chain *B2) {
+ auto compareChainPairs = [](const ChainT *A1, const ChainT *B1,
+ const ChainT *A2, const ChainT *B2) {
if (A1 != A2)
- return A1->id() < A2->id();
- return B1->id() < B2->id();
+ return A1->Id < A2->Id;
+ return B1->Id < B2->Id;
};
while (HotChains.size() > 1) {
- Chain *BestChainPred = nullptr;
- Chain *BestChainSucc = nullptr;
- auto BestGain = MergeGainTy();
+ ChainT *BestChainPred = nullptr;
+ ChainT *BestChainSucc = nullptr;
+ MergeGainT BestGain;
// Iterate over all pairs of chains
- for (Chain *ChainPred : HotChains) {
+ for (ChainT *ChainPred : HotChains) {
// Get candidates for merging with the current chain
- for (auto EdgeIter : ChainPred->edges()) {
- Chain *ChainSucc = EdgeIter.first;
- class ChainEdge *ChainEdge = EdgeIter.second;
+ for (auto EdgeIt : ChainPred->Edges) {
+ ChainT *ChainSucc = EdgeIt.first;
+ ChainEdge *Edge = EdgeIt.second;
// Ignore loop edges
if (ChainPred == ChainSucc)
continue;
@@ -651,8 +716,7 @@ private:
continue;
// Compute the gain of merging the two chains
- MergeGainTy CurGain =
- getBestMergeGain(ChainPred, ChainSucc, ChainEdge);
+ MergeGainT CurGain = getBestMergeGain(ChainPred, ChainSucc, Edge);
if (CurGain.score() <= EPS)
continue;
@@ -677,43 +741,43 @@ private:
}
}
- /// Merge remaining blocks into chains w/o taking jump counts into
- /// consideration. This allows to maintain the original block order in the
- /// absense of profile data
+  /// Merge remaining nodes into chains w/o taking jump counts into
+  /// consideration. This allows maintaining the original node order in the
+  /// absence of profile data.
void mergeColdChains() {
for (size_t SrcBB = 0; SrcBB < NumNodes; SrcBB++) {
// Iterating in reverse order to make sure original fallthrough jumps are
// merged first; this might be beneficial for code size.
size_t NumSuccs = SuccNodes[SrcBB].size();
for (size_t Idx = 0; Idx < NumSuccs; Idx++) {
- auto DstBB = SuccNodes[SrcBB][NumSuccs - Idx - 1];
- auto SrcChain = AllBlocks[SrcBB].CurChain;
- auto DstChain = AllBlocks[DstBB].CurChain;
+ size_t DstBB = SuccNodes[SrcBB][NumSuccs - Idx - 1];
+ ChainT *SrcChain = AllNodes[SrcBB].CurChain;
+ ChainT *DstChain = AllNodes[DstBB].CurChain;
if (SrcChain != DstChain && !DstChain->isEntry() &&
- SrcChain->blocks().back()->Index == SrcBB &&
- DstChain->blocks().front()->Index == DstBB &&
+ SrcChain->Nodes.back()->Index == SrcBB &&
+ DstChain->Nodes.front()->Index == DstBB &&
SrcChain->isCold() == DstChain->isCold()) {
- mergeChains(SrcChain, DstChain, 0, MergeTypeTy::X_Y);
+ mergeChains(SrcChain, DstChain, 0, MergeTypeT::X_Y);
}
}
}
}
- /// Compute the Ext-TSP score for a given block order and a list of jumps.
+ /// Compute the Ext-TSP score for a given node order and a list of jumps.
double extTSPScore(const MergedChain &MergedBlocks,
- const std::vector<Jump *> &Jumps) const {
+ const std::vector<JumpT *> &Jumps) const {
if (Jumps.empty())
return 0.0;
uint64_t CurAddr = 0;
- MergedBlocks.forEach([&](const Block *BB) {
- BB->EstimatedAddr = CurAddr;
- CurAddr += BB->Size;
+ MergedBlocks.forEach([&](const NodeT *Node) {
+ Node->EstimatedAddr = CurAddr;
+ CurAddr += Node->Size;
});
double Score = 0;
- for (auto &Jump : Jumps) {
- const Block *SrcBlock = Jump->Source;
- const Block *DstBlock = Jump->Target;
+ for (JumpT *Jump : Jumps) {
+ const NodeT *SrcBlock = Jump->Source;
+ const NodeT *DstBlock = Jump->Target;
Score += ::extTSPScore(SrcBlock->EstimatedAddr, SrcBlock->Size,
DstBlock->EstimatedAddr, Jump->ExecutionCount,
Jump->IsConditional);
@@ -727,8 +791,8 @@ private:
/// computes the one having the largest increase in ExtTSP objective. The
/// result is a pair with the first element being the gain and the second
/// element being the corresponding merging type.
- MergeGainTy getBestMergeGain(Chain *ChainPred, Chain *ChainSucc,
- ChainEdge *Edge) const {
+ MergeGainT getBestMergeGain(ChainT *ChainPred, ChainT *ChainSucc,
+ ChainEdge *Edge) const {
if (Edge->hasCachedMergeGain(ChainPred, ChainSucc)) {
return Edge->getCachedMergeGain(ChainPred, ChainSucc);
}
@@ -742,22 +806,22 @@ private:
assert(!Jumps.empty() && "trying to merge chains w/o jumps");
// The object holds the best currently chosen gain of merging the two chains
- MergeGainTy Gain = MergeGainTy();
+ MergeGainT Gain = MergeGainT();
/// Given a merge offset and a list of merge types, try to merge two chains
/// and update Gain with a better alternative
auto tryChainMerging = [&](size_t Offset,
- const std::vector<MergeTypeTy> &MergeTypes) {
+ const std::vector<MergeTypeT> &MergeTypes) {
// Skip merging corresponding to concatenation w/o splitting
- if (Offset == 0 || Offset == ChainPred->blocks().size())
+ if (Offset == 0 || Offset == ChainPred->Nodes.size())
return;
// Skip merging if it breaks Forced successors
- auto BB = ChainPred->blocks()[Offset - 1];
- if (BB->ForcedSucc != nullptr)
+ NodeT *Node = ChainPred->Nodes[Offset - 1];
+ if (Node->ForcedSucc != nullptr)
return;
// Apply the merge, compute the corresponding gain, and update the best
// value, if the merge is beneficial
- for (const auto &MergeType : MergeTypes) {
+ for (const MergeTypeT &MergeType : MergeTypes) {
Gain.updateIfLessThan(
computeMergeGain(ChainPred, ChainSucc, Jumps, Offset, MergeType));
}
@@ -765,36 +829,36 @@ private:
// Try to concatenate two chains w/o splitting
Gain.updateIfLessThan(
- computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeTy::X_Y));
+ computeMergeGain(ChainPred, ChainSucc, Jumps, 0, MergeTypeT::X_Y));
if (EnableChainSplitAlongJumps) {
- // Attach (a part of) ChainPred before the first block of ChainSucc
- for (auto &Jump : ChainSucc->blocks().front()->InJumps) {
- const auto SrcBlock = Jump->Source;
+ // Attach (a part of) ChainPred before the first node of ChainSucc
+ for (JumpT *Jump : ChainSucc->Nodes.front()->InJumps) {
+ const NodeT *SrcBlock = Jump->Source;
if (SrcBlock->CurChain != ChainPred)
continue;
size_t Offset = SrcBlock->CurIndex + 1;
- tryChainMerging(Offset, {MergeTypeTy::X1_Y_X2, MergeTypeTy::X2_X1_Y});
+ tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::X2_X1_Y});
}
- // Attach (a part of) ChainPred after the last block of ChainSucc
- for (auto &Jump : ChainSucc->blocks().back()->OutJumps) {
- const auto DstBlock = Jump->Source;
+ // Attach (a part of) ChainPred after the last node of ChainSucc
+ for (JumpT *Jump : ChainSucc->Nodes.back()->OutJumps) {
+ const NodeT *DstBlock = Jump->Source;
if (DstBlock->CurChain != ChainPred)
continue;
size_t Offset = DstBlock->CurIndex;
- tryChainMerging(Offset, {MergeTypeTy::X1_Y_X2, MergeTypeTy::Y_X2_X1});
+ tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1});
}
}
// Try to break ChainPred in various ways and concatenate with ChainSucc
- if (ChainPred->blocks().size() <= ChainSplitThreshold) {
- for (size_t Offset = 1; Offset < ChainPred->blocks().size(); Offset++) {
+ if (ChainPred->Nodes.size() <= ChainSplitThreshold) {
+ for (size_t Offset = 1; Offset < ChainPred->Nodes.size(); Offset++) {
// Try to split the chain in different ways. In practice, applying
        // X2_Y_X1 merging almost never provides benefits; thus, we exclude
        // it from consideration to reduce the search space.
- tryChainMerging(Offset, {MergeTypeTy::X1_Y_X2, MergeTypeTy::Y_X2_X1,
- MergeTypeTy::X2_X1_Y});
+ tryChainMerging(Offset, {MergeTypeT::X1_Y_X2, MergeTypeT::Y_X2_X1,
+ MergeTypeT::X2_X1_Y});
}
}
Edge->setCachedMergeGain(ChainPred, ChainSucc, Gain);
@@ -805,96 +869,66 @@ private:
/// merge 'type' and 'offset'.
///
/// The two chains are not modified in the method.
- MergeGainTy computeMergeGain(const Chain *ChainPred, const Chain *ChainSucc,
- const std::vector<Jump *> &Jumps,
- size_t MergeOffset,
- MergeTypeTy MergeType) const {
- auto MergedBlocks = mergeBlocks(ChainPred->blocks(), ChainSucc->blocks(),
- MergeOffset, MergeType);
-
- // Do not allow a merge that does not preserve the original entry block
+ MergeGainT computeMergeGain(const ChainT *ChainPred, const ChainT *ChainSucc,
+ const std::vector<JumpT *> &Jumps,
+ size_t MergeOffset, MergeTypeT MergeType) const {
+ auto MergedBlocks =
+ mergeNodes(ChainPred->Nodes, ChainSucc->Nodes, MergeOffset, MergeType);
+
+ // Do not allow a merge that does not preserve the original entry point
if ((ChainPred->isEntry() || ChainSucc->isEntry()) &&
- !MergedBlocks.getFirstBlock()->isEntry())
- return MergeGainTy();
+ !MergedBlocks.getFirstNode()->isEntry())
+ return MergeGainT();
// The gain for the new chain
- auto NewGainScore = extTSPScore(MergedBlocks, Jumps) - ChainPred->score();
- return MergeGainTy(NewGainScore, MergeOffset, MergeType);
- }
-
- /// Merge two chains of blocks respecting a given merge 'type' and 'offset'.
- ///
- /// If MergeType == 0, then the result is a concatenation of two chains.
- /// Otherwise, the first chain is cut into two sub-chains at the offset,
- /// and merged using all possible ways of concatenating three chains.
- MergedChain mergeBlocks(const std::vector<Block *> &X,
- const std::vector<Block *> &Y, size_t MergeOffset,
- MergeTypeTy MergeType) const {
- // Split the first chain, X, into X1 and X2
- BlockIter BeginX1 = X.begin();
- BlockIter EndX1 = X.begin() + MergeOffset;
- BlockIter BeginX2 = X.begin() + MergeOffset;
- BlockIter EndX2 = X.end();
- BlockIter BeginY = Y.begin();
- BlockIter EndY = Y.end();
-
- // Construct a new chain from the three existing ones
- switch (MergeType) {
- case MergeTypeTy::X_Y:
- return MergedChain(BeginX1, EndX2, BeginY, EndY);
- case MergeTypeTy::X1_Y_X2:
- return MergedChain(BeginX1, EndX1, BeginY, EndY, BeginX2, EndX2);
- case MergeTypeTy::Y_X2_X1:
- return MergedChain(BeginY, EndY, BeginX2, EndX2, BeginX1, EndX1);
- case MergeTypeTy::X2_X1_Y:
- return MergedChain(BeginX2, EndX2, BeginX1, EndX1, BeginY, EndY);
- }
- llvm_unreachable("unexpected chain merge type");
+ auto NewGainScore = extTSPScore(MergedBlocks, Jumps) - ChainPred->Score;
+ return MergeGainT(NewGainScore, MergeOffset, MergeType);
}
/// Merge chain From into chain Into, update the list of active chains,
/// adjacency information, and the corresponding cached values.
- void mergeChains(Chain *Into, Chain *From, size_t MergeOffset,
- MergeTypeTy MergeType) {
+ void mergeChains(ChainT *Into, ChainT *From, size_t MergeOffset,
+ MergeTypeT MergeType) {
assert(Into != From && "a chain cannot be merged with itself");
- // Merge the blocks
- MergedChain MergedBlocks =
- mergeBlocks(Into->blocks(), From->blocks(), MergeOffset, MergeType);
- Into->merge(From, MergedBlocks.getBlocks());
+ // Merge the nodes
+ MergedChain MergedNodes =
+ mergeNodes(Into->Nodes, From->Nodes, MergeOffset, MergeType);
+ Into->merge(From, MergedNodes.getNodes());
+
+ // Merge the edges
Into->mergeEdges(From);
From->clear();
// Update cached ext-tsp score for the new chain
ChainEdge *SelfEdge = Into->getEdge(Into);
if (SelfEdge != nullptr) {
- MergedBlocks = MergedChain(Into->blocks().begin(), Into->blocks().end());
- Into->setScore(extTSPScore(MergedBlocks, SelfEdge->jumps()));
+ MergedNodes = MergedChain(Into->Nodes.begin(), Into->Nodes.end());
+ Into->Score = extTSPScore(MergedNodes, SelfEdge->jumps());
}
- // Remove chain From from the list of active chains
+ // Remove the chain from the list of active chains
llvm::erase_value(HotChains, From);
// Invalidate caches
- for (auto EdgeIter : Into->edges()) {
- EdgeIter.second->invalidateCache();
- }
+ for (auto EdgeIt : Into->Edges)
+ EdgeIt.second->invalidateCache();
}
- /// Concatenate all chains into a final order of blocks.
+ /// Concatenate all chains into the final order.
void concatChains(std::vector<uint64_t> &Order) {
- // Collect chains and calculate some stats for their sorting
- std::vector<Chain *> SortedChains;
- DenseMap<const Chain *, double> ChainDensity;
- for (auto &Chain : AllChains) {
- if (!Chain.blocks().empty()) {
+ // Collect chains and calculate density stats for their sorting
+ std::vector<const ChainT *> SortedChains;
+ DenseMap<const ChainT *, double> ChainDensity;
+ for (ChainT &Chain : AllChains) {
+ if (!Chain.Nodes.empty()) {
SortedChains.push_back(&Chain);
- // Using doubles to avoid overflow of ExecutionCount
+ // Using doubles to avoid overflow of ExecutionCounts
double Size = 0;
double ExecutionCount = 0;
- for (auto *Block : Chain.blocks()) {
- Size += static_cast<double>(Block->Size);
- ExecutionCount += static_cast<double>(Block->ExecutionCount);
+ for (NodeT *Node : Chain.Nodes) {
+ Size += static_cast<double>(Node->Size);
+ ExecutionCount += static_cast<double>(Node->ExecutionCount);
}
assert(Size > 0 && "a chain of zero size");
ChainDensity[&Chain] = ExecutionCount / Size;
@@ -903,24 +937,23 @@ private:
// Sorting chains by density in the decreasing order
std::stable_sort(SortedChains.begin(), SortedChains.end(),
- [&](const Chain *C1, const Chain *C2) {
- // Make sure the original entry block is at the
+ [&](const ChainT *L, const ChainT *R) {
+ // Make sure the original entry point is at the
// beginning of the order
- if (C1->isEntry() != C2->isEntry()) {
- return C1->isEntry();
- }
+ if (L->isEntry() != R->isEntry())
+ return L->isEntry();
- const double D1 = ChainDensity[C1];
- const double D2 = ChainDensity[C2];
+ const double DL = ChainDensity[L];
+ const double DR = ChainDensity[R];
// Compare by density and break ties by chain identifiers
- return (D1 != D2) ? (D1 > D2) : (C1->id() < C2->id());
+ return (DL != DR) ? (DL > DR) : (L->Id < R->Id);
});
- // Collect the blocks in the order specified by their chains
+ // Collect the nodes in the order specified by their chains
Order.reserve(NumNodes);
- for (Chain *Chain : SortedChains) {
- for (Block *Block : Chain->blocks()) {
- Order.push_back(Block->Index);
+ for (const ChainT *Chain : SortedChains) {
+ for (NodeT *Node : Chain->Nodes) {
+ Order.push_back(Node->Index);
}
}
}
@@ -935,49 +968,47 @@ private:
/// Predecessors of each node.
std::vector<std::vector<uint64_t>> PredNodes;
- /// All basic blocks.
- std::vector<Block> AllBlocks;
+ /// All nodes (basic blocks) in the graph.
+ std::vector<NodeT> AllNodes;
- /// All jumps between blocks.
- std::vector<Jump> AllJumps;
+ /// All jumps between the nodes.
+ std::vector<JumpT> AllJumps;
- /// All chains of basic blocks.
- std::vector<Chain> AllChains;
+ /// All chains of nodes.
+ std::vector<ChainT> AllChains;
- /// All edges between chains.
+ /// All edges between the chains.
std::vector<ChainEdge> AllEdges;
/// Active chains. The vector gets updated at runtime when chains are merged.
- std::vector<Chain *> HotChains;
+ std::vector<ChainT *> HotChains;
};
} // end of anonymous namespace
-std::vector<uint64_t> llvm::applyExtTspLayout(
- const std::vector<uint64_t> &NodeSizes,
- const std::vector<uint64_t> &NodeCounts,
- const std::vector<std::pair<EdgeT, uint64_t>> &EdgeCounts) {
- size_t NumNodes = NodeSizes.size();
-
- // Verify correctness of the input data.
+std::vector<uint64_t>
+llvm::applyExtTspLayout(const std::vector<uint64_t> &NodeSizes,
+ const std::vector<uint64_t> &NodeCounts,
+ const std::vector<EdgeCountT> &EdgeCounts) {
+ // Verify correctness of the input data
assert(NodeCounts.size() == NodeSizes.size() && "Incorrect input");
- assert(NumNodes > 2 && "Incorrect input");
+ assert(NodeSizes.size() > 2 && "Incorrect input");
- // Apply the reordering algorithm.
- auto Alg = ExtTSPImpl(NumNodes, NodeSizes, NodeCounts, EdgeCounts);
+ // Apply the reordering algorithm
+ ExtTSPImpl Alg(NodeSizes, NodeCounts, EdgeCounts);
std::vector<uint64_t> Result;
Alg.run(Result);
- // Verify correctness of the output.
+ // Verify correctness of the output
assert(Result.front() == 0 && "Original entry point is not preserved");
- assert(Result.size() == NumNodes && "Incorrect size of reordered layout");
+ assert(Result.size() == NodeSizes.size() && "Incorrect size of layout");
return Result;
}
-double llvm::calcExtTspScore(
- const std::vector<uint64_t> &Order, const std::vector<uint64_t> &NodeSizes,
- const std::vector<uint64_t> &NodeCounts,
- const std::vector<std::pair<EdgeT, uint64_t>> &EdgeCounts) {
+double llvm::calcExtTspScore(const std::vector<uint64_t> &Order,
+ const std::vector<uint64_t> &NodeSizes,
+ const std::vector<uint64_t> &NodeCounts,
+ const std::vector<EdgeCountT> &EdgeCounts) {
// Estimate addresses of the blocks in memory
std::vector<uint64_t> Addr(NodeSizes.size(), 0);
for (size_t Idx = 1; Idx < Order.size(); Idx++) {
@@ -985,15 +1016,15 @@ double llvm::calcExtTspScore(
}
std::vector<uint64_t> OutDegree(NodeSizes.size(), 0);
for (auto It : EdgeCounts) {
- auto Pred = It.first.first;
+ uint64_t Pred = It.first.first;
OutDegree[Pred]++;
}
// Increase the score for each jump
double Score = 0;
for (auto It : EdgeCounts) {
- auto Pred = It.first.first;
- auto Succ = It.first.second;
+ uint64_t Pred = It.first.first;
+ uint64_t Succ = It.first.second;
uint64_t Count = It.second;
bool IsConditional = OutDegree[Pred] > 1;
Score += ::extTSPScore(Addr[Pred], NodeSizes[Pred], Addr[Succ], Count,
@@ -1002,10 +1033,9 @@ double llvm::calcExtTspScore(
return Score;
}
-double llvm::calcExtTspScore(
- const std::vector<uint64_t> &NodeSizes,
- const std::vector<uint64_t> &NodeCounts,
- const std::vector<std::pair<EdgeT, uint64_t>> &EdgeCounts) {
+double llvm::calcExtTspScore(const std::vector<uint64_t> &NodeSizes,
+ const std::vector<uint64_t> &NodeCounts,
+ const std::vector<EdgeCountT> &EdgeCounts) {
std::vector<uint64_t> Order(NodeSizes.size());
for (size_t Idx = 0; Idx < NodeSizes.size(); Idx++) {
Order[Idx] = Idx;
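For reference, the reshaped ext-TSP entry points above take per-node sizes, per-node execution counts, and per-edge jump counts, and return a permutation of node indices. A minimal usage sketch, assuming EdgeCountT pairs a (source, target) edge with its count as the replaced std::pair<EdgeT, uint64_t> signature suggests, and that the declarations live in llvm/Transforms/Utils/CodeLayout.h:

#include "llvm/Transforms/Utils/CodeLayout.h"
#include <cstdint>
#include <vector>

// Hypothetical three-block profile; block 0 is the entry and stays first
// (the algorithm asserts on fewer than three nodes and preserves the entry).
std::vector<uint64_t> computeToyLayout() {
  std::vector<uint64_t> NodeSizes = {16, 32, 8};    // code size per block
  std::vector<uint64_t> NodeCounts = {100, 60, 40}; // execution count per block
  // Each entry is ((Src, Dst), JumpCount); EdgeCountT is assumed to be that pair.
  std::vector<llvm::EdgeCountT> EdgeCounts = {
      {{0, 1}, 60}, {{0, 2}, 40}, {{1, 2}, 10}};
  return llvm::applyExtTspLayout(NodeSizes, NodeCounts, EdgeCounts);
}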
diff --git a/llvm/lib/Transforms/Utils/CountVisits.cpp b/llvm/lib/Transforms/Utils/CountVisits.cpp
new file mode 100644
index 000000000000..4faded8fc656
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/CountVisits.cpp
@@ -0,0 +1,25 @@
+//===- CountVisits.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/CountVisits.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/PassManager.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "count-visits"
+
+STATISTIC(MaxVisited, "Max number of times we visited a function");
+
+PreservedAnalyses CountVisitsPass::run(Function &F, FunctionAnalysisManager &) {
+ uint32_t Count = Counts[F.getName()] + 1;
+ Counts[F.getName()] = Count;
+ if (Count > MaxVisited)
+ MaxVisited = Count;
+ return PreservedAnalyses::all();
+}
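The new CountVisits utility above bumps a per-function-name counter every time the pass manager runs it, which helps diagnose functions that are revisited unexpectedly often. A minimal scheduling sketch, assuming the pass stays default-constructible as the definition suggests:

#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Utils/CountVisits.h"

// Adds the visit counter to an existing function pipeline; the MaxVisited
// statistic is reported alongside other LLVM statistics (e.g. with -stats).
void addCountVisits(llvm::FunctionPassManager &FPM) {
  FPM.addPass(llvm::CountVisitsPass());
}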
diff --git a/llvm/lib/Transforms/Utils/CtorUtils.cpp b/llvm/lib/Transforms/Utils/CtorUtils.cpp
index c997f39508e3..e07c92df2265 100644
--- a/llvm/lib/Transforms/Utils/CtorUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CtorUtils.cpp
@@ -48,7 +48,7 @@ static void removeGlobalCtors(GlobalVariable *GCL, const BitVector &CtorsToRemov
GlobalVariable *NGV =
new GlobalVariable(CA->getType(), GCL->isConstant(), GCL->getLinkage(),
CA, "", GCL->getThreadLocalMode());
- GCL->getParent()->getGlobalList().insert(GCL->getIterator(), NGV);
+ GCL->getParent()->insertGlobalVariable(GCL->getIterator(), NGV);
NGV->takeName(GCL);
// Nuke the old list, replacing any uses with the new one.
diff --git a/llvm/lib/Transforms/Utils/Debugify.cpp b/llvm/lib/Transforms/Utils/Debugify.cpp
index 989473693a0b..93cad0888a56 100644
--- a/llvm/lib/Transforms/Utils/Debugify.cpp
+++ b/llvm/lib/Transforms/Utils/Debugify.cpp
@@ -979,7 +979,9 @@ PreservedAnalyses NewPMDebugifyPass::run(Module &M, ModuleAnalysisManager &) {
collectDebugInfoMetadata(M, M.functions(), *DebugInfoBeforePass,
"ModuleDebugify (original debuginfo)",
NameOfWrappedPass);
- return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
ModulePass *createCheckDebugifyModulePass(
@@ -1027,45 +1029,58 @@ static bool isIgnoredPass(StringRef PassID) {
}
void DebugifyEachInstrumentation::registerCallbacks(
- PassInstrumentationCallbacks &PIC) {
- PIC.registerBeforeNonSkippedPassCallback([this](StringRef P, Any IR) {
- if (isIgnoredPass(P))
- return;
- if (const auto **F = any_cast<const Function *>(&IR))
- applyDebugify(*const_cast<Function *>(*F),
- Mode, DebugInfoBeforePass, P);
- else if (const auto **M = any_cast<const Module *>(&IR))
- applyDebugify(*const_cast<Module *>(*M),
- Mode, DebugInfoBeforePass, P);
- });
- PIC.registerAfterPassCallback([this](StringRef P, Any IR,
- const PreservedAnalyses &PassPA) {
+ PassInstrumentationCallbacks &PIC, ModuleAnalysisManager &MAM) {
+ PIC.registerBeforeNonSkippedPassCallback([this, &MAM](StringRef P, Any IR) {
if (isIgnoredPass(P))
return;
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
if (const auto **CF = any_cast<const Function *>(&IR)) {
- auto &F = *const_cast<Function *>(*CF);
- Module &M = *F.getParent();
- auto It = F.getIterator();
- if (Mode == DebugifyMode::SyntheticDebugInfo)
- checkDebugifyMetadata(M, make_range(It, std::next(It)), P,
- "CheckFunctionDebugify", /*Strip=*/true, DIStatsMap);
- else
- checkDebugInfoMetadata(
- M, make_range(It, std::next(It)), *DebugInfoBeforePass,
- "CheckModuleDebugify (original debuginfo)",
- P, OrigDIVerifyBugsReportFilePath);
+ Function &F = *const_cast<Function *>(*CF);
+ applyDebugify(F, Mode, DebugInfoBeforePass, P);
+ MAM.getResult<FunctionAnalysisManagerModuleProxy>(*F.getParent())
+ .getManager()
+ .invalidate(F, PA);
} else if (const auto **CM = any_cast<const Module *>(&IR)) {
- auto &M = *const_cast<Module *>(*CM);
- if (Mode == DebugifyMode::SyntheticDebugInfo)
- checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify",
- /*Strip=*/true, DIStatsMap);
- else
- checkDebugInfoMetadata(
- M, M.functions(), *DebugInfoBeforePass,
- "CheckModuleDebugify (original debuginfo)",
- P, OrigDIVerifyBugsReportFilePath);
+ Module &M = *const_cast<Module *>(*CM);
+ applyDebugify(M, Mode, DebugInfoBeforePass, P);
+ MAM.invalidate(M, PA);
}
});
+ PIC.registerAfterPassCallback(
+ [this, &MAM](StringRef P, Any IR, const PreservedAnalyses &PassPA) {
+ if (isIgnoredPass(P))
+ return;
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ if (const auto **CF = any_cast<const Function *>(&IR)) {
+ auto &F = *const_cast<Function *>(*CF);
+ Module &M = *F.getParent();
+ auto It = F.getIterator();
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ checkDebugifyMetadata(M, make_range(It, std::next(It)), P,
+ "CheckFunctionDebugify", /*Strip=*/true,
+ DIStatsMap);
+ else
+ checkDebugInfoMetadata(M, make_range(It, std::next(It)),
+ *DebugInfoBeforePass,
+ "CheckModuleDebugify (original debuginfo)",
+ P, OrigDIVerifyBugsReportFilePath);
+ MAM.getResult<FunctionAnalysisManagerModuleProxy>(*F.getParent())
+ .getManager()
+ .invalidate(F, PA);
+ } else if (const auto **CM = any_cast<const Module *>(&IR)) {
+ Module &M = *const_cast<Module *>(*CM);
+ if (Mode == DebugifyMode::SyntheticDebugInfo)
+ checkDebugifyMetadata(M, M.functions(), P, "CheckModuleDebugify",
+ /*Strip=*/true, DIStatsMap);
+ else
+ checkDebugInfoMetadata(M, M.functions(), *DebugInfoBeforePass,
+ "CheckModuleDebugify (original debuginfo)",
+ P, OrigDIVerifyBugsReportFilePath);
+ MAM.invalidate(M, PA);
+ }
+ });
}
char DebugifyModulePass::ID = 0;
diff --git a/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp b/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
index 086ea088dc5e..c894afee68a2 100644
--- a/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/llvm/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -74,6 +74,7 @@ AllocaInst *llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
V = new LoadInst(I.getType(), Slot, I.getName() + ".reload",
VolatileLoads,
PN->getIncomingBlock(i)->getTerminator());
+ Loads[PN->getIncomingBlock(i)] = V;
}
PN->setIncomingValue(i, V);
}
diff --git a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
index 53af1b1969c2..d424ebbef99d 100644
--- a/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
+++ b/llvm/lib/Transforms/Utils/EntryExitInstrumenter.cpp
@@ -7,7 +7,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Dominators.h"
@@ -16,9 +15,7 @@
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
@@ -83,6 +80,13 @@ static void insertCall(Function &CurFn, StringRef Func,
}
static bool runOnFunction(Function &F, bool PostInlining) {
+ // The asm in a naked function may reasonably expect the argument registers
+ // and the return address register (if present) to be live. An inserted
+ // function call will clobber these registers. Simply skip naked functions for
+ // all targets.
+ if (F.hasFnAttribute(Attribute::Naked))
+ return false;
+
StringRef EntryAttr = PostInlining ? "instrument-function-entry-inlined"
: "instrument-function-entry";
@@ -145,8 +149,8 @@ void llvm::EntryExitInstrumenterPass::printPipeline(
raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
static_cast<PassInfoMixin<llvm::EntryExitInstrumenterPass> *>(this)
->printPipeline(OS, MapClassName2PassName);
- OS << "<";
+ OS << '<';
if (PostInlining)
OS << "post-inline";
- OS << ">";
+ OS << '>';
}
diff --git a/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
index 91053338df5f..88c838685bca 100644
--- a/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
+++ b/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -12,9 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/EscapeEnumerator.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Module.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp
index dc58bebd724b..23c1ca366a44 100644
--- a/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -121,7 +121,7 @@ isSimpleEnoughValueToCommit(Constant *C,
}
void Evaluator::MutableValue::clear() {
- if (auto *Agg = Val.dyn_cast<MutableAggregate *>())
+ if (auto *Agg = dyn_cast_if_present<MutableAggregate *>(Val))
delete Agg;
Val = nullptr;
}
@@ -130,7 +130,7 @@ Constant *Evaluator::MutableValue::read(Type *Ty, APInt Offset,
const DataLayout &DL) const {
TypeSize TySize = DL.getTypeStoreSize(Ty);
const MutableValue *V = this;
- while (const auto *Agg = V->Val.dyn_cast<MutableAggregate *>()) {
+ while (const auto *Agg = dyn_cast_if_present<MutableAggregate *>(V->Val)) {
Type *AggTy = Agg->Ty;
std::optional<APInt> Index = DL.getGEPIndexForOffset(AggTy, Offset);
if (!Index || Index->uge(Agg->Elements.size()) ||
@@ -140,11 +140,11 @@ Constant *Evaluator::MutableValue::read(Type *Ty, APInt Offset,
V = &Agg->Elements[Index->getZExtValue()];
}
- return ConstantFoldLoadFromConst(V->Val.get<Constant *>(), Ty, Offset, DL);
+ return ConstantFoldLoadFromConst(cast<Constant *>(V->Val), Ty, Offset, DL);
}
bool Evaluator::MutableValue::makeMutable() {
- Constant *C = Val.get<Constant *>();
+ Constant *C = cast<Constant *>(Val);
Type *Ty = C->getType();
unsigned NumElements;
if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
@@ -171,10 +171,10 @@ bool Evaluator::MutableValue::write(Constant *V, APInt Offset,
MutableValue *MV = this;
while (Offset != 0 ||
!CastInst::isBitOrNoopPointerCastable(Ty, MV->getType(), DL)) {
- if (MV->Val.is<Constant *>() && !MV->makeMutable())
+ if (isa<Constant *>(MV->Val) && !MV->makeMutable())
return false;
- MutableAggregate *Agg = MV->Val.get<MutableAggregate *>();
+ MutableAggregate *Agg = cast<MutableAggregate *>(MV->Val);
Type *AggTy = Agg->Ty;
std::optional<APInt> Index = DL.getGEPIndexForOffset(AggTy, Offset);
if (!Index || Index->uge(Agg->Elements.size()) ||
@@ -413,16 +413,28 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
}
Constant *Val = getVal(MSI->getValue());
- APInt Len = LenC->getValue();
- while (Len != 0) {
- Constant *DestVal = ComputeLoadResult(GV, Val->getType(), Offset);
- if (DestVal != Val) {
- LLVM_DEBUG(dbgs() << "Memset is not a no-op at offset "
- << Offset << " of " << *GV << ".\n");
+    // Avoid the byte-per-byte scan if we're memsetting a zeroinitializer
+ // to zero.
+ if (!Val->isNullValue() || MutatedMemory.contains(GV) ||
+ !GV->hasDefinitiveInitializer() ||
+ !GV->getInitializer()->isNullValue()) {
+ APInt Len = LenC->getValue();
+ if (Len.ugt(64 * 1024)) {
+ LLVM_DEBUG(dbgs() << "Not evaluating large memset of size "
+ << Len << "\n");
return false;
}
- ++Offset;
- --Len;
+
+ while (Len != 0) {
+ Constant *DestVal = ComputeLoadResult(GV, Val->getType(), Offset);
+ if (DestVal != Val) {
+ LLVM_DEBUG(dbgs() << "Memset is not a no-op at offset "
+ << Offset << " of " << *GV << ".\n");
+ return false;
+ }
+ ++Offset;
+ --Len;
+ }
}
LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n");
diff --git a/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/llvm/lib/Transforms/Utils/FlattenCFG.cpp
index 2fb2ab82e41a..1925b91c4da7 100644
--- a/llvm/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/llvm/lib/Transforms/Utils/FlattenCFG.cpp
@@ -487,17 +487,10 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
Builder.SetInsertPoint(PBI);
if (InvertCond2) {
- // If this is a "cmp" instruction, only used for branching (and nowhere
- // else), then we can simply invert the predicate.
- auto Cmp2 = dyn_cast<CmpInst>(CInst2);
- if (Cmp2 && Cmp2->hasOneUse())
- Cmp2->setPredicate(Cmp2->getInversePredicate());
- else
- CInst2 = cast<Instruction>(Builder.CreateNot(CInst2));
- PBI->swapSuccessors();
+ InvertBranch(PBI, Builder);
}
- Value *NC = Builder.CreateBinOp(CombineOp, CInst1, CInst2);
- PBI->replaceUsesOfWith(CInst2, NC);
+ Value *NC = Builder.CreateBinOp(CombineOp, CInst1, PBI->getCondition());
+ PBI->replaceUsesOfWith(PBI->getCondition(), NC);
Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt);
// Handle PHI node to replace its predecessors to FirstEntryBlock.
diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index 3fa61ec68cd3..8daeb92130ba 100644
--- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -157,16 +157,31 @@ int FunctionComparator::cmpAttrs(const AttributeList L,
return 0;
}
-int FunctionComparator::cmpRangeMetadata(const MDNode *L,
- const MDNode *R) const {
+int FunctionComparator::cmpMetadata(const Metadata *L,
+ const Metadata *R) const {
+  // TODO: the following routine coerces the metadata contents into constants
+ // before comparison.
+ // It ignores any other cases, so that the metadata nodes are considered
+ // equal even though this is not correct.
+ // We should structurally compare the metadata nodes to be perfect here.
+ auto *CL = dyn_cast<ConstantAsMetadata>(L);
+ auto *CR = dyn_cast<ConstantAsMetadata>(R);
+ if (CL == CR)
+ return 0;
+ if (!CL)
+ return -1;
+ if (!CR)
+ return 1;
+ return cmpConstants(CL->getValue(), CR->getValue());
+}
+
+int FunctionComparator::cmpMDNode(const MDNode *L, const MDNode *R) const {
if (L == R)
return 0;
if (!L)
return -1;
if (!R)
return 1;
- // Range metadata is a sequence of numbers. Make sure they are the same
- // sequence.
// TODO: Note that as this is metadata, it is possible to drop and/or merge
// this data when considering functions to merge. Thus this comparison would
// return 0 (i.e. equivalent), but merging would become more complicated
@@ -175,10 +190,30 @@ int FunctionComparator::cmpRangeMetadata(const MDNode *L,
// function semantically.
if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
return Res;
- for (size_t I = 0; I < L->getNumOperands(); ++I) {
- ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
- ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
- if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
+ for (size_t I = 0; I < L->getNumOperands(); ++I)
+ if (int Res = cmpMetadata(L->getOperand(I), R->getOperand(I)))
+ return Res;
+ return 0;
+}
+
+int FunctionComparator::cmpInstMetadata(Instruction const *L,
+ Instruction const *R) const {
+  /// These metadata affect other optimization passes by making assertions
+ /// or constraints.
+ /// Values that carry different expectations should be considered different.
+ SmallVector<std::pair<unsigned, MDNode *>> MDL, MDR;
+ L->getAllMetadataOtherThanDebugLoc(MDL);
+ R->getAllMetadataOtherThanDebugLoc(MDR);
+ if (MDL.size() > MDR.size())
+ return 1;
+ else if (MDL.size() < MDR.size())
+ return -1;
+ for (size_t I = 0, N = MDL.size(); I < N; ++I) {
+ auto const [KeyL, ML] = MDL[I];
+ auto const [KeyR, MR] = MDR[I];
+ if (int Res = cmpNumbers(KeyL, KeyR))
+ return Res;
+ if (int Res = cmpMDNode(ML, MR))
return Res;
}
return 0;
@@ -586,9 +621,7 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res = cmpNumbers(LI->getSyncScopeID(),
cast<LoadInst>(R)->getSyncScopeID()))
return Res;
- return cmpRangeMetadata(
- LI->getMetadata(LLVMContext::MD_range),
- cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
+ return cmpInstMetadata(L, R);
}
if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
if (int Res =
@@ -616,8 +649,8 @@ int FunctionComparator::cmpOperations(const Instruction *L,
if (int Res = cmpNumbers(CI->getTailCallKind(),
cast<CallInst>(R)->getTailCallKind()))
return Res;
- return cmpRangeMetadata(L->getMetadata(LLVMContext::MD_range),
- R->getMetadata(LLVMContext::MD_range));
+ return cmpMDNode(L->getMetadata(LLVMContext::MD_range),
+ R->getMetadata(LLVMContext::MD_range));
}
if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
ArrayRef<unsigned> LIndices = IVI->getIndices();
@@ -715,8 +748,8 @@ int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
// When we have target data, we can reduce the GEP down to the value in bytes
// added to the address.
const DataLayout &DL = FnL->getParent()->getDataLayout();
- unsigned BitWidth = DL.getPointerSizeInBits(ASL);
- APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
+ unsigned OffsetBitWidth = DL.getIndexSizeInBits(ASL);
+ APInt OffsetL(OffsetBitWidth, 0), OffsetR(OffsetBitWidth, 0);
if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
GEPR->accumulateConstantOffset(DL, OffsetR))
return cmpAPInts(OffsetL, OffsetR);
diff --git a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
index 55bcb6f3b121..dab0be3a9fde 100644
--- a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
+++ b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
@@ -19,7 +19,6 @@
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/InstIterator.h"
-#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
@@ -40,7 +39,7 @@ STATISTIC(NumCompUsedAdded,
/// CI (other than void) need to be widened to a VectorType of VF
/// lanes.
static void addVariantDeclaration(CallInst &CI, const ElementCount &VF,
- const StringRef VFName) {
+ bool Predicate, const StringRef VFName) {
Module *M = CI.getModule();
// Add function declaration.
@@ -50,6 +49,8 @@ static void addVariantDeclaration(CallInst &CI, const ElementCount &VF,
Tys.push_back(ToVectorTy(ArgOperand->getType(), VF));
assert(!CI.getFunctionType()->isVarArg() &&
"VarArg functions are not supported.");
+ if (Predicate)
+ Tys.push_back(ToVectorTy(Type::getInt1Ty(RetTy->getContext()), VF));
FunctionType *FTy = FunctionType::get(RetTy, Tys, /*isVarArg=*/false);
Function *VectorF =
Function::Create(FTy, Function::ExternalLinkage, VFName, M);
@@ -89,19 +90,19 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
const SetVector<StringRef> OriginalSetOfMappings(Mappings.begin(),
Mappings.end());
- auto AddVariantDecl = [&](const ElementCount &VF) {
+ auto AddVariantDecl = [&](const ElementCount &VF, bool Predicate) {
const std::string TLIName =
- std::string(TLI.getVectorizedFunction(ScalarName, VF));
+ std::string(TLI.getVectorizedFunction(ScalarName, VF, Predicate));
if (!TLIName.empty()) {
- std::string MangledName =
- VFABI::mangleTLIVectorName(TLIName, ScalarName, CI.arg_size(), VF);
+ std::string MangledName = VFABI::mangleTLIVectorName(
+ TLIName, ScalarName, CI.arg_size(), VF, Predicate);
if (!OriginalSetOfMappings.count(MangledName)) {
Mappings.push_back(MangledName);
++NumCallInjected;
}
Function *VariantF = M->getFunction(TLIName);
if (!VariantF)
- addVariantDeclaration(CI, VF, TLIName);
+ addVariantDeclaration(CI, VF, Predicate, TLIName);
}
};
@@ -109,13 +110,15 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
ElementCount WidestFixedVF, WidestScalableVF;
TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
- for (ElementCount VF = ElementCount::getFixed(2);
- ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
- AddVariantDecl(VF);
+ for (bool Predicated : {false, true}) {
+ for (ElementCount VF = ElementCount::getFixed(2);
+ ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
+ AddVariantDecl(VF, Predicated);
- // TODO: Add scalable variants once we're able to test them.
- assert(WidestScalableVF.isZero() &&
- "Scalable vector mappings not yet supported");
+ for (ElementCount VF = ElementCount::getScalable(2);
+ ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
+ AddVariantDecl(VF, Predicated);
+ }
VFABI::setVectorVariantNames(&CI, Mappings);
}
@@ -138,39 +141,3 @@ PreservedAnalyses InjectTLIMappings::run(Function &F,
// Even if the pass adds IR attributes, the analyses are preserved.
return PreservedAnalyses::all();
}
-
-////////////////////////////////////////////////////////////////////////////////
-// Legacy PM Implementation.
-////////////////////////////////////////////////////////////////////////////////
-bool InjectTLIMappingsLegacy::runOnFunction(Function &F) {
- const TargetLibraryInfo &TLI =
- getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- return runImpl(TLI, F);
-}
-
-void InjectTLIMappingsLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.addPreserved<TargetLibraryInfoWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addPreserved<LoopAccessLegacyAnalysis>();
- AU.addPreserved<DemandedBitsWrapperPass>();
- AU.addPreserved<OptimizationRemarkEmitterWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
-}
-
-////////////////////////////////////////////////////////////////////////////////
-// Legacy Pass manager initialization
-////////////////////////////////////////////////////////////////////////////////
-char InjectTLIMappingsLegacy::ID = 0;
-
-INITIALIZE_PASS_BEGIN(InjectTLIMappingsLegacy, DEBUG_TYPE,
- "Inject TLI Mappings", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(InjectTLIMappingsLegacy, DEBUG_TYPE, "Inject TLI Mappings",
- false, false)
-
-FunctionPass *llvm::createInjectTLIMappingsLegacyPass() {
- return new InjectTLIMappingsLegacy();
-}
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 399c9a43793f..f7b93fc8fd06 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -23,7 +23,6 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
@@ -42,6 +41,7 @@
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
@@ -99,10 +99,6 @@ PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
cl::init(false), cl::Hidden,
cl::desc("Convert align attributes to assumptions during inlining."));
-static cl::opt<bool> UpdateReturnAttributes(
- "update-return-attrs", cl::init(true), cl::Hidden,
- cl::desc("Update return attributes on calls within inlined body"));
-
static cl::opt<unsigned> InlinerAttributeWindow(
"max-inst-checked-for-throw-during-inlining", cl::Hidden,
cl::desc("the maximum number of instructions analyzed for may throw during "
@@ -879,9 +875,6 @@ static void propagateMemProfHelper(const CallBase *OrigCall,
// inlined callee's callsite metadata with that of the inlined call,
// and moving the subset of any memprof contexts to the inlined callee
// allocations if they match the new inlined call stack.
-// FIXME: Replace memprof metadata with function attribute if all MIB end up
-// having the same behavior. Do other context trimming/merging optimizations
-// too.
static void
propagateMemProfMetadata(Function *Callee, CallBase &CB,
bool ContainsMemProfMetadata,
@@ -1368,9 +1361,6 @@ static AttrBuilder IdentifyValidAttributes(CallBase &CB) {
}
static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) {
- if (!UpdateReturnAttributes)
- return;
-
AttrBuilder Valid = IdentifyValidAttributes(CB);
if (!Valid.hasAttributes())
return;
@@ -1460,84 +1450,10 @@ static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {
}
}
-/// Once we have cloned code over from a callee into the caller,
-/// update the specified callgraph to reflect the changes we made.
-/// Note that it's possible that not all code was copied over, so only
-/// some edges of the callgraph may remain.
-static void UpdateCallGraphAfterInlining(CallBase &CB,
- Function::iterator FirstNewBlock,
- ValueToValueMapTy &VMap,
- InlineFunctionInfo &IFI) {
- CallGraph &CG = *IFI.CG;
- const Function *Caller = CB.getCaller();
- const Function *Callee = CB.getCalledFunction();
- CallGraphNode *CalleeNode = CG[Callee];
- CallGraphNode *CallerNode = CG[Caller];
-
- // Since we inlined some uninlined call sites in the callee into the caller,
- // add edges from the caller to all of the callees of the callee.
- CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end();
-
- // Consider the case where CalleeNode == CallerNode.
- CallGraphNode::CalledFunctionsVector CallCache;
- if (CalleeNode == CallerNode) {
- CallCache.assign(I, E);
- I = CallCache.begin();
- E = CallCache.end();
- }
-
- for (; I != E; ++I) {
-    // Skip 'reference' call records.
- if (!I->first)
- continue;
-
- const Value *OrigCall = *I->first;
-
- ValueToValueMapTy::iterator VMI = VMap.find(OrigCall);
- // Only copy the edge if the call was inlined!
- if (VMI == VMap.end() || VMI->second == nullptr)
- continue;
-
- // If the call was inlined, but then constant folded, there is no edge to
- // add. Check for this case.
- auto *NewCall = dyn_cast<CallBase>(VMI->second);
- if (!NewCall)
- continue;
-
- // We do not treat intrinsic calls like real function calls because we
- // expect them to become inline code; do not add an edge for an intrinsic.
- if (NewCall->getCalledFunction() &&
- NewCall->getCalledFunction()->isIntrinsic())
- continue;
-
- // Remember that this call site got inlined for the client of
- // InlineFunction.
- IFI.InlinedCalls.push_back(NewCall);
-
- // It's possible that inlining the callsite will cause it to go from an
- // indirect to a direct call by resolving a function pointer. If this
- // happens, set the callee of the new call site to a more precise
- // destination. This can also happen if the call graph node of the caller
- // was just unnecessarily imprecise.
- if (!I->second->getFunction())
- if (Function *F = NewCall->getCalledFunction()) {
- // Indirect call site resolved to direct call.
- CallerNode->addCalledFunction(NewCall, CG[F]);
-
- continue;
- }
-
- CallerNode->addCalledFunction(NewCall, I->second);
- }
-
- // Update the call graph by deleting the edge from Callee to Caller. We must
- // do this after the loop above in case Caller and Callee are the same.
- CallerNode->removeCallEdgeFor(*cast<CallBase>(&CB));
-}
-
static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
Module *M, BasicBlock *InsertBlock,
- InlineFunctionInfo &IFI) {
+ InlineFunctionInfo &IFI,
+ Function *CalledFunc) {
IRBuilder<> Builder(InsertBlock, InsertBlock->begin());
Value *Size =
@@ -1546,8 +1462,15 @@ static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,
// Always generate a memcpy of alignment 1 here because we don't know
// the alignment of the src pointer. Other optimizations can infer
// better alignment.
- Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src,
- /*SrcAlign*/ Align(1), Size);
+ CallInst *CI = Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src,
+ /*SrcAlign*/ Align(1), Size);
+
+ // The verifier requires that all calls of debug-info-bearing functions
+ // from debug-info-bearing functions have a debug location (for inlining
+ // purposes). Assign a dummy location to satisfy the constraint.
+ if (!CI->getDebugLoc() && InsertBlock->getParent()->getSubprogram())
+ if (DISubprogram *SP = CalledFunc->getSubprogram())
+ CI->setDebugLoc(DILocation::get(SP->getContext(), 0, 0, SP));
}
/// When inlining a call site that has a byval argument,
@@ -1557,8 +1480,6 @@ static Value *HandleByValArgument(Type *ByValType, Value *Arg,
const Function *CalledFunc,
InlineFunctionInfo &IFI,
MaybeAlign ByValAlignment) {
- assert(cast<PointerType>(Arg->getType())
- ->isOpaqueOrPointeeTypeMatches(ByValType));
Function *Caller = TheCall->getFunction();
const DataLayout &DL = Caller->getParent()->getDataLayout();
@@ -1710,6 +1631,12 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
if (allocaWouldBeStaticInEntry(AI))
continue;
+ // Do not force a debug loc for pseudo probes, since they do not need to
+ // be debuggable, and also they are expected to have a zero/null dwarf
+ // discriminator at this point which could be violated otherwise.
+ if (isa<PseudoProbeInst>(BI))
+ continue;
+
BI->setDebugLoc(TheCallDL);
}
@@ -2242,7 +2169,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// Inject byval arguments initialization.
for (ByValInit &Init : ByValInits)
HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Caller->getParent(),
- &*FirstNewBlock, IFI);
+ &*FirstNewBlock, IFI, CalledFunc);
std::optional<OperandBundleUse> ParentDeopt =
CB.getOperandBundle(LLVMContext::OB_deopt);
@@ -2292,10 +2219,6 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
}
}
- // Update the callgraph if requested.
- if (IFI.CG)
- UpdateCallGraphAfterInlining(CB, FirstNewBlock, VMap, IFI);
-
// For 'nodebug' functions, the associated DISubprogram is always null.
// Conservatively avoid propagating the callsite debug location to
// instructions inlined from a function whose DISubprogram is not null.
@@ -2333,7 +2256,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
for (BasicBlock &NewBlock :
make_range(FirstNewBlock->getIterator(), Caller->end()))
for (Instruction &I : NewBlock)
- if (auto *II = dyn_cast<CondGuardInst>(&I))
+ if (auto *II = dyn_cast<AssumeInst>(&I))
IFI.GetAssumptionCache(*Caller).registerAssumption(II);
}
@@ -2701,7 +2624,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// call graph updates weren't requested, as those provide value handle based
// tracking of inlined call sites instead. Calls to intrinsics are not
// collected because they are not inlineable.
- if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) {
+ if (InlinedFunctionInfo.ContainsCalls) {
// Otherwise just collect the raw call sites that were inlined.
for (BasicBlock &NewBB :
make_range(FirstNewBlock->getIterator(), Caller->end()))
@@ -2734,7 +2657,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
if (!CB.use_empty()) {
ReturnInst *R = Returns[0];
if (&CB == R->getReturnValue())
- CB.replaceAllUsesWith(UndefValue::get(CB.getType()));
+ CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
else
CB.replaceAllUsesWith(R->getReturnValue());
}
@@ -2846,7 +2769,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
// using the return value of the call with the computed value.
if (!CB.use_empty()) {
if (&CB == Returns[0]->getReturnValue())
- CB.replaceAllUsesWith(UndefValue::get(CB.getType()));
+ CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));
else
CB.replaceAllUsesWith(Returns[0]->getReturnValue());
}
diff --git a/llvm/lib/Transforms/Utils/InstructionNamer.cpp b/llvm/lib/Transforms/Utils/InstructionNamer.cpp
index f3499c9c8aed..3ae570cfeb77 100644
--- a/llvm/lib/Transforms/Utils/InstructionNamer.cpp
+++ b/llvm/lib/Transforms/Utils/InstructionNamer.cpp
@@ -17,9 +17,6 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils.h"
using namespace llvm;
@@ -41,35 +38,7 @@ void nameInstructions(Function &F) {
}
}
-struct InstNamer : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- InstNamer() : FunctionPass(ID) {
- initializeInstNamerPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &Info) const override {
- Info.setPreservesAll();
- }
-
- bool runOnFunction(Function &F) override {
- nameInstructions(F);
- return true;
- }
-};
-
- char InstNamer::ID = 0;
- } // namespace
-
-INITIALIZE_PASS(InstNamer, "instnamer",
- "Assign names to anonymous instructions", false, false)
-char &llvm::InstructionNamerID = InstNamer::ID;
-//===----------------------------------------------------------------------===//
-//
-// InstructionNamer - Give any unnamed non-void instructions "tmp" names.
-//
-FunctionPass *llvm::createInstructionNamerPass() {
- return new InstNamer();
-}
+} // namespace
PreservedAnalyses InstructionNamerPass::run(Function &F,
FunctionAnalysisManager &FAM) {
diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp
index af79dc456ea6..c36b0533580b 100644
--- a/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -40,7 +40,6 @@
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PredIteratorCache.h"
@@ -77,15 +76,14 @@ static bool isExitBlock(BasicBlock *BB,
/// rewrite the uses.
bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
const DominatorTree &DT, const LoopInfo &LI,
- ScalarEvolution *SE, IRBuilderBase &Builder,
- SmallVectorImpl<PHINode *> *PHIsToRemove) {
+ ScalarEvolution *SE,
+ SmallVectorImpl<PHINode *> *PHIsToRemove,
+ SmallVectorImpl<PHINode *> *InsertedPHIs) {
SmallVector<Use *, 16> UsesToRewrite;
SmallSetVector<PHINode *, 16> LocalPHIsToRemove;
PredIteratorCache PredCache;
bool Changed = false;
- IRBuilderBase::InsertPointGuard InsertPtGuard(Builder);
-
// Cache the Loop ExitBlocks across this loop. We expect to get a lot of
// instructions within the same loops, computing the exit blocks is
// expensive, and we're not mutating the loop structure.
@@ -146,17 +144,14 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
SmallVector<PHINode *, 16> AddedPHIs;
SmallVector<PHINode *, 8> PostProcessPHIs;
- SmallVector<PHINode *, 4> InsertedPHIs;
- SSAUpdater SSAUpdate(&InsertedPHIs);
+ SmallVector<PHINode *, 4> LocalInsertedPHIs;
+ SSAUpdater SSAUpdate(&LocalInsertedPHIs);
SSAUpdate.Initialize(I->getType(), I->getName());
- // Force re-computation of I, as some users now need to use the new PHI
- // node.
- if (SE)
- SE->forgetValue(I);
-
// Insert the LCSSA phi's into all of the exit blocks dominated by the
// value, and add them to the Phi's map.
+ bool HasSCEV = SE && SE->isSCEVable(I->getType()) &&
+ SE->getExistingSCEV(I) != nullptr;
for (BasicBlock *ExitBB : ExitBlocks) {
if (!DT.dominates(DomNode, DT.getNode(ExitBB)))
continue;
@@ -164,9 +159,10 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// If we already inserted something for this BB, don't reprocess it.
if (SSAUpdate.HasValueForBlock(ExitBB))
continue;
- Builder.SetInsertPoint(&ExitBB->front());
- PHINode *PN = Builder.CreatePHI(I->getType(), PredCache.size(ExitBB),
- I->getName() + ".lcssa");
+ PHINode *PN = PHINode::Create(I->getType(), PredCache.size(ExitBB),
+ I->getName() + ".lcssa", &ExitBB->front());
+ if (InsertedPHIs)
+ InsertedPHIs->push_back(PN);
// Get the debug location from the original instruction.
PN->setDebugLoc(I->getDebugLoc());
@@ -203,6 +199,13 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
if (auto *OtherLoop = LI.getLoopFor(ExitBB))
if (!L->contains(OtherLoop))
PostProcessPHIs.push_back(PN);
+
+ // If we have a cached SCEV for the original instruction, make sure the
+      // new LCSSA phi node is also cached. This makes sure that BECounts
+ // based on it will be invalidated when the LCSSA phi node is invalidated,
+ // which some passes rely on.
+ if (HasSCEV)
+ SE->getSCEV(PN);
}
// Rewrite all uses outside the loop in terms of the new PHIs we just
@@ -256,10 +259,12 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// SSAUpdater might have inserted phi-nodes inside other loops. We'll need
// to post-process them to keep LCSSA form.
- for (PHINode *InsertedPN : InsertedPHIs) {
+ for (PHINode *InsertedPN : LocalInsertedPHIs) {
if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent()))
if (!L->contains(OtherLoop))
PostProcessPHIs.push_back(InsertedPN);
+ if (InsertedPHIs)
+ InsertedPHIs->push_back(InsertedPN);
}
// Post process PHI instructions that were inserted into another disjoint
@@ -392,14 +397,7 @@ bool llvm::formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
}
}
- IRBuilder<> Builder(L.getHeader()->getContext());
- Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE, Builder);
-
- // If we modified the code, remove any caches about the loop from SCEV to
- // avoid dangling entries.
- // FIXME: This is a big hammer, can we clear the cache more selectively?
- if (SE && Changed)
- SE->forgetLoop(&L);
+ Changed = formLCSSAForInstructions(Worklist, DT, *LI, SE);
assert(L.isLCSSAForm(DT));
diff --git a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
index 5dd469c7af4b..cdcfb5050bff 100644
--- a/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
+++ b/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -28,6 +28,7 @@
#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/Constants.h"
@@ -37,8 +38,6 @@
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/MDBuilder.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <cmath>
@@ -51,31 +50,10 @@ STATISTIC(NumWrappedOneCond, "Number of One-Condition Wrappers Inserted");
STATISTIC(NumWrappedTwoCond, "Number of Two-Condition Wrappers Inserted");
namespace {
-class LibCallsShrinkWrapLegacyPass : public FunctionPass {
-public:
- static char ID; // Pass identification, replacement for typeid
- explicit LibCallsShrinkWrapLegacyPass() : FunctionPass(ID) {
- initializeLibCallsShrinkWrapLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnFunction(Function &F) override;
-};
-}
-
-char LibCallsShrinkWrapLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap",
- "Conditionally eliminate dead library calls", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap",
- "Conditionally eliminate dead library calls", false, false)
-
-namespace {
class LibCallsShrinkWrap : public InstVisitor<LibCallsShrinkWrap> {
public:
- LibCallsShrinkWrap(const TargetLibraryInfo &TLI, DominatorTree *DT)
- : TLI(TLI), DT(DT){};
+ LibCallsShrinkWrap(const TargetLibraryInfo &TLI, DomTreeUpdater &DTU)
+ : TLI(TLI), DTU(DTU){};
void visitCallInst(CallInst &CI) { checkCandidate(CI); }
bool perform() {
bool Changed = false;
@@ -101,14 +79,21 @@ private:
Value *generateTwoRangeCond(CallInst *CI, const LibFunc &Func);
Value *generateCondForPow(CallInst *CI, const LibFunc &Func);
+ // Create an OR of two conditions with given Arg and Arg2.
+ Value *createOrCond(CallInst *CI, Value *Arg, CmpInst::Predicate Cmp,
+ float Val, Value *Arg2, CmpInst::Predicate Cmp2,
+ float Val2) {
+ IRBuilder<> BBBuilder(CI);
+ auto Cond2 = createCond(BBBuilder, Arg2, Cmp2, Val2);
+ auto Cond1 = createCond(BBBuilder, Arg, Cmp, Val);
+ return BBBuilder.CreateOr(Cond1, Cond2);
+ }
+
// Create an OR of two conditions.
Value *createOrCond(CallInst *CI, CmpInst::Predicate Cmp, float Val,
CmpInst::Predicate Cmp2, float Val2) {
- IRBuilder<> BBBuilder(CI);
Value *Arg = CI->getArgOperand(0);
- auto Cond2 = createCond(BBBuilder, Arg, Cmp2, Val2);
- auto Cond1 = createCond(BBBuilder, Arg, Cmp, Val);
- return BBBuilder.CreateOr(Cond1, Cond2);
+ return createOrCond(CI, Arg, Cmp, Val, Arg, Cmp2, Val2);
}
// Create a single condition using IRBuilder.
@@ -117,18 +102,26 @@ private:
Constant *V = ConstantFP::get(BBBuilder.getContext(), APFloat(Val));
if (!Arg->getType()->isFloatTy())
V = ConstantExpr::getFPExtend(V, Arg->getType());
+ if (BBBuilder.GetInsertBlock()->getParent()->hasFnAttribute(Attribute::StrictFP))
+ BBBuilder.setIsFPConstrained(true);
return BBBuilder.CreateFCmp(Cmp, Arg, V);
}
+ // Create a single condition with given Arg.
+ Value *createCond(CallInst *CI, Value *Arg, CmpInst::Predicate Cmp,
+ float Val) {
+ IRBuilder<> BBBuilder(CI);
+ return createCond(BBBuilder, Arg, Cmp, Val);
+ }
+
// Create a single condition.
Value *createCond(CallInst *CI, CmpInst::Predicate Cmp, float Val) {
- IRBuilder<> BBBuilder(CI);
Value *Arg = CI->getArgOperand(0);
- return createCond(BBBuilder, Arg, Cmp, Val);
+ return createCond(CI, Arg, Cmp, Val);
}
const TargetLibraryInfo &TLI;
- DominatorTree *DT;
+ DomTreeUpdater &DTU;
SmallVector<CallInst *, 16> WorkList;
};
} // end anonymous namespace
@@ -428,7 +421,6 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,
Value *Base = CI->getArgOperand(0);
Value *Exp = CI->getArgOperand(1);
- IRBuilder<> BBBuilder(CI);
// Constant Base case.
if (ConstantFP *CF = dyn_cast<ConstantFP>(Base)) {
@@ -439,10 +431,7 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,
}
++NumWrappedOneCond;
- Constant *V = ConstantFP::get(CI->getContext(), APFloat(127.0f));
- if (!Exp->getType()->isFloatTy())
- V = ConstantExpr::getFPExtend(V, Exp->getType());
- return BBBuilder.CreateFCmp(CmpInst::FCMP_OGT, Exp, V);
+ return createCond(CI, Exp, CmpInst::FCMP_OGT, 127.0f);
}
// If the Base value coming from an integer type.
@@ -467,16 +456,8 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,
}
++NumWrappedTwoCond;
- Constant *V = ConstantFP::get(CI->getContext(), APFloat(UpperV));
- Constant *V0 = ConstantFP::get(CI->getContext(), APFloat(0.0f));
- if (!Exp->getType()->isFloatTy())
- V = ConstantExpr::getFPExtend(V, Exp->getType());
- if (!Base->getType()->isFloatTy())
- V0 = ConstantExpr::getFPExtend(V0, Exp->getType());
-
- Value *Cond = BBBuilder.CreateFCmp(CmpInst::FCMP_OGT, Exp, V);
- Value *Cond0 = BBBuilder.CreateFCmp(CmpInst::FCMP_OLE, Base, V0);
- return BBBuilder.CreateOr(Cond0, Cond);
+ return createOrCond(CI, Base, CmpInst::FCMP_OLE, 0.0f, Exp,
+ CmpInst::FCMP_OGT, UpperV);
}
LLVM_DEBUG(dbgs() << "Not handled pow(): base not from integer convert\n");
return nullptr;
@@ -489,7 +470,7 @@ void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) {
MDBuilder(CI->getContext()).createBranchWeights(1, 2000);
Instruction *NewInst =
- SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, DT);
+ SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, &DTU);
BasicBlock *CallBB = NewInst->getParent();
CallBB->setName("cdce.call");
BasicBlock *SuccBB = CallBB->getSingleSuccessor();
@@ -515,40 +496,21 @@ bool LibCallsShrinkWrap::perform(CallInst *CI) {
return performCallErrors(CI, Func);
}
-void LibCallsShrinkWrapLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
-}
-
static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
DominatorTree *DT) {
if (F.hasFnAttribute(Attribute::OptimizeForSize))
return false;
- LibCallsShrinkWrap CCDCE(TLI, DT);
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
+ LibCallsShrinkWrap CCDCE(TLI, DTU);
CCDCE.visit(F);
bool Changed = CCDCE.perform();
-// Verify the dominator after we've updated it locally.
- assert(!DT || DT->verify(DominatorTree::VerificationLevel::Fast));
+ // Verify the dominator after we've updated it locally.
+ assert(!DT ||
+ DTU.getDomTree().verify(DominatorTree::VerificationLevel::Fast));
return Changed;
}
-bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) {
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
- return runImpl(F, TLI, DT);
-}
-
-namespace llvm {
-char &LibCallsShrinkWrapPassID = LibCallsShrinkWrapLegacyPass::ID;
-
-// Public interface to LibCallsShrinkWrap pass.
-FunctionPass *createLibCallsShrinkWrapPass() {
- return new LibCallsShrinkWrapLegacyPass();
-}
-
PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F,
FunctionAnalysisManager &FAM) {
auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
@@ -559,4 +521,3 @@ PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F,
PA.preserve<DominatorTreeAnalysis>();
return PA;
}
-}
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 31cdd2ee56b9..f153ace5d3fc 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -25,7 +25,6 @@
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
@@ -47,6 +46,7 @@
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalObject.h"
@@ -201,16 +201,16 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
bool Changed = false;
// Figure out which case it goes to.
- for (auto i = SI->case_begin(), e = SI->case_end(); i != e;) {
+ for (auto It = SI->case_begin(), End = SI->case_end(); It != End;) {
// Found case matching a constant operand?
- if (i->getCaseValue() == CI) {
- TheOnlyDest = i->getCaseSuccessor();
+ if (It->getCaseValue() == CI) {
+ TheOnlyDest = It->getCaseSuccessor();
break;
}
// Check to see if this branch is going to the same place as the default
// dest. If so, eliminate it as an explicit compare.
- if (i->getCaseSuccessor() == DefaultDest) {
+ if (It->getCaseSuccessor() == DefaultDest) {
MDNode *MD = getValidBranchWeightMDNode(*SI);
unsigned NCases = SI->getNumCases();
// Fold the case metadata into the default if there will be any branches
@@ -221,11 +221,11 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
extractBranchWeights(MD, Weights);
// Merge weight of this case to the default weight.
- unsigned idx = i->getCaseIndex();
+ unsigned Idx = It->getCaseIndex();
// TODO: Add overflow check.
- Weights[0] += Weights[idx+1];
+ Weights[0] += Weights[Idx + 1];
// Remove weight for this case.
- std::swap(Weights[idx+1], Weights.back());
+ std::swap(Weights[Idx + 1], Weights.back());
Weights.pop_back();
SI->setMetadata(LLVMContext::MD_prof,
MDBuilder(BB->getContext()).
@@ -234,14 +234,14 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Remove this entry.
BasicBlock *ParentBB = SI->getParent();
DefaultDest->removePredecessor(ParentBB);
- i = SI->removeCase(i);
- e = SI->case_end();
+ It = SI->removeCase(It);
+ End = SI->case_end();
// Removing this case may have made the condition constant. In that
// case, update CI and restart iteration through the cases.
if (auto *NewCI = dyn_cast<ConstantInt>(SI->getCondition())) {
CI = NewCI;
- i = SI->case_begin();
+ It = SI->case_begin();
}
Changed = true;
@@ -251,11 +251,11 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,
// Otherwise, check to see if the switch only branches to one destination.
    // We do this by resetting "TheOnlyDest" to null when we find two non-equal
// destinations.
- if (i->getCaseSuccessor() != TheOnlyDest)
+ if (It->getCaseSuccessor() != TheOnlyDest)
TheOnlyDest = nullptr;
// Increment this iterator as we haven't removed the case.
- ++i;
+ ++It;
}
if (CI && !TheOnlyDest) {
@@ -424,18 +424,10 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,
if (I->isEHPad())
return false;
- // We don't want debug info removed by anything this general, unless
- // debug info is empty.
- if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
- if (DDI->getAddress())
- return false;
- return true;
- }
- if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
- if (DVI->hasArgList() || DVI->getValue(0))
- return false;
- return true;
- }
+ // We don't want debug info removed by anything this general.
+ if (isa<DbgVariableIntrinsic>(I))
+ return false;
+
if (DbgLabelInst *DLI = dyn_cast<DbgLabelInst>(I)) {
if (DLI->getLabel())
return false;
@@ -555,7 +547,7 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructionsPermissive(
std::function<void(Value *)> AboutToDeleteCallback) {
unsigned S = 0, E = DeadInsts.size(), Alive = 0;
for (; S != E; ++S) {
- auto *I = dyn_cast<Instruction>(DeadInsts[S]);
+ auto *I = dyn_cast_or_null<Instruction>(DeadInsts[S]);
if (!I || !isInstructionTriviallyDead(I)) {
DeadInsts[S] = nullptr;
++Alive;
@@ -1231,12 +1223,10 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
// If the unconditional branch we replaced contains llvm.loop metadata, we
// add the metadata to the branch instructions in the predecessors.
- unsigned LoopMDKind = BB->getContext().getMDKindID("llvm.loop");
- Instruction *TI = BB->getTerminator();
- if (TI)
- if (MDNode *LoopMD = TI->getMetadata(LoopMDKind))
+ if (Instruction *TI = BB->getTerminator())
+ if (MDNode *LoopMD = TI->getMetadata(LLVMContext::MD_loop))
for (BasicBlock *Pred : predecessors(BB))
- Pred->getTerminator()->setMetadata(LoopMDKind, LoopMD);
+ Pred->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopMD);
// Everything that jumped to BB now goes to Succ.
BB->replaceAllUsesWith(Succ);
@@ -1423,6 +1413,12 @@ static Align tryEnforceAlignment(Value *V, Align PrefAlign,
if (!GO->canIncreaseAlignment())
return CurrentAlign;
+ if (GO->isThreadLocal()) {
+ unsigned MaxTLSAlign = GO->getParent()->getMaxTLSAlignment() / CHAR_BIT;
+ if (MaxTLSAlign && PrefAlign > Align(MaxTLSAlign))
+ PrefAlign = Align(MaxTLSAlign);
+ }
+
GO->setAlignment(PrefAlign);
return PrefAlign;
}
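The thread-local clamp above converts the module's maximum TLS alignment from bits to bytes before comparing it with the preferred alignment, so an over-large request is reduced instead of silently exceeding the TLS limit. The same arithmetic in isolation, as a hypothetical standalone helper rather than the upstream code:

#include <climits>
#include <cstdint>

// With a module max TLS alignment of 256 bits (32 bytes), a requested 64-byte
// alignment is clamped to 32; a limit of 0 means "no limit" and is ignored.
uint64_t clampTLSAlignBytes(uint64_t PrefAlignBytes, uint64_t MaxTLSAlignBits) {
  uint64_t MaxTLSAlignBytes = MaxTLSAlignBits / CHAR_BIT;
  if (MaxTLSAlignBytes != 0 && PrefAlignBytes > MaxTLSAlignBytes)
    PrefAlignBytes = MaxTLSAlignBytes;
  return PrefAlignBytes;
}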
@@ -1480,19 +1476,16 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,
/// (or fragment of the variable) described by \p DII.
///
/// This is primarily intended as a helper for the different
-/// ConvertDebugDeclareToDebugValue functions. The dbg.declare/dbg.addr that is
-/// converted describes an alloca'd variable, so we need to use the
-/// alloc size of the value when doing the comparison. E.g. an i1 value will be
-/// identified as covering an n-bit fragment, if the store size of i1 is at
-/// least n bits.
+/// ConvertDebugDeclareToDebugValue functions. The dbg.declare that is converted
+/// describes an alloca'd variable, so we need to use the alloc size of the
+/// value when doing the comparison. E.g. an i1 value will be identified as
+/// covering an n-bit fragment, if the store size of i1 is at least n bits.
static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
const DataLayout &DL = DII->getModule()->getDataLayout();
TypeSize ValueSize = DL.getTypeAllocSizeInBits(ValTy);
- if (std::optional<uint64_t> FragmentSize = DII->getFragmentSizeInBits()) {
- assert(!ValueSize.isScalable() &&
- "Fragments don't work on scalable types.");
- return ValueSize.getFixedValue() >= *FragmentSize;
- }
+ if (std::optional<uint64_t> FragmentSize = DII->getFragmentSizeInBits())
+ return TypeSize::isKnownGE(ValueSize, TypeSize::getFixed(*FragmentSize));
+
// We can't always calculate the size of the DI variable (e.g. if it is a
// VLA). Try to use the size of the alloca that the dbg intrinsic describes
  // instead.
@@ -1513,7 +1506,7 @@ static bool valueCoversEntireFragment(Type *ValTy, DbgVariableIntrinsic *DII) {
}
/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
-/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
+/// that has an associated llvm.dbg.declare intrinsic.
void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
StoreInst *SI, DIBuilder &Builder) {
assert(DII->isAddressOfVariable() || isa<DbgAssignIntrinsic>(DII));
@@ -1524,24 +1517,39 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
DebugLoc NewLoc = getDebugValueLoc(DII);
- if (!valueCoversEntireFragment(DV->getType(), DII)) {
- // FIXME: If storing to a part of the variable described by the dbg.declare,
- // then we want to insert a dbg.value for the corresponding fragment.
- LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: "
- << *DII << '\n');
- // For now, when there is a store to parts of the variable (but we do not
- // know which part) we insert an dbg.value intrinsic to indicate that we
- // know nothing about the variable's content.
- DV = UndefValue::get(DV->getType());
+ // If the alloca describes the variable itself, i.e. the expression in the
+ // dbg.declare doesn't start with a dereference, we can perform the
+ // conversion if the value covers the entire fragment of DII.
+ // If the alloca describes the *address* of DIVar, i.e. DIExpr is
+ // *just* a DW_OP_deref, we use DV as is for the dbg.value.
+ // We conservatively ignore other dereferences, because the following two are
+ // not equivalent:
+ // dbg.declare(alloca, ..., !Expr(deref, plus_uconstant, 2))
+ // dbg.value(DV, ..., !Expr(deref, plus_uconstant, 2))
+ // The former is adding 2 to the address of the variable, whereas the latter
+ // is adding 2 to the value of the variable. As such, we insist on just a
+ // deref expression.
+ bool CanConvert =
+ DIExpr->isDeref() || (!DIExpr->startsWithDeref() &&
+ valueCoversEntireFragment(DV->getType(), DII));
+ if (CanConvert) {
Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
return;
}
+ // FIXME: If storing to a part of the variable described by the dbg.declare,
+ // then we want to insert a dbg.value for the corresponding fragment.
+ LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: " << *DII
+ << '\n');
+ // For now, when there is a store to parts of the variable (but we do not
+  // know which part) we insert a dbg.value intrinsic to indicate that we
+ // know nothing about the variable's content.
+ DV = UndefValue::get(DV->getType());
Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, NewLoc, SI);
}
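
The comment block above argues that a dbg.declare expression starting with a dereference cannot simply be reused on a dbg.value. A toy stack-machine model of that argument, with made-up addresses and values and no claim to match the LLVM debug-info API:

// The same opcode sequence [DW_OP_deref, DW_OP_plus_uconst 2] lands in
// different places depending on whether the implicit first stack entry is
// the variable's address (dbg.declare) or the stored value (dbg.value).
#include <cstdint>
#include <cstdio>
#include <map>

static std::map<uint64_t, uint64_t> Memory; // toy flat memory

static uint64_t evalDerefPlus2(uint64_t Start) {
  uint64_t Top = Start;
  Top = Memory[Top]; // DW_OP_deref
  Top += 2;          // DW_OP_plus_uconst 2
  return Top;
}

int main() {
  const uint64_t AllocaAddr = 0x1000;
  Memory[AllocaAddr] = 40; // current value of the variable

  uint64_t FromAddress = evalDerefPlus2(AllocaAddr); // 40 + 2 == 42
  uint64_t FromValue = evalDerefPlus2(40);           // Memory[40] + 2: unrelated
  std::printf("%llu vs %llu\n", (unsigned long long)FromAddress,
              (unsigned long long)FromValue);
}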
/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
-/// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.
+/// that has an associated llvm.dbg.declare intrinsic.
void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
LoadInst *LI, DIBuilder &Builder) {
auto *DIVar = DII->getVariable();
@@ -1569,7 +1577,7 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
}
/// Inserts a llvm.dbg.value intrinsic after a phi that has an associated
-/// llvm.dbg.declare or llvm.dbg.addr intrinsic.
+/// llvm.dbg.declare intrinsic.
void llvm::ConvertDebugDeclareToDebugValue(DbgVariableIntrinsic *DII,
PHINode *APN, DIBuilder &Builder) {
auto *DIVar = DII->getVariable();
@@ -1752,8 +1760,8 @@ void llvm::insertDebugValuesForPHIs(BasicBlock *BB,
bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
DIBuilder &Builder, uint8_t DIExprFlags,
int Offset) {
- auto DbgAddrs = FindDbgAddrUses(Address);
- for (DbgVariableIntrinsic *DII : DbgAddrs) {
+ auto DbgDeclares = FindDbgDeclareUses(Address);
+ for (DbgVariableIntrinsic *DII : DbgDeclares) {
const DebugLoc &Loc = DII->getDebugLoc();
auto *DIVar = DII->getVariable();
auto *DIExpr = DII->getExpression();
@@ -1764,7 +1772,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,
Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, DII);
DII->eraseFromParent();
}
- return !DbgAddrs.empty();
+ return !DbgDeclares.empty();
}
static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,
@@ -1860,9 +1868,8 @@ void llvm::salvageDebugInfoForDbgValues(
continue;
}
- // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they
- // are implicitly pointing out the value as a DWARF memory location
- // description.
+  // Do not add DW_OP_stack_value for DbgDeclare, because it implicitly points
+  // out the value as a DWARF memory location description.
bool StackValue = isa<DbgValueInst>(DII);
auto DIILocation = DII->location_ops();
assert(
@@ -1896,17 +1903,14 @@ void llvm::salvageDebugInfoForDbgValues(
bool IsValidSalvageExpr = SalvagedExpr->getNumElements() <= MaxExpressionSize;
if (AdditionalValues.empty() && IsValidSalvageExpr) {
DII->setExpression(SalvagedExpr);
- } else if (isa<DbgValueInst>(DII) && !isa<DbgAssignIntrinsic>(DII) &&
- IsValidSalvageExpr &&
+ } else if (isa<DbgValueInst>(DII) && IsValidSalvageExpr &&
DII->getNumVariableLocationOps() + AdditionalValues.size() <=
MaxDebugArgs) {
DII->addVariableLocationOps(AdditionalValues, SalvagedExpr);
} else {
- // Do not salvage using DIArgList for dbg.addr/dbg.declare, as it is
- // not currently supported in those instructions. Do not salvage using
- // DIArgList for dbg.assign yet. FIXME: support this.
- // Also do not salvage if the resulting DIArgList would contain an
- // unreasonably large number of values.
+ // Do not salvage using DIArgList for dbg.declare, as it is not currently
+ // supported in those instructions. Also do not salvage if the resulting
+ // DIArgList would contain an unreasonably large number of values.
DII->setKillLocation();
}
LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');
@@ -1934,7 +1938,7 @@ Value *getSalvageOpsForGEP(GetElementPtrInst *GEP, const DataLayout &DL,
Opcodes.insert(Opcodes.begin(), {dwarf::DW_OP_LLVM_arg, 0});
CurrentLocOps = 1;
}
- for (auto Offset : VariableOffsets) {
+ for (const auto &Offset : VariableOffsets) {
AdditionalValues.push_back(Offset.first);
assert(Offset.second.isStrictlyPositive() &&
"Expected strictly positive multiplier for offset.");
@@ -1976,6 +1980,18 @@ uint64_t getDwarfOpForBinOp(Instruction::BinaryOps Opcode) {
}
}
+static void handleSSAValueOperands(uint64_t CurrentLocOps,
+ SmallVectorImpl<uint64_t> &Opcodes,
+ SmallVectorImpl<Value *> &AdditionalValues,
+ Instruction *I) {
+ if (!CurrentLocOps) {
+ Opcodes.append({dwarf::DW_OP_LLVM_arg, 0});
+ CurrentLocOps = 1;
+ }
+ Opcodes.append({dwarf::DW_OP_LLVM_arg, CurrentLocOps});
+ AdditionalValues.push_back(I->getOperand(1));
+}
+
Value *getSalvageOpsForBinOp(BinaryOperator *BI, uint64_t CurrentLocOps,
SmallVectorImpl<uint64_t> &Opcodes,
SmallVectorImpl<Value *> &AdditionalValues) {
@@ -1998,12 +2014,7 @@ Value *getSalvageOpsForBinOp(BinaryOperator *BI, uint64_t CurrentLocOps,
}
Opcodes.append({dwarf::DW_OP_constu, Val});
} else {
- if (!CurrentLocOps) {
- Opcodes.append({dwarf::DW_OP_LLVM_arg, 0});
- CurrentLocOps = 1;
- }
- Opcodes.append({dwarf::DW_OP_LLVM_arg, CurrentLocOps});
- AdditionalValues.push_back(BI->getOperand(1));
+ handleSSAValueOperands(CurrentLocOps, Opcodes, AdditionalValues, BI);
}
// Add salvaged binary operator to expression stack, if it has a valid
@@ -2015,6 +2026,60 @@ Value *getSalvageOpsForBinOp(BinaryOperator *BI, uint64_t CurrentLocOps,
return BI->getOperand(0);
}
+uint64_t getDwarfOpForIcmpPred(CmpInst::Predicate Pred) {
+ // The signedness of the operation is implicit in the typed stack, signed and
+ // unsigned instructions map to the same DWARF opcode.
+ switch (Pred) {
+ case CmpInst::ICMP_EQ:
+ return dwarf::DW_OP_eq;
+ case CmpInst::ICMP_NE:
+ return dwarf::DW_OP_ne;
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_SGT:
+ return dwarf::DW_OP_gt;
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_SGE:
+ return dwarf::DW_OP_ge;
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_SLT:
+ return dwarf::DW_OP_lt;
+ case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_SLE:
+ return dwarf::DW_OP_le;
+ default:
+ return 0;
+ }
+}
+
+Value *getSalvageOpsForIcmpOp(ICmpInst *Icmp, uint64_t CurrentLocOps,
+ SmallVectorImpl<uint64_t> &Opcodes,
+ SmallVectorImpl<Value *> &AdditionalValues) {
+ // Handle icmp operations with constant integer operands as a special case.
+ auto *ConstInt = dyn_cast<ConstantInt>(Icmp->getOperand(1));
+ // Values wider than 64 bits cannot be represented within a DIExpression.
+ if (ConstInt && ConstInt->getBitWidth() > 64)
+ return nullptr;
+ // Push any Constant Int operand onto the expression stack.
+ if (ConstInt) {
+ if (Icmp->isSigned())
+ Opcodes.push_back(dwarf::DW_OP_consts);
+ else
+ Opcodes.push_back(dwarf::DW_OP_constu);
+ uint64_t Val = ConstInt->getSExtValue();
+ Opcodes.push_back(Val);
+ } else {
+ handleSSAValueOperands(CurrentLocOps, Opcodes, AdditionalValues, Icmp);
+ }
+
+ // Add salvaged binary operator to expression stack, if it has a valid
+ // representation in a DIExpression.
+ uint64_t DwarfIcmpOp = getDwarfOpForIcmpPred(Icmp->getPredicate());
+ if (!DwarfIcmpOp)
+ return nullptr;
+ Opcodes.push_back(DwarfIcmpOp);
+ return Icmp->getOperand(0);
+}
+
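
For a compare against a small constant, the new getSalvageOpsForIcmpOp builds an expression of the shape shown in this sketch. Opcode names are spelled out as strings for illustration only; the real code appends numeric dwarf::DW_OP_* values, and signed and unsigned predicates deliberately share one comparison opcode:

// Sketch of the ops appended for `icmp sgt i32 %x, 5` once %x is already
// described by the expression; not the real encoding, just its shape.
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

int main() {
  const bool IsSigned = true; // sgt -> DW_OP_consts; ugt would use DW_OP_constu
  const int64_t RHS = 5;
  std::vector<std::string> Ops = {IsSigned ? "DW_OP_consts" : "DW_OP_constu",
                                  std::to_string(RHS),
                                  "DW_OP_gt"}; // shared by sgt and ugt
  for (const std::string &Op : Ops)
    std::cout << Op << ' ';
  std::cout << '\n';
}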
Value *llvm::salvageDebugInfoImpl(Instruction &I, uint64_t CurrentLocOps,
SmallVectorImpl<uint64_t> &Ops,
SmallVectorImpl<Value *> &AdditionalValues) {
@@ -2054,6 +2119,8 @@ Value *llvm::salvageDebugInfoImpl(Instruction &I, uint64_t CurrentLocOps,
return getSalvageOpsForGEP(GEP, DL, CurrentLocOps, Ops, AdditionalValues);
if (auto *BI = dyn_cast<BinaryOperator>(&I))
return getSalvageOpsForBinOp(BI, CurrentLocOps, Ops, AdditionalValues);
+ if (auto *IC = dyn_cast<ICmpInst>(&I))
+ return getSalvageOpsForIcmpOp(IC, CurrentLocOps, Ops, AdditionalValues);
// *Not* to do: we should not attempt to salvage load instructions,
// because the validity and lifetime of a dbg.value containing
@@ -2661,43 +2728,52 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
intersectAccessGroups(K, J));
break;
case LLVMContext::MD_range:
-
- // If K does move, use most generic range. Otherwise keep the range of
- // K.
- if (DoesKMove)
- // FIXME: If K does move, we should drop the range info and nonnull.
- // Currently this function is used with DoesKMove in passes
- // doing hoisting/sinking and the current behavior of using the
- // most generic range is correct in those cases.
+ if (DoesKMove || !K->hasMetadata(LLVMContext::MD_noundef))
K->setMetadata(Kind, MDNode::getMostGenericRange(JMD, KMD));
break;
case LLVMContext::MD_fpmath:
K->setMetadata(Kind, MDNode::getMostGenericFPMath(JMD, KMD));
break;
case LLVMContext::MD_invariant_load:
- // Only set the !invariant.load if it is present in both instructions.
- K->setMetadata(Kind, JMD);
+ // If K moves, only set the !invariant.load if it is present in both
+ // instructions.
+ if (DoesKMove)
+ K->setMetadata(Kind, JMD);
break;
case LLVMContext::MD_nonnull:
- // If K does move, keep nonull if it is present in both instructions.
- if (DoesKMove)
+ if (DoesKMove || !K->hasMetadata(LLVMContext::MD_noundef))
K->setMetadata(Kind, JMD);
break;
case LLVMContext::MD_invariant_group:
// Preserve !invariant.group in K.
break;
case LLVMContext::MD_align:
- K->setMetadata(Kind,
- MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
+ if (DoesKMove || !K->hasMetadata(LLVMContext::MD_noundef))
+ K->setMetadata(
+ Kind, MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
break;
case LLVMContext::MD_dereferenceable:
case LLVMContext::MD_dereferenceable_or_null:
- K->setMetadata(Kind,
- MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
+ if (DoesKMove)
+ K->setMetadata(Kind,
+ MDNode::getMostGenericAlignmentOrDereferenceable(JMD, KMD));
break;
case LLVMContext::MD_preserve_access_index:
// Preserve !preserve.access.index in K.
break;
+ case LLVMContext::MD_noundef:
+ // If K does move, keep noundef if it is present in both instructions.
+ if (DoesKMove)
+ K->setMetadata(Kind, JMD);
+ break;
+ case LLVMContext::MD_nontemporal:
+ // Preserve !nontemporal if it is present on both instructions.
+ K->setMetadata(Kind, JMD);
+ break;
+ case LLVMContext::MD_prof:
+ if (DoesKMove)
+ K->setMetadata(Kind, MDNode::getMergedProfMetadata(KMD, JMD, K, J));
+ break;
}
}
// Set !invariant.group from J if J has it. If both instructions have it
@@ -2713,14 +2789,22 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
bool KDominatesJ) {
- unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_range,
- LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
- LLVMContext::MD_invariant_group, LLVMContext::MD_align,
- LLVMContext::MD_dereferenceable,
- LLVMContext::MD_dereferenceable_or_null,
- LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index};
+ unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
+ LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias,
+ LLVMContext::MD_range,
+ LLVMContext::MD_fpmath,
+ LLVMContext::MD_invariant_load,
+ LLVMContext::MD_nonnull,
+ LLVMContext::MD_invariant_group,
+ LLVMContext::MD_align,
+ LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null,
+ LLVMContext::MD_access_group,
+ LLVMContext::MD_preserve_access_index,
+ LLVMContext::MD_prof,
+ LLVMContext::MD_nontemporal,
+ LLVMContext::MD_noundef};
combineMetadata(K, J, KnownIDs, KDominatesJ);
}
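
When !range metadata is combined above, MDNode::getMostGenericRange keeps a range covering both inputs. A simplified model with a single non-wrapping half-open interval per instruction (real !range metadata is a list of intervals and may wrap):

// Simplified "most generic range": widen to cover both operands' ranges.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>

using Range = std::pair<int64_t, int64_t>; // [Lo, Hi)

Range mostGenericRange(Range A, Range B) {
  return {std::min(A.first, B.first), std::max(A.second, B.second)};
}

int main() {
  // Hoisting a load known to be in [0,10) past one known to be in [5,20)
  // leaves the conservative union [0,20) on the surviving instruction.
  Range R = mostGenericRange({0, 10}, {5, 20});
  assert(R.first == 0 && R.second == 20);
}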
@@ -2799,13 +2883,7 @@ void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) {
// In general, GVN unifies expressions over different control-flow
// regions, and so we need a conservative combination of the noalias
// scopes.
- static const unsigned KnownIDs[] = {
- LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_range,
- LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
- LLVMContext::MD_invariant_group, LLVMContext::MD_nonnull,
- LLVMContext::MD_access_group, LLVMContext::MD_preserve_access_index};
- combineMetadata(ReplInst, I, KnownIDs, false);
+ combineMetadataForCSE(ReplInst, I, false);
}
template <typename RootType, typename DominatesFn>
@@ -2930,7 +3008,8 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,
return;
unsigned BitWidth = DL.getPointerTypeSizeInBits(NewTy);
- if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
+ if (BitWidth == OldLI.getType()->getScalarSizeInBits() &&
+ !getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
MDNode *NN = MDNode::get(OldLI.getContext(), std::nullopt);
NewLI.setMetadata(LLVMContext::MD_nonnull, NN);
}
@@ -2969,7 +3048,7 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt,
for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
Instruction *I = &*II;
- I->dropUndefImplyingAttrsAndUnknownMetadata();
+ I->dropUBImplyingAttrsAndMetadata();
if (I->isUsedByMetadata())
dropDebugUsers(*I);
if (I->isDebugOrPseudoInst()) {
@@ -3125,7 +3204,7 @@ collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals,
// Check that the mask allows a multiple of 8 bits for a bswap, for an
// early exit.
- unsigned NumMaskedBits = AndMask.countPopulation();
+ unsigned NumMaskedBits = AndMask.popcount();
if (!MatchBitReversals && (NumMaskedBits % 8) != 0)
return Result;
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 2acbe9002309..d701cf110154 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -345,20 +345,20 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
unsigned DesiredPeelCount = 0;
- for (auto *BB : L.blocks()) {
- auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
- if (!BI || BI->isUnconditional())
- continue;
-
- // Ignore loop exit condition.
- if (L.getLoopLatch() == BB)
- continue;
+ // Do not peel the entire loop.
+ const SCEV *BE = SE.getConstantMaxBackedgeTakenCount(&L);
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(BE))
+ MaxPeelCount =
+ std::min((unsigned)SC->getAPInt().getLimitedValue() - 1, MaxPeelCount);
+
+ auto ComputePeelCount = [&](Value *Condition) -> void {
+ if (!Condition->getType()->isIntegerTy())
+ return;
- Value *Condition = BI->getCondition();
Value *LeftVal, *RightVal;
CmpInst::Predicate Pred;
if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
- continue;
+ return;
const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
const SCEV *RightSCEV = SE.getSCEV(RightVal);
@@ -366,7 +366,7 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
// Do not consider predicates that are known to be true or false
// independently of the loop iteration.
if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV))
- continue;
+ return;
// Check if we have a condition with one AddRec and one non AddRec
// expression. Normalize LeftSCEV to be the AddRec.
@@ -375,7 +375,7 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
std::swap(LeftSCEV, RightSCEV);
Pred = ICmpInst::getSwappedPredicate(Pred);
} else
- continue;
+ return;
}
const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
@@ -383,10 +383,10 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
// Avoid huge SCEV computations in the loop below, make sure we only
// consider AddRecs of the loop we are trying to peel.
if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
- continue;
+ return;
if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
!SE.getMonotonicPredicateType(LeftAR, Pred))
- continue;
+ return;
// Check if extending the current DesiredPeelCount lets us evaluate Pred
// or !Pred in the loop body statically.
@@ -422,7 +422,7 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
// first iteration of the loop body after peeling?
if (!SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
RightSCEV))
- continue; // If not, give up.
+ return; // If not, give up.
// However, for equality comparisons, that isn't always sufficient to
    // eliminate the comparison in the loop body, we may need to peel one more
@@ -433,11 +433,28 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
!SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
if (!CanPeelOneMoreIteration())
- continue; // Need to peel one more iteration, but can't. Give up.
+ return; // Need to peel one more iteration, but can't. Give up.
PeelOneMoreIteration(); // Great!
}
DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
+ };
+
+ for (BasicBlock *BB : L.blocks()) {
+ for (Instruction &I : *BB) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(&I))
+ ComputePeelCount(SI->getCondition());
+ }
+
+ auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || BI->isUnconditional())
+ continue;
+
+ // Ignore loop exit condition.
+ if (L.getLoopLatch() == BB)
+ continue;
+
+ ComputePeelCount(BI->getCondition());
}
return DesiredPeelCount;
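
A source-level picture of the pattern countToEliminateCompares is hunting for, now including select conditions as well as branch conditions: a predicate over an affine induction variable that becomes statically known once a fixed number of iterations is peeled. The example is illustrative, not taken from a test:

// Peeling one iteration removes the `I == 0` branch from the loop body.
#include <cstdio>

static void init() { std::puts("init"); }
static void body(int I) { std::printf("%d\n", I); }

void original(int N) {
  for (int I = 0; I < N; ++I) {
    if (I == 0) // affine IV compared against a loop-invariant value
      init();
    body(I);
  }
}

void afterPeelingOne(int N) {
  if (0 < N) { // peeled first iteration
    init();
    body(0);
  }
  for (int I = 1; I < N; ++I) // `I == 0` is now known false and folds away
    body(I);
}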
@@ -1025,6 +1042,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
// We modified the loop, update SE.
SE->forgetTopmostLoop(L);
+ SE->forgetBlockAndLoopDispositions();
#ifdef EXPENSIVE_CHECKS
// Finally DomtTree must be correct.
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 1a9eaf242190..d81db5647c60 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -435,6 +435,8 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Otherwise, create a duplicate of the instruction.
Instruction *C = Inst->clone();
+ C->insertBefore(LoopEntryBranch);
+
++NumInstrsDuplicated;
// Eagerly remap the operands of the instruction.
@@ -444,7 +446,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Avoid inserting the same intrinsic twice.
if (auto *DII = dyn_cast<DbgVariableIntrinsic>(C))
if (DbgIntrinsics.count(makeHash(DII))) {
- C->deleteValue();
+ C->eraseFromParent();
continue;
}
@@ -457,7 +459,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// in the map.
InsertNewValueIntoMap(ValueMap, Inst, V);
if (!C->mayHaveSideEffects()) {
- C->deleteValue();
+ C->eraseFromParent();
C = nullptr;
}
} else {
@@ -466,7 +468,6 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
if (C) {
// Otherwise, stick the new instruction into the new block!
C->setName(Inst->getName());
- C->insertBefore(LoopEntryBranch);
if (auto *II = dyn_cast<AssumeInst>(C))
AC->registerAssumption(II);
diff --git a/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 87a0e54e2704..3e604fdf2e11 100644
--- a/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -448,16 +448,15 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
// backedge blocks to jump to the BEBlock instead of the header.
// If one of the backedges has llvm.loop metadata attached, we remove
// it from the backedge and add it to BEBlock.
- unsigned LoopMDKind = BEBlock->getContext().getMDKindID("llvm.loop");
MDNode *LoopMD = nullptr;
for (BasicBlock *BB : BackedgeBlocks) {
Instruction *TI = BB->getTerminator();
if (!LoopMD)
- LoopMD = TI->getMetadata(LoopMDKind);
- TI->setMetadata(LoopMDKind, nullptr);
+ LoopMD = TI->getMetadata(LLVMContext::MD_loop);
+ TI->setMetadata(LLVMContext::MD_loop, nullptr);
TI->replaceSuccessorWith(Header, BEBlock);
}
- BEBlock->getTerminator()->setMetadata(LoopMDKind, LoopMD);
+ BEBlock->getTerminator()->setMetadata(LLVMContext::MD_loop, LoopMD);
//===--- Update all analyses which we must preserve now -----------------===//
@@ -693,12 +692,6 @@ ReprocessLoop:
}
}
- // Changing exit conditions for blocks may affect exit counts of this loop and
- // any of its paretns, so we must invalidate the entire subtree if we've made
- // any changes.
- if (Changed && SE)
- SE->forgetTopmostLoop(L);
-
if (MSSAU && VerifyMemorySSA)
MSSAU->getMemorySSA()->verifyMemorySSA();
@@ -737,6 +730,13 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE,
AC, MSSAU, PreserveLCSSA);
+ // Changing exit conditions for blocks may affect exit counts of this loop and
+ // any of its parents, so we must invalidate the entire subtree if we've made
+ // any changes. Do this here rather than in simplifyOneLoop() as the top-most
+ // loop is going to be the same for all child loops.
+ if (Changed && SE)
+ SE->forgetTopmostLoop(L);
+
return Changed;
}
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index e8f585b4a94d..511dd61308f9 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -45,6 +45,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/ValueHandle.h"
@@ -216,6 +217,8 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
ScalarEvolution *SE, DominatorTree *DT,
AssumptionCache *AC,
const TargetTransformInfo *TTI) {
+ using namespace llvm::PatternMatch;
+
// Simplify any new induction variables in the partially unrolled loop.
if (SE && SimplifyIVs) {
SmallVector<WeakTrackingVH, 16> DeadInsts;
@@ -241,6 +244,30 @@ void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI,
Inst.replaceAllUsesWith(V);
if (isInstructionTriviallyDead(&Inst))
DeadInsts.emplace_back(&Inst);
+
+ // Fold ((add X, C1), C2) to (add X, C1+C2). This is very common in
+ // unrolled loops, and handling this early allows following code to
+ // identify the IV as a "simple recurrence" without first folding away
+ // a long chain of adds.
+ {
+ Value *X;
+ const APInt *C1, *C2;
+ if (match(&Inst, m_Add(m_Add(m_Value(X), m_APInt(C1)), m_APInt(C2)))) {
+ auto *InnerI = dyn_cast<Instruction>(Inst.getOperand(0));
+ auto *InnerOBO = cast<OverflowingBinaryOperator>(Inst.getOperand(0));
+ bool SignedOverflow;
+ APInt NewC = C1->sadd_ov(*C2, SignedOverflow);
+ Inst.setOperand(0, X);
+ Inst.setOperand(1, ConstantInt::get(Inst.getType(), NewC));
+ Inst.setHasNoUnsignedWrap(Inst.hasNoUnsignedWrap() &&
+ InnerOBO->hasNoUnsignedWrap());
+ Inst.setHasNoSignedWrap(Inst.hasNoSignedWrap() &&
+ InnerOBO->hasNoSignedWrap() &&
+ !SignedOverflow);
+ if (InnerI && isInstructionTriviallyDead(InnerI))
+ DeadInsts.emplace_back(InnerI);
+ }
+ }
}
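
The fold added above rewrites ((X + C1) + C2) into (X + (C1 + C2)) and only keeps the signed no-wrap flag when both original adds carried it and the constant addition itself does not overflow. A plain-integer sketch of that flag logic, modelling APInt::sadd_ov with __builtin_add_overflow:

#include <cassert>
#include <cstdint>

struct FoldResult {
  int64_t NewConst;
  bool KeepNSW;
};

// ((X + C1) + C2) -> (X + NewConst); nsw survives only if it was on both
// adds and C1 + C2 did not overflow (mirrors the sadd_ov check above).
FoldResult foldAddChain(int64_t C1, int64_t C2, bool InnerNSW, bool OuterNSW) {
  int64_t Sum;
  bool Overflow = __builtin_add_overflow(C1, C2, &Sum);
  return {Sum, InnerNSW && OuterNSW && !Overflow};
}

int main() {
  FoldResult R = foldAddChain(3, 4, /*InnerNSW=*/true, /*OuterNSW=*/true);
  assert(R.NewConst == 7 && R.KeepNSW);
  R = foldAddChain(INT64_MAX, 1, true, true); // constant addition overflows
  assert(!R.KeepNSW);
}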
// We can't do recursive deletion until we're done iterating, as we might
// have a phi which (potentially indirectly) uses instructions later in
@@ -310,6 +337,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
const unsigned MaxTripCount = SE->getSmallConstantMaxTripCount(L);
const bool MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
+ unsigned EstimatedLoopInvocationWeight = 0;
+ std::optional<unsigned> OriginalTripCount =
+ llvm::getLoopEstimatedTripCount(L, &EstimatedLoopInvocationWeight);
// Effectively "DCE" unrolled iterations that are beyond the max tripcount
// and will never be executed.
@@ -513,7 +543,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
!EnableFSDiscriminator)
for (BasicBlock *BB : L->getBlocks())
for (Instruction &I : *BB)
- if (!isa<DbgInfoIntrinsic>(&I))
+ if (!I.isDebugOrPseudoInst())
if (const DILocation *DIL = I.getDebugLoc()) {
auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(ULO.Count);
if (NewDIL)
@@ -830,8 +860,16 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
Loop *OuterL = L->getParentLoop();
// Update LoopInfo if the loop is completely removed.
- if (CompletelyUnroll)
+ if (CompletelyUnroll) {
LI->erase(L);
+ // We shouldn't try to use `L` anymore.
+ L = nullptr;
+ } else if (OriginalTripCount) {
+    // Update the trip count. Note that the remainder loop already has logic
+    // computing it in `UnrollRuntimeLoopRemainder`.
+ setLoopEstimatedTripCount(L, *OriginalTripCount / ULO.Count,
+ EstimatedLoopInvocationWeight);
+ }
// LoopInfo should not be valid, confirm that.
if (UnrollVerifyLoopInfo)
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index b125e952ec94..31b8cd34eb24 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -347,7 +347,7 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
!EnableFSDiscriminator)
for (BasicBlock *BB : L->getBlocks())
for (Instruction &I : *BB)
- if (!isa<DbgInfoIntrinsic>(&I))
+ if (!I.isDebugOrPseudoInst())
if (const DILocation *DIL = I.getDebugLoc()) {
auto NewDIL = DIL->cloneByMultiplyingDuplicationFactor(Count);
if (NewDIL)
@@ -757,11 +757,11 @@ checkDependencies(Loop &Root, const BasicBlockSet &SubLoopBlocks,
DependenceInfo &DI, LoopInfo &LI) {
SmallVector<BasicBlockSet, 8> AllBlocks;
for (Loop *L : Root.getLoopsInPreorder())
- if (ForeBlocksMap.find(L) != ForeBlocksMap.end())
+ if (ForeBlocksMap.contains(L))
AllBlocks.push_back(ForeBlocksMap.lookup(L));
AllBlocks.push_back(SubLoopBlocks);
for (Loop *L : Root.getLoopsInPreorder())
- if (AftBlocksMap.find(L) != AftBlocksMap.end())
+ if (AftBlocksMap.contains(L))
AllBlocks.push_back(AftBlocksMap.lookup(L));
unsigned LoopDepth = Root.getLoopDepth();
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index b19156bcb420..1e22eca30d2d 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -457,7 +457,7 @@ static bool canProfitablyUnrollMultiExitLoop(
// call.
return (OtherExits.size() == 1 &&
(UnrollRuntimeOtherExitPredictable ||
- OtherExits[0]->getTerminatingDeoptimizeCall()));
+ OtherExits[0]->getPostdominatingDeoptimizeCall()));
// TODO: These can be fine-tuned further to consider code size or deopt states
// that are captured by the deoptimize exit block.
// Also, we can extend this to support more cases, if we actually
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 7df8651ede15..7d6662c44f07 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -466,6 +466,19 @@ llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {
return Worklist;
}
+bool llvm::isAlmostDeadIV(PHINode *PN, BasicBlock *LatchBlock, Value *Cond) {
+ int LatchIdx = PN->getBasicBlockIndex(LatchBlock);
+ Value *IncV = PN->getIncomingValue(LatchIdx);
+
+ for (User *U : PN->users())
+ if (U != Cond && U != IncV) return false;
+
+ for (User *U : IncV->users())
+ if (U != Cond && U != PN) return false;
+ return true;
+}
+
+
void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
LoopInfo *LI, MemorySSA *MSSA) {
assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
@@ -628,18 +641,17 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
}
// After the loop has been deleted all the values defined and modified
- // inside the loop are going to be unavailable.
- // Since debug values in the loop have been deleted, inserting an undef
- // dbg.value truncates the range of any dbg.value before the loop where the
- // loop used to be. This is particularly important for constant values.
+ // inside the loop are going to be unavailable. Values computed in the
+  // inside the loop are going to be unavailable. Values computed in the
+  // loop will have been deleted, automatically causing their debug uses to
+  // be replaced with undef. Loop-invariant values will still be available.
+  // Move dbg.values out of the loop so that earlier location ranges are still
+ // terminated and loop invariant assignments are preserved.
Instruction *InsertDbgValueBefore = ExitBlock->getFirstNonPHI();
assert(InsertDbgValueBefore &&
"There should be a non-PHI instruction in exit block, else these "
"instructions will have no parent.");
- for (auto *DVI : DeadDebugInst) {
- DVI->setKillLocation();
+ for (auto *DVI : DeadDebugInst)
DVI->moveBefore(InsertDbgValueBefore);
- }
}
// Remove the block from the reference counting scheme, so that we can
@@ -880,6 +892,29 @@ bool llvm::hasIterationCountInvariantInParent(Loop *InnerLoop,
return true;
}
+Intrinsic::ID llvm::getMinMaxReductionIntrinsicOp(RecurKind RK) {
+ switch (RK) {
+ default:
+ llvm_unreachable("Unknown min/max recurrence kind");
+ case RecurKind::UMin:
+ return Intrinsic::umin;
+ case RecurKind::UMax:
+ return Intrinsic::umax;
+ case RecurKind::SMin:
+ return Intrinsic::smin;
+ case RecurKind::SMax:
+ return Intrinsic::smax;
+ case RecurKind::FMin:
+ return Intrinsic::minnum;
+ case RecurKind::FMax:
+ return Intrinsic::maxnum;
+ case RecurKind::FMinimum:
+ return Intrinsic::minimum;
+ case RecurKind::FMaximum:
+ return Intrinsic::maximum;
+ }
+}
+
CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
switch (RK) {
default:
@@ -896,6 +931,9 @@ CmpInst::Predicate llvm::getMinMaxReductionPredicate(RecurKind RK) {
return CmpInst::FCMP_OLT;
case RecurKind::FMax:
return CmpInst::FCMP_OGT;
+ // We do not add FMinimum/FMaximum recurrence kind here since there is no
+ // equivalent predicate which compares signed zeroes according to the
+ // semantics of the intrinsics (llvm.minimum/maximum).
}
}
@@ -910,6 +948,14 @@ Value *llvm::createSelectCmpOp(IRBuilderBase &Builder, Value *StartVal,
Value *llvm::createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
Value *Right) {
+ Type *Ty = Left->getType();
+ if (Ty->isIntOrIntVectorTy() ||
+ (RK == RecurKind::FMinimum || RK == RecurKind::FMaximum)) {
+ // TODO: Add float minnum/maxnum support when FMF nnan is set.
+ Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RK);
+ return Builder.CreateIntrinsic(Ty, Id, {Left, Right}, nullptr,
+ "rdx.minmax");
+ }
CmpInst::Predicate Pred = getMinMaxReductionPredicate(RK);
Value *Cmp = Builder.CreateCmp(Pred, Left, Right, "rdx.minmax.cmp");
Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
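
The comment above notes that FMinimum/FMaximum get no FCmp predicate because an ordered-less-than compare plus select does not respect signed zeroes, while llvm.minimum/llvm.maximum must. A small sketch of that difference (NaN propagation omitted for brevity):

#include <cassert>
#include <cmath>

// The FCMP_OLT + select lowering used for the other float recurrences.
static double cmpSelectMin(double L, double R) { return (L < R) ? L : R; }

// llvm.minimum-style handling of signed zeroes: -0.0 orders before +0.0.
static double minimumLikeMin(double L, double R) {
  if (L == R)
    return std::signbit(L) ? L : R;
  return (L < R) ? L : R;
}

int main() {
  // -0.0 < +0.0 is false, so cmp+select returns +0.0 ...
  assert(!std::signbit(cmpSelectMin(-0.0, +0.0)));
  // ... while minimum semantics must return -0.0.
  assert(std::signbit(minimumLikeMin(-0.0, +0.0)));
}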
@@ -1055,6 +1101,10 @@ Value *llvm::createSimpleTargetReduction(IRBuilderBase &Builder,
return Builder.CreateFPMaxReduce(Src);
case RecurKind::FMin:
return Builder.CreateFPMinReduce(Src);
+ case RecurKind::FMinimum:
+ return Builder.CreateFPMinimumReduce(Src);
+ case RecurKind::FMaximum:
+ return Builder.CreateFPMaximumReduce(Src);
default:
llvm_unreachable("Unhandled opcode");
}
@@ -1123,6 +1173,20 @@ bool llvm::isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGE, S, Zero);
}
+bool llvm::isKnownPositiveInLoop(const SCEV *S, const Loop *L,
+ ScalarEvolution &SE) {
+ const SCEV *Zero = SE.getZero(S->getType());
+ return SE.isAvailableAtLoopEntry(S, L) &&
+ SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, S, Zero);
+}
+
+bool llvm::isKnownNonPositiveInLoop(const SCEV *S, const Loop *L,
+ ScalarEvolution &SE) {
+ const SCEV *Zero = SE.getZero(S->getType());
+ return SE.isAvailableAtLoopEntry(S, L) &&
+ SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLE, S, Zero);
+}
+
bool llvm::cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
bool Signed) {
unsigned BitWidth = cast<IntegerType>(S->getType())->getBitWidth();
diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 17e71cf5a6c4..78ebe75c121b 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -23,7 +23,6 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PassManager.h"
-#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -31,6 +30,8 @@
using namespace llvm;
+#define DEBUG_TYPE "loop-versioning"
+
static cl::opt<bool>
AnnotateNoAlias("loop-version-annotate-no-alias", cl::init(true),
cl::Hidden,
@@ -208,7 +209,7 @@ void LoopVersioning::prepareNoAliasMetadata() {
// Finally, transform the above to actually map to scope list which is what
// the metadata uses.
- for (auto Pair : GroupToNonAliasingScopes)
+ for (const auto &Pair : GroupToNonAliasingScopes)
GroupToNonAliasingScopeList[Pair.first] = MDNode::get(Context, Pair.second);
}
@@ -290,56 +291,6 @@ bool runImpl(LoopInfo *LI, LoopAccessInfoManager &LAIs, DominatorTree *DT,
return Changed;
}
-
-/// Also expose this is a pass. Currently this is only used for
-/// unit-testing. It adds all memchecks necessary to remove all may-aliasing
-/// array accesses from the loop.
-class LoopVersioningLegacyPass : public FunctionPass {
-public:
- LoopVersioningLegacyPass() : FunctionPass(ID) {
- initializeLoopVersioningLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto &LAIs = getAnalysis<LoopAccessLegacyAnalysis>().getLAIs();
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
-
- return runImpl(LI, LAIs, DT, SE);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequired<LoopAccessLegacyAnalysis>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- }
-
- static char ID;
-};
-}
-
-#define LVER_OPTION "loop-versioning"
-#define DEBUG_TYPE LVER_OPTION
-
-char LoopVersioningLegacyPass::ID;
-static const char LVer_name[] = "Loop Versioning";
-
-INITIALIZE_PASS_BEGIN(LoopVersioningLegacyPass, LVER_OPTION, LVer_name, false,
- false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(LoopVersioningLegacyPass, LVER_OPTION, LVer_name, false,
- false)
-
-namespace llvm {
-FunctionPass *createLoopVersioningLegacyPass() {
- return new LoopVersioningLegacyPass();
}
PreservedAnalyses LoopVersioningPass::run(Function &F,
@@ -353,4 +304,3 @@ PreservedAnalyses LoopVersioningPass::run(Function &F,
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
-} // namespace llvm
diff --git a/llvm/lib/Transforms/Utils/LowerAtomic.cpp b/llvm/lib/Transforms/Utils/LowerAtomic.cpp
index b6f40de0daa6..b203970ef9c5 100644
--- a/llvm/lib/Transforms/Utils/LowerAtomic.cpp
+++ b/llvm/lib/Transforms/Utils/LowerAtomic.cpp
@@ -14,8 +14,7 @@
#include "llvm/Transforms/Utils/LowerAtomic.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
+
using namespace llvm;
#define DEBUG_TYPE "loweratomic"
@@ -102,6 +101,9 @@ Value *llvm::buildAtomicRMWValue(AtomicRMWInst::BinOp Op,
bool llvm::lowerAtomicRMWInst(AtomicRMWInst *RMWI) {
IRBuilder<> Builder(RMWI);
+ Builder.setIsFPConstrained(
+ RMWI->getFunction()->hasFnAttribute(Attribute::StrictFP));
+
Value *Ptr = RMWI->getPointerOperand();
Value *Val = RMWI->getValOperand();
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 165740b55298..906eb71fc2d9 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -12,9 +12,12 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <optional>
+#define DEBUG_TYPE "lower-mem-intrinsics"
+
using namespace llvm;
void llvm::createMemCpyLoopKnownSize(
@@ -376,19 +379,14 @@ void llvm::createMemCpyLoopUnknownSize(
static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
Value *DstAddr, Value *CopyLen, Align SrcAlign,
Align DstAlign, bool SrcIsVolatile,
- bool DstIsVolatile) {
+ bool DstIsVolatile,
+ const TargetTransformInfo &TTI) {
Type *TypeOfCopyLen = CopyLen->getType();
BasicBlock *OrigBB = InsertBefore->getParent();
Function *F = OrigBB->getParent();
const DataLayout &DL = F->getParent()->getDataLayout();
-
// TODO: Use different element type if possible?
- IRBuilder<> CastBuilder(InsertBefore);
- Type *EltTy = CastBuilder.getInt8Ty();
- Type *PtrTy =
- CastBuilder.getInt8PtrTy(SrcAddr->getType()->getPointerAddressSpace());
- SrcAddr = CastBuilder.CreateBitCast(SrcAddr, PtrTy);
- DstAddr = CastBuilder.CreateBitCast(DstAddr, PtrTy);
+ Type *EltTy = Type::getInt8Ty(F->getContext());
  // Create a comparison of src and dst, based on which we jump to either
// the forward-copy part of the function (if src >= dst) or the backwards-copy
@@ -428,6 +426,7 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
BasicBlock *LoopBB =
BasicBlock::Create(F->getContext(), "copy_backwards_loop", F, CopyForwardBB);
IRBuilder<> LoopBuilder(LoopBB);
+
PHINode *LoopPhi = LoopBuilder.CreatePHI(TypeOfCopyLen, 0);
Value *IndexPtr = LoopBuilder.CreateSub(
LoopPhi, ConstantInt::get(TypeOfCopyLen, 1), "index_ptr");
@@ -552,15 +551,57 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,
}
}
-void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {
- createMemMoveLoop(/* InsertBefore */ Memmove,
- /* SrcAddr */ Memmove->getRawSource(),
- /* DstAddr */ Memmove->getRawDest(),
- /* CopyLen */ Memmove->getLength(),
- /* SrcAlign */ Memmove->getSourceAlign().valueOrOne(),
- /* DestAlign */ Memmove->getDestAlign().valueOrOne(),
- /* SrcIsVolatile */ Memmove->isVolatile(),
- /* DstIsVolatile */ Memmove->isVolatile());
+bool llvm::expandMemMoveAsLoop(MemMoveInst *Memmove,
+ const TargetTransformInfo &TTI) {
+ Value *CopyLen = Memmove->getLength();
+ Value *SrcAddr = Memmove->getRawSource();
+ Value *DstAddr = Memmove->getRawDest();
+ Align SrcAlign = Memmove->getSourceAlign().valueOrOne();
+ Align DstAlign = Memmove->getDestAlign().valueOrOne();
+ bool SrcIsVolatile = Memmove->isVolatile();
+ bool DstIsVolatile = SrcIsVolatile;
+ IRBuilder<> CastBuilder(Memmove);
+
+ unsigned SrcAS = SrcAddr->getType()->getPointerAddressSpace();
+ unsigned DstAS = DstAddr->getType()->getPointerAddressSpace();
+ if (SrcAS != DstAS) {
+ if (!TTI.addrspacesMayAlias(SrcAS, DstAS)) {
+ // We may not be able to emit a pointer comparison, but we don't have
+ // to. Expand as memcpy.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(CopyLen)) {
+ createMemCpyLoopKnownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
+ CI, SrcAlign, DstAlign, SrcIsVolatile,
+ DstIsVolatile,
+ /*CanOverlap=*/false, TTI);
+ } else {
+ createMemCpyLoopUnknownSize(/*InsertBefore=*/Memmove, SrcAddr, DstAddr,
+ CopyLen, SrcAlign, DstAlign, SrcIsVolatile,
+ DstIsVolatile,
+ /*CanOverlap=*/false, TTI);
+ }
+
+ return true;
+ }
+
+ if (TTI.isValidAddrSpaceCast(DstAS, SrcAS))
+ DstAddr = CastBuilder.CreateAddrSpaceCast(DstAddr, SrcAddr->getType());
+ else if (TTI.isValidAddrSpaceCast(SrcAS, DstAS))
+ SrcAddr = CastBuilder.CreateAddrSpaceCast(SrcAddr, DstAddr->getType());
+ else {
+ // We don't know generically if it's legal to introduce an
+ // addrspacecast. We need to know either if it's legal to insert an
+ // addrspacecast, or if the address spaces cannot alias.
+ LLVM_DEBUG(
+ dbgs() << "Do not know how to expand memmove between different "
+ "address spaces\n");
+ return false;
+ }
+ }
+
+ createMemMoveLoop(
+ /*InsertBefore=*/Memmove, SrcAddr, DstAddr, CopyLen, SrcAlign, DstAlign,
+ SrcIsVolatile, DstIsVolatile, TTI);
+ return true;
}
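
For reference, the control flow createMemMoveLoop materializes, and which expandMemMoveAsLoop now only reaches when the address spaces match or can be legally cast, is the classic direction-checked byte copy. A C++ sketch of that shape; the real lowering emits IR blocks named copy_backwards/copy_forward rather than calling a helper, and compares raw addresses:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

// Copy backwards when the destination starts above the source so that
// overlapping ranges behave like memmove; otherwise copy forwards.
static void memmoveAsLoop(uint8_t *Dst, const uint8_t *Src, size_t Len) {
  if (Dst < Src) {
    for (size_t I = 0; I != Len; ++I) // "copy_forward" loop
      Dst[I] = Src[I];
  } else {
    for (size_t I = Len; I != 0; --I) // "copy_backwards" loop
      Dst[I - 1] = Src[I - 1];
  }
}

int main() {
  uint8_t Buf[8] = {0, 1, 2, 3, 4, 5, 6, 7};
  memmoveAsLoop(Buf + 2, Buf, 6); // overlapping, destination above source
  const uint8_t Expect[8] = {0, 1, 0, 1, 2, 3, 4, 5};
  assert(std::memcmp(Buf, Expect, sizeof(Buf)) == 0);
}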
void llvm::expandMemSetAsLoop(MemSetInst *Memset) {
diff --git a/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/llvm/lib/Transforms/Utils/Mem2Reg.cpp
index 5ad7aeb463ec..fbc6dd7613de 100644
--- a/llvm/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/llvm/lib/Transforms/Utils/Mem2Reg.cpp
@@ -74,15 +74,19 @@ namespace {
struct PromoteLegacyPass : public FunctionPass {
// Pass identification, replacement for typeid
static char ID;
+ bool ForcePass; /// If true, forces pass to execute, instead of skipping.
- PromoteLegacyPass() : FunctionPass(ID) {
+ PromoteLegacyPass() : FunctionPass(ID), ForcePass(false) {
+ initializePromoteLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+ PromoteLegacyPass(bool IsForced) : FunctionPass(ID), ForcePass(IsForced) {
initializePromoteLegacyPassPass(*PassRegistry::getPassRegistry());
}
// runOnFunction - To run this pass, first we calculate the alloca
// instructions that are safe for promotion, then we promote each one.
bool runOnFunction(Function &F) override {
- if (skipFunction(F))
+ if (!ForcePass && skipFunction(F))
return false;
DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
@@ -111,6 +115,6 @@ INITIALIZE_PASS_END(PromoteLegacyPass, "mem2reg", "Promote Memory to Register",
false, false)
// createPromoteMemoryToRegister - Provide an entry point to create this pass.
-FunctionPass *llvm::createPromoteMemoryToRegisterPass() {
- return new PromoteLegacyPass();
+FunctionPass *llvm::createPromoteMemoryToRegisterPass(bool IsForced) {
+ return new PromoteLegacyPass(IsForced);
}
diff --git a/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp b/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp
index 899928c085c6..531b0a624daf 100644
--- a/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp
+++ b/llvm/lib/Transforms/Utils/MemoryOpRemark.cpp
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/MemoryOpRemark.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DebugInfo.h"
@@ -321,7 +322,7 @@ void MemoryOpRemark::visitVariable(const Value *V,
// Try to get an llvm.dbg.declare, which has a DILocalVariable giving us the
// real debug info name and size of the variable.
for (const DbgVariableIntrinsic *DVI :
- FindDbgAddrUses(const_cast<Value *>(V))) {
+ FindDbgDeclareUses(const_cast<Value *>(V))) {
if (DILocalVariable *DILV = DVI->getVariable()) {
std::optional<uint64_t> DISize = getSizeInBytes(DILV->getSizeInBits());
VariableInfo Var{DILV->getName(), DISize};
@@ -387,7 +388,8 @@ bool AutoInitRemark::canHandle(const Instruction *I) {
return false;
return any_of(I->getMetadata(LLVMContext::MD_annotation)->operands(),
[](const MDOperand &Op) {
- return cast<MDString>(Op.get())->getString() == "auto-init";
+ return isa<MDString>(Op.get()) &&
+ cast<MDString>(Op.get())->getString() == "auto-init";
});
}
diff --git a/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/llvm/lib/Transforms/Utils/MetaRenamer.cpp
index 0ea210671b93..44ac65f265f0 100644
--- a/llvm/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -26,14 +26,12 @@
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/TypeFinder.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Transforms/Utils.h"
using namespace llvm;
@@ -62,6 +60,11 @@ static cl::opt<std::string> RenameExcludeStructPrefixes(
"by a comma"),
cl::Hidden);
+static cl::opt<bool>
+ RenameOnlyInst("rename-only-inst", cl::init(false),
+ cl::desc("only rename the instructions in the function"),
+ cl::Hidden);
+
static const char *const metaNames[] = {
// See http://en.wikipedia.org/wiki/Metasyntactic_variable
"foo", "bar", "baz", "quux", "barney", "snork", "zot", "blam", "hoge",
@@ -105,6 +108,12 @@ parseExcludedPrefixes(StringRef PrefixesStr,
}
}
+void MetaRenameOnlyInstructions(Function &F) {
+ for (auto &I : instructions(F))
+ if (!I.getType()->isVoidTy() && I.getName().empty())
+ I.setName(I.getOpcodeName());
+}
+
void MetaRename(Function &F) {
for (Argument &Arg : F.args())
if (!Arg.getType()->isVoidTy())
@@ -115,7 +124,7 @@ void MetaRename(Function &F) {
for (auto &I : BB)
if (!I.getType()->isVoidTy())
- I.setName("tmp");
+ I.setName(I.getOpcodeName());
}
}
@@ -145,6 +154,26 @@ void MetaRename(Module &M,
[&Name](auto &Prefix) { return Name.startswith(Prefix); });
};
+ // Leave library functions alone because their presence or absence could
+ // affect the behavior of other passes.
+ auto ExcludeLibFuncs = [&](Function &F) {
+ LibFunc Tmp;
+ StringRef Name = F.getName();
+ return Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
+ GetTLI(F).getLibFunc(F, Tmp) ||
+ IsNameExcluded(Name, ExcludedFuncPrefixes);
+ };
+
+ if (RenameOnlyInst) {
+ // Rename all functions
+ for (auto &F : M) {
+ if (ExcludeLibFuncs(F))
+ continue;
+ MetaRenameOnlyInstructions(F);
+ }
+ return;
+ }
+
// Rename all aliases
for (GlobalAlias &GA : M.aliases()) {
StringRef Name = GA.getName();
@@ -181,64 +210,20 @@ void MetaRename(Module &M,
// Rename all functions
for (auto &F : M) {
- StringRef Name = F.getName();
- LibFunc Tmp;
- // Leave library functions alone because their presence or absence could
- // affect the behavior of other passes.
- if (Name.startswith("llvm.") || (!Name.empty() && Name[0] == 1) ||
- GetTLI(F).getLibFunc(F, Tmp) ||
- IsNameExcluded(Name, ExcludedFuncPrefixes))
+ if (ExcludeLibFuncs(F))
continue;
// Leave @main alone. The output of -metarenamer might be passed to
// lli for execution and the latter needs a main entry point.
- if (Name != "main")
+ if (F.getName() != "main")
F.setName(renamer.newName());
MetaRename(F);
}
}
-struct MetaRenamer : public ModulePass {
- // Pass identification, replacement for typeid
- static char ID;
-
- MetaRenamer() : ModulePass(ID) {
- initializeMetaRenamerPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetLibraryInfoWrapperPass>();
- AU.setPreservesAll();
- }
-
- bool runOnModule(Module &M) override {
- auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
- return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
- };
- MetaRename(M, GetTLI);
- return true;
- }
-};
-
} // end anonymous namespace
-char MetaRenamer::ID = 0;
-
-INITIALIZE_PASS_BEGIN(MetaRenamer, "metarenamer",
- "Assign new names to everything", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(MetaRenamer, "metarenamer",
- "Assign new names to everything", false, false)
-
-//===----------------------------------------------------------------------===//
-//
-// MetaRenamer - Rename everything with metasyntactic names.
-//
-ModulePass *llvm::createMetaRenamerPass() {
- return new MetaRenamer();
-}
-
PreservedAnalyses MetaRenamerPass::run(Module &M, ModuleAnalysisManager &AM) {
FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index 6d17a466957e..1e243ef74df7 100644
--- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -12,6 +12,7 @@
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -19,6 +20,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/xxhash.h"
+
using namespace llvm;
#define DEBUG_TYPE "moduleutils"
@@ -31,11 +33,9 @@ static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
// Get the current set of static global constructors and add the new ctor
// to the list.
SmallVector<Constant *, 16> CurrentCtors;
- StructType *EltTy = StructType::get(
- IRB.getInt32Ty(), PointerType::get(FnTy, F->getAddressSpace()),
- IRB.getInt8PtrTy());
-
+ StructType *EltTy;
if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) {
+ EltTy = cast<StructType>(GVCtor->getValueType()->getArrayElementType());
if (Constant *Init = GVCtor->getInitializer()) {
unsigned n = Init->getNumOperands();
CurrentCtors.reserve(n + 1);
@@ -43,6 +43,10 @@ static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F,
CurrentCtors.push_back(cast<Constant>(Init->getOperand(i)));
}
GVCtor->eraseFromParent();
+ } else {
+ EltTy = StructType::get(
+ IRB.getInt32Ty(), PointerType::get(FnTy, F->getAddressSpace()),
+ IRB.getInt8PtrTy());
}
// Build a 3 field global_ctor entry. We don't take a comdat key.
@@ -390,9 +394,7 @@ bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
const DataLayout &DL = M.getDataLayout();
PointerType *TableEntryTy =
- Ctx.supportsTypedPointers()
- ? PointerType::get(Type::getInt8Ty(Ctx), DL.getProgramAddressSpace())
- : PointerType::get(Ctx, DL.getProgramAddressSpace());
+ PointerType::get(Ctx, DL.getProgramAddressSpace());
ArrayType *FuncPtrTableTy =
ArrayType::get(TableEntryTy, IFuncsToLower.size());
@@ -462,9 +464,7 @@ bool llvm::lowerGlobalIFuncUsersAsGlobalCtor(
InitBuilder.CreateRetVoid();
- PointerType *ConstantDataTy = Ctx.supportsTypedPointers()
- ? PointerType::get(Type::getInt8Ty(Ctx), 0)
- : PointerType::get(Ctx, 0);
+ PointerType *ConstantDataTy = PointerType::get(Ctx, 0);
// TODO: Is this the right priority? Probably should be before any other
// constructors?
diff --git a/llvm/lib/Transforms/Utils/MoveAutoInit.cpp b/llvm/lib/Transforms/Utils/MoveAutoInit.cpp
new file mode 100644
index 000000000000..b0ca0b15c08e
--- /dev/null
+++ b/llvm/lib/Transforms/Utils/MoveAutoInit.cpp
@@ -0,0 +1,231 @@
+//===-- MoveAutoInit.cpp - move auto-init inst closer to their use site----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions marked as auto-init closer to the basic blocks
+// that use them, potentially removing them from some control paths of the function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/MoveAutoInit.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "move-auto-init"
+
+STATISTIC(NumMoved, "Number of instructions moved");
+
+static cl::opt<unsigned> MoveAutoInitThreshold(
+ "move-auto-init-threshold", cl::Hidden, cl::init(128),
+ cl::desc("Maximum instructions to analyze per moved initialization"));
+
+static bool hasAutoInitMetadata(const Instruction &I) {
+ return I.hasMetadata(LLVMContext::MD_annotation) &&
+ any_of(I.getMetadata(LLVMContext::MD_annotation)->operands(),
+ [](const MDOperand &Op) { return Op.equalsStr("auto-init"); });
+}
+
+static std::optional<MemoryLocation> writeToAlloca(const Instruction &I) {
+ MemoryLocation ML;
+ if (auto *MI = dyn_cast<MemIntrinsic>(&I))
+ ML = MemoryLocation::getForDest(MI);
+ else if (auto *SI = dyn_cast<StoreInst>(&I))
+ ML = MemoryLocation::get(SI);
+ else
+ assert(false && "memory location set");
+
+ if (isa<AllocaInst>(getUnderlyingObject(ML.Ptr)))
+ return ML;
+ else
+ return {};
+}
+
+/// Finds a BasicBlock in the CFG where instruction `I` can be moved to while
+/// not changing the Memory SSA ordering and being guarded by at least one
+/// condition.
+static BasicBlock *usersDominator(const MemoryLocation &ML, Instruction *I,
+ DominatorTree &DT, MemorySSA &MSSA) {
+ BasicBlock *CurrentDominator = nullptr;
+ MemoryUseOrDef &IMA = *MSSA.getMemoryAccess(I);
+ BatchAAResults AA(MSSA.getAA());
+
+ SmallPtrSet<MemoryAccess *, 8> Visited;
+
+ auto AsMemoryAccess = [](User *U) { return cast<MemoryAccess>(U); };
+ SmallVector<MemoryAccess *> WorkList(map_range(IMA.users(), AsMemoryAccess));
+
+ while (!WorkList.empty()) {
+ MemoryAccess *MA = WorkList.pop_back_val();
+ if (!Visited.insert(MA).second)
+ continue;
+
+ if (Visited.size() > MoveAutoInitThreshold)
+ return nullptr;
+
+ bool FoundClobberingUser = false;
+ if (auto *M = dyn_cast<MemoryUseOrDef>(MA)) {
+ Instruction *MI = M->getMemoryInst();
+
+ // If this memory instruction may not clobber `I`, we can skip it.
+ // LifetimeEnd is a valid user, but we do not want it in the user
+ // dominator.
+ if (AA.getModRefInfo(MI, ML) != ModRefInfo::NoModRef &&
+ !MI->isLifetimeStartOrEnd() && MI != I) {
+ FoundClobberingUser = true;
+ CurrentDominator = CurrentDominator
+ ? DT.findNearestCommonDominator(CurrentDominator,
+ MI->getParent())
+ : MI->getParent();
+ }
+ }
+ if (!FoundClobberingUser) {
+ auto UsersAsMemoryAccesses = map_range(MA->users(), AsMemoryAccess);
+ append_range(WorkList, UsersAsMemoryAccesses);
+ }
+ }
+ return CurrentDominator;
+}
+
+static bool runMoveAutoInit(Function &F, DominatorTree &DT, MemorySSA &MSSA) {
+ BasicBlock &EntryBB = F.getEntryBlock();
+ SmallVector<std::pair<Instruction *, BasicBlock *>> JobList;
+
+ //
+ // Compute movable instructions.
+ //
+ for (Instruction &I : EntryBB) {
+ if (!hasAutoInitMetadata(I))
+ continue;
+
+ std::optional<MemoryLocation> ML = writeToAlloca(I);
+ if (!ML)
+ continue;
+
+ if (I.isVolatile())
+ continue;
+
+ BasicBlock *UsersDominator = usersDominator(ML.value(), &I, DT, MSSA);
+ if (!UsersDominator)
+ continue;
+
+ if (UsersDominator == &EntryBB)
+ continue;
+
+ // Traverse the CFG to detect cycles `UsersDominator` would be part of.
+ SmallPtrSet<BasicBlock *, 8> TransitiveSuccessors;
+ SmallVector<BasicBlock *> WorkList(successors(UsersDominator));
+ bool HasCycle = false;
+ while (!WorkList.empty()) {
+ BasicBlock *CurrBB = WorkList.pop_back_val();
+ if (CurrBB == UsersDominator)
+ // No early exit because we want to compute the full set of transitive
+ // successors.
+ HasCycle = true;
+ for (BasicBlock *Successor : successors(CurrBB)) {
+ if (!TransitiveSuccessors.insert(Successor).second)
+ continue;
+ WorkList.push_back(Successor);
+ }
+ }
+
+    // Don't insert there if that could cause I to be executed multiple times;
+    // instead, insert into a non-back-edge predecessor if one exists.
+ if (HasCycle) {
+ BasicBlock *UsersDominatorHead = UsersDominator;
+ while (BasicBlock *UniquePredecessor =
+ UsersDominatorHead->getUniquePredecessor())
+ UsersDominatorHead = UniquePredecessor;
+
+ if (UsersDominatorHead == &EntryBB)
+ continue;
+
+ BasicBlock *DominatingPredecessor = nullptr;
+ for (BasicBlock *Pred : predecessors(UsersDominatorHead)) {
+        // If one of the predecessors of the dominator is also transitively a
+        // successor, moving to the dominator would do the inverse of loop
+        // hoisting, and we don't want that.
+ if (TransitiveSuccessors.count(Pred))
+ continue;
+
+ DominatingPredecessor =
+ DominatingPredecessor
+ ? DT.findNearestCommonDominator(DominatingPredecessor, Pred)
+ : Pred;
+ }
+
+ if (!DominatingPredecessor || DominatingPredecessor == &EntryBB)
+ continue;
+
+ UsersDominator = DominatingPredecessor;
+ }
+
+ // CatchSwitchInst blocks can only have one instruction, so they are not
+ // good candidates for insertion.
+ while (isa<CatchSwitchInst>(UsersDominator->getFirstInsertionPt())) {
+ for (BasicBlock *Pred : predecessors(UsersDominator))
+ UsersDominator = DT.findNearestCommonDominator(UsersDominator, Pred);
+ }
+
+ // We finally found a place where I can be moved without introducing extra
+ // executions, and guarded by at least one condition.
+ if (UsersDominator != &EntryBB)
+ JobList.emplace_back(&I, UsersDominator);
+ }
+
+ //
+ // Perform the actual substitution.
+ //
+ if (JobList.empty())
+ return false;
+
+ MemorySSAUpdater MSSAU(&MSSA);
+
+ // Reverse insertion to respect relative order between instructions:
+ // if two instructions are moved from the same source BB to the same target
+ // BB, we insert the second one at the front, then the first on top of it.
+ for (auto &Job : reverse(JobList)) {
+ Job.first->moveBefore(&*Job.second->getFirstInsertionPt());
+ MSSAU.moveToPlace(MSSA.getMemoryAccess(Job.first), Job.first->getParent(),
+ MemorySSA::InsertionPlace::Beginning);
+ }
+
+ if (VerifyMemorySSA)
+ MSSA.verifyMemorySSA();
+
+ NumMoved += JobList.size();
+
+ return true;
+}
+
+PreservedAnalyses MoveAutoInitPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+ if (!runMoveAutoInit(F, DT, MSSA))
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<MemorySSAAnalysis>();
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
+}
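
The pass itself is a plain new-pass-manager function pass. A hedged sketch of
driving it outside the default pipelines follows; the wrapper name is
illustrative, and it assumes the MoveAutoInit.h header that declares
MoveAutoInitPass plus the standard PassBuilder analysis registration.

  #include "llvm/Analysis/CGSCCPassManager.h"
  #include "llvm/Analysis/LoopAnalysisManager.h"
  #include "llvm/IR/Module.h"
  #include "llvm/IR/PassManager.h"
  #include "llvm/Passes/PassBuilder.h"
  #include "llvm/Transforms/Utils/MoveAutoInit.h"
  #include <utility>

  using namespace llvm;

  // Hedged sketch: run MoveAutoInitPass over every function in M.
  static void runMoveAutoInitOnModule(Module &M) {
    PassBuilder PB;
    LoopAnalysisManager LAM;
    FunctionAnalysisManager FAM;
    CGSCCAnalysisManager CGAM;
    ModuleAnalysisManager MAM;
    PB.registerModuleAnalyses(MAM);
    PB.registerCGSCCAnalyses(CGAM);
    PB.registerFunctionAnalyses(FAM); // provides DominatorTree and MemorySSA
    PB.registerLoopAnalyses(LAM);
    PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

    FunctionPassManager FPM;
    FPM.addPass(MoveAutoInitPass());
    ModulePassManager MPM;
    MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
    MPM.run(M, MAM);
  }
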
diff --git a/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp b/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
index d4ab4504064f..f41a14cdfbec 100644
--- a/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
+++ b/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
@@ -14,8 +14,6 @@
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Module.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/MD5.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 75ea9dc5dfc0..2e5f40d39912 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -118,19 +118,28 @@ public:
/// Update assignment tracking debug info given for the to-be-deleted store
/// \p ToDelete that stores to this alloca.
- void updateForDeletedStore(StoreInst *ToDelete, DIBuilder &DIB) const {
+ void updateForDeletedStore(
+ StoreInst *ToDelete, DIBuilder &DIB,
+ SmallSet<DbgAssignIntrinsic *, 8> *DbgAssignsToDelete) const {
// There's nothing to do if the alloca doesn't have any variables using
// assignment tracking.
- if (DbgAssigns.empty()) {
- assert(at::getAssignmentMarkers(ToDelete).empty());
+ if (DbgAssigns.empty())
return;
- }
- // Just leave dbg.assign intrinsics in place and remember that we've seen
- // one for each variable fragment.
- SmallSet<DebugVariable, 2> VarHasDbgAssignForStore;
- for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(ToDelete))
- VarHasDbgAssignForStore.insert(DebugVariable(DAI));
+ // Insert a dbg.value where the linked dbg.assign is and remember to delete
+ // the dbg.assign later. Demoting to dbg.value isn't necessary for
+ // correctness, but it reduces compile time and memory usage by avoiding
+ // unnecessary function-local metadata. Also remember that we've seen a
+ // dbg.assign for each variable fragment, for the untracked-store handling
+ // after this loop.
+ SmallSet<DebugVariableAggregate, 2> VarHasDbgAssignForStore;
+ for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(ToDelete)) {
+ VarHasDbgAssignForStore.insert(DebugVariableAggregate(DAI));
+ DbgAssignsToDelete->insert(DAI);
+ DIB.insertDbgValueIntrinsic(DAI->getValue(), DAI->getVariable(),
+ DAI->getExpression(), DAI->getDebugLoc(),
+ DAI);
+ }
// It's possible for variables using assignment tracking to have no
// dbg.assign linked to this store. These are variables in DbgAssigns that
@@ -141,7 +150,7 @@ public:
// size) or one that is trackable but has had its DIAssignID attachment
// dropped accidentally.
for (auto *DAI : DbgAssigns) {
- if (VarHasDbgAssignForStore.contains(DebugVariable(DAI)))
+ if (VarHasDbgAssignForStore.contains(DebugVariableAggregate(DAI)))
continue;
ConvertDebugDeclareToDebugValue(DAI, ToDelete, DIB);
}
@@ -324,6 +333,9 @@ struct PromoteMem2Reg {
/// For each alloca, keep an instance of a helper class that gives us an easy
/// way to update assignment tracking debug info if the alloca is promoted.
SmallVector<AssignmentTrackingInfo, 8> AllocaATInfo;
+ /// A set of dbg.assigns to delete because they've been demoted to
+ /// dbg.values. Call cleanUpDbgAssigns to delete them.
+ SmallSet<DbgAssignIntrinsic *, 8> DbgAssignsToDelete;
/// The set of basic blocks the renamer has already visited.
SmallPtrSet<BasicBlock *, 16> Visited;
@@ -367,6 +379,13 @@ private:
RenamePassData::LocationVector &IncLocs,
std::vector<RenamePassData> &Worklist);
bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
+
+ /// Delete dbg.assigns that have been demoted to dbg.values.
+ void cleanUpDbgAssigns() {
+ for (auto *DAI : DbgAssignsToDelete)
+ DAI->eraseFromParent();
+ DbgAssignsToDelete.clear();
+ }
};
} // end anonymous namespace
@@ -438,9 +457,10 @@ static void removeIntrinsicUsers(AllocaInst *AI) {
/// false there were some loads which were not dominated by the single store
/// and thus must be phi-ed with undef. We fall back to the standard alloca
/// promotion algorithm in that case.
-static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
- LargeBlockInfo &LBI, const DataLayout &DL,
- DominatorTree &DT, AssumptionCache *AC) {
+static bool rewriteSingleStoreAlloca(
+ AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI, const DataLayout &DL,
+ DominatorTree &DT, AssumptionCache *AC,
+ SmallSet<DbgAssignIntrinsic *, 8> *DbgAssignsToDelete) {
StoreInst *OnlyStore = Info.OnlyStore;
bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
BasicBlock *StoreBB = OnlyStore->getParent();
@@ -500,7 +520,8 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false);
// Update assignment tracking info for the store we're going to delete.
- Info.AssignmentTracking.updateForDeletedStore(Info.OnlyStore, DIB);
+ Info.AssignmentTracking.updateForDeletedStore(Info.OnlyStore, DIB,
+ DbgAssignsToDelete);
// Record debuginfo for the store and remove the declaration's
// debuginfo.
@@ -540,11 +561,10 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
/// use(t);
/// *A = 42;
/// }
-static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
- LargeBlockInfo &LBI,
- const DataLayout &DL,
- DominatorTree &DT,
- AssumptionCache *AC) {
+static bool promoteSingleBlockAlloca(
+ AllocaInst *AI, const AllocaInfo &Info, LargeBlockInfo &LBI,
+ const DataLayout &DL, DominatorTree &DT, AssumptionCache *AC,
+ SmallSet<DbgAssignIntrinsic *, 8> *DbgAssignsToDelete) {
// The trickiest case to handle is when we have large blocks. Because of this,
// this code is optimized assuming that large blocks happen. This does not
// significantly pessimize the small block case. This uses LargeBlockInfo to
@@ -608,7 +628,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,
while (!AI->use_empty()) {
StoreInst *SI = cast<StoreInst>(AI->user_back());
// Update assignment tracking info for the store we're going to delete.
- Info.AssignmentTracking.updateForDeletedStore(SI, DIB);
+ Info.AssignmentTracking.updateForDeletedStore(SI, DIB, DbgAssignsToDelete);
// Record debuginfo for the store before removing it.
for (DbgVariableIntrinsic *DII : Info.DbgUsers) {
if (DII->isAddressOfVariable()) {
@@ -668,7 +688,8 @@ void PromoteMem2Reg::run() {
// If there is only a single store to this value, replace any loads of
// it that are directly dominated by the definition with the value stored.
if (Info.DefiningBlocks.size() == 1) {
- if (rewriteSingleStoreAlloca(AI, Info, LBI, SQ.DL, DT, AC)) {
+ if (rewriteSingleStoreAlloca(AI, Info, LBI, SQ.DL, DT, AC,
+ &DbgAssignsToDelete)) {
// The alloca has been processed, move on.
RemoveFromAllocasList(AllocaNum);
++NumSingleStore;
@@ -679,7 +700,8 @@ void PromoteMem2Reg::run() {
// If the alloca is only read and written in one basic block, just perform a
// linear sweep over the block to eliminate it.
if (Info.OnlyUsedInOneBlock &&
- promoteSingleBlockAlloca(AI, Info, LBI, SQ.DL, DT, AC)) {
+ promoteSingleBlockAlloca(AI, Info, LBI, SQ.DL, DT, AC,
+ &DbgAssignsToDelete)) {
// The alloca has been processed, move on.
RemoveFromAllocasList(AllocaNum);
continue;
@@ -728,9 +750,10 @@ void PromoteMem2Reg::run() {
QueuePhiNode(BB, AllocaNum, CurrentVersion);
}
- if (Allocas.empty())
+ if (Allocas.empty()) {
+ cleanUpDbgAssigns();
return; // All of the allocas must have been trivial!
-
+ }
LBI.clear();
// Set the incoming values for the basic block to be null values for all of
@@ -812,7 +835,7 @@ void PromoteMem2Reg::run() {
// code. Unfortunately, there may be unreachable blocks which the renamer
// hasn't traversed. If this is the case, the PHI nodes may not
// have incoming values for all predecessors. Loop over all PHI nodes we have
- // created, inserting undef values if they are missing any incoming values.
+ // created, inserting poison values if they are missing any incoming values.
for (DenseMap<std::pair<unsigned, unsigned>, PHINode *>::iterator
I = NewPhiNodes.begin(),
E = NewPhiNodes.end();
@@ -862,13 +885,14 @@ void PromoteMem2Reg::run() {
BasicBlock::iterator BBI = BB->begin();
while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
SomePHI->getNumIncomingValues() == NumBadPreds) {
- Value *UndefVal = UndefValue::get(SomePHI->getType());
+ Value *PoisonVal = PoisonValue::get(SomePHI->getType());
for (BasicBlock *Pred : Preds)
- SomePHI->addIncoming(UndefVal, Pred);
+ SomePHI->addIncoming(PoisonVal, Pred);
}
}
NewPhiNodes.clear();
+ cleanUpDbgAssigns();
}
/// Determine which blocks the value is live in.
@@ -1072,7 +1096,8 @@ NextIteration:
// Record debuginfo for the store before removing it.
IncomingLocs[AllocaNo] = SI->getDebugLoc();
- AllocaATInfo[AllocaNo].updateForDeletedStore(SI, DIB);
+ AllocaATInfo[AllocaNo].updateForDeletedStore(SI, DIB,
+ &DbgAssignsToDelete);
for (DbgVariableIntrinsic *DII : AllocaDbgUsers[ai->second])
if (DII->isAddressOfVariable())
ConvertDebugDeclareToDebugValue(DII, SI, DIB);
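
The demotion pattern introduced above is small enough to show in isolation: for
each dbg.assign linked to a store that is about to go away, emit an equivalent
dbg.value at the same position and erase the dbg.assign afterwards. This sketch
uses only the APIs already visible in the hunk; the helper name
demoteLinkedAssigns is illustrative.

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/DIBuilder.h"
  #include "llvm/IR/DebugInfo.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/IntrinsicInst.h"

  using namespace llvm;

  // Hedged sketch: demote the dbg.assign users linked to a soon-to-be-deleted
  // store into plain dbg.value intrinsics.
  static void demoteLinkedAssigns(StoreInst *ToDelete, DIBuilder &DIB) {
    SmallVector<DbgAssignIntrinsic *, 4> ToErase;
    for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(ToDelete)) {
      // Re-emit the variable location as a dbg.value where the dbg.assign sits.
      DIB.insertDbgValueIntrinsic(DAI->getValue(), DAI->getVariable(),
                                  DAI->getExpression(), DAI->getDebugLoc(), DAI);
      ToErase.push_back(DAI);
    }
    // Erase after the loop so the marker range is not invalidated mid-walk.
    for (DbgAssignIntrinsic *DAI : ToErase)
      DAI->eraseFromParent();
  }
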
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 8d03a0d8a2c4..de3626a24212 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -17,6 +17,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueLattice.h"
#include "llvm/Analysis/ValueLatticeUtils.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
@@ -41,6 +42,14 @@ static ValueLatticeElement::MergeOptions getMaxWidenStepsOpts() {
MaxNumRangeExtensions);
}
+static ConstantRange getConstantRange(const ValueLatticeElement &LV, Type *Ty,
+ bool UndefAllowed = true) {
+ assert(Ty->isIntOrIntVectorTy() && "Should be int or int vector");
+ if (LV.isConstantRange(UndefAllowed))
+ return LV.getConstantRange();
+ return ConstantRange::getFull(Ty->getScalarSizeInBits());
+}
+
namespace llvm {
bool SCCPSolver::isConstant(const ValueLatticeElement &LV) {
@@ -65,30 +74,9 @@ static bool canRemoveInstruction(Instruction *I) {
}
bool SCCPSolver::tryToReplaceWithConstant(Value *V) {
- Constant *Const = nullptr;
- if (V->getType()->isStructTy()) {
- std::vector<ValueLatticeElement> IVs = getStructLatticeValueFor(V);
- if (llvm::any_of(IVs, isOverdefined))
- return false;
- std::vector<Constant *> ConstVals;
- auto *ST = cast<StructType>(V->getType());
- for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
- ValueLatticeElement V = IVs[i];
- ConstVals.push_back(SCCPSolver::isConstant(V)
- ? getConstant(V)
- : UndefValue::get(ST->getElementType(i)));
- }
- Const = ConstantStruct::get(ST, ConstVals);
- } else {
- const ValueLatticeElement &IV = getLatticeValueFor(V);
- if (isOverdefined(IV))
- return false;
-
- Const = SCCPSolver::isConstant(IV) ? getConstant(IV)
- : UndefValue::get(V->getType());
- }
- assert(Const && "Constant is nullptr here!");
-
+ Constant *Const = getConstantOrNull(V);
+ if (!Const)
+ return false;
// Replacing `musttail` instructions with constant breaks `musttail` invariant
// unless the call itself can be removed.
// Calls with "clang.arc.attachedcall" implicitly use the return value and
@@ -115,6 +103,47 @@ bool SCCPSolver::tryToReplaceWithConstant(Value *V) {
return true;
}
+/// Try to use \p Inst's value range from \p Solver to infer the NUW/NSW flags.
+static bool refineInstruction(SCCPSolver &Solver,
+ const SmallPtrSetImpl<Value *> &InsertedValues,
+ Instruction &Inst) {
+ if (!isa<OverflowingBinaryOperator>(Inst))
+ return false;
+
+ auto GetRange = [&Solver, &InsertedValues](Value *Op) {
+ if (auto *Const = dyn_cast<ConstantInt>(Op))
+ return ConstantRange(Const->getValue());
+ if (isa<Constant>(Op) || InsertedValues.contains(Op)) {
+ unsigned Bitwidth = Op->getType()->getScalarSizeInBits();
+ return ConstantRange::getFull(Bitwidth);
+ }
+ return getConstantRange(Solver.getLatticeValueFor(Op), Op->getType(),
+ /*UndefAllowed=*/false);
+ };
+ auto RangeA = GetRange(Inst.getOperand(0));
+ auto RangeB = GetRange(Inst.getOperand(1));
+ bool Changed = false;
+ if (!Inst.hasNoUnsignedWrap()) {
+ auto NUWRange = ConstantRange::makeGuaranteedNoWrapRegion(
+ Instruction::BinaryOps(Inst.getOpcode()), RangeB,
+ OverflowingBinaryOperator::NoUnsignedWrap);
+ if (NUWRange.contains(RangeA)) {
+ Inst.setHasNoUnsignedWrap();
+ Changed = true;
+ }
+ }
+ if (!Inst.hasNoSignedWrap()) {
+ auto NSWRange = ConstantRange::makeGuaranteedNoWrapRegion(
+ Instruction::BinaryOps(Inst.getOpcode()), RangeB, OverflowingBinaryOperator::NoSignedWrap);
+ if (NSWRange.contains(RangeA)) {
+ Inst.setHasNoSignedWrap();
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
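
To make the range check concrete: makeGuaranteedNoWrapRegion() returns, for a
fixed range of the second operand, the set of first-operand values for which
the operation cannot wrap, so containment of the first operand's range proves
the flag. A hedged standalone sketch with illustrative i8 ranges:

  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/ConstantRange.h"
  #include "llvm/IR/Instruction.h"
  #include "llvm/IR/Operator.h"

  using namespace llvm;

  // Hedged sketch: can `add i8 %a, %b` be marked nuw/nsw when %a is known to
  // be in [0, 100) and %b in [0, 10)?
  static void noWrapRegionExample() {
    ConstantRange RangeA(APInt(8, 0), APInt(8, 100)); // [0, 100)
    ConstantRange RangeB(APInt(8, 0), APInt(8, 10));  // [0, 10)

    ConstantRange NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
        Instruction::Add, RangeB, OverflowingBinaryOperator::NoUnsignedWrap);
    ConstantRange NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
        Instruction::Add, RangeB, OverflowingBinaryOperator::NoSignedWrap);

    bool CanBeNUW = NUWRegion.contains(RangeA); // true: 99 + 9 <= 255
    bool CanBeNSW = NSWRegion.contains(RangeA); // true: 99 + 9 <= 127
    (void)CanBeNUW;
    (void)CanBeNSW;
  }
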
+
/// Try to replace signed instructions with their unsigned equivalent.
static bool replaceSignedInst(SCCPSolver &Solver,
SmallPtrSetImpl<Value *> &InsertedValues,
@@ -195,6 +224,8 @@ bool SCCPSolver::simplifyInstsInBlock(BasicBlock &BB,
} else if (replaceSignedInst(*this, InsertedValues, Inst)) {
MadeChanges = true;
++InstReplacedStat;
+ } else if (refineInstruction(*this, InsertedValues, Inst)) {
+ MadeChanges = true;
}
}
return MadeChanges;
@@ -322,6 +353,10 @@ class SCCPInstVisitor : public InstVisitor<SCCPInstVisitor> {
MapVector<std::pair<Function *, unsigned>, ValueLatticeElement>
TrackedMultipleRetVals;
+ /// The set of values whose lattice has been invalidated.
+ /// Populated by resetLatticeValueFor(), cleared after resolving undefs.
+ DenseSet<Value *> Invalidated;
+
/// MRVFunctionsTracked - Each function in TrackedMultipleRetVals is
/// represented here for efficient lookup.
SmallPtrSet<Function *, 16> MRVFunctionsTracked;
@@ -352,14 +387,15 @@ class SCCPInstVisitor : public InstVisitor<SCCPInstVisitor> {
using Edge = std::pair<BasicBlock *, BasicBlock *>;
DenseSet<Edge> KnownFeasibleEdges;
- DenseMap<Function *, AnalysisResultsForFn> AnalysisResults;
+ DenseMap<Function *, std::unique_ptr<PredicateInfo>> FnPredicateInfo;
+
DenseMap<Value *, SmallPtrSet<User *, 2>> AdditionalUsers;
LLVMContext &Ctx;
private:
- ConstantInt *getConstantInt(const ValueLatticeElement &IV) const {
- return dyn_cast_or_null<ConstantInt>(getConstant(IV));
+ ConstantInt *getConstantInt(const ValueLatticeElement &IV, Type *Ty) const {
+ return dyn_cast_or_null<ConstantInt>(getConstant(IV, Ty));
}
// pushToWorkList - Helper for markConstant/markOverdefined
@@ -447,6 +483,64 @@ private:
return LV;
}
+ /// Traverse the def-use chain of \p Call, marking it and its transitive
+ /// users as "unknown" on the way.
+ void invalidate(CallBase *Call) {
+ SmallVector<Instruction *, 64> ToInvalidate;
+ ToInvalidate.push_back(Call);
+
+ while (!ToInvalidate.empty()) {
+ Instruction *Inst = ToInvalidate.pop_back_val();
+
+ if (!Invalidated.insert(Inst).second)
+ continue;
+
+ if (!BBExecutable.count(Inst->getParent()))
+ continue;
+
+ Value *V = nullptr;
+ // For return instructions we need to invalidate the tracked returns map.
+ // Anything else has its lattice in the value map.
+ if (auto *RetInst = dyn_cast<ReturnInst>(Inst)) {
+ Function *F = RetInst->getParent()->getParent();
+ if (auto It = TrackedRetVals.find(F); It != TrackedRetVals.end()) {
+ It->second = ValueLatticeElement();
+ V = F;
+ } else if (MRVFunctionsTracked.count(F)) {
+ auto *STy = cast<StructType>(F->getReturnType());
+ for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I)
+ TrackedMultipleRetVals[{F, I}] = ValueLatticeElement();
+ V = F;
+ }
+ } else if (auto *STy = dyn_cast<StructType>(Inst->getType())) {
+ for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I) {
+ if (auto It = StructValueState.find({Inst, I});
+ It != StructValueState.end()) {
+ It->second = ValueLatticeElement();
+ V = Inst;
+ }
+ }
+ } else if (auto It = ValueState.find(Inst); It != ValueState.end()) {
+ It->second = ValueLatticeElement();
+ V = Inst;
+ }
+
+ if (V) {
+ LLVM_DEBUG(dbgs() << "Invalidated lattice for " << *V << "\n");
+
+ for (User *U : V->users())
+ if (auto *UI = dyn_cast<Instruction>(U))
+ ToInvalidate.push_back(UI);
+
+ auto It = AdditionalUsers.find(V);
+ if (It != AdditionalUsers.end())
+ for (User *U : It->second)
+ if (auto *UI = dyn_cast<Instruction>(U))
+ ToInvalidate.push_back(UI);
+ }
+ }
+ }
+
/// markEdgeExecutable - Mark a basic block as executable, adding it to the BB
/// work list if it is not already executable.
bool markEdgeExecutable(BasicBlock *Source, BasicBlock *Dest);
@@ -520,6 +614,7 @@ private:
void visitCastInst(CastInst &I);
void visitSelectInst(SelectInst &I);
void visitUnaryOperator(Instruction &I);
+ void visitFreezeInst(FreezeInst &I);
void visitBinaryOperator(Instruction &I);
void visitCmpInst(CmpInst &I);
void visitExtractValueInst(ExtractValueInst &EVI);
@@ -557,8 +652,8 @@ private:
void visitInstruction(Instruction &I);
public:
- void addAnalysis(Function &F, AnalysisResultsForFn A) {
- AnalysisResults.insert({&F, std::move(A)});
+ void addPredicateInfo(Function &F, DominatorTree &DT, AssumptionCache &AC) {
+ FnPredicateInfo.insert({&F, std::make_unique<PredicateInfo>(F, DT, AC)});
}
void visitCallInst(CallInst &I) { visitCallBase(I); }
@@ -566,23 +661,10 @@ public:
bool markBlockExecutable(BasicBlock *BB);
const PredicateBase *getPredicateInfoFor(Instruction *I) {
- auto A = AnalysisResults.find(I->getParent()->getParent());
- if (A == AnalysisResults.end())
+ auto It = FnPredicateInfo.find(I->getParent()->getParent());
+ if (It == FnPredicateInfo.end())
return nullptr;
- return A->second.PredInfo->getPredicateInfoFor(I);
- }
-
- const LoopInfo &getLoopInfo(Function &F) {
- auto A = AnalysisResults.find(&F);
- assert(A != AnalysisResults.end() && A->second.LI &&
- "Need LoopInfo analysis results for function.");
- return *A->second.LI;
- }
-
- DomTreeUpdater getDTU(Function &F) {
- auto A = AnalysisResults.find(&F);
- assert(A != AnalysisResults.end() && "Need analysis results for function.");
- return {A->second.DT, A->second.PDT, DomTreeUpdater::UpdateStrategy::Lazy};
+ return It->second->getPredicateInfoFor(I);
}
SCCPInstVisitor(const DataLayout &DL,
@@ -627,6 +709,8 @@ public:
void solve();
+ bool resolvedUndef(Instruction &I);
+
bool resolvedUndefsIn(Function &F);
bool isBlockExecutable(BasicBlock *BB) const {
@@ -649,6 +733,19 @@ public:
void removeLatticeValueFor(Value *V) { ValueState.erase(V); }
+ /// Invalidate the Lattice Value of \p Call and its users after specializing
+ /// the call. Then recompute it.
+ void resetLatticeValueFor(CallBase *Call) {
+ // Calls to void returning functions do not need invalidation.
+ Function *F = Call->getCalledFunction();
+ (void)F;
+ assert(!F->getReturnType()->isVoidTy() &&
+ (TrackedRetVals.count(F) || MRVFunctionsTracked.count(F)) &&
+ "All non void specializations should be tracked");
+ invalidate(Call);
+ handleCallResult(*Call);
+ }
+
const ValueLatticeElement &getLatticeValueFor(Value *V) const {
assert(!V->getType()->isStructTy() &&
"Should use getStructLatticeValueFor");
@@ -681,15 +778,16 @@ public:
bool isStructLatticeConstant(Function *F, StructType *STy);
- Constant *getConstant(const ValueLatticeElement &LV) const;
- ConstantRange getConstantRange(const ValueLatticeElement &LV, Type *Ty) const;
+ Constant *getConstant(const ValueLatticeElement &LV, Type *Ty) const;
+
+ Constant *getConstantOrNull(Value *V) const;
SmallPtrSetImpl<Function *> &getArgumentTrackedFunctions() {
return TrackingIncomingArguments;
}
- void markArgInFuncSpecialization(Function *F,
- const SmallVectorImpl<ArgInfo> &Args);
+ void setLatticeValueForSpecializationArguments(Function *F,
+ const SmallVectorImpl<ArgInfo> &Args);
void markFunctionUnreachable(Function *F) {
for (auto &BB : *F)
@@ -715,6 +813,18 @@ public:
ResolvedUndefs |= resolvedUndefsIn(*F);
}
}
+
+ void solveWhileResolvedUndefs() {
+ bool ResolvedUndefs = true;
+ while (ResolvedUndefs) {
+ solve();
+ ResolvedUndefs = false;
+ for (Value *V : Invalidated)
+ if (auto *I = dyn_cast<Instruction>(V))
+ ResolvedUndefs |= resolvedUndef(*I);
+ }
+ Invalidated.clear();
+ }
};
} // namespace llvm
@@ -728,9 +838,13 @@ bool SCCPInstVisitor::markBlockExecutable(BasicBlock *BB) {
}
void SCCPInstVisitor::pushToWorkList(ValueLatticeElement &IV, Value *V) {
- if (IV.isOverdefined())
- return OverdefinedInstWorkList.push_back(V);
- InstWorkList.push_back(V);
+ if (IV.isOverdefined()) {
+ if (OverdefinedInstWorkList.empty() || OverdefinedInstWorkList.back() != V)
+ OverdefinedInstWorkList.push_back(V);
+ return;
+ }
+ if (InstWorkList.empty() || InstWorkList.back() != V)
+ InstWorkList.push_back(V);
}
void SCCPInstVisitor::pushToWorkListMsg(ValueLatticeElement &IV, Value *V) {
@@ -771,57 +885,84 @@ bool SCCPInstVisitor::isStructLatticeConstant(Function *F, StructType *STy) {
return true;
}
-Constant *SCCPInstVisitor::getConstant(const ValueLatticeElement &LV) const {
- if (LV.isConstant())
- return LV.getConstant();
+Constant *SCCPInstVisitor::getConstant(const ValueLatticeElement &LV,
+ Type *Ty) const {
+ if (LV.isConstant()) {
+ Constant *C = LV.getConstant();
+ assert(C->getType() == Ty && "Type mismatch");
+ return C;
+ }
if (LV.isConstantRange()) {
const auto &CR = LV.getConstantRange();
if (CR.getSingleElement())
- return ConstantInt::get(Ctx, *CR.getSingleElement());
+ return ConstantInt::get(Ty, *CR.getSingleElement());
}
return nullptr;
}
-ConstantRange
-SCCPInstVisitor::getConstantRange(const ValueLatticeElement &LV,
- Type *Ty) const {
- assert(Ty->isIntOrIntVectorTy() && "Should be int or int vector");
- if (LV.isConstantRange())
- return LV.getConstantRange();
- return ConstantRange::getFull(Ty->getScalarSizeInBits());
+Constant *SCCPInstVisitor::getConstantOrNull(Value *V) const {
+ Constant *Const = nullptr;
+ if (V->getType()->isStructTy()) {
+ std::vector<ValueLatticeElement> LVs = getStructLatticeValueFor(V);
+ if (any_of(LVs, SCCPSolver::isOverdefined))
+ return nullptr;
+ std::vector<Constant *> ConstVals;
+ auto *ST = cast<StructType>(V->getType());
+ for (unsigned I = 0, E = ST->getNumElements(); I != E; ++I) {
+ ValueLatticeElement LV = LVs[I];
+ ConstVals.push_back(SCCPSolver::isConstant(LV)
+ ? getConstant(LV, ST->getElementType(I))
+ : UndefValue::get(ST->getElementType(I)));
+ }
+ Const = ConstantStruct::get(ST, ConstVals);
+ } else {
+ const ValueLatticeElement &LV = getLatticeValueFor(V);
+ if (SCCPSolver::isOverdefined(LV))
+ return nullptr;
+ Const = SCCPSolver::isConstant(LV) ? getConstant(LV, V->getType())
+ : UndefValue::get(V->getType());
+ }
+ assert(Const && "Constant is nullptr here!");
+ return Const;
}
-void SCCPInstVisitor::markArgInFuncSpecialization(
- Function *F, const SmallVectorImpl<ArgInfo> &Args) {
+void SCCPInstVisitor::setLatticeValueForSpecializationArguments(Function *F,
+ const SmallVectorImpl<ArgInfo> &Args) {
assert(!Args.empty() && "Specialization without arguments");
assert(F->arg_size() == Args[0].Formal->getParent()->arg_size() &&
"Functions should have the same number of arguments");
auto Iter = Args.begin();
- Argument *NewArg = F->arg_begin();
- Argument *OldArg = Args[0].Formal->getParent()->arg_begin();
+ Function::arg_iterator NewArg = F->arg_begin();
+ Function::arg_iterator OldArg = Args[0].Formal->getParent()->arg_begin();
for (auto End = F->arg_end(); NewArg != End; ++NewArg, ++OldArg) {
LLVM_DEBUG(dbgs() << "SCCP: Marking argument "
<< NewArg->getNameOrAsOperand() << "\n");
- if (Iter != Args.end() && OldArg == Iter->Formal) {
- // Mark the argument constants in the new function.
- markConstant(NewArg, Iter->Actual);
+ // Mark the argument constants in the new function
+ // or copy the lattice state over from the old function.
+ if (Iter != Args.end() && Iter->Formal == &*OldArg) {
+ if (auto *STy = dyn_cast<StructType>(NewArg->getType())) {
+ for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I) {
+ ValueLatticeElement &NewValue = StructValueState[{&*NewArg, I}];
+ NewValue.markConstant(Iter->Actual->getAggregateElement(I));
+ }
+ } else {
+ ValueState[&*NewArg].markConstant(Iter->Actual);
+ }
++Iter;
- } else if (ValueState.count(OldArg)) {
- // For the remaining arguments in the new function, copy the lattice state
- // over from the old function.
- //
- // Note: This previously looked like this:
- // ValueState[NewArg] = ValueState[OldArg];
- // This is incorrect because the DenseMap class may resize the underlying
- // memory when inserting `NewArg`, which will invalidate the reference to
- // `OldArg`. Instead, we make sure `NewArg` exists before setting it.
- auto &NewValue = ValueState[NewArg];
- NewValue = ValueState[OldArg];
- pushToWorkList(NewValue, NewArg);
+ } else {
+ if (auto *STy = dyn_cast<StructType>(NewArg->getType())) {
+ for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I) {
+ ValueLatticeElement &NewValue = StructValueState[{&*NewArg, I}];
+ NewValue = StructValueState[{&*OldArg, I}];
+ }
+ } else {
+ ValueLatticeElement &NewValue = ValueState[&*NewArg];
+ NewValue = ValueState[&*OldArg];
+ }
}
}
}
@@ -874,7 +1015,7 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI,
}
ValueLatticeElement BCValue = getValueState(BI->getCondition());
- ConstantInt *CI = getConstantInt(BCValue);
+ ConstantInt *CI = getConstantInt(BCValue, BI->getCondition()->getType());
if (!CI) {
// Overdefined condition variables, and branches on unfoldable constant
// conditions, mean the branch could go either way.
@@ -900,7 +1041,8 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI,
return;
}
const ValueLatticeElement &SCValue = getValueState(SI->getCondition());
- if (ConstantInt *CI = getConstantInt(SCValue)) {
+ if (ConstantInt *CI =
+ getConstantInt(SCValue, SI->getCondition()->getType())) {
Succs[SI->findCaseValue(CI)->getSuccessorIndex()] = true;
return;
}
@@ -931,7 +1073,8 @@ void SCCPInstVisitor::getFeasibleSuccessors(Instruction &TI,
if (auto *IBR = dyn_cast<IndirectBrInst>(&TI)) {
// Casts are folded by visitCastInst.
ValueLatticeElement IBRValue = getValueState(IBR->getAddress());
- BlockAddress *Addr = dyn_cast_or_null<BlockAddress>(getConstant(IBRValue));
+ BlockAddress *Addr = dyn_cast_or_null<BlockAddress>(
+ getConstant(IBRValue, IBR->getAddress()->getType()));
if (!Addr) { // Overdefined or unknown condition?
// All destinations are executable!
if (!IBRValue.isUnknownOrUndef())
@@ -1086,7 +1229,7 @@ void SCCPInstVisitor::visitCastInst(CastInst &I) {
if (OpSt.isUnknownOrUndef())
return;
- if (Constant *OpC = getConstant(OpSt)) {
+ if (Constant *OpC = getConstant(OpSt, I.getOperand(0)->getType())) {
// Fold the constant as we build.
Constant *C = ConstantFoldCastOperand(I.getOpcode(), OpC, I.getType(), DL);
markConstant(&I, C);
@@ -1221,7 +1364,8 @@ void SCCPInstVisitor::visitSelectInst(SelectInst &I) {
if (CondValue.isUnknownOrUndef())
return;
- if (ConstantInt *CondCB = getConstantInt(CondValue)) {
+ if (ConstantInt *CondCB =
+ getConstantInt(CondValue, I.getCondition()->getType())) {
Value *OpVal = CondCB->isZero() ? I.getFalseValue() : I.getTrueValue();
mergeInValue(&I, getValueState(OpVal));
return;
@@ -1254,13 +1398,37 @@ void SCCPInstVisitor::visitUnaryOperator(Instruction &I) {
return;
if (SCCPSolver::isConstant(V0State))
- if (Constant *C = ConstantFoldUnaryOpOperand(I.getOpcode(),
- getConstant(V0State), DL))
+ if (Constant *C = ConstantFoldUnaryOpOperand(
+ I.getOpcode(), getConstant(V0State, I.getType()), DL))
return (void)markConstant(IV, &I, C);
markOverdefined(&I);
}
+void SCCPInstVisitor::visitFreezeInst(FreezeInst &I) {
+ // If this freeze returns a struct, just mark the result overdefined.
+ // TODO: We could do a lot better than this.
+ if (I.getType()->isStructTy())
+ return (void)markOverdefined(&I);
+
+ ValueLatticeElement V0State = getValueState(I.getOperand(0));
+ ValueLatticeElement &IV = ValueState[&I];
+ // resolvedUndefsIn might mark I as overdefined. Bail out, even if we would
+ // discover a concrete value later.
+ if (SCCPSolver::isOverdefined(IV))
+ return (void)markOverdefined(&I);
+
+ // If something is unknown/undef, wait for it to resolve.
+ if (V0State.isUnknownOrUndef())
+ return;
+
+ if (SCCPSolver::isConstant(V0State) &&
+ isGuaranteedNotToBeUndefOrPoison(getConstant(V0State, I.getType())))
+ return (void)markConstant(IV, &I, getConstant(V0State, I.getType()));
+
+ markOverdefined(&I);
+}
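
The constant case above boils down to a single fact: freeze(C) folds to C
whenever C is guaranteed to contain no undef or poison. A hedged sketch of that
check in isolation, outside the lattice machinery (tryFoldFreeze is an
illustrative name):

  #include "llvm/Analysis/ValueTracking.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/Instructions.h"

  using namespace llvm;

  // Hedged sketch: fold a freeze whose operand is a fully-defined constant.
  static Constant *tryFoldFreeze(FreezeInst &FI) {
    auto *C = dyn_cast<Constant>(FI.getOperand(0));
    if (C && isGuaranteedNotToBeUndefOrPoison(C))
      return C; // freeze(C) == C when C has no undef or poison bits
    return nullptr;
  }
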
+
// Handle Binary Operators.
void SCCPInstVisitor::visitBinaryOperator(Instruction &I) {
ValueLatticeElement V1State = getValueState(I.getOperand(0));
@@ -1280,10 +1448,12 @@ void SCCPInstVisitor::visitBinaryOperator(Instruction &I) {
// If either of the operands is a constant, try to fold it to a constant.
// TODO: Use information from notconstant better.
if ((V1State.isConstant() || V2State.isConstant())) {
- Value *V1 = SCCPSolver::isConstant(V1State) ? getConstant(V1State)
- : I.getOperand(0);
- Value *V2 = SCCPSolver::isConstant(V2State) ? getConstant(V2State)
- : I.getOperand(1);
+ Value *V1 = SCCPSolver::isConstant(V1State)
+ ? getConstant(V1State, I.getOperand(0)->getType())
+ : I.getOperand(0);
+ Value *V2 = SCCPSolver::isConstant(V2State)
+ ? getConstant(V2State, I.getOperand(1)->getType())
+ : I.getOperand(1);
Value *R = simplifyBinOp(I.getOpcode(), V1, V2, SimplifyQuery(DL));
auto *C = dyn_cast_or_null<Constant>(R);
if (C) {
@@ -1361,7 +1531,7 @@ void SCCPInstVisitor::visitGetElementPtrInst(GetElementPtrInst &I) {
if (SCCPSolver::isOverdefined(State))
return (void)markOverdefined(&I);
- if (Constant *C = getConstant(State)) {
+ if (Constant *C = getConstant(State, I.getOperand(i)->getType())) {
Operands.push_back(C);
continue;
}
@@ -1427,7 +1597,7 @@ void SCCPInstVisitor::visitLoadInst(LoadInst &I) {
ValueLatticeElement &IV = ValueState[&I];
if (SCCPSolver::isConstant(PtrVal)) {
- Constant *Ptr = getConstant(PtrVal);
+ Constant *Ptr = getConstant(PtrVal, I.getOperand(0)->getType());
// load null is undefined.
if (isa<ConstantPointerNull>(Ptr)) {
@@ -1490,7 +1660,7 @@ void SCCPInstVisitor::handleCallOverdefined(CallBase &CB) {
if (SCCPSolver::isOverdefined(State))
return (void)markOverdefined(&CB);
assert(SCCPSolver::isConstant(State) && "Unknown state!");
- Operands.push_back(getConstant(State));
+ Operands.push_back(getConstant(State, A->getType()));
}
if (SCCPSolver::isOverdefined(getValueState(&CB)))
@@ -1622,6 +1792,8 @@ void SCCPInstVisitor::handleCallResult(CallBase &CB) {
SmallVector<ConstantRange, 2> OpRanges;
for (Value *Op : II->args()) {
const ValueLatticeElement &State = getValueState(Op);
+ if (State.isUnknownOrUndef())
+ return;
OpRanges.push_back(getConstantRange(State, Op->getType()));
}
@@ -1666,6 +1838,7 @@ void SCCPInstVisitor::solve() {
// things to overdefined more quickly.
while (!OverdefinedInstWorkList.empty()) {
Value *I = OverdefinedInstWorkList.pop_back_val();
+ Invalidated.erase(I);
LLVM_DEBUG(dbgs() << "\nPopped off OI-WL: " << *I << '\n');
@@ -1682,6 +1855,7 @@ void SCCPInstVisitor::solve() {
// Process the instruction work list.
while (!InstWorkList.empty()) {
Value *I = InstWorkList.pop_back_val();
+ Invalidated.erase(I);
LLVM_DEBUG(dbgs() << "\nPopped off I-WL: " << *I << '\n');
@@ -1709,6 +1883,61 @@ void SCCPInstVisitor::solve() {
}
}
+bool SCCPInstVisitor::resolvedUndef(Instruction &I) {
+ // Look for instructions which produce undef values.
+ if (I.getType()->isVoidTy())
+ return false;
+
+ if (auto *STy = dyn_cast<StructType>(I.getType())) {
+ // Only a few things that can be structs matter for undef.
+
+ // Tracked calls must never be marked overdefined in resolvedUndefsIn.
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (Function *F = CB->getCalledFunction())
+ if (MRVFunctionsTracked.count(F))
+ return false;
+
+ // extractvalue and insertvalue don't need to be marked; they are
+ // tracked as precisely as their operands.
+ if (isa<ExtractValueInst>(I) || isa<InsertValueInst>(I))
+ return false;
+ // Send the results of everything else to overdefined. We could be
+ // more precise than this but it isn't worth bothering.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ ValueLatticeElement &LV = getStructValueState(&I, i);
+ if (LV.isUnknown()) {
+ markOverdefined(LV, &I);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ ValueLatticeElement &LV = getValueState(&I);
+ if (!LV.isUnknown())
+ return false;
+
+ // There are two reasons a call can have an undef result
+ // 1. It could be tracked.
+ // 2. It could be constant-foldable.
+ // Because of the way we solve return values, tracked calls must
+ // never be marked overdefined in resolvedUndefsIn.
+ if (auto *CB = dyn_cast<CallBase>(&I))
+ if (Function *F = CB->getCalledFunction())
+ if (TrackedRetVals.count(F))
+ return false;
+
+ if (isa<LoadInst>(I)) {
+ // A load here means one of two things: a load of undef from a global, or
+ // a load from an unknown pointer. Either way, having it return undef
+ // is okay.
+ return false;
+ }
+
+ markOverdefined(&I);
+ return true;
+}
+
/// While solving the dataflow for a function, we don't compute a result for
/// operations with an undef operand, to allow undef to be lowered to a
/// constant later. For example, constant folding of "zext i8 undef to i16"
@@ -1728,60 +1957,8 @@ bool SCCPInstVisitor::resolvedUndefsIn(Function &F) {
if (!BBExecutable.count(&BB))
continue;
- for (Instruction &I : BB) {
- // Look for instructions which produce undef values.
- if (I.getType()->isVoidTy())
- continue;
-
- if (auto *STy = dyn_cast<StructType>(I.getType())) {
- // Only a few things that can be structs matter for undef.
-
- // Tracked calls must never be marked overdefined in resolvedUndefsIn.
- if (auto *CB = dyn_cast<CallBase>(&I))
- if (Function *F = CB->getCalledFunction())
- if (MRVFunctionsTracked.count(F))
- continue;
-
- // extractvalue and insertvalue don't need to be marked; they are
- // tracked as precisely as their operands.
- if (isa<ExtractValueInst>(I) || isa<InsertValueInst>(I))
- continue;
- // Send the results of everything else to overdefined. We could be
- // more precise than this but it isn't worth bothering.
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- ValueLatticeElement &LV = getStructValueState(&I, i);
- if (LV.isUnknown()) {
- markOverdefined(LV, &I);
- MadeChange = true;
- }
- }
- continue;
- }
-
- ValueLatticeElement &LV = getValueState(&I);
- if (!LV.isUnknown())
- continue;
-
- // There are two reasons a call can have an undef result
- // 1. It could be tracked.
- // 2. It could be constant-foldable.
- // Because of the way we solve return values, tracked calls must
- // never be marked overdefined in resolvedUndefsIn.
- if (auto *CB = dyn_cast<CallBase>(&I))
- if (Function *F = CB->getCalledFunction())
- if (TrackedRetVals.count(F))
- continue;
-
- if (isa<LoadInst>(I)) {
- // A load here means one of two things: a load of undef from a global,
- // a load from an unknown pointer. Either way, having it return undef
- // is okay.
- continue;
- }
-
- markOverdefined(&I);
- MadeChange = true;
- }
+ for (Instruction &I : BB)
+ MadeChange |= resolvedUndef(I);
}
LLVM_DEBUG(if (MadeChange) dbgs()
@@ -1802,8 +1979,9 @@ SCCPSolver::SCCPSolver(
SCCPSolver::~SCCPSolver() = default;
-void SCCPSolver::addAnalysis(Function &F, AnalysisResultsForFn A) {
- return Visitor->addAnalysis(F, std::move(A));
+void SCCPSolver::addPredicateInfo(Function &F, DominatorTree &DT,
+ AssumptionCache &AC) {
+ Visitor->addPredicateInfo(F, DT, AC);
}
bool SCCPSolver::markBlockExecutable(BasicBlock *BB) {
@@ -1814,12 +1992,6 @@ const PredicateBase *SCCPSolver::getPredicateInfoFor(Instruction *I) {
return Visitor->getPredicateInfoFor(I);
}
-const LoopInfo &SCCPSolver::getLoopInfo(Function &F) {
- return Visitor->getLoopInfo(F);
-}
-
-DomTreeUpdater SCCPSolver::getDTU(Function &F) { return Visitor->getDTU(F); }
-
void SCCPSolver::trackValueOfGlobalVariable(GlobalVariable *GV) {
Visitor->trackValueOfGlobalVariable(GV);
}
@@ -1859,6 +2031,10 @@ SCCPSolver::solveWhileResolvedUndefsIn(SmallVectorImpl<Function *> &WorkList) {
Visitor->solveWhileResolvedUndefsIn(WorkList);
}
+void SCCPSolver::solveWhileResolvedUndefs() {
+ Visitor->solveWhileResolvedUndefs();
+}
+
bool SCCPSolver::isBlockExecutable(BasicBlock *BB) const {
return Visitor->isBlockExecutable(BB);
}
@@ -1876,6 +2052,10 @@ void SCCPSolver::removeLatticeValueFor(Value *V) {
return Visitor->removeLatticeValueFor(V);
}
+void SCCPSolver::resetLatticeValueFor(CallBase *Call) {
+ Visitor->resetLatticeValueFor(Call);
+}
+
const ValueLatticeElement &SCCPSolver::getLatticeValueFor(Value *V) const {
return Visitor->getLatticeValueFor(V);
}
@@ -1900,17 +2080,22 @@ bool SCCPSolver::isStructLatticeConstant(Function *F, StructType *STy) {
return Visitor->isStructLatticeConstant(F, STy);
}
-Constant *SCCPSolver::getConstant(const ValueLatticeElement &LV) const {
- return Visitor->getConstant(LV);
+Constant *SCCPSolver::getConstant(const ValueLatticeElement &LV,
+ Type *Ty) const {
+ return Visitor->getConstant(LV, Ty);
+}
+
+Constant *SCCPSolver::getConstantOrNull(Value *V) const {
+ return Visitor->getConstantOrNull(V);
}
SmallPtrSetImpl<Function *> &SCCPSolver::getArgumentTrackedFunctions() {
return Visitor->getArgumentTrackedFunctions();
}
-void SCCPSolver::markArgInFuncSpecialization(
- Function *F, const SmallVectorImpl<ArgInfo> &Args) {
- Visitor->markArgInFuncSpecialization(F, Args);
+void SCCPSolver::setLatticeValueForSpecializationArguments(Function *F,
+ const SmallVectorImpl<ArgInfo> &Args) {
+ Visitor->setLatticeValueForSpecializationArguments(F, Args);
}
void SCCPSolver::markFunctionUnreachable(Function *F) {
diff --git a/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index 2520aa5d9db0..ebe9cb27f5ab 100644
--- a/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -19,6 +19,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -195,6 +196,33 @@ void SSAUpdater::RewriteUse(Use &U) {
U.set(V);
}
+void SSAUpdater::UpdateDebugValues(Instruction *I) {
+ SmallVector<DbgValueInst *, 4> DbgValues;
+ llvm::findDbgValues(DbgValues, I);
+ for (auto &DbgValue : DbgValues) {
+ if (DbgValue->getParent() == I->getParent())
+ continue;
+ UpdateDebugValue(I, DbgValue);
+ }
+}
+
+void SSAUpdater::UpdateDebugValues(Instruction *I,
+ SmallVectorImpl<DbgValueInst *> &DbgValues) {
+ for (auto &DbgValue : DbgValues) {
+ UpdateDebugValue(I, DbgValue);
+ }
+}
+
+void SSAUpdater::UpdateDebugValue(Instruction *I, DbgValueInst *DbgValue) {
+ BasicBlock *UserBB = DbgValue->getParent();
+ if (HasValueForBlock(UserBB)) {
+ Value *NewVal = GetValueAtEndOfBlock(UserBB);
+ DbgValue->replaceVariableLocationOp(I, NewVal);
+ }
+ else
+ DbgValue->setKillLocation();
+}
+
void SSAUpdater::RewriteUseAfterInsertions(Use &U) {
Instruction *User = cast<Instruction>(U.getUser());
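
The new hooks slot into the usual SSAUpdater workflow: register the available
definitions per block, retarget (or kill) dbg.value users that sit outside the
defining block, then rewrite the remaining uses. A hedged usage sketch, with
the rewritten instruction and the per-block replacement values supplied by the
caller (rewriteWithSSAUpdater is an illustrative name):

  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/Instruction.h"
  #include "llvm/Transforms/Utils/SSAUpdater.h"

  using namespace llvm;

  // Hedged sketch: rewrite the uses of OldDef with per-block definitions and
  // keep its llvm.dbg.value users in sync via the new UpdateDebugValues() hook.
  static void rewriteWithSSAUpdater(Instruction *OldDef, BasicBlock *BBA,
                                    Instruction *NewDefA, BasicBlock *BBB,
                                    Instruction *NewDefB) {
    SSAUpdater SSA;
    SSA.Initialize(OldDef->getType(), OldDef->getName());
    SSA.AddAvailableValue(BBA, NewDefA);
    SSA.AddAvailableValue(BBB, NewDefB);

    // Retarget llvm.dbg.value users that live outside OldDef's block.
    SSA.UpdateDebugValues(OldDef);

    while (!OldDef->use_empty())
      SSA.RewriteUse(*OldDef->use_begin());
  }
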
diff --git a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
index 691ee00bd831..31d62fbf0618 100644
--- a/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
+++ b/llvm/lib/Transforms/Utils/SampleProfileInference.cpp
@@ -20,6 +20,7 @@
#include <queue>
#include <set>
#include <stack>
+#include <unordered_set>
using namespace llvm;
#define DEBUG_TYPE "sample-profile-inference"
@@ -1218,10 +1219,23 @@ void extractWeights(const ProfiParams &Params, MinCostMaxFlow &Network,
#ifndef NDEBUG
/// Verify that the provided block/jump weights are as expected.
void verifyInput(const FlowFunction &Func) {
- // Verify the entry block
+ // Verify entry and exit blocks
assert(Func.Entry == 0 && Func.Blocks[0].isEntry());
+ size_t NumExitBlocks = 0;
for (size_t I = 1; I < Func.Blocks.size(); I++) {
assert(!Func.Blocks[I].isEntry() && "multiple entry blocks");
+ if (Func.Blocks[I].isExit())
+ NumExitBlocks++;
+ }
+ assert(NumExitBlocks > 0 && "cannot find exit blocks");
+
+ // Verify that there are no parallel edges
+ for (auto &Block : Func.Blocks) {
+ std::unordered_set<uint64_t> UniqueSuccs;
+ for (auto &Jump : Block.SuccJumps) {
+ auto It = UniqueSuccs.insert(Jump->Target);
+ assert(It.second && "input CFG contains parallel edges");
+ }
}
// Verify CFG jumps
for (auto &Block : Func.Blocks) {
@@ -1304,8 +1318,26 @@ void verifyOutput(const FlowFunction &Func) {
} // end of anonymous namespace
-/// Apply the profile inference algorithm for a given function
+/// Apply the profile inference algorithm for a given function and provided
+/// profi options
void llvm::applyFlowInference(const ProfiParams &Params, FlowFunction &Func) {
+ // Check if the function has samples and assign initial flow values
+ bool HasSamples = false;
+ for (FlowBlock &Block : Func.Blocks) {
+ if (Block.Weight > 0)
+ HasSamples = true;
+ Block.Flow = Block.Weight;
+ }
+ for (FlowJump &Jump : Func.Jumps) {
+ if (Jump.Weight > 0)
+ HasSamples = true;
+ Jump.Flow = Jump.Weight;
+ }
+
+ // Quit early for functions with a single block or ones without samples
+ if (Func.Blocks.size() <= 1 || !HasSamples)
+ return;
+
#ifndef NDEBUG
// Verify the input data
verifyInput(Func);
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 24f1966edd37..20844271b943 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -163,7 +163,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
"InsertNoopCastOfTo cannot change sizes!");
// inttoptr only works for integral pointers. For non-integral pointers, we
- // can create a GEP on i8* null with the integral value as index. Note that
+ // can create a GEP on null with the integral value as index. Note that
// it is safe to use GEP of null instead of inttoptr here, because only
// expressions already based on a GEP of null should be converted to pointers
// during expansion.
@@ -173,9 +173,8 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace());
assert(DL.getTypeAllocSize(Builder.getInt8Ty()) == 1 &&
"alloc size of i8 must by 1 byte for the GEP to be correct");
- auto *GEP = Builder.CreateGEP(
- Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "uglygep");
- return Builder.CreateBitCast(GEP, Ty);
+ return Builder.CreateGEP(
+ Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "scevgep");
}
}
// Short-circuit unnecessary bitcasts.
@@ -287,142 +286,6 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
return BO;
}
-/// FactorOutConstant - Test if S is divisible by Factor, using signed
-/// division. If so, update S with Factor divided out and return true.
-/// S need not be evenly divisible if a reasonable remainder can be
-/// computed.
-static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
- const SCEV *Factor, ScalarEvolution &SE,
- const DataLayout &DL) {
- // Everything is divisible by one.
- if (Factor->isOne())
- return true;
-
- // x/x == 1.
- if (S == Factor) {
- S = SE.getConstant(S->getType(), 1);
- return true;
- }
-
- // For a Constant, check for a multiple of the given factor.
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
- // 0/x == 0.
- if (C->isZero())
- return true;
- // Check for divisibility.
- if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
- ConstantInt *CI =
- ConstantInt::get(SE.getContext(), C->getAPInt().sdiv(FC->getAPInt()));
- // If the quotient is zero and the remainder is non-zero, reject
- // the value at this scale. It will be considered for subsequent
- // smaller scales.
- if (!CI->isZero()) {
- const SCEV *Div = SE.getConstant(CI);
- S = Div;
- Remainder = SE.getAddExpr(
- Remainder, SE.getConstant(C->getAPInt().srem(FC->getAPInt())));
- return true;
- }
- }
- }
-
- // In a Mul, check if there is a constant operand which is a multiple
- // of the given factor.
- if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
- // Size is known, check if there is a constant operand which is a multiple
- // of the given factor. If so, we can factor it.
- if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor))
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
- if (!C->getAPInt().srem(FC->getAPInt())) {
- SmallVector<const SCEV *, 4> NewMulOps(M->operands());
- NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt()));
- S = SE.getMulExpr(NewMulOps);
- return true;
- }
- }
-
- // In an AddRec, check if both start and step are divisible.
- if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
- const SCEV *Step = A->getStepRecurrence(SE);
- const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
- if (!FactorOutConstant(Step, StepRem, Factor, SE, DL))
- return false;
- if (!StepRem->isZero())
- return false;
- const SCEV *Start = A->getStart();
- if (!FactorOutConstant(Start, Remainder, Factor, SE, DL))
- return false;
- S = SE.getAddRecExpr(Start, Step, A->getLoop(),
- A->getNoWrapFlags(SCEV::FlagNW));
- return true;
- }
-
- return false;
-}
-
-/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
-/// is the number of SCEVAddRecExprs present, which are kept at the end of
-/// the list.
-///
-static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
- Type *Ty,
- ScalarEvolution &SE) {
- unsigned NumAddRecs = 0;
- for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
- ++NumAddRecs;
- // Group Ops into non-addrecs and addrecs.
- SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
- SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
- // Let ScalarEvolution sort and simplify the non-addrecs list.
- const SCEV *Sum = NoAddRecs.empty() ?
- SE.getConstant(Ty, 0) :
- SE.getAddExpr(NoAddRecs);
- // If it returned an add, use the operands. Otherwise it simplified
- // the sum into a single value, so just use that.
- Ops.clear();
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
- append_range(Ops, Add->operands());
- else if (!Sum->isZero())
- Ops.push_back(Sum);
- // Then append the addrecs.
- Ops.append(AddRecs.begin(), AddRecs.end());
-}
-
-/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
-/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,d}.
-/// This helps expose more opportunities for folding parts of the expressions
-/// into GEP indices.
-///
-static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
- Type *Ty,
- ScalarEvolution &SE) {
- // Find the addrecs.
- SmallVector<const SCEV *, 8> AddRecs;
- for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
- const SCEV *Start = A->getStart();
- if (Start->isZero()) break;
- const SCEV *Zero = SE.getConstant(Ty, 0);
- AddRecs.push_back(SE.getAddRecExpr(Zero,
- A->getStepRecurrence(SE),
- A->getLoop(),
- A->getNoWrapFlags(SCEV::FlagNW)));
- if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
- Ops[i] = Zero;
- append_range(Ops, Add->operands());
- e += Add->getNumOperands();
- } else {
- Ops[i] = Start;
- }
- }
- if (!AddRecs.empty()) {
- // Add the addrecs onto the end of the list.
- Ops.append(AddRecs.begin(), AddRecs.end());
- // Resort the operand list, moving any constants to the front.
- SimplifyAddOperands(Ops, Ty, SE);
- }
-}
-
/// expandAddToGEP - Expand an addition expression with a pointer type into
/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
/// BasicAliasAnalysis and other passes analyze the result. See the rules
@@ -450,210 +313,53 @@ static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
/// loop-invariant portions of expressions, after considering what
/// can be folded using target addressing modes.
///
-Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
- const SCEV *const *op_end,
- PointerType *PTy,
- Type *Ty,
- Value *V) {
- SmallVector<Value *, 4> GepIndices;
- SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
- bool AnyNonZeroIndices = false;
-
- // Split AddRecs up into parts as either of the parts may be usable
- // without the other.
- SplitAddRecs(Ops, Ty, SE);
-
- Type *IntIdxTy = DL.getIndexType(PTy);
-
- // For opaque pointers, always generate i8 GEP.
- if (!PTy->isOpaque()) {
- // Descend down the pointer's type and attempt to convert the other
- // operands into GEP indices, at each level. The first index in a GEP
- // indexes into the array implied by the pointer operand; the rest of
- // the indices index into the element or field type selected by the
- // preceding index.
- Type *ElTy = PTy->getNonOpaquePointerElementType();
- for (;;) {
- // If the scale size is not 0, attempt to factor out a scale for
- // array indexing.
- SmallVector<const SCEV *, 8> ScaledOps;
- if (ElTy->isSized()) {
- const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy);
- if (!ElSize->isZero()) {
- SmallVector<const SCEV *, 8> NewOps;
- for (const SCEV *Op : Ops) {
- const SCEV *Remainder = SE.getConstant(Ty, 0);
- if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
- // Op now has ElSize factored out.
- ScaledOps.push_back(Op);
- if (!Remainder->isZero())
- NewOps.push_back(Remainder);
- AnyNonZeroIndices = true;
- } else {
- // The operand was not divisible, so add it to the list of
- // operands we'll scan next iteration.
- NewOps.push_back(Op);
- }
- }
- // If we made any changes, update Ops.
- if (!ScaledOps.empty()) {
- Ops = NewOps;
- SimplifyAddOperands(Ops, Ty, SE);
- }
- }
- }
+Value *SCEVExpander::expandAddToGEP(const SCEV *Offset, Type *Ty, Value *V) {
+ assert(!isa<Instruction>(V) ||
+ SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
- // Record the scaled array index for this level of the type. If
- // we didn't find any operands that could be factored, tentatively
- // assume that element zero was selected (since the zero offset
- // would obviously be folded away).
- Value *Scaled =
- ScaledOps.empty()
- ? Constant::getNullValue(Ty)
- : expandCodeForImpl(SE.getAddExpr(ScaledOps), Ty);
- GepIndices.push_back(Scaled);
-
- // Collect struct field index operands.
- while (StructType *STy = dyn_cast<StructType>(ElTy)) {
- bool FoundFieldNo = false;
- // An empty struct has no fields.
- if (STy->getNumElements() == 0) break;
- // Field offsets are known. See if a constant offset falls within any of
- // the struct fields.
- if (Ops.empty())
- break;
- if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
- if (SE.getTypeSizeInBits(C->getType()) <= 64) {
- const StructLayout &SL = *DL.getStructLayout(STy);
- uint64_t FullOffset = C->getValue()->getZExtValue();
- if (FullOffset < SL.getSizeInBytes()) {
- unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
- GepIndices.push_back(
- ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
- ElTy = STy->getTypeAtIndex(ElIdx);
- Ops[0] =
- SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
- AnyNonZeroIndices = true;
- FoundFieldNo = true;
- }
- }
- // If no struct field offsets were found, tentatively assume that
- // field zero was selected (since the zero offset would obviously
- // be folded away).
- if (!FoundFieldNo) {
- ElTy = STy->getTypeAtIndex(0u);
- GepIndices.push_back(
- Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
- }
- }
+ Value *Idx = expandCodeForImpl(Offset, Ty);
- if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
- ElTy = ATy->getElementType();
- else
- // FIXME: Handle VectorType.
- // E.g., If ElTy is scalable vector, then ElSize is not a compile-time
- // constant, therefore can not be factored out. The generated IR is less
- // ideal with base 'V' cast to i8* and do ugly getelementptr over that.
- break;
- }
- }
-
- // If none of the operands were convertible to proper GEP indices, cast
- // the base to i8* and do an ugly getelementptr with that. It's still
- // better than ptrtoint+arithmetic+inttoptr at least.
- if (!AnyNonZeroIndices) {
- // Cast the base to i8*.
- if (!PTy->isOpaque())
- V = InsertNoopCastOfTo(V,
- Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
-
- assert(!isa<Instruction>(V) ||
- SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));
-
- // Expand the operands for a plain byte offset.
- Value *Idx = expandCodeForImpl(SE.getAddExpr(Ops), Ty);
-
- // Fold a GEP with constant operands.
- if (Constant *CLHS = dyn_cast<Constant>(V))
- if (Constant *CRHS = dyn_cast<Constant>(Idx))
- return Builder.CreateGEP(Builder.getInt8Ty(), CLHS, CRHS);
-
- // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
- unsigned ScanLimit = 6;
- BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
- // Scanning starts from the last instruction before the insertion point.
- BasicBlock::iterator IP = Builder.GetInsertPoint();
- if (IP != BlockBegin) {
- --IP;
- for (; ScanLimit; --IP, --ScanLimit) {
- // Don't count dbg.value against the ScanLimit, to avoid perturbing the
- // generated code.
- if (isa<DbgInfoIntrinsic>(IP))
- ScanLimit++;
- if (IP->getOpcode() == Instruction::GetElementPtr &&
- IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
- cast<GEPOperator>(&*IP)->getSourceElementType() ==
- Type::getInt8Ty(Ty->getContext()))
- return &*IP;
- if (IP == BlockBegin) break;
- }
- }
+ // Fold a GEP with constant operands.
+ if (Constant *CLHS = dyn_cast<Constant>(V))
+ if (Constant *CRHS = dyn_cast<Constant>(Idx))
+ return Builder.CreateGEP(Builder.getInt8Ty(), CLHS, CRHS);
- // Save the original insertion point so we can restore it when we're done.
- SCEVInsertPointGuard Guard(Builder, this);
-
- // Move the insertion point out of as many loops as we can.
- while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
- if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) break;
-
- // Ok, move up a level.
- Builder.SetInsertPoint(Preheader->getTerminator());
+ // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
+ unsigned ScanLimit = 6;
+ BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
+ // Scanning starts from the last instruction before the insertion point.
+ BasicBlock::iterator IP = Builder.GetInsertPoint();
+ if (IP != BlockBegin) {
+ --IP;
+ for (; ScanLimit; --IP, --ScanLimit) {
+ // Don't count dbg.value against the ScanLimit, to avoid perturbing the
+ // generated code.
+ if (isa<DbgInfoIntrinsic>(IP))
+ ScanLimit++;
+ if (IP->getOpcode() == Instruction::GetElementPtr &&
+ IP->getOperand(0) == V && IP->getOperand(1) == Idx &&
+ cast<GEPOperator>(&*IP)->getSourceElementType() ==
+ Type::getInt8Ty(Ty->getContext()))
+ return &*IP;
+ if (IP == BlockBegin) break;
}
-
- // Emit a GEP.
- return Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
}
- {
- SCEVInsertPointGuard Guard(Builder, this);
-
- // Move the insertion point out of as many loops as we can.
- while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
- if (!L->isLoopInvariant(V)) break;
-
- bool AnyIndexNotLoopInvariant = any_of(
- GepIndices, [L](Value *Op) { return !L->isLoopInvariant(Op); });
-
- if (AnyIndexNotLoopInvariant)
- break;
+ // Save the original insertion point so we can restore it when we're done.
+ SCEVInsertPointGuard Guard(Builder, this);
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) break;
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
- // Ok, move up a level.
- Builder.SetInsertPoint(Preheader->getTerminator());
- }
-
- // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
- // because ScalarEvolution may have changed the address arithmetic to
- // compute a value which is beyond the end of the allocated object.
- Value *Casted = V;
- if (V->getType() != PTy)
- Casted = InsertNoopCastOfTo(Casted, PTy);
- Value *GEP = Builder.CreateGEP(PTy->getNonOpaquePointerElementType(),
- Casted, GepIndices, "scevgep");
- Ops.push_back(SE.getUnknown(GEP));
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader->getTerminator());
}
- return expand(SE.getAddExpr(Ops));
-}
-
-Value *SCEVExpander::expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty,
- Value *V) {
- const SCEV *const Ops[1] = {Op};
- return expandAddToGEP(Ops, Ops + 1, PTy, Ty, V);
+ // Emit a GEP.
+ return Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "scevgep");
}
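For context, a minimal sketch of what the rewritten expandAddToGEP boils down to: with opaque pointers, every pointer-typed SCEV add is lowered to a single i8-element getelementptr over the base pointer plus a byte offset, so the old typed "scevgep" and i8*/i1* "uglygep" paths collapse into one. The snippet below is illustrative only (not part of the patch) and merely approximates the emitted IR with a plain IRBuilder.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("scevgep-sketch", Ctx);
  // declare ptr @f(ptr %base, i64 %offset)
  auto *PtrTy = PointerType::get(Ctx, /*AddressSpace=*/0);
  auto *I64Ty = Type::getInt64Ty(Ctx);
  Function *F = Function::Create(FunctionType::get(PtrTy, {PtrTy, I64Ty}, false),
                                 Function::ExternalLinkage, "f", M);
  IRBuilder<> B(BasicBlock::Create(Ctx, "entry", F));
  // All address arithmetic now takes the form:
  //   %scevgep = getelementptr i8, ptr %base, i64 %offset
  Value *GEP = B.CreateGEP(B.getInt8Ty(), F->getArg(0), F->getArg(1), "scevgep");
  B.CreateRet(GEP);
  verifyFunction(*F, &errs());
  M.print(outs(), nullptr);
}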
/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
@@ -680,6 +386,7 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
switch (S->getSCEVType()) {
case scConstant:
+ case scVScale:
return nullptr; // A constant has no relevant loops.
case scTruncate:
case scZeroExtend:
@@ -778,7 +485,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
}
assert(!Op->getType()->isPointerTy() && "Only first op can be pointer");
- if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
+ if (isa<PointerType>(Sum->getType())) {
// The running sum expression is a pointer. Try to form a getelementptr
// at this level with that as the base.
SmallVector<const SCEV *, 4> NewOps;
@@ -791,7 +498,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
X = SE.getSCEV(U->getValue());
NewOps.push_back(X);
}
- Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
+ Sum = expandAddToGEP(SE.getAddExpr(NewOps), Ty, Sum);
} else if (Op->isNonConstantNegative()) {
// Instead of doing a negate and add, just do a subtract.
Value *W = expandCodeForImpl(SE.getNegativeSCEV(Op), Ty);
@@ -995,15 +702,8 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
// allow any kind of GEP as long as it can be hoisted.
continue;
}
- // This must be a pointer addition of constants (pretty), which is already
- // handled, or some number of address-size elements (ugly). Ugly geps
- // have 2 operands. i1* is used by the expander to represent an
- // address-size element.
- if (IncV->getNumOperands() != 2)
- return nullptr;
- unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
- if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
- && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
+ // GEPs produced by SCEVExpander use i8 element type.
+ if (!cast<GEPOperator>(IncV)->getSourceElementType()->isIntegerTy(8))
return nullptr;
break;
}
@@ -1108,15 +808,7 @@ Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
Value *IncV;
// If the PHI is a pointer, use a GEP, otherwise use an add or sub.
if (ExpandTy->isPointerTy()) {
- PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
- // If the step isn't constant, don't use an implicitly scaled GEP, because
- // that would require a multiply inside the loop.
- if (!isa<ConstantInt>(StepV))
- GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
- GEPPtrTy->getAddressSpace());
- IncV = expandAddToGEP(SE.getSCEV(StepV), GEPPtrTy, IntTy, PN);
- if (IncV->getType() != PN->getType())
- IncV = Builder.CreateBitCast(IncV, PN->getType());
+ IncV = expandAddToGEP(SE.getSCEV(StepV), IntTy, PN);
} else {
IncV = useSubtract ?
Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
@@ -1388,7 +1080,8 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
if (PostIncLoops.count(L)) {
PostIncLoopSet Loops;
Loops.insert(L);
- Normalized = cast<SCEVAddRecExpr>(normalizeForPostIncUse(S, Loops, SE));
+ Normalized = cast<SCEVAddRecExpr>(
+ normalizeForPostIncUse(S, Loops, SE, /*CheckInvertible=*/false));
}
// Strip off any non-loop-dominating component from the addrec start.
@@ -1515,12 +1208,12 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
// Re-apply any non-loop-dominating offset.
if (PostLoopOffset) {
- if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
+ if (isa<PointerType>(ExpandTy)) {
if (Result->getType()->isIntegerTy()) {
Value *Base = expandCodeForImpl(PostLoopOffset, ExpandTy);
- Result = expandAddToGEP(SE.getUnknown(Result), PTy, IntTy, Base);
+ Result = expandAddToGEP(SE.getUnknown(Result), IntTy, Base);
} else {
- Result = expandAddToGEP(PostLoopOffset, PTy, IntTy, Result);
+ Result = expandAddToGEP(PostLoopOffset, IntTy, Result);
}
} else {
Result = InsertNoopCastOfTo(Result, IntTy);
@@ -1574,10 +1267,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// {X,+,F} --> X + {0,+,F}
if (!S->getStart()->isZero()) {
- if (PointerType *PTy = dyn_cast<PointerType>(S->getType())) {
+ if (isa<PointerType>(S->getType())) {
Value *StartV = expand(SE.getPointerBase(S));
- assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
- return expandAddToGEP(SE.removePointerBase(S), PTy, Ty, StartV);
+ return expandAddToGEP(SE.removePointerBase(S), Ty, StartV);
}
SmallVector<const SCEV *, 4> NewOps(S->operands());
@@ -1744,6 +1436,10 @@ Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
return expandMinMaxExpr(S, Intrinsic::umin, "umin", /*IsSequential*/true);
}
+Value *SCEVExpander::visitVScale(const SCEVVScale *S) {
+ return Builder.CreateVScale(ConstantInt::get(S->getType(), 1));
+}
+
Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty,
Instruction *IP) {
setInsertPoint(IP);
@@ -1956,11 +1652,17 @@ SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
OrigPhiRef = Phi;
if (Phi->getType()->isIntegerTy() && TTI &&
TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
- // This phi can be freely truncated to the narrowest phi type. Map the
- // truncated expression to it so it will be reused for narrow types.
- const SCEV *TruncExpr =
- SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType());
- ExprToIVMap[TruncExpr] = Phi;
+ // Make sure we only rewrite using simple induction variables;
+ // otherwise, we can make the trip count of a loop unanalyzable
+ // to SCEV.
+ const SCEV *PhiExpr = SE.getSCEV(Phi);
+ if (isa<SCEVAddRecExpr>(PhiExpr)) {
+ // This phi can be freely truncated to the narrowest phi type. Map the
+ // truncated expression to it so it will be reused for narrow types.
+ const SCEV *TruncExpr =
+ SE.getTruncateExpr(PhiExpr, Phis.back()->getType());
+ ExprToIVMap[TruncExpr] = Phi;
+ }
}
continue;
}
@@ -2124,6 +1826,7 @@ template<typename T> static InstructionCost costAndCollectOperands(
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
case scUnknown:
case scConstant:
+ case scVScale:
return 0;
case scPtrToInt:
Cost = CastCost(Instruction::PtrToInt);
@@ -2260,6 +1963,7 @@ bool SCEVExpander::isHighCostExpansionHelper(
case scCouldNotCompute:
llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
case scUnknown:
+ case scVScale:
// Assume to be zero-cost.
return false;
case scConstant: {
@@ -2551,7 +2255,11 @@ Value *SCEVExpander::fixupLCSSAFormFor(Value *V) {
SmallVector<Instruction *, 1> ToUpdate;
ToUpdate.push_back(DefI);
SmallVector<PHINode *, 16> PHIsToRemove;
- formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, Builder, &PHIsToRemove);
+ SmallVector<PHINode *, 16> InsertedPHIs;
+ formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, &PHIsToRemove,
+ &InsertedPHIs);
+ for (PHINode *PN : InsertedPHIs)
+ rememberInstruction(PN);
for (PHINode *PN : PHIsToRemove) {
if (!PN->use_empty())
continue;
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 9e0483966d3e..d3a9a41aef15 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -271,10 +271,8 @@ class SimplifyCFGOpt {
bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
IRBuilder<> &Builder);
- bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI,
- bool EqTermsOnly);
- bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
- const TargetTransformInfo &TTI);
+ bool HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly);
+ bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
BasicBlock *TrueBB, BasicBlock *FalseBB,
uint32_t TrueWeight, uint32_t FalseWeight);
@@ -1086,7 +1084,7 @@ static void GetBranchWeights(Instruction *TI,
static void FitWeights(MutableArrayRef<uint64_t> Weights) {
uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
if (Max > UINT_MAX) {
- unsigned Offset = 32 - countLeadingZeros(Max);
+ unsigned Offset = 32 - llvm::countl_zero(Max);
for (uint64_t &I : Weights)
I >>= Offset;
}
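FitWeights above only switched to llvm::countl_zero, but the bit arithmetic is easy to lose track of. Here is a standalone sketch of the same narrowing step (illustrative only, written against the C++ standard library rather than LLVM's APIs): when the largest branch weight no longer fits in 32 bits, every weight is shifted right by the number of bits the maximum exceeds 32, so the maximum fits in uint32_t while relative magnitudes are roughly preserved.

#include <algorithm>
#include <bit>
#include <cstdint>
#include <vector>

void fitWeights(std::vector<uint64_t> &Weights) {
  uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
  if (Max > UINT32_MAX) {
    // Width of Max is 64 - countl_zero(Max); Offset is how far that exceeds 32.
    unsigned Offset = 32 - std::countl_zero(Max);
    for (uint64_t &W : Weights)
      W >>= Offset; // maximum now fits in 32 bits; tiny weights may hit zero
  }
}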
@@ -1117,16 +1115,12 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
VMap[&BonusInst] = NewBonusInst;
- // If we moved a load, we cannot any longer claim any knowledge about
- // its potential value. The previous information might have been valid
+ // If we speculated an instruction, we need to drop any metadata that may
+ // result in undefined behavior, as the metadata might have been valid
// only given the branch precondition.
- // For an analogous reason, we must also drop all the metadata whose
- // semantics we don't understand. We *can* preserve !annotation, because
- // it is tied to the instruction itself, not the value or position.
// Similarly strip attributes on call parameters that may cause UB in
// location the call is moved to.
- NewBonusInst->dropUndefImplyingAttrsAndUnknownMetadata(
- LLVMContext::MD_annotation);
+ NewBonusInst->dropUBImplyingAttrsAndMetadata();
NewBonusInst->insertInto(PredBlock, PTI->getIterator());
NewBonusInst->takeName(&BonusInst);
@@ -1462,7 +1456,7 @@ static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
// If we have seen an instruction with side effects, it's unsafe to reorder an
// instruction which reads memory or itself has side effects.
if ((Flags & SkipSideEffect) &&
- (I->mayReadFromMemory() || I->mayHaveSideEffects()))
+ (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
return false;
// Reordering across an instruction which does not necessarily transfer
@@ -1490,14 +1484,43 @@ static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
+/// Helper function for HoistThenElseCodeToIf. Return true if identical
+/// instructions \p I1 and \p I2 can and should be hoisted.
+static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2,
+ const TargetTransformInfo &TTI) {
+ // If we're going to hoist a call, make sure that the two instructions
+ // we're commoning/hoisting are both marked with musttail, or neither of
+ // them is marked as such. Otherwise, we might end up in a situation where
+ // we hoist from a block where the terminator is a `ret` to a block where
+ // the terminator is a `br`, and `musttail` calls expect to be followed by
+ // a return.
+ auto *C1 = dyn_cast<CallInst>(I1);
+ auto *C2 = dyn_cast<CallInst>(I2);
+ if (C1 && C2)
+ if (C1->isMustTailCall() != C2->isMustTailCall())
+ return false;
+
+ if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
+ return false;
+
+ // If either of the two call sites has the nomerge or convergent attribute, stop
+ // hoisting.
+ if (const auto *CB1 = dyn_cast<CallBase>(I1))
+ if (CB1->cannotMerge() || CB1->isConvergent())
+ return false;
+ if (const auto *CB2 = dyn_cast<CallBase>(I2))
+ if (CB2->cannotMerge() || CB2->isConvergent())
+ return false;
+
+ return true;
+}
+
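A small self-contained illustration of one case the new helper rejects (not part of the patch; it uses clang's musttail attribute): if only one side of the branch marks its call musttail, hoisting the otherwise-identical calls into the branch block would detach the musttail call from the return that must immediately follow it, so hoisting is declined.

#include <cstdio>

static bool Flag = true;
static int callee(int X) { return X + 1; }

static int caller(int X) { // caller/callee signatures match, as musttail requires
  if (Flag) {
    [[clang::musttail]] return callee(X); // must stay directly before the return
  }
  return callee(X);                       // plain call on the other path
}

int main() { std::printf("%d\n", caller(41)); }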
/// Given a conditional branch that goes to BB1 and BB2, hoist any common code
/// in the two blocks up into the branch block. The caller of this function
/// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given,
/// only perform hoisting in case both blocks only contain a terminator. In that
/// case, only the original BI will be replaced and selects for PHIs are added.
-bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
- const TargetTransformInfo &TTI,
- bool EqTermsOnly) {
+bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI, bool EqTermsOnly) {
// This does very trivial matching, with limited scanning, to find identical
// instructions in the two blocks. In particular, we don't want to get into
// O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
@@ -1572,37 +1595,13 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
goto HoistTerminator;
}
- if (I1->isIdenticalToWhenDefined(I2)) {
- // Even if the instructions are identical, it may not be safe to hoist
- // them if we have skipped over instructions with side effects or their
- // operands weren't hoisted.
- if (!isSafeToHoistInstr(I1, SkipFlagsBB1) ||
- !isSafeToHoistInstr(I2, SkipFlagsBB2))
- return Changed;
-
- // If we're going to hoist a call, make sure that the two instructions
- // we're commoning/hoisting are both marked with musttail, or neither of
- // them is marked as such. Otherwise, we might end up in a situation where
- // we hoist from a block where the terminator is a `ret` to a block where
- // the terminator is a `br`, and `musttail` calls expect to be followed by
- // a return.
- auto *C1 = dyn_cast<CallInst>(I1);
- auto *C2 = dyn_cast<CallInst>(I2);
- if (C1 && C2)
- if (C1->isMustTailCall() != C2->isMustTailCall())
- return Changed;
-
- if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
- return Changed;
-
- // If any of the two call sites has nomerge attribute, stop hoisting.
- if (const auto *CB1 = dyn_cast<CallBase>(I1))
- if (CB1->cannotMerge())
- return Changed;
- if (const auto *CB2 = dyn_cast<CallBase>(I2))
- if (CB2->cannotMerge())
- return Changed;
-
+ if (I1->isIdenticalToWhenDefined(I2) &&
+ // Even if the instructions are identical, it may not be safe to hoist
+ // them if we have skipped over instructions with side effects or their
+ // operands weren't hoisted.
+ isSafeToHoistInstr(I1, SkipFlagsBB1) &&
+ isSafeToHoistInstr(I2, SkipFlagsBB2) &&
+ shouldHoistCommonInstructions(I1, I2, TTI)) {
if (isa<DbgInfoIntrinsic>(I1) || isa<DbgInfoIntrinsic>(I2)) {
assert(isa<DbgInfoIntrinsic>(I1) && isa<DbgInfoIntrinsic>(I2));
// The debug location is an integral part of a debug info intrinsic
@@ -1618,19 +1617,7 @@ bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
if (!I2->use_empty())
I2->replaceAllUsesWith(I1);
I1->andIRFlags(I2);
- unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
- LLVMContext::MD_range,
- LLVMContext::MD_fpmath,
- LLVMContext::MD_invariant_load,
- LLVMContext::MD_nonnull,
- LLVMContext::MD_invariant_group,
- LLVMContext::MD_align,
- LLVMContext::MD_dereferenceable,
- LLVMContext::MD_dereferenceable_or_null,
- LLVMContext::MD_mem_parallel_loop_access,
- LLVMContext::MD_access_group,
- LLVMContext::MD_preserve_access_index};
- combineMetadata(I1, I2, KnownIDs, true);
+ combineMetadataForCSE(I1, I2, true);
// I1 and I2 are being combined into a single instruction. Its debug
// location is the merged locations of the original instructions.
@@ -1808,9 +1795,9 @@ static bool canSinkInstructions(
// Conservatively return false if I is an inline-asm instruction. Sinking
// and merging inline-asm instructions can potentially create arguments
// that cannot satisfy the inline-asm constraints.
- // If the instruction has nomerge attribute, return false.
+ // If the instruction has nomerge or convergent attribute, return false.
if (const auto *C = dyn_cast<CallBase>(I))
- if (C->isInlineAsm() || C->cannotMerge())
+ if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
return false;
// Each instruction must have zero or one use.
@@ -2455,9 +2442,13 @@ bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
// Can we theoretically form the data operands for the merged `invoke`?
auto IsIllegalToMergeArguments = [](auto Ops) {
- Type *Ty = std::get<0>(Ops)->getType();
- assert(Ty == std::get<1>(Ops)->getType() && "Incompatible types?");
- return Ty->isTokenTy() && std::get<0>(Ops) != std::get<1>(Ops);
+ Use &U0 = std::get<0>(Ops);
+ Use &U1 = std::get<1>(Ops);
+ if (U0 == U1)
+ return false;
+ return U0->getType()->isTokenTy() ||
+ !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),
+ U0.getOperandNo());
};
assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
@@ -2571,7 +2562,7 @@ static void MergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,
// And finally, replace the original `invoke`s with an unconditional branch
// to the block with the merged `invoke`. Also, give that merged `invoke`
// the merged debugloc of all the original `invoke`s.
- const DILocation *MergedDebugLoc = nullptr;
+ DILocation *MergedDebugLoc = nullptr;
for (InvokeInst *II : Invokes) {
// Compute the debug location common to all the original `invoke`s.
if (!MergedDebugLoc)
@@ -2849,8 +2840,11 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
/// \endcode
///
/// \returns true if the conditional block is removed.
-bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
- const TargetTransformInfo &TTI) {
+bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI,
+ BasicBlock *ThenBB) {
+ if (!Options.SpeculateBlocks)
+ return false;
+
// Be conservative for now. FP select instruction can often be expensive.
Value *BrCond = BI->getCondition();
if (isa<FCmpInst>(BrCond))
@@ -3021,7 +3015,7 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
}
// Metadata can be dependent on the condition we are hoisting above.
- // Conservatively strip all metadata on the instruction. Drop the debug loc
+ // Strip all UB-implying metadata on the instruction. Drop the debug loc
// to avoid making it appear as if the condition is a constant, which would
// be misleading while debugging.
// Similarly strip attributes that maybe dependent on condition we are
@@ -3032,7 +3026,7 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
if (!isa<DbgAssignIntrinsic>(&I))
I.setDebugLoc(DebugLoc());
}
- I.dropUndefImplyingAttrsAndUnknownMetadata();
+ I.dropUBImplyingAttrsAndMetadata();
// Drop ephemeral values.
if (EphTracker.contains(&I)) {
@@ -3220,6 +3214,9 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
}
// Clone the instruction.
Instruction *N = BBI->clone();
+ // Insert the new instruction into its new home.
+ N->insertInto(EdgeBB, InsertPt);
+
if (BBI->hasName())
N->setName(BBI->getName() + ".c");
@@ -3235,7 +3232,8 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
if (!BBI->use_empty())
TranslateMap[&*BBI] = V;
if (!N->mayHaveSideEffects()) {
- N->deleteValue(); // Instruction folded away, don't need actual inst
+ N->eraseFromParent(); // Instruction folded away; the actual inst is not needed.
N = nullptr;
}
} else {
@@ -3243,9 +3241,6 @@ FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
TranslateMap[&*BBI] = N;
}
if (N) {
- // Insert the new instruction into its new home.
- N->insertInto(EdgeBB, InsertPt);
-
// Register the new instruction with the assumption cache if necessary.
if (auto *Assume = dyn_cast<AssumeInst>(N))
if (AC)
@@ -3591,17 +3586,7 @@ static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
// If we need to invert the condition in the pred block to match, do so now.
if (InvertPredCond) {
- Value *NewCond = PBI->getCondition();
- if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
- CmpInst *CI = cast<CmpInst>(NewCond);
- CI->setPredicate(CI->getInversePredicate());
- } else {
- NewCond =
- Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not");
- }
-
- PBI->setCondition(NewCond);
- PBI->swapSuccessors();
+ InvertBranch(PBI, Builder);
}
BasicBlock *UniqueSucc =
@@ -3887,7 +3872,7 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
for (BasicBlock *PredBB : predecessors(Succ))
if (PredBB != BB)
PHI->addIncoming(
- AlternativeV ? AlternativeV : UndefValue::get(V->getType()), PredBB);
+ AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
return PHI;
}
@@ -5150,14 +5135,18 @@ bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
Value* Cond = BI->getCondition();
assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
"The destinations are guaranteed to be different here.");
+ CallInst *Assumption;
if (BI->getSuccessor(0) == BB) {
- Builder.CreateAssumption(Builder.CreateNot(Cond));
+ Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
Builder.CreateBr(BI->getSuccessor(1));
} else {
assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
- Builder.CreateAssumption(Cond);
+ Assumption = Builder.CreateAssumption(Cond);
Builder.CreateBr(BI->getSuccessor(0));
}
+ if (Options.AC)
+ Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
+
EraseTerminatorAndDCECond(BI);
Changed = true;
}
@@ -5453,7 +5442,7 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
}
const APInt &CaseVal = Case.getCaseValue()->getValue();
if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
- (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
+ (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
DeadCases.push_back(Case.getCaseValue());
if (DTU)
--NumPerSuccessorCases[Successor];
@@ -5469,7 +5458,7 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
bool HasDefault =
!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
const unsigned NumUnknownBits =
- Known.getBitWidth() - (Known.Zero | Known.One).countPopulation();
+ Known.getBitWidth() - (Known.Zero | Known.One).popcount();
assert(NumUnknownBits <= Known.getBitWidth());
if (HasDefault && DeadCases.empty() &&
NumUnknownBits < 64 /* avoid overflow */ &&
@@ -5860,7 +5849,7 @@ static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
// Check if cases with the same result can cover all number
// in touched bits.
- if (BitMask.countPopulation() == Log2_32(CaseCount)) {
+ if (BitMask.popcount() == Log2_32(CaseCount)) {
if (!MinCaseVal->isNullValue())
Condition = Builder.CreateSub(Condition, MinCaseVal);
Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
@@ -6001,6 +5990,7 @@ private:
// For LinearMapKind, these are the constants used to derive the value.
ConstantInt *LinearOffset = nullptr;
ConstantInt *LinearMultiplier = nullptr;
+ bool LinearMapValWrapped = false;
// For ArrayKind, this is the array.
GlobalVariable *Array = nullptr;
@@ -6061,6 +6051,8 @@ SwitchLookupTable::SwitchLookupTable(
bool LinearMappingPossible = true;
APInt PrevVal;
APInt DistToPrev;
+ // When the linear map is monotonic (never wraps in the signed sense), we
+ // can attach nsw.
+ bool Wrapped = false;
assert(TableSize >= 2 && "Should be a SingleValue table.");
// Check if there is the same distance between two consecutive values.
for (uint64_t I = 0; I < TableSize; ++I) {
@@ -6080,12 +6072,15 @@ SwitchLookupTable::SwitchLookupTable(
LinearMappingPossible = false;
break;
}
+ Wrapped |=
+ Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
}
PrevVal = Val;
}
if (LinearMappingPossible) {
LinearOffset = cast<ConstantInt>(TableContents[0]);
LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
+ LinearMapValWrapped = Wrapped;
Kind = LinearMapKind;
++NumLinearMaps;
return;
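To keep the new Wrapped bookkeeping concrete, a source-level sketch of the kind of switch that takes the LinearMapKind path (illustrative only, not part of the patch): the case results form an arithmetic progression, the table collapses to offset + index * stride, and because the mapped values never wrap in signed arithmetic the emitted multiply and add may now carry nsw.

#include <cstdio>

static int beforeLowering(unsigned X) {
  switch (X) { // results are 7 + 5 * X for X in [0, 3]
  case 0: return 7;
  case 1: return 12;
  case 2: return 17;
  case 3: return 22;
  default: return 0;
  }
}

// Roughly what the linear-map lookup computes for in-range indices.
static int afterLowering(unsigned X) {
  return X < 4 ? 7 + 5 * static_cast<int>(X) : 0;
}

int main() {
  for (unsigned I = 0; I < 6; ++I)
    std::printf("%d %d\n", beforeLowering(I), afterLowering(I));
}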
@@ -6134,9 +6129,14 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
false, "switch.idx.cast");
if (!LinearMultiplier->isOne())
- Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult");
+ Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
+ /*HasNUW = */ false,
+ /*HasNSW = */ !LinearMapValWrapped);
+
if (!LinearOffset->isZero())
- Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset");
+ Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
+ /*HasNUW = */ false,
+ /*HasNSW = */ !LinearMapValWrapped);
return Result;
}
case BitMapKind: {
@@ -6148,10 +6148,12 @@ Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
// truncating it to the width of the bitmask is safe.
Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
- // Multiply the shift amount by the element width.
+ // Multiply the shift amount by the element width. NUW/NSW can always be
+ // set, because WouldFitInRegister guarantees Index * ShiftAmt fits within
+ // BitMap's bit width.
ShiftAmt = Builder.CreateMul(
ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
- "switch.shiftamt");
+ "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
// Shift down.
Value *DownShifted =
@@ -6490,6 +6492,21 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
std::vector<DominatorTree::UpdateType> Updates;
+ // Compute the maximum table size representable by the integer type we are
+ // switching upon.
+ unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
+ uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
+ assert(MaxTableSize >= TableSize &&
+ "It is impossible for a switch to have more entries than the max "
+ "representable value of its input integer type's size.");
+
+ // If the default destination is unreachable, or if the lookup table covers
+ // all values of the conditional variable, branch directly to the lookup table
+ // BB. Otherwise, check that the condition is within the case range.
+ const bool DefaultIsReachable =
+ !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
+ const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
+
// Create the BB that does the lookups.
Module &Mod = *CommonDest->getParent()->getParent();
BasicBlock *LookupBB = BasicBlock::Create(
@@ -6504,24 +6521,19 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
TableIndex = SI->getCondition();
} else {
TableIndexOffset = MinCaseVal;
- TableIndex =
- Builder.CreateSub(SI->getCondition(), TableIndexOffset, "switch.tableidx");
- }
+ // If the default is unreachable, all case values are s>= MinCaseVal. Then
+ // we can try to attach nsw.
+ bool MayWrap = true;
+ if (!DefaultIsReachable) {
+ APInt Res = MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
+ (void)Res;
+ }
- // Compute the maximum table size representable by the integer type we are
- // switching upon.
- unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
- uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
- assert(MaxTableSize >= TableSize &&
- "It is impossible for a switch to have more entries than the max "
- "representable value of its input integer type's size.");
+ TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
+ "switch.tableidx", /*HasNUW =*/false,
+ /*HasNSW =*/!MayWrap);
+ }
- // If the default destination is unreachable, or if the lookup table covers
- // all values of the conditional variable, branch directly to the lookup table
- // BB. Otherwise, check that the condition is within the case range.
- const bool DefaultIsReachable =
- !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
- const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
BranchInst *RangeCheckBranch = nullptr;
if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
@@ -6694,7 +6706,7 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
// less than 64.
unsigned Shift = 64;
for (auto &V : Values)
- Shift = std::min(Shift, countTrailingZeros((uint64_t)V));
+ Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
assert(Shift < 64);
if (Shift > 0)
for (auto &V : Values)
@@ -6990,7 +7002,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
"Tautological conditional branch should have been eliminated already.");
BasicBlock *BB = BI->getParent();
- if (!Options.SimplifyCondBranch)
+ if (!Options.SimplifyCondBranch ||
+ BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
return false;
// Conditional branch
@@ -7045,8 +7058,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// can hoist it up to the branching block.
if (BI->getSuccessor(0)->getSinglePredecessor()) {
if (BI->getSuccessor(1)->getSinglePredecessor()) {
- if (HoistCommon &&
- HoistThenElseCodeToIf(BI, TTI, !Options.HoistCommonInsts))
+ if (HoistCommon && HoistThenElseCodeToIf(BI, !Options.HoistCommonInsts))
return requestResimplify();
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
@@ -7054,7 +7066,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
if (Succ0TI->getNumSuccessors() == 1 &&
Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
- if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI))
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
return requestResimplify();
}
} else if (BI->getSuccessor(1)->getSinglePredecessor()) {
@@ -7063,7 +7075,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
if (Succ1TI->getNumSuccessors() == 1 &&
Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
- if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI))
+ if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
return requestResimplify();
}
@@ -7179,7 +7191,8 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValu
/// If BB has an incoming value that will always trigger undefined behavior
/// (eg. null pointer dereference), remove the branch leading here.
static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
- DomTreeUpdater *DTU) {
+ DomTreeUpdater *DTU,
+ AssumptionCache *AC) {
for (PHINode &PHI : BB->phis())
for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
@@ -7196,10 +7209,13 @@ static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
// Preserve guarding condition in assume, because it might not be
// inferrable from any dominating condition.
Value *Cond = BI->getCondition();
+ CallInst *Assumption;
if (BI->getSuccessor(0) == BB)
- Builder.CreateAssumption(Builder.CreateNot(Cond));
+ Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
else
- Builder.CreateAssumption(Cond);
+ Assumption = Builder.CreateAssumption(Cond);
+ if (AC)
+ AC->registerAssumption(cast<AssumeInst>(Assumption));
Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
: BI->getSuccessor(0));
}
@@ -7260,7 +7276,7 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
Changed |= EliminateDuplicatePHINodes(BB);
// Check for and remove branches that will always cause undefined behavior.
- if (removeUndefIntroducingPredecessor(BB, DTU))
+ if (removeUndefIntroducingPredecessor(BB, DTU, Options.AC))
return requestResimplify();
// Merge basic blocks into their predecessor if there is only one distinct
@@ -7282,7 +7298,8 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
IRBuilder<> Builder(BB);
- if (Options.FoldTwoEntryPHINode) {
+ if (Options.SpeculateBlocks &&
+ !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
// If there is a trivial two-entry PHI node in this basic block, and we can
// eliminate it, do so now.
if (auto *PN = dyn_cast<PHINode>(BB->begin()))
diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
index 4e83d2f6e3c6..a28916bc9baf 100644
--- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp
@@ -93,6 +93,7 @@ namespace {
void replaceRemWithNumeratorOrZero(BinaryOperator *Rem);
void replaceSRemWithURem(BinaryOperator *Rem);
bool eliminateSDiv(BinaryOperator *SDiv);
+ bool strengthenBinaryOp(BinaryOperator *BO, Instruction *IVOperand);
bool strengthenOverflowingOperation(BinaryOperator *OBO,
Instruction *IVOperand);
bool strengthenRightShift(BinaryOperator *BO, Instruction *IVOperand);
@@ -216,8 +217,10 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,
// Do not generate something ridiculous.
auto *PHTerm = Preheader->getTerminator();
- if (Rewriter.isHighCostExpansion({ InvariantLHS, InvariantRHS }, L,
- 2 * SCEVCheapExpansionBudget, TTI, PHTerm))
+ if (Rewriter.isHighCostExpansion({InvariantLHS, InvariantRHS}, L,
+ 2 * SCEVCheapExpansionBudget, TTI, PHTerm) ||
+ !Rewriter.isSafeToExpandAt(InvariantLHS, PHTerm) ||
+ !Rewriter.isSafeToExpandAt(InvariantRHS, PHTerm))
return false;
auto *NewLHS =
Rewriter.expandCodeFor(InvariantLHS, IVOperand->getType(), PHTerm);
@@ -747,6 +750,13 @@ bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,
return true;
}
+bool SimplifyIndvar::strengthenBinaryOp(BinaryOperator *BO,
+ Instruction *IVOperand) {
+ return (isa<OverflowingBinaryOperator>(BO) &&
+ strengthenOverflowingOperation(BO, IVOperand)) ||
+ (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand));
+}
+
/// Annotate BO with nsw / nuw if it provably does not signed-overflow /
/// unsigned-overflow. Returns true if anything changed, false otherwise.
bool SimplifyIndvar::strengthenOverflowingOperation(BinaryOperator *BO,
@@ -898,6 +908,14 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
if (replaceIVUserWithLoopInvariant(UseInst))
continue;
+ // Go one step further for a 'ptrtoint ptr to i64' cast: also try to replace
+ // its users with loop invariants.
+ if (isa<PtrToIntInst>(UseInst))
+ for (Use &U : UseInst->uses()) {
+ Instruction *User = cast<Instruction>(U.getUser());
+ if (replaceIVUserWithLoopInvariant(User))
+ break; // done replacing
+ }
+
Instruction *IVOperand = UseOper.second;
for (unsigned N = 0; IVOperand; ++N) {
assert(N <= Simplified.size() && "runaway iteration");
@@ -917,9 +935,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {
}
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseInst)) {
- if ((isa<OverflowingBinaryOperator>(BO) &&
- strengthenOverflowingOperation(BO, IVOperand)) ||
- (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) {
+ if (strengthenBinaryOp(BO, IVOperand)) {
// re-queue uses of the now modified binary operator and fall
// through to the checks that remain.
pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 20f18322d43c..5b0951252c07 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -14,11 +14,12 @@
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
@@ -29,6 +30,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
@@ -44,6 +46,45 @@ static cl::opt<bool>
cl::desc("Enable unsafe double to float "
"shrinking for math lib calls"));
+// Enable conversion of operator new calls with a MemProf hot or cold hint
+// to an operator new call that takes a hot/cold hint. Off by default since
+// not all allocators currently support this extension.
+static cl::opt<bool>
+ OptimizeHotColdNew("optimize-hot-cold-new", cl::Hidden, cl::init(false),
+ cl::desc("Enable hot/cold operator new library calls"));
+
+namespace {
+
+// Specialized parser to ensure the hint is an 8-bit value (we can't specify
+// uint8_t to opt<>, as that is interpreted to mean that we are passing a char
+// option with a specific set of values).
+struct HotColdHintParser : public cl::parser<unsigned> {
+ HotColdHintParser(cl::Option &O) : cl::parser<unsigned>(O) {}
+
+ bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, unsigned &Value) {
+ if (Arg.getAsInteger(0, Value))
+ return O.error("'" + Arg + "' value invalid for uint argument!");
+
+ if (Value > 255)
+ return O.error("'" + Arg + "' value must be in the range [0, 255]!");
+
+ return false;
+ }
+};
+
+} // end anonymous namespace
+
+// Hot/cold operator new takes an 8-bit hotness hint, where 0 is the coldest
+// and 255 is the hottest. Default to one step away from the coldest and
+// hottest hints, so that compiler-hinted allocations are slightly weaker than
+// manually inserted hints at the two extremes.
+static cl::opt<unsigned, false, HotColdHintParser> ColdNewHintValue(
+ "cold-new-hint-value", cl::Hidden, cl::init(1),
+ cl::desc("Value to pass to hot/cold operator new for cold allocation"));
+static cl::opt<unsigned, false, HotColdHintParser> HotNewHintValue(
+ "hot-new-hint-value", cl::Hidden, cl::init(254),
+ cl::desc("Value to pass to hot/cold operator new for hot allocation"));
+
//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//
@@ -186,21 +227,9 @@ static Value *convertStrToInt(CallInst *CI, StringRef &Str, Value *EndPtr,
return ConstantInt::get(RetTy, Result);
}
-static bool isOnlyUsedInComparisonWithZero(Value *V) {
- for (User *U : V->users()) {
- if (ICmpInst *IC = dyn_cast<ICmpInst>(U))
- if (Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
- if (C->isNullValue())
- continue;
- // Unknown instruction.
- return false;
- }
- return true;
-}
-
static bool canTransformToMemCmp(CallInst *CI, Value *Str, uint64_t Len,
const DataLayout &DL) {
- if (!isOnlyUsedInComparisonWithZero(CI))
+ if (!isOnlyUsedInZeroComparison(CI))
return false;
if (!isDereferenceableAndAlignedPointer(Str, Align(1), APInt(64, Len), DL))
@@ -1358,6 +1387,10 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
return nullptr;
}
+ bool OptForSize = CI->getFunction()->hasOptSize() ||
+ llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI,
+ PGSOQueryType::IRPass);
+
// If the char is variable but the input str and length are not we can turn
// this memchr call into a simple bit field test. Of course this only works
// when the return value is only checked against null.
@@ -1368,7 +1401,7 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
// memchr("\r\n", C, 2) != nullptr -> (1 << C & ((1 << '\r') | (1 << '\n')))
// != 0
// after bounds check.
- if (Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI))
+ if (OptForSize || Str.empty() || !isOnlyUsedInZeroEqualityComparison(CI))
return nullptr;
unsigned char Max =
@@ -1380,8 +1413,34 @@ Value *LibCallSimplifier::optimizeMemChr(CallInst *CI, IRBuilderBase &B) {
// FIXME: On a 64 bit architecture this prevents us from using the
// interesting range of alpha ascii chars. We could do better by emitting
// two bitfields or shifting the range by 64 if no lower chars are used.
- if (!DL.fitsInLegalInteger(Max + 1))
- return nullptr;
+ if (!DL.fitsInLegalInteger(Max + 1)) {
+ // Build chain of ORs
+ // Transform:
+ // memchr("abcd", C, 4) != nullptr
+ // to:
+ // (C == 'a' || C == 'b' || C == 'c' || C == 'd') != 0
+ std::string SortedStr = Str.str();
+ llvm::sort(SortedStr);
+ // Compute the number of non-contiguous ranges.
+ unsigned NonContRanges = 1;
+ for (size_t i = 1; i < SortedStr.size(); ++i) {
+ if (SortedStr[i] > SortedStr[i - 1] + 1) {
+ NonContRanges++;
+ }
+ }
+
+ // Restrict this optimization to profitable cases with one or two range
+ // checks.
+ if (NonContRanges > 2)
+ return nullptr;
+
+ SmallVector<Value *> CharCompares;
+ for (unsigned char C : SortedStr)
+ CharCompares.push_back(
+ B.CreateICmpEQ(CharVal, ConstantInt::get(CharVal->getType(), C)));
+
+ return B.CreateIntToPtr(B.CreateOr(CharCompares), CI->getType());
+ }
// For the bit field use a power-of-2 type with at least 8 bits to avoid
// creating unnecessary illegal types.
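A source-level sketch of the new fallback fold added above (illustrative only, not part of the patch): when the needed bitmask would not fit in a legal integer, a memchr call whose result is only compared against null is rewritten into a short chain of equality tests, provided the sorted characters form at most two contiguous ranges.

#include <cstring>

// Before: only the null-ness of the memchr result is used.
static bool containsBefore(int C) {
  return std::memchr("ab\x80\x81", C, 4) != nullptr;
}

// After (roughly): an OR of per-character equality tests. 'a','b' and
// '\x80','\x81' are two contiguous ranges, and a bitmask covering byte value
// 0x81 would need 130 bits, more than a typical target's legal integer widths,
// which is what steers the simplifier onto this path instead of the bitfield
// test.
static bool containsAfter(int C) {
  unsigned char UC = static_cast<unsigned char>(C);
  return UC == 'a' || UC == 'b' || UC == 0x80 || UC == 0x81;
}

int main() { return containsBefore('a') == containsAfter('a') ? 0 : 1; }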
@@ -1481,30 +1540,21 @@ static Value *optimizeMemCmpConstantSize(CallInst *CI, Value *LHS, Value *RHS,
// First, see if we can fold either argument to a constant.
Value *LHSV = nullptr;
- if (auto *LHSC = dyn_cast<Constant>(LHS)) {
- LHSC = ConstantExpr::getBitCast(LHSC, IntType->getPointerTo());
+ if (auto *LHSC = dyn_cast<Constant>(LHS))
LHSV = ConstantFoldLoadFromConstPtr(LHSC, IntType, DL);
- }
+
Value *RHSV = nullptr;
- if (auto *RHSC = dyn_cast<Constant>(RHS)) {
- RHSC = ConstantExpr::getBitCast(RHSC, IntType->getPointerTo());
+ if (auto *RHSC = dyn_cast<Constant>(RHS))
RHSV = ConstantFoldLoadFromConstPtr(RHSC, IntType, DL);
- }
// Don't generate unaligned loads. If either source is constant data,
// alignment doesn't matter for that source because there is no load.
if ((LHSV || getKnownAlignment(LHS, DL, CI) >= PrefAlignment) &&
(RHSV || getKnownAlignment(RHS, DL, CI) >= PrefAlignment)) {
- if (!LHSV) {
- Type *LHSPtrTy =
- IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
- LHSV = B.CreateLoad(IntType, B.CreateBitCast(LHS, LHSPtrTy), "lhsv");
- }
- if (!RHSV) {
- Type *RHSPtrTy =
- IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
- RHSV = B.CreateLoad(IntType, B.CreateBitCast(RHS, RHSPtrTy), "rhsv");
- }
+ if (!LHSV)
+ LHSV = B.CreateLoad(IntType, LHS, "lhsv");
+ if (!RHSV)
+ RHSV = B.CreateLoad(IntType, RHS, "rhsv");
return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
}
}
@@ -1653,6 +1703,59 @@ Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilderBase &B) {
return nullptr;
}
+// When enabled, replace operator new() calls marked with a hot or cold memprof
+// attribute with an operator new() call that takes a __hot_cold_t parameter.
+// Currently this is supported by the open source version of tcmalloc, see:
+// https://github.com/google/tcmalloc/blob/master/tcmalloc/new_extension.h
+Value *LibCallSimplifier::optimizeNew(CallInst *CI, IRBuilderBase &B,
+ LibFunc &Func) {
+ if (!OptimizeHotColdNew)
+ return nullptr;
+
+ uint8_t HotCold;
+ if (CI->getAttributes().getFnAttr("memprof").getValueAsString() == "cold")
+ HotCold = ColdNewHintValue;
+ else if (CI->getAttributes().getFnAttr("memprof").getValueAsString() == "hot")
+ HotCold = HotNewHintValue;
+ else
+ return nullptr;
+
+ switch (Func) {
+ case LibFunc_Znwm:
+ return emitHotColdNew(CI->getArgOperand(0), B, TLI,
+ LibFunc_Znwm12__hot_cold_t, HotCold);
+ case LibFunc_Znam:
+ return emitHotColdNew(CI->getArgOperand(0), B, TLI,
+ LibFunc_Znam12__hot_cold_t, HotCold);
+ case LibFunc_ZnwmRKSt9nothrow_t:
+ return emitHotColdNewNoThrow(CI->getArgOperand(0), CI->getArgOperand(1), B,
+ TLI, LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t,
+ HotCold);
+ case LibFunc_ZnamRKSt9nothrow_t:
+ return emitHotColdNewNoThrow(CI->getArgOperand(0), CI->getArgOperand(1), B,
+ TLI, LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t,
+ HotCold);
+ case LibFunc_ZnwmSt11align_val_t:
+ return emitHotColdNewAligned(CI->getArgOperand(0), CI->getArgOperand(1), B,
+ TLI, LibFunc_ZnwmSt11align_val_t12__hot_cold_t,
+ HotCold);
+ case LibFunc_ZnamSt11align_val_t:
+ return emitHotColdNewAligned(CI->getArgOperand(0), CI->getArgOperand(1), B,
+ TLI, LibFunc_ZnamSt11align_val_t12__hot_cold_t,
+ HotCold);
+ case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
+ return emitHotColdNewAlignedNoThrow(
+ CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
+ TLI, LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold);
+ case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
+ return emitHotColdNewAlignedNoThrow(
+ CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), B,
+ TLI, LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t, HotCold);
+ default:
+ return nullptr;
+ }
+}
+
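To make the LibFunc mapping above concrete, a hedged source-level sketch (not part of the patch). The enum name and overload below are modelled on tcmalloc's new_extension.h and are assumptions here, stand-ins for whatever the target allocator really declares; with -optimize-hot-cold-new, a call whose site carries a memprof "cold" attribute is redirected to the hinted overload with -cold-new-hint-value (default 1), and a "hot" site gets -hot-new-hint-value (default 254).

#include <cstddef>
#include <cstdint>
#include <new>

// Assumed extension interface; a forwarding definition is provided so the
// sketch links on its own.
enum class __hot_cold_t : uint8_t {};
void *operator new(std::size_t Size, __hot_cold_t) { return ::operator new(Size); }

static void *allocBefore(std::size_t N) {
  return ::operator new(N);                  // call site annotated memprof="cold"
}

static void *allocAfter(std::size_t N) {
  return ::operator new(N, __hot_cold_t{1}); // 1 == default -cold-new-hint-value
}

int main() {
  void *P = allocAfter(64);
  ::operator delete(P);
  void *Q = allocBefore(64);
  ::operator delete(Q);
}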
//===----------------------------------------------------------------------===//
// Math Library Optimizations
//===----------------------------------------------------------------------===//
@@ -1939,7 +2042,8 @@ Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) {
AttributeList NoAttrs; // Attributes are only meaningful on the original call
// pow(2.0, itofp(x)) -> ldexp(1.0, x)
- if (match(Base, m_SpecificFP(2.0)) &&
+ // TODO: This does not work for vectors because there is no ldexp intrinsic.
+ if (!Ty->isVectorTy() && match(Base, m_SpecificFP(2.0)) &&
(isa<SIToFPInst>(Expo) || isa<UIToFPInst>(Expo)) &&
hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
if (Value *ExpoI = getIntToFPVal(Expo, B, TLI->getIntSize()))
@@ -2056,7 +2160,7 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) {
// pow(-Inf, 0.5) is optionally required to have a result of +Inf (not setting
// errno), but sqrt(-Inf) is required by various standards to set errno.
if (!Pow->doesNotAccessMemory() && !Pow->hasNoInfs() &&
- !isKnownNeverInfinity(Base, TLI))
+ !isKnownNeverInfinity(Base, DL, TLI, 0, AC, Pow))
return nullptr;
Sqrt = getSqrtCall(Base, AttributeList(), Pow->doesNotAccessMemory(), Mod, B,
@@ -2217,17 +2321,25 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) {
hasFloatVersion(M, Name))
Ret = optimizeUnaryDoubleFP(CI, B, TLI, true);
+ // Bail out for vectors because the code below only expects scalars.
+ // TODO: This could be allowed if we had a ldexp intrinsic (D14327).
Type *Ty = CI->getType();
- Value *Op = CI->getArgOperand(0);
+ if (Ty->isVectorTy())
+ return Ret;
// exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= IntSize
// exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < IntSize
+ Value *Op = CI->getArgOperand(0);
if ((isa<SIToFPInst>(Op) || isa<UIToFPInst>(Op)) &&
hasFloatFn(M, TLI, Ty, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl)) {
- if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize()))
- return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI,
- LibFunc_ldexp, LibFunc_ldexpf,
- LibFunc_ldexpl, B, AttributeList());
+ if (Value *Exp = getIntToFPVal(Op, B, TLI->getIntSize())) {
+ IRBuilderBase::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
+ return copyFlags(
+ *CI, emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI,
+ LibFunc_ldexp, LibFunc_ldexpf,
+ LibFunc_ldexpl, B, AttributeList()));
+ }
}
return Ret;
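For the scalar case this hunk still handles, a small standalone illustration of the underlying identity (not part of the patch): exp2 of an integer converted to floating point equals 1.0 scaled by that power of two, which is exactly what ldexp computes, so the int-to-FP conversion plus the exp2 call collapse into a single ldexp call (modulo errno and FP-exception details).

#include <cmath>
#include <cstdio>

static double before(int I) { return std::exp2(static_cast<double>(I)); }
static double after(int I)  { return std::ldexp(1.0, I); } // 1.0 * 2^I

int main() {
  for (int I : {-3, 0, 7, 52})
    std::printf("%d: %a %a\n", I, before(I), after(I));
}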
@@ -2579,7 +2691,7 @@ static bool insertSinCosCall(IRBuilderBase &B, Function *OrigCallee, Value *Arg,
return true;
}
-Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) {
+Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, bool IsSin, IRBuilderBase &B) {
// Make sure the prototype is as expected, otherwise the rest of the
// function is probably invalid and likely to abort.
if (!isTrigLibCall(CI))
@@ -2618,7 +2730,7 @@ Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilderBase &B) {
replaceTrigInsts(CosCalls, Cos);
replaceTrigInsts(SinCosCalls, SinCos);
- return nullptr;
+ return IsSin ? Sin : Cos;
}
void LibCallSimplifier::classifyArgUse(
@@ -3439,6 +3551,15 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
return optimizeWcslen(CI, Builder);
case LibFunc_bcopy:
return optimizeBCopy(CI, Builder);
+ case LibFunc_Znwm:
+ case LibFunc_ZnwmRKSt9nothrow_t:
+ case LibFunc_ZnwmSt11align_val_t:
+ case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:
+ case LibFunc_Znam:
+ case LibFunc_ZnamRKSt9nothrow_t:
+ case LibFunc_ZnamSt11align_val_t:
+ case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:
+ return optimizeNew(CI, Builder, Func);
default:
break;
}
@@ -3461,9 +3582,10 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
switch (Func) {
case LibFunc_sinpif:
case LibFunc_sinpi:
+ return optimizeSinCosPi(CI, /*IsSin*/true, Builder);
case LibFunc_cospif:
case LibFunc_cospi:
- return optimizeSinCosPi(CI, Builder);
+ return optimizeSinCosPi(CI, /*IsSin*/false, Builder);
case LibFunc_powf:
case LibFunc_pow:
case LibFunc_powl:
@@ -3696,13 +3818,13 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI, IRBuilderBase &Builder) {
}
LibCallSimplifier::LibCallSimplifier(
- const DataLayout &DL, const TargetLibraryInfo *TLI,
- OptimizationRemarkEmitter &ORE,
- BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ const DataLayout &DL, const TargetLibraryInfo *TLI, AssumptionCache *AC,
+ OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI,
function_ref<void(Instruction *, Value *)> Replacer,
function_ref<void(Instruction *)> Eraser)
- : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), ORE(ORE), BFI(BFI), PSI(PSI),
- Replacer(Replacer), Eraser(Eraser) {}
+ : FortifiedSimplifier(TLI), DL(DL), TLI(TLI), AC(AC), ORE(ORE), BFI(BFI),
+ PSI(PSI), Replacer(Replacer), Eraser(Eraser) {}
void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
// Indirect through the replacer used in this instance.
diff --git a/llvm/lib/Transforms/Utils/SizeOpts.cpp b/llvm/lib/Transforms/Utils/SizeOpts.cpp
index 1242380f73c1..1ca2e0e6ebb9 100644
--- a/llvm/lib/Transforms/Utils/SizeOpts.cpp
+++ b/llvm/lib/Transforms/Utils/SizeOpts.cpp
@@ -98,14 +98,12 @@ struct BasicBlockBFIAdapter {
bool llvm::shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI,
PGSOQueryType QueryType) {
- return shouldFuncOptimizeForSizeImpl<BasicBlockBFIAdapter>(F, PSI, BFI,
- QueryType);
+ return shouldFuncOptimizeForSizeImpl(F, PSI, BFI, QueryType);
}
bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI,
PGSOQueryType QueryType) {
assert(BB);
- return shouldOptimizeForSizeImpl<BasicBlockBFIAdapter>(BB, PSI, BFI,
- QueryType);
+ return shouldOptimizeForSizeImpl(BB, PSI, BFI, QueryType);
}
diff --git a/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
index 10fda4df51ba..618c6bab3a8f 100644
--- a/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
+++ b/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
@@ -8,44 +8,13 @@
#include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h"
#include "llvm/IR/DebugInfo.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
-#include "llvm/Transforms/Utils.h"
-using namespace llvm;
-
-namespace {
-
-/// This pass strips all debug info that is not related line tables.
-/// The result will be the same as if the program where compiled with
-/// -gline-tables-only.
-struct StripNonLineTableDebugLegacyPass : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
- StripNonLineTableDebugLegacyPass() : ModulePass(ID) {
- initializeStripNonLineTableDebugLegacyPassPass(
- *PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
-
- bool runOnModule(Module &M) override {
- return llvm::stripNonLineTableDebugInfo(M);
- }
-};
-}
-
-char StripNonLineTableDebugLegacyPass::ID = 0;
-INITIALIZE_PASS(StripNonLineTableDebugLegacyPass,
- "strip-nonlinetable-debuginfo",
- "Strip all debug info except linetables", false, false)
-
-ModulePass *llvm::createStripNonLineTableDebugLegacyPass() {
- return new StripNonLineTableDebugLegacyPass();
-}
+using namespace llvm;
PreservedAnalyses
StripNonLineTableDebugInfoPass::run(Module &M, ModuleAnalysisManager &AM) {
llvm::stripNonLineTableDebugInfo(M);
- return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserveSet<CFGAnalyses>();
+ return PA;
}
diff --git a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
index 4ad16d622e8d..c3ae43e567b0 100644
--- a/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -517,37 +517,6 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
return true;
}
-namespace {
-
-class RewriteSymbolsLegacyPass : public ModulePass {
-public:
- static char ID; // Pass identification, replacement for typeid
-
- RewriteSymbolsLegacyPass();
- RewriteSymbolsLegacyPass(SymbolRewriter::RewriteDescriptorList &DL);
-
- bool runOnModule(Module &M) override;
-
-private:
- RewriteSymbolPass Impl;
-};
-
-} // end anonymous namespace
-
-char RewriteSymbolsLegacyPass::ID = 0;
-
-RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID) {
- initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry());
-}
-
-RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass(
- SymbolRewriter::RewriteDescriptorList &DL)
- : ModulePass(ID), Impl(DL) {}
-
-bool RewriteSymbolsLegacyPass::runOnModule(Module &M) {
- return Impl.runImpl(M);
-}
-
PreservedAnalyses RewriteSymbolPass::run(Module &M, ModuleAnalysisManager &AM) {
if (!runImpl(M))
return PreservedAnalyses::all();
@@ -572,15 +541,3 @@ void RewriteSymbolPass::loadAndParseMapFiles() {
for (const auto &MapFile : MapFiles)
Parser.parse(MapFile, &Descriptors);
}
-
-INITIALIZE_PASS(RewriteSymbolsLegacyPass, "rewrite-symbols", "Rewrite Symbols",
- false, false)
-
-ModulePass *llvm::createRewriteSymbolsPass() {
- return new RewriteSymbolsLegacyPass();
-}
-
-ModulePass *
-llvm::createRewriteSymbolsPass(SymbolRewriter::RewriteDescriptorList &DL) {
- return new RewriteSymbolsLegacyPass(DL);
-}
diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
index 3be96ebc93a2..8c781f59ff5a 100644
--- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
+++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp
@@ -113,7 +113,7 @@ static void restoreSSA(const DominatorTree &DT, const Loop *L,
}
}
- for (auto II : ExternalUsers) {
+ for (const auto &II : ExternalUsers) {
// For each Def used outside the loop, create NewPhi in
// LoopExitBlock. NewPhi receives Def only along exiting blocks that
// dominate it, while the remaining values are undefined since those paths
@@ -130,7 +130,7 @@ static void restoreSSA(const DominatorTree &DT, const Loop *L,
NewPhi->addIncoming(Def, In);
} else {
LLVM_DEBUG(dbgs() << "not dominated\n");
- NewPhi->addIncoming(UndefValue::get(Def->getType()), In);
+ NewPhi->addIncoming(PoisonValue::get(Def->getType()), In);
}
}
diff --git a/llvm/lib/Transforms/Utils/Utils.cpp b/llvm/lib/Transforms/Utils/Utils.cpp
index d002922cfd30..91c743f17764 100644
--- a/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/llvm/lib/Transforms/Utils/Utils.cpp
@@ -12,9 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils.h"
-#include "llvm-c/Initialization.h"
-#include "llvm-c/Transforms/Utils.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
@@ -24,42 +21,18 @@ using namespace llvm;
/// initializeTransformUtils - Initialize all passes in the TransformUtils
/// library.
void llvm::initializeTransformUtils(PassRegistry &Registry) {
- initializeAddDiscriminatorsLegacyPassPass(Registry);
- initializeAssumeSimplifyPassLegacyPassPass(Registry);
initializeAssumeBuilderPassLegacyPassPass(Registry);
initializeBreakCriticalEdgesPass(Registry);
initializeCanonicalizeFreezeInLoopsPass(Registry);
- initializeInstNamerPass(Registry);
initializeLCSSAWrapperPassPass(Registry);
- initializeLibCallsShrinkWrapLegacyPassPass(Registry);
initializeLoopSimplifyPass(Registry);
initializeLowerGlobalDtorsLegacyPassPass(Registry);
initializeLowerInvokeLegacyPassPass(Registry);
initializeLowerSwitchLegacyPassPass(Registry);
initializePromoteLegacyPassPass(Registry);
- initializeStripNonLineTableDebugLegacyPassPass(Registry);
initializeUnifyFunctionExitNodesLegacyPassPass(Registry);
- initializeMetaRenamerPass(Registry);
initializeStripGCRelocatesLegacyPass(Registry);
initializePredicateInfoPrinterLegacyPassPass(Registry);
- initializeInjectTLIMappingsLegacyPass(Registry);
initializeFixIrreduciblePass(Registry);
initializeUnifyLoopExitsLegacyPassPass(Registry);
}
-
-/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses.
-void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) {
- initializeTransformUtils(*unwrap(R));
-}
-
-void LLVMAddLowerSwitchPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLowerSwitchPass());
-}
-
-void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createPromoteMemoryToRegisterPass());
-}
-
-void LLVMAddAddDiscriminatorsPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createAddDiscriminatorsPass());
-}
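The deleted LLVMAdd*Pass bindings were thin wrappers over the legacy pass manager. A minimal sketch of achieving the same transforms through the new pass manager's C API, assuming llvm-c/Transforms/PassBuilder.h and llvm-c/Error.h (the helper name and pipeline string are illustrative):

// Sketch only: run lower-switch and mem2reg via the new-PM C API.
#include "llvm-c/Core.h"
#include "llvm-c/Error.h"
#include "llvm-c/Transforms/PassBuilder.h"

static void runUtilsPipeline(LLVMModuleRef M) {
  LLVMPassBuilderOptionsRef Opts = LLVMCreatePassBuilderOptions();
  // Pass names follow the new pass manager's textual pipeline syntax.
  LLVMErrorRef Err =
      LLVMRunPasses(M, "function(lower-switch,mem2reg)", /*TM=*/nullptr, Opts);
  if (Err)
    LLVMConsumeError(Err);
  LLVMDisposePassBuilderOptions(Opts);
}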
diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp
index f295a7e312b6..7a597da2bc51 100644
--- a/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -226,91 +226,6 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
DL);
}
-/// Looks at a memory location for a load (specified by MemLocBase, Offs, and
-/// Size) and compares it against a load.
-///
-/// If the specified load could be safely widened to a larger integer load
-/// that is 1) still efficient, 2) safe for the target, and 3) would provide
-/// the specified memory location value, then this function returns the size
-/// in bytes of the load width to use. If not, this returns zero.
-static unsigned getLoadLoadClobberFullWidthSize(const Value *MemLocBase,
- int64_t MemLocOffs,
- unsigned MemLocSize,
- const LoadInst *LI) {
- // We can only extend simple integer loads.
- if (!isa<IntegerType>(LI->getType()) || !LI->isSimple())
- return 0;
-
- // Load widening is hostile to ThreadSanitizer: it may cause false positives
- // or make the reports more cryptic (access sizes are wrong).
- if (LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
- return 0;
-
- const DataLayout &DL = LI->getModule()->getDataLayout();
-
- // Get the base of this load.
- int64_t LIOffs = 0;
- const Value *LIBase =
- GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, DL);
-
- // If the two pointers are not based on the same pointer, we can't tell that
- // they are related.
- if (LIBase != MemLocBase)
- return 0;
-
- // Okay, the two values are based on the same pointer, but returned as
- // no-alias. This happens when we have things like two byte loads at "P+1"
- // and "P+3". Check to see if increasing the size of the "LI" load up to its
- // alignment (or the largest native integer type) will allow us to load all
- // the bits required by MemLoc.
-
- // If MemLoc is before LI, then no widening of LI will help us out.
- if (MemLocOffs < LIOffs)
- return 0;
-
- // Get the alignment of the load in bytes. We assume that it is safe to load
- // any legal integer up to this size without a problem. For example, if we're
- // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can
- // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it
- // to i16.
- unsigned LoadAlign = LI->getAlign().value();
-
- int64_t MemLocEnd = MemLocOffs + MemLocSize;
-
- // If no amount of rounding up will let MemLoc fit into LI, then bail out.
- if (LIOffs + LoadAlign < MemLocEnd)
- return 0;
-
- // This is the size of the load to try. Start with the next larger power of
- // two.
- unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
- NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
-
- while (true) {
- // If this load size is bigger than our known alignment or would not fit
- // into a native integer register, then we fail.
- if (NewLoadByteSize > LoadAlign ||
- !DL.fitsInLegalInteger(NewLoadByteSize * 8))
- return 0;
-
- if (LIOffs + NewLoadByteSize > MemLocEnd &&
- (LI->getParent()->getParent()->hasFnAttribute(
- Attribute::SanitizeAddress) ||
- LI->getParent()->getParent()->hasFnAttribute(
- Attribute::SanitizeHWAddress)))
- // We will be reading past the location accessed by the original program.
- // While this is safe in a regular build, Address Safety analysis tools
- // may start reporting false warnings. So, don't do widening.
- return 0;
-
- // If a load of this width would include all of MemLoc, then we succeed.
- if (LIOffs + NewLoadByteSize >= MemLocEnd)
- return NewLoadByteSize;
-
- NewLoadByteSize <<= 1;
- }
-}
-
/// This function is called when we have a
/// memdep query of a load that ends up being clobbered by another load. See if
/// the other load can feed into the second load.
@@ -325,28 +240,7 @@ int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
Value *DepPtr = DepLI->getPointerOperand();
uint64_t DepSize = DL.getTypeSizeInBits(DepLI->getType()).getFixedValue();
- int R = analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
- if (R != -1)
- return R;
-
- // If we have a load/load clobber an DepLI can be widened to cover this load,
- // then we should widen it!
- int64_t LoadOffs = 0;
- const Value *LoadBase =
- GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, DL);
- unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue();
-
- unsigned Size =
- getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI);
- if (Size == 0)
- return -1;
-
- // Check non-obvious conditions enforced by MDA which we rely on for being
- // able to materialize this potentially available value
- assert(DepLI->isSimple() && "Cannot widen volatile/atomic load!");
- assert(DepLI->getType()->isIntegerTy() && "Can't widen non-integer load");
-
- return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size * 8, DL);
+ return analyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, DL);
}
int analyzeLoadFromClobberingMemInst(Type *LoadTy, Value *LoadPtr,
@@ -438,83 +332,27 @@ static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
return SrcVal;
}
-/// This function is called when we have a memdep query of a load that ends up
-/// being a clobbering store. This means that the store provides bits used by
-/// the load but the pointers don't must-alias. Check this case to see if
-/// there is anything more we can do before we give up.
-Value *getStoreValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
- Instruction *InsertPt, const DataLayout &DL) {
+Value *getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
+ Instruction *InsertPt, const DataLayout &DL) {
+#ifndef NDEBUG
+ unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType()).getFixedValue();
+ unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue();
+ assert(Offset + LoadSize <= SrcValSize);
+#endif
IRBuilder<> Builder(InsertPt);
SrcVal = getStoreValueForLoadHelper(SrcVal, Offset, LoadTy, Builder, DL);
return coerceAvailableValueToLoadType(SrcVal, LoadTy, Builder, DL);
}
-Constant *getConstantStoreValueForLoad(Constant *SrcVal, unsigned Offset,
- Type *LoadTy, const DataLayout &DL) {
- return ConstantFoldLoadFromConst(SrcVal, LoadTy, APInt(32, Offset), DL);
-}
-
-/// This function is called when we have a memdep query of a load that ends up
-/// being a clobbering load. This means that the load *may* provide bits used
-/// by the load but we can't be sure because the pointers don't must-alias.
-/// Check this case to see if there is anything more we can do before we give
-/// up.
-Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,
- Instruction *InsertPt, const DataLayout &DL) {
- // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to
- // widen SrcVal out to a larger load.
- unsigned SrcValStoreSize =
- DL.getTypeStoreSize(SrcVal->getType()).getFixedValue();
+Constant *getConstantValueForLoad(Constant *SrcVal, unsigned Offset,
+ Type *LoadTy, const DataLayout &DL) {
+#ifndef NDEBUG
+ unsigned SrcValSize = DL.getTypeStoreSize(SrcVal->getType()).getFixedValue();
unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue();
- if (Offset + LoadSize > SrcValStoreSize) {
- assert(SrcVal->isSimple() && "Cannot widen volatile/atomic load!");
- assert(SrcVal->getType()->isIntegerTy() && "Can't widen non-integer load");
- // If we have a load/load clobber an DepLI can be widened to cover this
- // load, then we should widen it to the next power of 2 size big enough!
- unsigned NewLoadSize = Offset + LoadSize;
- if (!isPowerOf2_32(NewLoadSize))
- NewLoadSize = NextPowerOf2(NewLoadSize);
-
- Value *PtrVal = SrcVal->getPointerOperand();
- // Insert the new load after the old load. This ensures that subsequent
- // memdep queries will find the new load. We can't easily remove the old
- // load completely because it is already in the value numbering table.
- IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal));
- Type *DestTy = IntegerType::get(LoadTy->getContext(), NewLoadSize * 8);
- Type *DestPTy =
- PointerType::get(DestTy, PtrVal->getType()->getPointerAddressSpace());
- Builder.SetCurrentDebugLocation(SrcVal->getDebugLoc());
- PtrVal = Builder.CreateBitCast(PtrVal, DestPTy);
- LoadInst *NewLoad = Builder.CreateLoad(DestTy, PtrVal);
- NewLoad->takeName(SrcVal);
- NewLoad->setAlignment(SrcVal->getAlign());
-
- LLVM_DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n");
- LLVM_DEBUG(dbgs() << "TO: " << *NewLoad << "\n");
-
- // Replace uses of the original load with the wider load. On a big endian
- // system, we need to shift down to get the relevant bits.
- Value *RV = NewLoad;
- if (DL.isBigEndian())
- RV = Builder.CreateLShr(RV, (NewLoadSize - SrcValStoreSize) * 8);
- RV = Builder.CreateTrunc(RV, SrcVal->getType());
- SrcVal->replaceAllUsesWith(RV);
-
- SrcVal = NewLoad;
- }
-
- return getStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, DL);
-}
-
-Constant *getConstantLoadValueForLoad(Constant *SrcVal, unsigned Offset,
- Type *LoadTy, const DataLayout &DL) {
- unsigned SrcValStoreSize =
- DL.getTypeStoreSize(SrcVal->getType()).getFixedValue();
- unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue();
- if (Offset + LoadSize > SrcValStoreSize)
- return nullptr;
- return getConstantStoreValueForLoad(SrcVal, Offset, LoadTy, DL);
+ assert(Offset + LoadSize <= SrcValSize);
+#endif
+ return ConstantFoldLoadFromConst(SrcVal, LoadTy, APInt(32, Offset), DL);
}
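With load widening removed, both helpers now require the available value to fully cover the requested load, as the NDEBUG asserts state. A minimal sketch of the check a caller must satisfy (the helper name is illustrative):

// Sketch only: callers must guarantee full coverage before calling
// getValueForLoad / getConstantValueForLoad.
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"

static bool coversLoad(const llvm::DataLayout &DL, llvm::Value *SrcVal,
                       unsigned OffsetBytes, llvm::Type *LoadTy) {
  unsigned SrcSize = DL.getTypeStoreSize(SrcVal->getType()).getFixedValue();
  unsigned LoadSize = DL.getTypeStoreSize(LoadTy).getFixedValue();
  return OffsetBytes + LoadSize <= SrcSize; // mirrors the asserts above
}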
/// This function is called when we have a
diff --git a/llvm/lib/Transforms/Utils/ValueMapper.cpp b/llvm/lib/Transforms/Utils/ValueMapper.cpp
index a5edbb2acc6d..3446e31cc2ef 100644
--- a/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -523,10 +523,14 @@ Value *Mapper::mapValue(const Value *V) {
if (isa<ConstantVector>(C))
return getVM()[V] = ConstantVector::get(Ops);
// If this is a no-operand constant, it must be because the type was remapped.
+ if (isa<PoisonValue>(C))
+ return getVM()[V] = PoisonValue::get(NewTy);
if (isa<UndefValue>(C))
return getVM()[V] = UndefValue::get(NewTy);
if (isa<ConstantAggregateZero>(C))
return getVM()[V] = ConstantAggregateZero::get(NewTy);
+ if (isa<ConstantTargetNone>(C))
+ return getVM()[V] = Constant::getNullValue(NewTy);
assert(isa<ConstantPointerNull>(C));
return getVM()[V] = ConstantPointerNull::get(cast<PointerType>(NewTy));
}
@@ -1030,7 +1034,7 @@ void Mapper::mapAppendingVariable(GlobalVariable &GV, Constant *InitPrefix,
if (IsOldCtorDtor) {
// FIXME: This upgrade is done during linking to support the C API. See
// also IRLinker::linkAppendingVarProto() in IRMover.cpp.
- VoidPtrTy = Type::getInt8Ty(GV.getContext())->getPointerTo();
+ VoidPtrTy = PointerType::getUnqual(GV.getContext());
auto &ST = *cast<StructType>(NewMembers.front()->getType());
Type *Tys[3] = {ST.getElementType(0), ST.getElementType(1), VoidPtrTy};
EltTy = StructType::get(GV.getContext(), Tys, false);
@@ -1179,6 +1183,10 @@ void ValueMapper::remapFunction(Function &F) {
FlushingMapper(pImpl)->remapFunction(F);
}
+void ValueMapper::remapGlobalObjectMetadata(GlobalObject &GO) {
+ FlushingMapper(pImpl)->remapGlobalObjectMetadata(GO);
+}
+
void ValueMapper::scheduleMapGlobalInitializer(GlobalVariable &GV,
Constant &Init,
unsigned MCID) {
diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index 0b7fc853dc1b..260d7889906b 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -37,13 +37,34 @@
// multiple scalar registers, similar to a GPU vectorized load. In theory ARM
// could use this pass (with some modifications), but currently it implements
// its own pass to do something similar to what we do here.
+//
+// Overview of the algorithm and terminology in this pass:
+//
+// - Break up each basic block into pseudo-BBs, composed of instructions which
+// are guaranteed to transfer control to their successors.
+// - Within a single pseudo-BB, find all loads, and group them into
+// "equivalence classes" according to getUnderlyingObject() and loaded
+// element size. Do the same for stores.
+// - For each equivalence class, greedily build "chains". Each chain has a
+// leader instruction, and every other member of the chain has a known
+// constant offset from the first instr in the chain.
+// - Break up chains so that they contain only contiguous accesses of legal
+// size with no intervening may-alias instrs.
+// - Convert each chain to vector instructions.
+//
+// The O(n^2) behavior of this pass comes from initially building the chains.
+// In the worst case we have to compare each new instruction to all of those
+// that came before. To limit this, we only calculate the offset to the leaders
+// of the N most recently-used chains.
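A self-contained sketch of the grouping step described above, using plain standard-library containers instead of the pass's own types (all names are illustrative):

// Sketch only: bucket memory accesses by (underlying object, addrspace,
// element size, is-load); only accesses in the same bucket are chained.
#include <cstdint>
#include <map>
#include <tuple>
#include <vector>

struct Access {
  const void *UnderlyingObject; // stand-in for getUnderlyingObject()
  unsigned AddrSpace;
  unsigned ElemSizeBits;
  bool IsLoad;
  int64_t OffsetFromBase; // constant offset, when known
};

using ClassKey = std::tuple<const void *, unsigned, unsigned, bool>;

std::map<ClassKey, std::vector<Access>>
groupByEqClass(const std::vector<Access> &Accesses) {
  std::map<ClassKey, std::vector<Access>> Classes;
  for (const Access &A : Accesses)
    Classes[{A.UnderlyingObject, A.AddrSpace, A.ElemSizeBits, A.IsLoad}]
        .push_back(A);
  return Classes;
}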
#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
@@ -57,6 +78,7 @@
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -67,23 +89,33 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ModRef.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
#include <cassert>
+#include <cstdint>
#include <cstdlib>
+#include <iterator>
+#include <limits>
+#include <numeric>
+#include <optional>
#include <tuple>
+#include <type_traits>
#include <utility>
+#include <vector>
using namespace llvm;
@@ -92,21 +124,115 @@ using namespace llvm;
STATISTIC(NumVectorInstructions, "Number of vector accesses generated");
STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");
+namespace {
+
+// Equivalence class key, the initial tuple by which we group loads/stores.
+// Loads/stores with different EqClassKeys are never merged.
+//
+// (We could in theory remove element-size from this tuple. We'd just need
+// to fix up the vector packing/unpacking code.)
+using EqClassKey =
+ std::tuple<const Value * /* result of getUnderlyingObject() */,
+ unsigned /* AddrSpace */,
+ unsigned /* Load/Store element size bits */,
+ char /* IsLoad; char b/c bool can't be a DenseMap key */
+ >;
+[[maybe_unused]] llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
+ const EqClassKey &K) {
+ const auto &[UnderlyingObject, AddrSpace, ElementSize, IsLoad] = K;
+ return OS << (IsLoad ? "load" : "store") << " of " << *UnderlyingObject
+ << " of element size " << ElementSize << " bits in addrspace "
+ << AddrSpace;
+}
+
+// A Chain is a set of instructions such that:
+// - All instructions have the same equivalence class, so in particular all are
+// loads, or all are stores.
+// - We know the address accessed by the i'th chain elem relative to the
+// chain's leader instruction, which is the first instr of the chain in BB
+// order.
+//
+// Chains have two canonical orderings:
+// - BB order, sorted by Instr->comesBefore.
+// - Offset order, sorted by OffsetFromLeader.
+// This pass switches back and forth between these orders.
+struct ChainElem {
+ Instruction *Inst;
+ APInt OffsetFromLeader;
+};
+using Chain = SmallVector<ChainElem, 1>;
+
+void sortChainInBBOrder(Chain &C) {
+ sort(C, [](auto &A, auto &B) { return A.Inst->comesBefore(B.Inst); });
+}
+
+void sortChainInOffsetOrder(Chain &C) {
+ sort(C, [](const auto &A, const auto &B) {
+ if (A.OffsetFromLeader != B.OffsetFromLeader)
+ return A.OffsetFromLeader.slt(B.OffsetFromLeader);
+ return A.Inst->comesBefore(B.Inst); // stable tiebreaker
+ });
+}
+
+[[maybe_unused]] void dumpChain(ArrayRef<ChainElem> C) {
+ for (const auto &E : C) {
+ dbgs() << " " << *E.Inst << " (offset " << E.OffsetFromLeader << ")\n";
+ }
+}
+
+using EquivalenceClassMap =
+ MapVector<EqClassKey, SmallVector<Instruction *, 8>>;
+
// FIXME: Assuming stack alignment of 4 is always good enough
-static const unsigned StackAdjustedAlignment = 4;
+constexpr unsigned StackAdjustedAlignment = 4;
-namespace {
+Instruction *propagateMetadata(Instruction *I, const Chain &C) {
+ SmallVector<Value *, 8> Values;
+ for (const ChainElem &E : C)
+ Values.push_back(E.Inst);
+ return propagateMetadata(I, Values);
+}
-/// ChainID is an arbitrary token that is allowed to be different only for the
-/// accesses that are guaranteed to be considered non-consecutive by
-/// Vectorizer::isConsecutiveAccess. It's used for grouping instructions
-/// together and reducing the number of instructions the main search operates on
-/// at a time, i.e. this is to reduce compile time and nothing else as the main
-/// search has O(n^2) time complexity. The underlying type of ChainID should not
-/// be relied upon.
-using ChainID = const Value *;
-using InstrList = SmallVector<Instruction *, 8>;
-using InstrListMap = MapVector<ChainID, InstrList>;
+bool isInvariantLoad(const Instruction *I) {
+ const LoadInst *LI = dyn_cast<LoadInst>(I);
+ return LI != nullptr && LI->hasMetadata(LLVMContext::MD_invariant_load);
+}
+
+/// Reorders the instructions that I depends on (the instructions defining its
+/// operands), to ensure they dominate I.
+void reorder(Instruction *I) {
+ SmallPtrSet<Instruction *, 16> InstructionsToMove;
+ SmallVector<Instruction *, 16> Worklist;
+
+ Worklist.push_back(I);
+ while (!Worklist.empty()) {
+ Instruction *IW = Worklist.pop_back_val();
+ int NumOperands = IW->getNumOperands();
+ for (int i = 0; i < NumOperands; i++) {
+ Instruction *IM = dyn_cast<Instruction>(IW->getOperand(i));
+ if (!IM || IM->getOpcode() == Instruction::PHI)
+ continue;
+
+ // If IM is in another BB, no need to move it, because this pass only
+ // vectorizes instructions within one BB.
+ if (IM->getParent() != I->getParent())
+ continue;
+
+ if (!IM->comesBefore(I)) {
+ InstructionsToMove.insert(IM);
+ Worklist.push_back(IM);
+ }
+ }
+ }
+
+ // All instructions to move should follow I. Start from I, not from begin().
+ for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;) {
+ Instruction *IM = &*(BBI++);
+ if (!InstructionsToMove.count(IM))
+ continue;
+ IM->moveBefore(I);
+ }
+}
class Vectorizer {
Function &F;
@@ -118,6 +244,12 @@ class Vectorizer {
const DataLayout &DL;
IRBuilder<> Builder;
+ // We could erase instrs right after vectorizing them, but that can mess up
+ // our BB iterators, and also can make the equivalence class keys point to
+ // freed memory. This is fixable, but it's simpler just to wait until we're
+ // done with the BB and erase all at once.
+ SmallVector<Instruction *, 128> ToErase;
+
public:
Vectorizer(Function &F, AliasAnalysis &AA, AssumptionCache &AC,
DominatorTree &DT, ScalarEvolution &SE, TargetTransformInfo &TTI)
@@ -127,70 +259,83 @@ public:
bool run();
private:
- unsigned getPointerAddressSpace(Value *I);
-
static const unsigned MaxDepth = 3;
- bool isConsecutiveAccess(Value *A, Value *B);
- bool areConsecutivePointers(Value *PtrA, Value *PtrB, APInt PtrDelta,
- unsigned Depth = 0) const;
- bool lookThroughComplexAddresses(Value *PtrA, Value *PtrB, APInt PtrDelta,
- unsigned Depth) const;
- bool lookThroughSelects(Value *PtrA, Value *PtrB, const APInt &PtrDelta,
- unsigned Depth) const;
-
- /// After vectorization, reorder the instructions that I depends on
- /// (the instructions defining its operands), to ensure they dominate I.
- void reorder(Instruction *I);
-
- /// Returns the first and the last instructions in Chain.
- std::pair<BasicBlock::iterator, BasicBlock::iterator>
- getBoundaryInstrs(ArrayRef<Instruction *> Chain);
-
- /// Erases the original instructions after vectorizing.
- void eraseInstructions(ArrayRef<Instruction *> Chain);
-
- /// "Legalize" the vector type that would be produced by combining \p
- /// ElementSizeBits elements in \p Chain. Break into two pieces such that the
- /// total size of each piece is 1, 2 or a multiple of 4 bytes. \p Chain is
- /// expected to have more than 4 elements.
- std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>>
- splitOddVectorElts(ArrayRef<Instruction *> Chain, unsigned ElementSizeBits);
-
- /// Finds the largest prefix of Chain that's vectorizable, checking for
- /// intervening instructions which may affect the memory accessed by the
- /// instructions within Chain.
+ /// Runs the vectorizer on a "pseudo basic block", which is a range of
+ /// instructions [Begin, End) within one BB all of which have
+ /// isGuaranteedToTransferExecutionToSuccessor(I) == true.
+ bool runOnPseudoBB(BasicBlock::iterator Begin, BasicBlock::iterator End);
+
+ /// Runs the vectorizer on one equivalence class, i.e. one set of loads/stores
+ /// in the same BB with the same value for getUnderlyingObject() etc.
+ bool runOnEquivalenceClass(const EqClassKey &EqClassKey,
+ ArrayRef<Instruction *> EqClass);
+
+ /// Runs the vectorizer on one chain, i.e. a subset of an equivalence class
+ /// where all instructions access a known, constant offset from the first
+ /// instruction.
+ bool runOnChain(Chain &C);
+
+ /// Splits the chain into subchains of instructions which read/write a
+ /// contiguous block of memory. Discards any length-1 subchains (because
+ /// there's nothing to vectorize in there).
+ std::vector<Chain> splitChainByContiguity(Chain &C);
+
+ /// Splits the chain into subchains where it's safe to hoist loads up to the
+ /// beginning of the sub-chain and it's safe to sink stores down to the end of
+ /// the sub-chain. Discards any length-1 subchains.
+ std::vector<Chain> splitChainByMayAliasInstrs(Chain &C);
+
+ /// Splits the chain into subchains that make legal, aligned accesses.
+ /// Discards any length-1 subchains.
+ std::vector<Chain> splitChainByAlignment(Chain &C);
+
+ /// Converts the instrs in the chain into a single vectorized load or store.
+ /// Adds the old scalar loads/stores to ToErase.
+ bool vectorizeChain(Chain &C);
+
+ /// Tries to compute the offset in bytes PtrB - PtrA.
+ std::optional<APInt> getConstantOffset(Value *PtrA, Value *PtrB,
+ Instruction *ContextInst,
+ unsigned Depth = 0);
+ std::optional<APInt> getConstantOffsetComplexAddrs(Value *PtrA, Value *PtrB,
+ Instruction *ContextInst,
+ unsigned Depth);
+ std::optional<APInt> getConstantOffsetSelects(Value *PtrA, Value *PtrB,
+ Instruction *ContextInst,
+ unsigned Depth);
+
+ /// Gets the element type of the vector that the chain will load or store.
+ /// This is nontrivial because the chain may contain elements of different
+ /// types; e.g. it's legal to have a chain that contains both i32 and float.
+ Type *getChainElemTy(const Chain &C);
+
+ /// Determines whether ChainElem can be moved up (if IsLoad) or down (if
+ /// !IsLoad) to ChainBegin -- i.e. there are no intervening may-alias
+ /// instructions.
+ ///
+ /// The map ChainOffsets must contain all of the elements in
+ /// [ChainBegin, ChainElem] and their offsets from some arbitrary base
+ /// address. It's ok if it contains additional entries.
+ template <bool IsLoadChain>
+ bool isSafeToMove(
+ Instruction *ChainElem, Instruction *ChainBegin,
+ const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets);
+
+ /// Collects loads and stores grouped by "equivalence class", where:
+ /// - all elements in an eq class are a load or all are a store,
+ /// - they all load/store the same element size (it's OK to have e.g. i8 and
+ /// <4 x i8> in the same class, but not i32 and <4 x i8>), and
+ /// - they all have the same value for getUnderlyingObject().
+ EquivalenceClassMap collectEquivalenceClasses(BasicBlock::iterator Begin,
+ BasicBlock::iterator End);
+
+ /// Partitions Instrs into "chains" where every instruction has a known
+ /// constant offset from the first instr in the chain.
///
- /// The elements of \p Chain must be all loads or all stores and must be in
- /// address order.
- ArrayRef<Instruction *> getVectorizablePrefix(ArrayRef<Instruction *> Chain);
-
- /// Collects load and store instructions to vectorize.
- std::pair<InstrListMap, InstrListMap> collectInstructions(BasicBlock *BB);
-
- /// Processes the collected instructions, the \p Map. The values of \p Map
- /// should be all loads or all stores.
- bool vectorizeChains(InstrListMap &Map);
-
- /// Finds the load/stores to consecutive memory addresses and vectorizes them.
- bool vectorizeInstructions(ArrayRef<Instruction *> Instrs);
-
- /// Vectorizes the load instructions in Chain.
- bool
- vectorizeLoadChain(ArrayRef<Instruction *> Chain,
- SmallPtrSet<Instruction *, 16> *InstructionsProcessed);
-
- /// Vectorizes the store instructions in Chain.
- bool
- vectorizeStoreChain(ArrayRef<Instruction *> Chain,
- SmallPtrSet<Instruction *, 16> *InstructionsProcessed);
-
- /// Check if this load/store access is misaligned accesses.
- /// Returns a \p RelativeSpeed of an operation if allowed suitable to
- /// compare to another result for the same \p AddressSpace and potentially
- /// different \p Alignment and \p SzInBytes.
- bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
- Align Alignment, unsigned &RelativeSpeed);
+ /// Postcondition: For all i, ret[i][0].OffsetFromLeader == 0, because the first instr
+ /// in the chain is the leader, and an instr touches distance 0 from itself.
+ std::vector<Chain> gatherChains(ArrayRef<Instruction *> Instrs);
};
class LoadStoreVectorizerLegacyPass : public FunctionPass {
@@ -198,7 +343,8 @@ public:
static char ID;
LoadStoreVectorizerLegacyPass() : FunctionPass(ID) {
- initializeLoadStoreVectorizerLegacyPassPass(*PassRegistry::getPassRegistry());
+ initializeLoadStoreVectorizerLegacyPassPass(
+ *PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -250,11 +396,11 @@ bool LoadStoreVectorizerLegacyPass::runOnFunction(Function &F) {
AssumptionCache &AC =
getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- Vectorizer V(F, AA, AC, DT, SE, TTI);
- return V.run();
+ return Vectorizer(F, AA, AC, DT, SE, TTI).run();
}
-PreservedAnalyses LoadStoreVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) {
+PreservedAnalyses LoadStoreVectorizerPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
// Don't vectorize when the attribute NoImplicitFloat is used.
if (F.hasFnAttribute(Attribute::NoImplicitFloat))
return PreservedAnalyses::all();
@@ -265,125 +411,681 @@ PreservedAnalyses LoadStoreVectorizerPass::run(Function &F, FunctionAnalysisMana
TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
- Vectorizer V(F, AA, AC, DT, SE, TTI);
- bool Changed = V.run();
+ bool Changed = Vectorizer(F, AA, AC, DT, SE, TTI).run();
PreservedAnalyses PA;
PA.preserveSet<CFGAnalyses>();
return Changed ? PA : PreservedAnalyses::all();
}
-// The real propagateMetadata expects a SmallVector<Value*>, but we deal in
-// vectors of Instructions.
-static void propagateMetadata(Instruction *I, ArrayRef<Instruction *> IL) {
- SmallVector<Value *, 8> VL(IL.begin(), IL.end());
- propagateMetadata(I, VL);
-}
-
-// Vectorizer Implementation
bool Vectorizer::run() {
bool Changed = false;
-
- // Scan the blocks in the function in post order.
+ // Break up the BB if there are any instrs which aren't guaranteed to transfer
+ // execution to their successor.
+ //
+ // Consider, for example:
+ //
+ // def assert_arr_len(int n) { if (n < 2) exit(); }
+ //
+ // load arr[0]
+ // call assert_arr_len(arr.length)
+ // load arr[1]
+ //
+ // Even though assert_arr_len does not read or write any memory, we can't
+ // speculate the second load before the call. More info at
+ // https://github.com/llvm/llvm-project/issues/52950.
for (BasicBlock *BB : post_order(&F)) {
- InstrListMap LoadRefs, StoreRefs;
- std::tie(LoadRefs, StoreRefs) = collectInstructions(BB);
- Changed |= vectorizeChains(LoadRefs);
- Changed |= vectorizeChains(StoreRefs);
+ // BB must at least have a terminator.
+ assert(!BB->empty());
+
+ SmallVector<BasicBlock::iterator, 8> Barriers;
+ Barriers.push_back(BB->begin());
+ for (Instruction &I : *BB)
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+ Barriers.push_back(I.getIterator());
+ Barriers.push_back(BB->end());
+
+ for (auto It = Barriers.begin(), End = std::prev(Barriers.end()); It != End;
+ ++It)
+ Changed |= runOnPseudoBB(*It, *std::next(It));
+
+ for (Instruction *I : ToErase) {
+ auto *PtrOperand = getLoadStorePointerOperand(I);
+ if (I->use_empty())
+ I->eraseFromParent();
+ RecursivelyDeleteTriviallyDeadInstructions(PtrOperand);
+ }
+ ToErase.clear();
}
return Changed;
}
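The barrier handling in run() amounts to cutting the block at every instruction that may not transfer execution to its successor and processing each [Begin, End) slice independently. A standalone sketch of that split over indices (names are illustrative):

// Sketch only: each barrier index starts a new half-open range; the ranges
// correspond to the pseudo-BBs processed above.
#include <cstddef>
#include <functional>
#include <utility>
#include <vector>

std::vector<std::pair<std::size_t, std::size_t>>
splitAtBarriers(std::size_t N, const std::function<bool(std::size_t)> &IsBarrier) {
  std::vector<std::size_t> Cuts{0};
  for (std::size_t I = 0; I < N; ++I)
    if (IsBarrier(I))
      Cuts.push_back(I); // the barrier begins the next range
  Cuts.push_back(N);
  std::vector<std::pair<std::size_t, std::size_t>> Ranges;
  for (std::size_t I = 0; I + 1 < Cuts.size(); ++I)
    Ranges.emplace_back(Cuts[I], Cuts[I + 1]); // some ranges may be empty
  return Ranges;
}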
-unsigned Vectorizer::getPointerAddressSpace(Value *I) {
- if (LoadInst *L = dyn_cast<LoadInst>(I))
- return L->getPointerAddressSpace();
- if (StoreInst *S = dyn_cast<StoreInst>(I))
- return S->getPointerAddressSpace();
- return -1;
+bool Vectorizer::runOnPseudoBB(BasicBlock::iterator Begin,
+ BasicBlock::iterator End) {
+ LLVM_DEBUG({
+ dbgs() << "LSV: Running on pseudo-BB [" << *Begin << " ... ";
+ if (End != Begin->getParent()->end())
+ dbgs() << *End;
+ else
+ dbgs() << "<BB end>";
+ dbgs() << ")\n";
+ });
+
+ bool Changed = false;
+ for (const auto &[EqClassKey, EqClass] :
+ collectEquivalenceClasses(Begin, End))
+ Changed |= runOnEquivalenceClass(EqClassKey, EqClass);
+
+ return Changed;
}
-// FIXME: Merge with llvm::isConsecutiveAccess
-bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
- Value *PtrA = getLoadStorePointerOperand(A);
- Value *PtrB = getLoadStorePointerOperand(B);
- unsigned ASA = getPointerAddressSpace(A);
- unsigned ASB = getPointerAddressSpace(B);
+bool Vectorizer::runOnEquivalenceClass(const EqClassKey &EqClassKey,
+ ArrayRef<Instruction *> EqClass) {
+ bool Changed = false;
- // Check that the address spaces match and that the pointers are valid.
- if (!PtrA || !PtrB || (ASA != ASB))
- return false;
+ LLVM_DEBUG({
+ dbgs() << "LSV: Running on equivalence class of size " << EqClass.size()
+ << " keyed on " << EqClassKey << ":\n";
+ for (Instruction *I : EqClass)
+ dbgs() << " " << *I << "\n";
+ });
- // Make sure that A and B are different pointers of the same size type.
- Type *PtrATy = getLoadStoreType(A);
- Type *PtrBTy = getLoadStoreType(B);
- if (PtrA == PtrB ||
- PtrATy->isVectorTy() != PtrBTy->isVectorTy() ||
- DL.getTypeStoreSize(PtrATy) != DL.getTypeStoreSize(PtrBTy) ||
- DL.getTypeStoreSize(PtrATy->getScalarType()) !=
- DL.getTypeStoreSize(PtrBTy->getScalarType()))
- return false;
+ std::vector<Chain> Chains = gatherChains(EqClass);
+ LLVM_DEBUG(dbgs() << "LSV: Got " << Chains.size()
+ << " nontrivial chains.\n";);
+ for (Chain &C : Chains)
+ Changed |= runOnChain(C);
+ return Changed;
+}
- unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
- APInt Size(PtrBitWidth, DL.getTypeStoreSize(PtrATy));
+bool Vectorizer::runOnChain(Chain &C) {
+ LLVM_DEBUG({
+ dbgs() << "LSV: Running on chain with " << C.size() << " instructions:\n";
+ dumpChain(C);
+ });
- return areConsecutivePointers(PtrA, PtrB, Size);
+ // Split up the chain into increasingly smaller chains, until we can finally
+ // vectorize the chains.
+ //
+ // (Don't be scared by the depth of the loop nest here. These operations are
+ // all at worst O(n lg n) in the number of instructions, and splitting chains
+ // doesn't change the number of instrs. So the whole loop nest is O(n lg n).)
+ bool Changed = false;
+ for (auto &C : splitChainByMayAliasInstrs(C))
+ for (auto &C : splitChainByContiguity(C))
+ for (auto &C : splitChainByAlignment(C))
+ Changed |= vectorizeChain(C);
+ return Changed;
}
-bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB,
- APInt PtrDelta, unsigned Depth) const {
- unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(PtrA->getType());
- APInt OffsetA(PtrBitWidth, 0);
- APInt OffsetB(PtrBitWidth, 0);
- PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
- PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
+std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &C) {
+ if (C.empty())
+ return {};
- unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType());
+ sortChainInBBOrder(C);
- if (NewPtrBitWidth != DL.getTypeStoreSizeInBits(PtrB->getType()))
+ LLVM_DEBUG({
+ dbgs() << "LSV: splitChainByMayAliasInstrs considering chain:\n";
+ dumpChain(C);
+ });
+
+ // We know that elements in the chain with non-overlapping offsets can't
+ // alias, but AA may not be smart enough to figure this out. Use a
+ // hashtable.
+ DenseMap<Instruction *, APInt /*OffsetFromLeader*/> ChainOffsets;
+ for (const auto &E : C)
+ ChainOffsets.insert({&*E.Inst, E.OffsetFromLeader});
+
+ // Loads get hoisted up to the first load in the chain. Stores get sunk
+ // down to the last store in the chain. Our algorithm for loads is:
+ //
+ // - Take the first element of the chain. This is the start of a new chain.
+ // - Take the next element of `Chain` and check for may-alias instructions
+ // up to the start of NewChain. If no may-alias instrs, add it to
+ // NewChain. Otherwise, start a new NewChain.
+ //
+ // For stores it's the same except in the reverse direction.
+ //
+ // We expect IsLoad to be an std::bool_constant.
+ auto Impl = [&](auto IsLoad) {
+ // MSVC is unhappy if IsLoad is a capture, so pass it as an arg.
+ auto [ChainBegin, ChainEnd] = [&](auto IsLoad) {
+ if constexpr (IsLoad())
+ return std::make_pair(C.begin(), C.end());
+ else
+ return std::make_pair(C.rbegin(), C.rend());
+ }(IsLoad);
+ assert(ChainBegin != ChainEnd);
+
+ std::vector<Chain> Chains;
+ SmallVector<ChainElem, 1> NewChain;
+ NewChain.push_back(*ChainBegin);
+ for (auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) {
+ if (isSafeToMove<IsLoad>(ChainIt->Inst, NewChain.front().Inst,
+ ChainOffsets)) {
+ LLVM_DEBUG(dbgs() << "LSV: No intervening may-alias instrs; can merge "
+ << *ChainIt->Inst << " into " << *ChainBegin->Inst
+ << "\n");
+ NewChain.push_back(*ChainIt);
+ } else {
+ LLVM_DEBUG(
+ dbgs() << "LSV: Found intervening may-alias instrs; cannot merge "
+ << *ChainIt->Inst << " into " << *ChainBegin->Inst << "\n");
+ if (NewChain.size() > 1) {
+ LLVM_DEBUG({
+ dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
+ dumpChain(NewChain);
+ });
+ Chains.push_back(std::move(NewChain));
+ }
+
+ // Start a new chain.
+ NewChain = SmallVector<ChainElem, 1>({*ChainIt});
+ }
+ }
+ if (NewChain.size() > 1) {
+ LLVM_DEBUG({
+ dbgs() << "LSV: got nontrivial chain without aliasing instrs:\n";
+ dumpChain(NewChain);
+ });
+ Chains.push_back(std::move(NewChain));
+ }
+ return Chains;
+ };
+
+ if (isa<LoadInst>(C[0].Inst))
+ return Impl(/*IsLoad=*/std::bool_constant<true>());
+
+ assert(isa<StoreInst>(C[0].Inst));
+ return Impl(/*IsLoad=*/std::bool_constant<false>());
+}
+
+std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &C) {
+ if (C.empty())
+ return {};
+
+ sortChainInOffsetOrder(C);
+
+ LLVM_DEBUG({
+ dbgs() << "LSV: splitChainByContiguity considering chain:\n";
+ dumpChain(C);
+ });
+
+ std::vector<Chain> Ret;
+ Ret.push_back({C.front()});
+
+ for (auto It = std::next(C.begin()), End = C.end(); It != End; ++It) {
+ // `Prev` accesses offsets [Prev.OffsetFromLeader, PrevReadEnd).
+ auto &CurChain = Ret.back();
+ const ChainElem &Prev = CurChain.back();
+ unsigned SzBits = DL.getTypeSizeInBits(getLoadStoreType(&*Prev.Inst));
+ assert(SzBits % 8 == 0 && "Non-byte sizes should have been filtered out by "
+ "collectEquivalenceClass");
+ APInt PrevReadEnd = Prev.OffsetFromLeader + SzBits / 8;
+
+ // Add this instruction to the end of the current chain, or start a new one.
+ bool AreContiguous = It->OffsetFromLeader == PrevReadEnd;
+ LLVM_DEBUG(dbgs() << "LSV: Instructions are "
+ << (AreContiguous ? "" : "not ") << "contiguous: "
+ << *Prev.Inst << " (ends at offset " << PrevReadEnd
+ << ") -> " << *It->Inst << " (starts at offset "
+ << It->OffsetFromLeader << ")\n");
+ if (AreContiguous)
+ CurChain.push_back(*It);
+ else
+ Ret.push_back({*It});
+ }
+
+ // Filter out length-1 chains, these are uninteresting.
+ llvm::erase_if(Ret, [](const auto &Chain) { return Chain.size() <= 1; });
+ return Ret;
+}
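The contiguity test above reduces to "the next access starts exactly where the previous one ends". A standalone sketch of that split over (offset, size) pairs already sorted by offset (names are illustrative; length-1 runs would then be dropped as in the code above):

// Sketch only: split sorted (offset, size-in-bytes) entries into runs of
// strictly contiguous accesses.
#include <cstdint>
#include <utility>
#include <vector>

using Entry = std::pair<int64_t /*Offset*/, int64_t /*SizeBytes*/>;

std::vector<std::vector<Entry>> splitByContiguity(const std::vector<Entry> &Sorted) {
  std::vector<std::vector<Entry>> Runs;
  for (const Entry &E : Sorted) {
    if (!Runs.empty() &&
        Runs.back().back().first + Runs.back().back().second == E.first)
      Runs.back().push_back(E); // contiguous with the previous access
    else
      Runs.push_back({E}); // gap or overlap: start a new run
  }
  return Runs;
}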
+
+Type *Vectorizer::getChainElemTy(const Chain &C) {
+ assert(!C.empty());
+ // The rules are:
+ // - If there are any pointer types in the chain, use an integer type.
+ // - Prefer an integer type if it appears in the chain.
+ // - Otherwise, use the first type in the chain.
+ //
+ // The rule about pointer types is a simplification when we merge e.g. a load
+ // of a ptr and a double. There's no direct conversion from a ptr to a
+ // double; it requires a ptrtoint followed by a bitcast.
+ //
+ // It's unclear to me if the other rules have any practical effect, but we do
+ // it to match this pass's previous behavior.
+ if (any_of(C, [](const ChainElem &E) {
+ return getLoadStoreType(E.Inst)->getScalarType()->isPointerTy();
+ })) {
+ return Type::getIntNTy(
+ F.getContext(),
+ DL.getTypeSizeInBits(getLoadStoreType(C[0].Inst)->getScalarType()));
+ }
+
+ for (const ChainElem &E : C)
+ if (Type *T = getLoadStoreType(E.Inst)->getScalarType(); T->isIntegerTy())
+ return T;
+ return getLoadStoreType(C[0].Inst)->getScalarType();
+}
+
+std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
+ // We use a simple greedy algorithm.
+ // - Given a chain of length N, find all prefixes that
+ // (a) are not longer than the max register length, and
+ // (b) are a power of 2.
+ // - Starting from the longest prefix, try to create a vector of that length.
+ // - If one of them works, great. Repeat the algorithm on any remaining
+ // elements in the chain.
+ // - If none of them work, discard the first element and repeat on a chain
+ // of length N-1.
+ if (C.empty())
+ return {};
+
+ sortChainInOffsetOrder(C);
+
+ LLVM_DEBUG({
+ dbgs() << "LSV: splitChainByAlignment considering chain:\n";
+ dumpChain(C);
+ });
+
+ bool IsLoadChain = isa<LoadInst>(C[0].Inst);
+ auto getVectorFactor = [&](unsigned VF, unsigned LoadStoreSize,
+ unsigned ChainSizeBytes, VectorType *VecTy) {
+ return IsLoadChain ? TTI.getLoadVectorFactor(VF, LoadStoreSize,
+ ChainSizeBytes, VecTy)
+ : TTI.getStoreVectorFactor(VF, LoadStoreSize,
+ ChainSizeBytes, VecTy);
+ };
+
+#ifndef NDEBUG
+ for (const auto &E : C) {
+ Type *Ty = getLoadStoreType(E.Inst)->getScalarType();
+ assert(isPowerOf2_32(DL.getTypeSizeInBits(Ty)) &&
+ "Should have filtered out non-power-of-two elements in "
+ "collectEquivalenceClasses.");
+ }
+#endif
+
+ unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
+ unsigned VecRegBytes = TTI.getLoadStoreVecRegBitWidth(AS) / 8;
+
+ std::vector<Chain> Ret;
+ for (unsigned CBegin = 0; CBegin < C.size(); ++CBegin) {
+ // Find candidate chains of size not greater than the largest vector reg.
+ // These chains are over the closed interval [CBegin, CEnd].
+ SmallVector<std::pair<unsigned /*CEnd*/, unsigned /*SizeBytes*/>, 8>
+ CandidateChains;
+ for (unsigned CEnd = CBegin + 1, Size = C.size(); CEnd < Size; ++CEnd) {
+ APInt Sz = C[CEnd].OffsetFromLeader +
+ DL.getTypeStoreSize(getLoadStoreType(C[CEnd].Inst)) -
+ C[CBegin].OffsetFromLeader;
+ if (Sz.sgt(VecRegBytes))
+ break;
+ CandidateChains.push_back(
+ {CEnd, static_cast<unsigned>(Sz.getLimitedValue())});
+ }
+
+ // Consider the longest chain first.
+ for (auto It = CandidateChains.rbegin(), End = CandidateChains.rend();
+ It != End; ++It) {
+ auto [CEnd, SizeBytes] = *It;
+ LLVM_DEBUG(
+ dbgs() << "LSV: splitChainByAlignment considering candidate chain ["
+ << *C[CBegin].Inst << " ... " << *C[CEnd].Inst << "]\n");
+
+ Type *VecElemTy = getChainElemTy(C);
+ // Note, VecElemTy is a power of 2, but might be less than one byte. For
+ // example, we can vectorize 2 x <2 x i4> to <4 x i4>, and in this case
+ // VecElemTy would be i4.
+ unsigned VecElemBits = DL.getTypeSizeInBits(VecElemTy);
+
+ // SizeBytes and VecElemBits are powers of 2, so they divide evenly.
+ assert((8 * SizeBytes) % VecElemBits == 0);
+ unsigned NumVecElems = 8 * SizeBytes / VecElemBits;
+ FixedVectorType *VecTy = FixedVectorType::get(VecElemTy, NumVecElems);
+ unsigned VF = 8 * VecRegBytes / VecElemBits;
+
+ // Check that TTI is happy with this vectorization factor.
+ unsigned TargetVF = getVectorFactor(VF, VecElemBits,
+ VecElemBits * NumVecElems / 8, VecTy);
+ if (TargetVF != VF && TargetVF < NumVecElems) {
+ LLVM_DEBUG(
+ dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
+ "because TargetVF="
+ << TargetVF << " != VF=" << VF
+ << " and TargetVF < NumVecElems=" << NumVecElems << "\n");
+ continue;
+ }
+
+ // Is a load/store with this alignment allowed by TTI and at least as fast
+ // as an unvectorized load/store?
+ //
+ // TTI and F are passed as explicit captures to work around an MSVC misparse (??).
+ auto IsAllowedAndFast = [&, SizeBytes = SizeBytes, &TTI = TTI,
+ &F = F](Align Alignment) {
+ if (Alignment.value() % SizeBytes == 0)
+ return true;
+ unsigned VectorizedSpeed = 0;
+ bool AllowsMisaligned = TTI.allowsMisalignedMemoryAccesses(
+ F.getContext(), SizeBytes * 8, AS, Alignment, &VectorizedSpeed);
+ if (!AllowsMisaligned) {
+ LLVM_DEBUG(dbgs()
+ << "LSV: Access of " << SizeBytes << "B in addrspace "
+ << AS << " with alignment " << Alignment.value()
+ << " is misaligned, and therefore can't be vectorized.\n");
+ return false;
+ }
+
+ unsigned ElementwiseSpeed = 0;
+ (TTI).allowsMisalignedMemoryAccesses((F).getContext(), VecElemBits, AS,
+ Alignment, &ElementwiseSpeed);
+ if (VectorizedSpeed < ElementwiseSpeed) {
+ LLVM_DEBUG(dbgs()
+ << "LSV: Access of " << SizeBytes << "B in addrspace "
+ << AS << " with alignment " << Alignment.value()
+ << " has relative speed " << VectorizedSpeed
+ << ", which is lower than the elementwise speed of "
+ << ElementwiseSpeed
+ << ". Therefore this access won't be vectorized.\n");
+ return false;
+ }
+ return true;
+ };
+
+ // If we're loading/storing from an alloca, align it if possible.
+ //
+ // FIXME: We eagerly upgrade the alignment, regardless of whether TTI
+ // tells us this is beneficial. This feels a bit odd, but it matches
+ // existing tests. This isn't *so* bad, because at most we align to 4
+ // bytes (current value of StackAdjustedAlignment).
+ //
+ // FIXME: We will upgrade the alignment of the alloca even if it turns out
+ // we can't vectorize for some other reason.
+ Value *PtrOperand = getLoadStorePointerOperand(C[CBegin].Inst);
+ bool IsAllocaAccess = AS == DL.getAllocaAddrSpace() &&
+ isa<AllocaInst>(PtrOperand->stripPointerCasts());
+ Align Alignment = getLoadStoreAlignment(C[CBegin].Inst);
+ Align PrefAlign = Align(StackAdjustedAlignment);
+ if (IsAllocaAccess && Alignment.value() % SizeBytes != 0 &&
+ IsAllowedAndFast(PrefAlign)) {
+ Align NewAlign = getOrEnforceKnownAlignment(
+ PtrOperand, PrefAlign, DL, C[CBegin].Inst, nullptr, &DT);
+ if (NewAlign >= Alignment) {
+ LLVM_DEBUG(dbgs()
+ << "LSV: splitByChain upgrading alloca alignment from "
+ << Alignment.value() << " to " << NewAlign.value()
+ << "\n");
+ Alignment = NewAlign;
+ }
+ }
+
+ if (!IsAllowedAndFast(Alignment)) {
+ LLVM_DEBUG(
+ dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
+ "because its alignment is not AllowedAndFast: "
+ << Alignment.value() << "\n");
+ continue;
+ }
+
+ if ((IsLoadChain &&
+ !TTI.isLegalToVectorizeLoadChain(SizeBytes, Alignment, AS)) ||
+ (!IsLoadChain &&
+ !TTI.isLegalToVectorizeStoreChain(SizeBytes, Alignment, AS))) {
+ LLVM_DEBUG(
+ dbgs() << "LSV: splitChainByAlignment discarding candidate chain "
+ "because !isLegalToVectorizeLoad/StoreChain.");
+ continue;
+ }
+
+ // Hooray, we can vectorize this chain!
+ Chain &NewChain = Ret.emplace_back();
+ for (unsigned I = CBegin; I <= CEnd; ++I)
+ NewChain.push_back(C[I]);
+ CBegin = CEnd; // Skip over the instructions we've added to the chain.
+ break;
+ }
+ }
+ return Ret;
+}
+
+bool Vectorizer::vectorizeChain(Chain &C) {
+ if (C.size() < 2)
return false;
- // In case if we have to shrink the pointer
- // stripAndAccumulateInBoundsConstantOffsets should properly handle a
- // possible overflow and the value should fit into a smallest data type
- // used in the cast/gep chain.
- assert(OffsetA.getMinSignedBits() <= NewPtrBitWidth &&
- OffsetB.getMinSignedBits() <= NewPtrBitWidth);
+ sortChainInOffsetOrder(C);
- OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
- OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
- PtrDelta = PtrDelta.sextOrTrunc(NewPtrBitWidth);
+ LLVM_DEBUG({
+ dbgs() << "LSV: Vectorizing chain of " << C.size() << " instructions:\n";
+ dumpChain(C);
+ });
- APInt OffsetDelta = OffsetB - OffsetA;
+ Type *VecElemTy = getChainElemTy(C);
+ bool IsLoadChain = isa<LoadInst>(C[0].Inst);
+ unsigned AS = getLoadStoreAddressSpace(C[0].Inst);
+ unsigned ChainBytes = std::accumulate(
+ C.begin(), C.end(), 0u, [&](unsigned Bytes, const ChainElem &E) {
+ return Bytes + DL.getTypeStoreSize(getLoadStoreType(E.Inst));
+ });
+ assert(ChainBytes % DL.getTypeStoreSize(VecElemTy) == 0);
+ // VecTy is a power of 2 and 1 byte at smallest, but VecElemTy may be smaller
+ // than 1 byte (e.g. VecTy == <32 x i1>).
+ Type *VecTy = FixedVectorType::get(
+ VecElemTy, 8 * ChainBytes / DL.getTypeSizeInBits(VecElemTy));
+
+ Align Alignment = getLoadStoreAlignment(C[0].Inst);
+ // If this is a load/store of an alloca, we might have upgraded the alloca's
+ // alignment earlier. Get the new alignment.
+ if (AS == DL.getAllocaAddrSpace()) {
+ Alignment = std::max(
+ Alignment,
+ getOrEnforceKnownAlignment(getLoadStorePointerOperand(C[0].Inst),
+ MaybeAlign(), DL, C[0].Inst, nullptr, &DT));
+ }
- // Check if they are based on the same pointer. That makes the offsets
- // sufficient.
- if (PtrA == PtrB)
- return OffsetDelta == PtrDelta;
-
- // Compute the necessary base pointer delta to have the necessary final delta
- // equal to the pointer delta requested.
- APInt BaseDelta = PtrDelta - OffsetDelta;
-
- // Compute the distance with SCEV between the base pointers.
- const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
- const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
- const SCEV *C = SE.getConstant(BaseDelta);
- const SCEV *X = SE.getAddExpr(PtrSCEVA, C);
- if (X == PtrSCEVB)
+ // All elements of the chain must have the same scalar-type size.
+#ifndef NDEBUG
+ for (const ChainElem &E : C)
+ assert(DL.getTypeStoreSize(getLoadStoreType(E.Inst)->getScalarType()) ==
+ DL.getTypeStoreSize(VecElemTy));
+#endif
+
+ Instruction *VecInst;
+ if (IsLoadChain) {
+ // Loads get hoisted to the location of the first load in the chain. We may
+ // also need to hoist the (transitive) operands of the loads.
+ Builder.SetInsertPoint(
+ std::min_element(C.begin(), C.end(), [](const auto &A, const auto &B) {
+ return A.Inst->comesBefore(B.Inst);
+ })->Inst);
+
+ // Chain is in offset order, so C[0] is the instr with the lowest offset,
+ // i.e. the root of the vector.
+ Value *Bitcast = Builder.CreateBitCast(
+ getLoadStorePointerOperand(C[0].Inst), VecTy->getPointerTo(AS));
+ VecInst = Builder.CreateAlignedLoad(VecTy, Bitcast, Alignment);
+
+ unsigned VecIdx = 0;
+ for (const ChainElem &E : C) {
+ Instruction *I = E.Inst;
+ Value *V;
+ Type *T = getLoadStoreType(I);
+ if (auto *VT = dyn_cast<FixedVectorType>(T)) {
+ auto Mask = llvm::to_vector<8>(
+ llvm::seq<int>(VecIdx, VecIdx + VT->getNumElements()));
+ V = Builder.CreateShuffleVector(VecInst, Mask, I->getName());
+ VecIdx += VT->getNumElements();
+ } else {
+ V = Builder.CreateExtractElement(VecInst, Builder.getInt32(VecIdx),
+ I->getName());
+ ++VecIdx;
+ }
+ if (V->getType() != I->getType())
+ V = Builder.CreateBitOrPointerCast(V, I->getType());
+ I->replaceAllUsesWith(V);
+ }
+
+ // Finally, we need to reorder the instrs in the BB so that the (transitive)
+ // operands of VecInst appear before it. To see why, suppose we have
+ // vectorized the following code:
+ //
+ // ptr1 = gep a, 1
+ // load1 = load i32 ptr1
+ // ptr0 = gep a, 0
+ // load0 = load i32 ptr0
+ //
+ // We will put the vectorized load at the location of the earliest load in
+ // the BB, i.e. load1. We get:
+ //
+ // ptr1 = gep a, 1
+ // loadv = load <2 x i32> ptr0
+ // load0 = extractelement loadv, 0
+ // load1 = extractelement loadv, 1
+ // ptr0 = gep a, 0
+ //
+ // Notice that loadv uses ptr0, which is defined *after* it!
+ reorder(VecInst);
+ } else {
+ // Stores get sunk to the location of the last store in the chain.
+ Builder.SetInsertPoint(
+ std::max_element(C.begin(), C.end(), [](auto &A, auto &B) {
+ return A.Inst->comesBefore(B.Inst);
+ })->Inst);
+
+ // Build the vector to store.
+ Value *Vec = PoisonValue::get(VecTy);
+ unsigned VecIdx = 0;
+ auto InsertElem = [&](Value *V) {
+ if (V->getType() != VecElemTy)
+ V = Builder.CreateBitOrPointerCast(V, VecElemTy);
+ Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(VecIdx++));
+ };
+ for (const ChainElem &E : C) {
+ auto I = cast<StoreInst>(E.Inst);
+ if (FixedVectorType *VT =
+ dyn_cast<FixedVectorType>(getLoadStoreType(I))) {
+ for (int J = 0, JE = VT->getNumElements(); J < JE; ++J) {
+ InsertElem(Builder.CreateExtractElement(I->getValueOperand(),
+ Builder.getInt32(J)));
+ }
+ } else {
+ InsertElem(I->getValueOperand());
+ }
+ }
+
+ // Chain is in offset order, so C[0] is the instr with the lowest offset,
+ // i.e. the root of the vector.
+ VecInst = Builder.CreateAlignedStore(
+ Vec,
+ Builder.CreateBitCast(getLoadStorePointerOperand(C[0].Inst),
+ VecTy->getPointerTo(AS)),
+ Alignment);
+ }
+
+ propagateMetadata(VecInst, C);
+
+ for (const ChainElem &E : C)
+ ToErase.push_back(E.Inst);
+
+ ++NumVectorInstructions;
+ NumScalarsVectorized += C.size();
+ return true;
+}
+
+template <bool IsLoadChain>
+bool Vectorizer::isSafeToMove(
+ Instruction *ChainElem, Instruction *ChainBegin,
+ const DenseMap<Instruction *, APInt /*OffsetFromLeader*/> &ChainOffsets) {
+ LLVM_DEBUG(dbgs() << "LSV: isSafeToMove(" << *ChainElem << " -> "
+ << *ChainBegin << ")\n");
+
+ assert(isa<LoadInst>(ChainElem) == IsLoadChain);
+ if (ChainElem == ChainBegin)
return true;
- // The above check will not catch the cases where one of the pointers is
- // factorized but the other one is not, such as (C + (S * (A + B))) vs
- // (AS + BS). Get the minus scev. That will allow re-combining the expresions
- // and getting the simplified difference.
- const SCEV *Dist = SE.getMinusSCEV(PtrSCEVB, PtrSCEVA);
- if (C == Dist)
+ // Invariant loads can always be reordered; by definition they are not
+ // clobbered by stores.
+ if (isInvariantLoad(ChainElem))
return true;
- // Sometimes even this doesn't work, because SCEV can't always see through
- // patterns that look like (gep (ext (add (shl X, C1), C2))). Try checking
- // things the hard way.
- return lookThroughComplexAddresses(PtrA, PtrB, BaseDelta, Depth);
+ auto BBIt = std::next([&] {
+ if constexpr (IsLoadChain)
+ return BasicBlock::reverse_iterator(ChainElem);
+ else
+ return BasicBlock::iterator(ChainElem);
+ }());
+ auto BBItEnd = std::next([&] {
+ if constexpr (IsLoadChain)
+ return BasicBlock::reverse_iterator(ChainBegin);
+ else
+ return BasicBlock::iterator(ChainBegin);
+ }());
+
+ const APInt &ChainElemOffset = ChainOffsets.at(ChainElem);
+ const unsigned ChainElemSize =
+ DL.getTypeStoreSize(getLoadStoreType(ChainElem));
+
+ for (; BBIt != BBItEnd; ++BBIt) {
+ Instruction *I = &*BBIt;
+
+ if (!I->mayReadOrWriteMemory())
+ continue;
+
+ // Loads can be reordered with other loads.
+ if (IsLoadChain && isa<LoadInst>(I))
+ continue;
+
+ // Stores can be sunk below invariant loads.
+ if (!IsLoadChain && isInvariantLoad(I))
+ continue;
+
+ // If I is in the chain, we can tell whether it aliases ChainElem by checking
+ // what offset ChainElem accesses. This may be better than AA is able to do.
+ //
+ // We should really only have duplicate offsets for stores (the duplicate
+ // loads should be CSE'ed), but in case we have a duplicate load, we'll
+ // split the chain so we don't have to handle this case specially.
+ if (auto OffsetIt = ChainOffsets.find(I); OffsetIt != ChainOffsets.end()) {
+ // I and ChainElem overlap if:
+ // - I and ChainElem have the same offset, OR
+ // - I's offset is less than ChainElem's, but I touches past the
+ // beginning of ChainElem, OR
+ // - ChainElem's offset is less than I's, but ChainElem touches past the
+ // beginning of I.
+ const APInt &IOffset = OffsetIt->second;
+ unsigned IElemSize = DL.getTypeStoreSize(getLoadStoreType(I));
+ if (IOffset == ChainElemOffset ||
+ (IOffset.sle(ChainElemOffset) &&
+ (IOffset + IElemSize).sgt(ChainElemOffset)) ||
+ (ChainElemOffset.sle(IOffset) &&
+ (ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) {
+ LLVM_DEBUG({
+ // Double check that AA also sees this alias. If not, we probably
+ // have a bug.
+ ModRefInfo MR = AA.getModRefInfo(I, MemoryLocation::get(ChainElem));
+ assert(IsLoadChain ? isModSet(MR) : isModOrRefSet(MR));
+ dbgs() << "LSV: Found alias in chain: " << *I << "\n";
+ });
+ return false; // We found an aliasing instruction; bail.
+ }
+
+ continue; // We're confident there's no alias.
+ }
+
+ LLVM_DEBUG(dbgs() << "LSV: Querying AA for " << *I << "\n");
+ ModRefInfo MR = AA.getModRefInfo(I, MemoryLocation::get(ChainElem));
+ if (IsLoadChain ? isModSet(MR) : isModOrRefSet(MR)) {
+ LLVM_DEBUG(dbgs() << "LSV: Found alias in chain:\n"
+ << " Aliasing instruction:\n"
+ << " " << *I << '\n'
+ << " Aliased instruction and pointer:\n"
+ << " " << *ChainElem << '\n'
+ << " " << *getLoadStorePointerOperand(ChainElem)
+ << '\n');
+
+ return false;
+ }
+ }
+ return true;
}
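The in-chain alias test above is an interval-overlap check on byte ranges. A minimal standalone sketch of the same predicate (names are illustrative; sizes are assumed positive):

// Sketch only: half-open byte ranges [AOff, AOff+ASize) and [BOff, BOff+BSize)
// overlap exactly when each one starts before the other ends.
#include <cstdint>

bool accessesOverlap(int64_t AOff, int64_t ASize, int64_t BOff, int64_t BSize) {
  return AOff < BOff + BSize && BOff < AOff + ASize;
}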
static bool checkNoWrapFlags(Instruction *I, bool Signed) {
@@ -395,10 +1097,14 @@ static bool checkNoWrapFlags(Instruction *I, bool Signed) {
static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA,
unsigned MatchingOpIdxA, Instruction *AddOpB,
unsigned MatchingOpIdxB, bool Signed) {
- // If both OpA and OpB is an add with NSW/NUW and with
- // one of the operands being the same, we can guarantee that the
- // transformation is safe if we can prove that OpA won't overflow when
- // IdxDiff added to the other operand of OpA.
+ LLVM_DEBUG(dbgs() << "LSV: checkIfSafeAddSequence IdxDiff=" << IdxDiff
+ << ", AddOpA=" << *AddOpA << ", MatchingOpIdxA="
+ << MatchingOpIdxA << ", AddOpB=" << *AddOpB
+ << ", MatchingOpIdxB=" << MatchingOpIdxB
+ << ", Signed=" << Signed << "\n");
+ // If both OpA and OpB are adds with NSW/NUW and with one of the operands
+ // being the same, we can guarantee that the transformation is safe if we can
+ // prove that OpA won't overflow when IdxDiff is added to the other operand of OpA.
// For example:
// %tmp7 = add nsw i32 %tmp2, %v0
// %tmp8 = sext i32 %tmp7 to i64
@@ -407,10 +1113,9 @@ static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA,
// %tmp12 = add nsw i32 %tmp2, %tmp11
// %tmp13 = sext i32 %tmp12 to i64
//
- // Both %tmp7 and %tmp2 has the nsw flag and the first operand
- // is %tmp2. It's guaranteed that adding 1 to %tmp7 won't overflow
- // because %tmp11 adds 1 to %v0 and both %tmp11 and %tmp12 has the
- // nsw flag.
+ // Both %tmp7 and %tmp12 have the nsw flag and the first operand is %tmp2.
+ // It's guaranteed that adding 1 to %tmp7 won't overflow because %tmp11 adds
+ // 1 to %v0 and both %tmp11 and %tmp12 have the nsw flag.
assert(AddOpA->getOpcode() == Instruction::Add &&
AddOpB->getOpcode() == Instruction::Add &&
checkNoWrapFlags(AddOpA, Signed) && checkNoWrapFlags(AddOpB, Signed));
@@ -461,24 +1166,26 @@ static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA,
return false;
}
-bool Vectorizer::lookThroughComplexAddresses(Value *PtrA, Value *PtrB,
- APInt PtrDelta,
- unsigned Depth) const {
+std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
+ Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
+ LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA
+ << " PtrB=" << *PtrB << " ContextInst=" << *ContextInst
+ << " Depth=" << Depth << "\n");
auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);
auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);
if (!GEPA || !GEPB)
- return lookThroughSelects(PtrA, PtrB, PtrDelta, Depth);
+ return getConstantOffsetSelects(PtrA, PtrB, ContextInst, Depth);
// Look through GEPs after checking they're the same except for the last
// index.
if (GEPA->getNumOperands() != GEPB->getNumOperands() ||
GEPA->getPointerOperand() != GEPB->getPointerOperand())
- return false;
+ return std::nullopt;
gep_type_iterator GTIA = gep_type_begin(GEPA);
gep_type_iterator GTIB = gep_type_begin(GEPB);
for (unsigned I = 0, E = GEPA->getNumIndices() - 1; I < E; ++I) {
if (GTIA.getOperand() != GTIB.getOperand())
- return false;
+ return std::nullopt;
++GTIA;
++GTIB;
}
@@ -487,23 +1194,13 @@ bool Vectorizer::lookThroughComplexAddresses(Value *PtrA, Value *PtrB,
Instruction *OpB = dyn_cast<Instruction>(GTIB.getOperand());
if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||
OpA->getType() != OpB->getType())
- return false;
+ return std::nullopt;
- if (PtrDelta.isNegative()) {
- if (PtrDelta.isMinSignedValue())
- return false;
- PtrDelta.negate();
- std::swap(OpA, OpB);
- }
uint64_t Stride = DL.getTypeAllocSize(GTIA.getIndexedType());
- if (PtrDelta.urem(Stride) != 0)
- return false;
- unsigned IdxBitWidth = OpA->getType()->getScalarSizeInBits();
- APInt IdxDiff = PtrDelta.udiv(Stride).zext(IdxBitWidth);
// Only look through a ZExt/SExt.
if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
- return false;
+ return std::nullopt;
bool Signed = isa<SExtInst>(OpA);
@@ -511,7 +1208,21 @@ bool Vectorizer::lookThroughComplexAddresses(Value *PtrA, Value *PtrB,
Value *ValA = OpA->getOperand(0);
OpB = dyn_cast<Instruction>(OpB->getOperand(0));
if (!OpB || ValA->getType() != OpB->getType())
- return false;
+ return std::nullopt;
+
+ const SCEV *OffsetSCEVA = SE.getSCEV(ValA);
+ const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
+ const SCEV *IdxDiffSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
+ if (IdxDiffSCEV == SE.getCouldNotCompute())
+ return std::nullopt;
+
+ ConstantRange IdxDiffRange = SE.getSignedRange(IdxDiffSCEV);
+ if (!IdxDiffRange.isSingleElement())
+ return std::nullopt;
+ APInt IdxDiff = *IdxDiffRange.getSingleElement();
+
+ LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
+ << "\n");
// Now we need to prove that adding IdxDiff to ValA won't overflow.
bool Safe = false;
@@ -530,10 +1241,9 @@ bool Vectorizer::lookThroughComplexAddresses(Value *PtrA, Value *PtrB,
if (!Safe && OpA && OpA->getOpcode() == Instruction::Add &&
OpB->getOpcode() == Instruction::Add && checkNoWrapFlags(OpA, Signed) &&
checkNoWrapFlags(OpB, Signed)) {
- // In the checks below a matching operand in OpA and OpB is
- // an operand which is the same in those two instructions.
- // Below we account for possible orders of the operands of
- // these add instructions.
+ // In the checks below a matching operand in OpA and OpB is an operand which
+ // is the same in those two instructions. Below we account for possible
+ // orders of the operands of these add instructions.
for (unsigned MatchingOpIdxA : {0, 1})
for (unsigned MatchingOpIdxB : {0, 1})
if (!Safe)
@@ -544,802 +1254,267 @@ bool Vectorizer::lookThroughComplexAddresses(Value *PtrA, Value *PtrB,
unsigned BitWidth = ValA->getType()->getScalarSizeInBits();
// Third attempt:
- // If all set bits of IdxDiff or any higher order bit other than the sign bit
- // are known to be zero in ValA, we can add Diff to it while guaranteeing no
- // overflow of any sort.
+ //
+ // Assuming IdxDiff is positive: If all set bits of IdxDiff or any higher
+ // order bit other than the sign bit are known to be zero in ValA, we can add
+ // Diff to it while guaranteeing no overflow of any sort.
+ //
+ // If IdxDiff is negative, do the same, but swap ValA and ValB.
if (!Safe) {
+ // When computing known bits, use the GEPs as context instructions, since
+ // they likely are in the same BB as the load/store.
KnownBits Known(BitWidth);
- computeKnownBits(ValA, Known, DL, 0, &AC, OpB, &DT);
+ computeKnownBits((IdxDiff.sge(0) ? ValA : OpB), Known, DL, 0, &AC,
+ ContextInst, &DT);
APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.getBitWidth());
if (Signed)
BitsAllowedToBeSet.clearBit(BitWidth - 1);
- if (BitsAllowedToBeSet.ult(IdxDiff))
- return false;
+ if (BitsAllowedToBeSet.ult(IdxDiff.abs()))
+ return std::nullopt;
+ Safe = true;
}
- const SCEV *OffsetSCEVA = SE.getSCEV(ValA);
- const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
- const SCEV *C = SE.getConstant(IdxDiff.trunc(BitWidth));
- const SCEV *X = SE.getAddExpr(OffsetSCEVA, C);
- return X == OffsetSCEVB;
+ if (Safe)
+ return IdxDiff * Stride;
+ return std::nullopt;
}
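
[Editorial note] The "third attempt" above rests on a bit-level fact: when every set bit of IdxDiff lands in a position known to be zero in ValA, the addition cannot generate a carry, so it cannot wrap. A toy illustration of that simplest case with 8-bit values, not LLVM code:

#include <cassert>
#include <cstdint>

int main() {
  // KnownZero marks bits of ValA that are provably zero. If IdxDiff only uses
  // those positions, the add flips zeros to ones without carrying, so
  // ValA + IdxDiff cannot overflow.
  uint8_t ValA = 0x50;      // 0101'0000, low four bits known zero
  uint8_t KnownZero = 0x0F; // 0000'1111
  uint8_t IdxDiff = 0x06;   // 0000'0110, fits entirely inside KnownZero
  assert((IdxDiff & ~KnownZero) == 0);
  assert(ValA + IdxDiff == (ValA | IdxDiff)); // no carry, hence no overflow
}
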
-bool Vectorizer::lookThroughSelects(Value *PtrA, Value *PtrB,
- const APInt &PtrDelta,
- unsigned Depth) const {
+std::optional<APInt> Vectorizer::getConstantOffsetSelects(
+ Value *PtrA, Value *PtrB, Instruction *ContextInst, unsigned Depth) {
if (Depth++ == MaxDepth)
- return false;
+ return std::nullopt;
if (auto *SelectA = dyn_cast<SelectInst>(PtrA)) {
if (auto *SelectB = dyn_cast<SelectInst>(PtrB)) {
- return SelectA->getCondition() == SelectB->getCondition() &&
- areConsecutivePointers(SelectA->getTrueValue(),
- SelectB->getTrueValue(), PtrDelta, Depth) &&
- areConsecutivePointers(SelectA->getFalseValue(),
- SelectB->getFalseValue(), PtrDelta, Depth);
+ if (SelectA->getCondition() != SelectB->getCondition())
+ return std::nullopt;
+ LLVM_DEBUG(dbgs() << "LSV: getConstantOffsetSelects, PtrA=" << *PtrA
+ << ", PtrB=" << *PtrB << ", ContextInst="
+ << *ContextInst << ", Depth=" << Depth << "\n");
+ std::optional<APInt> TrueDiff = getConstantOffset(
+ SelectA->getTrueValue(), SelectB->getTrueValue(), ContextInst, Depth);
+ if (!TrueDiff.has_value())
+ return std::nullopt;
+ std::optional<APInt> FalseDiff =
+ getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),
+ ContextInst, Depth);
+ if (TrueDiff == FalseDiff)
+ return TrueDiff;
}
}
- return false;
+ return std::nullopt;
}
-void Vectorizer::reorder(Instruction *I) {
- SmallPtrSet<Instruction *, 16> InstructionsToMove;
- SmallVector<Instruction *, 16> Worklist;
-
- Worklist.push_back(I);
- while (!Worklist.empty()) {
- Instruction *IW = Worklist.pop_back_val();
- int NumOperands = IW->getNumOperands();
- for (int i = 0; i < NumOperands; i++) {
- Instruction *IM = dyn_cast<Instruction>(IW->getOperand(i));
- if (!IM || IM->getOpcode() == Instruction::PHI)
- continue;
-
- // If IM is in another BB, no need to move it, because this pass only
- // vectorizes instructions within one BB.
- if (IM->getParent() != I->getParent())
- continue;
-
- if (!IM->comesBefore(I)) {
- InstructionsToMove.insert(IM);
- Worklist.push_back(IM);
- }
+EquivalenceClassMap
+Vectorizer::collectEquivalenceClasses(BasicBlock::iterator Begin,
+ BasicBlock::iterator End) {
+ EquivalenceClassMap Ret;
+
+ auto getUnderlyingObject = [](const Value *Ptr) -> const Value * {
+ const Value *ObjPtr = llvm::getUnderlyingObject(Ptr);
+ if (const auto *Sel = dyn_cast<SelectInst>(ObjPtr)) {
+ // The selects themselves are distinct instructions even if they share the
+ // same condition and evaluate to consecutive pointers for the true and
+ // false values of the condition. Therefore, using the selects themselves
+ // for grouping instructions would put consecutive accesses into different
+ // lists; they would not even be checked for being consecutive, and would
+ // not be vectorized.
+ return Sel->getCondition();
}
- }
+ return ObjPtr;
+ };
- // All instructions to move should follow I. Start from I, not from begin().
- for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;
- ++BBI) {
- if (!InstructionsToMove.count(&*BBI))
+ for (Instruction &I : make_range(Begin, End)) {
+ auto *LI = dyn_cast<LoadInst>(&I);
+ auto *SI = dyn_cast<StoreInst>(&I);
+ if (!LI && !SI)
continue;
- Instruction *IM = &*BBI;
- --BBI;
- IM->removeFromParent();
- IM->insertBefore(I);
- }
-}
-
-std::pair<BasicBlock::iterator, BasicBlock::iterator>
-Vectorizer::getBoundaryInstrs(ArrayRef<Instruction *> Chain) {
- Instruction *C0 = Chain[0];
- BasicBlock::iterator FirstInstr = C0->getIterator();
- BasicBlock::iterator LastInstr = C0->getIterator();
- BasicBlock *BB = C0->getParent();
- unsigned NumFound = 0;
- for (Instruction &I : *BB) {
- if (!is_contained(Chain, &I))
+ if ((LI && !LI->isSimple()) || (SI && !SI->isSimple()))
continue;
- ++NumFound;
- if (NumFound == 1) {
- FirstInstr = I.getIterator();
- }
- if (NumFound == Chain.size()) {
- LastInstr = I.getIterator();
- break;
- }
- }
-
- // Range is [first, last).
- return std::make_pair(FirstInstr, ++LastInstr);
-}
-
-void Vectorizer::eraseInstructions(ArrayRef<Instruction *> Chain) {
- SmallVector<Instruction *, 16> Instrs;
- for (Instruction *I : Chain) {
- Value *PtrOperand = getLoadStorePointerOperand(I);
- assert(PtrOperand && "Instruction must have a pointer operand.");
- Instrs.push_back(I);
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(PtrOperand))
- Instrs.push_back(GEP);
- }
-
- // Erase instructions.
- for (Instruction *I : Instrs)
- if (I->use_empty())
- I->eraseFromParent();
-}
-
-std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>>
-Vectorizer::splitOddVectorElts(ArrayRef<Instruction *> Chain,
- unsigned ElementSizeBits) {
- unsigned ElementSizeBytes = ElementSizeBits / 8;
- unsigned SizeBytes = ElementSizeBytes * Chain.size();
- unsigned NumLeft = (SizeBytes - (SizeBytes % 4)) / ElementSizeBytes;
- if (NumLeft == Chain.size()) {
- if ((NumLeft & 1) == 0)
- NumLeft /= 2; // Split even in half
- else
- --NumLeft; // Split off last element
- } else if (NumLeft == 0)
- NumLeft = 1;
- return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft));
-}
-
-ArrayRef<Instruction *>
-Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
- // These are in BB order, unlike Chain, which is in address order.
- SmallVector<Instruction *, 16> MemoryInstrs;
- SmallVector<Instruction *, 16> ChainInstrs;
-
- bool IsLoadChain = isa<LoadInst>(Chain[0]);
- LLVM_DEBUG({
- for (Instruction *I : Chain) {
- if (IsLoadChain)
- assert(isa<LoadInst>(I) &&
- "All elements of Chain must be loads, or all must be stores.");
- else
- assert(isa<StoreInst>(I) &&
- "All elements of Chain must be loads, or all must be stores.");
- }
- });
-
- for (Instruction &I : make_range(getBoundaryInstrs(Chain))) {
- if ((isa<LoadInst>(I) || isa<StoreInst>(I)) && is_contained(Chain, &I)) {
- ChainInstrs.push_back(&I);
+ if ((LI && !TTI.isLegalToVectorizeLoad(LI)) ||
+ (SI && !TTI.isLegalToVectorizeStore(SI)))
continue;
- }
- if (!isGuaranteedToTransferExecutionToSuccessor(&I)) {
- LLVM_DEBUG(dbgs() << "LSV: Found instruction may not transfer execution: "
- << I << '\n');
- break;
- }
- if (I.mayReadOrWriteMemory())
- MemoryInstrs.push_back(&I);
- }
-
- // Loop until we find an instruction in ChainInstrs that we can't vectorize.
- unsigned ChainInstrIdx = 0;
- Instruction *BarrierMemoryInstr = nullptr;
-
- for (unsigned E = ChainInstrs.size(); ChainInstrIdx < E; ++ChainInstrIdx) {
- Instruction *ChainInstr = ChainInstrs[ChainInstrIdx];
-
- // If a barrier memory instruction was found, chain instructions that follow
- // will not be added to the valid prefix.
- if (BarrierMemoryInstr && BarrierMemoryInstr->comesBefore(ChainInstr))
- break;
- // Check (in BB order) if any instruction prevents ChainInstr from being
- // vectorized. Find and store the first such "conflicting" instruction.
- for (Instruction *MemInstr : MemoryInstrs) {
- // If a barrier memory instruction was found, do not check past it.
- if (BarrierMemoryInstr && BarrierMemoryInstr->comesBefore(MemInstr))
- break;
-
- auto *MemLoad = dyn_cast<LoadInst>(MemInstr);
- auto *ChainLoad = dyn_cast<LoadInst>(ChainInstr);
- if (MemLoad && ChainLoad)
- continue;
-
- // We can ignore the alias if the we have a load store pair and the load
- // is known to be invariant. The load cannot be clobbered by the store.
- auto IsInvariantLoad = [](const LoadInst *LI) -> bool {
- return LI->hasMetadata(LLVMContext::MD_invariant_load);
- };
-
- if (IsLoadChain) {
- // We can ignore the alias as long as the load comes before the store,
- // because that means we won't be moving the load past the store to
- // vectorize it (the vectorized load is inserted at the location of the
- // first load in the chain).
- if (ChainInstr->comesBefore(MemInstr) ||
- (ChainLoad && IsInvariantLoad(ChainLoad)))
- continue;
- } else {
- // Same case, but in reverse.
- if (MemInstr->comesBefore(ChainInstr) ||
- (MemLoad && IsInvariantLoad(MemLoad)))
- continue;
- }
-
- ModRefInfo MR =
- AA.getModRefInfo(MemInstr, MemoryLocation::get(ChainInstr));
- if (IsLoadChain ? isModSet(MR) : isModOrRefSet(MR)) {
- LLVM_DEBUG({
- dbgs() << "LSV: Found alias:\n"
- " Aliasing instruction:\n"
- << " " << *MemInstr << '\n'
- << " Aliased instruction and pointer:\n"
- << " " << *ChainInstr << '\n'
- << " " << *getLoadStorePointerOperand(ChainInstr) << '\n';
- });
- // Save this aliasing memory instruction as a barrier, but allow other
- // instructions that precede the barrier to be vectorized with this one.
- BarrierMemoryInstr = MemInstr;
- break;
- }
- }
- // Continue the search only for store chains, since vectorizing stores that
- // precede an aliasing load is valid. Conversely, vectorizing loads is valid
- // up to an aliasing store, but should not pull loads from further down in
- // the basic block.
- if (IsLoadChain && BarrierMemoryInstr) {
- // The BarrierMemoryInstr is a store that precedes ChainInstr.
- assert(BarrierMemoryInstr->comesBefore(ChainInstr));
- break;
- }
- }
-
- // Find the largest prefix of Chain whose elements are all in
- // ChainInstrs[0, ChainInstrIdx). This is the largest vectorizable prefix of
- // Chain. (Recall that Chain is in address order, but ChainInstrs is in BB
- // order.)
- SmallPtrSet<Instruction *, 8> VectorizableChainInstrs(
- ChainInstrs.begin(), ChainInstrs.begin() + ChainInstrIdx);
- unsigned ChainIdx = 0;
- for (unsigned ChainLen = Chain.size(); ChainIdx < ChainLen; ++ChainIdx) {
- if (!VectorizableChainInstrs.count(Chain[ChainIdx]))
- break;
- }
- return Chain.slice(0, ChainIdx);
-}
-
-static ChainID getChainID(const Value *Ptr) {
- const Value *ObjPtr = getUnderlyingObject(Ptr);
- if (const auto *Sel = dyn_cast<SelectInst>(ObjPtr)) {
- // The select's themselves are distinct instructions even if they share the
- // same condition and evaluate to consecutive pointers for true and false
- // values of the condition. Therefore using the select's themselves for
- // grouping instructions would put consecutive accesses into different lists
- // and they won't be even checked for being consecutive, and won't be
- // vectorized.
- return Sel->getCondition();
- }
- return ObjPtr;
-}
-
-std::pair<InstrListMap, InstrListMap>
-Vectorizer::collectInstructions(BasicBlock *BB) {
- InstrListMap LoadRefs;
- InstrListMap StoreRefs;
-
- for (Instruction &I : *BB) {
- if (!I.mayReadOrWriteMemory())
+ Type *Ty = getLoadStoreType(&I);
+ if (!VectorType::isValidElementType(Ty->getScalarType()))
continue;
- if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
- if (!LI->isSimple())
- continue;
-
- // Skip if it's not legal.
- if (!TTI.isLegalToVectorizeLoad(LI))
- continue;
-
- Type *Ty = LI->getType();
- if (!VectorType::isValidElementType(Ty->getScalarType()))
- continue;
-
- // Skip weird non-byte sizes. They probably aren't worth the effort of
- // handling correctly.
- unsigned TySize = DL.getTypeSizeInBits(Ty);
- if ((TySize % 8) != 0)
- continue;
-
- // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
- // functions are currently using an integer type for the vectorized
- // load/store, and does not support casting between the integer type and a
- // vector of pointers (e.g. i64 to <2 x i16*>)
- if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
- continue;
-
- Value *Ptr = LI->getPointerOperand();
- unsigned AS = Ptr->getType()->getPointerAddressSpace();
- unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
-
- unsigned VF = VecRegSize / TySize;
- VectorType *VecTy = dyn_cast<VectorType>(Ty);
-
- // No point in looking at these if they're too big to vectorize.
- if (TySize > VecRegSize / 2 ||
- (VecTy && TTI.getLoadVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
- continue;
-
- // Save the load locations.
- const ChainID ID = getChainID(Ptr);
- LoadRefs[ID].push_back(LI);
- } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
- if (!SI->isSimple())
- continue;
-
- // Skip if it's not legal.
- if (!TTI.isLegalToVectorizeStore(SI))
- continue;
-
- Type *Ty = SI->getValueOperand()->getType();
- if (!VectorType::isValidElementType(Ty->getScalarType()))
- continue;
-
- // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
- // functions are currently using an integer type for the vectorized
- // load/store, and does not support casting between the integer type and a
- // vector of pointers (e.g. i64 to <2 x i16*>)
- if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
- continue;
-
- // Skip weird non-byte sizes. They probably aren't worth the effort of
- // handling correctly.
- unsigned TySize = DL.getTypeSizeInBits(Ty);
- if ((TySize % 8) != 0)
- continue;
-
- Value *Ptr = SI->getPointerOperand();
- unsigned AS = Ptr->getType()->getPointerAddressSpace();
- unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
-
- unsigned VF = VecRegSize / TySize;
- VectorType *VecTy = dyn_cast<VectorType>(Ty);
-
- // No point in looking at these if they're too big to vectorize.
- if (TySize > VecRegSize / 2 ||
- (VecTy && TTI.getStoreVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
- continue;
-
- // Save store location.
- const ChainID ID = getChainID(Ptr);
- StoreRefs[ID].push_back(SI);
- }
- }
-
- return {LoadRefs, StoreRefs};
-}
-
-bool Vectorizer::vectorizeChains(InstrListMap &Map) {
- bool Changed = false;
-
- for (const std::pair<ChainID, InstrList> &Chain : Map) {
- unsigned Size = Chain.second.size();
- if (Size < 2)
+ // Skip weird non-byte sizes. They probably aren't worth the effort of
+ // handling correctly.
+ unsigned TySize = DL.getTypeSizeInBits(Ty);
+ if ((TySize % 8) != 0)
continue;
- LLVM_DEBUG(dbgs() << "LSV: Analyzing a chain of length " << Size << ".\n");
-
- // Process the stores in chunks of 64.
- for (unsigned CI = 0, CE = Size; CI < CE; CI += 64) {
- unsigned Len = std::min<unsigned>(CE - CI, 64);
- ArrayRef<Instruction *> Chunk(&Chain.second[CI], Len);
- Changed |= vectorizeInstructions(Chunk);
- }
- }
-
- return Changed;
-}
-
-bool Vectorizer::vectorizeInstructions(ArrayRef<Instruction *> Instrs) {
- LLVM_DEBUG(dbgs() << "LSV: Vectorizing " << Instrs.size()
- << " instructions.\n");
- SmallVector<int, 16> Heads, Tails;
- int ConsecutiveChain[64];
-
- // Do a quadratic search on all of the given loads/stores and find all of the
- // pairs of loads/stores that follow each other.
- for (int i = 0, e = Instrs.size(); i < e; ++i) {
- ConsecutiveChain[i] = -1;
- for (int j = e - 1; j >= 0; --j) {
- if (i == j)
- continue;
-
- if (isConsecutiveAccess(Instrs[i], Instrs[j])) {
- if (ConsecutiveChain[i] != -1) {
- int CurDistance = std::abs(ConsecutiveChain[i] - i);
- int NewDistance = std::abs(ConsecutiveChain[i] - j);
- if (j < i || NewDistance > CurDistance)
- continue; // Should not insert.
- }
+ // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
+ // functions currently use an integer type for the vectorized load/store,
+ // and do not support casting between the integer type and a vector of
+ // pointers (e.g. i64 to <2 x i16*>).
+ if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
+ continue;
- Tails.push_back(j);
- Heads.push_back(i);
- ConsecutiveChain[i] = j;
- }
- }
- }
+ Value *Ptr = getLoadStorePointerOperand(&I);
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+ unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
- bool Changed = false;
- SmallPtrSet<Instruction *, 16> InstructionsProcessed;
+ unsigned VF = VecRegSize / TySize;
+ VectorType *VecTy = dyn_cast<VectorType>(Ty);
- for (int Head : Heads) {
- if (InstructionsProcessed.count(Instrs[Head]))
+ // Only handle power-of-two sized elements.
+ if ((!VecTy && !isPowerOf2_32(DL.getTypeSizeInBits(Ty))) ||
+ (VecTy && !isPowerOf2_32(DL.getTypeSizeInBits(VecTy->getScalarType()))))
continue;
- bool LongerChainExists = false;
- for (unsigned TIt = 0; TIt < Tails.size(); TIt++)
- if (Head == Tails[TIt] &&
- !InstructionsProcessed.count(Instrs[Heads[TIt]])) {
- LongerChainExists = true;
- break;
- }
- if (LongerChainExists)
- continue;
-
- // We found an instr that starts a chain. Now follow the chain and try to
- // vectorize it.
- SmallVector<Instruction *, 16> Operands;
- int I = Head;
- while (I != -1 && (is_contained(Tails, I) || is_contained(Heads, I))) {
- if (InstructionsProcessed.count(Instrs[I]))
- break;
-
- Operands.push_back(Instrs[I]);
- I = ConsecutiveChain[I];
- }
- bool Vectorized = false;
- if (isa<LoadInst>(*Operands.begin()))
- Vectorized = vectorizeLoadChain(Operands, &InstructionsProcessed);
- else
- Vectorized = vectorizeStoreChain(Operands, &InstructionsProcessed);
+ // No point in looking at these if they're too big to vectorize.
+ if (TySize > VecRegSize / 2 ||
+ (VecTy && TTI.getLoadVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
+ continue;
- Changed |= Vectorized;
+ Ret[{getUnderlyingObject(Ptr), AS,
+ DL.getTypeSizeInBits(getLoadStoreType(&I)->getScalarType()),
+ /*IsLoad=*/LI != nullptr}]
+ .push_back(&I);
}
- return Changed;
+ return Ret;
}
-bool Vectorizer::vectorizeStoreChain(
- ArrayRef<Instruction *> Chain,
- SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
- StoreInst *S0 = cast<StoreInst>(Chain[0]);
-
- // If the vector has an int element, default to int for the whole store.
- Type *StoreTy = nullptr;
- for (Instruction *I : Chain) {
- StoreTy = cast<StoreInst>(I)->getValueOperand()->getType();
- if (StoreTy->isIntOrIntVectorTy())
- break;
-
- if (StoreTy->isPtrOrPtrVectorTy()) {
- StoreTy = Type::getIntNTy(F.getParent()->getContext(),
- DL.getTypeSizeInBits(StoreTy));
- break;
- }
- }
- assert(StoreTy && "Failed to find store type");
+std::vector<Chain> Vectorizer::gatherChains(ArrayRef<Instruction *> Instrs) {
+ if (Instrs.empty())
+ return {};
- unsigned Sz = DL.getTypeSizeInBits(StoreTy);
- unsigned AS = S0->getPointerAddressSpace();
- unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
- unsigned VF = VecRegSize / Sz;
- unsigned ChainSize = Chain.size();
- Align Alignment = S0->getAlign();
+ unsigned AS = getLoadStoreAddressSpace(Instrs[0]);
+ unsigned ASPtrBits = DL.getIndexSizeInBits(AS);
- if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
- InstructionsProcessed->insert(Chain.begin(), Chain.end());
- return false;
+#ifndef NDEBUG
+ // Check that Instrs is in BB order and all have the same addr space.
+ for (size_t I = 1; I < Instrs.size(); ++I) {
+ assert(Instrs[I - 1]->comesBefore(Instrs[I]));
+ assert(getLoadStoreAddressSpace(Instrs[I]) == AS);
}
+#endif
- ArrayRef<Instruction *> NewChain = getVectorizablePrefix(Chain);
- if (NewChain.empty()) {
- // No vectorization possible.
- InstructionsProcessed->insert(Chain.begin(), Chain.end());
- return false;
- }
- if (NewChain.size() == 1) {
- // Failed after the first instruction. Discard it and try the smaller chain.
- InstructionsProcessed->insert(NewChain.front());
- return false;
- }
-
- // Update Chain to the valid vectorizable subchain.
- Chain = NewChain;
- ChainSize = Chain.size();
-
- // Check if it's legal to vectorize this chain. If not, split the chain and
- // try again.
- unsigned EltSzInBytes = Sz / 8;
- unsigned SzInBytes = EltSzInBytes * ChainSize;
-
- FixedVectorType *VecTy;
- auto *VecStoreTy = dyn_cast<FixedVectorType>(StoreTy);
- if (VecStoreTy)
- VecTy = FixedVectorType::get(StoreTy->getScalarType(),
- Chain.size() * VecStoreTy->getNumElements());
- else
- VecTy = FixedVectorType::get(StoreTy, Chain.size());
-
- // If it's more than the max vector size or the target has a better
- // vector factor, break it into two pieces.
- unsigned TargetVF = TTI.getStoreVectorFactor(VF, Sz, SzInBytes, VecTy);
- if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
- LLVM_DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
- " Creating two separate arrays.\n");
- bool Vectorized = false;
- Vectorized |=
- vectorizeStoreChain(Chain.slice(0, TargetVF), InstructionsProcessed);
- Vectorized |=
- vectorizeStoreChain(Chain.slice(TargetVF), InstructionsProcessed);
- return Vectorized;
- }
-
- LLVM_DEBUG({
- dbgs() << "LSV: Stores to vectorize:\n";
- for (Instruction *I : Chain)
- dbgs() << " " << *I << "\n";
- });
-
- // We won't try again to vectorize the elements of the chain, regardless of
- // whether we succeed below.
- InstructionsProcessed->insert(Chain.begin(), Chain.end());
-
- // If the store is going to be misaligned, don't vectorize it.
- unsigned RelativeSpeed;
- if (accessIsMisaligned(SzInBytes, AS, Alignment, RelativeSpeed)) {
- if (S0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) {
- unsigned SpeedBefore;
- accessIsMisaligned(EltSzInBytes, AS, Alignment, SpeedBefore);
- if (SpeedBefore > RelativeSpeed)
- return false;
-
- auto Chains = splitOddVectorElts(Chain, Sz);
- bool Vectorized = false;
- Vectorized |= vectorizeStoreChain(Chains.first, InstructionsProcessed);
- Vectorized |= vectorizeStoreChain(Chains.second, InstructionsProcessed);
- return Vectorized;
+ // Machinery to build an MRU-hashtable of Chains.
+ //
+ // (Ideally this could be done with MapVector, but as currently implemented,
+ // moving an element to the front of a MapVector is O(n).)
+ struct InstrListElem : ilist_node<InstrListElem>,
+ std::pair<Instruction *, Chain> {
+ explicit InstrListElem(Instruction *I)
+ : std::pair<Instruction *, Chain>(I, {}) {}
+ };
+ struct InstrListElemDenseMapInfo {
+ using PtrInfo = DenseMapInfo<InstrListElem *>;
+ using IInfo = DenseMapInfo<Instruction *>;
+ static InstrListElem *getEmptyKey() { return PtrInfo::getEmptyKey(); }
+ static InstrListElem *getTombstoneKey() {
+ return PtrInfo::getTombstoneKey();
}
-
- Align NewAlign = getOrEnforceKnownAlignment(S0->getPointerOperand(),
- Align(StackAdjustedAlignment),
- DL, S0, nullptr, &DT);
- if (NewAlign >= Alignment)
- Alignment = NewAlign;
- else
- return false;
- }
-
- if (!TTI.isLegalToVectorizeStoreChain(SzInBytes, Alignment, AS)) {
- auto Chains = splitOddVectorElts(Chain, Sz);
- bool Vectorized = false;
- Vectorized |= vectorizeStoreChain(Chains.first, InstructionsProcessed);
- Vectorized |= vectorizeStoreChain(Chains.second, InstructionsProcessed);
- return Vectorized;
- }
-
- BasicBlock::iterator First, Last;
- std::tie(First, Last) = getBoundaryInstrs(Chain);
- Builder.SetInsertPoint(&*Last);
-
- Value *Vec = PoisonValue::get(VecTy);
-
- if (VecStoreTy) {
- unsigned VecWidth = VecStoreTy->getNumElements();
- for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
- StoreInst *Store = cast<StoreInst>(Chain[I]);
- for (unsigned J = 0, NE = VecStoreTy->getNumElements(); J != NE; ++J) {
- unsigned NewIdx = J + I * VecWidth;
- Value *Extract = Builder.CreateExtractElement(Store->getValueOperand(),
- Builder.getInt32(J));
- if (Extract->getType() != StoreTy->getScalarType())
- Extract = Builder.CreateBitCast(Extract, StoreTy->getScalarType());
-
- Value *Insert =
- Builder.CreateInsertElement(Vec, Extract, Builder.getInt32(NewIdx));
- Vec = Insert;
- }
+ static unsigned getHashValue(const InstrListElem *E) {
+ return IInfo::getHashValue(E->first);
}
- } else {
- for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
- StoreInst *Store = cast<StoreInst>(Chain[I]);
- Value *Extract = Store->getValueOperand();
- if (Extract->getType() != StoreTy->getScalarType())
- Extract =
- Builder.CreateBitOrPointerCast(Extract, StoreTy->getScalarType());
-
- Value *Insert =
- Builder.CreateInsertElement(Vec, Extract, Builder.getInt32(I));
- Vec = Insert;
+ static bool isEqual(const InstrListElem *A, const InstrListElem *B) {
+ if (A == getEmptyKey() || B == getEmptyKey())
+ return A == getEmptyKey() && B == getEmptyKey();
+ if (A == getTombstoneKey() || B == getTombstoneKey())
+ return A == getTombstoneKey() && B == getTombstoneKey();
+ return IInfo::isEqual(A->first, B->first);
}
- }
-
- StoreInst *SI = Builder.CreateAlignedStore(
- Vec,
- Builder.CreateBitCast(S0->getPointerOperand(), VecTy->getPointerTo(AS)),
- Alignment);
- propagateMetadata(SI, Chain);
-
- eraseInstructions(Chain);
- ++NumVectorInstructions;
- NumScalarsVectorized += Chain.size();
- return true;
-}
-
-bool Vectorizer::vectorizeLoadChain(
- ArrayRef<Instruction *> Chain,
- SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
- LoadInst *L0 = cast<LoadInst>(Chain[0]);
-
- // If the vector has an int element, default to int for the whole load.
- Type *LoadTy = nullptr;
- for (const auto &V : Chain) {
- LoadTy = cast<LoadInst>(V)->getType();
- if (LoadTy->isIntOrIntVectorTy())
- break;
-
- if (LoadTy->isPtrOrPtrVectorTy()) {
- LoadTy = Type::getIntNTy(F.getParent()->getContext(),
- DL.getTypeSizeInBits(LoadTy));
- break;
+ };
+ SpecificBumpPtrAllocator<InstrListElem> Allocator;
+ simple_ilist<InstrListElem> MRU;
+ DenseSet<InstrListElem *, InstrListElemDenseMapInfo> Chains;
+
+ // Compare each instruction in `Instrs` to the leaders of the N most
+ // recently-used chains. This limits the O(n^2) behavior of this pass while
+ // also allowing us to build arbitrarily long chains.
+ for (Instruction *I : Instrs) {
+ constexpr int MaxChainsToTry = 64;
+
+ bool MatchFound = false;
+ auto ChainIter = MRU.begin();
+ for (size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.end();
+ ++J, ++ChainIter) {
+ std::optional<APInt> Offset = getConstantOffset(
+ getLoadStorePointerOperand(ChainIter->first),
+ getLoadStorePointerOperand(I),
+ /*ContextInst=*/
+ (ChainIter->first->comesBefore(I) ? I : ChainIter->first));
+ if (Offset.has_value()) {
+ // `Offset` might not have the expected number of bits, if e.g. AS has a
+ // different number of bits than opaque pointers.
+ ChainIter->second.push_back(ChainElem{I, Offset.value()});
+ // Move ChainIter to the front of the MRU list.
+ MRU.remove(*ChainIter);
+ MRU.push_front(*ChainIter);
+ MatchFound = true;
+ break;
+ }
}
- }
- assert(LoadTy && "Can't determine LoadInst type from chain");
-
- unsigned Sz = DL.getTypeSizeInBits(LoadTy);
- unsigned AS = L0->getPointerAddressSpace();
- unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
- unsigned VF = VecRegSize / Sz;
- unsigned ChainSize = Chain.size();
- Align Alignment = L0->getAlign();
-
- if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
- InstructionsProcessed->insert(Chain.begin(), Chain.end());
- return false;
- }
-
- ArrayRef<Instruction *> NewChain = getVectorizablePrefix(Chain);
- if (NewChain.empty()) {
- // No vectorization possible.
- InstructionsProcessed->insert(Chain.begin(), Chain.end());
- return false;
- }
- if (NewChain.size() == 1) {
- // Failed after the first instruction. Discard it and try the smaller chain.
- InstructionsProcessed->insert(NewChain.front());
- return false;
- }
- // Update Chain to the valid vectorizable subchain.
- Chain = NewChain;
- ChainSize = Chain.size();
-
- // Check if it's legal to vectorize this chain. If not, split the chain and
- // try again.
- unsigned EltSzInBytes = Sz / 8;
- unsigned SzInBytes = EltSzInBytes * ChainSize;
- VectorType *VecTy;
- auto *VecLoadTy = dyn_cast<FixedVectorType>(LoadTy);
- if (VecLoadTy)
- VecTy = FixedVectorType::get(LoadTy->getScalarType(),
- Chain.size() * VecLoadTy->getNumElements());
- else
- VecTy = FixedVectorType::get(LoadTy, Chain.size());
-
- // If it's more than the max vector size or the target has a better
- // vector factor, break it into two pieces.
- unsigned TargetVF = TTI.getLoadVectorFactor(VF, Sz, SzInBytes, VecTy);
- if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
- LLVM_DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
- " Creating two separate arrays.\n");
- bool Vectorized = false;
- Vectorized |=
- vectorizeLoadChain(Chain.slice(0, TargetVF), InstructionsProcessed);
- Vectorized |=
- vectorizeLoadChain(Chain.slice(TargetVF), InstructionsProcessed);
- return Vectorized;
- }
-
- // We won't try again to vectorize the elements of the chain, regardless of
- // whether we succeed below.
- InstructionsProcessed->insert(Chain.begin(), Chain.end());
-
- // If the load is going to be misaligned, don't vectorize it.
- unsigned RelativeSpeed;
- if (accessIsMisaligned(SzInBytes, AS, Alignment, RelativeSpeed)) {
- if (L0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) {
- unsigned SpeedBefore;
- accessIsMisaligned(EltSzInBytes, AS, Alignment, SpeedBefore);
- if (SpeedBefore > RelativeSpeed)
- return false;
-
- auto Chains = splitOddVectorElts(Chain, Sz);
- bool Vectorized = false;
- Vectorized |= vectorizeLoadChain(Chains.first, InstructionsProcessed);
- Vectorized |= vectorizeLoadChain(Chains.second, InstructionsProcessed);
- return Vectorized;
+ if (!MatchFound) {
+ APInt ZeroOffset(ASPtrBits, 0);
+ InstrListElem *E = new (Allocator.Allocate()) InstrListElem(I);
+ E->second.push_back(ChainElem{I, ZeroOffset});
+ MRU.push_front(*E);
+ Chains.insert(E);
}
-
- Align NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(),
- Align(StackAdjustedAlignment),
- DL, L0, nullptr, &DT);
- if (NewAlign >= Alignment)
- Alignment = NewAlign;
- else
- return false;
}
- if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) {
- auto Chains = splitOddVectorElts(Chain, Sz);
- bool Vectorized = false;
- Vectorized |= vectorizeLoadChain(Chains.first, InstructionsProcessed);
- Vectorized |= vectorizeLoadChain(Chains.second, InstructionsProcessed);
- return Vectorized;
- }
+ std::vector<Chain> Ret;
+ Ret.reserve(Chains.size());
+ // Iterate over MRU rather than Chains so the order is deterministic.
+ for (auto &E : MRU)
+ if (E.second.size() > 1)
+ Ret.push_back(std::move(E.second));
+ return Ret;
+}
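
[Editorial note] gatherChains above bounds the quadratic comparison by testing each new instruction against only the heads of the 64 most recently used chains, and moving any chain that matches to the front. A rough standalone sketch of that pattern, with std::list and a trivial "same chain" test standing in for simple_ilist, DenseSet, and getConstantOffset:

#include <cstdlib>
#include <list>
#include <vector>

// Group items by comparing each new one against only the leaders of the K
// most recently used groups; a matching group is moved to the front so hot
// groups stay cheap to find.
std::vector<std::vector<int>> groupMRU(const std::vector<int> &Items,
                                       std::size_t K = 64) {
  std::list<std::vector<int>> MRU;
  for (int X : Items) {
    bool Matched = false;
    std::size_t Tried = 0;
    for (auto It = MRU.begin(); It != MRU.end() && Tried < K; ++It, ++Tried) {
      if (std::abs(X - It->front()) % 4 == 0) { // stand-in for "offset found"
        It->push_back(X);
        MRU.splice(MRU.begin(), MRU, It); // move the matching group to front
        Matched = true;
        break;
      }
    }
    if (!Matched)
      MRU.push_front({X});
  }
  return std::vector<std::vector<int>>(MRU.begin(), MRU.end());
}
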
- LLVM_DEBUG({
- dbgs() << "LSV: Loads to vectorize:\n";
- for (Instruction *I : Chain)
- I->dump();
- });
+std::optional<APInt> Vectorizer::getConstantOffset(Value *PtrA, Value *PtrB,
+ Instruction *ContextInst,
+ unsigned Depth) {
+ LLVM_DEBUG(dbgs() << "LSV: getConstantOffset, PtrA=" << *PtrA
+ << ", PtrB=" << *PtrB << ", ContextInst= " << *ContextInst
+ << ", Depth=" << Depth << "\n");
+ // We'll ultimately return a value of this bit width, even if computations
+ // happen in a different width.
+ unsigned OrigBitWidth = DL.getIndexTypeSizeInBits(PtrA->getType());
+ APInt OffsetA(OrigBitWidth, 0);
+ APInt OffsetB(OrigBitWidth, 0);
+ PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
+ PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
+ unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType());
+ if (NewPtrBitWidth != DL.getTypeStoreSizeInBits(PtrB->getType()))
+ return std::nullopt;
- // getVectorizablePrefix already computed getBoundaryInstrs. The value of
- // Last may have changed since then, but the value of First won't have. If it
- // matters, we could compute getBoundaryInstrs only once and reuse it here.
- BasicBlock::iterator First, Last;
- std::tie(First, Last) = getBoundaryInstrs(Chain);
- Builder.SetInsertPoint(&*First);
-
- Value *Bitcast =
- Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
- LoadInst *LI =
- Builder.CreateAlignedLoad(VecTy, Bitcast, MaybeAlign(Alignment));
- propagateMetadata(LI, Chain);
-
- for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
- Value *CV = Chain[I];
- Value *V;
- if (VecLoadTy) {
- // Extract a subvector using shufflevector.
- unsigned VecWidth = VecLoadTy->getNumElements();
- auto Mask =
- llvm::to_vector<8>(llvm::seq<int>(I * VecWidth, (I + 1) * VecWidth));
- V = Builder.CreateShuffleVector(LI, Mask, CV->getName());
- } else {
- V = Builder.CreateExtractElement(LI, Builder.getInt32(I), CV->getName());
- }
+ // If we have to shrink the pointer, stripAndAccumulateInBoundsConstantOffsets
+ // should properly handle a possible overflow and the value should fit into
+ // the smallest data type used in the cast/gep chain.
+ assert(OffsetA.getSignificantBits() <= NewPtrBitWidth &&
+ OffsetB.getSignificantBits() <= NewPtrBitWidth);
- if (V->getType() != CV->getType()) {
- V = Builder.CreateBitOrPointerCast(V, CV->getType());
+ OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
+ OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
+ if (PtrA == PtrB)
+ return (OffsetB - OffsetA).sextOrTrunc(OrigBitWidth);
+
+ // Try to compute B - A.
+ const SCEV *DistScev = SE.getMinusSCEV(SE.getSCEV(PtrB), SE.getSCEV(PtrA));
+ if (DistScev != SE.getCouldNotCompute()) {
+ LLVM_DEBUG(dbgs() << "LSV: SCEV PtrB - PtrA =" << *DistScev << "\n");
+ ConstantRange DistRange = SE.getSignedRange(DistScev);
+ if (DistRange.isSingleElement()) {
+ // Handle index width (the width of Dist) != pointer width (the width of
+ // the Offset*s at this point).
+ APInt Dist = DistRange.getSingleElement()->sextOrTrunc(NewPtrBitWidth);
+ return (OffsetB - OffsetA + Dist).sextOrTrunc(OrigBitWidth);
}
-
- // Replace the old instruction.
- CV->replaceAllUsesWith(V);
}
-
- // Since we might have opaque pointers we might end up using the pointer
- // operand of the first load (wrt. memory loaded) for the vector load. Since
- // this first load might not be the first in the block we potentially need to
- // reorder the pointer operand (and its operands). If we have a bitcast though
- // it might be before the load and should be the reorder start instruction.
- // "Might" because for opaque pointers the "bitcast" is just the first loads
- // pointer operand, as oppposed to something we inserted at the right position
- // ourselves.
- Instruction *BCInst = dyn_cast<Instruction>(Bitcast);
- reorder((BCInst && BCInst != L0->getPointerOperand()) ? BCInst : LI);
-
- eraseInstructions(Chain);
-
- ++NumVectorInstructions;
- NumScalarsVectorized += Chain.size();
- return true;
-}
-
-bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
- Align Alignment, unsigned &RelativeSpeed) {
- RelativeSpeed = 0;
- if (Alignment.value() % SzInBytes == 0)
- return false;
-
- bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(),
- SzInBytes * 8, AddressSpace,
- Alignment, &RelativeSpeed);
- LLVM_DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows
- << " with relative speed = " << RelativeSpeed << '\n';);
- return !Allows || !RelativeSpeed;
+ std::optional<APInt> Diff =
+ getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst, Depth);
+ if (Diff.has_value())
+ return (OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth()))
+ .sextOrTrunc(OrigBitWidth);
+ return std::nullopt;
}
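
[Editorial note] The final combination above is plain offset arithmetic: once both pointers have been stripped to a base, the answer is OffsetB - OffsetA, plus any provably constant distance between differing bases. A toy numeric illustration with ordinary integers rather than APInt/SCEV:

#include <cassert>
#include <cstdint>

int main() {
  // Say PtrA = Base + 4 and PtrB = Base + 12 after stripping in-bounds
  // constant GEP offsets. The bases match, so the constant offset between
  // the pointers is OffsetB - OffsetA = 8 bytes.
  int64_t OffsetA = 4, OffsetB = 12;
  assert(OffsetB - OffsetA == 8);

  // If the stripped bases differ but their distance Dist is provably
  // constant, the result is OffsetB - OffsetA + Dist instead.
  int64_t Dist = 16;
  assert(OffsetB - OffsetA + Dist == 24);
}
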
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index cd48c0d57eb3..f923f0be6621 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -37,6 +37,11 @@ static cl::opt<bool>
EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
cl::desc("Enable if-conversion during vectorization."));
+static cl::opt<bool>
+AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
+ cl::desc("Enable recognition of non-constant strided "
+ "pointer induction variables."));
+
namespace llvm {
cl::opt<bool>
HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
@@ -447,8 +452,12 @@ static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
Value *Ptr) const {
- const ValueToValueMap &Strides =
- getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
+ // FIXME: Currently, the set of symbolic strides is sometimes queried before
+ // it's collected. This happens from canVectorizeWithIfConvert, when the
+ // pointer is checked to reference consecutive elements suitable for a
+ // masked access.
+ const auto &Strides =
+ LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();
Function *F = TheLoop->getHeader()->getParent();
bool OptForSize = F->hasOptSize() ||
@@ -462,11 +471,135 @@ int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
return 0;
}
-bool LoopVectorizationLegality::isUniform(Value *V) const {
- return LAI->isUniform(V);
+bool LoopVectorizationLegality::isInvariant(Value *V) const {
+ return LAI->isInvariant(V);
+}
+
+namespace {
+/// A rewriter to build the SCEVs for each of the VF lanes in the expected
+/// vectorized loop, which can then be compared to detect their uniformity. This
+/// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
+/// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
+/// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
+/// uniformity.
+class SCEVAddRecForUniformityRewriter
+ : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
+ /// Multiplier to be applied to the step of AddRecs in TheLoop.
+ unsigned StepMultiplier;
+
+ /// Offset to be added to the AddRecs in TheLoop.
+ unsigned Offset;
+
+ /// Loop for which to rewrite AddRecs.
+ Loop *TheLoop;
+
+ /// Are any sub-expressions not analyzable w.r.t. uniformity?
+ bool CannotAnalyze = false;
+
+ bool canAnalyze() const { return !CannotAnalyze; }
+
+public:
+ SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
+ unsigned Offset, Loop *TheLoop)
+ : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
+ TheLoop(TheLoop) {}
+
+ const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
+ assert(Expr->getLoop() == TheLoop &&
+ "addrec outside of TheLoop must be invariant and should have been "
+ "handled earlier");
+ // Build a new AddRec by multiplying the step by StepMultiplier and
+ // incrementing the start by Offset * step.
+ Type *Ty = Expr->getType();
+ auto *Step = Expr->getStepRecurrence(SE);
+ if (!SE.isLoopInvariant(Step, TheLoop)) {
+ CannotAnalyze = true;
+ return Expr;
+ }
+ auto *NewStep = SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
+ auto *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
+ auto *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
+ return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
+ }
+
+ const SCEV *visit(const SCEV *S) {
+ if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
+ return S;
+ return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
+ }
+
+ const SCEV *visitUnknown(const SCEVUnknown *S) {
+ if (SE.isLoopInvariant(S, TheLoop))
+ return S;
+ // The value could vary across iterations.
+ CannotAnalyze = true;
+ return S;
+ }
+
+ const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
+ // Could not analyze the expression.
+ CannotAnalyze = true;
+ return S;
+ }
+
+ static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
+ unsigned StepMultiplier, unsigned Offset,
+ Loop *TheLoop) {
+ /// Bail out if the expression does not contain a UDiv expression.
+ /// Uniform values which are not loop invariant require operations to strip
+ /// out the lowest bits. For now just look for UDivs, and use this to avoid
+ /// rewriting UDiv-free expressions for other lanes, limiting compile time.
+ if (!SCEVExprContains(S,
+ [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
+ return SE.getCouldNotCompute();
+
+ SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
+ TheLoop);
+ const SCEV *Result = Rewriter.visit(S);
+
+ if (Rewriter.canAnalyze())
+ return Result;
+ return SE.getCouldNotCompute();
+ }
+};
+
+} // namespace
+
+bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
+ if (isInvariant(V))
+ return true;
+ if (VF.isScalable())
+ return false;
+ if (VF.isScalar())
+ return true;
+
+ // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
+ // never considered uniform.
+ auto *SE = PSE.getSE();
+ if (!SE->isSCEVable(V->getType()))
+ return false;
+ const SCEV *S = SE->getSCEV(V);
+
+ // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
+ // lane 0 matches the expressions for all other lanes.
+ unsigned FixedVF = VF.getKnownMinValue();
+ const SCEV *FirstLaneExpr =
+ SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
+ if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
+ return false;
+
+ // Make sure the expressions for lanes FixedVF-1..1 match the expression for
+ // lane 0. We check lanes in reverse order to save compile time, as checking
+ // the last lane is frequently sufficient to rule out uniformity.
+ return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
+ const SCEV *IthLaneExpr =
+ SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
+ return FirstLaneExpr == IthLaneExpr;
+ });
}
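
[Editorial note] The lane-by-lane check above asks whether stepping the induction by VF and offsetting it by each lane index yields the same expression as lane 0. For the motivating UDiv case the intuition is easy to see numerically; the snippet below is a toy check of that intuition, assuming a loop counter that starts at 0 and steps by 1, and it does not use SCEV:

#include <cassert>

int main() {
  // Value under test: V = i / 4 in a loop where i starts at 0 and steps by 1.
  // With VF = 4, each vector iteration covers lanes i, i+1, i+2, i+3 with i a
  // multiple of 4, so every lane computes the same V: the value is uniform.
  const unsigned VF = 4;
  for (unsigned I = 0; I < 32; I += VF)
    for (unsigned Lane = 1; Lane < VF; ++Lane)
      assert((I + Lane) / 4 == I / 4);
}
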
-bool LoopVectorizationLegality::isUniformMemOp(Instruction &I) const {
+bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
+ ElementCount VF) const {
Value *Ptr = getLoadStorePointerOperand(&I);
if (!Ptr)
return false;
@@ -474,7 +607,7 @@ bool LoopVectorizationLegality::isUniformMemOp(Instruction &I) const {
// stores from being uniform. The current lowering simply doesn't handle
// it; in particular, the cost model distinguishes scatter/gather from
// scalar w/predication, and we currently rely on the scalar path.
- return isUniform(Ptr) && !blockNeedsPredication(I.getParent());
+ return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
}
bool LoopVectorizationLegality::canVectorizeOuterLoop() {
@@ -700,6 +833,18 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
continue;
}
+ // We prevent matching non-constant strided pointer IVs to preserve
+ // historical vectorizer behavior after a generalization of the
+ // IVDescriptor code. The intent is to remove this check, but we
+ // have to fix issues around code quality for such loops first.
+ auto isDisallowedStridedPointerInduction =
+ [](const InductionDescriptor &ID) {
+ if (AllowStridedPointerIVs)
+ return false;
+ return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
+ ID.getConstIntStepValue() == nullptr;
+ };
+
// TODO: Instead of recording the AllowedExit, it would be good to
// record the complementary set: NotAllowedExit. These include (but may
// not be limited to):
@@ -715,14 +860,14 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// By recording these, we can then reason about ways to vectorize each
// of these NotAllowedExit.
InductionDescriptor ID;
- if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
+ if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
+ !isDisallowedStridedPointerInduction(ID)) {
addInductionPhi(Phi, ID, AllowedExit);
Requirements->addExactFPMathInst(ID.getExactFPMathInst());
continue;
}
- if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop,
- SinkAfter, DT)) {
+ if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
AllowedExit.insert(Phi);
FixedOrderRecurrences.insert(Phi);
continue;
@@ -730,7 +875,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// As a last resort, coerce the PHI to an AddRec expression
// and re-try classifying it as an induction PHI.
- if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
+ if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
+ !isDisallowedStridedPointerInduction(ID)) {
addInductionPhi(Phi, ID, AllowedExit);
continue;
}
@@ -894,18 +1040,6 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
}
}
- // For fixed order recurrences, we use the previous value (incoming value from
- // the latch) to check if it dominates all users of the recurrence. Bail out
- // if we have to sink such an instruction for another recurrence, as the
- // dominance requirement may not hold after sinking.
- BasicBlock *LoopLatch = TheLoop->getLoopLatch();
- if (any_of(FixedOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
- Instruction *V =
- cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
- return SinkAfter.find(V) != SinkAfter.end();
- }))
- return false;
-
// Now we know the widest induction type, check if our found induction
// is the same size. If it's not, unset it here and InnerLoopVectorizer
// will create another.
@@ -1124,6 +1258,16 @@ bool LoopVectorizationLegality::blockCanBePredicated(
if (isa<NoAliasScopeDeclInst>(&I))
continue;
+ // We can allow masked calls if there's at least one vector variant, even
+ // if we end up scalarizing due to the cost model calculations.
+ // TODO: Allow other calls if they have appropriate attributes... readonly
+ // and argmemonly?
+ if (CallInst *CI = dyn_cast<CallInst>(&I))
+ if (VFDatabase::hasMaskedVariant(*CI)) {
+ MaskedOp.insert(CI);
+ continue;
+ }
+
// Loads are handled via masking (or speculated if safe to do so.)
if (auto *LI = dyn_cast<LoadInst>(&I)) {
if (!SafePtrs.count(LI->getPointerOperand()))
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index 8990a65afdb4..13357cb06c55 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -25,6 +25,7 @@
#define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
#include "VPlan.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/InstructionCost.h"
namespace llvm {
@@ -217,6 +218,16 @@ struct VectorizationFactor {
}
};
+/// ElementCountComparator creates a total ordering for ElementCount
+/// for the purposes of using it in a set structure.
+struct ElementCountComparator {
+ bool operator()(const ElementCount &LHS, const ElementCount &RHS) const {
+ return std::make_tuple(LHS.isScalable(), LHS.getKnownMinValue()) <
+ std::make_tuple(RHS.isScalable(), RHS.getKnownMinValue());
+ }
+};
+using ElementCountSet = SmallSet<ElementCount, 16, ElementCountComparator>;
+
/// A class that represents two vectorization factors (initialized with 0 by
/// default). One for fixed-width vectorization and one for scalable
/// vectorization. This can be used by the vectorizer to choose from a range of
@@ -261,7 +272,7 @@ class LoopVectorizationPlanner {
const TargetLibraryInfo *TLI;
/// Target Transform Info.
- const TargetTransformInfo *TTI;
+ const TargetTransformInfo &TTI;
/// The legality analysis.
LoopVectorizationLegality *Legal;
@@ -280,12 +291,15 @@ class LoopVectorizationPlanner {
SmallVector<VPlanPtr, 4> VPlans;
+ /// Profitable vector factors.
+ SmallVector<VectorizationFactor, 8> ProfitableVFs;
+
/// A builder used to construct the current plan.
VPBuilder Builder;
public:
LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI,
+ const TargetTransformInfo &TTI,
LoopVectorizationLegality *Legal,
LoopVectorizationCostModel &CM,
InterleavedAccessInfo &IAI,
@@ -311,16 +325,22 @@ public:
/// TODO: \p IsEpilogueVectorization is needed to avoid issues due to epilogue
/// vectorization re-using plans for both the main and epilogue vector loops.
/// It should be removed once the re-use issue has been fixed.
- void executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
- InnerLoopVectorizer &LB, DominatorTree *DT,
- bool IsEpilogueVectorization);
+ /// \p ExpandedSCEVs is passed during execution of the plan for epilogue loop
+ /// to re-use expansion results generated during main plan execution. Returns
+ /// a mapping of SCEVs to their expanded IR values. Note that this is a
+ /// temporary workaround needed due to the current epilogue handling.
+ DenseMap<const SCEV *, Value *>
+ executePlan(ElementCount VF, unsigned UF, VPlan &BestPlan,
+ InnerLoopVectorizer &LB, DominatorTree *DT,
+ bool IsEpilogueVectorization,
+ DenseMap<const SCEV *, Value *> *ExpandedSCEVs = nullptr);
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void printPlans(raw_ostream &O);
#endif
- /// Look through the existing plans and return true if we have one with all
- /// the vectorization factors in question.
+ /// Look through the existing plans and return true if we have one with
+ /// vectorization factor \p VF.
bool hasPlanWithVF(ElementCount VF) const {
return any_of(VPlans,
[&](const VPlanPtr &Plan) { return Plan->hasVF(VF); });
@@ -333,8 +353,11 @@ public:
getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
VFRange &Range);
- /// Check if the number of runtime checks exceeds the threshold.
- bool requiresTooManyRuntimeChecks() const;
+ /// \return The most profitable vectorization factor and the cost of that VF
+ /// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if
+ /// epilogue vectorization is not supported for the loop.
+ VectorizationFactor
+ selectEpilogueVectorizationFactor(const ElementCount MaxVF, unsigned IC);
protected:
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
@@ -350,9 +373,12 @@ private:
/// Build a VPlan using VPRecipes according to the information gather by
/// Legal. This method is only used for the legacy inner loop vectorizer.
- VPlanPtr buildVPlanWithVPRecipes(
- VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions,
- const MapVector<Instruction *, Instruction *> &SinkAfter);
+ /// \p Range's largest included VF is restricted to the maximum VF the
+ /// returned VPlan is valid for. If no VPlan can be built for the input range,
+ /// set the largest included VF to the maximum VF for which no plan could be
+ /// built.
+ std::optional<VPlanPtr> tryToBuildVPlanWithVPRecipes(
+ VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions);
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
/// according to the information gathered by Legal when it checked if it is
@@ -367,6 +393,20 @@ private:
void adjustRecipesForReductions(VPBasicBlock *LatchVPBB, VPlanPtr &Plan,
VPRecipeBuilder &RecipeBuilder,
ElementCount MinVF);
+
+ /// \return The most profitable vectorization factor and the cost of that VF.
+ /// This method checks every VF in \p CandidateVFs.
+ VectorizationFactor
+ selectVectorizationFactor(const ElementCountSet &CandidateVFs);
+
+ /// Returns true if the per-lane cost of VectorizationFactor A is lower than
+ /// that of B.
+ bool isMoreProfitable(const VectorizationFactor &A,
+ const VectorizationFactor &B) const;
+
+ /// Determines if we have the infrastructure to vectorize the loop and its
+ /// epilogue, assuming the main loop is vectorized by \p VF.
+ bool isCandidateForEpilogueVectorization(const ElementCount VF) const;
};
} // namespace llvm
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a28099d8ba7d..d7e40e8ef978 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -98,6 +98,7 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
@@ -120,8 +121,6 @@
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/Verifier.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -231,6 +230,25 @@ static cl::opt<PreferPredicateTy::Option> PreferPredicateOverEpilogue(
"prefers tail-folding, don't attempt vectorization if "
"tail-folding fails.")));
+static cl::opt<TailFoldingStyle> ForceTailFoldingStyle(
+ "force-tail-folding-style", cl::desc("Force the tail folding style"),
+ cl::init(TailFoldingStyle::None),
+ cl::values(
+ clEnumValN(TailFoldingStyle::None, "none", "Disable tail folding"),
+ clEnumValN(
+ TailFoldingStyle::Data, "data",
+ "Create lane mask for data only, using active.lane.mask intrinsic"),
+ clEnumValN(TailFoldingStyle::DataWithoutLaneMask,
+ "data-without-lane-mask",
+ "Create lane mask with compare/stepvector"),
+ clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control",
+ "Create lane mask using active.lane.mask intrinsic, and use "
+ "it for both data and control flow"),
+ clEnumValN(
+ TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck,
+ "data-and-control-without-rt-check",
+ "Similar to data-and-control, but remove the runtime check")));
+
static cl::opt<bool> MaximizeBandwidth(
"vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden,
cl::desc("Maximize bandwidth when selecting vectorization factor which "
@@ -338,10 +356,12 @@ static cl::opt<bool> PreferPredicatedReductionSelect(
cl::desc(
"Prefer predicating a reduction operation over an after loop select."));
+namespace llvm {
cl::opt<bool> EnableVPlanNativePath(
- "enable-vplan-native-path", cl::init(false), cl::Hidden,
+ "enable-vplan-native-path", cl::Hidden,
cl::desc("Enable VPlan-native vectorization path with "
"support for outer loop vectorization."));
+}
// This flag enables the stress testing of the VPlan H-CFG construction in the
// VPlan-native vectorization path. It must be used in conjunction with
@@ -419,9 +439,42 @@ static std::optional<unsigned> getSmallBestKnownTC(ScalarEvolution &SE,
return std::nullopt;
}
+/// Return a vector containing interleaved elements from multiple
+/// smaller input vectors.
+static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
+ const Twine &Name) {
+ unsigned Factor = Vals.size();
+ assert(Factor > 1 && "Tried to interleave invalid number of vectors");
+
+ VectorType *VecTy = cast<VectorType>(Vals[0]->getType());
+#ifndef NDEBUG
+ for (Value *Val : Vals)
+ assert(Val->getType() == VecTy && "Tried to interleave mismatched types");
+#endif
+
+ // Scalable vectors cannot use arbitrary shufflevectors (only splats), so
+ // we must use intrinsics to interleave.
+ if (VecTy->isScalableTy()) {
+ VectorType *WideVecTy = VectorType::getDoubleElementsVectorType(VecTy);
+ return Builder.CreateIntrinsic(
+ WideVecTy, Intrinsic::experimental_vector_interleave2, Vals,
+ /*FMFSource=*/nullptr, Name);
+ }
+
+ // Fixed length. Start by concatenating all vectors into a wide vector.
+ Value *WideVec = concatenateVectors(Builder, Vals);
+
+ // Interleave the elements into the wide vector.
+ const unsigned NumElts = VecTy->getElementCount().getFixedValue();
+ return Builder.CreateShuffleVector(
+ WideVec, createInterleaveMask(NumElts, Factor), Name);
+}
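// Illustrative sketch (not part of the patch): the fixed-length branch above
// relies on createInterleaveMask(NumElts, Factor) producing indices that pick
// elements alternately from the concatenated inputs. A self-contained
// equivalent of that mask computation, assuming those semantics, is:
#include <vector>
static std::vector<int> interleaveMaskSketch(unsigned NumElts, unsigned Factor) {
  std::vector<int> Mask;
  for (unsigned I = 0; I < NumElts; ++I)    // position within each input vector
    for (unsigned J = 0; J < Factor; ++J)   // which input vector to draw from
      Mask.push_back(J * NumElts + I);      // lane in the concatenated wide vector
  return Mask;
}
// For NumElts = 4 and Factor = 2 this yields {0, 4, 1, 5, 2, 6, 3, 7}, i.e. the
// shufflevector alternates between the two concatenated inputs.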
+
namespace {
// Forward declare GeneratedRTChecks.
class GeneratedRTChecks;
+
+using SCEV2ValueTy = DenseMap<const SCEV *, Value *>;
} // namespace
namespace llvm {
@@ -477,8 +530,10 @@ public:
/// loop and the start value for the canonical induction, if it is != 0. The
/// latter is the case when vectorizing the epilogue loop. In the case of
/// epilogue vectorization, this function is overridden to handle the more
- /// complex control flow around the loops.
- virtual std::pair<BasicBlock *, Value *> createVectorizedLoopSkeleton();
+ /// complex control flow around the loops. \p ExpandedSCEVs is used to
+ /// look up SCEV expansions for expressions needed during skeleton creation.
+ virtual std::pair<BasicBlock *, Value *>
+ createVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs);
/// Fix the vectorized code, taking care of header phi's, live-outs, and more.
void fixVectorizedLoop(VPTransformState &State, VPlan &Plan);
@@ -498,7 +553,7 @@ public:
/// Instr's operands.
void scalarizeInstruction(const Instruction *Instr,
VPReplicateRecipe *RepRecipe,
- const VPIteration &Instance, bool IfPredicateInstr,
+ const VPIteration &Instance,
VPTransformState &State);
/// Construct the vector value of a scalarized value \p V one lane at a time.
@@ -513,7 +568,7 @@ public:
ArrayRef<VPValue *> VPDefs,
VPTransformState &State, VPValue *Addr,
ArrayRef<VPValue *> StoredValues,
- VPValue *BlockInMask = nullptr);
+ VPValue *BlockInMask, bool NeedsMaskForGaps);
/// Fix the non-induction PHIs in \p Plan.
void fixNonInductionPHIs(VPlan &Plan, VPTransformState &State);
@@ -522,28 +577,30 @@ public:
/// able to vectorize with strict in-order reductions for the given RdxDesc.
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc);
- /// Create a broadcast instruction. This method generates a broadcast
- /// instruction (shuffle) for loop invariant values and for the induction
- /// value. If this is the induction variable then we extend it to N, N+1, ...
- /// this is needed because each iteration in the loop corresponds to a SIMD
- /// element.
- virtual Value *getBroadcastInstrs(Value *V);
-
// Returns the resume value (bc.merge.rdx) for a reduction as
// generated by fixReduction.
PHINode *getReductionResumeValue(const RecurrenceDescriptor &RdxDesc);
/// Create a new phi node for the induction variable \p OrigPhi to resume
/// iteration count in the scalar epilogue, from where the vectorized loop
- /// left off. In cases where the loop skeleton is more complicated (eg.
- /// epilogue vectorization) and the resume values can come from an additional
- /// bypass block, the \p AdditionalBypass pair provides information about the
- /// bypass block and the end value on the edge from bypass to this loop.
+ /// left off. \p Step is the SCEV-expanded induction step to use. In cases
+ /// where the loop skeleton is more complicated (i.e., epilogue vectorization)
+ /// and the resume values can come from an additional bypass block, the \p
+ /// AdditionalBypass pair provides information about the bypass block and the
+ /// end value on the edge from bypass to this loop.
PHINode *createInductionResumeValue(
- PHINode *OrigPhi, const InductionDescriptor &ID,
+ PHINode *OrigPhi, const InductionDescriptor &ID, Value *Step,
ArrayRef<BasicBlock *> BypassBlocks,
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
+ /// Returns the original loop trip count.
+ Value *getTripCount() const { return TripCount; }
+
+ /// Used to set the trip count after ILV's construction and after the
+ /// preheader block has been executed. Note that this always holds the trip
+ /// count of the original loop for both main loop and epilogue vectorization.
+ void setTripCount(Value *TC) { TripCount = TC; }
+
protected:
friend class LoopVectorizationPlanner;
@@ -560,7 +617,7 @@ protected:
void fixupIVUsers(PHINode *OrigPhi, const InductionDescriptor &II,
Value *VectorTripCount, Value *EndValue,
BasicBlock *MiddleBlock, BasicBlock *VectorHeader,
- VPlan &Plan);
+ VPlan &Plan, VPTransformState &State);
/// Handle all cross-iteration phis in the header.
void fixCrossIterationPHIs(VPTransformState &State);
@@ -573,10 +630,6 @@ protected:
/// Create code for the loop exit value of the reduction.
void fixReduction(VPReductionPHIRecipe *Phi, VPTransformState &State);
- /// Clear NSW/NUW flags from reduction instructions if necessary.
- void clearReductionWrapFlags(VPReductionPHIRecipe *PhiR,
- VPTransformState &State);
-
/// Iteratively sink the scalarized operands of a predicated instruction into
/// the block that was created for it.
void sinkScalarOperands(Instruction *PredInst);
@@ -585,9 +638,6 @@ protected:
/// represented as.
void truncateToMinimalBitwidths(VPTransformState &State);
- /// Returns (and creates if needed) the original loop trip count.
- Value *getOrCreateTripCount(BasicBlock *InsertBlock);
-
/// Returns (and creates if needed) the trip count of the widened loop.
Value *getOrCreateVectorTripCount(BasicBlock *InsertBlock);
@@ -621,6 +671,7 @@ protected:
/// block, the \p AdditionalBypass pair provides information about the bypass
/// block and the end value on the edge from bypass to this loop.
void createInductionResumeValues(
+ const SCEV2ValueTy &ExpandedSCEVs,
std::pair<BasicBlock *, Value *> AdditionalBypass = {nullptr, nullptr});
/// Complete the loop skeleton by adding debug MDs, creating appropriate
@@ -758,9 +809,6 @@ public:
ElementCount::getFixed(1),
ElementCount::getFixed(1), UnrollFactor, LVL, CM,
BFI, PSI, Check) {}
-
-private:
- Value *getBroadcastInstrs(Value *V) override;
};
/// Encapsulate information regarding vectorization of a loop and its epilogue.
@@ -810,15 +858,16 @@ public:
// Override this function to handle the more complex control flow around the
// three loops.
- std::pair<BasicBlock *, Value *> createVectorizedLoopSkeleton() final {
- return createEpilogueVectorizedLoopSkeleton();
+ std::pair<BasicBlock *, Value *> createVectorizedLoopSkeleton(
+ const SCEV2ValueTy &ExpandedSCEVs) final {
+ return createEpilogueVectorizedLoopSkeleton(ExpandedSCEVs);
}
/// The interface for creating a vectorized skeleton using one of two
/// different strategies, each corresponding to one execution of the vplan
/// as described above.
virtual std::pair<BasicBlock *, Value *>
- createEpilogueVectorizedLoopSkeleton() = 0;
+ createEpilogueVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs) = 0;
/// Holds and updates state information required to vectorize the main loop
/// and its epilogue in two separate passes. This setup helps us avoid
@@ -846,7 +895,8 @@ public:
EPI, LVL, CM, BFI, PSI, Check) {}
/// Implements the interface for creating a vectorized skeleton using the
/// *main loop* strategy (i.e. the first pass of vplan execution).
- std::pair<BasicBlock *, Value *> createEpilogueVectorizedLoopSkeleton() final;
+ std::pair<BasicBlock *, Value *>
+ createEpilogueVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs) final;
protected:
/// Emits an iteration count bypass check once for the main loop (when \p
@@ -876,7 +926,8 @@ public:
}
/// Implements the interface for creating a vectorized skeleton using the
/// *epilogue loop* strategy (i.e. the second pass of vplan execution).
- std::pair<BasicBlock *, Value *> createEpilogueVectorizedLoopSkeleton() final;
+ std::pair<BasicBlock *, Value *>
+ createEpilogueVectorizedLoopSkeleton(const SCEV2ValueTy &ExpandedSCEVs) final;
protected:
/// Emits an iteration count bypass check after the main vector loop has
@@ -953,35 +1004,21 @@ namespace llvm {
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
int64_t Step) {
assert(Ty->isIntegerTy() && "Expected an integer step");
- Constant *StepVal = ConstantInt::get(Ty, Step * VF.getKnownMinValue());
- return VF.isScalable() ? B.CreateVScale(StepVal) : StepVal;
+ return B.CreateElementCount(Ty, VF.multiplyCoefficientBy(Step));
}
/// Return the runtime value for VF.
Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF) {
- Constant *EC = ConstantInt::get(Ty, VF.getKnownMinValue());
- return VF.isScalable() ? B.CreateVScale(EC) : EC;
+ return B.CreateElementCount(Ty, VF);
}
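// Worked example (values hypothetical, not from the patch): for Ty = i64 and a
// scalable VF of <vscale x 4>, createStepForVF with Step = 2 passes
// ElementCount::getScalable(8) to CreateElementCount, which materializes
// "vscale * 8"; for a fixed VF of 4 the same call folds to the constant i64 8.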
-const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE) {
+const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
+ Loop *OrigLoop) {
const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && "Invalid loop count");
ScalarEvolution &SE = *PSE.getSE();
-
- // The exit count might have the type of i64 while the phi is i32. This can
- // happen if we have an induction variable that is sign extended before the
- // compare. The only way that we get a backedge taken count is that the
- // induction variable was signed and as such will not overflow. In such a case
- // truncation is legal.
- if (SE.getTypeSizeInBits(BackedgeTakenCount->getType()) >
- IdxTy->getPrimitiveSizeInBits())
- BackedgeTakenCount = SE.getTruncateOrNoop(BackedgeTakenCount, IdxTy);
- BackedgeTakenCount = SE.getNoopOrZeroExtend(BackedgeTakenCount, IdxTy);
-
- // Get the total trip count from the count by adding 1.
- return SE.getAddExpr(BackedgeTakenCount,
- SE.getOne(BackedgeTakenCount->getType()));
+ return SE.getTripCountFromExitCount(BackedgeTakenCount, IdxTy, OrigLoop);
}
static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy,
@@ -1062,11 +1099,17 @@ void InnerLoopVectorizer::collectPoisonGeneratingRecipes(
continue;
// This recipe contributes to the address computation of a widen
- // load/store. Collect recipe if its underlying instruction has
- // poison-generating flags.
- Instruction *Instr = CurRec->getUnderlyingInstr();
- if (Instr && Instr->hasPoisonGeneratingFlags())
- State.MayGeneratePoisonRecipes.insert(CurRec);
+ // load/store. If the underlying instruction has poison-generating flags,
+ // drop them directly.
+ if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(CurRec)) {
+ RecWithFlags->dropPoisonGeneratingFlags();
+ } else {
+ Instruction *Instr = CurRec->getUnderlyingInstr();
+ (void)Instr;
+ assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
+ "found instruction with poison generating flags not covered by "
+ "VPRecipeWithIRFlags");
+ }
// Add new definitions to the worklist.
for (VPValue *operand : CurRec->operands())
@@ -1143,15 +1186,7 @@ enum ScalarEpilogueLowering {
CM_ScalarEpilogueNotAllowedUsePredicate
};
-/// ElementCountComparator creates a total ordering for ElementCount
-/// for the purposes of using it in a set structure.
-struct ElementCountComparator {
- bool operator()(const ElementCount &LHS, const ElementCount &RHS) const {
- return std::make_tuple(LHS.isScalable(), LHS.getKnownMinValue()) <
- std::make_tuple(RHS.isScalable(), RHS.getKnownMinValue());
- }
-};
-using ElementCountSet = SmallSet<ElementCount, 16, ElementCountComparator>;
+using InstructionVFPair = std::pair<Instruction *, ElementCount>;
/// LoopVectorizationCostModel - estimates the expected speedups due to
/// vectorization.
@@ -1184,17 +1219,6 @@ public:
/// otherwise.
bool runtimeChecksRequired();
- /// \return The most profitable vectorization factor and the cost of that VF.
- /// This method checks every VF in \p CandidateVFs. If UserVF is not ZERO
- /// then this vectorization factor will be selected if vectorization is
- /// possible.
- VectorizationFactor
- selectVectorizationFactor(const ElementCountSet &CandidateVFs);
-
- VectorizationFactor
- selectEpilogueVectorizationFactor(const ElementCount MaxVF,
- const LoopVectorizationPlanner &LVP);
-
/// Setup cost-based decisions for user vectorization factor.
/// \return true if the UserVF is a feasible VF to be chosen.
bool selectUserVectorizationFactor(ElementCount UserVF) {
@@ -1278,11 +1302,17 @@ public:
auto Scalars = InstsToScalarize.find(VF);
assert(Scalars != InstsToScalarize.end() &&
"VF not yet analyzed for scalarization profitability");
- return Scalars->second.find(I) != Scalars->second.end();
+ return Scalars->second.contains(I);
}
/// Returns true if \p I is known to be uniform after vectorization.
bool isUniformAfterVectorization(Instruction *I, ElementCount VF) const {
+ // Pseudo probes need to be duplicated for each unrolled iteration and
+ // vector lane so that the profiled loop trip count can be accurately
+ // accumulated instead of being undercounted.
+ if (isa<PseudoProbeInst>(I))
+ return false;
+
if (VF.isScalar())
return true;
@@ -1316,7 +1346,7 @@ public:
/// \returns True if instruction \p I can be truncated to a smaller bitwidth
/// for vectorization factor \p VF.
bool canTruncateToMinimalBitwidth(Instruction *I, ElementCount VF) const {
- return VF.isVector() && MinBWs.find(I) != MinBWs.end() &&
+ return VF.isVector() && MinBWs.contains(I) &&
!isProfitableToScalarize(I, VF) &&
!isScalarAfterVectorization(I, VF);
}
@@ -1379,7 +1409,7 @@ public:
InstructionCost getWideningCost(Instruction *I, ElementCount VF) {
assert(VF.isVector() && "Expected VF >=2");
std::pair<Instruction *, ElementCount> InstOnVF = std::make_pair(I, VF);
- assert(WideningDecisions.find(InstOnVF) != WideningDecisions.end() &&
+ assert(WideningDecisions.contains(InstOnVF) &&
"The cost is not calculated");
return WideningDecisions[InstOnVF].second;
}
@@ -1419,7 +1449,7 @@ public:
/// that may be vectorized as interleave, gather-scatter or scalarized.
void collectUniformsAndScalars(ElementCount VF) {
// Do the analysis once.
- if (VF.isScalar() || Uniforms.find(VF) != Uniforms.end())
+ if (VF.isScalar() || Uniforms.contains(VF))
return;
setCostBasedWideningDecision(VF);
collectLoopUniforms(VF);
@@ -1442,8 +1472,7 @@ public:
/// Returns true if the target machine can represent \p V as a masked gather
/// or scatter operation.
- bool isLegalGatherOrScatter(Value *V,
- ElementCount VF = ElementCount::getFixed(1)) {
+ bool isLegalGatherOrScatter(Value *V, ElementCount VF) {
bool LI = isa<LoadInst>(V);
bool SI = isa<StoreInst>(V);
if (!LI && !SI)
@@ -1522,14 +1551,29 @@ public:
/// Returns true if we're required to use a scalar epilogue for at least
/// the final iteration of the original loop.
- bool requiresScalarEpilogue(ElementCount VF) const {
+ bool requiresScalarEpilogue(bool IsVectorizing) const {
if (!isScalarEpilogueAllowed())
return false;
// If we might exit from anywhere but the latch, must run the exiting
// iteration in scalar form.
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch())
return true;
- return VF.isVector() && InterleaveInfo.requiresScalarEpilogue();
+ return IsVectorizing && InterleaveInfo.requiresScalarEpilogue();
+ }
+
+ /// Returns true if we're required to use a scalar epilogue for at least
+ /// the final iteration of the original loop for all VFs in \p Range.
+ /// A scalar epilogue must either be required for all VFs in \p Range or for
+ /// none.
+ bool requiresScalarEpilogue(VFRange Range) const {
+ auto RequiresScalarEpilogue = [this](ElementCount VF) {
+ return requiresScalarEpilogue(VF.isVector());
+ };
+ bool IsRequired = all_of(Range, RequiresScalarEpilogue);
+ assert(
+ (IsRequired || none_of(Range, RequiresScalarEpilogue)) &&
+ "all VFs in range must agree on whether a scalar epilogue is required");
+ return IsRequired;
}
/// Returns true if a scalar epilogue is not allowed due to optsize or a
@@ -1538,14 +1582,21 @@ public:
return ScalarEpilogueStatus == CM_ScalarEpilogueAllowed;
}
- /// Returns true if all loop blocks should be masked to fold tail loop.
- bool foldTailByMasking() const { return FoldTailByMasking; }
+ /// Returns the TailFoldingStyle that is best for the current loop.
+ TailFoldingStyle
+ getTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
+ if (!CanFoldTailByMasking)
+ return TailFoldingStyle::None;
+
+ if (ForceTailFoldingStyle.getNumOccurrences())
+ return ForceTailFoldingStyle;
+
+ return TTI.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
+ }
- /// Returns true if were tail-folding and want to use the active lane mask
- /// for vector loop control flow.
- bool useActiveLaneMaskForControlFlow() const {
- return FoldTailByMasking &&
- TTI.emitGetActiveLaneMask() == PredicationStyle::DataAndControlFlow;
+ /// Returns true if all loop blocks should be masked to fold tail loop.
+ bool foldTailByMasking() const {
+ return getTailFoldingStyle() != TailFoldingStyle::None;
}
/// Returns true if the instructions in this block requires predication
@@ -1582,12 +1633,8 @@ public:
/// scalarized -
/// i.e. either vector version isn't available, or is too expensive.
InstructionCost getVectorCallCost(CallInst *CI, ElementCount VF,
- bool &NeedToScalarize) const;
-
- /// Returns true if the per-lane cost of VectorizationFactor A is lower than
- /// that of B.
- bool isMoreProfitable(const VectorizationFactor &A,
- const VectorizationFactor &B) const;
+ Function **Variant,
+ bool *NeedsMask = nullptr) const;
/// Invalidates decisions already taken by the cost model.
void invalidateCostModelingDecisions() {
@@ -1596,10 +1643,29 @@ public:
Scalars.clear();
}
- /// Convenience function that returns the value of vscale_range iff
- /// vscale_range.min == vscale_range.max or otherwise returns the value
- /// returned by the corresponding TLI method.
- std::optional<unsigned> getVScaleForTuning() const;
+ /// The vectorization cost is a combination of the cost itself and a boolean
+ /// indicating whether any of the contributing operations will actually
+ /// operate on vector values after type legalization in the backend. If this
+ /// latter value is false, then all operations will be scalarized (i.e. no
+ /// vectorization has actually taken place).
+ using VectorizationCostTy = std::pair<InstructionCost, bool>;
+
+ /// Returns the expected execution cost. The unit of the cost does
+ /// not matter because we use the 'cost' units to compare different
+ /// vector widths. The cost that is returned is *not* normalized by
+ /// the factor width. If \p Invalid is not nullptr, this function
+ /// will add a pair(Instruction*, ElementCount) to \p Invalid for
+ /// each instruction that has an Invalid cost for the given VF.
+ VectorizationCostTy
+ expectedCost(ElementCount VF,
+ SmallVectorImpl<InstructionVFPair> *Invalid = nullptr);
+
+ bool hasPredStores() const { return NumPredStores > 0; }
+
+ /// Returns true if epilogue vectorization is considered profitable, and
+ /// false otherwise.
+ /// \p VF is the vectorization factor chosen for the original loop.
+ bool isEpilogueVectorizationProfitable(const ElementCount VF) const;
private:
unsigned NumPredStores = 0;
@@ -1626,24 +1692,6 @@ private:
/// of elements.
ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements);
- /// The vectorization cost is a combination of the cost itself and a boolean
- /// indicating whether any of the contributing operations will actually
- /// operate on vector values after type legalization in the backend. If this
- /// latter value is false, then all operations will be scalarized (i.e. no
- /// vectorization has actually taken place).
- using VectorizationCostTy = std::pair<InstructionCost, bool>;
-
- /// Returns the expected execution cost. The unit of the cost does
- /// not matter because we use the 'cost' units to compare different
- /// vector widths. The cost that is returned is *not* normalized by
- /// the factor width. If \p Invalid is not nullptr, this function
- /// will add a pair(Instruction*, ElementCount) to \p Invalid for
- /// each instruction that has an Invalid cost for the given VF.
- using InstructionVFPair = std::pair<Instruction *, ElementCount>;
- VectorizationCostTy
- expectedCost(ElementCount VF,
- SmallVectorImpl<InstructionVFPair> *Invalid = nullptr);
-
/// Returns the execution time cost of an instruction for a given vector
/// width. Vector width of one means scalar.
VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
@@ -1715,7 +1763,7 @@ private:
ScalarEpilogueLowering ScalarEpilogueStatus = CM_ScalarEpilogueAllowed;
/// All blocks of loop are to be masked to fold tail of scalar iterations.
- bool FoldTailByMasking = false;
+ bool CanFoldTailByMasking = false;
/// A map holding scalar costs for different vectorization factors. The
/// presence of a cost for an instruction in the mapping indicates that the
@@ -1796,8 +1844,7 @@ private:
// the scalars are collected. That should be a safe assumption in most
// cases, because we check if the operands have vectorizable types
// beforehand in LoopVectorizationLegality.
- return Scalars.find(VF) == Scalars.end() ||
- !isScalarAfterVectorization(I, VF);
+ return !Scalars.contains(VF) || !isScalarAfterVectorization(I, VF);
};
/// Returns a range containing only operands needing to be extracted.
@@ -1807,16 +1854,6 @@ private:
Ops, [this, VF](Value *V) { return this->needsExtract(V, VF); }));
}
- /// Determines if we have the infrastructure to vectorize loop \p L and its
- /// epilogue, assuming the main loop is vectorized by \p VF.
- bool isCandidateForEpilogueVectorization(const Loop &L,
- const ElementCount VF) const;
-
- /// Returns true if epilogue vectorization is considered profitable, and
- /// false otherwise.
- /// \p VF is the vectorization factor chosen for the original loop.
- bool isEpilogueVectorizationProfitable(const ElementCount VF) const;
-
public:
/// The loop that we evaluate.
Loop *TheLoop;
@@ -1862,9 +1899,6 @@ public:
/// All element types found in the loop.
SmallPtrSet<Type *, 16> ElementTypesInLoop;
-
- /// Profitable vector factors.
- SmallVector<VectorizationFactor, 8> ProfitableVFs;
};
} // end namespace llvm
@@ -2135,6 +2169,17 @@ public:
};
} // namespace
+static bool useActiveLaneMask(TailFoldingStyle Style) {
+ return Style == TailFoldingStyle::Data ||
+ Style == TailFoldingStyle::DataAndControlFlow ||
+ Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck;
+}
+
+static bool useActiveLaneMaskForControlFlow(TailFoldingStyle Style) {
+ return Style == TailFoldingStyle::DataAndControlFlow ||
+ Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck;
+}
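// Usage sketch (invocation assumed, not part of the patch): the style can be
// forced from the command line through the cl::opt defined above, e.g.
//   opt -passes=loop-vectorize -force-tail-folding-style=data-and-control in.ll
// With that style both helpers above return true, so the active.lane.mask
// intrinsic drives the data path as well as the loop's exit condition.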
+
// Return true if \p OuterLp is an outer loop annotated with hints for explicit
// vectorization. The loop needs to be annotated with #pragma omp simd
// simdlen(#) or #pragma clang loop vectorize(enable) vectorize_width(#). If the
@@ -2202,97 +2247,11 @@ static void collectSupportedLoops(Loop &L, LoopInfo *LI,
collectSupportedLoops(*InnerL, LI, ORE, V);
}
-namespace {
-
-/// The LoopVectorize Pass.
-struct LoopVectorize : public FunctionPass {
- /// Pass identification, replacement for typeid
- static char ID;
-
- LoopVectorizePass Impl;
-
- explicit LoopVectorize(bool InterleaveOnlyWhenForced = false,
- bool VectorizeOnlyWhenForced = false)
- : FunctionPass(ID),
- Impl({InterleaveOnlyWhenForced, VectorizeOnlyWhenForced}) {
- initializeLoopVectorizePass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
-
- auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
- auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
- auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto &LAIs = getAnalysis<LoopAccessLegacyAnalysis>().getLAIs();
- auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
- auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
- auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
-
- return Impl
- .runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AC, LAIs, *ORE, PSI)
- .MadeAnyChange;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<BlockFrequencyInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<LoopAccessLegacyAnalysis>();
- AU.addRequired<DemandedBitsWrapperPass>();
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- AU.addRequired<InjectTLIMappingsLegacy>();
-
- // We currently do not preserve loopinfo/dominator analyses with outer loop
- // vectorization. Until this is addressed, mark these analyses as preserved
- // only for non-VPlan-native path.
- // TODO: Preserve Loop and Dominator analyses for VPlan-native path.
- if (!EnableVPlanNativePath) {
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- }
-
- AU.addPreserved<BasicAAWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addRequired<ProfileSummaryInfoWrapperPass>();
- }
-};
-
-} // end anonymous namespace
-
//===----------------------------------------------------------------------===//
// Implementation of LoopVectorizationLegality, InnerLoopVectorizer and
// LoopVectorizationCostModel and LoopVectorizationPlanner.
//===----------------------------------------------------------------------===//
-Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
- // We need to place the broadcast of invariant variables outside the loop,
- // but only if it's proven safe to do so. Else, broadcast will be inside
- // vector loop body.
- Instruction *Instr = dyn_cast<Instruction>(V);
- bool SafeToHoist = OrigLoop->isLoopInvariant(V) &&
- (!Instr ||
- DT->dominates(Instr->getParent(), LoopVectorPreHeader));
- // Place the code for broadcasting invariant variables in the new preheader.
- IRBuilder<>::InsertPointGuard Guard(Builder);
- if (SafeToHoist)
- Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
-
- // Broadcast the scalar into all locations in the vector.
- Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
-
- return Shuf;
-}
-
/// This function adds
/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
/// to each vector element of Val. The sequence starts at StartIndex.
@@ -2435,21 +2394,6 @@ static void buildScalarSteps(Value *ScalarIV, Value *Step,
}
}
-// Generate code for the induction step. Note that induction steps are
-// required to be loop-invariant
-static Value *CreateStepValue(const SCEV *Step, ScalarEvolution &SE,
- Instruction *InsertBefore,
- Loop *OrigLoop = nullptr) {
- const DataLayout &DL = SE.getDataLayout();
- assert((!OrigLoop || SE.isLoopInvariant(Step, OrigLoop)) &&
- "Induction step should be loop invariant");
- if (auto *E = dyn_cast<SCEVUnknown>(Step))
- return E->getValue();
-
- SCEVExpander Exp(SE, DL, "induction");
- return Exp.expandCodeFor(Step, Step->getType(), InsertBefore);
-}
-
/// Compute the transformed value of Index at offset StartValue using step
/// StepValue.
/// For integer induction, returns StartValue + Index * StepValue.
@@ -2514,9 +2458,7 @@ static Value *emitTransformedIndex(IRBuilderBase &B, Value *Index,
return CreateAdd(StartValue, Offset);
}
case InductionDescriptor::IK_PtrInduction: {
- assert(isa<Constant>(Step) &&
- "Expected constant step for pointer induction");
- return B.CreateGEP(ID.getElementType(), StartValue, CreateMul(Index, Step));
+ return B.CreateGEP(B.getInt8Ty(), StartValue, CreateMul(Index, Step));
}
case InductionDescriptor::IK_FpInduction: {
assert(!isa<VectorType>(Index->getType()) &&
@@ -2538,6 +2480,50 @@ static Value *emitTransformedIndex(IRBuilderBase &B, Value *Index,
llvm_unreachable("invalid enum");
}
+std::optional<unsigned> getMaxVScale(const Function &F,
+ const TargetTransformInfo &TTI) {
+ if (std::optional<unsigned> MaxVScale = TTI.getMaxVScale())
+ return MaxVScale;
+
+ if (F.hasFnAttribute(Attribute::VScaleRange))
+ return F.getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
+
+ return std::nullopt;
+}
+
+/// For the given VF and UF and maximum trip count computed for the loop,
+/// return true if the induction variable of the vectorized loop is known not
+/// to overflow. In that case the runtime overflow check always evaluates to
+/// false and can be removed.
+static bool isIndvarOverflowCheckKnownFalse(
+ const LoopVectorizationCostModel *Cost,
+ ElementCount VF, std::optional<unsigned> UF = std::nullopt) {
+ // Always be conservative if we don't know the exact unroll factor.
+ unsigned MaxUF = UF ? *UF : Cost->TTI.getMaxInterleaveFactor(VF);
+
+ Type *IdxTy = Cost->Legal->getWidestInductionType();
+ APInt MaxUIntTripCount = cast<IntegerType>(IdxTy)->getMask();
+
+ // The runtime overflow check is known to be false iff the (max) trip-count
+ // is known and (max) trip-count + (VF * UF) does not overflow in the type of
+ // the vector loop induction variable.
+ if (unsigned TC =
+ Cost->PSE.getSE()->getSmallConstantMaxTripCount(Cost->TheLoop)) {
+ uint64_t MaxVF = VF.getKnownMinValue();
+ if (VF.isScalable()) {
+ std::optional<unsigned> MaxVScale =
+ getMaxVScale(*Cost->TheFunction, Cost->TTI);
+ if (!MaxVScale)
+ return false;
+ MaxVF *= *MaxVScale;
+ }
+
+ return (MaxUIntTripCount - TC).ugt(MaxVF * MaxUF);
+ }
+
+ return false;
+}
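// Worked example (numbers hypothetical, not from the patch): with an i32
// widest induction type, MaxUIntTripCount is 2^32 - 1. If the known maximum
// trip count is TC = 1000 and VF = 4 (fixed) with UF = 2, then
// (2^32 - 1) - 1000 ugt 4 * 2 holds, so the function returns true and the
// induction-variable overflow check can be omitted.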
+
void InnerLoopVectorizer::packScalarIntoVectorValue(VPValue *Def,
const VPIteration &Instance,
VPTransformState &State) {
@@ -2591,14 +2577,13 @@ static bool useMaskedInterleavedAccesses(const TargetTransformInfo &TTI) {
void InnerLoopVectorizer::vectorizeInterleaveGroup(
const InterleaveGroup<Instruction> *Group, ArrayRef<VPValue *> VPDefs,
VPTransformState &State, VPValue *Addr, ArrayRef<VPValue *> StoredValues,
- VPValue *BlockInMask) {
+ VPValue *BlockInMask, bool NeedsMaskForGaps) {
Instruction *Instr = Group->getInsertPos();
const DataLayout &DL = Instr->getModule()->getDataLayout();
// Prepare for the vector type of the interleaved load/store.
Type *ScalarTy = getLoadStoreType(Instr);
unsigned InterleaveFactor = Group->getFactor();
- assert(!VF.isScalable() && "scalable vectors not yet supported.");
auto *VecTy = VectorType::get(ScalarTy, VF * InterleaveFactor);
// Prepare for the new pointers.
@@ -2609,14 +2594,21 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
assert((!BlockInMask || !Group->isReverse()) &&
"Reversed masked interleave-group not supported.");
+ Value *Idx;
// If the group is reverse, adjust the index to refer to the last vector lane
// instead of the first. We adjust the index from the first vector lane,
// rather than directly getting the pointer for lane VF - 1, because the
// pointer operand of the interleaved access is supposed to be uniform. For
// uniform instructions, we're only required to generate a value for the
// first vector lane in each unroll iteration.
- if (Group->isReverse())
- Index += (VF.getKnownMinValue() - 1) * Group->getFactor();
+ if (Group->isReverse()) {
+ Value *RuntimeVF = getRuntimeVF(Builder, Builder.getInt32Ty(), VF);
+ Idx = Builder.CreateSub(RuntimeVF, Builder.getInt32(1));
+ Idx = Builder.CreateMul(Idx, Builder.getInt32(Group->getFactor()));
+ Idx = Builder.CreateAdd(Idx, Builder.getInt32(Index));
+ Idx = Builder.CreateNeg(Idx);
+ } else
+ Idx = Builder.getInt32(-Index);
for (unsigned Part = 0; Part < UF; Part++) {
Value *AddrPart = State.get(Addr, VPIteration(Part, 0));
@@ -2637,8 +2629,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
bool InBounds = false;
if (auto *gep = dyn_cast<GetElementPtrInst>(AddrPart->stripPointerCasts()))
InBounds = gep->isInBounds();
- AddrPart = Builder.CreateGEP(ScalarTy, AddrPart, Builder.getInt32(-Index));
- cast<GetElementPtrInst>(AddrPart)->setIsInBounds(InBounds);
+ AddrPart = Builder.CreateGEP(ScalarTy, AddrPart, Idx, "", InBounds);
// Cast to the vector pointer type.
unsigned AddressSpace = AddrPart->getType()->getPointerAddressSpace();
@@ -2649,14 +2640,43 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
State.setDebugLocFromInst(Instr);
Value *PoisonVec = PoisonValue::get(VecTy);
- Value *MaskForGaps = nullptr;
- if (Group->requiresScalarEpilogue() && !Cost->isScalarEpilogueAllowed()) {
- MaskForGaps = createBitMaskForGaps(Builder, VF.getKnownMinValue(), *Group);
- assert(MaskForGaps && "Mask for Gaps is required but it is null");
- }
+ auto CreateGroupMask = [this, &BlockInMask, &State, &InterleaveFactor](
+ unsigned Part, Value *MaskForGaps) -> Value * {
+ if (VF.isScalable()) {
+ assert(!MaskForGaps && "Interleaved groups with gaps are not supported.");
+ assert(InterleaveFactor == 2 &&
+ "Unsupported deinterleave factor for scalable vectors");
+ auto *BlockInMaskPart = State.get(BlockInMask, Part);
+ SmallVector<Value *, 2> Ops = {BlockInMaskPart, BlockInMaskPart};
+ auto *MaskTy =
+ VectorType::get(Builder.getInt1Ty(), VF.getKnownMinValue() * 2, true);
+ return Builder.CreateIntrinsic(
+ MaskTy, Intrinsic::experimental_vector_interleave2, Ops,
+ /*FMFSource=*/nullptr, "interleaved.mask");
+ }
+
+ if (!BlockInMask)
+ return MaskForGaps;
+
+ Value *BlockInMaskPart = State.get(BlockInMask, Part);
+ Value *ShuffledMask = Builder.CreateShuffleVector(
+ BlockInMaskPart,
+ createReplicatedMask(InterleaveFactor, VF.getKnownMinValue()),
+ "interleaved.mask");
+ return MaskForGaps ? Builder.CreateBinOp(Instruction::And, ShuffledMask,
+ MaskForGaps)
+ : ShuffledMask;
+ };
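// Worked example (values hypothetical, not from the patch): for a fixed VF of
// 4 and an interleave factor of 2, a block mask <m0, m1, m2, m3> is expanded
// by the lambda above using createReplicatedMask(2, 4) = {0, 0, 1, 1, 2, 2, 3, 3}
// into <m0, m0, m1, m1, m2, m2, m3, m3>; for scalable vectors the same effect
// comes from interleaving the mask with itself via
// llvm.experimental.vector.interleave2.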
// Vectorize the interleaved load group.
if (isa<LoadInst>(Instr)) {
+ Value *MaskForGaps = nullptr;
+ if (NeedsMaskForGaps) {
+ MaskForGaps =
+ createBitMaskForGaps(Builder, VF.getKnownMinValue(), *Group);
+ assert(MaskForGaps && "Mask for Gaps is required but it is null");
+ }
+
// For each unroll part, create a wide load for the group.
SmallVector<Value *, 2> NewLoads;
for (unsigned Part = 0; Part < UF; Part++) {
@@ -2664,18 +2684,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
if (BlockInMask || MaskForGaps) {
assert(useMaskedInterleavedAccesses(*TTI) &&
"masked interleaved groups are not allowed.");
- Value *GroupMask = MaskForGaps;
- if (BlockInMask) {
- Value *BlockInMaskPart = State.get(BlockInMask, Part);
- Value *ShuffledMask = Builder.CreateShuffleVector(
- BlockInMaskPart,
- createReplicatedMask(InterleaveFactor, VF.getKnownMinValue()),
- "interleaved.mask");
- GroupMask = MaskForGaps
- ? Builder.CreateBinOp(Instruction::And, ShuffledMask,
- MaskForGaps)
- : ShuffledMask;
- }
+ Value *GroupMask = CreateGroupMask(Part, MaskForGaps);
NewLoad =
Builder.CreateMaskedLoad(VecTy, AddrParts[Part], Group->getAlign(),
GroupMask, PoisonVec, "wide.masked.vec");
@@ -2687,6 +2696,41 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
NewLoads.push_back(NewLoad);
}
+ if (VecTy->isScalableTy()) {
+ assert(InterleaveFactor == 2 &&
+ "Unsupported deinterleave factor for scalable vectors");
+
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ // Scalable vectors cannot use arbitrary shufflevectors (only splats),
+ // so we must use intrinsics to deinterleave.
+ Value *DI = Builder.CreateIntrinsic(
+ Intrinsic::experimental_vector_deinterleave2, VecTy, NewLoads[Part],
+ /*FMFSource=*/nullptr, "strided.vec");
+ unsigned J = 0;
+ for (unsigned I = 0; I < InterleaveFactor; ++I) {
+ Instruction *Member = Group->getMember(I);
+
+ if (!Member)
+ continue;
+
+ Value *StridedVec = Builder.CreateExtractValue(DI, I);
+ // If this member has a different type, cast the result to that type.
+ if (Member->getType() != ScalarTy) {
+ VectorType *OtherVTy = VectorType::get(Member->getType(), VF);
+ StridedVec = createBitOrPointerCast(StridedVec, OtherVTy, DL);
+ }
+
+ if (Group->isReverse())
+ StridedVec = Builder.CreateVectorReverse(StridedVec, "reverse");
+
+ State.set(VPDefs[J], StridedVec, Part);
+ ++J;
+ }
+ }
+
+ return;
+ }
+
// For each member in the group, shuffle out the appropriate data from the
// wide loads.
unsigned J = 0;
@@ -2724,7 +2768,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
auto *SubVT = VectorType::get(ScalarTy, VF);
// Vectorize the interleaved store group.
- MaskForGaps = createBitMaskForGaps(Builder, VF.getKnownMinValue(), *Group);
+ Value *MaskForGaps =
+ createBitMaskForGaps(Builder, VF.getKnownMinValue(), *Group);
assert((!MaskForGaps || useMaskedInterleavedAccesses(*TTI)) &&
"masked interleaved groups are not allowed.");
assert((!MaskForGaps || !VF.isScalable()) &&
@@ -2759,27 +2804,11 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
StoredVecs.push_back(StoredVec);
}
- // Concatenate all vectors into a wide vector.
- Value *WideVec = concatenateVectors(Builder, StoredVecs);
-
- // Interleave the elements in the wide vector.
- Value *IVec = Builder.CreateShuffleVector(
- WideVec, createInterleaveMask(VF.getKnownMinValue(), InterleaveFactor),
- "interleaved.vec");
-
+ // Interleave all the smaller vectors into one wider vector.
+ Value *IVec = interleaveVectors(Builder, StoredVecs, "interleaved.vec");
Instruction *NewStoreInstr;
if (BlockInMask || MaskForGaps) {
- Value *GroupMask = MaskForGaps;
- if (BlockInMask) {
- Value *BlockInMaskPart = State.get(BlockInMask, Part);
- Value *ShuffledMask = Builder.CreateShuffleVector(
- BlockInMaskPart,
- createReplicatedMask(InterleaveFactor, VF.getKnownMinValue()),
- "interleaved.mask");
- GroupMask = MaskForGaps ? Builder.CreateBinOp(Instruction::And,
- ShuffledMask, MaskForGaps)
- : ShuffledMask;
- }
+ Value *GroupMask = CreateGroupMask(Part, MaskForGaps);
NewStoreInstr = Builder.CreateMaskedStore(IVec, AddrParts[Part],
Group->getAlign(), GroupMask);
} else
@@ -2793,7 +2822,6 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(
void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
VPReplicateRecipe *RepRecipe,
const VPIteration &Instance,
- bool IfPredicateInstr,
VPTransformState &State) {
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
@@ -2810,14 +2838,7 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
if (!IsVoidRetTy)
Cloned->setName(Instr->getName() + ".cloned");
- // If the scalarized instruction contributes to the address computation of a
- // widen masked load/store which was in a basic block that needed predication
- // and is not predicated after vectorization, we can't propagate
- // poison-generating flags (nuw/nsw, exact, inbounds, etc.). The scalarized
- // instruction could feed a poison value to the base address of the widen
- // load/store.
- if (State.MayGeneratePoisonRecipes.contains(RepRecipe))
- Cloned->dropPoisonGeneratingFlags();
+ RepRecipe->setFlags(Cloned);
if (Instr->getDebugLoc())
State.setDebugLocFromInst(Instr);
@@ -2843,45 +2864,17 @@ void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
AC->registerAssumption(II);
// End if-block.
+ bool IfPredicateInstr = RepRecipe->getParent()->getParent()->isReplicator();
if (IfPredicateInstr)
PredicatedInstructions.push_back(Cloned);
}
-Value *InnerLoopVectorizer::getOrCreateTripCount(BasicBlock *InsertBlock) {
- if (TripCount)
- return TripCount;
-
- assert(InsertBlock);
- IRBuilder<> Builder(InsertBlock->getTerminator());
- // Find the loop boundaries.
- Type *IdxTy = Legal->getWidestInductionType();
- assert(IdxTy && "No type for induction");
- const SCEV *ExitCount = createTripCountSCEV(IdxTy, PSE);
-
- const DataLayout &DL = InsertBlock->getModule()->getDataLayout();
-
- // Expand the trip count and place the new instructions in the preheader.
- // Notice that the pre-header does not change, only the loop body.
- SCEVExpander Exp(*PSE.getSE(), DL, "induction");
-
- // Count holds the overall loop count (N).
- TripCount = Exp.expandCodeFor(ExitCount, ExitCount->getType(),
- InsertBlock->getTerminator());
-
- if (TripCount->getType()->isPointerTy())
- TripCount =
- CastInst::CreatePointerCast(TripCount, IdxTy, "exitcount.ptrcnt.to.int",
- InsertBlock->getTerminator());
-
- return TripCount;
-}
-
Value *
InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
if (VectorTripCount)
return VectorTripCount;
- Value *TC = getOrCreateTripCount(InsertBlock);
+ Value *TC = getTripCount();
IRBuilder<> Builder(InsertBlock->getTerminator());
Type *Ty = TC->getType();
@@ -2917,7 +2910,7 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
// the step does not evenly divide the trip count, no adjustment is necessary
// since there will already be scalar iterations. Note that the minimum
// iterations check ensures that N >= Step.
- if (Cost->requiresScalarEpilogue(VF)) {
+ if (Cost->requiresScalarEpilogue(VF.isVector())) {
auto *IsZero = Builder.CreateICmpEQ(R, ConstantInt::get(R->getType(), 0));
R = Builder.CreateSelect(IsZero, Step, R);
}
@@ -2930,10 +2923,10 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V, VectorType *DstVTy,
const DataLayout &DL) {
// Verify that V is a vector type with same number of elements as DstVTy.
- auto *DstFVTy = cast<FixedVectorType>(DstVTy);
- unsigned VF = DstFVTy->getNumElements();
- auto *SrcVecTy = cast<FixedVectorType>(V->getType());
- assert((VF == SrcVecTy->getNumElements()) && "Vector dimensions do not match");
+ auto *DstFVTy = cast<VectorType>(DstVTy);
+ auto VF = DstFVTy->getElementCount();
+ auto *SrcVecTy = cast<VectorType>(V->getType());
+ assert(VF == SrcVecTy->getElementCount() && "Vector dimensions do not match");
Type *SrcElemTy = SrcVecTy->getElementType();
Type *DstElemTy = DstFVTy->getElementType();
assert((DL.getTypeSizeInBits(SrcElemTy) == DL.getTypeSizeInBits(DstElemTy)) &&
@@ -2953,13 +2946,13 @@ Value *InnerLoopVectorizer::createBitOrPointerCast(Value *V, VectorType *DstVTy,
"Only one type should be a floating point type");
Type *IntTy =
IntegerType::getIntNTy(V->getContext(), DL.getTypeSizeInBits(SrcElemTy));
- auto *VecIntTy = FixedVectorType::get(IntTy, VF);
+ auto *VecIntTy = VectorType::get(IntTy, VF);
Value *CastVal = Builder.CreateBitOrPointerCast(V, VecIntTy);
return Builder.CreateBitOrPointerCast(CastVal, DstFVTy);
}
void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
- Value *Count = getOrCreateTripCount(LoopVectorPreHeader);
+ Value *Count = getTripCount();
// Reuse existing vector loop preheader for TC checks.
// Note that new preheader block is generated for vector loop.
BasicBlock *const TCCheckBlock = LoopVectorPreHeader;
@@ -2970,8 +2963,8 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
// vector trip count is zero. This check also covers the case where adding one
// to the backedge-taken count overflowed leading to an incorrect trip count
// of zero. In this case we will also jump to the scalar loop.
- auto P = Cost->requiresScalarEpilogue(VF) ? ICmpInst::ICMP_ULE
- : ICmpInst::ICMP_ULT;
+ auto P = Cost->requiresScalarEpilogue(VF.isVector()) ? ICmpInst::ICMP_ULE
+ : ICmpInst::ICMP_ULT;
// If tail is to be folded, vector loop takes care of all iterations.
Type *CountTy = Count->getType();
@@ -2989,10 +2982,13 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
Intrinsic::umax, MinProfTC, createStepForVF(Builder, CountTy, VF, UF));
};
- if (!Cost->foldTailByMasking())
+ TailFoldingStyle Style = Cost->getTailFoldingStyle();
+ if (Style == TailFoldingStyle::None)
CheckMinIters =
Builder.CreateICmp(P, Count, CreateStep(), "min.iters.check");
- else if (VF.isScalable()) {
+ else if (VF.isScalable() &&
+ !isIndvarOverflowCheckKnownFalse(Cost, VF, UF) &&
+ Style != TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {
// vscale is not necessarily a power-of-2, which means we cannot guarantee
// an overflow to zero when updating induction variables and so an
// additional overflow check is required before entering the vector loop.
@@ -3017,7 +3013,7 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
// Update dominator for Bypass & LoopExit (if needed).
DT->changeImmediateDominator(Bypass, TCCheckBlock);
- if (!Cost->requiresScalarEpilogue(VF))
+ if (!Cost->requiresScalarEpilogue(VF.isVector()))
// If there is an epilogue which must run, there's no edge from the
// middle block to exit blocks and thus no need to update the immediate
// dominator of the exit blocks.
@@ -3044,7 +3040,7 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
// Update dominator only if this is first RT check.
if (LoopBypassBlocks.empty()) {
DT->changeImmediateDominator(Bypass, SCEVCheckBlock);
- if (!Cost->requiresScalarEpilogue(VF))
+ if (!Cost->requiresScalarEpilogue(VF.isVector()))
// If there is an epilogue which must run, there's no edge from the
// middle block to exit blocks and thus no need to update the immediate
// dominator of the exit blocks.
@@ -3097,7 +3093,7 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
LoopVectorPreHeader = OrigLoop->getLoopPreheader();
assert(LoopVectorPreHeader && "Invalid loop structure");
LoopExitBlock = OrigLoop->getUniqueExitBlock(); // may be nullptr
- assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF)) &&
+ assert((LoopExitBlock || Cost->requiresScalarEpilogue(VF.isVector())) &&
"multiple exit loop without required epilogue?");
LoopMiddleBlock =
@@ -3117,17 +3113,18 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
// branch from the middle block to the loop scalar preheader, and the
// exit block. completeLoopSkeleton will update the condition to use an
// iteration check, if required to decide whether to execute the remainder.
- BranchInst *BrInst = Cost->requiresScalarEpilogue(VF) ?
- BranchInst::Create(LoopScalarPreHeader) :
- BranchInst::Create(LoopExitBlock, LoopScalarPreHeader,
- Builder.getTrue());
+ BranchInst *BrInst =
+ Cost->requiresScalarEpilogue(VF.isVector())
+ ? BranchInst::Create(LoopScalarPreHeader)
+ : BranchInst::Create(LoopExitBlock, LoopScalarPreHeader,
+ Builder.getTrue());
BrInst->setDebugLoc(ScalarLatchTerm->getDebugLoc());
ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst);
// Update dominator for loop exit. During skeleton creation, only the vector
// pre-header and the middle block are created. The vector loop is entirely
// created during VPlan exection.
- if (!Cost->requiresScalarEpilogue(VF))
+ if (!Cost->requiresScalarEpilogue(VF.isVector()))
// If there is an epilogue which must run, there's no edge from the
// middle block to exit blocks and thus no need to update the immediate
// dominator of the exit blocks.
@@ -3135,7 +3132,7 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
}
PHINode *InnerLoopVectorizer::createInductionResumeValue(
- PHINode *OrigPhi, const InductionDescriptor &II,
+ PHINode *OrigPhi, const InductionDescriptor &II, Value *Step,
ArrayRef<BasicBlock *> BypassBlocks,
std::pair<BasicBlock *, Value *> AdditionalBypass) {
Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
@@ -3154,8 +3151,6 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
if (II.getInductionBinOp() && isa<FPMathOperator>(II.getInductionBinOp()))
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
- Value *Step =
- CreateStepValue(II.getStep(), *PSE.getSE(), &*B.GetInsertPoint());
EndValue =
emitTransformedIndex(B, VectorTripCount, II.getStartValue(), Step, II);
EndValue->setName("ind.end");
@@ -3163,8 +3158,6 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
// Compute the end value for the additional bypass (if applicable).
if (AdditionalBypass.first) {
B.SetInsertPoint(&(*AdditionalBypass.first->getFirstInsertionPt()));
- Value *Step =
- CreateStepValue(II.getStep(), *PSE.getSE(), &*B.GetInsertPoint());
EndValueFromAdditionalBypass = emitTransformedIndex(
B, AdditionalBypass.second, II.getStartValue(), Step, II);
EndValueFromAdditionalBypass->setName("ind.end");
@@ -3193,7 +3186,22 @@ PHINode *InnerLoopVectorizer::createInductionResumeValue(
return BCResumeVal;
}
+/// Return the expanded step for \p ID using \p ExpandedSCEVs to look up SCEV
+/// expansion results.
+static Value *getExpandedStep(const InductionDescriptor &ID,
+ const SCEV2ValueTy &ExpandedSCEVs) {
+ const SCEV *Step = ID.getStep();
+ if (auto *C = dyn_cast<SCEVConstant>(Step))
+ return C->getValue();
+ if (auto *U = dyn_cast<SCEVUnknown>(Step))
+ return U->getValue();
+ auto I = ExpandedSCEVs.find(Step);
+ assert(I != ExpandedSCEVs.end() && "SCEV must be expanded at this point");
+ return I->second;
+}
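// Illustrative note (examples hypothetical, not from the patch): for an
// induction {0,+,4} the step SCEV is the constant 4 and is returned directly;
// a step that is a plain IR value (SCEVUnknown) is likewise returned as-is.
// Only composite loop-invariant steps, e.g. a "%n * 8" SCEVMulExpr, must have
// been pre-expanded into ExpandedSCEVs before skeleton creation.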
+
void InnerLoopVectorizer::createInductionResumeValues(
+ const SCEV2ValueTy &ExpandedSCEVs,
std::pair<BasicBlock *, Value *> AdditionalBypass) {
assert(((AdditionalBypass.first && AdditionalBypass.second) ||
(!AdditionalBypass.first && !AdditionalBypass.second)) &&
@@ -3209,14 +3217,15 @@ void InnerLoopVectorizer::createInductionResumeValues(
PHINode *OrigPhi = InductionEntry.first;
const InductionDescriptor &II = InductionEntry.second;
PHINode *BCResumeVal = createInductionResumeValue(
- OrigPhi, II, LoopBypassBlocks, AdditionalBypass);
+ OrigPhi, II, getExpandedStep(II, ExpandedSCEVs), LoopBypassBlocks,
+ AdditionalBypass);
OrigPhi->setIncomingValueForBlock(LoopScalarPreHeader, BCResumeVal);
}
}
BasicBlock *InnerLoopVectorizer::completeLoopSkeleton() {
// The trip counts should be cached by now.
- Value *Count = getOrCreateTripCount(LoopVectorPreHeader);
+ Value *Count = getTripCount();
Value *VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);
auto *ScalarLatchTerm = OrigLoop->getLoopLatch()->getTerminator();
@@ -3229,7 +3238,8 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton() {
// Thus if tail is to be folded, we know we don't need to run the
// remainder and we can use the previous value for the condition (true).
// 3) Otherwise, construct a runtime check.
- if (!Cost->requiresScalarEpilogue(VF) && !Cost->foldTailByMasking()) {
+ if (!Cost->requiresScalarEpilogue(VF.isVector()) &&
+ !Cost->foldTailByMasking()) {
Instruction *CmpN = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
Count, VectorTripCount, "cmp.n",
LoopMiddleBlock->getTerminator());
@@ -3250,14 +3260,16 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton() {
}
std::pair<BasicBlock *, Value *>
-InnerLoopVectorizer::createVectorizedLoopSkeleton() {
+InnerLoopVectorizer::createVectorizedLoopSkeleton(
+ const SCEV2ValueTy &ExpandedSCEVs) {
/*
In this function we generate a new loop. The new loop will contain
the vectorized instructions while the old loop will continue to run the
scalar remainder.
- [ ] <-- loop iteration number check.
- / |
+ [ ] <-- old preheader - loop iteration number check and SCEVs in Plan's
+ / | preheader are expanded here. Eventually all required SCEV
+ / | expansion should happen here.
/ v
| [ ] <-- vector loop bypass (may consist of multiple blocks).
| / |
@@ -3304,7 +3316,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() {
emitMemRuntimeChecks(LoopScalarPreHeader);
// Emit phis for the new starting index of the scalar loop.
- createInductionResumeValues();
+ createInductionResumeValues(ExpandedSCEVs);
return {completeLoopSkeleton(), nullptr};
}
@@ -3317,7 +3329,8 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
const InductionDescriptor &II,
Value *VectorTripCount, Value *EndValue,
BasicBlock *MiddleBlock,
- BasicBlock *VectorHeader, VPlan &Plan) {
+ BasicBlock *VectorHeader, VPlan &Plan,
+ VPTransformState &State) {
// There are two kinds of external IV usages - those that use the value
// computed in the last iteration (the PHI) and those that use the penultimate
// value (the value that feeds into the phi from the loop latch).
@@ -3345,7 +3358,6 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
auto *UI = cast<Instruction>(U);
if (!OrigLoop->contains(UI)) {
assert(isa<PHINode>(UI) && "Expected LCSSA form");
-
IRBuilder<> B(MiddleBlock->getTerminator());
// Fast-math-flags propagate from the original induction instruction.
@@ -3355,8 +3367,11 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
Value *CountMinusOne = B.CreateSub(
VectorTripCount, ConstantInt::get(VectorTripCount->getType(), 1));
CountMinusOne->setName("cmo");
- Value *Step = CreateStepValue(II.getStep(), *PSE.getSE(),
- VectorHeader->getTerminator());
+
+ VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
+ assert(StepVPV && "step must have been expanded during VPlan execution");
+ Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
+ : State.get(StepVPV, {0, 0});
Value *Escape =
emitTransformedIndex(B, CountMinusOne, II.getStartValue(), Step, II);
Escape->setName("ind.escape");
@@ -3430,12 +3445,12 @@ static void cse(BasicBlock *BB) {
}
}
-InstructionCost
-LoopVectorizationCostModel::getVectorCallCost(CallInst *CI, ElementCount VF,
- bool &NeedToScalarize) const {
+InstructionCost LoopVectorizationCostModel::getVectorCallCost(
+ CallInst *CI, ElementCount VF, Function **Variant, bool *NeedsMask) const {
Function *F = CI->getCalledFunction();
Type *ScalarRetTy = CI->getType();
SmallVector<Type *, 4> Tys, ScalarTys;
+ bool MaskRequired = Legal->isMaskRequired(CI);
for (auto &ArgOp : CI->args())
ScalarTys.push_back(ArgOp->getType());
@@ -3464,18 +3479,39 @@ LoopVectorizationCostModel::getVectorCallCost(CallInst *CI, ElementCount VF,
// If we can't emit a vector call for this function, then the currently found
// cost is the cost we need to return.
- NeedToScalarize = true;
- VFShape Shape = VFShape::get(*CI, VF, false /*HasGlobalPred*/);
+ InstructionCost MaskCost = 0;
+ VFShape Shape = VFShape::get(*CI, VF, MaskRequired);
+ if (NeedsMask)
+ *NeedsMask = MaskRequired;
Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
+ // If we want an unmasked vector function but can't find one matching the VF,
+ // maybe we can find vector function that does use a mask and synthesize
+ // an all-true mask.
+ if (!VecFunc && !MaskRequired) {
+ Shape = VFShape::get(*CI, VF, /*HasGlobalPred=*/true);
+ VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
+ // If we found one, add in the cost of creating a mask
+ if (VecFunc) {
+ if (NeedsMask)
+ *NeedsMask = true;
+ MaskCost = TTI.getShuffleCost(
+ TargetTransformInfo::SK_Broadcast,
+ VectorType::get(
+ IntegerType::getInt1Ty(VecFunc->getFunctionType()->getContext()),
+ VF));
+ }
+ }
+ // We don't support masked function calls yet, but we can scalarize a
+ // masked call with branches (unless VF is scalable).
if (!TLI || CI->isNoBuiltin() || !VecFunc)
- return Cost;
+ return VF.isScalable() ? InstructionCost::getInvalid() : Cost;
// If the corresponding vector cost is cheaper, return its cost.
InstructionCost VectorCallCost =
- TTI.getCallInstrCost(nullptr, RetTy, Tys, CostKind);
+ TTI.getCallInstrCost(nullptr, RetTy, Tys, CostKind) + MaskCost;
if (VectorCallCost < Cost) {
- NeedToScalarize = false;
+ *Variant = VecFunc;
Cost = VectorCallCost;
}
return Cost;
@@ -3675,14 +3711,25 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
// Forget the original basic block.
PSE.getSE()->forgetLoop(OrigLoop);
+ // After vectorization, the exit blocks of the original loop will have
+ // additional predecessors. Invalidate SCEVs for the exit phis in case SE
+ // looked through single-entry phis.
+ SmallVector<BasicBlock *> ExitBlocks;
+ OrigLoop->getExitBlocks(ExitBlocks);
+ for (BasicBlock *Exit : ExitBlocks)
+ for (PHINode &PN : Exit->phis())
+ PSE.getSE()->forgetValue(&PN);
+
VPBasicBlock *LatchVPBB = Plan.getVectorLoopRegion()->getExitingBasicBlock();
Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]);
- if (Cost->requiresScalarEpilogue(VF)) {
+ if (Cost->requiresScalarEpilogue(VF.isVector())) {
// No edge from the middle block to the unique exit block has been inserted
// and there is nothing to fix from vector loop; phis should have incoming
// from scalar loop only.
- Plan.clearLiveOuts();
} else {
+ // TODO: Check VPLiveOuts to see if IV users need fixing instead of checking
+ // the cost model.
+
// If we inserted an edge from the middle block to the unique exit block,
// update uses outside the loop (phis) to account for the newly inserted
// edge.
@@ -3692,7 +3739,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
fixupIVUsers(Entry.first, Entry.second,
getOrCreateVectorTripCount(VectorLoop->getLoopPreheader()),
IVEndValues[Entry.first], LoopMiddleBlock,
- VectorLoop->getHeader(), Plan);
+ VectorLoop->getHeader(), Plan, State);
}
// Fix LCSSA phis not already fixed earlier. Extracts may need to be generated
@@ -3799,31 +3846,53 @@ void InnerLoopVectorizer::fixFixedOrderRecurrence(
Value *Incoming = State.get(PreviousDef, UF - 1);
auto *ExtractForScalar = Incoming;
auto *IdxTy = Builder.getInt32Ty();
+ Value *RuntimeVF = nullptr;
if (VF.isVector()) {
auto *One = ConstantInt::get(IdxTy, 1);
Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
- auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, VF);
+ RuntimeVF = getRuntimeVF(Builder, IdxTy, VF);
auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
- ExtractForScalar = Builder.CreateExtractElement(ExtractForScalar, LastIdx,
- "vector.recur.extract");
- }
- // Extract the second last element in the middle block if the
- // Phi is used outside the loop. We need to extract the phi itself
- // and not the last element (the phi update in the current iteration). This
- // will be the value when jumping to the exit block from the LoopMiddleBlock,
- // when the scalar loop is not run at all.
- Value *ExtractForPhiUsedOutsideLoop = nullptr;
- if (VF.isVector()) {
- auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, VF);
- auto *Idx = Builder.CreateSub(RuntimeVF, ConstantInt::get(IdxTy, 2));
- ExtractForPhiUsedOutsideLoop = Builder.CreateExtractElement(
- Incoming, Idx, "vector.recur.extract.for.phi");
- } else if (UF > 1)
- // When loop is unrolled without vectorizing, initialize
- // ExtractForPhiUsedOutsideLoop with the value just prior to unrolled value
- // of `Incoming`. This is analogous to the vectorized case above: extracting
- // the second last element when VF > 1.
- ExtractForPhiUsedOutsideLoop = State.get(PreviousDef, UF - 2);
+ ExtractForScalar =
+ Builder.CreateExtractElement(Incoming, LastIdx, "vector.recur.extract");
+ }
+
+ auto RecurSplice = cast<VPInstruction>(*PhiR->user_begin());
+ assert(PhiR->getNumUsers() == 1 &&
+ RecurSplice->getOpcode() ==
+ VPInstruction::FirstOrderRecurrenceSplice &&
+ "recurrence phi must have a single user: FirstOrderRecurrenceSplice");
+ SmallVector<VPLiveOut *> LiveOuts;
+ for (VPUser *U : RecurSplice->users())
+ if (auto *LiveOut = dyn_cast<VPLiveOut>(U))
+ LiveOuts.push_back(LiveOut);
+
+ if (!LiveOuts.empty()) {
+ // Extract the second last element in the middle block if the
+ // Phi is used outside the loop. We need to extract the phi itself
+ // and not the last element (the phi update in the current iteration). This
+ // will be the value when jumping to the exit block from the
+ // LoopMiddleBlock, when the scalar loop is not run at all.
+ Value *ExtractForPhiUsedOutsideLoop = nullptr;
+ if (VF.isVector()) {
+ auto *Idx = Builder.CreateSub(RuntimeVF, ConstantInt::get(IdxTy, 2));
+ ExtractForPhiUsedOutsideLoop = Builder.CreateExtractElement(
+ Incoming, Idx, "vector.recur.extract.for.phi");
+ } else {
+ assert(UF > 1 && "VF and UF cannot both be 1");
+ // When loop is unrolled without vectorizing, initialize
+ // ExtractForPhiUsedOutsideLoop with the value just prior to unrolled
+ // value of `Incoming`. This is analogous to the vectorized case above:
+ // extracting the second last element when VF > 1.
+ ExtractForPhiUsedOutsideLoop = State.get(PreviousDef, UF - 2);
+ }
+
+ for (VPLiveOut *LiveOut : LiveOuts) {
+ assert(!Cost->requiresScalarEpilogue(VF.isVector()));
+ PHINode *LCSSAPhi = LiveOut->getPhi();
+ LCSSAPhi->addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
+ State.Plan->removeLiveOut(LCSSAPhi);
+ }
+ }
// Fix the initial value of the original recurrence in the scalar loop.
Builder.SetInsertPoint(&*LoopScalarPreHeader->begin());
@@ -3837,22 +3906,6 @@ void InnerLoopVectorizer::fixFixedOrderRecurrence(
Phi->setIncomingValueForBlock(LoopScalarPreHeader, Start);
Phi->setName("scalar.recur");
-
- // Finally, fix users of the recurrence outside the loop. The users will need
- // either the last value of the scalar recurrence or the last value of the
- // vector recurrence we extracted in the middle block. Since the loop is in
- // LCSSA form, we just need to find all the phi nodes for the original scalar
- // recurrence in the exit block, and then add an edge for the middle block.
- // Note that LCSSA does not imply single entry when the original scalar loop
- // had multiple exiting edges (as we always run the last iteration in the
- // scalar epilogue); in that case, there is no edge from middle to exit and
- // and thus no phis which needed updated.
- if (!Cost->requiresScalarEpilogue(VF))
- for (PHINode &LCSSAPhi : LoopExitBlock->phis())
- if (llvm::is_contained(LCSSAPhi.incoming_values(), Phi)) {
- LCSSAPhi.addIncoming(ExtractForPhiUsedOutsideLoop, LoopMiddleBlock);
- State.Plan->removeLiveOut(&LCSSAPhi);
- }
}
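// Illustrative sketch (assumed VF=4, UF=1; standalone C++, not the LLVM
// implementation): the fix-up above extracts two lanes from the last vector
// value of the recurrence. Lane VF-1 ("vector.recur.extract") resumes the
// scalar loop, and lane VF-2 ("vector.recur.extract.for.phi") is what LCSSA
// phis outside the loop see when the scalar epilogue is skipped.
#include <array>
struct RecurrenceExtracts {
  int ForScalarResume;   // lane VF - 1
  int ForPhiUsedOutside; // lane VF - 2
};
static RecurrenceExtracts extractLanes(const std::array<int, 4> &Incoming) {
  return {Incoming[3], Incoming[2]};
}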
void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
@@ -3872,9 +3925,6 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
// This is the vector-clone of the value that leaves the loop.
Type *VecTy = State.get(LoopExitInstDef, 0)->getType();
- // Wrap flags are in general invalid after vectorization, clear them.
- clearReductionWrapFlags(PhiR, State);
-
// Before each round, move the insertion point right between
// the PHIs and the values we are going to write.
// This allows us to write both PHINodes and the extractelement
@@ -4036,7 +4086,7 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
// We know that the loop is in LCSSA form. We need to update the PHI nodes
// in the exit blocks. See comment on analogous loop in
// fixFixedOrderRecurrence for a more complete explanation of the logic.
- if (!Cost->requiresScalarEpilogue(VF))
+ if (!Cost->requiresScalarEpilogue(VF.isVector()))
for (PHINode &LCSSAPhi : LoopExitBlock->phis())
if (llvm::is_contained(LCSSAPhi.incoming_values(), LoopExitInst)) {
LCSSAPhi.addIncoming(ReducedPartRdx, LoopMiddleBlock);
@@ -4054,38 +4104,6 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
}
-void InnerLoopVectorizer::clearReductionWrapFlags(VPReductionPHIRecipe *PhiR,
- VPTransformState &State) {
- const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
- RecurKind RK = RdxDesc.getRecurrenceKind();
- if (RK != RecurKind::Add && RK != RecurKind::Mul)
- return;
-
- SmallVector<VPValue *, 8> Worklist;
- SmallPtrSet<VPValue *, 8> Visited;
- Worklist.push_back(PhiR);
- Visited.insert(PhiR);
-
- while (!Worklist.empty()) {
- VPValue *Cur = Worklist.pop_back_val();
- for (unsigned Part = 0; Part < UF; ++Part) {
- Value *V = State.get(Cur, Part);
- if (!isa<OverflowingBinaryOperator>(V))
- break;
- cast<Instruction>(V)->dropPoisonGeneratingFlags();
- }
-
- for (VPUser *U : Cur->users()) {
- auto *UserRecipe = dyn_cast<VPRecipeBase>(U);
- if (!UserRecipe)
- continue;
- for (VPValue *V : UserRecipe->definedValues())
- if (Visited.insert(V).second)
- Worklist.push_back(V);
- }
- }
-}
-
void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
// The basic block and loop containing the predicated instruction.
auto *PredBB = PredInst->getParent();
@@ -4125,10 +4143,11 @@ void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
auto *I = dyn_cast<Instruction>(Worklist.pop_back_val());
// We can't sink an instruction if it is a phi node, is not in the loop,
- // or may have side effects.
+ // may have side effects or may read from memory.
+ // TODO: Could do more granular checking to allow sinking a load past non-store instructions.
if (!I || isa<PHINode>(I) || !VectorLoop->contains(I) ||
- I->mayHaveSideEffects())
- continue;
+ I->mayHaveSideEffects() || I->mayReadFromMemory())
+ continue;
// If the instruction is already in PredBB, check if we can sink its
// operands. In that case, VPlan's sinkScalarOperands() succeeded in
@@ -4189,7 +4208,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
// We should not collect Scalars more than once per VF. Right now, this
// function is called from collectUniformsAndScalars(), which already does
// this check. Collecting Scalars for VF=1 does not make any sense.
- assert(VF.isVector() && Scalars.find(VF) == Scalars.end() &&
+ assert(VF.isVector() && !Scalars.contains(VF) &&
"This function should not be visited twice for the same VF");
// This avoids any chances of creating a REPLICATE recipe during planning
@@ -4382,6 +4401,8 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
switch(I->getOpcode()) {
default:
return true;
+ case Instruction::Call:
+ return !VFDatabase::hasMaskedVariant(*(cast<CallInst>(I)), VF);
case Instruction::Load:
case Instruction::Store: {
auto *Ptr = getLoadStorePointerOperand(I);
@@ -4430,10 +4451,10 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
// both speculation safety (which follows from the same argument as loads),
// but also must prove the value being stored is correct. The easiest
// form of the later is to require that all values stored are the same.
- if (Legal->isUniformMemOp(*I) &&
- (isa<LoadInst>(I) ||
- (isa<StoreInst>(I) &&
- TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()))) &&
+ if (Legal->isInvariant(getLoadStorePointerOperand(I)) &&
+ (isa<LoadInst>(I) ||
+ (isa<StoreInst>(I) &&
+ TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()))) &&
!Legal->blockNeedsPredication(I->getParent()))
return false;
return true;
@@ -4445,6 +4466,8 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
// TODO: We can use the loop-preheader as context point here and get
// context sensitive reasoning
return !isSafeToSpeculativelyExecute(I);
+ case Instruction::Call:
+ return Legal->isMaskRequired(I);
}
}
@@ -4502,7 +4525,8 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
// second vector operand. One example of this are shifts on x86.
Value *Op2 = I->getOperand(1);
auto Op2Info = TTI.getOperandInfo(Op2);
- if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue && Legal->isUniform(Op2))
+ if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
+ Legal->isInvariant(Op2))
Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
SmallVector<const Value *, 4> Operands(I->operand_values());
@@ -4614,7 +4638,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
// already does this check. Collecting Uniforms for VF=1 does not make any
// sense.
- assert(VF.isVector() && Uniforms.find(VF) == Uniforms.end() &&
+ assert(VF.isVector() && !Uniforms.contains(VF) &&
"This function should not be visited twice for the same VF");
// Visit the list of Uniforms. If we'll not find any uniform value, we'll
@@ -4663,10 +4687,18 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse())
addToWorklistIfAllowed(Cmp);
+ auto PrevVF = VF.divideCoefficientBy(2);
// Return true if all lanes perform the same memory operation, and we can
// thus choose to execute only one.
auto isUniformMemOpUse = [&](Instruction *I) {
- if (!Legal->isUniformMemOp(*I))
+ // If the value was already known to not be uniform for the previous
+ // (smaller VF), it cannot be uniform for the larger VF.
+ if (PrevVF.isVector()) {
+ auto Iter = Uniforms.find(PrevVF);
+ if (Iter != Uniforms.end() && !Iter->second.contains(I))
+ return false;
+ }
+ if (!Legal->isUniformMemOp(*I, VF))
return false;
if (isa<LoadInst>(I))
// Loading the same address always produces the same result - at least
@@ -4689,11 +4721,14 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
WideningDecision == CM_Interleave);
};
-
// Returns true if Ptr is the pointer operand of a memory access instruction
- // I, and I is known to not require scalarization.
+ // I, I is known to not require scalarization, and the pointer is not also
+ // stored.
auto isVectorizedMemAccessUse = [&](Instruction *I, Value *Ptr) -> bool {
- return getLoadStorePointerOperand(I) == Ptr && isUniformDecision(I, VF);
+ if (isa<StoreInst>(I) && I->getOperand(0) == Ptr)
+ return false;
+ return getLoadStorePointerOperand(I) == Ptr &&
+ (isUniformDecision(I, VF) || Legal->isInvariant(Ptr));
};
// Holds a list of values which are known to have at least one uniform use.
@@ -4739,10 +4774,8 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
if (isUniformMemOpUse(&I))
addToWorklistIfAllowed(&I);
- if (isUniformDecision(&I, VF)) {
- assert(isVectorizedMemAccessUse(&I, Ptr) && "consistency check");
+ if (isVectorizedMemAccessUse(&I, Ptr))
HasUniformUse.insert(Ptr);
- }
}
// Add to the worklist any operands which have *only* uniform (e.g. lane 0
@@ -4906,12 +4939,11 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
return MaxScalableVF;
// Limit MaxScalableVF by the maximum safe dependence distance.
- std::optional<unsigned> MaxVScale = TTI.getMaxVScale();
- if (!MaxVScale && TheFunction->hasFnAttribute(Attribute::VScaleRange))
- MaxVScale =
- TheFunction->getFnAttribute(Attribute::VScaleRange).getVScaleRangeMax();
- MaxScalableVF =
- ElementCount::getScalable(MaxVScale ? (MaxSafeElements / *MaxVScale) : 0);
+ if (std::optional<unsigned> MaxVScale = getMaxVScale(*TheFunction, TTI))
+ MaxScalableVF = ElementCount::getScalable(MaxSafeElements / *MaxVScale);
+ else
+ MaxScalableVF = ElementCount::getScalable(0);
+
if (!MaxScalableVF)
reportVectorizationInfo(
"Max legal vector width too small, scalable vectorization "
@@ -4932,7 +4964,7 @@ FixedScalableVFPair LoopVectorizationCostModel::computeFeasibleMaxVF(
// the memory accesses that is most restrictive (involved in the smallest
// dependence distance).
unsigned MaxSafeElements =
- PowerOf2Floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
+ llvm::bit_floor(Legal->getMaxSafeVectorWidthInBits() / WidestType);
auto MaxSafeFixedVF = ElementCount::getFixed(MaxSafeElements);
auto MaxSafeScalableVF = getMaxLegalScalableVF(MaxSafeElements);
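// Illustrative note (standalone C++20; not part of the change above): the
// PowerOf2Floor -> llvm::bit_floor replacements in this patch keep the same
// semantics, shown here with std::bit_floor as a stand-in: round down to the
// largest power of two not exceeding the input, mapping 0 to 0.
#include <bit>
static_assert(std::bit_floor(48u) == 32u);
static_assert(std::bit_floor(64u) == 64u);
static_assert(std::bit_floor(0u) == 0u);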
@@ -5105,16 +5137,26 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
}
FixedScalableVFPair MaxFactors = computeFeasibleMaxVF(TC, UserVF, true);
+
// Avoid tail folding if the trip count is known to be a multiple of any VF
- // we chose.
- // FIXME: The condition below pessimises the case for fixed-width vectors,
- // when scalable VFs are also candidates for vectorization.
- if (MaxFactors.FixedVF.isVector() && !MaxFactors.ScalableVF) {
- ElementCount MaxFixedVF = MaxFactors.FixedVF;
- assert((UserVF.isNonZero() || isPowerOf2_32(MaxFixedVF.getFixedValue())) &&
+ // we choose.
+ std::optional<unsigned> MaxPowerOf2RuntimeVF =
+ MaxFactors.FixedVF.getFixedValue();
+ if (MaxFactors.ScalableVF) {
+ std::optional<unsigned> MaxVScale = getMaxVScale(*TheFunction, TTI);
+ if (MaxVScale && TTI.isVScaleKnownToBeAPowerOfTwo()) {
+ MaxPowerOf2RuntimeVF = std::max<unsigned>(
+ *MaxPowerOf2RuntimeVF,
+ *MaxVScale * MaxFactors.ScalableVF.getKnownMinValue());
+ } else
+ MaxPowerOf2RuntimeVF = std::nullopt; // Stick with tail-folding for now.
+ }
+
+ if (MaxPowerOf2RuntimeVF && *MaxPowerOf2RuntimeVF > 0) {
+ assert((UserVF.isNonZero() || isPowerOf2_32(*MaxPowerOf2RuntimeVF)) &&
"MaxFixedVF must be a power of 2");
- unsigned MaxVFtimesIC = UserIC ? MaxFixedVF.getFixedValue() * UserIC
- : MaxFixedVF.getFixedValue();
+ unsigned MaxVFtimesIC =
+ UserIC ? *MaxPowerOf2RuntimeVF * UserIC : *MaxPowerOf2RuntimeVF;
ScalarEvolution *SE = PSE.getSE();
const SCEV *BackedgeTakenCount = PSE.getBackedgeTakenCount();
const SCEV *ExitCount = SE->getAddExpr(
@@ -5134,7 +5176,7 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
// by masking.
// FIXME: look for a smaller MaxVF that does divide TC rather than masking.
if (Legal->prepareToFoldTailByMasking()) {
- FoldTailByMasking = true;
+ CanFoldTailByMasking = true;
return MaxFactors;
}
@@ -5187,7 +5229,7 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
// Ensure MaxVF is a power of 2; the dependence distance bound may not be.
// Note that both WidestRegister and WidestType may not be a powers of 2.
auto MaxVectorElementCount = ElementCount::get(
- PowerOf2Floor(WidestRegister.getKnownMinValue() / WidestType),
+ llvm::bit_floor(WidestRegister.getKnownMinValue() / WidestType),
ComputeScalableMaxVF);
MaxVectorElementCount = MinVF(MaxVectorElementCount, MaxSafeVF);
LLVM_DEBUG(dbgs() << "LV: The Widest register safe to use is: "
@@ -5207,6 +5249,13 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
auto Min = Attr.getVScaleRangeMin();
WidestRegisterMinEC *= Min;
}
+
+ // When a scalar epilogue is required, at least one iteration of the scalar
+ // loop has to execute. Adjust ConstTripCount accordingly to avoid picking a
+ // max VF that results in a dead vector loop.
+ if (ConstTripCount > 0 && requiresScalarEpilogue(true))
+ ConstTripCount -= 1;
+
if (ConstTripCount && ConstTripCount <= WidestRegisterMinEC &&
(!FoldTailByMasking || isPowerOf2_32(ConstTripCount))) {
// If loop trip count (TC) is known at compile time there is no point in
@@ -5214,7 +5263,7 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
// power of two which doesn't exceed TC.
// If MaxVectorElementCount is scalable, we only fall back on a fixed VF
// when the TC is less than or equal to the known number of lanes.
- auto ClampedConstTripCount = PowerOf2Floor(ConstTripCount);
+ auto ClampedConstTripCount = llvm::bit_floor(ConstTripCount);
LLVM_DEBUG(dbgs() << "LV: Clamping the MaxVF to maximum power of two not "
"exceeding the constant trip count: "
<< ClampedConstTripCount << "\n");
@@ -5228,7 +5277,7 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
if (MaximizeBandwidth || (MaximizeBandwidth.getNumOccurrences() == 0 &&
TTI.shouldMaximizeVectorBandwidth(RegKind))) {
auto MaxVectorElementCountMaxBW = ElementCount::get(
- PowerOf2Floor(WidestRegister.getKnownMinValue() / SmallestType),
+ llvm::bit_floor(WidestRegister.getKnownMinValue() / SmallestType),
ComputeScalableMaxVF);
MaxVectorElementCountMaxBW = MinVF(MaxVectorElementCountMaxBW, MaxSafeVF);
@@ -5273,9 +5322,14 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
return MaxVF;
}
-std::optional<unsigned> LoopVectorizationCostModel::getVScaleForTuning() const {
- if (TheFunction->hasFnAttribute(Attribute::VScaleRange)) {
- auto Attr = TheFunction->getFnAttribute(Attribute::VScaleRange);
+/// Convenience function that returns the value of vscale_range if
+/// vscale_range.min == vscale_range.max, and otherwise returns the value
+/// returned by the corresponding TTI method.
+static std::optional<unsigned>
+getVScaleForTuning(const Loop *L, const TargetTransformInfo &TTI) {
+ const Function *Fn = L->getHeader()->getParent();
+ if (Fn->hasFnAttribute(Attribute::VScaleRange)) {
+ auto Attr = Fn->getFnAttribute(Attribute::VScaleRange);
auto Min = Attr.getVScaleRangeMin();
auto Max = Attr.getVScaleRangeMax();
if (Max && Min == Max)
@@ -5285,31 +5339,39 @@ std::optional<unsigned> LoopVectorizationCostModel::getVScaleForTuning() const {
return TTI.getVScaleForTuning();
}
-bool LoopVectorizationCostModel::isMoreProfitable(
+bool LoopVectorizationPlanner::isMoreProfitable(
const VectorizationFactor &A, const VectorizationFactor &B) const {
InstructionCost CostA = A.Cost;
InstructionCost CostB = B.Cost;
- unsigned MaxTripCount = PSE.getSE()->getSmallConstantMaxTripCount(TheLoop);
-
- if (!A.Width.isScalable() && !B.Width.isScalable() && FoldTailByMasking &&
- MaxTripCount) {
- // If we are folding the tail and the trip count is a known (possibly small)
- // constant, the trip count will be rounded up to an integer number of
- // iterations. The total cost will be PerIterationCost*ceil(TripCount/VF),
- // which we compare directly. When not folding the tail, the total cost will
- // be PerIterationCost*floor(TC/VF) + Scalar remainder cost, and so is
- // approximated with the per-lane cost below instead of using the tripcount
- // as here.
- auto RTCostA = CostA * divideCeil(MaxTripCount, A.Width.getFixedValue());
- auto RTCostB = CostB * divideCeil(MaxTripCount, B.Width.getFixedValue());
+ unsigned MaxTripCount = PSE.getSE()->getSmallConstantMaxTripCount(OrigLoop);
+
+ if (!A.Width.isScalable() && !B.Width.isScalable() && MaxTripCount) {
+ // If the trip count is a known (possibly small) constant, the trip count
+ // will be rounded up to an integer number of iterations under
+ // FoldTailByMasking. The total cost in that case will be
+ // VecCost*ceil(TripCount/VF). When not folding the tail, the total
+ // cost will be VecCost*floor(TC/VF) + ScalarCost*(TC%VF). There will be
+ // some extra overheads, but for the purpose of comparing the costs of
+ // different VFs we can use this to compare the total loop-body cost
+ // expected after vectorization.
+ auto GetCostForTC = [MaxTripCount, this](unsigned VF,
+ InstructionCost VectorCost,
+ InstructionCost ScalarCost) {
+ return CM.foldTailByMasking() ? VectorCost * divideCeil(MaxTripCount, VF)
+ : VectorCost * (MaxTripCount / VF) +
+ ScalarCost * (MaxTripCount % VF);
+ };
+ auto RTCostA = GetCostForTC(A.Width.getFixedValue(), CostA, A.ScalarCost);
+ auto RTCostB = GetCostForTC(B.Width.getFixedValue(), CostB, B.ScalarCost);
+
return RTCostA < RTCostB;
}
// Improve estimate for the vector width if it is scalable.
unsigned EstimatedWidthA = A.Width.getKnownMinValue();
unsigned EstimatedWidthB = B.Width.getKnownMinValue();
- if (std::optional<unsigned> VScale = getVScaleForTuning()) {
+ if (std::optional<unsigned> VScale = getVScaleForTuning(OrigLoop, TTI)) {
if (A.Width.isScalable())
EstimatedWidthA *= *VScale;
if (B.Width.isScalable())
@@ -5328,9 +5390,74 @@ bool LoopVectorizationCostModel::isMoreProfitable(
return (CostA * EstimatedWidthB) < (CostB * EstimatedWidthA);
}
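// Illustrative sketch (assumed costs, standalone C++; not the LLVM
// implementation) of the GetCostForTC comparison above: with tail folding the
// vector body runs ceil(TC/VF) times, otherwise the leftover iterations run in
// the scalar loop.
#include <cstdint>
static uint64_t costForTripCount(bool FoldTail, uint64_t TC, uint64_t VF,
                                 uint64_t VectorCost, uint64_t ScalarCost) {
  if (FoldTail)
    return VectorCost * ((TC + VF - 1) / VF);             // ceil(TC / VF)
  return VectorCost * (TC / VF) + ScalarCost * (TC % VF); // body + scalar tail
}
// Example without tail folding, TC=10 and scalar iteration cost 2:
//   VF=4 with vector cost 6 -> 6*2 + 2*2 = 16
//   VF=8 with vector cost 9 -> 9*1 + 2*2 = 13, so VF=8 is cheaper here.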
-VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
+static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts,
+ OptimizationRemarkEmitter *ORE,
+ Loop *TheLoop) {
+ if (InvalidCosts.empty())
+ return;
+
+ // Emit a report of VFs with invalid costs in the loop.
+
+ // Group the remarks per instruction, keeping the instruction order from
+ // InvalidCosts.
+ std::map<Instruction *, unsigned> Numbering;
+ unsigned I = 0;
+ for (auto &Pair : InvalidCosts)
+ if (!Numbering.count(Pair.first))
+ Numbering[Pair.first] = I++;
+
+ // Sort the list, first on instruction(number) then on VF.
+ sort(InvalidCosts, [&Numbering](InstructionVFPair &A, InstructionVFPair &B) {
+ if (Numbering[A.first] != Numbering[B.first])
+ return Numbering[A.first] < Numbering[B.first];
+ ElementCountComparator ECC;
+ return ECC(A.second, B.second);
+ });
+
+ // For a list of ordered instruction-vf pairs:
+ // [(load, vf1), (load, vf2), (store, vf1)]
+ // Group the instructions together to emit separate remarks for:
+ // load (vf1, vf2)
+ // store (vf1)
+ auto Tail = ArrayRef<InstructionVFPair>(InvalidCosts);
+ auto Subset = ArrayRef<InstructionVFPair>();
+ do {
+ if (Subset.empty())
+ Subset = Tail.take_front(1);
+
+ Instruction *I = Subset.front().first;
+
+ // If the next instruction is different, or if there are no other pairs,
+ // emit a remark for the collated subset. e.g.
+ // [(load, vf1), (load, vf2)]
+ // to emit:
+ // remark: invalid costs for 'load' at VF=(vf1, vf2)
+ if (Subset == Tail || Tail[Subset.size()].first != I) {
+ std::string OutString;
+ raw_string_ostream OS(OutString);
+ assert(!Subset.empty() && "Unexpected empty range");
+ OS << "Instruction with invalid costs prevented vectorization at VF=(";
+ for (const auto &Pair : Subset)
+ OS << (Pair.second == Subset.front().second ? "" : ", ") << Pair.second;
+ OS << "):";
+ if (auto *CI = dyn_cast<CallInst>(I))
+ OS << " call to " << CI->getCalledFunction()->getName();
+ else
+ OS << " " << I->getOpcodeName();
+ OS.flush();
+ reportVectorizationInfo(OutString, "InvalidCost", ORE, TheLoop, I);
+ Tail = Tail.drop_front(Subset.size());
+ Subset = {};
+ } else
+ // Grow the subset by one element
+ Subset = Tail.take_front(Subset.size() + 1);
+ } while (!Tail.empty());
+}
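// Illustrative sketch (standalone C++; not the LLVM implementation) of the
// collation emitInvalidCostRemarks performs: consecutive pairs sharing the
// same instruction are reported in a single remark, so
// [(load, 2), (load, 4), (store, 2)] yields "load: 2 4" and "store: 2".
#include <cstdio>
#include <string>
#include <utility>
#include <vector>
static void printGroupedRemarks(
    const std::vector<std::pair<std::string, unsigned>> &SortedPairs) {
  for (size_t I = 0; I < SortedPairs.size();) {
    std::printf("%s:", SortedPairs[I].first.c_str());
    size_t J = I;
    while (J < SortedPairs.size() &&
           SortedPairs[J].first == SortedPairs[I].first)
      std::printf(" %u", SortedPairs[J++].second);
    std::printf("\n");
    I = J;
  }
}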
+
+VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor(
const ElementCountSet &VFCandidates) {
- InstructionCost ExpectedCost = expectedCost(ElementCount::getFixed(1)).first;
+ InstructionCost ExpectedCost =
+ CM.expectedCost(ElementCount::getFixed(1)).first;
LLVM_DEBUG(dbgs() << "LV: Scalar loop costs: " << ExpectedCost << ".\n");
assert(ExpectedCost.isValid() && "Unexpected invalid cost for scalar loop");
assert(VFCandidates.count(ElementCount::getFixed(1)) &&
@@ -5340,7 +5467,7 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
ExpectedCost);
VectorizationFactor ChosenFactor = ScalarCost;
- bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled;
+ bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled;
if (ForceVectorization && VFCandidates.size() > 1) {
// Ignore scalar width, because the user explicitly wants vectorization.
// Initialize cost to max so that VF = 2 is, at least, chosen during cost
@@ -5354,12 +5481,13 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
if (i.isScalar())
continue;
- VectorizationCostTy C = expectedCost(i, &InvalidCosts);
+ LoopVectorizationCostModel::VectorizationCostTy C =
+ CM.expectedCost(i, &InvalidCosts);
VectorizationFactor Candidate(i, C.first, ScalarCost.ScalarCost);
#ifndef NDEBUG
unsigned AssumedMinimumVscale = 1;
- if (std::optional<unsigned> VScale = getVScaleForTuning())
+ if (std::optional<unsigned> VScale = getVScaleForTuning(OrigLoop, TTI))
AssumedMinimumVscale = *VScale;
unsigned Width =
Candidate.Width.isScalable()
@@ -5388,70 +5516,13 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
ChosenFactor = Candidate;
}
- // Emit a report of VFs with invalid costs in the loop.
- if (!InvalidCosts.empty()) {
- // Group the remarks per instruction, keeping the instruction order from
- // InvalidCosts.
- std::map<Instruction *, unsigned> Numbering;
- unsigned I = 0;
- for (auto &Pair : InvalidCosts)
- if (!Numbering.count(Pair.first))
- Numbering[Pair.first] = I++;
-
- // Sort the list, first on instruction(number) then on VF.
- llvm::sort(InvalidCosts,
- [&Numbering](InstructionVFPair &A, InstructionVFPair &B) {
- if (Numbering[A.first] != Numbering[B.first])
- return Numbering[A.first] < Numbering[B.first];
- ElementCountComparator ECC;
- return ECC(A.second, B.second);
- });
-
- // For a list of ordered instruction-vf pairs:
- // [(load, vf1), (load, vf2), (store, vf1)]
- // Group the instructions together to emit separate remarks for:
- // load (vf1, vf2)
- // store (vf1)
- auto Tail = ArrayRef<InstructionVFPair>(InvalidCosts);
- auto Subset = ArrayRef<InstructionVFPair>();
- do {
- if (Subset.empty())
- Subset = Tail.take_front(1);
-
- Instruction *I = Subset.front().first;
-
- // If the next instruction is different, or if there are no other pairs,
- // emit a remark for the collated subset. e.g.
- // [(load, vf1), (load, vf2))]
- // to emit:
- // remark: invalid costs for 'load' at VF=(vf, vf2)
- if (Subset == Tail || Tail[Subset.size()].first != I) {
- std::string OutString;
- raw_string_ostream OS(OutString);
- assert(!Subset.empty() && "Unexpected empty range");
- OS << "Instruction with invalid costs prevented vectorization at VF=(";
- for (const auto &Pair : Subset)
- OS << (Pair.second == Subset.front().second ? "" : ", ")
- << Pair.second;
- OS << "):";
- if (auto *CI = dyn_cast<CallInst>(I))
- OS << " call to " << CI->getCalledFunction()->getName();
- else
- OS << " " << I->getOpcodeName();
- OS.flush();
- reportVectorizationInfo(OutString, "InvalidCost", ORE, TheLoop, I);
- Tail = Tail.drop_front(Subset.size());
- Subset = {};
- } else
- // Grow the subset by one element
- Subset = Tail.take_front(Subset.size() + 1);
- } while (!Tail.empty());
- }
+ emitInvalidCostRemarks(InvalidCosts, ORE, OrigLoop);
- if (!EnableCondStoresVectorization && NumPredStores) {
- reportVectorizationFailure("There are conditional stores.",
+ if (!EnableCondStoresVectorization && CM.hasPredStores()) {
+ reportVectorizationFailure(
+ "There are conditional stores.",
"store that is conditionally executed prevents vectorization",
- "ConditionalStore", ORE, TheLoop);
+ "ConditionalStore", ORE, OrigLoop);
ChosenFactor = ScalarCost;
}
@@ -5463,11 +5534,11 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
return ChosenFactor;
}
-bool LoopVectorizationCostModel::isCandidateForEpilogueVectorization(
- const Loop &L, ElementCount VF) const {
+bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
+ ElementCount VF) const {
// Cross iteration phis such as reductions need special handling and are
// currently unsupported.
- if (any_of(L.getHeader()->phis(),
+ if (any_of(OrigLoop->getHeader()->phis(),
[&](PHINode &Phi) { return Legal->isFixedOrderRecurrence(&Phi); }))
return false;
@@ -5475,20 +5546,21 @@ bool LoopVectorizationCostModel::isCandidateForEpilogueVectorization(
// currently unsupported.
for (const auto &Entry : Legal->getInductionVars()) {
// Look for uses of the value of the induction at the last iteration.
- Value *PostInc = Entry.first->getIncomingValueForBlock(L.getLoopLatch());
+ Value *PostInc =
+ Entry.first->getIncomingValueForBlock(OrigLoop->getLoopLatch());
for (User *U : PostInc->users())
- if (!L.contains(cast<Instruction>(U)))
+ if (!OrigLoop->contains(cast<Instruction>(U)))
return false;
// Look for uses of penultimate value of the induction.
for (User *U : Entry.first->users())
- if (!L.contains(cast<Instruction>(U)))
+ if (!OrigLoop->contains(cast<Instruction>(U)))
return false;
}
// Epilogue vectorization code has not been audited to ensure it handles
// non-latch exits properly. It may be fine, but it needs to be audited and
// tested.
- if (L.getExitingBlock() != L.getLoopLatch())
+ if (OrigLoop->getExitingBlock() != OrigLoop->getLoopLatch())
return false;
return true;
@@ -5507,62 +5579,59 @@ bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
// We also consider epilogue vectorization unprofitable for targets that don't
// consider interleaving beneficial (eg. MVE).
- if (TTI.getMaxInterleaveFactor(VF.getKnownMinValue()) <= 1)
+ if (TTI.getMaxInterleaveFactor(VF) <= 1)
return false;
- // FIXME: We should consider changing the threshold for scalable
- // vectors to take VScaleForTuning into account.
- if (VF.getKnownMinValue() >= EpilogueVectorizationMinVF)
+
+ unsigned Multiplier = 1;
+ if (VF.isScalable())
+ Multiplier = getVScaleForTuning(TheLoop, TTI).value_or(1);
+ if ((Multiplier * VF.getKnownMinValue()) >= EpilogueVectorizationMinVF)
return true;
return false;
}
-VectorizationFactor
-LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
- const ElementCount MainLoopVF, const LoopVectorizationPlanner &LVP) {
+VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
+ const ElementCount MainLoopVF, unsigned IC) {
VectorizationFactor Result = VectorizationFactor::Disabled();
if (!EnableEpilogueVectorization) {
- LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is disabled.\n";);
+ LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is disabled.\n");
return Result;
}
- if (!isScalarEpilogueAllowed()) {
- LLVM_DEBUG(
- dbgs() << "LEV: Unable to vectorize epilogue because no epilogue is "
- "allowed.\n";);
+ if (!CM.isScalarEpilogueAllowed()) {
+ LLVM_DEBUG(dbgs() << "LEV: Unable to vectorize epilogue because no "
+ "epilogue is allowed.\n");
return Result;
}
// Not really a cost consideration, but check for unsupported cases here to
// simplify the logic.
- if (!isCandidateForEpilogueVectorization(*TheLoop, MainLoopVF)) {
- LLVM_DEBUG(
- dbgs() << "LEV: Unable to vectorize epilogue because the loop is "
- "not a supported candidate.\n";);
+ if (!isCandidateForEpilogueVectorization(MainLoopVF)) {
+ LLVM_DEBUG(dbgs() << "LEV: Unable to vectorize epilogue because the loop "
+ "is not a supported candidate.\n");
return Result;
}
if (EpilogueVectorizationForceVF > 1) {
- LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization factor is forced.\n";);
+ LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization factor is forced.\n");
ElementCount ForcedEC = ElementCount::getFixed(EpilogueVectorizationForceVF);
- if (LVP.hasPlanWithVF(ForcedEC))
+ if (hasPlanWithVF(ForcedEC))
return {ForcedEC, 0, 0};
else {
- LLVM_DEBUG(
- dbgs()
- << "LEV: Epilogue vectorization forced factor is not viable.\n";);
+ LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization forced factor is not "
+ "viable.\n");
return Result;
}
}
- if (TheLoop->getHeader()->getParent()->hasOptSize() ||
- TheLoop->getHeader()->getParent()->hasMinSize()) {
+ if (OrigLoop->getHeader()->getParent()->hasOptSize() ||
+ OrigLoop->getHeader()->getParent()->hasMinSize()) {
LLVM_DEBUG(
- dbgs()
- << "LEV: Epilogue vectorization skipped due to opt for size.\n";);
+ dbgs() << "LEV: Epilogue vectorization skipped due to opt for size.\n");
return Result;
}
- if (!isEpilogueVectorizationProfitable(MainLoopVF)) {
+ if (!CM.isEpilogueVectorizationProfitable(MainLoopVF)) {
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is not profitable for "
"this loop\n");
return Result;
@@ -5574,21 +5643,48 @@ LoopVectorizationCostModel::selectEpilogueVectorizationFactor(
ElementCount EstimatedRuntimeVF = MainLoopVF;
if (MainLoopVF.isScalable()) {
EstimatedRuntimeVF = ElementCount::getFixed(MainLoopVF.getKnownMinValue());
- if (std::optional<unsigned> VScale = getVScaleForTuning())
+ if (std::optional<unsigned> VScale = getVScaleForTuning(OrigLoop, TTI))
EstimatedRuntimeVF *= *VScale;
}
- for (auto &NextVF : ProfitableVFs)
- if (((!NextVF.Width.isScalable() && MainLoopVF.isScalable() &&
- ElementCount::isKnownLT(NextVF.Width, EstimatedRuntimeVF)) ||
- ElementCount::isKnownLT(NextVF.Width, MainLoopVF)) &&
- (Result.Width.isScalar() || isMoreProfitable(NextVF, Result)) &&
- LVP.hasPlanWithVF(NextVF.Width))
+ ScalarEvolution &SE = *PSE.getSE();
+ Type *TCType = Legal->getWidestInductionType();
+ const SCEV *RemainingIterations = nullptr;
+ for (auto &NextVF : ProfitableVFs) {
+ // Skip candidate VFs without a corresponding VPlan.
+ if (!hasPlanWithVF(NextVF.Width))
+ continue;
+
+ // Skip candidate VFs with widths >= the estimated runtime VF (scalable
+ // vectors) or the VF of the main loop (fixed vectors).
+ if ((!NextVF.Width.isScalable() && MainLoopVF.isScalable() &&
+ ElementCount::isKnownGE(NextVF.Width, EstimatedRuntimeVF)) ||
+ ElementCount::isKnownGE(NextVF.Width, MainLoopVF))
+ continue;
+
+ // If NextVF is greater than the number of remaining iterations, the
+ // epilogue loop would be dead. Skip such factors.
+ if (!MainLoopVF.isScalable() && !NextVF.Width.isScalable()) {
+ // TODO: extend to support scalable VFs.
+ if (!RemainingIterations) {
+ const SCEV *TC = createTripCountSCEV(TCType, PSE, OrigLoop);
+ RemainingIterations = SE.getURemExpr(
+ TC, SE.getConstant(TCType, MainLoopVF.getKnownMinValue() * IC));
+ }
+ if (SE.isKnownPredicate(
+ CmpInst::ICMP_UGT,
+ SE.getConstant(TCType, NextVF.Width.getKnownMinValue()),
+ RemainingIterations))
+ continue;
+ }
+
+ if (Result.Width.isScalar() || isMoreProfitable(NextVF, Result))
Result = NextVF;
+ }
if (Result != VectorizationFactor::Disabled())
LLVM_DEBUG(dbgs() << "LEV: Vectorizing epilogue loop with VF = "
- << Result.Width << "\n";);
+ << Result.Width << "\n");
return Result;
}
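// Illustrative sketch (fixed-width VFs, assumed values, standalone C++; not
// the LLVM implementation): the RemainingIterations check above rejects
// epilogue VFs that could never execute because the main vector loop leaves
// fewer than one epilogue iteration's worth of work.
#include <cstdint>
static bool epilogueVFCanRun(uint64_t TC, uint64_t MainVF, uint64_t IC,
                             uint64_t EpilogueVF) {
  const uint64_t RemainingIterations = TC % (MainVF * IC);
  return EpilogueVF <= RemainingIterations;
}
// Example: TC=37, MainVF=8, IC=2 leaves 37 % 16 = 5 iterations, so an epilogue
// VF of 8 is skipped while VF=4 stays a candidate.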
@@ -5688,7 +5784,7 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
return 1;
// We used the distance for the interleave count.
- if (Legal->getMaxSafeDepDistBytes() != -1U)
+ if (!Legal->isSafeForAnyVectorWidth())
return 1;
auto BestKnownTC = getSmallBestKnownTC(*PSE.getSE(), TheLoop);
@@ -5750,20 +5846,19 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
if (R.LoopInvariantRegs.find(pair.first) != R.LoopInvariantRegs.end())
LoopInvariantRegs = R.LoopInvariantRegs[pair.first];
- unsigned TmpIC = PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs) / MaxLocalUsers);
+ unsigned TmpIC = llvm::bit_floor((TargetNumRegisters - LoopInvariantRegs) /
+ MaxLocalUsers);
// Don't count the induction variable as interleaved.
if (EnableIndVarRegisterHeur) {
- TmpIC =
- PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs - 1) /
- std::max(1U, (MaxLocalUsers - 1)));
+ TmpIC = llvm::bit_floor((TargetNumRegisters - LoopInvariantRegs - 1) /
+ std::max(1U, (MaxLocalUsers - 1)));
}
IC = std::min(IC, TmpIC);
}
// Clamp the interleave ranges to reasonable counts.
- unsigned MaxInterleaveCount =
- TTI.getMaxInterleaveFactor(VF.getKnownMinValue());
+ unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF);
// Check if the user has overridden the max.
if (VF.isScalar()) {
@@ -5834,8 +5929,8 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
// We assume that the cost overhead is 1 and we use the cost model
// to estimate the cost of the loop and interleave until the cost of the
// loop overhead is about 5% of the cost of the loop.
- unsigned SmallIC = std::min(
- IC, (unsigned)PowerOf2Floor(SmallLoopCost / *LoopCost.getValue()));
+ unsigned SmallIC = std::min(IC, (unsigned)llvm::bit_floor<uint64_t>(
+ SmallLoopCost / *LoopCost.getValue()));
// Interleave until store/load ports (estimated by max interleave count) are
// saturated.
@@ -5953,7 +6048,7 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
// Saves the list of values that are used in the loop but are defined outside
// the loop (not including non-instruction values such as arguments and
// constants).
- SmallPtrSet<Value *, 8> LoopInvariants;
+ SmallSetVector<Instruction *, 8> LoopInvariants;
for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
for (Instruction &I : BB->instructionsWithoutDebug()) {
@@ -6079,11 +6174,16 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
for (auto *Inst : LoopInvariants) {
// FIXME: The target might use more than one register for the type
// even in the scalar case.
- unsigned Usage =
- VFs[i].isScalar() ? 1 : GetRegUsage(Inst->getType(), VFs[i]);
+ bool IsScalar = all_of(Inst->users(), [&](User *U) {
+ auto *I = cast<Instruction>(U);
+ return TheLoop != LI->getLoopFor(I->getParent()) ||
+ isScalarAfterVectorization(I, VFs[i]);
+ });
+
+ ElementCount VF = IsScalar ? ElementCount::getFixed(1) : VFs[i];
unsigned ClassID =
- TTI.getRegisterClassForType(VFs[i].isVector(), Inst->getType());
- Invariant[ClassID] += Usage;
+ TTI.getRegisterClassForType(VF.isVector(), Inst->getType());
+ Invariant[ClassID] += GetRegUsage(Inst->getType(), VF);
}
LLVM_DEBUG({
@@ -6134,8 +6234,7 @@ void LoopVectorizationCostModel::collectInstsToScalarize(ElementCount VF) {
// instructions to scalarize, there's nothing to do. Collection may already
// have occurred if we have a user-selected VF and are now computing the
// expected cost for interleaving.
- if (VF.isScalar() || VF.isZero() ||
- InstsToScalarize.find(VF) != InstsToScalarize.end())
+ if (VF.isScalar() || VF.isZero() || InstsToScalarize.contains(VF))
return;
// Initialize a mapping for VF in InstsToScalarize. If we find that it's
@@ -6224,7 +6323,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
Instruction *I = Worklist.pop_back_val();
// If we've already analyzed the instruction, there's nothing to do.
- if (ScalarCosts.find(I) != ScalarCosts.end())
+ if (ScalarCosts.contains(I))
continue;
// Compute the cost of the vector instruction. Note that this cost already
@@ -6362,11 +6461,6 @@ static const SCEV *getAddressAccessSCEV(
return PSE.getSCEV(Ptr);
}
-static bool isStrideMul(Instruction *I, LoopVectorizationLegality *Legal) {
- return Legal->hasStride(I->getOperand(0)) ||
- Legal->hasStride(I->getOperand(1));
-}
-
InstructionCost
LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
ElementCount VF) {
@@ -6460,7 +6554,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
InstructionCost
LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
ElementCount VF) {
- assert(Legal->isUniformMemOp(*I));
+ assert(Legal->isUniformMemOp(*I, VF));
Type *ValTy = getLoadStoreType(I);
auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
@@ -6475,7 +6569,7 @@ LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
}
StoreInst *SI = cast<StoreInst>(I);
- bool isLoopInvariantStoreValue = Legal->isUniform(SI->getValueOperand());
+ bool isLoopInvariantStoreValue = Legal->isInvariant(SI->getValueOperand());
return TTI.getAddressComputationCost(ValTy) +
TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
CostKind) +
@@ -6502,11 +6596,6 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
InstructionCost
LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
ElementCount VF) {
- // TODO: Once we have support for interleaving with scalable vectors
- // we can calculate the cost properly here.
- if (VF.isScalable())
- return InstructionCost::getInvalid();
-
Type *ValTy = getLoadStoreType(I);
auto *VectorTy = cast<VectorType>(ToVectorTy(ValTy, VF));
unsigned AS = getLoadStoreAddressSpace(I);
@@ -6836,7 +6925,7 @@ void LoopVectorizationCostModel::setCostBasedWideningDecision(ElementCount VF) {
if (isa<StoreInst>(&I) && isScalarWithPredication(&I, VF))
NumPredStores++;
- if (Legal->isUniformMemOp(I)) {
+ if (Legal->isUniformMemOp(I, VF)) {
auto isLegalToScalarize = [&]() {
if (!VF.isScalable())
// Scalarization of fixed length vectors "just works".
@@ -7134,8 +7223,12 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- // Since we will replace the stride by 1 the multiplication should go away.
- if (I->getOpcode() == Instruction::Mul && isStrideMul(I, Legal))
+ // If we're speculating on the stride being 1, the multiplication may
+ // fold away. We can generalize this for all operations using the notion
+ // of neutral elements. (TODO)
+ if (I->getOpcode() == Instruction::Mul &&
+ (PSE.getSCEV(I->getOperand(0))->isOne() ||
+ PSE.getSCEV(I->getOperand(1))->isOne()))
return 0;
// Detect reduction patterns
@@ -7146,7 +7239,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
// second vector operand. One example of this are shifts on x86.
Value *Op2 = I->getOperand(1);
auto Op2Info = TTI.getOperandInfo(Op2);
- if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue && Legal->isUniform(Op2))
+ if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
+ Legal->isInvariant(Op2))
Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
SmallVector<const Value *, 4> Operands(I->operand_values());
@@ -7304,7 +7398,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
VectorTy =
largestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy);
} else if (Opcode == Instruction::ZExt || Opcode == Instruction::SExt) {
- SrcVecTy = largestIntegerVectorType(SrcVecTy, MinVecTy);
+ // Leave SrcVecTy unchanged - we only shrink the destination element
+ // type.
VectorTy =
smallestIntegerVectorType(ToVectorTy(I->getType(), VF), MinVecTy);
}
@@ -7316,9 +7411,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
if (RecurrenceDescriptor::isFMulAddIntrinsic(I))
if (auto RedCost = getReductionPatternCost(I, VF, VectorTy, CostKind))
return *RedCost;
- bool NeedToScalarize;
+ Function *Variant;
CallInst *CI = cast<CallInst>(I);
- InstructionCost CallCost = getVectorCallCost(CI, VF, NeedToScalarize);
+ InstructionCost CallCost = getVectorCallCost(CI, VF, &Variant);
if (getVectorIntrinsicIDForCall(CI, TLI)) {
InstructionCost IntrinsicCost = getVectorIntrinsicCost(CI, VF);
return std::min(CallCost, IntrinsicCost);
@@ -7339,37 +7434,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
} // end of switch.
}
-char LoopVectorize::ID = 0;
-
-static const char lv_name[] = "Loop Vectorization";
-
-INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
-INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(InjectTLIMappingsLegacy)
-INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
-
-namespace llvm {
-
-Pass *createLoopVectorizePass() { return new LoopVectorize(); }
-
-Pass *createLoopVectorizePass(bool InterleaveOnlyWhenForced,
- bool VectorizeOnlyWhenForced) {
- return new LoopVectorize(InterleaveOnlyWhenForced, VectorizeOnlyWhenForced);
-}
-
-} // end namespace llvm
-
void LoopVectorizationCostModel::collectValuesToIgnore() {
// Ignore ephemeral values.
CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore);
@@ -7462,7 +7526,7 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
// reasonable one.
if (UserVF.isZero()) {
VF = ElementCount::getFixed(determineVPlanVF(
- TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
+ TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
.getFixedValue(),
CM));
LLVM_DEBUG(dbgs() << "LV: VPlan computed VF " << VF << ".\n");
@@ -7497,13 +7561,16 @@ LoopVectorizationPlanner::planInVPlanNativePath(ElementCount UserVF) {
std::optional<VectorizationFactor>
LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
assert(OrigLoop->isInnermost() && "Inner loop expected.");
+ CM.collectValuesToIgnore();
+ CM.collectElementTypesForWidening();
+
FixedScalableVFPair MaxFactors = CM.computeMaxVF(UserVF, UserIC);
if (!MaxFactors) // Cases that should not to be vectorized nor interleaved.
return std::nullopt;
// Invalidate interleave groups if all blocks of loop will be predicated.
if (CM.blockNeedsPredicationForAnyReason(OrigLoop->getHeader()) &&
- !useMaskedInterleavedAccesses(*TTI)) {
+ !useMaskedInterleavedAccesses(TTI)) {
LLVM_DEBUG(
dbgs()
<< "LV: Invalidate all interleaved groups due to fold-tail by masking "
@@ -7527,6 +7594,12 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
CM.collectInLoopReductions();
buildVPlansWithVPRecipes(UserVF, UserVF);
+ if (!hasPlanWithVF(UserVF)) {
+ LLVM_DEBUG(dbgs() << "LV: No VPlan could be built for " << UserVF
+ << ".\n");
+ return std::nullopt;
+ }
+
LLVM_DEBUG(printPlans(dbgs()));
return {{UserVF, 0, 0}};
} else
@@ -7562,8 +7635,13 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
return VectorizationFactor::Disabled();
// Select the optimal vectorization factor.
- VectorizationFactor VF = CM.selectVectorizationFactor(VFCandidates);
+ VectorizationFactor VF = selectVectorizationFactor(VFCandidates);
assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero.");
+ if (!hasPlanWithVF(VF.Width)) {
+ LLVM_DEBUG(dbgs() << "LV: No VPlan could be built for " << VF.Width
+ << ".\n");
+ return std::nullopt;
+ }
return VF;
}
@@ -7614,43 +7692,51 @@ static void AddRuntimeUnrollDisableMetaData(Loop *L) {
}
}
-void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
- VPlan &BestVPlan,
- InnerLoopVectorizer &ILV,
- DominatorTree *DT,
- bool IsEpilogueVectorization) {
+SCEV2ValueTy LoopVectorizationPlanner::executePlan(
+ ElementCount BestVF, unsigned BestUF, VPlan &BestVPlan,
+ InnerLoopVectorizer &ILV, DominatorTree *DT, bool IsEpilogueVectorization,
+ DenseMap<const SCEV *, Value *> *ExpandedSCEVs) {
assert(BestVPlan.hasVF(BestVF) &&
"Trying to execute plan with unsupported VF");
assert(BestVPlan.hasUF(BestUF) &&
"Trying to execute plan with unsupported UF");
+ assert(
+ (IsEpilogueVectorization || !ExpandedSCEVs) &&
+ "expanded SCEVs to reuse can only be used during epilogue vectorization");
LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << BestVF << ", UF=" << BestUF
<< '\n');
- // Workaround! Compute the trip count of the original loop and cache it
- // before we start modifying the CFG. This code has a systemic problem
- // wherein it tries to run analysis over partially constructed IR; this is
- // wrong, and not simply for SCEV. The trip count of the original loop
- // simply happens to be prone to hitting this in practice. In theory, we
- // can hit the same issue for any SCEV, or ValueTracking query done during
- // mutation. See PR49900.
- ILV.getOrCreateTripCount(OrigLoop->getLoopPreheader());
-
if (!IsEpilogueVectorization)
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
// Perform the actual loop transformation.
+ VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan};
+
+ // 0. Generate SCEV-dependent code into the preheader, including TripCount,
+ // before making any changes to the CFG.
+ if (!BestVPlan.getPreheader()->empty()) {
+ State.CFG.PrevBB = OrigLoop->getLoopPreheader();
+ State.Builder.SetInsertPoint(OrigLoop->getLoopPreheader()->getTerminator());
+ BestVPlan.getPreheader()->execute(&State);
+ }
+ if (!ILV.getTripCount())
+ ILV.setTripCount(State.get(BestVPlan.getTripCount(), {0, 0}));
+ else
+ assert(IsEpilogueVectorization && "should only re-use the existing trip "
+ "count during epilogue vectorization");
// 1. Set up the skeleton for vectorization, including vector pre-header and
// middle block. The vector loop is created during VPlan execution.
- VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan};
Value *CanonicalIVStartValue;
std::tie(State.CFG.PrevBB, CanonicalIVStartValue) =
- ILV.createVectorizedLoopSkeleton();
+ ILV.createVectorizedLoopSkeleton(ExpandedSCEVs ? *ExpandedSCEVs
+ : State.ExpandedSCEVs);
// Only use noalias metadata when using memory checks guaranteeing no overlap
// across all iterations.
const LoopAccessInfo *LAI = ILV.Legal->getLAI();
+ std::unique_ptr<LoopVersioning> LVer = nullptr;
if (LAI && !LAI->getRuntimePointerChecking()->getChecks().empty() &&
!LAI->getRuntimePointerChecking()->getDiffChecks()) {
@@ -7658,9 +7744,10 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
// still use it to add the noalias metadata.
// TODO: Find a better way to re-use LoopVersioning functionality to add
// metadata.
- State.LVer = std::make_unique<LoopVersioning>(
+ LVer = std::make_unique<LoopVersioning>(
*LAI, LAI->getRuntimePointerChecking()->getChecks(), OrigLoop, LI, DT,
PSE.getSE());
+ State.LVer = &*LVer;
State.LVer->prepareNoAliasMetadata();
}
@@ -7677,10 +7764,9 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
//===------------------------------------------------===//
// 2. Copy and widen instructions from the old loop into the new loop.
- BestVPlan.prepareToExecute(ILV.getOrCreateTripCount(nullptr),
- ILV.getOrCreateVectorTripCount(nullptr),
- CanonicalIVStartValue, State,
- IsEpilogueVectorization);
+ BestVPlan.prepareToExecute(
+ ILV.getTripCount(), ILV.getOrCreateVectorTripCount(nullptr),
+ CanonicalIVStartValue, State, IsEpilogueVectorization);
BestVPlan.execute(&State);
@@ -7706,13 +7792,18 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
LoopVectorizeHints Hints(L, true, *ORE);
Hints.setAlreadyVectorized();
}
- AddRuntimeUnrollDisableMetaData(L);
+ TargetTransformInfo::UnrollingPreferences UP;
+ TTI.getUnrollingPreferences(L, *PSE.getSE(), UP, ORE);
+ if (!UP.UnrollVectorizedLoop || CanonicalIVStartValue)
+ AddRuntimeUnrollDisableMetaData(L);
// 3. Fix the vectorized code: take care of header phi's, live-outs,
// predication, updating analyses.
ILV.fixVectorizedLoop(State, BestVPlan);
ILV.printDebugTracesAtEnd();
+
+ return State.ExpandedSCEVs;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -7725,8 +7816,6 @@ void LoopVectorizationPlanner::printPlans(raw_ostream &O) {
}
#endif
-Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
-
//===--------------------------------------------------------------------===//
// EpilogueVectorizerMainLoop
//===--------------------------------------------------------------------===//
@@ -7734,7 +7823,8 @@ Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
/// This function is partially responsible for generating the control flow
/// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
std::pair<BasicBlock *, Value *>
-EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
+EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton(
+ const SCEV2ValueTy &ExpandedSCEVs) {
createVectorLoopSkeleton("");
// Generate the code to check the minimum iteration count of the vector
@@ -7795,7 +7885,7 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
assert(Bypass && "Expected valid bypass basic block.");
ElementCount VFactor = ForEpilogue ? EPI.EpilogueVF : VF;
unsigned UFactor = ForEpilogue ? EPI.EpilogueUF : UF;
- Value *Count = getOrCreateTripCount(LoopVectorPreHeader);
+ Value *Count = getTripCount();
// Reuse existing vector loop preheader for TC checks.
// Note that new preheader block is generated for vector loop.
BasicBlock *const TCCheckBlock = LoopVectorPreHeader;
@@ -7803,8 +7893,10 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
// Generate code to check if the loop's trip count is less than VF * UF of the
// main vector loop.
- auto P = Cost->requiresScalarEpilogue(ForEpilogue ? EPI.EpilogueVF : VF) ?
- ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
+ auto P = Cost->requiresScalarEpilogue(ForEpilogue ? EPI.EpilogueVF.isVector()
+ : VF.isVector())
+ ? ICmpInst::ICMP_ULE
+ : ICmpInst::ICMP_ULT;
Value *CheckMinIters = Builder.CreateICmp(
P, Count, createStepForVF(Builder, Count->getType(), VFactor, UFactor),
@@ -7824,7 +7916,7 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
// Update dominator for Bypass & LoopExit.
DT->changeImmediateDominator(Bypass, TCCheckBlock);
- if (!Cost->requiresScalarEpilogue(EPI.EpilogueVF))
+ if (!Cost->requiresScalarEpilogue(EPI.EpilogueVF.isVector()))
// For loops with multiple exits, there's no edge from the middle block
// to exit blocks (as the epilogue must run) and thus no need to update
// the immediate dominator of the exit blocks.
@@ -7852,7 +7944,8 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
/// This function is partially responsible for generating the control flow
/// depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization.
std::pair<BasicBlock *, Value *>
-EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
+EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
+ const SCEV2ValueTy &ExpandedSCEVs) {
createVectorLoopSkeleton("vec.epilog.");
// Now, compare the remaining count and if there aren't enough iterations to
@@ -7891,7 +7984,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
DT->changeImmediateDominator(LoopScalarPreHeader,
EPI.EpilogueIterationCountCheck);
- if (!Cost->requiresScalarEpilogue(EPI.EpilogueVF))
+ if (!Cost->requiresScalarEpilogue(EPI.EpilogueVF.isVector()))
// If there is an epilogue which must run, there's no edge from the
// middle block to exit blocks and thus no need to update the immediate
// dominator of the exit blocks.
@@ -7950,7 +8043,8 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
// check, then the resume value for the induction variable comes from
// the trip count of the main vector loop, hence passing the AdditionalBypass
// argument.
- createInductionResumeValues({VecEpilogueIterationCountCheck,
+ createInductionResumeValues(ExpandedSCEVs,
+ {VecEpilogueIterationCountCheck,
EPI.VectorTripCount} /* AdditionalBypass */);
return {completeLoopSkeleton(), EPResumeVal};
@@ -7972,8 +8066,9 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
// Generate code to check if the loop's trip count is less than VF * UF of the
// vector epilogue loop.
- auto P = Cost->requiresScalarEpilogue(EPI.EpilogueVF) ?
- ICmpInst::ICMP_ULE : ICmpInst::ICMP_ULT;
+ auto P = Cost->requiresScalarEpilogue(EPI.EpilogueVF.isVector())
+ ? ICmpInst::ICMP_ULE
+ : ICmpInst::ICMP_ULT;
Value *CheckMinIters =
Builder.CreateICmp(P, Count,
@@ -8008,8 +8103,7 @@ bool LoopVectorizationPlanner::getDecisionAndClampRange(
assert(!Range.isEmpty() && "Trying to test an empty VF range.");
bool PredicateAtRangeStart = Predicate(Range.Start);
- for (ElementCount TmpVF = Range.Start * 2;
- ElementCount::isKnownLT(TmpVF, Range.End); TmpVF *= 2)
+ for (ElementCount TmpVF : VFRange(Range.Start * 2, Range.End))
if (Predicate(TmpVF) != PredicateAtRangeStart) {
Range.End = TmpVF;
break;
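
Editorial aside: a minimal stand-alone sketch of the clamping that getDecisionAndClampRange performs on a power-of-two VF range. The helper below is illustrative plain C++, not LLVM API; it only models the loop shown above.

  #include <functional>

  // Clamp the half-open power-of-two range [Start, End) at the first VF whose
  // decision differs from the decision taken at Start; return that decision.
  static bool clampRange(unsigned Start, unsigned &End,
                         const std::function<bool(unsigned)> &Predicate) {
    bool AtStart = Predicate(Start);
    for (unsigned VF = Start * 2; VF < End; VF *= 2)
      if (Predicate(VF) != AtStart) {
        End = VF; // e.g. [4, 32) with the predicate flipping at 16 becomes [4, 16)
        break;
      }
    return AtStart;
  }
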
@@ -8025,16 +8119,16 @@ bool LoopVectorizationPlanner::getDecisionAndClampRange(
/// buildVPlan().
void LoopVectorizationPlanner::buildVPlans(ElementCount MinVF,
ElementCount MaxVF) {
- auto MaxVFPlusOne = MaxVF.getWithIncrement(1);
- for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) {
- VFRange SubRange = {VF, MaxVFPlusOne};
+ auto MaxVFTimes2 = MaxVF * 2;
+ for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
+ VFRange SubRange = {VF, MaxVFTimes2};
VPlans.push_back(buildVPlan(SubRange));
VF = SubRange.End;
}
}
VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst,
- VPlanPtr &Plan) {
+ VPlan &Plan) {
assert(is_contained(predecessors(Dst), Src) && "Invalid edge");
// Look for cached value.
@@ -8058,7 +8152,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst,
if (OrigLoop->isLoopExiting(Src))
return EdgeMaskCache[Edge] = SrcMask;
- VPValue *EdgeMask = Plan->getOrAddVPValue(BI->getCondition());
+ VPValue *EdgeMask = Plan.getVPValueOrAddLiveIn(BI->getCondition());
assert(EdgeMask && "No Edge Mask found for condition");
if (BI->getSuccessor(0) != Dst)
@@ -8069,7 +8163,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst,
// 'select i1 SrcMask, i1 EdgeMask, i1 false'.
// The select version does not introduce new UB if SrcMask is false and
// EdgeMask is poison. Using 'and' here introduces undefined behavior.
- VPValue *False = Plan->getOrAddVPValue(
+ VPValue *False = Plan.getVPValueOrAddLiveIn(
ConstantInt::getFalse(BI->getCondition()->getType()));
EdgeMask =
Builder.createSelect(SrcMask, EdgeMask, False, BI->getDebugLoc());
@@ -8078,7 +8172,7 @@ VPValue *VPRecipeBuilder::createEdgeMask(BasicBlock *Src, BasicBlock *Dst,
return EdgeMaskCache[Edge] = EdgeMask;
}
-VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
+VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) {
assert(OrigLoop->contains(BB) && "Block is not a part of a loop");
// Look for cached value.
@@ -8098,29 +8192,28 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
// If we're using the active lane mask for control flow, then we get the
// mask from the active lane mask PHI that is cached in the VPlan.
- PredicationStyle EmitGetActiveLaneMask = CM.TTI.emitGetActiveLaneMask();
- if (EmitGetActiveLaneMask == PredicationStyle::DataAndControlFlow)
- return BlockMaskCache[BB] = Plan->getActiveLaneMaskPhi();
+ TailFoldingStyle TFStyle = CM.getTailFoldingStyle();
+ if (useActiveLaneMaskForControlFlow(TFStyle))
+ return BlockMaskCache[BB] = Plan.getActiveLaneMaskPhi();
// Introduce the early-exit compare IV <= BTC to form header block mask.
// This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
// constructing the desired canonical IV in the header block as its first
// non-phi instructions.
- VPBasicBlock *HeaderVPBB =
- Plan->getVectorLoopRegion()->getEntryBasicBlock();
+ VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi();
- auto *IV = new VPWidenCanonicalIVRecipe(Plan->getCanonicalIV());
+ auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
HeaderVPBB->insert(IV, HeaderVPBB->getFirstNonPhi());
VPBuilder::InsertPointGuard Guard(Builder);
Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint);
- if (EmitGetActiveLaneMask != PredicationStyle::None) {
- VPValue *TC = Plan->getOrCreateTripCount();
+ if (useActiveLaneMask(TFStyle)) {
+ VPValue *TC = Plan.getTripCount();
BlockMask = Builder.createNaryOp(VPInstruction::ActiveLaneMask, {IV, TC},
nullptr, "active.lane.mask");
} else {
- VPValue *BTC = Plan->getOrCreateBackedgeTakenCount();
+ VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
BlockMask = Builder.createNaryOp(VPInstruction::ICmpULE, {IV, BTC});
}
return BlockMaskCache[BB] = BlockMask;
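
Editorial aside: a scalar model of the header-block mask the two branches above compute, assuming a trip count TC and backedge-taken count BTC = TC - 1. Plain C++ for illustration only, not VPlan code.

  #include <vector>

  // Lane L of the widened canonical IV holds IV + L. The ICmpULE form tests
  // IV + L <= BTC, the active.lane.mask form tests IV + L < TC; both select the
  // same lanes as long as TC itself does not wrap.
  static std::vector<bool> headerBlockMask(unsigned IV, unsigned TC, unsigned VF) {
    std::vector<bool> Mask(VF);
    for (unsigned L = 0; L < VF; ++L)
      Mask[L] = IV + L < TC;
    return Mask;
  }
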
@@ -8168,7 +8261,7 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
VPValue *Mask = nullptr;
if (Legal->isMaskRequired(I))
- Mask = createBlockInMask(I->getParent(), Plan);
+ Mask = createBlockInMask(I->getParent(), *Plan);
// Determine if the pointer operand of the access is either consecutive or
// reverse consecutive.
@@ -8189,22 +8282,11 @@ VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
/// Creates a VPWidenIntOrFpInductionRecipe for \p Phi. If needed, it will also
/// insert a recipe to expand the step for the induction recipe.
-static VPWidenIntOrFpInductionRecipe *createWidenInductionRecipes(
- PHINode *Phi, Instruction *PhiOrTrunc, VPValue *Start,
- const InductionDescriptor &IndDesc, LoopVectorizationCostModel &CM,
- VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop, VFRange &Range) {
- // Returns true if an instruction \p I should be scalarized instead of
- // vectorized for the chosen vectorization factor.
- auto ShouldScalarizeInstruction = [&CM](Instruction *I, ElementCount VF) {
- return CM.isScalarAfterVectorization(I, VF) ||
- CM.isProfitableToScalarize(I, VF);
- };
-
- bool NeedsScalarIVOnly = LoopVectorizationPlanner::getDecisionAndClampRange(
- [&](ElementCount VF) {
- return ShouldScalarizeInstruction(PhiOrTrunc, VF);
- },
- Range);
+static VPWidenIntOrFpInductionRecipe *
+createWidenInductionRecipes(PHINode *Phi, Instruction *PhiOrTrunc,
+ VPValue *Start, const InductionDescriptor &IndDesc,
+ VPlan &Plan, ScalarEvolution &SE, Loop &OrigLoop,
+ VFRange &Range) {
assert(IndDesc.getStartValue() ==
Phi->getIncomingValueForBlock(OrigLoop.getLoopPreheader()));
assert(SE.isLoopInvariant(IndDesc.getStep(), &OrigLoop) &&
@@ -8213,12 +8295,10 @@ static VPWidenIntOrFpInductionRecipe *createWidenInductionRecipes(
VPValue *Step =
vputils::getOrCreateVPValueForSCEVExpr(Plan, IndDesc.getStep(), SE);
if (auto *TruncI = dyn_cast<TruncInst>(PhiOrTrunc)) {
- return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc, TruncI,
- !NeedsScalarIVOnly);
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc, TruncI);
}
assert(isa<PHINode>(PhiOrTrunc) && "must be a phi node here");
- return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc,
- !NeedsScalarIVOnly);
+ return new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, IndDesc);
}
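
Editorial aside: a sketch of the per-lane values a widened integer induction stands for, given the Start and Step pulled from the induction descriptor above. This models the recipe's semantics for a fixed VF only; it is not the generated IR.

  #include <vector>

  static std::vector<long long> widenedIVLanes(long long Start, long long Step,
                                               unsigned VF, unsigned Part) {
    std::vector<long long> Lanes(VF);
    for (unsigned L = 0; L < VF; ++L)
      Lanes[L] = Start + (long long)(Part * VF + L) * Step; // lane value for this unroll part
    return Lanes;
  }
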
VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI(
@@ -8227,14 +8307,13 @@ VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI(
// Check if this is an integer or fp induction. If so, build the recipe that
// produces its scalar and vector values.
if (auto *II = Legal->getIntOrFpInductionDescriptor(Phi))
- return createWidenInductionRecipes(Phi, Phi, Operands[0], *II, CM, Plan,
+ return createWidenInductionRecipes(Phi, Phi, Operands[0], *II, Plan,
*PSE.getSE(), *OrigLoop, Range);
// Check if this is pointer induction. If so, build the recipe for it.
if (auto *II = Legal->getPointerInductionDescriptor(Phi)) {
VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep(),
*PSE.getSE());
- assert(isa<SCEVConstant>(II->getStep()));
return new VPWidenPointerInductionRecipe(
Phi, Operands[0], Step, *II,
LoopVectorizationPlanner::getDecisionAndClampRange(
@@ -8267,9 +8346,9 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
auto *Phi = cast<PHINode>(I->getOperand(0));
const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor(Phi);
- VPValue *Start = Plan.getOrAddVPValue(II.getStartValue());
- return createWidenInductionRecipes(Phi, I, Start, II, CM, Plan,
- *PSE.getSE(), *OrigLoop, Range);
+ VPValue *Start = Plan.getVPValueOrAddLiveIn(II.getStartValue());
+ return createWidenInductionRecipes(Phi, I, Start, II, Plan, *PSE.getSE(),
+ *OrigLoop, Range);
}
return nullptr;
}
@@ -8309,7 +8388,7 @@ VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi,
for (unsigned In = 0; In < NumIncoming; In++) {
VPValue *EdgeMask =
- createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent(), Plan);
+ createEdgeMask(Phi->getIncomingBlock(In), Phi->getParent(), *Plan);
assert((EdgeMask || NumIncoming == 1) &&
"Multiple predecessors with one having a full mask");
OperandsWithMask.push_back(Operands[In]);
@@ -8321,8 +8400,8 @@ VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi,
VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
ArrayRef<VPValue *> Operands,
- VFRange &Range) const {
-
+ VFRange &Range,
+ VPlanPtr &Plan) {
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
[this, CI](ElementCount VF) {
return CM.isScalarWithPredication(CI, VF);
@@ -8339,17 +8418,17 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
ID == Intrinsic::experimental_noalias_scope_decl))
return nullptr;
- ArrayRef<VPValue *> Ops = Operands.take_front(CI->arg_size());
+ SmallVector<VPValue *, 4> Ops(Operands.take_front(CI->arg_size()));
// Is it beneficial to perform intrinsic call compared to lib call?
bool ShouldUseVectorIntrinsic =
ID && LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) -> bool {
- bool NeedToScalarize = false;
+ Function *Variant;
// Is it beneficial to perform intrinsic call compared to lib
// call?
InstructionCost CallCost =
- CM.getVectorCallCost(CI, VF, NeedToScalarize);
+ CM.getVectorCallCost(CI, VF, &Variant);
InstructionCost IntrinsicCost =
CM.getVectorIntrinsicCost(CI, VF);
return IntrinsicCost <= CallCost;
@@ -8358,6 +8437,9 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
if (ShouldUseVectorIntrinsic)
return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()), ID);
+ Function *Variant = nullptr;
+ ElementCount VariantVF;
+ bool NeedsMask = false;
// Is it better to call a vectorized version of the function than to scalarize
// the call?
auto ShouldUseVectorCall = LoopVectorizationPlanner::getDecisionAndClampRange(
@@ -8365,14 +8447,57 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
// The following case may be scalarized depending on the VF.
// The flag shows whether we can use a usual Call for vectorized
// version of the instruction.
- bool NeedToScalarize = false;
- CM.getVectorCallCost(CI, VF, NeedToScalarize);
- return !NeedToScalarize;
+
+ // If we've found a variant at a previous VF, then stop looking. A
+ // vectorized variant of a function expects input in a certain shape
+ // -- basically the number of input registers, the number of lanes
+ // per register, and whether there's a mask required.
+ // We store a pointer to the variant in the VPWidenCallRecipe, so
+ // once we have an appropriate variant it's only valid for that VF.
+ // This will force a different vplan to be generated for each VF that
+ // finds a valid variant.
+ if (Variant)
+ return false;
+ CM.getVectorCallCost(CI, VF, &Variant, &NeedsMask);
+ // If we found a valid vector variant at this VF, then store the VF
+ // in case we need to generate a mask.
+ if (Variant)
+ VariantVF = VF;
+ return Variant != nullptr;
},
Range);
- if (ShouldUseVectorCall)
+ if (ShouldUseVectorCall) {
+ if (NeedsMask) {
+ // We have 2 cases that would require a mask:
+ // 1) The block needs to be predicated, either due to a conditional
+ // in the scalar loop or use of an active lane mask with
+ // tail-folding, and we use the appropriate mask for the block.
+ // 2) No mask is required for the block, but the only available
+ // vector variant at this VF requires a mask, so we synthesize an
+ // all-true mask.
+ VPValue *Mask = nullptr;
+ if (Legal->isMaskRequired(CI))
+ Mask = createBlockInMask(CI->getParent(), *Plan);
+ else
+ Mask = Plan->getVPValueOrAddLiveIn(ConstantInt::getTrue(
+ IntegerType::getInt1Ty(Variant->getFunctionType()->getContext())));
+
+ VFShape Shape = VFShape::get(*CI, VariantVF, /*HasGlobalPred=*/true);
+ unsigned MaskPos = 0;
+
+ for (const VFInfo &Info : VFDatabase::getMappings(*CI))
+ if (Info.Shape == Shape) {
+ assert(Info.isMasked() && "Vector function info shape mismatch");
+ MaskPos = Info.getParamIndexForOptionalMask().value();
+ break;
+ }
+
+ Ops.insert(Ops.begin() + MaskPos, Mask);
+ }
+
return new VPWidenCallRecipe(*CI, make_range(Ops.begin(), Ops.end()),
- Intrinsic::not_intrinsic);
+ Intrinsic::not_intrinsic, Variant);
+ }
return nullptr;
}
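
Editorial aside: to make the mask handling above concrete, when the only usable variant at the chosen VF is masked, the recipe's operand list is the original call arguments with a mask (block mask or synthesized all-true mask) spliced in at the variant's mask parameter position. A hypothetical toy illustration, not the VFDatabase API:

  #include <string>
  #include <vector>

  static std::vector<std::string> addMaskOperand(std::vector<std::string> Ops,
                                                 unsigned MaskPos,
                                                 const std::string &Mask) {
    // e.g. Ops = {"%a", "%b"}, MaskPos = 2, Mask = "%alltrue" -> {"%a", "%b", "%alltrue"}
    Ops.insert(Ops.begin() + MaskPos, Mask);
    return Ops;
  }
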
@@ -8405,9 +8530,9 @@ VPRecipeBase *VPRecipeBuilder::tryToWiden(Instruction *I,
// div/rem operation itself. Otherwise fall through to general handling below.
if (CM.isPredicatedInst(I)) {
SmallVector<VPValue *> Ops(Operands.begin(), Operands.end());
- VPValue *Mask = createBlockInMask(I->getParent(), Plan);
- VPValue *One =
- Plan->getOrAddExternalDef(ConstantInt::get(I->getType(), 1u, false));
+ VPValue *Mask = createBlockInMask(I->getParent(), *Plan);
+ VPValue *One = Plan->getVPValueOrAddLiveIn(
+ ConstantInt::get(I->getType(), 1u, false));
auto *SafeRHS =
new VPInstruction(Instruction::Select, {Mask, Ops[1], One},
I->getDebugLoc());
@@ -8415,38 +8540,26 @@ VPRecipeBase *VPRecipeBuilder::tryToWiden(Instruction *I,
Ops[1] = SafeRHS;
return new VPWidenRecipe(*I, make_range(Ops.begin(), Ops.end()));
}
- LLVM_FALLTHROUGH;
+ [[fallthrough]];
}
case Instruction::Add:
case Instruction::And:
case Instruction::AShr:
- case Instruction::BitCast:
case Instruction::FAdd:
case Instruction::FCmp:
case Instruction::FDiv:
case Instruction::FMul:
case Instruction::FNeg:
- case Instruction::FPExt:
- case Instruction::FPToSI:
- case Instruction::FPToUI:
- case Instruction::FPTrunc:
case Instruction::FRem:
case Instruction::FSub:
case Instruction::ICmp:
- case Instruction::IntToPtr:
case Instruction::LShr:
case Instruction::Mul:
case Instruction::Or:
- case Instruction::PtrToInt:
case Instruction::Select:
- case Instruction::SExt:
case Instruction::Shl:
- case Instruction::SIToFP:
case Instruction::Sub:
- case Instruction::Trunc:
- case Instruction::UIToFP:
case Instruction::Xor:
- case Instruction::ZExt:
case Instruction::Freeze:
return new VPWidenRecipe(*I, make_range(Operands.begin(), Operands.end()));
};
@@ -8462,9 +8575,9 @@ void VPRecipeBuilder::fixHeaderPhis() {
}
}
-VPBasicBlock *VPRecipeBuilder::handleReplication(
- Instruction *I, VFRange &Range, VPBasicBlock *VPBB,
- VPlanPtr &Plan) {
+VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I,
+ VFRange &Range,
+ VPlan &Plan) {
bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); },
Range);
@@ -8501,83 +8614,22 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
break;
}
}
-
- auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()),
- IsUniform, IsPredicated);
-
- // Find if I uses a predicated instruction. If so, it will use its scalar
- // value. Avoid hoisting the insert-element which packs the scalar value into
- // a vector value, as that happens iff all users use the vector value.
- for (VPValue *Op : Recipe->operands()) {
- auto *PredR =
- dyn_cast_or_null<VPPredInstPHIRecipe>(Op->getDefiningRecipe());
- if (!PredR)
- continue;
- auto *RepR = cast<VPReplicateRecipe>(
- PredR->getOperand(0)->getDefiningRecipe());
- assert(RepR->isPredicated() &&
- "expected Replicate recipe to be predicated");
- RepR->setAlsoPack(false);
- }
-
- // Finalize the recipe for Instr, first if it is not predicated.
+ VPValue *BlockInMask = nullptr;
if (!IsPredicated) {
+ // Finalize the recipe for Instr, first if it is not predicated.
LLVM_DEBUG(dbgs() << "LV: Scalarizing:" << *I << "\n");
- setRecipe(I, Recipe);
- Plan->addVPValue(I, Recipe);
- VPBB->appendRecipe(Recipe);
- return VPBB;
- }
- LLVM_DEBUG(dbgs() << "LV: Scalarizing and predicating:" << *I << "\n");
-
- VPBlockBase *SingleSucc = VPBB->getSingleSuccessor();
- assert(SingleSucc && "VPBB must have a single successor when handling "
- "predicated replication.");
- VPBlockUtils::disconnectBlocks(VPBB, SingleSucc);
- // Record predicated instructions for above packing optimizations.
- VPBlockBase *Region = createReplicateRegion(Recipe, Plan);
- VPBlockUtils::insertBlockAfter(Region, VPBB);
- auto *RegSucc = new VPBasicBlock();
- VPBlockUtils::insertBlockAfter(RegSucc, Region);
- VPBlockUtils::connectBlocks(RegSucc, SingleSucc);
- return RegSucc;
-}
-
-VPRegionBlock *
-VPRecipeBuilder::createReplicateRegion(VPReplicateRecipe *PredRecipe,
- VPlanPtr &Plan) {
- Instruction *Instr = PredRecipe->getUnderlyingInstr();
- // Instructions marked for predication are replicated and placed under an
- // if-then construct to prevent side-effects.
- // Generate recipes to compute the block mask for this region.
- VPValue *BlockInMask = createBlockInMask(Instr->getParent(), Plan);
-
- // Build the triangular if-then region.
- std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
- assert(Instr->getParent() && "Predicated instruction not in any basic block");
- auto *BOMRecipe = new VPBranchOnMaskRecipe(BlockInMask);
- auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe);
- auto *PHIRecipe = Instr->getType()->isVoidTy()
- ? nullptr
- : new VPPredInstPHIRecipe(PredRecipe);
- if (PHIRecipe) {
- setRecipe(Instr, PHIRecipe);
- Plan->addVPValue(Instr, PHIRecipe);
} else {
- setRecipe(Instr, PredRecipe);
- Plan->addVPValue(Instr, PredRecipe);
+ LLVM_DEBUG(dbgs() << "LV: Scalarizing and predicating:" << *I << "\n");
+ // Instructions marked for predication are replicated and a mask operand is
+ // added initially. Masked replicate recipes will later be placed under an
+ // if-then construct to prevent side-effects. Generate recipes to compute
+ // the block mask for this region.
+ BlockInMask = createBlockInMask(I->getParent(), Plan);
}
- auto *Exiting = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
- auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", PredRecipe);
- VPRegionBlock *Region = new VPRegionBlock(Entry, Exiting, RegionName, true);
-
- // Note: first set Entry as region entry and then connect successors starting
- // from it in order, to propagate the "parent" of each VPBasicBlock.
- VPBlockUtils::insertTwoBlocksAfter(Pred, Exiting, Entry);
- VPBlockUtils::connectBlocks(Pred, Exiting);
-
- return Region;
+ auto *Recipe = new VPReplicateRecipe(I, Plan.mapToVPValues(I->operands()),
+ IsUniform, BlockInMask);
+ return toVPRecipeResult(Recipe);
}
VPRecipeOrVPValueTy
@@ -8643,7 +8695,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
return nullptr;
if (auto *CI = dyn_cast<CallInst>(Instr))
- return toVPRecipeResult(tryToWidenCall(CI, Operands, Range));
+ return toVPRecipeResult(tryToWidenCall(CI, Operands, Range, Plan));
if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
return toVPRecipeResult(tryToWidenMemory(Instr, Operands, Range, Plan));
@@ -8653,13 +8705,16 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
if (auto GEP = dyn_cast<GetElementPtrInst>(Instr))
return toVPRecipeResult(new VPWidenGEPRecipe(
- GEP, make_range(Operands.begin(), Operands.end()), OrigLoop));
+ GEP, make_range(Operands.begin(), Operands.end())));
if (auto *SI = dyn_cast<SelectInst>(Instr)) {
- bool InvariantCond =
- PSE.getSE()->isLoopInvariant(PSE.getSCEV(SI->getOperand(0)), OrigLoop);
return toVPRecipeResult(new VPWidenSelectRecipe(
- *SI, make_range(Operands.begin(), Operands.end()), InvariantCond));
+ *SI, make_range(Operands.begin(), Operands.end())));
+ }
+
+ if (auto *CI = dyn_cast<CastInst>(Instr)) {
+ return toVPRecipeResult(
+ new VPWidenCastRecipe(CI->getOpcode(), Operands[0], CI->getType(), CI));
}
return toVPRecipeResult(tryToWiden(Instr, Operands, VPBB, Plan));
@@ -8677,34 +8732,11 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
auto &ConditionalAssumes = Legal->getConditionalAssumes();
DeadInstructions.insert(ConditionalAssumes.begin(), ConditionalAssumes.end());
- MapVector<Instruction *, Instruction *> &SinkAfter = Legal->getSinkAfter();
- // Dead instructions do not need sinking. Remove them from SinkAfter.
- for (Instruction *I : DeadInstructions)
- SinkAfter.erase(I);
-
- // Cannot sink instructions after dead instructions (there won't be any
- // recipes for them). Instead, find the first non-dead previous instruction.
- for (auto &P : Legal->getSinkAfter()) {
- Instruction *SinkTarget = P.second;
- Instruction *FirstInst = &*SinkTarget->getParent()->begin();
- (void)FirstInst;
- while (DeadInstructions.contains(SinkTarget)) {
- assert(
- SinkTarget != FirstInst &&
- "Must find a live instruction (at least the one feeding the "
- "fixed-order recurrence PHI) before reaching beginning of the block");
- SinkTarget = SinkTarget->getPrevNode();
- assert(SinkTarget != P.first &&
- "sink source equals target, no sinking required");
- }
- P.second = SinkTarget;
- }
-
- auto MaxVFPlusOne = MaxVF.getWithIncrement(1);
- for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFPlusOne);) {
- VFRange SubRange = {VF, MaxVFPlusOne};
- VPlans.push_back(
- buildVPlanWithVPRecipes(SubRange, DeadInstructions, SinkAfter));
+ auto MaxVFTimes2 = MaxVF * 2;
+ for (ElementCount VF = MinVF; ElementCount::isKnownLT(VF, MaxVFTimes2);) {
+ VFRange SubRange = {VF, MaxVFTimes2};
+ if (auto Plan = tryToBuildVPlanWithVPRecipes(SubRange, DeadInstructions))
+ VPlans.push_back(std::move(*Plan));
VF = SubRange.End;
}
}
@@ -8712,10 +8744,9 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
// Add the necessary canonical IV and branch recipes required to control the
// loop.
static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
- bool HasNUW,
- bool UseLaneMaskForLoopControlFlow) {
+ TailFoldingStyle Style) {
Value *StartIdx = ConstantInt::get(IdxTy, 0);
- auto *StartV = Plan.getOrAddVPValue(StartIdx);
+ auto *StartV = Plan.getVPValueOrAddLiveIn(StartIdx);
// Add a VPCanonicalIVPHIRecipe starting at 0 to the header.
auto *CanonicalIVPHI = new VPCanonicalIVPHIRecipe(StartV, DL);
@@ -8725,6 +8756,7 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
// Add a CanonicalIVIncrement{NUW} VPInstruction to increment the scalar
// IV by VF * UF.
+ bool HasNUW = Style == TailFoldingStyle::None;
auto *CanonicalIVIncrement =
new VPInstruction(HasNUW ? VPInstruction::CanonicalIVIncrementNUW
: VPInstruction::CanonicalIVIncrement,
@@ -8732,11 +8764,10 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
CanonicalIVPHI->addOperand(CanonicalIVIncrement);
VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
- EB->appendRecipe(CanonicalIVIncrement);
-
- if (UseLaneMaskForLoopControlFlow) {
+ if (useActiveLaneMaskForControlFlow(Style)) {
// Create the active lane mask instruction in the vplan preheader.
- VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock();
+ VPBasicBlock *VecPreheader =
+ cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSinglePredecessor());
// We can't use StartV directly in the ActiveLaneMask VPInstruction, since
// we have to take unrolling into account. Each part needs to start at
@@ -8745,14 +8776,34 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
new VPInstruction(HasNUW ? VPInstruction::CanonicalIVIncrementForPartNUW
: VPInstruction::CanonicalIVIncrementForPart,
{StartV}, DL, "index.part.next");
- Preheader->appendRecipe(CanonicalIVIncrementParts);
+ VecPreheader->appendRecipe(CanonicalIVIncrementParts);
// Create the ActiveLaneMask instruction using the correct start values.
- VPValue *TC = Plan.getOrCreateTripCount();
+ VPValue *TC = Plan.getTripCount();
+
+ VPValue *TripCount, *IncrementValue;
+ if (Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {
+ // When avoiding a runtime check, the active.lane.mask inside the loop
+ // uses a modified trip count and the induction variable increment is
+ // done after the active.lane.mask intrinsic is called.
+ auto *TCMinusVF =
+ new VPInstruction(VPInstruction::CalculateTripCountMinusVF, {TC}, DL);
+ VecPreheader->appendRecipe(TCMinusVF);
+ IncrementValue = CanonicalIVPHI;
+ TripCount = TCMinusVF;
+ } else {
+ // When the loop is guarded by a runtime overflow check for the loop
+ // induction variable increment by VF, we can increment the value before
+ // the get.active.lane.mask intrinsic and use the unmodified trip count.
+ EB->appendRecipe(CanonicalIVIncrement);
+ IncrementValue = CanonicalIVIncrement;
+ TripCount = TC;
+ }
+
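
Editorial aside: a stand-alone model of the DataAndControlFlowWithoutRuntimeCheck schedule set up above, assuming TC=10, VF=4, UF=1 (illustrative numbers only, not taken from the patch). The mask for the next iteration is formed from the pre-increment IV against TC - VF, so IV + VF never has to be computed speculatively.

  #include <cstdio>

  int main() {
    unsigned TC = 10, VF = 4, IV = 0;
    auto ALM = [&](unsigned Base, unsigned Limit) {
      unsigned M = 0;
      for (unsigned L = 0; L < VF; ++L)
        M |= (unsigned)(Base + L < Limit) << L;
      return M;
    };
    unsigned Mask = ALM(IV, TC); // entry mask, guards elements 0..3
    while (Mask) {
      std::printf("IV=%u mask=0x%x\n", IV, Mask);
      Mask = ALM(IV, TC - VF); // mask for the *next* iteration, from the old IV
      IV += VF;                // canonical IV increment comes after the mask
    }
    return 0;
  }

  // Prints masks 0xf, 0xf, 0x3: three vector iterations covering elements
  // 0..3, 4..7 and 8..9, with the loop exiting once the mask is all false.
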
auto *EntryALM = new VPInstruction(VPInstruction::ActiveLaneMask,
{CanonicalIVIncrementParts, TC}, DL,
"active.lane.mask.entry");
- Preheader->appendRecipe(EntryALM);
+ VecPreheader->appendRecipe(EntryALM);
// Now create the ActiveLaneMaskPhi recipe in the main loop using the
// preheader ActiveLaneMask instruction.
@@ -8763,15 +8814,21 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
CanonicalIVIncrementParts =
new VPInstruction(HasNUW ? VPInstruction::CanonicalIVIncrementForPartNUW
: VPInstruction::CanonicalIVIncrementForPart,
- {CanonicalIVIncrement}, DL);
+ {IncrementValue}, DL);
EB->appendRecipe(CanonicalIVIncrementParts);
auto *ALM = new VPInstruction(VPInstruction::ActiveLaneMask,
- {CanonicalIVIncrementParts, TC}, DL,
+ {CanonicalIVIncrementParts, TripCount}, DL,
"active.lane.mask.next");
EB->appendRecipe(ALM);
LaneMaskPhi->addOperand(ALM);
+ if (Style == TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck) {
+ // Do the increment of the canonical IV after the active.lane.mask, because
+ // that value is still based off %CanonicalIVPHI
+ EB->appendRecipe(CanonicalIVIncrement);
+ }
+
// We have to invert the mask here because a true condition means jumping
// to the exit block.
auto *NotMask = new VPInstruction(VPInstruction::Not, ALM, DL);
@@ -8781,6 +8838,8 @@ static void addCanonicalIVRecipes(VPlan &Plan, Type *IdxTy, DebugLoc DL,
new VPInstruction(VPInstruction::BranchOnCond, {NotMask}, DL);
EB->appendRecipe(BranchBack);
} else {
+ EB->appendRecipe(CanonicalIVIncrement);
+
// Add the BranchOnCount VPInstruction to the latch.
VPInstruction *BranchBack = new VPInstruction(
VPInstruction::BranchOnCount,
@@ -8804,14 +8863,13 @@ static void addUsersInExitBlock(VPBasicBlock *HeaderVPBB,
for (PHINode &ExitPhi : ExitBB->phis()) {
Value *IncomingValue =
ExitPhi.getIncomingValueForBlock(ExitingBB);
- VPValue *V = Plan.getOrAddVPValue(IncomingValue, true);
+ VPValue *V = Plan.getVPValueOrAddLiveIn(IncomingValue);
Plan.addLiveOut(&ExitPhi, V);
}
}
-VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
- VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions,
- const MapVector<Instruction *, Instruction *> &SinkAfter) {
+std::optional<VPlanPtr> LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
+ VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions) {
SmallPtrSet<const InterleaveGroup<Instruction> *, 1> InterleaveGroups;
@@ -8822,12 +8880,6 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// process after constructing the initial VPlan.
// ---------------------------------------------------------------------------
- // Mark instructions we'll need to sink later and their targets as
- // ingredients whose recipe we'll need to record.
- for (const auto &Entry : SinkAfter) {
- RecipeBuilder.recordRecipeOf(Entry.first);
- RecipeBuilder.recordRecipeOf(Entry.second);
- }
for (const auto &Reduction : CM.getInLoopReductionChains()) {
PHINode *Phi = Reduction.first;
RecurKind Kind =
@@ -8852,9 +8904,15 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// single VPInterleaveRecipe.
for (InterleaveGroup<Instruction> *IG : IAI.getInterleaveGroups()) {
auto applyIG = [IG, this](ElementCount VF) -> bool {
- return (VF.isVector() && // Query is illegal for VF == 1
- CM.getWideningDecision(IG->getInsertPos(), VF) ==
- LoopVectorizationCostModel::CM_Interleave);
+ bool Result = (VF.isVector() && // Query is illegal for VF == 1
+ CM.getWideningDecision(IG->getInsertPos(), VF) ==
+ LoopVectorizationCostModel::CM_Interleave);
+ // For scalable vectors, the only interleave factor currently supported
+ // is 2 since we require the (de)interleave2 intrinsics instead of
+ // shufflevectors.
+ assert((!Result || !VF.isScalable() || IG->getFactor() == 2) &&
+ "Unsupported interleave factor for scalable vectors");
+ return Result;
};
if (!getDecisionAndClampRange(applyIG, Range))
continue;
@@ -8869,26 +8927,34 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// visit each basic block after having visited its predecessor basic blocks.
// ---------------------------------------------------------------------------
- // Create initial VPlan skeleton, starting with a block for the pre-header,
- // followed by a region for the vector loop, followed by the middle block. The
- // skeleton vector loop region contains a header and latch block.
- VPBasicBlock *Preheader = new VPBasicBlock("vector.ph");
- auto Plan = std::make_unique<VPlan>(Preheader);
-
+ // Create initial VPlan skeleton, having a basic block for the pre-header
+ // which contains SCEV expansions that need to happen before the CFG is
+ // modified; a basic block for the vector pre-header, followed by a region for
+ // the vector loop, followed by the middle basic block. The skeleton vector
+ // loop region contains a header and latch basic blocks.
+ VPlanPtr Plan = VPlan::createInitialVPlan(
+ createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop),
+ *PSE.getSE());
VPBasicBlock *HeaderVPBB = new VPBasicBlock("vector.body");
VPBasicBlock *LatchVPBB = new VPBasicBlock("vector.latch");
VPBlockUtils::insertBlockAfter(LatchVPBB, HeaderVPBB);
auto *TopRegion = new VPRegionBlock(HeaderVPBB, LatchVPBB, "vector loop");
- VPBlockUtils::insertBlockAfter(TopRegion, Preheader);
+ VPBlockUtils::insertBlockAfter(TopRegion, Plan->getEntry());
VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block");
VPBlockUtils::insertBlockAfter(MiddleVPBB, TopRegion);
+ // Don't use getDecisionAndClampRange here, because we don't know the UF,
+ // so it is better to be conservative here rather than to split this up into
+ // different VPlans.
+ bool IVUpdateMayOverflow = false;
+ for (ElementCount VF : Range)
+ IVUpdateMayOverflow |= !isIndvarOverflowCheckKnownFalse(&CM, VF);
+
Instruction *DLInst =
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(),
DLInst ? DLInst->getDebugLoc() : DebugLoc(),
- !CM.foldTailByMasking(),
- CM.useActiveLaneMaskForControlFlow());
+ CM.getTailFoldingStyle(IVUpdateMayOverflow));
// Scan the body of the loop in a topological order to visit each basic block
// after having visited its predecessor basic blocks.
@@ -8896,18 +8962,16 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
DFS.perform(LI);
VPBasicBlock *VPBB = HeaderVPBB;
- SmallVector<VPWidenIntOrFpInductionRecipe *> InductionsToMove;
for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) {
// Relevant instructions from basic block BB will be grouped into VPRecipe
// ingredients and fill a new VPBasicBlock.
- unsigned VPBBsForBB = 0;
if (VPBB != HeaderVPBB)
VPBB->setName(BB->getName());
Builder.setInsertPoint(VPBB);
// Introduce each ingredient into VPlan.
// TODO: Model and preserve debug intrinsics in VPlan.
- for (Instruction &I : BB->instructionsWithoutDebug()) {
+ for (Instruction &I : BB->instructionsWithoutDebug(false)) {
Instruction *Instr = &I;
// First filter out irrelevant instructions, to ensure no recipes are
@@ -8918,7 +8982,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
SmallVector<VPValue *, 4> Operands;
auto *Phi = dyn_cast<PHINode>(Instr);
if (Phi && Phi->getParent() == OrigLoop->getHeader()) {
- Operands.push_back(Plan->getOrAddVPValue(
+ Operands.push_back(Plan->getVPValueOrAddLiveIn(
Phi->getIncomingValueForBlock(OrigLoop->getLoopPreheader())));
} else {
auto OpRange = Plan->mapToVPValues(Instr->operands());
@@ -8932,50 +8996,36 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
Legal->isInvariantAddressOfReduction(SI->getPointerOperand()))
continue;
- if (auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe(
- Instr, Operands, Range, VPBB, Plan)) {
- // If Instr can be simplified to an existing VPValue, use it.
- if (RecipeOrValue.is<VPValue *>()) {
- auto *VPV = RecipeOrValue.get<VPValue *>();
- Plan->addVPValue(Instr, VPV);
- // If the re-used value is a recipe, register the recipe for the
- // instruction, in case the recipe for Instr needs to be recorded.
- if (VPRecipeBase *R = VPV->getDefiningRecipe())
- RecipeBuilder.setRecipe(Instr, R);
- continue;
- }
- // Otherwise, add the new recipe.
- VPRecipeBase *Recipe = RecipeOrValue.get<VPRecipeBase *>();
- for (auto *Def : Recipe->definedValues()) {
- auto *UV = Def->getUnderlyingValue();
- Plan->addVPValue(UV, Def);
- }
-
- if (isa<VPWidenIntOrFpInductionRecipe>(Recipe) &&
- HeaderVPBB->getFirstNonPhi() != VPBB->end()) {
- // Keep track of VPWidenIntOrFpInductionRecipes not in the phi section
- // of the header block. That can happen for truncates of induction
- // variables. Those recipes are moved to the phi section of the header
- // block after applying SinkAfter, which relies on the original
- // position of the trunc.
- assert(isa<TruncInst>(Instr));
- InductionsToMove.push_back(
- cast<VPWidenIntOrFpInductionRecipe>(Recipe));
- }
- RecipeBuilder.setRecipe(Instr, Recipe);
- VPBB->appendRecipe(Recipe);
+ auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe(
+ Instr, Operands, Range, VPBB, Plan);
+ if (!RecipeOrValue)
+ RecipeOrValue = RecipeBuilder.handleReplication(Instr, Range, *Plan);
+ // If Instr can be simplified to an existing VPValue, use it.
+ if (isa<VPValue *>(RecipeOrValue)) {
+ auto *VPV = cast<VPValue *>(RecipeOrValue);
+ Plan->addVPValue(Instr, VPV);
+ // If the re-used value is a recipe, register the recipe for the
+ // instruction, in case the recipe for Instr needs to be recorded.
+ if (VPRecipeBase *R = VPV->getDefiningRecipe())
+ RecipeBuilder.setRecipe(Instr, R);
continue;
}
-
- // Otherwise, if all widening options failed, Instruction is to be
- // replicated. This may create a successor for VPBB.
- VPBasicBlock *NextVPBB =
- RecipeBuilder.handleReplication(Instr, Range, VPBB, Plan);
- if (NextVPBB != VPBB) {
- VPBB = NextVPBB;
- VPBB->setName(BB->hasName() ? BB->getName() + "." + Twine(VPBBsForBB++)
- : "");
+ // Otherwise, add the new recipe.
+ VPRecipeBase *Recipe = cast<VPRecipeBase *>(RecipeOrValue);
+ for (auto *Def : Recipe->definedValues()) {
+ auto *UV = Def->getUnderlyingValue();
+ Plan->addVPValue(UV, Def);
}
+
+ RecipeBuilder.setRecipe(Instr, Recipe);
+ if (isa<VPWidenIntOrFpInductionRecipe>(Recipe) &&
+ HeaderVPBB->getFirstNonPhi() != VPBB->end()) {
+ // Move VPWidenIntOrFpInductionRecipes for optimized truncates to the
+ // phi section of HeaderVPBB.
+ assert(isa<TruncInst>(Instr));
+ Recipe->insertBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi());
+ } else
+ VPBB->appendRecipe(Recipe);
}
VPBlockUtils::insertBlockAfter(new VPBasicBlock(), VPBB);
@@ -8985,7 +9035,12 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// After here, VPBB should not be used.
VPBB = nullptr;
- addUsersInExitBlock(HeaderVPBB, MiddleVPBB, OrigLoop, *Plan);
+ if (CM.requiresScalarEpilogue(Range)) {
+ // No edge from the middle block to the unique exit block has been inserted
+ // and there is nothing to fix from the vector loop; phis should only have
+ // incoming values from the scalar loop.
+ } else
+ addUsersInExitBlock(HeaderVPBB, MiddleVPBB, OrigLoop, *Plan);
assert(isa<VPRegionBlock>(Plan->getVectorLoopRegion()) &&
!Plan->getVectorLoopRegion()->getEntryBasicBlock()->empty() &&
@@ -8998,116 +9053,10 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
// bring the VPlan to its final state.
// ---------------------------------------------------------------------------
- // Apply Sink-After legal constraints.
- auto GetReplicateRegion = [](VPRecipeBase *R) -> VPRegionBlock * {
- auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
- if (Region && Region->isReplicator()) {
- assert(Region->getNumSuccessors() == 1 &&
- Region->getNumPredecessors() == 1 && "Expected SESE region!");
- assert(R->getParent()->size() == 1 &&
- "A recipe in an original replicator region must be the only "
- "recipe in its block");
- return Region;
- }
- return nullptr;
- };
- for (const auto &Entry : SinkAfter) {
- VPRecipeBase *Sink = RecipeBuilder.getRecipe(Entry.first);
- VPRecipeBase *Target = RecipeBuilder.getRecipe(Entry.second);
-
- auto *TargetRegion = GetReplicateRegion(Target);
- auto *SinkRegion = GetReplicateRegion(Sink);
- if (!SinkRegion) {
- // If the sink source is not a replicate region, sink the recipe directly.
- if (TargetRegion) {
- // The target is in a replication region, make sure to move Sink to
- // the block after it, not into the replication region itself.
- VPBasicBlock *NextBlock =
- cast<VPBasicBlock>(TargetRegion->getSuccessors().front());
- Sink->moveBefore(*NextBlock, NextBlock->getFirstNonPhi());
- } else
- Sink->moveAfter(Target);
- continue;
- }
-
- // The sink source is in a replicate region. Unhook the region from the CFG.
- auto *SinkPred = SinkRegion->getSinglePredecessor();
- auto *SinkSucc = SinkRegion->getSingleSuccessor();
- VPBlockUtils::disconnectBlocks(SinkPred, SinkRegion);
- VPBlockUtils::disconnectBlocks(SinkRegion, SinkSucc);
- VPBlockUtils::connectBlocks(SinkPred, SinkSucc);
-
- if (TargetRegion) {
- // The target recipe is also in a replicate region, move the sink region
- // after the target region.
- auto *TargetSucc = TargetRegion->getSingleSuccessor();
- VPBlockUtils::disconnectBlocks(TargetRegion, TargetSucc);
- VPBlockUtils::connectBlocks(TargetRegion, SinkRegion);
- VPBlockUtils::connectBlocks(SinkRegion, TargetSucc);
- } else {
- // The sink source is in a replicate region, we need to move the whole
- // replicate region, which should only contain a single recipe in the
- // main block.
- auto *SplitBlock =
- Target->getParent()->splitAt(std::next(Target->getIterator()));
-
- auto *SplitPred = SplitBlock->getSinglePredecessor();
-
- VPBlockUtils::disconnectBlocks(SplitPred, SplitBlock);
- VPBlockUtils::connectBlocks(SplitPred, SinkRegion);
- VPBlockUtils::connectBlocks(SinkRegion, SplitBlock);
- }
- }
-
- VPlanTransforms::removeRedundantCanonicalIVs(*Plan);
- VPlanTransforms::removeRedundantInductionCasts(*Plan);
-
- // Now that sink-after is done, move induction recipes for optimized truncates
- // to the phi section of the header block.
- for (VPWidenIntOrFpInductionRecipe *Ind : InductionsToMove)
- Ind->moveBefore(*HeaderVPBB, HeaderVPBB->getFirstNonPhi());
-
// Adjust the recipes for any inloop reductions.
adjustRecipesForReductions(cast<VPBasicBlock>(TopRegion->getExiting()), Plan,
RecipeBuilder, Range.Start);
- // Introduce a recipe to combine the incoming and previous values of a
- // fixed-order recurrence.
- for (VPRecipeBase &R :
- Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
- auto *RecurPhi = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R);
- if (!RecurPhi)
- continue;
-
- VPRecipeBase *PrevRecipe = &RecurPhi->getBackedgeRecipe();
- // Fixed-order recurrences do not contain cycles, so this loop is guaranteed
- // to terminate.
- while (auto *PrevPhi =
- dyn_cast<VPFirstOrderRecurrencePHIRecipe>(PrevRecipe))
- PrevRecipe = &PrevPhi->getBackedgeRecipe();
- VPBasicBlock *InsertBlock = PrevRecipe->getParent();
- auto *Region = GetReplicateRegion(PrevRecipe);
- if (Region)
- InsertBlock = dyn_cast<VPBasicBlock>(Region->getSingleSuccessor());
- if (!InsertBlock) {
- InsertBlock = new VPBasicBlock(Region->getName() + ".succ");
- VPBlockUtils::insertBlockAfter(InsertBlock, Region);
- }
- if (Region || PrevRecipe->isPhi())
- Builder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi());
- else
- Builder.setInsertPoint(InsertBlock, std::next(PrevRecipe->getIterator()));
-
- auto *RecurSplice = cast<VPInstruction>(
- Builder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice,
- {RecurPhi, RecurPhi->getBackedgeValue()}));
-
- RecurPhi->replaceAllUsesWith(RecurSplice);
- // Set the first operand of RecurSplice to RecurPhi again, after replacing
- // all users.
- RecurSplice->setOperand(0, RecurPhi);
- }
-
// Interleave memory: for each Interleave Group we marked earlier as relevant
// for this VPlan, replace the Recipes widening its memory instructions with a
// single VPInterleaveRecipe at its insertion point.
@@ -9122,48 +9071,66 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
StoredValues.push_back(StoreR->getStoredValue());
}
+ bool NeedsMaskForGaps =
+ IG->requiresScalarEpilogue() && !CM.isScalarEpilogueAllowed();
auto *VPIG = new VPInterleaveRecipe(IG, Recipe->getAddr(), StoredValues,
- Recipe->getMask());
+ Recipe->getMask(), NeedsMaskForGaps);
VPIG->insertBefore(Recipe);
unsigned J = 0;
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (Instruction *Member = IG->getMember(i)) {
+ VPRecipeBase *MemberR = RecipeBuilder.getRecipe(Member);
if (!Member->getType()->isVoidTy()) {
- VPValue *OriginalV = Plan->getVPValue(Member);
- Plan->removeVPValueFor(Member);
- Plan->addVPValue(Member, VPIG->getVPValue(J));
+ VPValue *OriginalV = MemberR->getVPSingleValue();
OriginalV->replaceAllUsesWith(VPIG->getVPValue(J));
J++;
}
- RecipeBuilder.getRecipe(Member)->eraseFromParent();
+ MemberR->eraseFromParent();
}
}
- for (ElementCount VF = Range.Start; ElementCount::isKnownLT(VF, Range.End);
- VF *= 2)
+ for (ElementCount VF : Range)
Plan->addVF(VF);
Plan->setName("Initial VPlan");
+ // Replace VPValues for known constant strides guaranteed by predicated
+ // scalar evolution.
+ for (auto [_, Stride] : Legal->getLAI()->getSymbolicStrides()) {
+ auto *StrideV = cast<SCEVUnknown>(Stride)->getValue();
+ auto *ScevStride = dyn_cast<SCEVConstant>(PSE.getSCEV(StrideV));
+ // Only handle constant strides for now.
+ if (!ScevStride)
+ continue;
+ Constant *CI = ConstantInt::get(Stride->getType(), ScevStride->getAPInt());
+
+ auto *ConstVPV = Plan->getVPValueOrAddLiveIn(CI);
+ // The versioned value may not be used in the loop directly, so just add a
+ // new live-in in those cases.
+ Plan->getVPValueOrAddLiveIn(StrideV)->replaceAllUsesWith(ConstVPV);
+ }
+
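
Editorial illustration of the loop above: if loop-access analysis versioned this loop on the predicate stride == 1, the symbolic stride's live-in VPValue is replaced here by the constant 1, which lets later VPlan simplifications treat the strided address computation as a consecutive access. (Illustration only, not additional patch content.)
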
// From this point onwards, VPlan-to-VPlan transformations may change the plan
// in ways that accessing values using original IR values is incorrect.
Plan->disableValue2VPValue();
+ // Sink users of fixed-order recurrence past the recipe defining the previous
+ // value and introduce FirstOrderRecurrenceSplice VPInstructions.
+ if (!VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder))
+ return std::nullopt;
+
+ VPlanTransforms::removeRedundantCanonicalIVs(*Plan);
+ VPlanTransforms::removeRedundantInductionCasts(*Plan);
+
VPlanTransforms::optimizeInductions(*Plan, *PSE.getSE());
VPlanTransforms::removeDeadRecipes(*Plan);
- bool ShouldSimplify = true;
- while (ShouldSimplify) {
- ShouldSimplify = VPlanTransforms::sinkScalarOperands(*Plan);
- ShouldSimplify |=
- VPlanTransforms::mergeReplicateRegionsIntoSuccessors(*Plan);
- ShouldSimplify |= VPlanTransforms::mergeBlocksIntoPredecessors(*Plan);
- }
+ VPlanTransforms::createAndOptimizeReplicateRegions(*Plan);
VPlanTransforms::removeRedundantExpandSCEVRecipes(*Plan);
VPlanTransforms::mergeBlocksIntoPredecessors(*Plan);
assert(VPlanVerifier::verifyPlanIsValid(*Plan) && "VPlan is invalid");
- return Plan;
+ return std::make_optional(std::move(Plan));
}
VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
@@ -9175,21 +9142,21 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
// Create new empty VPlan
- auto Plan = std::make_unique<VPlan>();
+ auto Plan = VPlan::createInitialVPlan(
+ createTripCountSCEV(Legal->getWidestInductionType(), PSE, OrigLoop),
+ *PSE.getSE());
// Build hierarchical CFG
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI, *Plan);
HCFGBuilder.buildHierarchicalCFG();
- for (ElementCount VF = Range.Start; ElementCount::isKnownLT(VF, Range.End);
- VF *= 2)
+ for (ElementCount VF : Range)
Plan->addVF(VF);
- SmallPtrSet<Instruction *, 1> DeadInstructions;
VPlanTransforms::VPInstructionsToVPRecipes(
- OrigLoop, Plan,
+ Plan,
[this](PHINode *P) { return Legal->getIntOrFpInductionDescriptor(P); },
- DeadInstructions, *PSE.getSE(), *TLI);
+ *PSE.getSE(), *TLI);
// Remove the existing terminator of the exiting block of the top-most region.
// A BranchOnCount will be added instead when adding the canonical IV recipes.
@@ -9198,7 +9165,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
Term->eraseFromParent();
addCanonicalIVRecipes(*Plan, Legal->getWidestInductionType(), DebugLoc(),
- true, CM.useActiveLaneMaskForControlFlow());
+ CM.getTailFoldingStyle());
return Plan;
}
@@ -9255,7 +9222,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
VPBuilder::InsertPointGuard Guard(Builder);
Builder.setInsertPoint(WidenRecipe->getParent(),
WidenRecipe->getIterator());
- CondOp = RecipeBuilder.createBlockInMask(R->getParent(), Plan);
+ CondOp = RecipeBuilder.createBlockInMask(R->getParent(), *Plan);
}
if (IsFMulAdd) {
@@ -9270,7 +9237,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
VecOp = FMulRecipe;
}
VPReductionRecipe *RedRecipe =
- new VPReductionRecipe(&RdxDesc, R, ChainOp, VecOp, CondOp, TTI);
+ new VPReductionRecipe(&RdxDesc, R, ChainOp, VecOp, CondOp, &TTI);
WidenRecipe->getVPSingleValue()->replaceAllUsesWith(RedRecipe);
Plan->removeVPValueFor(R);
Plan->addVPValue(R, RedRecipe);
@@ -9304,13 +9271,15 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
if (!PhiR || PhiR->isInLoop())
continue;
VPValue *Cond =
- RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), Plan);
+ RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), *Plan);
VPValue *Red = PhiR->getBackedgeValue();
assert(Red->getDefiningRecipe()->getParent() != LatchVPBB &&
"reduction recipe must be defined before latch");
Builder.createNaryOp(Instruction::Select, {Cond, Red, PhiR});
}
}
+
+ VPlanTransforms::clearReductionWrapFlags(*Plan);
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -9475,7 +9444,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
PartStart, ConstantInt::get(PtrInd->getType(), Lane));
Value *GlobalIdx = State.Builder.CreateAdd(PtrInd, Idx);
- Value *Step = State.get(getOperand(1), VPIteration(0, Part));
+ Value *Step = State.get(getOperand(1), VPIteration(Part, Lane));
Value *SclrGep = emitTransformedIndex(
State.Builder, GlobalIdx, IndDesc.getStartValue(), Step, IndDesc);
SclrGep->setName("next.gep");
@@ -9485,8 +9454,6 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
return;
}
- assert(isa<SCEVConstant>(IndDesc.getStep()) &&
- "Induction step not a SCEV constant!");
Type *PhiType = IndDesc.getStep()->getType();
// Build a pointer phi
@@ -9506,7 +9473,7 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
Value *NumUnrolledElems =
State.Builder.CreateMul(RuntimeVF, ConstantInt::get(PhiType, State.UF));
Value *InductionGEP = GetElementPtrInst::Create(
- IndDesc.getElementType(), NewPointerPhi,
+ State.Builder.getInt8Ty(), NewPointerPhi,
State.Builder.CreateMul(ScalarStepValue, NumUnrolledElems), "ptr.ind",
InductionLoc);
// Add induction update using an incorrect block temporarily. The phi node
@@ -9529,10 +9496,10 @@ void VPWidenPointerInductionRecipe::execute(VPTransformState &State) {
StartOffset = State.Builder.CreateAdd(
StartOffset, State.Builder.CreateStepVector(VecPhiType));
- assert(ScalarStepValue == State.get(getOperand(1), VPIteration(0, Part)) &&
+ assert(ScalarStepValue == State.get(getOperand(1), VPIteration(Part, 0)) &&
"scalar step must be the same across all parts");
Value *GEP = State.Builder.CreateGEP(
- IndDesc.getElementType(), NewPointerPhi,
+ State.Builder.getInt8Ty(), NewPointerPhi,
State.Builder.CreateMul(
StartOffset,
State.Builder.CreateVectorSplat(State.VF, ScalarStepValue),
@@ -9584,7 +9551,8 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
void VPInterleaveRecipe::execute(VPTransformState &State) {
assert(!State.Instance && "Interleave group being replicated.");
State.ILV->vectorizeInterleaveGroup(IG, definedValues(), State, getAddr(),
- getStoredValues(), getMask());
+ getStoredValues(), getMask(),
+ NeedsMaskForGaps);
}
void VPReductionRecipe::execute(VPTransformState &State) {
@@ -9640,10 +9608,9 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
Instruction *UI = getUnderlyingInstr();
if (State.Instance) { // Generate a single instance.
assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
- State.ILV->scalarizeInstruction(UI, this, *State.Instance,
- IsPredicated, State);
+ State.ILV->scalarizeInstruction(UI, this, *State.Instance, State);
// Insert scalar instance packing it into a vector.
- if (AlsoPack && State.VF.isVector()) {
+ if (State.VF.isVector() && shouldPack()) {
// If we're constructing lane 0, initialize to start from poison.
if (State.Instance->Lane.isFirstLane()) {
assert(!State.VF.isScalable() && "VF is assumed to be non scalable.");
@@ -9663,8 +9630,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
all_of(operands(), [](VPValue *Op) {
return Op->isDefinedOutsideVectorRegions();
})) {
- State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), IsPredicated,
- State);
+ State.ILV->scalarizeInstruction(UI, this, VPIteration(0, 0), State);
if (user_begin() != user_end()) {
for (unsigned Part = 1; Part < State.UF; ++Part)
State.set(this, State.get(this, VPIteration(0, 0)),
@@ -9676,16 +9642,16 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
// Uniform within VL means we need to generate lane 0 only for each
// unrolled copy.
for (unsigned Part = 0; Part < State.UF; ++Part)
- State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, 0),
- IsPredicated, State);
+ State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, 0), State);
return;
}
- // A store of a loop varying value to a loop invariant address only
- // needs only the last copy of the store.
- if (isa<StoreInst>(UI) && !getOperand(1)->hasDefiningRecipe()) {
+ // A store of a loop varying value to a uniform address only needs the last
+ // copy of the store.
+ if (isa<StoreInst>(UI) &&
+ vputils::isUniformAfterVectorization(getOperand(1))) {
auto Lane = VPLane::getLastLaneForVF(State.VF);
- State.ILV->scalarizeInstruction(UI, this, VPIteration(State.UF - 1, Lane), IsPredicated,
+ State.ILV->scalarizeInstruction(UI, this, VPIteration(State.UF - 1, Lane),
State);
return;
}
@@ -9695,8 +9661,7 @@ void VPReplicateRecipe::execute(VPTransformState &State) {
const unsigned EndLane = State.VF.getKnownMinValue();
for (unsigned Part = 0; Part < State.UF; ++Part)
for (unsigned Lane = 0; Lane < EndLane; ++Lane)
- State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, Lane),
- IsPredicated, State);
+ State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, Lane), State);
}
void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
@@ -9714,7 +9679,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
auto *DataTy = VectorType::get(ScalarDataTy, State.VF);
const Align Alignment = getLoadStoreAlignment(&Ingredient);
- bool CreateGatherScatter = !Consecutive;
+ bool CreateGatherScatter = !isConsecutive();
auto &Builder = State.Builder;
InnerLoopVectorizer::VectorParts BlockInMaskParts(State.UF);
@@ -9725,36 +9690,39 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * {
// Calculate the pointer for the specific unroll-part.
- GetElementPtrInst *PartPtr = nullptr;
-
+ Value *PartPtr = nullptr;
+
+ // Use i32 for the gep index type when the value is constant,
+ // or query DataLayout for a more suitable index type otherwise.
+ const DataLayout &DL =
+ Builder.GetInsertBlock()->getModule()->getDataLayout();
+ Type *IndexTy = State.VF.isScalable() && (isReverse() || Part > 0)
+ ? DL.getIndexType(ScalarDataTy->getPointerTo())
+ : Builder.getInt32Ty();
bool InBounds = false;
if (auto *gep = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts()))
InBounds = gep->isInBounds();
- if (Reverse) {
+ if (isReverse()) {
// If the address is consecutive but reversed, then the
// wide store needs to start at the last vector element.
// RunTimeVF = VScale * VF.getKnownMinValue()
// For fixed-width VScale is 1, then RunTimeVF = VF.getKnownMinValue()
- Value *RunTimeVF = getRuntimeVF(Builder, Builder.getInt32Ty(), State.VF);
+ Value *RunTimeVF = getRuntimeVF(Builder, IndexTy, State.VF);
// NumElt = -Part * RunTimeVF
- Value *NumElt = Builder.CreateMul(Builder.getInt32(-Part), RunTimeVF);
+ Value *NumElt =
+ Builder.CreateMul(ConstantInt::get(IndexTy, -(int64_t)Part), RunTimeVF);
// LastLane = 1 - RunTimeVF
- Value *LastLane = Builder.CreateSub(Builder.getInt32(1), RunTimeVF);
+ Value *LastLane =
+ Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF);
+ PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, NumElt, "", InBounds);
PartPtr =
- cast<GetElementPtrInst>(Builder.CreateGEP(ScalarDataTy, Ptr, NumElt));
- PartPtr->setIsInBounds(InBounds);
- PartPtr = cast<GetElementPtrInst>(
- Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane));
- PartPtr->setIsInBounds(InBounds);
+ Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane, "", InBounds);
if (isMaskRequired) // Reverse of a null all-one mask is a null mask.
BlockInMaskParts[Part] =
Builder.CreateVectorReverse(BlockInMaskParts[Part], "reverse");
} else {
- Value *Increment =
- createStepForVF(Builder, Builder.getInt32Ty(), State.VF, Part);
- PartPtr = cast<GetElementPtrInst>(
- Builder.CreateGEP(ScalarDataTy, Ptr, Increment));
- PartPtr->setIsInBounds(InBounds);
+ Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part);
+ PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, Increment, "", InBounds);
}
unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
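
Editorial aside: working the reverse-access offsets above through a fixed-width example (VF = 4, vscale = 1), purely as an aid to reading the GEP math.

  #include <cstdio>

  int main() {
    int VF = 4; // RunTimeVF for a fixed-width VF with vscale = 1
    for (int Part = 0; Part < 2; ++Part) {
      int NumElt = -Part * VF; // -Part * RunTimeVF
      int LastLane = 1 - VF;   // 1 - RunTimeVF
      // Part 0 starts at Ptr[-3] and covers Ptr[-3..0]; Part 1 starts at Ptr[-7]
      // and covers Ptr[-7..-4]; each wide access is then reversed lane-wise.
      std::printf("part %d starts at Ptr[%d]\n", Part, NumElt + LastLane);
    }
    return 0;
  }
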
@@ -9774,7 +9742,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment,
MaskPart);
} else {
- if (Reverse) {
+ if (isReverse()) {
// If we store to reverse consecutive memory locations, then we need
// to reverse the order of elements in the stored value.
StoredVal = Builder.CreateVectorReverse(StoredVal, "reverse");
@@ -9833,7 +9801,6 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
static ScalarEpilogueLowering getScalarEpilogueLowering(
Function *F, Loop *L, LoopVectorizeHints &Hints, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI, TargetTransformInfo *TTI, TargetLibraryInfo *TLI,
- AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
LoopVectorizationLegality &LVL, InterleavedAccessInfo *IAI) {
// 1) OptSize takes precedence over all other options, i.e. if this is set,
// don't look at hints or options, and don't request a scalar epilogue.
@@ -9869,7 +9836,8 @@ static ScalarEpilogueLowering getScalarEpilogueLowering(
};
// 4) if the TTI hook indicates this is profitable, request predication.
- if (TTI->preferPredicateOverEpilogue(L, LI, *SE, *AC, TLI, DT, &LVL, IAI))
+ TailFoldingInfo TFI(TLI, &LVL, IAI);
+ if (TTI->preferPredicateOverEpilogue(&TFI))
return CM_ScalarEpilogueNotNeededUsePredicate;
return CM_ScalarEpilogueAllowed;
@@ -9880,9 +9848,29 @@ Value *VPTransformState::get(VPValue *Def, unsigned Part) {
if (hasVectorValue(Def, Part))
return Data.PerPartOutput[Def][Part];
+ auto GetBroadcastInstrs = [this, Def](Value *V) {
+ bool SafeToHoist = Def->isDefinedOutsideVectorRegions();
+ if (VF.isScalar())
+ return V;
+ // Place the code for broadcasting invariant variables in the new preheader.
+ IRBuilder<>::InsertPointGuard Guard(Builder);
+ if (SafeToHoist) {
+ BasicBlock *LoopVectorPreHeader = CFG.VPBB2IRBB[cast<VPBasicBlock>(
+ Plan->getVectorLoopRegion()->getSinglePredecessor())];
+ if (LoopVectorPreHeader)
+ Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
+ }
+
+ // Place the code for broadcasting invariant variables in the new preheader.
+ // Broadcast the scalar into all locations in the vector.
+ Value *Shuf = Builder.CreateVectorSplat(VF, V, "broadcast");
+
+ return Shuf;
+ };
+
if (!hasScalarValue(Def, {Part, 0})) {
Value *IRV = Def->getLiveInIRValue();
- Value *B = ILV->getBroadcastInstrs(IRV);
+ Value *B = GetBroadcastInstrs(IRV);
set(Def, B, Part);
return B;
}
@@ -9900,9 +9888,11 @@ Value *VPTransformState::get(VPValue *Def, unsigned Part) {
unsigned LastLane = IsUniform ? 0 : VF.getKnownMinValue() - 1;
// Check if there is a scalar value for the selected lane.
if (!hasScalarValue(Def, {Part, LastLane})) {
- // At the moment, VPWidenIntOrFpInductionRecipes and VPScalarIVStepsRecipes can also be uniform.
+ // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
+ // VPExpandSCEVRecipes can also be uniform.
assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDefiningRecipe()) ||
- isa<VPScalarIVStepsRecipe>(Def->getDefiningRecipe())) &&
+ isa<VPScalarIVStepsRecipe>(Def->getDefiningRecipe()) ||
+ isa<VPExpandSCEVRecipe>(Def->getDefiningRecipe())) &&
"unexpected recipe found to be invariant");
IsUniform = true;
LastLane = 0;
@@ -9927,7 +9917,7 @@ Value *VPTransformState::get(VPValue *Def, unsigned Part) {
// State, we will only generate the insertelements once.
Value *VectorValue = nullptr;
if (IsUniform) {
- VectorValue = ILV->getBroadcastInstrs(ScalarValue);
+ VectorValue = GetBroadcastInstrs(ScalarValue);
set(Def, VectorValue, Part);
} else {
// Initialize packing with insertelements to start from undef.
@@ -9962,15 +9952,15 @@ static bool processLoopInVPlanNativePath(
Function *F = L->getHeader()->getParent();
InterleavedAccessInfo IAI(PSE, L, DT, LI, LVL->getLAI());
- ScalarEpilogueLowering SEL = getScalarEpilogueLowering(
- F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, PSE.getSE(), DT, *LVL, &IAI);
+ ScalarEpilogueLowering SEL =
+ getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, *LVL, &IAI);
LoopVectorizationCostModel CM(SEL, L, PSE, LI, LVL, *TTI, TLI, DB, AC, ORE, F,
&Hints, IAI);
// Use the planner for outer loop vectorization.
// TODO: CM is not used at this point inside the planner. Turn CM into an
// optional argument if we don't need it in the future.
- LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE, Hints, ORE);
+ LoopVectorizationPlanner LVP(L, LI, TLI, *TTI, LVL, CM, IAI, PSE, Hints, ORE);
// Get user vectorization factor.
ElementCount UserVF = Hints.getWidth();
@@ -10231,8 +10221,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Check the function attributes and profiles to find out if this function
// should be optimized for size.
- ScalarEpilogueLowering SEL = getScalarEpilogueLowering(
- F, L, Hints, PSI, BFI, TTI, TLI, AC, LI, PSE.getSE(), DT, LVL, &IAI);
+ ScalarEpilogueLowering SEL =
+ getScalarEpilogueLowering(F, L, Hints, PSI, BFI, TTI, TLI, LVL, &IAI);
// Check the loop for a trip count threshold: vectorize loops with a tiny trip
// count by optimizing for size, to minimize overheads.
@@ -10309,11 +10299,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Use the cost model.
LoopVectorizationCostModel CM(SEL, L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE,
F, &Hints, IAI);
- CM.collectValuesToIgnore();
- CM.collectElementTypesForWidening();
-
// Use the planner for vectorization.
- LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE, Hints, ORE);
+ LoopVectorizationPlanner LVP(L, LI, TLI, *TTI, &LVL, CM, IAI, PSE, Hints,
+ ORE);
// Get user vectorization factor and interleave count.
ElementCount UserVF = Hints.getWidth();
@@ -10342,7 +10330,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
bool ForceVectorization =
Hints.getForce() == LoopVectorizeHints::FK_Enabled;
if (!ForceVectorization &&
- !areRuntimeChecksProfitable(Checks, VF, CM.getVScaleForTuning(), L,
+ !areRuntimeChecksProfitable(Checks, VF, getVScaleForTuning(L, *TTI), L,
*PSE.getSE())) {
ORE->emit([&]() {
return OptimizationRemarkAnalysisAliasing(
@@ -10464,7 +10452,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Consider vectorizing the epilogue too if it's profitable.
VectorizationFactor EpilogueVF =
- CM.selectEpilogueVectorizationFactor(VF.Width, LVP);
+ LVP.selectEpilogueVectorizationFactor(VF.Width, IC);
if (EpilogueVF.Width.isVector()) {
// The first pass vectorizes the main loop and creates a scalar epilogue
@@ -10475,8 +10463,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
EPI, &LVL, &CM, BFI, PSI, Checks);
VPlan &BestMainPlan = LVP.getBestPlanFor(EPI.MainLoopVF);
- LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, BestMainPlan, MainILV,
- DT, true);
+ auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
+ BestMainPlan, MainILV, DT, true);
++LoopsVectorized;
// Second pass vectorizes the epilogue and adjusts the control flow
@@ -10492,6 +10480,21 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
Header->setName("vec.epilog.vector.body");
+ // Re-use the trip count and steps expanded for the main loop, as
+ // skeleton creation needs them as values that dominate both the scalar
+ // and vector epilogue loops.
+ // TODO: This is a workaround needed for epilogue vectorization and it
+ // should be removed once induction resume value creation is done
+ // directly in VPlan.
+ EpilogILV.setTripCount(MainILV.getTripCount());
+ for (auto &R : make_early_inc_range(*BestEpiPlan.getPreheader())) {
+ auto *ExpandR = cast<VPExpandSCEVRecipe>(&R);
+ auto *ExpandedVal = BestEpiPlan.getVPValueOrAddLiveIn(
+ ExpandedSCEVs.find(ExpandR->getSCEV())->second);
+ ExpandR->replaceAllUsesWith(ExpandedVal);
+ ExpandR->eraseFromParent();
+ }
+
// Ensure that the start values for any VPWidenIntOrFpInductionRecipe,
// VPWidenPointerInductionRecipe and VPReductionPHIRecipes are updated
// before vectorizing the epilogue loop.
@@ -10520,15 +10523,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
}
ResumeV = MainILV.createInductionResumeValue(
- IndPhi, *ID, {EPI.MainLoopIterationCountCheck});
+ IndPhi, *ID, getExpandedStep(*ID, ExpandedSCEVs),
+ {EPI.MainLoopIterationCountCheck});
}
assert(ResumeV && "Must have a resume value");
- VPValue *StartVal = BestEpiPlan.getOrAddExternalDef(ResumeV);
+ VPValue *StartVal = BestEpiPlan.getVPValueOrAddLiveIn(ResumeV);
cast<VPHeaderPHIRecipe>(&R)->setStartValue(StartVal);
}
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
- DT, true);
+ DT, true, &ExpandedSCEVs);
++LoopsEpilogueVectorized;
if (!MainILV.areSafetyChecksAdded())
@@ -10581,14 +10585,14 @@ bool LoopVectorizePass::processLoop(Loop *L) {
LoopVectorizeResult LoopVectorizePass::runImpl(
Function &F, ScalarEvolution &SE_, LoopInfo &LI_, TargetTransformInfo &TTI_,
- DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
+ DominatorTree &DT_, BlockFrequencyInfo *BFI_, TargetLibraryInfo *TLI_,
DemandedBits &DB_, AssumptionCache &AC_, LoopAccessInfoManager &LAIs_,
OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_) {
SE = &SE_;
LI = &LI_;
TTI = &TTI_;
DT = &DT_;
- BFI = &BFI_;
+ BFI = BFI_;
TLI = TLI_;
AC = &AC_;
LAIs = &LAIs_;
@@ -10604,7 +10608,7 @@ LoopVectorizeResult LoopVectorizePass::runImpl(
// vector registers, loop vectorization may still enable scalar
// interleaving.
if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)) &&
- TTI->getMaxInterleaveFactor(1) < 2)
+ TTI->getMaxInterleaveFactor(ElementCount::getFixed(1)) < 2)
return LoopVectorizeResult(false, false);
bool Changed = false, CFGChanged = false;
@@ -10656,7 +10660,6 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
- auto &BFI = AM.getResult<BlockFrequencyAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &DB = AM.getResult<DemandedBitsAnalysis>(F);
@@ -10666,12 +10669,20 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
ProfileSummaryInfo *PSI =
MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+ BlockFrequencyInfo *BFI = nullptr;
+ if (PSI && PSI->hasProfileSummary())
+ BFI = &AM.getResult<BlockFrequencyAnalysis>(F);
LoopVectorizeResult Result =
runImpl(F, SE, LI, TTI, DT, BFI, &TLI, DB, AC, LAIs, ORE, PSI);
if (!Result.MadeAnyChange)
return PreservedAnalyses::all();
PreservedAnalyses PA;
+ if (isAssignmentTrackingEnabled(*F.getParent())) {
+ for (auto &BB : F)
+ RemoveRedundantDbgInstrs(&BB);
+ }
+
// We currently do not preserve loopinfo/dominator analyses with outer loop
// vectorization. Until this is addressed, mark these analyses as preserved
// only for non-VPlan-native path.
@@ -10679,6 +10690,11 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
if (!EnableVPlanNativePath) {
PA.preserve<LoopAnalysis>();
PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+
+#ifdef EXPENSIVE_CHECKS
+ SE.verify();
+#endif
}
if (Result.MadeCFGChange) {
@@ -10699,8 +10715,8 @@ void LoopVectorizePass::printPipeline(
static_cast<PassInfoMixin<LoopVectorizePass> *>(this)->printPipeline(
OS, MapClassName2PassName);
- OS << "<";
+ OS << '<';
OS << (InterleaveOnlyWhenForced ? "" : "no-") << "interleave-forced-only;";
OS << (VectorizeOnlyWhenForced ? "" : "no-") << "vectorize-forced-only;";
- OS << ">";
+ OS << '>';
}
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e3eb6b1804e7..821a3fa22a85 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -87,7 +87,6 @@
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -126,6 +125,13 @@ static cl::opt<bool> ShouldStartVectorizeHorAtStore(
cl::desc(
"Attempt to vectorize horizontal reductions feeding into a store"));
+// NOTE: If AllowHorRdxIdenityOptimization is true, the optimization will run
+// even if we match a reduction but do not vectorize in the end.
+static cl::opt<bool> AllowHorRdxIdenityOptimization(
+ "slp-optimize-identity-hor-reduction-ops", cl::init(true), cl::Hidden,
+ cl::desc("Allow optimization of original scalar identity operations on "
+ "matched horizontal reductions."));
+
static cl::opt<int>
MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
cl::desc("Attempt to vectorize for this register size in bits"));
@@ -287,7 +293,7 @@ static bool isCommutative(Instruction *I) {
/// \returns inserting index of InsertElement or InsertValue instruction,
/// using Offset as base offset for index.
static std::optional<unsigned> getInsertIndex(const Value *InsertInst,
- unsigned Offset = 0) {
+ unsigned Offset = 0) {
int Index = Offset;
if (const auto *IE = dyn_cast<InsertElementInst>(InsertInst)) {
const auto *VT = dyn_cast<FixedVectorType>(IE->getType());
@@ -342,16 +348,16 @@ enum class UseMask {
static SmallBitVector buildUseMask(int VF, ArrayRef<int> Mask,
UseMask MaskArg) {
SmallBitVector UseMask(VF, true);
- for (auto P : enumerate(Mask)) {
- if (P.value() == UndefMaskElem) {
+ for (auto [Idx, Value] : enumerate(Mask)) {
+ if (Value == PoisonMaskElem) {
if (MaskArg == UseMask::UndefsAsMask)
- UseMask.reset(P.index());
+ UseMask.reset(Idx);
continue;
}
- if (MaskArg == UseMask::FirstArg && P.value() < VF)
- UseMask.reset(P.value());
- else if (MaskArg == UseMask::SecondArg && P.value() >= VF)
- UseMask.reset(P.value() - VF);
+ if (MaskArg == UseMask::FirstArg && Value < VF)
+ UseMask.reset(Value);
+ else if (MaskArg == UseMask::SecondArg && Value >= VF)
+ UseMask.reset(Value - VF);
}
return UseMask;
}
@@ -374,9 +380,9 @@ static SmallBitVector isUndefVector(const Value *V,
if (!UseMask.empty()) {
const Value *Base = V;
while (auto *II = dyn_cast<InsertElementInst>(Base)) {
+ Base = II->getOperand(0);
if (isa<T>(II->getOperand(1)))
continue;
- Base = II->getOperand(0);
std::optional<unsigned> Idx = getInsertIndex(II);
if (!Idx)
continue;
@@ -461,7 +467,7 @@ isFixedVectorShuffle(ArrayRef<Value *> VL, SmallVectorImpl<int> &Mask) {
Value *Vec2 = nullptr;
enum ShuffleMode { Unknown, Select, Permute };
ShuffleMode CommonShuffleMode = Unknown;
- Mask.assign(VL.size(), UndefMaskElem);
+ Mask.assign(VL.size(), PoisonMaskElem);
for (unsigned I = 0, E = VL.size(); I < E; ++I) {
// Undef can be represented as an undef element in a vector.
if (isa<UndefValue>(VL[I]))
@@ -533,6 +539,117 @@ static std::optional<unsigned> getExtractIndex(Instruction *E) {
return *EI->idx_begin();
}
+/// Tries to find extractelement instructions with constant indices from fixed
+/// vector type and gather such instructions into a bunch, which is highly likely
+/// to be detected as a shuffle of 1 or 2 input vectors. If this attempt was
+/// successful, the matched scalars are replaced by poison values in \p VL for
+/// future analysis.
+static std::optional<TTI::ShuffleKind>
+tryToGatherExtractElements(SmallVectorImpl<Value *> &VL,
+ SmallVectorImpl<int> &Mask) {
+ // Scan list of gathered scalars for extractelements that can be represented
+ // as shuffles.
+ MapVector<Value *, SmallVector<int>> VectorOpToIdx;
+ SmallVector<int> UndefVectorExtracts;
+ for (int I = 0, E = VL.size(); I < E; ++I) {
+ auto *EI = dyn_cast<ExtractElementInst>(VL[I]);
+ if (!EI) {
+ if (isa<UndefValue>(VL[I]))
+ UndefVectorExtracts.push_back(I);
+ continue;
+ }
+ auto *VecTy = dyn_cast<FixedVectorType>(EI->getVectorOperandType());
+ if (!VecTy || !isa<ConstantInt, UndefValue>(EI->getIndexOperand()))
+ continue;
+ std::optional<unsigned> Idx = getExtractIndex(EI);
+ // Undefined index.
+ if (!Idx) {
+ UndefVectorExtracts.push_back(I);
+ continue;
+ }
+ SmallBitVector ExtractMask(VecTy->getNumElements(), true);
+ ExtractMask.reset(*Idx);
+ if (isUndefVector(EI->getVectorOperand(), ExtractMask).all()) {
+ UndefVectorExtracts.push_back(I);
+ continue;
+ }
+ VectorOpToIdx[EI->getVectorOperand()].push_back(I);
+ }
+ // Sort the vector operands by the maximum number of uses in extractelements.
+ MapVector<unsigned, SmallVector<Value *>> VFToVector;
+ for (const auto &Data : VectorOpToIdx)
+ VFToVector[cast<FixedVectorType>(Data.first->getType())->getNumElements()]
+ .push_back(Data.first);
+ for (auto &Data : VFToVector) {
+ stable_sort(Data.second, [&VectorOpToIdx](Value *V1, Value *V2) {
+ return VectorOpToIdx.find(V1)->second.size() >
+ VectorOpToIdx.find(V2)->second.size();
+ });
+ }
+ // Find the best pair of the vectors with the same number of elements or a
+ // single vector.
+ const int UndefSz = UndefVectorExtracts.size();
+ unsigned SingleMax = 0;
+ Value *SingleVec = nullptr;
+ unsigned PairMax = 0;
+ std::pair<Value *, Value *> PairVec(nullptr, nullptr);
+ for (auto &Data : VFToVector) {
+ Value *V1 = Data.second.front();
+ if (SingleMax < VectorOpToIdx[V1].size() + UndefSz) {
+ SingleMax = VectorOpToIdx[V1].size() + UndefSz;
+ SingleVec = V1;
+ }
+ Value *V2 = nullptr;
+ if (Data.second.size() > 1)
+ V2 = *std::next(Data.second.begin());
+ if (V2 && PairMax < VectorOpToIdx[V1].size() + VectorOpToIdx[V2].size() +
+ UndefSz) {
+ PairMax = VectorOpToIdx[V1].size() + VectorOpToIdx[V2].size() + UndefSz;
+ PairVec = std::make_pair(V1, V2);
+ }
+ }
+ if (SingleMax == 0 && PairMax == 0 && UndefSz == 0)
+ return std::nullopt;
+ // Check if better to perform a shuffle of 2 vectors or just of a single
+ // vector.
+ SmallVector<Value *> SavedVL(VL.begin(), VL.end());
+ SmallVector<Value *> GatheredExtracts(
+ VL.size(), PoisonValue::get(VL.front()->getType()));
+ if (SingleMax >= PairMax && SingleMax) {
+ for (int Idx : VectorOpToIdx[SingleVec])
+ std::swap(GatheredExtracts[Idx], VL[Idx]);
+ } else {
+ for (Value *V : {PairVec.first, PairVec.second})
+ for (int Idx : VectorOpToIdx[V])
+ std::swap(GatheredExtracts[Idx], VL[Idx]);
+ }
+ // Add extracts from undefs too.
+ for (int Idx : UndefVectorExtracts)
+ std::swap(GatheredExtracts[Idx], VL[Idx]);
+ // Check that gather of extractelements can be represented as just a
+ // shuffle of a single/two vectors the scalars are extracted from.
+ std::optional<TTI::ShuffleKind> Res =
+ isFixedVectorShuffle(GatheredExtracts, Mask);
+ if (!Res) {
+ // TODO: try to check other subsets if possible.
+ // Restore the original VL if attempt was not successful.
+ VL.swap(SavedVL);
+ return std::nullopt;
+ }
+ // Restore unused scalars from mask, if some of the extractelements were not
+ // selected for shuffle.
+ for (int I = 0, E = GatheredExtracts.size(); I < E; ++I) {
+ auto *EI = dyn_cast<ExtractElementInst>(VL[I]);
+ if (!EI || !isa<FixedVectorType>(EI->getVectorOperandType()) ||
+ !isa<ConstantInt, UndefValue>(EI->getIndexOperand()) ||
+ is_contained(UndefVectorExtracts, I))
+ continue;
+ if (Mask[I] == PoisonMaskElem && !isa<PoisonValue>(GatheredExtracts[I]))
+ std::swap(VL[I], GatheredExtracts[I]);
+ }
+ return Res;
+}
+
namespace {
/// Main data required for vectorization of instructions.
@@ -829,18 +946,29 @@ static bool isSimple(Instruction *I) {
}
/// Shuffles \p Mask in accordance with the given \p SubMask.
-static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask) {
+/// \param ExtendingManyInputs Supports reshuffling of the mask with not only
+/// one but two input vectors.
+static void addMask(SmallVectorImpl<int> &Mask, ArrayRef<int> SubMask,
+ bool ExtendingManyInputs = false) {
if (SubMask.empty())
return;
+ assert(
+ (!ExtendingManyInputs || SubMask.size() > Mask.size() ||
+ // Check if input scalars were extended to match the size of other node.
+ (SubMask.size() == Mask.size() &&
+ std::all_of(std::next(Mask.begin(), Mask.size() / 2), Mask.end(),
+ [](int Idx) { return Idx == PoisonMaskElem; }))) &&
+ "SubMask with many inputs support must be larger than the mask.");
if (Mask.empty()) {
Mask.append(SubMask.begin(), SubMask.end());
return;
}
- SmallVector<int> NewMask(SubMask.size(), UndefMaskElem);
+ SmallVector<int> NewMask(SubMask.size(), PoisonMaskElem);
int TermValue = std::min(Mask.size(), SubMask.size());
for (int I = 0, E = SubMask.size(); I < E; ++I) {
- if (SubMask[I] >= TermValue || SubMask[I] == UndefMaskElem ||
- Mask[SubMask[I]] >= TermValue)
+ if (SubMask[I] == PoisonMaskElem ||
+ (!ExtendingManyInputs &&
+ (SubMask[I] >= TermValue || Mask[SubMask[I]] >= TermValue)))
continue;
NewMask[I] = Mask[SubMask[I]];
}
@@ -887,7 +1015,7 @@ static void inversePermutation(ArrayRef<unsigned> Indices,
SmallVectorImpl<int> &Mask) {
Mask.clear();
const unsigned E = Indices.size();
- Mask.resize(E, UndefMaskElem);
+ Mask.resize(E, PoisonMaskElem);
for (unsigned I = 0; I < E; ++I)
Mask[Indices[I]] = I;
}
@@ -900,7 +1028,7 @@ static void reorderScalars(SmallVectorImpl<Value *> &Scalars,
UndefValue::get(Scalars.front()->getType()));
Prev.swap(Scalars);
for (unsigned I = 0, E = Prev.size(); I < E; ++I)
- if (Mask[I] != UndefMaskElem)
+ if (Mask[I] != PoisonMaskElem)
Scalars[Mask[I]] = Prev[I];
}
@@ -962,6 +1090,7 @@ namespace slpvectorizer {
class BoUpSLP {
struct TreeEntry;
struct ScheduleData;
+ class ShuffleCostEstimator;
class ShuffleInstructionBuilder;
public:
@@ -1006,8 +1135,12 @@ public:
/// Vectorize the tree but with the list of externally used values \p
/// ExternallyUsedValues. Values in this MapVector can be replaced by the
/// generated extractvalue instructions.
- Value *vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
- Instruction *ReductionRoot = nullptr);
+ /// \param ReplacedExternals contains the list of replaced external values
+ /// {scalar, replace} after emitting extractelement for external uses.
+ Value *
+ vectorizeTree(const ExtraValueToDebugLocsMap &ExternallyUsedValues,
+ SmallVectorImpl<std::pair<Value *, Value *>> &ReplacedExternals,
+ Instruction *ReductionRoot = nullptr);
/// \returns the cost incurred by unwanted spills and fills, caused by
/// holding live values over call sites.
@@ -1025,24 +1158,18 @@ public:
/// Construct a vectorizable tree that starts at \p Roots.
void buildTree(ArrayRef<Value *> Roots);
- /// Checks if the very first tree node is going to be vectorized.
- bool isVectorizedFirstNode() const {
- return !VectorizableTree.empty() &&
- VectorizableTree.front()->State == TreeEntry::Vectorize;
- }
-
- /// Returns the main instruction for the very first node.
- Instruction *getFirstNodeMainOp() const {
- assert(!VectorizableTree.empty() && "No tree to get the first node from");
- return VectorizableTree.front()->getMainOp();
- }
-
/// Returns whether the root node has in-tree uses.
bool doesRootHaveInTreeUses() const {
return !VectorizableTree.empty() &&
!VectorizableTree.front()->UserTreeIndices.empty();
}
+ /// Return the scalars of the root node.
+ ArrayRef<Value *> getRootNodeScalars() const {
+ assert(!VectorizableTree.empty() && "No graph to get the first node from");
+ return VectorizableTree.front()->Scalars;
+ }
+
/// Builds external uses of the vectorized scalars, i.e. the list of
/// vectorized scalars to be extracted, their lanes and their scalar users. \p
/// ExternallyUsedValues contains additional list of external uses to handle
@@ -1064,6 +1191,8 @@ public:
MinBWs.clear();
InstrElementSize.clear();
UserIgnoreList = nullptr;
+ PostponedGathers.clear();
+ ValueToGatherNodes.clear();
}
unsigned getTreeSize() const { return VectorizableTree.size(); }
@@ -1083,9 +1212,12 @@ public:
/// Gets reordering data for the given tree entry. If the entry is vectorized
/// - just return ReorderIndices, otherwise check if the scalars can be
/// reordered and return the most optimal order.
+ /// \return std::nullopt if ordering is not important, an empty order if
+ /// identity order is important, or the actual order.
/// \param TopToBottom If true, include the order of vectorized stores and
/// insertelement nodes, otherwise skip them.
- std::optional<OrdersType> getReorderingData(const TreeEntry &TE, bool TopToBottom);
+ std::optional<OrdersType> getReorderingData(const TreeEntry &TE,
+ bool TopToBottom);
/// Reorders the current graph to the most profitable order starting from the
/// root node to the leaf nodes. The best order is chosen only from the nodes
@@ -1328,8 +1460,14 @@ public:
ConstantInt *Ex1Idx;
if (match(V1, m_ExtractElt(m_Value(EV1), m_ConstantInt(Ex1Idx)))) {
// Undefs are always profitable for extractelements.
+ // Compiler can easily combine poison and extractelement <non-poison> or
+ // undef and extractelement <poison>. But combining undef +
+ // extractelement <non-poison-but-may-produce-poison> requires some
+ // extra operations.
if (isa<UndefValue>(V2))
- return LookAheadHeuristics::ScoreConsecutiveExtracts;
+ return (isa<PoisonValue>(V2) || isUndefVector(EV1).all())
+ ? LookAheadHeuristics::ScoreConsecutiveExtracts
+ : LookAheadHeuristics::ScoreSameOpcode;
Value *EV2 = nullptr;
ConstantInt *Ex2Idx = nullptr;
if (match(V2,
@@ -1683,9 +1821,10 @@ public:
// Search all operands in Ops[*][Lane] for the one that matches best
// Ops[OpIdx][LastLane] and return its operand index.
// If no good match can be found, return std::nullopt.
- std::optional<unsigned> getBestOperand(unsigned OpIdx, int Lane, int LastLane,
- ArrayRef<ReorderingMode> ReorderingModes,
- ArrayRef<Value *> MainAltOps) {
+ std::optional<unsigned>
+ getBestOperand(unsigned OpIdx, int Lane, int LastLane,
+ ArrayRef<ReorderingMode> ReorderingModes,
+ ArrayRef<Value *> MainAltOps) {
unsigned NumOperands = getNumOperands();
// The operand of the previous lane at OpIdx.
@@ -2299,7 +2438,8 @@ private:
/// \returns the cost of the vectorizable entry.
InstructionCost getEntryCost(const TreeEntry *E,
- ArrayRef<Value *> VectorizedVals);
+ ArrayRef<Value *> VectorizedVals,
+ SmallPtrSetImpl<Value *> &CheckedExtracts);
/// This is the recursive part of buildTree.
void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth,
@@ -2323,15 +2463,13 @@ private:
/// Create a new vector from a list of scalar values. Produces a sequence
/// which exploits values reused across lanes, and arranges the inserts
/// for ease of later optimization.
- Value *createBuildVector(const TreeEntry *E);
+ template <typename BVTy, typename ResTy, typename... Args>
+ ResTy processBuildVector(const TreeEntry *E, Args &...Params);
- /// \returns the scalarization cost for this type. Scalarization in this
- /// context means the creation of vectors from a group of scalars. If \p
- /// NeedToShuffle is true, need to add a cost of reshuffling some of the
- /// vector elements.
- InstructionCost getGatherCost(FixedVectorType *Ty,
- const APInt &ShuffledIndices,
- bool NeedToShuffle) const;
+ /// Create a new vector from a list of scalar values. Produces a sequence
+ /// which exploits values reused across lanes, and arranges the inserts
+ /// for ease of later optimization.
+ Value *createBuildVector(const TreeEntry *E);
/// Returns the instruction in the bundle, which can be used as a base point
/// for scheduling. Usually it is the last instruction in the bundle, except
@@ -2354,14 +2492,16 @@ private:
/// \returns the scalarization cost for this list of values. Assuming that
/// this subtree gets vectorized, we may need to extract the values from the
/// roots. This method calculates the cost of extracting the values.
- InstructionCost getGatherCost(ArrayRef<Value *> VL) const;
+ /// \param ForPoisonSrc true if initial vector is poison, false otherwise.
+ InstructionCost getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc) const;
/// Set the Builder insert point to one after the last instruction in
/// the bundle
void setInsertPointAfterBundle(const TreeEntry *E);
- /// \returns a vector from a collection of scalars in \p VL.
- Value *gather(ArrayRef<Value *> VL);
+ /// \returns a vector from a collection of scalars in \p VL. If \p Root is not
+ /// specified, the starting vector value is poison.
+ Value *gather(ArrayRef<Value *> VL, Value *Root);
/// \returns whether the VectorizableTree is fully vectorizable and will
/// be beneficial even the tree height is tiny.
@@ -2400,6 +2540,14 @@ private:
using VecTreeTy = SmallVector<std::unique_ptr<TreeEntry>, 8>;
TreeEntry(VecTreeTy &Container) : Container(Container) {}
+ /// \returns Common mask for reorder indices and reused scalars.
+ SmallVector<int> getCommonMask() const {
+ SmallVector<int> Mask;
+ inversePermutation(ReorderIndices, Mask);
+ ::addMask(Mask, ReuseShuffleIndices);
+ return Mask;
+ }
+
/// \returns true if the scalars in VL are equal to this entry.
bool isSame(ArrayRef<Value *> VL) const {
auto &&IsSame = [VL](ArrayRef<Value *> Scalars, ArrayRef<int> Mask) {
@@ -2409,8 +2557,8 @@ private:
std::equal(VL.begin(), VL.end(), Mask.begin(),
[Scalars](Value *V, int Idx) {
return (isa<UndefValue>(V) &&
- Idx == UndefMaskElem) ||
- (Idx != UndefMaskElem && V == Scalars[Idx]);
+ Idx == PoisonMaskElem) ||
+ (Idx != PoisonMaskElem && V == Scalars[Idx]);
});
};
if (!ReorderIndices.empty()) {
@@ -2471,7 +2619,7 @@ private:
ValueList Scalars;
/// The Scalars are vectorized into this value. It is initialized to Null.
- Value *VectorizedValue = nullptr;
+ WeakTrackingVH VectorizedValue = nullptr;
/// Do we need to gather this sequence or vectorize it
/// (either with vector instruction or with scatter/gather
@@ -2684,20 +2832,22 @@ private:
#ifndef NDEBUG
void dumpTreeCosts(const TreeEntry *E, InstructionCost ReuseShuffleCost,
- InstructionCost VecCost,
- InstructionCost ScalarCost) const {
- dbgs() << "SLP: Calculated costs for Tree:\n"; E->dump();
+ InstructionCost VecCost, InstructionCost ScalarCost,
+ StringRef Banner) const {
+ dbgs() << "SLP: " << Banner << ":\n";
+ E->dump();
dbgs() << "SLP: Costs:\n";
dbgs() << "SLP: ReuseShuffleCost = " << ReuseShuffleCost << "\n";
dbgs() << "SLP: VectorCost = " << VecCost << "\n";
dbgs() << "SLP: ScalarCost = " << ScalarCost << "\n";
- dbgs() << "SLP: ReuseShuffleCost + VecCost - ScalarCost = " <<
- ReuseShuffleCost + VecCost - ScalarCost << "\n";
+ dbgs() << "SLP: ReuseShuffleCost + VecCost - ScalarCost = "
+ << ReuseShuffleCost + VecCost - ScalarCost << "\n";
}
#endif
/// Create a new VectorizableTree entry.
- TreeEntry *newTreeEntry(ArrayRef<Value *> VL, std::optional<ScheduleData *> Bundle,
+ TreeEntry *newTreeEntry(ArrayRef<Value *> VL,
+ std::optional<ScheduleData *> Bundle,
const InstructionsState &S,
const EdgeInfo &UserTreeIdx,
ArrayRef<int> ReuseShuffleIndices = std::nullopt,
@@ -2791,8 +2941,14 @@ private:
return ScalarToTreeEntry.lookup(V);
}
+ /// Checks if the specified list of the instructions/values can be vectorized
+ /// and fills required data before actual scheduling of the instructions.
+ TreeEntry::EntryState getScalarsVectorizationState(
+ InstructionsState &S, ArrayRef<Value *> VL, bool IsScatterVectorizeUserTE,
+ OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps) const;
+
/// Maps a specific scalar to its tree entry.
- SmallDenseMap<Value*, TreeEntry *> ScalarToTreeEntry;
+ SmallDenseMap<Value *, TreeEntry *> ScalarToTreeEntry;
/// Maps a value to the proposed vectorizable size.
SmallDenseMap<Value *, unsigned> InstrElementSize;
@@ -2808,6 +2964,15 @@ private:
/// pre-gather them before.
DenseMap<const TreeEntry *, Instruction *> EntryToLastInstruction;
+ /// List of gather nodes, depending on other gather/vector nodes, which should
+ /// be emitted after the vector instruction emission process to correctly
+ /// handle order of the vector instructions and shuffles.
+ SetVector<const TreeEntry *> PostponedGathers;
+
+ using ValueToGatherNodesMap =
+ DenseMap<Value *, SmallPtrSet<const TreeEntry *, 4>>;
+ ValueToGatherNodesMap ValueToGatherNodes;
+
/// This POD struct describes one external user in the vectorized tree.
struct ExternalUser {
ExternalUser(Value *S, llvm::User *U, int L)
@@ -3235,7 +3400,6 @@ private:
<< "SLP: gets ready (ctl): " << *DepBundle << "\n");
}
}
-
}
}
@@ -3579,7 +3743,7 @@ static void reorderReuses(SmallVectorImpl<int> &Reuses, ArrayRef<int> Mask) {
SmallVector<int> Prev(Reuses.begin(), Reuses.end());
Prev.swap(Reuses);
for (unsigned I = 0, E = Prev.size(); I < E; ++I)
- if (Mask[I] != UndefMaskElem)
+ if (Mask[I] != PoisonMaskElem)
Reuses[Mask[I]] = Prev[I];
}
@@ -3603,7 +3767,7 @@ static void reorderOrder(SmallVectorImpl<unsigned> &Order, ArrayRef<int> Mask) {
}
Order.assign(Mask.size(), Mask.size());
for (unsigned I = 0, E = Mask.size(); I < E; ++I)
- if (MaskOrder[I] != UndefMaskElem)
+ if (MaskOrder[I] != PoisonMaskElem)
Order[MaskOrder[I]] = I;
fixupOrderingIndices(Order);
}
@@ -3653,10 +3817,8 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
return false;
return true;
};
- if (IsIdentityOrder(CurrentOrder)) {
- CurrentOrder.clear();
- return CurrentOrder;
- }
+ if (IsIdentityOrder(CurrentOrder))
+ return OrdersType();
auto *It = CurrentOrder.begin();
for (unsigned I = 0; I < NumScalars;) {
if (UsedPositions.test(I)) {
@@ -3669,7 +3831,7 @@ BoUpSLP::findReusedOrderedScalars(const BoUpSLP::TreeEntry &TE) {
}
++It;
}
- return CurrentOrder;
+ return std::move(CurrentOrder);
}
return std::nullopt;
}
@@ -3779,9 +3941,9 @@ static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
return LoadsState::Gather;
}
-bool clusterSortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
- const DataLayout &DL, ScalarEvolution &SE,
- SmallVectorImpl<unsigned> &SortedIndices) {
+static bool clusterSortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
+ const DataLayout &DL, ScalarEvolution &SE,
+ SmallVectorImpl<unsigned> &SortedIndices) {
assert(llvm::all_of(
VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
"Expected list of pointer operands.");
@@ -3825,7 +3987,7 @@ bool clusterSortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
return std::get<1>(X) < std::get<1>(Y);
});
int InitialOffset = std::get<1>(Vec[0]);
- AnyConsecutive |= all_of(enumerate(Vec), [InitialOffset](auto &P) {
+ AnyConsecutive |= all_of(enumerate(Vec), [InitialOffset](const auto &P) {
return std::get<1>(P.value()) == int(P.index()) + InitialOffset;
});
}
@@ -3862,7 +4024,7 @@ BoUpSLP::findPartiallyOrderedLoads(const BoUpSLP::TreeEntry &TE) {
BoUpSLP::OrdersType Order;
if (clusterSortPtrAccesses(Ptrs, ScalarTy, *DL, *SE, Order))
- return Order;
+ return std::move(Order);
return std::nullopt;
}
@@ -3888,31 +4050,35 @@ static bool areTwoInsertFromSameBuildVector(
// Go through the vector operand of insertelement instructions trying to find
// either VU as the original vector for IE2 or V as the original vector for
// IE1.
+ SmallSet<int, 8> ReusedIdx;
+ bool IsReusedIdx = false;
do {
- if (IE2 == VU)
+ if (IE2 == VU && !IE1)
return VU->hasOneUse();
- if (IE1 == V)
+ if (IE1 == V && !IE2)
return V->hasOneUse();
- if (IE1) {
- if ((IE1 != VU && !IE1->hasOneUse()) ||
- getInsertIndex(IE1).value_or(*Idx2) == *Idx2)
+ if (IE1 && IE1 != V) {
+ IsReusedIdx |=
+ !ReusedIdx.insert(getInsertIndex(IE1).value_or(*Idx2)).second;
+ if ((IE1 != VU && !IE1->hasOneUse()) || IsReusedIdx)
IE1 = nullptr;
else
IE1 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE1));
}
- if (IE2) {
- if ((IE2 != V && !IE2->hasOneUse()) ||
- getInsertIndex(IE2).value_or(*Idx1) == *Idx1)
+ if (IE2 && IE2 != VU) {
+ IsReusedIdx |=
+ !ReusedIdx.insert(getInsertIndex(IE2).value_or(*Idx1)).second;
+ if ((IE2 != V && !IE2->hasOneUse()) || IsReusedIdx)
IE2 = nullptr;
else
IE2 = dyn_cast_or_null<InsertElementInst>(GetBaseOperand(IE2));
}
- } while (IE1 || IE2);
+ } while (!IsReusedIdx && (IE1 || IE2));
return false;
}
-std::optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &TE,
- bool TopToBottom) {
+std::optional<BoUpSLP::OrdersType>
+BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) {
// No need to reorder if need to shuffle reuses, still need to shuffle the
// node.
if (!TE.ReuseShuffleIndices.empty()) {
@@ -3936,14 +4102,14 @@ std::optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &T
std::optional<unsigned> Idx = getExtractIndex(cast<Instruction>(V));
return Idx && *Idx < Sz;
})) {
- SmallVector<int> ReorderMask(Sz, UndefMaskElem);
+ SmallVector<int> ReorderMask(Sz, PoisonMaskElem);
if (TE.ReorderIndices.empty())
std::iota(ReorderMask.begin(), ReorderMask.end(), 0);
else
inversePermutation(TE.ReorderIndices, ReorderMask);
for (unsigned I = 0; I < VF; ++I) {
int &Idx = ReusedMask[I];
- if (Idx == UndefMaskElem)
+ if (Idx == PoisonMaskElem)
continue;
Value *V = TE.Scalars[ReorderMask[Idx]];
std::optional<unsigned> EI = getExtractIndex(cast<Instruction>(V));
@@ -3958,7 +4124,7 @@ std::optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &T
for (unsigned K = 0; K < VF; K += Sz) {
OrdersType CurrentOrder(TE.ReorderIndices);
SmallVector<int> SubMask{ArrayRef(ReusedMask).slice(K, Sz)};
- if (SubMask.front() == UndefMaskElem)
+ if (SubMask.front() == PoisonMaskElem)
std::iota(SubMask.begin(), SubMask.end(), 0);
reorderOrder(CurrentOrder, SubMask);
transform(CurrentOrder, It, [K](unsigned Pos) { return Pos + K; });
@@ -3966,8 +4132,8 @@ std::optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &T
}
if (all_of(enumerate(ResOrder),
[](const auto &Data) { return Data.index() == Data.value(); }))
- return {}; // Use identity order.
- return ResOrder;
+ return std::nullopt; // No need to reorder.
+ return std::move(ResOrder);
}
if (TE.State == TreeEntry::Vectorize &&
(isa<LoadInst, ExtractElementInst, ExtractValueInst>(TE.getMainOp()) ||
@@ -3976,6 +4142,8 @@ std::optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &T
return TE.ReorderIndices;
if (TE.State == TreeEntry::Vectorize && TE.getOpcode() == Instruction::PHI) {
auto PHICompare = [](llvm::Value *V1, llvm::Value *V2) {
+ if (V1 == V2)
+ return false;
if (!V1->hasOneUse() || !V2->hasOneUse())
return false;
auto *FirstUserOfPhi1 = cast<Instruction>(*V1->user_begin());
@@ -4023,8 +4191,8 @@ std::optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &T
for (unsigned Id = 0, Sz = Phis.size(); Id < Sz; ++Id)
ResOrder[Id] = PhiToId[Phis[Id]];
if (IsIdentityOrder(ResOrder))
- return {};
- return ResOrder;
+ return std::nullopt; // No need to reorder.
+ return std::move(ResOrder);
}
if (TE.State == TreeEntry::NeedToGather) {
// TODO: add analysis of other gather nodes with extractelement
@@ -4050,7 +4218,42 @@ std::optional<BoUpSLP::OrdersType> BoUpSLP::getReorderingData(const TreeEntry &T
if (Reuse || !CurrentOrder.empty()) {
if (!CurrentOrder.empty())
fixupOrderingIndices(CurrentOrder);
- return CurrentOrder;
+ return std::move(CurrentOrder);
+ }
+ }
+ // If the gather node is <undef, v, .., poison> and
+ // insertelement poison, v, 0 [+ permute]
+ // is cheaper than
+ // insertelement poison, v, n - try to reorder.
+ // If rotating the whole graph, exclude the permute cost, the whole graph
+ // might be transformed.
+ int Sz = TE.Scalars.size();
+ if (isSplat(TE.Scalars) && !allConstant(TE.Scalars) &&
+ count_if(TE.Scalars, UndefValue::classof) == Sz - 1) {
+ const auto *It =
+ find_if(TE.Scalars, [](Value *V) { return !isConstant(V); });
+ if (It == TE.Scalars.begin())
+ return OrdersType();
+ auto *Ty = FixedVectorType::get(TE.Scalars.front()->getType(), Sz);
+ if (It != TE.Scalars.end()) {
+ OrdersType Order(Sz, Sz);
+ unsigned Idx = std::distance(TE.Scalars.begin(), It);
+ Order[Idx] = 0;
+ fixupOrderingIndices(Order);
+ SmallVector<int> Mask;
+ inversePermutation(Order, Mask);
+ InstructionCost PermuteCost =
+ TopToBottom
+ ? 0
+ : TTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, Mask);
+ InstructionCost InsertFirstCost = TTI->getVectorInstrCost(
+ Instruction::InsertElement, Ty, TTI::TCK_RecipThroughput, 0,
+ PoisonValue::get(Ty), *It);
+ InstructionCost InsertIdxCost = TTI->getVectorInstrCost(
+ Instruction::InsertElement, Ty, TTI::TCK_RecipThroughput, Idx,
+ PoisonValue::get(Ty), *It);
+ if (InsertFirstCost + PermuteCost < InsertIdxCost)
+ return std::move(Order);
}
}
if (std::optional<OrdersType> CurrentOrder = findReusedOrderedScalars(TE))
@@ -4260,7 +4463,7 @@ void BoUpSLP::reorderTopToBottom() {
unsigned E = Order.size();
OrdersType CurrentOrder(E, E);
transform(Mask, CurrentOrder.begin(), [E](int Idx) {
- return Idx == UndefMaskElem ? E : static_cast<unsigned>(Idx);
+ return Idx == PoisonMaskElem ? E : static_cast<unsigned>(Idx);
});
fixupOrderingIndices(CurrentOrder);
++OrdersUses.insert(std::make_pair(CurrentOrder, 0)).first->second;
@@ -4285,10 +4488,10 @@ void BoUpSLP::reorderTopToBottom() {
continue;
SmallVector<int> Mask;
inversePermutation(BestOrder, Mask);
- SmallVector<int> MaskOrder(BestOrder.size(), UndefMaskElem);
+ SmallVector<int> MaskOrder(BestOrder.size(), PoisonMaskElem);
unsigned E = BestOrder.size();
transform(BestOrder, MaskOrder.begin(), [E](unsigned I) {
- return I < E ? static_cast<int>(I) : UndefMaskElem;
+ return I < E ? static_cast<int>(I) : PoisonMaskElem;
});
// Do an actual reordering, if profitable.
for (std::unique_ptr<TreeEntry> &TE : VectorizableTree) {
@@ -4384,7 +4587,7 @@ bool BoUpSLP::canReorderOperands(
}
return false;
}) > 1 &&
- !all_of(UserTE->getOperand(I), isConstant))
+ !allConstant(UserTE->getOperand(I)))
return false;
if (Gather)
GatherOps.push_back(Gather);
@@ -4499,7 +4702,7 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
unsigned E = Order.size();
OrdersType CurrentOrder(E, E);
transform(Mask, CurrentOrder.begin(), [E](int Idx) {
- return Idx == UndefMaskElem ? E : static_cast<unsigned>(Idx);
+ return Idx == PoisonMaskElem ? E : static_cast<unsigned>(Idx);
});
fixupOrderingIndices(CurrentOrder);
OrdersUses.insert(std::make_pair(CurrentOrder, 0)).first->second +=
@@ -4578,10 +4781,10 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
VisitedOps.clear();
SmallVector<int> Mask;
inversePermutation(BestOrder, Mask);
- SmallVector<int> MaskOrder(BestOrder.size(), UndefMaskElem);
+ SmallVector<int> MaskOrder(BestOrder.size(), PoisonMaskElem);
unsigned E = BestOrder.size();
transform(BestOrder, MaskOrder.begin(), [E](unsigned I) {
- return I < E ? static_cast<int>(I) : UndefMaskElem;
+ return I < E ? static_cast<int>(I) : PoisonMaskElem;
});
for (const std::pair<unsigned, TreeEntry *> &Op : Data.second) {
TreeEntry *TE = Op.second;
@@ -4779,7 +4982,7 @@ bool BoUpSLP::canFormVector(const SmallVector<StoreInst *, 4> &StoresVec,
// Check if the stores are consecutive by checking if their difference is 1.
for (unsigned Idx : seq<unsigned>(1, StoreOffsetVec.size()))
- if (StoreOffsetVec[Idx].second != StoreOffsetVec[Idx-1].second + 1)
+ if (StoreOffsetVec[Idx].second != StoreOffsetVec[Idx - 1].second + 1)
return false;
// Calculate the shuffle indices according to their offset against the sorted
@@ -4976,6 +5179,309 @@ static bool isAlternateInstruction(const Instruction *I,
const Instruction *AltOp,
const TargetLibraryInfo &TLI);
+BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
+ InstructionsState &S, ArrayRef<Value *> VL, bool IsScatterVectorizeUserTE,
+ OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps) const {
+ assert(S.MainOp && "Expected instructions with same/alternate opcodes only.");
+
+ unsigned ShuffleOrOp =
+ S.isAltShuffle() ? (unsigned)Instruction::ShuffleVector : S.getOpcode();
+ auto *VL0 = cast<Instruction>(S.OpValue);
+ switch (ShuffleOrOp) {
+ case Instruction::PHI: {
+ // Check for terminator values (e.g. invoke).
+ for (Value *V : VL)
+ for (Value *Incoming : cast<PHINode>(V)->incoming_values()) {
+ Instruction *Term = dyn_cast<Instruction>(Incoming);
+ if (Term && Term->isTerminator()) {
+ LLVM_DEBUG(dbgs()
+ << "SLP: Need to swizzle PHINodes (terminator use).\n");
+ return TreeEntry::NeedToGather;
+ }
+ }
+
+ return TreeEntry::Vectorize;
+ }
+ case Instruction::ExtractValue:
+ case Instruction::ExtractElement: {
+ bool Reuse = canReuseExtract(VL, VL0, CurrentOrder);
+ if (Reuse || !CurrentOrder.empty())
+ return TreeEntry::Vectorize;
+ LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
+ return TreeEntry::NeedToGather;
+ }
+ case Instruction::InsertElement: {
+ // Check that we have a buildvector and not a shuffle of 2 or more
+ // different vectors.
+ ValueSet SourceVectors;
+ for (Value *V : VL) {
+ SourceVectors.insert(cast<Instruction>(V)->getOperand(0));
+ assert(getInsertIndex(V) != std::nullopt &&
+ "Non-constant or undef index?");
+ }
+
+ if (count_if(VL, [&SourceVectors](Value *V) {
+ return !SourceVectors.contains(V);
+ }) >= 2) {
+ // Found 2nd source vector - cancel.
+ LLVM_DEBUG(dbgs() << "SLP: Gather of insertelement vectors with "
+ "different source vectors.\n");
+ return TreeEntry::NeedToGather;
+ }
+
+ return TreeEntry::Vectorize;
+ }
+ case Instruction::Load: {
+ // Check that a vectorized load would load the same memory as a scalar
+ // load. For example, we don't want to vectorize loads that are smaller
+ // than 8-bit. Even though we have a packed struct {<i2, i2, i2, i2>} LLVM
+ // treats loading/storing it as an i8 struct. If we vectorize loads/stores
+ // from such a struct, we read/write packed bits disagreeing with the
+ // unvectorized version.
+ switch (canVectorizeLoads(VL, VL0, *TTI, *DL, *SE, *LI, *TLI, CurrentOrder,
+ PointerOps)) {
+ case LoadsState::Vectorize:
+ return TreeEntry::Vectorize;
+ case LoadsState::ScatterVectorize:
+ return TreeEntry::ScatterVectorize;
+ case LoadsState::Gather:
+#ifndef NDEBUG
+ Type *ScalarTy = VL0->getType();
+ if (DL->getTypeSizeInBits(ScalarTy) !=
+ DL->getTypeAllocSizeInBits(ScalarTy))
+ LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
+ else if (any_of(VL,
+ [](Value *V) { return !cast<LoadInst>(V)->isSimple(); }))
+ LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
+ else
+ LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
+#endif // NDEBUG
+ return TreeEntry::NeedToGather;
+ }
+ llvm_unreachable("Unexpected state of loads");
+ }
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::SIToFP:
+ case Instruction::UIToFP:
+ case Instruction::Trunc:
+ case Instruction::FPTrunc:
+ case Instruction::BitCast: {
+ Type *SrcTy = VL0->getOperand(0)->getType();
+ for (Value *V : VL) {
+ Type *Ty = cast<Instruction>(V)->getOperand(0)->getType();
+ if (Ty != SrcTy || !isValidElementType(Ty)) {
+ LLVM_DEBUG(
+ dbgs() << "SLP: Gathering casts with different src types.\n");
+ return TreeEntry::NeedToGather;
+ }
+ }
+ return TreeEntry::Vectorize;
+ }
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ // Check that all of the compares have the same predicate.
+ CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
+ CmpInst::Predicate SwapP0 = CmpInst::getSwappedPredicate(P0);
+ Type *ComparedTy = VL0->getOperand(0)->getType();
+ for (Value *V : VL) {
+ CmpInst *Cmp = cast<CmpInst>(V);
+ if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) ||
+ Cmp->getOperand(0)->getType() != ComparedTy) {
+ LLVM_DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n");
+ return TreeEntry::NeedToGather;
+ }
+ }
+ return TreeEntry::Vectorize;
+ }
+ case Instruction::Select:
+ case Instruction::FNeg:
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ return TreeEntry::Vectorize;
+ case Instruction::GetElementPtr: {
+ // We don't combine GEPs with complicated (nested) indexing.
+ for (Value *V : VL) {
+ auto *I = dyn_cast<GetElementPtrInst>(V);
+ if (!I)
+ continue;
+ if (I->getNumOperands() != 2) {
+ LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
+ return TreeEntry::NeedToGather;
+ }
+ }
+
+ // We can't combine several GEPs into one vector if they operate on
+ // different types.
+ Type *Ty0 = cast<GEPOperator>(VL0)->getSourceElementType();
+ for (Value *V : VL) {
+ auto *GEP = dyn_cast<GEPOperator>(V);
+ if (!GEP)
+ continue;
+ Type *CurTy = GEP->getSourceElementType();
+ if (Ty0 != CurTy) {
+ LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
+ return TreeEntry::NeedToGather;
+ }
+ }
+
+ // We don't combine GEPs with non-constant indexes.
+ Type *Ty1 = VL0->getOperand(1)->getType();
+ for (Value *V : VL) {
+ auto *I = dyn_cast<GetElementPtrInst>(V);
+ if (!I)
+ continue;
+ auto *Op = I->getOperand(1);
+ if ((!IsScatterVectorizeUserTE && !isa<ConstantInt>(Op)) ||
+ (Op->getType() != Ty1 &&
+ ((IsScatterVectorizeUserTE && !isa<ConstantInt>(Op)) ||
+ Op->getType()->getScalarSizeInBits() >
+ DL->getIndexSizeInBits(
+ V->getType()->getPointerAddressSpace())))) {
+ LLVM_DEBUG(
+ dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
+ return TreeEntry::NeedToGather;
+ }
+ }
+
+ return TreeEntry::Vectorize;
+ }
+ case Instruction::Store: {
+ // Check if the stores are consecutive or if we need to swizzle them.
+ llvm::Type *ScalarTy = cast<StoreInst>(VL0)->getValueOperand()->getType();
+ // Avoid types that are padded when being allocated as scalars, while
+ // being packed together in a vector (such as i1).
+ if (DL->getTypeSizeInBits(ScalarTy) !=
+ DL->getTypeAllocSizeInBits(ScalarTy)) {
+ LLVM_DEBUG(dbgs() << "SLP: Gathering stores of non-packed type.\n");
+ return TreeEntry::NeedToGather;
+ }
+ // Make sure all stores in the bundle are simple - we can't vectorize
+ // atomic or volatile stores.
+ for (Value *V : VL) {
+ auto *SI = cast<StoreInst>(V);
+ if (!SI->isSimple()) {
+ LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple stores.\n");
+ return TreeEntry::NeedToGather;
+ }
+ PointerOps.push_back(SI->getPointerOperand());
+ }
+
+ // Check the order of pointer operands.
+ if (llvm::sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, CurrentOrder)) {
+ Value *Ptr0;
+ Value *PtrN;
+ if (CurrentOrder.empty()) {
+ Ptr0 = PointerOps.front();
+ PtrN = PointerOps.back();
+ } else {
+ Ptr0 = PointerOps[CurrentOrder.front()];
+ PtrN = PointerOps[CurrentOrder.back()];
+ }
+ std::optional<int> Dist =
+ getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
+ // Check that the sorted pointer operands are consecutive.
+ if (static_cast<unsigned>(*Dist) == VL.size() - 1)
+ return TreeEntry::Vectorize;
+ }
+
+ LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
+ return TreeEntry::NeedToGather;
+ }
+ case Instruction::Call: {
+ // Check if the calls are all to the same vectorizable intrinsic or
+ // library function.
+ CallInst *CI = cast<CallInst>(VL0);
+ Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
+
+ VFShape Shape = VFShape::get(
+ *CI, ElementCount::getFixed(static_cast<unsigned int>(VL.size())),
+ false /*HasGlobalPred*/);
+ Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
+
+ if (!VecFunc && !isTriviallyVectorizable(ID)) {
+ LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
+ return TreeEntry::NeedToGather;
+ }
+ Function *F = CI->getCalledFunction();
+ unsigned NumArgs = CI->arg_size();
+ SmallVector<Value *, 4> ScalarArgs(NumArgs, nullptr);
+ for (unsigned J = 0; J != NumArgs; ++J)
+ if (isVectorIntrinsicWithScalarOpAtArg(ID, J))
+ ScalarArgs[J] = CI->getArgOperand(J);
+ for (Value *V : VL) {
+ CallInst *CI2 = dyn_cast<CallInst>(V);
+ if (!CI2 || CI2->getCalledFunction() != F ||
+ getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
+ (VecFunc &&
+ VecFunc != VFDatabase(*CI2).getVectorizedFunction(Shape)) ||
+ !CI->hasIdenticalOperandBundleSchema(*CI2)) {
+ LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *V
+ << "\n");
+ return TreeEntry::NeedToGather;
+ }
+ // Some intrinsics have scalar arguments and should be same in order for
+ // them to be vectorized.
+ for (unsigned J = 0; J != NumArgs; ++J) {
+ if (isVectorIntrinsicWithScalarOpAtArg(ID, J)) {
+ Value *A1J = CI2->getArgOperand(J);
+ if (ScalarArgs[J] != A1J) {
+ LLVM_DEBUG(dbgs()
+ << "SLP: mismatched arguments in call:" << *CI
+ << " argument " << ScalarArgs[J] << "!=" << A1J << "\n");
+ return TreeEntry::NeedToGather;
+ }
+ }
+ }
+ // Verify that the bundle operands are identical between the two calls.
+ if (CI->hasOperandBundles() &&
+ !std::equal(CI->op_begin() + CI->getBundleOperandsStartIndex(),
+ CI->op_begin() + CI->getBundleOperandsEndIndex(),
+ CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
+ LLVM_DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" << *CI
+ << "!=" << *V << '\n');
+ return TreeEntry::NeedToGather;
+ }
+ }
+
+ return TreeEntry::Vectorize;
+ }
+ case Instruction::ShuffleVector: {
+ // If this is not an alternate sequence of opcode like add-sub
+ // then do not vectorize this instruction.
+ if (!S.isAltShuffle()) {
+ LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
+ return TreeEntry::NeedToGather;
+ }
+ return TreeEntry::Vectorize;
+ }
+ default:
+ LLVM_DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
+ return TreeEntry::NeedToGather;
+ }
+}
+
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
const EdgeInfo &UserTreeIdx) {
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
@@ -4990,7 +5496,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
for (Value *V : VL) {
if (isConstant(V)) {
ReuseShuffleIndicies.emplace_back(
- isa<UndefValue>(V) ? UndefMaskElem : UniqueValues.size());
+ isa<UndefValue>(V) ? PoisonMaskElem : UniqueValues.size());
UniqueValues.emplace_back(V);
continue;
}
@@ -5010,7 +5516,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return isa<UndefValue>(V) ||
!isConstant(V);
})) ||
- !llvm::isPowerOf2_32(NumUniqueScalarValues)) {
+ !llvm::has_single_bit<uint32_t>(NumUniqueScalarValues)) {
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
return false;
@@ -5257,6 +5763,17 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (!TryToFindDuplicates(S))
return;
+ // Perform specific checks for each particular instruction kind.
+ OrdersType CurrentOrder;
+ SmallVector<Value *> PointerOps;
+ TreeEntry::EntryState State = getScalarsVectorizationState(
+ S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps);
+ if (State == TreeEntry::NeedToGather) {
+ newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
+ return;
+ }
+
auto &BSRef = BlocksSchedules[BB];
if (!BSRef)
BSRef = std::make_unique<BlockScheduling>(BB);
@@ -5285,20 +5802,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
case Instruction::PHI: {
auto *PH = cast<PHINode>(VL0);
- // Check for terminator values (e.g. invoke).
- for (Value *V : VL)
- for (Value *Incoming : cast<PHINode>(V)->incoming_values()) {
- Instruction *Term = dyn_cast<Instruction>(Incoming);
- if (Term && Term->isTerminator()) {
- LLVM_DEBUG(dbgs()
- << "SLP: Need to swizzle PHINodes (terminator use).\n");
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- return;
- }
- }
-
TreeEntry *TE =
newTreeEntry(VL, Bundle, S, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
@@ -5326,9 +5829,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
case Instruction::ExtractValue:
case Instruction::ExtractElement: {
- OrdersType CurrentOrder;
- bool Reuse = canReuseExtract(VL, VL0, CurrentOrder);
- if (Reuse) {
+ if (CurrentOrder.empty()) {
LLVM_DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n");
newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
@@ -5339,55 +5840,28 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
VectorizableTree.back()->setOperand(0, Op0);
return;
}
- if (!CurrentOrder.empty()) {
- LLVM_DEBUG({
- dbgs() << "SLP: Reusing or shuffling of reordered extract sequence "
- "with order";
- for (unsigned Idx : CurrentOrder)
- dbgs() << " " << Idx;
- dbgs() << "\n";
- });
- fixupOrderingIndices(CurrentOrder);
- // Insert new order with initial value 0, if it does not exist,
- // otherwise return the iterator to the existing one.
- newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies, CurrentOrder);
- // This is a special case, as it does not gather, but at the same time
- // we are not extending buildTree_rec() towards the operands.
- ValueList Op0;
- Op0.assign(VL.size(), VL0->getOperand(0));
- VectorizableTree.back()->setOperand(0, Op0);
- return;
- }
- LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- BS.cancelScheduling(VL, VL0);
+ LLVM_DEBUG({
+ dbgs() << "SLP: Reusing or shuffling of reordered extract sequence "
+ "with order";
+ for (unsigned Idx : CurrentOrder)
+ dbgs() << " " << Idx;
+ dbgs() << "\n";
+ });
+ fixupOrderingIndices(CurrentOrder);
+ // Insert new order with initial value 0, if it does not exist,
+ // otherwise return the iterator to the existing one.
+ newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies, CurrentOrder);
+ // This is a special case, as it does not gather, but at the same time
+ // we are not extending buildTree_rec() towards the operands.
+ ValueList Op0;
+ Op0.assign(VL.size(), VL0->getOperand(0));
+ VectorizableTree.back()->setOperand(0, Op0);
return;
}
case Instruction::InsertElement: {
assert(ReuseShuffleIndicies.empty() && "All inserts should be unique");
- // Check that we have a buildvector and not a shuffle of 2 or more
- // different vectors.
- ValueSet SourceVectors;
- for (Value *V : VL) {
- SourceVectors.insert(cast<Instruction>(V)->getOperand(0));
- assert(getInsertIndex(V) != std::nullopt &&
- "Non-constant or undef index?");
- }
-
- if (count_if(VL, [&SourceVectors](Value *V) {
- return !SourceVectors.contains(V);
- }) >= 2) {
- // Found 2nd source vector - cancel.
- LLVM_DEBUG(dbgs() << "SLP: Gather of insertelement vectors with "
- "different source vectors.\n");
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx);
- BS.cancelScheduling(VL, VL0);
- return;
- }
-
auto OrdCompare = [](const std::pair<int, int> &P1,
const std::pair<int, int> &P2) {
return P1.first > P2.first;
@@ -5430,12 +5904,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
// treats loading/storing it as an i8 struct. If we vectorize loads/stores
// from such a struct, we read/write packed bits disagreeing with the
// unvectorized version.
- SmallVector<Value *> PointerOps;
- OrdersType CurrentOrder;
TreeEntry *TE = nullptr;
- switch (canVectorizeLoads(VL, VL0, *TTI, *DL, *SE, *LI, *TLI,
- CurrentOrder, PointerOps)) {
- case LoadsState::Vectorize:
+ switch (State) {
+ case TreeEntry::Vectorize:
if (CurrentOrder.empty()) {
          // Original loads are consecutive and do not require reordering.
TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
@@ -5450,7 +5921,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
TE->setOperandsInOrder();
break;
- case LoadsState::ScatterVectorize:
+ case TreeEntry::ScatterVectorize:
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
UserTreeIdx, ReuseShuffleIndicies);
@@ -5458,23 +5929,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
buildTree_rec(PointerOps, Depth + 1, {TE, 0});
LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
break;
- case LoadsState::Gather:
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
-#ifndef NDEBUG
- Type *ScalarTy = VL0->getType();
- if (DL->getTypeSizeInBits(ScalarTy) !=
- DL->getTypeAllocSizeInBits(ScalarTy))
- LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
- else if (any_of(VL, [](Value *V) {
- return !cast<LoadInst>(V)->isSimple();
- }))
- LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
- else
- LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
-#endif // NDEBUG
- break;
+ case TreeEntry::NeedToGather:
+ llvm_unreachable("Unexpected loads state.");
}
return;
}
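The switch above now only dispatches on a load state computed before the builder runs. As a rough standalone illustration (plain C++, not LLVM code, all names invented), the classification it relies on boils down to whether the sorted pointer offsets of the bundle advance by exactly one element:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative stand-ins for the Vectorize / ScatterVectorize / NeedToGather
// states referenced in the switch above (names invented for this sketch).
enum class LoadKind { Wide, Scatter, Gather };

// Classify a bundle of loads from the byte offsets of their pointers relative
// to a common base: consecutive sorted offsets allow one wide load, known but
// non-consecutive offsets still allow a masked-gather style load, and anything
// else has to be gathered element by element.
LoadKind classifyLoads(std::vector<int64_t> Offsets, int64_t ElemSize) {
  if (Offsets.empty() || ElemSize <= 0)
    return LoadKind::Gather;
  std::sort(Offsets.begin(), Offsets.end());
  for (std::size_t I = 1; I < Offsets.size(); ++I)
    if (Offsets[I] - Offsets[I - 1] != ElemSize)
      return LoadKind::Scatter;
  return LoadKind::Wide;
}

Under this sketch, classifyLoads({0, 4, 8, 12}, 4) is Wide and classifyLoads({0, 8, 4, 16}, 4) is Scatter; the real canVectorizeLoads additionally checks, among other things, that the loads are simple and that the target supports masked gathers before choosing the scatter form.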
@@ -5490,18 +5946,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
case Instruction::Trunc:
case Instruction::FPTrunc:
case Instruction::BitCast: {
- Type *SrcTy = VL0->getOperand(0)->getType();
- for (Value *V : VL) {
- Type *Ty = cast<Instruction>(V)->getOperand(0)->getType();
- if (Ty != SrcTy || !isValidElementType(Ty)) {
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs()
- << "SLP: Gathering casts with different src types.\n");
- return;
- }
- }
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
@@ -5521,21 +5965,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
case Instruction::FCmp: {
// Check that all of the compares have the same predicate.
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
- CmpInst::Predicate SwapP0 = CmpInst::getSwappedPredicate(P0);
- Type *ComparedTy = VL0->getOperand(0)->getType();
- for (Value *V : VL) {
- CmpInst *Cmp = cast<CmpInst>(V);
- if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) ||
- Cmp->getOperand(0)->getType() != ComparedTy) {
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs()
- << "SLP: Gathering cmp with different predicate.\n");
- return;
- }
- }
-
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
@@ -5544,7 +5973,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
if (cast<CmpInst>(VL0)->isCommutative()) {
// Commutative predicate - collect + sort operands of the instructions
// so that each side is more likely to have the same opcode.
- assert(P0 == SwapP0 && "Commutative Predicate mismatch");
+ assert(P0 == CmpInst::getSwappedPredicate(P0) &&
+ "Commutative Predicate mismatch");
reorderInputsAccordingToOpcode(VL, Left, Right, *TLI, *DL, *SE, *this);
} else {
// Collect operands - commute if it uses the swapped predicate.
@@ -5612,60 +6042,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
case Instruction::GetElementPtr: {
- // We don't combine GEPs with complicated (nested) indexing.
- for (Value *V : VL) {
- auto *I = dyn_cast<GetElementPtrInst>(V);
- if (!I)
- continue;
- if (I->getNumOperands() != 2) {
- LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- return;
- }
- }
-
- // We can't combine several GEPs into one vector if they operate on
- // different types.
- Type *Ty0 = cast<GEPOperator>(VL0)->getSourceElementType();
- for (Value *V : VL) {
- auto *GEP = dyn_cast<GEPOperator>(V);
- if (!GEP)
- continue;
- Type *CurTy = GEP->getSourceElementType();
- if (Ty0 != CurTy) {
- LLVM_DEBUG(dbgs()
- << "SLP: not-vectorizable GEP (different types).\n");
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- return;
- }
- }
-
- // We don't combine GEPs with non-constant indexes.
- Type *Ty1 = VL0->getOperand(1)->getType();
- for (Value *V : VL) {
- auto *I = dyn_cast<GetElementPtrInst>(V);
- if (!I)
- continue;
- auto *Op = I->getOperand(1);
- if ((!IsScatterVectorizeUserTE && !isa<ConstantInt>(Op)) ||
- (Op->getType() != Ty1 &&
- ((IsScatterVectorizeUserTE && !isa<ConstantInt>(Op)) ||
- Op->getType()->getScalarSizeInBits() >
- DL->getIndexSizeInBits(
- V->getType()->getPointerAddressSpace())))) {
- LLVM_DEBUG(dbgs()
- << "SLP: not-vectorizable GEP (non-constant indexes).\n");
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- return;
- }
- }
-
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
@@ -5722,78 +6098,29 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
}
case Instruction::Store: {
// Check if the stores are consecutive or if we need to swizzle them.
- llvm::Type *ScalarTy = cast<StoreInst>(VL0)->getValueOperand()->getType();
- // Avoid types that are padded when being allocated as scalars, while
- // being packed together in a vector (such as i1).
- if (DL->getTypeSizeInBits(ScalarTy) !=
- DL->getTypeAllocSizeInBits(ScalarTy)) {
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: Gathering stores of non-packed type.\n");
- return;
- }
- // Make sure all stores in the bundle are simple - we can't vectorize
- // atomic or volatile stores.
- SmallVector<Value *, 4> PointerOps(VL.size());
ValueList Operands(VL.size());
- auto POIter = PointerOps.begin();
- auto OIter = Operands.begin();
+ auto *OIter = Operands.begin();
for (Value *V : VL) {
auto *SI = cast<StoreInst>(V);
- if (!SI->isSimple()) {
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple stores.\n");
- return;
- }
- *POIter = SI->getPointerOperand();
*OIter = SI->getValueOperand();
- ++POIter;
++OIter;
}
-
- OrdersType CurrentOrder;
- // Check the order of pointer operands.
- if (llvm::sortPtrAccesses(PointerOps, ScalarTy, *DL, *SE, CurrentOrder)) {
- Value *Ptr0;
- Value *PtrN;
- if (CurrentOrder.empty()) {
- Ptr0 = PointerOps.front();
- PtrN = PointerOps.back();
- } else {
- Ptr0 = PointerOps[CurrentOrder.front()];
- PtrN = PointerOps[CurrentOrder.back()];
- }
- std::optional<int> Dist =
- getPointersDiff(ScalarTy, Ptr0, ScalarTy, PtrN, *DL, *SE);
- // Check that the sorted pointer operands are consecutive.
- if (static_cast<unsigned>(*Dist) == VL.size() - 1) {
- if (CurrentOrder.empty()) {
- // Original stores are consecutive and does not require reordering.
- TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S,
- UserTreeIdx, ReuseShuffleIndicies);
- TE->setOperandsInOrder();
- buildTree_rec(Operands, Depth + 1, {TE, 0});
- LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
- } else {
- fixupOrderingIndices(CurrentOrder);
- TreeEntry *TE =
- newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies, CurrentOrder);
- TE->setOperandsInOrder();
- buildTree_rec(Operands, Depth + 1, {TE, 0});
- LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled stores.\n");
- }
- return;
- }
+ // Check that the sorted pointer operands are consecutive.
+ if (CurrentOrder.empty()) {
+        // Original stores are consecutive and do not require reordering.
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies);
+ TE->setOperandsInOrder();
+ buildTree_rec(Operands, Depth + 1, {TE, 0});
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
+ } else {
+ fixupOrderingIndices(CurrentOrder);
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndicies, CurrentOrder);
+ TE->setOperandsInOrder();
+ buildTree_rec(Operands, Depth + 1, {TE, 0});
+ LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled stores.\n");
}
-
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
return;
}
case Instruction::Call: {
@@ -5802,68 +6129,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
CallInst *CI = cast<CallInst>(VL0);
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
- VFShape Shape = VFShape::get(
- *CI, ElementCount::getFixed(static_cast<unsigned int>(VL.size())),
- false /*HasGlobalPred*/);
- Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
-
- if (!VecFunc && !isTriviallyVectorizable(ID)) {
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
- return;
- }
- Function *F = CI->getCalledFunction();
- unsigned NumArgs = CI->arg_size();
- SmallVector<Value*, 4> ScalarArgs(NumArgs, nullptr);
- for (unsigned j = 0; j != NumArgs; ++j)
- if (isVectorIntrinsicWithScalarOpAtArg(ID, j))
- ScalarArgs[j] = CI->getArgOperand(j);
- for (Value *V : VL) {
- CallInst *CI2 = dyn_cast<CallInst>(V);
- if (!CI2 || CI2->getCalledFunction() != F ||
- getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
- (VecFunc &&
- VecFunc != VFDatabase(*CI2).getVectorizedFunction(Shape)) ||
- !CI->hasIdenticalOperandBundleSchema(*CI2)) {
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *V
- << "\n");
- return;
- }
- // Some intrinsics have scalar arguments and should be same in order for
- // them to be vectorized.
- for (unsigned j = 0; j != NumArgs; ++j) {
- if (isVectorIntrinsicWithScalarOpAtArg(ID, j)) {
- Value *A1J = CI2->getArgOperand(j);
- if (ScalarArgs[j] != A1J) {
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
- << " argument " << ScalarArgs[j] << "!=" << A1J
- << "\n");
- return;
- }
- }
- }
- // Verify that the bundle operands are identical between the two calls.
- if (CI->hasOperandBundles() &&
- !std::equal(CI->op_begin() + CI->getBundleOperandsStartIndex(),
- CI->op_begin() + CI->getBundleOperandsEndIndex(),
- CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:"
- << *CI << "!=" << *V << '\n');
- return;
- }
- }
-
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
TE->setOperandsInOrder();
@@ -5883,15 +6148,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
case Instruction::ShuffleVector: {
- // If this is not an alternate sequence of opcode like add-sub
- // then do not vectorize this instruction.
- if (!S.isAltShuffle()) {
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
- return;
- }
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
@@ -5949,19 +6205,16 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
default:
- BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
- ReuseShuffleIndicies);
- LLVM_DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
- return;
+ break;
}
+ llvm_unreachable("Unexpected vectorization of the instructions.");
}
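As the removed blocks throughout this function show, the per-opcode legality bail-outs (cancel scheduling, create a gather entry, return) no longer live inside buildTree_rec; the builder now assumes the bundle has already been classified, which is why the default case turned into llvm_unreachable. A minimal sketch of that shape, with invented names and a three-state classification loosely modeled on the TreeEntry states used above:

#include <cassert>

// Illustrative only: stand-in for the entry states computed by a separate
// pre-check before the per-opcode switch runs.
enum class EntryState { Vectorize, ScatterVectorize, NeedToGather };

// The builder receives a state that is already known; gather bundles never
// reach it, mirroring the new llvm_unreachable paths in the code above.
void buildNode(EntryState State) {
  switch (State) {
  case EntryState::Vectorize:
    // Build a vectorized tree entry and recurse into the operands.
    break;
  case EntryState::ScatterVectorize:
    // Build a masked-gather style entry (non-consecutive loads).
    break;
  case EntryState::NeedToGather:
    assert(false && "gather bundles are filtered out before the builder runs");
    break;
  }
}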
unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const {
unsigned N = 1;
Type *EltTy = T;
- while (isa<StructType, ArrayType, VectorType>(EltTy)) {
+ while (isa<StructType, ArrayType, FixedVectorType>(EltTy)) {
if (auto *ST = dyn_cast<StructType>(EltTy)) {
// Check that struct is homogeneous.
for (const auto *Ty : ST->elements())
@@ -5982,7 +6235,8 @@ unsigned BoUpSLP::canMapToVector(Type *T, const DataLayout &DL) const {
if (!isValidElementType(EltTy))
return 0;
uint64_t VTSize = DL.getTypeStoreSizeInBits(FixedVectorType::get(EltTy, N));
- if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize || VTSize != DL.getTypeStoreSizeInBits(T))
+ if (VTSize < MinVecRegSize || VTSize > MaxVecRegSize ||
+ VTSize != DL.getTypeStoreSizeInBits(T))
return 0;
return N;
}
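For context, a simplified standalone sketch of the element-counting idea behind canMapToVector, using a toy aggregate model instead of llvm::Type (illustrative only): nested levels multiply their member counts, and a level whose members differ in shape makes the whole type unusable.

#include <cstddef>
#include <vector>

// Toy aggregate: a node is either a scalar leaf (no children) or an aggregate
// whose members must all have identical shape to count as homogeneous.
struct Node {
  std::vector<Node> Children;
};

bool sameShape(const Node &A, const Node &B) {
  if (A.Children.size() != B.Children.size())
    return false;
  for (std::size_t I = 0; I < A.Children.size(); ++I)
    if (!sameShape(A.Children[I], B.Children[I]))
      return false;
  return true;
}

// Multiply up the element counts of nested aggregates, bailing out (returning
// 0) on a non-homogeneous level, similar in spirit to the loop above.
unsigned flatElementCount(const Node &N) {
  if (N.Children.empty())
    return 1; // scalar leaf
  for (std::size_t I = 1; I < N.Children.size(); ++I)
    if (!sameShape(N.Children[I], N.Children.front()))
      return 0;
  unsigned Inner = flatElementCount(N.Children.front());
  return Inner == 0 ? 0 : static_cast<unsigned>(N.Children.size()) * Inner;
}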
@@ -6111,68 +6365,6 @@ getVectorCallCosts(CallInst *CI, FixedVectorType *VecTy,
return {IntrinsicCost, LibCost};
}
-/// Compute the cost of creating a vector of type \p VecTy containing the
-/// extracted values from \p VL.
-static InstructionCost
-computeExtractCost(ArrayRef<Value *> VL, FixedVectorType *VecTy,
- TargetTransformInfo::ShuffleKind ShuffleKind,
- ArrayRef<int> Mask, TargetTransformInfo &TTI) {
- unsigned NumOfParts = TTI.getNumberOfParts(VecTy);
-
- if (ShuffleKind != TargetTransformInfo::SK_PermuteSingleSrc || !NumOfParts ||
- VecTy->getNumElements() < NumOfParts)
- return TTI.getShuffleCost(ShuffleKind, VecTy, Mask);
-
- bool AllConsecutive = true;
- unsigned EltsPerVector = VecTy->getNumElements() / NumOfParts;
- unsigned Idx = -1;
- InstructionCost Cost = 0;
-
- // Process extracts in blocks of EltsPerVector to check if the source vector
- // operand can be re-used directly. If not, add the cost of creating a shuffle
- // to extract the values into a vector register.
- SmallVector<int> RegMask(EltsPerVector, UndefMaskElem);
- for (auto *V : VL) {
- ++Idx;
-
- // Reached the start of a new vector registers.
- if (Idx % EltsPerVector == 0) {
- RegMask.assign(EltsPerVector, UndefMaskElem);
- AllConsecutive = true;
- continue;
- }
-
- // Need to exclude undefs from analysis.
- if (isa<UndefValue>(V) || Mask[Idx] == UndefMaskElem)
- continue;
-
- // Check all extracts for a vector register on the target directly
- // extract values in order.
- unsigned CurrentIdx = *getExtractIndex(cast<Instruction>(V));
- if (!isa<UndefValue>(VL[Idx - 1]) && Mask[Idx - 1] != UndefMaskElem) {
- unsigned PrevIdx = *getExtractIndex(cast<Instruction>(VL[Idx - 1]));
- AllConsecutive &= PrevIdx + 1 == CurrentIdx &&
- CurrentIdx % EltsPerVector == Idx % EltsPerVector;
- RegMask[Idx % EltsPerVector] = CurrentIdx % EltsPerVector;
- }
-
- if (AllConsecutive)
- continue;
-
- // Skip all indices, except for the last index per vector block.
- if ((Idx + 1) % EltsPerVector != 0 && Idx + 1 != VL.size())
- continue;
-
- // If we have a series of extracts which are not consecutive and hence
- // cannot re-use the source vector register directly, compute the shuffle
- // cost to extract the vector with EltsPerVector elements.
- Cost += TTI.getShuffleCost(
- TargetTransformInfo::SK_PermuteSingleSrc,
- FixedVectorType::get(VecTy->getElementType(), EltsPerVector), RegMask);
- }
- return Cost;
-}
-
/// Build shuffle mask for shuffle graph entries and lists of main and alternate
/// operations operands.
static void
@@ -6183,7 +6375,7 @@ buildShuffleEntryMask(ArrayRef<Value *> VL, ArrayRef<unsigned> ReorderIndices,
SmallVectorImpl<Value *> *OpScalars = nullptr,
SmallVectorImpl<Value *> *AltScalars = nullptr) {
unsigned Sz = VL.size();
- Mask.assign(Sz, UndefMaskElem);
+ Mask.assign(Sz, PoisonMaskElem);
SmallVector<int> OrderMask;
if (!ReorderIndices.empty())
inversePermutation(ReorderIndices, OrderMask);
@@ -6203,9 +6395,9 @@ buildShuffleEntryMask(ArrayRef<Value *> VL, ArrayRef<unsigned> ReorderIndices,
}
}
if (!ReusesIndices.empty()) {
- SmallVector<int> NewMask(ReusesIndices.size(), UndefMaskElem);
+ SmallVector<int> NewMask(ReusesIndices.size(), PoisonMaskElem);
transform(ReusesIndices, NewMask.begin(), [&Mask](int Idx) {
- return Idx != UndefMaskElem ? Mask[Idx] : UndefMaskElem;
+ return Idx != PoisonMaskElem ? Mask[Idx] : PoisonMaskElem;
});
Mask.swap(NewMask);
}
@@ -6325,13 +6517,13 @@ protected:
static void combineMasks(unsigned LocalVF, SmallVectorImpl<int> &Mask,
ArrayRef<int> ExtMask) {
unsigned VF = Mask.size();
- SmallVector<int> NewMask(ExtMask.size(), UndefMaskElem);
+ SmallVector<int> NewMask(ExtMask.size(), PoisonMaskElem);
for (int I = 0, Sz = ExtMask.size(); I < Sz; ++I) {
- if (ExtMask[I] == UndefMaskElem)
+ if (ExtMask[I] == PoisonMaskElem)
continue;
int MaskedIdx = Mask[ExtMask[I] % VF];
NewMask[I] =
- MaskedIdx == UndefMaskElem ? UndefMaskElem : MaskedIdx % LocalVF;
+ MaskedIdx == PoisonMaskElem ? PoisonMaskElem : MaskedIdx % LocalVF;
}
Mask.swap(NewMask);
}
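The UndefMaskElem to PoisonMaskElem rename in these hunks keeps the same -1 sentinel; a small standalone sketch (plain C++, illustrative names) of what combineMasks computes with that sentinel, folding an outer shuffle mask through an inner one:

#include <cstddef>
#include <vector>

constexpr int PoisonLane = -1; // plays the role of PoisonMaskElem

// Fold an outer shuffle mask (ExtMask) through an inner one (Mask), the way
// combineMasks does: every live lane of the result indexes into the inner
// mask, and poison lanes stay poison.
std::vector<int> combine(unsigned LocalVF, const std::vector<int> &Mask,
                         const std::vector<int> &ExtMask) {
  unsigned VF = static_cast<unsigned>(Mask.size());
  std::vector<int> NewMask(ExtMask.size(), PoisonLane);
  for (std::size_t I = 0; I < ExtMask.size(); ++I) {
    if (ExtMask[I] == PoisonLane)
      continue;
    int Inner = Mask[ExtMask[I] % VF];
    NewMask[I] = Inner == PoisonLane ? PoisonLane
                                     : Inner % static_cast<int>(LocalVF);
  }
  return NewMask;
}

For example, with Mask = {2, -1, 0, 1}, ExtMask = {3, 3, -1, 0} and LocalVF = 4, the combined mask is {1, 1, -1, 2}.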
@@ -6418,11 +6610,12 @@ protected:
if (auto *SVOpTy =
dyn_cast<FixedVectorType>(SV->getOperand(0)->getType()))
LocalVF = SVOpTy->getNumElements();
- SmallVector<int> ExtMask(Mask.size(), UndefMaskElem);
+ SmallVector<int> ExtMask(Mask.size(), PoisonMaskElem);
for (auto [Idx, I] : enumerate(Mask)) {
- if (I == UndefMaskElem)
- continue;
- ExtMask[Idx] = SV->getMaskValue(I);
+ if (I == PoisonMaskElem ||
+ static_cast<unsigned>(I) >= SV->getShuffleMask().size())
+ continue;
+ ExtMask[Idx] = SV->getMaskValue(I);
}
bool IsOp1Undef =
isUndefVector(SV->getOperand(0),
@@ -6435,11 +6628,11 @@ protected:
if (!IsOp1Undef && !IsOp2Undef) {
// Update mask and mark undef elems.
for (int &I : Mask) {
- if (I == UndefMaskElem)
+ if (I == PoisonMaskElem)
continue;
if (SV->getMaskValue(I % SV->getShuffleMask().size()) ==
- UndefMaskElem)
- I = UndefMaskElem;
+ PoisonMaskElem)
+ I = PoisonMaskElem;
}
break;
}
@@ -6453,15 +6646,16 @@ protected:
Op = SV->getOperand(1);
}
if (auto *OpTy = dyn_cast<FixedVectorType>(Op->getType());
- !OpTy || !isIdentityMask(Mask, OpTy, SinglePermute)) {
+ !OpTy || !isIdentityMask(Mask, OpTy, SinglePermute) ||
+ ShuffleVectorInst::isZeroEltSplatMask(Mask)) {
if (IdentityOp) {
V = IdentityOp;
assert(Mask.size() == IdentityMask.size() &&
"Expected masks of same sizes.");
// Clear known poison elements.
for (auto [I, Idx] : enumerate(Mask))
- if (Idx == UndefMaskElem)
- IdentityMask[I] = UndefMaskElem;
+ if (Idx == PoisonMaskElem)
+ IdentityMask[I] = PoisonMaskElem;
Mask.swap(IdentityMask);
auto *Shuffle = dyn_cast<ShuffleVectorInst>(V);
return SinglePermute &&
@@ -6481,10 +6675,12 @@ protected:
/// Smart shuffle instruction emission, walks through shuffles trees and
/// tries to find the best matching vector for the actual shuffle
/// instruction.
- template <typename ShuffleBuilderTy>
- static Value *createShuffle(Value *V1, Value *V2, ArrayRef<int> Mask,
- ShuffleBuilderTy &Builder) {
+ template <typename T, typename ShuffleBuilderTy>
+ static T createShuffle(Value *V1, Value *V2, ArrayRef<int> Mask,
+ ShuffleBuilderTy &Builder) {
assert(V1 && "Expected at least one vector value.");
+ if (V2)
+ Builder.resizeToMatch(V1, V2);
int VF = Mask.size();
if (auto *FTy = dyn_cast<FixedVectorType>(V1->getType()))
VF = FTy->getNumElements();
@@ -6495,8 +6691,8 @@ protected:
Value *Op2 = V2;
int VF =
cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue();
- SmallVector<int> CombinedMask1(Mask.size(), UndefMaskElem);
- SmallVector<int> CombinedMask2(Mask.size(), UndefMaskElem);
+ SmallVector<int> CombinedMask1(Mask.size(), PoisonMaskElem);
+ SmallVector<int> CombinedMask2(Mask.size(), PoisonMaskElem);
for (int I = 0, E = Mask.size(); I < E; ++I) {
if (Mask[I] < VF)
CombinedMask1[I] = Mask[I];
@@ -6514,9 +6710,9 @@ protected:
// again.
if (auto *SV1 = dyn_cast<ShuffleVectorInst>(Op1))
if (auto *SV2 = dyn_cast<ShuffleVectorInst>(Op2)) {
- SmallVector<int> ExtMask1(Mask.size(), UndefMaskElem);
+ SmallVector<int> ExtMask1(Mask.size(), PoisonMaskElem);
for (auto [Idx, I] : enumerate(CombinedMask1)) {
- if (I == UndefMaskElem)
+ if (I == PoisonMaskElem)
continue;
ExtMask1[Idx] = SV1->getMaskValue(I);
}
@@ -6524,9 +6720,9 @@ protected:
cast<FixedVectorType>(SV1->getOperand(1)->getType())
->getNumElements(),
ExtMask1, UseMask::SecondArg);
- SmallVector<int> ExtMask2(CombinedMask2.size(), UndefMaskElem);
+ SmallVector<int> ExtMask2(CombinedMask2.size(), PoisonMaskElem);
for (auto [Idx, I] : enumerate(CombinedMask2)) {
- if (I == UndefMaskElem)
+ if (I == PoisonMaskElem)
continue;
ExtMask2[Idx] = SV2->getMaskValue(I);
}
@@ -6566,64 +6762,360 @@ protected:
->getElementCount()
.getKnownMinValue());
for (int I = 0, E = Mask.size(); I < E; ++I) {
- if (CombinedMask2[I] != UndefMaskElem) {
- assert(CombinedMask1[I] == UndefMaskElem &&
+ if (CombinedMask2[I] != PoisonMaskElem) {
+ assert(CombinedMask1[I] == PoisonMaskElem &&
"Expected undefined mask element");
CombinedMask1[I] = CombinedMask2[I] + (Op1 == Op2 ? 0 : VF);
}
}
+ const int Limit = CombinedMask1.size() * 2;
+ if (Op1 == Op2 && Limit == 2 * VF &&
+ all_of(CombinedMask1, [=](int Idx) { return Idx < Limit; }) &&
+ (ShuffleVectorInst::isIdentityMask(CombinedMask1) ||
+ (ShuffleVectorInst::isZeroEltSplatMask(CombinedMask1) &&
+ isa<ShuffleVectorInst>(Op1) &&
+ cast<ShuffleVectorInst>(Op1)->getShuffleMask() ==
+ ArrayRef(CombinedMask1))))
+ return Builder.createIdentity(Op1);
return Builder.createShuffleVector(
Op1, Op1 == Op2 ? PoisonValue::get(Op1->getType()) : Op2,
CombinedMask1);
}
if (isa<PoisonValue>(V1))
- return PoisonValue::get(FixedVectorType::get(
- cast<VectorType>(V1->getType())->getElementType(), Mask.size()));
+ return Builder.createPoison(
+ cast<VectorType>(V1->getType())->getElementType(), Mask.size());
SmallVector<int> NewMask(Mask.begin(), Mask.end());
bool IsIdentity = peekThroughShuffles(V1, NewMask, /*SinglePermute=*/true);
assert(V1 && "Expected non-null value after looking through shuffles.");
if (!IsIdentity)
return Builder.createShuffleVector(V1, NewMask);
- return V1;
+ return Builder.createIdentity(V1);
}
};
} // namespace
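The createShuffle change above templates the return type so the same shuffle-folding walk can drive either an IR-emitting builder or a cost-only builder. A standalone sketch of that pattern, with invented names and std::string standing in for an IR value:

#include <cstddef>
#include <string>
#include <vector>

// Two interchangeable builders: one produces a textual description (standing
// in for IR emission), the other only reports a cost.
struct EmitBuilder {
  using Result = std::string;
  Result createShuffleVector(const std::string &V, const std::vector<int> &) {
    return "shufflevector(" + V + ")";
  }
  Result createIdentity(const std::string &V) { return V; }
};

struct CostBuilder {
  using Result = int;
  Result createShuffleVector(const std::string &, const std::vector<int> &) {
    return 1; // one permute instruction
  }
  Result createIdentity(const std::string &) { return 0; } // identity is free
};

// The folding walk is written once and templated on the builder type.
template <typename BuilderT>
typename BuilderT::Result buildShuffle(const std::string &V,
                                       const std::vector<int> &Mask,
                                       BuilderT &Builder) {
  bool IsIdentity = true;
  for (std::size_t I = 0; I < Mask.size(); ++I)
    if (Mask[I] != -1 && Mask[I] != static_cast<int>(I))
      IsIdentity = false;
  return IsIdentity ? Builder.createIdentity(V)
                    : Builder.createShuffleVector(V, Mask);
}

The same buildShuffle walk yields IR-like output with EmitBuilder and a 0/1 cost with CostBuilder, which is the shape the cost estimator below relies on.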
-InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
- ArrayRef<Value *> VectorizedVals) {
- ArrayRef<Value *> VL = E->Scalars;
+/// Merges shuffle masks and emits final shuffle instruction, if required. It
+/// supports shuffling of 2 input vectors. It implements lazy shuffles emission,
+/// when the actual shuffle instruction is generated only if this is actually
+/// required. Otherwise, the shuffle instruction emission is delayed till the
+/// end of the process, to reduce the number of emitted instructions and further
+/// analysis/transformations.
+class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
+ bool IsFinalized = false;
+ SmallVector<int> CommonMask;
+ SmallVector<PointerUnion<Value *, const TreeEntry *>, 2> InVectors;
+ const TargetTransformInfo &TTI;
+ InstructionCost Cost = 0;
+ ArrayRef<Value *> VectorizedVals;
+ BoUpSLP &R;
+ SmallPtrSetImpl<Value *> &CheckedExtracts;
+ constexpr static TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+
+ InstructionCost getBuildVectorCost(ArrayRef<Value *> VL, Value *Root) {
+ if ((!Root && allConstant(VL)) || all_of(VL, UndefValue::classof))
+ return TTI::TCC_Free;
+ auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size());
+ InstructionCost GatherCost = 0;
+ SmallVector<Value *> Gathers(VL.begin(), VL.end());
+ // Improve gather cost for gather of loads, if we can group some of the
+ // loads into vector loads.
+ InstructionsState S = getSameOpcode(VL, *R.TLI);
+ if (VL.size() > 2 && S.getOpcode() == Instruction::Load &&
+ !S.isAltShuffle() &&
+ !all_of(Gathers, [&](Value *V) { return R.getTreeEntry(V); }) &&
+ !isSplat(Gathers)) {
+ BoUpSLP::ValueSet VectorizedLoads;
+ unsigned StartIdx = 0;
+ unsigned VF = VL.size() / 2;
+ unsigned VectorizedCnt = 0;
+ unsigned ScatterVectorizeCnt = 0;
+ const unsigned Sz = R.DL->getTypeSizeInBits(S.MainOp->getType());
+ for (unsigned MinVF = R.getMinVF(2 * Sz); VF >= MinVF; VF /= 2) {
+ for (unsigned Cnt = StartIdx, End = VL.size(); Cnt + VF <= End;
+ Cnt += VF) {
+ ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
+ if (!VectorizedLoads.count(Slice.front()) &&
+ !VectorizedLoads.count(Slice.back()) && allSameBlock(Slice)) {
+ SmallVector<Value *> PointerOps;
+ OrdersType CurrentOrder;
+ LoadsState LS =
+ canVectorizeLoads(Slice, Slice.front(), TTI, *R.DL, *R.SE,
+ *R.LI, *R.TLI, CurrentOrder, PointerOps);
+ switch (LS) {
+ case LoadsState::Vectorize:
+ case LoadsState::ScatterVectorize:
+ // Mark the vectorized loads so that we don't vectorize them
+ // again.
+ if (LS == LoadsState::Vectorize)
+ ++VectorizedCnt;
+ else
+ ++ScatterVectorizeCnt;
+ VectorizedLoads.insert(Slice.begin(), Slice.end());
+              // If we vectorized the initial block, no need to try to
+              // vectorize it again.
+ if (Cnt == StartIdx)
+ StartIdx += VF;
+ break;
+ case LoadsState::Gather:
+ break;
+ }
+ }
+ }
+ // Check if the whole array was vectorized already - exit.
+ if (StartIdx >= VL.size())
+ break;
+ // Found vectorizable parts - exit.
+ if (!VectorizedLoads.empty())
+ break;
+ }
+ if (!VectorizedLoads.empty()) {
+ unsigned NumParts = TTI.getNumberOfParts(VecTy);
+ bool NeedInsertSubvectorAnalysis =
+ !NumParts || (VL.size() / VF) > NumParts;
+ // Get the cost for gathered loads.
+ for (unsigned I = 0, End = VL.size(); I < End; I += VF) {
+ if (VectorizedLoads.contains(VL[I]))
+ continue;
+ GatherCost += getBuildVectorCost(VL.slice(I, VF), Root);
+ }
+ // Exclude potentially vectorized loads from list of gathered
+ // scalars.
+ auto *LI = cast<LoadInst>(S.MainOp);
+ Gathers.assign(Gathers.size(), PoisonValue::get(LI->getType()));
+ // The cost for vectorized loads.
+ InstructionCost ScalarsCost = 0;
+ for (Value *V : VectorizedLoads) {
+ auto *LI = cast<LoadInst>(V);
+ ScalarsCost +=
+ TTI.getMemoryOpCost(Instruction::Load, LI->getType(),
+ LI->getAlign(), LI->getPointerAddressSpace(),
+ CostKind, TTI::OperandValueInfo(), LI);
+ }
+ auto *LoadTy = FixedVectorType::get(LI->getType(), VF);
+ Align Alignment = LI->getAlign();
+ GatherCost +=
+ VectorizedCnt *
+ TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment,
+ LI->getPointerAddressSpace(), CostKind,
+ TTI::OperandValueInfo(), LI);
+ GatherCost += ScatterVectorizeCnt *
+ TTI.getGatherScatterOpCost(
+ Instruction::Load, LoadTy, LI->getPointerOperand(),
+ /*VariableMask=*/false, Alignment, CostKind, LI);
+ if (NeedInsertSubvectorAnalysis) {
+ // Add the cost for the subvectors insert.
+ for (int I = VF, E = VL.size(); I < E; I += VF)
+ GatherCost += TTI.getShuffleCost(TTI::SK_InsertSubvector, VecTy,
+ std::nullopt, CostKind, I, LoadTy);
+ }
+ GatherCost -= ScalarsCost;
+ }
+ } else if (!Root && isSplat(VL)) {
+ // Found the broadcasting of the single scalar, calculate the cost as
+ // the broadcast.
+ const auto *It =
+ find_if(VL, [](Value *V) { return !isa<UndefValue>(V); });
+ assert(It != VL.end() && "Expected at least one non-undef value.");
+ // Add broadcast for non-identity shuffle only.
+ bool NeedShuffle =
+ count(VL, *It) > 1 &&
+ (VL.front() != *It || !all_of(VL.drop_front(), UndefValue::classof));
+ InstructionCost InsertCost = TTI.getVectorInstrCost(
+ Instruction::InsertElement, VecTy, CostKind,
+ NeedShuffle ? 0 : std::distance(VL.begin(), It),
+ PoisonValue::get(VecTy), *It);
+ return InsertCost +
+ (NeedShuffle ? TTI.getShuffleCost(
+ TargetTransformInfo::SK_Broadcast, VecTy,
+ /*Mask=*/std::nullopt, CostKind, /*Index=*/0,
+ /*SubTp=*/nullptr, /*Args=*/*It)
+ : TTI::TCC_Free);
+ }
+ return GatherCost +
+ (all_of(Gathers, UndefValue::classof)
+ ? TTI::TCC_Free
+ : R.getGatherCost(Gathers, !Root && VL.equals(Gathers)));
+ };
- Type *ScalarTy = VL[0]->getType();
- if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
- ScalarTy = SI->getValueOperand()->getType();
- else if (CmpInst *CI = dyn_cast<CmpInst>(VL[0]))
- ScalarTy = CI->getOperand(0)->getType();
- else if (auto *IE = dyn_cast<InsertElementInst>(VL[0]))
- ScalarTy = IE->getOperand(1)->getType();
- auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ /// Compute the cost of creating a vector of type \p VecTy containing the
+ /// extracted values from \p VL.
+ InstructionCost computeExtractCost(ArrayRef<Value *> VL, ArrayRef<int> Mask,
+ TTI::ShuffleKind ShuffleKind) {
+ auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size());
+ unsigned NumOfParts = TTI.getNumberOfParts(VecTy);
- // If we have computed a smaller type for the expression, update VecTy so
- // that the costs will be accurate.
- if (MinBWs.count(VL[0]))
- VecTy = FixedVectorType::get(
- IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());
- unsigned EntryVF = E->getVectorFactor();
- auto *FinalVecTy = FixedVectorType::get(VecTy->getElementType(), EntryVF);
+ if (ShuffleKind != TargetTransformInfo::SK_PermuteSingleSrc ||
+ !NumOfParts || VecTy->getNumElements() < NumOfParts)
+ return TTI.getShuffleCost(ShuffleKind, VecTy, Mask);
- bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
- // FIXME: it tries to fix a problem with MSVC buildbots.
- TargetTransformInfo *TTI = this->TTI;
- auto AdjustExtractsCost = [=](InstructionCost &Cost) {
+ bool AllConsecutive = true;
+ unsigned EltsPerVector = VecTy->getNumElements() / NumOfParts;
+ unsigned Idx = -1;
+ InstructionCost Cost = 0;
+
+ // Process extracts in blocks of EltsPerVector to check if the source vector
+ // operand can be re-used directly. If not, add the cost of creating a
+ // shuffle to extract the values into a vector register.
+ SmallVector<int> RegMask(EltsPerVector, PoisonMaskElem);
+ for (auto *V : VL) {
+ ++Idx;
+
+      // Reached the start of a new vector register.
+ if (Idx % EltsPerVector == 0) {
+ RegMask.assign(EltsPerVector, PoisonMaskElem);
+ AllConsecutive = true;
+ continue;
+ }
+
+ // Need to exclude undefs from analysis.
+ if (isa<UndefValue>(V) || Mask[Idx] == PoisonMaskElem)
+ continue;
+
+ // Check all extracts for a vector register on the target directly
+ // extract values in order.
+ unsigned CurrentIdx = *getExtractIndex(cast<Instruction>(V));
+ if (!isa<UndefValue>(VL[Idx - 1]) && Mask[Idx - 1] != PoisonMaskElem) {
+ unsigned PrevIdx = *getExtractIndex(cast<Instruction>(VL[Idx - 1]));
+ AllConsecutive &= PrevIdx + 1 == CurrentIdx &&
+ CurrentIdx % EltsPerVector == Idx % EltsPerVector;
+ RegMask[Idx % EltsPerVector] = CurrentIdx % EltsPerVector;
+ }
+
+ if (AllConsecutive)
+ continue;
+
+ // Skip all indices, except for the last index per vector block.
+ if ((Idx + 1) % EltsPerVector != 0 && Idx + 1 != VL.size())
+ continue;
+
+ // If we have a series of extracts which are not consecutive and hence
+ // cannot re-use the source vector register directly, compute the shuffle
+ // cost to extract the vector with EltsPerVector elements.
+ Cost += TTI.getShuffleCost(
+ TargetTransformInfo::SK_PermuteSingleSrc,
+ FixedVectorType::get(VecTy->getElementType(), EltsPerVector),
+ RegMask);
+ }
+ return Cost;
+ }
+
+ class ShuffleCostBuilder {
+ const TargetTransformInfo &TTI;
+
+ static bool isEmptyOrIdentity(ArrayRef<int> Mask, unsigned VF) {
+ int Limit = 2 * VF;
+ return Mask.empty() ||
+ (VF == Mask.size() &&
+ all_of(Mask, [Limit](int Idx) { return Idx < Limit; }) &&
+ ShuffleVectorInst::isIdentityMask(Mask));
+ }
+
+ public:
+ ShuffleCostBuilder(const TargetTransformInfo &TTI) : TTI(TTI) {}
+ ~ShuffleCostBuilder() = default;
+ InstructionCost createShuffleVector(Value *V1, Value *,
+ ArrayRef<int> Mask) const {
+ // Empty mask or identity mask are free.
+ unsigned VF =
+ cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue();
+ if (isEmptyOrIdentity(Mask, VF))
+ return TTI::TCC_Free;
+ return TTI.getShuffleCost(
+ TTI::SK_PermuteTwoSrc,
+ FixedVectorType::get(
+ cast<VectorType>(V1->getType())->getElementType(), Mask.size()),
+ Mask);
+ }
+ InstructionCost createShuffleVector(Value *V1, ArrayRef<int> Mask) const {
+ // Empty mask or identity mask are free.
+ if (isEmptyOrIdentity(Mask, Mask.size()))
+ return TTI::TCC_Free;
+ return TTI.getShuffleCost(
+ TTI::SK_PermuteSingleSrc,
+ FixedVectorType::get(
+ cast<VectorType>(V1->getType())->getElementType(), Mask.size()),
+ Mask);
+ }
+ InstructionCost createIdentity(Value *) const { return TTI::TCC_Free; }
+ InstructionCost createPoison(Type *Ty, unsigned VF) const {
+ return TTI::TCC_Free;
+ }
+ void resizeToMatch(Value *&, Value *&) const {}
+ };
+
+ /// Smart shuffle instruction emission, walks through shuffles trees and
+ /// tries to find the best matching vector for the actual shuffle
+ /// instruction.
+ InstructionCost
+ createShuffle(const PointerUnion<Value *, const TreeEntry *> &P1,
+ const PointerUnion<Value *, const TreeEntry *> &P2,
+ ArrayRef<int> Mask) {
+ ShuffleCostBuilder Builder(TTI);
+ Value *V1 = P1.dyn_cast<Value *>(), *V2 = P2.dyn_cast<Value *>();
+ unsigned CommonVF = 0;
+ if (!V1) {
+ const TreeEntry *E = P1.get<const TreeEntry *>();
+ unsigned VF = E->getVectorFactor();
+ if (V2) {
+ unsigned V2VF = cast<FixedVectorType>(V2->getType())->getNumElements();
+ if (V2VF != VF && V2VF == E->Scalars.size())
+ VF = E->Scalars.size();
+ } else if (!P2.isNull()) {
+ const TreeEntry *E2 = P2.get<const TreeEntry *>();
+ if (E->Scalars.size() == E2->Scalars.size())
+ CommonVF = VF = E->Scalars.size();
+ } else {
+ // P2 is empty, check that we have same node + reshuffle (if any).
+ if (E->Scalars.size() == Mask.size() && VF != Mask.size()) {
+ VF = E->Scalars.size();
+ SmallVector<int> CommonMask(Mask.begin(), Mask.end());
+ ::addMask(CommonMask, E->getCommonMask());
+ V1 = Constant::getNullValue(
+ FixedVectorType::get(E->Scalars.front()->getType(), VF));
+ return BaseShuffleAnalysis::createShuffle<InstructionCost>(
+ V1, nullptr, CommonMask, Builder);
+ }
+ }
+ V1 = Constant::getNullValue(
+ FixedVectorType::get(E->Scalars.front()->getType(), VF));
+ }
+ if (!V2 && !P2.isNull()) {
+ const TreeEntry *E = P2.get<const TreeEntry *>();
+ unsigned VF = E->getVectorFactor();
+ unsigned V1VF = cast<FixedVectorType>(V1->getType())->getNumElements();
+ if (!CommonVF && V1VF == E->Scalars.size())
+ CommonVF = E->Scalars.size();
+ if (CommonVF)
+ VF = CommonVF;
+ V2 = Constant::getNullValue(
+ FixedVectorType::get(E->Scalars.front()->getType(), VF));
+ }
+ return BaseShuffleAnalysis::createShuffle<InstructionCost>(V1, V2, Mask,
+ Builder);
+ }
+
+public:
+ ShuffleCostEstimator(TargetTransformInfo &TTI,
+ ArrayRef<Value *> VectorizedVals, BoUpSLP &R,
+ SmallPtrSetImpl<Value *> &CheckedExtracts)
+ : TTI(TTI), VectorizedVals(VectorizedVals), R(R),
+ CheckedExtracts(CheckedExtracts) {}
+ Value *adjustExtracts(const TreeEntry *E, ArrayRef<int> Mask,
+ TTI::ShuffleKind ShuffleKind) {
+ if (Mask.empty())
+ return nullptr;
+ Value *VecBase = nullptr;
+ ArrayRef<Value *> VL = E->Scalars;
+ auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size());
// If the resulting type is scalarized, do not adjust the cost.
- unsigned VecNumParts = TTI->getNumberOfParts(VecTy);
+ unsigned VecNumParts = TTI.getNumberOfParts(VecTy);
if (VecNumParts == VecTy->getNumElements())
- return;
+ return nullptr;
DenseMap<Value *, int> ExtractVectorsTys;
- SmallPtrSet<Value *, 4> CheckedExtracts;
- for (auto *V : VL) {
- if (isa<UndefValue>(V))
+ for (auto [I, V] : enumerate(VL)) {
+ // Ignore non-extractelement scalars.
+ if (isa<UndefValue>(V) || (!Mask.empty() && Mask[I] == PoisonMaskElem))
continue;
// If all users of instruction are going to be vectorized and this
// instruction itself is not going to be vectorized, consider this
@@ -6631,17 +7123,18 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
// vectorized tree.
// Also, avoid adjusting the cost for extractelements with multiple uses
// in different graph entries.
- const TreeEntry *VE = getTreeEntry(V);
+ const TreeEntry *VE = R.getTreeEntry(V);
if (!CheckedExtracts.insert(V).second ||
- !areAllUsersVectorized(cast<Instruction>(V), VectorizedVals) ||
+ !R.areAllUsersVectorized(cast<Instruction>(V), VectorizedVals) ||
(VE && VE != E))
continue;
auto *EE = cast<ExtractElementInst>(V);
+ VecBase = EE->getVectorOperand();
std::optional<unsigned> EEIdx = getExtractIndex(EE);
if (!EEIdx)
continue;
unsigned Idx = *EEIdx;
- if (VecNumParts != TTI->getNumberOfParts(EE->getVectorOperandType())) {
+ if (VecNumParts != TTI.getNumberOfParts(EE->getVectorOperandType())) {
auto It =
ExtractVectorsTys.try_emplace(EE->getVectorOperand(), Idx).first;
It->getSecond() = std::min<int>(It->second, Idx);
@@ -6654,18 +7147,17 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
})) {
// Use getExtractWithExtendCost() to calculate the cost of
// extractelement/ext pair.
- Cost -=
- TTI->getExtractWithExtendCost(Ext->getOpcode(), Ext->getType(),
- EE->getVectorOperandType(), Idx);
+ Cost -= TTI.getExtractWithExtendCost(Ext->getOpcode(), Ext->getType(),
+ EE->getVectorOperandType(), Idx);
// Add back the cost of s|zext which is subtracted separately.
- Cost += TTI->getCastInstrCost(
+ Cost += TTI.getCastInstrCost(
Ext->getOpcode(), Ext->getType(), EE->getType(),
TTI::getCastContextHint(Ext), CostKind, Ext);
continue;
}
}
- Cost -= TTI->getVectorInstrCost(*EE, EE->getVectorOperandType(), CostKind,
- Idx);
+ Cost -= TTI.getVectorInstrCost(*EE, EE->getVectorOperandType(), CostKind,
+ Idx);
}
// Add a cost for subvector extracts/inserts if required.
for (const auto &Data : ExtractVectorsTys) {
@@ -6673,34 +7165,148 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
unsigned NumElts = VecTy->getNumElements();
if (Data.second % NumElts == 0)
continue;
- if (TTI->getNumberOfParts(EEVTy) > VecNumParts) {
+ if (TTI.getNumberOfParts(EEVTy) > VecNumParts) {
unsigned Idx = (Data.second / NumElts) * NumElts;
unsigned EENumElts = EEVTy->getNumElements();
+ if (Idx % NumElts == 0)
+ continue;
if (Idx + NumElts <= EENumElts) {
- Cost +=
- TTI->getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
- EEVTy, std::nullopt, CostKind, Idx, VecTy);
+ Cost += TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
+ EEVTy, std::nullopt, CostKind, Idx, VecTy);
} else {
// Need to round up the subvector type vectorization factor to avoid a
// crash in cost model functions. Make SubVT so that Idx + VF of SubVT
// <= EENumElts.
auto *SubVT =
FixedVectorType::get(VecTy->getElementType(), EENumElts - Idx);
- Cost +=
- TTI->getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
- EEVTy, std::nullopt, CostKind, Idx, SubVT);
+ Cost += TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
+ EEVTy, std::nullopt, CostKind, Idx, SubVT);
}
} else {
- Cost += TTI->getShuffleCost(TargetTransformInfo::SK_InsertSubvector,
- VecTy, std::nullopt, CostKind, 0, EEVTy);
+ Cost += TTI.getShuffleCost(TargetTransformInfo::SK_InsertSubvector,
+ VecTy, std::nullopt, CostKind, 0, EEVTy);
}
}
- };
+    // Check that the gather of extractelements can be represented as just a
+    // shuffle of the one or two vectors the scalars are extracted from.
+    // Found the bunch of extractelement instructions that must be gathered
+    // into a vector and can be represented as a permutation of elements in a
+    // single input vector or in two input vectors.
+ Cost += computeExtractCost(VL, Mask, ShuffleKind);
+ return VecBase;
+ }
+ void add(const TreeEntry *E1, const TreeEntry *E2, ArrayRef<int> Mask) {
+ CommonMask.assign(Mask.begin(), Mask.end());
+ InVectors.assign({E1, E2});
+ }
+ void add(const TreeEntry *E1, ArrayRef<int> Mask) {
+ CommonMask.assign(Mask.begin(), Mask.end());
+ InVectors.assign(1, E1);
+ }
+ /// Adds another one input vector and the mask for the shuffling.
+ void add(Value *V1, ArrayRef<int> Mask) {
+ assert(CommonMask.empty() && InVectors.empty() &&
+ "Expected empty input mask/vectors.");
+ CommonMask.assign(Mask.begin(), Mask.end());
+ InVectors.assign(1, V1);
+ }
+ Value *gather(ArrayRef<Value *> VL, Value *Root = nullptr) {
+ Cost += getBuildVectorCost(VL, Root);
+ if (!Root) {
+ assert(InVectors.empty() && "Unexpected input vectors for buildvector.");
+ // FIXME: Need to find a way to avoid use of getNullValue here.
+ SmallVector<Constant *> Vals;
+ for (Value *V : VL) {
+ if (isa<UndefValue>(V)) {
+ Vals.push_back(cast<Constant>(V));
+ continue;
+ }
+ Vals.push_back(Constant::getNullValue(V->getType()));
+ }
+ return ConstantVector::get(Vals);
+ }
+ return ConstantVector::getSplat(
+ ElementCount::getFixed(VL.size()),
+ Constant::getNullValue(VL.front()->getType()));
+ }
+ /// Finalize emission of the shuffles.
+ InstructionCost
+ finalize(ArrayRef<int> ExtMask, unsigned VF = 0,
+ function_ref<void(Value *&, SmallVectorImpl<int> &)> Action = {}) {
+ IsFinalized = true;
+ if (Action) {
+ const PointerUnion<Value *, const TreeEntry *> &Vec = InVectors.front();
+ if (InVectors.size() == 2) {
+ Cost += createShuffle(Vec, InVectors.back(), CommonMask);
+ InVectors.pop_back();
+ } else {
+ Cost += createShuffle(Vec, nullptr, CommonMask);
+ }
+ for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
+ if (CommonMask[Idx] != PoisonMaskElem)
+ CommonMask[Idx] = Idx;
+ assert(VF > 0 &&
+ "Expected vector length for the final value before action.");
+ Value *V = Vec.dyn_cast<Value *>();
+ if (!Vec.isNull() && !V)
+ V = Constant::getNullValue(FixedVectorType::get(
+ Vec.get<const TreeEntry *>()->Scalars.front()->getType(),
+ CommonMask.size()));
+ Action(V, CommonMask);
+ }
+ ::addMask(CommonMask, ExtMask, /*ExtendingManyInputs=*/true);
+ if (CommonMask.empty())
+ return Cost;
+ int Limit = CommonMask.size() * 2;
+ if (all_of(CommonMask, [=](int Idx) { return Idx < Limit; }) &&
+ ShuffleVectorInst::isIdentityMask(CommonMask))
+ return Cost;
+ return Cost +
+ createShuffle(InVectors.front(),
+ InVectors.size() == 2 ? InVectors.back() : nullptr,
+ CommonMask);
+ }
+
+ ~ShuffleCostEstimator() {
+ assert((IsFinalized || CommonMask.empty()) &&
+ "Shuffle construction must be finalized.");
+ }
+};
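A hedged, standalone sketch of the accumulate-then-finalize protocol the class above exposes: inputs and masks are recorded lazily, and a shuffle is only charged in finalize when the accumulated mask is not an identity. The unit costs and names below are placeholders, not the real TTI-based costs.

#include <cstddef>
#include <vector>

// Minimal stand-in for the lazy protocol; the real estimator tracks
// TreeEntry/Value inputs and charges TTI shuffle and buildvector costs.
class LazyShuffleCost {
  std::vector<int> CommonMask;
  int Cost = 0;

  static bool isIdentity(const std::vector<int> &Mask) {
    for (std::size_t I = 0; I < Mask.size(); ++I)
      if (Mask[I] != -1 && Mask[I] != static_cast<int>(I))
        return false;
    return true;
  }

public:
  // add(): remember an input mask, emit nothing yet.
  void add(const std::vector<int> &Mask) { CommonMask = Mask; }
  // gather(): charge a buildvector for scalars that cannot be shuffled in.
  void gather(unsigned NumScalars) { Cost += static_cast<int>(NumScalars); }
  // finalize(): only here decide whether a real permute has to be paid for.
  int finalize() {
    if (!CommonMask.empty() && !isIdentity(CommonMask))
      Cost += 1; // one permute; the real code asks TTI for a shuffle cost
    return Cost;
  }
};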
+
+InstructionCost
+BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
+ SmallPtrSetImpl<Value *> &CheckedExtracts) {
+ ArrayRef<Value *> VL = E->Scalars;
+
+ Type *ScalarTy = VL[0]->getType();
+ if (auto *SI = dyn_cast<StoreInst>(VL[0]))
+ ScalarTy = SI->getValueOperand()->getType();
+ else if (auto *CI = dyn_cast<CmpInst>(VL[0]))
+ ScalarTy = CI->getOperand(0)->getType();
+ else if (auto *IE = dyn_cast<InsertElementInst>(VL[0]))
+ ScalarTy = IE->getOperand(1)->getType();
+ auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+
+ // If we have computed a smaller type for the expression, update VecTy so
+ // that the costs will be accurate.
+ if (MinBWs.count(VL[0]))
+ VecTy = FixedVectorType::get(
+ IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());
+ unsigned EntryVF = E->getVectorFactor();
+ auto *FinalVecTy = FixedVectorType::get(VecTy->getElementType(), EntryVF);
+
+ bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
if (E->State == TreeEntry::NeedToGather) {
if (allConstant(VL))
return 0;
if (isa<InsertElementInst>(VL[0]))
return InstructionCost::getInvalid();
+ ShuffleCostEstimator Estimator(*TTI, VectorizedVals, *this,
+ CheckedExtracts);
+ unsigned VF = E->getVectorFactor();
+ SmallVector<int> ReuseShuffleIndicies(E->ReuseShuffleIndices.begin(),
+ E->ReuseShuffleIndices.end());
SmallVector<Value *> GatheredScalars(E->Scalars.begin(), E->Scalars.end());
// Build a mask out of the reorder indices and reorder scalars per this
// mask.
@@ -6709,195 +7315,104 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
if (!ReorderMask.empty())
reorderScalars(GatheredScalars, ReorderMask);
SmallVector<int> Mask;
+ SmallVector<int> ExtractMask;
+ std::optional<TargetTransformInfo::ShuffleKind> ExtractShuffle;
std::optional<TargetTransformInfo::ShuffleKind> GatherShuffle;
SmallVector<const TreeEntry *> Entries;
+ Type *ScalarTy = GatheredScalars.front()->getType();
+ // Check for gathered extracts.
+ ExtractShuffle = tryToGatherExtractElements(GatheredScalars, ExtractMask);
+ SmallVector<Value *> IgnoredVals;
+ if (UserIgnoreList)
+ IgnoredVals.assign(UserIgnoreList->begin(), UserIgnoreList->end());
+
+ bool Resized = false;
+ if (Value *VecBase = Estimator.adjustExtracts(
+ E, ExtractMask, ExtractShuffle.value_or(TTI::SK_PermuteTwoSrc)))
+ if (auto *VecBaseTy = dyn_cast<FixedVectorType>(VecBase->getType()))
+ if (VF == VecBaseTy->getNumElements() && GatheredScalars.size() != VF) {
+ Resized = true;
+ GatheredScalars.append(VF - GatheredScalars.size(),
+ PoisonValue::get(ScalarTy));
+ }
+
    // Do not try to look for reshuffled loads for gathered loads (they will be
    // handled later), for vectorized scalars, and for cases which are
    // definitely not profitable (splats and small gather nodes).
- if (E->getOpcode() != Instruction::Load || E->isAltShuffle() ||
+ if (ExtractShuffle || E->getOpcode() != Instruction::Load ||
+ E->isAltShuffle() ||
all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) ||
isSplat(E->Scalars) ||
(E->Scalars != GatheredScalars && GatheredScalars.size() <= 2))
GatherShuffle = isGatherShuffledEntry(E, GatheredScalars, Mask, Entries);
if (GatherShuffle) {
- // Remove shuffled elements from list of gathers.
- for (int I = 0, Sz = Mask.size(); I < Sz; ++I) {
- if (Mask[I] != UndefMaskElem)
- GatheredScalars[I] = PoisonValue::get(ScalarTy);
- }
assert((Entries.size() == 1 || Entries.size() == 2) &&
"Expected shuffle of 1 or 2 entries.");
- InstructionCost GatherCost = 0;
- int Limit = Mask.size() * 2;
- if (all_of(Mask, [=](int Idx) { return Idx < Limit; }) &&
- ShuffleVectorInst::isIdentityMask(Mask)) {
+ if (*GatherShuffle == TTI::SK_PermuteSingleSrc &&
+ Entries.front()->isSame(E->Scalars)) {
// Perfect match in the graph, will reuse the previously vectorized
// node. Cost is 0.
LLVM_DEBUG(
dbgs()
<< "SLP: perfect diamond match for gather bundle that starts with "
<< *VL.front() << ".\n");
- if (NeedToShuffleReuses)
- GatherCost =
- TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
- FinalVecTy, E->ReuseShuffleIndices);
- } else {
- LLVM_DEBUG(dbgs() << "SLP: shuffled " << Entries.size()
- << " entries for bundle that starts with "
- << *VL.front() << ".\n");
- // Detected that instead of gather we can emit a shuffle of single/two
- // previously vectorized nodes. Add the cost of the permutation rather
- // than gather.
- ::addMask(Mask, E->ReuseShuffleIndices);
- GatherCost = TTI->getShuffleCost(*GatherShuffle, FinalVecTy, Mask);
- }
- if (!all_of(GatheredScalars, UndefValue::classof))
- GatherCost += getGatherCost(GatheredScalars);
- return GatherCost;
- }
- if ((E->getOpcode() == Instruction::ExtractElement ||
- all_of(E->Scalars,
- [](Value *V) {
- return isa<ExtractElementInst, UndefValue>(V);
- })) &&
- allSameType(VL)) {
- // Check that gather of extractelements can be represented as just a
- // shuffle of a single/two vectors the scalars are extracted from.
- SmallVector<int> Mask;
- std::optional<TargetTransformInfo::ShuffleKind> ShuffleKind =
- isFixedVectorShuffle(VL, Mask);
- if (ShuffleKind) {
- // Found the bunch of extractelement instructions that must be gathered
- // into a vector and can be represented as a permutation elements in a
- // single input vector or of 2 input vectors.
- InstructionCost Cost =
- computeExtractCost(VL, VecTy, *ShuffleKind, Mask, *TTI);
- AdjustExtractsCost(Cost);
- if (NeedToShuffleReuses)
- Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
- FinalVecTy, E->ReuseShuffleIndices);
- return Cost;
- }
- }
- if (isSplat(VL)) {
- // Found the broadcasting of the single scalar, calculate the cost as the
- // broadcast.
- assert(VecTy == FinalVecTy &&
- "No reused scalars expected for broadcast.");
- const auto *It =
- find_if(VL, [](Value *V) { return !isa<UndefValue>(V); });
- // If all values are undefs - consider cost free.
- if (It == VL.end())
- return TTI::TCC_Free;
- // Add broadcast for non-identity shuffle only.
- bool NeedShuffle =
- VL.front() != *It || !all_of(VL.drop_front(), UndefValue::classof);
- InstructionCost InsertCost =
- TTI->getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind,
- /*Index=*/0, PoisonValue::get(VecTy), *It);
- return InsertCost + (NeedShuffle
- ? TTI->getShuffleCost(
- TargetTransformInfo::SK_Broadcast, VecTy,
- /*Mask=*/std::nullopt, CostKind,
- /*Index=*/0,
- /*SubTp=*/nullptr, /*Args=*/VL[0])
- : TTI::TCC_Free);
- }
- InstructionCost ReuseShuffleCost = 0;
- if (NeedToShuffleReuses)
- ReuseShuffleCost = TTI->getShuffleCost(
- TTI::SK_PermuteSingleSrc, FinalVecTy, E->ReuseShuffleIndices);
- // Improve gather cost for gather of loads, if we can group some of the
- // loads into vector loads.
- if (VL.size() > 2 && E->getOpcode() == Instruction::Load &&
- !E->isAltShuffle()) {
- BoUpSLP::ValueSet VectorizedLoads;
- unsigned StartIdx = 0;
- unsigned VF = VL.size() / 2;
- unsigned VectorizedCnt = 0;
- unsigned ScatterVectorizeCnt = 0;
- const unsigned Sz = DL->getTypeSizeInBits(E->getMainOp()->getType());
- for (unsigned MinVF = getMinVF(2 * Sz); VF >= MinVF; VF /= 2) {
- for (unsigned Cnt = StartIdx, End = VL.size(); Cnt + VF <= End;
- Cnt += VF) {
- ArrayRef<Value *> Slice = VL.slice(Cnt, VF);
- if (!VectorizedLoads.count(Slice.front()) &&
- !VectorizedLoads.count(Slice.back()) && allSameBlock(Slice)) {
- SmallVector<Value *> PointerOps;
- OrdersType CurrentOrder;
- LoadsState LS =
- canVectorizeLoads(Slice, Slice.front(), *TTI, *DL, *SE, *LI,
- *TLI, CurrentOrder, PointerOps);
- switch (LS) {
- case LoadsState::Vectorize:
- case LoadsState::ScatterVectorize:
- // Mark the vectorized loads so that we don't vectorize them
- // again.
- if (LS == LoadsState::Vectorize)
- ++VectorizedCnt;
- else
- ++ScatterVectorizeCnt;
- VectorizedLoads.insert(Slice.begin(), Slice.end());
- // If we vectorized initial block, no need to try to vectorize it
- // again.
- if (Cnt == StartIdx)
- StartIdx += VF;
- break;
- case LoadsState::Gather:
- break;
- }
+ // Restore the mask for previous partially matched values.
+ for (auto [I, V] : enumerate(E->Scalars)) {
+ if (isa<PoisonValue>(V)) {
+ Mask[I] = PoisonMaskElem;
+ continue;
}
+ if (Mask[I] == PoisonMaskElem)
+ Mask[I] = Entries.front()->findLaneForValue(V);
}
- // Check if the whole array was vectorized already - exit.
- if (StartIdx >= VL.size())
- break;
- // Found vectorizable parts - exit.
- if (!VectorizedLoads.empty())
- break;
+ Estimator.add(Entries.front(), Mask);
+ return Estimator.finalize(E->ReuseShuffleIndices);
}
- if (!VectorizedLoads.empty()) {
- InstructionCost GatherCost = 0;
- unsigned NumParts = TTI->getNumberOfParts(VecTy);
- bool NeedInsertSubvectorAnalysis =
- !NumParts || (VL.size() / VF) > NumParts;
- // Get the cost for gathered loads.
- for (unsigned I = 0, End = VL.size(); I < End; I += VF) {
- if (VectorizedLoads.contains(VL[I]))
- continue;
- GatherCost += getGatherCost(VL.slice(I, VF));
- }
- // The cost for vectorized loads.
- InstructionCost ScalarsCost = 0;
- for (Value *V : VectorizedLoads) {
- auto *LI = cast<LoadInst>(V);
- ScalarsCost +=
- TTI->getMemoryOpCost(Instruction::Load, LI->getType(),
- LI->getAlign(), LI->getPointerAddressSpace(),
- CostKind, TTI::OperandValueInfo(), LI);
- }
- auto *LI = cast<LoadInst>(E->getMainOp());
- auto *LoadTy = FixedVectorType::get(LI->getType(), VF);
- Align Alignment = LI->getAlign();
- GatherCost +=
- VectorizedCnt *
- TTI->getMemoryOpCost(Instruction::Load, LoadTy, Alignment,
- LI->getPointerAddressSpace(), CostKind,
- TTI::OperandValueInfo(), LI);
- GatherCost += ScatterVectorizeCnt *
- TTI->getGatherScatterOpCost(
- Instruction::Load, LoadTy, LI->getPointerOperand(),
- /*VariableMask=*/false, Alignment, CostKind, LI);
- if (NeedInsertSubvectorAnalysis) {
- // Add the cost for the subvectors insert.
- for (int I = VF, E = VL.size(); I < E; I += VF)
- GatherCost +=
- TTI->getShuffleCost(TTI::SK_InsertSubvector, VecTy,
- std::nullopt, CostKind, I, LoadTy);
- }
- return ReuseShuffleCost + GatherCost - ScalarsCost;
+ if (!Resized) {
+ unsigned VF1 = Entries.front()->getVectorFactor();
+ unsigned VF2 = Entries.back()->getVectorFactor();
+ if ((VF == VF1 || VF == VF2) && GatheredScalars.size() != VF)
+ GatheredScalars.append(VF - GatheredScalars.size(),
+ PoisonValue::get(ScalarTy));
}
+ // Remove shuffled elements from list of gathers.
+ for (int I = 0, Sz = Mask.size(); I < Sz; ++I) {
+ if (Mask[I] != PoisonMaskElem)
+ GatheredScalars[I] = PoisonValue::get(ScalarTy);
+ }
+ LLVM_DEBUG(dbgs() << "SLP: shuffled " << Entries.size()
+ << " entries for bundle that starts with "
+ << *VL.front() << ".\n";);
+ if (Entries.size() == 1)
+ Estimator.add(Entries.front(), Mask);
+ else
+ Estimator.add(Entries.front(), Entries.back(), Mask);
+    if (all_of(GatheredScalars, PoisonValue::classof))
+ return Estimator.finalize(E->ReuseShuffleIndices);
+ return Estimator.finalize(
+ E->ReuseShuffleIndices, E->Scalars.size(),
+ [&](Value *&Vec, SmallVectorImpl<int> &Mask) {
+ Vec = Estimator.gather(GatheredScalars,
+ Constant::getNullValue(FixedVectorType::get(
+ GatheredScalars.front()->getType(),
+ GatheredScalars.size())));
+ });
}
- return ReuseShuffleCost + getGatherCost(VL);
+ if (!all_of(GatheredScalars, PoisonValue::classof)) {
+ auto Gathers = ArrayRef(GatheredScalars).take_front(VL.size());
+ bool SameGathers = VL.equals(Gathers);
+ Value *BV = Estimator.gather(
+ Gathers, SameGathers ? nullptr
+ : Constant::getNullValue(FixedVectorType::get(
+ GatheredScalars.front()->getType(),
+ GatheredScalars.size())));
+ SmallVector<int> ReuseMask(Gathers.size(), PoisonMaskElem);
+ std::iota(ReuseMask.begin(), ReuseMask.end(), 0);
+ Estimator.add(BV, ReuseMask);
+ }
+ if (ExtractShuffle)
+ Estimator.add(E, std::nullopt);
+ return Estimator.finalize(E->ReuseShuffleIndices);
}
InstructionCost CommonCost = 0;
SmallVector<int> Mask;
@@ -6945,48 +7460,89 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
}
InstructionCost VecCost = VectorCost(CommonCost);
- LLVM_DEBUG(
- dumpTreeCosts(E, CommonCost, VecCost - CommonCost, ScalarCost));
- // Disable warnings for `this` and `E` are unused. Required for
- // `dumpTreeCosts`.
- (void)this;
- (void)E;
+ LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost - CommonCost,
+ ScalarCost, "Calculated costs for Tree"));
return VecCost - ScalarCost;
};
// Calculate cost difference from vectorizing set of GEPs.
// Negative value means vectorizing is profitable.
auto GetGEPCostDiff = [=](ArrayRef<Value *> Ptrs, Value *BasePtr) {
- InstructionCost CostSavings = 0;
- for (Value *V : Ptrs) {
- if (V == BasePtr)
- continue;
- auto *Ptr = dyn_cast<GetElementPtrInst>(V);
- // GEPs may contain just addresses without instructions, considered free.
- // GEPs with all constant indices also considered to have zero cost.
- if (!Ptr || Ptr->hasAllConstantIndices())
- continue;
-
- // Here we differentiate two cases: when GEPs represent a regular
- // vectorization tree node (and hence vectorized) and when the set is
- // arguments of a set of loads or stores being vectorized. In the former
- // case all the scalar GEPs will be removed as a result of vectorization.
+ InstructionCost ScalarCost = 0;
+ InstructionCost VecCost = 0;
+ // Here we differentiate two cases: (1) when Ptrs represent a regular
+ // vectorization tree node (as they are pointer arguments of scattered
+ // loads) or (2) when Ptrs are the arguments of loads or stores being
+    // vectorized as plain wide unit-stride load/store since all the
+ // loads/stores are known to be from/to adjacent locations.
+ assert(E->State == TreeEntry::Vectorize &&
+ "Entry state expected to be Vectorize here.");
+ if (isa<LoadInst, StoreInst>(VL0)) {
+ // Case 2: estimate costs for pointer related costs when vectorizing to
+ // a wide load/store.
+ // Scalar cost is estimated as a set of pointers with known relationship
+ // between them.
+ // For vector code we will use BasePtr as argument for the wide load/store
+ // but we also need to account all the instructions which are going to
+ // stay in vectorized code due to uses outside of these scalar
+ // loads/stores.
+ ScalarCost = TTI->getPointersChainCost(
+ Ptrs, BasePtr, TTI::PointersChainInfo::getUnitStride(), ScalarTy,
+ CostKind);
+
+ SmallVector<const Value *> PtrsRetainedInVecCode;
+ for (Value *V : Ptrs) {
+ if (V == BasePtr) {
+ PtrsRetainedInVecCode.push_back(V);
+ continue;
+ }
+ auto *Ptr = dyn_cast<GetElementPtrInst>(V);
+ // For simplicity, assume Ptr stays in vectorized code if it's not a
+ // GEP instruction. We don't care since its cost is considered free.
+ // TODO: We should check for any uses outside of vectorizable tree
+ // rather than just single use.
+ if (!Ptr || !Ptr->hasOneUse())
+ PtrsRetainedInVecCode.push_back(V);
+ }
+
+ if (PtrsRetainedInVecCode.size() == Ptrs.size()) {
+ // If all pointers stay in vectorized code then we don't have
+ // any savings on that.
+ LLVM_DEBUG(dumpTreeCosts(E, 0, ScalarCost, ScalarCost,
+ "Calculated GEPs cost for Tree"));
+ return InstructionCost{TTI::TCC_Free};
+ }
+ VecCost = TTI->getPointersChainCost(
+ PtrsRetainedInVecCode, BasePtr,
+ TTI::PointersChainInfo::getKnownStride(), VecTy, CostKind);
+ } else {
+ // Case 1: Ptrs are the arguments of loads that we are going to transform
+ // into masked gather load intrinsic.
+ // All the scalar GEPs will be removed as a result of vectorization.
// For any external uses of some lanes extract element instructions will
- // be generated (which cost is estimated separately). For the latter case
- // since the set of GEPs itself is not vectorized those used more than
- // once will remain staying in vectorized code as well. So we should not
- // count them as savings.
- if (!Ptr->hasOneUse() && isa<LoadInst, StoreInst>(VL0))
- continue;
-
- // TODO: it is target dependent, so need to implement and then use a TTI
- // interface.
- CostSavings += TTI->getArithmeticInstrCost(Instruction::Add,
- Ptr->getType(), CostKind);
- }
- LLVM_DEBUG(dbgs() << "SLP: Calculated GEPs cost savings or Tree:\n";
- E->dump());
- LLVM_DEBUG(dbgs() << "SLP: GEP cost saving = " << CostSavings << "\n");
- return InstructionCost() - CostSavings;
+ // be generated (which cost is estimated separately).
+ TTI::PointersChainInfo PtrsInfo =
+ all_of(Ptrs,
+ [](const Value *V) {
+ auto *Ptr = dyn_cast<GetElementPtrInst>(V);
+ return Ptr && !Ptr->hasAllConstantIndices();
+ })
+ ? TTI::PointersChainInfo::getUnknownStride()
+ : TTI::PointersChainInfo::getKnownStride();
+
+ ScalarCost = TTI->getPointersChainCost(Ptrs, BasePtr, PtrsInfo, ScalarTy,
+ CostKind);
+ if (auto *BaseGEP = dyn_cast<GEPOperator>(BasePtr)) {
+ SmallVector<const Value *> Indices(BaseGEP->indices());
+ VecCost = TTI->getGEPCost(BaseGEP->getSourceElementType(),
+ BaseGEP->getPointerOperand(), Indices, VecTy,
+ CostKind);
+ }
+ }
+
+ LLVM_DEBUG(dumpTreeCosts(E, 0, VecCost, ScalarCost,
+ "Calculated GEPs cost for Tree"));
+
+ return VecCost - ScalarCost;
};
switch (ShuffleOrOp) {
@@ -7062,7 +7618,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
unsigned NumOfParts = TTI->getNumberOfParts(SrcVecTy);
- SmallVector<int> InsertMask(NumElts, UndefMaskElem);
+ SmallVector<int> InsertMask(NumElts, PoisonMaskElem);
unsigned OffsetBeg = *getInsertIndex(VL.front());
unsigned OffsetEnd = OffsetBeg;
InsertMask[OffsetBeg] = 0;
@@ -7099,13 +7655,13 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
SmallVector<int> Mask;
if (!E->ReorderIndices.empty()) {
inversePermutation(E->ReorderIndices, Mask);
- Mask.append(InsertVecSz - Mask.size(), UndefMaskElem);
+ Mask.append(InsertVecSz - Mask.size(), PoisonMaskElem);
} else {
- Mask.assign(VecSz, UndefMaskElem);
+ Mask.assign(VecSz, PoisonMaskElem);
std::iota(Mask.begin(), std::next(Mask.begin(), InsertVecSz), 0);
}
bool IsIdentity = true;
- SmallVector<int> PrevMask(InsertVecSz, UndefMaskElem);
+ SmallVector<int> PrevMask(InsertVecSz, PoisonMaskElem);
Mask.swap(PrevMask);
for (unsigned I = 0; I < NumScalars; ++I) {
unsigned InsertIdx = *getInsertIndex(VL[PrevMask[I]]);
@@ -7148,14 +7704,14 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
InsertVecTy);
} else {
for (unsigned I = 0, End = OffsetBeg - Offset; I < End; ++I)
- Mask[I] = InMask.test(I) ? UndefMaskElem : I;
+ Mask[I] = InMask.test(I) ? PoisonMaskElem : I;
for (unsigned I = OffsetBeg - Offset, End = OffsetEnd - Offset;
I <= End; ++I)
- if (Mask[I] != UndefMaskElem)
+ if (Mask[I] != PoisonMaskElem)
Mask[I] = I + VecSz;
for (unsigned I = OffsetEnd + 1 - Offset; I < VecSz; ++I)
Mask[I] =
- ((I >= InMask.size()) || InMask.test(I)) ? UndefMaskElem : I;
+ ((I >= InMask.size()) || InMask.test(I)) ? PoisonMaskElem : I;
Cost += TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, InsertVecTy, Mask);
}
}
@@ -7422,11 +7978,11 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
VecCost +=
TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy, CostKind);
} else if (auto *CI0 = dyn_cast<CmpInst>(VL0)) {
- VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
- Builder.getInt1Ty(),
+ auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(), VL.size());
+ VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
CI0->getPredicate(), CostKind, VL0);
VecCost += TTI->getCmpSelInstrCost(
- E->getOpcode(), ScalarTy, Builder.getInt1Ty(),
+ E->getOpcode(), VecTy, MaskTy,
cast<CmpInst>(E->getAltOp())->getPredicate(), CostKind,
E->getAltOp());
} else {
@@ -7615,7 +8171,7 @@ InstructionCost BoUpSLP::getSpillCost() const {
unsigned BundleWidth = VectorizableTree.front()->Scalars.size();
InstructionCost Cost = 0;
- SmallPtrSet<Instruction*, 4> LiveValues;
+ SmallPtrSet<Instruction *, 4> LiveValues;
Instruction *PrevInst = nullptr;
// The entries in VectorizableTree are not necessarily ordered by their
@@ -7626,6 +8182,8 @@ InstructionCost BoUpSLP::getSpillCost() const {
// are grouped together. Using dominance ensures a deterministic order.
SmallVector<Instruction *, 16> OrderedScalars;
for (const auto &TEPtr : VectorizableTree) {
+ if (TEPtr->State != TreeEntry::Vectorize)
+ continue;
Instruction *Inst = dyn_cast<Instruction>(TEPtr->Scalars[0]);
if (!Inst)
continue;
@@ -7639,7 +8197,7 @@ InstructionCost BoUpSLP::getSpillCost() const {
assert((NodeA == NodeB) == (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
"Different nodes should have different DFS numbers");
if (NodeA != NodeB)
- return NodeA->getDFSNumIn() < NodeB->getDFSNumIn();
+ return NodeA->getDFSNumIn() > NodeB->getDFSNumIn();
return B->comesBefore(A);
});
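The sort above orders bundles by the dominator-tree DFS-in number, now descending, with comesBefore as the tie-break inside a block, giving the spill-cost walk a deterministic, dominance-consistent order. Here is a small standalone comparator in the same spirit; the structs and fields are stand-ins invented for the sketch, not LLVM types.

#include <algorithm>
#include <cstdio>
#include <vector>

// Stand-ins for an instruction and its dominator-tree node numbering.
struct FakeInst {
  int DFSNumIn;   // DFS-in number of the enclosing block's DT node.
  int PosInBlock; // Program-order position inside the block.
  const char *Name;
};

int main() {
  std::vector<FakeInst> Scalars = {
      {2, 0, "c"}, {1, 1, "b"}, {1, 0, "a"}, {3, 0, "d"}};
  // Mirror of the comparator: different blocks are ordered by descending
  // DFS-in number; within a block, B->comesBefore(A) puts later
  // instructions first.
  std::stable_sort(Scalars.begin(), Scalars.end(),
                   [](const FakeInst &A, const FakeInst &B) {
                     if (A.DFSNumIn != B.DFSNumIn)
                       return A.DFSNumIn > B.DFSNumIn;
                     return B.PosInBlock < A.PosInBlock;
                   });
  for (const FakeInst &I : Scalars)
    std::printf("%s ", I.Name); // prints: d c b a
  std::printf("\n");
  return 0;
}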
@@ -7698,7 +8256,7 @@ InstructionCost BoUpSLP::getSpillCost() const {
};
// Debug information does not impact spill cost.
- if (isa<CallInst>(&*PrevInstIt) && !NoCallIntrinsic(&*PrevInstIt) &&
+ if (isa<CallBase>(&*PrevInstIt) && !NoCallIntrinsic(&*PrevInstIt) &&
&*PrevInstIt != PrevInst)
NumCalls++;
@@ -7706,7 +8264,7 @@ InstructionCost BoUpSLP::getSpillCost() const {
}
if (NumCalls) {
- SmallVector<Type*, 4> V;
+ SmallVector<Type *, 4> V;
for (auto *II : LiveValues) {
auto *ScalarTy = II->getType();
if (auto *VectorTy = dyn_cast<FixedVectorType>(ScalarTy))
@@ -7797,8 +8355,8 @@ static T *performExtractsShuffleAction(
ResizeAction(ShuffleMask.begin()->first, Mask, /*ForSingleMask=*/false);
SmallBitVector IsBasePoison = isUndefVector<true>(Base, UseMask);
for (unsigned Idx = 0, VF = Mask.size(); Idx < VF; ++Idx) {
- if (Mask[Idx] == UndefMaskElem)
- Mask[Idx] = IsBasePoison.test(Idx) ? UndefMaskElem : Idx;
+ if (Mask[Idx] == PoisonMaskElem)
+ Mask[Idx] = IsBasePoison.test(Idx) ? PoisonMaskElem : Idx;
else
Mask[Idx] = (Res.second ? Idx : Mask[Idx]) + VF;
}
@@ -7827,8 +8385,8 @@ static T *performExtractsShuffleAction(
// can shuffle them directly.
ArrayRef<int> SecMask = VMIt->second;
for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
- if (SecMask[I] != UndefMaskElem) {
- assert(Mask[I] == UndefMaskElem && "Multiple uses of scalars.");
+ if (SecMask[I] != PoisonMaskElem) {
+ assert(Mask[I] == PoisonMaskElem && "Multiple uses of scalars.");
Mask[I] = SecMask[I] + Vec1VF;
}
}
@@ -7841,12 +8399,12 @@ static T *performExtractsShuffleAction(
ResizeAction(VMIt->first, VMIt->second, /*ForSingleMask=*/false);
ArrayRef<int> SecMask = VMIt->second;
for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
- if (Mask[I] != UndefMaskElem) {
- assert(SecMask[I] == UndefMaskElem && "Multiple uses of scalars.");
+ if (Mask[I] != PoisonMaskElem) {
+ assert(SecMask[I] == PoisonMaskElem && "Multiple uses of scalars.");
if (Res1.second)
Mask[I] = I;
- } else if (SecMask[I] != UndefMaskElem) {
- assert(Mask[I] == UndefMaskElem && "Multiple uses of scalars.");
+ } else if (SecMask[I] != PoisonMaskElem) {
+ assert(Mask[I] == PoisonMaskElem && "Multiple uses of scalars.");
Mask[I] = (Res2.second ? I : SecMask[I]) + VF;
}
}
@@ -7863,11 +8421,11 @@ static T *performExtractsShuffleAction(
ResizeAction(VMIt->first, VMIt->second, /*ForSingleMask=*/false);
ArrayRef<int> SecMask = VMIt->second;
for (unsigned I = 0, VF = Mask.size(); I < VF; ++I) {
- if (SecMask[I] != UndefMaskElem) {
- assert((Mask[I] == UndefMaskElem || IsBaseNotUndef) &&
+ if (SecMask[I] != PoisonMaskElem) {
+ assert((Mask[I] == PoisonMaskElem || IsBaseNotUndef) &&
"Multiple uses of scalars.");
Mask[I] = (Res.second ? I : SecMask[I]) + VF;
- } else if (Mask[I] != UndefMaskElem) {
+ } else if (Mask[I] != PoisonMaskElem) {
Mask[I] = I;
}
}
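performExtractsShuffleAction above keeps combining per-source masks into a single two-source mask, rebasing the second source's indices by the first source's width and asserting that no lane is claimed twice. A minimal standalone model of that merge step (plain C++, poison modelled as -1):

#include <cassert>
#include <cstdio>
#include <vector>

constexpr int kPoisonLane = -1;

// Merge two per-source masks over the same lanes into one two-source mask:
// lanes defined by the second mask are rebased by VF1 (the first source's
// width). A lane must not be claimed by both masks.
static std::vector<int> mergeMasks(const std::vector<int> &First,
                                   const std::vector<int> &Second, int VF1) {
  assert(First.size() == Second.size() && "Masks must cover the same lanes");
  std::vector<int> Out(First);
  for (size_t I = 0; I < Second.size(); ++I) {
    if (Second[I] == kPoisonLane)
      continue;
    assert(Out[I] == kPoisonLane && "Multiple uses of scalars.");
    Out[I] = Second[I] + VF1;
  }
  return Out;
}

int main() {
  // First source defines lanes 0 and 2, second source defines lanes 1 and 3.
  std::vector<int> First = {0, kPoisonLane, 2, kPoisonLane};
  std::vector<int> Second = {kPoisonLane, 1, kPoisonLane, 0};
  for (int V : mergeMasks(First, Second, /*VF1=*/4))
    std::printf("%d ", V); // prints: 0 5 2 4
  std::printf("\n");
  return 0;
}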
@@ -7877,12 +8435,23 @@ static T *performExtractsShuffleAction(
}
InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
+ // Build a map from gathered scalars to the nodes where they are used.
+ ValueToGatherNodes.clear();
+ for (const std::unique_ptr<TreeEntry> &EntryPtr : VectorizableTree) {
+ if (EntryPtr->State != TreeEntry::NeedToGather)
+ continue;
+ for (Value *V : EntryPtr->Scalars)
+ if (!isConstant(V))
+ ValueToGatherNodes.try_emplace(V).first->getSecond().insert(
+ EntryPtr.get());
+ }
InstructionCost Cost = 0;
LLVM_DEBUG(dbgs() << "SLP: Calculating cost for tree of size "
<< VectorizableTree.size() << ".\n");
unsigned BundleWidth = VectorizableTree[0]->Scalars.size();
+ SmallPtrSet<Value *, 4> CheckedExtracts;
for (unsigned I = 0, E = VectorizableTree.size(); I < E; ++I) {
TreeEntry &TE = *VectorizableTree[I];
if (TE.State == TreeEntry::NeedToGather) {
@@ -7898,7 +8467,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
}
}
- InstructionCost C = getEntryCost(&TE, VectorizedVals);
+ InstructionCost C = getEntryCost(&TE, VectorizedVals, CheckedExtracts);
Cost += C;
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
<< " for bundle that starts with " << *TE.Scalars[0]
@@ -7951,7 +8520,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
(void)ShuffleMasks.emplace_back();
SmallVectorImpl<int> &Mask = ShuffleMasks.back()[ScalarTE];
if (Mask.empty())
- Mask.assign(FTy->getNumElements(), UndefMaskElem);
+ Mask.assign(FTy->getNumElements(), PoisonMaskElem);
// Find the insertvector, vectorized in tree, if any.
Value *Base = VU;
while (auto *IEBase = dyn_cast<InsertElementInst>(Base)) {
@@ -7965,7 +8534,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
do {
IEBase = cast<InsertElementInst>(Base);
int Idx = *getInsertIndex(IEBase);
- assert(Mask[Idx] == UndefMaskElem &&
+ assert(Mask[Idx] == PoisonMaskElem &&
"InsertElementInstruction used already.");
Mask[Idx] = Idx;
Base = IEBase->getOperand(0);
@@ -7985,7 +8554,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
int InIdx = *InsertIdx;
SmallVectorImpl<int> &Mask = ShuffleMasks[VecId][ScalarTE];
if (Mask.empty())
- Mask.assign(FTy->getNumElements(), UndefMaskElem);
+ Mask.assign(FTy->getNumElements(), PoisonMaskElem);
Mask[InIdx] = EU.Lane;
DemandedElts[VecId].setBit(InIdx);
continue;
@@ -8024,7 +8593,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
(all_of(Mask,
[VF](int Idx) { return Idx < 2 * static_cast<int>(VF); }) &&
!ShuffleVectorInst::isIdentityMask(Mask)))) {
- SmallVector<int> OrigMask(VecVF, UndefMaskElem);
+ SmallVector<int> OrigMask(VecVF, PoisonMaskElem);
std::copy(Mask.begin(), std::next(Mask.begin(), std::min(VF, VecVF)),
OrigMask.begin());
C = TTI->getShuffleCost(
@@ -8110,17 +8679,23 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
// No need to check for the topmost gather node.
if (TE == VectorizableTree.front().get())
return std::nullopt;
- Mask.assign(VL.size(), UndefMaskElem);
+ Mask.assign(VL.size(), PoisonMaskElem);
assert(TE->UserTreeIndices.size() == 1 &&
"Expected only single user of the gather node.");
// TODO: currently checking only for Scalars in the tree entry, need to count
// reused elements too for better cost estimation.
Instruction &UserInst =
getLastInstructionInBundle(TE->UserTreeIndices.front().UserTE);
- auto *PHI = dyn_cast<PHINode>(&UserInst);
- auto *NodeUI = DT->getNode(
- PHI ? PHI->getIncomingBlock(TE->UserTreeIndices.front().EdgeIdx)
- : UserInst.getParent());
+ BasicBlock *ParentBB = nullptr;
+ // Main node of PHI entries keeps the correct order of operands/incoming
+ // blocks.
+ if (auto *PHI =
+ dyn_cast<PHINode>(TE->UserTreeIndices.front().UserTE->getMainOp())) {
+ ParentBB = PHI->getIncomingBlock(TE->UserTreeIndices.front().EdgeIdx);
+ } else {
+ ParentBB = UserInst.getParent();
+ }
+ auto *NodeUI = DT->getNode(ParentBB);
assert(NodeUI && "Should only process reachable instructions");
SmallPtrSet<Value *, 4> GatheredScalars(VL.begin(), VL.end());
auto CheckOrdering = [&](Instruction *LastEI) {
@@ -8147,45 +8722,6 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
return false;
return true;
};
- // Build a lists of values to tree entries.
- DenseMap<Value *, SmallPtrSet<const TreeEntry *, 4>> ValueToTEs;
- for (const std::unique_ptr<TreeEntry> &EntryPtr : VectorizableTree) {
- if (EntryPtr.get() == TE)
- continue;
- if (EntryPtr->State != TreeEntry::NeedToGather)
- continue;
- if (!any_of(EntryPtr->Scalars, [&GatheredScalars](Value *V) {
- return GatheredScalars.contains(V);
- }))
- continue;
- assert(EntryPtr->UserTreeIndices.size() == 1 &&
- "Expected only single user of the gather node.");
- Instruction &EntryUserInst =
- getLastInstructionInBundle(EntryPtr->UserTreeIndices.front().UserTE);
- if (&UserInst == &EntryUserInst) {
- // If 2 gathers are operands of the same entry, compare operands indices,
- // use the earlier one as the base.
- if (TE->UserTreeIndices.front().UserTE ==
- EntryPtr->UserTreeIndices.front().UserTE &&
- TE->UserTreeIndices.front().EdgeIdx <
- EntryPtr->UserTreeIndices.front().EdgeIdx)
- continue;
- }
- // Check if the user node of the TE comes after user node of EntryPtr,
- // otherwise EntryPtr depends on TE.
- auto *EntryPHI = dyn_cast<PHINode>(&EntryUserInst);
- auto *EntryI =
- EntryPHI
- ? EntryPHI
- ->getIncomingBlock(EntryPtr->UserTreeIndices.front().EdgeIdx)
- ->getTerminator()
- : &EntryUserInst;
- if (!CheckOrdering(EntryI))
- continue;
- for (Value *V : EntryPtr->Scalars)
- if (!isConstant(V))
- ValueToTEs.try_emplace(V).first->getSecond().insert(EntryPtr.get());
- }
// Find all tree entries used by the gathered values. If no common entries
// found - not a shuffle.
// Here we build a set of tree nodes for each gathered value and trying to
@@ -8195,16 +8731,58 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
// have a permutation of 2 input vectors.
SmallVector<SmallPtrSet<const TreeEntry *, 4>> UsedTEs;
DenseMap<Value *, int> UsedValuesEntry;
- for (Value *V : TE->Scalars) {
+ for (Value *V : VL) {
if (isConstant(V))
continue;
// Build a list of tree entries where V is used.
SmallPtrSet<const TreeEntry *, 4> VToTEs;
- auto It = ValueToTEs.find(V);
- if (It != ValueToTEs.end())
- VToTEs = It->second;
- if (const TreeEntry *VTE = getTreeEntry(V))
+ for (const TreeEntry *TEPtr : ValueToGatherNodes.find(V)->second) {
+ if (TEPtr == TE)
+ continue;
+ assert(any_of(TEPtr->Scalars,
+ [&](Value *V) { return GatheredScalars.contains(V); }) &&
+ "Must contain at least single gathered value.");
+ assert(TEPtr->UserTreeIndices.size() == 1 &&
+ "Expected only single user of the gather node.");
+ PHINode *EntryPHI =
+ dyn_cast<PHINode>(TEPtr->UserTreeIndices.front().UserTE->getMainOp());
+ Instruction *EntryUserInst =
+ EntryPHI ? nullptr
+ : &getLastInstructionInBundle(
+ TEPtr->UserTreeIndices.front().UserTE);
+ if (&UserInst == EntryUserInst) {
+ assert(!EntryPHI && "Unexpected phi node entry.");
+ // If 2 gathers are operands of the same entry, compare operands
+ // indices, use the earlier one as the base.
+ if (TE->UserTreeIndices.front().UserTE ==
+ TEPtr->UserTreeIndices.front().UserTE &&
+ TE->UserTreeIndices.front().EdgeIdx <
+ TEPtr->UserTreeIndices.front().EdgeIdx)
+ continue;
+ }
+ // Check if the user node of the TE comes after user node of EntryPtr,
+ // otherwise EntryPtr depends on TE.
+ auto *EntryI =
+ EntryPHI
+ ? EntryPHI
+ ->getIncomingBlock(TEPtr->UserTreeIndices.front().EdgeIdx)
+ ->getTerminator()
+ : EntryUserInst;
+ if ((ParentBB != EntryI->getParent() ||
+ TE->UserTreeIndices.front().EdgeIdx <
+ TEPtr->UserTreeIndices.front().EdgeIdx ||
+ TE->UserTreeIndices.front().UserTE !=
+ TEPtr->UserTreeIndices.front().UserTE) &&
+ !CheckOrdering(EntryI))
+ continue;
+ VToTEs.insert(TEPtr);
+ }
+ if (const TreeEntry *VTE = getTreeEntry(V)) {
+ Instruction &EntryUserInst = getLastInstructionInBundle(VTE);
+ if (&EntryUserInst == &UserInst || !CheckOrdering(&EntryUserInst))
+ continue;
VToTEs.insert(VTE);
+ }
if (VToTEs.empty())
continue;
if (UsedTEs.empty()) {
@@ -8260,13 +8838,13 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
auto *It = find_if(FirstEntries, [=](const TreeEntry *EntryPtr) {
return EntryPtr->isSame(VL) || EntryPtr->isSame(TE->Scalars);
});
- if (It != FirstEntries.end()) {
+ if (It != FirstEntries.end() && (*It)->getVectorFactor() == VL.size()) {
Entries.push_back(*It);
std::iota(Mask.begin(), Mask.end(), 0);
// Clear undef scalars.
for (int I = 0, Sz = VL.size(); I < Sz; ++I)
- if (isa<PoisonValue>(TE->Scalars[I]))
- Mask[I] = UndefMaskElem;
+ if (isa<PoisonValue>(VL[I]))
+ Mask[I] = PoisonMaskElem;
return TargetTransformInfo::SK_PermuteSingleSrc;
}
// No perfect match, just shuffle, so choose the first tree node from the
@@ -8302,10 +8880,18 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
break;
}
}
- // No 2 source vectors with the same vector factor - give up and do regular
- // gather.
- if (Entries.empty())
- return std::nullopt;
+ // No 2 source vectors with the same vector factor - just choose the 2 with
+ // the maximum index.
+ if (Entries.empty()) {
+ Entries.push_back(
+ *std::max_element(UsedTEs.front().begin(), UsedTEs.front().end(),
+ [](const TreeEntry *TE1, const TreeEntry *TE2) {
+ return TE1->Idx < TE2->Idx;
+ }));
+ Entries.push_back(SecondEntries.front());
+ VF = std::max(Entries.front()->getVectorFactor(),
+ Entries.back()->getVectorFactor());
+ }
}
bool IsSplatOrUndefs = isSplat(VL) || all_of(VL, UndefValue::classof);
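The loop above records, for every non-constant value in VL, the set of tree entries it can be taken from (UsedTEs / UsedValuesEntry); one group shared by all values means the gather is a permutation of a single existing vector, two groups mean a permutation of two vectors, and anything else falls back to a plain gather. A simplified standalone sketch of that classification with ordinary containers follows; the set-narrowing here only approximates the real bookkeeping.

#include <cstdio>
#include <set>
#include <vector>

// Classify a gather: how many distinct source "entries" do its values need?
// Returns 0 (no reuse), 1 (permute of one vector) or 2 (permute of two).
static int classifyGather(const std::vector<std::set<int>> &PerValueEntries) {
  std::vector<std::set<int>> Groups;
  for (const std::set<int> &Entries : PerValueEntries) {
    if (Entries.empty())
      continue; // Constants/unknowns do not constrain the sources.
    bool Matched = false;
    for (std::set<int> &G : Groups) {
      std::set<int> Common;
      for (int E : Entries)
        if (G.count(E))
          Common.insert(E);
      if (!Common.empty()) {
        G = Common; // Narrow the group to entries shared by all its values.
        Matched = true;
        break;
      }
    }
    if (!Matched) {
      if (Groups.size() == 2)
        return 0; // A third independent source: give up and gather.
      Groups.push_back(Entries);
    }
  }
  return static_cast<int>(Groups.size());
}

int main() {
  // Values 0 and 2 come from entry #1, values 1 and 3 from entry #2.
  std::vector<std::set<int>> PerValue = {{1}, {2}, {1}, {2}};
  std::printf("sources needed: %d\n", classifyGather(PerValue)); // prints 2
  return 0;
}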
@@ -8427,19 +9013,8 @@ BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, ArrayRef<Value *> VL,
return std::nullopt;
}
-InstructionCost BoUpSLP::getGatherCost(FixedVectorType *Ty,
- const APInt &ShuffledIndices,
- bool NeedToShuffle) const {
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
- InstructionCost Cost =
- TTI->getScalarizationOverhead(Ty, ~ShuffledIndices, /*Insert*/ true,
- /*Extract*/ false, CostKind);
- if (NeedToShuffle)
- Cost += TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, Ty);
- return Cost;
-}
-
-InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
+InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL,
+ bool ForPoisonSrc) const {
// Find the type of the operands in VL.
Type *ScalarTy = VL[0]->getType();
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
@@ -8451,20 +9026,36 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
// shuffle candidates.
APInt ShuffledElements = APInt::getZero(VL.size());
DenseSet<Value *> UniqueElements;
- // Iterate in reverse order to consider insert elements with the high cost.
- for (unsigned I = VL.size(); I > 0; --I) {
- unsigned Idx = I - 1;
+ constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
+ InstructionCost Cost;
+ auto EstimateInsertCost = [&](unsigned I, Value *V) {
+ if (!ForPoisonSrc)
+ Cost +=
+ TTI->getVectorInstrCost(Instruction::InsertElement, VecTy, CostKind,
+ I, Constant::getNullValue(VecTy), V);
+ };
+ for (unsigned I = 0, E = VL.size(); I < E; ++I) {
+ Value *V = VL[I];
// No need to shuffle duplicates for constants.
- if (isConstant(VL[Idx])) {
- ShuffledElements.setBit(Idx);
+ if ((ForPoisonSrc && isConstant(V)) || isa<UndefValue>(V)) {
+ ShuffledElements.setBit(I);
continue;
}
- if (!UniqueElements.insert(VL[Idx]).second) {
+ if (!UniqueElements.insert(V).second) {
DuplicateNonConst = true;
- ShuffledElements.setBit(Idx);
+ ShuffledElements.setBit(I);
+ continue;
}
+ EstimateInsertCost(I, V);
}
- return getGatherCost(VecTy, ShuffledElements, DuplicateNonConst);
+ if (ForPoisonSrc)
+ Cost =
+ TTI->getScalarizationOverhead(VecTy, ~ShuffledElements, /*Insert*/ true,
+ /*Extract*/ false, CostKind);
+ if (DuplicateNonConst)
+ Cost +=
+ TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, VecTy);
+ return Cost;
}
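getGatherCost now prices a gather as one insertelement per unique non-constant scalar (constants are free for a poison source, repeats are skipped) plus a single permute when duplicates force a follow-up shuffle. A simplified standalone model with invented unit costs rather than real TTI numbers:

#include <cstdio>
#include <string>
#include <unordered_set>
#include <vector>

// Toy model of the gather cost: every unique, non-constant scalar pays an
// insertelement; duplicates are skipped but force one extra shuffle at the
// end. Costs are invented unit values, not real TTI numbers.
static int gatherCost(const std::vector<std::string> &Scalars,
                      int InsertCost, int ShuffleCost) {
  std::unordered_set<std::string> Unique;
  bool DuplicateNonConst = false;
  int Cost = 0;
  for (const std::string &S : Scalars) {
    if (!S.empty() && S[0] == '#') // '#' marks a constant lane in this model.
      continue;
    if (!Unique.insert(S).second) {
      DuplicateNonConst = true; // Repeated value: reuse it via a shuffle.
      continue;
    }
    Cost += InsertCost;
  }
  if (DuplicateNonConst)
    Cost += ShuffleCost;
  return Cost;
}

int main() {
  // Two unique values, one constant, one repeat of "a".
  std::vector<std::string> Scalars = {"a", "b", "#0", "a"};
  std::printf("gather cost = %d\n", gatherCost(Scalars, 1, 2));
  // 2 inserts + 1 shuffle for the duplicate = 4.
  return 0;
}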
// Perform operand reordering on the instructions in VL and return the reordered
@@ -8483,6 +9074,9 @@ void BoUpSLP::reorderInputsAccordingToOpcode(
}
Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
+ auto &Res = EntryToLastInstruction.FindAndConstruct(E);
+ if (Res.second)
+ return *Res.second;
// Get the basic block this bundle is in. All instructions in the bundle
// should be in this block (except for extractelement-like instructions with
// constant indices).
@@ -8497,7 +9091,7 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
isVectorLikeInstWithConstOps(I);
}));
- auto &&FindLastInst = [E, Front, this, &BB]() {
+ auto FindLastInst = [&]() {
Instruction *LastInst = Front;
for (Value *V : E->Scalars) {
auto *I = dyn_cast<Instruction>(V);
@@ -8508,9 +9102,11 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
LastInst = I;
continue;
}
- assert(isVectorLikeInstWithConstOps(LastInst) &&
- isVectorLikeInstWithConstOps(I) &&
- "Expected vector-like insts only.");
+ assert(((E->getOpcode() == Instruction::GetElementPtr &&
+ !isa<GetElementPtrInst>(I)) ||
+ (isVectorLikeInstWithConstOps(LastInst) &&
+ isVectorLikeInstWithConstOps(I))) &&
+ "Expected vector-like or non-GEP in GEP node insts only.");
if (!DT->isReachableFromEntry(LastInst->getParent())) {
LastInst = I;
continue;
@@ -8531,7 +9127,7 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
return LastInst;
};
- auto &&FindFirstInst = [E, Front, this]() {
+ auto FindFirstInst = [&]() {
Instruction *FirstInst = Front;
for (Value *V : E->Scalars) {
auto *I = dyn_cast<Instruction>(V);
@@ -8542,9 +9138,11 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
FirstInst = I;
continue;
}
- assert(isVectorLikeInstWithConstOps(FirstInst) &&
- isVectorLikeInstWithConstOps(I) &&
- "Expected vector-like insts only.");
+ assert(((E->getOpcode() == Instruction::GetElementPtr &&
+ !isa<GetElementPtrInst>(I)) ||
+ (isVectorLikeInstWithConstOps(FirstInst) &&
+ isVectorLikeInstWithConstOps(I))) &&
+ "Expected vector-like or non-GEP in GEP node insts only.");
if (!DT->isReachableFromEntry(FirstInst->getParent())) {
FirstInst = I;
continue;
@@ -8566,22 +9164,23 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
// Set the insert point to the beginning of the basic block if the entry
// should not be scheduled.
- if (E->State != TreeEntry::NeedToGather &&
- (doesNotNeedToSchedule(E->Scalars) ||
+ if (doesNotNeedToSchedule(E->Scalars) ||
+ (E->State != TreeEntry::NeedToGather &&
all_of(E->Scalars, isVectorLikeInstWithConstOps))) {
- Instruction *InsertInst;
- if (all_of(E->Scalars, [](Value *V) {
+ if ((E->getOpcode() == Instruction::GetElementPtr &&
+ any_of(E->Scalars,
+ [](Value *V) {
+ return !isa<GetElementPtrInst>(V) && isa<Instruction>(V);
+ })) ||
+ all_of(E->Scalars, [](Value *V) {
return !isVectorLikeInstWithConstOps(V) && isUsedOutsideBlock(V);
}))
- InsertInst = FindLastInst();
+ Res.second = FindLastInst();
else
- InsertInst = FindFirstInst();
- return *InsertInst;
+ Res.second = FindFirstInst();
+ return *Res.second;
}
- // The last instruction in the bundle in program order.
- Instruction *LastInst = nullptr;
-
// Find the last instruction. The common case should be that BB has been
// scheduled, and the last instruction is VL.back(). So we start with
// VL.back() and iterate over schedule data until we reach the end of the
@@ -8594,7 +9193,7 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
if (Bundle && Bundle->isPartOfBundle())
for (; Bundle; Bundle = Bundle->NextInBundle)
if (Bundle->OpValue == Bundle->Inst)
- LastInst = Bundle->Inst;
+ Res.second = Bundle->Inst;
}
// LastInst can still be null at this point if there's either not an entry
@@ -8615,15 +9214,15 @@ Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
// not ideal. However, this should be exceedingly rare since it requires that
// we both exit early from buildTree_rec and that the bundle be out-of-order
// (causing us to iterate all the way to the end of the block).
- if (!LastInst)
- LastInst = FindLastInst();
- assert(LastInst && "Failed to find last instruction in bundle");
- return *LastInst;
+ if (!Res.second)
+ Res.second = FindLastInst();
+ assert(Res.second && "Failed to find last instruction in bundle");
+ return *Res.second;
}
void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
auto *Front = E->getMainOp();
- Instruction *LastInst = EntryToLastInstruction.lookup(E);
+ Instruction *LastInst = &getLastInstructionInBundle(E);
assert(LastInst && "Failed to find last instruction in bundle");
// If the instruction is PHI, set the insert point after all the PHIs.
bool IsPHI = isa<PHINode>(LastInst);
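getLastInstructionInBundle is now memoized through EntryToLastInstruction: FindAndConstruct hands back the cached slot, which is filled on first use, so setInsertPointAfterBundle can simply call the function instead of relying on a pre-populated table. A generic sketch of that memoization shape with a plain std::unordered_map instead of DenseMap; the keys and the slow computation are invented for the illustration.

#include <cstdio>
#include <unordered_map>

// Memoize an expensive per-key computation: the first lookup computes and
// caches the result, later lookups return the cached slot directly. This is
// the same shape as caching the last instruction per tree entry.
struct LastInstCache {
  std::unordered_map<int, int> Cache; // key -> cached "last instruction" id

  int get(int Key) {
    auto [It, Inserted] = Cache.try_emplace(Key, 0);
    if (!Inserted && It->second != 0)
      return It->second;           // Cached: no recomputation.
    It->second = computeSlow(Key); // First use: fill the slot.
    return It->second;
  }

  static int computeSlow(int Key) {
    std::printf("computing for key %d\n", Key);
    return Key * 10 + 7; // Stand-in for walking the bundle's scalars.
  }
};

int main() {
  LastInstCache C;
  std::printf("%d\n", C.get(3)); // computes, prints 37
  std::printf("%d\n", C.get(3)); // cached, prints 37 without recomputing
  return 0;
}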
@@ -8641,7 +9240,7 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
Builder.SetCurrentDebugLocation(Front->getDebugLoc());
}
-Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
+Value *BoUpSLP::gather(ArrayRef<Value *> VL, Value *Root) {
// List of instructions/lanes from current block and/or the blocks which are
// part of the current loop. These instructions will be inserted at the end to
// make it possible to optimize loops and hoist invariant instructions out of
@@ -8658,7 +9257,8 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
for (int I = 0, E = VL.size(); I < E; ++I) {
if (auto *Inst = dyn_cast<Instruction>(VL[I]))
if ((CheckPredecessor(Inst->getParent(), Builder.GetInsertBlock()) ||
- getTreeEntry(Inst) || (L && (L->contains(Inst)))) &&
+ getTreeEntry(Inst) ||
+ (L && (!Root || L->isLoopInvariant(Root)) && L->contains(Inst))) &&
PostponedIndices.insert(I).second)
PostponedInsts.emplace_back(Inst, I);
}
@@ -8681,7 +9281,7 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
Value *Val0 =
isa<StoreInst>(VL[0]) ? cast<StoreInst>(VL[0])->getValueOperand() : VL[0];
FixedVectorType *VecTy = FixedVectorType::get(Val0->getType(), VL.size());
- Value *Vec = PoisonValue::get(VecTy);
+ Value *Vec = Root ? Root : PoisonValue::get(VecTy);
SmallVector<int> NonConsts;
// Insert constant values at first.
for (int I = 0, E = VL.size(); I < E; ++I) {
@@ -8691,6 +9291,18 @@ Value *BoUpSLP::gather(ArrayRef<Value *> VL) {
NonConsts.push_back(I);
continue;
}
+ if (Root) {
+ if (!isa<UndefValue>(VL[I])) {
+ NonConsts.push_back(I);
+ continue;
+ }
+ if (isa<PoisonValue>(VL[I]))
+ continue;
+ if (auto *SV = dyn_cast<ShuffleVectorInst>(Root)) {
+ if (SV->getMaskValue(I) == PoisonMaskElem)
+ continue;
+ }
+ }
Vec = CreateInsertElement(Vec, VL[I], I);
}
// Insert non-constant values.
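gather() now takes an optional Root vector and fills constant lanes first, so they can fold into the seed vector, before inserting the remaining values. The helper below is only a sketch of that two-pass insertelement strategy against the IRBuilder API, assuming an already-positioned builder; it is not BoUpSLP::gather itself and omits the loop-hoisting and shuffle-root special cases.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Sketch only: build a vector from scalars in two passes, constants first
// (so they fold into the initial constant vector), then the rest. "Root"
// optionally seeds the vector instead of poison, as in the patched gather().
static Value *buildVectorSketch(IRBuilder<> &B, ArrayRef<Value *> Scalars,
                                Value *Root = nullptr) {
  auto *VecTy =
      FixedVectorType::get(Scalars.front()->getType(), Scalars.size());
  Value *Vec = Root ? Root : PoisonValue::get(VecTy);
  SmallVector<unsigned, 8> NonConsts;
  for (unsigned I = 0, E = Scalars.size(); I != E; ++I) {
    if (!isa<Constant>(Scalars[I])) {
      NonConsts.push_back(I); // Defer non-constant lanes to the second pass.
      continue;
    }
    Vec = B.CreateInsertElement(Vec, Scalars[I], B.getInt64(I));
  }
  for (unsigned I : NonConsts)
    Vec = B.CreateInsertElement(Vec, Scalars[I], B.getInt64(I));
  return Vec;
}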
@@ -8789,6 +9401,10 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
}
return Vec;
}
+ Value *createIdentity(Value *V) { return V; }
+ Value *createPoison(Type *Ty, unsigned VF) {
+ return PoisonValue::get(FixedVectorType::get(Ty, VF));
+ }
/// Resizes 2 input vectors to match their sizes, if they are not equal
/// yet. The smallest vector is resized to the size of the larger vector.
void resizeToMatch(Value *&V1, Value *&V2) {
@@ -8798,7 +9414,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
int V2VF = cast<FixedVectorType>(V2->getType())->getNumElements();
int VF = std::max(V1VF, V2VF);
int MinVF = std::min(V1VF, V2VF);
- SmallVector<int> IdentityMask(VF, UndefMaskElem);
+ SmallVector<int> IdentityMask(VF, PoisonMaskElem);
std::iota(IdentityMask.begin(), std::next(IdentityMask.begin(), MinVF),
0);
Value *&Op = MinVF == V1VF ? V1 : V2;
@@ -8821,7 +9437,8 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
assert(V1 && "Expected at least one vector value.");
ShuffleIRBuilder ShuffleBuilder(Builder, R.GatherShuffleExtractSeq,
R.CSEBlocks);
- return BaseShuffleAnalysis::createShuffle(V1, V2, Mask, ShuffleBuilder);
+ return BaseShuffleAnalysis::createShuffle<Value *>(V1, V2, Mask,
+ ShuffleBuilder);
}
/// Transforms mask \p CommonMask per given \p Mask to make proper set after
@@ -8829,7 +9446,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
static void transformMaskAfterShuffle(MutableArrayRef<int> CommonMask,
ArrayRef<int> Mask) {
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
- if (Mask[Idx] != UndefMaskElem)
+ if (Mask[Idx] != PoisonMaskElem)
CommonMask[Idx] = Idx;
}
@@ -8837,6 +9454,39 @@ public:
ShuffleInstructionBuilder(IRBuilderBase &Builder, BoUpSLP &R)
: Builder(Builder), R(R) {}
+ /// Adjusts extractelements after reusing them.
+ Value *adjustExtracts(const TreeEntry *E, ArrayRef<int> Mask) {
+ Value *VecBase = nullptr;
+ for (int I = 0, Sz = Mask.size(); I < Sz; ++I) {
+ int Idx = Mask[I];
+ if (Idx == PoisonMaskElem)
+ continue;
+ auto *EI = cast<ExtractElementInst>(E->Scalars[I]);
+ VecBase = EI->getVectorOperand();
+ // If its only use is vectorized, the extractelement itself can be
+ // deleted.
+ if (!EI->hasOneUse() || any_of(EI->users(), [&](User *U) {
+ return !R.ScalarToTreeEntry.count(U);
+ }))
+ continue;
+ R.eraseInstruction(EI);
+ }
+ return VecBase;
+ }
+ /// Checks if the specified entry \p E needs to be delayed because of its
+ /// dependency nodes.
+ Value *needToDelay(const TreeEntry *E, ArrayRef<const TreeEntry *> Deps) {
+ // No need to delay emission if all deps are ready.
+ if (all_of(Deps, [](const TreeEntry *TE) { return TE->VectorizedValue; }))
+ return nullptr;
+ // Postpone gather emission; it will be emitted after the end of the
+ // process to keep the correct order.
+ auto *VecTy = FixedVectorType::get(E->Scalars.front()->getType(),
+ E->getVectorFactor());
+ return Builder.CreateAlignedLoad(
+ VecTy, PoisonValue::get(PointerType::getUnqual(VecTy->getContext())),
+ MaybeAlign());
+ }
/// Adds 2 input vectors and the mask for their shuffling.
void add(Value *V1, Value *V2, ArrayRef<int> Mask) {
assert(V1 && V2 && !Mask.empty() && "Expected non-empty input vectors.");
@@ -8849,15 +9499,15 @@ public:
Value *Vec = InVectors.front();
if (InVectors.size() == 2) {
Vec = createShuffle(Vec, InVectors.back(), CommonMask);
- transformMaskAfterShuffle(CommonMask, Mask);
+ transformMaskAfterShuffle(CommonMask, CommonMask);
} else if (cast<FixedVectorType>(Vec->getType())->getNumElements() !=
Mask.size()) {
Vec = createShuffle(Vec, nullptr, CommonMask);
- transformMaskAfterShuffle(CommonMask, Mask);
+ transformMaskAfterShuffle(CommonMask, CommonMask);
}
V1 = createShuffle(V1, V2, Mask);
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
- if (Mask[Idx] != UndefMaskElem)
+ if (Mask[Idx] != PoisonMaskElem)
CommonMask[Idx] = Idx + Sz;
InVectors.front() = Vec;
if (InVectors.size() == 2)
@@ -8870,7 +9520,7 @@ public:
if (InVectors.empty()) {
if (!isa<FixedVectorType>(V1->getType())) {
V1 = createShuffle(V1, nullptr, CommonMask);
- CommonMask.assign(Mask.size(), UndefMaskElem);
+ CommonMask.assign(Mask.size(), PoisonMaskElem);
transformMaskAfterShuffle(CommonMask, Mask);
}
InVectors.push_back(V1);
@@ -8892,7 +9542,7 @@ public:
transformMaskAfterShuffle(CommonMask, CommonMask);
}
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
- if (CommonMask[Idx] == UndefMaskElem && Mask[Idx] != UndefMaskElem)
+ if (CommonMask[Idx] == PoisonMaskElem && Mask[Idx] != PoisonMaskElem)
CommonMask[Idx] =
V->getType() != V1->getType()
? Idx + Sz
@@ -8910,7 +9560,7 @@ public:
// Check if second vector is required if the used elements are already
// used from the first one.
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
- if (Mask[Idx] != UndefMaskElem && CommonMask[Idx] == UndefMaskElem) {
+ if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem) {
InVectors.push_back(V1);
break;
}
@@ -8919,7 +9569,7 @@ public:
if (auto *FTy = dyn_cast<FixedVectorType>(V1->getType()))
VF = FTy->getNumElements();
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
- if (Mask[Idx] != UndefMaskElem && CommonMask[Idx] == UndefMaskElem)
+ if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem)
CommonMask[Idx] = Mask[Idx] + (It == InVectors.begin() ? 0 : VF);
}
/// Adds another one input vector and the mask for the shuffling.
@@ -8928,17 +9578,46 @@ public:
inversePermutation(Order, NewMask);
add(V1, NewMask);
}
+ Value *gather(ArrayRef<Value *> VL, Value *Root = nullptr) {
+ return R.gather(VL, Root);
+ }
+ Value *createFreeze(Value *V) { return Builder.CreateFreeze(V); }
/// Finalize emission of the shuffles.
+ /// \param Action the action (if any) to be performed before the final
+ /// application of the \p ExtMask mask.
Value *
- finalize(ArrayRef<int> ExtMask = std::nullopt) {
+ finalize(ArrayRef<int> ExtMask, unsigned VF = 0,
+ function_ref<void(Value *&, SmallVectorImpl<int> &)> Action = {}) {
IsFinalized = true;
+ if (Action) {
+ Value *Vec = InVectors.front();
+ if (InVectors.size() == 2) {
+ Vec = createShuffle(Vec, InVectors.back(), CommonMask);
+ InVectors.pop_back();
+ } else {
+ Vec = createShuffle(Vec, nullptr, CommonMask);
+ }
+ for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
+ if (CommonMask[Idx] != PoisonMaskElem)
+ CommonMask[Idx] = Idx;
+ assert(VF > 0 &&
+ "Expected vector length for the final value before action.");
+ unsigned VecVF = cast<FixedVectorType>(Vec->getType())->getNumElements();
+ if (VecVF < VF) {
+ SmallVector<int> ResizeMask(VF, PoisonMaskElem);
+ std::iota(ResizeMask.begin(), std::next(ResizeMask.begin(), VecVF), 0);
+ Vec = createShuffle(Vec, nullptr, ResizeMask);
+ }
+ Action(Vec, CommonMask);
+ InVectors.front() = Vec;
+ }
if (!ExtMask.empty()) {
if (CommonMask.empty()) {
CommonMask.assign(ExtMask.begin(), ExtMask.end());
} else {
- SmallVector<int> NewMask(ExtMask.size(), UndefMaskElem);
+ SmallVector<int> NewMask(ExtMask.size(), PoisonMaskElem);
for (int I = 0, Sz = ExtMask.size(); I < Sz; ++I) {
- if (ExtMask[I] == UndefMaskElem)
+ if (ExtMask[I] == PoisonMaskElem)
continue;
NewMask[I] = CommonMask[ExtMask[I]];
}
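The finalize() path above composes the external mask with the accumulated one as NewMask[I] = CommonMask[ExtMask[I]], i.e. a plain index lookup in which poison lanes stay poison. A minimal standalone sketch of that mask composition:

#include <cstdio>
#include <vector>

constexpr int kPoisonLane = -1;

// Compose two masks: the outer mask picks lanes of the result produced by
// the inner (common) mask, so Out[I] = Inner[Outer[I]]. Poison in the outer
// mask stays poison.
static std::vector<int> composeMasks(const std::vector<int> &Inner,
                                     const std::vector<int> &Outer) {
  std::vector<int> Out(Outer.size(), kPoisonLane);
  for (size_t I = 0; I < Outer.size(); ++I)
    if (Outer[I] != kPoisonLane)
      Out[I] = Inner[Outer[I]];
  return Out;
}

int main() {
  std::vector<int> Common = {2, 0, 3, 1};        // first shuffle
  std::vector<int> Ext = {1, 1, kPoisonLane, 0}; // reuse/extension mask
  for (int V : composeMasks(Common, Ext))
    std::printf("%d ", V); // prints: 0 0 -1 2
  std::printf("\n");
  return 0;
}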
@@ -9009,18 +9688,18 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
// ... (use %2)
// %shuffle = shuffle <2 x> %2, poison, <2 x> {2, 0}
// br %block
- SmallVector<int> UniqueIdxs(VF, UndefMaskElem);
+ SmallVector<int> UniqueIdxs(VF, PoisonMaskElem);
SmallSet<int, 4> UsedIdxs;
int Pos = 0;
for (int Idx : VE->ReuseShuffleIndices) {
- if (Idx != static_cast<int>(VF) && Idx != UndefMaskElem &&
+ if (Idx != static_cast<int>(VF) && Idx != PoisonMaskElem &&
UsedIdxs.insert(Idx).second)
UniqueIdxs[Idx] = Pos;
++Pos;
}
assert(VF >= UsedIdxs.size() && "Expected vectorization factor "
"less than original vector size.");
- UniqueIdxs.append(VF - UsedIdxs.size(), UndefMaskElem);
+ UniqueIdxs.append(VF - UsedIdxs.size(), PoisonMaskElem);
V = FinalShuffle(V, UniqueIdxs);
} else {
assert(VF < cast<FixedVectorType>(V->getType())->getNumElements() &&
@@ -9031,6 +9710,21 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
V = FinalShuffle(V, UniformMask);
}
}
+ // Need to update the operand gather node if the operand is actually not a
+ // vectorized node but a buildvector/gather node that matches one of the
+ // vectorized nodes.
+ if (find_if(VE->UserTreeIndices, [&](const EdgeInfo &EI) {
+ return EI.UserTE == E && EI.EdgeIdx == NodeIdx;
+ }) == VE->UserTreeIndices.end()) {
+ auto *It = find_if(
+ VectorizableTree, [&](const std::unique_ptr<TreeEntry> &TE) {
+ return TE->State == TreeEntry::NeedToGather &&
+ TE->UserTreeIndices.front().UserTE == E &&
+ TE->UserTreeIndices.front().EdgeIdx == NodeIdx;
+ });
+ assert(It != VectorizableTree.end() && "Expected gather node operand.");
+ (*It)->VectorizedValue = V;
+ }
return V;
}
}
@@ -9049,108 +9743,370 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
IRBuilder<>::InsertPointGuard Guard(Builder);
if (E->getOpcode() != Instruction::InsertElement &&
E->getOpcode() != Instruction::PHI) {
- Instruction *LastInst = EntryToLastInstruction.lookup(E);
+ Instruction *LastInst = &getLastInstructionInBundle(E);
assert(LastInst && "Failed to find last instruction in bundle");
Builder.SetInsertPoint(LastInst);
}
return vectorizeTree(I->get());
}
-Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
+template <typename BVTy, typename ResTy, typename... Args>
+ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
assert(E->State == TreeEntry::NeedToGather && "Expected gather node.");
unsigned VF = E->getVectorFactor();
- ShuffleInstructionBuilder ShuffleBuilder(Builder, *this);
- SmallVector<Value *> Gathered(
- VF, PoisonValue::get(E->Scalars.front()->getType()));
bool NeedFreeze = false;
- SmallVector<Value *> VL(E->Scalars.begin(), E->Scalars.end());
- // Build a mask out of the redorder indices and reorder scalars per this mask.
+ SmallVector<int> ReuseShuffleIndicies(E->ReuseShuffleIndices.begin(),
+ E->ReuseShuffleIndices.end());
+ SmallVector<Value *> GatheredScalars(E->Scalars.begin(), E->Scalars.end());
+ // Build a mask out of the reorder indices and reorder scalars per this
+ // mask.
SmallVector<int> ReorderMask;
inversePermutation(E->ReorderIndices, ReorderMask);
if (!ReorderMask.empty())
- reorderScalars(VL, ReorderMask);
- SmallVector<int> ReuseMask(VF, UndefMaskElem);
- if (!allConstant(VL)) {
+ reorderScalars(GatheredScalars, ReorderMask);
+ auto FindReusedSplat = [&](SmallVectorImpl<int> &Mask) {
+ if (!isSplat(E->Scalars) || none_of(E->Scalars, [](Value *V) {
+ return isa<UndefValue>(V) && !isa<PoisonValue>(V);
+ }))
+ return false;
+ TreeEntry *UserTE = E->UserTreeIndices.back().UserTE;
+ unsigned EdgeIdx = E->UserTreeIndices.back().EdgeIdx;
+ if (UserTE->getNumOperands() != 2)
+ return false;
+ auto *It =
+ find_if(VectorizableTree, [=](const std::unique_ptr<TreeEntry> &TE) {
+ return find_if(TE->UserTreeIndices, [=](const EdgeInfo &EI) {
+ return EI.UserTE == UserTE && EI.EdgeIdx != EdgeIdx;
+ }) != TE->UserTreeIndices.end();
+ });
+ if (It == VectorizableTree.end())
+ return false;
+ unsigned I =
+ *find_if_not(Mask, [](int Idx) { return Idx == PoisonMaskElem; });
+ int Sz = Mask.size();
+ if (all_of(Mask, [Sz](int Idx) { return Idx < 2 * Sz; }) &&
+ ShuffleVectorInst::isIdentityMask(Mask))
+ std::iota(Mask.begin(), Mask.end(), 0);
+ else
+ std::fill(Mask.begin(), Mask.end(), I);
+ return true;
+ };
+ BVTy ShuffleBuilder(Params...);
+ ResTy Res = ResTy();
+ SmallVector<int> Mask;
+ SmallVector<int> ExtractMask;
+ std::optional<TargetTransformInfo::ShuffleKind> ExtractShuffle;
+ std::optional<TargetTransformInfo::ShuffleKind> GatherShuffle;
+ SmallVector<const TreeEntry *> Entries;
+ Type *ScalarTy = GatheredScalars.front()->getType();
+ if (!all_of(GatheredScalars, UndefValue::classof)) {
+ // Check for gathered extracts.
+ ExtractShuffle = tryToGatherExtractElements(GatheredScalars, ExtractMask);
+ SmallVector<Value *> IgnoredVals;
+ if (UserIgnoreList)
+ IgnoredVals.assign(UserIgnoreList->begin(), UserIgnoreList->end());
+ bool Resized = false;
+ if (Value *VecBase = ShuffleBuilder.adjustExtracts(E, ExtractMask))
+ if (auto *VecBaseTy = dyn_cast<FixedVectorType>(VecBase->getType()))
+ if (VF == VecBaseTy->getNumElements() && GatheredScalars.size() != VF) {
+ Resized = true;
+ GatheredScalars.append(VF - GatheredScalars.size(),
+ PoisonValue::get(ScalarTy));
+ }
+ // Gather extracts after we check for full matched gathers only.
+ if (ExtractShuffle || E->getOpcode() != Instruction::Load ||
+ E->isAltShuffle() ||
+ all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) ||
+ isSplat(E->Scalars) ||
+ (E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) {
+ GatherShuffle = isGatherShuffledEntry(E, GatheredScalars, Mask, Entries);
+ }
+ if (GatherShuffle) {
+ if (Value *Delayed = ShuffleBuilder.needToDelay(E, Entries)) {
+ // Delay emission of gathers which are not ready yet.
+ PostponedGathers.insert(E);
+ // Postpone gather emission; it will be emitted after the end of the
+ // process to keep the correct order.
+ return Delayed;
+ }
+ assert((Entries.size() == 1 || Entries.size() == 2) &&
+ "Expected shuffle of 1 or 2 entries.");
+ if (*GatherShuffle == TTI::SK_PermuteSingleSrc &&
+ Entries.front()->isSame(E->Scalars)) {
+ // Perfect match in the graph, will reuse the previously vectorized
+ // node. Cost is 0.
+ LLVM_DEBUG(
+ dbgs()
+ << "SLP: perfect diamond match for gather bundle that starts with "
+ << *E->Scalars.front() << ".\n");
+ // Restore the mask for previous partially matched values.
+ if (Entries.front()->ReorderIndices.empty() &&
+ ((Entries.front()->ReuseShuffleIndices.empty() &&
+ E->Scalars.size() == Entries.front()->Scalars.size()) ||
+ (E->Scalars.size() ==
+ Entries.front()->ReuseShuffleIndices.size()))) {
+ std::iota(Mask.begin(), Mask.end(), 0);
+ } else {
+ for (auto [I, V] : enumerate(E->Scalars)) {
+ if (isa<PoisonValue>(V)) {
+ Mask[I] = PoisonMaskElem;
+ continue;
+ }
+ Mask[I] = Entries.front()->findLaneForValue(V);
+ }
+ }
+ ShuffleBuilder.add(Entries.front()->VectorizedValue, Mask);
+ Res = ShuffleBuilder.finalize(E->getCommonMask());
+ return Res;
+ }
+ if (!Resized) {
+ unsigned VF1 = Entries.front()->getVectorFactor();
+ unsigned VF2 = Entries.back()->getVectorFactor();
+ if ((VF == VF1 || VF == VF2) && GatheredScalars.size() != VF)
+ GatheredScalars.append(VF - GatheredScalars.size(),
+ PoisonValue::get(ScalarTy));
+ }
+ // Remove shuffled elements from list of gathers.
+ for (int I = 0, Sz = Mask.size(); I < Sz; ++I) {
+ if (Mask[I] != PoisonMaskElem)
+ GatheredScalars[I] = PoisonValue::get(ScalarTy);
+ }
+ }
+ }
+ auto TryPackScalars = [&](SmallVectorImpl<Value *> &Scalars,
+ SmallVectorImpl<int> &ReuseMask,
+ bool IsRootPoison) {
// For splats we can emit broadcasts instead of gathers, so try to find
// such sequences.
- bool IsSplat = isSplat(VL) && (VL.size() > 2 || VL.front() == VL.back());
+ bool IsSplat = IsRootPoison && isSplat(Scalars) &&
+ (Scalars.size() > 2 || Scalars.front() == Scalars.back());
+ Scalars.append(VF - Scalars.size(), PoisonValue::get(ScalarTy));
SmallVector<int> UndefPos;
DenseMap<Value *, unsigned> UniquePositions;
// Gather unique non-const values and all constant values.
// For repeated values, just shuffle them.
- for (auto [I, V] : enumerate(VL)) {
+ int NumNonConsts = 0;
+ int SinglePos = 0;
+ for (auto [I, V] : enumerate(Scalars)) {
if (isa<UndefValue>(V)) {
if (!isa<PoisonValue>(V)) {
- Gathered[I] = V;
ReuseMask[I] = I;
UndefPos.push_back(I);
}
continue;
}
if (isConstant(V)) {
- Gathered[I] = V;
ReuseMask[I] = I;
continue;
}
+ ++NumNonConsts;
+ SinglePos = I;
+ Value *OrigV = V;
+ Scalars[I] = PoisonValue::get(ScalarTy);
if (IsSplat) {
- Gathered.front() = V;
+ Scalars.front() = OrigV;
ReuseMask[I] = 0;
} else {
- const auto Res = UniquePositions.try_emplace(V, I);
- Gathered[Res.first->second] = V;
+ const auto Res = UniquePositions.try_emplace(OrigV, I);
+ Scalars[Res.first->second] = OrigV;
ReuseMask[I] = Res.first->second;
}
}
- if (!UndefPos.empty() && IsSplat) {
+ if (NumNonConsts == 1) {
+ // Restore single insert element.
+ if (IsSplat) {
+ ReuseMask.assign(VF, PoisonMaskElem);
+ std::swap(Scalars.front(), Scalars[SinglePos]);
+ if (!UndefPos.empty() && UndefPos.front() == 0)
+ Scalars.front() = UndefValue::get(ScalarTy);
+ }
+ ReuseMask[SinglePos] = SinglePos;
+ } else if (!UndefPos.empty() && IsSplat) {
// For undef values, try to replace them with the simple broadcast.
// We can do it if the broadcasted value is guaranteed to be
// non-poisonous, or by freezing the incoming scalar value first.
- auto *It = find_if(Gathered, [this, E](Value *V) {
+ auto *It = find_if(Scalars, [this, E](Value *V) {
return !isa<UndefValue>(V) &&
(getTreeEntry(V) || isGuaranteedNotToBePoison(V) ||
- any_of(V->uses(), [E](const Use &U) {
- // Check if the value already used in the same operation in
- // one of the nodes already.
- return E->UserTreeIndices.size() == 1 &&
- is_contained(
- E->UserTreeIndices.front().UserTE->Scalars,
- U.getUser()) &&
- E->UserTreeIndices.front().EdgeIdx != U.getOperandNo();
- }));
+ (E->UserTreeIndices.size() == 1 &&
+ any_of(V->uses(), [E](const Use &U) {
+ // Check if the value already used in the same operation in
+ // one of the nodes already.
+ return E->UserTreeIndices.front().EdgeIdx !=
+ U.getOperandNo() &&
+ is_contained(
+ E->UserTreeIndices.front().UserTE->Scalars,
+ U.getUser());
+ })));
});
- if (It != Gathered.end()) {
+ if (It != Scalars.end()) {
// Replace undefs by the non-poisoned scalars and emit broadcast.
- int Pos = std::distance(Gathered.begin(), It);
+ int Pos = std::distance(Scalars.begin(), It);
for_each(UndefPos, [&](int I) {
// Set the undef position to the non-poisoned scalar.
ReuseMask[I] = Pos;
- // Replace the undef by the poison, in the mask it is replaced by non-poisoned scalar already.
+ // Replace the undef by the poison, in the mask it is replaced by
+ // non-poisoned scalar already.
if (I != Pos)
- Gathered[I] = PoisonValue::get(Gathered[I]->getType());
+ Scalars[I] = PoisonValue::get(ScalarTy);
});
} else {
// Replace undefs by the poisons, emit broadcast and then emit
// freeze.
for_each(UndefPos, [&](int I) {
- ReuseMask[I] = UndefMaskElem;
- if (isa<UndefValue>(Gathered[I]))
- Gathered[I] = PoisonValue::get(Gathered[I]->getType());
+ ReuseMask[I] = PoisonMaskElem;
+ if (isa<UndefValue>(Scalars[I]))
+ Scalars[I] = PoisonValue::get(ScalarTy);
});
NeedFreeze = true;
}
}
+ };
+ if (ExtractShuffle || GatherShuffle) {
+ bool IsNonPoisoned = true;
+ bool IsUsedInExpr = false;
+ Value *Vec1 = nullptr;
+ if (ExtractShuffle) {
+ // Gather of extractelements can be represented as just a shuffle of
+ // a single/two vectors the scalars are extracted from.
+ // Find input vectors.
+ Value *Vec2 = nullptr;
+ for (unsigned I = 0, Sz = ExtractMask.size(); I < Sz; ++I) {
+ if (ExtractMask[I] == PoisonMaskElem ||
+ (!Mask.empty() && Mask[I] != PoisonMaskElem)) {
+ ExtractMask[I] = PoisonMaskElem;
+ continue;
+ }
+ if (isa<UndefValue>(E->Scalars[I]))
+ continue;
+ auto *EI = cast<ExtractElementInst>(E->Scalars[I]);
+ if (!Vec1) {
+ Vec1 = EI->getVectorOperand();
+ } else if (Vec1 != EI->getVectorOperand()) {
+ assert((!Vec2 || Vec2 == EI->getVectorOperand()) &&
+ "Expected only 1 or 2 vectors shuffle.");
+ Vec2 = EI->getVectorOperand();
+ }
+ }
+ if (Vec2) {
+ IsNonPoisoned &=
+ isGuaranteedNotToBePoison(Vec1) && isGuaranteedNotToBePoison(Vec2);
+ ShuffleBuilder.add(Vec1, Vec2, ExtractMask);
+ } else if (Vec1) {
+ IsUsedInExpr = FindReusedSplat(ExtractMask);
+ ShuffleBuilder.add(Vec1, ExtractMask);
+ IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1);
+ } else {
+ ShuffleBuilder.add(PoisonValue::get(FixedVectorType::get(
+ ScalarTy, GatheredScalars.size())),
+ ExtractMask);
+ }
+ }
+ if (GatherShuffle) {
+ if (Entries.size() == 1) {
+ IsUsedInExpr = FindReusedSplat(Mask);
+ ShuffleBuilder.add(Entries.front()->VectorizedValue, Mask);
+ IsNonPoisoned &=
+ isGuaranteedNotToBePoison(Entries.front()->VectorizedValue);
+ } else {
+ ShuffleBuilder.add(Entries.front()->VectorizedValue,
+ Entries.back()->VectorizedValue, Mask);
+ IsNonPoisoned &=
+ isGuaranteedNotToBePoison(Entries.front()->VectorizedValue) &&
+ isGuaranteedNotToBePoison(Entries.back()->VectorizedValue);
+ }
+ }
+ // Try to figure out the best way to combine values: build a shuffle and
+ // insert elements or just build several shuffles.
+ // Insert non-constant scalars.
+ SmallVector<Value *> NonConstants(GatheredScalars);
+ int EMSz = ExtractMask.size();
+ int MSz = Mask.size();
+ // Try to build constant vector and shuffle with it only if currently we
+ // have a single permutation and more than 1 scalar constants.
+ bool IsSingleShuffle = !ExtractShuffle || !GatherShuffle;
+ bool IsIdentityShuffle =
+ (ExtractShuffle.value_or(TTI::SK_PermuteTwoSrc) ==
+ TTI::SK_PermuteSingleSrc &&
+ none_of(ExtractMask, [&](int I) { return I >= EMSz; }) &&
+ ShuffleVectorInst::isIdentityMask(ExtractMask)) ||
+ (GatherShuffle.value_or(TTI::SK_PermuteTwoSrc) ==
+ TTI::SK_PermuteSingleSrc &&
+ none_of(Mask, [&](int I) { return I >= MSz; }) &&
+ ShuffleVectorInst::isIdentityMask(Mask));
+ bool EnoughConstsForShuffle =
+ IsSingleShuffle &&
+ (none_of(GatheredScalars,
+ [](Value *V) {
+ return isa<UndefValue>(V) && !isa<PoisonValue>(V);
+ }) ||
+ any_of(GatheredScalars,
+ [](Value *V) {
+ return isa<Constant>(V) && !isa<UndefValue>(V);
+ })) &&
+ (!IsIdentityShuffle ||
+ (GatheredScalars.size() == 2 &&
+ any_of(GatheredScalars,
+ [](Value *V) { return !isa<UndefValue>(V); })) ||
+ count_if(GatheredScalars, [](Value *V) {
+ return isa<Constant>(V) && !isa<PoisonValue>(V);
+ }) > 1);
+ // The NonConstants array contains just the non-constant values, while
+ // GatheredScalars contains only the constants used to build the final
+ // vector, which is then shuffled.
+ for (int I = 0, Sz = GatheredScalars.size(); I < Sz; ++I) {
+ if (EnoughConstsForShuffle && isa<Constant>(GatheredScalars[I]))
+ NonConstants[I] = PoisonValue::get(ScalarTy);
+ else
+ GatheredScalars[I] = PoisonValue::get(ScalarTy);
+ }
+ // Generate constants for final shuffle and build a mask for them.
+ if (!all_of(GatheredScalars, PoisonValue::classof)) {
+ SmallVector<int> BVMask(GatheredScalars.size(), PoisonMaskElem);
+ TryPackScalars(GatheredScalars, BVMask, /*IsRootPoison=*/true);
+ Value *BV = ShuffleBuilder.gather(GatheredScalars);
+ ShuffleBuilder.add(BV, BVMask);
+ }
+ if (all_of(NonConstants, [=](Value *V) {
+ return isa<PoisonValue>(V) ||
+ (IsSingleShuffle && ((IsIdentityShuffle &&
+ IsNonPoisoned) || IsUsedInExpr) && isa<UndefValue>(V));
+ }))
+ Res = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
+ else
+ Res = ShuffleBuilder.finalize(
+ E->ReuseShuffleIndices, E->Scalars.size(),
+ [&](Value *&Vec, SmallVectorImpl<int> &Mask) {
+ TryPackScalars(NonConstants, Mask, /*IsRootPoison=*/false);
+ Vec = ShuffleBuilder.gather(NonConstants, Vec);
+ });
+ } else if (!allConstant(GatheredScalars)) {
+ // Gather unique scalars and all constants.
+ SmallVector<int> ReuseMask(GatheredScalars.size(), PoisonMaskElem);
+ TryPackScalars(GatheredScalars, ReuseMask, /*IsRootPoison=*/true);
+ Value *BV = ShuffleBuilder.gather(GatheredScalars);
+ ShuffleBuilder.add(BV, ReuseMask);
+ Res = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
} else {
- ReuseMask.clear();
- copy(VL, Gathered.begin());
+ // Gather all constants.
+ SmallVector<int> Mask(E->Scalars.size(), PoisonMaskElem);
+ for (auto [I, V] : enumerate(E->Scalars)) {
+ if (!isa<PoisonValue>(V))
+ Mask[I] = I;
+ }
+ Value *BV = ShuffleBuilder.gather(E->Scalars);
+ ShuffleBuilder.add(BV, Mask);
+ Res = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
}
- // Gather unique scalars and all constants.
- Value *Vec = gather(Gathered);
- ShuffleBuilder.add(Vec, ReuseMask);
- Vec = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
+
if (NeedFreeze)
- Vec = Builder.CreateFreeze(Vec);
- return Vec;
+ Res = ShuffleBuilder.createFreeze(Res);
+ return Res;
+}
+
+Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
+ return processBuildVector<ShuffleInstructionBuilder, Value *>(E, Builder,
+ *this);
}
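createBuildVector is now a thin wrapper over the processBuildVector template, and the cost estimation earlier in this patch drives an estimator with the same add/gather/finalize surface, so the gather/shuffle decisions are written once and interpreted either as IR or as a cost. A minimal sketch of that "one algorithm, two builders" pattern; the builder names and interfaces here are invented for the illustration.

#include <cstdio>
#include <string>
#include <vector>

// One algorithm, two interpretations: the same driver is instantiated with a
// codegen-like builder and with a cost-estimating builder.
struct CodeGenBuilder {
  using ResultTy = std::string;
  std::string add(int Lane) { return "insert(" + std::to_string(Lane) + ")"; }
  std::string finalize(std::vector<std::string> Steps) {
    std::string Out;
    for (const std::string &S : Steps)
      Out += S + ";";
    return Out;
  }
};

struct CostBuilder {
  using ResultTy = int;
  int add(int) { return 1; } // Each insert costs 1 unit in this model.
  int finalize(std::vector<int> Steps) {
    int Cost = 0;
    for (int C : Steps)
      Cost += C;
    return Cost;
  }
};

template <typename BVTy>
typename BVTy::ResultTy processBuildVector(const std::vector<int> &Lanes,
                                           BVTy &Builder) {
  std::vector<typename BVTy::ResultTy> Steps;
  for (int L : Lanes)
    Steps.push_back(Builder.add(L)); // Same decisions for both builders.
  return Builder.finalize(Steps);
}

int main() {
  std::vector<int> Lanes = {0, 1, 3};
  CodeGenBuilder CG;
  CostBuilder CB;
  std::printf("ir:   %s\n", processBuildVector(Lanes, CG).c_str());
  std::printf("cost: %d\n", processBuildVector(Lanes, CB)); // prints 3
  return 0;
}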
Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
@@ -9161,10 +10117,17 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return E->VectorizedValue;
}
+ if (E->State == TreeEntry::NeedToGather) {
+ if (E->getMainOp() && E->Idx == 0)
+ setInsertPointAfterBundle(E);
+ Value *Vec = createBuildVector(E);
+ E->VectorizedValue = Vec;
+ return Vec;
+ }
+
auto FinalShuffle = [&](Value *V, const TreeEntry *E) {
ShuffleInstructionBuilder ShuffleBuilder(Builder, *this);
- if (E->State != TreeEntry::NeedToGather &&
- E->getOpcode() == Instruction::Store) {
+ if (E->getOpcode() == Instruction::Store) {
ArrayRef<int> Mask =
ArrayRef(reinterpret_cast<const int *>(E->ReorderIndices.begin()),
E->ReorderIndices.size());
@@ -9175,45 +10138,6 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return ShuffleBuilder.finalize(E->ReuseShuffleIndices);
};
- if (E->State == TreeEntry::NeedToGather) {
- if (E->Idx > 0) {
- // We are in the middle of a vectorizable chain. We need to gather the
- // scalars from the users.
- Value *Vec = createBuildVector(E);
- E->VectorizedValue = Vec;
- return Vec;
- }
- if (E->getMainOp())
- setInsertPointAfterBundle(E);
- SmallVector<Value *> GatheredScalars(E->Scalars.begin(), E->Scalars.end());
- // Build a mask out of the reorder indices and reorder scalars per this
- // mask.
- SmallVector<int> ReorderMask;
- inversePermutation(E->ReorderIndices, ReorderMask);
- if (!ReorderMask.empty())
- reorderScalars(GatheredScalars, ReorderMask);
- Value *Vec;
- SmallVector<int> Mask;
- SmallVector<const TreeEntry *> Entries;
- std::optional<TargetTransformInfo::ShuffleKind> Shuffle =
- isGatherShuffledEntry(E, GatheredScalars, Mask, Entries);
- if (Shuffle) {
- assert((Entries.size() == 1 || Entries.size() == 2) &&
- "Expected shuffle of 1 or 2 entries.");
- Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue,
- Entries.back()->VectorizedValue, Mask);
- if (auto *I = dyn_cast<Instruction>(Vec)) {
- GatherShuffleExtractSeq.insert(I);
- CSEBlocks.insert(I->getParent());
- }
- } else {
- Vec = gather(E->Scalars);
- }
- Vec = FinalShuffle(Vec, E);
- E->VectorizedValue = Vec;
- return Vec;
- }
-
assert((E->State == TreeEntry::Vectorize ||
E->State == TreeEntry::ScatterVectorize) &&
"Unhandled state");
@@ -9248,7 +10172,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
// PHINodes may have multiple entries from the same block. We want to
// visit every block once.
- SmallPtrSet<BasicBlock*, 4> VisitedBBs;
+ SmallPtrSet<BasicBlock *, 4> VisitedBBs;
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
ValueList Operands;
@@ -9314,14 +10238,14 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
SmallVector<int> Mask;
if (!E->ReorderIndices.empty()) {
inversePermutation(E->ReorderIndices, Mask);
- Mask.append(NumElts - NumScalars, UndefMaskElem);
+ Mask.append(NumElts - NumScalars, PoisonMaskElem);
} else {
- Mask.assign(NumElts, UndefMaskElem);
+ Mask.assign(NumElts, PoisonMaskElem);
std::iota(Mask.begin(), std::next(Mask.begin(), NumScalars), 0);
}
// Create InsertVector shuffle if necessary
bool IsIdentity = true;
- SmallVector<int> PrevMask(NumElts, UndefMaskElem);
+ SmallVector<int> PrevMask(NumElts, PoisonMaskElem);
Mask.swap(PrevMask);
for (unsigned I = 0; I < NumScalars; ++I) {
Value *Scalar = E->Scalars[PrevMask[I]];
@@ -9337,9 +10261,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
}
- SmallVector<int> InsertMask(NumElts, UndefMaskElem);
+ SmallVector<int> InsertMask(NumElts, PoisonMaskElem);
for (unsigned I = 0; I < NumElts; I++) {
- if (Mask[I] != UndefMaskElem)
+ if (Mask[I] != PoisonMaskElem)
InsertMask[Offset + I] = I;
}
SmallBitVector UseMask =
@@ -9354,10 +10278,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
isUndefVector<true>(FirstInsert->getOperand(0), UseMask);
if (!IsFirstPoison.all()) {
for (unsigned I = 0; I < NumElts; I++) {
- if (InsertMask[I] == UndefMaskElem && !IsFirstPoison.test(I))
+ if (InsertMask[I] == PoisonMaskElem && !IsFirstPoison.test(I))
InsertMask[I] = I + NumElts;
}
- }
+ }
V = Builder.CreateShuffleVector(
V,
IsFirstPoison.all() ? PoisonValue::get(V->getType())
@@ -9372,8 +10296,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
SmallBitVector IsFirstPoison =
isUndefVector<true>(FirstInsert->getOperand(0), UseMask);
for (unsigned I = 0; I < NumElts; I++) {
- if (InsertMask[I] == UndefMaskElem)
- InsertMask[I] = IsFirstPoison.test(I) ? UndefMaskElem : I;
+ if (InsertMask[I] == PoisonMaskElem)
+ InsertMask[I] = IsFirstPoison.test(I) ? PoisonMaskElem : I;
else
InsertMask[I] += NumElts;
}
@@ -9544,20 +10468,17 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
LoadInst *LI = cast<LoadInst>(VL0);
Instruction *NewLI;
- unsigned AS = LI->getPointerAddressSpace();
Value *PO = LI->getPointerOperand();
if (E->State == TreeEntry::Vectorize) {
- Value *VecPtr = Builder.CreateBitCast(PO, VecTy->getPointerTo(AS));
- NewLI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign());
+ NewLI = Builder.CreateAlignedLoad(VecTy, PO, LI->getAlign());
- // The pointer operand uses an in-tree scalar so we add the new BitCast
- // or LoadInst to ExternalUses list to make sure that an extract will
+ // The pointer operand uses an in-tree scalar so we add the new
+ // LoadInst to ExternalUses list to make sure that an extract will
// be generated in the future.
if (TreeEntry *Entry = getTreeEntry(PO)) {
// Find which lane we need to extract.
unsigned FoundLane = Entry->findLaneForValue(PO);
- ExternalUses.emplace_back(
- PO, PO != VecPtr ? cast<User>(VecPtr) : NewLI, FoundLane);
+ ExternalUses.emplace_back(PO, NewLI, FoundLane);
}
} else {
assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state");
@@ -9653,7 +10574,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
CallInst *CI = cast<CallInst>(VL0);
setInsertPointAfterBundle(E);
- Intrinsic::ID IID = Intrinsic::not_intrinsic;
+ Intrinsic::ID IID = Intrinsic::not_intrinsic;
if (Function *FI = CI->getCalledFunction())
IID = FI->getIntrinsicID();
@@ -9665,8 +10586,11 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
Value *ScalarArg = nullptr;
std::vector<Value *> OpVecs;
- SmallVector<Type *, 2> TysForDecl =
- {FixedVectorType::get(CI->getType(), E->Scalars.size())};
+ SmallVector<Type *, 2> TysForDecl;
+ // Add return type if intrinsic is overloaded on it.
+ if (isVectorIntrinsicWithOverloadTypeAtArg(IID, -1))
+ TysForDecl.push_back(
+ FixedVectorType::get(CI->getType(), E->Scalars.size()));
for (int j = 0, e = CI->arg_size(); j < e; ++j) {
ValueList OpVL;
// Some intrinsics have scalar arguments. This argument should not be
@@ -9808,14 +10732,15 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
return V;
}
default:
- llvm_unreachable("unknown inst");
+ llvm_unreachable("unknown inst");
}
return nullptr;
}
Value *BoUpSLP::vectorizeTree() {
ExtraValueToDebugLocsMap ExternallyUsedValues;
- return vectorizeTree(ExternallyUsedValues);
+ SmallVector<std::pair<Value *, Value *>> ReplacedExternals;
+ return vectorizeTree(ExternallyUsedValues, ReplacedExternals);
}
namespace {
@@ -9829,28 +10754,51 @@ struct ShuffledInsertData {
};
} // namespace
-Value *BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
- Instruction *ReductionRoot) {
+Value *BoUpSLP::vectorizeTree(
+ const ExtraValueToDebugLocsMap &ExternallyUsedValues,
+ SmallVectorImpl<std::pair<Value *, Value *>> &ReplacedExternals,
+ Instruction *ReductionRoot) {
// All blocks must be scheduled before any instructions are inserted.
for (auto &BSIter : BlocksSchedules) {
scheduleBlock(BSIter.second.get());
}
-
- // Pre-gather last instructions.
- for (const std::unique_ptr<TreeEntry> &E : VectorizableTree) {
- if ((E->State == TreeEntry::NeedToGather &&
- (!E->getMainOp() || E->Idx > 0)) ||
- (E->State != TreeEntry::NeedToGather &&
- E->getOpcode() == Instruction::ExtractValue) ||
- E->getOpcode() == Instruction::InsertElement)
- continue;
- Instruction *LastInst = &getLastInstructionInBundle(E.get());
- EntryToLastInstruction.try_emplace(E.get(), LastInst);
- }
+ // Clear the Entry-to-LastInstruction table; it can be affected by
+ // scheduling and needs to be rebuilt.
+ EntryToLastInstruction.clear();
Builder.SetInsertPoint(ReductionRoot ? ReductionRoot
: &F->getEntryBlock().front());
auto *VectorRoot = vectorizeTree(VectorizableTree[0].get());
+ // Run through the list of postponed gathers and emit them, replacing the temp
+ // emitted allocas with actual vector instructions.
+ ArrayRef<const TreeEntry *> PostponedNodes = PostponedGathers.getArrayRef();
+ DenseMap<Value *, SmallVector<TreeEntry *>> PostponedValues;
+ for (const TreeEntry *E : PostponedNodes) {
+ auto *TE = const_cast<TreeEntry *>(E);
+ if (auto *VecTE = getTreeEntry(TE->Scalars.front()))
+ if (VecTE->isSame(TE->UserTreeIndices.front().UserTE->getOperand(
+ TE->UserTreeIndices.front().EdgeIdx)))
+ // Found a gather node that is exactly the same as one of the
+ // vectorized nodes; this may happen after reordering.
+ continue;
+ auto *PrevVec = cast<Instruction>(TE->VectorizedValue);
+ TE->VectorizedValue = nullptr;
+ auto *UserI =
+ cast<Instruction>(TE->UserTreeIndices.front().UserTE->VectorizedValue);
+ Builder.SetInsertPoint(PrevVec);
+ Builder.SetCurrentDebugLocation(UserI->getDebugLoc());
+ Value *Vec = vectorizeTree(TE);
+ PrevVec->replaceAllUsesWith(Vec);
+ PostponedValues.try_emplace(Vec).first->second.push_back(TE);
+ // Replace the stub vector node if it was already used for one of the
+ // buildvector nodes.
+ auto It = PostponedValues.find(PrevVec);
+ if (It != PostponedValues.end()) {
+ for (TreeEntry *VTE : It->getSecond())
+ VTE->VectorizedValue = Vec;
+ }
+ eraseInstruction(PrevVec);
+ }
// If the vectorized tree can be rewritten in a smaller type, we truncate the
// vectorized root. InstCombine will then rewrite the entire expression. We
@@ -9968,14 +10916,9 @@ Value *BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
Builder.SetInsertPoint(&F->getEntryBlock().front());
}
Value *NewInst = ExtractAndExtendIfNeeded(Vec);
- auto &NewInstLocs = ExternallyUsedValues[NewInst];
- auto It = ExternallyUsedValues.find(Scalar);
- assert(It != ExternallyUsedValues.end() &&
- "Externally used scalar is not found in ExternallyUsedValues");
- NewInstLocs.append(It->second);
- ExternallyUsedValues.erase(Scalar);
// Required to update internally referenced instructions.
Scalar->replaceAllUsesWith(NewInst);
+ ReplacedExternals.emplace_back(Scalar, NewInst);
continue;
}
@@ -10004,7 +10947,7 @@ Value *BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
ShuffledInserts.size() - 1);
SmallVectorImpl<int> &Mask = It->ValueMasks[Vec];
if (Mask.empty())
- Mask.assign(FTy->getNumElements(), UndefMaskElem);
+ Mask.assign(FTy->getNumElements(), PoisonMaskElem);
// Find the insertvector, vectorized in tree, if any.
Value *Base = VU;
while (auto *IEBase = dyn_cast<InsertElementInst>(Base)) {
@@ -10017,7 +10960,7 @@ Value *BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
do {
IEBase = cast<InsertElementInst>(Base);
int IEIdx = *getInsertIndex(IEBase);
- assert(Mask[Idx] == UndefMaskElem &&
+ assert(Mask[Idx] == PoisonMaskElem &&
"InsertElementInstruction used already.");
Mask[IEIdx] = IEIdx;
Base = IEBase->getOperand(0);
@@ -10035,7 +10978,7 @@ Value *BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
}
SmallVectorImpl<int> &Mask = It->ValueMasks[Vec];
if (Mask.empty())
- Mask.assign(FTy->getNumElements(), UndefMaskElem);
+ Mask.assign(FTy->getNumElements(), PoisonMaskElem);
Mask[Idx] = ExternalUse.Lane;
It->InsertElements.push_back(cast<InsertElementInst>(User));
continue;
@@ -10077,8 +11020,8 @@ Value *BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
}
auto CreateShuffle = [&](Value *V1, Value *V2, ArrayRef<int> Mask) {
- SmallVector<int> CombinedMask1(Mask.size(), UndefMaskElem);
- SmallVector<int> CombinedMask2(Mask.size(), UndefMaskElem);
+ SmallVector<int> CombinedMask1(Mask.size(), PoisonMaskElem);
+ SmallVector<int> CombinedMask2(Mask.size(), PoisonMaskElem);
int VF = cast<FixedVectorType>(V1->getType())->getNumElements();
for (int I = 0, E = Mask.size(); I < E; ++I) {
if (Mask[I] < VF)
@@ -10103,9 +11046,9 @@ Value *BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues,
return std::make_pair(Vec, true);
}
if (!ForSingleMask) {
- SmallVector<int> ResizeMask(VF, UndefMaskElem);
+ SmallVector<int> ResizeMask(VF, PoisonMaskElem);
for (unsigned I = 0; I < VF; ++I) {
- if (Mask[I] != UndefMaskElem)
+ if (Mask[I] != PoisonMaskElem)
ResizeMask[Mask[I]] = Mask[I];
}
Vec = CreateShuffle(Vec, nullptr, ResizeMask);
@@ -10308,14 +11251,14 @@ void BoUpSLP::optimizeGatherSequence() {
// registers.
unsigned LastUndefsCnt = 0;
for (int I = 0, E = NewMask.size(); I < E; ++I) {
- if (SM1[I] == UndefMaskElem)
+ if (SM1[I] == PoisonMaskElem)
++LastUndefsCnt;
else
LastUndefsCnt = 0;
- if (NewMask[I] != UndefMaskElem && SM1[I] != UndefMaskElem &&
+ if (NewMask[I] != PoisonMaskElem && SM1[I] != PoisonMaskElem &&
NewMask[I] != SM1[I])
return false;
- if (NewMask[I] == UndefMaskElem)
+ if (NewMask[I] == PoisonMaskElem)
NewMask[I] = SM1[I];
}
// Check if the last undefs actually change the final number of used vector
@@ -10590,11 +11533,20 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
}
// Search up and down at the same time, because we don't know if the new
// instruction is above or below the existing scheduling region.
+ // Ignore debug info (and other "AssumeLike" intrinsics) so that it is not
+ // counted against the budget. Otherwise debug info could affect codegen.
BasicBlock::reverse_iterator UpIter =
++ScheduleStart->getIterator().getReverse();
BasicBlock::reverse_iterator UpperEnd = BB->rend();
BasicBlock::iterator DownIter = ScheduleEnd->getIterator();
BasicBlock::iterator LowerEnd = BB->end();
+ auto IsAssumeLikeIntr = [](const Instruction &I) {
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ return II->isAssumeLikeIntrinsic();
+ return false;
+ };
+ UpIter = std::find_if_not(UpIter, UpperEnd, IsAssumeLikeIntr);
+ DownIter = std::find_if_not(DownIter, LowerEnd, IsAssumeLikeIntr);
while (UpIter != UpperEnd && DownIter != LowerEnd && &*UpIter != I &&
&*DownIter != I) {
if (++ScheduleRegionSize > ScheduleRegionSizeLimit) {
@@ -10604,6 +11556,9 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V,
++UpIter;
++DownIter;
+
+ UpIter = std::find_if_not(UpIter, UpperEnd, IsAssumeLikeIntr);
+ DownIter = std::find_if_not(DownIter, LowerEnd, IsAssumeLikeIntr);
}
if (DownIter == LowerEnd || (UpIter != UpperEnd && &*UpIter == I)) {
assert(I->getParent() == ScheduleStart->getParent() &&
@@ -10804,7 +11759,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
unsigned numAliased = 0;
unsigned DistToSrc = 1;
- for ( ; DepDest; DepDest = DepDest->NextLoadStore) {
+ for (; DepDest; DepDest = DepDest->NextLoadStore) {
assert(isInSchedulingRegion(DepDest));
// We have two limits to reduce the complexity:
@@ -11163,8 +12118,8 @@ void BoUpSLP::computeMinimumValueSizes() {
// we can truncate the roots to this narrower type.
for (auto *Root : TreeRoot) {
auto Mask = DB->getDemandedBits(cast<Instruction>(Root));
- MaxBitWidth = std::max<unsigned>(
- Mask.getBitWidth() - Mask.countLeadingZeros(), MaxBitWidth);
+ MaxBitWidth = std::max<unsigned>(Mask.getBitWidth() - Mask.countl_zero(),
+ MaxBitWidth);
}
// True if the roots can be zero-extended back to their original type, rather
@@ -11223,8 +12178,7 @@ void BoUpSLP::computeMinimumValueSizes() {
}
// Round MaxBitWidth up to the next power-of-two.
- if (!isPowerOf2_64(MaxBitWidth))
- MaxBitWidth = NextPowerOf2(MaxBitWidth);
+ MaxBitWidth = llvm::bit_ceil(MaxBitWidth);
// If the maximum bit width we compute is less than the with of the roots'
// type, we can proceed with the narrowing. Otherwise, do nothing.
@@ -11242,60 +12196,6 @@ void BoUpSLP::computeMinimumValueSizes() {
MinBWs[Scalar] = std::make_pair(MaxBitWidth, !IsKnownPositive);
}
-namespace {
-
-/// The SLPVectorizer Pass.
-struct SLPVectorizer : public FunctionPass {
- SLPVectorizerPass Impl;
-
- /// Pass identification, replacement for typeid
- static char ID;
-
- explicit SLPVectorizer() : FunctionPass(ID) {
- initializeSLPVectorizerPass(*PassRegistry::getPassRegistry());
- }
-
- bool doInitialization(Module &M) override { return false; }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
-
- auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
- auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
- auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
- auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
-
- return Impl.runImpl(F, SE, TTI, TLI, AA, LI, DT, AC, DB, ORE);
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- FunctionPass::getAnalysisUsage(AU);
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<DemandedBitsWrapperPass>();
- AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
- AU.addRequired<InjectTLIMappingsLegacy>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.setPreservesCFG();
- }
-};
-
-} // end anonymous namespace
-
PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) {
auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
@@ -11536,7 +12436,7 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
unsigned MaxVecRegSize = R.getMaxVecRegSize();
unsigned EltSize = R.getVectorElementSize(Operands[0]);
- unsigned MaxElts = llvm::PowerOf2Floor(MaxVecRegSize / EltSize);
+ unsigned MaxElts = llvm::bit_floor(MaxVecRegSize / EltSize);
unsigned MaxVF = std::min(R.getMaximumVF(EltSize, Instruction::Store),
MaxElts);
@@ -11618,17 +12518,8 @@ void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) {
}
}
-bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
- if (!A || !B)
- return false;
- if (isa<InsertElementInst>(A) || isa<InsertElementInst>(B))
- return false;
- Value *VL[] = {A, B};
- return tryToVectorizeList(VL, R);
-}
-
bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
- bool LimitForRegisterSize) {
+ bool MaxVFOnly) {
if (VL.size() < 2)
return false;
@@ -11663,7 +12554,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
unsigned Sz = R.getVectorElementSize(I0);
unsigned MinVF = R.getMinVF(Sz);
- unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
+ unsigned MaxVF = std::max<unsigned>(llvm::bit_floor(VL.size()), MinVF);
MaxVF = std::min(R.getMaximumVF(Sz, S.getOpcode()), MaxVF);
if (MaxVF < 2) {
R.getORE()->emit([&]() {
@@ -11690,21 +12581,17 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
if (TTI->getNumberOfParts(VecTy) == VF)
continue;
for (unsigned I = NextInst; I < MaxInst; ++I) {
- unsigned OpsWidth = 0;
+ unsigned ActualVF = std::min(MaxInst - I, VF);
- if (I + VF > MaxInst)
- OpsWidth = MaxInst - I;
- else
- OpsWidth = VF;
-
- if (!isPowerOf2_32(OpsWidth))
+ if (!isPowerOf2_32(ActualVF))
continue;
- if ((LimitForRegisterSize && OpsWidth < MaxVF) ||
- (VF > MinVF && OpsWidth <= VF / 2) || (VF == MinVF && OpsWidth < 2))
+ if (MaxVFOnly && ActualVF < MaxVF)
+ break;
+ if ((VF > MinVF && ActualVF <= VF / 2) || (VF == MinVF && ActualVF < 2))
break;
- ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
+ ArrayRef<Value *> Ops = VL.slice(I, ActualVF);
// Check that a previous iteration of this loop did not delete the Value.
if (llvm::any_of(Ops, [&R](Value *V) {
auto *I = dyn_cast<Instruction>(V);
@@ -11712,7 +12599,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
}))
continue;
- LLVM_DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
+ LLVM_DEBUG(dbgs() << "SLP: Analyzing " << ActualVF << " operations "
<< "\n");
R.buildTree(Ops);
@@ -11730,7 +12617,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
MinCost = std::min(MinCost, Cost);
LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost
- << " for VF=" << OpsWidth << "\n");
+ << " for VF=" << ActualVF << "\n");
if (Cost < -SLPCostThreshold) {
LLVM_DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
R.getORE()->emit(OptimizationRemark(SV_NAME, "VectorizedList",
@@ -11806,14 +12693,14 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) {
}
if (Candidates.size() == 1)
- return tryToVectorizePair(Op0, Op1, R);
+ return tryToVectorizeList({Op0, Op1}, R);
// We have multiple options. Try to pick the single best.
std::optional<int> BestCandidate = R.findBestRootPair(Candidates);
if (!BestCandidate)
return false;
- return tryToVectorizePair(Candidates[*BestCandidate].first,
- Candidates[*BestCandidate].second, R);
+ return tryToVectorizeList(
+ {Candidates[*BestCandidate].first, Candidates[*BestCandidate].second}, R);
}
namespace {
@@ -11857,6 +12744,9 @@ class HorizontalReduction {
WeakTrackingVH ReductionRoot;
/// The type of reduction operation.
RecurKind RdxKind;
+ /// Checks if the optimization of original scalar identity operations on
+ /// matched horizontal reductions is enabled and allowed.
+ bool IsSupportedHorRdxIdentityOp = false;
static bool isCmpSelMinMax(Instruction *I) {
return match(I, m_Select(m_Cmp(), m_Value(), m_Value())) &&
@@ -11888,6 +12778,9 @@ class HorizontalReduction {
return I->getFastMathFlags().noNaNs();
}
+ if (Kind == RecurKind::FMaximum || Kind == RecurKind::FMinimum)
+ return true;
+
return I->isAssociative();
}
@@ -11905,6 +12798,7 @@ class HorizontalReduction {
static Value *createOp(IRBuilder<> &Builder, RecurKind Kind, Value *LHS,
Value *RHS, const Twine &Name, bool UseSelect) {
unsigned RdxOpcode = RecurrenceDescriptor::getOpcode(Kind);
+ bool IsConstant = isConstant(LHS) && isConstant(RHS);
switch (Kind) {
case RecurKind::Or:
if (UseSelect &&
@@ -11926,29 +12820,49 @@ class HorizontalReduction {
return Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, LHS, RHS,
Name);
case RecurKind::FMax:
+ if (IsConstant)
+ return ConstantFP::get(LHS->getType(),
+ maxnum(cast<ConstantFP>(LHS)->getValueAPF(),
+ cast<ConstantFP>(RHS)->getValueAPF()));
return Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, LHS, RHS);
case RecurKind::FMin:
+ if (IsConstant)
+ return ConstantFP::get(LHS->getType(),
+ minnum(cast<ConstantFP>(LHS)->getValueAPF(),
+ cast<ConstantFP>(RHS)->getValueAPF()));
return Builder.CreateBinaryIntrinsic(Intrinsic::minnum, LHS, RHS);
+ case RecurKind::FMaximum:
+ if (IsConstant)
+ return ConstantFP::get(LHS->getType(),
+ maximum(cast<ConstantFP>(LHS)->getValueAPF(),
+ cast<ConstantFP>(RHS)->getValueAPF()));
+ return Builder.CreateBinaryIntrinsic(Intrinsic::maximum, LHS, RHS);
+ case RecurKind::FMinimum:
+ if (IsConstant)
+ return ConstantFP::get(LHS->getType(),
+ minimum(cast<ConstantFP>(LHS)->getValueAPF(),
+ cast<ConstantFP>(RHS)->getValueAPF()));
+ return Builder.CreateBinaryIntrinsic(Intrinsic::minimum, LHS, RHS);
case RecurKind::SMax:
- if (UseSelect) {
+ if (IsConstant || UseSelect) {
Value *Cmp = Builder.CreateICmpSGT(LHS, RHS, Name);
return Builder.CreateSelect(Cmp, LHS, RHS, Name);
}
return Builder.CreateBinaryIntrinsic(Intrinsic::smax, LHS, RHS);
case RecurKind::SMin:
- if (UseSelect) {
+ if (IsConstant || UseSelect) {
Value *Cmp = Builder.CreateICmpSLT(LHS, RHS, Name);
return Builder.CreateSelect(Cmp, LHS, RHS, Name);
}
return Builder.CreateBinaryIntrinsic(Intrinsic::smin, LHS, RHS);
case RecurKind::UMax:
- if (UseSelect) {
+ if (IsConstant || UseSelect) {
Value *Cmp = Builder.CreateICmpUGT(LHS, RHS, Name);
return Builder.CreateSelect(Cmp, LHS, RHS, Name);
}
return Builder.CreateBinaryIntrinsic(Intrinsic::umax, LHS, RHS);
case RecurKind::UMin:
- if (UseSelect) {
+ if (IsConstant || UseSelect) {
Value *Cmp = Builder.CreateICmpULT(LHS, RHS, Name);
return Builder.CreateSelect(Cmp, LHS, RHS, Name);
}
@@ -11984,6 +12898,7 @@ class HorizontalReduction {
return Op;
}
+public:
static RecurKind getRdxKind(Value *V) {
auto *I = dyn_cast<Instruction>(V);
if (!I)
@@ -12010,6 +12925,10 @@ class HorizontalReduction {
if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value())))
return RecurKind::FMin;
+ if (match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(), m_Value())))
+ return RecurKind::FMaximum;
+ if (match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(), m_Value())))
+ return RecurKind::FMinimum;
// This matches either cmp+select or intrinsics. SLP is expected to handle
// either form.
// TODO: If we are canonicalizing to intrinsics, we can remove several
@@ -12086,6 +13005,7 @@ class HorizontalReduction {
return isCmpSelMinMax(I) ? 1 : 0;
}
+private:
/// Total number of operands in the reduction operation.
static unsigned getNumberOfOperands(Instruction *I) {
return isCmpSelMinMax(I) ? 3 : 2;
@@ -12134,17 +13054,6 @@ class HorizontalReduction {
}
}
- static Value *getLHS(RecurKind Kind, Instruction *I) {
- if (Kind == RecurKind::None)
- return nullptr;
- return I->getOperand(getFirstOperandIndex(I));
- }
- static Value *getRHS(RecurKind Kind, Instruction *I) {
- if (Kind == RecurKind::None)
- return nullptr;
- return I->getOperand(getFirstOperandIndex(I) + 1);
- }
-
static bool isGoodForReduction(ArrayRef<Value *> Data) {
int Sz = Data.size();
auto *I = dyn_cast<Instruction>(Data.front());
@@ -12156,65 +13065,39 @@ public:
HorizontalReduction() = default;
/// Try to find a reduction tree.
- bool matchAssociativeReduction(PHINode *Phi, Instruction *Inst,
+ bool matchAssociativeReduction(BoUpSLP &R, Instruction *Root,
ScalarEvolution &SE, const DataLayout &DL,
const TargetLibraryInfo &TLI) {
- assert((!Phi || is_contained(Phi->operands(), Inst)) &&
- "Phi needs to use the binary operator");
- assert((isa<BinaryOperator>(Inst) || isa<SelectInst>(Inst) ||
- isa<IntrinsicInst>(Inst)) &&
- "Expected binop, select, or intrinsic for reduction matching");
- RdxKind = getRdxKind(Inst);
-
- // We could have a initial reductions that is not an add.
- // r *= v1 + v2 + v3 + v4
- // In such a case start looking for a tree rooted in the first '+'.
- if (Phi) {
- if (getLHS(RdxKind, Inst) == Phi) {
- Phi = nullptr;
- Inst = dyn_cast<Instruction>(getRHS(RdxKind, Inst));
- if (!Inst)
- return false;
- RdxKind = getRdxKind(Inst);
- } else if (getRHS(RdxKind, Inst) == Phi) {
- Phi = nullptr;
- Inst = dyn_cast<Instruction>(getLHS(RdxKind, Inst));
- if (!Inst)
- return false;
- RdxKind = getRdxKind(Inst);
- }
- }
-
- if (!isVectorizable(RdxKind, Inst))
+ RdxKind = HorizontalReduction::getRdxKind(Root);
+ if (!isVectorizable(RdxKind, Root))
return false;
// Analyze "regular" integer/FP types for reductions - no target-specific
// types or pointers.
- Type *Ty = Inst->getType();
+ Type *Ty = Root->getType();
if (!isValidElementType(Ty) || Ty->isPointerTy())
return false;
// Though the ultimate reduction may have multiple uses, its condition must
// have only single use.
- if (auto *Sel = dyn_cast<SelectInst>(Inst))
+ if (auto *Sel = dyn_cast<SelectInst>(Root))
if (!Sel->getCondition()->hasOneUse())
return false;
- ReductionRoot = Inst;
+ ReductionRoot = Root;
// Iterate through all the operands of the possible reduction tree and
// gather all the reduced values, sorting them by their value id.
- BasicBlock *BB = Inst->getParent();
- bool IsCmpSelMinMax = isCmpSelMinMax(Inst);
- SmallVector<Instruction *> Worklist(1, Inst);
+ BasicBlock *BB = Root->getParent();
+ bool IsCmpSelMinMax = isCmpSelMinMax(Root);
+ SmallVector<Instruction *> Worklist(1, Root);
// Checks if the operands of the \p TreeN instruction are also reduction
// operations or should be treated as reduced values or an extra argument,
// which is not part of the reduction.
- auto &&CheckOperands = [this, IsCmpSelMinMax,
- BB](Instruction *TreeN,
- SmallVectorImpl<Value *> &ExtraArgs,
- SmallVectorImpl<Value *> &PossibleReducedVals,
- SmallVectorImpl<Instruction *> &ReductionOps) {
+ auto CheckOperands = [&](Instruction *TreeN,
+ SmallVectorImpl<Value *> &ExtraArgs,
+ SmallVectorImpl<Value *> &PossibleReducedVals,
+ SmallVectorImpl<Instruction *> &ReductionOps) {
for (int I = getFirstOperandIndex(TreeN),
End = getNumberOfOperands(TreeN);
I < End; ++I) {
@@ -12229,10 +13112,14 @@ public:
}
// If the edge is not an instruction, or it is different from the main
// reduction opcode or has too many uses - possible reduced value.
+ // Also, do not try to reduce const values, if the operation is not
+ // foldable.
if (!EdgeInst || getRdxKind(EdgeInst) != RdxKind ||
IsCmpSelMinMax != isCmpSelMinMax(EdgeInst) ||
!hasRequiredNumberOfUses(IsCmpSelMinMax, EdgeInst) ||
- !isVectorizable(getRdxKind(EdgeInst), EdgeInst)) {
+ !isVectorizable(RdxKind, EdgeInst) ||
+ (R.isAnalyzedReductionRoot(EdgeInst) &&
+ all_of(EdgeInst->operands(), Constant::classof))) {
PossibleReducedVals.push_back(EdgeVal);
continue;
}
@@ -12246,10 +13133,43 @@ public:
// instructions (grouping them by the predicate).
MapVector<size_t, MapVector<size_t, MapVector<Value *, unsigned>>>
PossibleReducedVals;
- initReductionOps(Inst);
+ initReductionOps(Root);
DenseMap<Value *, SmallVector<LoadInst *>> LoadsMap;
SmallSet<size_t, 2> LoadKeyUsed;
SmallPtrSet<Value *, 4> DoNotReverseVals;
+
+ auto GenerateLoadsSubkey = [&](size_t Key, LoadInst *LI) {
+ Value *Ptr = getUnderlyingObject(LI->getPointerOperand());
+ if (LoadKeyUsed.contains(Key)) {
+ auto LIt = LoadsMap.find(Ptr);
+ if (LIt != LoadsMap.end()) {
+ for (LoadInst *RLI : LIt->second) {
+ if (getPointersDiff(RLI->getType(), RLI->getPointerOperand(),
+ LI->getType(), LI->getPointerOperand(), DL, SE,
+ /*StrictCheck=*/true))
+ return hash_value(RLI->getPointerOperand());
+ }
+ for (LoadInst *RLI : LIt->second) {
+ if (arePointersCompatible(RLI->getPointerOperand(),
+ LI->getPointerOperand(), TLI)) {
+ hash_code SubKey = hash_value(RLI->getPointerOperand());
+ DoNotReverseVals.insert(RLI);
+ return SubKey;
+ }
+ }
+ if (LIt->second.size() > 2) {
+ hash_code SubKey =
+ hash_value(LIt->second.back()->getPointerOperand());
+ DoNotReverseVals.insert(LIt->second.back());
+ return SubKey;
+ }
+ }
+ }
+ LoadKeyUsed.insert(Key);
+ LoadsMap.try_emplace(Ptr).first->second.push_back(LI);
+ return hash_value(LI->getPointerOperand());
+ };
+
while (!Worklist.empty()) {
Instruction *TreeN = Worklist.pop_back_val();
SmallVector<Value *> Args;
@@ -12269,41 +13189,8 @@ public:
// results.
for (Value *V : PossibleRedVals) {
size_t Key, Idx;
- std::tie(Key, Idx) = generateKeySubkey(
- V, &TLI,
- [&](size_t Key, LoadInst *LI) {
- Value *Ptr = getUnderlyingObject(LI->getPointerOperand());
- if (LoadKeyUsed.contains(Key)) {
- auto LIt = LoadsMap.find(Ptr);
- if (LIt != LoadsMap.end()) {
- for (LoadInst *RLI: LIt->second) {
- if (getPointersDiff(
- RLI->getType(), RLI->getPointerOperand(),
- LI->getType(), LI->getPointerOperand(), DL, SE,
- /*StrictCheck=*/true))
- return hash_value(RLI->getPointerOperand());
- }
- for (LoadInst *RLI : LIt->second) {
- if (arePointersCompatible(RLI->getPointerOperand(),
- LI->getPointerOperand(), TLI)) {
- hash_code SubKey = hash_value(RLI->getPointerOperand());
- DoNotReverseVals.insert(RLI);
- return SubKey;
- }
- }
- if (LIt->second.size() > 2) {
- hash_code SubKey =
- hash_value(LIt->second.back()->getPointerOperand());
- DoNotReverseVals.insert(LIt->second.back());
- return SubKey;
- }
- }
- }
- LoadKeyUsed.insert(Key);
- LoadsMap.try_emplace(Ptr).first->second.push_back(LI);
- return hash_value(LI->getPointerOperand());
- },
- /*AllowAlternate=*/false);
+ std::tie(Key, Idx) = generateKeySubkey(V, &TLI, GenerateLoadsSubkey,
+ /*AllowAlternate=*/false);
++PossibleReducedVals[Key][Idx]
.insert(std::make_pair(V, 0))
.first->second;
@@ -12312,40 +13199,8 @@ public:
PossibleReductionOps.rend());
} else {
size_t Key, Idx;
- std::tie(Key, Idx) = generateKeySubkey(
- TreeN, &TLI,
- [&](size_t Key, LoadInst *LI) {
- Value *Ptr = getUnderlyingObject(LI->getPointerOperand());
- if (LoadKeyUsed.contains(Key)) {
- auto LIt = LoadsMap.find(Ptr);
- if (LIt != LoadsMap.end()) {
- for (LoadInst *RLI: LIt->second) {
- if (getPointersDiff(RLI->getType(),
- RLI->getPointerOperand(), LI->getType(),
- LI->getPointerOperand(), DL, SE,
- /*StrictCheck=*/true))
- return hash_value(RLI->getPointerOperand());
- }
- for (LoadInst *RLI : LIt->second) {
- if (arePointersCompatible(RLI->getPointerOperand(),
- LI->getPointerOperand(), TLI)) {
- hash_code SubKey = hash_value(RLI->getPointerOperand());
- DoNotReverseVals.insert(RLI);
- return SubKey;
- }
- }
- if (LIt->second.size() > 2) {
- hash_code SubKey = hash_value(LIt->second.back()->getPointerOperand());
- DoNotReverseVals.insert(LIt->second.back());
- return SubKey;
- }
- }
- }
- LoadKeyUsed.insert(Key);
- LoadsMap.try_emplace(Ptr).first->second.push_back(LI);
- return hash_value(LI->getPointerOperand());
- },
- /*AllowAlternate=*/false);
+ std::tie(Key, Idx) = generateKeySubkey(TreeN, &TLI, GenerateLoadsSubkey,
+ /*AllowAlternate=*/false);
++PossibleReducedVals[Key][Idx]
.insert(std::make_pair(TreeN, 0))
.first->second;
@@ -12407,14 +13262,18 @@ public:
// If there are a sufficient number of reduction values, reduce
// to a nearby power-of-2. We can safely generate oversized
// vectors and rely on the backend to split them to legal sizes.
- size_t NumReducedVals =
+ unsigned NumReducedVals =
std::accumulate(ReducedVals.begin(), ReducedVals.end(), 0,
- [](size_t Num, ArrayRef<Value *> Vals) {
+ [](unsigned Num, ArrayRef<Value *> Vals) -> unsigned {
if (!isGoodForReduction(Vals))
return Num;
return Num + Vals.size();
});
- if (NumReducedVals < ReductionLimit) {
+ if (NumReducedVals < ReductionLimit &&
+ (!AllowHorRdxIdenityOptimization ||
+ all_of(ReducedVals, [](ArrayRef<Value *> RedV) {
+ return RedV.size() < 2 || !allConstant(RedV) || !isSplat(RedV);
+ }))) {
for (ReductionOpsType &RdxOps : ReductionOps)
for (Value *RdxOp : RdxOps)
V.analyzedReductionRoot(cast<Instruction>(RdxOp));
@@ -12428,6 +13287,7 @@ public:
DenseMap<Value *, WeakTrackingVH> TrackedVals(
ReducedVals.size() * ReducedVals.front().size() + ExtraArgs.size());
BoUpSLP::ExtraValueToDebugLocsMap ExternallyUsedValues;
+ SmallVector<std::pair<Value *, Value *>> ReplacedExternals;
ExternallyUsedValues.reserve(ExtraArgs.size() + 1);
// The same extra argument may be used several times, so log each attempt
// to use it.
@@ -12448,6 +13308,18 @@ public:
return cast<Instruction>(ScalarCond);
};
+ // Return new VectorizedTree, based on previous value.
+ auto GetNewVectorizedTree = [&](Value *VectorizedTree, Value *Res) {
+ if (VectorizedTree) {
+ // Update the final value in the reduction.
+ Builder.SetCurrentDebugLocation(
+ cast<Instruction>(ReductionOps.front().front())->getDebugLoc());
+ return createOp(Builder, RdxKind, VectorizedTree, Res, "op.rdx",
+ ReductionOps);
+ }
+ // Initialize the final value in the reduction.
+ return Res;
+ };
// The reduction root is used as the insertion point for new instructions,
// so set it as externally used to prevent it from being deleted.
ExternallyUsedValues[ReductionRoot];
@@ -12459,6 +13331,12 @@ public:
continue;
IgnoreList.insert(RdxOp);
}
+ // Intersect the fast-math-flags from all reduction operations.
+ FastMathFlags RdxFMF;
+ RdxFMF.set();
+ for (Value *U : IgnoreList)
+ if (auto *FPMO = dyn_cast<FPMathOperator>(U))
+ RdxFMF &= FPMO->getFastMathFlags();
bool IsCmpSelMinMax = isCmpSelMinMax(cast<Instruction>(ReductionRoot));
// Need to track reduced vals, they may be changed during vectorization of
@@ -12519,16 +13397,82 @@ public:
}
}
}
+
+ // Emit code for constant values.
+ if (AllowHorRdxIdenityOptimization && Candidates.size() > 1 &&
+ allConstant(Candidates)) {
+ Value *Res = Candidates.front();
+ ++VectorizedVals.try_emplace(Candidates.front(), 0).first->getSecond();
+ for (Value *VC : ArrayRef(Candidates).drop_front()) {
+ Res = createOp(Builder, RdxKind, Res, VC, "const.rdx", ReductionOps);
+ ++VectorizedVals.try_emplace(VC, 0).first->getSecond();
+ if (auto *ResI = dyn_cast<Instruction>(Res))
+ V.analyzedReductionRoot(ResI);
+ }
+ VectorizedTree = GetNewVectorizedTree(VectorizedTree, Res);
+ continue;
+ }
+
unsigned NumReducedVals = Candidates.size();
- if (NumReducedVals < ReductionLimit)
+ if (NumReducedVals < ReductionLimit &&
+ (NumReducedVals < 2 || !AllowHorRdxIdenityOptimization ||
+ !isSplat(Candidates)))
continue;
+ // Check if we support processing of repeated scalar values (optimization
+ // of original scalar identity operations on matched horizontal reductions).
+ IsSupportedHorRdxIdentityOp =
+ AllowHorRdxIdenityOptimization && RdxKind != RecurKind::Mul &&
+ RdxKind != RecurKind::FMul && RdxKind != RecurKind::FMulAdd;
+ // Gather same values.
+ MapVector<Value *, unsigned> SameValuesCounter;
+ if (IsSupportedHorRdxIdentityOp)
+ for (Value *V : Candidates)
+ ++SameValuesCounter.insert(std::make_pair(V, 0)).first->second;
+ // Used to check if the reduced values are used the same number of times.
+ // In this case the compiler may produce better code. E.g. if the reduced
+ // values are aabbccdd (8 values), then the first node of the tree will be
+ // a node for 4 x abcd plus a shuffle <4 x abcd>, <0, 0, 1, 1, 2, 2, 3, 3>,
+ // and the final reduction will be performed on <8 x aabbccdd>. Instead,
+ // the compiler may build the <4 x abcd> tree immediately and compute
+ // reduction(4 x abcd) * 2.
+ // Currently this only handles add/fadd/xor; and/or/min/max do not require
+ // this analysis, and other operations may require an extra estimation of
+ // the profitability.
+ bool SameScaleFactor = false;
+ bool OptReusedScalars = IsSupportedHorRdxIdentityOp &&
+ SameValuesCounter.size() != Candidates.size();
+ if (OptReusedScalars) {
+ SameScaleFactor =
+ (RdxKind == RecurKind::Add || RdxKind == RecurKind::FAdd ||
+ RdxKind == RecurKind::Xor) &&
+ all_of(drop_begin(SameValuesCounter),
+ [&SameValuesCounter](const std::pair<Value *, unsigned> &P) {
+ return P.second == SameValuesCounter.front().second;
+ });
+ Candidates.resize(SameValuesCounter.size());
+ transform(SameValuesCounter, Candidates.begin(),
+ [](const auto &P) { return P.first; });
+ NumReducedVals = Candidates.size();
+ // Have a reduction of the same element.
+ if (NumReducedVals == 1) {
+ Value *OrigV = TrackedToOrig.find(Candidates.front())->second;
+ unsigned Cnt = SameValuesCounter.lookup(OrigV);
+ Value *RedVal =
+ emitScaleForReusedOps(Candidates.front(), Builder, Cnt);
+ VectorizedTree = GetNewVectorizedTree(VectorizedTree, RedVal);
+ VectorizedVals.try_emplace(OrigV, Cnt);
+ continue;
+ }
+ }
+
unsigned MaxVecRegSize = V.getMaxVecRegSize();
unsigned EltSize = V.getVectorElementSize(Candidates[0]);
- unsigned MaxElts = RegMaxNumber * PowerOf2Floor(MaxVecRegSize / EltSize);
+ unsigned MaxElts =
+ RegMaxNumber * llvm::bit_floor(MaxVecRegSize / EltSize);
unsigned ReduxWidth = std::min<unsigned>(
- PowerOf2Floor(NumReducedVals), std::max(RedValsMaxNumber, MaxElts));
+ llvm::bit_floor(NumReducedVals), std::max(RedValsMaxNumber, MaxElts));
unsigned Start = 0;
unsigned Pos = Start;
// Restarts vectorization attempt with lower vector factor.
@@ -12551,6 +13495,7 @@ public:
ReduxWidth /= 2;
return IsAnyRedOpGathered;
};
+ bool AnyVectorized = false;
while (Pos < NumReducedVals - ReduxWidth + 1 &&
ReduxWidth >= ReductionLimit) {
// Dependency in tree of the reduction ops - drop this attempt, try
@@ -12603,34 +13548,24 @@ public:
LocalExternallyUsedValues[TrackedVals[V]];
});
}
- // Number of uses of the candidates in the vector of values.
- SmallDenseMap<Value *, unsigned> NumUses(Candidates.size());
- for (unsigned Cnt = 0; Cnt < Pos; ++Cnt) {
- Value *V = Candidates[Cnt];
- ++NumUses.try_emplace(V, 0).first->getSecond();
- }
- for (unsigned Cnt = Pos + ReduxWidth; Cnt < NumReducedVals; ++Cnt) {
- Value *V = Candidates[Cnt];
- ++NumUses.try_emplace(V, 0).first->getSecond();
+ if (!IsSupportedHorRdxIdentityOp) {
+ // Number of uses of the candidates in the vector of values.
+ assert(SameValuesCounter.empty() &&
+ "Reused values counter map is not empty");
+ for (unsigned Cnt = 0; Cnt < NumReducedVals; ++Cnt) {
+ if (Cnt >= Pos && Cnt < Pos + ReduxWidth)
+ continue;
+ Value *V = Candidates[Cnt];
+ Value *OrigV = TrackedToOrig.find(V)->second;
+ ++SameValuesCounter[OrigV];
+ }
}
SmallPtrSet<Value *, 4> VLScalars(VL.begin(), VL.end());
// Gather externally used values.
SmallPtrSet<Value *, 4> Visited;
- for (unsigned Cnt = 0; Cnt < Pos; ++Cnt) {
- Value *RdxVal = Candidates[Cnt];
- if (!Visited.insert(RdxVal).second)
+ for (unsigned Cnt = 0; Cnt < NumReducedVals; ++Cnt) {
+ if (Cnt >= Pos && Cnt < Pos + ReduxWidth)
continue;
- // Check if the scalar was vectorized as part of the vectorization
- // tree but not the top node.
- if (!VLScalars.contains(RdxVal) && V.isVectorized(RdxVal)) {
- LocalExternallyUsedValues[RdxVal];
- continue;
- }
- unsigned NumOps = VectorizedVals.lookup(RdxVal) + NumUses[RdxVal];
- if (NumOps != ReducedValsToOps.find(RdxVal)->second.size())
- LocalExternallyUsedValues[RdxVal];
- }
- for (unsigned Cnt = Pos + ReduxWidth; Cnt < NumReducedVals; ++Cnt) {
Value *RdxVal = Candidates[Cnt];
if (!Visited.insert(RdxVal).second)
continue;
@@ -12640,42 +13575,34 @@ public:
LocalExternallyUsedValues[RdxVal];
continue;
}
- unsigned NumOps = VectorizedVals.lookup(RdxVal) + NumUses[RdxVal];
- if (NumOps != ReducedValsToOps.find(RdxVal)->second.size())
+ Value *OrigV = TrackedToOrig.find(RdxVal)->second;
+ unsigned NumOps =
+ VectorizedVals.lookup(RdxVal) + SameValuesCounter[OrigV];
+ if (NumOps != ReducedValsToOps.find(OrigV)->second.size())
LocalExternallyUsedValues[RdxVal];
}
+ // Do not need the list of reused scalars in regular mode anymore.
+ if (!IsSupportedHorRdxIdentityOp)
+ SameValuesCounter.clear();
for (Value *RdxVal : VL)
if (RequiredExtract.contains(RdxVal))
LocalExternallyUsedValues[RdxVal];
+ // Update LocalExternallyUsedValues for the scalars replaced by
+ // extractelement instructions.
+ for (const std::pair<Value *, Value *> &Pair : ReplacedExternals) {
+ auto It = ExternallyUsedValues.find(Pair.first);
+ if (It == ExternallyUsedValues.end())
+ continue;
+ LocalExternallyUsedValues[Pair.second].append(It->second);
+ }
V.buildExternalUses(LocalExternallyUsedValues);
V.computeMinimumValueSizes();
- // Intersect the fast-math-flags from all reduction operations.
- FastMathFlags RdxFMF;
- RdxFMF.set();
- for (Value *U : IgnoreList)
- if (auto *FPMO = dyn_cast<FPMathOperator>(U))
- RdxFMF &= FPMO->getFastMathFlags();
// Estimate cost.
InstructionCost TreeCost = V.getTreeCost(VL);
InstructionCost ReductionCost =
- getReductionCost(TTI, VL, ReduxWidth, RdxFMF);
- if (V.isVectorizedFirstNode() && isa<LoadInst>(VL.front())) {
- Instruction *MainOp = V.getFirstNodeMainOp();
- for (Value *V : VL) {
- auto *VI = dyn_cast<LoadInst>(V);
- // Add the costs of scalar GEP pointers, to be removed from the
- // code.
- if (!VI || VI == MainOp)
- continue;
- auto *Ptr = dyn_cast<GetElementPtrInst>(VI->getPointerOperand());
- if (!Ptr || !Ptr->hasOneUse() || Ptr->hasAllConstantIndices())
- continue;
- TreeCost -= TTI->getArithmeticInstrCost(
- Instruction::Add, Ptr->getType(), TTI::TCK_RecipThroughput);
- }
- }
+ getReductionCost(TTI, VL, IsCmpSelMinMax, ReduxWidth, RdxFMF);
InstructionCost Cost = TreeCost + ReductionCost;
LLVM_DEBUG(dbgs() << "SLP: Found cost = " << Cost << " for reduction\n");
if (!Cost.isValid())
@@ -12716,8 +13643,8 @@ public:
InsertPt = GetCmpForMinMaxReduction(RdxRootInst);
// Vectorize a tree.
- Value *VectorizedRoot =
- V.vectorizeTree(LocalExternallyUsedValues, InsertPt);
+ Value *VectorizedRoot = V.vectorizeTree(LocalExternallyUsedValues,
+ ReplacedExternals, InsertPt);
Builder.SetInsertPoint(InsertPt);
@@ -12727,29 +13654,48 @@ public:
if (isBoolLogicOp(RdxRootInst))
VectorizedRoot = Builder.CreateFreeze(VectorizedRoot);
+ // Emit code to correctly handle reused reduced values, if required.
+ if (OptReusedScalars && !SameScaleFactor) {
+ VectorizedRoot =
+ emitReusedOps(VectorizedRoot, Builder, V.getRootNodeScalars(),
+ SameValuesCounter, TrackedToOrig);
+ }
+
Value *ReducedSubTree =
emitReduction(VectorizedRoot, Builder, ReduxWidth, TTI);
- if (!VectorizedTree) {
- // Initialize the final value in the reduction.
- VectorizedTree = ReducedSubTree;
- } else {
- // Update the final value in the reduction.
- Builder.SetCurrentDebugLocation(
- cast<Instruction>(ReductionOps.front().front())->getDebugLoc());
- VectorizedTree = createOp(Builder, RdxKind, VectorizedTree,
- ReducedSubTree, "op.rdx", ReductionOps);
- }
+ // Improved analysis for add/fadd/xor reductions with same scale factor
+ // for all operands of reductions. We can emit scalar ops for them
+ // instead.
+ if (OptReusedScalars && SameScaleFactor)
+ ReducedSubTree = emitScaleForReusedOps(
+ ReducedSubTree, Builder, SameValuesCounter.front().second);
+
+ VectorizedTree = GetNewVectorizedTree(VectorizedTree, ReducedSubTree);
// Count vectorized reduced values to exclude them from final reduction.
for (Value *RdxVal : VL) {
- ++VectorizedVals.try_emplace(TrackedToOrig.find(RdxVal)->second, 0)
- .first->getSecond();
+ Value *OrigV = TrackedToOrig.find(RdxVal)->second;
+ if (IsSupportedHorRdxIdentityOp) {
+ VectorizedVals.try_emplace(OrigV, SameValuesCounter[RdxVal]);
+ continue;
+ }
+ ++VectorizedVals.try_emplace(OrigV, 0).first->getSecond();
if (!V.isVectorized(RdxVal))
RequiredExtract.insert(RdxVal);
}
Pos += ReduxWidth;
Start = Pos;
- ReduxWidth = PowerOf2Floor(NumReducedVals - Pos);
+ ReduxWidth = llvm::bit_floor(NumReducedVals - Pos);
+ AnyVectorized = true;
+ }
+ if (OptReusedScalars && !AnyVectorized) {
+ for (const std::pair<Value *, unsigned> &P : SameValuesCounter) {
+ Value *RedVal = emitScaleForReusedOps(P.first, Builder, P.second);
+ VectorizedTree = GetNewVectorizedTree(VectorizedTree, RedVal);
+ Value *OrigV = TrackedToOrig.find(P.first)->second;
+ VectorizedVals.try_emplace(OrigV, P.second);
+ }
+ continue;
}
}
if (VectorizedTree) {
@@ -12757,7 +13703,7 @@ public:
// possible problem with poison propagation. If not possible to reorder
// (both operands are originally RHS), emit an extra freeze instruction
// for the LHS operand.
- //I.e., if we have original code like this:
+ // I.e., if we have original code like this:
// RedOp1 = select i1 ?, i1 LHS, i1 false
// RedOp2 = select i1 RHS, i1 ?, i1 false
@@ -12892,7 +13838,8 @@ private:
/// Calculate the cost of a reduction.
InstructionCost getReductionCost(TargetTransformInfo *TTI,
ArrayRef<Value *> ReducedVals,
- unsigned ReduxWidth, FastMathFlags FMF) {
+ bool IsCmpSelMinMax, unsigned ReduxWidth,
+ FastMathFlags FMF) {
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
Value *FirstReducedVal = ReducedVals.front();
Type *ScalarTy = FirstReducedVal->getType();
@@ -12900,7 +13847,36 @@ private:
InstructionCost VectorCost = 0, ScalarCost;
// If all of the reduced values are constant, the vector cost is 0, since
// the reduction value can be calculated at the compile time.
- bool AllConsts = all_of(ReducedVals, isConstant);
+ bool AllConsts = allConstant(ReducedVals);
+ auto EvaluateScalarCost = [&](function_ref<InstructionCost()> GenCostFn) {
+ InstructionCost Cost = 0;
+ // Scalar cost is repeated for N-1 elements.
+ int Cnt = ReducedVals.size();
+ for (Value *RdxVal : ReducedVals) {
+ if (Cnt == 1)
+ break;
+ --Cnt;
+ if (RdxVal->hasNUsesOrMore(IsCmpSelMinMax ? 3 : 2)) {
+ Cost += GenCostFn();
+ continue;
+ }
+ InstructionCost ScalarCost = 0;
+ for (User *U : RdxVal->users()) {
+ auto *RdxOp = cast<Instruction>(U);
+ if (hasRequiredNumberOfUses(IsCmpSelMinMax, RdxOp)) {
+ ScalarCost += TTI->getInstructionCost(RdxOp, CostKind);
+ continue;
+ }
+ ScalarCost = InstructionCost::getInvalid();
+ break;
+ }
+ if (ScalarCost.isValid())
+ Cost += ScalarCost;
+ else
+ Cost += GenCostFn();
+ }
+ return Cost;
+ };
switch (RdxKind) {
case RecurKind::Add:
case RecurKind::Mul:
@@ -12913,52 +13889,32 @@ private:
if (!AllConsts)
VectorCost =
TTI->getArithmeticReductionCost(RdxOpcode, VectorTy, FMF, CostKind);
- ScalarCost = TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy, CostKind);
+ ScalarCost = EvaluateScalarCost([&]() {
+ return TTI->getArithmeticInstrCost(RdxOpcode, ScalarTy, CostKind);
+ });
break;
}
case RecurKind::FMax:
- case RecurKind::FMin: {
- auto *SclCondTy = CmpInst::makeCmpResultType(ScalarTy);
- if (!AllConsts) {
- auto *VecCondTy =
- cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
- VectorCost =
- TTI->getMinMaxReductionCost(VectorTy, VecCondTy,
- /*IsUnsigned=*/false, CostKind);
- }
- CmpInst::Predicate RdxPred = getMinMaxReductionPredicate(RdxKind);
- ScalarCost = TTI->getCmpSelInstrCost(Instruction::FCmp, ScalarTy,
- SclCondTy, RdxPred, CostKind) +
- TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
- SclCondTy, RdxPred, CostKind);
- break;
- }
+ case RecurKind::FMin:
+ case RecurKind::FMaximum:
+ case RecurKind::FMinimum:
case RecurKind::SMax:
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin: {
- auto *SclCondTy = CmpInst::makeCmpResultType(ScalarTy);
- if (!AllConsts) {
- auto *VecCondTy =
- cast<VectorType>(CmpInst::makeCmpResultType(VectorTy));
- bool IsUnsigned =
- RdxKind == RecurKind::UMax || RdxKind == RecurKind::UMin;
- VectorCost = TTI->getMinMaxReductionCost(VectorTy, VecCondTy,
- IsUnsigned, CostKind);
- }
- CmpInst::Predicate RdxPred = getMinMaxReductionPredicate(RdxKind);
- ScalarCost = TTI->getCmpSelInstrCost(Instruction::ICmp, ScalarTy,
- SclCondTy, RdxPred, CostKind) +
- TTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
- SclCondTy, RdxPred, CostKind);
+ Intrinsic::ID Id = getMinMaxReductionIntrinsicOp(RdxKind);
+ if (!AllConsts)
+ VectorCost = TTI->getMinMaxReductionCost(Id, VectorTy, FMF, CostKind);
+ ScalarCost = EvaluateScalarCost([&]() {
+ IntrinsicCostAttributes ICA(Id, ScalarTy, {ScalarTy, ScalarTy}, FMF);
+ return TTI->getIntrinsicInstrCost(ICA, CostKind);
+ });
break;
}
default:
llvm_unreachable("Expected arithmetic or min/max reduction operation");
}
- // Scalar cost is repeated for N-1 elements.
- ScalarCost *= (ReduxWidth - 1);
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << VectorCost - ScalarCost
<< " for reduction that starts with " << *FirstReducedVal
<< " (It is a splitting reduction)\n");
@@ -12977,8 +13933,148 @@ private:
++NumVectorInstructions;
return createSimpleTargetReduction(Builder, TTI, VectorizedValue, RdxKind);
}
-};
+ /// Emits optimized code for a unique scalar value reused \p Cnt times.
+ Value *emitScaleForReusedOps(Value *VectorizedValue, IRBuilderBase &Builder,
+ unsigned Cnt) {
+ assert(IsSupportedHorRdxIdentityOp &&
+ "The optimization of matched scalar identity horizontal reductions "
+ "must be supported.");
+ switch (RdxKind) {
+ case RecurKind::Add: {
+ // res = mul vv, n
+ Value *Scale = ConstantInt::get(VectorizedValue->getType(), Cnt);
+ LLVM_DEBUG(dbgs() << "SLP: Add (to-mul) " << Cnt << "of "
+ << VectorizedValue << ". (HorRdx)\n");
+ return Builder.CreateMul(VectorizedValue, Scale);
+ }
+ case RecurKind::Xor: {
+ // res = n % 2 ? 0 : vv
+ LLVM_DEBUG(dbgs() << "SLP: Xor " << Cnt << "of " << VectorizedValue
+ << ". (HorRdx)\n");
+ if (Cnt % 2 == 0)
+ return Constant::getNullValue(VectorizedValue->getType());
+ return VectorizedValue;
+ }
+ case RecurKind::FAdd: {
+ // res = fmul v, n
+ Value *Scale = ConstantFP::get(VectorizedValue->getType(), Cnt);
+ LLVM_DEBUG(dbgs() << "SLP: FAdd (to-fmul) " << Cnt << "of "
+ << VectorizedValue << ". (HorRdx)\n");
+ return Builder.CreateFMul(VectorizedValue, Scale);
+ }
+ case RecurKind::And:
+ case RecurKind::Or:
+ case RecurKind::SMax:
+ case RecurKind::SMin:
+ case RecurKind::UMax:
+ case RecurKind::UMin:
+ case RecurKind::FMax:
+ case RecurKind::FMin:
+ case RecurKind::FMaximum:
+ case RecurKind::FMinimum:
+ // res = vv
+ return VectorizedValue;
+ case RecurKind::Mul:
+ case RecurKind::FMul:
+ case RecurKind::FMulAdd:
+ case RecurKind::SelectICmp:
+ case RecurKind::SelectFCmp:
+ case RecurKind::None:
+ llvm_unreachable("Unexpected reduction kind for repeated scalar.");
+ }
+ return nullptr;
+ }
+
+ /// Emits the actual operation for the scalar identity values found during
+ /// horizontal reduction analysis.
+ Value *emitReusedOps(Value *VectorizedValue, IRBuilderBase &Builder,
+ ArrayRef<Value *> VL,
+ const MapVector<Value *, unsigned> &SameValuesCounter,
+ const DenseMap<Value *, Value *> &TrackedToOrig) {
+ assert(IsSupportedHorRdxIdentityOp &&
+ "The optimization of matched scalar identity horizontal reductions "
+ "must be supported.");
+ switch (RdxKind) {
+ case RecurKind::Add: {
+ // root = mul prev_root, <1, 1, n, 1>
+ SmallVector<Constant *> Vals;
+ for (Value *V : VL) {
+ unsigned Cnt = SameValuesCounter.lookup(TrackedToOrig.find(V)->second);
+ Vals.push_back(ConstantInt::get(V->getType(), Cnt, /*IsSigned=*/false));
+ }
+ auto *Scale = ConstantVector::get(Vals);
+ LLVM_DEBUG(dbgs() << "SLP: Add (to-mul) " << Scale << "of "
+ << VectorizedValue << ". (HorRdx)\n");
+ return Builder.CreateMul(VectorizedValue, Scale);
+ }
+ case RecurKind::And:
+ case RecurKind::Or:
+ // No need for multiple or/and(s).
+ LLVM_DEBUG(dbgs() << "SLP: And/or of same " << VectorizedValue
+ << ". (HorRdx)\n");
+ return VectorizedValue;
+ case RecurKind::SMax:
+ case RecurKind::SMin:
+ case RecurKind::UMax:
+ case RecurKind::UMin:
+ case RecurKind::FMax:
+ case RecurKind::FMin:
+ case RecurKind::FMaximum:
+ case RecurKind::FMinimum:
+ // No need for multiple min/max(s) of the same value.
+ LLVM_DEBUG(dbgs() << "SLP: Max/min of same " << VectorizedValue
+ << ". (HorRdx)\n");
+ return VectorizedValue;
+ case RecurKind::Xor: {
+ // Replace values that repeat an even number of times with 0, since
+ // x xor x = 0.
+ // root = shuffle prev_root, zeroinitializer, <0, 1, 2, vf, 4, vf, 5, 6,
+ // 7>, if the 4th and 6th elements repeat an even number of times.
+ SmallVector<int> Mask(
+ cast<FixedVectorType>(VectorizedValue->getType())->getNumElements(),
+ PoisonMaskElem);
+ std::iota(Mask.begin(), Mask.end(), 0);
+ bool NeedShuffle = false;
+ for (unsigned I = 0, VF = VL.size(); I < VF; ++I) {
+ Value *V = VL[I];
+ unsigned Cnt = SameValuesCounter.lookup(TrackedToOrig.find(V)->second);
+ if (Cnt % 2 == 0) {
+ Mask[I] = VF;
+ NeedShuffle = true;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "SLP: Xor <"; for (int I
+ : Mask) dbgs()
+ << I << " ";
+ dbgs() << "> of " << VectorizedValue << ". (HorRdx)\n");
+ if (NeedShuffle)
+ VectorizedValue = Builder.CreateShuffleVector(
+ VectorizedValue,
+ ConstantVector::getNullValue(VectorizedValue->getType()), Mask);
+ return VectorizedValue;
+ }
+ case RecurKind::FAdd: {
+ // root = fmul prev_root, <1.0, 1.0, n.0, 1.0>
+ SmallVector<Constant *> Vals;
+ for (Value *V : VL) {
+ unsigned Cnt = SameValuesCounter.lookup(TrackedToOrig.find(V)->second);
+ Vals.push_back(ConstantFP::get(V->getType(), Cnt));
+ }
+ auto *Scale = ConstantVector::get(Vals);
+ return Builder.CreateFMul(VectorizedValue, Scale);
+ }
+ case RecurKind::Mul:
+ case RecurKind::FMul:
+ case RecurKind::FMulAdd:
+ case RecurKind::SelectICmp:
+ case RecurKind::SelectFCmp:
+ case RecurKind::None:
+ llvm_unreachable("Unexpected reduction kind for reused scalars.");
+ }
+ return nullptr;
+ }
+};
} // end anonymous namespace
static std::optional<unsigned> getAggregateSize(Instruction *InsertInst) {
@@ -13075,15 +14171,15 @@ static bool findBuildAggregate(Instruction *LastInsertInst,
return false;
}
-/// Try and get a reduction value from a phi node.
+/// Try and get a reduction instruction from a phi node.
///
/// Given a phi node \p P in a block \p ParentBB, consider possible reductions
/// if they come from either \p ParentBB or a containing loop latch.
///
/// \returns A candidate reduction value if possible, or \code nullptr \endcode
/// if not possible.
-static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
- BasicBlock *ParentBB, LoopInfo *LI) {
+static Instruction *getReductionInstr(const DominatorTree *DT, PHINode *P,
+ BasicBlock *ParentBB, LoopInfo *LI) {
// There are situations where the reduction value is not dominated by the
// reduction phi. Vectorizing such cases has been reported to cause
// miscompiles. See PR25787.
@@ -13092,13 +14188,13 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
DT->dominates(P->getParent(), cast<Instruction>(R)->getParent());
};
- Value *Rdx = nullptr;
+ Instruction *Rdx = nullptr;
// Return the incoming value if it comes from the same BB as the phi node.
if (P->getIncomingBlock(0) == ParentBB) {
- Rdx = P->getIncomingValue(0);
+ Rdx = dyn_cast<Instruction>(P->getIncomingValue(0));
} else if (P->getIncomingBlock(1) == ParentBB) {
- Rdx = P->getIncomingValue(1);
+ Rdx = dyn_cast<Instruction>(P->getIncomingValue(1));
}
if (Rdx && DominatedReduxValue(Rdx))
@@ -13115,9 +14211,9 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
// There is a loop latch, return the incoming value if it comes from
// that. This reduction pattern occasionally turns up.
if (P->getIncomingBlock(0) == BBLatch) {
- Rdx = P->getIncomingValue(0);
+ Rdx = dyn_cast<Instruction>(P->getIncomingValue(0));
} else if (P->getIncomingBlock(1) == BBLatch) {
- Rdx = P->getIncomingValue(1);
+ Rdx = dyn_cast<Instruction>(P->getIncomingValue(1));
}
if (Rdx && DominatedReduxValue(Rdx))
@@ -13133,6 +14229,10 @@ static bool matchRdxBop(Instruction *I, Value *&V0, Value *&V1) {
return true;
if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(V0), m_Value(V1))))
return true;
+ if (match(I, m_Intrinsic<Intrinsic::maximum>(m_Value(V0), m_Value(V1))))
+ return true;
+ if (match(I, m_Intrinsic<Intrinsic::minimum>(m_Value(V0), m_Value(V1))))
+ return true;
if (match(I, m_Intrinsic<Intrinsic::smax>(m_Value(V0), m_Value(V1))))
return true;
if (match(I, m_Intrinsic<Intrinsic::smin>(m_Value(V0), m_Value(V1))))
@@ -13144,21 +14244,63 @@ static bool matchRdxBop(Instruction *I, Value *&V0, Value *&V1) {
return false;
}
+/// We could have an initial reduction that is not an add.
+/// r *= v1 + v2 + v3 + v4
+/// In such a case start looking for a tree rooted in the first '+'.
+/// \returns the new root if found, which may be nullptr if not an instruction.
+static Instruction *tryGetSecondaryReductionRoot(PHINode *Phi,
+ Instruction *Root) {
+ assert((isa<BinaryOperator>(Root) || isa<SelectInst>(Root) ||
+ isa<IntrinsicInst>(Root)) &&
+ "Expected binop, select, or intrinsic for reduction matching");
+ Value *LHS =
+ Root->getOperand(HorizontalReduction::getFirstOperandIndex(Root));
+ Value *RHS =
+ Root->getOperand(HorizontalReduction::getFirstOperandIndex(Root) + 1);
+ if (LHS == Phi)
+ return dyn_cast<Instruction>(RHS);
+ if (RHS == Phi)
+ return dyn_cast<Instruction>(LHS);
+ return nullptr;
+}
+
+/// \returns the first operand of \p I that does not match \p Phi. If the
+/// operand is not an instruction it returns nullptr.
+static Instruction *getNonPhiOperand(Instruction *I, PHINode *Phi) {
+ Value *Op0 = nullptr;
+ Value *Op1 = nullptr;
+ if (!matchRdxBop(I, Op0, Op1))
+ return nullptr;
+ return dyn_cast<Instruction>(Op0 == Phi ? Op1 : Op0);
+}
+
+/// \returns true if \p I is a candidate instruction for reduction vectorization.
+static bool isReductionCandidate(Instruction *I) {
+ bool IsSelect = match(I, m_Select(m_Value(), m_Value(), m_Value()));
+ Value *B0 = nullptr, *B1 = nullptr;
+ bool IsBinop = matchRdxBop(I, B0, B1);
+ return IsBinop || IsSelect;
+}
+
bool SLPVectorizerPass::vectorizeHorReduction(
- PHINode *P, Value *V, BasicBlock *BB, BoUpSLP &R, TargetTransformInfo *TTI,
+ PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R, TargetTransformInfo *TTI,
SmallVectorImpl<WeakTrackingVH> &PostponedInsts) {
if (!ShouldVectorizeHor)
return false;
+ bool TryOperandsAsNewSeeds = P && isa<BinaryOperator>(Root);
- auto *Root = dyn_cast_or_null<Instruction>(V);
- if (!Root)
+ if (Root->getParent() != BB || isa<PHINode>(Root))
return false;
- if (!isa<BinaryOperator>(Root))
- P = nullptr;
+ // If we can find a secondary reduction root, use that instead.
+ auto SelectRoot = [&]() {
+ if (TryOperandsAsNewSeeds && isReductionCandidate(Root) &&
+ HorizontalReduction::getRdxKind(Root) != RecurKind::None)
+ if (Instruction *NewRoot = tryGetSecondaryReductionRoot(P, Root))
+ return NewRoot;
+ return Root;
+ };
- if (Root->getParent() != BB || isa<PHINode>(Root))
- return false;
// Start analysis starting from Root instruction. If horizontal reduction is
// found, try to vectorize it. If it is not a horizontal reduction or
// vectorization is not possible or not effective, and currently analyzed
@@ -13171,22 +14313,32 @@ bool SLPVectorizerPass::vectorizeHorReduction(
// If a horizontal reduction was not matched or vectorized we collect
// instructions for possible later attempts for vectorization.
std::queue<std::pair<Instruction *, unsigned>> Stack;
- Stack.emplace(Root, 0);
+ Stack.emplace(SelectRoot(), 0);
SmallPtrSet<Value *, 8> VisitedInstrs;
bool Res = false;
- auto &&TryToReduce = [this, TTI, &P, &R](Instruction *Inst, Value *&B0,
- Value *&B1) -> Value * {
+ auto &&TryToReduce = [this, TTI, &R](Instruction *Inst) -> Value * {
if (R.isAnalyzedReductionRoot(Inst))
return nullptr;
- bool IsBinop = matchRdxBop(Inst, B0, B1);
- bool IsSelect = match(Inst, m_Select(m_Value(), m_Value(), m_Value()));
- if (IsBinop || IsSelect) {
- HorizontalReduction HorRdx;
- if (HorRdx.matchAssociativeReduction(P, Inst, *SE, *DL, *TLI))
- return HorRdx.tryToReduce(R, TTI, *TLI);
+ if (!isReductionCandidate(Inst))
+ return nullptr;
+ HorizontalReduction HorRdx;
+ if (!HorRdx.matchAssociativeReduction(R, Inst, *SE, *DL, *TLI))
+ return nullptr;
+ return HorRdx.tryToReduce(R, TTI, *TLI);
+ };
+ auto TryAppendToPostponedInsts = [&](Instruction *FutureSeed) {
+ if (TryOperandsAsNewSeeds && FutureSeed == Root) {
+ FutureSeed = getNonPhiOperand(Root, P);
+ if (!FutureSeed)
+ return false;
}
- return nullptr;
+ // Do not collect CmpInst or InsertElementInst/InsertValueInst as their
+ // analysis is done separately.
+ if (!isa<CmpInst, InsertElementInst, InsertValueInst>(FutureSeed))
+ PostponedInsts.push_back(FutureSeed);
+ return true;
};
+
while (!Stack.empty()) {
Instruction *Inst;
unsigned Level;
@@ -13197,37 +14349,19 @@ bool SLPVectorizerPass::vectorizeHorReduction(
// iteration while stack was populated before that happened.
if (R.isDeleted(Inst))
continue;
- Value *B0 = nullptr, *B1 = nullptr;
- if (Value *V = TryToReduce(Inst, B0, B1)) {
+ if (Value *VectorizedV = TryToReduce(Inst)) {
Res = true;
- // Set P to nullptr to avoid re-analysis of phi node in
- // matchAssociativeReduction function unless this is the root node.
- P = nullptr;
- if (auto *I = dyn_cast<Instruction>(V)) {
+ if (auto *I = dyn_cast<Instruction>(VectorizedV)) {
// Try to find another reduction.
Stack.emplace(I, Level);
continue;
}
} else {
- bool IsBinop = B0 && B1;
- if (P && IsBinop) {
- Inst = dyn_cast<Instruction>(B0);
- if (Inst == P)
- Inst = dyn_cast<Instruction>(B1);
- if (!Inst) {
- // Set P to nullptr to avoid re-analysis of phi node in
- // matchAssociativeReduction function unless this is the root node.
- P = nullptr;
- continue;
- }
+ // We could not vectorize `Inst` so try to use it as a future seed.
+ if (!TryAppendToPostponedInsts(Inst)) {
+ assert(Stack.empty() && "Expected empty stack");
+ break;
}
- // Set P to nullptr to avoid re-analysis of phi node in
- // matchAssociativeReduction function unless this is the root node.
- P = nullptr;
- // Do not collect CmpInst or InsertElementInst/InsertValueInst as their
- // analysis is done separately.
- if (!isa<CmpInst, InsertElementInst, InsertValueInst>(Inst))
- PostponedInsts.push_back(Inst);
}
// Try to vectorize operands.
@@ -13246,11 +14380,11 @@ bool SLPVectorizerPass::vectorizeHorReduction(
return Res;
}
-bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
+bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Instruction *Root,
BasicBlock *BB, BoUpSLP &R,
TargetTransformInfo *TTI) {
SmallVector<WeakTrackingVH> PostponedInsts;
- bool Res = vectorizeHorReduction(P, V, BB, R, TTI, PostponedInsts);
+ bool Res = vectorizeHorReduction(P, Root, BB, R, TTI, PostponedInsts);
Res |= tryToVectorize(PostponedInsts, R);
return Res;
}
@@ -13297,13 +14431,11 @@ bool SLPVectorizerPass::vectorizeInsertElementInst(InsertElementInst *IEI,
}
template <typename T>
-static bool
-tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
- function_ref<unsigned(T *)> Limit,
- function_ref<bool(T *, T *)> Comparator,
- function_ref<bool(T *, T *)> AreCompatible,
- function_ref<bool(ArrayRef<T *>, bool)> TryToVectorizeHelper,
- bool LimitForRegisterSize) {
+static bool tryToVectorizeSequence(
+ SmallVectorImpl<T *> &Incoming, function_ref<bool(T *, T *)> Comparator,
+ function_ref<bool(T *, T *)> AreCompatible,
+ function_ref<bool(ArrayRef<T *>, bool)> TryToVectorizeHelper,
+ bool MaxVFOnly, BoUpSLP &R) {
bool Changed = false;
// Sort by type, parent, operands.
stable_sort(Incoming, Comparator);
@@ -13331,21 +14463,29 @@ tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
// same/alternate ops only, this may result in some extra final
// vectorization.
if (NumElts > 1 &&
- TryToVectorizeHelper(ArrayRef(IncIt, NumElts), LimitForRegisterSize)) {
+ TryToVectorizeHelper(ArrayRef(IncIt, NumElts), MaxVFOnly)) {
// Success, start over because instructions might have been changed.
Changed = true;
- } else if (NumElts < Limit(*IncIt) &&
- (Candidates.empty() ||
- Candidates.front()->getType() == (*IncIt)->getType())) {
- Candidates.append(IncIt, std::next(IncIt, NumElts));
+ } else {
+ /// \Returns the minimum number of elements that we will attempt to
+ /// vectorize.
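+ /// E.g., with a 128-bit maximum vector register size and 32-bit elements
+ /// this yields max(2, 128 / 32) = 4.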
+ auto GetMinNumElements = [&R](Value *V) {
+ unsigned EltSize = R.getVectorElementSize(V);
+ return std::max(2U, R.getMaxVecRegSize() / EltSize);
+ };
+ if (NumElts < GetMinNumElements(*IncIt) &&
+ (Candidates.empty() ||
+ Candidates.front()->getType() == (*IncIt)->getType())) {
+ Candidates.append(IncIt, std::next(IncIt, NumElts));
+ }
}
// Final attempt to vectorize instructions with the same types.
if (Candidates.size() > 1 &&
(SameTypeIt == E || (*SameTypeIt)->getType() != (*IncIt)->getType())) {
- if (TryToVectorizeHelper(Candidates, /*LimitForRegisterSize=*/false)) {
+ if (TryToVectorizeHelper(Candidates, /*MaxVFOnly=*/false)) {
// Success, start over because instructions might have been changed.
Changed = true;
- } else if (LimitForRegisterSize) {
+ } else if (MaxVFOnly) {
// Try to vectorize using small vectors.
for (auto *It = Candidates.begin(), *End = Candidates.end();
It != End;) {
@@ -13353,9 +14493,8 @@ tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
while (SameTypeIt != End && AreCompatible(*SameTypeIt, *It))
++SameTypeIt;
unsigned NumElts = (SameTypeIt - It);
- if (NumElts > 1 &&
- TryToVectorizeHelper(ArrayRef(It, NumElts),
- /*LimitForRegisterSize=*/false))
+ if (NumElts > 1 && TryToVectorizeHelper(ArrayRef(It, NumElts),
+ /*MaxVFOnly=*/false))
Changed = true;
It = SameTypeIt;
}
@@ -13378,11 +14517,12 @@ tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
/// of the second cmp instruction.
template <bool IsCompatibility>
static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI,
- function_ref<bool(Instruction *)> IsDeleted) {
+ const DominatorTree &DT) {
+ assert(isValidElementType(V->getType()) &&
+ isValidElementType(V2->getType()) &&
+ "Expected valid element types only.");
auto *CI1 = cast<CmpInst>(V);
auto *CI2 = cast<CmpInst>(V2);
- if (IsDeleted(CI2) || !isValidElementType(CI2->getType()))
- return false;
if (CI1->getOperand(0)->getType()->getTypeID() <
CI2->getOperand(0)->getType()->getTypeID())
return !IsCompatibility;
@@ -13411,31 +14551,102 @@ static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI,
return false;
if (auto *I1 = dyn_cast<Instruction>(Op1))
if (auto *I2 = dyn_cast<Instruction>(Op2)) {
- if (I1->getParent() != I2->getParent())
- return false;
+ if (IsCompatibility) {
+ if (I1->getParent() != I2->getParent())
+ return false;
+ } else {
+ // Try to compare nodes with same parent.
+ DomTreeNodeBase<BasicBlock> *NodeI1 = DT.getNode(I1->getParent());
+ DomTreeNodeBase<BasicBlock> *NodeI2 = DT.getNode(I2->getParent());
+ if (!NodeI1)
+ return NodeI2 != nullptr;
+ if (!NodeI2)
+ return false;
+ assert((NodeI1 == NodeI2) ==
+ (NodeI1->getDFSNumIn() == NodeI2->getDFSNumIn()) &&
+ "Different nodes should have different DFS numbers");
+ if (NodeI1 != NodeI2)
+ return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
+ }
InstructionsState S = getSameOpcode({I1, I2}, TLI);
- if (S.getOpcode())
+ if (S.getOpcode() && (IsCompatibility || !S.isAltShuffle()))
continue;
- return false;
+ return !IsCompatibility && I1->getOpcode() < I2->getOpcode();
}
}
return IsCompatibility;
}
-bool SLPVectorizerPass::vectorizeSimpleInstructions(InstSetVector &Instructions,
- BasicBlock *BB, BoUpSLP &R,
- bool AtTerminator) {
+template <typename ItT>
+bool SLPVectorizerPass::vectorizeCmpInsts(iterator_range<ItT> CmpInsts,
+ BasicBlock *BB, BoUpSLP &R) {
+ bool Changed = false;
+ // Try to find reductions first.
+ for (CmpInst *I : CmpInsts) {
+ if (R.isDeleted(I))
+ continue;
+ for (Value *Op : I->operands())
+ if (auto *RootOp = dyn_cast<Instruction>(Op))
+ Changed |= vectorizeRootInstruction(nullptr, RootOp, BB, R, TTI);
+ }
+ // Try to vectorize operands as vector bundles.
+ for (CmpInst *I : CmpInsts) {
+ if (R.isDeleted(I))
+ continue;
+ Changed |= tryToVectorize(I, R);
+ }
+ // Try to vectorize list of compares.
+ // Sort by type, compare predicate, etc.
+ auto CompareSorter = [&](Value *V, Value *V2) {
+ if (V == V2)
+ return false;
+ return compareCmp<false>(V, V2, *TLI, *DT);
+ };
+
+ auto AreCompatibleCompares = [&](Value *V1, Value *V2) {
+ if (V1 == V2)
+ return true;
+ return compareCmp<true>(V1, V2, *TLI, *DT);
+ };
+
+ SmallVector<Value *> Vals;
+ for (Instruction *V : CmpInsts)
+ if (!R.isDeleted(V) && isValidElementType(V->getType()))
+ Vals.push_back(V);
+ if (Vals.size() <= 1)
+ return Changed;
+ Changed |= tryToVectorizeSequence<Value>(
+ Vals, CompareSorter, AreCompatibleCompares,
+ [this, &R](ArrayRef<Value *> Candidates, bool MaxVFOnly) {
+ // Exclude possible reductions from other blocks.
+ bool ArePossiblyReducedInOtherBlock = any_of(Candidates, [](Value *V) {
+ return any_of(V->users(), [V](User *U) {
+ auto *Select = dyn_cast<SelectInst>(U);
+ return Select &&
+ Select->getParent() != cast<Instruction>(V)->getParent();
+ });
+ });
+ if (ArePossiblyReducedInOtherBlock)
+ return false;
+ return tryToVectorizeList(Candidates, R, MaxVFOnly);
+ },
+ /*MaxVFOnly=*/true, R);
+ return Changed;
+}
+
+bool SLPVectorizerPass::vectorizeInserts(InstSetVector &Instructions,
+ BasicBlock *BB, BoUpSLP &R) {
+ assert(all_of(Instructions,
+ [](auto *I) {
+ return isa<InsertElementInst, InsertValueInst>(I);
+ }) &&
+ "This function only accepts Insert instructions");
bool OpsChanged = false;
- SmallVector<Instruction *, 4> PostponedCmps;
SmallVector<WeakTrackingVH> PostponedInsts;
// pass1 - try to vectorize reductions only
for (auto *I : reverse(Instructions)) {
if (R.isDeleted(I))
continue;
- if (isa<CmpInst>(I)) {
- PostponedCmps.push_back(I);
- continue;
- }
OpsChanged |= vectorizeHorReduction(nullptr, I, BB, R, TTI, PostponedInsts);
}
// pass2 - try to match and vectorize a buildvector sequence.
@@ -13451,63 +14662,7 @@ bool SLPVectorizerPass::vectorizeSimpleInstructions(InstSetVector &Instructions,
// Now try to vectorize postponed instructions.
OpsChanged |= tryToVectorize(PostponedInsts, R);
- if (AtTerminator) {
- // Try to find reductions first.
- for (Instruction *I : PostponedCmps) {
- if (R.isDeleted(I))
- continue;
- for (Value *Op : I->operands())
- OpsChanged |= vectorizeRootInstruction(nullptr, Op, BB, R, TTI);
- }
- // Try to vectorize operands as vector bundles.
- for (Instruction *I : PostponedCmps) {
- if (R.isDeleted(I))
- continue;
- OpsChanged |= tryToVectorize(I, R);
- }
- // Try to vectorize list of compares.
- // Sort by type, compare predicate, etc.
- auto CompareSorter = [&](Value *V, Value *V2) {
- return compareCmp<false>(V, V2, *TLI,
- [&R](Instruction *I) { return R.isDeleted(I); });
- };
-
- auto AreCompatibleCompares = [&](Value *V1, Value *V2) {
- if (V1 == V2)
- return true;
- return compareCmp<true>(V1, V2, *TLI,
- [&R](Instruction *I) { return R.isDeleted(I); });
- };
- auto Limit = [&R](Value *V) {
- unsigned EltSize = R.getVectorElementSize(V);
- return std::max(2U, R.getMaxVecRegSize() / EltSize);
- };
-
- SmallVector<Value *> Vals(PostponedCmps.begin(), PostponedCmps.end());
- OpsChanged |= tryToVectorizeSequence<Value>(
- Vals, Limit, CompareSorter, AreCompatibleCompares,
- [this, &R](ArrayRef<Value *> Candidates, bool LimitForRegisterSize) {
- // Exclude possible reductions from other blocks.
- bool ArePossiblyReducedInOtherBlock =
- any_of(Candidates, [](Value *V) {
- return any_of(V->users(), [V](User *U) {
- return isa<SelectInst>(U) &&
- cast<SelectInst>(U)->getParent() !=
- cast<Instruction>(V)->getParent();
- });
- });
- if (ArePossiblyReducedInOtherBlock)
- return false;
- return tryToVectorizeList(Candidates, R, LimitForRegisterSize);
- },
- /*LimitForRegisterSize=*/true);
- Instructions.clear();
- } else {
- Instructions.clear();
- // Insert in reverse order since the PostponedCmps vector was filled in
- // reverse order.
- Instructions.insert(PostponedCmps.rbegin(), PostponedCmps.rend());
- }
+ Instructions.clear();
return OpsChanged;
}
@@ -13603,10 +14758,6 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
}
return true;
};
- auto Limit = [&R](Value *V) {
- unsigned EltSize = R.getVectorElementSize(V);
- return std::max(2U, R.getMaxVecRegSize() / EltSize);
- };
bool HaveVectorizedPhiNodes = false;
do {
@@ -13648,19 +14799,44 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
}
HaveVectorizedPhiNodes = tryToVectorizeSequence<Value>(
- Incoming, Limit, PHICompare, AreCompatiblePHIs,
- [this, &R](ArrayRef<Value *> Candidates, bool LimitForRegisterSize) {
- return tryToVectorizeList(Candidates, R, LimitForRegisterSize);
+ Incoming, PHICompare, AreCompatiblePHIs,
+ [this, &R](ArrayRef<Value *> Candidates, bool MaxVFOnly) {
+ return tryToVectorizeList(Candidates, R, MaxVFOnly);
},
- /*LimitForRegisterSize=*/true);
+ /*MaxVFOnly=*/true, R);
Changed |= HaveVectorizedPhiNodes;
VisitedInstrs.insert(Incoming.begin(), Incoming.end());
} while (HaveVectorizedPhiNodes);
VisitedInstrs.clear();
- InstSetVector PostProcessInstructions;
- SmallDenseSet<Instruction *, 4> KeyNodes;
+ InstSetVector PostProcessInserts;
+ SmallSetVector<CmpInst *, 8> PostProcessCmps;
+ // Vectorizes Inserts in `PostProcessInserts` and, if `VectorizeCmps` is true,
+ // also vectorizes `PostProcessCmps`.
+ auto VectorizeInsertsAndCmps = [&](bool VectorizeCmps) {
+ bool Changed = vectorizeInserts(PostProcessInserts, BB, R);
+ if (VectorizeCmps) {
+ Changed |= vectorizeCmpInsts(reverse(PostProcessCmps), BB, R);
+ PostProcessCmps.clear();
+ }
+ PostProcessInserts.clear();
+ return Changed;
+ };
+ // Returns true if `I` is in `PostProcessInserts` or `PostProcessCmps`.
+ auto IsInPostProcessInstrs = [&](Instruction *I) {
+ if (auto *Cmp = dyn_cast<CmpInst>(I))
+ return PostProcessCmps.contains(Cmp);
+ return isa<InsertElementInst, InsertValueInst>(I) &&
+ PostProcessInserts.contains(I);
+ };
+ // Returns true if `I` is an instruction without users, such as a terminator,
+ // a store, or a function call with an ignored return value. Unused
+ // instructions are detected based on their type, except for CallInst and
+ // InvokeInst, which may have a non-void type yet still have no users.
+ auto HasNoUsers = [](Instruction *I) {
+ return I->use_empty() &&
+ (I->getType()->isVoidTy() || isa<CallInst, InvokeInst>(I));
+ };
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
// Skip instructions with scalable types. The number of elements is unknown
// at compile time for scalable types.
@@ -13672,9 +14848,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
continue;
// We may go through BB multiple times so skip the one we have checked.
if (!VisitedInstrs.insert(&*it).second) {
- if (it->use_empty() && KeyNodes.contains(&*it) &&
- vectorizeSimpleInstructions(PostProcessInstructions, BB, R,
- it->isTerminator())) {
+ if (HasNoUsers(&*it) &&
+ VectorizeInsertsAndCmps(/*VectorizeCmps=*/it->isTerminator())) {
// We would like to start over since some instructions are deleted
// and the iterator may become invalid.
Changed = true;
@@ -13692,8 +14867,8 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Check that the PHI is a reduction PHI.
if (P->getNumIncomingValues() == 2) {
// Try to match and vectorize a horizontal reduction.
- if (vectorizeRootInstruction(P, getReductionValue(DT, P, BB, LI), BB, R,
- TTI)) {
+ Instruction *Root = getReductionInstr(DT, P, BB, LI);
+ if (Root && vectorizeRootInstruction(P, Root, BB, R, TTI)) {
Changed = true;
it = BB->begin();
e = BB->end();
@@ -13714,19 +14889,14 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Postponed instructions should not be vectorized here, delay their
// vectorization.
if (auto *PI = dyn_cast<Instruction>(P->getIncomingValue(I));
- PI && !PostProcessInstructions.contains(PI))
- Changed |= vectorizeRootInstruction(nullptr, P->getIncomingValue(I),
+ PI && !IsInPostProcessInstrs(PI))
+ Changed |= vectorizeRootInstruction(nullptr, PI,
P->getIncomingBlock(I), R, TTI);
}
continue;
}
- // Ran into an instruction without users, like terminator, or function call
- // with ignored return value, store. Ignore unused instructions (basing on
- // instruction type, except for CallInst and InvokeInst).
- if (it->use_empty() &&
- (it->getType()->isVoidTy() || isa<CallInst, InvokeInst>(it))) {
- KeyNodes.insert(&*it);
+ if (HasNoUsers(&*it)) {
bool OpsChanged = false;
auto *SI = dyn_cast<StoreInst>(it);
bool TryToVectorizeRoot = ShouldStartVectorizeHorAtStore || !SI;
@@ -13746,16 +14916,16 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Postponed instructions should not be vectorized here, delay their
// vectorization.
if (auto *VI = dyn_cast<Instruction>(V);
- VI && !PostProcessInstructions.contains(VI))
+ VI && !IsInPostProcessInstrs(VI))
// Try to match and vectorize a horizontal reduction.
- OpsChanged |= vectorizeRootInstruction(nullptr, V, BB, R, TTI);
+ OpsChanged |= vectorizeRootInstruction(nullptr, VI, BB, R, TTI);
}
}
// Start vectorization of post-process list of instructions from the
// top-tree instructions to try to vectorize as many instructions as
// possible.
- OpsChanged |= vectorizeSimpleInstructions(PostProcessInstructions, BB, R,
- it->isTerminator());
+ OpsChanged |=
+ VectorizeInsertsAndCmps(/*VectorizeCmps=*/it->isTerminator());
if (OpsChanged) {
// We would like to start over since some instructions are deleted
// and the iterator may become invalid.
@@ -13766,8 +14936,10 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
}
}
- if (isa<CmpInst, InsertElementInst, InsertValueInst>(it))
- PostProcessInstructions.insert(&*it);
+ if (isa<InsertElementInst, InsertValueInst>(it))
+ PostProcessInserts.insert(&*it);
+ else if (isa<CmpInst>(it))
+ PostProcessCmps.insert(cast<CmpInst>(&*it));
}
return Changed;
@@ -13928,10 +15100,6 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
return V1->getValueOperand()->getValueID() ==
V2->getValueOperand()->getValueID();
};
- auto Limit = [&R, this](StoreInst *SI) {
- unsigned EltSize = DL->getTypeSizeInBits(SI->getValueOperand()->getType());
- return R.getMinVF(EltSize);
- };
// Attempt to sort and vectorize each of the store-groups.
for (auto &Pair : Stores) {
@@ -13945,28 +15113,11 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
continue;
Changed |= tryToVectorizeSequence<StoreInst>(
- Pair.second, Limit, StoreSorter, AreCompatibleStores,
+ Pair.second, StoreSorter, AreCompatibleStores,
[this, &R](ArrayRef<StoreInst *> Candidates, bool) {
return vectorizeStores(Candidates, R);
},
- /*LimitForRegisterSize=*/false);
+ /*MaxVFOnly=*/false, R);
}
return Changed;
}
-
-char SLPVectorizer::ID = 0;
-
-static const char lv_name[] = "SLP Vectorizer";
-
-INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false)
-INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(InjectTLIMappingsLegacy)
-INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false)
-
-Pass *llvm::createSLPVectorizerPass() { return new SLPVectorizer(); }
diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
index 733d2e1c667b..1271d1424c03 100644
--- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -95,7 +95,7 @@ class VPRecipeBuilder {
/// return a new VPWidenCallRecipe. Range.End may be decreased to ensure same
/// decision from \p Range.Start to \p Range.End.
VPWidenCallRecipe *tryToWidenCall(CallInst *CI, ArrayRef<VPValue *> Operands,
- VFRange &Range) const;
+ VFRange &Range, VPlanPtr &Plan);
/// Check if \p I has an opcode that can be widened and return a VPWidenRecipe
/// if it can. The function should only be called if the cost-model indicates
@@ -136,11 +136,11 @@ public:
/// A helper function that computes the predicate of the block BB, assuming
/// that the header block of the loop is set to True. It returns the *entry*
/// mask for the block BB.
- VPValue *createBlockInMask(BasicBlock *BB, VPlanPtr &Plan);
+ VPValue *createBlockInMask(BasicBlock *BB, VPlan &Plan);
/// A helper function that computes the predicate of the edge between SRC
/// and DST.
- VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlanPtr &Plan);
+ VPValue *createEdgeMask(BasicBlock *Src, BasicBlock *Dst, VPlan &Plan);
/// Mark given ingredient for recording its recipe once one is created for
/// it.
@@ -159,19 +159,11 @@ public:
return Ingredient2Recipe[I];
}
- /// Create a replicating region for \p PredRecipe.
- VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
- VPlanPtr &Plan);
-
- /// Build a VPReplicationRecipe for \p I and enclose it within a Region if it
- /// is predicated. \return \p VPBB augmented with this new recipe if \p I is
- /// not predicated, otherwise \return a new VPBasicBlock that succeeds the new
- /// Region. Update the packing decision of predicated instructions if they
- /// feed \p I. Range.End may be decreased to ensure same recipe behavior from
- /// \p Range.Start to \p Range.End.
- VPBasicBlock *handleReplication(
- Instruction *I, VFRange &Range, VPBasicBlock *VPBB,
- VPlanPtr &Plan);
+ /// Build a VPReplicationRecipe for \p I. If it is predicated, add the mask as
+ /// last operand. Range.End may be decreased to ensure same recipe behavior
+ /// from \p Range.Start to \p Range.End.
+ VPRecipeOrVPValueTy handleReplication(Instruction *I, VFRange &Range,
+ VPlan &Plan);
/// Add the incoming values from the backedge to reduction & first-order
/// recurrence cross-iteration phis.
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index d554f438c804..e81b88fd8099 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
@@ -46,7 +47,10 @@
#include <vector>
using namespace llvm;
+
+namespace llvm {
extern cl::opt<bool> EnableVPlanNativePath;
+}
#define DEBUG_TYPE "vplan"
@@ -160,8 +164,9 @@ VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
}
void VPBlockBase::setPlan(VPlan *ParentPlan) {
- assert(ParentPlan->getEntry() == this &&
- "Can only set plan on its entry block.");
+ assert(
+ (ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) &&
+ "Can only set plan on its entry or preheader block.");
Plan = ParentPlan;
}
@@ -209,7 +214,7 @@ VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
}
Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
- if (!Def->hasDefiningRecipe())
+ if (Def->isLiveIn())
return Def->getLiveInIRValue();
if (hasScalarValue(Def, Instance)) {
@@ -243,11 +248,19 @@ void VPTransformState::addNewMetadata(Instruction *To,
}
void VPTransformState::addMetadata(Instruction *To, Instruction *From) {
+ // No source instruction to transfer metadata from?
+ if (!From)
+ return;
+
propagateMetadata(To, From);
addNewMetadata(To, From);
}
void VPTransformState::addMetadata(ArrayRef<Value *> To, Instruction *From) {
+ // No source instruction to transfer metadata from?
+ if (!From)
+ return;
+
for (Value *V : To) {
if (Instruction *I = dyn_cast<Instruction>(V))
addMetadata(I, From);
@@ -265,7 +278,7 @@ void VPTransformState::setDebugLocFromInst(const Value *V) {
// When a FSDiscriminator is enabled, we don't need to add the multiply
// factors to the discriminators.
if (DIL && Inst->getFunction()->shouldEmitDebugInfoForProfiling() &&
- !isa<DbgInfoIntrinsic>(Inst) && !EnableFSDiscriminator) {
+ !Inst->isDebugOrPseudoInst() && !EnableFSDiscriminator) {
// FIXME: For scalable vectors, assume vscale=1.
auto NewDIL =
DIL->cloneByMultiplyingDuplicationFactor(UF * VF.getKnownMinValue());
@@ -577,7 +590,9 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
#endif
VPlan::~VPlan() {
- clearLiveOuts();
+ for (auto &KV : LiveOuts)
+ delete KV.second;
+ LiveOuts.clear();
if (Entry) {
VPValue DummyValue;
@@ -585,15 +600,23 @@ VPlan::~VPlan() {
Block->dropAllReferences(&DummyValue);
VPBlockBase::deleteCFG(Entry);
+
+ Preheader->dropAllReferences(&DummyValue);
+ delete Preheader;
}
- for (VPValue *VPV : VPValuesToFree)
+ for (VPValue *VPV : VPLiveInsToFree)
delete VPV;
- if (TripCount)
- delete TripCount;
if (BackedgeTakenCount)
delete BackedgeTakenCount;
- for (auto &P : VPExternalDefs)
- delete P.second;
+}
+
+VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE) {
+ VPBasicBlock *Preheader = new VPBasicBlock("ph");
+ VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
+ auto Plan = std::make_unique<VPlan>(Preheader, VecPreheader);
+ Plan->TripCount =
+ vputils::getOrCreateVPValueForSCEVExpr(*Plan, TripCount, SE);
+ return Plan;
}
VPActiveLaneMaskPHIRecipe *VPlan::getActiveLaneMaskPhi() {
@@ -609,13 +632,6 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
Value *CanonicalIVStartValue,
VPTransformState &State,
bool IsEpilogueVectorization) {
-
- // Check if the trip count is needed, and if so build it.
- if (TripCount && TripCount->getNumUsers()) {
- for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
- State.set(TripCount, TripCountV, Part);
- }
-
// Check if the backedge taken count is needed, and if so build it.
if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
@@ -636,7 +652,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
// needs to be changed from zero to the value after the main vector loop.
// FIXME: Improve modeling for canonical IV start values in the epilogue loop.
if (CanonicalIVStartValue) {
- VPValue *VPV = getOrAddExternalDef(CanonicalIVStartValue);
+ VPValue *VPV = getVPValueOrAddLiveIn(CanonicalIVStartValue);
auto *IV = getCanonicalIV();
assert(all_of(IV->users(),
[](const VPUser *U) {
@@ -650,8 +666,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
VPInstruction::CanonicalIVIncrementNUW;
}) &&
"the canonical IV should only be used by its increments or "
- "ScalarIVSteps when "
- "resetting the start value");
+ "ScalarIVSteps when resetting the start value");
IV->setOperand(0, VPV);
}
}
@@ -748,13 +763,25 @@ void VPlan::print(raw_ostream &O) const {
if (VectorTripCount.getNumUsers() > 0) {
O << "\nLive-in ";
VectorTripCount.printAsOperand(O, SlotTracker);
- O << " = vector-trip-count\n";
+ O << " = vector-trip-count";
}
if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
O << "\nLive-in ";
BackedgeTakenCount->printAsOperand(O, SlotTracker);
- O << " = backedge-taken count\n";
+ O << " = backedge-taken count";
+ }
+
+ O << "\n";
+ if (TripCount->isLiveIn())
+ O << "Live-in ";
+ TripCount->printAsOperand(O, SlotTracker);
+ O << " = original trip-count";
+ O << "\n";
+
+ if (!getPreheader()->empty()) {
+ O << "\n";
+ getPreheader()->print(O, "", SlotTracker);
}
for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) {
@@ -765,11 +792,7 @@ void VPlan::print(raw_ostream &O) const {
if (!LiveOuts.empty())
O << "\n";
for (const auto &KV : LiveOuts) {
- O << "Live-out ";
- KV.second->getPhi()->printAsOperand(O);
- O << " = ";
- KV.second->getOperand(0)->printAsOperand(O, SlotTracker);
- O << "\n";
+ KV.second->print(O, SlotTracker);
}
O << "}\n";
@@ -882,6 +905,8 @@ void VPlanPrinter::dump() {
OS << "edge [fontname=Courier, fontsize=30]\n";
OS << "compound=true\n";
+ dumpBlock(Plan.getPreheader());
+
for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
dumpBlock(Block);
@@ -1086,26 +1111,27 @@ VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
}
void VPSlotTracker::assignSlot(const VPValue *V) {
- assert(Slots.find(V) == Slots.end() && "VPValue already has a slot!");
+ assert(!Slots.contains(V) && "VPValue already has a slot!");
Slots[V] = NextSlot++;
}
void VPSlotTracker::assignSlots(const VPlan &Plan) {
-
- for (const auto &P : Plan.VPExternalDefs)
- assignSlot(P.second);
-
assignSlot(&Plan.VectorTripCount);
if (Plan.BackedgeTakenCount)
assignSlot(Plan.BackedgeTakenCount);
+ assignSlots(Plan.getPreheader());
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
for (const VPBasicBlock *VPBB :
VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
- for (const VPRecipeBase &Recipe : *VPBB)
- for (VPValue *Def : Recipe.definedValues())
- assignSlot(Def);
+ assignSlots(VPBB);
+}
+
+void VPSlotTracker::assignSlots(const VPBasicBlock *VPBB) {
+ for (const VPRecipeBase &Recipe : *VPBB)
+ for (VPValue *Def : Recipe.definedValues())
+ assignSlot(Def);
}
bool vputils::onlyFirstLaneUsed(VPValue *Def) {
@@ -1115,13 +1141,17 @@ bool vputils::onlyFirstLaneUsed(VPValue *Def) {
VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr,
ScalarEvolution &SE) {
+ if (auto *Expanded = Plan.getSCEVExpansion(Expr))
+ return Expanded;
+ VPValue *Expanded = nullptr;
if (auto *E = dyn_cast<SCEVConstant>(Expr))
- return Plan.getOrAddExternalDef(E->getValue());
- if (auto *E = dyn_cast<SCEVUnknown>(Expr))
- return Plan.getOrAddExternalDef(E->getValue());
-
- VPBasicBlock *Preheader = Plan.getEntry()->getEntryBasicBlock();
- VPExpandSCEVRecipe *Step = new VPExpandSCEVRecipe(Expr, SE);
- Preheader->appendRecipe(Step);
- return Step;
+ Expanded = Plan.getVPValueOrAddLiveIn(E->getValue());
+ else if (auto *E = dyn_cast<SCEVUnknown>(Expr))
+ Expanded = Plan.getVPValueOrAddLiveIn(E->getValue());
+ else {
+ Expanded = new VPExpandSCEVRecipe(Expr, SE);
+ Plan.getPreheader()->appendRecipe(Expanded->getDefiningRecipe());
+ }
+ Plan.addSCEVExpansion(Expr, Expanded);
+ return Expanded;
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 986faaf99664..73313465adea 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -25,7 +25,6 @@
#include "VPlanValue.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -33,11 +32,12 @@
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
+#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/FMF.h"
-#include "llvm/Transforms/Utils/LoopVersioning.h"
+#include "llvm/IR/Operator.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -47,11 +47,9 @@ namespace llvm {
class BasicBlock;
class DominatorTree;
-class InductionDescriptor;
class InnerLoopVectorizer;
class IRBuilderBase;
class LoopInfo;
-class PredicateScalarEvolution;
class raw_ostream;
class RecurrenceDescriptor;
class SCEV;
@@ -62,6 +60,7 @@ class VPlan;
class VPReplicateRecipe;
class VPlanSlp;
class Value;
+class LoopVersioning;
namespace Intrinsic {
typedef unsigned ID;
@@ -76,16 +75,17 @@ Value *getRuntimeVF(IRBuilderBase &B, Type *Ty, ElementCount VF);
Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF,
int64_t Step);
-const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE);
+const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE,
+ Loop *CurLoop = nullptr);
/// A range of powers-of-2 vectorization factors with fixed start and
/// adjustable end. The range includes start and excludes end, e.g.,:
-/// [1, 9) = {1, 2, 4, 8}
+/// [1, 16) = {1, 2, 4, 8}
struct VFRange {
// A power of 2.
const ElementCount Start;
- // Need not be a power of 2. If End <= Start range is empty.
+ // A power of 2. If End <= Start, the range is empty.
ElementCount End;
bool isEmpty() const {
@@ -98,6 +98,33 @@ struct VFRange {
"Both Start and End should have the same scalable flag");
assert(isPowerOf2_32(Start.getKnownMinValue()) &&
"Expected Start to be a power of 2");
+ assert(isPowerOf2_32(End.getKnownMinValue()) &&
+ "Expected End to be a power of 2");
+ }
+
+ /// Iterator to iterate over vectorization factors in a VFRange.
+ class iterator
+ : public iterator_facade_base<iterator, std::forward_iterator_tag,
+ ElementCount> {
+ ElementCount VF;
+
+ public:
+ iterator(ElementCount VF) : VF(VF) {}
+
+ bool operator==(const iterator &Other) const { return VF == Other.VF; }
+
+ ElementCount operator*() const { return VF; }
+
+ iterator &operator++() {
+ VF *= 2;
+ return *this;
+ }
+ };
+
+ iterator begin() { return iterator(Start); }
+ iterator end() {
+ assert(isPowerOf2_32(End.getKnownMinValue()));
+ return iterator(End);
}
};
@@ -248,7 +275,7 @@ struct VPTransformState {
}
bool hasAnyVectorValue(VPValue *Def) const {
- return Data.PerPartOutput.find(Def) != Data.PerPartOutput.end();
+ return Data.PerPartOutput.contains(Def);
}
bool hasScalarValue(VPValue *Def, VPIteration Instance) {
@@ -370,10 +397,6 @@ struct VPTransformState {
/// Pointer to the VPlan code is generated for.
VPlan *Plan;
- /// Holds recipes that may generate a poison value that is used after
- /// vectorization, even when their operands are not poison.
- SmallPtrSet<VPRecipeBase *, 16> MayGeneratePoisonRecipes;
-
/// The loop object for the current parent region, or nullptr.
Loop *CurrentVectorLoop = nullptr;
@@ -382,7 +405,11 @@ struct VPTransformState {
///
/// This is currently only used to add no-alias metadata based on the
/// memchecks. The actual versioning is performed manually.
- std::unique_ptr<LoopVersioning> LVer;
+ LoopVersioning *LVer = nullptr;
+
+ /// Map SCEVs to their expanded values. Populated when executing
+ /// VPExpandSCEVRecipes.
+ DenseMap<const SCEV *, Value *> ExpandedSCEVs;
};
/// VPBlockBase is the building block of the Hierarchical Control-Flow Graph.
@@ -639,6 +666,10 @@ public:
VPLiveOut(PHINode *Phi, VPValue *Op)
: VPUser({Op}, VPUser::VPUserID::LiveOut), Phi(Phi) {}
+ static inline bool classof(const VPUser *U) {
+ return U->getVPUserID() == VPUser::VPUserID::LiveOut;
+ }
+
/// Fixup the wrapped LCSSA phi node in the unique exit block. This simply
/// means we need to add the appropriate incoming value from the middle
/// block as exiting edges from the scalar epilogue loop (if present) are
@@ -654,6 +685,11 @@ public:
}
PHINode *getPhi() const { return Phi; }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the VPLiveOut to \p O.
+ void print(raw_ostream &O, VPSlotTracker &SlotTracker) const;
+#endif
};
/// VPRecipeBase is a base class modeling a sequence of one or more output IR
@@ -790,6 +826,7 @@ public:
SLPLoad,
SLPStore,
ActiveLaneMask,
+ CalculateTripCountMinusVF,
CanonicalIVIncrement,
CanonicalIVIncrementNUW,
// The next two are similar to the above, but instead increment the
@@ -810,8 +847,10 @@ private:
const std::string Name;
/// Utility method serving execute(): generates a single instance of the
- /// modeled instruction.
- void generateInstruction(VPTransformState &State, unsigned Part);
+ /// modeled instruction. \returns the generated value for \p Part.
+ /// In some cases an existing value is returned rather than a generated
+ /// one.
+ Value *generateInstruction(VPTransformState &State, unsigned Part);
protected:
void setUnderlyingInstr(Instruction *I) { setUnderlyingValue(I); }
@@ -892,6 +931,7 @@ public:
default:
return false;
case VPInstruction::ActiveLaneMask:
+ case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrement:
case VPInstruction::CanonicalIVIncrementNUW:
case VPInstruction::CanonicalIVIncrementForPart:
@@ -903,14 +943,169 @@ public:
}
};
+/// Class to record LLVM IR flags for a recipe along with the recipe itself.
+class VPRecipeWithIRFlags : public VPRecipeBase {
+ enum class OperationType : unsigned char {
+ OverflowingBinOp,
+ PossiblyExactOp,
+ GEPOp,
+ FPMathOp,
+ Other
+ };
+ struct WrapFlagsTy {
+ char HasNUW : 1;
+ char HasNSW : 1;
+ };
+ struct ExactFlagsTy {
+ char IsExact : 1;
+ };
+ struct GEPFlagsTy {
+ char IsInBounds : 1;
+ };
+ struct FastMathFlagsTy {
+ char AllowReassoc : 1;
+ char NoNaNs : 1;
+ char NoInfs : 1;
+ char NoSignedZeros : 1;
+ char AllowReciprocal : 1;
+ char AllowContract : 1;
+ char ApproxFunc : 1;
+ };
+
+ OperationType OpType;
+
+ union {
+ WrapFlagsTy WrapFlags;
+ ExactFlagsTy ExactFlags;
+ GEPFlagsTy GEPFlags;
+ FastMathFlagsTy FMFs;
+ unsigned char AllFlags;
+ };
+
+public:
+ template <typename IterT>
+ VPRecipeWithIRFlags(const unsigned char SC, iterator_range<IterT> Operands)
+ : VPRecipeBase(SC, Operands) {
+ OpType = OperationType::Other;
+ AllFlags = 0;
+ }
+
+ template <typename IterT>
+ VPRecipeWithIRFlags(const unsigned char SC, iterator_range<IterT> Operands,
+ Instruction &I)
+ : VPRecipeWithIRFlags(SC, Operands) {
+ if (auto *Op = dyn_cast<OverflowingBinaryOperator>(&I)) {
+ OpType = OperationType::OverflowingBinOp;
+ WrapFlags.HasNUW = Op->hasNoUnsignedWrap();
+ WrapFlags.HasNSW = Op->hasNoSignedWrap();
+ } else if (auto *Op = dyn_cast<PossiblyExactOperator>(&I)) {
+ OpType = OperationType::PossiblyExactOp;
+ ExactFlags.IsExact = Op->isExact();
+ } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+ OpType = OperationType::GEPOp;
+ GEPFlags.IsInBounds = GEP->isInBounds();
+ } else if (auto *Op = dyn_cast<FPMathOperator>(&I)) {
+ OpType = OperationType::FPMathOp;
+ FastMathFlags FMF = Op->getFastMathFlags();
+ FMFs.AllowReassoc = FMF.allowReassoc();
+ FMFs.NoNaNs = FMF.noNaNs();
+ FMFs.NoInfs = FMF.noInfs();
+ FMFs.NoSignedZeros = FMF.noSignedZeros();
+ FMFs.AllowReciprocal = FMF.allowReciprocal();
+ FMFs.AllowContract = FMF.allowContract();
+ FMFs.ApproxFunc = FMF.approxFunc();
+ }
+ }
+
+ static inline bool classof(const VPRecipeBase *R) {
+ return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
+ R->getVPDefID() == VPRecipeBase::VPReplicateSC;
+ }
+
+ /// Drop all poison-generating flags.
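+ /// E.g., for an 'add nuw nsw' ingredient both wrap flags are cleared, and
+ /// for an 'inbounds' GEP the inbounds flag is cleared.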
+ void dropPoisonGeneratingFlags() {
+ // NOTE: This needs to be kept in-sync with
+ // Instruction::dropPoisonGeneratingFlags.
+ switch (OpType) {
+ case OperationType::OverflowingBinOp:
+ WrapFlags.HasNUW = false;
+ WrapFlags.HasNSW = false;
+ break;
+ case OperationType::PossiblyExactOp:
+ ExactFlags.IsExact = false;
+ break;
+ case OperationType::GEPOp:
+ GEPFlags.IsInBounds = false;
+ break;
+ case OperationType::FPMathOp:
+ FMFs.NoNaNs = false;
+ FMFs.NoInfs = false;
+ break;
+ case OperationType::Other:
+ break;
+ }
+ }
+
+ /// Set the IR flags for \p I.
+ void setFlags(Instruction *I) const {
+ switch (OpType) {
+ case OperationType::OverflowingBinOp:
+ I->setHasNoUnsignedWrap(WrapFlags.HasNUW);
+ I->setHasNoSignedWrap(WrapFlags.HasNSW);
+ break;
+ case OperationType::PossiblyExactOp:
+ I->setIsExact(ExactFlags.IsExact);
+ break;
+ case OperationType::GEPOp:
+ cast<GetElementPtrInst>(I)->setIsInBounds(GEPFlags.IsInBounds);
+ break;
+ case OperationType::FPMathOp:
+ I->setHasAllowReassoc(FMFs.AllowReassoc);
+ I->setHasNoNaNs(FMFs.NoNaNs);
+ I->setHasNoInfs(FMFs.NoInfs);
+ I->setHasNoSignedZeros(FMFs.NoSignedZeros);
+ I->setHasAllowReciprocal(FMFs.AllowReciprocal);
+ I->setHasAllowContract(FMFs.AllowContract);
+ I->setHasApproxFunc(FMFs.ApproxFunc);
+ break;
+ case OperationType::Other:
+ break;
+ }
+ }
+
+ bool isInBounds() const {
+ assert(OpType == OperationType::GEPOp &&
+ "recipe doesn't have inbounds flag");
+ return GEPFlags.IsInBounds;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ FastMathFlags getFastMathFlags() const {
+ FastMathFlags Res;
+ Res.setAllowReassoc(FMFs.AllowReassoc);
+ Res.setNoNaNs(FMFs.NoNaNs);
+ Res.setNoInfs(FMFs.NoInfs);
+ Res.setNoSignedZeros(FMFs.NoSignedZeros);
+ Res.setAllowReciprocal(FMFs.AllowReciprocal);
+ Res.setAllowContract(FMFs.AllowContract);
+ Res.setApproxFunc(FMFs.ApproxFunc);
+ return Res;
+ }
+
+ void printFlags(raw_ostream &O) const;
+#endif
+};
+
/// VPWidenRecipe is a recipe for producing a vector-typed copy of its
/// ingredient. This recipe covers most of the traditional vectorization cases
/// where each ingredient transforms into a vectorized version of itself.
-class VPWidenRecipe : public VPRecipeBase, public VPValue {
+class VPWidenRecipe : public VPRecipeWithIRFlags, public VPValue {
+
public:
template <typename IterT>
VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
- : VPRecipeBase(VPDef::VPWidenSC, Operands), VPValue(this, &I) {}
+ : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I), VPValue(this, &I) {}
~VPWidenRecipe() override = default;
@@ -926,18 +1121,62 @@ public:
#endif
};
+/// VPWidenCastRecipe is a recipe to create vector cast instructions.
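+/// E.g., widening 'zext i8 %x to i32' for VF 4 produces a single
+/// 'zext <4 x i8> ... to <4 x i32>'.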
+class VPWidenCastRecipe : public VPRecipeBase, public VPValue {
+ /// Cast instruction opcode.
+ Instruction::CastOps Opcode;
+
+ /// Result type for the cast.
+ Type *ResultTy;
+
+public:
+ VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
+ CastInst *UI = nullptr)
+ : VPRecipeBase(VPDef::VPWidenCastSC, Op), VPValue(this, UI),
+ Opcode(Opcode), ResultTy(ResultTy) {
+ assert((!UI || UI->getOpcode() == Opcode) &&
+ "opcode of underlying cast doesn't match");
+ assert((!UI || UI->getType() == ResultTy) &&
+ "result type of underlying cast doesn't match");
+ }
+
+ ~VPWidenCastRecipe() override = default;
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
+
+ /// Produce widened copies of the cast.
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ Instruction::CastOps getOpcode() const { return Opcode; }
+
+ /// Returns the result type of the cast.
+ Type *getResultType() const { return ResultTy; }
+};
+
/// A recipe for widening Call instructions.
class VPWidenCallRecipe : public VPRecipeBase, public VPValue {
/// ID of the vector intrinsic to call when widening the call. If set the
/// Intrinsic::not_intrinsic, a library call will be used instead.
Intrinsic::ID VectorIntrinsicID;
+ /// If this recipe represents a library call, Variant stores a pointer to
+ /// the chosen function. There is a 1:1 mapping between a given VF and the
+ /// chosen vectorized variant, so there will be a different VPlan for each
+ /// VF with a valid variant.
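+ /// For example, a call to 'sinf' widened for VF 4 may instead call a 4-lane
+ /// vector variant of the routine provided by the target's vector library.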
+ Function *Variant;
public:
template <typename IterT>
VPWidenCallRecipe(CallInst &I, iterator_range<IterT> CallArguments,
- Intrinsic::ID VectorIntrinsicID)
+ Intrinsic::ID VectorIntrinsicID,
+ Function *Variant = nullptr)
: VPRecipeBase(VPDef::VPWidenCallSC, CallArguments), VPValue(this, &I),
- VectorIntrinsicID(VectorIntrinsicID) {}
+ VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {}
~VPWidenCallRecipe() override = default;
@@ -954,17 +1193,10 @@ public:
};
/// A recipe for widening select instructions.
-class VPWidenSelectRecipe : public VPRecipeBase, public VPValue {
-
- /// Is the condition of the select loop invariant?
- bool InvariantCond;
-
-public:
+struct VPWidenSelectRecipe : public VPRecipeBase, public VPValue {
template <typename IterT>
- VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands,
- bool InvariantCond)
- : VPRecipeBase(VPDef::VPWidenSelectSC, Operands), VPValue(this, &I),
- InvariantCond(InvariantCond) {}
+ VPWidenSelectRecipe(SelectInst &I, iterator_range<IterT> Operands)
+ : VPRecipeBase(VPDef::VPWidenSelectSC, Operands), VPValue(this, &I) {}
~VPWidenSelectRecipe() override = default;
@@ -978,29 +1210,38 @@ public:
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
+
+ VPValue *getCond() const {
+ return getOperand(0);
+ }
+
+ bool isInvariantCond() const {
+ return getCond()->isDefinedOutsideVectorRegions();
+ }
};
/// A recipe for handling GEP instructions.
-class VPWidenGEPRecipe : public VPRecipeBase, public VPValue {
- bool IsPtrLoopInvariant;
- SmallBitVector IsIndexLoopInvariant;
+class VPWidenGEPRecipe : public VPRecipeWithIRFlags, public VPValue {
+ bool isPointerLoopInvariant() const {
+ return getOperand(0)->isDefinedOutsideVectorRegions();
+ }
+
+ bool isIndexLoopInvariant(unsigned I) const {
+ return getOperand(I + 1)->isDefinedOutsideVectorRegions();
+ }
+
+ bool areAllOperandsInvariant() const {
+ return all_of(operands(), [](VPValue *Op) {
+ return Op->isDefinedOutsideVectorRegions();
+ });
+ }
public:
template <typename IterT>
VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands)
- : VPRecipeBase(VPDef::VPWidenGEPSC, Operands), VPValue(this, GEP),
- IsIndexLoopInvariant(GEP->getNumIndices(), false) {}
+ : VPRecipeWithIRFlags(VPDef::VPWidenGEPSC, Operands, *GEP),
+ VPValue(this, GEP) {}
- template <typename IterT>
- VPWidenGEPRecipe(GetElementPtrInst *GEP, iterator_range<IterT> Operands,
- Loop *OrigLoop)
- : VPRecipeBase(VPDef::VPWidenGEPSC, Operands), VPValue(this, GEP),
- IsIndexLoopInvariant(GEP->getNumIndices(), false) {
- IsPtrLoopInvariant = OrigLoop->isLoopInvariant(GEP->getPointerOperand());
- for (auto Index : enumerate(GEP->indices()))
- IsIndexLoopInvariant[Index.index()] =
- OrigLoop->isLoopInvariant(Index.value().get());
- }
~VPWidenGEPRecipe() override = default;
VP_CLASSOF_IMPL(VPDef::VPWidenGEPSC)
@@ -1015,78 +1256,6 @@ public:
#endif
};
-/// A recipe for handling phi nodes of integer and floating-point inductions,
-/// producing their vector values.
-class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue {
- PHINode *IV;
- const InductionDescriptor &IndDesc;
- bool NeedsVectorIV;
-
-public:
- VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
- const InductionDescriptor &IndDesc,
- bool NeedsVectorIV)
- : VPRecipeBase(VPDef::VPWidenIntOrFpInductionSC, {Start, Step}),
- VPValue(this, IV), IV(IV), IndDesc(IndDesc),
- NeedsVectorIV(NeedsVectorIV) {}
-
- VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
- const InductionDescriptor &IndDesc,
- TruncInst *Trunc, bool NeedsVectorIV)
- : VPRecipeBase(VPDef::VPWidenIntOrFpInductionSC, {Start, Step}),
- VPValue(this, Trunc), IV(IV), IndDesc(IndDesc),
- NeedsVectorIV(NeedsVectorIV) {}
-
- ~VPWidenIntOrFpInductionRecipe() override = default;
-
- VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
-
- /// Generate the vectorized and scalarized versions of the phi node as
- /// needed by their users.
- void execute(VPTransformState &State) override;
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print the recipe.
- void print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const override;
-#endif
-
- /// Returns the start value of the induction.
- VPValue *getStartValue() { return getOperand(0); }
- const VPValue *getStartValue() const { return getOperand(0); }
-
- /// Returns the step value of the induction.
- VPValue *getStepValue() { return getOperand(1); }
- const VPValue *getStepValue() const { return getOperand(1); }
-
- /// Returns the first defined value as TruncInst, if it is one or nullptr
- /// otherwise.
- TruncInst *getTruncInst() {
- return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue());
- }
- const TruncInst *getTruncInst() const {
- return dyn_cast_or_null<TruncInst>(getVPValue(0)->getUnderlyingValue());
- }
-
- PHINode *getPHINode() { return IV; }
-
- /// Returns the induction descriptor for the recipe.
- const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
-
- /// Returns true if the induction is canonical, i.e. starting at 0 and
- /// incremented by UF * VF (= the original IV is incremented by 1).
- bool isCanonical() const;
-
- /// Returns the scalar type of the induction.
- const Type *getScalarType() const {
- const TruncInst *TruncI = getTruncInst();
- return TruncI ? TruncI->getType() : IV->getType();
- }
-
- /// Returns true if a vector phi needs to be created for the induction.
- bool needsVectorIV() const { return NeedsVectorIV; }
-};
-
/// A pure virtual base class for all recipes modeling header phis, including
/// phis for first order recurrences, pointer inductions and reductions. The
/// start value is the first operand of the recipe and the incoming value from
@@ -1112,9 +1281,9 @@ public:
/// per-lane based on the canonical induction.
class VPHeaderPHIRecipe : public VPRecipeBase, public VPValue {
protected:
- VPHeaderPHIRecipe(unsigned char VPDefID, PHINode *Phi,
+ VPHeaderPHIRecipe(unsigned char VPDefID, Instruction *UnderlyingInstr,
VPValue *Start = nullptr)
- : VPRecipeBase(VPDefID, {}), VPValue(this, Phi) {
+ : VPRecipeBase(VPDefID, {}), VPValue(this, UnderlyingInstr) {
if (Start)
addOperand(Start);
}
@@ -1125,12 +1294,12 @@ public:
/// Method to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VPRecipeBase *B) {
return B->getVPDefID() >= VPDef::VPFirstHeaderPHISC &&
- B->getVPDefID() <= VPDef::VPLastPHISC;
+ B->getVPDefID() <= VPDef::VPLastHeaderPHISC;
}
static inline bool classof(const VPValue *V) {
auto *B = V->getDefiningRecipe();
return B && B->getVPDefID() >= VPRecipeBase::VPFirstHeaderPHISC &&
- B->getVPDefID() <= VPRecipeBase::VPLastPHISC;
+ B->getVPDefID() <= VPRecipeBase::VPLastHeaderPHISC;
}
/// Generate the phi nodes.
@@ -1154,17 +1323,92 @@ public:
void setStartValue(VPValue *V) { setOperand(0, V); }
/// Returns the incoming value from the loop backedge.
- VPValue *getBackedgeValue() {
+ virtual VPValue *getBackedgeValue() {
return getOperand(1);
}
/// Returns the backedge value as a recipe. The backedge value is guaranteed
/// to be a recipe.
- VPRecipeBase &getBackedgeRecipe() {
+ virtual VPRecipeBase &getBackedgeRecipe() {
return *getBackedgeValue()->getDefiningRecipe();
}
};
+/// A recipe for handling phi nodes of integer and floating-point inductions,
+/// producing their vector values.
+class VPWidenIntOrFpInductionRecipe : public VPHeaderPHIRecipe {
+ PHINode *IV;
+ TruncInst *Trunc;
+ const InductionDescriptor &IndDesc;
+
+public:
+ VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
+ const InductionDescriptor &IndDesc)
+ : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, IV, Start), IV(IV),
+ Trunc(nullptr), IndDesc(IndDesc) {
+ addOperand(Step);
+ }
+
+ VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
+ const InductionDescriptor &IndDesc,
+ TruncInst *Trunc)
+ : VPHeaderPHIRecipe(VPDef::VPWidenIntOrFpInductionSC, Trunc, Start),
+ IV(IV), Trunc(Trunc), IndDesc(IndDesc) {
+ addOperand(Step);
+ }
+
+ ~VPWidenIntOrFpInductionRecipe() override = default;
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenIntOrFpInductionSC)
+
+ /// Generate the vectorized and scalarized versions of the phi node as
+ /// needed by their users.
+ void execute(VPTransformState &State) override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
+ VPValue *getBackedgeValue() override {
+ // TODO: All operands of base recipe must exist and be at same index in
+ // derived recipe.
+ llvm_unreachable(
+ "VPWidenIntOrFpInductionRecipe generates its own backedge value");
+ }
+
+ VPRecipeBase &getBackedgeRecipe() override {
+ // TODO: All operands of base recipe must exist and be at same index in
+ // derived recipe.
+ llvm_unreachable(
+ "VPWidenIntOrFpInductionRecipe generates its own backedge value");
+ }
+
+ /// Returns the step value of the induction.
+ VPValue *getStepValue() { return getOperand(1); }
+ const VPValue *getStepValue() const { return getOperand(1); }
+
+ /// Returns the first defined value as TruncInst, if it is one or nullptr
+ /// otherwise.
+ TruncInst *getTruncInst() { return Trunc; }
+ const TruncInst *getTruncInst() const { return Trunc; }
+
+ PHINode *getPHINode() { return IV; }
+
+ /// Returns the induction descriptor for the recipe.
+ const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
+
+ /// Returns true if the induction is canonical, i.e. starting at 0 and
+ /// incremented by UF * VF (= the original IV is incremented by 1).
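+ /// E.g., the IV of 'for (int i = 0; i < n; ++i)' is canonical, while one
+ /// starting at 5 or stepping by 2 is not.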
+ bool isCanonical() const;
+
+ /// Returns the scalar type of the induction.
+ const Type *getScalarType() const {
+ return Trunc ? Trunc->getType() : IV->getType();
+ }
+};
+
class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
const InductionDescriptor &IndDesc;
@@ -1374,12 +1618,20 @@ public:
class VPInterleaveRecipe : public VPRecipeBase {
const InterleaveGroup<Instruction> *IG;
+ /// Indicates if the interleave group is in a conditional block and requires a
+ /// mask.
bool HasMask = false;
+ /// Indicates if gaps between members of the group need to be masked out or if
+ /// unused gaps can be loaded speculatively.
+ bool NeedsMaskForGaps = false;
+
public:
VPInterleaveRecipe(const InterleaveGroup<Instruction> *IG, VPValue *Addr,
- ArrayRef<VPValue *> StoredValues, VPValue *Mask)
- : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG) {
+ ArrayRef<VPValue *> StoredValues, VPValue *Mask,
+ bool NeedsMaskForGaps)
+ : VPRecipeBase(VPDef::VPInterleaveSC, {Addr}), IG(IG),
+ NeedsMaskForGaps(NeedsMaskForGaps) {
for (unsigned i = 0; i < IG->getFactor(); ++i)
if (Instruction *I = IG->getMember(i)) {
if (I->getType()->isVoidTy())
@@ -1490,28 +1742,21 @@ public:
/// copies of the original scalar type, one per lane, instead of producing a
/// single copy of widened type for all lanes. If the instruction is known to be
/// uniform only one copy, per lane zero, will be generated.
-class VPReplicateRecipe : public VPRecipeBase, public VPValue {
+class VPReplicateRecipe : public VPRecipeWithIRFlags, public VPValue {
/// Indicator if only a single replica per lane is needed.
bool IsUniform;
/// Indicator if the replicas are also predicated.
bool IsPredicated;
- /// Indicator if the scalar values should also be packed into a vector.
- bool AlsoPack;
-
public:
template <typename IterT>
VPReplicateRecipe(Instruction *I, iterator_range<IterT> Operands,
- bool IsUniform, bool IsPredicated = false)
- : VPRecipeBase(VPDef::VPReplicateSC, Operands), VPValue(this, I),
- IsUniform(IsUniform), IsPredicated(IsPredicated) {
- // Retain the previous behavior of predicateInstructions(), where an
- // insert-element of a predicated instruction got hoisted into the
- // predicated basic block iff it was its only user. This is achieved by
- // having predicated instructions also pack their values into a vector by
- // default unless they have a replicated user which uses their scalar value.
- AlsoPack = IsPredicated && !I->use_empty();
+ bool IsUniform, VPValue *Mask = nullptr)
+ : VPRecipeWithIRFlags(VPDef::VPReplicateSC, Operands, *I),
+ VPValue(this, I), IsUniform(IsUniform), IsPredicated(Mask) {
+ if (Mask)
+ addOperand(Mask);
}
~VPReplicateRecipe() override = default;
@@ -1523,8 +1768,6 @@ public:
/// the \p State.
void execute(VPTransformState &State) override;
- void setAlsoPack(bool Pack) { AlsoPack = Pack; }
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
@@ -1533,8 +1776,6 @@ public:
bool isUniform() const { return IsUniform; }
- bool isPacked() const { return AlsoPack; }
-
bool isPredicated() const { return IsPredicated; }
/// Returns true if the recipe only uses the first lane of operand \p Op.
@@ -1550,6 +1791,17 @@ public:
"Op must be an operand of the recipe");
return true;
}
+
+ /// Returns true if the recipe is used by a widened recipe via an intervening
+ /// VPPredInstPHIRecipe. In this case, the scalar values should also be packed
+ /// in a vector.
+ bool shouldPack() const;
+
+ /// Return the mask of a predicated VPReplicateRecipe.
+ VPValue *getMask() {
+ assert(isPredicated() && "Trying to get the mask of an unpredicated recipe");
+ return getOperand(getNumOperands() - 1);
+ }
};
/// A recipe for generating conditional branches on the bits of a mask.
@@ -1791,9 +2043,11 @@ public:
return true;
}
- /// Check if the induction described by \p ID is canonical, i.e. has the same
- /// start, step (of 1), and type as the canonical IV.
- bool isCanonical(const InductionDescriptor &ID, Type *Ty) const;
+ /// Check if the induction described by \p Kind, \p Start and \p Step is
+ /// canonical, i.e. has the same start, step (of 1), and type as the
+ /// canonical IV.
+ bool isCanonical(InductionDescriptor::InductionKind Kind, VPValue *Start,
+ VPValue *Step, Type *Ty) const;
};
/// A recipe for generating the active lane mask for the vector loop that is
@@ -2156,13 +2410,19 @@ public:
/// to produce efficient output IR, including which branches, basic-blocks and
/// output IR instructions to generate, and their cost. VPlan holds a
/// Hierarchical-CFG of VPBasicBlocks and VPRegionBlocks rooted at an Entry
-/// VPBlock.
+/// VPBasicBlock.
class VPlan {
friend class VPlanPrinter;
friend class VPSlotTracker;
- /// Hold the single entry to the Hierarchical CFG of the VPlan.
- VPBlockBase *Entry;
+ /// Hold the single entry to the Hierarchical CFG of the VPlan, i.e. the
+ /// preheader of the vector loop.
+ VPBasicBlock *Entry;
+
+ /// VPBasicBlock corresponding to the original preheader. Used to place
+ /// VPExpandSCEV recipes for expressions used during skeleton creation and the
+ /// rest of VPlan execution.
+ VPBasicBlock *Preheader;
/// Holds the VFs applicable to this VPlan.
SmallSetVector<ElementCount, 2> VFs;
@@ -2174,10 +2434,6 @@ class VPlan {
/// Holds the name of the VPlan, for printing.
std::string Name;
- /// Holds all the external definitions created for this VPlan. External
- /// definitions must be immutable and hold a pointer to their underlying IR.
- DenseMap<Value *, VPValue *> VPExternalDefs;
-
/// Represents the trip count of the original loop, for folding
/// the tail.
VPValue *TripCount = nullptr;
@@ -2193,9 +2449,9 @@ class VPlan {
/// VPlan.
Value2VPValueTy Value2VPValue;
- /// Contains all VPValues that been allocated by addVPValue directly and need
- /// to be free when the plan's destructor is called.
- SmallVector<VPValue *, 16> VPValuesToFree;
+ /// Contains all the external definitions created for this VPlan. External
+ /// definitions are VPValues that hold a pointer to their underlying IR.
+ SmallVector<VPValue *, 16> VPLiveInsToFree;
/// Indicates whether it is safe use the Value2VPValue mapping or if the
/// mapping cannot be used any longer, because it is stale.
@@ -2204,14 +2460,41 @@ class VPlan {
/// Values used outside the plan.
MapVector<PHINode *, VPLiveOut *> LiveOuts;
+ /// Mapping from SCEVs to the VPValues representing their expansions.
+ /// NOTE: This mapping is temporary and will be removed once all users have
+ /// been modeled in VPlan directly.
+ DenseMap<const SCEV *, VPValue *> SCEVToExpansion;
+
public:
- VPlan(VPBlockBase *Entry = nullptr) : Entry(Entry) {
- if (Entry)
- Entry->setPlan(this);
+ /// Construct a VPlan with original preheader \p Preheader, trip count \p TC
+ /// and \p Entry to the plan. At the moment, \p Preheader and \p Entry need to
+ /// be disconnected, as the bypass blocks between them are not yet modeled in
+ /// VPlan.
+ VPlan(VPBasicBlock *Preheader, VPValue *TC, VPBasicBlock *Entry)
+ : VPlan(Preheader, Entry) {
+ TripCount = TC;
+ }
+
+ /// Construct a VPlan with original preheader \p Preheader and \p Entry to
+ /// the plan. At the moment, \p Preheader and \p Entry need to be
+ /// disconnected, as the bypass blocks between them are not yet modeled in
+ /// VPlan.
+ VPlan(VPBasicBlock *Preheader, VPBasicBlock *Entry)
+ : Entry(Entry), Preheader(Preheader) {
+ Entry->setPlan(this);
+ Preheader->setPlan(this);
+ assert(Preheader->getNumSuccessors() == 0 &&
+ Preheader->getNumPredecessors() == 0 &&
+ "preheader must be disconnected");
}
~VPlan();
+ /// Create an initial VPlan with preheader and entry blocks. Creates a
+ /// VPExpandSCEVRecipe for \p TripCount and uses it as plan's trip count.
+ static VPlanPtr createInitialVPlan(const SCEV *TripCount,
+ ScalarEvolution &PSE);
+
/// Prepare the plan for execution, setting up the required live-in values.
void prepareToExecute(Value *TripCount, Value *VectorTripCount,
Value *CanonicalIVStartValue, VPTransformState &State,
@@ -2220,19 +2503,12 @@ public:
/// Generate the IR code for this VPlan.
void execute(VPTransformState *State);
- VPBlockBase *getEntry() { return Entry; }
- const VPBlockBase *getEntry() const { return Entry; }
-
- VPBlockBase *setEntry(VPBlockBase *Block) {
- Entry = Block;
- Block->setPlan(this);
- return Entry;
- }
+ VPBasicBlock *getEntry() { return Entry; }
+ const VPBasicBlock *getEntry() const { return Entry; }
/// The trip count of the original loop.
- VPValue *getOrCreateTripCount() {
- if (!TripCount)
- TripCount = new VPValue();
+ VPValue *getTripCount() const {
+ assert(TripCount && "trip count needs to be set before accessing it");
return TripCount;
}
@@ -2275,50 +2551,35 @@ public:
void setName(const Twine &newName) { Name = newName.str(); }
- /// Get the existing or add a new external definition for \p V.
- VPValue *getOrAddExternalDef(Value *V) {
- auto I = VPExternalDefs.insert({V, nullptr});
- if (I.second)
- I.first->second = new VPValue(V);
- return I.first->second;
- }
-
- void addVPValue(Value *V) {
- assert(Value2VPValueEnabled &&
- "IR value to VPValue mapping may be out of date!");
- assert(V && "Trying to add a null Value to VPlan");
- assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
- VPValue *VPV = new VPValue(V);
- Value2VPValue[V] = VPV;
- VPValuesToFree.push_back(VPV);
- }
-
void addVPValue(Value *V, VPValue *VPV) {
- assert(Value2VPValueEnabled && "Value2VPValue mapping may be out of date!");
+ assert((Value2VPValueEnabled || VPV->isLiveIn()) &&
+ "Value2VPValue mapping may be out of date!");
assert(V && "Trying to add a null Value to VPlan");
assert(!Value2VPValue.count(V) && "Value already exists in VPlan");
Value2VPValue[V] = VPV;
}
/// Returns the VPValue for \p V. \p OverrideAllowed can be used to disable
- /// checking whether it is safe to query VPValues using IR Values.
+ /// checking whether it is safe to query VPValues using IR Values.
VPValue *getVPValue(Value *V, bool OverrideAllowed = false) {
- assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) &&
- "Value2VPValue mapping may be out of date!");
assert(V && "Trying to get the VPValue of a null Value");
assert(Value2VPValue.count(V) && "Value does not exist in VPlan");
+ assert((Value2VPValueEnabled || OverrideAllowed ||
+ Value2VPValue[V]->isLiveIn()) &&
+ "Value2VPValue mapping may be out of date!");
return Value2VPValue[V];
}
- /// Gets the VPValue or adds a new one (if none exists yet) for \p V. \p
- /// OverrideAllowed can be used to disable checking whether it is safe to
- /// query VPValues using IR Values.
- VPValue *getOrAddVPValue(Value *V, bool OverrideAllowed = false) {
- assert((OverrideAllowed || isa<Constant>(V) || Value2VPValueEnabled) &&
- "Value2VPValue mapping may be out of date!");
+ /// Gets the VPValue for \p V or adds a new live-in (if none exists yet) for
+ /// \p V.
+ VPValue *getVPValueOrAddLiveIn(Value *V) {
assert(V && "Trying to get or add the VPValue of a null Value");
- if (!Value2VPValue.count(V))
- addVPValue(V);
+ if (!Value2VPValue.count(V)) {
+ VPValue *VPV = new VPValue(V);
+ VPLiveInsToFree.push_back(VPV);
+ addVPValue(V, VPV);
+ }
+
return getVPValue(V);
}
@@ -2344,7 +2605,7 @@ public:
iterator_range<mapped_iterator<Use *, std::function<VPValue *(Value *)>>>
mapToVPValues(User::op_range Operands) {
std::function<VPValue *(Value *)> Fn = [this](Value *Op) {
- return getOrAddVPValue(Op);
+ return getVPValueOrAddLiveIn(Op);
};
return map_range(Operands, Fn);
}
@@ -2373,12 +2634,6 @@ public:
void addLiveOut(PHINode *PN, VPValue *V);
- void clearLiveOuts() {
- for (auto &KV : LiveOuts)
- delete KV.second;
- LiveOuts.clear();
- }
-
void removeLiveOut(PHINode *PN) {
delete LiveOuts[PN];
LiveOuts.erase(PN);
@@ -2388,6 +2643,19 @@ public:
return LiveOuts;
}
+ VPValue *getSCEVExpansion(const SCEV *S) const {
+ return SCEVToExpansion.lookup(S);
+ }
+
+ void addSCEVExpansion(const SCEV *S, VPValue *V) {
+ assert(!SCEVToExpansion.contains(S) && "SCEV already expanded");
+ SCEVToExpansion[S] = V;
+ }
+
+ /// \return The block corresponding to the original preheader.
+ VPBasicBlock *getPreheader() { return Preheader; }
+ const VPBasicBlock *getPreheader() const { return Preheader; }
+
private:
/// Add to the given dominator tree the header block and every new basic block
/// that was created between it and the latch block, inclusive.
@@ -2709,6 +2977,8 @@ inline bool isUniformAfterVectorization(VPValue *VPV) {
assert(Def && "Must have definition for value defined inside vector region");
if (auto Rep = dyn_cast<VPReplicateRecipe>(Def))
return Rep->isUniform();
+ if (auto *GEP = dyn_cast<VPWidenGEPRecipe>(Def))
+ return all_of(GEP->operands(), isUniformAfterVectorization);
return false;
}
} // end namespace vputils
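The VPlan interface changes above replace the default constructor and the external-definition map with an explicit preheader/entry pair plus live-in values. A minimal usage sketch (editorial, not from the patch; assumes a ScalarEvolution &SE, the loop trip-count SCEV *TC and an IR Value *V in scope):

  VPlanPtr Plan = VPlan::createInitialVPlan(TC, SE); // creates a VPExpandSCEV recipe for TC
  VPBasicBlock *PH = Plan->getPreheader();           // block for the original preheader
  VPBasicBlock *Entry = Plan->getEntry();            // preheader of the vector loop
  VPValue *TripCount = Plan->getTripCount();         // set by createInitialVPlan
  VPValue *LiveIn = Plan->getVPValueOrAddLiveIn(V);  // replaces getOrAddExternalDef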
diff --git a/llvm/lib/Transforms/Vectorize/VPlanCFG.h b/llvm/lib/Transforms/Vectorize/VPlanCFG.h
index f790f7e73e11..89e2e7514dac 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanCFG.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanCFG.h
@@ -13,6 +13,7 @@
#define LLVM_TRANSFORMS_VECTORIZE_VPLANCFG_H
#include "VPlan.h"
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallVector.h"
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
index 952ce72e36c1..f6e3a2a16db8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.cpp
@@ -73,9 +73,8 @@ public:
PlainCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
: TheLoop(Lp), LI(LI), Plan(P) {}
- /// Build plain CFG for TheLoop. Return the pre-header VPBasicBlock connected
- /// to a new VPRegionBlock (TopRegion) enclosing the plain CFG.
- VPBasicBlock *buildPlainCFG();
+ /// Build plain CFG for TheLoop and connects it to Plan's entry.
+ void buildPlainCFG();
};
} // anonymous namespace
@@ -196,7 +195,7 @@ VPValue *PlainCFGBuilder::getOrCreateVPOperand(Value *IRVal) {
// A and B: Create VPValue and add it to the pool of external definitions and
// to the Value->VPValue map.
- VPValue *NewVPVal = Plan.getOrAddExternalDef(IRVal);
+ VPValue *NewVPVal = Plan.getVPValueOrAddLiveIn(IRVal);
IRDef2VPValue[IRVal] = NewVPVal;
return NewVPVal;
}
@@ -254,7 +253,7 @@ void PlainCFGBuilder::createVPInstructionsForVPBB(VPBasicBlock *VPBB,
}
// Main interface to build the plain CFG.
-VPBasicBlock *PlainCFGBuilder::buildPlainCFG() {
+void PlainCFGBuilder::buildPlainCFG() {
// 1. Scan the body of the loop in a topological order to visit each basic
// block after having visited its predecessor basic blocks. Create a VPBB for
// each BB and link it to its successor and predecessor VPBBs. Note that
@@ -267,12 +266,13 @@ VPBasicBlock *PlainCFGBuilder::buildPlainCFG() {
BasicBlock *ThePreheaderBB = TheLoop->getLoopPreheader();
assert((ThePreheaderBB->getTerminator()->getNumSuccessors() == 1) &&
"Unexpected loop preheader");
- VPBasicBlock *ThePreheaderVPBB = getOrCreateVPBB(ThePreheaderBB);
+ VPBasicBlock *ThePreheaderVPBB = Plan.getEntry();
+ BB2VPBB[ThePreheaderBB] = ThePreheaderVPBB;
ThePreheaderVPBB->setName("vector.ph");
for (auto &I : *ThePreheaderBB) {
if (I.getType()->isVoidTy())
continue;
- IRDef2VPValue[&I] = Plan.getOrAddExternalDef(&I);
+ IRDef2VPValue[&I] = Plan.getVPValueOrAddLiveIn(&I);
}
// Create empty VPBB for Loop H so that we can link PH->H.
VPBlockBase *HeaderVPBB = getOrCreateVPBB(TheLoop->getHeader());
@@ -371,20 +371,17 @@ VPBasicBlock *PlainCFGBuilder::buildPlainCFG() {
// have a VPlan counterpart. Fix VPlan phi nodes by adding their corresponding
// VPlan operands.
fixPhiNodes();
-
- return ThePreheaderVPBB;
}
-VPBasicBlock *VPlanHCFGBuilder::buildPlainCFG() {
+void VPlanHCFGBuilder::buildPlainCFG() {
PlainCFGBuilder PCFGBuilder(TheLoop, LI, Plan);
- return PCFGBuilder.buildPlainCFG();
+ PCFGBuilder.buildPlainCFG();
}
// Public interface to build a H-CFG.
void VPlanHCFGBuilder::buildHierarchicalCFG() {
- // Build Top Region enclosing the plain CFG and set it as VPlan entry.
- VPBasicBlock *EntryVPBB = buildPlainCFG();
- Plan.setEntry(EntryVPBB);
+ // Build Top Region enclosing the plain CFG.
+ buildPlainCFG();
LLVM_DEBUG(Plan.setName("HCFGBuilder: Plain CFG\n"); dbgs() << Plan);
VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
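Since buildPlainCFG() now wires the CFG directly into the plan's entry block, callers only touch the public interface. A rough sketch (editorial; assumes a Loop *L, LoopInfo *LI and a VPlan built as above):

  VPlanHCFGBuilder HCFGBuilder(L, LI, *Plan);
  HCFGBuilder.buildHierarchicalCFG(); // builds the plain CFG and the enclosing top region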
diff --git a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
index 2d52990af268..299ae36155cb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanHCFGBuilder.h
@@ -57,9 +57,8 @@ private:
// are introduced.
VPDominatorTree VPDomTree;
- /// Build plain CFG for TheLoop. Return the pre-header VPBasicBlock connected
- /// to a new VPRegionBlock (TopRegion) enclosing the plain CFG.
- VPBasicBlock *buildPlainCFG();
+ /// Build plain CFG for TheLoop and connects it to Plan's entry.
+ void buildPlainCFG();
public:
VPlanHCFGBuilder(Loop *Lp, LoopInfo *LI, VPlan &P)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 4e9be35001ad..26c309eed800 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -34,7 +34,9 @@ using namespace llvm;
using VectorParts = SmallVector<Value *, 2>;
+namespace llvm {
extern cl::opt<bool> EnableVPlanNativePath;
+}
#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME
@@ -50,14 +52,16 @@ bool VPRecipeBase::mayWriteToMemory() const {
->mayWriteToMemory();
case VPBranchOnMaskSC:
case VPScalarIVStepsSC:
+ case VPPredInstPHISC:
return false;
- case VPWidenIntOrFpInductionSC:
+ case VPBlendSC:
+ case VPReductionSC:
case VPWidenCanonicalIVSC:
+ case VPWidenCastSC:
+ case VPWidenGEPSC:
+ case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
- case VPBlendSC:
case VPWidenSC:
- case VPWidenGEPSC:
- case VPReductionSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -82,14 +86,16 @@ bool VPRecipeBase::mayReadFromMemory() const {
->mayReadFromMemory();
case VPBranchOnMaskSC:
case VPScalarIVStepsSC:
+ case VPPredInstPHISC:
return false;
- case VPWidenIntOrFpInductionSC:
+ case VPBlendSC:
+ case VPReductionSC:
case VPWidenCanonicalIVSC:
+ case VPWidenCastSC:
+ case VPWidenGEPSC:
+ case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
- case VPBlendSC:
case VPWidenSC:
- case VPWidenGEPSC:
- case VPReductionSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -108,16 +114,20 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPDerivedIVSC:
case VPPredInstPHISC:
return false;
- case VPWidenIntOrFpInductionSC:
- case VPWidenPointerInductionSC:
+ case VPWidenCallSC:
+ return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
+ ->mayHaveSideEffects();
+ case VPBlendSC:
+ case VPReductionSC:
+ case VPScalarIVStepsSC:
case VPWidenCanonicalIVSC:
+ case VPWidenCastSC:
+ case VPWidenGEPSC:
+ case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
- case VPBlendSC:
+ case VPWidenPointerInductionSC:
case VPWidenSC:
- case VPWidenGEPSC:
- case VPReductionSC:
- case VPWidenSelectSC:
- case VPScalarIVStepsSC: {
+ case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;
@@ -125,6 +135,13 @@ bool VPRecipeBase::mayHaveSideEffects() const {
"underlying instruction has side-effects");
return false;
}
+ case VPWidenMemoryInstructionSC:
+ assert(cast<VPWidenMemoryInstructionRecipe>(this)
+ ->getIngredient()
+ .mayHaveSideEffects() == mayWriteToMemory() &&
+ "mayHaveSideffects result for ingredient differs from this "
+ "implementation");
+ return mayWriteToMemory();
case VPReplicateSC: {
auto *R = cast<VPReplicateRecipe>(this);
return R->getUnderlyingInstr()->mayHaveSideEffects();
@@ -143,6 +160,16 @@ void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
State.Builder.GetInsertBlock());
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
+ O << "Live-out ";
+ getPhi()->printAsOperand(O);
+ O << " = ";
+ getOperand(0)->printAsOperand(O, SlotTracker);
+ O << "\n";
+}
+#endif
+
void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
assert(!Parent && "Recipe already in some VPBasicBlock");
assert(InsertPos->getParent() &&
@@ -189,55 +216,44 @@ void VPRecipeBase::moveBefore(VPBasicBlock &BB,
insertBefore(BB, I);
}
-void VPInstruction::generateInstruction(VPTransformState &State,
- unsigned Part) {
+Value *VPInstruction::generateInstruction(VPTransformState &State,
+ unsigned Part) {
IRBuilderBase &Builder = State.Builder;
Builder.SetCurrentDebugLocation(DL);
if (Instruction::isBinaryOp(getOpcode())) {
Value *A = State.get(getOperand(0), Part);
Value *B = State.get(getOperand(1), Part);
- Value *V =
- Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
- State.set(this, V, Part);
- return;
+ return Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
}
switch (getOpcode()) {
case VPInstruction::Not: {
Value *A = State.get(getOperand(0), Part);
- Value *V = Builder.CreateNot(A, Name);
- State.set(this, V, Part);
- break;
+ return Builder.CreateNot(A, Name);
}
case VPInstruction::ICmpULE: {
Value *IV = State.get(getOperand(0), Part);
Value *TC = State.get(getOperand(1), Part);
- Value *V = Builder.CreateICmpULE(IV, TC, Name);
- State.set(this, V, Part);
- break;
+ return Builder.CreateICmpULE(IV, TC, Name);
}
case Instruction::Select: {
Value *Cond = State.get(getOperand(0), Part);
Value *Op1 = State.get(getOperand(1), Part);
Value *Op2 = State.get(getOperand(2), Part);
- Value *V = Builder.CreateSelect(Cond, Op1, Op2, Name);
- State.set(this, V, Part);
- break;
+ return Builder.CreateSelect(Cond, Op1, Op2, Name);
}
case VPInstruction::ActiveLaneMask: {
// Get first lane of vector induction variable.
Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
// Get the original loop tripcount.
- Value *ScalarTC = State.get(getOperand(1), Part);
+ Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));
auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
auto *PredTy = VectorType::get(Int1Ty, State.VF);
- Instruction *Call = Builder.CreateIntrinsic(
- Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
- {VIVElem0, ScalarTC}, nullptr, Name);
- State.set(this, Call, Part);
- break;
+ return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
+ {PredTy, ScalarTC->getType()},
+ {VIVElem0, ScalarTC}, nullptr, Name);
}
case VPInstruction::FirstOrderRecurrenceSplice: {
// Generate code to combine the previous and current values in vector v3.
@@ -255,18 +271,22 @@ void VPInstruction::generateInstruction(VPTransformState &State,
// For the first part, use the recurrence phi (v1), otherwise v2.
auto *V1 = State.get(getOperand(0), 0);
Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
- if (!PartMinus1->getType()->isVectorTy()) {
- State.set(this, PartMinus1, Part);
- } else {
- Value *V2 = State.get(getOperand(1), Part);
- State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1, Name),
- Part);
- }
- break;
+ if (!PartMinus1->getType()->isVectorTy())
+ return PartMinus1;
+ Value *V2 = State.get(getOperand(1), Part);
+ return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
+ }
+ case VPInstruction::CalculateTripCountMinusVF: {
+ Value *ScalarTC = State.get(getOperand(0), {0, 0});
+ Value *Step =
+ createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
+ Value *Sub = Builder.CreateSub(ScalarTC, Step);
+ Value *Cmp = Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
+ Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
+ return Builder.CreateSelect(Cmp, Sub, Zero);
}
case VPInstruction::CanonicalIVIncrement:
case VPInstruction::CanonicalIVIncrementNUW: {
- Value *Next = nullptr;
if (Part == 0) {
bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
auto *Phi = State.get(getOperand(0), 0);
@@ -274,34 +294,26 @@ void VPInstruction::generateInstruction(VPTransformState &State,
// elements) times the unroll factor (num of SIMD instructions).
Value *Step =
createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
- Next = Builder.CreateAdd(Phi, Step, Name, IsNUW, false);
- } else {
- Next = State.get(this, 0);
+ return Builder.CreateAdd(Phi, Step, Name, IsNUW, false);
}
-
- State.set(this, Next, Part);
- break;
+ return State.get(this, 0);
}
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::CanonicalIVIncrementForPartNUW: {
bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementForPartNUW;
auto *IV = State.get(getOperand(0), VPIteration(0, 0));
- if (Part == 0) {
- State.set(this, IV, Part);
- break;
- }
+ if (Part == 0)
+ return IV;
// The canonical IV is incremented by the vectorization factor (num of SIMD
// elements) times the unroll part.
Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
- Value *Next = Builder.CreateAdd(IV, Step, Name, IsNUW, false);
- State.set(this, Next, Part);
- break;
+ return Builder.CreateAdd(IV, Step, Name, IsNUW, false);
}
case VPInstruction::BranchOnCond: {
if (Part != 0)
- break;
+ return nullptr;
Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
VPRegionBlock *ParentRegion = getParent()->getParent();
@@ -318,11 +330,11 @@ void VPInstruction::generateInstruction(VPTransformState &State,
CondBr->setSuccessor(0, nullptr);
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
- break;
+ return CondBr;
}
case VPInstruction::BranchOnCount: {
if (Part != 0)
- break;
+ return nullptr;
// First create the compare.
Value *IV = State.get(getOperand(0), Part);
Value *TC = State.get(getOperand(1), Part);
@@ -342,7 +354,7 @@ void VPInstruction::generateInstruction(VPTransformState &State,
State.CFG.VPBB2IRBB[Header]);
CondBr->setSuccessor(0, nullptr);
Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
- break;
+ return CondBr;
}
default:
llvm_unreachable("Unsupported opcode for instruction");
@@ -353,8 +365,13 @@ void VPInstruction::execute(VPTransformState &State) {
assert(!State.Instance && "VPInstruction executing an Instance");
IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
State.Builder.setFastMathFlags(FMF);
- for (unsigned Part = 0; Part < State.UF; ++Part)
- generateInstruction(State, Part);
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *GeneratedValue = generateInstruction(State, Part);
+ if (!hasResult())
+ continue;
+ assert(GeneratedValue && "generateInstruction must produce a value");
+ State.set(this, GeneratedValue, Part);
+ }
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -400,6 +417,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::BranchOnCond:
O << "branch-on-cond";
break;
+ case VPInstruction::CalculateTripCountMinusVF:
+ O << "TC > VF ? TC - VF : 0";
+ break;
case VPInstruction::CanonicalIVIncrementForPart:
O << "VF * Part + ";
break;
@@ -438,18 +458,19 @@ void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) {
}
void VPWidenCallRecipe::execute(VPTransformState &State) {
+ assert(State.VF.isVector() && "not widening");
auto &CI = *cast<CallInst>(getUnderlyingInstr());
assert(!isa<DbgInfoIntrinsic>(CI) &&
"DbgInfoIntrinsic should have been dropped during VPlan construction");
State.setDebugLocFromInst(&CI);
- SmallVector<Type *, 4> Tys;
- for (Value *ArgOperand : CI.args())
- Tys.push_back(
- ToVectorTy(ArgOperand->getType(), State.VF.getKnownMinValue()));
-
for (unsigned Part = 0; Part < State.UF; ++Part) {
- SmallVector<Type *, 2> TysForDecl = {CI.getType()};
+ SmallVector<Type *, 2> TysForDecl;
+ // Add return type if intrinsic is overloaded on it.
+ if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) {
+ TysForDecl.push_back(
+ VectorType::get(CI.getType()->getScalarType(), State.VF));
+ }
SmallVector<Value *, 4> Args;
for (const auto &I : enumerate(operands())) {
// Some intrinsics have a scalar argument - don't replace it with a
@@ -468,21 +489,16 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
Function *VectorF;
if (VectorIntrinsicID != Intrinsic::not_intrinsic) {
// Use vector version of the intrinsic.
- if (State.VF.isVector())
- TysForDecl[0] =
- VectorType::get(CI.getType()->getScalarType(), State.VF);
Module *M = State.Builder.GetInsertBlock()->getModule();
VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
assert(VectorF && "Can't retrieve vector intrinsic.");
} else {
- // Use vector version of the function call.
- const VFShape Shape = VFShape::get(CI, State.VF, false /*HasGlobalPred*/);
#ifndef NDEBUG
- assert(VFDatabase(CI).getVectorizedFunction(Shape) != nullptr &&
- "Can't create vector function.");
+ assert(Variant != nullptr && "Can't create vector function.");
#endif
- VectorF = VFDatabase(CI).getVectorizedFunction(Shape);
+ VectorF = Variant;
}
+
SmallVector<OperandBundleDef, 1> OpBundles;
CI.getOperandBundlesAsDefs(OpBundles);
CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
@@ -514,8 +530,12 @@ void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
if (VectorIntrinsicID)
O << " (using vector intrinsic)";
- else
- O << " (using library function)";
+ else {
+ O << " (using library function";
+ if (Variant->hasName())
+ O << ": " << Variant->getName();
+ O << ")";
+ }
}
void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
@@ -528,7 +548,7 @@ void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
getOperand(1)->printAsOperand(O, SlotTracker);
O << ", ";
getOperand(2)->printAsOperand(O, SlotTracker);
- O << (InvariantCond ? " (condition is loop invariant)" : "");
+ O << (isInvariantCond() ? " (condition is loop invariant)" : "");
}
#endif
@@ -541,10 +561,10 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) {
// We have to take the 'vectorized' value and pick the first lane.
// Instcombine will make this a no-op.
auto *InvarCond =
- InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr;
+ isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;
for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part);
+ Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
Value *Op0 = State.get(getOperand(1), Part);
Value *Op1 = State.get(getOperand(2), Part);
Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
@@ -553,6 +573,33 @@ void VPWidenSelectRecipe::execute(VPTransformState &State) {
}
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
+ switch (OpType) {
+ case OperationType::PossiblyExactOp:
+ if (ExactFlags.IsExact)
+ O << " exact";
+ break;
+ case OperationType::OverflowingBinOp:
+ if (WrapFlags.HasNUW)
+ O << " nuw";
+ if (WrapFlags.HasNSW)
+ O << " nsw";
+ break;
+ case OperationType::FPMathOp:
+ getFastMathFlags().print(O);
+ break;
+ case OperationType::GEPOp:
+ if (GEPFlags.IsInBounds)
+ O << " inbounds";
+ break;
+ case OperationType::Other:
+ break;
+ }
+ O << " ";
+}
+#endif
+
void VPWidenRecipe::execute(VPTransformState &State) {
auto &I = *cast<Instruction>(getUnderlyingValue());
auto &Builder = State.Builder;
@@ -592,17 +639,8 @@ void VPWidenRecipe::execute(VPTransformState &State) {
Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);
- if (auto *VecOp = dyn_cast<Instruction>(V)) {
- VecOp->copyIRFlags(&I);
-
- // If the instruction is vectorized and was in a basic block that needed
- // predication, we can't propagate poison-generating flags (nuw/nsw,
- // exact, etc.). The control flow has been linearized and the
- // instruction is no longer guarded by the predicate, which could make
- // the flag properties to no longer hold.
- if (State.MayGeneratePoisonRecipes.contains(this))
- VecOp->dropPoisonGeneratingFlags();
- }
+ if (auto *VecOp = dyn_cast<Instruction>(V))
+ setFlags(VecOp);
// Use this vector value for all users of the original instruction.
State.set(this, V, Part);
@@ -646,35 +684,6 @@ void VPWidenRecipe::execute(VPTransformState &State) {
break;
}
-
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::FPExt:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::SIToFP:
- case Instruction::UIToFP:
- case Instruction::Trunc:
- case Instruction::FPTrunc:
- case Instruction::BitCast: {
- auto *CI = cast<CastInst>(&I);
- State.setDebugLocFromInst(CI);
-
- /// Vectorize casts.
- Type *DestTy = (State.VF.isScalar())
- ? CI->getType()
- : VectorType::get(CI->getType(), State.VF);
-
- for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *A = State.get(getOperand(0), Part);
- Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
- State.set(this, Cast, Part);
- State.addMetadata(Cast, &I);
- }
- break;
- }
default:
// This instruction is not vectorized by simple widening.
LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
@@ -687,10 +696,39 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
O << Indent << "WIDEN ";
printAsOperand(O, SlotTracker);
const Instruction *UI = getUnderlyingInstr();
- O << " = " << UI->getOpcodeName() << " ";
+ O << " = " << UI->getOpcodeName();
+ printFlags(O);
if (auto *Cmp = dyn_cast<CmpInst>(UI))
- O << CmpInst::getPredicateName(Cmp->getPredicate()) << " ";
+ O << Cmp->getPredicate() << " ";
+ printOperands(O, SlotTracker);
+}
+#endif
+
+void VPWidenCastRecipe::execute(VPTransformState &State) {
+ auto *I = cast_or_null<Instruction>(getUnderlyingValue());
+ if (I)
+ State.setDebugLocFromInst(I);
+ auto &Builder = State.Builder;
+ /// Vectorize casts.
+ assert(State.VF.isVector() && "Not vectorizing?");
+ Type *DestTy = VectorType::get(getResultType(), State.VF);
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ Value *A = State.get(getOperand(0), Part);
+ Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
+ State.set(this, Cast, Part);
+ State.addMetadata(Cast, I);
+ }
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN-CAST ";
+ printAsOperand(O, SlotTracker);
+ O << " = " << Instruction::getOpcodeName(Opcode) << " ";
printOperands(O, SlotTracker);
+ O << " to " << *getResultType();
}
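// Editorial sketch (illustrative, not part of the upstream change): casts are
// now modeled by the dedicated VPWidenCastRecipe rather than by VPWidenRecipe,
// mirroring the construction performed in VPlanTransforms.cpp:
//   auto *C = new VPWidenCastRecipe(CI->getOpcode(), Op, CI->getType(), CI);
// which prints roughly as: WIDEN-CAST ir<%c> = zext ir<%x> to i32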
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
@@ -710,8 +748,13 @@ void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
+ // The step may be defined by a recipe in the preheader (e.g. if it requires
+ // SCEV expansion), but for the canonical induction the step is required to be
+ // 1, which is represented as a live-in.
+ if (getStepValue()->getDefiningRecipe())
+ return false;
+ auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
- auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
return StartC && StartC->isZero() && StepC && StepC->isOne();
}
@@ -743,6 +786,7 @@ void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
#endif
void VPWidenGEPRecipe::execute(VPTransformState &State) {
+ assert(State.VF.isVector() && "not widening");
auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
// Construct a vector GEP by widening the operands of the scalar GEP as
// necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
@@ -750,7 +794,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
// is vector-typed. Thus, to keep the representation compact, we only use
// vector-typed operands for loop-varying values.
- if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
+ if (areAllOperandsInvariant()) {
// If we are vectorizing, but the GEP has only loop-invariant operands,
// the GEP we build (by only using vector-typed operands for
// loop-varying values) would be a scalar pointer. Thus, to ensure we
@@ -763,9 +807,15 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
// required. We would add the scalarization decision to
// collectLoopScalars() and teach getVectorValue() to broadcast
// the lane-zero scalar value.
- auto *Clone = State.Builder.Insert(GEP->clone());
+ SmallVector<Value *> Ops;
+ for (unsigned I = 0, E = getNumOperands(); I != E; I++)
+ Ops.push_back(State.get(getOperand(I), VPIteration(0, 0)));
+
+ auto *NewGEP =
+ State.Builder.CreateGEP(GEP->getSourceElementType(), Ops[0],
+ ArrayRef(Ops).drop_front(), "", isInBounds());
for (unsigned Part = 0; Part < State.UF; ++Part) {
- Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone);
+ Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, NewGEP);
State.set(this, EntryPart, Part);
State.addMetadata(EntryPart, GEP);
}
@@ -780,7 +830,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
for (unsigned Part = 0; Part < State.UF; ++Part) {
// The pointer operand of the new GEP. If it's loop-invariant, we
// won't broadcast it.
- auto *Ptr = IsPtrLoopInvariant
+ auto *Ptr = isPointerLoopInvariant()
? State.get(getOperand(0), VPIteration(0, 0))
: State.get(getOperand(0), Part);
@@ -789,24 +839,16 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
SmallVector<Value *, 4> Indices;
for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
VPValue *Operand = getOperand(I);
- if (IsIndexLoopInvariant[I - 1])
+ if (isIndexLoopInvariant(I - 1))
Indices.push_back(State.get(Operand, VPIteration(0, 0)));
else
Indices.push_back(State.get(Operand, Part));
}
- // If the GEP instruction is vectorized and was in a basic block that
- // needed predication, we can't propagate the poison-generating 'inbounds'
- // flag. The control flow has been linearized and the GEP is no longer
- // guarded by the predicate, which could make the 'inbounds' properties to
- // no longer hold.
- bool IsInBounds =
- GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0;
-
// Create the new GEP. Note that this GEP may be a scalar if VF == 1,
// but it should be a vector, otherwise.
auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
- Indices, "", IsInBounds);
+ Indices, "", isInBounds());
assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
"NewGEP is not a pointer vector");
State.set(this, NewGEP, Part);
@@ -819,14 +861,14 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-GEP ";
- O << (IsPtrLoopInvariant ? "Inv" : "Var");
- size_t IndicesNumber = IsIndexLoopInvariant.size();
- for (size_t I = 0; I < IndicesNumber; ++I)
- O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
+ O << (isPointerLoopInvariant() ? "Inv" : "Var");
+ for (size_t I = 0; I < getNumOperands() - 1; ++I)
+ O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var") << "]";
O << " ";
printAsOperand(O, SlotTracker);
- O << " = getelementptr ";
+ O << " = getelementptr";
+ printFlags(O);
printOperands(O, SlotTracker);
}
#endif
@@ -911,7 +953,21 @@ void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
O << " (with final reduction value stored in invariant address sank "
"outside of loop)";
}
+#endif
+
+bool VPReplicateRecipe::shouldPack() const {
+ // Find if the recipe is used by a widened recipe via an intervening
+ // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector.
+ return any_of(users(), [](const VPUser *U) {
+ if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(U))
+ return any_of(PredR->users(), [PredR](const VPUser *U) {
+ return !U->usesScalars(PredR);
+ });
+ return false;
+ });
+}
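// Editorial illustration (not part of the upstream change): packing is needed
// when a predicated replicate reaches a widened user through the phi, roughly:
//   REPLICATE ir<%d> = sdiv ir<%x>, ir<%y> (S->V)
//   PHI-PREDICATED-INSTRUCTION vp<%phi> = ir<%d>
//   WIDEN ir<%r> = add vp<%phi>, ir<%z>
// The widened add consumes vp<%phi> as a vector, so shouldPack() returns true.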
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
@@ -921,18 +977,21 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
O << " = ";
}
if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) {
- O << "call @" << CB->getCalledFunction()->getName() << "(";
+ O << "call";
+ printFlags(O);
+ O << "@" << CB->getCalledFunction()->getName() << "(";
interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)),
O, [&O, &SlotTracker](VPValue *Op) {
Op->printAsOperand(O, SlotTracker);
});
O << ")";
} else {
- O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
+ O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode());
+ printFlags(O);
printOperands(O, SlotTracker);
}
- if (AlsoPack)
+ if (shouldPack())
O << " (S->V)";
}
#endif
@@ -1053,20 +1112,22 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
}
#endif
-bool VPCanonicalIVPHIRecipe::isCanonical(const InductionDescriptor &ID,
- Type *Ty) const {
- if (Ty != getScalarType())
+bool VPCanonicalIVPHIRecipe::isCanonical(
+ InductionDescriptor::InductionKind Kind, VPValue *Start, VPValue *Step,
+ Type *Ty) const {
+ // The types must match and it must be an integer induction.
+ if (Ty != getScalarType() || Kind != InductionDescriptor::IK_IntInduction)
return false;
- // The start value of ID must match the start value of this canonical
- // induction.
- if (getStartValue()->getLiveInIRValue() != ID.getStartValue())
+ // Start must match the start value of this canonical induction.
+ if (Start != getStartValue())
return false;
- ConstantInt *Step = ID.getConstIntStepValue();
- // ID must also be incremented by one. IK_IntInduction always increment the
- // induction by Step, but the binary op may not be set.
- return ID.getKind() == InductionDescriptor::IK_IntInduction && Step &&
- Step->isOne();
+ // If the step is defined by a recipe, it is not a ConstantInt.
+ if (Step->getDefiningRecipe())
+ return false;
+
+ ConstantInt *StepC = dyn_cast<ConstantInt>(Step->getLiveInIRValue());
+ return StepC && StepC->isOne();
}
bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(ElementCount VF) {
@@ -1092,9 +1153,11 @@ void VPExpandSCEVRecipe::execute(VPTransformState &State) {
Value *Res = Exp.expandCodeFor(Expr, Expr->getType(),
&*State.Builder.GetInsertPoint());
-
+ assert(!State.ExpandedSCEVs.contains(Expr) &&
+ "Same SCEV expanded multiple times");
+ State.ExpandedSCEVs[Expr] = Res;
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part)
- State.set(this, Res, Part);
+ State.set(this, Res, {Part, 0});
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index cbf111b00e3d..83bfdfd09d19 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "VPlanTransforms.h"
+#include "VPlanDominatorTree.h"
+#include "VPRecipeBuilder.h"
#include "VPlanCFG.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
@@ -22,11 +24,10 @@
using namespace llvm;
void VPlanTransforms::VPInstructionsToVPRecipes(
- Loop *OrigLoop, VPlanPtr &Plan,
+ VPlanPtr &Plan,
function_ref<const InductionDescriptor *(PHINode *)>
GetIntOrFpInductionDescriptor,
- SmallPtrSetImpl<Instruction *> &DeadInstructions, ScalarEvolution &SE,
- const TargetLibraryInfo &TLI) {
+ ScalarEvolution &SE, const TargetLibraryInfo &TLI) {
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
Plan->getEntry());
@@ -39,22 +40,15 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
VPValue *VPV = Ingredient.getVPSingleValue();
Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());
- if (DeadInstructions.count(Inst)) {
- VPValue DummyValue;
- VPV->replaceAllUsesWith(&DummyValue);
- Ingredient.eraseFromParent();
- continue;
- }
VPRecipeBase *NewRecipe = nullptr;
if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) {
auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
if (const auto *II = GetIntOrFpInductionDescriptor(Phi)) {
- VPValue *Start = Plan->getOrAddVPValue(II->getStartValue());
+ VPValue *Start = Plan->getVPValueOrAddLiveIn(II->getStartValue());
VPValue *Step =
vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE);
- NewRecipe =
- new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II, true);
+ NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II);
} else {
Plan->addVPValue(Phi, VPPhi);
continue;
@@ -66,28 +60,25 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
// Create VPWidenMemoryInstructionRecipe for loads and stores.
if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
NewRecipe = new VPWidenMemoryInstructionRecipe(
- *Load, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
- nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/);
+ *Load, Ingredient.getOperand(0), nullptr /*Mask*/,
+ false /*Consecutive*/, false /*Reverse*/);
} else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
NewRecipe = new VPWidenMemoryInstructionRecipe(
- *Store, Plan->getOrAddVPValue(getLoadStorePointerOperand(Inst)),
- Plan->getOrAddVPValue(Store->getValueOperand()), nullptr /*Mask*/,
- false /*Consecutive*/, false /*Reverse*/);
+ *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
+ nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/);
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
- NewRecipe = new VPWidenGEPRecipe(
- GEP, Plan->mapToVPValues(GEP->operands()), OrigLoop);
+ NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
} else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
NewRecipe =
- new VPWidenCallRecipe(*CI, Plan->mapToVPValues(CI->args()),
+ new VPWidenCallRecipe(*CI, drop_end(Ingredient.operands()),
getVectorIntrinsicIDForCall(CI, &TLI));
} else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
- bool InvariantCond =
- SE.isLoopInvariant(SE.getSCEV(SI->getOperand(0)), OrigLoop);
- NewRecipe = new VPWidenSelectRecipe(
- *SI, Plan->mapToVPValues(SI->operands()), InvariantCond);
+ NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands());
+ } else if (auto *CI = dyn_cast<CastInst>(Inst)) {
+ NewRecipe = new VPWidenCastRecipe(
+ CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), CI);
} else {
- NewRecipe =
- new VPWidenRecipe(*Inst, Plan->mapToVPValues(Inst->operands()));
+ NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands());
}
}
@@ -98,15 +89,11 @@ void VPlanTransforms::VPInstructionsToVPRecipes(
assert(NewRecipe->getNumDefinedValues() == 0 &&
"Only recpies with zero or one defined values expected");
Ingredient.eraseFromParent();
- Plan->removeVPValueFor(Inst);
- for (auto *Def : NewRecipe->definedValues()) {
- Plan->addVPValue(Inst, Def);
- }
}
}
}
-bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) {
+static bool sinkScalarOperands(VPlan &Plan) {
auto Iter = vp_depth_first_deep(Plan.getEntry());
bool Changed = false;
// First, collect the operands of all recipes in replicate blocks as seeds for
@@ -167,8 +154,7 @@ bool VPlanTransforms::sinkScalarOperands(VPlan &Plan) {
continue;
Instruction *I = cast<Instruction>(
cast<VPReplicateRecipe>(SinkCandidate)->getUnderlyingValue());
- auto *Clone =
- new VPReplicateRecipe(I, SinkCandidate->operands(), true, false);
+ auto *Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true);
// TODO: add ".cloned" suffix to name of Clone's VPValue.
Clone->insertBefore(SinkCandidate);
@@ -224,7 +210,10 @@ static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) {
return nullptr;
}
-bool VPlanTransforms::mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
+// Merge replicate regions in their successor region, if a replicate region
+// is connected to a successor replicate region with the same predicate by a
+// single, empty VPBasicBlock.
+static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
SetVector<VPRegionBlock *> DeletedRegions;
// Collect replicate regions followed by an empty block, followed by another
@@ -312,6 +301,81 @@ bool VPlanTransforms::mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
return !DeletedRegions.empty();
}
+static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
+ VPlan &Plan) {
+ Instruction *Instr = PredRecipe->getUnderlyingInstr();
+ // Build the triangular if-then region.
+ std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
+ assert(Instr->getParent() && "Predicated instruction not in any basic block");
+ auto *BlockInMask = PredRecipe->getMask();
+ auto *BOMRecipe = new VPBranchOnMaskRecipe(BlockInMask);
+ auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe);
+
+ // Replace predicated replicate recipe with a replicate recipe without a
+ // mask but in the replicate region.
+ auto *RecipeWithoutMask = new VPReplicateRecipe(
+ PredRecipe->getUnderlyingInstr(),
+ make_range(PredRecipe->op_begin(), std::prev(PredRecipe->op_end())),
+ PredRecipe->isUniform());
+ auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask);
+
+ VPPredInstPHIRecipe *PHIRecipe = nullptr;
+ if (PredRecipe->getNumUsers() != 0) {
+ PHIRecipe = new VPPredInstPHIRecipe(RecipeWithoutMask);
+ PredRecipe->replaceAllUsesWith(PHIRecipe);
+ PHIRecipe->setOperand(0, RecipeWithoutMask);
+ }
+ PredRecipe->eraseFromParent();
+ auto *Exiting = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
+ VPRegionBlock *Region = new VPRegionBlock(Entry, Exiting, RegionName, true);
+
+ // Note: first set Entry as region entry and then connect successors starting
+ // from it in order, to propagate the "parent" of each VPBasicBlock.
+ VPBlockUtils::insertTwoBlocksAfter(Pred, Exiting, Entry);
+ VPBlockUtils::connectBlocks(Pred, Exiting);
+
+ return Region;
+}
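// Editorial illustration of the resulting triangular region (roughly):
//   pred.<op>.entry:    BRANCH-ON-MASK vp<%mask>
//   pred.<op>.if:       REPLICATE (clone of the recipe, without the mask)
//   pred.<op>.continue: PHI-PREDICATED-INSTRUCTION (only if the result has users)
// The entry block branches to both .if and .continue; .if falls through to
// .continue.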
+
+static void addReplicateRegions(VPlan &Plan) {
+ SmallVector<VPReplicateRecipe *> WorkList;
+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
+ vp_depth_first_deep(Plan.getEntry()))) {
+ for (VPRecipeBase &R : *VPBB)
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
+ if (RepR->isPredicated())
+ WorkList.push_back(RepR);
+ }
+ }
+
+ unsigned BBNum = 0;
+ for (VPReplicateRecipe *RepR : WorkList) {
+ VPBasicBlock *CurrentBlock = RepR->getParent();
+ VPBasicBlock *SplitBlock = CurrentBlock->splitAt(RepR->getIterator());
+
+ BasicBlock *OrigBB = RepR->getUnderlyingInstr()->getParent();
+ SplitBlock->setName(
+ OrigBB->hasName() ? OrigBB->getName() + "." + Twine(BBNum++) : "");
+ // Record predicated instructions for the packing optimizations above.
+ VPBlockBase *Region = createReplicateRegion(RepR, Plan);
+ Region->setParent(CurrentBlock->getParent());
+ VPBlockUtils::disconnectBlocks(CurrentBlock, SplitBlock);
+ VPBlockUtils::connectBlocks(CurrentBlock, Region);
+ VPBlockUtils::connectBlocks(Region, SplitBlock);
+ }
+}
+
+void VPlanTransforms::createAndOptimizeReplicateRegions(VPlan &Plan) {
+ // Convert masked VPReplicateRecipes to if-then region blocks.
+ addReplicateRegions(Plan);
+
+ bool ShouldSimplify = true;
+ while (ShouldSimplify) {
+ ShouldSimplify = sinkScalarOperands(Plan);
+ ShouldSimplify |= mergeReplicateRegionsIntoSuccessors(Plan);
+ ShouldSimplify |= VPlanTransforms::mergeBlocksIntoPredecessors(Plan);
+ }
+}
bool VPlanTransforms::mergeBlocksIntoPredecessors(VPlan &Plan) {
SmallVector<VPBasicBlock *> WorkList;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
@@ -395,7 +459,10 @@ void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) {
// everything WidenNewIV's users need. That is, WidenOriginalIV will
// generate a vector phi or all users of WidenNewIV demand the first lane
// only.
- if (WidenOriginalIV->needsVectorIV() ||
+ if (any_of(WidenOriginalIV->users(),
+ [WidenOriginalIV](VPUser *U) {
+ return !U->usesScalars(WidenOriginalIV);
+ }) ||
vputils::onlyFirstLaneUsed(WidenNewIV)) {
WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
WidenNewIV->eraseFromParent();
@@ -440,10 +507,10 @@ void VPlanTransforms::optimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
if (Instruction *TruncI = WideIV->getTruncInst())
ResultTy = TruncI->getType();
const InductionDescriptor &ID = WideIV->getInductionDescriptor();
- VPValue *Step =
- vputils::getOrCreateVPValueForSCEVExpr(Plan, ID.getStep(), SE);
+ VPValue *Step = WideIV->getStepValue();
VPValue *BaseIV = CanonicalIV;
- if (!CanonicalIV->isCanonical(ID, ResultTy)) {
+ if (!CanonicalIV->isCanonical(ID.getKind(), WideIV->getStartValue(), Step,
+ ResultTy)) {
BaseIV = new VPDerivedIVRecipe(ID, WideIV->getStartValue(), CanonicalIV,
Step, ResultTy);
HeaderVPBB->insert(BaseIV->getDefiningRecipe(), IP);
@@ -522,9 +589,9 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
return;
LLVMContext &Ctx = SE.getContext();
- auto *BOC =
- new VPInstruction(VPInstruction::BranchOnCond,
- {Plan.getOrAddExternalDef(ConstantInt::getTrue(Ctx))});
+ auto *BOC = new VPInstruction(
+ VPInstruction::BranchOnCond,
+ {Plan.getVPValueOrAddLiveIn(ConstantInt::getTrue(Ctx))});
Term->eraseFromParent();
ExitingVPBB->appendRecipe(BOC);
Plan.setVF(BestVF);
@@ -533,3 +600,181 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
// 1. Replace inductions with constants.
// 2. Replace vector loop region with VPBasicBlock.
}
+
+#ifndef NDEBUG
+static VPRegionBlock *GetReplicateRegion(VPRecipeBase *R) {
+ auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
+ if (Region && Region->isReplicator()) {
+ assert(Region->getNumSuccessors() == 1 &&
+ Region->getNumPredecessors() == 1 && "Expected SESE region!");
+ assert(R->getParent()->size() == 1 &&
+ "A recipe in an original replicator region must be the only "
+ "recipe in its block");
+ return Region;
+ }
+ return nullptr;
+}
+#endif
+
+static bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B,
+ VPDominatorTree &VPDT) {
+ if (A == B)
+ return false;
+
+ auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
+ for (auto &R : *A->getParent()) {
+ if (&R == A)
+ return true;
+ if (&R == B)
+ return false;
+ }
+ llvm_unreachable("recipe not found");
+ };
+ const VPBlockBase *ParentA = A->getParent();
+ const VPBlockBase *ParentB = B->getParent();
+ if (ParentA == ParentB)
+ return LocalComesBefore(A, B);
+
+ assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&
+ "No replicate regions expected at this point");
+ assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&
+ "No replicate regions expected at this point");
+ return VPDT.properlyDominates(ParentA, ParentB);
+}
+
+/// Sink users of \p FOR after the recipe defining the previous value \p
+/// Previous of the recurrence. \returns true if all users of \p FOR could be
+/// re-arranged as needed or false if it is not possible.
+static bool
+sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
+ VPRecipeBase *Previous,
+ VPDominatorTree &VPDT) {
+ // Collect recipes that need sinking.
+ SmallVector<VPRecipeBase *> WorkList;
+ SmallPtrSet<VPRecipeBase *, 8> Seen;
+ Seen.insert(Previous);
+ auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
+ // The previous value must not depend on the users of the recurrence phi;
+ // if it did, FOR would not be a fixed-order recurrence.
+ if (SinkCandidate == Previous)
+ return false;
+
+ if (isa<VPHeaderPHIRecipe>(SinkCandidate) ||
+ !Seen.insert(SinkCandidate).second ||
+ properlyDominates(Previous, SinkCandidate, VPDT))
+ return true;
+
+ if (SinkCandidate->mayHaveSideEffects())
+ return false;
+
+ WorkList.push_back(SinkCandidate);
+ return true;
+ };
+
+ // Recursively sink users of FOR after Previous.
+ WorkList.push_back(FOR);
+ for (unsigned I = 0; I != WorkList.size(); ++I) {
+ VPRecipeBase *Current = WorkList[I];
+ assert(Current->getNumDefinedValues() == 1 &&
+ "only recipes with a single defined value expected");
+
+ for (VPUser *User : Current->getVPSingleValue()->users()) {
+ if (auto *R = dyn_cast<VPRecipeBase>(User))
+ if (!TryToPushSinkCandidate(R))
+ return false;
+ }
+ }
+
+ // Keep recipes to sink ordered by dominance so earlier instructions are
+ // processed first.
+ sort(WorkList, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) {
+ return properlyDominates(A, B, VPDT);
+ });
+
+ for (VPRecipeBase *SinkCandidate : WorkList) {
+ if (SinkCandidate == FOR)
+ continue;
+
+ SinkCandidate->moveAfter(Previous);
+ Previous = SinkCandidate;
+ }
+ return true;
+}
+
+bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
+ VPBuilder &Builder) {
+ VPDominatorTree VPDT;
+ VPDT.recalculate(Plan);
+
+ SmallVector<VPFirstOrderRecurrencePHIRecipe *> RecurrencePhis;
+ for (VPRecipeBase &R :
+ Plan.getVectorLoopRegion()->getEntry()->getEntryBasicBlock()->phis())
+ if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
+ RecurrencePhis.push_back(FOR);
+
+ for (VPFirstOrderRecurrencePHIRecipe *FOR : RecurrencePhis) {
+ SmallPtrSet<VPFirstOrderRecurrencePHIRecipe *, 4> SeenPhis;
+ VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
+ // Fixed-order recurrences do not contain cycles, so this loop is guaranteed
+ // to terminate.
+ while (auto *PrevPhi =
+ dyn_cast_or_null<VPFirstOrderRecurrencePHIRecipe>(Previous)) {
+ assert(PrevPhi->getParent() == FOR->getParent());
+ assert(SeenPhis.insert(PrevPhi).second);
+ Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
+ }
+
+ if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT))
+ return false;
+
+ // Introduce a recipe to combine the incoming and previous values of a
+ // fixed-order recurrence.
+ VPBasicBlock *InsertBlock = Previous->getParent();
+ if (isa<VPHeaderPHIRecipe>(Previous))
+ Builder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi());
+ else
+ Builder.setInsertPoint(InsertBlock, std::next(Previous->getIterator()));
+
+ auto *RecurSplice = cast<VPInstruction>(
+ Builder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice,
+ {FOR, FOR->getBackedgeValue()}));
+
+ FOR->replaceAllUsesWith(RecurSplice);
+ // Set the first operand of RecurSplice to FOR again, after replacing
+ // all users.
+ RecurSplice->setOperand(0, FOR);
+ }
+ return true;
+}
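// Editorial note (illustrative): for a recurrence such as
//   for (i) { use(last); last = a[i]; }
// the users of the phi are sunk after the recipe producing a[i] and rewired to
// consume, roughly,
//   EMIT vp<%splice> = first-order splice vp<%last.phi>, ir<%a.load>
// which combines the final element of the previous vector iteration with the
// value of the current one.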
+
+void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
+ for (VPRecipeBase &R :
+ Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
+ auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
+ if (!PhiR)
+ continue;
+ const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
+ RecurKind RK = RdxDesc.getRecurrenceKind();
+ if (RK != RecurKind::Add && RK != RecurKind::Mul)
+ continue;
+
+ SmallSetVector<VPValue *, 8> Worklist;
+ Worklist.insert(PhiR);
+
+ for (unsigned I = 0; I != Worklist.size(); ++I) {
+ VPValue *Cur = Worklist[I];
+ if (auto *RecWithFlags =
+ dyn_cast<VPRecipeWithIRFlags>(Cur->getDefiningRecipe())) {
+ RecWithFlags->dropPoisonGeneratingFlags();
+ }
+
+ for (VPUser *U : Cur->users()) {
+ auto *UserRecipe = dyn_cast<VPRecipeBase>(U);
+ if (!UserRecipe)
+ continue;
+ for (VPValue *V : UserRecipe->definedValues())
+ Worklist.insert(V);
+ }
+ }
+ }
+}
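Taken together, a caller might drive the reorganized transforms roughly as follows (editorial sketch only; assumes a VPlanPtr Plan and a VPBuilder Builder in scope, and the ordering is illustrative):

  VPlanTransforms::createAndOptimizeReplicateRegions(*Plan);
  if (!VPlanTransforms::adjustFixedOrderRecurrences(*Plan, Builder))
    return; // the plan is not valid if the recurrence users cannot be re-arranged
  VPlanTransforms::clearReductionWrapFlags(*Plan);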
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index be0d8e76d809..3eccf6e9600d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -25,23 +25,23 @@ class ScalarEvolution;
class Loop;
class PredicatedScalarEvolution;
class TargetLibraryInfo;
+class VPBuilder;
+class VPRecipeBuilder;
struct VPlanTransforms {
/// Replaces the VPInstructions in \p Plan with corresponding
/// widen recipes.
static void
- VPInstructionsToVPRecipes(Loop *OrigLoop, VPlanPtr &Plan,
+ VPInstructionsToVPRecipes(VPlanPtr &Plan,
function_ref<const InductionDescriptor *(PHINode *)>
GetIntOrFpInductionDescriptor,
- SmallPtrSetImpl<Instruction *> &DeadInstructions,
ScalarEvolution &SE, const TargetLibraryInfo &TLI);
- static bool sinkScalarOperands(VPlan &Plan);
-
- /// Merge replicate regions in their successor region, if a replicate region
- /// is connected to a successor replicate region with the same predicate by a
- /// single, empty VPBasicBlock.
- static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan);
+ /// Wrap predicated VPReplicateRecipes with a mask operand in an if-then
+ /// region block and remove the mask operand. Optimize the created regions by
+ /// iteratively sinking scalar operands into the region, followed by merging
+ /// regions until no improvements are remaining.
+ static void createAndOptimizeReplicateRegions(VPlan &Plan);
/// Remove redundant VPBasicBlocks by merging them into their predecessor if
/// the predecessor has a single successor.
@@ -71,6 +71,19 @@ struct VPlanTransforms {
/// them with already existing recipes expanding the same SCEV expression.
static void removeRedundantExpandSCEVRecipes(VPlan &Plan);
+ /// Sink users of fixed-order recurrences after the recipe defining their
+ /// previous value. Then introduce FirstOrderRecurrenceSplice VPInstructions
+ /// to combine the value from the recurrence phis and previous values. The
+ /// current implementation assumes all users can be sunk after the previous
+ /// value, which is enforced by earlier legality checks.
+ /// \returns true if all users of fixed-order recurrences could be re-arranged
+ /// as needed or false if it is not possible. In the latter case, \p Plan is
+ /// not valid.
+ static bool adjustFixedOrderRecurrences(VPlan &Plan, VPBuilder &Builder);
+
+ /// Clear NSW/NUW flags from reduction instructions if necessary.
+ static void clearReductionWrapFlags(VPlan &Plan);
+
/// Optimize \p Plan based on \p BestVF and \p BestUF. This may restrict the
/// resulting plan to \p BestVF and \p BestUF.
static void optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
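
adjustFixedOrderRecurrences(), documented above, splices the value carried over from the previous vector iteration together with the current iteration's values. As far as I understand the lowering (a vector splice with offset -1), lane 0 takes the last lane of the previous vector and the remaining lanes take the first VF-1 lanes of the current one; a scalar model of that, purely illustrative:

    #include <cassert>
    #include <vector>

    // Model of FirstOrderRecurrenceSplice for fixed-width vectors of length VF:
    // lane 0 comes from the last lane of the previous iteration, lanes 1..VF-1
    // come from lanes 0..VF-2 of the current iteration.
    static std::vector<int> recurrenceSplice(const std::vector<int> &Prev,
                                             const std::vector<int> &Cur) {
      assert(!Prev.empty() && Prev.size() == Cur.size());
      std::vector<int> Out(Cur.size());
      Out[0] = Prev.back();
      for (size_t I = 1; I < Cur.size(); ++I)
        Out[I] = Cur[I - 1];
      return Out;
    }

    int main() {
      // Prev = {0,1,2,3}, Cur = {4,5,6,7}  ->  {3,4,5,6}
      auto R = recurrenceSplice({0, 1, 2, 3}, {4, 5, 6, 7});
      assert((R == std::vector<int>{3, 4, 5, 6}));
    }
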
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 62ec65cbfe5d..ac110bb3b0ef 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -171,16 +171,19 @@ public:
/// Returns true if this VPValue is defined by a recipe.
bool hasDefiningRecipe() const { return getDefiningRecipe(); }
+ /// Returns true if this VPValue is a live-in, i.e. defined outside the VPlan.
+ bool isLiveIn() const { return !hasDefiningRecipe(); }
+
/// Returns the underlying IR value, if this VPValue is defined outside the
/// scope of VPlan. Returns nullptr if the VPValue is defined by a VPDef
/// inside a VPlan.
Value *getLiveInIRValue() {
- assert(!hasDefiningRecipe() &&
+ assert(isLiveIn() &&
"VPValue is not a live-in; it is defined by a VPDef inside a VPlan");
return getUnderlyingValue();
}
const Value *getLiveInIRValue() const {
- assert(!hasDefiningRecipe() &&
+ assert(isLiveIn() &&
"VPValue is not a live-in; it is defined by a VPDef inside a VPlan");
return getUnderlyingValue();
}
@@ -342,15 +345,16 @@ public:
VPScalarIVStepsSC,
VPWidenCallSC,
VPWidenCanonicalIVSC,
+ VPWidenCastSC,
VPWidenGEPSC,
VPWidenMemoryInstructionSC,
VPWidenSC,
VPWidenSelectSC,
-
- // Phi-like recipes. Need to be kept together.
+ // START: Phi-like recipes. Need to be kept together.
VPBlendSC,
VPPredInstPHISC,
- // Header-phi recipes. Need to be kept together.
+ // START: SubclassID for recipes that inherit VPHeaderPHIRecipe.
+ // VPHeaderPHIRecipes need to be kept together.
VPCanonicalIVPHISC,
VPActiveLaneMaskPHISC,
VPFirstOrderRecurrencePHISC,
@@ -358,8 +362,11 @@ public:
VPWidenIntOrFpInductionSC,
VPWidenPointerInductionSC,
VPReductionPHISC,
+ // END: SubclassID for recipes that inherit VPHeaderPHIRecipe
+ // END: Phi-like recipes
VPFirstPHISC = VPBlendSC,
VPFirstHeaderPHISC = VPCanonicalIVPHISC,
+ VPLastHeaderPHISC = VPReductionPHISC,
VPLastPHISC = VPReductionPHISC,
};
@@ -434,6 +441,7 @@ class VPSlotTracker {
void assignSlot(const VPValue *V);
void assignSlots(const VPlan &Plan);
+ void assignSlots(const VPBasicBlock *VPBB);
public:
VPSlotTracker(const VPlan *Plan = nullptr) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
index 18125cebed33..d6b81543dbc9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -15,6 +15,7 @@
#include "VPlanVerifier.h"
#include "VPlan.h"
#include "VPlanCFG.h"
+#include "VPlanDominatorTree.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/Support/CommandLine.h"
@@ -189,9 +190,8 @@ static bool verifyPhiRecipes(const VPBasicBlock *VPBB) {
return true;
}
-static bool
-verifyVPBasicBlock(const VPBasicBlock *VPBB,
- DenseMap<const VPBlockBase *, unsigned> &BlockNumbering) {
+static bool verifyVPBasicBlock(const VPBasicBlock *VPBB,
+ VPDominatorTree &VPDT) {
if (!verifyPhiRecipes(VPBB))
return false;
@@ -206,7 +206,8 @@ verifyVPBasicBlock(const VPBasicBlock *VPBB,
for (const VPValue *V : R.definedValues()) {
for (const VPUser *U : V->users()) {
auto *UI = dyn_cast<VPRecipeBase>(U);
- if (!UI || isa<VPHeaderPHIRecipe>(UI))
+ // TODO: check dominance of incoming values for phis properly.
+ if (!UI || isa<VPHeaderPHIRecipe>(UI) || isa<VPPredInstPHIRecipe>(UI))
continue;
// If the user is in the same block, check it comes after R in the
@@ -219,27 +220,7 @@ verifyVPBasicBlock(const VPBasicBlock *VPBB,
continue;
}
- // Skip blocks outside any region for now and blocks outside
- // replicate-regions.
- auto *ParentR = VPBB->getParent();
- if (!ParentR || !ParentR->isReplicator())
- continue;
-
- // For replicators, verify that VPPRedInstPHIRecipe defs are only used
- // in subsequent blocks.
- if (isa<VPPredInstPHIRecipe>(&R)) {
- auto I = BlockNumbering.find(UI->getParent());
- unsigned BlockNumber = I == BlockNumbering.end() ? std::numeric_limits<unsigned>::max() : I->second;
- if (BlockNumber < BlockNumbering[ParentR]) {
- errs() << "Use before def!\n";
- return false;
- }
- continue;
- }
-
- // All non-VPPredInstPHIRecipe recipes in the block must be used in
- // the replicate region only.
- if (UI->getParent()->getParent() != ParentR) {
+ if (!VPDT.dominates(VPBB, UI->getParent())) {
errs() << "Use before def!\n";
return false;
}
@@ -250,15 +231,13 @@ verifyVPBasicBlock(const VPBasicBlock *VPBB,
}
bool VPlanVerifier::verifyPlanIsValid(const VPlan &Plan) {
- DenseMap<const VPBlockBase *, unsigned> BlockNumbering;
- unsigned Cnt = 0;
+ VPDominatorTree VPDT;
+ VPDT.recalculate(const_cast<VPlan &>(Plan));
+
auto Iter = vp_depth_first_deep(Plan.getEntry());
- for (const VPBlockBase *VPB : Iter) {
- BlockNumbering[VPB] = Cnt++;
- auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
- if (!VPBB)
- continue;
- if (!verifyVPBasicBlock(VPBB, BlockNumbering))
+ for (const VPBasicBlock *VPBB :
+ VPBlockUtils::blocksOnly<const VPBasicBlock>(Iter)) {
+ if (!verifyVPBasicBlock(VPBB, VPDT))
return false;
}
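
The verifier now asks a dominator tree whether the defining block dominates each use instead of comparing depth-first block numbers. A self-contained sketch of the dominance relation it relies on, using the textbook definition (A dominates B iff every entry-to-B path goes through A) rather than LLVM's actual Semi-NCA implementation:

    #include <cassert>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    using CFG = std::map<std::string, std::vector<std::string>>;

    // A dominates B iff B is unreachable from Entry once A is removed
    // (and every block trivially dominates itself).
    static bool dominates(const CFG &G, const std::string &Entry,
                          const std::string &A, const std::string &B) {
      if (A == B || A == Entry)
        return true;
      std::set<std::string> Seen{A};           // pretend A is deleted
      std::vector<std::string> Worklist{Entry};
      Seen.insert(Entry);
      while (!Worklist.empty()) {
        std::string Cur = Worklist.back();
        Worklist.pop_back();
        if (Cur == B)
          return false;                        // reached B without touching A
        auto It = G.find(Cur);
        if (It == G.end())
          continue;
        for (const std::string &Succ : It->second)
          if (Seen.insert(Succ).second)
            Worklist.push_back(Succ);
      }
      return true;
    }

    int main() {
      //   entry -> then -> merge
      //   entry -> else -> merge
      CFG G = {{"entry", {"then", "else"}},
               {"then", {"merge"}},
               {"else", {"merge"}}};
      assert(dominates(G, "entry", "entry", "merge"));
      assert(!dominates(G, "entry", "then", "merge")); // 'else' path bypasses it
    }
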
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 2e489757ebc1..13464c9d3496 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -25,11 +25,8 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/InitializePasses.h"
-#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/Transforms/Vectorize.h"
#include <numeric>
#define DEBUG_TYPE "vector-combine"
@@ -247,7 +244,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
// still need a shuffle to change the vector size.
auto *Ty = cast<FixedVectorType>(I.getType());
unsigned OutputNumElts = Ty->getNumElements();
- SmallVector<int, 16> Mask(OutputNumElts, UndefMaskElem);
+ SmallVector<int, 16> Mask(OutputNumElts, PoisonMaskElem);
assert(OffsetEltIndex < MinVecNumElts && "Address offset too big");
Mask[0] = OffsetEltIndex;
if (OffsetEltIndex)
@@ -460,9 +457,9 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
// If we are extracting from 2 different indexes, then one operand must be
// shuffled before performing the vector operation. The shuffle mask is
- // undefined except for 1 lane that is being translated to the remaining
+ // poison except for 1 lane that is being translated to the remaining
// extraction lane. Therefore, it is a splat shuffle. Ex:
- // ShufMask = { undef, undef, 0, undef }
+ // ShufMask = { poison, poison, 0, poison }
// TODO: The cost model has an option for a "broadcast" shuffle
// (splat-from-element-0), but no option for a more general splat.
NewCost +=
@@ -479,11 +476,11 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
/// to a new element location.
static Value *createShiftShuffle(Value *Vec, unsigned OldIndex,
unsigned NewIndex, IRBuilder<> &Builder) {
- // The shuffle mask is undefined except for 1 lane that is being translated
+ // The shuffle mask is poison except for 1 lane that is being translated
// to the new element index. Example for OldIndex == 2 and NewIndex == 0:
- // ShufMask = { 2, undef, undef, undef }
+ // ShufMask = { 2, poison, poison, poison }
auto *VecTy = cast<FixedVectorType>(Vec->getType());
- SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
+ SmallVector<int, 32> ShufMask(VecTy->getNumElements(), PoisonMaskElem);
ShufMask[NewIndex] = OldIndex;
return Builder.CreateShuffleVector(Vec, ShufMask, "shift");
}
@@ -917,7 +914,7 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
auto *CmpTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(X->getType()));
InstructionCost NewCost = TTI.getCmpSelInstrCost(
CmpOpcode, X->getType(), CmpInst::makeCmpResultType(X->getType()), Pred);
- SmallVector<int, 32> ShufMask(VecTy->getNumElements(), UndefMaskElem);
+ SmallVector<int, 32> ShufMask(VecTy->getNumElements(), PoisonMaskElem);
ShufMask[CheapIndex] = ExpensiveIndex;
NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, CmpTy,
ShufMask);
@@ -932,7 +929,7 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
// Create a vector constant from the 2 scalar constants.
SmallVector<Constant *, 32> CmpC(VecTy->getNumElements(),
- UndefValue::get(VecTy->getElementType()));
+ PoisonValue::get(VecTy->getElementType()));
CmpC[Index0] = C0;
CmpC[Index1] = C1;
Value *VCmp = Builder.CreateCmp(Pred, X, ConstantVector::get(CmpC));
@@ -1565,7 +1562,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
// Calculate our ReconstructMasks from the OrigReconstructMasks and the
// modified order of the input shuffles.
SmallVector<SmallVector<int>> ReconstructMasks;
- for (auto Mask : OrigReconstructMasks) {
+ for (const auto &Mask : OrigReconstructMasks) {
SmallVector<int> ReconstructMask;
for (int M : Mask) {
auto FindIndex = [](const SmallVector<std::pair<int, int>> &V, int M) {
@@ -1596,12 +1593,12 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
V2B.push_back(GetBaseMaskValue(SVI1B, V2[I].first));
}
while (V1A.size() < NumElts) {
- V1A.push_back(UndefMaskElem);
- V1B.push_back(UndefMaskElem);
+ V1A.push_back(PoisonMaskElem);
+ V1B.push_back(PoisonMaskElem);
}
while (V2A.size() < NumElts) {
- V2A.push_back(UndefMaskElem);
- V2B.push_back(UndefMaskElem);
+ V2A.push_back(PoisonMaskElem);
+ V2B.push_back(PoisonMaskElem);
}
auto AddShuffleCost = [&](InstructionCost C, Instruction *I) {
@@ -1660,16 +1657,16 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
return SSV->getOperand(Op);
return SV->getOperand(Op);
};
- Builder.SetInsertPoint(SVI0A->getNextNode());
+ Builder.SetInsertPoint(SVI0A->getInsertionPointAfterDef());
Value *NSV0A = Builder.CreateShuffleVector(GetShuffleOperand(SVI0A, 0),
GetShuffleOperand(SVI0A, 1), V1A);
- Builder.SetInsertPoint(SVI0B->getNextNode());
+ Builder.SetInsertPoint(SVI0B->getInsertionPointAfterDef());
Value *NSV0B = Builder.CreateShuffleVector(GetShuffleOperand(SVI0B, 0),
GetShuffleOperand(SVI0B, 1), V1B);
- Builder.SetInsertPoint(SVI1A->getNextNode());
+ Builder.SetInsertPoint(SVI1A->getInsertionPointAfterDef());
Value *NSV1A = Builder.CreateShuffleVector(GetShuffleOperand(SVI1A, 0),
GetShuffleOperand(SVI1A, 1), V2A);
- Builder.SetInsertPoint(SVI1B->getNextNode());
+ Builder.SetInsertPoint(SVI1B->getInsertionPointAfterDef());
Value *NSV1B = Builder.CreateShuffleVector(GetShuffleOperand(SVI1B, 0),
GetShuffleOperand(SVI1B, 1), V2B);
Builder.SetInsertPoint(Op0);
@@ -1811,54 +1808,6 @@ bool VectorCombine::run() {
return MadeChange;
}
-// Pass manager boilerplate below here.
-
-namespace {
-class VectorCombineLegacyPass : public FunctionPass {
-public:
- static char ID;
- VectorCombineLegacyPass() : FunctionPass(ID) {
- initializeVectorCombineLegacyPassPass(*PassRegistry::getPassRegistry());
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- AU.addRequired<AAResultsWrapperPass>();
- AU.setPreservesCFG();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- AU.addPreserved<AAResultsWrapperPass>();
- AU.addPreserved<BasicAAWrapperPass>();
- FunctionPass::getAnalysisUsage(AU);
- }
-
- bool runOnFunction(Function &F) override {
- if (skipFunction(F))
- return false;
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
- VectorCombine Combiner(F, TTI, DT, AA, AC, false);
- return Combiner.run();
- }
-};
-} // namespace
-
-char VectorCombineLegacyPass::ID = 0;
-INITIALIZE_PASS_BEGIN(VectorCombineLegacyPass, "vector-combine",
- "Optimize scalar/vector ops", false,
- false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_END(VectorCombineLegacyPass, "vector-combine",
- "Optimize scalar/vector ops", false, false)
-Pass *llvm::createVectorCombinePass() {
- return new VectorCombineLegacyPass();
-}
-
PreservedAnalyses VectorCombinePass::run(Function &F,
FunctionAnalysisManager &FAM) {
auto &AC = FAM.getResult<AssumptionAnalysis>(F);
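
PoisonMaskElem, like the UndefMaskElem it replaces here, is the -1 sentinel in a shufflevector mask; the rename only changes which placeholder value the untouched lanes are documented to hold. A small sketch of the single-translated-lane mask that createShiftShuffle() builds (it mirrors the logic, it does not call LLVM):

    #include <cstdio>
    #include <vector>

    // Build a shuffle mask that moves the element at OldIndex to NewIndex and
    // leaves every other lane as the -1 "don't care" sentinel (poison/undef).
    static std::vector<int> shiftShuffleMask(unsigned NumElts, unsigned OldIndex,
                                             unsigned NewIndex) {
      std::vector<int> Mask(NumElts, -1); // -1 == PoisonMaskElem / UndefMaskElem
      Mask[NewIndex] = static_cast<int>(OldIndex);
      return Mask;
    }

    int main() {
      // OldIndex == 2, NewIndex == 0 on a 4-element vector: { 2, -1, -1, -1 },
      // i.e. the "{ 2, poison, poison, poison }" mask from the comment above.
      for (int M : shiftShuffleMask(4, 2, 0))
        std::printf("%d ", M);
      std::printf("\n");
    }
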
diff --git a/llvm/lib/Transforms/Vectorize/Vectorize.cpp b/llvm/lib/Transforms/Vectorize/Vectorize.cpp
index 208e5eeea864..2f5048d2a664 100644
--- a/llvm/lib/Transforms/Vectorize/Vectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/Vectorize.cpp
@@ -12,10 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Vectorize.h"
-#include "llvm-c/Initialization.h"
-#include "llvm-c/Transforms/Vectorize.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"
@@ -23,20 +19,5 @@ using namespace llvm;
/// Initialize all passes linked into the Vectorization library.
void llvm::initializeVectorization(PassRegistry &Registry) {
- initializeLoopVectorizePass(Registry);
- initializeSLPVectorizerPass(Registry);
initializeLoadStoreVectorizerLegacyPassPass(Registry);
- initializeVectorCombineLegacyPassPass(Registry);
-}
-
-void LLVMInitializeVectorization(LLVMPassRegistryRef R) {
- initializeVectorization(*unwrap(R));
-}
-
-void LLVMAddLoopVectorizePass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createLoopVectorizePass());
-}
-
-void LLVMAddSLPVectorizePass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createSLPVectorizerPass());
}
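
With the legacy wrappers and the LLVMAdd*VectorizePass C bindings removed, the vectorizers are reachable only through the new pass manager. A rough sketch of scheduling them there, assuming LLVM 17 headers and the constructors as I recall them (worth double-checking against the installed headers):

    #include "llvm/IR/PassManager.h"
    #include "llvm/Transforms/Vectorize/LoopVectorize.h"
    #include "llvm/Transforms/Vectorize/SLPVectorizer.h"
    #include "llvm/Transforms/Vectorize/VectorCombine.h"

    // Build a module pipeline that runs the three vectorization passes on every
    // function, in roughly the order the default optimization pipeline uses.
    llvm::ModulePassManager makeVectorizePipeline() {
      llvm::FunctionPassManager FPM;
      FPM.addPass(llvm::LoopVectorizePass());
      FPM.addPass(llvm::SLPVectorizerPass());
      FPM.addPass(llvm::VectorCombinePass());

      llvm::ModulePassManager MPM;
      MPM.addPass(llvm::createModuleToFunctionPassAdaptor(std::move(FPM)));
      return MPM;
    }

    // Equivalent textual pipeline for opt:
    //   opt -passes='function(loop-vectorize,slp-vectorizer,vector-combine)' in.ll
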
diff --git a/llvm/lib/WindowsDriver/MSVCPaths.cpp b/llvm/lib/WindowsDriver/MSVCPaths.cpp
index d8d656fc46e4..1c070bf1bf7d 100644
--- a/llvm/lib/WindowsDriver/MSVCPaths.cpp
+++ b/llvm/lib/WindowsDriver/MSVCPaths.cpp
@@ -9,15 +9,16 @@
#include "llvm/WindowsDriver/MSVCPaths.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/VersionTuple.h"
#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/Triple.h"
#include <optional>
#include <string>
@@ -586,7 +587,7 @@ bool findVCToolChainViaEnvironment(vfs::FileSystem &VFS, std::string &Path,
for (StringRef Prefix : ExpectedPrefixes) {
if (It == End)
goto NotAToolChain;
- if (!It->startswith_insensitive(Prefix))
+ if (!It->starts_with_insensitive(Prefix))
goto NotAToolChain;
++It;
}
@@ -609,8 +610,9 @@ bool findVCToolChainViaEnvironment(vfs::FileSystem &VFS, std::string &Path,
return false;
}
-bool findVCToolChainViaSetupConfig(vfs::FileSystem &VFS, std::string &Path,
- ToolsetLayout &VSLayout) {
+bool findVCToolChainViaSetupConfig(vfs::FileSystem &VFS,
+ std::optional<StringRef> VCToolsVersion,
+ std::string &Path, ToolsetLayout &VSLayout) {
#if !defined(USE_MSVC_SETUP_API)
return false;
#else
@@ -677,17 +679,24 @@ bool findVCToolChainViaSetupConfig(vfs::FileSystem &VFS, std::string &Path,
std::string VCRootPath;
convertWideToUTF8(std::wstring(VCPathWide), VCRootPath);
- SmallString<256> ToolsVersionFilePath(VCRootPath);
- sys::path::append(ToolsVersionFilePath, "Auxiliary", "Build",
- "Microsoft.VCToolsVersion.default.txt");
+ std::string ToolsVersion;
+ if (VCToolsVersion.has_value()) {
+ ToolsVersion = *VCToolsVersion;
+ } else {
+ SmallString<256> ToolsVersionFilePath(VCRootPath);
+ sys::path::append(ToolsVersionFilePath, "Auxiliary", "Build",
+ "Microsoft.VCToolsVersion.default.txt");
+
+ auto ToolsVersionFile = MemoryBuffer::getFile(ToolsVersionFilePath);
+ if (!ToolsVersionFile)
+ return false;
+
+ ToolsVersion = ToolsVersionFile->get()->getBuffer().rtrim();
+ }
- auto ToolsVersionFile = MemoryBuffer::getFile(ToolsVersionFilePath);
- if (!ToolsVersionFile)
- return false;
SmallString<256> ToolchainPath(VCRootPath);
- sys::path::append(ToolchainPath, "Tools", "MSVC",
- ToolsVersionFile->get()->getBuffer().rtrim());
+ sys::path::append(ToolchainPath, "Tools", "MSVC", ToolsVersion);
auto Status = VFS.status(ToolchainPath);
if (!Status || !Status->isDirectory())
return false;
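
The new std::optional<StringRef> parameter lets callers pin the MSVC toolset version instead of reading Microsoft.VCToolsVersion.default.txt. A hypothetical call site follows; the version string is made up, and the function still returns false unless LLVM was configured with the MSVC setup API:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/VirtualFileSystem.h"
    #include "llvm/WindowsDriver/MSVCPaths.h"
    #include <string>

    // Locate a VC tool chain, forcing toolset "14.36.32532" (illustrative value).
    bool findPinnedToolChain(std::string &Path) {
      auto VFS = llvm::vfs::getRealFileSystem();
      llvm::ToolsetLayout Layout{};
      return llvm::findVCToolChainViaSetupConfig(
          *VFS, llvm::StringRef("14.36.32532"), Path, Layout);
    }

    // Passing std::nullopt keeps the old behaviour of reading
    // Auxiliary/Build/Microsoft.VCToolsVersion.default.txt.
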
diff --git a/llvm/lib/XRay/InstrumentationMap.cpp b/llvm/lib/XRay/InstrumentationMap.cpp
index ee190d9e58c7..800f0a0f47e4 100644
--- a/llvm/lib/XRay/InstrumentationMap.cpp
+++ b/llvm/lib/XRay/InstrumentationMap.cpp
@@ -14,7 +14,6 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ELFObjectFile.h"
@@ -24,6 +23,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/YAMLTraits.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
@@ -60,6 +60,7 @@ loadObj(StringRef Filename, object::OwningBinary<object::ObjectFile> &ObjFile,
// Find the section named "xray_instr_map".
if ((!ObjFile.getBinary()->isELF() && !ObjFile.getBinary()->isMachO()) ||
!(ObjFile.getBinary()->getArch() == Triple::x86_64 ||
+ ObjFile.getBinary()->getArch() == Triple::loongarch64 ||
ObjFile.getBinary()->getArch() == Triple::ppc64le ||
ObjFile.getBinary()->getArch() == Triple::arm ||
ObjFile.getBinary()->getArch() == Triple::aarch64))
diff --git a/llvm/tools/bugpoint/BugDriver.cpp b/llvm/tools/bugpoint/BugDriver.cpp
index 942028cad80b..32c747fdd516 100644
--- a/llvm/tools/bugpoint/BugDriver.cpp
+++ b/llvm/tools/bugpoint/BugDriver.cpp
@@ -21,9 +21,9 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileUtilities.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Host.h"
#include <memory>
using namespace llvm;
diff --git a/llvm/tools/bugpoint/BugDriver.h b/llvm/tools/bugpoint/BugDriver.h
index b7c9edc5b812..9fb0880b9cd0 100644
--- a/llvm/tools/bugpoint/BugDriver.h
+++ b/llvm/tools/bugpoint/BugDriver.h
@@ -101,15 +101,6 @@ public:
/// input.
Error debugMiscompilation();
- /// debugPassMiscompilation - This method is called when the specified pass
- /// miscompiles Program as input. It tries to reduce the testcase to
- /// something that smaller that still miscompiles the program.
- /// ReferenceOutput contains the filename of the file containing the output we
- /// are to match.
- ///
- bool debugPassMiscompilation(const PassInfo *ThePass,
- const std::string &ReferenceOutput);
-
/// compileSharedObject - This method creates a SharedObject from a given
/// BitcodeFile for debugging a code generator.
///
diff --git a/llvm/tools/bugpoint/CrashDebugger.cpp b/llvm/tools/bugpoint/CrashDebugger.cpp
index c90e1afd8ca4..0ca8fa28c4af 100644
--- a/llvm/tools/bugpoint/CrashDebugger.cpp
+++ b/llvm/tools/bugpoint/CrashDebugger.cpp
@@ -70,6 +70,18 @@ cl::opt<bool> VerboseErrors("verbose-errors",
cl::init(false));
}
+static bool isValidModule(std::unique_ptr<Module> &M,
+ bool ExitOnFailure = true) {
+ if (!llvm::verifyModule(*M.get(), &llvm::errs()))
+ return true;
+
+ if (ExitOnFailure) {
+ llvm::errs() << "verify failed!\n";
+ exit(1);
+ }
+ return false;
+}
+
namespace llvm {
class ReducePassList : public ListReducer<std::string> {
BugDriver &BD;
@@ -368,6 +380,10 @@ bool ReduceCrashingFunctionAttributes::TestFuncAttrs(
if (F->hasFnAttribute(Attribute::OptimizeNone))
F->addFnAttr(Attribute::NoInline);
+ // If modifying the attribute list leads to invalid IR, revert the change
+ if (!isValidModule(M, /*ExitOnFailure=*/false))
+ return false;
+
// Try running on the hacked up program...
if (TestFn(BD, M.get())) {
BD.setNewProgram(std::move(M)); // It crashed, keep the trimmed version...
@@ -510,14 +526,7 @@ bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock *> &BBs) {
ToProcess.clear();
}
// Verify we didn't break anything
- std::vector<std::string> Passes;
- Passes.push_back("verify");
- std::unique_ptr<Module> New = BD.runPassesOn(M.get(), Passes);
- if (!New) {
- errs() << "verify failed!\n";
- exit(1);
- }
- M = std::move(New);
+ isValidModule(M);
// Try running on the hacked up program...
if (TestFn(BD, M.get())) {
@@ -618,14 +627,7 @@ bool ReduceCrashingConditionals::TestBlocks(
ToProcess.clear();
}
// Verify we didn't break anything
- std::vector<std::string> Passes;
- Passes.push_back("verify");
- std::unique_ptr<Module> New = BD.runPassesOn(M.get(), Passes);
- if (!New) {
- errs() << "verify failed!\n";
- exit(1);
- }
- M = std::move(New);
+ isValidModule(M);
// Try running on the hacked up program...
if (TestFn(BD, M.get())) {
@@ -711,14 +713,7 @@ bool ReduceSimplifyCFG::TestBlocks(std::vector<const BasicBlock *> &BBs) {
simplifyCFG(&*BBIt++, TTI);
}
// Verify we didn't break anything
- std::vector<std::string> Passes;
- Passes.push_back("verify");
- std::unique_ptr<Module> New = BD.runPassesOn(M.get(), Passes);
- if (!New) {
- errs() << "verify failed!\n";
- exit(1);
- }
- M = std::move(New);
+ isValidModule(M);
// Try running on the hacked up program...
if (TestFn(BD, M.get())) {
@@ -797,9 +792,7 @@ bool ReduceCrashingInstructions::TestInsts(
}
// Verify that this is still valid.
- legacy::PassManager Passes;
- Passes.add(createVerifierPass(/*FatalErrors=*/false));
- Passes.run(*M);
+ isValidModule(M, /*ExitOnFailure=*/false);
// Try running on the hacked up program...
if (TestFn(BD, M.get())) {
@@ -869,9 +862,7 @@ bool ReduceCrashingMetadata::TestInsts(std::vector<Instruction *> &Insts) {
}
// Verify that this is still valid.
- legacy::PassManager Passes;
- Passes.add(createVerifierPass(/*FatalErrors=*/false));
- Passes.run(*M);
+ isValidModule(M, /*ExitOnFailure=*/false);
// Try running on the hacked up program...
if (TestFn(BD, M.get())) {
@@ -944,9 +935,7 @@ bool ReduceCrashingNamedMD::TestNamedMDs(std::vector<std::string> &NamedMDs) {
NamedMD->eraseFromParent();
// Verify that this is still valid.
- legacy::PassManager Passes;
- Passes.add(createVerifierPass(/*FatalErrors=*/false));
- Passes.run(*M);
+ isValidModule(M, /*ExitOnFailure=*/false);
// Try running on the hacked up program...
if (TestFn(BD, M.get())) {
@@ -1009,9 +998,7 @@ bool ReduceCrashingNamedMDOps::TestNamedMDOps(
}
// Verify that this is still valid.
- legacy::PassManager Passes;
- Passes.add(createVerifierPass(/*FatalErrors=*/false));
- Passes.run(*M);
+ isValidModule(M, /*ExitOnFailure=*/false);
// Try running on the hacked up program...
if (TestFn(BD, M.get())) {
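
One subtlety in the new isValidModule() helper: llvm::verifyModule() returns true when the module is broken, so the helper negates it. A minimal standalone use of the verifier with the same convention:

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/Verifier.h"
    #include "llvm/Support/raw_ostream.h"

    // Returns true when M passes the IR verifier; diagnostics go to stderr.
    static bool moduleIsValid(const llvm::Module &M) {
      // verifyModule() returns true on *failure*, hence the negation.
      return !llvm::verifyModule(M, &llvm::errs());
    }

    int main() {
      llvm::LLVMContext Ctx;
      llvm::Module M("empty", Ctx); // an empty module is trivially valid
      return moduleIsValid(M) ? 0 : 1;
    }
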
diff --git a/llvm/tools/bugpoint/ExtractFunction.cpp b/llvm/tools/bugpoint/ExtractFunction.cpp
index 5047aa35d7e7..dd9a82c32035 100644
--- a/llvm/tools/bugpoint/ExtractFunction.cpp
+++ b/llvm/tools/bugpoint/ExtractFunction.cpp
@@ -133,7 +133,6 @@ BugDriver::performFinalCleanups(std::unique_ptr<Module> M,
I->setLinkage(GlobalValue::ExternalLinkage);
std::vector<std::string> CleanupPasses;
- CleanupPasses.push_back("globaldce");
if (MayModifySemantics)
CleanupPasses.push_back("deadarghaX0r");
diff --git a/llvm/tools/bugpoint/OptimizerDriver.cpp b/llvm/tools/bugpoint/OptimizerDriver.cpp
index 1197528d0dd3..f7239f5dc61b 100644
--- a/llvm/tools/bugpoint/OptimizerDriver.cpp
+++ b/llvm/tools/bugpoint/OptimizerDriver.cpp
@@ -207,7 +207,7 @@ bool BugDriver::runPasses(Module &Program,
Args.push_back(OptArgs[i]);
// Pin to legacy PM since bugpoint has lots of infra and hacks revolving
// around the legacy PM.
- Args.push_back("-enable-new-pm=0");
+ Args.push_back("-bugpoint-enable-legacy-pm");
Args.push_back("-disable-symbolication");
Args.push_back("-o");
Args.push_back(OutputFilename);
diff --git a/llvm/tools/bugpoint/ToolRunner.cpp b/llvm/tools/bugpoint/ToolRunner.cpp
index 352588f01ac8..c6733aecd31d 100644
--- a/llvm/tools/bugpoint/ToolRunner.cpp
+++ b/llvm/tools/bugpoint/ToolRunner.cpp
@@ -612,7 +612,7 @@ static bool IsARMArchitecture(std::vector<StringRef> Args) {
++I;
if (I == Args.size())
break;
- if (Args[I].startswith_insensitive("arm"))
+ if (Args[I].starts_with_insensitive("arm"))
return true;
}
diff --git a/llvm/tools/bugpoint/ToolRunner.h b/llvm/tools/bugpoint/ToolRunner.h
index f6b5f26c7a66..c9da9afba0e4 100644
--- a/llvm/tools/bugpoint/ToolRunner.h
+++ b/llvm/tools/bugpoint/ToolRunner.h
@@ -16,11 +16,11 @@
#ifndef LLVM_TOOLS_BUGPOINT_TOOLRUNNER_H
#define LLVM_TOOLS_BUGPOINT_TOOLRUNNER_H
-#include "llvm/ADT/Triple.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SystemUtils.h"
+#include "llvm/TargetParser/Triple.h"
#include <exception>
#include <vector>
diff --git a/llvm/tools/bugpoint/bugpoint.cpp b/llvm/tools/bugpoint/bugpoint.cpp
index 0305f6463858..e49efdfe7c8e 100644
--- a/llvm/tools/bugpoint/bugpoint.cpp
+++ b/llvm/tools/bugpoint/bugpoint.cpp
@@ -30,7 +30,6 @@
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/Valgrind.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
-#include "llvm/Transforms/IPO/PassManagerBuilder.h"
// Enable this macro to debug bugpoint itself.
//#define DEBUG_BUGPOINT 1
@@ -66,24 +65,6 @@ static cl::opt<bool>
static cl::list<const PassInfo *, bool, PassNameParser>
PassList(cl::desc("Passes available:"));
-static cl::opt<bool>
- OptLevelO1("O1", cl::desc("Optimization level 1. Identical to 'opt -O1'"));
-
-static cl::opt<bool>
- OptLevelO2("O2", cl::desc("Optimization level 2. Identical to 'opt -O2'"));
-
-static cl::opt<bool> OptLevelOs(
- "Os",
- cl::desc(
- "Like -O2 with extra optimizations for size. Similar to clang -Os"));
-
-static cl::opt<bool>
-OptLevelOz("Oz",
- cl::desc("Like -Os but reduces code size further. Similar to clang -Oz"));
-
-static cl::opt<bool>
- OptLevelO3("O3", cl::desc("Optimization level 3. Identical to 'opt -O3'"));
-
static cl::opt<std::string>
OverrideTriple("mtriple", cl::desc("Override target triple for module"));
@@ -110,26 +91,6 @@ public:
};
}
-// This routine adds optimization passes based on selected optimization level,
-// OptLevel.
-//
-// OptLevel - Optimization Level
-static void AddOptimizationPasses(legacy::FunctionPassManager &FPM,
- unsigned OptLevel,
- unsigned SizeLevel) {
- PassManagerBuilder Builder;
- Builder.OptLevel = OptLevel;
- Builder.SizeLevel = SizeLevel;
-
- if (OptLevel > 1)
- Builder.Inliner = createFunctionInliningPass(OptLevel, SizeLevel, false);
- else
- Builder.Inliner = createAlwaysInlinerLegacyPass();
-
- Builder.populateFunctionPassManager(FPM);
- Builder.populateModulePassManager(FPM);
-}
-
#define HANDLE_EXTENSION(Ext) \
llvm::PassPluginLibraryInfo get##Ext##PluginInfo();
#include "llvm/Support/Extension.def"
@@ -195,17 +156,6 @@ int main(int argc, char **argv) {
AddToDriver PM(D);
- if (OptLevelO1)
- AddOptimizationPasses(PM, 1, 0);
- else if (OptLevelO2)
- AddOptimizationPasses(PM, 2, 0);
- else if (OptLevelO3)
- AddOptimizationPasses(PM, 3, 0);
- else if (OptLevelOs)
- AddOptimizationPasses(PM, 2, 1);
- else if (OptLevelOz)
- AddOptimizationPasses(PM, 2, 2);
-
for (const PassInfo *PI : PassList)
D.addPass(std::string(PI->getPassArgument()));
diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
index f2dae67040ff..8934130f9913 100644
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -14,7 +14,6 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/CodeGen/LinkAllAsmWriterComponents.h"
@@ -36,7 +35,6 @@
#include "llvm/IRReader/IRReader.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Remarks/HotnessThresholdParser.h"
@@ -44,7 +42,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/SourceMgr.h"
@@ -54,6 +51,9 @@
#include "llvm/Support/WithColor.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <memory>
#include <optional>
@@ -107,10 +107,6 @@ static cl::opt<std::string>
"regardless of binutils support"));
static cl::opt<bool>
-NoIntegratedAssembler("no-integrated-as", cl::Hidden,
- cl::desc("Disable integrated assembler"));
-
-static cl::opt<bool>
PreserveComments("preserve-as-comments", cl::Hidden,
cl::desc("Preserve Comments in outputted assembly"),
cl::init(true));
@@ -366,7 +362,7 @@ int main(int argc, char **argv) {
initializeScalarizeMaskedMemIntrinLegacyPassPass(*Registry);
initializeExpandReductionsPass(*Registry);
initializeExpandVectorPredicationPass(*Registry);
- initializeHardwareLoopsPass(*Registry);
+ initializeHardwareLoopsLegacyPass(*Registry);
initializeTransformUtils(*Registry);
initializeReplaceWithVeclibLegacyPass(*Registry);
initializeTLSVariableHoistLegacyPassPass(*Registry);
@@ -496,9 +492,27 @@ static int compileModule(char **argv, LLVMContext &Context) {
TargetOptions Options;
auto InitializeOptions = [&](const Triple &TheTriple) {
Options = codegen::InitTargetOptionsFromCodeGenFlags(TheTriple);
+
+ if (Options.XCOFFReadOnlyPointers) {
+ if (!TheTriple.isOSAIX())
+ reportError("-mxcoff-roptr option is only supported on AIX",
+ InputFilename);
+
+ // Since the storage mapping class is specified per csect,
+ // without using data sections, it is less effective to use read-only
+ // pointers. Using read-only pointers may cause other RO variables in the
+ // same csect to become RW when the linker acts upon `-bforceimprw`;
+ // therefore, we require that separate data sections are used in the
+ // presence of ReadOnlyPointers. We honor the data-sections setting as
+ // given, since we have found no reason to override it that would outweigh
+ // the surprise of ignoring the user's choice.
+ if (!Options.DataSections)
+ reportError("-mxcoff-roptr option must be used with -data-sections",
+ InputFilename);
+ }
+
Options.BinutilsVersion =
TargetMachine::parseBinutilsVersion(BinutilsVersion);
- Options.DisableIntegratedAS = NoIntegratedAssembler;
Options.MCOptions.ShowMCEncoding = ShowMCEncoding;
Options.MCOptions.AsmVerbose = AsmVerbose;
Options.MCOptions.PreserveAsmComments = PreserveComments;
@@ -680,13 +694,17 @@ static int compileModule(char **argv, LLVMContext &Context) {
if (!MIR) {
WithColor::warning(errs(), argv[0])
<< "run-pass is for .mir file only.\n";
+ delete MMIWP;
return 1;
}
- TargetPassConfig &TPC = *LLVMTM.createPassConfig(PM);
+ TargetPassConfig *PTPC = LLVMTM.createPassConfig(PM);
+ TargetPassConfig &TPC = *PTPC;
if (TPC.hasLimitedCodeGenPipeline()) {
WithColor::warning(errs(), argv[0])
<< "run-pass cannot be used with "
<< TPC.getLimitedCodeGenPipelineReason(" and ") << ".\n";
+ delete PTPC;
+ delete MMIWP;
return 1;
}
diff --git a/llvm/tools/lli/ExecutionUtils.h b/llvm/tools/lli/ExecutionUtils.h
index fcd1db05cca3..6bf9cd58e031 100644
--- a/llvm/tools/lli/ExecutionUtils.h
+++ b/llvm/tools/lli/ExecutionUtils.h
@@ -48,8 +48,8 @@ private:
std::unique_ptr<ToolOutputFile> TestOut;
template <typename T> void expose(orc::SymbolStringPtr Name, T *Handler) {
- BuiltinFunctions[Name] = JITEvaluatedSymbol(
- pointerToJITTargetAddress(Handler), JITSymbolFlags::Exported);
+ BuiltinFunctions[Name] = {orc::ExecutorAddr::fromPtr(Handler),
+ JITSymbolFlags::Exported};
}
static std::unique_ptr<ToolOutputFile> createToolOutput();
diff --git a/llvm/tools/lli/lli.cpp b/llvm/tools/lli/lli.cpp
index c9b77e23ba07..3b5250d56707 100644
--- a/llvm/tools/lli/lli.cpp
+++ b/llvm/tools/lli/lli.cpp
@@ -15,7 +15,6 @@
#include "ExecutionUtils.h"
#include "ForwardingMemoryManager.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/CodeGen/LinkAllCodegenComponents.h"
@@ -26,17 +25,13 @@
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/MCJIT.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
-#include "llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h"
#include "llvm/ExecutionEngine/Orc/DebugUtils.h"
-#include "llvm/ExecutionEngine/Orc/ELFNixPlatform.h"
-#include "llvm/ExecutionEngine/Orc/EPCDebugObjectRegistrar.h"
#include "llvm/ExecutionEngine/Orc/EPCDynamicLibrarySearchGenerator.h"
#include "llvm/ExecutionEngine/Orc/EPCEHFrameRegistrar.h"
#include "llvm/ExecutionEngine/Orc/EPCGenericRTDyldMemoryManager.h"
#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h"
#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h"
#include "llvm/ExecutionEngine/Orc/LLJIT.h"
-#include "llvm/ExecutionEngine/Orc/MachOPlatform.h"
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/SimpleRemoteEPC.h"
#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h"
@@ -68,6 +63,7 @@
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation.h"
#include <cerrno>
#include <optional>
@@ -236,20 +232,22 @@ namespace {
cl::desc("Do not resolve lli process symbols in JIT'd code"),
cl::init(false));
- enum class LLJITPlatform { Inactive, DetectHost, ORC, GenericIR };
-
- cl::opt<LLJITPlatform>
- Platform("lljit-platform", cl::desc("Platform to use with LLJIT"),
- cl::init(LLJITPlatform::DetectHost),
- cl::values(clEnumValN(LLJITPlatform::DetectHost, "DetectHost",
- "Select based on JIT target triple"),
- clEnumValN(LLJITPlatform::ORC, "ORC",
- "Use ORCPlatform with the ORC runtime"),
- clEnumValN(LLJITPlatform::GenericIR, "GenericIR",
- "Use LLJITGenericIRPlatform"),
- clEnumValN(LLJITPlatform::Inactive, "Inactive",
- "Disable platform support explicitly")),
- cl::Hidden);
+ enum class LLJITPlatform { Inactive, Auto, ExecutorNative, GenericIR };
+
+ cl::opt<LLJITPlatform> Platform(
+ "lljit-platform", cl::desc("Platform to use with LLJIT"),
+ cl::init(LLJITPlatform::Auto),
+ cl::values(clEnumValN(LLJITPlatform::Auto, "Auto",
+ "Like 'ExecutorNative' if ORC runtime "
+ "provided, otherwise like 'GenericIR'"),
+ clEnumValN(LLJITPlatform::ExecutorNative, "ExecutorNative",
+ "Use the native platform for the executor."
+ "Requires -orc-runtime"),
+ clEnumValN(LLJITPlatform::GenericIR, "GenericIR",
+ "Use LLJITGenericIRPlatform"),
+ clEnumValN(LLJITPlatform::Inactive, "Inactive",
+ "Disable platform support explicitly")),
+ cl::Hidden);
enum class DumpKind {
NoDump,
@@ -663,10 +661,6 @@ int main(int argc, char **argv, char * const *envp) {
#endif
}
- std::unique_ptr<orc::ExecutorProcessControl> EPC =
- RemoteMCJIT ? ExitOnErr(launchRemote())
- : ExitOnErr(orc::SelfExecutorProcessControl::Create());
-
if (!RemoteMCJIT) {
// If the program doesn't explicitly call exit, we will need the Exit
// function later on to make an explicit call, so get the function now.
@@ -712,6 +706,7 @@ int main(int argc, char **argv, char * const *envp) {
abort();
} else {
// else == "if (RemoteMCJIT)"
+ std::unique_ptr<orc::ExecutorProcessControl> EPC = ExitOnErr(launchRemote());
// Remote target MCJIT doesn't (yet) support static constructors. No reason
// it couldn't. This is a limitation of the LLI implementation, not the
@@ -829,6 +824,20 @@ loadModule(StringRef Path, orc::ThreadSafeContext TSCtx) {
return orc::ThreadSafeModule(std::move(M), std::move(TSCtx));
}
+int mingw_noop_main(void) {
+ // Cygwin and MinGW insert calls from the main function to the runtime
+ // function __main. The __main function is responsible for setting up main's
+ // environment (e.g. running static constructors), however this is not needed
+ // when running under lli: the executor process will have run non-JIT ctors,
+ // and ORC will take care of running JIT'd ctors. To avoid a missing symbol
+ // error we just implement __main as a no-op.
+ //
+ // FIXME: Move this to ORC-RT (and the ORC-RT substitution library once it
+ // exists). That will allow it to work out-of-process, and for all
+ // ORC tools (the problem isn't lli specific).
+ return 0;
+}
+
int runOrcJIT(const char *ProgName) {
// Start setting up the JIT environment.
@@ -867,6 +876,9 @@ int runOrcJIT(const char *ProgName) {
.setRelocationModel(codegen::getExplicitRelocModel())
.setCodeModel(codegen::getExplicitCodeModel());
+ // Link process symbols unless NoProcessSymbols is set.
+ Builder.setLinkProcessSymbolsByDefault(!NoProcessSymbols);
+
// FIXME: Setting a dummy call-through manager in non-lazy mode prevents the
// JIT builder to instantiate a default (which would fail with an error for
// unsupported architectures).
@@ -874,7 +886,8 @@ int runOrcJIT(const char *ProgName) {
auto ES = std::make_unique<orc::ExecutionSession>(
ExitOnErr(orc::SelfExecutorProcessControl::Create()));
Builder.setLazyCallthroughManager(
- std::make_unique<orc::LazyCallThroughManager>(*ES, 0, nullptr));
+ std::make_unique<orc::LazyCallThroughManager>(*ES, orc::ExecutorAddr(),
+ nullptr));
Builder.setExecutionSession(std::move(ES));
}
@@ -907,17 +920,15 @@ int runOrcJIT(const char *ProgName) {
// Set up LLJIT platform.
LLJITPlatform P = Platform;
- if (P == LLJITPlatform::DetectHost) {
- if (JITLinker == JITLinkerKind::JITLink && !OrcRuntime.empty() &&
- (TT->isOSBinFormatMachO() || TT->isOSBinFormatELF()))
- P = LLJITPlatform::ORC;
- else
- P = LLJITPlatform::GenericIR;
- }
+ if (P == LLJITPlatform::Auto)
+ P = OrcRuntime.empty() ? LLJITPlatform::GenericIR
+ : LLJITPlatform::ExecutorNative;
+
switch (P) {
- case LLJITPlatform::ORC:
- Builder.setPlatformSetUp(orc::setUpOrcPlatform);
+ case LLJITPlatform::ExecutorNative: {
+ Builder.setPlatformSetUp(orc::ExecutorNativePlatform(OrcRuntime));
break;
+ }
case LLJITPlatform::GenericIR:
// Nothing to do: LLJITBuilder will use this by default.
break;
@@ -936,22 +947,35 @@ int runOrcJIT(const char *ProgName) {
Builder.setObjectLinkingLayerCreator([&EPC, &P](orc::ExecutionSession &ES,
const Triple &TT) {
auto L = std::make_unique<orc::ObjectLinkingLayer>(ES, EPC->getMemMgr());
- if (P != LLJITPlatform::ORC) {
+ if (P != LLJITPlatform::ExecutorNative)
L->addPlugin(std::make_unique<orc::EHFrameRegistrationPlugin>(
ES, ExitOnErr(orc::EPCEHFrameRegistrar::Create(ES))));
- L->addPlugin(std::make_unique<orc::DebugObjectManagerPlugin>(
- ES, ExitOnErr(orc::createJITLoaderGDBRegistrar(ES))));
- }
return L;
});
}
+ // Enable debugging of JIT'd code (only works on JITLink for ELF and MachO).
+ Builder.setEnableDebuggerSupport(true);
+
auto J = ExitOnErr(Builder.create());
auto *ObjLayer = &J->getObjLinkingLayer();
- if (auto *RTDyldObjLayer = dyn_cast<orc::RTDyldObjectLinkingLayer>(ObjLayer))
+ if (auto *RTDyldObjLayer = dyn_cast<orc::RTDyldObjectLinkingLayer>(ObjLayer)) {
RTDyldObjLayer->registerJITEventListener(
*JITEventListener::createGDBRegistrationListener());
+#if LLVM_USE_OPROFILE
+ RTDyldObjLayer->registerJITEventListener(
+ *JITEventListener::createOProfileJITEventListener());
+#endif
+#if LLVM_USE_INTEL_JITEVENTS
+ RTDyldObjLayer->registerJITEventListener(
+ *JITEventListener::createIntelJITEventListener());
+#endif
+#if LLVM_USE_PERF
+ RTDyldObjLayer->registerJITEventListener(
+ *JITEventListener::createPerfJITEventListener());
+#endif
+ }
if (PerModuleLazy)
J->setPartitionFunction(orc::CompileOnDemandLayer::compileWholeModule);
@@ -971,48 +995,22 @@ int runOrcJIT(const char *ProgName) {
return TSM;
});
- orc::MangleAndInterner Mangle(J->getExecutionSession(), J->getDataLayout());
-
- // Unless they've been explicitly disabled, make process symbols available to
- // JIT'd code.
- if (!NoProcessSymbols)
- J->getMainJITDylib().addGenerator(
- ExitOnErr(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
- J->getDataLayout().getGlobalPrefix(),
- [MainName = Mangle("main")](const orc::SymbolStringPtr &Name) {
- return Name != MainName;
- })));
-
- if (GenerateBuiltinFunctions.size() > 0)
+ if (GenerateBuiltinFunctions.size() > 0) {
+ // Add LLI builtins.
+ orc::MangleAndInterner Mangle(J->getExecutionSession(), J->getDataLayout());
J->getMainJITDylib().addGenerator(
std::make_unique<LLIBuiltinFunctionGenerator>(GenerateBuiltinFunctions,
Mangle));
-
- if (P == LLJITPlatform::ORC) {
- if (auto *OLL = llvm::dyn_cast<llvm::orc::ObjectLinkingLayer>(ObjLayer)) {
- auto &ES = J->getExecutionSession();
- if (TT->isOSBinFormatMachO()) {
- if (auto P = llvm::orc::MachOPlatform::Create(
- ES, *OLL, J->getMainJITDylib(), OrcRuntime.c_str()))
- ES.setPlatform(std::move(*P));
- else
- ExitOnErr(P.takeError());
- } else if (TT->isOSBinFormatELF()) {
- if (auto P = llvm::orc::ELFNixPlatform::Create(
- ES, *OLL, J->getMainJITDylib(), OrcRuntime.c_str()))
- ES.setPlatform(std::move(*P));
- else
- ExitOnErr(P.takeError());
- } else {
- errs() << "No ORC platform support\n";
- exit(1);
- }
- } else {
- errs() << "ORC platform requires JITLink\n";
- exit(1);
- }
}
+ // If this is a Mingw or Cygwin executor then we need to alias __main to
+ // orc_rt_int_void_return_0.
+ if (J->getTargetTriple().isOSCygMing())
+ ExitOnErr(J->getProcessSymbolsJITDylib()->define(
+ orc::absoluteSymbols({{J->mangleAndIntern("__main"),
+ {orc::ExecutorAddr::fromPtr(mingw_noop_main),
+ JITSymbolFlags::Exported}}})));
+
// Regular modules are greedy: They materialize as a whole and trigger
// materialization for all required symbols recursively. Lazy modules go
// through partitioning and they replace outgoing calls with reexport stubs
@@ -1060,8 +1058,7 @@ int runOrcJIT(const char *ProgName) {
assert(EAIdx != 0 && "ExtraArchive should have index > 0");
auto JDItr = std::prev(IdxToDylib.lower_bound(EAIdx));
auto &JD = *JDItr->second;
- JD.addGenerator(ExitOnErr(orc::StaticLibraryDefinitionGenerator::Load(
- J->getObjLinkingLayer(), EAItr->c_str(), *TT)));
+ ExitOnErr(J->linkStaticLibraryInto(JD, EAItr->c_str()));
}
}
@@ -1181,3 +1178,41 @@ Expected<std::unique_ptr<orc::ExecutorProcessControl>> launchRemote() {
llvm::orc::SimpleRemoteEPC::Setup(), PipeFD[1][0], PipeFD[0][1]);
#endif
}
+
+// For MinGW environments, manually export the __chkstk function from the lli
+// executable.
+//
+// Normally, this function is provided by compiler-rt builtins or libgcc.
+// It is named "_alloca" on i386, "___chkstk_ms" on x86_64, and "__chkstk" on
+// arm/aarch64. In MSVC configurations, it's named "__chkstk" for all
+// architectures.
+//
+// When Orc tries to resolve symbols at runtime, this succeeds in MSVC
+// configurations, somewhat by accident/luck; kernelbase.dll does export a
+// symbol named "__chkstk" which gets found by Orc, even if regular applications
+// never link against that function from that DLL (it's linked in statically
+// from a compiler support library).
+//
+// The MinGW specific symbol names aren't available in that DLL though.
+// Therefore, manually export the relevant symbol from lli, to let it be
+// found at runtime during tests.
+//
+// For real JIT uses, the real compiler support libraries should be linked
+// in, somehow; this is a workaround to let tests pass.
+//
+// TODO: Move this into libORC at some point, see
+// https://github.com/llvm/llvm-project/issues/56603.
+#ifdef __MINGW32__
+// This is a MinGW version of #pragma comment(linker, "...") that doesn't
+// require compiling with -fms-extensions.
+#if defined(__i386__)
+static __attribute__((section(".drectve"), used)) const char export_chkstk[] =
+ "-export:_alloca";
+#elif defined(__x86_64__)
+static __attribute__((section(".drectve"), used)) const char export_chkstk[] =
+ "-export:___chkstk_ms";
+#else
+static __attribute__((section(".drectve"), used)) const char export_chkstk[] =
+ "-export:__chkstk";
+#endif
+#endif
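
The __main and __chkstk workarounds above both amount to publishing a host-process function to JIT'd code. With the ORC APIs this patch moves to, that is an absoluteSymbols() definition built from ExecutorAddr::fromPtr(); a trimmed-down sketch (helper name made up, LLJIT setup and error handling omitted):

    #include "llvm/ExecutionEngine/JITSymbol.h"
    #include "llvm/ExecutionEngine/Orc/Core.h"
    #include "llvm/ExecutionEngine/Orc/LLJIT.h"

    // Hypothetical host function we want JIT'd code to be able to call.
    static int hostNoopMain() { return 0; }

    // Define "__main" in the JIT as an absolute symbol pointing at the host
    // function, mirroring the Cygwin/MinGW aliasing done in runOrcJIT() above.
    static llvm::Error exposeNoopMain(llvm::orc::LLJIT &J) {
      return J.getMainJITDylib().define(llvm::orc::absoluteSymbols(
          {{J.mangleAndIntern("__main"),
            {llvm::orc::ExecutorAddr::fromPtr(&hostNoopMain),
             llvm::JITSymbolFlags::Exported}}}));
    }
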
diff --git a/llvm/tools/llvm-ar/llvm-ar.cpp b/llvm/tools/llvm-ar/llvm-ar.cpp
index 12f3196a9844..d21650d146a9 100644
--- a/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -13,20 +13,11 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/Magic.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/ArchiveWriter.h"
-#include "llvm/Object/COFFImportFile.h"
-#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Object/IRObjectFile.h"
-#include "llvm/Object/MachO.h"
-#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/SymbolicFile.h"
-#include "llvm/Object/TapiFile.h"
-#include "llvm/Object/Wasm.h"
-#include "llvm/Object/XCOFFObjectFile.h"
#include "llvm/Support/Chrono.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConvertUTF.h"
@@ -34,8 +25,8 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/LLVMDriver.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
@@ -45,6 +36,8 @@
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/ToolDrivers/llvm-dlltool/DlltoolDriver.h"
#include "llvm/ToolDrivers/llvm-lib/LibDriver.h"
@@ -646,31 +639,12 @@ static bool shouldCreateArchive(ArchiveOperation Op) {
llvm_unreachable("Missing entry in covered switch.");
}
-static bool is64BitSymbolicFile(SymbolicFile &Obj) {
- if (auto *IRObj = dyn_cast<IRObjectFile>(&Obj))
- return Triple(IRObj->getTargetTriple()).isArch64Bit();
- if (isa<COFFObjectFile>(Obj) || isa<COFFImportFile>(Obj))
- return false;
- if (XCOFFObjectFile *XCOFFObj = dyn_cast<XCOFFObjectFile>(&Obj))
- return XCOFFObj->is64Bit();
- if (isa<WasmObjectFile>(Obj))
- return false;
- if (TapiFile *Tapi = dyn_cast<TapiFile>(&Obj))
- return Tapi->is64Bit();
- if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj))
- return MachO->is64Bit();
- if (ELFObjectFileBase *ElfO = dyn_cast<ELFObjectFileBase>(&Obj))
- return ElfO->getBytesInAddress() == 8;
-
- fail("unsupported file format");
-}
-
static bool isValidInBitMode(Binary &Bin) {
if (BitMode == BitModeTy::Bit32_64 || BitMode == BitModeTy::Any)
return true;
if (SymbolicFile *SymFile = dyn_cast<SymbolicFile>(&Bin)) {
- bool Is64Bit = is64BitSymbolicFile(*SymFile);
+ bool Is64Bit = SymFile->is64Bit();
if ((Is64Bit && (BitMode == BitModeTy::Bit32)) ||
(!Is64Bit && (BitMode == BitModeTy::Bit64)))
return false;
@@ -1452,7 +1426,7 @@ static int ranlib_main(int argc, char **argv) {
return 0;
}
-int llvm_ar_main(int argc, char **argv) {
+int llvm_ar_main(int argc, char **argv, const llvm::ToolContext &) {
InitLLVM X(argc, argv);
ToolName = argv[0];
diff --git a/llvm/tools/llvm-cov/CodeCoverage.cpp b/llvm/tools/llvm-cov/CodeCoverage.cpp
index 2b2eda5d8587..02448dcd31a1 100644
--- a/llvm/tools/llvm-cov/CodeCoverage.cpp
+++ b/llvm/tools/llvm-cov/CodeCoverage.cpp
@@ -22,7 +22,10 @@
#include "SourceCoverageView.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/Debuginfod/BuildIDFetcher.h"
+#include "llvm/Debuginfod/Debuginfod.h"
+#include "llvm/Debuginfod/HTTPClient.h"
+#include "llvm/Object/BuildID.h"
#include "llvm/ProfileData/Coverage/CoverageMapping.h"
#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/CommandLine.h"
@@ -38,6 +41,7 @@
#include "llvm/Support/Threading.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/TargetParser/Triple.h"
#include <functional>
#include <map>
@@ -179,6 +183,10 @@ private:
/// Allowlist from -name-allowlist to be used for filtering.
std::unique_ptr<SpecialCaseList> NameAllowlist;
+
+ std::unique_ptr<object::BuildIDFetcher> BIDFetcher;
+
+ bool CheckBinaryIDs;
};
}
@@ -433,9 +441,10 @@ std::unique_ptr<CoverageMapping> CodeCoverageTool::load() {
if (modifiedTimeGT(ObjectFilename, PGOFilename))
warning("profile data may be out of date - object is newer",
ObjectFilename);
- auto CoverageOrErr =
- CoverageMapping::load(ObjectFilenames, PGOFilename, CoverageArches,
- ViewOpts.CompilationDirectory);
+ auto FS = vfs::getRealFileSystem();
+ auto CoverageOrErr = CoverageMapping::load(
+ ObjectFilenames, PGOFilename, *FS, CoverageArches,
+ ViewOpts.CompilationDirectory, BIDFetcher.get(), CheckBinaryIDs);
if (Error E = CoverageOrErr.takeError()) {
error("Failed to load coverage: " + toString(std::move(E)));
return nullptr;
@@ -629,7 +638,7 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
"dump-collected-objects", cl::Optional, cl::Hidden,
cl::desc("Show the collected coverage object files"));
- cl::list<std::string> InputSourceFiles(cl::Positional,
+ cl::list<std::string> InputSourceFiles("sources", cl::Positional,
cl::desc("<Source files>"));
cl::opt<bool> DebugDumpCollectedPaths(
@@ -647,6 +656,14 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
cl::opt<bool> DebugDump("dump", cl::Optional,
cl::desc("Show internal debug dump"));
+ cl::list<std::string> DebugFileDirectory(
+ "debug-file-directory",
+ cl::desc("Directories to search for object files by build ID"));
+ cl::opt<bool> Debuginfod(
+ "debuginfod", cl::ZeroOrMore,
+ cl::desc("Use debuginfod to look up object files from profile"),
+ cl::init(canUseDebuginfod()));
+
cl::opt<CoverageViewOptions::OutputFormat> Format(
"format", cl::desc("Output format for line-based coverage reports"),
cl::values(clEnumValN(CoverageViewOptions::OutputFormat::Text, "text",
@@ -746,15 +763,26 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
"compilation-dir", cl::init(""),
cl::desc("Directory used as a base for relative coverage mapping paths"));
+ cl::opt<bool> CheckBinaryIDs(
+ "check-binary-ids", cl::desc("Fail if an object couldn't be found for a "
+ "binary ID in the profile"));
+
auto commandLineParser = [&, this](int argc, const char **argv) -> int {
cl::ParseCommandLineOptions(argc, argv, "LLVM code coverage tool\n");
ViewOpts.Debug = DebugDump;
+ if (Debuginfod) {
+ HTTPClient::initialize();
+ BIDFetcher = std::make_unique<DebuginfodFetcher>(DebugFileDirectory);
+ } else {
+ BIDFetcher = std::make_unique<object::BuildIDFetcher>(DebugFileDirectory);
+ }
+ this->CheckBinaryIDs = CheckBinaryIDs;
if (!CovFilename.empty())
ObjectFilenames.emplace_back(CovFilename);
for (const std::string &Filename : CovFilenames)
ObjectFilenames.emplace_back(Filename);
- if (ObjectFilenames.empty()) {
+ if (ObjectFilenames.empty() && !Debuginfod && DebugFileDirectory.empty()) {
errs() << "No filenames specified!\n";
::exit(1);
}
@@ -867,10 +895,8 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
}
CoverageArches.emplace_back(Arch);
}
- if (CoverageArches.size() == 1)
- CoverageArches.insert(CoverageArches.end(), ObjectFilenames.size() - 1,
- CoverageArches[0]);
- if (CoverageArches.size() != ObjectFilenames.size()) {
+ if (CoverageArches.size() != 1 &&
+ CoverageArches.size() != ObjectFilenames.size()) {
error("Number of architectures doesn't match the number of objects");
return 1;
}
diff --git a/llvm/tools/llvm-cov/CoverageReport.cpp b/llvm/tools/llvm-cov/CoverageReport.cpp
index be042aa9e027..cb0b184e103c 100644
--- a/llvm/tools/llvm-cov/CoverageReport.cpp
+++ b/llvm/tools/llvm-cov/CoverageReport.cpp
@@ -13,6 +13,7 @@
#include "CoverageReport.h"
#include "RenderingSupport.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ThreadPool.h"
diff --git a/llvm/tools/llvm-cov/llvm-cov.cpp b/llvm/tools/llvm-cov/llvm-cov.cpp
index 45de2afb0855..5ada55789b24 100644
--- a/llvm/tools/llvm-cov/llvm-cov.cpp
+++ b/llvm/tools/llvm-cov/llvm-cov.cpp
@@ -59,7 +59,7 @@ int main(int argc, const char **argv) {
InitLLVM X(argc, argv);
// If argv[0] is or ends with 'gcov', always be gcov compatible
- if (sys::path::stem(argv[0]).endswith_insensitive("gcov"))
+ if (sys::path::stem(argv[0]).ends_with_insensitive("gcov"))
return gcovMain(argc, argv);
// Check if we are invoking a specific tool command.
diff --git a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
index 06f0a25c0dff..2bbd57f14d99 100644
--- a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
+++ b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
@@ -7,16 +7,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Demangle/Demangle.h"
+#include "llvm/Demangle/StringViewExtras.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/LLVMDriver.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/Triple.h"
#include <cstdlib>
#include <iostream>
@@ -70,10 +72,11 @@ static void error(const Twine &Message) {
}
static std::string demangle(const std::string &Mangled) {
- const char *DecoratedStr = Mangled.c_str();
+ using llvm::itanium_demangle::starts_with;
+ std::string_view DecoratedStr = Mangled;
if (StripUnderscore)
if (DecoratedStr[0] == '_')
- ++DecoratedStr;
+ DecoratedStr.remove_prefix(1);
std::string Result;
if (nonMicrosoftDemangle(DecoratedStr, Result))
@@ -83,11 +86,11 @@ static std::string demangle(const std::string &Mangled) {
char *Undecorated = nullptr;
if (Types)
- Undecorated = itaniumDemangle(DecoratedStr, nullptr, nullptr, nullptr);
+ Undecorated = itaniumDemangle(DecoratedStr);
- if (!Undecorated && strncmp(DecoratedStr, "__imp_", 6) == 0) {
+ if (!Undecorated && starts_with(DecoratedStr, "__imp_")) {
Prefix = "import thunk for ";
- Undecorated = itaniumDemangle(DecoratedStr + 6, nullptr, nullptr, nullptr);
+ Undecorated = itaniumDemangle(DecoratedStr.substr(6));
}
Result = Undecorated ? Prefix + Undecorated : Mangled;
@@ -145,7 +148,7 @@ static void demangleLine(llvm::raw_ostream &OS, StringRef Mangled, bool Split) {
OS.flush();
}
-int llvm_cxxfilt_main(int argc, char **argv) {
+int llvm_cxxfilt_main(int argc, char **argv, const llvm::ToolContext &) {
InitLLVM X(argc, argv);
BumpPtrAllocator A;
StringSaver Saver(A);
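
For reference, the single-argument itaniumDemangle() overload adopted here returns a heap-allocated string the caller must free. A standalone usage sketch with an example mangled name:

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <cstdlib>

    int main() {
      // "_Z3fooi" is the Itanium mangling of foo(int) (example input).
      if (char *Demangled = llvm::itaniumDemangle("_Z3fooi")) {
        std::printf("%s\n", Demangled); // "foo(int)"
        std::free(Demangled);           // the result is heap-allocated
      }
      return 0;
    }
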
diff --git a/llvm/tools/llvm-cxxmap/llvm-cxxmap.cpp b/llvm/tools/llvm-cxxmap/llvm-cxxmap.cpp
index 1e18e379f23c..6a5646965df2 100644
--- a/llvm/tools/llvm-cxxmap/llvm-cxxmap.cpp
+++ b/llvm/tools/llvm-cxxmap/llvm-cxxmap.cpp
@@ -14,12 +14,12 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ProfileData/SymbolRemappingReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/LineIterator.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SymbolRemappingReader.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/llvm/tools/llvm-debuginfo-analyzer/README.txt b/llvm/tools/llvm-debuginfo-analyzer/README.txt
new file mode 100644
index 000000000000..e6c20db7cd71
--- /dev/null
+++ b/llvm/tools/llvm-debuginfo-analyzer/README.txt
@@ -0,0 +1,224 @@
+//===- llvm/tools/llvm-debuginfo-analyzer/README.txt ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains notes collected during development, review and testing.
+// It describes limitations, known issues and future work.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Remove the use of macros in 'LVReader.h' that describe the bumpallocators.
+//===----------------------------------------------------------------------===//
+https://reviews.llvm.org/D137933#inline-1389904
+
+Use a standard (or LLVM) map with typeinfo (this would need a
+specialization to expose equality and a hasher) for the allocators; the
+creation functions could then be a function template.
+
+//===----------------------------------------------------------------------===//
+// Use a lit test instead of a unit test for the logical readers.
+//===----------------------------------------------------------------------===//
+https://reviews.llvm.org/D125783#inline-1324376
+
+As the DebugInfoLogicalView library is sufficiently exposed via the
+llvm-debuginfo-analyzer tool, follow the general LLVM approach and
+use lit tests to validate the logical readers.
+
+Convert the unittests:
+ llvm-project/llvm/unittests/DebugInfo/LogicalView/CodeViewReaderTest.cpp
+ llvm-project/llvm/unittests/DebugInfo/LogicalView/ELFReaderTest.cpp
+
+into LIT tests:
+ llvm-project/llvm/test/DebugInfo/LogicalView/CodeViewReader.test
+ llvm-project/llvm/test/DebugInfo/LogicalView/ELFReader.test
+
+//===----------------------------------------------------------------------===//
+// Eliminate calls to 'getInputFileDirectory()' in the unit tests.
+//===----------------------------------------------------------------------===//
+https://reviews.llvm.org/D125783#inline-1324359
+
+Rewrite the unittests 'ELFReaderTest' and 'CodeViewReaderTest' to
+eliminate the call:
+
+ getInputFileDirectory()
+
+as use of that call is discouraged.
+
+See: Use a lit test instead of a unit test for the logical readers.
+
+//===----------------------------------------------------------------------===//
+// Fix mismatch between %d/%x format strings and uint64_t type.
+//===----------------------------------------------------------------------===//
+https://reviews.llvm.org/D137400
+https://github.com/llvm/llvm-project/issues/58758
+
+Incorrect printing of uint64_t on 32-bit platforms.
+Add the PRIx64 specifier to the printing code (format()).
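+
+A minimal sketch of the intended fix ('printOffset' and 'Offset' are
+hypothetical names, not the actual printing code):
+
+  #include "llvm/Support/Format.h"
+  #include "llvm/Support/raw_ostream.h"
+  #include <cinttypes>
+
+  static void printOffset(llvm::raw_ostream &OS, uint64_t Offset) {
+    // "%x" truncates the value on 32-bit hosts; PRIx64 always matches
+    // the width of uint64_t.
+    OS << llvm::format("0x%08" PRIx64, Offset);
+  }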
+
+//===----------------------------------------------------------------------===//
+// Remove 'LVScope::Children' container.
+//===----------------------------------------------------------------------===//
+https://reviews.llvm.org/D137933#inline-1373902
+
+Use a chaining iterator over the other containers rather than keeping a
+separate container 'Children' that mirrors their contents.
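+
+One possible shape, sketched with hypothetical stand-in types rather
+than the real LVScope containers:
+
+  #include "llvm/ADT/STLExtras.h"
+  #include <vector>
+
+  struct ElementSketch { /* stand-in for the real logical element */ };
+  using ElementList = std::vector<ElementSketch *>;
+
+  // Iterate the existing containers through one chained range instead
+  // of maintaining a mirrored 'Children' vector.
+  static void visitChildren(ElementList &Types, ElementList &Symbols,
+                            ElementList &Scopes) {
+    for (ElementSketch *Child :
+         llvm::concat<ElementSketch *>(Types, Symbols, Scopes))
+      (void)Child; // process the child here
+  }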
+
+//===----------------------------------------------------------------------===//
+// Use TableGen for command line options.
+//===----------------------------------------------------------------------===//
+https://reviews.llvm.org/D125777#inline-1291801
+
+The current trend is to use TableGen for command-line options in tools.
+Change the command-line options to use TableGen, as many other LLVM
+tools already do.
+
+//===----------------------------------------------------------------------===//
+// LVDoubleMap to return optional<ValueType> instead of null pointer.
+//===----------------------------------------------------------------------===//
+https://reviews.llvm.org/D125783#inline-1294164
+
+The more idiomatic LLVM way to handle this would be to have 'find'
+return Optional<ValueType>.
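+
+A minimal sketch of that interface (the key/value types are hypothetical
+stand-ins, not the actual LVDoubleMap layout):
+
+  #include <map>
+  #include <optional>
+
+  template <typename KeyType, typename ValueType> class DoubleMapSketch {
+    std::map<KeyType, ValueType> Inner;
+
+  public:
+    std::optional<ValueType> find(const KeyType &Key) const {
+      auto It = Inner.find(Key);
+      if (It == Inner.end())
+        return std::nullopt; // instead of returning a null pointer
+      return It->second;
+    }
+  };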
+
+//===----------------------------------------------------------------------===//
+// Pass references instead of pointers (Comparison functions).
+//===----------------------------------------------------------------------===//
+https://reviews.llvm.org/D125782#inline-1293920
+
+In the comparison functions, pass references instead of pointers (when
+pointers cannot be null).
+
+//===----------------------------------------------------------------------===//
+// Use StringMap where possible.
+//===----------------------------------------------------------------------===//
+https://reviews.llvm.org/D125783#inline-1294211
+
+LLVM has a StringMap class that is advertised as more efficient than
+std::map<std::string, ValueType>. Mainly it does fewer allocations
+because the key is not a std::string.
+
+Replace the use of std::map<std::string, ValueType> with StringMap.
+One specific case is the LVSymbolNames definitions.
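+
+A minimal sketch of the substitution (the value type is a hypothetical
+stand-in for the real LVSymbolNames entry):
+
+  #include "llvm/ADT/StringMap.h"
+  #include "llvm/ADT/StringRef.h"
+
+  struct SymbolNameEntry { /* stand-in for the real value type */ };
+
+  // Keys live in the map's own allocator, so no std::string is created
+  // per key.
+  // Before: std::map<std::string, SymbolNameEntry> SymbolNames;
+  static llvm::StringMap<SymbolNameEntry> SymbolNames;
+
+  static void recordSymbol(llvm::StringRef Name) {
+    SymbolNames.try_emplace(Name);
+  }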
+
+//===----------------------------------------------------------------------===//
+// Calculate unique offset for CodeView elements.
+//===----------------------------------------------------------------------===//
+In order to have the same logical functionality as the ELF Reader, such
+as:
+
+- finding a scope's contribution to the debug info
+- sorting elements by their physical location
+
+the logical elements must have a unique offset (similar to the DWARF
+DIE offset).
+
+//===----------------------------------------------------------------------===//
+// Move 'initializeFileAndStringTables' to the COFF Library.
+//===----------------------------------------------------------------------===//
+There is some code in the CodeView reader that was extracted/adapted
+from 'tools/llvm-readobj/COFFDumper.cpp' that can be moved to the COFF
+library.
+
+We had a similar case with code shared with llvm-pdbutil that was moved
+to the PDB library: https://reviews.llvm.org/D122226
+
+//===----------------------------------------------------------------------===//
+// Move 'getSymbolKindName'/'formatRegisterId' to the CodeView Library.
+//===----------------------------------------------------------------------===//
+There is some code in the CodeView reader that was extracted/adapted
+from 'lib/DebugInfo/CodeView/SymbolDumper.cpp' that could be moved to
+the CodeView library and reused from there.
+
+//===----------------------------------------------------------------------===//
+// Use of std::unordered_set instead of std::set.
+//===----------------------------------------------------------------------===//
+https://reviews.llvm.org/D125784#inline-1221421
+
+Replace the std::set usage for DeducedScopes, UnresolvedScopes and
+IdentifiedNamespaces with std::unordered_set and get the benefit of
+O(1) insertion and lookup, as the order is not important.
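+
+A minimal sketch of the substitution, using std::string as an
+illustrative element type (the actual containers may key on StringRef,
+which would need a suitable hasher):
+
+  #include <string>
+  #include <unordered_set>
+
+  // Before: std::set<std::string> IdentifiedNamespaces;
+  static std::unordered_set<std::string> IdentifiedNamespaces;
+
+  static bool isIdentifiedNamespace(const std::string &Name) {
+    // Average O(1) lookup; the (now unsorted) iteration order does not
+    // matter for this use.
+    return IdentifiedNamespaces.count(Name) != 0;
+  }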
+
+//===----------------------------------------------------------------------===//
+// Optimize the 'LVNamespaceDeduction::find' function.
+//===----------------------------------------------------------------------===//
+https://reviews.llvm.org/D125784#inline-1296195
+
+Optimize the 'find' method to use the proposed code:
+
+ LVStringRefs::iterator Iter = std::find_if(Components.begin(), Components.end(),
+ [](StringRef Name) {
+ return IdentifiedNamespaces.find(Name) == IdentifiedNamespaces.end();
+ });
+ LVStringRefs::size_type FirstNonNamespace = std::distance(Components.begin(), Iter);
+
+//===----------------------------------------------------------------------===//
+// Move all the printing support to a common module.
+//===----------------------------------------------------------------------===//
+Factor out printing functionality from the logical elements into a
+common module.
+
+//===----------------------------------------------------------------------===//
+// Refactor 'LVBinaryReader::processLines'.
+//===----------------------------------------------------------------------===//
+https://reviews.llvm.org/D125783#inline-1246155
+https://reviews.llvm.org/D137156
+
+During the traversal of the debug information sections, we create the
+logical lines representing the disassembled instructions from the text
+section and the logical lines representing the line records from the
+debug line section. Using the ranges associated with the logical scopes,
+we then allocate those logical lines to their logical scopes.
+
+Consider the case where any of those lines become orphans, which results
+in an incorrect scope parent for the disassembly or line records.
+
+//===----------------------------------------------------------------------===//
+// Add support for '-ffunction-sections'.
+//===----------------------------------------------------------------------===//
+https://reviews.llvm.org/D125783#inline-1295012
+
+Only linked executables are currently handled; relocatable files
+compiled with -ffunction-sections are not supported.
+
+//===----------------------------------------------------------------------===//
+// Add support for DWARF v5 .debug_names section.
+// Add support for CodeView public symbols stream.
+//===----------------------------------------------------------------------===//
+https://reviews.llvm.org/D125783#inline-1294142
+
+The ELF and CodeView readers use the public names information to create
+the instructions (LVLineAssembler). Instead of relying on the DWARF
+sections (.debug_pubnames, .debug_names) and the CodeView public symbol
+stream (S_PUB32), the readers collect the needed information while
+processing the debug information.
+
+If the object file contains the above sections and stream, use them
+to create the public names.
+
+//===----------------------------------------------------------------------===//
+// Add support for some extra DWARF locations.
+//===----------------------------------------------------------------------===//
+The following DWARF location expression operations are not supported:
+
+- DW_OP_const_type
+- DW_OP_entry_value
+- DW_OP_implicit_value
+
+//===----------------------------------------------------------------------===//
+// Add support for additional binary formats.
+//===----------------------------------------------------------------------===//
+- WebAssembly (Wasm).
+ https://github.com/llvm/llvm-project/issues/57040#issuecomment-1211336680
+
+- Extended COFF (XCOFF)
+
+//===----------------------------------------------------------------------===//
+// Add support for JSON or YAML.
+//===----------------------------------------------------------------------===//
+The logical view uses its own, non-standard free-form text format when
+displaying information about logical elements. Add an option to emit the
+output in a standard structured format such as JSON or YAML.
+
+//===----------------------------------------------------------------------===//
diff --git a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index 27330a571bbe..156e10c84ddd 100644
--- a/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -11,9 +11,10 @@
//===----------------------------------------------------------------------===//
#include "llvm-dwarfdump.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringSet.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
@@ -26,6 +27,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
@@ -34,6 +36,7 @@
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include <cstdlib>
using namespace llvm;
@@ -171,6 +174,10 @@ static list<std::string>
value_desc("name"), cat(DwarfDumpCategory));
static alias FindAlias("f", desc("Alias for --find."), aliasopt(Find),
cl::NotHidden);
+static opt<bool> FindAllApple(
+ "find-all-apple",
+ desc("Print every debug information entry in the accelerator tables."),
+ cat(DwarfDumpCategory));
static opt<bool> IgnoreCase("ignore-case",
desc("Ignore case distinctions when using --name."),
value_desc("i"), cat(DwarfDumpCategory));
@@ -453,6 +460,37 @@ static void filterByAccelName(
Die.dump(OS, 0, DumpOpts);
}
+/// Print all DIEs in apple accelerator tables
+static void findAllApple(
+ DWARFContext &DICtx, raw_ostream &OS,
+ std::function<StringRef(uint64_t RegNum, bool IsEH)> GetNameForDWARFReg) {
+ MapVector<StringRef, llvm::SmallSet<DWARFDie, 2>> NameToDies;
+
+ auto PushDIEs = [&](const AppleAcceleratorTable &Accel) {
+ for (const auto &Entry : Accel.entries()) {
+ if (std::optional<uint64_t> Off = Entry.BaseEntry.getDIESectionOffset()) {
+ std::optional<StringRef> MaybeName = Entry.readName();
+ DWARFDie Die = DICtx.getDIEForOffset(*Off);
+ if (Die && MaybeName)
+ NameToDies[*MaybeName].insert(Die);
+ }
+ }
+ };
+
+ PushDIEs(DICtx.getAppleNames());
+ PushDIEs(DICtx.getAppleNamespaces());
+ PushDIEs(DICtx.getAppleTypes());
+
+ DIDumpOptions DumpOpts = getDumpOpts(DICtx);
+ DumpOpts.GetNameForDWARFReg = GetNameForDWARFReg;
+ for (const auto &[Name, Dies] : NameToDies) {
+ OS << llvm::formatv("\nApple accelerator entries with name = \"{0}\":\n",
+ Name);
+ for (DWARFDie Die : Dies)
+ Die.dump(OS, 0, DumpOpts);
+ }
+}
+
/// Handle the --lookup option and dump the DIEs and line info for the given
/// address.
/// TODO: specified Address for --lookup option could relate for several
@@ -625,6 +663,12 @@ static bool dumpObjectFile(ObjectFile &Obj, DWARFContext &DICtx,
return true;
}
+ // Handle the --find-all-apple option and lower it to --debug-info=<offset>.
+ if (FindAllApple) {
+ findAllApple(DICtx, OS, GetRegName);
+ return true;
+ }
+
// Dump the complete DWARF structure.
auto DumpOpts = getDumpOpts(DICtx);
DumpOpts.GetNameForDWARFReg = GetRegName;
@@ -782,7 +826,7 @@ int main(int argc, char **argv) {
// Unless dumping a specific DIE, default to --show-children.
if (!ShowChildren && !Verify && !OffsetRequested && Name.empty() &&
- Find.empty())
+ Find.empty() && !FindAllApple)
ShowChildren = true;
// Defaults to a.out if no filenames specified.
diff --git a/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp b/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp
index ef222f8cc1a4..47a23e8448cc 100644
--- a/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp
+++ b/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/DWARFLinker/DWARFLinker.h"
#include "llvm/DWARFLinker/DWARFStreamer.h"
+#include "llvm/DWARFLinkerParallel/DWARFLinker.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
#include "llvm/Object/ObjectFile.h"
@@ -37,11 +38,12 @@ namespace dwarfutil {
// exec: [LowPC, HighPC] is not inside address ranges of .text sections
//
// universal: maxpc and bfd
-class ObjFileAddressMap : public AddressesMap {
+template <typename AddressMapBase>
+class ObjFileAddressMap : public AddressMapBase {
public:
ObjFileAddressMap(DWARFContext &Context, const Options &Options,
object::ObjectFile &ObjFile)
- : Opts(Options), Context(Context) {
+ : Opts(Options) {
// Remember addresses of existing text sections.
for (const object::SectionRef &Sect : ObjFile.sections()) {
if (!Sect.isText())
@@ -57,21 +59,29 @@ public:
for (std::unique_ptr<DWARFUnit> &CU : Context.compile_units()) {
Expected<llvm::DWARFAddressRangesVector> ARanges =
CU->getUnitDIE().getAddressRanges();
- if (ARanges) {
- for (auto &Range : *ARanges) {
- if (!isDeadAddressRange(Range.LowPC, Range.HighPC, CU->getVersion(),
- Options.Tombstone, CU->getAddressByteSize()))
- DWARFAddressRanges.insert({Range.LowPC, Range.HighPC}, 0);
+ if (!ARanges) {
+ llvm::consumeError(ARanges.takeError());
+ continue;
+ }
+
+ for (auto &Range : *ARanges) {
+ if (!isDeadAddressRange(Range.LowPC, Range.HighPC, CU->getVersion(),
+ Options.Tombstone, CU->getAddressByteSize())) {
+ HasValidAddressRanges = true;
+ break;
}
}
+
+ if (HasValidAddressRanges)
+ break;
}
}
// should be renamed into has valid address ranges
- bool hasValidRelocs() override { return !DWARFAddressRanges.empty(); }
+ bool hasValidRelocs() override { return HasValidAddressRanges; }
- bool isLiveSubprogram(const DWARFDie &DIE,
- CompileUnit::DIEInfo &Info) override {
+ std::optional<int64_t>
+ getSubprogramRelocAdjustment(const DWARFDie &DIE) override {
assert((DIE.getTag() == dwarf::DW_TAG_subprogram ||
DIE.getTag() == dwarf::DW_TAG_label) &&
"Wrong type of input die");
@@ -80,53 +90,44 @@ public:
dwarf::toAddress(DIE.find(dwarf::DW_AT_low_pc))) {
if (!isDeadAddress(*LowPC, DIE.getDwarfUnit()->getVersion(),
Opts.Tombstone,
- DIE.getDwarfUnit()->getAddressByteSize())) {
- Info.AddrAdjust = 0;
- Info.InDebugMap = true;
- return true;
- }
+ DIE.getDwarfUnit()->getAddressByteSize()))
+ // Relocation value for the linked binary is 0.
+ return 0;
}
- return false;
+ return std::nullopt;
}
- bool isLiveVariable(const DWARFDie &DIE,
- CompileUnit::DIEInfo &Info) override {
- assert((DIE.getTag() == dwarf::DW_TAG_variable ||
- DIE.getTag() == dwarf::DW_TAG_constant) &&
- "Wrong type of input die");
-
- if (Expected<DWARFLocationExpressionsVector> Loc =
- DIE.getLocations(dwarf::DW_AT_location)) {
- DWARFUnit *U = DIE.getDwarfUnit();
- for (const auto &Entry : *Loc) {
- DataExtractor Data(toStringRef(Entry.Expr),
- U->getContext().isLittleEndian(), 0);
- DWARFExpression Expression(Data, U->getAddressByteSize(),
- U->getFormParams().Format);
- bool HasLiveAddresses =
- any_of(Expression, [&](const DWARFExpression::Operation &Op) {
- // TODO: add handling of dwarf::DW_OP_addrx
- return !Op.isError() &&
- (Op.getCode() == dwarf::DW_OP_addr &&
- !isDeadAddress(Op.getRawOperand(0), U->getVersion(),
- Opts.Tombstone,
- DIE.getDwarfUnit()->getAddressByteSize()));
- });
-
- if (HasLiveAddresses) {
- Info.AddrAdjust = 0;
- Info.InDebugMap = true;
- return true;
- }
+ std::optional<int64_t> getExprOpAddressRelocAdjustment(
+ DWARFUnit &U, const DWARFExpression::Operation &Op, uint64_t StartOffset,
+ uint64_t EndOffset) override {
+ switch (Op.getCode()) {
+ default: {
+ assert(false && "Specified operation does not have address operand");
+ } break;
+ case dwarf::DW_OP_const4u:
+ case dwarf::DW_OP_const8u:
+ case dwarf::DW_OP_const4s:
+ case dwarf::DW_OP_const8s:
+ case dwarf::DW_OP_addr: {
+ if (!isDeadAddress(Op.getRawOperand(0), U.getVersion(), Opts.Tombstone,
+ U.getAddressByteSize()))
+ // Relocation value for the linked binary is 0.
+ return 0;
+ } break;
+ case dwarf::DW_OP_constx:
+ case dwarf::DW_OP_addrx: {
+ if (std::optional<object::SectionedAddress> Address =
+ U.getAddrOffsetSectionItem(Op.getRawOperand(0))) {
+ if (!isDeadAddress(Address->Address, U.getVersion(), Opts.Tombstone,
+ U.getAddressByteSize()))
+ // Relocation value for the linked binary is 0.
+ return 0;
}
- } else {
- // FIXME: missing DW_AT_location is OK here, but other errors should be
- // reported to the user.
- consumeError(Loc.takeError());
+ } break;
}
- return false;
+ return std::nullopt;
}
bool applyValidRelocs(MutableArrayRef<char>, uint64_t, bool) override {
@@ -134,33 +135,7 @@ public:
return false;
}
- RangesTy &getValidAddressRanges() override { return DWARFAddressRanges; };
-
- void clear() override { DWARFAddressRanges.clear(); }
-
- llvm::Expected<uint64_t> relocateIndexedAddr(uint64_t StartOffset,
- uint64_t EndOffset) override {
- // No relocations in linked binary. Return just address value.
-
- const char *AddrPtr =
- Context.getDWARFObj().getAddrSection().Data.data() + StartOffset;
- support::endianness Endianess =
- Context.getDWARFObj().isLittleEndian() ? support::little : support::big;
-
- assert(EndOffset > StartOffset);
- switch (EndOffset - StartOffset) {
- case 1:
- return *AddrPtr;
- case 2:
- return support::endian::read16(AddrPtr, Endianess);
- case 4:
- return support::endian::read32(AddrPtr, Endianess);
- case 8:
- return support::endian::read64(AddrPtr, Endianess);
- }
-
- llvm_unreachable("relocateIndexedAddr unhandled case!");
- }
+ void clear() override {}
protected:
// returns true if specified address range is inside address ranges
@@ -228,10 +203,9 @@ protected:
}
private:
- RangesTy DWARFAddressRanges;
AddressRanges TextAddressRanges;
const Options &Opts;
- DWARFContext &Context;
+ bool HasValidAddressRanges = false;
};
static bool knownByDWARFUtil(StringRef SecName) {
@@ -258,12 +232,13 @@ static bool knownByDWARFUtil(StringRef SecName) {
.Default(false);
}
-static std::optional<DwarfLinkerAccelTableKind>
+template <typename AccelTableKind>
+static std::optional<AccelTableKind>
getAcceleratorTableKind(StringRef SecName) {
- return llvm::StringSwitch<std::optional<DwarfLinkerAccelTableKind>>(SecName)
- .Case(".debug_pubnames", DwarfLinkerAccelTableKind::Pub)
- .Case(".debug_pubtypes", DwarfLinkerAccelTableKind::Pub)
- .Case(".debug_names", DwarfLinkerAccelTableKind::DebugNames)
+ return llvm::StringSwitch<std::optional<AccelTableKind>>(SecName)
+ .Case(".debug_pubnames", AccelTableKind::Pub)
+ .Case(".debug_pubtypes", AccelTableKind::Pub)
+ .Case(".debug_names", AccelTableKind::DebugNames)
.Default(std::nullopt);
}
@@ -309,9 +284,9 @@ static std::string getMessageForDeletedAcceleratorTables(
return Message;
}
-Error linkDebugInfo(object::ObjectFile &File, const Options &Options,
- raw_pwrite_stream &OutStream) {
-
+template <typename Linker, typename OutDwarfFile, typename AddressMapBase>
+Error linkDebugInfoImpl(object::ObjectFile &File, const Options &Options,
+ raw_pwrite_stream &OutStream) {
auto ReportWarn = [&](const Twine &Message, StringRef Context,
const DWARFDie *Die) {
warning(Message, Context);
@@ -331,39 +306,33 @@ Error linkDebugInfo(object::ObjectFile &File, const Options &Options,
WithColor::error(errs(), Context) << Message << '\n';
};
- // Create output streamer.
- DwarfStreamer OutStreamer(OutputFileType::Object, OutStream, nullptr,
- ReportWarn, ReportWarn);
+ // Create DWARF linker.
+ std::unique_ptr<Linker> DebugInfoLinker =
+ Linker::createLinker(ReportErr, ReportWarn);
+
Triple TargetTriple = File.makeTriple();
- if (!OutStreamer.init(TargetTriple, formatv("cannot create a stream for {0}",
- TargetTriple.getTriple())
- .str()))
- return createStringError(std::errc::invalid_argument, "");
+ if (Error Err = DebugInfoLinker->createEmitter(
+ TargetTriple, Linker::OutputFileType::Object, OutStream))
+ return Err;
- std::unique_ptr<DWARFContext> Context = DWARFContext::create(File);
+ DebugInfoLinker->setEstimatedObjfilesAmount(1);
+ DebugInfoLinker->setNumThreads(Options.NumThreads);
+ DebugInfoLinker->setNoODR(!Options.DoODRDeduplication);
+ DebugInfoLinker->setVerbosity(Options.Verbose);
+ DebugInfoLinker->setUpdateIndexTablesOnly(!Options.DoGarbageCollection);
- // Create DWARF linker.
- DWARFLinker DebugInfoLinker(&OutStreamer, DwarfLinkerClient::LLD);
-
- DebugInfoLinker.setEstimatedObjfilesAmount(1);
- DebugInfoLinker.setErrorHandler(ReportErr);
- DebugInfoLinker.setWarningHandler(ReportWarn);
- DebugInfoLinker.setNumThreads(Options.NumThreads);
- DebugInfoLinker.setNoODR(!Options.DoODRDeduplication);
- DebugInfoLinker.setVerbosity(Options.Verbose);
- DebugInfoLinker.setUpdate(!Options.DoGarbageCollection);
-
- std::vector<std::unique_ptr<DWARFFile>> ObjectsForLinking(1);
- std::vector<std::unique_ptr<AddressesMap>> AddresssMapForLinking(1);
+ std::vector<std::unique_ptr<OutDwarfFile>> ObjectsForLinking(1);
std::vector<std::string> EmptyWarnings;
// Add object files to the DWARFLinker.
- AddresssMapForLinking[0] =
- std::make_unique<ObjFileAddressMap>(*Context, Options, File);
+ std::unique_ptr<DWARFContext> Context = DWARFContext::create(File);
+ std::unique_ptr<ObjFileAddressMap<AddressMapBase>> AddressesMap(
+ std::make_unique<ObjFileAddressMap<AddressMapBase>>(*Context, Options,
+ File));
- ObjectsForLinking[0] = std::make_unique<DWARFFile>(
- File.getFileName(), &*Context, AddresssMapForLinking[0].get(),
- EmptyWarnings);
+ ObjectsForLinking[0] =
+ std::make_unique<OutDwarfFile>(File.getFileName(), std::move(Context),
+ std::move(AddressesMap), EmptyWarnings);
uint16_t MaxDWARFVersion = 0;
std::function<void(const DWARFUnit &Unit)> OnCUDieLoaded =
@@ -372,17 +341,17 @@ Error linkDebugInfo(object::ObjectFile &File, const Options &Options,
};
for (size_t I = 0; I < ObjectsForLinking.size(); I++)
- DebugInfoLinker.addObjectFile(*ObjectsForLinking[I], nullptr,
- OnCUDieLoaded);
+ DebugInfoLinker->addObjectFile(*ObjectsForLinking[I], nullptr,
+ OnCUDieLoaded);
// If we haven't seen any CUs, pick an arbitrary valid Dwarf version anyway.
if (MaxDWARFVersion == 0)
MaxDWARFVersion = 3;
- if (Error Err = DebugInfoLinker.setTargetDWARFVersion(MaxDWARFVersion))
+ if (Error Err = DebugInfoLinker->setTargetDWARFVersion(MaxDWARFVersion))
return Err;
- SmallVector<DwarfLinkerAccelTableKind> AccelTables;
+ SmallVector<typename Linker::AccelTableKind> AccelTables;
switch (Options.AccelTableKind) {
case DwarfUtilAccelKind::None:
@@ -390,60 +359,74 @@ Error linkDebugInfo(object::ObjectFile &File, const Options &Options,
break;
case DwarfUtilAccelKind::DWARF:
// use .debug_names for all DWARF versions.
- AccelTables.push_back(DwarfLinkerAccelTableKind::DebugNames);
+ AccelTables.push_back(Linker::AccelTableKind::DebugNames);
break;
}
// Add accelerator tables to DWARFLinker.
- for (DwarfLinkerAccelTableKind Table : AccelTables)
- DebugInfoLinker.addAccelTableKind(Table);
-
- SmallVector<StringRef> AccelTableNamesToReplace;
- SmallVector<StringRef> AccelTableNamesToDelete;
-
- // Unknown debug sections or non-requested accelerator sections would be
- // removed. Display warning for such sections.
- for (SectionName Sec : Context->getDWARFObj().getSectionNames()) {
- if (isDebugSection(Sec.Name)) {
- std::optional<DwarfLinkerAccelTableKind> SrcAccelTableKind =
- getAcceleratorTableKind(Sec.Name);
-
- if (SrcAccelTableKind) {
- assert(knownByDWARFUtil(Sec.Name));
-
- if (Options.AccelTableKind == DwarfUtilAccelKind::None)
- AccelTableNamesToDelete.push_back(Sec.Name);
- else if (std::find(AccelTables.begin(), AccelTables.end(),
- *SrcAccelTableKind) == AccelTables.end())
- AccelTableNamesToReplace.push_back(Sec.Name);
- } else if (!knownByDWARFUtil(Sec.Name)) {
- assert(!SrcAccelTableKind);
- warning(
- formatv("'{0}' is not currently supported: section will be skipped",
- Sec.Name),
- Options.InputFileName);
+ for (typename Linker::AccelTableKind Table : AccelTables)
+ DebugInfoLinker->addAccelTableKind(Table);
+
+ for (std::unique_ptr<OutDwarfFile> &CurFile : ObjectsForLinking) {
+ SmallVector<StringRef> AccelTableNamesToReplace;
+ SmallVector<StringRef> AccelTableNamesToDelete;
+
+ // Unknown debug sections or non-requested accelerator sections would be
+ // removed. Display warning for such sections.
+ for (SectionName Sec : CurFile->Dwarf->getDWARFObj().getSectionNames()) {
+ if (isDebugSection(Sec.Name)) {
+ std::optional<typename Linker::AccelTableKind> SrcAccelTableKind =
+ getAcceleratorTableKind<typename Linker::AccelTableKind>(Sec.Name);
+
+ if (SrcAccelTableKind) {
+ assert(knownByDWARFUtil(Sec.Name));
+
+ if (Options.AccelTableKind == DwarfUtilAccelKind::None)
+ AccelTableNamesToDelete.push_back(Sec.Name);
+ else if (!llvm::is_contained(AccelTables, *SrcAccelTableKind))
+ AccelTableNamesToReplace.push_back(Sec.Name);
+ } else if (!knownByDWARFUtil(Sec.Name)) {
+ assert(!SrcAccelTableKind);
+ warning(
+ formatv(
+ "'{0}' is not currently supported: section will be skipped",
+ Sec.Name),
+ Options.InputFileName);
+ }
}
}
- }
- // Display message for the replaced accelerator tables.
- if (!AccelTableNamesToReplace.empty())
- warning(getMessageForReplacedAcceleratorTables(AccelTableNamesToReplace,
- Options.AccelTableKind),
- Options.InputFileName);
+ // Display message for the replaced accelerator tables.
+ if (!AccelTableNamesToReplace.empty())
+ warning(getMessageForReplacedAcceleratorTables(AccelTableNamesToReplace,
+ Options.AccelTableKind),
+ Options.InputFileName);
- // Display message for the removed accelerator tables.
- if (!AccelTableNamesToDelete.empty())
- warning(getMessageForDeletedAcceleratorTables(AccelTableNamesToDelete),
- Options.InputFileName);
+ // Display message for the removed accelerator tables.
+ if (!AccelTableNamesToDelete.empty())
+ warning(getMessageForDeletedAcceleratorTables(AccelTableNamesToDelete),
+ Options.InputFileName);
+ }
// Link debug info.
- if (Error Err = DebugInfoLinker.link())
+ if (Error Err = DebugInfoLinker->link())
return Err;
- OutStreamer.finish();
+ DebugInfoLinker->getEmitter()->finish();
return Error::success();
}
+Error linkDebugInfo(object::ObjectFile &File, const Options &Options,
+ raw_pwrite_stream &OutStream) {
+ if (Options.UseLLVMDWARFLinker)
+ return linkDebugInfoImpl<dwarflinker_parallel::DWARFLinker,
+ dwarflinker_parallel::DWARFFile,
+ dwarflinker_parallel::AddressesMap>(File, Options,
+ OutStream);
+ else
+ return linkDebugInfoImpl<DWARFLinker, DWARFFile, AddressesMap>(
+ File, Options, OutStream);
+}
+
} // end of namespace dwarfutil
} // end of namespace llvm
diff --git a/llvm/tools/llvm-dwarfutil/Error.h b/llvm/tools/llvm-dwarfutil/Error.h
index 9ef288d4f657..b92c50ca5a45 100644
--- a/llvm/tools/llvm-dwarfutil/Error.h
+++ b/llvm/tools/llvm-dwarfutil/Error.h
@@ -12,12 +12,12 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
namespace llvm {
namespace dwarfutil {
diff --git a/llvm/tools/llvm-dwarfutil/Options.h b/llvm/tools/llvm-dwarfutil/Options.h
index 38fa2b9eda63..e97833bdd79e 100644
--- a/llvm/tools/llvm-dwarfutil/Options.h
+++ b/llvm/tools/llvm-dwarfutil/Options.h
@@ -40,6 +40,7 @@ struct Options {
bool Verbose = false;
int NumThreads = 0;
bool Verify = false;
+ bool UseLLVMDWARFLinker = false;
DwarfUtilAccelKind AccelTableKind = DwarfUtilAccelKind::None;
std::string getSeparateDebugFileName() const {
diff --git a/llvm/tools/llvm-dwarfutil/Options.td b/llvm/tools/llvm-dwarfutil/Options.td
index d4541188c0c2..26b9ac678b6a 100644
--- a/llvm/tools/llvm-dwarfutil/Options.td
+++ b/llvm/tools/llvm-dwarfutil/Options.td
@@ -20,6 +20,11 @@ def h : Flag<["-"], "h">,
Alias<help>,
HelpText<"Alias for --help">;
+def linker: Separate<["--", "-"], "linker">,
+ MetaVarName<"<DWARF linker type>">,
+ HelpText<"Specify the desired type of DWARF linker. Defaults to 'apple'">;
+def: Joined<["--", "-"], "linker=">, Alias<linker>;
+
defm odr_deduplication : BB<"odr-deduplication",
"Do ODR deduplication for debug types(default)",
"Don`t do ODR deduplication for debug types">;
diff --git a/llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp b/llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp
index 74b6104bc668..1c7627179795 100644
--- a/llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp
+++ b/llvm/tools/llvm-dwarfutil/llvm-dwarfutil.cpp
@@ -123,6 +123,18 @@ static Error validateAndSetOptions(opt::InputArgList &Args, Options &Options) {
formatv("unknown tombstone value: '{0}'", S).str().c_str());
}
+ if (opt::Arg *LinkerKind = Args.getLastArg(OPT_linker)) {
+ StringRef S = LinkerKind->getValue();
+ if (S == "apple")
+ Options.UseLLVMDWARFLinker = false;
+ else if (S == "llvm")
+ Options.UseLLVMDWARFLinker = true;
+ else
+ return createStringError(
+ std::errc::invalid_argument,
+ formatv("unknown linker kind value: '{0}'", S).str().c_str());
+ }
+
if (opt::Arg *BuildAccelerator = Args.getLastArg(OPT_build_accelerator)) {
StringRef S = BuildAccelerator->getValue();
diff --git a/llvm/tools/llvm-dwp/Opts.td b/llvm/tools/llvm-dwp/Opts.td
new file mode 100644
index 000000000000..c01fa4a12cba
--- /dev/null
+++ b/llvm/tools/llvm-dwp/Opts.td
@@ -0,0 +1,13 @@
+include "llvm/Option/OptParser.td"
+
+class F<string name, string help> : Flag<["-", "--"], name>, HelpText<help>;
+class S<string name, string help> : Separate<["-", "--"], name>, HelpText<help>;
+
+def help : F<"help", "Display this help">;
+def : F<"h", "Alias for --help">, Alias<help>;
+def version : F<"version", "Display the version of this program">;
+
+def execFileNames : S<"e", "Specify the executable/library files to get the list of *.dwo from.">, MetaVarName<"<filename>">;
+def outputFileName : S<"o", "Specify the output file.">, MetaVarName<"<filename>">;
+def continueOnCuIndexOverflow: F<"continue-on-cu-index-overflow", "This turns an error when offset for .debug_*.dwo sections "
+                                 "overflows into a warning.">, MetaVarName<"<filename>">;
diff --git a/llvm/tools/llvm-dwp/llvm-dwp.cpp b/llvm/tools/llvm-dwp/llvm-dwp.cpp
index 0a2c1c1ccc02..350a37345e2c 100644
--- a/llvm/tools/llvm-dwp/llvm-dwp.cpp
+++ b/llvm/tools/llvm-dwp/llvm-dwp.cpp
@@ -23,6 +23,8 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptionsCommandFlags.h"
#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/Option/Option.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
@@ -36,20 +38,46 @@ using namespace llvm::object;
static mc::RegisterMCTargetOptionsFlags MCTargetOptionsFlags;
-cl::OptionCategory DwpCategory("Specific Options");
-static cl::list<std::string>
- InputFiles(cl::Positional, cl::desc("<input files>"), cl::cat(DwpCategory));
+// Command-line option boilerplate.
+namespace {
+enum ID {
+ OPT_INVALID = 0, // This is not an option ID.
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ OPT_##ID,
+#include "Opts.inc"
+#undef OPTION
+};
-static cl::list<std::string> ExecFilenames(
- "e",
- cl::desc(
- "Specify the executable/library files to get the list of *.dwo from"),
- cl::value_desc("filename"), cl::cat(DwpCategory));
+#define PREFIX(NAME, VALUE) \
+ static constexpr StringLiteral NAME##_init[] = VALUE; \
+ static constexpr ArrayRef<StringLiteral> NAME(NAME##_init, \
+ std::size(NAME##_init) - 1);
+#include "Opts.inc"
+#undef PREFIX
-static cl::opt<std::string> OutputFilename(cl::Required, "o",
- cl::desc("Specify the output file."),
- cl::value_desc("filename"),
- cl::cat(DwpCategory));
+static constexpr opt::OptTable::Info InfoTable[] = {
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \
+ HELPTEXT, METAVAR, VALUES) \
+ { \
+ PREFIX, NAME, HELPTEXT, \
+ METAVAR, OPT_##ID, opt::Option::KIND##Class, \
+ PARAM, FLAGS, OPT_##GROUP, \
+ OPT_##ALIAS, ALIASARGS, VALUES},
+#include "Opts.inc"
+#undef OPTION
+};
+
+class DwpOptTable : public opt::GenericOptTable {
+public:
+ DwpOptTable() : GenericOptTable(InfoTable) {}
+};
+} // end anonymous namespace
+
+// Options
+static std::vector<std::string> ExecFilenames;
+static std::string OutputFilename;
+static bool ContinueOnCuIndexOverflow;
static Expected<SmallVector<std::string, 16>>
getDWOFilenames(StringRef ExecFilename) {
@@ -100,15 +128,41 @@ static Expected<Triple> readTargetTriple(StringRef FileName) {
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
- cl::HideUnrelatedOptions({&DwpCategory, &getColorCategory()});
- cl::ParseCommandLineOptions(argc, argv, "merge split dwarf (.dwo) files\n");
+ DwpOptTable Tbl;
+ llvm::BumpPtrAllocator A;
+ llvm::StringSaver Saver{A};
+ opt::InputArgList Args =
+ Tbl.parseArgs(argc, argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {
+ llvm::errs() << Msg << '\n';
+ std::exit(1);
+ });
+
+ if (Args.hasArg(OPT_help)) {
+ Tbl.printHelp(llvm::outs(), "llvm-dwp [options] <input files>",
+ "merge split dwarf (.dwo) files");
+ std::exit(0);
+ }
+
+ if (Args.hasArg(OPT_version)) {
+ llvm::cl::PrintVersionMessage();
+ std::exit(0);
+ }
+
+ OutputFilename = Args.getLastArgValue(OPT_outputFileName, "");
+ ContinueOnCuIndexOverflow = Args.hasArg(OPT_continueOnCuIndexOverflow);
+
+ for (const llvm::opt::Arg *A : Args.filtered(OPT_execFileNames))
+ ExecFilenames.emplace_back(A->getValue());
+
+ std::vector<std::string> DWOFilenames;
+ for (const llvm::opt::Arg *A : Args.filtered(OPT_INPUT))
+ DWOFilenames.emplace_back(A->getValue());
llvm::InitializeAllTargetInfos();
llvm::InitializeAllTargetMCs();
llvm::InitializeAllTargets();
llvm::InitializeAllAsmPrinters();
- std::vector<std::string> DWOFilenames = InputFiles;
for (const auto &ExecFilename : ExecFilenames) {
auto DWOs = getDWOFilenames(ExecFilename);
if (!DWOs) {
@@ -207,7 +261,7 @@ int main(int argc, char **argv) {
if (!MS)
return error("no object streamer for target " + TripleName, Context);
- if (auto Err = write(*MS, DWOFilenames)) {
+ if (auto Err = write(*MS, DWOFilenames, ContinueOnCuIndexOverflow)) {
logAllUnhandledErrors(std::move(Err), WithColor::error());
return 1;
}
diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp
index 79e9d93061a2..51921d44d748 100644
--- a/llvm/tools/llvm-lto/llvm-lto.cpp
+++ b/llvm/tools/llvm-lto/llvm-lto.cpp
@@ -516,11 +516,10 @@ static void getThinLTOOldAndNewPrefix(std::string &OldPrefix,
/// Given the original \p Path to an output file, replace any path
/// prefix matching \p OldPrefix with \p NewPrefix. Also, create the
/// resulting directory if it does not yet exist.
-static std::string getThinLTOOutputFile(const std::string &Path,
- const std::string &OldPrefix,
- const std::string &NewPrefix) {
+static std::string getThinLTOOutputFile(StringRef Path, StringRef OldPrefix,
+ StringRef NewPrefix) {
if (OldPrefix.empty() && NewPrefix.empty())
- return Path;
+ return std::string(Path);
SmallString<128> NewPath(Path);
llvm::sys::path::replace_path_prefix(NewPath, OldPrefix, NewPrefix);
StringRef ParentPath = llvm::sys::path::parent_path(NewPath.str());
diff --git a/llvm/tools/llvm-lto2/llvm-lto2.cpp b/llvm/tools/llvm-lto2/llvm-lto2.cpp
index 09c74fc586f5..81c97a994038 100644
--- a/llvm/tools/llvm-lto2/llvm-lto2.cpp
+++ b/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -167,10 +167,6 @@ static cl::opt<bool>
cl::desc("Run PGO context sensitive IR instrumentation"),
cl::Hidden);
-static cl::opt<bool> LtoOpaquePointers("lto-opaque-pointers",
- cl::desc("Enable opaque pointer types"),
- cl::init(true), cl::Hidden);
-
static cl::opt<bool>
DebugPassManager("debug-pass-manager", cl::Hidden,
cl::desc("Print pass management debugging information"));
@@ -182,6 +178,10 @@ static cl::list<std::string>
PassPlugins("load-pass-plugin",
cl::desc("Load passes from plugin library"));
+static cl::opt<std::string> UnifiedLTOMode("unified-lto", cl::Optional,
+ cl::desc("Set LTO mode"),
+ cl::value_desc("mode"));
+
static cl::opt<bool> EnableFreestanding(
"lto-freestanding",
cl::desc("Enable Freestanding (disable builtins / TLI) during LTO"),
@@ -321,15 +321,15 @@ static int run(int argc, char **argv) {
Conf.StatsFile = StatsFile;
Conf.PTO.LoopVectorization = Conf.OptLevel > 1;
Conf.PTO.SLPVectorization = Conf.OptLevel > 1;
- Conf.OpaquePointers = LtoOpaquePointers;
ThinBackend Backend;
if (ThinLTODistributedIndexes)
- Backend =
- createWriteIndexesThinBackend(/* OldPrefix */ "",
- /* NewPrefix */ "", ThinLTOEmitImports,
- /* LinkedObjectsFile */ nullptr,
- /* OnWrite */ {});
+ Backend = createWriteIndexesThinBackend(/*OldPrefix=*/"",
+ /*NewPrefix=*/"",
+ /*NativeObjectPrefix=*/"",
+ ThinLTOEmitImports,
+ /*LinkedObjectsFile=*/nullptr,
+ /*OnWrite=*/{});
else
Backend = createInProcessThinBackend(
llvm::heavyweight_hardware_concurrency(Threads),
@@ -352,7 +352,20 @@ static int run(int argc, char **argv) {
HasErrors = true;
};
- LTO Lto(std::move(Conf), std::move(Backend));
+ LTO::LTOKind LTOMode = LTO::LTOK_Default;
+
+ if (UnifiedLTOMode == "full") {
+ LTOMode = LTO::LTOK_UnifiedRegular;
+ } else if (UnifiedLTOMode == "thin") {
+ LTOMode = LTO::LTOK_UnifiedThin;
+ } else if (UnifiedLTOMode == "default") {
+ LTOMode = LTO::LTOK_Default;
+ } else if (!UnifiedLTOMode.empty()) {
+ llvm::errs() << "invalid LTO mode\n";
+ return 1;
+ }
+
+ LTO Lto(std::move(Conf), std::move(Backend), 1, LTOMode);
for (std::string F : InputFilenames) {
std::unique_ptr<MemoryBuffer> MB = check(MemoryBuffer::getFile(F), F);
diff --git a/llvm/tools/llvm-mc/Disassembler.cpp b/llvm/tools/llvm-mc/Disassembler.cpp
index 2d1833429718..7456a2f2c915 100644
--- a/llvm/tools/llvm-mc/Disassembler.cpp
+++ b/llvm/tools/llvm-mc/Disassembler.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "Disassembler.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -25,6 +24,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp b/llvm/tools/llvm-mc/llvm-mc.cpp
index dd5a66a6eb7d..572723afb79e 100644
--- a/llvm/tools/llvm-mc/llvm-mc.cpp
+++ b/llvm/tools/llvm-mc/llvm-mc.cpp
@@ -31,13 +31,13 @@
#include "llvm/Support/Compression.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/WithColor.h"
+#include "llvm/TargetParser/Host.h"
using namespace llvm;
diff --git a/llvm/tools/llvm-mca/CodeRegion.cpp b/llvm/tools/llvm-mca/CodeRegion.cpp
index c91ed759ee77..ba5188076c2e 100644
--- a/llvm/tools/llvm-mca/CodeRegion.cpp
+++ b/llvm/tools/llvm-mca/CodeRegion.cpp
@@ -115,7 +115,7 @@ void AnalysisRegions::endRegion(StringRef Description, SMLoc Loc) {
InstrumentRegions::InstrumentRegions(llvm::SourceMgr &S) : CodeRegions(S) {}
void InstrumentRegions::beginRegion(StringRef Description, SMLoc Loc,
- SharedInstrument I) {
+ UniqueInstrument I) {
if (Description.empty()) {
SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error,
"anonymous instrumentation regions are not permitted");
@@ -137,7 +137,8 @@ void InstrumentRegions::beginRegion(StringRef Description, SMLoc Loc,
}
ActiveRegions[Description] = Regions.size();
- Regions.emplace_back(std::make_unique<InstrumentRegion>(Description, Loc, I));
+ Regions.emplace_back(
+ std::make_unique<InstrumentRegion>(Description, Loc, std::move(I)));
}
void InstrumentRegions::endRegion(StringRef Description, SMLoc Loc) {
@@ -158,13 +159,13 @@ void InstrumentRegions::endRegion(StringRef Description, SMLoc Loc) {
}
}
-const SmallVector<SharedInstrument>
+const SmallVector<Instrument *>
InstrumentRegions::getActiveInstruments(SMLoc Loc) const {
- SmallVector<SharedInstrument> AI;
+ SmallVector<Instrument *> AI;
for (auto &R : Regions) {
if (R->isLocInRange(Loc)) {
InstrumentRegion *IR = static_cast<InstrumentRegion *>(R.get());
- AI.emplace_back(IR->getInstrument());
+ AI.push_back(IR->getInstrument());
}
}
return AI;
diff --git a/llvm/tools/llvm-mca/CodeRegion.h b/llvm/tools/llvm-mca/CodeRegion.h
index b5b2f3a0d118..ce107fd8f3b6 100644
--- a/llvm/tools/llvm-mca/CodeRegion.h
+++ b/llvm/tools/llvm-mca/CodeRegion.h
@@ -91,6 +91,8 @@ public:
CodeRegion(llvm::StringRef Desc, llvm::SMLoc Start)
: Description(Desc), RangeStart(Start) {}
+ virtual ~CodeRegion() = default;
+
void addInstruction(const llvm::MCInst &Instruction) {
Instructions.emplace_back(Instruction);
}
@@ -115,14 +117,14 @@ using AnalysisRegion = CodeRegion;
/// in analysis of the region.
class InstrumentRegion : public CodeRegion {
/// Instrument for this region.
- SharedInstrument Instrument;
+ UniqueInstrument I;
public:
- InstrumentRegion(llvm::StringRef Desc, llvm::SMLoc Start, SharedInstrument I)
- : CodeRegion(Desc, Start), Instrument(I) {}
+ InstrumentRegion(llvm::StringRef Desc, llvm::SMLoc Start, UniqueInstrument I)
+ : CodeRegion(Desc, Start), I(std::move(I)) {}
public:
- SharedInstrument getInstrument() const { return Instrument; }
+ Instrument *getInstrument() const { return I.get(); }
};
class CodeRegionParseError final : public Error {};
@@ -142,6 +144,7 @@ protected:
public:
CodeRegions(llvm::SourceMgr &S) : SM(S), FoundErrors(false) {}
+ virtual ~CodeRegions() = default;
typedef std::vector<UniqueCodeRegion>::iterator iterator;
typedef std::vector<UniqueCodeRegion>::const_iterator const_iterator;
@@ -167,26 +170,34 @@ public:
bool isValid() const { return !FoundErrors; }
bool isRegionActive(llvm::StringRef Description) const {
- return ActiveRegions.find(Description) != ActiveRegions.end();
+ return ActiveRegions.contains(Description);
}
+
+ virtual void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc) = 0;
+ virtual void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc,
+ UniqueInstrument Instrument) = 0;
+ virtual void endRegion(llvm::StringRef Description, llvm::SMLoc Loc) = 0;
};
struct AnalysisRegions : public CodeRegions {
AnalysisRegions(llvm::SourceMgr &S);
- void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc);
- void endRegion(llvm::StringRef Description, llvm::SMLoc Loc);
+ void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc) override;
+ void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc,
+ UniqueInstrument Instrument) override {}
+ void endRegion(llvm::StringRef Description, llvm::SMLoc Loc) override;
};
struct InstrumentRegions : public CodeRegions {
+
InstrumentRegions(llvm::SourceMgr &S);
+ void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc) override{};
void beginRegion(llvm::StringRef Description, llvm::SMLoc Loc,
- SharedInstrument Instrument);
- void endRegion(llvm::StringRef Description, llvm::SMLoc Loc);
+ UniqueInstrument Instrument) override;
+ void endRegion(llvm::StringRef Description, llvm::SMLoc Loc) override;
- const SmallVector<SharedInstrument>
- getActiveInstruments(llvm::SMLoc Loc) const;
+ const SmallVector<Instrument *> getActiveInstruments(llvm::SMLoc Loc) const;
};
} // namespace mca
diff --git a/llvm/tools/llvm-mca/CodeRegionGenerator.cpp b/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
index b8e10fa69c2d..5241b584b746 100644
--- a/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
+++ b/llvm/tools/llvm-mca/CodeRegionGenerator.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/SMLoc.h"
@@ -29,46 +28,12 @@ namespace mca {
// This virtual dtor serves as the anchor for the CodeRegionGenerator class.
CodeRegionGenerator::~CodeRegionGenerator() {}
-// This class provides the callbacks that occur when parsing input assembly.
-class MCStreamerWrapper final : public MCStreamer {
- CodeRegions &Regions;
-
-public:
- MCStreamerWrapper(MCContext &Context, mca::CodeRegions &R)
- : MCStreamer(Context), Regions(R) {}
-
- // We only want to intercept the emission of new instructions.
- void emitInstruction(const MCInst &Inst,
- const MCSubtargetInfo & /* unused */) override {
- Regions.addInstruction(Inst);
- }
-
- bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override {
- return true;
- }
-
- void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- Align ByteAlignment) override {}
- void emitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
- uint64_t Size = 0, Align ByteAlignment = Align(1),
- SMLoc Loc = SMLoc()) override {}
- void emitGPRel32Value(const MCExpr *Value) override {}
- void beginCOFFSymbolDef(const MCSymbol *Symbol) override {}
- void emitCOFFSymbolStorageClass(int StorageClass) override {}
- void emitCOFFSymbolType(int Type) override {}
- void endCOFFSymbolDef() override {}
-
- ArrayRef<MCInst> GetInstructionSequence(unsigned Index) const {
- return Regions.getInstructionSequence(Index);
- }
-};
-
Expected<const CodeRegions &> AsmCodeRegionGenerator::parseCodeRegions(
const std::unique_ptr<MCInstPrinter> &IP) {
MCTargetOptions Opts;
Opts.PreserveAsmComments = false;
CodeRegions &Regions = getRegions();
- MCStreamerWrapper Str(Ctx, Regions);
+ MCStreamerWrapper *Str = getMCStreamer();
// Need to initialize an MCTargetStreamer otherwise
// certain asm directives will cause a segfault.
@@ -76,13 +41,13 @@ Expected<const CodeRegions &> AsmCodeRegionGenerator::parseCodeRegions(
// doesn't show up in the llvm-mca output.
raw_ostream &OSRef = nulls();
formatted_raw_ostream FOSRef(OSRef);
- TheTarget.createAsmTargetStreamer(Str, FOSRef, IP.get(),
+ TheTarget.createAsmTargetStreamer(*Str, FOSRef, IP.get(),
/*IsVerboseAsm=*/true);
// Create a MCAsmParser and setup the lexer to recognize llvm-mca ASM
// comments.
std::unique_ptr<MCAsmParser> Parser(
- createMCAsmParser(Regions.getSourceMgr(), Ctx, Str, MAI));
+ createMCAsmParser(Regions.getSourceMgr(), Ctx, *Str, MAI));
MCAsmLexer &Lexer = Parser->getLexer();
MCACommentConsumer *CCP = getCommentConsumer();
Lexer.setCommentConsumer(CCP);
@@ -184,7 +149,7 @@ void InstrumentRegionCommentConsumer::HandleComment(SMLoc Loc,
return;
}
- SharedInstrument I = IM.createInstrument(InstrumentKind, Data);
+ UniqueInstrument I = IM.createInstrument(InstrumentKind, Data);
if (!I) {
if (Data.empty())
SM.PrintMessage(Loc, llvm::SourceMgr::DK_Error,
@@ -202,7 +167,7 @@ void InstrumentRegionCommentConsumer::HandleComment(SMLoc Loc,
if (Regions.isRegionActive(InstrumentKind))
Regions.endRegion(InstrumentKind, Loc);
// Start new instrumentation region
- Regions.beginRegion(InstrumentKind, Loc, I);
+ Regions.beginRegion(InstrumentKind, Loc, std::move(I));
}
} // namespace mca
diff --git a/llvm/tools/llvm-mca/CodeRegionGenerator.h b/llvm/tools/llvm-mca/CodeRegionGenerator.h
index 88621ed856c5..68da567f3e0f 100644
--- a/llvm/tools/llvm-mca/CodeRegionGenerator.h
+++ b/llvm/tools/llvm-mca/CodeRegionGenerator.h
@@ -20,6 +20,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/MCA/CustomBehaviour.h"
@@ -32,10 +33,10 @@ namespace mca {
class MCACommentConsumer : public AsmCommentConsumer {
protected:
- bool FoundError;
+ bool FoundError = false;
public:
- MCACommentConsumer() : FoundError(false) {}
+ MCACommentConsumer() = default;
bool hadErr() const { return FoundError; }
};
@@ -77,6 +78,67 @@ public:
/// region of type INSTRUMENATION_TYPE, then it will end the active
/// one and begin a new one using the new data.
void HandleComment(SMLoc Loc, StringRef CommentText) override;
+
+ InstrumentManager &getInstrumentManager() { return IM; }
+};
+
+// This class provides the callbacks that occur when parsing input assembly.
+class MCStreamerWrapper : public MCStreamer {
+protected:
+ CodeRegions &Regions;
+
+public:
+ MCStreamerWrapper(MCContext &Context, mca::CodeRegions &R)
+ : MCStreamer(Context), Regions(R) {}
+
+ // We only want to intercept the emission of new instructions.
+ void emitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo & /* unused */) override {
+ Regions.addInstruction(Inst);
+ }
+
+ bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override {
+ return true;
+ }
+
+ void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ Align ByteAlignment) override {}
+ void emitZerofill(MCSection *Section, MCSymbol *Symbol = nullptr,
+ uint64_t Size = 0, Align ByteAlignment = Align(1),
+ SMLoc Loc = SMLoc()) override {}
+ void emitGPRel32Value(const MCExpr *Value) override {}
+ void beginCOFFSymbolDef(const MCSymbol *Symbol) override {}
+ void emitCOFFSymbolStorageClass(int StorageClass) override {}
+ void emitCOFFSymbolType(int Type) override {}
+ void endCOFFSymbolDef() override {}
+
+ ArrayRef<MCInst> GetInstructionSequence(unsigned Index) const {
+ return Regions.getInstructionSequence(Index);
+ }
+};
+
+class InstrumentMCStreamer : public MCStreamerWrapper {
+ InstrumentManager &IM;
+
+public:
+ InstrumentMCStreamer(MCContext &Context, mca::InstrumentRegions &R,
+ InstrumentManager &IM)
+ : MCStreamerWrapper(Context, R), IM(IM) {}
+
+ void emitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &MCSI) override {
+ MCStreamerWrapper::emitInstruction(Inst, MCSI);
+
+ // We know that Regions is an InstrumentRegions by the constructor.
+ for (UniqueInstrument &I : IM.createInstruments(Inst)) {
+ StringRef InstrumentKind = I.get()->getDesc();
+ // End InstrumentType region if one is open
+ if (Regions.isRegionActive(InstrumentKind))
+ Regions.endRegion(InstrumentKind, Inst.getLoc());
+ // Start new instrumentation region
+ Regions.beginRegion(InstrumentKind, Inst.getLoc(), std::move(I));
+ }
+ }
};
/// This abstract class is responsible for parsing the input given to
@@ -121,19 +183,22 @@ public:
/// generating a CodeRegions instance.
class AsmCodeRegionGenerator : public virtual CodeRegionGenerator {
const Target &TheTarget;
- MCContext &Ctx;
const MCAsmInfo &MAI;
const MCSubtargetInfo &STI;
const MCInstrInfo &MCII;
unsigned AssemblerDialect; // This is set during parsing.
+protected:
+ MCContext &Ctx;
+
public:
AsmCodeRegionGenerator(const Target &T, MCContext &C, const MCAsmInfo &A,
const MCSubtargetInfo &S, const MCInstrInfo &I)
- : TheTarget(T), Ctx(C), MAI(A), STI(S), MCII(I), AssemblerDialect(0) {}
+ : TheTarget(T), MAI(A), STI(S), MCII(I), AssemblerDialect(0), Ctx(C) {}
virtual MCACommentConsumer *getCommentConsumer() = 0;
virtual CodeRegions &getRegions() = 0;
+ virtual MCStreamerWrapper *getMCStreamer() = 0;
unsigned getAssemblerDialect() const { return AssemblerDialect; }
Expected<const CodeRegions &>
@@ -143,16 +208,18 @@ public:
class AsmAnalysisRegionGenerator final : public AnalysisRegionGenerator,
public AsmCodeRegionGenerator {
AnalysisRegionCommentConsumer CC;
+ MCStreamerWrapper Streamer;
public:
AsmAnalysisRegionGenerator(const Target &T, llvm::SourceMgr &SM, MCContext &C,
const MCAsmInfo &A, const MCSubtargetInfo &S,
const MCInstrInfo &I)
: AnalysisRegionGenerator(SM), AsmCodeRegionGenerator(T, C, A, S, I),
- CC(Regions) {}
+ CC(Regions), Streamer(Ctx, Regions) {}
MCACommentConsumer *getCommentConsumer() override { return &CC; };
CodeRegions &getRegions() override { return Regions; };
+ MCStreamerWrapper *getMCStreamer() override { return &Streamer; }
Expected<const AnalysisRegions &>
parseAnalysisRegions(const std::unique_ptr<MCInstPrinter> &IP) override {
@@ -172,6 +239,7 @@ public:
class AsmInstrumentRegionGenerator final : public InstrumentRegionGenerator,
public AsmCodeRegionGenerator {
InstrumentRegionCommentConsumer CC;
+ InstrumentMCStreamer Streamer;
public:
AsmInstrumentRegionGenerator(const Target &T, llvm::SourceMgr &SM,
@@ -179,10 +247,11 @@ public:
const MCSubtargetInfo &S, const MCInstrInfo &I,
InstrumentManager &IM)
: InstrumentRegionGenerator(SM), AsmCodeRegionGenerator(T, C, A, S, I),
- CC(SM, Regions, IM) {}
+ CC(SM, Regions, IM), Streamer(Ctx, Regions, IM) {}
MCACommentConsumer *getCommentConsumer() override { return &CC; };
CodeRegions &getRegions() override { return Regions; };
+ MCStreamerWrapper *getMCStreamer() override { return &Streamer; }
Expected<const InstrumentRegions &>
parseInstrumentRegions(const std::unique_ptr<MCInstPrinter> &IP) override {
diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
index dc0a07e75e48..b254ccd6670f 100644
--- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
+++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.cpp
@@ -52,7 +52,7 @@ void PressureTracker::getResourceUsers(uint64_t ResourceMask,
const MCProcResourceDesc &PRDesc = *SM.getProcResource(ProcResID);
for (unsigned I = 0, E = PRDesc.NumUnits; I < E; ++I) {
const User U = getResourceUser(ProcResID, I);
- if (U.second && IPI.find(U.first) != IPI.end())
+ if (U.second && IPI.contains(U.first))
Users.emplace_back(U);
}
}
@@ -69,7 +69,7 @@ void PressureTracker::handleInstructionIssuedEvent(
for (const ResourceUse &Use : Event.UsedResources) {
const ResourceRef &RR = Use.first;
unsigned Index = ProcResID2ResourceUsersIndex[RR.first];
- Index += countTrailingZeros(RR.second);
+ Index += llvm::countr_zero(RR.second);
ResourceUsers[Index] = std::make_pair(IID, Use.second.getNumerator());
}
}
diff --git a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
index cd5af0afcf5b..e709b25c3f76 100644
--- a/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
+++ b/llvm/tools/llvm-mca/Views/BottleneckAnalysis.h
@@ -146,19 +146,19 @@ public:
SmallVectorImpl<User> &Users) const;
unsigned getRegisterPressureCycles(unsigned IID) const {
- assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
+ assert(IPI.contains(IID) && "Instruction is not tracked!");
const InstructionPressureInfo &Info = IPI.find(IID)->second;
return Info.RegisterPressureCycles;
}
unsigned getMemoryPressureCycles(unsigned IID) const {
- assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
+ assert(IPI.contains(IID) && "Instruction is not tracked!");
const InstructionPressureInfo &Info = IPI.find(IID)->second;
return Info.MemoryPressureCycles;
}
unsigned getResourcePressureCycles(unsigned IID) const {
- assert(IPI.find(IID) != IPI.end() && "Instruction is not tracked!");
+ assert(IPI.contains(IID) && "Instruction is not tracked!");
const InstructionPressureInfo &Info = IPI.find(IID)->second;
return Info.ResourcePressureCycles;
}
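
The asserts above swap the find()/end() comparison for the newer contains() membership check while keeping find() for the actual lookup. The same idiom with a standard C++20 container, shown here with an invented map and key purely for illustration:

    #include <cassert>
    #include <unordered_map>

    struct PressureInfo {
      unsigned RegisterPressureCycles = 0;
    };

    unsigned getRegisterPressureCycles(
        const std::unordered_map<unsigned, PressureInfo> &IPI, unsigned IID) {
      assert(IPI.contains(IID) && "Instruction is not tracked!"); // C++20 contains()
      return IPI.find(IID)->second.RegisterPressureCycles;
    }

    int main() {
      std::unordered_map<unsigned, PressureInfo> IPI{{7, {3}}};
      return getRegisterPressureCycles(IPI, 7) == 3 ? 0 : 1;
    }
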
diff --git a/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
index 257fdca8cb36..fea0c9b8455c 100644
--- a/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
+++ b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
@@ -55,10 +55,7 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
}
}
- int Index = 0;
- for (const auto &I : enumerate(zip(IIVD, Source))) {
- const InstructionInfoViewData &IIVDEntry = std::get<0>(I.value());
-
+ for (const auto &[Index, IIVDEntry, Inst] : enumerate(IIVD, Source)) {
TempStream << ' ' << IIVDEntry.NumMicroOpcodes << " ";
if (IIVDEntry.NumMicroOpcodes < 10)
TempStream << " ";
@@ -92,7 +89,7 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
}
if (PrintEncodings) {
- StringRef Encoding(CE.getEncoding(I.index()));
+ StringRef Encoding(CE.getEncoding(Index));
unsigned EncodingSize = Encoding.size();
TempStream << " " << EncodingSize
<< (EncodingSize < 10 ? " " : " ");
@@ -104,9 +101,7 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
FOS.flush();
}
- const MCInst &Inst = std::get<1>(I.value());
TempStream << printInstructionString(Inst) << '\n';
- ++Index;
}
TempStream.flush();
@@ -122,8 +117,13 @@ void InstructionInfoView::collectData(
InstructionInfoViewData &IIVDEntry = std::get<1>(I);
const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
- // Obtain the scheduling class information from the instruction.
- unsigned SchedClassID = MCDesc.getSchedClass();
+ // Obtain the scheduling class information from the instruction
+ // and instruments.
+ auto IVecIt = InstToInstruments.find(&Inst);
+ unsigned SchedClassID =
+ IVecIt == InstToInstruments.end()
+ ? MCDesc.getSchedClass()
+ : IM.getSchedClassID(MCII, Inst, IVecIt->second);
unsigned CPUID = SM.getProcessorID();
// Try to solve variant scheduling classes.
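
The printView rewrite above leans on llvm::enumerate accepting more than one range, so the manual Index counter and the std::get calls over zip() disappear into a single structured binding. A reduced sketch of that usage with plain vectors (element types are placeholders; the ranges are assumed to have equal length):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/Support/raw_ostream.h"
    #include <vector>

    int main() {
      std::vector<unsigned> MicroOps = {1, 2, 3};
      std::vector<const char *> Names = {"add", "mul", "ret"};
      // One binding yields the running index plus one element from each range.
      for (const auto &[Index, Ops, Name] : llvm::enumerate(MicroOps, Names))
        llvm::outs() << Index << ": " << Name << " uOps=" << Ops << '\n';
      return 0;
    }
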
diff --git a/llvm/tools/llvm-mca/Views/InstructionInfoView.h b/llvm/tools/llvm-mca/Views/InstructionInfoView.h
index bddd01a086b5..3befafda90a3 100644
--- a/llvm/tools/llvm-mca/Views/InstructionInfoView.h
+++ b/llvm/tools/llvm-mca/Views/InstructionInfoView.h
@@ -42,6 +42,7 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MCA/CodeEmitter.h"
+#include "llvm/MCA/CustomBehaviour.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "llvm-mca"
@@ -57,6 +58,10 @@ class InstructionInfoView : public InstructionView {
bool PrintBarriers;
using UniqueInst = std::unique_ptr<Instruction>;
ArrayRef<UniqueInst> LoweredInsts;
+ const InstrumentManager &IM;
+ using InstToInstrumentsT =
+ DenseMap<const MCInst *, SmallVector<mca::Instrument *>>;
+ const InstToInstrumentsT &InstToInstruments;
struct InstructionInfoViewData {
unsigned NumMicroOpcodes = 0;
@@ -77,10 +82,12 @@ public:
bool ShouldPrintEncodings, llvm::ArrayRef<llvm::MCInst> S,
llvm::MCInstPrinter &IP,
ArrayRef<UniqueInst> LoweredInsts,
- bool ShouldPrintBarriers)
+ bool ShouldPrintBarriers, const InstrumentManager &IM,
+ const InstToInstrumentsT &InstToInstruments)
: InstructionView(ST, IP, S), MCII(II), CE(C),
PrintEncodings(ShouldPrintEncodings),
- PrintBarriers(ShouldPrintBarriers), LoweredInsts(LoweredInsts) {}
+ PrintBarriers(ShouldPrintBarriers), LoweredInsts(LoweredInsts), IM(IM),
+ InstToInstruments(InstToInstruments) {}
void printView(llvm::raw_ostream &OS) const override;
StringRef getNameAsString() const override { return "InstructionInfoView"; }
diff --git a/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp b/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp
index 77b3ba0b7c8d..0f059bcc0a06 100644
--- a/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp
+++ b/llvm/tools/llvm-mca/Views/ResourcePressureView.cpp
@@ -57,9 +57,9 @@ void ResourcePressureView::onEvent(const HWInstructionEvent &Event) {
for (const std::pair<ResourceRef, ResourceCycles> &Use :
IssueEvent.UsedResources) {
const ResourceRef &RR = Use.first;
- assert(Resource2VecIndex.find(RR.first) != Resource2VecIndex.end());
+ assert(Resource2VecIndex.contains(RR.first));
unsigned R2VIndex = Resource2VecIndex[RR.first];
- R2VIndex += countTrailingZeros(RR.second);
+ R2VIndex += llvm::countr_zero(RR.second);
ResourceUsage[R2VIndex + NumResourceUnits * SourceIdx] += Use.second;
ResourceUsage[R2VIndex + NumResourceUnits * Source.size()] += Use.second;
}
diff --git a/llvm/tools/llvm-mca/Views/TimelineView.cpp b/llvm/tools/llvm-mca/Views/TimelineView.cpp
index 5c05edbdea68..2eca48aadfd7 100644
--- a/llvm/tools/llvm-mca/Views/TimelineView.cpp
+++ b/llvm/tools/llvm-mca/Views/TimelineView.cpp
@@ -315,6 +315,10 @@ json::Value TimelineView::toJSON() const {
json::Array TimelineInfo;
for (const TimelineViewEntry &TLE : Timeline) {
+ // Check if the timeline-max-cycles limit has been reached.
+ if (!TLE.CycleRetired && TLE.CycleExecuted)
+ break;
+
TimelineInfo.push_back(
json::Object({{"CycleDispatched", TLE.CycleDispatched},
{"CycleReady", TLE.CycleReady},
diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp
index 73c341891ab7..eb71cffba6dd 100644
--- a/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -53,13 +53,13 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/WithColor.h"
+#include "llvm/TargetParser/Host.h"
using namespace llvm;
@@ -401,11 +401,6 @@ int main(int argc, char **argv) {
// Tell SrcMgr about this buffer, which is what the parser will pick up.
SrcMgr.AddNewSourceBuffer(std::move(*BufferPtr), SMLoc());
- MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr);
- std::unique_ptr<MCObjectFileInfo> MOFI(
- TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false));
- Ctx.setObjectFileInfo(MOFI.get());
-
std::unique_ptr<buffer_ostream> BOS;
std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
@@ -433,7 +428,11 @@ int main(int argc, char **argv) {
}
// Parse the input and create CodeRegions that llvm-mca can analyze.
- mca::AsmAnalysisRegionGenerator CRG(*TheTarget, SrcMgr, Ctx, *MAI, *STI,
+ MCContext ACtx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr);
+ std::unique_ptr<MCObjectFileInfo> AMOFI(
+ TheTarget->createMCObjectFileInfo(ACtx, /*PIC=*/false));
+ ACtx.setObjectFileInfo(AMOFI.get());
+ mca::AsmAnalysisRegionGenerator CRG(*TheTarget, SrcMgr, ACtx, *MAI, *STI,
*MCII);
Expected<const mca::AnalysisRegions &> RegionsOrErr =
CRG.parseAnalysisRegions(std::move(IPtemp));
@@ -471,7 +470,11 @@ int main(int argc, char **argv) {
// Parse the input and create InstrumentRegion that llvm-mca
// can use to improve analysis.
- mca::AsmInstrumentRegionGenerator IRG(*TheTarget, SrcMgr, Ctx, *MAI, *STI,
+ MCContext ICtx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr);
+ std::unique_ptr<MCObjectFileInfo> IMOFI(
+ TheTarget->createMCObjectFileInfo(ICtx, /*PIC=*/false));
+ ICtx.setObjectFileInfo(IMOFI.get());
+ mca::AsmInstrumentRegionGenerator IRG(*TheTarget, SrcMgr, ICtx, *MAI, *STI,
*MCII, *IM);
Expected<const mca::InstrumentRegions &> InstrumentRegionsOrErr =
IRG.parseInstrumentRegions(std::move(IPtemp));
@@ -547,7 +550,7 @@ int main(int argc, char **argv) {
unsigned RegionIdx = 0;
std::unique_ptr<MCCodeEmitter> MCE(
- TheTarget->createMCCodeEmitter(*MCII, Ctx));
+ TheTarget->createMCCodeEmitter(*MCII, ACtx));
assert(MCE && "Unable to create code emitter!");
std::unique_ptr<MCAsmBackend> MAB(TheTarget->createMCAsmBackend(
@@ -568,11 +571,14 @@ int main(int argc, char **argv) {
IPP->resetState();
+ DenseMap<const MCInst *, SmallVector<mca::Instrument *>>
+ InstToInstruments;
SmallVector<std::unique_ptr<mca::Instruction>> LoweredSequence;
for (const MCInst &MCI : Insts) {
SMLoc Loc = MCI.getLoc();
- const SmallVector<mca::SharedInstrument> Instruments =
+ const SmallVector<mca::Instrument *> Instruments =
InstrumentRegions.getActiveInstruments(Loc);
+ InstToInstruments.insert({&MCI, Instruments});
Expected<std::unique_ptr<mca::Instruction>> Inst =
IB.createInstruction(MCI, Instruments);
@@ -618,7 +624,7 @@ int main(int argc, char **argv) {
if (PrintInstructionInfoView) {
Printer.addView(std::make_unique<mca::InstructionInfoView>(
*STI, *MCII, CE, ShowEncoding, Insts, *IP, LoweredSequence,
- ShowBarriers));
+ ShowBarriers, *IM, InstToInstruments));
}
Printer.addView(
std::make_unique<mca::ResourcePressureView>(*STI, *IP, Insts));
@@ -695,7 +701,7 @@ int main(int argc, char **argv) {
if (PrintInstructionInfoView)
Printer.addView(std::make_unique<mca::InstructionInfoView>(
*STI, *MCII, CE, ShowEncoding, Insts, *IP, LoweredSequence,
- ShowBarriers));
+ ShowBarriers, *IM, InstToInstruments));
// Fetch custom Views that are to be placed after the InstructionInfoView.
// Refer to the comment paired with the CB->getStartViews(*IP, Insts); line
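
The driver changes above do two related things: each parse now gets its own MCContext/MCObjectFileInfo pair (ACtx for analysis regions, ICtx for instrument regions), and the instruments active at each MCInst's source location are recorded in a DenseMap so the InstructionInfoView can consult them later. A simplified sketch of that lookup table with standard containers; Inst, Instrument and getActiveInstruments are stand-ins for the MCA types, not the real API:

    #include <map>
    #include <vector>

    struct Inst { unsigned Opcode; };
    struct Instrument { const char *Kind; };

    // Stand-in for InstrumentRegions::getActiveInstruments(Loc): pretend one
    // instrument covers every instruction.
    std::vector<Instrument *> getActiveInstruments(const Inst &) {
      static Instrument Cycles{"cpu-frequency"};
      return {&Cycles};
    }

    // Record, per instruction, the instruments that were active when it was seen.
    std::map<const Inst *, std::vector<Instrument *>>
    buildInstToInstruments(const std::vector<Inst> &Insts) {
      std::map<const Inst *, std::vector<Instrument *>> InstToInstruments;
      for (const Inst &I : Insts)
        InstToInstruments.insert({&I, getActiveInstruments(I)});
      return InstToInstruments;
    }

    int main() {
      std::vector<Inst> Insts = {{1}, {2}};
      return buildInstToInstruments(Insts).size() == 2 ? 0 : 1;
    }
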
diff --git a/llvm/tools/llvm-nm/llvm-nm.cpp b/llvm/tools/llvm-nm/llvm-nm.cpp
index 55319d0e4c72..1f6a5d1ab806 100644
--- a/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -16,7 +16,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/XCOFF.h"
#include "llvm/Demangle/Demangle.h"
@@ -40,14 +39,16 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/LLVMDriver.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/Triple.h"
#include <vector>
using namespace llvm;
@@ -293,22 +294,6 @@ bool operator==(const NMSymbol &A, const NMSymbol &B) {
}
} // anonymous namespace
-static char isSymbolList64Bit(SymbolicFile &Obj) {
- if (auto *IRObj = dyn_cast<IRObjectFile>(&Obj))
- return Triple(IRObj->getTargetTriple()).isArch64Bit();
- if (isa<COFFObjectFile>(Obj) || isa<COFFImportFile>(Obj))
- return false;
- if (XCOFFObjectFile *XCOFFObj = dyn_cast<XCOFFObjectFile>(&Obj))
- return XCOFFObj->is64Bit();
- if (isa<WasmObjectFile>(Obj))
- return false;
- if (TapiFile *Tapi = dyn_cast<TapiFile>(&Obj))
- return Tapi->is64Bit();
- if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj))
- return MachO->is64Bit();
- return cast<ELFObjectFileBase>(Obj).getBytesInAddress() == 8;
-}
-
static StringRef CurrentFilename;
static char getSymbolNMTypeChar(IRObjectFile &Obj, basic_symbol_iterator I);
@@ -654,7 +639,7 @@ static void darwinPrintStab(MachOObjectFile *MachO, const NMSymbol &S) {
static std::optional<std::string> demangle(StringRef Name) {
std::string Demangled;
- if (nonMicrosoftDemangle(Name.str().c_str(), Demangled))
+ if (nonMicrosoftDemangle(Name, Demangled))
return Demangled;
return std::nullopt;
}
@@ -722,7 +707,7 @@ static void printSymbolList(SymbolicFile &Obj,
outs() << '\n' << CurrentFilename << ":\n";
} else if (OutputFormat == sysv) {
outs() << "\n\nSymbols from " << CurrentFilename << ":\n\n";
- if (isSymbolList64Bit(Obj))
+ if (Obj.is64Bit())
outs() << "Name Value Class Type"
<< " Size Line Section\n";
else
@@ -732,7 +717,7 @@ static void printSymbolList(SymbolicFile &Obj,
}
const char *printBlanks, *printDashes, *printFormat;
- if (isSymbolList64Bit(Obj)) {
+ if (Obj.is64Bit()) {
printBlanks = " ";
printDashes = "----------------";
switch (AddressRadix) {
@@ -1044,7 +1029,15 @@ static char getSymbolNMTypeChar(MachOObjectFile &Obj, basic_symbol_iterator I) {
}
static char getSymbolNMTypeChar(TapiFile &Obj, basic_symbol_iterator I) {
- return 's';
+ auto Type = cantFail(Obj.getSymbolType(I->getRawDataRefImpl()));
+ switch (Type) {
+ case SymbolRef::ST_Data:
+ return 'd';
+ case SymbolRef::ST_Function:
+ return 't';
+ default:
+ return 's';
+ }
}
static char getSymbolNMTypeChar(WasmObjectFile &Obj, basic_symbol_iterator I) {
@@ -1671,8 +1664,8 @@ static bool shouldDump(SymbolicFile &Obj) {
!isa<IRObjectFile>(Obj))
return true;
- return isSymbolList64Bit(Obj) ? BitMode != BitModeTy::Bit32
- : BitMode != BitModeTy::Bit64;
+ return Obj.is64Bit() ? BitMode != BitModeTy::Bit32
+ : BitMode != BitModeTy::Bit64;
}
static void getXCOFFExports(XCOFFObjectFile *XCOFFObj,
@@ -1961,26 +1954,39 @@ static bool checkMachOAndArchFlags(SymbolicFile *O, StringRef Filename) {
return true;
}
+static void printArchiveMap(iterator_range<Archive::symbol_iterator> &map,
+ StringRef Filename) {
+ for (auto I : map) {
+ Expected<Archive::Child> C = I.getMember();
+ if (!C) {
+ error(C.takeError(), Filename);
+ break;
+ }
+ Expected<StringRef> FileNameOrErr = C->getName();
+ if (!FileNameOrErr) {
+ error(FileNameOrErr.takeError(), Filename);
+ break;
+ }
+ StringRef SymName = I.getName();
+ outs() << SymName << " in " << FileNameOrErr.get() << "\n";
+ }
+
+ outs() << "\n";
+}
+
static void dumpArchiveMap(Archive *A, StringRef Filename) {
- Archive::symbol_iterator I = A->symbol_begin();
- Archive::symbol_iterator E = A->symbol_end();
- if (I != E) {
+ auto Map = A->symbols();
+ if (!Map.empty()) {
outs() << "Archive map\n";
- for (; I != E; ++I) {
- Expected<Archive::Child> C = I->getMember();
- if (!C) {
- error(C.takeError(), Filename);
- break;
- }
- Expected<StringRef> FileNameOrErr = C->getName();
- if (!FileNameOrErr) {
- error(FileNameOrErr.takeError(), Filename);
- break;
- }
- StringRef SymName = I->getName();
- outs() << SymName << " in " << FileNameOrErr.get() << "\n";
- }
- outs() << "\n";
+ printArchiveMap(Map, Filename);
+ }
+
+ auto ECMap = A->ec_symbols();
+ if (!ECMap) {
+ warn(ECMap.takeError(), Filename);
+ } else if (!ECMap->empty()) {
+ outs() << "Archive EC map\n";
+ printArchiveMap(*ECMap, Filename);
}
}
@@ -2261,11 +2267,7 @@ static std::vector<NMSymbol> dumpSymbolNamesFromFile(StringRef Filename) {
if (error(BufferOrErr.getError(), Filename))
return SymbolList;
- // Always enable opaque pointers, to handle archives with mixed typed and
- // opaque pointer bitcode files gracefully. As we're only reading symbols,
- // the used pointer types don't matter.
LLVMContext Context;
- Context.setOpaquePointers(true);
LLVMContext *ContextPtr = NoLLVMBitcode ? nullptr : &Context;
Expected<std::unique_ptr<Binary>> BinaryOrErr =
createBinary(BufferOrErr.get()->getMemBufferRef(), ContextPtr);
@@ -2302,7 +2304,7 @@ exportSymbolNamesFromFiles(const std::vector<std::string> &InputFilenames) {
printExportSymbolList(SymbolList);
}
-int llvm_nm_main(int argc, char **argv) {
+int llvm_nm_main(int argc, char **argv, const llvm::ToolContext &) {
InitLLVM X(argc, argv);
BumpPtrAllocator A;
StringSaver Saver(A);
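
Two of the llvm-nm changes above are variations on the same cleanup: the dyn_cast chain in isSymbolList64Bit is replaced by the SymbolicFile::is64Bit() virtual, and the archive-map loop is extracted into a printArchiveMap helper that also serves the new EC map. A generic sketch of the first refactor, pushing the type switch behind a virtual; the class names are invented, not the llvm::object hierarchy:

    #include <iostream>
    #include <memory>

    // Before: callers dyn_cast'ed to every concrete type to answer "64-bit?".
    // After: the question is a virtual on the common base class.
    struct SymFile {
      virtual ~SymFile() = default;
      virtual bool is64Bit() const = 0;
    };
    struct Elf64File : SymFile { bool is64Bit() const override { return true; } };
    struct WasmFile : SymFile { bool is64Bit() const override { return false; } };

    void printHeaderLine(const SymFile &Obj) {
      std::cout << (Obj.is64Bit() ? "16-digit addresses\n" : "8-digit addresses\n");
    }

    int main() {
      std::unique_ptr<SymFile> F = std::make_unique<Elf64File>();
      printHeaderLine(*F); // prints "16-digit addresses"
    }
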
diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
index 577b837320ae..265e4fc6073c 100644
--- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
+++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp
@@ -8,6 +8,7 @@
#include "ObjcopyOptions.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/BinaryFormat/COFF.h"
@@ -331,7 +332,11 @@ static const StringMap<MachineInfo> TargetMap{
// SPARC
{"elf32-sparc", {ELF::EM_SPARC, false, false}},
{"elf32-sparcel", {ELF::EM_SPARC, false, true}},
+ // Hexagon
{"elf32-hexagon", {ELF::EM_HEXAGON, false, true}},
+ // LoongArch
+ {"elf32-loongarch", {ELF::EM_LOONGARCH, false, true}},
+ {"elf64-loongarch", {ELF::EM_LOONGARCH, true, true}},
};
static Expected<TargetInfo>
diff --git a/llvm/tools/llvm-objcopy/ObjcopyOpts.td b/llvm/tools/llvm-objcopy/ObjcopyOpts.td
index 0fddd443a4cc..a7e4263271d3 100644
--- a/llvm/tools/llvm-objcopy/ObjcopyOpts.td
+++ b/llvm/tools/llvm-objcopy/ObjcopyOpts.td
@@ -27,7 +27,7 @@ def O : JoinedOrSeparate<["-"], "O">,
defm new_symbol_visibility : Eq<"new-symbol-visibility", "Visibility of "
"symbols generated for binary input or added"
" with --add-symbol unless otherwise"
- " specified. The default value is 'default'.">;
+ " specified. The default value is 'default'">;
def compress_debug_sections
: Joined<["--"], "compress-debug-sections=">,
@@ -37,7 +37,7 @@ def compress_debug_sections
def : Flag<["--"], "compress-debug-sections">, Alias<compress_debug_sections>,
AliasArgs<["zlib"]>;
def decompress_debug_sections : Flag<["--"], "decompress-debug-sections">,
- HelpText<"Decompress DWARF debug sections.">;
+ HelpText<"Decompress DWARF debug sections">;
defm split_dwo
: Eq<"split-dwo", "Equivalent to extract-dwo on the input file to "
"<dwo-file>, then strip-dwo on the input file">,
@@ -52,7 +52,7 @@ defm rename_section
"Renames a section from old to new, optionally with specified flags. "
"Flags supported for GNU compatibility: alloc, load, noload, "
"readonly, exclude, debug, code, data, rom, share, contents, merge, "
- "strings.">,
+ "strings">,
MetaVarName<"old=new[,flag1,...]">;
defm redefine_symbol
: Eq<"redefine-sym", "Change the name of a symbol old to new">,
@@ -64,7 +64,7 @@ defm redefine_symbols
"contains two symbols per line separated with whitespace and may "
"contain comments beginning with '#'. Leading and trailing "
"whitespace is stripped from each line. May be repeated to read "
- "symbols from many files.">,
+ "symbols from many files">,
MetaVarName<"filename">;
defm only_section : Eq<"only-section", "Remove all but <section>">,
@@ -74,18 +74,18 @@ def j : JoinedOrSeparate<["-"], "j">,
HelpText<"Alias for --only-section">;
defm add_section
: Eq<"add-section",
- "Make a section named <section> with the contents of <file>.">,
+ "Make a section named <section> with the contents of <file>">,
MetaVarName<"section=file">;
defm set_section_alignment
- : Eq<"set-section-alignment", "Set alignment for a given section.">,
+ : Eq<"set-section-alignment", "Set alignment for a given section">,
MetaVarName<"section=align">;
defm set_section_flags
: Eq<"set-section-flags",
"Set section flags for a given section. Flags supported for GNU "
"compatibility: alloc, load, noload, readonly, exclude, debug, code, "
- "data, rom, share, contents, merge, strings.">,
+ "data, rom, share, contents, merge, strings">,
MetaVarName<"section=flag1[,flag2,...]">;
defm set_section_type
@@ -136,7 +136,7 @@ defm localize_symbol : Eq<"localize-symbol", "Mark <symbol> as local">,
MetaVarName<"symbol">;
defm localize_symbols
: Eq<"localize-symbols",
- "Reads a list of symbols from <filename> and marks them local.">,
+ "Reads a list of symbols from <filename> and marks them local">,
MetaVarName<"filename">;
def L : JoinedOrSeparate<["-"], "L">,
@@ -148,13 +148,13 @@ defm globalize_symbol : Eq<"globalize-symbol", "Mark <symbol> as global">,
defm globalize_symbols
: Eq<"globalize-symbols",
- "Reads a list of symbols from <filename> and marks them global.">,
+ "Reads a list of symbols from <filename> and marks them global">,
MetaVarName<"filename">;
defm keep_global_symbol
: Eq<"keep-global-symbol",
"Convert all symbols except <symbol> to local. May be repeated to "
- "convert all except a set of symbols to local.">,
+ "convert all except a set of symbols to local">,
MetaVarName<"symbol">;
def G : JoinedOrSeparate<["-"], "G">,
Alias<keep_global_symbol>,
@@ -166,14 +166,14 @@ defm keep_global_symbols
"--keep-global-symbol=<symbol> is set for each one. <filename> "
"contains one symbol per line and may contain comments beginning with "
"'#'. Leading and trailing whitespace is stripped from each line. May "
- "be repeated to read symbols from many files.">,
+ "be repeated to read symbols from many files">,
MetaVarName<"filename">;
defm weaken_symbol : Eq<"weaken-symbol", "Mark <symbol> as weak">,
MetaVarName<"symbol">;
defm weaken_symbols
: Eq<"weaken-symbols",
- "Reads a list of symbols from <filename> and marks them weak.">,
+ "Reads a list of symbols from <filename> and marks them weak">,
MetaVarName<"filename">;
def W : JoinedOrSeparate<["-"], "W">,
@@ -184,7 +184,7 @@ def weaken : Flag<["--"], "weaken">,
defm strip_symbols
: Eq<"strip-symbols",
- "Reads a list of symbols from <filename> and removes them.">,
+ "Reads a list of symbols from <filename> and removes them">,
MetaVarName<"filename">;
defm keep_symbols
@@ -193,7 +193,7 @@ defm keep_symbols
"--keep-symbol=<symbol> is set for each one. <filename> "
"contains one symbol per line and may contain comments beginning with "
"'#'. Leading and trailing whitespace is stripped from each line. May "
- "be repeated to read symbols from many files.">,
+ "be repeated to read symbols from many files">,
MetaVarName<"filename">;
defm dump_section
@@ -209,11 +209,11 @@ defm prefix_alloc_sections
MetaVarName<"prefix">;
defm set_start : Eq<"set-start", "Set the start address to <addr>. Overrides "
- "any previous --change-start or --adjust-start values.">,
+ "any previous --change-start or --adjust-start values">,
MetaVarName<"addr">;
defm change_start : Eq<"change-start", "Add <incr> to the start address. Can be "
"specified multiple times, all values will be applied "
- "cumulatively.">,
+ "cumulatively">,
MetaVarName<"incr">;
def adjust_start : JoinedOrSeparate<["--"], "adjust-start">,
Alias<change_start>,
@@ -224,9 +224,9 @@ defm add_symbol
"global, local, weak, default, hidden, protected, file, section, object, "
"function, indirect-function. Accepted but ignored for "
"compatibility: debug, constructor, warning, indirect, synthetic, "
- "unique-object, before.">,
+ "unique-object, before">,
MetaVarName<"name=[section:]value[,flags]">;
defm update_section
- : Eq<"update-section", "Replace the contents of section <name> with contents from a file <file>.">,
+ : Eq<"update-section", "Replace the contents of section <name> with contents from a file <file>">,
MetaVarName<"name=file">;
diff --git a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
index a24cd889b83f..2afa97601f5c 100644
--- a/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
+++ b/llvm/tools/llvm-objcopy/llvm-objcopy.cpp
@@ -42,8 +42,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileUtilities.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/LLVMDriver.h"
#include "llvm/Support/Memory.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
@@ -51,6 +51,7 @@
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Host.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
@@ -223,7 +224,7 @@ static Error executeObjcopy(ConfigManager &ConfigMgr) {
return Error::success();
}
-int llvm_objcopy_main(int argc, char **argv) {
+int llvm_objcopy_main(int argc, char **argv, const llvm::ToolContext &) {
InitLLVM X(argc, argv);
ToolName = argv[0];
diff --git a/llvm/tools/llvm-objdump/COFFDump.cpp b/llvm/tools/llvm-objdump/COFFDump.cpp
index 3bc7d3ce33b0..e1b38471c77f 100644
--- a/llvm/tools/llvm-objdump/COFFDump.cpp
+++ b/llvm/tools/llvm-objdump/COFFDump.cpp
@@ -37,13 +37,15 @@ template <typename T> struct EnumEntry {
StringRef Name;
};
-class COFFDumper {
+class COFFDumper : public Dumper {
public:
- explicit COFFDumper(const llvm::object::COFFObjectFile &Obj) : Obj(Obj) {
+ explicit COFFDumper(const llvm::object::COFFObjectFile &O)
+ : Dumper(O), Obj(O) {
Is64 = !Obj.getPE32Header();
}
template <class PEHeader> void printPEHeader(const PEHeader &Hdr) const;
+ void printPrivateHeaders(bool MachOOnlyFirst) override;
private:
template <typename T> FormattedNumber formatAddr(T V) const {
@@ -59,6 +61,11 @@ private:
};
} // namespace
+std::unique_ptr<Dumper>
+objdump::createCOFFDumper(const object::COFFObjectFile &Obj) {
+ return std::make_unique<COFFDumper>(Obj);
+}
+
constexpr EnumEntry<uint16_t> PEHeaderMagic[] = {
{uint16_t(COFF::PE32Header::PE32), "PE32"},
{uint16_t(COFF::PE32Header::PE32_PLUS), "PE32+"},
@@ -545,12 +552,18 @@ static void printExportTable(const COFFObjectFile *Obj) {
outs() << " Ordinal base: " << OrdinalBase << "\n";
outs() << " Ordinal RVA Name\n";
for (; I != E; I = ++I) {
- uint32_t Ordinal;
- if (I->getOrdinal(Ordinal))
- return;
uint32_t RVA;
if (I->getExportRVA(RVA))
return;
+ StringRef Name;
+ if (I->getSymbolName(Name))
+ continue;
+ if (!RVA && Name.empty())
+ continue;
+
+ uint32_t Ordinal;
+ if (I->getOrdinal(Ordinal))
+ return;
bool IsForwarder;
if (I->isForwarder(IsForwarder))
return;
@@ -559,14 +572,11 @@ static void printExportTable(const COFFObjectFile *Obj) {
// Export table entries can be used to re-export symbols that
// this COFF file imports from other DLLs. This is rare.
// In most cases IsForwarder is false.
- outs() << format(" % 4d ", Ordinal);
+ outs() << format(" %5d ", Ordinal);
} else {
- outs() << format(" % 4d %# 8x", Ordinal, RVA);
+ outs() << format(" %5d %# 8x", Ordinal, RVA);
}
- StringRef Name;
- if (I->getSymbolName(Name))
- continue;
if (!Name.empty())
outs() << " " << Name;
if (IsForwarder) {
@@ -761,7 +771,7 @@ void objdump::printCOFFUnwindInfo(const COFFObjectFile *Obj) {
}
}
-void objdump::printCOFFFileHeader(const COFFObjectFile &Obj) {
+void COFFDumper::printPrivateHeaders(bool MachOOnlyFirst) {
COFFDumper CD(Obj);
const uint16_t Cha = Obj.getCharacteristics();
outs() << "Characteristics 0x" << Twine::utohexstr(Cha) << '\n';
@@ -849,8 +859,7 @@ void objdump::printCOFFSymbolTable(const COFFObjectFile &coff) {
<< Name;
if (Demangle && Name.startswith("?")) {
int Status = -1;
- char *DemangledSymbol =
- microsoftDemangle(Name.data(), nullptr, nullptr, nullptr, &Status);
+ char *DemangledSymbol = microsoftDemangle(Name, nullptr, &Status);
if (Status == 0 && DemangledSymbol) {
outs() << " (" << StringRef(DemangledSymbol) << ")";
diff --git a/llvm/tools/llvm-objdump/ELFDump.cpp b/llvm/tools/llvm-objdump/ELFDump.cpp
index b98b45e3015a..5b08a4b12858 100644
--- a/llvm/tools/llvm-objdump/ELFDump.cpp
+++ b/llvm/tools/llvm-objdump/ELFDump.cpp
@@ -24,6 +24,39 @@ using namespace llvm;
using namespace llvm::object;
using namespace llvm::objdump;
+namespace {
+template <typename ELFT> class ELFDumper : public Dumper {
+public:
+ ELFDumper(const ELFObjectFile<ELFT> &O) : Dumper(O), Obj(O) {}
+ void printPrivateHeaders(bool MachOOnlyFirst) override;
+ void printDynamicRelocations() override;
+
+private:
+ const ELFObjectFile<ELFT> &Obj;
+
+ const ELFFile<ELFT> &getELFFile() const { return Obj.getELFFile(); }
+ void printDynamicSection();
+ void printProgramHeaders();
+ void printSymbolVersion();
+};
+} // namespace
+
+template <class ELFT>
+static std::unique_ptr<Dumper> createDumper(const ELFObjectFile<ELFT> &Obj) {
+ return std::make_unique<ELFDumper<ELFT>>(Obj);
+}
+
+std::unique_ptr<Dumper>
+objdump::createELFDumper(const object::ELFObjectFileBase &Obj) {
+ if (const auto *O = dyn_cast<ELF32LEObjectFile>(&Obj))
+ return createDumper(*O);
+ if (const auto *O = dyn_cast<ELF32BEObjectFile>(&Obj))
+ return createDumper(*O);
+ if (const auto *O = dyn_cast<ELF64LEObjectFile>(&Obj))
+ return createDumper(*O);
+ return createDumper(cast<ELF64BEObjectFile>(Obj));
+}
+
template <class ELFT>
static Expected<StringRef> getDynamicStrTab(const ELFFile<ELFT> &Elf) {
auto DynamicEntriesOrError = Elf.dynamicEntries();
@@ -108,10 +141,7 @@ static Error getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
Expected<StringRef> SymName = SI->getName();
if (!SymName)
return SymName.takeError();
- if (Demangle)
- Fmt << demangle(std::string(*SymName));
- else
- Fmt << *SymName;
+ Fmt << (Demangle ? demangle(*SymName) : *SymName);
}
} else {
Fmt << "*ABS*";
@@ -169,11 +199,11 @@ uint64_t objdump::getELFSectionLMA(const object::ELFSectionRef &Sec) {
return getSectionLMA(ELFObj->getELFFile(), Sec);
}
-template <class ELFT>
-static void printDynamicSection(const ELFFile<ELFT> &Elf, StringRef Filename) {
+template <class ELFT> void ELFDumper<ELFT>::printDynamicSection() {
+ const ELFFile<ELFT> &Elf = getELFFile();
auto DynamicEntriesOrErr = Elf.dynamicEntries();
if (!DynamicEntriesOrErr) {
- reportWarning(toString(DynamicEntriesOrErr.takeError()), Filename);
+ reportWarning(toString(DynamicEntriesOrErr.takeError()), Obj.getFileName());
return;
}
ArrayRef<typename ELFT::Dyn> DynamicEntries = *DynamicEntriesOrErr;
@@ -203,21 +233,20 @@ static void printDynamicSection(const ELFFile<ELFT> &Elf, StringRef Filename) {
outs() << (Data + Dyn.d_un.d_val) << "\n";
continue;
}
- reportWarning(toString(StrTabOrErr.takeError()), Filename);
+ reportWarning(toString(StrTabOrErr.takeError()), Obj.getFileName());
consumeError(StrTabOrErr.takeError());
}
outs() << format(Fmt, (uint64_t)Dyn.d_un.d_val);
}
}
-template <class ELFT>
-static void printProgramHeaders(const ELFFile<ELFT> &Obj, StringRef FileName) {
+template <class ELFT> void ELFDumper<ELFT>::printProgramHeaders() {
outs() << "\nProgram Header:\n";
- auto ProgramHeaderOrError = Obj.program_headers();
+ auto ProgramHeaderOrError = getELFFile().program_headers();
if (!ProgramHeaderOrError) {
reportWarning("unable to read program headers: " +
toString(ProgramHeaderOrError.takeError()),
- FileName);
+ Obj.getFileName());
return;
}
@@ -274,8 +303,7 @@ static void printProgramHeaders(const ELFFile<ELFT> &Obj, StringRef FileName) {
outs() << "off " << format(Fmt, (uint64_t)Phdr.p_offset) << "vaddr "
<< format(Fmt, (uint64_t)Phdr.p_vaddr) << "paddr "
<< format(Fmt, (uint64_t)Phdr.p_paddr)
- << format("align 2**%u\n",
- countTrailingZeros<uint64_t>(Phdr.p_align))
+ << format("align 2**%u\n", llvm::countr_zero<uint64_t>(Phdr.p_align))
<< " filesz " << format(Fmt, (uint64_t)Phdr.p_filesz)
<< "memsz " << format(Fmt, (uint64_t)Phdr.p_memsz) << "flags "
<< ((Phdr.p_flags & ELF::PF_R) ? "r" : "-")
@@ -284,6 +312,39 @@ static void printProgramHeaders(const ELFFile<ELFT> &Obj, StringRef FileName) {
}
}
+template <typename ELFT> void ELFDumper<ELFT>::printDynamicRelocations() {
+ if (!any_of(Obj.sections(), [](const ELFSectionRef Sec) {
+ return Sec.getType() == ELF::SHT_DYNAMIC;
+ })) {
+ reportError(Obj.getFileName(), "not a dynamic object");
+ return;
+ }
+
+ std::vector<SectionRef> DynRelSec =
+ cast<ObjectFile>(Obj).dynamic_relocation_sections();
+ if (DynRelSec.empty())
+ return;
+
+ outs() << "\nDYNAMIC RELOCATION RECORDS\n";
+ const uint32_t OffsetPadding = (Obj.getBytesInAddress() > 4 ? 16 : 8);
+ const uint32_t TypePadding = 24;
+ outs() << left_justify("OFFSET", OffsetPadding) << ' '
+ << left_justify("TYPE", TypePadding) << " VALUE\n";
+
+ StringRef Fmt = Obj.getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
+ for (const SectionRef &Section : DynRelSec)
+ for (const RelocationRef &Reloc : Section.relocations()) {
+ uint64_t Address = Reloc.getOffset();
+ SmallString<32> RelocName;
+ SmallString<32> ValueStr;
+ Reloc.getTypeName(RelocName);
+ if (Error E = getELFRelocationValueString(&Obj, Reloc, ValueStr))
+ reportError(std::move(E), Obj.getFileName());
+ outs() << format(Fmt.data(), Address) << ' '
+ << left_justify(RelocName, TypePadding) << ' ' << ValueStr << '\n';
+ }
+}
+
template <class ELFT>
static void printSymbolVersionDependency(StringRef FileName,
const ELFFile<ELFT> &Obj,
@@ -342,9 +403,9 @@ static void printSymbolVersionDefinition(const typename ELFT::Shdr &Shdr,
}
}
-template <class ELFT>
-static void printSymbolVersionInfo(const ELFFile<ELFT> &Elf,
- StringRef FileName) {
+template <class ELFT> void ELFDumper<ELFT>::printSymbolVersion() {
+ const ELFFile<ELFT> &Elf = getELFFile();
+ StringRef FileName = Obj.getFileName();
ArrayRef<typename ELFT::Shdr> Sections =
unwrapOrError(Elf.sections(), FileName);
for (const typename ELFT::Shdr &Shdr : Sections) {
@@ -365,35 +426,8 @@ static void printSymbolVersionInfo(const ELFFile<ELFT> &Elf,
}
}
-void objdump::printELFFileHeader(const object::ObjectFile *Obj) {
- if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
- printProgramHeaders(ELFObj->getELFFile(), Obj->getFileName());
- else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
- printProgramHeaders(ELFObj->getELFFile(), Obj->getFileName());
- else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
- printProgramHeaders(ELFObj->getELFFile(), Obj->getFileName());
- else if (const auto *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
- printProgramHeaders(ELFObj->getELFFile(), Obj->getFileName());
-}
-
-void objdump::printELFDynamicSection(const object::ObjectFile *Obj) {
- if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
- printDynamicSection(ELFObj->getELFFile(), Obj->getFileName());
- else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
- printDynamicSection(ELFObj->getELFFile(), Obj->getFileName());
- else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
- printDynamicSection(ELFObj->getELFFile(), Obj->getFileName());
- else if (const auto *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
- printDynamicSection(ELFObj->getELFFile(), Obj->getFileName());
-}
-
-void objdump::printELFSymbolVersionInfo(const object::ObjectFile *Obj) {
- if (const auto *ELFObj = dyn_cast<ELF32LEObjectFile>(Obj))
- printSymbolVersionInfo(ELFObj->getELFFile(), Obj->getFileName());
- else if (const auto *ELFObj = dyn_cast<ELF32BEObjectFile>(Obj))
- printSymbolVersionInfo(ELFObj->getELFFile(), Obj->getFileName());
- else if (const auto *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj))
- printSymbolVersionInfo(ELFObj->getELFFile(), Obj->getFileName());
- else if (const auto *ELFObj = dyn_cast<ELF64BEObjectFile>(Obj))
- printSymbolVersionInfo(ELFObj->getELFFile(), Obj->getFileName());
+template <class ELFT> void ELFDumper<ELFT>::printPrivateHeaders(bool) {
+ printProgramHeaders();
+ printDynamicSection();
+ printSymbolVersion();
}
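
ELFDump.cpp now wraps the per-ELFT printers in an ELFDumper<ELFT> class, and the four near-identical dyn_cast dispatch functions collapse into one createELFDumper factory. A compressed sketch of that shape using dynamic_cast and invented type names rather than the real ELFObjectFile hierarchy:

    #include <iostream>
    #include <memory>

    struct ObjBase { virtual ~ObjBase() = default; };
    struct Elf32LE : ObjBase {};
    struct Elf64LE : ObjBase {};

    struct Dumper {
      virtual ~Dumper() = default;
      virtual void printPrivateHeaders() = 0;
    };

    // One class template covers all flavours that used to get separate code.
    template <class ObjT> struct ElfDumper : Dumper {
      const ObjT &Obj;
      explicit ElfDumper(const ObjT &O) : Obj(O) {}
      void printPrivateHeaders() override { std::cout << "program headers\n"; }
    };

    template <class ObjT>
    std::unique_ptr<Dumper> makeDumper(const ObjT &O) {
      return std::make_unique<ElfDumper<ObjT>>(O);
    }

    // Single dispatch point; the last case assumes the remaining concrete type.
    std::unique_ptr<Dumper> createDumper(const ObjBase &Obj) {
      if (const auto *O = dynamic_cast<const Elf32LE *>(&Obj))
        return makeDumper(*O);
      return makeDumper(static_cast<const Elf64LE &>(Obj));
    }

    int main() {
      Elf64LE Obj;
      createDumper(Obj)->printPrivateHeaders();
    }
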
diff --git a/llvm/tools/llvm-objdump/ELFDump.h b/llvm/tools/llvm-objdump/ELFDump.h
index 9b6b1f341cf3..205c3d256e2b 100644
--- a/llvm/tools/llvm-objdump/ELFDump.h
+++ b/llvm/tools/llvm-objdump/ELFDump.h
@@ -30,8 +30,6 @@ Error getELFRelocationValueString(const object::ELFObjectFileBase *Obj,
uint64_t getELFSectionLMA(const object::ELFSectionRef &Sec);
void printELFFileHeader(const object::ObjectFile *O);
-void printELFDynamicSection(const object::ObjectFile *Obj);
-void printELFSymbolVersionInfo(const object::ObjectFile *Obj);
} // namespace objdump
} // namespace llvm
diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp
index fadc8367a8c1..11fb1cb41a9f 100644
--- a/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/llvm/tools/llvm-objdump/MachODump.cpp
@@ -17,7 +17,6 @@
#include "llvm-c/Disassembler.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Config/config.h"
#include "llvm/DebugInfo/DIContext.h"
@@ -49,6 +48,7 @@
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cstring>
#include <system_error>
@@ -191,8 +191,21 @@ struct SymbolSorter {
return AAddr < BAddr;
}
};
+
+class MachODumper : public Dumper {
+ const object::MachOObjectFile &Obj;
+
+public:
+ MachODumper(const object::MachOObjectFile &O) : Dumper(O), Obj(O) {}
+ void printPrivateHeaders(bool OnlyFirst) override;
+};
} // namespace
+std::unique_ptr<Dumper>
+objdump::createMachODumper(const object::MachOObjectFile &Obj) {
+ return std::make_unique<MachODumper>(Obj);
+}
+
// Types for the sorted data in code table that is built before disassembly
// and the predicate function to sort them.
typedef std::pair<uint64_t, DiceRef> DiceTableEntry;
@@ -2142,6 +2155,8 @@ static void printObjcMetaData(MachOObjectFile *O, bool verbose);
static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF,
StringRef ArchiveMemberName = StringRef(),
StringRef ArchitectureName = StringRef()) {
+ std::unique_ptr<Dumper> D = createMachODumper(*MachOOF);
+
// If we are doing some processing of the Mach-O file here, print the header
// info; don't print it otherwise, e.g. when only the UniversalHeaders or
// ArchiveHeaders are being printed.
@@ -2227,7 +2242,7 @@ static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF,
if (DylibId)
PrintDylibs(MachOOF, true);
if (SymbolTable)
- printSymbolTable(*MachOOF, ArchiveName, ArchitectureName);
+ D->printSymbolTable(ArchiveName, ArchitectureName);
if (UnwindInfo)
printMachOUnwindInfo(MachOOF);
if (PrivateHeaders) {
@@ -7280,9 +7295,7 @@ static const char *SymbolizerSymbolLookUp(void *DisInfo,
} else if (SymbolName != nullptr && strncmp(SymbolName, "__Z", 3) == 0) {
if (info->demangled_name != nullptr)
free(info->demangled_name);
- int status;
- info->demangled_name =
- itaniumDemangle(SymbolName + 1, nullptr, nullptr, &status);
+ info->demangled_name = itaniumDemangle(SymbolName + 1);
if (info->demangled_name != nullptr) {
*ReferenceName = info->demangled_name;
*ReferenceType = LLVMDisassembler_ReferenceType_DeMangled_Name;
@@ -7380,9 +7393,7 @@ static const char *SymbolizerSymbolLookUp(void *DisInfo,
} else if (SymbolName != nullptr && strncmp(SymbolName, "__Z", 3) == 0) {
if (info->demangled_name != nullptr)
free(info->demangled_name);
- int status;
- info->demangled_name =
- itaniumDemangle(SymbolName + 1, nullptr, nullptr, &status);
+ info->demangled_name = itaniumDemangle(SymbolName + 1);
if (info->demangled_name != nullptr) {
*ReferenceName = info->demangled_name;
*ReferenceType = LLVMDisassembler_ReferenceType_DeMangled_Name;
@@ -10362,6 +10373,8 @@ static void PrintLinkEditDataCommand(MachO::linkedit_data_command ld,
outs() << " cmd LC_DYLD_EXPORTS_TRIE\n";
else if (ld.cmd == MachO::LC_DYLD_CHAINED_FIXUPS)
outs() << " cmd LC_DYLD_CHAINED_FIXUPS\n";
+ else if (ld.cmd == MachO::LC_ATOM_INFO)
+ outs() << " cmd LC_ATOM_INFO\n";
else
outs() << " cmd " << ld.cmd << " (?)\n";
outs() << " cmdsize " << ld.cmdsize;
@@ -10507,7 +10520,8 @@ static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t filetype,
Command.C.cmd == MachO::LC_DYLIB_CODE_SIGN_DRS ||
Command.C.cmd == MachO::LC_LINKER_OPTIMIZATION_HINT ||
Command.C.cmd == MachO::LC_DYLD_EXPORTS_TRIE ||
- Command.C.cmd == MachO::LC_DYLD_CHAINED_FIXUPS) {
+ Command.C.cmd == MachO::LC_DYLD_CHAINED_FIXUPS ||
+ Command.C.cmd == MachO::LC_ATOM_INFO) {
MachO::linkedit_data_command Ld =
Obj->getLinkeditDataLoadCommand(Command);
PrintLinkEditDataCommand(Ld, Buf.size());
@@ -10540,6 +10554,12 @@ void objdump::printMachOFileHeader(const object::ObjectFile *Obj) {
PrintMachHeader(file, Verbose);
}
+void MachODumper::printPrivateHeaders(bool OnlyFirst) {
+ printMachOFileHeader(&Obj);
+ if (!OnlyFirst)
+ printMachOLoadCommands(&Obj);
+}
+
void objdump::printMachOLoadCommands(const object::ObjectFile *Obj) {
const MachOObjectFile *file = cast<const MachOObjectFile>(Obj);
uint32_t filetype = 0;
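
The demangling hunks above move to the single-argument itaniumDemangle overload, which returns a malloc'd C string (or nullptr) that the caller frees; the +1 on SymbolName skips the leading underscore of Mach-O symbols. For a standalone illustration of the same ownership contract, here is the equivalent call through the Itanium ABI's abi::__cxa_demangle rather than the LLVM API the patch actually uses:

    #include <cstdio>
    #include <cstdlib>
    #include <cxxabi.h>

    int main() {
      // "_Z3fooi" is the Itanium mangling of foo(int).
      int Status = 0;
      char *Demangled = abi::__cxa_demangle("_Z3fooi", nullptr, nullptr, &Status);
      if (Status == 0 && Demangled)
        std::puts(Demangled); // prints "foo(int)"
      std::free(Demangled);   // caller owns the malloc'd buffer
    }
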
diff --git a/llvm/tools/llvm-objdump/ObjdumpOpts.td b/llvm/tools/llvm-objdump/ObjdumpOpts.td
index de7f883d24a8..e3e74762420d 100644
--- a/llvm/tools/llvm-objdump/ObjdumpOpts.td
+++ b/llvm/tools/llvm-objdump/ObjdumpOpts.td
@@ -65,7 +65,11 @@ def : Flag<["-"], "D">, Alias<disassemble_all>,
def symbol_description : Flag<["--"], "symbol-description">,
HelpText<"Add symbol description for disassembly. This "
- "option is for XCOFF files only.">;
+ "option is for XCOFF files only">;
+
+def traceback_table : Flag<["--"], "traceback-table">,
+ HelpText<"Decode traceback table in disassembly. Implies --disassemble. "
+ "This option is for XCOFF files only">;
def disassemble_symbols_EQ : Joined<["--"], "disassemble-symbols=">,
HelpText<"List of symbols to disassemble. "
@@ -145,10 +149,10 @@ def reloc : Flag<["--"], "reloc">,
def : Flag<["-"], "r">, Alias<reloc>, HelpText<"Alias for --reloc">;
def print_imm_hex : Flag<["--"], "print-imm-hex">,
- HelpText<"Use hex format for immediate values">;
+ HelpText<"Use hex format for immediate values (default)">;
def no_print_imm_hex : Flag<["--"], "no-print-imm-hex">,
- HelpText<"Do not use hex format for immediate values (default)">;
+ HelpText<"Do not use hex format for immediate values">;
def : Flag<["--"], "print-imm-hex=false">, Alias<no_print_imm_hex>;
def private_headers : Flag<["--"], "private-headers">,
diff --git a/llvm/tools/llvm-objdump/SourcePrinter.cpp b/llvm/tools/llvm-objdump/SourcePrinter.cpp
index 6736cbc9ad5f..b2fe56cf2e1c 100644
--- a/llvm/tools/llvm-objdump/SourcePrinter.cpp
+++ b/llvm/tools/llvm-objdump/SourcePrinter.cpp
@@ -26,10 +26,6 @@
namespace llvm {
namespace objdump {
-unsigned getInstStartColumn(const MCSubtargetInfo &STI) {
- return !ShowRawInsn ? 16 : STI.getTargetTriple().isX86() ? 40 : 24;
-}
-
bool LiveVariable::liveAtAddress(object::SectionedAddress Addr) {
if (LocExpr.Range == std::nullopt)
return false;
@@ -452,6 +448,34 @@ void SourcePrinter::printLines(formatted_raw_ostream &OS,
}
}
+// Get the source line text for LineInfo:
+// - use LineInfo::LineSource if available;
+// - otherwise, fall back to the cached source lines in LineCache.
+StringRef SourcePrinter::getLine(const DILineInfo &LineInfo,
+ StringRef ObjectFilename) {
+ if (LineInfo.LineSource)
+ return LineInfo.LineSource.value();
+
+ if (SourceCache.find(LineInfo.FileName) == SourceCache.end())
+ if (!cacheSource(LineInfo))
+ return {};
+
+ auto LineBuffer = LineCache.find(LineInfo.FileName);
+ if (LineBuffer == LineCache.end())
+ return {};
+
+ if (LineInfo.Line > LineBuffer->second.size()) {
+ reportWarning(
+ formatv("debug info line number {0} exceeds the number of lines in {1}",
+ LineInfo.Line, LineInfo.FileName),
+ ObjectFilename);
+ return {};
+ }
+
+ // Vector begins at 0, line numbers are non-zero
+ return LineBuffer->second[LineInfo.Line - 1];
+}
+
void SourcePrinter::printSources(formatted_raw_ostream &OS,
const DILineInfo &LineInfo,
StringRef ObjectFilename, StringRef Delimiter,
@@ -461,21 +485,9 @@ void SourcePrinter::printSources(formatted_raw_ostream &OS,
OldLineInfo.FileName == LineInfo.FileName))
return;
- if (SourceCache.find(LineInfo.FileName) == SourceCache.end())
- if (!cacheSource(LineInfo))
- return;
- auto LineBuffer = LineCache.find(LineInfo.FileName);
- if (LineBuffer != LineCache.end()) {
- if (LineInfo.Line > LineBuffer->second.size()) {
- reportWarning(
- formatv(
- "debug info line number {0} exceeds the number of lines in {1}",
- LineInfo.Line, LineInfo.FileName),
- ObjectFilename);
- return;
- }
- // Vector begins at 0, line numbers are non-zero
- OS << Delimiter << LineBuffer->second[LineInfo.Line - 1];
+ StringRef Line = getLine(LineInfo, ObjectFilename);
+ if (!Line.empty()) {
+ OS << Delimiter << Line;
LVP.printBetweenInsts(OS, true);
}
}
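
The SourcePrinter refactor above extracts a getLine helper: prefer the LineSource embedded in the debug info, otherwise fall back to a per-file line cache and emit a warning when the debug-info line number runs past the end of the cached file. A self-contained sketch of the cache-and-bounds-check part with standard containers (the stderr warning stands in for reportWarning):

    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <vector>

    using LineCacheT = std::unordered_map<std::string, std::vector<std::string>>;

    // Return the text of 1-based line `Line` of `File`, or "" if the cache has
    // no entry for the file or the line number is out of range.
    std::string getLine(const LineCacheT &Cache, const std::string &File,
                        unsigned Line) {
      auto It = Cache.find(File);
      if (It == Cache.end())
        return {};
      if (Line == 0 || Line > It->second.size()) {
        std::cerr << "warning: debug info line number " << Line
                  << " exceeds the number of lines in " << File << '\n';
        return {};
      }
      return It->second[Line - 1]; // vector is 0-based, line numbers are 1-based
    }

    int main() {
      LineCacheT Cache{{"a.c", {"int x;", "int y;"}}};
      std::cout << getLine(Cache, "a.c", 2) << '\n'; // "int y;"
      getLine(Cache, "a.c", 9);                      // warns, returns ""
    }
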
diff --git a/llvm/tools/llvm-objdump/SourcePrinter.h b/llvm/tools/llvm-objdump/SourcePrinter.h
index 6209bb0e43e4..fc67fc650744 100644
--- a/llvm/tools/llvm-objdump/SourcePrinter.h
+++ b/llvm/tools/llvm-objdump/SourcePrinter.h
@@ -151,6 +151,10 @@ private:
StringRef ObjectFilename, StringRef Delimiter,
LiveVariablePrinter &LVP);
+ // Returns line source code corresponding to `LineInfo`.
+ // Returns empty string if source code cannot be found.
+ StringRef getLine(const DILineInfo &LineInfo, StringRef ObjectFilename);
+
public:
SourcePrinter() = default;
SourcePrinter(const object::ObjectFile *Obj, StringRef DefaultArch);
diff --git a/llvm/tools/llvm-objdump/WasmDump.cpp b/llvm/tools/llvm-objdump/WasmDump.cpp
index df0a08e5b1dd..a1d767d81003 100644
--- a/llvm/tools/llvm-objdump/WasmDump.cpp
+++ b/llvm/tools/llvm-objdump/WasmDump.cpp
@@ -19,12 +19,25 @@
using namespace llvm;
using namespace llvm::object;
-void objdump::printWasmFileHeader(const object::ObjectFile *Obj) {
- const auto *File = cast<const WasmObjectFile>(Obj);
+namespace {
+class WasmDumper : public objdump::Dumper {
+ const WasmObjectFile &Obj;
+public:
+ WasmDumper(const WasmObjectFile &O) : Dumper(O), Obj(O) {}
+ void printPrivateHeaders(bool MachOOnlyFirst) override;
+};
+} // namespace
+
+std::unique_ptr<objdump::Dumper>
+objdump::createWasmDumper(const object::WasmObjectFile &Obj) {
+ return std::make_unique<WasmDumper>(Obj);
+}
+
+void WasmDumper::printPrivateHeaders(bool) {
outs() << "Program Header:\n";
outs() << "Version: 0x";
- outs().write_hex(File->getHeader().Version);
+ outs().write_hex(Obj.getHeader().Version);
outs() << "\n";
}
diff --git a/llvm/tools/llvm-objdump/XCOFFDump.cpp b/llvm/tools/llvm-objdump/XCOFFDump.cpp
index 7171e2eb6eb3..87b1679a7969 100644
--- a/llvm/tools/llvm-objdump/XCOFFDump.cpp
+++ b/llvm/tools/llvm-objdump/XCOFFDump.cpp
@@ -14,10 +14,34 @@
#include "XCOFFDump.h"
#include "llvm-objdump.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Demangle/Demangle.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/FormattedStream.h"
+#include <algorithm>
using namespace llvm;
using namespace llvm::object;
+using namespace llvm::XCOFF;
+using namespace llvm::support;
+
+namespace {
+class XCOFFDumper : public objdump::Dumper {
+public:
+ XCOFFDumper(const object::XCOFFObjectFile &O) : Dumper(O) {}
+ void printPrivateHeaders(bool MachOOnlyFirst) override;
+};
+} // namespace
+
+std::unique_ptr<objdump::Dumper>
+objdump::createXCOFFDumper(const object::XCOFFObjectFile &Obj) {
+ return std::make_unique<XCOFFDumper>(Obj);
+}
+
+void XCOFFDumper::printPrivateHeaders(bool) {}
Error objdump::getXCOFFRelocationValueString(const XCOFFObjectFile &Obj,
const RelocationRef &Rel,
@@ -32,10 +56,8 @@ Error objdump::getXCOFFRelocationValueString(const XCOFFObjectFile &Obj,
if (!SymNameOrErr)
return SymNameOrErr.takeError();
- std::string SymName = (*SymNameOrErr).str();
- if (Demangle)
- SymName = demangle(SymName);
-
+ std::string SymName =
+ Demangle ? demangle(*SymNameOrErr) : SymNameOrErr->str();
if (SymbolDescription)
SymName = getXCOFFSymbolDescription(createSymbolInfo(Obj, *SymI), SymName);
@@ -109,3 +131,299 @@ std::string objdump::getXCOFFSymbolDescription(const SymbolInfoTy &SymbolInfo,
return Result;
}
+
+#define PRINTBOOL(Prefix, Obj, Field) \
+ OS << Prefix << " " << ((Obj.Field()) ? "+" : "-") << #Field
+
+#define PRINTGET(Prefix, Obj, Field) \
+ OS << Prefix << " " << #Field << " = " \
+ << static_cast<unsigned>(Obj.get##Field())
+
+#define PRINTOPTIONAL(Field) \
+ if (TbTable.get##Field()) { \
+ OS << '\n'; \
+ printRawData(Bytes.slice(Index, 4), Address + Index, OS, STI); \
+ Index += 4; \
+ OS << "\t# " << #Field << " = " << *TbTable.get##Field(); \
+ }
+
+void objdump::dumpTracebackTable(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ formatted_raw_ostream &OS, uint64_t End,
+ const MCSubtargetInfo &STI,
+ const XCOFFObjectFile *Obj) {
+ uint64_t Index = 0;
+ unsigned TabStop = getInstStartColumn(STI) - 1;
+ // Print traceback table boundary.
+ printRawData(Bytes.slice(Index, 4), Address, OS, STI);
+ OS << "\t# Traceback table start\n";
+ Index += 4;
+
+ uint64_t Size = End - Address;
+ bool Is64Bit = Obj->is64Bit();
+
+ // XCOFFTracebackTable::create modifies the size parameter, so ensure Size
+ // isn't changed.
+ uint64_t SizeCopy = End - Address;
+ Expected<XCOFFTracebackTable> TTOrErr =
+ XCOFFTracebackTable::create(Bytes.data() + Index, SizeCopy, Is64Bit);
+
+ if (!TTOrErr) {
+ std::string WarningMsgStr;
+ raw_string_ostream WarningStream(WarningMsgStr);
+ WarningStream << "failure parsing traceback table with address: 0x"
+ << utohexstr(Address) + "\n>>> "
+ << toString(TTOrErr.takeError())
+ << "\n>>> Raw traceback table data is:\n";
+
+ uint64_t LastNonZero = Index;
+ for (uint64_t I = Index; I < Size; I += 4)
+ if (support::endian::read32be(Bytes.slice(I, 4).data()) != 0)
+ LastNonZero = I + 4 > Size ? Size : I + 4;
+
+ if (Size - LastNonZero <= 4)
+ LastNonZero = Size;
+
+ formatted_raw_ostream FOS(WarningStream);
+ while (Index < LastNonZero) {
+ printRawData(Bytes.slice(Index, 4), Address + Index, FOS, STI);
+ Index += 4;
+ WarningStream << '\n';
+ }
+
+ // Print all remaining zeroes as ...
+ if (Size - LastNonZero >= 8)
+ WarningStream << "\t\t...\n";
+
+ reportWarning(WarningMsgStr, Obj->getFileName());
+ return;
+ }
+
+ auto PrintBytes = [&](uint64_t N) {
+ printRawData(Bytes.slice(Index, N), Address + Index, OS, STI);
+ Index += N;
+ };
+
+ XCOFFTracebackTable TbTable = *TTOrErr;
+ // Print the first of the 8 bytes of mandatory fields.
+ PrintBytes(1);
+ OS << format("\t# Version = %i", TbTable.getVersion()) << '\n';
+
+ // Print the second of the 8 bytes of mandatory fields.
+ PrintBytes(1);
+ TracebackTable::LanguageID LangId =
+ static_cast<TracebackTable::LanguageID>(TbTable.getLanguageID());
+ OS << "\t# Language = " << getNameForTracebackTableLanguageId(LangId) << '\n';
+
+ auto Split = [&]() {
+ OS << '\n';
+ OS.indent(TabStop);
+ };
+
+ // Print the third of the 8 bytes of mandatory fields.
+ PrintBytes(1);
+ PRINTBOOL("\t#", TbTable, isGlobalLinkage);
+ PRINTBOOL(",", TbTable, isOutOfLineEpilogOrPrologue);
+ Split();
+ PRINTBOOL("\t ", TbTable, hasTraceBackTableOffset);
+ PRINTBOOL(",", TbTable, isInternalProcedure);
+ Split();
+ PRINTBOOL("\t ", TbTable, hasControlledStorage);
+ PRINTBOOL(",", TbTable, isTOCless);
+ Split();
+ PRINTBOOL("\t ", TbTable, isFloatingPointPresent);
+ Split();
+ PRINTBOOL("\t ", TbTable, isFloatingPointOperationLogOrAbortEnabled);
+ OS << '\n';
+
+ // Print the 4th of the 8 bytes of mandatory fields.
+ PrintBytes(1);
+ PRINTBOOL("\t#", TbTable, isInterruptHandler);
+ PRINTBOOL(",", TbTable, isFuncNamePresent);
+ PRINTBOOL(",", TbTable, isAllocaUsed);
+ Split();
+ PRINTGET("\t ", TbTable, OnConditionDirective);
+ PRINTBOOL(",", TbTable, isCRSaved);
+ PRINTBOOL(",", TbTable, isLRSaved);
+ OS << '\n';
+
+ // Print the 5th of the 8 bytes of mandatory fields.
+ PrintBytes(1);
+ PRINTBOOL("\t#", TbTable, isBackChainStored);
+ PRINTBOOL(",", TbTable, isFixup);
+ PRINTGET(",", TbTable, NumOfFPRsSaved);
+ OS << '\n';
+
+ // Print the 6th of the 8 bytes of mandatory fields.
+ PrintBytes(1);
+ PRINTBOOL("\t#", TbTable, hasExtensionTable);
+ PRINTBOOL(",", TbTable, hasVectorInfo);
+ PRINTGET(",", TbTable, NumOfGPRsSaved);
+ OS << '\n';
+
+ // Print the 7th of the 8 bytes of mandatory fields.
+ PrintBytes(1);
+ PRINTGET("\t#", TbTable, NumberOfFixedParms);
+ OS << '\n';
+
+ // Print the 8th of the 8 bytes of mandatory fields.
+ PrintBytes(1);
+ PRINTGET("\t#", TbTable, NumberOfFPParms);
+ PRINTBOOL(",", TbTable, hasParmsOnStack);
+
+ PRINTOPTIONAL(ParmsType);
+ PRINTOPTIONAL(TraceBackTableOffset);
+ PRINTOPTIONAL(HandlerMask);
+ PRINTOPTIONAL(NumOfCtlAnchors);
+
+ if (TbTable.getControlledStorageInfoDisp()) {
+ SmallVector<uint32_t, 8> Disp = *TbTable.getControlledStorageInfoDisp();
+ for (unsigned I = 0; I < Disp.size(); ++I) {
+ OS << '\n';
+ PrintBytes(4);
+ OS << "\t" << (I ? " " : "#") << " ControlledStorageInfoDisp[" << I
+ << "] = " << Disp[I];
+ }
+ }
+
+ // If there is a name, print the function name and function name length.
+ if (TbTable.isFuncNamePresent()) {
+ uint16_t FunctionNameLen = TbTable.getFunctionName()->size();
+ if (FunctionNameLen == 0) {
+ OS << '\n';
+ reportWarning(
+ "the length of the function name must be greater than zero if the "
+ "isFuncNamePresent bit is set in the traceback table",
+ Obj->getFileName());
+ return;
+ }
+
+ OS << '\n';
+ PrintBytes(2);
+ OS << "\t# FunctionNameLen = " << FunctionNameLen;
+
+ uint16_t RemainingBytes = FunctionNameLen;
+ bool HasPrinted = false;
+ while (RemainingBytes > 0) {
+ OS << '\n';
+ uint16_t PrintLen = RemainingBytes >= 4 ? 4 : RemainingBytes;
+ printRawData(Bytes.slice(Index, PrintLen), Address + Index, OS, STI);
+ Index += PrintLen;
+ RemainingBytes -= PrintLen;
+
+ if (!HasPrinted) {
+ OS << "\t# FunctionName = " << *TbTable.getFunctionName();
+ HasPrinted = true;
+ }
+ }
+ }
+
+ if (TbTable.isAllocaUsed()) {
+ OS << '\n';
+ PrintBytes(1);
+ OS << format("\t# AllocaRegister = %u", *TbTable.getAllocaRegister());
+ }
+
+ if (TbTable.getVectorExt()) {
+ OS << '\n';
+ TBVectorExt VecExt = *TbTable.getVectorExt();
+ // Print first byte of VectorExt.
+ PrintBytes(1);
+ PRINTGET("\t#", VecExt, NumberOfVRSaved);
+ PRINTBOOL(",", VecExt, isVRSavedOnStack);
+ PRINTBOOL(",", VecExt, hasVarArgs);
+ OS << '\n';
+
+ // Print the second byte of VectorExt.
+ PrintBytes(1);
+ PRINTGET("\t#", VecExt, NumberOfVectorParms);
+ PRINTBOOL(",", VecExt, hasVMXInstruction);
+ OS << '\n';
+
+ PrintBytes(4);
+ OS << "\t# VectorParmsInfoString = " << VecExt.getVectorParmsInfo();
+
+ // There are two bytes of padding after vector info.
+ OS << '\n';
+ PrintBytes(2);
+ OS << "\t# Padding";
+ }
+
+ if (TbTable.getExtensionTable()) {
+ OS << '\n';
+ PrintBytes(1);
+ ExtendedTBTableFlag Flag =
+ static_cast<ExtendedTBTableFlag>(*TbTable.getExtensionTable());
+ OS << "\t# ExtensionTable = " << getExtendedTBTableFlagString(Flag);
+ }
+
+ if (TbTable.getEhInfoDisp()) {
+ // There are 4 bytes alignment before eh info displacement.
+ if (Index % 4) {
+ OS << '\n';
+ PrintBytes(4 - Index % 4);
+ OS << "\t# Alignment padding for eh info displacement";
+ }
+ OS << '\n';
+ // The size of the displacement (address) is 4 bytes in 32-bit object files,
+ // and 8 bytes in 64-bit object files.
+ PrintBytes(4);
+ OS << "\t# EH info displacement";
+ if (Is64Bit) {
+ OS << '\n';
+ PrintBytes(4);
+ }
+ }
+
+ OS << '\n';
+ if (End == Address + Index)
+ return;
+
+ Size = End - Address;
+
+ const char *LineSuffix = "\t# Padding\n";
+ auto IsWordZero = [&](uint64_t WordPos) {
+ if (WordPos >= Size)
+ return false;
+ uint64_t LineLength = std::min(4 - WordPos % 4, Size - WordPos);
+ return std::all_of(Bytes.begin() + WordPos,
+ Bytes.begin() + WordPos + LineLength,
+ [](uint8_t Byte) { return Byte == 0; });
+ };
+
+ bool AreWordsZero[] = {IsWordZero(Index), IsWordZero(alignTo(Index, 4) + 4),
+ IsWordZero(alignTo(Index, 4) + 8)};
+ bool ShouldPrintLine = true;
+ while (true) {
+ // Determine the length of the line (4, except for the first line, which
+ // will be just enough to align to the word boundary, and the last line,
+ // which will be the remainder of the data).
+ uint64_t LineLength = std::min(4 - Index % 4, Size - Index);
+ if (ShouldPrintLine) {
+ // Print the line.
+ printRawData(Bytes.slice(Index, LineLength), Address + Index, OS, STI);
+ OS << LineSuffix;
+ LineSuffix = "\n";
+ }
+
+ Index += LineLength;
+ if (Index == Size)
+ return;
+
+ // For 3 or more consecutive lines of zeros, skip all but the first one, and
+ // replace them with "...".
+ if (AreWordsZero[0] && AreWordsZero[1] && AreWordsZero[2]) {
+ if (ShouldPrintLine)
+ OS << std::string(8, ' ') << "...\n";
+ ShouldPrintLine = false;
+ } else if (!AreWordsZero[1]) {
+ // We have reached the end of a skipped block of zeros.
+ ShouldPrintLine = true;
+ }
+ AreWordsZero[0] = AreWordsZero[1];
+ AreWordsZero[1] = AreWordsZero[2];
+ AreWordsZero[2] = IsWordZero(Index + 8);
+ }
+}
+#undef PRINTBOOL
+#undef PRINTGET
+#undef PRINTOPTIONAL
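
The padding loop at the end of dumpTracebackTable keeps a three-word lookahead so that runs of three or more all-zero words collapse into the first word of the run followed by "...". A simplified, run-length-based sketch of that folding idea on a flat array of words (the patch itself adds a sliding window and word-boundary alignment on top of this):

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Print 32-bit words, replacing each run of >= 3 zero words with its first
    // word followed by "...".
    void dumpWords(const std::vector<uint32_t> &Words) {
      for (size_t I = 0; I < Words.size();) {
        size_t Run = I;
        while (Run < Words.size() && Words[Run] == 0)
          ++Run;
        if (Run - I >= 3) {
          std::printf("%08x\n        ...\n", (unsigned)Words[I]);
          I = Run;
        } else {
          std::printf("%08x\n", (unsigned)Words[I]);
          ++I;
        }
      }
    }

    int main() {
      dumpWords({0x1, 0, 0, 0, 0, 0x2}); // zeros fold into one line plus "..."
    }
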
diff --git a/llvm/tools/llvm-objdump/XCOFFDump.h b/llvm/tools/llvm-objdump/XCOFFDump.h
index 35d1c0f1ebbe..cf5b19f910ea 100644
--- a/llvm/tools/llvm-objdump/XCOFFDump.h
+++ b/llvm/tools/llvm-objdump/XCOFFDump.h
@@ -13,6 +13,8 @@
namespace llvm {
+class formatted_raw_ostream;
+class MCSubtargetInfo;
struct SymbolInfoTy;
namespace objdump {
@@ -32,6 +34,11 @@ std::string getXCOFFSymbolDescription(const SymbolInfoTy &SymbolInfo,
Error getXCOFFRelocationValueString(const object::XCOFFObjectFile &Obj,
const object::RelocationRef &RelRef,
llvm::SmallVectorImpl<char> &Result);
+
+void dumpTracebackTable(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ formatted_raw_ostream &OS, uint64_t End,
+ const MCSubtargetInfo &STI,
+ const object::XCOFFObjectFile *Obj);
} // namespace objdump
} // namespace llvm
#endif
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index 930b132533cd..bd45ed199767 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -30,7 +30,6 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
@@ -49,7 +48,6 @@
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/Archive.h"
@@ -74,7 +72,6 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/GraphWriter.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
@@ -82,6 +79,8 @@
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cctype>
#include <cstring>
@@ -196,6 +195,7 @@ bool objdump::Demangle;
bool objdump::Disassemble;
bool objdump::DisassembleAll;
bool objdump::SymbolDescription;
+bool objdump::TracebackTable;
static std::vector<std::string> DisassembleSymbols;
static bool DisassembleZeroes;
static std::vector<std::string> DisassemblerOptions;
@@ -247,6 +247,31 @@ static StringRef ToolName;
std::unique_ptr<BuildIDFetcher> BIDFetcher;
ExitOnError ExitOnErr;
+void Dumper::reportUniqueWarning(Error Err) {
+ reportUniqueWarning(toString(std::move(Err)));
+}
+
+void Dumper::reportUniqueWarning(const Twine &Msg) {
+ if (Warnings.insert(StringRef(Msg.str())).second)
+ reportWarning(Msg, O.getFileName());
+}
+
+static Expected<std::unique_ptr<Dumper>> createDumper(const ObjectFile &Obj) {
+ if (const auto *O = dyn_cast<COFFObjectFile>(&Obj))
+ return createCOFFDumper(*O);
+ if (const auto *O = dyn_cast<ELFObjectFileBase>(&Obj))
+ return createELFDumper(*O);
+ if (const auto *O = dyn_cast<MachOObjectFile>(&Obj))
+ return createMachODumper(*O);
+ if (const auto *O = dyn_cast<WasmObjectFile>(&Obj))
+ return createWasmDumper(*O);
+ if (const auto *O = dyn_cast<XCOFFObjectFile>(&Obj))
+ return createXCOFFDumper(*O);
+
+ return createStringError(errc::invalid_argument,
+ "unsupported object file format");
+}
+
namespace {
struct FilterResult {
// True if the section should not be skipped.
@@ -442,14 +467,36 @@ static bool getHidden(RelocationRef RelRef) {
return false;
}
-namespace {
-
/// Get the column at which we want to start printing the instruction
/// disassembly, taking into account anything which appears to the left of it.
-unsigned getInstStartColumn(const MCSubtargetInfo &STI) {
+unsigned objdump::getInstStartColumn(const MCSubtargetInfo &STI) {
return !ShowRawInsn ? 16 : STI.getTargetTriple().isX86() ? 40 : 24;
}
+static void AlignToInstStartColumn(size_t Start, const MCSubtargetInfo &STI,
+ raw_ostream &OS) {
+ // The output of printInst starts with a tab. Print some spaces so that
+ // the tab has 1 column and advances to the target tab stop.
+ unsigned TabStop = getInstStartColumn(STI);
+ unsigned Column = OS.tell() - Start;
+ OS.indent(Column < TabStop - 1 ? TabStop - 1 - Column : 7 - Column % 8);
+}
+
+void objdump::printRawData(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ formatted_raw_ostream &OS,
+ MCSubtargetInfo const &STI) {
+ size_t Start = OS.tell();
+ if (LeadingAddr)
+ OS << format("%8" PRIx64 ":", Address);
+ if (ShowRawInsn) {
+ OS << ' ';
+ dumpBytes(Bytes, OS);
+ }
+ AlignToInstStartColumn(Start, STI, OS);
+}
+
+namespace {
+
static bool isAArch64Elf(const ObjectFile &Obj) {
const auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj);
return Elf && Elf->getEMachine() == ELF::EM_AARCH64;
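A worked example of the alignment arithmetic in AlignToInstStartColumn above, using illustrative numbers: with ShowRawInsn enabled on x86, getInstStartColumn returns 40; if the address and raw bytes end at column 25, the stream is indented 39 - 25 = 14 spaces so that the leading tab printed by printInst is emitted at column 39 and expands to column 40. If the prefix has already reached column 43, the fallback indents 7 - 43 % 8 = 4 spaces and the tab advances to the next 8-column stop at 48.

  // Illustrative only: the indent chosen before printInst's leading tab.
  unsigned paddingBeforeTab(unsigned Column, unsigned TabStop) {
    return Column < TabStop - 1 ? TabStop - 1 - Column // tab then lands on TabStop
                                : 7 - Column % 8;      // else the next 8-column stop
  }
  // paddingBeforeTab(25, 40) == 14;  paddingBeforeTab(43, 40) == 4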
@@ -489,15 +536,6 @@ static void printRelocation(formatted_raw_ostream &OS, StringRef FileName,
OS << Name << "\t" << Val;
}
-static void AlignToInstStartColumn(size_t Start, const MCSubtargetInfo &STI,
- raw_ostream &OS) {
- // The output of printInst starts with a tab. Print some spaces so that
- // the tab has 1 column and advances to the target tab stop.
- unsigned TabStop = getInstStartColumn(STI);
- unsigned Column = OS.tell() - Start;
- OS.indent(Column < TabStop - 1 ? TabStop - 1 - Column : 7 - Column % 8);
-}
-
class PrettyPrinter {
public:
virtual ~PrettyPrinter() = default;
@@ -511,15 +549,7 @@ public:
SP->printSourceLine(OS, Address, ObjectFilename, LVP);
LVP.printBetweenInsts(OS, false);
- size_t Start = OS.tell();
- if (LeadingAddr)
- OS << format("%8" PRIx64 ":", Address.Address);
- if (ShowRawInsn) {
- OS << ' ';
- dumpBytes(Bytes, OS);
- }
-
- AlignToInstStartColumn(Start, STI, OS);
+ printRawData(Bytes, Address.Address, OS, STI);
if (MI) {
// See MCInstPrinter::printInst. On targets where a PC relative immediate
@@ -806,7 +836,7 @@ PrettyPrinter &selectPrettyPrinter(Triple const &Triple) {
return AArch64PrettyPrinterInst;
}
}
-}
+} // namespace
static uint8_t getElfSymbolType(const ObjectFile &Obj, const SymbolRef &Sym) {
assert(Obj.isELF());
@@ -914,38 +944,35 @@ addMissingWasmCodeSymbols(const WasmObjectFile &Obj,
static void addPltEntries(const ObjectFile &Obj,
std::map<SectionRef, SectionSymbolsTy> &AllSymbols,
StringSaver &Saver) {
- std::optional<SectionRef> Plt;
- for (const SectionRef &Section : Obj.sections()) {
+ auto *ElfObj = dyn_cast<ELFObjectFileBase>(&Obj);
+ if (!ElfObj)
+ return;
+ DenseMap<StringRef, SectionRef> Sections;
+ for (SectionRef Section : Obj.sections()) {
Expected<StringRef> SecNameOrErr = Section.getName();
if (!SecNameOrErr) {
consumeError(SecNameOrErr.takeError());
continue;
}
- if (*SecNameOrErr == ".plt")
- Plt = Section;
- }
- if (!Plt)
- return;
- if (auto *ElfObj = dyn_cast<ELFObjectFileBase>(&Obj)) {
- for (auto PltEntry : ElfObj->getPltAddresses()) {
- if (PltEntry.first) {
- SymbolRef Symbol(*PltEntry.first, ElfObj);
- uint8_t SymbolType = getElfSymbolType(Obj, Symbol);
- if (Expected<StringRef> NameOrErr = Symbol.getName()) {
- if (!NameOrErr->empty())
- AllSymbols[*Plt].emplace_back(
- PltEntry.second, Saver.save((*NameOrErr + "@plt").str()),
- SymbolType);
- continue;
- } else {
- // The warning has been reported in disassembleObject().
- consumeError(NameOrErr.takeError());
- }
+ Sections[*SecNameOrErr] = Section;
+ }
+ for (auto Plt : ElfObj->getPltEntries()) {
+ if (Plt.Symbol) {
+ SymbolRef Symbol(*Plt.Symbol, ElfObj);
+ uint8_t SymbolType = getElfSymbolType(Obj, Symbol);
+ if (Expected<StringRef> NameOrErr = Symbol.getName()) {
+ if (!NameOrErr->empty())
+ AllSymbols[Sections[Plt.Section]].emplace_back(
+ Plt.Address, Saver.save((*NameOrErr + "@plt").str()), SymbolType);
+ continue;
+ } else {
+ // The warning has been reported in disassembleObject().
+ consumeError(NameOrErr.takeError());
}
- reportWarning("PLT entry at 0x" + Twine::utohexstr(PltEntry.second) +
- " references an invalid symbol",
- Obj.getFileName());
}
+ reportWarning("PLT entry at 0x" + Twine::utohexstr(Plt.Address) +
+ " references an invalid symbol",
+ Obj.getFileName());
}
}
@@ -1090,7 +1117,7 @@ SymbolInfoTy objdump::createSymbolInfo(const ObjectFile &Obj,
const uint64_t Addr = unwrapOrError(Symbol.getAddress(), FileName);
const StringRef Name = unwrapOrError(Symbol.getName(), FileName);
- if (Obj.isXCOFF() && SymbolDescription) {
+ if (Obj.isXCOFF() && (SymbolDescription || TracebackTable)) {
const auto &XCOFFObj = cast<XCOFFObjectFile>(Obj);
DataRefImpl SymbolDRI = Symbol.getRawDataRefImpl();
@@ -1111,7 +1138,7 @@ SymbolInfoTy objdump::createSymbolInfo(const ObjectFile &Obj,
static SymbolInfoTy createDummySymbolInfo(const ObjectFile &Obj,
const uint64_t Addr, StringRef &Name,
uint8_t Type) {
- if (Obj.isXCOFF() && SymbolDescription)
+ if (Obj.isXCOFF() && (SymbolDescription || TracebackTable))
return SymbolInfoTy(Addr, Name, std::nullopt, std::nullopt, false);
else
return SymbolInfoTy(Addr, Name, Type);
@@ -1129,11 +1156,11 @@ collectBBAddrMapLabels(const std::unordered_map<uint64_t, BBAddrMap> &AddrToBBAd
auto Iter = AddrToBBAddrMap.find(StartAddress);
if (Iter == AddrToBBAddrMap.end())
return;
- for (unsigned I = 0, Size = Iter->second.BBEntries.size(); I < Size; ++I) {
- uint64_t BBAddress = Iter->second.BBEntries[I].Offset + Iter->second.Addr;
+ for (const BBAddrMap::BBEntry &BBEntry : Iter->second.BBEntries) {
+ uint64_t BBAddress = BBEntry.Offset + Iter->second.Addr;
if (BBAddress >= EndAddress)
continue;
- Labels[BBAddress].push_back(("BB" + Twine(I)).str());
+ Labels[BBAddress].push_back(("BB" + Twine(BBEntry.ID)).str());
}
}
@@ -1285,10 +1312,10 @@ static void createFakeELFSections(ObjectFile &Obj) {
// Build ID. Returns std::nullopt if nothing was found.
static std::optional<OwningBinary<Binary>>
fetchBinaryByBuildID(const ObjectFile &Obj) {
- std::optional<object::BuildIDRef> BuildID = getBuildID(&Obj);
- if (!BuildID)
+ object::BuildIDRef BuildID = getBuildID(&Obj);
+ if (BuildID.empty())
return std::nullopt;
- std::optional<std::string> Path = BIDFetcher->fetch(*BuildID);
+ std::optional<std::string> Path = BIDFetcher->fetch(BuildID);
if (!Path)
return std::nullopt;
Expected<OwningBinary<Binary>> DebugBinary = createBinary(*Path);
@@ -1439,8 +1466,10 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj,
AddrToBBAddrMap.clear();
if (const auto *Elf = dyn_cast<ELFObjectFileBase>(&Obj)) {
auto BBAddrMapsOrErr = Elf->readBBAddrMap(SectionIndex);
- if (!BBAddrMapsOrErr)
+ if (!BBAddrMapsOrErr) {
reportWarning(toString(BBAddrMapsOrErr.takeError()), Obj.getFileName());
+ return;
+ }
for (auto &FunctionBBAddrMap : *BBAddrMapsOrErr)
AddrToBBAddrMap.emplace(FunctionBBAddrMap.Addr,
std::move(FunctionBBAddrMap));
@@ -1546,7 +1575,7 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj,
if (Demangle) {
// Fetch the demangled names and store them locally.
for (const SymbolInfoTy &Symbol : SymbolsHere)
- DemangledSymNamesHere.push_back(demangle(Symbol.Name.str()));
+ DemangledSymNamesHere.push_back(demangle(Symbol.Name));
// Now we've finished modifying that vector, it's safe to make
// a vector of StringRefs pointing into it.
SymNamesHere.insert(SymNamesHere.begin(), DemangledSymNamesHere.begin(),
@@ -1714,8 +1743,9 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj,
// distance to the next symbol, and sometimes it will be just a
// prologue and we should start disassembling instructions from where
// it left off.
- outs() << "// Error in decoding " << SymNamesHere[SHI]
- << " : Decoding failed region as bytes.\n";
+ outs() << Ctx.getAsmInfo()->getCommentString()
+ << " error in decoding " << SymNamesHere[SHI]
+ << " : decoding failed region as bytes.\n";
for (uint64_t I = 0; I < Size; ++I) {
outs() << "\t.byte\t " << format_hex(Bytes[I], 1, /*Upper=*/true)
<< "\n";
@@ -1736,6 +1766,11 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj,
}
bool DumpARMELFData = false;
+ bool DumpTracebackTableForXCOFFFunction =
+ Obj.isXCOFF() && Section.isText() && TracebackTable &&
+ Symbols[SI - 1].XCOFFSymInfo.StorageMappingClass &&
+ (*Symbols[SI - 1].XCOFFSymInfo.StorageMappingClass == XCOFF::XMC_PR);
+
formatted_raw_ostream FOS(outs());
std::unordered_map<uint64_t, std::string> AllLabels;
@@ -1788,6 +1823,16 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj,
}
}
+ if (DumpTracebackTableForXCOFFFunction &&
+ doesXCOFFTracebackTableBegin(Bytes.slice(Index, 4))) {
+ dumpTracebackTable(Bytes.slice(Index),
+ SectionAddr + Index + VMAAdjustment, FOS,
+ SectionAddr + End + VMAAdjustment, *STI,
+ cast<XCOFFObjectFile>(&Obj));
+ Index = End;
+ continue;
+ }
+
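The four-byte check above relies on the XCOFF/AIX convention that a routine's traceback table begins with a full word of zeros placed right after the last instruction; doesXCOFFTracebackTableBegin tests the next word for that marker (my reading of its intent; the library implementation is authoritative). Expressed as a stand-alone sketch:

  #include <cstddef>
  #include <cstdint>

  // Illustrative marker test: a traceback table starts at an all-zero 32-bit word.
  static bool looksLikeTracebackTableStart(const uint8_t *P, size_t N) {
    return N >= 4 && P[0] == 0 && P[1] == 0 && P[2] == 0 && P[3] == 0;
  }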
// Print local label if there's any.
auto Iter1 = BBAddrMapLabels.find(SectionAddr + Index);
if (Iter1 != BBAddrMapLabels.end()) {
@@ -1907,9 +1952,8 @@ static void disassembleObject(const Target *TheTarget, ObjectFile &Obj,
if (TargetSym != nullptr) {
uint64_t TargetAddress = TargetSym->Addr;
uint64_t Disp = Target - TargetAddress;
- std::string TargetName = TargetSym->Name.str();
- if (Demangle)
- TargetName = demangle(TargetName);
+ std::string TargetName = Demangle ? demangle(TargetSym->Name)
+ : TargetSym->Name.str();
*TargetOS << " <";
if (!Disp) {
@@ -2149,23 +2193,22 @@ static void disassembleObject(ObjectFile *Obj, bool InlineRelocs) {
SecondarySTI.get(), PIP, SP, InlineRelocs);
}
-void objdump::printRelocations(const ObjectFile *Obj) {
- StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 :
- "%08" PRIx64;
+void Dumper::printRelocations() {
+ StringRef Fmt = O.getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
// Build a mapping from relocation target to a vector of relocation
 // sections. Usually, there is only one relocation section for
// each relocated section.
MapVector<SectionRef, std::vector<SectionRef>> SecToRelSec;
uint64_t Ndx;
- for (const SectionRef &Section : ToolSectionFilter(*Obj, &Ndx)) {
- if (Obj->isELF() && (ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC))
+ for (const SectionRef &Section : ToolSectionFilter(O, &Ndx)) {
+ if (O.isELF() && (ELFSectionRef(Section).getFlags() & ELF::SHF_ALLOC))
continue;
if (Section.relocation_begin() == Section.relocation_end())
continue;
Expected<section_iterator> SecOrErr = Section.getRelocatedSection();
if (!SecOrErr)
- reportError(Obj->getFileName(),
+ reportError(O.getFileName(),
"section (" + Twine(Ndx) +
"): unable to get a relocation target: " +
toString(SecOrErr.takeError()));
@@ -2173,9 +2216,9 @@ void objdump::printRelocations(const ObjectFile *Obj) {
}
for (std::pair<SectionRef, std::vector<SectionRef>> &P : SecToRelSec) {
- StringRef SecName = unwrapOrError(P.first.getName(), Obj->getFileName());
+ StringRef SecName = unwrapOrError(P.first.getName(), O.getFileName());
outs() << "\nRELOCATION RECORDS FOR [" << SecName << "]:\n";
- uint32_t OffsetPadding = (Obj->getBytesInAddress() > 4 ? 16 : 8);
+ uint32_t OffsetPadding = (O.getBytesInAddress() > 4 ? 16 : 8);
uint32_t TypePadding = 24;
outs() << left_justify("OFFSET", OffsetPadding) << " "
<< left_justify("TYPE", TypePadding) << " "
@@ -2190,7 +2233,7 @@ void objdump::printRelocations(const ObjectFile *Obj) {
continue;
Reloc.getTypeName(RelocName);
if (Error E = getRelocationValueString(Reloc, ValueStr))
- reportError(std::move(E), Obj->getFileName());
+ reportUniqueWarning(std::move(E));
outs() << format(Fmt.data(), Address) << " "
<< left_justify(RelocName, TypePadding) << " " << ValueStr
@@ -2200,43 +2243,6 @@ void objdump::printRelocations(const ObjectFile *Obj) {
}
}
-void objdump::printDynamicRelocations(const ObjectFile *Obj) {
- // For the moment, this option is for ELF only
- if (!Obj->isELF())
- return;
-
- const auto *Elf = dyn_cast<ELFObjectFileBase>(Obj);
- if (!Elf || !any_of(Elf->sections(), [](const ELFSectionRef Sec) {
- return Sec.getType() == ELF::SHT_DYNAMIC;
- })) {
- reportError(Obj->getFileName(), "not a dynamic object");
- return;
- }
-
- std::vector<SectionRef> DynRelSec = Obj->dynamic_relocation_sections();
- if (DynRelSec.empty())
- return;
-
- outs() << "\nDYNAMIC RELOCATION RECORDS\n";
- const uint32_t OffsetPadding = (Obj->getBytesInAddress() > 4 ? 16 : 8);
- const uint32_t TypePadding = 24;
- outs() << left_justify("OFFSET", OffsetPadding) << ' '
- << left_justify("TYPE", TypePadding) << " VALUE\n";
-
- StringRef Fmt = Obj->getBytesInAddress() > 4 ? "%016" PRIx64 : "%08" PRIx64;
- for (const SectionRef &Section : DynRelSec)
- for (const RelocationRef &Reloc : Section.relocations()) {
- uint64_t Address = Reloc.getOffset();
- SmallString<32> RelocName;
- SmallString<32> ValueStr;
- Reloc.getTypeName(RelocName);
- if (Error E = getRelocationValueString(Reloc, ValueStr))
- reportError(std::move(E), Obj->getFileName());
- outs() << format(Fmt.data(), Address) << ' '
- << left_justify(RelocName, TypePadding) << ' ' << ValueStr << '\n';
- }
-}
-
 // Returns true if we need to show the LMA column when dumping section headers.
 // We show it only when the platform is ELF and either at least one section has
 // different VMA and LMA values or the --show-lma flag is used.
@@ -2355,8 +2361,8 @@ void objdump::printSectionContents(const ObjectFile *Obj) {
}
}
-void objdump::printSymbolTable(const ObjectFile &O, StringRef ArchiveName,
- StringRef ArchitectureName, bool DumpDynamic) {
+void Dumper::printSymbolTable(StringRef ArchiveName, StringRef ArchitectureName,
+ bool DumpDynamic) {
if (O.isCOFF() && !DumpDynamic) {
outs() << "\nSYMBOL TABLE:\n";
printCOFFSymbolTable(cast<const COFFObjectFile>(O));
@@ -2368,8 +2374,7 @@ void objdump::printSymbolTable(const ObjectFile &O, StringRef ArchiveName,
if (!DumpDynamic) {
outs() << "\nSYMBOL TABLE:\n";
for (auto I = O.symbol_begin(); I != O.symbol_end(); ++I)
- printSymbol(O, *I, {}, FileName, ArchiveName, ArchitectureName,
- DumpDynamic);
+ printSymbol(*I, {}, FileName, ArchiveName, ArchitectureName, DumpDynamic);
return;
}
@@ -2391,17 +2396,21 @@ void objdump::printSymbolTable(const ObjectFile &O, StringRef ArchiveName,
(void)!SymbolVersionsOrErr;
}
for (auto &Sym : Symbols)
- printSymbol(O, Sym, *SymbolVersionsOrErr, FileName, ArchiveName,
+ printSymbol(Sym, *SymbolVersionsOrErr, FileName, ArchiveName,
ArchitectureName, DumpDynamic);
}
-void objdump::printSymbol(const ObjectFile &O, const SymbolRef &Symbol,
- ArrayRef<VersionEntry> SymbolVersions,
- StringRef FileName, StringRef ArchiveName,
- StringRef ArchitectureName, bool DumpDynamic) {
+void Dumper::printSymbol(const SymbolRef &Symbol,
+ ArrayRef<VersionEntry> SymbolVersions,
+ StringRef FileName, StringRef ArchiveName,
+ StringRef ArchitectureName, bool DumpDynamic) {
const MachOObjectFile *MachO = dyn_cast<const MachOObjectFile>(&O);
- uint64_t Address = unwrapOrError(Symbol.getAddress(), FileName, ArchiveName,
- ArchitectureName);
+ Expected<uint64_t> AddrOrErr = Symbol.getAddress();
+ if (!AddrOrErr) {
+ reportUniqueWarning(AddrOrErr.takeError());
+ return;
+ }
+ uint64_t Address = *AddrOrErr;
if ((Address < StartAddress) || (Address > StopAddress))
return;
SymbolRef::Type Type =
@@ -2509,10 +2518,8 @@ void objdump::printSymbol(const ObjectFile &O, const SymbolRef &Symbol,
if (NameOrErr) {
outs() << " (csect:";
- std::string SymName(NameOrErr.get());
-
- if (Demangle)
- SymName = demangle(SymName);
+ std::string SymName =
+ Demangle ? demangle(*NameOrErr) : NameOrErr->str();
if (SymbolDescription)
SymName = getXCOFFSymbolDescription(createSymbolInfo(O, *SymRef),
@@ -2566,10 +2573,7 @@ void objdump::printSymbol(const ObjectFile &O, const SymbolRef &Symbol,
outs() << " .hidden";
}
- std::string SymName(Name);
- if (Demangle)
- SymName = demangle(SymName);
-
+ std::string SymName = Demangle ? demangle(Name) : Name.str();
if (O.isXCOFF() && SymbolDescription)
SymName = getXCOFFSymbolDescription(createSymbolInfo(O, Symbol), SymName);
@@ -2672,24 +2676,8 @@ static void printFaultMaps(const ObjectFile *Obj) {
outs() << FMP;
}
-static void printPrivateFileHeaders(const ObjectFile *O, bool OnlyFirst) {
- if (O->isELF()) {
- printELFFileHeader(O);
- printELFDynamicSection(O);
- printELFSymbolVersionInfo(O);
- return;
- }
- if (O->isCOFF())
- return printCOFFFileHeader(cast<object::COFFObjectFile>(*O));
- if (O->isWasm())
- return printWasmFileHeader(O);
- if (O->isMachO()) {
- printMachOFileHeader(O);
- if (!OnlyFirst)
- printMachOLoadCommands(O);
- return;
- }
- reportError(O->getFileName(), "Invalid/Unsupported object file format");
+void Dumper::printPrivateHeaders(bool) {
+ reportError(O.getFileName(), "Invalid/Unsupported object file format");
}
static void printFileHeaders(const ObjectFile *O) {
@@ -2792,6 +2780,14 @@ static void checkForInvalidStartStopAddress(ObjectFile *Obj,
static void dumpObject(ObjectFile *O, const Archive *A = nullptr,
const Archive::Child *C = nullptr) {
+ Expected<std::unique_ptr<Dumper>> DumperOrErr = createDumper(*O);
+ if (!DumperOrErr) {
+ reportError(DumperOrErr.takeError(), O->getFileName(),
+ A ? A->getFileName() : "");
+ return;
+ }
+ Dumper &D = **DumperOrErr;
+
// Avoid other output when using a raw option.
if (!RawClangAST) {
outs() << '\n';
@@ -2805,6 +2801,9 @@ static void dumpObject(ObjectFile *O, const Archive *A = nullptr,
if (HasStartAddressFlag || HasStopAddressFlag)
checkForInvalidStartStopAddress(O, StartAddress, StopAddress);
+ // TODO: Change print* free functions to Dumper member functions to utilize
+ // stateful functions like reportUniqueWarning.
+
 // Note: the order here matches GNU objdump for compatibility.
StringRef ArchiveName = A ? A->getFileName() : "";
if (ArchiveHeaders && !MachOOpt && C)
@@ -2812,14 +2811,14 @@ static void dumpObject(ObjectFile *O, const Archive *A = nullptr,
if (FileHeaders)
printFileHeaders(O);
if (PrivateHeaders || FirstPrivateHeader)
- printPrivateFileHeaders(O, FirstPrivateHeader);
+ D.printPrivateHeaders(FirstPrivateHeader);
if (SectionHeaders)
printSectionHeaders(*O);
if (SymbolTable)
- printSymbolTable(*O, ArchiveName);
+ D.printSymbolTable(ArchiveName);
if (DynamicSymbolTable)
- printSymbolTable(*O, ArchiveName, /*ArchitectureName=*/"",
- /*DumpDynamic=*/true);
+ D.printSymbolTable(ArchiveName, /*ArchitectureName=*/"",
+ /*DumpDynamic=*/true);
if (DwarfDumpType != DIDT_Null) {
std::unique_ptr<DIContext> DICtx = DWARFContext::create(*O);
// Dump the complete DWARF structure.
@@ -2828,9 +2827,9 @@ static void dumpObject(ObjectFile *O, const Archive *A = nullptr,
DICtx->dump(outs(), DumpOpts);
}
if (Relocations && !Disassemble)
- printRelocations(O);
+ D.printRelocations();
if (DynamicRelocations)
- printDynamicRelocations(O);
+ D.printDynamicRelocations();
if (SectionContents)
printSectionContents(O);
if (Disassemble)
@@ -2941,13 +2940,11 @@ static void parseIntArg(const llvm::opt::InputArgList &InputArgs, int ID,
static object::BuildID parseBuildIDArg(const opt::Arg *A) {
StringRef V(A->getValue());
- std::string Bytes;
- if (!tryGetFromHex(V, Bytes))
+ object::BuildID BID = parseBuildID(V);
+ if (BID.empty())
reportCmdLineError(A->getSpelling() + ": expected a build ID, but got '" +
V + "'");
- ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
- Bytes.size());
- return object::BuildID(BuildID.begin(), BuildID.end());
+ return BID;
}
void objdump::invalidArgValue(const opt::Arg *A) {
@@ -3027,6 +3024,7 @@ static void parseObjdumpOptions(const llvm::opt::InputArgList &InputArgs) {
Disassemble = InputArgs.hasArg(OBJDUMP_disassemble);
DisassembleAll = InputArgs.hasArg(OBJDUMP_disassemble_all);
SymbolDescription = InputArgs.hasArg(OBJDUMP_symbol_description);
+ TracebackTable = InputArgs.hasArg(OBJDUMP_traceback_table);
DisassembleSymbols =
commaSeparatedValues(InputArgs, OBJDUMP_disassemble_symbols_EQ);
DisassembleZeroes = InputArgs.hasArg(OBJDUMP_disassemble_zeroes);
@@ -3198,9 +3196,7 @@ int main(int argc, char **argv) {
// Initialize debuginfod.
const bool ShouldUseDebuginfodByDefault =
- InputArgs.hasArg(OBJDUMP_build_id) ||
- (HTTPClient::isAvailable() &&
- !ExitOnErr(getDefaultDebuginfodUrls()).empty());
+ InputArgs.hasArg(OBJDUMP_build_id) || canUseDebuginfod();
std::vector<std::string> DebugFileDirectories =
InputArgs.getAllArgValues(OBJDUMP_debug_file_directory);
if (InputArgs.hasFlag(OBJDUMP_debuginfod, OBJDUMP_no_debuginfod,
@@ -3229,7 +3225,7 @@ int main(int argc, char **argv) {
ArchiveHeaders = FileHeaders = PrivateHeaders = Relocations =
SectionHeaders = SymbolTable = true;
- if (DisassembleAll || PrintSource || PrintLines ||
+ if (DisassembleAll || PrintSource || PrintLines || TracebackTable ||
!DisassembleSymbols.empty())
Disassemble = true;
diff --git a/llvm/tools/llvm-objdump/llvm-objdump.h b/llvm/tools/llvm-objdump/llvm-objdump.h
index efb445195259..b3136f0374d2 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.h
+++ b/llvm/tools/llvm-objdump/llvm-objdump.h
@@ -12,9 +12,13 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Object/Archive.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/FormattedStream.h"
+#include <memory>
namespace llvm {
class StringRef;
@@ -27,6 +31,12 @@ class Arg;
namespace object {
class RelocationRef;
struct VersionEntry;
+
+class COFFObjectFile;
+class ELFObjectFileBase;
+class MachOObjectFile;
+class WasmObjectFile;
+class XCOFFObjectFile;
} // namespace object
namespace objdump {
@@ -55,64 +65,41 @@ extern bool SectionHeaders;
extern bool SectionContents;
extern bool ShowRawInsn;
extern bool SymbolDescription;
+extern bool TracebackTable;
extern bool SymbolTable;
extern std::string TripleName;
extern bool UnwindInfo;
extern StringSet<> FoundSectionSet;
-typedef std::function<bool(llvm::object::SectionRef const &)> FilterPredicate;
+class Dumper {
+ const object::ObjectFile &O;
+ StringSet<> Warnings;
-/// A filtered iterator for SectionRefs that skips sections based on some given
-/// predicate.
-class SectionFilterIterator {
public:
- SectionFilterIterator(FilterPredicate P,
- llvm::object::section_iterator const &I,
- llvm::object::section_iterator const &E)
- : Predicate(std::move(P)), Iterator(I), End(E) {
- ScanPredicate();
- }
- const llvm::object::SectionRef &operator*() const { return *Iterator; }
- SectionFilterIterator &operator++() {
- ++Iterator;
- ScanPredicate();
- return *this;
- }
- bool operator!=(SectionFilterIterator const &Other) const {
- return Iterator != Other.Iterator;
- }
-
-private:
- void ScanPredicate() {
- while (Iterator != End && !Predicate(*Iterator)) {
- ++Iterator;
- }
- }
- FilterPredicate Predicate;
- llvm::object::section_iterator Iterator;
- llvm::object::section_iterator End;
+ Dumper(const object::ObjectFile &O) : O(O) {}
+ virtual ~Dumper() {}
+
+ void reportUniqueWarning(Error Err);
+ void reportUniqueWarning(const Twine &Msg);
+
+ virtual void printPrivateHeaders(bool MachOOnlyFirst);
+ virtual void printDynamicRelocations() {}
+ void printSymbolTable(StringRef ArchiveName,
+ StringRef ArchitectureName = StringRef(),
+ bool DumpDynamic = false);
+ void printSymbol(const object::SymbolRef &Symbol,
+ ArrayRef<object::VersionEntry> SymbolVersions,
+ StringRef FileName, StringRef ArchiveName,
+ StringRef ArchitectureName, bool DumpDynamic);
+ void printRelocations();
};
-/// Creates an iterator range of SectionFilterIterators for a given Object and
-/// predicate.
-class SectionFilter {
-public:
- SectionFilter(FilterPredicate P, llvm::object::ObjectFile const &O)
- : Predicate(std::move(P)), Object(O) {}
- SectionFilterIterator begin() {
- return SectionFilterIterator(Predicate, Object.section_begin(),
- Object.section_end());
- }
- SectionFilterIterator end() {
- return SectionFilterIterator(Predicate, Object.section_end(),
- Object.section_end());
- }
-
-private:
- FilterPredicate Predicate;
- llvm::object::ObjectFile const &Object;
-};
+std::unique_ptr<Dumper> createCOFFDumper(const object::COFFObjectFile &Obj);
+std::unique_ptr<Dumper> createELFDumper(const object::ELFObjectFileBase &Obj);
+std::unique_ptr<Dumper> createMachODumper(const object::MachOObjectFile &Obj);
+std::unique_ptr<Dumper> createWasmDumper(const object::WasmObjectFile &Obj);
+std::unique_ptr<Dumper> createXCOFFDumper(const object::XCOFFObjectFile &Obj);
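A sketch of how the new interface is meant to be driven, mirroring what dumpObject in llvm-objdump.cpp does above; error handling and option checks are elided and the wrapper function is illustrative only:

  // Inside namespace llvm::objdump; assumes the declarations above.
  void dumpElf(const object::ELFObjectFileBase &Obj) {
    std::unique_ptr<Dumper> D = createELFDumper(Obj);
    D->printPrivateHeaders(/*MachOOnlyFirst=*/false);
    D->printSymbolTable(/*ArchiveName=*/"");
    D->printRelocations();
    D->printDynamicRelocations(); // no-op unless the format overrides it
  }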
// Various helper functions.
@@ -122,21 +109,12 @@ private:
/// Idx is an optional output parameter that keeps track of which section index
/// this is. This may be different than the actual section number, as some
/// sections may be filtered (e.g. symbol tables).
-SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O,
- uint64_t *Idx = nullptr);
+object::SectionFilter ToolSectionFilter(const llvm::object::ObjectFile &O,
+ uint64_t *Idx = nullptr);
bool isRelocAddressLess(object::RelocationRef A, object::RelocationRef B);
-void printRelocations(const object::ObjectFile *O);
-void printDynamicRelocations(const object::ObjectFile *O);
void printSectionHeaders(object::ObjectFile &O);
void printSectionContents(const object::ObjectFile *O);
-void printSymbolTable(const object::ObjectFile &O, StringRef ArchiveName,
- StringRef ArchitectureName = StringRef(),
- bool DumpDynamic = false);
-void printSymbol(const object::ObjectFile &O, const object::SymbolRef &Symbol,
- ArrayRef<object::VersionEntry> SymbolVersions,
- StringRef FileName, StringRef ArchiveName,
- StringRef ArchitectureName, bool DumpDynamic);
[[noreturn]] void reportError(StringRef File, const Twine &Message);
[[noreturn]] void reportError(Error E, StringRef FileName,
StringRef ArchiveName = "",
@@ -156,6 +134,10 @@ std::string getFileNameForError(const object::Archive::Child &C,
unsigned Index);
SymbolInfoTy createSymbolInfo(const object::ObjectFile &Obj,
const object::SymbolRef &Symbol);
+unsigned getInstStartColumn(const MCSubtargetInfo &STI);
+void printRawData(llvm::ArrayRef<uint8_t> Bytes, uint64_t Address,
+ llvm::formatted_raw_ostream &OS,
+ llvm::MCSubtargetInfo const &STI);
} // namespace objdump
} // end namespace llvm
diff --git a/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp b/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
index 4b604206fc98..447a9cb6b359 100644
--- a/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
+++ b/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp
@@ -15,6 +15,7 @@
#include "llvm-pdbutil.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
diff --git a/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
index 8e17284871a9..96c3d49072b6 100644
--- a/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
@@ -8,6 +8,7 @@
#include "MinimalSymbolDumper.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/Formatters.h"
@@ -210,6 +211,8 @@ static std::string formatSourceLanguage(SourceLanguage Lang) {
RETURN_CASE(SourceLanguage, D, "d");
RETURN_CASE(SourceLanguage, Swift, "swift");
RETURN_CASE(SourceLanguage, Rust, "rust");
+ RETURN_CASE(SourceLanguage, ObjC, "objc");
+ RETURN_CASE(SourceLanguage, ObjCpp, "objc++");
}
return formatUnknownEnum(Lang);
}
diff --git a/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp b/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp
index be7e487673fb..aaa430a9572e 100644
--- a/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/MinimalTypeDumper.cpp
@@ -11,6 +11,7 @@
#include "TypeReferenceTracker.h"
#include "llvm-pdbutil.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
diff --git a/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp b/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
index 591bd4f93702..b347cfdfc392 100644
--- a/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/PrettyCompilandDumper.cpp
@@ -11,6 +11,7 @@
#include "PrettyFunctionDumper.h"
#include "llvm-pdbutil.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index c8e5e6d1ad68..da10ddcc58c6 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -23,6 +23,7 @@
#include "llvm/ProfileData/RawMemProfReader.h"
#include "llvm/ProfileData/SampleProfReader.h"
#include "llvm/ProfileData/SampleProfWriter.h"
+#include "llvm/Support/BalancedPartitioning.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Discriminator.h"
#include "llvm/Support/Errc.h"
@@ -30,11 +31,13 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/LLVMDriver.h"
#include "llvm/Support/MD5.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -51,7 +54,7 @@ const std::string DuplicateNameStr = "----";
enum ProfileFormat {
PF_None = 0,
PF_Text,
- PF_Compact_Binary,
+ PF_Compact_Binary, // Deprecated
PF_Ext_Binary,
PF_GCC,
PF_Binary
@@ -214,9 +217,10 @@ struct WriterContext {
SmallSet<instrprof_error, 4> &WriterErrorCodes;
WriterContext(bool IsSparse, std::mutex &ErrLock,
- SmallSet<instrprof_error, 4> &WriterErrorCodes)
- : Writer(IsSparse), ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {
- }
+ SmallSet<instrprof_error, 4> &WriterErrorCodes,
+ uint64_t ReservoirSize = 0, uint64_t MaxTraceLength = 0)
+ : Writer(IsSparse, ReservoirSize, MaxTraceLength), ErrLock(ErrLock),
+ WriterErrorCodes(WriterErrorCodes) {}
};
 /// Compute the overlap between profile BaseFilename and TestFilename,
@@ -226,12 +230,14 @@ static void overlapInput(const std::string &BaseFilename,
OverlapStats &Overlap,
const OverlapFuncFilters &FuncFilter,
raw_fd_ostream &OS, bool IsCS) {
- auto ReaderOrErr = InstrProfReader::create(TestFilename);
+ auto FS = vfs::getRealFileSystem();
+ auto ReaderOrErr = InstrProfReader::create(TestFilename, *FS);
if (Error E = ReaderOrErr.takeError()) {
 // Skip the empty profiles by returning silently.
- instrprof_error IPE = InstrProfError::take(std::move(E));
- if (IPE != instrprof_error::empty_raw_profile)
- WC->Errors.emplace_back(make_error<InstrProfError>(IPE), TestFilename);
+ auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
+ if (ErrorCode != instrprof_error::empty_raw_profile)
+ WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
+ TestFilename);
return;
}
@@ -276,8 +282,9 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
}
auto MemProfError = [&](Error E) {
- instrprof_error IPE = InstrProfError::take(std::move(E));
- WC->Errors.emplace_back(make_error<InstrProfError>(IPE), Filename);
+ auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
+ WC->Errors.emplace_back(make_error<InstrProfError>(ErrorCode, Msg),
+ Filename);
};
// Add the frame mappings into the writer context.
@@ -298,12 +305,14 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
return;
}
- auto ReaderOrErr = InstrProfReader::create(Input.Filename, Correlator);
+ auto FS = vfs::getRealFileSystem();
+ auto ReaderOrErr = InstrProfReader::create(Input.Filename, *FS, Correlator);
if (Error E = ReaderOrErr.takeError()) {
- // Skip the empty profiles by returning sliently.
- instrprof_error IPE = InstrProfError::take(std::move(E));
- if (IPE != instrprof_error::empty_raw_profile)
- WC->Errors.emplace_back(make_error<InstrProfError>(IPE), Filename);
+ // Skip the empty profiles by returning silently.
+ auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
+ if (ErrCode != instrprof_error::empty_raw_profile)
+ WC->Errors.emplace_back(make_error<InstrProfError>(ErrCode, Msg),
+ Filename);
return;
}
@@ -330,14 +339,20 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
}
Reported = true;
 // Only show the hint the first time an error occurs.
- instrprof_error IPE = InstrProfError::take(std::move(E));
+ auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
- bool firstTime = WC->WriterErrorCodes.insert(IPE).second;
- handleMergeWriterError(make_error<InstrProfError>(IPE), Input.Filename,
- FuncName, firstTime);
+ bool firstTime = WC->WriterErrorCodes.insert(ErrCode).second;
+ handleMergeWriterError(make_error<InstrProfError>(ErrCode, Msg),
+ Input.Filename, FuncName, firstTime);
});
}
+ if (Reader->hasTemporalProfile()) {
+ auto &Traces = Reader->getTemporalProfTraces(Input.Weight);
+ if (!Traces.empty())
+ WC->Writer.addTemporalProfileTraces(
+ Traces, Reader->getTemporalProfTraceStreamSize());
+ }
if (Reader->hasError()) {
if (Error E = Reader->getError())
WC->Errors.emplace_back(std::move(E), Filename);
@@ -359,11 +374,11 @@ static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
exitWithError(std::move(E));
Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer), [&](Error E) {
- instrprof_error IPE = InstrProfError::take(std::move(E));
+ auto [ErrorCode, Msg] = InstrProfError::take(std::move(E));
std::unique_lock<std::mutex> ErrGuard{Dst->ErrLock};
- bool firstTime = Dst->WriterErrorCodes.insert(IPE).second;
+ bool firstTime = Dst->WriterErrorCodes.insert(ErrorCode).second;
if (firstTime)
- warn(toString(make_error<InstrProfError>(IPE)));
+ warn(toString(make_error<InstrProfError>(ErrorCode, Msg)));
});
}
@@ -388,15 +403,17 @@ static void writeInstrProfile(StringRef OutputFilename,
}
}
-static void mergeInstrProfile(const WeightedFileVector &Inputs,
- StringRef DebugInfoFilename,
- SymbolRemapper *Remapper,
- StringRef OutputFilename,
- ProfileFormat OutputFormat, bool OutputSparse,
- unsigned NumThreads, FailureMode FailMode,
- const StringRef ProfiledBinary) {
- if (OutputFormat != PF_Binary && OutputFormat != PF_Compact_Binary &&
- OutputFormat != PF_Ext_Binary && OutputFormat != PF_Text)
+static void
+mergeInstrProfile(const WeightedFileVector &Inputs, StringRef DebugInfoFilename,
+ SymbolRemapper *Remapper, StringRef OutputFilename,
+ ProfileFormat OutputFormat, uint64_t TraceReservoirSize,
+ uint64_t MaxTraceLength, bool OutputSparse,
+ unsigned NumThreads, FailureMode FailMode,
+ const StringRef ProfiledBinary) {
+ if (OutputFormat == PF_Compact_Binary)
+ exitWithError("Compact Binary is deprecated");
+ if (OutputFormat != PF_Binary && OutputFormat != PF_Ext_Binary &&
+ OutputFormat != PF_Text)
exitWithError("unknown format is specified");
std::unique_ptr<InstrProfCorrelator> Correlator;
@@ -420,7 +437,8 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
for (unsigned I = 0; I < NumThreads; ++I)
Contexts.emplace_back(std::make_unique<WriterContext>(
- OutputSparse, ErrorLock, WriterErrorCodes));
+ OutputSparse, ErrorLock, WriterErrorCodes, TraceReservoirSize,
+ MaxTraceLength));
if (NumThreads == 1) {
for (const auto &Input : Inputs)
@@ -631,7 +649,7 @@ adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
}
}
- if (StaticFuncMap.find(NewName) == StaticFuncMap.end()) {
+ if (!StaticFuncMap.contains(NewName)) {
StaticFuncMap[NewName] = Name;
} else {
StaticFuncMap[NewName] = DuplicateNameStr;
@@ -838,8 +856,9 @@ static void supplementInstrProfile(
// Read sample profile.
LLVMContext Context;
+ auto FS = vfs::getRealFileSystem();
auto ReaderOrErr = sampleprof::SampleProfileReader::create(
- SampleFilename.str(), Context, FSDiscriminatorPassOption);
+ SampleFilename.str(), Context, *FS, FSDiscriminatorPassOption);
if (std::error_code EC = ReaderOrErr.getError())
exitWithErrorCode(EC, SampleFilename);
auto Reader = std::move(ReaderOrErr.get());
@@ -896,7 +915,7 @@ remapSamples(const sampleprof::FunctionSamples &Samples,
static sampleprof::SampleProfileFormat FormatMap[] = {
sampleprof::SPF_None,
sampleprof::SPF_Text,
- sampleprof::SPF_Compact_Binary,
+ sampleprof::SPF_None,
sampleprof::SPF_Ext_Binary,
sampleprof::SPF_GCC,
sampleprof::SPF_Binary};
@@ -963,10 +982,11 @@ static void
mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
StringRef OutputFilename, ProfileFormat OutputFormat,
StringRef ProfileSymbolListFile, bool CompressAllSections,
- bool UseMD5, bool GenPartialProfile, bool GenCSNestedProfile,
+ bool UseMD5, bool GenPartialProfile,
+ SampleProfileLayout ProfileLayout,
bool SampleMergeColdContext, bool SampleTrimColdContext,
bool SampleColdContextFrameDepth, FailureMode FailMode,
- bool DropProfileSymbolList) {
+ bool DropProfileSymbolList, size_t OutputSizeLimit) {
using namespace sampleprof;
SampleProfileMap ProfileMap;
SmallVector<std::unique_ptr<sampleprof::SampleProfileReader>, 5> Readers;
@@ -975,7 +995,8 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
std::optional<bool> ProfileIsProbeBased;
std::optional<bool> ProfileIsCS;
for (const auto &Input : Inputs) {
- auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context,
+ auto FS = vfs::getRealFileSystem();
+ auto ReaderOrErr = SampleProfileReader::create(Input.Filename, Context, *FS,
FSDiscriminatorPassOption);
if (std::error_code EC = ReaderOrErr.getError()) {
warnOrExitGivenError(FailMode, EC, Input.Filename);
@@ -1042,9 +1063,12 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
SampleMergeColdContext, SampleColdContextFrameDepth, false);
}
- if (ProfileIsCS && GenCSNestedProfile) {
- CSProfileConverter CSConverter(ProfileMap);
- CSConverter.convertProfiles();
+ if (ProfileLayout == llvm::sampleprof::SPL_Flat) {
+ ProfileConverter::flattenProfile(ProfileMap, FunctionSamples::ProfileIsCS);
+ ProfileIsCS = FunctionSamples::ProfileIsCS = false;
+ } else if (ProfileIsCS && ProfileLayout == llvm::sampleprof::SPL_Nest) {
+ ProfileConverter CSConverter(ProfileMap);
+ CSConverter.convertCSProfiles();
ProfileIsCS = FunctionSamples::ProfileIsCS = false;
}
@@ -1059,7 +1083,10 @@ mergeSampleProfile(const WeightedFileVector &Inputs, SymbolRemapper *Remapper,
auto Buffer = getInputFileBuf(ProfileSymbolListFile);
handleExtBinaryWriter(*Writer, OutputFormat, Buffer.get(), WriterList,
CompressAllSections, UseMD5, GenPartialProfile);
- if (std::error_code EC = Writer->write(ProfileMap))
+
+ // If OutputSizeLimit is 0 (default), it is the same as write().
+ if (std::error_code EC =
+ Writer->writeWithSizeLimit(ProfileMap, OutputSizeLimit))
exitWithErrorCode(std::move(EC));
}
@@ -1156,12 +1183,11 @@ static int merge_main(int argc, const char *argv[]) {
cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
clEnumVal(sample, "Sample profile")));
cl::opt<ProfileFormat> OutputFormat(
- cl::desc("Format of output profile"), cl::init(PF_Binary),
+ cl::desc("Format of output profile"), cl::init(PF_Ext_Binary),
cl::values(
- clEnumValN(PF_Binary, "binary", "Binary encoding (default)"),
- clEnumValN(PF_Compact_Binary, "compbinary",
- "Compact binary encoding"),
- clEnumValN(PF_Ext_Binary, "extbinary", "Extensible binary encoding"),
+ clEnumValN(PF_Binary, "binary", "Binary encoding"),
+ clEnumValN(PF_Ext_Binary, "extbinary", "Extensible binary encoding "
+ "(default)"),
clEnumValN(PF_Text, "text", "Text encoding"),
clEnumValN(PF_GCC, "gcc",
"GCC encoding (only meaningful for -sample)")));
@@ -1202,6 +1228,11 @@ static int merge_main(int argc, const char *argv[]) {
"sample-frame-depth-for-cold-context", cl::init(1),
cl::desc("Keep the last K frames while merging cold profile. 1 means the "
"context-less base profile"));
+ cl::opt<size_t> OutputSizeLimit(
+ "output-size-limit", cl::init(0), cl::Hidden,
+ cl::desc("Trim cold functions until profile size is below specified "
+ "limit in bytes. This uses a heursitic and functions may be "
+ "excessively trimmed"));
cl::opt<bool> GenPartialProfile(
"gen-partial-profile", cl::init(false), cl::Hidden,
cl::desc("Generate a partial profile (only meaningful for -extbinary)"));
@@ -1227,9 +1258,15 @@ static int merge_main(int argc, const char *argv[]) {
"instr-prof-cold-threshold", cl::init(0), cl::Hidden,
cl::desc("User specified cold threshold for instr profile which will "
"override the cold threshold got from profile summary. "));
- cl::opt<bool> GenCSNestedProfile(
- "gen-cs-nested-profile", cl::Hidden, cl::init(false),
- cl::desc("Generate nested function profiles for CSSPGO"));
+ cl::opt<SampleProfileLayout> ProfileLayout(
+ "convert-sample-profile-layout",
+ cl::desc("Convert the generated profile to a profile with a new layout"),
+ cl::init(SPL_None),
+ cl::values(
+ clEnumValN(SPL_Nest, "nest",
+ "Nested profile, the input should be CS flat profile"),
+ clEnumValN(SPL_Flat, "flat",
+ "Profile with nested inlinee flatten out")));
cl::opt<std::string> DebugInfoFilename(
"debug-info", cl::init(""),
cl::desc("Use the provided debug info to correlate the raw profile."));
@@ -1240,6 +1277,17 @@ static int merge_main(int argc, const char *argv[]) {
"drop-profile-symbol-list", cl::init(false), cl::Hidden,
cl::desc("Drop the profile symbol list when merging AutoFDO profiles "
"(only meaningful for -sample)"));
+ // WARNING: This reservoir size value is propagated to any input indexed
+ // profiles for simplicity. Changing this value between invocations could
+ // result in sample bias.
+ cl::opt<uint64_t> TemporalProfTraceReservoirSize(
+ "temporal-profile-trace-reservoir-size", cl::init(100),
+ cl::desc("The maximum number of stored temporal profile traces (default: "
+ "100)"));
+ cl::opt<uint64_t> TemporalProfMaxTraceLength(
+ "temporal-profile-max-trace-length", cl::init(10000),
+ cl::desc("The maximum length of a single temporal profile trace "
+ "(default: 10000)"));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
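The two temporal-profile options above describe a reservoir of at most N traces sampled uniformly from every trace seen, which is also why the warning asks for the size to stay stable across invocations. A generic sketch of that sampling scheme; the class and member names are illustrative, not the InstrProfWriter API:

  #include <cstdint>
  #include <random>
  #include <vector>

  // Algorithm R: keep a uniform sample of up to Capacity traces from a stream.
  template <typename Trace> class TraceReservoir {
    std::vector<Trace> Sample;
    uint64_t Seen = 0;
    uint64_t Capacity;
    std::mt19937_64 RNG;

  public:
    explicit TraceReservoir(uint64_t Capacity) : Capacity(Capacity) {}

    void add(Trace T) {
      ++Seen;
      if (Sample.size() < Capacity) {
        Sample.push_back(std::move(T));
        return;
      }
      // Replace a random slot with probability Capacity / Seen.
      std::uniform_int_distribution<uint64_t> Dist(0, Seen - 1);
      uint64_t J = Dist(RNG);
      if (J < Capacity)
        Sample[J] = std::move(T);
    }

    const std::vector<Trace> &traces() const { return Sample; }
    uint64_t streamSize() const { return Seen; } // cf. the "seen=" stream size
  };

After M > N traces have been added, each trace has probability N / M of being retained, so reusing the same reservoir size when re-merging indexed profiles keeps the sample unbiased.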
@@ -1281,14 +1329,17 @@ static int merge_main(int argc, const char *argv[]) {
if (ProfileKind == instr)
mergeInstrProfile(WeightedInputs, DebugInfoFilename, Remapper.get(),
- OutputFilename, OutputFormat, OutputSparse, NumThreads,
+ OutputFilename, OutputFormat,
+ TemporalProfTraceReservoirSize,
+ TemporalProfMaxTraceLength, OutputSparse, NumThreads,
FailureMode, ProfiledBinary);
else
- mergeSampleProfile(
- WeightedInputs, Remapper.get(), OutputFilename, OutputFormat,
- ProfileSymbolListFile, CompressAllSections, UseMD5, GenPartialProfile,
- GenCSNestedProfile, SampleMergeColdContext, SampleTrimColdContext,
- SampleColdContextFrameDepth, FailureMode, DropProfileSymbolList);
+ mergeSampleProfile(WeightedInputs, Remapper.get(), OutputFilename,
+ OutputFormat, ProfileSymbolListFile, CompressAllSections,
+ UseMD5, GenPartialProfile, ProfileLayout,
+ SampleMergeColdContext, SampleTrimColdContext,
+ SampleColdContextFrameDepth, FailureMode,
+ DropProfileSymbolList, OutputSizeLimit);
return 0;
}
@@ -2189,12 +2240,13 @@ std::error_code SampleOverlapAggregator::loadProfiles() {
using namespace sampleprof;
LLVMContext Context;
- auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context,
+ auto FS = vfs::getRealFileSystem();
+ auto BaseReaderOrErr = SampleProfileReader::create(BaseFilename, Context, *FS,
FSDiscriminatorPassOption);
if (std::error_code EC = BaseReaderOrErr.getError())
exitWithErrorCode(EC, BaseFilename);
- auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context,
+ auto TestReaderOrErr = SampleProfileReader::create(TestFilename, Context, *FS,
FSDiscriminatorPassOption);
if (std::error_code EC = TestReaderOrErr.getError())
exitWithErrorCode(EC, TestFilename);
@@ -2358,21 +2410,20 @@ static void showValueSitesStats(raw_fd_ostream &OS, uint32_t VK,
}
}
-static int showInstrProfile(const std::string &Filename, bool ShowCounts,
- uint32_t TopN, bool ShowIndirectCallTargets,
- bool ShowMemOPSizes, bool ShowDetailedSummary,
- std::vector<uint32_t> DetailedSummaryCutoffs,
- bool ShowAllFunctions, bool ShowCS,
- uint64_t ValueCutoff, bool OnlyListBelow,
- const std::string &ShowFunction, bool TextFormat,
- bool ShowBinaryIds, bool ShowCovered,
- bool ShowProfileVersion, ShowFormat SFormat,
- raw_fd_ostream &OS) {
+static int showInstrProfile(
+ const std::string &Filename, bool ShowCounts, uint32_t TopN,
+ bool ShowIndirectCallTargets, bool ShowMemOPSizes, bool ShowDetailedSummary,
+ std::vector<uint32_t> DetailedSummaryCutoffs, bool ShowAllFunctions,
+ bool ShowCS, uint64_t ValueCutoff, bool OnlyListBelow,
+ const std::string &ShowFunction, bool TextFormat, bool ShowBinaryIds,
+ bool ShowCovered, bool ShowProfileVersion, bool ShowTemporalProfTraces,
+ ShowFormat SFormat, raw_fd_ostream &OS) {
if (SFormat == ShowFormat::Json)
exitWithError("JSON output is not supported for instr profiles");
if (SFormat == ShowFormat::Yaml)
exitWithError("YAML output is not supported for instr profiles");
- auto ReaderOrErr = InstrProfReader::create(Filename);
+ auto FS = vfs::getRealFileSystem();
+ auto ReaderOrErr = InstrProfReader::create(Filename, *FS);
std::vector<uint32_t> Cutoffs = std::move(DetailedSummaryCutoffs);
if (ShowDetailedSummary && Cutoffs.empty()) {
Cutoffs = ProfileSummaryBuilder::DefaultCutoffs;
@@ -2583,6 +2634,19 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
if (ShowProfileVersion)
OS << "Profile version: " << Reader->getVersion() << "\n";
+
+ if (ShowTemporalProfTraces) {
+ auto &Traces = Reader->getTemporalProfTraces();
+ OS << "Temporal Profile Traces (samples=" << Traces.size()
+ << " seen=" << Reader->getTemporalProfTraceStreamSize() << "):\n";
+ for (unsigned i = 0; i < Traces.size(); i++) {
+ OS << " Temporal Profile Trace " << i << " (weight=" << Traces[i].Weight
+ << " count=" << Traces[i].FunctionNameRefs.size() << "):\n";
+ for (auto &NameRef : Traces[i].FunctionNameRefs)
+ OS << " " << Reader->getSymtab().getFuncName(NameRef) << "\n";
+ }
+ }
+
return 0;
}
@@ -2742,8 +2806,9 @@ static int showSampleProfile(const std::string &Filename, bool ShowCounts,
exitWithError("YAML output is not supported for sample profiles");
using namespace sampleprof;
LLVMContext Context;
- auto ReaderOrErr =
- SampleProfileReader::create(Filename, Context, FSDiscriminatorPassOption);
+ auto FS = vfs::getRealFileSystem();
+ auto ReaderOrErr = SampleProfileReader::create(Filename, Context, *FS,
+ FSDiscriminatorPassOption);
if (std::error_code EC = ReaderOrErr.getError())
exitWithErrorCode(EC, Filename);
@@ -2917,6 +2982,9 @@ static int show_main(int argc, const char *argv[]) {
"extbinary format"));
cl::opt<bool> ShowBinaryIds("binary-ids", cl::init(false),
cl::desc("Show binary ids in the profile. "));
+ cl::opt<bool> ShowTemporalProfTraces(
+ "temporal-profile-traces",
+ cl::desc("Show temporal profile traces in the profile."));
cl::opt<std::string> DebugInfoFilename(
"debug-info", cl::init(""),
cl::desc("Read and extract profile metadata from debug info and show "
@@ -2961,8 +3029,8 @@ static int show_main(int argc, const char *argv[]) {
Filename, ShowCounts, TopNFunctions, ShowIndirectCallTargets,
ShowMemOPSizes, ShowDetailedSummary, DetailedSummaryCutoffs,
ShowAllFunctions, ShowCS, ValueCutoff, OnlyListBelow, ShowFunction,
- TextFormat, ShowBinaryIds, ShowCovered, ShowProfileVersion, SFormat,
- OS);
+ TextFormat, ShowBinaryIds, ShowCovered, ShowProfileVersion,
+ ShowTemporalProfTraces, SFormat, OS);
if (ProfileKind == sample)
return showSampleProfile(Filename, ShowCounts, TopNFunctions,
ShowAllFunctions, ShowDetailedSummary,
@@ -2971,20 +3039,73 @@ static int show_main(int argc, const char *argv[]) {
return showMemProfProfile(Filename, ProfiledBinary, SFormat, OS);
}
-int llvm_profdata_main(int argc, char **argvNonConst) {
+static int order_main(int argc, const char *argv[]) {
+ cl::opt<std::string> Filename(cl::Positional, cl::desc("<profdata-file>"));
+ cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
+ cl::init("-"), cl::desc("Output file"));
+ cl::alias OutputFilenameA("o", cl::desc("Alias for --output"),
+ cl::aliasopt(OutputFilename));
+ cl::ParseCommandLineOptions(argc, argv, "LLVM profile data order\n");
+
+ std::error_code EC;
+ raw_fd_ostream OS(OutputFilename.data(), EC, sys::fs::OF_TextWithCRLF);
+ if (EC)
+ exitWithErrorCode(EC, OutputFilename);
+ auto FS = vfs::getRealFileSystem();
+ auto ReaderOrErr = InstrProfReader::create(Filename, *FS);
+ if (Error E = ReaderOrErr.takeError())
+ exitWithError(std::move(E), Filename);
+
+ auto Reader = std::move(ReaderOrErr.get());
+ for (auto &I : *Reader) {
+ // Read all entries
+ (void)I;
+ }
+ auto &Traces = Reader->getTemporalProfTraces();
+ auto Nodes = TemporalProfTraceTy::createBPFunctionNodes(Traces);
+ BalancedPartitioningConfig Config;
+ BalancedPartitioning BP(Config);
+ BP.run(Nodes);
+
+ WithColor::note() << "# Ordered " << Nodes.size() << " functions\n";
+ for (auto &N : Nodes) {
+ auto FuncName = Reader->getSymtab().getFuncName(N.Id);
+ if (FuncName.contains(':')) {
+ // GlobalValue::getGlobalIdentifier() prefixes the filename if the symbol
+ // is local. This logic will break if there is a colon in the filename,
+ // but we cannot use rsplit() because ObjC symbols can have colons.
+ auto [Filename, ParsedFuncName] = FuncName.split(':');
+ // Emit a comment describing where this symbol came from
+ OS << "# " << Filename << "\n";
+ FuncName = ParsedFuncName;
+ }
+ OS << FuncName << "\n";
+ }
+ return 0;
+}
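As a usage example built only from the options defined above (the exact invocation is hypothetical): llvm-profdata order default.profdata -o order.txt reads the temporal profile traces, runs balanced partitioning, and writes one function name per line. The output is the kind of list a linker symbol-ordering option expects (for example lld's --symbol-ordering-file), with the aim of improving code locality at startup.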
+
+typedef int (*llvm_profdata_subcommand)(int, const char *[]);
+
+static std::tuple<StringRef, llvm_profdata_subcommand>
+ llvm_profdata_subcommands[] = {
+ {"merge", merge_main},
+ {"show", show_main},
+ {"order", order_main},
+ {"overlap", overlap_main},
+};
+
+int llvm_profdata_main(int argc, char **argvNonConst,
+ const llvm::ToolContext &) {
const char **argv = const_cast<const char **>(argvNonConst);
InitLLVM X(argc, argv);
StringRef ProgName(sys::path::filename(argv[0]));
if (argc > 1) {
- int (*func)(int, const char *[]) = nullptr;
- if (strcmp(argv[1], "merge") == 0)
- func = merge_main;
- else if (strcmp(argv[1], "show") == 0)
- func = show_main;
- else if (strcmp(argv[1], "overlap") == 0)
- func = overlap_main;
+ llvm_profdata_subcommand func = nullptr;
+ for (auto [subcmd_name, subcmd_action] : llvm_profdata_subcommands)
+ if (subcmd_name == argv[1])
+ func = subcmd_action;
if (func) {
std::string Invocation(ProgName.str() + " " + argv[1]);
@@ -2999,7 +3120,17 @@ int llvm_profdata_main(int argc, char **argvNonConst) {
<< "USAGE: " << ProgName << " <command> [args...]\n"
<< "USAGE: " << ProgName << " <command> -help\n\n"
<< "See each individual command --help for more details.\n"
- << "Available commands: merge, show, overlap\n";
+ << "Available commands: "
+ << join(map_range(llvm_profdata_subcommands,
+ [](auto const &KV) { return std::get<0>(KV); }),
+ ", ")
+ << "\n";
+ return 0;
+ }
+
+ if (strcmp(argv[1], "--version") == 0) {
+ outs() << ProgName << '\n';
+ cl::PrintVersionMessage();
return 0;
}
}
@@ -3009,6 +3140,10 @@ int llvm_profdata_main(int argc, char **argvNonConst) {
else
errs() << ProgName << ": Unknown command!\n";
- errs() << "USAGE: " << ProgName << " <merge|show|overlap> [args...]\n";
+ errs() << "USAGE: " << ProgName << " <"
+ << join(map_range(llvm_profdata_subcommands,
+ [](auto const &KV) { return std::get<0>(KV); }),
+ "|")
+ << "> [args...]\n";
return 1;
}
diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp
index 5279e5853cc5..0a5073d2d23f 100644
--- a/llvm/tools/llvm-readobj/COFFDumper.cpp
+++ b/llvm/tools/llvm-readobj/COFFDumper.cpp
@@ -344,6 +344,7 @@ const EnumEntry<COFF::MachineTypes> ImageFileMachineType[] = {
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_ARM ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_ARM64 ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_ARM64EC ),
+ LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_ARM64X ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_ARMNT ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_EBC ),
LLVM_READOBJ_ENUM_ENT(COFF, IMAGE_FILE_MACHINE_I386 ),
@@ -544,9 +545,10 @@ const EnumEntry<COFF::DebugType> ImageDebugType[] = {
static const EnumEntry<COFF::WeakExternalCharacteristics>
WeakExternalCharacteristics[] = {
- { "NoLibrary", COFF::IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY },
- { "Library" , COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY },
- { "Alias" , COFF::IMAGE_WEAK_EXTERN_SEARCH_ALIAS }
+ { "NoLibrary" , COFF::IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY },
+ { "Library" , COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY },
+ { "Alias" , COFF::IMAGE_WEAK_EXTERN_SEARCH_ALIAS },
+ { "AntiDependency" , COFF::IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY },
};
const EnumEntry<uint32_t> SubSectionTypes[] = {
@@ -840,6 +842,93 @@ void COFFDumper::printCOFFLoadConfig() {
else
printCOFFLoadConfig(Obj->getLoadConfig32(), Tables);
+ if (auto CHPE = Obj->getCHPEMetadata()) {
+ ListScope LS(W, "CHPEMetadata");
+ W.printHex("Version", CHPE->Version);
+
+ if (CHPE->CodeMapCount) {
+ ListScope CMLS(W, "CodeMap");
+
+ uintptr_t CodeMapInt;
+ if (Error E = Obj->getRvaPtr(CHPE->CodeMap, CodeMapInt))
+ reportError(std::move(E), Obj->getFileName());
+ auto CodeMap = reinterpret_cast<const chpe_range_entry *>(CodeMapInt);
+ for (uint32_t i = 0; i < CHPE->CodeMapCount; i++) {
+ uint32_t Start = CodeMap[i].StartOffset & ~3;
+ W.startLine() << W.hex(Start) << " - "
+ << W.hex(Start + CodeMap[i].Length) << " ";
+ switch (CodeMap[i].StartOffset & 3) {
+ case CHPE_RANGE_ARM64:
+ W.getOStream() << "ARM64\n";
+ break;
+ case CHPE_RANGE_ARM64EC:
+ W.getOStream() << "ARM64EC\n";
+ break;
+ case CHPE_RANGE_AMD64:
+ W.getOStream() << "X64\n";
+ break;
+ default:
+ W.getOStream() << W.hex(CodeMap[i].StartOffset & 3) << "\n";
+ break;
+ }
+ }
+ } else {
+ W.printNumber("CodeMap", CHPE->CodeMap);
+ }
+
+ if (CHPE->CodeRangesToEntryPointsCount) {
+ ListScope CRLS(W, "CodeRangesToEntryPoints");
+
+ uintptr_t CodeRangesInt;
+ if (Error E =
+ Obj->getRvaPtr(CHPE->CodeRangesToEntryPoints, CodeRangesInt))
+ reportError(std::move(E), Obj->getFileName());
+ auto CodeRanges =
+ reinterpret_cast<const chpe_code_range_entry *>(CodeRangesInt);
+ for (uint32_t i = 0; i < CHPE->CodeRangesToEntryPointsCount; i++) {
+ W.startLine() << W.hex(CodeRanges[i].StartRva) << " - "
+ << W.hex(CodeRanges[i].EndRva) << " -> "
+ << W.hex(CodeRanges[i].EntryPoint) << "\n";
+ }
+ } else {
+ W.printNumber("CodeRangesToEntryPoints", CHPE->CodeRangesToEntryPoints);
+ }
+
+ if (CHPE->RedirectionMetadataCount) {
+ ListScope RMLS(W, "RedirectionMetadata");
+
+ uintptr_t RedirMetadataInt;
+ if (Error E = Obj->getRvaPtr(CHPE->RedirectionMetadata, RedirMetadataInt))
+ reportError(std::move(E), Obj->getFileName());
+ auto RedirMetadata =
+ reinterpret_cast<const chpe_redirection_entry *>(RedirMetadataInt);
+ for (uint32_t i = 0; i < CHPE->RedirectionMetadataCount; i++) {
+ W.startLine() << W.hex(RedirMetadata[i].Source) << " -> "
+ << W.hex(RedirMetadata[i].Destination) << "\n";
+ }
+ } else {
+ W.printNumber("RedirectionMetadata", CHPE->RedirectionMetadata);
+ }
+
+ W.printHex("__os_arm64x_dispatch_call_no_redirect",
+ CHPE->__os_arm64x_dispatch_call_no_redirect);
+ W.printHex("__os_arm64x_dispatch_ret", CHPE->__os_arm64x_dispatch_ret);
+ W.printHex("__os_arm64x_dispatch_call", CHPE->__os_arm64x_dispatch_call);
+ W.printHex("__os_arm64x_dispatch_icall", CHPE->__os_arm64x_dispatch_icall);
+ W.printHex("__os_arm64x_dispatch_icall_cfg",
+ CHPE->__os_arm64x_dispatch_icall_cfg);
+ W.printHex("AlternateEntryPoint", CHPE->AlternateEntryPoint);
+ W.printHex("AuxiliaryIAT", CHPE->AuxiliaryIAT);
+ W.printHex("GetX64InformationFunctionPointer",
+ CHPE->GetX64InformationFunctionPointer);
+ W.printHex("SetX64InformationFunctionPointer",
+ CHPE->SetX64InformationFunctionPointer);
+ W.printHex("ExtraRFETable", CHPE->ExtraRFETable);
+ W.printHex("ExtraRFETableSize", CHPE->ExtraRFETableSize);
+ W.printHex("__os_arm64x_dispatch_fptr", CHPE->__os_arm64x_dispatch_fptr);
+ W.printHex("AuxiliaryIATCopy", CHPE->AuxiliaryIATCopy);
+ }
+
if (Tables.SEHTableVA) {
ListScope LS(W, "SEHTable");
printRVATable(Tables.SEHTableVA, Tables.SEHTableCount, 4);
@@ -919,7 +1008,7 @@ void COFFDumper::printCOFFLoadConfig(const T *Conf, LoadConfigTables &Tables) {
W.printHex("SecurityCookie", Conf->SecurityCookie);
// Print the safe SEH table if present.
- if (Conf->Size < offsetof(coff_load_configuration32, GuardCFCheckFunction))
+ if (Conf->Size < offsetof(T, GuardCFCheckFunction))
return;
W.printHex("SEHandlerTable", Conf->SEHandlerTable);
W.printNumber("SEHandlerCount", Conf->SEHandlerCount);
@@ -1670,6 +1759,7 @@ void COFFDumper::printUnwindInfo() {
}
case COFF::IMAGE_FILE_MACHINE_ARM64:
case COFF::IMAGE_FILE_MACHINE_ARM64EC:
+ case COFF::IMAGE_FILE_MACHINE_ARM64X:
case COFF::IMAGE_FILE_MACHINE_ARMNT: {
ARM::WinEH::Decoder Decoder(W, Obj->getMachine() !=
COFF::IMAGE_FILE_MACHINE_ARMNT);
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 45fff0cc4a76..aa924823e554 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -220,6 +220,15 @@ public:
void printVersionInfo() override;
void printArchSpecificInfo() override;
void printStackMap() const override;
+ void printMemtag() override;
+ ArrayRef<uint8_t> getMemtagGlobalsSectionContents(uint64_t ExpectedAddr);
+
+ // The hash histogram shows how efficiently the hash spreads the dynamic
+ // symbol table. For each chain length, the table shows the number of hash
+ // buckets with that length, as an absolute number and as a percentage of
+ // the total buckets, along with the cumulative coverage of symbols for
+ // each set of buckets.
+ void printHashHistograms() override;
const object::ELFObjectFile<ELFT> &getElfObject() const { return ObjF; };
@@ -234,6 +243,9 @@ public:
return 4;
}
+ std::vector<EnumEntry<unsigned>>
+ getOtherFlagsFromSymbol(const Elf_Ehdr &Header, const Elf_Sym &Symbol) const;
+
Elf_Dyn_Range dynamic_table() const {
// A valid .dynamic section contains an array of entries terminated
// with a DT_NULL entry. However, sometimes the section content may
@@ -296,6 +308,17 @@ protected:
virtual void printMipsGOT(const MipsGOTParser<ELFT> &Parser) = 0;
virtual void printMipsPLT(const MipsGOTParser<ELFT> &Parser) = 0;
+ virtual void printMemtag(
+ const ArrayRef<std::pair<std::string, std::string>> DynamicEntries,
+ const ArrayRef<uint8_t> AndroidNoteDesc,
+ const ArrayRef<std::pair<uint64_t, uint64_t>> Descriptors) = 0;
+
+ virtual void printHashHistogram(const Elf_Hash &HashTable) const;
+ virtual void printGnuHashHistogram(const Elf_GnuHash &GnuHashTable) const;
+ virtual void printHashHistogramStats(size_t NBucket, size_t MaxChain,
+ size_t TotalSyms, ArrayRef<size_t> Count,
+ bool IsGnu) const = 0;
+
Expected<ArrayRef<Elf_Versym>>
getVersionTable(const Elf_Shdr &Sec, ArrayRef<Elf_Sym> *SymTab,
StringRef *StrTab, const Elf_Shdr **SymTabSec) const;
@@ -323,12 +346,6 @@ protected:
void printRelocatableStackSizes(std::function<void()> PrintHeader);
void printNonRelocatableStackSizes(std::function<void()> PrintHeader);
- /// Retrieves sections with corresponding relocation sections based on
- /// IsMatch.
- void getSectionAndRelocations(
- std::function<bool(const Elf_Shdr &)> IsMatch,
- llvm::MapVector<const Elf_Shdr *, const Elf_Shdr *> &SecToRelocMap);
-
const object::ELFObjectFile<ELFT> &ObjF;
const ELFFile<ELFT> &Obj;
StringRef FileName;
@@ -570,17 +587,21 @@ public:
void printVersionSymbolSection(const Elf_Shdr *Sec) override;
void printVersionDefinitionSection(const Elf_Shdr *Sec) override;
void printVersionDependencySection(const Elf_Shdr *Sec) override;
- void printHashHistograms() override;
void printCGProfile() override;
void printBBAddrMaps() override;
void printAddrsig() override;
void printNotes() override;
void printELFLinkerOptions() override;
void printStackSizes() override;
+ void printMemtag(
+ const ArrayRef<std::pair<std::string, std::string>> DynamicEntries,
+ const ArrayRef<uint8_t> AndroidNoteDesc,
+ const ArrayRef<std::pair<uint64_t, uint64_t>> Descriptors) override;
+ void printHashHistogramStats(size_t NBucket, size_t MaxChain,
+ size_t TotalSyms, ArrayRef<size_t> Count,
+ bool IsGnu) const override;
private:
- void printHashHistogram(const Elf_Hash &HashTable);
- void printGnuHashHistogram(const Elf_GnuHash &GnuHashTable);
void printHashTableSymbols(const Elf_Hash &HashTable);
void printGnuHashTableSymbols(const Elf_GnuHash &GnuHashTable);
@@ -674,21 +695,27 @@ public:
void printVersionSymbolSection(const Elf_Shdr *Sec) override;
void printVersionDefinitionSection(const Elf_Shdr *Sec) override;
void printVersionDependencySection(const Elf_Shdr *Sec) override;
- void printHashHistograms() override;
void printCGProfile() override;
void printBBAddrMaps() override;
void printAddrsig() override;
void printNotes() override;
void printELFLinkerOptions() override;
void printStackSizes() override;
+ void printMemtag(
+ const ArrayRef<std::pair<std::string, std::string>> DynamicEntries,
+ const ArrayRef<uint8_t> AndroidNoteDesc,
+ const ArrayRef<std::pair<uint64_t, uint64_t>> Descriptors) override;
+ void printSymbolSection(const Elf_Sym &Symbol, unsigned SymIndex,
+ DataRegion<Elf_Word> ShndxTable) const;
+ void printHashHistogramStats(size_t NBucket, size_t MaxChain,
+ size_t TotalSyms, ArrayRef<size_t> Count,
+ bool IsGnu) const override;
private:
void printRelrReloc(const Elf_Relr &R) override;
void printRelRelaReloc(const Relocation<ELFT> &R,
const RelSymbol<ELFT> &RelSym) override;
- void printSymbolSection(const Elf_Sym &Symbol, unsigned SymIndex,
- DataRegion<Elf_Word> ShndxTable) const;
void printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
DataRegion<Elf_Word> ShndxTable,
std::optional<StringRef> StrTable, bool IsDynamic,
@@ -701,8 +728,22 @@ private:
void printMipsGOT(const MipsGOTParser<ELFT> &Parser) override;
void printMipsPLT(const MipsGOTParser<ELFT> &Parser) override;
void printMipsABIFlags() override;
+ virtual void printZeroSymbolOtherField(const Elf_Sym &Symbol) const;
protected:
+ virtual std::string getGroupSectionHeaderName() const;
+ void printSymbolOtherField(const Elf_Sym &Symbol) const;
+ virtual void printExpandedRelRelaReloc(const Relocation<ELFT> &R,
+ StringRef SymbolName,
+ StringRef RelocName);
+ virtual void printDefaultRelRelaReloc(const Relocation<ELFT> &R,
+ StringRef SymbolName,
+ StringRef RelocName);
+ virtual void printRelocationSectionInfo(const Elf_Shdr &Sec, StringRef Name,
+ const unsigned SecNdx);
+ virtual void printSectionGroupMembers(StringRef Name, uint64_t Idx) const;
+ virtual void printEmptyGroupMessage() const;
+
ScopedPrinter &W;
};
@@ -715,9 +756,23 @@ public:
JSONELFDumper(const object::ELFObjectFile<ELFT> &ObjF, ScopedPrinter &Writer)
: LLVMELFDumper<ELFT>(ObjF, Writer) {}
+ std::string getGroupSectionHeaderName() const override;
+
void printFileSummary(StringRef FileStr, ObjectFile &Obj,
ArrayRef<std::string> InputFilenames,
const Archive *A) override;
+ virtual void printZeroSymbolOtherField(const Elf_Sym &Symbol) const override;
+
+ void printDefaultRelRelaReloc(const Relocation<ELFT> &R,
+ StringRef SymbolName,
+ StringRef RelocName) override;
+
+ void printRelocationSectionInfo(const Elf_Shdr &Sec, StringRef Name,
+ const unsigned SecNdx) override;
+
+ void printSectionGroupMembers(StringRef Name, uint64_t Idx) const override;
+
+ void printEmptyGroupMessage() const override;
private:
std::unique_ptr<DictScope> FileScope;
@@ -853,7 +908,7 @@ ELFDumper<ELFT>::getShndxTable(const Elf_Shdr *Symtab) const {
}
static std::string maybeDemangle(StringRef Name) {
- return opts::Demangle ? demangle(std::string(Name)) : Name.str();
+ return opts::Demangle ? demangle(Name) : Name.str();
}
template <typename ELFT>
@@ -1533,6 +1588,8 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90A),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX940),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX941),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX942),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
@@ -1548,6 +1605,8 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1101),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1102),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1103),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1150),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1151),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_V3),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_SRAMECC_V3)
};
@@ -1593,6 +1652,8 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90A),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX940),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX941),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX942),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
@@ -1608,6 +1669,8 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1101),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1102),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1103),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1150),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1151),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ANY_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_OFF_V4),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_FEATURE_XNACK_ON_V4),
@@ -2251,7 +2314,29 @@ std::string ELFDumper<ELFT>::getDynamicEntry(uint64_t Type,
case DT_AARCH64_BTI_PLT:
case DT_AARCH64_PAC_PLT:
case DT_AARCH64_VARIANT_PCS:
+ case DT_AARCH64_MEMTAG_GLOBALSSZ:
return std::to_string(Value);
+ case DT_AARCH64_MEMTAG_MODE:
+ switch (Value) {
+ case 0:
+ return "Synchronous (0)";
+ case 1:
+ return "Asynchronous (1)";
+ default:
+ return (Twine("Unknown (") + Twine(Value) + ")").str();
+ }
+ case DT_AARCH64_MEMTAG_HEAP:
+ case DT_AARCH64_MEMTAG_STACK:
+ switch (Value) {
+ case 0:
+ return "Disabled (0)";
+ case 1:
+ return "Enabled (1)";
+ default:
+ return (Twine("Unknown (") + Twine(Value) + ")").str();
+ }
+ case DT_AARCH64_MEMTAG_GLOBALS:
+ return (Twine("0x") + utohexstr(Value, /*LowerCase=*/true)).str();
default:
break;
}
@@ -2624,6 +2709,116 @@ void ELFDumper<ELFT>::printGnuHashTable() {
W.printHexList("Values", *Chains);
}
+template <typename ELFT> void ELFDumper<ELFT>::printHashHistograms() {
+ // Print histogram for the .hash section.
+ if (this->HashTable) {
+ if (Error E = checkHashTable<ELFT>(*this, this->HashTable))
+ this->reportUniqueWarning(std::move(E));
+ else
+ printHashHistogram(*this->HashTable);
+ }
+
+ // Print histogram for the .gnu.hash section.
+ if (this->GnuHashTable) {
+ if (Error E = checkGNUHashTable<ELFT>(this->Obj, this->GnuHashTable))
+ this->reportUniqueWarning(std::move(E));
+ else
+ printGnuHashHistogram(*this->GnuHashTable);
+ }
+}
+
+template <typename ELFT>
+void ELFDumper<ELFT>::printHashHistogram(const Elf_Hash &HashTable) const {
+ size_t NBucket = HashTable.nbucket;
+ size_t NChain = HashTable.nchain;
+ ArrayRef<Elf_Word> Buckets = HashTable.buckets();
+ ArrayRef<Elf_Word> Chains = HashTable.chains();
+ size_t TotalSyms = 0;
+ // A valid hash table always has chains of length 0, so MaxChain starts at 1.
+ size_t MaxChain = 1;
+
+ if (NChain == 0 || NBucket == 0)
+ return;
+
+ std::vector<size_t> ChainLen(NBucket, 0);
+ // Go over all buckets and note the chain length of each bucket (total
+ // unique chain lengths).
+ for (size_t B = 0; B < NBucket; ++B) {
+ BitVector Visited(NChain);
+ for (size_t C = Buckets[B]; C < NChain; C = Chains[C]) {
+ if (C == ELF::STN_UNDEF)
+ break;
+ if (Visited[C]) {
+ this->reportUniqueWarning(
+ ".hash section is invalid: bucket " + Twine(C) +
+ ": a cycle was detected in the linked chain");
+ break;
+ }
+ Visited[C] = true;
+ if (MaxChain <= ++ChainLen[B])
+ ++MaxChain;
+ }
+ TotalSyms += ChainLen[B];
+ }
+
+ if (!TotalSyms)
+ return;
+
+ std::vector<size_t> Count(MaxChain, 0);
+ // Count the number of buckets with each chain length.
+ for (size_t B = 0; B < NBucket; B++)
+ ++Count[ChainLen[B]];
+ // Print the number of buckets with each chain length and their cumulative
+ // coverage of the symbols.
+ printHashHistogramStats(NBucket, MaxChain, TotalSyms, Count, /*IsGnu=*/false);
+}
+
+template <class ELFT>
+void ELFDumper<ELFT>::printGnuHashHistogram(
+ const Elf_GnuHash &GnuHashTable) const {
+ Expected<ArrayRef<Elf_Word>> ChainsOrErr =
+ getGnuHashTableChains<ELFT>(this->DynSymRegion, &GnuHashTable);
+ if (!ChainsOrErr) {
+ this->reportUniqueWarning("unable to print the GNU hash table histogram: " +
+ toString(ChainsOrErr.takeError()));
+ return;
+ }
+
+ ArrayRef<Elf_Word> Chains = *ChainsOrErr;
+ size_t Symndx = GnuHashTable.symndx;
+ size_t TotalSyms = 0;
+ size_t MaxChain = 1;
+
+ size_t NBucket = GnuHashTable.nbuckets;
+ if (Chains.empty() || NBucket == 0)
+ return;
+
+ ArrayRef<Elf_Word> Buckets = GnuHashTable.buckets();
+ std::vector<size_t> ChainLen(NBucket, 0);
+ for (size_t B = 0; B < NBucket; ++B) {
+ if (!Buckets[B])
+ continue;
+ size_t Len = 1;
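+ // In the GNU hash format, a chain ends at the entry whose low bit is set.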
+ for (size_t C = Buckets[B] - Symndx;
+ C < Chains.size() && (Chains[C] & 1) == 0; ++C)
+ if (MaxChain < ++Len)
+ ++MaxChain;
+ ChainLen[B] = Len;
+ TotalSyms += Len;
+ }
+ ++MaxChain;
+
+ if (!TotalSyms)
+ return;
+
+ std::vector<size_t> Count(MaxChain, 0);
+ for (size_t B = 0; B < NBucket; ++B)
+ ++Count[ChainLen[B]];
+ // Print the number of buckets with each chain length and their cumulative
+ // coverage of the symbols.
+ printHashHistogramStats(NBucket, MaxChain, TotalSyms, Count, /*IsGnu=*/true);
+}
+
template <typename ELFT> void ELFDumper<ELFT>::printLoadName() {
StringRef SOName = "<Not found>";
if (SONameOffset)
@@ -3243,6 +3438,35 @@ void ELFDumper<ELFT>::printReloc(const Relocation<ELFT> &R, unsigned RelIndex,
printRelRelaReloc(R, *Target);
}
+template <class ELFT>
+std::vector<EnumEntry<unsigned>>
+ELFDumper<ELFT>::getOtherFlagsFromSymbol(const Elf_Ehdr &Header,
+ const Elf_Sym &Symbol) const {
+ std::vector<EnumEntry<unsigned>> SymOtherFlags(std::begin(ElfSymOtherFlags),
+ std::end(ElfSymOtherFlags));
+ if (Header.e_machine == EM_MIPS) {
+ // Someone in their infinite wisdom decided to make the STO_MIPS_MIPS16
+ // flag overlap with other ST_MIPS_xxx flags, so consider both
+ // cases separately.
+ if ((Symbol.st_other & STO_MIPS_MIPS16) == STO_MIPS_MIPS16)
+ SymOtherFlags.insert(SymOtherFlags.end(),
+ std::begin(ElfMips16SymOtherFlags),
+ std::end(ElfMips16SymOtherFlags));
+ else
+ SymOtherFlags.insert(SymOtherFlags.end(),
+ std::begin(ElfMipsSymOtherFlags),
+ std::end(ElfMipsSymOtherFlags));
+ } else if (Header.e_machine == EM_AARCH64) {
+ SymOtherFlags.insert(SymOtherFlags.end(),
+ std::begin(ElfAArch64SymOtherFlags),
+ std::end(ElfAArch64SymOtherFlags));
+ } else if (Header.e_machine == EM_RISCV) {
+ SymOtherFlags.insert(SymOtherFlags.end(), std::begin(ElfRISCVSymOtherFlags),
+ std::end(ElfRISCVSymOtherFlags));
+ }
+ return SymOtherFlags;
+}
+
static inline void printFields(formatted_raw_ostream &OS, StringRef Str1,
StringRef Str2) {
OS.PadToColumn(2u);
@@ -3537,15 +3761,18 @@ void GNUELFDumper<ELFT>::printRelRelaReloc(const Relocation<ELFT> &R,
if (RelSym.Sym)
Fields[3].Str =
to_string(format_hex_no_prefix(RelSym.Sym->getValue(), Width));
+ if (RelSym.Sym && RelSym.Name.empty())
+ Fields[4].Str = "<null>";
+ else
+ Fields[4].Str = std::string(RelSym.Name);
- Fields[4].Str = std::string(RelSym.Name);
for (const Field &F : Fields)
printField(F);
std::string Addend;
if (std::optional<int64_t> A = R.Addend) {
int64_t RelAddend = *A;
- if (!RelSym.Name.empty()) {
+ if (!Fields[4].Str.empty()) {
if (RelAddend < 0) {
Addend = " - ";
RelAddend = std::abs(RelAddend);
@@ -4744,108 +4971,16 @@ void GNUELFDumper<ELFT>::printVersionDependencySection(const Elf_Shdr *Sec) {
}
template <class ELFT>
-void GNUELFDumper<ELFT>::printHashHistogram(const Elf_Hash &HashTable) {
- size_t NBucket = HashTable.nbucket;
- size_t NChain = HashTable.nchain;
- ArrayRef<Elf_Word> Buckets = HashTable.buckets();
- ArrayRef<Elf_Word> Chains = HashTable.chains();
- size_t TotalSyms = 0;
- // If hash table is correct, we have at least chains with 0 length
- size_t MaxChain = 1;
- size_t CumulativeNonZero = 0;
-
- if (NChain == 0 || NBucket == 0)
- return;
-
- std::vector<size_t> ChainLen(NBucket, 0);
- // Go over all buckets and and note chain lengths of each bucket (total
- // unique chain lengths).
- for (size_t B = 0; B < NBucket; B++) {
- BitVector Visited(NChain);
- for (size_t C = Buckets[B]; C < NChain; C = Chains[C]) {
- if (C == ELF::STN_UNDEF)
- break;
- if (Visited[C]) {
- this->reportUniqueWarning(".hash section is invalid: bucket " +
- Twine(C) +
- ": a cycle was detected in the linked chain");
- break;
- }
- Visited[C] = true;
- if (MaxChain <= ++ChainLen[B])
- MaxChain++;
- }
- TotalSyms += ChainLen[B];
- }
-
- if (!TotalSyms)
- return;
-
- std::vector<size_t> Count(MaxChain, 0);
- // Count how long is the chain for each bucket
- for (size_t B = 0; B < NBucket; B++)
- ++Count[ChainLen[B]];
- // Print Number of buckets with each chain lengths and their cumulative
- // coverage of the symbols
- OS << "Histogram for bucket list length (total of " << NBucket
- << " buckets)\n"
- << " Length Number % of total Coverage\n";
- for (size_t I = 0; I < MaxChain; I++) {
- CumulativeNonZero += Count[I] * I;
- OS << format("%7lu %-10lu (%5.1f%%) %5.1f%%\n", I, Count[I],
- (Count[I] * 100.0) / NBucket,
- (CumulativeNonZero * 100.0) / TotalSyms);
- }
-}
-
-template <class ELFT>
-void GNUELFDumper<ELFT>::printGnuHashHistogram(
- const Elf_GnuHash &GnuHashTable) {
- Expected<ArrayRef<Elf_Word>> ChainsOrErr =
- getGnuHashTableChains<ELFT>(this->DynSymRegion, &GnuHashTable);
- if (!ChainsOrErr) {
- this->reportUniqueWarning("unable to print the GNU hash table histogram: " +
- toString(ChainsOrErr.takeError()));
- return;
- }
-
- ArrayRef<Elf_Word> Chains = *ChainsOrErr;
- size_t Symndx = GnuHashTable.symndx;
- size_t TotalSyms = 0;
- size_t MaxChain = 1;
+void GNUELFDumper<ELFT>::printHashHistogramStats(size_t NBucket,
+ size_t MaxChain,
+ size_t TotalSyms,
+ ArrayRef<size_t> Count,
+ bool IsGnu) const {
size_t CumulativeNonZero = 0;
-
- size_t NBucket = GnuHashTable.nbuckets;
- if (Chains.empty() || NBucket == 0)
- return;
-
- ArrayRef<Elf_Word> Buckets = GnuHashTable.buckets();
- std::vector<size_t> ChainLen(NBucket, 0);
- for (size_t B = 0; B < NBucket; B++) {
- if (!Buckets[B])
- continue;
- size_t Len = 1;
- for (size_t C = Buckets[B] - Symndx;
- C < Chains.size() && (Chains[C] & 1) == 0; C++)
- if (MaxChain < ++Len)
- MaxChain++;
- ChainLen[B] = Len;
- TotalSyms += Len;
- }
- MaxChain++;
-
- if (!TotalSyms)
- return;
-
- std::vector<size_t> Count(MaxChain, 0);
- for (size_t B = 0; B < NBucket; B++)
- ++Count[ChainLen[B]];
- // Print Number of buckets with each chain lengths and their cumulative
- // coverage of the symbols
- OS << "Histogram for `.gnu.hash' bucket list length (total of " << NBucket
- << " buckets)\n"
+ OS << "Histogram for" << (IsGnu ? " `.gnu.hash'" : "")
+ << " bucket list length (total of " << NBucket << " buckets)\n"
<< " Length Number % of total Coverage\n";
- for (size_t I = 0; I < MaxChain; I++) {
+ for (size_t I = 0; I < MaxChain; ++I) {
CumulativeNonZero += Count[I] * I;
OS << format("%7lu %-10lu (%5.1f%%) %5.1f%%\n", I, Count[I],
(Count[I] * 100.0) / NBucket,
@@ -4853,28 +4988,6 @@ void GNUELFDumper<ELFT>::printGnuHashHistogram(
}
}
-// Hash histogram shows statistics of how efficient the hash was for the
-// dynamic symbol table. The table shows the number of hash buckets for
-// different lengths of chains as an absolute number and percentage of the total
-// buckets, and the cumulative coverage of symbols for each set of buckets.
-template <class ELFT> void GNUELFDumper<ELFT>::printHashHistograms() {
- // Print histogram for the .hash section.
- if (this->HashTable) {
- if (Error E = checkHashTable<ELFT>(*this, this->HashTable))
- this->reportUniqueWarning(std::move(E));
- else
- printHashHistogram(*this->HashTable);
- }
-
- // Print histogram for the .gnu.hash section.
- if (this->GnuHashTable) {
- if (Error E = checkGNUHashTable<ELFT>(this->Obj, this->GnuHashTable))
- this->reportUniqueWarning(std::move(E));
- else
- printGnuHashHistogram(*this->GnuHashTable);
- }
-}
-
template <class ELFT> void GNUELFDumper<ELFT>::printCGProfile() {
OS << "GNUStyle::printCGProfile not implemented\n";
}
@@ -5194,10 +5307,36 @@ static bool printAndroidNote(raw_ostream &OS, uint32_t NoteType,
return false;
for (const auto &KV : Props)
OS << " " << KV.first << ": " << KV.second << '\n';
- OS << '\n';
return true;
}
+template <class ELFT>
+void GNUELFDumper<ELFT>::printMemtag(
+ const ArrayRef<std::pair<std::string, std::string>> DynamicEntries,
+ const ArrayRef<uint8_t> AndroidNoteDesc,
+ const ArrayRef<std::pair<uint64_t, uint64_t>> Descriptors) {
+ OS << "Memtag Dynamic Entries:\n";
+ if (DynamicEntries.empty())
+ OS << " < none found >\n";
+ for (const auto &DynamicEntryKV : DynamicEntries)
+ OS << " " << DynamicEntryKV.first << ": " << DynamicEntryKV.second
+ << "\n";
+
+ if (!AndroidNoteDesc.empty()) {
+ OS << "Memtag Android Note:\n";
+ printAndroidNote(OS, ELF::NT_ANDROID_TYPE_MEMTAG, AndroidNoteDesc);
+ }
+
+ if (Descriptors.empty())
+ return;
+
+ OS << "Memtag Global Descriptors:\n";
+ for (const auto &[Addr, BytesToTag] : Descriptors) {
+ OS << " 0x" << utohexstr(Addr, /*LowerCase=*/true) << ": 0x"
+ << utohexstr(BytesToTag, /*LowerCase=*/true) << "\n";
+ }
+}
+
template <typename ELFT>
static bool printLLVMOMPOFFLOADNote(raw_ostream &OS, uint32_t NoteType,
ArrayRef<uint8_t> Desc) {
@@ -5386,10 +5525,16 @@ static AMDGPUNote getAMDGPUNote(uint32_t NoteType, ArrayRef<uint8_t> Desc) {
if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
return {"", ""};
- AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
std::string MetadataString;
- if (!Verifier.verify(MsgPackDoc.getRoot()))
- MetadataString = "Invalid AMDGPU Metadata\n";
+
+ // FIXME: The metadata verifier only works with AMDHSA.
+ // This is an ugly workaround to avoid running the verifier for other
+ // metadata formats (e.g. amdpal).
+ if (MsgPackString.find("amdhsa.") != StringRef::npos) {
+ AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
+ if (!Verifier.verify(MsgPackDoc.getRoot()))
+ MetadataString = "Invalid AMDGPU Metadata\n";
+ }
raw_string_ostream StrOS(MetadataString);
if (MsgPackDoc.getRoot().isScalar()) {
@@ -5623,6 +5768,12 @@ const NoteType CoreNoteTypes[] = {
"NT_ARM_HW_BREAK (AArch hardware breakpoint registers)"},
{ELF::NT_ARM_HW_WATCH,
"NT_ARM_HW_WATCH (AArch hardware watchpoint registers)"},
+ {ELF::NT_ARM_SVE, "NT_ARM_SVE (AArch64 SVE registers)"},
+ {ELF::NT_ARM_PAC_MASK,
+ "NT_ARM_PAC_MASK (AArch64 Pointer Authentication code masks)"},
+ {ELF::NT_ARM_SSVE, "NT_ARM_SSVE (AArch64 Streaming SVE registers)"},
+ {ELF::NT_ARM_ZA, "NT_ARM_ZA (AArch64 SME ZA registers)"},
+ {ELF::NT_ARM_ZT, "NT_ARM_ZT (AArch64 SME ZT registers)"},
{ELF::NT_FILE, "NT_FILE (mapped files)"},
{ELF::NT_PRXFPREG, "NT_PRXFPREG (user_xfpregs structure)"},
@@ -5681,10 +5832,10 @@ StringRef getNoteTypeName(const typename ELFT::Note &Note, unsigned ELFType) {
}
template <class ELFT>
-static void printNotesHelper(
+static void processNotesHelper(
const ELFDumper<ELFT> &Dumper,
llvm::function_ref<void(std::optional<StringRef>, typename ELFT::Off,
- typename ELFT::Addr)>
+ typename ELFT::Addr, size_t)>
StartNotesFn,
llvm::function_ref<Error(const typename ELFT::Note &, bool)> ProcessNoteFn,
llvm::function_ref<void()> FinishNotesFn) {
@@ -5697,7 +5848,7 @@ static void printNotesHelper(
if (S.sh_type != SHT_NOTE)
continue;
StartNotesFn(expectedToStdOptional(Obj.getSectionName(S)), S.sh_offset,
- S.sh_size);
+ S.sh_size, S.sh_addralign);
Error Err = Error::success();
size_t I = 0;
for (const typename ELFT::Note Note : Obj.notes(S, Err)) {
@@ -5728,7 +5879,7 @@ static void printNotesHelper(
const typename ELFT::Phdr &P = (*PhdrsOrErr)[I];
if (P.p_type != PT_NOTE)
continue;
- StartNotesFn(/*SecName=*/std::nullopt, P.p_offset, P.p_filesz);
+ StartNotesFn(/*SecName=*/std::nullopt, P.p_offset, P.p_filesz, P.p_align);
Error Err = Error::success();
size_t Index = 0;
for (const typename ELFT::Note Note : Obj.notes(P, Err)) {
@@ -5748,10 +5899,12 @@ static void printNotesHelper(
}
template <class ELFT> void GNUELFDumper<ELFT>::printNotes() {
+ size_t Align = 0;
bool IsFirstHeader = true;
auto PrintHeader = [&](std::optional<StringRef> SecName,
const typename ELFT::Off Offset,
- const typename ELFT::Addr Size) {
+ const typename ELFT::Addr Size, size_t Al) {
+ Align = std::max<size_t>(Al, 4);
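+ // Use at least 4-byte alignment when reading note descriptors.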
// Print a newline between notes sections to match GNU readelf.
if (!IsFirstHeader) {
OS << '\n';
@@ -5772,7 +5925,7 @@ template <class ELFT> void GNUELFDumper<ELFT>::printNotes() {
auto ProcessNote = [&](const Elf_Note &Note, bool IsCore) -> Error {
StringRef Name = Note.getName();
- ArrayRef<uint8_t> Descriptor = Note.getDesc();
+ ArrayRef<uint8_t> Descriptor = Note.getDesc(Align);
Elf_Word Type = Note.getType();
// Print the note owner/type.
@@ -5837,7 +5990,132 @@ template <class ELFT> void GNUELFDumper<ELFT>::printNotes() {
return Error::success();
};
- printNotesHelper(*this, PrintHeader, ProcessNote, []() {});
+ processNotesHelper(*this, /*StartNotesFn=*/PrintHeader,
+ /*ProcessNoteFn=*/ProcessNote, /*FinishNotesFn=*/[]() {});
+}
+
+template <class ELFT>
+ArrayRef<uint8_t>
+ELFDumper<ELFT>::getMemtagGlobalsSectionContents(uint64_t ExpectedAddr) {
+ for (const typename ELFT::Shdr &Sec : cantFail(Obj.sections())) {
+ if (Sec.sh_type != SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC)
+ continue;
+ if (Sec.sh_addr != ExpectedAddr) {
+ reportUniqueWarning(
+ "SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC section was unexpectedly at 0x" +
+ Twine::utohexstr(Sec.sh_addr) +
+ ", when DT_AARCH64_MEMTAG_GLOBALS says it should be at 0x" +
+ Twine::utohexstr(ExpectedAddr));
+ return ArrayRef<uint8_t>();
+ }
+ Expected<ArrayRef<uint8_t>> Contents = Obj.getSectionContents(Sec);
+ if (auto E = Contents.takeError()) {
+ reportUniqueWarning(
+ "couldn't get SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC section contents: " +
+ toString(std::move(E)));
+ return ArrayRef<uint8_t>();
+ }
+ return Contents.get();
+ }
+ return ArrayRef<uint8_t>();
+}
+
+// Reserve the lower three bits of the first byte of the step distance when
+// encoding the memtag descriptors. Found to be the best overall size tradeoff
+// when compiling Android T with full MTE globals enabled.
+constexpr uint64_t MemtagStepVarintReservedBits = 3;
+constexpr uint64_t MemtagGranuleSize = 16;
+
+template <typename ELFT> void ELFDumper<ELFT>::printMemtag() {
+ if (Obj.getHeader().e_machine != EM_AARCH64) return;
+ std::vector<std::pair<std::string, std::string>> DynamicEntries;
+ uint64_t MemtagGlobalsSz = 0;
+ uint64_t MemtagGlobals = 0;
+ for (const typename ELFT::Dyn &Entry : dynamic_table()) {
+ uintX_t Tag = Entry.getTag();
+ switch (Tag) {
+ case DT_AARCH64_MEMTAG_GLOBALSSZ:
+ MemtagGlobalsSz = Entry.getVal();
+ DynamicEntries.emplace_back(Obj.getDynamicTagAsString(Tag),
+ getDynamicEntry(Tag, Entry.getVal()));
+ break;
+ case DT_AARCH64_MEMTAG_GLOBALS:
+ MemtagGlobals = Entry.getVal();
+ DynamicEntries.emplace_back(Obj.getDynamicTagAsString(Tag),
+ getDynamicEntry(Tag, Entry.getVal()));
+ break;
+ case DT_AARCH64_MEMTAG_MODE:
+ case DT_AARCH64_MEMTAG_HEAP:
+ case DT_AARCH64_MEMTAG_STACK:
+ DynamicEntries.emplace_back(Obj.getDynamicTagAsString(Tag),
+ getDynamicEntry(Tag, Entry.getVal()));
+ break;
+ }
+ }
+
+ ArrayRef<uint8_t> AndroidNoteDesc;
+ auto FindAndroidNote = [&](const Elf_Note &Note, bool IsCore) -> Error {
+ if (Note.getName() == "Android" &&
+ Note.getType() == ELF::NT_ANDROID_TYPE_MEMTAG)
+ AndroidNoteDesc = Note.getDesc(4);
+ return Error::success();
+ };
+
+ processNotesHelper(
+ *this,
+ /*StartNotesFn=*/
+ [](std::optional<StringRef>, const typename ELFT::Off,
+ const typename ELFT::Addr, size_t) {},
+ /*ProcessNoteFn=*/FindAndroidNote, /*FinishNotesFn=*/[]() {});
+
+ ArrayRef<uint8_t> Contents = getMemtagGlobalsSectionContents(MemtagGlobals);
+ if (Contents.size() != MemtagGlobalsSz) {
+ reportUniqueWarning(
+ "mismatch between DT_AARCH64_MEMTAG_GLOBALSSZ (0x" +
+ Twine::utohexstr(MemtagGlobalsSz) +
+ ") and SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC section size (0x" +
+ Twine::utohexstr(Contents.size()) + ")");
+ Contents = ArrayRef<uint8_t>();
+ }
+
+ std::vector<std::pair<uint64_t, uint64_t>> GlobalDescriptors;
+ uint64_t Address = 0;
+ // See the AArch64 MemtagABI document for a description of the encoding scheme:
+ // https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#83encoding-of-sht_aarch64_memtag_globals_dynamic
+ for (size_t I = 0; I < Contents.size();) {
+ const char *Error = nullptr;
+ unsigned DecodedBytes = 0;
+ uint64_t Value = decodeULEB128(Contents.data() + I, &DecodedBytes,
+ Contents.end(), &Error);
+ I += DecodedBytes;
+ if (Error) {
+ reportUniqueWarning(
+ "error decoding distance uleb, " + Twine(DecodedBytes) +
+ " byte(s) into SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC: " + Twine(Error));
+ GlobalDescriptors.clear();
+ break;
+ }
+ uint64_t Distance = Value >> MemtagStepVarintReservedBits;
+ uint64_t GranulesToTag = Value & ((1 << MemtagStepVarintReservedBits) - 1);
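+ // A granule count of zero means the real count is stored (minus one) in a
+ // follow-on ULEB128.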
+ if (GranulesToTag == 0) {
+ GranulesToTag = decodeULEB128(Contents.data() + I, &DecodedBytes,
+ Contents.end(), &Error) +
+ 1;
+ I += DecodedBytes;
+ if (Error) {
+ reportUniqueWarning(
+ "error decoding size-only uleb, " + Twine(DecodedBytes) +
+ " byte(s) into SHT_AARCH64_MEMTAG_GLOBALS_DYNAMIC: " + Twine(Error));
+ GlobalDescriptors.clear();
+ break;
+ }
+ }
+ Address += Distance * MemtagGranuleSize;
+ GlobalDescriptors.emplace_back(Address, GranulesToTag * MemtagGranuleSize);
+ Address += GranulesToTag * MemtagGranuleSize;
+ }
+
+ printMemtag(DynamicEntries, AndroidNoteDesc, GlobalDescriptors);
}
template <class ELFT> void GNUELFDumper<ELFT>::printELFLinkerOptions() {
@@ -6197,37 +6475,10 @@ void ELFDumper<ELFT>::printNonRelocatableStackSizes(
}
template <class ELFT>
-void ELFDumper<ELFT>::getSectionAndRelocations(
- std::function<bool(const Elf_Shdr &)> IsMatch,
- llvm::MapVector<const Elf_Shdr *, const Elf_Shdr *> &SecToRelocMap) {
- for (const Elf_Shdr &Sec : cantFail(Obj.sections())) {
- if (IsMatch(Sec))
- if (SecToRelocMap.insert(std::make_pair(&Sec, (const Elf_Shdr *)nullptr))
- .second)
- continue;
-
- if (Sec.sh_type != ELF::SHT_RELA && Sec.sh_type != ELF::SHT_REL)
- continue;
-
- Expected<const Elf_Shdr *> RelSecOrErr = Obj.getSection(Sec.sh_info);
- if (!RelSecOrErr) {
- reportUniqueWarning(describe(Sec) +
- ": failed to get a relocated section: " +
- toString(RelSecOrErr.takeError()));
- continue;
- }
- const Elf_Shdr *ContentsSec = *RelSecOrErr;
- if (IsMatch(*ContentsSec))
- SecToRelocMap[ContentsSec] = &Sec;
- }
-}
-
-template <class ELFT>
void ELFDumper<ELFT>::printRelocatableStackSizes(
std::function<void()> PrintHeader) {
// Build a map between stack size sections and their corresponding relocation
// sections.
- llvm::MapVector<const Elf_Shdr *, const Elf_Shdr *> StackSizeRelocMap;
auto IsMatch = [&](const Elf_Shdr &Sec) -> bool {
StringRef SectionName;
if (Expected<StringRef> NameOrErr = Obj.getSectionName(Sec))
@@ -6237,9 +6488,16 @@ void ELFDumper<ELFT>::printRelocatableStackSizes(
return SectionName == ".stack_sizes";
};
- getSectionAndRelocations(IsMatch, StackSizeRelocMap);
- for (const auto &StackSizeMapEntry : StackSizeRelocMap) {
+ Expected<MapVector<const Elf_Shdr *, const Elf_Shdr *>>
+ StackSizeRelocMapOrErr = Obj.getSectionAndRelocations(IsMatch);
+ if (!StackSizeRelocMapOrErr) {
+ reportUniqueWarning("unable to get stack size map section(s): " +
+ toString(StackSizeRelocMapOrErr.takeError()));
+ return;
+ }
+
+ for (const auto &StackSizeMapEntry : *StackSizeRelocMapOrErr) {
PrintHeader();
const Elf_Shdr *StackSizesELFSec = StackSizeMapEntry.first;
const Elf_Shdr *RelocSec = StackSizeMapEntry.second;
@@ -6607,9 +6865,9 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printGroupSections() {
W.printNumber("Link", G.Link);
W.printNumber("Info", G.Info);
W.printHex("Type", getGroupType(G.Type), G.Type);
- W.startLine() << "Signature: " << G.Signature << "\n";
+ W.printString("Signature", G.Signature);
- ListScope L(W, "Section(s) in group");
+ ListScope L(W, getGroupSectionHeaderName());
for (const GroupMember &GM : G.Members) {
const GroupSection *MainGroup = Map[GM.Index];
if (MainGroup != &G)
@@ -6619,12 +6877,23 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printGroupSections() {
Twine(MainGroup->Index) +
", was also found in the group section with index " +
Twine(G.Index));
- W.startLine() << GM.Name << " (" << GM.Index << ")\n";
+ printSectionGroupMembers(GM.Name, GM.Index);
}
}
if (V.empty())
- W.startLine() << "There are no group sections in the file.\n";
+ printEmptyGroupMessage();
+}
+
+template <class ELFT>
+std::string LLVMELFDumper<ELFT>::getGroupSectionHeaderName() const {
+ return "Section(s) in group";
+}
+
+template <class ELFT>
+void LLVMELFDumper<ELFT>::printSectionGroupMembers(StringRef Name,
+ uint64_t Idx) const {
+ W.startLine() << Name << " (" << Idx << ")\n";
}
template <class ELFT> void LLVMELFDumper<ELFT>::printRelocations() {
@@ -6636,11 +6905,7 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printRelocations() {
StringRef Name = this->getPrintableSectionName(Sec);
unsigned SecNdx = &Sec - &cantFail(this->Obj.sections()).front();
- W.startLine() << "Section (" << SecNdx << ") " << Name << " {\n";
- W.indent();
- this->printRelocationsHelper(Sec);
- W.unindent();
- W.startLine() << "}\n";
+ printRelocationSectionInfo(Sec, Name, SecNdx);
}
}
@@ -6650,26 +6915,54 @@ void LLVMELFDumper<ELFT>::printRelrReloc(const Elf_Relr &R) {
}
template <class ELFT>
+void LLVMELFDumper<ELFT>::printExpandedRelRelaReloc(const Relocation<ELFT> &R,
+ StringRef SymbolName,
+ StringRef RelocName) {
+ DictScope Group(W, "Relocation");
+ W.printHex("Offset", R.Offset);
+ W.printNumber("Type", RelocName, R.Type);
+ W.printNumber("Symbol", !SymbolName.empty() ? SymbolName : "-", R.Symbol);
+ if (R.Addend)
+ W.printHex("Addend", (uintX_t)*R.Addend);
+}
+
+template <class ELFT>
+void LLVMELFDumper<ELFT>::printDefaultRelRelaReloc(const Relocation<ELFT> &R,
+ StringRef SymbolName,
+ StringRef RelocName) {
+ raw_ostream &OS = W.startLine();
+ OS << W.hex(R.Offset) << " " << RelocName << " "
+ << (!SymbolName.empty() ? SymbolName : "-");
+ if (R.Addend)
+ OS << " " << W.hex((uintX_t)*R.Addend);
+ OS << "\n";
+}
+
+template <class ELFT>
+void LLVMELFDumper<ELFT>::printRelocationSectionInfo(const Elf_Shdr &Sec,
+ StringRef Name,
+ const unsigned SecNdx) {
+ DictScope D(W, (Twine("Section (") + Twine(SecNdx) + ") " + Name).str());
+ this->printRelocationsHelper(Sec);
+}
+
+template <class ELFT> void LLVMELFDumper<ELFT>::printEmptyGroupMessage() const {
+ W.startLine() << "There are no group sections in the file.\n";
+}
+
+template <class ELFT>
void LLVMELFDumper<ELFT>::printRelRelaReloc(const Relocation<ELFT> &R,
const RelSymbol<ELFT> &RelSym) {
StringRef SymbolName = RelSym.Name;
+ if (RelSym.Sym && RelSym.Name.empty())
+ SymbolName = "<null>";
SmallString<32> RelocName;
this->Obj.getRelocationTypeName(R.Type, RelocName);
if (opts::ExpandRelocs) {
- DictScope Group(W, "Relocation");
- W.printHex("Offset", R.Offset);
- W.printNumber("Type", RelocName, R.Type);
- W.printNumber("Symbol", !SymbolName.empty() ? SymbolName : "-", R.Symbol);
- if (R.Addend)
- W.printHex("Addend", (uintX_t)*R.Addend);
+ printExpandedRelRelaReloc(R, SymbolName, RelocName);
} else {
- raw_ostream &OS = W.startLine();
- OS << W.hex(R.Offset) << " " << RelocName << " "
- << (!SymbolName.empty() ? SymbolName : "-");
- if (R.Addend)
- OS << " " << W.hex((uintX_t)*R.Addend);
- OS << "\n";
+ printDefaultRelRelaReloc(R, SymbolName, RelocName);
}
}
@@ -6785,6 +7078,22 @@ void LLVMELFDumper<ELFT>::printSymbolSection(
}
template <class ELFT>
+void LLVMELFDumper<ELFT>::printSymbolOtherField(const Elf_Sym &Symbol) const {
+ std::vector<EnumEntry<unsigned>> SymOtherFlags =
+ this->getOtherFlagsFromSymbol(this->Obj.getHeader(), Symbol);
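+ // The low two bits of st_other hold the symbol visibility (STV_*).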
+ W.printFlags("Other", Symbol.st_other, ArrayRef(SymOtherFlags), 0x3u);
+}
+
+template <class ELFT>
+void LLVMELFDumper<ELFT>::printZeroSymbolOtherField(
+ const Elf_Sym &Symbol) const {
+ assert(Symbol.st_other == 0 && "non-zero Other Field");
+ // Usually the st_other flag is zero. Do not pollute the output
+ // with a flags enumeration in that case.
+ W.printNumber("Other", 0);
+}
+
+template <class ELFT>
void LLVMELFDumper<ELFT>::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
DataRegion<Elf_Word> ShndxTable,
std::optional<StringRef> StrTable,
@@ -6805,35 +7114,9 @@ void LLVMELFDumper<ELFT>::printSymbol(const Elf_Sym &Symbol, unsigned SymIndex,
else
W.printEnum("Type", SymbolType, ArrayRef(ElfSymbolTypes));
if (Symbol.st_other == 0)
- // Usually st_other flag is zero. Do not pollute the output
- // by flags enumeration in that case.
- W.printNumber("Other", 0);
- else {
- std::vector<EnumEntry<unsigned>> SymOtherFlags(std::begin(ElfSymOtherFlags),
- std::end(ElfSymOtherFlags));
- if (this->Obj.getHeader().e_machine == EM_MIPS) {
- // Someones in their infinite wisdom decided to make STO_MIPS_MIPS16
- // flag overlapped with other ST_MIPS_xxx flags. So consider both
- // cases separately.
- if ((Symbol.st_other & STO_MIPS_MIPS16) == STO_MIPS_MIPS16)
- SymOtherFlags.insert(SymOtherFlags.end(),
- std::begin(ElfMips16SymOtherFlags),
- std::end(ElfMips16SymOtherFlags));
- else
- SymOtherFlags.insert(SymOtherFlags.end(),
- std::begin(ElfMipsSymOtherFlags),
- std::end(ElfMipsSymOtherFlags));
- } else if (this->Obj.getHeader().e_machine == EM_AARCH64) {
- SymOtherFlags.insert(SymOtherFlags.end(),
- std::begin(ElfAArch64SymOtherFlags),
- std::end(ElfAArch64SymOtherFlags));
- } else if (this->Obj.getHeader().e_machine == EM_RISCV) {
- SymOtherFlags.insert(SymOtherFlags.end(),
- std::begin(ElfRISCVSymOtherFlags),
- std::end(ElfRISCVSymOtherFlags));
- }
- W.printFlags("Other", Symbol.st_other, ArrayRef(SymOtherFlags), 0x3u);
- }
+ printZeroSymbolOtherField(Symbol);
+ else
+ printSymbolOtherField(Symbol);
printSymbolSection(Symbol, SymIndex, ShndxTable);
}
@@ -7009,8 +7292,27 @@ void LLVMELFDumper<ELFT>::printVersionDependencySection(const Elf_Shdr *Sec) {
}
}
-template <class ELFT> void LLVMELFDumper<ELFT>::printHashHistograms() {
- W.startLine() << "Hash Histogram not implemented!\n";
+template <class ELFT>
+void LLVMELFDumper<ELFT>::printHashHistogramStats(size_t NBucket,
+ size_t MaxChain,
+ size_t TotalSyms,
+ ArrayRef<size_t> Count,
+ bool IsGnu) const {
+ StringRef HistName = IsGnu ? "GnuHashHistogram" : "HashHistogram";
+ StringRef BucketName = IsGnu ? "Bucket" : "Chain";
+ StringRef ListName = IsGnu ? "Buckets" : "Chains";
+ DictScope Outer(W, HistName);
+ W.printNumber("TotalBuckets", NBucket);
+ ListScope Buckets(W, ListName);
+ size_t CumulativeNonZero = 0;
+ for (size_t I = 0; I < MaxChain; ++I) {
+ CumulativeNonZero += Count[I] * I;
+ DictScope Bucket(W, BucketName);
+ W.printNumber("Length", I);
+ W.printNumber("Count", Count[I]);
+ W.printNumber("Percentage", (float)(Count[I] * 100.0) / NBucket);
+ W.printNumber("Coverage", (float)(CumulativeNonZero * 100.0) / TotalSyms);
+ }
}
// Returns true if rel/rela section exists, and populates SymbolIndices.
@@ -7063,14 +7365,19 @@ static bool getSymbolIndices(const typename ELFT::Shdr *CGRelSection,
}
template <class ELFT> void LLVMELFDumper<ELFT>::printCGProfile() {
- llvm::MapVector<const Elf_Shdr *, const Elf_Shdr *> SecToRelocMap;
-
auto IsMatch = [](const Elf_Shdr &Sec) -> bool {
return Sec.sh_type == ELF::SHT_LLVM_CALL_GRAPH_PROFILE;
};
- this->getSectionAndRelocations(IsMatch, SecToRelocMap);
- for (const auto &CGMapEntry : SecToRelocMap) {
+ Expected<MapVector<const Elf_Shdr *, const Elf_Shdr *>> SecToRelocMapOrErr =
+ this->Obj.getSectionAndRelocations(IsMatch);
+ if (!SecToRelocMapOrErr) {
+ this->reportUniqueWarning("unable to get CG Profile section(s): " +
+ toString(SecToRelocMapOrErr.takeError()));
+ return;
+ }
+
+ for (const auto &CGMapEntry : *SecToRelocMapOrErr) {
const Elf_Shdr *CGSection = CGMapEntry.first;
const Elf_Shdr *CGRelSection = CGMapEntry.second;
@@ -7109,21 +7416,35 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printCGProfile() {
template <class ELFT> void LLVMELFDumper<ELFT>::printBBAddrMaps() {
bool IsRelocatable = this->Obj.getHeader().e_type == ELF::ET_REL;
- for (const Elf_Shdr &Sec : cantFail(this->Obj.sections())) {
- if (Sec.sh_type != SHT_LLVM_BB_ADDR_MAP &&
- Sec.sh_type != SHT_LLVM_BB_ADDR_MAP_V0) {
- continue;
- }
+ using Elf_Shdr = typename ELFT::Shdr;
+ auto IsMatch = [](const Elf_Shdr &Sec) -> bool {
+ return Sec.sh_type == ELF::SHT_LLVM_BB_ADDR_MAP ||
+ Sec.sh_type == ELF::SHT_LLVM_BB_ADDR_MAP_V0;
+ };
+ Expected<MapVector<const Elf_Shdr *, const Elf_Shdr *>> SecRelocMapOrErr =
+ this->Obj.getSectionAndRelocations(IsMatch);
+ if (!SecRelocMapOrErr) {
+ this->reportUniqueWarning(
+ "failed to get SHT_LLVM_BB_ADDR_MAP section(s): " +
+ toString(SecRelocMapOrErr.takeError()));
+ return;
+ }
+ for (auto const &[Sec, RelocSec] : *SecRelocMapOrErr) {
std::optional<const Elf_Shdr *> FunctionSec;
if (IsRelocatable)
FunctionSec =
- unwrapOrError(this->FileName, this->Obj.getSection(Sec.sh_link));
+ unwrapOrError(this->FileName, this->Obj.getSection(Sec->sh_link));
ListScope L(W, "BBAddrMap");
+ if (IsRelocatable && !RelocSec) {
+ this->reportUniqueWarning("unable to get relocation section for " +
+ this->describe(*Sec));
+ continue;
+ }
Expected<std::vector<BBAddrMap>> BBAddrMapOrErr =
- this->Obj.decodeBBAddrMap(Sec);
+ this->Obj.decodeBBAddrMap(*Sec, RelocSec);
if (!BBAddrMapOrErr) {
- this->reportUniqueWarning("unable to dump " + this->describe(Sec) + ": " +
- toString(BBAddrMapOrErr.takeError()));
+ this->reportUniqueWarning("unable to dump " + this->describe(*Sec) +
+ ": " + toString(BBAddrMapOrErr.takeError()));
continue;
}
for (const BBAddrMap &AM : *BBAddrMapOrErr) {
@@ -7135,7 +7456,7 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printBBAddrMaps() {
if (FuncSymIndex.empty())
this->reportUniqueWarning(
"could not identify function symbol for address (0x" +
- Twine::utohexstr(AM.Addr) + ") in " + this->describe(Sec));
+ Twine::utohexstr(AM.Addr) + ") in " + this->describe(*Sec));
else
FuncName = this->getStaticSymbolName(FuncSymIndex.front());
W.printString("Name", FuncName);
@@ -7146,10 +7467,11 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printBBAddrMaps() {
W.printNumber("ID", BBE.ID);
W.printHex("Offset", BBE.Offset);
W.printHex("Size", BBE.Size);
- W.printBoolean("HasReturn", BBE.HasReturn);
- W.printBoolean("HasTailCall", BBE.HasTailCall);
- W.printBoolean("IsEHPad", BBE.IsEHPad);
- W.printBoolean("CanFallThrough", BBE.CanFallThrough);
+ W.printBoolean("HasReturn", BBE.hasReturn());
+ W.printBoolean("HasTailCall", BBE.hasTailCall());
+ W.printBoolean("IsEHPad", BBE.isEHPad());
+ W.printBoolean("CanFallThrough", BBE.canFallThrough());
+ W.printBoolean("HasIndirectBranch", BBE.MD.HasIndirectBranch);
}
}
}
@@ -7216,6 +7538,35 @@ static bool printAndroidNoteLLVMStyle(uint32_t NoteType, ArrayRef<uint8_t> Desc,
return true;
}
+template <class ELFT>
+void LLVMELFDumper<ELFT>::printMemtag(
+ const ArrayRef<std::pair<std::string, std::string>> DynamicEntries,
+ const ArrayRef<uint8_t> AndroidNoteDesc,
+ const ArrayRef<std::pair<uint64_t, uint64_t>> Descriptors) {
+ {
+ ListScope L(W, "Memtag Dynamic Entries:");
+ if (DynamicEntries.empty())
+ W.printString("< none found >");
+ for (const auto &DynamicEntryKV : DynamicEntries)
+ W.printString(DynamicEntryKV.first, DynamicEntryKV.second);
+ }
+
+ if (!AndroidNoteDesc.empty()) {
+ ListScope L(W, "Memtag Android Note:");
+ printAndroidNoteLLVMStyle(ELF::NT_ANDROID_TYPE_MEMTAG, AndroidNoteDesc, W);
+ }
+
+ if (Descriptors.empty())
+ return;
+
+ {
+ ListScope L(W, "Memtag Global Descriptors:");
+ for (const auto &[Addr, BytesToTag] : Descriptors) {
+ W.printHex("0x" + utohexstr(Addr), BytesToTag);
+ }
+ }
+}
+
template <typename ELFT>
static bool printLLVMOMPOFFLOADNoteLLVMStyle(uint32_t NoteType,
ArrayRef<uint8_t> Desc,
@@ -7251,9 +7602,11 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printNotes() {
ListScope L(W, "Notes");
std::unique_ptr<DictScope> NoteScope;
+ size_t Align = 0;
auto StartNotes = [&](std::optional<StringRef> SecName,
const typename ELFT::Off Offset,
- const typename ELFT::Addr Size) {
+ const typename ELFT::Addr Size, size_t Al) {
+ Align = std::max<size_t>(Al, 4);
NoteScope = std::make_unique<DictScope>(W, "NoteSection");
W.printString("Name", SecName ? *SecName : "<?>");
W.printHex("Offset", Offset);
@@ -7265,7 +7618,7 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printNotes() {
auto ProcessNote = [&](const Elf_Note &Note, bool IsCore) -> Error {
DictScope D2(W, "Note");
StringRef Name = Note.getName();
- ArrayRef<uint8_t> Descriptor = Note.getDesc();
+ ArrayRef<uint8_t> Descriptor = Note.getDesc(Align);
Elf_Word Type = Note.getType();
// Print the note owner/type.
@@ -7328,7 +7681,8 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printNotes() {
return Error::success();
};
- printNotesHelper(*this, StartNotes, ProcessNote, EndNotes);
+ processNotesHelper(*this, /*StartNotesFn=*/StartNotes,
+ /*ProcessNoteFn=*/ProcessNote, /*FinishNotesFn=*/EndNotes);
}
template <class ELFT> void LLVMELFDumper<ELFT>::printELFLinkerOptions() {
@@ -7554,3 +7908,45 @@ void JSONELFDumper<ELFT>::printFileSummary(StringRef FileStr, ObjectFile &Obj,
std::string(formatv("{0}bit", 8 * Obj.getBytesInAddress())));
this->printLoadName();
}
+
+template <class ELFT>
+void JSONELFDumper<ELFT>::printZeroSymbolOtherField(
+ const Elf_Sym &Symbol) const {
+ // We want the JSON format to be uniform, since it is machine-readable, so
+ // always print the `Other` field the same way.
+ this->printSymbolOtherField(Symbol);
+}
+
+template <class ELFT>
+void JSONELFDumper<ELFT>::printDefaultRelRelaReloc(const Relocation<ELFT> &R,
+ StringRef SymbolName,
+ StringRef RelocName) {
+ this->printExpandedRelRelaReloc(R, SymbolName, RelocName);
+}
+
+template <class ELFT>
+void JSONELFDumper<ELFT>::printRelocationSectionInfo(const Elf_Shdr &Sec,
+ StringRef Name,
+ const unsigned SecNdx) {
+ DictScope Group(this->W);
+ this->W.printNumber("SectionIndex", SecNdx);
+ ListScope D(this->W, "Relocs");
+ this->printRelocationsHelper(Sec);
+}
+
+template <class ELFT>
+std::string JSONELFDumper<ELFT>::getGroupSectionHeaderName() const {
+ return "GroupSections";
+}
+
+template <class ELFT>
+void JSONELFDumper<ELFT>::printSectionGroupMembers(StringRef Name,
+ uint64_t Idx) const {
+ DictScope Grp(this->W);
+ this->W.printString("Name", Name);
+ this->W.printNumber("Index", Idx);
+}
+
+template <class ELFT> void JSONELFDumper<ELFT>::printEmptyGroupMessage() const {
+ // JSON output does not need to print anything for empty groups.
+}
diff --git a/llvm/tools/llvm-readobj/ObjDumper.h b/llvm/tools/llvm-readobj/ObjDumper.h
index 258d87240984..921792f886d0 100644
--- a/llvm/tools/llvm-readobj/ObjDumper.h
+++ b/llvm/tools/llvm-readobj/ObjDumper.h
@@ -136,6 +136,7 @@ public:
virtual void printStackSizes() {}
virtual void printSectionDetails() {}
virtual void printArchSpecificInfo() {}
+ virtual void printMemtag() {}
// Only implemented for PE/COFF.
virtual void printCOFFImports() { }
diff --git a/llvm/tools/llvm-readobj/Opts.td b/llvm/tools/llvm-readobj/Opts.td
index 4f7b12f95a60..fec0adb5e6a6 100644
--- a/llvm/tools/llvm-readobj/Opts.td
+++ b/llvm/tools/llvm-readobj/Opts.td
@@ -55,6 +55,7 @@ def section_groups : FF<"section-groups", "Display section groups">, Group<grp_e
def gnu_hash_table : FF<"gnu-hash-table", "Display the GNU hash table for dynamic symbols">, Group<grp_elf>;
def hash_symbols : FF<"hash-symbols", "Display the dynamic symbols derived from the hash section">, Group<grp_elf>;
def hash_table : FF<"hash-table", "Display .hash section">, Group<grp_elf>;
+def memtag : FF<"memtag", "Display memory tagging metadata (modes, Android notes, global descriptors)">, Group<grp_elf>;
def needed_libs : FF<"needed-libs", "Display the needed libraries">, Group<grp_elf>;
def notes : FF<"notes", "Display notes">, Group<grp_elf>;
def program_headers : FF<"program-headers", "Display program headers">, Group<grp_elf>;
diff --git a/llvm/tools/llvm-readobj/XCOFFDumper.cpp b/llvm/tools/llvm-readobj/XCOFFDumper.cpp
index 56f672b3c5aa..74ebcc4ec7d8 100644
--- a/llvm/tools/llvm-readobj/XCOFFDumper.cpp
+++ b/llvm/tools/llvm-readobj/XCOFFDumper.cpp
@@ -98,10 +98,11 @@ void XCOFFDumper::printFileHeaders() {
// tests will let us know.
time_t TimeDate = TimeStamp;
- char FormattedTime[21] = {};
- size_t BytesWritten =
- strftime(FormattedTime, 21, "%Y-%m-%dT%H:%M:%SZ", gmtime(&TimeDate));
- if (BytesWritten)
+ char FormattedTime[80] = {};
+
+ size_t BytesFormatted =
+ strftime(FormattedTime, sizeof(FormattedTime), "%F %T", gmtime(&TimeDate));
+ if (BytesFormatted)
W.printHex("TimeStamp", FormattedTime, TimeStamp);
else
W.printHex("Timestamp", TimeStamp);
@@ -709,7 +710,7 @@ static StringRef GetSymbolValueName(XCOFF::StorageClass SC) {
const EnumEntry<XCOFF::CFileLangId> CFileLangIdClass[] = {
#define ECase(X) \
{ #X, XCOFF::X }
- ECase(TB_C), ECase(TB_CPLUSPLUS)
+ ECase(TB_C), ECase(TB_Fortran), ECase(TB_CPLUSPLUS)
#undef ECase
};
diff --git a/llvm/tools/llvm-readobj/llvm-readobj.cpp b/llvm/tools/llvm-readobj/llvm-readobj.cpp
index a11de35fcd76..d72eec04d06a 100644
--- a/llvm/tools/llvm-readobj/llvm-readobj.cpp
+++ b/llvm/tools/llvm-readobj/llvm-readobj.cpp
@@ -43,6 +43,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/LLVMDriver.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/WithColor.h"
@@ -135,6 +136,7 @@ static bool GnuHashTable;
static bool HashSymbols;
static bool HashTable;
static bool HashHistogram;
+static bool Memtag;
static bool NeededLibraries;
static bool Notes;
static bool ProgramHeaders;
@@ -265,6 +267,7 @@ static void parseOptions(const opt::InputArgList &Args) {
opts::HashSymbols = Args.hasArg(OPT_hash_symbols);
opts::HashTable = Args.hasArg(OPT_hash_table);
opts::HashHistogram = Args.hasArg(OPT_histogram);
+ opts::Memtag = Args.hasArg(OPT_memtag);
opts::NeededLibraries = Args.hasArg(OPT_needed_libs);
opts::Notes = Args.hasArg(OPT_notes);
opts::PrettyPrint = Args.hasArg(OPT_pretty_print);
@@ -472,6 +475,8 @@ static void dumpObject(ObjectFile &Obj, ScopedPrinter &Writer,
Dumper->printAddrsig();
if (opts::Notes)
Dumper->printNotes();
+ if (opts::Memtag)
+ Dumper->printMemtag();
}
if (Obj.isCOFF()) {
if (opts::COFFImports)
@@ -632,7 +637,7 @@ std::unique_ptr<ScopedPrinter> createWriter() {
return std::make_unique<ScopedPrinter>(fouts());
}
-int llvm_readobj_main(int argc, char **argv) {
+int llvm_readobj_main(int argc, char **argv, const llvm::ToolContext &) {
InitLLVM X(argc, argv);
BumpPtrAllocator A;
StringSaver Saver(A);
@@ -683,6 +688,7 @@ int llvm_readobj_main(int argc, char **argv) {
opts::Addrsig = true;
opts::PrintStackSizes = true;
}
+ opts::Memtag = true;
}
if (opts::Headers) {
diff --git a/llvm/tools/llvm-remarkutil/RemarkUtil.cpp b/llvm/tools/llvm-remarkutil/RemarkUtil.cpp
index 0412eae954ac..14af5d2842cf 100644
--- a/llvm/tools/llvm-remarkutil/RemarkUtil.cpp
+++ b/llvm/tools/llvm-remarkutil/RemarkUtil.cpp
@@ -38,6 +38,10 @@ static cl::SubCommand
static cl::SubCommand InstructionCount(
"instruction-count",
"Function instruction count information (requires asm-printer remarks)");
+static cl::SubCommand
+ AnnotationCount("annotation-count",
+ "Collect count information from annotation remarks (uses "
+ "AnnotationRemarksPass)");
} // namespace subopts
// Keep input + output help + names consistent across the various modes via a
@@ -49,6 +53,23 @@ static cl::SubCommand InstructionCount(
static cl::opt<std::string> OutputFileName( \
"o", cl::init("-"), cl::cat(RemarkUtilCategory), cl::desc("Output"), \
cl::value_desc("filename"), cl::sub(SUBOPT));
+
+// Keep the input format and names consistent across the modes via a macro.
+#define INPUT_FORMAT_COMMAND_LINE_OPTIONS(SUBOPT) \
+ static cl::opt<Format> InputFormat( \
+ "parser", cl::desc("Input remark format to parse"), \
+ cl::values(clEnumValN(Format::YAML, "yaml", "YAML"), \
+ clEnumValN(Format::Bitstream, "bitstream", "Bitstream")), \
+ cl::sub(SUBOPT));
+
+#define DEBUG_LOC_INFO_COMMAND_LINE_OPTIONS(SUBOPT) \
+ static cl::opt<bool> UseDebugLoc( \
+ "use-debug-loc", \
+ cl::desc( \
+ "Add debug loc information when generating tables for " \
+ "functions. The loc is represented as (path:line number:column " \
+ "number)"), \
+ cl::init(false), cl::sub(SUBOPT));
namespace yaml2bitstream {
/// Remark format to parse.
static constexpr Format InputFormat = Format::YAML;
@@ -66,14 +87,20 @@ INPUT_OUTPUT_COMMAND_LINE_OPTIONS(subopts::Bitstream2YAML)
} // namespace bitstream2yaml
namespace instructioncount {
-static cl::opt<Format> InputFormat(
- "parser", cl::desc("Input remark format to parse"),
- cl::values(clEnumValN(Format::YAML, "yaml", "YAML"),
- clEnumValN(Format::Bitstream, "bitstream", "Bitstream")),
- cl::sub(subopts::InstructionCount));
+INPUT_FORMAT_COMMAND_LINE_OPTIONS(subopts::InstructionCount)
INPUT_OUTPUT_COMMAND_LINE_OPTIONS(subopts::InstructionCount)
+DEBUG_LOC_INFO_COMMAND_LINE_OPTIONS(subopts::InstructionCount)
} // namespace instructioncount
+namespace annotationcount {
+INPUT_FORMAT_COMMAND_LINE_OPTIONS(subopts::AnnotationCount)
+static cl::opt<std::string> AnnotationTypeToCollect(
+ "annotation-type", cl::desc("annotation-type remark to collect count for"),
+ cl::sub(subopts::AnnotationCount));
+INPUT_OUTPUT_COMMAND_LINE_OPTIONS(subopts::AnnotationCount)
+DEBUG_LOC_INFO_COMMAND_LINE_OPTIONS(subopts::AnnotationCount)
+} // namespace annotationcount
+
/// \returns A MemoryBuffer for the input file on success, and an Error
/// otherwise.
static Expected<std::unique_ptr<MemoryBuffer>>
@@ -115,6 +142,10 @@ getOutputFileForRemarks(StringRef OutputFileName, Format OutputFormat) {
: sys::fs::OF_None);
}
+static bool shouldSkipRemark(bool UseDebugLoc, Remark &Remark) {
+ return UseDebugLoc && !Remark.Loc.has_value();
+}
+
namespace yaml2bitstream {
/// Parses all remarks in the input YAML file.
/// \p [out] ParsedRemarks - Filled with remarks parsed from the input file.
@@ -229,6 +260,8 @@ static Error tryInstructionCount() {
if (!MaybeParser)
return MaybeParser.takeError();
// Emit CSV header.
+ if (UseDebugLoc)
+ OF->os() << "Source,";
OF->os() << "Function,InstructionCount\n";
// Parse all remarks. Whenever we see an instruction count remark, output
// the file name and the number of instructions.
@@ -238,11 +271,19 @@ static Error tryInstructionCount() {
auto &Remark = **MaybeRemark;
if (Remark.RemarkName != "InstructionCount")
continue;
+ if (shouldSkipRemark(UseDebugLoc, Remark))
+ continue;
auto *InstrCountArg = find_if(Remark.Args, [](const Argument &Arg) {
return Arg.Key == "NumInstructions";
});
assert(InstrCountArg != Remark.Args.end() &&
"Expected instruction count remarks to have a NumInstructions key?");
+ if (UseDebugLoc) {
+ std::string Loc = Remark.Loc->SourceFilePath.str() + ":" +
+ std::to_string(Remark.Loc->SourceLine) + ":" +
+ std::to_string(Remark.Loc->SourceColumn);
+ OF->os() << Loc << ",";
+ }
OF->os() << Remark.FunctionName << "," << InstrCountArg->Val << "\n";
}
auto E = MaybeRemark.takeError();
@@ -254,6 +295,61 @@ static Error tryInstructionCount() {
}
} // namespace instructioncount
+namespace annotationcount {
+static Error tryAnnotationCount() {
+ // Create the output buffer.
+ auto MaybeOF = getOutputFileWithFlags(OutputFileName,
+ /*Flags = */ sys::fs::OF_TextWithCRLF);
+ if (!MaybeOF)
+ return MaybeOF.takeError();
+ auto OF = std::move(*MaybeOF);
+ // Create a parser for the user-specified input format.
+ auto MaybeBuf = getInputMemoryBuffer(InputFileName);
+ if (!MaybeBuf)
+ return MaybeBuf.takeError();
+ auto MaybeParser = createRemarkParser(InputFormat, (*MaybeBuf)->getBuffer());
+ if (!MaybeParser)
+ return MaybeParser.takeError();
+ // Emit CSV header.
+ if (UseDebugLoc)
+ OF->os() << "Source,";
+ OF->os() << "Function,Count\n";
+ // Parse all remarks. When we see the specified remark, collect the count
+ // information.
+ auto &Parser = **MaybeParser;
+ auto MaybeRemark = Parser.next();
+ for (; MaybeRemark; MaybeRemark = Parser.next()) {
+ auto &Remark = **MaybeRemark;
+ if (Remark.RemarkName != "AnnotationSummary")
+ continue;
+ if (shouldSkipRemark(UseDebugLoc, Remark))
+ continue;
+ auto *RemarkNameArg = find_if(Remark.Args, [](const Argument &Arg) {
+ return Arg.Key == "type" && Arg.Val == AnnotationTypeToCollect;
+ });
+ if (RemarkNameArg == Remark.Args.end())
+ continue;
+ auto *CountArg = find_if(
+ Remark.Args, [](const Argument &Arg) { return Arg.Key == "count"; });
+ assert(CountArg != Remark.Args.end() &&
+ "Expected annotation-type remark to have a count key?");
+ if (UseDebugLoc) {
+ std::string Loc = Remark.Loc->SourceFilePath.str() + ":" +
+ std::to_string(Remark.Loc->SourceLine) + ":" +
+ std::to_string(Remark.Loc->SourceColumn);
+ OF->os() << Loc << ",";
+ }
+ OF->os() << Remark.FunctionName << "," << CountArg->Val << "\n";
+ }
+ auto E = MaybeRemark.takeError();
+ if (!E.isA<EndOfFileError>())
+ return E;
+ consumeError(std::move(E));
+ OF->keep();
+ return Error::success();
+}
+
+} // namespace annotationcount
/// Handle user-specified suboptions (e.g. yaml2bitstream, bitstream2yaml).
/// \returns An Error if the specified suboption fails or if no suboption was
/// specified. Otherwise, Error::success().
@@ -264,6 +360,9 @@ static Error handleSuboptions() {
return yaml2bitstream::tryYAML2Bitstream();
if (subopts::InstructionCount)
return instructioncount::tryInstructionCount();
+ if (subopts::AnnotationCount)
+ return annotationcount::tryAnnotationCount();
+
return make_error<StringError>(
"Please specify a subcommand. (See -help for options)",
inconvertibleErrorCode());
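A rough sketch of the parse loop that tryInstructionCount() and tryAnnotationCount() share, using only the llvm::remarks API visible in the hunks above (the helper name is illustrative and error handling is trimmed to the essentials):

static Error countAnnotationSummaries(StringRef Buffer, raw_ostream &OS) {
  // Parse the buffer in the requested remark format (YAML here for brevity).
  auto MaybeParser =
      remarks::createRemarkParser(remarks::Format::YAML, Buffer);
  if (!MaybeParser)
    return MaybeParser.takeError();
  auto &Parser = **MaybeParser;
  auto MaybeRemark = Parser.next();
  for (; MaybeRemark; MaybeRemark = Parser.next()) {
    const remarks::Remark &R = **MaybeRemark;
    if (R.RemarkName != "AnnotationSummary")
      continue;
    // With -use-debug-loc a "path:line:column" column is prepended, as in the
    // hunks above; this sketch only emits the function name.
    OS << R.FunctionName << "\n";
  }
  // The parser reports completion as an EndOfFileError; anything else is a
  // real failure and is propagated.
  Error E = MaybeRemark.takeError();
  if (!E.isA<remarks::EndOfFileError>())
    return E;
  consumeError(std::move(E));
  return Error::success();
}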
diff --git a/llvm/tools/llvm-size/llvm-size.cpp b/llvm/tools/llvm-size/llvm-size.cpp
index 32dbf3d489c0..048a98b9af7d 100644
--- a/llvm/tools/llvm-size/llvm-size.cpp
+++ b/llvm/tools/llvm-size/llvm-size.cpp
@@ -26,6 +26,7 @@
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/LLVMDriver.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
@@ -869,7 +870,7 @@ static void printBerkeleyTotals() {
<< "(TOTALS)\n";
}
-int llvm_size_main(int argc, char **argv) {
+int llvm_size_main(int argc, char **argv, const llvm::ToolContext &) {
InitLLVM X(argc, argv);
BumpPtrAllocator A;
StringSaver Saver(A);
diff --git a/llvm/tools/llvm-stress/llvm-stress.cpp b/llvm/tools/llvm-stress/llvm-stress.cpp
index 639506c7d488..d1cf1607b94b 100644
--- a/llvm/tools/llvm-stress/llvm-stress.cpp
+++ b/llvm/tools/llvm-stress/llvm-stress.cpp
@@ -222,7 +222,7 @@ protected:
} else if (Tp->isFloatingPointTy()) {
if (getRandom() & 1)
return ConstantFP::getAllOnesValue(Tp);
- return ConstantFP::getNullValue(Tp);
+ return ConstantFP::getZero(Tp);
}
return UndefValue::get(Tp);
}
@@ -244,7 +244,7 @@ protected:
} else if (Tp->isFloatingPointTy()) {
if (getRandom() & 1)
return ConstantFP::getAllOnesValue(Tp);
- return ConstantFP::getNullValue(Tp);
+ return ConstantFP::getZero(Tp);
} else if (auto *VTp = dyn_cast<FixedVectorType>(Tp)) {
std::vector<Constant*> TempValues;
TempValues.reserve(VTp->getNumElements());
@@ -341,9 +341,7 @@ struct LoadModifier: public Modifier {
void Act() override {
// Try to use predefined pointers. If none exist, use an undef pointer value.
Value *Ptr = getRandomPointerValue();
- Type *Ty = Ptr->getType()->isOpaquePointerTy()
- ? pickType()
- : Ptr->getType()->getNonOpaquePointerElementType();
+ Type *Ty = pickType();
Value *V = new LoadInst(Ty, Ptr, "L", BB->getTerminator());
PT->push_back(V);
}
@@ -356,9 +354,7 @@ struct StoreModifier: public Modifier {
void Act() override {
// Try to use predefined pointers. If none exist, use an undef pointer value.
Value *Ptr = getRandomPointerValue();
- Type *ValTy = Ptr->getType()->isOpaquePointerTy()
- ? pickType()
- : Ptr->getType()->getNonOpaquePointerElementType();
+ Type *ValTy = pickType();
// Do not store vectors of i1s because they are unsupported
// by the codegen.
@@ -442,7 +438,7 @@ struct ConstModifier: public Modifier {
APFloat RandomFloat(Ty->getFltSemantics(), RandomInt);
if (getRandom() & 1)
- return PT->push_back(ConstantFP::getNullValue(Ty));
+ return PT->push_back(ConstantFP::getZero(Ty));
return PT->push_back(ConstantFP::get(Ty->getContext(), RandomFloat));
}
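The getNullValue-to-getZero switch above is mechanical; a minimal sketch of the replacement API, assuming an existing LLVMContext Ctx:

Type *FloatTy = Type::getFloatTy(Ctx);
Constant *PosZero = ConstantFP::getZero(FloatTy);                    // +0.0
Constant *NegZero = ConstantFP::getZero(FloatTy, /*Negative=*/true); // -0.0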
diff --git a/llvm/tools/llvm-strings/llvm-strings.cpp b/llvm/tools/llvm-strings/llvm-strings.cpp
index f6d08a1988b7..d9bc34ee621a 100644
--- a/llvm/tools/llvm-strings/llvm-strings.cpp
+++ b/llvm/tools/llvm-strings/llvm-strings.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "Opts.inc"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Object/Binary.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
@@ -62,6 +63,7 @@ class StringsOptTable : public opt::GenericOptTable {
public:
StringsOptTable() : GenericOptTable(InfoTable) {
setGroupedShortOptions(true);
+ setDashDashParsing(true);
}
};
} // namespace
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 1b86134dda51..3e342a4db9ce 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -36,6 +36,7 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/StringSaver.h"
+#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdio>
@@ -83,6 +84,16 @@ public:
};
} // namespace
+static std::string ToolName;
+
+static void printError(const ErrorInfoBase &EI, StringRef Path) {
+ WithColor::error(errs(), ToolName);
+ if (!EI.isA<FileError>())
+ errs() << "'" << Path << "': ";
+ EI.log(errs());
+ errs() << '\n';
+}
+
template <typename T>
static void print(const Request &Request, Expected<T> &ResOrErr,
DIPrinter &Printer) {
@@ -96,8 +107,7 @@ static void print(const Request &Request, Expected<T> &ResOrErr,
bool PrintEmpty = true;
handleAllErrors(std::move(ResOrErr.takeError()),
[&](const ErrorInfoBase &EI) {
- PrintEmpty = Printer.printError(
- Request, EI, "LLVMSymbolizer: error reading file: ");
+ PrintEmpty = Printer.printError(Request, EI);
});
if (PrintEmpty)
@@ -125,15 +135,6 @@ static void enableDebuginfod(LLVMSymbolizer &Symbolizer,
HTTPClient::initialize();
}
-static object::BuildID parseBuildID(StringRef Str) {
- std::string Bytes;
- if (!tryGetFromHex(Str, Bytes))
- return {};
- ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
- Bytes.size());
- return object::BuildID(BuildID.begin(), BuildID.end());
-}
-
static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
StringRef InputString, Command &Cmd,
std::string &ModuleName, object::BuildID &BuildID,
@@ -218,17 +219,18 @@ void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
uint64_t AdjustedOffset = Offset - AdjustVMA;
object::SectionedAddress Address = {AdjustedOffset,
object::SectionedAddress::UndefSection};
+ Request SymRequest = {ModuleName, Offset};
if (Cmd == Command::Data) {
Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address);
- print({ModuleName, Offset}, ResOrErr, Printer);
+ print(SymRequest, ResOrErr, Printer);
} else if (Cmd == Command::Frame) {
Expected<std::vector<DILocal>> ResOrErr =
Symbolizer.symbolizeFrame(ModuleSpec, Address);
- print({ModuleName, Offset}, ResOrErr, Printer);
+ print(SymRequest, ResOrErr, Printer);
} else if (ShouldInline) {
Expected<DIInliningInfo> ResOrErr =
Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);
- print({ModuleName, Offset}, ResOrErr, Printer);
+ print(SymRequest, ResOrErr, Printer);
} else if (Style == OutputStyle::GNU) {
// With PrintFunctions == FunctionNameKind::LinkageName (default)
// and UseSymbolTable == true (also default), Symbolizer.symbolizeCode()
@@ -243,11 +245,11 @@ void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
? Expected<DILineInfo>(ResOrErr.takeError())
: ((ResOrErr->getNumberOfFrames() == 0) ? DILineInfo()
: ResOrErr->getFrame(0));
- print({ModuleName, Offset}, Res0OrErr, Printer);
+ print(SymRequest, Res0OrErr, Printer);
} else {
Expected<DILineInfo> ResOrErr =
Symbolizer.symbolizeCode(ModuleSpec, Address);
- print({ModuleName, Offset}, ResOrErr, Printer);
+ print(SymRequest, ResOrErr, Printer);
}
Symbolizer.pruneCache();
}
@@ -386,7 +388,8 @@ int main(int argc, char **argv) {
InitLLVM X(argc, argv);
sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded);
- bool IsAddr2Line = sys::path::stem(argv[0]).contains("addr2line");
+ ToolName = argv[0];
+ bool IsAddr2Line = sys::path::stem(ToolName).contains("addr2line");
BumpPtrAllocator A;
StringSaver Saver(A);
SymbolizerOptTable Tbl;
@@ -443,13 +446,7 @@ int main(int argc, char **argv) {
LLVMSymbolizer Symbolizer(Opts);
- // A debuginfod lookup could succeed if a HTTP client is available and at
- // least one backing URL is configured.
- bool ShouldUseDebuginfodByDefault =
- HTTPClient::isAvailable() &&
- !ExitOnErr(getDefaultDebuginfodUrls()).empty();
- if (Args.hasFlag(OPT_debuginfod, OPT_no_debuginfod,
- ShouldUseDebuginfodByDefault))
+ if (Args.hasFlag(OPT_debuginfod, OPT_no_debuginfod, canUseDebuginfod()))
enableDebuginfod(Symbolizer, Args);
if (Args.hasArg(OPT_filter_markup)) {
@@ -475,11 +472,25 @@ int main(int argc, char **argv) {
std::unique_ptr<DIPrinter> Printer;
if (Style == OutputStyle::GNU)
- Printer = std::make_unique<GNUPrinter>(outs(), errs(), Config);
+ Printer = std::make_unique<GNUPrinter>(outs(), printError, Config);
else if (Style == OutputStyle::JSON)
Printer = std::make_unique<JSONPrinter>(outs(), Config);
else
- Printer = std::make_unique<LLVMPrinter>(outs(), errs(), Config);
+ Printer = std::make_unique<LLVMPrinter>(outs(), printError, Config);
+
+ // When an input file is specified, exit immediately if the file cannot be
+ // read. If getOrCreateModuleInfo succeeds, symbolizeInput will reuse the
+ // cached file handle.
+ if (auto *Arg = Args.getLastArg(OPT_obj_EQ); Arg) {
+ auto Status = Symbolizer.getOrCreateModuleInfo(Arg->getValue());
+ if (!Status) {
+ Request SymRequest = {Arg->getValue(), 0};
+ handleAllErrors(Status.takeError(), [&](const ErrorInfoBase &EI) {
+ Printer->printError(SymRequest, EI);
+ });
+ return EXIT_FAILURE;
+ }
+ }
std::vector<std::string> InputAddresses = Args.getAllArgValues(OPT_INPUT);
if (InputAddresses.empty()) {
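A minimal sketch of the error-callback shape the printers take after this change, assuming the constructors accept any callable matching printError's signature (the tool name string is illustrative):

auto Handler = [](const ErrorInfoBase &EI, StringRef Path) {
  WithColor::error(errs(), "llvm-symbolizer");
  if (!EI.isA<FileError>())
    errs() << "'" << Path << "': ";
  EI.log(errs());
  errs() << '\n';
};
// Passed where errs() used to be passed:
// Printer = std::make_unique<LLVMPrinter>(outs(), Handler, Config);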
diff --git a/llvm/tools/llvm-tapi-diff/DiffEngine.cpp b/llvm/tools/llvm-tapi-diff/DiffEngine.cpp
index 9ebaadbeec26..3e07bb94f4df 100644
--- a/llvm/tools/llvm-tapi-diff/DiffEngine.cpp
+++ b/llvm/tools/llvm-tapi-diff/DiffEngine.cpp
@@ -11,11 +11,13 @@
//
//===----------------------------------------------------------------------===/
#include "DiffEngine.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TextAPI/InterfaceFile.h"
#include "llvm/TextAPI/Symbol.h"
#include "llvm/TextAPI/Target.h"
+#include <iterator>
using namespace llvm;
using namespace MachO;
@@ -74,43 +76,49 @@ StringLiteral SymScalar::getSymbolNamePrefix(MachO::SymbolKind Kind) {
llvm_unreachable("Unknown llvm::MachO::SymbolKind enum");
}
-std::string SymScalar::stringifySymbolFlag(MachO::SymbolFlags Flag) {
- switch (Flag) {
- case MachO::SymbolFlags::None:
- return "";
- case MachO::SymbolFlags::ThreadLocalValue:
- return "Thread-Local";
- case MachO::SymbolFlags::WeakDefined:
- return "Weak-Defined";
- case MachO::SymbolFlags::WeakReferenced:
- return "Weak-Referenced";
- case MachO::SymbolFlags::Undefined:
- return "Undefined";
- case MachO::SymbolFlags::Rexported:
- return "Reexported";
- }
- llvm_unreachable("Unknown llvm::MachO::SymbolFlags enum");
+std::string SymScalar::getFlagString(const MachO::Symbol *Sym) {
+ if (Sym->getFlags() == SymbolFlags::None)
+ return {};
+ SmallString<64> Flags(" - ");
+ if (Sym->isThreadLocalValue())
+ Flags.append("Thread-Local ");
+ if (Sym->isWeakDefined())
+ Flags.append("Weak-Defined ");
+ if (Sym->isWeakReferenced())
+ Flags.append("Weak-Referenced ");
+ if (Sym->isUndefined())
+ Flags.append("Undefined ");
+ if (Sym->isReexported())
+ Flags.append("Reexported ");
+ if (Sym->isData())
+ Flags.append("Data ");
+ if (Sym->isText())
+ Flags.append("Text ");
+
+ return std::string(Flags);
}
void SymScalar::print(raw_ostream &OS, std::string Indent, MachO::Target Targ) {
if (Val->getKind() == MachO::SymbolKind::ObjectiveCClass) {
if (Targ.Arch == MachO::AK_i386 && Targ.Platform == MachO::PLATFORM_MACOS) {
OS << Indent << "\t\t" << ((Order == lhs) ? "< " : "> ")
- << ObjC1ClassNamePrefix << Val->getName()
- << getFlagString(Val->getFlags()) << "\n";
+ << ObjC1ClassNamePrefix << Val->getName() << getFlagString(Val)
+ << "\n";
return;
}
OS << Indent << "\t\t" << ((Order == lhs) ? "< " : "> ")
- << ObjC2ClassNamePrefix << Val->getName()
- << getFlagString(Val->getFlags()) << "\n";
+ << ObjC2ClassNamePrefix << Val->getName() << getFlagString(Val) << "\n";
}
OS << Indent << "\t\t" << ((Order == lhs) ? "< " : "> ")
<< getSymbolNamePrefix(Val->getKind()) << Val->getName()
- << getFlagString(Val->getFlags()) << "\n";
+ << getFlagString(Val) << "\n";
}
bool checkSymbolEquality(llvm::MachO::InterfaceFile::const_symbol_range LHS,
llvm::MachO::InterfaceFile::const_symbol_range RHS) {
+ if (std::distance(LHS.begin(), LHS.end()) !=
+ std::distance(RHS.begin(), RHS.end()))
+ return false;
return std::equal(LHS.begin(), LHS.end(), RHS.begin(),
[&](auto LHS, auto RHS) { return *LHS == *RHS; });
}
@@ -204,9 +212,6 @@ std::vector<DiffOutput> getSingleIF(InterfaceFile *Interface,
diffAttribute("Swift ABI Version", Output,
DiffScalarVal<uint8_t, AD_Diff_Scalar_Unsigned>(
Order, Interface->getSwiftABIVersion()));
- diffAttribute("InstallAPI", Output,
- DiffScalarVal<bool, AD_Diff_Scalar_Bool>(
- Order, Interface->isInstallAPI()));
diffAttribute("Two Level Namespace", Output,
DiffScalarVal<bool, AD_Diff_Scalar_Bool>(
Order, Interface->isTwoLevelNamespace()));
@@ -341,11 +346,6 @@ DiffEngine::findDifferences(const InterfaceFile *IFLHS,
DiffScalarVal<uint8_t, AD_Diff_Scalar_Unsigned>(
rhs, IFRHS->getSwiftABIVersion()),
"Swift ABI Version"));
- if (IFLHS->isInstallAPI() != IFRHS->isInstallAPI())
- Output.push_back(recordDifferences(
- DiffScalarVal<bool, AD_Diff_Scalar_Bool>(lhs, IFLHS->isInstallAPI()),
- DiffScalarVal<bool, AD_Diff_Scalar_Bool>(rhs, IFRHS->isInstallAPI()),
- "InstallAPI"));
if (IFLHS->isTwoLevelNamespace() != IFRHS->isTwoLevelNamespace())
Output.push_back(recordDifferences(DiffScalarVal<bool, AD_Diff_Scalar_Bool>(
diff --git a/llvm/tools/llvm-tapi-diff/DiffEngine.h b/llvm/tools/llvm-tapi-diff/DiffEngine.h
index e4864054fa11..27b72573d011 100644
--- a/llvm/tools/llvm-tapi-diff/DiffEngine.h
+++ b/llvm/tools/llvm-tapi-diff/DiffEngine.h
@@ -83,11 +83,7 @@ public:
SymScalar(InterfaceInputOrder Order, const MachO::Symbol *Sym)
: Order(Order), Val(Sym){};
- std::string getFlagString(MachO::SymbolFlags Flags) {
- return Flags != MachO::SymbolFlags::None
- ? " - " + stringifySymbolFlag(Flags)
- : stringifySymbolFlag(Flags);
- }
+ std::string getFlagString(const MachO::Symbol *Sym);
void print(raw_ostream &OS, std::string Indent, MachO::Target Targ);
@@ -99,7 +95,6 @@ private:
InterfaceInputOrder Order;
const MachO::Symbol *Val;
StringLiteral getSymbolNamePrefix(MachO::SymbolKind Kind);
- std::string stringifySymbolFlag(MachO::SymbolFlags Flag);
};
class DiffStrVec : public AttributeDiff {
diff --git a/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp
index 179c42b60605..9cc18f80910d 100644
--- a/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp
+++ b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp
@@ -8,7 +8,6 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Demangle/Demangle.h"
@@ -20,6 +19,7 @@
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/WithColor.h"
+#include "llvm/TargetParser/Triple.h"
using namespace llvm;
using namespace llvm::object;
@@ -107,7 +107,7 @@ static std::string getPrintableName(StringRef Name) {
std::string OutputName = "'";
OutputName += Name;
OutputName += "'";
- std::string DemangledName(demangle(Name.str()));
+ std::string DemangledName(demangle(Name));
if (Name != DemangledName) {
OutputName += " aka ";
OutputName += DemangledName;
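demangle() now accepts the name by view, so the StringRef is passed straight through; a minimal usage sketch (the mangled name is only an example):

// Demangle.h is already included by this file.
std::string Pretty = llvm::demangle("_Z3fooi"); // "foo(int)"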
diff --git a/llvm/tools/llvm-xray/xray-account.cpp b/llvm/tools/llvm-xray/xray-account.cpp
index a9d91297b4e1..24a3552cfb91 100644
--- a/llvm/tools/llvm-xray/xray-account.cpp
+++ b/llvm/tools/llvm-xray/xray-account.cpp
@@ -80,7 +80,7 @@ static cl::opt<SortField> AccountSortOutput(
"sort", cl::desc("sort output by this field"), cl::value_desc("field"),
cl::sub(Account), cl::init(SortField::FUNCID),
cl::values(clEnumValN(SortField::FUNCID, "funcid", "function id"),
- clEnumValN(SortField::COUNT, "count", "funciton call counts"),
+ clEnumValN(SortField::COUNT, "count", "function call counts"),
clEnumValN(SortField::MIN, "min", "minimum function durations"),
clEnumValN(SortField::MED, "med", "median function durations"),
clEnumValN(SortField::PCT90, "90p", "90th percentile durations"),
diff --git a/llvm/tools/llvm-xray/xray-graph.cpp b/llvm/tools/llvm-xray/xray-graph.cpp
index b8328052f473..de67993d7590 100644
--- a/llvm/tools/llvm-xray/xray-graph.cpp
+++ b/llvm/tools/llvm-xray/xray-graph.cpp
@@ -200,7 +200,7 @@ static std::string escapeString(StringRef Label) {
// example caused by tail call elimination and, if the option is enabled,
// then tries to recover from this.
//
-// This funciton will also error if the records are out of order, as the trace
+// This function will also error if the records are out of order, as the trace
// is expected to be sorted.
//
// The graph generated has an imaginary root for functions called by no one at
diff --git a/llvm/tools/opt/AnalysisWrappers.cpp b/llvm/tools/opt/AnalysisWrappers.cpp
deleted file mode 100644
index 2ae1da84a9a0..000000000000
--- a/llvm/tools/opt/AnalysisWrappers.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-//===- AnalysisWrappers.cpp - Wrappers around non-pass analyses -----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines pass wrappers around LLVM analyses that don't make sense to
-// be passes. It provides a nice standard pass interface to these classes so
-// that they can be printed out by analyze.
-//
-// These classes are separated out of analyze.cpp so that it is more clear which
-// code is the integral part of the analyze tool, and which part of the code is
-// just making it so more passes are available.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
- /// ExternalFunctionsPassedConstants - This pass prints out call sites to
- /// external functions that are called with constant arguments. This can be
- /// useful when looking for standard library functions we should constant fold
- /// or handle in alias analyses.
- struct ExternalFunctionsPassedConstants : public ModulePass {
- static char ID; // Pass ID, replacement for typeid
- ExternalFunctionsPassedConstants() : ModulePass(ID) {}
- bool runOnModule(Module &M) override {
- for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
- if (!I->isDeclaration()) continue;
-
- bool PrintedFn = false;
- for (User *U : I->users()) {
- Instruction *UI = dyn_cast<Instruction>(U);
- if (!UI) continue;
-
- CallBase *CB = dyn_cast<CallBase>(UI);
- if (!CB)
- continue;
-
- for (auto AI = CB->arg_begin(), E = CB->arg_end(); AI != E; ++AI) {
- if (!isa<Constant>(*AI)) continue;
-
- if (!PrintedFn) {
- errs() << "Function '" << I->getName() << "':\n";
- PrintedFn = true;
- }
- errs() << *UI;
- break;
- }
- }
- }
-
- return false;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
- };
-}
-
-char ExternalFunctionsPassedConstants::ID = 0;
-static RegisterPass<ExternalFunctionsPassedConstants>
- P1("print-externalfnconstants",
- "Print external fn callsites passed constants");
diff --git a/llvm/tools/opt/BreakpointPrinter.cpp b/llvm/tools/opt/BreakpointPrinter.cpp
deleted file mode 100644
index a57a8c43c264..000000000000
--- a/llvm/tools/opt/BreakpointPrinter.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-//===- BreakpointPrinter.cpp - Breakpoint location printer ----------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Breakpoint location printer.
-///
-//===----------------------------------------------------------------------===//
-#include "BreakpointPrinter.h"
-#include "llvm/ADT/StringSet.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-namespace {
-
-struct BreakpointPrinter : public ModulePass {
- raw_ostream &Out;
- static char ID;
-
- BreakpointPrinter(raw_ostream &out) : ModulePass(ID), Out(out) {}
-
- void getContextName(const DIScope *Context, std::string &N) {
- if (auto *NS = dyn_cast<DINamespace>(Context)) {
- if (!NS->getName().empty()) {
- getContextName(NS->getScope(), N);
- N = N + NS->getName().str() + "::";
- }
- } else if (auto *TY = dyn_cast<DIType>(Context)) {
- if (!TY->getName().empty()) {
- getContextName(TY->getScope(), N);
- N = N + TY->getName().str() + "::";
- }
- }
- }
-
- bool runOnModule(Module &M) override {
- StringSet<> Processed;
- if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp"))
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- std::string Name;
- auto *SP = cast_or_null<DISubprogram>(NMD->getOperand(i));
- if (!SP)
- continue;
- getContextName(SP->getScope(), Name);
- Name = Name + SP->getName().str();
- if (!Name.empty() && Processed.insert(Name).second) {
- Out << Name << "\n";
- }
- }
- return false;
- }
-
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesAll();
- }
-};
-
-char BreakpointPrinter::ID = 0;
-}
-
-ModulePass *llvm::createBreakpointPrinter(raw_ostream &out) {
- return new BreakpointPrinter(out);
-}
diff --git a/llvm/tools/opt/BreakpointPrinter.h b/llvm/tools/opt/BreakpointPrinter.h
deleted file mode 100644
index 2877555f852c..000000000000
--- a/llvm/tools/opt/BreakpointPrinter.h
+++ /dev/null
@@ -1,24 +0,0 @@
-//===- BreakpointPrinter.h - Breakpoint location printer ------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-/// \file
-/// Breakpoint location printer.
-///
-//===----------------------------------------------------------------------===//
-#ifndef LLVM_TOOLS_OPT_BREAKPOINTPRINTER_H
-#define LLVM_TOOLS_OPT_BREAKPOINTPRINTER_H
-
-namespace llvm {
-
-class ModulePass;
-class raw_ostream;
-
-ModulePass *createBreakpointPrinter(raw_ostream &out);
-}
-
-#endif // LLVM_TOOLS_OPT_BREAKPOINTPRINTER_H
diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp
index a8db0c62898e..6ae3f87099af 100644
--- a/llvm/tools/opt/NewPMDriver.cpp
+++ b/llvm/tools/opt/NewPMDriver.cpp
@@ -31,6 +31,7 @@
#include "llvm/Passes/StandardInstrumentations.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
@@ -175,6 +176,9 @@ static cl::opt<PGOKind>
"Use sampled profile to guide PGO.")));
static cl::opt<std::string> ProfileFile("profile-file",
cl::desc("Path to the profile."), cl::Hidden);
+static cl::opt<std::string>
+ MemoryProfileFile("memory-profile-file",
+ cl::desc("Path to the memory profile."), cl::Hidden);
static cl::opt<CSPGOKind> CSPGOKindFlag(
"cspgo-kind", cl::init(NoCSPGO), cl::Hidden,
@@ -320,35 +324,38 @@ static void registerEPCallbacks(PassBuilder &PB) {
llvm::PassPluginLibraryInfo get##Ext##PluginInfo();
#include "llvm/Support/Extension.def"
-bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
- TargetLibraryInfoImpl *TLII, ToolOutputFile *Out,
- ToolOutputFile *ThinLTOLinkOut,
- ToolOutputFile *OptRemarkFile,
- StringRef PassPipeline,
- ArrayRef<PassPlugin> PassPlugins,
- OutputKind OK, VerifierKind VK,
- bool ShouldPreserveAssemblyUseListOrder,
- bool ShouldPreserveBitcodeUseListOrder,
- bool EmitSummaryIndex, bool EmitModuleHash,
- bool EnableDebugify, bool VerifyDIPreserve) {
+bool llvm::runPassPipeline(
+ StringRef Arg0, Module &M, TargetMachine *TM, TargetLibraryInfoImpl *TLII,
+ ToolOutputFile *Out, ToolOutputFile *ThinLTOLinkOut,
+ ToolOutputFile *OptRemarkFile, StringRef PassPipeline,
+ ArrayRef<PassPlugin> PassPlugins, OutputKind OK, VerifierKind VK,
+ bool ShouldPreserveAssemblyUseListOrder,
+ bool ShouldPreserveBitcodeUseListOrder, bool EmitSummaryIndex,
+ bool EmitModuleHash, bool EnableDebugify, bool VerifyDIPreserve,
+ bool UnifiedLTO) {
bool VerifyEachPass = VK == VK_VerifyEachPass;
+ auto FS = vfs::getRealFileSystem();
std::optional<PGOOptions> P;
switch (PGOKindFlag) {
case InstrGen:
- P = PGOOptions(ProfileFile, "", "", PGOOptions::IRInstr);
+ P = PGOOptions(ProfileFile, "", "", MemoryProfileFile, FS,
+ PGOOptions::IRInstr);
break;
case InstrUse:
- P = PGOOptions(ProfileFile, "", ProfileRemappingFile, PGOOptions::IRUse);
+ P = PGOOptions(ProfileFile, "", ProfileRemappingFile, MemoryProfileFile, FS,
+ PGOOptions::IRUse);
break;
case SampleUse:
- P = PGOOptions(ProfileFile, "", ProfileRemappingFile,
+ P = PGOOptions(ProfileFile, "", ProfileRemappingFile, MemoryProfileFile, FS,
PGOOptions::SampleUse);
break;
case NoPGO:
- if (DebugInfoForProfiling || PseudoProbeForProfiling)
- P = PGOOptions("", "", "", PGOOptions::NoAction, PGOOptions::NoCSAction,
- DebugInfoForProfiling, PseudoProbeForProfiling);
+ if (DebugInfoForProfiling || PseudoProbeForProfiling ||
+ !MemoryProfileFile.empty())
+ P = PGOOptions("", "", "", MemoryProfileFile, FS, PGOOptions::NoAction,
+ PGOOptions::NoCSAction, DebugInfoForProfiling,
+ PseudoProbeForProfiling);
else
P = std::nullopt;
}
@@ -368,7 +375,8 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
P->CSProfileGenFile = CSProfileGenFile;
} else
P = PGOOptions("", CSProfileGenFile, ProfileRemappingFile,
- PGOOptions::NoAction, PGOOptions::CSIRInstr);
+ /*MemoryProfile=*/"", FS, PGOOptions::NoAction,
+ PGOOptions::CSIRInstr);
} else /* CSPGOKindFlag == CSInstrUse */ {
if (!P) {
errs() << "CSInstrUse needs to be together with InstrUse";
@@ -391,20 +399,20 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
PrintPassOpts.SkipAnalyses = DebugPM == DebugLogging::Quiet;
StandardInstrumentations SI(M.getContext(), DebugPM != DebugLogging::None,
VerifyEachPass, PrintPassOpts);
- SI.registerCallbacks(PIC, &FAM);
+ SI.registerCallbacks(PIC, &MAM);
DebugifyEachInstrumentation Debugify;
DebugifyStatsMap DIStatsMap;
DebugInfoPerPass DebugInfoBeforePass;
if (DebugifyEach) {
Debugify.setDIStatsMap(DIStatsMap);
Debugify.setDebugifyMode(DebugifyMode::SyntheticDebugInfo);
- Debugify.registerCallbacks(PIC);
+ Debugify.registerCallbacks(PIC, MAM);
} else if (VerifyEachDebugInfoPreserve) {
Debugify.setDebugInfoBeforePass(DebugInfoBeforePass);
Debugify.setDebugifyMode(DebugifyMode::OriginalDebugInfo);
Debugify.setOrigDIVerifyBugsReportFilePath(
VerifyDIPreserveExport);
- Debugify.registerCallbacks(PIC);
+ Debugify.registerCallbacks(PIC, MAM);
}
PipelineTuningOptions PTO;
@@ -412,6 +420,7 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
// to false above so we shouldn't necessarily need to check whether or not the
// option has been enabled.
PTO.LoopUnrolling = !DisableLoopUnrolling;
+ PTO.UnifiedLTO = UnifiedLTO;
PassBuilder PB(TM, PTO, P, &PIC);
registerEPCallbacks(PB);
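A minimal sketch of the updated PGOOptions construction used in the hunks above: the constructor now also takes a memory-profile path and a virtual file system (the paths are placeholders):

auto FS = vfs::getRealFileSystem();
std::optional<PGOOptions> P =
    PGOOptions(/*ProfileFile=*/"default.profdata",
               /*CSProfileGenFile=*/"", /*ProfileRemappingFile=*/"",
               /*MemoryProfile=*/"", FS, PGOOptions::IRUse);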
diff --git a/llvm/tools/opt/NewPMDriver.h b/llvm/tools/opt/NewPMDriver.h
index a3cdd158d40f..3230c27c2d7a 100644
--- a/llvm/tools/opt/NewPMDriver.h
+++ b/llvm/tools/opt/NewPMDriver.h
@@ -67,13 +67,13 @@ void printPasses(raw_ostream &OS);
bool runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
TargetLibraryInfoImpl *TLII, ToolOutputFile *Out,
ToolOutputFile *ThinLinkOut, ToolOutputFile *OptRemarkFile,
- StringRef PassPipeline,
- ArrayRef<PassPlugin> PassPlugins, opt_tool::OutputKind OK,
- opt_tool::VerifierKind VK,
+ StringRef PassPipeline, ArrayRef<PassPlugin> PassPlugins,
+ opt_tool::OutputKind OK, opt_tool::VerifierKind VK,
bool ShouldPreserveAssemblyUseListOrder,
bool ShouldPreserveBitcodeUseListOrder,
bool EmitSummaryIndex, bool EmitModuleHash,
- bool EnableDebugify, bool VerifyDIPreserve);
+ bool EnableDebugify, bool VerifyDIPreserve,
+ bool UnifiedLTO = false);
} // namespace llvm
#endif
diff --git a/llvm/tools/opt/opt.cpp b/llvm/tools/opt/opt.cpp
index 40632b43e73b..9c20e7784223 100644
--- a/llvm/tools/opt/opt.cpp
+++ b/llvm/tools/opt/opt.cpp
@@ -11,9 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "BreakpointPrinter.h"
#include "NewPMDriver.h"
-#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/LoopPass.h"
@@ -37,13 +35,11 @@
#include "llvm/InitializePasses.h"
#include "llvm/LinkAllIR.h"
#include "llvm/LinkAllPasses.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Remarks/HotnessThresholdParser.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Host.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/PluginLoader.h"
#include "llvm/Support/SourceMgr.h"
@@ -52,6 +48,9 @@
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/TargetParser/Host.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
+#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Debugify.h"
@@ -68,12 +67,12 @@ static codegen::RegisterCodeGenFlags CFG;
static cl::list<const PassInfo *, bool, PassNameParser> PassList(cl::desc(
"Optimizations available (use '-passes=' for the new pass manager)"));
-static cl::opt<bool> EnableNewPassManager(
- "enable-new-pm",
- cl::desc("Enable the new pass manager, translating "
- "'opt -foo' to 'opt -passes=foo'. This is strictly for the new PM "
- "migration, use '-passes=' when possible."),
- cl::init(true));
+static cl::opt<bool> EnableLegacyPassManager(
+ "bugpoint-enable-legacy-pm",
+ cl::desc(
+ "Enable the legacy pass manager. This is strictly for bugpoint "
+ "due to it not working with the new PM, please do not use otherwise."),
+ cl::init(false));
// This flag specifies a textual description of the optimization pass pipeline
// to run over the module. This flag switches opt to use the new pass manager
@@ -117,6 +116,12 @@ static cl::opt<bool>
SplitLTOUnit("thinlto-split-lto-unit",
cl::desc("Enable splitting of a ThinLTO LTOUnit"));
+static cl::opt<bool>
+ UnifiedLTO("unified-lto",
+ cl::desc("Use unified LTO piplines. Ignored unless -thinlto-bc "
+ "is also specified."),
+ cl::Hidden, cl::init(false));
+
static cl::opt<std::string> ThinLinkBitcodeFile(
"thin-link-bitcode-file", cl::value_desc("filename"),
cl::desc(
@@ -203,10 +208,6 @@ static cl::opt<bool> VerifyDebugInfoPreserve(
cl::desc("Start the pipeline with collecting and end it with checking of "
"debug info preservation."));
-static cl::opt<bool>
-PrintBreakpoints("print-breakpoints-for-testing",
- cl::desc("Print select breakpoints location for testing"));
-
static cl::opt<std::string> ClDataLayout("data-layout",
cl::desc("data layout string to use"),
cl::value_desc("layout-string"),
@@ -279,15 +280,6 @@ static cl::list<std::string>
PassPlugins("load-pass-plugin",
cl::desc("Load passes from plugin library"));
-static inline void addPass(legacy::PassManagerBase &PM, Pass *P) {
- // Add the pass to the pass manager...
- PM.add(P);
-
- // If we are verifying all of the intermediate steps, add the verifier...
- if (VerifyEach)
- PM.add(createVerifierPass());
-}
-
//===----------------------------------------------------------------------===//
// CodeGen-related helper functions.
//
@@ -368,7 +360,6 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
"verify-safepoint-ir",
"atomic-expand",
"expandvp",
- "hardware-loops",
"mve-tail-predication",
"interleaved-access",
"global-merge",
@@ -393,7 +384,8 @@ static bool shouldPinPassToLegacyPM(StringRef Pass) {
"expand-large-div-rem",
"structurizecfg",
"fix-irreducible",
- "expand-large-fp-convert"
+ "expand-large-fp-convert",
+ "callbrprepare",
};
for (const auto &P : PassNamePrefix)
if (Pass.startswith(P))
@@ -445,9 +437,9 @@ int main(int argc, char **argv) {
initializeExpandMemCmpPassPass(Registry);
initializeScalarizeMaskedMemIntrinLegacyPassPass(Registry);
initializeSelectOptimizePass(Registry);
+ initializeCallBrPreparePass(Registry);
initializeCodeGenPreparePass(Registry);
initializeAtomicExpandPass(Registry);
- initializeRewriteSymbolsLegacyPassPass(Registry);
initializeWinEHPreparePass(Registry);
initializeDwarfEHPrepareLegacyPassPass(Registry);
initializeSafeStackLegacyPassPass(Registry);
@@ -462,7 +454,6 @@ int main(int argc, char **argv) {
initializeExpandVectorPredicationPass(Registry);
initializeWasmEHPreparePass(Registry);
initializeWriteBitcodePassPass(Registry);
- initializeHardwareLoopsPass(Registry);
initializeReplaceWithVeclibLegacyPass(Registry);
initializeJMCInstrumenterPass(Registry);
@@ -485,11 +476,8 @@ int main(int argc, char **argv) {
LLVMContext Context;
- // If `-passes=` is specified, use NPM.
- // If `-enable-new-pm` is specified and there are no codegen passes, use NPM.
- // e.g. `-enable-new-pm -sroa` will use NPM.
- // but `-enable-new-pm -codegenprepare` will still revert to legacy PM.
- const bool UseNPM = (EnableNewPassManager && !shouldForceLegacyPM()) ||
+ // TODO: remove shouldForceLegacyPM().
+ const bool UseNPM = (!EnableLegacyPassManager && !shouldForceLegacyPM()) ||
PassPipeline.getNumOccurrences() > 0;
if (UseNPM && !PassList.empty()) {
@@ -647,8 +635,11 @@ int main(int argc, char **argv) {
if (CheckBitcodeOutputToConsole(Out->os()))
NoOutput = true;
- if (OutputThinLTOBC)
+ if (OutputThinLTOBC) {
M->addModuleFlag(Module::Error, "EnableSplitLTOUnit", SplitLTOUnit);
+ if (UnifiedLTO)
+ M->addModuleFlag(Module::Error, "UnifiedLTO", 1);
+ }
// Add an appropriate TargetLibraryInfo pass for the module's triple.
TargetLibraryInfoImpl TLII(ModuleTriple);
@@ -671,9 +662,8 @@ int main(int argc, char **argv) {
if (UseNPM) {
if (legacy::debugPassSpecified()) {
- errs()
- << "-debug-pass does not work with the new PM, either use "
- "-debug-pass-manager, or use the legacy PM (-enable-new-pm=0)\n";
+ errs() << "-debug-pass does not work with the new PM, either use "
+ "-debug-pass-manager, or use the legacy PM\n";
return 1;
}
auto NumOLevel = OptLevelO0 + OptLevelO1 + OptLevelO2 + OptLevelO3 +
@@ -721,7 +711,7 @@ int main(int argc, char **argv) {
PluginList, OK, VK, PreserveAssemblyUseListOrder,
PreserveBitcodeUseListOrder, EmitSummaryIndex,
EmitModuleHash, EnableDebugify,
- VerifyDebugInfoPreserve)
+ VerifyDebugInfoPreserve, UnifiedLTO)
? 0
: 1;
}
@@ -781,26 +771,6 @@ int main(int argc, char **argv) {
}
}
- std::unique_ptr<legacy::FunctionPassManager> FPasses;
-
- if (PrintBreakpoints) {
- // Default to standard output.
- if (!Out) {
- if (OutputFilename.empty())
- OutputFilename = "-";
-
- std::error_code EC;
- Out = std::make_unique<ToolOutputFile>(OutputFilename, EC,
- sys::fs::OF_None);
- if (EC) {
- errs() << EC.message() << '\n';
- return 1;
- }
- }
- Passes.add(createBreakpointPrinter(Out->os()));
- NoOutput = true;
- }
-
if (TM) {
// FIXME: We should dyn_cast this when supported.
auto &LTM = static_cast<LLVMTargetMachine &>(*TM);
@@ -811,21 +781,18 @@ int main(int argc, char **argv) {
// Create a new optimization pass for each one specified on the command line
for (unsigned i = 0; i < PassList.size(); ++i) {
const PassInfo *PassInf = PassList[i];
- Pass *P = nullptr;
- if (PassInf->getNormalCtor())
- P = PassInf->getNormalCtor()();
- else
+ if (PassInf->getNormalCtor()) {
+ Pass *P = PassInf->getNormalCtor()();
+ if (P) {
+ // Add the pass to the pass manager.
+ Passes.add(P);
+ // If we are verifying all of the intermediate steps, add the verifier.
+ if (VerifyEach)
+ Passes.add(createVerifierPass());
+ }
+ } else
errs() << argv[0] << ": cannot create pass: "
<< PassInf->getPassName() << "\n";
- if (P)
- addPass(Passes, P);
- }
-
- if (FPasses) {
- FPasses->doInitialization();
- for (Function &F : *M)
- FPasses->run(F);
- FPasses->doFinalization();
}
// Check that the module is well formed on completion of optimization
@@ -912,7 +879,7 @@ int main(int argc, char **argv) {
exportDebugifyStats(DebugifyExport, Passes.getDebugifyStatsMap());
// Declare success.
- if (!NoOutput || PrintBreakpoints)
+ if (!NoOutput)
Out->keep();
if (RemarksFile)
diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index c13e5b5deff6..1c195200a888 100644
--- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -95,7 +95,9 @@
//
//===----------------------------------------------------------------------===//
+#include "CodeGenInstAlias.h"
#include "CodeGenInstruction.h"
+#include "CodeGenRegisters.h"
#include "CodeGenTarget.h"
#include "SubtargetFeatureInfo.h"
#include "Types.h"
@@ -105,7 +107,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Config/llvm-config.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -505,9 +506,9 @@ struct MatchableInfo {
PointerUnion<const CodeGenInstruction*, const CodeGenInstAlias*> DefRec;
const CodeGenInstruction *getResultInst() const {
- if (DefRec.is<const CodeGenInstruction*>())
- return DefRec.get<const CodeGenInstruction*>();
- return DefRec.get<const CodeGenInstAlias*>()->ResultInst;
+ if (isa<const CodeGenInstruction *>(DefRec))
+ return cast<const CodeGenInstruction *>(DefRec);
+ return cast<const CodeGenInstAlias *>(DefRec)->ResultInst;
}
/// ResOperands - This is the operand list that should be built for the result
@@ -533,7 +534,7 @@ struct MatchableInfo {
std::string ConversionFnKind;
/// If this instruction is deprecated in some form.
- bool HasDeprecation;
+ bool HasDeprecation = false;
/// If this is an alias, this is use to determine whether or not to using
/// the conversion function defined by the instruction's AsmMatchConverter
@@ -563,11 +564,11 @@ struct MatchableInfo {
ConversionFnKind(RHS.ConversionFnKind),
HasDeprecation(RHS.HasDeprecation),
UseInstAsmMatchConverter(RHS.UseInstAsmMatchConverter) {
- assert(!DefRec.is<const CodeGenInstAlias *>());
+ assert(!isa<const CodeGenInstAlias *>(DefRec));
}
~MatchableInfo() {
- delete DefRec.dyn_cast<const CodeGenInstAlias*>();
+ delete dyn_cast_if_present<const CodeGenInstAlias *>(DefRec);
}
// Two-operand aliases clone from the main matchable, but mark the second
@@ -629,6 +630,17 @@ struct MatchableInfo {
return false;
}
+ // For X86 AVX/AVX512 instructions, we prefer vex encoding because the
+ // vex encoding size is smaller. Since X86InstrSSE.td is included ahead
+ // of X86InstrAVX512.td, the AVX instruction ID is less than AVX512 ID.
+ // We use the ID to sort AVX instructions before AVX512 instructions in the
+ // matching table.
+ if (TheDef->isSubClassOf("Instruction") &&
+ TheDef->getValueAsBit("HasPositionOrder") &&
+ RHS.TheDef->isSubClassOf("Instruction") &&
+ RHS.TheDef->getValueAsBit("HasPositionOrder"))
+ return TheDef->getID() < RHS.TheDef->getID();
+
// Give matches that require more features higher precedence. This is useful
// because we cannot define AssemblerPredicates with the negation of
// processor features. For example, ARM v6 "nop" may be either a HINT or
@@ -638,15 +650,6 @@ struct MatchableInfo {
if (RequiredFeatures.size() != RHS.RequiredFeatures.size())
return RequiredFeatures.size() > RHS.RequiredFeatures.size();
- // For X86 AVX/AVX512 instructions, we prefer vex encoding because the
- // vex encoding size is smaller. Since X86InstrSSE.td is included ahead
- // of X86InstrAVX512.td, the AVX instruction ID is less than AVX512 ID.
- // We use the ID to sort AVX instruction before AVX512 instruction in
- // matching table.
- if (TheDef->isSubClassOf("Instruction") &&
- TheDef->getValueAsBit("HasPositionOrder"))
- return TheDef->getID() < RHS.TheDef->getID();
-
return false;
}
@@ -1613,13 +1616,13 @@ void AsmMatcherInfo::buildInfo() {
else
OperandName = Token.substr(1);
- if (II->DefRec.is<const CodeGenInstruction*>())
+ if (isa<const CodeGenInstruction *>(II->DefRec))
buildInstructionOperandReference(II.get(), OperandName, i);
else
buildAliasOperandReference(II.get(), OperandName, Op);
}
- if (II->DefRec.is<const CodeGenInstruction*>()) {
+ if (isa<const CodeGenInstruction *>(II->DefRec)) {
II->buildInstructionResultOperands();
// If the instruction has a two-operand alias, build up the
// matchable here. We'll add them in bulk at the end to avoid
@@ -1682,7 +1685,7 @@ void AsmMatcherInfo::
buildInstructionOperandReference(MatchableInfo *II,
StringRef OperandName,
unsigned AsmOpIdx) {
- const CodeGenInstruction &CGI = *II->DefRec.get<const CodeGenInstruction*>();
+ const CodeGenInstruction &CGI = *cast<const CodeGenInstruction *>(II->DefRec);
const CGIOperandList &Operands = CGI.Operands;
MatchableInfo::AsmOperand *Op = &II->AsmOperands[AsmOpIdx];
@@ -1745,7 +1748,7 @@ buildInstructionOperandReference(MatchableInfo *II,
void AsmMatcherInfo::buildAliasOperandReference(MatchableInfo *II,
StringRef OperandName,
MatchableInfo::AsmOperand &Op) {
- const CodeGenInstAlias &CGA = *II->DefRec.get<const CodeGenInstAlias*>();
+ const CodeGenInstAlias &CGA = *cast<const CodeGenInstAlias *>(II->DefRec);
// Set up the operand class.
for (unsigned i = 0, e = CGA.ResultOperands.size(); i != e; ++i)
@@ -1818,7 +1821,7 @@ void MatchableInfo::buildInstructionResultOperands() {
}
void MatchableInfo::buildAliasResultOperands(bool AliasConstraintsAreChecked) {
- const CodeGenInstAlias &CGA = *DefRec.get<const CodeGenInstAlias*>();
+ const CodeGenInstAlias &CGA = *cast<const CodeGenInstAlias *>(DefRec);
const CodeGenInstruction *ResultInst = getResultInst();
// Map of: $reg -> #lastref
@@ -2924,7 +2927,7 @@ emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
// Emit the operand class switch to call the correct custom parser for
// the found operand class.
- OS << "OperandMatchResultTy " << Target.getName() << ClassName << "::\n"
+ OS << "ParseStatus " << Target.getName() << ClassName << "::\n"
<< "tryCustomParseOperand(OperandVector"
<< " &Operands,\n unsigned MCK) {\n\n"
<< " switch(MCK) {\n";
@@ -2937,15 +2940,15 @@ emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
}
OS << " default:\n";
- OS << " return MatchOperand_NoMatch;\n";
+ OS << " return ParseStatus::NoMatch;\n";
OS << " }\n";
- OS << " return MatchOperand_NoMatch;\n";
+ OS << " return ParseStatus::NoMatch;\n";
OS << "}\n\n";
// Emit the static custom operand parser. This code is very similar with
// the other matcher. Also use MatchResultTy here just in case we go for
// a better error handling.
- OS << "OperandMatchResultTy " << Target.getName() << ClassName << "::\n"
+ OS << "ParseStatus " << Target.getName() << ClassName << "::\n"
<< "MatchOperandParserImpl(OperandVector"
<< " &Operands,\n StringRef Mnemonic,\n"
<< " bool ParseForAllFeatures) {\n";
@@ -2976,7 +2979,7 @@ emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
}
OS << " if (MnemonicRange.first == MnemonicRange.second)\n";
- OS << " return MatchOperand_NoMatch;\n\n";
+ OS << " return ParseStatus::NoMatch;\n\n";
OS << " for (const OperandMatchEntry *it = MnemonicRange.first,\n"
<< " *ie = MnemonicRange.second; it != ie; ++it) {\n";
@@ -3002,14 +3005,13 @@ emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
if (ParserName.empty())
ParserName = "tryCustomParseOperand";
OS << " // call custom parse method to handle the operand\n";
- OS << " OperandMatchResultTy Result = " << ParserName
- << "(Operands, it->Class);\n";
- OS << " if (Result != MatchOperand_NoMatch)\n";
+ OS << " ParseStatus Result = " << ParserName << "(Operands, it->Class);\n";
+ OS << " if (!Result.isNoMatch())\n";
OS << " return Result;\n";
OS << " }\n\n";
OS << " // Okay, we had no match.\n";
- OS << " return MatchOperand_NoMatch;\n";
+ OS << " return ParseStatus::NoMatch;\n";
OS << "}\n\n";
}
@@ -3202,6 +3204,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
Record *AsmParser = Target.getAsmParser();
StringRef ClassName = AsmParser->getValueAsString("AsmParserClassName");
+ emitSourceFileHeader("Assembly Matcher Source Fragment", OS);
+
// Compute the information on the instructions to match.
AsmMatcherInfo Info(AsmParser, Target, Records);
Info.buildInfo();
@@ -3303,12 +3307,12 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
if (!Info.OperandMatchInfo.empty()) {
- OS << " OperandMatchResultTy MatchOperandParserImpl(\n";
+ OS << " ParseStatus MatchOperandParserImpl(\n";
OS << " OperandVector &Operands,\n";
OS << " StringRef Mnemonic,\n";
OS << " bool ParseForAllFeatures = false);\n";
- OS << " OperandMatchResultTy tryCustomParseOperand(\n";
+ OS << " ParseStatus tryCustomParseOperand(\n";
OS << " OperandVector &Operands,\n";
OS << " unsigned MCK);\n\n";
}
@@ -3999,11 +4003,5 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << "#endif // GET_MNEMONIC_CHECKER\n\n";
}
-namespace llvm {
-
-void EmitAsmMatcher(RecordKeeper &RK, raw_ostream &OS) {
- emitSourceFileHeader("Assembly Matcher Source Fragment", OS);
- AsmMatcherEmitter(RK).run(OS);
-}
-
-} // end namespace llvm
+static TableGen::Emitter::OptClass<AsmMatcherEmitter>
+ X("gen-asm-matcher", "Generate assembly instruction matcher");
diff --git a/llvm/utils/TableGen/AsmWriterEmitter.cpp b/llvm/utils/TableGen/AsmWriterEmitter.cpp
index f2e4d15a2c75..92e71910a800 100644
--- a/llvm/utils/TableGen/AsmWriterEmitter.cpp
+++ b/llvm/utils/TableGen/AsmWriterEmitter.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "AsmWriterInst.h"
+#include "CodeGenInstAlias.h"
#include "CodeGenInstruction.h"
#include "CodeGenRegisters.h"
#include "CodeGenTarget.h"
@@ -994,7 +995,10 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
for (Record *const R : ReqFeatures) {
const DagInit *D = R->getValueAsDag("AssemblerCondDag");
- std::string CombineType = D->getOperator()->getAsString();
+ auto *Op = dyn_cast<DefInit>(D->getOperator());
+ if (!Op)
+ PrintFatalError(R->getLoc(), "Invalid AssemblerCondDag!");
+ StringRef CombineType = Op->getDef()->getName();
if (CombineType != "any_of" && CombineType != "all_of")
PrintFatalError(R->getLoc(), "Invalid AssemblerCondDag!");
if (D->getNumArgs() == 0)
@@ -1002,7 +1006,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
bool IsOr = CombineType == "any_of";
// Change (any_of FeatureAll, (any_of ...)) to (any_of FeatureAll, ...).
if (IsOr && D->getNumArgs() == 2 && isa<DagInit>(D->getArg(1))) {
- DagInit *RHS = dyn_cast<DagInit>(D->getArg(1));
+ DagInit *RHS = cast<DagInit>(D->getArg(1));
SmallVector<Init *> Args{D->getArg(0)};
SmallVector<StringInit *> ArgNames{D->getArgName(0)};
for (unsigned i = 0, e = RHS->getNumArgs(); i != e; ++i) {
@@ -1298,17 +1302,12 @@ void AsmWriterEmitter::run(raw_ostream &O) {
std::vector<std::vector<std::string>> TableDrivenOperandPrinters;
unsigned BitsLeft = 0;
unsigned AsmStrBits = 0;
+ emitSourceFileHeader("Assembly Writer Source Fragment", O);
EmitGetMnemonic(O, TableDrivenOperandPrinters, BitsLeft, AsmStrBits);
EmitPrintInstruction(O, TableDrivenOperandPrinters, BitsLeft, AsmStrBits);
EmitGetRegisterName(O);
EmitPrintAliasInstruction(O);
}
-namespace llvm {
-
-void EmitAsmWriter(RecordKeeper &RK, raw_ostream &OS) {
- emitSourceFileHeader("Assembly Writer Source Fragment", OS);
- AsmWriterEmitter(RK).run(OS);
-}
-
-} // end namespace llvm
+static TableGen::Emitter::OptClass<AsmWriterEmitter>
+ X("gen-asm-writer", "Generate assembly writer");
diff --git a/llvm/utils/TableGen/AsmWriterInst.cpp b/llvm/utils/TableGen/AsmWriterInst.cpp
index 4a78108d6f4a..c9558593e142 100644
--- a/llvm/utils/TableGen/AsmWriterInst.cpp
+++ b/llvm/utils/TableGen/AsmWriterInst.cpp
@@ -12,7 +12,6 @@
#include "AsmWriterInst.h"
#include "CodeGenInstruction.h"
-#include "CodeGenTarget.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
diff --git a/llvm/utils/TableGen/AsmWriterInst.h b/llvm/utils/TableGen/AsmWriterInst.h
index fe2b934e266f..9c93e82b611b 100644
--- a/llvm/utils/TableGen/AsmWriterInst.h
+++ b/llvm/utils/TableGen/AsmWriterInst.h
@@ -21,7 +21,6 @@
namespace llvm {
class CodeGenInstruction;
- class Record;
struct AsmWriterOperand {
enum OpType {
diff --git a/llvm/utils/TableGen/Attributes.cpp b/llvm/utils/TableGen/Attributes.cpp
index 735c53dd6fcf..474042a3e9a3 100644
--- a/llvm/utils/TableGen/Attributes.cpp
+++ b/llvm/utils/TableGen/Attributes.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
#include <vector>
using namespace llvm;
@@ -17,7 +18,7 @@ namespace {
class Attributes {
public:
Attributes(RecordKeeper &R) : Records(R) {}
- void emit(raw_ostream &OS);
+ void run(raw_ostream &OS);
private:
void emitTargetIndependentNames(raw_ostream &OS);
@@ -54,6 +55,7 @@ void Attributes::emitTargetIndependentNames(raw_ostream &OS) {
// Emit attribute enums in the same order llvm::Attribute::operator< expects.
Emit({"EnumAttr", "TypeAttr", "IntAttr"}, "ATTRIBUTE_ENUM");
Emit({"StrBoolAttr"}, "ATTRIBUTE_STRBOOL");
+ Emit({"ComplexStrAttr"}, "ATTRIBUTE_COMPLEXSTR");
OS << "#undef ATTRIBUTE_ALL\n";
OS << "#endif\n\n";
@@ -123,16 +125,11 @@ void Attributes::emitAttributeProperties(raw_ostream &OS) {
OS << "#endif\n";
}
-void Attributes::emit(raw_ostream &OS) {
+void Attributes::run(raw_ostream &OS) {
emitTargetIndependentNames(OS);
emitFnAttrCompatCheck(OS, false);
emitAttributeProperties(OS);
}
-namespace llvm {
-
-void EmitAttributes(RecordKeeper &RK, raw_ostream &OS) {
- Attributes(RK).emit(OS);
-}
-
-} // End llvm namespace.
+static TableGen::Emitter::OptClass<Attributes> X("gen-attrs",
+ "Generate attributes");
diff --git a/llvm/utils/TableGen/CTagsEmitter.cpp b/llvm/utils/TableGen/CTagsEmitter.cpp
index fe62d6a9b67f..b8e27d057d95 100644
--- a/llvm/utils/TableGen/CTagsEmitter.cpp
+++ b/llvm/utils/TableGen/CTagsEmitter.cpp
@@ -12,12 +12,12 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
#include <algorithm>
-#include <string>
#include <vector>
using namespace llvm;
@@ -86,8 +86,5 @@ void CTagsEmitter::run(raw_ostream &OS) {
T.emit(OS);
}
-namespace llvm {
-
-void EmitCTags(RecordKeeper &RK, raw_ostream &OS) { CTagsEmitter(RK).run(OS); }
-
-} // End llvm namespace.
+static TableGen::Emitter::OptClass<CTagsEmitter>
+ X("gen-ctags", "Generate ctags-compatible index");
diff --git a/llvm/utils/TableGen/CallingConvEmitter.cpp b/llvm/utils/TableGen/CallingConvEmitter.cpp
index e8ec90e9c078..de3810b2e227 100644
--- a/llvm/utils/TableGen/CallingConvEmitter.cpp
+++ b/llvm/utils/TableGen/CallingConvEmitter.cpp
@@ -15,14 +15,16 @@
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
+#include <deque>
+
using namespace llvm;
namespace {
class CallingConvEmitter {
RecordKeeper &Records;
- unsigned Counter;
+ unsigned Counter = 0u;
std::string CurrentAction;
- bool SwiftAction;
+ bool SwiftAction = false;
std::map<std::string, std::set<std::string>> AssignedRegsMap;
std::map<std::string, std::set<std::string>> AssignedSwiftRegsMap;
@@ -41,7 +43,9 @@ private:
} // End anonymous namespace
void CallingConvEmitter::run(raw_ostream &O) {
- std::vector<Record*> CCs = Records.getAllDerivedDefinitions("CallingConv");
+ emitSourceFileHeader("Calling Convention Implementation Fragment", O);
+
+ std::vector<Record *> CCs = Records.getAllDerivedDefinitions("CallingConv");
// Emit prototypes for all of the non-custom CC's so that they can forward ref
// each other.
@@ -247,7 +251,7 @@ void CallingConvEmitter::EmitAction(Record *Action,
int Size = Action->getValueAsInt("Size");
int Align = Action->getValueAsInt("Align");
- O << IndentStr << "unsigned Offset" << ++Counter
+ O << IndentStr << "int64_t Offset" << ++Counter
<< " = State.AllocateStack(";
if (Size)
O << Size << ", ";
@@ -283,7 +287,7 @@ void CallingConvEmitter::EmitAction(Record *Action,
O << LS << getQualifiedName(ShadowRegList->getElementAsRecord(i));
O << "\n" << IndentStr << "};\n";
- O << IndentStr << "unsigned Offset" << ++Counter
+ O << IndentStr << "int64_t Offset" << ++Counter
<< " = State.AllocateStack(" << Size << ", Align(" << Align << "), "
<< "ShadowRegList" << ShadowRegListNumber << ");\n";
O << IndentStr << "State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset"
@@ -426,11 +430,5 @@ void CallingConvEmitter::EmitArgRegisterLists(raw_ostream &O) {
}
}
-namespace llvm {
-
-void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS) {
- emitSourceFileHeader("Calling Convention Implementation Fragment", OS);
- CallingConvEmitter(RK).run(OS);
-}
-
-} // End llvm namespace
+static TableGen::Emitter::OptClass<CallingConvEmitter>
+ X("gen-callingconv", "Generate calling convention descriptions");
diff --git a/llvm/utils/TableGen/CodeEmitterGen.cpp b/llvm/utils/TableGen/CodeEmitterGen.cpp
index dc4fd589eaa8..48ed319bf06f 100644
--- a/llvm/utils/TableGen/CodeEmitterGen.cpp
+++ b/llvm/utils/TableGen/CodeEmitterGen.cpp
@@ -7,15 +7,25 @@
//===----------------------------------------------------------------------===//
//
// CodeEmitterGen uses the descriptions of instructions and their fields to
-// construct an automated code emitter: a function that, given a MachineInstr,
-// returns the (currently, 32-bit unsigned) value of the instruction.
+// construct an automated code emitter: a function called
+// getBinaryCodeForInstr() that, given a MCInst, returns the value of the
+// instruction - either as an uint64_t or as an APInt, depending on the
+// maximum bit width of all Inst definitions.
+//
+// In addition, it generates another function called getOperandBitOffset()
+// that, given a MCInst and an operand index, returns the minimum of indices of
+// all bits that carry some portion of the respective operand. When the target's
+// encodeInstruction() stores the instruction in a little-endian byte order, the
+// returned value is the offset of the start of the operand in the encoded
+// instruction. Other targets might need to adjust the returned value according
+// to their encodeInstruction() implementation.
//
//===----------------------------------------------------------------------===//
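To make the shape of the generated code concrete, here is a rough sketch of the GET_OPERAND_BIT_OFFSET output for a hypothetical target "Foo" with a single ADDri encoding (the target name, opcode, operand names and bit offsets are invented for illustration; the real output follows from the target's Inst definitions and the emitter code below):

  uint32_t FooMCCodeEmitter::getOperandBitOffset(const MCInst &MI,
                                                 unsigned OpNum,
                                                 const MCSubtargetInfo &STI) const {
    switch (MI.getOpcode()) {
    case Foo::ADDri: {
      switch (OpNum) {
      case 0:
        // op: dst
        return 21;
      case 1:
        // op: src1
        return 16;
      case 2:
        // op: imm
        return 0;
      }
      break;
    }
    }
    std::string msg;
    raw_string_ostream Msg(msg);
    Msg << "Not supported instr[opcode]: " << MI << "[" << OpNum << "]";
    report_fatal_error(Msg.str().c_str());
  }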
+#include "CodeGenHwModes.h"
#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
-#include "SubtargetFeatureInfo.h"
-#include "Types.h"
+#include "InfoByHwMode.h"
#include "VarLenCodeEmitterGen.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
@@ -46,19 +56,24 @@ public:
private:
int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);
- std::string getInstructionCase(Record *R, CodeGenTarget &Target);
- std::string getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
- CodeGenTarget &Target);
+ std::pair<std::string, std::string>
+ getInstructionCases(Record *R, CodeGenTarget &Target);
+ void addInstructionCasesForEncoding(Record *R, Record *EncodingDef,
+ CodeGenTarget &Target, std::string &Case,
+ std::string &BitOffsetCase);
bool addCodeToMergeInOperand(Record *R, BitsInit *BI,
- const std::string &VarName, unsigned &NumberedOp,
- std::set<unsigned> &NamedOpIndices,
- std::string &Case, CodeGenTarget &Target);
+ const std::string &VarName, std::string &Case,
+ std::string &BitOffsetCase,
+ CodeGenTarget &Target);
void emitInstructionBaseValues(
raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,
CodeGenTarget &Target, int HwMode = -1);
- unsigned BitWidth;
- bool UseAPInt;
+ void
+ emitCaseMap(raw_ostream &o,
+ const std::map<std::string, std::vector<std::string>> &CaseMap);
+ unsigned BitWidth = 0u;
+ bool UseAPInt = false;
};
// If the VarBitInit at position 'bit' matches the specified variable then
@@ -80,9 +95,8 @@ int CodeEmitterGen::getVariableBit(const std::string &VarName,
// Returns true if it succeeds, false if an error.
bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
const std::string &VarName,
- unsigned &NumberedOp,
- std::set<unsigned> &NamedOpIndices,
std::string &Case,
+ std::string &BitOffsetCase,
CodeGenTarget &Target) {
CodeGenInstruction &CGI = Target.getInstruction(R);
@@ -113,52 +127,8 @@ bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
// Get the machine operand number for the indicated operand.
OpIdx = CGI.Operands[OpIdx].MIOperandNo;
} else {
- // Fall back to positional lookup. By default, we now disable positional
- // lookup (and print an error, below), but even so, we'll do the lookup to
- // help print a helpful diagnostic message.
- //
- // TODO: When we remove useDeprecatedPositionallyEncodedOperands, delete all
- // this code, just leaving a "no operand named X in record Y" error.
-
- unsigned NumberOps = CGI.Operands.size();
- /// If this operand is not supposed to be emitted by the
- /// generated emitter, skip it.
- while (NumberedOp < NumberOps &&
- (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) ||
- (!NamedOpIndices.empty() && NamedOpIndices.count(
- CGI.Operands.getSubOperandNumber(NumberedOp).first)))) {
- ++NumberedOp;
- }
-
- if (NumberedOp >=
- CGI.Operands.back().MIOperandNo + CGI.Operands.back().MINumOperands) {
- if (!Target.getInstructionSet()->getValueAsBit(
- "useDeprecatedPositionallyEncodedOperands")) {
- PrintError(R, Twine("No operand named ") + VarName + " in record " +
- R->getName() +
- " (would've given 'too few operands' error with "
- "useDeprecatedPositionallyEncodedOperands=true)");
- } else {
- PrintError(R, "Too few operands in record " + R->getName() +
- " (no match for variable " + VarName + ")");
- }
- return false;
- }
-
- OpIdx = NumberedOp++;
-
- if (!Target.getInstructionSet()->getValueAsBit(
- "useDeprecatedPositionallyEncodedOperands")) {
- std::pair<unsigned, unsigned> SO =
- CGI.Operands.getSubOperandNumber(OpIdx);
- std::string OpName = CGI.Operands[SO.first].Name;
- PrintError(R, Twine("No operand named ") + VarName + " in record " +
- R->getName() + " (would've used positional operand #" +
- Twine(SO.first) + " ('" + OpName + "') sub-op #" +
- Twine(SO.second) +
- " with useDeprecatedPositionallyEncodedOperands=true)");
- return false;
- }
+ PrintError(R, Twine("No operand named ") + VarName + " in record " + R->getName());
+ return false;
}
if (CGI.Operands.isFlatOperandNotEmitted(OpIdx)) {
@@ -222,6 +192,7 @@ bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
++numOperandLits;
}
+ unsigned BitOffset = -1;
for (; bit >= 0; ) {
int varBit = getVariableBit(VarName, BI, bit);
@@ -230,7 +201,7 @@ bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
--bit;
continue;
}
-
+
// Figure out the consecutive range of bits covered by this operand, in
// order to generate better encoding code.
int beginInstBit = bit;
@@ -249,6 +220,7 @@ bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
unsigned loBit = beginVarBit - N + 1;
unsigned hiBit = loBit + N;
unsigned loInstBit = beginInstBit - N + 1;
+ BitOffset = loInstBit;
if (UseAPInt) {
std::string extractStr;
if (N >= 64) {
@@ -290,65 +262,71 @@ bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,
}
}
}
+
+ if (BitOffset != (unsigned)-1) {
+ BitOffsetCase += " case " + utostr(OpIdx) + ":\n";
+ BitOffsetCase += " // op: " + VarName + "\n";
+ BitOffsetCase += " return " + utostr(BitOffset) + ";\n";
+ }
+
return true;
}
-std::string CodeEmitterGen::getInstructionCase(Record *R,
- CodeGenTarget &Target) {
- std::string Case;
+std::pair<std::string, std::string>
+CodeEmitterGen::getInstructionCases(Record *R, CodeGenTarget &Target) {
+ std::string Case, BitOffsetCase;
+
+ auto append = [&](const char *S) {
+ Case += S;
+ BitOffsetCase += S;
+ };
+
if (const RecordVal *RV = R->getValue("EncodingInfos")) {
if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {
const CodeGenHwModes &HWM = Target.getHwModes();
EncodingInfoByHwMode EBM(DI->getDef(), HWM);
- Case += " switch (HwMode) {\n";
- Case += " default: llvm_unreachable(\"Unhandled HwMode\");\n";
+ append(" switch (HwMode) {\n");
+ append(" default: llvm_unreachable(\"Unhandled HwMode\");\n");
for (auto &KV : EBM) {
- Case += " case " + itostr(KV.first) + ": {\n";
- Case += getInstructionCaseForEncoding(R, KV.second, Target);
- Case += " break;\n";
- Case += " }\n";
+ append((" case " + itostr(KV.first) + ": {\n").c_str());
+ addInstructionCasesForEncoding(R, KV.second, Target, Case,
+ BitOffsetCase);
+ append(" break;\n");
+ append(" }\n");
}
- Case += " }\n";
- return Case;
+ append(" }\n");
+ return std::make_pair(std::move(Case), std::move(BitOffsetCase));
}
}
- return getInstructionCaseForEncoding(R, R, Target);
+ addInstructionCasesForEncoding(R, R, Target, Case, BitOffsetCase);
+ return std::make_pair(std::move(Case), std::move(BitOffsetCase));
}
-std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *EncodingDef,
- CodeGenTarget &Target) {
- std::string Case;
+void CodeEmitterGen::addInstructionCasesForEncoding(
+ Record *R, Record *EncodingDef, CodeGenTarget &Target, std::string &Case,
+ std::string &BitOffsetCase) {
BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");
- unsigned NumberedOp = 0;
- std::set<unsigned> NamedOpIndices;
-
- // Collect the set of operand indices that might correspond to named
- // operand, and skip these when assigning operands based on position.
- if (Target.getInstructionSet()->
- getValueAsBit("noNamedPositionallyEncodedOperands")) {
- CodeGenInstruction &CGI = Target.getInstruction(R);
- for (const RecordVal &RV : R->getValues()) {
- unsigned OpIdx;
- if (!CGI.Operands.hasOperandNamed(RV.getName(), OpIdx))
- continue;
-
- NamedOpIndices.insert(OpIdx);
- }
- }
// Loop over all of the fields in the instruction, determining which are the
// operands to the instruction.
bool Success = true;
+ size_t OrigBitOffsetCaseSize = BitOffsetCase.size();
+ BitOffsetCase += " switch (OpNum) {\n";
+ size_t BitOffsetCaseSizeBeforeLoop = BitOffsetCase.size();
for (const RecordVal &RV : EncodingDef->getValues()) {
// Ignore fixed fields in the record, we're looking for values like:
// bits<5> RST = { ?, ?, ?, ?, ? };
if (RV.isNonconcreteOK() || RV.getValue()->isComplete())
continue;
- Success &=
- addCodeToMergeInOperand(R, BI, std::string(RV.getName()), NumberedOp,
- NamedOpIndices, Case, Target);
+ Success &= addCodeToMergeInOperand(R, BI, std::string(RV.getName()), Case,
+ BitOffsetCase, Target);
}
+ // Avoid empty switches.
+ if (BitOffsetCase.size() == BitOffsetCaseSizeBeforeLoop)
+ BitOffsetCase.resize(OrigBitOffsetCaseSize);
+ else
+ BitOffsetCase += " }\n";
if (!Success) {
// Dump the record, so we can see what's going on...
@@ -367,8 +345,6 @@ std::string CodeEmitterGen::getInstructionCaseForEncoding(Record *R, Record *Enc
Case += ", STI";
Case += ");\n";
}
-
- return Case;
}
static void emitInstBits(raw_ostream &OS, const APInt &Bits) {
@@ -419,7 +395,29 @@ void CodeEmitterGen::emitInstructionBaseValues(
o << " UINT64_C(0)\n };\n";
}
+void CodeEmitterGen::emitCaseMap(
+ raw_ostream &o,
+ const std::map<std::string, std::vector<std::string>> &CaseMap) {
+ std::map<std::string, std::vector<std::string>>::const_iterator IE, EE;
+ for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
+ const std::string &Case = IE->first;
+ const std::vector<std::string> &InstList = IE->second;
+
+ for (int i = 0, N = InstList.size(); i < N; i++) {
+ if (i)
+ o << "\n";
+ o << " case " << InstList[i] << ":";
+ }
+ o << " {\n";
+ o << Case;
+ o << " break;\n"
+ << " }\n";
+ }
+}
+
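The effect of emitCaseMap() is to fold all instructions whose generated case bodies are textually identical into a single group of case labels. The emitted output looks roughly like this (opcode names and the case body are hypothetical, abbreviated from what addCodeToMergeInOperand() produces):

  case Foo::ADDri:
  case Foo::SUBri: {
    // op: imm
    op = getMachineOpValue(MI, MI.getOperand(2), Fixups, STI);
    Value |= op & UINT64_C(65535);
    break;
  }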
void CodeEmitterGen::run(raw_ostream &o) {
+ emitSourceFileHeader("Machine Code Emitter", o);
+
CodeGenTarget Target(Records);
std::vector<Record*> Insts = Records.getAllDerivedDefinitions("Instruction");
@@ -498,6 +496,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
// Map to accumulate all the cases.
std::map<std::string, std::vector<std::string>> CaseMap;
+ std::map<std::string, std::vector<std::string>> BitOffsetCaseMap;
// Construct all cases statement for each opcode
for (Record *R : Insts) {
@@ -506,9 +505,11 @@ void CodeEmitterGen::run(raw_ostream &o) {
continue;
std::string InstName =
(R->getValueAsString("Namespace") + "::" + R->getName()).str();
- std::string Case = getInstructionCase(R, Target);
+ std::string Case, BitOffsetCase;
+ std::tie(Case, BitOffsetCase) = getInstructionCases(R, Target);
- CaseMap[Case].push_back(std::move(InstName));
+ CaseMap[Case].push_back(InstName);
+ BitOffsetCaseMap[BitOffsetCase].push_back(std::move(InstName));
}
// Emit initial function code
@@ -531,21 +532,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
}
// Emit each case statement
- std::map<std::string, std::vector<std::string>>::iterator IE, EE;
- for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
- const std::string &Case = IE->first;
- std::vector<std::string> &InstList = IE->second;
-
- for (int i = 0, N = InstList.size(); i < N; i++) {
- if (i)
- o << "\n";
- o << " case " << InstList[i] << ":";
- }
- o << " {\n";
- o << Case;
- o << " break;\n"
- << " }\n";
- }
+ emitCaseMap(o, CaseMap);
// Default case: unhandled opcode
o << " default:\n"
@@ -559,16 +546,27 @@ void CodeEmitterGen::run(raw_ostream &o) {
else
o << " return Value;\n";
o << "}\n\n";
+
+ o << "#ifdef GET_OPERAND_BIT_OFFSET\n"
+ << "#undef GET_OPERAND_BIT_OFFSET\n\n"
+ << "uint32_t " << Target.getName()
+ << "MCCodeEmitter::getOperandBitOffset(const MCInst &MI,\n"
+ << " unsigned OpNum,\n"
+ << " const MCSubtargetInfo &STI) const {\n"
+ << " switch (MI.getOpcode()) {\n";
+ emitCaseMap(o, BitOffsetCaseMap);
+ o << " }\n"
+ << " std::string msg;\n"
+ << " raw_string_ostream Msg(msg);\n"
+ << " Msg << \"Not supported instr[opcode]: \" << MI << \"[\" << OpNum "
+ "<< \"]\";\n"
+ << " report_fatal_error(Msg.str().c_str());\n"
+ << "}\n\n"
+ << "#endif // GET_OPERAND_BIT_OFFSET\n\n";
}
}
} // end anonymous namespace
-namespace llvm {
-
-void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS) {
- emitSourceFileHeader("Machine Code Emitter", OS);
- CodeEmitterGen(RK).run(OS);
-}
-
-} // end namespace llvm
+static TableGen::Emitter::OptClass<CodeEmitterGen>
+ X("gen-emitter", "Generate machine code emitter");
diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index dd04778e2dbe..e481f7e38e6a 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -13,6 +13,7 @@
#include "CodeGenDAGPatterns.h"
#include "CodeGenInstruction.h"
+#include "CodeGenRegisters.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
@@ -82,10 +83,12 @@ void MachineValueTypeSet::writeToStream(raw_ostream &OS) const {
// inference will apply to each mode separately.
TypeSetByHwMode::TypeSetByHwMode(ArrayRef<ValueTypeByHwMode> VTList) {
- for (const ValueTypeByHwMode &VVT : VTList) {
+ // Take the address space from the first type in the list.
+ if (!VTList.empty())
+ AddrSpace = VTList[0].PtrAddrSpace;
+
+ for (const ValueTypeByHwMode &VVT : VTList)
insert(VVT);
- AddrSpaces.push_back(VVT.PtrAddrSpace);
- }
}
bool TypeSetByHwMode::isValueTypeByHwMode(bool AllowEmpty) const {
@@ -102,13 +105,11 @@ ValueTypeByHwMode TypeSetByHwMode::getValueTypeByHwMode() const {
assert(isValueTypeByHwMode(true) &&
"The type set has multiple types for at least one HW mode");
ValueTypeByHwMode VVT;
- auto ASI = AddrSpaces.begin();
+ VVT.PtrAddrSpace = AddrSpace;
for (const auto &I : *this) {
MVT T = I.second.empty() ? MVT::Other : *I.second.begin();
VVT.getOrCreateTypeForMode(I.first, T);
- if (ASI != AddrSpaces.end())
- VVT.PtrAddrSpace = *ASI++;
}
return VVT;
}
@@ -217,7 +218,7 @@ bool TypeSetByHwMode::operator==(const TypeSetByHwMode &VTS) const {
bool IsSimple = isSimple();
bool VTSIsSimple = VTS.isSimple();
if (IsSimple && VTSIsSimple)
- return *begin() == *VTS.begin();
+ return getSimple() == VTS.getSimple();
// Speedup: We have a default if the set is simple.
bool HaveDefault = IsSimple || hasDefault();
@@ -354,21 +355,18 @@ bool TypeSetByHwMode::intersect(SetType &Out, const SetType &In) {
}
bool TypeSetByHwMode::validate() const {
-#ifndef NDEBUG
if (empty())
return true;
bool AllEmpty = true;
for (const auto &I : *this)
AllEmpty &= I.second.empty();
return !AllEmpty;
-#endif
- return true;
}
// --- TypeInfer
bool TypeInfer::MergeInTypeInfo(TypeSetByHwMode &Out,
- const TypeSetByHwMode &In) {
+ const TypeSetByHwMode &In) const {
ValidateOnExit _1(Out, *this);
In.validate();
if (In.empty() || Out == In || TP.hasError())
@@ -809,72 +807,57 @@ bool TypeInfer::EnforceSameSize(TypeSetByHwMode &A, TypeSetByHwMode &B) {
return Changed;
}
-void TypeInfer::expandOverloads(TypeSetByHwMode &VTS) {
+void TypeInfer::expandOverloads(TypeSetByHwMode &VTS) const {
ValidateOnExit _1(VTS, *this);
const TypeSetByHwMode &Legal = getLegalTypes();
- assert(Legal.isDefaultOnly() && "Default-mode only expected");
- const TypeSetByHwMode::SetType &LegalTypes = Legal.get(DefaultMode);
+ assert(Legal.isSimple() && "Default-mode only expected");
+ const TypeSetByHwMode::SetType &LegalTypes = Legal.getSimple();
for (auto &I : VTS)
expandOverloads(I.second, LegalTypes);
}
void TypeInfer::expandOverloads(TypeSetByHwMode::SetType &Out,
- const TypeSetByHwMode::SetType &Legal) {
- std::set<MVT> Ovs;
- for (MVT T : Out) {
- if (!T.isOverloaded())
- continue;
-
- Ovs.insert(T);
- // MachineValueTypeSet allows iteration and erasing.
- Out.erase(T);
- }
-
- for (MVT Ov : Ovs) {
- switch (Ov.SimpleTy) {
- case MVT::iPTRAny:
- Out.insert(MVT::iPTR);
- return;
- case MVT::iAny:
- for (MVT T : MVT::integer_valuetypes())
- if (Legal.count(T))
- Out.insert(T);
- for (MVT T : MVT::integer_fixedlen_vector_valuetypes())
- if (Legal.count(T))
- Out.insert(T);
- for (MVT T : MVT::integer_scalable_vector_valuetypes())
- if (Legal.count(T))
- Out.insert(T);
- return;
- case MVT::fAny:
- for (MVT T : MVT::fp_valuetypes())
- if (Legal.count(T))
- Out.insert(T);
- for (MVT T : MVT::fp_fixedlen_vector_valuetypes())
- if (Legal.count(T))
- Out.insert(T);
- for (MVT T : MVT::fp_scalable_vector_valuetypes())
- if (Legal.count(T))
- Out.insert(T);
- return;
- case MVT::vAny:
- for (MVT T : MVT::vector_valuetypes())
- if (Legal.count(T))
- Out.insert(T);
- return;
- case MVT::Any:
- for (MVT T : MVT::all_valuetypes())
- if (Legal.count(T))
- Out.insert(T);
- return;
- default:
- break;
- }
- }
-}
-
-const TypeSetByHwMode &TypeInfer::getLegalTypes() {
+ const TypeSetByHwMode::SetType &Legal) const {
+ if (Out.count(MVT::iPTRAny)) {
+ Out.erase(MVT::iPTRAny);
+ Out.insert(MVT::iPTR);
+ } else if (Out.count(MVT::iAny)) {
+ Out.erase(MVT::iAny);
+ for (MVT T : MVT::integer_valuetypes())
+ if (Legal.count(T))
+ Out.insert(T);
+ for (MVT T : MVT::integer_fixedlen_vector_valuetypes())
+ if (Legal.count(T))
+ Out.insert(T);
+ for (MVT T : MVT::integer_scalable_vector_valuetypes())
+ if (Legal.count(T))
+ Out.insert(T);
+ } else if (Out.count(MVT::fAny)) {
+ Out.erase(MVT::fAny);
+ for (MVT T : MVT::fp_valuetypes())
+ if (Legal.count(T))
+ Out.insert(T);
+ for (MVT T : MVT::fp_fixedlen_vector_valuetypes())
+ if (Legal.count(T))
+ Out.insert(T);
+ for (MVT T : MVT::fp_scalable_vector_valuetypes())
+ if (Legal.count(T))
+ Out.insert(T);
+ } else if (Out.count(MVT::vAny)) {
+ Out.erase(MVT::vAny);
+ for (MVT T : MVT::vector_valuetypes())
+ if (Legal.count(T))
+ Out.insert(T);
+ } else if (Out.count(MVT::Any)) {
+ Out.erase(MVT::Any);
+ for (MVT T : MVT::all_valuetypes())
+ if (Legal.count(T))
+ Out.insert(T);
+ }
+}
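As a worked example, assume a hypothetical target whose only legal types are i32, i64, v4i32 and f32; the rewritten expandOverloads() then resolves an overloaded type as follows:

  iPTRAny -> { iPTR }
  iAny    -> { i32, i64, v4i32 }
  fAny    -> { f32 }
  vAny    -> { v4i32 }
  Any     -> { i32, i64, v4i32, f32 }

Like the switch it replaces, the else-if chain expands at most one overloaded type per call, so the previous behaviour is preserved.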
+
+const TypeSetByHwMode &TypeInfer::getLegalTypes() const {
if (!LegalTypesCached) {
TypeSetByHwMode::SetType &LegalTypes = LegalCache.getOrCreate(DefaultMode);
// Stuff all types from all modes into the default mode.
@@ -883,26 +866,26 @@ const TypeSetByHwMode &TypeInfer::getLegalTypes() {
LegalTypes.insert(I.second);
LegalTypesCached = true;
}
- assert(LegalCache.isDefaultOnly() && "Default-mode only expected");
+ assert(LegalCache.isSimple() && "Default-mode only expected");
return LegalCache;
}
-#ifndef NDEBUG
TypeInfer::ValidateOnExit::~ValidateOnExit() {
if (Infer.Validate && !VTS.validate()) {
- dbgs() << "Type set is empty for each HW mode:\n"
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ errs() << "Type set is empty for each HW mode:\n"
"possible type contradiction in the pattern below "
"(use -print-records with llvm-tblgen to see all "
"expanded records).\n";
Infer.TP.dump();
- dbgs() << "Generated from record:\n";
+ errs() << "Generated from record:\n";
Infer.TP.getRecord()->dump();
+#endif
PrintFatalError(Infer.TP.getRecord()->getLoc(),
"Type set is empty for each HW mode in '" +
Infer.TP.getRecord()->getName() + "'");
}
}
-#endif
//===----------------------------------------------------------------------===//
@@ -1512,6 +1495,9 @@ void PatternToMatch::getPredicateRecords(
}
// Sort so that different orders get canonicalized to the same string.
llvm::sort(PredicateRecs, LessRecord());
+ // Remove duplicate predicates.
+ PredicateRecs.erase(std::unique(PredicateRecs.begin(), PredicateRecs.end()),
+ PredicateRecs.end());
}
/// getPredicateCheck - Return a single string containing all of this
@@ -1522,22 +1508,17 @@ std::string PatternToMatch::getPredicateCheck() const {
getPredicateRecords(PredicateRecs);
SmallString<128> PredicateCheck;
+ raw_svector_ostream OS(PredicateCheck);
+ ListSeparator LS(" && ");
for (Record *Pred : PredicateRecs) {
StringRef CondString = Pred->getValueAsString("CondString");
if (CondString.empty())
continue;
- if (!PredicateCheck.empty())
- PredicateCheck += " && ";
- PredicateCheck += "(";
- PredicateCheck += CondString;
- PredicateCheck += ")";
+ OS << LS << '(' << CondString << ')';
}
- if (!HwModeFeatures.empty()) {
- if (!PredicateCheck.empty())
- PredicateCheck += " && ";
- PredicateCheck += HwModeFeatures;
- }
+ if (!HwModeFeatures.empty())
+ OS << LS << HwModeFeatures;
return std::string(PredicateCheck);
}
@@ -1792,7 +1773,7 @@ bool TreePatternNode::ContainsUnresolvedType(TreePattern &TP) const {
bool TreePatternNode::hasProperTypeByHwMode() const {
for (const TypeSetByHwMode &S : Types)
- if (!S.isDefaultOnly())
+ if (!S.isSimple())
return true;
for (const TreePatternNodePtr &C : Children)
if (C->hasProperTypeByHwMode())
@@ -1880,7 +1861,7 @@ static unsigned GetNumNodeResults(Record *Operator, CodeGenDAGPatterns &CDP) {
return 0; // All return nothing.
if (Operator->isSubClassOf("Intrinsic"))
- return CDP.getIntrinsic(Operator).IS.RetVTs.size();
+ return CDP.getIntrinsic(Operator).IS.RetTys.size();
if (Operator->isSubClassOf("SDNode"))
return CDP.getSDNodeInfo(Operator).getNumResults();
@@ -1996,7 +1977,17 @@ void TreePatternNode::dump() const {
bool TreePatternNode::isIsomorphicTo(const TreePatternNode *N,
const MultipleUseVarSet &DepVars) const {
if (N == this) return true;
- if (N->isLeaf() != isLeaf() || getExtTypes() != N->getExtTypes() ||
+ if (N->isLeaf() != isLeaf())
+ return false;
+
+ // Check operator of non-leaves early since it can be cheaper than checking
+ // types.
+ if (!isLeaf())
+ if (N->getOperator() != getOperator() ||
+ N->getNumChildren() != getNumChildren())
+ return false;
+
+ if (getExtTypes() != N->getExtTypes() ||
getPredicateCalls() != N->getPredicateCalls() ||
getTransformFn() != N->getTransformFn())
return false;
@@ -2004,16 +1995,13 @@ bool TreePatternNode::isIsomorphicTo(const TreePatternNode *N,
if (isLeaf()) {
if (DefInit *DI = dyn_cast<DefInit>(getLeafValue())) {
if (DefInit *NDI = dyn_cast<DefInit>(N->getLeafValue())) {
- return ((DI->getDef() == NDI->getDef())
- && (DepVars.find(getName()) == DepVars.end()
- || getName() == N->getName()));
+ return ((DI->getDef() == NDI->getDef()) &&
+ (!DepVars.contains(getName()) || getName() == N->getName()));
}
}
return getLeafValue() == N->getLeafValue();
}
- if (N->getOperator() != getOperator() ||
- N->getNumChildren() != getNumChildren()) return false;
for (unsigned i = 0, e = getNumChildren(); i != e; ++i)
if (!getChild(i)->isIsomorphicTo(N->getChild(i), DepVars))
return false;
@@ -2025,19 +2013,20 @@ bool TreePatternNode::isIsomorphicTo(const TreePatternNode *N,
TreePatternNodePtr TreePatternNode::clone() const {
TreePatternNodePtr New;
if (isLeaf()) {
- New = std::make_shared<TreePatternNode>(getLeafValue(), getNumTypes());
+ New = makeIntrusiveRefCnt<TreePatternNode>(getLeafValue(), getNumTypes());
} else {
std::vector<TreePatternNodePtr> CChildren;
CChildren.reserve(Children.size());
for (unsigned i = 0, e = getNumChildren(); i != e; ++i)
CChildren.push_back(getChild(i)->clone());
- New = std::make_shared<TreePatternNode>(getOperator(), std::move(CChildren),
- getNumTypes());
+ New = makeIntrusiveRefCnt<TreePatternNode>(
+ getOperator(), std::move(CChildren), getNumTypes());
}
New->setName(getName());
New->setNamesAsPredicateArg(getNamesAsPredicateArg());
New->Types = Types;
New->setPredicateCalls(getPredicateCalls());
+ New->setGISelFlagsRecord(getGISelFlagsRecord());
New->setTransformFn(getTransformFn());
return New;
}
@@ -2085,14 +2074,13 @@ void TreePatternNode::SubstituteFormalArguments(
/// fragments, return the set of inlined versions (this can be more than
/// one if a PatFrags record has multiple alternatives).
void TreePatternNode::InlinePatternFragments(
- TreePatternNodePtr T, TreePattern &TP,
- std::vector<TreePatternNodePtr> &OutAlternatives) {
+ TreePattern &TP, std::vector<TreePatternNodePtr> &OutAlternatives) {
if (TP.hasError())
return;
if (isLeaf()) {
- OutAlternatives.push_back(T); // nothing to do.
+ OutAlternatives.push_back(this); // nothing to do.
return;
}
@@ -2100,16 +2088,16 @@ void TreePatternNode::InlinePatternFragments(
if (!Op->isSubClassOf("PatFrags")) {
if (getNumChildren() == 0) {
- OutAlternatives.push_back(T);
+ OutAlternatives.push_back(this);
return;
}
// Recursively inline children nodes.
- std::vector<std::vector<TreePatternNodePtr> > ChildAlternatives;
- ChildAlternatives.resize(getNumChildren());
+ std::vector<std::vector<TreePatternNodePtr>> ChildAlternatives(
+ getNumChildren());
for (unsigned i = 0, e = getNumChildren(); i != e; ++i) {
TreePatternNodePtr Child = getChildShared(i);
- Child->InlinePatternFragments(Child, TP, ChildAlternatives[i]);
+ Child->InlinePatternFragments(TP, ChildAlternatives[i]);
// If there are no alternatives for any child, there are no
// alternatives for this expression as whole.
if (ChildAlternatives[i].empty())
@@ -2125,21 +2113,22 @@ void TreePatternNode::InlinePatternFragments(
}
// The end result is an all-pairs construction of the resultant pattern.
- std::vector<unsigned> Idxs;
- Idxs.resize(ChildAlternatives.size());
+ std::vector<unsigned> Idxs(ChildAlternatives.size());
bool NotDone;
do {
// Create the variant and add it to the output list.
std::vector<TreePatternNodePtr> NewChildren;
+ NewChildren.reserve(ChildAlternatives.size());
for (unsigned i = 0, e = ChildAlternatives.size(); i != e; ++i)
NewChildren.push_back(ChildAlternatives[i][Idxs[i]]);
- TreePatternNodePtr R = std::make_shared<TreePatternNode>(
+ TreePatternNodePtr R = makeIntrusiveRefCnt<TreePatternNode>(
getOperator(), std::move(NewChildren), getNumTypes());
// Copy over properties.
R->setName(getName());
R->setNamesAsPredicateArg(getNamesAsPredicateArg());
R->setPredicateCalls(getPredicateCalls());
+ R->setGISelFlagsRecord(getGISelFlagsRecord());
R->setTransformFn(getTransformFn());
for (unsigned i = 0, e = getNumTypes(); i != e; ++i)
R->setType(i, getExtType(i));
@@ -2170,7 +2159,7 @@ void TreePatternNode::InlinePatternFragments(
TreePattern *Frag = TP.getDAGPatterns().getPatternFragment(Op);
// Verify that we are passing the right number of operands.
- if (Frag->getNumArgs() != Children.size()) {
+ if (Frag->getNumArgs() != getNumChildren()) {
TP.error("'" + Op->getName() + "' fragment requires " +
Twine(Frag->getNumArgs()) + " operands!");
return;
@@ -2209,13 +2198,16 @@ void TreePatternNode::InlinePatternFragments(
for (unsigned i = 0, e = FragTree->getNumTypes(); i != e; ++i)
FragTree->UpdateNodeType(i, getExtType(i), TP);
+ if (Op->isSubClassOf("GISelFlags"))
+ FragTree->setGISelFlagsRecord(Op);
+
// Transfer in the old predicates.
for (const TreePredicateCall &Pred : getPredicateCalls())
FragTree->addPredicateCall(Pred);
// The fragment we inlined could have recursive inlining that is needed. See
// if there are any pattern fragments in it and inline them as needed.
- FragTree->InlinePatternFragments(FragTree, TP, OutAlternatives);
+ FragTree->InlinePatternFragments(TP, OutAlternatives);
}
}
@@ -2408,10 +2400,10 @@ bool TreePatternNode::NodeHasProperty(SDNP Property,
return Int->hasProperty(Property);
}
- if (!Operator->isSubClassOf("SDPatternOperator"))
+ if (!getOperator()->isSubClassOf("SDPatternOperator"))
return false;
- return CGP.getSDNodeInfo(Operator).hasProperty(Property);
+ return CGP.getSDNodeInfo(getOperator()).hasProperty(Property);
}
@@ -2522,11 +2514,12 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
bool MadeChange = false;
// Apply the result type to the node.
- unsigned NumRetVTs = Int->IS.RetVTs.size();
- unsigned NumParamVTs = Int->IS.ParamVTs.size();
+ unsigned NumRetVTs = Int->IS.RetTys.size();
+ unsigned NumParamVTs = Int->IS.ParamTys.size();
for (unsigned i = 0, e = NumRetVTs; i != e; ++i)
- MadeChange |= UpdateNodeType(i, Int->IS.RetVTs[i], TP);
+ MadeChange |= UpdateNodeType(
+ i, getValueType(Int->IS.RetTys[i]->getValueAsDef("VT")), TP);
if (getNumChildren() != NumParamVTs + 1) {
TP.error("Intrinsic '" + Int->Name + "' expects " + Twine(NumParamVTs) +
@@ -2540,9 +2533,10 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
for (unsigned i = 0, e = getNumChildren()-1; i != e; ++i) {
MadeChange |= getChild(i+1)->ApplyTypeConstraints(TP, NotRegisters);
- MVT::SimpleValueType OpVT = Int->IS.ParamVTs[i];
- assert(getChild(i+1)->getNumTypes() == 1 && "Unhandled case");
- MadeChange |= getChild(i+1)->UpdateNodeType(0, OpVT, TP);
+ MVT::SimpleValueType OpVT =
+ getValueType(Int->IS.ParamTys[i]->getValueAsDef("VT"));
+ assert(getChild(i + 1)->getNumTypes() == 1 && "Unhandled case");
+ MadeChange |= getChild(i + 1)->UpdateNodeType(0, OpVT, TP);
}
return MadeChange;
}
@@ -2872,7 +2866,7 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit,
OpName);
// Input argument?
- TreePatternNodePtr Res = std::make_shared<TreePatternNode>(DI, 1);
+ TreePatternNodePtr Res = makeIntrusiveRefCnt<TreePatternNode>(DI, 1);
if (R->getName() == "node" && !OpName.empty()) {
if (OpName.empty())
error("'node' argument requires a name to match with operand list");
@@ -2887,7 +2881,7 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit,
if (isa<UnsetInit>(TheInit)) {
if (OpName.empty())
error("'?' argument requires a name to match with operand list");
- TreePatternNodePtr Res = std::make_shared<TreePatternNode>(TheInit, 1);
+ TreePatternNodePtr Res = makeIntrusiveRefCnt<TreePatternNode>(TheInit, 1);
Args.push_back(std::string(OpName));
Res->setName(OpName);
return Res;
@@ -2898,7 +2892,7 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit,
error("Constant int or bit argument should not have a name!");
if (isa<BitInit>(TheInit))
TheInit = TheInit->convertInitializerTo(IntRecTy::get(RK));
- return std::make_shared<TreePatternNode>(TheInit, 1);
+ return makeIntrusiveRefCnt<TreePatternNode>(TheInit, 1);
}
if (BitsInit *BI = dyn_cast<BitsInit>(TheInit)) {
@@ -2906,16 +2900,20 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit,
Init *II = BI->convertInitializerTo(IntRecTy::get(RK));
if (!II || !isa<IntInit>(II))
error("Bits value must be constants!");
- return ParseTreePattern(II, OpName);
+ return II ? ParseTreePattern(II, OpName) : nullptr;
}
DagInit *Dag = dyn_cast<DagInit>(TheInit);
if (!Dag) {
TheInit->print(errs());
error("Pattern has unexpected init kind!");
+ return nullptr;
}
DefInit *OpDef = dyn_cast<DefInit>(Dag->getOperator());
- if (!OpDef) error("Pattern has unexpected operator type!");
+ if (!OpDef) {
+ error("Pattern has unexpected operator type!");
+ return nullptr;
+ }
Record *Operator = OpDef->getDef();
if (Operator->isSubClassOf("ValueType")) {
@@ -2994,7 +2992,7 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit,
// If this intrinsic returns void, it must have side-effects and thus a
// chain.
- if (Int.IS.RetVTs.empty())
+ if (Int.IS.RetTys.empty())
Operator = getDAGPatterns().get_intrinsic_void_sdnode();
else if (!Int.ME.doesNotAccessMemory() || Int.hasSideEffects)
// Has side-effects, requires chain.
@@ -3002,7 +3000,7 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit,
else // Otherwise, no chain.
Operator = getDAGPatterns().get_intrinsic_wo_chain_sdnode();
- Children.insert(Children.begin(), std::make_shared<TreePatternNode>(
+ Children.insert(Children.begin(), makeIntrusiveRefCnt<TreePatternNode>(
IntInit::get(RK, IID), 1));
}
@@ -3027,9 +3025,8 @@ TreePatternNodePtr TreePattern::ParseTreePattern(Init *TheInit,
}
}
- TreePatternNodePtr Result =
- std::make_shared<TreePatternNode>(Operator, std::move(Children),
- NumResults);
+ TreePatternNodePtr Result = makeIntrusiveRefCnt<TreePatternNode>(
+ Operator, std::move(Children), NumResults);
Result->setName(OpName);
if (Dag->getName()) {
@@ -3069,11 +3066,9 @@ static bool SimplifyTree(TreePatternNodePtr &N) {
// Walk all children.
bool MadeChange = false;
- for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i) {
- TreePatternNodePtr Child = N->getChildShared(i);
- MadeChange |= SimplifyTree(Child);
- N->setChild(i, std::move(Child));
- }
+ for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i)
+ MadeChange |= SimplifyTree(N->getChildSharedPtr(i));
+
return MadeChange;
}
@@ -3492,7 +3487,8 @@ void CodeGenDAGPatterns::FindPatternInputsAndOutputs(
DefInit *Val = dyn_cast<DefInit>(Dest->getLeafValue());
if (!Val || !Val->getDef()->isSubClassOf("Register"))
I.error("implicitly defined value should be a register!");
- InstImpResults.push_back(Val->getDef());
+ if (Val)
+ InstImpResults.push_back(Val->getDef());
}
return;
}
@@ -3913,7 +3909,7 @@ void CodeGenDAGPatterns::parseInstructionPattern(
OpNode->setTransformFn(nullptr);
std::vector<TreePatternNodePtr> Children;
Children.push_back(OpNode);
- OpNode = std::make_shared<TreePatternNode>(Xform, std::move(Children),
+ OpNode = makeIntrusiveRefCnt<TreePatternNode>(Xform, std::move(Children),
OpNode->getNumTypes());
}
@@ -3924,7 +3920,7 @@ void CodeGenDAGPatterns::parseInstructionPattern(
I.error("Input operand $" + InstInputs.begin()->first +
" occurs in pattern but not in operands list!");
- TreePatternNodePtr ResultPattern = std::make_shared<TreePatternNode>(
+ TreePatternNodePtr ResultPattern = makeIntrusiveRefCnt<TreePatternNode>(
I.getRecord(), std::move(ResultNodeOperands),
GetNumNodeResults(I.getRecord(), *this));
// Copy fully inferred output node types to instruction result pattern.
@@ -3948,9 +3944,8 @@ void CodeGenDAGPatterns::parseInstructionPattern(
// Create and insert the instruction.
// FIXME: InstImpResults should not be part of DAGInstruction.
Record *R = I.getRecord();
- DAGInsts.emplace(std::piecewise_construct, std::forward_as_tuple(R),
- std::forward_as_tuple(Results, Operands, InstImpResults,
- SrcPattern, ResultPattern));
+ DAGInsts.try_emplace(R, std::move(Results), std::move(Operands),
+ std::move(InstImpResults), SrcPattern, ResultPattern);
LLVM_DEBUG(I.dump());
}
@@ -3990,9 +3985,8 @@ void CodeGenDAGPatterns::ParseInstructions() {
}
// Create and insert the instruction.
- std::vector<Record*> ImpResults;
- Instructions.insert(std::make_pair(Instr,
- DAGInstruction(Results, Operands, ImpResults)));
+ Instructions.try_emplace(Instr, std::move(Results), std::move(Operands),
+ std::vector<Record *>());
continue; // no pattern.
}
@@ -4246,7 +4240,7 @@ static TreePatternNodePtr PromoteXForms(TreePatternNodePtr N) {
N->setTransformFn(nullptr);
std::vector<TreePatternNodePtr> Children;
Children.push_back(PromoteXForms(N));
- return std::make_shared<TreePatternNode>(Xform, std::move(Children),
+ return makeIntrusiveRefCnt<TreePatternNode>(Xform, std::move(Children),
N->getNumTypes());
}
@@ -4342,13 +4336,24 @@ void CodeGenDAGPatterns::ParseOnePattern(Record *TheDef,
// that register class does not accept that type, the type inference
// will lead to a contradiction, which is not an error however, but
// a sign that this pattern will simply never match.
- if (Temp.getOnlyTree()->hasPossibleType())
- for (const auto &T : Pattern.getTrees())
+ if (Temp.getOnlyTree()->hasPossibleType()) {
+ for (const auto &T : Pattern.getTrees()) {
if (T->hasPossibleType())
AddPatternToMatch(&Pattern,
PatternToMatch(TheDef, Preds, T, Temp.getOnlyTree(),
InstImpResults, Complexity,
TheDef->getID()));
+ }
+ } else {
+ // Show a message about a dropped pattern with some info to make it
+ // easier to identify it in the .td files.
+ LLVM_DEBUG({
+ dbgs() << "Dropping: ";
+ Pattern.dump();
+ Temp.getOnlyTree()->dump();
+ dbgs() << "\n";
+ });
+ }
}
void CodeGenDAGPatterns::ParsePatterns() {
@@ -4397,6 +4402,9 @@ static void collectModes(std::set<unsigned> &Modes, const TreePatternNode *N) {
void CodeGenDAGPatterns::ExpandHwModeBasedTypes() {
const CodeGenHwModes &CGH = getTargetInfo().getHwModes();
+ if (CGH.getNumModeIds() == 1)
+ return;
+
std::vector<PatternToMatch> Copy;
PatternsToMatch.swap(Copy);
@@ -4411,15 +4419,15 @@ void CodeGenDAGPatterns::ExpandHwModeBasedTypes() {
PatternsToMatch.emplace_back(P.getSrcRecord(), P.getPredicates(),
std::move(NewSrc), std::move(NewDst),
P.getDstRegs(), P.getAddedComplexity(),
- Record::getNewUID(Records), Mode, Check);
+ Record::getNewUID(Records), Check);
};
for (PatternToMatch &P : Copy) {
- TreePatternNodePtr SrcP = nullptr, DstP = nullptr;
+ const TreePatternNode *SrcP = nullptr, *DstP = nullptr;
if (P.getSrcPattern()->hasProperTypeByHwMode())
- SrcP = P.getSrcPatternShared();
+ SrcP = P.getSrcPattern();
if (P.getDstPattern()->hasProperTypeByHwMode())
- DstP = P.getDstPatternShared();
+ DstP = P.getDstPattern();
if (!SrcP && !DstP) {
PatternsToMatch.push_back(P);
continue;
@@ -4427,9 +4435,9 @@ void CodeGenDAGPatterns::ExpandHwModeBasedTypes() {
std::set<unsigned> Modes;
if (SrcP)
- collectModes(Modes, SrcP.get());
+ collectModes(Modes, SrcP);
if (DstP)
- collectModes(Modes, DstP.get());
+ collectModes(Modes, DstP);
// The predicate for the default mode needs to be constructed for each
// pattern separately.
@@ -4450,14 +4458,14 @@ void CodeGenDAGPatterns::ExpandHwModeBasedTypes() {
// Fill the map entry for this mode.
const HwMode &HM = CGH.getMode(M);
- AppendPattern(P, M, "(MF->getSubtarget().checkFeatures(\"" + HM.Features + "\"))");
+ AppendPattern(P, M, HM.Predicates);
// Add negations of the HM's predicates to the default predicate.
if (!DefaultCheck.empty())
DefaultCheck += " && ";
- DefaultCheck += "(!(MF->getSubtarget().checkFeatures(\"";
- DefaultCheck += HM.Features;
- DefaultCheck += "\")))";
+ DefaultCheck += "!(";
+ DefaultCheck += HM.Predicates;
+ DefaultCheck += ")";
}
bool HasDefault = Modes.count(DefaultMode);
@@ -4518,8 +4526,7 @@ static void CombineChildVariants(
return;
// The end result is an all-pairs construction of the resultant pattern.
- std::vector<unsigned> Idxs;
- Idxs.resize(ChildVariants.size());
+ std::vector<unsigned> Idxs(ChildVariants.size());
bool NotDone;
do {
#ifndef NDEBUG
@@ -4533,15 +4540,17 @@ static void CombineChildVariants(
#endif
// Create the variant and add it to the output list.
std::vector<TreePatternNodePtr> NewChildren;
+ NewChildren.reserve(ChildVariants.size());
for (unsigned i = 0, e = ChildVariants.size(); i != e; ++i)
NewChildren.push_back(ChildVariants[i][Idxs[i]]);
- TreePatternNodePtr R = std::make_shared<TreePatternNode>(
+ TreePatternNodePtr R = makeIntrusiveRefCnt<TreePatternNode>(
Orig->getOperator(), std::move(NewChildren), Orig->getNumTypes());
// Copy over properties.
R->setName(Orig->getName());
R->setNamesAsPredicateArg(Orig->getNamesAsPredicateArg());
R->setPredicateCalls(Orig->getPredicateCalls());
+ R->setGISelFlagsRecord(Orig->getGISelFlagsRecord());
R->setTransformFn(Orig->getTransformFn());
for (unsigned i = 0, e = Orig->getNumTypes(); i != e; ++i)
R->setType(i, Orig->getExtType(i));
@@ -4679,8 +4688,8 @@ static void GenerateVariantsOf(TreePatternNodePtr N,
}
// Compute permutations of all children.
- std::vector<std::vector<TreePatternNodePtr>> ChildVariants;
- ChildVariants.resize(N->getNumChildren());
+ std::vector<std::vector<TreePatternNodePtr>> ChildVariants(
+ N->getNumChildren());
for (unsigned i = 0, e = N->getNumChildren(); i != e; ++i)
GenerateVariantsOf(N->getChildShared(i), ChildVariants[i], CDP, DepVars);
@@ -4708,17 +4717,10 @@ static void GenerateVariantsOf(TreePatternNodePtr N,
}
// Consider the commuted order.
if (NoRegisters) {
- std::vector<std::vector<TreePatternNodePtr>> Variants;
- unsigned i = 0;
- if (isCommIntrinsic)
- Variants.push_back(std::move(ChildVariants[i++])); // Intrinsic id.
- Variants.push_back(std::move(ChildVariants[i + 1]));
- Variants.push_back(std::move(ChildVariants[i]));
- i += 2;
- // Remaining operands are not commuted.
- for (; i != N->getNumChildren(); ++i)
- Variants.push_back(std::move(ChildVariants[i]));
- CombineChildVariants(N, Variants, OutVariants, CDP, DepVars);
+ // Swap the first two operands after the intrinsic id, if present.
+ unsigned i = isCommIntrinsic ? 1 : 0;
+ std::swap(ChildVariants[i], ChildVariants[i + 1]);
+ CombineChildVariants(N, ChildVariants, OutVariants, CDP, DepVars);
}
}
}
@@ -4788,7 +4790,6 @@ void CodeGenDAGPatterns::GenerateVariants() {
Variant, PatternsToMatch[i].getDstPatternShared(),
PatternsToMatch[i].getDstRegs(),
PatternsToMatch[i].getAddedComplexity(), Record::getNewUID(Records),
- PatternsToMatch[i].getForceMode(),
PatternsToMatch[i].getHwModeFeatures());
}
diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.h b/llvm/utils/TableGen/CodeGenDAGPatterns.h
index ec35e6680088..2611fe06f55c 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.h
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.h
@@ -17,12 +17,16 @@
#include "CodeGenIntrinsics.h"
#include "CodeGenTarget.h"
#include "SDNodeProperties.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/TableGen/Record.h"
#include <algorithm>
#include <array>
#include <functional>
@@ -32,7 +36,6 @@
namespace llvm {
-class Record;
class Init;
class ListInit;
class DagInit;
@@ -42,7 +45,7 @@ class TreePatternNode;
class CodeGenDAGPatterns;
/// Shared pointer for TreePatternNode.
-using TreePatternNodePtr = std::shared_ptr<TreePatternNode>;
+using TreePatternNodePtr = IntrusiveRefCntPtr<TreePatternNode>;
/// This represents a set of MVTs. Since the underlying type for the MVT
/// is uint8_t, there are at most 256 values. To reduce the number of memory
@@ -191,7 +194,7 @@ raw_ostream &operator<<(raw_ostream &OS, const MachineValueTypeSet &T);
struct TypeSetByHwMode : public InfoByHwMode<MachineValueTypeSet> {
using SetType = MachineValueTypeSet;
- SmallVector<unsigned, 16> AddrSpaces;
+ unsigned AddrSpace = std::numeric_limits<unsigned>::max();
TypeSetByHwMode() = default;
TypeSetByHwMode(const TypeSetByHwMode &VTS) = default;
@@ -211,22 +214,17 @@ struct TypeSetByHwMode : public InfoByHwMode<MachineValueTypeSet> {
LLVM_ATTRIBUTE_ALWAYS_INLINE
bool isMachineValueType() const {
- return isDefaultOnly() && Map.begin()->second.size() == 1;
+ return isSimple() && getSimple().size() == 1;
}
LLVM_ATTRIBUTE_ALWAYS_INLINE
MVT getMachineValueType() const {
assert(isMachineValueType());
- return *Map.begin()->second.begin();
+ return *getSimple().begin();
}
bool isPossible() const;
- LLVM_ATTRIBUTE_ALWAYS_INLINE
- bool isDefaultOnly() const {
- return Map.size() == 1 && Map.begin()->first == DefaultMode;
- }
-
bool isPointer() const {
return getValueTypeByHwMode().isPointer();
}
@@ -259,7 +257,7 @@ private:
raw_ostream &operator<<(raw_ostream &OS, const TypeSetByHwMode &T);
struct TypeInfer {
- TypeInfer(TreePattern &T) : TP(T), ForceMode(0) {}
+ TypeInfer(TreePattern &T) : TP(T) {}
bool isConcrete(const TypeSetByHwMode &VTS, bool AllowEmpty) const {
return VTS.isValueTypeByHwMode(AllowEmpty);
@@ -274,11 +272,11 @@ struct TypeInfer {
/// expand*) is to return "true" if a change has been made, "false"
/// otherwise.
- bool MergeInTypeInfo(TypeSetByHwMode &Out, const TypeSetByHwMode &In);
- bool MergeInTypeInfo(TypeSetByHwMode &Out, MVT::SimpleValueType InVT) {
+ bool MergeInTypeInfo(TypeSetByHwMode &Out, const TypeSetByHwMode &In) const;
+ bool MergeInTypeInfo(TypeSetByHwMode &Out, MVT::SimpleValueType InVT) const {
return MergeInTypeInfo(Out, TypeSetByHwMode(InVT));
}
- bool MergeInTypeInfo(TypeSetByHwMode &Out, ValueTypeByHwMode InVT) {
+ bool MergeInTypeInfo(TypeSetByHwMode &Out, ValueTypeByHwMode InVT) const {
return MergeInTypeInfo(Out, TypeSetByHwMode(InVT));
}
@@ -332,19 +330,16 @@ struct TypeInfer {
/// For each overloaded type (i.e. of form *Any), replace it with the
/// corresponding subset of legal, specific types.
- void expandOverloads(TypeSetByHwMode &VTS);
+ void expandOverloads(TypeSetByHwMode &VTS) const;
void expandOverloads(TypeSetByHwMode::SetType &Out,
- const TypeSetByHwMode::SetType &Legal);
+ const TypeSetByHwMode::SetType &Legal) const;
struct ValidateOnExit {
- ValidateOnExit(TypeSetByHwMode &T, TypeInfer &TI) : Infer(TI), VTS(T) {}
- #ifndef NDEBUG
+ ValidateOnExit(const TypeSetByHwMode &T, const TypeInfer &TI)
+ : Infer(TI), VTS(T) {}
~ValidateOnExit();
- #else
- ~ValidateOnExit() {} // Empty destructor with NDEBUG.
- #endif
- TypeInfer &Infer;
- TypeSetByHwMode &VTS;
+ const TypeInfer &Infer;
+ const TypeSetByHwMode &VTS;
};
struct SuppressValidation {
@@ -359,16 +354,14 @@ struct TypeInfer {
};
TreePattern &TP;
- unsigned ForceMode; // Mode to use when set.
- bool CodeGen = false; // Set during generation of matcher code.
bool Validate = true; // Indicate whether to validate types.
private:
- const TypeSetByHwMode &getLegalTypes();
+ const TypeSetByHwMode &getLegalTypes() const;
/// Cached legal types (in default mode).
- bool LegalTypesCached = false;
- TypeSetByHwMode LegalCache;
+ mutable bool LegalTypesCached = false;
+ mutable TypeSetByHwMode LegalCache;
};
/// Set type used to track multiply used variables in patterns
@@ -632,7 +625,7 @@ struct TreePredicateCall {
}
};
-class TreePatternNode {
+class TreePatternNode : public RefCountedBase<TreePatternNode> {
/// The type of each node result. Before and during type inference, each
/// result may be a set of possible types. After (successful) type inference,
/// each is a single concrete type.
@@ -641,13 +634,10 @@ class TreePatternNode {
/// The index of each result in results of the pattern.
std::vector<unsigned> ResultPerm;
- /// Operator - The Record for the operator if this is an interior node (not
- /// a leaf).
- Record *Operator;
-
- /// Val - The init value (e.g. the "GPRC" record, or "7") for a leaf.
- ///
- Init *Val;
+ /// OperatorOrVal - The Record for the operator if this is an interior node
+ /// (not a leaf) or the init value (e.g. the "GPRC" record, or "7") for a
+ /// leaf.
+ PointerUnion<Record *, Init *> OperatorOrVal;
/// Name - The name given to this node with the :$foo notation.
///
@@ -665,17 +655,20 @@ class TreePatternNode {
std::vector<TreePatternNodePtr> Children;
+ /// If this was instantiated from a PatFrag node, and the PatFrag was derived
+ /// from "GISelFlags": the original Record derived from GISelFlags.
+ const Record *GISelFlags = nullptr;
+
public:
TreePatternNode(Record *Op, std::vector<TreePatternNodePtr> Ch,
unsigned NumResults)
- : Operator(Op), Val(nullptr), TransformFn(nullptr),
- Children(std::move(Ch)) {
+ : OperatorOrVal(Op), TransformFn(nullptr), Children(std::move(Ch)) {
Types.resize(NumResults);
ResultPerm.resize(NumResults);
std::iota(ResultPerm.begin(), ResultPerm.end(), 0);
}
- TreePatternNode(Init *val, unsigned NumResults) // leaf ctor
- : Operator(nullptr), Val(val), TransformFn(nullptr) {
+ TreePatternNode(Init *val, unsigned NumResults) // leaf ctor
+ : OperatorOrVal(val), TransformFn(nullptr) {
Types.resize(NumResults);
ResultPerm.resize(NumResults);
std::iota(ResultPerm.begin(), ResultPerm.end(), 0);
@@ -695,7 +688,7 @@ public:
NamesAsPredicateArg.push_back(N);
}
- bool isLeaf() const { return Val != nullptr; }
+ bool isLeaf() const { return isa<Init *>(OperatorOrVal); }
// Type accessors.
unsigned getNumTypes() const { return Types.size(); }
@@ -723,14 +716,26 @@ public:
unsigned getResultIndex(unsigned ResNo) const { return ResultPerm[ResNo]; }
void setResultIndex(unsigned ResNo, unsigned RI) { ResultPerm[ResNo] = RI; }
- Init *getLeafValue() const { assert(isLeaf()); return Val; }
- Record *getOperator() const { assert(!isLeaf()); return Operator; }
+ Init *getLeafValue() const {
+ assert(isLeaf());
+ return cast<Init *>(OperatorOrVal);
+ }
+ Record *getOperator() const {
+ assert(!isLeaf());
+ return cast<Record *>(OperatorOrVal);
+ }
unsigned getNumChildren() const { return Children.size(); }
- TreePatternNode *getChild(unsigned N) const { return Children[N].get(); }
+ const TreePatternNode *getChild(unsigned N) const {
+ return Children[N].get();
+ }
+ TreePatternNode *getChild(unsigned N) { return Children[N].get(); }
const TreePatternNodePtr &getChildShared(unsigned N) const {
return Children[N];
}
+ TreePatternNodePtr &getChildSharedPtr(unsigned N) {
+ return Children[N];
+ }
void setChild(unsigned i, TreePatternNodePtr N) { Children[i] = N; }
/// hasChild - Return true if N is any of our children.
@@ -794,6 +799,9 @@ public:
/// marked isCommutative.
bool isCommutativeIntrinsic(const CodeGenDAGPatterns &CDP) const;
+ void setGISelFlagsRecord(const Record *R) { GISelFlags = R; }
+ const Record *getGISelFlagsRecord() const { return GISelFlags; }
+
void print(raw_ostream &OS) const;
void dump() const;
@@ -818,11 +826,10 @@ public: // Higher level manipulation routines.
void
SubstituteFormalArguments(std::map<std::string, TreePatternNodePtr> &ArgMap);
- /// InlinePatternFragments - If this pattern refers to any pattern
+ /// InlinePatternFragments - If \p T pattern refers to any pattern
/// fragments, return the set of inlined versions (this can be more than
/// one if a PatFrags record has multiple alternatives).
- void InlinePatternFragments(TreePatternNodePtr T,
- TreePattern &TP,
+ void InlinePatternFragments(TreePattern &TP,
std::vector<TreePatternNodePtr> &OutAlternatives);
/// ApplyTypeConstraints - Apply all of the type constraints relevant to
@@ -860,7 +867,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const TreePatternNode &TPN) {
return OS;
}
-
/// TreePattern - Represent a pattern, used for instructions, pattern
/// fragments, etc.
///
@@ -950,10 +956,10 @@ public:
/// PatFrags references. This may increase the number of trees in the
/// pattern if a PatFrags has multiple alternatives.
void InlinePatternFragments() {
- std::vector<TreePatternNodePtr> Copy = Trees;
- Trees.clear();
- for (unsigned i = 0, e = Copy.size(); i != e; ++i)
- Copy[i]->InlinePatternFragments(Copy[i], *this, Trees);
+ std::vector<TreePatternNodePtr> Copy;
+ Trees.swap(Copy);
+ for (const TreePatternNodePtr &C : Copy)
+ C->InlinePatternFragments(*this, Trees);
}
/// InferAllTypes - Infer/propagate as many types throughout the expression
@@ -1023,13 +1029,14 @@ class DAGInstruction {
TreePatternNodePtr ResultPattern;
public:
- DAGInstruction(const std::vector<Record*> &results,
- const std::vector<Record*> &operands,
- const std::vector<Record*> &impresults,
+ DAGInstruction(std::vector<Record *> &&results,
+ std::vector<Record *> &&operands,
+ std::vector<Record *> &&impresults,
TreePatternNodePtr srcpattern = nullptr,
TreePatternNodePtr resultpattern = nullptr)
- : Results(results), Operands(operands), ImpResults(impresults),
- SrcPattern(srcpattern), ResultPattern(resultpattern) {}
+ : Results(std::move(results)), Operands(std::move(operands)),
+ ImpResults(std::move(impresults)), SrcPattern(srcpattern),
+ ResultPattern(resultpattern) {}
unsigned getNumResults() const { return Results.size(); }
unsigned getNumOperands() const { return Operands.size(); }
@@ -1066,17 +1073,16 @@ class PatternToMatch {
std::string HwModeFeatures;
int AddedComplexity; // Add to matching pattern complexity.
unsigned ID; // Unique ID for the record.
- unsigned ForceMode; // Force this mode in type inference when set.
public:
PatternToMatch(Record *srcrecord, ListInit *preds, TreePatternNodePtr src,
TreePatternNodePtr dst, std::vector<Record *> dstregs,
- int complexity, unsigned uid, unsigned setmode = 0,
+ int complexity, unsigned uid,
const Twine &hwmodefeatures = "")
: SrcRecord(srcrecord), Predicates(preds), SrcPattern(src),
DstPattern(dst), Dstregs(std::move(dstregs)),
HwModeFeatures(hwmodefeatures.str()), AddedComplexity(complexity),
- ID(uid), ForceMode(setmode) {}
+ ID(uid) {}
Record *getSrcRecord() const { return SrcRecord; }
ListInit *getPredicates() const { return Predicates; }
@@ -1088,7 +1094,6 @@ public:
StringRef getHwModeFeatures() const { return HwModeFeatures; }
int getAddedComplexity() const { return AddedComplexity; }
unsigned getID() const { return ID; }
- unsigned getForceMode() const { return ForceMode; }
std::string getPredicateCheck() const;
void getPredicateRecords(SmallVectorImpl<Record *> &PredicateRecs) const;
diff --git a/llvm/utils/TableGen/CodeGenHwModes.cpp b/llvm/utils/TableGen/CodeGenHwModes.cpp
index 2fec46c44100..2171507f4c63 100644
--- a/llvm/utils/TableGen/CodeGenHwModes.cpp
+++ b/llvm/utils/TableGen/CodeGenHwModes.cpp
@@ -21,6 +21,19 @@ StringRef CodeGenHwModes::DefaultModeName = "DefaultMode";
HwMode::HwMode(Record *R) {
Name = R->getName();
Features = std::string(R->getValueAsString("Features"));
+
+ std::vector<Record *> PredicateRecs = R->getValueAsListOfDefs("Predicates");
+ SmallString<128> PredicateCheck;
+ raw_svector_ostream OS(PredicateCheck);
+ ListSeparator LS(" && ");
+ for (Record *Pred : PredicateRecs) {
+ StringRef CondString = Pred->getValueAsString("CondString");
+ if (CondString.empty())
+ continue;
+ OS << LS << '(' << CondString << ')';
+ }
+
+ Predicates = std::string(PredicateCheck);
}
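For instance, a HwMode whose Predicates list carries predicates with the (hypothetical) CondStrings "Subtarget->is64Bit()" and "Subtarget->hasFeatureX()" ends up with

  Predicates == "(Subtarget->is64Bit()) && (Subtarget->hasFeatureX())"

which ExpandHwModeBasedTypes() above now emits directly as the pattern predicate, instead of building a MF->getSubtarget().checkFeatures("...") string from the Features field.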
LLVM_DUMP_METHOD
@@ -38,7 +51,7 @@ HwModeSelect::HwModeSelect(Record *R, CodeGenHwModes &CGH) {
report_fatal_error("error in target description.");
}
for (unsigned i = 0, e = Modes.size(); i != e; ++i) {
- unsigned ModeId = CGH.getHwModeId(Modes[i]->getName());
+ unsigned ModeId = CGH.getHwModeId(Modes[i]);
Items.push_back(std::make_pair(ModeId, Objects[i]));
}
}
@@ -52,34 +65,26 @@ void HwModeSelect::dump() const {
}
CodeGenHwModes::CodeGenHwModes(RecordKeeper &RK) : Records(RK) {
- std::vector<Record*> MRs = Records.getAllDerivedDefinitions("HwMode");
- // The default mode needs a definition in the .td sources for TableGen
- // to accept references to it. We need to ignore the definition here.
- for (auto I = MRs.begin(), E = MRs.end(); I != E; ++I) {
- if ((*I)->getName() != DefaultModeName)
+ for (Record *R : Records.getAllDerivedDefinitions("HwMode")) {
+ // The default mode needs a definition in the .td sources for TableGen
+ // to accept references to it. We need to ignore the definition here.
+ if (R->getName() == DefaultModeName)
continue;
- MRs.erase(I);
- break;
- }
-
- for (Record *R : MRs) {
Modes.emplace_back(R);
- unsigned NewId = Modes.size();
- ModeIds.insert(std::make_pair(Modes[NewId-1].Name, NewId));
+ ModeIds.insert(std::make_pair(R, Modes.size()));
}
- std::vector<Record*> MSs = Records.getAllDerivedDefinitions("HwModeSelect");
- for (Record *R : MSs) {
+ for (Record *R : Records.getAllDerivedDefinitions("HwModeSelect")) {
auto P = ModeSelects.emplace(std::make_pair(R, HwModeSelect(R, *this)));
assert(P.second);
(void)P;
}
}
-unsigned CodeGenHwModes::getHwModeId(StringRef Name) const {
- if (Name == DefaultModeName)
+unsigned CodeGenHwModes::getHwModeId(Record *R) const {
+ if (R->getName() == DefaultModeName)
return DefaultMode;
- auto F = ModeIds.find(Name);
+ auto F = ModeIds.find(R);
assert(F != ModeIds.end() && "Unknown mode name");
return F->second;
}
@@ -101,7 +106,7 @@ void CodeGenHwModes::dump() const {
dbgs() << "ModeIds: {\n";
for (const auto &P : ModeIds)
- dbgs() << " " << P.first() << " -> " << P.second << '\n';
+ dbgs() << " " << P.first->getName() << " -> " << P.second << '\n';
dbgs() << "}\n";
dbgs() << "ModeSelects: {\n";
diff --git a/llvm/utils/TableGen/CodeGenHwModes.h b/llvm/utils/TableGen/CodeGenHwModes.h
index 55507cbca37d..09d20ad85c5e 100644
--- a/llvm/utils/TableGen/CodeGenHwModes.h
+++ b/llvm/utils/TableGen/CodeGenHwModes.h
@@ -11,10 +11,12 @@
#ifndef LLVM_UTILS_TABLEGEN_CODEGENHWMODES_H
#define LLVM_UTILS_TABLEGEN_CODEGENHWMODES_H
-#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <map>
#include <string>
+#include <utility>
#include <vector>
// HwModeId -> list of predicates (definition)
@@ -29,6 +31,7 @@ namespace llvm {
HwMode(Record *R);
StringRef Name;
std::string Features;
+ std::string Predicates;
void dump() const;
};
@@ -44,7 +47,7 @@ namespace llvm {
static StringRef DefaultModeName;
CodeGenHwModes(RecordKeeper &R);
- unsigned getHwModeId(StringRef Name) const;
+ unsigned getHwModeId(Record *R) const;
const HwMode &getMode(unsigned Id) const {
assert(Id != 0 && "Mode id of 0 is reserved for the default mode");
return Modes[Id-1];
@@ -55,7 +58,7 @@ namespace llvm {
private:
RecordKeeper &Records;
- StringMap<unsigned> ModeIds; // HwMode (string) -> HwModeId
+ DenseMap<Record *, unsigned> ModeIds; // HwMode Record -> HwModeId
std::vector<HwMode> Modes;
std::map<Record*,HwModeSelect> ModeSelects;
};
diff --git a/llvm/utils/TableGen/CodeGenInstAlias.cpp b/llvm/utils/TableGen/CodeGenInstAlias.cpp
new file mode 100644
index 000000000000..8634d45eafc7
--- /dev/null
+++ b/llvm/utils/TableGen/CodeGenInstAlias.cpp
@@ -0,0 +1,283 @@
+//===- CodeGenInstAlias.cpp - CodeGen InstAlias Class Wrapper -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CodeGenInstAlias class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenInstAlias.h"
+#include "CodeGenInstruction.h"
+#include "CodeGenRegisters.h"
+#include "CodeGenTarget.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+
+using namespace llvm;
+
+/// tryAliasOpMatch - This is a helper function for the CodeGenInstAlias
+/// constructor. It checks if an argument in an InstAlias pattern matches
+/// the corresponding operand of the instruction. It returns true on a
+/// successful match, with ResOp set to the result operand to be used.
+bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
+ Record *InstOpRec, bool hasSubOps,
+ ArrayRef<SMLoc> Loc, CodeGenTarget &T,
+ ResultOperand &ResOp) {
+ Init *Arg = Result->getArg(AliasOpNo);
+ DefInit *ADI = dyn_cast<DefInit>(Arg);
+ Record *ResultRecord = ADI ? ADI->getDef() : nullptr;
+
+ if (ADI && ADI->getDef() == InstOpRec) {
+ // If the operand is a record, it must have a name, and the record type
+ // must match up with the instruction's argument type.
+ if (!Result->getArgName(AliasOpNo))
+ PrintFatalError(Loc, "result argument #" + Twine(AliasOpNo) +
+ " must have a name!");
+ ResOp = ResultOperand(std::string(Result->getArgNameStr(AliasOpNo)),
+ ResultRecord);
+ return true;
+ }
+
+ // For register operands, the source register class can be a subclass
+ // of the instruction register class, not just an exact match.
+ if (InstOpRec->isSubClassOf("RegisterOperand"))
+ InstOpRec = InstOpRec->getValueAsDef("RegClass");
+
+ if (ADI && ADI->getDef()->isSubClassOf("RegisterOperand"))
+ ADI = ADI->getDef()->getValueAsDef("RegClass")->getDefInit();
+
+ if (ADI && ADI->getDef()->isSubClassOf("RegisterClass")) {
+ if (!InstOpRec->isSubClassOf("RegisterClass"))
+ return false;
+ if (!T.getRegisterClass(InstOpRec).hasSubClass(
+ &T.getRegisterClass(ADI->getDef())))
+ return false;
+ ResOp = ResultOperand(std::string(Result->getArgNameStr(AliasOpNo)),
+ ResultRecord);
+ return true;
+ }
+
+ // Handle explicit registers.
+ if (ADI && ADI->getDef()->isSubClassOf("Register")) {
+ if (InstOpRec->isSubClassOf("OptionalDefOperand")) {
+ DagInit *DI = InstOpRec->getValueAsDag("MIOperandInfo");
+ // The operand info should only have a single (register) entry. We
+ // want the register class of it.
+ InstOpRec = cast<DefInit>(DI->getArg(0))->getDef();
+ }
+
+ if (!InstOpRec->isSubClassOf("RegisterClass"))
+ return false;
+
+ if (!T.getRegisterClass(InstOpRec).contains(
+ T.getRegBank().getReg(ADI->getDef())))
+ PrintFatalError(Loc, "fixed register " + ADI->getDef()->getName() +
+ " is not a member of the " +
+ InstOpRec->getName() + " register class!");
+
+ if (Result->getArgName(AliasOpNo))
+ PrintFatalError(Loc, "result fixed register argument must "
+ "not have a name!");
+
+ ResOp = ResultOperand(ResultRecord);
+ return true;
+ }
+
+ // Handle "zero_reg" for optional def operands.
+ if (ADI && ADI->getDef()->getName() == "zero_reg") {
+
+ // Check if this is an optional def.
+ // Tied operands where the source is a sub-operand of a complex operand
+ // need to represent both operands in the alias destination instruction.
+ // Allow zero_reg for the tied portion. This can and should go away once
+ // the MC representation of things doesn't use tied operands at all.
+ // if (!InstOpRec->isSubClassOf("OptionalDefOperand"))
+ // throw TGError(Loc, "reg0 used for result that is not an "
+ // "OptionalDefOperand!");
+
+ ResOp = ResultOperand(static_cast<Record *>(nullptr));
+ return true;
+ }
+
+ // Literal integers.
+ if (IntInit *II = dyn_cast<IntInit>(Arg)) {
+ if (hasSubOps || !InstOpRec->isSubClassOf("Operand"))
+ return false;
+ // Integer arguments can't have names.
+ if (Result->getArgName(AliasOpNo))
+ PrintFatalError(Loc, "result argument #" + Twine(AliasOpNo) +
+ " must not have a name!");
+ ResOp = ResultOperand(II->getValue());
+ return true;
+ }
+
+ // Bits<n> (also used for 0bxx literals)
+ if (BitsInit *BI = dyn_cast<BitsInit>(Arg)) {
+ if (hasSubOps || !InstOpRec->isSubClassOf("Operand"))
+ return false;
+ if (!BI->isComplete())
+ return false;
+ // Convert the bits init to an integer and use that for the result.
+ IntInit *II = dyn_cast_or_null<IntInit>(
+ BI->convertInitializerTo(IntRecTy::get(BI->getRecordKeeper())));
+ if (!II)
+ return false;
+ ResOp = ResultOperand(II->getValue());
+ return true;
+ }
+
+ // If both are Operands with the same MVT, allow the conversion. It's
+ // up to the user to make sure the values are appropriate, just like
+ // for isel Pat's.
+ if (InstOpRec->isSubClassOf("Operand") && ADI &&
+ ADI->getDef()->isSubClassOf("Operand")) {
+ // FIXME: What other attributes should we check here? Identical
+ // MIOperandInfo perhaps?
+ if (InstOpRec->getValueInit("Type") != ADI->getDef()->getValueInit("Type"))
+ return false;
+ ResOp = ResultOperand(std::string(Result->getArgNameStr(AliasOpNo)),
+ ADI->getDef());
+ return true;
+ }
+
+ return false;
+}
+
+unsigned CodeGenInstAlias::ResultOperand::getMINumOperands() const {
+ if (!isRecord())
+ return 1;
+
+ Record *Rec = getRecord();
+ if (!Rec->isSubClassOf("Operand"))
+ return 1;
+
+ DagInit *MIOpInfo = Rec->getValueAsDag("MIOperandInfo");
+ if (MIOpInfo->getNumArgs() == 0) {
+ // Unspecified, so it defaults to 1
+ return 1;
+ }
+
+ return MIOpInfo->getNumArgs();
+}
+
+CodeGenInstAlias::CodeGenInstAlias(Record *R, CodeGenTarget &T) : TheDef(R) {
+ Result = R->getValueAsDag("ResultInst");
+ AsmString = std::string(R->getValueAsString("AsmString"));
+
+ // Verify that the root of the result is an instruction.
+ DefInit *DI = dyn_cast<DefInit>(Result->getOperator());
+ if (!DI || !DI->getDef()->isSubClassOf("Instruction"))
+ PrintFatalError(R->getLoc(),
+ "result of inst alias should be an instruction");
+
+ ResultInst = &T.getInstruction(DI->getDef());
+
+ // NameClass - If argument names are repeated, we need to verify they have
+ // the same class.
+ StringMap<Record *> NameClass;
+ for (unsigned i = 0, e = Result->getNumArgs(); i != e; ++i) {
+ DefInit *ADI = dyn_cast<DefInit>(Result->getArg(i));
+ if (!ADI || !Result->getArgName(i))
+ continue;
+ // Verify we don't have something like: (someinst GR16:$foo, GR32:$foo)
+ // $foo can exist multiple times in the result list, but it must have the
+ // same type.
+ Record *&Entry = NameClass[Result->getArgNameStr(i)];
+ if (Entry && Entry != ADI->getDef())
+ PrintFatalError(R->getLoc(), "result value $" + Result->getArgNameStr(i) +
+ " is both " + Entry->getName() +
+ " and " + ADI->getDef()->getName() +
+ "!");
+ Entry = ADI->getDef();
+ }
+
+ // Decode and validate the arguments of the result.
+ unsigned AliasOpNo = 0;
+ for (unsigned i = 0, e = ResultInst->Operands.size(); i != e; ++i) {
+
+ // Tied registers don't have an entry in the result dag unless they're part
+ // of a complex operand, in which case we include them anyways, as we
+ // don't have any other way to specify the whole operand.
+ if (ResultInst->Operands[i].MINumOperands == 1 &&
+ ResultInst->Operands[i].getTiedRegister() != -1) {
+ // Tied operands of different RegisterClass should be explicit within an
+ // instruction's syntax and so cannot be skipped.
+ int TiedOpNum = ResultInst->Operands[i].getTiedRegister();
+ if (ResultInst->Operands[i].Rec->getName() ==
+ ResultInst->Operands[TiedOpNum].Rec->getName())
+ continue;
+ }
+
+ if (AliasOpNo >= Result->getNumArgs())
+ PrintFatalError(R->getLoc(), "not enough arguments for instruction!");
+
+ Record *InstOpRec = ResultInst->Operands[i].Rec;
+ unsigned NumSubOps = ResultInst->Operands[i].MINumOperands;
+ ResultOperand ResOp(static_cast<int64_t>(0));
+ if (tryAliasOpMatch(Result, AliasOpNo, InstOpRec, (NumSubOps > 1),
+ R->getLoc(), T, ResOp)) {
+ // If this is a simple operand, or a complex operand with a custom match
+      // class, then we can match it verbatim.
+ if (NumSubOps == 1 || (InstOpRec->getValue("ParserMatchClass") &&
+ InstOpRec->getValueAsDef("ParserMatchClass")
+ ->getValueAsString("Name") != "Imm")) {
+ ResultOperands.push_back(ResOp);
+ ResultInstOperandIndex.push_back(std::make_pair(i, -1));
+ ++AliasOpNo;
+
+ // Otherwise, we need to match each of the suboperands individually.
+ } else {
+ DagInit *MIOI = ResultInst->Operands[i].MIOperandInfo;
+ for (unsigned SubOp = 0; SubOp != NumSubOps; ++SubOp) {
+ Record *SubRec = cast<DefInit>(MIOI->getArg(SubOp))->getDef();
+
+ // Take care to instantiate each of the suboperands with the correct
+ // nomenclature: $foo.bar
+ ResultOperands.emplace_back(
+ Result->getArgName(AliasOpNo)->getAsUnquotedString() + "." +
+ MIOI->getArgName(SubOp)->getAsUnquotedString(),
+ SubRec);
+ ResultInstOperandIndex.push_back(std::make_pair(i, SubOp));
+ }
+ ++AliasOpNo;
+ }
+ continue;
+ }
+
+ // If the argument did not match the instruction operand, and the operand
+ // is composed of multiple suboperands, try matching the suboperands.
+ if (NumSubOps > 1) {
+ DagInit *MIOI = ResultInst->Operands[i].MIOperandInfo;
+ for (unsigned SubOp = 0; SubOp != NumSubOps; ++SubOp) {
+ if (AliasOpNo >= Result->getNumArgs())
+ PrintFatalError(R->getLoc(), "not enough arguments for instruction!");
+ Record *SubRec = cast<DefInit>(MIOI->getArg(SubOp))->getDef();
+ if (tryAliasOpMatch(Result, AliasOpNo, SubRec, false, R->getLoc(), T,
+ ResOp)) {
+ ResultOperands.push_back(ResOp);
+ ResultInstOperandIndex.push_back(std::make_pair(i, SubOp));
+ ++AliasOpNo;
+ } else {
+ PrintFatalError(
+ R->getLoc(),
+ "result argument #" + Twine(AliasOpNo) +
+ " does not match instruction operand class " +
+ (SubOp == 0 ? InstOpRec->getName() : SubRec->getName()));
+ }
+ }
+ continue;
+ }
+ PrintFatalError(R->getLoc(),
+ "result argument #" + Twine(AliasOpNo) +
+ " does not match instruction operand class " +
+ InstOpRec->getName());
+ }
+
+ if (AliasOpNo != Result->getNumArgs())
+ PrintFatalError(R->getLoc(), "too many operands for instruction!");
+}
diff --git a/llvm/utils/TableGen/CodeGenInstAlias.h b/llvm/utils/TableGen/CodeGenInstAlias.h
new file mode 100644
index 000000000000..2a05273e7270
--- /dev/null
+++ b/llvm/utils/TableGen/CodeGenInstAlias.h
@@ -0,0 +1,105 @@
+//===- CodeGenInstAlias.h - InstAlias Class Wrapper -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a wrapper class for the 'InstAlias' TableGen class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_CODEGENINSTALIAS_H
+#define LLVM_UTILS_TABLEGEN_CODEGENINSTALIAS_H
+
+#include "llvm/ADT/StringRef.h"
+#include <cassert>
+#include <cstdint>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+template <typename T> class ArrayRef;
+class CodeGenInstruction;
+class CodeGenTarget;
+class DagInit;
+class SMLoc;
+class Record;
+
+/// CodeGenInstAlias - This represents an InstAlias definition.
+class CodeGenInstAlias {
+public:
+ Record *TheDef; // The actual record defining this InstAlias.
+
+ /// AsmString - The format string used to emit a .s file for the
+ /// instruction.
+ std::string AsmString;
+
+ /// Result - The result instruction.
+ DagInit *Result;
+
+ /// ResultInst - The instruction generated by the alias (decoded from
+ /// Result).
+ CodeGenInstruction *ResultInst;
+
+ struct ResultOperand {
+ private:
+ std::string Name;
+ Record *R = nullptr;
+ int64_t Imm = 0;
+
+ public:
+ enum { K_Record, K_Imm, K_Reg } Kind;
+
+ ResultOperand(std::string N, Record *r)
+ : Name(std::move(N)), R(r), Kind(K_Record) {}
+ ResultOperand(int64_t I) : Imm(I), Kind(K_Imm) {}
+ ResultOperand(Record *r) : R(r), Kind(K_Reg) {}
+
+ bool isRecord() const { return Kind == K_Record; }
+ bool isImm() const { return Kind == K_Imm; }
+ bool isReg() const { return Kind == K_Reg; }
+
+ StringRef getName() const {
+ assert(isRecord());
+ return Name;
+ }
+ Record *getRecord() const {
+ assert(isRecord());
+ return R;
+ }
+ int64_t getImm() const {
+ assert(isImm());
+ return Imm;
+ }
+ Record *getRegister() const {
+ assert(isReg());
+ return R;
+ }
+
+ unsigned getMINumOperands() const;
+ };
+
+ /// ResultOperands - The decoded operands for the result instruction.
+ std::vector<ResultOperand> ResultOperands;
+
+ /// ResultInstOperandIndex - For each operand, this vector holds a pair of
+ /// indices to identify the corresponding operand in the result
+ /// instruction. The first index specifies the operand and the second
+ /// index specifies the suboperand. If there are no suboperands or if all
+ /// of them are matched by the operand, the second value should be -1.
+ std::vector<std::pair<unsigned, int>> ResultInstOperandIndex;
+
+ CodeGenInstAlias(Record *R, CodeGenTarget &T);
+
+ bool tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo, Record *InstOpRec,
+ bool hasSubOps, ArrayRef<SMLoc> Loc, CodeGenTarget &T,
+ ResultOperand &ResOp);
+};
+
+} // namespace llvm
+
+#endif // LLVM_UTILS_TABLEGEN_CODEGENINSTALIAS_H
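
With the class now declared in its own header, a consumer can walk the decoded operands directly. The helper below is hypothetical (it is not part of this patch) and assumes it is compiled inside the TableGen utility, where CodeGenInstAlias.h and the TableGen Record API are available:

#include "CodeGenInstAlias.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"

using namespace llvm;

// Prints how each decoded alias operand maps onto the result instruction,
// using the ResultOperands / ResultInstOperandIndex pairing built above.
static void dumpAliasOperands(const CodeGenInstAlias &Alias, raw_ostream &OS) {
  OS << Alias.AsmString << " ->\n";
  for (unsigned I = 0, E = Alias.ResultOperands.size(); I != E; ++I) {
    const CodeGenInstAlias::ResultOperand &Op = Alias.ResultOperands[I];
    unsigned OpIdx = Alias.ResultInstOperandIndex[I].first;
    int SubIdx = Alias.ResultInstOperandIndex[I].second;
    OS << "  operand " << OpIdx;
    if (SubIdx != -1)
      OS << " (suboperand " << SubIdx << ")";
    if (Op.isRecord())
      OS << " <- $" << Op.getName() << " : " << Op.getRecord()->getName();
    else if (Op.isImm())
      OS << " <- imm " << Op.getImm();
    else // register operand; a null record means zero_reg
      OS << " <- reg "
         << (Op.getRegister() ? Op.getRegister()->getName()
                              : StringRef("zero_reg"));
    OS << '\n';
  }
}
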
diff --git a/llvm/utils/TableGen/CodeGenInstruction.cpp b/llvm/utils/TableGen/CodeGenInstruction.cpp
index 238c6a1b6ba8..8662b6fb52da 100644
--- a/llvm/utils/TableGen/CodeGenInstruction.cpp
+++ b/llvm/utils/TableGen/CodeGenInstruction.cpp
@@ -13,7 +13,6 @@
#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include <set>
@@ -176,7 +175,7 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
}
OpInfo.SubOpNames[j] = SubArgName;
- SubOpAliases[SubArgName] = std::make_pair(MIOperandNo, j);
+ SubOpAliases[SubArgName] = std::make_pair(i, j);
}
} else if (!EncoderMethod.empty()) {
// If we have no explicit sub-op dag, but have a top-level encoder
@@ -592,266 +591,3 @@ bool CodeGenInstruction::isOperandImpl(StringRef OpListName, unsigned i,
return Constraint->getDef()->isSubClassOf("TypedOperand") &&
Constraint->getDef()->getValueAsBit(PropertyName);
}
-
-//===----------------------------------------------------------------------===//
-/// CodeGenInstAlias Implementation
-//===----------------------------------------------------------------------===//
-
-/// tryAliasOpMatch - This is a helper function for the CodeGenInstAlias
-/// constructor. It checks if an argument in an InstAlias pattern matches
-/// the corresponding operand of the instruction. It returns true on a
-/// successful match, with ResOp set to the result operand to be used.
-bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
- Record *InstOpRec, bool hasSubOps,
- ArrayRef<SMLoc> Loc, CodeGenTarget &T,
- ResultOperand &ResOp) {
- Init *Arg = Result->getArg(AliasOpNo);
- DefInit *ADI = dyn_cast<DefInit>(Arg);
- Record *ResultRecord = ADI ? ADI->getDef() : nullptr;
-
- if (ADI && ADI->getDef() == InstOpRec) {
- // If the operand is a record, it must have a name, and the record type
- // must match up with the instruction's argument type.
- if (!Result->getArgName(AliasOpNo))
- PrintFatalError(Loc, "result argument #" + Twine(AliasOpNo) +
- " must have a name!");
- ResOp = ResultOperand(std::string(Result->getArgNameStr(AliasOpNo)),
- ResultRecord);
- return true;
- }
-
- // For register operands, the source register class can be a subclass
- // of the instruction register class, not just an exact match.
- if (InstOpRec->isSubClassOf("RegisterOperand"))
- InstOpRec = InstOpRec->getValueAsDef("RegClass");
-
- if (ADI && ADI->getDef()->isSubClassOf("RegisterOperand"))
- ADI = ADI->getDef()->getValueAsDef("RegClass")->getDefInit();
-
- if (ADI && ADI->getDef()->isSubClassOf("RegisterClass")) {
- if (!InstOpRec->isSubClassOf("RegisterClass"))
- return false;
- if (!T.getRegisterClass(InstOpRec)
- .hasSubClass(&T.getRegisterClass(ADI->getDef())))
- return false;
- ResOp = ResultOperand(std::string(Result->getArgNameStr(AliasOpNo)),
- ResultRecord);
- return true;
- }
-
- // Handle explicit registers.
- if (ADI && ADI->getDef()->isSubClassOf("Register")) {
- if (InstOpRec->isSubClassOf("OptionalDefOperand")) {
- DagInit *DI = InstOpRec->getValueAsDag("MIOperandInfo");
- // The operand info should only have a single (register) entry. We
- // want the register class of it.
- InstOpRec = cast<DefInit>(DI->getArg(0))->getDef();
- }
-
- if (!InstOpRec->isSubClassOf("RegisterClass"))
- return false;
-
- if (!T.getRegisterClass(InstOpRec)
- .contains(T.getRegBank().getReg(ADI->getDef())))
- PrintFatalError(Loc, "fixed register " + ADI->getDef()->getName() +
- " is not a member of the " + InstOpRec->getName() +
- " register class!");
-
- if (Result->getArgName(AliasOpNo))
- PrintFatalError(Loc, "result fixed register argument must "
- "not have a name!");
-
- ResOp = ResultOperand(ResultRecord);
- return true;
- }
-
- // Handle "zero_reg" for optional def operands.
- if (ADI && ADI->getDef()->getName() == "zero_reg") {
-
- // Check if this is an optional def.
- // Tied operands where the source is a sub-operand of a complex operand
- // need to represent both operands in the alias destination instruction.
- // Allow zero_reg for the tied portion. This can and should go away once
- // the MC representation of things doesn't use tied operands at all.
- //if (!InstOpRec->isSubClassOf("OptionalDefOperand"))
- // throw TGError(Loc, "reg0 used for result that is not an "
- // "OptionalDefOperand!");
-
- ResOp = ResultOperand(static_cast<Record*>(nullptr));
- return true;
- }
-
- // Literal integers.
- if (IntInit *II = dyn_cast<IntInit>(Arg)) {
- if (hasSubOps || !InstOpRec->isSubClassOf("Operand"))
- return false;
- // Integer arguments can't have names.
- if (Result->getArgName(AliasOpNo))
- PrintFatalError(Loc, "result argument #" + Twine(AliasOpNo) +
- " must not have a name!");
- ResOp = ResultOperand(II->getValue());
- return true;
- }
-
- // Bits<n> (also used for 0bxx literals)
- if (BitsInit *BI = dyn_cast<BitsInit>(Arg)) {
- if (hasSubOps || !InstOpRec->isSubClassOf("Operand"))
- return false;
- if (!BI->isComplete())
- return false;
- // Convert the bits init to an integer and use that for the result.
- IntInit *II = dyn_cast_or_null<IntInit>(
- BI->convertInitializerTo(IntRecTy::get(BI->getRecordKeeper())));
- if (!II)
- return false;
- ResOp = ResultOperand(II->getValue());
- return true;
- }
-
- // If both are Operands with the same MVT, allow the conversion. It's
- // up to the user to make sure the values are appropriate, just like
- // for isel Pat's.
- if (InstOpRec->isSubClassOf("Operand") && ADI &&
- ADI->getDef()->isSubClassOf("Operand")) {
- // FIXME: What other attributes should we check here? Identical
- // MIOperandInfo perhaps?
- if (InstOpRec->getValueInit("Type") != ADI->getDef()->getValueInit("Type"))
- return false;
- ResOp = ResultOperand(std::string(Result->getArgNameStr(AliasOpNo)),
- ADI->getDef());
- return true;
- }
-
- return false;
-}
-
-unsigned CodeGenInstAlias::ResultOperand::getMINumOperands() const {
- if (!isRecord())
- return 1;
-
- Record *Rec = getRecord();
- if (!Rec->isSubClassOf("Operand"))
- return 1;
-
- DagInit *MIOpInfo = Rec->getValueAsDag("MIOperandInfo");
- if (MIOpInfo->getNumArgs() == 0) {
- // Unspecified, so it defaults to 1
- return 1;
- }
-
- return MIOpInfo->getNumArgs();
-}
-
-CodeGenInstAlias::CodeGenInstAlias(Record *R, CodeGenTarget &T)
- : TheDef(R) {
- Result = R->getValueAsDag("ResultInst");
- AsmString = std::string(R->getValueAsString("AsmString"));
-
- // Verify that the root of the result is an instruction.
- DefInit *DI = dyn_cast<DefInit>(Result->getOperator());
- if (!DI || !DI->getDef()->isSubClassOf("Instruction"))
- PrintFatalError(R->getLoc(),
- "result of inst alias should be an instruction");
-
- ResultInst = &T.getInstruction(DI->getDef());
-
- // NameClass - If argument names are repeated, we need to verify they have
- // the same class.
- StringMap<Record*> NameClass;
- for (unsigned i = 0, e = Result->getNumArgs(); i != e; ++i) {
- DefInit *ADI = dyn_cast<DefInit>(Result->getArg(i));
- if (!ADI || !Result->getArgName(i))
- continue;
- // Verify we don't have something like: (someinst GR16:$foo, GR32:$foo)
- // $foo can exist multiple times in the result list, but it must have the
- // same type.
- Record *&Entry = NameClass[Result->getArgNameStr(i)];
- if (Entry && Entry != ADI->getDef())
- PrintFatalError(R->getLoc(), "result value $" + Result->getArgNameStr(i) +
- " is both " + Entry->getName() + " and " +
- ADI->getDef()->getName() + "!");
- Entry = ADI->getDef();
- }
-
- // Decode and validate the arguments of the result.
- unsigned AliasOpNo = 0;
- for (unsigned i = 0, e = ResultInst->Operands.size(); i != e; ++i) {
-
- // Tied registers don't have an entry in the result dag unless they're part
- // of a complex operand, in which case we include them anyways, as we
- // don't have any other way to specify the whole operand.
- if (ResultInst->Operands[i].MINumOperands == 1 &&
- ResultInst->Operands[i].getTiedRegister() != -1) {
- // Tied operands of different RegisterClass should be explicit within an
- // instruction's syntax and so cannot be skipped.
- int TiedOpNum = ResultInst->Operands[i].getTiedRegister();
- if (ResultInst->Operands[i].Rec->getName() ==
- ResultInst->Operands[TiedOpNum].Rec->getName())
- continue;
- }
-
- if (AliasOpNo >= Result->getNumArgs())
- PrintFatalError(R->getLoc(), "not enough arguments for instruction!");
-
- Record *InstOpRec = ResultInst->Operands[i].Rec;
- unsigned NumSubOps = ResultInst->Operands[i].MINumOperands;
- ResultOperand ResOp(static_cast<int64_t>(0));
- if (tryAliasOpMatch(Result, AliasOpNo, InstOpRec, (NumSubOps > 1),
- R->getLoc(), T, ResOp)) {
- // If this is a simple operand, or a complex operand with a custom match
-      // class, then we can match it verbatim.
- if (NumSubOps == 1 ||
- (InstOpRec->getValue("ParserMatchClass") &&
- InstOpRec->getValueAsDef("ParserMatchClass")
- ->getValueAsString("Name") != "Imm")) {
- ResultOperands.push_back(ResOp);
- ResultInstOperandIndex.push_back(std::make_pair(i, -1));
- ++AliasOpNo;
-
- // Otherwise, we need to match each of the suboperands individually.
- } else {
- DagInit *MIOI = ResultInst->Operands[i].MIOperandInfo;
- for (unsigned SubOp = 0; SubOp != NumSubOps; ++SubOp) {
- Record *SubRec = cast<DefInit>(MIOI->getArg(SubOp))->getDef();
-
- // Take care to instantiate each of the suboperands with the correct
- // nomenclature: $foo.bar
- ResultOperands.emplace_back(
- Result->getArgName(AliasOpNo)->getAsUnquotedString() + "." +
- MIOI->getArgName(SubOp)->getAsUnquotedString(), SubRec);
- ResultInstOperandIndex.push_back(std::make_pair(i, SubOp));
- }
- ++AliasOpNo;
- }
- continue;
- }
-
- // If the argument did not match the instruction operand, and the operand
- // is composed of multiple suboperands, try matching the suboperands.
- if (NumSubOps > 1) {
- DagInit *MIOI = ResultInst->Operands[i].MIOperandInfo;
- for (unsigned SubOp = 0; SubOp != NumSubOps; ++SubOp) {
- if (AliasOpNo >= Result->getNumArgs())
- PrintFatalError(R->getLoc(), "not enough arguments for instruction!");
- Record *SubRec = cast<DefInit>(MIOI->getArg(SubOp))->getDef();
- if (tryAliasOpMatch(Result, AliasOpNo, SubRec, false,
- R->getLoc(), T, ResOp)) {
- ResultOperands.push_back(ResOp);
- ResultInstOperandIndex.push_back(std::make_pair(i, SubOp));
- ++AliasOpNo;
- } else {
- PrintFatalError(R->getLoc(), "result argument #" + Twine(AliasOpNo) +
- " does not match instruction operand class " +
- (SubOp == 0 ? InstOpRec->getName() :SubRec->getName()));
- }
- }
- continue;
- }
- PrintFatalError(R->getLoc(), "result argument #" + Twine(AliasOpNo) +
- " does not match instruction operand class " +
- InstOpRec->getName());
- }
-
- if (AliasOpNo != Result->getNumArgs())
- PrintFatalError(R->getLoc(), "too many operands for instruction!");
-}
diff --git a/llvm/utils/TableGen/CodeGenInstruction.h b/llvm/utils/TableGen/CodeGenInstruction.h
index 72626caada56..ee7a1696bab9 100644
--- a/llvm/utils/TableGen/CodeGenInstruction.h
+++ b/llvm/utils/TableGen/CodeGenInstruction.h
@@ -16,15 +16,13 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/MachineValueType.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include <cassert>
#include <string>
#include <utility>
#include <vector>
namespace llvm {
-class SMLoc;
-template <typename T> class ArrayRef;
class Record;
class DagInit;
class CodeGenTarget;
@@ -340,71 +338,6 @@ template <typename T> class ArrayRef;
bool isOperandImpl(StringRef OpListName, unsigned i,
StringRef PropertyName) const;
};
-
-
- /// CodeGenInstAlias - This represents an InstAlias definition.
- class CodeGenInstAlias {
- public:
- Record *TheDef; // The actual record defining this InstAlias.
-
- /// AsmString - The format string used to emit a .s file for the
- /// instruction.
- std::string AsmString;
-
- /// Result - The result instruction.
- DagInit *Result;
-
- /// ResultInst - The instruction generated by the alias (decoded from
- /// Result).
- CodeGenInstruction *ResultInst;
-
-
- struct ResultOperand {
- private:
- std::string Name;
- Record *R = nullptr;
- int64_t Imm = 0;
-
- public:
- enum {
- K_Record,
- K_Imm,
- K_Reg
- } Kind;
-
- ResultOperand(std::string N, Record *r)
- : Name(std::move(N)), R(r), Kind(K_Record) {}
- ResultOperand(int64_t I) : Imm(I), Kind(K_Imm) {}
- ResultOperand(Record *r) : R(r), Kind(K_Reg) {}
-
- bool isRecord() const { return Kind == K_Record; }
- bool isImm() const { return Kind == K_Imm; }
- bool isReg() const { return Kind == K_Reg; }
-
- StringRef getName() const { assert(isRecord()); return Name; }
- Record *getRecord() const { assert(isRecord()); return R; }
- int64_t getImm() const { assert(isImm()); return Imm; }
- Record *getRegister() const { assert(isReg()); return R; }
-
- unsigned getMINumOperands() const;
- };
-
- /// ResultOperands - The decoded operands for the result instruction.
- std::vector<ResultOperand> ResultOperands;
-
- /// ResultInstOperandIndex - For each operand, this vector holds a pair of
- /// indices to identify the corresponding operand in the result
- /// instruction. The first index specifies the operand and the second
- /// index specifies the suboperand. If there are no suboperands or if all
- /// of them are matched by the operand, the second value should be -1.
- std::vector<std::pair<unsigned, int> > ResultInstOperandIndex;
-
- CodeGenInstAlias(Record *R, CodeGenTarget &T);
-
- bool tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
- Record *InstOpRec, bool hasSubOps, ArrayRef<SMLoc> Loc,
- CodeGenTarget &T, ResultOperand &ResOp);
- };
-}
+} // namespace llvm
#endif
diff --git a/llvm/utils/TableGen/CodeGenIntrinsics.cpp b/llvm/utils/TableGen/CodeGenIntrinsics.cpp
new file mode 100644
index 000000000000..7cb86ad95266
--- /dev/null
+++ b/llvm/utils/TableGen/CodeGenIntrinsics.cpp
@@ -0,0 +1,270 @@
+//===- CodeGenIntrinsics.cpp - Intrinsic Class Wrapper --------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a wrapper class for the 'Intrinsic' TableGen class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenIntrinsics.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include <algorithm>
+#include <cassert>
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// CodeGenIntrinsic Implementation
+//===----------------------------------------------------------------------===//
+
+CodeGenIntrinsicTable::CodeGenIntrinsicTable(const RecordKeeper &RC) {
+ std::vector<Record *> IntrProperties =
+ RC.getAllDerivedDefinitions("IntrinsicProperty");
+
+ std::vector<Record *> DefaultProperties;
+ for (Record *Rec : IntrProperties)
+ if (Rec->getValueAsBit("IsDefault"))
+ DefaultProperties.push_back(Rec);
+
+ std::vector<Record *> Defs = RC.getAllDerivedDefinitions("Intrinsic");
+ Intrinsics.reserve(Defs.size());
+
+ for (unsigned I = 0, e = Defs.size(); I != e; ++I)
+ Intrinsics.push_back(CodeGenIntrinsic(Defs[I], DefaultProperties));
+
+ llvm::sort(Intrinsics,
+ [](const CodeGenIntrinsic &LHS, const CodeGenIntrinsic &RHS) {
+ return std::tie(LHS.TargetPrefix, LHS.Name) <
+ std::tie(RHS.TargetPrefix, RHS.Name);
+ });
+ Targets.push_back({"", 0, 0});
+ for (size_t I = 0, E = Intrinsics.size(); I < E; ++I)
+ if (Intrinsics[I].TargetPrefix != Targets.back().Name) {
+ Targets.back().Count = I - Targets.back().Offset;
+ Targets.push_back({Intrinsics[I].TargetPrefix, I, 0});
+ }
+ Targets.back().Count = Intrinsics.size() - Targets.back().Offset;
+}
+
+CodeGenIntrinsic::CodeGenIntrinsic(Record *R,
+ std::vector<Record *> DefaultProperties) {
+ TheDef = R;
+ std::string DefName = std::string(R->getName());
+ ArrayRef<SMLoc> DefLoc = R->getLoc();
+ Properties = 0;
+ isOverloaded = false;
+ isCommutative = false;
+ canThrow = false;
+ isNoReturn = false;
+ isNoCallback = false;
+ isNoSync = false;
+ isNoFree = false;
+ isWillReturn = false;
+ isCold = false;
+ isNoDuplicate = false;
+ isNoMerge = false;
+ isConvergent = false;
+ isSpeculatable = false;
+ hasSideEffects = false;
+ isStrictFP = false;
+
+ if (DefName.size() <= 4 || DefName.substr(0, 4) != "int_")
+ PrintFatalError(DefLoc,
+ "Intrinsic '" + DefName + "' does not start with 'int_'!");
+
+ EnumName = DefName.substr(4);
+
+ if (R->getValue(
+ "ClangBuiltinName")) // Ignore a missing ClangBuiltinName field.
+ ClangBuiltinName = std::string(R->getValueAsString("ClangBuiltinName"));
+ if (R->getValue("MSBuiltinName")) // Ignore a missing MSBuiltinName field.
+ MSBuiltinName = std::string(R->getValueAsString("MSBuiltinName"));
+
+ TargetPrefix = std::string(R->getValueAsString("TargetPrefix"));
+ Name = std::string(R->getValueAsString("LLVMName"));
+
+ if (Name == "") {
+ // If an explicit name isn't specified, derive one from the DefName.
+ Name = "llvm.";
+
+ for (unsigned i = 0, e = EnumName.size(); i != e; ++i)
+ Name += (EnumName[i] == '_') ? '.' : EnumName[i];
+ } else {
+ // Verify it starts with "llvm.".
+ if (Name.size() <= 5 || Name.substr(0, 5) != "llvm.")
+ PrintFatalError(DefLoc, "Intrinsic '" + DefName +
+ "'s name does not start with 'llvm.'!");
+ }
+
+ // If TargetPrefix is specified, make sure that Name starts with
+ // "llvm.<targetprefix>.".
+ if (!TargetPrefix.empty()) {
+ if (Name.size() < 6 + TargetPrefix.size() ||
+ Name.substr(5, 1 + TargetPrefix.size()) != (TargetPrefix + "."))
+ PrintFatalError(DefLoc, "Intrinsic '" + DefName +
+ "' does not start with 'llvm." +
+ TargetPrefix + ".'!");
+ }
+
+ if (auto *Types = R->getValue("Types")) {
+ auto *TypeList = cast<ListInit>(Types->getValue());
+ isOverloaded = R->getValueAsBit("isOverloaded");
+
+ unsigned I = 0;
+ for (unsigned E = R->getValueAsListInit("RetTypes")->size(); I < E; ++I)
+ IS.RetTys.push_back(TypeList->getElementAsRecord(I));
+
+ for (unsigned E = TypeList->size(); I < E; ++I)
+ IS.ParamTys.push_back(TypeList->getElementAsRecord(I));
+ }
+
+ // Parse the intrinsic properties.
+ ListInit *PropList = R->getValueAsListInit("IntrProperties");
+ for (unsigned i = 0, e = PropList->size(); i != e; ++i) {
+ Record *Property = PropList->getElementAsRecord(i);
+ assert(Property->isSubClassOf("IntrinsicProperty") &&
+ "Expected a property!");
+
+ setProperty(Property);
+ }
+
+ // Set default properties to true.
+ setDefaultProperties(R, DefaultProperties);
+
+ // Also record the SDPatternOperator Properties.
+ Properties = parseSDPatternOperatorProperties(R);
+
+ // Sort the argument attributes for later benefit.
+ for (auto &Attrs : ArgumentAttributes)
+ llvm::sort(Attrs);
+}
+
+void CodeGenIntrinsic::setDefaultProperties(
+ Record *R, std::vector<Record *> DefaultProperties) {
+ // opt-out of using default attributes.
+ if (R->getValueAsBit("DisableDefaultAttributes"))
+ return;
+
+ for (Record *Rec : DefaultProperties)
+ setProperty(Rec);
+}
+
+void CodeGenIntrinsic::setProperty(Record *R) {
+ if (R->getName() == "IntrNoMem")
+ ME = MemoryEffects::none();
+ else if (R->getName() == "IntrReadMem") {
+ if (ME.onlyWritesMemory())
+ PrintFatalError(TheDef->getLoc(),
+ Twine("IntrReadMem cannot be used after IntrNoMem or "
+ "IntrWriteMem. Default is ReadWrite"));
+ ME &= MemoryEffects::readOnly();
+ } else if (R->getName() == "IntrWriteMem") {
+ if (ME.onlyReadsMemory())
+ PrintFatalError(TheDef->getLoc(),
+ Twine("IntrWriteMem cannot be used after IntrNoMem or "
+ "IntrReadMem. Default is ReadWrite"));
+ ME &= MemoryEffects::writeOnly();
+ } else if (R->getName() == "IntrArgMemOnly")
+ ME &= MemoryEffects::argMemOnly();
+ else if (R->getName() == "IntrInaccessibleMemOnly")
+ ME &= MemoryEffects::inaccessibleMemOnly();
+ else if (R->getName() == "IntrInaccessibleMemOrArgMemOnly")
+ ME &= MemoryEffects::inaccessibleOrArgMemOnly();
+ else if (R->getName() == "Commutative")
+ isCommutative = true;
+ else if (R->getName() == "Throws")
+ canThrow = true;
+ else if (R->getName() == "IntrNoDuplicate")
+ isNoDuplicate = true;
+ else if (R->getName() == "IntrNoMerge")
+ isNoMerge = true;
+ else if (R->getName() == "IntrConvergent")
+ isConvergent = true;
+ else if (R->getName() == "IntrNoReturn")
+ isNoReturn = true;
+ else if (R->getName() == "IntrNoCallback")
+ isNoCallback = true;
+ else if (R->getName() == "IntrNoSync")
+ isNoSync = true;
+ else if (R->getName() == "IntrNoFree")
+ isNoFree = true;
+ else if (R->getName() == "IntrWillReturn")
+ isWillReturn = !isNoReturn;
+ else if (R->getName() == "IntrCold")
+ isCold = true;
+ else if (R->getName() == "IntrSpeculatable")
+ isSpeculatable = true;
+ else if (R->getName() == "IntrHasSideEffects")
+ hasSideEffects = true;
+ else if (R->getName() == "IntrStrictFP")
+ isStrictFP = true;
+ else if (R->isSubClassOf("NoCapture")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ addArgAttribute(ArgNo, NoCapture);
+ } else if (R->isSubClassOf("NoAlias")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ addArgAttribute(ArgNo, NoAlias);
+ } else if (R->isSubClassOf("NoUndef")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ addArgAttribute(ArgNo, NoUndef);
+ } else if (R->isSubClassOf("NonNull")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ addArgAttribute(ArgNo, NonNull);
+ } else if (R->isSubClassOf("Returned")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ addArgAttribute(ArgNo, Returned);
+ } else if (R->isSubClassOf("ReadOnly")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ addArgAttribute(ArgNo, ReadOnly);
+ } else if (R->isSubClassOf("WriteOnly")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ addArgAttribute(ArgNo, WriteOnly);
+ } else if (R->isSubClassOf("ReadNone")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ addArgAttribute(ArgNo, ReadNone);
+ } else if (R->isSubClassOf("ImmArg")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ addArgAttribute(ArgNo, ImmArg);
+ } else if (R->isSubClassOf("Align")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ uint64_t Align = R->getValueAsInt("Align");
+ addArgAttribute(ArgNo, Alignment, Align);
+ } else if (R->isSubClassOf("Dereferenceable")) {
+ unsigned ArgNo = R->getValueAsInt("ArgNo");
+ uint64_t Bytes = R->getValueAsInt("Bytes");
+ addArgAttribute(ArgNo, Dereferenceable, Bytes);
+ } else
+ llvm_unreachable("Unknown property!");
+}
+
+bool CodeGenIntrinsic::isParamAPointer(unsigned ParamIdx) const {
+ if (ParamIdx >= IS.ParamTys.size())
+ return false;
+ return (IS.ParamTys[ParamIdx]->isSubClassOf("LLVMQualPointerType") ||
+ IS.ParamTys[ParamIdx]->isSubClassOf("LLVMAnyPointerType"));
+}
+
+bool CodeGenIntrinsic::isParamImmArg(unsigned ParamIdx) const {
+ // Convert argument index to attribute index starting from `FirstArgIndex`.
+ ++ParamIdx;
+ if (ParamIdx >= ArgumentAttributes.size())
+ return false;
+ ArgAttribute Val{ImmArg, 0};
+ return std::binary_search(ArgumentAttributes[ParamIdx].begin(),
+ ArgumentAttributes[ParamIdx].end(), Val);
+}
+
+void CodeGenIntrinsic::addArgAttribute(unsigned Idx, ArgAttrKind AK,
+ uint64_t V) {
+ if (Idx >= ArgumentAttributes.size())
+ ArgumentAttributes.resize(Idx + 1);
+ ArgumentAttributes[Idx].emplace_back(AK, V);
+}
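
The table constructor above relies on one invariant: after the llvm::sort call, intrinsics sharing a TargetPrefix are contiguous, so each Targets entry describes a half-open slice [Offset, Offset + Count) of the Intrinsics vector, with the leading empty-prefix slice holding the target-independent intrinsics. A standalone sketch of that slicing step over plain strings (not the TableGen types themselves):

#include <algorithm>
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

struct TargetSlice { std::string Prefix; size_t Offset; size_t Count; };

// Mirrors the loop in CodeGenIntrinsicTable's constructor: sort by
// (prefix, name), then start a new slice wherever the prefix changes.
static std::vector<TargetSlice>
sliceByPrefix(std::vector<std::pair<std::string, std::string>> Intrinsics) {
  std::sort(Intrinsics.begin(), Intrinsics.end());
  std::vector<TargetSlice> Targets{{"", 0, 0}};
  for (size_t I = 0, E = Intrinsics.size(); I != E; ++I)
    if (Intrinsics[I].first != Targets.back().Prefix) {
      Targets.back().Count = I - Targets.back().Offset;
      Targets.push_back({Intrinsics[I].first, I, 0});
    }
  Targets.back().Count = Intrinsics.size() - Targets.back().Offset;
  return Targets;
}
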
diff --git a/llvm/utils/TableGen/CodeGenIntrinsics.h b/llvm/utils/TableGen/CodeGenIntrinsics.h
index 0558918b3028..f3452f5acea8 100644
--- a/llvm/utils/TableGen/CodeGenIntrinsics.h
+++ b/llvm/utils/TableGen/CodeGenIntrinsics.h
@@ -1,4 +1,4 @@
-//===- CodeGenIntrinsic.h - Intrinsic Class Wrapper ------------*- C++ -*--===//
+//===- CodeGenIntrinsics.h - Intrinsic Class Wrapper -----------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -15,9 +15,9 @@
#include "SDNodeProperties.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/ModRef.h"
#include <string>
+#include <tuple>
#include <vector>
namespace llvm {
@@ -42,19 +42,13 @@ struct CodeGenIntrinsic {
/// only populated when in the context of a target .td file. When building
/// Intrinsics.td, this isn't available, because we don't know the target
/// pointer size.
- std::vector<MVT::SimpleValueType> RetVTs;
-
- /// The records for each return type.
- std::vector<Record *> RetTypeDefs;
+ std::vector<Record *> RetTys;
/// The MVT::SimpleValueType for each parameter type. Note that this list is
/// only populated when in the context of a target .td file. When building
/// Intrinsics.td, this isn't available, because we don't know the target
/// pointer size.
- std::vector<MVT::SimpleValueType> ParamVTs;
-
- /// The records for each parameter type.
- std::vector<Record *> ParamTypeDefs;
+ std::vector<Record *> ParamTys;
};
IntrinsicSignature IS;
@@ -109,6 +103,9 @@ struct CodeGenIntrinsic {
// True if the intrinsic is marked as speculatable.
bool isSpeculatable;
+ // True if the intrinsic is marked as strictfp.
+ bool isStrictFP;
+
enum ArgAttrKind {
NoCapture,
NoAlias,
@@ -119,7 +116,8 @@ struct CodeGenIntrinsic {
WriteOnly,
ReadNone,
ImmArg,
- Alignment
+ Alignment,
+ Dereferenceable
};
struct ArgAttribute {
diff --git a/llvm/utils/TableGen/CodeGenMapTable.cpp b/llvm/utils/TableGen/CodeGenMapTable.cpp
index 02695942f5c1..fd375735dfd2 100644
--- a/llvm/utils/TableGen/CodeGenMapTable.cpp
+++ b/llvm/utils/TableGen/CodeGenMapTable.cpp
@@ -78,6 +78,7 @@
#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
using namespace llvm;
typedef std::map<std::string, std::vector<Record*> > InstrRelMapTy;
diff --git a/llvm/utils/TableGen/CodeGenRegisters.cpp b/llvm/utils/TableGen/CodeGenRegisters.cpp
index 8ad8a7a5bc9b..5c45290a0657 100644
--- a/llvm/utils/TableGen/CodeGenRegisters.cpp
+++ b/llvm/utils/TableGen/CodeGenRegisters.cpp
@@ -872,7 +872,7 @@ bool CodeGenRegisterClass::hasType(const ValueTypeByHwMode &VT) const {
// If VT is not identical to any of this class's types, but is a simple
// type, check if any of the types for this class contain it under some
// mode.
- // The motivating example came from RISCV, where (likely because of being
+ // The motivating example came from RISC-V, where (likely because of being
// guarded by "64-bit" predicate), the type of X5 was {*:[i64]}, but the
// type in GRC was {*:[i32], m1:[i64]}.
if (VT.isSimple()) {
@@ -1659,8 +1659,8 @@ static void computeUberSets(std::vector<UberRegSet> &UberSets,
"register enum value mismatch");
// For simplicity, make the SetID the same as EnumValue.
- IntEqClasses UberSetIDs(Registers.size()+1);
- std::set<unsigned> AllocatableRegs;
+ IntEqClasses UberSetIDs(Registers.size() + 1);
+ BitVector AllocatableRegs(Registers.size() + 1);
for (auto &RegClass : RegBank.getRegClasses()) {
if (!RegClass.Allocatable)
continue;
@@ -1672,16 +1672,16 @@ static void computeUberSets(std::vector<UberRegSet> &UberSets,
unsigned USetID = UberSetIDs.findLeader((*Regs.begin())->EnumValue);
assert(USetID && "register number 0 is invalid");
- AllocatableRegs.insert((*Regs.begin())->EnumValue);
+ AllocatableRegs.set((*Regs.begin())->EnumValue);
for (const CodeGenRegister *CGR : llvm::drop_begin(Regs)) {
- AllocatableRegs.insert(CGR->EnumValue);
+ AllocatableRegs.set(CGR->EnumValue);
UberSetIDs.join(USetID, CGR->EnumValue);
}
}
// Combine non-allocatable regs.
for (const auto &Reg : Registers) {
unsigned RegNum = Reg.EnumValue;
- if (AllocatableRegs.count(RegNum))
+ if (AllocatableRegs.test(RegNum))
continue;
UberSetIDs.join(0, RegNum);
@@ -1704,7 +1704,6 @@ static void computeUberSets(std::vector<UberRegSet> &UberSets,
UberRegSet *USet = &UberSets[USetID];
USet->Regs.push_back(&Reg);
- sortAndUniqueRegisters(USet->Regs);
RegSets[i++] = USet;
}
}
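
The computeUberSets hunks above swap a std::set<unsigned> for a BitVector sized to the register count, turning membership tests into O(1) bit probes with no node allocations. A minimal illustration of the two calls the patch relies on (set and test), outside the TableGen code:

#include "llvm/ADT/BitVector.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::BitVector AllocatableRegs(8); // one bit per register enum value
  AllocatableRegs.set(3);             // mark register 3 as allocatable
  for (unsigned Reg = 0; Reg != AllocatableRegs.size(); ++Reg)
    if (!AllocatableRegs.test(Reg))   // allocatable regs are skipped, as in the patch
      llvm::outs() << "non-allocatable reg " << Reg << '\n';
  return 0;
}
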
diff --git a/llvm/utils/TableGen/CodeGenRegisters.h b/llvm/utils/TableGen/CodeGenRegisters.h
index 765425ed68cb..15f08d1431f9 100644
--- a/llvm/utils/TableGen/CodeGenRegisters.h
+++ b/llvm/utils/TableGen/CodeGenRegisters.h
@@ -14,6 +14,7 @@
#ifndef LLVM_UTILS_TABLEGEN_CODEGENREGISTERS_H
#define LLVM_UTILS_TABLEGEN_CODEGENREGISTERS_H
+#include "CodeGenHwModes.h"
#include "InfoByHwMode.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
@@ -32,8 +33,11 @@
#include <cassert>
#include <cstdint>
#include <deque>
+#include <functional>
#include <list>
#include <map>
+#include <memory>
+#include <optional>
#include <string>
#include <utility>
#include <vector>
@@ -41,7 +45,6 @@
namespace llvm {
class CodeGenRegBank;
- template <typename T, typename Vector, typename Set> class SetVector;
/// Used to encode a step in a register lane mask transformation.
/// Mask the bits specified in Mask, then rotate them Rol bits to the left
@@ -147,14 +150,15 @@ namespace llvm {
};
/// CodeGenRegister - Represents a register definition.
- struct CodeGenRegister {
+ class CodeGenRegister {
+ public:
Record *TheDef;
unsigned EnumValue;
std::vector<int64_t> CostPerUse;
- bool CoveredBySubRegs;
- bool HasDisjunctSubRegs;
- bool Artificial;
- bool Constant;
+ bool CoveredBySubRegs = true;
+ bool HasDisjunctSubRegs = false;
+ bool Artificial = true;
+ bool Constant = false;
// Map SubRegIndex -> Register.
typedef std::map<CodeGenSubRegIndex *, CodeGenRegister *,
diff --git a/llvm/utils/TableGen/CodeGenSchedule.cpp b/llvm/utils/TableGen/CodeGenSchedule.cpp
index 441a088c1731..04219a6e54d9 100644
--- a/llvm/utils/TableGen/CodeGenSchedule.cpp
+++ b/llvm/utils/TableGen/CodeGenSchedule.cpp
@@ -298,12 +298,12 @@ processSTIPredicate(STIPredicateFunction &Fn,
RecVec Classes = Def->getValueAsListOfDefs("Classes");
for (const Record *EC : Classes) {
const Record *Pred = EC->getValueAsDef("Predicate");
- if (Predicate2Index.find(Pred) == Predicate2Index.end())
+ if (!Predicate2Index.contains(Pred))
Predicate2Index[Pred] = NumUniquePredicates++;
RecVec Opcodes = EC->getValueAsListOfDefs("Opcodes");
for (const Record *Opcode : Opcodes) {
- if (Opcode2Index.find(Opcode) == Opcode2Index.end()) {
+ if (!Opcode2Index.contains(Opcode)) {
Opcode2Index[Opcode] = OpcodeMappings.size();
OpcodeMappings.emplace_back(Opcode, OpcodeInfo());
}
@@ -370,11 +370,11 @@ processSTIPredicate(STIPredicateFunction &Fn,
const std::pair<APInt, APInt> &RhsMasks = OpcodeMasks[RhsIdx];
auto LessThan = [](const APInt &Lhs, const APInt &Rhs) {
- unsigned LhsCountPopulation = Lhs.countPopulation();
- unsigned RhsCountPopulation = Rhs.countPopulation();
+ unsigned LhsCountPopulation = Lhs.popcount();
+ unsigned RhsCountPopulation = Rhs.popcount();
return ((LhsCountPopulation < RhsCountPopulation) ||
((LhsCountPopulation == RhsCountPopulation) &&
- (Lhs.countLeadingZeros() > Rhs.countLeadingZeros())));
+ (Lhs.countl_zero() > Rhs.countl_zero())));
};
if (LhsMasks.first != RhsMasks.first)
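
The hunk above only renames the APInt bit-counting helpers: countPopulation() becomes popcount() and countLeadingZeros() becomes countl_zero(), with unchanged semantics. A self-contained example of the new spellings:

#include "llvm/ADT/APInt.h"
#include "llvm/Support/raw_ostream.h"

int main() {
  llvm::APInt Mask(64, 0x00f0);
  // popcount() == 4 set bits; countl_zero() == 56 leading zero bits.
  llvm::outs() << Mask.popcount() << " bits set, " << Mask.countl_zero()
               << " leading zeros\n";
  return 0;
}
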
diff --git a/llvm/utils/TableGen/CodeGenSchedule.h b/llvm/utils/TableGen/CodeGenSchedule.h
index bbf5381ad086..76ef1e439530 100644
--- a/llvm/utils/TableGen/CodeGenSchedule.h
+++ b/llvm/utils/TableGen/CodeGenSchedule.h
@@ -15,10 +15,17 @@
#define LLVM_UTILS_TABLEGEN_CODEGENSCHEDULE_H
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/SetTheory.h"
+#include <cassert>
+#include <string>
+#include <utility>
+#include <vector>
namespace llvm {
diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
index b7240f01300c..fbdc0499a8cf 100644
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -15,13 +15,17 @@
#include "CodeGenTarget.h"
#include "CodeGenInstruction.h"
-#include "CodeGenIntrinsics.h"
+#include "CodeGenRegisters.h"
#include "CodeGenSchedule.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include <algorithm>
+#include <iterator>
+#include <tuple>
using namespace llvm;
cl::OptionCategory AsmParserCat("Options for -gen-asm-parser");
@@ -77,6 +81,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::ppcf128: return "MVT::ppcf128";
case MVT::x86mmx: return "MVT::x86mmx";
case MVT::x86amx: return "MVT::x86amx";
+ case MVT::aarch64svcount: return "MVT::aarch64svcount";
case MVT::i64x8: return "MVT::i64x8";
case MVT::Glue: return "MVT::Glue";
case MVT::isVoid: return "MVT::isVoid";
@@ -427,6 +432,10 @@ const CodeGenRegister *CodeGenTarget::getRegisterByName(StringRef Name) const {
return getRegBank().getRegistersByName().lookup(Name);
}
+const CodeGenRegisterClass &CodeGenTarget::getRegisterClass(Record *R) const {
+ return *getRegBank().getRegClass(R);
+}
+
std::vector<ValueTypeByHwMode> CodeGenTarget::getRegisterVTs(Record *R)
const {
const CodeGenRegister *Reg = getRegBank().getReg(R);
@@ -635,318 +644,3 @@ ComplexPattern::ComplexPattern(Record *R) {
"'!");
}
}
-
-//===----------------------------------------------------------------------===//
-// CodeGenIntrinsic Implementation
-//===----------------------------------------------------------------------===//
-
-CodeGenIntrinsicTable::CodeGenIntrinsicTable(const RecordKeeper &RC) {
- std::vector<Record *> IntrProperties =
- RC.getAllDerivedDefinitions("IntrinsicProperty");
-
- std::vector<Record *> DefaultProperties;
- for (Record *Rec : IntrProperties)
- if (Rec->getValueAsBit("IsDefault"))
- DefaultProperties.push_back(Rec);
-
- std::vector<Record *> Defs = RC.getAllDerivedDefinitions("Intrinsic");
- Intrinsics.reserve(Defs.size());
-
- for (unsigned I = 0, e = Defs.size(); I != e; ++I)
- Intrinsics.push_back(CodeGenIntrinsic(Defs[I], DefaultProperties));
-
- llvm::sort(Intrinsics,
- [](const CodeGenIntrinsic &LHS, const CodeGenIntrinsic &RHS) {
- return std::tie(LHS.TargetPrefix, LHS.Name) <
- std::tie(RHS.TargetPrefix, RHS.Name);
- });
- Targets.push_back({"", 0, 0});
- for (size_t I = 0, E = Intrinsics.size(); I < E; ++I)
- if (Intrinsics[I].TargetPrefix != Targets.back().Name) {
- Targets.back().Count = I - Targets.back().Offset;
- Targets.push_back({Intrinsics[I].TargetPrefix, I, 0});
- }
- Targets.back().Count = Intrinsics.size() - Targets.back().Offset;
-}
-
-CodeGenIntrinsic::CodeGenIntrinsic(Record *R,
- std::vector<Record *> DefaultProperties) {
- TheDef = R;
- std::string DefName = std::string(R->getName());
- ArrayRef<SMLoc> DefLoc = R->getLoc();
- Properties = 0;
- isOverloaded = false;
- isCommutative = false;
- canThrow = false;
- isNoReturn = false;
- isNoCallback = false;
- isNoSync = false;
- isNoFree = false;
- isWillReturn = false;
- isCold = false;
- isNoDuplicate = false;
- isNoMerge = false;
- isConvergent = false;
- isSpeculatable = false;
- hasSideEffects = false;
-
- if (DefName.size() <= 4 || DefName.substr(0, 4) != "int_")
- PrintFatalError(DefLoc,
- "Intrinsic '" + DefName + "' does not start with 'int_'!");
-
- EnumName = DefName.substr(4);
-
- if (R->getValue("ClangBuiltinName")) // Ignore a missing ClangBuiltinName field.
- ClangBuiltinName = std::string(R->getValueAsString("ClangBuiltinName"));
- if (R->getValue("MSBuiltinName")) // Ignore a missing MSBuiltinName field.
- MSBuiltinName = std::string(R->getValueAsString("MSBuiltinName"));
-
- TargetPrefix = std::string(R->getValueAsString("TargetPrefix"));
- Name = std::string(R->getValueAsString("LLVMName"));
-
- if (Name == "") {
- // If an explicit name isn't specified, derive one from the DefName.
- Name = "llvm.";
-
- for (unsigned i = 0, e = EnumName.size(); i != e; ++i)
- Name += (EnumName[i] == '_') ? '.' : EnumName[i];
- } else {
- // Verify it starts with "llvm.".
- if (Name.size() <= 5 || Name.substr(0, 5) != "llvm.")
- PrintFatalError(DefLoc, "Intrinsic '" + DefName +
- "'s name does not start with 'llvm.'!");
- }
-
- // If TargetPrefix is specified, make sure that Name starts with
- // "llvm.<targetprefix>.".
- if (!TargetPrefix.empty()) {
- if (Name.size() < 6+TargetPrefix.size() ||
- Name.substr(5, 1 + TargetPrefix.size()) != (TargetPrefix + "."))
- PrintFatalError(DefLoc, "Intrinsic '" + DefName +
- "' does not start with 'llvm." +
- TargetPrefix + ".'!");
- }
-
- ListInit *RetTypes = R->getValueAsListInit("RetTypes");
- ListInit *ParamTypes = R->getValueAsListInit("ParamTypes");
-
- // First collate a list of overloaded types.
- std::vector<MVT::SimpleValueType> OverloadedVTs;
- for (ListInit *TypeList : {RetTypes, ParamTypes}) {
- for (unsigned i = 0, e = TypeList->size(); i != e; ++i) {
- Record *TyEl = TypeList->getElementAsRecord(i);
- assert(TyEl->isSubClassOf("LLVMType") && "Expected a type!");
-
- if (TyEl->isSubClassOf("LLVMMatchType"))
- continue;
-
- MVT::SimpleValueType VT = getValueType(TyEl->getValueAsDef("VT"));
- if (MVT(VT).isOverloaded()) {
- OverloadedVTs.push_back(VT);
- isOverloaded = true;
- }
- }
- }
-
- // Parse the list of return types.
- ListInit *TypeList = RetTypes;
- for (unsigned i = 0, e = TypeList->size(); i != e; ++i) {
- Record *TyEl = TypeList->getElementAsRecord(i);
- assert(TyEl->isSubClassOf("LLVMType") && "Expected a type!");
- MVT::SimpleValueType VT;
- if (TyEl->isSubClassOf("LLVMMatchType")) {
- unsigned MatchTy = TyEl->getValueAsInt("Number");
- assert(MatchTy < OverloadedVTs.size() &&
- "Invalid matching number!");
- VT = OverloadedVTs[MatchTy];
- // It only makes sense to use the extended and truncated vector element
- // variants with iAny types; otherwise, if the intrinsic is not
- // overloaded, all the types can be specified directly.
- assert(((!TyEl->isSubClassOf("LLVMExtendedType") &&
- !TyEl->isSubClassOf("LLVMTruncatedType")) ||
- VT == MVT::iAny || VT == MVT::vAny) &&
- "Expected iAny or vAny type");
- } else {
- VT = getValueType(TyEl->getValueAsDef("VT"));
- }
-
- // Reject invalid types.
- if (VT == MVT::isVoid)
- PrintFatalError(DefLoc, "Intrinsic '" + DefName +
- " has void in result type list!");
-
- IS.RetVTs.push_back(VT);
- IS.RetTypeDefs.push_back(TyEl);
- }
-
- // Parse the list of parameter types.
- TypeList = ParamTypes;
- for (unsigned i = 0, e = TypeList->size(); i != e; ++i) {
- Record *TyEl = TypeList->getElementAsRecord(i);
- assert(TyEl->isSubClassOf("LLVMType") && "Expected a type!");
- MVT::SimpleValueType VT;
- if (TyEl->isSubClassOf("LLVMMatchType")) {
- unsigned MatchTy = TyEl->getValueAsInt("Number");
- if (MatchTy >= OverloadedVTs.size()) {
- PrintError(R->getLoc(),
- "Parameter #" + Twine(i) + " has out of bounds matching "
- "number " + Twine(MatchTy));
- PrintFatalError(DefLoc,
- Twine("ParamTypes is ") + TypeList->getAsString());
- }
- VT = OverloadedVTs[MatchTy];
- // It only makes sense to use the extended and truncated vector element
- // variants with iAny types; otherwise, if the intrinsic is not
- // overloaded, all the types can be specified directly.
- assert(((!TyEl->isSubClassOf("LLVMExtendedType") &&
- !TyEl->isSubClassOf("LLVMTruncatedType")) ||
- VT == MVT::iAny || VT == MVT::vAny) &&
- "Expected iAny or vAny type");
- } else
- VT = getValueType(TyEl->getValueAsDef("VT"));
-
- // Reject invalid types.
- if (VT == MVT::isVoid && i != e-1 /*void at end means varargs*/)
- PrintFatalError(DefLoc, "Intrinsic '" + DefName +
- " has void in result type list!");
-
- IS.ParamVTs.push_back(VT);
- IS.ParamTypeDefs.push_back(TyEl);
- }
-
- // Parse the intrinsic properties.
- ListInit *PropList = R->getValueAsListInit("IntrProperties");
- for (unsigned i = 0, e = PropList->size(); i != e; ++i) {
- Record *Property = PropList->getElementAsRecord(i);
- assert(Property->isSubClassOf("IntrinsicProperty") &&
- "Expected a property!");
-
- setProperty(Property);
- }
-
- // Set default properties to true.
- setDefaultProperties(R, DefaultProperties);
-
- // Also record the SDPatternOperator Properties.
- Properties = parseSDPatternOperatorProperties(R);
-
- // Sort the argument attributes for later benefit.
- for (auto &Attrs : ArgumentAttributes)
- llvm::sort(Attrs);
-}
-
-void CodeGenIntrinsic::setDefaultProperties(
- Record *R, std::vector<Record *> DefaultProperties) {
- // opt-out of using default attributes.
- if (R->getValueAsBit("DisableDefaultAttributes"))
- return;
-
- for (Record *Rec : DefaultProperties)
- setProperty(Rec);
-}
-
-void CodeGenIntrinsic::setProperty(Record *R) {
- if (R->getName() == "IntrNoMem")
- ME = MemoryEffects::none();
- else if (R->getName() == "IntrReadMem") {
- if (ME.onlyWritesMemory())
- PrintFatalError(TheDef->getLoc(),
- Twine("IntrReadMem cannot be used after IntrNoMem or "
- "IntrWriteMem. Default is ReadWrite"));
- ME &= MemoryEffects::readOnly();
- } else if (R->getName() == "IntrWriteMem") {
- if (ME.onlyReadsMemory())
- PrintFatalError(TheDef->getLoc(),
- Twine("IntrWriteMem cannot be used after IntrNoMem or "
- "IntrReadMem. Default is ReadWrite"));
- ME &= MemoryEffects::writeOnly();
- } else if (R->getName() == "IntrArgMemOnly")
- ME &= MemoryEffects::argMemOnly();
- else if (R->getName() == "IntrInaccessibleMemOnly")
- ME &= MemoryEffects::inaccessibleMemOnly();
- else if (R->getName() == "IntrInaccessibleMemOrArgMemOnly")
- ME &= MemoryEffects::inaccessibleOrArgMemOnly();
- else if (R->getName() == "Commutative")
- isCommutative = true;
- else if (R->getName() == "Throws")
- canThrow = true;
- else if (R->getName() == "IntrNoDuplicate")
- isNoDuplicate = true;
- else if (R->getName() == "IntrNoMerge")
- isNoMerge = true;
- else if (R->getName() == "IntrConvergent")
- isConvergent = true;
- else if (R->getName() == "IntrNoReturn")
- isNoReturn = true;
- else if (R->getName() == "IntrNoCallback")
- isNoCallback = true;
- else if (R->getName() == "IntrNoSync")
- isNoSync = true;
- else if (R->getName() == "IntrNoFree")
- isNoFree = true;
- else if (R->getName() == "IntrWillReturn")
- isWillReturn = !isNoReturn;
- else if (R->getName() == "IntrCold")
- isCold = true;
- else if (R->getName() == "IntrSpeculatable")
- isSpeculatable = true;
- else if (R->getName() == "IntrHasSideEffects")
- hasSideEffects = true;
- else if (R->isSubClassOf("NoCapture")) {
- unsigned ArgNo = R->getValueAsInt("ArgNo");
- addArgAttribute(ArgNo, NoCapture);
- } else if (R->isSubClassOf("NoAlias")) {
- unsigned ArgNo = R->getValueAsInt("ArgNo");
- addArgAttribute(ArgNo, NoAlias);
- } else if (R->isSubClassOf("NoUndef")) {
- unsigned ArgNo = R->getValueAsInt("ArgNo");
- addArgAttribute(ArgNo, NoUndef);
- } else if (R->isSubClassOf("NonNull")) {
- unsigned ArgNo = R->getValueAsInt("ArgNo");
- addArgAttribute(ArgNo, NonNull);
- } else if (R->isSubClassOf("Returned")) {
- unsigned ArgNo = R->getValueAsInt("ArgNo");
- addArgAttribute(ArgNo, Returned);
- } else if (R->isSubClassOf("ReadOnly")) {
- unsigned ArgNo = R->getValueAsInt("ArgNo");
- addArgAttribute(ArgNo, ReadOnly);
- } else if (R->isSubClassOf("WriteOnly")) {
- unsigned ArgNo = R->getValueAsInt("ArgNo");
- addArgAttribute(ArgNo, WriteOnly);
- } else if (R->isSubClassOf("ReadNone")) {
- unsigned ArgNo = R->getValueAsInt("ArgNo");
- addArgAttribute(ArgNo, ReadNone);
- } else if (R->isSubClassOf("ImmArg")) {
- unsigned ArgNo = R->getValueAsInt("ArgNo");
- addArgAttribute(ArgNo, ImmArg);
- } else if (R->isSubClassOf("Align")) {
- unsigned ArgNo = R->getValueAsInt("ArgNo");
- uint64_t Align = R->getValueAsInt("Align");
- addArgAttribute(ArgNo, Alignment, Align);
- } else
- llvm_unreachable("Unknown property!");
-}
-
-bool CodeGenIntrinsic::isParamAPointer(unsigned ParamIdx) const {
- if (ParamIdx >= IS.ParamVTs.size())
- return false;
- MVT ParamType = MVT(IS.ParamVTs[ParamIdx]);
- return ParamType == MVT::iPTR || ParamType == MVT::iPTRAny;
-}
-
-bool CodeGenIntrinsic::isParamImmArg(unsigned ParamIdx) const {
- // Convert argument index to attribute index starting from `FirstArgIndex`.
- ++ParamIdx;
- if (ParamIdx >= ArgumentAttributes.size())
- return false;
- ArgAttribute Val{ImmArg, 0};
- return std::binary_search(ArgumentAttributes[ParamIdx].begin(),
- ArgumentAttributes[ParamIdx].end(), Val);
-}
-
-void CodeGenIntrinsic::addArgAttribute(unsigned Idx, ArgAttrKind AK,
- uint64_t V) {
- if (Idx >= ArgumentAttributes.size())
- ArgumentAttributes.resize(Idx + 1);
- ArgumentAttributes[Idx].emplace_back(AK, V);
-}
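
The deleted setProperty above folds each Intr* property into the intrinsic's memory effects by intersecting (ME &= MemoryEffects::readOnly(), etc.), so each property can only narrow what the intrinsic may touch, never widen it. A minimal standalone sketch of that intersection idea, using a made-up EffectMask type rather than LLVM's real MemoryEffects class:

    #include <cassert>
    #include <cstdint>

    // Hypothetical two-bit effect mask: bit 0 = may read, bit 1 = may write.
    // LLVM's MemoryEffects tracks this per location; the sketch collapses
    // everything into one location to show only the intersection behaviour.
    struct EffectMask {
      uint8_t Bits; // 0b00 none, 0b01 read-only, 0b10 write-only, 0b11 read/write

      static EffectMask none() { return {0b00}; }
      static EffectMask readOnly() { return {0b01}; }
      static EffectMask writeOnly() { return {0b10}; }
      static EffectMask readWrite() { return {0b11}; }

      // Intersection: the result is at most as permissive as either operand.
      EffectMask &operator&=(EffectMask Other) {
        Bits &= Other.Bits;
        return *this;
      }
      bool onlyReadsMemory() const { return (Bits & 0b10) == 0; }
      bool onlyWritesMemory() const { return (Bits & 0b01) == 0; }
    };

    int main() {
      EffectMask ME = EffectMask::readWrite(); // default: may read and write
      ME &= EffectMask::readOnly();            // an "IntrReadMem"-style property
      assert(ME.onlyReadsMemory());
      ME &= EffectMask::none();                // an "IntrNoMem"-style property zeroes it
      assert(ME.onlyReadsMemory() && ME.onlyWritesMemory());
      return 0;
    }
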
diff --git a/llvm/utils/TableGen/CodeGenTarget.h b/llvm/utils/TableGen/CodeGenTarget.h
index 6846e6b5c77a..2ba3af724d36 100644
--- a/llvm/utils/TableGen/CodeGenTarget.h
+++ b/llvm/utils/TableGen/CodeGenTarget.h
@@ -17,18 +17,29 @@
#define LLVM_UTILS_TABLEGEN_CODEGENTARGET_H
#include "CodeGenHwModes.h"
-#include "CodeGenRegisters.h"
#include "InfoByHwMode.h"
#include "SDNodeProperties.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include <cassert>
+#include <memory>
+#include <optional>
+#include <string>
+#include <vector>
namespace llvm {
class RecordKeeper;
class Record;
class CodeGenInstruction;
-struct CodeGenRegister;
+class CodeGenRegBank;
+class CodeGenRegister;
+class CodeGenRegisterClass;
class CodeGenSchedModels;
-class CodeGenTarget;
+class CodeGenSubRegIndex;
/// getValueType - Return the MVT::SimpleValueType that the specified TableGen
/// record corresponds to.
@@ -122,9 +133,7 @@ public:
return RegAltNameIndices;
}
- const CodeGenRegisterClass &getRegisterClass(Record *R) const {
- return *getRegBank().getRegClass(R);
- }
+ const CodeGenRegisterClass &getRegisterClass(Record *R) const;
/// getRegisterVTs - Find the union of all possible SimpleValueTypes for the
/// specified physical register.
diff --git a/llvm/utils/TableGen/CompressInstEmitter.cpp b/llvm/utils/TableGen/CompressInstEmitter.cpp
index a18d6a6b8854..9d9b69f4cfbd 100644
--- a/llvm/utils/TableGen/CompressInstEmitter.cpp
+++ b/llvm/utils/TableGen/CompressInstEmitter.cpp
@@ -65,6 +65,7 @@
//===----------------------------------------------------------------------===//
#include "CodeGenInstruction.h"
+#include "CodeGenRegisters.h"
#include "CodeGenTarget.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/SmallVector.h"
@@ -902,10 +903,5 @@ void CompressInstEmitter::run(raw_ostream &o) {
emitCompressInstEmitter(o, EmitterType::CheckCompress);
}
-namespace llvm {
-
-void EmitCompressInst(RecordKeeper &RK, raw_ostream &OS) {
- CompressInstEmitter(RK).run(OS);
-}
-
-} // namespace llvm
+static TableGen::Emitter::OptClass<CompressInstEmitter>
+ X("gen-compress-inst-emitter", "Generate compressed instructions.");
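
The hunk above replaces the exported EmitCompressInst wrapper with a file-local TableGen::Emitter::OptClass object, so the backend registers itself under its -gen-* option at static-initialization time instead of being wired up by hand. A rough sketch of that self-registration idiom with a toy registry; the Registry, OptClass, and DemoEmitter names below are illustrative, not LLVM's real API:

    #include <functional>
    #include <iostream>
    #include <map>
    #include <string>

    // Toy registry keyed by command-line option name.
    using BackendFn = std::function<void(std::ostream &)>;

    static std::map<std::string, BackendFn> &getRegistry() {
      static std::map<std::string, BackendFn> R; // constructed on first use
      return R;
    }

    // Registrar whose constructor runs during static initialization and
    // records a backend under its option name.
    template <class EmitterT> struct OptClass {
      explicit OptClass(const std::string &Opt) {
        getRegistry()[Opt] = [](std::ostream &OS) { EmitterT().run(OS); };
      }
    };

    struct DemoEmitter {
      void run(std::ostream &OS) { OS << "demo backend output\n"; }
    };

    // One static object per backend replaces a hand-written dispatch function.
    static OptClass<DemoEmitter> X("gen-demo");

    int main() {
      getRegistry()["gen-demo"](std::cout); // dispatch purely by option name
      return 0;
    }
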
diff --git a/llvm/utils/TableGen/DAGISelEmitter.cpp b/llvm/utils/TableGen/DAGISelEmitter.cpp
index d012a0172a8f..eaf7f7f9f0a3 100644
--- a/llvm/utils/TableGen/DAGISelEmitter.cpp
+++ b/llvm/utils/TableGen/DAGISelEmitter.cpp
@@ -12,6 +12,7 @@
#include "CodeGenDAGPatterns.h"
#include "CodeGenInstruction.h"
+#include "CodeGenTarget.h"
#include "DAGISelMatcher.h"
#include "llvm/Support/Debug.h"
#include "llvm/TableGen/Record.h"
@@ -123,6 +124,7 @@ struct PatternSortingPredicate {
void DAGISelEmitter::run(raw_ostream &OS) {
+ Records.startTimer("Parse patterns");
emitSourceFileHeader("DAG Instruction Selector for the " +
CGP.getTargetInfo().getName().str() + " target", OS);
@@ -163,7 +165,7 @@ void DAGISelEmitter::run(raw_ostream &OS) {
// Convert each variant of each pattern into a Matcher.
Records.startTimer("Convert to matchers");
- std::vector<Matcher*> PatternMatchers;
+ SmallVector<Matcher *, 0> PatternMatchers;
for (const PatternToMatch *PTM : Patterns) {
for (unsigned Variant = 0; ; ++Variant) {
if (Matcher *M = ConvertPatternToMatcher(*PTM, Variant, CGP))
@@ -174,7 +176,7 @@ void DAGISelEmitter::run(raw_ostream &OS) {
}
std::unique_ptr<Matcher> TheMatcher =
- std::make_unique<ScopeMatcher>(PatternMatchers);
+ std::make_unique<ScopeMatcher>(std::move(PatternMatchers));
Records.startTimer("Optimize matchers");
OptimizeMatcher(TheMatcher, CGP);
@@ -185,11 +187,5 @@ void DAGISelEmitter::run(raw_ostream &OS) {
EmitMatcherTable(TheMatcher.get(), CGP, OS);
}
-namespace llvm {
-
-void EmitDAGISel(RecordKeeper &RK, raw_ostream &OS) {
- RK.startTimer("Parse patterns");
- DAGISelEmitter(RK).run(OS);
-}
-
-} // End llvm namespace
+static TableGen::Emitter::OptClass<DAGISelEmitter>
+ X("gen-dag-isel", "Generate a DAG instruction selector");
diff --git a/llvm/utils/TableGen/DAGISelMatcher.cpp b/llvm/utils/TableGen/DAGISelMatcher.cpp
index e436a931a9f5..0609f006763b 100644
--- a/llvm/utils/TableGen/DAGISelMatcher.cpp
+++ b/llvm/utils/TableGen/DAGISelMatcher.cpp
@@ -8,6 +8,8 @@
#include "DAGISelMatcher.h"
#include "CodeGenDAGPatterns.h"
+#include "CodeGenInstruction.h"
+#include "CodeGenRegisters.h"
#include "CodeGenTarget.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
@@ -290,7 +292,7 @@ void EmitNodeXFormMatcher::printImpl(raw_ostream &OS, unsigned indent) const {
void EmitNodeMatcherCommon::printImpl(raw_ostream &OS, unsigned indent) const {
OS.indent(indent);
OS << (isa<MorphNodeToMatcher>(this) ? "MorphNodeTo: " : "EmitNode: ")
- << OpcodeName << ": <todo flags> ";
+ << CGI.Namespace << "::" << CGI.TheDef->getName() << ": <todo flags> ";
for (unsigned i = 0, e = VTs.size(); i != e; ++i)
OS << ' ' << getEnumName(VTs[i]);
@@ -315,10 +317,9 @@ bool CheckOpcodeMatcher::isEqualImpl(const Matcher *M) const {
bool EmitNodeMatcherCommon::isEqualImpl(const Matcher *m) const {
const EmitNodeMatcherCommon *M = cast<EmitNodeMatcherCommon>(m);
- return M->OpcodeName == OpcodeName && M->VTs == VTs &&
- M->Operands == Operands && M->HasChain == HasChain &&
- M->HasInGlue == HasInGlue && M->HasOutGlue == HasOutGlue &&
- M->HasMemRefs == HasMemRefs &&
+ return &M->CGI == &CGI && M->VTs == VTs && M->Operands == Operands &&
+ M->HasChain == HasChain && M->HasInGlue == HasInGlue &&
+ M->HasOutGlue == HasOutGlue && M->HasMemRefs == HasMemRefs &&
M->NumFixedArityOperands == NumFixedArityOperands;
}
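
EmitNodeMatcherCommon now stores a const CodeGenInstruction & instead of a "Namespace::Opcode" string, which lets isEqualImpl compare identity with &M->CGI == &CGI rather than comparing string contents. A small sketch of why address identity is a valid equality test when both references come from a single uniquing table (Instruction and InstructionTable are made-up stand-ins):

    #include <cassert>
    #include <map>
    #include <string>

    // Pretend instruction descriptor, uniqued by an owning table.
    struct Instruction { std::string Name; };

    struct InstructionTable {
      std::map<std::string, Instruction> Storage;
      // Always returns a reference to the single stored object for a name,
      // so two lookups of the same name yield the same address.
      const Instruction &get(const std::string &Name) {
        return Storage.try_emplace(Name, Instruction{Name}).first->second;
      }
    };

    int main() {
      InstructionTable Table;
      const Instruction &A = Table.get("ADDri");
      const Instruction &B = Table.get("ADDri");
      assert(&A == &B);                 // identity check, no string comparison
      assert(&A != &Table.get("SUBri"));
      return 0;
    }
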
diff --git a/llvm/utils/TableGen/DAGISelMatcher.h b/llvm/utils/TableGen/DAGISelMatcher.h
index 77280acaf4ca..e3cf847edd12 100644
--- a/llvm/utils/TableGen/DAGISelMatcher.h
+++ b/llvm/utils/TableGen/DAGISelMatcher.h
@@ -12,12 +12,18 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/MachineValueType.h"
+#include <cassert>
+#include <cstddef>
+#include <memory>
+#include <string>
+#include <utility>
namespace llvm {
- struct CodeGenRegister;
+ class CodeGenRegister;
class CodeGenDAGPatterns;
+ class CodeGenInstruction;
class Matcher;
class PatternToMatch;
class raw_ostream;
@@ -41,7 +47,7 @@ class Matcher {
// The next matcher node that is executed after this one. Null if this is the
// last stage of a match.
std::unique_ptr<Matcher> Next;
- size_t Size; // Size in bytes of matcher and all its children (if any).
+ size_t Size = 0; // Size in bytes of matcher and all its children (if any).
virtual void anchor();
public:
enum KindTy {
@@ -189,9 +195,8 @@ protected:
class ScopeMatcher : public Matcher {
SmallVector<Matcher*, 4> Children;
public:
- ScopeMatcher(ArrayRef<Matcher *> children)
- : Matcher(Scope), Children(children.begin(), children.end()) {
- }
+ ScopeMatcher(SmallVectorImpl<Matcher *> &&children)
+ : Matcher(Scope), Children(std::move(children)) {}
~ScopeMatcher() override;
unsigned getNumChildren() const { return Children.size(); }
@@ -473,8 +478,9 @@ private:
class SwitchOpcodeMatcher : public Matcher {
SmallVector<std::pair<const SDNodeInfo*, Matcher*>, 8> Cases;
public:
- SwitchOpcodeMatcher(ArrayRef<std::pair<const SDNodeInfo*, Matcher*> > cases)
- : Matcher(SwitchOpcode), Cases(cases.begin(), cases.end()) {}
+ SwitchOpcodeMatcher(
+ SmallVectorImpl<std::pair<const SDNodeInfo *, Matcher *>> &&cases)
+ : Matcher(SwitchOpcode), Cases(std::move(cases)) {}
~SwitchOpcodeMatcher() override;
static bool classof(const Matcher *N) {
@@ -523,8 +529,9 @@ private:
class SwitchTypeMatcher : public Matcher {
SmallVector<std::pair<MVT::SimpleValueType, Matcher*>, 8> Cases;
public:
- SwitchTypeMatcher(ArrayRef<std::pair<MVT::SimpleValueType, Matcher*> > cases)
- : Matcher(SwitchType), Cases(cases.begin(), cases.end()) {}
+ SwitchTypeMatcher(
+ SmallVectorImpl<std::pair<MVT::SimpleValueType, Matcher *>> &&cases)
+ : Matcher(SwitchType), Cases(std::move(cases)) {}
~SwitchTypeMatcher() override;
static bool classof(const Matcher *N) {
@@ -991,7 +998,7 @@ private:
/// EmitNodeMatcherCommon - Common class shared between EmitNode and
/// MorphNodeTo.
class EmitNodeMatcherCommon : public Matcher {
- std::string OpcodeName;
+ const CodeGenInstruction &CGI;
const SmallVector<MVT::SimpleValueType, 3> VTs;
const SmallVector<unsigned, 6> Operands;
bool HasChain, HasInGlue, HasOutGlue, HasMemRefs;
@@ -1001,18 +1008,17 @@ class EmitNodeMatcherCommon : public Matcher {
/// operands in the root of the pattern. The rest are appended to this node.
int NumFixedArityOperands;
public:
- EmitNodeMatcherCommon(const std::string &opcodeName,
+ EmitNodeMatcherCommon(const CodeGenInstruction &cgi,
ArrayRef<MVT::SimpleValueType> vts,
- ArrayRef<unsigned> operands,
- bool hasChain, bool hasInGlue, bool hasOutGlue,
- bool hasmemrefs,
+ ArrayRef<unsigned> operands, bool hasChain,
+ bool hasInGlue, bool hasOutGlue, bool hasmemrefs,
int numfixedarityoperands, bool isMorphNodeTo)
- : Matcher(isMorphNodeTo ? MorphNodeTo : EmitNode), OpcodeName(opcodeName),
- VTs(vts.begin(), vts.end()), Operands(operands.begin(), operands.end()),
- HasChain(hasChain), HasInGlue(hasInGlue), HasOutGlue(hasOutGlue),
- HasMemRefs(hasmemrefs), NumFixedArityOperands(numfixedarityoperands) {}
+ : Matcher(isMorphNodeTo ? MorphNodeTo : EmitNode), CGI(cgi),
+ VTs(vts.begin(), vts.end()), Operands(operands.begin(), operands.end()),
+ HasChain(hasChain), HasInGlue(hasInGlue), HasOutGlue(hasOutGlue),
+ HasMemRefs(hasmemrefs), NumFixedArityOperands(numfixedarityoperands) {}
- const std::string &getOpcodeName() const { return OpcodeName; }
+ const CodeGenInstruction &getInstruction() const { return CGI; }
unsigned getNumVTs() const { return VTs.size(); }
MVT::SimpleValueType getVT(unsigned i) const {
@@ -1031,8 +1037,8 @@ public:
bool hasChain() const { return HasChain; }
- bool hasInFlag() const { return HasInGlue; }
- bool hasOutFlag() const { return HasOutGlue; }
+ bool hasInGlue() const { return HasInGlue; }
+ bool hasOutGlue() const { return HasOutGlue; }
bool hasMemRefs() const { return HasMemRefs; }
int getNumFixedArityOperands() const { return NumFixedArityOperands; }
@@ -1050,16 +1056,15 @@ class EmitNodeMatcher : public EmitNodeMatcherCommon {
void anchor() override;
unsigned FirstResultSlot;
public:
- EmitNodeMatcher(const std::string &opcodeName,
+ EmitNodeMatcher(const CodeGenInstruction &cgi,
ArrayRef<MVT::SimpleValueType> vts,
- ArrayRef<unsigned> operands,
- bool hasChain, bool hasInFlag, bool hasOutFlag,
- bool hasmemrefs,
- int numfixedarityoperands, unsigned firstresultslot)
- : EmitNodeMatcherCommon(opcodeName, vts, operands, hasChain,
- hasInFlag, hasOutFlag, hasmemrefs,
- numfixedarityoperands, false),
- FirstResultSlot(firstresultslot) {}
+ ArrayRef<unsigned> operands, bool hasChain, bool hasInGlue,
+ bool hasOutGlue, bool hasmemrefs, int numfixedarityoperands,
+ unsigned firstresultslot)
+ : EmitNodeMatcherCommon(cgi, vts, operands, hasChain, hasInGlue,
+ hasOutGlue, hasmemrefs, numfixedarityoperands,
+ false),
+ FirstResultSlot(firstresultslot) {}
unsigned getFirstResultSlot() const { return FirstResultSlot; }
@@ -1073,17 +1078,15 @@ class MorphNodeToMatcher : public EmitNodeMatcherCommon {
void anchor() override;
const PatternToMatch &Pattern;
public:
- MorphNodeToMatcher(const std::string &opcodeName,
+ MorphNodeToMatcher(const CodeGenInstruction &cgi,
ArrayRef<MVT::SimpleValueType> vts,
- ArrayRef<unsigned> operands,
- bool hasChain, bool hasInFlag, bool hasOutFlag,
- bool hasmemrefs,
+ ArrayRef<unsigned> operands, bool hasChain, bool hasInGlue,
+ bool hasOutGlue, bool hasmemrefs,
int numfixedarityoperands, const PatternToMatch &pattern)
- : EmitNodeMatcherCommon(opcodeName, vts, operands, hasChain,
- hasInFlag, hasOutFlag, hasmemrefs,
- numfixedarityoperands, true),
- Pattern(pattern) {
- }
+ : EmitNodeMatcherCommon(cgi, vts, operands, hasChain, hasInGlue,
+ hasOutGlue, hasmemrefs, numfixedarityoperands,
+ true),
+ Pattern(pattern) {}
const PatternToMatch &getPattern() const { return Pattern; }
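
ScopeMatcher, SwitchOpcodeMatcher, and SwitchTypeMatcher above now take a SmallVectorImpl&& and move it into the member instead of copying element-by-element from an ArrayRef. A minimal sketch of the difference, using std::vector in place of SmallVector:

    #include <cassert>
    #include <utility>
    #include <vector>

    struct CopyingScope {
      std::vector<int> Children;
      // Copies every element out of the caller's buffer.
      explicit CopyingScope(const std::vector<int> &C) : Children(C) {}
    };

    struct MovingScope {
      std::vector<int> Children;
      // Steals the caller's buffer; no per-element copy.
      explicit MovingScope(std::vector<int> &&C) : Children(std::move(C)) {}
    };

    int main() {
      std::vector<int> A = {1, 2, 3};
      CopyingScope C(A);            // A still owns its three elements
      MovingScope M(std::move(A));  // A's buffer is handed over; A stays valid but unspecified
      assert(C.Children.size() == 3 && M.Children.size() == 3);
      return 0;
    }
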
diff --git a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
index 777e75dcd929..28d4d585f3dd 100644
--- a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -11,7 +11,11 @@
//===----------------------------------------------------------------------===//
#include "CodeGenDAGPatterns.h"
+#include "CodeGenInstruction.h"
+#include "CodeGenRegisters.h"
+#include "CodeGenTarget.h"
#include "DAGISelMatcher.h"
+#include "SDNodeProperties.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringMap.h"
@@ -80,9 +84,8 @@ class MatcherTableEmitter {
}
public:
- MatcherTableEmitter(const CodeGenDAGPatterns &cgp) : CGP(cgp) {
- OpcodeCounts.assign(Matcher::HighestKind+1, 0);
- }
+ MatcherTableEmitter(const CodeGenDAGPatterns &cgp)
+ : CGP(cgp), OpcodeCounts(Matcher::HighestKind + 1, 0) {}
unsigned EmitMatcherList(const Matcher *N, const unsigned Indent,
unsigned StartIdx, raw_ostream &OS);
@@ -772,11 +775,13 @@ EmitMatcher(const Matcher *N, const unsigned Indent, unsigned CurrentIdx,
if (CompressVTs)
OS << EN->getNumVTs();
- OS << ", TARGET_VAL(" << EN->getOpcodeName() << "), 0";
+ const CodeGenInstruction &CGI = EN->getInstruction();
+ OS << ", TARGET_VAL(" << CGI.Namespace << "::" << CGI.TheDef->getName()
+ << "), 0";
if (EN->hasChain()) OS << "|OPFL_Chain";
- if (EN->hasInFlag()) OS << "|OPFL_GlueInput";
- if (EN->hasOutFlag()) OS << "|OPFL_GlueOutput";
+ if (EN->hasInGlue()) OS << "|OPFL_GlueInput";
+ if (EN->hasOutGlue()) OS << "|OPFL_GlueOutput";
if (EN->hasMemRefs()) OS << "|OPFL_MemRefs";
if (EN->getNumFixedArityOperands() != -1)
OS << "|OPFL_Variadic" << EN->getNumFixedArityOperands();
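
MatcherTableEmitter now sizes OpcodeCounts in the constructor's member-initializer list instead of default-constructing it and calling assign() in the body. A small sketch of the same change in isolation:

    #include <cassert>
    #include <vector>

    struct CounterTableAssign {
      std::vector<unsigned> Counts;
      explicit CounterTableAssign(unsigned NumKinds) {
        Counts.assign(NumKinds, 0); // default-construct, then resize and fill
      }
    };

    struct CounterTableInit {
      std::vector<unsigned> Counts;
      // Construct the member with its final size and fill value in one step.
      explicit CounterTableInit(unsigned NumKinds) : Counts(NumKinds, 0) {}
    };

    int main() {
      CounterTableInit T(8);
      assert(T.Counts.size() == 8 && T.Counts[3] == 0);
      CounterTableAssign A(8);
      assert(A.Counts == T.Counts); // same contents either way
      return 0;
    }
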
diff --git a/llvm/utils/TableGen/DAGISelMatcherGen.cpp b/llvm/utils/TableGen/DAGISelMatcherGen.cpp
index 44bff4c67ab3..f773f7c77a77 100644
--- a/llvm/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/llvm/utils/TableGen/DAGISelMatcherGen.cpp
@@ -9,7 +9,10 @@
#include "CodeGenDAGPatterns.h"
#include "CodeGenInstruction.h"
#include "CodeGenRegisters.h"
+#include "CodeGenTarget.h"
#include "DAGISelMatcher.h"
+#include "InfoByHwMode.h"
+#include "SDNodeProperties.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/TableGen/Error.h"
@@ -19,8 +22,8 @@ using namespace llvm;
/// getRegisterValueType - Look up and return the ValueType of the specified
-/// register. If the register is a member of multiple register classes which
-/// have different associated types, return MVT::Other.
+/// register. If the register is a member of multiple register classes, they
+/// must all have the same type.
static MVT::SimpleValueType getRegisterValueType(Record *R,
const CodeGenTarget &T) {
bool FoundRC = false;
@@ -34,15 +37,15 @@ static MVT::SimpleValueType getRegisterValueType(Record *R,
if (!FoundRC) {
FoundRC = true;
const ValueTypeByHwMode &VVT = RC.getValueTypeNum(0);
- if (VVT.isSimple())
- VT = VVT.getSimple().SimpleTy;
+ assert(VVT.isSimple());
+ VT = VVT.getSimple().SimpleTy;
continue;
}
#ifndef NDEBUG
// If this occurs in multiple register classes, they all have to agree.
- const ValueTypeByHwMode &T = RC.getValueTypeNum(0);
- assert((!T.isSimple() || T.getSimple().SimpleTy == VT) &&
+ const ValueTypeByHwMode &VVT = RC.getValueTypeNum(0);
+ assert(VVT.isSimple() && VVT.getSimple().SimpleTy == VT &&
"ValueType mismatch between register classes for this register");
#endif
}
@@ -107,15 +110,13 @@ namespace {
Matcher *GetMatcher() const { return TheMatcher; }
private:
void AddMatcher(Matcher *NewNode);
- void InferPossibleTypes(unsigned ForceMode);
+ void InferPossibleTypes();
// Matcher Generation.
- void EmitMatchCode(const TreePatternNode *N, TreePatternNode *NodeNoTypes,
- unsigned ForceMode);
+ void EmitMatchCode(const TreePatternNode *N, TreePatternNode *NodeNoTypes);
void EmitLeafMatchCode(const TreePatternNode *N);
void EmitOperatorMatchCode(const TreePatternNode *N,
- TreePatternNode *NodeNoTypes,
- unsigned ForceMode);
+ TreePatternNode *NodeNoTypes);
/// If this is the first time a node with unique identifier Name has been
/// seen, record it. Otherwise, emit a check to make sure this is the same
@@ -164,19 +165,17 @@ MatcherGen::MatcherGen(const PatternToMatch &pattern,
PatWithNoTypes->RemoveAllTypes();
// If there are types that are manifestly known, infer them.
- InferPossibleTypes(Pattern.getForceMode());
+ InferPossibleTypes();
}
/// InferPossibleTypes - As we emit the pattern, we end up generating type
/// checks and applying them to the 'PatWithNoTypes' tree. As we do this, we
/// want to propagate implied types as far throughout the tree as possible so
/// that we avoid doing redundant type checks. This does the type propagation.
-void MatcherGen::InferPossibleTypes(unsigned ForceMode) {
+void MatcherGen::InferPossibleTypes() {
// TP - Get *SOME* tree pattern, we don't care which. It is only used for
// diagnostics, which we know are impossible at this point.
TreePattern &TP = *CGP.pf_begin()->second;
- TP.getInfer().CodeGen = true;
- TP.getInfer().ForceMode = ForceMode;
bool MadeChange = true;
while (MadeChange)
@@ -278,7 +277,8 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
return;
}
- if (LeafRec->getName() == "immAllOnesV") {
+ if (LeafRec->getName() == "immAllOnesV" ||
+ LeafRec->getName() == "immAllZerosV") {
// If this is the root of the dag we're matching, we emit a redundant opcode
// check to ensure that this gets folded into the normal top-level
// OpcodeSwitch.
@@ -288,19 +288,11 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
const SDNodeInfo &NI = CGP.getSDNodeInfo(CGP.getSDNodeNamed(Name));
AddMatcher(new CheckOpcodeMatcher(NI));
}
- return AddMatcher(new CheckImmAllOnesVMatcher());
- }
- if (LeafRec->getName() == "immAllZerosV") {
- // If this is the root of the dag we're matching, we emit a redundant opcode
- // check to ensure that this gets folded into the normal top-level
- // OpcodeSwitch.
- if (N == Pattern.getSrcPattern()) {
- MVT VT = N->getSimpleType(0);
- StringRef Name = VT.isScalableVector() ? "splat_vector" : "build_vector";
- const SDNodeInfo &NI = CGP.getSDNodeInfo(CGP.getSDNodeNamed(Name));
- AddMatcher(new CheckOpcodeMatcher(NI));
- }
- return AddMatcher(new CheckImmAllZerosVMatcher());
+ if (LeafRec->getName() == "immAllOnesV")
+ AddMatcher(new CheckImmAllOnesVMatcher());
+ else
+ AddMatcher(new CheckImmAllZerosVMatcher());
+ return;
}
errs() << "Unknown leaf kind: " << *N << "\n";
@@ -308,8 +300,7 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
}
void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N,
- TreePatternNode *NodeNoTypes,
- unsigned ForceMode) {
+ TreePatternNode *NodeNoTypes) {
assert(!N->isLeaf() && "Not an operator?");
if (N->getOperator()->isSubClassOf("ComplexPattern")) {
@@ -347,7 +338,8 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N,
N->getChild(1)->isLeaf() && N->getChild(1)->getPredicateCalls().empty() &&
N->getPredicateCalls().empty()) {
if (IntInit *II = dyn_cast<IntInit>(N->getChild(1)->getLeafValue())) {
- if (!isPowerOf2_32(II->getValue())) { // Don't bother with single bits.
+ if (!llvm::has_single_bit<uint32_t>(
+ II->getValue())) { // Don't bother with single bits.
// If this is at the root of the pattern, we emit a redundant
// CheckOpcode so that the following checks get factored properly under
// a single opcode check.
@@ -362,7 +354,7 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N,
// Match the LHS of the AND as appropriate.
AddMatcher(new MoveChildMatcher(0));
- EmitMatchCode(N->getChild(0), NodeNoTypes->getChild(0), ForceMode);
+ EmitMatchCode(N->getChild(0), NodeNoTypes->getChild(0));
AddMatcher(new MoveParentMatcher());
return;
}
@@ -461,7 +453,7 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N,
// Get the code suitable for matching this child. Move to the child, check
// it then move back to the parent.
AddMatcher(new MoveChildMatcher(OpNo));
- EmitMatchCode(N->getChild(i), NodeNoTypes->getChild(i), ForceMode);
+ EmitMatchCode(N->getChild(i), NodeNoTypes->getChild(i));
AddMatcher(new MoveParentMatcher());
}
}
@@ -502,8 +494,7 @@ bool MatcherGen::recordUniqueNode(ArrayRef<std::string> Names) {
}
void MatcherGen::EmitMatchCode(const TreePatternNode *N,
- TreePatternNode *NodeNoTypes,
- unsigned ForceMode) {
+ TreePatternNode *NodeNoTypes) {
// If N and NodeNoTypes don't agree on a type, then this is a case where we
// need to do a type check. Emit the check, apply the type to NodeNoTypes and
// reinfer any correlated types.
@@ -512,7 +503,7 @@ void MatcherGen::EmitMatchCode(const TreePatternNode *N,
for (unsigned i = 0, e = NodeNoTypes->getNumTypes(); i != e; ++i) {
if (NodeNoTypes->getExtType(i) == N->getExtType(i)) continue;
NodeNoTypes->setType(i, N->getExtType(i));
- InferPossibleTypes(ForceMode);
+ InferPossibleTypes();
ResultsToTypeCheck.push_back(i);
}
@@ -534,7 +525,7 @@ void MatcherGen::EmitMatchCode(const TreePatternNode *N,
if (N->isLeaf())
EmitLeafMatchCode(N);
else
- EmitOperatorMatchCode(N, NodeNoTypes, ForceMode);
+ EmitOperatorMatchCode(N, NodeNoTypes);
// If there are node predicates for this node, generate their checks.
for (unsigned i = 0, e = N->getPredicateCalls().size(); i != e; ++i) {
@@ -576,13 +567,13 @@ bool MatcherGen::EmitMatcherCode(unsigned Variant) {
}
// Emit the matcher for the pattern structure and types.
- EmitMatchCode(Pattern.getSrcPattern(), PatWithNoTypes.get(),
- Pattern.getForceMode());
+ EmitMatchCode(Pattern.getSrcPattern(), PatWithNoTypes.get());
// If the pattern has a predicate on it (e.g. only enabled when a subtarget
// feature is around, do the check).
- if (!Pattern.getPredicateCheck().empty())
- AddMatcher(new CheckPatternPredicateMatcher(Pattern.getPredicateCheck()));
+ std::string PredicateCheck = Pattern.getPredicateCheck();
+ if (!PredicateCheck.empty())
+ AddMatcher(new CheckPatternPredicateMatcher(PredicateCheck));
// Now that we've completed the structural type match, emit any ComplexPattern
// checks (e.g. addrmode matches). We emit this after the structural match
@@ -605,16 +596,17 @@ bool MatcherGen::EmitMatcherCode(unsigned Variant) {
// Get the slot we recorded the value in from the name on the node.
unsigned RecNodeEntry = MatchedComplexPatterns[i].second;
- const ComplexPattern &CP = *N->getComplexPatternInfo(CGP);
+ const ComplexPattern *CP = N->getComplexPatternInfo(CGP);
+ assert(CP && "Not a valid ComplexPattern!");
// Emit a CheckComplexPat operation, which does the match (aborting if it
// fails) and pushes the matched operands onto the recorded nodes list.
- AddMatcher(new CheckComplexPatMatcher(CP, RecNodeEntry,
- N->getName(), NextRecordedOperandNo));
+ AddMatcher(new CheckComplexPatMatcher(*CP, RecNodeEntry, N->getName(),
+ NextRecordedOperandNo));
// Record the right number of operands.
- NextRecordedOperandNo += CP.getNumOperands();
- if (CP.hasProperty(SDNPHasChain)) {
+ NextRecordedOperandNo += CP->getNumOperands();
+ if (CP->hasProperty(SDNPHasChain)) {
// If the complex pattern has a chain, then we need to keep track of the
// fact that we just recorded a chain input. The chain input will be
// matched as the last operand of the predicate if it was successful.
@@ -697,12 +689,12 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N,
}
if (Def->getName() == "undef_tied_input") {
- std::array<MVT::SimpleValueType, 1> ResultVTs = {{ N->getSimpleType(0) }};
- std::array<unsigned, 0> InstOps;
+ MVT::SimpleValueType ResultVT = N->getSimpleType(0);
auto IDOperandNo = NextRecordedOperandNo++;
- AddMatcher(new EmitNodeMatcher("TargetOpcode::IMPLICIT_DEF",
- ResultVTs, InstOps, false, false, false,
- false, -1, IDOperandNo));
+ Record *ImpDef = Def->getRecords().getDef("IMPLICIT_DEF");
+ CodeGenInstruction &II = CGP.getTargetInfo().getInstruction(ImpDef);
+ AddMatcher(new EmitNodeMatcher(II, ResultVT, std::nullopt, false, false,
+ false, false, -1, IDOperandNo));
ResultOps.push_back(IDOperandNo);
return;
}
@@ -983,11 +975,9 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
assert((!ResultVTs.empty() || TreeHasOutGlue || NodeHasChain) &&
"Node has no result");
- AddMatcher(new EmitNodeMatcher(II.Namespace.str()+"::"+II.TheDef->getName().str(),
- ResultVTs, InstOps,
- NodeHasChain, TreeHasInGlue, TreeHasOutGlue,
- NodeHasMemRefs, NumFixedArityOperands,
- NextRecordedOperandNo));
+ AddMatcher(new EmitNodeMatcher(II, ResultVTs, InstOps, NodeHasChain,
+ TreeHasInGlue, TreeHasOutGlue, NodeHasMemRefs,
+ NumFixedArityOperands, NextRecordedOperandNo));
// The non-chain and non-glue results of the newly emitted node get recorded.
for (unsigned i = 0, e = ResultVTs.size(); i != e; ++i) {
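
The AND-mask check in this file now uses llvm::has_single_bit<uint32_t>(...) where it previously called isPowerOf2_32; both test for exactly one set bit (and reject zero). A short sketch using the standard C++20 counterpart:

    #include <bit>
    #include <cassert>
    #include <cstdint>

    // A value is a power of two exactly when its binary representation has a
    // single set bit; 0 has no set bits and is therefore rejected.
    static bool isPow2(uint32_t V) { return std::has_single_bit(V); }

    int main() {
      assert(isPow2(1) && isPow2(64) && isPow2(0x80000000u));
      assert(!isPow2(0) && !isPow2(3) && !isPow2(6));
      return 0;
    }
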
diff --git a/llvm/utils/TableGen/DAGISelMatcherOpt.cpp b/llvm/utils/TableGen/DAGISelMatcherOpt.cpp
index 4273bd69b87d..bf2a24241e84 100644
--- a/llvm/utils/TableGen/DAGISelMatcherOpt.cpp
+++ b/llvm/utils/TableGen/DAGISelMatcherOpt.cpp
@@ -10,8 +10,9 @@
//
//===----------------------------------------------------------------------===//
-#include "DAGISelMatcher.h"
#include "CodeGenDAGPatterns.h"
+#include "DAGISelMatcher.h"
+#include "SDNodeProperties.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -25,8 +26,9 @@ static void ContractNodes(std::unique_ptr<Matcher> &MatcherPtr,
const CodeGenDAGPatterns &CGP) {
// If we reached the end of the chain, we're done.
Matcher *N = MatcherPtr.get();
- if (!N) return;
-
+ if (!N)
+ return;
+
// If we have a scope node, walk down all of the children.
if (ScopeMatcher *Scope = dyn_cast<ScopeMatcher>(N)) {
for (unsigned i = 0, e = Scope->getNumChildren(); i != e; ++i) {
@@ -36,31 +38,31 @@ static void ContractNodes(std::unique_ptr<Matcher> &MatcherPtr,
}
return;
}
-
+
// If we found a movechild node with a node that comes in a 'foochild' form,
// transform it.
if (MoveChildMatcher *MC = dyn_cast<MoveChildMatcher>(N)) {
Matcher *New = nullptr;
if (RecordMatcher *RM = dyn_cast<RecordMatcher>(MC->getNext()))
- if (MC->getChildNo() < 8) // Only have RecordChild0...7
+ if (MC->getChildNo() < 8) // Only have RecordChild0...7
New = new RecordChildMatcher(MC->getChildNo(), RM->getWhatFor(),
RM->getResultNo());
if (CheckTypeMatcher *CT = dyn_cast<CheckTypeMatcher>(MC->getNext()))
- if (MC->getChildNo() < 8 && // Only have CheckChildType0...7
- CT->getResNo() == 0) // CheckChildType checks res #0
+ if (MC->getChildNo() < 8 && // Only have CheckChildType0...7
+ CT->getResNo() == 0) // CheckChildType checks res #0
New = new CheckChildTypeMatcher(MC->getChildNo(), CT->getType());
if (CheckSameMatcher *CS = dyn_cast<CheckSameMatcher>(MC->getNext()))
- if (MC->getChildNo() < 4) // Only have CheckChildSame0...3
+ if (MC->getChildNo() < 4) // Only have CheckChildSame0...3
New = new CheckChildSameMatcher(MC->getChildNo(), CS->getMatchNumber());
if (CheckIntegerMatcher *CI = dyn_cast<CheckIntegerMatcher>(MC->getNext()))
- if (MC->getChildNo() < 5) // Only have CheckChildInteger0...4
+ if (MC->getChildNo() < 5) // Only have CheckChildInteger0...4
New = new CheckChildIntegerMatcher(MC->getChildNo(), CI->getValue());
if (auto *CCC = dyn_cast<CheckCondCodeMatcher>(MC->getNext()))
- if (MC->getChildNo() == 2) // Only have CheckChild2CondCode
+ if (MC->getChildNo() == 2) // Only have CheckChild2CondCode
New = new CheckChild2CondCodeMatcher(CCC->getCondCodeName());
if (New) {
@@ -72,11 +74,10 @@ static void ContractNodes(std::unique_ptr<Matcher> &MatcherPtr,
return ContractNodes(MatcherPtr, CGP);
}
}
-
+
// Zap movechild -> moveparent.
if (MoveChildMatcher *MC = dyn_cast<MoveChildMatcher>(N))
- if (MoveParentMatcher *MP =
- dyn_cast<MoveParentMatcher>(MC->getNext())) {
+ if (MoveParentMatcher *MP = dyn_cast<MoveParentMatcher>(MC->getNext())) {
MatcherPtr.reset(MP->takeNext());
return ContractNodes(MatcherPtr, CGP);
}
@@ -84,19 +85,19 @@ static void ContractNodes(std::unique_ptr<Matcher> &MatcherPtr,
// Turn EmitNode->CompleteMatch into MorphNodeTo if we can.
if (EmitNodeMatcher *EN = dyn_cast<EmitNodeMatcher>(N))
if (CompleteMatchMatcher *CM =
- dyn_cast<CompleteMatchMatcher>(EN->getNext())) {
+ dyn_cast<CompleteMatchMatcher>(EN->getNext())) {
// We can only use MorphNodeTo if the result values match up.
unsigned RootResultFirst = EN->getFirstResultSlot();
bool ResultsMatch = true;
for (unsigned i = 0, e = CM->getNumResults(); i != e; ++i)
- if (CM->getResult(i) != RootResultFirst+i)
+ if (CM->getResult(i) != RootResultFirst + i)
ResultsMatch = false;
-
+
// If the selected node defines a subset of the glue/chain results, we
// can't use MorphNodeTo. For example, we can't use MorphNodeTo if the
// matched pattern has a chain but the root node doesn't.
const PatternToMatch &Pattern = CM->getPattern();
-
+
if (!EN->hasChain() &&
Pattern.getSrcPattern()->NodeHasProperty(SDNPHasChain, CGP))
ResultsMatch = false;
@@ -107,40 +108,35 @@ static void ContractNodes(std::unique_ptr<Matcher> &MatcherPtr,
// NOTE: Strictly speaking, we don't have to check for glue here
// because the code in the pattern generator doesn't handle it right. We
// do it anyway for thoroughness.
- if (!EN->hasOutFlag() &&
+ if (!EN->hasOutGlue() &&
Pattern.getSrcPattern()->NodeHasProperty(SDNPOutGlue, CGP))
ResultsMatch = false;
-
-
+
+#if 0
// If the root result node defines more results than the source root node
// *and* has a chain or glue input, then we can't match it because it
// would end up replacing the extra result with the chain/glue.
-#if 0
if ((EN->hasGlue() || EN->hasChain()) &&
EN->getNumNonChainGlueVTs() > ... need to get no results reliably ...)
ResultMatch = false;
#endif
-
+
if (ResultsMatch) {
const SmallVectorImpl<MVT::SimpleValueType> &VTs = EN->getVTList();
const SmallVectorImpl<unsigned> &Operands = EN->getOperandList();
- MatcherPtr.reset(new MorphNodeToMatcher(EN->getOpcodeName(),
- VTs, Operands,
- EN->hasChain(), EN->hasInFlag(),
- EN->hasOutFlag(),
- EN->hasMemRefs(),
- EN->getNumFixedArityOperands(),
- Pattern));
+ MatcherPtr.reset(new MorphNodeToMatcher(
+ EN->getInstruction(), VTs, Operands, EN->hasChain(),
+ EN->hasInGlue(), EN->hasOutGlue(), EN->hasMemRefs(),
+ EN->getNumFixedArityOperands(), Pattern));
return;
}
// FIXME2: Kill off all the SelectionDAG::SelectNodeTo and getMachineNode
// variants.
}
-
+
ContractNodes(N->getNextPtr(), CGP);
-
-
+
// If we have a CheckType/CheckChildType/Record node followed by a
// CheckOpcode, invert the two nodes. We prefer to do structural checks
// before type checks, as this opens opportunities for factoring on targets
@@ -152,7 +148,7 @@ static void ContractNodes(std::unique_ptr<Matcher> &MatcherPtr,
Matcher *CheckType = MatcherPtr.release();
Matcher *CheckOpcode = CheckType->takeNext();
Matcher *Tail = CheckOpcode->takeNext();
-
+
// Relink them.
MatcherPtr.reset(CheckOpcode);
CheckOpcode->setNext(CheckType);
@@ -171,7 +167,6 @@ static Matcher *FindNodeWithKind(Matcher *M, Matcher::KindTy Kind) {
return nullptr;
}
-
/// FactorNodes - Turn matches like this:
/// Scope
/// OPC_CheckType i32
@@ -191,7 +186,8 @@ static void FactorNodes(std::unique_ptr<Matcher> &InputMatcherPtr) {
while (!Scope) {
// If we reached the end of the chain, we're done.
Matcher *N = RebindableMatcherPtr->get();
- if (!N) return;
+ if (!N)
+ return;
// If this is not a push node, just scan for one.
Scope = dyn_cast<ScopeMatcher>(N);
@@ -199,78 +195,73 @@ static void FactorNodes(std::unique_ptr<Matcher> &InputMatcherPtr) {
RebindableMatcherPtr = &(N->getNextPtr());
}
std::unique_ptr<Matcher> &MatcherPtr = *RebindableMatcherPtr;
-
+
// Okay, pull together the children of the scope node into a vector so we can
// inspect it more easily.
- SmallVector<Matcher*, 32> OptionsToMatch;
-
+ SmallVector<Matcher *, 32> OptionsToMatch;
+
for (unsigned i = 0, e = Scope->getNumChildren(); i != e; ++i) {
// Factor the subexpression.
std::unique_ptr<Matcher> Child(Scope->takeChild(i));
FactorNodes(Child);
-
- if (Child) {
- // If the child is a ScopeMatcher we can just merge its contents.
- if (auto *SM = dyn_cast<ScopeMatcher>(Child.get())) {
- for (unsigned j = 0, e = SM->getNumChildren(); j != e; ++j)
- OptionsToMatch.push_back(SM->takeChild(j));
- } else {
- OptionsToMatch.push_back(Child.release());
- }
+
+ // If the child is a ScopeMatcher we can just merge its contents.
+ if (auto *SM = dyn_cast<ScopeMatcher>(Child.get())) {
+ for (unsigned j = 0, e = SM->getNumChildren(); j != e; ++j)
+ OptionsToMatch.push_back(SM->takeChild(j));
+ } else {
+ OptionsToMatch.push_back(Child.release());
}
}
-
- SmallVector<Matcher*, 32> NewOptionsToMatch;
-
+
// Loop over options to match, merging neighboring patterns with identical
// starting nodes into a shared matcher.
- for (unsigned OptionIdx = 0, e = OptionsToMatch.size(); OptionIdx != e;) {
+ auto E = OptionsToMatch.end();
+ for (auto I = OptionsToMatch.begin(); I != E; ++I) {
+ // If there are no other matchers left, there's nothing to merge with.
+ auto J = std::next(I);
+ if (J == E)
+ break;
+
+ // Remember where we started. We'll use this to move non-equal elements.
+ auto K = J;
+
// Find the set of matchers that start with this node.
- Matcher *Optn = OptionsToMatch[OptionIdx++];
+ Matcher *Optn = *I;
- if (OptionIdx == e) {
- NewOptionsToMatch.push_back(Optn);
- continue;
- }
-
// See if the next option starts with the same matcher. If the two
// neighbors *do* start with the same matcher, we can factor the matcher out
// of at least these two patterns. See what the maximal set we can merge
// together is.
- SmallVector<Matcher*, 8> EqualMatchers;
+ SmallVector<Matcher *, 8> EqualMatchers;
EqualMatchers.push_back(Optn);
-
+
// Factor all of the known-equal matchers after this one into the same
// group.
- while (OptionIdx != e && OptionsToMatch[OptionIdx]->isEqual(Optn))
- EqualMatchers.push_back(OptionsToMatch[OptionIdx++]);
+ while (J != E && (*J)->isEqual(Optn))
+ EqualMatchers.push_back(*J++);
// If we found a non-equal matcher, see if it is contradictory with the
// current node. If so, we know that the ordering relation between the
// current sets of nodes and this node don't matter. Look past it to see if
// we can merge anything else into this matching group.
- unsigned Scan = OptionIdx;
- while (true) {
- // If we ran out of stuff to scan, we're done.
- if (Scan == e) break;
-
- Matcher *ScanMatcher = OptionsToMatch[Scan];
-
+ while (J != E) {
+ Matcher *ScanMatcher = *J;
+
// If we found an entry that matches our matcher, merge it into the set to
// handle.
if (Optn->isEqual(ScanMatcher)) {
- // If is equal after all, add the option to EqualMatchers and remove it
- // from OptionsToMatch.
+ // It is equal after all, add the option to EqualMatchers.
EqualMatchers.push_back(ScanMatcher);
- OptionsToMatch.erase(OptionsToMatch.begin()+Scan);
- --e;
+ ++J;
continue;
}
-
+
// If the option we're checking for contradicts the start of the list,
- // skip over it.
+ // move it earlier in OptionsToMatch for the next iteration of the outer
+ // loop. Then continue searching for equal or contradictory matchers.
if (Optn->isContradictory(ScanMatcher)) {
- ++Scan;
+ *K++ = *J++;
continue;
}
@@ -279,38 +270,47 @@ static void FactorNodes(std::unique_ptr<Matcher> &InputMatcherPtr) {
// or the same as what we're looking for. If so, reorder it.
if (Optn->isSimplePredicateOrRecordNode()) {
Matcher *M2 = FindNodeWithKind(ScanMatcher, Optn->getKind());
- if (M2 && M2 != ScanMatcher &&
- M2->canMoveBefore(ScanMatcher) &&
+ if (M2 && M2 != ScanMatcher && M2->canMoveBefore(ScanMatcher) &&
(M2->isEqual(Optn) || M2->isContradictory(Optn))) {
Matcher *MatcherWithoutM2 = ScanMatcher->unlinkNode(M2);
M2->setNext(MatcherWithoutM2);
- OptionsToMatch[Scan] = M2;
+ *J = M2;
continue;
}
}
-
+
// Otherwise, we don't know how to handle this entry, we have to bail.
break;
}
-
- if (Scan != e &&
- // Don't print it's obvious nothing extra could be merged anyway.
- Scan+1 != e) {
+
+ if (J != E &&
+ // Don't print if it's obvious nothing extra could be merged anyway.
+ std::next(J) != E) {
LLVM_DEBUG(errs() << "Couldn't merge this:\n"; Optn->print(errs(), 4);
errs() << "into this:\n";
- OptionsToMatch[Scan]->print(errs(), 4);
- if (Scan + 1 != e) OptionsToMatch[Scan + 1]->printOne(errs());
- if (Scan + 2 < e) OptionsToMatch[Scan + 2]->printOne(errs());
+ (*J)->print(errs(), 4);
+ (*std::next(J))->printOne(errs());
+ if (std::next(J, 2) != E) (*std::next(J, 2))->printOne(errs());
errs() << "\n");
}
-
+
+ // If we removed any equal matchers, we may need to slide the rest of the
+ // elements down for the next iteration of the outer loop.
+ if (J != K) {
+ while (J != E)
+ *K++ = *J++;
+
+ // Update end pointer for outer loop.
+ E = K;
+ }
+
// If we only found one option starting with this matcher, no factoring is
- // possible.
+ // possible. Put the Matcher back in OptionsToMatch.
if (EqualMatchers.size() == 1) {
- NewOptionsToMatch.push_back(EqualMatchers[0]);
+ *I = EqualMatchers[0];
continue;
}
-
+
// Factor these checks by pulling the first node off each entry and
// discarding it. Take the first one off the first entry to reuse.
Matcher *Shared = Optn;
@@ -322,42 +322,49 @@ static void FactorNodes(std::unique_ptr<Matcher> &InputMatcherPtr) {
Matcher *Tmp = EqualMatchers[i]->takeNext();
delete EqualMatchers[i];
EqualMatchers[i] = Tmp;
+ assert(!Optn == !Tmp && "Expected all to be null if any are null");
}
-
- Shared->setNext(new ScopeMatcher(EqualMatchers));
- // Recursively factor the newly created node.
- FactorNodes(Shared->getNextPtr());
-
- NewOptionsToMatch.push_back(Shared);
+ if (EqualMatchers[0]) {
+ Shared->setNext(new ScopeMatcher(std::move(EqualMatchers)));
+
+ // Recursively factor the newly created node.
+ FactorNodes(Shared->getNextPtr());
+ }
+
+ // Put the new Matcher where we started in OptionsToMatch.
+ *I = Shared;
}
-
+
+ // Trim the array to match the updated end.
+ if (E != OptionsToMatch.end())
+ OptionsToMatch.erase(E, OptionsToMatch.end());
+
// If we're down to a single pattern to match, then we don't need this scope
// anymore.
- if (NewOptionsToMatch.size() == 1) {
- MatcherPtr.reset(NewOptionsToMatch[0]);
+ if (OptionsToMatch.size() == 1) {
+ MatcherPtr.reset(OptionsToMatch[0]);
return;
}
-
- if (NewOptionsToMatch.empty()) {
+
+ if (OptionsToMatch.empty()) {
MatcherPtr.reset();
return;
}
-
+
// If our factoring failed (didn't achieve anything) see if we can simplify in
// other ways.
-
+
// Check to see if all of the leading entries are now opcode checks. If so,
// we can convert this Scope to be a OpcodeSwitch instead.
bool AllOpcodeChecks = true, AllTypeChecks = true;
- for (unsigned i = 0, e = NewOptionsToMatch.size(); i != e; ++i) {
+ for (unsigned i = 0, e = OptionsToMatch.size(); i != e; ++i) {
// Check to see if this breaks a series of CheckOpcodeMatchers.
- if (AllOpcodeChecks &&
- !isa<CheckOpcodeMatcher>(NewOptionsToMatch[i])) {
+ if (AllOpcodeChecks && !isa<CheckOpcodeMatcher>(OptionsToMatch[i])) {
#if 0
if (i > 3) {
errs() << "FAILING OPC #" << i << "\n";
- NewOptionsToMatch[i]->dump();
+ OptionsToMatch[i]->dump();
}
#endif
AllOpcodeChecks = false;
@@ -365,9 +372,8 @@ static void FactorNodes(std::unique_ptr<Matcher> &InputMatcherPtr) {
// Check to see if this breaks a series of CheckTypeMatcher's.
if (AllTypeChecks) {
- CheckTypeMatcher *CTM =
- cast_or_null<CheckTypeMatcher>(FindNodeWithKind(NewOptionsToMatch[i],
- Matcher::CheckType));
+ CheckTypeMatcher *CTM = cast_or_null<CheckTypeMatcher>(
+ FindNodeWithKind(OptionsToMatch[i], Matcher::CheckType));
if (!CTM ||
// iPTR checks could alias any other case without us knowing, don't
// bother with them.
@@ -376,66 +382,66 @@ static void FactorNodes(std::unique_ptr<Matcher> &InputMatcherPtr) {
CTM->getResNo() != 0 ||
// If the CheckType isn't at the start of the list, see if we can move
// it there.
- !CTM->canMoveBefore(NewOptionsToMatch[i])) {
+ !CTM->canMoveBefore(OptionsToMatch[i])) {
#if 0
if (i > 3 && AllTypeChecks) {
errs() << "FAILING TYPE #" << i << "\n";
- NewOptionsToMatch[i]->dump();
+ OptionsToMatch[i]->dump();
}
#endif
AllTypeChecks = false;
}
}
}
-
+
// If all the options are CheckOpcode's, we can form the SwitchOpcode, woot.
if (AllOpcodeChecks) {
StringSet<> Opcodes;
- SmallVector<std::pair<const SDNodeInfo*, Matcher*>, 8> Cases;
- for (unsigned i = 0, e = NewOptionsToMatch.size(); i != e; ++i) {
- CheckOpcodeMatcher *COM = cast<CheckOpcodeMatcher>(NewOptionsToMatch[i]);
+ SmallVector<std::pair<const SDNodeInfo *, Matcher *>, 8> Cases;
+ for (unsigned i = 0, e = OptionsToMatch.size(); i != e; ++i) {
+ CheckOpcodeMatcher *COM = cast<CheckOpcodeMatcher>(OptionsToMatch[i]);
assert(Opcodes.insert(COM->getOpcode().getEnumName()).second &&
"Duplicate opcodes not factored?");
Cases.push_back(std::make_pair(&COM->getOpcode(), COM->takeNext()));
delete COM;
}
-
- MatcherPtr.reset(new SwitchOpcodeMatcher(Cases));
+
+ MatcherPtr.reset(new SwitchOpcodeMatcher(std::move(Cases)));
return;
}
-
+
// If all the options are CheckType's, we can form the SwitchType, woot.
if (AllTypeChecks) {
DenseMap<unsigned, unsigned> TypeEntry;
- SmallVector<std::pair<MVT::SimpleValueType, Matcher*>, 8> Cases;
- for (unsigned i = 0, e = NewOptionsToMatch.size(); i != e; ++i) {
- Matcher* M = FindNodeWithKind(NewOptionsToMatch[i], Matcher::CheckType);
+ SmallVector<std::pair<MVT::SimpleValueType, Matcher *>, 8> Cases;
+ for (unsigned i = 0, e = OptionsToMatch.size(); i != e; ++i) {
+ Matcher *M = FindNodeWithKind(OptionsToMatch[i], Matcher::CheckType);
assert(M && isa<CheckTypeMatcher>(M) && "Unknown Matcher type");
auto *CTM = cast<CheckTypeMatcher>(M);
- Matcher *MatcherWithoutCTM = NewOptionsToMatch[i]->unlinkNode(CTM);
+ Matcher *MatcherWithoutCTM = OptionsToMatch[i]->unlinkNode(CTM);
MVT::SimpleValueType CTMTy = CTM->getType();
delete CTM;
unsigned &Entry = TypeEntry[CTMTy];
if (Entry != 0) {
// If we have unfactored duplicate types, then we should factor them.
- Matcher *PrevMatcher = Cases[Entry-1].second;
+ Matcher *PrevMatcher = Cases[Entry - 1].second;
if (ScopeMatcher *SM = dyn_cast<ScopeMatcher>(PrevMatcher)) {
- SM->setNumChildren(SM->getNumChildren()+1);
- SM->resetChild(SM->getNumChildren()-1, MatcherWithoutCTM);
+ SM->setNumChildren(SM->getNumChildren() + 1);
+ SM->resetChild(SM->getNumChildren() - 1, MatcherWithoutCTM);
continue;
}
-
- Matcher *Entries[2] = { PrevMatcher, MatcherWithoutCTM };
- Cases[Entry-1].second = new ScopeMatcher(Entries);
+
+ SmallVector<Matcher *, 2> Entries = {PrevMatcher, MatcherWithoutCTM};
+ Cases[Entry - 1].second = new ScopeMatcher(std::move(Entries));
continue;
}
-
- Entry = Cases.size()+1;
+
+ Entry = Cases.size() + 1;
Cases.push_back(std::make_pair(CTMTy, MatcherWithoutCTM));
}
-
+
// Make sure we recursively factor any scopes we may have created.
for (auto &M : Cases) {
if (ScopeMatcher *SM = dyn_cast<ScopeMatcher>(M.second)) {
@@ -447,7 +453,7 @@ static void FactorNodes(std::unique_ptr<Matcher> &InputMatcherPtr) {
}
if (Cases.size() != 1) {
- MatcherPtr.reset(new SwitchTypeMatcher(Cases));
+ MatcherPtr.reset(new SwitchTypeMatcher(std::move(Cases)));
} else {
// If we factored and ended up with one case, create it now.
MatcherPtr.reset(new CheckTypeMatcher(Cases[0].first, 0));
@@ -455,17 +461,15 @@ static void FactorNodes(std::unique_ptr<Matcher> &InputMatcherPtr) {
}
return;
}
-
// Reassemble the Scope node with the adjusted children.
- Scope->setNumChildren(NewOptionsToMatch.size());
- for (unsigned i = 0, e = NewOptionsToMatch.size(); i != e; ++i)
- Scope->resetChild(i, NewOptionsToMatch[i]);
+ Scope->setNumChildren(OptionsToMatch.size());
+ for (unsigned i = 0, e = OptionsToMatch.size(); i != e; ++i)
+ Scope->resetChild(i, OptionsToMatch[i]);
}
-void
-llvm::OptimizeMatcher(std::unique_ptr<Matcher> &MatcherPtr,
- const CodeGenDAGPatterns &CGP) {
+void llvm::OptimizeMatcher(std::unique_ptr<Matcher> &MatcherPtr,
+ const CodeGenDAGPatterns &CGP) {
ContractNodes(MatcherPtr, CGP);
FactorNodes(MatcherPtr);
}
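
The FactorNodes rewrite above drops the NewOptionsToMatch copy and the mid-vector erase() calls in favour of an I/J/K iterator walk: equal matchers are absorbed, unmerged ones are slid down toward the front, and the dead tail is erased once at the end. A minimal sketch of that single-pass compaction shape on a plain vector, where "merging" is simply dropping later duplicates of the current element:

    #include <cassert>
    #include <iterator>
    #include <vector>

    // For each element, absorb later duplicates of it while sliding the kept
    // elements down, so no quadratic mid-vector erase() calls are needed.
    // This mirrors the shape of the I/J/K loop, not its matcher-specific logic.
    static void compactDuplicates(std::vector<int> &V) {
      auto E = V.end();
      for (auto I = V.begin(); I != E; ++I) {
        auto J = std::next(I);
        auto K = J; // next free slot for elements we keep
        while (J != E) {
          if (*J == *I) {
            ++J;       // duplicate of *I: absorbed, not kept
            continue;
          }
          *K++ = *J++; // keep it, sliding it toward the front
        }
        E = K;         // logical end shrinks by the number absorbed
      }
      V.erase(E, V.end()); // single erase of the dead tail
    }

    int main() {
      std::vector<int> V = {1, 1, 2, 1, 3, 2, 2};
      compactDuplicates(V);
      assert((V == std::vector<int>{1, 2, 3}));
      return 0;
    }
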
diff --git a/llvm/utils/TableGen/DFAEmitter.cpp b/llvm/utils/TableGen/DFAEmitter.cpp
index 705908226fa1..54ad81cbebe8 100644
--- a/llvm/utils/TableGen/DFAEmitter.cpp
+++ b/llvm/utils/TableGen/DFAEmitter.cpp
@@ -22,13 +22,13 @@
#include "DFAEmitter.h"
#include "SequenceToOffsetTable.h"
-#include "TableGenBackends.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/UniqueVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
#include <cassert>
#include <cstdint>
#include <deque>
@@ -370,10 +370,5 @@ void CustomDfaEmitter::printActionValue(action_type A, raw_ostream &OS) {
OS << ")";
}
-namespace llvm {
-
-void EmitAutomata(RecordKeeper &RK, raw_ostream &OS) {
- AutomatonEmitter(RK).run(OS);
-}
-
-} // namespace llvm
+static TableGen::Emitter::OptClass<AutomatonEmitter>
+ X("gen-automata", "Generate generic automata");
diff --git a/llvm/utils/TableGen/DFAEmitter.h b/llvm/utils/TableGen/DFAEmitter.h
index 44e5d97d544f..c831a65a73cd 100644
--- a/llvm/utils/TableGen/DFAEmitter.h
+++ b/llvm/utils/TableGen/DFAEmitter.h
@@ -21,6 +21,8 @@
#include "llvm/ADT/UniqueVector.h"
#include <map>
#include <set>
+#include <utility>
+#include <vector>
namespace llvm {
diff --git a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
index 6704d747f715..64c7884616a5 100644
--- a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
+++ b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -24,6 +24,7 @@
#include "llvm/TableGen/TableGenBackend.h"
#include <cassert>
#include <cstdint>
+#include <deque>
#include <map>
#include <set>
#include <string>
@@ -205,6 +206,7 @@ void DFAPacketizerEmitter::createScheduleClasses(unsigned ItineraryIdx,
// Run the worklist algorithm to generate the DFA.
//
void DFAPacketizerEmitter::run(raw_ostream &OS) {
+ emitSourceFileHeader("Target DFA Packetizer Tables", OS);
OS << "\n"
<< "#include \"llvm/CodeGen/DFAPacketizer.h\"\n";
OS << "namespace llvm {\n";
@@ -352,11 +354,5 @@ void DFAPacketizerEmitter::emitForItineraries(
<< "\n}\n\n";
}
-namespace llvm {
-
-void EmitDFAPacketizer(RecordKeeper &RK, raw_ostream &OS) {
- emitSourceFileHeader("Target DFA Packetizer Tables", OS);
- DFAPacketizerEmitter(RK).run(OS);
-}
-
-} // end namespace llvm
+static TableGen::Emitter::OptClass<DFAPacketizerEmitter>
+ X("gen-dfa-packetizer", "Generate DFA Packetizer for VLIW targets");
diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp
index 44c1df3e9ac4..b294c66007f8 100644
--- a/llvm/utils/TableGen/DXILEmitter.cpp
+++ b/llvm/utils/TableGen/DXILEmitter.cpp
@@ -17,8 +17,8 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/DXILOperationCommon.h"
-#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
using namespace llvm;
using namespace llvm::dxil;
@@ -26,8 +26,8 @@ using namespace llvm::dxil;
namespace {
struct DXILShaderModel {
- int Major;
- int Minor;
+ int Major = 0;
+ int Minor = 0;
};
struct DXILParam {
@@ -56,12 +56,13 @@ struct DXILOperationData {
// memory,ro=only reads from memory
StringRef Intrinsic; // The llvm intrinsic map to DXILOp. Default is "" which
// means no map exist
- bool IsDeriv; // whether this is some kind of derivative
- bool IsGradient; // whether this requires a gradient calculation
- bool IsFeedback; // whether this is a sampler feedback op
- bool IsWave; // whether this requires in-wave, cross-lane functionality
- bool RequiresUniformInputs; // whether this operation requires that all
- // of its inputs are uniform across the wave
+ bool IsDeriv = false; // whether this is some kind of derivative
+ bool IsGradient = false; // whether this requires a gradient calculation
+ bool IsFeedback = false; // whether this is a sampler feedback op
+ bool IsWave = false; // whether this requires in-wave, cross-lane functionality
+ bool RequiresUniformInputs = false; // whether this operation requires that
+ // all of its inputs are uniform across
+ // the wave
SmallVector<StringRef, 4>
ShaderStages; // shader stages to which this applies, empty for all.
DXILShaderModel ShaderModel; // minimum shader model required
@@ -322,7 +323,7 @@ static void emitDXILOperationTable(std::vector<DXILOperationData> &DXILOps,
for (auto &DXILOp : DXILOps) {
OpStrings.add(DXILOp.DXILOp.str());
- if (ClassSet.find(DXILOp.DXILClass) != ClassSet.end())
+ if (ClassSet.contains(DXILOp.DXILClass))
continue;
ClassSet.insert(DXILOp.DXILClass);
OpClassStrings.add(getDXILOpClassName(DXILOp.DXILClass));
@@ -411,9 +412,7 @@ static void emitDXILOperationTable(std::vector<DXILOperationData> &DXILOps,
OS << "}\n ";
}
-namespace llvm {
-
-void EmitDXILOperation(RecordKeeper &Records, raw_ostream &OS) {
+static void EmitDXILOperation(RecordKeeper &Records, raw_ostream &OS) {
std::vector<Record *> Ops = Records.getAllDerivedDefinitions("dxil_op");
OS << "// Generated code, do not edit.\n";
OS << "\n";
@@ -439,4 +438,5 @@ void EmitDXILOperation(RecordKeeper &Records, raw_ostream &OS) {
OS << "\n";
}
-} // namespace llvm
+static TableGen::Emitter::Opt X("gen-dxil-operation", EmitDXILOperation,
+ "Generate DXIL operation information");
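
The DXILEmitter structs above gain in-class default member initializers (int Major = 0; bool IsDeriv = false; ...), so a default-constructed descriptor starts from a known state instead of indeterminate values. A tiny sketch of what that buys:

    #include <cassert>

    struct Uninitialized {
      int Major;     // indeterminate after default construction
      bool IsDeriv;
    };

    struct Defaulted {
      int Major = 0;        // always starts from a known state
      bool IsDeriv = false;
    };

    int main() {
      Defaulted D;
      assert(D.Major == 0 && !D.IsDeriv);
      // Reading U.Major before assigning it would be undefined behaviour,
      // which is exactly what the default initializers above rule out.
      Uninitialized U;
      U.Major = 6; // must be assigned before use
      assert(U.Major == 6);
      return 0;
    }
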
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 8f816744370c..607f19653c7a 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -11,9 +11,11 @@
//
//===----------------------------------------------------------------------===//
+#include "CodeGenHwModes.h"
#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
#include "InfoByHwMode.h"
+#include "TableGenBackends.h"
#include "VarLenCodeEmitterGen.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
@@ -2028,193 +2030,11 @@ populateInstruction(CodeGenTarget &Target, const Record &EncodingDef,
if (IsVarLenInst) {
parseVarLenInstOperand(EncodingDef, InsnOperands, CGI);
} else {
- std::map<std::string, std::vector<OperandInfo>> NumberedInsnOperands;
- std::set<std::string> NumberedInsnOperandsNoTie;
- bool SupportPositionalDecoding =
- Target.getInstructionSet()->getValueAsBit(
- "useDeprecatedPositionallyEncodedOperands") &&
- Target.getInstructionSet()->getValueAsBit(
- "decodePositionallyEncodedOperands");
- if (SupportPositionalDecoding) {
- const std::vector<RecordVal> &Vals = Def.getValues();
- unsigned NumberedOp = 0;
-
- std::set<unsigned> NamedOpIndices;
- if (Target.getInstructionSet()->getValueAsBit(
- "noNamedPositionallyEncodedOperands"))
- // Collect the set of operand indices that might correspond to named
- // operand, and skip these when assigning operands based on position.
- for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
- unsigned OpIdx;
- if (!CGI.Operands.hasOperandNamed(Vals[i].getName(), OpIdx))
- continue;
-
- NamedOpIndices.insert(OpIdx);
- }
-
- for (unsigned i = 0, e = Vals.size(); i != e; ++i) {
- // Ignore fixed fields in the record, we're looking for values like:
- // bits<5> RST = { ?, ?, ?, ?, ? };
- if (Vals[i].isNonconcreteOK() || Vals[i].getValue()->isComplete())
- continue;
-
- // Determine if Vals[i] actually contributes to the Inst encoding.
- unsigned bi = 0;
- for (; bi < Bits.getNumBits(); ++bi) {
- VarInit *Var = nullptr;
- VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi));
- if (BI)
- Var = dyn_cast<VarInit>(BI->getBitVar());
- else
- Var = dyn_cast<VarInit>(Bits.getBit(bi));
-
- if (Var && Var->getName() == Vals[i].getName())
- break;
- }
-
- if (bi == Bits.getNumBits())
- continue;
-
- // Skip variables that correspond to explicitly-named operands.
- unsigned OpIdx;
- std::pair<unsigned, unsigned> SubOp;
- if (CGI.Operands.hasSubOperandAlias(Vals[i].getName(), SubOp) ||
- CGI.Operands.hasOperandNamed(Vals[i].getName(), OpIdx))
- continue;
-
- // Get the bit range for this operand:
- unsigned bitStart = bi++, bitWidth = 1;
- for (; bi < Bits.getNumBits(); ++bi) {
- VarInit *Var = nullptr;
- VarBitInit *BI = dyn_cast<VarBitInit>(Bits.getBit(bi));
- if (BI)
- Var = dyn_cast<VarInit>(BI->getBitVar());
- else
- Var = dyn_cast<VarInit>(Bits.getBit(bi));
-
- if (!Var)
- break;
-
- if (Var->getName() != Vals[i].getName())
- break;
-
- ++bitWidth;
- }
-
- unsigned NumberOps = CGI.Operands.size();
- while (NumberedOp < NumberOps &&
- (CGI.Operands.isFlatOperandNotEmitted(NumberedOp) ||
- (!NamedOpIndices.empty() &&
- NamedOpIndices.count(
- CGI.Operands.getSubOperandNumber(NumberedOp).first))))
- ++NumberedOp;
-
- OpIdx = NumberedOp++;
-
- // OpIdx now holds the ordered operand number of Vals[i].
- std::pair<unsigned, unsigned> SO =
- CGI.Operands.getSubOperandNumber(OpIdx);
- const std::string &Name = CGI.Operands[SO.first].Name;
-
- LLVM_DEBUG(dbgs() << "Numbered operand mapping for " << Def.getName()
- << ": " << Name << "(" << SO.first << ", "
- << SO.second << ") => " << Vals[i].getName() << "\n");
-
- std::string Decoder;
- Record *TypeRecord = CGI.Operands[SO.first].Rec;
-
- RecordVal *DecoderString = TypeRecord->getValue("DecoderMethod");
- StringInit *String =
- DecoderString ? dyn_cast<StringInit>(DecoderString->getValue())
- : nullptr;
- if (String && String->getValue() != "")
- Decoder = std::string(String->getValue());
-
- if (Decoder == "" && CGI.Operands[SO.first].MIOperandInfo &&
- CGI.Operands[SO.first].MIOperandInfo->getNumArgs()) {
- Init *Arg = CGI.Operands[SO.first].MIOperandInfo->getArg(SO.second);
- if (DefInit *DI = cast<DefInit>(Arg))
- TypeRecord = DI->getDef();
- }
-
- bool isReg = false;
- if (TypeRecord->isSubClassOf("RegisterOperand"))
- TypeRecord = TypeRecord->getValueAsDef("RegClass");
- if (TypeRecord->isSubClassOf("RegisterClass")) {
- Decoder = "Decode" + TypeRecord->getName().str() + "RegisterClass";
- isReg = true;
- } else if (TypeRecord->isSubClassOf("PointerLikeRegClass")) {
- Decoder = "DecodePointerLikeRegClass" +
- utostr(TypeRecord->getValueAsInt("RegClassKind"));
- isReg = true;
- }
-
- DecoderString = TypeRecord->getValue("DecoderMethod");
- String = DecoderString ? dyn_cast<StringInit>(DecoderString->getValue())
- : nullptr;
- if (!isReg && String && String->getValue() != "")
- Decoder = std::string(String->getValue());
-
- RecordVal *HasCompleteDecoderVal =
- TypeRecord->getValue("hasCompleteDecoder");
- BitInit *HasCompleteDecoderBit =
- HasCompleteDecoderVal
- ? dyn_cast<BitInit>(HasCompleteDecoderVal->getValue())
- : nullptr;
- bool HasCompleteDecoder =
- HasCompleteDecoderBit ? HasCompleteDecoderBit->getValue() : true;
-
- OperandInfo OpInfo(Decoder, HasCompleteDecoder);
- OpInfo.addField(bitStart, bitWidth, 0);
-
- NumberedInsnOperands[Name].push_back(OpInfo);
-
- // FIXME: For complex operands with custom decoders we can't handle tied
- // sub-operands automatically. Skip those here and assume that this is
- // fixed up elsewhere.
- if (CGI.Operands[SO.first].MIOperandInfo &&
- CGI.Operands[SO.first].MIOperandInfo->getNumArgs() > 1 && String &&
- String->getValue() != "")
- NumberedInsnOperandsNoTie.insert(Name);
- }
- }
-
// For each operand, see if we can figure out where it is encoded.
for (const auto &Op : InOutOperands) {
Init *OpInit = Op.first;
StringRef OpName = Op.second;
- if (SupportPositionalDecoding) {
- if (!NumberedInsnOperands[std::string(OpName)].empty()) {
- llvm::append_range(InsnOperands,
- NumberedInsnOperands[std::string(OpName)]);
- continue;
- }
- if (!NumberedInsnOperands[TiedNames[std::string(OpName)]].empty()) {
- if (!NumberedInsnOperandsNoTie.count(
- TiedNames[std::string(OpName)])) {
- // Figure out to which (sub)operand we're tied.
- unsigned i =
- CGI.Operands.getOperandNamed(TiedNames[std::string(OpName)]);
- int tiedTo = CGI.Operands[i].getTiedRegister();
- if (tiedTo == -1) {
- i = CGI.Operands.getOperandNamed(OpName);
- tiedTo = CGI.Operands[i].getTiedRegister();
- }
-
- if (tiedTo != -1) {
- std::pair<unsigned, unsigned> SO =
- CGI.Operands.getSubOperandNumber(tiedTo);
-
- InsnOperands.push_back(
- NumberedInsnOperands[TiedNames[std::string(OpName)]]
- [SO.second]);
- }
- }
- continue;
- }
- }
-
// We're ready to find the instruction encoding locations for this operand.
// First, find the operand type ("OpInit"), and sub-op names
@@ -2597,11 +2417,11 @@ void DecoderEmitter::run(raw_ostream &o) {
formatted_raw_ostream OS(o);
OS << "#include \"llvm/MC/MCInst.h\"\n";
OS << "#include \"llvm/MC/MCSubtargetInfo.h\"\n";
- OS << "#include \"llvm/MC/SubtargetFeature.h\"\n";
OS << "#include \"llvm/Support/DataTypes.h\"\n";
OS << "#include \"llvm/Support/Debug.h\"\n";
OS << "#include \"llvm/Support/LEB128.h\"\n";
OS << "#include \"llvm/Support/raw_ostream.h\"\n";
+ OS << "#include \"llvm/TargetParser/SubtargetFeature.h\"\n";
OS << "#include <assert.h>\n";
OS << '\n';
OS << "namespace llvm {\n\n";
diff --git a/llvm/utils/TableGen/DirectiveEmitter.cpp b/llvm/utils/TableGen/DirectiveEmitter.cpp
index f32fbe3e25cd..67033c6290ca 100644
--- a/llvm/utils/TableGen/DirectiveEmitter.cpp
+++ b/llvm/utils/TableGen/DirectiveEmitter.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
using namespace llvm;
@@ -36,14 +37,12 @@ private:
StringRef Name;
raw_ostream &OS;
};
-} // end anonymous namespace
-
-namespace llvm {
+} // namespace
// Generate enum class
-void GenerateEnumClass(const std::vector<Record *> &Records, raw_ostream &OS,
- StringRef Enum, StringRef Prefix,
- const DirectiveLanguage &DirLang) {
+static void GenerateEnumClass(const std::vector<Record *> &Records,
+ raw_ostream &OS, StringRef Enum, StringRef Prefix,
+ const DirectiveLanguage &DirLang) {
OS << "\n";
OS << "enum class " << Enum << " {\n";
for (const auto &R : Records) {
@@ -73,9 +72,10 @@ void GenerateEnumClass(const std::vector<Record *> &Records, raw_ostream &OS,
// Generate enums for values that clauses can take.
// Also generate function declarations for get<Enum>Name(StringRef Str).
-void GenerateEnumClauseVal(const std::vector<Record *> &Records,
- raw_ostream &OS, const DirectiveLanguage &DirLang,
- std::string &EnumHelperFuncs) {
+static void GenerateEnumClauseVal(const std::vector<Record *> &Records,
+ raw_ostream &OS,
+ const DirectiveLanguage &DirLang,
+ std::string &EnumHelperFuncs) {
for (const auto &R : Records) {
Clause C{R};
const auto &ClauseVals = C.getClauseVals();
@@ -117,9 +117,9 @@ void GenerateEnumClauseVal(const std::vector<Record *> &Records,
}
}
-bool HasDuplicateClauses(const std::vector<Record *> &Clauses,
- const Directive &Directive,
- llvm::StringSet<> &CrtClauses) {
+static bool HasDuplicateClauses(const std::vector<Record *> &Clauses,
+ const Directive &Directive,
+ llvm::StringSet<> &CrtClauses) {
bool HasError = false;
for (const auto &C : Clauses) {
VersionedClause VerClause{C};
@@ -136,7 +136,8 @@ bool HasDuplicateClauses(const std::vector<Record *> &Clauses,
// Check for duplicate clauses in lists. Clauses cannot appear twice in the
// three allowed lists. Also, since required implies allowed, clauses cannot
// appear in both the allowedClauses and requiredClauses lists.
-bool HasDuplicateClausesInDirectives(const std::vector<Record *> &Directives) {
+static bool
+HasDuplicateClausesInDirectives(const std::vector<Record *> &Directives) {
bool HasDuplicate = false;
for (const auto &D : Directives) {
Directive Dir{D};
@@ -175,7 +176,7 @@ bool DirectiveLanguage::HasValidityErrors() const {
// Generate the declaration section for the enumeration in the directive
// language
-void EmitDirectivesDecl(RecordKeeper &Records, raw_ostream &OS) {
+static void EmitDirectivesDecl(RecordKeeper &Records, raw_ostream &OS) {
const auto DirLang = DirectiveLanguage{Records};
if (DirLang.HasValidityErrors())
return;
@@ -245,9 +246,10 @@ void EmitDirectivesDecl(RecordKeeper &Records, raw_ostream &OS) {
}
// Generate function implementation for get<Enum>Name(StringRef Str)
-void GenerateGetName(const std::vector<Record *> &Records, raw_ostream &OS,
- StringRef Enum, const DirectiveLanguage &DirLang,
- StringRef Prefix) {
+static void GenerateGetName(const std::vector<Record *> &Records,
+ raw_ostream &OS, StringRef Enum,
+ const DirectiveLanguage &DirLang,
+ StringRef Prefix) {
OS << "\n";
OS << "llvm::StringRef llvm::" << DirLang.getCppNamespace() << "::get"
<< DirLang.getName() << Enum << "Name(" << Enum << " Kind) {\n";
@@ -269,9 +271,10 @@ void GenerateGetName(const std::vector<Record *> &Records, raw_ostream &OS,
}
// Generate function implementation for get<Enum>Kind(StringRef Str)
-void GenerateGetKind(const std::vector<Record *> &Records, raw_ostream &OS,
- StringRef Enum, const DirectiveLanguage &DirLang,
- StringRef Prefix, bool ImplicitAsUnknown) {
+static void GenerateGetKind(const std::vector<Record *> &Records,
+ raw_ostream &OS, StringRef Enum,
+ const DirectiveLanguage &DirLang, StringRef Prefix,
+ bool ImplicitAsUnknown) {
auto DefaultIt = llvm::find_if(
Records, [](Record *R) { return R->getValueAsBit("isDefault") == true; });
@@ -303,8 +306,8 @@ void GenerateGetKind(const std::vector<Record *> &Records, raw_ostream &OS,
}
// Generate function implementation for get<ClauseVal>Kind(StringRef Str)
-void GenerateGetKindClauseVal(const DirectiveLanguage &DirLang,
- raw_ostream &OS) {
+static void GenerateGetKindClauseVal(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
for (const auto &R : DirLang.getClauses()) {
Clause C{R};
const auto &ClauseVals = C.getClauseVals();
@@ -359,10 +362,11 @@ void GenerateGetKindClauseVal(const DirectiveLanguage &DirLang,
}
}
-void GenerateCaseForVersionedClauses(const std::vector<Record *> &Clauses,
- raw_ostream &OS, StringRef DirectiveName,
- const DirectiveLanguage &DirLang,
- llvm::StringSet<> &Cases) {
+static void
+GenerateCaseForVersionedClauses(const std::vector<Record *> &Clauses,
+ raw_ostream &OS, StringRef DirectiveName,
+ const DirectiveLanguage &DirLang,
+ llvm::StringSet<> &Cases) {
for (const auto &C : Clauses) {
VersionedClause VerClause{C};
@@ -378,8 +382,8 @@ void GenerateCaseForVersionedClauses(const std::vector<Record *> &Clauses,
}
// Generate the isAllowedClauseForDirective function implementation.
-void GenerateIsAllowedClause(const DirectiveLanguage &DirLang,
- raw_ostream &OS) {
+static void GenerateIsAllowedClause(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
OS << "\n";
OS << "bool llvm::" << DirLang.getCppNamespace()
<< "::isAllowedClauseForDirective("
@@ -432,9 +436,10 @@ void GenerateIsAllowedClause(const DirectiveLanguage &DirLang,
}
// Generate a simple enum set with the given clauses.
-void GenerateClauseSet(const std::vector<Record *> &Clauses, raw_ostream &OS,
- StringRef ClauseSetPrefix, Directive &Dir,
- const DirectiveLanguage &DirLang) {
+static void GenerateClauseSet(const std::vector<Record *> &Clauses,
+ raw_ostream &OS, StringRef ClauseSetPrefix,
+ Directive &Dir,
+ const DirectiveLanguage &DirLang) {
OS << "\n";
OS << " static " << DirLang.getClauseEnumSetClass() << " " << ClauseSetPrefix
@@ -450,8 +455,8 @@ void GenerateClauseSet(const std::vector<Record *> &Clauses, raw_ostream &OS,
}
// Generate an enum set for the 4 kinds of clauses linked to a directive.
-void GenerateDirectiveClauseSets(const DirectiveLanguage &DirLang,
- raw_ostream &OS) {
+static void GenerateDirectiveClauseSets(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
IfDefScope Scope("GEN_FLANG_DIRECTIVE_CLAUSE_SETS", OS);
@@ -490,8 +495,8 @@ void GenerateDirectiveClauseSets(const DirectiveLanguage &DirLang,
// Generate a map of directive (key) with DirectiveClauses struct as values.
// The struct holds the 4 sets of enumeration for the 4 kinds of clauses
// allowances (allowed, allowed once, allowed exclusive and required).
-void GenerateDirectiveClauseMap(const DirectiveLanguage &DirLang,
- raw_ostream &OS) {
+static void GenerateDirectiveClauseMap(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
IfDefScope Scope("GEN_FLANG_DIRECTIVE_CLAUSE_MAP", OS);
@@ -525,8 +530,8 @@ void GenerateDirectiveClauseMap(const DirectiveLanguage &DirLang,
// If the clause does not hold a value, an EMPTY_CLASS is used.
// If the clause class is generic then a WRAPPER_CLASS is used. When the value
// is optional, the value class is wrapped into a std::optional.
-void GenerateFlangClauseParserClass(const DirectiveLanguage &DirLang,
- raw_ostream &OS) {
+static void GenerateFlangClauseParserClass(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
IfDefScope Scope("GEN_FLANG_CLAUSE_PARSER_CLASSES", OS);
@@ -553,8 +558,8 @@ void GenerateFlangClauseParserClass(const DirectiveLanguage &DirLang,
}
// Generate a list of the different clause classes for Flang.
-void GenerateFlangClauseParserClassList(const DirectiveLanguage &DirLang,
- raw_ostream &OS) {
+static void GenerateFlangClauseParserClassList(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
IfDefScope Scope("GEN_FLANG_CLAUSE_PARSER_CLASSES_LIST", OS);
@@ -566,8 +571,8 @@ void GenerateFlangClauseParserClassList(const DirectiveLanguage &DirLang,
}
// Generate dump node list for the clauses holding a generic class name.
-void GenerateFlangClauseDump(const DirectiveLanguage &DirLang,
- raw_ostream &OS) {
+static void GenerateFlangClauseDump(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
IfDefScope Scope("GEN_FLANG_DUMP_PARSE_TREE_CLAUSES", OS);
@@ -581,8 +586,8 @@ void GenerateFlangClauseDump(const DirectiveLanguage &DirLang,
// Generate Unparse functions for clauses classes in the Flang parse-tree
// If the clause is a non-generic class, no entry is generated.
-void GenerateFlangClauseUnparse(const DirectiveLanguage &DirLang,
- raw_ostream &OS) {
+static void GenerateFlangClauseUnparse(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
IfDefScope Scope("GEN_FLANG_CLAUSE_UNPARSE", OS);
@@ -633,8 +638,8 @@ void GenerateFlangClauseUnparse(const DirectiveLanguage &DirLang,
}
// Generate check in the Enter functions for clauses classes.
-void GenerateFlangClauseCheckPrototypes(const DirectiveLanguage &DirLang,
- raw_ostream &OS) {
+static void GenerateFlangClauseCheckPrototypes(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
IfDefScope Scope("GEN_FLANG_CLAUSE_CHECK_ENTER", OS);
@@ -648,8 +653,8 @@ void GenerateFlangClauseCheckPrototypes(const DirectiveLanguage &DirLang,
// Generate the mapping for clauses between the parser class and the
// corresponding clause Kind
-void GenerateFlangClauseParserKindMap(const DirectiveLanguage &DirLang,
- raw_ostream &OS) {
+static void GenerateFlangClauseParserKindMap(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
IfDefScope Scope("GEN_FLANG_CLAUSE_PARSER_KIND_MAP", OS);
@@ -669,15 +674,15 @@ void GenerateFlangClauseParserKindMap(const DirectiveLanguage &DirLang,
<< " Parser clause\");\n";
}
-bool compareClauseName(Record *R1, Record *R2) {
+static bool compareClauseName(Record *R1, Record *R2) {
Clause C1{R1};
Clause C2{R2};
return (C1.getName() > C2.getName());
}
// Generate the parser for the clauses.
-void GenerateFlangClausesParser(const DirectiveLanguage &DirLang,
- raw_ostream &OS) {
+static void GenerateFlangClausesParser(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
std::vector<Record *> Clauses = DirLang.getClauses();
  // Sort clauses in reverse alphabetical order so that, for clauses with the
  // same beginning, the longer option is tried first.
@@ -715,6 +720,8 @@ void GenerateFlangClausesParser(const DirectiveLanguage &DirLang,
if (Clause.isValueOptional())
OS << "maybe(";
OS << "parenthesized(";
+ if (Clause.isValueList())
+ OS << "nonemptyList(";
if (!Clause.getPrefix().empty())
OS << "\"" << Clause.getPrefix() << ":\" >> ";
@@ -735,6 +742,8 @@ void GenerateFlangClausesParser(const DirectiveLanguage &DirLang,
OS << Parser;
if (!Clause.getPrefix().empty() && Clause.isPrefixOptional())
OS << " || " << Parser;
+ if (Clause.isValueList()) // close nonemptyList(.
+ OS << ")";
OS << ")"; // close parenthesized(.
if (Clause.isValueOptional()) // close maybe(.
@@ -750,8 +759,8 @@ void GenerateFlangClausesParser(const DirectiveLanguage &DirLang,
// Generate the implementation section for the enumeration in the directive
// language
-void EmitDirectivesFlangImpl(const DirectiveLanguage &DirLang,
- raw_ostream &OS) {
+static void EmitDirectivesFlangImpl(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
GenerateDirectiveClauseSets(DirLang, OS);
@@ -772,8 +781,8 @@ void EmitDirectivesFlangImpl(const DirectiveLanguage &DirLang,
GenerateFlangClausesParser(DirLang, OS);
}
-void GenerateClauseClassMacro(const DirectiveLanguage &DirLang,
- raw_ostream &OS) {
+static void GenerateClauseClassMacro(const DirectiveLanguage &DirLang,
+ raw_ostream &OS) {
// Generate macros style information for legacy code in clang
IfDefScope Scope("GEN_CLANG_CLAUSE_CLASS", OS);
@@ -870,7 +879,7 @@ void EmitDirectivesBasicImpl(const DirectiveLanguage &DirLang,
// Generate the implementation section for the enumeration in the directive
// language.
-void EmitDirectivesImpl(RecordKeeper &Records, raw_ostream &OS) {
+static void EmitDirectivesImpl(RecordKeeper &Records, raw_ostream &OS) {
const auto DirLang = DirectiveLanguage{Records};
if (DirLang.HasValidityErrors())
return;
@@ -882,4 +891,10 @@ void EmitDirectivesImpl(RecordKeeper &Records, raw_ostream &OS) {
EmitDirectivesBasicImpl(DirLang, OS);
}
-} // namespace llvm
+static TableGen::Emitter::Opt
+ X("gen-directive-decl", EmitDirectivesDecl,
+ "Generate directive related declaration code (header file)");
+
+static TableGen::Emitter::Opt
+ Y("gen-directive-impl", EmitDirectivesImpl,
+ "Generate directive related implementation code");
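The change repeated across these TableGen backends is the same: the emitter entry point loses its external-linkage declaration in namespace llvm and is instead registered through TableGen::Emitter::Opt, whose first argument becomes the llvm-tblgen action flag. A minimal sketch of the idiom as used above; EmitMyTables, MyThing and "gen-my-tables" are illustrative names, not part of this patch:

    #include "llvm/Support/raw_ostream.h"
    #include "llvm/TableGen/Record.h"
    #include "llvm/TableGen/TableGenBackend.h"
    using namespace llvm;

    // Internal-linkage backend entry point: walk the parsed records and
    // print the generated code to the output stream.
    static void EmitMyTables(RecordKeeper &Records, raw_ostream &OS) {
      for (Record *R : Records.getAllDerivedDefinitions("MyThing"))
        OS << "// " << R->getName() << "\n";
    }

    // The registration object replaces the old extern function declarations;
    // "gen-my-tables" becomes the action flag that selects this backend.
    static TableGen::Emitter::Opt
        RegisterMyTables("gen-my-tables", EmitMyTables,
                         "Generate MyThing tables (illustrative)");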
diff --git a/llvm/utils/TableGen/DisassemblerEmitter.cpp b/llvm/utils/TableGen/DisassemblerEmitter.cpp
index dfa4b30ee569..92f3721507e5 100644
--- a/llvm/utils/TableGen/DisassemblerEmitter.cpp
+++ b/llvm/utils/TableGen/DisassemblerEmitter.cpp
@@ -7,6 +7,7 @@
//===----------------------------------------------------------------------===//
#include "CodeGenTarget.h"
+#include "TableGenBackends.h"
#include "WebAssemblyDisassemblerEmitter.h"
#include "X86DisassemblerTables.h"
#include "X86RecognizableInstr.h"
@@ -93,12 +94,7 @@ using namespace llvm::X86Disassembler;
/// X86RecognizableInstr.cpp contains the implementation for a single
/// instruction.
-namespace llvm {
-
-extern void EmitDecoder(RecordKeeper &RK, raw_ostream &OS,
- const std::string &PredicateNamespace);
-
-void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) {
+static void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) {
CodeGenTarget Target(Records);
emitSourceFileHeader(" * " + Target.getName().str() + " Disassembler", OS);
@@ -135,4 +131,5 @@ void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) {
EmitDecoder(Records, OS, PredicateNamespace);
}
-} // end namespace llvm
+static TableGen::Emitter::Opt X("gen-disassembler", EmitDisassembler,
+ "Generate disassembler");
diff --git a/llvm/utils/TableGen/ExegesisEmitter.cpp b/llvm/utils/TableGen/ExegesisEmitter.cpp
index bc8ccdac557b..736f1220be14 100644
--- a/llvm/utils/TableGen/ExegesisEmitter.cpp
+++ b/llvm/utils/TableGen/ExegesisEmitter.cpp
@@ -202,10 +202,5 @@ void ExegesisEmitter::run(raw_ostream &OS) const {
} // end anonymous namespace
-namespace llvm {
-
-void EmitExegesis(RecordKeeper &RK, raw_ostream &OS) {
- ExegesisEmitter(RK).run(OS);
-}
-
-} // end namespace llvm
+static TableGen::Emitter::OptClass<ExegesisEmitter>
+ X("gen-exegesis", "Generate llvm-exegesis tables");
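ExegesisEmitter uses the class-based variant instead. Judging from the wrapper it replaces, OptClass<T> constructs T from the RecordKeeper and calls run() on it, so a class-style backend needs only that pair; MyEmitter and "gen-my-emitter" below are illustrative:

    #include "llvm/Support/raw_ostream.h"
    #include "llvm/TableGen/Record.h"
    #include "llvm/TableGen/TableGenBackend.h"
    using namespace llvm;

    class MyEmitter {
      RecordKeeper &Records;

    public:
      explicit MyEmitter(RecordKeeper &RK) : Records(RK) {}
      // OptClass runs MyEmitter(RK).run(OS), mirroring the removed wrapper.
      void run(raw_ostream &OS) {
        OS << "// generated from " << Records.getInputFilename() << "\n";
      }
    };

    static TableGen::Emitter::OptClass<MyEmitter>
        RegisterMyEmitter("gen-my-emitter",
                          "Generate tables via MyEmitter (illustrative)");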
diff --git a/llvm/utils/TableGen/FastISelEmitter.cpp b/llvm/utils/TableGen/FastISelEmitter.cpp
index 0a88f67be168..3f3a63de0c0c 100644
--- a/llvm/utils/TableGen/FastISelEmitter.cpp
+++ b/llvm/utils/TableGen/FastISelEmitter.cpp
@@ -18,6 +18,9 @@
#include "CodeGenDAGPatterns.h"
#include "CodeGenInstruction.h"
+#include "CodeGenRegisters.h"
+#include "CodeGenTarget.h"
+#include "InfoByHwMode.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Error.h"
@@ -854,9 +857,7 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) {
// TODO: SignaturesWithConstantForms should be empty here.
}
-namespace llvm {
-
-void EmitFastISel(RecordKeeper &RK, raw_ostream &OS) {
+static void EmitFastISel(RecordKeeper &RK, raw_ostream &OS) {
CodeGenDAGPatterns CGP(RK);
const CodeGenTarget &Target = CGP.getTargetInfo();
emitSourceFileHeader("\"Fast\" Instruction Selector for the " +
@@ -872,4 +873,5 @@ void EmitFastISel(RecordKeeper &RK, raw_ostream &OS) {
F.printFunctionDefinitions(OS);
}
-} // End llvm namespace
+static TableGen::Emitter::Opt X("gen-fast-isel", EmitFastISel,
+ "Generate a \"fast\" instruction selector");
diff --git a/llvm/utils/TableGen/GICombinerEmitter.cpp b/llvm/utils/TableGen/GICombinerEmitter.cpp
index 2ae313081a6f..ec26024b6518 100644
--- a/llvm/utils/TableGen/GICombinerEmitter.cpp
+++ b/llvm/utils/TableGen/GICombinerEmitter.cpp
@@ -14,7 +14,11 @@
#include "CodeGenTarget.h"
#include "GlobalISel/CodeExpander.h"
#include "GlobalISel/CodeExpansions.h"
+#include "GlobalISel/CombinerUtils.h"
#include "GlobalISel/GIMatchDag.h"
+#include "GlobalISel/GIMatchDagEdge.h"
+#include "GlobalISel/GIMatchDagInstr.h"
+#include "GlobalISel/GIMatchDagOperands.h"
#include "GlobalISel/GIMatchDagPredicate.h"
#include "GlobalISel/GIMatchTree.h"
#include "llvm/ADT/SmallSet.h"
@@ -24,6 +28,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/StringMatcher.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <cstdint>
@@ -38,14 +43,14 @@ STATISTIC(NumPatternTotalStatistic, "Total number of patterns");
cl::OptionCategory
GICombinerEmitterCat("Options for -gen-global-isel-combiner");
-static cl::list<std::string>
+cl::list<std::string>
SelectedCombiners("combiners", cl::desc("Emit the specified combiners"),
cl::cat(GICombinerEmitterCat), cl::CommaSeparated);
static cl::opt<bool> ShowExpansions(
"gicombiner-show-expansions",
cl::desc("Use C++ comments to indicate occurence of code expansion"),
cl::cat(GICombinerEmitterCat));
-static cl::opt<bool> StopAfterParse(
+cl::opt<bool> StopAfterParse(
"gicombiner-stop-after-parse",
cl::desc("Stop processing after parsing rules and dump state"),
cl::cat(GICombinerEmitterCat));
@@ -277,55 +282,6 @@ public:
}
};
-/// A convenience function to check that an Init refers to a specific def. This
-/// is primarily useful for testing for defs and similar in DagInit's since
-/// DagInit's support any type inside them.
-static bool isSpecificDef(const Init &N, StringRef Def) {
- if (const DefInit *OpI = dyn_cast<DefInit>(&N))
- if (OpI->getDef()->getName() == Def)
- return true;
- return false;
-}
-
-/// A convenience function to check that an Init refers to a def that is a
-/// subclass of the given class and coerce it to a def if it is. This is
-/// primarily useful for testing for subclasses of GIMatchKind and similar in
-/// DagInit's since DagInit's support any type inside them.
-static Record *getDefOfSubClass(const Init &N, StringRef Cls) {
- if (const DefInit *OpI = dyn_cast<DefInit>(&N))
- if (OpI->getDef()->isSubClassOf(Cls))
- return OpI->getDef();
- return nullptr;
-}
-
-/// A convenience function to check that an Init refers to a dag whose operator
-/// is a specific def and coerce it to a dag if it is. This is primarily useful
-/// for testing for subclasses of GIMatchKind and similar in DagInit's since
-/// DagInit's support any type inside them.
-static const DagInit *getDagWithSpecificOperator(const Init &N,
- StringRef Name) {
- if (const DagInit *I = dyn_cast<DagInit>(&N))
- if (I->getNumArgs() > 0)
- if (const DefInit *OpI = dyn_cast<DefInit>(I->getOperator()))
- if (OpI->getDef()->getName() == Name)
- return I;
- return nullptr;
-}
-
-/// A convenience function to check that an Init refers to a dag whose operator
-/// is a def that is a subclass of the given class and coerce it to a dag if it
-/// is. This is primarily useful for testing for subclasses of GIMatchKind and
-/// similar in DagInit's since DagInit's support any type inside them.
-static const DagInit *getDagWithOperatorOfSubClass(const Init &N,
- StringRef Cls) {
- if (const DagInit *I = dyn_cast<DagInit>(&N))
- if (I->getNumArgs() > 0)
- if (const DefInit *OpI = dyn_cast<DefInit>(I->getOperator()))
- if (OpI->getDef()->isSubClassOf(Cls))
- return I;
- return nullptr;
-}
-
StringRef makeNameForAnonInstr(CombineRule &Rule) {
return insertStrTab(to_string(
format("__anon%" PRIu64 "_%u", Rule.getID(), Rule.allocUID())));
@@ -1062,8 +1018,14 @@ void GICombinerEmitter::run(raw_ostream &OS) {
//===----------------------------------------------------------------------===//
-namespace llvm {
-void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS) {
+static void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS) {
+ PrintWarning(
+ "'-gen-global-isel-combiner' is deprecated and will be removed soon; "
+ "please use '-gen-global-isel-combiner-match-table' instead");
+ PrintNote(
+ "See "
+ "https://discourse.llvm.org/t/rfc-matchtable-based-globalisel-combiners");
+
CodeGenTarget Target(RK);
emitSourceFileHeader("Global Combiner", OS);
@@ -1078,4 +1040,5 @@ void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS) {
NumPatternTotalStatistic = NumPatternTotal;
}
-} // namespace llvm
+static TableGen::Emitter::Opt X("gen-global-isel-combiner", EmitGICombiner,
+ "Generate GlobalISel combiner");
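The two command-line options above deliberately drop their static qualifier: the new match-table based combiner emitter added later in this diff refers to the same objects through extern declarations instead of defining its own copies, so a single -combiners / -gicombiner-stop-after-parse setting drives both backends:

    // As declared in GlobalISelCombinerMatchTableEmitter.cpp further down:
    extern cl::list<std::string> SelectedCombiners;
    extern cl::opt<bool> StopAfterParse;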
diff --git a/llvm/utils/TableGen/GlobalISel/CodeExpander.h b/llvm/utils/TableGen/GlobalISel/CodeExpander.h
index 1291eb1ad940..0b1e6ceab52c 100644
--- a/llvm/utils/TableGen/GlobalISel/CodeExpander.h
+++ b/llvm/utils/TableGen/GlobalISel/CodeExpander.h
@@ -1,4 +1,4 @@
-//===- CodeExpander.h - Expand variables in a string ----------------------===//
+//===- CodeExpander.h - Expand variables in a string ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/utils/TableGen/GlobalISel/CombinerUtils.h b/llvm/utils/TableGen/GlobalISel/CombinerUtils.h
new file mode 100644
index 000000000000..394c43e3fa83
--- /dev/null
+++ b/llvm/utils/TableGen/GlobalISel/CombinerUtils.h
@@ -0,0 +1,72 @@
+//===- CombinerUtils.h ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Utility functions used by both Combiner backends.
+/// TODO: Can remove when MatchDAG-based backend is removed.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_COMBINERUTILS_H
+#define LLVM_UTILS_TABLEGEN_COMBINERUTILS_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/TableGen/Record.h"
+
+namespace llvm {
+
+/// A convenience function to check that an Init refers to a specific def. This
+/// is primarily useful for testing for defs and similar in DagInit's since
+/// DagInit's support any type inside them.
+inline bool isSpecificDef(const Init &N, StringRef Def) {
+ if (const DefInit *OpI = dyn_cast<DefInit>(&N))
+ if (OpI->getDef()->getName() == Def)
+ return true;
+ return false;
+}
+
+/// A convenience function to check that an Init refers to a def that is a
+/// subclass of the given class and coerce it to a def if it is. This is
+/// primarily useful for testing for subclasses of GIMatchKind and similar in
+/// DagInit's since DagInit's support any type inside them.
+inline Record *getDefOfSubClass(const Init &N, StringRef Cls) {
+ if (const DefInit *OpI = dyn_cast<DefInit>(&N))
+ if (OpI->getDef()->isSubClassOf(Cls))
+ return OpI->getDef();
+ return nullptr;
+}
+
+/// A convenience function to check that an Init refers to a dag whose operator
+/// is a specific def and coerce it to a dag if it is. This is primarily useful
+/// for testing for subclasses of GIMatchKind and similar in DagInit's since
+/// DagInit's support any type inside them.
+inline const DagInit *getDagWithSpecificOperator(const Init &N,
+ StringRef Name) {
+ if (const DagInit *I = dyn_cast<DagInit>(&N))
+ if (I->getNumArgs() > 0)
+ if (const DefInit *OpI = dyn_cast<DefInit>(I->getOperator()))
+ if (OpI->getDef()->getName() == Name)
+ return I;
+ return nullptr;
+}
+
+/// A convenience function to check that an Init refers to a dag whose operator
+/// is a def that is a subclass of the given class and coerce it to a dag if it
+/// is. This is primarily useful for testing for subclasses of GIMatchKind and
+/// similar in DagInit's since DagInit's support any type inside them.
+inline const DagInit *getDagWithOperatorOfSubClass(const Init &N,
+ StringRef Cls) {
+ if (const DagInit *I = dyn_cast<DagInit>(&N))
+ if (I->getNumArgs() > 0)
+ if (const DefInit *OpI = dyn_cast<DefInit>(I->getOperator()))
+ if (OpI->getDef()->isSubClassOf(Cls))
+ return I;
+ return nullptr;
+}
+} // namespace llvm
+
+#endif
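Because the helpers are defined inline in the header, both combiner backends can include CombinerUtils.h without duplicating the file-static versions removed above. A minimal usage sketch; the dag shape and the names G_ADD, implicit_def and GIMatchKind are illustrative, not taken from the actual combine records:

    #include "GlobalISel/CombinerUtils.h" // path relative to llvm/utils/TableGen
    using namespace llvm;

    static void inspectMatchDag(const DagInit &D) {
      // Coerce the dag only if its operator is the def named G_ADD.
      if (const DagInit *Add = getDagWithSpecificOperator(D, "G_ADD")) {
        for (Init *Arg : Add->getArgs()) {
          if (isSpecificDef(*Arg, "implicit_def"))
            continue; // skip a specific placeholder def
          if (Record *Kind = getDefOfSubClass(*Arg, "GIMatchKind"))
            (void)Kind; // the operand is a def of the expected class
        }
      }
    }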
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDag.h b/llvm/utils/TableGen/GlobalISel/GIMatchDag.h
index 4c3c610aff74..c566dd73f709 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDag.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDag.h
@@ -1,4 +1,4 @@
-//===- GIMatchDag.h - Represent a DAG to be matched -----------------------===//
+//===- GIMatchDag.h - Represent a DAG to be matched -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.h b/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.h
index 8e845ff0a51e..e76ef1b4a3aa 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.h
@@ -1,4 +1,4 @@
-//===- GIMatchDagEdge.h - Represent a shared operand list for nodes -------===//
+//===- GIMatchDagEdge.h - Represent node shared operand lists ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h b/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h
index 5e60448b30c1..d2c746dda9e9 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h
@@ -1,4 +1,4 @@
-//===- GIMatchDagInstr.h - Represent a instruction to be matched ----------===//
+//===- GIMatchDagInstr.h - Represent instruction to be matched --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagOperands.h b/llvm/utils/TableGen/GlobalISel/GIMatchDagOperands.h
index c2d30574231d..ae7190cb7296 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagOperands.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagOperands.h
@@ -1,4 +1,4 @@
-//===- GIMatchDagOperands.h - Represent a shared operand list for nodes ---===//
+//===- GIMatchDagOperands.h - Represent operand lists for nodes -*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h
index 96fef21b7627..952cbdb24f54 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h
@@ -1,4 +1,4 @@
-//===- GIMatchDagPredicate - Represent a predicate to check ---------------===//
+//===- GIMatchDagPredicate - Represent a predicate to check -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp b/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp
index d98884493e84..23697fd9e2e2 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchTree.cpp
@@ -89,20 +89,20 @@ GIMatchTreeBuilderLeafInfo::GIMatchTreeBuilderLeafInfo(
TraversableEdges(MatchDag.getNumEdges()),
TestablePredicates(MatchDag.getNumPredicates()) {
// Number all the predicates in this DAG
- for (auto &P : enumerate(MatchDag.predicates())) {
- PredicateIDs.insert(std::make_pair(P.value(), P.index()));
+ for (const auto &[Idx, P] : enumerate(MatchDag.predicates())) {
+ PredicateIDs.insert(std::make_pair(P, Idx));
}
// Number all the predicate dependencies in this DAG and set up a bitvector
// for each predicate indicating the unsatisfied dependencies.
- for (auto &Dep : enumerate(MatchDag.predicate_edges())) {
- PredicateDepIDs.insert(std::make_pair(Dep.value(), Dep.index()));
+ for (const auto &[Idx, Dep] : enumerate(MatchDag.predicate_edges())) {
+ PredicateDepIDs.insert(std::make_pair(Dep, Idx));
}
UnsatisfiedPredDepsForPred.resize(MatchDag.getNumPredicates(),
BitVector(PredicateDepIDs.size()));
- for (auto &Dep : enumerate(MatchDag.predicate_edges())) {
- unsigned ID = PredicateIDs.lookup(Dep.value()->getPredicate());
- UnsatisfiedPredDepsForPred[ID].set(Dep.index());
+ for (const auto &[Idx, Dep] : enumerate(MatchDag.predicate_edges())) {
+ unsigned ID = PredicateIDs.lookup(Dep->getPredicate());
+ UnsatisfiedPredDepsForPred[ID].set(Idx);
}
}
@@ -134,10 +134,10 @@ void GIMatchTreeBuilderLeafInfo::declareInstr(const GIMatchDagInstr *Instr, unsi
// Mark the dependencies that are now satisfied as a result of this
// instruction and mark any predicates whose dependencies are fully
// satisfied.
- for (auto &Dep : enumerate(MatchDag.predicate_edges())) {
+ for (const auto &Dep : enumerate(MatchDag.predicate_edges())) {
if (Dep.value()->getRequiredMI() == Instr &&
Dep.value()->getRequiredMO() == nullptr) {
- for (auto &DepsFor : enumerate(UnsatisfiedPredDepsForPred)) {
+ for (const auto &DepsFor : enumerate(UnsatisfiedPredDepsForPred)) {
DepsFor.value().reset(Dep.index());
if (DepsFor.value().none())
TestablePredicates.set(DepsFor.index());
@@ -157,10 +157,9 @@ void GIMatchTreeBuilderLeafInfo::declareOperand(unsigned InstrID,
// When an operand becomes reachable, we potentially activate some traversals.
// Record the edges that can now be followed as a result of this
// instruction.
- for (auto &E : enumerate(MatchDag.edges())) {
- if (E.value()->getFromMI() == Instr &&
- E.value()->getFromMO()->getIdx() == OpIdx) {
- TraversableEdges.set(E.index());
+ for (const auto &[Idx, E] : enumerate(MatchDag.edges())) {
+ if (E->getFromMI() == Instr && E->getFromMO()->getIdx() == OpIdx) {
+ TraversableEdges.set(Idx);
}
}
@@ -168,10 +167,10 @@ void GIMatchTreeBuilderLeafInfo::declareOperand(unsigned InstrID,
// Clear the dependencies that are now satisfied as a result of this
// operand and activate any predicates whose dependencies are fully
// satisfied.
- for (auto &Dep : enumerate(MatchDag.predicate_edges())) {
+ for (const auto &Dep : enumerate(MatchDag.predicate_edges())) {
if (Dep.value()->getRequiredMI() == Instr && Dep.value()->getRequiredMO() &&
Dep.value()->getRequiredMO()->getIdx() == OpIdx) {
- for (auto &DepsFor : enumerate(UnsatisfiedPredDepsForPred)) {
+ for (const auto &DepsFor : enumerate(UnsatisfiedPredDepsForPred)) {
DepsFor.value().reset(Dep.index());
if (DepsFor.value().none())
TestablePredicates.set(DepsFor.index());
@@ -231,25 +230,6 @@ void GIMatchTreeBuilder::runStep() {
dbgs() << "\n");
#endif // ifndef NDEBUG
- // Check for unreachable rules. Rules are unreachable if they are preceeded by
- // a fully tested rule.
- // Note: This is only true for the current algorithm, if we allow the
- // algorithm to compare equally valid rules then they will become
- // reachable.
- {
- auto FullyTestedLeafI = Leaves.end();
- for (auto LeafI = Leaves.begin(), LeafE = Leaves.end();
- LeafI != LeafE; ++LeafI) {
- if (LeafI->isFullyTraversed() && LeafI->isFullyTested())
- FullyTestedLeafI = LeafI;
- else if (FullyTestedLeafI != Leaves.end()) {
- PrintError("Leaf " + LeafI->getName() + " is unreachable");
- PrintNote("Leaf " + FullyTestedLeafI->getName() +
- " will have already matched");
- }
- }
- }
-
LLVM_DEBUG(dbgs() << " Eliminating redundant partitioners:\n");
filterRedundantPartitioners();
LLVM_DEBUG(dbgs() << " Partitioners remaining:\n");
@@ -339,9 +319,9 @@ void GIMatchTreeBuilder::runStep() {
"Must always partition into at least one partition");
TreeNode->setNumChildren(Partitioner->getNumPartitions());
- for (auto &C : enumerate(TreeNode->children())) {
- SubtreeBuilders.emplace_back(&C.value(), NextInstrID);
- Partitioner->applyForPartition(C.index(), *this, SubtreeBuilders.back());
+ for (const auto &[Idx, Child] : enumerate(TreeNode->children())) {
+ SubtreeBuilders.emplace_back(&Child, NextInstrID);
+ Partitioner->applyForPartition(Idx, *this, SubtreeBuilders.back());
}
TreeNode->setPartitioner(std::move(Partitioner));
@@ -536,22 +516,22 @@ void GIMatchTreeOpcodePartitioner::applyForPartition(
BitVector PossibleLeaves = getPossibleLeavesForPartition(PartitionIdx);
// Consume any predicates we handled.
- for (auto &EnumeratedLeaf : enumerate(Builder.getPossibleLeaves())) {
- if (!PossibleLeaves[EnumeratedLeaf.index()])
+ for (const auto &[Index, EnumeratedLeaf] :
+ enumerate(Builder.getPossibleLeaves())) {
+ if (!PossibleLeaves[Index])
continue;
- auto &Leaf = EnumeratedLeaf.value();
- const auto &TestedPredicatesForLeaf =
- TestedPredicates[EnumeratedLeaf.index()];
+ const auto &TestedPredicatesForLeaf = TestedPredicates[Index];
for (unsigned PredIdx : TestedPredicatesForLeaf.set_bits()) {
- LLVM_DEBUG(dbgs() << " " << Leaf.getName() << " tested predicate #"
- << PredIdx << " of " << TestedPredicatesForLeaf.size()
- << " " << *Leaf.getPredicate(PredIdx) << "\n");
- Leaf.RemainingPredicates.reset(PredIdx);
- Leaf.TestablePredicates.reset(PredIdx);
+ LLVM_DEBUG(dbgs() << " " << EnumeratedLeaf.getName()
+ << " tested predicate #" << PredIdx << " of "
+ << TestedPredicatesForLeaf.size() << " "
+ << *EnumeratedLeaf.getPredicate(PredIdx) << "\n");
+ EnumeratedLeaf.RemainingPredicates.reset(PredIdx);
+ EnumeratedLeaf.TestablePredicates.reset(PredIdx);
}
- SubBuilder.addLeaf(Leaf);
+ SubBuilder.addLeaf(EnumeratedLeaf);
}
// Nothing to do, we don't know anything about this instruction as a result
@@ -571,11 +551,11 @@ void GIMatchTreeOpcodePartitioner::applyForPartition(
if (!InstrInfo)
continue;
const GIMatchDagInstr *Instr = InstrInfo->getInstrNode();
- for (auto &E : enumerate(Leaf.getMatchDag().edges())) {
- if (E.value()->getFromMI() == Instr &&
- E.value()->getFromMO()->getIdx() < CGI->Operands.size()) {
- ReferencedOperands.resize(E.value()->getFromMO()->getIdx() + 1);
- ReferencedOperands.set(E.value()->getFromMO()->getIdx());
+ for (const auto &E : Leaf.getMatchDag().edges()) {
+ if (E->getFromMI() == Instr &&
+ E->getFromMO()->getIdx() < CGI->Operands.size()) {
+ ReferencedOperands.resize(E->getFromMO()->getIdx() + 1);
+ ReferencedOperands.set(E->getFromMO()->getIdx());
}
}
}
@@ -682,12 +662,7 @@ void GIMatchTreeVRegDefPartitioner::repartition(
WantsEdge = true;
}
- bool isNotReg = false;
- if (!WantsEdge && isNotReg) {
- // If this leaf doesn't have an edge and we _don't_ want a register,
- // then add it to partition 0.
- addToPartition(false, Leaf.index());
- } else if (!WantsEdge) {
+ if (!WantsEdge) {
// If this leaf doesn't have an edge and we don't know what we want,
// then add it to partition 0 and 1.
addToPartition(false, Leaf.index());
@@ -715,16 +690,16 @@ void GIMatchTreeVRegDefPartitioner::applyForPartition(
std::vector<BitVector> TraversedEdgesByNewLeaves;
// Consume any edges we handled.
- for (auto &EnumeratedLeaf : enumerate(Builder.getPossibleLeaves())) {
- if (!PossibleLeaves[EnumeratedLeaf.index()])
+ for (const auto &[Index, EnumeratedLeaf] :
+ enumerate(Builder.getPossibleLeaves())) {
+ if (!PossibleLeaves[Index])
continue;
- auto &Leaf = EnumeratedLeaf.value();
- const auto &TraversedEdgesForLeaf = TraversedEdges[EnumeratedLeaf.index()];
+ const auto &TraversedEdgesForLeaf = TraversedEdges[Index];
TraversedEdgesByNewLeaves.push_back(TraversedEdgesForLeaf);
- Leaf.RemainingEdges.reset(TraversedEdgesForLeaf);
- Leaf.TraversableEdges.reset(TraversedEdgesForLeaf);
- SubBuilder.addLeaf(Leaf);
+ EnumeratedLeaf.RemainingEdges.reset(TraversedEdgesForLeaf);
+ EnumeratedLeaf.TraversableEdges.reset(TraversedEdgesForLeaf);
+ SubBuilder.addLeaf(EnumeratedLeaf);
}
// Nothing to do. The only thing we know is that it isn't a vreg-def.
@@ -734,7 +709,7 @@ void GIMatchTreeVRegDefPartitioner::applyForPartition(
NewInstrID = SubBuilder.allocInstrID();
GIMatchTreeBuilder::LeafVec &NewLeaves = SubBuilder.getPossibleLeaves();
- for (const auto I : zip(NewLeaves, TraversedEdgesByNewLeaves)) {
+ for (const auto &I : zip(NewLeaves, TraversedEdgesByNewLeaves)) {
auto &Leaf = std::get<0>(I);
auto &TraversedEdgesForLeaf = std::get<1>(I);
GIMatchTreeInstrInfo *InstrInfo = Leaf.getInstrInfo(InstrID);
diff --git a/llvm/utils/TableGen/GlobalISel/GIMatchTree.h b/llvm/utils/TableGen/GlobalISel/GIMatchTree.h
index 0ce4060fe7b4..c65423ddacdb 100644
--- a/llvm/utils/TableGen/GlobalISel/GIMatchTree.h
+++ b/llvm/utils/TableGen/GlobalISel/GIMatchTree.h
@@ -390,7 +390,7 @@ protected:
/// The leaves that the resulting decision tree will distinguish.
LeafVec Leaves;
/// The tree node being constructed.
- GIMatchTree *TreeNode;
+ GIMatchTree *TreeNode = nullptr;
/// The builders for each subtree resulting from the current decision.
std::vector<GIMatchTreeBuilder> SubtreeBuilders;
/// The possible partitioners we could apply right now.
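The GIMatchTree.cpp hunks above replace the result_pair accessors of llvm::enumerate with structured bindings wherever the loop body needs both pieces. The two spellings are equivalent; a standalone sketch with an illustrative Names vector:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/Support/raw_ostream.h"
    #include <vector>
    using namespace llvm;

    int main() {
      std::vector<const char *> Names = {"add", "sub", "mul"};
      // Accessor style, as the old loops used.
      for (const auto &Item : enumerate(Names))
        outs() << Item.index() << ": " << Item.value() << "\n";
      // Structured-binding style adopted here: index first, then element.
      for (const auto &[Idx, Name] : enumerate(Names))
        outs() << Idx << ": " << Name << "\n";
      return 0;
    }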
diff --git a/llvm/utils/TableGen/GlobalISelCombinerMatchTableEmitter.cpp b/llvm/utils/TableGen/GlobalISelCombinerMatchTableEmitter.cpp
new file mode 100644
index 000000000000..3ae66ed01b3a
--- /dev/null
+++ b/llvm/utils/TableGen/GlobalISelCombinerMatchTableEmitter.cpp
@@ -0,0 +1,1575 @@
+//===- GlobalISelCombinerMatchTableEmitter.cpp - --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file Generate a combiner implementation for GlobalISel from a declarative
+/// syntax using GlobalISelMatchTable.
+///
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenInstruction.h"
+#include "CodeGenTarget.h"
+#include "GlobalISel/CodeExpander.h"
+#include "GlobalISel/CodeExpansions.h"
+#include "GlobalISel/CombinerUtils.h"
+#include "GlobalISelMatchTable.h"
+#include "GlobalISelMatchTableExecutorEmitter.h"
+#include "SubtargetFeatureInfo.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/StringMatcher.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <cstdint>
+
+using namespace llvm;
+using namespace llvm::gi;
+
+#define DEBUG_TYPE "gicombiner-matchtable-emitter"
+
+extern cl::list<std::string> SelectedCombiners;
+extern cl::opt<bool> StopAfterParse;
+
+namespace {
+constexpr StringLiteral CXXApplyPrefix = "GICXXCustomAction_CombineApply";
+constexpr StringLiteral CXXPredPrefix = "GICXXPred_MI_Predicate_";
+
+std::string getIsEnabledPredicateEnumName(unsigned CombinerRuleID) {
+ return "GICXXPred_Simple_IsRule" + to_string(CombinerRuleID) + "Enabled";
+}
+
+void declareInstExpansion(CodeExpansions &CE, const InstructionMatcher &IM,
+ StringRef Name) {
+ CE.declare(Name, "State.MIs[" + to_string(IM.getInsnVarID()) + "]");
+}
+
+void declareOperandExpansion(CodeExpansions &CE, const OperandMatcher &OM,
+ StringRef Name) {
+ CE.declare(Name, "State.MIs[" + to_string(OM.getInsnVarID()) +
+ "]->getOperand(" + to_string(OM.getOpIdx()) + ")");
+}
+
+//===- MatchData Handling -------------------------------------------------===//
+
+/// Represents MatchData defined by the match stage and required by the apply
+/// stage.
+///
+/// This allows the plumbing of arbitrary data from C++ predicates between the
+/// stages.
+///
+/// When this class is initially created, it only has a pattern symbol and a
+/// type. When all of the MatchDatas declarations of a given pattern have been
+/// parsed, `AssignVariables` must be called to assign storage variable names to
+/// each MatchDataInfo.
+class MatchDataInfo {
+ StringRef PatternSymbol;
+ StringRef Type;
+ std::string VarName;
+
+public:
+ static constexpr StringLiteral StructTypeName = "MatchInfosTy";
+ static constexpr StringLiteral StructName = "MatchInfos";
+
+ MatchDataInfo(StringRef PatternSymbol, StringRef Type)
+ : PatternSymbol(PatternSymbol), Type(Type.trim()) {}
+
+ StringRef getPatternSymbol() const { return PatternSymbol; };
+ StringRef getType() const { return Type; };
+
+ bool hasVariableName() const { return !VarName.empty(); }
+ void setVariableName(StringRef Name) { VarName = Name; }
+ StringRef getVariableName() const;
+
+ std::string getQualifiedVariableName() const {
+ return StructName.str() + "." + getVariableName().str();
+ }
+
+ void print(raw_ostream &OS) const;
+ void dump() const { print(dbgs()); }
+};
+
+StringRef MatchDataInfo::getVariableName() const {
+ assert(hasVariableName());
+ return VarName;
+}
+
+void MatchDataInfo::print(raw_ostream &OS) const {
+ OS << "(MatchDataInfo pattern_symbol:" << PatternSymbol << " type:'" << Type
+ << "' var_name:" << (VarName.empty() ? "<unassigned>" : VarName) << ")";
+}
+
+/// Pool of type -> variables used to emit MatchData variable declarations.
+///
+/// e.g. if the map contains "int64_t" -> ["MD0", "MD1"], then two variable
+/// declarations must be emitted: `int64_t MD0` and `int64_t MD1`.
+///
+/// This has a static lifetime and will outlive all the `MatchDataInfo` objects
+/// by design. It needs to persist after all `CombineRuleBuilder` objects have
+/// died so we can emit the variable declarations.
+StringMap<std::vector<std::string>> AllMatchDataVars;
+
+// Assign variable names to all MatchDatas used by a pattern. This must be
+// called after all MatchData decls have been parsed inside a rule.
+//
+// Requires an array of MatchDataInfo so we can handle cases where a pattern
+// uses multiple instances of the same MatchData type.
+void AssignMatchDataVariables(MutableArrayRef<MatchDataInfo> Infos) {
+ static unsigned NextVarID = 0;
+
+ StringMap<unsigned> SeenTypes;
+ for (auto &I : Infos) {
+ unsigned &NumSeen = SeenTypes[I.getType()];
+ auto &ExistingVars = AllMatchDataVars[I.getType()];
+
+ if (NumSeen == ExistingVars.size())
+ ExistingVars.push_back("MDInfo" + to_string(NextVarID++));
+
+ I.setVariableName(ExistingVars[NumSeen++]);
+ }
+}
+
+//===- C++ Predicates Handling --------------------------------------------===//
+
+/// Entry into the static pool of all CXX Predicate code. This contains the
+/// fully expanded C++ code.
+///
+/// Each CXXPattern creates a new entry in the pool to store its data, even
+/// after the pattern is destroyed.
+///
+/// Note that CXXPattern trims C++ code, so the Code is already expected to be
+/// free of leading/trailing whitespace.
+struct CXXPredicateCode {
+ CXXPredicateCode(std::string Code, unsigned ID)
+ : Code(Code), ID(ID), BaseEnumName("GICombiner" + to_string(ID)) {
+ assert(StringRef(Code).trim() == Code &&
+ "Code was expected to be trimmed!");
+ }
+
+ const std::string Code;
+ const unsigned ID;
+ const std::string BaseEnumName;
+
+ bool needsUnreachable() const {
+ return !StringRef(Code).starts_with("return");
+ }
+
+ std::string getEnumNameWithPrefix(StringRef Prefix) const {
+ return Prefix.str() + BaseEnumName;
+ }
+};
+
+using CXXPredicateCodePool =
+ DenseMap<hash_code, std::unique_ptr<CXXPredicateCode>>;
+CXXPredicateCodePool AllCXXMatchCode;
+CXXPredicateCodePool AllCXXApplyCode;
+
+/// Gets an instance of `CXXPredicateCode` for \p Code, or returns an already
+/// existing one.
+const CXXPredicateCode &getOrInsert(CXXPredicateCodePool &Pool,
+ std::string Code) {
+ // Check if we already have an identical piece of code, if not, create an
+ // entry in the pool.
+ const auto CodeHash = hash_value(Code);
+ if (auto It = Pool.find(CodeHash); It != Pool.end())
+ return *It->second;
+
+ const auto ID = Pool.size();
+ auto OwnedData = std::make_unique<CXXPredicateCode>(std::move(Code), ID);
+ const auto &DataRef = *OwnedData;
+ Pool[CodeHash] = std::move(OwnedData);
+ return DataRef;
+}
+
+/// Sorts a `CXXPredicateCodePool` by their IDs and returns it.
+std::vector<const CXXPredicateCode *>
+getSorted(const CXXPredicateCodePool &Pool) {
+ std::vector<const CXXPredicateCode *> Out;
+ std::transform(Pool.begin(), Pool.end(), std::back_inserter(Out),
+ [&](auto &Elt) { return Elt.second.get(); });
+ sort(Out, [](const auto *A, const auto *B) { return A->ID < B->ID; });
+ return Out;
+}
+
+//===- Pattern Base Class -------------------------------------------------===//
+
+// An abstract pattern found in a combine rule. This can be an apply or match
+// pattern.
+class Pattern {
+public:
+ enum {
+ K_AnyOpcode,
+ K_Inst,
+ K_CXX,
+ };
+
+ virtual ~Pattern() = default;
+
+ unsigned getKind() const { return Kind; }
+ const char *getKindName() const;
+
+ bool hasName() const { return !Name.empty(); }
+ StringRef getName() const { return Name; }
+
+ virtual void print(raw_ostream &OS, bool PrintName = true) const = 0;
+ void dump() const { return print(dbgs()); }
+
+protected:
+ Pattern(unsigned Kind, StringRef Name) : Kind(Kind), Name(Name.str()) {
+ assert(!Name.empty() && "unnamed pattern!");
+ }
+
+ void printImpl(raw_ostream &OS, bool PrintName,
+ function_ref<void()> ContentPrinter) const;
+
+private:
+ unsigned Kind;
+
+ // Note: if this ever changes to a StringRef (e.g. allocated in a pool or
+ // something), CombineRuleBuilder::verify() needs to be updated as well.
+ // It currently checks that the StringRef in the PatternMap references this.
+ std::string Name;
+};
+
+const char *Pattern::getKindName() const {
+ switch (Kind) {
+ case K_AnyOpcode:
+ return "AnyOpcodePattern";
+ case K_Inst:
+ return "InstructionPattern";
+ case K_CXX:
+ return "CXXPattern";
+ }
+
+ llvm_unreachable("unknown pattern kind!");
+}
+
+void Pattern::printImpl(raw_ostream &OS, bool PrintName,
+ function_ref<void()> ContentPrinter) const {
+ OS << "(" << getKindName() << " ";
+ if (PrintName)
+ OS << "name:" << getName() << " ";
+ ContentPrinter();
+ OS << ")";
+}
+
+//===- AnyOpcodePattern ---------------------------------------------------===//
+
+/// `wip_match_opcode` patterns.
+/// This matches one or more opcodes, and does not check any operands
+/// whatsoever.
+class AnyOpcodePattern : public Pattern {
+public:
+ AnyOpcodePattern(StringRef Name) : Pattern(K_AnyOpcode, Name) {}
+
+ static bool classof(const Pattern *P) { return P->getKind() == K_AnyOpcode; }
+
+ void addOpcode(const CodeGenInstruction *I) { Insts.push_back(I); }
+ const auto &insts() const { return Insts; }
+
+ void print(raw_ostream &OS, bool PrintName = true) const override;
+
+private:
+ SmallVector<const CodeGenInstruction *, 4> Insts;
+};
+
+void AnyOpcodePattern::print(raw_ostream &OS, bool PrintName) const {
+ printImpl(OS, PrintName, [&OS, this]() {
+ OS << "["
+ << join(map_range(Insts,
+ [](const auto *I) { return I->TheDef->getName(); }),
+ ", ")
+ << "]";
+ });
+}
+
+//===- InstructionPattern -------------------------------------------------===//
+
+/// Matches an instruction, e.g. `G_ADD $x, $y, $z`.
+///
+/// This pattern is simply CodeGenInstruction + a list of operands.
+class InstructionPattern : public Pattern {
+public:
+ struct Operand {
+ std::string Name;
+ bool IsDef = false;
+ };
+
+ InstructionPattern(const CodeGenInstruction &I, StringRef Name)
+ : Pattern(K_Inst, Name), I(I) {}
+
+ static bool classof(const Pattern *P) { return P->getKind() == K_Inst; }
+
+ const auto &operands() const { return Operands; }
+ void addOperand(StringRef Name);
+ unsigned getNumDefs() const { return I.Operands.NumDefs; }
+
+ const CodeGenInstruction &getInst() const { return I; }
+ StringRef getInstName() const { return I.TheDef->getName(); }
+
+ void reportUnreachable(ArrayRef<SMLoc> Locs) const;
+ bool checkSemantics(ArrayRef<SMLoc> Loc) const;
+
+ void print(raw_ostream &OS, bool PrintName = true) const override;
+
+private:
+ const CodeGenInstruction &I;
+ SmallVector<Operand, 4> Operands;
+};
+
+void InstructionPattern::addOperand(StringRef Name) {
+ const bool IsDef = Operands.size() < getNumDefs();
+ Operands.emplace_back(Operand{Name.str(), IsDef});
+}
+
+void InstructionPattern::reportUnreachable(ArrayRef<SMLoc> Locs) const {
+ PrintError(Locs, "Instruction pattern '" + getName() +
+ "' is unreachable from the pattern root!");
+}
+
+bool InstructionPattern::checkSemantics(ArrayRef<SMLoc> Loc) const {
+ unsigned NumExpectedOperands = I.Operands.size();
+ if (NumExpectedOperands != Operands.size()) {
+
+ PrintError(Loc, "'" + getInstName() + "' expected " +
+ Twine(NumExpectedOperands) + " operands, got " +
+ Twine(Operands.size()));
+ return false;
+ }
+ return true;
+}
+
+void InstructionPattern::print(raw_ostream &OS, bool PrintName) const {
+ printImpl(OS, PrintName, [&OS, this]() {
+ OS << "inst:" << I.TheDef->getName() << " operands:["
+ << join(map_range(Operands,
+ [](const auto &O) {
+ return (O.IsDef ? "<def>" : "") + O.Name;
+ }),
+ ", ")
+ << "]";
+ });
+}
+
+//===- CXXPattern ---------------------------------------------------------===//
+
+/// Raw C++ code which may need some expansions.
+///
+/// e.g. [{ return isFooBux(${src}.getReg()); }]
+///
+/// For the expanded code, \see CXXPredicateCode. CXXPredicateCode objects are
+/// created through `expandCode`.
+///
+/// \see CodeExpander and \see CodeExpansions for more information on code
+/// expansions.
+///
+/// This object has two purposes:
+/// - Represent C++ code as a pattern entry.
+/// - Be a factory for expanded C++ code.
+///   - It's immutable and only holds the raw code so we can expand the same
+///     CXX pattern multiple times if we need to.
+///
+/// Note that the code is always trimmed in the constructor, so leading and
+/// trailing whitespaces are removed. This removes bloat in the output, avoids
+/// formatting issues, but also allows us to check things like
+/// `.startswith("return")` trivially without worrying about spaces.
+class CXXPattern : public Pattern {
+public:
+ CXXPattern(const StringInit &Code, StringRef Name, bool IsApply)
+ : CXXPattern(Code.getAsUnquotedString(), Name, IsApply) {}
+
+ CXXPattern(StringRef Code, StringRef Name, bool IsApply)
+ : Pattern(K_CXX, Name), IsApply(IsApply), RawCode(Code.trim().str()) {}
+
+ static bool classof(const Pattern *P) { return P->getKind() == K_CXX; }
+
+ bool isApply() const { return IsApply; }
+ StringRef getRawCode() const { return RawCode; }
+
+ /// Expands raw code, replacing things such as `${foo}` with their
+ /// substitution in \p CE.
+ ///
+ /// \param CE Map of Code Expansions
+ /// \param Locs SMLocs for the Code Expander, in case it needs to emit
+ /// diagnostics.
+ /// \return A CXXPredicateCode object that contains the expanded code. Note
+ /// that this may or may not insert a new object. All CXXPredicateCode objects
+ /// are held in a set to avoid emitting duplicate C++ code.
+ const CXXPredicateCode &expandCode(const CodeExpansions &CE,
+ ArrayRef<SMLoc> Locs) const;
+
+ void print(raw_ostream &OS, bool PrintName = true) const override;
+
+private:
+ bool IsApply;
+ std::string RawCode;
+};
+
+const CXXPredicateCode &CXXPattern::expandCode(const CodeExpansions &CE,
+ ArrayRef<SMLoc> Locs) const {
+ std::string Result;
+ raw_string_ostream OS(Result);
+ CodeExpander Expander(RawCode, CE, Locs, /*ShowExpansions*/ false);
+ Expander.emit(OS);
+ return getOrInsert(IsApply ? AllCXXApplyCode : AllCXXMatchCode,
+ std::move(Result));
+}
+
+void CXXPattern::print(raw_ostream &OS, bool PrintName) const {
+ printImpl(OS, PrintName, [&OS, this] {
+ OS << (IsApply ? "apply" : "match") << " code:\"";
+ printEscapedString(getRawCode(), OS);
+ OS << "\"";
+ });
+}
+
+//===- CombineRuleBuilder -------------------------------------------------===//
+
+/// Helper for CombineRuleBuilder.
+///
+/// Represents information about an operand.
+/// Operands with no MatchPat are considered live-in to the pattern.
+struct OperandTableEntry {
+ // The matcher pattern that defines this operand.
+ // null for live-ins.
+ InstructionPattern *MatchPat = nullptr;
+ // The apply pattern that (re)defines this operand.
+ // This can only be non-null if MatchPat is.
+ InstructionPattern *ApplyPat = nullptr;
+
+ bool isLiveIn() const { return !MatchPat; }
+};
+
+/// Parses combine rule and builds a small intermediate representation to tie
+/// patterns together and emit RuleMatchers to match them. This may emit more
+/// than one RuleMatcher, e.g. for `wip_match_opcode`.
+///
+/// Memory management for `Pattern` objects is done through `std::unique_ptr`.
+/// In most cases, there are two stages to a pattern's lifetime:
+/// - Creation in a `parse` function
+/// - The unique_ptr is stored in a variable, and may be destroyed if the
+/// pattern is found to be semantically invalid.
+/// - Ownership transfer into a `PatternMap`
+/// - Once a pattern is moved into either the map of Match or Apply
+/// patterns, it is known to be valid and it never moves back.
+class CombineRuleBuilder {
+public:
+ using PatternMap = MapVector<StringRef, std::unique_ptr<Pattern>>;
+
+ CombineRuleBuilder(const CodeGenTarget &CGT,
+ SubtargetFeatureInfoMap &SubtargetFeatures,
+ Record &RuleDef, unsigned ID,
+ std::vector<RuleMatcher> &OutRMs)
+ : CGT(CGT), SubtargetFeatures(SubtargetFeatures), RuleDef(RuleDef),
+ RuleID(ID), OutRMs(OutRMs) {}
+
+ /// Parses all fields in the RuleDef record.
+ bool parseAll();
+
+ /// Emits all RuleMatchers into the vector of RuleMatchers passed in the
+ /// constructor.
+ bool emitRuleMatchers();
+
+ void print(raw_ostream &OS) const;
+ void dump() const { print(dbgs()); }
+
+ /// Debug-only verification of invariants.
+ void verify() const;
+
+private:
+ void PrintError(Twine Msg) const { ::PrintError(RuleDef.getLoc(), Msg); }
+
+ /// Adds the expansions from \see MatchDatas to \p CE.
+ void declareAllMatchDatasExpansions(CodeExpansions &CE) const;
+
+ /// Adds \p P to \p IM, expanding its code using \p CE.
+ void addCXXPredicate(InstructionMatcher &IM, const CodeExpansions &CE,
+ const CXXPattern &P);
+
+ /// Generates a name for anonymous patterns.
+ ///
+ /// e.g. (G_ADD $x, $y, $z):$foo is a pattern named "foo", but if ":$foo" is
+ /// absent, then the pattern is anonymous and this is used to assign it a
+ /// name.
+ std::string makeAnonPatName(StringRef Prefix) const;
+ mutable unsigned AnonIDCnt = 0;
+
+ /// Creates a new RuleMatcher with some boilerplate
+  /// settings/actions/predicates, and adds it to \p OutRMs.
+ /// \see addFeaturePredicates too.
+ ///
+ /// \param AdditionalComment Comment string to be added to the
+ /// `DebugCommentAction`.
+ RuleMatcher &addRuleMatcher(Twine AdditionalComment = "");
+ bool addFeaturePredicates(RuleMatcher &M);
+
+ bool findRoots();
+ bool buildOperandsTable();
+
+ bool parseDefs(DagInit &Def);
+ bool parseMatch(DagInit &Match);
+ bool parseApply(DagInit &Apply);
+
+ std::unique_ptr<Pattern> parseInstructionMatcher(const Init &Arg,
+ StringRef PatName);
+ std::unique_ptr<Pattern> parseWipMatchOpcodeMatcher(const Init &Arg,
+ StringRef PatName);
+
+ bool emitMatchPattern(CodeExpansions &CE, const InstructionPattern &IP);
+ bool emitMatchPattern(CodeExpansions &CE, const AnyOpcodePattern &AOP);
+
+ bool emitApplyPatterns(CodeExpansions &CE, RuleMatcher &M);
+
+ // Recursively visits InstructionPattern from P to build up the
+ // RuleMatcher/InstructionMatcher. May create new InstructionMatchers as
+ // needed.
+ bool emitInstructionMatchPattern(CodeExpansions &CE, RuleMatcher &M,
+ InstructionMatcher &IM,
+ const InstructionPattern &P,
+ DenseSet<const Pattern *> &SeenPats);
+
+ const CodeGenTarget &CGT;
+ SubtargetFeatureInfoMap &SubtargetFeatures;
+ Record &RuleDef;
+ const unsigned RuleID;
+ std::vector<RuleMatcher> &OutRMs;
+
+ // For InstructionMatcher::addOperand
+ unsigned AllocatedTemporariesBaseID = 0;
+
+ /// The root of the pattern.
+ StringRef RootName;
+
+ /// These maps have ownership of the actual Pattern objects.
+ /// They both map a Pattern's name to the Pattern instance.
+ PatternMap MatchPats;
+ PatternMap ApplyPats;
+
+ /// Set by findRoots.
+ Pattern *MatchRoot = nullptr;
+
+ MapVector<StringRef, OperandTableEntry> OperandTable;
+ SmallVector<MatchDataInfo, 2> MatchDatas;
+};
+
+bool CombineRuleBuilder::parseAll() {
+ if (!parseDefs(*RuleDef.getValueAsDag("Defs")))
+ return false;
+ if (!parseMatch(*RuleDef.getValueAsDag("Match")))
+ return false;
+ if (!parseApply(*RuleDef.getValueAsDag("Apply")))
+ return false;
+ if (!buildOperandsTable())
+ return false;
+ if (!findRoots())
+ return false;
+ LLVM_DEBUG(verify());
+ return true;
+}
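+
+// For illustration, the record shape parseAll consumes is roughly the
+// following (rule and MatchData names here are hypothetical, shown only as a
+// sketch of the "Defs"/"Match"/"Apply" dag fields read above):
+//
+//   def my_hypothetical_rule : GICombineRule<
+//     (defs root:$dst, my_matchdata:$info),
+//     (match (G_FNEG $dst, $src),
+//            [{ return matchSomething(${src}, ${info}); }]),
+//     (apply [{ applySomething(${dst}, ${info}); }])>;
+//
+// parseDefs handles the root and GIDefMatchData entries, parseMatch the
+// instruction pattern and the C++ fragment, and parseApply the C++ apply code.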
+
+bool CombineRuleBuilder::emitRuleMatchers() {
+ assert(MatchRoot);
+ CodeExpansions CE;
+ declareAllMatchDatasExpansions(CE);
+
+ switch (MatchRoot->getKind()) {
+ case Pattern::K_AnyOpcode: {
+ if (!emitMatchPattern(CE, *cast<AnyOpcodePattern>(MatchRoot)))
+ return false;
+ break;
+ }
+ case Pattern::K_Inst:
+ if (!emitMatchPattern(CE, *cast<InstructionPattern>(MatchRoot)))
+ return false;
+ break;
+ case Pattern::K_CXX:
+ PrintError("C++ code cannot be the root of a pattern!");
+ return false;
+ default:
+ llvm_unreachable("unknown pattern kind!");
+ }
+
+ return true;
+}
+
+void CombineRuleBuilder::print(raw_ostream &OS) const {
+ OS << "(CombineRule name:" << RuleDef.getName() << " id:" << RuleID
+ << " root:" << RootName << "\n";
+
+ OS << " (MatchDatas ";
+ if (MatchDatas.empty())
+ OS << "<empty>)\n";
+ else {
+ OS << "\n";
+ for (const auto &MD : MatchDatas) {
+ OS << " ";
+ MD.print(OS);
+ OS << "\n";
+ }
+ OS << " )\n";
+ }
+
+ const auto DumpPats = [&](StringRef Name, const PatternMap &Pats) {
+ OS << " (" << Name << " ";
+ if (Pats.empty()) {
+ OS << "<empty>)\n";
+ return;
+ }
+
+ OS << "\n";
+ for (const auto &[Name, Pat] : Pats) {
+ OS << " ";
+ if (Pat.get() == MatchRoot)
+ OS << "<root>";
+ OS << Name << ":";
+ Pat->print(OS, /*PrintName=*/false);
+ OS << "\n";
+ }
+ OS << " )\n";
+ };
+
+ DumpPats("MatchPats", MatchPats);
+ DumpPats("ApplyPats", ApplyPats);
+
+ OS << " (OperandTable ";
+ if (OperandTable.empty())
+ OS << "<empty>)\n";
+ else {
+ OS << "\n";
+ for (const auto &[Key, Val] : OperandTable) {
+ OS << " [" << Key;
+ if (const auto *P = Val.MatchPat)
+ OS << " match_pat:" << P->getName();
+ if (const auto *P = Val.ApplyPat)
+ OS << " apply_pat:" << P->getName();
+ if (Val.isLiveIn())
+ OS << " live-in";
+ OS << "]\n";
+ }
+ OS << " )\n";
+ }
+
+ OS << ")\n";
+}
+
+void CombineRuleBuilder::verify() const {
+ const auto VerifyPats = [&](const PatternMap &Pats) {
+ for (const auto &[Name, Pat] : Pats) {
+ if (!Pat)
+ PrintFatalError("null pattern in pattern map!");
+
+ if (Name != Pat->getName()) {
+ Pat->dump();
+ PrintFatalError("Pattern name mismatch! Map name: " + Name +
+ ", Pat name: " + Pat->getName());
+ }
+
+ // As an optimization, the PatternMaps don't re-allocate the PatternName
+ // string. They simply reference the std::string inside Pattern. Ensure
+ // this is the case to avoid memory issues.
+ if (Name.data() != Pat->getName().data()) {
+ dbgs() << "Map StringRef: '" << Name << "' @ "
+ << (const void *)Name.data() << "\n";
+ dbgs() << "Pat String: '" << Pat->getName() << "' @ "
+ << (const void *)Pat->getName().data() << "\n";
+ PrintFatalError("StringRef stored in the PatternMap is not referencing "
+ "the same string as its Pattern!");
+ }
+ }
+ };
+
+ VerifyPats(MatchPats);
+ VerifyPats(ApplyPats);
+
+ for (const auto &[Name, Op] : OperandTable) {
+ if (Op.ApplyPat && !Op.MatchPat) {
+ dump();
+ PrintFatalError("Operand " + Name +
+ " has an apply pattern, but no match pattern!");
+ }
+ }
+}
+
+bool CombineRuleBuilder::addFeaturePredicates(RuleMatcher &M) {
+ if (!RuleDef.getValue("Predicates"))
+ return true;
+
+ ListInit *Preds = RuleDef.getValueAsListInit("Predicates");
+ for (Init *I : Preds->getValues()) {
+ if (DefInit *Pred = dyn_cast<DefInit>(I)) {
+ Record *Def = Pred->getDef();
+ if (!Def->isSubClassOf("Predicate")) {
+ ::PrintError(Def->getLoc(), "Unknown 'Predicate' Type");
+ return false;
+ }
+
+ if (Def->getValueAsString("CondString").empty())
+ continue;
+
+ if (SubtargetFeatures.count(Def) == 0) {
+ SubtargetFeatures.emplace(
+ Def, SubtargetFeatureInfo(Def, SubtargetFeatures.size()));
+ }
+
+ M.addRequiredFeature(Def);
+ }
+ }
+
+ return true;
+}
+
+void CombineRuleBuilder::declareAllMatchDatasExpansions(
+ CodeExpansions &CE) const {
+ for (const auto &MD : MatchDatas)
+ CE.declare(MD.getPatternSymbol(), MD.getQualifiedVariableName());
+}
+
+void CombineRuleBuilder::addCXXPredicate(InstructionMatcher &IM,
+ const CodeExpansions &CE,
+ const CXXPattern &P) {
+ const auto &ExpandedCode = P.expandCode(CE, RuleDef.getLoc());
+ IM.addPredicate<GenericInstructionPredicateMatcher>(
+ ExpandedCode.getEnumNameWithPrefix(CXXPredPrefix));
+}
+
+std::string CombineRuleBuilder::makeAnonPatName(StringRef Prefix) const {
+ return to_string("__anon_pat_" + Prefix + "_" + to_string(RuleID) + "_" +
+ to_string(AnonIDCnt++));
+}
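+
+// For example, given the concatenation above, the first anonymous "match"
+// pattern of rule 7 is named "__anon_pat_match_7_0", the next one
+// "__anon_pat_match_7_1", and so on.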
+
+RuleMatcher &CombineRuleBuilder::addRuleMatcher(Twine AdditionalComment) {
+ auto &RM = OutRMs.emplace_back(RuleDef.getLoc());
+ addFeaturePredicates(RM);
+ RM.addRequiredSimplePredicate(getIsEnabledPredicateEnumName(RuleID));
+ const std::string AdditionalCommentStr = AdditionalComment.str();
+ RM.addAction<DebugCommentAction>(
+ "Combiner Rule #" + to_string(RuleID) + ": " + RuleDef.getName().str() +
+ (AdditionalCommentStr.empty() ? "" : "; " + AdditionalCommentStr));
+ return RM;
+}
+
+bool CombineRuleBuilder::findRoots() {
+ // Look by pattern name, e.g.
+ // (G_FNEG $x, $y):$root
+ if (auto It = MatchPats.find(RootName); It != MatchPats.end()) {
+ MatchRoot = It->second.get();
+ return true;
+ }
+
+ // Look by def:
+ // (G_FNEG $root, $y)
+ auto It = OperandTable.find(RootName);
+ if (It == OperandTable.end()) {
+ PrintError("Cannot find root '" + RootName + "' in match patterns!");
+ return false;
+ }
+
+ if (!It->second.MatchPat) {
+ PrintError("Cannot use live-in operand '" + RootName +
+ "' as match pattern root!");
+ return false;
+ }
+
+ MatchRoot = It->second.MatchPat;
+ return true;
+}
+
+bool CombineRuleBuilder::buildOperandsTable() {
+ // Walk each instruction pattern
+ for (auto &[_, P] : MatchPats) {
+ auto *IP = dyn_cast<InstructionPattern>(P.get());
+ if (!IP)
+ continue;
+ for (const auto &Operand : IP->operands()) {
+ // Create an entry, no matter if it's a use or a def.
+ auto &Entry = OperandTable[Operand.Name];
+
+ // We only need to do additional checking on defs, though.
+ if (!Operand.IsDef)
+ continue;
+
+ if (Entry.MatchPat) {
+ PrintError("Operand '" + Operand.Name +
+ "' is defined multiple times in the 'match' patterns");
+ return false;
+ }
+ Entry.MatchPat = IP;
+ }
+ }
+
+ for (auto &[_, P] : ApplyPats) {
+ auto *IP = dyn_cast<InstructionPattern>(P.get());
+ if (!IP)
+ continue;
+ for (const auto &Operand : IP->operands()) {
+ // Create an entry, no matter if it's a use or a def.
+ auto &Entry = OperandTable[Operand.Name];
+
+ // We only need to do additional checking on defs, though.
+ if (!Operand.IsDef)
+ continue;
+
+ if (!Entry.MatchPat) {
+ PrintError("Cannot define live-in operand '" + Operand.Name +
+ "' in the 'apply' pattern");
+ return false;
+ }
+ if (Entry.ApplyPat) {
+ PrintError("Operand '" + Operand.Name +
+ "' is defined multiple times in the 'apply' patterns");
+ return false;
+ }
+ Entry.ApplyPat = IP;
+ }
+ }
+
+ return true;
+}
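+
+// Sketch of the resulting table: for a hypothetical match pattern
+// (G_FNEG $dst, $src), the entries end up roughly as
+//   [dst match_pat:<that pattern>]   defined by the pattern
+//   [src live-in]                    used but never defined
+// which is the format printed by print() above. Redefining an operand within
+// 'match', or defining one only in 'apply', is rejected by the checks above.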
+
+bool CombineRuleBuilder::parseDefs(DagInit &Def) {
+ if (Def.getOperatorAsDef(RuleDef.getLoc())->getName() != "defs") {
+ PrintError("Expected defs operator");
+ return false;
+ }
+
+ SmallVector<StringRef> Roots;
+ for (unsigned I = 0, E = Def.getNumArgs(); I < E; ++I) {
+ if (isSpecificDef(*Def.getArg(I), "root")) {
+ Roots.emplace_back(Def.getArgNameStr(I));
+ continue;
+ }
+
+ // Subclasses of GIDefMatchData should declare that this rule needs to pass
+ // data from the match stage to the apply stage, and ensure that the
+ // generated matcher has a suitable variable for it to do so.
+ if (Record *MatchDataRec =
+ getDefOfSubClass(*Def.getArg(I), "GIDefMatchData")) {
+ MatchDatas.emplace_back(Def.getArgNameStr(I),
+ MatchDataRec->getValueAsString("Type"));
+ continue;
+ }
+
+ // Otherwise emit an appropriate error message.
+ if (getDefOfSubClass(*Def.getArg(I), "GIDefKind"))
+ PrintError("This GIDefKind not implemented in tablegen");
+ else if (getDefOfSubClass(*Def.getArg(I), "GIDefKindWithArgs"))
+ PrintError("This GIDefKindWithArgs not implemented in tablegen");
+ else
+ PrintError("Expected a subclass of GIDefKind or a sub-dag whose "
+ "operator is of type GIDefKindWithArgs");
+ return false;
+ }
+
+ if (Roots.size() != 1) {
+ PrintError("Combine rules must have exactly one root");
+ return false;
+ }
+
+ RootName = Roots.front();
+
+ // Assign variables to all MatchDatas.
+ AssignMatchDataVariables(MatchDatas);
+ return true;
+}
+
+bool CombineRuleBuilder::parseMatch(DagInit &Match) {
+ if (Match.getOperatorAsDef(RuleDef.getLoc())->getName() != "match") {
+ PrintError("Expected match operator");
+ return false;
+ }
+
+ if (Match.getNumArgs() == 0) {
+ PrintError("Matcher is empty");
+ return false;
+ }
+
+ // The match section consists of a list of matchers and predicates. Parse each
+ // one and add the corresponding Pattern to the MatchPats table.
+ bool HasOpcodeMatcher = false;
+ for (unsigned I = 0; I < Match.getNumArgs(); ++I) {
+ Init *Arg = Match.getArg(I);
+ std::string Name = Match.getArgName(I)
+ ? Match.getArgName(I)->getValue().str()
+ : makeAnonPatName("match");
+
+ if (MatchPats.contains(Name)) {
+ PrintError("'" + Name + "' match pattern defined more than once!");
+ return false;
+ }
+
+ if (auto Pat = parseInstructionMatcher(*Arg, Name)) {
+ MatchPats[Pat->getName()] = std::move(Pat);
+ continue;
+ }
+
+ if (auto Pat = parseWipMatchOpcodeMatcher(*Arg, Name)) {
+ if (HasOpcodeMatcher) {
+ PrintError("wip_match_opcode can only be present once");
+ return false;
+ }
+ HasOpcodeMatcher = true;
+ MatchPats[Pat->getName()] = std::move(Pat);
+ continue;
+ }
+
+ // Parse arbitrary C++ code
+ if (const auto *StringI = dyn_cast<StringInit>(Arg)) {
+ auto CXXPat =
+ std::make_unique<CXXPattern>(*StringI, Name, /*IsApply*/ false);
+ if (!CXXPat->getRawCode().contains("return ")) {
+ PrintWarning(RuleDef.getLoc(),
+ "'match' C++ code does not seem to return!");
+ }
+ MatchPats[CXXPat->getName()] = std::move(CXXPat);
+ continue;
+ }
+
+ // TODO: don't print this for, e.g., a bad operand count in an inst pattern.
+ PrintError("Expected a subclass of GIMatchKind or a sub-dag whose "
+ "operator is either of a GIMatchKindWithArgs or Instruction");
+ PrintNote("Pattern was `" + Arg->getAsString() + "'");
+ return false;
+ }
+
+ return true;
+}
+
+bool CombineRuleBuilder::parseApply(DagInit &Apply) {
+ // Currently we only support C++ :(
+ if (Apply.getOperatorAsDef(RuleDef.getLoc())->getName() != "apply") {
+ PrintError("Expected 'apply' operator in Apply DAG");
+ return false;
+ }
+
+ if (Apply.getNumArgs() != 1) {
+ PrintError("Expected exactly 1 argument in 'apply'");
+ return false;
+ }
+
+ const StringInit *Code = dyn_cast<StringInit>(Apply.getArg(0));
+ if (!Code) {
+ PrintError("Expected the argument of 'apply' to be a string of C++ code");
+ return false;
+ }
+
+ auto Pat = std::make_unique<CXXPattern>(*Code, makeAnonPatName("apply"),
+ /*IsApply*/ true);
+ ApplyPats[Pat->getName()] = std::move(Pat);
+ return true;
+}
+
+std::unique_ptr<Pattern>
+CombineRuleBuilder::parseInstructionMatcher(const Init &Arg, StringRef Name) {
+ const DagInit *Matcher = getDagWithOperatorOfSubClass(Arg, "Instruction");
+ if (!Matcher)
+ return nullptr;
+
+ auto &Instr = CGT.getInstruction(Matcher->getOperatorAsDef(RuleDef.getLoc()));
+ auto Pat = std::make_unique<InstructionPattern>(Instr, Name);
+
+ for (const auto &NameInit : Matcher->getArgNames())
+ Pat->addOperand(NameInit->getAsUnquotedString());
+
+ if (!Pat->checkSemantics(RuleDef.getLoc()))
+ return nullptr;
+
+ return std::move(Pat);
+}
+
+std::unique_ptr<Pattern>
+CombineRuleBuilder::parseWipMatchOpcodeMatcher(const Init &Arg,
+ StringRef Name) {
+ const DagInit *Matcher = getDagWithSpecificOperator(Arg, "wip_match_opcode");
+ if (!Matcher)
+ return nullptr;
+
+ if (Matcher->getNumArgs() == 0) {
+ PrintError("Empty wip_match_opcode");
+ return nullptr;
+ }
+
+ // Each argument is an opcode that can match.
+ auto Result = std::make_unique<AnyOpcodePattern>(Name);
+ for (const auto &Arg : Matcher->getArgs()) {
+ Record *OpcodeDef = getDefOfSubClass(*Arg, "Instruction");
+ if (OpcodeDef) {
+ Result->addOpcode(&CGT.getInstruction(OpcodeDef));
+ continue;
+ }
+
+ PrintError("Arguments to wip_match_opcode must be instructions");
+ return nullptr;
+ }
+
+ return std::move(Result);
+}
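+
+// For illustration, this accepts a dag of the form
+//   (wip_match_opcode G_ADD, G_SUB):$root
+// where every argument must be an Instruction def; the result is a single
+// AnyOpcodePattern listing those opcodes.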
+
+bool CombineRuleBuilder::emitMatchPattern(CodeExpansions &CE,
+ const InstructionPattern &IP) {
+ auto &M = addRuleMatcher();
+ InstructionMatcher &IM = M.addInstructionMatcher("root");
+ declareInstExpansion(CE, IM, IP.getName());
+
+ DenseSet<const Pattern *> SeenPats;
+ if (!emitInstructionMatchPattern(CE, M, IM, IP, SeenPats))
+ return false;
+
+ // Emit remaining patterns
+ for (auto &[_, Pat] : MatchPats) {
+ if (SeenPats.contains(Pat.get()))
+ continue;
+
+ switch (Pat->getKind()) {
+ case Pattern::K_AnyOpcode:
+ PrintError("wip_match_opcode cannot be used with instruction patterns!");
+ return false;
+ case Pattern::K_Inst:
+ cast<InstructionPattern>(Pat.get())->reportUnreachable(RuleDef.getLoc());
+ return false;
+ case Pattern::K_CXX: {
+ addCXXPredicate(IM, CE, *cast<CXXPattern>(Pat.get()));
+ continue;
+ }
+ default:
+ llvm_unreachable("unknown pattern kind!");
+ }
+ }
+
+ return emitApplyPatterns(CE, M);
+}
+
+bool CombineRuleBuilder::emitMatchPattern(CodeExpansions &CE,
+ const AnyOpcodePattern &AOP) {
+
+ for (const CodeGenInstruction *CGI : AOP.insts()) {
+ auto &M = addRuleMatcher("wip_match_opcode alternative '" +
+ CGI->TheDef->getName() + "'");
+
+ InstructionMatcher &IM = M.addInstructionMatcher(AOP.getName());
+ declareInstExpansion(CE, IM, AOP.getName());
+ // declareInstExpansion must declare the same expansion on every iteration;
+ // otherwise we would need to create a fresh CodeExpansions object for each
+ // alternative instead.
+ assert(IM.getInsnVarID() == 0);
+
+ IM.addPredicate<InstructionOpcodeMatcher>(CGI);
+
+ // Emit remaining patterns.
+ for (auto &[_, Pat] : MatchPats) {
+ if (Pat.get() == &AOP)
+ continue;
+
+ switch (Pat->getKind()) {
+ case Pattern::K_AnyOpcode:
+ PrintError("wip_match_opcode can only be present once!");
+ return false;
+ case Pattern::K_Inst:
+ cast<InstructionPattern>(Pat.get())->reportUnreachable(
+ RuleDef.getLoc());
+ return false;
+ case Pattern::K_CXX: {
+ addCXXPredicate(IM, CE, *cast<CXXPattern>(Pat.get()));
+ break;
+ }
+ default:
+ llvm_unreachable("unknown pattern kind!");
+ }
+ }
+
+ if (!emitApplyPatterns(CE, M))
+ return false;
+ }
+
+ return true;
+}
+
+bool CombineRuleBuilder::emitApplyPatterns(CodeExpansions &CE, RuleMatcher &M) {
+ for (auto &[_, Pat] : ApplyPats) {
+ switch (Pat->getKind()) {
+ case Pattern::K_AnyOpcode:
+ case Pattern::K_Inst:
+ llvm_unreachable("Unsupported pattern kind in output pattern!");
+ case Pattern::K_CXX: {
+ CXXPattern *CXXPat = cast<CXXPattern>(Pat.get());
+ const auto &ExpandedCode = CXXPat->expandCode(CE, RuleDef.getLoc());
+ M.addAction<CustomCXXAction>(
+ ExpandedCode.getEnumNameWithPrefix(CXXApplyPrefix));
+ continue;
+ }
+ default:
+ llvm_unreachable("Unknown pattern kind!");
+ }
+ }
+
+ return true;
+}
+
+bool CombineRuleBuilder::emitInstructionMatchPattern(
+ CodeExpansions &CE, RuleMatcher &M, InstructionMatcher &IM,
+ const InstructionPattern &P, DenseSet<const Pattern *> &SeenPats) {
+ if (SeenPats.contains(&P))
+ return true;
+
+ SeenPats.insert(&P);
+
+ IM.addPredicate<InstructionOpcodeMatcher>(&P.getInst());
+ declareInstExpansion(CE, IM, P.getName());
+
+ unsigned OpIdx = 0;
+ for (auto &O : P.operands()) {
+ auto &OpTableEntry = OperandTable.find(O.Name)->second;
+
+ OperandMatcher &OM =
+ IM.addOperand(OpIdx++, O.Name, AllocatedTemporariesBaseID++);
+ declareOperandExpansion(CE, OM, O.Name);
+
+ if (O.IsDef)
+ continue;
+
+ if (InstructionPattern *DefPat = OpTableEntry.MatchPat) {
+ auto InstOpM = OM.addPredicate<InstructionOperandMatcher>(M, O.Name);
+ if (!InstOpM) {
+ // TODO: copy-pasted from GlobalISelEmitter.cpp. Is it still relevant
+ // here?
+ PrintError("Nested instruction '" + DefPat->getName() +
+ "' cannot be the same as another operand '" + O.Name + "'");
+ return false;
+ }
+
+ if (!emitInstructionMatchPattern(CE, M, (*InstOpM)->getInsnMatcher(),
+ *DefPat, SeenPats))
+ return false;
+ }
+ }
+
+ return true;
+}
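+
+// Sketch of the recursion: with hypothetical match patterns
+//   (G_FMUL $t, $x, $y) and (G_FNEG $dst, $t)
+// emitting the root G_FNEG skips its def ($dst), then sees that the operand
+// table marks $t as defined by the G_FMUL pattern, adds an
+// InstructionOperandMatcher for it and recurses, so one RuleMatcher ends up
+// checking both instructions.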
+
+//===- GICombinerEmitter --------------------------------------------------===//
+
+/// This class is essentially the driver. It fetches all TableGen records, calls
+/// CombineRuleBuilder to build the MatchTable's RuleMatchers, then creates the
+/// MatchTable & emits it. It also handles emitting all the supporting code such
+/// as the list of LLTs, the CXXPredicates, etc.
+class GICombinerEmitter final : public GlobalISelMatchTableExecutorEmitter {
+ RecordKeeper &Records;
+ StringRef Name;
+ const CodeGenTarget &Target;
+ Record *Combiner;
+ unsigned NextRuleID = 0;
+
+ // List all combine rules (ID, name) imported.
+ // Note that the combiner rule ID is different from the RuleMatcher ID. The
+ // latter is internal to the MatchTable; the former is the canonical ID of the
+ // combine rule, used to enable/disable it.
+ std::vector<std::pair<unsigned, std::string>> AllCombineRules;
+
+ MatchTable buildMatchTable(MutableArrayRef<RuleMatcher> Rules);
+
+ void emitRuleConfigImpl(raw_ostream &OS);
+
+ void emitAdditionalImpl(raw_ostream &OS) override;
+
+ void emitMIPredicateFns(raw_ostream &OS) override;
+ void emitI64ImmPredicateFns(raw_ostream &OS) override;
+ void emitAPFloatImmPredicateFns(raw_ostream &OS) override;
+ void emitAPIntImmPredicateFns(raw_ostream &OS) override;
+ void emitTestSimplePredicate(raw_ostream &OS) override;
+ void emitRunCustomAction(raw_ostream &OS) override;
+
+ void emitAdditionalTemporariesDecl(raw_ostream &OS,
+ StringRef Indent) override;
+
+ const CodeGenTarget &getTarget() const override { return Target; }
+ StringRef getClassName() const override {
+ return Combiner->getValueAsString("Classname");
+ }
+
+ std::string getRuleConfigClassName() const {
+ return getClassName().str() + "RuleConfig";
+ }
+
+ void gatherRules(std::vector<RuleMatcher> &Rules,
+ const std::vector<Record *> &&RulesAndGroups);
+
+public:
+ explicit GICombinerEmitter(RecordKeeper &RK, const CodeGenTarget &Target,
+ StringRef Name, Record *Combiner);
+ ~GICombinerEmitter() {}
+
+ void run(raw_ostream &OS);
+};
+
+void GICombinerEmitter::emitRuleConfigImpl(raw_ostream &OS) {
+ OS << "struct " << getRuleConfigClassName() << " {\n"
+ << " SparseBitVector<> DisabledRules;\n\n"
+ << " bool isRuleEnabled(unsigned RuleID) const;\n"
+ << " bool parseCommandLineOption();\n"
+ << " bool setRuleEnabled(StringRef RuleIdentifier);\n"
+ << " bool setRuleDisabled(StringRef RuleIdentifier);\n"
+ << "};\n\n";
+
+ std::vector<std::pair<std::string, std::string>> Cases;
+ Cases.reserve(AllCombineRules.size());
+
+ for (const auto &[ID, Name] : AllCombineRules)
+ Cases.emplace_back(Name, "return " + to_string(ID) + ";\n");
+
+ OS << "static std::optional<uint64_t> getRuleIdxForIdentifier(StringRef "
+ "RuleIdentifier) {\n"
+ << " uint64_t I;\n"
+ << " // getAsInteger(...) returns false on success\n"
+ << " bool Parsed = !RuleIdentifier.getAsInteger(0, I);\n"
+ << " if (Parsed)\n"
+ << " return I;\n\n"
+ << "#ifndef NDEBUG\n";
+ StringMatcher Matcher("RuleIdentifier", Cases, OS);
+ Matcher.Emit();
+ OS << "#endif // ifndef NDEBUG\n\n"
+ << " return std::nullopt;\n"
+ << "}\n";
+
+ OS << "static std::optional<std::pair<uint64_t, uint64_t>> "
+ "getRuleRangeForIdentifier(StringRef RuleIdentifier) {\n"
+ << " std::pair<StringRef, StringRef> RangePair = "
+ "RuleIdentifier.split('-');\n"
+ << " if (!RangePair.second.empty()) {\n"
+ << " const auto First = "
+ "getRuleIdxForIdentifier(RangePair.first);\n"
+ << " const auto Last = "
+ "getRuleIdxForIdentifier(RangePair.second);\n"
+ << " if (!First || !Last)\n"
+ << " return std::nullopt;\n"
+ << " if (First >= Last)\n"
+ << " report_fatal_error(\"Beginning of range should be before "
+ "end of range\");\n"
+ << " return {{*First, *Last + 1}};\n"
+ << " }\n"
+ << " if (RangePair.first == \"*\") {\n"
+ << " return {{0, " << AllCombineRules.size() << "}};\n"
+ << " }\n"
+ << " const auto I = getRuleIdxForIdentifier(RangePair.first);\n"
+ << " if (!I)\n"
+ << " return std::nullopt;\n"
+ << " return {{*I, *I + 1}};\n"
+ << "}\n\n";
+
+ for (bool Enabled : {true, false}) {
+ OS << "bool " << getRuleConfigClassName() << "::setRule"
+ << (Enabled ? "Enabled" : "Disabled") << "(StringRef RuleIdentifier) {\n"
+ << " auto MaybeRange = getRuleRangeForIdentifier(RuleIdentifier);\n"
+ << " if (!MaybeRange)\n"
+ << " return false;\n"
+ << " for (auto I = MaybeRange->first; I < MaybeRange->second; ++I)\n"
+ << " DisabledRules." << (Enabled ? "reset" : "set") << "(I);\n"
+ << " return true;\n"
+ << "}\n\n";
+ }
+
+ OS << "static std::vector<std::string> " << Name << "Option;\n"
+ << "static cl::list<std::string> " << Name << "DisableOption(\n"
+ << " \"" << Name.lower() << "-disable-rule\",\n"
+ << " cl::desc(\"Disable one or more combiner rules temporarily in "
+ << "the " << Name << " pass\"),\n"
+ << " cl::CommaSeparated,\n"
+ << " cl::Hidden,\n"
+ << " cl::cat(GICombinerOptionCategory),\n"
+ << " cl::callback([](const std::string &Str) {\n"
+ << " " << Name << "Option.push_back(Str);\n"
+ << " }));\n"
+ << "static cl::list<std::string> " << Name << "OnlyEnableOption(\n"
+ << " \"" << Name.lower() << "-only-enable-rule\",\n"
+ << " cl::desc(\"Disable all rules in the " << Name
+ << " pass then re-enable the specified ones\"),\n"
+ << " cl::Hidden,\n"
+ << " cl::cat(GICombinerOptionCategory),\n"
+ << " cl::callback([](const std::string &CommaSeparatedArg) {\n"
+ << " StringRef Str = CommaSeparatedArg;\n"
+ << " " << Name << "Option.push_back(\"*\");\n"
+ << " do {\n"
+ << " auto X = Str.split(\",\");\n"
+ << " " << Name << "Option.push_back((\"!\" + X.first).str());\n"
+ << " Str = X.second;\n"
+ << " } while (!Str.empty());\n"
+ << " }));\n"
+ << "\n\n"
+ << "bool " << getRuleConfigClassName()
+ << "::isRuleEnabled(unsigned RuleID) const {\n"
+ << " return !DisabledRules.test(RuleID);\n"
+ << "}\n"
+ << "bool " << getRuleConfigClassName() << "::parseCommandLineOption() {\n"
+ << " for (StringRef Identifier : " << Name << "Option) {\n"
+ << " bool Enabled = Identifier.consume_front(\"!\");\n"
+ << " if (Enabled && !setRuleEnabled(Identifier))\n"
+ << " return false;\n"
+ << " if (!Enabled && !setRuleDisabled(Identifier))\n"
+ << " return false;\n"
+ << " }\n"
+ << " return true;\n"
+ << "}\n\n";
+}
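+
+// Usage sketch for the generated options (combiner name hypothetical): for a
+// combiner named "MyCombiner" this emits flags along the lines of
+//   -mycombiner-disable-rule=some_rule,12,4-7
+//   -mycombiner-only-enable-rule=some_rule
+// Identifiers may be numeric IDs, ranges like "4-7", "*" for all rules, or
+// rule names (resolved only in asserts builds, per the NDEBUG guard above);
+// the only-enable form first disables everything and then re-enables the
+// listed rules.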
+
+void GICombinerEmitter::emitAdditionalImpl(raw_ostream &OS) {
+ OS << "bool " << getClassName()
+ << "::tryCombineAll(MachineInstr &I) const {\n"
+ << " const TargetSubtargetInfo &ST = MF.getSubtarget();\n"
+ << " const PredicateBitset AvailableFeatures = "
+ "getAvailableFeatures();\n"
+ << " NewMIVector OutMIs;\n"
+ << " State.MIs.clear();\n"
+ << " State.MIs.push_back(&I);\n"
+ << " " << MatchDataInfo::StructName << " = "
+ << MatchDataInfo::StructTypeName << "();\n\n"
+ << " if (executeMatchTable(*this, OutMIs, State, ExecInfo"
+ << ", getMatchTable(), *ST.getInstrInfo(), MRI, "
+ "*MRI.getTargetRegisterInfo(), *ST.getRegBankInfo(), AvailableFeatures"
+ << ", /*CoverageInfo*/ nullptr)) {\n"
+ << " return true;\n"
+ << " }\n\n"
+ << " return false;\n"
+ << "}\n\n";
+}
+
+void GICombinerEmitter::emitMIPredicateFns(raw_ostream &OS) {
+ auto MatchCode = getSorted(AllCXXMatchCode);
+ emitMIPredicateFnsImpl<const CXXPredicateCode *>(
+ OS, "", ArrayRef<const CXXPredicateCode *>(MatchCode),
+ [](const CXXPredicateCode *C) -> StringRef { return C->BaseEnumName; },
+ [](const CXXPredicateCode *C) -> StringRef { return C->Code; });
+}
+
+void GICombinerEmitter::emitI64ImmPredicateFns(raw_ostream &OS) {
+ // Unused, but still needs to be called.
+ emitImmPredicateFnsImpl<unsigned>(
+ OS, "I64", "int64_t", {}, [](unsigned) { return ""; },
+ [](unsigned) { return ""; });
+}
+
+void GICombinerEmitter::emitAPFloatImmPredicateFns(raw_ostream &OS) {
+ // Unused, but still needs to be called.
+ emitImmPredicateFnsImpl<unsigned>(
+ OS, "APFloat", "const APFloat &", {}, [](unsigned) { return ""; },
+ [](unsigned) { return ""; });
+}
+
+void GICombinerEmitter::emitAPIntImmPredicateFns(raw_ostream &OS) {
+ // Unused, but still needs to be called.
+ emitImmPredicateFnsImpl<unsigned>(
+ OS, "APInt", "const APInt &", {}, [](unsigned) { return ""; },
+ [](unsigned) { return ""; });
+}
+
+void GICombinerEmitter::emitTestSimplePredicate(raw_ostream &OS) {
+ if (!AllCombineRules.empty()) {
+ OS << "enum {\n";
+ std::string EnumeratorSeparator = " = GICXXPred_Invalid + 1,\n";
+ // To avoid emitting a switch, we expect that all those rules are in order.
+ // That way we can just get the RuleID from the enum by subtracting
+ // (GICXXPred_Invalid + 1).
+ unsigned ExpectedID = 0;
+ (void)ExpectedID;
+ for (const auto &[ID, _] : AllCombineRules) {
+ assert(ExpectedID++ == ID && "combine rules are not ordered!");
+ OS << " " << getIsEnabledPredicateEnumName(ID) << EnumeratorSeparator;
+ EnumeratorSeparator = ",\n";
+ }
+ OS << "};\n\n";
+ }
+
+ OS << "bool " << getClassName()
+ << "::testSimplePredicate(unsigned Predicate) const {\n"
+ << " return RuleConfig.isRuleEnabled(Predicate - "
+ "GICXXPred_Invalid - "
+ "1);\n"
+ << "}\n";
+}
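+
+// Worked example of the mapping relied on above: if GICXXPred_Invalid has
+// value N, rule 0 gets predicate value N + 1, rule 1 gets N + 2, and so on,
+// so testSimplePredicate recovers the rule ID as Predicate - N - 1 and passes
+// it to RuleConfig.isRuleEnabled().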
+
+void GICombinerEmitter::emitRunCustomAction(raw_ostream &OS) {
+ const auto ApplyCode = getSorted(AllCXXApplyCode);
+
+ if (!ApplyCode.empty()) {
+ OS << "enum {\n";
+ std::string EnumeratorSeparator = " = GICXXCustomAction_Invalid + 1,\n";
+ for (const auto &Apply : ApplyCode) {
+ OS << " " << Apply->getEnumNameWithPrefix(CXXApplyPrefix)
+ << EnumeratorSeparator;
+ EnumeratorSeparator = ",\n";
+ }
+ OS << "};\n";
+ }
+
+ OS << "void " << getClassName()
+ << "::runCustomAction(unsigned ApplyID, const MatcherState &State) const "
+ "{\n";
+ if (!ApplyCode.empty()) {
+ OS << " switch(ApplyID) {\n";
+ for (const auto &Apply : ApplyCode) {
+ OS << " case " << Apply->getEnumNameWithPrefix(CXXApplyPrefix) << ":{\n"
+ << " " << Apply->Code << "\n"
+ << " return;\n";
+ OS << " }\n";
+ }
+ OS << "}\n";
+ }
+ OS << " llvm_unreachable(\"Unknown Apply Action\");\n"
+ << "}\n";
+}
+
+void GICombinerEmitter::emitAdditionalTemporariesDecl(raw_ostream &OS,
+ StringRef Indent) {
+ OS << Indent << "struct " << MatchDataInfo::StructTypeName << " {\n";
+ for (const auto &[Type, VarNames] : AllMatchDataVars) {
+ assert(!VarNames.empty() && "Cannot have no vars for this type!");
+ OS << Indent << " " << Type << " " << join(VarNames, ", ") << ";\n";
+ }
+ OS << Indent << "};\n"
+ << Indent << "mutable " << MatchDataInfo::StructTypeName << " "
+ << MatchDataInfo::StructName << ";\n\n";
+}
+
+GICombinerEmitter::GICombinerEmitter(RecordKeeper &RK,
+ const CodeGenTarget &Target,
+ StringRef Name, Record *Combiner)
+ : Records(RK), Name(Name), Target(Target), Combiner(Combiner) {}
+
+MatchTable
+GICombinerEmitter::buildMatchTable(MutableArrayRef<RuleMatcher> Rules) {
+ std::vector<Matcher *> InputRules;
+ for (Matcher &Rule : Rules)
+ InputRules.push_back(&Rule);
+
+ unsigned CurrentOrdering = 0;
+ StringMap<unsigned> OpcodeOrder;
+ for (RuleMatcher &Rule : Rules) {
+ const StringRef Opcode = Rule.getOpcode();
+ assert(!Opcode.empty() && "Didn't expect an undefined opcode");
+ if (OpcodeOrder.count(Opcode) == 0)
+ OpcodeOrder[Opcode] = CurrentOrdering++;
+ }
+
+ llvm::stable_sort(InputRules, [&OpcodeOrder](const Matcher *A,
+ const Matcher *B) {
+ auto *L = static_cast<const RuleMatcher *>(A);
+ auto *R = static_cast<const RuleMatcher *>(B);
+ return std::make_tuple(OpcodeOrder[L->getOpcode()], L->getNumOperands()) <
+ std::make_tuple(OpcodeOrder[R->getOpcode()], R->getNumOperands());
+ });
+
+ for (Matcher *Rule : InputRules)
+ Rule->optimize();
+
+ std::vector<std::unique_ptr<Matcher>> MatcherStorage;
+ std::vector<Matcher *> OptRules =
+ optimizeRules<GroupMatcher>(InputRules, MatcherStorage);
+
+ for (Matcher *Rule : OptRules)
+ Rule->optimize();
+
+ OptRules = optimizeRules<SwitchMatcher>(OptRules, MatcherStorage);
+
+ return MatchTable::buildTable(OptRules, /*WithCoverage*/ false,
+ /*IsCombiner*/ true);
+}
+
+/// Recurse into GICombineGroup's and flatten the ruleset into a simple list.
+void GICombinerEmitter::gatherRules(
+ std::vector<RuleMatcher> &ActiveRules,
+ const std::vector<Record *> &&RulesAndGroups) {
+ for (Record *R : RulesAndGroups) {
+ if (R->isValueUnset("Rules")) {
+ AllCombineRules.emplace_back(NextRuleID, R->getName().str());
+ CombineRuleBuilder CRB(Target, SubtargetFeatures, *R, NextRuleID++,
+ ActiveRules);
+
+ if (!CRB.parseAll())
+ continue;
+
+ if (StopAfterParse) {
+ CRB.print(outs());
+ continue;
+ }
+
+ if (!CRB.emitRuleMatchers())
+ continue;
+ } else
+ gatherRules(ActiveRules, R->getValueAsListOfDefs("Rules"));
+ }
+}
+
+void GICombinerEmitter::run(raw_ostream &OS) {
+ Records.startTimer("Gather rules");
+ std::vector<RuleMatcher> Rules;
+ gatherRules(Rules, Combiner->getValueAsListOfDefs("Rules"));
+ if (ErrorsPrinted)
+ PrintFatalError(Combiner->getLoc(), "Failed to parse one or more rules");
+
+ Records.startTimer("Creating Match Table");
+ unsigned MaxTemporaries = 0;
+ for (const auto &Rule : Rules)
+ MaxTemporaries = std::max(MaxTemporaries, Rule.countRendererFns());
+
+ const MatchTable Table = buildMatchTable(Rules);
+
+ Records.startTimer("Emit combiner");
+
+ emitSourceFileHeader(getClassName().str() + " Combiner Match Table", OS);
+
+ // Unused
+ std::vector<StringRef> CustomRendererFns;
+ // Unused, but kept non-empty as a hack to avoid an empty declarator
+ std::vector<LLTCodeGen> TypeObjects = {LLTCodeGen(LLT::scalar(1))};
+ // Unused
+ std::vector<Record *> ComplexPredicates;
+
+ // GET_GICOMBINER_DEPS, which pulls in extra dependencies.
+ OS << "#ifdef GET_GICOMBINER_DEPS\n"
+ << "#include \"llvm/ADT/SparseBitVector.h\"\n"
+ << "namespace llvm {\n"
+ << "extern cl::OptionCategory GICombinerOptionCategory;\n"
+ << "} // end namespace llvm\n"
+ << "#endif // ifdef GET_GICOMBINER_DEPS\n\n";
+
+ // GET_GICOMBINER_TYPES, which needs to be included before the declaration of
+ // the class.
+ OS << "#ifdef GET_GICOMBINER_TYPES\n";
+ emitRuleConfigImpl(OS);
+ OS << "#endif // ifdef GET_GICOMBINER_TYPES\n\n";
+ emitPredicateBitset(OS, "GET_GICOMBINER_TYPES");
+
+ // GET_GICOMBINER_CLASS_MEMBERS, which need to be included inside the class.
+ emitPredicatesDecl(OS, "GET_GICOMBINER_CLASS_MEMBERS");
+ emitTemporariesDecl(OS, "GET_GICOMBINER_CLASS_MEMBERS");
+
+ // GET_GICOMBINER_IMPL, which needs to be included outside the class.
+ emitExecutorImpl(OS, Table, TypeObjects, Rules, ComplexPredicates,
+ CustomRendererFns, "GET_GICOMBINER_IMPL");
+
+ // GET_GICOMBINER_CONSTRUCTOR_INITS, which are in the constructor's
+ // initializer list.
+ emitPredicatesInit(OS, "GET_GICOMBINER_CONSTRUCTOR_INITS");
+ emitTemporariesInit(OS, MaxTemporaries, "GET_GICOMBINER_CONSTRUCTOR_INITS");
+}
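+
+// Sketch of how a target typically consumes the generated file: each
+// GET_GICOMBINER_* guard is defined in turn before including the emitted .inc
+// (the file name below is hypothetical):
+//
+//   #define GET_GICOMBINER_DEPS
+//   #include "MyTargetGenCombiner.inc"
+//   #undef GET_GICOMBINER_DEPS
+//
+// and likewise for GET_GICOMBINER_TYPES, GET_GICOMBINER_CLASS_MEMBERS (inside
+// the combiner class), GET_GICOMBINER_IMPL and
+// GET_GICOMBINER_CONSTRUCTOR_INITS.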
+
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+
+static void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS) {
+ CodeGenTarget Target(RK);
+
+ if (SelectedCombiners.empty())
+ PrintFatalError("No combiners selected with -combiners");
+ for (const auto &Combiner : SelectedCombiners) {
+ Record *CombinerDef = RK.getDef(Combiner);
+ if (!CombinerDef)
+ PrintFatalError("Could not find " + Combiner);
+ GICombinerEmitter(RK, Target, Combiner, CombinerDef).run(OS);
+ }
+}
+
+static TableGen::Emitter::Opt X("gen-global-isel-combiner-matchtable",
+ EmitGICombiner,
+ "Generate GlobalISel combiner Match Table");
diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp
index c79c79948a80..3bdcfec06e24 100644
--- a/llvm/utils/TableGen/GlobalISelEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -8,7 +8,7 @@
//
/// \file
/// This tablegen backend emits code for use by the GlobalISel instruction
-/// selector. See include/llvm/CodeGen/TargetGlobalISel.td.
+/// selector. See include/llvm/Target/GlobalISel/Target.td.
///
/// This file analyzes the patterns recognized by the SelectionDAGISel tablegen
/// backend, filters out the ones that are unsupported, maps
@@ -31,28 +31,39 @@
#include "CodeGenDAGPatterns.h"
#include "CodeGenInstruction.h"
+#include "CodeGenIntrinsics.h"
+#include "CodeGenRegisters.h"
+#include "CodeGenTarget.h"
+#include "GlobalISelMatchTable.h"
+#include "GlobalISelMatchTableExecutorEmitter.h"
+#include "InfoByHwMode.h"
#include "SubtargetFeatureInfo.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/Support/CodeGenCoverage.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
-#include "llvm/Support/LowLevelTypeImpl.h"
-#include "llvm/Support/MachineValueType.h"
+#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <numeric>
#include <string>
+
using namespace llvm;
+using namespace llvm::gi;
+
+using action_iterator = RuleMatcher::action_iterator;
#define DEBUG_TYPE "gisel-emitter"
STATISTIC(NumPatternTotal, "Total number of patterns");
STATISTIC(NumPatternImported, "Number of patterns imported from SelectionDAG");
STATISTIC(NumPatternImportsSkipped, "Number of SelectionDAG imports skipped");
-STATISTIC(NumPatternsTested, "Number of patterns executed according to coverage information");
-STATISTIC(NumPatternEmitted, "Number of patterns emitted");
+STATISTIC(NumPatternsTested,
+ "Number of patterns executed according to coverage information");
cl::OptionCategory GlobalISelEmitterCat("Options for -gen-global-isel");
@@ -78,140 +89,6 @@ static cl::opt<bool> OptimizeMatchTable(
cl::init(true), cl::cat(GlobalISelEmitterCat));
namespace {
-//===- Helper functions ---------------------------------------------------===//
-
-/// Get the name of the enum value used to number the predicate function.
-std::string getEnumNameForPredicate(const TreePredicateFn &Predicate) {
- if (Predicate.hasGISelPredicateCode())
- return "GIPFP_MI_" + Predicate.getFnName();
- return "GIPFP_" + Predicate.getImmTypeIdentifier().str() + "_" +
- Predicate.getFnName();
-}
-
-/// Get the opcode used to check this predicate.
-std::string getMatchOpcodeForImmPredicate(const TreePredicateFn &Predicate) {
- return "GIM_Check" + Predicate.getImmTypeIdentifier().str() + "ImmPredicate";
-}
-
-/// This class stands in for LLT wherever we want to tablegen-erate an
-/// equivalent at compiler run-time.
-class LLTCodeGen {
-private:
- LLT Ty;
-
-public:
- LLTCodeGen() = default;
- LLTCodeGen(const LLT &Ty) : Ty(Ty) {}
-
- std::string getCxxEnumValue() const {
- std::string Str;
- raw_string_ostream OS(Str);
-
- emitCxxEnumValue(OS);
- return Str;
- }
-
- void emitCxxEnumValue(raw_ostream &OS) const {
- if (Ty.isScalar()) {
- OS << "GILLT_s" << Ty.getSizeInBits();
- return;
- }
- if (Ty.isVector()) {
- OS << (Ty.isScalable() ? "GILLT_nxv" : "GILLT_v")
- << Ty.getElementCount().getKnownMinValue() << "s"
- << Ty.getScalarSizeInBits();
- return;
- }
- if (Ty.isPointer()) {
- OS << "GILLT_p" << Ty.getAddressSpace();
- if (Ty.getSizeInBits() > 0)
- OS << "s" << Ty.getSizeInBits();
- return;
- }
- llvm_unreachable("Unhandled LLT");
- }
-
- void emitCxxConstructorCall(raw_ostream &OS) const {
- if (Ty.isScalar()) {
- OS << "LLT::scalar(" << Ty.getSizeInBits() << ")";
- return;
- }
- if (Ty.isVector()) {
- OS << "LLT::vector("
- << (Ty.isScalable() ? "ElementCount::getScalable("
- : "ElementCount::getFixed(")
- << Ty.getElementCount().getKnownMinValue() << "), "
- << Ty.getScalarSizeInBits() << ")";
- return;
- }
- if (Ty.isPointer() && Ty.getSizeInBits() > 0) {
- OS << "LLT::pointer(" << Ty.getAddressSpace() << ", "
- << Ty.getSizeInBits() << ")";
- return;
- }
- llvm_unreachable("Unhandled LLT");
- }
-
- const LLT &get() const { return Ty; }
-
- /// This ordering is used for std::unique() and llvm::sort(). There's no
- /// particular logic behind the order but either A < B or B < A must be
- /// true if A != B.
- bool operator<(const LLTCodeGen &Other) const {
- if (Ty.isValid() != Other.Ty.isValid())
- return Ty.isValid() < Other.Ty.isValid();
- if (!Ty.isValid())
- return false;
-
- if (Ty.isVector() != Other.Ty.isVector())
- return Ty.isVector() < Other.Ty.isVector();
- if (Ty.isScalar() != Other.Ty.isScalar())
- return Ty.isScalar() < Other.Ty.isScalar();
- if (Ty.isPointer() != Other.Ty.isPointer())
- return Ty.isPointer() < Other.Ty.isPointer();
-
- if (Ty.isPointer() && Ty.getAddressSpace() != Other.Ty.getAddressSpace())
- return Ty.getAddressSpace() < Other.Ty.getAddressSpace();
-
- if (Ty.isVector() && Ty.getElementCount() != Other.Ty.getElementCount())
- return std::make_tuple(Ty.isScalable(),
- Ty.getElementCount().getKnownMinValue()) <
- std::make_tuple(Other.Ty.isScalable(),
- Other.Ty.getElementCount().getKnownMinValue());
-
- assert((!Ty.isVector() || Ty.isScalable() == Other.Ty.isScalable()) &&
- "Unexpected mismatch of scalable property");
- return Ty.isVector()
- ? std::make_tuple(Ty.isScalable(),
- Ty.getSizeInBits().getKnownMinValue()) <
- std::make_tuple(
- Other.Ty.isScalable(),
- Other.Ty.getSizeInBits().getKnownMinValue())
- : Ty.getSizeInBits().getFixedValue() <
- Other.Ty.getSizeInBits().getFixedValue();
- }
-
- bool operator==(const LLTCodeGen &B) const { return Ty == B.Ty; }
-};
-
-// Track all types that are used so we can emit the corresponding enum.
-std::set<LLTCodeGen> KnownTypes;
-
-class InstructionMatcher;
-/// Convert an MVT to an equivalent LLT if possible, or the invalid LLT() for
-/// MVTs that don't map cleanly to an LLT (e.g., iPTR, *any, ...).
-static std::optional<LLTCodeGen> MVTToLLT(MVT::SimpleValueType SVT) {
- MVT VT(SVT);
-
- if (VT.isVector() && !VT.getVectorElementCount().isScalar())
- return LLTCodeGen(
- LLT::vector(VT.getVectorElementCount(), VT.getScalarSizeInBits()));
-
- if (VT.isInteger() || VT.isFloatingPoint())
- return LLTCodeGen(LLT::scalar(VT.getSizeInBits()));
-
- return std::nullopt;
-}
static std::string explainPredicates(const TreePatternNode *N) {
std::string Explanation;
@@ -401,3150 +278,10 @@ static Record *getInitValueAsRegClass(Init *V) {
return nullptr;
}
-std::string
-getNameForFeatureBitset(const std::vector<Record *> &FeatureBitset) {
- std::string Name = "GIFBS";
- for (const auto &Feature : FeatureBitset)
- Name += ("_" + Feature->getName()).str();
- return Name;
-}
-
static std::string getScopedName(unsigned Scope, const std::string &Name) {
return ("pred:" + Twine(Scope) + ":" + Name).str();
}
-//===- MatchTable Helpers -------------------------------------------------===//
-
-class MatchTable;
-
-/// A record to be stored in a MatchTable.
-///
-/// This class represents any and all output that may be required to emit the
-/// MatchTable. Instances are most often configured to represent an opcode or
-/// value that will be emitted to the table with some formatting but it can also
-/// represent commas, comments, and other formatting instructions.
-struct MatchTableRecord {
- enum RecordFlagsBits {
- MTRF_None = 0x0,
- /// Causes EmitStr to be formatted as comment when emitted.
- MTRF_Comment = 0x1,
- /// Causes the record value to be followed by a comma when emitted.
- MTRF_CommaFollows = 0x2,
- /// Causes the record value to be followed by a line break when emitted.
- MTRF_LineBreakFollows = 0x4,
- /// Indicates that the record defines a label and causes an additional
- /// comment to be emitted containing the index of the label.
- MTRF_Label = 0x8,
- /// Causes the record to be emitted as the index of the label specified by
- /// LabelID along with a comment indicating where that label is.
- MTRF_JumpTarget = 0x10,
- /// Causes the formatter to add a level of indentation before emitting the
- /// record.
- MTRF_Indent = 0x20,
- /// Causes the formatter to remove a level of indentation after emitting the
- /// record.
- MTRF_Outdent = 0x40,
- };
-
- /// When MTRF_Label or MTRF_JumpTarget is used, indicates a label id to
- /// reference or define.
- unsigned LabelID;
- /// The string to emit. Depending on the MTRF_* flags it may be a comment, a
- /// value, a label name.
- std::string EmitStr;
-
-private:
- /// The number of MatchTable elements described by this record. Comments are 0
- /// while values are typically 1. Values >1 may occur when we need to emit
- /// values that exceed the size of a MatchTable element.
- unsigned NumElements;
-
-public:
- /// A bitfield of RecordFlagsBits flags.
- unsigned Flags;
-
- /// The actual run-time value, if known
- int64_t RawValue;
-
- MatchTableRecord(std::optional<unsigned> LabelID_, StringRef EmitStr,
- unsigned NumElements, unsigned Flags,
- int64_t RawValue = std::numeric_limits<int64_t>::min())
- : LabelID(LabelID_.value_or(~0u)), EmitStr(EmitStr),
- NumElements(NumElements), Flags(Flags), RawValue(RawValue) {
- assert((!LabelID_ || LabelID != ~0u) &&
- "This value is reserved for non-labels");
- }
- MatchTableRecord(const MatchTableRecord &Other) = default;
- MatchTableRecord(MatchTableRecord &&Other) = default;
-
- /// Useful if a Match Table Record gets optimized out
- void turnIntoComment() {
- Flags |= MTRF_Comment;
- Flags &= ~MTRF_CommaFollows;
- NumElements = 0;
- }
-
- /// For Jump Table generation purposes
- bool operator<(const MatchTableRecord &Other) const {
- return RawValue < Other.RawValue;
- }
- int64_t getRawValue() const { return RawValue; }
-
- void emit(raw_ostream &OS, bool LineBreakNextAfterThis,
- const MatchTable &Table) const;
- unsigned size() const { return NumElements; }
-};
-
-class Matcher;
-
-/// Holds the contents of a generated MatchTable to enable formatting and the
-/// necessary index tracking needed to support GIM_Try.
-class MatchTable {
- /// An unique identifier for the table. The generated table will be named
- /// MatchTable${ID}.
- unsigned ID;
- /// The records that make up the table. Also includes comments describing the
- /// values being emitted and line breaks to format it.
- std::vector<MatchTableRecord> Contents;
- /// The currently defined labels.
- DenseMap<unsigned, unsigned> LabelMap;
- /// Tracks the sum of MatchTableRecord::NumElements as the table is built.
- unsigned CurrentSize = 0;
- /// A unique identifier for a MatchTable label.
- unsigned CurrentLabelID = 0;
- /// Determines if the table should be instrumented for rule coverage tracking.
- bool IsWithCoverage;
-
-public:
- static MatchTableRecord LineBreak;
- static MatchTableRecord Comment(StringRef Comment) {
- return MatchTableRecord(std::nullopt, Comment, 0,
- MatchTableRecord::MTRF_Comment);
- }
- static MatchTableRecord Opcode(StringRef Opcode, int IndentAdjust = 0) {
- unsigned ExtraFlags = 0;
- if (IndentAdjust > 0)
- ExtraFlags |= MatchTableRecord::MTRF_Indent;
- if (IndentAdjust < 0)
- ExtraFlags |= MatchTableRecord::MTRF_Outdent;
-
- return MatchTableRecord(std::nullopt, Opcode, 1,
- MatchTableRecord::MTRF_CommaFollows | ExtraFlags);
- }
- static MatchTableRecord NamedValue(StringRef NamedValue) {
- return MatchTableRecord(std::nullopt, NamedValue, 1,
- MatchTableRecord::MTRF_CommaFollows);
- }
- static MatchTableRecord NamedValue(StringRef NamedValue, int64_t RawValue) {
- return MatchTableRecord(std::nullopt, NamedValue, 1,
- MatchTableRecord::MTRF_CommaFollows, RawValue);
- }
- static MatchTableRecord NamedValue(StringRef Namespace,
- StringRef NamedValue) {
- return MatchTableRecord(std::nullopt, (Namespace + "::" + NamedValue).str(),
- 1, MatchTableRecord::MTRF_CommaFollows);
- }
- static MatchTableRecord NamedValue(StringRef Namespace, StringRef NamedValue,
- int64_t RawValue) {
- return MatchTableRecord(std::nullopt, (Namespace + "::" + NamedValue).str(),
- 1, MatchTableRecord::MTRF_CommaFollows, RawValue);
- }
- static MatchTableRecord IntValue(int64_t IntValue) {
- return MatchTableRecord(std::nullopt, llvm::to_string(IntValue), 1,
- MatchTableRecord::MTRF_CommaFollows);
- }
- static MatchTableRecord Label(unsigned LabelID) {
- return MatchTableRecord(LabelID, "Label " + llvm::to_string(LabelID), 0,
- MatchTableRecord::MTRF_Label |
- MatchTableRecord::MTRF_Comment |
- MatchTableRecord::MTRF_LineBreakFollows);
- }
- static MatchTableRecord JumpTarget(unsigned LabelID) {
- return MatchTableRecord(LabelID, "Label " + llvm::to_string(LabelID), 1,
- MatchTableRecord::MTRF_JumpTarget |
- MatchTableRecord::MTRF_Comment |
- MatchTableRecord::MTRF_CommaFollows);
- }
-
- static MatchTable buildTable(ArrayRef<Matcher *> Rules, bool WithCoverage);
-
- MatchTable(bool WithCoverage, unsigned ID = 0)
- : ID(ID), IsWithCoverage(WithCoverage) {}
-
- bool isWithCoverage() const { return IsWithCoverage; }
-
- void push_back(const MatchTableRecord &Value) {
- if (Value.Flags & MatchTableRecord::MTRF_Label)
- defineLabel(Value.LabelID);
- Contents.push_back(Value);
- CurrentSize += Value.size();
- }
-
- unsigned allocateLabelID() { return CurrentLabelID++; }
-
- void defineLabel(unsigned LabelID) {
- LabelMap.insert(std::make_pair(LabelID, CurrentSize));
- }
-
- unsigned getLabelIndex(unsigned LabelID) const {
- const auto I = LabelMap.find(LabelID);
- assert(I != LabelMap.end() && "Use of undeclared label");
- return I->second;
- }
-
- void emitUse(raw_ostream &OS) const { OS << "MatchTable" << ID; }
-
- void emitDeclaration(raw_ostream &OS) const {
- unsigned Indentation = 4;
- OS << " constexpr static int64_t MatchTable" << ID << "[] = {";
- LineBreak.emit(OS, true, *this);
- OS << std::string(Indentation, ' ');
-
- for (auto I = Contents.begin(), E = Contents.end(); I != E;
- ++I) {
- bool LineBreakIsNext = false;
- const auto &NextI = std::next(I);
-
- if (NextI != E) {
- if (NextI->EmitStr == "" &&
- NextI->Flags == MatchTableRecord::MTRF_LineBreakFollows)
- LineBreakIsNext = true;
- }
-
- if (I->Flags & MatchTableRecord::MTRF_Indent)
- Indentation += 2;
-
- I->emit(OS, LineBreakIsNext, *this);
- if (I->Flags & MatchTableRecord::MTRF_LineBreakFollows)
- OS << std::string(Indentation, ' ');
-
- if (I->Flags & MatchTableRecord::MTRF_Outdent)
- Indentation -= 2;
- }
- OS << "};\n";
- }
-};
-
-MatchTableRecord MatchTable::LineBreak = {
- std::nullopt, "" /* Emit String */, 0 /* Elements */,
- MatchTableRecord::MTRF_LineBreakFollows};
-
-void MatchTableRecord::emit(raw_ostream &OS, bool LineBreakIsNextAfterThis,
- const MatchTable &Table) const {
- bool UseLineComment =
- LineBreakIsNextAfterThis || (Flags & MTRF_LineBreakFollows);
- if (Flags & (MTRF_JumpTarget | MTRF_CommaFollows))
- UseLineComment = false;
-
- if (Flags & MTRF_Comment)
- OS << (UseLineComment ? "// " : "/*");
-
- OS << EmitStr;
- if (Flags & MTRF_Label)
- OS << ": @" << Table.getLabelIndex(LabelID);
-
- if ((Flags & MTRF_Comment) && !UseLineComment)
- OS << "*/";
-
- if (Flags & MTRF_JumpTarget) {
- if (Flags & MTRF_Comment)
- OS << " ";
- OS << Table.getLabelIndex(LabelID);
- }
-
- if (Flags & MTRF_CommaFollows) {
- OS << ",";
- if (!LineBreakIsNextAfterThis && !(Flags & MTRF_LineBreakFollows))
- OS << " ";
- }
-
- if (Flags & MTRF_LineBreakFollows)
- OS << "\n";
-}
-
-MatchTable &operator<<(MatchTable &Table, const MatchTableRecord &Value) {
- Table.push_back(Value);
- return Table;
-}
-
-//===- Matchers -----------------------------------------------------------===//
-
-class OperandMatcher;
-class MatchAction;
-class PredicateMatcher;
-
-class Matcher {
-public:
- virtual ~Matcher() = default;
- virtual void optimize() {}
- virtual void emit(MatchTable &Table) = 0;
-
- virtual bool hasFirstCondition() const = 0;
- virtual const PredicateMatcher &getFirstCondition() const = 0;
- virtual std::unique_ptr<PredicateMatcher> popFirstCondition() = 0;
-};
-
-MatchTable MatchTable::buildTable(ArrayRef<Matcher *> Rules,
- bool WithCoverage) {
- MatchTable Table(WithCoverage);
- for (Matcher *Rule : Rules)
- Rule->emit(Table);
-
- return Table << MatchTable::Opcode("GIM_Reject") << MatchTable::LineBreak;
-}
-
-class GroupMatcher final : public Matcher {
- /// Conditions that form a common prefix of all the matchers contained.
- SmallVector<std::unique_ptr<PredicateMatcher>, 1> Conditions;
-
- /// All the nested matchers, sharing a common prefix.
- std::vector<Matcher *> Matchers;
-
- /// An owning collection for any auxiliary matchers created while optimizing
- /// nested matchers contained.
- std::vector<std::unique_ptr<Matcher>> MatcherStorage;
-
-public:
- /// Add a matcher to the collection of nested matchers if it meets the
- /// requirements, and return true. If it doesn't, do nothing and return false.
- ///
- /// Expected to preserve its argument, so it could be moved out later on.
- bool addMatcher(Matcher &Candidate);
-
- /// Mark the matcher as fully-built and ensure any invariants expected by both
- /// optimize() and emit(...) methods. Generally, both sequences of calls
- /// are expected to lead to a sensible result:
- ///
- /// addMatcher(...)*; finalize(); optimize(); emit(...); and
- /// addMatcher(...)*; finalize(); emit(...);
- ///
- /// or generally
- ///
- /// addMatcher(...)*; finalize(); { optimize()*; emit(...); }*
- ///
- /// Multiple calls to optimize() are expected to be handled gracefully, though
- /// optimize() is not expected to be idempotent. Multiple calls to finalize()
- /// aren't generally supported. emit(...) is expected to be non-mutating and
- /// producing the exact same results upon repeated calls.
- ///
- /// addMatcher() calls after the finalize() call are not supported.
- ///
- /// finalize() and optimize() are both allowed to mutate the contained
- /// matchers, so moving them out after finalize() is not supported.
- void finalize();
- void optimize() override;
- void emit(MatchTable &Table) override;
-
- /// Could be used to move out the matchers added previously, unless finalize()
- /// has been already called. If any of the matchers are moved out, the group
- /// becomes safe to destroy, but not safe to re-use for anything else.
- iterator_range<std::vector<Matcher *>::iterator> matchers() {
- return make_range(Matchers.begin(), Matchers.end());
- }
- size_t size() const { return Matchers.size(); }
- bool empty() const { return Matchers.empty(); }
-
- std::unique_ptr<PredicateMatcher> popFirstCondition() override {
- assert(!Conditions.empty() &&
- "Trying to pop a condition from a condition-less group");
- std::unique_ptr<PredicateMatcher> P = std::move(Conditions.front());
- Conditions.erase(Conditions.begin());
- return P;
- }
- const PredicateMatcher &getFirstCondition() const override {
- assert(!Conditions.empty() &&
- "Trying to get a condition from a condition-less group");
- return *Conditions.front();
- }
- bool hasFirstCondition() const override { return !Conditions.empty(); }
-
-private:
- /// See if a candidate matcher could be added to this group solely by
- /// analyzing its first condition.
- bool candidateConditionMatches(const PredicateMatcher &Predicate) const;
-};
-
-class SwitchMatcher : public Matcher {
- /// All the nested matchers, representing distinct switch-cases. The first
- /// conditions (as Matcher::getFirstCondition() reports) of all the nested
- /// matchers must share the same type and path to a value they check, in other
- /// words, be isIdenticalDownToValue, but have different values they check
- /// against.
- std::vector<Matcher *> Matchers;
-
- /// The representative condition, with a type and a path (InsnVarID and OpIdx
- /// in most cases) shared by all the matchers contained.
- std::unique_ptr<PredicateMatcher> Condition = nullptr;
-
- /// Temporary set used to check that the case values don't repeat within the
- /// same switch.
- std::set<MatchTableRecord> Values;
-
- /// An owning collection for any auxiliary matchers created while optimizing
- /// nested matchers contained.
- std::vector<std::unique_ptr<Matcher>> MatcherStorage;
-
-public:
- bool addMatcher(Matcher &Candidate);
-
- void finalize();
- void emit(MatchTable &Table) override;
-
- iterator_range<std::vector<Matcher *>::iterator> matchers() {
- return make_range(Matchers.begin(), Matchers.end());
- }
- size_t size() const { return Matchers.size(); }
- bool empty() const { return Matchers.empty(); }
-
- std::unique_ptr<PredicateMatcher> popFirstCondition() override {
- // SwitchMatcher doesn't have a common first condition for its cases, as all
- // the cases only share a kind of a value (a type and a path to it) they
- // match, but deliberately differ in the actual value they match.
- llvm_unreachable("Trying to pop a condition from a condition-less group");
- }
- const PredicateMatcher &getFirstCondition() const override {
- llvm_unreachable("Trying to pop a condition from a condition-less group");
- }
- bool hasFirstCondition() const override { return false; }
-
-private:
- /// See if the predicate type has a Switch-implementation for it.
- static bool isSupportedPredicateType(const PredicateMatcher &Predicate);
-
- bool candidateConditionMatches(const PredicateMatcher &Predicate) const;
-
- /// emit()-helper
- static void emitPredicateSpecificOpcodes(const PredicateMatcher &P,
- MatchTable &Table);
-};
-
-/// Generates code to check that a match rule matches.
-class RuleMatcher : public Matcher {
-public:
- using ActionList = std::list<std::unique_ptr<MatchAction>>;
- using action_iterator = ActionList::iterator;
-
-protected:
- /// A list of matchers that all need to succeed for the current rule to match.
- /// FIXME: This currently supports a single match position but could be
- /// extended to support multiple positions to support div/rem fusion or
- /// load-multiple instructions.
- using MatchersTy = std::vector<std::unique_ptr<InstructionMatcher>> ;
- MatchersTy Matchers;
-
- /// A list of actions that need to be taken when all predicates in this rule
- /// have succeeded.
- ActionList Actions;
-
- using DefinedInsnVariablesMap = std::map<InstructionMatcher *, unsigned>;
-
- /// A map of instruction matchers to the local variables
- DefinedInsnVariablesMap InsnVariableIDs;
-
- using MutatableInsnSet = SmallPtrSet<InstructionMatcher *, 4>;
-
- // The set of instruction matchers that have not yet been claimed for mutation
- // by a BuildMI.
- MutatableInsnSet MutatableInsns;
-
- /// A map of named operands defined by the matchers that may be referenced by
- /// the renderers.
- StringMap<OperandMatcher *> DefinedOperands;
-
- /// A map of anonymous physical register operands defined by the matchers that
- /// may be referenced by the renderers.
- DenseMap<Record *, OperandMatcher *> PhysRegOperands;
-
- /// ID for the next instruction variable defined with implicitlyDefineInsnVar()
- unsigned NextInsnVarID;
-
- /// ID for the next output instruction allocated with allocateOutputInsnID()
- unsigned NextOutputInsnID;
-
- /// ID for the next temporary register ID allocated with allocateTempRegID()
- unsigned NextTempRegID;
-
- std::vector<Record *> RequiredFeatures;
- std::vector<std::unique_ptr<PredicateMatcher>> EpilogueMatchers;
-
- ArrayRef<SMLoc> SrcLoc;
-
- typedef std::tuple<Record *, unsigned, unsigned>
- DefinedComplexPatternSubOperand;
- typedef StringMap<DefinedComplexPatternSubOperand>
- DefinedComplexPatternSubOperandMap;
- /// A map of Symbolic Names to ComplexPattern sub-operands.
- DefinedComplexPatternSubOperandMap ComplexSubOperands;
- /// A map used to for multiple referenced error check of ComplexSubOperand.
- /// ComplexSubOperand can't be referenced multiple from different operands,
- /// however multiple references from same operand are allowed since that is
- /// how 'same operand checks' are generated.
- StringMap<std::string> ComplexSubOperandsParentName;
-
- uint64_t RuleID;
- static uint64_t NextRuleID;
-
-public:
- RuleMatcher(ArrayRef<SMLoc> SrcLoc)
- : NextInsnVarID(0), NextOutputInsnID(0), NextTempRegID(0), SrcLoc(SrcLoc),
- RuleID(NextRuleID++) {}
- RuleMatcher(RuleMatcher &&Other) = default;
- RuleMatcher &operator=(RuleMatcher &&Other) = default;
-
- uint64_t getRuleID() const { return RuleID; }
-
- InstructionMatcher &addInstructionMatcher(StringRef SymbolicName);
- void addRequiredFeature(Record *Feature);
- const std::vector<Record *> &getRequiredFeatures() const;
-
- template <class Kind, class... Args> Kind &addAction(Args &&... args);
- template <class Kind, class... Args>
- action_iterator insertAction(action_iterator InsertPt, Args &&... args);
-
- /// Define an instruction without emitting any code to do so.
- unsigned implicitlyDefineInsnVar(InstructionMatcher &Matcher);
-
- unsigned getInsnVarID(InstructionMatcher &InsnMatcher) const;
- DefinedInsnVariablesMap::const_iterator defined_insn_vars_begin() const {
- return InsnVariableIDs.begin();
- }
- DefinedInsnVariablesMap::const_iterator defined_insn_vars_end() const {
- return InsnVariableIDs.end();
- }
- iterator_range<typename DefinedInsnVariablesMap::const_iterator>
- defined_insn_vars() const {
- return make_range(defined_insn_vars_begin(), defined_insn_vars_end());
- }
-
- MutatableInsnSet::const_iterator mutatable_insns_begin() const {
- return MutatableInsns.begin();
- }
- MutatableInsnSet::const_iterator mutatable_insns_end() const {
- return MutatableInsns.end();
- }
- iterator_range<typename MutatableInsnSet::const_iterator>
- mutatable_insns() const {
- return make_range(mutatable_insns_begin(), mutatable_insns_end());
- }
- void reserveInsnMatcherForMutation(InstructionMatcher *InsnMatcher) {
- bool R = MutatableInsns.erase(InsnMatcher);
- assert(R && "Reserving a mutatable insn that isn't available");
- (void)R;
- }
-
- action_iterator actions_begin() { return Actions.begin(); }
- action_iterator actions_end() { return Actions.end(); }
- iterator_range<action_iterator> actions() {
- return make_range(actions_begin(), actions_end());
- }
-
- void defineOperand(StringRef SymbolicName, OperandMatcher &OM);
-
- void definePhysRegOperand(Record *Reg, OperandMatcher &OM);
-
- Error defineComplexSubOperand(StringRef SymbolicName, Record *ComplexPattern,
- unsigned RendererID, unsigned SubOperandID,
- StringRef ParentSymbolicName) {
- std::string ParentName(ParentSymbolicName);
- if (ComplexSubOperands.count(SymbolicName)) {
- const std::string &RecordedParentName =
- ComplexSubOperandsParentName[SymbolicName];
- if (RecordedParentName != ParentName)
- return failedImport("Error: Complex suboperand " + SymbolicName +
- " referenced by different operands: " +
- RecordedParentName + " and " + ParentName + ".");
- // A complex suboperand referenced more than once from the same operand is
- // used to generate a 'same operand check'. Emitting
- // GIR_ComplexSubOperandRenderer for it is already handled.
- return Error::success();
- }
-
- ComplexSubOperands[SymbolicName] =
- std::make_tuple(ComplexPattern, RendererID, SubOperandID);
- ComplexSubOperandsParentName[SymbolicName] = ParentName;
-
- return Error::success();
- }
-
- std::optional<DefinedComplexPatternSubOperand>
- getComplexSubOperand(StringRef SymbolicName) const {
- const auto &I = ComplexSubOperands.find(SymbolicName);
- if (I == ComplexSubOperands.end())
- return std::nullopt;
- return I->second;
- }
-
- InstructionMatcher &getInstructionMatcher(StringRef SymbolicName) const;
- const OperandMatcher &getOperandMatcher(StringRef Name) const;
- const OperandMatcher &getPhysRegOperandMatcher(Record *) const;
-
- void optimize() override;
- void emit(MatchTable &Table) override;
-
- /// Compare the priority of this object and B.
- ///
- /// Returns true if this object is more important than B.
- bool isHigherPriorityThan(const RuleMatcher &B) const;
-
- /// Report the maximum number of temporary operands needed by the rule
- /// matcher.
- unsigned countRendererFns() const;
-
- std::unique_ptr<PredicateMatcher> popFirstCondition() override;
- const PredicateMatcher &getFirstCondition() const override;
- LLTCodeGen getFirstConditionAsRootType();
- bool hasFirstCondition() const override;
- unsigned getNumOperands() const;
- StringRef getOpcode() const;
-
- // FIXME: Remove this as soon as possible
- InstructionMatcher &insnmatchers_front() const { return *Matchers.front(); }
-
- unsigned allocateOutputInsnID() { return NextOutputInsnID++; }
- unsigned allocateTempRegID() { return NextTempRegID++; }
-
- iterator_range<MatchersTy::iterator> insnmatchers() {
- return make_range(Matchers.begin(), Matchers.end());
- }
- bool insnmatchers_empty() const { return Matchers.empty(); }
- void insnmatchers_pop_front() { Matchers.erase(Matchers.begin()); }
-};
-
-uint64_t RuleMatcher::NextRuleID = 0;
-
-using action_iterator = RuleMatcher::action_iterator;
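-
-// Rough shape of how the importer drives this class (the action kind and the
-// `Table` object are illustrative placeholders, not names defined here):
-//
-//   RuleMatcher M(SrcLoc);
-//   InstructionMatcher &Root = M.addInstructionMatcher("root");
-//   // ... add predicates to Root and its operands ...
-//   // ... M.addAction<SomeAction>(...); ...
-//   M.optimize();
-//   M.emit(Table);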
-
-template <class PredicateTy> class PredicateListMatcher {
-private:
- /// Template instantiations should specialize this to return a string to use
- /// for the comment emitted when there are no predicates.
- std::string getNoPredicateComment() const;
-
-protected:
- using PredicatesTy = std::deque<std::unique_ptr<PredicateTy>>;
- PredicatesTy Predicates;
-
- /// Track if the list of predicates was manipulated by one of the optimization
- /// methods.
- bool Optimized = false;
-
-public:
- typename PredicatesTy::iterator predicates_begin() {
- return Predicates.begin();
- }
- typename PredicatesTy::iterator predicates_end() {
- return Predicates.end();
- }
- iterator_range<typename PredicatesTy::iterator> predicates() {
- return make_range(predicates_begin(), predicates_end());
- }
- typename PredicatesTy::size_type predicates_size() const {
- return Predicates.size();
- }
- bool predicates_empty() const { return Predicates.empty(); }
-
- std::unique_ptr<PredicateTy> predicates_pop_front() {
- std::unique_ptr<PredicateTy> Front = std::move(Predicates.front());
- Predicates.pop_front();
- Optimized = true;
- return Front;
- }
-
- void prependPredicate(std::unique_ptr<PredicateTy> &&Predicate) {
- Predicates.push_front(std::move(Predicate));
- }
-
- void eraseNullPredicates() {
- const auto NewEnd =
- std::stable_partition(Predicates.begin(), Predicates.end(),
- std::logical_not<std::unique_ptr<PredicateTy>>());
- if (NewEnd != Predicates.begin()) {
- Predicates.erase(Predicates.begin(), NewEnd);
- Optimized = true;
- }
- }
-
- /// Emit MatchTable opcodes that test whether all the predicates are met.
- template <class... Args>
- void emitPredicateListOpcodes(MatchTable &Table, Args &&... args) {
- if (Predicates.empty() && !Optimized) {
- Table << MatchTable::Comment(getNoPredicateComment())
- << MatchTable::LineBreak;
- return;
- }
-
- for (const auto &Predicate : predicates())
- Predicate->emitPredicateOpcodes(Table, std::forward<Args>(args)...);
- }
-
- /// Provide a function to avoid emitting certain predicates. This is used to
- /// defer some predicate checks until after others.
- using PredicateFilterFunc = std::function<bool(const PredicateTy&)>;
-
- /// Emit MatchTable opcodes for predicates which satisfy \p
- /// ShouldEmitPredicate. This should be called multiple times to ensure all
- /// predicates are eventually added to the match table.
- template <class... Args>
- void emitFilteredPredicateListOpcodes(PredicateFilterFunc ShouldEmitPredicate,
- MatchTable &Table, Args &&... args) {
- if (Predicates.empty() && !Optimized) {
- Table << MatchTable::Comment(getNoPredicateComment())
- << MatchTable::LineBreak;
- return;
- }
-
- for (const auto &Predicate : predicates()) {
- if (ShouldEmitPredicate(*Predicate))
- Predicate->emitPredicateOpcodes(Table, std::forward<Args>(args)...);
- }
- }
-};
-
-class PredicateMatcher {
-public:
- /// This enum is used for RTTI and also defines the priority that is given to
- /// the predicate when generating the matcher code. Kinds with higher priority
- /// must be tested first.
- ///
- /// The relative priority of OPM_LLT, OPM_RegBank, and OPM_MBB does not matter,
- /// but OPM_Int must have priority over OPM_RegBank since constant integers
- /// are represented by a virtual register defined by a G_CONSTANT instruction.
- ///
- /// Note: The relative priority between IPM_ and OPM_ does not matter, they
- /// are currently not compared between each other.
- enum PredicateKind {
- IPM_Opcode,
- IPM_NumOperands,
- IPM_ImmPredicate,
- IPM_Imm,
- IPM_AtomicOrderingMMO,
- IPM_MemoryLLTSize,
- IPM_MemoryVsLLTSize,
- IPM_MemoryAddressSpace,
- IPM_MemoryAlignment,
- IPM_VectorSplatImm,
- IPM_NoUse,
- IPM_GenericPredicate,
- OPM_SameOperand,
- OPM_ComplexPattern,
- OPM_IntrinsicID,
- OPM_CmpPredicate,
- OPM_Instruction,
- OPM_Int,
- OPM_LiteralInt,
- OPM_LLT,
- OPM_PointerToAny,
- OPM_RegBank,
- OPM_MBB,
- OPM_RecordNamedOperand,
- };
-
-protected:
- PredicateKind Kind;
- unsigned InsnVarID;
- unsigned OpIdx;
-
-public:
- PredicateMatcher(PredicateKind Kind, unsigned InsnVarID, unsigned OpIdx = ~0)
- : Kind(Kind), InsnVarID(InsnVarID), OpIdx(OpIdx) {}
-
- unsigned getInsnVarID() const { return InsnVarID; }
- unsigned getOpIdx() const { return OpIdx; }
-
- virtual ~PredicateMatcher() = default;
- /// Emit MatchTable opcodes that check the predicate for the given operand.
- virtual void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const = 0;
-
- PredicateKind getKind() const { return Kind; }
-
- bool dependsOnOperands() const {
- // Custom predicates really depend on the context pattern of the
- // instruction, not just the individual instruction. This therefore
- // implicitly depends on all other pattern constraints.
- return Kind == IPM_GenericPredicate;
- }
-
- virtual bool isIdentical(const PredicateMatcher &B) const {
- return B.getKind() == getKind() && InsnVarID == B.InsnVarID &&
- OpIdx == B.OpIdx;
- }
-
- virtual bool isIdenticalDownToValue(const PredicateMatcher &B) const {
- return hasValue() && PredicateMatcher::isIdentical(B);
- }
-
- virtual MatchTableRecord getValue() const {
- assert(hasValue() && "Can not get a value of a value-less predicate!");
- llvm_unreachable("Not implemented yet");
- }
- virtual bool hasValue() const { return false; }
-
- /// Report the maximum number of temporary operands needed by the predicate
- /// matcher.
- virtual unsigned countRendererFns() const { return 0; }
-};
-
-/// Generates code to check a predicate of an operand.
-///
-/// Typical predicates include:
-/// * Operand is a particular register.
-/// * Operand is assigned a particular register bank.
-/// * Operand is an MBB.
-class OperandPredicateMatcher : public PredicateMatcher {
-public:
- OperandPredicateMatcher(PredicateKind Kind, unsigned InsnVarID,
- unsigned OpIdx)
- : PredicateMatcher(Kind, InsnVarID, OpIdx) {}
- virtual ~OperandPredicateMatcher() {}
-
- /// Compare the priority of this object and B.
- ///
- /// Returns true if this object is more important than B.
- virtual bool isHigherPriorityThan(const OperandPredicateMatcher &B) const;
-};
-
-template <>
-std::string
-PredicateListMatcher<OperandPredicateMatcher>::getNoPredicateComment() const {
- return "No operand predicates";
-}
-
-/// Generates code to check that a register operand is identical to another
-/// named operand (a 'same operand' check).
-class SameOperandMatcher : public OperandPredicateMatcher {
- std::string MatchingName;
- unsigned OrigOpIdx;
-
-public:
- SameOperandMatcher(unsigned InsnVarID, unsigned OpIdx, StringRef MatchingName,
- unsigned OrigOpIdx)
- : OperandPredicateMatcher(OPM_SameOperand, InsnVarID, OpIdx),
- MatchingName(MatchingName), OrigOpIdx(OrigOpIdx) {}
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == OPM_SameOperand;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override;
-
- bool isIdentical(const PredicateMatcher &B) const override {
- return OperandPredicateMatcher::isIdentical(B) &&
- OrigOpIdx == cast<SameOperandMatcher>(&B)->OrigOpIdx &&
- MatchingName == cast<SameOperandMatcher>(&B)->MatchingName;
- }
-};
-
-/// Generates code to check that an operand is a particular LLT.
-class LLTOperandMatcher : public OperandPredicateMatcher {
-protected:
- LLTCodeGen Ty;
-
-public:
- static std::map<LLTCodeGen, unsigned> TypeIDValues;
-
- static void initTypeIDValuesMap() {
- TypeIDValues.clear();
-
- unsigned ID = 0;
- for (const LLTCodeGen &LLTy : KnownTypes)
- TypeIDValues[LLTy] = ID++;
- }
-
- LLTOperandMatcher(unsigned InsnVarID, unsigned OpIdx, const LLTCodeGen &Ty)
- : OperandPredicateMatcher(OPM_LLT, InsnVarID, OpIdx), Ty(Ty) {
- KnownTypes.insert(Ty);
- }
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == OPM_LLT;
- }
- bool isIdentical(const PredicateMatcher &B) const override {
- return OperandPredicateMatcher::isIdentical(B) &&
- Ty == cast<LLTOperandMatcher>(&B)->Ty;
- }
- MatchTableRecord getValue() const override {
- const auto VI = TypeIDValues.find(Ty);
- if (VI == TypeIDValues.end())
- return MatchTable::NamedValue(getTy().getCxxEnumValue());
- return MatchTable::NamedValue(getTy().getCxxEnumValue(), VI->second);
- }
- bool hasValue() const override {
- if (TypeIDValues.size() != KnownTypes.size())
- initTypeIDValuesMap();
- return TypeIDValues.count(Ty);
- }
-
- LLTCodeGen getTy() const { return Ty; }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIM_CheckType") << MatchTable::Comment("MI")
- << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Op")
- << MatchTable::IntValue(OpIdx) << MatchTable::Comment("Type")
- << getValue() << MatchTable::LineBreak;
- }
-};
-
-std::map<LLTCodeGen, unsigned> LLTOperandMatcher::TypeIDValues;
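-
-// For a hypothetical s32 operand at index 1 of MIs[0], the emission above
-// produces a match-table row along the lines of (the GILLT_* name comes from
-// LLTCodeGen::getCxxEnumValue):
-//
-//   GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s32,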
-
-/// Generates code to check that an operand is a pointer to any address space.
-///
-/// In SelectionDAG, the types did not describe pointers or address spaces. As a
-/// result, iN is used to describe a pointer of N bits to any address space and
-/// PatFrag predicates are typically used to constrain the address space. There's
-/// no reliable means to derive the missing type information from the pattern so
-/// imported rules must test the components of a pointer separately.
-///
-/// If SizeInBits is zero, then the pointer size will be obtained from the
-/// subtarget.
-class PointerToAnyOperandMatcher : public OperandPredicateMatcher {
-protected:
- unsigned SizeInBits;
-
-public:
- PointerToAnyOperandMatcher(unsigned InsnVarID, unsigned OpIdx,
- unsigned SizeInBits)
- : OperandPredicateMatcher(OPM_PointerToAny, InsnVarID, OpIdx),
- SizeInBits(SizeInBits) {}
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == OPM_PointerToAny;
- }
-
- bool isIdentical(const PredicateMatcher &B) const override {
- return OperandPredicateMatcher::isIdentical(B) &&
- SizeInBits == cast<PointerToAnyOperandMatcher>(&B)->SizeInBits;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIM_CheckPointerToAny")
- << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
- << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
- << MatchTable::Comment("SizeInBits")
- << MatchTable::IntValue(SizeInBits) << MatchTable::LineBreak;
- }
-};
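-
-// Sketch of the row emitted above for, e.g., a 64-bit pointer operand at
-// index 1 (the concrete numbers depend on the pattern being imported):
-//
-//   GIM_CheckPointerToAny, /*MI*/0, /*Op*/1, /*SizeInBits*/64,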
-
-/// Generates code to record a named operand in the RecordedOperands list at
-/// StoreIdx. Predicates with 'let PredicateCodeUsesOperands = 1' get
-/// RecordedOperands as an argument to the predicate's C++ code once all
-/// operands have been matched.
-class RecordNamedOperandMatcher : public OperandPredicateMatcher {
-protected:
- unsigned StoreIdx;
- std::string Name;
-
-public:
- RecordNamedOperandMatcher(unsigned InsnVarID, unsigned OpIdx,
- unsigned StoreIdx, StringRef Name)
- : OperandPredicateMatcher(OPM_RecordNamedOperand, InsnVarID, OpIdx),
- StoreIdx(StoreIdx), Name(Name) {}
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == OPM_RecordNamedOperand;
- }
-
- bool isIdentical(const PredicateMatcher &B) const override {
- return OperandPredicateMatcher::isIdentical(B) &&
- StoreIdx == cast<RecordNamedOperandMatcher>(&B)->StoreIdx &&
- Name == cast<RecordNamedOperandMatcher>(&B)->Name;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIM_RecordNamedOperand")
- << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
- << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
- << MatchTable::Comment("StoreIdx") << MatchTable::IntValue(StoreIdx)
- << MatchTable::Comment("Name : " + Name) << MatchTable::LineBreak;
- }
-};
-
-/// Generates code to check that an operand is a particular target constant.
-class ComplexPatternOperandMatcher : public OperandPredicateMatcher {
-protected:
- const OperandMatcher &Operand;
- const Record &TheDef;
-
- unsigned getAllocatedTemporariesBaseID() const;
-
-public:
- bool isIdentical(const PredicateMatcher &B) const override { return false; }
-
- ComplexPatternOperandMatcher(unsigned InsnVarID, unsigned OpIdx,
- const OperandMatcher &Operand,
- const Record &TheDef)
- : OperandPredicateMatcher(OPM_ComplexPattern, InsnVarID, OpIdx),
- Operand(Operand), TheDef(TheDef) {}
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == OPM_ComplexPattern;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- unsigned ID = getAllocatedTemporariesBaseID();
- Table << MatchTable::Opcode("GIM_CheckComplexPattern")
- << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
- << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
- << MatchTable::Comment("Renderer") << MatchTable::IntValue(ID)
- << MatchTable::NamedValue(("GICP_" + TheDef.getName()).str())
- << MatchTable::LineBreak;
- }
-
- unsigned countRendererFns() const override {
- return 1;
- }
-};
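-
-// The emitted check names the complex pattern via a "GICP_" + record-name
-// enumerator; a row therefore looks roughly like (indices are examples only):
-//
-//   GIM_CheckComplexPattern, /*MI*/0, /*Op*/2, /*Renderer*/0, GICP_<PatternName>,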
-
-/// Generates code to check that an operand is in a particular register bank.
-class RegisterBankOperandMatcher : public OperandPredicateMatcher {
-protected:
- const CodeGenRegisterClass &RC;
-
-public:
- RegisterBankOperandMatcher(unsigned InsnVarID, unsigned OpIdx,
- const CodeGenRegisterClass &RC)
- : OperandPredicateMatcher(OPM_RegBank, InsnVarID, OpIdx), RC(RC) {}
-
- bool isIdentical(const PredicateMatcher &B) const override {
- return OperandPredicateMatcher::isIdentical(B) &&
- RC.getDef() == cast<RegisterBankOperandMatcher>(&B)->RC.getDef();
- }
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == OPM_RegBank;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIM_CheckRegBankForClass")
- << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
- << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
- << MatchTable::Comment("RC")
- << MatchTable::NamedValue(RC.getQualifiedName() + "RegClassID")
- << MatchTable::LineBreak;
- }
-};
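-
-// Example of the emitted row, using a made-up target and register class; the
-// symbol is RC.getQualifiedName() with "RegClassID" appended:
-//
-//   GIM_CheckRegBankForClass, /*MI*/0, /*Op*/0, /*RC*/MyTarget::GPR32RegClassID,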
-
-/// Generates code to check that an operand is a basic block.
-class MBBOperandMatcher : public OperandPredicateMatcher {
-public:
- MBBOperandMatcher(unsigned InsnVarID, unsigned OpIdx)
- : OperandPredicateMatcher(OPM_MBB, InsnVarID, OpIdx) {}
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == OPM_MBB;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIM_CheckIsMBB") << MatchTable::Comment("MI")
- << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Op")
- << MatchTable::IntValue(OpIdx) << MatchTable::LineBreak;
- }
-};
-
-class ImmOperandMatcher : public OperandPredicateMatcher {
-public:
- ImmOperandMatcher(unsigned InsnVarID, unsigned OpIdx)
- : OperandPredicateMatcher(IPM_Imm, InsnVarID, OpIdx) {}
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == IPM_Imm;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIM_CheckIsImm") << MatchTable::Comment("MI")
- << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Op")
- << MatchTable::IntValue(OpIdx) << MatchTable::LineBreak;
- }
-};
-
-/// Generates code to check that an operand is a G_CONSTANT with a particular
-/// int.
-class ConstantIntOperandMatcher : public OperandPredicateMatcher {
-protected:
- int64_t Value;
-
-public:
- ConstantIntOperandMatcher(unsigned InsnVarID, unsigned OpIdx, int64_t Value)
- : OperandPredicateMatcher(OPM_Int, InsnVarID, OpIdx), Value(Value) {}
-
- bool isIdentical(const PredicateMatcher &B) const override {
- return OperandPredicateMatcher::isIdentical(B) &&
- Value == cast<ConstantIntOperandMatcher>(&B)->Value;
- }
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == OPM_Int;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIM_CheckConstantInt")
- << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
- << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
- << MatchTable::IntValue(Value) << MatchTable::LineBreak;
- }
-};
-
-/// Generates code to check that an operand is a raw int (where MO.isImm() or
-/// MO.isCImm() is true).
-class LiteralIntOperandMatcher : public OperandPredicateMatcher {
-protected:
- int64_t Value;
-
-public:
- LiteralIntOperandMatcher(unsigned InsnVarID, unsigned OpIdx, int64_t Value)
- : OperandPredicateMatcher(OPM_LiteralInt, InsnVarID, OpIdx),
- Value(Value) {}
-
- bool isIdentical(const PredicateMatcher &B) const override {
- return OperandPredicateMatcher::isIdentical(B) &&
- Value == cast<LiteralIntOperandMatcher>(&B)->Value;
- }
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == OPM_LiteralInt;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIM_CheckLiteralInt")
- << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
- << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
- << MatchTable::IntValue(Value) << MatchTable::LineBreak;
- }
-};
-
-/// Generates code to check that an operand is a CmpInst predicate.
-class CmpPredicateOperandMatcher : public OperandPredicateMatcher {
-protected:
- std::string PredName;
-
-public:
- CmpPredicateOperandMatcher(unsigned InsnVarID, unsigned OpIdx,
- std::string P)
- : OperandPredicateMatcher(OPM_CmpPredicate, InsnVarID, OpIdx), PredName(P) {}
-
- bool isIdentical(const PredicateMatcher &B) const override {
- return OperandPredicateMatcher::isIdentical(B) &&
- PredName == cast<CmpPredicateOperandMatcher>(&B)->PredName;
- }
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == OPM_CmpPredicate;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIM_CheckCmpPredicate")
- << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
- << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
- << MatchTable::Comment("Predicate")
- << MatchTable::NamedValue("CmpInst", PredName)
- << MatchTable::LineBreak;
- }
-};
-
-/// Generates code to check that an operand is an intrinsic ID.
-class IntrinsicIDOperandMatcher : public OperandPredicateMatcher {
-protected:
- const CodeGenIntrinsic *II;
-
-public:
- IntrinsicIDOperandMatcher(unsigned InsnVarID, unsigned OpIdx,
- const CodeGenIntrinsic *II)
- : OperandPredicateMatcher(OPM_IntrinsicID, InsnVarID, OpIdx), II(II) {}
-
- bool isIdentical(const PredicateMatcher &B) const override {
- return OperandPredicateMatcher::isIdentical(B) &&
- II == cast<IntrinsicIDOperandMatcher>(&B)->II;
- }
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == OPM_IntrinsicID;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIM_CheckIntrinsicID")
- << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
- << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
- << MatchTable::NamedValue("Intrinsic::" + II->EnumName)
- << MatchTable::LineBreak;
- }
-};
-
-/// Generates code to check that this operand is an immediate whose value meets
-/// an immediate predicate.
-class OperandImmPredicateMatcher : public OperandPredicateMatcher {
-protected:
- TreePredicateFn Predicate;
-
-public:
- OperandImmPredicateMatcher(unsigned InsnVarID, unsigned OpIdx,
- const TreePredicateFn &Predicate)
- : OperandPredicateMatcher(IPM_ImmPredicate, InsnVarID, OpIdx),
- Predicate(Predicate) {}
-
- bool isIdentical(const PredicateMatcher &B) const override {
- return OperandPredicateMatcher::isIdentical(B) &&
- Predicate.getOrigPatFragRecord() ==
- cast<OperandImmPredicateMatcher>(&B)
- ->Predicate.getOrigPatFragRecord();
- }
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == IPM_ImmPredicate;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIM_CheckImmOperandPredicate")
- << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
- << MatchTable::Comment("MO") << MatchTable::IntValue(OpIdx)
- << MatchTable::Comment("Predicate")
- << MatchTable::NamedValue(getEnumNameForPredicate(Predicate))
- << MatchTable::LineBreak;
- }
-};
-
-/// Generates code to check that a set of predicates match for a particular
-/// operand.
-class OperandMatcher : public PredicateListMatcher<OperandPredicateMatcher> {
-protected:
- InstructionMatcher &Insn;
- unsigned OpIdx;
- std::string SymbolicName;
-
- /// The index of the first temporary variable allocated to this operand. The
- /// number of allocated temporaries can be found with
- /// countRendererFns().
- unsigned AllocatedTemporariesBaseID;
-
-public:
- OperandMatcher(InstructionMatcher &Insn, unsigned OpIdx,
- const std::string &SymbolicName,
- unsigned AllocatedTemporariesBaseID)
- : Insn(Insn), OpIdx(OpIdx), SymbolicName(SymbolicName),
- AllocatedTemporariesBaseID(AllocatedTemporariesBaseID) {}
-
- bool hasSymbolicName() const { return !SymbolicName.empty(); }
- StringRef getSymbolicName() const { return SymbolicName; }
- void setSymbolicName(StringRef Name) {
- assert(SymbolicName.empty() && "Operand already has a symbolic name");
- SymbolicName = std::string(Name);
- }
-
- /// Construct a new operand predicate and add it to the matcher.
- template <class Kind, class... Args>
- std::optional<Kind *> addPredicate(Args &&...args) {
- if (isSameAsAnotherOperand())
- return std::nullopt;
- Predicates.emplace_back(std::make_unique<Kind>(
- getInsnVarID(), getOpIdx(), std::forward<Args>(args)...));
- return static_cast<Kind *>(Predicates.back().get());
- }
-
- unsigned getOpIdx() const { return OpIdx; }
- unsigned getInsnVarID() const;
-
- std::string getOperandExpr(unsigned InsnVarID) const {
- return "State.MIs[" + llvm::to_string(InsnVarID) + "]->getOperand(" +
- llvm::to_string(OpIdx) + ")";
- }
-
- InstructionMatcher &getInstructionMatcher() const { return Insn; }
-
- Error addTypeCheckPredicate(const TypeSetByHwMode &VTy,
- bool OperandIsAPointer);
-
- /// Emit MatchTable opcodes that test whether the instruction named in
- /// InsnVarID matches all the predicates and all the operands.
- void emitPredicateOpcodes(MatchTable &Table, RuleMatcher &Rule) {
- if (!Optimized) {
- std::string Comment;
- raw_string_ostream CommentOS(Comment);
- CommentOS << "MIs[" << getInsnVarID() << "] ";
- if (SymbolicName.empty())
- CommentOS << "Operand " << OpIdx;
- else
- CommentOS << SymbolicName;
- Table << MatchTable::Comment(Comment) << MatchTable::LineBreak;
- }
-
- emitPredicateListOpcodes(Table, Rule);
- }
-
- /// Compare the priority of this object and B.
- ///
- /// Returns true if this object is more important than B.
- bool isHigherPriorityThan(OperandMatcher &B) {
- // Operand matchers involving more predicates have higher priority.
- if (predicates_size() > B.predicates_size())
- return true;
- if (predicates_size() < B.predicates_size())
- return false;
-
- // This assumes that predicates are added in a consistent order.
- for (auto &&Predicate : zip(predicates(), B.predicates())) {
- if (std::get<0>(Predicate)->isHigherPriorityThan(*std::get<1>(Predicate)))
- return true;
- if (std::get<1>(Predicate)->isHigherPriorityThan(*std::get<0>(Predicate)))
- return false;
- }
-
- return false;
- };
-
- /// Report the maximum number of temporary operands needed by the operand
- /// matcher.
- unsigned countRendererFns() {
- return std::accumulate(
- predicates().begin(), predicates().end(), 0,
- [](unsigned A,
- const std::unique_ptr<OperandPredicateMatcher> &Predicate) {
- return A + Predicate->countRendererFns();
- });
- }
-
- unsigned getAllocatedTemporariesBaseID() const {
- return AllocatedTemporariesBaseID;
- }
-
- bool isSameAsAnotherOperand() {
- for (const auto &Predicate : predicates())
- if (isa<SameOperandMatcher>(Predicate))
- return true;
- return false;
- }
-};
-
-Error OperandMatcher::addTypeCheckPredicate(const TypeSetByHwMode &VTy,
- bool OperandIsAPointer) {
- if (!VTy.isMachineValueType())
- return failedImport("unsupported typeset");
-
- if (VTy.getMachineValueType() == MVT::iPTR && OperandIsAPointer) {
- addPredicate<PointerToAnyOperandMatcher>(0);
- return Error::success();
- }
-
- auto OpTyOrNone = MVTToLLT(VTy.getMachineValueType().SimpleTy);
- if (!OpTyOrNone)
- return failedImport("unsupported type");
-
- if (OperandIsAPointer)
- addPredicate<PointerToAnyOperandMatcher>(OpTyOrNone->get().getSizeInBits());
- else if (VTy.isPointer())
- addPredicate<LLTOperandMatcher>(LLT::pointer(VTy.getPtrAddrSpace(),
- OpTyOrNone->get().getSizeInBits()));
- else
- addPredicate<LLTOperandMatcher>(*OpTyOrNone);
- return Error::success();
-}
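-
-// Worked example of the mapping above (assuming MVTToLLT maps i32 to s32):
-//
-//   i32 operand                     -> addPredicate<LLTOperandMatcher>(s32)
-//   iPTR operand, OperandIsAPointer -> addPredicate<PointerToAnyOperandMatcher>(0)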
-
-unsigned ComplexPatternOperandMatcher::getAllocatedTemporariesBaseID() const {
- return Operand.getAllocatedTemporariesBaseID();
-}
-
-/// Generates code to check a predicate on an instruction.
-///
-/// Typical predicates include:
-/// * The opcode of the instruction is a particular value.
-/// * The nsw/nuw flag is/isn't set.
-class InstructionPredicateMatcher : public PredicateMatcher {
-public:
- InstructionPredicateMatcher(PredicateKind Kind, unsigned InsnVarID)
- : PredicateMatcher(Kind, InsnVarID) {}
- virtual ~InstructionPredicateMatcher() {}
-
- /// Compare the priority of this object and B.
- ///
- /// Returns true if this object is more important than B.
- virtual bool
- isHigherPriorityThan(const InstructionPredicateMatcher &B) const {
- return Kind < B.Kind;
- };
-};
-
-template <>
-std::string
-PredicateListMatcher<PredicateMatcher>::getNoPredicateComment() const {
- return "No instruction predicates";
-}
-
-/// Generates code to check the opcode of an instruction.
-class InstructionOpcodeMatcher : public InstructionPredicateMatcher {
-protected:
- // Allow matching against one of several similar opcodes that share
- // properties. This is used to handle patterns where one SelectionDAG
- // operation maps to multiple GlobalISel ones (e.g. G_BUILD_VECTOR and
- // G_BUILD_VECTOR_TRUNC). The first is treated as the canonical opcode.
- SmallVector<const CodeGenInstruction *, 2> Insts;
-
- static DenseMap<const CodeGenInstruction *, unsigned> OpcodeValues;
-
- MatchTableRecord getInstValue(const CodeGenInstruction *I) const {
- const auto VI = OpcodeValues.find(I);
- if (VI != OpcodeValues.end())
- return MatchTable::NamedValue(I->Namespace, I->TheDef->getName(),
- VI->second);
- return MatchTable::NamedValue(I->Namespace, I->TheDef->getName());
- }
-
-public:
- static void initOpcodeValuesMap(const CodeGenTarget &Target) {
- OpcodeValues.clear();
-
- unsigned OpcodeValue = 0;
- for (const CodeGenInstruction *I : Target.getInstructionsByEnumValue())
- OpcodeValues[I] = OpcodeValue++;
- }
-
- InstructionOpcodeMatcher(unsigned InsnVarID,
- ArrayRef<const CodeGenInstruction *> I)
- : InstructionPredicateMatcher(IPM_Opcode, InsnVarID),
- Insts(I.begin(), I.end()) {
- assert((Insts.size() == 1 || Insts.size() == 2) &&
- "unexpected number of opcode alternatives");
- }
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == IPM_Opcode;
- }
-
- bool isIdentical(const PredicateMatcher &B) const override {
- return InstructionPredicateMatcher::isIdentical(B) &&
- Insts == cast<InstructionOpcodeMatcher>(&B)->Insts;
- }
-
- bool hasValue() const override {
- return Insts.size() == 1 && OpcodeValues.count(Insts[0]);
- }
-
- // TODO: This is used for the SwitchMatcher optimization. We should be able to
- // return a list of the opcodes to match.
- MatchTableRecord getValue() const override {
- assert(Insts.size() == 1);
-
- const CodeGenInstruction *I = Insts[0];
- const auto VI = OpcodeValues.find(I);
- if (VI != OpcodeValues.end())
- return MatchTable::NamedValue(I->Namespace, I->TheDef->getName(),
- VI->second);
- return MatchTable::NamedValue(I->Namespace, I->TheDef->getName());
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- StringRef CheckType = Insts.size() == 1 ?
- "GIM_CheckOpcode" : "GIM_CheckOpcodeIsEither";
- Table << MatchTable::Opcode(CheckType) << MatchTable::Comment("MI")
- << MatchTable::IntValue(InsnVarID);
-
- for (const CodeGenInstruction *I : Insts)
- Table << getInstValue(I);
- Table << MatchTable::LineBreak;
- }
-
- /// Compare the priority of this object and B.
- ///
- /// Returns true if this object is more important than B.
- bool
- isHigherPriorityThan(const InstructionPredicateMatcher &B) const override {
- if (InstructionPredicateMatcher::isHigherPriorityThan(B))
- return true;
- if (B.InstructionPredicateMatcher::isHigherPriorityThan(*this))
- return false;
-
- // Prioritize opcodes for cosmetic reasons in the generated source. Although
- // this is cosmetic at the moment, we may want to drive a similar ordering
- // using instruction frequency information to improve compile time.
- if (const InstructionOpcodeMatcher *BO =
- dyn_cast<InstructionOpcodeMatcher>(&B))
- return Insts[0]->TheDef->getName() < BO->Insts[0]->TheDef->getName();
-
- return false;
- };
-
- bool isConstantInstruction() const {
- return Insts.size() == 1 && Insts[0]->TheDef->getName() == "G_CONSTANT";
- }
-
- // The first opcode is the canonical opcode; the later ones are alternatives.
- StringRef getOpcode() const {
- return Insts[0]->TheDef->getName();
- }
-
- ArrayRef<const CodeGenInstruction *> getAlternativeOpcodes() {
- return Insts;
- }
-
- bool isVariadicNumOperands() const {
- // If one is variadic, they all should be.
- return Insts[0]->Operands.isVariadic;
- }
-
- StringRef getOperandType(unsigned OpIdx) const {
- // Types expected to be uniform for all alternatives.
- return Insts[0]->Operands[OpIdx].OperandType;
- }
-};
-
-DenseMap<const CodeGenInstruction *, unsigned>
- InstructionOpcodeMatcher::OpcodeValues;
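-
-// Typical emitted rows (generic opcodes live in the TargetOpcode namespace;
-// the two-opcode form corresponds to the alternatives mentioned above):
-//
-//   GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_ADD,
-//   GIM_CheckOpcodeIsEither, /*MI*/0, TargetOpcode::G_BUILD_VECTOR, TargetOpcode::G_BUILD_VECTOR_TRUNC,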
-
-class InstructionNumOperandsMatcher final : public InstructionPredicateMatcher {
- unsigned NumOperands = 0;
-
-public:
- InstructionNumOperandsMatcher(unsigned InsnVarID, unsigned NumOperands)
- : InstructionPredicateMatcher(IPM_NumOperands, InsnVarID),
- NumOperands(NumOperands) {}
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == IPM_NumOperands;
- }
-
- bool isIdentical(const PredicateMatcher &B) const override {
- return InstructionPredicateMatcher::isIdentical(B) &&
- NumOperands == cast<InstructionNumOperandsMatcher>(&B)->NumOperands;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIM_CheckNumOperands")
- << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
- << MatchTable::Comment("Expected")
- << MatchTable::IntValue(NumOperands) << MatchTable::LineBreak;
- }
-};
-
-/// Generates code to check that this instruction is a constant whose value
-/// meets an immediate predicate.
-///
-/// Immediates are slightly odd since they are typically used like an operand
-/// but are represented as an operator internally. We typically write simm8:$src
-/// in a tablegen pattern, but this is just syntactic sugar for
-/// (imm:i32)<<P:Predicate_simm8>>:$imm which more directly describes the nodes
-/// that will be matched and the predicate (which is attached to the imm
-/// operator) that will be tested. In SelectionDAG this describes a
-/// ConstantSDNode whose internal value will be tested using the simm8 predicate.
-///
-/// The corresponding GlobalISel representation is %1 = G_CONSTANT iN Value. In
-/// this representation, the immediate could be tested with an
-/// InstructionMatcher, InstructionOpcodeMatcher, OperandMatcher, and an
-/// OperandPredicateMatcher subclass to check that the Value meets the
-/// predicate, but
-/// there are two implementation issues with producing that matcher
-/// configuration from the SelectionDAG pattern:
-/// * ImmLeaf is a PatFrag whose root is an InstructionMatcher. This means that
-/// were we to sink the immediate predicate to the operand we would have to
-/// have two partial implementations of PatFrag support, one for immediates
-/// and one for non-immediates.
-/// * At the point we handle the predicate, the OperandMatcher hasn't been
-/// created yet. If we were to sink the predicate to the OperandMatcher we
-/// would also have to complicate (or duplicate) the code that descends and
-/// creates matchers for the subtree.
-/// Overall, it's simpler to handle it in the place it was found.
-class InstructionImmPredicateMatcher : public InstructionPredicateMatcher {
-protected:
- TreePredicateFn Predicate;
-
-public:
- InstructionImmPredicateMatcher(unsigned InsnVarID,
- const TreePredicateFn &Predicate)
- : InstructionPredicateMatcher(IPM_ImmPredicate, InsnVarID),
- Predicate(Predicate) {}
-
- bool isIdentical(const PredicateMatcher &B) const override {
- return InstructionPredicateMatcher::isIdentical(B) &&
- Predicate.getOrigPatFragRecord() ==
- cast<InstructionImmPredicateMatcher>(&B)
- ->Predicate.getOrigPatFragRecord();
- }
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == IPM_ImmPredicate;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode(getMatchOpcodeForImmPredicate(Predicate))
- << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
- << MatchTable::Comment("Predicate")
- << MatchTable::NamedValue(getEnumNameForPredicate(Predicate))
- << MatchTable::LineBreak;
- }
-};
-
-/// Generates code to check that a memory instruction has an atomic ordering
-/// on its MachineMemOperand.
-class AtomicOrderingMMOPredicateMatcher : public InstructionPredicateMatcher {
-public:
- enum AOComparator {
- AO_Exactly,
- AO_OrStronger,
- AO_WeakerThan,
- };
-
-protected:
- StringRef Order;
- AOComparator Comparator;
-
-public:
- AtomicOrderingMMOPredicateMatcher(unsigned InsnVarID, StringRef Order,
- AOComparator Comparator = AO_Exactly)
- : InstructionPredicateMatcher(IPM_AtomicOrderingMMO, InsnVarID),
- Order(Order), Comparator(Comparator) {}
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == IPM_AtomicOrderingMMO;
- }
-
- bool isIdentical(const PredicateMatcher &B) const override {
- if (!InstructionPredicateMatcher::isIdentical(B))
- return false;
- const auto &R = *cast<AtomicOrderingMMOPredicateMatcher>(&B);
- return Order == R.Order && Comparator == R.Comparator;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- StringRef Opcode = "GIM_CheckAtomicOrdering";
-
- if (Comparator == AO_OrStronger)
- Opcode = "GIM_CheckAtomicOrderingOrStrongerThan";
- if (Comparator == AO_WeakerThan)
- Opcode = "GIM_CheckAtomicOrderingWeakerThan";
-
- Table << MatchTable::Opcode(Opcode) << MatchTable::Comment("MI")
- << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Order")
- << MatchTable::NamedValue(("(int64_t)AtomicOrdering::" + Order).str())
- << MatchTable::LineBreak;
- }
-};
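-
-// Example row for an exact-ordering check (the ordering name is whatever Order
-// string the pattern supplied, e.g. "Monotonic"):
-//
-//   GIM_CheckAtomicOrdering, /*MI*/0, /*Order*/(int64_t)AtomicOrdering::Monotonic,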
-
-/// Generates code to check that the size of an MMO is exactly N bytes.
-class MemorySizePredicateMatcher : public InstructionPredicateMatcher {
-protected:
- unsigned MMOIdx;
- uint64_t Size;
-
-public:
- MemorySizePredicateMatcher(unsigned InsnVarID, unsigned MMOIdx, unsigned Size)
- : InstructionPredicateMatcher(IPM_MemoryLLTSize, InsnVarID),
- MMOIdx(MMOIdx), Size(Size) {}
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == IPM_MemoryLLTSize;
- }
- bool isIdentical(const PredicateMatcher &B) const override {
- return InstructionPredicateMatcher::isIdentical(B) &&
- MMOIdx == cast<MemorySizePredicateMatcher>(&B)->MMOIdx &&
- Size == cast<MemorySizePredicateMatcher>(&B)->Size;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIM_CheckMemorySizeEqualTo")
- << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
- << MatchTable::Comment("MMO") << MatchTable::IntValue(MMOIdx)
- << MatchTable::Comment("Size") << MatchTable::IntValue(Size)
- << MatchTable::LineBreak;
- }
-};
-
-class MemoryAddressSpacePredicateMatcher : public InstructionPredicateMatcher {
-protected:
- unsigned MMOIdx;
- SmallVector<unsigned, 4> AddrSpaces;
-
-public:
- MemoryAddressSpacePredicateMatcher(unsigned InsnVarID, unsigned MMOIdx,
- ArrayRef<unsigned> AddrSpaces)
- : InstructionPredicateMatcher(IPM_MemoryAddressSpace, InsnVarID),
- MMOIdx(MMOIdx), AddrSpaces(AddrSpaces.begin(), AddrSpaces.end()) {}
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == IPM_MemoryAddressSpace;
- }
- bool isIdentical(const PredicateMatcher &B) const override {
- if (!InstructionPredicateMatcher::isIdentical(B))
- return false;
- auto *Other = cast<MemoryAddressSpacePredicateMatcher>(&B);
- return MMOIdx == Other->MMOIdx && AddrSpaces == Other->AddrSpaces;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIM_CheckMemoryAddressSpace")
- << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
- << MatchTable::Comment("MMO") << MatchTable::IntValue(MMOIdx)
- // Encode number of address spaces to expect.
- << MatchTable::Comment("NumAddrSpace")
- << MatchTable::IntValue(AddrSpaces.size());
- for (unsigned AS : AddrSpaces)
- Table << MatchTable::Comment("AddrSpace") << MatchTable::IntValue(AS);
-
- Table << MatchTable::LineBreak;
- }
-};
-
-class MemoryAlignmentPredicateMatcher : public InstructionPredicateMatcher {
-protected:
- unsigned MMOIdx;
- int MinAlign;
-
-public:
- MemoryAlignmentPredicateMatcher(unsigned InsnVarID, unsigned MMOIdx,
- int MinAlign)
- : InstructionPredicateMatcher(IPM_MemoryAlignment, InsnVarID),
- MMOIdx(MMOIdx), MinAlign(MinAlign) {
- assert(MinAlign > 0);
- }
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == IPM_MemoryAlignment;
- }
-
- bool isIdentical(const PredicateMatcher &B) const override {
- if (!InstructionPredicateMatcher::isIdentical(B))
- return false;
- auto *Other = cast<MemoryAlignmentPredicateMatcher>(&B);
- return MMOIdx == Other->MMOIdx && MinAlign == Other->MinAlign;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIM_CheckMemoryAlignment")
- << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
- << MatchTable::Comment("MMO") << MatchTable::IntValue(MMOIdx)
- << MatchTable::Comment("MinAlign") << MatchTable::IntValue(MinAlign)
- << MatchTable::LineBreak;
- }
-};
-
-/// Generates code to check that the size of an MMO is less than, equal to, or
-/// greater than a given LLT.
-class MemoryVsLLTSizePredicateMatcher : public InstructionPredicateMatcher {
-public:
- enum RelationKind {
- GreaterThan,
- EqualTo,
- LessThan,
- };
-
-protected:
- unsigned MMOIdx;
- RelationKind Relation;
- unsigned OpIdx;
-
-public:
- MemoryVsLLTSizePredicateMatcher(unsigned InsnVarID, unsigned MMOIdx,
- enum RelationKind Relation,
- unsigned OpIdx)
- : InstructionPredicateMatcher(IPM_MemoryVsLLTSize, InsnVarID),
- MMOIdx(MMOIdx), Relation(Relation), OpIdx(OpIdx) {}
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == IPM_MemoryVsLLTSize;
- }
- bool isIdentical(const PredicateMatcher &B) const override {
- return InstructionPredicateMatcher::isIdentical(B) &&
- MMOIdx == cast<MemoryVsLLTSizePredicateMatcher>(&B)->MMOIdx &&
- Relation == cast<MemoryVsLLTSizePredicateMatcher>(&B)->Relation &&
- OpIdx == cast<MemoryVsLLTSizePredicateMatcher>(&B)->OpIdx;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode(Relation == EqualTo
- ? "GIM_CheckMemorySizeEqualToLLT"
- : Relation == GreaterThan
- ? "GIM_CheckMemorySizeGreaterThanLLT"
- : "GIM_CheckMemorySizeLessThanLLT")
- << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
- << MatchTable::Comment("MMO") << MatchTable::IntValue(MMOIdx)
- << MatchTable::Comment("OpIdx") << MatchTable::IntValue(OpIdx)
- << MatchTable::LineBreak;
- }
-};
-
-// Matcher for immAllOnesV/immAllZerosV
-class VectorSplatImmPredicateMatcher : public InstructionPredicateMatcher {
-public:
- enum SplatKind {
- AllZeros,
- AllOnes
- };
-
-private:
- SplatKind Kind;
-
-public:
- VectorSplatImmPredicateMatcher(unsigned InsnVarID, SplatKind K)
- : InstructionPredicateMatcher(IPM_VectorSplatImm, InsnVarID), Kind(K) {}
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == IPM_VectorSplatImm;
- }
-
- bool isIdentical(const PredicateMatcher &B) const override {
- return InstructionPredicateMatcher::isIdentical(B) &&
- Kind == static_cast<const VectorSplatImmPredicateMatcher &>(B).Kind;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- if (Kind == AllOnes)
- Table << MatchTable::Opcode("GIM_CheckIsBuildVectorAllOnes");
- else
- Table << MatchTable::Opcode("GIM_CheckIsBuildVectorAllZeros");
-
- Table << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID);
- Table << MatchTable::LineBreak;
- }
-};
-
-/// Generates code to check an arbitrary C++ instruction predicate.
-class GenericInstructionPredicateMatcher : public InstructionPredicateMatcher {
-protected:
- TreePredicateFn Predicate;
-
-public:
- GenericInstructionPredicateMatcher(unsigned InsnVarID,
- TreePredicateFn Predicate)
- : InstructionPredicateMatcher(IPM_GenericPredicate, InsnVarID),
- Predicate(Predicate) {}
-
- static bool classof(const InstructionPredicateMatcher *P) {
- return P->getKind() == IPM_GenericPredicate;
- }
- bool isIdentical(const PredicateMatcher &B) const override {
- return InstructionPredicateMatcher::isIdentical(B) &&
- Predicate ==
- static_cast<const GenericInstructionPredicateMatcher &>(B)
- .Predicate;
- }
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIM_CheckCxxInsnPredicate")
- << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
- << MatchTable::Comment("FnId")
- << MatchTable::NamedValue(getEnumNameForPredicate(Predicate))
- << MatchTable::LineBreak;
- }
-};
-
-/// Generates code to check that the result of an instruction has no uses.
-// TODO? Generalize this to support checking for one use.
-class NoUsePredicateMatcher : public InstructionPredicateMatcher {
-public:
- NoUsePredicateMatcher(unsigned InsnVarID)
- : InstructionPredicateMatcher(IPM_NoUse, InsnVarID) {}
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == IPM_NoUse;
- }
-
- bool isIdentical(const PredicateMatcher &B) const override {
- return InstructionPredicateMatcher::isIdentical(B);
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIM_CheckHasNoUse")
- << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
- << MatchTable::LineBreak;
- }
-};
-
-/// Generates code to check that a set of predicates and operands match for a
-/// particular instruction.
-///
-/// Typical predicates include:
-/// * Has a specific opcode.
-/// * Has an nsw/nuw flag or doesn't.
-class InstructionMatcher final : public PredicateListMatcher<PredicateMatcher> {
-protected:
- typedef std::vector<std::unique_ptr<OperandMatcher>> OperandVec;
-
- RuleMatcher &Rule;
-
- /// The operands to match. All rendered operands must be present even if the
- /// condition is always true.
- OperandVec Operands;
- bool NumOperandsCheck = true;
-
- std::string SymbolicName;
- unsigned InsnVarID;
-
- /// PhysRegInputs - This list has an entry for each explicitly specified
- /// physreg input to the pattern. The first element is the Register node; the
- /// second is the slot number in which the input pattern match recorded it.
- SmallVector<std::pair<Record *, unsigned>, 2> PhysRegInputs;
-
-public:
- InstructionMatcher(RuleMatcher &Rule, StringRef SymbolicName,
- bool NumOpsCheck = true)
- : Rule(Rule), NumOperandsCheck(NumOpsCheck), SymbolicName(SymbolicName) {
- // We create a new instruction matcher.
- // Get a new ID for that instruction.
- InsnVarID = Rule.implicitlyDefineInsnVar(*this);
- }
-
- /// Construct a new instruction predicate and add it to the matcher.
- template <class Kind, class... Args>
- std::optional<Kind *> addPredicate(Args &&...args) {
- Predicates.emplace_back(
- std::make_unique<Kind>(getInsnVarID(), std::forward<Args>(args)...));
- return static_cast<Kind *>(Predicates.back().get());
- }
-
- RuleMatcher &getRuleMatcher() const { return Rule; }
-
- unsigned getInsnVarID() const { return InsnVarID; }
-
- /// Add an operand to the matcher.
- OperandMatcher &addOperand(unsigned OpIdx, const std::string &SymbolicName,
- unsigned AllocatedTemporariesBaseID) {
- Operands.emplace_back(new OperandMatcher(*this, OpIdx, SymbolicName,
- AllocatedTemporariesBaseID));
- if (!SymbolicName.empty())
- Rule.defineOperand(SymbolicName, *Operands.back());
-
- return *Operands.back();
- }
-
- OperandMatcher &getOperand(unsigned OpIdx) {
- auto I = llvm::find_if(Operands,
- [&OpIdx](const std::unique_ptr<OperandMatcher> &X) {
- return X->getOpIdx() == OpIdx;
- });
- if (I != Operands.end())
- return **I;
- llvm_unreachable("Failed to lookup operand");
- }
-
- OperandMatcher &addPhysRegInput(Record *Reg, unsigned OpIdx,
- unsigned TempOpIdx) {
- assert(SymbolicName.empty());
- OperandMatcher *OM = new OperandMatcher(*this, OpIdx, "", TempOpIdx);
- Operands.emplace_back(OM);
- Rule.definePhysRegOperand(Reg, *OM);
- PhysRegInputs.emplace_back(Reg, OpIdx);
- return *OM;
- }
-
- ArrayRef<std::pair<Record *, unsigned>> getPhysRegInputs() const {
- return PhysRegInputs;
- }
-
- StringRef getSymbolicName() const { return SymbolicName; }
- unsigned getNumOperands() const { return Operands.size(); }
- OperandVec::iterator operands_begin() { return Operands.begin(); }
- OperandVec::iterator operands_end() { return Operands.end(); }
- iterator_range<OperandVec::iterator> operands() {
- return make_range(operands_begin(), operands_end());
- }
- OperandVec::const_iterator operands_begin() const { return Operands.begin(); }
- OperandVec::const_iterator operands_end() const { return Operands.end(); }
- iterator_range<OperandVec::const_iterator> operands() const {
- return make_range(operands_begin(), operands_end());
- }
- bool operands_empty() const { return Operands.empty(); }
-
- void pop_front() { Operands.erase(Operands.begin()); }
-
- void optimize();
-
- /// Emit MatchTable opcodes that test whether the instruction named in
- /// InsnVarID matches all the predicates and all the operands.
- void emitPredicateOpcodes(MatchTable &Table, RuleMatcher &Rule) {
- if (NumOperandsCheck)
- InstructionNumOperandsMatcher(InsnVarID, getNumOperands())
- .emitPredicateOpcodes(Table, Rule);
-
- // First emit all instruction-level predicates that need to be verified
- // before we can verify operands.
- emitFilteredPredicateListOpcodes(
- [](const PredicateMatcher &P) {
- return !P.dependsOnOperands();
- }, Table, Rule);
-
- // Emit all operand constraints.
- for (const auto &Operand : Operands)
- Operand->emitPredicateOpcodes(Table, Rule);
-
- // All of the tablegen defined predicates should now be matched. Now emit
- // any custom predicates that rely on all generated checks.
- emitFilteredPredicateListOpcodes(
- [](const PredicateMatcher &P) {
- return P.dependsOnOperands();
- }, Table, Rule);
- }
-
- /// Compare the priority of this object and B.
- ///
- /// Returns true if this object is more important than B.
- bool isHigherPriorityThan(InstructionMatcher &B) {
- // Instruction matchers involving more operands have higher priority.
- if (Operands.size() > B.Operands.size())
- return true;
- if (Operands.size() < B.Operands.size())
- return false;
-
- for (auto &&P : zip(predicates(), B.predicates())) {
- auto L = static_cast<InstructionPredicateMatcher *>(std::get<0>(P).get());
- auto R = static_cast<InstructionPredicateMatcher *>(std::get<1>(P).get());
- if (L->isHigherPriorityThan(*R))
- return true;
- if (R->isHigherPriorityThan(*L))
- return false;
- }
-
- for (auto Operand : zip(Operands, B.Operands)) {
- if (std::get<0>(Operand)->isHigherPriorityThan(*std::get<1>(Operand)))
- return true;
- if (std::get<1>(Operand)->isHigherPriorityThan(*std::get<0>(Operand)))
- return false;
- }
-
- return false;
- };
-
- /// Report the maximum number of temporary operands needed by the instruction
- /// matcher.
- unsigned countRendererFns() {
- return std::accumulate(
- predicates().begin(), predicates().end(), 0,
- [](unsigned A,
- const std::unique_ptr<PredicateMatcher> &Predicate) {
- return A + Predicate->countRendererFns();
- }) +
- std::accumulate(
- Operands.begin(), Operands.end(), 0,
- [](unsigned A, const std::unique_ptr<OperandMatcher> &Operand) {
- return A + Operand->countRendererFns();
- });
- }
-
- InstructionOpcodeMatcher &getOpcodeMatcher() {
- for (auto &P : predicates())
- if (auto *OpMatcher = dyn_cast<InstructionOpcodeMatcher>(P.get()))
- return *OpMatcher;
- llvm_unreachable("Didn't find an opcode matcher");
- }
-
- bool isConstantInstruction() {
- return getOpcodeMatcher().isConstantInstruction();
- }
-
- StringRef getOpcode() { return getOpcodeMatcher().getOpcode(); }
-};
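-
-// emitPredicateOpcodes() above lays the checks out in a fixed order; for a
-// rule rooted at a hypothetical G_ADD a fragment of the table looks roughly
-// like:
-//
-//   GIM_CheckNumOperands, /*MI*/0, /*Expected*/3,
-//   GIM_CheckOpcode, /*MI*/0, TargetOpcode::G_ADD,       // operand-independent
-//   GIM_CheckType, /*MI*/0, /*Op*/1, /*Type*/GILLT_s32,  // per-operand checks
-//   GIM_CheckCxxInsnPredicate, /*MI*/0, /*FnId*/...,     // operand-dependent last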
-
-StringRef RuleMatcher::getOpcode() const {
- return Matchers.front()->getOpcode();
-}
-
-unsigned RuleMatcher::getNumOperands() const {
- return Matchers.front()->getNumOperands();
-}
-
-LLTCodeGen RuleMatcher::getFirstConditionAsRootType() {
- InstructionMatcher &InsnMatcher = *Matchers.front();
- if (!InsnMatcher.predicates_empty())
- if (const auto *TM =
- dyn_cast<LLTOperandMatcher>(&**InsnMatcher.predicates_begin()))
- if (TM->getInsnVarID() == 0 && TM->getOpIdx() == 0)
- return TM->getTy();
- return {};
-}
-
-/// Generates code to check that the operand is a register defined by an
-/// instruction that matches the given instruction matcher.
-///
-/// For example, the pattern:
-/// (set $dst, (G_MUL (G_ADD $src1, $src2), $src3))
-/// would use an InstructionOperandMatcher for operand 1 of the G_MUL to match
-/// the:
-/// (G_ADD $src1, $src2)
-/// subpattern.
-class InstructionOperandMatcher : public OperandPredicateMatcher {
-protected:
- std::unique_ptr<InstructionMatcher> InsnMatcher;
-
-public:
- InstructionOperandMatcher(unsigned InsnVarID, unsigned OpIdx,
- RuleMatcher &Rule, StringRef SymbolicName,
- bool NumOpsCheck = true)
- : OperandPredicateMatcher(OPM_Instruction, InsnVarID, OpIdx),
- InsnMatcher(new InstructionMatcher(Rule, SymbolicName, NumOpsCheck)) {}
-
- static bool classof(const PredicateMatcher *P) {
- return P->getKind() == OPM_Instruction;
- }
-
- InstructionMatcher &getInsnMatcher() const { return *InsnMatcher; }
-
- void emitCaptureOpcodes(MatchTable &Table, RuleMatcher &Rule) const {
- const unsigned NewInsnVarID = InsnMatcher->getInsnVarID();
- Table << MatchTable::Opcode("GIM_RecordInsn")
- << MatchTable::Comment("DefineMI")
- << MatchTable::IntValue(NewInsnVarID) << MatchTable::Comment("MI")
- << MatchTable::IntValue(getInsnVarID())
- << MatchTable::Comment("OpIdx") << MatchTable::IntValue(getOpIdx())
- << MatchTable::Comment("MIs[" + llvm::to_string(NewInsnVarID) + "]")
- << MatchTable::LineBreak;
- }
-
- void emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const override {
- emitCaptureOpcodes(Table, Rule);
- InsnMatcher->emitPredicateOpcodes(Table, Rule);
- }
-
- bool isHigherPriorityThan(const OperandPredicateMatcher &B) const override {
- if (OperandPredicateMatcher::isHigherPriorityThan(B))
- return true;
- if (B.OperandPredicateMatcher::isHigherPriorityThan(*this))
- return false;
-
- if (const InstructionOperandMatcher *BP =
- dyn_cast<InstructionOperandMatcher>(&B))
- if (InsnMatcher->isHigherPriorityThan(*BP->InsnMatcher))
- return true;
- return false;
- }
-
- /// Report the maximum number of temporary operands needed by the predicate
- /// matcher.
- unsigned countRendererFns() const override {
- return InsnMatcher->countRendererFns();
- }
-};
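-
-// For the (G_ADD $src1, $src2) subpattern in the example above, the capture
-// plus recursive check sequence looks roughly like:
-//
-//   GIM_RecordInsn, /*DefineMI*/1, /*MI*/0, /*OpIdx*/1, // MIs[1]
-//   GIM_CheckOpcode, /*MI*/1, TargetOpcode::G_ADD,
-//   ... checks for the operands of MIs[1] ...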
-
-void InstructionMatcher::optimize() {
- SmallVector<std::unique_ptr<PredicateMatcher>, 8> Stash;
- const auto &OpcMatcher = getOpcodeMatcher();
-
- Stash.push_back(predicates_pop_front());
- if (Stash.back().get() == &OpcMatcher) {
- if (NumOperandsCheck && OpcMatcher.isVariadicNumOperands())
- Stash.emplace_back(
- new InstructionNumOperandsMatcher(InsnVarID, getNumOperands()));
- NumOperandsCheck = false;
-
- for (auto &OM : Operands)
- for (auto &OP : OM->predicates())
- if (isa<IntrinsicIDOperandMatcher>(OP)) {
- Stash.push_back(std::move(OP));
- OM->eraseNullPredicates();
- break;
- }
- }
-
- if (InsnVarID > 0) {
- assert(!Operands.empty() && "Nested instruction is expected to def a vreg");
- for (auto &OP : Operands[0]->predicates())
- OP.reset();
- Operands[0]->eraseNullPredicates();
- }
- for (auto &OM : Operands) {
- for (auto &OP : OM->predicates())
- if (isa<LLTOperandMatcher>(OP))
- Stash.push_back(std::move(OP));
- OM->eraseNullPredicates();
- }
- while (!Stash.empty())
- prependPredicate(Stash.pop_back_val());
-}
-
-//===- Actions ------------------------------------------------------------===//
-class OperandRenderer {
-public:
- enum RendererKind {
- OR_Copy,
- OR_CopyOrAddZeroReg,
- OR_CopySubReg,
- OR_CopyPhysReg,
- OR_CopyConstantAsImm,
- OR_CopyFConstantAsFPImm,
- OR_Imm,
- OR_SubRegIndex,
- OR_Register,
- OR_TempRegister,
- OR_ComplexPattern,
- OR_Custom,
- OR_CustomOperand
- };
-
-protected:
- RendererKind Kind;
-
-public:
- OperandRenderer(RendererKind Kind) : Kind(Kind) {}
- virtual ~OperandRenderer() {}
-
- RendererKind getKind() const { return Kind; }
-
- virtual void emitRenderOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const = 0;
-};
-
-/// A CopyRenderer emits code to copy a single operand from an existing
-/// instruction to the one being built.
-class CopyRenderer : public OperandRenderer {
-protected:
- unsigned NewInsnID;
- /// The name of the operand.
- const StringRef SymbolicName;
-
-public:
- CopyRenderer(unsigned NewInsnID, StringRef SymbolicName)
- : OperandRenderer(OR_Copy), NewInsnID(NewInsnID),
- SymbolicName(SymbolicName) {
- assert(!SymbolicName.empty() && "Cannot copy from an unspecified source");
- }
-
- static bool classof(const OperandRenderer *R) {
- return R->getKind() == OR_Copy;
- }
-
- StringRef getSymbolicName() const { return SymbolicName; }
-
- void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- const OperandMatcher &Operand = Rule.getOperandMatcher(SymbolicName);
- unsigned OldInsnVarID = Rule.getInsnVarID(Operand.getInstructionMatcher());
- Table << MatchTable::Opcode("GIR_Copy") << MatchTable::Comment("NewInsnID")
- << MatchTable::IntValue(NewInsnID) << MatchTable::Comment("OldInsnID")
- << MatchTable::IntValue(OldInsnVarID) << MatchTable::Comment("OpIdx")
- << MatchTable::IntValue(Operand.getOpIdx())
- << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
- }
-};
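// For illustration only (IDs and the operand name are hypothetical): a
// CopyRenderer for an operand named "src", found at operand index 2 of matched
// instruction 1 and copied into new instruction 0, emits a row along the lines
// of:
//   GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/1, /*OpIdx*/2, // src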
-
-/// A CopyPhysRegRenderer emits code to copy a virtual register to a specific
-/// physical register.
-class CopyPhysRegRenderer : public OperandRenderer {
-protected:
- unsigned NewInsnID;
- Record *PhysReg;
-
-public:
- CopyPhysRegRenderer(unsigned NewInsnID, Record *Reg)
- : OperandRenderer(OR_CopyPhysReg), NewInsnID(NewInsnID),
- PhysReg(Reg) {
- assert(PhysReg);
- }
-
- static bool classof(const OperandRenderer *R) {
- return R->getKind() == OR_CopyPhysReg;
- }
-
- Record *getPhysReg() const { return PhysReg; }
-
- void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- const OperandMatcher &Operand = Rule.getPhysRegOperandMatcher(PhysReg);
- unsigned OldInsnVarID = Rule.getInsnVarID(Operand.getInstructionMatcher());
- Table << MatchTable::Opcode("GIR_Copy") << MatchTable::Comment("NewInsnID")
- << MatchTable::IntValue(NewInsnID) << MatchTable::Comment("OldInsnID")
- << MatchTable::IntValue(OldInsnVarID) << MatchTable::Comment("OpIdx")
- << MatchTable::IntValue(Operand.getOpIdx())
- << MatchTable::Comment(PhysReg->getName())
- << MatchTable::LineBreak;
- }
-};
-
-/// A CopyOrAddZeroRegRenderer emits code to copy a single operand from an
-/// existing instruction to the one being built. If the operand turns out to be
-/// a 'G_CONSTANT 0' then it replaces the operand with a zero register.
-class CopyOrAddZeroRegRenderer : public OperandRenderer {
-protected:
- unsigned NewInsnID;
- /// The name of the operand.
- const StringRef SymbolicName;
- const Record *ZeroRegisterDef;
-
-public:
- CopyOrAddZeroRegRenderer(unsigned NewInsnID,
- StringRef SymbolicName, Record *ZeroRegisterDef)
- : OperandRenderer(OR_CopyOrAddZeroReg), NewInsnID(NewInsnID),
- SymbolicName(SymbolicName), ZeroRegisterDef(ZeroRegisterDef) {
- assert(!SymbolicName.empty() && "Cannot copy from an unspecified source");
- }
-
- static bool classof(const OperandRenderer *R) {
- return R->getKind() == OR_CopyOrAddZeroReg;
- }
-
- StringRef getSymbolicName() const { return SymbolicName; }
-
- void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- const OperandMatcher &Operand = Rule.getOperandMatcher(SymbolicName);
- unsigned OldInsnVarID = Rule.getInsnVarID(Operand.getInstructionMatcher());
- Table << MatchTable::Opcode("GIR_CopyOrAddZeroReg")
- << MatchTable::Comment("NewInsnID") << MatchTable::IntValue(NewInsnID)
- << MatchTable::Comment("OldInsnID")
- << MatchTable::IntValue(OldInsnVarID) << MatchTable::Comment("OpIdx")
- << MatchTable::IntValue(Operand.getOpIdx())
- << MatchTable::NamedValue(
- (ZeroRegisterDef->getValue("Namespace")
- ? ZeroRegisterDef->getValueAsString("Namespace")
- : ""),
- ZeroRegisterDef->getName())
- << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
- }
-};
-
-/// A CopyConstantAsImmRenderer emits code to render a G_CONSTANT instruction to
-/// an extended immediate operand.
-class CopyConstantAsImmRenderer : public OperandRenderer {
-protected:
- unsigned NewInsnID;
- /// The name of the operand.
- const std::string SymbolicName;
- bool Signed;
-
-public:
- CopyConstantAsImmRenderer(unsigned NewInsnID, StringRef SymbolicName)
- : OperandRenderer(OR_CopyConstantAsImm), NewInsnID(NewInsnID),
- SymbolicName(SymbolicName), Signed(true) {}
-
- static bool classof(const OperandRenderer *R) {
- return R->getKind() == OR_CopyConstantAsImm;
- }
-
- StringRef getSymbolicName() const { return SymbolicName; }
-
- void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- InstructionMatcher &InsnMatcher = Rule.getInstructionMatcher(SymbolicName);
- unsigned OldInsnVarID = Rule.getInsnVarID(InsnMatcher);
- Table << MatchTable::Opcode(Signed ? "GIR_CopyConstantAsSImm"
- : "GIR_CopyConstantAsUImm")
- << MatchTable::Comment("NewInsnID") << MatchTable::IntValue(NewInsnID)
- << MatchTable::Comment("OldInsnID")
- << MatchTable::IntValue(OldInsnVarID)
- << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
- }
-};
-
-/// A CopyFConstantAsFPImmRenderer emits code to render a G_FCONSTANT
-/// instruction to an extended immediate operand.
-class CopyFConstantAsFPImmRenderer : public OperandRenderer {
-protected:
- unsigned NewInsnID;
- /// The name of the operand.
- const std::string SymbolicName;
-
-public:
- CopyFConstantAsFPImmRenderer(unsigned NewInsnID, StringRef SymbolicName)
- : OperandRenderer(OR_CopyFConstantAsFPImm), NewInsnID(NewInsnID),
- SymbolicName(SymbolicName) {}
-
- static bool classof(const OperandRenderer *R) {
- return R->getKind() == OR_CopyFConstantAsFPImm;
- }
-
- StringRef getSymbolicName() const { return SymbolicName; }
-
- void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- InstructionMatcher &InsnMatcher = Rule.getInstructionMatcher(SymbolicName);
- unsigned OldInsnVarID = Rule.getInsnVarID(InsnMatcher);
- Table << MatchTable::Opcode("GIR_CopyFConstantAsFPImm")
- << MatchTable::Comment("NewInsnID") << MatchTable::IntValue(NewInsnID)
- << MatchTable::Comment("OldInsnID")
- << MatchTable::IntValue(OldInsnVarID)
- << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
- }
-};
-
-/// A CopySubRegRenderer emits code to copy a single register operand from an
-/// existing instruction to the one being built and indicate that only a
-/// subregister should be copied.
-class CopySubRegRenderer : public OperandRenderer {
-protected:
- unsigned NewInsnID;
- /// The name of the operand.
- const StringRef SymbolicName;
- /// The subregister to extract.
- const CodeGenSubRegIndex *SubReg;
-
-public:
- CopySubRegRenderer(unsigned NewInsnID, StringRef SymbolicName,
- const CodeGenSubRegIndex *SubReg)
- : OperandRenderer(OR_CopySubReg), NewInsnID(NewInsnID),
- SymbolicName(SymbolicName), SubReg(SubReg) {}
-
- static bool classof(const OperandRenderer *R) {
- return R->getKind() == OR_CopySubReg;
- }
-
- StringRef getSymbolicName() const { return SymbolicName; }
-
- void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- const OperandMatcher &Operand = Rule.getOperandMatcher(SymbolicName);
- unsigned OldInsnVarID = Rule.getInsnVarID(Operand.getInstructionMatcher());
- Table << MatchTable::Opcode("GIR_CopySubReg")
- << MatchTable::Comment("NewInsnID") << MatchTable::IntValue(NewInsnID)
- << MatchTable::Comment("OldInsnID")
- << MatchTable::IntValue(OldInsnVarID) << MatchTable::Comment("OpIdx")
- << MatchTable::IntValue(Operand.getOpIdx())
- << MatchTable::Comment("SubRegIdx")
- << MatchTable::IntValue(SubReg->EnumValue)
- << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
- }
-};
-
-/// Adds a specific physical register to the instruction being built.
-/// This is typically useful for WZR/XZR on AArch64.
-class AddRegisterRenderer : public OperandRenderer {
-protected:
- unsigned InsnID;
- const Record *RegisterDef;
- bool IsDef;
- const CodeGenTarget &Target;
-
-public:
- AddRegisterRenderer(unsigned InsnID, const CodeGenTarget &Target,
- const Record *RegisterDef, bool IsDef = false)
- : OperandRenderer(OR_Register), InsnID(InsnID), RegisterDef(RegisterDef),
- IsDef(IsDef), Target(Target) {}
-
- static bool classof(const OperandRenderer *R) {
- return R->getKind() == OR_Register;
- }
-
- void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIR_AddRegister")
- << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID);
- if (RegisterDef->getName() != "zero_reg") {
- Table << MatchTable::NamedValue(
- (RegisterDef->getValue("Namespace")
- ? RegisterDef->getValueAsString("Namespace")
- : ""),
- RegisterDef->getName());
- } else {
- Table << MatchTable::NamedValue(Target.getRegNamespace(), "NoRegister");
- }
- Table << MatchTable::Comment("AddRegisterRegFlags");
-
- // TODO: This is encoded as a 64-bit element, but only 16 or 32 bits are
- // really needed for a physical register reference. We can pack the
- // register and flags in a single field.
- if (IsDef)
- Table << MatchTable::NamedValue("RegState::Define");
- else
- Table << MatchTable::IntValue(0);
- Table << MatchTable::LineBreak;
- }
-};
-
-/// Adds a specific temporary virtual register to the instruction being built.
-/// This is used to chain instructions together when emitting multiple
-/// instructions.
-class TempRegRenderer : public OperandRenderer {
-protected:
- unsigned InsnID;
- unsigned TempRegID;
- const CodeGenSubRegIndex *SubRegIdx;
- bool IsDef;
- bool IsDead;
-
-public:
- TempRegRenderer(unsigned InsnID, unsigned TempRegID, bool IsDef = false,
- const CodeGenSubRegIndex *SubReg = nullptr,
- bool IsDead = false)
- : OperandRenderer(OR_Register), InsnID(InsnID), TempRegID(TempRegID),
- SubRegIdx(SubReg), IsDef(IsDef), IsDead(IsDead) {}
-
- static bool classof(const OperandRenderer *R) {
- return R->getKind() == OR_TempRegister;
- }
-
- void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- if (SubRegIdx) {
- assert(!IsDef);
- Table << MatchTable::Opcode("GIR_AddTempSubRegister");
- } else
- Table << MatchTable::Opcode("GIR_AddTempRegister");
-
- Table << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
- << MatchTable::Comment("TempRegID") << MatchTable::IntValue(TempRegID)
- << MatchTable::Comment("TempRegFlags");
-
- if (IsDef) {
- SmallString<32> RegFlags;
- RegFlags += "RegState::Define";
- if (IsDead)
- RegFlags += "|RegState::Dead";
- Table << MatchTable::NamedValue(RegFlags);
- } else
- Table << MatchTable::IntValue(0);
-
- if (SubRegIdx)
- Table << MatchTable::NamedValue(SubRegIdx->getQualifiedName());
- Table << MatchTable::LineBreak;
- }
-};
-
-/// Adds a specific immediate to the instruction being built.
-class ImmRenderer : public OperandRenderer {
-protected:
- unsigned InsnID;
- int64_t Imm;
-
-public:
- ImmRenderer(unsigned InsnID, int64_t Imm)
- : OperandRenderer(OR_Imm), InsnID(InsnID), Imm(Imm) {}
-
- static bool classof(const OperandRenderer *R) {
- return R->getKind() == OR_Imm;
- }
-
- void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIR_AddImm") << MatchTable::Comment("InsnID")
- << MatchTable::IntValue(InsnID) << MatchTable::Comment("Imm")
- << MatchTable::IntValue(Imm) << MatchTable::LineBreak;
- }
-};
-
-/// Adds an enum value for a subreg index to the instruction being built.
-class SubRegIndexRenderer : public OperandRenderer {
-protected:
- unsigned InsnID;
- const CodeGenSubRegIndex *SubRegIdx;
-
-public:
- SubRegIndexRenderer(unsigned InsnID, const CodeGenSubRegIndex *SRI)
- : OperandRenderer(OR_SubRegIndex), InsnID(InsnID), SubRegIdx(SRI) {}
-
- static bool classof(const OperandRenderer *R) {
- return R->getKind() == OR_SubRegIndex;
- }
-
- void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIR_AddImm") << MatchTable::Comment("InsnID")
- << MatchTable::IntValue(InsnID) << MatchTable::Comment("SubRegIndex")
- << MatchTable::IntValue(SubRegIdx->EnumValue)
- << MatchTable::LineBreak;
- }
-};
-
-/// Adds operands by calling a renderer function supplied by the ComplexPattern
-/// matcher function.
-class RenderComplexPatternOperand : public OperandRenderer {
-private:
- unsigned InsnID;
- const Record &TheDef;
- /// The name of the operand.
- const StringRef SymbolicName;
- /// The renderer number. This must be unique within a rule since it's used to
- /// identify a temporary variable to hold the renderer function.
- unsigned RendererID;
- /// When provided, this is the suboperand of the ComplexPattern operand to
- /// render. Otherwise all the suboperands will be rendered.
- std::optional<unsigned> SubOperand;
-
- unsigned getNumOperands() const {
- return TheDef.getValueAsDag("Operands")->getNumArgs();
- }
-
-public:
- RenderComplexPatternOperand(unsigned InsnID, const Record &TheDef,
- StringRef SymbolicName, unsigned RendererID,
- std::optional<unsigned> SubOperand = std::nullopt)
- : OperandRenderer(OR_ComplexPattern), InsnID(InsnID), TheDef(TheDef),
- SymbolicName(SymbolicName), RendererID(RendererID),
- SubOperand(SubOperand) {}
-
- static bool classof(const OperandRenderer *R) {
- return R->getKind() == OR_ComplexPattern;
- }
-
- void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode(SubOperand ? "GIR_ComplexSubOperandRenderer"
- : "GIR_ComplexRenderer")
- << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
- << MatchTable::Comment("RendererID")
- << MatchTable::IntValue(RendererID);
- if (SubOperand)
- Table << MatchTable::Comment("SubOperand")
- << MatchTable::IntValue(*SubOperand);
- Table << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
- }
-};
-
-class CustomRenderer : public OperandRenderer {
-protected:
- unsigned InsnID;
- const Record &Renderer;
- /// The name of the operand.
- const std::string SymbolicName;
-
-public:
- CustomRenderer(unsigned InsnID, const Record &Renderer,
- StringRef SymbolicName)
- : OperandRenderer(OR_Custom), InsnID(InsnID), Renderer(Renderer),
- SymbolicName(SymbolicName) {}
-
- static bool classof(const OperandRenderer *R) {
- return R->getKind() == OR_Custom;
- }
-
- void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- InstructionMatcher &InsnMatcher = Rule.getInstructionMatcher(SymbolicName);
- unsigned OldInsnVarID = Rule.getInsnVarID(InsnMatcher);
- Table << MatchTable::Opcode("GIR_CustomRenderer")
- << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
- << MatchTable::Comment("OldInsnID")
- << MatchTable::IntValue(OldInsnVarID)
- << MatchTable::Comment("Renderer")
- << MatchTable::NamedValue(
- "GICR_" + Renderer.getValueAsString("RendererFn").str())
- << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
- }
-};
-
-class CustomOperandRenderer : public OperandRenderer {
-protected:
- unsigned InsnID;
- const Record &Renderer;
- /// The name of the operand.
- const std::string SymbolicName;
-
-public:
- CustomOperandRenderer(unsigned InsnID, const Record &Renderer,
- StringRef SymbolicName)
- : OperandRenderer(OR_CustomOperand), InsnID(InsnID), Renderer(Renderer),
- SymbolicName(SymbolicName) {}
-
- static bool classof(const OperandRenderer *R) {
- return R->getKind() == OR_CustomOperand;
- }
-
- void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- const OperandMatcher &OpdMatcher = Rule.getOperandMatcher(SymbolicName);
- Table << MatchTable::Opcode("GIR_CustomOperandRenderer")
- << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
- << MatchTable::Comment("OldInsnID")
- << MatchTable::IntValue(OpdMatcher.getInsnVarID())
- << MatchTable::Comment("OpIdx")
- << MatchTable::IntValue(OpdMatcher.getOpIdx())
- << MatchTable::Comment("OperandRenderer")
- << MatchTable::NamedValue(
- "GICR_" + Renderer.getValueAsString("RendererFn").str())
- << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
- }
-};
-
-/// An action taken when all Matcher predicates succeeded for a parent rule.
-///
-/// Typical actions include:
-/// * Changing the opcode of an instruction.
-/// * Adding an operand to an instruction.
-class MatchAction {
-public:
- virtual ~MatchAction() {}
-
- /// Emit the MatchTable opcodes to implement the action.
- virtual void emitActionOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const = 0;
-};
-
-/// Generates a comment describing the matched rule being acted upon.
-class DebugCommentAction : public MatchAction {
-private:
- std::string S;
-
-public:
- DebugCommentAction(StringRef S) : S(std::string(S)) {}
-
- void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- Table << MatchTable::Comment(S) << MatchTable::LineBreak;
- }
-};
-
-/// Generates code to build an instruction or mutate an existing instruction
-/// into the desired instruction when this is possible.
-class BuildMIAction : public MatchAction {
-private:
- unsigned InsnID;
- const CodeGenInstruction *I;
- InstructionMatcher *Matched;
- std::vector<std::unique_ptr<OperandRenderer>> OperandRenderers;
-
- /// True if the instruction can be built solely by mutating the opcode.
- bool canMutate(RuleMatcher &Rule, const InstructionMatcher *Insn) const {
- if (!Insn)
- return false;
-
- if (OperandRenderers.size() != Insn->getNumOperands())
- return false;
-
- for (const auto &Renderer : enumerate(OperandRenderers)) {
- if (const auto *Copy = dyn_cast<CopyRenderer>(&*Renderer.value())) {
- const OperandMatcher &OM = Rule.getOperandMatcher(Copy->getSymbolicName());
- if (Insn != &OM.getInstructionMatcher() ||
- OM.getOpIdx() != Renderer.index())
- return false;
- } else
- return false;
- }
-
- return true;
- }
-
-public:
- BuildMIAction(unsigned InsnID, const CodeGenInstruction *I)
- : InsnID(InsnID), I(I), Matched(nullptr) {}
-
- unsigned getInsnID() const { return InsnID; }
- const CodeGenInstruction *getCGI() const { return I; }
-
- void chooseInsnToMutate(RuleMatcher &Rule) {
- for (auto *MutateCandidate : Rule.mutatable_insns()) {
- if (canMutate(Rule, MutateCandidate)) {
- // Take the first one we're offered that we're able to mutate.
- Rule.reserveInsnMatcherForMutation(MutateCandidate);
- Matched = MutateCandidate;
- return;
- }
- }
- }
-
- template <class Kind, class... Args>
- Kind &addRenderer(Args&&... args) {
- OperandRenderers.emplace_back(
- std::make_unique<Kind>(InsnID, std::forward<Args>(args)...));
- return *static_cast<Kind *>(OperandRenderers.back().get());
- }
-
- void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- if (Matched) {
- assert(canMutate(Rule, Matched) &&
- "Arranged to mutate an insn that isn't mutatable");
-
- unsigned RecycleInsnID = Rule.getInsnVarID(*Matched);
- Table << MatchTable::Opcode("GIR_MutateOpcode")
- << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
- << MatchTable::Comment("RecycleInsnID")
- << MatchTable::IntValue(RecycleInsnID)
- << MatchTable::Comment("Opcode")
- << MatchTable::NamedValue(I->Namespace, I->TheDef->getName())
- << MatchTable::LineBreak;
-
- if (!I->ImplicitDefs.empty() || !I->ImplicitUses.empty()) {
- for (auto *Def : I->ImplicitDefs) {
- auto Namespace = Def->getValue("Namespace")
- ? Def->getValueAsString("Namespace")
- : "";
- Table << MatchTable::Opcode("GIR_AddImplicitDef")
- << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
- << MatchTable::NamedValue(Namespace, Def->getName())
- << MatchTable::LineBreak;
- }
- for (auto *Use : I->ImplicitUses) {
- auto Namespace = Use->getValue("Namespace")
- ? Use->getValueAsString("Namespace")
- : "";
- Table << MatchTable::Opcode("GIR_AddImplicitUse")
- << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
- << MatchTable::NamedValue(Namespace, Use->getName())
- << MatchTable::LineBreak;
- }
- }
- return;
- }
-
- // TODO: Simple permutation looks like it could be almost as common as
- // mutation due to commutative operations.
-
- Table << MatchTable::Opcode("GIR_BuildMI") << MatchTable::Comment("InsnID")
- << MatchTable::IntValue(InsnID) << MatchTable::Comment("Opcode")
- << MatchTable::NamedValue(I->Namespace, I->TheDef->getName())
- << MatchTable::LineBreak;
- for (const auto &Renderer : OperandRenderers)
- Renderer->emitRenderOpcodes(Table, Rule);
-
- if (I->mayLoad || I->mayStore) {
- Table << MatchTable::Opcode("GIR_MergeMemOperands")
- << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
- << MatchTable::Comment("MergeInsnID's");
- // Emit the IDs for all the instructions that are matched by this rule.
- // TODO: Limit this to matched instructions that mayLoad/mayStore or have
- // some other means of having a memoperand. Also limit this to
- // emitted instructions that expect to have a memoperand too. For
- // example, (G_SEXT (G_LOAD x)) that results in separate load and
- // sign-extend instructions shouldn't put the memoperand on the
- // sign-extend since it has no effect there.
- std::vector<unsigned> MergeInsnIDs;
- for (const auto &IDMatcherPair : Rule.defined_insn_vars())
- MergeInsnIDs.push_back(IDMatcherPair.second);
- llvm::sort(MergeInsnIDs);
- for (const auto &MergeInsnID : MergeInsnIDs)
- Table << MatchTable::IntValue(MergeInsnID);
- Table << MatchTable::NamedValue("GIU_MergeMemOperands_EndOfList")
- << MatchTable::LineBreak;
- }
-
- // FIXME: This is a hack but it's sufficient for ISel. We'll need to do
- // better for combines. Particularly when there are multiple match
- // roots.
- if (InsnID == 0)
- Table << MatchTable::Opcode("GIR_EraseFromParent")
- << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
- << MatchTable::LineBreak;
- }
-};
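// Illustrative sketch only (the opcode name and IDs are hypothetical): when no
// matched instruction can simply be mutated, emitActionOpcodes above produces
// roughly:
//   GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::SOME_INST,
//   <one row per OperandRenderer>
//   GIR_MergeMemOperands, ...          (only if the instruction may load/store)
//   GIR_EraseFromParent, /*InsnID*/0   (only for the root, i.e. InsnID == 0)
// whereas the mutation path emits a single GIR_MutateOpcode row plus any
// implicit def/use additions.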
-
-/// Generates code to constrain the operands of an output instruction to the
-/// register classes specified by the definition of that instruction.
-class ConstrainOperandsToDefinitionAction : public MatchAction {
- unsigned InsnID;
-
-public:
- ConstrainOperandsToDefinitionAction(unsigned InsnID) : InsnID(InsnID) {}
-
- void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIR_ConstrainSelectedInstOperands")
- << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
- << MatchTable::LineBreak;
- }
-};
-
-/// Generates code to constrain the specified operand of an output instruction
-/// to the specified register class.
-class ConstrainOperandToRegClassAction : public MatchAction {
- unsigned InsnID;
- unsigned OpIdx;
- const CodeGenRegisterClass &RC;
-
-public:
- ConstrainOperandToRegClassAction(unsigned InsnID, unsigned OpIdx,
- const CodeGenRegisterClass &RC)
- : InsnID(InsnID), OpIdx(OpIdx), RC(RC) {}
-
- void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIR_ConstrainOperandRC")
- << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
- << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
- << MatchTable::NamedValue(RC.getQualifiedName() + "RegClassID")
- << MatchTable::LineBreak;
- }
-};
-
-/// Generates code to create a temporary register which can be used to chain
-/// instructions together.
-class MakeTempRegisterAction : public MatchAction {
-private:
- LLTCodeGen Ty;
- unsigned TempRegID;
-
-public:
- MakeTempRegisterAction(const LLTCodeGen &Ty, unsigned TempRegID)
- : Ty(Ty), TempRegID(TempRegID) {
- KnownTypes.insert(Ty);
- }
-
- void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
- Table << MatchTable::Opcode("GIR_MakeTempReg")
- << MatchTable::Comment("TempRegID") << MatchTable::IntValue(TempRegID)
- << MatchTable::Comment("TypeID")
- << MatchTable::NamedValue(Ty.getCxxEnumValue())
- << MatchTable::LineBreak;
- }
-};
-
-InstructionMatcher &RuleMatcher::addInstructionMatcher(StringRef SymbolicName) {
- Matchers.emplace_back(new InstructionMatcher(*this, SymbolicName));
- MutatableInsns.insert(Matchers.back().get());
- return *Matchers.back();
-}
-
-void RuleMatcher::addRequiredFeature(Record *Feature) {
- RequiredFeatures.push_back(Feature);
-}
-
-const std::vector<Record *> &RuleMatcher::getRequiredFeatures() const {
- return RequiredFeatures;
-}
-
-// Emplaces an action of the specified Kind at the end of the action list.
-//
-// Returns a reference to the newly created action.
-//
-// Like std::vector::emplace_back(), may invalidate all iterators if the new
-// size exceeds the capacity. Otherwise, only invalidates the past-the-end
-// iterator.
-template <class Kind, class... Args>
-Kind &RuleMatcher::addAction(Args &&... args) {
- Actions.emplace_back(std::make_unique<Kind>(std::forward<Args>(args)...));
- return *static_cast<Kind *>(Actions.back().get());
-}
-
-// Emplaces an action of the specified Kind before the given insertion point.
-//
-// Returns an iterator pointing at the newly created action.
-//
-// Like std::vector::insert(), may invalidate all iterators if the new size
-// exceeds the capacity. Otherwise, only invalidates the iterators from the
-// insertion point onwards.
-template <class Kind, class... Args>
-action_iterator RuleMatcher::insertAction(action_iterator InsertPt,
- Args &&... args) {
- return Actions.emplace(InsertPt,
- std::make_unique<Kind>(std::forward<Args>(args)...));
-}
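// Illustrative usage, mirroring calls made elsewhere in this emitter (the
// exact arguments depend on the calling context):
//   InsertPt = M.insertAction<MakeTempRegisterAction>(InsertPt, OpTy, TempRegID);
//   M.insertAction<ConstrainOperandToRegClassAction>(
//       InsertPt, DstMIBuilder.getInsnID(), 0, RegClass);
// addAction<Kind>(...) behaves the same way but always appends to the end of
// the action list.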
-
-unsigned RuleMatcher::implicitlyDefineInsnVar(InstructionMatcher &Matcher) {
- unsigned NewInsnVarID = NextInsnVarID++;
- InsnVariableIDs[&Matcher] = NewInsnVarID;
- return NewInsnVarID;
-}
-
-unsigned RuleMatcher::getInsnVarID(InstructionMatcher &InsnMatcher) const {
- const auto &I = InsnVariableIDs.find(&InsnMatcher);
- if (I != InsnVariableIDs.end())
- return I->second;
- llvm_unreachable("Matched Insn was not captured in a local variable");
-}
-
-void RuleMatcher::defineOperand(StringRef SymbolicName, OperandMatcher &OM) {
- if (DefinedOperands.find(SymbolicName) == DefinedOperands.end()) {
- DefinedOperands[SymbolicName] = &OM;
- return;
- }
-
- // If the operand is already defined, then we must ensure both references in
- // the matcher have the exact same node.
- OM.addPredicate<SameOperandMatcher>(
- OM.getSymbolicName(), getOperandMatcher(OM.getSymbolicName()).getOpIdx());
-}
-
-void RuleMatcher::definePhysRegOperand(Record *Reg, OperandMatcher &OM) {
- if (PhysRegOperands.find(Reg) == PhysRegOperands.end()) {
- PhysRegOperands[Reg] = &OM;
- return;
- }
-}
-
-InstructionMatcher &
-RuleMatcher::getInstructionMatcher(StringRef SymbolicName) const {
- for (const auto &I : InsnVariableIDs)
- if (I.first->getSymbolicName() == SymbolicName)
- return *I.first;
- llvm_unreachable(
- ("Failed to lookup instruction " + SymbolicName).str().c_str());
-}
-
-const OperandMatcher &
-RuleMatcher::getPhysRegOperandMatcher(Record *Reg) const {
- const auto &I = PhysRegOperands.find(Reg);
-
- if (I == PhysRegOperands.end()) {
- PrintFatalError(SrcLoc, "Register " + Reg->getName() +
- " was not declared in matcher");
- }
-
- return *I->second;
-}
-
-const OperandMatcher &
-RuleMatcher::getOperandMatcher(StringRef Name) const {
- const auto &I = DefinedOperands.find(Name);
-
- if (I == DefinedOperands.end())
- PrintFatalError(SrcLoc, "Operand " + Name + " was not declared in matcher");
-
- return *I->second;
-}
-
-void RuleMatcher::emit(MatchTable &Table) {
- if (Matchers.empty())
- llvm_unreachable("Unexpected empty matcher!");
-
- // The representation supports rules that require multiple roots such as:
- // %ptr(p0) = ...
- // %elt0(s32) = G_LOAD %ptr
- // %1(p0) = G_ADD %ptr, 4
- // %elt1(s32) = G_LOAD p0 %1
- // which could be usefully folded into:
- // %ptr(p0) = ...
- // %elt0(s32), %elt1(s32) = TGT_LOAD_PAIR %ptr
- // on some targets but we don't need to make use of that yet.
- assert(Matchers.size() == 1 && "Cannot handle multi-root matchers yet");
-
- unsigned LabelID = Table.allocateLabelID();
- Table << MatchTable::Opcode("GIM_Try", +1)
- << MatchTable::Comment("On fail goto")
- << MatchTable::JumpTarget(LabelID)
- << MatchTable::Comment(("Rule ID " + Twine(RuleID) + " //").str())
- << MatchTable::LineBreak;
-
- if (!RequiredFeatures.empty()) {
- Table << MatchTable::Opcode("GIM_CheckFeatures")
- << MatchTable::NamedValue(getNameForFeatureBitset(RequiredFeatures))
- << MatchTable::LineBreak;
- }
-
- Matchers.front()->emitPredicateOpcodes(Table, *this);
-
- // We must also check if it's safe to fold the matched instructions.
- if (InsnVariableIDs.size() >= 2) {
- // Invert the map to create stable ordering (by var names)
- SmallVector<unsigned, 2> InsnIDs;
- for (const auto &Pair : InsnVariableIDs) {
- // Skip the root node since it isn't moving anywhere. Everything else is
- // sinking to meet it.
- if (Pair.first == Matchers.front().get())
- continue;
-
- InsnIDs.push_back(Pair.second);
- }
- llvm::sort(InsnIDs);
-
- for (const auto &InsnID : InsnIDs) {
- // Reject the difficult cases until we have a more accurate check.
- Table << MatchTable::Opcode("GIM_CheckIsSafeToFold")
- << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
- << MatchTable::LineBreak;
-
- // FIXME: Emit checks to determine it's _actually_ safe to fold and/or
- // account for unsafe cases.
- //
- // Example:
- // MI1--> %0 = ...
- // %1 = ... %0
- // MI0--> %2 = ... %0
- // It's not safe to erase MI1. We currently handle this by not
- // erasing %0 (even when it's dead).
- //
- // Example:
- // MI1--> %0 = load volatile @a
- // %1 = load volatile @a
- // MI0--> %2 = ... %0
- // It's not safe to sink %0's def past %1. We currently handle
- // this by rejecting all loads.
- //
- // Example:
- // MI1--> %0 = load @a
- // %1 = store @a
- // MI0--> %2 = ... %0
- // It's not safe to sink %0's def past %1. We currently handle
- // this by rejecting all loads.
- //
- // Example:
- // G_CONDBR %cond, @BB1
- // BB0:
- // MI1--> %0 = load @a
- // G_BR @BB1
- // BB1:
- // MI0--> %2 = ... %0
- // It's not always safe to sink %0 across control flow. In this
- // case it may introduce a memory fault. We currently handle this
- // by rejecting all loads.
- }
- }
-
- for (const auto &PM : EpilogueMatchers)
- PM->emitPredicateOpcodes(Table, *this);
-
- for (const auto &MA : Actions)
- MA->emitActionOpcodes(Table, *this);
-
- if (Table.isWithCoverage())
- Table << MatchTable::Opcode("GIR_Coverage") << MatchTable::IntValue(RuleID)
- << MatchTable::LineBreak;
- else
- Table << MatchTable::Comment(("GIR_Coverage, " + Twine(RuleID) + ",").str())
- << MatchTable::LineBreak;
-
- Table << MatchTable::Opcode("GIR_Done", -1) << MatchTable::LineBreak
- << MatchTable::Label(LabelID);
- ++NumPatternEmitted;
-}
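// Sketch of the overall shape this produces for one rule (the label, IDs and
// optional rows depend on the rule being emitted):
//   GIM_Try, /*On fail goto*/ Label_N,        // Rule ID <RuleID> //
//     GIM_CheckFeatures, <feature bitset>,    (only if features are required)
//     <predicate rows from the instruction/operand matchers>
//     GIM_CheckIsSafeToFold, /*InsnID*/...,   (when two or more insns matched)
//     <epilogue predicate rows, then action rows>
//     GIR_Coverage, <RuleID>,                 (only when coverage is enabled)
//   GIR_Done,
//   Label_N: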
-
-bool RuleMatcher::isHigherPriorityThan(const RuleMatcher &B) const {
- // Rules involving more match roots have higher priority.
- if (Matchers.size() > B.Matchers.size())
- return true;
- if (Matchers.size() < B.Matchers.size())
- return false;
-
- for (auto Matcher : zip(Matchers, B.Matchers)) {
- if (std::get<0>(Matcher)->isHigherPriorityThan(*std::get<1>(Matcher)))
- return true;
- if (std::get<1>(Matcher)->isHigherPriorityThan(*std::get<0>(Matcher)))
- return false;
- }
-
- return false;
-}
-
-unsigned RuleMatcher::countRendererFns() const {
- return std::accumulate(
- Matchers.begin(), Matchers.end(), 0,
- [](unsigned A, const std::unique_ptr<InstructionMatcher> &Matcher) {
- return A + Matcher->countRendererFns();
- });
-}
-
-bool OperandPredicateMatcher::isHigherPriorityThan(
- const OperandPredicateMatcher &B) const {
- // Generally speaking, an instruction is more important than an Int or a
- // LiteralInt because it can cover more nodes, but there's an exception to
- // this: G_CONSTANTs are less important than either of those two because they
- // are more permissive.
-
- const InstructionOperandMatcher *AOM =
- dyn_cast<InstructionOperandMatcher>(this);
- const InstructionOperandMatcher *BOM =
- dyn_cast<InstructionOperandMatcher>(&B);
- bool AIsConstantInsn = AOM && AOM->getInsnMatcher().isConstantInstruction();
- bool BIsConstantInsn = BOM && BOM->getInsnMatcher().isConstantInstruction();
-
- if (AOM && BOM) {
- // The relative priorities between a G_CONSTANT and any other instruction
- // don't actually matter but this code is needed to ensure a strict weak
- // ordering. This is particularly important on Windows where the rules will
- // be incorrectly sorted without it.
- if (AIsConstantInsn != BIsConstantInsn)
- return AIsConstantInsn < BIsConstantInsn;
- return false;
- }
-
- if (AOM && AIsConstantInsn && (B.Kind == OPM_Int || B.Kind == OPM_LiteralInt))
- return false;
- if (BOM && BIsConstantInsn && (Kind == OPM_Int || Kind == OPM_LiteralInt))
- return true;
-
- return Kind < B.Kind;
-}
-
-void SameOperandMatcher::emitPredicateOpcodes(MatchTable &Table,
- RuleMatcher &Rule) const {
- const OperandMatcher &OtherOM = Rule.getOperandMatcher(MatchingName);
- unsigned OtherInsnVarID = Rule.getInsnVarID(OtherOM.getInstructionMatcher());
- assert(OtherInsnVarID == OtherOM.getInstructionMatcher().getInsnVarID());
-
- Table << MatchTable::Opcode("GIM_CheckIsSameOperand")
- << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
- << MatchTable::Comment("OpIdx") << MatchTable::IntValue(OpIdx)
- << MatchTable::Comment("OtherMI")
- << MatchTable::IntValue(OtherInsnVarID)
- << MatchTable::Comment("OtherOpIdx")
- << MatchTable::IntValue(OtherOM.getOpIdx())
- << MatchTable::LineBreak;
-}
-
//===- GlobalISelEmitter class --------------------------------------------===//
static Expected<LLTCodeGen> getInstResultType(const TreePatternNode *Dst) {
@@ -3554,8 +291,7 @@ static Expected<LLTCodeGen> getInstResultType(const TreePatternNode *Dst) {
std::optional<LLTCodeGen> MaybeOpTy;
if (ChildTypes.front().isMachineValueType()) {
- MaybeOpTy =
- MVTToLLT(ChildTypes.front().getMachineValueType().SimpleTy);
+ MaybeOpTy = MVTToLLT(ChildTypes.front().getMachineValueType().SimpleTy);
}
if (!MaybeOpTy)
@@ -3563,17 +299,34 @@ static Expected<LLTCodeGen> getInstResultType(const TreePatternNode *Dst) {
return *MaybeOpTy;
}
-class GlobalISelEmitter {
+class GlobalISelEmitter final : public GlobalISelMatchTableExecutorEmitter {
public:
explicit GlobalISelEmitter(RecordKeeper &RK);
+
+ void emitAdditionalImpl(raw_ostream &OS) override;
+
+ void emitMIPredicateFns(raw_ostream &OS) override;
+ void emitI64ImmPredicateFns(raw_ostream &OS) override;
+ void emitAPFloatImmPredicateFns(raw_ostream &OS) override;
+ void emitAPIntImmPredicateFns(raw_ostream &OS) override;
+ void emitTestSimplePredicate(raw_ostream &OS) override;
+ void emitRunCustomAction(raw_ostream &OS) override;
+
+ const CodeGenTarget &getTarget() const override { return Target; }
+ StringRef getClassName() const override { return ClassName; }
+
void run(raw_ostream &OS);
private:
+ std::string ClassName;
+
const RecordKeeper &RK;
const CodeGenDAGPatterns CGP;
const CodeGenTarget &Target;
CodeGenRegBank &CGRegs;
+ std::vector<Record *> AllPatFrags;
+
/// Keep track of the equivalence between SDNodes and Instruction by mapping
/// SDNodes to the GINodeEquiv mapping. We need to map to the GINodeEquiv to
/// check for attributes on the relation such as CheckMMOIsNonAtomic.
@@ -3594,9 +347,6 @@ private:
/// This adds compatibility for RuleMatchers to use this for ordering rules.
DenseMap<uint64_t, int> RuleMatcherScores;
- // Map of predicates to their subtarget features.
- SubtargetFeatureInfoMap SubtargetFeatures;
-
// Rule coverage information.
std::optional<CodeGenCoverage> RuleCoverage;
@@ -3635,24 +385,21 @@ private:
const TreePatternNode *Src, const TreePatternNode *Dst);
Expected<action_iterator> createAndImportSubInstructionRenderer(
action_iterator InsertPt, RuleMatcher &M, const TreePatternNode *Dst,
- unsigned TempReg);
+ const TreePatternNode *Src, unsigned TempReg);
Expected<action_iterator>
createInstructionRenderer(action_iterator InsertPt, RuleMatcher &M,
const TreePatternNode *Dst);
- Expected<action_iterator>
- importExplicitDefRenderers(action_iterator InsertPt, RuleMatcher &M,
- BuildMIAction &DstMIBuilder,
- const TreePatternNode *Dst);
+ Expected<action_iterator> importExplicitDefRenderers(
+ action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder,
+ const TreePatternNode *Src, const TreePatternNode *Dst);
- Expected<action_iterator>
- importExplicitUseRenderers(action_iterator InsertPt, RuleMatcher &M,
- BuildMIAction &DstMIBuilder,
- const llvm::TreePatternNode *Dst);
- Expected<action_iterator>
- importExplicitUseRenderer(action_iterator InsertPt, RuleMatcher &Rule,
- BuildMIAction &DstMIBuilder,
- TreePatternNode *DstChild);
+ Expected<action_iterator> importExplicitUseRenderers(
+ action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder,
+ const llvm::TreePatternNode *Dst, const TreePatternNode *Src);
+ Expected<action_iterator> importExplicitUseRenderer(
+ action_iterator InsertPt, RuleMatcher &Rule, BuildMIAction &DstMIBuilder,
+ const TreePatternNode *DstChild, const TreePatternNode *Src);
Error importDefaultOperandRenderers(action_iterator InsertPt, RuleMatcher &M,
BuildMIAction &DstMIBuilder,
DagInit *DefaultOps) const;
@@ -3660,16 +407,6 @@ private:
importImplicitDefRenderers(BuildMIAction &DstMIBuilder,
const std::vector<Record *> &ImplicitDefs) const;
- void emitCxxPredicateFns(raw_ostream &OS, StringRef CodeFieldName,
- StringRef TypeIdentifier, StringRef ArgType,
- StringRef ArgName, StringRef AdditionalArgs,
- StringRef AdditionalDeclarations,
- std::function<bool(const Record *R)> Filter);
- void emitImmPredicateFns(raw_ostream &OS, StringRef TypeIdentifier,
- StringRef ArgType,
- std::function<bool(const Record *R)> Filter);
- void emitMIPredicateFns(raw_ostream &OS);
-
/// Analyze pattern \p P, returning a matcher for it if possible.
/// Otherwise, return an Error explaining why we don't support it.
Expected<RuleMatcher> runOnPattern(const PatternToMatch &P);
@@ -3685,25 +422,25 @@ private:
/// If no register class is found, return std::nullopt.
std::optional<const CodeGenRegisterClass *>
inferSuperRegisterClassForNode(const TypeSetByHwMode &Ty,
- TreePatternNode *SuperRegNode,
- TreePatternNode *SubRegIdxNode);
+ const TreePatternNode *SuperRegNode,
+ const TreePatternNode *SubRegIdxNode);
std::optional<CodeGenSubRegIndex *>
- inferSubRegIndexForNode(TreePatternNode *SubRegIdxNode);
+ inferSubRegIndexForNode(const TreePatternNode *SubRegIdxNode);
/// Infer a CodeGenRegisterClass which supports \p Ty and \p SubRegIdxNode.
/// Return std::nullopt if no such class exists.
std::optional<const CodeGenRegisterClass *>
inferSuperRegisterClass(const TypeSetByHwMode &Ty,
- TreePatternNode *SubRegIdxNode);
+ const TreePatternNode *SubRegIdxNode);
/// Return the CodeGenRegisterClass associated with \p Leaf if it has one.
std::optional<const CodeGenRegisterClass *>
- getRegClassFromLeaf(TreePatternNode *Leaf);
+ getRegClassFromLeaf(const TreePatternNode *Leaf);
/// Return a CodeGenRegisterClass for \p N if one can be found. Return
/// std::nullopt otherwise.
std::optional<const CodeGenRegisterClass *>
- inferRegClassFromPattern(TreePatternNode *N);
+ inferRegClassFromPattern(const TreePatternNode *N);
/// Return the size of the MemoryVT in this predicate, if possible.
std::optional<unsigned>
@@ -3714,39 +451,10 @@ private:
addBuiltinPredicates(const Record *SrcGIEquivOrNull,
const TreePredicateFn &Predicate,
InstructionMatcher &InsnMatcher, bool &HasAddedMatcher);
-
-public:
- /// Takes a sequence of \p Rules and group them based on the predicates
- /// they share. \p MatcherStorage is used as a memory container
- /// for the group that are created as part of this process.
- ///
- /// What this optimization does looks like if GroupT = GroupMatcher:
- /// Output without optimization:
- /// \verbatim
- /// # R1
- /// # predicate A
- /// # predicate B
- /// ...
- /// # R2
- /// # predicate A // <-- effectively this is going to be checked twice.
- /// // Once in R1 and once in R2.
- /// # predicate C
- /// \endverbatim
- /// Output with optimization:
- /// \verbatim
- /// # Group1_2
- /// # predicate A // <-- Check is now shared.
- /// # R1
- /// # predicate B
- /// # R2
- /// # predicate C
- /// \endverbatim
- template <class GroupT>
- static std::vector<Matcher *> optimizeRules(
- ArrayRef<Matcher *> Rules,
- std::vector<std::unique_ptr<Matcher>> &MatcherStorage);
};
+StringRef getPatFragPredicateEnumName(Record *R) { return R->getName(); }
+
void GlobalISelEmitter::gatherOpcodeValues() {
InstructionOpcodeMatcher::initOpcodeValuesMap(Target);
}
@@ -3766,15 +474,15 @@ void GlobalISelEmitter::gatherNodeEquivs() {
if (!SelDAGEquiv)
continue;
ComplexPatternEquivs[SelDAGEquiv] = Equiv;
- }
-
- assert(SDNodeXFormEquivs.empty());
- for (Record *Equiv : RK.getAllDerivedDefinitions("GISDNodeXFormEquiv")) {
- Record *SelDAGEquiv = Equiv->getValueAsDef("SelDAGEquivalent");
- if (!SelDAGEquiv)
- continue;
- SDNodeXFormEquivs[SelDAGEquiv] = Equiv;
- }
+ }
+
+ assert(SDNodeXFormEquivs.empty());
+ for (Record *Equiv : RK.getAllDerivedDefinitions("GISDNodeXFormEquiv")) {
+ Record *SelDAGEquiv = Equiv->getValueAsDef("SelDAGEquivalent");
+ if (!SelDAGEquiv)
+ continue;
+ SDNodeXFormEquivs[SelDAGEquiv] = Equiv;
+ }
}
Record *GlobalISelEmitter::findNodeEquiv(Record *N) const {
@@ -3806,8 +514,10 @@ GlobalISelEmitter::getEquivNode(Record &Equiv, const TreePatternNode *N) const {
}
GlobalISelEmitter::GlobalISelEmitter(RecordKeeper &RK)
- : RK(RK), CGP(RK), Target(CGP.getTargetInfo()),
- CGRegs(Target.getRegBank()) {}
+ : GlobalISelMatchTableExecutorEmitter(), RK(RK), CGP(RK),
+ Target(CGP.getTargetInfo()), CGRegs(Target.getRegBank()) {
+ ClassName = Target.getName().str() + "InstructionSelector";
+}
//===- Emitter ------------------------------------------------------------===//
@@ -3900,6 +610,7 @@ Expected<InstructionMatcher &> GlobalISelEmitter::addBuiltinPredicates(
}
}
+ assert(SrcGIEquivOrNull != nullptr && "Invalid SrcGIEquivOrNull value");
// No check required. We already did it by swapping the opcode.
if (!SrcGIEquivOrNull->isValueUnset("IfSignExtend") &&
Predicate.isSignExtLoad())
@@ -3985,13 +696,12 @@ Expected<InstructionMatcher &> GlobalISelEmitter::addBuiltinPredicates(
Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
RuleMatcher &Rule, InstructionMatcher &InsnMatcher,
const TreePatternNode *Src, unsigned &TempOpIdx) {
+ const auto SavedFlags = Rule.setGISelFlags(Src->getGISelFlagsRecord());
+
Record *SrcGIEquivOrNull = nullptr;
const CodeGenInstruction *SrcGIOrNull = nullptr;
// Start with the defined operands (i.e., the results of the root operator).
- if (Src->getExtTypes().size() > 1)
- return failedImport("Src pattern has multiple results");
-
if (Src->isLeaf()) {
Init *SrcInit = Src->getLeafValue();
if (isa<IntInit>(SrcInit)) {
@@ -4070,12 +780,14 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
}
bool IsAtomic = false;
- if (SrcGIEquivOrNull && SrcGIEquivOrNull->getValueAsBit("CheckMMOIsNonAtomic"))
+ if (SrcGIEquivOrNull &&
+ SrcGIEquivOrNull->getValueAsBit("CheckMMOIsNonAtomic"))
InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>("NotAtomic");
- else if (SrcGIEquivOrNull && SrcGIEquivOrNull->getValueAsBit("CheckMMOIsAtomic")) {
+ else if (SrcGIEquivOrNull &&
+ SrcGIEquivOrNull->getValueAsBit("CheckMMOIsAtomic")) {
IsAtomic = true;
InsnMatcher.addPredicate<AtomicOrderingMMOPredicateMatcher>(
- "Unordered", AtomicOrderingMMOPredicateMatcher::AO_OrStronger);
+ "Unordered", AtomicOrderingMMOPredicateMatcher::AO_OrStronger);
}
if (Src->isLeaf()) {
@@ -4107,7 +819,7 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
bool IsFCmp = SrcGIOrNull->TheDef->getName() == "G_FCMP";
if (IsFCmp || SrcGIOrNull->TheDef->getName() == "G_ICMP") {
- TreePatternNode *SrcChild = Src->getChild(NumChildren - 1);
+ const TreePatternNode *SrcChild = Src->getChild(NumChildren - 1);
if (SrcChild->isLeaf()) {
DefInit *DI = dyn_cast<DefInit>(SrcChild->getLeafValue());
Record *CCDef = DI ? DI->getDef() : nullptr;
@@ -4115,9 +827,9 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
return failedImport("Unable to handle CondCode");
OperandMatcher &OM =
- InsnMatcher.addOperand(OpIdx++, SrcChild->getName(), TempOpIdx);
- StringRef PredType = IsFCmp ? CCDef->getValueAsString("FCmpPredicate") :
- CCDef->getValueAsString("ICmpPredicate");
+ InsnMatcher.addOperand(OpIdx++, SrcChild->getName(), TempOpIdx);
+ StringRef PredType = IsFCmp ? CCDef->getValueAsString("FCmpPredicate")
+ : CCDef->getValueAsString("ICmpPredicate");
if (!PredType.empty()) {
OM.addPredicate<CmpPredicateOperandMatcher>(std::string(PredType));
@@ -4135,8 +847,8 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
if (IsAtomic && SrcGIOrNull->TheDef->getName() == "G_STORE") {
assert(NumChildren == 2 && "wrong operands for atomic store");
- TreePatternNode *PtrChild = Src->getChild(0);
- TreePatternNode *ValueChild = Src->getChild(1);
+ const TreePatternNode *PtrChild = Src->getChild(0);
+ const TreePatternNode *ValueChild = Src->getChild(1);
if (auto Error = importChildMatcher(Rule, InsnMatcher, PtrChild, true,
false, 1, TempOpIdx))
@@ -4157,7 +869,7 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
return failedImport("Expected IntInit containing intrinsic ID)");
for (unsigned i = 0; i != NumChildren; ++i) {
- TreePatternNode *SrcChild = Src->getChild(i);
+ const TreePatternNode *SrcChild = Src->getChild(i);
// We need to determine the meaning of a literal integer based on the
// context. If this is a field required to be an immediate (such as an
@@ -4169,8 +881,9 @@ Expected<InstructionMatcher &> GlobalISelEmitter::createAndImportSelDAGMatcher(
bool OperandIsImmArg = SrcGIOrNull->isInOperandImmArg(i);
// SelectionDAG allows pointers to be represented with iN since it doesn't
- // distinguish between pointers and integers but they are different types in GlobalISel.
- // Coerce integers to pointers to address space 0 if the context indicates a pointer.
+ // distinguish between pointers and integers but they are different types
+ // in GlobalISel. Coerce integers to pointers to address space 0 if the
+ // context indicates a pointer.
//
bool OperandIsAPointer = SrcGIOrNull->isInOperandAPointer(i);
@@ -4327,7 +1040,8 @@ Error GlobalISelEmitter::importChildMatcher(
// This isn't strictly true. If the user were to provide exactly the same
// matchers as the original operand then we could allow it. However, it's
// simpler to not permit the redundant specification.
- return failedImport("Nested instruction cannot be the same as another operand");
+ return failedImport(
+ "Nested instruction cannot be the same as another operand");
}
// Map the node to a gMIR instruction.
@@ -4377,11 +1091,11 @@ Error GlobalISelEmitter::importChildMatcher(
if (ChildRec->isSubClassOf("Register")) {
// This can just be emitted as a copy to the specific register.
ValueTypeByHwMode VT = ChildTypes.front().getValueTypeByHwMode();
- const CodeGenRegisterClass *RC
- = CGRegs.getMinimalPhysRegClass(ChildRec, &VT);
+ const CodeGenRegisterClass *RC =
+ CGRegs.getMinimalPhysRegClass(ChildRec, &VT);
if (!RC) {
return failedImport(
- "Could not determine physical register class of pattern source");
+ "Could not determine physical register class of pattern source");
}
OM.addPredicate<RegisterBankOperandMatcher>(*RC);
@@ -4416,10 +1130,10 @@ Error GlobalISelEmitter::importChildMatcher(
ValueTypeByHwMode VTy = ChildTypes.front().getValueTypeByHwMode();
- const CodeGenInstruction &BuildVector
- = Target.getInstruction(RK.getDef("G_BUILD_VECTOR"));
- const CodeGenInstruction &BuildVectorTrunc
- = Target.getInstruction(RK.getDef("G_BUILD_VECTOR_TRUNC"));
+ const CodeGenInstruction &BuildVector =
+ Target.getInstruction(RK.getDef("G_BUILD_VECTOR"));
+ const CodeGenInstruction &BuildVectorTrunc =
+ Target.getInstruction(RK.getDef("G_BUILD_VECTOR_TRUNC"));
// Treat G_BUILD_VECTOR as the canonical opcode, and G_BUILD_VECTOR_TRUNC
// as an alternative.
@@ -4451,7 +1165,7 @@ Error GlobalISelEmitter::importChildMatcher(
Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
action_iterator InsertPt, RuleMatcher &Rule, BuildMIAction &DstMIBuilder,
- TreePatternNode *DstChild) {
+ const TreePatternNode *DstChild, const TreePatternNode *Src) {
const auto &SubOperand = Rule.getComplexSubOperand(DstChild->getName());
if (SubOperand) {
@@ -4516,18 +1230,19 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
return OpTy.takeError();
unsigned TempRegID = Rule.allocateTempRegID();
- InsertPt = Rule.insertAction<MakeTempRegisterAction>(
- InsertPt, *OpTy, TempRegID);
+ InsertPt =
+ Rule.insertAction<MakeTempRegisterAction>(InsertPt, *OpTy, TempRegID);
DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID);
auto InsertPtOrError = createAndImportSubInstructionRenderer(
- ++InsertPt, Rule, DstChild, TempRegID);
+ ++InsertPt, Rule, DstChild, Src, TempRegID);
if (auto Error = InsertPtOrError.takeError())
return std::move(Error);
return InsertPtOrError.get();
}
- return failedImport("Dst pattern child isn't a leaf node or an MBB" + llvm::to_string(*DstChild));
+ return failedImport("Dst pattern child isn't a leaf node or an MBB" +
+ llvm::to_string(*DstChild));
}
// It could be a specific immediate in which case we should just check for
@@ -4593,6 +1308,16 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderer(
return failedImport(
"Dst pattern child def is an unsupported tablegen class");
}
+
+ // Handle the case where the MVT/register class is omitted in the dest pattern
+ // but MVT exists in the source pattern.
+ if (isa<UnsetInit>(DstChild->getLeafValue())) {
+ for (unsigned NumOp = 0; NumOp < Src->getNumChildren(); NumOp++)
+ if (Src->getChild(NumOp)->getName() == DstChild->getName()) {
+ DstMIBuilder.addRenderer<CopyRenderer>(Src->getChild(NumOp)->getName());
+ return InsertPt;
+ }
+ }
return failedImport("Dst pattern child is an unsupported kind");
}
@@ -4612,18 +1337,19 @@ Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer(
&Target.getInstruction(RK.getDef("COPY")));
BuildMIAction &CopyToPhysRegMIBuilder =
*static_cast<BuildMIAction *>(InsertPt->get());
- CopyToPhysRegMIBuilder.addRenderer<AddRegisterRenderer>(Target,
- PhysInput.first,
- true);
+ CopyToPhysRegMIBuilder.addRenderer<AddRegisterRenderer>(
+ Target, PhysInput.first, true);
CopyToPhysRegMIBuilder.addRenderer<CopyPhysRegRenderer>(PhysInput.first);
}
- if (auto Error = importExplicitDefRenderers(InsertPt, M, DstMIBuilder, Dst)
- .takeError())
+ if (auto Error =
+ importExplicitDefRenderers(InsertPt, M, DstMIBuilder, Src, Dst)
+ .takeError())
return std::move(Error);
- if (auto Error = importExplicitUseRenderers(InsertPt, M, DstMIBuilder, Dst)
- .takeError())
+ if (auto Error =
+ importExplicitUseRenderers(InsertPt, M, DstMIBuilder, Dst, Src)
+ .takeError())
return std::move(Error);
return DstMIBuilder;
@@ -4632,7 +1358,7 @@ Expected<BuildMIAction &> GlobalISelEmitter::createAndImportInstructionRenderer(
Expected<action_iterator>
GlobalISelEmitter::createAndImportSubInstructionRenderer(
const action_iterator InsertPt, RuleMatcher &M, const TreePatternNode *Dst,
- unsigned TempRegID) {
+ const TreePatternNode *Src, unsigned TempRegID) {
auto InsertPtOrError = createInstructionRenderer(InsertPt, M, Dst);
// TODO: Assert there's exactly one result.
@@ -4646,8 +1372,8 @@ GlobalISelEmitter::createAndImportSubInstructionRenderer(
// Assign the result to TempReg.
DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID, true);
- InsertPtOrError =
- importExplicitUseRenderers(InsertPtOrError.get(), M, DstMIBuilder, Dst);
+ InsertPtOrError = importExplicitUseRenderers(InsertPtOrError.get(), M,
+ DstMIBuilder, Dst, Src);
if (auto Error = InsertPtOrError.takeError())
return std::move(Error);
@@ -4685,19 +1411,19 @@ GlobalISelEmitter::createAndImportSubInstructionRenderer(
auto SuperClass = inferRegClassFromPattern(Dst->getChild(0));
if (!SuperClass)
return failedImport(
- "Cannot infer register class from EXTRACT_SUBREG operand #0");
+ "Cannot infer register class from EXTRACT_SUBREG operand #0");
auto SubIdx = inferSubRegIndexForNode(Dst->getChild(1));
if (!SubIdx)
return failedImport("EXTRACT_SUBREG child #1 is not a subreg index");
const auto SrcRCDstRCPair =
- (*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx);
+ (*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx);
assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass");
M.insertAction<ConstrainOperandToRegClassAction>(
- InsertPt, DstMIBuilder.getInsnID(), 0, *SrcRCDstRCPair->second);
+ InsertPt, DstMIBuilder.getInsnID(), 0, *SrcRCDstRCPair->second);
M.insertAction<ConstrainOperandToRegClassAction>(
- InsertPt, DstMIBuilder.getInsnID(), 1, *SrcRCDstRCPair->first);
+ InsertPt, DstMIBuilder.getInsnID(), 1, *SrcRCDstRCPair->first);
// We're done with this pattern! It's eligible for GISel emission; return
// it.
@@ -4710,37 +1436,37 @@ GlobalISelEmitter::createAndImportSubInstructionRenderer(
auto SubClass = inferRegClassFromPattern(Dst->getChild(1));
if (!SubClass)
return failedImport(
- "Cannot infer register class from SUBREG_TO_REG child #1");
- auto SuperClass = inferSuperRegisterClass(Dst->getExtType(0),
- Dst->getChild(2));
+ "Cannot infer register class from SUBREG_TO_REG child #1");
+ auto SuperClass =
+ inferSuperRegisterClass(Dst->getExtType(0), Dst->getChild(2));
if (!SuperClass)
return failedImport(
- "Cannot infer register class for SUBREG_TO_REG operand #0");
+ "Cannot infer register class for SUBREG_TO_REG operand #0");
M.insertAction<ConstrainOperandToRegClassAction>(
- InsertPt, DstMIBuilder.getInsnID(), 0, **SuperClass);
+ InsertPt, DstMIBuilder.getInsnID(), 0, **SuperClass);
M.insertAction<ConstrainOperandToRegClassAction>(
- InsertPt, DstMIBuilder.getInsnID(), 2, **SubClass);
+ InsertPt, DstMIBuilder.getInsnID(), 2, **SubClass);
return InsertPtOrError.get();
}
if (OpName == "REG_SEQUENCE") {
auto SuperClass = inferRegClassFromPattern(Dst->getChild(0));
M.insertAction<ConstrainOperandToRegClassAction>(
- InsertPt, DstMIBuilder.getInsnID(), 0, **SuperClass);
+ InsertPt, DstMIBuilder.getInsnID(), 0, **SuperClass);
unsigned Num = Dst->getNumChildren();
for (unsigned I = 1; I != Num; I += 2) {
- TreePatternNode *SubRegChild = Dst->getChild(I + 1);
+ const TreePatternNode *SubRegChild = Dst->getChild(I + 1);
auto SubIdx = inferSubRegIndexForNode(SubRegChild);
if (!SubIdx)
return failedImport("REG_SEQUENCE child is not a subreg index");
const auto SrcRCDstRCPair =
- (*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx);
+ (*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx);
assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass");
M.insertAction<ConstrainOperandToRegClassAction>(
- InsertPt, DstMIBuilder.getInsnID(), I, *SrcRCDstRCPair->second);
+ InsertPt, DstMIBuilder.getInsnID(), I, *SrcRCDstRCPair->second);
}
return InsertPtOrError.get();
@@ -4774,22 +1500,22 @@ Expected<action_iterator> GlobalISelEmitter::createInstructionRenderer(
Expected<action_iterator> GlobalISelEmitter::importExplicitDefRenderers(
action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder,
- const TreePatternNode *Dst) {
+ const TreePatternNode *Src, const TreePatternNode *Dst) {
const CodeGenInstruction *DstI = DstMIBuilder.getCGI();
- const unsigned NumDefs = DstI->Operands.NumDefs;
- if (NumDefs == 0)
+ const unsigned SrcNumDefs = Src->getExtTypes().size();
+ const unsigned DstNumDefs = DstI->Operands.NumDefs;
+ if (DstNumDefs == 0)
return InsertPt;
- DstMIBuilder.addRenderer<CopyRenderer>(DstI->Operands[0].Name);
+ for (unsigned I = 0; I < SrcNumDefs; ++I)
+ DstMIBuilder.addRenderer<CopyRenderer>(DstI->Operands[I].Name);
// Some instructions have multiple defs, but are missing a type entry
// (e.g. s_cc_out operands).
- if (Dst->getExtTypes().size() < NumDefs)
+ if (Dst->getExtTypes().size() < DstNumDefs)
return failedImport("unhandled discarded def");
- // Patterns only handle a single result, so any result after the first is an
- // implicitly dead def.
- for (unsigned I = 1; I < NumDefs; ++I) {
+ for (unsigned I = SrcNumDefs; I < DstNumDefs; ++I) {
const TypeSetByHwMode &ExtTy = Dst->getExtType(I);
if (!ExtTy.isMachineValueType())
return failedImport("unsupported typeset");
@@ -4800,7 +1526,7 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitDefRenderers(
unsigned TempRegID = M.allocateTempRegID();
InsertPt =
- M.insertAction<MakeTempRegisterAction>(InsertPt, *OpTy, TempRegID);
+ M.insertAction<MakeTempRegisterAction>(InsertPt, *OpTy, TempRegID);
DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID, true, nullptr, true);
}
@@ -4809,7 +1535,7 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitDefRenderers(
Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
action_iterator InsertPt, RuleMatcher &M, BuildMIAction &DstMIBuilder,
- const llvm::TreePatternNode *Dst) {
+ const llvm::TreePatternNode *Dst, const llvm::TreePatternNode *Src) {
const CodeGenInstruction *DstI = DstMIBuilder.getCGI();
CodeGenInstruction *OrigDstI = &Target.getInstruction(Dst->getOperator());
@@ -4825,7 +1551,7 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
return failedImport("EXTRACT_SUBREG child #1 is not a subreg index");
CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef());
- TreePatternNode *ValChild = Dst->getChild(0);
+ const TreePatternNode *ValChild = Dst->getChild(0);
if (!ValChild->isLeaf()) {
// We really have to handle the source instruction, and then insert a
// copy from the subregister.
@@ -4834,11 +1560,11 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
return ExtractSrcTy.takeError();
unsigned TempRegID = M.allocateTempRegID();
- InsertPt = M.insertAction<MakeTempRegisterAction>(
- InsertPt, *ExtractSrcTy, TempRegID);
+ InsertPt = M.insertAction<MakeTempRegisterAction>(InsertPt, *ExtractSrcTy,
+ TempRegID);
auto InsertPtOrError = createAndImportSubInstructionRenderer(
- ++InsertPt, M, ValChild, TempRegID);
+ ++InsertPt, M, ValChild, Src, TempRegID);
if (auto Error = InsertPtOrError.takeError())
return std::move(Error);
@@ -4855,15 +1581,22 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
CodeGenRegisterClass *RC = CGRegs.getRegClass(RCDef);
const auto SrcRCDstRCPair =
- RC->getMatchingSubClassWithSubRegs(CGRegs, SubIdx);
+ RC->getMatchingSubClassWithSubRegs(CGRegs, SubIdx);
if (SrcRCDstRCPair) {
assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass");
if (SrcRCDstRCPair->first != RC)
return failedImport("EXTRACT_SUBREG requires an additional COPY");
}
- DstMIBuilder.addRenderer<CopySubRegRenderer>(Dst->getChild(0)->getName(),
- SubIdx);
+ StringRef RegOperandName = Dst->getChild(0)->getName();
+ if (const auto &SubOperand = M.getComplexSubOperand(RegOperandName)) {
+ DstMIBuilder.addRenderer<RenderComplexPatternOperand>(
+ *std::get<0>(*SubOperand), RegOperandName, std::get<1>(*SubOperand),
+ std::get<2>(*SubOperand), SubIdx);
+ return InsertPt;
+ }
+
+ DstMIBuilder.addRenderer<CopySubRegRenderer>(RegOperandName, SubIdx);
return InsertPt;
}
@@ -4880,15 +1613,15 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
return failedImport("Malformed REG_SEQUENCE");
for (unsigned I = 1; I != ExpectedDstINumUses; I += 2) {
- TreePatternNode *ValChild = Dst->getChild(I);
- TreePatternNode *SubRegChild = Dst->getChild(I + 1);
+ const TreePatternNode *ValChild = Dst->getChild(I);
+ const TreePatternNode *SubRegChild = Dst->getChild(I + 1);
if (DefInit *SubRegInit =
dyn_cast<DefInit>(SubRegChild->getLeafValue())) {
CodeGenSubRegIndex *SubIdx = CGRegs.getSubRegIdx(SubRegInit->getDef());
auto InsertPtOrError =
- importExplicitUseRenderer(InsertPt, M, DstMIBuilder, ValChild);
+ importExplicitUseRenderer(InsertPt, M, DstMIBuilder, ValChild, Src);
if (auto Error = InsertPtOrError.takeError())
return std::move(Error);
InsertPt = InsertPtOrError.get();
@@ -4949,15 +1682,15 @@ Expected<action_iterator> GlobalISelEmitter::importExplicitUseRenderers(
const CGIOperandList::OperandInfo &DstIOperand = DstI->Operands[InstOpNo];
DagInit *DefaultOps = DstIOperand.Rec->getValueAsDag("DefaultOps");
- if (auto Error = importDefaultOperandRenderers(
- InsertPt, M, DstMIBuilder, DefaultOps))
+ if (auto Error = importDefaultOperandRenderers(InsertPt, M, DstMIBuilder,
+ DefaultOps))
return std::move(Error);
++NumDefaultOps;
continue;
}
auto InsertPtOrError = importExplicitUseRenderer(InsertPt, M, DstMIBuilder,
- Dst->getChild(Child));
+ Dst->getChild(Child), Src);
if (auto Error = InsertPtOrError.takeError())
return std::move(Error);
InsertPt = InsertPtOrError.get();
@@ -4985,8 +1718,7 @@ Error GlobalISelEmitter::importDefaultOperandRenderers(
if (const DefInit *DefaultDagOperator =
dyn_cast<DefInit>(DefaultDagOp->getOperator())) {
if (DefaultDagOperator->getDef()->isSubClassOf("ValueType")) {
- OpTyOrNone = MVTToLLT(getValueType(
- DefaultDagOperator->getDef()));
+ OpTyOrNone = MVTToLLT(getValueType(DefaultDagOperator->getDef()));
DefaultOp = DefaultDagOp->getArg(0);
}
}
@@ -4999,10 +1731,10 @@ Error GlobalISelEmitter::importDefaultOperandRenderers(
M.insertAction<MakeTempRegisterAction>(InsertPt, *OpTyOrNone,
TempRegID);
InsertPt = M.insertAction<BuildMIAction>(
- InsertPt, M.allocateOutputInsnID(),
- &Target.getInstruction(RK.getDef("IMPLICIT_DEF")));
- BuildMIAction &IDMIBuilder = *static_cast<BuildMIAction *>(
- InsertPt->get());
+ InsertPt, M.allocateOutputInsnID(),
+ &Target.getInstruction(RK.getDef("IMPLICIT_DEF")));
+ BuildMIAction &IDMIBuilder =
+ *static_cast<BuildMIAction *>(InsertPt->get());
IDMIBuilder.addRenderer<TempRegRenderer>(TempRegID);
DstMIBuilder.addRenderer<TempRegRenderer>(TempRegID);
} else {
@@ -5031,7 +1763,7 @@ Error GlobalISelEmitter::importImplicitDefRenderers(
}
std::optional<const CodeGenRegisterClass *>
-GlobalISelEmitter::getRegClassFromLeaf(TreePatternNode *Leaf) {
+GlobalISelEmitter::getRegClassFromLeaf(const TreePatternNode *Leaf) {
assert(Leaf && "Expected node?");
assert(Leaf->isLeaf() && "Expected leaf?");
Record *RCRec = getInitValueAsRegClass(Leaf->getLeafValue());
@@ -5044,7 +1776,7 @@ GlobalISelEmitter::getRegClassFromLeaf(TreePatternNode *Leaf) {
}
std::optional<const CodeGenRegisterClass *>
-GlobalISelEmitter::inferRegClassFromPattern(TreePatternNode *N) {
+GlobalISelEmitter::inferRegClassFromPattern(const TreePatternNode *N) {
if (!N)
return std::nullopt;
@@ -5076,13 +1808,13 @@ GlobalISelEmitter::inferRegClassFromPattern(TreePatternNode *N) {
if (IsRegSequence || InstName == "COPY_TO_REGCLASS") {
// If we have a COPY_TO_REGCLASS, then we need to handle it specially. It
// has the desired register class as the first child.
- TreePatternNode *RCChild = N->getChild(IsRegSequence ? 0 : 1);
+ const TreePatternNode *RCChild = N->getChild(IsRegSequence ? 0 : 1);
if (!RCChild->isLeaf())
return std::nullopt;
return getRegClassFromLeaf(RCChild);
}
if (InstName == "INSERT_SUBREG") {
- TreePatternNode *Child0 = N->getChild(0);
+ const TreePatternNode *Child0 = N->getChild(0);
assert(Child0->getNumTypes() == 1 && "Unexpected number of types!");
const TypeSetByHwMode &VTy = Child0->getExtType(0);
return inferSuperRegisterClassForNode(VTy, Child0, N->getChild(2));
@@ -5112,8 +1844,8 @@ GlobalISelEmitter::inferRegClassFromPattern(TreePatternNode *N) {
}
std::optional<const CodeGenRegisterClass *>
-GlobalISelEmitter::inferSuperRegisterClass(const TypeSetByHwMode &Ty,
- TreePatternNode *SubRegIdxNode) {
+GlobalISelEmitter::inferSuperRegisterClass(
+ const TypeSetByHwMode &Ty, const TreePatternNode *SubRegIdxNode) {
assert(SubRegIdxNode && "Expected subregister index node!");
// We need a ValueTypeByHwMode for getSuperRegForSubReg.
if (!Ty.isValueTypeByHwMode(false))
@@ -5137,8 +1869,8 @@ GlobalISelEmitter::inferSuperRegisterClass(const TypeSetByHwMode &Ty,
std::optional<const CodeGenRegisterClass *>
GlobalISelEmitter::inferSuperRegisterClassForNode(
- const TypeSetByHwMode &Ty, TreePatternNode *SuperRegNode,
- TreePatternNode *SubRegIdxNode) {
+ const TypeSetByHwMode &Ty, const TreePatternNode *SuperRegNode,
+ const TreePatternNode *SubRegIdxNode) {
assert(SuperRegNode && "Expected super register node!");
// Check if we already have a defined register class for the super register
// node. If we do, then we should preserve that rather than inferring anything
@@ -5151,8 +1883,8 @@ GlobalISelEmitter::inferSuperRegisterClassForNode(
return inferSuperRegisterClass(Ty, SubRegIdxNode);
}
-std::optional<CodeGenSubRegIndex *>
-GlobalISelEmitter::inferSubRegIndexForNode(TreePatternNode *SubRegIdxNode) {
+std::optional<CodeGenSubRegIndex *> GlobalISelEmitter::inferSubRegIndexForNode(
+ const TreePatternNode *SubRegIdxNode) {
if (!SubRegIdxNode->isLeaf())
return std::nullopt;
@@ -5211,6 +1943,9 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
// before their first use.)
InstructionMatcher &InsnMatcherTemp = M.addInstructionMatcher(Src->getName());
unsigned TempOpIdx = 0;
+
+ const auto SavedFlags = M.setGISelFlags(P.getSrcRecord());
+
auto InsnMatcherOrError =
createAndImportSelDAGMatcher(M, InsnMatcherTemp, Src, TempOpIdx);
if (auto Error = InsnMatcherOrError.takeError())
@@ -5297,7 +2032,8 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
} else if (DstIName == "EXTRACT_SUBREG") {
auto InferredClass = inferRegClassFromPattern(Dst->getChild(0));
if (!InferredClass)
- return failedImport("Could not infer class for EXTRACT_SUBREG operand #0");
+ return failedImport(
+ "Could not infer class for EXTRACT_SUBREG operand #0");
  // We can assume that a subregister is in the same bank as its super
// register.
@@ -5379,7 +2115,7 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
auto SuperClass = inferRegClassFromPattern(Dst->getChild(0));
if (!SuperClass)
return failedImport(
- "Cannot infer register class from EXTRACT_SUBREG operand #0");
+ "Cannot infer register class from EXTRACT_SUBREG operand #0");
auto SubIdx = inferSubRegIndexForNode(Dst->getChild(1));
if (!SubIdx)
@@ -5392,17 +2128,18 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
// FIXME: This may introduce an extra copy if the chosen class doesn't
// actually contain the subregisters.
assert(Src->getExtTypes().size() == 1 &&
- "Expected Src of EXTRACT_SUBREG to have one result type");
+ "Expected Src of EXTRACT_SUBREG to have one result type");
const auto SrcRCDstRCPair =
- (*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx);
+ (*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx);
if (!SrcRCDstRCPair) {
return failedImport("subreg index is incompatible "
"with inferred reg class");
}
assert(SrcRCDstRCPair->second && "Couldn't find a matching subclass");
- M.addAction<ConstrainOperandToRegClassAction>(0, 0, *SrcRCDstRCPair->second);
+ M.addAction<ConstrainOperandToRegClassAction>(0, 0,
+ *SrcRCDstRCPair->second);
M.addAction<ConstrainOperandToRegClassAction>(0, 1, *SrcRCDstRCPair->first);
// We're done with this pattern! It's eligible for GISel emission; return
@@ -5470,7 +2207,7 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
return failedImport("REG_SEQUENCE child is not a subreg index");
const auto SrcRCDstRCPair =
- (*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx);
+ (*SuperClass)->getMatchingSubClassWithSubRegs(CGRegs, *SubIdx);
M.addAction<ConstrainOperandToRegClassAction>(0, I,
*SrcRCDstRCPair->second);
@@ -5487,126 +2224,6 @@ Expected<RuleMatcher> GlobalISelEmitter::runOnPattern(const PatternToMatch &P) {
return std::move(M);
}
-// Emit imm predicate table and an enum to reference them with.
-// The 'Predicate_' part of the name is redundant but eliminating it is more
-// trouble than it's worth.
-void GlobalISelEmitter::emitCxxPredicateFns(
- raw_ostream &OS, StringRef CodeFieldName, StringRef TypeIdentifier,
- StringRef ArgType, StringRef ArgName, StringRef AdditionalArgs,
- StringRef AdditionalDeclarations,
- std::function<bool(const Record *R)> Filter) {
- std::vector<const Record *> MatchedRecords;
- const auto &Defs = RK.getAllDerivedDefinitions("PatFrags");
- std::copy_if(Defs.begin(), Defs.end(), std::back_inserter(MatchedRecords),
- [&](Record *Record) {
- return !Record->getValueAsString(CodeFieldName).empty() &&
- Filter(Record);
- });
-
- if (!MatchedRecords.empty()) {
- OS << "// PatFrag predicates.\n"
- << "enum {\n";
- std::string EnumeratorSeparator =
- (" = GIPFP_" + TypeIdentifier + "_Invalid + 1,\n").str();
- for (const auto *Record : MatchedRecords) {
- OS << " GIPFP_" << TypeIdentifier << "_Predicate_" << Record->getName()
- << EnumeratorSeparator;
- EnumeratorSeparator = ",\n";
- }
- OS << "};\n";
- }
-
- OS << "bool " << Target.getName() << "InstructionSelector::test" << ArgName
- << "Predicate_" << TypeIdentifier << "(unsigned PredicateID, " << ArgType << " "
- << ArgName << AdditionalArgs <<") const {\n"
- << AdditionalDeclarations;
- if (!AdditionalDeclarations.empty())
- OS << "\n";
- if (!MatchedRecords.empty())
- OS << " switch (PredicateID) {\n";
- for (const auto *Record : MatchedRecords) {
- OS << " case GIPFP_" << TypeIdentifier << "_Predicate_"
- << Record->getName() << ": {\n"
- << " " << Record->getValueAsString(CodeFieldName) << "\n"
- << " llvm_unreachable(\"" << CodeFieldName
- << " should have returned\");\n"
- << " return false;\n"
- << " }\n";
- }
- if (!MatchedRecords.empty())
- OS << " }\n";
- OS << " llvm_unreachable(\"Unknown predicate\");\n"
- << " return false;\n"
- << "}\n";
-}
-
-void GlobalISelEmitter::emitImmPredicateFns(
- raw_ostream &OS, StringRef TypeIdentifier, StringRef ArgType,
- std::function<bool(const Record *R)> Filter) {
- return emitCxxPredicateFns(OS, "ImmediateCode", TypeIdentifier, ArgType,
- "Imm", "", "", Filter);
-}
-
-void GlobalISelEmitter::emitMIPredicateFns(raw_ostream &OS) {
- return emitCxxPredicateFns(
- OS, "GISelPredicateCode", "MI", "const MachineInstr &", "MI",
- ", const std::array<const MachineOperand *, 3> &Operands",
- " const MachineFunction &MF = *MI.getParent()->getParent();\n"
- " const MachineRegisterInfo &MRI = MF.getRegInfo();\n"
- " (void)MRI;",
- [](const Record *R) { return true; });
-}
-
-template <class GroupT>
-std::vector<Matcher *> GlobalISelEmitter::optimizeRules(
- ArrayRef<Matcher *> Rules,
- std::vector<std::unique_ptr<Matcher>> &MatcherStorage) {
-
- std::vector<Matcher *> OptRules;
- std::unique_ptr<GroupT> CurrentGroup = std::make_unique<GroupT>();
- assert(CurrentGroup->empty() && "Newly created group isn't empty!");
- unsigned NumGroups = 0;
-
- auto ProcessCurrentGroup = [&]() {
- if (CurrentGroup->empty())
- // An empty group is good to be reused:
- return;
-
- // If the group isn't large enough to provide any benefit, move all the
- // added rules out of it and make sure to re-create the group to properly
- // re-initialize it:
- if (CurrentGroup->size() < 2)
- append_range(OptRules, CurrentGroup->matchers());
- else {
- CurrentGroup->finalize();
- OptRules.push_back(CurrentGroup.get());
- MatcherStorage.emplace_back(std::move(CurrentGroup));
- ++NumGroups;
- }
- CurrentGroup = std::make_unique<GroupT>();
- };
- for (Matcher *Rule : Rules) {
- // Greedily add as many matchers as possible to the current group:
- if (CurrentGroup->addMatcher(*Rule))
- continue;
-
- ProcessCurrentGroup();
- assert(CurrentGroup->empty() && "A group wasn't properly re-initialized");
-
- // Try to add the pending matcher to a newly created empty group:
- if (!CurrentGroup->addMatcher(*Rule))
- // If we couldn't add the matcher to an empty group, that group type
- // doesn't support that kind of matchers at all, so just skip it:
- OptRules.push_back(Rule);
- }
- ProcessCurrentGroup();
-
- LLVM_DEBUG(dbgs() << "NumGroups: " << NumGroups << "\n");
- (void) NumGroups;
- assert(CurrentGroup->empty() && "The last group wasn't properly processed");
- return OptRules;
-}
-
MatchTable
GlobalISelEmitter::buildMatchTable(MutableArrayRef<RuleMatcher> Rules,
bool Optimize, bool WithCoverage) {
@@ -5649,32 +2266,101 @@ GlobalISelEmitter::buildMatchTable(MutableArrayRef<RuleMatcher> Rules,
return MatchTable::buildTable(OptRules, WithCoverage);
}
-void GroupMatcher::optimize() {
- // Make sure we only sort by a specific predicate within a range of rules that
- // all have that predicate checked against a specific value (not a wildcard):
- auto F = Matchers.begin();
- auto T = F;
- auto E = Matchers.end();
- while (T != E) {
- while (T != E) {
- auto *R = static_cast<RuleMatcher *>(*T);
- if (!R->getFirstConditionAsRootType().get().isValid())
- break;
- ++T;
- }
- std::stable_sort(F, T, [](Matcher *A, Matcher *B) {
- auto *L = static_cast<RuleMatcher *>(A);
- auto *R = static_cast<RuleMatcher *>(B);
- return L->getFirstConditionAsRootType() <
- R->getFirstConditionAsRootType();
- });
- if (T != E)
- F = ++T;
- }
- GlobalISelEmitter::optimizeRules<GroupMatcher>(Matchers, MatcherStorage)
- .swap(Matchers);
- GlobalISelEmitter::optimizeRules<SwitchMatcher>(Matchers, MatcherStorage)
- .swap(Matchers);
+void GlobalISelEmitter::emitAdditionalImpl(raw_ostream &OS) {
+ OS << "bool " << getClassName()
+ << "::selectImpl(MachineInstr &I, CodeGenCoverage "
+ "&CoverageInfo) const {\n"
+ << " const PredicateBitset AvailableFeatures = "
+ "getAvailableFeatures();\n"
+ << " NewMIVector OutMIs;\n"
+ << " State.MIs.clear();\n"
+ << " State.MIs.push_back(&I);\n\n"
+ << " if (executeMatchTable(*this, OutMIs, State, ExecInfo"
+ << ", getMatchTable(), TII, MF->getRegInfo(), TRI, RBI, AvailableFeatures"
+ << ", &CoverageInfo)) {\n"
+ << " return true;\n"
+ << " }\n\n"
+ << " return false;\n"
+ << "}\n\n";
+}
+
+void GlobalISelEmitter::emitMIPredicateFns(raw_ostream &OS) {
+ std::vector<Record *> MatchedRecords;
+ std::copy_if(AllPatFrags.begin(), AllPatFrags.end(),
+ std::back_inserter(MatchedRecords), [&](Record *R) {
+ return !R->getValueAsString("GISelPredicateCode").empty();
+ });
+ emitMIPredicateFnsImpl<Record *>(
+ OS,
+ " const MachineFunction &MF = *MI.getParent()->getParent();\n"
+ " const MachineRegisterInfo &MRI = MF.getRegInfo();\n"
+ " const auto &Operands = State.RecordedOperands;\n"
+ " (void)Operands;\n"
+ " (void)MRI;",
+ ArrayRef<Record *>(MatchedRecords), &getPatFragPredicateEnumName,
+ [&](Record *R) { return R->getValueAsString("GISelPredicateCode"); },
+ "PatFrag predicates.");
+}
+
+void GlobalISelEmitter::emitI64ImmPredicateFns(raw_ostream &OS) {
+ std::vector<Record *> MatchedRecords;
+ std::copy_if(AllPatFrags.begin(), AllPatFrags.end(),
+ std::back_inserter(MatchedRecords), [&](Record *R) {
+ bool Unset;
+ return !R->getValueAsString("ImmediateCode").empty() &&
+ !R->getValueAsBitOrUnset("IsAPFloat", Unset) &&
+ !R->getValueAsBit("IsAPInt");
+ });
+ emitImmPredicateFnsImpl<Record *>(
+ OS, "I64", "int64_t", ArrayRef<Record *>(MatchedRecords),
+ &getPatFragPredicateEnumName,
+ [&](Record *R) { return R->getValueAsString("ImmediateCode"); },
+ "PatFrag predicates.");
+}
+
+void GlobalISelEmitter::emitAPFloatImmPredicateFns(raw_ostream &OS) {
+ std::vector<Record *> MatchedRecords;
+ std::copy_if(AllPatFrags.begin(), AllPatFrags.end(),
+ std::back_inserter(MatchedRecords), [&](Record *R) {
+ bool Unset;
+ return !R->getValueAsString("ImmediateCode").empty() &&
+ R->getValueAsBitOrUnset("IsAPFloat", Unset);
+ });
+ emitImmPredicateFnsImpl<Record *>(
+ OS, "APFloat", "const APFloat &", ArrayRef<Record *>(MatchedRecords),
+ &getPatFragPredicateEnumName,
+ [&](Record *R) { return R->getValueAsString("ImmediateCode"); },
+ "PatFrag predicates.");
+}
+
+void GlobalISelEmitter::emitAPIntImmPredicateFns(raw_ostream &OS) {
+ std::vector<Record *> MatchedRecords;
+ std::copy_if(AllPatFrags.begin(), AllPatFrags.end(),
+ std::back_inserter(MatchedRecords), [&](Record *R) {
+ return !R->getValueAsString("ImmediateCode").empty() &&
+ R->getValueAsBit("IsAPInt");
+ });
+ emitImmPredicateFnsImpl<Record *>(
+ OS, "APInt", "const APInt &", ArrayRef<Record *>(MatchedRecords),
+ &getPatFragPredicateEnumName,
+ [&](Record *R) { return R->getValueAsString("ImmediateCode"); },
+ "PatFrag predicates.");
+}
+
+void GlobalISelEmitter::emitTestSimplePredicate(raw_ostream &OS) {
+ OS << "bool " << getClassName() << "::testSimplePredicate(unsigned) const {\n"
+ << " llvm_unreachable(\"" + getClassName() +
+ " does not support simple predicates!\");\n"
+ << " return false;\n"
+ << "}\n";
+}
+
+void GlobalISelEmitter::emitRunCustomAction(raw_ostream &OS) {
+ OS << "void " << getClassName()
+ << "::runCustomAction(unsigned, const MatcherState&) const {\n"
+ << " llvm_unreachable(\"" + getClassName() +
+ " does not support custom C++ actions!\");\n"
+ << "}\n";
}
void GlobalISelEmitter::run(raw_ostream &OS) {
@@ -5700,8 +2386,12 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
// Track the GINodeEquiv definitions.
gatherNodeEquivs();
- emitSourceFileHeader(("Global Instruction Selector for the " +
- Target.getName() + " target").str(), OS);
+ AllPatFrags = RK.getAllDerivedDefinitions("PatFrags");
+
+ emitSourceFileHeader(
+ ("Global Instruction Selector for the " + Target.getName() + " target")
+ .str(),
+ OS);
std::vector<RuleMatcher> Rules;
// Look through the SelectionDAG patterns we found, possibly emitting some.
for (const PatternToMatch &Pat : CGP.ptms()) {
@@ -5753,203 +2443,13 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
std::unique(CustomRendererFns.begin(), CustomRendererFns.end()),
CustomRendererFns.end());
- unsigned MaxTemporaries = 0;
- for (const auto &Rule : Rules)
- MaxTemporaries = std::max(MaxTemporaries, Rule.countRendererFns());
-
- OS << "#ifdef GET_GLOBALISEL_PREDICATE_BITSET\n"
- << "const unsigned MAX_SUBTARGET_PREDICATES = " << SubtargetFeatures.size()
- << ";\n"
- << "using PredicateBitset = "
- "llvm::PredicateBitsetImpl<MAX_SUBTARGET_PREDICATES>;\n"
- << "#endif // ifdef GET_GLOBALISEL_PREDICATE_BITSET\n\n";
-
- OS << "#ifdef GET_GLOBALISEL_TEMPORARIES_DECL\n"
- << " mutable MatcherState State;\n"
- << " typedef "
- "ComplexRendererFns("
- << Target.getName()
- << "InstructionSelector::*ComplexMatcherMemFn)(MachineOperand &) const;\n"
-
- << " typedef void(" << Target.getName()
- << "InstructionSelector::*CustomRendererFn)(MachineInstrBuilder &, const "
- "MachineInstr &, int) "
- "const;\n"
- << " const ISelInfoTy<PredicateBitset, ComplexMatcherMemFn, "
- "CustomRendererFn> "
- "ISelInfo;\n";
- OS << " static " << Target.getName()
- << "InstructionSelector::ComplexMatcherMemFn ComplexPredicateFns[];\n"
- << " static " << Target.getName()
- << "InstructionSelector::CustomRendererFn CustomRenderers[];\n"
- << " bool testImmPredicate_I64(unsigned PredicateID, int64_t Imm) const "
- "override;\n"
- << " bool testImmPredicate_APInt(unsigned PredicateID, const APInt &Imm) "
- "const override;\n"
- << " bool testImmPredicate_APFloat(unsigned PredicateID, const APFloat "
- "&Imm) const override;\n"
- << " const int64_t *getMatchTable() const override;\n"
- << " bool testMIPredicate_MI(unsigned PredicateID, const MachineInstr &MI"
- ", const std::array<const MachineOperand *, 3> &Operands) "
- "const override;\n"
- << "#endif // ifdef GET_GLOBALISEL_TEMPORARIES_DECL\n\n";
-
- OS << "#ifdef GET_GLOBALISEL_TEMPORARIES_INIT\n"
- << ", State(" << MaxTemporaries << "),\n"
- << "ISelInfo(TypeObjects, NumTypeObjects, FeatureBitsets"
- << ", ComplexPredicateFns, CustomRenderers)\n"
- << "#endif // ifdef GET_GLOBALISEL_TEMPORARIES_INIT\n\n";
-
- OS << "#ifdef GET_GLOBALISEL_IMPL\n";
- SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(SubtargetFeatures,
- OS);
-
- // Separate subtarget features by how often they must be recomputed.
- SubtargetFeatureInfoMap ModuleFeatures;
- std::copy_if(SubtargetFeatures.begin(), SubtargetFeatures.end(),
- std::inserter(ModuleFeatures, ModuleFeatures.end()),
- [](const SubtargetFeatureInfoMap::value_type &X) {
- return !X.second.mustRecomputePerFunction();
- });
- SubtargetFeatureInfoMap FunctionFeatures;
- std::copy_if(SubtargetFeatures.begin(), SubtargetFeatures.end(),
- std::inserter(FunctionFeatures, FunctionFeatures.end()),
- [](const SubtargetFeatureInfoMap::value_type &X) {
- return X.second.mustRecomputePerFunction();
- });
-
- SubtargetFeatureInfo::emitComputeAvailableFeatures(
- Target.getName(), "InstructionSelector", "computeAvailableModuleFeatures",
- ModuleFeatures, OS);
-
-
- OS << "void " << Target.getName() << "InstructionSelector"
- "::setupGeneratedPerFunctionState(MachineFunction &MF) {\n"
- " AvailableFunctionFeatures = computeAvailableFunctionFeatures("
- "(const " << Target.getName() << "Subtarget *)&MF.getSubtarget(), &MF);\n"
- "}\n";
-
- SubtargetFeatureInfo::emitComputeAvailableFeatures(
- Target.getName(), "InstructionSelector",
- "computeAvailableFunctionFeatures", FunctionFeatures, OS,
- "const MachineFunction *MF");
-
- // Emit a table containing the LLT objects needed by the matcher and an enum
+ // Create a table containing the LLT objects needed by the matcher and an enum
// for the matcher to reference them with.
std::vector<LLTCodeGen> TypeObjects;
append_range(TypeObjects, KnownTypes);
llvm::sort(TypeObjects);
- OS << "// LLT Objects.\n"
- << "enum {\n";
- for (const auto &TypeObject : TypeObjects) {
- OS << " ";
- TypeObject.emitCxxEnumValue(OS);
- OS << ",\n";
- }
- OS << "};\n";
- OS << "const static size_t NumTypeObjects = " << TypeObjects.size() << ";\n"
- << "const static LLT TypeObjects[] = {\n";
- for (const auto &TypeObject : TypeObjects) {
- OS << " ";
- TypeObject.emitCxxConstructorCall(OS);
- OS << ",\n";
- }
- OS << "};\n\n";
-
- // Emit a table containing the PredicateBitsets objects needed by the matcher
- // and an enum for the matcher to reference them with.
- std::vector<std::vector<Record *>> FeatureBitsets;
- FeatureBitsets.reserve(Rules.size());
- for (auto &Rule : Rules)
- FeatureBitsets.push_back(Rule.getRequiredFeatures());
- llvm::sort(FeatureBitsets, [&](const std::vector<Record *> &A,
- const std::vector<Record *> &B) {
- if (A.size() < B.size())
- return true;
- if (A.size() > B.size())
- return false;
- for (auto Pair : zip(A, B)) {
- if (std::get<0>(Pair)->getName() < std::get<1>(Pair)->getName())
- return true;
- if (std::get<0>(Pair)->getName() > std::get<1>(Pair)->getName())
- return false;
- }
- return false;
- });
- FeatureBitsets.erase(
- std::unique(FeatureBitsets.begin(), FeatureBitsets.end()),
- FeatureBitsets.end());
- OS << "// Feature bitsets.\n"
- << "enum {\n"
- << " GIFBS_Invalid,\n";
- for (const auto &FeatureBitset : FeatureBitsets) {
- if (FeatureBitset.empty())
- continue;
- OS << " " << getNameForFeatureBitset(FeatureBitset) << ",\n";
- }
- OS << "};\n"
- << "const static PredicateBitset FeatureBitsets[] {\n"
- << " {}, // GIFBS_Invalid\n";
- for (const auto &FeatureBitset : FeatureBitsets) {
- if (FeatureBitset.empty())
- continue;
- OS << " {";
- for (const auto &Feature : FeatureBitset) {
- const auto &I = SubtargetFeatures.find(Feature);
- assert(I != SubtargetFeatures.end() && "Didn't import predicate?");
- OS << I->second.getEnumBitName() << ", ";
- }
- OS << "},\n";
- }
- OS << "};\n\n";
-
- // Emit complex predicate table and an enum to reference them with.
- OS << "// ComplexPattern predicates.\n"
- << "enum {\n"
- << " GICP_Invalid,\n";
- for (const auto &Record : ComplexPredicates)
- OS << " GICP_" << Record->getName() << ",\n";
- OS << "};\n"
- << "// See constructor for table contents\n\n";
-
- emitImmPredicateFns(OS, "I64", "int64_t", [](const Record *R) {
- bool Unset;
- return !R->getValueAsBitOrUnset("IsAPFloat", Unset) &&
- !R->getValueAsBit("IsAPInt");
- });
- emitImmPredicateFns(OS, "APFloat", "const APFloat &", [](const Record *R) {
- bool Unset;
- return R->getValueAsBitOrUnset("IsAPFloat", Unset);
- });
- emitImmPredicateFns(OS, "APInt", "const APInt &", [](const Record *R) {
- return R->getValueAsBit("IsAPInt");
- });
- emitMIPredicateFns(OS);
- OS << "\n";
-
- OS << Target.getName() << "InstructionSelector::ComplexMatcherMemFn\n"
- << Target.getName() << "InstructionSelector::ComplexPredicateFns[] = {\n"
- << " nullptr, // GICP_Invalid\n";
- for (const auto &Record : ComplexPredicates)
- OS << " &" << Target.getName()
- << "InstructionSelector::" << Record->getValueAsString("MatcherFn")
- << ", // " << Record->getName() << "\n";
- OS << "};\n\n";
-
- OS << "// Custom renderers.\n"
- << "enum {\n"
- << " GICR_Invalid,\n";
- for (const auto &Fn : CustomRendererFns)
- OS << " GICR_" << Fn << ",\n";
- OS << "};\n";
-
- OS << Target.getName() << "InstructionSelector::CustomRendererFn\n"
- << Target.getName() << "InstructionSelector::CustomRenderers[] = {\n"
- << " nullptr, // GICR_Invalid\n";
- for (const auto &Fn : CustomRendererFns)
- OS << " &" << Target.getName() << "InstructionSelector::" << Fn << ",\n";
- OS << "};\n\n";
+ // Sort rules.
llvm::stable_sort(Rules, [&](const RuleMatcher &A, const RuleMatcher &B) {
int ScoreA = RuleMatcherScores[A.getRuleID()];
int ScoreB = RuleMatcherScores[B.getRuleID()];
@@ -5966,53 +2466,21 @@ void GlobalISelEmitter::run(raw_ostream &OS) {
return false;
});
- OS << "bool " << Target.getName()
- << "InstructionSelector::selectImpl(MachineInstr &I, CodeGenCoverage "
- "&CoverageInfo) const {\n"
- << " MachineFunction &MF = *I.getParent()->getParent();\n"
- << " MachineRegisterInfo &MRI = MF.getRegInfo();\n"
- << " const PredicateBitset AvailableFeatures = getAvailableFeatures();\n"
- << " NewMIVector OutMIs;\n"
- << " State.MIs.clear();\n"
- << " State.MIs.push_back(&I);\n\n"
- << " if (executeMatchTable(*this, OutMIs, State, ISelInfo"
- << ", getMatchTable(), TII, MRI, TRI, RBI, AvailableFeatures"
- << ", CoverageInfo)) {\n"
- << " return true;\n"
- << " }\n\n"
- << " return false;\n"
- << "}\n\n";
+ unsigned MaxTemporaries = 0;
+ for (const auto &Rule : Rules)
+ MaxTemporaries = std::max(MaxTemporaries, Rule.countRendererFns());
+ // Build match table
const MatchTable Table =
buildMatchTable(Rules, OptimizeMatchTable, GenerateCoverage);
- OS << "const int64_t *" << Target.getName()
- << "InstructionSelector::getMatchTable() const {\n";
- Table.emitDeclaration(OS);
- OS << " return ";
- Table.emitUse(OS);
- OS << ";\n}\n";
- OS << "#endif // ifdef GET_GLOBALISEL_IMPL\n";
-
- OS << "#ifdef GET_GLOBALISEL_PREDICATES_DECL\n"
- << "PredicateBitset AvailableModuleFeatures;\n"
- << "mutable PredicateBitset AvailableFunctionFeatures;\n"
- << "PredicateBitset getAvailableFeatures() const {\n"
- << " return AvailableModuleFeatures | AvailableFunctionFeatures;\n"
- << "}\n"
- << "PredicateBitset\n"
- << "computeAvailableModuleFeatures(const " << Target.getName()
- << "Subtarget *Subtarget) const;\n"
- << "PredicateBitset\n"
- << "computeAvailableFunctionFeatures(const " << Target.getName()
- << "Subtarget *Subtarget,\n"
- << " const MachineFunction *MF) const;\n"
- << "void setupGeneratedPerFunctionState(MachineFunction &MF) override;\n"
- << "#endif // ifdef GET_GLOBALISEL_PREDICATES_DECL\n";
-
- OS << "#ifdef GET_GLOBALISEL_PREDICATES_INIT\n"
- << "AvailableModuleFeatures(computeAvailableModuleFeatures(&STI)),\n"
- << "AvailableFunctionFeatures()\n"
- << "#endif // ifdef GET_GLOBALISEL_PREDICATES_INIT\n";
+
+ emitPredicateBitset(OS, "GET_GLOBALISEL_PREDICATE_BITSET");
+ emitTemporariesDecl(OS, "GET_GLOBALISEL_TEMPORARIES_DECL");
+ emitTemporariesInit(OS, MaxTemporaries, "GET_GLOBALISEL_TEMPORARIES_INIT");
+ emitExecutorImpl(OS, Table, TypeObjects, Rules, ComplexPredicates,
+ CustomRendererFns, "GET_GLOBALISEL_IMPL");
+ emitPredicatesDecl(OS, "GET_GLOBALISEL_PREDICATES_DECL");
+ emitPredicatesInit(OS, "GET_GLOBALISEL_PREDICATES_INIT");
}
void GlobalISelEmitter::declareSubtargetFeature(Record *Predicate) {
@@ -6021,294 +2489,9 @@ void GlobalISelEmitter::declareSubtargetFeature(Record *Predicate) {
Predicate, SubtargetFeatureInfo(Predicate, SubtargetFeatures.size()));
}
-void RuleMatcher::optimize() {
- for (auto &Item : InsnVariableIDs) {
- InstructionMatcher &InsnMatcher = *Item.first;
- for (auto &OM : InsnMatcher.operands()) {
- // Complex Patterns are usually expensive and they relatively rarely fail
- // on their own: more often we end up throwing away all the work done by a
- // matching part of a complex pattern because some other part of the
- // enclosing pattern didn't match. All of this makes it beneficial to
- // delay complex patterns until the very end of the rule matching,
- // especially for targets having lots of complex patterns.
- for (auto &OP : OM->predicates())
- if (isa<ComplexPatternOperandMatcher>(OP))
- EpilogueMatchers.emplace_back(std::move(OP));
- OM->eraseNullPredicates();
- }
- InsnMatcher.optimize();
- }
- llvm::sort(EpilogueMatchers, [](const std::unique_ptr<PredicateMatcher> &L,
- const std::unique_ptr<PredicateMatcher> &R) {
- return std::make_tuple(L->getKind(), L->getInsnVarID(), L->getOpIdx()) <
- std::make_tuple(R->getKind(), R->getInsnVarID(), R->getOpIdx());
- });
-}
-
-bool RuleMatcher::hasFirstCondition() const {
- if (insnmatchers_empty())
- return false;
- InstructionMatcher &Matcher = insnmatchers_front();
- if (!Matcher.predicates_empty())
- return true;
- for (auto &OM : Matcher.operands())
- for (auto &OP : OM->predicates())
- if (!isa<InstructionOperandMatcher>(OP))
- return true;
- return false;
-}
-
-const PredicateMatcher &RuleMatcher::getFirstCondition() const {
- assert(!insnmatchers_empty() &&
- "Trying to get a condition from an empty RuleMatcher");
-
- InstructionMatcher &Matcher = insnmatchers_front();
- if (!Matcher.predicates_empty())
- return **Matcher.predicates_begin();
- // If there is no more predicate on the instruction itself, look at its
- // operands.
- for (auto &OM : Matcher.operands())
- for (auto &OP : OM->predicates())
- if (!isa<InstructionOperandMatcher>(OP))
- return *OP;
-
- llvm_unreachable("Trying to get a condition from an InstructionMatcher with "
- "no conditions");
-}
-
-std::unique_ptr<PredicateMatcher> RuleMatcher::popFirstCondition() {
- assert(!insnmatchers_empty() &&
- "Trying to pop a condition from an empty RuleMatcher");
-
- InstructionMatcher &Matcher = insnmatchers_front();
- if (!Matcher.predicates_empty())
- return Matcher.predicates_pop_front();
- // If there is no more predicate on the instruction itself, look at its
- // operands.
- for (auto &OM : Matcher.operands())
- for (auto &OP : OM->predicates())
- if (!isa<InstructionOperandMatcher>(OP)) {
- std::unique_ptr<PredicateMatcher> Result = std::move(OP);
- OM->eraseNullPredicates();
- return Result;
- }
-
- llvm_unreachable("Trying to pop a condition from an InstructionMatcher with "
- "no conditions");
-}
-
-bool GroupMatcher::candidateConditionMatches(
- const PredicateMatcher &Predicate) const {
-
- if (empty()) {
- // Sharing predicates for nested instructions is not supported yet as we
- // currently don't hoist the GIM_RecordInsn's properly, therefore we can
- // only work on the original root instruction (InsnVarID == 0):
- if (Predicate.getInsnVarID() != 0)
- return false;
- // ... otherwise an empty group can handle any predicate with no specific
- // requirements:
- return true;
- }
-
- const Matcher &Representative = **Matchers.begin();
- const auto &RepresentativeCondition = Representative.getFirstCondition();
- // ... if not empty, the group can only accomodate matchers with the exact
- // same first condition:
- return Predicate.isIdentical(RepresentativeCondition);
-}
-
-bool GroupMatcher::addMatcher(Matcher &Candidate) {
- if (!Candidate.hasFirstCondition())
- return false;
-
- const PredicateMatcher &Predicate = Candidate.getFirstCondition();
- if (!candidateConditionMatches(Predicate))
- return false;
-
- Matchers.push_back(&Candidate);
- return true;
-}
-
-void GroupMatcher::finalize() {
- assert(Conditions.empty() && "Already finalized?");
- if (empty())
- return;
-
- Matcher &FirstRule = **Matchers.begin();
- for (;;) {
- // All the checks are expected to succeed during the first iteration:
- for (const auto &Rule : Matchers)
- if (!Rule->hasFirstCondition())
- return;
- const auto &FirstCondition = FirstRule.getFirstCondition();
- for (unsigned I = 1, E = Matchers.size(); I < E; ++I)
- if (!Matchers[I]->getFirstCondition().isIdentical(FirstCondition))
- return;
-
- Conditions.push_back(FirstRule.popFirstCondition());
- for (unsigned I = 1, E = Matchers.size(); I < E; ++I)
- Matchers[I]->popFirstCondition();
- }
-}
-
-void GroupMatcher::emit(MatchTable &Table) {
- unsigned LabelID = ~0U;
- if (!Conditions.empty()) {
- LabelID = Table.allocateLabelID();
- Table << MatchTable::Opcode("GIM_Try", +1)
- << MatchTable::Comment("On fail goto")
- << MatchTable::JumpTarget(LabelID) << MatchTable::LineBreak;
- }
- for (auto &Condition : Conditions)
- Condition->emitPredicateOpcodes(
- Table, *static_cast<RuleMatcher *>(*Matchers.begin()));
-
- for (const auto &M : Matchers)
- M->emit(Table);
-
- // Exit the group
- if (!Conditions.empty())
- Table << MatchTable::Opcode("GIM_Reject", -1) << MatchTable::LineBreak
- << MatchTable::Label(LabelID);
-}
-
-bool SwitchMatcher::isSupportedPredicateType(const PredicateMatcher &P) {
- return isa<InstructionOpcodeMatcher>(P) || isa<LLTOperandMatcher>(P);
-}
-
-bool SwitchMatcher::candidateConditionMatches(
- const PredicateMatcher &Predicate) const {
-
- if (empty()) {
- // Sharing predicates for nested instructions is not supported yet as we
- // currently don't hoist the GIM_RecordInsn's properly, therefore we can
- // only work on the original root instruction (InsnVarID == 0):
- if (Predicate.getInsnVarID() != 0)
- return false;
- // ... while an attempt to add even a root matcher to an empty SwitchMatcher
- // could fail as not all the types of conditions are supported:
- if (!isSupportedPredicateType(Predicate))
- return false;
- // ... or the condition might not have a proper implementation of
- // getValue() / isIdenticalDownToValue() yet:
- if (!Predicate.hasValue())
- return false;
- // ... otherwise an empty Switch can accomodate the condition with no
- // further requirements:
- return true;
- }
-
- const Matcher &CaseRepresentative = **Matchers.begin();
- const auto &RepresentativeCondition = CaseRepresentative.getFirstCondition();
- // Switch-cases must share the same kind of condition and path to the value it
- // checks:
- if (!Predicate.isIdenticalDownToValue(RepresentativeCondition))
- return false;
-
- const auto Value = Predicate.getValue();
- // ... but be unique with respect to the actual value they check:
- return Values.count(Value) == 0;
-}
-
-bool SwitchMatcher::addMatcher(Matcher &Candidate) {
- if (!Candidate.hasFirstCondition())
- return false;
-
- const PredicateMatcher &Predicate = Candidate.getFirstCondition();
- if (!candidateConditionMatches(Predicate))
- return false;
- const auto Value = Predicate.getValue();
- Values.insert(Value);
-
- Matchers.push_back(&Candidate);
- return true;
-}
-
-void SwitchMatcher::finalize() {
- assert(Condition == nullptr && "Already finalized");
- assert(Values.size() == Matchers.size() && "Broken SwitchMatcher");
- if (empty())
- return;
-
- llvm::stable_sort(Matchers, [](const Matcher *L, const Matcher *R) {
- return L->getFirstCondition().getValue() <
- R->getFirstCondition().getValue();
- });
- Condition = Matchers[0]->popFirstCondition();
- for (unsigned I = 1, E = Values.size(); I < E; ++I)
- Matchers[I]->popFirstCondition();
-}
-
-void SwitchMatcher::emitPredicateSpecificOpcodes(const PredicateMatcher &P,
- MatchTable &Table) {
- assert(isSupportedPredicateType(P) && "Predicate type is not supported");
-
- if (const auto *Condition = dyn_cast<InstructionOpcodeMatcher>(&P)) {
- Table << MatchTable::Opcode("GIM_SwitchOpcode") << MatchTable::Comment("MI")
- << MatchTable::IntValue(Condition->getInsnVarID());
- return;
- }
- if (const auto *Condition = dyn_cast<LLTOperandMatcher>(&P)) {
- Table << MatchTable::Opcode("GIM_SwitchType") << MatchTable::Comment("MI")
- << MatchTable::IntValue(Condition->getInsnVarID())
- << MatchTable::Comment("Op")
- << MatchTable::IntValue(Condition->getOpIdx());
- return;
- }
-
- llvm_unreachable("emitPredicateSpecificOpcodes is broken: can not handle a "
- "predicate type that is claimed to be supported");
-}
-
-void SwitchMatcher::emit(MatchTable &Table) {
- assert(Values.size() == Matchers.size() && "Broken SwitchMatcher");
- if (empty())
- return;
- assert(Condition != nullptr &&
- "Broken SwitchMatcher, hasn't been finalized?");
-
- std::vector<unsigned> LabelIDs(Values.size());
- std::generate(LabelIDs.begin(), LabelIDs.end(),
- [&Table]() { return Table.allocateLabelID(); });
- const unsigned Default = Table.allocateLabelID();
-
- const int64_t LowerBound = Values.begin()->getRawValue();
- const int64_t UpperBound = Values.rbegin()->getRawValue() + 1;
-
- emitPredicateSpecificOpcodes(*Condition, Table);
-
- Table << MatchTable::Comment("[") << MatchTable::IntValue(LowerBound)
- << MatchTable::IntValue(UpperBound) << MatchTable::Comment(")")
- << MatchTable::Comment("default:") << MatchTable::JumpTarget(Default);
-
- int64_t J = LowerBound;
- auto VI = Values.begin();
- for (unsigned I = 0, E = Values.size(); I < E; ++I) {
- auto V = *VI++;
- while (J++ < V.getRawValue())
- Table << MatchTable::IntValue(0);
- V.turnIntoComment();
- Table << MatchTable::LineBreak << V << MatchTable::JumpTarget(LabelIDs[I]);
- }
- Table << MatchTable::LineBreak;
-
- for (unsigned I = 0, E = Values.size(); I < E; ++I) {
- Table << MatchTable::Label(LabelIDs[I]);
- Matchers[I]->emit(Table);
- Table << MatchTable::Opcode("GIM_Reject") << MatchTable::LineBreak;
- }
- Table << MatchTable::Label(Default);
-}
-
-unsigned OperandMatcher::getInsnVarID() const { return Insn.getInsnVarID(); }
-
} // end anonymous namespace
//===----------------------------------------------------------------------===//
-namespace llvm {
-void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS) {
- GlobalISelEmitter(RK).run(OS);
-}
-} // End llvm namespace
+static TableGen::Emitter::OptClass<GlobalISelEmitter>
+ X("gen-global-isel", "Generate GlobalISel selector");
diff --git a/llvm/utils/TableGen/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/GlobalISelMatchTable.cpp
new file mode 100644
index 000000000000..aab772f020a6
--- /dev/null
+++ b/llvm/utils/TableGen/GlobalISelMatchTable.cpp
@@ -0,0 +1,2019 @@
+//===- GlobalISelMatchTable.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "GlobalISelMatchTable.h"
+#include "CodeGenInstruction.h"
+#include "CodeGenRegisters.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Error.h"
+
+#define DEBUG_TYPE "gi-match-table"
+
+STATISTIC(NumPatternEmitted, "Number of patterns emitted");
+
+namespace llvm {
+namespace gi {
+
+namespace {
+
+Error failUnsupported(const Twine &Reason) {
+ return make_error<StringError>(Reason, inconvertibleErrorCode());
+}
+
+/// Get the name of the enum value used to number the predicate function.
+std::string getEnumNameForPredicate(const TreePredicateFn &Predicate) {
+ if (Predicate.hasGISelPredicateCode())
+ return "GICXXPred_MI_" + Predicate.getFnName();
+ return "GICXXPred_" + Predicate.getImmTypeIdentifier().str() + "_" +
+ Predicate.getFnName();
+}
+
+std::string getMatchOpcodeForImmPredicate(const TreePredicateFn &Predicate) {
+ return "GIM_Check" + Predicate.getImmTypeIdentifier().str() + "ImmPredicate";
+}
+} // namespace
+
+//===- Helpers ------------------------------------------------------------===//
+
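+/// Build the enum name used for a feature bitset: "GIFBS" followed by one
+/// "_<FeatureName>" suffix per required feature record.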
+std::string
+getNameForFeatureBitset(const std::vector<Record *> &FeatureBitset) {
+ std::string Name = "GIFBS";
+ for (const auto &Feature : FeatureBitset)
+ Name += ("_" + Feature->getName()).str();
+ return Name;
+}
+
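+/// Greedily fold consecutive rules into GroupT nests: rules are appended to
+/// the current group for as long as GroupT::addMatcher accepts them. Groups
+/// that end up with fewer than two rules are flattened back into the output
+/// list, and rules that even an empty group rejects are emitted unchanged.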
+template <class GroupT>
+std::vector<Matcher *>
+optimizeRules(ArrayRef<Matcher *> Rules,
+ std::vector<std::unique_ptr<Matcher>> &MatcherStorage) {
+
+ std::vector<Matcher *> OptRules;
+ std::unique_ptr<GroupT> CurrentGroup = std::make_unique<GroupT>();
+ assert(CurrentGroup->empty() && "Newly created group isn't empty!");
+ unsigned NumGroups = 0;
+
+ auto ProcessCurrentGroup = [&]() {
+ if (CurrentGroup->empty())
+ // An empty group is good to be reused:
+ return;
+
+ // If the group isn't large enough to provide any benefit, move all the
+ // added rules out of it and make sure to re-create the group to properly
+ // re-initialize it:
+ if (CurrentGroup->size() < 2)
+ append_range(OptRules, CurrentGroup->matchers());
+ else {
+ CurrentGroup->finalize();
+ OptRules.push_back(CurrentGroup.get());
+ MatcherStorage.emplace_back(std::move(CurrentGroup));
+ ++NumGroups;
+ }
+ CurrentGroup = std::make_unique<GroupT>();
+ };
+ for (Matcher *Rule : Rules) {
+ // Greedily add as many matchers as possible to the current group:
+ if (CurrentGroup->addMatcher(*Rule))
+ continue;
+
+ ProcessCurrentGroup();
+ assert(CurrentGroup->empty() && "A group wasn't properly re-initialized");
+
+ // Try to add the pending matcher to a newly created empty group:
+ if (!CurrentGroup->addMatcher(*Rule))
+ // If we couldn't add the matcher to an empty group, that group type
+ // doesn't support that kind of matchers at all, so just skip it:
+ OptRules.push_back(Rule);
+ }
+ ProcessCurrentGroup();
+
+ LLVM_DEBUG(dbgs() << "NumGroups: " << NumGroups << "\n");
+ (void)NumGroups;
+ assert(CurrentGroup->empty() && "The last group wasn't properly processed");
+ return OptRules;
+}
+
+template std::vector<Matcher *> optimizeRules<GroupMatcher>(
+ ArrayRef<Matcher *> Rules,
+ std::vector<std::unique_ptr<Matcher>> &MatcherStorage);
+
+template std::vector<Matcher *> optimizeRules<SwitchMatcher>(
+ ArrayRef<Matcher *> Rules,
+ std::vector<std::unique_ptr<Matcher>> &MatcherStorage);
+
+//===- Global Data --------------------------------------------------------===//
+
+std::set<LLTCodeGen> KnownTypes;
+
+//===- MatchTableRecord ---------------------------------------------------===//
+
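+/// Print a single table record. Comment records are rendered as either a
+/// line ("// ") or block ("/* */") comment depending on what follows, label
+/// and jump-target records are resolved to their numeric index through the
+/// owning table, and the flags drive the trailing comma and line break.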
+void MatchTableRecord::emit(raw_ostream &OS, bool LineBreakIsNextAfterThis,
+ const MatchTable &Table) const {
+ bool UseLineComment =
+ LineBreakIsNextAfterThis || (Flags & MTRF_LineBreakFollows);
+ if (Flags & (MTRF_JumpTarget | MTRF_CommaFollows))
+ UseLineComment = false;
+
+ if (Flags & MTRF_Comment)
+ OS << (UseLineComment ? "// " : "/*");
+
+ OS << EmitStr;
+ if (Flags & MTRF_Label)
+ OS << ": @" << Table.getLabelIndex(LabelID);
+
+ if ((Flags & MTRF_Comment) && !UseLineComment)
+ OS << "*/";
+
+ if (Flags & MTRF_JumpTarget) {
+ if (Flags & MTRF_Comment)
+ OS << " ";
+ OS << Table.getLabelIndex(LabelID);
+ }
+
+ if (Flags & MTRF_CommaFollows) {
+ OS << ",";
+ if (!LineBreakIsNextAfterThis && !(Flags & MTRF_LineBreakFollows))
+ OS << " ";
+ }
+
+ if (Flags & MTRF_LineBreakFollows)
+ OS << "\n";
+}
+
+//===- MatchTable ---------------------------------------------------------===//
+
+MatchTableRecord MatchTable::LineBreak = {
+ std::nullopt, "" /* Emit String */, 0 /* Elements */,
+ MatchTableRecord::MTRF_LineBreakFollows};
+
+MatchTableRecord MatchTable::Comment(StringRef Comment) {
+ return MatchTableRecord(std::nullopt, Comment, 0,
+ MatchTableRecord::MTRF_Comment);
+}
+
+MatchTableRecord MatchTable::Opcode(StringRef Opcode, int IndentAdjust) {
+ unsigned ExtraFlags = 0;
+ if (IndentAdjust > 0)
+ ExtraFlags |= MatchTableRecord::MTRF_Indent;
+ if (IndentAdjust < 0)
+ ExtraFlags |= MatchTableRecord::MTRF_Outdent;
+
+ return MatchTableRecord(std::nullopt, Opcode, 1,
+ MatchTableRecord::MTRF_CommaFollows | ExtraFlags);
+}
+
+MatchTableRecord MatchTable::NamedValue(StringRef NamedValue) {
+ return MatchTableRecord(std::nullopt, NamedValue, 1,
+ MatchTableRecord::MTRF_CommaFollows);
+}
+
+MatchTableRecord MatchTable::NamedValue(StringRef NamedValue,
+ int64_t RawValue) {
+ return MatchTableRecord(std::nullopt, NamedValue, 1,
+ MatchTableRecord::MTRF_CommaFollows, RawValue);
+}
+
+MatchTableRecord MatchTable::NamedValue(StringRef Namespace,
+ StringRef NamedValue) {
+ return MatchTableRecord(std::nullopt, (Namespace + "::" + NamedValue).str(),
+ 1, MatchTableRecord::MTRF_CommaFollows);
+}
+
+MatchTableRecord MatchTable::NamedValue(StringRef Namespace,
+ StringRef NamedValue,
+ int64_t RawValue) {
+ return MatchTableRecord(std::nullopt, (Namespace + "::" + NamedValue).str(),
+ 1, MatchTableRecord::MTRF_CommaFollows, RawValue);
+}
+
+MatchTableRecord MatchTable::IntValue(int64_t IntValue) {
+ return MatchTableRecord(std::nullopt, llvm::to_string(IntValue), 1,
+ MatchTableRecord::MTRF_CommaFollows);
+}
+
+MatchTableRecord MatchTable::Label(unsigned LabelID) {
+ return MatchTableRecord(LabelID, "Label " + llvm::to_string(LabelID), 0,
+ MatchTableRecord::MTRF_Label |
+ MatchTableRecord::MTRF_Comment |
+ MatchTableRecord::MTRF_LineBreakFollows);
+}
+
+MatchTableRecord MatchTable::JumpTarget(unsigned LabelID) {
+ return MatchTableRecord(LabelID, "Label " + llvm::to_string(LabelID), 1,
+ MatchTableRecord::MTRF_JumpTarget |
+ MatchTableRecord::MTRF_Comment |
+ MatchTableRecord::MTRF_CommaFollows);
+}
+
+void MatchTable::emitUse(raw_ostream &OS) const { OS << "MatchTable" << ID; }
+
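+/// Emit the table as a "constexpr static int64_t MatchTableN[]" definition,
+/// adjusting the indentation as records carry the Indent/Outdent flags and
+/// starting a fresh line after records that request a line break.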
+void MatchTable::emitDeclaration(raw_ostream &OS) const {
+ unsigned Indentation = 4;
+ OS << " constexpr static int64_t MatchTable" << ID << "[] = {";
+ LineBreak.emit(OS, true, *this);
+ OS << std::string(Indentation, ' ');
+
+ for (auto I = Contents.begin(), E = Contents.end(); I != E; ++I) {
+ bool LineBreakIsNext = false;
+ const auto &NextI = std::next(I);
+
+ if (NextI != E) {
+ if (NextI->EmitStr == "" &&
+ NextI->Flags == MatchTableRecord::MTRF_LineBreakFollows)
+ LineBreakIsNext = true;
+ }
+
+ if (I->Flags & MatchTableRecord::MTRF_Indent)
+ Indentation += 2;
+
+ I->emit(OS, LineBreakIsNext, *this);
+ if (I->Flags & MatchTableRecord::MTRF_LineBreakFollows)
+ OS << std::string(Indentation, ' ');
+
+ if (I->Flags & MatchTableRecord::MTRF_Outdent)
+ Indentation -= 2;
+ }
+ OS << "};\n";
+}
+
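+/// Emit every rule into a fresh table and terminate it with a final
+/// GIM_Reject.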
+MatchTable MatchTable::buildTable(ArrayRef<Matcher *> Rules, bool WithCoverage,
+ bool IsCombiner) {
+ MatchTable Table(WithCoverage, IsCombiner);
+ for (Matcher *Rule : Rules)
+ Rule->emit(Table);
+
+ return Table << MatchTable::Opcode("GIM_Reject") << MatchTable::LineBreak;
+}
+
+//===- LLTCodeGen ---------------------------------------------------------===//
+
+std::string LLTCodeGen::getCxxEnumValue() const {
+ std::string Str;
+ raw_string_ostream OS(Str);
+
+ emitCxxEnumValue(OS);
+ return Str;
+}
+
+void LLTCodeGen::emitCxxEnumValue(raw_ostream &OS) const {
+ if (Ty.isScalar()) {
+ OS << "GILLT_s" << Ty.getSizeInBits();
+ return;
+ }
+ if (Ty.isVector()) {
+ OS << (Ty.isScalable() ? "GILLT_nxv" : "GILLT_v")
+ << Ty.getElementCount().getKnownMinValue() << "s"
+ << Ty.getScalarSizeInBits();
+ return;
+ }
+ if (Ty.isPointer()) {
+ OS << "GILLT_p" << Ty.getAddressSpace();
+ if (Ty.getSizeInBits() > 0)
+ OS << "s" << Ty.getSizeInBits();
+ return;
+ }
+ llvm_unreachable("Unhandled LLT");
+}
+
+void LLTCodeGen::emitCxxConstructorCall(raw_ostream &OS) const {
+ if (Ty.isScalar()) {
+ OS << "LLT::scalar(" << Ty.getSizeInBits() << ")";
+ return;
+ }
+ if (Ty.isVector()) {
+ OS << "LLT::vector("
+ << (Ty.isScalable() ? "ElementCount::getScalable("
+ : "ElementCount::getFixed(")
+ << Ty.getElementCount().getKnownMinValue() << "), "
+ << Ty.getScalarSizeInBits() << ")";
+ return;
+ }
+ if (Ty.isPointer() && Ty.getSizeInBits() > 0) {
+ OS << "LLT::pointer(" << Ty.getAddressSpace() << ", " << Ty.getSizeInBits()
+ << ")";
+ return;
+ }
+ llvm_unreachable("Unhandled LLT");
+}
+
+/// This ordering is used for std::unique() and llvm::sort(). There's no
+/// particular logic behind the order but either A < B or B < A must be
+/// true if A != B.
+bool LLTCodeGen::operator<(const LLTCodeGen &Other) const {
+ if (Ty.isValid() != Other.Ty.isValid())
+ return Ty.isValid() < Other.Ty.isValid();
+ if (!Ty.isValid())
+ return false;
+
+ if (Ty.isVector() != Other.Ty.isVector())
+ return Ty.isVector() < Other.Ty.isVector();
+ if (Ty.isScalar() != Other.Ty.isScalar())
+ return Ty.isScalar() < Other.Ty.isScalar();
+ if (Ty.isPointer() != Other.Ty.isPointer())
+ return Ty.isPointer() < Other.Ty.isPointer();
+
+ if (Ty.isPointer() && Ty.getAddressSpace() != Other.Ty.getAddressSpace())
+ return Ty.getAddressSpace() < Other.Ty.getAddressSpace();
+
+ if (Ty.isVector() && Ty.getElementCount() != Other.Ty.getElementCount())
+ return std::make_tuple(Ty.isScalable(),
+ Ty.getElementCount().getKnownMinValue()) <
+ std::make_tuple(Other.Ty.isScalable(),
+ Other.Ty.getElementCount().getKnownMinValue());
+
+ assert((!Ty.isVector() || Ty.isScalable() == Other.Ty.isScalable()) &&
+ "Unexpected mismatch of scalable property");
+ return Ty.isVector()
+ ? std::make_tuple(Ty.isScalable(),
+ Ty.getSizeInBits().getKnownMinValue()) <
+ std::make_tuple(Other.Ty.isScalable(),
+ Other.Ty.getSizeInBits().getKnownMinValue())
+ : Ty.getSizeInBits().getFixedValue() <
+ Other.Ty.getSizeInBits().getFixedValue();
+}
+
+//===- LLTCodeGen Helpers -------------------------------------------------===//
+
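+/// Translate an MVT into the equivalent LLT, if any: vectors with more than
+/// one element become LLT vectors, the remaining integer and floating-point
+/// types become scalars of the same width, and anything else has no
+/// GlobalISel equivalent.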
+std::optional<LLTCodeGen> MVTToLLT(MVT::SimpleValueType SVT) {
+ MVT VT(SVT);
+
+ if (VT.isVector() && !VT.getVectorElementCount().isScalar())
+ return LLTCodeGen(
+ LLT::vector(VT.getVectorElementCount(), VT.getScalarSizeInBits()));
+
+ if (VT.isInteger() || VT.isFloatingPoint())
+ return LLTCodeGen(LLT::scalar(VT.getSizeInBits()));
+
+ return std::nullopt;
+}
+
+//===- Matcher ------------------------------------------------------------===//
+
+void Matcher::optimize() {}
+
+Matcher::~Matcher() {}
+
+//===- GroupMatcher -------------------------------------------------------===//
+
+bool GroupMatcher::candidateConditionMatches(
+ const PredicateMatcher &Predicate) const {
+
+ if (empty()) {
+ // Sharing predicates for nested instructions is not supported yet as we
+ // currently don't hoist the GIM_RecordInsn's properly, therefore we can
+ // only work on the original root instruction (InsnVarID == 0):
+ if (Predicate.getInsnVarID() != 0)
+ return false;
+ // ... otherwise an empty group can handle any predicate with no specific
+ // requirements:
+ return true;
+ }
+
+ const Matcher &Representative = **Matchers.begin();
+ const auto &RepresentativeCondition = Representative.getFirstCondition();
+  // ... if not empty, the group can only accommodate matchers with the exact
+ // same first condition:
+ return Predicate.isIdentical(RepresentativeCondition);
+}
+
+bool GroupMatcher::addMatcher(Matcher &Candidate) {
+ if (!Candidate.hasFirstCondition())
+ return false;
+
+ const PredicateMatcher &Predicate = Candidate.getFirstCondition();
+ if (!candidateConditionMatches(Predicate))
+ return false;
+
+ Matchers.push_back(&Candidate);
+ return true;
+}
+
+void GroupMatcher::finalize() {
+ assert(Conditions.empty() && "Already finalized?");
+ if (empty())
+ return;
+
+ Matcher &FirstRule = **Matchers.begin();
+ for (;;) {
+ // All the checks are expected to succeed during the first iteration:
+ for (const auto &Rule : Matchers)
+ if (!Rule->hasFirstCondition())
+ return;
+ const auto &FirstCondition = FirstRule.getFirstCondition();
+ for (unsigned I = 1, E = Matchers.size(); I < E; ++I)
+ if (!Matchers[I]->getFirstCondition().isIdentical(FirstCondition))
+ return;
+
+ Conditions.push_back(FirstRule.popFirstCondition());
+ for (unsigned I = 1, E = Matchers.size(); I < E; ++I)
+ Matchers[I]->popFirstCondition();
+ }
+}
+
+void GroupMatcher::emit(MatchTable &Table) {
+ unsigned LabelID = ~0U;
+ if (!Conditions.empty()) {
+ LabelID = Table.allocateLabelID();
+ Table << MatchTable::Opcode("GIM_Try", +1)
+ << MatchTable::Comment("On fail goto")
+ << MatchTable::JumpTarget(LabelID) << MatchTable::LineBreak;
+ }
+ for (auto &Condition : Conditions)
+ Condition->emitPredicateOpcodes(
+ Table, *static_cast<RuleMatcher *>(*Matchers.begin()));
+
+ for (const auto &M : Matchers)
+ M->emit(Table);
+
+ // Exit the group
+ if (!Conditions.empty())
+ Table << MatchTable::Opcode("GIM_Reject", -1) << MatchTable::LineBreak
+ << MatchTable::Label(LabelID);
+}
+
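+/// Sort runs of rules whose first condition checks a concrete root type so
+/// that identical checks become adjacent, then fold the matchers into
+/// GroupMatcher nests and the result into SwitchMatcher nests.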
+void GroupMatcher::optimize() {
+ // Make sure we only sort by a specific predicate within a range of rules that
+ // all have that predicate checked against a specific value (not a wildcard):
+ auto F = Matchers.begin();
+ auto T = F;
+ auto E = Matchers.end();
+ while (T != E) {
+ while (T != E) {
+ auto *R = static_cast<RuleMatcher *>(*T);
+ if (!R->getFirstConditionAsRootType().get().isValid())
+ break;
+ ++T;
+ }
+ std::stable_sort(F, T, [](Matcher *A, Matcher *B) {
+ auto *L = static_cast<RuleMatcher *>(A);
+ auto *R = static_cast<RuleMatcher *>(B);
+ return L->getFirstConditionAsRootType() <
+ R->getFirstConditionAsRootType();
+ });
+ if (T != E)
+ F = ++T;
+ }
+ optimizeRules<GroupMatcher>(Matchers, MatcherStorage).swap(Matchers);
+ optimizeRules<SwitchMatcher>(Matchers, MatcherStorage).swap(Matchers);
+}
+
+//===- SwitchMatcher ------------------------------------------------------===//
+
+bool SwitchMatcher::isSupportedPredicateType(const PredicateMatcher &P) {
+ return isa<InstructionOpcodeMatcher>(P) || isa<LLTOperandMatcher>(P);
+}
+
+bool SwitchMatcher::candidateConditionMatches(
+ const PredicateMatcher &Predicate) const {
+
+ if (empty()) {
+ // Sharing predicates for nested instructions is not supported yet as we
+ // currently don't hoist the GIM_RecordInsn's properly, therefore we can
+ // only work on the original root instruction (InsnVarID == 0):
+ if (Predicate.getInsnVarID() != 0)
+ return false;
+ // ... while an attempt to add even a root matcher to an empty SwitchMatcher
+ // could fail as not all the types of conditions are supported:
+ if (!isSupportedPredicateType(Predicate))
+ return false;
+ // ... or the condition might not have a proper implementation of
+ // getValue() / isIdenticalDownToValue() yet:
+ if (!Predicate.hasValue())
+ return false;
+    // ... otherwise an empty Switch can accommodate the condition with no
+ // further requirements:
+ return true;
+ }
+
+ const Matcher &CaseRepresentative = **Matchers.begin();
+ const auto &RepresentativeCondition = CaseRepresentative.getFirstCondition();
+  // Switch-cases must share the same kind of condition and path to the value
+  // they check:
+ if (!Predicate.isIdenticalDownToValue(RepresentativeCondition))
+ return false;
+
+ const auto Value = Predicate.getValue();
+ // ... but be unique with respect to the actual value they check:
+ return Values.count(Value) == 0;
+}
+
+bool SwitchMatcher::addMatcher(Matcher &Candidate) {
+ if (!Candidate.hasFirstCondition())
+ return false;
+
+ const PredicateMatcher &Predicate = Candidate.getFirstCondition();
+ if (!candidateConditionMatches(Predicate))
+ return false;
+ const auto Value = Predicate.getValue();
+ Values.insert(Value);
+
+ Matchers.push_back(&Candidate);
+ return true;
+}
+
+void SwitchMatcher::finalize() {
+ assert(Condition == nullptr && "Already finalized");
+ assert(Values.size() == Matchers.size() && "Broken SwitchMatcher");
+ if (empty())
+ return;
+
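+  // Order the cases by the value they check so emit() can lay out the jump
+  // table in ascending order, then strip the shared condition from every case,
+  // keeping a single copy as the switched-on Condition.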
+ llvm::stable_sort(Matchers, [](const Matcher *L, const Matcher *R) {
+ return L->getFirstCondition().getValue() <
+ R->getFirstCondition().getValue();
+ });
+ Condition = Matchers[0]->popFirstCondition();
+ for (unsigned I = 1, E = Values.size(); I < E; ++I)
+ Matchers[I]->popFirstCondition();
+}
+
+void SwitchMatcher::emitPredicateSpecificOpcodes(const PredicateMatcher &P,
+ MatchTable &Table) {
+ assert(isSupportedPredicateType(P) && "Predicate type is not supported");
+
+ if (const auto *Condition = dyn_cast<InstructionOpcodeMatcher>(&P)) {
+ Table << MatchTable::Opcode("GIM_SwitchOpcode") << MatchTable::Comment("MI")
+ << MatchTable::IntValue(Condition->getInsnVarID());
+ return;
+ }
+ if (const auto *Condition = dyn_cast<LLTOperandMatcher>(&P)) {
+ Table << MatchTable::Opcode("GIM_SwitchType") << MatchTable::Comment("MI")
+ << MatchTable::IntValue(Condition->getInsnVarID())
+ << MatchTable::Comment("Op")
+ << MatchTable::IntValue(Condition->getOpIdx());
+ return;
+ }
+
+ llvm_unreachable("emitPredicateSpecificOpcodes is broken: can not handle a "
+ "predicate type that is claimed to be supported");
+}
+
+void SwitchMatcher::emit(MatchTable &Table) {
+ assert(Values.size() == Matchers.size() && "Broken SwitchMatcher");
+ if (empty())
+ return;
+ assert(Condition != nullptr &&
+ "Broken SwitchMatcher, hasn't been finalized?");
+
+ std::vector<unsigned> LabelIDs(Values.size());
+ std::generate(LabelIDs.begin(), LabelIDs.end(),
+ [&Table]() { return Table.allocateLabelID(); });
+ const unsigned Default = Table.allocateLabelID();
+
+ const int64_t LowerBound = Values.begin()->getRawValue();
+ const int64_t UpperBound = Values.rbegin()->getRawValue() + 1;
+
+ emitPredicateSpecificOpcodes(*Condition, Table);
+
+ Table << MatchTable::Comment("[") << MatchTable::IntValue(LowerBound)
+ << MatchTable::IntValue(UpperBound) << MatchTable::Comment(")")
+ << MatchTable::Comment("default:") << MatchTable::JumpTarget(Default);
+
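+  // Emit a dense jump table covering [LowerBound, UpperBound): each observed
+  // value gets its case label, and gaps between values are padded with 0.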
+ int64_t J = LowerBound;
+ auto VI = Values.begin();
+ for (unsigned I = 0, E = Values.size(); I < E; ++I) {
+ auto V = *VI++;
+ while (J++ < V.getRawValue())
+ Table << MatchTable::IntValue(0);
+ V.turnIntoComment();
+ Table << MatchTable::LineBreak << V << MatchTable::JumpTarget(LabelIDs[I]);
+ }
+ Table << MatchTable::LineBreak;
+
+ for (unsigned I = 0, E = Values.size(); I < E; ++I) {
+ Table << MatchTable::Label(LabelIDs[I]);
+ Matchers[I]->emit(Table);
+ Table << MatchTable::Opcode("GIM_Reject") << MatchTable::LineBreak;
+ }
+ Table << MatchTable::Label(Default);
+}
+
+//===- RuleMatcher --------------------------------------------------------===//
+
+uint64_t RuleMatcher::NextRuleID = 0;
+
+StringRef RuleMatcher::getOpcode() const {
+ return Matchers.front()->getOpcode();
+}
+
+unsigned RuleMatcher::getNumOperands() const {
+ return Matchers.front()->getNumOperands();
+}
+
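+// Returns the type checked on the root's operand 0 if that is the rule's very
+// first predicate, or an invalid LLTCodeGen otherwise. GroupMatcher::optimize
+// uses this to sort rules by their root type.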
+LLTCodeGen RuleMatcher::getFirstConditionAsRootType() {
+ InstructionMatcher &InsnMatcher = *Matchers.front();
+ if (!InsnMatcher.predicates_empty())
+ if (const auto *TM =
+ dyn_cast<LLTOperandMatcher>(&**InsnMatcher.predicates_begin()))
+ if (TM->getInsnVarID() == 0 && TM->getOpIdx() == 0)
+ return TM->getTy();
+ return {};
+}
+
+void RuleMatcher::optimize() {
+ for (auto &Item : InsnVariableIDs) {
+ InstructionMatcher &InsnMatcher = *Item.first;
+ for (auto &OM : InsnMatcher.operands()) {
+ // Complex Patterns are usually expensive and they relatively rarely fail
+ // on their own: more often we end up throwing away all the work done by a
+ // matching part of a complex pattern because some other part of the
+ // enclosing pattern didn't match. All of this makes it beneficial to
+ // delay complex patterns until the very end of the rule matching,
+ // especially for targets having lots of complex patterns.
+ for (auto &OP : OM->predicates())
+ if (isa<ComplexPatternOperandMatcher>(OP))
+ EpilogueMatchers.emplace_back(std::move(OP));
+ OM->eraseNullPredicates();
+ }
+ InsnMatcher.optimize();
+ }
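+  // Sort the postponed checks into a canonical order (kind, then instruction,
+  // then operand index) so they are emitted deterministically.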
+ llvm::sort(EpilogueMatchers, [](const std::unique_ptr<PredicateMatcher> &L,
+ const std::unique_ptr<PredicateMatcher> &R) {
+ return std::make_tuple(L->getKind(), L->getInsnVarID(), L->getOpIdx()) <
+ std::make_tuple(R->getKind(), R->getInsnVarID(), R->getOpIdx());
+ });
+}
+
+bool RuleMatcher::hasFirstCondition() const {
+ if (insnmatchers_empty())
+ return false;
+ InstructionMatcher &Matcher = insnmatchers_front();
+ if (!Matcher.predicates_empty())
+ return true;
+ for (auto &OM : Matcher.operands())
+ for (auto &OP : OM->predicates())
+ if (!isa<InstructionOperandMatcher>(OP))
+ return true;
+ return false;
+}
+
+const PredicateMatcher &RuleMatcher::getFirstCondition() const {
+ assert(!insnmatchers_empty() &&
+ "Trying to get a condition from an empty RuleMatcher");
+
+ InstructionMatcher &Matcher = insnmatchers_front();
+ if (!Matcher.predicates_empty())
+ return **Matcher.predicates_begin();
+  // If there are no more predicates on the instruction itself, look at its
+ // operands.
+ for (auto &OM : Matcher.operands())
+ for (auto &OP : OM->predicates())
+ if (!isa<InstructionOperandMatcher>(OP))
+ return *OP;
+
+ llvm_unreachable("Trying to get a condition from an InstructionMatcher with "
+ "no conditions");
+}
+
+std::unique_ptr<PredicateMatcher> RuleMatcher::popFirstCondition() {
+ assert(!insnmatchers_empty() &&
+ "Trying to pop a condition from an empty RuleMatcher");
+
+ InstructionMatcher &Matcher = insnmatchers_front();
+ if (!Matcher.predicates_empty())
+ return Matcher.predicates_pop_front();
+  // If there are no more predicates on the instruction itself, look at its
+ // operands.
+ for (auto &OM : Matcher.operands())
+ for (auto &OP : OM->predicates())
+ if (!isa<InstructionOperandMatcher>(OP)) {
+ std::unique_ptr<PredicateMatcher> Result = std::move(OP);
+ OM->eraseNullPredicates();
+ return Result;
+ }
+
+ llvm_unreachable("Trying to pop a condition from an InstructionMatcher with "
+ "no conditions");
+}
+
+GISelFlags RuleMatcher::updateGISelFlag(GISelFlags CurFlags, const Record *R,
+ StringRef FlagName,
+ GISelFlags FlagBit) {
+ // If the value of a flag is unset, ignore it.
+ // If it's set, it always takes precedence over the existing value so
+ // clear/set the corresponding bit.
+ bool Unset = false;
+  bool Value = R->getValueAsBitOrUnset(FlagName, Unset);
+ if (!Unset)
+ return Value ? (CurFlags | FlagBit) : (CurFlags & ~FlagBit);
+ return CurFlags;
+}
+
+SaveAndRestore<GISelFlags> RuleMatcher::setGISelFlags(const Record *R) {
+ if (!R || !R->isSubClassOf("GISelFlags"))
+ return {Flags, Flags};
+
+ assert((R->isSubClassOf("PatFrags") || R->isSubClassOf("Pattern")) &&
+ "GISelFlags is only expected on Pattern/PatFrags!");
+
+ GISelFlags NewFlags =
+ updateGISelFlag(Flags, R, "GIIgnoreCopies", GISF_IgnoreCopies);
+ return {Flags, NewFlags};
+}
+
+Error RuleMatcher::defineComplexSubOperand(StringRef SymbolicName,
+ Record *ComplexPattern,
+ unsigned RendererID,
+ unsigned SubOperandID,
+ StringRef ParentSymbolicName) {
+ std::string ParentName(ParentSymbolicName);
+ if (ComplexSubOperands.count(SymbolicName)) {
+ const std::string &RecordedParentName =
+ ComplexSubOperandsParentName[SymbolicName];
+ if (RecordedParentName != ParentName)
+ return failUnsupported("Error: Complex suboperand " + SymbolicName +
+ " referenced by different operands: " +
+ RecordedParentName + " and " + ParentName + ".");
+    // A complex suboperand referenced more than once from the same operand is
+    // used to generate a 'same operand check'. Emitting the
+    // GIR_ComplexSubOperandRenderer for it is already handled.
+ return Error::success();
+ }
+
+ ComplexSubOperands[SymbolicName] =
+ std::make_tuple(ComplexPattern, RendererID, SubOperandID);
+ ComplexSubOperandsParentName[SymbolicName] = ParentName;
+
+ return Error::success();
+}
+
+InstructionMatcher &RuleMatcher::addInstructionMatcher(StringRef SymbolicName) {
+ Matchers.emplace_back(new InstructionMatcher(*this, SymbolicName));
+ MutatableInsns.insert(Matchers.back().get());
+ return *Matchers.back();
+}
+
+void RuleMatcher::addRequiredSimplePredicate(StringRef PredName) {
+ RequiredSimplePredicates.push_back(PredName.str());
+}
+
+const std::vector<std::string> &RuleMatcher::getRequiredSimplePredicates() {
+ return RequiredSimplePredicates;
+}
+
+void RuleMatcher::addRequiredFeature(Record *Feature) {
+ RequiredFeatures.push_back(Feature);
+}
+
+const std::vector<Record *> &RuleMatcher::getRequiredFeatures() const {
+ return RequiredFeatures;
+}
+
+unsigned RuleMatcher::implicitlyDefineInsnVar(InstructionMatcher &Matcher) {
+ unsigned NewInsnVarID = NextInsnVarID++;
+ InsnVariableIDs[&Matcher] = NewInsnVarID;
+ return NewInsnVarID;
+}
+
+unsigned RuleMatcher::getInsnVarID(InstructionMatcher &InsnMatcher) const {
+ const auto &I = InsnVariableIDs.find(&InsnMatcher);
+ if (I != InsnVariableIDs.end())
+ return I->second;
+ llvm_unreachable("Matched Insn was not captured in a local variable");
+}
+
+void RuleMatcher::defineOperand(StringRef SymbolicName, OperandMatcher &OM) {
+ if (!DefinedOperands.contains(SymbolicName)) {
+ DefinedOperands[SymbolicName] = &OM;
+ return;
+ }
+
+ // If the operand is already defined, then we must ensure both references in
+ // the matcher have the exact same node.
+ RuleMatcher &RM = OM.getInstructionMatcher().getRuleMatcher();
+ OM.addPredicate<SameOperandMatcher>(
+ OM.getSymbolicName(), getOperandMatcher(OM.getSymbolicName()).getOpIdx(),
+ RM.getGISelFlags());
+}
+
+void RuleMatcher::definePhysRegOperand(Record *Reg, OperandMatcher &OM) {
+ if (!PhysRegOperands.contains(Reg)) {
+ PhysRegOperands[Reg] = &OM;
+ return;
+ }
+}
+
+InstructionMatcher &
+RuleMatcher::getInstructionMatcher(StringRef SymbolicName) const {
+ for (const auto &I : InsnVariableIDs)
+ if (I.first->getSymbolicName() == SymbolicName)
+ return *I.first;
+ llvm_unreachable(
+ ("Failed to lookup instruction " + SymbolicName).str().c_str());
+}
+
+const OperandMatcher &RuleMatcher::getPhysRegOperandMatcher(Record *Reg) const {
+ const auto &I = PhysRegOperands.find(Reg);
+
+ if (I == PhysRegOperands.end()) {
+ PrintFatalError(SrcLoc, "Register " + Reg->getName() +
+ " was not declared in matcher");
+ }
+
+ return *I->second;
+}
+
+const OperandMatcher &RuleMatcher::getOperandMatcher(StringRef Name) const {
+ const auto &I = DefinedOperands.find(Name);
+
+ if (I == DefinedOperands.end())
+ PrintFatalError(SrcLoc, "Operand " + Name + " was not declared in matcher");
+
+ return *I->second;
+}
+
+void RuleMatcher::emit(MatchTable &Table) {
+ if (Matchers.empty())
+ llvm_unreachable("Unexpected empty matcher!");
+
+ // The representation supports rules that require multiple roots such as:
+ // %ptr(p0) = ...
+ // %elt0(s32) = G_LOAD %ptr
+ // %1(p0) = G_ADD %ptr, 4
+ // %elt1(s32) = G_LOAD p0 %1
+ // which could be usefully folded into:
+ // %ptr(p0) = ...
+ // %elt0(s32), %elt1(s32) = TGT_LOAD_PAIR %ptr
+ // on some targets but we don't need to make use of that yet.
+ assert(Matchers.size() == 1 && "Cannot handle multi-root matchers yet");
+
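+  // A rule is emitted as a GIM_Try scope: feature and simple-predicate checks,
+  // the matcher's predicates, fold-safety checks, the epilogue predicates, and
+  // finally the actions, ending with GIR_Done.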
+ unsigned LabelID = Table.allocateLabelID();
+ Table << MatchTable::Opcode("GIM_Try", +1)
+ << MatchTable::Comment("On fail goto")
+ << MatchTable::JumpTarget(LabelID)
+ << MatchTable::Comment(("Rule ID " + Twine(RuleID) + " //").str())
+ << MatchTable::LineBreak;
+
+ if (!RequiredFeatures.empty()) {
+ Table << MatchTable::Opcode("GIM_CheckFeatures")
+ << MatchTable::NamedValue(getNameForFeatureBitset(RequiredFeatures))
+ << MatchTable::LineBreak;
+ }
+
+ if (!RequiredSimplePredicates.empty()) {
+ for (const auto &Pred : RequiredSimplePredicates) {
+ Table << MatchTable::Opcode("GIM_CheckSimplePredicate")
+ << MatchTable::NamedValue(Pred) << MatchTable::LineBreak;
+ }
+ }
+
+ Matchers.front()->emitPredicateOpcodes(Table, *this);
+
+ // We must also check if it's safe to fold the matched instructions.
+ if (InsnVariableIDs.size() >= 2) {
+    // Collect the instruction IDs from the map and sort them for a stable
+    // ordering.
+ SmallVector<unsigned, 2> InsnIDs;
+ for (const auto &Pair : InsnVariableIDs) {
+ // Skip the root node since it isn't moving anywhere. Everything else is
+ // sinking to meet it.
+ if (Pair.first == Matchers.front().get())
+ continue;
+
+ InsnIDs.push_back(Pair.second);
+ }
+ llvm::sort(InsnIDs);
+
+ for (const auto &InsnID : InsnIDs) {
+ // Reject the difficult cases until we have a more accurate check.
+ Table << MatchTable::Opcode("GIM_CheckIsSafeToFold")
+ << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
+ << MatchTable::LineBreak;
+
+ // FIXME: Emit checks to determine it's _actually_ safe to fold and/or
+ // account for unsafe cases.
+ //
+ // Example:
+ // MI1--> %0 = ...
+ // %1 = ... %0
+ // MI0--> %2 = ... %0
+ // It's not safe to erase MI1. We currently handle this by not
+ // erasing %0 (even when it's dead).
+ //
+ // Example:
+ // MI1--> %0 = load volatile @a
+ // %1 = load volatile @a
+ // MI0--> %2 = ... %0
+ // It's not safe to sink %0's def past %1. We currently handle
+ // this by rejecting all loads.
+ //
+ // Example:
+ // MI1--> %0 = load @a
+ // %1 = store @a
+ // MI0--> %2 = ... %0
+ // It's not safe to sink %0's def past %1. We currently handle
+ // this by rejecting all loads.
+ //
+ // Example:
+ // G_CONDBR %cond, @BB1
+ // BB0:
+ // MI1--> %0 = load @a
+ // G_BR @BB1
+ // BB1:
+ // MI0--> %2 = ... %0
+ // It's not always safe to sink %0 across control flow. In this
+      //            case it may introduce a memory fault. We currently handle
+ // this by rejecting all loads.
+ }
+ }
+
+ for (const auto &PM : EpilogueMatchers)
+ PM->emitPredicateOpcodes(Table, *this);
+
+ for (const auto &MA : Actions)
+ MA->emitActionOpcodes(Table, *this);
+
+ assert((Table.isWithCoverage() ? !Table.isCombiner() : true) &&
+ "Combiner tables don't support coverage!");
+ if (Table.isWithCoverage())
+ Table << MatchTable::Opcode("GIR_Coverage") << MatchTable::IntValue(RuleID)
+ << MatchTable::LineBreak;
+ else if (!Table.isCombiner())
+ Table << MatchTable::Comment(("GIR_Coverage, " + Twine(RuleID) + ",").str())
+ << MatchTable::LineBreak;
+
+ Table << MatchTable::Opcode("GIR_Done", -1) << MatchTable::LineBreak
+ << MatchTable::Label(LabelID);
+ ++NumPatternEmitted;
+}
+
+bool RuleMatcher::isHigherPriorityThan(const RuleMatcher &B) const {
+ // Rules involving more match roots have higher priority.
+ if (Matchers.size() > B.Matchers.size())
+ return true;
+ if (Matchers.size() < B.Matchers.size())
+ return false;
+
+ for (auto Matcher : zip(Matchers, B.Matchers)) {
+ if (std::get<0>(Matcher)->isHigherPriorityThan(*std::get<1>(Matcher)))
+ return true;
+ if (std::get<1>(Matcher)->isHigherPriorityThan(*std::get<0>(Matcher)))
+ return false;
+ }
+
+ return false;
+}
+
+unsigned RuleMatcher::countRendererFns() const {
+ return std::accumulate(
+ Matchers.begin(), Matchers.end(), 0,
+ [](unsigned A, const std::unique_ptr<InstructionMatcher> &Matcher) {
+ return A + Matcher->countRendererFns();
+ });
+}
+
+//===- PredicateMatcher ---------------------------------------------------===//
+
+PredicateMatcher::~PredicateMatcher() {}
+
+//===- OperandPredicateMatcher --------------------------------------------===//
+
+OperandPredicateMatcher::~OperandPredicateMatcher() {}
+
+bool OperandPredicateMatcher::isHigherPriorityThan(
+ const OperandPredicateMatcher &B) const {
+ // Generally speaking, an instruction is more important than an Int or a
+  // LiteralInt because it can cover more nodes, but there's an exception to
+  // this: G_CONSTANTs are less important than either of those two because they
+ // are more permissive.
+
+ const InstructionOperandMatcher *AOM =
+ dyn_cast<InstructionOperandMatcher>(this);
+ const InstructionOperandMatcher *BOM =
+ dyn_cast<InstructionOperandMatcher>(&B);
+ bool AIsConstantInsn = AOM && AOM->getInsnMatcher().isConstantInstruction();
+ bool BIsConstantInsn = BOM && BOM->getInsnMatcher().isConstantInstruction();
+
+ if (AOM && BOM) {
+ // The relative priorities between a G_CONSTANT and any other instruction
+ // don't actually matter but this code is needed to ensure a strict weak
+ // ordering. This is particularly important on Windows where the rules will
+ // be incorrectly sorted without it.
+ if (AIsConstantInsn != BIsConstantInsn)
+ return AIsConstantInsn < BIsConstantInsn;
+ return false;
+ }
+
+ if (AOM && AIsConstantInsn && (B.Kind == OPM_Int || B.Kind == OPM_LiteralInt))
+ return false;
+ if (BOM && BIsConstantInsn && (Kind == OPM_Int || Kind == OPM_LiteralInt))
+ return true;
+
+ return Kind < B.Kind;
+}
+
+//===- SameOperandMatcher -------------------------------------------------===//
+
+void SameOperandMatcher::emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ const OperandMatcher &OtherOM = Rule.getOperandMatcher(MatchingName);
+ unsigned OtherInsnVarID = Rule.getInsnVarID(OtherOM.getInstructionMatcher());
+ assert(OtherInsnVarID == OtherOM.getInstructionMatcher().getInsnVarID());
+ const bool IgnoreCopies = Flags & GISF_IgnoreCopies;
+ Table << MatchTable::Opcode(IgnoreCopies
+ ? "GIM_CheckIsSameOperandIgnoreCopies"
+ : "GIM_CheckIsSameOperand")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("OpIdx") << MatchTable::IntValue(OpIdx)
+ << MatchTable::Comment("OtherMI")
+ << MatchTable::IntValue(OtherInsnVarID)
+ << MatchTable::Comment("OtherOpIdx")
+ << MatchTable::IntValue(OtherOM.getOpIdx()) << MatchTable::LineBreak;
+}
+
+//===- LLTOperandMatcher --------------------------------------------------===//
+
+std::map<LLTCodeGen, unsigned> LLTOperandMatcher::TypeIDValues;
+
+MatchTableRecord LLTOperandMatcher::getValue() const {
+ const auto VI = TypeIDValues.find(Ty);
+ if (VI == TypeIDValues.end())
+ return MatchTable::NamedValue(getTy().getCxxEnumValue());
+ return MatchTable::NamedValue(getTy().getCxxEnumValue(), VI->second);
+}
+
+bool LLTOperandMatcher::hasValue() const {
+ if (TypeIDValues.size() != KnownTypes.size())
+ initTypeIDValuesMap();
+ return TypeIDValues.count(Ty);
+}
+
+void LLTOperandMatcher::emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIM_CheckType") << MatchTable::Comment("MI")
+ << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Op")
+ << MatchTable::IntValue(OpIdx) << MatchTable::Comment("Type")
+ << getValue() << MatchTable::LineBreak;
+}
+
+//===- PointerToAnyOperandMatcher -----------------------------------------===//
+
+void PointerToAnyOperandMatcher::emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIM_CheckPointerToAny")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
+ << MatchTable::Comment("SizeInBits") << MatchTable::IntValue(SizeInBits)
+ << MatchTable::LineBreak;
+}
+
+//===- RecordNamedOperandMatcher ------------------------------------------===//
+
+void RecordNamedOperandMatcher::emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIM_RecordNamedOperand")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
+ << MatchTable::Comment("StoreIdx") << MatchTable::IntValue(StoreIdx)
+ << MatchTable::Comment("Name : " + Name) << MatchTable::LineBreak;
+}
+
+//===- ComplexPatternOperandMatcher ---------------------------------------===//
+
+void ComplexPatternOperandMatcher::emitPredicateOpcodes(
+ MatchTable &Table, RuleMatcher &Rule) const {
+ unsigned ID = getAllocatedTemporariesBaseID();
+ Table << MatchTable::Opcode("GIM_CheckComplexPattern")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
+ << MatchTable::Comment("Renderer") << MatchTable::IntValue(ID)
+ << MatchTable::NamedValue(("GICP_" + TheDef.getName()).str())
+ << MatchTable::LineBreak;
+}
+
+unsigned ComplexPatternOperandMatcher::getAllocatedTemporariesBaseID() const {
+ return Operand.getAllocatedTemporariesBaseID();
+}
+
+//===- RegisterBankOperandMatcher -----------------------------------------===//
+
+bool RegisterBankOperandMatcher::isIdentical(const PredicateMatcher &B) const {
+ return OperandPredicateMatcher::isIdentical(B) &&
+ RC.getDef() == cast<RegisterBankOperandMatcher>(&B)->RC.getDef();
+}
+
+void RegisterBankOperandMatcher::emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIM_CheckRegBankForClass")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
+ << MatchTable::Comment("RC")
+ << MatchTable::NamedValue(RC.getQualifiedName() + "RegClassID")
+ << MatchTable::LineBreak;
+}
+
+//===- MBBOperandMatcher --------------------------------------------------===//
+
+void MBBOperandMatcher::emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIM_CheckIsMBB") << MatchTable::Comment("MI")
+ << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Op")
+ << MatchTable::IntValue(OpIdx) << MatchTable::LineBreak;
+}
+
+//===- ImmOperandMatcher --------------------------------------------------===//
+
+void ImmOperandMatcher::emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIM_CheckIsImm") << MatchTable::Comment("MI")
+ << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Op")
+ << MatchTable::IntValue(OpIdx) << MatchTable::LineBreak;
+}
+
+//===- ConstantIntOperandMatcher ------------------------------------------===//
+
+void ConstantIntOperandMatcher::emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIM_CheckConstantInt")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
+ << MatchTable::IntValue(Value) << MatchTable::LineBreak;
+}
+
+//===- LiteralIntOperandMatcher -------------------------------------------===//
+
+void LiteralIntOperandMatcher::emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIM_CheckLiteralInt")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
+ << MatchTable::IntValue(Value) << MatchTable::LineBreak;
+}
+
+//===- CmpPredicateOperandMatcher -----------------------------------------===//
+
+void CmpPredicateOperandMatcher::emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIM_CheckCmpPredicate")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
+ << MatchTable::Comment("Predicate")
+ << MatchTable::NamedValue("CmpInst", PredName) << MatchTable::LineBreak;
+}
+
+//===- IntrinsicIDOperandMatcher ------------------------------------------===//
+
+void IntrinsicIDOperandMatcher::emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIM_CheckIntrinsicID")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
+ << MatchTable::NamedValue("Intrinsic::" + II->EnumName)
+ << MatchTable::LineBreak;
+}
+
+//===- OperandImmPredicateMatcher -----------------------------------------===//
+
+void OperandImmPredicateMatcher::emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIM_CheckImmOperandPredicate")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("MO") << MatchTable::IntValue(OpIdx)
+ << MatchTable::Comment("Predicate")
+ << MatchTable::NamedValue(getEnumNameForPredicate(Predicate))
+ << MatchTable::LineBreak;
+}
+
+//===- OperandMatcher -----------------------------------------------------===//
+
+std::string OperandMatcher::getOperandExpr(unsigned InsnVarID) const {
+ return "State.MIs[" + llvm::to_string(InsnVarID) + "]->getOperand(" +
+ llvm::to_string(OpIdx) + ")";
+}
+
+unsigned OperandMatcher::getInsnVarID() const { return Insn.getInsnVarID(); }
+
+void OperandMatcher::emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) {
+ if (!Optimized) {
+ std::string Comment;
+ raw_string_ostream CommentOS(Comment);
+ CommentOS << "MIs[" << getInsnVarID() << "] ";
+ if (SymbolicName.empty())
+ CommentOS << "Operand " << OpIdx;
+ else
+ CommentOS << SymbolicName;
+ Table << MatchTable::Comment(Comment) << MatchTable::LineBreak;
+ }
+
+ emitPredicateListOpcodes(Table, Rule);
+}
+
+bool OperandMatcher::isHigherPriorityThan(OperandMatcher &B) {
+ // Operand matchers involving more predicates have higher priority.
+ if (predicates_size() > B.predicates_size())
+ return true;
+ if (predicates_size() < B.predicates_size())
+ return false;
+
+ // This assumes that predicates are added in a consistent order.
+ for (auto &&Predicate : zip(predicates(), B.predicates())) {
+ if (std::get<0>(Predicate)->isHigherPriorityThan(*std::get<1>(Predicate)))
+ return true;
+ if (std::get<1>(Predicate)->isHigherPriorityThan(*std::get<0>(Predicate)))
+ return false;
+ }
+
+ return false;
+}
+
+unsigned OperandMatcher::countRendererFns() {
+ return std::accumulate(
+ predicates().begin(), predicates().end(), 0,
+ [](unsigned A,
+ const std::unique_ptr<OperandPredicateMatcher> &Predicate) {
+ return A + Predicate->countRendererFns();
+ });
+}
+
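+// Translates a SelectionDAG type constraint (TypeSetByHwMode) into the
+// corresponding GlobalISel operand type check.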
+Error OperandMatcher::addTypeCheckPredicate(const TypeSetByHwMode &VTy,
+ bool OperandIsAPointer) {
+ if (!VTy.isMachineValueType())
+ return failUnsupported("unsupported typeset");
+
+ if (VTy.getMachineValueType() == MVT::iPTR && OperandIsAPointer) {
+ addPredicate<PointerToAnyOperandMatcher>(0);
+ return Error::success();
+ }
+
+ auto OpTyOrNone = MVTToLLT(VTy.getMachineValueType().SimpleTy);
+ if (!OpTyOrNone)
+ return failUnsupported("unsupported type");
+
+ if (OperandIsAPointer)
+ addPredicate<PointerToAnyOperandMatcher>(OpTyOrNone->get().getSizeInBits());
+ else if (VTy.isPointer())
+ addPredicate<LLTOperandMatcher>(
+ LLT::pointer(VTy.getPtrAddrSpace(), OpTyOrNone->get().getSizeInBits()));
+ else
+ addPredicate<LLTOperandMatcher>(*OpTyOrNone);
+ return Error::success();
+}
+
+//===- InstructionOpcodeMatcher -------------------------------------------===//
+
+DenseMap<const CodeGenInstruction *, unsigned>
+ InstructionOpcodeMatcher::OpcodeValues;
+
+MatchTableRecord
+InstructionOpcodeMatcher::getInstValue(const CodeGenInstruction *I) const {
+ const auto VI = OpcodeValues.find(I);
+ if (VI != OpcodeValues.end())
+ return MatchTable::NamedValue(I->Namespace, I->TheDef->getName(),
+ VI->second);
+ return MatchTable::NamedValue(I->Namespace, I->TheDef->getName());
+}
+
+void InstructionOpcodeMatcher::initOpcodeValuesMap(
+ const CodeGenTarget &Target) {
+ OpcodeValues.clear();
+
+ unsigned OpcodeValue = 0;
+ for (const CodeGenInstruction *I : Target.getInstructionsByEnumValue())
+ OpcodeValues[I] = OpcodeValue++;
+}
+
+MatchTableRecord InstructionOpcodeMatcher::getValue() const {
+ assert(Insts.size() == 1);
+
+ const CodeGenInstruction *I = Insts[0];
+ const auto VI = OpcodeValues.find(I);
+ if (VI != OpcodeValues.end())
+ return MatchTable::NamedValue(I->Namespace, I->TheDef->getName(),
+ VI->second);
+ return MatchTable::NamedValue(I->Namespace, I->TheDef->getName());
+}
+
+void InstructionOpcodeMatcher::emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ StringRef CheckType =
+ Insts.size() == 1 ? "GIM_CheckOpcode" : "GIM_CheckOpcodeIsEither";
+ Table << MatchTable::Opcode(CheckType) << MatchTable::Comment("MI")
+ << MatchTable::IntValue(InsnVarID);
+
+ for (const CodeGenInstruction *I : Insts)
+ Table << getInstValue(I);
+ Table << MatchTable::LineBreak;
+}
+
+bool InstructionOpcodeMatcher::isHigherPriorityThan(
+ const InstructionPredicateMatcher &B) const {
+ if (InstructionPredicateMatcher::isHigherPriorityThan(B))
+ return true;
+ if (B.InstructionPredicateMatcher::isHigherPriorityThan(*this))
+ return false;
+
+ // Prioritize opcodes for cosmetic reasons in the generated source. Although
+ // this is cosmetic at the moment, we may want to drive a similar ordering
+ // using instruction frequency information to improve compile time.
+ if (const InstructionOpcodeMatcher *BO =
+ dyn_cast<InstructionOpcodeMatcher>(&B))
+ return Insts[0]->TheDef->getName() < BO->Insts[0]->TheDef->getName();
+
+ return false;
+}
+
+bool InstructionOpcodeMatcher::isConstantInstruction() const {
+ return Insts.size() == 1 && Insts[0]->TheDef->getName() == "G_CONSTANT";
+}
+
+StringRef InstructionOpcodeMatcher::getOpcode() const {
+ return Insts[0]->TheDef->getName();
+}
+
+bool InstructionOpcodeMatcher::isVariadicNumOperands() const {
+ // If one is variadic, they all should be.
+ return Insts[0]->Operands.isVariadic;
+}
+
+StringRef InstructionOpcodeMatcher::getOperandType(unsigned OpIdx) const {
+  // The types are expected to be uniform for all alternatives.
+ return Insts[0]->Operands[OpIdx].OperandType;
+}
+
+//===- InstructionNumOperandsMatcher --------------------------------------===//
+
+void InstructionNumOperandsMatcher::emitPredicateOpcodes(
+ MatchTable &Table, RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIM_CheckNumOperands")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("Expected") << MatchTable::IntValue(NumOperands)
+ << MatchTable::LineBreak;
+}
+
+//===- InstructionImmPredicateMatcher -------------------------------------===//
+
+bool InstructionImmPredicateMatcher::isIdentical(
+ const PredicateMatcher &B) const {
+ return InstructionPredicateMatcher::isIdentical(B) &&
+ Predicate.getOrigPatFragRecord() ==
+ cast<InstructionImmPredicateMatcher>(&B)
+ ->Predicate.getOrigPatFragRecord();
+}
+
+void InstructionImmPredicateMatcher::emitPredicateOpcodes(
+ MatchTable &Table, RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode(getMatchOpcodeForImmPredicate(Predicate))
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("Predicate")
+ << MatchTable::NamedValue(getEnumNameForPredicate(Predicate))
+ << MatchTable::LineBreak;
+}
+
+//===- AtomicOrderingMMOPredicateMatcher ----------------------------------===//
+
+bool AtomicOrderingMMOPredicateMatcher::isIdentical(
+ const PredicateMatcher &B) const {
+ if (!InstructionPredicateMatcher::isIdentical(B))
+ return false;
+ const auto &R = *cast<AtomicOrderingMMOPredicateMatcher>(&B);
+ return Order == R.Order && Comparator == R.Comparator;
+}
+
+void AtomicOrderingMMOPredicateMatcher::emitPredicateOpcodes(
+ MatchTable &Table, RuleMatcher &Rule) const {
+ StringRef Opcode = "GIM_CheckAtomicOrdering";
+
+ if (Comparator == AO_OrStronger)
+ Opcode = "GIM_CheckAtomicOrderingOrStrongerThan";
+ if (Comparator == AO_WeakerThan)
+ Opcode = "GIM_CheckAtomicOrderingWeakerThan";
+
+ Table << MatchTable::Opcode(Opcode) << MatchTable::Comment("MI")
+ << MatchTable::IntValue(InsnVarID) << MatchTable::Comment("Order")
+ << MatchTable::NamedValue(("(int64_t)AtomicOrdering::" + Order).str())
+ << MatchTable::LineBreak;
+}
+
+//===- MemorySizePredicateMatcher -----------------------------------------===//
+
+void MemorySizePredicateMatcher::emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIM_CheckMemorySizeEqualTo")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("MMO") << MatchTable::IntValue(MMOIdx)
+ << MatchTable::Comment("Size") << MatchTable::IntValue(Size)
+ << MatchTable::LineBreak;
+}
+
+//===- MemoryAddressSpacePredicateMatcher ---------------------------------===//
+
+bool MemoryAddressSpacePredicateMatcher::isIdentical(
+ const PredicateMatcher &B) const {
+ if (!InstructionPredicateMatcher::isIdentical(B))
+ return false;
+ auto *Other = cast<MemoryAddressSpacePredicateMatcher>(&B);
+ return MMOIdx == Other->MMOIdx && AddrSpaces == Other->AddrSpaces;
+}
+
+void MemoryAddressSpacePredicateMatcher::emitPredicateOpcodes(
+ MatchTable &Table, RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIM_CheckMemoryAddressSpace")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("MMO")
+ << MatchTable::IntValue(MMOIdx)
+ // Encode number of address spaces to expect.
+ << MatchTable::Comment("NumAddrSpace")
+ << MatchTable::IntValue(AddrSpaces.size());
+ for (unsigned AS : AddrSpaces)
+ Table << MatchTable::Comment("AddrSpace") << MatchTable::IntValue(AS);
+
+ Table << MatchTable::LineBreak;
+}
+
+//===- MemoryAlignmentPredicateMatcher ------------------------------------===//
+
+bool MemoryAlignmentPredicateMatcher::isIdentical(
+ const PredicateMatcher &B) const {
+ if (!InstructionPredicateMatcher::isIdentical(B))
+ return false;
+ auto *Other = cast<MemoryAlignmentPredicateMatcher>(&B);
+ return MMOIdx == Other->MMOIdx && MinAlign == Other->MinAlign;
+}
+
+void MemoryAlignmentPredicateMatcher::emitPredicateOpcodes(
+ MatchTable &Table, RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIM_CheckMemoryAlignment")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("MMO") << MatchTable::IntValue(MMOIdx)
+ << MatchTable::Comment("MinAlign") << MatchTable::IntValue(MinAlign)
+ << MatchTable::LineBreak;
+}
+
+//===- MemoryVsLLTSizePredicateMatcher ------------------------------------===//
+
+bool MemoryVsLLTSizePredicateMatcher::isIdentical(
+ const PredicateMatcher &B) const {
+ return InstructionPredicateMatcher::isIdentical(B) &&
+ MMOIdx == cast<MemoryVsLLTSizePredicateMatcher>(&B)->MMOIdx &&
+ Relation == cast<MemoryVsLLTSizePredicateMatcher>(&B)->Relation &&
+ OpIdx == cast<MemoryVsLLTSizePredicateMatcher>(&B)->OpIdx;
+}
+
+void MemoryVsLLTSizePredicateMatcher::emitPredicateOpcodes(
+ MatchTable &Table, RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode(
+ Relation == EqualTo ? "GIM_CheckMemorySizeEqualToLLT"
+ : Relation == GreaterThan ? "GIM_CheckMemorySizeGreaterThanLLT"
+ : "GIM_CheckMemorySizeLessThanLLT")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("MMO") << MatchTable::IntValue(MMOIdx)
+ << MatchTable::Comment("OpIdx") << MatchTable::IntValue(OpIdx)
+ << MatchTable::LineBreak;
+}
+
+//===- VectorSplatImmPredicateMatcher -------------------------------------===//
+
+void VectorSplatImmPredicateMatcher::emitPredicateOpcodes(
+ MatchTable &Table, RuleMatcher &Rule) const {
+ if (Kind == AllOnes)
+ Table << MatchTable::Opcode("GIM_CheckIsBuildVectorAllOnes");
+ else
+ Table << MatchTable::Opcode("GIM_CheckIsBuildVectorAllZeros");
+
+ Table << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID);
+ Table << MatchTable::LineBreak;
+}
+
+//===- GenericInstructionPredicateMatcher ---------------------------------===//
+
+GenericInstructionPredicateMatcher::GenericInstructionPredicateMatcher(
+ unsigned InsnVarID, TreePredicateFn Predicate)
+ : GenericInstructionPredicateMatcher(InsnVarID,
+ getEnumNameForPredicate(Predicate)) {}
+
+bool GenericInstructionPredicateMatcher::isIdentical(
+ const PredicateMatcher &B) const {
+ return InstructionPredicateMatcher::isIdentical(B) &&
+ EnumVal ==
+ static_cast<const GenericInstructionPredicateMatcher &>(B).EnumVal;
+}
+void GenericInstructionPredicateMatcher::emitPredicateOpcodes(
+ MatchTable &Table, RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIM_CheckCxxInsnPredicate")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::Comment("FnId") << MatchTable::NamedValue(EnumVal)
+ << MatchTable::LineBreak;
+}
+
+//===- InstructionMatcher -------------------------------------------------===//
+
+OperandMatcher &
+InstructionMatcher::addOperand(unsigned OpIdx, const std::string &SymbolicName,
+ unsigned AllocatedTemporariesBaseID) {
+ Operands.emplace_back(new OperandMatcher(*this, OpIdx, SymbolicName,
+ AllocatedTemporariesBaseID));
+ if (!SymbolicName.empty())
+ Rule.defineOperand(SymbolicName, *Operands.back());
+
+ return *Operands.back();
+}
+
+OperandMatcher &InstructionMatcher::getOperand(unsigned OpIdx) {
+ auto I = llvm::find_if(Operands,
+ [&OpIdx](const std::unique_ptr<OperandMatcher> &X) {
+ return X->getOpIdx() == OpIdx;
+ });
+ if (I != Operands.end())
+ return **I;
+ llvm_unreachable("Failed to lookup operand");
+}
+
+OperandMatcher &InstructionMatcher::addPhysRegInput(Record *Reg, unsigned OpIdx,
+ unsigned TempOpIdx) {
+ assert(SymbolicName.empty());
+ OperandMatcher *OM = new OperandMatcher(*this, OpIdx, "", TempOpIdx);
+ Operands.emplace_back(OM);
+ Rule.definePhysRegOperand(Reg, *OM);
+ PhysRegInputs.emplace_back(Reg, OpIdx);
+ return *OM;
+}
+
+void InstructionMatcher::emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) {
+ if (NumOperandsCheck)
+ InstructionNumOperandsMatcher(InsnVarID, getNumOperands())
+ .emitPredicateOpcodes(Table, Rule);
+
+  // First emit all instruction-level predicates that need to be verified
+  // before we can verify operands.
+ emitFilteredPredicateListOpcodes(
+ [](const PredicateMatcher &P) { return !P.dependsOnOperands(); }, Table,
+ Rule);
+
+ // Emit all operand constraints.
+ for (const auto &Operand : Operands)
+ Operand->emitPredicateOpcodes(Table, Rule);
+
+ // All of the tablegen defined predicates should now be matched. Now emit
+ // any custom predicates that rely on all generated checks.
+ emitFilteredPredicateListOpcodes(
+ [](const PredicateMatcher &P) { return P.dependsOnOperands(); }, Table,
+ Rule);
+}
+
+bool InstructionMatcher::isHigherPriorityThan(InstructionMatcher &B) {
+ // Instruction matchers involving more operands have higher priority.
+ if (Operands.size() > B.Operands.size())
+ return true;
+ if (Operands.size() < B.Operands.size())
+ return false;
+
+ for (auto &&P : zip(predicates(), B.predicates())) {
+ auto L = static_cast<InstructionPredicateMatcher *>(std::get<0>(P).get());
+ auto R = static_cast<InstructionPredicateMatcher *>(std::get<1>(P).get());
+ if (L->isHigherPriorityThan(*R))
+ return true;
+ if (R->isHigherPriorityThan(*L))
+ return false;
+ }
+
+ for (auto Operand : zip(Operands, B.Operands)) {
+ if (std::get<0>(Operand)->isHigherPriorityThan(*std::get<1>(Operand)))
+ return true;
+ if (std::get<1>(Operand)->isHigherPriorityThan(*std::get<0>(Operand)))
+ return false;
+ }
+
+ return false;
+}
+
+unsigned InstructionMatcher::countRendererFns() {
+ return std::accumulate(
+ predicates().begin(), predicates().end(), 0,
+ [](unsigned A,
+ const std::unique_ptr<PredicateMatcher> &Predicate) {
+ return A + Predicate->countRendererFns();
+ }) +
+ std::accumulate(
+ Operands.begin(), Operands.end(), 0,
+ [](unsigned A, const std::unique_ptr<OperandMatcher> &Operand) {
+ return A + Operand->countRendererFns();
+ });
+}
+
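+// Reorders this instruction's predicates so that cheap, highly selective
+// checks (opcode, intrinsic ID, operand types) come first; for nested
+// instructions the predicates on the already-recorded def operand are dropped.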
+void InstructionMatcher::optimize() {
+ SmallVector<std::unique_ptr<PredicateMatcher>, 8> Stash;
+ const auto &OpcMatcher = getOpcodeMatcher();
+
+ Stash.push_back(predicates_pop_front());
+ if (Stash.back().get() == &OpcMatcher) {
+ if (NumOperandsCheck && OpcMatcher.isVariadicNumOperands() &&
+ getNumOperands() != 0)
+ Stash.emplace_back(
+ new InstructionNumOperandsMatcher(InsnVarID, getNumOperands()));
+ NumOperandsCheck = false;
+
+ for (auto &OM : Operands)
+ for (auto &OP : OM->predicates())
+ if (isa<IntrinsicIDOperandMatcher>(OP)) {
+ Stash.push_back(std::move(OP));
+ OM->eraseNullPredicates();
+ break;
+ }
+ }
+
+ if (InsnVarID > 0) {
+ assert(!Operands.empty() && "Nested instruction is expected to def a vreg");
+ for (auto &OP : Operands[0]->predicates())
+ OP.reset();
+ Operands[0]->eraseNullPredicates();
+ }
+ for (auto &OM : Operands) {
+ for (auto &OP : OM->predicates())
+ if (isa<LLTOperandMatcher>(OP))
+ Stash.push_back(std::move(OP));
+ OM->eraseNullPredicates();
+ }
+ while (!Stash.empty())
+ prependPredicate(Stash.pop_back_val());
+}
+
+//===- InstructionOperandMatcher ------------------------------------------===//
+
+void InstructionOperandMatcher::emitCaptureOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ const unsigned NewInsnVarID = InsnMatcher->getInsnVarID();
+ const bool IgnoreCopies = Flags & GISF_IgnoreCopies;
+ Table << MatchTable::Opcode(IgnoreCopies ? "GIM_RecordInsnIgnoreCopies"
+ : "GIM_RecordInsn")
+ << MatchTable::Comment("DefineMI") << MatchTable::IntValue(NewInsnVarID)
+ << MatchTable::Comment("MI") << MatchTable::IntValue(getInsnVarID())
+ << MatchTable::Comment("OpIdx") << MatchTable::IntValue(getOpIdx())
+ << MatchTable::Comment("MIs[" + llvm::to_string(NewInsnVarID) + "]")
+ << MatchTable::LineBreak;
+}
+
+bool InstructionOperandMatcher::isHigherPriorityThan(
+ const OperandPredicateMatcher &B) const {
+ if (OperandPredicateMatcher::isHigherPriorityThan(B))
+ return true;
+ if (B.OperandPredicateMatcher::isHigherPriorityThan(*this))
+ return false;
+
+ if (const InstructionOperandMatcher *BP =
+ dyn_cast<InstructionOperandMatcher>(&B))
+ if (InsnMatcher->isHigherPriorityThan(*BP->InsnMatcher))
+ return true;
+ return false;
+}
+
+//===- OperandRenderer ----------------------------------------------------===//
+
+OperandRenderer::~OperandRenderer() {}
+
+//===- CopyRenderer -------------------------------------------------------===//
+
+void CopyRenderer::emitRenderOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ const OperandMatcher &Operand = Rule.getOperandMatcher(SymbolicName);
+ unsigned OldInsnVarID = Rule.getInsnVarID(Operand.getInstructionMatcher());
+ Table << MatchTable::Opcode("GIR_Copy") << MatchTable::Comment("NewInsnID")
+ << MatchTable::IntValue(NewInsnID) << MatchTable::Comment("OldInsnID")
+ << MatchTable::IntValue(OldInsnVarID) << MatchTable::Comment("OpIdx")
+ << MatchTable::IntValue(Operand.getOpIdx())
+ << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
+}
+
+//===- CopyPhysRegRenderer ------------------------------------------------===//
+
+void CopyPhysRegRenderer::emitRenderOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ const OperandMatcher &Operand = Rule.getPhysRegOperandMatcher(PhysReg);
+ unsigned OldInsnVarID = Rule.getInsnVarID(Operand.getInstructionMatcher());
+ Table << MatchTable::Opcode("GIR_Copy") << MatchTable::Comment("NewInsnID")
+ << MatchTable::IntValue(NewInsnID) << MatchTable::Comment("OldInsnID")
+ << MatchTable::IntValue(OldInsnVarID) << MatchTable::Comment("OpIdx")
+ << MatchTable::IntValue(Operand.getOpIdx())
+ << MatchTable::Comment(PhysReg->getName()) << MatchTable::LineBreak;
+}
+
+//===- CopyOrAddZeroRegRenderer -------------------------------------------===//
+
+void CopyOrAddZeroRegRenderer::emitRenderOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ const OperandMatcher &Operand = Rule.getOperandMatcher(SymbolicName);
+ unsigned OldInsnVarID = Rule.getInsnVarID(Operand.getInstructionMatcher());
+ Table << MatchTable::Opcode("GIR_CopyOrAddZeroReg")
+ << MatchTable::Comment("NewInsnID") << MatchTable::IntValue(NewInsnID)
+ << MatchTable::Comment("OldInsnID")
+ << MatchTable::IntValue(OldInsnVarID) << MatchTable::Comment("OpIdx")
+ << MatchTable::IntValue(Operand.getOpIdx())
+ << MatchTable::NamedValue(
+ (ZeroRegisterDef->getValue("Namespace")
+ ? ZeroRegisterDef->getValueAsString("Namespace")
+ : ""),
+ ZeroRegisterDef->getName())
+ << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
+}
+
+//===- CopyConstantAsImmRenderer ------------------------------------------===//
+
+void CopyConstantAsImmRenderer::emitRenderOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ InstructionMatcher &InsnMatcher = Rule.getInstructionMatcher(SymbolicName);
+ unsigned OldInsnVarID = Rule.getInsnVarID(InsnMatcher);
+ Table << MatchTable::Opcode(Signed ? "GIR_CopyConstantAsSImm"
+ : "GIR_CopyConstantAsUImm")
+ << MatchTable::Comment("NewInsnID") << MatchTable::IntValue(NewInsnID)
+ << MatchTable::Comment("OldInsnID")
+ << MatchTable::IntValue(OldInsnVarID)
+ << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
+}
+
+//===- CopyFConstantAsFPImmRenderer ---------------------------------------===//
+
+void CopyFConstantAsFPImmRenderer::emitRenderOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ InstructionMatcher &InsnMatcher = Rule.getInstructionMatcher(SymbolicName);
+ unsigned OldInsnVarID = Rule.getInsnVarID(InsnMatcher);
+ Table << MatchTable::Opcode("GIR_CopyFConstantAsFPImm")
+ << MatchTable::Comment("NewInsnID") << MatchTable::IntValue(NewInsnID)
+ << MatchTable::Comment("OldInsnID")
+ << MatchTable::IntValue(OldInsnVarID)
+ << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
+}
+
+//===- CopySubRegRenderer -------------------------------------------------===//
+
+void CopySubRegRenderer::emitRenderOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ const OperandMatcher &Operand = Rule.getOperandMatcher(SymbolicName);
+ unsigned OldInsnVarID = Rule.getInsnVarID(Operand.getInstructionMatcher());
+ Table << MatchTable::Opcode("GIR_CopySubReg")
+ << MatchTable::Comment("NewInsnID") << MatchTable::IntValue(NewInsnID)
+ << MatchTable::Comment("OldInsnID")
+ << MatchTable::IntValue(OldInsnVarID) << MatchTable::Comment("OpIdx")
+ << MatchTable::IntValue(Operand.getOpIdx())
+ << MatchTable::Comment("SubRegIdx")
+ << MatchTable::IntValue(SubReg->EnumValue)
+ << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
+}
+
+//===- AddRegisterRenderer ------------------------------------------------===//
+
+void AddRegisterRenderer::emitRenderOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIR_AddRegister")
+ << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID);
+ if (RegisterDef->getName() != "zero_reg") {
+ Table << MatchTable::NamedValue(
+ (RegisterDef->getValue("Namespace")
+ ? RegisterDef->getValueAsString("Namespace")
+ : ""),
+ RegisterDef->getName());
+ } else {
+ Table << MatchTable::NamedValue(Target.getRegNamespace(), "NoRegister");
+ }
+ Table << MatchTable::Comment("AddRegisterRegFlags");
+
+  // TODO: This is encoded as a 64-bit element, but only 16 or 32 bits are
+ // really needed for a physical register reference. We can pack the
+ // register and flags in a single field.
+ if (IsDef)
+ Table << MatchTable::NamedValue("RegState::Define");
+ else
+ Table << MatchTable::IntValue(0);
+ Table << MatchTable::LineBreak;
+}
+
+//===- TempRegRenderer ----------------------------------------------------===//
+
+void TempRegRenderer::emitRenderOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ if (SubRegIdx) {
+ assert(!IsDef);
+ Table << MatchTable::Opcode("GIR_AddTempSubRegister");
+ } else
+ Table << MatchTable::Opcode("GIR_AddTempRegister");
+
+ Table << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
+ << MatchTable::Comment("TempRegID") << MatchTable::IntValue(TempRegID)
+ << MatchTable::Comment("TempRegFlags");
+
+ if (IsDef) {
+ SmallString<32> RegFlags;
+ RegFlags += "RegState::Define";
+ if (IsDead)
+ RegFlags += "|RegState::Dead";
+ Table << MatchTable::NamedValue(RegFlags);
+ } else
+ Table << MatchTable::IntValue(0);
+
+ if (SubRegIdx)
+ Table << MatchTable::NamedValue(SubRegIdx->getQualifiedName());
+ Table << MatchTable::LineBreak;
+}
+
+//===- SubRegIndexRenderer ------------------------------------------------===//
+
+void SubRegIndexRenderer::emitRenderOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIR_AddImm") << MatchTable::Comment("InsnID")
+ << MatchTable::IntValue(InsnID) << MatchTable::Comment("SubRegIndex")
+ << MatchTable::IntValue(SubRegIdx->EnumValue) << MatchTable::LineBreak;
+}
+
+//===- RenderComplexPatternOperand ----------------------------------------===//
+
+void RenderComplexPatternOperand::emitRenderOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode(
+ SubOperand ? (SubReg ? "GIR_ComplexSubOperandSubRegRenderer"
+ : "GIR_ComplexSubOperandRenderer")
+ : "GIR_ComplexRenderer")
+ << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
+ << MatchTable::Comment("RendererID")
+ << MatchTable::IntValue(RendererID);
+ if (SubOperand)
+ Table << MatchTable::Comment("SubOperand")
+ << MatchTable::IntValue(*SubOperand);
+ if (SubReg)
+ Table << MatchTable::Comment("SubRegIdx")
+ << MatchTable::IntValue(SubReg->EnumValue);
+ Table << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
+}
+
+//===- CustomRenderer -----------------------------------------------------===//
+
+void CustomRenderer::emitRenderOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ InstructionMatcher &InsnMatcher = Rule.getInstructionMatcher(SymbolicName);
+ unsigned OldInsnVarID = Rule.getInsnVarID(InsnMatcher);
+ Table << MatchTable::Opcode("GIR_CustomRenderer")
+ << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
+ << MatchTable::Comment("OldInsnID")
+ << MatchTable::IntValue(OldInsnVarID) << MatchTable::Comment("Renderer")
+ << MatchTable::NamedValue("GICR_" +
+ Renderer.getValueAsString("RendererFn").str())
+ << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
+}
+
+//===- CustomOperandRenderer ----------------------------------------------===//
+
+void CustomOperandRenderer::emitRenderOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ const OperandMatcher &OpdMatcher = Rule.getOperandMatcher(SymbolicName);
+ Table << MatchTable::Opcode("GIR_CustomOperandRenderer")
+ << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
+ << MatchTable::Comment("OldInsnID")
+ << MatchTable::IntValue(OpdMatcher.getInsnVarID())
+ << MatchTable::Comment("OpIdx")
+ << MatchTable::IntValue(OpdMatcher.getOpIdx())
+ << MatchTable::Comment("OperandRenderer")
+ << MatchTable::NamedValue("GICR_" +
+ Renderer.getValueAsString("RendererFn").str())
+ << MatchTable::Comment(SymbolicName) << MatchTable::LineBreak;
+}
+
+//===- CustomCXXAction ----------------------------------------------------===//
+
+void CustomCXXAction::emitActionOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIR_CustomAction")
+ << MatchTable::NamedValue(FnEnumName) << MatchTable::LineBreak;
+}
+
+//===- BuildMIAction ------------------------------------------------------===//
+
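+// An instruction matched by \p Insn can be mutated in place only when every
+// rendered operand is a plain copy of the corresponding operand of that same
+// instruction, in the same position.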
+bool BuildMIAction::canMutate(RuleMatcher &Rule,
+ const InstructionMatcher *Insn) const {
+ if (!Insn)
+ return false;
+
+ if (OperandRenderers.size() != Insn->getNumOperands())
+ return false;
+
+ for (const auto &Renderer : enumerate(OperandRenderers)) {
+ if (const auto *Copy = dyn_cast<CopyRenderer>(&*Renderer.value())) {
+ const OperandMatcher &OM =
+ Rule.getOperandMatcher(Copy->getSymbolicName());
+ if (Insn != &OM.getInstructionMatcher() ||
+ OM.getOpIdx() != Renderer.index())
+ return false;
+ } else
+ return false;
+ }
+
+ return true;
+}
+
+void BuildMIAction::chooseInsnToMutate(RuleMatcher &Rule) {
+ for (auto *MutateCandidate : Rule.mutatable_insns()) {
+ if (canMutate(Rule, MutateCandidate)) {
+ // Take the first one we're offered that we're able to mutate.
+ Rule.reserveInsnMatcherForMutation(MutateCandidate);
+ Matched = MutateCandidate;
+ return;
+ }
+ }
+}
+
+void BuildMIAction::emitActionOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ if (Matched) {
+ assert(canMutate(Rule, Matched) &&
+ "Arranged to mutate an insn that isn't mutatable");
+
+ unsigned RecycleInsnID = Rule.getInsnVarID(*Matched);
+ Table << MatchTable::Opcode("GIR_MutateOpcode")
+ << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
+ << MatchTable::Comment("RecycleInsnID")
+ << MatchTable::IntValue(RecycleInsnID)
+ << MatchTable::Comment("Opcode")
+ << MatchTable::NamedValue(I->Namespace, I->TheDef->getName())
+ << MatchTable::LineBreak;
+
+ if (!I->ImplicitDefs.empty() || !I->ImplicitUses.empty()) {
+ for (auto *Def : I->ImplicitDefs) {
+ auto Namespace = Def->getValue("Namespace")
+ ? Def->getValueAsString("Namespace")
+ : "";
+ Table << MatchTable::Opcode("GIR_AddImplicitDef")
+ << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
+ << MatchTable::NamedValue(Namespace, Def->getName())
+ << MatchTable::LineBreak;
+ }
+ for (auto *Use : I->ImplicitUses) {
+ auto Namespace = Use->getValue("Namespace")
+ ? Use->getValueAsString("Namespace")
+ : "";
+ Table << MatchTable::Opcode("GIR_AddImplicitUse")
+ << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
+ << MatchTable::NamedValue(Namespace, Use->getName())
+ << MatchTable::LineBreak;
+ }
+ }
+ return;
+ }
+
+ // TODO: Simple permutation looks like it could be almost as common as
+ // mutation due to commutative operations.
+
+ Table << MatchTable::Opcode("GIR_BuildMI") << MatchTable::Comment("InsnID")
+ << MatchTable::IntValue(InsnID) << MatchTable::Comment("Opcode")
+ << MatchTable::NamedValue(I->Namespace, I->TheDef->getName())
+ << MatchTable::LineBreak;
+ for (const auto &Renderer : OperandRenderers)
+ Renderer->emitRenderOpcodes(Table, Rule);
+
+ if (I->mayLoad || I->mayStore) {
+ Table << MatchTable::Opcode("GIR_MergeMemOperands")
+ << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
+ << MatchTable::Comment("MergeInsnID's");
+    // Emit the IDs for all the instructions that are matched by this rule.
+ // TODO: Limit this to matched instructions that mayLoad/mayStore or have
+ // some other means of having a memoperand. Also limit this to
+ // emitted instructions that expect to have a memoperand too. For
+ // example, (G_SEXT (G_LOAD x)) that results in separate load and
+ // sign-extend instructions shouldn't put the memoperand on the
+ // sign-extend since it has no effect there.
+ std::vector<unsigned> MergeInsnIDs;
+ for (const auto &IDMatcherPair : Rule.defined_insn_vars())
+ MergeInsnIDs.push_back(IDMatcherPair.second);
+ llvm::sort(MergeInsnIDs);
+ for (const auto &MergeInsnID : MergeInsnIDs)
+ Table << MatchTable::IntValue(MergeInsnID);
+ Table << MatchTable::NamedValue("GIU_MergeMemOperands_EndOfList")
+ << MatchTable::LineBreak;
+ }
+
+ // FIXME: This is a hack but it's sufficient for ISel. We'll need to do
+ // better for combines. Particularly when there are multiple match
+ // roots.
+ if (InsnID == 0)
+ Table << MatchTable::Opcode("GIR_EraseFromParent")
+ << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
+ << MatchTable::LineBreak;
+}
+
+//===- ConstrainOperandToRegClassAction -----------------------------------===//
+
+void ConstrainOperandToRegClassAction::emitActionOpcodes(
+ MatchTable &Table, RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIR_ConstrainOperandRC")
+ << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
+ << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx)
+ << MatchTable::NamedValue(RC.getQualifiedName() + "RegClassID")
+ << MatchTable::LineBreak;
+}
+
+//===- MakeTempRegisterAction ---------------------------------------------===//
+
+void MakeTempRegisterAction::emitActionOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const {
+ Table << MatchTable::Opcode("GIR_MakeTempReg")
+ << MatchTable::Comment("TempRegID") << MatchTable::IntValue(TempRegID)
+ << MatchTable::Comment("TypeID")
+ << MatchTable::NamedValue(Ty.getCxxEnumValue())
+ << MatchTable::LineBreak;
+}
+
+} // namespace gi
+} // namespace llvm
diff --git a/llvm/utils/TableGen/GlobalISelMatchTable.h b/llvm/utils/TableGen/GlobalISelMatchTable.h
new file mode 100644
index 000000000000..fcb3392226c1
--- /dev/null
+++ b/llvm/utils/TableGen/GlobalISelMatchTable.h
@@ -0,0 +1,2162 @@
+//===- GlobalISelMatchTable.h ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file contains the code related to the GlobalISel Match Table emitted by
+/// GlobalISelEmitter.cpp. The generated match table is interpreted at runtime
+/// by `GIMatchTableExecutorImpl.h` to match & apply ISel patterns.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_GLOBALISELMATCHTABLE_H
+#define LLVM_UTILS_TABLEGEN_GLOBALISELMATCHTABLE_H
+
+#include "CodeGenDAGPatterns.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/SaveAndRestore.h"
+#include <deque>
+#include <list>
+#include <map>
+#include <memory>
+#include <optional>
+#include <set>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+class Record;
+class SMLoc;
+class CodeGenRegisterClass;
+
+// Use a namespace to avoid conflicts because there are some fairly generic names
+// in there (e.g. Matcher).
+namespace gi {
+class MatchTable;
+class Matcher;
+class OperandMatcher;
+class MatchAction;
+class PredicateMatcher;
+class InstructionMatcher;
+
+enum {
+ GISF_IgnoreCopies = 0x1,
+};
+
+using GISelFlags = std::uint16_t;
+
+//===- Helper functions ---------------------------------------------------===//
+
+std::string getNameForFeatureBitset(const std::vector<Record *> &FeatureBitset);
+
+/// Takes a sequence of \p Rules and groups them based on the predicates
+/// they share. \p MatcherStorage is used as a memory container
+/// for the groups that are created as part of this process.
+///
+/// What this optimization does looks like this if GroupT = GroupMatcher:
+/// Output without optimization:
+/// \verbatim
+/// # R1
+/// # predicate A
+/// # predicate B
+/// ...
+/// # R2
+/// # predicate A // <-- effectively this is going to be checked twice.
+/// // Once in R1 and once in R2.
+/// # predicate C
+/// \endverbatim
+/// Output with optimization:
+/// \verbatim
+/// # Group1_2
+/// # predicate A // <-- Check is now shared.
+/// # R1
+/// # predicate B
+/// # R2
+/// # predicate C
+/// \endverbatim
+template <class GroupT>
+std::vector<Matcher *>
+optimizeRules(ArrayRef<Matcher *> Rules,
+ std::vector<std::unique_ptr<Matcher>> &MatcherStorage);
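+
+// Illustrative sketch (not part of the upstream header): a call to the helper
+// above might look roughly like this, with `Rules` holding the per-rule
+// matchers and `Storage` owning any group matchers created along the way:
+//   std::vector<std::unique_ptr<Matcher>> Storage;
+//   std::vector<Matcher *> Optimized =
+//       optimizeRules<GroupMatcher>(Rules, Storage);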
+
+/// A record to be stored in a MatchTable.
+///
+/// This class represents any and all output that may be required to emit the
+/// MatchTable. Instances are most often configured to represent an opcode or
+/// value that will be emitted to the table with some formatting, but they can
+/// also represent commas, comments, and other formatting instructions.
+struct MatchTableRecord {
+ enum RecordFlagsBits {
+ MTRF_None = 0x0,
+    /// Causes EmitStr to be formatted as a comment when emitted.
+ MTRF_Comment = 0x1,
+ /// Causes the record value to be followed by a comma when emitted.
+ MTRF_CommaFollows = 0x2,
+ /// Causes the record value to be followed by a line break when emitted.
+ MTRF_LineBreakFollows = 0x4,
+ /// Indicates that the record defines a label and causes an additional
+ /// comment to be emitted containing the index of the label.
+ MTRF_Label = 0x8,
+ /// Causes the record to be emitted as the index of the label specified by
+ /// LabelID along with a comment indicating where that label is.
+ MTRF_JumpTarget = 0x10,
+ /// Causes the formatter to add a level of indentation before emitting the
+ /// record.
+ MTRF_Indent = 0x20,
+ /// Causes the formatter to remove a level of indentation after emitting the
+ /// record.
+ MTRF_Outdent = 0x40,
+ };
+
+ /// When MTRF_Label or MTRF_JumpTarget is used, indicates a label id to
+ /// reference or define.
+ unsigned LabelID;
+ /// The string to emit. Depending on the MTRF_* flags it may be a comment, a
+  /// value, or a label name.
+ std::string EmitStr;
+
+private:
+ /// The number of MatchTable elements described by this record. Comments are 0
+ /// while values are typically 1. Values >1 may occur when we need to emit
+ /// values that exceed the size of a MatchTable element.
+ unsigned NumElements;
+
+public:
+ /// A bitfield of RecordFlagsBits flags.
+ unsigned Flags;
+
+ /// The actual run-time value, if known
+ int64_t RawValue;
+
+ MatchTableRecord(std::optional<unsigned> LabelID_, StringRef EmitStr,
+ unsigned NumElements, unsigned Flags,
+ int64_t RawValue = std::numeric_limits<int64_t>::min())
+ : LabelID(LabelID_.value_or(~0u)), EmitStr(EmitStr),
+ NumElements(NumElements), Flags(Flags), RawValue(RawValue) {
+ assert((!LabelID_ || LabelID != ~0u) &&
+ "This value is reserved for non-labels");
+ }
+ MatchTableRecord(const MatchTableRecord &Other) = default;
+ MatchTableRecord(MatchTableRecord &&Other) = default;
+
+ /// Useful if a Match Table Record gets optimized out
+ void turnIntoComment() {
+ Flags |= MTRF_Comment;
+ Flags &= ~MTRF_CommaFollows;
+ NumElements = 0;
+ }
+
+ /// For Jump Table generation purposes
+ bool operator<(const MatchTableRecord &Other) const {
+ return RawValue < Other.RawValue;
+ }
+ int64_t getRawValue() const { return RawValue; }
+
+ void emit(raw_ostream &OS, bool LineBreakNextAfterThis,
+ const MatchTable &Table) const;
+ unsigned size() const { return NumElements; }
+};
+
+/// Holds the contents of a generated MatchTable to enable formatting and the
+/// necessary index tracking needed to support GIM_Try.
+class MatchTable {
+  /// A unique identifier for the table. The generated table will be named
+ /// MatchTable${ID}.
+ unsigned ID;
+ /// The records that make up the table. Also includes comments describing the
+ /// values being emitted and line breaks to format it.
+ std::vector<MatchTableRecord> Contents;
+ /// The currently defined labels.
+ DenseMap<unsigned, unsigned> LabelMap;
+ /// Tracks the sum of MatchTableRecord::NumElements as the table is built.
+ unsigned CurrentSize = 0;
+ /// A unique identifier for a MatchTable label.
+ unsigned CurrentLabelID = 0;
+ /// Determines if the table should be instrumented for rule coverage tracking.
+ bool IsWithCoverage;
+ /// Whether this table is for the GISel combiner.
+ bool IsCombinerTable;
+
+public:
+ static MatchTableRecord LineBreak;
+ static MatchTableRecord Comment(StringRef Comment);
+ static MatchTableRecord Opcode(StringRef Opcode, int IndentAdjust = 0);
+ static MatchTableRecord NamedValue(StringRef NamedValue);
+ static MatchTableRecord NamedValue(StringRef NamedValue, int64_t RawValue);
+ static MatchTableRecord NamedValue(StringRef Namespace, StringRef NamedValue);
+ static MatchTableRecord NamedValue(StringRef Namespace, StringRef NamedValue,
+ int64_t RawValue);
+ static MatchTableRecord IntValue(int64_t IntValue);
+ static MatchTableRecord Label(unsigned LabelID);
+ static MatchTableRecord JumpTarget(unsigned LabelID);
+
+ static MatchTable buildTable(ArrayRef<Matcher *> Rules, bool WithCoverage,
+ bool IsCombiner = false);
+
+ MatchTable(bool WithCoverage, bool IsCombinerTable, unsigned ID = 0)
+ : ID(ID), IsWithCoverage(WithCoverage), IsCombinerTable(IsCombinerTable) {
+ }
+
+ bool isWithCoverage() const { return IsWithCoverage; }
+ bool isCombiner() const { return IsCombinerTable; }
+
+ void push_back(const MatchTableRecord &Value) {
+ if (Value.Flags & MatchTableRecord::MTRF_Label)
+ defineLabel(Value.LabelID);
+ Contents.push_back(Value);
+ CurrentSize += Value.size();
+ }
+
+ unsigned allocateLabelID() { return CurrentLabelID++; }
+
+ void defineLabel(unsigned LabelID) {
+ LabelMap.insert(std::make_pair(LabelID, CurrentSize));
+ }
+
+ unsigned getLabelIndex(unsigned LabelID) const {
+ const auto I = LabelMap.find(LabelID);
+ assert(I != LabelMap.end() && "Use of undeclared label");
+ return I->second;
+ }
+
+ void emitUse(raw_ostream &OS) const;
+ void emitDeclaration(raw_ostream &OS) const;
+};
+
+inline MatchTable &operator<<(MatchTable &Table,
+ const MatchTableRecord &Value) {
+ Table.push_back(Value);
+ return Table;
+}
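+
+// Illustrative sketch (not part of the upstream header): tables are built by
+// streaming records through the operator above, much as the action emitters
+// elsewhere in this patch do, e.g.:
+//   Table << MatchTable::Opcode("GIR_BuildMI") << MatchTable::Comment("InsnID")
+//         << MatchTable::IntValue(0) << MatchTable::LineBreak;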
+
+/// This class stands in for LLT wherever we want to tablegen-erate an
+/// equivalent at compiler run-time.
+class LLTCodeGen {
+private:
+ LLT Ty;
+
+public:
+ LLTCodeGen() = default;
+ LLTCodeGen(const LLT &Ty) : Ty(Ty) {}
+
+ std::string getCxxEnumValue() const;
+
+ void emitCxxEnumValue(raw_ostream &OS) const;
+ void emitCxxConstructorCall(raw_ostream &OS) const;
+
+ const LLT &get() const { return Ty; }
+
+ /// This ordering is used for std::unique() and llvm::sort(). There's no
+ /// particular logic behind the order but either A < B or B < A must be
+ /// true if A != B.
+ bool operator<(const LLTCodeGen &Other) const;
+ bool operator==(const LLTCodeGen &B) const { return Ty == B.Ty; }
+};
+
+// Track all types that are used so we can emit the corresponding enum.
+extern std::set<LLTCodeGen> KnownTypes;
+
+/// Convert an MVT to an equivalent LLT if possible, or the invalid LLT() for
+/// MVTs that don't map cleanly to an LLT (e.g., iPTR, *any, ...).
+std::optional<LLTCodeGen> MVTToLLT(MVT::SimpleValueType SVT);
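+
+// Illustrative sketch (not part of the upstream header): callers are expected
+// to handle the empty optional for MVTs with no LLT equivalent, e.g.:
+//   if (std::optional<LLTCodeGen> Ty = MVTToLLT(MVT::i32))
+//     KnownTypes.insert(*Ty);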
+
+//===- Matchers -----------------------------------------------------------===//
+class Matcher {
+public:
+ virtual ~Matcher();
+ virtual void optimize();
+ virtual void emit(MatchTable &Table) = 0;
+
+ virtual bool hasFirstCondition() const = 0;
+ virtual const PredicateMatcher &getFirstCondition() const = 0;
+ virtual std::unique_ptr<PredicateMatcher> popFirstCondition() = 0;
+};
+
+class GroupMatcher final : public Matcher {
+ /// Conditions that form a common prefix of all the matchers contained.
+ SmallVector<std::unique_ptr<PredicateMatcher>, 1> Conditions;
+
+ /// All the nested matchers, sharing a common prefix.
+ std::vector<Matcher *> Matchers;
+
+ /// An owning collection for any auxiliary matchers created while optimizing
+ /// nested matchers contained.
+ std::vector<std::unique_ptr<Matcher>> MatcherStorage;
+
+public:
+ /// Add a matcher to the collection of nested matchers if it meets the
+ /// requirements, and return true. If it doesn't, do nothing and return false.
+ ///
+ /// Expected to preserve its argument, so it could be moved out later on.
+ bool addMatcher(Matcher &Candidate);
+
+ /// Mark the matcher as fully-built and ensure any invariants expected by both
+ /// optimize() and emit(...) methods. Generally, both sequences of calls
+ /// are expected to lead to a sensible result:
+ ///
+ /// addMatcher(...)*; finalize(); optimize(); emit(...); and
+ /// addMatcher(...)*; finalize(); emit(...);
+ ///
+ /// or generally
+ ///
+ /// addMatcher(...)*; finalize(); { optimize()*; emit(...); }*
+ ///
+ /// Multiple calls to optimize() are expected to be handled gracefully, though
+ /// optimize() is not expected to be idempotent. Multiple calls to finalize()
+  /// aren't generally supported. emit(...) is expected to be non-mutating and
+  /// to produce the exact same results upon repeated calls.
+ ///
+ /// addMatcher() calls after the finalize() call are not supported.
+ ///
+ /// finalize() and optimize() are both allowed to mutate the contained
+ /// matchers, so moving them out after finalize() is not supported.
+ void finalize();
+ void optimize() override;
+ void emit(MatchTable &Table) override;
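+
+  // Illustrative sketch (not part of the upstream header): a typical
+  // build-and-emit sequence following the contract documented above, where
+  // `Candidates` and `Table` are assumed to be in scope:
+  //   GroupMatcher Group;
+  //   for (Matcher *M : Candidates)
+  //     (void)Group.addMatcher(*M);
+  //   Group.finalize();
+  //   Group.optimize();
+  //   Group.emit(Table);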
+
+ /// Could be used to move out the matchers added previously, unless finalize()
+  /// has already been called. If any of the matchers are moved out, the group
+ /// becomes safe to destroy, but not safe to re-use for anything else.
+ iterator_range<std::vector<Matcher *>::iterator> matchers() {
+ return make_range(Matchers.begin(), Matchers.end());
+ }
+ size_t size() const { return Matchers.size(); }
+ bool empty() const { return Matchers.empty(); }
+
+ std::unique_ptr<PredicateMatcher> popFirstCondition() override {
+ assert(!Conditions.empty() &&
+ "Trying to pop a condition from a condition-less group");
+ std::unique_ptr<PredicateMatcher> P = std::move(Conditions.front());
+ Conditions.erase(Conditions.begin());
+ return P;
+ }
+ const PredicateMatcher &getFirstCondition() const override {
+ assert(!Conditions.empty() &&
+ "Trying to get a condition from a condition-less group");
+ return *Conditions.front();
+ }
+ bool hasFirstCondition() const override { return !Conditions.empty(); }
+
+private:
+ /// See if a candidate matcher could be added to this group solely by
+ /// analyzing its first condition.
+ bool candidateConditionMatches(const PredicateMatcher &Predicate) const;
+};
+
+class SwitchMatcher : public Matcher {
+ /// All the nested matchers, representing distinct switch-cases. The first
+ /// conditions (as Matcher::getFirstCondition() reports) of all the nested
+ /// matchers must share the same type and path to a value they check, in other
+ /// words, be isIdenticalDownToValue, but have different values they check
+ /// against.
+ std::vector<Matcher *> Matchers;
+
+ /// The representative condition, with a type and a path (InsnVarID and OpIdx
+ /// in most cases) shared by all the matchers contained.
+ std::unique_ptr<PredicateMatcher> Condition = nullptr;
+
+ /// Temporary set used to check that the case values don't repeat within the
+ /// same switch.
+ std::set<MatchTableRecord> Values;
+
+ /// An owning collection for any auxiliary matchers created while optimizing
+ /// nested matchers contained.
+ std::vector<std::unique_ptr<Matcher>> MatcherStorage;
+
+public:
+ bool addMatcher(Matcher &Candidate);
+
+ void finalize();
+ void emit(MatchTable &Table) override;
+
+ iterator_range<std::vector<Matcher *>::iterator> matchers() {
+ return make_range(Matchers.begin(), Matchers.end());
+ }
+ size_t size() const { return Matchers.size(); }
+ bool empty() const { return Matchers.empty(); }
+
+ std::unique_ptr<PredicateMatcher> popFirstCondition() override {
+ // SwitchMatcher doesn't have a common first condition for its cases, as all
+    // the cases only share a kind of value (a type and a path to it) they
+ // match, but deliberately differ in the actual value they match.
+ llvm_unreachable("Trying to pop a condition from a condition-less group");
+ }
+
+ const PredicateMatcher &getFirstCondition() const override {
+ llvm_unreachable("Trying to pop a condition from a condition-less group");
+ }
+
+ bool hasFirstCondition() const override { return false; }
+
+private:
+ /// See if the predicate type has a Switch-implementation for it.
+ static bool isSupportedPredicateType(const PredicateMatcher &Predicate);
+
+ bool candidateConditionMatches(const PredicateMatcher &Predicate) const;
+
+ /// emit()-helper
+ static void emitPredicateSpecificOpcodes(const PredicateMatcher &P,
+ MatchTable &Table);
+};
+
+/// Generates code to check that a match rule matches.
+class RuleMatcher : public Matcher {
+public:
+ using ActionList = std::list<std::unique_ptr<MatchAction>>;
+ using action_iterator = ActionList::iterator;
+
+protected:
+ /// A list of matchers that all need to succeed for the current rule to match.
+ /// FIXME: This currently supports a single match position but could be
+ /// extended to support multiple positions to support div/rem fusion or
+ /// load-multiple instructions.
+ using MatchersTy = std::vector<std::unique_ptr<InstructionMatcher>>;
+ MatchersTy Matchers;
+
+ /// A list of actions that need to be taken when all predicates in this rule
+ /// have succeeded.
+ ActionList Actions;
+
+ using DefinedInsnVariablesMap = std::map<InstructionMatcher *, unsigned>;
+
+  /// A map of instruction matchers to the IDs of the local instruction
+  /// variables defined for them.
+ DefinedInsnVariablesMap InsnVariableIDs;
+
+ using MutatableInsnSet = SmallPtrSet<InstructionMatcher *, 4>;
+
+ // The set of instruction matchers that have not yet been claimed for mutation
+ // by a BuildMI.
+ MutatableInsnSet MutatableInsns;
+
+ /// A map of named operands defined by the matchers that may be referenced by
+ /// the renderers.
+ StringMap<OperandMatcher *> DefinedOperands;
+
+ /// A map of anonymous physical register operands defined by the matchers that
+ /// may be referenced by the renderers.
+ DenseMap<Record *, OperandMatcher *> PhysRegOperands;
+
+ /// ID for the next instruction variable defined with
+ /// implicitlyDefineInsnVar()
+ unsigned NextInsnVarID;
+
+ /// ID for the next output instruction allocated with allocateOutputInsnID()
+ unsigned NextOutputInsnID;
+
+ /// ID for the next temporary register ID allocated with allocateTempRegID()
+ unsigned NextTempRegID;
+
+ /// Current GISelFlags
+ GISelFlags Flags = 0;
+
+ std::vector<std::string> RequiredSimplePredicates;
+ std::vector<Record *> RequiredFeatures;
+ std::vector<std::unique_ptr<PredicateMatcher>> EpilogueMatchers;
+
+ ArrayRef<SMLoc> SrcLoc;
+
+ typedef std::tuple<Record *, unsigned, unsigned>
+ DefinedComplexPatternSubOperand;
+ typedef StringMap<DefinedComplexPatternSubOperand>
+ DefinedComplexPatternSubOperandMap;
+ /// A map of Symbolic Names to ComplexPattern sub-operands.
+ DefinedComplexPatternSubOperandMap ComplexSubOperands;
+  /// A map used to check for multiple references to a ComplexSubOperand.
+  /// A ComplexSubOperand can't be referenced multiple times from different
+  /// operands; however, multiple references from the same operand are allowed
+  /// since that is how 'same operand checks' are generated.
+ StringMap<std::string> ComplexSubOperandsParentName;
+
+ uint64_t RuleID;
+ static uint64_t NextRuleID;
+
+ GISelFlags updateGISelFlag(GISelFlags CurFlags, const Record *R,
+ StringRef FlagName, GISelFlags FlagBit);
+
+public:
+ RuleMatcher(ArrayRef<SMLoc> SrcLoc)
+ : NextInsnVarID(0), NextOutputInsnID(0), NextTempRegID(0), SrcLoc(SrcLoc),
+ RuleID(NextRuleID++) {}
+ RuleMatcher(RuleMatcher &&Other) = default;
+ RuleMatcher &operator=(RuleMatcher &&Other) = default;
+
+ uint64_t getRuleID() const { return RuleID; }
+
+ InstructionMatcher &addInstructionMatcher(StringRef SymbolicName);
+ void addRequiredFeature(Record *Feature);
+ const std::vector<Record *> &getRequiredFeatures() const;
+
+ void addRequiredSimplePredicate(StringRef PredName);
+ const std::vector<std::string> &getRequiredSimplePredicates();
+
+ // Emplaces an action of the specified Kind at the end of the action list.
+ //
+ // Returns a reference to the newly created action.
+ //
+ // Like std::vector::emplace_back(), may invalidate all iterators if the new
+ // size exceeds the capacity. Otherwise, only invalidates the past-the-end
+ // iterator.
+ template <class Kind, class... Args> Kind &addAction(Args &&...args) {
+ Actions.emplace_back(std::make_unique<Kind>(std::forward<Args>(args)...));
+ return *static_cast<Kind *>(Actions.back().get());
+ }
+
+ // Emplaces an action of the specified Kind before the given insertion point.
+ //
+ // Returns an iterator pointing at the newly created instruction.
+ //
+ // Like std::vector::insert(), may invalidate all iterators if the new size
+ // exceeds the capacity. Otherwise, only invalidates the iterators from the
+ // insertion point onwards.
+ template <class Kind, class... Args>
+ action_iterator insertAction(action_iterator InsertPt, Args &&...args) {
+ return Actions.emplace(InsertPt,
+ std::make_unique<Kind>(std::forward<Args>(args)...));
+ }
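+
+  // Illustrative sketch (not part of the upstream header): `SomeAction` is a
+  // hypothetical MatchAction subclass, used only to show the call shapes:
+  //   SomeAction &A = Rule.addAction<SomeAction>(/*InsnID=*/0);
+  //   action_iterator It =
+  //       Rule.insertAction<SomeAction>(Rule.actions_begin(), /*InsnID=*/0);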
+
+ // Update the active GISelFlags based on the GISelFlags Record R.
+ // A SaveAndRestore object is returned so the old GISelFlags are restored
+ // at the end of the scope.
+ SaveAndRestore<GISelFlags> setGISelFlags(const Record *R);
+ GISelFlags getGISelFlags() const { return Flags; }
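+
+  // Illustrative sketch (not part of the upstream header): the returned
+  // SaveAndRestore keeps the previous flags for the current scope, e.g.:
+  //   {
+  //     auto SavedFlags = Rule.setGISelFlags(R);
+  //     // ... add matchers that read Rule.getGISelFlags() ...
+  //   } // previous flags restored here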
+
+ /// Define an instruction without emitting any code to do so.
+ unsigned implicitlyDefineInsnVar(InstructionMatcher &Matcher);
+
+ unsigned getInsnVarID(InstructionMatcher &InsnMatcher) const;
+ DefinedInsnVariablesMap::const_iterator defined_insn_vars_begin() const {
+ return InsnVariableIDs.begin();
+ }
+ DefinedInsnVariablesMap::const_iterator defined_insn_vars_end() const {
+ return InsnVariableIDs.end();
+ }
+ iterator_range<typename DefinedInsnVariablesMap::const_iterator>
+ defined_insn_vars() const {
+ return make_range(defined_insn_vars_begin(), defined_insn_vars_end());
+ }
+
+ MutatableInsnSet::const_iterator mutatable_insns_begin() const {
+ return MutatableInsns.begin();
+ }
+ MutatableInsnSet::const_iterator mutatable_insns_end() const {
+ return MutatableInsns.end();
+ }
+ iterator_range<typename MutatableInsnSet::const_iterator>
+ mutatable_insns() const {
+ return make_range(mutatable_insns_begin(), mutatable_insns_end());
+ }
+ void reserveInsnMatcherForMutation(InstructionMatcher *InsnMatcher) {
+ bool R = MutatableInsns.erase(InsnMatcher);
+ assert(R && "Reserving a mutatable insn that isn't available");
+ (void)R;
+ }
+
+ action_iterator actions_begin() { return Actions.begin(); }
+ action_iterator actions_end() { return Actions.end(); }
+ iterator_range<action_iterator> actions() {
+ return make_range(actions_begin(), actions_end());
+ }
+
+ void defineOperand(StringRef SymbolicName, OperandMatcher &OM);
+
+ void definePhysRegOperand(Record *Reg, OperandMatcher &OM);
+
+ Error defineComplexSubOperand(StringRef SymbolicName, Record *ComplexPattern,
+ unsigned RendererID, unsigned SubOperandID,
+ StringRef ParentSymbolicName);
+
+ std::optional<DefinedComplexPatternSubOperand>
+ getComplexSubOperand(StringRef SymbolicName) const {
+ const auto &I = ComplexSubOperands.find(SymbolicName);
+ if (I == ComplexSubOperands.end())
+ return std::nullopt;
+ return I->second;
+ }
+
+ InstructionMatcher &getInstructionMatcher(StringRef SymbolicName) const;
+ const OperandMatcher &getOperandMatcher(StringRef Name) const;
+ const OperandMatcher &getPhysRegOperandMatcher(Record *) const;
+
+ void optimize() override;
+ void emit(MatchTable &Table) override;
+
+ /// Compare the priority of this object and B.
+ ///
+ /// Returns true if this object is more important than B.
+ bool isHigherPriorityThan(const RuleMatcher &B) const;
+
+ /// Report the maximum number of temporary operands needed by the rule
+ /// matcher.
+ unsigned countRendererFns() const;
+
+ std::unique_ptr<PredicateMatcher> popFirstCondition() override;
+ const PredicateMatcher &getFirstCondition() const override;
+ LLTCodeGen getFirstConditionAsRootType();
+ bool hasFirstCondition() const override;
+ unsigned getNumOperands() const;
+ StringRef getOpcode() const;
+
+ // FIXME: Remove this as soon as possible
+ InstructionMatcher &insnmatchers_front() const { return *Matchers.front(); }
+
+ unsigned allocateOutputInsnID() { return NextOutputInsnID++; }
+ unsigned allocateTempRegID() { return NextTempRegID++; }
+
+ iterator_range<MatchersTy::iterator> insnmatchers() {
+ return make_range(Matchers.begin(), Matchers.end());
+ }
+ bool insnmatchers_empty() const { return Matchers.empty(); }
+ void insnmatchers_pop_front() { Matchers.erase(Matchers.begin()); }
+};
+
+template <class PredicateTy> class PredicateListMatcher {
+private:
+ /// Template instantiations should specialize this to return a string to use
+ /// for the comment emitted when there are no predicates.
+ std::string getNoPredicateComment() const;
+
+protected:
+ using PredicatesTy = std::deque<std::unique_ptr<PredicateTy>>;
+ PredicatesTy Predicates;
+
+ /// Track if the list of predicates was manipulated by one of the optimization
+ /// methods.
+ bool Optimized = false;
+
+public:
+ typename PredicatesTy::iterator predicates_begin() {
+ return Predicates.begin();
+ }
+ typename PredicatesTy::iterator predicates_end() { return Predicates.end(); }
+ iterator_range<typename PredicatesTy::iterator> predicates() {
+ return make_range(predicates_begin(), predicates_end());
+ }
+ typename PredicatesTy::size_type predicates_size() const {
+ return Predicates.size();
+ }
+ bool predicates_empty() const { return Predicates.empty(); }
+
+ std::unique_ptr<PredicateTy> predicates_pop_front() {
+ std::unique_ptr<PredicateTy> Front = std::move(Predicates.front());
+ Predicates.pop_front();
+ Optimized = true;
+ return Front;
+ }
+
+ void prependPredicate(std::unique_ptr<PredicateTy> &&Predicate) {
+ Predicates.push_front(std::move(Predicate));
+ }
+
+ void eraseNullPredicates() {
+ const auto NewEnd =
+ std::stable_partition(Predicates.begin(), Predicates.end(),
+ std::logical_not<std::unique_ptr<PredicateTy>>());
+ if (NewEnd != Predicates.begin()) {
+ Predicates.erase(Predicates.begin(), NewEnd);
+ Optimized = true;
+ }
+ }
+
+  /// Emit MatchTable opcodes that test whether all the predicates are met.
+ template <class... Args>
+ void emitPredicateListOpcodes(MatchTable &Table, Args &&...args) {
+ if (Predicates.empty() && !Optimized) {
+ Table << MatchTable::Comment(getNoPredicateComment())
+ << MatchTable::LineBreak;
+ return;
+ }
+
+ for (const auto &Predicate : predicates())
+ Predicate->emitPredicateOpcodes(Table, std::forward<Args>(args)...);
+ }
+
+  /// Provide a function to avoid emitting certain predicates. This is used to
+  /// defer some predicate checks until after others have been emitted.
+ using PredicateFilterFunc = std::function<bool(const PredicateTy &)>;
+
+ /// Emit MatchTable opcodes for predicates which satisfy \p
+ /// ShouldEmitPredicate. This should be called multiple times to ensure all
+ /// predicates are eventually added to the match table.
+ template <class... Args>
+ void emitFilteredPredicateListOpcodes(PredicateFilterFunc ShouldEmitPredicate,
+ MatchTable &Table, Args &&...args) {
+ if (Predicates.empty() && !Optimized) {
+ Table << MatchTable::Comment(getNoPredicateComment())
+ << MatchTable::LineBreak;
+ return;
+ }
+
+ for (const auto &Predicate : predicates()) {
+ if (ShouldEmitPredicate(*Predicate))
+ Predicate->emitPredicateOpcodes(Table, std::forward<Args>(args)...);
+ }
+ }
+};
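+
+// Illustrative sketch (not part of the upstream header): a filter passed to
+// emitFilteredPredicateListOpcodes() can defer predicates that depend on other
+// operands until a later pass, e.g. for an OperandMatcher `OM`:
+//   OM.emitFilteredPredicateListOpcodes(
+//       [](const OperandPredicateMatcher &P) { return !P.dependsOnOperands(); },
+//       Table, Rule);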
+
+class PredicateMatcher {
+public:
+ /// This enum is used for RTTI and also defines the priority that is given to
+ /// the predicate when generating the matcher code. Kinds with higher priority
+ /// must be tested first.
+ ///
+  /// The relative priority of OPM_LLT, OPM_RegBank, and OPM_MBB does not matter
+ /// but OPM_Int must have priority over OPM_RegBank since constant integers
+ /// are represented by a virtual register defined by a G_CONSTANT instruction.
+ ///
+ /// Note: The relative priority between IPM_ and OPM_ does not matter, they
+ /// are currently not compared between each other.
+ enum PredicateKind {
+ IPM_Opcode,
+ IPM_NumOperands,
+ IPM_ImmPredicate,
+ IPM_Imm,
+ IPM_AtomicOrderingMMO,
+ IPM_MemoryLLTSize,
+ IPM_MemoryVsLLTSize,
+ IPM_MemoryAddressSpace,
+ IPM_MemoryAlignment,
+ IPM_VectorSplatImm,
+ IPM_NoUse,
+ IPM_GenericPredicate,
+ OPM_SameOperand,
+ OPM_ComplexPattern,
+ OPM_IntrinsicID,
+ OPM_CmpPredicate,
+ OPM_Instruction,
+ OPM_Int,
+ OPM_LiteralInt,
+ OPM_LLT,
+ OPM_PointerToAny,
+ OPM_RegBank,
+ OPM_MBB,
+ OPM_RecordNamedOperand,
+ };
+
+protected:
+ PredicateKind Kind;
+ unsigned InsnVarID;
+ unsigned OpIdx;
+
+public:
+ PredicateMatcher(PredicateKind Kind, unsigned InsnVarID, unsigned OpIdx = ~0)
+ : Kind(Kind), InsnVarID(InsnVarID), OpIdx(OpIdx) {}
+ virtual ~PredicateMatcher();
+
+ unsigned getInsnVarID() const { return InsnVarID; }
+ unsigned getOpIdx() const { return OpIdx; }
+
+ /// Emit MatchTable opcodes that check the predicate for the given operand.
+ virtual void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const = 0;
+
+ PredicateKind getKind() const { return Kind; }
+
+ bool dependsOnOperands() const {
+ // Custom predicates really depend on the context pattern of the
+ // instruction, not just the individual instruction. This therefore
+ // implicitly depends on all other pattern constraints.
+ return Kind == IPM_GenericPredicate;
+ }
+
+ virtual bool isIdentical(const PredicateMatcher &B) const {
+ return B.getKind() == getKind() && InsnVarID == B.InsnVarID &&
+ OpIdx == B.OpIdx;
+ }
+
+ virtual bool isIdenticalDownToValue(const PredicateMatcher &B) const {
+ return hasValue() && PredicateMatcher::isIdentical(B);
+ }
+
+ virtual MatchTableRecord getValue() const {
+ assert(hasValue() && "Can not get a value of a value-less predicate!");
+ llvm_unreachable("Not implemented yet");
+ }
+ virtual bool hasValue() const { return false; }
+
+ /// Report the maximum number of temporary operands needed by the predicate
+ /// matcher.
+ virtual unsigned countRendererFns() const { return 0; }
+};
+
+/// Generates code to check a predicate of an operand.
+///
+/// Typical predicates include:
+/// * Operand is a particular register.
+/// * Operand is assigned a particular register bank.
+/// * Operand is an MBB.
+class OperandPredicateMatcher : public PredicateMatcher {
+public:
+ OperandPredicateMatcher(PredicateKind Kind, unsigned InsnVarID,
+ unsigned OpIdx)
+ : PredicateMatcher(Kind, InsnVarID, OpIdx) {}
+ virtual ~OperandPredicateMatcher();
+
+ /// Compare the priority of this object and B.
+ ///
+ /// Returns true if this object is more important than B.
+ virtual bool isHigherPriorityThan(const OperandPredicateMatcher &B) const;
+};
+
+template <>
+inline std::string
+PredicateListMatcher<OperandPredicateMatcher>::getNoPredicateComment() const {
+ return "No operand predicates";
+}
+
+/// Generates code to check that a register operand is the same as another,
+/// previously named operand.
+class SameOperandMatcher : public OperandPredicateMatcher {
+ std::string MatchingName;
+ unsigned OrigOpIdx;
+
+ GISelFlags Flags;
+
+public:
+ SameOperandMatcher(unsigned InsnVarID, unsigned OpIdx, StringRef MatchingName,
+ unsigned OrigOpIdx, GISelFlags Flags)
+ : OperandPredicateMatcher(OPM_SameOperand, InsnVarID, OpIdx),
+ MatchingName(MatchingName), OrigOpIdx(OrigOpIdx), Flags(Flags) {}
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == OPM_SameOperand;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return OperandPredicateMatcher::isIdentical(B) &&
+ OrigOpIdx == cast<SameOperandMatcher>(&B)->OrigOpIdx &&
+ MatchingName == cast<SameOperandMatcher>(&B)->MatchingName;
+ }
+};
+
+/// Generates code to check that an operand is a particular LLT.
+class LLTOperandMatcher : public OperandPredicateMatcher {
+protected:
+ LLTCodeGen Ty;
+
+public:
+ static std::map<LLTCodeGen, unsigned> TypeIDValues;
+
+ static void initTypeIDValuesMap() {
+ TypeIDValues.clear();
+
+ unsigned ID = 0;
+ for (const LLTCodeGen &LLTy : KnownTypes)
+ TypeIDValues[LLTy] = ID++;
+ }
+
+ LLTOperandMatcher(unsigned InsnVarID, unsigned OpIdx, const LLTCodeGen &Ty)
+ : OperandPredicateMatcher(OPM_LLT, InsnVarID, OpIdx), Ty(Ty) {
+ KnownTypes.insert(Ty);
+ }
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == OPM_LLT;
+ }
+
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return OperandPredicateMatcher::isIdentical(B) &&
+ Ty == cast<LLTOperandMatcher>(&B)->Ty;
+ }
+
+ MatchTableRecord getValue() const override;
+ bool hasValue() const override;
+
+ LLTCodeGen getTy() const { return Ty; }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+/// Generates code to check that an operand is a pointer to any address space.
+///
+/// In SelectionDAG, the types did not describe pointers or address spaces. As a
+/// result, iN is used to describe a pointer of N bits to any address space and
+/// PatFrag predicates are typically used to constrain the address space.
+/// There's no reliable means to derive the missing type information from the
+/// pattern so imported rules must test the components of a pointer separately.
+///
+/// If SizeInBits is zero, then the pointer size will be obtained from the
+/// subtarget.
+class PointerToAnyOperandMatcher : public OperandPredicateMatcher {
+protected:
+ unsigned SizeInBits;
+
+public:
+ PointerToAnyOperandMatcher(unsigned InsnVarID, unsigned OpIdx,
+ unsigned SizeInBits)
+ : OperandPredicateMatcher(OPM_PointerToAny, InsnVarID, OpIdx),
+ SizeInBits(SizeInBits) {}
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == OPM_PointerToAny;
+ }
+
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return OperandPredicateMatcher::isIdentical(B) &&
+ SizeInBits == cast<PointerToAnyOperandMatcher>(&B)->SizeInBits;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+/// Generates code to record a named operand in the RecordedOperands list at
+/// StoreIdx. Predicates with 'let PredicateCodeUsesOperands = 1' get
+/// RecordedOperands as an argument to the predicate's C++ code once all
+/// operands have been matched.
+class RecordNamedOperandMatcher : public OperandPredicateMatcher {
+protected:
+ unsigned StoreIdx;
+ std::string Name;
+
+public:
+ RecordNamedOperandMatcher(unsigned InsnVarID, unsigned OpIdx,
+ unsigned StoreIdx, StringRef Name)
+ : OperandPredicateMatcher(OPM_RecordNamedOperand, InsnVarID, OpIdx),
+ StoreIdx(StoreIdx), Name(Name) {}
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == OPM_RecordNamedOperand;
+ }
+
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return OperandPredicateMatcher::isIdentical(B) &&
+ StoreIdx == cast<RecordNamedOperandMatcher>(&B)->StoreIdx &&
+ Name == cast<RecordNamedOperandMatcher>(&B)->Name;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+/// Generates code to check that an operand is a particular target constant.
+class ComplexPatternOperandMatcher : public OperandPredicateMatcher {
+protected:
+ const OperandMatcher &Operand;
+ const Record &TheDef;
+
+ unsigned getAllocatedTemporariesBaseID() const;
+
+public:
+ bool isIdentical(const PredicateMatcher &B) const override { return false; }
+
+ ComplexPatternOperandMatcher(unsigned InsnVarID, unsigned OpIdx,
+ const OperandMatcher &Operand,
+ const Record &TheDef)
+ : OperandPredicateMatcher(OPM_ComplexPattern, InsnVarID, OpIdx),
+ Operand(Operand), TheDef(TheDef) {}
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == OPM_ComplexPattern;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+ unsigned countRendererFns() const override { return 1; }
+};
+
+/// Generates code to check that an operand is in a particular register bank.
+class RegisterBankOperandMatcher : public OperandPredicateMatcher {
+protected:
+ const CodeGenRegisterClass &RC;
+
+public:
+ RegisterBankOperandMatcher(unsigned InsnVarID, unsigned OpIdx,
+ const CodeGenRegisterClass &RC)
+ : OperandPredicateMatcher(OPM_RegBank, InsnVarID, OpIdx), RC(RC) {}
+
+ bool isIdentical(const PredicateMatcher &B) const override;
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == OPM_RegBank;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+/// Generates code to check that an operand is a basic block.
+class MBBOperandMatcher : public OperandPredicateMatcher {
+public:
+ MBBOperandMatcher(unsigned InsnVarID, unsigned OpIdx)
+ : OperandPredicateMatcher(OPM_MBB, InsnVarID, OpIdx) {}
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == OPM_MBB;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+class ImmOperandMatcher : public OperandPredicateMatcher {
+public:
+ ImmOperandMatcher(unsigned InsnVarID, unsigned OpIdx)
+ : OperandPredicateMatcher(IPM_Imm, InsnVarID, OpIdx) {}
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == IPM_Imm;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+/// Generates code to check that an operand is a G_CONSTANT with a particular
+/// int.
+class ConstantIntOperandMatcher : public OperandPredicateMatcher {
+protected:
+ int64_t Value;
+
+public:
+ ConstantIntOperandMatcher(unsigned InsnVarID, unsigned OpIdx, int64_t Value)
+ : OperandPredicateMatcher(OPM_Int, InsnVarID, OpIdx), Value(Value) {}
+
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return OperandPredicateMatcher::isIdentical(B) &&
+ Value == cast<ConstantIntOperandMatcher>(&B)->Value;
+ }
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == OPM_Int;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+/// Generates code to check that an operand is a raw int (where MO.isImm() or
+/// MO.isCImm() is true).
+class LiteralIntOperandMatcher : public OperandPredicateMatcher {
+protected:
+ int64_t Value;
+
+public:
+ LiteralIntOperandMatcher(unsigned InsnVarID, unsigned OpIdx, int64_t Value)
+ : OperandPredicateMatcher(OPM_LiteralInt, InsnVarID, OpIdx),
+ Value(Value) {}
+
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return OperandPredicateMatcher::isIdentical(B) &&
+ Value == cast<LiteralIntOperandMatcher>(&B)->Value;
+ }
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == OPM_LiteralInt;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+/// Generates code to check that an operand is a CmpInst predicate.
+class CmpPredicateOperandMatcher : public OperandPredicateMatcher {
+protected:
+ std::string PredName;
+
+public:
+ CmpPredicateOperandMatcher(unsigned InsnVarID, unsigned OpIdx, std::string P)
+ : OperandPredicateMatcher(OPM_CmpPredicate, InsnVarID, OpIdx),
+ PredName(P) {}
+
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return OperandPredicateMatcher::isIdentical(B) &&
+ PredName == cast<CmpPredicateOperandMatcher>(&B)->PredName;
+ }
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == OPM_CmpPredicate;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+/// Generates code to check that an operand is an intrinsic ID.
+class IntrinsicIDOperandMatcher : public OperandPredicateMatcher {
+protected:
+ const CodeGenIntrinsic *II;
+
+public:
+ IntrinsicIDOperandMatcher(unsigned InsnVarID, unsigned OpIdx,
+ const CodeGenIntrinsic *II)
+ : OperandPredicateMatcher(OPM_IntrinsicID, InsnVarID, OpIdx), II(II) {}
+
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return OperandPredicateMatcher::isIdentical(B) &&
+ II == cast<IntrinsicIDOperandMatcher>(&B)->II;
+ }
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == OPM_IntrinsicID;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+/// Generates code to check that this operand is an immediate whose value meets
+/// an immediate predicate.
+class OperandImmPredicateMatcher : public OperandPredicateMatcher {
+protected:
+ TreePredicateFn Predicate;
+
+public:
+ OperandImmPredicateMatcher(unsigned InsnVarID, unsigned OpIdx,
+ const TreePredicateFn &Predicate)
+ : OperandPredicateMatcher(IPM_ImmPredicate, InsnVarID, OpIdx),
+ Predicate(Predicate) {}
+
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return OperandPredicateMatcher::isIdentical(B) &&
+ Predicate.getOrigPatFragRecord() ==
+ cast<OperandImmPredicateMatcher>(&B)
+ ->Predicate.getOrigPatFragRecord();
+ }
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == IPM_ImmPredicate;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+/// Generates code to check that a set of predicates match for a particular
+/// operand.
+class OperandMatcher : public PredicateListMatcher<OperandPredicateMatcher> {
+protected:
+ InstructionMatcher &Insn;
+ unsigned OpIdx;
+ std::string SymbolicName;
+
+ /// The index of the first temporary variable allocated to this operand. The
+ /// number of allocated temporaries can be found with
+ /// countRendererFns().
+ unsigned AllocatedTemporariesBaseID;
+
+public:
+ OperandMatcher(InstructionMatcher &Insn, unsigned OpIdx,
+ const std::string &SymbolicName,
+ unsigned AllocatedTemporariesBaseID)
+ : Insn(Insn), OpIdx(OpIdx), SymbolicName(SymbolicName),
+ AllocatedTemporariesBaseID(AllocatedTemporariesBaseID) {}
+
+ bool hasSymbolicName() const { return !SymbolicName.empty(); }
+ StringRef getSymbolicName() const { return SymbolicName; }
+ void setSymbolicName(StringRef Name) {
+ assert(SymbolicName.empty() && "Operand already has a symbolic name");
+ SymbolicName = std::string(Name);
+ }
+
+ /// Construct a new operand predicate and add it to the matcher.
+ template <class Kind, class... Args>
+ std::optional<Kind *> addPredicate(Args &&...args) {
+ if (isSameAsAnotherOperand())
+ return std::nullopt;
+ Predicates.emplace_back(std::make_unique<Kind>(
+ getInsnVarID(), getOpIdx(), std::forward<Args>(args)...));
+ return static_cast<Kind *>(Predicates.back().get());
+ }
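+
+  // Illustrative sketch (not part of the upstream header): the InsnVarID and
+  // OpIdx are prepended automatically, so adding an LLT check on an
+  // OperandMatcher `OM` looks roughly like:
+  //   std::optional<LLTOperandMatcher *> P =
+  //       OM.addPredicate<LLTOperandMatcher>(Ty); // Ty is an LLTCodeGen
+  //   if (!P) { /* operand is tied to another operand; predicate skipped */ }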
+
+ unsigned getOpIdx() const { return OpIdx; }
+ unsigned getInsnVarID() const;
+
+ std::string getOperandExpr(unsigned InsnVarID) const;
+
+ InstructionMatcher &getInstructionMatcher() const { return Insn; }
+
+ Error addTypeCheckPredicate(const TypeSetByHwMode &VTy,
+ bool OperandIsAPointer);
+
+ /// Emit MatchTable opcodes that test whether the instruction named in
+ /// InsnVarID matches all the predicates and all the operands.
+ void emitPredicateOpcodes(MatchTable &Table, RuleMatcher &Rule);
+
+ /// Compare the priority of this object and B.
+ ///
+ /// Returns true if this object is more important than B.
+ bool isHigherPriorityThan(OperandMatcher &B);
+
+ /// Report the maximum number of temporary operands needed by the operand
+ /// matcher.
+ unsigned countRendererFns();
+
+ unsigned getAllocatedTemporariesBaseID() const {
+ return AllocatedTemporariesBaseID;
+ }
+
+ bool isSameAsAnotherOperand() {
+ for (const auto &Predicate : predicates())
+ if (isa<SameOperandMatcher>(Predicate))
+ return true;
+ return false;
+ }
+};
+
+/// Generates code to check a predicate on an instruction.
+///
+/// Typical predicates include:
+/// * The opcode of the instruction is a particular value.
+/// * The nsw/nuw flag is/isn't set.
+class InstructionPredicateMatcher : public PredicateMatcher {
+public:
+ InstructionPredicateMatcher(PredicateKind Kind, unsigned InsnVarID)
+ : PredicateMatcher(Kind, InsnVarID) {}
+ virtual ~InstructionPredicateMatcher() {}
+
+ /// Compare the priority of this object and B.
+ ///
+ /// Returns true if this object is more important than B.
+ virtual bool
+ isHigherPriorityThan(const InstructionPredicateMatcher &B) const {
+ return Kind < B.Kind;
+  }
+};
+
+template <>
+inline std::string
+PredicateListMatcher<PredicateMatcher>::getNoPredicateComment() const {
+ return "No instruction predicates";
+}
+
+/// Generates code to check the opcode of an instruction.
+class InstructionOpcodeMatcher : public InstructionPredicateMatcher {
+protected:
+  // Allow matching one of several similar opcodes that share properties. This
+ // is to handle patterns where one SelectionDAG operation maps to multiple
+ // GlobalISel ones (e.g. G_BUILD_VECTOR and G_BUILD_VECTOR_TRUNC). The first
+ // is treated as the canonical opcode.
+ SmallVector<const CodeGenInstruction *, 2> Insts;
+
+ static DenseMap<const CodeGenInstruction *, unsigned> OpcodeValues;
+
+ MatchTableRecord getInstValue(const CodeGenInstruction *I) const;
+
+public:
+ static void initOpcodeValuesMap(const CodeGenTarget &Target);
+
+ InstructionOpcodeMatcher(unsigned InsnVarID,
+ ArrayRef<const CodeGenInstruction *> I)
+ : InstructionPredicateMatcher(IPM_Opcode, InsnVarID),
+ Insts(I.begin(), I.end()) {
+ assert((Insts.size() == 1 || Insts.size() == 2) &&
+ "unexpected number of opcode alternatives");
+ }
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == IPM_Opcode;
+ }
+
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return InstructionPredicateMatcher::isIdentical(B) &&
+ Insts == cast<InstructionOpcodeMatcher>(&B)->Insts;
+ }
+
+ bool hasValue() const override {
+ return Insts.size() == 1 && OpcodeValues.count(Insts[0]);
+ }
+
+ // TODO: This is used for the SwitchMatcher optimization. We should be able to
+ // return a list of the opcodes to match.
+ MatchTableRecord getValue() const override;
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+
+ /// Compare the priority of this object and B.
+ ///
+ /// Returns true if this object is more important than B.
+ bool
+ isHigherPriorityThan(const InstructionPredicateMatcher &B) const override;
+
+ bool isConstantInstruction() const;
+
+ // The first opcode is the canonical opcode, and later are alternatives.
+ StringRef getOpcode() const;
+ ArrayRef<const CodeGenInstruction *> getAlternativeOpcodes() { return Insts; }
+ bool isVariadicNumOperands() const;
+ StringRef getOperandType(unsigned OpIdx) const;
+};
+
+class InstructionNumOperandsMatcher final : public InstructionPredicateMatcher {
+ unsigned NumOperands = 0;
+
+public:
+ InstructionNumOperandsMatcher(unsigned InsnVarID, unsigned NumOperands)
+ : InstructionPredicateMatcher(IPM_NumOperands, InsnVarID),
+ NumOperands(NumOperands) {}
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == IPM_NumOperands;
+ }
+
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return InstructionPredicateMatcher::isIdentical(B) &&
+ NumOperands == cast<InstructionNumOperandsMatcher>(&B)->NumOperands;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+/// Generates code to check that this instruction is a constant whose value
+/// meets an immediate predicate.
+///
+/// Immediates are slightly odd since they are typically used like an operand
+/// but are represented as an operator internally. We typically write simm8:$src
+/// in a tablegen pattern, but this is just syntactic sugar for
+/// (imm:i32)<<P:Predicate_simm8>>:$imm which more directly describes the nodes
+/// that will be matched and the predicate (which is attached to the imm
+/// operator) that will be tested. In SelectionDAG this describes a
+/// ConstantSDNode whose internal value will be tested using the simm8
+/// predicate.
+///
+/// The corresponding GlobalISel representation is %1 = G_CONSTANT iN Value. In
+/// this representation, the immediate could be tested with an
+/// InstructionMatcher, InstructionOpcodeMatcher, OperandMatcher, and a
+/// OperandPredicateMatcher-subclass to check the Value meets the predicate but
+/// there are two implementation issues with producing that matcher
+/// configuration from the SelectionDAG pattern:
+/// * ImmLeaf is a PatFrag whose root is an InstructionMatcher. This means that
+/// were we to sink the immediate predicate to the operand we would have to
+/// have two partial implementations of PatFrag support, one for immediates
+/// and one for non-immediates.
+/// * At the point we handle the predicate, the OperandMatcher hasn't been
+/// created yet. If we were to sink the predicate to the OperandMatcher we
+/// would also have to complicate (or duplicate) the code that descends and
+/// creates matchers for the subtree.
+/// Overall, it's simpler to handle it in the place it was found.
+class InstructionImmPredicateMatcher : public InstructionPredicateMatcher {
+protected:
+ TreePredicateFn Predicate;
+
+public:
+ InstructionImmPredicateMatcher(unsigned InsnVarID,
+ const TreePredicateFn &Predicate)
+ : InstructionPredicateMatcher(IPM_ImmPredicate, InsnVarID),
+ Predicate(Predicate) {}
+
+ bool isIdentical(const PredicateMatcher &B) const override;
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == IPM_ImmPredicate;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+/// Generates code to check that a memory instruction has an atomic ordering
+/// MachineMemOperand.
+class AtomicOrderingMMOPredicateMatcher : public InstructionPredicateMatcher {
+public:
+ enum AOComparator {
+ AO_Exactly,
+ AO_OrStronger,
+ AO_WeakerThan,
+ };
+
+protected:
+ StringRef Order;
+ AOComparator Comparator;
+
+public:
+ AtomicOrderingMMOPredicateMatcher(unsigned InsnVarID, StringRef Order,
+ AOComparator Comparator = AO_Exactly)
+ : InstructionPredicateMatcher(IPM_AtomicOrderingMMO, InsnVarID),
+ Order(Order), Comparator(Comparator) {}
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == IPM_AtomicOrderingMMO;
+ }
+
+ bool isIdentical(const PredicateMatcher &B) const override;
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+/// Generates code to check that the size of an MMO is exactly N bytes.
+class MemorySizePredicateMatcher : public InstructionPredicateMatcher {
+protected:
+ unsigned MMOIdx;
+ uint64_t Size;
+
+public:
+ MemorySizePredicateMatcher(unsigned InsnVarID, unsigned MMOIdx, unsigned Size)
+ : InstructionPredicateMatcher(IPM_MemoryLLTSize, InsnVarID),
+ MMOIdx(MMOIdx), Size(Size) {}
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == IPM_MemoryLLTSize;
+ }
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return InstructionPredicateMatcher::isIdentical(B) &&
+ MMOIdx == cast<MemorySizePredicateMatcher>(&B)->MMOIdx &&
+ Size == cast<MemorySizePredicateMatcher>(&B)->Size;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+class MemoryAddressSpacePredicateMatcher : public InstructionPredicateMatcher {
+protected:
+ unsigned MMOIdx;
+ SmallVector<unsigned, 4> AddrSpaces;
+
+public:
+ MemoryAddressSpacePredicateMatcher(unsigned InsnVarID, unsigned MMOIdx,
+ ArrayRef<unsigned> AddrSpaces)
+ : InstructionPredicateMatcher(IPM_MemoryAddressSpace, InsnVarID),
+ MMOIdx(MMOIdx), AddrSpaces(AddrSpaces.begin(), AddrSpaces.end()) {}
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == IPM_MemoryAddressSpace;
+ }
+
+ bool isIdentical(const PredicateMatcher &B) const override;
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+class MemoryAlignmentPredicateMatcher : public InstructionPredicateMatcher {
+protected:
+ unsigned MMOIdx;
+ int MinAlign;
+
+public:
+ MemoryAlignmentPredicateMatcher(unsigned InsnVarID, unsigned MMOIdx,
+ int MinAlign)
+ : InstructionPredicateMatcher(IPM_MemoryAlignment, InsnVarID),
+ MMOIdx(MMOIdx), MinAlign(MinAlign) {
+ assert(MinAlign > 0);
+ }
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == IPM_MemoryAlignment;
+ }
+
+ bool isIdentical(const PredicateMatcher &B) const override;
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+/// Generates code to check that the size of an MMO is less-than, equal-to, or
+/// greater than a given LLT.
+class MemoryVsLLTSizePredicateMatcher : public InstructionPredicateMatcher {
+public:
+ enum RelationKind {
+ GreaterThan,
+ EqualTo,
+ LessThan,
+ };
+
+protected:
+ unsigned MMOIdx;
+ RelationKind Relation;
+ unsigned OpIdx;
+
+public:
+ MemoryVsLLTSizePredicateMatcher(unsigned InsnVarID, unsigned MMOIdx,
+ enum RelationKind Relation, unsigned OpIdx)
+ : InstructionPredicateMatcher(IPM_MemoryVsLLTSize, InsnVarID),
+ MMOIdx(MMOIdx), Relation(Relation), OpIdx(OpIdx) {}
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == IPM_MemoryVsLLTSize;
+ }
+ bool isIdentical(const PredicateMatcher &B) const override;
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+// Matcher for immAllOnesV/immAllZerosV
+class VectorSplatImmPredicateMatcher : public InstructionPredicateMatcher {
+public:
+ enum SplatKind { AllZeros, AllOnes };
+
+private:
+ SplatKind Kind;
+
+public:
+ VectorSplatImmPredicateMatcher(unsigned InsnVarID, SplatKind K)
+ : InstructionPredicateMatcher(IPM_VectorSplatImm, InsnVarID), Kind(K) {}
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == IPM_VectorSplatImm;
+ }
+
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return InstructionPredicateMatcher::isIdentical(B) &&
+ Kind == static_cast<const VectorSplatImmPredicateMatcher &>(B).Kind;
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+/// Generates code to check an arbitrary C++ instruction predicate.
+class GenericInstructionPredicateMatcher : public InstructionPredicateMatcher {
+protected:
+ std::string EnumVal;
+
+public:
+ GenericInstructionPredicateMatcher(unsigned InsnVarID,
+ TreePredicateFn Predicate);
+
+ GenericInstructionPredicateMatcher(unsigned InsnVarID,
+ const std::string &EnumVal)
+ : InstructionPredicateMatcher(IPM_GenericPredicate, InsnVarID),
+ EnumVal(EnumVal) {}
+
+ static bool classof(const InstructionPredicateMatcher *P) {
+ return P->getKind() == IPM_GenericPredicate;
+ }
+ bool isIdentical(const PredicateMatcher &B) const override;
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override;
+};
+
+/// Generates code to check for the absence of use of the result.
+// TODO? Generalize this to support checking for one use.
+class NoUsePredicateMatcher : public InstructionPredicateMatcher {
+public:
+ NoUsePredicateMatcher(unsigned InsnVarID)
+ : InstructionPredicateMatcher(IPM_NoUse, InsnVarID) {}
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == IPM_NoUse;
+ }
+
+ bool isIdentical(const PredicateMatcher &B) const override {
+ return InstructionPredicateMatcher::isIdentical(B);
+ }
+
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override {
+ Table << MatchTable::Opcode("GIM_CheckHasNoUse")
+ << MatchTable::Comment("MI") << MatchTable::IntValue(InsnVarID)
+ << MatchTable::LineBreak;
+ }
+};
+
+/// Generates code to check that a set of predicates and operands match for a
+/// particular instruction.
+///
+/// Typical predicates include:
+/// * Has a specific opcode.
+/// * Has an nsw/nuw flag or doesn't.
+class InstructionMatcher final : public PredicateListMatcher<PredicateMatcher> {
+protected:
+ typedef std::vector<std::unique_ptr<OperandMatcher>> OperandVec;
+
+ RuleMatcher &Rule;
+
+ /// The operands to match. All rendered operands must be present even if the
+ /// condition is always true.
+ OperandVec Operands;
+ bool NumOperandsCheck = true;
+
+ std::string SymbolicName;
+ unsigned InsnVarID;
+
+  /// PhysRegInputs - This list has an entry for each explicitly specified
+  /// physreg input to the pattern. The first element is the Register node; the
+  /// second is the recorded slot number the input pattern match saved it in.
+ SmallVector<std::pair<Record *, unsigned>, 2> PhysRegInputs;
+
+public:
+ InstructionMatcher(RuleMatcher &Rule, StringRef SymbolicName,
+ bool NumOpsCheck = true)
+ : Rule(Rule), NumOperandsCheck(NumOpsCheck), SymbolicName(SymbolicName) {
+ // We create a new instruction matcher.
+ // Get a new ID for that instruction.
+ InsnVarID = Rule.implicitlyDefineInsnVar(*this);
+ }
+
+ /// Construct a new instruction predicate and add it to the matcher.
+ template <class Kind, class... Args>
+ std::optional<Kind *> addPredicate(Args &&...args) {
+ Predicates.emplace_back(
+ std::make_unique<Kind>(getInsnVarID(), std::forward<Args>(args)...));
+ return static_cast<Kind *>(Predicates.back().get());
+ }
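+
+  // Illustrative sketch (not part of the upstream header): e.g. requiring an
+  // exact operand count on the matched instruction `IM`:
+  //   IM.addPredicate<InstructionNumOperandsMatcher>(/*NumOperands=*/2);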
+
+ RuleMatcher &getRuleMatcher() const { return Rule; }
+
+ unsigned getInsnVarID() const { return InsnVarID; }
+
+ /// Add an operand to the matcher.
+ OperandMatcher &addOperand(unsigned OpIdx, const std::string &SymbolicName,
+ unsigned AllocatedTemporariesBaseID);
+ OperandMatcher &getOperand(unsigned OpIdx);
+ OperandMatcher &addPhysRegInput(Record *Reg, unsigned OpIdx,
+ unsigned TempOpIdx);
+
+ ArrayRef<std::pair<Record *, unsigned>> getPhysRegInputs() const {
+ return PhysRegInputs;
+ }
+
+ StringRef getSymbolicName() const { return SymbolicName; }
+ unsigned getNumOperands() const { return Operands.size(); }
+ OperandVec::iterator operands_begin() { return Operands.begin(); }
+ OperandVec::iterator operands_end() { return Operands.end(); }
+ iterator_range<OperandVec::iterator> operands() {
+ return make_range(operands_begin(), operands_end());
+ }
+ OperandVec::const_iterator operands_begin() const { return Operands.begin(); }
+ OperandVec::const_iterator operands_end() const { return Operands.end(); }
+ iterator_range<OperandVec::const_iterator> operands() const {
+ return make_range(operands_begin(), operands_end());
+ }
+ bool operands_empty() const { return Operands.empty(); }
+
+ void pop_front() { Operands.erase(Operands.begin()); }
+
+ void optimize();
+
+ /// Emit MatchTable opcodes that test whether the instruction named in
+ /// InsnVarName matches all the predicates and all the operands.
+ void emitPredicateOpcodes(MatchTable &Table, RuleMatcher &Rule);
+
+ /// Compare the priority of this object and B.
+ ///
+ /// Returns true if this object is more important than B.
+ bool isHigherPriorityThan(InstructionMatcher &B);
+
+ /// Report the maximum number of temporary operands needed by the instruction
+ /// matcher.
+ unsigned countRendererFns();
+
+ InstructionOpcodeMatcher &getOpcodeMatcher() {
+ for (auto &P : predicates())
+ if (auto *OpMatcher = dyn_cast<InstructionOpcodeMatcher>(P.get()))
+ return *OpMatcher;
+ llvm_unreachable("Didn't find an opcode matcher");
+ }
+
+ bool isConstantInstruction() {
+ return getOpcodeMatcher().isConstantInstruction();
+ }
+
+ StringRef getOpcode() { return getOpcodeMatcher().getOpcode(); }
+};
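As a rough illustration of how these pieces fit together during rule import, the sketch below attaches operands and predicates to an InstructionMatcher and then lowers them to MatchTable opcodes. The helper and operand names are hypothetical, and constructor arguments for concrete predicate classes are elided because their signatures are declared elsewhere.

// Sketch only: IM, Table and Rule are assumed to come from the enclosing
// RuleMatcher machinery.
void sketchPopulateMatcher(InstructionMatcher &IM, MatchTable &Table,
                           RuleMatcher &Rule) {
  // Operands are added by index; the symbolic name later ties each operand to
  // the renderers of the output instruction.
  OperandMatcher &Dst = IM.addOperand(/*OpIdx=*/0, "dst",
                                      /*AllocatedTemporariesBaseID=*/0);
  OperandMatcher &Src = IM.addOperand(/*OpIdx=*/1, "src",
                                      /*AllocatedTemporariesBaseID=*/0);
  (void)Dst;
  (void)Src;

  // Predicates are attached with addPredicate<Kind>(...), which forwards its
  // arguments to Kind's constructor:
  // IM.addPredicate<InstructionOpcodeMatcher>(/* constructor args elided */);

  // Everything collected above is finally written out as MatchTable opcodes.
  IM.emitPredicateOpcodes(Table, Rule);
}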
+
+/// Generates code to check that the operand is a register defined by an
+/// instruction that matches the given instruction matcher.
+///
+/// For example, the pattern:
+/// (set $dst, (G_MUL (G_ADD $src1, $src2), $src3))
+/// would use an InstructionOperandMatcher for operand 1 of the G_MUL to match
+/// the:
+/// (G_ADD $src1, $src2)
+/// subpattern.
+class InstructionOperandMatcher : public OperandPredicateMatcher {
+protected:
+ std::unique_ptr<InstructionMatcher> InsnMatcher;
+
+ GISelFlags Flags;
+
+public:
+ InstructionOperandMatcher(unsigned InsnVarID, unsigned OpIdx,
+ RuleMatcher &Rule, StringRef SymbolicName,
+ bool NumOpsCheck = true)
+ : OperandPredicateMatcher(OPM_Instruction, InsnVarID, OpIdx),
+ InsnMatcher(new InstructionMatcher(Rule, SymbolicName, NumOpsCheck)),
+ Flags(Rule.getGISelFlags()) {}
+
+ static bool classof(const PredicateMatcher *P) {
+ return P->getKind() == OPM_Instruction;
+ }
+
+ InstructionMatcher &getInsnMatcher() const { return *InsnMatcher; }
+
+ void emitCaptureOpcodes(MatchTable &Table, RuleMatcher &Rule) const;
+ void emitPredicateOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const override {
+ emitCaptureOpcodes(Table, Rule);
+ InsnMatcher->emitPredicateOpcodes(Table, Rule);
+ }
+
+ bool isHigherPriorityThan(const OperandPredicateMatcher &B) const override;
+
+ /// Report the maximum number of temporary operands needed by the predicate
+ /// matcher.
+ unsigned countRendererFns() const override {
+ return InsnMatcher->countRendererFns();
+ }
+};
+
+//===- Actions ------------------------------------------------------------===//
+class OperandRenderer {
+public:
+ enum RendererKind {
+ OR_Copy,
+ OR_CopyOrAddZeroReg,
+ OR_CopySubReg,
+ OR_CopyPhysReg,
+ OR_CopyConstantAsImm,
+ OR_CopyFConstantAsFPImm,
+ OR_Imm,
+ OR_SubRegIndex,
+ OR_Register,
+ OR_TempRegister,
+ OR_ComplexPattern,
+ OR_Custom,
+ OR_CustomOperand
+ };
+
+protected:
+ RendererKind Kind;
+
+public:
+ OperandRenderer(RendererKind Kind) : Kind(Kind) {}
+ virtual ~OperandRenderer();
+
+ RendererKind getKind() const { return Kind; }
+
+ virtual void emitRenderOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const = 0;
+};
+
+/// A CopyRenderer emits code to copy a single operand from an existing
+/// instruction to the one being built.
+class CopyRenderer : public OperandRenderer {
+protected:
+ unsigned NewInsnID;
+ /// The name of the operand.
+ const StringRef SymbolicName;
+
+public:
+ CopyRenderer(unsigned NewInsnID, StringRef SymbolicName)
+ : OperandRenderer(OR_Copy), NewInsnID(NewInsnID),
+ SymbolicName(SymbolicName) {
+ assert(!SymbolicName.empty() && "Cannot copy from an unspecified source");
+ }
+
+ static bool classof(const OperandRenderer *R) {
+ return R->getKind() == OR_Copy;
+ }
+
+ StringRef getSymbolicName() const { return SymbolicName; }
+
+ void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override;
+};
+
+/// A CopyPhysRegRenderer emits code to copy a virtual register to a specific
+/// physical register.
+class CopyPhysRegRenderer : public OperandRenderer {
+protected:
+ unsigned NewInsnID;
+ Record *PhysReg;
+
+public:
+ CopyPhysRegRenderer(unsigned NewInsnID, Record *Reg)
+ : OperandRenderer(OR_CopyPhysReg), NewInsnID(NewInsnID), PhysReg(Reg) {
+ assert(PhysReg);
+ }
+
+ static bool classof(const OperandRenderer *R) {
+ return R->getKind() == OR_CopyPhysReg;
+ }
+
+ Record *getPhysReg() const { return PhysReg; }
+
+ void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override;
+};
+
+/// A CopyOrAddZeroRegRenderer emits code to copy a single operand from an
+/// existing instruction to the one being built. If the operand turns out to be
+/// a 'G_CONSTANT 0' then it replaces the operand with a zero register.
+class CopyOrAddZeroRegRenderer : public OperandRenderer {
+protected:
+ unsigned NewInsnID;
+ /// The name of the operand.
+ const StringRef SymbolicName;
+ const Record *ZeroRegisterDef;
+
+public:
+ CopyOrAddZeroRegRenderer(unsigned NewInsnID, StringRef SymbolicName,
+ Record *ZeroRegisterDef)
+ : OperandRenderer(OR_CopyOrAddZeroReg), NewInsnID(NewInsnID),
+ SymbolicName(SymbolicName), ZeroRegisterDef(ZeroRegisterDef) {
+ assert(!SymbolicName.empty() && "Cannot copy from an unspecified source");
+ }
+
+ static bool classof(const OperandRenderer *R) {
+ return R->getKind() == OR_CopyOrAddZeroReg;
+ }
+
+ StringRef getSymbolicName() const { return SymbolicName; }
+
+ void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override;
+};
+
+/// A CopyConstantAsImmRenderer emits code to render a G_CONSTANT instruction to
+/// an extended immediate operand.
+class CopyConstantAsImmRenderer : public OperandRenderer {
+protected:
+ unsigned NewInsnID;
+ /// The name of the operand.
+ const std::string SymbolicName;
+ bool Signed;
+
+public:
+ CopyConstantAsImmRenderer(unsigned NewInsnID, StringRef SymbolicName)
+ : OperandRenderer(OR_CopyConstantAsImm), NewInsnID(NewInsnID),
+ SymbolicName(SymbolicName), Signed(true) {}
+
+ static bool classof(const OperandRenderer *R) {
+ return R->getKind() == OR_CopyConstantAsImm;
+ }
+
+ StringRef getSymbolicName() const { return SymbolicName; }
+
+ void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override;
+};
+
+/// A CopyFConstantAsFPImmRenderer emits code to render a G_FCONSTANT
+/// instruction to an extended immediate operand.
+class CopyFConstantAsFPImmRenderer : public OperandRenderer {
+protected:
+ unsigned NewInsnID;
+ /// The name of the operand.
+ const std::string SymbolicName;
+
+public:
+ CopyFConstantAsFPImmRenderer(unsigned NewInsnID, StringRef SymbolicName)
+ : OperandRenderer(OR_CopyFConstantAsFPImm), NewInsnID(NewInsnID),
+ SymbolicName(SymbolicName) {}
+
+ static bool classof(const OperandRenderer *R) {
+ return R->getKind() == OR_CopyFConstantAsFPImm;
+ }
+
+ StringRef getSymbolicName() const { return SymbolicName; }
+
+ void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override;
+};
+
+/// A CopySubRegRenderer emits code to copy a single register operand from an
+/// existing instruction to the one being built and indicate that only a
+/// subregister should be copied.
+class CopySubRegRenderer : public OperandRenderer {
+protected:
+ unsigned NewInsnID;
+ /// The name of the operand.
+ const StringRef SymbolicName;
+ /// The subregister to extract.
+ const CodeGenSubRegIndex *SubReg;
+
+public:
+ CopySubRegRenderer(unsigned NewInsnID, StringRef SymbolicName,
+ const CodeGenSubRegIndex *SubReg)
+ : OperandRenderer(OR_CopySubReg), NewInsnID(NewInsnID),
+ SymbolicName(SymbolicName), SubReg(SubReg) {}
+
+ static bool classof(const OperandRenderer *R) {
+ return R->getKind() == OR_CopySubReg;
+ }
+
+ StringRef getSymbolicName() const { return SymbolicName; }
+
+ void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override;
+};
+
+/// Adds a specific physical register to the instruction being built.
+/// This is typically useful for WZR/XZR on AArch64.
+class AddRegisterRenderer : public OperandRenderer {
+protected:
+ unsigned InsnID;
+ const Record *RegisterDef;
+ bool IsDef;
+ const CodeGenTarget &Target;
+
+public:
+ AddRegisterRenderer(unsigned InsnID, const CodeGenTarget &Target,
+ const Record *RegisterDef, bool IsDef = false)
+ : OperandRenderer(OR_Register), InsnID(InsnID), RegisterDef(RegisterDef),
+ IsDef(IsDef), Target(Target) {}
+
+ static bool classof(const OperandRenderer *R) {
+ return R->getKind() == OR_Register;
+ }
+
+ void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override;
+};
+
+/// Adds a specific temporary virtual register to the instruction being built.
+/// This is used to chain instructions together when emitting multiple
+/// instructions.
+class TempRegRenderer : public OperandRenderer {
+protected:
+ unsigned InsnID;
+ unsigned TempRegID;
+ const CodeGenSubRegIndex *SubRegIdx;
+ bool IsDef;
+ bool IsDead;
+
+public:
+ TempRegRenderer(unsigned InsnID, unsigned TempRegID, bool IsDef = false,
+ const CodeGenSubRegIndex *SubReg = nullptr,
+ bool IsDead = false)
+      : OperandRenderer(OR_TempRegister), InsnID(InsnID), TempRegID(TempRegID),
+ SubRegIdx(SubReg), IsDef(IsDef), IsDead(IsDead) {}
+
+ static bool classof(const OperandRenderer *R) {
+ return R->getKind() == OR_TempRegister;
+ }
+
+ void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override;
+};
+
+/// Adds a specific immediate to the instruction being built.
+class ImmRenderer : public OperandRenderer {
+protected:
+ unsigned InsnID;
+ int64_t Imm;
+
+public:
+ ImmRenderer(unsigned InsnID, int64_t Imm)
+ : OperandRenderer(OR_Imm), InsnID(InsnID), Imm(Imm) {}
+
+ static bool classof(const OperandRenderer *R) {
+ return R->getKind() == OR_Imm;
+ }
+
+ void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
+ Table << MatchTable::Opcode("GIR_AddImm") << MatchTable::Comment("InsnID")
+ << MatchTable::IntValue(InsnID) << MatchTable::Comment("Imm")
+ << MatchTable::IntValue(Imm) << MatchTable::LineBreak;
+ }
+};
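For instance, an ImmRenderer that appends the literal 42 to output instruction 0 contributes a table entry along these lines; the exact textual rendering of the MatchTable tokens is defined elsewhere, so the emitted line is only approximate.

ImmRenderer AddImm(/*InsnID=*/0, /*Imm=*/42);
// AddImm.emitRenderOpcodes(Table, Rule) appends roughly:
//   GIR_AddImm, /*InsnID*/0, /*Imm*/42,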
+
+/// Adds an enum value for a subreg index to the instruction being built.
+class SubRegIndexRenderer : public OperandRenderer {
+protected:
+ unsigned InsnID;
+ const CodeGenSubRegIndex *SubRegIdx;
+
+public:
+ SubRegIndexRenderer(unsigned InsnID, const CodeGenSubRegIndex *SRI)
+ : OperandRenderer(OR_SubRegIndex), InsnID(InsnID), SubRegIdx(SRI) {}
+
+ static bool classof(const OperandRenderer *R) {
+ return R->getKind() == OR_SubRegIndex;
+ }
+
+ void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override;
+};
+
+/// Adds operands by calling a renderer function supplied by the ComplexPattern
+/// matcher function.
+class RenderComplexPatternOperand : public OperandRenderer {
+private:
+ unsigned InsnID;
+ const Record &TheDef;
+ /// The name of the operand.
+ const StringRef SymbolicName;
+ /// The renderer number. This must be unique within a rule since it's used to
+ /// identify a temporary variable to hold the renderer function.
+ unsigned RendererID;
+ /// When provided, this is the suboperand of the ComplexPattern operand to
+ /// render. Otherwise all the suboperands will be rendered.
+ std::optional<unsigned> SubOperand;
+ /// The subregister to extract. Render the whole register if not specified.
+ const CodeGenSubRegIndex *SubReg;
+
+ unsigned getNumOperands() const {
+ return TheDef.getValueAsDag("Operands")->getNumArgs();
+ }
+
+public:
+ RenderComplexPatternOperand(unsigned InsnID, const Record &TheDef,
+ StringRef SymbolicName, unsigned RendererID,
+ std::optional<unsigned> SubOperand = std::nullopt,
+ const CodeGenSubRegIndex *SubReg = nullptr)
+ : OperandRenderer(OR_ComplexPattern), InsnID(InsnID), TheDef(TheDef),
+ SymbolicName(SymbolicName), RendererID(RendererID),
+ SubOperand(SubOperand), SubReg(SubReg) {}
+
+ static bool classof(const OperandRenderer *R) {
+ return R->getKind() == OR_ComplexPattern;
+ }
+
+ void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override;
+};
+
+class CustomRenderer : public OperandRenderer {
+protected:
+ unsigned InsnID;
+ const Record &Renderer;
+ /// The name of the operand.
+ const std::string SymbolicName;
+
+public:
+ CustomRenderer(unsigned InsnID, const Record &Renderer,
+ StringRef SymbolicName)
+ : OperandRenderer(OR_Custom), InsnID(InsnID), Renderer(Renderer),
+ SymbolicName(SymbolicName) {}
+
+ static bool classof(const OperandRenderer *R) {
+ return R->getKind() == OR_Custom;
+ }
+
+ void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override;
+};
+
+class CustomOperandRenderer : public OperandRenderer {
+protected:
+ unsigned InsnID;
+ const Record &Renderer;
+ /// The name of the operand.
+ const std::string SymbolicName;
+
+public:
+ CustomOperandRenderer(unsigned InsnID, const Record &Renderer,
+ StringRef SymbolicName)
+ : OperandRenderer(OR_CustomOperand), InsnID(InsnID), Renderer(Renderer),
+ SymbolicName(SymbolicName) {}
+
+ static bool classof(const OperandRenderer *R) {
+ return R->getKind() == OR_CustomOperand;
+ }
+
+ void emitRenderOpcodes(MatchTable &Table, RuleMatcher &Rule) const override;
+};
+
+/// An action taken when all Matcher predicates succeeded for a parent rule.
+///
+/// Typical actions include:
+/// * Changing the opcode of an instruction.
+/// * Adding an operand to an instruction.
+class MatchAction {
+public:
+ virtual ~MatchAction() {}
+
+ /// Emit the MatchTable opcodes to implement the action.
+ virtual void emitActionOpcodes(MatchTable &Table,
+ RuleMatcher &Rule) const = 0;
+};
+
+/// Generates a comment describing the matched rule being acted upon.
+class DebugCommentAction : public MatchAction {
+private:
+ std::string S;
+
+public:
+ DebugCommentAction(StringRef S) : S(std::string(S)) {}
+
+ void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
+ Table << MatchTable::Comment(S) << MatchTable::LineBreak;
+ }
+};
+
+class CustomCXXAction : public MatchAction {
+ std::string FnEnumName;
+
+public:
+ CustomCXXAction(StringRef FnEnumName) : FnEnumName(FnEnumName.str()) {}
+
+ void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override;
+};
+
+/// Generates code to build an instruction or mutate an existing instruction
+/// into the desired instruction when this is possible.
+class BuildMIAction : public MatchAction {
+private:
+ unsigned InsnID;
+ const CodeGenInstruction *I;
+ InstructionMatcher *Matched;
+ std::vector<std::unique_ptr<OperandRenderer>> OperandRenderers;
+
+ /// True if the instruction can be built solely by mutating the opcode.
+ bool canMutate(RuleMatcher &Rule, const InstructionMatcher *Insn) const;
+
+public:
+ BuildMIAction(unsigned InsnID, const CodeGenInstruction *I)
+ : InsnID(InsnID), I(I), Matched(nullptr) {}
+
+ unsigned getInsnID() const { return InsnID; }
+ const CodeGenInstruction *getCGI() const { return I; }
+
+ void chooseInsnToMutate(RuleMatcher &Rule);
+
+ template <class Kind, class... Args> Kind &addRenderer(Args &&...args) {
+ OperandRenderers.emplace_back(
+ std::make_unique<Kind>(InsnID, std::forward<Args>(args)...));
+ return *static_cast<Kind *>(OperandRenderers.back().get());
+ }
+
+ void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override;
+};
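A hedged sketch of how a rule's output side is typically assembled. CGI stands in for the CodeGenInstruction of the selected target opcode, Rule for the enclosing RuleMatcher, and the operand names are hypothetical.

BuildMIAction MIBuilder(/*InsnID=*/0, CGI);
// addRenderer<Kind>(...) passes (InsnID, args...) on to Kind's constructor.
MIBuilder.addRenderer<CopyRenderer>("dst");      // copy the matched "dst" operand
MIBuilder.addRenderer<CopyRenderer>("src");      // copy the matched "src" operand
MIBuilder.addRenderer<ImmRenderer>(int64_t(42)); // append a literal immediate
// If the matched instruction can be reused, chooseInsnToMutate() lets
// emitActionOpcodes() emit an in-place opcode mutation instead of a fresh
// instruction build.
MIBuilder.chooseInsnToMutate(Rule);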
+
+/// Generates code to constrain the operands of an output instruction to the
+/// register classes specified by the definition of that instruction.
+class ConstrainOperandsToDefinitionAction : public MatchAction {
+ unsigned InsnID;
+
+public:
+ ConstrainOperandsToDefinitionAction(unsigned InsnID) : InsnID(InsnID) {}
+
+ void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override {
+ Table << MatchTable::Opcode("GIR_ConstrainSelectedInstOperands")
+ << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID)
+ << MatchTable::LineBreak;
+ }
+};
+
+/// Generates code to constrain the specified operand of an output instruction
+/// to the specified register class.
+class ConstrainOperandToRegClassAction : public MatchAction {
+ unsigned InsnID;
+ unsigned OpIdx;
+ const CodeGenRegisterClass &RC;
+
+public:
+ ConstrainOperandToRegClassAction(unsigned InsnID, unsigned OpIdx,
+ const CodeGenRegisterClass &RC)
+ : InsnID(InsnID), OpIdx(OpIdx), RC(RC) {}
+
+ void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override;
+};
+
+/// Generates code to create a temporary register which can be used to chain
+/// instructions together.
+class MakeTempRegisterAction : public MatchAction {
+private:
+ LLTCodeGen Ty;
+ unsigned TempRegID;
+
+public:
+ MakeTempRegisterAction(const LLTCodeGen &Ty, unsigned TempRegID)
+ : Ty(Ty), TempRegID(TempRegID) {
+ KnownTypes.insert(Ty);
+ }
+
+ void emitActionOpcodes(MatchTable &Table, RuleMatcher &Rule) const override;
+};
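When a rule expands to more than one output instruction, the actions are usually chained through a temporary register, roughly as below. Ty is assumed to be an existing LLTCodeGen for the intermediate value, FirstCGI/SecondCGI are assumed CodeGenInstruction descriptions, and the IDs are illustrative.

unsigned TempRegID = 0; // normally handed out by the enclosing RuleMatcher
// 1. Reserve a temporary virtual register of the intermediate type.
MakeTempRegisterAction MakeTemp(Ty, TempRegID);
// 2. The first emitted instruction defines the temporary...
BuildMIAction First(/*InsnID=*/1, FirstCGI);
First.addRenderer<TempRegRenderer>(TempRegID, /*IsDef=*/true);
// 3. ...and the second emitted instruction consumes it.
BuildMIAction Second(/*InsnID=*/0, SecondCGI);
Second.addRenderer<TempRegRenderer>(TempRegID);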
+
+} // namespace gi
+} // namespace llvm
+
+#endif
diff --git a/llvm/utils/TableGen/GlobalISelMatchTableExecutorEmitter.cpp b/llvm/utils/TableGen/GlobalISelMatchTableExecutorEmitter.cpp
new file mode 100644
index 000000000000..8dc422b140a5
--- /dev/null
+++ b/llvm/utils/TableGen/GlobalISelMatchTableExecutorEmitter.cpp
@@ -0,0 +1,267 @@
+//===- GlobalISelMatchTableExecutorEmitter.cpp ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "GlobalISelMatchTableExecutorEmitter.h"
+#include "GlobalISelMatchTable.h"
+
+using namespace llvm;
+using namespace llvm::gi;
+
+void GlobalISelMatchTableExecutorEmitter::emitSubtargetFeatureBitsetImpl(
+ raw_ostream &OS, ArrayRef<RuleMatcher> Rules) {
+ SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(SubtargetFeatures,
+ OS);
+
+ // Separate subtarget features by how often they must be recomputed.
+ SubtargetFeatureInfoMap ModuleFeatures;
+ std::copy_if(SubtargetFeatures.begin(), SubtargetFeatures.end(),
+ std::inserter(ModuleFeatures, ModuleFeatures.end()),
+ [](const SubtargetFeatureInfoMap::value_type &X) {
+ return !X.second.mustRecomputePerFunction();
+ });
+ SubtargetFeatureInfoMap FunctionFeatures;
+ std::copy_if(SubtargetFeatures.begin(), SubtargetFeatures.end(),
+ std::inserter(FunctionFeatures, FunctionFeatures.end()),
+ [](const SubtargetFeatureInfoMap::value_type &X) {
+ return X.second.mustRecomputePerFunction();
+ });
+
+ SubtargetFeatureInfo::emitComputeAvailableFeatures(
+ getTarget().getName(), getClassName(), "computeAvailableModuleFeatures",
+ ModuleFeatures, OS);
+
+ OS << "void " << getClassName()
+ << "::setupGeneratedPerFunctionState(MachineFunction &MF) {\n"
+ " AvailableFunctionFeatures = computeAvailableFunctionFeatures("
+ "(const "
+ << getTarget().getName()
+ << "Subtarget *)&MF.getSubtarget(), &MF);\n"
+ "}\n";
+
+ SubtargetFeatureInfo::emitComputeAvailableFeatures(
+ getTarget().getName(), getClassName(), "computeAvailableFunctionFeatures",
+ FunctionFeatures, OS, "const MachineFunction *MF");
+
+ // Emit a table containing the PredicateBitsets objects needed by the matcher
+ // and an enum for the matcher to reference them with.
+ std::vector<std::vector<Record *>> FeatureBitsets;
+ FeatureBitsets.reserve(Rules.size());
+ for (auto &Rule : Rules)
+ FeatureBitsets.push_back(Rule.getRequiredFeatures());
+ llvm::sort(FeatureBitsets, [&](const std::vector<Record *> &A,
+ const std::vector<Record *> &B) {
+ if (A.size() < B.size())
+ return true;
+ if (A.size() > B.size())
+ return false;
+ for (auto [First, Second] : zip(A, B)) {
+ if (First->getName() < Second->getName())
+ return true;
+ if (First->getName() > Second->getName())
+ return false;
+ }
+ return false;
+ });
+ FeatureBitsets.erase(
+ std::unique(FeatureBitsets.begin(), FeatureBitsets.end()),
+ FeatureBitsets.end());
+ OS << "// Feature bitsets.\n"
+ << "enum {\n"
+ << " GIFBS_Invalid,\n";
+ for (const auto &FeatureBitset : FeatureBitsets) {
+ if (FeatureBitset.empty())
+ continue;
+ OS << " " << getNameForFeatureBitset(FeatureBitset) << ",\n";
+ }
+ OS << "};\n"
+ << "const static PredicateBitset FeatureBitsets[] {\n"
+ << " {}, // GIFBS_Invalid\n";
+ for (const auto &FeatureBitset : FeatureBitsets) {
+ if (FeatureBitset.empty())
+ continue;
+ OS << " {";
+ for (const auto &Feature : FeatureBitset) {
+ const auto &I = SubtargetFeatures.find(Feature);
+ assert(I != SubtargetFeatures.end() && "Didn't import predicate?");
+ OS << I->second.getEnumBitName() << ", ";
+ }
+ OS << "},\n";
+ }
+ OS << "};\n\n";
+}
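For two rules that require {FeatureA} and {FeatureA, FeatureB}, the fragment produced above comes out approximately as follows; the enumerator and bit names are illustrative, since getNameForFeatureBitset and getEnumBitName are defined elsewhere.

// Feature bitsets.
enum {
  GIFBS_Invalid,
  GIFBS_FeatureA,
  GIFBS_FeatureA_FeatureB,
};
const static PredicateBitset FeatureBitsets[] {
  {}, // GIFBS_Invalid
  {Feature_FeatureABit, },
  {Feature_FeatureABit, Feature_FeatureBBit, },
};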
+
+void GlobalISelMatchTableExecutorEmitter::emitComplexPredicates(
+ raw_ostream &OS, ArrayRef<Record *> ComplexOperandMatchers) {
+ // Emit complex predicate table and an enum to reference them with.
+ OS << "// ComplexPattern predicates.\n"
+ << "enum {\n"
+ << " GICP_Invalid,\n";
+ for (const auto &Record : ComplexOperandMatchers)
+ OS << " GICP_" << Record->getName() << ",\n";
+ OS << "};\n"
+ << "// See constructor for table contents\n\n";
+
+ OS << getClassName() << "::ComplexMatcherMemFn\n"
+ << getClassName() << "::ComplexPredicateFns[] = {\n"
+ << " nullptr, // GICP_Invalid\n";
+ for (const auto &Record : ComplexOperandMatchers)
+ OS << " &" << getClassName()
+ << "::" << Record->getValueAsString("MatcherFn") << ", // "
+ << Record->getName() << "\n";
+ OS << "};\n\n";
+}
+
+void GlobalISelMatchTableExecutorEmitter::emitCustomOperandRenderers(
+ raw_ostream &OS, ArrayRef<StringRef> CustomOperandRenderers) {
+ OS << "// Custom renderers.\n"
+ << "enum {\n"
+ << " GICR_Invalid,\n";
+ for (const auto &Fn : CustomOperandRenderers)
+ OS << " GICR_" << Fn << ",\n";
+ OS << "};\n";
+
+ OS << getClassName() << "::CustomRendererFn\n"
+ << getClassName() << "::CustomRenderers[] = {\n"
+ << " nullptr, // GICR_Invalid\n";
+ for (const auto &Fn : CustomOperandRenderers)
+ OS << " &" << getClassName() << "::" << Fn << ",\n";
+ OS << "};\n\n";
+}
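For a single custom renderer named renderFoo and an executor class called MyTargetInstructionSelector (both names hypothetical), the emitted code is roughly:

// Custom renderers.
enum {
  GICR_Invalid,
  GICR_renderFoo,
};
MyTargetInstructionSelector::CustomRendererFn
MyTargetInstructionSelector::CustomRenderers[] = {
  nullptr, // GICR_Invalid
  &MyTargetInstructionSelector::renderFoo,
};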
+
+void GlobalISelMatchTableExecutorEmitter::emitTypeObjects(
+ raw_ostream &OS, ArrayRef<LLTCodeGen> TypeObjects) {
+ OS << "// LLT Objects.\n"
+ << "enum {\n";
+ for (const auto &TypeObject : TypeObjects) {
+ OS << " ";
+ TypeObject.emitCxxEnumValue(OS);
+ OS << ",\n";
+ }
+ OS << "};\n"
+ << "const static size_t NumTypeObjects = " << TypeObjects.size() << ";\n"
+ << "const static LLT TypeObjects[] = {\n";
+ for (const auto &TypeObject : TypeObjects) {
+ OS << " ";
+ TypeObject.emitCxxConstructorCall(OS);
+ OS << ",\n";
+ }
+ OS << "};\n\n";
+}
+
+void GlobalISelMatchTableExecutorEmitter::emitMatchTable(
+ raw_ostream &OS, const MatchTable &Table) {
+ OS << "const int64_t *" << getClassName() << "::getMatchTable() const {\n";
+ Table.emitDeclaration(OS);
+ OS << " return ";
+ Table.emitUse(OS);
+ OS << ";\n}\n";
+}
+
+void GlobalISelMatchTableExecutorEmitter::emitExecutorImpl(
+ raw_ostream &OS, const MatchTable &Table, ArrayRef<LLTCodeGen> TypeObjects,
+ ArrayRef<RuleMatcher> Rules, ArrayRef<Record *> ComplexOperandMatchers,
+ ArrayRef<StringRef> CustomOperandRenderers, StringRef IfDefName) {
+ OS << "#ifdef " << IfDefName << "\n";
+ emitTypeObjects(OS, TypeObjects);
+ emitSubtargetFeatureBitsetImpl(OS, Rules);
+ emitComplexPredicates(OS, ComplexOperandMatchers);
+ emitMIPredicateFns(OS);
+ emitI64ImmPredicateFns(OS);
+ emitAPFloatImmPredicateFns(OS);
+ emitAPIntImmPredicateFns(OS);
+ emitTestSimplePredicate(OS);
+ emitCustomOperandRenderers(OS, CustomOperandRenderers);
+ emitAdditionalImpl(OS);
+ emitRunCustomAction(OS);
+ emitMatchTable(OS, Table);
+ OS << "#endif // ifdef " << IfDefName << "\n\n";
+}
+
+void GlobalISelMatchTableExecutorEmitter::emitPredicateBitset(
+ raw_ostream &OS, StringRef IfDefName) {
+ OS << "#ifdef " << IfDefName << "\n"
+ << "const unsigned MAX_SUBTARGET_PREDICATES = " << SubtargetFeatures.size()
+ << ";\n"
+ << "using PredicateBitset = "
+ "llvm::PredicateBitsetImpl<MAX_SUBTARGET_PREDICATES>;\n"
+ << "#endif // ifdef " << IfDefName << "\n\n";
+}
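With, say, three imported subtarget features and a guard macro of GET_GLOBALISEL_PREDICATE_BITSET (the macro is simply whatever the caller passes as IfDefName), the emitted block is just:

#ifdef GET_GLOBALISEL_PREDICATE_BITSET
const unsigned MAX_SUBTARGET_PREDICATES = 3;
using PredicateBitset = llvm::PredicateBitsetImpl<MAX_SUBTARGET_PREDICATES>;
#endif // ifdef GET_GLOBALISEL_PREDICATE_BITSET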
+
+void GlobalISelMatchTableExecutorEmitter::emitTemporariesDecl(
+ raw_ostream &OS, StringRef IfDefName) {
+ OS << "#ifdef " << IfDefName << "\n"
+ << " mutable MatcherState State;\n"
+ << " typedef "
+ "ComplexRendererFns("
+ << getClassName() << "::*ComplexMatcherMemFn)(MachineOperand &) const;\n"
+
+ << " typedef void(" << getClassName()
+ << "::*CustomRendererFn)(MachineInstrBuilder &, const "
+ "MachineInstr &, int) "
+ "const;\n"
+ << " const ExecInfoTy<PredicateBitset, ComplexMatcherMemFn, "
+ "CustomRendererFn> "
+ "ExecInfo;\n"
+ << " static " << getClassName()
+ << "::ComplexMatcherMemFn ComplexPredicateFns[];\n"
+ << " static " << getClassName()
+ << "::CustomRendererFn CustomRenderers[];\n"
+ << " bool testImmPredicate_I64(unsigned PredicateID, int64_t Imm) const "
+ "override;\n"
+ << " bool testImmPredicate_APInt(unsigned PredicateID, const APInt &Imm) "
+ "const override;\n"
+ << " bool testImmPredicate_APFloat(unsigned PredicateID, const APFloat "
+ "&Imm) const override;\n"
+ << " const int64_t *getMatchTable() const override;\n"
+ << " bool testMIPredicate_MI(unsigned PredicateID, const MachineInstr &MI"
+ ", const MatcherState &State) "
+ "const override;\n"
+ << " bool testSimplePredicate(unsigned PredicateID) const override;\n"
+ << " void runCustomAction(unsigned FnID, const MatcherState &State) "
+ "const override;\n";
+ emitAdditionalTemporariesDecl(OS, " ");
+ OS << "#endif // ifdef " << IfDefName << "\n\n";
+}
+
+void GlobalISelMatchTableExecutorEmitter::emitTemporariesInit(
+ raw_ostream &OS, unsigned MaxTemporaries, StringRef IfDefName) {
+ OS << "#ifdef " << IfDefName << "\n"
+ << ", State(" << MaxTemporaries << "),\n"
+ << "ExecInfo(TypeObjects, NumTypeObjects, FeatureBitsets"
+ << ", ComplexPredicateFns, CustomRenderers)\n"
+ << "#endif // ifdef " << IfDefName << "\n\n";
+
+ emitAdditionalTemporariesInit(OS);
+}
+
+void GlobalISelMatchTableExecutorEmitter::emitPredicatesDecl(
+ raw_ostream &OS, StringRef IfDefName) {
+ OS << "#ifdef " << IfDefName << "\n"
+ << "PredicateBitset AvailableModuleFeatures;\n"
+ << "mutable PredicateBitset AvailableFunctionFeatures;\n"
+ << "PredicateBitset getAvailableFeatures() const {\n"
+ << " return AvailableModuleFeatures | AvailableFunctionFeatures;\n"
+ << "}\n"
+ << "PredicateBitset\n"
+ << "computeAvailableModuleFeatures(const " << getTarget().getName()
+ << "Subtarget *Subtarget) const;\n"
+ << "PredicateBitset\n"
+ << "computeAvailableFunctionFeatures(const " << getTarget().getName()
+ << "Subtarget *Subtarget,\n"
+ << " const MachineFunction *MF) const;\n"
+ << "void setupGeneratedPerFunctionState(MachineFunction &MF) override;\n"
+ << "#endif // ifdef " << IfDefName << "\n";
+}
+
+void GlobalISelMatchTableExecutorEmitter::emitPredicatesInit(
+ raw_ostream &OS, StringRef IfDefName) {
+ OS << "#ifdef " << IfDefName << "\n"
+ << "AvailableModuleFeatures(computeAvailableModuleFeatures(&STI)),\n"
+ << "AvailableFunctionFeatures()\n"
+ << "#endif // ifdef " << IfDefName << "\n";
+}
diff --git a/llvm/utils/TableGen/GlobalISelMatchTableExecutorEmitter.h b/llvm/utils/TableGen/GlobalISelMatchTableExecutorEmitter.h
new file mode 100644
index 000000000000..d526e08a96e3
--- /dev/null
+++ b/llvm/utils/TableGen/GlobalISelMatchTableExecutorEmitter.h
@@ -0,0 +1,228 @@
+//===- GlobalISelMatchTableExecutorEmitter.h ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file contains common code related to emitting
+/// GIMatchTableExecutor-derived classes.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_GLOBALISELMATCHTABLEEXECUTOREMITTER_H
+#define LLVM_UTILS_TABLEGEN_GLOBALISELMATCHTABLEEXECUTOREMITTER_H
+
+#include "SubtargetFeatureInfo.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include <functional>
+#include <vector>
+
+namespace llvm {
+class CodeGenTarget;
+
+namespace gi {
+class RuleMatcher;
+class LLTCodeGen;
+class MatchTable;
+} // namespace gi
+
+/// Abstract base class for TableGen backends that emit a
+/// `GIMatchTableExecutor`-derived class.
+class GlobalISelMatchTableExecutorEmitter {
+  /// Emits logic to check features required by \p Rules using the
+  /// SubtargetFeatures map.
+ void emitSubtargetFeatureBitsetImpl(raw_ostream &OS,
+ ArrayRef<gi::RuleMatcher> Rules);
+
+ /// Emits an enum + an array that stores references to
+ /// \p ComplexOperandMatchers.
+ void emitComplexPredicates(raw_ostream &OS,
+ ArrayRef<Record *> ComplexOperandMatchers);
+
+ /// Emits an enum + an array that stores references to
+ /// \p CustomOperandRenderers.
+ void emitCustomOperandRenderers(raw_ostream &OS,
+ ArrayRef<StringRef> CustomOperandRenderers);
+
+ /// Emits an enum + an array to reference \p TypeObjects (LLTs) in the match
+ /// table.
+ void emitTypeObjects(raw_ostream &OS, ArrayRef<gi::LLTCodeGen> TypeObjects);
+
+ /// Emits the getMatchTable function which contains all of the match table's
+ /// opcodes.
+ void emitMatchTable(raw_ostream &OS, const gi::MatchTable &Table);
+
+ /// Helper function to emit `test` functions for the executor. This emits both
+ /// an enum to reference predicates in the MatchTable, and a function to
+ /// switch over the enum & execute the predicate's C++ code.
+ ///
+ /// \tparam PredicateObject An object representing a predicate to emit.
+ /// \param OS Output stream
+ /// \param TypeIdentifier Identifier used for the type of the predicate,
+ /// e.g. `MI` for MachineInstrs.
+ /// \param ArgType Full type of the argument, e.g. `const MachineInstr &`
+ /// \param ArgName Name of the argument, e.g. `MI` for MachineInstrs.
+ /// \param AdditionalArgs Optional additional argument declarations.
+ /// \param AdditionalDeclarations Optional declarations to write at the start
+ /// of the function, before switching over the predicates enum.
+ /// \param Predicates Predicates to emit.
+ /// \param GetPredEnumName Returns an enum name for a given predicate.
+ /// \param GetPredCode Returns the C++ code of a given predicate.
+ /// \param Comment Optional comment for the enum declaration.
+ template <typename PredicateObject>
+ void emitCxxPredicateFns(
+ raw_ostream &OS, StringRef TypeIdentifier, StringRef ArgType,
+ StringRef ArgName, StringRef AdditionalArgs,
+ StringRef AdditionalDeclarations, ArrayRef<PredicateObject> Predicates,
+ std::function<StringRef(PredicateObject)> GetPredEnumName,
+ std::function<StringRef(PredicateObject)> GetPredCode,
+ StringRef Comment) {
+ if (!Comment.empty())
+ OS << "// " << Comment << "\n";
+ if (!Predicates.empty()) {
+ OS << "enum {\n";
+ StringRef EnumeratorSeparator = " = GICXXPred_Invalid + 1,\n";
+ for (const auto &Pred : Predicates) {
+ OS << " GICXXPred_" << TypeIdentifier << "_Predicate_"
+ << GetPredEnumName(Pred) << EnumeratorSeparator;
+ EnumeratorSeparator = ",\n";
+ }
+ OS << "};\n";
+ }
+
+ OS << "bool " << getClassName() << "::test" << ArgName << "Predicate_"
+ << TypeIdentifier << "(unsigned PredicateID, " << ArgType << " "
+ << ArgName << AdditionalArgs << ") const {\n"
+ << AdditionalDeclarations;
+ if (!AdditionalDeclarations.empty())
+ OS << "\n";
+ if (!Predicates.empty()) {
+ OS << " switch (PredicateID) {\n";
+ for (const auto &Pred : Predicates) {
+ const auto Code = GetPredCode(Pred);
+ OS << " case GICXXPred_" << TypeIdentifier << "_Predicate_"
+ << GetPredEnumName(Pred) << ": {\n"
+ << " " << Code << "\n";
+ if (!StringRef(Code).ltrim().startswith("return")) {
+ OS << " llvm_unreachable(\"" << GetPredEnumName(Pred)
+ << " should have returned\");\n";
+ }
+ OS << " }\n";
+ }
+ OS << " }\n";
+ }
+ OS << " llvm_unreachable(\"Unknown predicate\");\n"
+ << " return false;\n"
+ << "}\n";
+ }
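Concretely, with TypeIdentifier "I64", ArgType "int64_t", ArgName "Imm", no additional arguments or declarations, and a single predicate named simm8 whose code is "return isInt<8>(Imm);" (predicate and class names hypothetical), the generated code comes out roughly as:

enum {
  GICXXPred_I64_Predicate_simm8 = GICXXPred_Invalid + 1,
};
bool MyTargetInstructionSelector::testImmPredicate_I64(unsigned PredicateID,
                                                       int64_t Imm) const {
  switch (PredicateID) {
  case GICXXPred_I64_Predicate_simm8: {
    return isInt<8>(Imm);
  }
  }
  llvm_unreachable("Unknown predicate");
  return false;
}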
+
+protected:
+ /// Emits `testMIPredicate_MI`.
+ /// \tparam PredicateObject An object representing a predicate to emit.
+ /// \param OS Output stream
+ /// \param AdditionalDecls Additional C++ variable declarations.
+ /// \param Predicates Predicates to emit.
+ /// \param GetPredEnumName Returns an enum name for a given predicate.
+ /// \param GetPredCode Returns the C++ code of a given predicate.
+ /// \param Comment Optional comment for the enum declaration.
+ template <typename PredicateObject>
+ void emitMIPredicateFnsImpl(
+ raw_ostream &OS, StringRef AdditionalDecls,
+ ArrayRef<PredicateObject> Predicates,
+ std::function<StringRef(PredicateObject)> GetPredEnumName,
+ std::function<StringRef(PredicateObject)> GetPredCode,
+ StringRef Comment = "") {
+ return emitCxxPredicateFns(
+ OS, "MI", "const MachineInstr &", "MI", ", const MatcherState &State",
+ AdditionalDecls, Predicates, GetPredEnumName, GetPredCode, Comment);
+ }
+
+ /// Helper function to emit the following executor functions:
+ /// * testImmPredicate_I64 (TypeIdentifier=I64)
+ /// * testImmPredicate_APInt (TypeIdentifier=APInt)
+ /// * testImmPredicate_APFloat (TypeIdentifier=APFloat)
+ ///
+ /// \tparam PredicateObject An object representing a predicate to emit.
+ /// \param OS Output stream
+ /// \param TypeIdentifier Identifier used for the type of the predicate
+ /// \param ArgType Full type of the argument
+ /// \param Predicates Predicates to emit.
+ /// \param GetPredEnumName Returns an enum name for a given predicate.
+ /// \param GetPredCode Returns the C++ code of a given predicate.
+ /// \param Comment Optional comment for the enum declaration.
+ template <typename PredicateObject>
+ void emitImmPredicateFnsImpl(
+ raw_ostream &OS, StringRef TypeIdentifier, StringRef ArgType,
+ ArrayRef<PredicateObject> Predicates,
+ std::function<StringRef(PredicateObject)> GetPredEnumName,
+ std::function<StringRef(PredicateObject)> GetPredCode,
+ StringRef Comment = "") {
+ return emitCxxPredicateFns(OS, TypeIdentifier, ArgType, "Imm", "", "",
+ Predicates, GetPredEnumName, GetPredCode,
+ Comment);
+ }
+
+ GlobalISelMatchTableExecutorEmitter() = default;
+
+public:
+ virtual ~GlobalISelMatchTableExecutorEmitter() = default;
+
+ virtual const CodeGenTarget &getTarget() const = 0;
+
+ /// \returns the name of the class being emitted including any prefixes, e.g.
+ /// `AMDGPUInstructionSelector`.
+ virtual StringRef getClassName() const = 0;
+
+ /// Emit additional content in emitExecutorImpl
+ virtual void emitAdditionalImpl(raw_ostream &OS) {}
+
+ /// Emit additional content in emitTemporariesDecl.
+ virtual void emitAdditionalTemporariesDecl(raw_ostream &OS,
+ StringRef Indent) {}
+
+ /// Emit additional content in emitTemporariesInit.
+ virtual void emitAdditionalTemporariesInit(raw_ostream &OS) {}
+
+ /// Emit the `testMIPredicate_MI` function.
+ /// Note: `emitMIPredicateFnsImpl` can be used to do most of the work.
+ virtual void emitMIPredicateFns(raw_ostream &OS) = 0;
+
+ /// Emit the `testImmPredicate_I64` function.
+ /// Note: `emitImmPredicateFnsImpl` can be used to do most of the work.
+ virtual void emitI64ImmPredicateFns(raw_ostream &OS) = 0;
+
+ /// Emit the `testImmPredicate_APFloat` function.
+ /// Note: `emitImmPredicateFnsImpl` can be used to do most of the work.
+ virtual void emitAPFloatImmPredicateFns(raw_ostream &OS) = 0;
+
+ /// Emit the `testImmPredicate_APInt` function.
+ /// Note: `emitImmPredicateFnsImpl` can be used to do most of the work.
+ virtual void emitAPIntImmPredicateFns(raw_ostream &OS) = 0;
+ virtual void emitTestSimplePredicate(raw_ostream &OS) = 0;
+ virtual void emitRunCustomAction(raw_ostream &OS) = 0;
+
+ void emitExecutorImpl(raw_ostream &OS, const gi::MatchTable &Table,
+ ArrayRef<gi::LLTCodeGen> TypeObjects,
+ ArrayRef<gi::RuleMatcher> Rules,
+ ArrayRef<Record *> ComplexOperandMatchers,
+ ArrayRef<StringRef> CustomOperandRenderers,
+ StringRef IfDefName);
+ void emitPredicateBitset(raw_ostream &OS, StringRef IfDefName);
+ void emitTemporariesDecl(raw_ostream &OS, StringRef IfDefName);
+ void emitTemporariesInit(raw_ostream &OS, unsigned MaxTemporaries,
+ StringRef IfDefName);
+ void emitPredicatesDecl(raw_ostream &OS, StringRef IfDefName);
+ void emitPredicatesInit(raw_ostream &OS, StringRef IfDefName);
+
+ // Map of predicates to their subtarget features.
+ SubtargetFeatureInfoMap SubtargetFeatures;
+};
+} // namespace llvm
+
+#endif
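A minimal concrete emitter only has to provide the target, the class name, and the pure-virtual emission hooks; something along these lines, where the backend name "Foo" and the empty hook bodies are purely illustrative.

class FooMatchTableEmitter : public GlobalISelMatchTableExecutorEmitter {
  const CodeGenTarget &Target;

public:
  explicit FooMatchTableEmitter(const CodeGenTarget &Target) : Target(Target) {}

  const CodeGenTarget &getTarget() const override { return Target; }
  StringRef getClassName() const override { return "FooInstructionSelector"; }

  // In practice these would defer to emitMIPredicateFnsImpl /
  // emitImmPredicateFnsImpl with the backend's predicate lists.
  void emitMIPredicateFns(raw_ostream &OS) override {}
  void emitI64ImmPredicateFns(raw_ostream &OS) override {}
  void emitAPFloatImmPredicateFns(raw_ostream &OS) override {}
  void emitAPIntImmPredicateFns(raw_ostream &OS) override {}
  void emitTestSimplePredicate(raw_ostream &OS) override {}
  void emitRunCustomAction(raw_ostream &OS) override {}
};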
diff --git a/llvm/utils/TableGen/InfoByHwMode.cpp b/llvm/utils/TableGen/InfoByHwMode.cpp
index 73c4fbf0a5eb..4e9136e936af 100644
--- a/llvm/utils/TableGen/InfoByHwMode.cpp
+++ b/llvm/utils/TableGen/InfoByHwMode.cpp
@@ -17,7 +17,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-
+#include "llvm/TableGen/Record.h"
#include <string>
using namespace llvm;
@@ -65,8 +65,8 @@ MVT &ValueTypeByHwMode::getOrCreateTypeForMode(unsigned Mode, MVT Type) {
return F->second;
// If Mode is not in the map, look up the default mode. If it exists,
// make a copy of it for Mode and return it.
- auto D = Map.find(DefaultMode);
- if (D != Map.end())
+ auto D = Map.begin();
+ if (D != Map.end() && D->first == DefaultMode)
return Map.insert(std::make_pair(Mode, D->second)).first->second;
// If default mode is not present either, use provided Type.
return Map.insert(std::make_pair(Mode, Type)).first->second;
diff --git a/llvm/utils/TableGen/InfoByHwMode.h b/llvm/utils/TableGen/InfoByHwMode.h
index 44927d0bf0df..b8a6645baca5 100644
--- a/llvm/utils/TableGen/InfoByHwMode.h
+++ b/llvm/utils/TableGen/InfoByHwMode.h
@@ -16,10 +16,16 @@
#include "CodeGenHwModes.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/Support/MachineValueType.h"
-
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/Support/Compiler.h"
+#include <cassert>
+#include <limits>
#include <map>
#include <string>
+#include <tuple>
+#include <utility>
namespace llvm {
@@ -38,18 +44,44 @@ template <typename InfoT>
void union_modes(const InfoByHwMode<InfoT> &A,
const InfoByHwMode<InfoT> &B,
SmallVectorImpl<unsigned> &Modes) {
- SmallSet<unsigned, 4> U;
- for (const auto &P : A)
- U.insert(P.first);
- for (const auto &P : B)
- U.insert(P.first);
- // Make sure that the default mode is last on the list.
+ auto AI = A.begin();
+ auto BI = B.begin();
+
+ // Skip default mode, but remember if we had one.
bool HasDefault = false;
- for (unsigned M : U)
- if (M != DefaultMode)
- Modes.push_back(M);
- else
- HasDefault = true;
+ if (AI != A.end() && AI->first == DefaultMode) {
+ HasDefault = true;
+ ++AI;
+ }
+ if (BI != B.end() && BI->first == DefaultMode) {
+ HasDefault = true;
+ ++BI;
+ }
+
+ while (AI != A.end()) {
+ // If we're done with B, finish A.
+ if (BI == B.end()) {
+ for (; AI != A.end(); ++AI)
+ Modes.push_back(AI->first);
+ break;
+ }
+
+ if (BI->first < AI->first) {
+ Modes.push_back(BI->first);
+ ++BI;
+ } else {
+ Modes.push_back(AI->first);
+ if (AI->first == BI->first)
+ ++BI;
+ ++AI;
+ }
+ }
+
+ // Finish B.
+ for (; BI != B.end(); ++BI)
+ Modes.push_back(BI->first);
+
+ // Make sure that the default mode is last on the list.
if (HasDefault)
Modes.push_back(DefaultMode);
}
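The rewritten union_modes walks the two sorted maps in lockstep instead of building a SmallSet. A standalone sketch of the same merge over plain std::map, with key 0 standing in for DefaultMode and forced to the back of the result:

#include <map>
#include <vector>

static std::vector<unsigned> unionKeys(const std::map<unsigned, int> &A,
                                       const std::map<unsigned, int> &B) {
  std::vector<unsigned> Modes;
  auto AI = A.begin(), BI = B.begin();

  // Skip the default key, but remember whether either input had it.
  bool HasDefault = false;
  if (AI != A.end() && AI->first == 0) { HasDefault = true; ++AI; }
  if (BI != B.end() && BI->first == 0) { HasDefault = true; ++BI; }

  while (AI != A.end()) {
    if (BI == B.end()) {                 // B exhausted: copy the rest of A.
      for (; AI != A.end(); ++AI)
        Modes.push_back(AI->first);
      break;
    }
    if (BI->first < AI->first) {
      Modes.push_back(BI->first);
      ++BI;
    } else {
      Modes.push_back(AI->first);        // emit A's key, skip B's if equal
      if (AI->first == BI->first)
        ++BI;
      ++AI;
    }
  }
  for (; BI != B.end(); ++BI)            // A exhausted: copy the rest of B.
    Modes.push_back(BI->first);

  if (HasDefault)
    Modes.push_back(0);
  return Modes;
}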
@@ -78,20 +110,27 @@ struct InfoByHwMode {
LLVM_ATTRIBUTE_ALWAYS_INLINE
bool hasMode(unsigned M) const { return Map.find(M) != Map.end(); }
LLVM_ATTRIBUTE_ALWAYS_INLINE
- bool hasDefault() const { return hasMode(DefaultMode); }
+ bool hasDefault() const {
+ return !Map.empty() && Map.begin()->first == DefaultMode;
+ }
InfoT &get(unsigned Mode) {
- if (!hasMode(Mode)) {
- assert(hasMode(DefaultMode));
- Map.insert({Mode, Map.at(DefaultMode)});
- }
- return Map.at(Mode);
+ auto F = Map.find(Mode);
+ if (F != Map.end())
+ return F->second;
+
+ // Copy and insert the default mode which should be first.
+ assert(hasDefault());
+ auto P = Map.insert({Mode, Map.begin()->second});
+ return P.first->second;
}
const InfoT &get(unsigned Mode) const {
auto F = Map.find(Mode);
- if (Mode != DefaultMode && F == Map.end())
- F = Map.find(DefaultMode);
- assert(F != Map.end());
+ if (F != Map.end())
+ return F->second;
+ // Get the default mode which should be first.
+ F = Map.begin();
+ assert(F != Map.end() && F->first == DefaultMode);
return F->second;
}
@@ -100,7 +139,7 @@ struct InfoByHwMode {
return Map.size() == 1 && Map.begin()->first == DefaultMode;
}
LLVM_ATTRIBUTE_ALWAYS_INLINE
- InfoT getSimple() const {
+ const InfoT &getSimple() const {
assert(isSimple());
return Map.begin()->second;
}
diff --git a/llvm/utils/TableGen/InstrDocsEmitter.cpp b/llvm/utils/TableGen/InstrDocsEmitter.cpp
index bc391227edd1..616e7b589288 100644
--- a/llvm/utils/TableGen/InstrDocsEmitter.cpp
+++ b/llvm/utils/TableGen/InstrDocsEmitter.cpp
@@ -21,25 +21,24 @@
#include "CodeGenDAGPatterns.h"
#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
-#include "TableGenBackends.h"
#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
#include <string>
#include <vector>
using namespace llvm;
-namespace llvm {
-
-void writeTitle(StringRef Str, raw_ostream &OS, char Kind = '-') {
- OS << std::string(Str.size(), Kind) << "\n" << Str << "\n"
+static void writeTitle(StringRef Str, raw_ostream &OS, char Kind = '-') {
+ OS << std::string(Str.size(), Kind) << "\n"
+ << Str << "\n"
<< std::string(Str.size(), Kind) << "\n";
}
-void writeHeader(StringRef Str, raw_ostream &OS, char Kind = '-') {
+static void writeHeader(StringRef Str, raw_ostream &OS, char Kind = '-') {
OS << Str << "\n" << std::string(Str.size(), Kind) << "\n";
}
-std::string escapeForRST(StringRef Str) {
+static std::string escapeForRST(StringRef Str) {
std::string Result;
Result.reserve(Str.size() + 4);
for (char C : Str) {
@@ -55,7 +54,7 @@ std::string escapeForRST(StringRef Str) {
return Result;
}
-void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) {
+static void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) {
CodeGenDAGPatterns CDP(RK);
CodeGenTarget &Target = CDP.getTargetInfo();
unsigned VariantCount = Target.getAsmParserVariantCount();
@@ -216,4 +215,5 @@ void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) {
}
}
-} // end namespace llvm
+static TableGen::Emitter::Opt X("gen-instr-docs", EmitInstrDocs,
+ "Generate instruction documentation");
diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp
index 564c3ed64e26..cab9ecd4ea97 100644
--- a/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -61,7 +61,9 @@ public:
private:
void emitEnums(raw_ostream &OS);
- typedef std::map<std::vector<std::string>, unsigned> OperandInfoMapTy;
+ typedef std::vector<std::string> OperandInfoTy;
+ typedef std::vector<OperandInfoTy> OperandInfoListTy;
+ typedef std::map<OperandInfoTy, unsigned> OperandInfoMapTy;
/// The keys of this map are maps which have OpName enum values as their keys
/// and instruction operand indices as their values. The values of this map
@@ -86,9 +88,8 @@ private:
void emitFeatureVerifier(raw_ostream &OS, const CodeGenTarget &Target);
void emitRecord(const CodeGenInstruction &Inst, unsigned Num,
Record *InstrInfo,
- std::map<std::vector<Record*>, unsigned> &EL,
- const OperandInfoMapTy &OpInfo,
- raw_ostream &OS);
+ std::map<std::vector<Record *>, unsigned> &EL,
+ const OperandInfoMapTy &OperandInfo, raw_ostream &OS);
void emitOperandTypeMappings(
raw_ostream &OS, const CodeGenTarget &Target,
ArrayRef<const CodeGenInstruction *> NumberedInstructions);
@@ -108,27 +109,21 @@ private:
ArrayRef<const CodeGenInstruction *> NumberedInstructions);
// Operand information.
- void EmitOperandInfo(raw_ostream &OS, OperandInfoMapTy &OperandInfoIDs);
- std::vector<std::string> GetOperandInfo(const CodeGenInstruction &Inst);
+ unsigned CollectOperandInfo(OperandInfoListTy &OperandInfoList,
+ OperandInfoMapTy &OperandInfoMap);
+ void EmitOperandInfo(raw_ostream &OS, OperandInfoListTy &OperandInfoList);
+ OperandInfoTy GetOperandInfo(const CodeGenInstruction &Inst);
};
} // end anonymous namespace
-static void PrintDefList(const std::vector<Record*> &Uses,
- unsigned Num, raw_ostream &OS) {
- OS << "static const MCPhysReg ImplicitList" << Num << "[] = { ";
- for (auto [Idx, U] : enumerate(Uses))
- OS << (Idx ? ", " : "") << getQualifiedName(U);
- OS << " };\n";
-}
-
//===----------------------------------------------------------------------===//
// Operand Info Emission.
//===----------------------------------------------------------------------===//
-std::vector<std::string>
+InstrInfoEmitter::OperandInfoTy
InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) {
- std::vector<std::string> Result;
+ OperandInfoTy Result;
for (auto &Op : Inst.Operands) {
// Handle aggregate operands and normal operands the same way by expanding
@@ -215,24 +210,30 @@ InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) {
return Result;
}
-void InstrInfoEmitter::EmitOperandInfo(raw_ostream &OS,
- OperandInfoMapTy &OperandInfoIDs) {
- // ID #0 is for no operand info.
- unsigned OperandListNum = 0;
- OperandInfoIDs[std::vector<std::string>()] = ++OperandListNum;
-
- OS << "\n";
+unsigned
+InstrInfoEmitter::CollectOperandInfo(OperandInfoListTy &OperandInfoList,
+ OperandInfoMapTy &OperandInfoMap) {
const CodeGenTarget &Target = CDP.getTargetInfo();
+ unsigned Offset = 0;
for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) {
- std::vector<std::string> OperandInfo = GetOperandInfo(*Inst);
- unsigned &N = OperandInfoIDs[OperandInfo];
- if (N != 0) continue;
-
- N = ++OperandListNum;
- OS << "static const MCOperandInfo OperandInfo" << N << "[] = { ";
- for (const std::string &Info : OperandInfo)
- OS << "{ " << Info << " }, ";
- OS << "};\n";
+ OperandInfoTy OperandInfo = GetOperandInfo(*Inst);
+ if (OperandInfoMap.insert({OperandInfo, Offset}).second) {
+ OperandInfoList.push_back(OperandInfo);
+ Offset += OperandInfo.size();
+ }
+ }
+ return Offset;
+}
+
+void InstrInfoEmitter::EmitOperandInfo(raw_ostream &OS,
+ OperandInfoListTy &OperandInfoList) {
+ unsigned Offset = 0;
+ for (auto &OperandInfo : OperandInfoList) {
+ OS << " /* " << Offset << " */";
+ for (auto &Info : OperandInfo)
+ OS << " { " << Info << " },";
+ OS << '\n';
+ Offset += OperandInfo.size();
}
}
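The new CollectOperandInfo follows a deduplicate-and-record-offsets pattern: identical operand-info lists are stored once and later referenced by their starting offset into one flat table. A self-contained sketch of the same idea with standard containers:

#include <map>
#include <string>
#include <vector>

using InfoList = std::vector<std::string>;

static unsigned collectLists(const std::vector<InfoList> &PerInstruction,
                             std::vector<InfoList> &UniqueLists,
                             std::map<InfoList, unsigned> &OffsetOf) {
  unsigned Offset = 0;
  for (const InfoList &List : PerInstruction) {
    // insert() only succeeds the first time a given list is seen.
    if (OffsetOf.insert({List, Offset}).second) {
      UniqueLists.push_back(List);
      Offset += List.size();
    }
  }
  return Offset; // total number of flattened entries
}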
@@ -419,8 +420,7 @@ void InstrInfoEmitter::emitOperandTypeMappings(
// Size the unsigned integer offset to save space.
assert(OperandRecords.size() <= UINT32_MAX &&
"Too many operands for offset table");
- OS << ((OperandRecords.size() <= UINT16_MAX) ? " const uint16_t"
- : " const uint32_t");
+ OS << " static const " << getMinimalTypeForRange(OperandRecords.size());
OS << " Offsets[] = {\n";
for (int I = 0, E = OperandOffsets.size(); I != E; ++I) {
OS << " /* " << getInstrName(I) << " */\n";
@@ -436,7 +436,8 @@ void InstrInfoEmitter::emitOperandTypeMappings(
assert(EnumVal <= INT16_MAX &&
"Too many operand types for operand types table");
OS << "\n using namespace OpTypes;\n";
- OS << ((EnumVal <= INT8_MAX) ? " const int8_t" : " const int16_t");
+ OS << " static";
+ OS << ((EnumVal <= INT8_MAX) ? " const int8_t" : " const int16_t");
OS << " OpcodeOperandTypes[] = {\n ";
for (int I = 0, E = OperandRecords.size(), CurOffset = 0; I != E; ++I) {
// We print each Opcode's operands in its own row.
@@ -732,23 +733,19 @@ void InstrInfoEmitter::emitFeatureVerifier(raw_ostream &OS,
std::map<Record *, SubtargetFeatureInfo, LessRecordByID> SubtargetFeatures;
SubtargetFeatures.insert(All.begin(), All.end());
- OS << "#ifdef ENABLE_INSTR_PREDICATE_VERIFIER\n"
- << "#undef ENABLE_INSTR_PREDICATE_VERIFIER\n"
- << "#include <sstream>\n\n";
-
- OS << "namespace llvm {\n";
- OS << "namespace " << Target.getName() << "_MC {\n\n";
+ OS << "#if defined(ENABLE_INSTR_PREDICATE_VERIFIER) && !defined(NDEBUG)\n"
+ << "#define GET_COMPUTE_FEATURES\n"
+ << "#endif\n";
+ OS << "#ifdef GET_COMPUTE_FEATURES\n"
+ << "#undef GET_COMPUTE_FEATURES\n"
+ << "namespace llvm {\n"
+ << "namespace " << Target.getName() << "_MC {\n\n";
// Emit the subtarget feature enumeration.
SubtargetFeatureInfo::emitSubtargetFeatureBitEnumeration(SubtargetFeatures,
OS);
-
- // Emit the name table for error messages.
- OS << "#ifndef NDEBUG\n";
- SubtargetFeatureInfo::emitNameTable(SubtargetFeatures, OS);
- OS << "#endif // NDEBUG\n\n";
-
// Emit the available features compute function.
+ OS << "inline ";
SubtargetFeatureInfo::emitComputeAssemblerAvailableFeatures(
Target.getName(), "", "computeAvailableFeatures", SubtargetFeatures, OS);
@@ -779,22 +776,21 @@ void InstrInfoEmitter::emitFeatureVerifier(raw_ostream &OS,
FeatureBitsets.erase(
std::unique(FeatureBitsets.begin(), FeatureBitsets.end()),
FeatureBitsets.end());
- OS << "#ifndef NDEBUG\n"
- << "// Feature bitsets.\n"
- << "enum : " << getMinimalTypeForRange(FeatureBitsets.size()) << " {\n"
- << " CEFBS_None,\n";
+ OS << "inline FeatureBitset computeRequiredFeatures(unsigned Opcode) {\n"
+ << " enum : " << getMinimalTypeForRange(FeatureBitsets.size()) << " {\n"
+ << " CEFBS_None,\n";
for (const auto &FeatureBitset : FeatureBitsets) {
if (FeatureBitset.empty())
continue;
- OS << " " << getNameForFeatureBitset(FeatureBitset) << ",\n";
+ OS << " " << getNameForFeatureBitset(FeatureBitset) << ",\n";
}
- OS << "};\n\n"
- << "static constexpr FeatureBitset FeatureBitsets[] = {\n"
- << " {}, // CEFBS_None\n";
+ OS << " };\n\n"
+ << " static constexpr FeatureBitset FeatureBitsets[] = {\n"
+ << " {}, // CEFBS_None\n";
for (const auto &FeatureBitset : FeatureBitsets) {
if (FeatureBitset.empty())
continue;
- OS << " {";
+ OS << " {";
for (const auto &Feature : FeatureBitset) {
const auto &I = SubtargetFeatures.find(Feature);
assert(I != SubtargetFeatures.end() && "Didn't import predicate?");
@@ -802,13 +798,7 @@ void InstrInfoEmitter::emitFeatureVerifier(raw_ostream &OS,
}
OS << "},\n";
}
- OS << "};\n"
- << "#endif // NDEBUG\n\n";
-
- // Emit the predicate verifier.
- OS << "void verifyInstructionPredicates(\n"
- << " unsigned Opcode, const FeatureBitset &Features) {\n"
- << "#ifndef NDEBUG\n"
+ OS << " };\n"
<< " static " << getMinimalTypeForRange(FeatureBitsets.size())
<< " RequiredFeaturesRefs[] = {\n";
unsigned InstIdx = 0;
@@ -827,12 +817,35 @@ void InstrInfoEmitter::emitFeatureVerifier(raw_ostream &OS,
OS << ", // " << Inst->TheDef->getName() << " = " << InstIdx << "\n";
InstIdx++;
}
- OS << " };\n\n";
- OS << " assert(Opcode < " << InstIdx << ");\n";
+ OS << " };\n\n"
+ << " assert(Opcode < " << InstIdx << ");\n"
+ << " return FeatureBitsets[RequiredFeaturesRefs[Opcode]];\n"
+ << "}\n\n";
+
+ OS << "} // end namespace " << Target.getName() << "_MC\n"
+ << "} // end namespace llvm\n"
+ << "#endif // GET_COMPUTE_FEATURES\n\n";
+
+ OS << "#ifdef ENABLE_INSTR_PREDICATE_VERIFIER\n"
+ << "#undef ENABLE_INSTR_PREDICATE_VERIFIER\n"
+ << "#include <sstream>\n\n";
+
+ OS << "namespace llvm {\n";
+ OS << "namespace " << Target.getName() << "_MC {\n\n";
+
+ // Emit the name table for error messages.
+ OS << "#ifndef NDEBUG\n";
+ SubtargetFeatureInfo::emitNameTable(SubtargetFeatures, OS);
+ OS << "#endif // NDEBUG\n\n";
+
+ // Emit the predicate verifier.
+ OS << "void verifyInstructionPredicates(\n"
+ << " unsigned Opcode, const FeatureBitset &Features) {\n"
+ << "#ifndef NDEBUG\n";
OS << " FeatureBitset AvailableFeatures = "
"computeAvailableFeatures(Features);\n";
- OS << " const FeatureBitset &RequiredFeatures = "
- "FeatureBitsets[RequiredFeaturesRefs[Opcode]];\n";
+ OS << " FeatureBitset RequiredFeatures = "
+ << "computeRequiredFeatures(Opcode);\n";
OS << " FeatureBitset MissingFeatures =\n"
<< " (AvailableFeatures & RequiredFeatures) ^\n"
<< " RequiredFeatures;\n"
@@ -891,54 +904,90 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
emitSourceFileHeader("Target Instruction Enum Values and Descriptors", OS);
emitEnums(OS);
- OS << "#ifdef GET_INSTRINFO_MC_DESC\n";
- OS << "#undef GET_INSTRINFO_MC_DESC\n";
-
- OS << "namespace llvm {\n\n";
-
CodeGenTarget &Target = CDP.getTargetInfo();
const std::string &TargetName = std::string(Target.getName());
Record *InstrInfo = Target.getInstructionSet();
- // Keep track of all of the def lists we have emitted already.
- std::map<std::vector<Record*>, unsigned> EmittedLists;
- unsigned ListNumber = 0;
+ // Collect all of the operand info records.
+ Records.startTimer("Collect operand info");
+ OperandInfoListTy OperandInfoList;
+ OperandInfoMapTy OperandInfoMap;
+ unsigned OperandInfoSize =
+ CollectOperandInfo(OperandInfoList, OperandInfoMap);
- // Emit all of the instruction's implicit uses and defs.
- Records.startTimer("Emit uses/defs");
+ // Collect all of the instruction's implicit uses and defs.
+ Records.startTimer("Collect uses/defs");
+ std::map<std::vector<Record*>, unsigned> EmittedLists;
+ std::vector<std::vector<Record *>> ImplicitLists;
+ unsigned ImplicitListSize = 0;
for (const CodeGenInstruction *II : Target.getInstructionsByEnumValue()) {
std::vector<Record *> ImplicitOps = II->ImplicitUses;
llvm::append_range(ImplicitOps, II->ImplicitDefs);
- if (!ImplicitOps.empty()) {
- unsigned &IL = EmittedLists[ImplicitOps];
- if (!IL) {
- IL = ++ListNumber;
- PrintDefList(ImplicitOps, IL, OS);
- }
+ if (EmittedLists.insert({ImplicitOps, ImplicitListSize}).second) {
+ ImplicitLists.push_back(ImplicitOps);
+ ImplicitListSize += ImplicitOps.size();
}
}
- OperandInfoMapTy OperandInfoIDs;
+ ArrayRef<const CodeGenInstruction *> NumberedInstructions =
+ Target.getInstructionsByEnumValue();
+ OS << "#if defined(GET_INSTRINFO_MC_DESC) || "
+ "defined(GET_INSTRINFO_CTOR_DTOR)\n";
+ OS << "namespace llvm {\n\n";
- // Emit all of the operand info records.
- Records.startTimer("Emit operand info");
- EmitOperandInfo(OS, OperandInfoIDs);
+ OS << "struct " << TargetName << "InstrTable {\n";
+ OS << " MCInstrDesc Insts[" << NumberedInstructions.size() << "];\n";
+ OS << " static_assert(alignof(MCInstrDesc) >= alignof(MCOperandInfo), "
+ "\"Unwanted padding between Insts and OperandInfo\");\n";
+ OS << " MCOperandInfo OperandInfo[" << OperandInfoSize << "];\n";
+ OS << " static_assert(alignof(MCOperandInfo) >= alignof(MCPhysReg), "
+ "\"Unwanted padding between OperandInfo and ImplicitOps\");\n";
+ OS << " MCPhysReg ImplicitOps[" << std::max(ImplicitListSize, 1U) << "];\n";
+ OS << "};\n\n";
+
+ OS << "} // end namespace llvm\n";
+ OS << "#endif // defined(GET_INSTRINFO_MC_DESC) || "
+ "defined(GET_INSTRINFO_CTOR_DTOR)\n\n";
+
+ OS << "#ifdef GET_INSTRINFO_MC_DESC\n";
+ OS << "#undef GET_INSTRINFO_MC_DESC\n";
+ OS << "namespace llvm {\n\n";
// Emit all of the MCInstrDesc records in reverse ENUM ordering.
Records.startTimer("Emit InstrDesc records");
- OS << "\nextern const MCInstrDesc " << TargetName << "Insts[] = {\n";
- ArrayRef<const CodeGenInstruction*> NumberedInstructions =
- Target.getInstructionsByEnumValue();
+ OS << "static_assert(sizeof(MCOperandInfo) % sizeof(MCPhysReg) == 0);\n";
+ OS << "static constexpr unsigned " << TargetName << "ImpOpBase = sizeof "
+ << TargetName << "InstrTable::OperandInfo / (sizeof(MCPhysReg));\n\n";
+ OS << "extern const " << TargetName << "InstrTable " << TargetName
+ << "Descs = {\n {\n";
SequenceToOffsetTable<std::string> InstrNames;
unsigned Num = NumberedInstructions.size();
for (const CodeGenInstruction *Inst : reverse(NumberedInstructions)) {
// Keep a list of the instruction names.
InstrNames.add(std::string(Inst->TheDef->getName()));
// Emit the record into the table.
- emitRecord(*Inst, --Num, InstrInfo, EmittedLists, OperandInfoIDs, OS);
+ emitRecord(*Inst, --Num, InstrInfo, EmittedLists, OperandInfoMap, OS);
}
- OS << "};\n\n";
+
+ OS << " }, {\n";
+
+ // Emit all of the operand info records.
+ Records.startTimer("Emit operand info");
+ EmitOperandInfo(OS, OperandInfoList);
+
+ OS << " }, {\n";
+
+ // Emit all of the instruction's implicit uses and defs.
+ Records.startTimer("Emit uses/defs");
+ for (auto &List : ImplicitLists) {
+ OS << " /* " << EmittedLists[List] << " */";
+ for (auto &Reg : List)
+ OS << ' ' << getQualifiedName(Reg) << ',';
+ OS << '\n';
+ }
+
+ OS << " }\n};\n\n";
// Emit the array of instruction names.
Records.startTimer("Emit instruction names");
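For a hypothetical target "Foo" with two instructions, three flattened MCOperandInfo entries and two implicit operands, the table emitted above has roughly this shape:

struct FooInstrTable {
  MCInstrDesc Insts[2];
  static_assert(alignof(MCInstrDesc) >= alignof(MCOperandInfo),
                "Unwanted padding between Insts and OperandInfo");
  MCOperandInfo OperandInfo[3];
  static_assert(alignof(MCOperandInfo) >= alignof(MCPhysReg),
                "Unwanted padding between OperandInfo and ImplicitOps");
  MCPhysReg ImplicitOps[2];
};
static_assert(sizeof(MCOperandInfo) % sizeof(MCPhysReg) == 0);
static constexpr unsigned FooImpOpBase =
    sizeof FooInstrTable::OperandInfo / (sizeof(MCPhysReg));
// Each MCInstrDesc in Insts then refers to its implicit operands as
// FooImpOpBase + <offset> and to its operand info by a flat offset into
// OperandInfo, instead of pointing at separate per-list arrays.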
@@ -1005,7 +1054,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
Records.startTimer("Emit initialization routine");
OS << "static inline void Init" << TargetName
<< "MCInstrInfo(MCInstrInfo *II) {\n";
- OS << " II->InitMCInstrInfo(" << TargetName << "Insts, " << TargetName
+ OS << " II->InitMCInstrInfo(" << TargetName << "Descs.Insts, " << TargetName
<< "InstrNameIndices, " << TargetName << "InstrNameData, ";
if (HasDeprecationFeatures)
OS << TargetName << "InstrDeprecationFeatures, ";
@@ -1053,7 +1102,8 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
OS << "#undef GET_INSTRINFO_CTOR_DTOR\n";
OS << "namespace llvm {\n";
- OS << "extern const MCInstrDesc " << TargetName << "Insts[];\n";
+ OS << "extern const " << TargetName << "InstrTable " << TargetName
+ << "Descs;\n";
OS << "extern const unsigned " << TargetName << "InstrNameIndices[];\n";
OS << "extern const char " << TargetName << "InstrNameData[];\n";
if (HasDeprecationFeatures)
@@ -1067,7 +1117,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
"CatchRetOpcode, unsigned ReturnOpcode)\n"
<< " : TargetInstrInfo(CFSetupOpcode, CFDestroyOpcode, CatchRetOpcode, "
"ReturnOpcode) {\n"
- << " InitMCInstrInfo(" << TargetName << "Insts, " << TargetName
+ << " InitMCInstrInfo(" << TargetName << "Descs.Insts, " << TargetName
<< "InstrNameIndices, " << TargetName << "InstrNameData, ";
if (HasDeprecationFeatures)
OS << TargetName << "InstrDeprecationFeatures, ";
@@ -1101,27 +1151,34 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
emitFeatureVerifier(OS, Target);
}
-void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
- Record *InstrInfo,
- std::map<std::vector<Record*>, unsigned> &EmittedLists,
- const OperandInfoMapTy &OpInfo,
- raw_ostream &OS) {
+void InstrInfoEmitter::emitRecord(
+ const CodeGenInstruction &Inst, unsigned Num, Record *InstrInfo,
+ std::map<std::vector<Record *>, unsigned> &EmittedLists,
+ const OperandInfoMapTy &OperandInfoMap, raw_ostream &OS) {
int MinOperands = 0;
if (!Inst.Operands.empty())
// Each logical operand can be multiple MI operands.
MinOperands = Inst.Operands.back().MIOperandNo +
Inst.Operands.back().MINumOperands;
- OS << " { ";
- OS << Num << ",\t" << MinOperands << ",\t"
- << Inst.Operands.NumDefs << ",\t"
+ OS << " { ";
+ OS << Num << ",\t" << MinOperands << ",\t" << Inst.Operands.NumDefs << ",\t"
<< Inst.TheDef->getValueAsInt("Size") << ",\t"
- << SchedModels.getSchedClassIdx(Inst) << ",\t"
- << Inst.ImplicitUses.size() << ",\t"
- << Inst.ImplicitDefs.size() << ",\t0";
+ << SchedModels.getSchedClassIdx(Inst) << ",\t";
CodeGenTarget &Target = CDP.getTargetInfo();
+ // Emit the implicit use/def list...
+ OS << Inst.ImplicitUses.size() << ",\t" << Inst.ImplicitDefs.size() << ",\t";
+ std::vector<Record *> ImplicitOps = Inst.ImplicitUses;
+ llvm::append_range(ImplicitOps, Inst.ImplicitDefs);
+ OS << Target.getName() << "ImpOpBase + " << EmittedLists[ImplicitOps]
+ << ",\t";
+
+ // Emit the operand info offset.
+ OperandInfoTy OperandInfo = GetOperandInfo(Inst);
+ OS << OperandInfoMap.find(OperandInfo)->second << ",\t0";
+
// Emit all of the target independent flags...
if (Inst.isPreISelOpcode) OS << "|(1ULL<<MCID::PreISelOpcode)";
if (Inst.isPseudo) OS << "|(1ULL<<MCID::Pseudo)";
@@ -1181,22 +1238,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
}
OS << ", 0x";
OS.write_hex(Value);
- OS << "ULL, ";
-
- // Emit the implicit use/def list...
- std::vector<Record *> ImplicitOps = Inst.ImplicitUses;
- llvm::append_range(ImplicitOps, Inst.ImplicitDefs);
- if (ImplicitOps.empty())
- OS << "nullptr, ";
- else
- OS << "ImplicitList" << EmittedLists[ImplicitOps] << ", ";
-
- // Emit the operand info.
- std::vector<std::string> OperandInfo = GetOperandInfo(Inst);
- if (OperandInfo.empty())
- OS << "nullptr";
- else
- OS << "OperandInfo" << OpInfo.find(OperandInfo)->second;
+ OS << "ULL";
OS << " }, // Inst #" << Num << " = " << Inst.TheDef->getName() << "\n";
}
@@ -1245,13 +1287,12 @@ void InstrInfoEmitter::emitEnums(raw_ostream &OS) {
OS << "#endif // GET_INSTRINFO_SCHED_ENUM\n\n";
}
-namespace llvm {
-
-void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS) {
+static void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS) {
RK.startTimer("Analyze DAG patterns");
InstrInfoEmitter(RK).run(OS);
RK.startTimer("Emit map table");
EmitMapTable(RK, OS);
}
-} // end namespace llvm
+static TableGen::Emitter::Opt X("gen-instr-info", EmitInstrInfo,
+ "Generate instruction descriptions");
diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp
index 946a58417594..09aad78536fe 100644
--- a/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -11,16 +11,29 @@
//===----------------------------------------------------------------------===//
#include "CodeGenIntrinsics.h"
-#include "CodeGenTarget.h"
#include "SequenceToOffsetTable.h"
-#include "TableGenBackends.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ModRef.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/StringToOffsetTable.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <algorithm>
+#include <array>
+#include <cassert>
+#include <map>
+#include <optional>
+#include <string>
+#include <utility>
+#include <vector>
using namespace llvm;
cl::OptionCategory GenIntrinsicCat("Options for -gen-intrinsic-enums");
@@ -39,6 +52,8 @@ public:
void run(raw_ostream &OS, bool Enums);
void EmitEnumInfo(const CodeGenIntrinsicTable &Ints, raw_ostream &OS);
+ void EmitArgKind(raw_ostream &OS);
+ void EmitIITInfo(raw_ostream &OS);
void EmitTargetInfo(const CodeGenIntrinsicTable &Ints, raw_ostream &OS);
void EmitIntrinsicToNameTable(const CodeGenIntrinsicTable &Ints,
raw_ostream &OS);
@@ -63,7 +78,13 @@ void IntrinsicEmitter::run(raw_ostream &OS, bool Enums) {
if (Enums) {
// Emit the enum information.
EmitEnumInfo(Ints, OS);
+
+ // Emit ArgKind for Intrinsics.h.
+ EmitArgKind(OS);
} else {
+ // Emit IIT_Info constants.
+ EmitIITInfo(OS);
+
// Emit the target metadata.
EmitTargetInfo(Ints, OS);
@@ -110,7 +131,9 @@ void IntrinsicEmitter::EmitEnumInfo(const CodeGenIntrinsicTable &Ints,
}
// Generate a complete header for target specific intrinsics.
- if (!IntrinsicPrefix.empty()) {
+ if (IntrinsicPrefix.empty()) {
+ OS << "#ifdef GET_INTRINSIC_ENUM_VALUES\n";
+ } else {
std::string UpperPrefix = StringRef(IntrinsicPrefix).upper();
OS << "#ifndef LLVM_IR_INTRINSIC_" << UpperPrefix << "_ENUMS_H\n";
OS << "#define LLVM_IR_INTRINSIC_" << UpperPrefix << "_ENUMS_H\n\n";
@@ -137,6 +160,7 @@ void IntrinsicEmitter::EmitEnumInfo(const CodeGenIntrinsicTable &Ints,
// Emit num_intrinsics into the target neutral enum.
if (IntrinsicPrefix.empty()) {
OS << " num_intrinsics = " << (Ints.size() + 1) << "\n";
+ OS << "#endif\n\n";
} else {
OS << "}; // enum\n";
OS << "} // namespace Intrinsic\n";
@@ -145,6 +169,41 @@ void IntrinsicEmitter::EmitEnumInfo(const CodeGenIntrinsicTable &Ints,
}
}
+void IntrinsicEmitter::EmitArgKind(raw_ostream &OS) {
+ if (!IntrinsicPrefix.empty())
+ return;
+ OS << "// llvm::Intrinsic::IITDescriptor::ArgKind\n";
+ OS << "#ifdef GET_INTRINSIC_ARGKIND\n";
+ if (auto RecArgKind = Records.getDef("ArgKind")) {
+ for (auto &RV : RecArgKind->getValues())
+ OS << " AK_" << RV.getName() << " = " << *RV.getValue() << ",\n";
+ } else {
+ OS << "#error \"ArgKind is not defined\"\n";
+ }
+ OS << "#endif\n\n";
+}
+
+void IntrinsicEmitter::EmitIITInfo(raw_ostream &OS) {
+ OS << "#ifdef GET_INTRINSIC_IITINFO\n";
+ std::array<StringRef, 256> RecsByNumber;
+ auto IIT_Base = Records.getAllDerivedDefinitionsIfDefined("IIT_Base");
+ for (auto Rec : IIT_Base) {
+ auto Number = Rec->getValueAsInt("Number");
+ assert(0 <= Number && Number < (int)RecsByNumber.size() &&
+ "IIT_Info.Number should be uint8_t");
+ assert(RecsByNumber[Number].empty() && "Duplicate IIT_Info.Number");
+ RecsByNumber[Number] = Rec->getName();
+ }
+ if (IIT_Base.size() > 0) {
+ for (unsigned I = 0, E = RecsByNumber.size(); I < E; ++I)
+ if (!RecsByNumber[I].empty())
+ OS << " " << RecsByNumber[I] << " = " << I << ",\n";
+ } else {
+ OS << "#error \"class IIT_Base is not defined\"\n";
+ }
+ OS << "#endif\n\n";
+}
+
void IntrinsicEmitter::EmitTargetInfo(const CodeGenIntrinsicTable &Ints,
raw_ostream &OS) {
OS << "// Target mapping\n";
@@ -191,327 +250,16 @@ void IntrinsicEmitter::EmitIntrinsicToOverloadTable(
OS << "#endif\n\n";
}
-
-// NOTE: This must be kept in synch with the copy in lib/IR/Function.cpp!
-enum IIT_Info {
- // Common values should be encoded with 0-15.
- IIT_Done = 0,
- IIT_I1 = 1,
- IIT_I8 = 2,
- IIT_I16 = 3,
- IIT_I32 = 4,
- IIT_I64 = 5,
- IIT_F16 = 6,
- IIT_F32 = 7,
- IIT_F64 = 8,
- IIT_V2 = 9,
- IIT_V4 = 10,
- IIT_V8 = 11,
- IIT_V16 = 12,
- IIT_V32 = 13,
- IIT_PTR = 14,
- IIT_ARG = 15,
-
- // Values from 16+ are only encodable with the inefficient encoding.
- IIT_V64 = 16,
- IIT_MMX = 17,
- IIT_TOKEN = 18,
- IIT_METADATA = 19,
- IIT_EMPTYSTRUCT = 20,
- IIT_STRUCT2 = 21,
- IIT_STRUCT3 = 22,
- IIT_STRUCT4 = 23,
- IIT_STRUCT5 = 24,
- IIT_EXTEND_ARG = 25,
- IIT_TRUNC_ARG = 26,
- IIT_ANYPTR = 27,
- IIT_V1 = 28,
- IIT_VARARG = 29,
- IIT_HALF_VEC_ARG = 30,
- IIT_SAME_VEC_WIDTH_ARG = 31,
- IIT_PTR_TO_ARG = 32,
- IIT_PTR_TO_ELT = 33,
- IIT_VEC_OF_ANYPTRS_TO_ELT = 34,
- IIT_I128 = 35,
- IIT_V512 = 36,
- IIT_V1024 = 37,
- IIT_STRUCT6 = 38,
- IIT_STRUCT7 = 39,
- IIT_STRUCT8 = 40,
- IIT_F128 = 41,
- IIT_VEC_ELEMENT = 42,
- IIT_SCALABLE_VEC = 43,
- IIT_SUBDIVIDE2_ARG = 44,
- IIT_SUBDIVIDE4_ARG = 45,
- IIT_VEC_OF_BITCASTS_TO_INT = 46,
- IIT_V128 = 47,
- IIT_BF16 = 48,
- IIT_STRUCT9 = 49,
- IIT_V256 = 50,
- IIT_AMX = 51,
- IIT_PPCF128 = 52,
- IIT_V3 = 53,
- IIT_EXTERNREF = 54,
- IIT_FUNCREF = 55,
- IIT_ANYPTR_TO_ELT = 56,
- IIT_I2 = 57,
- IIT_I4 = 58,
-};
-
-static void EncodeFixedValueType(MVT::SimpleValueType VT,
- std::vector<unsigned char> &Sig) {
- // clang-format off
- if (MVT(VT).isInteger()) {
- unsigned BitWidth = MVT(VT).getFixedSizeInBits();
- switch (BitWidth) {
- default: PrintFatalError("unhandled integer type width in intrinsic!");
- case 1: return Sig.push_back(IIT_I1);
- case 2: return Sig.push_back(IIT_I2);
- case 4: return Sig.push_back(IIT_I4);
- case 8: return Sig.push_back(IIT_I8);
- case 16: return Sig.push_back(IIT_I16);
- case 32: return Sig.push_back(IIT_I32);
- case 64: return Sig.push_back(IIT_I64);
- case 128: return Sig.push_back(IIT_I128);
- }
- }
-
- switch (VT) {
- default: PrintFatalError("unhandled MVT in intrinsic!");
- case MVT::f16: return Sig.push_back(IIT_F16);
- case MVT::bf16: return Sig.push_back(IIT_BF16);
- case MVT::f32: return Sig.push_back(IIT_F32);
- case MVT::f64: return Sig.push_back(IIT_F64);
- case MVT::f128: return Sig.push_back(IIT_F128);
- case MVT::ppcf128: return Sig.push_back(IIT_PPCF128);
- case MVT::token: return Sig.push_back(IIT_TOKEN);
- case MVT::Metadata: return Sig.push_back(IIT_METADATA);
- case MVT::x86mmx: return Sig.push_back(IIT_MMX);
- case MVT::x86amx: return Sig.push_back(IIT_AMX);
- // MVT::OtherVT is used to mean the empty struct type here.
- case MVT::Other: return Sig.push_back(IIT_EMPTYSTRUCT);
- // MVT::isVoid is used to represent varargs here.
- case MVT::isVoid: return Sig.push_back(IIT_VARARG);
- case MVT::externref:
- return Sig.push_back(IIT_EXTERNREF);
- case MVT::funcref:
- return Sig.push_back(IIT_FUNCREF);
- }
- // clang-format on
-}
-
-#if defined(_MSC_VER) && !defined(__clang__)
-#pragma optimize("",off) // MSVC 2015 optimizer can't deal with this function.
-#endif
-
-static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
- unsigned &NextArgCode,
- std::vector<unsigned char> &Sig,
- ArrayRef<unsigned char> Mapping) {
-
- if (R->isSubClassOf("LLVMMatchType")) {
- unsigned Number = Mapping[R->getValueAsInt("Number")];
- assert(Number < ArgCodes.size() && "Invalid matching number!");
- if (R->isSubClassOf("LLVMExtendedType"))
- Sig.push_back(IIT_EXTEND_ARG);
- else if (R->isSubClassOf("LLVMTruncatedType"))
- Sig.push_back(IIT_TRUNC_ARG);
- else if (R->isSubClassOf("LLVMHalfElementsVectorType"))
- Sig.push_back(IIT_HALF_VEC_ARG);
- else if (R->isSubClassOf("LLVMScalarOrSameVectorWidth")) {
- Sig.push_back(IIT_SAME_VEC_WIDTH_ARG);
- Sig.push_back((Number << 3) | ArgCodes[Number]);
- MVT::SimpleValueType VT = getValueType(R->getValueAsDef("ElTy"));
- EncodeFixedValueType(VT, Sig);
- return;
- }
- else if (R->isSubClassOf("LLVMPointerTo"))
- Sig.push_back(IIT_PTR_TO_ARG);
- else if (R->isSubClassOf("LLVMVectorOfAnyPointersToElt")) {
- Sig.push_back(IIT_VEC_OF_ANYPTRS_TO_ELT);
- // Encode overloaded ArgNo
- Sig.push_back(NextArgCode++);
- // Encode LLVMMatchType<Number> ArgNo
- Sig.push_back(Number);
- return;
- } else if (R->isSubClassOf("LLVMAnyPointerToElt")) {
- Sig.push_back(IIT_ANYPTR_TO_ELT);
- // Encode overloaded ArgNo
- Sig.push_back(NextArgCode++);
- // Encode LLVMMatchType<Number> ArgNo
- Sig.push_back(Number);
- return;
- } else if (R->isSubClassOf("LLVMPointerToElt"))
- Sig.push_back(IIT_PTR_TO_ELT);
- else if (R->isSubClassOf("LLVMVectorElementType"))
- Sig.push_back(IIT_VEC_ELEMENT);
- else if (R->isSubClassOf("LLVMSubdivide2VectorType"))
- Sig.push_back(IIT_SUBDIVIDE2_ARG);
- else if (R->isSubClassOf("LLVMSubdivide4VectorType"))
- Sig.push_back(IIT_SUBDIVIDE4_ARG);
- else if (R->isSubClassOf("LLVMVectorOfBitcastsToInt"))
- Sig.push_back(IIT_VEC_OF_BITCASTS_TO_INT);
- else
- Sig.push_back(IIT_ARG);
- return Sig.push_back((Number << 3) | 7 /*IITDescriptor::AK_MatchType*/);
- }
-
- MVT::SimpleValueType VT = getValueType(R->getValueAsDef("VT"));
-
- unsigned Tmp = 0;
- switch (VT) {
- default: break;
- case MVT::iPTRAny: ++Tmp; [[fallthrough]];
- case MVT::vAny: ++Tmp; [[fallthrough]];
- case MVT::fAny: ++Tmp; [[fallthrough]];
- case MVT::iAny: ++Tmp; [[fallthrough]];
- case MVT::Any: {
- // If this is an "any" valuetype, then the type is the type of the next
- // type in the list specified to getIntrinsic().
- Sig.push_back(IIT_ARG);
-
- // Figure out what arg # this is consuming, and remember what kind it was.
- assert(NextArgCode < ArgCodes.size() && ArgCodes[NextArgCode] == Tmp &&
- "Invalid or no ArgCode associated with overloaded VT!");
- unsigned ArgNo = NextArgCode++;
-
- // Encode what sort of argument it must be in the low 3 bits of the ArgNo.
- return Sig.push_back((ArgNo << 3) | Tmp);
- }
-
- case MVT::iPTR: {
- unsigned AddrSpace = 0;
- if (R->isSubClassOf("LLVMQualPointerType")) {
- AddrSpace = R->getValueAsInt("AddrSpace");
- assert(AddrSpace < 256 && "Address space exceeds 255");
- }
- if (AddrSpace) {
- Sig.push_back(IIT_ANYPTR);
- Sig.push_back(AddrSpace);
- } else {
- Sig.push_back(IIT_PTR);
- }
- return EncodeFixedType(R->getValueAsDef("ElTy"), ArgCodes, NextArgCode, Sig,
- Mapping);
- }
- }
-
- if (MVT(VT).isVector()) {
- MVT VVT = VT;
- if (VVT.isScalableVector())
- Sig.push_back(IIT_SCALABLE_VEC);
- switch (VVT.getVectorMinNumElements()) {
- default: PrintFatalError("unhandled vector type width in intrinsic!");
- case 1: Sig.push_back(IIT_V1); break;
- case 2: Sig.push_back(IIT_V2); break;
- case 3: Sig.push_back(IIT_V3); break;
- case 4: Sig.push_back(IIT_V4); break;
- case 8: Sig.push_back(IIT_V8); break;
- case 16: Sig.push_back(IIT_V16); break;
- case 32: Sig.push_back(IIT_V32); break;
- case 64: Sig.push_back(IIT_V64); break;
- case 128: Sig.push_back(IIT_V128); break;
- case 256: Sig.push_back(IIT_V256); break;
- case 512: Sig.push_back(IIT_V512); break;
- case 1024: Sig.push_back(IIT_V1024); break;
- }
-
- return EncodeFixedValueType(VVT.getVectorElementType().SimpleTy, Sig);
- }
-
- EncodeFixedValueType(VT, Sig);
-}
-
-static void UpdateArgCodes(Record *R, std::vector<unsigned char> &ArgCodes,
- unsigned int &NumInserted,
- SmallVectorImpl<unsigned char> &Mapping) {
- if (R->isSubClassOf("LLVMMatchType")) {
- if (R->isSubClassOf("LLVMVectorOfAnyPointersToElt")) {
- ArgCodes.push_back(3 /*vAny*/);
- ++NumInserted;
- } else if (R->isSubClassOf("LLVMAnyPointerToElt")) {
- ArgCodes.push_back(4 /*iPTRAny*/);
- ++NumInserted;
- }
- return;
- }
-
- unsigned Tmp = 0;
- switch (getValueType(R->getValueAsDef("VT"))) {
- default: break;
- case MVT::iPTR:
- UpdateArgCodes(R->getValueAsDef("ElTy"), ArgCodes, NumInserted, Mapping);
- break;
- case MVT::iPTRAny:
- ++Tmp;
- [[fallthrough]];
- case MVT::vAny:
- ++Tmp;
- [[fallthrough]];
- case MVT::fAny:
- ++Tmp;
- [[fallthrough]];
- case MVT::iAny:
- ++Tmp;
- [[fallthrough]];
- case MVT::Any:
- unsigned OriginalIdx = ArgCodes.size() - NumInserted;
- assert(OriginalIdx >= Mapping.size());
- Mapping.resize(OriginalIdx+1);
- Mapping[OriginalIdx] = ArgCodes.size();
- ArgCodes.push_back(Tmp);
- break;
- }
-}
-
-#if defined(_MSC_VER) && !defined(__clang__)
-#pragma optimize("",on)
-#endif
-
/// ComputeFixedEncoding - If we can encode the type signature for this
/// intrinsic into 32 bits, return it. If not, return ~0U.
static void ComputeFixedEncoding(const CodeGenIntrinsic &Int,
std::vector<unsigned char> &TypeSig) {
- std::vector<unsigned char> ArgCodes;
-
- // Add codes for any overloaded result VTs.
- unsigned int NumInserted = 0;
- SmallVector<unsigned char, 8> ArgMapping;
- for (unsigned i = 0, e = Int.IS.RetVTs.size(); i != e; ++i)
- UpdateArgCodes(Int.IS.RetTypeDefs[i], ArgCodes, NumInserted, ArgMapping);
-
- // Add codes for any overloaded operand VTs.
- for (unsigned i = 0, e = Int.IS.ParamTypeDefs.size(); i != e; ++i)
- UpdateArgCodes(Int.IS.ParamTypeDefs[i], ArgCodes, NumInserted, ArgMapping);
-
- unsigned NextArgCode = 0;
- if (Int.IS.RetVTs.empty())
- TypeSig.push_back(IIT_Done);
- else if (Int.IS.RetVTs.size() == 1 &&
- Int.IS.RetVTs[0] == MVT::isVoid)
- TypeSig.push_back(IIT_Done);
- else {
- switch (Int.IS.RetVTs.size()) {
- case 1: break;
- case 2: TypeSig.push_back(IIT_STRUCT2); break;
- case 3: TypeSig.push_back(IIT_STRUCT3); break;
- case 4: TypeSig.push_back(IIT_STRUCT4); break;
- case 5: TypeSig.push_back(IIT_STRUCT5); break;
- case 6: TypeSig.push_back(IIT_STRUCT6); break;
- case 7: TypeSig.push_back(IIT_STRUCT7); break;
- case 8: TypeSig.push_back(IIT_STRUCT8); break;
- case 9: TypeSig.push_back(IIT_STRUCT9); break;
- default: llvm_unreachable("Unhandled case in struct");
+ if (auto *R = Int.TheDef->getValue("TypeSig")) {
+ for (auto &a : cast<ListInit>(R->getValue())->getValues()) {
+ for (auto &b : cast<ListInit>(a)->getValues())
+ TypeSig.push_back(cast<IntInit>(b)->getValue());
}
-
- for (unsigned i = 0, e = Int.IS.RetVTs.size(); i != e; ++i)
- EncodeFixedType(Int.IS.RetTypeDefs[i], ArgCodes, NextArgCode, TypeSig,
- ArgMapping);
}
-
- for (unsigned i = 0, e = Int.IS.ParamTypeDefs.size(); i != e; ++i)
- EncodeFixedType(Int.IS.ParamTypeDefs[i], ArgCodes, NextArgCode, TypeSig,
- ArgMapping);
}
static void printIITEntry(raw_ostream &OS, unsigned char X) {
@@ -640,6 +388,9 @@ std::optional<bool> compareFnAttributes(const CodeGenIntrinsic *L,
if (L->hasSideEffects != R->hasSideEffects)
return R->hasSideEffects;
+ if (L->isStrictFP != R->isStrictFP)
+ return R->isStrictFP;
+
// Try to order by readonly/readnone attribute.
uint32_t LK = L->ME.toIntValue();
uint32_t RK = R->ME.toIntValue();
@@ -726,6 +477,10 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
OS << " Attribute::get(C, Attribute::Alignment, "
<< Attr.Value << "),\n";
break;
+ case CodeGenIntrinsic::Dereferenceable:
+ OS << " Attribute::get(C, Attribute::Dereferenceable, "
+ << Attr.Value << "),\n";
+ break;
}
}
OS << " });\n";
@@ -770,6 +525,8 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
OS << " Attribute::get(C, Attribute::Convergent),\n";
if (Intrinsic.isSpeculatable)
OS << " Attribute::get(C, Attribute::Speculatable),\n";
+ if (Intrinsic.isStrictFP)
+ OS << " Attribute::get(C, Attribute::StrictFP),\n";
MemoryEffects ME = Intrinsic.ME;
// TODO: IntrHasSideEffects should affect not only readnone intrinsics.
@@ -842,7 +599,8 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
Intrinsic.isNoReturn || Intrinsic.isNoCallback || Intrinsic.isNoSync ||
Intrinsic.isNoFree || Intrinsic.isWillReturn || Intrinsic.isCold ||
Intrinsic.isNoDuplicate || Intrinsic.isNoMerge ||
- Intrinsic.isConvergent || Intrinsic.isSpeculatable) {
+ Intrinsic.isConvergent || Intrinsic.isSpeculatable ||
+ Intrinsic.isStrictFP) {
unsigned ID = UniqFnAttributes.find(&Intrinsic)->second;
OS << " AS[" << numAttrs++ << "] = {AttributeList::FunctionIndex, "
<< "getIntrinsicFnAttributeSet(C, " << ID << ")};\n";
@@ -952,10 +710,16 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
OS << "#endif\n\n";
}
-void llvm::EmitIntrinsicEnums(RecordKeeper &RK, raw_ostream &OS) {
+static void EmitIntrinsicEnums(RecordKeeper &RK, raw_ostream &OS) {
IntrinsicEmitter(RK).run(OS, /*Enums=*/true);
}
-void llvm::EmitIntrinsicImpl(RecordKeeper &RK, raw_ostream &OS) {
+static TableGen::Emitter::Opt X("gen-intrinsic-enums", EmitIntrinsicEnums,
+ "Generate intrinsic enums");
+
+static void EmitIntrinsicImpl(RecordKeeper &RK, raw_ostream &OS) {
IntrinsicEmitter(RK).run(OS, /*Enums=*/false);
}
+
+static TableGen::Emitter::Opt Y("gen-intrinsic-impl", EmitIntrinsicImpl,
+ "Generate intrinsic information");
diff --git a/llvm/utils/TableGen/OptParserEmitter.cpp b/llvm/utils/TableGen/OptParserEmitter.cpp
index d363191bd9b8..a04680b5d91e 100644
--- a/llvm/utils/TableGen/OptParserEmitter.cpp
+++ b/llvm/utils/TableGen/OptParserEmitter.cpp
@@ -64,7 +64,7 @@ class MarshallingInfo {
public:
static constexpr const char *MacroName = "OPTION_WITH_MARSHALLING";
const Record &R;
- bool ShouldAlwaysEmit;
+ bool ShouldAlwaysEmit = false;
StringRef MacroPrefix;
StringRef KeyPath;
StringRef DefaultValue;
@@ -212,8 +212,7 @@ static MarshallingInfo createMarshallingInfo(const Record &R) {
/// OptParserEmitter - This tablegen backend takes an input .td file
/// describing a list of options and emits a data structure for parsing and
/// working with those options when given an input command line.
-namespace llvm {
-void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
+static void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
// Get the option groups and options.
const std::vector<Record*> &Groups =
Records.getAllDerivedDefinitions("OptionGroup");
@@ -499,4 +498,6 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) {
OS << "\n";
}
-} // end namespace llvm
+
+static TableGen::Emitter::Opt X("gen-opt-parser-defs", EmitOptParser,
+ "Generate option definitions");
diff --git a/llvm/utils/TableGen/OptRSTEmitter.cpp b/llvm/utils/TableGen/OptRSTEmitter.cpp
index 03c7326e817a..87e755d943a1 100644
--- a/llvm/utils/TableGen/OptRSTEmitter.cpp
+++ b/llvm/utils/TableGen/OptRSTEmitter.cpp
@@ -10,13 +10,13 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
using namespace llvm;
/// OptParserEmitter - This tablegen backend takes an input .td file
/// describing a list of options and emits a RST man page.
-namespace llvm {
-void EmitOptRST(RecordKeeper &Records, raw_ostream &OS) {
+static void EmitOptRST(RecordKeeper &Records, raw_ostream &OS) {
llvm::StringMap<std::vector<Record *>> OptionsByGroup;
std::vector<Record *> OptionsWithoutGroup;
@@ -102,4 +102,6 @@ void EmitOptRST(RecordKeeper &Records, raw_ostream &OS) {
}
}
}
-} // end namespace llvm
+
+static TableGen::Emitter::Opt X("gen-opt-rst", EmitOptRST,
+ "Generate option RST");
diff --git a/llvm/utils/TableGen/PredicateExpander.cpp b/llvm/utils/TableGen/PredicateExpander.cpp
index b129401461b5..8f96d3307ded 100644
--- a/llvm/utils/TableGen/PredicateExpander.cpp
+++ b/llvm/utils/TableGen/PredicateExpander.cpp
@@ -12,6 +12,7 @@
#include "PredicateExpander.h"
#include "CodeGenSchedule.h" // Definition of STIPredicateFunction.
+#include "llvm/TableGen/Record.h"
namespace llvm {
diff --git a/llvm/utils/TableGen/PseudoLoweringEmitter.cpp b/llvm/utils/TableGen/PseudoLoweringEmitter.cpp
index 6a1e1332d767..e07fb9188098 100644
--- a/llvm/utils/TableGen/PseudoLoweringEmitter.cpp
+++ b/llvm/utils/TableGen/PseudoLoweringEmitter.cpp
@@ -313,10 +313,5 @@ void PseudoLoweringEmitter::run(raw_ostream &o) {
emitLoweringEmitter(o);
}
-namespace llvm {
-
-void EmitPseudoLowering(RecordKeeper &RK, raw_ostream &OS) {
- PseudoLoweringEmitter(RK).run(OS);
-}
-
-} // End llvm namespace
+static TableGen::Emitter::OptClass<PseudoLoweringEmitter>
+ X("gen-pseudo-lowering", "Generate pseudo instruction lowering");
diff --git a/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp b/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp
index fa6508cbfc69..12174fd83f56 100644
--- a/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp
+++ b/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp
@@ -1,4 +1,4 @@
-//===- RISCVTargetDefEmitter.cpp - Generate lists of RISCV CPUs -----------===//
+//===- RISCVTargetDefEmitter.cpp - Generate lists of RISC-V CPUs ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
@@ -11,23 +11,23 @@
//
//===----------------------------------------------------------------------===//
-#include "TableGenBackends.h"
#include "llvm/Support/RISCVISAInfo.h"
#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
using namespace llvm;
using ISAInfoTy = llvm::Expected<std::unique_ptr<RISCVISAInfo>>;
// We can generate march string from target features as what has been described
-// in RISCV ISA specification (version 20191213) 'Chapter 27. ISA Extension
+// in RISC-V ISA specification (version 20191213) 'Chapter 27. ISA Extension
// Naming Conventions'.
//
// This is almost the same as RISCVFeatures::parseFeatureBits, except that we
// get feature name from feature records instead of feature bits.
static std::string getMArch(const Record &Rec) {
std::vector<std::string> FeatureVector;
- int XLen = 32;
+ unsigned XLen = 32;
// Convert features to FeatureVector.
for (auto *Feature : Rec.getValueAsListOfDefs("Features")) {
@@ -47,12 +47,11 @@ static std::string getMArch(const Record &Rec) {
return (*ISAInfo)->toString();
}
-void llvm::EmitRISCVTargetDef(const RecordKeeper &RK, raw_ostream &OS) {
+static void EmitRISCVTargetDef(RecordKeeper &RK, raw_ostream &OS) {
OS << "#ifndef PROC\n"
<< "#define PROC(ENUM, NAME, DEFAULT_MARCH)\n"
<< "#endif\n\n";
- OS << "PROC(INVALID, {\"invalid\"}, {\"\"})\n";
// Iterate on all definition records.
for (const Record *Rec : RK.getAllDerivedDefinitions("RISCVProcessorModel")) {
std::string MArch = Rec->getValueAsString("DefaultMarch").str();
@@ -80,3 +79,6 @@ void llvm::EmitRISCVTargetDef(const RecordKeeper &RK, raw_ostream &OS) {
OS << "\n#undef TUNE_PROC\n";
}
+
+static TableGen::Emitter::Opt X("gen-riscv-target-def", EmitRISCVTargetDef,
+ "Generate the list of CPU for RISCV");
diff --git a/llvm/utils/TableGen/RegisterBankEmitter.cpp b/llvm/utils/TableGen/RegisterBankEmitter.cpp
index e6689b211a7d..2d23bf86b6ad 100644
--- a/llvm/utils/TableGen/RegisterBankEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterBankEmitter.cpp
@@ -11,15 +11,15 @@
//
//===----------------------------------------------------------------------===//
+#include "CodeGenRegisters.h"
+#include "CodeGenTarget.h"
+#include "InfoByHwMode.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
-#include "CodeGenRegisters.h"
-#include "CodeGenTarget.h"
-
#define DEBUG_TYPE "register-bank-emitter"
using namespace llvm;
@@ -37,11 +37,11 @@ private:
RegisterClassesTy RCs;
/// The register class with the largest register size.
- const CodeGenRegisterClass *RCWithLargestRegsSize;
+ std::vector<const CodeGenRegisterClass *> RCsWithLargestRegSize;
public:
- RegisterBank(const Record &TheDef)
- : TheDef(TheDef), RCWithLargestRegsSize(nullptr) {}
+ RegisterBank(const Record &TheDef, unsigned NumModeIds)
+ : TheDef(TheDef), RCsWithLargestRegSize(NumModeIds) {}
/// Get the human-readable name for the bank.
StringRef getName() const { return TheDef.getValueAsString("Name"); }
@@ -79,18 +79,21 @@ public:
// register size anywhere (we could sum the sizes of the subregisters
// but there may be additional bits too) and we can't derive it from
// the VT's reliably due to Untyped.
- if (RCWithLargestRegsSize == nullptr)
- RCWithLargestRegsSize = RC;
- else if (RCWithLargestRegsSize->RSI.get(DefaultMode).SpillSize <
- RC->RSI.get(DefaultMode).SpillSize)
- RCWithLargestRegsSize = RC;
- assert(RCWithLargestRegsSize && "RC was nullptr?");
+ unsigned NumModeIds = RCsWithLargestRegSize.size();
+ for (unsigned M = 0; M < NumModeIds; ++M) {
+ if (RCsWithLargestRegSize[M] == nullptr)
+ RCsWithLargestRegSize[M] = RC;
+ else if (RCsWithLargestRegSize[M]->RSI.get(M).SpillSize <
+ RC->RSI.get(M).SpillSize)
+ RCsWithLargestRegSize[M] = RC;
+ assert(RCsWithLargestRegSize[M] && "RC was nullptr?");
+ }
RCs.emplace_back(RC);
}
- const CodeGenRegisterClass *getRCWithLargestRegsSize() const {
- return RCWithLargestRegsSize;
+ const CodeGenRegisterClass *getRCWithLargestRegSize(unsigned HwMode) const {
+ return RCsWithLargestRegSize[HwMode];
}
iterator_range<typename RegisterClassesTy::const_iterator>
@@ -144,9 +147,10 @@ void RegisterBankEmitter::emitBaseClassDefinition(
raw_ostream &OS, const StringRef TargetName,
const std::vector<RegisterBank> &Banks) {
OS << "private:\n"
- << " static RegisterBank *RegBanks[];\n\n"
+ << " static const RegisterBank *RegBanks[];\n"
+ << " static const unsigned Sizes[];\n\n"
<< "protected:\n"
- << " " << TargetName << "GenRegisterBankInfo();\n"
+ << " " << TargetName << "GenRegisterBankInfo(unsigned HwMode = 0);\n"
<< "\n";
}
@@ -211,6 +215,7 @@ void RegisterBankEmitter::emitBaseClassImplementation(
raw_ostream &OS, StringRef TargetName,
std::vector<RegisterBank> &Banks) {
const CodeGenRegBank &RegisterClassHierarchy = Target.getRegBank();
+ const CodeGenHwModes &CGH = Target.getHwModes();
OS << "namespace llvm {\n"
<< "namespace " << TargetName << " {\n";
@@ -241,11 +246,8 @@ void RegisterBankEmitter::emitBaseClassImplementation(
for (const auto &Bank : Banks) {
std::string QualifiedBankID =
(TargetName + "::" + Bank.getEnumeratorName()).str();
- const CodeGenRegisterClass &RC = *Bank.getRCWithLargestRegsSize();
- unsigned Size = RC.RSI.get(DefaultMode).SpillSize;
- OS << "RegisterBank " << Bank.getInstanceVarName() << "(/* ID */ "
- << QualifiedBankID << ", /* Name */ \"" << Bank.getName()
- << "\", /* Size */ " << Size << ", "
+ OS << "const RegisterBank " << Bank.getInstanceVarName() << "(/* ID */ "
+ << QualifiedBankID << ", /* Name */ \"" << Bank.getName() << "\", "
<< "/* CoveredRegClasses */ " << Bank.getCoverageArrayName()
<< ", /* NumRegClasses */ "
<< RegisterClassHierarchy.getRegClasses().size() << ");\n";
@@ -253,16 +255,33 @@ void RegisterBankEmitter::emitBaseClassImplementation(
OS << "} // end namespace " << TargetName << "\n"
<< "\n";
- OS << "RegisterBank *" << TargetName
+ OS << "const RegisterBank *" << TargetName
<< "GenRegisterBankInfo::RegBanks[] = {\n";
for (const auto &Bank : Banks)
OS << " &" << TargetName << "::" << Bank.getInstanceVarName() << ",\n";
OS << "};\n\n";
+ unsigned NumModeIds = CGH.getNumModeIds();
+ OS << "const unsigned " << TargetName << "GenRegisterBankInfo::Sizes[] = {\n";
+ for (unsigned M = 0; M < NumModeIds; ++M) {
+ OS << " // Mode = " << M << " (";
+ if (M == DefaultMode)
+ OS << "Default";
+ else
+ OS << CGH.getMode(M).Name;
+ OS << ")\n";
+ for (const auto &Bank : Banks) {
+ const CodeGenRegisterClass &RC = *Bank.getRCWithLargestRegSize(M);
+ unsigned Size = RC.RSI.get(M).SpillSize;
+ OS << " " << Size << ",\n";
+ }
+ }
+ OS << "};\n\n";
+
OS << TargetName << "GenRegisterBankInfo::" << TargetName
- << "GenRegisterBankInfo()\n"
+ << "GenRegisterBankInfo(unsigned HwMode)\n"
<< " : RegisterBankInfo(RegBanks, " << TargetName
- << "::NumRegisterBanks) {\n"
+ << "::NumRegisterBanks, Sizes, HwMode) {\n"
<< " // Assert that RegBank indices match their ID's\n"
<< "#ifndef NDEBUG\n"
<< " for (auto RB : enumerate(RegBanks))\n"
@@ -275,12 +294,13 @@ void RegisterBankEmitter::emitBaseClassImplementation(
void RegisterBankEmitter::run(raw_ostream &OS) {
StringRef TargetName = Target.getName();
const CodeGenRegBank &RegisterClassHierarchy = Target.getRegBank();
+ const CodeGenHwModes &CGH = Target.getHwModes();
Records.startTimer("Analyze records");
std::vector<RegisterBank> Banks;
for (const auto &V : Records.getAllDerivedDefinitions("RegisterBank")) {
SmallPtrSet<const CodeGenRegisterClass *, 8> VisitedRCs;
- RegisterBank Bank(*V);
+ RegisterBank Bank(*V, CGH.getNumModeIds());
for (const CodeGenRegisterClass *RC :
Bank.getExplicitlySpecifiedRegisterClasses(RegisterClassHierarchy)) {
@@ -327,10 +347,5 @@ void RegisterBankEmitter::run(raw_ostream &OS) {
OS << "#endif // GET_TARGET_REGBANK_IMPL\n";
}
-namespace llvm {
-
-void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS) {
- RegisterBankEmitter(RK).run(OS);
-}
-
-} // end namespace llvm
+static TableGen::Emitter::OptClass<RegisterBankEmitter>
+ X("gen-register-bank", "Generate registers bank descriptions");
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index 113cebf8a08e..3101081114fb 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -12,8 +12,10 @@
//
//===----------------------------------------------------------------------===//
+#include "CodeGenHwModes.h"
#include "CodeGenRegisters.h"
#include "CodeGenTarget.h"
+#include "InfoByHwMode.h"
#include "SequenceToOffsetTable.h"
#include "Types.h"
#include "llvm/ADT/ArrayRef.h"
@@ -23,10 +25,10 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
@@ -633,17 +635,16 @@ static void printSubRegIndex(raw_ostream &OS, const CodeGenSubRegIndex *Idx) {
// The initial value depends on the specific list. The list is terminated by a
// 0 differential which means we can't encode repeated elements.
-typedef SmallVector<uint16_t, 4> DiffVec;
+typedef SmallVector<int16_t, 4> DiffVec;
typedef SmallVector<LaneBitmask, 4> MaskVec;
-// Differentially encode a sequence of numbers into V. The starting value and
-// terminating 0 are not added to V, so it will have the same size as List.
-static
-DiffVec &diffEncode(DiffVec &V, unsigned InitVal, SparseBitVector<> List) {
+// Fills V with differentials between every two consecutive elements of List.
+static DiffVec &diffEncode(DiffVec &V, SparseBitVector<> List) {
assert(V.empty() && "Clear DiffVec before diffEncode.");
- uint16_t Val = uint16_t(InitVal);
-
- for (uint16_t Cur : List) {
+ SparseBitVector<>::iterator I = List.begin(), E = List.end();
+ unsigned Val = *I;
+ while (++I != E) {
+ unsigned Cur = *I;
V.push_back(Cur - Val);
Val = Cur;
}
@@ -654,18 +655,16 @@ template<typename Iter>
static
DiffVec &diffEncode(DiffVec &V, unsigned InitVal, Iter Begin, Iter End) {
assert(V.empty() && "Clear DiffVec before diffEncode.");
- uint16_t Val = uint16_t(InitVal);
+ unsigned Val = InitVal;
for (Iter I = Begin; I != End; ++I) {
- uint16_t Cur = (*I)->EnumValue;
+ unsigned Cur = (*I)->EnumValue;
V.push_back(Cur - Val);
Val = Cur;
}
return V;
}
-static void printDiff16(raw_ostream &OS, uint16_t Val) {
- OS << Val;
-}
+static void printDiff16(raw_ostream &OS, int16_t Val) { OS << Val; }
static void printMask(raw_ostream &OS, LaneBitmask Val) {
OS << "LaneBitmask(0x" << PrintLaneMask(Val) << ')';
@@ -889,7 +888,6 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
SmallVector<DiffVec, 4> SubRegLists(Regs.size());
SmallVector<DiffVec, 4> SuperRegLists(Regs.size());
SmallVector<DiffVec, 4> RegUnitLists(Regs.size());
- SmallVector<unsigned, 4> RegUnitInitScale(Regs.size());
// List of lane masks accompanying register unit sequences.
SequenceToOffsetTable<MaskVec> LaneMaskSeqs;
@@ -927,31 +925,8 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
SuperRegList.end());
DiffSeqs.add(SuperRegLists[i]);
- // Differentially encode the register unit list, seeded by register number.
- // First compute a scale factor that allows more diff-lists to be reused:
- //
- // D0 -> (S0, S1)
- // D1 -> (S2, S3)
- //
- // A scale factor of 2 allows D0 and D1 to share a diff-list. The initial
- // value for the differential decoder is the register number multiplied by
- // the scale.
- //
- // Check the neighboring registers for arithmetic progressions.
- unsigned ScaleA = ~0u, ScaleB = ~0u;
- SparseBitVector<> RUs = Reg.getNativeRegUnits();
- if (I != Regs.begin() &&
- std::prev(I)->getNativeRegUnits().count() == RUs.count())
- ScaleB = *RUs.begin() - *std::prev(I)->getNativeRegUnits().begin();
- if (std::next(I) != Regs.end() &&
- std::next(I)->getNativeRegUnits().count() == RUs.count())
- ScaleA = *std::next(I)->getNativeRegUnits().begin() - *RUs.begin();
- unsigned Scale = std::min(ScaleB, ScaleA);
- // Default the scale to 0 if it can't be encoded in 4 bits.
- if (Scale >= 16)
- Scale = 0;
- RegUnitInitScale[i] = Scale;
- DiffSeqs.add(diffEncode(RegUnitLists[i], Scale * Reg.EnumValue, RUs));
+ const SparseBitVector<> &RUs = Reg.getNativeRegUnits();
+ DiffSeqs.add(diffEncode(RegUnitLists[i], RUs));
const auto &RUMasks = Reg.getRegUnitLaneMasks();
MaskVec &LaneMaskVec = RegUnitLaneMasks[i];
@@ -976,7 +951,7 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
const std::string &TargetName = std::string(Target.getName());
// Emit the shared table of differential lists.
- OS << "extern const MCPhysReg " << TargetName << "RegDiffLists[] = {\n";
+ OS << "extern const int16_t " << TargetName << "RegDiffLists[] = {\n";
DiffSeqs.emit(OS, printDiff16);
OS << "};\n\n";
@@ -1012,10 +987,16 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
// Emit the register descriptors now.
i = 0;
for (const auto &Reg : Regs) {
+ unsigned FirstRU = Reg.getNativeRegUnits().find_first();
+ unsigned Offset = DiffSeqs.get(RegUnitLists[i]);
+ // The value must be kept in sync with MCRegisterInfo.h.
+ constexpr unsigned RegUnitBits = 12;
+ assert(isUInt<RegUnitBits>(FirstRU) && "Too many regunits");
+ assert(isUInt<32 - RegUnitBits>(Offset) && "Offset is too big");
OS << " { " << RegStrings.get(std::string(Reg.getName())) << ", "
<< DiffSeqs.get(SubRegLists[i]) << ", " << DiffSeqs.get(SuperRegLists[i])
<< ", " << SubRegIdxSeqs.get(SubRegIdxLists[i]) << ", "
- << (DiffSeqs.get(RegUnitLists[i]) * 16 + RegUnitInitScale[i]) << ", "
+ << (Offset << RegUnitBits | FirstRU) << ", "
<< LaneMaskSeqs.get(RegUnitLaneMasks[i]) << " },\n";
++i;
}
@@ -1261,7 +1242,8 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
for (const auto &RC : RegisterClasses) {
std::vector<MVT::SimpleValueType> S;
for (const ValueTypeByHwMode &VVT : RC.VTs)
- S.push_back(VVT.get(M).SimpleTy);
+ if (VVT.hasDefault() || VVT.hasMode(M))
+ S.push_back(VVT.get(M).SimpleTy);
VTSeqs.add(S);
}
}
@@ -1311,7 +1293,8 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
<< RI.SpillAlignment;
std::vector<MVT::SimpleValueType> VTs;
for (const ValueTypeByHwMode &VVT : RC.VTs)
- VTs.push_back(VVT.get(M).SimpleTy);
+ if (VVT.hasDefault() || VVT.hasMode(M))
+ VTs.push_back(VVT.get(M).SimpleTy);
OS << ", VTLists+" << VTSeqs.get(VTs) << " }, // "
<< RC.getName() << '\n';
}
@@ -1649,7 +1632,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
// Emit the constructor of the class...
OS << "extern const MCRegisterDesc " << TargetName << "RegDesc[];\n";
- OS << "extern const MCPhysReg " << TargetName << "RegDiffLists[];\n";
+ OS << "extern const int16_t " << TargetName << "RegDiffLists[];\n";
OS << "extern const LaneBitmask " << TargetName << "LaneMaskLists[];\n";
OS << "extern const char " << TargetName << "RegStrings[];\n";
OS << "extern const char " << TargetName << "RegClassStrings[];\n";
@@ -1906,10 +1889,5 @@ void RegisterInfoEmitter::debugDump(raw_ostream &OS) {
}
}
-namespace llvm {
-
-void EmitRegisterInfo(RecordKeeper &RK, raw_ostream &OS) {
- RegisterInfoEmitter(RK).run(OS);
-}
-
-} // end namespace llvm
+static TableGen::Emitter::OptClass<RegisterInfoEmitter>
+ X("gen-register-info", "Generate registers and register classes info");
diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp
index c88a2db55502..b6af02c28a80 100644
--- a/llvm/utils/TableGen/SearchableTableEmitter.cpp
+++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
#include <algorithm>
#include <set>
#include <string>
@@ -173,6 +174,8 @@ private:
"' lookup method '" + Index.Name +
"', key field '" + Field.Name +
"' of type bits is too large");
+ } else if (isa<BitRecTy>(Field.RecType)) {
+ return "bool";
} else if (Field.Enum || Field.IsIntrinsic || Field.IsInstruction)
return "unsigned";
PrintFatalError(Index.Loc,
@@ -822,10 +825,5 @@ void SearchableTableEmitter::run(raw_ostream &OS) {
OS << "#undef " << Guard << "\n";
}
-namespace llvm {
-
-void EmitSearchableTables(RecordKeeper &RK, raw_ostream &OS) {
- SearchableTableEmitter(RK).run(OS);
-}
-
-} // End llvm namespace.
+static TableGen::Emitter::OptClass<SearchableTableEmitter>
+ X("gen-searchable-tables", "Generate generic binary-searchable table");
diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp
index 8afe6d37d0e0..e4eb23649e96 100644
--- a/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -10,22 +10,23 @@
//
//===----------------------------------------------------------------------===//
+#include "CodeGenHwModes.h"
#include "CodeGenSchedule.h"
#include "CodeGenTarget.h"
#include "PredicateExpander.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCSchedule.h"
-#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
+#include "llvm/TargetParser/SubtargetFeature.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -40,6 +41,15 @@ using namespace llvm;
namespace {
+/// Sorting predicate to sort record pointers by their
+/// FieldName field.
+struct LessRecordFieldFieldName {
+ bool operator()(const Record *Rec1, const Record *Rec2) const {
+ return Rec1->getValueAsString("FieldName") <
+ Rec2->getValueAsString("FieldName");
+ }
+};
+
class SubtargetEmitter {
// Each processor has a SchedClassDesc table with an entry for each SchedClass.
// The SchedClassDesc table indexes into a global write resource table, write
@@ -68,7 +78,7 @@ class SubtargetEmitter {
}
};
- const CodeGenTarget &TGT;
+ CodeGenTarget TGT;
RecordKeeper &Records;
CodeGenSchedModels &SchedModels;
std::string Target;
@@ -110,6 +120,7 @@ class SubtargetEmitter {
Record *FindReadAdvance(const CodeGenSchedRW &SchedRead,
const CodeGenProcModel &ProcModel);
void ExpandProcResources(RecVec &PRVec, std::vector<int64_t> &Cycles,
+ std::vector<int64_t> &StartAtCycles,
const CodeGenProcModel &ProcModel);
void GenSchedClassTables(const CodeGenProcModel &ProcModel,
SchedClassTables &SchedTables);
@@ -126,8 +137,8 @@ class SubtargetEmitter {
void ParseFeaturesFunction(raw_ostream &OS);
public:
- SubtargetEmitter(RecordKeeper &R, CodeGenTarget &TGT)
- : TGT(TGT), Records(R), SchedModels(TGT.getSchedModels()),
+ SubtargetEmitter(RecordKeeper &R)
+ : TGT(R), Records(R), SchedModels(TGT.getSchedModels()),
Target(TGT.getName()) {}
void run(raw_ostream &o);
@@ -200,15 +211,15 @@ void SubtargetEmitter::EmitSubtargetInfoMacroCalls(raw_ostream &OS) {
std::vector<Record *> FeatureList =
Records.getAllDerivedDefinitions("SubtargetFeature");
- llvm::sort(FeatureList, LessRecordFieldName());
+ llvm::sort(FeatureList, LessRecordFieldFieldName());
for (const Record *Feature : FeatureList) {
- const StringRef Attribute = Feature->getValueAsString("Attribute");
+ const StringRef FieldName = Feature->getValueAsString("FieldName");
const StringRef Value = Feature->getValueAsString("Value");
// Only handle boolean features for now, excluding BitVectors and enums.
const bool IsBool = (Value == "false" || Value == "true") &&
- !StringRef(Attribute).contains('[');
+ !StringRef(FieldName).contains('[');
if (!IsBool)
continue;
@@ -217,9 +228,9 @@ void SubtargetEmitter::EmitSubtargetInfoMacroCalls(raw_ostream &OS) {
// Define the getter with lowercased first char: xxxYyy() { return XxxYyy; }
const std::string Getter =
- Attribute.substr(0, 1).lower() + Attribute.substr(1).str();
+ FieldName.substr(0, 1).lower() + FieldName.substr(1).str();
- OS << "GET_SUBTARGETINFO_MACRO(" << Attribute << ", " << Default << ", "
+ OS << "GET_SUBTARGETINFO_MACRO(" << FieldName << ", " << Default << ", "
<< Getter << ")\n";
}
OS << "#undef GET_SUBTARGETINFO_MACRO\n";
@@ -967,6 +978,7 @@ Record *SubtargetEmitter::FindReadAdvance(const CodeGenSchedRW &SchedRead,
// resource groups and super resources that cover them.
void SubtargetEmitter::ExpandProcResources(RecVec &PRVec,
std::vector<int64_t> &Cycles,
+ std::vector<int64_t> &StartAtCycles,
const CodeGenProcModel &PM) {
assert(PRVec.size() == Cycles.size() && "failed precondition");
for (unsigned i = 0, e = PRVec.size(); i != e; ++i) {
@@ -989,6 +1001,7 @@ void SubtargetEmitter::ExpandProcResources(RecVec &PRVec,
SubDef->getLoc());
PRVec.push_back(SuperDef);
Cycles.push_back(Cycles[i]);
+ StartAtCycles.push_back(StartAtCycles[i]);
SubDef = SuperDef;
}
}
@@ -1005,6 +1018,7 @@ void SubtargetEmitter::ExpandProcResources(RecVec &PRVec,
if (SubI == SubE) {
PRVec.push_back(PR);
Cycles.push_back(Cycles[i]);
+ StartAtCycles.push_back(StartAtCycles[i]);
}
}
}
@@ -1139,22 +1153,48 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
std::vector<int64_t> Cycles =
WriteRes->getValueAsListOfInts("ResourceCycles");
- if (Cycles.empty()) {
- // If ResourceCycles is not provided, default to one cycle per
- // resource.
- Cycles.resize(PRVec.size(), 1);
- } else if (Cycles.size() != PRVec.size()) {
+ std::vector<int64_t> StartAtCycles =
+ WriteRes->getValueAsListOfInts("StartAtCycles");
+
+ // Check consistency of the two vectors carrying the start and
+ // stop cycles of the resources.
+ if (!Cycles.empty() && Cycles.size() != PRVec.size()) {
// If ResourceCycles is provided, check consistency.
PrintFatalError(
WriteRes->getLoc(),
- Twine("Inconsistent resource cycles: !size(ResourceCycles) != "
- "!size(ProcResources): ")
+ Twine("Inconsistent resource cycles: size(ResourceCycles) != "
+ "size(ProcResources): ")
.concat(Twine(PRVec.size()))
.concat(" vs ")
.concat(Twine(Cycles.size())));
}
- ExpandProcResources(PRVec, Cycles, ProcModel);
+ if (!StartAtCycles.empty() && StartAtCycles.size() != PRVec.size()) {
+ PrintFatalError(
+ WriteRes->getLoc(),
+ Twine("Inconsistent resource cycles: size(StartAtCycles) != "
+ "size(ProcResources): ")
+ .concat(Twine(StartAtCycles.size()))
+ .concat(" vs ")
+ .concat(Twine(PRVec.size())));
+ }
+
+ if (Cycles.empty()) {
+ // If ResourceCycles is not provided, default to one cycle
+ // per resource.
+ Cycles.resize(PRVec.size(), 1);
+ }
+
+ if (StartAtCycles.empty()) {
+ // If StartAtCycles is not provided, reserve the resource
+ // starting from cycle 0.
+ StartAtCycles.resize(PRVec.size(), 0);
+ }
+
+ assert(StartAtCycles.size() == Cycles.size());
+
+ ExpandProcResources(PRVec, Cycles, StartAtCycles, ProcModel);
+ assert(StartAtCycles.size() == Cycles.size());
for (unsigned PRIdx = 0, PREnd = PRVec.size();
PRIdx != PREnd; ++PRIdx) {
@@ -1162,6 +1202,17 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
WPREntry.ProcResourceIdx = ProcModel.getProcResourceIdx(PRVec[PRIdx]);
assert(WPREntry.ProcResourceIdx && "Bad ProcResourceIdx");
WPREntry.Cycles = Cycles[PRIdx];
+ WPREntry.StartAtCycle = StartAtCycles[PRIdx];
+ if (StartAtCycles[PRIdx] > Cycles[PRIdx]) {
+ PrintFatalError(WriteRes->getLoc(),
+ Twine("Inconsistent resource cycles: StartAtCycles "
+ "< Cycles must hold."));
+ }
+ if (StartAtCycles[PRIdx] < 0) {
+ PrintFatalError(WriteRes->getLoc(),
+ Twine("Invalid value: StartAtCycle "
+ "must be a non-negative value."));
+ }
// If this resource is already used in this sequence, add the current
// entry's cycles so that the same resource appears to be used
// serially, rather than multiple parallel uses. This is important for
@@ -1170,6 +1221,15 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
for( ; WPRIdx != WPREnd; ++WPRIdx) {
if (WriteProcResources[WPRIdx].ProcResourceIdx
== WPREntry.ProcResourceIdx) {
+ // TODO: multiple use of the same resources would
+ // require either 1. thinking of how to handle multiple
+ // intervals for the same resource in
+ // `<Target>WriteProcResTable` (see
+ // `SubtargetEmitter::EmitSchedClassTables`), or
+ // 2. thinking how to merge multiple intervals into a
+ // single interval.
+ assert(WPREntry.StartAtCycle == 0 &&
+ "multiple use ofthe same resource is not yet handled");
WriteProcResources[WPRIdx].Cycles += WPREntry.Cycles;
break;
}
@@ -1274,15 +1334,16 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
void SubtargetEmitter::EmitSchedClassTables(SchedClassTables &SchedTables,
raw_ostream &OS) {
// Emit global WriteProcResTable.
- OS << "\n// {ProcResourceIdx, Cycles}\n"
- << "extern const llvm::MCWriteProcResEntry "
- << Target << "WriteProcResTable[] = {\n"
- << " { 0, 0}, // Invalid\n";
+ OS << "\n// {ProcResourceIdx, Cycles, StartAtCycle}\n"
+ << "extern const llvm::MCWriteProcResEntry " << Target
+ << "WriteProcResTable[] = {\n"
+ << " { 0, 0, 0 }, // Invalid\n";
for (unsigned WPRIdx = 1, WPREnd = SchedTables.WriteProcResources.size();
WPRIdx != WPREnd; ++WPRIdx) {
MCWriteProcResEntry &WPREntry = SchedTables.WriteProcResources[WPRIdx];
OS << " {" << format("%2d", WPREntry.ProcResourceIdx) << ", "
- << format("%2d", WPREntry.Cycles) << "}";
+ << format("%2d", WPREntry.Cycles) << ", "
+ << format("%2d", WPREntry.StartAtCycle) << "}";
if (WPRIdx + 1 < WPREnd)
OS << ',';
OS << " // #" << WPRIdx << '\n';
@@ -1401,6 +1462,12 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
OS << " " << (CompleteModel ? "true" : "false") << ", // "
<< "CompleteModel\n";
+ bool EnableIntervals =
+ (PM.ModelDef ? PM.ModelDef->getValueAsBit("EnableIntervals") : false);
+
+ OS << " " << (EnableIntervals ? "true" : "false") << ", // "
+ << "EnableIntervals\n";
+
OS << " " << PM.Index << ", // Processor ID\n";
if (PM.hasInstrSchedModel())
OS << " " << PM.ModelName << "ProcResources" << ",\n"
@@ -1746,17 +1813,17 @@ void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS) {
// Next record
StringRef Instance = R->getName();
StringRef Value = R->getValueAsString("Value");
- StringRef Attribute = R->getValueAsString("Attribute");
+ StringRef FieldName = R->getValueAsString("FieldName");
if (Value=="true" || Value=="false")
OS << " if (Bits[" << Target << "::"
<< Instance << "]) "
- << Attribute << " = " << Value << ";\n";
+ << FieldName << " = " << Value << ";\n";
else
OS << " if (Bits[" << Target << "::"
<< Instance << "] && "
- << Attribute << " < " << Value << ") "
- << Attribute << " = " << Value << ";\n";
+ << FieldName << " < " << Value << ") "
+ << FieldName << " = " << Value << ";\n";
}
OS << "}\n";
@@ -1983,11 +2050,5 @@ void SubtargetEmitter::run(raw_ostream &OS) {
EmitMCInstrAnalysisPredicateFunctions(OS);
}
-namespace llvm {
-
-void EmitSubtarget(RecordKeeper &RK, raw_ostream &OS) {
- CodeGenTarget CGTarget(RK);
- SubtargetEmitter(RK, CGTarget).run(OS);
-}
-
-} // end namespace llvm
+static TableGen::Emitter::OptClass<SubtargetEmitter>
+ X("gen-subtarget", "Generate subtarget enumerations");
diff --git a/llvm/utils/TableGen/SubtargetFeatureInfo.cpp b/llvm/utils/TableGen/SubtargetFeatureInfo.cpp
index 2a63fc490380..1db8c0bf430a 100644
--- a/llvm/utils/TableGen/SubtargetFeatureInfo.cpp
+++ b/llvm/utils/TableGen/SubtargetFeatureInfo.cpp
@@ -11,7 +11,6 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
-#include <map>
using namespace llvm;
@@ -90,7 +89,7 @@ void SubtargetFeatureInfo::emitComputeAvailableFeatures(
StringRef TargetName, StringRef ClassName, StringRef FuncName,
SubtargetFeatureInfoMap &SubtargetFeatures, raw_ostream &OS,
StringRef ExtraParams) {
- OS << "PredicateBitset " << TargetName << ClassName << "::\n"
+ OS << "PredicateBitset " << ClassName << "::\n"
<< FuncName << "(const " << TargetName << "Subtarget *Subtarget";
if (!ExtraParams.empty())
OS << ", " << ExtraParams;
@@ -118,16 +117,19 @@ static bool emitFeaturesAux(StringRef TargetName, const Init &Val,
return false;
}
if (auto *D = dyn_cast<DagInit>(&Val)) {
- std::string Op = D->getOperator()->getAsString();
- if (Op == "not" && D->getNumArgs() == 1) {
+ auto *Op = dyn_cast<DefInit>(D->getOperator());
+ if (!Op)
+ return true;
+ StringRef OpName = Op->getDef()->getName();
+ if (OpName == "not" && D->getNumArgs() == 1) {
OS << '!';
return emitFeaturesAux(TargetName, *D->getArg(0), true, OS);
}
- if ((Op == "any_of" || Op == "all_of") && D->getNumArgs() > 0) {
+ if ((OpName == "any_of" || OpName == "all_of") && D->getNumArgs() > 0) {
bool Paren = D->getNumArgs() > 1 && std::exchange(ParenIfBinOp, true);
if (Paren)
OS << '(';
- ListSeparator LS(Op == "any_of" ? " || " : " && ");
+ ListSeparator LS(OpName == "any_of" ? " || " : " && ");
for (auto *Arg : D->getArgs()) {
OS << LS;
if (emitFeaturesAux(TargetName, *Arg, ParenIfBinOp, OS))
diff --git a/llvm/utils/TableGen/SubtargetFeatureInfo.h b/llvm/utils/TableGen/SubtargetFeatureInfo.h
index 8c8a4487934c..77703e8a87f8 100644
--- a/llvm/utils/TableGen/SubtargetFeatureInfo.h
+++ b/llvm/utils/TableGen/SubtargetFeatureInfo.h
@@ -9,9 +9,11 @@
#ifndef LLVM_UTIL_TABLEGEN_SUBTARGETFEATUREINFO_H
#define LLVM_UTIL_TABLEGEN_SUBTARGETFEATUREINFO_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/TableGen/Record.h"
#include <map>
#include <string>
+#include <utility>
#include <vector>
namespace llvm {
@@ -67,8 +69,8 @@ struct SubtargetFeatureInfo {
///
/// \param TargetName The name of the target as used in class prefixes (e.g.
/// <TargetName>Subtarget)
- /// \param ClassName The name of the class (without the <Target> prefix)
- /// that will contain the generated functions.
+ /// \param ClassName The name of the class that will contain the generated
+ /// functions (including the target prefix).
/// \param FuncName The name of the function to emit.
/// \param SubtargetFeatures A map of TableGen records to the
/// SubtargetFeatureInfo equivalent.
diff --git a/llvm/utils/TableGen/TableGen.cpp b/llvm/utils/TableGen/TableGen.cpp
index 746e2dd1db16..b2ed48cffe6b 100644
--- a/llvm/utils/TableGen/TableGen.cpp
+++ b/llvm/utils/TableGen/TableGen.cpp
@@ -10,57 +10,20 @@
//
//===----------------------------------------------------------------------===//
-#include "TableGenBackends.h" // Declares all backends.
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Main.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/SetTheory.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <cassert>
+#include <string>
+#include <vector>
using namespace llvm;
-enum ActionType {
- PrintRecords,
- PrintDetailedRecords,
- NullBackend,
- DumpJSON,
- GenEmitter,
- GenRegisterInfo,
- GenInstrInfo,
- GenInstrDocs,
- GenAsmWriter,
- GenAsmMatcher,
- GenDisassembler,
- GenPseudoLowering,
- GenCompressInst,
- GenCallingConv,
- GenDAGISel,
- GenDFAPacketizer,
- GenFastISel,
- GenSubtarget,
- GenIntrinsicEnums,
- GenIntrinsicImpl,
- PrintEnums,
- PrintSets,
- GenOptParserDefs,
- GenOptRST,
- GenCTags,
- GenAttributes,
- GenSearchableTables,
- GenGlobalISel,
- GenGICombiner,
- GenX86EVEX2VEXTables,
- GenX86FoldTables,
- GenX86MnemonicTables,
- GenRegisterBank,
- GenExegesis,
- GenAutomata,
- GenDirectivesEnumDecl,
- GenDirectivesEnumImpl,
- GenDXILOperation,
- GenRISCVTargetDef,
-};
-
namespace llvm {
cl::opt<bool> EmitLongStrLiterals(
"long-string-literals",
@@ -71,229 +34,54 @@ cl::opt<bool> EmitLongStrLiterals(
} // end namespace llvm
namespace {
-cl::opt<ActionType> Action(
- cl::desc("Action to perform:"),
- cl::values(
- clEnumValN(PrintRecords, "print-records",
- "Print all records to stdout (default)"),
- clEnumValN(PrintDetailedRecords, "print-detailed-records",
- "Print full details of all records to stdout"),
- clEnumValN(NullBackend, "null-backend",
- "Do nothing after parsing (useful for timing)"),
- clEnumValN(DumpJSON, "dump-json",
- "Dump all records as machine-readable JSON"),
- clEnumValN(GenEmitter, "gen-emitter", "Generate machine code emitter"),
- clEnumValN(GenRegisterInfo, "gen-register-info",
- "Generate registers and register classes info"),
- clEnumValN(GenInstrInfo, "gen-instr-info",
- "Generate instruction descriptions"),
- clEnumValN(GenInstrDocs, "gen-instr-docs",
- "Generate instruction documentation"),
- clEnumValN(GenCallingConv, "gen-callingconv",
- "Generate calling convention descriptions"),
- clEnumValN(GenAsmWriter, "gen-asm-writer", "Generate assembly writer"),
- clEnumValN(GenDisassembler, "gen-disassembler",
- "Generate disassembler"),
- clEnumValN(GenPseudoLowering, "gen-pseudo-lowering",
- "Generate pseudo instruction lowering"),
- clEnumValN(GenCompressInst, "gen-compress-inst-emitter",
- "Generate RISCV compressed instructions."),
- clEnumValN(GenAsmMatcher, "gen-asm-matcher",
- "Generate assembly instruction matcher"),
- clEnumValN(GenDAGISel, "gen-dag-isel",
- "Generate a DAG instruction selector"),
- clEnumValN(GenDFAPacketizer, "gen-dfa-packetizer",
- "Generate DFA Packetizer for VLIW targets"),
- clEnumValN(GenFastISel, "gen-fast-isel",
- "Generate a \"fast\" instruction selector"),
- clEnumValN(GenSubtarget, "gen-subtarget",
- "Generate subtarget enumerations"),
- clEnumValN(GenIntrinsicEnums, "gen-intrinsic-enums",
- "Generate intrinsic enums"),
- clEnumValN(GenIntrinsicImpl, "gen-intrinsic-impl",
- "Generate intrinsic information"),
- clEnumValN(PrintEnums, "print-enums", "Print enum values for a class"),
- clEnumValN(PrintSets, "print-sets",
- "Print expanded sets for testing DAG exprs"),
- clEnumValN(GenOptParserDefs, "gen-opt-parser-defs",
- "Generate option definitions"),
- clEnumValN(GenOptRST, "gen-opt-rst", "Generate option RST"),
- clEnumValN(GenCTags, "gen-ctags", "Generate ctags-compatible index"),
- clEnumValN(GenAttributes, "gen-attrs", "Generate attributes"),
- clEnumValN(GenSearchableTables, "gen-searchable-tables",
- "Generate generic binary-searchable table"),
- clEnumValN(GenGlobalISel, "gen-global-isel",
- "Generate GlobalISel selector"),
- clEnumValN(GenGICombiner, "gen-global-isel-combiner",
- "Generate GlobalISel combiner"),
- clEnumValN(GenX86EVEX2VEXTables, "gen-x86-EVEX2VEX-tables",
- "Generate X86 EVEX to VEX compress tables"),
- clEnumValN(GenX86FoldTables, "gen-x86-fold-tables",
- "Generate X86 fold tables"),
- clEnumValN(GenX86MnemonicTables, "gen-x86-mnemonic-tables",
- "Generate X86 mnemonic tables"),
- clEnumValN(GenRegisterBank, "gen-register-bank",
- "Generate registers bank descriptions"),
- clEnumValN(GenExegesis, "gen-exegesis",
- "Generate llvm-exegesis tables"),
- clEnumValN(GenAutomata, "gen-automata", "Generate generic automata"),
- clEnumValN(GenDirectivesEnumDecl, "gen-directive-decl",
- "Generate directive related declaration code (header file)"),
- clEnumValN(GenDirectivesEnumImpl, "gen-directive-impl",
- "Generate directive related implementation code"),
- clEnumValN(GenDXILOperation, "gen-dxil-operation",
- "Generate DXIL operation information"),
- clEnumValN(GenRISCVTargetDef, "gen-riscv-target-def",
- "Generate the list of CPU for RISCV")));
+
cl::OptionCategory PrintEnumsCat("Options for -print-enums");
cl::opt<std::string> Class("class", cl::desc("Print Enum list for this class"),
cl::value_desc("class name"),
cl::cat(PrintEnumsCat));
-bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
- switch (Action) {
- case PrintRecords:
- OS << Records; // No argument, dump all contents
- break;
- case PrintDetailedRecords:
- EmitDetailedRecords(Records, OS);
- break;
- case NullBackend: // No backend at all.
- break;
- case DumpJSON:
- EmitJSON(Records, OS);
- break;
- case GenEmitter:
- EmitCodeEmitter(Records, OS);
- break;
- case GenRegisterInfo:
- EmitRegisterInfo(Records, OS);
- break;
- case GenInstrInfo:
- EmitInstrInfo(Records, OS);
- break;
- case GenInstrDocs:
- EmitInstrDocs(Records, OS);
- break;
- case GenCallingConv:
- EmitCallingConv(Records, OS);
- break;
- case GenAsmWriter:
- EmitAsmWriter(Records, OS);
- break;
- case GenAsmMatcher:
- EmitAsmMatcher(Records, OS);
- break;
- case GenDisassembler:
- EmitDisassembler(Records, OS);
- break;
- case GenPseudoLowering:
- EmitPseudoLowering(Records, OS);
- break;
- case GenCompressInst:
- EmitCompressInst(Records, OS);
- break;
- case GenDAGISel:
- EmitDAGISel(Records, OS);
- break;
- case GenDFAPacketizer:
- EmitDFAPacketizer(Records, OS);
- break;
- case GenFastISel:
- EmitFastISel(Records, OS);
- break;
- case GenSubtarget:
- EmitSubtarget(Records, OS);
- break;
- case GenIntrinsicEnums:
- EmitIntrinsicEnums(Records, OS);
- break;
- case GenIntrinsicImpl:
- EmitIntrinsicImpl(Records, OS);
- break;
- case GenOptParserDefs:
- EmitOptParser(Records, OS);
- break;
- case GenOptRST:
- EmitOptRST(Records, OS);
- break;
- case PrintEnums:
- {
- for (Record *Rec : Records.getAllDerivedDefinitions(Class))
- OS << Rec->getName() << ", ";
- OS << "\n";
- break;
- }
- case PrintSets:
- {
- SetTheory Sets;
- Sets.addFieldExpander("Set", "Elements");
- for (Record *Rec : Records.getAllDerivedDefinitions("Set")) {
- OS << Rec->getName() << " = [";
- const std::vector<Record*> *Elts = Sets.expand(Rec);
- assert(Elts && "Couldn't expand Set instance");
- for (Record *Elt : *Elts)
- OS << ' ' << Elt->getName();
- OS << " ]\n";
- }
- break;
- }
- case GenCTags:
- EmitCTags(Records, OS);
- break;
- case GenAttributes:
- EmitAttributes(Records, OS);
- break;
- case GenSearchableTables:
- EmitSearchableTables(Records, OS);
- break;
- case GenGlobalISel:
- EmitGlobalISel(Records, OS);
- break;
- case GenGICombiner:
- EmitGICombiner(Records, OS);
- break;
- case GenRegisterBank:
- EmitRegisterBank(Records, OS);
- break;
- case GenX86EVEX2VEXTables:
- EmitX86EVEX2VEXTables(Records, OS);
- break;
- case GenX86MnemonicTables:
- EmitX86MnemonicTables(Records, OS);
- break;
- case GenX86FoldTables:
- EmitX86FoldTables(Records, OS);
- break;
- case GenExegesis:
- EmitExegesis(Records, OS);
- break;
- case GenAutomata:
- EmitAutomata(Records, OS);
- break;
- case GenDirectivesEnumDecl:
- EmitDirectivesDecl(Records, OS);
- break;
- case GenDirectivesEnumImpl:
- EmitDirectivesImpl(Records, OS);
- break;
- case GenDXILOperation:
- EmitDXILOperation(Records, OS);
- break;
- case GenRISCVTargetDef:
- EmitRISCVTargetDef(Records, OS);
- break;
- }
+void PrintRecords(RecordKeeper &Records, raw_ostream &OS) {
+ OS << Records; // No argument, dump all contents
+}
- return false;
+void PrintEnums(RecordKeeper &Records, raw_ostream &OS) {
+ for (Record *Rec : Records.getAllDerivedDefinitions(Class))
+ OS << Rec->getName() << ", ";
+ OS << "\n";
}
+
+void PrintSets(RecordKeeper &Records, raw_ostream &OS) {
+ SetTheory Sets;
+ Sets.addFieldExpander("Set", "Elements");
+ for (Record *Rec : Records.getAllDerivedDefinitions("Set")) {
+ OS << Rec->getName() << " = [";
+ const std::vector<Record *> *Elts = Sets.expand(Rec);
+ assert(Elts && "Couldn't expand Set instance");
+ for (Record *Elt : *Elts)
+ OS << ' ' << Elt->getName();
+ OS << " ]\n";
+ }
}
+TableGen::Emitter::Opt X[] = {
+ {"print-records", PrintRecords, "Print all records to stdout (default)",
+ true},
+ {"print-detailed-records", EmitDetailedRecords,
+ "Print full details of all records to stdout"},
+ {"null-backend", [](RecordKeeper &Records, raw_ostream &OS) {},
+ "Do nothing after parsing (useful for timing)"},
+ {"dump-json", EmitJSON, "Dump all records as machine-readable JSON"},
+ {"print-enums", PrintEnums, "Print enum values for a class"},
+ {"print-sets", PrintSets, "Print expanded sets for testing DAG exprs"},
+};
+
+} // namespace
+
int main(int argc, char **argv) {
InitLLVM X(argc, argv);
cl::ParseCommandLineOptions(argc, argv);
- return TableGenMain(argv[0], &LLVMTableGenMain);
+ return TableGenMain(argv[0]);
}
#ifndef __has_feature
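The hunk above retires the central ActionType enum and the LLVMTableGenMain switch: each backend now registers its own command-line option, and TableGenMain(argv[0]) dispatches to whichever registered emitter was selected. A minimal sketch of the free-function flavor of that registration, based only on the TableGen::Emitter::Opt usage visible in this patch (the backend name and the "Widget" record class are hypothetical):

    #include "llvm/Support/raw_ostream.h"
    #include "llvm/TableGen/Record.h"
    #include "llvm/TableGen/TableGenBackend.h"
    using namespace llvm;

    // Hypothetical standalone backend; registers -gen-widget-table at
    // static-initialization time, mirroring the Opt array above.
    static void EmitWidgetTable(RecordKeeper &Records, raw_ostream &OS) {
      for (Record *Rec : Records.getAllDerivedDefinitions("Widget"))
        OS << Rec->getName() << "\n";
    }

    static TableGen::Emitter::Opt RegisterWidgetTable(
        "gen-widget-table", EmitWidgetTable,
        "Generate widget table (illustrative only)");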
diff --git a/llvm/utils/TableGen/TableGenBackends.h b/llvm/utils/TableGen/TableGenBackends.h
index ac44babb1261..3afe6b01467b 100644
--- a/llvm/utils/TableGen/TableGenBackends.h
+++ b/llvm/utils/TableGen/TableGenBackends.h
@@ -15,6 +15,8 @@
#ifndef LLVM_UTILS_TABLEGEN_TABLEGENBACKENDS_H
#define LLVM_UTILS_TABLEGEN_TABLEGENBACKENDS_H
+#include <string>
+
// A TableGen backend is a function that looks like
//
// EmitFoo(RecordKeeper &RK, raw_ostream &OS /*, anything else you need */ )
@@ -61,41 +63,12 @@ namespace llvm {
class raw_ostream;
class RecordKeeper;
-void EmitIntrinsicEnums(RecordKeeper &RK, raw_ostream &OS);
-void EmitIntrinsicImpl(RecordKeeper &RK, raw_ostream &OS);
-void EmitAsmMatcher(RecordKeeper &RK, raw_ostream &OS);
-void EmitAsmWriter(RecordKeeper &RK, raw_ostream &OS);
-void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS);
-void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS);
-void EmitDAGISel(RecordKeeper &RK, raw_ostream &OS);
-void EmitDFAPacketizer(RecordKeeper &RK, raw_ostream &OS);
-void EmitDisassembler(RecordKeeper &RK, raw_ostream &OS);
-void EmitFastISel(RecordKeeper &RK, raw_ostream &OS);
-void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS);
-void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS);
-void EmitPseudoLowering(RecordKeeper &RK, raw_ostream &OS);
-void EmitCompressInst(RecordKeeper &RK, raw_ostream &OS);
-void EmitRegisterInfo(RecordKeeper &RK, raw_ostream &OS);
-void EmitSubtarget(RecordKeeper &RK, raw_ostream &OS);
void EmitMapTable(RecordKeeper &RK, raw_ostream &OS);
-void EmitOptParser(RecordKeeper &RK, raw_ostream &OS);
-void EmitOptRST(RecordKeeper &RK, raw_ostream &OS);
-void EmitCTags(RecordKeeper &RK, raw_ostream &OS);
-void EmitAttributes(RecordKeeper &RK, raw_ostream &OS);
-void EmitSearchableTables(RecordKeeper &RK, raw_ostream &OS);
-void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS);
-void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS);
-void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS);
-void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS);
-void EmitX86MnemonicTables(RecordKeeper &RK, raw_ostream &OS);
-void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS);
-void EmitExegesis(RecordKeeper &RK, raw_ostream &OS);
-void EmitAutomata(RecordKeeper &RK, raw_ostream &OS);
-void EmitDirectivesDecl(RecordKeeper &RK, raw_ostream &OS);
-void EmitDirectivesImpl(RecordKeeper &RK, raw_ostream &OS);
-void EmitDXILOperation(RecordKeeper &RK, raw_ostream &OS);
-void EmitRISCVTargetDef(const RecordKeeper &RK, raw_ostream &OS);
-} // End llvm namespace
+// Defined in DecoderEmitter.cpp
+void EmitDecoder(RecordKeeper &RK, raw_ostream &OS,
+ const std::string &PredicateNamespace);
+
+} // namespace llvm
#endif
diff --git a/llvm/utils/TableGen/Types.cpp b/llvm/utils/TableGen/Types.cpp
index a6682da90e6b..aca8e36b683d 100644
--- a/llvm/utils/TableGen/Types.cpp
+++ b/llvm/utils/TableGen/Types.cpp
@@ -34,11 +34,3 @@ const char *llvm::getMinimalTypeForRange(uint64_t Range, unsigned MaxSize LLVM_A
return "uint16_t";
return "uint8_t";
}
-
-const char *llvm::getMinimalTypeForEnumBitfield(uint64_t Size) {
- uint64_t MaxIndex = Size;
- if (MaxIndex > 0)
- MaxIndex--;
- assert(MaxIndex <= 64 && "Too many bits");
- return getMinimalTypeForRange(1ULL << MaxIndex);
-}
diff --git a/llvm/utils/TableGen/Types.h b/llvm/utils/TableGen/Types.h
index 17c7742ccaac..f369d61785c4 100644
--- a/llvm/utils/TableGen/Types.h
+++ b/llvm/utils/TableGen/Types.h
@@ -16,9 +16,6 @@ namespace llvm {
/// MaxSize indicates the largest size of integer to consider (in bits) and only
/// supports values of at least 32.
const char *getMinimalTypeForRange(uint64_t Range, unsigned MaxSize = 64);
-
-/// Returns the smallest unsigned integer type that can hold the given bitfield.
-const char *getMinimalTypeForEnumBitfield(uint64_t Size);
}
#endif
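For context on the helper that survives this cleanup: getMinimalTypeForRange returns the narrowest unsigned C type able to hold values up to Range, capped by MaxSize. A hedged illustration of that contract (results inferred from the helper's documented purpose and the tail of its implementation shown above; not an exhaustive check of MaxSize combinations):

    #include "Types.h" // llvm/utils/TableGen/Types.h

    void illustrateMinimalTypes() {
      const char *A = llvm::getMinimalTypeForRange(200);        // "uint8_t"
      const char *B = llvm::getMinimalTypeForRange(60000);      // "uint16_t"
      const char *C = llvm::getMinimalTypeForRange(70000);      // "uint32_t"
      const char *D = llvm::getMinimalTypeForRange(1ULL << 40); // "uint64_t"
      (void)A; (void)B; (void)C; (void)D;
    }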
diff --git a/llvm/utils/TableGen/VTEmitter.cpp b/llvm/utils/TableGen/VTEmitter.cpp
new file mode 100644
index 000000000000..d398a7e7b58f
--- /dev/null
+++ b/llvm/utils/TableGen/VTEmitter.cpp
@@ -0,0 +1,130 @@
+//===- VTEmitter.cpp - Generate properties from ValueTypes.td -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <array>
+#include <cassert>
+#include <map>
+using namespace llvm;
+
+namespace {
+
+class VTEmitter {
+private:
+ RecordKeeper &Records;
+
+public:
+ VTEmitter(RecordKeeper &R) : Records(R) {}
+
+ void run(raw_ostream &OS);
+};
+
+} // End anonymous namespace.
+
+void VTEmitter::run(raw_ostream &OS) {
+ emitSourceFileHeader("ValueTypes Source Fragment", OS);
+
+ std::array<const Record *, 256> VTsByNumber = {};
+ auto ValueTypes = Records.getAllDerivedDefinitions("ValueType");
+ for (auto *VT : ValueTypes) {
+ auto Number = VT->getValueAsInt("Value");
+ assert(0 <= Number && Number < (int)VTsByNumber.size() &&
+ "ValueType should be uint8_t");
+ assert(!VTsByNumber[Number] && "Duplicate ValueType");
+ VTsByNumber[Number] = VT;
+ }
+
+ struct VTRange {
+ StringRef First;
+ StringRef Last;
+ bool Closed;
+ };
+
+ std::map<StringRef, VTRange> VTRanges;
+
+ auto UpdateVTRange = [&VTRanges](const char *Key, StringRef Name,
+ bool Valid) {
+ if (Valid) {
+ if (!VTRanges.count(Key))
+ VTRanges[Key].First = Name;
+ assert(!VTRanges[Key].Closed && "Gap detected!");
+ VTRanges[Key].Last = Name;
+ } else if (VTRanges.count(Key)) {
+ VTRanges[Key].Closed = true;
+ }
+ };
+
+ OS << "#ifdef GET_VT_ATTR // (Ty, n, sz, Any, Int, FP, Vec, Sc)\n";
+ for (const auto *VT : VTsByNumber) {
+ if (!VT)
+ continue;
+ auto Name = VT->getValueAsString("LLVMName");
+ auto Value = VT->getValueAsInt("Value");
+ bool IsInteger = VT->getValueAsInt("isInteger");
+ bool IsFP = VT->getValueAsInt("isFP");
+ bool IsVector = VT->getValueAsInt("isVector");
+ bool IsScalable = VT->getValueAsInt("isScalable");
+
+ UpdateVTRange("INTEGER_FIXEDLEN_VECTOR_VALUETYPE", Name,
+ IsInteger && IsVector && !IsScalable);
+ UpdateVTRange("INTEGER_SCALABLE_VECTOR_VALUETYPE", Name,
+ IsInteger && IsScalable);
+ UpdateVTRange("FP_FIXEDLEN_VECTOR_VALUETYPE", Name,
+ IsFP && IsVector && !IsScalable);
+ UpdateVTRange("FP_SCALABLE_VECTOR_VALUETYPE", Name, IsFP && IsScalable);
+ UpdateVTRange("FIXEDLEN_VECTOR_VALUETYPE", Name, IsVector && !IsScalable);
+ UpdateVTRange("SCALABLE_VECTOR_VALUETYPE", Name, IsScalable);
+ UpdateVTRange("VECTOR_VALUETYPE", Name, IsVector);
+ UpdateVTRange("INTEGER_VALUETYPE", Name, IsInteger && !IsVector);
+ UpdateVTRange("FP_VALUETYPE", Name, IsFP && !IsVector);
+ UpdateVTRange("VALUETYPE", Name, Value < 224);
+
+ // clang-format off
+ OS << " GET_VT_ATTR("
+ << Name << ", "
+ << Value << ", "
+ << VT->getValueAsInt("Size") << ", "
+ << VT->getValueAsInt("isOverloaded") << ", "
+ << (IsInteger ? Name[0] == 'i' ? 3 : 1 : 0) << ", "
+ << (IsFP ? Name[0] == 'f' ? 3 : 1 : 0) << ", "
+ << IsVector << ", "
+ << IsScalable << ")\n";
+ // clang-format on
+ }
+ OS << "#endif\n\n";
+
+ OS << "#ifdef GET_VT_RANGES\n";
+ for (const auto &KV : VTRanges) {
+ assert(KV.second.Closed);
+ OS << " FIRST_" << KV.first << " = " << KV.second.First << ",\n"
+ << " LAST_" << KV.first << " = " << KV.second.Last << ",\n";
+ }
+ OS << "#endif\n\n";
+
+ OS << "#ifdef GET_VT_VECATTR // (Ty, Sc, nElem, ElTy, ElSz)\n";
+ for (const auto *VT : VTsByNumber) {
+ if (!VT || !VT->getValueAsInt("isVector"))
+ continue;
+ const auto *ElTy = VT->getValueAsDef("ElementType");
+ assert(ElTy);
+ // clang-format off
+ OS << " GET_VT_VECATTR("
+ << VT->getValueAsString("LLVMName") << ", "
+ << VT->getValueAsInt("isScalable") << ", "
+ << VT->getValueAsInt("nElem") << ", "
+ << ElTy->getName() << ", "
+ << ElTy->getValueAsInt("Size") << ")\n";
+ // clang-format on
+ }
+ OS << "#endif\n\n";
+}
+
+static TableGen::Emitter::OptClass<VTEmitter> X("gen-vt", "Generate ValueType");
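The new backend emits macro-guarded fragments rather than a finished header, so a consumer defines the macro it needs before including the generated file. A hedged sketch of that consumption pattern (the output file name GenVT.inc and the enum are illustrative assumptions, not taken from this patch):

    // Expand the GET_VT_ATTR fragment into an enum of value types; the
    // GET_VT_RANGES and GET_VT_VECATTR sections stay inert because their
    // guards are left undefined.
    #define GET_VT_ATTR(Ty, N, Sz, Any, Int, FP, Vec, Sc) Ty = N,
    enum IllustrativeVT : unsigned char {
    #include "GenVT.inc" // hypothetical output of 'llvm-tblgen -gen-vt'
    };
    #undef GET_VT_ATTR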
diff --git a/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp b/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp
index 2c1acd8d910c..85da547d04c1 100644
--- a/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp
+++ b/llvm/utils/TableGen/VarLenCodeEmitterGen.cpp
@@ -58,6 +58,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
using namespace llvm;
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp
index 601591d9f53d..708c92aecfc8 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -76,7 +76,7 @@ static inline const char* stringForOperandEncoding(OperandEncoding encoding) {
/// @return - True if child is a subset of parent, false otherwise.
static inline bool inheritsFrom(InstructionContext child,
InstructionContext parent, bool noPrefix = true,
- bool VEX_LIG = false, bool VEX_WIG = false,
+ bool VEX_LIG = false, bool WIG = false,
bool AdSize64 = false) {
if (child == parent)
return true;
@@ -144,20 +144,20 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_64BIT_REXW_ADSIZE:
return false;
case IC_VEX:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_VEX_L_W)) ||
- (VEX_WIG && inheritsFrom(child, IC_VEX_W)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_VEX_L_W)) ||
+ (WIG && inheritsFrom(child, IC_VEX_W)) ||
(VEX_LIG && inheritsFrom(child, IC_VEX_L));
case IC_VEX_XS:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_VEX_L_W_XS)) ||
- (VEX_WIG && inheritsFrom(child, IC_VEX_W_XS)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_VEX_L_W_XS)) ||
+ (WIG && inheritsFrom(child, IC_VEX_W_XS)) ||
(VEX_LIG && inheritsFrom(child, IC_VEX_L_XS));
case IC_VEX_XD:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_VEX_L_W_XD)) ||
- (VEX_WIG && inheritsFrom(child, IC_VEX_W_XD)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_VEX_L_W_XD)) ||
+ (WIG && inheritsFrom(child, IC_VEX_W_XD)) ||
(VEX_LIG && inheritsFrom(child, IC_VEX_L_XD));
case IC_VEX_OPSIZE:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_VEX_L_W_OPSIZE)) ||
- (VEX_WIG && inheritsFrom(child, IC_VEX_W_OPSIZE)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_VEX_L_W_OPSIZE)) ||
+ (WIG && inheritsFrom(child, IC_VEX_W_OPSIZE)) ||
(VEX_LIG && inheritsFrom(child, IC_VEX_L_OPSIZE));
case IC_VEX_W:
return VEX_LIG && inheritsFrom(child, IC_VEX_L_W);
@@ -168,88 +168,88 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_VEX_W_OPSIZE:
return VEX_LIG && inheritsFrom(child, IC_VEX_L_W_OPSIZE);
case IC_VEX_L:
- return VEX_WIG && inheritsFrom(child, IC_VEX_L_W);
+ return WIG && inheritsFrom(child, IC_VEX_L_W);
case IC_VEX_L_XS:
- return VEX_WIG && inheritsFrom(child, IC_VEX_L_W_XS);
+ return WIG && inheritsFrom(child, IC_VEX_L_W_XS);
case IC_VEX_L_XD:
- return VEX_WIG && inheritsFrom(child, IC_VEX_L_W_XD);
+ return WIG && inheritsFrom(child, IC_VEX_L_W_XD);
case IC_VEX_L_OPSIZE:
- return VEX_WIG && inheritsFrom(child, IC_VEX_L_W_OPSIZE);
+ return WIG && inheritsFrom(child, IC_VEX_L_W_OPSIZE);
case IC_VEX_L_W:
case IC_VEX_L_W_XS:
case IC_VEX_L_W_XD:
case IC_VEX_L_W_OPSIZE:
return false;
case IC_EVEX:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2));
case IC_EVEX_XS:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XS)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_XS)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_XS)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_XS)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_XS)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XS));
case IC_EVEX_XD:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XD)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_XD)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_XD)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_XD)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_XD)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XD));
case IC_EVEX_OPSIZE:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_OPSIZE)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_OPSIZE)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_OPSIZE)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_OPSIZE));
case IC_EVEX_K:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_K)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_K)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_K)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_K)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_K)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_K)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_K)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_K));
case IC_EVEX_XS_K:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_K)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_K)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XS_K)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_XS_K)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_K)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_XS_K)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_XS_K)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XS_K));
case IC_EVEX_XD_K:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_K)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_K)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XD_K)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_XD_K)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_K)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_XD_K)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_XD_K)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XD_K));
case IC_EVEX_OPSIZE_K:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_K)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_K)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_OPSIZE_K)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_K)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_K)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_OPSIZE_K)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_OPSIZE_K)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_OPSIZE_K));
case IC_EVEX_KZ:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_KZ)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_KZ)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_KZ)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_KZ)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_KZ)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_KZ)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_KZ)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_KZ));
case IC_EVEX_XS_KZ:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_KZ)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_KZ)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XS_KZ)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_XS_KZ)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_KZ)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_XS_KZ)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_XS_KZ)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XS_KZ));
case IC_EVEX_XD_KZ:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_KZ)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_KZ)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XD_KZ)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_XD_KZ)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_KZ)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_XD_KZ)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_XD_KZ)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XD_KZ));
case IC_EVEX_OPSIZE_KZ:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_KZ)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_KZ)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_OPSIZE_KZ)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_KZ)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_KZ)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_OPSIZE_KZ)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_OPSIZE_KZ)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_OPSIZE_KZ));
case IC_EVEX_W:
@@ -289,29 +289,29 @@ static inline bool inheritsFrom(InstructionContext child,
return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_KZ)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_KZ));
case IC_EVEX_L:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W);
case IC_EVEX_L_XS:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_XS);
case IC_EVEX_L_XD:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_XD);
case IC_EVEX_L_OPSIZE:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE);
case IC_EVEX_L_K:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_K);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_K);
case IC_EVEX_L_XS_K:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_K);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_XS_K);
case IC_EVEX_L_XD_K:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_K);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_XD_K);
case IC_EVEX_L_OPSIZE_K:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_K);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_K);
case IC_EVEX_L_KZ:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_KZ);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_KZ);
case IC_EVEX_L_XS_KZ:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_KZ);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_XS_KZ);
case IC_EVEX_L_XD_KZ:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_KZ);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_XD_KZ);
case IC_EVEX_L_OPSIZE_KZ:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_KZ);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_KZ);
case IC_EVEX_L_W:
case IC_EVEX_L_W_XS:
case IC_EVEX_L_W_XD:
@@ -328,29 +328,29 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_EVEX_L_W_OPSIZE_KZ:
return false;
case IC_EVEX_L2:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W);
case IC_EVEX_L2_XS:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_XS);
case IC_EVEX_L2_XD:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_XD);
case IC_EVEX_L2_OPSIZE:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE);
case IC_EVEX_L2_K:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_K);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_K);
case IC_EVEX_L2_XS_K:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_K);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_K);
case IC_EVEX_L2_XD_K:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_K);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_K);
case IC_EVEX_L2_OPSIZE_K:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_K);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_K);
case IC_EVEX_L2_KZ:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_KZ);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_KZ);
case IC_EVEX_L2_XS_KZ:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_KZ);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_KZ);
case IC_EVEX_L2_XD_KZ:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_KZ);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_KZ);
case IC_EVEX_L2_OPSIZE_KZ:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_KZ);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_KZ);
case IC_EVEX_L2_W:
case IC_EVEX_L2_W_XS:
case IC_EVEX_L2_W_XD:
@@ -367,79 +367,79 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_EVEX_L2_W_OPSIZE_KZ:
return false;
case IC_EVEX_B:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_B)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_B)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_B)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_B)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_B)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_B));
case IC_EVEX_XS_B:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_B)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_B)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XS_B)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_XS_B)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_B)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_XS_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_XS_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XS_B));
case IC_EVEX_XD_B:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_B)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_B)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XD_B)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_XD_B)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_B)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_XD_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_XD_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XD_B));
case IC_EVEX_OPSIZE_B:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_B)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_B)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_OPSIZE_B)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_B)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_B)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_OPSIZE_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_OPSIZE_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_OPSIZE_B));
case IC_EVEX_K_B:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_K_B)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_K_B)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_K_B)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_K_B)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_K_B)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_K_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_K_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_K_B));
case IC_EVEX_XS_K_B:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_K_B)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_K_B)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XS_K_B)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_XS_K_B)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_K_B)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_XS_K_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_XS_K_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XS_K_B));
case IC_EVEX_XD_K_B:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_K_B)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_K_B)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XD_K_B)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_XD_K_B)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_K_B)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_XD_K_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_XD_K_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XD_K_B));
case IC_EVEX_OPSIZE_K_B:
- return (VEX_LIG && VEX_WIG &&
+ return (VEX_LIG && WIG &&
inheritsFrom(child, IC_EVEX_L_W_OPSIZE_K_B)) ||
- (VEX_LIG && VEX_WIG &&
+ (VEX_LIG && WIG &&
inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_K_B)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_OPSIZE_K_B)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_OPSIZE_K_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_OPSIZE_K_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_OPSIZE_K_B));
case IC_EVEX_KZ_B:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_KZ_B)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_KZ_B)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_KZ_B)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_KZ_B)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_KZ_B)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_KZ_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_KZ_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_KZ_B));
case IC_EVEX_XS_KZ_B:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_KZ_B)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_KZ_B)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XS_KZ_B)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_XS_KZ_B)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_KZ_B)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_XS_KZ_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_XS_KZ_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XS_KZ_B));
case IC_EVEX_XD_KZ_B:
- return (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_KZ_B)) ||
- (VEX_LIG && VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_KZ_B)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_XD_KZ_B)) ||
+ return (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L_W_XD_KZ_B)) ||
+ (VEX_LIG && WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_KZ_B)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_XD_KZ_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_XD_KZ_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_XD_KZ_B));
case IC_EVEX_OPSIZE_KZ_B:
- return (VEX_LIG && VEX_WIG &&
+ return (VEX_LIG && WIG &&
inheritsFrom(child, IC_EVEX_L_W_OPSIZE_KZ_B)) ||
- (VEX_LIG && VEX_WIG &&
+ (VEX_LIG && WIG &&
inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_KZ_B)) ||
- (VEX_WIG && inheritsFrom(child, IC_EVEX_W_OPSIZE_KZ_B)) ||
+ (WIG && inheritsFrom(child, IC_EVEX_W_OPSIZE_KZ_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L_OPSIZE_KZ_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_OPSIZE_KZ_B));
case IC_EVEX_W_B:
@@ -479,29 +479,29 @@ static inline bool inheritsFrom(InstructionContext child,
return (VEX_LIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_KZ_B)) ||
(VEX_LIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_KZ_B));
case IC_EVEX_L_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_B);
case IC_EVEX_L_XS_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_XS_B);
case IC_EVEX_L_XD_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_XD_B);
case IC_EVEX_L_OPSIZE_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_B);
case IC_EVEX_L_K_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_K_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_K_B);
case IC_EVEX_L_XS_K_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_K_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_XS_K_B);
case IC_EVEX_L_XD_K_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_K_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_XD_K_B);
case IC_EVEX_L_OPSIZE_K_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_K_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_K_B);
case IC_EVEX_L_KZ_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_KZ_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_KZ_B);
case IC_EVEX_L_XS_KZ_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XS_KZ_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_XS_KZ_B);
case IC_EVEX_L_XD_KZ_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_XD_KZ_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_XD_KZ_B);
case IC_EVEX_L_OPSIZE_KZ_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_KZ_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L_W_OPSIZE_KZ_B);
case IC_EVEX_L_W_B:
case IC_EVEX_L_W_XS_B:
case IC_EVEX_L_W_XD_B:
@@ -518,29 +518,29 @@ static inline bool inheritsFrom(InstructionContext child,
case IC_EVEX_L_W_OPSIZE_KZ_B:
return false;
case IC_EVEX_L2_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_B);
case IC_EVEX_L2_XS_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_B);
case IC_EVEX_L2_XD_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_B);
case IC_EVEX_L2_OPSIZE_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_B);
case IC_EVEX_L2_K_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_K_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_K_B);
case IC_EVEX_L2_XS_K_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_K_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_K_B);
case IC_EVEX_L2_XD_K_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_K_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_K_B);
case IC_EVEX_L2_OPSIZE_K_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_K_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_K_B);
case IC_EVEX_L2_KZ_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_KZ_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_KZ_B);
case IC_EVEX_L2_XS_KZ_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_KZ_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_XS_KZ_B);
case IC_EVEX_L2_XD_KZ_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_KZ_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_XD_KZ_B);
case IC_EVEX_L2_OPSIZE_KZ_B:
- return VEX_WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_KZ_B);
+ return WIG && inheritsFrom(child, IC_EVEX_L2_W_OPSIZE_KZ_B);
case IC_EVEX_L2_W_B:
case IC_EVEX_L2_W_XS_B:
case IC_EVEX_L2_W_XD_B:
@@ -1068,7 +1068,7 @@ void DisassemblerTables::setTableFields(OpcodeType type,
bool is32bit,
bool noPrefix,
bool ignoresVEX_L,
- bool ignoresVEX_W,
+ bool ignoresW,
unsigned addressSize) {
ContextDecision &decision = *Tables[type];
@@ -1080,7 +1080,7 @@ void DisassemblerTables::setTableFields(OpcodeType type,
bool adSize64 = addressSize == 64;
if (inheritsFrom((InstructionContext)index,
InstructionSpecifiers[uid].insnContext, noPrefix,
- ignoresVEX_L, ignoresVEX_W, adSize64))
+ ignoresVEX_L, ignoresW, adSize64))
setTableFields(decision.opcodeDecisions[index].modRMDecisions[opcode],
filter,
uid,
diff --git a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
index 1384330ee8a1..35792ab67a4f 100644
--- a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp
@@ -15,6 +15,7 @@
#include "CodeGenTarget.h"
#include "X86RecognizableInstr.h"
#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
using namespace llvm;
@@ -113,10 +114,10 @@ public:
bool operator()(const CodeGenInstruction *VEXInst) {
RecognizableInstrBase VEXRI(*VEXInst);
RecognizableInstrBase EVEXRI(*EVEXInst);
- bool VEX_W = VEXRI.HasVEX_W;
- bool EVEX_W = EVEXRI.HasVEX_W;
- bool VEX_WIG = VEXRI.IgnoresVEX_W;
- bool EVEX_WIG = EVEXRI.IgnoresVEX_W;
+ bool VEX_W = VEXRI.HasREX_W;
+ bool EVEX_W = EVEXRI.HasREX_W;
+ bool VEX_WIG = VEXRI.IgnoresW;
+ bool EVEX_WIG = EVEXRI.IgnoresW;
bool EVEX_W1_VEX_W0 = EVEXInst->TheDef->getValueAsBit("EVEX_W1_VEX_W0");
if (VEXRI.IsCodeGenOnly != EVEXRI.IsCodeGenOnly ||
@@ -237,10 +238,7 @@ void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) {
// Print CheckVEXInstPredicate function.
printCheckPredicate(EVEX2VEXPredicates, OS);
}
-}
+} // namespace
-namespace llvm {
-void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS) {
- X86EVEX2VEXTablesEmitter(RK).run(OS);
-}
-}
+static TableGen::Emitter::OptClass<X86EVEX2VEXTablesEmitter>
+ X("gen-x86-EVEX2VEX-tables", "Generate X86 EVEX to VEX compress tables");
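Class-shaped backends migrate the same way: TableGen::Emitter::OptClass<T>, as used here and in VTEmitter.cpp above, expects T to be constructible from a RecordKeeper reference and to provide run(raw_ostream &). A hedged sketch of that contract with a hypothetical emitter (only the registration interface exercised by this patch is assumed):

    #include "llvm/Support/raw_ostream.h"
    #include "llvm/TableGen/Record.h"
    #include "llvm/TableGen/TableGenBackend.h"
    using namespace llvm;

    namespace {
    // Hypothetical class-based backend following the OptClass contract.
    class WidgetDocsEmitter {
      RecordKeeper &Records;

    public:
      WidgetDocsEmitter(RecordKeeper &R) : Records(R) {}
      void run(raw_ostream &OS) {
        for (Record *Rec : Records.getAllDerivedDefinitions("Widget"))
          OS << "* " << Rec->getName() << "\n";
      }
    };
    } // namespace

    static TableGen::Emitter::OptClass<WidgetDocsEmitter>
        RegisterWidgetDocs("gen-widget-docs",
                           "Generate widget docs (illustrative only)");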
diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
index 5b3f11848de6..89d93e4d3cbc 100644
--- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -11,34 +11,24 @@
//
//===----------------------------------------------------------------------===//
+#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
#include "X86RecognizableInstr.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/TableGen/Error.h"
+#include "llvm/Support/X86FoldTablesUtils.h"
+#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
using namespace llvm;
using namespace X86Disassembler;
namespace {
-
-// 3 possible strategies for the unfolding flag (TB_NO_REVERSE) of the
-// manual added entries.
-enum UnfoldStrategy {
- UNFOLD, // Allow unfolding
- NO_UNFOLD, // Prevent unfolding
- NO_STRATEGY // Make decision according to operands' sizes
-};
-
// Represents an entry in the manual mapped instructions set.
struct ManualMapEntry {
const char *RegInstStr;
const char *MemInstStr;
- UnfoldStrategy Strategy;
-
- ManualMapEntry(const char *RegInstStr, const char *MemInstStr,
- UnfoldStrategy Strategy = NO_STRATEGY)
- : RegInstStr(RegInstStr), MemInstStr(MemInstStr), Strategy(Strategy) {}
+ uint16_t Strategy;
};
// List of instructions requiring explicitly aligned memory.
@@ -50,36 +40,15 @@ const char *ExplicitUnalign[] = {"MOVDQU", "MOVUPS", "MOVUPD",
"PCMPESTRM", "PCMPESTRI",
"PCMPISTRM", "PCMPISTRI" };
-// For manually mapping instructions that do not match by their encoding.
const ManualMapEntry ManualMapSet[] = {
- { "ADD16ri_DB", "ADD16mi", NO_UNFOLD },
- { "ADD16ri8_DB", "ADD16mi8", NO_UNFOLD },
- { "ADD16rr_DB", "ADD16mr", NO_UNFOLD },
- { "ADD32ri_DB", "ADD32mi", NO_UNFOLD },
- { "ADD32ri8_DB", "ADD32mi8", NO_UNFOLD },
- { "ADD32rr_DB", "ADD32mr", NO_UNFOLD },
- { "ADD64ri32_DB", "ADD64mi32", NO_UNFOLD },
- { "ADD64ri8_DB", "ADD64mi8", NO_UNFOLD },
- { "ADD64rr_DB", "ADD64mr", NO_UNFOLD },
- { "ADD8ri_DB", "ADD8mi", NO_UNFOLD },
- { "ADD8rr_DB", "ADD8mr", NO_UNFOLD },
- { "ADD16rr_DB", "ADD16rm", NO_UNFOLD },
- { "ADD32rr_DB", "ADD32rm", NO_UNFOLD },
- { "ADD64rr_DB", "ADD64rm", NO_UNFOLD },
- { "ADD8rr_DB", "ADD8rm", NO_UNFOLD },
- { "MMX_MOVD64from64rr", "MMX_MOVQ64mr", UNFOLD },
- { "MMX_MOVD64grr", "MMX_MOVD64mr", UNFOLD },
- { "MOVLHPSrr", "MOVHPSrm", NO_UNFOLD },
- { "PUSH16r", "PUSH16rmm", UNFOLD },
- { "PUSH32r", "PUSH32rmm", UNFOLD },
- { "PUSH64r", "PUSH64rmm", UNFOLD },
- { "TAILJMPr", "TAILJMPm", UNFOLD },
- { "TAILJMPr64", "TAILJMPm64", UNFOLD },
- { "TAILJMPr64_REX", "TAILJMPm64_REX", UNFOLD },
- { "VMOVLHPSZrr", "VMOVHPSZ128rm", NO_UNFOLD },
- { "VMOVLHPSrr", "VMOVHPSrm", NO_UNFOLD },
+#define ENTRY(REG, MEM, FLAGS) {#REG, #MEM, FLAGS},
+#include "X86ManualFoldTables.def"
};
+const std::set<StringRef> NoFoldSet = {
+#define NOFOLD(INSN) #INSN,
+#include "X86ManualFoldTables.def"
+};
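Both ManualMapSet and NoFoldSet are now driven by a single X-macro file, expanded once with ENTRY for manually mapped register/memory pairs and once with NOFOLD for instructions excluded from folding. A hedged sketch of how entries in such a .def file are typically laid out (illustrative lines only; the real contents of X86ManualFoldTables.def are not part of this excerpt):

    // Each macro defaults to a no-op so either expansion can be requested on
    // its own; the includer defines only the macro it cares about.
    #ifndef ENTRY
    #define ENTRY(REG, MEM, FLAGS)
    #endif
    #ifndef NOFOLD
    #define NOFOLD(INSN)
    #endif

    ENTRY(ADD16rr_DB, ADD16mr, TB_NO_REVERSE) // example mapped pair
    NOFOLD(SOMEINSN)                          // hypothetical excluded opcode

    #undef ENTRY
    #undef NOFOLD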
static bool isExplicitAlign(const CodeGenInstruction *Inst) {
return any_of(ExplicitAlign, [Inst](const char *InstStr) {
@@ -103,51 +72,76 @@ class X86FoldTablesEmitter {
const CodeGenInstruction *MemInst;
public:
- bool CannotUnfold = false;
- bool IsLoad = false;
- bool IsStore = false;
- bool IsAligned = false;
- unsigned int Alignment = 0;
+ bool NoReverse = false;
+ bool NoForward = false;
+ bool FoldLoad = false;
+ bool FoldStore = false;
+ Align Alignment;
+ X86FoldTableEntry() = default;
X86FoldTableEntry(const CodeGenInstruction *RegInst,
const CodeGenInstruction *MemInst)
: RegInst(RegInst), MemInst(MemInst) {}
void print(formatted_raw_ostream &OS) const {
OS.indent(2);
- OS << "{ X86::" << RegInst->TheDef->getName() << ",";
- OS.PadToColumn(40);
- OS << "X86::" << MemInst->TheDef->getName() << ",";
- OS.PadToColumn(75);
+ OS << "{X86::" << RegInst->TheDef->getName() << ", ";
+ OS << "X86::" << MemInst->TheDef->getName() << ", ";
std::string Attrs;
- if (IsLoad)
- Attrs += "TB_FOLDED_LOAD | ";
- if (IsStore)
- Attrs += "TB_FOLDED_STORE | ";
- if (CannotUnfold)
- Attrs += "TB_NO_REVERSE | ";
- if (IsAligned)
- Attrs += "TB_ALIGN_" + std::to_string(Alignment) + " | ";
-
- StringRef SimplifiedAttrs = StringRef(Attrs).rtrim("| ");
+ if (FoldLoad)
+ Attrs += "TB_FOLDED_LOAD|";
+ if (FoldStore)
+ Attrs += "TB_FOLDED_STORE|";
+ if (NoReverse)
+ Attrs += "TB_NO_REVERSE|";
+ if (NoForward)
+ Attrs += "TB_NO_FORWARD|";
+ if (Alignment != Align(1))
+ Attrs += "TB_ALIGN_" + std::to_string(Alignment.value()) + "|";
+
+ StringRef SimplifiedAttrs = StringRef(Attrs).rtrim("|");
if (SimplifiedAttrs.empty())
SimplifiedAttrs = "0";
- OS << SimplifiedAttrs << " },\n";
+ OS << SimplifiedAttrs << "},\n";
}
- bool operator<(const X86FoldTableEntry &RHS) const {
- bool LHSpseudo = RegInst->TheDef->getValueAsBit("isPseudo");
- bool RHSpseudo = RHS.RegInst->TheDef->getValueAsBit("isPseudo");
- if (LHSpseudo != RHSpseudo)
- return LHSpseudo;
+#ifndef NDEBUG
+ // Check that Uses and Defs are same after memory fold.
+ // Check that Uses and Defs are the same after the memory fold.
+ auto &RegInstRec = *RegInst->TheDef;
+ auto &MemInstRec = *MemInst->TheDef;
+ auto ListOfUsesReg = RegInstRec.getValueAsListOfDefs("Uses");
+ auto ListOfUsesMem = MemInstRec.getValueAsListOfDefs("Uses");
+ auto ListOfDefsReg = RegInstRec.getValueAsListOfDefs("Defs");
+ auto ListOfDefsMem = MemInstRec.getValueAsListOfDefs("Defs");
+ if (ListOfUsesReg != ListOfUsesMem || ListOfDefsReg != ListOfDefsMem)
+ report_fatal_error("Uses/Defs couldn't be changed after folding " +
+ RegInstRec.getName() + " to " +
+ MemInstRec.getName());
+ }
+#endif
+ };
- return RegInst->TheDef->getName() < RHS.RegInst->TheDef->getName();
+ // NOTE: We check the fold tables are sorted in X86InstrFoldTables.cpp by the enum of the
+ // instruction, which is computed in CodeGenTarget::ComputeInstrsByEnum. So we should
+ // use the same comparator here.
+ // FIXME: Could we share the code with CodeGenTarget::ComputeInstrsByEnum?
+ struct CompareInstrsByEnum {
+ bool operator()(const CodeGenInstruction *LHS,
+ const CodeGenInstruction *RHS) const {
+ assert(LHS && RHS && "LHS and RHS shouldn't be nullptr");
+ const auto &D1 = *LHS->TheDef;
+ const auto &D2 = *RHS->TheDef;
+ return std::make_tuple(!D1.getValueAsBit("isPseudo"), D1.getName()) <
+ std::make_tuple(!D2.getValueAsBit("isPseudo"), D2.getName());
}
};
- typedef std::vector<X86FoldTableEntry> FoldTable;
+ typedef std::map<const CodeGenInstruction *, X86FoldTableEntry,
+ CompareInstrsByEnum>
+ FoldTable;
// std::vector for each folding table.
// Table2Addr - Holds instructions whose memory form performs load+store
// Table#i - Holds instructions whose memory form performs a load OR
@@ -163,20 +157,20 @@ public:
X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}
// run - Generate the 6 X86 memory fold tables.
- void run(formatted_raw_ostream &OS);
+ void run(raw_ostream &OS);
private:
// Decides to which table to add the entry with the given instructions.
// S sets the strategy of adding the TB_NO_REVERSE flag.
void updateTables(const CodeGenInstruction *RegInstr,
- const CodeGenInstruction *MemInstr,
- const UnfoldStrategy S = NO_STRATEGY);
+ const CodeGenInstruction *MemInstr, uint16_t S = 0,
+ bool IsManual = false);
// Generates X86FoldTableEntry with the given instructions and fill it with
// the appropriate flags - then adds it to Table.
void addEntryWithFlags(FoldTable &Table, const CodeGenInstruction *RegInstr,
- const CodeGenInstruction *MemInstr,
- const UnfoldStrategy S, const unsigned int FoldedInd);
+ const CodeGenInstruction *MemInstr, uint16_t S,
+ unsigned FoldedIdx, bool isManual);
// Print the given table as a static const C++ array of type
// X86MemoryFoldTableEntry.
@@ -185,8 +179,8 @@ private:
OS << "static const X86MemoryFoldTableEntry MemoryFold" << TableName
<< "[] = {\n";
- for (const X86FoldTableEntry &E : Table)
- E.print(OS);
+ for (auto &E : Table)
+ E.second.print(OS);
OS << "};\n\n";
}
@@ -206,76 +200,110 @@ static bool hasPtrTailcallRegClass(const CodeGenInstruction *Inst) {
});
}
-// Calculates the integer value representing the BitsInit object
-static inline uint64_t getValueFromBitsInit(const BitsInit *B) {
- assert(B->getNumBits() <= sizeof(uint64_t) * 8 && "BitInits' too long!");
+static uint8_t byteFromBitsInit(const BitsInit *B) {
+ unsigned N = B->getNumBits();
+ assert(N <= 8 && "Field is too large for uint8_t!");
- uint64_t Value = 0;
- for (unsigned i = 0, e = B->getNumBits(); i != e; ++i) {
- BitInit *Bit = cast<BitInit>(B->getBit(i));
- Value |= uint64_t(Bit->getValue()) << i;
+ uint8_t Value = 0;
+ for (unsigned I = 0; I != N; ++I) {
+ BitInit *Bit = cast<BitInit>(B->getBit(I));
+ Value |= Bit->getValue() << I;
}
return Value;
}
-// Return true if the instruction defined as a register flavor.
-static inline bool hasRegisterFormat(const Record *Inst) {
- const BitsInit *FormBits = Inst->getValueAsBitsInit("FormBits");
- uint64_t FormBitsNum = getValueFromBitsInit(FormBits);
-
- // Values from X86Local namespace defined in X86RecognizableInstr.cpp
- return FormBitsNum >= X86Local::MRMDestReg && FormBitsNum <= X86Local::MRM7r;
+static bool mayFoldFromForm(uint8_t Form) {
+ switch (Form) {
+ default:
+ return Form >= X86Local::MRM0r && Form <= X86Local::MRM7r;
+ case X86Local::MRMXr:
+ case X86Local::MRMXrCC:
+ case X86Local::MRMDestReg:
+ case X86Local::MRMSrcReg:
+ case X86Local::MRMSrcReg4VOp3:
+ case X86Local::MRMSrcRegOp4:
+ case X86Local::MRMSrcRegCC:
+ return true;
+ }
}
-// Return true if the instruction defined as a memory flavor.
-static inline bool hasMemoryFormat(const Record *Inst) {
- const BitsInit *FormBits = Inst->getValueAsBitsInit("FormBits");
- uint64_t FormBitsNum = getValueFromBitsInit(FormBits);
-
- // Values from X86Local namespace defined in X86RecognizableInstr.cpp
- return FormBitsNum >= X86Local::MRMDestMem && FormBitsNum <= X86Local::MRM7m;
+static bool mayFoldToForm(uint8_t Form) {
+ switch (Form) {
+ default:
+ return Form >= X86Local::MRM0m && Form <= X86Local::MRM7m;
+ case X86Local::MRMXm:
+ case X86Local::MRMXmCC:
+ case X86Local::MRMDestMem:
+ case X86Local::MRMSrcMem:
+ case X86Local::MRMSrcMem4VOp3:
+ case X86Local::MRMSrcMemOp4:
+ case X86Local::MRMSrcMemCC:
+ return true;
+ }
}
-static inline bool isNOREXRegClass(const Record *Op) {
- return Op->getName().contains("_NOREX");
+static bool mayFoldFromLeftToRight(uint8_t LHS, uint8_t RHS) {
+ switch (LHS) {
+ default:
+ llvm_unreachable("Unexpected Form!");
+ case X86Local::MRM0r:
+ return RHS == X86Local::MRM0m;
+ case X86Local::MRM1r:
+ return RHS == X86Local::MRM1m;
+ case X86Local::MRM2r:
+ return RHS == X86Local::MRM2m;
+ case X86Local::MRM3r:
+ return RHS == X86Local::MRM3m;
+ case X86Local::MRM4r:
+ return RHS == X86Local::MRM4m;
+ case X86Local::MRM5r:
+ return RHS == X86Local::MRM5m;
+ case X86Local::MRM6r:
+ return RHS == X86Local::MRM6m;
+ case X86Local::MRM7r:
+ return RHS == X86Local::MRM7m;
+ case X86Local::MRMXr:
+ return RHS == X86Local::MRMXm;
+ case X86Local::MRMXrCC:
+ return RHS == X86Local::MRMXmCC;
+ case X86Local::MRMDestReg:
+ return RHS == X86Local::MRMDestMem;
+ case X86Local::MRMSrcReg:
+ return RHS == X86Local::MRMSrcMem;
+ case X86Local::MRMSrcReg4VOp3:
+ return RHS == X86Local::MRMSrcMem4VOp3;
+ case X86Local::MRMSrcRegOp4:
+ return RHS == X86Local::MRMSrcMemOp4;
+ case X86Local::MRMSrcRegCC:
+ return RHS == X86Local::MRMSrcMemCC;
+ }
}
-// Get the alternative instruction pointed by "FoldGenRegForm" field.
-static inline const CodeGenInstruction *
-getAltRegInst(const CodeGenInstruction *I, const RecordKeeper &Records,
- const CodeGenTarget &Target) {
-
- StringRef AltRegInstStr = I->TheDef->getValueAsString("FoldGenRegForm");
- Record *AltRegInstRec = Records.getDef(AltRegInstStr);
- assert(AltRegInstRec &&
- "Alternative register form instruction def not found");
- CodeGenInstruction &AltRegInst = Target.getInstruction(AltRegInstRec);
- return &AltRegInst;
+static bool isNOREXRegClass(const Record *Op) {
+ return Op->getName().contains("_NOREX");
}
-// Function object - Operator() returns true if the given VEX instruction
-// matches the EVEX instruction of this object.
+// Function object - Operator() returns true if the given Reg instruction
+// matches the Mem instruction of this object.
class IsMatch {
const CodeGenInstruction *MemInst;
- unsigned Variant;
+ const X86Disassembler::RecognizableInstrBase MemRI;
+ const unsigned Variant;
public:
IsMatch(const CodeGenInstruction *Inst, unsigned V)
- : MemInst(Inst), Variant(V) {}
+ : MemInst(Inst), MemRI(*MemInst), Variant(V) {}
bool operator()(const CodeGenInstruction *RegInst) {
X86Disassembler::RecognizableInstrBase RegRI(*RegInst);
- X86Disassembler::RecognizableInstrBase MemRI(*MemInst);
const Record *RegRec = RegInst->TheDef;
const Record *MemRec = MemInst->TheDef;
// EVEX_B means different things for memory and register forms.
- if (RegRI.HasEVEX_B != 0 || MemRI.HasEVEX_B != 0)
+ if (RegRI.HasEVEX_B || MemRI.HasEVEX_B)
return false;
- // Instruction's format - The register form's "Form" field should be
- // the opposite of the memory form's "Form" field.
- if (!areOppositeForms(RegRI.Form, MemRI.Form))
+ if (!mayFoldFromLeftToRight(RegRI.Form, MemRI.Form))
return false;
// X86 encoding is crazy, e.g
@@ -288,38 +316,32 @@ public:
X86Disassembler::getMnemonic(RegInst, Variant))
return false;
- // Return false if one (at least) of the encoding fields of both
- // instructions do not match.
- if (RegRI.Encoding != MemRI.Encoding || RegRI.Opcode != MemRI.Opcode ||
- RegRI.OpPrefix != MemRI.OpPrefix || RegRI.OpMap != MemRI.OpMap ||
- RegRI.OpSize != MemRI.OpSize || RegRI.AdSize != MemRI.AdSize ||
- RegRI.HasREX_W != MemRI.HasREX_W ||
- RegRI.HasVEX_4V != MemRI.HasVEX_4V ||
- RegRI.HasVEX_L != MemRI.HasVEX_L ||
- RegRI.HasVEX_W != MemRI.HasVEX_W ||
- RegRI.IgnoresVEX_L != MemRI.IgnoresVEX_L ||
- RegRI.IgnoresVEX_W != MemRI.IgnoresVEX_W ||
- RegRI.HasEVEX_K != MemRI.HasEVEX_K ||
- RegRI.HasEVEX_KZ != MemRI.HasEVEX_KZ ||
- RegRI.HasEVEX_L2 != MemRI.HasEVEX_L2 ||
- RegRec->getValueAsBit("hasEVEX_RC") !=
- MemRec->getValueAsBit("hasEVEX_RC") ||
- RegRec->getValueAsBit("hasLockPrefix") !=
- MemRec->getValueAsBit("hasLockPrefix") ||
- RegRec->getValueAsBit("hasNoTrackPrefix") !=
- MemRec->getValueAsBit("hasNoTrackPrefix") ||
- RegRec->getValueAsBit("EVEX_W1_VEX_W0") !=
- MemRec->getValueAsBit("EVEX_W1_VEX_W0"))
+ // Return false if any of the following fields does not match.
+ if (std::make_tuple(RegRI.Encoding, RegRI.Opcode, RegRI.OpPrefix,
+ RegRI.OpMap, RegRI.OpSize, RegRI.AdSize, RegRI.HasREX_W,
+ RegRI.HasVEX_4V, RegRI.HasVEX_L, RegRI.IgnoresVEX_L,
+ RegRI.IgnoresW, RegRI.HasEVEX_K, RegRI.HasEVEX_KZ,
+ RegRI.HasEVEX_L2, RegRec->getValueAsBit("hasEVEX_RC"),
+ RegRec->getValueAsBit("hasLockPrefix"),
+ RegRec->getValueAsBit("hasNoTrackPrefix"),
+ RegRec->getValueAsBit("EVEX_W1_VEX_W0")) !=
+ std::make_tuple(MemRI.Encoding, MemRI.Opcode, MemRI.OpPrefix,
+ MemRI.OpMap, MemRI.OpSize, MemRI.AdSize, MemRI.HasREX_W,
+ MemRI.HasVEX_4V, MemRI.HasVEX_L, MemRI.IgnoresVEX_L,
+ MemRI.IgnoresW, MemRI.HasEVEX_K, MemRI.HasEVEX_KZ,
+ MemRI.HasEVEX_L2, MemRec->getValueAsBit("hasEVEX_RC"),
+ MemRec->getValueAsBit("hasLockPrefix"),
+ MemRec->getValueAsBit("hasNoTrackPrefix"),
+ MemRec->getValueAsBit("EVEX_W1_VEX_W0")))
return false;
// Make sure the sizes of the operands of both instructions suit each other.
// This is needed for instructions with intrinsic version (_Int).
// Where the only difference is the size of the operands.
- // For example: VUCOMISDZrm and Int_VUCOMISDrm
+ // For example: VUCOMISDZrm and VUCOMISDrm_Int
// Also for instructions that their EVEX version was upgraded to work with
// k-registers. For example VPCMPEQBrm (xmm output register) and
// VPCMPEQBZ128rm (k register output register).
- bool ArgFolded = false;
unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs();
unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs();
unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs();
@@ -330,59 +352,36 @@ public:
unsigned RegStartIdx =
(MemOutSize + 1 == RegOutSize) && (MemInSize == RegInSize) ? 1 : 0;
- for (unsigned i = 0, e = MemInst->Operands.size(); i < e; i++) {
- Record *MemOpRec = MemInst->Operands[i].Rec;
- Record *RegOpRec = RegInst->Operands[i + RegStartIdx].Rec;
+ bool FoundFoldedOp = false;
+ for (unsigned I = 0, E = MemInst->Operands.size(); I != E; I++) {
+ Record *MemOpRec = MemInst->Operands[I].Rec;
+ Record *RegOpRec = RegInst->Operands[I + RegStartIdx].Rec;
if (MemOpRec == RegOpRec)
continue;
- if (isRegisterOperand(MemOpRec) && isRegisterOperand(RegOpRec)) {
- if (getRegOperandSize(MemOpRec) != getRegOperandSize(RegOpRec) ||
- isNOREXRegClass(MemOpRec) != isNOREXRegClass(RegOpRec))
- return false;
- } else if (isMemoryOperand(MemOpRec) && isMemoryOperand(RegOpRec)) {
- if (getMemOperandSize(MemOpRec) != getMemOperandSize(RegOpRec))
- return false;
- } else if (isImmediateOperand(MemOpRec) && isImmediateOperand(RegOpRec)) {
- if (MemOpRec->getValueAsDef("Type") != RegOpRec->getValueAsDef("Type"))
- return false;
- } else {
- // Only one operand can be folded.
- if (ArgFolded)
- return false;
-
- assert(isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec));
- ArgFolded = true;
- }
- }
+ if (isRegisterOperand(MemOpRec) && isRegisterOperand(RegOpRec) &&
+ ((getRegOperandSize(MemOpRec) != getRegOperandSize(RegOpRec)) ||
+ (isNOREXRegClass(MemOpRec) != isNOREXRegClass(RegOpRec))))
+ return false;
- return true;
- }
+ if (isMemoryOperand(MemOpRec) && isMemoryOperand(RegOpRec) &&
+ (getMemOperandSize(MemOpRec) != getMemOperandSize(RegOpRec)))
+ return false;
-private:
- // Return true of the 2 given forms are the opposite of each other.
- bool areOppositeForms(unsigned RegForm, unsigned MemForm) {
- if ((MemForm == X86Local::MRM0m && RegForm == X86Local::MRM0r) ||
- (MemForm == X86Local::MRM1m && RegForm == X86Local::MRM1r) ||
- (MemForm == X86Local::MRM2m && RegForm == X86Local::MRM2r) ||
- (MemForm == X86Local::MRM3m && RegForm == X86Local::MRM3r) ||
- (MemForm == X86Local::MRM4m && RegForm == X86Local::MRM4r) ||
- (MemForm == X86Local::MRM5m && RegForm == X86Local::MRM5r) ||
- (MemForm == X86Local::MRM6m && RegForm == X86Local::MRM6r) ||
- (MemForm == X86Local::MRM7m && RegForm == X86Local::MRM7r) ||
- (MemForm == X86Local::MRMXm && RegForm == X86Local::MRMXr) ||
- (MemForm == X86Local::MRMXmCC && RegForm == X86Local::MRMXrCC) ||
- (MemForm == X86Local::MRMDestMem && RegForm == X86Local::MRMDestReg) ||
- (MemForm == X86Local::MRMSrcMem && RegForm == X86Local::MRMSrcReg) ||
- (MemForm == X86Local::MRMSrcMem4VOp3 &&
- RegForm == X86Local::MRMSrcReg4VOp3) ||
- (MemForm == X86Local::MRMSrcMemOp4 &&
- RegForm == X86Local::MRMSrcRegOp4) ||
- (MemForm == X86Local::MRMSrcMemCC && RegForm == X86Local::MRMSrcRegCC))
- return true;
-
- return false;
+ if (isImmediateOperand(MemOpRec) && isImmediateOperand(RegOpRec) &&
+ (MemOpRec->getValueAsDef("Type") != RegOpRec->getValueAsDef("Type")))
+ return false;
+
+ // Only one operand can be folded.
+ if (FoundFoldedOp)
+ return false;
+
+ assert(isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec));
+ FoundFoldedOp = true;
+ }
+
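+ // A reg/mem pair only matches if exactly one register operand was actually
+ // folded into a memory operand.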
+ return FoundFoldedOp;
}
};
@@ -391,13 +390,23 @@ private:
void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table,
const CodeGenInstruction *RegInstr,
const CodeGenInstruction *MemInstr,
- const UnfoldStrategy S,
- const unsigned int FoldedInd) {
+ uint16_t S, unsigned FoldedIdx,
+ bool isManual) {
X86FoldTableEntry Result = X86FoldTableEntry(RegInstr, MemInstr);
Record *RegRec = RegInstr->TheDef;
Record *MemRec = MemInstr->TheDef;
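+ // Manual entries (the ENTRY lines in X86ManualFoldTables.def) carry their
+ // fold flags directly in S, e.g. TB_FOLDED_LOAD | TB_NO_FORWARD, so decode
+ // them here and skip the automatic deduction below.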
+ if (isManual) {
+ Result.NoReverse = S & TB_NO_REVERSE;
+ Result.NoForward = S & TB_NO_FORWARD;
+ Result.FoldLoad = S & TB_FOLDED_LOAD;
+ Result.FoldStore = S & TB_FOLDED_STORE;
+ Result.Alignment = Align(1ULL << ((S & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT));
+ Table[RegInstr] = Result;
+ return;
+ }
+
// Only table0 entries should explicitly specify a load or store flag.
if (&Table == &Table0) {
unsigned MemInOpsNum = MemRec->getValueAsDag("InOperandList")->getNumArgs();
@@ -408,48 +417,62 @@ void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table,
// If the instruction reads from the folded operand, it will appear as an
// input in both forms.
if (MemInOpsNum == RegInOpsNum)
- Result.IsLoad = true;
+ Result.FoldLoad = true;
else
- Result.IsStore = true;
+ Result.FoldStore = true;
}
- Record *RegOpRec = RegInstr->Operands[FoldedInd].Rec;
- Record *MemOpRec = MemInstr->Operands[FoldedInd].Rec;
+ Record *RegOpRec = RegInstr->Operands[FoldedIdx].Rec;
+ Record *MemOpRec = MemInstr->Operands[FoldedIdx].Rec;
// Unfolding code generates a load/store instruction according to the size of
// the register in the register form instruction.
// If the register's size is greater than the memory operand's size, do not
// allow unfolding.
- if (S == UNFOLD)
- Result.CannotUnfold = false;
- else if (S == NO_UNFOLD)
- Result.CannotUnfold = true;
- else if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec))
- Result.CannotUnfold = true; // S == NO_STRATEGY
-
- uint64_t Enc = getValueFromBitsInit(RegRec->getValueAsBitsInit("OpEncBits"));
+
+ // The unfolded load size will be based on the register size. If that's bigger
+ // than the memory operand size, the unfolded load will load more memory and
+ // potentially cause a memory fault.
+ if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec))
+ Result.NoReverse = true;
+
+ // Check the isMoveReg flag of the corresponding unmasked (non-k/kz) version.
+ StringRef RegInstName = RegRec->getName();
+ unsigned DropLen =
+ RegInstName.endswith("rkz") ? 2 : (RegInstName.endswith("rk") ? 1 : 0);
+ Record *BaseDef =
+ DropLen ? Records.getDef(RegInstName.drop_back(DropLen)) : nullptr;
+ bool IsMoveReg =
+ BaseDef ? Target.getInstruction(BaseDef).isMoveReg : RegInstr->isMoveReg;
+ // A masked load cannot be unfolded to a full load, otherwise it would access
+ // unexpected memory. A simple store cannot be unfolded.
+ if (IsMoveReg && (BaseDef || Result.FoldStore))
+ Result.NoReverse = true;
+
+ uint8_t Enc = byteFromBitsInit(RegRec->getValueAsBitsInit("OpEncBits"));
if (isExplicitAlign(RegInstr)) {
// The instruction requires explicitly aligned memory.
BitsInit *VectSize = RegRec->getValueAsBitsInit("VectSize");
- uint64_t Value = getValueFromBitsInit(VectSize);
- Result.IsAligned = true;
- Result.Alignment = Value;
- } else if (Enc != X86Local::XOP && Enc != X86Local::VEX &&
- Enc != X86Local::EVEX) {
- // Instructions with VEX encoding do not require alignment.
- if (!isExplicitUnalign(RegInstr) && getMemOperandSize(MemOpRec) > 64) {
- // SSE packed vector instructions require a 16 byte alignment.
- Result.IsAligned = true;
- Result.Alignment = 16;
- }
+ Result.Alignment = Align(byteFromBitsInit(VectSize));
+ } else if (!Enc && !isExplicitUnalign(RegInstr) &&
+ getMemOperandSize(MemOpRec) > 64) {
+ // Instructions with XOP/VEX/EVEX encoding do not require alignment while
+ // SSE packed vector instructions require a 16 byte alignment.
+ Result.Alignment = Align(16);
}
-
- Table.push_back(Result);
+ // Expand is only ever created as a masked instruction. It is not safe to
+ // unfold a masked expand because we don't know if it came from an expand load
+ // intrinsic or folding a plain load. If it is from an expand load intrinsic,
+ // unfolding to a plain load would read more elements and could trigger a fault.
+ if (RegRec->getName().contains("EXPAND"))
+ Result.NoReverse = true;
+
+ Table[RegInstr] = Result;
}
void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr,
const CodeGenInstruction *MemInstr,
- const UnfoldStrategy S) {
+ uint16_t S, bool IsManual) {
Record *RegRec = RegInstr->TheDef;
Record *MemRec = MemInstr->TheDef;
@@ -459,8 +482,8 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr,
unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs();
// Instructions which Read-Modify-Write should be added to Table2Addr.
- if (MemOutSize != RegOutSize && MemInSize == RegInSize) {
- addEntryWithFlags(Table2Addr, RegInstr, MemInstr, S, 0);
+ if (!MemOutSize && RegOutSize == 1 && MemInSize == RegInSize) {
+ addEntryWithFlags(Table2Addr, RegInstr, MemInstr, S, 0, IsManual);
return;
}
@@ -477,19 +500,19 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr,
isMemoryOperand(MemOpRec)) {
switch (i) {
case 0:
- addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0);
+ addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0, IsManual);
return;
case 1:
- addEntryWithFlags(Table1, RegInstr, MemInstr, S, 1);
+ addEntryWithFlags(Table1, RegInstr, MemInstr, S, 1, IsManual);
return;
case 2:
- addEntryWithFlags(Table2, RegInstr, MemInstr, S, 2);
+ addEntryWithFlags(Table2, RegInstr, MemInstr, S, 2, IsManual);
return;
case 3:
- addEntryWithFlags(Table3, RegInstr, MemInstr, S, 3);
+ addEntryWithFlags(Table3, RegInstr, MemInstr, S, 3, IsManual);
return;
case 4:
- addEntryWithFlags(Table4, RegInstr, MemInstr, S, 4);
+ addEntryWithFlags(Table4, RegInstr, MemInstr, S, 4, IsManual);
return;
}
}
@@ -506,12 +529,12 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr,
Record *MemOpRec = MemInstr->Operands[RegOutSize - 1].Rec;
if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec) &&
getRegOperandSize(RegOpRec) == getMemOperandSize(MemOpRec))
- addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0);
+ addEntryWithFlags(Table0, RegInstr, MemInstr, S, 0, IsManual);
}
}
-void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
- emitSourceFileHeader("X86 fold tables", OS);
+void X86FoldTablesEmitter::run(raw_ostream &o) {
+ formatted_raw_ostream OS(o);
// Holds all memory instructions
std::vector<const CodeGenInstruction *> MemInsts;
@@ -526,7 +549,9 @@ void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
if (!Rec->isSubClassOf("X86Inst") || Rec->getValueAsBit("isAsmParserOnly"))
continue;
- // - Do not proceed if the instruction is marked as notMemoryFoldable.
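+ // Skip instructions that are explicitly excluded from folding via the
+ // NOFOLD list in X86ManualFoldTables.def.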
+ if (NoFoldSet.find(Rec->getName()) != NoFoldSet.end())
+ continue;
+
// - Instructions including RST register class operands are not relevant
// for memory folding (for further details check the explanation in
// lib/Target/X86/X86InstrFPStack.td file).
@@ -534,17 +559,18 @@ void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
// class ptr_rc_tailcall, which can be of size 32 or 64, to ensure
// safe mapping of these instructions we manually map them and exclude
// them from the automation.
- if (Rec->getValueAsBit("isMemoryFoldable") == false ||
- hasRSTRegClass(Inst) || hasPtrTailcallRegClass(Inst))
+ if (hasRSTRegClass(Inst) || hasPtrTailcallRegClass(Inst))
continue;
// Add all the memory form instructions to MemInsts, and all the register
- // form instructions to RegInsts[Opc], where Opc in the opcode of each
+ // form instructions to RegInsts[Opc], where Opc is the opcode of each
// instruction. This helps reduce the runtime of the backend.
- if (hasMemoryFormat(Rec))
+ const BitsInit *FormBits = Rec->getValueAsBitsInit("FormBits");
+ uint8_t Form = byteFromBitsInit(FormBits);
+ if (mayFoldToForm(Form))
MemInsts.push_back(Inst);
- else if (hasRegisterFormat(Rec)) {
- uint8_t Opc = getValueFromBitsInit(Rec->getValueAsBitsInit("Opcode"));
+ else if (mayFoldFromForm(Form)) {
+ uint8_t Opc = byteFromBitsInit(Rec->getValueAsBitsInit("Opcode"));
RegInsts[Opc].push_back(Inst);
}
}
@@ -555,7 +581,7 @@ void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
// instruction.
for (const CodeGenInstruction *MemInst : MemInsts) {
uint8_t Opc =
- getValueFromBitsInit(MemInst->TheDef->getValueAsBitsInit("Opcode"));
+ byteFromBitsInit(MemInst->TheDef->getValueAsBitsInit("Opcode"));
auto RegInstsIt = RegInsts.find(Opc);
if (RegInstsIt == RegInsts.end())
@@ -569,16 +595,13 @@ void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
auto Match = find_if(OpcRegInsts, IsMatch(MemInst, Variant));
if (Match != OpcRegInsts.end()) {
const CodeGenInstruction *RegInst = *Match;
- // If the matched instruction has it's "FoldGenRegForm" set, map the
- // memory form instruction to the register form instruction pointed by
- // this field
- if (RegInst->TheDef->isValueUnset("FoldGenRegForm")) {
- updateTables(RegInst, MemInst);
- } else {
- const CodeGenInstruction *AltRegInst =
- getAltRegInst(RegInst, Records, Target);
- updateTables(AltRegInst, MemInst);
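+ // Prefer the canonical register form: strip a trailing "_REV" or "_alt"
+ // suffix (both four characters long) and use the base instruction if it exists.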
+ StringRef RegInstName = RegInst->TheDef->getName();
+ if (RegInstName.endswith("_REV") || RegInstName.endswith("_alt")) {
+ if (auto *RegAltRec = Records.getDef(RegInstName.drop_back(4))) {
+ RegInst = &Target.getInstruction(RegAltRec);
+ }
}
+ updateTables(RegInst, MemInst);
OpcRegInsts.erase(Match);
}
}
@@ -589,17 +612,23 @@ void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
Record *MemInstIter = Records.getDef(Entry.MemInstStr);
updateTables(&(Target.getInstruction(RegInstIter)),
- &(Target.getInstruction(MemInstIter)), Entry.Strategy);
+ &(Target.getInstruction(MemInstIter)), Entry.Strategy, true);
}
- // Sort the tables before printing.
- llvm::sort(Table2Addr);
- llvm::sort(Table0);
- llvm::sort(Table1);
- llvm::sort(Table2);
- llvm::sort(Table3);
- llvm::sort(Table4);
-
+#ifndef NDEBUG
+ auto CheckMemFoldTable = [](const FoldTable &Table) -> void {
+ for (const auto &Record : Table) {
+ auto &FoldEntry = Record.second;
+ FoldEntry.checkCorrectness();
+ }
+ };
+ CheckMemFoldTable(Table2Addr);
+ CheckMemFoldTable(Table0);
+ CheckMemFoldTable(Table1);
+ CheckMemFoldTable(Table2);
+ CheckMemFoldTable(Table3);
+ CheckMemFoldTable(Table4);
+#endif
// Print all tables.
printTable(Table2Addr, "Table2Addr", OS);
printTable(Table0, "Table0", OS);
@@ -609,10 +638,5 @@ void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) {
printTable(Table4, "Table4", OS);
}
-namespace llvm {
-
-void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &o) {
- formatted_raw_ostream OS(o);
- X86FoldTablesEmitter(RK).run(OS);
-}
-} // namespace llvm
+static TableGen::Emitter::OptClass<X86FoldTablesEmitter>
+ X("gen-x86-fold-tables", "Generate X86 fold tables");
diff --git a/llvm/utils/TableGen/X86ManualFoldTables.def b/llvm/utils/TableGen/X86ManualFoldTables.def
new file mode 100644
index 000000000000..d949830b0988
--- /dev/null
+++ b/llvm/utils/TableGen/X86ManualFoldTables.def
@@ -0,0 +1,288 @@
+//===- X86ManualFoldTables.def ----------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// \file
+// This file defines all the entries in X86 memory folding tables that need
+// special handling.
+//===----------------------------------------------------------------------===//
+
+#ifndef NOFOLD
+#define NOFOLD(INSN)
+#endif
+NOFOLD(BTC16rr)
+NOFOLD(BTC32rr)
+NOFOLD(BTC64rr)
+NOFOLD(BTR16rr)
+NOFOLD(BTR32rr)
+NOFOLD(BTR64rr)
+NOFOLD(BTS16rr)
+NOFOLD(BTS32rr)
+NOFOLD(BTS64rr)
+NOFOLD(VCOMPRESSPDZ128rrk)
+NOFOLD(VCOMPRESSPDZ256rrk)
+NOFOLD(VCOMPRESSPDZrrk)
+NOFOLD(VCOMPRESSPSZ128rrk)
+NOFOLD(VCOMPRESSPSZ256rrk)
+NOFOLD(VCOMPRESSPSZrrk)
+NOFOLD(VCVTPS2PHZ128rrk)
+NOFOLD(VCVTPS2PHZ256rrk)
+NOFOLD(VCVTPS2PHZrrk)
+NOFOLD(VEXTRACTF32x4Z256rrk)
+NOFOLD(VEXTRACTF32x4Zrrk)
+NOFOLD(VEXTRACTF32x8Zrrk)
+NOFOLD(VEXTRACTF64x2Z256rrk)
+NOFOLD(VEXTRACTF64x2Zrrk)
+NOFOLD(VEXTRACTF64x4Zrrk)
+NOFOLD(VEXTRACTI32x4Z256rrk)
+NOFOLD(VEXTRACTI32x4Zrrk)
+NOFOLD(VEXTRACTI32x8Zrrk)
+NOFOLD(VEXTRACTI64x2Z256rrk)
+NOFOLD(VEXTRACTI64x2Zrrk)
+NOFOLD(VEXTRACTI64x4Zrrk)
+NOFOLD(VMOVAPDZ128mrk)
+NOFOLD(VMOVAPDZ256mrk)
+NOFOLD(VMOVAPDZmrk)
+NOFOLD(VMOVAPSZ128mrk)
+NOFOLD(VMOVAPSZ256mrk)
+NOFOLD(VMOVAPSZmrk)
+NOFOLD(VMOVDQA32Z128mrk)
+NOFOLD(VMOVDQA32Z256mrk)
+NOFOLD(VMOVDQA32Zmrk)
+NOFOLD(VMOVDQA64Z128mrk)
+NOFOLD(VMOVDQA64Z256mrk)
+NOFOLD(VMOVDQA64Zmrk)
+NOFOLD(VMOVDQU16Z128mrk)
+NOFOLD(VMOVDQU16Z256mrk)
+NOFOLD(VMOVDQU16Zmrk)
+NOFOLD(VMOVDQU32Z128mrk)
+NOFOLD(VMOVDQU32Z256mrk)
+NOFOLD(VMOVDQU32Zmrk)
+NOFOLD(VMOVDQU64Z128mrk)
+NOFOLD(VMOVDQU64Z256mrk)
+NOFOLD(VMOVDQU64Zmrk)
+NOFOLD(VMOVDQU8Z128mrk)
+NOFOLD(VMOVDQU8Z256mrk)
+NOFOLD(VMOVDQU8Zmrk)
+NOFOLD(VMOVUPDZ128mrk)
+NOFOLD(VMOVUPDZ256mrk)
+NOFOLD(VMOVUPDZmrk)
+NOFOLD(VMOVUPSZ128mrk)
+NOFOLD(VMOVUPSZ256mrk)
+NOFOLD(VMOVUPSZmrk)
+NOFOLD(VPCOMPRESSBZ128rrk)
+NOFOLD(VPCOMPRESSBZ256rrk)
+NOFOLD(VPCOMPRESSBZrrk)
+NOFOLD(VPCOMPRESSDZ128rrk)
+NOFOLD(VPCOMPRESSDZ256rrk)
+NOFOLD(VPCOMPRESSDZrrk)
+NOFOLD(VPCOMPRESSQZ128rrk)
+NOFOLD(VPCOMPRESSQZ256rrk)
+NOFOLD(VPCOMPRESSQZrrk)
+NOFOLD(VPCOMPRESSWZ128rrk)
+NOFOLD(VPCOMPRESSWZ256rrk)
+NOFOLD(VPCOMPRESSWZrrk)
+NOFOLD(VPMOVDBZ128rrk)
+NOFOLD(VPMOVDBZ256rrk)
+NOFOLD(VPMOVDBZrrk)
+NOFOLD(VPMOVDWZ128rrk)
+NOFOLD(VPMOVDWZ256rrk)
+NOFOLD(VPMOVDWZrrk)
+NOFOLD(VPMOVQBZ128rrk)
+NOFOLD(VPMOVQBZ256rrk)
+NOFOLD(VPMOVQBZrrk)
+NOFOLD(VPMOVQDZ128rrk)
+NOFOLD(VPMOVQDZ256rrk)
+NOFOLD(VPMOVQDZrrk)
+NOFOLD(VPMOVQWZ128rrk)
+NOFOLD(VPMOVQWZ256rrk)
+NOFOLD(VPMOVQWZrrk)
+NOFOLD(VPMOVSDBZ128rrk)
+NOFOLD(VPMOVSDBZ256rrk)
+NOFOLD(VPMOVSDBZrrk)
+NOFOLD(VPMOVSDWZ128rrk)
+NOFOLD(VPMOVSDWZ256rrk)
+NOFOLD(VPMOVSDWZrrk)
+NOFOLD(VPMOVSQBZ128rrk)
+NOFOLD(VPMOVSQBZ256rrk)
+NOFOLD(VPMOVSQBZrrk)
+NOFOLD(VPMOVSQDZ128rrk)
+NOFOLD(VPMOVSQDZ256rrk)
+NOFOLD(VPMOVSQDZrrk)
+NOFOLD(VPMOVSQWZ128rrk)
+NOFOLD(VPMOVSQWZ256rrk)
+NOFOLD(VPMOVSQWZrrk)
+NOFOLD(VPMOVSWBZ128rrk)
+NOFOLD(VPMOVSWBZ256rrk)
+NOFOLD(VPMOVSWBZrrk)
+NOFOLD(VPMOVUSDBZ128rrk)
+NOFOLD(VPMOVUSDBZ256rrk)
+NOFOLD(VPMOVUSDBZrrk)
+NOFOLD(VPMOVUSDWZ128rrk)
+NOFOLD(VPMOVUSDWZ256rrk)
+NOFOLD(VPMOVUSDWZrrk)
+NOFOLD(VPMOVUSQBZ128rrk)
+NOFOLD(VPMOVUSQBZ256rrk)
+NOFOLD(VPMOVUSQBZrrk)
+NOFOLD(VPMOVUSQDZ128rrk)
+NOFOLD(VPMOVUSQDZ256rrk)
+NOFOLD(VPMOVUSQDZrrk)
+NOFOLD(VPMOVUSQWZ128rrk)
+NOFOLD(VPMOVUSQWZ256rrk)
+NOFOLD(VPMOVUSQWZrrk)
+NOFOLD(VPMOVUSWBZ128rrk)
+NOFOLD(VPMOVUSWBZ256rrk)
+NOFOLD(VPMOVUSWBZrrk)
+NOFOLD(VPMOVWBZ128rrk)
+NOFOLD(VPMOVWBZ256rrk)
+NOFOLD(VPMOVWBZrrk)
+NOFOLD(ARPL16rr)
+NOFOLD(BT16rr)
+NOFOLD(BT32rr)
+NOFOLD(BT64rr)
+NOFOLD(CMPXCHG16rr)
+NOFOLD(CMPXCHG32rr)
+NOFOLD(CMPXCHG64rr)
+NOFOLD(CMPXCHG8rr)
+NOFOLD(LLDT16r)
+NOFOLD(LMSW16r)
+NOFOLD(LTRr)
+NOFOLD(NOOPLr)
+NOFOLD(NOOPQr)
+NOFOLD(NOOPWr)
+NOFOLD(POP16rmr)
+NOFOLD(POP32rmr)
+NOFOLD(POP64rmr)
+NOFOLD(PUSH16rmr)
+NOFOLD(PUSH32rmr)
+NOFOLD(PUSH64rmr)
+NOFOLD(VCOMPRESSPDZ128rr)
+NOFOLD(VCOMPRESSPDZ256rr)
+NOFOLD(VCOMPRESSPDZrr)
+NOFOLD(VCOMPRESSPSZ128rr)
+NOFOLD(VCOMPRESSPSZ256rr)
+NOFOLD(VCOMPRESSPSZrr)
+NOFOLD(VERRr)
+NOFOLD(VERWr)
+NOFOLD(VMREAD32rr)
+NOFOLD(VMREAD64rr)
+NOFOLD(VPCOMPRESSBZ128rr)
+NOFOLD(VPCOMPRESSBZ256rr)
+NOFOLD(VPCOMPRESSBZrr)
+NOFOLD(VPCOMPRESSDZ128rr)
+NOFOLD(VPCOMPRESSDZ256rr)
+NOFOLD(VPCOMPRESSDZrr)
+NOFOLD(VPCOMPRESSQZ128rr)
+NOFOLD(VPCOMPRESSQZ256rr)
+NOFOLD(VPCOMPRESSQZrr)
+NOFOLD(VPCOMPRESSWZ128rr)
+NOFOLD(VPCOMPRESSWZ256rr)
+NOFOLD(VPCOMPRESSWZrr)
+NOFOLD(LAR16rr)
+NOFOLD(LAR32rr)
+NOFOLD(LAR64rr)
+NOFOLD(LSL16rr)
+NOFOLD(LSL32rr)
+NOFOLD(LSL64rr)
+NOFOLD(MOVSX16rr16)
+NOFOLD(MOVZX16rr16)
+NOFOLD(VMWRITE32rr)
+NOFOLD(VMWRITE64rr)
+NOFOLD(VBLENDMPDZ128rrkz)
+NOFOLD(VBLENDMPDZ256rrkz)
+NOFOLD(VBLENDMPDZrrkz)
+NOFOLD(VBLENDMPSZ128rrkz)
+NOFOLD(VBLENDMPSZ256rrkz)
+NOFOLD(VBLENDMPSZrrkz)
+NOFOLD(VPBLENDMBZ128rrkz)
+NOFOLD(VPBLENDMBZ256rrkz)
+NOFOLD(VPBLENDMBZrrkz)
+NOFOLD(VPBLENDMDZ128rrkz)
+NOFOLD(VPBLENDMDZ256rrkz)
+NOFOLD(VPBLENDMDZrrkz)
+NOFOLD(VPBLENDMQZ128rrkz)
+NOFOLD(VPBLENDMQZ256rrkz)
+NOFOLD(VPBLENDMQZrrkz)
+NOFOLD(VPBLENDMWZ128rrkz)
+NOFOLD(VPBLENDMWZ256rrkz)
+NOFOLD(VPBLENDMWZrrkz)
+NOFOLD(UD1Lr)
+NOFOLD(UD1Qr)
+NOFOLD(UD1Wr)
+// Exclude these two because they would conflict with {MMX_MOVD64from64rr, MMX_MOVQ64mr} in the unfolding table.
+NOFOLD(MMX_MOVQ64rr)
+NOFOLD(MMX_MOVQ64rr_REV)
+// INSERTPSrm has no count_s while INSERTPSrr has count_s.
+// count_s indicates which element in the dst vector is inserted.
+// If count_s != 0, we cannot fold INSERTPSrr into INSERTPSrm.
+//
+// The following folding can happen when count_s == 0:
+// load xmm0, m32
+// insertpsrr xmm1, xmm0, imm
+// =>
+// insertpsrm xmm1, m32, imm
+NOFOLD(INSERTPSrr)
+#undef NOFOLD
+
+#ifndef ENTRY
+#define ENTRY(REG, MEM, FLAGS)
+#endif
+// The following entries are added manually because the encoding of the reg form
+// does not match the encoding of the memory form.
+ENTRY(ADD16ri_DB, ADD16mi, TB_NO_REVERSE)
+ENTRY(ADD16rr_DB, ADD16mr, TB_NO_REVERSE)
+ENTRY(ADD32ri_DB, ADD32mi, TB_NO_REVERSE)
+ENTRY(ADD32rr_DB, ADD32mr, TB_NO_REVERSE)
+ENTRY(ADD64ri32_DB, ADD64mi32, TB_NO_REVERSE)
+ENTRY(ADD64rr_DB, ADD64mr, TB_NO_REVERSE)
+ENTRY(ADD8ri_DB, ADD8mi, TB_NO_REVERSE)
+ENTRY(ADD8rr_DB, ADD8mr, TB_NO_REVERSE)
+ENTRY(ADD16rr_DB, ADD16rm, TB_NO_REVERSE)
+ENTRY(ADD32rr_DB, ADD32rm, TB_NO_REVERSE)
+ENTRY(ADD64rr_DB, ADD64rm, TB_NO_REVERSE)
+ENTRY(ADD8rr_DB, ADD8rm, TB_NO_REVERSE)
+ENTRY(MMX_MOVD64from64rr, MMX_MOVQ64mr, TB_FOLDED_STORE)
+ENTRY(MMX_MOVD64grr, MMX_MOVD64mr, TB_FOLDED_STORE)
+ENTRY(MOV64toSDrr, MOV64mr, TB_FOLDED_STORE | TB_NO_REVERSE)
+ENTRY(MOVDI2SSrr, MOV32mr, TB_FOLDED_STORE | TB_NO_REVERSE)
+ENTRY(MOVPQIto64rr, MOVPQI2QImr, TB_FOLDED_STORE | TB_NO_REVERSE)
+ENTRY(MOVSDto64rr, MOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE)
+ENTRY(MOVSS2DIrr, MOVSSmr, TB_FOLDED_STORE)
+ENTRY(MOVLHPSrr, MOVHPSrm, TB_NO_REVERSE)
+ENTRY(PUSH16r, PUSH16rmm, TB_FOLDED_LOAD)
+ENTRY(PUSH32r, PUSH32rmm, TB_FOLDED_LOAD)
+ENTRY(PUSH64r, PUSH64rmm, TB_FOLDED_LOAD)
+ENTRY(TAILJMPr, TAILJMPm, TB_FOLDED_LOAD)
+ENTRY(TAILJMPr64, TAILJMPm64, TB_FOLDED_LOAD)
+ENTRY(TAILJMPr64_REX, TAILJMPm64_REX, TB_FOLDED_LOAD)
+ENTRY(TCRETURNri, TCRETURNmi, TB_FOLDED_LOAD | TB_NO_FORWARD)
+ENTRY(TCRETURNri64, TCRETURNmi64, TB_FOLDED_LOAD | TB_NO_FORWARD)
+ENTRY(VMOVLHPSZrr, VMOVHPSZ128rm, TB_NO_REVERSE)
+ENTRY(VMOVLHPSrr, VMOVHPSrm, TB_NO_REVERSE)
+ENTRY(VMOV64toSDZrr, MOV64mr, TB_FOLDED_STORE | TB_NO_REVERSE)
+ENTRY(VMOV64toSDrr, MOV64mr, TB_FOLDED_STORE | TB_NO_REVERSE)
+ENTRY(VMOVDI2SSZrr, MOV32mr, TB_FOLDED_STORE | TB_NO_REVERSE)
+ENTRY(VMOVDI2SSrr, MOV32mr, TB_FOLDED_STORE | TB_NO_REVERSE)
+ENTRY(VMOVPQIto64Zrr, VMOVPQI2QIZmr, TB_FOLDED_STORE | TB_NO_REVERSE)
+ENTRY(VMOVPQIto64rr, VMOVPQI2QImr, TB_FOLDED_STORE | TB_NO_REVERSE)
+ENTRY(VMOVSDto64Zrr, VMOVSDZmr, TB_FOLDED_STORE | TB_NO_REVERSE)
+ENTRY(VMOVSDto64rr, VMOVSDmr, TB_FOLDED_STORE | TB_NO_REVERSE)
+ENTRY(VMOVSS2DIZrr, VMOVSSZmr, TB_FOLDED_STORE)
+ENTRY(VMOVSS2DIrr, VMOVSSmr, TB_FOLDED_STORE)
+ENTRY(MMX_MOVD64to64rr, MMX_MOVQ64rm, 0)
+ENTRY(MOV64toPQIrr, MOVQI2PQIrm, TB_NO_REVERSE)
+ENTRY(MOV64toSDrr, MOVSDrm_alt, TB_NO_REVERSE)
+ENTRY(MOVDI2SSrr, MOVSSrm_alt, 0)
+ENTRY(VMOV64toPQIZrr, VMOVQI2PQIZrm, TB_NO_REVERSE)
+ENTRY(VMOV64toPQIrr, VMOVQI2PQIrm, TB_NO_REVERSE)
+ENTRY(VMOV64toSDZrr, VMOVSDZrm_alt, TB_NO_REVERSE)
+ENTRY(VMOV64toSDrr, VMOVSDrm_alt, TB_NO_REVERSE)
+ENTRY(VMOVDI2SSZrr, VMOVSSZrm_alt, 0)
+ENTRY(VMOVDI2SSrr, VMOVSSrm_alt, 0)
+ENTRY(MOVSDrr, MOVLPDrm, TB_NO_REVERSE)
+ENTRY(VMOVSDZrr, VMOVLPDZ128rm, TB_NO_REVERSE)
+ENTRY(VMOVSDrr, VMOVLPDrm, TB_NO_REVERSE)
+#undef ENTRY
diff --git a/llvm/utils/TableGen/X86MnemonicTables.cpp b/llvm/utils/TableGen/X86MnemonicTables.cpp
index f405e051e355..aeafee157462 100644
--- a/llvm/utils/TableGen/X86MnemonicTables.cpp
+++ b/llvm/utils/TableGen/X86MnemonicTables.cpp
@@ -14,7 +14,7 @@
#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
#include "X86RecognizableInstr.h"
-#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
using namespace llvm;
@@ -87,8 +87,5 @@ void X86MnemonicTablesEmitter::run(raw_ostream &OS) {
} // namespace
-namespace llvm {
-void EmitX86MnemonicTables(RecordKeeper &RK, raw_ostream &OS) {
- X86MnemonicTablesEmitter(RK).run(OS);
-}
-} // namespace llvm
+static TableGen::Emitter::OptClass<X86MnemonicTablesEmitter>
+ X("gen-x86-mnemonic-tables", "Generate X86 mnemonic tables");
diff --git a/llvm/utils/TableGen/X86ModRMFilters.h b/llvm/utils/TableGen/X86ModRMFilters.h
index e2d0907b4f8b..d2169a8e879b 100644
--- a/llvm/utils/TableGen/X86ModRMFilters.h
+++ b/llvm/utils/TableGen/X86ModRMFilters.h
@@ -17,7 +17,7 @@
#ifndef LLVM_UTILS_TABLEGEN_X86MODRMFILTERS_H
#define LLVM_UTILS_TABLEGEN_X86MODRMFILTERS_H
-#include "llvm/Support/DataTypes.h"
+#include <cstdint>
namespace llvm {
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.cpp b/llvm/utils/TableGen/X86RecognizableInstr.cpp
index e5c1e53936f6..b2f51ba01689 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -118,8 +118,7 @@ RecognizableInstrBase::RecognizableInstrBase(const CodeGenInstruction &insn) {
AdSize = byteFromRec(Rec, "AdSizeBits");
HasREX_W = Rec->getValueAsBit("hasREX_W");
HasVEX_4V = Rec->getValueAsBit("hasVEX_4V");
- HasVEX_W = Rec->getValueAsBit("HasVEX_W");
- IgnoresVEX_W = Rec->getValueAsBit("IgnoresVEX_W");
+ IgnoresW = Rec->getValueAsBit("IgnoresW");
IgnoresVEX_L = Rec->getValueAsBit("ignoresVEX_L");
HasEVEX_L2 = Rec->getValueAsBit("hasEVEX_L2");
HasEVEX_K = Rec->getValueAsBit("hasEVEX_K");
@@ -189,7 +188,7 @@ InstructionContext RecognizableInstr::insnContext() const {
llvm_unreachable("Don't support VEX.L if EVEX_L2 is enabled");
}
// VEX_L & VEX_W
- if (!EncodeRC && HasVEX_L && HasVEX_W) {
+ if (!EncodeRC && HasVEX_L && HasREX_W) {
if (OpPrefix == X86Local::PD)
insnContext = EVEX_KB(IC_EVEX_L_W_OPSIZE);
else if (OpPrefix == X86Local::XS)
@@ -216,7 +215,7 @@ InstructionContext RecognizableInstr::insnContext() const {
errs() << "Instruction does not use a prefix: " << Name << "\n";
llvm_unreachable("Invalid prefix");
}
- } else if (!EncodeRC && HasEVEX_L2 && HasVEX_W) {
+ } else if (!EncodeRC && HasEVEX_L2 && HasREX_W) {
// EVEX_L2 & VEX_W
if (OpPrefix == X86Local::PD)
insnContext = EVEX_KB(IC_EVEX_L2_W_OPSIZE);
@@ -245,7 +244,7 @@ InstructionContext RecognizableInstr::insnContext() const {
llvm_unreachable("Invalid prefix");
}
}
- else if (HasVEX_W) {
+ else if (HasREX_W) {
// VEX_W
if (OpPrefix == X86Local::PD)
insnContext = EVEX_KB(IC_EVEX_W_OPSIZE);
@@ -275,7 +274,7 @@ InstructionContext RecognizableInstr::insnContext() const {
}
/// eof EVEX
} else if (Encoding == X86Local::VEX || Encoding == X86Local::XOP) {
- if (HasVEX_L && HasVEX_W) {
+ if (HasVEX_L && HasREX_W) {
if (OpPrefix == X86Local::PD)
insnContext = IC_VEX_L_W_OPSIZE;
else if (OpPrefix == X86Local::XS)
@@ -290,7 +289,7 @@ InstructionContext RecognizableInstr::insnContext() const {
}
} else if (OpPrefix == X86Local::PD && HasVEX_L)
insnContext = IC_VEX_L_OPSIZE;
- else if (OpPrefix == X86Local::PD && HasVEX_W)
+ else if (OpPrefix == X86Local::PD && HasREX_W)
insnContext = IC_VEX_W_OPSIZE;
else if (OpPrefix == X86Local::PD)
insnContext = IC_VEX_OPSIZE;
@@ -298,11 +297,11 @@ InstructionContext RecognizableInstr::insnContext() const {
insnContext = IC_VEX_L_XS;
else if (HasVEX_L && OpPrefix == X86Local::XD)
insnContext = IC_VEX_L_XD;
- else if (HasVEX_W && OpPrefix == X86Local::XS)
+ else if (HasREX_W && OpPrefix == X86Local::XS)
insnContext = IC_VEX_W_XS;
- else if (HasVEX_W && OpPrefix == X86Local::XD)
+ else if (HasREX_W && OpPrefix == X86Local::XD)
insnContext = IC_VEX_W_XD;
- else if (HasVEX_W && OpPrefix == X86Local::PS)
+ else if (HasREX_W && OpPrefix == X86Local::PS)
insnContext = IC_VEX_W;
else if (HasVEX_L && OpPrefix == X86Local::PS)
insnContext = IC_VEX_L;
@@ -532,7 +531,7 @@ void RecognizableInstr::emitInstructionSpecifier() {
// Operand 3 (optional) is an immediate.
assert(numPhysicalOperands >= 2 + additionalOperands &&
numPhysicalOperands <= 3 + additionalOperands &&
- "Unexpected number of operands for MRMDestRegFrm");
+ "Unexpected number of operands for MRMDestReg");
HANDLE_OPERAND(rmRegister)
if (HasEVEX_K)
@@ -883,11 +882,11 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
tables.setTableFields(*opcodeType, insnContext(), currentOpcode, *filter,
UID, Is32Bit, OpPrefix == 0,
IgnoresVEX_L || EncodeRC,
- IgnoresVEX_W, AddressSize);
+ IgnoresW, AddressSize);
} else {
tables.setTableFields(*opcodeType, insnContext(), opcodeToSet, *filter, UID,
Is32Bit, OpPrefix == 0, IgnoresVEX_L || EncodeRC,
- IgnoresVEX_W, AddressSize);
+ IgnoresW, AddressSize);
}
#undef MAP
@@ -955,6 +954,9 @@ OperandType RecognizableInstr::typeFromString(const std::string &s,
TYPE("i128mem", TYPE_M)
TYPE("i256mem", TYPE_M)
TYPE("i512mem", TYPE_M)
+ TYPE("i512mem_GR16", TYPE_M)
+ TYPE("i512mem_GR32", TYPE_M)
+ TYPE("i512mem_GR64", TYPE_M)
TYPE("i64i32imm_brtarget", TYPE_REL)
TYPE("i16imm_brtarget", TYPE_REL)
TYPE("i32imm_brtarget", TYPE_REL)
@@ -1221,6 +1223,9 @@ RecognizableInstr::memoryEncodingFromString(const std::string &s,
ENCODING("i128mem", ENCODING_RM)
ENCODING("i256mem", ENCODING_RM)
ENCODING("i512mem", ENCODING_RM)
+ ENCODING("i512mem_GR16", ENCODING_RM)
+ ENCODING("i512mem_GR32", ENCODING_RM)
+ ENCODING("i512mem_GR64", ENCODING_RM)
ENCODING("f80mem", ENCODING_RM)
ENCODING("lea64_32mem", ENCODING_RM)
ENCODING("lea64mem", ENCODING_RM)
diff --git a/llvm/utils/TableGen/X86RecognizableInstr.h b/llvm/utils/TableGen/X86RecognizableInstr.h
index ea56a9d7d994..5efacdb27465 100644
--- a/llvm/utils/TableGen/X86RecognizableInstr.h
+++ b/llvm/utils/TableGen/X86RecognizableInstr.h
@@ -17,8 +17,10 @@
#define LLVM_UTILS_TABLEGEN_X86RECOGNIZABLEINSTR_H
#include "CodeGenInstruction.h"
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/X86DisassemblerDecoderCommon.h"
+#include <cstdint>
+#include <string>
+#include <vector>
struct InstructionSpecifier;
@@ -180,10 +182,8 @@ struct RecognizableInstrBase {
bool HasREX_W;
/// The hasVEX_4V field from the record
bool HasVEX_4V;
- /// The HasVEX_WPrefix field from the record
- bool HasVEX_W;
- /// The IgnoresVEX_W field from the record
- bool IgnoresVEX_W;
+ /// The IgnoresW field from the record
+ bool IgnoresW;
/// The hasVEX_L field from the record
bool HasVEX_L;
/// The ignoreVEX_L field from the record